author     Daniel Baumann <daniel.baumann@progress-linux.org>  2024-04-15 16:49:24 +0000
committer  Daniel Baumann <daniel.baumann@progress-linux.org>  2024-04-15 16:49:24 +0000
commit     2415e66f889f38503b73e8ebc5f43ca342390e5c (patch)
tree       ac48ab69d1d96bae3d83756134921e0d90593aa5
parent     Initial commit. (diff)
download   yt-dlp-upstream/2024.03.10.tar.xz
           yt-dlp-upstream/2024.03.10.zip

Adding upstream version 2024.03.10. (tag: upstream/2024.03.10)

Signed-off-by: Daniel Baumann <daniel.baumann@progress-linux.org>
-rw-r--r--  .editorconfig  8
-rw-r--r--  .gitattributes  6
-rw-r--r--  .github/FUNDING.yml  13
-rw-r--r--  .github/ISSUE_TEMPLATE/1_broken_site.yml  79
-rw-r--r--  .github/ISSUE_TEMPLATE/2_site_support_request.yml  91
-rw-r--r--  .github/ISSUE_TEMPLATE/3_site_feature_request.yml  87
-rw-r--r--  .github/ISSUE_TEMPLATE/4_bug_report.yml  72
-rw-r--r--  .github/ISSUE_TEMPLATE/5_feature_request.yml  66
-rw-r--r--  .github/ISSUE_TEMPLATE/6_question.yml  72
-rw-r--r--  .github/ISSUE_TEMPLATE/config.yml  8
-rw-r--r--  .github/ISSUE_TEMPLATE_tmpl/1_broken_site.yml  40
-rw-r--r--  .github/ISSUE_TEMPLATE_tmpl/2_site_support_request.yml  52
-rw-r--r--  .github/ISSUE_TEMPLATE_tmpl/3_site_feature_request.yml  48
-rw-r--r--  .github/ISSUE_TEMPLATE_tmpl/4_bug_report.yml  33
-rw-r--r--  .github/ISSUE_TEMPLATE_tmpl/5_feature_request.yml  31
-rw-r--r--  .github/ISSUE_TEMPLATE_tmpl/6_question.yml  37
-rw-r--r--  .github/PULL_REQUEST_TEMPLATE.md  43
-rw-r--r--  .github/banner.svg  31
-rw-r--r--  .github/workflows/build.yml  487
-rw-r--r--  .github/workflows/codeql.yml  65
-rw-r--r--  .github/workflows/core.yml  61
-rw-r--r--  .github/workflows/download.yml  48
-rw-r--r--  .github/workflows/quick-test.yml  35
-rw-r--r--  .github/workflows/release-master.yml  29
-rw-r--r--  .github/workflows/release-nightly.yml  42
-rw-r--r--  .github/workflows/release.yml  387
-rw-r--r--  .gitignore  128
-rw-r--r--  CONTRIBUTING.md  731
-rw-r--r--  CONTRIBUTORS  602
-rw-r--r--  Changelog.md  4280
-rw-r--r--  Collaborators.md  63
-rw-r--r--  LICENSE  24
-rw-r--r--  Makefile  160
-rw-r--r--  README.md  2317
-rw-r--r--  bundle/__init__.py  0
-rwxr-xr-x  bundle/py2exe.py  59
-rwxr-xr-x  bundle/pyinstaller.py  132
-rw-r--r--  devscripts/__init__.py  0
-rw-r--r--  devscripts/bash-completion.in  29
-rwxr-xr-x  devscripts/bash-completion.py  31
-rw-r--r--  devscripts/changelog_override.json  130
-rw-r--r--  devscripts/changelog_override.schema.json  96
-rw-r--r--  devscripts/check-porn.py  61
-rw-r--r--  devscripts/cli_to_api.py  48
-rw-r--r--  devscripts/fish-completion.in  5
-rwxr-xr-x  devscripts/fish-completion.py  52
-rw-r--r--  devscripts/generate_aes_testdata.py  46
-rwxr-xr-x  devscripts/install_deps.py  73
-rw-r--r--  devscripts/lazy_load_template.py  39
-rw-r--r--  devscripts/logo.ico  bin 0 -> 41043 bytes
-rw-r--r--  devscripts/make_changelog.py  503
-rwxr-xr-x  devscripts/make_contributing.py  32
-rw-r--r--  devscripts/make_issue_template.py  72
-rw-r--r--  devscripts/make_lazy_extractors.py  132
-rwxr-xr-x  devscripts/make_readme.py  93
-rw-r--r--  devscripts/make_supportedsites.py  20
-rw-r--r--  devscripts/prepare_manpage.py  97
-rw-r--r--  devscripts/run_tests.bat  4
-rwxr-xr-x  devscripts/run_tests.py  71
-rwxr-xr-x  devscripts/run_tests.sh  4
-rw-r--r--  devscripts/set-variant.py  36
-rwxr-xr-x  devscripts/tomlparse.py  189
-rw-r--r--  devscripts/update-version.py  82
-rw-r--r--  devscripts/utils.py  47
-rw-r--r--  devscripts/zsh-completion.in  30
-rwxr-xr-x  devscripts/zsh-completion.py  50
-rw-r--r--  public.key  29
-rwxr-xr-x  pyinst.py  17
-rw-r--r--  pyproject.toml  120
-rw-r--r--  setup.cfg  45
-rwxr-xr-x  setup.py  36
-rw-r--r--  supportedsites.md  1794
-rw-r--r--  test/__init__.py  0
-rw-r--r--  test/conftest.py  26
-rw-r--r--  test/helper.py  340
-rw-r--r--  test/parameters.json  49
-rw-r--r--  test/test_InfoExtractor.py  1911
-rw-r--r--  test/test_YoutubeDL.py  1346
-rw-r--r--  test/test_YoutubeDLCookieJar.py  66
-rw-r--r--  test/test_aes.py  152
-rw-r--r--  test/test_age_restriction.py  55
-rw-r--r--  test/test_all_urls.py  122
-rw-r--r--  test/test_cache.py  57
-rw-r--r--  test/test_compat.py  105
-rw-r--r--  test/test_config.py  227
-rw-r--r--  test/test_cookies.py  306
-rwxr-xr-x  test/test_download.py  314
-rw-r--r--  test/test_downloader_external.py  139
-rw-r--r--  test/test_downloader_http.py  106
-rw-r--r--  test/test_execution.py  60
-rw-r--r--  test/test_iqiyi_sdk_interpreter.py  44
-rw-r--r--  test/test_jsinterp.py  380
-rw-r--r--  test/test_netrc.py  28
-rw-r--r--  test/test_networking.py  1631
-rw-r--r--  test/test_networking_utils.py  208
-rw-r--r--  test/test_overwrites.py  54
-rw-r--r--  test/test_plugins.py  73
-rw-r--r--  test/test_post_hooks.py  70
-rw-r--r--  test/test_postprocessors.py  579
-rw-r--r--  test/test_socks.py  477
-rw-r--r--  test/test_subtitles.py  452
-rw-r--r--  test/test_update.py  228
-rw-r--r--  test/test_utils.py  2457
-rw-r--r--  test/test_verbose_output.py  75
-rw-r--r--  test/test_websockets.py  383
-rw-r--r--  test/test_write_annotations.py.disabled  77
-rw-r--r--  test/test_youtube_lists.py  71
-rw-r--r--  test/test_youtube_misc.py  26
-rw-r--r--  test/test_youtube_signature.py  253
-rw-r--r--  test/testcert.pem  52
-rw-r--r--  test/testdata/certificate/ca.crt  10
-rw-r--r--  test/testdata/certificate/ca.key  5
-rw-r--r--  test/testdata/certificate/ca.srl  1
-rw-r--r--  test/testdata/certificate/client.crt  9
-rw-r--r--  test/testdata/certificate/client.csr  7
-rw-r--r--  test/testdata/certificate/client.key  5
-rw-r--r--  test/testdata/certificate/clientencrypted.key  8
-rw-r--r--  test/testdata/certificate/clientwithencryptedkey.crt  17
-rw-r--r--  test/testdata/certificate/clientwithkey.crt  14
-rw-r--r--  test/testdata/certificate/instructions.md  19
-rw-r--r--  test/testdata/cookies/httponly_cookies.txt  6
-rw-r--r--  test/testdata/cookies/malformed_cookies.txt  9
-rw-r--r--  test/testdata/cookies/session_cookies.txt  6
-rw-r--r--  test/testdata/f4m/custom_base_url.f4m  10
-rw-r--r--  test/testdata/ism/ec-3_test.Manifest  1
-rw-r--r--  test/testdata/ism/sintel.Manifest  988
-rw-r--r--  test/testdata/m3u8/bipbop_16x9.m3u8  38
-rw-r--r--  test/testdata/m3u8/img_bipbop_adv_example_fmp4.m3u8  76
-rw-r--r--  test/testdata/mpd/float_duration.mpd  18
-rw-r--r--  test/testdata/mpd/subtitles.mpd  351
-rw-r--r--  test/testdata/mpd/unfragmented.mpd  28
-rw-r--r--  test/testdata/mpd/urls_only.mpd  218
-rw-r--r--  test/testdata/thumbnails/foo %d bar/foo_%d.webp  bin 0 -> 3928 bytes
-rw-r--r--  test/testdata/xspf/foo_xspf.xspf  34
-rw-r--r--  test/testdata/yt_dlp_plugins/extractor/_ignore.py  5
-rw-r--r--  test/testdata/yt_dlp_plugins/extractor/ignore.py  12
-rw-r--r--  test/testdata/yt_dlp_plugins/extractor/normal.py  9
-rw-r--r--  test/testdata/yt_dlp_plugins/postprocessor/normal.py  5
-rw-r--r--  test/testdata/zipped_plugins/yt_dlp_plugins/extractor/zipped.py  5
-rw-r--r--  test/testdata/zipped_plugins/yt_dlp_plugins/postprocessor/zipped.py  5
-rw-r--r--  yt-dlp.cmd  1
-rwxr-xr-x  yt-dlp.sh  2
-rw-r--r--  yt_dlp/YoutubeDL.py  4339
-rw-r--r--  yt_dlp/__init__.py  1054
-rw-r--r--  yt_dlp/__main__.py  17
-rw-r--r--  yt_dlp/__pyinstaller/__init__.py  5
-rw-r--r--  yt_dlp/__pyinstaller/hook-yt_dlp.py  34
-rw-r--r--  yt_dlp/aes.py  567
-rw-r--r--  yt_dlp/cache.py  91
-rw-r--r--  yt_dlp/compat/__init__.py  79
-rw-r--r--  yt_dlp/compat/_deprecated.py  23
-rw-r--r--  yt_dlp/compat/_legacy.py  108
-rw-r--r--  yt_dlp/compat/compat_utils.py  83
-rw-r--r--  yt_dlp/compat/functools.py  12
-rw-r--r--  yt_dlp/compat/imghdr.py  16
-rw-r--r--  yt_dlp/compat/shutil.py  30
-rw-r--r--  yt_dlp/compat/types.py  13
-rw-r--r--  yt_dlp/compat/urllib/__init__.py  10
-rw-r--r--  yt_dlp/compat/urllib/request.py  40
-rw-r--r--  yt_dlp/cookies.py  1346
-rw-r--r--  yt_dlp/dependencies/Cryptodome.py  38
-rw-r--r--  yt_dlp/dependencies/__init__.py  92
-rw-r--r--  yt_dlp/downloader/__init__.py  131
-rw-r--r--  yt_dlp/downloader/common.py  486
-rw-r--r--  yt_dlp/downloader/dash.py  90
-rw-r--r--  yt_dlp/downloader/external.py  664
-rw-r--r--  yt_dlp/downloader/f4m.py  427
-rw-r--r--  yt_dlp/downloader/fc2.py  46
-rw-r--r--  yt_dlp/downloader/fragment.py  527
-rw-r--r--  yt_dlp/downloader/hls.py  378
-rw-r--r--  yt_dlp/downloader/http.py  383
-rw-r--r--  yt_dlp/downloader/ism.py  283
-rw-r--r--  yt_dlp/downloader/mhtml.py  189
-rw-r--r--  yt_dlp/downloader/niconico.py  140
-rw-r--r--  yt_dlp/downloader/rtmp.py  213
-rw-r--r--  yt_dlp/downloader/rtsp.py  42
-rw-r--r--  yt_dlp/downloader/websocket.py  53
-rw-r--r--  yt_dlp/downloader/youtube_live_chat.py  228
-rw-r--r--  yt_dlp/extractor/__init__.py  42
-rw-r--r--  yt_dlp/extractor/_extractors.py  2493
-rw-r--r--  yt_dlp/extractor/abc.py  421
-rw-r--r--  yt_dlp/extractor/abcnews.py  153
-rw-r--r--  yt_dlp/extractor/abcotvs.py  130
-rw-r--r--  yt_dlp/extractor/abematv.py  484
-rw-r--r--  yt_dlp/extractor/academicearth.py  39
-rw-r--r--  yt_dlp/extractor/acast.py  143
-rw-r--r--  yt_dlp/extractor/acfun.py  200
-rw-r--r--  yt_dlp/extractor/adn.py  335
-rw-r--r--  yt_dlp/extractor/adobeconnect.py  34
-rw-r--r--  yt_dlp/extractor/adobepass.py  1778
-rw-r--r--  yt_dlp/extractor/adobetv.py  286
-rw-r--r--  yt_dlp/extractor/adultswim.py  198
-rw-r--r--  yt_dlp/extractor/aenetworks.py  369
-rw-r--r--  yt_dlp/extractor/aeonco.py  74
-rw-r--r--  yt_dlp/extractor/afreecatv.py  484
-rw-r--r--  yt_dlp/extractor/agora.py  251
-rw-r--r--  yt_dlp/extractor/airtv.py  96
-rw-r--r--  yt_dlp/extractor/aitube.py  60
-rw-r--r--  yt_dlp/extractor/aliexpress.py  50
-rw-r--r--  yt_dlp/extractor/aljazeera.py  83
-rw-r--r--  yt_dlp/extractor/allocine.py  125
-rw-r--r--  yt_dlp/extractor/allstar.py  253
-rw-r--r--  yt_dlp/extractor/alphaporno.py  75
-rw-r--r--  yt_dlp/extractor/alsace20tv.py  83
-rw-r--r--  yt_dlp/extractor/altcensored.py  104
-rw-r--r--  yt_dlp/extractor/alura.py  167
-rw-r--r--  yt_dlp/extractor/amadeustv.py  77
-rw-r--r--  yt_dlp/extractor/amara.py  100
-rw-r--r--  yt_dlp/extractor/amazon.py  170
-rw-r--r--  yt_dlp/extractor/amazonminitv.py  294
-rw-r--r--  yt_dlp/extractor/amcnetworks.py  147
-rw-r--r--  yt_dlp/extractor/americastestkitchen.py  215
-rw-r--r--  yt_dlp/extractor/amp.py  101
-rw-r--r--  yt_dlp/extractor/anchorfm.py  98
-rw-r--r--  yt_dlp/extractor/angel.py  56
-rw-r--r--  yt_dlp/extractor/antenna.py  143
-rw-r--r--  yt_dlp/extractor/anvato.py  404
-rw-r--r--  yt_dlp/extractor/aol.py  133
-rw-r--r--  yt_dlp/extractor/apa.py  82
-rw-r--r--  yt_dlp/extractor/aparat.py  88
-rw-r--r--  yt_dlp/extractor/appleconnect.py  50
-rw-r--r--  yt_dlp/extractor/applepodcasts.py  85
-rw-r--r--  yt_dlp/extractor/appletrailers.py  278
-rw-r--r--  yt_dlp/extractor/archiveorg.py  947
-rw-r--r--  yt_dlp/extractor/arcpublishing.py  164
-rw-r--r--  yt_dlp/extractor/ard.py  579
-rw-r--r--  yt_dlp/extractor/arkena.py  150
-rw-r--r--  yt_dlp/extractor/arnes.py  98
-rw-r--r--  yt_dlp/extractor/art19.py  303
-rw-r--r--  yt_dlp/extractor/arte.py  345
-rw-r--r--  yt_dlp/extractor/asobichannel.py  168
-rw-r--r--  yt_dlp/extractor/atresplayer.py  104
-rw-r--r--  yt_dlp/extractor/atscaleconf.py  34
-rw-r--r--  yt_dlp/extractor/atvat.py  108
-rw-r--r--  yt_dlp/extractor/audimedia.py  89
-rw-r--r--  yt_dlp/extractor/audioboom.py  57
-rw-r--r--  yt_dlp/extractor/audiodraft.py  93
-rw-r--r--  yt_dlp/extractor/audiomack.py  147
-rw-r--r--  yt_dlp/extractor/audius.py  271
-rw-r--r--  yt_dlp/extractor/awaan.py  184
-rw-r--r--  yt_dlp/extractor/aws.py  75
-rw-r--r--  yt_dlp/extractor/axs.py  89
-rw-r--r--  yt_dlp/extractor/azmedien.py  66
-rw-r--r--  yt_dlp/extractor/baidu.py  51
-rw-r--r--  yt_dlp/extractor/banbye.py  168
-rw-r--r--  yt_dlp/extractor/bandaichannel.py  33
-rw-r--r--  yt_dlp/extractor/bandcamp.py  485
-rw-r--r--  yt_dlp/extractor/bannedvideo.py  155
-rw-r--r--  yt_dlp/extractor/bbc.py  1660
-rw-r--r--  yt_dlp/extractor/beatbump.py  111
-rw-r--r--  yt_dlp/extractor/beatport.py  97
-rw-r--r--  yt_dlp/extractor/beeg.py  90
-rw-r--r--  yt_dlp/extractor/behindkink.py  42
-rw-r--r--  yt_dlp/extractor/bellmedia.py  91
-rw-r--r--  yt_dlp/extractor/berufetv.py  70
-rw-r--r--  yt_dlp/extractor/bet.py  79
-rw-r--r--  yt_dlp/extractor/bfi.py  35
-rw-r--r--  yt_dlp/extractor/bfmtv.py  119
-rw-r--r--  yt_dlp/extractor/bibeltv.py  197
-rw-r--r--  yt_dlp/extractor/bigflix.py  73
-rw-r--r--  yt_dlp/extractor/bigo.py  57
-rw-r--r--  yt_dlp/extractor/bild.py  63
-rw-r--r--  yt_dlp/extractor/bilibili.py  2233
-rw-r--r--  yt_dlp/extractor/biobiochiletv.py  83
-rw-r--r--  yt_dlp/extractor/bitchute.py  275
-rw-r--r--  yt_dlp/extractor/blackboardcollaborate.py  63
-rw-r--r--  yt_dlp/extractor/bleacherreport.py  110
-rw-r--r--  yt_dlp/extractor/blerp.py  167
-rw-r--r--  yt_dlp/extractor/blogger.py  45
-rw-r--r--  yt_dlp/extractor/bloomberg.py  77
-rw-r--r--  yt_dlp/extractor/bokecc.py  53
-rw-r--r--  yt_dlp/extractor/bongacams.py  70
-rw-r--r--  yt_dlp/extractor/boosty.py  209
-rw-r--r--  yt_dlp/extractor/bostonglobe.py  69
-rw-r--r--  yt_dlp/extractor/box.py  83
-rw-r--r--  yt_dlp/extractor/boxcast.py  102
-rw-r--r--  yt_dlp/extractor/bpb.py  170
-rw-r--r--  yt_dlp/extractor/br.py  166
-rw-r--r--  yt_dlp/extractor/brainpop.py  318
-rw-r--r--  yt_dlp/extractor/bravotv.py  189
-rw-r--r--  yt_dlp/extractor/breitbart.py  34
-rw-r--r--  yt_dlp/extractor/brightcove.py  952
-rw-r--r--  yt_dlp/extractor/brilliantpala.py  127
-rw-r--r--  yt_dlp/extractor/bundesliga.py  34
-rw-r--r--  yt_dlp/extractor/bundestag.py  123
-rw-r--r--  yt_dlp/extractor/businessinsider.py  45
-rw-r--r--  yt_dlp/extractor/buzzfeed.py  95
-rw-r--r--  yt_dlp/extractor/byutv.py  104
-rw-r--r--  yt_dlp/extractor/c56.py  59
-rw-r--r--  yt_dlp/extractor/cableav.py  32
-rw-r--r--  yt_dlp/extractor/callin.py  155
-rw-r--r--  yt_dlp/extractor/caltrans.py  37
-rw-r--r--  yt_dlp/extractor/cam4.py  31
-rw-r--r--  yt_dlp/extractor/camdemy.py  158
-rw-r--r--  yt_dlp/extractor/camfm.py  85
-rw-r--r--  yt_dlp/extractor/cammodels.py  77
-rw-r--r--  yt_dlp/extractor/camsoda.py  57
-rw-r--r--  yt_dlp/extractor/camtasia.py  71
-rw-r--r--  yt_dlp/extractor/canal1.py  39
-rw-r--r--  yt_dlp/extractor/canalalpha.py  94
-rw-r--r--  yt_dlp/extractor/canalc2.py  68
-rw-r--r--  yt_dlp/extractor/canalplus.py  110
-rw-r--r--  yt_dlp/extractor/caracoltv.py  136
-rw-r--r--  yt_dlp/extractor/cartoonnetwork.py  59
-rw-r--r--  yt_dlp/extractor/cbc.py  653
-rw-r--r--  yt_dlp/extractor/cbs.py  280
-rw-r--r--  yt_dlp/extractor/cbsnews.py  443
-rw-r--r--  yt_dlp/extractor/cbssports.py  111
-rw-r--r--  yt_dlp/extractor/ccc.py  115
-rw-r--r--  yt_dlp/extractor/ccma.py  147
-rw-r--r--  yt_dlp/extractor/cctv.py  201
-rw-r--r--  yt_dlp/extractor/cda.py  338
-rw-r--r--  yt_dlp/extractor/cellebrite.py  63
-rw-r--r--  yt_dlp/extractor/ceskatelevize.py  289
-rw-r--r--  yt_dlp/extractor/cgtn.py  65
-rw-r--r--  yt_dlp/extractor/charlierose.py  50
-rw-r--r--  yt_dlp/extractor/chaturbate.py  106
-rw-r--r--  yt_dlp/extractor/chilloutzone.py  123
-rw-r--r--  yt_dlp/extractor/chzzk.py  139
-rw-r--r--  yt_dlp/extractor/cinemax.py  25
-rw-r--r--  yt_dlp/extractor/cinetecamilano.py  61
-rw-r--r--  yt_dlp/extractor/cineverse.py  139
-rw-r--r--  yt_dlp/extractor/ciscolive.py  145
-rw-r--r--  yt_dlp/extractor/ciscowebex.py  106
-rw-r--r--  yt_dlp/extractor/cjsw.py  67
-rw-r--r--  yt_dlp/extractor/clipchamp.py  61
-rw-r--r--  yt_dlp/extractor/clippit.py  70
-rw-r--r--  yt_dlp/extractor/cliprs.py  31
-rw-r--r--  yt_dlp/extractor/closertotruth.py  89
-rw-r--r--  yt_dlp/extractor/cloudflarestream.py  76
-rw-r--r--  yt_dlp/extractor/cloudycdn.py  79
-rw-r--r--  yt_dlp/extractor/clubic.py  53
-rw-r--r--  yt_dlp/extractor/clyp.py  99
-rw-r--r--  yt_dlp/extractor/cmt.py  55
-rw-r--r--  yt_dlp/extractor/cnbc.py  97
-rw-r--r--  yt_dlp/extractor/cnn.py  198
-rw-r--r--  yt_dlp/extractor/comedycentral.py  55
-rw-r--r--  yt_dlp/extractor/common.py  3943
-rw-r--r--  yt_dlp/extractor/commonmistakes.py  42
-rw-r--r--  yt_dlp/extractor/commonprotocols.py  70
-rw-r--r--  yt_dlp/extractor/condenast.py  250
-rw-r--r--  yt_dlp/extractor/contv.py  113
-rw-r--r--  yt_dlp/extractor/corus.py  154
-rw-r--r--  yt_dlp/extractor/coub.py  136
-rw-r--r--  yt_dlp/extractor/cozytv.py  37
-rw-r--r--  yt_dlp/extractor/cpac.py  136
-rw-r--r--  yt_dlp/extractor/cracked.py  88
-rw-r--r--  yt_dlp/extractor/crackle.py  243
-rw-r--r--  yt_dlp/extractor/craftsy.py  75
-rw-r--r--  yt_dlp/extractor/crooksandliars.py  56
-rw-r--r--  yt_dlp/extractor/crowdbunker.py  109
-rw-r--r--  yt_dlp/extractor/crtvg.py  53
-rw-r--r--  yt_dlp/extractor/crunchyroll.py  650
-rw-r--r--  yt_dlp/extractor/cspan.py  286
-rw-r--r--  yt_dlp/extractor/ctsnews.py  84
-rw-r--r--  yt_dlp/extractor/ctv.py  49
-rw-r--r--  yt_dlp/extractor/ctvnews.py  70
-rw-r--r--  yt_dlp/extractor/cultureunplugged.py  65
-rw-r--r--  yt_dlp/extractor/curiositystream.py  203
-rw-r--r--  yt_dlp/extractor/cwtv.py  99
-rw-r--r--  yt_dlp/extractor/cybrary.py  144
-rw-r--r--  yt_dlp/extractor/dacast.py  158
-rw-r--r--  yt_dlp/extractor/dailymail.py  73
-rw-r--r--  yt_dlp/extractor/dailymotion.py  474
-rw-r--r--  yt_dlp/extractor/dailywire.py  113
-rw-r--r--  yt_dlp/extractor/damtomo.py  108
-rw-r--r--  yt_dlp/extractor/daum.py  258
-rw-r--r--  yt_dlp/extractor/daystar.py  47
-rw-r--r--  yt_dlp/extractor/dbtv.py  47
-rw-r--r--  yt_dlp/extractor/dctp.py  102
-rw-r--r--  yt_dlp/extractor/deezer.py  142
-rw-r--r--  yt_dlp/extractor/democracynow.py  91
-rw-r--r--  yt_dlp/extractor/detik.py  159
-rw-r--r--  yt_dlp/extractor/deuxm.py  76
-rw-r--r--  yt_dlp/extractor/dfb.py  52
-rw-r--r--  yt_dlp/extractor/dhm.py  58
-rw-r--r--  yt_dlp/extractor/digitalconcerthall.py  150
-rw-r--r--  yt_dlp/extractor/digiteka.py  98
-rw-r--r--  yt_dlp/extractor/discogs.py  35
-rw-r--r--  yt_dlp/extractor/discovery.py  115
-rw-r--r--  yt_dlp/extractor/discoverygo.py  172
-rw-r--r--  yt_dlp/extractor/disney.py  160
-rw-r--r--  yt_dlp/extractor/dispeak.py  127
-rw-r--r--  yt_dlp/extractor/dlf.py  192
-rw-r--r--  yt_dlp/extractor/dlive.py  92
-rw-r--r--  yt_dlp/extractor/douyutv.py  306
-rw-r--r--  yt_dlp/extractor/dplay.py  1059
-rw-r--r--  yt_dlp/extractor/drbonanza.py  54
-rw-r--r--  yt_dlp/extractor/dreisat.py  41
-rw-r--r--  yt_dlp/extractor/drooble.py  113
-rw-r--r--  yt_dlp/extractor/dropbox.py  90
-rw-r--r--  yt_dlp/extractor/dropout.py  224
-rw-r--r--  yt_dlp/extractor/drtuber.py  104
-rw-r--r--  yt_dlp/extractor/drtv.py  401
-rw-r--r--  yt_dlp/extractor/dtube.py  80
-rw-r--r--  yt_dlp/extractor/duboku.py  247
-rw-r--r--  yt_dlp/extractor/dumpert.py  114
-rw-r--r--  yt_dlp/extractor/duoplay.py  104
-rw-r--r--  yt_dlp/extractor/dvtv.py  177
-rw-r--r--  yt_dlp/extractor/dw.py  110
-rw-r--r--  yt_dlp/extractor/eagleplatform.py  215
-rw-r--r--  yt_dlp/extractor/ebaumsworld.py  31
-rw-r--r--  yt_dlp/extractor/ebay.py  36
-rw-r--r--  yt_dlp/extractor/egghead.py  134
-rw-r--r--  yt_dlp/extractor/eighttracks.py  161
-rw-r--r--  yt_dlp/extractor/einthusan.py  105
-rw-r--r--  yt_dlp/extractor/eitb.py  79
-rw-r--r--  yt_dlp/extractor/elementorembed.py  72
-rw-r--r--  yt_dlp/extractor/elonet.py  64
-rw-r--r--  yt_dlp/extractor/elpais.py  92
-rw-r--r--  yt_dlp/extractor/eltrecetv.py  62
-rw-r--r--  yt_dlp/extractor/embedly.py  109
-rw-r--r--  yt_dlp/extractor/epicon.py  115
-rw-r--r--  yt_dlp/extractor/epidemicsound.py  107
-rw-r--r--  yt_dlp/extractor/eplus.py  183
-rw-r--r--  yt_dlp/extractor/epoch.py  55
-rw-r--r--  yt_dlp/extractor/eporner.py  137
-rw-r--r--  yt_dlp/extractor/erocast.py  63
-rw-r--r--  yt_dlp/extractor/eroprofile.py  122
-rw-r--r--  yt_dlp/extractor/err.py  224
-rw-r--r--  yt_dlp/extractor/ertgr.py  302
-rw-r--r--  yt_dlp/extractor/espn.py  421
-rw-r--r--  yt_dlp/extractor/ettutv.py  60
-rw-r--r--  yt_dlp/extractor/europa.py  174
-rw-r--r--  yt_dlp/extractor/europeantour.py  34
-rw-r--r--  yt_dlp/extractor/eurosport.py  123
-rw-r--r--  yt_dlp/extractor/euscreen.py  60
-rw-r--r--  yt_dlp/extractor/expressen.py  96
-rw-r--r--  yt_dlp/extractor/extractors.py  28
-rw-r--r--  yt_dlp/extractor/eyedotv.py  61
-rw-r--r--  yt_dlp/extractor/facebook.py  1060
-rw-r--r--  yt_dlp/extractor/fancode.py  181
-rw-r--r--  yt_dlp/extractor/faz.py  89
-rw-r--r--  yt_dlp/extractor/fc2.py  280
-rw-r--r--  yt_dlp/extractor/fczenit.py  51
-rw-r--r--  yt_dlp/extractor/fifa.py  83
-rw-r--r--  yt_dlp/extractor/filmon.py  171
-rw-r--r--  yt_dlp/extractor/filmweb.py  38
-rw-r--r--  yt_dlp/extractor/firsttv.py  152
-rw-r--r--  yt_dlp/extractor/fivetv.py  85
-rw-r--r--  yt_dlp/extractor/flextv.py  62
-rw-r--r--  yt_dlp/extractor/flickr.py  114
-rw-r--r--  yt_dlp/extractor/floatplane.py  333
-rw-r--r--  yt_dlp/extractor/folketinget.py  73
-rw-r--r--  yt_dlp/extractor/footyroom.py  53
-rw-r--r--  yt_dlp/extractor/formula1.py  24
-rw-r--r--  yt_dlp/extractor/fourtube.py  306
-rw-r--r--  yt_dlp/extractor/fox.py  177
-rw-r--r--  yt_dlp/extractor/fox9.py  38
-rw-r--r--  yt_dlp/extractor/foxnews.py  185
-rw-r--r--  yt_dlp/extractor/foxsports.py  52
-rw-r--r--  yt_dlp/extractor/fptplay.py  117
-rw-r--r--  yt_dlp/extractor/franceinter.py  56
-rw-r--r--  yt_dlp/extractor/francetv.py  423
-rw-r--r--  yt_dlp/extractor/freesound.py  77
-rw-r--r--  yt_dlp/extractor/freespeech.py  29
-rw-r--r--  yt_dlp/extractor/freetv.py  139
-rw-r--r--  yt_dlp/extractor/frontendmasters.py  252
-rw-r--r--  yt_dlp/extractor/fujitv.py  71
-rw-r--r--  yt_dlp/extractor/funimation.py  349
-rw-r--r--  yt_dlp/extractor/funk.py  40
-rw-r--r--  yt_dlp/extractor/funker530.py  80
-rw-r--r--  yt_dlp/extractor/fuyintv.py  30
-rw-r--r--  yt_dlp/extractor/gab.py  140
-rw-r--r--  yt_dlp/extractor/gaia.py  122
-rw-r--r--  yt_dlp/extractor/gamejolt.py  537
-rw-r--r--  yt_dlp/extractor/gamespot.py  75
-rw-r--r--  yt_dlp/extractor/gamestar.py  60
-rw-r--r--  yt_dlp/extractor/gaskrank.py  96
-rw-r--r--  yt_dlp/extractor/gazeta.py  44
-rw-r--r--  yt_dlp/extractor/gdcvault.py  214
-rw-r--r--  yt_dlp/extractor/gedidigital.py  198
-rw-r--r--  yt_dlp/extractor/generic.py  2849
-rw-r--r--  yt_dlp/extractor/genericembeds.py  114
-rw-r--r--  yt_dlp/extractor/genius.py  145
-rw-r--r--  yt_dlp/extractor/getcourseru.py  178
-rw-r--r--  yt_dlp/extractor/gettr.py  206
-rw-r--r--  yt_dlp/extractor/giantbomb.py  85
-rw-r--r--  yt_dlp/extractor/gigya.py  20
-rw-r--r--  yt_dlp/extractor/glide.py  38
-rw-r--r--  yt_dlp/extractor/globalplayer.py  254
-rw-r--r--  yt_dlp/extractor/globo.py  246
-rw-r--r--  yt_dlp/extractor/glomex.py  216
-rw-r--r--  yt_dlp/extractor/gmanetwork.py  83
-rw-r--r--  yt_dlp/extractor/go.py  333
-rw-r--r--  yt_dlp/extractor/godtube.py  55
-rw-r--r--  yt_dlp/extractor/gofile.py  106
-rw-r--r--  yt_dlp/extractor/golem.py  68
-rw-r--r--  yt_dlp/extractor/goodgame.py  57
-rw-r--r--  yt_dlp/extractor/googledrive.py  341
-rw-r--r--  yt_dlp/extractor/googlepodcasts.py  84
-rw-r--r--  yt_dlp/extractor/googlesearch.py  38
-rw-r--r--  yt_dlp/extractor/goplay.py  433
-rw-r--r--  yt_dlp/extractor/gopro.py  105
-rw-r--r--  yt_dlp/extractor/goshgay.py  48
-rw-r--r--  yt_dlp/extractor/gotostage.py  70
-rw-r--r--  yt_dlp/extractor/gputechconf.py  32
-rw-r--r--  yt_dlp/extractor/gronkh.py  120
-rw-r--r--  yt_dlp/extractor/groupon.py  64
-rw-r--r--  yt_dlp/extractor/harpodeon.py  70
-rw-r--r--  yt_dlp/extractor/hbo.py  171
-rw-r--r--  yt_dlp/extractor/hearthisat.py  96
-rw-r--r--  yt_dlp/extractor/heise.py  207
-rw-r--r--  yt_dlp/extractor/hellporno.py  72
-rw-r--r--  yt_dlp/extractor/hgtv.py  37
-rw-r--r--  yt_dlp/extractor/hidive.py  119
-rw-r--r--  yt_dlp/extractor/historicfilms.py  45
-rw-r--r--  yt_dlp/extractor/hitrecord.py  66
-rw-r--r--  yt_dlp/extractor/hketv.py  187
-rw-r--r--  yt_dlp/extractor/hollywoodreporter.py  72
-rw-r--r--  yt_dlp/extractor/holodex.py  100
-rw-r--r--  yt_dlp/extractor/hotnewhiphop.py  61
-rw-r--r--  yt_dlp/extractor/hotstar.py  468
-rw-r--r--  yt_dlp/extractor/hrefli.py  15
-rw-r--r--  yt_dlp/extractor/hrfensehen.py  90
-rw-r--r--  yt_dlp/extractor/hrti.py  200
-rw-r--r--  yt_dlp/extractor/hse.py  93
-rw-r--r--  yt_dlp/extractor/huajiao.py  53
-rw-r--r--  yt_dlp/extractor/huffpost.py  90
-rw-r--r--  yt_dlp/extractor/hungama.py  201
-rw-r--r--  yt_dlp/extractor/huya.py  134
-rw-r--r--  yt_dlp/extractor/hypem.py  47
-rw-r--r--  yt_dlp/extractor/hypergryph.py  32
-rw-r--r--  yt_dlp/extractor/hytale.py  58
-rw-r--r--  yt_dlp/extractor/icareus.py  179
-rw-r--r--  yt_dlp/extractor/ichinanalive.py  160
-rw-r--r--  yt_dlp/extractor/idolplus.py  115
-rw-r--r--  yt_dlp/extractor/ign.py  399
-rw-r--r--  yt_dlp/extractor/iheart.py  94
-rw-r--r--  yt_dlp/extractor/ilpost.py  69
-rw-r--r--  yt_dlp/extractor/iltalehti.py  51
-rw-r--r--  yt_dlp/extractor/imdb.py  144
-rw-r--r--  yt_dlp/extractor/imggaming.py  126
-rw-r--r--  yt_dlp/extractor/imgur.py  366
-rw-r--r--  yt_dlp/extractor/ina.py  84
-rw-r--r--  yt_dlp/extractor/inc.py  57
-rw-r--r--  yt_dlp/extractor/indavideo.py  115
-rw-r--r--  yt_dlp/extractor/infoq.py  136
-rw-r--r--  yt_dlp/extractor/instagram.py  735
-rw-r--r--  yt_dlp/extractor/internazionale.py  75
-rw-r--r--  yt_dlp/extractor/internetvideoarchive.py  58
-rw-r--r--  yt_dlp/extractor/iprima.py  280
-rw-r--r--  yt_dlp/extractor/iqiyi.py  766
-rw-r--r--  yt_dlp/extractor/islamchannel.py  81
-rw-r--r--  yt_dlp/extractor/israelnationalnews.py  50
-rw-r--r--  yt_dlp/extractor/itprotv.py  139
-rw-r--r--  yt_dlp/extractor/itv.py  266
-rw-r--r--  yt_dlp/extractor/ivi.py  253
-rw-r--r--  yt_dlp/extractor/ivideon.py  77
-rw-r--r--  yt_dlp/extractor/iwara.py  298
-rw-r--r--  yt_dlp/extractor/ixigua.py  83
-rw-r--r--  yt_dlp/extractor/izlesene.py  113
-rw-r--r--  yt_dlp/extractor/jable.py  103
-rw-r--r--  yt_dlp/extractor/jamendo.py  210
-rw-r--r--  yt_dlp/extractor/japandiet.py  274
-rw-r--r--  yt_dlp/extractor/jeuxvideo.py  52
-rw-r--r--  yt_dlp/extractor/jiosaavn.py  105
-rw-r--r--  yt_dlp/extractor/jixie.py  47
-rw-r--r--  yt_dlp/extractor/joj.py  108
-rw-r--r--  yt_dlp/extractor/joqrag.py  112
-rw-r--r--  yt_dlp/extractor/jove.py  76
-rw-r--r--  yt_dlp/extractor/jstream.py  73
-rw-r--r--  yt_dlp/extractor/jtbc.py  156
-rw-r--r--  yt_dlp/extractor/jwplatform.py  90
-rw-r--r--  yt_dlp/extractor/kakao.py  152
-rw-r--r--  yt_dlp/extractor/kaltura.py  545
-rw-r--r--  yt_dlp/extractor/kankanews.py  49
-rw-r--r--  yt_dlp/extractor/karaoketv.py  61
-rw-r--r--  yt_dlp/extractor/kelbyone.py  81
-rw-r--r--  yt_dlp/extractor/khanacademy.py  110
-rw-r--r--  yt_dlp/extractor/kick.py  126
-rw-r--r--  yt_dlp/extractor/kicker.py  55
-rw-r--r--  yt_dlp/extractor/kickstarter.py  68
-rw-r--r--  yt_dlp/extractor/kinja.py  199
-rw-r--r--  yt_dlp/extractor/kinopoisk.py  63
-rw-r--r--  yt_dlp/extractor/kommunetv.py  31
-rw-r--r--  yt_dlp/extractor/kompas.py  26
-rw-r--r--  yt_dlp/extractor/koo.py  114
-rw-r--r--  yt_dlp/extractor/krasview.py  58
-rw-r--r--  yt_dlp/extractor/kth.py  28
-rw-r--r--  yt_dlp/extractor/ku6.py  30
-rw-r--r--  yt_dlp/extractor/kukululive.py  140
-rw-r--r--  yt_dlp/extractor/kuwo.py  352
-rw-r--r--  yt_dlp/extractor/la7.py  234
-rw-r--r--  yt_dlp/extractor/lastfm.py  129
-rw-r--r--  yt_dlp/extractor/laxarxames.py  73
-rw-r--r--  yt_dlp/extractor/lbry.py  429
-rw-r--r--  yt_dlp/extractor/lci.py  28
-rw-r--r--  yt_dlp/extractor/lcp.py  87
-rw-r--r--  yt_dlp/extractor/lecture2go.py  67
-rw-r--r--  yt_dlp/extractor/lecturio.py  235
-rw-r--r--  yt_dlp/extractor/leeco.py  364
-rw-r--r--  yt_dlp/extractor/lefigaro.py  136
-rw-r--r--  yt_dlp/extractor/lego.py  141
-rw-r--r--  yt_dlp/extractor/lemonde.py  56
-rw-r--r--  yt_dlp/extractor/lenta.py  51
-rw-r--r--  yt_dlp/extractor/libraryofcongress.py  148
-rw-r--r--  yt_dlp/extractor/libsyn.py  89
-rw-r--r--  yt_dlp/extractor/lifenews.py  234
-rw-r--r--  yt_dlp/extractor/likee.py  182
-rw-r--r--  yt_dlp/extractor/limelight.py  358
-rw-r--r--  yt_dlp/extractor/linkedin.py  272
-rw-r--r--  yt_dlp/extractor/liputan6.py  64
-rw-r--r--  yt_dlp/extractor/listennotes.py  86
-rw-r--r--  yt_dlp/extractor/litv.py  148
-rw-r--r--  yt_dlp/extractor/livejournal.py  39
-rw-r--r--  yt_dlp/extractor/livestream.py  388
-rw-r--r--  yt_dlp/extractor/livestreamfails.py  37
-rw-r--r--  yt_dlp/extractor/lnkgo.py  163
-rw-r--r--  yt_dlp/extractor/lovehomeporn.py  33
-rw-r--r--  yt_dlp/extractor/lrt.py  108
-rw-r--r--  yt_dlp/extractor/lsm.py  282
-rw-r--r--  yt_dlp/extractor/lumni.py  23
-rw-r--r--  yt_dlp/extractor/lynda.py  330
-rw-r--r--  yt_dlp/extractor/maariv.py  62
-rw-r--r--  yt_dlp/extractor/magellantv.py  62
-rw-r--r--  yt_dlp/extractor/magentamusik.py  62
-rw-r--r--  yt_dlp/extractor/mailru.py  338
-rw-r--r--  yt_dlp/extractor/mainstreaming.py  210
-rw-r--r--  yt_dlp/extractor/mangomolo.py  73
-rw-r--r--  yt_dlp/extractor/manoto.py  133
-rw-r--r--  yt_dlp/extractor/manyvids.py  162
-rw-r--r--  yt_dlp/extractor/maoritv.py  28
-rw-r--r--  yt_dlp/extractor/markiza.py  124
-rw-r--r--  yt_dlp/extractor/massengeschmacktv.py  72
-rw-r--r--  yt_dlp/extractor/masters.py  38
-rw-r--r--  yt_dlp/extractor/matchtv.py  51
-rw-r--r--  yt_dlp/extractor/mbn.py  89
-rw-r--r--  yt_dlp/extractor/mdr.py  184
-rw-r--r--  yt_dlp/extractor/medaltv.py  162
-rw-r--r--  yt_dlp/extractor/mediaite.py  104
-rw-r--r--  yt_dlp/extractor/mediaklikk.py  156
-rw-r--r--  yt_dlp/extractor/medialaan.py  111
-rw-r--r--  yt_dlp/extractor/mediaset.py  320
-rw-r--r--  yt_dlp/extractor/mediasite.py  411
-rw-r--r--  yt_dlp/extractor/mediastream.py  226
-rw-r--r--  yt_dlp/extractor/mediaworksnz.py  103
-rw-r--r--  yt_dlp/extractor/medici.py  67
-rw-r--r--  yt_dlp/extractor/megaphone.py  46
-rw-r--r--  yt_dlp/extractor/megatvcom.py  164
-rw-r--r--  yt_dlp/extractor/meipai.py  99
-rw-r--r--  yt_dlp/extractor/melonvod.py  68
-rw-r--r--  yt_dlp/extractor/metacritic.py  62
-rw-r--r--  yt_dlp/extractor/mgtv.py  165
-rw-r--r--  yt_dlp/extractor/microsoftembed.py  65
-rw-r--r--  yt_dlp/extractor/microsoftstream.py  121
-rw-r--r--  yt_dlp/extractor/microsoftvirtualacademy.py  189
-rw-r--r--  yt_dlp/extractor/mildom.py  291
-rw-r--r--  yt_dlp/extractor/minds.py  193
-rw-r--r--  yt_dlp/extractor/minoto.py  45
-rw-r--r--  yt_dlp/extractor/mirrativ.py  118
-rw-r--r--  yt_dlp/extractor/mirrorcouk.py  98
-rw-r--r--  yt_dlp/extractor/mit.py  130
-rw-r--r--  yt_dlp/extractor/mitele.py  82
-rw-r--r--  yt_dlp/extractor/mixch.py  81
-rw-r--r--  yt_dlp/extractor/mixcloud.py  379
-rw-r--r--  yt_dlp/extractor/mlb.py  379
-rw-r--r--  yt_dlp/extractor/mlssoccer.py  114
-rw-r--r--  yt_dlp/extractor/mocha.py  64
-rw-r--r--  yt_dlp/extractor/mojvideo.py  52
-rw-r--r--  yt_dlp/extractor/monstercat.py  77
-rw-r--r--  yt_dlp/extractor/motherless.py  297
-rw-r--r--  yt_dlp/extractor/motorsport.py  52
-rw-r--r--  yt_dlp/extractor/moviepilot.py  97
-rw-r--r--  yt_dlp/extractor/moview.py  43
-rw-r--r--  yt_dlp/extractor/moviezine.py  38
-rw-r--r--  yt_dlp/extractor/movingimage.py  50
-rw-r--r--  yt_dlp/extractor/msn.py  168
-rw-r--r--  yt_dlp/extractor/mtv.py  654
-rw-r--r--  yt_dlp/extractor/muenchentv.py  72
-rw-r--r--  yt_dlp/extractor/murrtube.py  164
-rw-r--r--  yt_dlp/extractor/museai.py  112
-rw-r--r--  yt_dlp/extractor/musescore.py  64
-rw-r--r--  yt_dlp/extractor/musicdex.py  172
-rw-r--r--  yt_dlp/extractor/mx3.py  171
-rw-r--r--  yt_dlp/extractor/mxplayer.py  241
-rw-r--r--  yt_dlp/extractor/myspace.py  195
-rw-r--r--  yt_dlp/extractor/myspass.py  92
-rw-r--r--  yt_dlp/extractor/myvideoge.py  81
-rw-r--r--  yt_dlp/extractor/myvidster.py  27
-rw-r--r--  yt_dlp/extractor/mzaalo.py  95
-rw-r--r--  yt_dlp/extractor/n1.py  163
-rw-r--r--  yt_dlp/extractor/nate.py  120
-rw-r--r--  yt_dlp/extractor/nationalgeographic.py  83
-rw-r--r--  yt_dlp/extractor/naver.py  404
-rw-r--r--  yt_dlp/extractor/nba.py  419
-rw-r--r--  yt_dlp/extractor/nbc.py  851
-rw-r--r--  yt_dlp/extractor/ndr.py  471
-rw-r--r--  yt_dlp/extractor/ndtv.py  107
-rw-r--r--  yt_dlp/extractor/nebula.py  468
-rw-r--r--  yt_dlp/extractor/nekohacker.py  213
-rw-r--r--  yt_dlp/extractor/nerdcubed.py  38
-rw-r--r--  yt_dlp/extractor/neteasemusic.py  615
-rw-r--r--  yt_dlp/extractor/netverse.py  281
-rw-r--r--  yt_dlp/extractor/netzkino.py  85
-rw-r--r--  yt_dlp/extractor/newgrounds.py  311
-rw-r--r--  yt_dlp/extractor/newspicks.py  53
-rw-r--r--  yt_dlp/extractor/newsy.py  47
-rw-r--r--  yt_dlp/extractor/nextmedia.py  237
-rw-r--r--  yt_dlp/extractor/nexx.py  525
-rw-r--r--  yt_dlp/extractor/nfb.py  300
-rw-r--r--  yt_dlp/extractor/nfhsnetwork.py  141
-rw-r--r--  yt_dlp/extractor/nfl.py  373
-rw-r--r--  yt_dlp/extractor/nhk.py  708
-rw-r--r--  yt_dlp/extractor/nhl.py  123
-rw-r--r--  yt_dlp/extractor/nick.py  224
-rw-r--r--  yt_dlp/extractor/niconico.py  1061
-rw-r--r--  yt_dlp/extractor/niconicochannelplus.py  426
-rw-r--r--  yt_dlp/extractor/ninaprotocol.py  225
-rw-r--r--  yt_dlp/extractor/ninecninemedia.py  130
-rw-r--r--  yt_dlp/extractor/ninegag.py  148
-rw-r--r--  yt_dlp/extractor/ninenews.py  72
-rw-r--r--  yt_dlp/extractor/ninenow.py  122
-rw-r--r--  yt_dlp/extractor/nintendo.py  131
-rw-r--r--  yt_dlp/extractor/nitter.py  360
-rw-r--r--  yt_dlp/extractor/nobelprize.py  59
-rw-r--r--  yt_dlp/extractor/noice.py  116
-rw-r--r--  yt_dlp/extractor/nonktube.py  36
-rw-r--r--  yt_dlp/extractor/noodlemagazine.py  80
-rw-r--r--  yt_dlp/extractor/noovo.py  101
-rw-r--r--  yt_dlp/extractor/nosnl.py  115
-rw-r--r--  yt_dlp/extractor/nova.py  307
-rw-r--r--  yt_dlp/extractor/novaplay.py  67
-rw-r--r--  yt_dlp/extractor/nowness.py  142
-rw-r--r--  yt_dlp/extractor/noz.py  83
-rw-r--r--  yt_dlp/extractor/npo.py  612
-rw-r--r--  yt_dlp/extractor/npr.py  132
-rw-r--r--  yt_dlp/extractor/nrk.py  875
-rw-r--r--  yt_dlp/extractor/nrl.py  27
-rw-r--r--  yt_dlp/extractor/ntvcojp.py  55
-rw-r--r--  yt_dlp/extractor/ntvde.py  83
-rw-r--r--  yt_dlp/extractor/ntvru.py  142
-rw-r--r--  yt_dlp/extractor/nubilesporn.py  99
-rw-r--r--  yt_dlp/extractor/nuevo.py  36
-rw-r--r--  yt_dlp/extractor/nuum.py  199
-rw-r--r--  yt_dlp/extractor/nuvid.py  99
-rw-r--r--  yt_dlp/extractor/nytimes.py  420
-rw-r--r--  yt_dlp/extractor/nzherald.py  123
-rw-r--r--  yt_dlp/extractor/nzonscreen.py  93
-rw-r--r--  yt_dlp/extractor/nzz.py  40
-rw-r--r--  yt_dlp/extractor/odkmedia.py  105
-rw-r--r--  yt_dlp/extractor/odnoklassniki.py  464
-rw-r--r--  yt_dlp/extractor/oftv.py  54
-rw-r--r--  yt_dlp/extractor/oktoberfesttv.py  44
-rw-r--r--  yt_dlp/extractor/olympics.py  65
-rw-r--r--  yt_dlp/extractor/on24.py  87
-rw-r--r--  yt_dlp/extractor/once.py  40
-rw-r--r--  yt_dlp/extractor/ondemandkorea.py  169
-rw-r--r--  yt_dlp/extractor/onefootball.py  51
-rw-r--r--  yt_dlp/extractor/onenewsnz.py  111
-rw-r--r--  yt_dlp/extractor/oneplace.py  43
-rw-r--r--  yt_dlp/extractor/onet.py  259
-rw-r--r--  yt_dlp/extractor/onionstudios.py  42
-rw-r--r--  yt_dlp/extractor/opencast.py  183
-rw-r--r--  yt_dlp/extractor/openload.py  243
-rw-r--r--  yt_dlp/extractor/openrec.py  151
-rw-r--r--  yt_dlp/extractor/ora.py  71
-rw-r--r--  yt_dlp/extractor/orf.py  630
-rw-r--r--  yt_dlp/extractor/outsidetv.py  25
-rw-r--r--  yt_dlp/extractor/owncloud.py  80
-rw-r--r--  yt_dlp/extractor/packtpub.py  155
-rw-r--r--  yt_dlp/extractor/palcomp3.py  143
-rw-r--r--  yt_dlp/extractor/panopto.py  600
-rw-r--r--  yt_dlp/extractor/paramountplus.py  201
-rw-r--r--  yt_dlp/extractor/parler.py  91
-rw-r--r--  yt_dlp/extractor/parlview.py  64
-rw-r--r--  yt_dlp/extractor/patreon.py  454
-rw-r--r--  yt_dlp/extractor/pbs.py  757
-rw-r--r--  yt_dlp/extractor/pearvideo.py  68
-rw-r--r--  yt_dlp/extractor/peekvids.py  188
-rw-r--r--  yt_dlp/extractor/peertube.py  1647
-rw-r--r--  yt_dlp/extractor/peertv.py  52
-rw-r--r--  yt_dlp/extractor/peloton.py  215
-rw-r--r--  yt_dlp/extractor/performgroup.py  77
-rw-r--r--  yt_dlp/extractor/periscope.py  188
-rw-r--r--  yt_dlp/extractor/pgatour.py  47
-rw-r--r--  yt_dlp/extractor/philharmoniedeparis.py  97
-rw-r--r--  yt_dlp/extractor/phoenix.py  130
-rw-r--r--  yt_dlp/extractor/photobucket.py  43
-rw-r--r--  yt_dlp/extractor/piapro.py  121
-rw-r--r--  yt_dlp/extractor/piaulizaportal.py  70
-rw-r--r--  yt_dlp/extractor/picarto.py  152
-rw-r--r--  yt_dlp/extractor/piksel.py  174
-rw-r--r--  yt_dlp/extractor/pinkbike.py  93
-rw-r--r--  yt_dlp/extractor/pinterest.py  248
-rw-r--r--  yt_dlp/extractor/pixivsketch.py  118
-rw-r--r--  yt_dlp/extractor/pladform.py  135
-rw-r--r--  yt_dlp/extractor/planetmarathi.py  71
-rw-r--r--  yt_dlp/extractor/platzi.py  213
-rw-r--r--  yt_dlp/extractor/playplustv.py  100
-rw-r--r--  yt_dlp/extractor/playsuisse.py  234
-rw-r--r--  yt_dlp/extractor/playtvak.py  185
-rw-r--r--  yt_dlp/extractor/playwire.py  72
-rw-r--r--  yt_dlp/extractor/pluralsight.py  491
-rw-r--r--  yt_dlp/extractor/plutotv.py  195
-rw-r--r--  yt_dlp/extractor/podbayfm.py  75
-rw-r--r--  yt_dlp/extractor/podchaser.py  97
-rw-r--r--  yt_dlp/extractor/podomatic.py  74
-rw-r--r--  yt_dlp/extractor/pokemon.py  136
-rw-r--r--  yt_dlp/extractor/pokergo.py  106
-rw-r--r--  yt_dlp/extractor/polsatgo.py  86
-rw-r--r--  yt_dlp/extractor/polskieradio.py  610
-rw-r--r--  yt_dlp/extractor/popcorntimes.py  91
-rw-r--r--  yt_dlp/extractor/popcorntv.py  72
-rw-r--r--  yt_dlp/extractor/porn91.py  95
-rw-r--r--  yt_dlp/extractor/pornbox.py  113
-rw-r--r--  yt_dlp/extractor/pornflip.py  77
-rw-r--r--  yt_dlp/extractor/pornhub.py  825
-rw-r--r--  yt_dlp/extractor/pornotube.py  83
-rw-r--r--  yt_dlp/extractor/pornovoisines.py  103
-rw-r--r--  yt_dlp/extractor/pornoxo.py  55
-rw-r--r--  yt_dlp/extractor/pr0gramm.py  201
-rw-r--r--  yt_dlp/extractor/prankcast.py  137
-rw-r--r--  yt_dlp/extractor/premiershiprugby.py  39
-rw-r--r--  yt_dlp/extractor/presstv.py  69
-rw-r--r--  yt_dlp/extractor/projectveritas.py  52
-rw-r--r--  yt_dlp/extractor/prosiebensat1.py  496
-rw-r--r--  yt_dlp/extractor/prx.py  428
-rw-r--r--  yt_dlp/extractor/puhutv.py  233
-rw-r--r--  yt_dlp/extractor/puls4.py  51
-rw-r--r--  yt_dlp/extractor/pyvideo.py  70
-rw-r--r--  yt_dlp/extractor/qdance.py  171
-rw-r--r--  yt_dlp/extractor/qingting.py  47
-rw-r--r--  yt_dlp/extractor/qqmusic.py  365
-rw-r--r--  yt_dlp/extractor/r7.py  112
-rw-r--r--  yt_dlp/extractor/radiko.py  261
-rw-r--r--  yt_dlp/extractor/radiocanada.py  165
-rw-r--r--  yt_dlp/extractor/radiocomercial.py  154
-rw-r--r--  yt_dlp/extractor/radiode.py  50
-rw-r--r--  yt_dlp/extractor/radiofrance.py  473
-rw-r--r--  yt_dlp/extractor/radiojavan.py  81
-rw-r--r--  yt_dlp/extractor/radiokapital.py  97
-rw-r--r--  yt_dlp/extractor/radiozet.py  50
-rw-r--r--  yt_dlp/extractor/radlive.py  180
-rw-r--r--  yt_dlp/extractor/rai.py  816
-rw-r--r--  yt_dlp/extractor/raywenderlich.py  177
-rw-r--r--  yt_dlp/extractor/rbgtum.py  142
-rw-r--r--  yt_dlp/extractor/rcs.py  372
-rw-r--r--  yt_dlp/extractor/rcti.py  373
-rw-r--r--  yt_dlp/extractor/rds.py  68
-rw-r--r--  yt_dlp/extractor/redbee.py  380
-rw-r--r--  yt_dlp/extractor/redbulltv.py  224
-rw-r--r--  yt_dlp/extractor/reddit.py  353
-rw-r--r--  yt_dlp/extractor/redge.py  135
-rw-r--r--  yt_dlp/extractor/redgifs.py  260
-rw-r--r--  yt_dlp/extractor/redtube.py  144
-rw-r--r--  yt_dlp/extractor/rentv.py  104
-rw-r--r--  yt_dlp/extractor/restudy.py  41
-rw-r--r--  yt_dlp/extractor/reuters.py  66
-rw-r--r--  yt_dlp/extractor/reverbnation.py  51
-rw-r--r--  yt_dlp/extractor/rheinmaintv.py  94
-rw-r--r--  yt_dlp/extractor/ridehome.py  96
-rw-r--r--  yt_dlp/extractor/rinsefm.py  89
-rw-r--r--  yt_dlp/extractor/rmcdecouverte.py  71
-rw-r--r--  yt_dlp/extractor/rockstargames.py  65
-rw-r--r--  yt_dlp/extractor/rokfin.py  455
-rw-r--r--  yt_dlp/extractor/roosterteeth.py  352
-rw-r--r--  yt_dlp/extractor/rottentomatoes.py  80
-rw-r--r--  yt_dlp/extractor/rozhlas.py  363
-rw-r--r--  yt_dlp/extractor/rte.py  162
-rw-r--r--  yt_dlp/extractor/rtl2.py  95
-rw-r--r--  yt_dlp/extractor/rtlnl.py  294
-rw-r--r--  yt_dlp/extractor/rtnews.py  196
-rw-r--r--  yt_dlp/extractor/rtp.py  97
-rw-r--r--  yt_dlp/extractor/rtrfm.py  65
-rw-r--r--  yt_dlp/extractor/rts.py  232
-rw-r--r--  yt_dlp/extractor/rtvcplay.py  285
-rw-r--r--  yt_dlp/extractor/rtve.py  344
-rw-r--r--  yt_dlp/extractor/rtvs.py  85
-rw-r--r--  yt_dlp/extractor/rtvslo.py  166
-rw-r--r--  yt_dlp/extractor/rudovideo.py  135
-rw-r--r--  yt_dlp/extractor/rule34video.py  123
-rw-r--r--  yt_dlp/extractor/rumble.py  390
-rw-r--r--  yt_dlp/extractor/rutube.py  365
-rw-r--r--  yt_dlp/extractor/rutv.py  203
-rw-r--r--  yt_dlp/extractor/ruutu.py  262
-rw-r--r--  yt_dlp/extractor/ruv.py  186
-rw-r--r--  yt_dlp/extractor/s4c.py  103
-rw-r--r--  yt_dlp/extractor/safari.py  259
-rw-r--r--  yt_dlp/extractor/saitosan.py  75
-rw-r--r--  yt_dlp/extractor/samplefocus.py  97
-rw-r--r--  yt_dlp/extractor/sapo.py  114
-rw-r--r--  yt_dlp/extractor/sbs.py  156
-rw-r--r--  yt_dlp/extractor/sbscokr.py  200
-rw-r--r--  yt_dlp/extractor/screen9.py  62
-rw-r--r--  yt_dlp/extractor/screencast.py  117
-rw-r--r--  yt_dlp/extractor/screencastify.py  70
-rw-r--r--  yt_dlp/extractor/screencastomatic.py  72
-rw-r--r--  yt_dlp/extractor/scrippsnetworks.py  155
-rw-r--r--  yt_dlp/extractor/scrolller.py  102
-rw-r--r--  yt_dlp/extractor/scte.py  137
-rw-r--r--  yt_dlp/extractor/sejmpl.py  218
-rw-r--r--  yt_dlp/extractor/senalcolombia.py  32
-rw-r--r--  yt_dlp/extractor/senategov.py  200
-rw-r--r--  yt_dlp/extractor/sendtonews.py  105
-rw-r--r--  yt_dlp/extractor/servus.py  135
-rw-r--r--  yt_dlp/extractor/sevenplus.py  132
-rw-r--r--  yt_dlp/extractor/sexu.py  61
-rw-r--r--  yt_dlp/extractor/seznamzpravy.py  157
-rw-r--r--  yt_dlp/extractor/shahid.py  217
-rw-r--r--  yt_dlp/extractor/sharevideos.py  6
-rw-r--r--  yt_dlp/extractor/shemaroome.py  102
-rw-r--r--  yt_dlp/extractor/showroomlive.py  80
-rw-r--r--  yt_dlp/extractor/sibnet.py  17
-rw-r--r--  yt_dlp/extractor/simplecast.py  151
-rw-r--r--  yt_dlp/extractor/sina.py  109
-rw-r--r--  yt_dlp/extractor/sixplay.py  122
-rw-r--r--  yt_dlp/extractor/skeb.py  140
-rw-r--r--  yt_dlp/extractor/sky.py  135
-rw-r--r--  yt_dlp/extractor/skyit.py  227
-rw-r--r--  yt_dlp/extractor/skylinewebcams.py  40
-rw-r--r--  yt_dlp/extractor/skynewsarabia.py  116
-rw-r--r--  yt_dlp/extractor/skynewsau.py  43
-rw-r--r--  yt_dlp/extractor/slideshare.py  53
-rw-r--r--  yt_dlp/extractor/slideslive.py  554
-rw-r--r--  yt_dlp/extractor/slutload.py  63
-rw-r--r--  yt_dlp/extractor/smotrim.py  65
-rw-r--r--  yt_dlp/extractor/snotr.py  68
-rw-r--r--  yt_dlp/extractor/sohu.py  293
-rw-r--r--  yt_dlp/extractor/sonyliv.py  220
-rw-r--r--  yt_dlp/extractor/soundcloud.py  948
-rw-r--r--  yt_dlp/extractor/soundgasm.py  74
-rw-r--r--  yt_dlp/extractor/southpark.py  188
-rw-r--r--  yt_dlp/extractor/sovietscloset.py  207
-rw-r--r--  yt_dlp/extractor/spankbang.py  195
-rw-r--r--  yt_dlp/extractor/spiegel.py  51
-rw-r--r--  yt_dlp/extractor/spike.py  46
-rw-r--r--  yt_dlp/extractor/sport5.py  86
-rw-r--r--  yt_dlp/extractor/sportbox.py  88
-rw-r--r--  yt_dlp/extractor/sportdeutschland.py  142
-rw-r--r--  yt_dlp/extractor/spotify.py  167
-rw-r--r--  yt_dlp/extractor/spreaker.py  173
-rw-r--r--  yt_dlp/extractor/springboardplatform.py  113
-rw-r--r--  yt_dlp/extractor/sprout.py  61
-rw-r--r--  yt_dlp/extractor/srgssr.py  247
-rw-r--r--  yt_dlp/extractor/srmediathek.py  57
-rw-r--r--  yt_dlp/extractor/stacommu.py  231
-rw-r--r--  yt_dlp/extractor/stageplus.py  515
-rw-r--r--  yt_dlp/extractor/stanfordoc.py  89
-rw-r--r--  yt_dlp/extractor/startrek.py  76
-rw-r--r--  yt_dlp/extractor/startv.py  100
-rw-r--r--  yt_dlp/extractor/steam.py  170
-rw-r--r--  yt_dlp/extractor/stitcher.py  142
-rw-r--r--  yt_dlp/extractor/storyfire.py  133
-rw-r--r--  yt_dlp/extractor/streamable.py  103
-rw-r--r--  yt_dlp/extractor/streamcz.py  122
-rw-r--r--  yt_dlp/extractor/streetvoice.py  97
-rw-r--r--  yt_dlp/extractor/stretchinternet.py  35
-rw-r--r--  yt_dlp/extractor/stripchat.py  66
-rw-r--r--  yt_dlp/extractor/stv.py  89
-rw-r--r--  yt_dlp/extractor/substack.py  108
-rw-r--r--  yt_dlp/extractor/sunporno.py  75
-rw-r--r--  yt_dlp/extractor/sverigesradio.py  149
-rw-r--r--  yt_dlp/extractor/svt.py  489
-rw-r--r--  yt_dlp/extractor/swearnet.py  79
-rw-r--r--  yt_dlp/extractor/syfy.py  58
-rw-r--r--  yt_dlp/extractor/syvdk.py  33
-rw-r--r--  yt_dlp/extractor/sztvhu.py  38
-rw-r--r--  yt_dlp/extractor/tagesschau.py  164
-rw-r--r--  yt_dlp/extractor/tass.py  59
-rw-r--r--  yt_dlp/extractor/tbs.py  89
-rw-r--r--  yt_dlp/extractor/tbsjp.py  152
-rw-r--r--  yt_dlp/extractor/teachable.py  296
-rw-r--r--  yt_dlp/extractor/teachertube.py  126
-rw-r--r--  yt_dlp/extractor/teachingchannel.py  32
-rw-r--r--  yt_dlp/extractor/teamcoco.py  280
-rw-r--r--  yt_dlp/extractor/teamtreehouse.py  134
-rw-r--r--  yt_dlp/extractor/ted.py  236
-rw-r--r--  yt_dlp/extractor/tele13.py  84
-rw-r--r--  yt_dlp/extractor/tele5.py  89
-rw-r--r--  yt_dlp/extractor/telebruxelles.py  72
-rw-r--r--  yt_dlp/extractor/telecaribe.py  91
-rw-r--r--  yt_dlp/extractor/telecinco.py  146
-rw-r--r--  yt_dlp/extractor/telegraaf.py  86
-rw-r--r--  yt_dlp/extractor/telegram.py  136
-rw-r--r--  yt_dlp/extractor/telemb.py  75
-rw-r--r--  yt_dlp/extractor/telemundo.py  50
-rw-r--r--  yt_dlp/extractor/telequebec.py  237
-rw-r--r--  yt_dlp/extractor/teletask.py  52
-rw-r--r--  yt_dlp/extractor/telewebion.py  133
-rw-r--r--  yt_dlp/extractor/tempo.py  114
-rw-r--r--  yt_dlp/extractor/tencent.py  490
-rw-r--r--  yt_dlp/extractor/tennistv.py  155
-rw-r--r--  yt_dlp/extractor/tenplay.py  170
-rw-r--r--  yt_dlp/extractor/testurl.py  50
-rw-r--r--  yt_dlp/extractor/tf1.py  101
-rw-r--r--  yt_dlp/extractor/tfo.py  48
-rw-r--r--  yt_dlp/extractor/theguardian.py  135
-rw-r--r--  yt_dlp/extractor/theholetv.py  35
-rw-r--r--  yt_dlp/extractor/theintercept.py  46
-rw-r--r--  yt_dlp/extractor/theplatform.py  429
-rw-r--r--  yt_dlp/extractor/thestar.py  33
-rw-r--r--  yt_dlp/extractor/thesun.py  43
-rw-r--r--  yt_dlp/extractor/theweatherchannel.py  99
-rw-r--r--  yt_dlp/extractor/thisamericanlife.py  38
-rw-r--r--  yt_dlp/extractor/thisoldhouse.py  104
-rw-r--r--  yt_dlp/extractor/thisvid.py  226
-rw-r--r--  yt_dlp/extractor/threeqsdn.py  156
-rw-r--r--  yt_dlp/extractor/threespeak.py  93
-rw-r--r--  yt_dlp/extractor/tiktok.py  1317
-rw-r--r--  yt_dlp/extractor/tmz.py  193
-rw-r--r--  yt_dlp/extractor/tnaflix.py  336
-rw-r--r--  yt_dlp/extractor/toggle.py  228
-rw-r--r--  yt_dlp/extractor/toggo.py  82
-rw-r--r--  yt_dlp/extractor/tonline.py  53
-rw-r--r--  yt_dlp/extractor/toongoggles.py  76
-rw-r--r--  yt_dlp/extractor/toutv.py  87
-rw-r--r--  yt_dlp/extractor/toypics.py  89
-rw-r--r--  yt_dlp/extractor/traileraddict.py  61
-rw-r--r--  yt_dlp/extractor/triller.py  329
-rw-r--r--  yt_dlp/extractor/trovo.py  342
-rw-r--r--  yt_dlp/extractor/trtcocuk.py  48
-rw-r--r--  yt_dlp/extractor/trtworld.py  101
-rw-r--r--  yt_dlp/extractor/trueid.py  136
-rw-r--r--  yt_dlp/extractor/trunews.py  32
-rw-r--r--  yt_dlp/extractor/truth.py  68
-rw-r--r--  yt_dlp/extractor/trutv.py  70
-rw-r--r--  yt_dlp/extractor/tube8.py  170
-rw-r--r--  yt_dlp/extractor/tubetugraz.py  252
-rw-r--r--  yt_dlp/extractor/tubitv.py  168
-rw-r--r--  yt_dlp/extractor/tumblr.py  387
-rw-r--r--  yt_dlp/extractor/tunein.py  234
-rw-r--r--  yt_dlp/extractor/turner.py  256
-rw-r--r--  yt_dlp/extractor/tv2.py  324
-rw-r--r--  yt_dlp/extractor/tv24ua.py  78
-rw-r--r--  yt_dlp/extractor/tv2dk.py  172
-rw-r--r--  yt_dlp/extractor/tv2hu.py  104
-rw-r--r--  yt_dlp/extractor/tv4.py  149
-rw-r--r--  yt_dlp/extractor/tv5mondeplus.py  190
-rw-r--r--  yt_dlp/extractor/tv5unis.py  116
-rw-r--r--  yt_dlp/extractor/tva.py  85
-rw-r--r--  yt_dlp/extractor/tvanouvelles.py  62
-rw-r--r--  yt_dlp/extractor/tvc.py  97
-rw-r--r--  yt_dlp/extractor/tver.py  103
-rw-r--r--  yt_dlp/extractor/tvigle.py  133
-rw-r--r--  yt_dlp/extractor/tviplayer.py  78
-rw-r--r--  yt_dlp/extractor/tvland.py  37
-rw-r--r--  yt_dlp/extractor/tvn24.py  100
-rw-r--r--  yt_dlp/extractor/tvnoe.py  46
-rw-r--r--  yt_dlp/extractor/tvopengr.py  116
-rw-r--r--  yt_dlp/extractor/tvp.py  642
-rw-r--r--  yt_dlp/extractor/tvplay.py  306
-rw-r--r--  yt_dlp/extractor/tvplayer.py  80
-rw-r--r--  yt_dlp/extractor/tweakers.py  59
-rw-r--r--  yt_dlp/extractor/twentymin.py  80
-rw-r--r--  yt_dlp/extractor/twentythreevideo.py  76
-rw-r--r--  yt_dlp/extractor/twitcasting.py  306
-rw-r--r--  yt_dlp/extractor/twitch.py  1211
-rw-r--r--  yt_dlp/extractor/twitter.py  1875
-rw-r--r--  yt_dlp/extractor/txxx.py  438
-rw-r--r--  yt_dlp/extractor/udemy.py  474
-rw-r--r--  yt_dlp/extractor/udn.py  98
-rw-r--r--  yt_dlp/extractor/ufctv.py  13
-rw-r--r--  yt_dlp/extractor/ukcolumn.py  71
-rw-r--r--  yt_dlp/extractor/uktvplay.py  36
-rw-r--r--  yt_dlp/extractor/umg.py  98
-rw-r--r--  yt_dlp/extractor/unistra.py  64
-rw-r--r--  yt_dlp/extractor/unity.py  31
-rw-r--r--  yt_dlp/extractor/unsupported.py  189
-rw-r--r--  yt_dlp/extractor/uol.py  138
-rw-r--r--  yt_dlp/extractor/uplynk.py  88
-rw-r--r--  yt_dlp/extractor/urort.py  60
-rw-r--r--  yt_dlp/extractor/urplay.py  164
-rw-r--r--  yt_dlp/extractor/usanetwork.py  21
-rw-r--r--  yt_dlp/extractor/usatoday.py  60
-rw-r--r--  yt_dlp/extractor/ustream.py  275
-rw-r--r--  yt_dlp/extractor/ustudio.py  119
-rw-r--r--  yt_dlp/extractor/utreon.py  98
-rw-r--r--  yt_dlp/extractor/varzesh3.py  73
-rw-r--r--  yt_dlp/extractor/vbox7.py  97
-rw-r--r--  yt_dlp/extractor/veo.py  76
-rw-r--r--  yt_dlp/extractor/veoh.py  188
-rw-r--r--  yt_dlp/extractor/vesti.py  119
-rw-r--r--  yt_dlp/extractor/vevo.py  353
-rw-r--r--  yt_dlp/extractor/vgtv.py  311
-rw-r--r--  yt_dlp/extractor/vh1.py  33
-rw-r--r--  yt_dlp/extractor/vice.py  313
-rw-r--r--  yt_dlp/extractor/viddler.py  135
-rw-r--r--  yt_dlp/extractor/videa.py  188
-rw-r--r--  yt_dlp/extractor/videocampus_sachsen.py  253
-rw-r--r--  yt_dlp/extractor/videodetective.py  27
-rw-r--r--  yt_dlp/extractor/videofyme.py  51
-rw-r--r--  yt_dlp/extractor/videoken.py  337
-rw-r--r--  yt_dlp/extractor/videomore.py  307
-rw-r--r--  yt_dlp/extractor/videopress.py  89
-rw-r--r--  yt_dlp/extractor/vidio.py  309
-rw-r--r--  yt_dlp/extractor/vidlii.py  154
-rw-r--r--  yt_dlp/extractor/vidly.py  83
-rw-r--r--  yt_dlp/extractor/viewlift.py  362
-rw-r--r--  yt_dlp/extractor/viidea.py  199
-rw-r--r--  yt_dlp/extractor/viki.py  346
-rw-r--r--  yt_dlp/extractor/vimeo.py  1455
-rw-r--r--  yt_dlp/extractor/vimm.py  66
-rw-r--r--  yt_dlp/extractor/vine.py  151
-rw-r--r--  yt_dlp/extractor/viously.py  60
-rw-r--r--  yt_dlp/extractor/viqeo.py  87
-rw-r--r--  yt_dlp/extractor/viu.py  542
-rw-r--r--  yt_dlp/extractor/vk.py  842
-rw-r--r--  yt_dlp/extractor/vocaroo.py  63
-rw-r--r--  yt_dlp/extractor/vodpl.py  29
-rw-r--r--  yt_dlp/extractor/vodplatform.py  37
-rw-r--r--  yt_dlp/extractor/voicy.py  146
-rw-r--r--  yt_dlp/extractor/volejtv.py  40
-rw-r--r--  yt_dlp/extractor/voot.py  212
-rw-r--r--  yt_dlp/extractor/voxmedia.py  215
-rw-r--r--  yt_dlp/extractor/vrt.py  427
-rw-r--r--  yt_dlp/extractor/vtm.py  60
-rw-r--r--  yt_dlp/extractor/vuclip.py  68
-rw-r--r--  yt_dlp/extractor/vvvvid.py  336
-rw-r--r--  yt_dlp/extractor/walla.py  82
-rw-r--r--  yt_dlp/extractor/washingtonpost.py  123
-rw-r--r--  yt_dlp/extractor/wat.py  119
-rw-r--r--  yt_dlp/extractor/wdr.py  384
-rw-r--r--  yt_dlp/extractor/webcamerapl.py  44
-rw-r--r--  yt_dlp/extractor/webcaster.py  92
-rw-r--r--  yt_dlp/extractor/webofstories.py  155
-rw-r--r--  yt_dlp/extractor/weibo.py  251
-rw-r--r--  yt_dlp/extractor/weiqitv.py  50
-rw-r--r--  yt_dlp/extractor/weverse.py  608
-rw-r--r--  yt_dlp/extractor/wevidi.py  108
-rw-r--r--  yt_dlp/extractor/weyyak.py  86
-rw-r--r--  yt_dlp/extractor/whowatch.py  96
-rw-r--r--  yt_dlp/extractor/whyp.py  50
-rw-r--r--  yt_dlp/extractor/wikimedia.py  55
-rw-r--r--  yt_dlp/extractor/wimbledon.py  61
-rw-r--r--  yt_dlp/extractor/wimtv.py  150
-rw-r--r--  yt_dlp/extractor/wistia.py  394
-rw-r--r--  yt_dlp/extractor/wordpress.py  154
-rw-r--r--  yt_dlp/extractor/worldstarhiphop.py  38
-rw-r--r--  yt_dlp/extractor/wppilot.py  173
-rw-r--r--  yt_dlp/extractor/wrestleuniverse.py  304
-rw-r--r--  yt_dlp/extractor/wsj.py  120
-rw-r--r--  yt_dlp/extractor/wwe.py  138
-rw-r--r--  yt_dlp/extractor/wykop.py  268
-rw-r--r--  yt_dlp/extractor/xanimu.py  51
-rw-r--r--  yt_dlp/extractor/xboxclips.py  62
-rw-r--r--  yt_dlp/extractor/xfileshare.py  198
-rw-r--r--  yt_dlp/extractor/xhamster.py  465
-rw-r--r--  yt_dlp/extractor/ximalaya.py  167
-rw-r--r--  yt_dlp/extractor/xinpianchang.py  92
-rw-r--r--  yt_dlp/extractor/xminus.py  77
-rw-r--r--  yt_dlp/extractor/xnxx.py  83
-rw-r--r--  yt_dlp/extractor/xstream.py  115
-rw-r--r--  yt_dlp/extractor/xvideos.py  180
-rw-r--r--  yt_dlp/extractor/xxxymovies.py  77
-rw-r--r--  yt_dlp/extractor/yahoo.py  430
-rw-r--r--  yt_dlp/extractor/yandexdisk.py  142
-rw-r--r--  yt_dlp/extractor/yandexmusic.py  454
-rw-r--r--  yt_dlp/extractor/yandexvideo.py  390
-rw-r--r--  yt_dlp/extractor/yapfiles.py  90
-rw-r--r--  yt_dlp/extractor/yappy.py  128
-rw-r--r--  yt_dlp/extractor/yle_areena.py  134
-rw-r--r--  yt_dlp/extractor/youjizz.py  90
-rw-r--r--  yt_dlp/extractor/youku.py  290
-rw-r--r--  yt_dlp/extractor/younow.py  201
-rw-r--r--  yt_dlp/extractor/youporn.py  198
-rw-r--r--  yt_dlp/extractor/yourporn.py  65
-rw-r--r--  yt_dlp/extractor/yourupload.py  43
-rw-r--r--  yt_dlp/extractor/youtube.py  7387
-rw-r--r--  yt_dlp/extractor/zaiko.py  139
-rw-r--r--  yt_dlp/extractor/zapiks.py  106
-rw-r--r--  yt_dlp/extractor/zattoo.py  865
-rw-r--r--  yt_dlp/extractor/zdf.py  442
-rw-r--r--  yt_dlp/extractor/zee5.py  270
-rw-r--r--  yt_dlp/extractor/zeenews.py  59
-rw-r--r--  yt_dlp/extractor/zenporn.py  118
-rw-r--r--  yt_dlp/extractor/zetland.py  71
-rw-r--r--  yt_dlp/extractor/zhihu.py  65
-rw-r--r--  yt_dlp/extractor/zingmp3.py  628
-rw-r--r--  yt_dlp/extractor/zoom.py  164
-rw-r--r--  yt_dlp/extractor/zype.py  135
-rw-r--r--  yt_dlp/jsinterp.py  853
-rw-r--r--  yt_dlp/minicurses.py  182
-rw-r--r--  yt_dlp/networking/__init__.py  30
-rw-r--r--  yt_dlp/networking/_helper.py  283
-rw-r--r--  yt_dlp/networking/_requests.py  408
-rw-r--r--  yt_dlp/networking/_urllib.py  422
-rw-r--r--  yt_dlp/networking/_websockets.py  173
-rw-r--r--  yt_dlp/networking/common.py  565
-rw-r--r--  yt_dlp/networking/exceptions.py  103
-rw-r--r--  yt_dlp/networking/websocket.py  23
-rw-r--r--  yt_dlp/options.py  1920
-rw-r--r--  yt_dlp/plugins.py  176
-rw-r--r--  yt_dlp/postprocessor/__init__.py  47
-rw-r--r--  yt_dlp/postprocessor/common.py  215
-rw-r--r--  yt_dlp/postprocessor/embedthumbnail.py  227
-rw-r--r--  yt_dlp/postprocessor/exec.py  41
-rw-r--r--  yt_dlp/postprocessor/ffmpeg.py  1192
-rw-r--r--  yt_dlp/postprocessor/metadataparser.py  125
-rw-r--r--  yt_dlp/postprocessor/modify_chapters.py  336
-rw-r--r--  yt_dlp/postprocessor/movefilesafterdownload.py  53
-rw-r--r--  yt_dlp/postprocessor/sponskrub.py  98
-rw-r--r--  yt_dlp/postprocessor/sponsorblock.py  104
-rw-r--r--  yt_dlp/postprocessor/xattrpp.py  63
-rw-r--r--  yt_dlp/socks.py  274
-rw-r--r--  yt_dlp/update.py  619
-rw-r--r--  yt_dlp/utils/__init__.py  10
-rw-r--r--  yt_dlp/utils/_deprecated.py  39
-rw-r--r--  yt_dlp/utils/_legacy.py  315
-rw-r--r--  yt_dlp/utils/_utils.py  5445
-rw-r--r--  yt_dlp/utils/networking.py  164
-rw-r--r--  yt_dlp/utils/progress.py  109
-rw-r--r--  yt_dlp/utils/traversal.py  276
-rw-r--r--  yt_dlp/version.py  15
-rw-r--r--  yt_dlp/webvtt.py  399
1205 files changed, 244386 insertions, 0 deletions
diff --git a/.editorconfig b/.editorconfig
new file mode 100644
index 0000000..40c19fa
--- /dev/null
+++ b/.editorconfig
@@ -0,0 +1,8 @@
+root = true
+
+[**.py]
+charset = utf-8
+indent_size = 4
+indent_style = space
+trim_trailing_whitespace = true
+insert_final_newline = true
diff --git a/.gitattributes b/.gitattributes
new file mode 100644
index 0000000..f3e1df5
--- /dev/null
+++ b/.gitattributes
@@ -0,0 +1,6 @@
+* text=auto
+
+Makefile* text whitespace=-tab-in-indent
+*.sh text eol=lf
+*.md diff=markdown
+*.py diff=python
diff --git a/.github/FUNDING.yml b/.github/FUNDING.yml
new file mode 100644
index 0000000..04de087
--- /dev/null
+++ b/.github/FUNDING.yml
@@ -0,0 +1,13 @@
+# These are supported funding model platforms
+
+github: # Replace with up to 4 GitHub Sponsors-enabled usernames e.g., [user1, user2]
+patreon: # Replace with a single Patreon username
+open_collective: # Replace with a single Open Collective username
+ko_fi: # Replace with a single Ko-fi username
+tidelift: # Replace with a single Tidelift platform-name/package-name e.g., npm/babel
+community_bridge: # Replace with a single Community Bridge project-name e.g., cloud-foundry
+liberapay: # Replace with a single Liberapay username
+issuehunt: # Replace with a single IssueHunt username
+otechie: # Replace with a single Otechie username
+
+custom: ['https://github.com/yt-dlp/yt-dlp/blob/master/Collaborators.md#collaborators']
diff --git a/.github/ISSUE_TEMPLATE/1_broken_site.yml b/.github/ISSUE_TEMPLATE/1_broken_site.yml
new file mode 100644
index 0000000..5df13ad
--- /dev/null
+++ b/.github/ISSUE_TEMPLATE/1_broken_site.yml
@@ -0,0 +1,79 @@
+name: Broken site support
+description: Report issue with yt-dlp on a supported site
+labels: [triage, site-bug]
+body:
+ - type: checkboxes
+ attributes:
+ label: DO NOT REMOVE OR SKIP THE ISSUE TEMPLATE
+ description: Fill all fields even if you think it is irrelevant for the issue
+ options:
+ - label: I understand that I will be **blocked** if I *intentionally* remove or skip any mandatory\* field
+ required: true
+ - type: checkboxes
+ id: checklist
+ attributes:
+ label: Checklist
+ description: |
+ Carefully read and work through this check list in order to prevent the most common mistakes and misuse of yt-dlp:
+ options:
+ - label: I'm reporting that yt-dlp is broken on a **supported** site
+ required: true
+ - label: I've verified that I have **updated yt-dlp to nightly or master** ([update instructions](https://github.com/yt-dlp/yt-dlp#update-channels))
+ required: true
+ - label: I've checked that all provided URLs are playable in a browser with the same IP and same login details
+ required: true
+ - label: I've checked that all URLs and arguments with special characters are [properly quoted or escaped](https://github.com/yt-dlp/yt-dlp/wiki/FAQ#video-url-contains-an-ampersand--and-im-getting-some-strange-output-1-2839-or-v-is-not-recognized-as-an-internal-or-external-command)
+ required: true
+ - label: I've searched [known issues](https://github.com/yt-dlp/yt-dlp/issues/3766) and the [bugtracker](https://github.com/yt-dlp/yt-dlp/issues?q=) for similar issues **including closed ones**. DO NOT post duplicates
+ required: true
+ - label: I've read the [guidelines for opening an issue](https://github.com/yt-dlp/yt-dlp/blob/master/CONTRIBUTING.md#opening-an-issue)
+ required: true
+ - label: I've read about [sharing account credentials](https://github.com/yt-dlp/yt-dlp/blob/master/CONTRIBUTING.md#are-you-willing-to-share-account-details-if-needed) and I'm willing to share it if required
+ - type: input
+ id: region
+ attributes:
+ label: Region
+ description: Enter the country/region that the site is accessible from
+ placeholder: India
+ - type: textarea
+ id: description
+ attributes:
+ label: Provide a description that is worded well enough to be understood
+ description: See [is-the-description-of-the-issue-itself-sufficient](https://github.com/yt-dlp/yt-dlp/blob/master/CONTRIBUTING.md#is-the-description-of-the-issue-itself-sufficient)
+ placeholder: Provide any additional information, any suggested solutions, and as much context and examples as possible
+ validations:
+ required: true
+ - type: checkboxes
+ id: verbose
+ attributes:
+ label: Provide verbose output that clearly demonstrates the problem
+ options:
+ - label: Run **your** yt-dlp command with **-vU** flag added (`yt-dlp -vU <your command line>`)
+ required: true
+ - label: "If using API, add `'verbose': True` to `YoutubeDL` params instead"
+ required: false
+ - label: Copy the WHOLE output (starting with `[debug] Command-line config`) and insert it below
+ required: true
+ - type: textarea
+ id: log
+ attributes:
+ label: Complete Verbose Output
+ description: |
+ It should start like this:
+ placeholder: |
+ [debug] Command-line config: ['-vU', 'https://www.youtube.com/watch?v=BaW_jenozKc']
+ [debug] Encodings: locale cp65001, fs utf-8, pref cp65001, out utf-8, error utf-8, screen utf-8
+ [debug] yt-dlp version nightly@... from yt-dlp/yt-dlp [b634ba742] (win_exe)
+ [debug] Python 3.8.10 (CPython 64bit) - Windows-10-10.0.22000-SP0
+ [debug] exe versions: ffmpeg N-106550-g072101bd52-20220410 (fdk,setts), ffprobe N-106624-g391ce570c8-20220415, phantomjs 2.1.1
+ [debug] Optional libraries: Cryptodome-3.15.0, brotli-1.0.9, certifi-2022.06.15, mutagen-1.45.1, sqlite3-2.6.0, websockets-10.3
+ [debug] Proxy map: {}
+ [debug] Request Handlers: urllib, requests
+ [debug] Loaded 1893 extractors
+ [debug] Fetching release info: https://api.github.com/repos/yt-dlp/yt-dlp-nightly-builds/releases/latest
+ yt-dlp is up to date (nightly@... from yt-dlp/yt-dlp-nightly-builds)
+ [youtube] Extracting URL: https://www.youtube.com/watch?v=BaW_jenozKc
+ <more lines>
+ render: shell
+ validations:
+ required: true
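
The template above tells API users to pass 'verbose': True in the YoutubeDL params instead of the -vU flag. A minimal sketch of that usage, assuming yt-dlp is installed as the Python package yt_dlp (the URL is the template's own example):

    import yt_dlp

    # 'verbose': True corresponds to the -v part of the -vU flag; -U is the
    # CLI self-update switch and has no YoutubeDL params equivalent.
    ydl_opts = {'verbose': True}
    with yt_dlp.YoutubeDL(ydl_opts) as ydl:
        ydl.download(['https://www.youtube.com/watch?v=BaW_jenozKc'])
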
diff --git a/.github/ISSUE_TEMPLATE/2_site_support_request.yml b/.github/ISSUE_TEMPLATE/2_site_support_request.yml
new file mode 100644
index 0000000..644c87a
--- /dev/null
+++ b/.github/ISSUE_TEMPLATE/2_site_support_request.yml
@@ -0,0 +1,91 @@
+name: Site support request
+description: Request support for a new site
+labels: [triage, site-request]
+body:
+ - type: checkboxes
+ attributes:
+ label: DO NOT REMOVE OR SKIP THE ISSUE TEMPLATE
+ description: Fill all fields even if you think it is irrelevant for the issue
+ options:
+ - label: I understand that I will be **blocked** if I *intentionally* remove or skip any mandatory\* field
+ required: true
+ - type: checkboxes
+ id: checklist
+ attributes:
+ label: Checklist
+ description: |
+ Carefully read and work through this check list in order to prevent the most common mistakes and misuse of yt-dlp:
+ options:
+ - label: I'm reporting a new site support request
+ required: true
+ - label: I've verified that I have **updated yt-dlp to nightly or master** ([update instructions](https://github.com/yt-dlp/yt-dlp#update-channels))
+ required: true
+ - label: I've checked that all provided URLs are playable in a browser with the same IP and same login details
+ required: true
+ - label: I've checked that none of the provided URLs [violate any copyrights](https://github.com/yt-dlp/yt-dlp/blob/master/CONTRIBUTING.md#is-the-website-primarily-used-for-piracy) or contain any [DRM](https://en.wikipedia.org/wiki/Digital_rights_management) to the best of my knowledge
+ required: true
+ - label: I've searched [known issues](https://github.com/yt-dlp/yt-dlp/issues/3766) and the [bugtracker](https://github.com/yt-dlp/yt-dlp/issues?q=) for similar issues **including closed ones**. DO NOT post duplicates
+ required: true
+ - label: I've read the [guidelines for opening an issue](https://github.com/yt-dlp/yt-dlp/blob/master/CONTRIBUTING.md#opening-an-issue)
+ required: true
+ - label: I've read about [sharing account credentials](https://github.com/yt-dlp/yt-dlp/blob/master/CONTRIBUTING.md#are-you-willing-to-share-account-details-if-needed) and am willing to share them if required
+ - type: input
+ id: region
+ attributes:
+ label: Region
+ description: Enter the country/region that the site is accessible from
+ placeholder: India
+ - type: textarea
+ id: example-urls
+ attributes:
+ label: Example URLs
+ description: |
+ Provide all kinds of example URLs for which support should be added
+ placeholder: |
+ - Single video: https://www.youtube.com/watch?v=BaW_jenozKc
+ - Single video: https://youtu.be/BaW_jenozKc
+ - Playlist: https://www.youtube.com/playlist?list=PL4lCao7KL_QFVb7Iudeipvc2BCavECqzc
+ validations:
+ required: true
+ - type: textarea
+ id: description
+ attributes:
+ label: Provide a description that is worded well enough to be understood
+ description: See [is-the-description-of-the-issue-itself-sufficient](https://github.com/yt-dlp/yt-dlp/blob/master/CONTRIBUTING.md#is-the-description-of-the-issue-itself-sufficient)
+ placeholder: Provide any additional information, any suggested solutions, and as much context and examples as possible
+ validations:
+ required: true
+ - type: checkboxes
+ id: verbose
+ attributes:
+ label: Provide verbose output that clearly demonstrates the problem
+ options:
+ - label: Run **your** yt-dlp command with the **-vU** flag added (`yt-dlp -vU <your command line>`)
+ required: true
+ - label: "If using the API, add `'verbose': True` to `YoutubeDL` params instead"
+ required: false
+ - label: Copy the WHOLE output (starting with `[debug] Command-line config`) and insert it below
+ required: true
+ - type: textarea
+ id: log
+ attributes:
+ label: Complete Verbose Output
+ description: |
+ It should start like this:
+ placeholder: |
+ [debug] Command-line config: ['-vU', 'https://www.youtube.com/watch?v=BaW_jenozKc']
+ [debug] Encodings: locale cp65001, fs utf-8, pref cp65001, out utf-8, error utf-8, screen utf-8
+ [debug] yt-dlp version nightly@... from yt-dlp/yt-dlp [b634ba742] (win_exe)
+ [debug] Python 3.8.10 (CPython 64bit) - Windows-10-10.0.22000-SP0
+ [debug] exe versions: ffmpeg N-106550-g072101bd52-20220410 (fdk,setts), ffprobe N-106624-g391ce570c8-20220415, phantomjs 2.1.1
+ [debug] Optional libraries: Cryptodome-3.15.0, brotli-1.0.9, certifi-2022.06.15, mutagen-1.45.1, sqlite3-2.6.0, websockets-10.3
+ [debug] Proxy map: {}
+ [debug] Request Handlers: urllib, requests
+ [debug] Loaded 1893 extractors
+ [debug] Fetching release info: https://api.github.com/repos/yt-dlp/yt-dlp-nightly-builds/releases/latest
+ yt-dlp is up to date (nightly@... from yt-dlp/yt-dlp-nightly-builds)
+ [youtube] Extracting URL: https://www.youtube.com/watch?v=BaW_jenozKc
+ <more lines>
+ render: shell
+ validations:
+ required: true
diff --git a/.github/ISSUE_TEMPLATE/3_site_feature_request.yml b/.github/ISSUE_TEMPLATE/3_site_feature_request.yml
new file mode 100644
index 0000000..59d0474
--- /dev/null
+++ b/.github/ISSUE_TEMPLATE/3_site_feature_request.yml
@@ -0,0 +1,87 @@
+name: Site feature request
+description: Request new functionality for a supported site
+labels: [triage, site-enhancement]
+body:
+ - type: checkboxes
+ attributes:
+ label: DO NOT REMOVE OR SKIP THE ISSUE TEMPLATE
+ description: Fill in all fields, even if you think they are irrelevant to the issue
+ options:
+ - label: I understand that I will be **blocked** if I *intentionally* remove or skip any mandatory\* field
+ required: true
+ - type: checkboxes
+ id: checklist
+ attributes:
+ label: Checklist
+ description: |
+ Carefully read and work through this checklist to prevent the most common mistakes and misuse of yt-dlp:
+ options:
+ - label: I'm requesting a site-specific feature
+ required: true
+ - label: I've verified that I have **updated yt-dlp to nightly or master** ([update instructions](https://github.com/yt-dlp/yt-dlp#update-channels))
+ required: true
+ - label: I've checked that all provided URLs are playable in a browser with the same IP and same login details
+ required: true
+ - label: I've searched [known issues](https://github.com/yt-dlp/yt-dlp/issues/3766) and the [bugtracker](https://github.com/yt-dlp/yt-dlp/issues?q=) for similar issues **including closed ones**. DO NOT post duplicates
+ required: true
+ - label: I've read the [guidelines for opening an issue](https://github.com/yt-dlp/yt-dlp/blob/master/CONTRIBUTING.md#opening-an-issue)
+ required: true
+ - label: I've read about [sharing account credentials](https://github.com/yt-dlp/yt-dlp/blob/master/CONTRIBUTING.md#are-you-willing-to-share-account-details-if-needed) and am willing to share them if required
+ - type: input
+ id: region
+ attributes:
+ label: Region
+ description: Enter the country/region that the site is accessible from
+ placeholder: India
+ - type: textarea
+ id: example-urls
+ attributes:
+ label: Example URLs
+ description: |
+ Example URLs that can be used to demonstrate the requested feature
+ placeholder: |
+ https://www.youtube.com/watch?v=BaW_jenozKc
+ validations:
+ required: true
+ - type: textarea
+ id: description
+ attributes:
+ label: Provide a description that is worded well enough to be understood
+ description: See [is-the-description-of-the-issue-itself-sufficient](https://github.com/yt-dlp/yt-dlp/blob/master/CONTRIBUTING.md#is-the-description-of-the-issue-itself-sufficient)
+ placeholder: Provide any additional information, any suggested solutions, and as much context and examples as possible
+ validations:
+ required: true
+ - type: checkboxes
+ id: verbose
+ attributes:
+ label: Provide verbose output that clearly demonstrates the problem
+ options:
+ - label: Run **your** yt-dlp command with the **-vU** flag added (`yt-dlp -vU <your command line>`)
+ required: true
+ - label: "If using the API, add `'verbose': True` to `YoutubeDL` params instead"
+ required: false
+ - label: Copy the WHOLE output (starting with `[debug] Command-line config`) and insert it below
+ required: true
+ - type: textarea
+ id: log
+ attributes:
+ label: Complete Verbose Output
+ description: |
+ It should start like this:
+ placeholder: |
+ [debug] Command-line config: ['-vU', 'https://www.youtube.com/watch?v=BaW_jenozKc']
+ [debug] Encodings: locale cp65001, fs utf-8, pref cp65001, out utf-8, error utf-8, screen utf-8
+ [debug] yt-dlp version nightly@... from yt-dlp/yt-dlp [b634ba742] (win_exe)
+ [debug] Python 3.8.10 (CPython 64bit) - Windows-10-10.0.22000-SP0
+ [debug] exe versions: ffmpeg N-106550-g072101bd52-20220410 (fdk,setts), ffprobe N-106624-g391ce570c8-20220415, phantomjs 2.1.1
+ [debug] Optional libraries: Cryptodome-3.15.0, brotli-1.0.9, certifi-2022.06.15, mutagen-1.45.1, sqlite3-2.6.0, websockets-10.3
+ [debug] Proxy map: {}
+ [debug] Request Handlers: urllib, requests
+ [debug] Loaded 1893 extractors
+ [debug] Fetching release info: https://api.github.com/repos/yt-dlp/yt-dlp-nightly-builds/releases/latest
+ yt-dlp is up to date (nightly@... from yt-dlp/yt-dlp-nightly-builds)
+ [youtube] Extracting URL: https://www.youtube.com/watch?v=BaW_jenozKc
+ <more lines>
+ render: shell
+ validations:
+ required: true
diff --git a/.github/ISSUE_TEMPLATE/4_bug_report.yml b/.github/ISSUE_TEMPLATE/4_bug_report.yml
new file mode 100644
index 0000000..e207396
--- /dev/null
+++ b/.github/ISSUE_TEMPLATE/4_bug_report.yml
@@ -0,0 +1,72 @@
+name: Core bug report
+description: Report a bug unrelated to any particular site or extractor
+labels: [triage, bug]
+body:
+ - type: checkboxes
+ attributes:
+ label: DO NOT REMOVE OR SKIP THE ISSUE TEMPLATE
+ description: Fill in all fields, even if you think they are irrelevant to the issue
+ options:
+ - label: I understand that I will be **blocked** if I *intentionally* remove or skip any mandatory\* field
+ required: true
+ - type: checkboxes
+ id: checklist
+ attributes:
+ label: Checklist
+ description: |
+ Carefully read and work through this checklist to prevent the most common mistakes and misuse of yt-dlp:
+ options:
+ - label: I'm reporting a bug unrelated to a specific site
+ required: true
+ - label: I've verified that I have **updated yt-dlp to nightly or master** ([update instructions](https://github.com/yt-dlp/yt-dlp#update-channels))
+ required: true
+ - label: I've checked that all provided URLs are playable in a browser with the same IP and same login details
+ required: true
+ - label: I've checked that all URLs and arguments with special characters are [properly quoted or escaped](https://github.com/yt-dlp/yt-dlp/wiki/FAQ#video-url-contains-an-ampersand--and-im-getting-some-strange-output-1-2839-or-v-is-not-recognized-as-an-internal-or-external-command)
+ required: true
+ - label: I've searched [known issues](https://github.com/yt-dlp/yt-dlp/issues/3766) and the [bugtracker](https://github.com/yt-dlp/yt-dlp/issues?q=) for similar issues **including closed ones**. DO NOT post duplicates
+ required: true
+ - label: I've read the [guidelines for opening an issue](https://github.com/yt-dlp/yt-dlp/blob/master/CONTRIBUTING.md#opening-an-issue)
+ required: true
+ - type: textarea
+ id: description
+ attributes:
+ label: Provide a description that is worded well enough to be understood
+ description: See [is-the-description-of-the-issue-itself-sufficient](https://github.com/yt-dlp/yt-dlp/blob/master/CONTRIBUTING.md#is-the-description-of-the-issue-itself-sufficient)
+ placeholder: Provide any additional information, any suggested solutions, and as much context and examples as possible
+ validations:
+ required: true
+ - type: checkboxes
+ id: verbose
+ attributes:
+ label: Provide verbose output that clearly demonstrates the problem
+ options:
+ - label: Run **your** yt-dlp command with the **-vU** flag added (`yt-dlp -vU <your command line>`)
+ required: true
+ - label: "If using the API, add `'verbose': True` to `YoutubeDL` params instead"
+ required: false
+ - label: Copy the WHOLE output (starting with `[debug] Command-line config`) and insert it below
+ required: true
+ - type: textarea
+ id: log
+ attributes:
+ label: Complete Verbose Output
+ description: |
+ It should start like this:
+ placeholder: |
+ [debug] Command-line config: ['-vU', 'https://www.youtube.com/watch?v=BaW_jenozKc']
+ [debug] Encodings: locale cp65001, fs utf-8, pref cp65001, out utf-8, error utf-8, screen utf-8
+ [debug] yt-dlp version nightly@... from yt-dlp/yt-dlp [b634ba742] (win_exe)
+ [debug] Python 3.8.10 (CPython 64bit) - Windows-10-10.0.22000-SP0
+ [debug] exe versions: ffmpeg N-106550-g072101bd52-20220410 (fdk,setts), ffprobe N-106624-g391ce570c8-20220415, phantomjs 2.1.1
+ [debug] Optional libraries: Cryptodome-3.15.0, brotli-1.0.9, certifi-2022.06.15, mutagen-1.45.1, sqlite3-2.6.0, websockets-10.3
+ [debug] Proxy map: {}
+ [debug] Request Handlers: urllib, requests
+ [debug] Loaded 1893 extractors
+ [debug] Fetching release info: https://api.github.com/repos/yt-dlp/yt-dlp-nightly-builds/releases/latest
+ yt-dlp is up to date (nightly@... from yt-dlp/yt-dlp-nightly-builds)
+ [youtube] Extracting URL: https://www.youtube.com/watch?v=BaW_jenozKc
+ <more lines>
+ render: shell
+ validations:
+ required: true
diff --git a/.github/ISSUE_TEMPLATE/5_feature_request.yml b/.github/ISSUE_TEMPLATE/5_feature_request.yml
new file mode 100644
index 0000000..e06db9c
--- /dev/null
+++ b/.github/ISSUE_TEMPLATE/5_feature_request.yml
@@ -0,0 +1,66 @@
+name: Feature request
+description: Request new functionality unrelated to any particular site or extractor
+labels: [triage, enhancement]
+body:
+ - type: checkboxes
+ attributes:
+ label: DO NOT REMOVE OR SKIP THE ISSUE TEMPLATE
+ description: Fill in all fields, even if you think they are irrelevant to the issue
+ options:
+ - label: I understand that I will be **blocked** if I *intentionally* remove or skip any mandatory\* field
+ required: true
+ - type: checkboxes
+ id: checklist
+ attributes:
+ label: Checklist
+ description: |
+ Carefully read and work through this checklist to prevent the most common mistakes and misuse of yt-dlp:
+ options:
+ - label: I'm requesting a feature unrelated to a specific site
+ required: true
+ - label: I've looked through the [README](https://github.com/yt-dlp/yt-dlp#readme)
+ required: true
+ - label: I've verified that I have **updated yt-dlp to nightly or master** ([update instructions](https://github.com/yt-dlp/yt-dlp#update-channels))
+ required: true
+ - label: I've searched [known issues](https://github.com/yt-dlp/yt-dlp/issues/3766) and the [bugtracker](https://github.com/yt-dlp/yt-dlp/issues?q=) for similar issues **including closed ones**. DO NOT post duplicates
+ required: true
+ - label: I've read the [guidelines for opening an issue](https://github.com/yt-dlp/yt-dlp/blob/master/CONTRIBUTING.md#opening-an-issue)
+ required: true
+ - type: textarea
+ id: description
+ attributes:
+ label: Provide a description that is worded well enough to be understood
+ description: See [is-the-description-of-the-issue-itself-sufficient](https://github.com/yt-dlp/yt-dlp/blob/master/CONTRIBUTING.md#is-the-description-of-the-issue-itself-sufficient)
+ placeholder: Provide any additional information, any suggested solutions, and as much context and examples as possible
+ validations:
+ required: true
+ - type: checkboxes
+ id: verbose
+ attributes:
+ label: Provide verbose output that clearly demonstrates the problem
+ options:
+ - label: Run **your** yt-dlp command with the **-vU** flag added (`yt-dlp -vU <your command line>`)
+ - label: "If using the API, add `'verbose': True` to `YoutubeDL` params instead"
+ required: false
+ - label: Copy the WHOLE output (starting with `[debug] Command-line config`) and insert it below
+ - type: textarea
+ id: log
+ attributes:
+ label: Complete Verbose Output
+ description: |
+ It should start like this:
+ placeholder: |
+ [debug] Command-line config: ['-vU', 'https://www.youtube.com/watch?v=BaW_jenozKc']
+ [debug] Encodings: locale cp65001, fs utf-8, pref cp65001, out utf-8, error utf-8, screen utf-8
+ [debug] yt-dlp version nightly@... from yt-dlp/yt-dlp [b634ba742] (win_exe)
+ [debug] Python 3.8.10 (CPython 64bit) - Windows-10-10.0.22000-SP0
+ [debug] exe versions: ffmpeg N-106550-g072101bd52-20220410 (fdk,setts), ffprobe N-106624-g391ce570c8-20220415, phantomjs 2.1.1
+ [debug] Optional libraries: Cryptodome-3.15.0, brotli-1.0.9, certifi-2022.06.15, mutagen-1.45.1, sqlite3-2.6.0, websockets-10.3
+ [debug] Proxy map: {}
+ [debug] Request Handlers: urllib, requests
+ [debug] Loaded 1893 extractors
+ [debug] Fetching release info: https://api.github.com/repos/yt-dlp/yt-dlp-nightly-builds/releases/latest
+ yt-dlp is up to date (nightly@... from yt-dlp/yt-dlp-nightly-builds)
+ [youtube] Extracting URL: https://www.youtube.com/watch?v=BaW_jenozKc
+ <more lines>
+ render: shell
diff --git a/.github/ISSUE_TEMPLATE/6_question.yml b/.github/ISSUE_TEMPLATE/6_question.yml
new file mode 100644
index 0000000..571223a
--- /dev/null
+++ b/.github/ISSUE_TEMPLATE/6_question.yml
@@ -0,0 +1,72 @@
+name: Ask question
+description: Ask a yt-dlp-related question
+labels: [question]
+body:
+ - type: checkboxes
+ attributes:
+ label: DO NOT REMOVE OR SKIP THE ISSUE TEMPLATE
+ description: Fill in all fields, even if you think they are irrelevant to the issue
+ options:
+ - label: I understand that I will be **blocked** if I *intentionally* remove or skip any mandatory\* field
+ required: true
+ - type: markdown
+ attributes:
+ value: |
+ ### Make sure you are **only** asking a question and not reporting a bug or requesting a feature.
+ If your question contains "isn't working" or "can you add", this is most likely the wrong template.
+ If you are in doubt whether this is the right template, **USE ANOTHER TEMPLATE**!
+ - type: checkboxes
+ id: checklist
+ attributes:
+ label: Checklist
+ description: |
+ Carefully read and work through this checklist to prevent the most common mistakes and misuse of yt-dlp:
+ options:
+ - label: I'm asking a question and **not** reporting a bug or requesting a feature
+ required: true
+ - label: I've looked through the [README](https://github.com/yt-dlp/yt-dlp#readme)
+ required: true
+ - label: I've verified that I have **updated yt-dlp to nightly or master** ([update instructions](https://github.com/yt-dlp/yt-dlp#update-channels))
+ required: true
+ - label: I've searched [known issues](https://github.com/yt-dlp/yt-dlp/issues/3766) and the [bugtracker](https://github.com/yt-dlp/yt-dlp/issues?q=) for similar questions **including closed ones**. DO NOT post duplicates
+ required: true
+ - label: I've read the [guidelines for opening an issue](https://github.com/yt-dlp/yt-dlp/blob/master/CONTRIBUTING.md#opening-an-issue)
+ required: true
+ - type: textarea
+ id: question
+ attributes:
+ label: Please make sure the question is worded well enough to be understood
+ description: See [is-the-description-of-the-issue-itself-sufficient](https://github.com/yt-dlp/yt-dlp/blob/master/CONTRIBUTING.md#is-the-description-of-the-issue-itself-sufficient)
+ placeholder: Provide any additional information and as much context and examples as possible
+ validations:
+ required: true
+ - type: checkboxes
+ id: verbose
+ attributes:
+ label: Provide verbose output that clearly demonstrates the problem
+ options:
+ - label: Run **your** yt-dlp command with the **-vU** flag added (`yt-dlp -vU <your command line>`)
+ - label: "If using the API, add `'verbose': True` to `YoutubeDL` params instead"
+ required: false
+ - label: Copy the WHOLE output (starting with `[debug] Command-line config`) and insert it below
+ - type: textarea
+ id: log
+ attributes:
+ label: Complete Verbose Output
+ description: |
+ It should start like this:
+ placeholder: |
+ [debug] Command-line config: ['-vU', 'https://www.youtube.com/watch?v=BaW_jenozKc']
+ [debug] Encodings: locale cp65001, fs utf-8, pref cp65001, out utf-8, error utf-8, screen utf-8
+ [debug] yt-dlp version nightly@... from yt-dlp/yt-dlp [b634ba742] (win_exe)
+ [debug] Python 3.8.10 (CPython 64bit) - Windows-10-10.0.22000-SP0
+ [debug] exe versions: ffmpeg N-106550-g072101bd52-20220410 (fdk,setts), ffprobe N-106624-g391ce570c8-20220415, phantomjs 2.1.1
+ [debug] Optional libraries: Cryptodome-3.15.0, brotli-1.0.9, certifi-2022.06.15, mutagen-1.45.1, sqlite3-2.6.0, websockets-10.3
+ [debug] Proxy map: {}
+ [debug] Request Handlers: urllib, requests
+ [debug] Loaded 1893 extractors
+ [debug] Fetching release info: https://api.github.com/repos/yt-dlp/yt-dlp-nightly-builds/releases/latest
+ yt-dlp is up to date (nightly@... from yt-dlp/yt-dlp-nightly-builds)
+ [youtube] Extracting URL: https://www.youtube.com/watch?v=BaW_jenozKc
+ <more lines>
+ render: shell
diff --git a/.github/ISSUE_TEMPLATE/config.yml b/.github/ISSUE_TEMPLATE/config.yml
new file mode 100644
index 0000000..9cdffa4
--- /dev/null
+++ b/.github/ISSUE_TEMPLATE/config.yml
@@ -0,0 +1,8 @@
+blank_issues_enabled: false
+contact_links:
+ - name: Get help from the community on Discord
+ url: https://discord.gg/H5MNcFW63r
+ about: Join the yt-dlp Discord for community-powered support!
+ - name: Matrix Bridge to the Discord server
+ url: https://matrix.to/#/#yt-dlp:matrix.org
+ about: For those who do not want to use Discord
diff --git a/.github/ISSUE_TEMPLATE_tmpl/1_broken_site.yml b/.github/ISSUE_TEMPLATE_tmpl/1_broken_site.yml
new file mode 100644
index 0000000..bff28ae
--- /dev/null
+++ b/.github/ISSUE_TEMPLATE_tmpl/1_broken_site.yml
@@ -0,0 +1,40 @@
+name: Broken site support
+description: Report an issue with yt-dlp on a supported site
+labels: [triage, site-bug]
+body:
+ %(no_skip)s
+ - type: checkboxes
+ id: checklist
+ attributes:
+ label: Checklist
+ description: |
+ Carefully read and work through this checklist to prevent the most common mistakes and misuse of yt-dlp:
+ options:
+ - label: I'm reporting that yt-dlp is broken on a **supported** site
+ required: true
+ - label: I've verified that I have **updated yt-dlp to nightly or master** ([update instructions](https://github.com/yt-dlp/yt-dlp#update-channels))
+ required: true
+ - label: I've checked that all provided URLs are playable in a browser with the same IP and same login details
+ required: true
+ - label: I've checked that all URLs and arguments with special characters are [properly quoted or escaped](https://github.com/yt-dlp/yt-dlp/wiki/FAQ#video-url-contains-an-ampersand--and-im-getting-some-strange-output-1-2839-or-v-is-not-recognized-as-an-internal-or-external-command)
+ required: true
+ - label: I've searched [known issues](https://github.com/yt-dlp/yt-dlp/issues/3766) and the [bugtracker](https://github.com/yt-dlp/yt-dlp/issues?q=) for similar issues **including closed ones**. DO NOT post duplicates
+ required: true
+ - label: I've read the [guidelines for opening an issue](https://github.com/yt-dlp/yt-dlp/blob/master/CONTRIBUTING.md#opening-an-issue)
+ required: true
+ - label: I've read about [sharing account credentials](https://github.com/yt-dlp/yt-dlp/blob/master/CONTRIBUTING.md#are-you-willing-to-share-account-details-if-needed) and am willing to share them if required
+ - type: input
+ id: region
+ attributes:
+ label: Region
+ description: Enter the country/region that the site is accessible from
+ placeholder: India
+ - type: textarea
+ id: description
+ attributes:
+ label: Provide a description that is worded well enough to be understood
+ description: See [is-the-description-of-the-issue-itself-sufficient](https://github.com/yt-dlp/yt-dlp/blob/master/CONTRIBUTING.md#is-the-description-of-the-issue-itself-sufficient)
+ placeholder: Provide any additional information, any suggested solutions, and as much context and examples as possible
+ validations:
+ required: true
+ %(verbose)s
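
The `%(no_skip)s` and `%(verbose)s` markers in this _tmpl file are ordinary Python %-style placeholders: the concrete files under .github/ISSUE_TEMPLATE are generated from these templates by substituting shared YAML fragments (the feature-request and question templates instead use `%(verbose_optional)s`, a variant whose checkboxes are not required). A minimal sketch of that substitution, with illustrative stand-in fragments rather than the real blocks from devscripts/make_issue_template.py:

    # Stand-in fragments; the real YAML blocks live in devscripts/make_issue_template.py
    fragments = {
        'no_skip': '- type: checkboxes  # shared "do not skip the template" block',
        'verbose': '- type: checkboxes  # shared verbose-output block',
    }

    with open('.github/ISSUE_TEMPLATE_tmpl/1_broken_site.yml') as f:
        template = f.read()

    # %-formatting fills each %(name)s marker from the mapping
    with open('.github/ISSUE_TEMPLATE/1_broken_site.yml', 'w') as f:
        f.write(template % fragments)
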
diff --git a/.github/ISSUE_TEMPLATE_tmpl/2_site_support_request.yml b/.github/ISSUE_TEMPLATE_tmpl/2_site_support_request.yml
new file mode 100644
index 0000000..2bffe73
--- /dev/null
+++ b/.github/ISSUE_TEMPLATE_tmpl/2_site_support_request.yml
@@ -0,0 +1,52 @@
+name: Site support request
+description: Request support for a new site
+labels: [triage, site-request]
+body:
+ %(no_skip)s
+ - type: checkboxes
+ id: checklist
+ attributes:
+ label: Checklist
+ description: |
+ Carefully read and work through this checklist to prevent the most common mistakes and misuse of yt-dlp:
+ options:
+ - label: I'm reporting a new site support request
+ required: true
+ - label: I've verified that I have **updated yt-dlp to nightly or master** ([update instructions](https://github.com/yt-dlp/yt-dlp#update-channels))
+ required: true
+ - label: I've checked that all provided URLs are playable in a browser with the same IP and same login details
+ required: true
+ - label: I've checked that none of the provided URLs [violate any copyrights](https://github.com/yt-dlp/yt-dlp/blob/master/CONTRIBUTING.md#is-the-website-primarily-used-for-piracy) or contain any [DRM](https://en.wikipedia.org/wiki/Digital_rights_management) to the best of my knowledge
+ required: true
+ - label: I've searched [known issues](https://github.com/yt-dlp/yt-dlp/issues/3766) and the [bugtracker](https://github.com/yt-dlp/yt-dlp/issues?q=) for similar issues **including closed ones**. DO NOT post duplicates
+ required: true
+ - label: I've read the [guidelines for opening an issue](https://github.com/yt-dlp/yt-dlp/blob/master/CONTRIBUTING.md#opening-an-issue)
+ required: true
+ - label: I've read about [sharing account credentials](https://github.com/yt-dlp/yt-dlp/blob/master/CONTRIBUTING.md#are-you-willing-to-share-account-details-if-needed) and am willing to share them if required
+ - type: input
+ id: region
+ attributes:
+ label: Region
+ description: Enter the country/region that the site is accessible from
+ placeholder: India
+ - type: textarea
+ id: example-urls
+ attributes:
+ label: Example URLs
+ description: |
+ Provide all kinds of example URLs for which support should be added
+ placeholder: |
+ - Single video: https://www.youtube.com/watch?v=BaW_jenozKc
+ - Single video: https://youtu.be/BaW_jenozKc
+ - Playlist: https://www.youtube.com/playlist?list=PL4lCao7KL_QFVb7Iudeipvc2BCavECqzc
+ validations:
+ required: true
+ - type: textarea
+ id: description
+ attributes:
+ label: Provide a description that is worded well enough to be understood
+ description: See [is-the-description-of-the-issue-itself-sufficient](https://github.com/yt-dlp/yt-dlp/blob/master/CONTRIBUTING.md#is-the-description-of-the-issue-itself-sufficient)
+ placeholder: Provide any additional information, any suggested solutions, and as much context and examples as possible
+ validations:
+ required: true
+ %(verbose)s
diff --git a/.github/ISSUE_TEMPLATE_tmpl/3_site_feature_request.yml b/.github/ISSUE_TEMPLATE_tmpl/3_site_feature_request.yml
new file mode 100644
index 0000000..6c31279
--- /dev/null
+++ b/.github/ISSUE_TEMPLATE_tmpl/3_site_feature_request.yml
@@ -0,0 +1,48 @@
+name: Site feature request
+description: Request new functionality for a supported site
+labels: [triage, site-enhancement]
+body:
+ %(no_skip)s
+ - type: checkboxes
+ id: checklist
+ attributes:
+ label: Checklist
+ description: |
+ Carefully read and work through this checklist to prevent the most common mistakes and misuse of yt-dlp:
+ options:
+ - label: I'm requesting a site-specific feature
+ required: true
+ - label: I've verified that I have **updated yt-dlp to nightly or master** ([update instructions](https://github.com/yt-dlp/yt-dlp#update-channels))
+ required: true
+ - label: I've checked that all provided URLs are playable in a browser with the same IP and same login details
+ required: true
+ - label: I've searched [known issues](https://github.com/yt-dlp/yt-dlp/issues/3766) and the [bugtracker](https://github.com/yt-dlp/yt-dlp/issues?q=) for similar issues **including closed ones**. DO NOT post duplicates
+ required: true
+ - label: I've read the [guidelines for opening an issue](https://github.com/yt-dlp/yt-dlp/blob/master/CONTRIBUTING.md#opening-an-issue)
+ required: true
+ - label: I've read about [sharing account credentials](https://github.com/yt-dlp/yt-dlp/blob/master/CONTRIBUTING.md#are-you-willing-to-share-account-details-if-needed) and am willing to share them if required
+ - type: input
+ id: region
+ attributes:
+ label: Region
+ description: Enter the country/region that the site is accessible from
+ placeholder: India
+ - type: textarea
+ id: example-urls
+ attributes:
+ label: Example URLs
+ description: |
+ Example URLs that can be used to demonstrate the requested feature
+ placeholder: |
+ https://www.youtube.com/watch?v=BaW_jenozKc
+ validations:
+ required: true
+ - type: textarea
+ id: description
+ attributes:
+ label: Provide a description that is worded well enough to be understood
+ description: See [is-the-description-of-the-issue-itself-sufficient](https://github.com/yt-dlp/yt-dlp/blob/master/CONTRIBUTING.md#is-the-description-of-the-issue-itself-sufficient)
+ placeholder: Provide any additional information, any suggested solutions, and as much context and examples as possible
+ validations:
+ required: true
+ %(verbose)s
diff --git a/.github/ISSUE_TEMPLATE_tmpl/4_bug_report.yml b/.github/ISSUE_TEMPLATE_tmpl/4_bug_report.yml
new file mode 100644
index 0000000..5f357d9
--- /dev/null
+++ b/.github/ISSUE_TEMPLATE_tmpl/4_bug_report.yml
@@ -0,0 +1,33 @@
+name: Core bug report
+description: Report a bug unrelated to any particular site or extractor
+labels: [triage, bug]
+body:
+ %(no_skip)s
+ - type: checkboxes
+ id: checklist
+ attributes:
+ label: Checklist
+ description: |
+ Carefully read and work through this checklist to prevent the most common mistakes and misuse of yt-dlp:
+ options:
+ - label: I'm reporting a bug unrelated to a specific site
+ required: true
+ - label: I've verified that I have **updated yt-dlp to nightly or master** ([update instructions](https://github.com/yt-dlp/yt-dlp#update-channels))
+ required: true
+ - label: I've checked that all provided URLs are playable in a browser with the same IP and same login details
+ required: true
+ - label: I've checked that all URLs and arguments with special characters are [properly quoted or escaped](https://github.com/yt-dlp/yt-dlp/wiki/FAQ#video-url-contains-an-ampersand--and-im-getting-some-strange-output-1-2839-or-v-is-not-recognized-as-an-internal-or-external-command)
+ required: true
+ - label: I've searched [known issues](https://github.com/yt-dlp/yt-dlp/issues/3766) and the [bugtracker](https://github.com/yt-dlp/yt-dlp/issues?q=) for similar issues **including closed ones**. DO NOT post duplicates
+ required: true
+ - label: I've read the [guidelines for opening an issue](https://github.com/yt-dlp/yt-dlp/blob/master/CONTRIBUTING.md#opening-an-issue)
+ required: true
+ - type: textarea
+ id: description
+ attributes:
+ label: Provide a description that is worded well enough to be understood
+ description: See [is-the-description-of-the-issue-itself-sufficient](https://github.com/yt-dlp/yt-dlp/blob/master/CONTRIBUTING.md#is-the-description-of-the-issue-itself-sufficient)
+ placeholder: Provide any additional information, any suggested solutions, and as much context and examples as possible
+ validations:
+ required: true
+ %(verbose)s
diff --git a/.github/ISSUE_TEMPLATE_tmpl/5_feature_request.yml b/.github/ISSUE_TEMPLATE_tmpl/5_feature_request.yml
new file mode 100644
index 0000000..99107ff
--- /dev/null
+++ b/.github/ISSUE_TEMPLATE_tmpl/5_feature_request.yml
@@ -0,0 +1,31 @@
+name: Feature request
+description: Request new functionality unrelated to any particular site or extractor
+labels: [triage, enhancement]
+body:
+ %(no_skip)s
+ - type: checkboxes
+ id: checklist
+ attributes:
+ label: Checklist
+ description: |
+ Carefully read and work through this checklist to prevent the most common mistakes and misuse of yt-dlp:
+ options:
+ - label: I'm requesting a feature unrelated to a specific site
+ required: true
+ - label: I've looked through the [README](https://github.com/yt-dlp/yt-dlp#readme)
+ required: true
+ - label: I've verified that I have **updated yt-dlp to nightly or master** ([update instructions](https://github.com/yt-dlp/yt-dlp#update-channels))
+ required: true
+ - label: I've searched [known issues](https://github.com/yt-dlp/yt-dlp/issues/3766) and the [bugtracker](https://github.com/yt-dlp/yt-dlp/issues?q=) for similar issues **including closed ones**. DO NOT post duplicates
+ required: true
+ - label: I've read the [guidelines for opening an issue](https://github.com/yt-dlp/yt-dlp/blob/master/CONTRIBUTING.md#opening-an-issue)
+ required: true
+ - type: textarea
+ id: description
+ attributes:
+ label: Provide a description that is worded well enough to be understood
+ description: See [is-the-description-of-the-issue-itself-sufficient](https://github.com/yt-dlp/yt-dlp/blob/master/CONTRIBUTING.md#is-the-description-of-the-issue-itself-sufficient)
+ placeholder: Provide any additional information, any suggested solutions, and as much context and examples as possible
+ validations:
+ required: true
+ %(verbose_optional)s
diff --git a/.github/ISSUE_TEMPLATE_tmpl/6_question.yml b/.github/ISSUE_TEMPLATE_tmpl/6_question.yml
new file mode 100644
index 0000000..bd74210
--- /dev/null
+++ b/.github/ISSUE_TEMPLATE_tmpl/6_question.yml
@@ -0,0 +1,37 @@
+name: Ask question
+description: Ask a yt-dlp-related question
+labels: [question]
+body:
+ %(no_skip)s
+ - type: markdown
+ attributes:
+ value: |
+ ### Make sure you are **only** asking a question and not reporting a bug or requesting a feature.
+ If your question contains "isn't working" or "can you add", this is most likely the wrong template.
+ If you are in doubt whether this is the right template, **USE ANOTHER TEMPLATE**!
+ - type: checkboxes
+ id: checklist
+ attributes:
+ label: Checklist
+ description: |
+ Carefully read and work through this checklist to prevent the most common mistakes and misuse of yt-dlp:
+ options:
+ - label: I'm asking a question and **not** reporting a bug or requesting a feature
+ required: true
+ - label: I've looked through the [README](https://github.com/yt-dlp/yt-dlp#readme)
+ required: true
+ - label: I've verified that I have **updated yt-dlp to nightly or master** ([update instructions](https://github.com/yt-dlp/yt-dlp#update-channels))
+ required: true
+ - label: I've searched [known issues](https://github.com/yt-dlp/yt-dlp/issues/3766) and the [bugtracker](https://github.com/yt-dlp/yt-dlp/issues?q=) for similar questions **including closed ones**. DO NOT post duplicates
+ required: true
+ - label: I've read the [guidelines for opening an issue](https://github.com/yt-dlp/yt-dlp/blob/master/CONTRIBUTING.md#opening-an-issue)
+ required: true
+ - type: textarea
+ id: question
+ attributes:
+ label: Please make sure the question is worded well enough to be understood
+ description: See [is-the-description-of-the-issue-itself-sufficient](https://github.com/yt-dlp/yt-dlp/blob/master/CONTRIBUTING.md#is-the-description-of-the-issue-itself-sufficient)
+ placeholder: Provide any additional information and as much context and examples as possible
+ validations:
+ required: true
+ %(verbose_optional)s
diff --git a/.github/PULL_REQUEST_TEMPLATE.md b/.github/PULL_REQUEST_TEMPLATE.md
new file mode 100644
index 0000000..c4d3e81
--- /dev/null
+++ b/.github/PULL_REQUEST_TEMPLATE.md
@@ -0,0 +1,43 @@
+**IMPORTANT**: PRs without the template will be CLOSED
+
+### Description of your *pull request* and other information
+
+<!--
+
+Explain your *pull request* here, in any form. Please **make sure the description explains the purpose and effect** of your *pull request* and is worded well enough to be understood. Provide as much **context and examples** as possible
+
+-->
+
+ADD DESCRIPTION HERE
+
+Fixes #
+
+
+<details open><summary>Template</summary> <!-- OPEN is intentional -->
+
+<!--
+
+# PLEASE FOLLOW THE GUIDE BELOW
+
+- You will be asked some questions; please read them **carefully** and answer honestly
+- Put an `x` into all the boxes `[ ]` relevant to your *pull request* (like [x])
+- Use the *Preview* tab to see how your *pull request* will actually look
+
+-->
+
+### Before submitting a *pull request*, make sure you have:
+- [ ] At least skimmed through the [contributing guidelines](https://github.com/yt-dlp/yt-dlp/blob/master/CONTRIBUTING.md#developer-instructions), including the [yt-dlp coding conventions](https://github.com/yt-dlp/yt-dlp/blob/master/CONTRIBUTING.md#yt-dlp-coding-conventions)
+- [ ] [Searched](https://github.com/yt-dlp/yt-dlp/search?q=is%3Apr&type=Issues) the bugtracker for similar pull requests
+- [ ] Checked the code with [flake8](https://pypi.python.org/pypi/flake8) and [ran relevant tests](https://github.com/yt-dlp/yt-dlp/blob/master/CONTRIBUTING.md#developer-instructions)
+
+### To be accepted and merged into yt-dlp, each piece of code must be in the public domain or released under the [Unlicense](http://unlicense.org/). Check all of the following options that apply:
+- [ ] I am the original author of this code and I am willing to release it under the [Unlicense](http://unlicense.org/)
+- [ ] I am not the original author of this code, but it is in the public domain or released under the [Unlicense](http://unlicense.org/) (provide reliable evidence)
+
+### What is the purpose of your *pull request*?
+- [ ] Fix or improvement to an extractor (Make sure to add/update tests)
+- [ ] New extractor ([Piracy websites will not be accepted](https://github.com/yt-dlp/yt-dlp/blob/master/CONTRIBUTING.md#is-the-website-primarily-used-for-piracy))
+- [ ] Core bug fix/improvement
+- [ ] New feature (It is strongly [recommended to open an issue first](https://github.com/yt-dlp/yt-dlp/blob/master/CONTRIBUTING.md#adding-new-feature-or-making-overarching-changes))
+
+</details>
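
For the flake8 item above, a minimal sketch of checking just the files a PR touches before opening it; the path is illustrative (`yourextractor.py` is the placeholder name the contributing guide uses) and assumes flake8 is installed, e.g. via `pip install flake8`:

    import subprocess

    # flake8 exits non-zero on style violations; check=True surfaces that
    # as a CalledProcessError instead of letting it pass silently.
    subprocess.run(['flake8', 'yt_dlp/extractor/yourextractor.py'], check=True)
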
diff --git a/.github/banner.svg b/.github/banner.svg
new file mode 100644
index 0000000..35dc93e
--- /dev/null
+++ b/.github/banner.svg
@@ -0,0 +1,31 @@
+<svg xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" preserveAspectRatio="xMidYMid" width="699.935" height="173.764" viewBox="0 0 717 178">
+ <defs>
+ <style>
+ .cls-1, .cls-4 {
+ fill: red;
+ }
+
+ .cls-1, .cls-2, .cls-3, .cls-4 {
+ fill-rule: evenodd;
+ }
+
+ .cls-2 {
+ fill: #666;
+ }
+
+ .cls-3 {
+ fill: #fff;
+ }
+
+ .cls-3, .cls-4 {
+ stroke: #282828;
+ stroke-linejoin: round;
+ stroke-width: 1px;
+ }
+ </style>
+ </defs>
+ <path d="M89.846,166.601 L87.111,166.601 L87.111,172.000 L82.173,172.000 L82.173,153.812 L90.024,153.812 C94.064,153.812 96.773,156.370 96.773,160.242 C96.773,164.158 93.993,166.601 89.846,166.601 ZM88.851,157.755 L87.111,157.755 L87.111,162.764 L88.851,162.764 C90.583,162.764 91.622,161.796 91.622,160.242 C91.622,158.679 90.583,157.755 88.851,157.755 ZM67.898,153.812 L72.835,153.812 L72.835,168.021 L80.189,168.021 L80.189,172.000 L67.898,172.000 L67.898,153.812 ZM56.572,172.000 L49.574,172.000 L49.574,153.812 L56.501,153.812 C62.113,153.812 65.630,157.223 65.630,162.906 C65.630,168.590 62.113,172.000 56.572,172.000 ZM56.252,158.004 L54.511,158.004 L54.511,167.808 L56.394,167.808 C59.094,167.808 60.657,166.707 60.657,162.906 C60.657,159.105 59.094,158.004 56.252,158.004 ZM38.211,162.906 L46.736,162.906 L46.736,166.601 L38.211,166.601 L38.211,162.906 ZM31.253,172.000 L26.387,172.000 L26.387,157.791 L20.916,157.791 L20.916,153.812 L36.724,153.812 L36.724,157.791 L31.253,157.791 L31.253,172.000 ZM12.007,172.000 L7.104,172.000 L7.104,166.281 L0.426,153.812 L5.932,153.812 L9.484,161.201 L9.627,161.201 L13.179,153.812 L18.685,153.812 L12.007,166.281 L12.007,172.000 Z" class="cls-1"/>
+ <path d="M714.317,161.947 C714.104,160.988 713.536,159.993 711.689,159.993 C710.019,159.993 708.634,160.846 708.456,162.018 C708.278,163.048 708.918,163.617 710.445,164.007 L712.399,164.505 C714.743,165.109 715.738,166.281 715.418,168.199 C715.028,170.544 712.577,172.284 709.415,172.284 C706.609,172.284 704.904,171.041 704.797,168.732 L706.893,168.235 C707.000,169.691 707.959,170.437 709.664,170.437 C711.617,170.437 713.038,169.478 713.216,168.306 C713.394,167.347 712.861,166.707 711.511,166.387 L709.344,165.855 C706.928,165.251 706.005,164.007 706.325,162.125 C706.715,159.816 709.131,158.182 712.008,158.182 C714.708,158.182 715.951,159.461 716.306,161.414 L714.317,161.947 ZM702.671,165.890 L692.751,165.890 C692.245,169.229 693.648,170.401 696.276,170.401 C697.955,170.401 699.269,169.691 700.042,168.270 L701.960,168.838 C700.974,170.899 698.736,172.284 695.957,172.284 C692.023,172.284 690.069,169.478 690.770,165.286 C691.454,161.095 694.403,158.182 698.088,158.182 C700.939,158.182 703.674,159.922 702.813,165.002 L702.671,165.890 ZM697.768,160.064 C695.477,160.064 693.461,162.143 693.044,164.078 L700.823,164.078 C701.223,161.770 700.051,160.064 697.768,160.064 ZM687.862,172.000 L685.446,172.000 L683.066,166.707 L678.910,172.000 L676.494,172.000 L681.965,165.180 L678.768,158.359 L681.183,158.359 L683.528,163.936 L687.720,158.359 L690.135,158.359 L684.594,165.180 L687.862,172.000 ZM673.886,154.630 C673.886,153.848 674.560,153.209 675.377,153.209 C676.194,153.209 676.869,153.848 676.869,154.630 C676.869,155.411 676.194,156.050 675.377,156.050 C674.560,156.050 673.886,155.411 673.886,154.630 ZM673.513,172.000 L671.417,172.000 L673.690,158.359 L675.786,158.359 L673.513,172.000 ZM670.212,154.914 C668.826,154.914 668.151,155.624 667.903,156.974 L667.672,158.359 L670.745,158.359 L670.460,160.135 L667.379,160.135 L665.416,172.000 L663.320,172.000 L665.301,160.135 L663.107,160.135 L663.391,158.359 L665.603,158.359 L665.914,156.477 C666.269,154.132 668.365,152.960 670.318,152.960 C671.348,152.960 671.952,153.173 672.237,153.315 L671.348,155.127 C671.135,155.056 670.816,154.914 670.212,154.914 ZM649.225,172.000 L649.580,169.904 L649.332,169.904 C648.745,170.650 647.582,172.284 644.962,172.284 C641.543,172.284 639.616,169.549 640.327,165.215 C641.046,160.917 643.879,158.182 647.324,158.182 C649.989,158.182 650.539,159.816 650.877,160.526 L651.054,160.526 L652.173,153.812 L654.269,153.812 L651.250,172.000 L649.225,172.000 ZM647.182,160.064 C644.527,160.064 642.911,162.302 642.440,165.180 C641.952,168.093 642.849,170.401 645.477,170.401 C647.999,170.401 649.811,168.270 650.326,165.180 C650.832,162.125 649.749,160.064 647.182,160.064 ZM635.980,172.000 L633.884,172.000 L635.305,163.475 C635.660,161.343 634.701,160.064 632.747,160.064 C630.723,160.064 629.053,161.414 628.627,163.794 L627.277,172.000 L625.181,172.000 L627.454,158.359 L629.479,158.359 L629.124,160.491 L629.302,160.491 C630.154,159.105 631.611,158.182 633.671,158.182 C636.406,158.182 638.005,159.851 637.436,163.333 L635.980,172.000 ZM621.349,172.000 L619.253,172.000 L619.573,170.153 L619.466,170.153 C618.898,171.041 617.442,172.320 615.062,172.320 C612.468,172.320 610.657,170.792 611.083,168.128 C611.616,165.002 614.458,164.434 617.051,164.114 C619.573,163.794 620.603,163.865 620.781,162.871 L620.781,162.800 C621.065,161.059 620.354,160.029 618.436,160.029 C616.447,160.029 615.097,161.095 614.458,162.089 L612.611,161.379 C614.067,158.892 616.554,158.182 618.614,158.182 C620.354,158.182 623.551,158.679 622.841,163.013 L621.349,172.000 
ZM616.660,165.926 C614.991,166.139 613.428,166.636 613.179,168.235 C612.930,169.691 613.996,170.437 615.665,170.437 C618.152,170.437 619.786,168.767 620.070,167.062 L620.390,165.144 C619.964,165.570 617.548,165.819 616.660,165.926 ZM597.804,159.993 C596.135,159.993 594.749,160.846 594.572,162.018 C594.394,163.048 595.033,163.617 596.561,164.007 L598.515,164.505 C600.859,165.109 601.854,166.281 601.534,168.199 C601.143,170.544 598.692,172.284 595.531,172.284 C592.724,172.284 591.019,171.041 590.913,168.732 L593.009,168.235 C593.115,169.691 594.074,170.437 595.779,170.437 C597.733,170.437 599.154,169.478 599.332,168.306 C599.509,167.347 598.976,166.707 597.627,166.387 L595.460,165.855 C593.044,165.251 592.121,164.007 592.440,162.125 C592.831,159.816 595.247,158.182 598.124,158.182 C600.824,158.182 602.067,159.461 602.422,161.414 L600.433,161.947 C600.220,160.988 599.651,159.993 597.804,159.993 ZM588.786,165.890 L578.866,165.890 C578.360,169.229 579.763,170.401 582.392,170.401 C584.071,170.401 585.385,169.691 586.157,168.270 L588.076,168.838 C587.090,170.899 584.852,172.284 582.072,172.284 C578.138,172.284 576.185,169.478 576.886,165.286 C577.570,161.095 580.518,158.182 584.204,158.182 C587.054,158.182 589.790,159.922 588.928,165.002 L588.786,165.890 ZM583.884,160.064 C581.593,160.064 579.577,162.143 579.160,164.078 L586.939,164.078 C587.339,161.770 586.166,160.064 583.884,160.064 ZM574.722,160.171 C572.733,160.171 571.046,161.530 570.744,163.368 L569.323,172.000 L567.227,172.000 L569.500,158.359 L571.525,158.359 L571.170,160.420 L571.312,160.420 C572.023,159.070 573.586,158.146 575.255,158.146 C576.001,158.146 576.534,158.324 576.889,158.644 L575.894,160.384 C575.646,160.242 575.255,160.171 574.722,160.171 ZM561.299,172.000 L561.690,169.691 L561.548,169.691 C560.695,171.076 559.132,172.178 557.072,172.178 C554.515,172.178 552.952,170.508 553.520,167.027 L554.976,158.359 L557.072,158.359 L555.651,166.885 C555.332,168.874 556.362,170.153 558.102,170.153 C559.665,170.153 561.797,168.981 562.223,166.423 L563.573,158.359 L565.669,158.359 L563.395,172.000 L561.299,172.000 ZM551.534,160.135 L548.594,160.135 L547.271,168.093 C546.987,169.869 547.839,170.153 548.763,170.153 C549.225,170.153 549.509,170.082 549.686,170.046 L549.829,171.929 C549.509,172.036 548.976,172.178 548.195,172.178 C546.418,172.178 544.713,171.041 545.104,168.661 L546.507,160.135 L544.465,160.135 L544.749,158.359 L546.800,158.359 L547.342,155.091 L549.438,155.091 L548.896,158.359 L551.818,158.359 L551.534,160.135 ZM539.780,172.000 L537.684,172.000 L538.004,170.153 L537.897,170.153 C537.329,171.041 535.873,172.320 533.493,172.320 C530.900,172.320 529.088,170.792 529.514,168.128 C530.047,165.002 532.889,164.434 535.482,164.114 C538.004,163.794 539.034,163.865 539.212,162.871 L539.212,162.800 C539.496,161.059 538.786,160.029 536.867,160.029 C534.878,160.029 533.528,161.095 532.889,162.089 L531.042,161.379 C532.498,158.892 534.985,158.182 537.045,158.182 C538.786,158.182 541.983,158.679 541.272,163.013 L539.780,172.000 ZM535.091,165.926 C533.422,166.139 531.859,166.636 531.610,168.235 C531.361,169.691 532.427,170.437 534.097,170.437 C536.583,170.437 538.217,168.767 538.501,167.062 L538.821,165.144 C538.395,165.570 535.979,165.819 535.091,165.926 ZM527.316,165.890 L517.397,165.890 C516.891,169.229 518.294,170.401 520.922,170.401 C522.601,170.401 523.915,169.691 524.688,168.270 L526.606,168.838 C525.620,170.899 523.382,172.284 520.603,172.284 C516.669,172.284 514.715,169.478 515.416,165.286 C516.100,161.095 519.049,158.182 
522.734,158.182 C525.585,158.182 528.320,159.922 527.459,165.002 L527.316,165.890 ZM522.414,160.064 C520.123,160.064 518.107,162.143 517.690,164.078 L525.469,164.078 C525.869,161.770 524.697,160.064 522.414,160.064 ZM514.282,154.914 C512.897,154.914 512.222,155.624 511.973,156.974 L511.742,158.359 L514.815,158.359 L514.531,160.135 L511.449,160.135 L509.487,172.000 L507.391,172.000 L509.371,160.135 L507.178,160.135 L507.462,158.359 L509.673,158.359 L509.984,156.477 C510.339,154.132 512.435,152.960 514.389,152.960 C515.419,152.960 516.023,153.173 516.307,153.315 L515.419,155.127 C515.206,155.056 514.886,154.914 514.282,154.914 ZM493.506,172.000 L496.525,153.812 L498.621,153.812 L495.601,172.000 L493.506,172.000 ZM489.674,172.000 L487.578,172.000 L487.898,170.153 L487.791,170.153 C487.223,171.041 485.766,172.320 483.386,172.320 C480.793,172.320 478.981,170.792 479.408,168.128 C479.941,165.002 482.782,164.434 485.375,164.114 C487.898,163.794 488.928,163.865 489.105,162.871 L489.105,162.800 C489.390,161.059 488.679,160.029 486.761,160.029 C484.772,160.029 483.422,161.095 482.782,162.089 L480.935,161.379 C482.392,158.892 484.878,158.182 486.938,158.182 C488.679,158.182 491.876,158.679 491.166,163.013 L489.674,172.000 ZM484.985,165.926 C483.315,166.139 481.752,166.636 481.504,168.235 C481.255,169.691 482.321,170.437 483.990,170.437 C486.477,170.437 488.111,168.767 488.395,167.062 L488.715,165.144 C488.288,165.570 485.873,165.819 484.985,165.926 ZM475.576,172.000 L473.480,172.000 L474.901,163.475 C475.256,161.343 474.297,160.064 472.343,160.064 C470.319,160.064 468.649,161.414 468.223,163.794 L466.873,172.000 L464.777,172.000 L467.051,158.359 L469.075,158.359 L468.720,160.491 L468.898,160.491 C469.750,159.105 471.207,158.182 473.267,158.182 C476.002,158.182 477.601,159.851 477.032,163.333 L475.576,172.000 ZM455.511,172.284 C451.745,172.284 449.703,169.407 450.395,165.109 C451.070,160.917 453.948,158.182 457.571,158.182 C461.336,158.182 463.388,161.059 462.686,165.393 C462.011,169.549 459.134,172.284 455.511,172.284 ZM457.535,160.064 C454.658,160.064 452.873,162.587 452.420,165.393 C451.994,168.057 452.811,170.401 455.546,170.401 C458.423,170.401 460.208,167.924 460.661,165.109 C461.088,162.444 460.271,160.064 457.535,160.064 ZM446.401,154.630 C446.401,153.848 447.076,153.209 447.893,153.209 C448.710,153.209 449.385,153.848 449.385,154.630 C449.385,155.411 448.710,156.050 447.893,156.050 C447.076,156.050 446.401,155.411 446.401,154.630 ZM446.028,172.000 L443.932,172.000 L446.206,158.359 L448.301,158.359 L446.028,172.000 ZM442.763,160.135 L439.823,160.135 L438.500,168.093 C438.216,169.869 439.069,170.153 439.992,170.153 C440.454,170.153 440.738,170.082 440.916,170.046 L441.058,171.929 C440.738,172.036 440.205,172.178 439.424,172.178 C437.648,172.178 435.943,171.041 436.333,168.661 L437.736,160.135 L435.694,160.135 L435.978,158.359 L438.030,158.359 L438.571,155.091 L440.667,155.091 L440.125,158.359 L443.047,158.359 L442.763,160.135 ZM431.380,154.630 C431.380,153.848 432.055,153.209 432.872,153.209 C433.689,153.209 434.364,153.848 434.364,154.630 C434.364,155.411 433.689,156.050 432.872,156.050 C432.055,156.050 431.380,155.411 431.380,154.630 ZM431.007,172.000 L428.911,172.000 L431.184,158.359 L433.280,158.359 L431.007,172.000 ZM422.770,172.000 L423.126,169.904 L422.877,169.904 C422.291,170.650 421.128,172.284 418.508,172.284 C415.089,172.284 413.162,169.549 413.872,165.215 C414.591,160.917 417.424,158.182 420.870,158.182 C423.534,158.182 424.085,159.816 424.422,160.526 L424.600,160.526 
L425.719,153.812 L427.815,153.812 L424.795,172.000 L422.770,172.000 ZM420.728,160.064 C418.073,160.064 416.456,162.302 415.986,165.180 C415.497,168.093 416.394,170.401 419.023,170.401 C421.545,170.401 423.357,168.270 423.872,165.180 C424.378,162.125 423.294,160.064 420.728,160.064 ZM407.216,172.000 L407.572,169.904 L407.323,169.904 C406.737,170.650 405.573,172.284 402.954,172.284 C399.535,172.284 397.608,169.549 398.318,165.215 C399.037,160.917 401.870,158.182 405.316,158.182 C407.980,158.182 408.531,159.816 408.868,160.526 L409.046,160.526 L410.165,153.812 L412.261,153.812 L409.241,172.000 L407.216,172.000 ZM405.174,160.064 C402.519,160.064 400.902,162.302 400.432,165.180 C399.943,168.093 400.840,170.401 403.469,170.401 C405.991,170.401 407.803,168.270 408.318,165.180 C408.824,162.125 407.740,160.064 405.174,160.064 ZM393.971,172.000 L391.875,172.000 L392.195,170.153 L392.088,170.153 C391.520,171.041 390.063,172.320 387.683,172.320 C385.090,172.320 383.279,170.792 383.705,168.128 C384.238,165.002 387.080,164.434 389.673,164.114 C392.195,163.794 393.225,163.865 393.403,162.871 L393.403,162.800 C393.687,161.059 392.976,160.029 391.058,160.029 C389.069,160.029 387.719,161.095 387.080,162.089 L385.232,161.379 C386.689,158.892 389.175,158.182 391.236,158.182 C392.976,158.182 396.173,158.679 395.463,163.013 L393.971,172.000 ZM389.282,165.926 C387.612,166.139 386.049,166.636 385.801,168.235 C385.552,169.691 386.618,170.437 388.287,170.437 C390.774,170.437 392.408,168.767 392.692,167.062 L393.012,165.144 C392.586,165.570 390.170,165.819 389.282,165.926 ZM372.842,172.000 L370.746,172.000 L372.167,163.475 C372.522,161.308 371.528,160.064 369.574,160.064 C367.513,160.064 365.773,161.414 365.347,163.794 L363.997,172.000 L361.901,172.000 L364.920,153.812 L367.016,153.812 L365.915,160.491 L366.093,160.491 C366.945,159.070 368.330,158.182 370.497,158.182 C373.268,158.182 374.867,159.816 374.298,163.333 L372.842,172.000 ZM360.448,160.135 L357.508,160.135 L356.185,168.093 C355.901,169.869 356.753,170.153 357.677,170.153 C358.139,170.153 358.423,170.082 358.601,170.046 L358.743,171.929 C358.423,172.036 357.890,172.178 357.109,172.178 C355.333,172.178 353.627,171.041 354.018,168.661 L355.421,160.135 L353.379,160.135 L353.663,158.359 L355.714,158.359 L356.256,155.091 L358.352,155.091 L357.810,158.359 L360.732,158.359 L360.448,160.135 ZM349.065,154.630 C349.065,153.848 349.740,153.209 350.557,153.209 C351.374,153.209 352.049,153.848 352.049,154.630 C352.049,155.411 351.374,156.050 350.557,156.050 C349.740,156.050 349.065,155.411 349.065,154.630 ZM348.692,172.000 L346.596,172.000 L348.869,158.359 L350.965,158.359 L348.692,172.000 ZM337.615,172.000 L336.372,161.521 L336.159,161.521 L331.434,172.000 L329.374,172.000 L327.491,158.359 L329.694,158.359 L330.901,168.803 L331.043,168.803 L335.697,158.359 L337.935,158.359 L339.072,168.767 L339.214,168.767 L343.903,158.359 L346.105,158.359 L339.675,172.000 L337.615,172.000 ZM316.983,172.000 L314.319,172.000 L310.296,165.526 L308.600,166.885 L307.747,172.000 L305.651,172.000 L308.671,153.812 L310.767,153.812 L308.999,164.434 L309.239,164.434 L316.237,158.359 L318.830,158.359 L312.090,164.203 L316.983,172.000 ZM303.559,160.171 C301.569,160.171 299.882,161.530 299.580,163.368 L298.159,172.000 L296.063,172.000 L298.337,158.359 L300.362,158.359 L300.006,160.420 L300.149,160.420 C300.859,159.070 302.422,158.146 304.091,158.146 C304.837,158.146 305.370,158.324 305.726,158.644 L304.731,160.384 C304.482,160.242 304.091,160.171 303.559,160.171 ZM286.797,172.284 C283.031,172.284 
280.989,169.407 281.682,165.109 C282.356,160.917 285.234,158.182 288.857,158.182 C292.622,158.182 294.674,161.059 293.972,165.393 C293.297,169.549 290.420,172.284 286.797,172.284 ZM288.822,160.064 C285.944,160.064 284.159,162.587 283.706,165.393 C283.280,168.057 284.097,170.401 286.832,170.401 C289.710,170.401 291.495,167.924 291.948,165.109 C292.374,162.444 291.557,160.064 288.822,160.064 ZM280.512,154.914 C279.126,154.914 278.452,155.624 278.203,156.974 L277.972,158.359 L281.045,158.359 L280.760,160.135 L277.679,160.135 L275.716,172.000 L273.620,172.000 L275.601,160.135 L273.407,160.135 L273.691,158.359 L275.903,158.359 L276.214,156.477 C276.569,154.132 278.665,152.960 280.618,152.960 C281.649,152.960 282.252,153.173 282.537,153.315 L281.649,155.127 C281.435,155.056 281.116,154.914 280.512,154.914 ZM259.735,172.000 L262.755,153.812 L264.851,153.812 L261.831,172.000 L259.735,172.000 ZM253.595,172.000 L253.950,169.904 L253.701,169.904 C253.115,170.650 251.952,172.284 249.332,172.284 C245.913,172.284 243.986,169.549 244.696,165.215 C245.416,160.917 248.249,158.182 251.694,158.182 C254.358,158.182 254.909,159.816 255.246,160.526 L255.424,160.526 L256.543,153.812 L258.639,153.812 L255.619,172.000 L253.595,172.000 ZM251.552,160.064 C248.897,160.064 247.281,162.302 246.810,165.180 C246.321,168.093 247.218,170.401 249.847,170.401 C252.369,170.401 254.181,168.270 254.696,165.180 C255.202,162.125 254.119,160.064 251.552,160.064 ZM233.670,165.180 L233.990,163.226 L241.947,163.226 L241.627,165.180 L233.670,165.180 ZM230.478,165.890 L220.558,165.890 C220.052,169.229 221.455,170.401 224.084,170.401 C225.762,170.401 227.077,169.691 227.849,168.270 L229.768,168.838 C228.782,170.899 226.544,172.284 223.764,172.284 C219.830,172.284 217.876,169.478 218.578,165.286 C219.262,161.095 222.210,158.182 225.896,158.182 C228.746,158.182 231.482,159.922 230.620,165.002 L230.478,165.890 ZM225.576,160.064 C223.285,160.064 221.269,162.143 220.851,164.078 L228.631,164.078 C229.030,161.770 227.858,160.064 225.576,160.064 ZM209.063,172.284 C206.434,172.284 205.830,170.650 205.475,169.904 L205.226,169.904 L204.871,172.000 L202.846,172.000 L205.866,153.812 L207.962,153.812 L206.860,160.526 L207.038,160.526 C207.606,159.816 208.708,158.182 211.372,158.182 C214.817,158.182 216.736,160.917 216.025,165.215 C215.315,169.549 212.473,172.284 209.063,172.284 ZM210.803,160.064 C208.246,160.064 206.541,162.125 206.043,165.180 C205.546,168.270 206.576,170.401 209.098,170.401 C211.727,170.401 213.432,168.093 213.929,165.180 C214.391,162.302 213.468,160.064 210.803,160.064 ZM196.634,172.000 L197.025,169.691 L196.883,169.691 C196.031,171.076 194.468,172.178 192.407,172.178 C189.850,172.178 188.287,170.508 188.855,167.027 L190.311,158.359 L192.407,158.359 L190.986,166.885 C190.667,168.874 191.697,170.153 193.437,170.153 C195.000,170.153 197.132,168.981 197.558,166.423 L198.908,158.359 L201.004,158.359 L198.730,172.000 L196.634,172.000 ZM186.869,160.135 L183.929,160.135 L182.606,168.093 C182.322,169.869 183.174,170.153 184.098,170.153 C184.560,170.153 184.844,170.082 185.022,170.046 L185.164,171.929 C184.844,172.036 184.311,172.178 183.530,172.178 C181.754,172.178 180.048,171.041 180.439,168.661 L181.842,160.135 L179.800,160.135 L180.084,158.359 L182.135,158.359 L182.677,155.091 L184.773,155.091 L184.231,158.359 L187.153,158.359 L186.869,160.135 ZM173.020,172.000 L173.410,169.691 L173.268,169.691 C172.416,171.076 170.853,172.178 168.792,172.178 C166.235,172.178 164.672,170.508 165.240,167.027 L166.697,158.359 L168.792,158.359 
L167.372,166.885 C167.052,168.874 168.082,170.153 169.823,170.153 C171.386,170.153 173.517,168.981 173.943,166.423 L175.293,158.359 L177.389,158.359 L175.115,172.000 L173.020,172.000 ZM155.157,172.284 C151.391,172.284 149.349,169.407 150.041,165.109 C150.716,160.917 153.594,158.182 157.217,158.182 C160.982,158.182 163.034,161.059 162.332,165.393 C161.657,169.549 158.780,172.284 155.157,172.284 ZM157.181,160.064 C154.304,160.064 152.519,162.587 152.066,165.393 C151.640,168.057 152.457,170.401 155.192,170.401 C158.069,170.401 159.854,167.924 160.307,165.109 C160.734,162.444 159.917,160.064 157.181,160.064 ZM136.511,177.293 C135.801,177.293 135.197,177.151 135.019,177.044 L135.836,175.197 C137.293,175.588 138.207,175.366 139.317,173.350 L140.063,172.000 L137.293,158.359 L139.495,158.359 L141.449,169.229 L141.591,169.229 L147.168,158.359 L149.512,158.359 L141.023,174.202 C139.886,176.298 138.429,177.293 136.511,177.293 ZM123.225,166.849 L115.871,166.849 L113.181,172.000 L110.872,172.000 L120.569,153.812 L122.843,153.812 L126.501,172.000 L124.192,172.000 L123.225,166.849 ZM121.315,156.690 L121.173,156.690 L116.893,164.895 L122.860,164.895 L121.315,156.690 Z" class="cls-2"/>
+ <path d="M252.245,116.350 L252.245,102.200 L309.303,102.200 L309.303,116.350 L252.245,116.350 ZM208.254,81.088 L245.342,59.291 L208.254,38.180 L216.242,25.227 L260.862,52.844 L260.862,65.739 L216.413,93.355 L208.254,81.088 Z" class="cls-3"/>
+ <path d="M508.108,52.635 C507.921,55.093 507.643,57.527 507.274,59.937 L504.214,78.017 C503.658,81.170 502.754,84.324 501.502,87.475 C500.250,90.628 498.464,93.479 496.147,96.028 C493.829,98.579 491.047,100.503 487.802,101.800 C484.556,103.097 481.311,103.747 478.067,103.747 C476.211,103.747 474.357,103.491 472.504,102.982 C470.648,102.474 469.072,101.615 467.775,100.409 C466.475,99.205 465.410,97.767 464.576,96.098 C463.741,94.429 463.092,92.714 462.629,90.952 L455.953,131.146 L436.482,131.146 L453.310,28.922 L472.921,28.922 L471.391,38.240 C472.504,36.665 473.777,35.180 475.216,33.790 C476.652,32.399 478.228,31.240 479.944,30.313 C481.659,29.387 483.467,28.737 485.369,28.365 C487.268,27.996 489.145,27.809 491.001,27.809 C493.411,27.809 495.706,28.226 497.886,29.061 C500.063,29.895 501.871,31.171 503.310,32.886 C504.746,34.602 505.835,36.549 506.578,38.727 C507.319,40.907 507.806,43.156 508.039,45.472 C508.269,47.791 508.293,50.179 508.108,52.635 ZM487.455,48.184 C486.851,46.841 485.877,45.798 484.534,45.055 C483.189,44.314 481.729,43.942 480.153,43.942 C478.762,43.942 477.393,44.151 476.050,44.568 C474.705,44.986 473.499,45.681 472.434,46.655 C471.367,47.628 470.556,48.765 470.000,50.062 C469.444,51.362 469.027,52.659 468.748,53.956 L465.828,72.037 C465.641,73.149 465.480,74.286 465.341,75.444 C465.202,76.605 465.178,77.717 465.271,78.782 C465.363,79.849 465.526,80.916 465.758,81.981 C465.988,83.048 466.384,84.022 466.940,84.902 C467.497,85.784 468.283,86.456 469.305,86.918 C470.324,87.383 471.391,87.614 472.504,87.614 C474.079,87.614 475.633,87.314 477.163,86.710 C478.693,86.108 480.036,85.204 481.196,83.998 C482.354,82.794 483.235,81.425 483.839,79.895 C484.441,78.365 484.882,76.859 485.160,75.375 L488.081,57.294 C488.359,55.719 488.474,54.143 488.428,52.565 C488.381,50.990 488.057,49.530 487.455,48.184 ZM433.422,86.501 L433.422,102.635 L384.744,102.635 L384.744,86.501 L401.433,86.501 L412.977,16.544 L400.460,16.544 L400.460,0.410 L435.230,0.410 L420.905,86.501 L433.422,86.501 ZM354.285,102.635 L355.815,93.177 C354.793,94.846 353.542,96.354 352.060,97.697 C350.575,99.042 348.976,100.179 347.261,101.105 C345.545,102.033 343.736,102.704 341.837,103.121 C339.936,103.539 338.058,103.747 336.204,103.747 C333.792,103.747 331.497,103.330 329.320,102.495 C327.140,101.661 325.332,100.388 323.896,98.671 C322.457,96.956 321.368,94.985 320.627,92.760 C319.884,90.535 319.397,88.264 319.167,85.945 C318.934,83.628 318.911,81.264 319.097,78.852 C319.282,76.442 319.606,74.032 320.071,71.620 L322.992,53.539 C323.548,50.295 324.452,47.096 325.704,43.942 C326.955,40.791 328.764,37.962 331.128,35.459 C333.492,32.955 336.274,31.056 339.473,29.756 C342.672,28.459 345.892,27.809 349.139,27.809 C351.086,27.809 352.964,28.066 354.772,28.574 C356.580,29.085 358.155,29.943 359.500,31.147 C360.843,32.353 361.934,33.790 362.769,35.459 C363.603,37.128 364.205,38.844 364.577,40.605 L371.253,0.410 L390.724,0.410 L373.895,102.635 L354.285,102.635 ZM362.004,52.705 C361.956,51.592 361.795,50.503 361.517,49.436 C361.239,48.371 360.822,47.419 360.265,46.585 C359.709,45.751 358.920,45.103 357.901,44.638 C356.880,44.175 355.815,43.942 354.702,43.942 C353.124,43.942 351.573,44.245 350.043,44.846 C348.513,45.451 347.168,46.355 346.010,47.559 C344.849,48.765 343.969,50.110 343.367,51.592 C342.763,53.076 342.370,54.606 342.185,56.182 L339.125,74.262 C338.847,75.840 338.730,77.415 338.777,78.991 C338.823,80.569 339.147,82.029 339.751,83.372 C340.353,84.717 341.326,85.760 342.672,86.501 C344.015,87.244 345.475,87.614 
347.053,87.614 C348.443,87.614 349.810,87.405 351.156,86.988 C352.499,86.571 353.705,85.875 354.772,84.902 C355.836,83.928 356.671,82.794 357.275,81.494 C357.877,80.197 358.270,78.900 358.457,77.600 L361.517,59.520 C361.702,58.407 361.841,57.272 361.934,56.112 C362.026,54.954 362.049,53.817 362.004,52.705 Z" class="cls-4"/>
+</svg>
diff --git a/.github/workflows/build.yml b/.github/workflows/build.yml
new file mode 100644
index 0000000..4bed5af
--- /dev/null
+++ b/.github/workflows/build.yml
@@ -0,0 +1,487 @@
+name: Build Artifacts
+on:
+ workflow_call:
+ inputs:
+ version:
+ required: true
+ type: string
+ channel:
+ required: false
+ default: stable
+ type: string
+ unix:
+ default: true
+ type: boolean
+ linux_arm:
+ default: true
+ type: boolean
+ macos:
+ default: true
+ type: boolean
+ macos_legacy:
+ default: true
+ type: boolean
+ windows:
+ default: true
+ type: boolean
+ windows32:
+ default: true
+ type: boolean
+ meta_files:
+ default: true
+ type: boolean
+ origin:
+ required: false
+ default: ''
+ type: string
+ secrets:
+ GPG_SIGNING_KEY:
+ required: false
+
+ workflow_dispatch:
+ inputs:
+ version:
+ description: |
+ VERSION: yyyy.mm.dd[.rev] or rev
+ required: true
+ type: string
+ channel:
+ description: |
+ SOURCE of this build's updates: stable/nightly/master/<repo>
+ required: true
+ default: stable
+ type: string
+ unix:
+ description: yt-dlp, yt-dlp.tar.gz, yt-dlp_linux, yt-dlp_linux.zip
+ default: true
+ type: boolean
+ linux_arm:
+ description: yt-dlp_linux_aarch64, yt-dlp_linux_armv7l
+ default: true
+ type: boolean
+ macos:
+ description: yt-dlp_macos, yt-dlp_macos.zip
+ default: true
+ type: boolean
+ macos_legacy:
+ description: yt-dlp_macos_legacy
+ default: true
+ type: boolean
+ windows:
+ description: yt-dlp.exe, yt-dlp_min.exe, yt-dlp_win.zip
+ default: true
+ type: boolean
+ windows32:
+ description: yt-dlp_x86.exe
+ default: true
+ type: boolean
+ meta_files:
+ description: SHA2-256SUMS, SHA2-512SUMS, _update_spec
+ default: true
+ type: boolean
+ origin:
+ description: Origin
+ required: false
+ default: 'current repo'
+ type: choice
+ options:
+ - 'current repo'
+
+permissions:
+ contents: read
+
+jobs:
+ process:
+ runs-on: ubuntu-latest
+ outputs:
+ origin: ${{ steps.process_origin.outputs.origin }}
+ steps:
+ - name: Process origin
+ id: process_origin
+ run: |
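+          # The '&&'/'||' chain in the expression below acts as a ternary: 'current repo' resolves to the actual <owner>/<repo>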
+ echo "origin=${{ inputs.origin == 'current repo' && github.repository || inputs.origin }}" | tee "$GITHUB_OUTPUT"
+
+ unix:
+ needs: process
+ if: inputs.unix
+ runs-on: ubuntu-latest
+ steps:
+ - uses: actions/checkout@v4
+ - uses: actions/setup-python@v5
+ with:
+ python-version: "3.10"
+ - uses: conda-incubator/setup-miniconda@v3
+ with:
+ miniforge-variant: Mambaforge
+ use-mamba: true
+ channels: conda-forge
+ auto-update-conda: true
+ activate-environment: ""
+ auto-activate-base: false
+ - name: Install Requirements
+ run: |
+ sudo apt -y install zip pandoc man sed
+ cat > ./requirements.txt << EOF
+ python=3.10.*
+ brotli-python
+ EOF
+ python devscripts/install_deps.py --print \
+ --exclude brotli --exclude brotlicffi \
+ --include secretstorage --include pyinstaller >> ./requirements.txt
+ mamba create -n build --file ./requirements.txt
+
+ - name: Prepare
+ run: |
+ python devscripts/update-version.py -c "${{ inputs.channel }}" -r "${{ needs.process.outputs.origin }}" "${{ inputs.version }}"
+ python devscripts/make_lazy_extractors.py
+ - name: Build Unix platform-independent binary
+ run: |
+ make all tar
+ - name: Build Unix standalone binary
+ shell: bash -l {0}
+ run: |
+ unset LD_LIBRARY_PATH # Harmful; set by setup-python
+ conda activate build
+ python -m bundle.pyinstaller --onedir
+ (cd ./dist/yt-dlp_linux && zip -r ../yt-dlp_linux.zip .)
+ python -m bundle.pyinstaller
+ mv ./dist/yt-dlp_linux ./yt-dlp_linux
+ mv ./dist/yt-dlp_linux.zip ./yt-dlp_linux.zip
+
+ - name: Verify --update-to
+ if: vars.UPDATE_TO_VERIFICATION
+ run: |
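+          # Downgrade a copy of each binary via --update-to, then assert that the reported version actually changed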
+ binaries=("yt-dlp" "yt-dlp_linux")
+ for binary in "${binaries[@]}"; do
+ chmod +x ./${binary}
+ cp ./${binary} ./${binary}_downgraded
+ version="$(./${binary} --version)"
+ ./${binary}_downgraded -v --update-to yt-dlp/yt-dlp@2023.03.04
+ downgraded_version="$(./${binary}_downgraded --version)"
+ [[ "$version" != "$downgraded_version" ]]
+ done
+
+ - name: Upload artifacts
+ uses: actions/upload-artifact@v4
+ with:
+ name: build-bin-${{ github.job }}
+ path: |
+ yt-dlp
+ yt-dlp.tar.gz
+ yt-dlp_linux
+ yt-dlp_linux.zip
+ compression-level: 0
+
+ linux_arm:
+ needs: process
+ if: inputs.linux_arm
+ permissions:
+ contents: read
+ packages: write # for creating cache
+ runs-on: ubuntu-latest
+ strategy:
+ matrix:
+ architecture:
+ - armv7
+ - aarch64
+
+ steps:
+ - uses: actions/checkout@v4
+ with:
+ path: ./repo
+ - name: Virtualized Install, Prepare & Build
+ uses: yt-dlp/run-on-arch-action@v2
+ with:
+ # Ref: https://github.com/uraimo/run-on-arch-action/issues/55
+ env: |
+ GITHUB_WORKFLOW: build
+ githubToken: ${{ github.token }} # To cache image
+ arch: ${{ matrix.architecture }}
+        distro: ubuntu18.04 # Standalone executable should be built on the minimum supported OS
+ dockerRunArgs: --volume "${PWD}/repo:/repo"
+ install: | # Installing Python 3.10 from the Deadsnakes repo raises errors
+ apt update
+ apt -y install zlib1g-dev libffi-dev python3.8 python3.8-dev python3.8-distutils python3-pip
+ python3.8 -m pip install -U pip setuptools wheel
+ # Cannot access any files from the repo directory at this stage
+ python3.8 -m pip install -U Pyinstaller mutagen pycryptodomex websockets brotli certifi secretstorage cffi
+
+ run: |
+ cd repo
+ python3.8 devscripts/install_deps.py -o --include build
+ python3.8 devscripts/install_deps.py --include pyinstaller --include secretstorage # Cached version may be out of date
+ python3.8 devscripts/update-version.py -c "${{ inputs.channel }}" -r "${{ needs.process.outputs.origin }}" "${{ inputs.version }}"
+ python3.8 devscripts/make_lazy_extractors.py
+ python3.8 -m bundle.pyinstaller
+
+ if ${{ vars.UPDATE_TO_VERIFICATION && 'true' || 'false' }}; then
+ arch="${{ (matrix.architecture == 'armv7' && 'armv7l') || matrix.architecture }}"
+ chmod +x ./dist/yt-dlp_linux_${arch}
+ cp ./dist/yt-dlp_linux_${arch} ./dist/yt-dlp_linux_${arch}_downgraded
+ version="$(./dist/yt-dlp_linux_${arch} --version)"
+ ./dist/yt-dlp_linux_${arch}_downgraded -v --update-to yt-dlp/yt-dlp@2023.03.04
+ downgraded_version="$(./dist/yt-dlp_linux_${arch}_downgraded --version)"
+ [[ "$version" != "$downgraded_version" ]]
+ fi
+
+ - name: Upload artifacts
+ uses: actions/upload-artifact@v4
+ with:
+ name: build-bin-linux_${{ matrix.architecture }}
+ path: | # run-on-arch-action designates armv7l as armv7
+ repo/dist/yt-dlp_linux_${{ (matrix.architecture == 'armv7' && 'armv7l') || matrix.architecture }}
+ compression-level: 0
+
+ macos:
+ needs: process
+ if: inputs.macos
+ runs-on: macos-11
+
+ steps:
+ - uses: actions/checkout@v4
+ # NB: Building universal2 does not work with python from actions/setup-python
+ - name: Install Requirements
+ run: |
+ brew install coreutils
+ python3 devscripts/install_deps.py --user -o --include build
+ python3 devscripts/install_deps.py --print --include pyinstaller > requirements.txt
+        # We need to ignore wheels; otherwise, we break universal2 builds
+ python3 -m pip install -U --user --no-binary :all: -r requirements.txt
+
+ - name: Prepare
+ run: |
+ python3 devscripts/update-version.py -c "${{ inputs.channel }}" -r "${{ needs.process.outputs.origin }}" "${{ inputs.version }}"
+ python3 devscripts/make_lazy_extractors.py
+ - name: Build
+ run: |
+ python3 -m bundle.pyinstaller --target-architecture universal2 --onedir
+ (cd ./dist/yt-dlp_macos && zip -r ../yt-dlp_macos.zip .)
+ python3 -m bundle.pyinstaller --target-architecture universal2
+
+ - name: Verify --update-to
+ if: vars.UPDATE_TO_VERIFICATION
+ run: |
+ chmod +x ./dist/yt-dlp_macos
+ cp ./dist/yt-dlp_macos ./dist/yt-dlp_macos_downgraded
+ version="$(./dist/yt-dlp_macos --version)"
+ ./dist/yt-dlp_macos_downgraded -v --update-to yt-dlp/yt-dlp@2023.03.04
+ downgraded_version="$(./dist/yt-dlp_macos_downgraded --version)"
+ [[ "$version" != "$downgraded_version" ]]
+
+ - name: Upload artifacts
+ uses: actions/upload-artifact@v4
+ with:
+ name: build-bin-${{ github.job }}
+ path: |
+ dist/yt-dlp_macos
+ dist/yt-dlp_macos.zip
+ compression-level: 0
+
+ macos_legacy:
+ needs: process
+ if: inputs.macos_legacy
+ runs-on: macos-latest
+
+ steps:
+ - uses: actions/checkout@v4
+ - name: Install Python
+        # We need the official Python, because the GitHub Actions ones only support newer macOS versions
+ env:
+ PYTHON_VERSION: 3.10.5
+          MACOSX_DEPLOYMENT_TARGET: 10.9 # Picked up by the Python build tools
+ run: |
+ # Hack to get the latest patch version. Uncomment if needed
+ #brew install python@3.10
+ #export PYTHON_VERSION=$( $(brew --prefix)/opt/python@3.10/bin/python3 --version | cut -d ' ' -f 2 )
+ curl https://www.python.org/ftp/python/${PYTHON_VERSION}/python-${PYTHON_VERSION}-macos11.pkg -o "python.pkg"
+ sudo installer -pkg python.pkg -target /
+ python3 --version
+ - name: Install Requirements
+ run: |
+ brew install coreutils
+ python3 devscripts/install_deps.py --user -o --include build
+ python3 devscripts/install_deps.py --user --include pyinstaller
+
+ - name: Prepare
+ run: |
+ python3 devscripts/update-version.py -c "${{ inputs.channel }}" -r "${{ needs.process.outputs.origin }}" "${{ inputs.version }}"
+ python3 devscripts/make_lazy_extractors.py
+ - name: Build
+ run: |
+ python3 -m bundle.pyinstaller
+ mv dist/yt-dlp_macos dist/yt-dlp_macos_legacy
+
+ - name: Verify --update-to
+ if: vars.UPDATE_TO_VERIFICATION
+ run: |
+ chmod +x ./dist/yt-dlp_macos_legacy
+ cp ./dist/yt-dlp_macos_legacy ./dist/yt-dlp_macos_legacy_downgraded
+ version="$(./dist/yt-dlp_macos_legacy --version)"
+ ./dist/yt-dlp_macos_legacy_downgraded -v --update-to yt-dlp/yt-dlp@2023.03.04
+ downgraded_version="$(./dist/yt-dlp_macos_legacy_downgraded --version)"
+ [[ "$version" != "$downgraded_version" ]]
+
+ - name: Upload artifacts
+ uses: actions/upload-artifact@v4
+ with:
+ name: build-bin-${{ github.job }}
+ path: |
+ dist/yt-dlp_macos_legacy
+ compression-level: 0
+
+ windows:
+ needs: process
+ if: inputs.windows
+ runs-on: windows-latest
+
+ steps:
+ - uses: actions/checkout@v4
+ - uses: actions/setup-python@v5
+ with: # 3.8 is used for Win7 support
+ python-version: "3.8"
+ - name: Install Requirements
+ run: | # Custom pyinstaller built with https://github.com/yt-dlp/pyinstaller-builds
+ python devscripts/install_deps.py -o --include build
+ python devscripts/install_deps.py --include py2exe
+ python -m pip install -U "https://yt-dlp.github.io/Pyinstaller-Builds/x86_64/pyinstaller-5.8.0-py3-none-any.whl"
+
+ - name: Prepare
+ run: |
+ python devscripts/update-version.py -c "${{ inputs.channel }}" -r "${{ needs.process.outputs.origin }}" "${{ inputs.version }}"
+ python devscripts/make_lazy_extractors.py
+ - name: Build
+ run: |
+ python -m bundle.py2exe
+ Move-Item ./dist/yt-dlp.exe ./dist/yt-dlp_min.exe
+ python -m bundle.pyinstaller
+ python -m bundle.pyinstaller --onedir
+ Compress-Archive -Path ./dist/yt-dlp/* -DestinationPath ./dist/yt-dlp_win.zip
+
+ - name: Verify --update-to
+ if: vars.UPDATE_TO_VERIFICATION
+ run: |
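+          # PowerShell counterpart of the Unix check: downgrade a copy and exit 1 if the version is unchanged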
+ foreach ($name in @("yt-dlp","yt-dlp_min")) {
+ Copy-Item "./dist/${name}.exe" "./dist/${name}_downgraded.exe"
+ $version = & "./dist/${name}.exe" --version
+ & "./dist/${name}_downgraded.exe" -v --update-to yt-dlp/yt-dlp@2023.03.04
+ $downgraded_version = & "./dist/${name}_downgraded.exe" --version
+ if ($version -eq $downgraded_version) {
+ exit 1
+ }
+ }
+
+ - name: Upload artifacts
+ uses: actions/upload-artifact@v4
+ with:
+ name: build-bin-${{ github.job }}
+ path: |
+ dist/yt-dlp.exe
+ dist/yt-dlp_min.exe
+ dist/yt-dlp_win.zip
+ compression-level: 0
+
+ windows32:
+ needs: process
+ if: inputs.windows32
+ runs-on: windows-latest
+
+ steps:
+ - uses: actions/checkout@v4
+ - uses: actions/setup-python@v5
+ with:
+ python-version: "3.8"
+ architecture: "x86"
+ - name: Install Requirements
+ run: |
+ python devscripts/install_deps.py -o --include build
+ python devscripts/install_deps.py
+ python -m pip install -U "https://yt-dlp.github.io/Pyinstaller-Builds/i686/pyinstaller-5.8.0-py3-none-any.whl"
+
+ - name: Prepare
+ run: |
+ python devscripts/update-version.py -c "${{ inputs.channel }}" -r "${{ needs.process.outputs.origin }}" "${{ inputs.version }}"
+ python devscripts/make_lazy_extractors.py
+ - name: Build
+ run: |
+ python -m bundle.pyinstaller
+
+ - name: Verify --update-to
+ if: vars.UPDATE_TO_VERIFICATION
+ run: |
+ foreach ($name in @("yt-dlp_x86")) {
+ Copy-Item "./dist/${name}.exe" "./dist/${name}_downgraded.exe"
+ $version = & "./dist/${name}.exe" --version
+ & "./dist/${name}_downgraded.exe" -v --update-to yt-dlp/yt-dlp@2023.03.04
+ $downgraded_version = & "./dist/${name}_downgraded.exe" --version
+ if ($version -eq $downgraded_version) {
+ exit 1
+ }
+ }
+
+ - name: Upload artifacts
+ uses: actions/upload-artifact@v4
+ with:
+ name: build-bin-${{ github.job }}
+ path: |
+ dist/yt-dlp_x86.exe
+ compression-level: 0
+
+ meta_files:
+ if: inputs.meta_files && always() && !cancelled()
+ needs:
+ - process
+ - unix
+ - linux_arm
+ - macos
+ - macos_legacy
+ - windows
+ - windows32
+ runs-on: ubuntu-latest
+ steps:
+ - uses: actions/download-artifact@v4
+ with:
+ path: artifact
+ pattern: build-bin-*
+ merge-multiple: true
+
+ - name: Make SHA2-SUMS files
+ run: |
+ cd ./artifact/
+ sha256sum * > ../SHA2-256SUMS
+ sha512sum * > ../SHA2-512SUMS
+
+ - name: Make Update spec
+ run: |
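+          # Each lock/lockV2 line prevents clients whose variant matches the regex from updating past the given version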
+ cat >> _update_spec << EOF
+ # This file is used for regulating self-update
+ lock 2022.08.18.36 .+ Python 3\.6
+ lock 2023.11.16 (?!win_x86_exe).+ Python 3\.7
+ lock 2023.11.16 win_x86_exe .+ Windows-(?:Vista|2008Server)
+ lockV2 yt-dlp/yt-dlp 2022.08.18.36 .+ Python 3\.6
+ lockV2 yt-dlp/yt-dlp 2023.11.16 (?!win_x86_exe).+ Python 3\.7
+ lockV2 yt-dlp/yt-dlp 2023.11.16 win_x86_exe .+ Windows-(?:Vista|2008Server)
+ lockV2 yt-dlp/yt-dlp-nightly-builds 2023.11.15.232826 (?!win_x86_exe).+ Python 3\.7
+ lockV2 yt-dlp/yt-dlp-nightly-builds 2023.11.15.232826 win_x86_exe .+ Windows-(?:Vista|2008Server)
+ lockV2 yt-dlp/yt-dlp-master-builds 2023.11.15.232812 (?!win_x86_exe).+ Python 3\.7
+ lockV2 yt-dlp/yt-dlp-master-builds 2023.11.15.232812 win_x86_exe .+ Windows-(?:Vista|2008Server)
+ EOF
+
+ - name: Sign checksum files
+ env:
+ GPG_SIGNING_KEY: ${{ secrets.GPG_SIGNING_KEY }}
+ if: env.GPG_SIGNING_KEY != ''
+ run: |
+ gpg --batch --import <<< "${{ secrets.GPG_SIGNING_KEY }}"
+ for signfile in ./SHA*SUMS; do
+ gpg --batch --detach-sign "$signfile"
+ done
+
+ - name: Upload artifacts
+ uses: actions/upload-artifact@v4
+ with:
+ name: build-${{ github.job }}
+ path: |
+ _update_spec
+ SHA*SUMS*
+ compression-level: 0
+ overwrite: true
diff --git a/.github/workflows/codeql.yml b/.github/workflows/codeql.yml
new file mode 100644
index 0000000..170a6ac
--- /dev/null
+++ b/.github/workflows/codeql.yml
@@ -0,0 +1,65 @@
+name: "CodeQL"
+
+on:
+ push:
+ branches: [ 'master', 'gh-pages', 'release' ]
+ pull_request:
+ # The branches below must be a subset of the branches above
+ branches: [ 'master' ]
+ schedule:
+ - cron: '59 11 * * 5'
+
+jobs:
+ analyze:
+ name: Analyze
+ runs-on: ubuntu-latest
+ permissions:
+ actions: read
+ contents: read
+ security-events: write
+
+ strategy:
+ fail-fast: false
+ matrix:
+ language: [ 'python' ]
+ # CodeQL supports [ 'cpp', 'csharp', 'go', 'java', 'javascript', 'python', 'ruby' ]
+ # Use only 'java' to analyze code written in Java, Kotlin or both
+ # Use only 'javascript' to analyze code written in JavaScript, TypeScript or both
+ # Learn more about CodeQL language support at https://aka.ms/codeql-docs/language-support
+
+ steps:
+ - name: Checkout repository
+ uses: actions/checkout@v4
+
+ # Initializes the CodeQL tools for scanning.
+ - name: Initialize CodeQL
+ uses: github/codeql-action/init@v2
+ with:
+ languages: ${{ matrix.language }}
+ # If you wish to specify custom queries, you can do so here or in a config file.
+ # By default, queries listed here will override any specified in a config file.
+ # Prefix the list here with "+" to use these queries and those in the config file.
+
+ # For more details on CodeQL's query packs, refer to: https://docs.github.com/en/code-security/code-scanning/automatically-scanning-your-code-for-vulnerabilities-and-errors/configuring-code-scanning#using-queries-in-ql-packs
+ # queries: security-extended,security-and-quality
+
+
+ # Autobuild attempts to build any compiled languages (C/C++, C#, Go, Java, or Swift).
+ # If this step fails, then you should remove it and run the build manually (see below)
+ - name: Autobuild
+ uses: github/codeql-action/autobuild@v2
+
+ # ℹ️ Command-line programs to run using the OS shell.
+ # 📚 See https://docs.github.com/en/actions/using-workflows/workflow-syntax-for-github-actions#jobsjob_idstepsrun
+
+    # If the Autobuild step above fails, remove it and uncomment the following three lines,
+    # then modify them (or add more) to build your code; refer to the example below for guidance.
+
+ # - run: |
+ # echo "Run, Build Application using script"
+ # ./location_of_script_within_repo/buildscript.sh
+
+ - name: Perform CodeQL Analysis
+ uses: github/codeql-action/analyze@v2
+ with:
+ category: "/language:${{matrix.language}}"
diff --git a/.github/workflows/core.yml b/.github/workflows/core.yml
new file mode 100644
index 0000000..ba86306
--- /dev/null
+++ b/.github/workflows/core.yml
@@ -0,0 +1,61 @@
+name: Core Tests
+on:
+ push:
+ paths:
+ - .github/**
+ - devscripts/**
+ - test/**
+ - yt_dlp/**.py
+ - '!yt_dlp/extractor/*.py'
+ - yt_dlp/extractor/__init__.py
+ - yt_dlp/extractor/common.py
+ - yt_dlp/extractor/extractors.py
+ pull_request:
+ paths:
+ - .github/**
+ - devscripts/**
+ - test/**
+ - yt_dlp/**.py
+ - '!yt_dlp/extractor/*.py'
+ - yt_dlp/extractor/__init__.py
+ - yt_dlp/extractor/common.py
+ - yt_dlp/extractor/extractors.py
+permissions:
+ contents: read
+
+concurrency:
+ group: core-${{ github.event.pull_request.number || github.ref }}
+ cancel-in-progress: ${{ github.event_name == 'pull_request' }}
+
+jobs:
+ tests:
+ name: Core Tests
+ if: "!contains(github.event.head_commit.message, 'ci skip')"
+ runs-on: ${{ matrix.os }}
+ strategy:
+ fail-fast: false
+ matrix:
+ os: [ubuntu-latest]
+ # CPython 3.8 is in quick-test
+ python-version: ['3.9', '3.10', '3.11', '3.12', pypy-3.8, pypy-3.10]
+ include:
+        # At least one test each for CPython and PyPy must run on Windows
+ - os: windows-latest
+ python-version: '3.8'
+ - os: windows-latest
+ python-version: '3.12'
+ - os: windows-latest
+ python-version: pypy-3.9
+ steps:
+ - uses: actions/checkout@v4
+ - name: Set up Python ${{ matrix.python-version }}
+ uses: actions/setup-python@v5
+ with:
+ python-version: ${{ matrix.python-version }}
+ - name: Install test requirements
+ run: python3 ./devscripts/install_deps.py --include dev
+ - name: Run tests
+        continue-on-error: false
+ run: |
+ python3 -m yt_dlp -v || true # Print debug head
+ python3 ./devscripts/run_tests.py core
diff --git a/.github/workflows/download.yml b/.github/workflows/download.yml
new file mode 100644
index 0000000..7256804
--- /dev/null
+++ b/.github/workflows/download.yml
@@ -0,0 +1,48 @@
+name: Download Tests
+on: [push, pull_request]
+permissions:
+ contents: read
+
+jobs:
+ quick:
+ name: Quick Download Tests
+ if: "contains(github.event.head_commit.message, 'ci run dl')"
+ runs-on: ubuntu-latest
+ steps:
+ - uses: actions/checkout@v4
+ - name: Set up Python
+ uses: actions/setup-python@v5
+ with:
+ python-version: 3.9
+ - name: Install test requirements
+ run: python3 ./devscripts/install_deps.py --include dev
+ - name: Run tests
+ continue-on-error: true
+ run: python3 ./devscripts/run_tests.py download
+
+ full:
+ name: Full Download Tests
+ if: "contains(github.event.head_commit.message, 'ci run dl all')"
+ runs-on: ${{ matrix.os }}
+ strategy:
+ fail-fast: true
+ matrix:
+ os: [ubuntu-latest]
+ python-version: ['3.10', '3.11', '3.12', pypy-3.8, pypy-3.10]
+ include:
+        # At least one test each for CPython and PyPy must run on Windows
+ - os: windows-latest
+ python-version: '3.8'
+ - os: windows-latest
+ python-version: pypy-3.9
+ steps:
+ - uses: actions/checkout@v4
+ - name: Set up Python ${{ matrix.python-version }}
+ uses: actions/setup-python@v5
+ with:
+ python-version: ${{ matrix.python-version }}
+ - name: Install test requirements
+ run: python3 ./devscripts/install_deps.py --include dev
+ - name: Run tests
+ continue-on-error: true
+ run: python3 ./devscripts/run_tests.py download
diff --git a/.github/workflows/quick-test.yml b/.github/workflows/quick-test.yml
new file mode 100644
index 0000000..3114e7b
--- /dev/null
+++ b/.github/workflows/quick-test.yml
@@ -0,0 +1,35 @@
+name: Quick Test
+on: [push, pull_request]
+permissions:
+ contents: read
+
+jobs:
+ tests:
+ name: Core Test
+ if: "!contains(github.event.head_commit.message, 'ci skip all')"
+ runs-on: ubuntu-latest
+ steps:
+ - uses: actions/checkout@v4
+ - name: Set up Python 3.8
+ uses: actions/setup-python@v5
+ with:
+ python-version: '3.8'
+ - name: Install test requirements
+ run: python3 ./devscripts/install_deps.py --include dev
+ - name: Run tests
+ run: |
+ python3 -m yt_dlp -v || true
+ python3 ./devscripts/run_tests.py core
+ flake8:
+ name: Linter
+ if: "!contains(github.event.head_commit.message, 'ci skip all')"
+ runs-on: ubuntu-latest
+ steps:
+ - uses: actions/checkout@v4
+ - uses: actions/setup-python@v5
+ - name: Install flake8
+ run: python3 ./devscripts/install_deps.py -o --include dev
+ - name: Make lazy extractors
+ run: python3 ./devscripts/make_lazy_extractors.py
+ - name: Run flake8
+ run: flake8 .
diff --git a/.github/workflows/release-master.yml b/.github/workflows/release-master.yml
new file mode 100644
index 0000000..a845475
--- /dev/null
+++ b/.github/workflows/release-master.yml
@@ -0,0 +1,29 @@
+name: Release (master)
+on:
+ push:
+ branches:
+ - master
+ paths:
+ - "yt_dlp/**.py"
+ - "!yt_dlp/version.py"
+ - "bundle/*.py"
+ - "pyproject.toml"
+ - "Makefile"
+ - ".github/workflows/build.yml"
+concurrency:
+ group: release-master
+permissions:
+ contents: read
+
+jobs:
+ release:
+ if: vars.BUILD_MASTER != ''
+ uses: ./.github/workflows/release.yml
+ with:
+ prerelease: true
+ source: master
+ permissions:
+ contents: write
+ packages: write
+ id-token: write # mandatory for trusted publishing
+ secrets: inherit
diff --git a/.github/workflows/release-nightly.yml b/.github/workflows/release-nightly.yml
new file mode 100644
index 0000000..f459a3a
--- /dev/null
+++ b/.github/workflows/release-nightly.yml
@@ -0,0 +1,42 @@
+name: Release (nightly)
+on:
+ schedule:
+ - cron: '23 23 * * *'
+permissions:
+ contents: read
+
+jobs:
+ check_nightly:
+ if: vars.BUILD_NIGHTLY != ''
+ runs-on: ubuntu-latest
+ outputs:
+ commit: ${{ steps.check_for_new_commits.outputs.commit }}
+ steps:
+ - uses: actions/checkout@v4
+ with:
+ fetch-depth: 0
+ - name: Check for new commits
+ id: check_for_new_commits
+ run: |
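+          # Print the newest commit (if any) from the last 24 hours that touched release-relevant files; empty output skips the release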
+ relevant_files=(
+ "yt_dlp/*.py"
+ ':!yt_dlp/version.py'
+ "bundle/*.py"
+ "pyproject.toml"
+ "Makefile"
+ ".github/workflows/build.yml"
+ )
+ echo "commit=$(git log --format=%H -1 --since="24 hours ago" -- "${relevant_files[@]}")" | tee "$GITHUB_OUTPUT"
+
+ release:
+ needs: [check_nightly]
+ if: ${{ needs.check_nightly.outputs.commit }}
+ uses: ./.github/workflows/release.yml
+ with:
+ prerelease: true
+ source: nightly
+ permissions:
+ contents: write
+ packages: write
+ id-token: write # mandatory for trusted publishing
+ secrets: inherit
diff --git a/.github/workflows/release.yml b/.github/workflows/release.yml
new file mode 100644
index 0000000..fd99cec
--- /dev/null
+++ b/.github/workflows/release.yml
@@ -0,0 +1,387 @@
+name: Release
+on:
+ workflow_call:
+ inputs:
+ prerelease:
+ required: false
+ default: true
+ type: boolean
+ source:
+ required: false
+ default: ''
+ type: string
+ target:
+ required: false
+ default: ''
+ type: string
+ version:
+ required: false
+ default: ''
+ type: string
+ workflow_dispatch:
+ inputs:
+ source:
+ description: |
+ SOURCE of this release's updates:
+ channel, repo, tag, or channel/repo@tag
+ (default: <current_repo>)
+ required: false
+ default: ''
+ type: string
+ target:
+ description: |
+ TARGET to publish this release to:
+ channel, tag, or channel@tag
+ (default: <source> if writable else <current_repo>[@source_tag])
+ required: false
+ default: ''
+ type: string
+ version:
+ description: |
+ VERSION: yyyy.mm.dd[.rev] or rev
+ (default: auto-generated)
+ required: false
+ default: ''
+ type: string
+ prerelease:
+ description: Pre-release
+ default: false
+ type: boolean
+
+permissions:
+ contents: read
+
+jobs:
+ prepare:
+ permissions:
+ contents: write
+ runs-on: ubuntu-latest
+ outputs:
+ channel: ${{ steps.setup_variables.outputs.channel }}
+ version: ${{ steps.setup_variables.outputs.version }}
+ target_repo: ${{ steps.setup_variables.outputs.target_repo }}
+ target_repo_token: ${{ steps.setup_variables.outputs.target_repo_token }}
+ target_tag: ${{ steps.setup_variables.outputs.target_tag }}
+ pypi_project: ${{ steps.setup_variables.outputs.pypi_project }}
+ pypi_suffix: ${{ steps.setup_variables.outputs.pypi_suffix }}
+ head_sha: ${{ steps.get_target.outputs.head_sha }}
+
+ steps:
+ - uses: actions/checkout@v4
+ with:
+ fetch-depth: 0
+
+ - uses: actions/setup-python@v5
+ with:
+ python-version: "3.10"
+
+ - name: Process inputs
+ id: process_inputs
+ run: |
+ cat << EOF
+ ::group::Inputs
+ prerelease=${{ inputs.prerelease }}
+ source=${{ inputs.source }}
+ target=${{ inputs.target }}
+ version=${{ inputs.version }}
+ ::endgroup::
+ EOF
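+          # Split the 'repo@tag' style inputs on '@' into separate repo and tag parts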
+ IFS='@' read -r source_repo source_tag <<<"${{ inputs.source }}"
+ IFS='@' read -r target_repo target_tag <<<"${{ inputs.target }}"
+ cat << EOF >> "$GITHUB_OUTPUT"
+ source_repo=${source_repo}
+ source_tag=${source_tag}
+ target_repo=${target_repo}
+ target_tag=${target_tag}
+ EOF
+
+ - name: Setup variables
+ id: setup_variables
+ env:
+ source_repo: ${{ steps.process_inputs.outputs.source_repo }}
+ source_tag: ${{ steps.process_inputs.outputs.source_tag }}
+ target_repo: ${{ steps.process_inputs.outputs.target_repo }}
+ target_tag: ${{ steps.process_inputs.outputs.target_tag }}
+ run: |
+ # unholy bash monstrosity (sincere apologies)
+ fallback_token () {
+ if ${{ !secrets.ARCHIVE_REPO_TOKEN }}; then
+ echo "::error::Repository access secret ${target_repo_token^^} not found"
+ exit 1
+ fi
+ target_repo_token=ARCHIVE_REPO_TOKEN
+ return 0
+ }
+
+ source_is_channel=0
+ [[ "${source_repo}" == 'stable' ]] && source_repo='yt-dlp/yt-dlp'
+ if [[ -z "${source_repo}" ]]; then
+ source_repo='${{ github.repository }}'
+ elif [[ '${{ vars[format('{0}_archive_repo', env.source_repo)] }}' ]]; then
+ source_is_channel=1
+ source_channel='${{ vars[format('{0}_archive_repo', env.source_repo)] }}'
+ elif [[ -z "${source_tag}" && "${source_repo}" != */* ]]; then
+ source_tag="${source_repo}"
+ source_repo='${{ github.repository }}'
+ fi
+ resolved_source="${source_repo}"
+ if [[ "${source_tag}" ]]; then
+ resolved_source="${resolved_source}@${source_tag}"
+ elif [[ "${source_repo}" == 'yt-dlp/yt-dlp' ]]; then
+ resolved_source='stable'
+ fi
+
+ revision="${{ (inputs.prerelease || !vars.PUSH_VERSION_COMMIT) && '$(date -u +"%H%M%S")' || '' }}"
+ version="$(
+ python devscripts/update-version.py \
+ -c "${resolved_source}" -r "${{ github.repository }}" ${{ inputs.version || '$revision' }} | \
+ grep -Po "version=\K\d+\.\d+\.\d+(\.\d+)?")"
+
+ if [[ "${target_repo}" ]]; then
+ if [[ -z "${target_tag}" ]]; then
+ if [[ '${{ vars[format('{0}_archive_repo', env.target_repo)] }}' ]]; then
+ target_tag="${source_tag:-${version}}"
+ else
+ target_tag="${target_repo}"
+ target_repo='${{ github.repository }}'
+ fi
+ fi
+ if [[ "${target_repo}" != '${{ github.repository}}' ]]; then
+ target_repo='${{ vars[format('{0}_archive_repo', env.target_repo)] }}'
+ target_repo_token='${{ env.target_repo }}_archive_repo_token'
+ ${{ !!secrets[format('{0}_archive_repo_token', env.target_repo)] }} || fallback_token
+ pypi_project='${{ vars[format('{0}_pypi_project', env.target_repo)] }}'
+ pypi_suffix='${{ vars[format('{0}_pypi_suffix', env.target_repo)] }}'
+ fi
+ else
+ target_tag="${source_tag:-${version}}"
+ if ((source_is_channel)); then
+ target_repo="${source_channel}"
+ target_repo_token='${{ env.source_repo }}_archive_repo_token'
+ ${{ !!secrets[format('{0}_archive_repo_token', env.source_repo)] }} || fallback_token
+ pypi_project='${{ vars[format('{0}_pypi_project', env.source_repo)] }}'
+ pypi_suffix='${{ vars[format('{0}_pypi_suffix', env.source_repo)] }}'
+ else
+ target_repo='${{ github.repository }}'
+ fi
+ fi
+
+ if [[ "${target_repo}" == '${{ github.repository }}' ]] && ${{ !inputs.prerelease }}; then
+ pypi_project='${{ vars.PYPI_PROJECT }}'
+ fi
+
+ echo "::group::Output variables"
+ cat << EOF | tee -a "$GITHUB_OUTPUT"
+ channel=${resolved_source}
+ version=${version}
+ target_repo=${target_repo}
+ target_repo_token=${target_repo_token}
+ target_tag=${target_tag}
+ pypi_project=${pypi_project}
+ pypi_suffix=${pypi_suffix}
+ EOF
+ echo "::endgroup::"
+
+ - name: Update documentation
+ env:
+ version: ${{ steps.setup_variables.outputs.version }}
+ target_repo: ${{ steps.setup_variables.outputs.target_repo }}
+ if: |
+ !inputs.prerelease && env.target_repo == github.repository
+ run: |
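+          # Rebuild the docs, then splice this version's section into Changelog.md between the header and the older entries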
+ make doc
+ sed '/### /Q' Changelog.md >> ./CHANGELOG
+ echo '### ${{ env.version }}' >> ./CHANGELOG
+ python ./devscripts/make_changelog.py -vv -c >> ./CHANGELOG
+ echo >> ./CHANGELOG
+ grep -Poz '(?s)### \d+\.\d+\.\d+.+' 'Changelog.md' | head -n -1 >> ./CHANGELOG
+ cat ./CHANGELOG > Changelog.md
+
+ - name: Push to release
+ id: push_release
+ env:
+ version: ${{ steps.setup_variables.outputs.version }}
+ target_repo: ${{ steps.setup_variables.outputs.target_repo }}
+ if: |
+ !inputs.prerelease && env.target_repo == github.repository
+ run: |
+ git config --global user.name "github-actions[bot]"
+ git config --global user.email "41898282+github-actions[bot]@users.noreply.github.com"
+ git add -u
+ git commit -m "Release ${{ env.version }}" \
+ -m "Created by: ${{ github.event.sender.login }}" -m ":ci skip all :ci run dl"
+ git push origin --force ${{ github.event.ref }}:release
+
+ - name: Get target commitish
+ id: get_target
+ run: |
+ echo "head_sha=$(git rev-parse HEAD)" >> "$GITHUB_OUTPUT"
+
+ - name: Update master
+ env:
+ target_repo: ${{ steps.setup_variables.outputs.target_repo }}
+ if: |
+ vars.PUSH_VERSION_COMMIT != '' && !inputs.prerelease && env.target_repo == github.repository
+ run: git push origin ${{ github.event.ref }}
+
+ build:
+ needs: prepare
+ uses: ./.github/workflows/build.yml
+ with:
+ version: ${{ needs.prepare.outputs.version }}
+ channel: ${{ needs.prepare.outputs.channel }}
+ origin: ${{ needs.prepare.outputs.target_repo }}
+ permissions:
+ contents: read
+ packages: write # For package cache
+ secrets:
+ GPG_SIGNING_KEY: ${{ secrets.GPG_SIGNING_KEY }}
+
+ publish_pypi:
+ needs: [prepare, build]
+ if: ${{ needs.prepare.outputs.pypi_project }}
+ runs-on: ubuntu-latest
+ permissions:
+ id-token: write # mandatory for trusted publishing
+
+ steps:
+ - uses: actions/checkout@v4
+ with:
+ fetch-depth: 0
+ - uses: actions/setup-python@v5
+ with:
+ python-version: "3.10"
+
+ - name: Install Requirements
+ run: |
+ sudo apt -y install pandoc man
+ python devscripts/install_deps.py -o --include build
+
+ - name: Prepare
+ env:
+ version: ${{ needs.prepare.outputs.version }}
+ suffix: ${{ needs.prepare.outputs.pypi_suffix }}
+ channel: ${{ needs.prepare.outputs.channel }}
+ target_repo: ${{ needs.prepare.outputs.target_repo }}
+ pypi_project: ${{ needs.prepare.outputs.pypi_project }}
+ run: |
+ python devscripts/update-version.py -c "${{ env.channel }}" -r "${{ env.target_repo }}" -s "${{ env.suffix }}" "${{ env.version }}"
+ python devscripts/make_lazy_extractors.py
+ sed -i -E '0,/(name = ")[^"]+(")/s//\1${{ env.pypi_project }}\2/' pyproject.toml
+
+ - name: Build
+ run: |
+ rm -rf dist/*
+ make pypi-files
+ printf '%s\n\n' \
+ 'Official repository: <https://github.com/yt-dlp/yt-dlp>' \
+ '**PS**: Some links in this document will not work since this is a copy of the README.md from Github' > ./README.md.new
+ cat ./README.md >> ./README.md.new && mv -f ./README.md.new ./README.md
+ python devscripts/set-variant.py pip -M "You installed yt-dlp with pip or using the wheel from PyPi; Use that to update"
+ make clean-cache
+ python -m build --no-isolation .
+
+ - name: Publish to PyPI
+ uses: pypa/gh-action-pypi-publish@release/v1
+ with:
+ verbose: true
+
+ publish:
+ needs: [prepare, build]
+ permissions:
+ contents: write
+ runs-on: ubuntu-latest
+
+ steps:
+ - uses: actions/checkout@v4
+ with:
+ fetch-depth: 0
+ - uses: actions/download-artifact@v4
+ with:
+ path: artifact
+ pattern: build-*
+ merge-multiple: true
+ - uses: actions/setup-python@v5
+ with:
+ python-version: "3.10"
+
+ - name: Generate release notes
+ env:
+ head_sha: ${{ needs.prepare.outputs.head_sha }}
+ target_repo: ${{ needs.prepare.outputs.target_repo }}
+ target_tag: ${{ needs.prepare.outputs.target_tag }}
+ run: |
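+          # Assemble the badge header and generated changelog into RELEASE_NOTES, then derive PRERELEASE_NOTES and ARCHIVE_NOTES from it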
+ printf '%s' \
+ '[![Installation](https://img.shields.io/badge/-Which%20file%20to%20download%3F-white.svg?style=for-the-badge)]' \
+ '(https://github.com/${{ github.repository }}#installation "Installation instructions") ' \
+ '[![Discord](https://img.shields.io/discord/807245652072857610?color=blue&labelColor=555555&label=&logo=discord&style=for-the-badge)]' \
+ '(https://discord.gg/H5MNcFW63r "Discord") ' \
+ '[![Donate](https://img.shields.io/badge/_-Donate-red.svg?logo=githubsponsors&labelColor=555555&style=for-the-badge)]' \
+ '(https://github.com/yt-dlp/yt-dlp/blob/master/Collaborators.md#collaborators "Donate") ' \
+ '[![Documentation](https://img.shields.io/badge/-Docs-brightgreen.svg?style=for-the-badge&logo=GitBook&labelColor=555555)]' \
+ '(https://github.com/${{ github.repository }}' \
+ '${{ env.target_repo == github.repository && format('/tree/{0}', env.target_tag) || '' }}#readme "Documentation") ' \
+ ${{ env.target_repo == 'yt-dlp/yt-dlp' && '\
+ "[![Nightly](https://img.shields.io/badge/Nightly%20builds-purple.svg?style=for-the-badge)]" \
+ "(https://github.com/yt-dlp/yt-dlp-nightly-builds/releases/latest \"Nightly builds\") " \
+ "[![Master](https://img.shields.io/badge/Master%20builds-lightblue.svg?style=for-the-badge)]" \
+ "(https://github.com/yt-dlp/yt-dlp-master-builds/releases/latest \"Master builds\")"' || '' }} > ./RELEASE_NOTES
+ printf '\n\n' >> ./RELEASE_NOTES
+ cat >> ./RELEASE_NOTES << EOF
+        #### A description of the various files is available in the [README](https://github.com/${{ github.repository }}#release-files)
+ ---
+ $(python ./devscripts/make_changelog.py -vv --collapsible)
+ EOF
+ printf '%s\n\n' '**This is a pre-release build**' >> ./PRERELEASE_NOTES
+ cat ./RELEASE_NOTES >> ./PRERELEASE_NOTES
+ printf '%s\n\n' 'Generated from: https://github.com/${{ github.repository }}/commit/${{ env.head_sha }}' >> ./ARCHIVE_NOTES
+ cat ./RELEASE_NOTES >> ./ARCHIVE_NOTES
+
+ - name: Publish to archive repo
+ env:
+ GH_TOKEN: ${{ secrets[needs.prepare.outputs.target_repo_token] }}
+ GH_REPO: ${{ needs.prepare.outputs.target_repo }}
+ version: ${{ needs.prepare.outputs.version }}
+ channel: ${{ needs.prepare.outputs.channel }}
+ if: |
+ inputs.prerelease && env.GH_TOKEN != '' && env.GH_REPO != '' && env.GH_REPO != github.repository
+ run: |
+ title="${{ startswith(env.GH_REPO, 'yt-dlp/') && 'yt-dlp ' || '' }}${{ env.channel }}"
+ gh release create \
+ --notes-file ARCHIVE_NOTES \
+ --title "${title} ${{ env.version }}" \
+ ${{ env.version }} \
+ artifact/*
+
+ - name: Prune old release
+ env:
+ GH_TOKEN: ${{ github.token }}
+ version: ${{ needs.prepare.outputs.version }}
+ target_repo: ${{ needs.prepare.outputs.target_repo }}
+ target_tag: ${{ needs.prepare.outputs.target_tag }}
+ if: |
+ env.target_repo == github.repository && env.target_tag != env.version
+ run: |
+ gh release delete --yes --cleanup-tag "${{ env.target_tag }}" || true
+ git tag --delete "${{ env.target_tag }}" || true
+ sleep 5 # Enough time to cover deletion race condition
+
+ - name: Publish release
+ env:
+ GH_TOKEN: ${{ github.token }}
+ version: ${{ needs.prepare.outputs.version }}
+ target_repo: ${{ needs.prepare.outputs.target_repo }}
+ target_tag: ${{ needs.prepare.outputs.target_tag }}
+ head_sha: ${{ needs.prepare.outputs.head_sha }}
+ if: |
+ env.target_repo == github.repository
+ run: |
+ title="${{ github.repository == 'yt-dlp/yt-dlp' && 'yt-dlp ' || '' }}"
+ title+="${{ env.target_tag != env.version && format('{0} ', env.target_tag) || '' }}"
+ gh release create \
+ --notes-file ${{ inputs.prerelease && 'PRERELEASE_NOTES' || 'RELEASE_NOTES' }} \
+ --target ${{ env.head_sha }} \
+ --title "${title}${{ env.version }}" \
+ ${{ inputs.prerelease && '--prerelease' || '' }} \
+ ${{ env.target_tag }} \
+ artifact/*
diff --git a/.gitignore b/.gitignore
new file mode 100644
index 0000000..630c2e0
--- /dev/null
+++ b/.gitignore
@@ -0,0 +1,128 @@
+# Config
+*.conf
+cookies
+*cookies.txt
+.netrc
+
+# Downloaded
+*.annotations.xml
+*.aria2
+*.description
+*.dump
+*.frag
+*.frag.aria2
+*.frag.urls
+*.info.json
+*.live_chat.json
+*.meta
+*.part*
+*.tmp
+*.temp
+*.unknown_video
+*.ytdl
+.cache/
+
+*.3gp
+*.ape
+*.ass
+*.avi
+*.desktop
+*.f4v
+*.flac
+*.flv
+*.gif
+*.jpeg
+*.jpg
+*.lrc
+*.m4a
+*.m4v
+*.mhtml
+*.mkv
+*.mov
+*.mp3
+*.mp4
+*.mpg
+*.mpga
+*.oga
+*.ogg
+*.opus
+*.png
+*.sbv
+*.srt
+*.ssa
+*.swf
+*.swp
+*.tt
+*.ttml
+*.url
+*.vtt
+*.wav
+*.webloc
+*.webm
+*.webp
+
+# Allow config/media files in testdata
+!test/**
+
+# Python
+*.pyc
+*.pyo
+.pytest_cache
+wine-py2exe/
+py2exe.log
+build/
+dist/
+zip/
+tmp/
+venv/
+.venv/
+completions/
+
+# Misc
+*~
+*.DS_Store
+*.kate-swp
+MANIFEST
+test/local_parameters.json
+.coverage
+cover/
+secrets/
+updates_key.pem
+*.egg-info
+.tox
+*.class
+*.isorted
+*.stackdump
+
+# Generated
+AUTHORS
+README.txt
+.mailmap
+*.1
+*.bash-completion
+*.fish
+*.tar.gz
+*.zsh
+*.spec
+test/testdata/sigs/player-*.js
+
+# Binary
+/youtube-dl
+/youtube-dlc
+/yt-dlp
+yt-dlp.zip
+*.exe
+
+# Text Editor / IDE
+.idea
+*.iml
+.vscode
+*.sublime-*
+*.code-workspace
+
+# Lazy extractors
+*/extractor/lazy_extractors.py
+
+# Plugins
+ytdlp_plugins/
+yt-dlp-plugins
diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md
new file mode 100644
index 0000000..c94ec55
--- /dev/null
+++ b/CONTRIBUTING.md
@@ -0,0 +1,731 @@
+# CONTRIBUTING TO YT-DLP
+
+- [OPENING AN ISSUE](#opening-an-issue)
+ - [Is the description of the issue itself sufficient?](#is-the-description-of-the-issue-itself-sufficient)
+ - [Are you using the latest version?](#are-you-using-the-latest-version)
+ - [Is the issue already documented?](#is-the-issue-already-documented)
+ - [Why are existing options not enough?](#why-are-existing-options-not-enough)
+    - [Have you read and understood the changes between youtube-dl and yt-dlp?](#have-you-read-and-understood-the-changes-between-youtube-dl-and-yt-dlp)
+ - [Is there enough context in your bug report?](#is-there-enough-context-in-your-bug-report)
+ - [Does the issue involve one problem, and one problem only?](#does-the-issue-involve-one-problem-and-one-problem-only)
+ - [Is anyone going to need the feature?](#is-anyone-going-to-need-the-feature)
+ - [Is your question about yt-dlp?](#is-your-question-about-yt-dlp)
+ - [Are you willing to share account details if needed?](#are-you-willing-to-share-account-details-if-needed)
+    - [Is the website primarily used for piracy?](#is-the-website-primarily-used-for-piracy)
+- [DEVELOPER INSTRUCTIONS](#developer-instructions)
+ - [Adding new feature or making overarching changes](#adding-new-feature-or-making-overarching-changes)
+ - [Adding support for a new site](#adding-support-for-a-new-site)
+ - [yt-dlp coding conventions](#yt-dlp-coding-conventions)
+ - [Mandatory and optional metafields](#mandatory-and-optional-metafields)
+ - [Provide fallbacks](#provide-fallbacks)
+ - [Regular expressions](#regular-expressions)
+ - [Long lines policy](#long-lines-policy)
+ - [Quotes](#quotes)
+ - [Inline values](#inline-values)
+ - [Collapse fallbacks](#collapse-fallbacks)
+ - [Trailing parentheses](#trailing-parentheses)
+ - [Use convenience conversion and parsing functions](#use-convenience-conversion-and-parsing-functions)
+ - [My pull request is labeled pending-fixes](#my-pull-request-is-labeled-pending-fixes)
+- [EMBEDDING YT-DLP](README.md#embedding-yt-dlp)
+
+
+
+# OPENING AN ISSUE
+
+Bugs and suggestions should be reported at: [yt-dlp/yt-dlp/issues](https://github.com/yt-dlp/yt-dlp/issues). Unless you were prompted to or there is another pertinent reason (e.g. GitHub fails to accept the bug report), please do not send bug reports via personal email. For discussions, join us in our [discord server](https://discord.gg/H5MNcFW63r).
+
+**Please include the full output of yt-dlp when run with `-vU`**, i.e. **add** the `-vU` flag to **your command line**, copy the **whole** output and post it in the issue body wrapped in \`\`\` for better formatting. It should look similar to this:
+```
+$ yt-dlp -vU <your command line>
+[debug] Command-line config: ['-v', 'demo.com']
+[debug] Encodings: locale UTF-8, fs utf-8, out utf-8, pref UTF-8
+[debug] yt-dlp version 2021.09.25 (zip)
+[debug] Python version 3.8.10 (CPython 64bit) - Linux-5.4.0-74-generic-x86_64-with-glibc2.29
+[debug] exe versions: ffmpeg 4.2.4, ffprobe 4.2.4
+[debug] Proxy map: {}
+Current Build Hash 25cc412d1d3c0725a1f2f5b7e4682f6fb40e6d15f7024e96f7afd572e9919535
+yt-dlp is up to date (2021.09.25)
+...
+```
+**Do not post screenshots of verbose logs; only plain text is acceptable.**
+
+The output (including the first lines) contains important debugging information. Issues without the full output are often not reproducible and therefore will be closed as `incomplete`.
+
+The templates provided for the issues should be completed and **not removed**; the information they request helps us resolve the issue.
+
+Please re-read your issue once again to avoid a couple of common mistakes (you can and should use this as a checklist):
+
+### Is the description of the issue itself sufficient?
+
+We often get issue reports that we cannot really decipher. While in most cases we eventually get the required information after asking back multiple times, this is an unnecessary drain on our resources.
+
+So please elaborate on what feature you are requesting, or what bug you want to be fixed. Make sure that it's obvious
+
+- What the problem is
+- How it could be fixed
+- What your proposed solution would look like
+
+If your report is shorter than two lines, it is almost certainly missing some of these, which makes it hard for us to respond to it. We're often too polite to close the issue outright, but the missing info makes misinterpretation likely. We often get frustrated by these issues, since the only possible way for us to move forward on them is to ask for clarification over and over.
+
+For bug reports, this means that your report should contain the **complete** output of yt-dlp when called with the `-vU` flag. The error message you get for (most) bugs even says so, but you would not believe how many of our bug reports do not contain this information.
+
+If the error is `ERROR: Unable to extract ...` and you cannot reproduce it from multiple countries, add `--write-pages` and upload the `.dump` files you get [somewhere](https://gist.github.com).
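+
+For example, a minimal invocation might look like this (the URL is only a placeholder for the page that fails to extract):
+
+```
+yt-dlp -vU --write-pages "https://www.example.com/video/12345"
+```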
+
+**Site support requests must contain an example URL**. An example URL is a URL you might want to download, like `https://www.youtube.com/watch?v=BaW_jenozKc`. There should be an obvious video present. Except under very special circumstances, the main page of a video service (e.g. `https://www.youtube.com/`) is *not* an example URL.
+
+### Are you using the latest version?
+
+Before reporting any issue, type `yt-dlp -U`. This should report that you're up-to-date. This goes for feature requests as well.
+
+### Is the issue already documented?
+
+Make sure that someone has not already opened the issue you're trying to open. Search at the top of the window or browse the [GitHub Issues](https://github.com/yt-dlp/yt-dlp/search?type=Issues) of this repository. If there is an issue, subscribe to it to be notified when there is any progress. Unless you have something useful to add to the conversation, please refrain from commenting.
+
+It is also helpful to check whether the issue has already been documented in the [youtube-dl issue tracker](https://github.com/ytdl-org/youtube-dl/issues). If similar issues have been reported in youtube-dl but not in our issue tracker, links to them can be included in your issue report here.
+
+### Why are existing options not enough?
+
+Before requesting a new feature, please have a quick peek at [the list of supported options](README.md#usage-and-options). Many feature requests are for features that actually exist already! Please, absolutely do show off your work in the issue report and detail how the existing similar options do *not* solve your problem.
+
+### Have you read and understood the changes between youtube-dl and yt-dlp?
+
+There are many changes between youtube-dl and yt-dlp [(changes to default behavior)](README.md#differences-in-default-behavior), and some options behave differently in yt-dlp or have been removed altogether [(list of changes to options)](README.md#deprecated-options). Make sure you have read and understood these differences, and how they may impact your downloads, before opening an issue.
+
+### Is there enough context in your bug report?
+
+People want to solve problems, and often think they do us a favor by breaking their larger problem (e.g. wanting to skip already-downloaded files) down into a specific request (e.g. asking us to check whether the file exists before downloading the info page). However, what often happens is that they split the problem into two steps: one simple, and one impossible (or extremely complicated).
+
+We are then presented with a very complicated request when the original problem could be solved far easier, e.g. by recording the downloaded video IDs in a separate file. To avoid this, you must include the greater context where it is non-obvious. In particular, every feature request that does not consist of adding support for a new site should contain a use case scenario that explains in what situation the missing feature would be useful.
+
+### Does the issue involve one problem, and one problem only?
+
+Some of our users seem to think there is a limit on the number of issues they can or should open. There is no limit on the number of issues they can or should open. While it may seem appealing to dump all your issues into one ticket, that means that someone who solves one of your issues cannot mark the ticket as closed. Typically, reporting a bunch of issues at once leads to the ticket lingering, since nobody wants to attack that behemoth, until someone mercifully splits it into multiple issues.
+
+In particular, every site support request issue should only pertain to services at one site (generally under a common domain, but always using the same backend technology). Do not request support for Vimeo user videos, White House podcasts, and Google Plus pages in the same issue. Also, make sure that you don't post bug reports alongside feature requests. As a rule of thumb, a feature request does not include outputs of yt-dlp that are not immediately related to the feature at hand. Do not post reports of a network error alongside a request for a new video service.
+
+### Is anyone going to need the feature?
+
+Only post features that you (or an incapacitated friend you can personally talk to) require. Do not post features because they seem like a good idea. If they are really useful, they will be requested by someone who requires them.
+
+### Is your question about yt-dlp?
+
+Some bug reports are completely unrelated to yt-dlp and relate to a different, or even the reporter's own, application. Please make sure that you are actually using yt-dlp. If you are using a UI for yt-dlp, report the bug to the maintainer of the actual application providing the UI. In general, if you are unable to provide the verbose log, you should not be opening the issue here.
+
+If the issue is with `youtube-dl` (the project that yt-dlp is forked from) and not with yt-dlp, the issue should be raised in the youtube-dl project.
+
+### Are you willing to share account details if needed?
+
+The maintainers and potential contributors of the project often do not have an account for the website you are asking support for, so any developer interested in solving your issue may ask you for account details. It is at your discretion whether you are willing to share the account in order for the developer to try and solve your issue. However, if you are unwilling or unable to provide details, they obviously cannot work on the issue, and it cannot be solved unless some developer who both has an account and is willing and able to contribute decides to solve it.
+
+By sharing an account with anyone, you agree to bear all risks associated with it. Neither the maintainers nor yt-dlp can be held responsible for any misuse of the credentials.
+
+While these steps won't necessarily ensure that no misuse of the account takes place, they are still good practices to follow.
+
+- Look for people with `Member` (maintainers of the project) or `Contributor` (people who have previously contributed code) tag on their messages.
+- Change the password before sharing the account to something random (use [this](https://passwordsgenerator.net/) if you don't have a random password generator).
+- Change the password after receiving the account back.
+
+### Is the website primarily used for piracy?
+
+We follow [youtube-dl's policy](https://github.com/ytdl-org/youtube-dl#can-you-add-support-for-this-anime-video-site-or-site-which-shows-current-movies-for-free) of not supporting services that are primarily used for infringing copyright. Additionally, it has been decided not to support porn sites that specialize in fakes. We also cannot support any service that serves only [DRM protected content](https://en.wikipedia.org/wiki/Digital_rights_management).
+
+
+
+
+# DEVELOPER INSTRUCTIONS
+
+Most users do not need to build yt-dlp and can [download the builds](https://github.com/yt-dlp/yt-dlp/releases) or get them via [the other installation methods](README.md#installation).
+
+To run yt-dlp as a developer, you don't need to build anything either. Simply execute
+
+ python3 -m yt_dlp
+
+To run all the available core tests, use:
+
+ python3 devscripts/run_tests.py
+
+See item 6 of the [new extractor tutorial](#adding-support-for-a-new-site) for how to run extractor-specific test cases.
+
+If you want to create a build of yt-dlp yourself, you can follow the instructions [here](README.md#compile).
+
+
+## Adding new feature or making overarching changes
+
+Before you start writing code for implementing a new feature, open an issue explaining your feature request and at least one use case. This allows the maintainers to decide whether such a feature is desired for the project in the first place, and will provide an avenue to discuss some implementation details. If you open a pull request for a new feature without discussing with us first, do not be surprised when we ask for large changes to the code, or even reject it outright.
+
+The same applies to changes to the documentation, code style, or overarching changes to the architecture.
+
+
+## Adding support for a new site
+
+If you want to add support for a new site, first of all **make sure** this site is **not dedicated to [copyright infringement](#is-the-website-primarily-used-for-piracy)**. yt-dlp does **not support** such sites; thus, pull requests adding support for them **will be rejected**.
+
+After you have ensured this site is distributing its content legally, you can follow this quick list (assuming your service is called `yourextractor`):
+
+1. [Fork this repository](https://github.com/yt-dlp/yt-dlp/fork)
+1. Check out the source code with:
+
+ git clone git@github.com:YOUR_GITHUB_USERNAME/yt-dlp.git
+
+1. Start a new git branch with
+
+ cd yt-dlp
+ git checkout -b yourextractor
+
+1. Start with this simple template and save it to `yt_dlp/extractor/yourextractor.py`:
+
+ ```python
+ from .common import InfoExtractor
+
+
+ class YourExtractorIE(InfoExtractor):
+ _VALID_URL = r'https?://(?:www\.)?yourextractor\.com/watch/(?P<id>[0-9]+)'
+ _TESTS = [{
+ 'url': 'https://yourextractor.com/watch/42',
+ 'md5': 'TODO: md5 sum of the first 10241 bytes of the video file (use --test)',
+ 'info_dict': {
+ # For videos, only the 'id' and 'ext' fields are required to RUN the test:
+ 'id': '42',
+ 'ext': 'mp4',
+ # Then if the test run fails, it will output the missing/incorrect fields.
+ # Properties can be added as:
+ # * A value, e.g.
+ # 'title': 'Video title goes here',
+ # * MD5 checksum; start the string with 'md5:', e.g.
+ # 'description': 'md5:098f6bcd4621d373cade4e832627b4f6',
+ # * A regular expression; start the string with 're:', e.g.
+ # 'thumbnail': r're:^https?://.*\.jpg$',
+ # * A count of elements in a list; start the string with 'count:', e.g.
+ # 'tags': 'count:10',
+ # * Any Python type, e.g.
+ # 'view_count': int,
+ }
+ }]
+
+ def _real_extract(self, url):
+ video_id = self._match_id(url)
+ webpage = self._download_webpage(url, video_id)
+
+ # TODO more code goes here, for example ...
+ title = self._html_search_regex(r'<h1>(.+?)</h1>', webpage, 'title')
+
+ return {
+ 'id': video_id,
+ 'title': title,
+ 'description': self._og_search_description(webpage),
+ 'uploader': self._search_regex(r'<div[^>]+id="uploader"[^>]*>([^<]+)<', webpage, 'uploader', fatal=False),
+ # TODO more properties (see yt_dlp/extractor/common.py)
+ }
+ ```
+1. Add an import in [`yt_dlp/extractor/_extractors.py`](yt_dlp/extractor/_extractors.py). Note that the class name must end with `IE`.
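+
+    For example, the import for the template above would be a line similar to this, kept in alphabetical order:
+
+    ```python
+    from .yourextractor import YourExtractorIE
+    ```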
+1. Run `python3 devscripts/run_tests.py YourExtractor`. This *may fail* at first, but you can continually re-run it until you're done. Upon failure, it will output the missing fields and/or correct values, which you can copy. If you decide to add more than one test, the tests will then be named `YourExtractor`, `YourExtractor_1`, `YourExtractor_2`, etc. Note that tests with an `only_matching` key in the test's dict are not included in the count. You can also run all the tests in one go with `YourExtractor_all`.
+1. Make sure you have at least one test for your extractor. Even if all videos covered by the extractor are expected to be inaccessible for automated testing, tests should still be added with a `skip` parameter indicating why the particular test is disabled from running, as in the sketch below.
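+
+    For example, a sketch of a skipped test entry (the URL and skip reason are placeholders):
+
+    ```python
+    _TESTS = [{
+        'url': 'https://yourextractor.com/watch/42',
+        'info_dict': {
+            'id': '42',
+            'ext': 'mp4',
+            'title': 'Video title goes here',
+        },
+        'skip': 'Requires login and is geo-restricted',
+    }]
+    ```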
+1. Have a look at [`yt_dlp/extractor/common.py`](yt_dlp/extractor/common.py) for possible helper methods and a [detailed description of what your extractor should and may return](yt_dlp/extractor/common.py#L119-L440). Add tests and code for as many as you want.
+1. Make sure your code follows [yt-dlp coding conventions](#yt-dlp-coding-conventions) and check the code with [flake8](https://flake8.pycqa.org/en/latest/index.html#quickstart):
+
+ $ flake8 yt_dlp/extractor/yourextractor.py
+
+1. Make sure your code works under all [Python](https://www.python.org/) versions supported by yt-dlp, namely CPython and PyPy for Python 3.8 and above. Backward compatibility is not required for even older versions of Python.
+1. When the tests pass, [add](https://git-scm.com/docs/git-add) the new files, [commit](https://git-scm.com/docs/git-commit) them and [push](https://git-scm.com/docs/git-push) the result, like this:
+
+ $ git add yt_dlp/extractor/_extractors.py
+ $ git add yt_dlp/extractor/yourextractor.py
+ $ git commit -m '[yourextractor] Add extractor'
+ $ git push origin yourextractor
+
+1. Finally, [create a pull request](https://help.github.com/articles/creating-a-pull-request). We'll then review and merge it.
+
+In any case, thank you very much for your contributions!
+
+**Tip:** To test extractors that require login information, create a file `test/local_parameters.json` and add `"usenetrc": true`, your `username` & `password`, or `cookiefile`/`cookiesfrombrowser` to it:
+```json
+{
+ "username": "your user name",
+ "password": "your password"
+}
+```
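+
+Alternatively, to use a cookies file instead of credentials (the path is illustrative):
+```json
+{
+    "cookiefile": "cookies.txt"
+}
+```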
+
+## yt-dlp coding conventions
+
+This section introduces guidelines for writing idiomatic, robust and future-proof extractor code.
+
+Extractors are very fragile by nature, since they depend on the layout of the source data provided by 3rd-party media hosters, which is out of your control and tends to change. As an extractor implementer, your task is not only to write code that extracts media links and metadata correctly, but also to minimize dependency on the source's layout and even to make the code anticipate potential future changes and be ready for them. This is important because it allows the extractor to survive minor layout changes, keeping old yt-dlp versions working. Even though such breakage may be easily fixed by a new version of yt-dlp, the fix could take some time, during which the extractor will remain broken.
+
+
+### Mandatory and optional metafields
+
+For extraction to work, yt-dlp relies on the metadata your extractor extracts and provides to it, expressed as an [information dictionary](yt_dlp/extractor/common.py#L119-L440) or simply *info dict*. Only the following meta fields in the *info dict* are considered mandatory for a successful extraction:
+
+ - `id` (media identifier)
+ - `title` (media title)
+ - `url` (media download URL) or `formats`
+
+The aforementioned metafields are the critical data without which extraction does not make any sense; if any of them fails to be extracted, the extractor is considered completely broken. While all extractors must return a `title`, they must also allow its extraction to be non-fatal.
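+
+For instance, a minimal sketch of non-fatal title extraction (the regex is illustrative):
+
+```python
+title = self._html_search_regex(
+    r'<h1[^>]*>([^<]+)</h1>', webpage, 'title', fatal=False)
+```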
+
+For pornographic sites, an appropriate `age_limit` must also be returned.
+
+In some special cases, the extractor is allowed to return an info dict without `url` or `formats` if it lets the user extract useful information with `--ignore-no-formats-error` - e.g. when the video is a live stream that has not started yet.
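+
+A minimal sketch of this pattern (the message is illustrative; `raise_no_formats` only warns instead of raising when the user passes `--ignore-no-formats-error`):
+
+```python
+# a sketch, assuming the page revealed that the stream has not started yet
+self.raise_no_formats('This live stream has not started yet', expected=True)
+return {
+    'id': video_id,
+    'title': title,
+    'live_status': 'is_upcoming',
+}
+```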
+
+[Any field](yt_dlp/extractor/common.py#L219-L426) apart from the aforementioned ones is considered **optional**. That means that extraction should be **tolerant** to situations when sources for these fields can potentially be unavailable (even if they are always available at the moment) and **future-proof** in order not to break the extraction of general purpose mandatory fields.
+
+#### Example
+
+Say you have some source dictionary `meta` that you've fetched as JSON with an HTTP request, and it has a key `summary`:
+
+```python
+meta = self._download_json(url, video_id)
+```
+
+Assume at this point `meta`'s layout is:
+
+```python
+{
+ "summary": "some fancy summary text",
+ "user": {
+ "name": "uploader name"
+ },
+ ...
+}
+```
+
+Assume you want to extract `summary` and put it into the resulting info dict as `description`. Since `description` is an optional meta field, you should be prepared for this key to be missing from the `meta` dict, so you should extract it like:
+
+```python
+description = meta.get('summary') # correct
+```
+
+and not like:
+
+```python
+description = meta['summary'] # incorrect
+```
+
+The latter will break the extraction process with a `KeyError` if `summary` disappears from `meta` at some later time, while with the former approach extraction will just go ahead, with `description` set to `None`, which is perfectly fine (remember, `None` is equivalent to the absence of data).
+
+
+If the data is nested, do not use `.get` chains, but instead make use of `traverse_obj`.
+
+Considering the above `meta` again, assume you want to extract `["user"]["name"]` and put it in the resulting info dict as `uploader`:
+
+```python
+uploader = traverse_obj(meta, ('user', 'name')) # correct
+```
+
+and not like:
+
+```python
+uploader = meta['user']['name'] # incorrect
+```
+or
+```python
+uploader = meta.get('user', {}).get('name') # incorrect
+```
+or
+```python
+uploader = try_get(meta, lambda x: x['user']['name']) # old utility
+```
+
+
+Similarly, you should pass `fatal=False` when extracting optional data from a webpage with `_search_regex`, `_html_search_regex` or similar methods, for instance:
+
+```python
+description = self._search_regex(
+ r'<span[^>]+id="title"[^>]*>([^<]+)<',
+ webpage, 'description', fatal=False)
+```
+
+With `fatal` set to `False`, if `_search_regex` fails to extract `description`, it will emit a warning and continue extraction.
+
+You can also pass `default=<some fallback value>`, for example:
+
+```python
+description = self._search_regex(
+ r'<span[^>]+id="title"[^>]*>([^<]+)<',
+ webpage, 'description', default=None)
+```
+
+On failure, this code will silently continue the extraction with `description` set to `None`. That is useful for metafields that may or may not be present.
+
+
+Another thing to remember is not to try to iterate over `None`.
+
+Say you extracted a list of thumbnails into `thumbnail_data` and want to iterate over them:
+
+```python
+thumbnail_data = data.get('thumbnails') or []
+thumbnails = [{
+ 'url': item['url'],
+ 'height': item.get('h'),
+} for item in thumbnail_data if item.get('url')] # correct
+```
+
+and not like:
+
+```python
+thumbnail_data = data.get('thumbnails')
+thumbnails = [{
+ 'url': item['url'],
+ 'height': item.get('h'),
+} for item in thumbnail_data] # incorrect
+```
+
+In this case, `thumbnail_data` will be `None` if the field was not found, and this will cause the loop `for item in thumbnail_data` to raise a fatal `TypeError`. Using `or []` avoids this error and sets `thumbnails` to an empty list instead.
+
+Alternatively, this can be further simplified by using `traverse_obj`:
+
+```python
+thumbnails = [{
+ 'url': item['url'],
+ 'height': item.get('h'),
+} for item in traverse_obj(data, ('thumbnails', lambda _, v: v['url']))]
+```
+
+or, even better,
+
+```python
+thumbnails = traverse_obj(data, ('thumbnails', ..., {'url': 'url', 'height': 'h'}))
+```
+
+### Provide fallbacks
+
+When extracting metadata, try to do so from multiple sources. For example, if `title` is present in several places, try extracting it from at least some of them. This makes the extraction more future-proof in case some of the sources become unavailable.
+
+
+#### Example
+
+Say `meta` from the previous example has a `title` and you are about to extract it like:
+
+```python
+title = meta.get('title')
+```
+
+If `title` disappears from `meta` in the future due to some changes on the hoster's side, the title extraction will fail.
+
+Assume that you have some other source you can extract `title` from, for example the `og:title` HTML meta tag of a `webpage`. In this case you can provide a fallback like:
+
+```python
+title = meta.get('title') or self._og_search_title(webpage)
+```
+
+This code will try to extract from `meta` first, and if that fails, it will try extracting `og:title` from the `webpage`, making the extractor more robust.
+
+
+### Regular expressions
+
+#### Don't capture groups you don't use
+
+A capturing group must be an indication that it's used somewhere in the code. Any group that is not used must be non-capturing.
+
+##### Example
+
+Don't capture the id attribute name here, since you can't use it for anything anyway.
+
+Correct:
+
+```python
+r'(?:id|ID)=(?P<id>\d+)'
+```
+
+Incorrect:
+```python
+r'(id|ID)=(?P<id>\d+)'
+```
+
+#### Make regular expressions relaxed and flexible
+
+When using regular expressions, try to write them fuzzy, relaxed and flexible: skip insignificant parts that are more likely to change, allow both single and double quotes for quoted values, and so on.
+
+##### Example
+
+Say you need to extract `title` from the following HTML code:
+
+```html
+<span style="position: absolute; left: 910px; width: 90px; float: right; z-index: 9999;" class="title">some fancy title</span>
+```
+
+The code for that task should look similar to:
+
+```python
+title = self._search_regex( # correct
+ r'<span[^>]+class="title"[^>]*>([^<]+)', webpage, 'title')
+```
+
+which tolerates potential changes in the `style` attribute's value. Or even better:
+
+```python
+title = self._search_regex( # correct
+ r'<span[^>]+class=(["\'])title\1[^>]*>(?P<title>[^<]+)',
+ webpage, 'title', group='title')
+```
+
+which also handles single quotes in addition to double quotes.
+
+The code definitely should not look like:
+
+```python
+title = self._search_regex( # incorrect
+ r'<span style="position: absolute; left: 910px; width: 90px; float: right; z-index: 9999;" class="title">(.*?)</span>',
+ webpage, 'title', group='title')
+```
+
+or even
+
+```python
+title = self._search_regex( # incorrect
+ r'<span style=".*?" class="title">(.*?)</span>',
+ webpage, 'title', group='title')
+```
+
+Here the presence or absence of other attributes, including `style`, is irrelevant to the data we need, and so the regex must not depend on it.
+
+
+#### Keep the regular expressions as simple as possible, but no simpler
+
+Since many extractors deal with unstructured data provided by websites, we will often need to use very complex regular expressions. You should try to use the *simplest* regex that can accomplish what you want. In other words, each part of the regex must have a reason for existing. If you can take out a symbol and the functionality does not change, the symbol should not be there.
+
+##### Example
+
+Correct:
+
+```python
+_VALID_URL = r'https?://(?:www\.)?website\.com/(?:[^/]+/){3,4}(?P<display_id>[^/]+)_(?P<id>\d+)'
+```
+
+Incorrect:
+
+```python
+_VALID_URL = r'https?:\/\/(?:www\.)?website\.com\/[^\/]+/[^\/]+/[^\/]+(?:\/[^\/]+)?\/(?P<display_id>[^\/]+)_(?P<id>\d+)'
+```
+
+#### Do not misuse `.` and use the correct quantifiers (`+*?`)
+
+Avoid creating regexes that over-match because of incorrect use of quantifiers. Also try to avoid non-greedy matching (`?`) where possible, since it can easily result in [catastrophic backtracking](https://www.regular-expressions.info/catastrophic.html).
+
+Correct:
+
+```python
+title = self._search_regex(r'<span\b[^>]+class="title"[^>]*>([^<]+)', webpage, 'title')
+```
+
+Incorrect:
+
+```python
+title = self._search_regex(r'<span\b.*class="title".*>(.+?)<', webpage, 'title')
+```
+
+
+### Long lines policy
+
+There is a soft limit to keep lines of code under 100 characters long. This limit should be respected if possible, as long as it does not make readability and code maintenance worse. Sometimes, it may be reasonable to go up to 120 characters, and sometimes even 80 can be unreadable. Keep in mind that this is not a hard limit and is just one of many tools to make the code more readable.
+
+For example, you should **never** split long string literals like URLs or other often-copied entities over multiple lines to fit this limit.
+
+Conversely, don't unnecessarily split small lines further. As a rule of thumb, if removing the line split keeps the code under 80 characters, it should be a single line.
+
+##### Examples
+
+Correct:
+
+```python
+'https://www.youtube.com/watch?v=FqZTN594JQw&list=PLMYEtVRpaqY00V9W81Cwmzp6N6vZqfUKD4'
+```
+
+Incorrect:
+
+```python
+'https://www.youtube.com/watch?v=FqZTN594JQw&list='
+'PLMYEtVRpaqY00V9W81Cwmzp6N6vZqfUKD4'
+```
+
+Correct:
+
+```python
+uploader = traverse_obj(info, ('uploader', 'name'), ('author', 'fullname'))
+```
+
+Incorrect:
+
+```python
+uploader = traverse_obj(
+ info,
+ ('uploader', 'name'),
+ ('author', 'fullname'))
+```
+
+Correct:
+
+```python
+formats = self._extract_m3u8_formats(
+ m3u8_url, video_id, 'mp4', 'm3u8_native', m3u8_id='hls',
+ note='Downloading HD m3u8 information', errnote='Unable to download HD m3u8 information')
+```
+
+Incorrect:
+
+```python
+formats = self._extract_m3u8_formats(m3u8_url,
+ video_id,
+ 'mp4',
+ 'm3u8_native',
+ m3u8_id='hls',
+ note='Downloading HD m3u8 information',
+ errnote='Unable to download HD m3u8 information')
+```
+
+
+### Quotes
+
+Always use single quotes for strings (even if the string has `'` in it) and double quotes for docstrings. Use `'''` only for multi-line strings. An exception can be made if a string has multiple single quotes in it and escaping makes it *significantly* harder to read. For f-strings, you can use double quotes on the inside, but avoid f-strings that have too many quotes inside.
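+
+A small sketch of these conventions (the function and fields are illustrative):
+
+```python
+def format_note(self, fmt):
+    """Return a human-readable note for a format."""  # double quotes for docstrings
+    quality = fmt.get('quality') or 'unknown'  # single quotes for ordinary strings
+    return f'quality: {quality}, id: {fmt["format_id"]}'  # double quotes inside the f-string
+```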
+
+
+### Inline values
+
+Extracting variables is acceptable for reducing code duplication and improving readability of complex expressions. However, you should avoid extracting variables used only once and moving them to opposite parts of the extractor file, which makes reading the linear flow difficult.
+
+#### Examples
+
+Correct:
+
+```python
+return {
+ 'title': self._html_search_regex(r'<h1>([^<]+)</h1>', webpage, 'title'),
+ # ...some lines of code...
+}
+```
+
+Incorrect:
+
+```python
+TITLE_RE = r'<h1>([^<]+)</h1>'
+# ...some lines of code...
+title = self._html_search_regex(TITLE_RE, webpage, 'title')
+# ...some lines of code...
+return {
+ 'title': title,
+ # ...some lines of code...
+}
+```
+
+
+### Collapse fallbacks
+
+Multiple fallback values can quickly become unwieldy. Collapse them into a single expression by passing a list of patterns.
+
+#### Example
+
+Good:
+
+```python
+description = self._html_search_meta(
+ ['og:description', 'description', 'twitter:description'],
+ webpage, 'description', default=None)
+```
+
+Unwieldy:
+
+```python
+description = (
+ self._og_search_description(webpage, default=None)
+ or self._html_search_meta('description', webpage, default=None)
+ or self._html_search_meta('twitter:description', webpage, default=None))
+```
+
+Methods supporting a list of patterns are `_search_regex`, `_html_search_regex`, `_og_search_property` and `_html_search_meta`.
+
+
+### Trailing parentheses
+
+Always move trailing parentheses used for grouping/function calls after the last argument. On the other hand, a multi-line literal list/tuple/dict/set should be closed on a new line. Generators and list/dict comprehensions may use either style.
+
+#### Examples
+
+Correct:
+
+```python
+url = traverse_obj(info, (
+ 'context', 'dispatcher', 'stores', 'VideoTitlePageStore', 'data', 'video', 0, 'VideoUrlSet', 'VideoUrl'), list)
+```
+Correct:
+
+```python
+url = traverse_obj(
+ info,
+ ('context', 'dispatcher', 'stores', 'VideoTitlePageStore', 'data', 'video', 0, 'VideoUrlSet', 'VideoUrl'),
+ list)
+```
+
+Incorrect:
+
+```python
+url = traverse_obj(
+ info,
+ ('context', 'dispatcher', 'stores', 'VideoTitlePageStore', 'data', 'video', 0, 'VideoUrlSet', 'VideoUrl'),
+ list
+)
+```
+
+Correct:
+
+```python
+f = {
+ 'url': url,
+ 'format_id': format_id,
+}
+```
+
+Incorrect:
+
+```python
+f = {'url': url,
+ 'format_id': format_id}
+```
+
+Correct:
+
+```python
+formats = [process_formats(f) for f in format_data
+ if f.get('type') in ('hls', 'dash', 'direct') and f.get('downloadable')]
+```
+
+Correct:
+
+```python
+formats = [
+ process_formats(f) for f in format_data
+ if f.get('type') in ('hls', 'dash', 'direct') and f.get('downloadable')
+]
+```
+
+
+### Use convenience conversion and parsing functions
+
+Wrap all extracted numeric data into safe functions from [`yt_dlp/utils/`](yt_dlp/utils/): `int_or_none`, `float_or_none`. Use them for string-to-number conversions as well.
+
+Use `url_or_none` for safe URL processing.
+
+Use `traverse_obj` and `try_call` (which supersede `dict_get` and `try_get`) for safe metadata extraction from parsed JSON.
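+
+For example, a quick sketch contrasting the two (the `meta` layout is illustrative):
+
+```python
+meta = {'media': {'duration': 123456}}
+# try_call returns None instead of raising if the lookup fails
+duration = try_call(lambda: meta['media']['duration'] / 1000)
+# the older, superseded equivalent:
+duration = try_get(meta, lambda x: x['media']['duration'] / 1000)
+```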
+
+Use `unified_strdate` for uniform `upload_date` or any `YYYYMMDD` meta field extraction, `unified_timestamp` for uniform `timestamp` extraction, `parse_filesize` for `filesize` extraction, `parse_count` for count meta field extraction, `parse_resolution` for resolution fields, `parse_duration` for `duration` extraction, and `parse_age_limit` for `age_limit` extraction.
+
+Explore [`yt_dlp/utils/`](yt_dlp/utils/) for more useful convenience functions.
+
+#### Examples
+
+```python
+description = traverse_obj(response, ('result', 'video', 'summary'), expected_type=str)
+thumbnails = traverse_obj(response, ('result', 'thumbnails', ..., 'url'), expected_type=url_or_none)
+video = traverse_obj(response, ('result', 'video', 0), default={}, expected_type=dict)
+duration = float_or_none(video.get('durationMs'), scale=1000)
+view_count = int_or_none(video.get('views'))
+```
+
+
+# My pull request is labeled pending-fixes
+
+The `pending-fixes` label is added when changes are requested on a PR. When the necessary changes are made, the label should be removed. However, despite our best efforts, it may sometimes happen that the maintainer did not see the changes or forgot to remove the label. If your PR is still marked as `pending-fixes` a few days after all requested changes have been made, feel free to ping the maintainer who labeled your PR and ask them to re-review and remove the label.
+
+
+
+
+# EMBEDDING YT-DLP
+See [README.md#embedding-yt-dlp](README.md#embedding-yt-dlp) for instructions on how to embed yt-dlp in another Python program.
diff --git a/CONTRIBUTORS b/CONTRIBUTORS
new file mode 100644
index 0000000..6ee3baa
--- /dev/null
+++ b/CONTRIBUTORS
@@ -0,0 +1,602 @@
+pukkandan (owner)
+shirt-dev (collaborator)
+coletdjnz/colethedj (collaborator)
+Ashish0804 (collaborator)
+bashonly (collaborator)
+Grub4K (collaborator)
+h-h-h-h
+pauldubois98
+nixxo
+GreyAlien502
+kyuyeunk
+siikamiika
+jbruchon
+alexmerkel
+glenn-slayden
+Unrud
+wporr
+mariuszskon
+ohnonot
+samiksome
+alxnull
+FelixFrog
+Zocker1999NET
+kurumigi
+bbepis
+animelover1984/horahoradev
+Pccode66
+RobinD42
+hseg
+DennyDai
+codeasashu
+teesid
+kevinoconnor7
+damianoamatruda
+2ShedsJackson
+CXwudi
+xtkoba
+llacb47
+hheimbuerger
+B0pol
+lkho
+fstirlitz
+Lamieur
+tsukumijima
+Hadi0609
+b5eff52
+craftingmod
+tpikonen
+tripulse
+king-millez
+alex-gedeon
+hhirtz
+louie-github
+MinePlayersPE
+olifre
+rhsmachine/zenerdi0de
+nihil-admirari
+krichbanana
+ohmybahgosh
+nyuszika7h
+blackjack4494
+pyx
+TpmKranz
+mzbaulhaque
+zackmark29
+mbway
+zerodytrash
+wesnm
+pento
+rigstot
+dirkf
+funniray
+Jessecar96
+jhwgh1968
+kikuyan
+max-te
+nchilada
+pgaig
+PSlava
+stdedos
+u-spec-png
+Sipherdrakon
+kidonng
+smege1001
+tandy1000
+IONECarter
+capntrips
+mrfade
+ParadoxGBB
+wlritchi
+NeroBurner
+mahanstreamer
+alerikaisattera
+Derkades
+BunnyHelp
+i6t
+std-move
+Chocobozzz
+ouwou
+korli
+octotherp
+CeruleanSky
+zootedb0t
+chao813
+ChillingPepper
+ConquerorDopy
+dalanmiller
+DigitalDJ
+f4pp3rk1ng
+gesa
+Jules-A
+makeworld-the-better-one
+MKSherbini
+mrx23dot
+poschi3
+raphaeldore
+renalid
+sleaux-meaux
+sulyi
+tmarki
+Vangelis66
+AjaxGb
+ajj8
+jakubadamw
+jfogelman
+timethrow
+sarnoud
+Bojidarist
+18928172992817182/gustaf
+nixklai
+smplayer-dev
+Zirro
+CrypticSignal
+flashdagger
+fractalf
+frafra
+kaz-us
+ozburo
+rhendric
+sdomi
+selfisekai
+stanoarn
+0xA7404A/Aurora
+4a1e2y5
+aarubui
+chio0hai
+cntrl-s
+Deer-Spangle
+DEvmIb
+Grabien/MaximVol
+j54vc1bk
+mpeter50
+mrpapersonic
+pabs3
+staubichsauger
+xenova
+Yakabuff
+zulaport
+ehoogeveen-medweb
+PilzAdam
+zmousm
+iw0nderhow
+unit193
+TwoThousandHedgehogs/KathrynElrod
+Jertzukka
+cypheron
+Hyeeji
+bwildenhain
+C0D3D3V
+kebianizao
+Lapin0t
+abdullah-if
+DavidSkrundz
+mkubecek
+raleeper
+YuenSzeHong
+Sematre
+jaller94
+r5d
+julien-hadleyjack
+git-anony-mouse
+mdawar
+trassshhub
+foghawk
+k3ns1n
+teridon
+mozlima
+timendum
+ischmidt20
+CreaValix
+sian1468
+arkamar
+hyano
+KiberInfinity
+tejing1
+Bricio
+lazypete365
+Aniruddh-J
+blackgear
+CplPwnies
+cyberfox1691
+FestplattenSchnitzel
+hatienl0i261299
+iphoting
+jakeogh
+lukasfink1
+lyz-code
+marieell
+mdpauley
+Mipsters
+mxmehl
+ofkz
+P-reducible
+pycabbage
+regarten
+Ronnnny
+schn0sch
+s0u1h
+MrRawes
+cffswb
+danielyli
+1-Byte
+mehq
+dzek69
+aaearon
+panatexxa
+kmark
+un-def
+goggle
+Soebb
+Fam0r
+bohwaz
+dodrian
+vvto33
+ca-za
+connercsbn
+diegorodriguezv
+ekangmonyet
+elyse0
+evansp
+GiedriusS
+HE7086
+JordanWeatherby
+m4tu4g
+MarwenDallel
+nevack
+putnam
+rand-net
+vertan
+Wikidepia
+Yipten
+moench-tegeder
+christoph-heinrich
+HobbyistDev
+LunarFang416
+sbor23
+aurelg
+adamanldo
+gamer191
+vkorablin
+Burve
+mnn
+ZhymabekRoman
+mozbugbox
+aejdl
+ping
+sqrtNOT
+bubbleguuum
+darkxex
+miseran
+StefanLobbenmeier
+crazymoose77756
+nomevi
+Brett824
+pingiun
+dosy4ev
+EhtishamSabir
+Ferdi265
+FirefoxMetzger
+ftk
+lamby
+llamasblade
+lockmatrix
+misaelaguayo
+odo2063
+pritam20ps05
+scy
+sheerluck
+AxiosDeminence
+DjesonPV
+eren-kemer
+freezboltz
+Galiley
+haobinliang
+Mehavoid
+winterbird-code
+yashkc2025
+aldoridhoni
+jacobtruman
+masta79
+palewire
+cgrigis
+DavidH-2022
+dfaker
+jackyyf
+ohaiibuzzle
+SamantazFox
+shreyasminocha
+tejasa97
+xenov
+satan1st
+0xGodspeed
+5736d79
+587021c
+basrieter
+Bobscorn
+CNugteren
+columndeeply
+DoubleCouponDay
+Fabi019
+GautamMKGarg
+itachi-19
+jeroenj
+josanabr
+LiviaMedeiros
+nikita-moor
+snapdgn
+SuperSonicHub1
+tannertechnology
+Timendum
+tobi1805
+TokyoBlackHole
+ajayyy
+Alienmaster
+bsun0000
+changren-wcr
+ClosedPort22
+CrankDatSouljaBoy
+cruel-efficiency
+endotronic
+Generator
+gibson042
+How-Bout-No
+invertico
+jahway603
+jwoglom
+lksj
+megapro17
+mlampe
+MrOctopus
+nosoop
+puc9
+sashashura
+schnusch
+SG5
+the-marenga
+tkgmomosheep
+vitkhab
+glensc
+synthpop123
+tntmod54321
+milkknife
+Bnyro
+CapacitorSet
+stelcodes
+skbeh
+muddi900
+digitall
+chengzhicn
+mexus
+JChris246
+redraskal
+Spicadox
+barsnick
+docbender
+KurtBestor
+Chrissi2812
+FrederikNS
+gschizas
+JC-Chung
+mzhou
+OndrejBakan
+ab4cbef
+aionescu
+amra
+ByteDream
+carusocr
+chexxor
+felixonmars
+FrankZ85
+FriedrichRehren
+gregsadetsky
+LeoniePhiline
+LowSuggestion912
+Matumo
+OIRNOIR
+OMEGARAZER
+oxamun
+pmitchell86
+qbnu
+qulaz
+rebane2001
+road-master
+rohieb
+sdht0
+seproDev
+Hill-98
+LXYan2333
+mushbite
+venkata-krishnas
+7vlad7
+alexklapheke
+arobase-che
+bepvte
+bergoid
+blmarket
+brandon-dacrib
+c-basalt
+CoryTibbettsDev
+Cyberes
+D0LLYNH0
+danog
+DataGhost
+falbrechtskirchinger
+foreignBlade
+garret1317
+hasezoey
+hoaluvn
+ItzMaxTV
+ivanskodje
+jo-nike
+kangalio
+linsui
+makew0rld
+menschel
+mikf
+mrscrapy
+NDagestad
+Neurognostic
+NextFire
+nick-cd
+permunkle
+pzhlkj6612
+ringus1
+rjy
+Schmoaaaaah
+sjthespian
+theperfectpunk
+toomyzoom
+truedread
+TxI5
+unbeatable-101
+vampirefrog
+vidiot720
+viktor-enzell
+zhgwn
+barthelmannk
+berkanteber
+OverlordQ
+rexlambert22
+Ti4eeT4e
+AmanSal1
+bbilly1
+meliber
+nnoboa
+rdamas
+RfadnjdExt
+urectanc
+nao20010128nao/Lesmiscore
+04-pasha-04
+aaruni96
+aky-01
+AmirAflak
+ApoorvShah111
+at-wat
+davinkevin
+demon071
+denhotte
+FinnRG
+fireattack
+Frankgoji
+GD-Slime
+hatsomatt
+ifan-t
+kshitiz305
+kylegustavo
+mabdelfattah
+nathantouze
+niemands
+Rajeshwaran2001
+RedDeffender
+Rohxn16
+sb0stn
+SevenLives
+simon300000
+snixon
+soundchaser128
+szabyg
+trainman261
+trislee
+wader
+Yalab7
+zhallgato
+zhong-yiyu
+Zprokkel
+AS6939
+drzraf
+handlerug
+jiru
+madewokherd
+xofe
+awalgarg
+midnightveil
+naginatana
+Riteo
+1100101
+aniolpages
+bartbroere
+CrendKing
+Esokrates
+HitomaruKonpaku
+LoserFox
+peci1
+saintliao
+shubhexists
+SirElderling
+almx
+elivinsky
+starius
+TravisDupes
+amir16yp
+Fymyte
+Ganesh910
+hashFactory
+kclauhk
+Kyraminol
+lstrojny
+middlingphys
+NickCis
+nicodato
+prettykool
+S-Aarab
+sonmezberkay
+TSRBerry
+114514ns
+agibson-fl
+alard
+alien-developers
+antonkesy
+ArnauvGilotra
+Arthurszzz
+Bibhav48
+Bl4Cc4t
+boredzo
+Caesim404
+chkuendig
+chtk
+Danish-H
+dasidiot
+diman8
+divStar
+DmitryScaletta
+feederbox826
+gmes78
+gonzalezjo
+hui1601
+infanf
+jazz1611
+jingtra
+jkmartindale
+johnvictorfs
+llistochek
+marcdumais
+martinxyz
+michal-repo
+mrmedieval
+nbr23
+Nicals
+Noor-5
+NurTasin
+pompos02
+Pranaxcau
+pwaldhauer
+RaduManole
+RalphORama
+rrgomes
+ruiminggu
+rvsit
+sefidel
+shmohawk
+Snack-X
+src-tinkerer
+stilor
+syntaxsurge
+t-nil
+ufukk
+vista-narvas
+x11x
+xpadev-net
+Xpl0itU
+YoshichikaAAA
+zhijinwuu
diff --git a/Changelog.md b/Changelog.md
new file mode 100644
index 0000000..45a9cef
--- /dev/null
+++ b/Changelog.md
@@ -0,0 +1,4280 @@
+# Changelog
+
+<!--
+# To create a release, dispatch the https://github.com/yt-dlp/yt-dlp/actions/workflows/release.yml workflow on master
+-->
+
+### 2024.03.10
+
+#### Core changes
+- [Add `--compat-options 2023`](https://github.com/yt-dlp/yt-dlp/commit/3725b4f0c93ca3943e6300013a9670e4ab757fda) ([#9084](https://github.com/yt-dlp/yt-dlp/issues/9084)) by [Grub4K](https://github.com/Grub4K) (With fixes in [ffff1bc](https://github.com/yt-dlp/yt-dlp/commit/ffff1bc6598fc7a9258e51bc153cab812467f9f9) by [pukkandan](https://github.com/pukkandan))
+- [Create `ydl._request_director` when needed](https://github.com/yt-dlp/yt-dlp/commit/069b2aedae2279668b6051627a81fc4fbd9c146a) by [pukkandan](https://github.com/pukkandan) (With fixes in [dbd8b1b](https://github.com/yt-dlp/yt-dlp/commit/dbd8b1bff9afd8f05f982bcd52c20bc173c266ca) by [Grub4k](https://github.com/Grub4k))
+- [Don't select storyboard formats as fallback](https://github.com/yt-dlp/yt-dlp/commit/d63eae7e7ffb1f3e733e552b9e5e82355bfba214) by [bashonly](https://github.com/bashonly)
+- [Handle `--load-info-json` format selection errors](https://github.com/yt-dlp/yt-dlp/commit/263a4b55ac17a796e8991ca8d2d86a3c349f8a60) ([#9392](https://github.com/yt-dlp/yt-dlp/issues/9392)) by [bashonly](https://github.com/bashonly)
+- [Warn user when not launching through shell on Windows](https://github.com/yt-dlp/yt-dlp/commit/6a6cdcd1824a14e3b336332c8f31f65497b8c4b8) ([#9250](https://github.com/yt-dlp/yt-dlp/issues/9250)) by [Grub4K](https://github.com/Grub4K), [seproDev](https://github.com/seproDev)
+- **cookies**
+ - [Fix `--cookies-from-browser` for `snap` Firefox](https://github.com/yt-dlp/yt-dlp/commit/cbed249aaa053a3f425b9bafc97f8dbd71c44487) ([#9016](https://github.com/yt-dlp/yt-dlp/issues/9016)) by [Grub4K](https://github.com/Grub4K)
+ - [Fix `--cookies-from-browser` with macOS Firefox profiles](https://github.com/yt-dlp/yt-dlp/commit/85b33f5c163f60dbd089a6b9bc2ba1366d3ddf93) ([#8909](https://github.com/yt-dlp/yt-dlp/issues/8909)) by [RalphORama](https://github.com/RalphORama)
+ - [Improve error message for Windows `--cookies-from-browser chrome` issue](https://github.com/yt-dlp/yt-dlp/commit/2792092afd367e39251ace1fb2819c855ab8919f) ([#9080](https://github.com/yt-dlp/yt-dlp/issues/9080)) by [Grub4K](https://github.com/Grub4K)
+- **plugins**: [Handle `PermissionError`](https://github.com/yt-dlp/yt-dlp/commit/9a8afadd172b7cab143f0049959fa64973589d94) ([#9229](https://github.com/yt-dlp/yt-dlp/issues/9229)) by [pukkandan](https://github.com/pukkandan), [syntaxsurge](https://github.com/syntaxsurge)
+- **utils**
+ - [Improve `repr` of `DateRange`, `match_filter_func`](https://github.com/yt-dlp/yt-dlp/commit/45491a2a30da4d1723cfa9288cb664813bb09afb) by [pukkandan](https://github.com/pukkandan)
+ - `traverse_obj`: [Support `xml.etree.ElementTree.Element`](https://github.com/yt-dlp/yt-dlp/commit/ffbd4f2a02fee387ea5e0a267ce32df5259111ac) ([#8911](https://github.com/yt-dlp/yt-dlp/issues/8911)) by [Grub4K](https://github.com/Grub4K)
+- **webvtt**: [Don't parse single fragment files](https://github.com/yt-dlp/yt-dlp/commit/f24e44e8cbd88ce338d52f594a19330f64d38b50) ([#9034](https://github.com/yt-dlp/yt-dlp/issues/9034)) by [seproDev](https://github.com/seproDev)
+
+#### Extractor changes
+- [Migrate commonly plural fields to lists](https://github.com/yt-dlp/yt-dlp/commit/104a7b5a46dc1805157fb4cc11c05876934d37c1) ([#8917](https://github.com/yt-dlp/yt-dlp/issues/8917)) by [llistochek](https://github.com/llistochek), [pukkandan](https://github.com/pukkandan) (With fixes in [b136e2a](https://github.com/yt-dlp/yt-dlp/commit/b136e2af341f7a88028aea4c5cd50efe2fa9b182) by [bashonly](https://github.com/bashonly))
+- [Support multi-period MPD streams](https://github.com/yt-dlp/yt-dlp/commit/4ce57d3b873c2887814cbec03d029533e82f7db5) ([#6654](https://github.com/yt-dlp/yt-dlp/issues/6654)) by [alard](https://github.com/alard), [pukkandan](https://github.com/pukkandan)
+- **abematv**
+ - [Fix extraction with cache](https://github.com/yt-dlp/yt-dlp/commit/c51316f8a69fbd0080f2720777d42ab438e254a3) ([#8895](https://github.com/yt-dlp/yt-dlp/issues/8895)) by [sefidel](https://github.com/sefidel)
+ - [Support login for playlists](https://github.com/yt-dlp/yt-dlp/commit/8226a3818f804478c756cf460baa9bf3a3b062a5) ([#8901](https://github.com/yt-dlp/yt-dlp/issues/8901)) by [sefidel](https://github.com/sefidel)
+- **adn**
+ - [Add support for German site](https://github.com/yt-dlp/yt-dlp/commit/5eb1458be4767385a9bf1d570ff08e46100cbaa2) ([#8708](https://github.com/yt-dlp/yt-dlp/issues/8708)) by [infanf](https://github.com/infanf)
+ - [Improve auth error handling](https://github.com/yt-dlp/yt-dlp/commit/9526b1f179d19f75284eceaa5e0ee381af18cf19) ([#9068](https://github.com/yt-dlp/yt-dlp/issues/9068)) by [infanf](https://github.com/infanf)
+- **aenetworks**: [Rating should be optional for AP extraction](https://github.com/yt-dlp/yt-dlp/commit/014cb5774d7afe624b6eb4e07f7be924b9e5e186) ([#9005](https://github.com/yt-dlp/yt-dlp/issues/9005)) by [agibson-fl](https://github.com/agibson-fl)
+- **altcensored**: channel: [Fix playlist extraction](https://github.com/yt-dlp/yt-dlp/commit/e28e135d6fd6a430fed3e20dfe1a8c8bbc5f9185) ([#9297](https://github.com/yt-dlp/yt-dlp/issues/9297)) by [marcdumais](https://github.com/marcdumais)
+- **amadeustv**: [Add extractor](https://github.com/yt-dlp/yt-dlp/commit/e641aab7a61df7406df60ebfe0c77bd5186b2b41) ([#8744](https://github.com/yt-dlp/yt-dlp/issues/8744)) by [ArnauvGilotra](https://github.com/ArnauvGilotra)
+- **ant1newsgrembed**: [Fix extractor](https://github.com/yt-dlp/yt-dlp/commit/1ed5ee2f045f717e814f84ba461dadc58e712266) ([#9191](https://github.com/yt-dlp/yt-dlp/issues/9191)) by [seproDev](https://github.com/seproDev)
+- **archiveorg**: [Fix format URL encoding](https://github.com/yt-dlp/yt-dlp/commit/3894ab9574748188bbacbd925a3971eda6fa2bb0) ([#9279](https://github.com/yt-dlp/yt-dlp/issues/9279)) by [bashonly](https://github.com/bashonly)
+- **ard**
+ - mediathek
+ - [Revert to using old id](https://github.com/yt-dlp/yt-dlp/commit/b6951271ac014761c9c317b9cecd5e8e139cfa7c) ([#8916](https://github.com/yt-dlp/yt-dlp/issues/8916)) by [Grub4K](https://github.com/Grub4K)
+ - [Support cookies to verify age](https://github.com/yt-dlp/yt-dlp/commit/c099ec9392b0283dde34b290d1a04158ad8eb882) ([#9037](https://github.com/yt-dlp/yt-dlp/issues/9037)) by [StefanLobbenmeier](https://github.com/StefanLobbenmeier)
+- **art19**: [Add extractors](https://github.com/yt-dlp/yt-dlp/commit/999ea80beb053491089d256104c4188aced3110f) ([#9099](https://github.com/yt-dlp/yt-dlp/issues/9099)) by [seproDev](https://github.com/seproDev)
+- **artetv**: [Separate closed captions](https://github.com/yt-dlp/yt-dlp/commit/393b487a4ea391c44e811505ec98531031d7e81e) ([#8231](https://github.com/yt-dlp/yt-dlp/issues/8231)) by [Nicals](https://github.com/Nicals), [seproDev](https://github.com/seproDev)
+- **asobichannel**: [Add extractors](https://github.com/yt-dlp/yt-dlp/commit/12f042740550c06552819374e2251deb7a519bab) ([#8700](https://github.com/yt-dlp/yt-dlp/issues/8700)) by [Snack-X](https://github.com/Snack-X)
+- **bigo**: [Fix JSON extraction](https://github.com/yt-dlp/yt-dlp/commit/85a2d07c1f82c2082b568963d1c32ad3fc848f61) ([#8893](https://github.com/yt-dlp/yt-dlp/issues/8893)) by [DmitryScaletta](https://github.com/DmitryScaletta)
+- **bilibili**
+ - [Add referer header and fix metadata extraction](https://github.com/yt-dlp/yt-dlp/commit/1713c882730a928ac344c099874d2093fc2c8b51) ([#8832](https://github.com/yt-dlp/yt-dlp/issues/8832)) by [SirElderling](https://github.com/SirElderling) (With fixes in [f1570ab](https://github.com/yt-dlp/yt-dlp/commit/f1570ab84d5f49564256c620063d2d3e9ed4acf0) by [TobiX](https://github.com/TobiX))
+ - [Support `--no-playlist`](https://github.com/yt-dlp/yt-dlp/commit/e439693f729daf6fb15457baea1bca10ef5da34d) ([#9139](https://github.com/yt-dlp/yt-dlp/issues/9139)) by [c-basalt](https://github.com/c-basalt)
+- **bilibilisearch**: [Set cookie to fix extraction](https://github.com/yt-dlp/yt-dlp/commit/ffa017cfc5973b265c92248546fcf5020dc43eaf) ([#9119](https://github.com/yt-dlp/yt-dlp/issues/9119)) by [c-basalt](https://github.com/c-basalt)
+- **biliintl**: [Fix and improve subtitles extraction](https://github.com/yt-dlp/yt-dlp/commit/cf6413e840476c15e5b166dc2f7cc2a90a4a9aad) ([#7077](https://github.com/yt-dlp/yt-dlp/issues/7077)) by [dirkf](https://github.com/dirkf), [HobbyistDev](https://github.com/HobbyistDev), [itachi-19](https://github.com/itachi-19), [seproDev](https://github.com/seproDev)
+- **boosty**: [Add extractor](https://github.com/yt-dlp/yt-dlp/commit/540b68298192874c75ad5ee4589bed64d02a7d55) ([#9144](https://github.com/yt-dlp/yt-dlp/issues/9144)) by [un-def](https://github.com/un-def)
+- **ccma**: [Extract 1080p DASH formats](https://github.com/yt-dlp/yt-dlp/commit/4253e3b7f483127bd812bdac02466f4a5b47ff34) ([#9130](https://github.com/yt-dlp/yt-dlp/issues/9130)) by [seproDev](https://github.com/seproDev)
+- **cctv**: [Fix extraction](https://github.com/yt-dlp/yt-dlp/commit/6ad11fef65474bcf70f3a8556850d93c141e44a2) ([#9325](https://github.com/yt-dlp/yt-dlp/issues/9325)) by [src-tinkerer](https://github.com/src-tinkerer)
+- **chzzk**
+ - [Add extractors](https://github.com/yt-dlp/yt-dlp/commit/ba6b0c8261e9f0a6373885736ff90a89dd1fb614) ([#8887](https://github.com/yt-dlp/yt-dlp/issues/8887)) by [DmitryScaletta](https://github.com/DmitryScaletta)
+ - live: [Support `--wait-for-video`](https://github.com/yt-dlp/yt-dlp/commit/804f2366117b7065552a1c3cddb9ec19b688a5c1) ([#9309](https://github.com/yt-dlp/yt-dlp/issues/9309)) by [hui1601](https://github.com/hui1601)
+- **cineverse**: [Detect when login required](https://github.com/yt-dlp/yt-dlp/commit/fc2cc626f07328a6c71b5e21853e4cfa7b1e6256) ([#9081](https://github.com/yt-dlp/yt-dlp/issues/9081)) by [garret1317](https://github.com/garret1317)
+- **cloudflarestream**
+ - [Extract subtitles](https://github.com/yt-dlp/yt-dlp/commit/4d9dc0abe24ad5d9d22a16f40fc61137dcd103f7) ([#9007](https://github.com/yt-dlp/yt-dlp/issues/9007)) by [Bibhav48](https://github.com/Bibhav48)
+ - [Improve `_VALID_URL`](https://github.com/yt-dlp/yt-dlp/commit/f3d5face83f948c24bcb91e06d4fa6e8622d7d79) ([#9280](https://github.com/yt-dlp/yt-dlp/issues/9280)) by [bashonly](https://github.com/bashonly)
+ - [Improve embed detection](https://github.com/yt-dlp/yt-dlp/commit/464c919ea82aefdf35f138a1ab2dd0bb8fb7fd0e) ([#9287](https://github.com/yt-dlp/yt-dlp/issues/9287)) by [bashonly](https://github.com/bashonly)
+- **cloudycdn, lsm**: [Add extractors](https://github.com/yt-dlp/yt-dlp/commit/5dda3b291f59f388f953337e9fb09a94b64aaf34) ([#8643](https://github.com/yt-dlp/yt-dlp/issues/8643)) by [Caesim404](https://github.com/Caesim404)
+- **cnbc**: [Overhaul extractors](https://github.com/yt-dlp/yt-dlp/commit/998dffb5a2343ec709b3d6bbf2bf019649080239) ([#8741](https://github.com/yt-dlp/yt-dlp/issues/8741)) by [gonzalezjo](https://github.com/gonzalezjo), [Noor-5](https://github.com/Noor-5), [ruiminggu](https://github.com/ruiminggu), [seproDev](https://github.com/seproDev), [zhijinwuu](https://github.com/zhijinwuu)
+- **craftsy**: [Fix extractor](https://github.com/yt-dlp/yt-dlp/commit/96f3924bac174f2fd401f86f78e77d7e0c5ee008) ([#9384](https://github.com/yt-dlp/yt-dlp/issues/9384)) by [bashonly](https://github.com/bashonly)
+- **crooksandliars**: [Fix extractor](https://github.com/yt-dlp/yt-dlp/commit/03536126d32bd861e38536371f0cd5f1b71dcb7a) ([#9192](https://github.com/yt-dlp/yt-dlp/issues/9192)) by [seproDev](https://github.com/seproDev)
+- **crtvg**: [Fix `_VALID_URL`](https://github.com/yt-dlp/yt-dlp/commit/785ab1af7f131e73444634ad57b39478651a43d3) ([#9404](https://github.com/yt-dlp/yt-dlp/issues/9404)) by [Xpl0itU](https://github.com/Xpl0itU)
+- **dailymotion**: [Support search](https://github.com/yt-dlp/yt-dlp/commit/11ffa92a61e5847b3dfa8975f91ecb3ac2178841) ([#8292](https://github.com/yt-dlp/yt-dlp/issues/8292)) by [drzraf](https://github.com/drzraf), [seproDev](https://github.com/seproDev)
+- **douyin**: [Fix extractor](https://github.com/yt-dlp/yt-dlp/commit/9ff946645568e71046487571eefa9cb524a5189b) ([#9239](https://github.com/yt-dlp/yt-dlp/issues/9239)) by [114514ns](https://github.com/114514ns), [bashonly](https://github.com/bashonly) (With fixes in [e546e5d](https://github.com/yt-dlp/yt-dlp/commit/e546e5d3b33a50075e574a2e7b8eda7ea874d21e) by [bashonly](https://github.com/bashonly))
+- **duboku**: [Fix m3u8 formats extraction](https://github.com/yt-dlp/yt-dlp/commit/d3d4187da90a6b85f4ebae4bb07693cc9b412d75) ([#9161](https://github.com/yt-dlp/yt-dlp/issues/9161)) by [DmitryScaletta](https://github.com/DmitryScaletta)
+- **dumpert**: [Improve `_VALID_URL`](https://github.com/yt-dlp/yt-dlp/commit/eedb38ce4093500e19279d50b708fb9c18bf4dbf) ([#9320](https://github.com/yt-dlp/yt-dlp/issues/9320)) by [rvsit](https://github.com/rvsit)
+- **elementorembed**: [Add extractor](https://github.com/yt-dlp/yt-dlp/commit/6171b050d70435008e64fa06aa6f19c4e5bec75f) ([#8948](https://github.com/yt-dlp/yt-dlp/issues/8948)) by [pompos02](https://github.com/pompos02), [seproDev](https://github.com/seproDev)
+- **eporner**: [Extract AV1 formats](https://github.com/yt-dlp/yt-dlp/commit/96d0f8c1cb8aec250c5614bfde6b5fb95f10819b) ([#9028](https://github.com/yt-dlp/yt-dlp/issues/9028)) by [michal-repo](https://github.com/michal-repo)
+- **errjupiter**
+ - [Add extractor](https://github.com/yt-dlp/yt-dlp/commit/a514cc2feb1c3b265b19acab11487acad8bb3ab0) ([#8549](https://github.com/yt-dlp/yt-dlp/issues/8549)) by [glensc](https://github.com/glensc)
+ - [Improve `_VALID_URL`](https://github.com/yt-dlp/yt-dlp/commit/80ed8bdeba5a945f127ef9ab055a4823329a1210) ([#9218](https://github.com/yt-dlp/yt-dlp/issues/9218)) by [glensc](https://github.com/glensc)
+- **facebook**
+ - [Add new ID format](https://github.com/yt-dlp/yt-dlp/commit/cf9af2c7f1fedd881a157b3fbe725e5494b00924) ([#3824](https://github.com/yt-dlp/yt-dlp/issues/3824)) by [kclauhk](https://github.com/kclauhk), [Wikidepia](https://github.com/Wikidepia)
+ - [Improve extraction](https://github.com/yt-dlp/yt-dlp/commit/2e30b5567b5c6113d46b39163db5b044aea8667e) by [jingtra](https://github.com/jingtra), [ringus1](https://github.com/ringus1)
+ - [Improve thumbnail extraction](https://github.com/yt-dlp/yt-dlp/commit/3c4d3ee491b0ec22ed3cade51d943d3d27141ba7) ([#9060](https://github.com/yt-dlp/yt-dlp/issues/9060)) by [kclauhk](https://github.com/kclauhk)
+ - [Set format HTTP chunk size](https://github.com/yt-dlp/yt-dlp/commit/5b68c478fb0b93ea6b8fac23f50e12217fa063db) ([#9058](https://github.com/yt-dlp/yt-dlp/issues/9058)) by [bashonly](https://github.com/bashonly), [kclauhk](https://github.com/kclauhk)
+ - [Support events](https://github.com/yt-dlp/yt-dlp/commit/9b5efaf86b99a2664fff9fc725d275f766c3221d) ([#9055](https://github.com/yt-dlp/yt-dlp/issues/9055)) by [kclauhk](https://github.com/kclauhk)
+ - [Support permalink URLs](https://github.com/yt-dlp/yt-dlp/commit/87286e93af949c4e6a0f8ba34af6a1ab5aa102b6) ([#9061](https://github.com/yt-dlp/yt-dlp/issues/9061)) by [kclauhk](https://github.com/kclauhk)
+ - ads: [Add extractor](https://github.com/yt-dlp/yt-dlp/commit/a40b0070c2a00d3ed839897462171a82323aa875) ([#8870](https://github.com/yt-dlp/yt-dlp/issues/8870)) by [kclauhk](https://github.com/kclauhk)
+- **flextv**: [Add extractor](https://github.com/yt-dlp/yt-dlp/commit/4f043479090dc8a7e06e0bb53691e5414320dfb2) ([#9178](https://github.com/yt-dlp/yt-dlp/issues/9178)) by [DmitryScaletta](https://github.com/DmitryScaletta)
+- **floatplane**: [Improve metadata extraction](https://github.com/yt-dlp/yt-dlp/commit/9cd90447907a59c8a2727583f4a755fb23ed8cd3) ([#8934](https://github.com/yt-dlp/yt-dlp/issues/8934)) by [chtk](https://github.com/chtk)
+- **francetv**
+ - [Fix DAI livestreams](https://github.com/yt-dlp/yt-dlp/commit/e4fbe5f886a6693f2466877c12e99c30c5442ace) ([#9380](https://github.com/yt-dlp/yt-dlp/issues/9380)) by [bashonly](https://github.com/bashonly)
+ - [Fix extractors](https://github.com/yt-dlp/yt-dlp/commit/9749ac7fecbfda391afbadf2870797ce0e382622) ([#9333](https://github.com/yt-dlp/yt-dlp/issues/9333)) by [bashonly](https://github.com/bashonly)
+ - [Fix m3u8 formats extraction](https://github.com/yt-dlp/yt-dlp/commit/ede624d1db649f5a4b61f8abbb746f365322de27) ([#9347](https://github.com/yt-dlp/yt-dlp/issues/9347)) by [bashonly](https://github.com/bashonly)
+- **funk**: [Fix extractor](https://github.com/yt-dlp/yt-dlp/commit/cd0443fb14e2ed805abb02792473457553a123d1) ([#9194](https://github.com/yt-dlp/yt-dlp/issues/9194)) by [seproDev](https://github.com/seproDev)
+- **generic**: [Follow https redirects properly](https://github.com/yt-dlp/yt-dlp/commit/c8c9039e640495700f76a13496e3418bdd4382ba) ([#9121](https://github.com/yt-dlp/yt-dlp/issues/9121)) by [seproDev](https://github.com/seproDev)
+- **getcourseru**: [Add extractors](https://github.com/yt-dlp/yt-dlp/commit/4310b6650eeb5630295f4591b37720877878c57a) ([#8873](https://github.com/yt-dlp/yt-dlp/issues/8873)) by [divStar](https://github.com/divStar), [seproDev](https://github.com/seproDev)
+- **gofile**: [Fix extraction](https://github.com/yt-dlp/yt-dlp/commit/77c2472ca1ef9050a66aa68bc5fa1bee88706c66) ([#9074](https://github.com/yt-dlp/yt-dlp/issues/9074)) by [jazz1611](https://github.com/jazz1611)
+- **googledrive**: [Fix source file extraction](https://github.com/yt-dlp/yt-dlp/commit/5498729c59b03a9511c64552da3ba2f802166f8d) ([#8990](https://github.com/yt-dlp/yt-dlp/issues/8990)) by [jazz1611](https://github.com/jazz1611)
+- **goplay**: [Fix extractor](https://github.com/yt-dlp/yt-dlp/commit/7e90e34fa4617b53f8c8a9e69f460508cb1f51b0) ([#6654](https://github.com/yt-dlp/yt-dlp/issues/6654)) by [alard](https://github.com/alard)
+- **gopro**: [Fix extractor](https://github.com/yt-dlp/yt-dlp/commit/4a07a455bbf7acf87550053bbba949c828e350ba) ([#9019](https://github.com/yt-dlp/yt-dlp/issues/9019)) by [stilor](https://github.com/stilor)
+- **ilpost**: [Add extractor](https://github.com/yt-dlp/yt-dlp/commit/aa5dcc4ee65916a36cbe1b1b5b29b9110c3163ed) ([#9001](https://github.com/yt-dlp/yt-dlp/issues/9001)) by [CapacitorSet](https://github.com/CapacitorSet)
+- **jiosaavnsong**: [Support more bitrates](https://github.com/yt-dlp/yt-dlp/commit/5154dc0a687528f995cde22b5ff63f82c740e98a) ([#8834](https://github.com/yt-dlp/yt-dlp/issues/8834)) by [alien-developers](https://github.com/alien-developers), [bashonly](https://github.com/bashonly)
+- **kukululive**: [Add extractor](https://github.com/yt-dlp/yt-dlp/commit/20cdad5a2c0499d5a6746f5466a2ab0c97b75884) ([#8877](https://github.com/yt-dlp/yt-dlp/issues/8877)) by [DmitryScaletta](https://github.com/DmitryScaletta)
+- **lefigarovideoembed**: [Fix extractor](https://github.com/yt-dlp/yt-dlp/commit/9401736fd08767c58af45a1e36ff5929c5fa1ac9) ([#9198](https://github.com/yt-dlp/yt-dlp/issues/9198)) by [seproDev](https://github.com/seproDev)
+- **linkedin**: [Fix metadata and extract subtitles](https://github.com/yt-dlp/yt-dlp/commit/017adb28e7fe7b8c8fc472332d86740f31141519) ([#9056](https://github.com/yt-dlp/yt-dlp/issues/9056)) by [barsnick](https://github.com/barsnick)
+- **magellantv**: [Support episodes](https://github.com/yt-dlp/yt-dlp/commit/3dc9232e1aa58fe3c2d8cafb50e8162d6f0e891e) ([#9199](https://github.com/yt-dlp/yt-dlp/issues/9199)) by [seproDev](https://github.com/seproDev)
+- **magentamusik**: [Add extractor](https://github.com/yt-dlp/yt-dlp/commit/5e2e24b2c5795756d81785b06b10723ddb6db7b2) ([#7790](https://github.com/yt-dlp/yt-dlp/issues/7790)) by [pwaldhauer](https://github.com/pwaldhauer), [seproDev](https://github.com/seproDev)
+- **medaltv**: [Fix extraction](https://github.com/yt-dlp/yt-dlp/commit/02e343f6ef6d7b3f9087ff69e4a1db0b4b4a5c5d) ([#9098](https://github.com/yt-dlp/yt-dlp/issues/9098)) by [Danish-H](https://github.com/Danish-H)
+- **mlbarticle**: [Fix extractor](https://github.com/yt-dlp/yt-dlp/commit/50e06e21a68e336198198bda332b8e7d2314f201) ([#9021](https://github.com/yt-dlp/yt-dlp/issues/9021)) by [HobbyistDev](https://github.com/HobbyistDev)
+- **motherless**: [Support uploader playlists](https://github.com/yt-dlp/yt-dlp/commit/9f1e9dab21bbe651544c8f4663b0e615dc450e4d) ([#8994](https://github.com/yt-dlp/yt-dlp/issues/8994)) by [dasidiot](https://github.com/dasidiot)
+- **mujrozhlas**: [Fix extraction](https://github.com/yt-dlp/yt-dlp/commit/4170b3d7120e06db3391eef39c5add18a1ddf2c3) ([#9306](https://github.com/yt-dlp/yt-dlp/issues/9306)) by [bashonly](https://github.com/bashonly)
+- **mx3**: [Add extractors](https://github.com/yt-dlp/yt-dlp/commit/5a63454b3637b3603434026cddfeac509218b90e) ([#8736](https://github.com/yt-dlp/yt-dlp/issues/8736)) by [martinxyz](https://github.com/martinxyz)
+- **naver**: [Fix extractors](https://github.com/yt-dlp/yt-dlp/commit/a281beba8d8f007cf220f96dd1d9412bb070c7d8) ([#8883](https://github.com/yt-dlp/yt-dlp/issues/8883)) by [seproDev](https://github.com/seproDev)
+- **nebula**: [Support podcasts](https://github.com/yt-dlp/yt-dlp/commit/0de09c5b9ed619d4a93d7c451c6ddff0381de808) ([#9140](https://github.com/yt-dlp/yt-dlp/issues/9140)) by [c-basalt](https://github.com/c-basalt), [seproDev](https://github.com/seproDev)
+- **nerdcubedfeed**: [Overhaul extractor](https://github.com/yt-dlp/yt-dlp/commit/29a74a6126101aabaa1726ae41b1ca55cf26e7a7) ([#9269](https://github.com/yt-dlp/yt-dlp/issues/9269)) by [seproDev](https://github.com/seproDev)
+- **newgrounds**
+ - [Fix login and clean up extraction](https://github.com/yt-dlp/yt-dlp/commit/0fcefb92f3ebfc5cada19c1e85a715f020d0f333) ([#9356](https://github.com/yt-dlp/yt-dlp/issues/9356)) by [Grub4K](https://github.com/Grub4K), [mrmedieval](https://github.com/mrmedieval)
+ - user: [Fix extractor](https://github.com/yt-dlp/yt-dlp/commit/3e083191cdc34dd8c482da9a9b4bc682f824cb9d) ([#9046](https://github.com/yt-dlp/yt-dlp/issues/9046)) by [u-spec-png](https://github.com/u-spec-png)
+- **nfb**: [Add support for onf.ca and series](https://github.com/yt-dlp/yt-dlp/commit/4b8b0dded8c65cd5b2ab2e858058ba98c9bf49ff) ([#8997](https://github.com/yt-dlp/yt-dlp/issues/8997)) by [bashonly](https://github.com/bashonly), [rrgomes](https://github.com/rrgomes)
+- **nhkradiru**: [Extract extended description](https://github.com/yt-dlp/yt-dlp/commit/4392447d9404e3c25cfeb8f5bdfff31b0448da39) ([#9162](https://github.com/yt-dlp/yt-dlp/issues/9162)) by [garret1317](https://github.com/garret1317)
+- **nhkradirulive**: [Make metadata extraction non-fatal](https://github.com/yt-dlp/yt-dlp/commit/5af1f19787f7d652fce72dd3ab9536cdd980fe85) ([#8956](https://github.com/yt-dlp/yt-dlp/issues/8956)) by [garret1317](https://github.com/garret1317)
+- **niconico**
+ - [Remove legacy danmaku extraction](https://github.com/yt-dlp/yt-dlp/commit/974d444039c8bbffb57265c6792cd52d169fe1b9) ([#9209](https://github.com/yt-dlp/yt-dlp/issues/9209)) by [pzhlkj6612](https://github.com/pzhlkj6612)
+ - [Support DMS formats](https://github.com/yt-dlp/yt-dlp/commit/aa13a8e3dd3b698cc40ec438988b1ad834e11a41) ([#9282](https://github.com/yt-dlp/yt-dlp/issues/9282)) by [pzhlkj6612](https://github.com/pzhlkj6612), [xpadev-net](https://github.com/xpadev-net) (With fixes in [40966e8](https://github.com/yt-dlp/yt-dlp/commit/40966e8da27bbf770dacf9be9363fcc3ad72cc9f) by [pzhlkj6612](https://github.com/pzhlkj6612))
+- **ninaprotocol**: [Add extractor](https://github.com/yt-dlp/yt-dlp/commit/62c65bfaf81e04e6746f6fdbafe384eb3edddfbc) ([#8946](https://github.com/yt-dlp/yt-dlp/issues/8946)) by [RaduManole](https://github.com/RaduManole), [seproDev](https://github.com/seproDev)
+- **ninenews**: [Add extractor](https://github.com/yt-dlp/yt-dlp/commit/43694ce13c5a9f1afca8b02b8b2b9b1576d6503d) ([#8840](https://github.com/yt-dlp/yt-dlp/issues/8840)) by [SirElderling](https://github.com/SirElderling)
+- **nova**: [Fix embed extraction](https://github.com/yt-dlp/yt-dlp/commit/c168d8791d0974a8a8fcb3b4a4bc2d830df51622) ([#9221](https://github.com/yt-dlp/yt-dlp/issues/9221)) by [seproDev](https://github.com/seproDev)
+- **ntvru**: [Fix extraction](https://github.com/yt-dlp/yt-dlp/commit/7a29cbbd5fd7363e7e8535ee1506b7052465d13f) ([#9276](https://github.com/yt-dlp/yt-dlp/issues/9276)) by [bashonly](https://github.com/bashonly), [dirkf](https://github.com/dirkf)
+- **nuum**: [Add extractors](https://github.com/yt-dlp/yt-dlp/commit/acaf806c15f0a802ba286c23af02a10cf4bd4731) ([#8868](https://github.com/yt-dlp/yt-dlp/issues/8868)) by [DmitryScaletta](https://github.com/DmitryScaletta), [seproDev](https://github.com/seproDev)
+- **nytimes**
+ - [Extract timestamp](https://github.com/yt-dlp/yt-dlp/commit/05420227aaab60a39c0f9ade069c5862be36b1fa) ([#9142](https://github.com/yt-dlp/yt-dlp/issues/9142)) by [SirElderling](https://github.com/SirElderling)
+ - [Overhaul extractors](https://github.com/yt-dlp/yt-dlp/commit/07256b9fee23960799024b95d5972abc7174aa81) ([#9075](https://github.com/yt-dlp/yt-dlp/issues/9075)) by [SirElderling](https://github.com/SirElderling)
+- **onefootball**: [Fix extractor](https://github.com/yt-dlp/yt-dlp/commit/644738ddaa45428cb0babd41ead22454e5a2545e) ([#9222](https://github.com/yt-dlp/yt-dlp/issues/9222)) by [seproDev](https://github.com/seproDev)
+- **openrec**: [Pass referer for m3u8 formats](https://github.com/yt-dlp/yt-dlp/commit/f591e605dfee4085ec007d6d056c943cbcacc429) ([#9253](https://github.com/yt-dlp/yt-dlp/issues/9253)) by [fireattack](https://github.com/fireattack)
+- **orf**: on: [Add extractor](https://github.com/yt-dlp/yt-dlp/commit/a0d50aabc5462aee302bd3f2663d3a3554875789) ([#9113](https://github.com/yt-dlp/yt-dlp/issues/9113)) by [HobbyistDev](https://github.com/HobbyistDev)
+- **patreon**: [Fix embedded HLS extraction](https://github.com/yt-dlp/yt-dlp/commit/f0e8bc7c60b61fe18b63116c975609d76b904771) ([#8993](https://github.com/yt-dlp/yt-dlp/issues/8993)) by [johnvictorfs](https://github.com/johnvictorfs)
+- **peertube**: [Update instances](https://github.com/yt-dlp/yt-dlp/commit/35d96982f1033e36215d323317981ee17e8ab0d5) ([#9070](https://github.com/yt-dlp/yt-dlp/issues/9070)) by [Chocobozzz](https://github.com/Chocobozzz)
+- **piapro**: [Improve `_VALID_URL`](https://github.com/yt-dlp/yt-dlp/commit/8e6e3651727b0b85764857fc6329fe5e0a3f00de) ([#8999](https://github.com/yt-dlp/yt-dlp/issues/8999)) by [FinnRG](https://github.com/FinnRG)
+- **playsuisse**: [Add login support](https://github.com/yt-dlp/yt-dlp/commit/cae6e461073fb7c32fd32052a3e6721447c469bc) ([#9077](https://github.com/yt-dlp/yt-dlp/issues/9077)) by [chkuendig](https://github.com/chkuendig)
+- **pornhub**: [Fix login support](https://github.com/yt-dlp/yt-dlp/commit/de954c1b4d3a6db8a6525507e65303c7bb03f39f) ([#9227](https://github.com/yt-dlp/yt-dlp/issues/9227)) by [feederbox826](https://github.com/feederbox826)
+- **pr0gramm**: [Enable POL filter and provide tags without login](https://github.com/yt-dlp/yt-dlp/commit/5f25f348f9eb5db842b1ec6799f95bebb7ba35a7) ([#9051](https://github.com/yt-dlp/yt-dlp/issues/9051)) by [Grub4K](https://github.com/Grub4K)
+- **prankcastpost**: [Add extractor](https://github.com/yt-dlp/yt-dlp/commit/a2bac6b7adb7b0e955125838e20bb39eece630ce) ([#8933](https://github.com/yt-dlp/yt-dlp/issues/8933)) by [columndeeply](https://github.com/columndeeply)
+- **radiko**: [Extract more metadata](https://github.com/yt-dlp/yt-dlp/commit/e3ce2b385ec1f03fac9d4210c57fda77134495fc) ([#9115](https://github.com/yt-dlp/yt-dlp/issues/9115)) by [YoshichikaAAA](https://github.com/YoshichikaAAA)
+- **rai**
+ - [Filter unavailable formats](https://github.com/yt-dlp/yt-dlp/commit/f78814923748277e7067b796f25870686fb46205) ([#9189](https://github.com/yt-dlp/yt-dlp/issues/9189)) by [nixxo](https://github.com/nixxo)
+ - [Fix m3u8 formats extraction](https://github.com/yt-dlp/yt-dlp/commit/8f423cf8051fbfeedd57cca00d106012e6e86a97) ([#9291](https://github.com/yt-dlp/yt-dlp/issues/9291)) by [nixxo](https://github.com/nixxo)
+- **redcdnlivx, sejm**: [Add extractors](https://github.com/yt-dlp/yt-dlp/commit/fcaa2e735b00b15a2b0d9f55f4187c654b4b5b39) ([#8676](https://github.com/yt-dlp/yt-dlp/issues/8676)) by [selfisekai](https://github.com/selfisekai)
+- **redtube**
+ - [Fix formats extraction](https://github.com/yt-dlp/yt-dlp/commit/c91d8b1899403daff6fc15206ad32de8db17fb8f) ([#9076](https://github.com/yt-dlp/yt-dlp/issues/9076)) by [jazz1611](https://github.com/jazz1611)
+ - [Support redtube.com.br URLs](https://github.com/yt-dlp/yt-dlp/commit/4a6ff0b47a700dee3ee5c54804c31965308479ae) ([#9103](https://github.com/yt-dlp/yt-dlp/issues/9103)) by [jazz1611](https://github.com/jazz1611)
+- **ridehome**: [Add extractor](https://github.com/yt-dlp/yt-dlp/commit/cd7086c0d54ec1d7e02a30bd5bd934bdb2c54642) ([#8875](https://github.com/yt-dlp/yt-dlp/issues/8875)) by [SirElderling](https://github.com/SirElderling)
+- **rinsefmartistplaylist**: [Add extractor](https://github.com/yt-dlp/yt-dlp/commit/1a36dbad712d359ec1c5b73d9bbbe562c03e9660) ([#8794](https://github.com/yt-dlp/yt-dlp/issues/8794)) by [SirElderling](https://github.com/SirElderling)
+- **roosterteeth**
+ - [Add Brightcove fallback](https://github.com/yt-dlp/yt-dlp/commit/b2cc150ad83ba20ceb2d6e73d09854eed3c2d05c) ([#9403](https://github.com/yt-dlp/yt-dlp/issues/9403)) by [bashonly](https://github.com/bashonly)
+ - [Extract ad-free streams](https://github.com/yt-dlp/yt-dlp/commit/dd29e6e5fdf0f3758cb0829e73749832768f1a4e) ([#9355](https://github.com/yt-dlp/yt-dlp/issues/9355)) by [jkmartindale](https://github.com/jkmartindale)
+ - [Extract release date and timestamp](https://github.com/yt-dlp/yt-dlp/commit/dfd8c0b69683b1c11beea039a96dd2949026c1d7) ([#9393](https://github.com/yt-dlp/yt-dlp/issues/9393)) by [bashonly](https://github.com/bashonly)
+ - [Support bonus features](https://github.com/yt-dlp/yt-dlp/commit/8993721ecb34867b52b79f6e92b233008d1cbe78) ([#9406](https://github.com/yt-dlp/yt-dlp/issues/9406)) by [Bl4Cc4t](https://github.com/Bl4Cc4t)
+- **rule34video**
+ - [Extract `creators`](https://github.com/yt-dlp/yt-dlp/commit/3d9dc2f3590e10abf1561ebdaed96734a740587c) ([#9258](https://github.com/yt-dlp/yt-dlp/issues/9258)) by [gmes78](https://github.com/gmes78)
+ - [Extract more metadata](https://github.com/yt-dlp/yt-dlp/commit/fee2d8d9c38f9b5f0a8df347c1e698983339c34d) ([#7416](https://github.com/yt-dlp/yt-dlp/issues/7416)) by [gmes78](https://github.com/gmes78)
+ - [Fix `_VALID_URL`](https://github.com/yt-dlp/yt-dlp/commit/c0ecceeefe6ebd27452d9d8f20658f83ae121d04) ([#9044](https://github.com/yt-dlp/yt-dlp/issues/9044)) by [gmes78](https://github.com/gmes78)
+- **rumblechannel**: [Fix extractor](https://github.com/yt-dlp/yt-dlp/commit/0023af81fbce01984f35b34ecaf8562739831227) ([#9092](https://github.com/yt-dlp/yt-dlp/issues/9092)) by [Pranaxcau](https://github.com/Pranaxcau), [vista-narvas](https://github.com/vista-narvas)
+- **screencastify**: [Update `_VALID_URL`](https://github.com/yt-dlp/yt-dlp/commit/0bee29493ca8f91a0055a3706c7c94f5860188df) ([#9232](https://github.com/yt-dlp/yt-dlp/issues/9232)) by [seproDev](https://github.com/seproDev)
+- **svtpage**: [Fix extractor](https://github.com/yt-dlp/yt-dlp/commit/ddd4b5e10a653bee78e656107710021c1b82934c) ([#8938](https://github.com/yt-dlp/yt-dlp/issues/8938)) by [diman8](https://github.com/diman8)
+- **swearnet**: [Raise for login required](https://github.com/yt-dlp/yt-dlp/commit/b05640d532c43a52c0a0da096bb2dbd51e105ec0) ([#9281](https://github.com/yt-dlp/yt-dlp/issues/9281)) by [bashonly](https://github.com/bashonly)
+- **tiktok**: [Fix webpage extraction](https://github.com/yt-dlp/yt-dlp/commit/d9b4154cbcb979d7e30af3a73b1bee422aae5aa3) ([#9327](https://github.com/yt-dlp/yt-dlp/issues/9327)) by [bashonly](https://github.com/bashonly)
+- **trtworld**: [Add extractor](https://github.com/yt-dlp/yt-dlp/commit/8ab84650837e58046430c9f4b615c56a8886e071) ([#8701](https://github.com/yt-dlp/yt-dlp/issues/8701)) by [ufukk](https://github.com/ufukk)
+- **tvp**: [Support livestreams](https://github.com/yt-dlp/yt-dlp/commit/882e3b753c79c7799ce135c3a5edb72494b576af) ([#8860](https://github.com/yt-dlp/yt-dlp/issues/8860)) by [selfisekai](https://github.com/selfisekai)
+- **twitch**: [Fix m3u8 extraction](https://github.com/yt-dlp/yt-dlp/commit/5b8c69ae04444a4c80a5a99917e40f75a116c3b8) ([#8960](https://github.com/yt-dlp/yt-dlp/issues/8960)) by [DmitryScaletta](https://github.com/DmitryScaletta)
+- **twitter**
+ - [Extract bitrate for HLS audio formats](https://github.com/yt-dlp/yt-dlp/commit/28e53d60df9b8aadd52a93504e30e885c9c35262) ([#9257](https://github.com/yt-dlp/yt-dlp/issues/9257)) by [bashonly](https://github.com/bashonly)
+ - [Extract numeric `channel_id`](https://github.com/yt-dlp/yt-dlp/commit/55f1833376505ed1e4be0516b09bb3ea4425e8a4) ([#9263](https://github.com/yt-dlp/yt-dlp/issues/9263)) by [bashonly](https://github.com/bashonly)
+- **txxx**: [Extract thumbnails](https://github.com/yt-dlp/yt-dlp/commit/d79c7e9937c388c68b722ab7450960e43ef776d6) ([#9063](https://github.com/yt-dlp/yt-dlp/issues/9063)) by [shmohawk](https://github.com/shmohawk)
+- **utreon**: [Support playeur.com](https://github.com/yt-dlp/yt-dlp/commit/41d6b61e9852a5b97f47cc8a7718b31fb23f0aea) ([#9182](https://github.com/yt-dlp/yt-dlp/issues/9182)) by [DmitryScaletta](https://github.com/DmitryScaletta)
+- **vbox7**: [Fix extractor](https://github.com/yt-dlp/yt-dlp/commit/67bb70cd700c8d4c3149cd9e0539a5f32c3d1ce6) ([#9100](https://github.com/yt-dlp/yt-dlp/issues/9100)) by [seproDev](https://github.com/seproDev)
+- **viewlift**: [Add support for chorki.com](https://github.com/yt-dlp/yt-dlp/commit/41b6cdb4197aaf7ad82bdad6885eb5d5c64acd74) ([#9095](https://github.com/yt-dlp/yt-dlp/issues/9095)) by [NurTasin](https://github.com/NurTasin)
+- **vimeo**
+ - [Extract `live_status` and `release_timestamp`](https://github.com/yt-dlp/yt-dlp/commit/f0426e9ca57dd14b82e6c13afc17947614f1e8eb) ([#9290](https://github.com/yt-dlp/yt-dlp/issues/9290)) by [pzhlkj6612](https://github.com/pzhlkj6612)
+ - [Fix API headers](https://github.com/yt-dlp/yt-dlp/commit/8e765755f7f4909e1b535e61b7376b2d66e1ba6a) ([#9125](https://github.com/yt-dlp/yt-dlp/issues/9125)) by [bashonly](https://github.com/bashonly)
+ - [Fix login](https://github.com/yt-dlp/yt-dlp/commit/2e8de097ad82da378e97005e8f1ff7e5aebca585) ([#9274](https://github.com/yt-dlp/yt-dlp/issues/9274)) by [bashonly](https://github.com/bashonly)
+- **viously**: [Add extractor](https://github.com/yt-dlp/yt-dlp/commit/95e82347b398d8bb160767cdd975edecd62cbabd) ([#8927](https://github.com/yt-dlp/yt-dlp/issues/8927)) by [nbr23](https://github.com/nbr23), [seproDev](https://github.com/seproDev)
+- **youtube**
+ - [Better error when all player responses are skipped](https://github.com/yt-dlp/yt-dlp/commit/5eedc208ec89d6284777060c94aadd06502338b9) ([#9083](https://github.com/yt-dlp/yt-dlp/issues/9083)) by [Grub4K](https://github.com/Grub4K), [pukkandan](https://github.com/pukkandan)
+ - [Bump Android and iOS client versions](https://github.com/yt-dlp/yt-dlp/commit/413d3675804599bc8fe419c19e36490fd8f0b30f) ([#9317](https://github.com/yt-dlp/yt-dlp/issues/9317)) by [bashonly](https://github.com/bashonly)
+ - [Further bump client versions](https://github.com/yt-dlp/yt-dlp/commit/7aad06541e543fa3452d3d2513e6f079aad1f99b) ([#9395](https://github.com/yt-dlp/yt-dlp/issues/9395)) by [bashonly](https://github.com/bashonly)
+ - tab: [Fix `tags` extraction](https://github.com/yt-dlp/yt-dlp/commit/8828f4576bd862438d4fbf634f1d6ab18a217b0e) ([#9413](https://github.com/yt-dlp/yt-dlp/issues/9413)) by [x11x](https://github.com/x11x)
+- **zenporn**: [Add extractor](https://github.com/yt-dlp/yt-dlp/commit/f00c0def7434fac3c88503c2a77c4b2419b8e5ca) ([#8509](https://github.com/yt-dlp/yt-dlp/issues/8509)) by [SirElderling](https://github.com/SirElderling)
+- **zetland**: [Add extractor](https://github.com/yt-dlp/yt-dlp/commit/2f4b57594673035a59d72f7667588da848820034) ([#9116](https://github.com/yt-dlp/yt-dlp/issues/9116)) by [HobbyistDev](https://github.com/HobbyistDev)
+
+#### Downloader changes
+- **http**: [Reset resume length to handle `FileNotFoundError`](https://github.com/yt-dlp/yt-dlp/commit/2d91b9845621639c53dca7ee9d3d954f3624ba18) ([#8399](https://github.com/yt-dlp/yt-dlp/issues/8399)) by [boredzo](https://github.com/boredzo)
+
+#### Networking changes
+- [Remove `_CompatHTTPError`](https://github.com/yt-dlp/yt-dlp/commit/811d298b231cfa29e75c321b23a91d1c2b17602c) ([#8871](https://github.com/yt-dlp/yt-dlp/issues/8871)) by [coletdjnz](https://github.com/coletdjnz)
+- **Request Handler**
+ - [Remove additional logging handlers on close](https://github.com/yt-dlp/yt-dlp/commit/0085e2bab8465ee7d46d16fcade3ed5e96cc8a48) ([#9032](https://github.com/yt-dlp/yt-dlp/issues/9032)) by [coletdjnz](https://github.com/coletdjnz)
+ - requests: [Apply `remove_dot_segments` to absolute redirect locations](https://github.com/yt-dlp/yt-dlp/commit/35f4f764a786685ea45d84abe1cf1ad3847f4c97) by [coletdjnz](https://github.com/coletdjnz)
+
+#### Misc. changes
+- **build**
+ - [Add `default` optional dependency group](https://github.com/yt-dlp/yt-dlp/commit/cf91400a1dd6cc99b11a6d163e1af73b64d618c9) ([#9295](https://github.com/yt-dlp/yt-dlp/issues/9295)) by [bashonly](https://github.com/bashonly), [Grub4K](https://github.com/Grub4K)
+ - [Add transitional `setup.py` and `pyinst.py`](https://github.com/yt-dlp/yt-dlp/commit/0abf2f1f153ab47990edbeee3477dc55f74c7f89) ([#9296](https://github.com/yt-dlp/yt-dlp/issues/9296)) by [bashonly](https://github.com/bashonly), [Grub4K](https://github.com/Grub4K), [pukkandan](https://github.com/pukkandan)
+ - [Bump `actions/upload-artifact` to v4 and adjust workflows](https://github.com/yt-dlp/yt-dlp/commit/3876429d72afb35247f4b2531eb9b16cfc7e0968) by [bashonly](https://github.com/bashonly)
+ - [Bump `conda-incubator/setup-miniconda` to v3](https://github.com/yt-dlp/yt-dlp/commit/b0059f0413a6ba6ab0a3aec1f00188ce083cd8bf) by [bashonly](https://github.com/bashonly)
+ - [Fix `secretstorage` for ARM builds](https://github.com/yt-dlp/yt-dlp/commit/920397634d1e84e76d2cb897bd6d69ba0c6bd5ca) by [bashonly](https://github.com/bashonly)
+ - [Migrate to `pyproject.toml` and `hatchling`](https://github.com/yt-dlp/yt-dlp/commit/775cde82dc5b1dc64ab0539a92dd8c7ba6c0ad33) by [bashonly](https://github.com/bashonly) (With fixes in [43cfd46](https://github.com/yt-dlp/yt-dlp/commit/43cfd462c0d01eff22c1d4290aeb96eb1ea2c0e1))
+ - [Move bundle scripts into `bundle` submodule](https://github.com/yt-dlp/yt-dlp/commit/a1b778428991b1779203bac243ef4e9b6baea90c) by [bashonly](https://github.com/bashonly)
+ - [Support failed build job re-runs](https://github.com/yt-dlp/yt-dlp/commit/eabbccc439720fba381919a88be4fe4d96464cbd) ([#9277](https://github.com/yt-dlp/yt-dlp/issues/9277)) by [bashonly](https://github.com/bashonly)
+ - Makefile
+ - [Add automated `CODE_FOLDERS` and `CODE_FILES`](https://github.com/yt-dlp/yt-dlp/commit/868d2f60a7cb59b410c8cbfb452cbdb072687b81) by [bashonly](https://github.com/bashonly)
+ - [Ensure compatibility with BSD `make`](https://github.com/yt-dlp/yt-dlp/commit/beaa1a44554d04d9fe63a743a5bb4431ca778f28) ([#9210](https://github.com/yt-dlp/yt-dlp/issues/9210)) by [bashonly](https://github.com/bashonly) (With fixes in [73fcfa3](https://github.com/yt-dlp/yt-dlp/commit/73fcfa39f59113a8728249de2c4cee3025f17dc2))
+ - [Fix man pages generated by `pandoc>=3`](https://github.com/yt-dlp/yt-dlp/commit/fb44020fa98e47620b3aa1dab94b4c5b7bfb40bd) ([#7047](https://github.com/yt-dlp/yt-dlp/issues/7047)) by [t-nil](https://github.com/t-nil)
+- **ci**: [Bump `actions/setup-python` to v5](https://github.com/yt-dlp/yt-dlp/commit/b14e818b37f62e3224da157b3ad768b3f0815fcd) by [bashonly](https://github.com/bashonly)
+- **cleanup**
+ - [Build files cleanup](https://github.com/yt-dlp/yt-dlp/commit/867f637b95b342e1cb9f1dc3c6cf0ffe727187ce) by [bashonly](https://github.com/bashonly)
+ - [Fix infodict returned fields](https://github.com/yt-dlp/yt-dlp/commit/f4f9f6d00edcac6d4eb2b3fb78bf81326235d492) ([#8906](https://github.com/yt-dlp/yt-dlp/issues/8906)) by [seproDev](https://github.com/seproDev)
+ - [Fix typo in README.md](https://github.com/yt-dlp/yt-dlp/commit/292d60b1ed3b9fe5bcb2775a894cca99b0f9473e) ([#8894](https://github.com/yt-dlp/yt-dlp/issues/8894)) by [antonkesy](https://github.com/antonkesy)
+ - [Mark broken and remove dead extractors](https://github.com/yt-dlp/yt-dlp/commit/df773c3d5d1cc1f877cf8582f0072e386fc49318) ([#9238](https://github.com/yt-dlp/yt-dlp/issues/9238)) by [seproDev](https://github.com/seproDev)
+ - [Match both `http` and `https` in `_VALID_URL`](https://github.com/yt-dlp/yt-dlp/commit/a687226b48f71b874fa18b0165ec528d591f53fb) ([#8968](https://github.com/yt-dlp/yt-dlp/issues/8968)) by [seproDev](https://github.com/seproDev)
+ - [Remove unused code](https://github.com/yt-dlp/yt-dlp/commit/ed3bb2b0a12c44334e0d09481752dabf2ca1dc13) ([#8968](https://github.com/yt-dlp/yt-dlp/issues/8968)) by [pukkandan](https://github.com/pukkandan), [seproDev](https://github.com/seproDev)
+ - Miscellaneous
+    - [93240fc](https://github.com/yt-dlp/yt-dlp/commit/93240fc1848de4a94f25844c96e0dcd282ef1d3b) by [bashonly](https://github.com/bashonly), [Grub4K](https://github.com/Grub4K), [pukkandan](https://github.com/pukkandan), [seproDev](https://github.com/seproDev)
+ - [615a844](https://github.com/yt-dlp/yt-dlp/commit/615a84447e8322720be77a0e64298d7f42848693) by [bashonly](https://github.com/bashonly), [pukkandan](https://github.com/pukkandan), [seproDev](https://github.com/seproDev)
+- **devscripts**
+ - `install_deps`: [Add script and migrate to it](https://github.com/yt-dlp/yt-dlp/commit/b8a433aaca86b15cb9f1a451b0f69371d2fc22a9) by [bashonly](https://github.com/bashonly)
+ - `tomlparse`: [Add makeshift toml parser](https://github.com/yt-dlp/yt-dlp/commit/fd647775e27e030ab17387c249e2ebeba68f8ff0) by [Grub4K](https://github.com/Grub4K)
+- **docs**: [Misc Cleanup](https://github.com/yt-dlp/yt-dlp/commit/47ab66db0f083a76c7fba0f6e136b21dd5a93e3b) ([#8977](https://github.com/yt-dlp/yt-dlp/issues/8977)) by [Arthurszzz](https://github.com/Arthurszzz), [bashonly](https://github.com/bashonly), [Grub4K](https://github.com/Grub4K), [pukkandan](https://github.com/pukkandan), [seproDev](https://github.com/seproDev)
+- **test**
+ - [Skip source address tests if the address cannot be bound to](https://github.com/yt-dlp/yt-dlp/commit/69d31914952dd33082ac7019c6f76b43c45b9d06) ([#8900](https://github.com/yt-dlp/yt-dlp/issues/8900)) by [coletdjnz](https://github.com/coletdjnz)
+ - websockets: [Fix timeout test on Windows](https://github.com/yt-dlp/yt-dlp/commit/ac340d0745a9de5d494033e3507ef624ba25add3) ([#9344](https://github.com/yt-dlp/yt-dlp/issues/9344)) by [seproDev](https://github.com/seproDev)
+
+### 2023.12.30
+
+#### Core changes
+- [Fix format selection parse error for CPython 3.12](https://github.com/yt-dlp/yt-dlp/commit/00cdda4f6fe18712ced13dbc64b7ea10f323e268) ([#8797](https://github.com/yt-dlp/yt-dlp/issues/8797)) by [Grub4K](https://github.com/Grub4K)
+- [Let `read_stdin` obey `--quiet`](https://github.com/yt-dlp/yt-dlp/commit/a174c453ee1e853c584ceadeac17eef2bd433dc5) by [pukkandan](https://github.com/pukkandan)
+- [Merged with youtube-dl be008e6](https://github.com/yt-dlp/yt-dlp/commit/65de7d204ce88c0225df1321060304baab85dbd8) by [bashonly](https://github.com/bashonly), [dirkf](https://github.com/dirkf), [Grub4K](https://github.com/Grub4K)
+- [Parse `release_year` from `release_date`](https://github.com/yt-dlp/yt-dlp/commit/1732eccc0a40256e076bf0435a29f0f1d8419280) ([#8524](https://github.com/yt-dlp/yt-dlp/issues/8524)) by [seproDev](https://github.com/seproDev)
+- [Release workflow and Updater cleanup](https://github.com/yt-dlp/yt-dlp/commit/632b8ee54eb2df8ac6e20746a0bd95b7ebb053aa) ([#8640](https://github.com/yt-dlp/yt-dlp/issues/8640)) by [bashonly](https://github.com/bashonly)
+- [Remove Python 3.7 support](https://github.com/yt-dlp/yt-dlp/commit/f4b95acafcd69a50040730dfdf732e797278fdcc) ([#8361](https://github.com/yt-dlp/yt-dlp/issues/8361)) by [bashonly](https://github.com/bashonly)
+- [Support `NO_COLOR` environment variable](https://github.com/yt-dlp/yt-dlp/commit/a0b19d319a6ce8b7059318fa17a34b144fde1785) ([#8385](https://github.com/yt-dlp/yt-dlp/issues/8385)) by [Grub4K](https://github.com/Grub4K), [prettykool](https://github.com/prettykool)
+- **outtmpl**: [Support multiplication](https://github.com/yt-dlp/yt-dlp/commit/993edd3f6e17e966c763bc86dc34125445cec6b6) by [pukkandan](https://github.com/pukkandan)
+- **utils**: `traverse_obj`: [Move `is_user_input` into output template](https://github.com/yt-dlp/yt-dlp/commit/0b6f829b1dfda15d3c1d7d1fbe4ea6102c26dd24) ([#8673](https://github.com/yt-dlp/yt-dlp/issues/8673)) by [Grub4K](https://github.com/Grub4K)
+- **webvtt**: [Allow spaces before newlines for CueBlock](https://github.com/yt-dlp/yt-dlp/commit/15f22b4880b6b3f71f350c64d70976ae65b9f1ca) ([#7681](https://github.com/yt-dlp/yt-dlp/issues/7681)) by [TSRBerry](https://github.com/TSRBerry) (With fixes in [298230e](https://github.com/yt-dlp/yt-dlp/commit/298230e550886b746c266724dd701d842ca2696e) by [pukkandan](https://github.com/pukkandan))
+
+#### Extractor changes
+- [Add `media_type` field](https://github.com/yt-dlp/yt-dlp/commit/e370f9ec36972d06100a3db893b397bfc1b07b4d) by [trainman261](https://github.com/trainman261)
+- [Extract from `media` elements in SMIL manifests](https://github.com/yt-dlp/yt-dlp/commit/ddb2d7588bea48bae965dbfabe6df6550c9d3d43) ([#8504](https://github.com/yt-dlp/yt-dlp/issues/8504)) by [seproDev](https://github.com/seproDev)
+- **abematv**: [Fix season metadata](https://github.com/yt-dlp/yt-dlp/commit/cc07f5cc85d9e2a6cd0bedb9d961665eea0d6047) ([#8607](https://github.com/yt-dlp/yt-dlp/issues/8607)) by [middlingphys](https://github.com/middlingphys)
+- **allstar**: [Add extractors](https://github.com/yt-dlp/yt-dlp/commit/3237f8ba29fe13bf95ff42b1e48b5b5109715feb) ([#8274](https://github.com/yt-dlp/yt-dlp/issues/8274)) by [S-Aarab](https://github.com/S-Aarab)
+- **altcensored**: [Add extractor](https://github.com/yt-dlp/yt-dlp/commit/3f90813f0617e0d21302398010de7496c9ae36aa) ([#8291](https://github.com/yt-dlp/yt-dlp/issues/8291)) by [drzraf](https://github.com/drzraf)
+- **ard**: [Overhaul extractors](https://github.com/yt-dlp/yt-dlp/commit/5f009a094f0e8450792b097c4c8273622778052d) ([#8878](https://github.com/yt-dlp/yt-dlp/issues/8878)) by [seproDev](https://github.com/seproDev)
+- **ardbetamediathek**: [Fix series extraction](https://github.com/yt-dlp/yt-dlp/commit/1f8bd8eba82ba10ddb49ee7cc0be4540dab103d5) ([#8687](https://github.com/yt-dlp/yt-dlp/issues/8687)) by [lstrojny](https://github.com/lstrojny)
+- **bbc**
+ - [Extract more formats](https://github.com/yt-dlp/yt-dlp/commit/c919b68f7e79ea5010f75f648d3c9e45405a8011) ([#8321](https://github.com/yt-dlp/yt-dlp/issues/8321)) by [barsnick](https://github.com/barsnick), [dirkf](https://github.com/dirkf)
+ - [Fix JSON parsing bug](https://github.com/yt-dlp/yt-dlp/commit/19741ab8a401ec64d5e84fdbfcfb141d105e7bc8) by [bashonly](https://github.com/bashonly)
+- **bfmtv**: [Fix extractors](https://github.com/yt-dlp/yt-dlp/commit/4903f452b68efb62dadf22e81be8c7934fc743e7) ([#8651](https://github.com/yt-dlp/yt-dlp/issues/8651)) by [bashonly](https://github.com/bashonly)
+- **bilibili**: [Support courses and interactive videos](https://github.com/yt-dlp/yt-dlp/commit/9f09bdcfcb8e2b4b2decdc30d35d34b993bc7a94) ([#8343](https://github.com/yt-dlp/yt-dlp/issues/8343)) by [c-basalt](https://github.com/c-basalt)
+- **bitchute**: [Fix and improve metadata extraction](https://github.com/yt-dlp/yt-dlp/commit/b1a1ec1540605d2ea7abdb63336ffb1c56bf6316) ([#8507](https://github.com/yt-dlp/yt-dlp/issues/8507)) by [SirElderling](https://github.com/SirElderling)
+- **box**: [Fix formats extraction](https://github.com/yt-dlp/yt-dlp/commit/5a230233d6fce06f4abd1fce0dc92b948e6f780b) ([#8649](https://github.com/yt-dlp/yt-dlp/issues/8649)) by [bashonly](https://github.com/bashonly)
+- **bundestag**: [Add extractor](https://github.com/yt-dlp/yt-dlp/commit/00a3e47bf5440c96025a76e08337ff2a475ed83e) ([#8783](https://github.com/yt-dlp/yt-dlp/issues/8783)) by [Grub4K](https://github.com/Grub4K)
+- **drtv**: [Set default ext for m3u8 formats](https://github.com/yt-dlp/yt-dlp/commit/f96ab86cd837b1b5823baa87d144e15322ee9298) ([#8590](https://github.com/yt-dlp/yt-dlp/issues/8590)) by [seproDev](https://github.com/seproDev)
+- **duoplay**: [Add extractor](https://github.com/yt-dlp/yt-dlp/commit/66a0127d45033c698bdbedf162cddc55d9e7b906) ([#8542](https://github.com/yt-dlp/yt-dlp/issues/8542)) by [glensc](https://github.com/glensc)
+- **eplus**: [Add login support and DRM detection](https://github.com/yt-dlp/yt-dlp/commit/d5d1517e7d838500800d193ac3234b06e89654cd) ([#8661](https://github.com/yt-dlp/yt-dlp/issues/8661)) by [pzhlkj6612](https://github.com/pzhlkj6612)
+- **facebook**
+ - [Fix Memories extraction](https://github.com/yt-dlp/yt-dlp/commit/c39358a54bc6675ae0c50b81024e5a086e41656a) ([#8681](https://github.com/yt-dlp/yt-dlp/issues/8681)) by [kclauhk](https://github.com/kclauhk)
+ - [Improve subtitles extraction](https://github.com/yt-dlp/yt-dlp/commit/9cafb9ff17e14475a35c9a58b5bb010c86c9db4b) ([#8296](https://github.com/yt-dlp/yt-dlp/issues/8296)) by [kclauhk](https://github.com/kclauhk)
+- **floatplane**: [Add extractors](https://github.com/yt-dlp/yt-dlp/commit/628fa244bbce2ad39775a5959e99588f30cac152) ([#8639](https://github.com/yt-dlp/yt-dlp/issues/8639)) by [seproDev](https://github.com/seproDev)
+- **francetv**: [Improve metadata extraction](https://github.com/yt-dlp/yt-dlp/commit/71f28097fec1c9e029f74b68a4eadc8915399840) ([#8409](https://github.com/yt-dlp/yt-dlp/issues/8409)) by [Fymyte](https://github.com/Fymyte)
+- **instagram**: [Fix stories extraction](https://github.com/yt-dlp/yt-dlp/commit/50eaea9fd7787546b53660e736325fa31c77765d) ([#8843](https://github.com/yt-dlp/yt-dlp/issues/8843)) by [bashonly](https://github.com/bashonly)
+- **joqrag**: [Add extractor](https://github.com/yt-dlp/yt-dlp/commit/db8b4edc7d0bd27da462f6fe82ff6e13e3d68a04) ([#8384](https://github.com/yt-dlp/yt-dlp/issues/8384)) by [pzhlkj6612](https://github.com/pzhlkj6612)
+- **litv**: [Fix premium content extraction](https://github.com/yt-dlp/yt-dlp/commit/f45c4efcd928a173e1300a8f1ce4258e70c969b1) ([#8842](https://github.com/yt-dlp/yt-dlp/issues/8842)) by [bashonly](https://github.com/bashonly)
+- **maariv**: [Add extractor](https://github.com/yt-dlp/yt-dlp/commit/c5f01bf7d4b9426c87c3f8248de23934a56579e0) ([#8331](https://github.com/yt-dlp/yt-dlp/issues/8331)) by [amir16yp](https://github.com/amir16yp)
+- **mediastream**: [Fix authenticated format extraction](https://github.com/yt-dlp/yt-dlp/commit/b03c89309eb141be1a1eceeeb7475dd3b7529ad9) ([#8657](https://github.com/yt-dlp/yt-dlp/issues/8657)) by [NickCis](https://github.com/NickCis)
+- **nebula**: [Overhaul extractors](https://github.com/yt-dlp/yt-dlp/commit/45d82be65f71bb05506bd55376c6fdb36bc54142) ([#8566](https://github.com/yt-dlp/yt-dlp/issues/8566)) by [elyse0](https://github.com/elyse0), [pukkandan](https://github.com/pukkandan), [seproDev](https://github.com/seproDev)
+- **nintendo**: [Fix Nintendo Direct extraction](https://github.com/yt-dlp/yt-dlp/commit/1d24da6c899ef280d8b0a48a5e280ecd5d39cdf4) ([#8609](https://github.com/yt-dlp/yt-dlp/issues/8609)) by [Grub4K](https://github.com/Grub4K)
+- **ondemandkorea**: [Fix upgraded format extraction](https://github.com/yt-dlp/yt-dlp/commit/04a5e06350e3ef7c03f94f2f3f90dd96c6411152) ([#8677](https://github.com/yt-dlp/yt-dlp/issues/8677)) by [seproDev](https://github.com/seproDev)
+- **pr0gramm**: [Support variant formats and subtitles](https://github.com/yt-dlp/yt-dlp/commit/f98a3305eb124a0c375d03209d5c5a64fe1766c8) ([#8674](https://github.com/yt-dlp/yt-dlp/issues/8674)) by [Grub4K](https://github.com/Grub4K)
+- **rinsefm**: [Add extractor](https://github.com/yt-dlp/yt-dlp/commit/c91af948e43570025e4aa887e248fd025abae394) ([#8778](https://github.com/yt-dlp/yt-dlp/issues/8778)) by [hashFactory](https://github.com/hashFactory)
+- **rudovideo**: [Add extractor](https://github.com/yt-dlp/yt-dlp/commit/0d531c35eca4c2eb36e160530a7a333edbc727cc) ([#8664](https://github.com/yt-dlp/yt-dlp/issues/8664)) by [nicodato](https://github.com/nicodato)
+- **theguardian**: [Add extractors](https://github.com/yt-dlp/yt-dlp/commit/1fa3f24d4b5d22176b11d78420f1f4b64a5af0a8) ([#8535](https://github.com/yt-dlp/yt-dlp/issues/8535)) by [SirElderling](https://github.com/SirElderling)
+- **theplatform**: [Extract more metadata](https://github.com/yt-dlp/yt-dlp/commit/7e09c147fdccb44806bbf601573adc4b77210a89) ([#8635](https://github.com/yt-dlp/yt-dlp/issues/8635)) by [trainman261](https://github.com/trainman261)
+- **twitcasting**: [Detect livestreams via API and `show` page](https://github.com/yt-dlp/yt-dlp/commit/585d0ed9abcfcb957f2b2684b8ad43c3af160383) ([#8601](https://github.com/yt-dlp/yt-dlp/issues/8601)) by [bashonly](https://github.com/bashonly), [JC-Chung](https://github.com/JC-Chung)
+- **twitcastinguser**: [Fix extraction](https://github.com/yt-dlp/yt-dlp/commit/ff2fde1b8f922fd34bae6172602008cd67c07c93) ([#8650](https://github.com/yt-dlp/yt-dlp/issues/8650)) by [bashonly](https://github.com/bashonly)
+- **twitter**
+ - [Extract stale tweets](https://github.com/yt-dlp/yt-dlp/commit/1c54a98e19d047e7c15184237b6ef8ad50af489c) ([#8724](https://github.com/yt-dlp/yt-dlp/issues/8724)) by [bashonly](https://github.com/bashonly)
+ - [Prioritize m3u8 formats](https://github.com/yt-dlp/yt-dlp/commit/e7d22348e77367740da78a3db27167ecf894b7c9) ([#8826](https://github.com/yt-dlp/yt-dlp/issues/8826)) by [bashonly](https://github.com/bashonly)
+ - [Work around API rate-limit](https://github.com/yt-dlp/yt-dlp/commit/116c268438ea4d3738f6fa502c169081ca8f0ee7) ([#8825](https://github.com/yt-dlp/yt-dlp/issues/8825)) by [bashonly](https://github.com/bashonly)
+ - broadcast: [Extract `concurrent_view_count`](https://github.com/yt-dlp/yt-dlp/commit/6fe82491ed622b948c512cf4aab46ac3a234ae0a) ([#8600](https://github.com/yt-dlp/yt-dlp/issues/8600)) by [sonmezberkay](https://github.com/sonmezberkay)
+- **vidly**: [Add extractor](https://github.com/yt-dlp/yt-dlp/commit/34df1c1f60fa652c0a6a5c712b06c10e45daf6b7) ([#8612](https://github.com/yt-dlp/yt-dlp/issues/8612)) by [seproDev](https://github.com/seproDev)
+- **vocaroo**: [Do not use deprecated `getheader`](https://github.com/yt-dlp/yt-dlp/commit/f223b1b0789f65e06619dcc9fc9e74f50d259379) ([#8606](https://github.com/yt-dlp/yt-dlp/issues/8606)) by [qbnu](https://github.com/qbnu)
+- **vvvvid**: [Set user-agent to fix extraction](https://github.com/yt-dlp/yt-dlp/commit/1725e943b0e8a8b585305660d4611e684374409c) ([#8615](https://github.com/yt-dlp/yt-dlp/issues/8615)) by [Kyraminol](https://github.com/Kyraminol)
+- **youtube**
+ - [Fix `like_count` extraction](https://github.com/yt-dlp/yt-dlp/commit/6b5d93b0b0240e287389d1d43b2d5293e18aa4cc) ([#8763](https://github.com/yt-dlp/yt-dlp/issues/8763)) by [Ganesh910](https://github.com/Ganesh910)
+ - [Improve detection of faulty HLS formats](https://github.com/yt-dlp/yt-dlp/commit/bb5a54e6db2422bbd155d93a0e105b6616c09467) ([#8646](https://github.com/yt-dlp/yt-dlp/issues/8646)) by [bashonly](https://github.com/bashonly)
+ - [Return empty playlist when channel/tab has no videos](https://github.com/yt-dlp/yt-dlp/commit/044886c220620a7679109e92352890e18b6079e3) by [pukkandan](https://github.com/pukkandan)
+ - [Support cf.piped.video](https://github.com/yt-dlp/yt-dlp/commit/6a9c7a2b52655bacfa7ab2da24fd0d14a6fff495) ([#8514](https://github.com/yt-dlp/yt-dlp/issues/8514)) by [OIRNOIR](https://github.com/OIRNOIR)
+- **zingmp3**: [Add support for radio and podcasts](https://github.com/yt-dlp/yt-dlp/commit/64de1a4c25bada90374b88d7353754fe8fbfcc51) ([#7189](https://github.com/yt-dlp/yt-dlp/issues/7189)) by [hatienl0i261299](https://github.com/hatienl0i261299)
+
+#### Postprocessor changes
+- **ffmpegmetadata**: [Embed stream metadata in single format downloads](https://github.com/yt-dlp/yt-dlp/commit/deeb13eae82e60f82a2c0c5861f460399a997528) ([#8647](https://github.com/yt-dlp/yt-dlp/issues/8647)) by [bashonly](https://github.com/bashonly)
+
+#### Networking changes
+- [Strip whitespace around header values](https://github.com/yt-dlp/yt-dlp/commit/196eb0fe77b78e2e5ca02c506c3837c2b1a7964c) ([#8802](https://github.com/yt-dlp/yt-dlp/issues/8802)) by [coletdjnz](https://github.com/coletdjnz)
+- **Request Handler**: websockets: [Migrate websockets to networking framework](https://github.com/yt-dlp/yt-dlp/commit/ccfd70f4c24b579c72123ca76ab50164f8f122b7) ([#7720](https://github.com/yt-dlp/yt-dlp/issues/7720)) by [coletdjnz](https://github.com/coletdjnz)
+
+#### Misc. changes
+- **ci**
+ - [Concurrency optimizations](https://github.com/yt-dlp/yt-dlp/commit/f124fa458826308afc86cf364c509f857686ecfd) ([#8614](https://github.com/yt-dlp/yt-dlp/issues/8614)) by [Grub4K](https://github.com/Grub4K)
+ - [Run core tests only for core changes](https://github.com/yt-dlp/yt-dlp/commit/13b3cb3c2b7169a1e17d6fc62593bf744170521c) ([#8841](https://github.com/yt-dlp/yt-dlp/issues/8841)) by [Grub4K](https://github.com/Grub4K)
+- **cleanup**
+ - [Fix spelling of `IE_NAME`](https://github.com/yt-dlp/yt-dlp/commit/bc4ab17b38f01000d99c5c2bedec89721fee65ec) ([#8810](https://github.com/yt-dlp/yt-dlp/issues/8810)) by [barsnick](https://github.com/barsnick)
+ - [Remove dead extractors](https://github.com/yt-dlp/yt-dlp/commit/9751a457cfdb18bf99d9ee0d10e4e6a594502bbf) ([#8604](https://github.com/yt-dlp/yt-dlp/issues/8604)) by [seproDev](https://github.com/seproDev)
+ - Miscellaneous: [f9fb3ce](https://github.com/yt-dlp/yt-dlp/commit/f9fb3ce86e3c6a0c3c33b45392b8d7288bceba76) by [bashonly](https://github.com/bashonly), [Grub4K](https://github.com/Grub4K), [pukkandan](https://github.com/pukkandan), [seproDev](https://github.com/seproDev)
+- **devscripts**: `run_tests`: [Create Python script](https://github.com/yt-dlp/yt-dlp/commit/2d1d683a541d71f3d3bb999dfe8eeb1976fb91ce) ([#8720](https://github.com/yt-dlp/yt-dlp/issues/8720)) by [Grub4K](https://github.com/Grub4K) (With fixes in [225cf2b](https://github.com/yt-dlp/yt-dlp/commit/225cf2b830a1de2c5eacd257edd2a01aed1e1114))
+- **docs**: [Update youtube-dl merge commit in `README.md`](https://github.com/yt-dlp/yt-dlp/commit/f10589e3453009bb523f55849bba144c9b91cf2a) by [bashonly](https://github.com/bashonly)
+- **test**: networking: [Update tests for OpenSSL 3.2](https://github.com/yt-dlp/yt-dlp/commit/37755a037e612bfc608c3d4722e8ef2ce6a022ee) ([#8814](https://github.com/yt-dlp/yt-dlp/issues/8814)) by [bashonly](https://github.com/bashonly)
+
+### 2023.11.16
+
+#### Extractor changes
+- **abc.net.au**: iview, showseries: [Fix extraction](https://github.com/yt-dlp/yt-dlp/commit/15cb3528cbda7b6198f49a6b5953c226d701696b) ([#8586](https://github.com/yt-dlp/yt-dlp/issues/8586)) by [bashonly](https://github.com/bashonly)
+- **beatbump**: [Update `_VALID_URL`](https://github.com/yt-dlp/yt-dlp/commit/21dc069bea2d4d99345dd969e098f4535c751d45) ([#8576](https://github.com/yt-dlp/yt-dlp/issues/8576)) by [seproDev](https://github.com/seproDev)
+- **dailymotion**: [Improve `_VALID_URL`](https://github.com/yt-dlp/yt-dlp/commit/a489f071508ec5caf5f32052d142afe86c28df7a) ([#7692](https://github.com/yt-dlp/yt-dlp/issues/7692)) by [TravisDupes](https://github.com/TravisDupes)
+- **drtv**: [Fix extractor](https://github.com/yt-dlp/yt-dlp/commit/0783fd558ed0d3a8bc754beb75a406256f8b97b2) ([#8484](https://github.com/yt-dlp/yt-dlp/issues/8484)) by [almx](https://github.com/almx), [seproDev](https://github.com/seproDev)
+- **eltrecetv**: [Add extractor](https://github.com/yt-dlp/yt-dlp/commit/dcfad52812aa8ce007cefbfbe63f58b49f6b1046) ([#8216](https://github.com/yt-dlp/yt-dlp/issues/8216)) by [elivinsky](https://github.com/elivinsky)
+- **jiosaavn**: [Add extractors](https://github.com/yt-dlp/yt-dlp/commit/b530118e7f48232cacf8050d79a6b20bdfcf5468) ([#8307](https://github.com/yt-dlp/yt-dlp/issues/8307)) by [awalgarg](https://github.com/awalgarg)
+- **njpwworld**: [Remove](https://github.com/yt-dlp/yt-dlp/commit/e569c2d1f4b665795a2b64f0aaf7f76930664233) ([#8570](https://github.com/yt-dlp/yt-dlp/issues/8570)) by [aarubui](https://github.com/aarubui)
+- **tv5mondeplus**: [Extract subtitles](https://github.com/yt-dlp/yt-dlp/commit/0f634dba3afdc429ece8839b02f6d56c27b7973a) ([#4209](https://github.com/yt-dlp/yt-dlp/issues/4209)) by [FrankZ85](https://github.com/FrankZ85)
+- **twitcasting**: [Fix livestream detection](https://github.com/yt-dlp/yt-dlp/commit/2325d03aa7bb80f56ba52cd6992258e44727b424) ([#8574](https://github.com/yt-dlp/yt-dlp/issues/8574)) by [JC-Chung](https://github.com/JC-Chung)
+- **zenyandex**: [Fix extraction](https://github.com/yt-dlp/yt-dlp/commit/5efe68b73cbf6e907c2e6a3aa338664385084184) ([#8454](https://github.com/yt-dlp/yt-dlp/issues/8454)) by [starius](https://github.com/starius)
+
+#### Misc. changes
+- **build**: [Make `secretstorage` an optional dependency](https://github.com/yt-dlp/yt-dlp/commit/24f827875c6ba513f12ed09a3aef2bbed223760d) ([#8585](https://github.com/yt-dlp/yt-dlp/issues/8585)) by [bashonly](https://github.com/bashonly)
+
+### 2023.11.14
+
+#### Important changes
+- **The release channels have been adjusted!**
+ * [`master`](https://github.com/yt-dlp/yt-dlp-master-builds) builds are made after each push, containing the latest fixes (but also possibly bugs). This was previously the `nightly` channel.
+    * [`nightly`](https://github.com/yt-dlp/yt-dlp-nightly-builds) builds are now made once a day, if there have been any changes.
+- Security: [[CVE-2023-46121](https://cve.mitre.org/cgi-bin/cvename.cgi?name=CVE-2023-46121)] Patch [Generic Extractor MITM Vulnerability via Arbitrary Proxy Injection](https://github.com/yt-dlp/yt-dlp/security/advisories/GHSA-3ch3-jhc6-5r8x)
+ - Disallow smuggling of arbitrary `http_headers`; extractors now only use specific headers
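+
+As background on the fix above, here is a minimal sketch of the URL "smuggling" mechanism, using the `smuggle_url`/`unsmuggle_url` helpers from `yt_dlp.utils` (the URL and header values below are hypothetical). After this patch, extractors honor only specific smuggled headers rather than arbitrary `http_headers`:
+
+```python
+# Minimal sketch (hypothetical values): extra data is packed into the URL
+# fragment and recovered later by the extractor that handles the URL.
+from yt_dlp.utils import smuggle_url, unsmuggle_url
+
+url = smuggle_url('https://example.com/video', {'http_headers': {'Cookie': 'a=b'}})
+plain_url, data = unsmuggle_url(url)
+assert plain_url == 'https://example.com/video'
+assert data == {'http_headers': {'Cookie': 'a=b'}}  # no longer trusted wholesale
+```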
+
+#### Core changes
+- [Add `--compat-option manifest-filesize-approx`](https://github.com/yt-dlp/yt-dlp/commit/10025b715ea01489557eb2c5a3cc04d361fcdb52) ([#8356](https://github.com/yt-dlp/yt-dlp/issues/8356)) by [bashonly](https://github.com/bashonly)
+- [Fix format sorting with `--load-info-json`](https://github.com/yt-dlp/yt-dlp/commit/595ea4a99b726b8fe9463e7853b7053978d0544e) ([#8521](https://github.com/yt-dlp/yt-dlp/issues/8521)) by [bashonly](https://github.com/bashonly)
+- [Include build origin in verbose output](https://github.com/yt-dlp/yt-dlp/commit/20314dd46f25e0e0a7e985a7804049aefa8b909f) by [bashonly](https://github.com/bashonly), [Grub4K](https://github.com/Grub4K)
+- [Only ensure playlist thumbnail dir if writing thumbs](https://github.com/yt-dlp/yt-dlp/commit/a40e0b37dfc8c26916b0e01aa3f29f3bc42250b6) ([#8373](https://github.com/yt-dlp/yt-dlp/issues/8373)) by [bashonly](https://github.com/bashonly)
+- **update**: [Overhaul self-updater](https://github.com/yt-dlp/yt-dlp/commit/0b6ad22e6a432006a75df968f0283e6c6b3cfae6) by [bashonly](https://github.com/bashonly), [Grub4K](https://github.com/Grub4K)
+
+#### Extractor changes
+- [Do not smuggle `http_headers`](https://github.com/yt-dlp/yt-dlp/commit/f04b5bedad7b281bee9814686bba1762bae092eb) by [coletdjnz](https://github.com/coletdjnz)
+- [Do not test truth value of `xml.etree.ElementTree.Element`](https://github.com/yt-dlp/yt-dlp/commit/d4f14a72dc1dd79396e0e80980268aee902b61e4) ([#8582](https://github.com/yt-dlp/yt-dlp/issues/8582)) by [bashonly](https://github.com/bashonly)
+- **brilliantpala**: [Fix cookies support](https://github.com/yt-dlp/yt-dlp/commit/9b5bedf13a3323074daceb0ec6ebb3cc6e0b9684) ([#8352](https://github.com/yt-dlp/yt-dlp/issues/8352)) by [pzhlkj6612](https://github.com/pzhlkj6612)
+- **generic**: [Improve direct video link ext detection](https://github.com/yt-dlp/yt-dlp/commit/4ce2f29a50fcfb9920e6f2ffe42192945a2bad7e) ([#8340](https://github.com/yt-dlp/yt-dlp/issues/8340)) by [bashonly](https://github.com/bashonly)
+- **laxarxames**: [Add extractor](https://github.com/yt-dlp/yt-dlp/commit/312a2d1e8bc247264f9d85c5ec764e33aa0133b5) ([#8412](https://github.com/yt-dlp/yt-dlp/issues/8412)) by [aniolpages](https://github.com/aniolpages)
+- **n-tv.de**: [Fix extractor](https://github.com/yt-dlp/yt-dlp/commit/8afd9468b0c822843bc480d366d1c86698daabfb) ([#8414](https://github.com/yt-dlp/yt-dlp/issues/8414)) by [1100101](https://github.com/1100101)
+- **neteasemusic**: [Improve metadata extraction](https://github.com/yt-dlp/yt-dlp/commit/46acc418a53470b7f32581b3309c3cb87aa8488d) ([#8531](https://github.com/yt-dlp/yt-dlp/issues/8531)) by [LoserFox](https://github.com/LoserFox)
+- **nhk**: [Improve metadata extraction](https://github.com/yt-dlp/yt-dlp/commit/54579be4364e148277c32e20a5c3efc2c3f52f5b) ([#8388](https://github.com/yt-dlp/yt-dlp/issues/8388)) by [garret1317](https://github.com/garret1317)
+- **novaembed**: [Improve `_VALID_URL`](https://github.com/yt-dlp/yt-dlp/commit/3ff494f6f41c27549420fa88be27555bd449ffdc) ([#8368](https://github.com/yt-dlp/yt-dlp/issues/8368)) by [peci1](https://github.com/peci1)
+- **npo**: [Send `POST` request to streams API endpoint](https://github.com/yt-dlp/yt-dlp/commit/8e02a4dcc800f9444e9d461edc41edd7b662f435) ([#8413](https://github.com/yt-dlp/yt-dlp/issues/8413)) by [bartbroere](https://github.com/bartbroere)
+- **ondemandkorea**: [Overhaul extractor](https://github.com/yt-dlp/yt-dlp/commit/05adfd883a4f2ecae0267e670a62a2e45c351aeb) ([#8386](https://github.com/yt-dlp/yt-dlp/issues/8386)) by [seproDev](https://github.com/seproDev)
+- **orf**: podcast: [Add extractor](https://github.com/yt-dlp/yt-dlp/commit/6ba3085616652cbf05d1858efc321fdbfc4c6119) ([#8486](https://github.com/yt-dlp/yt-dlp/issues/8486)) by [Esokrates](https://github.com/Esokrates)
+- **polskieradio**: audition: [Fix playlist extraction](https://github.com/yt-dlp/yt-dlp/commit/464327acdb353ceb91d2115163a5a9621b22fe0d) ([#8459](https://github.com/yt-dlp/yt-dlp/issues/8459)) by [shubhexists](https://github.com/shubhexists)
+- **qdance**: [Update `_VALID_URL`](https://github.com/yt-dlp/yt-dlp/commit/177f0d963e4b9db749805c482e6f288354c8be84) ([#8426](https://github.com/yt-dlp/yt-dlp/issues/8426)) by [bashonly](https://github.com/bashonly)
+- **radiocomercial**: [Add extractors](https://github.com/yt-dlp/yt-dlp/commit/ef12dbdcd3e7264bd3d744c1e3107597bd23ad35) ([#8508](https://github.com/yt-dlp/yt-dlp/issues/8508)) by [SirElderling](https://github.com/SirElderling)
+- **sbs.co.kr**: [Add extractors](https://github.com/yt-dlp/yt-dlp/commit/25a4bd345a0dcfece6fef752d4537eb403da94d9) ([#8326](https://github.com/yt-dlp/yt-dlp/issues/8326)) by [seproDev](https://github.com/seproDev)
+- **theatercomplextown**: [Add extractors](https://github.com/yt-dlp/yt-dlp/commit/2863fcf2b6876d0c7965ff7d6d9242eea653dc6b) ([#8560](https://github.com/yt-dlp/yt-dlp/issues/8560)) by [bashonly](https://github.com/bashonly)
+- **thisav**: [Remove](https://github.com/yt-dlp/yt-dlp/commit/cb480e390d85fb3a598c1b6d5eef3438ce729fc9) ([#8346](https://github.com/yt-dlp/yt-dlp/issues/8346)) by [bashonly](https://github.com/bashonly)
+- **thisoldhouse**: [Add login support](https://github.com/yt-dlp/yt-dlp/commit/c76c96677ff6a056f5844a568ef05ee22c46d6f4) ([#8561](https://github.com/yt-dlp/yt-dlp/issues/8561)) by [bashonly](https://github.com/bashonly)
+- **twitcasting**: [Fix livestream extraction](https://github.com/yt-dlp/yt-dlp/commit/7b8b1cf5eb8bf44ce70bc24e1f56f0dba2737e98) ([#8427](https://github.com/yt-dlp/yt-dlp/issues/8427)) by [JC-Chung](https://github.com/JC-Chung), [saintliao](https://github.com/saintliao)
+- **twitter**
+ - broadcast
+ - [Improve metadata extraction](https://github.com/yt-dlp/yt-dlp/commit/7d337ca977d73a0a6c07ab481ed8faa8f6ff8726) ([#8383](https://github.com/yt-dlp/yt-dlp/issues/8383)) by [HitomaruKonpaku](https://github.com/HitomaruKonpaku)
+ - [Support `--wait-for-video`](https://github.com/yt-dlp/yt-dlp/commit/f6e97090d2ed9e05441ab0f4bec3559b816d7a00) ([#8475](https://github.com/yt-dlp/yt-dlp/issues/8475)) by [bashonly](https://github.com/bashonly)
+- **weibo**: [Fix extraction](https://github.com/yt-dlp/yt-dlp/commit/15b252dfd2c6807fe57afc5a95e59abadb32ccd2) ([#8463](https://github.com/yt-dlp/yt-dlp/issues/8463)) by [c-basalt](https://github.com/c-basalt)
+- **weverse**: [Fix login error handling](https://github.com/yt-dlp/yt-dlp/commit/4a601c9eff9fb42e24a4c8da3fa03628e035b35b) ([#8458](https://github.com/yt-dlp/yt-dlp/issues/8458)) by [seproDev](https://github.com/seproDev)
+- **youtube**: [Check newly uploaded iOS HLS formats](https://github.com/yt-dlp/yt-dlp/commit/ef79d20dc9d27ac002a7196f073b37f2f2721aed) ([#8336](https://github.com/yt-dlp/yt-dlp/issues/8336)) by [bashonly](https://github.com/bashonly)
+- **zoom**: [Extract combined view formats](https://github.com/yt-dlp/yt-dlp/commit/3906de07551fedb00b789345bf24cc27d6ddf128) ([#7847](https://github.com/yt-dlp/yt-dlp/issues/7847)) by [Mipsters](https://github.com/Mipsters)
+
+#### Downloader changes
+- **aria2c**: [Remove duplicate `--file-allocation=none`](https://github.com/yt-dlp/yt-dlp/commit/21b25281c51523620706b11bfc1c4a889858e1f2) ([#8332](https://github.com/yt-dlp/yt-dlp/issues/8332)) by [CrendKing](https://github.com/CrendKing)
+- **dash**: [Force native downloader for `--live-from-start`](https://github.com/yt-dlp/yt-dlp/commit/2622c804d1a5accc3045db398e0fc52074f4bdb3) ([#8339](https://github.com/yt-dlp/yt-dlp/issues/8339)) by [bashonly](https://github.com/bashonly)
+
+#### Networking changes
+- **Request Handler**: requests: [Add handler for `requests` HTTP library (#3668)](https://github.com/yt-dlp/yt-dlp/commit/8a8b54523addf46dfd50ef599761a81bc22362e6) by [bashonly](https://github.com/bashonly), [coletdjnz](https://github.com/coletdjnz), [Grub4K](https://github.com/Grub4K) (With fixes in [4e38e2a](https://github.com/yt-dlp/yt-dlp/commit/4e38e2ae9d7380015349e6aee59c78bb3938befd))
+
+ Adds support for HTTPS proxies and persistent connections (keep-alive)
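+
+As a usage sketch through the Python API (the proxy endpoint below is hypothetical; `proxy` is an existing `YoutubeDL` option):
+
+```python
+# Minimal sketch: route traffic through an HTTPS proxy; with the requests
+# handler, connections are also reused across requests (keep-alive).
+import yt_dlp
+
+opts = {'proxy': 'https://127.0.0.1:8443'}  # hypothetical HTTPS proxy
+with yt_dlp.YoutubeDL(opts) as ydl:
+    ydl.download(['https://example.com/video'])
+```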
+
+#### Misc. changes
+- **build**
+ - [Include secretstorage in Linux builds](https://github.com/yt-dlp/yt-dlp/commit/9970d74c8383432c6c8779aa47d3253dcf412b14) by [bashonly](https://github.com/bashonly)
+ - [Overhaul and unify release workflow](https://github.com/yt-dlp/yt-dlp/commit/1d03633c5a1621b9f3a756f0a4f9dc61fab3aeaa) by [bashonly](https://github.com/bashonly), [Grub4K](https://github.com/Grub4K)
+- **ci**
+ - [Bump `actions/checkout` to v4](https://github.com/yt-dlp/yt-dlp/commit/5438593a35b7b042fc48fe29cad0b9039f07c9bb) by [bashonly](https://github.com/bashonly)
+ - [Run core tests with dependencies](https://github.com/yt-dlp/yt-dlp/commit/700444c23ddb65f618c2abd942acdc0c58c650b1) by [bashonly](https://github.com/bashonly), [coletdjnz](https://github.com/coletdjnz)
+- **cleanup**
+ - [Fix changelog typo](https://github.com/yt-dlp/yt-dlp/commit/a9d3f4b20a3533d2a40104c85bc2cc6c2564c800) by [bashonly](https://github.com/bashonly)
+ - [Update documentation for master and nightly channels](https://github.com/yt-dlp/yt-dlp/commit/a00af29853b8c7350ce086f4cab8c2c9cf2fcf1d) by [bashonly](https://github.com/bashonly), [Grub4K](https://github.com/Grub4K)
+ - Miscellaneous: [b012271](https://github.com/yt-dlp/yt-dlp/commit/b012271d01b59759e4eefeab0308698cd9e7224c) by [bashonly](https://github.com/bashonly), [coletdjnz](https://github.com/coletdjnz), [dirkf](https://github.com/dirkf), [gamer191](https://github.com/gamer191), [Grub4K](https://github.com/Grub4K), [seproDev](https://github.com/seproDev)
+- **test**: update: [Implement simple updater unit tests](https://github.com/yt-dlp/yt-dlp/commit/87264d4fdadcddd91289b968dd0e4bf58d449267) by [bashonly](https://github.com/bashonly)
+
+### 2023.10.13
+
+#### Core changes
+- [Ensure thumbnail output directory exists](https://github.com/yt-dlp/yt-dlp/commit/2acd1d555ef89851c73773776715d3de9a0e30b9) ([#7985](https://github.com/yt-dlp/yt-dlp/issues/7985)) by [Riteo](https://github.com/Riteo)
+- **utils**
+ - `js_to_json`: [Fix `Date` constructor parsing](https://github.com/yt-dlp/yt-dlp/commit/9d7ded6419089c1bf252496073f73ad90ed71004) ([#8295](https://github.com/yt-dlp/yt-dlp/issues/8295)) by [awalgarg](https://github.com/awalgarg), [Grub4K](https://github.com/Grub4K)
+ - `write_xattr`: [Use `os.setxattr` if available](https://github.com/yt-dlp/yt-dlp/commit/84e26038d4002e763ea51ca1bdce4f7e63c540bf) ([#8205](https://github.com/yt-dlp/yt-dlp/issues/8205)) by [bashonly](https://github.com/bashonly), [Grub4K](https://github.com/Grub4K)
+
+#### Extractor changes
+- **artetv**: [Support age-restricted content](https://github.com/yt-dlp/yt-dlp/commit/09f815ad52843219a7ee3f2a0dddf6c250c91f0c) ([#8301](https://github.com/yt-dlp/yt-dlp/issues/8301)) by [StefanLobbenmeier](https://github.com/StefanLobbenmeier)
+- **jtbc**: [Add extractors](https://github.com/yt-dlp/yt-dlp/commit/b286ec68f1f28798b3e371f888a2ed97d399cf77) ([#8314](https://github.com/yt-dlp/yt-dlp/issues/8314)) by [seproDev](https://github.com/seproDev)
+- **mbn**: [Add extractor](https://github.com/yt-dlp/yt-dlp/commit/e030b6b6fba7b2f4614ad2ab9f7649d40a2dd305) ([#8312](https://github.com/yt-dlp/yt-dlp/issues/8312)) by [seproDev](https://github.com/seproDev)
+- **nhk**: [Fix Japanese-language VOD extraction](https://github.com/yt-dlp/yt-dlp/commit/4de94b9e165bfd6421a692f5f2eabcdb08edcb71) ([#8309](https://github.com/yt-dlp/yt-dlp/issues/8309)) by [garret1317](https://github.com/garret1317)
+- **radiko**: [Fix bug with `downloader_options`](https://github.com/yt-dlp/yt-dlp/commit/b9316642313bbc9e209ac0d2276d37ba60bceb49) by [bashonly](https://github.com/bashonly)
+- **tenplay**: [Add support for seasons](https://github.com/yt-dlp/yt-dlp/commit/88a99c87b680ae59002534a517e191f46c42cbd4) ([#7939](https://github.com/yt-dlp/yt-dlp/issues/7939)) by [midnightveil](https://github.com/midnightveil)
+- **youku**: [Improve tudou.com support](https://github.com/yt-dlp/yt-dlp/commit/b7098d46b552a9322c6cea39ba80be5229f922de) ([#8160](https://github.com/yt-dlp/yt-dlp/issues/8160)) by [naginatana](https://github.com/naginatana)
+- **youtube**: [Fix bug with `--extractor-retries inf`](https://github.com/yt-dlp/yt-dlp/commit/feebf6d02fc9651331eee2af5e08e6112288163b) ([#8328](https://github.com/yt-dlp/yt-dlp/issues/8328)) by [Grub4K](https://github.com/Grub4K)
+
+#### Downloader changes
+- **fragment**: [Improve progress calculation](https://github.com/yt-dlp/yt-dlp/commit/1c51c520f7b511ebd9e4eb7322285a8c31eedbbd) ([#8241](https://github.com/yt-dlp/yt-dlp/issues/8241)) by [Grub4K](https://github.com/Grub4K)
+
+#### Misc. changes
+- **cleanup**: Miscellaneous: [b634ba7](https://github.com/yt-dlp/yt-dlp/commit/b634ba742d8f38ce9ecfa0546485728b0c6c59d1) by [bashonly](https://github.com/bashonly), [gamer191](https://github.com/gamer191)
+
+### 2023.10.07
+
+#### Extractor changes
+- **abc.net.au**: iview: [Improve `episode` extraction](https://github.com/yt-dlp/yt-dlp/commit/a9efb4b8d74f3583450ffda0ee57259a47d39c70) ([#8201](https://github.com/yt-dlp/yt-dlp/issues/8201)) by [xofe](https://github.com/xofe)
+- **erocast**: [Add extractor](https://github.com/yt-dlp/yt-dlp/commit/47c598783c98c179e04dd12c2a3fee0f3dc53087) ([#8264](https://github.com/yt-dlp/yt-dlp/issues/8264)) by [madewokherd](https://github.com/madewokherd)
+- **gofile**: [Fix token cookie bug](https://github.com/yt-dlp/yt-dlp/commit/0730d5a966fa8a937d84bfb7f68be5198acb039b) by [bashonly](https://github.com/bashonly)
+- **iq.com**: [Fix extraction and subtitles](https://github.com/yt-dlp/yt-dlp/commit/35d9cbaf9638ccc9daf8a863063b2e7c135bc664) ([#8260](https://github.com/yt-dlp/yt-dlp/issues/8260)) by [AS6939](https://github.com/AS6939)
+- **lbry**
+ - [Add playlist support](https://github.com/yt-dlp/yt-dlp/commit/48cceec1ddb8649b5e771df8df79eb9c39c82b90) ([#8213](https://github.com/yt-dlp/yt-dlp/issues/8213)) by [bashonly](https://github.com/bashonly), [drzraf](https://github.com/drzraf), [Grub4K](https://github.com/Grub4K)
+ - [Extract `uploader_id`](https://github.com/yt-dlp/yt-dlp/commit/0e722f2f3ca42e634fd7b06ee70b16bf833ce132) ([#8244](https://github.com/yt-dlp/yt-dlp/issues/8244)) by [drzraf](https://github.com/drzraf)
+- **litv**: [Fix extractor](https://github.com/yt-dlp/yt-dlp/commit/91a670a4f7babe9c8aa2018f57d8c8952a6f49d8) ([#7785](https://github.com/yt-dlp/yt-dlp/issues/7785)) by [jiru](https://github.com/jiru)
+- **neteasemusic**: [Fix extractors](https://github.com/yt-dlp/yt-dlp/commit/f980df734cf5c0eaded2f7b38c6c60bccfeebb48) ([#8181](https://github.com/yt-dlp/yt-dlp/issues/8181)) by [c-basalt](https://github.com/c-basalt)
+- **nhk**: [Fix VOD extraction](https://github.com/yt-dlp/yt-dlp/commit/e831c80e8b2fc025b3b67d82974cc59e3526fdc8) ([#8249](https://github.com/yt-dlp/yt-dlp/issues/8249)) by [garret1317](https://github.com/garret1317)
+- **radiko**: [Improve extraction](https://github.com/yt-dlp/yt-dlp/commit/2ad3873f0dfa9285c91d2160e36c039e69d597c7) ([#8221](https://github.com/yt-dlp/yt-dlp/issues/8221)) by [garret1317](https://github.com/garret1317)
+- **substack**
+ - [Fix download cookies bug](https://github.com/yt-dlp/yt-dlp/commit/2f2dda3a7e85148773da3cdbc03ac9949ec1bc45) ([#8219](https://github.com/yt-dlp/yt-dlp/issues/8219)) by [handlerug](https://github.com/handlerug)
+ - [Fix embed extraction](https://github.com/yt-dlp/yt-dlp/commit/fbcc299bd8a19cf8b3c8805d6c268a9110230973) ([#8218](https://github.com/yt-dlp/yt-dlp/issues/8218)) by [handlerug](https://github.com/handlerug)
+- **theta**: [Remove extractors](https://github.com/yt-dlp/yt-dlp/commit/792f1e64f6a2beac51e85408d142b3118115c4fd) ([#8251](https://github.com/yt-dlp/yt-dlp/issues/8251)) by [alerikaisattera](https://github.com/alerikaisattera)
+- **wrestleuniversevod**: [Call API with device ID](https://github.com/yt-dlp/yt-dlp/commit/b095fd3fa9d58a65dc9b830bd63b9d909422aa86) ([#8272](https://github.com/yt-dlp/yt-dlp/issues/8272)) by [bashonly](https://github.com/bashonly)
+- **xhamster**: user: [Support creator urls](https://github.com/yt-dlp/yt-dlp/commit/cc8d8441524ec3442d7c0d3f8f33f15b66aa06f3) ([#8232](https://github.com/yt-dlp/yt-dlp/issues/8232)) by [Grub4K](https://github.com/Grub4K)
+- **youtube**
+ - [Fix `heatmap` extraction](https://github.com/yt-dlp/yt-dlp/commit/03e85ea99db76a2fddb65bf46f8819bda780aaf3) ([#8299](https://github.com/yt-dlp/yt-dlp/issues/8299)) by [bashonly](https://github.com/bashonly)
+ - [Raise a warning for `Incomplete Data` instead of an error](https://github.com/yt-dlp/yt-dlp/commit/eb5bdbfa70126c7d5355cc0954b63720522e462c) ([#8238](https://github.com/yt-dlp/yt-dlp/issues/8238)) by [coletdjnz](https://github.com/coletdjnz)
+
+#### Misc. changes
+- **cleanup**
+ - [Update extractor tests](https://github.com/yt-dlp/yt-dlp/commit/19c90e405b4137c06dfe6f9aaa02396df0da93e5) ([#7718](https://github.com/yt-dlp/yt-dlp/issues/7718)) by [trainman261](https://github.com/trainman261)
+ - Miscellaneous: [377e85a](https://github.com/yt-dlp/yt-dlp/commit/377e85a1797db9e98b78b38203ed9d4ded229991) by [dirkf](https://github.com/dirkf), [gamer191](https://github.com/gamer191), [Grub4K](https://github.com/Grub4K)
+
+### 2023.09.24
+
+#### Important changes
+- **The minimum *recommended* Python version has been raised to 3.8**
+Since Python 3.7 has reached end-of-life, support for it will be dropped soon. [Read more](https://github.com/yt-dlp/yt-dlp/issues/7803)
+- Security: [[CVE-2023-40581](https://cve.mitre.org/cgi-bin/cvename.cgi?name=CVE-2023-40581)] [Prevent RCE when using `--exec` with `%q` on Windows](https://github.com/yt-dlp/yt-dlp/security/advisories/GHSA-42h4-v29r-42qg)
+  - The shell escape function now uses `""` instead of `\"`.
+ - `utils.Popen` has been patched to properly quote commands.
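+
+As an illustration of the quoting change (a hedged sketch, not the actual patched code), an embedded double quote is now escaped by doubling it rather than backslash-escaping it, so values expanded via `%q` cannot break out of the quoted argument:
+
+```python
+# Hypothetical helper illustrating the `""` escaping described above.
+def quote_windows_arg(arg: str) -> str:
+    # Double any embedded quote, then wrap the whole argument in quotes.
+    return '"' + arg.replace('"', '""') + '"'
+
+print(quote_windows_arg('title" & calc.exe & "'))  # "title"" & calc.exe & """
+```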
+
+#### Core changes
+- [Fix HTTP headers and cookie handling](https://github.com/yt-dlp/yt-dlp/commit/6c5211cebeacfc53ad5d5ddf4a659be76039656f) by [bashonly](https://github.com/bashonly), [pukkandan](https://github.com/pukkandan)
+- [Fix `--check-formats`](https://github.com/yt-dlp/yt-dlp/commit/8cb7fc44db010e965d808ee679ef0725cb6e147c) by [pukkandan](https://github.com/pukkandan)
+- [Fix support for upcoming Python 3.12](https://github.com/yt-dlp/yt-dlp/commit/836e06d246512f286f30c1371b2c54b72c9ecd93) ([#8130](https://github.com/yt-dlp/yt-dlp/issues/8130)) by [Grub4K](https://github.com/Grub4K)
+- [Merged with youtube-dl 66ab08](https://github.com/yt-dlp/yt-dlp/commit/9d6254069c75877bc88bc3584f4326fb1853a543) by [coletdjnz](https://github.com/coletdjnz)
+- [Prevent RCE when using `--exec` with `%q` (CVE-2023-40581)](https://github.com/yt-dlp/yt-dlp/commit/de015e930747165dbb8fcd360f8775fd973b7d6e) by [Grub4K](https://github.com/Grub4K)
+- [Raise minimum recommended Python version to 3.8](https://github.com/yt-dlp/yt-dlp/commit/61bdf15fc7400601c3da1aa7a43917310a5bf391) ([#8183](https://github.com/yt-dlp/yt-dlp/issues/8183)) by [Grub4K](https://github.com/Grub4K)
+- [`FFmpegFixupM3u8PP` may need to run with ffmpeg](https://github.com/yt-dlp/yt-dlp/commit/f73c11803579889dc8e1c99e25dba9a22fef39d8) by [pukkandan](https://github.com/pukkandan)
+- **compat**
+ - [Add `types.NoneType`](https://github.com/yt-dlp/yt-dlp/commit/e0c4db04dc82a699bdabd9821ddc239ebe17d30a) by [pukkandan](https://github.com/pukkandan) (With fixes in [25b6e8f](https://github.com/yt-dlp/yt-dlp/commit/25b6e8f94679b4458550702b46e61249b875a4fd))
+ - [Deprecate old functions](https://github.com/yt-dlp/yt-dlp/commit/3d2623a898196640f7cc0fc8b70118ff19e6925d) ([#2861](https://github.com/yt-dlp/yt-dlp/issues/2861)) by [coletdjnz](https://github.com/coletdjnz), [pukkandan](https://github.com/pukkandan)
+ - [Ensure submodules are imported correctly](https://github.com/yt-dlp/yt-dlp/commit/a250b247334ce9f641e709cbb64974da6034a2b3) by [pukkandan](https://github.com/pukkandan)
+- **cookies**: [Containers JSON should be opened as utf-8](https://github.com/yt-dlp/yt-dlp/commit/dab87ca23650fd87184ff5286b53e6985b59f71d) ([#7800](https://github.com/yt-dlp/yt-dlp/issues/7800)) by [bashonly](https://github.com/bashonly)
+- **dependencies**: [Handle deprecation of `sqlite3.version`](https://github.com/yt-dlp/yt-dlp/commit/35f9a306e6934793cff100200cd03f288ec33f11) ([#8167](https://github.com/yt-dlp/yt-dlp/issues/8167)) by [bashonly](https://github.com/bashonly)
+- **outtmpl**: [Fix replacement for `playlist_index`](https://github.com/yt-dlp/yt-dlp/commit/a264433c9fba147ecae2420091614186cfeeb895) by [pukkandan](https://github.com/pukkandan)
+- **utils**
+ - [Add temporary shim for logging](https://github.com/yt-dlp/yt-dlp/commit/1b392f905d20ef1f1b300b180f867d43c9ce49b8) by [pukkandan](https://github.com/pukkandan)
+ - [Improve `parse_duration`](https://github.com/yt-dlp/yt-dlp/commit/af86873218c24c3859ccf575a87f2b00a73b49d0) by [bashonly](https://github.com/bashonly)
+ - HTTPHeaderDict: [Handle byte values](https://github.com/yt-dlp/yt-dlp/commit/3f7965105d8d2048359e67c1e8b8ebd51588143b) by [pukkandan](https://github.com/pukkandan)
+ - `clean_podcast_url`: [Handle more trackers](https://github.com/yt-dlp/yt-dlp/commit/2af4eeb77246b8183aae75a0a8d19f18c08115b2) ([#7556](https://github.com/yt-dlp/yt-dlp/issues/7556)) by [bashonly](https://github.com/bashonly), [mabdelfattah](https://github.com/mabdelfattah)
+ - `js_to_json`: [Handle `Array` objects](https://github.com/yt-dlp/yt-dlp/commit/52414d64ca7b92d3f83964cdd68247989b0c4625) by [Grub4K](https://github.com/Grub4K), [std-move](https://github.com/std-move)
+
+#### Extractor changes
+- [Extract subtitles from SMIL manifests](https://github.com/yt-dlp/yt-dlp/commit/550e65410a7a1b105923494ac44460a4dc1a15d9) ([#7667](https://github.com/yt-dlp/yt-dlp/issues/7667)) by [bashonly](https://github.com/bashonly), [pukkandan](https://github.com/pukkandan)
+- [Fix `--load-pages`](https://github.com/yt-dlp/yt-dlp/commit/81b4712bca608b9015aa68a4d96661d56e9cb894) by [pukkandan](https://github.com/pukkandan)
+- [Make `_search_nuxt_data` more lenient](https://github.com/yt-dlp/yt-dlp/commit/904a19ee93195ce0bd4b08bd22b186120afb5b17) by [std-move](https://github.com/std-move)
+- **abematv**
+ - [Fix proxy handling](https://github.com/yt-dlp/yt-dlp/commit/497bbbbd7328cb705f70eced94dbd90993819a46) ([#8046](https://github.com/yt-dlp/yt-dlp/issues/8046)) by [SevenLives](https://github.com/SevenLives)
+ - [Temporary fix for protocol handler](https://github.com/yt-dlp/yt-dlp/commit/9f66247289b9f8ecf931833b3f5f127274dd2161) by [pukkandan](https://github.com/pukkandan)
+- **amazonminitv**: [Fix extractors](https://github.com/yt-dlp/yt-dlp/commit/538d37671a17e0782d17f08df17800e2e3bd57c8) by [bashonly](https://github.com/bashonly), [GautamMKGarg](https://github.com/GautamMKGarg)
+- **antenna**: [Support antenna.gr](https://github.com/yt-dlp/yt-dlp/commit/665876034c8d3c031443f6b4958bed02ccdf4164) ([#7584](https://github.com/yt-dlp/yt-dlp/issues/7584)) by [stdedos](https://github.com/stdedos)
+- **artetv**: [Fix HLS formats extraction](https://github.com/yt-dlp/yt-dlp/commit/c2da0b5ea215298135f76e3dc14b972a3c4afacb) by [bashonly](https://github.com/bashonly)
+- **axs**: [Add extractor](https://github.com/yt-dlp/yt-dlp/commit/aee6b9b88c0bcccf27fd23b7e00fc0b7b168928f) ([#8094](https://github.com/yt-dlp/yt-dlp/issues/8094)) by [barsnick](https://github.com/barsnick)
+- **banbye**: [Support video ids containing a hyphen](https://github.com/yt-dlp/yt-dlp/commit/578a82e497502b951036ce9da6fe0dac6937ac27) ([#8059](https://github.com/yt-dlp/yt-dlp/issues/8059)) by [kshitiz305](https://github.com/kshitiz305)
+- **bbc**: [Extract tracklist as chapters](https://github.com/yt-dlp/yt-dlp/commit/eda0e415d26eb084e570cf5372d38ee1f616b70f) ([#7788](https://github.com/yt-dlp/yt-dlp/issues/7788)) by [garret1317](https://github.com/garret1317)
+- **bild.de**: [Extract HLS formats](https://github.com/yt-dlp/yt-dlp/commit/b4c1c408c63724339eb12b16c91b253a7ee62cfa) ([#8032](https://github.com/yt-dlp/yt-dlp/issues/8032)) by [barsnick](https://github.com/barsnick)
+- **bilibili**
+ - [Add support for series, favorites and watch later](https://github.com/yt-dlp/yt-dlp/commit/9e68747f9607f05e92bb7d9b6e79d678b50070e1) ([#7518](https://github.com/yt-dlp/yt-dlp/issues/7518)) by [c-basalt](https://github.com/c-basalt)
+ - [Extract Dolby audio formats](https://github.com/yt-dlp/yt-dlp/commit/b84fda7388dd20d38921e23b469147f3957c1812) ([#8142](https://github.com/yt-dlp/yt-dlp/issues/8142)) by [ClosedPort22](https://github.com/ClosedPort22)
+ - [Extract `format_id`](https://github.com/yt-dlp/yt-dlp/commit/5336bf57a7061e0955a37f0542fc8ebf50d55b17) ([#7555](https://github.com/yt-dlp/yt-dlp/issues/7555)) by [c-basalt](https://github.com/c-basalt)
+- **bilibilibangumi**: [Fix extractors](https://github.com/yt-dlp/yt-dlp/commit/bdd0b75e3f41ff35440eda6d395008beef19ef2f) ([#7337](https://github.com/yt-dlp/yt-dlp/issues/7337)) by [GD-Slime](https://github.com/GD-Slime)
+- **bpb**: [Overhaul extractor](https://github.com/yt-dlp/yt-dlp/commit/f659e6439444ac64305b5c80688cd82f59d2279c) ([#8119](https://github.com/yt-dlp/yt-dlp/issues/8119)) by [Grub4K](https://github.com/Grub4K)
+- **brilliantpala**: [Add extractors](https://github.com/yt-dlp/yt-dlp/commit/92feb5654c5a4c81ba872904a618700fcbb3e546) ([#6680](https://github.com/yt-dlp/yt-dlp/issues/6680)) by [pzhlkj6612](https://github.com/pzhlkj6612)
+- **canal1, caracoltvplay**: [Add extractors](https://github.com/yt-dlp/yt-dlp/commit/b3febedbeb662dfdf9b5c1d5799039ad4fc969de) ([#7151](https://github.com/yt-dlp/yt-dlp/issues/7151)) by [elyse0](https://github.com/elyse0)
+- **cbc**: [Ignore any 426 from API](https://github.com/yt-dlp/yt-dlp/commit/9bf14be775289bd88cc1f5c89fd761ae51879484) ([#7689](https://github.com/yt-dlp/yt-dlp/issues/7689)) by [makew0rld](https://github.com/makew0rld)
+- **cbcplayer**: [Extract HLS formats and subtitles](https://github.com/yt-dlp/yt-dlp/commit/339c339fec095ff4141b20e6aa83629117fb26df) ([#7484](https://github.com/yt-dlp/yt-dlp/issues/7484)) by [trainman261](https://github.com/trainman261)
+- **cbcplayerplaylist**: [Add extractor](https://github.com/yt-dlp/yt-dlp/commit/ed711897814f3ee0b1822e4205e74133467e8f1c) ([#7870](https://github.com/yt-dlp/yt-dlp/issues/7870)) by [trainman261](https://github.com/trainman261)
+- **cineverse**: [Add extractors](https://github.com/yt-dlp/yt-dlp/commit/15591940ff102d1ae337d603a46d8f238c83a61f) ([#8146](https://github.com/yt-dlp/yt-dlp/issues/8146)) by [garret1317](https://github.com/garret1317)
+- **crunchyroll**: [Remove initial state extraction](https://github.com/yt-dlp/yt-dlp/commit/9b16762f48914de9ac914601769c76668e433325) ([#7632](https://github.com/yt-dlp/yt-dlp/issues/7632)) by [Grub4K](https://github.com/Grub4K)
+- **douyutv**: [Fix extractors](https://github.com/yt-dlp/yt-dlp/commit/21f40e75dfc0055ea9cdbd7fe2c46c6f9b561afd) ([#7652](https://github.com/yt-dlp/yt-dlp/issues/7652)) by [c-basalt](https://github.com/c-basalt)
+- **dropbox**: [Fix extractor](https://github.com/yt-dlp/yt-dlp/commit/b9f2bc2dbed2323734a0d18e65e1e2e23dc833d8) ([#7926](https://github.com/yt-dlp/yt-dlp/issues/7926)) by [bashonly](https://github.com/bashonly), [denhotte](https://github.com/denhotte), [nathantouze](https://github.com/nathantouze) (With fixes in [099fb1b](https://github.com/yt-dlp/yt-dlp/commit/099fb1b35cf835303306549f5113d1802d79c9c7) by [bashonly](https://github.com/bashonly))
+- **eplus**: inbound: [Add extractor](https://github.com/yt-dlp/yt-dlp/commit/295fbb3ae3a7d0dd50e286be5c487cf145ed5778) ([#5782](https://github.com/yt-dlp/yt-dlp/issues/5782)) by [pzhlkj6612](https://github.com/pzhlkj6612)
+- **expressen**: [Improve `_VALID_URL`](https://github.com/yt-dlp/yt-dlp/commit/a5e264d74b4bd60c6e7ec4e38f1a23af4e420531) ([#8153](https://github.com/yt-dlp/yt-dlp/issues/8153)) by [kylegustavo](https://github.com/kylegustavo)
+- **facebook**
+ - [Add dash manifest URL](https://github.com/yt-dlp/yt-dlp/commit/a854fbec56d5004f5147116a41d1dd050632a579) ([#7743](https://github.com/yt-dlp/yt-dlp/issues/7743)) by [ringus1](https://github.com/ringus1)
+ - [Fix webpage extraction](https://github.com/yt-dlp/yt-dlp/commit/d3d81cc98f554d0adb87d24bfd6fabaaa803944d) ([#7890](https://github.com/yt-dlp/yt-dlp/issues/7890)) by [ringus1](https://github.com/ringus1)
+ - [Improve format sorting](https://github.com/yt-dlp/yt-dlp/commit/308936619c8a4f3a52d73c829c2006ff6c55fea2) ([#8074](https://github.com/yt-dlp/yt-dlp/issues/8074)) by [fireattack](https://github.com/fireattack)
+ - reel: [Fix extraction](https://github.com/yt-dlp/yt-dlp/commit/bb5d84c9d2f1e978c3eddfb5ccbe138036682a36) ([#7564](https://github.com/yt-dlp/yt-dlp/issues/7564)) by [bashonly](https://github.com/bashonly), [demon071](https://github.com/demon071)
+- **fox**: [Support foxsports.com](https://github.com/yt-dlp/yt-dlp/commit/30b29f37159e9226e2f2d5434c9a4096ac4efa2e) ([#7724](https://github.com/yt-dlp/yt-dlp/issues/7724)) by [ischmidt20](https://github.com/ischmidt20)
+- **funker530**: [Fix extraction](https://github.com/yt-dlp/yt-dlp/commit/0ce1f48bf1cb78d40d734ce73ee1c90eccf92274) ([#8040](https://github.com/yt-dlp/yt-dlp/issues/8040)) by [04-pasha-04](https://github.com/04-pasha-04)
+- **generic**
+ - [Fix KVS thumbnail extraction](https://github.com/yt-dlp/yt-dlp/commit/53675852195d8dd859555d4789944a6887171ff8) by [bashonly](https://github.com/bashonly)
+ - [Fix generic title for embeds](https://github.com/yt-dlp/yt-dlp/commit/994f7ef8e6003f4b7b258528755d0b6adcc31714) by [pukkandan](https://github.com/pukkandan)
+- **gofile**: [Update token](https://github.com/yt-dlp/yt-dlp/commit/99c99c7185f5d8e9b3699a6fc7f86ec663d7b97e) by [bashonly](https://github.com/bashonly)
+- **hotstar**
+ - [Extract `release_year`](https://github.com/yt-dlp/yt-dlp/commit/7237c8dca0590aa7438ade93f927df88c9381ec7) ([#7869](https://github.com/yt-dlp/yt-dlp/issues/7869)) by [Rajeshwaran2001](https://github.com/Rajeshwaran2001)
+ - [Make metadata extraction non-fatal](https://github.com/yt-dlp/yt-dlp/commit/30ea88591b728cca0896018dbf67c2298070c669) by [bashonly](https://github.com/bashonly)
+ - [Support `/clips/` URLs](https://github.com/yt-dlp/yt-dlp/commit/86eeb044c2342d68c6ef177577f87852e6badd85) ([#7710](https://github.com/yt-dlp/yt-dlp/issues/7710)) by [bashonly](https://github.com/bashonly)
+- **hungama**: [Overhaul extractors](https://github.com/yt-dlp/yt-dlp/commit/4b3a6ef1b3e235ba9a45142830b6edb357c71696) ([#7757](https://github.com/yt-dlp/yt-dlp/issues/7757)) by [bashonly](https://github.com/bashonly), [Yalab7](https://github.com/Yalab7)
+- **indavideoembed**: [Fix extraction](https://github.com/yt-dlp/yt-dlp/commit/63e0c5748c0eb461a2ccca4181616eb930b4b750) ([#8129](https://github.com/yt-dlp/yt-dlp/issues/8129)) by [aky-01](https://github.com/aky-01)
+- **iprima**: [Fix extractor](https://github.com/yt-dlp/yt-dlp/commit/568f08051841aedea968258889539741e26009e9) ([#7216](https://github.com/yt-dlp/yt-dlp/issues/7216)) by [std-move](https://github.com/std-move)
+- **lbry**: [Fix original format extraction](https://github.com/yt-dlp/yt-dlp/commit/127a22460658ac39cbe5c4b3fb88d578363e0dfa) ([#7711](https://github.com/yt-dlp/yt-dlp/issues/7711)) by [bashonly](https://github.com/bashonly)
+- **lecturio**: [Improve `_VALID_URL`](https://github.com/yt-dlp/yt-dlp/commit/efa2339502a37cf13ae7f143bd8b2c28f452d1cd) ([#7649](https://github.com/yt-dlp/yt-dlp/issues/7649)) by [simon300000](https://github.com/simon300000)
+- **magellantv**: [Add extractor](https://github.com/yt-dlp/yt-dlp/commit/f4ea501551526ebcb54d19b84cf0ebe798583a85) ([#7616](https://github.com/yt-dlp/yt-dlp/issues/7616)) by [bashonly](https://github.com/bashonly)
+- **massengeschmack.tv**: [Fix title extraction](https://github.com/yt-dlp/yt-dlp/commit/81f46ac573dc443ad48560f308582a26784d3015) ([#7813](https://github.com/yt-dlp/yt-dlp/issues/7813)) by [sb0stn](https://github.com/sb0stn)
+- **media.ccc.de**: lists: [Fix extraction](https://github.com/yt-dlp/yt-dlp/commit/cf11b40ac40e3d23a6352753296f3a732886efb9) ([#8144](https://github.com/yt-dlp/yt-dlp/issues/8144)) by [Rohxn16](https://github.com/Rohxn16)
+- **mediaite**: [Fix extraction](https://github.com/yt-dlp/yt-dlp/commit/630a55df8de7747e79aa680959d785dfff2c4b76) ([#7923](https://github.com/yt-dlp/yt-dlp/issues/7923)) by [Grabien](https://github.com/Grabien)
+- **mediaklikk**: [Fix extractor](https://github.com/yt-dlp/yt-dlp/commit/6e07e4bc7e59f5bdb60e93c011e57b18b009f2b5) ([#8086](https://github.com/yt-dlp/yt-dlp/issues/8086)) by [bashonly](https://github.com/bashonly), [zhallgato](https://github.com/zhallgato)
+- **mediastream**: [Make embed extraction non-fatal](https://github.com/yt-dlp/yt-dlp/commit/635ae31f68a3ac7f6393d59657ed711e34ee3552) by [bashonly](https://github.com/bashonly)
+- **mixcloud**: [Update API URL](https://github.com/yt-dlp/yt-dlp/commit/7b71643cc986de9a3768dac4ac9b64f4d05e7f5e) ([#8114](https://github.com/yt-dlp/yt-dlp/issues/8114)) by [garret1317](https://github.com/garret1317)
+- **monstercat**: [Add extractor](https://github.com/yt-dlp/yt-dlp/commit/eaee21bf71889d495076037cbe590c8c0b21ef3a) ([#8133](https://github.com/yt-dlp/yt-dlp/issues/8133)) by [garret1317](https://github.com/garret1317)
+- **motortrendondemand**: [Update `_VALID_URL`](https://github.com/yt-dlp/yt-dlp/commit/c03a58ec9933e4a42c2d8fa80b8a0ddb2cde64e6) ([#7683](https://github.com/yt-dlp/yt-dlp/issues/7683)) by [AmirAflak](https://github.com/AmirAflak)
+- **museai**: [Add extractor](https://github.com/yt-dlp/yt-dlp/commit/65cfa2b057d7946fbe322155a778fe206556d0c6) ([#7614](https://github.com/yt-dlp/yt-dlp/issues/7614)) by [bashonly](https://github.com/bashonly)
+- **mzaalo**: [Improve `_VALID_URL`](https://github.com/yt-dlp/yt-dlp/commit/d7aee8e310b2c4f21d50aac0b420e1b3abde21a4) by [bashonly](https://github.com/bashonly)
+- **n1info**: article: [Fix extractor](https://github.com/yt-dlp/yt-dlp/commit/8ac5b6d96ae5c60cd5ae2495949e0068a6754c45) ([#7373](https://github.com/yt-dlp/yt-dlp/issues/7373)) by [u-spec-png](https://github.com/u-spec-png)
+- **nfl.com**: plus, replay: [Fix extractor](https://github.com/yt-dlp/yt-dlp/commit/1eaca74bc2ca0f5b1ec532f24c61de44f2e8cb2d) ([#7838](https://github.com/yt-dlp/yt-dlp/issues/7838)) by [bashonly](https://github.com/bashonly)
+- **niconicochannelplus**: [Add extractors](https://github.com/yt-dlp/yt-dlp/commit/698beb9a497f51693e64d167e572ff9efa4bc25f) ([#5686](https://github.com/yt-dlp/yt-dlp/issues/5686)) by [pzhlkj6612](https://github.com/pzhlkj6612)
+- **nitter**: [Fix title extraction fallback](https://github.com/yt-dlp/yt-dlp/commit/a83da3717d30697102e76f63a6f29d77f9373c2a) ([#8102](https://github.com/yt-dlp/yt-dlp/issues/8102)) by [ApoorvShah111](https://github.com/ApoorvShah111)
+- **noodlemagazine**: [Fix extraction](https://github.com/yt-dlp/yt-dlp/commit/bae4834245a708fff97219849ec880c319c88bc6) ([#7830](https://github.com/yt-dlp/yt-dlp/issues/7830)) by [RedDeffender](https://github.com/RedDeffender) (With fixes in [69dbfe0](https://github.com/yt-dlp/yt-dlp/commit/69dbfe01c47cd078682a87f179f5846e2679e927) by [bashonly](https://github.com/bashonly))
+- **novaembed**: [Fix extractor](https://github.com/yt-dlp/yt-dlp/commit/2269065ad60cb0ab62408ae6a7b20283e5252232) ([#7910](https://github.com/yt-dlp/yt-dlp/issues/7910)) by [std-move](https://github.com/std-move)
+- **patreoncampaign**: [Fix extraction](https://github.com/yt-dlp/yt-dlp/commit/11de6fec9c9b8d34d1f90c8e6218ec58a3471b58) ([#7664](https://github.com/yt-dlp/yt-dlp/issues/7664)) by [bashonly](https://github.com/bashonly)
+- **pbs**: [Add extractor `PBSKidsIE`](https://github.com/yt-dlp/yt-dlp/commit/6d6081dda1290a85bdab6717f239289e3aa74c8e) ([#7602](https://github.com/yt-dlp/yt-dlp/issues/7602)) by [snixon](https://github.com/snixon)
+- **piapro**: [Support `/content` URL](https://github.com/yt-dlp/yt-dlp/commit/1bcb9fe8715b1f288efc322be3de409ee0597080) ([#7592](https://github.com/yt-dlp/yt-dlp/issues/7592)) by [FinnRG](https://github.com/FinnRG)
+- **piaulizaportal**: [Add extractor](https://github.com/yt-dlp/yt-dlp/commit/6636021206dad17c7745ae6bce6cb73d6f2ef319) ([#7903](https://github.com/yt-dlp/yt-dlp/issues/7903)) by [pzhlkj6612](https://github.com/pzhlkj6612)
+- **picartovod**: [Fix extractor](https://github.com/yt-dlp/yt-dlp/commit/db9743894071760f994f640a4c24358f749a78c0) ([#7727](https://github.com/yt-dlp/yt-dlp/issues/7727)) by [Frankgoji](https://github.com/Frankgoji)
+- **pornbox**: [Add extractor](https://github.com/yt-dlp/yt-dlp/commit/40999467f72db074a3f13057da9bf82a857530fe) ([#7386](https://github.com/yt-dlp/yt-dlp/issues/7386)) by [niemands](https://github.com/niemands)
+- **pornhub**: [Update access cookies for UK](https://github.com/yt-dlp/yt-dlp/commit/1d3d579c2142f69831b6ae140e1d8e824e07fa0e) ([#7591](https://github.com/yt-dlp/yt-dlp/issues/7591)) by [zhong-yiyu](https://github.com/zhong-yiyu)
+- **pr0gramm**: [Rewrite extractor](https://github.com/yt-dlp/yt-dlp/commit/b532556d0a85e7d76f8f0880861232fb706ddbc5) ([#8151](https://github.com/yt-dlp/yt-dlp/issues/8151)) by [Grub4K](https://github.com/Grub4K)
+- **radiofrance**: [Add support for livestreams, podcasts, playlists](https://github.com/yt-dlp/yt-dlp/commit/ba8e9eb2c8bbb699f314169fab8e544437ad731e) ([#7006](https://github.com/yt-dlp/yt-dlp/issues/7006)) by [elyse0](https://github.com/elyse0)
+- **rbgtum**: [Fix extraction and support new URL format](https://github.com/yt-dlp/yt-dlp/commit/5fccabac27ca3c1165ade1b0df6fbadc24258dc2) ([#7690](https://github.com/yt-dlp/yt-dlp/issues/7690)) by [simon300000](https://github.com/simon300000)
+- **reddit**
+ - [Extract subtitles](https://github.com/yt-dlp/yt-dlp/commit/20c3c9b433dd47faf0dbde6b46e4e34eb76109a5) by [bashonly](https://github.com/bashonly)
+ - [Fix thumbnail extraction](https://github.com/yt-dlp/yt-dlp/commit/9a04113dfbb69b904e4e2bea736da293505786b8) by [bashonly](https://github.com/bashonly)
+- **rtvslo**: [Fix format extraction](https://github.com/yt-dlp/yt-dlp/commit/94389b225d9bcf29aa7ba8afaf1bbd7c62204eae) ([#8131](https://github.com/yt-dlp/yt-dlp/issues/8131)) by [bashonly](https://github.com/bashonly)
+- **rule34video**: [Extract tags](https://github.com/yt-dlp/yt-dlp/commit/58493923e9b6f774947a2131e5258e9f3cf816be) ([#7117](https://github.com/yt-dlp/yt-dlp/issues/7117)) by [soundchaser128](https://github.com/soundchaser128)
+- **rumble**: [Fix embed extraction](https://github.com/yt-dlp/yt-dlp/commit/23d829a3420450bcfb0788e6fb2cf4f6acdbe596) ([#8035](https://github.com/yt-dlp/yt-dlp/issues/8035)) by [trislee](https://github.com/trislee)
+- **s4c**
+ - [Add extractor](https://github.com/yt-dlp/yt-dlp/commit/b9de629d78ce31699f2de886071dc257830f9676) ([#7730](https://github.com/yt-dlp/yt-dlp/issues/7730)) by [ifan-t](https://github.com/ifan-t)
+ - [Add series support and extract subs/thumbs](https://github.com/yt-dlp/yt-dlp/commit/fe371dcf0ba5ce8d42480eade54eeeac99ab3cb0) ([#7776](https://github.com/yt-dlp/yt-dlp/issues/7776)) by [ifan-t](https://github.com/ifan-t)
+- **sohu**: [Fix extractor](https://github.com/yt-dlp/yt-dlp/commit/5be7e978867b5f66ad6786c674d79d40e950ae16) ([#7628](https://github.com/yt-dlp/yt-dlp/issues/7628)) by [bashonly](https://github.com/bashonly), [c-basalt](https://github.com/c-basalt)
+- **stageplus**: [Fix m3u8 extraction](https://github.com/yt-dlp/yt-dlp/commit/56b3dc03354b75be995759d8441d2754c0442b9a) ([#7929](https://github.com/yt-dlp/yt-dlp/issues/7929)) by [bashonly](https://github.com/bashonly)
+- **streamanity**: [Remove](https://github.com/yt-dlp/yt-dlp/commit/2cfe221fbbe46faa3f46552c08d947a51f424903) ([#7571](https://github.com/yt-dlp/yt-dlp/issues/7571)) by [alerikaisattera](https://github.com/alerikaisattera)
+- **svtplay**: [Fix extraction](https://github.com/yt-dlp/yt-dlp/commit/2301b5c1b77a65abbb46b72f91e1e4666fd5d985) ([#7789](https://github.com/yt-dlp/yt-dlp/issues/7789)) by [dirkf](https://github.com/dirkf), [wader](https://github.com/wader)
+- **tbsjp**: [Add episode, program, playlist extractors](https://github.com/yt-dlp/yt-dlp/commit/876b70c8edf4c0147f180bd981fbc4d625cbfb9c) ([#7765](https://github.com/yt-dlp/yt-dlp/issues/7765)) by [garret1317](https://github.com/garret1317)
+- **tiktok**
+ - [Fix audio-only format extraction](https://github.com/yt-dlp/yt-dlp/commit/b09bd0c19648f60c59fb980cd454cb0069959fb9) ([#7712](https://github.com/yt-dlp/yt-dlp/issues/7712)) by [bashonly](https://github.com/bashonly)
+ - [Fix webpage extraction](https://github.com/yt-dlp/yt-dlp/commit/069cbece9dba6384f1cc5fcfc7ce562a31af42fc) by [bashonly](https://github.com/bashonly)
+- **triller**: [Fix unlisted video extraction](https://github.com/yt-dlp/yt-dlp/commit/39837ae3199aa934299badbd0d63243ed639e6c8) ([#7670](https://github.com/yt-dlp/yt-dlp/issues/7670)) by [bashonly](https://github.com/bashonly)
+- **tv5mondeplus**: [Fix extractor](https://github.com/yt-dlp/yt-dlp/commit/7d3d658f4c558ee7d72b1c01b46f2126948681cd) ([#7952](https://github.com/yt-dlp/yt-dlp/issues/7952)) by [dirkf](https://github.com/dirkf), [korli](https://github.com/korli)
+- **twitcasting**
+ - [Improve `_VALID_URL`](https://github.com/yt-dlp/yt-dlp/commit/cebbd33b1c678149fc8f0e254db6fc0da317ea80) ([#8120](https://github.com/yt-dlp/yt-dlp/issues/8120)) by [c-basalt](https://github.com/c-basalt)
+ - [Support `--wait-for-video`](https://github.com/yt-dlp/yt-dlp/commit/c1d71d0d9f41db5e4306c86af232f5f6220a130b) ([#7975](https://github.com/yt-dlp/yt-dlp/issues/7975)) by [at-wat](https://github.com/at-wat)
+- **twitter**
+ - [Add fallback, improve error handling](https://github.com/yt-dlp/yt-dlp/commit/6014355c6142f68e20c8374e3787e5b5820f19e2) ([#7621](https://github.com/yt-dlp/yt-dlp/issues/7621)) by [bashonly](https://github.com/bashonly)
+ - [Fix GraphQL and legacy API](https://github.com/yt-dlp/yt-dlp/commit/92315c03774cfabb3a921884326beb4b981f786b) ([#7516](https://github.com/yt-dlp/yt-dlp/issues/7516)) by [bashonly](https://github.com/bashonly)
+ - [Fix retweet extraction and syndication API](https://github.com/yt-dlp/yt-dlp/commit/a006ce2b27357c15792eb5c18f06765e640b801c) ([#8016](https://github.com/yt-dlp/yt-dlp/issues/8016)) by [bashonly](https://github.com/bashonly)
+ - [Revert 92315c03774cfabb3a921884326beb4b981f786b](https://github.com/yt-dlp/yt-dlp/commit/b03fa7834579a01cc5fba48c0e73488a16683d48) by [pukkandan](https://github.com/pukkandan)
+ - spaces
+ - [Fix format protocol](https://github.com/yt-dlp/yt-dlp/commit/613dbce177d34ffc31053e8e01acf4bb107bcd1e) ([#7550](https://github.com/yt-dlp/yt-dlp/issues/7550)) by [bashonly](https://github.com/bashonly)
+ - [Pass referer header to downloader](https://github.com/yt-dlp/yt-dlp/commit/c6ef553792ed48462f9fd0e78143bef6b1a71c2e) by [bashonly](https://github.com/bashonly)
+- **unsupported**: [List more sites with DRM](https://github.com/yt-dlp/yt-dlp/commit/e7057383380d7d53815f8feaf90ca3dcbde88983) by [pukkandan](https://github.com/pukkandan)
+- **videa**: [Fix extraction](https://github.com/yt-dlp/yt-dlp/commit/98eac0e6ba0e510ae7dfdfd249d42ee71fb272b1) ([#8003](https://github.com/yt-dlp/yt-dlp/issues/8003)) by [aky-01](https://github.com/aky-01), [hatsomatt](https://github.com/hatsomatt)
+- **vrt**: [Update token signing key](https://github.com/yt-dlp/yt-dlp/commit/325191d0c9bf3fe257b8a7c2eb95080f44f6ddfc) ([#7519](https://github.com/yt-dlp/yt-dlp/issues/7519)) by [Zprokkel](https://github.com/Zprokkel)
+- **wat.tv**: [Fix extraction](https://github.com/yt-dlp/yt-dlp/commit/7cccab79e7d00ed965b48b8cefce1da8a0513409) ([#7898](https://github.com/yt-dlp/yt-dlp/issues/7898)) by [davinkevin](https://github.com/davinkevin)
+- **wdr**: [Fix extraction](https://github.com/yt-dlp/yt-dlp/commit/5d0395498d7065aa5e55bac85fa9354b4b0d48eb) ([#7979](https://github.com/yt-dlp/yt-dlp/issues/7979)) by [szabyg](https://github.com/szabyg)
+- **web.archive**: vlive: [Remove extractor](https://github.com/yt-dlp/yt-dlp/commit/9652bca1bd02f6bc1b8cb1e186f2ccbf32225561) ([#8132](https://github.com/yt-dlp/yt-dlp/issues/8132)) by [bashonly](https://github.com/bashonly)
+- **weibo**: [Fix extractor and support user extraction](https://github.com/yt-dlp/yt-dlp/commit/69b03f84f8378b0b5a2fbae56f9b7d860b2f529e) ([#7657](https://github.com/yt-dlp/yt-dlp/issues/7657)) by [c-basalt](https://github.com/c-basalt)
+- **weverse**: [Support extraction without auth](https://github.com/yt-dlp/yt-dlp/commit/c2d8ee0000302aba63476b7d5bd8793e57b6c8c6) ([#7924](https://github.com/yt-dlp/yt-dlp/issues/7924)) by [seproDev](https://github.com/seproDev)
+- **wimbledon**: [Add extractor](https://github.com/yt-dlp/yt-dlp/commit/a15fcd299e767a510debd8dc1646fe863b96ce0e) ([#7551](https://github.com/yt-dlp/yt-dlp/issues/7551)) by [nnoboa](https://github.com/nnoboa)
+- **wrestleuniverseppv**: [Fix HLS AES key extraction](https://github.com/yt-dlp/yt-dlp/commit/dae349da97cafe7357106a8f3187fd48a2ad1210) by [bashonly](https://github.com/bashonly)
+- **youtube**
+ - [Add `player_params` extractor arg](https://github.com/yt-dlp/yt-dlp/commit/ba06d77a316650ff057347d224b5afa8b203ad65) ([#7719](https://github.com/yt-dlp/yt-dlp/issues/7719)) by [coletdjnz](https://github.com/coletdjnz)
+ - [Fix `player_params` arg being converted to lowercase](https://github.com/yt-dlp/yt-dlp/commit/546b2c28a106cf8101d481b215b676d1b091d276) by [coletdjnz](https://github.com/coletdjnz)
+ - [Fix consent cookie](https://github.com/yt-dlp/yt-dlp/commit/378ae9f9fb8e8c86e6ac89c4c5b815b48ce93620) ([#7774](https://github.com/yt-dlp/yt-dlp/issues/7774)) by [coletdjnz](https://github.com/coletdjnz)
+ - tab: [Detect looping feeds](https://github.com/yt-dlp/yt-dlp/commit/1ba6fe9db5f660d5538588315c23ad6cf0371c5f) ([#6621](https://github.com/yt-dlp/yt-dlp/issues/6621)) by [coletdjnz](https://github.com/coletdjnz)
+- **zaiko**: [Improve thumbnail extraction](https://github.com/yt-dlp/yt-dlp/commit/ecef42c3adbcb6a84405139047923c4967316f28) ([#8054](https://github.com/yt-dlp/yt-dlp/issues/8054)) by [pzhlkj6612](https://github.com/pzhlkj6612)
+- **zee5**: [Update access token endpoint](https://github.com/yt-dlp/yt-dlp/commit/a0de8bb8601146b8f87bf7cd562eef8bfb4690be) ([#7914](https://github.com/yt-dlp/yt-dlp/issues/7914)) by [bashonly](https://github.com/bashonly)
+- **zoom**: [Extract duration](https://github.com/yt-dlp/yt-dlp/commit/66cc64ff6696f9921ff112a278542f8d999ffea4) by [bashonly](https://github.com/bashonly)
+
+#### Downloader changes
+- **external**
+ - [Fix ffmpeg input from stdin](https://github.com/yt-dlp/yt-dlp/commit/e57eb98222d29cc4c09ee975d3c492274a6e5be3) ([#7655](https://github.com/yt-dlp/yt-dlp/issues/7655)) by [bashonly](https://github.com/bashonly)
+ - [Fixes to cookie handling](https://github.com/yt-dlp/yt-dlp/commit/42ded0a429c20ec13dc006825e1508d9a02f0ad4) by [bashonly](https://github.com/bashonly)
+
+#### Postprocessor changes
+- **embedthumbnail**: [Support `m4v`](https://github.com/yt-dlp/yt-dlp/commit/8a4cd12c8f8e93292e3e95200b9d17a3af39624c) ([#7583](https://github.com/yt-dlp/yt-dlp/issues/7583)) by [Neurognostic](https://github.com/Neurognostic)
+
+#### Networking changes
+- [Add module](https://github.com/yt-dlp/yt-dlp/commit/c365dba8430ee33abda85d31f95128605bf240eb) ([#2861](https://github.com/yt-dlp/yt-dlp/issues/2861)) by [pukkandan](https://github.com/pukkandan)
+- [Add request handler preference framework](https://github.com/yt-dlp/yt-dlp/commit/db7b054a6111ca387220d0eb87bf342f9c130eb8) ([#7603](https://github.com/yt-dlp/yt-dlp/issues/7603)) by [coletdjnz](https://github.com/coletdjnz)
+- [Add strict Request extension checking](https://github.com/yt-dlp/yt-dlp/commit/86aea0d3a213da3be1da638b9b828e6f0ee1d59f) ([#7604](https://github.com/yt-dlp/yt-dlp/issues/7604)) by [coletdjnz](https://github.com/coletdjnz)
+- [Fix POST requests with zero-length payloads](https://github.com/yt-dlp/yt-dlp/commit/71baa490ebd3655746430f208a9b605d120cd315) ([#7648](https://github.com/yt-dlp/yt-dlp/issues/7648)) by [bashonly](https://github.com/bashonly)
+- [Fix `--legacy-server-connect`](https://github.com/yt-dlp/yt-dlp/commit/75dc8e673b481a82d0688aeec30f6c65d82bb359) ([#7645](https://github.com/yt-dlp/yt-dlp/issues/7645)) by [bashonly](https://github.com/bashonly)
+- [Fix various socks proxy bugs](https://github.com/yt-dlp/yt-dlp/commit/20fbbd9249a2f26c7ae579bde5ba5d69aa8fac69) ([#8065](https://github.com/yt-dlp/yt-dlp/issues/8065)) by [coletdjnz](https://github.com/coletdjnz)
+- [Ignore invalid proxies in env](https://github.com/yt-dlp/yt-dlp/commit/bbeacff7fcaa3b521066088a5ccbf34ef5070d1d) ([#7704](https://github.com/yt-dlp/yt-dlp/issues/7704)) by [coletdjnz](https://github.com/coletdjnz)
+- [Rewrite architecture](https://github.com/yt-dlp/yt-dlp/commit/227bf1a33be7b89cd7d44ad046844c4ccba104f4) ([#2861](https://github.com/yt-dlp/yt-dlp/issues/2861)) by [coletdjnz](https://github.com/coletdjnz) (see the sketch after this list)
+- **Request Handler**
+ - urllib
+ - [Remove dot segments during URL normalization](https://github.com/yt-dlp/yt-dlp/commit/4bf912282a34b58b6b35d8f7e6be535770c89c76) ([#7662](https://github.com/yt-dlp/yt-dlp/issues/7662)) by [coletdjnz](https://github.com/coletdjnz)
+ - [Simplify gzip decoding](https://github.com/yt-dlp/yt-dlp/commit/59e92b1f1833440bb2190f847eb735cf0f90bc85) ([#7611](https://github.com/yt-dlp/yt-dlp/issues/7611)) by [Grub4K](https://github.com/Grub4K) (With fixes in [77bff23](https://github.com/yt-dlp/yt-dlp/commit/77bff23ee97565bab2e0d75b893a21bf7983219a))
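+
+As a minimal sketch of what the rewrite means for embedders (the URL and the local SOCKS5 proxy address are placeholders): long-standing `YoutubeDL` options such as `proxy` should keep working unchanged, now routed through the new request handler framework, with the socks fixes above applying to them.
+
+```python
+import yt_dlp
+
+# Placeholder proxy and URL; 'proxy' is an existing YoutubeDL option,
+# now serviced by the rewritten networking stack.
+opts = {'proxy': 'socks5://127.0.0.1:1080'}
+with yt_dlp.YoutubeDL(opts) as ydl:
+    info = ydl.extract_info('https://example.com/video', download=False)
+```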
+
+#### Misc. changes
+- **build**: [Make sure deprecated modules are added](https://github.com/yt-dlp/yt-dlp/commit/131d132da5c98c6c78bd7eed4b37f4458561b3d9) by [pukkandan](https://github.com/pukkandan)
+- **cleanup**
+ - [Add color to `download-archive` message](https://github.com/yt-dlp/yt-dlp/commit/2b029ca0a9f9105c4f7626993fa60e54c9782749) ([#5138](https://github.com/yt-dlp/yt-dlp/issues/5138)) by [aaruni96](https://github.com/aaruni96), [Grub4K](https://github.com/Grub4K), [pukkandan](https://github.com/pukkandan)
+ - Miscellaneous
+ - [6148833](https://github.com/yt-dlp/yt-dlp/commit/6148833f5ceb7674142ddb8d761ffe03cee7df69), [62b5c94](https://github.com/yt-dlp/yt-dlp/commit/62b5c94cadaa5f596dc1a7083db9db12efe357be) by [pukkandan](https://github.com/pukkandan)
+ - [5ca095c](https://github.com/yt-dlp/yt-dlp/commit/5ca095cbcde3e32642a4fe5b2d69e8e3c785a021) by [barsnick](https://github.com/barsnick), [bashonly](https://github.com/bashonly), [coletdjnz](https://github.com/coletdjnz), [gamer191](https://github.com/gamer191), [Grub4K](https://github.com/Grub4K), [sqrtNOT](https://github.com/sqrtNOT)
+ - [088add9](https://github.com/yt-dlp/yt-dlp/commit/088add9567d39b758737e4299a0e619fd89d2e8f) by [Grub4K](https://github.com/Grub4K)
+- **devscripts**: `make_changelog`: [Fix changelog grouping and add networking group](https://github.com/yt-dlp/yt-dlp/commit/30ba233d4cee945756ed7344e7ddb3a90d2ae608) ([#8124](https://github.com/yt-dlp/yt-dlp/issues/8124)) by [Grub4K](https://github.com/Grub4K)
+- **docs**: [Update collaborators](https://github.com/yt-dlp/yt-dlp/commit/1be0a96a4d14f629097509fcc89d15f69a8243c7) by [Grub4K](https://github.com/Grub4K)
+- **test**
+ - [Add tests for socks proxies](https://github.com/yt-dlp/yt-dlp/commit/fcd6a76adc49d5cd8783985c7ce35384b72e545f) ([#7908](https://github.com/yt-dlp/yt-dlp/issues/7908)) by [coletdjnz](https://github.com/coletdjnz)
+ - [Fix `httplib_validation_errors` test for old Python versions](https://github.com/yt-dlp/yt-dlp/commit/95abea9a03289da1384e5bda3d590223ccc0a238) ([#7677](https://github.com/yt-dlp/yt-dlp/issues/7677)) by [coletdjnz](https://github.com/coletdjnz)
+ - [Fix `test_load_certifi`](https://github.com/yt-dlp/yt-dlp/commit/de20687ee6b742646128a7629b57096631a20619) by [pukkandan](https://github.com/pukkandan)
+ - download: [Test for `expected_exception`](https://github.com/yt-dlp/yt-dlp/commit/661c9a1d029296b28e0b2f8be8a72a43abaf6536) by [at-wat](https://github.com/at-wat)
+
+### 2023.07.06
+
+#### Important changes
+- Security: [[CVE-2023-35934](https://cve.mitre.org/cgi-bin/cvename.cgi?name=CVE-2023-35934)] Fix [Cookie leak](https://github.com/yt-dlp/yt-dlp/security/advisories/GHSA-v8mc-9377-rwjj)
+ - `--add-header Cookie:` is deprecated and auto-scoped to input URL domains
+ - Cookies are scoped when passed to external downloaders
+ - Add `cookies` field to info.json and deprecate `http_headers.Cookie`
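+
+A minimal sketch of the new surface (the URL is a placeholder): per the notes above, extracted info now carries a top-level `cookies` field, while reading cookies from `http_headers.Cookie` is deprecated.
+
+```python
+import yt_dlp
+
+# Placeholder URL; extract metadata only and inspect the cookie fields.
+with yt_dlp.YoutubeDL() as ydl:
+    info = ydl.extract_info('https://example.com/video', download=False)
+    print(info.get('cookies'))                          # new top-level field
+    print(info.get('http_headers', {}).get('Cookie'))   # deprecated location
+```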
+
+#### Core changes
+- [Allow extractors to mark formats as potentially DRM](https://github.com/yt-dlp/yt-dlp/commit/bc344cd456380999c1ee74554dfd432a38f32ec7) ([#7396](https://github.com/yt-dlp/yt-dlp/issues/7396)) by [pukkandan](https://github.com/pukkandan)
+- [Bugfix for b4e0d75848e9447cee2cd3646ce54d4744a7ff56](https://github.com/yt-dlp/yt-dlp/commit/e59e20744eb32ce4b6ea0dece7c673be8376a710) by [pukkandan](https://github.com/pukkandan)
+- [Change how `Cookie` headers are handled](https://github.com/yt-dlp/yt-dlp/commit/3121512228487c9c690d3d39bfd2579addf96e07) by [Grub4K](https://github.com/Grub4K)
+- [Prevent `Cookie` leaks on HTTP redirect](https://github.com/yt-dlp/yt-dlp/commit/f8b4bcc0a791274223723488bfbfc23ea3276641) by [coletdjnz](https://github.com/coletdjnz)
+- **formats**: [Fix best fallback for storyboards](https://github.com/yt-dlp/yt-dlp/commit/906c0bdcd8974340d619e99ccd613c163eb0d0c2) by [pukkandan](https://github.com/pukkandan)
+- **outtmpl**: [Pad `playlist_index` etc. even when using internal formatting](https://github.com/yt-dlp/yt-dlp/commit/47bcd437247152e0af5b3ebc5592db7bb66855c2) by [pukkandan](https://github.com/pukkandan)
+- **utils**: clean_podcast_url: [Handle protocol in redirect URL](https://github.com/yt-dlp/yt-dlp/commit/91302ed349f34dc26cc1d661bb45a4b71f4417f7) by [pukkandan](https://github.com/pukkandan)
+
+#### Extractor changes
+- **abc**: [Fix extraction](https://github.com/yt-dlp/yt-dlp/commit/8f05fbae2a79ce0713077ccc68b354e63216bf20) ([#7434](https://github.com/yt-dlp/yt-dlp/issues/7434)) by [meliber](https://github.com/meliber)
+- **AdultSwim**: [Extract subtitles from m3u8](https://github.com/yt-dlp/yt-dlp/commit/5e16cf92eb496b7c1541a6b1d727cb87542984db) ([#7421](https://github.com/yt-dlp/yt-dlp/issues/7421)) by [nnoboa](https://github.com/nnoboa)
+- **crunchyroll**: music: [Fix `_VALID_URL`](https://github.com/yt-dlp/yt-dlp/commit/5b4b92769afcc398475e481bfa839f1158902fe9) ([#7439](https://github.com/yt-dlp/yt-dlp/issues/7439)) by [AmanSal1](https://github.com/AmanSal1), [rdamas](https://github.com/rdamas)
+- **Douyin**: [Fix extraction from webpage](https://github.com/yt-dlp/yt-dlp/commit/a2be9781fbf4d7e4db245c277ca2ecc41cf3a7b2) by [bashonly](https://github.com/bashonly)
+- **googledrive**: [Fix source format extraction](https://github.com/yt-dlp/yt-dlp/commit/3b7f5300c577fef40464d46d4e4037a69d51fe82) ([#7395](https://github.com/yt-dlp/yt-dlp/issues/7395)) by [RfadnjdExt](https://github.com/RfadnjdExt)
+- **kick**: [Fix `_VALID_URL`](https://github.com/yt-dlp/yt-dlp/commit/ef8509c300ea50da86aea447eb214d3d6f6db6bb) by [bashonly](https://github.com/bashonly)
+- **qdance**: [Add extractor](https://github.com/yt-dlp/yt-dlp/commit/f0a1ff118145b6449982ba401f9a9f656ecd8062) ([#7420](https://github.com/yt-dlp/yt-dlp/issues/7420)) by [bashonly](https://github.com/bashonly)
+- **sbs**: [Python 3.7 compat](https://github.com/yt-dlp/yt-dlp/commit/f393bbe724b1fc6c7f754a5da507e807b2b40ad2) by [pukkandan](https://github.com/pukkandan)
+- **stacommu**: [Add extractors](https://github.com/yt-dlp/yt-dlp/commit/af1fd12f675220df6793fc019dff320bc76e8080) ([#7432](https://github.com/yt-dlp/yt-dlp/issues/7432)) by [urectanc](https://github.com/urectanc)
+- **twitter**
+ - [Fix unauthenticated extraction](https://github.com/yt-dlp/yt-dlp/commit/49296437a8e5fa91dacb5446e51ab588474c85d3) ([#7476](https://github.com/yt-dlp/yt-dlp/issues/7476)) by [bashonly](https://github.com/bashonly)
+ - spaces: [Fix extraction](https://github.com/yt-dlp/yt-dlp/commit/1cffd621cb371f1563563cfb2fe37d137e8a7bee) ([#7512](https://github.com/yt-dlp/yt-dlp/issues/7512)) by [bashonly](https://github.com/bashonly)
+- **vidlii**: [Handle relative URLs](https://github.com/yt-dlp/yt-dlp/commit/ad8902f616ad2541f9b9626738f1393fad89a64c) by [pukkandan](https://github.com/pukkandan)
+- **vk**: VKPlay, VKPlayLive: [Add extractors](https://github.com/yt-dlp/yt-dlp/commit/8776349ef6b1f644584a92dfa00a05208a48edc4) ([#7358](https://github.com/yt-dlp/yt-dlp/issues/7358)) by [c-basalt](https://github.com/c-basalt)
+- **youtube**
+ - [Add extractor-arg `formats`](https://github.com/yt-dlp/yt-dlp/commit/58786a10f212bd63f9ad1d0b4d9e4d31c3b385e2) by [pukkandan](https://github.com/pukkandan)
+ - [Avoid false DRM detection](https://github.com/yt-dlp/yt-dlp/commit/94ed638a437fc766699d440e978982e24ce6a30a) ([#7396](https://github.com/yt-dlp/yt-dlp/issues/7396)) by [pukkandan](https://github.com/pukkandan)
+ - [Fix comments' `is_favorited`](https://github.com/yt-dlp/yt-dlp/commit/89bed013741a776506f60380b7fd89d27d0710b4) ([#7390](https://github.com/yt-dlp/yt-dlp/issues/7390)) by [bbilly1](https://github.com/bbilly1)
+ - [Ignore incomplete data for comment threads by default](https://github.com/yt-dlp/yt-dlp/commit/4dc4d8473c085900edc841c87c20041233d25b1f) ([#7475](https://github.com/yt-dlp/yt-dlp/issues/7475)) by [coletdjnz](https://github.com/coletdjnz)
+ - [Process `post_live` over 2 hours](https://github.com/yt-dlp/yt-dlp/commit/d949c10c45bfc359bdacd52e6a180169b8128958) by [pukkandan](https://github.com/pukkandan)
+ - stories: [Remove](https://github.com/yt-dlp/yt-dlp/commit/90db9a3c00ca80492c6a58c542e4cbf4c2710866) ([#7459](https://github.com/yt-dlp/yt-dlp/issues/7459)) by [pukkandan](https://github.com/pukkandan)
+ - tab: [Support shorts-only playlists](https://github.com/yt-dlp/yt-dlp/commit/fcbc9ed760be6e3455bbadfaf277b4504b06f068) ([#7425](https://github.com/yt-dlp/yt-dlp/issues/7425)) by [coletdjnz](https://github.com/coletdjnz)
+
+#### Downloader changes
+- **aria2c**: [Add `--no-conf`](https://github.com/yt-dlp/yt-dlp/commit/8a8af356e3bba98a7f7d333aff0777d5d92130c8) by [pukkandan](https://github.com/pukkandan)
+- **external**: [Scope cookies](https://github.com/yt-dlp/yt-dlp/commit/1ceb657bdd254ad961489e5060f2ccc7d556b729) by [bashonly](https://github.com/bashonly), [coletdjnz](https://github.com/coletdjnz)
+- **http**: [Avoid infinite loop when no data is received](https://github.com/yt-dlp/yt-dlp/commit/662ef1e910b72e57957f06589925b2332ba52821) by [pukkandan](https://github.com/pukkandan)
+
+#### Misc. changes
+- [Add CodeQL workflow](https://github.com/yt-dlp/yt-dlp/commit/6355b5f1e1e8e7f4ef866d71d51e03baf0e82f17) ([#7497](https://github.com/yt-dlp/yt-dlp/issues/7497)) by [jorgectf](https://github.com/jorgectf)
+- **cleanup**: Miscellaneous: [337734d](https://github.com/yt-dlp/yt-dlp/commit/337734d4a8a6500bc65434843db346b5cbd05e81) by [pukkandan](https://github.com/pukkandan)
+- **docs**: [Minor fixes](https://github.com/yt-dlp/yt-dlp/commit/b532a3481046e1eabb6232ee8196fb696c356ff6) by [pukkandan](https://github.com/pukkandan)
+- **make_changelog**: [Skip reverted commits](https://github.com/yt-dlp/yt-dlp/commit/fa44802809d189fca0f4782263d48d6533384503) by [pukkandan](https://github.com/pukkandan)
+
+### 2023.06.22
+
+#### Core changes
+- [Fix bug in db3ad8a67661d7b234a6954d9c6a4a9b1749f5eb](https://github.com/yt-dlp/yt-dlp/commit/d7cd97e8d8d42b500fea9abb2aa4ac9b0f98b2ad) by [pukkandan](https://github.com/pukkandan)
+- [Improve `--download-sections`](https://github.com/yt-dlp/yt-dlp/commit/b4e0d75848e9447cee2cd3646ce54d4744a7ff56) by [pukkandan](https://github.com/pukkandan) (see the sketch after this list)
+ - Support negative time-ranges
+ - Add `*from-url` to obey time-ranges in URL
+- [Indicate `filesize` approximated from `tbr` better](https://github.com/yt-dlp/yt-dlp/commit/0dff8e4d1e6e9fb938f4256ea9af7d81f42fd54f) by [pukkandan](https://github.com/pukkandan)
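+
+A hedged sketch of the negative time-range support via the Python API (the URL is a placeholder, and the `download_range_func` helper is assumed to take `(start, end)` tuples in seconds): a negative start is counted from the end of the video. On the CLI, `--download-sections "*from-url"` instead obeys a time-range embedded in the input URL itself.
+
+```python
+import yt_dlp
+from yt_dlp.utils import download_range_func
+
+# No chapter regexes (None); one range covering roughly the last 30 seconds.
+opts = {'download_ranges': download_range_func(None, [(-30, float('inf'))])}
+with yt_dlp.YoutubeDL(opts) as ydl:
+    ydl.download(['https://example.com/video'])
+```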
+
+#### Extractor changes
+- [Support multiple `_VALID_URL`s](https://github.com/yt-dlp/yt-dlp/commit/5fd8367496b42c7b900b896a0d5460561a2859de) ([#5812](https://github.com/yt-dlp/yt-dlp/issues/5812)) by [nixxo](https://github.com/nixxo)
+- **dplay**: GlobalCyclingNetworkPlus: [Add extractor](https://github.com/yt-dlp/yt-dlp/commit/774aa09dd6aa61ced9ec818d1f67e53414d22762) ([#7360](https://github.com/yt-dlp/yt-dlp/issues/7360)) by [bashonly](https://github.com/bashonly)
+- **dropout**: [Fix season extraction](https://github.com/yt-dlp/yt-dlp/commit/db22142f6f817ff673d417b4b78e8db497bf8ab3) ([#7304](https://github.com/yt-dlp/yt-dlp/issues/7304)) by [OverlordQ](https://github.com/OverlordQ)
+- **motherless**: [Add gallery support, fix groups](https://github.com/yt-dlp/yt-dlp/commit/f2ff0f6f1914b82d4a51681a72cc0828115dcb4a) ([#7211](https://github.com/yt-dlp/yt-dlp/issues/7211)) by [rexlambert22](https://github.com/rexlambert22), [Ti4eeT4e](https://github.com/Ti4eeT4e)
+- **nebula**: [Fix extractor](https://github.com/yt-dlp/yt-dlp/commit/3f756c8c4095b942cf49788eb0862ceaf57847f2) ([#7156](https://github.com/yt-dlp/yt-dlp/issues/7156)) by [Lamieur](https://github.com/Lamieur), [rohieb](https://github.com/rohieb)
+- **rheinmaintv**: [Add extractor](https://github.com/yt-dlp/yt-dlp/commit/98cb1eda7a4cf67c96078980dbd63e6c06ad7f7c) ([#7311](https://github.com/yt-dlp/yt-dlp/issues/7311)) by [barthelmannk](https://github.com/barthelmannk)
+- **youtube**
+ - [Add `ios` to default clients used](https://github.com/yt-dlp/yt-dlp/commit/1e75d97db21152acc764b30a688e516f04b8a142) by [pukkandan](https://github.com/pukkandan) (a usage sketch follows this list)
+ - iOS is affected by neither 403 errors nor nsig throttling, so adding it helps mitigate both preemptively
+ - iOS also has higher-bitrate 'premium' formats, though they are not labeled as such
+ - [Improve description parsing performance](https://github.com/yt-dlp/yt-dlp/commit/71dc18fa29263a1ff0472c23d81bfc8dd4422d48) ([#7315](https://github.com/yt-dlp/yt-dlp/issues/7315)) by [berkanteber](https://github.com/berkanteber), [pukkandan](https://github.com/pukkandan)
+ - [Improve nsig function name extraction](https://github.com/yt-dlp/yt-dlp/commit/cd810afe2ac5567c822b7424800fc470ef2d0045) by [pukkandan](https://github.com/pukkandan)
+ - [Workaround 403 for android formats](https://github.com/yt-dlp/yt-dlp/commit/81ca451480051d7ce1a31c017e005358345a9149) by [pukkandan](https://github.com/pukkandan)
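+
+If the new `ios` default is undesirable, the client list can be pinned explicitly. A minimal sketch using the documented `youtube:player_client` extractor-arg (the video ID is yt-dlp's usual test video; the nested dict-of-lists mirrors `--extractor-args "youtube:player_client=android,web"`):
+
+```python
+import yt_dlp
+
+# Pin the player clients, e.g. dropping 'ios' again.
+opts = {'extractor_args': {'youtube': {'player_client': ['android', 'web']}}}
+with yt_dlp.YoutubeDL(opts) as ydl:
+    info = ydl.extract_info('https://www.youtube.com/watch?v=BaW_jenozKc',
+                            download=False)
+```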
+
+#### Misc. changes
+- [Revert "Add automatic duplicate issue detection"](https://github.com/yt-dlp/yt-dlp/commit/a4486bfc1dc7057efca9dd3fe70d7fa25c56f700) by [pukkandan](https://github.com/pukkandan)
+- **cleanup**
+ - Miscellaneous
+ - [7f9c6a6](https://github.com/yt-dlp/yt-dlp/commit/7f9c6a63b16e145495479e9f666f5b9e2ee69e2f) by [bashonly](https://github.com/bashonly)
+ - [812cdfa](https://github.com/yt-dlp/yt-dlp/commit/812cdfa06c33a40e73a8e04b3e6f42c084666a43) by [pukkandan](https://github.com/pukkandan)
+
+### 2023.06.21
+
+#### Important changes
+- YouTube: Improved throttling and signature fixes
+
+#### Core changes
+- [Add `--compat-option playlist-match-filter`](https://github.com/yt-dlp/yt-dlp/commit/93b39cdbd9dcf351bfa0c4ee252805b4617fdca9) by [pukkandan](https://github.com/pukkandan)
+- [Add `--no-quiet`](https://github.com/yt-dlp/yt-dlp/commit/d669772c65e8630162fd6555d0a578b246591921) by [pukkandan](https://github.com/pukkandan)
+- [Add option `--color`](https://github.com/yt-dlp/yt-dlp/commit/8417f26b8a819cd7ffcd4e000ca3e45033e670fb) ([#6904](https://github.com/yt-dlp/yt-dlp/issues/6904)) by [Grub4K](https://github.com/Grub4K)
+- [Add option `--netrc-cmd`](https://github.com/yt-dlp/yt-dlp/commit/db3ad8a67661d7b234a6954d9c6a4a9b1749f5eb) ([#6682](https://github.com/yt-dlp/yt-dlp/issues/6682)) by [NDagestad](https://github.com/NDagestad), [pukkandan](https://github.com/pukkandan)
+- [Add option `--xff`](https://github.com/yt-dlp/yt-dlp/commit/c16644642b08e2bf4130a6c5fa01395d8718c990) by [pukkandan](https://github.com/pukkandan)
+- [Auto-select default format in `-f-`](https://github.com/yt-dlp/yt-dlp/commit/372a0f3b9dadd1e52234b498aa4c7040ef868c7d) ([#7101](https://github.com/yt-dlp/yt-dlp/issues/7101)) by [ivanskodje](https://github.com/ivanskodje), [pukkandan](https://github.com/pukkandan)
+- [Deprecate internal `Youtubedl-no-compression` header](https://github.com/yt-dlp/yt-dlp/commit/955c89584b66fcd0fcfab3e611f1edeb1ca63886) ([#6876](https://github.com/yt-dlp/yt-dlp/issues/6876)) by [coletdjnz](https://github.com/coletdjnz)
+- [Do not translate newlines in `--print-to-file`](https://github.com/yt-dlp/yt-dlp/commit/9874e82b5a61582169300bea561b3e8899ad1ef7) by [pukkandan](https://github.com/pukkandan)
+- [Ensure pre-processor errors do not block `--print`](https://github.com/yt-dlp/yt-dlp/commit/f005a35aa7e4f67a0c603a946c0dd714c151b2d6) by [pukkandan](https://github.com/pukkandan) (With fixes in [17ba434](https://github.com/yt-dlp/yt-dlp/commit/17ba4343cf99701692a7f4798fd42b50f644faba))
+- [Fix `filepath` being copied to underlying format dict](https://github.com/yt-dlp/yt-dlp/commit/84078a8b38f403495d00b46654c8750774d821de) by [pukkandan](https://github.com/pukkandan)
+- [Improve HTTP redirect handling](https://github.com/yt-dlp/yt-dlp/commit/08916a49c777cb6e000eec092881eb93ec22076c) ([#7094](https://github.com/yt-dlp/yt-dlp/issues/7094)) by [coletdjnz](https://github.com/coletdjnz)
+- [Populate `filename` and `urls` fields at all stages of `--print`](https://github.com/yt-dlp/yt-dlp/commit/170605840ea9d5ad75da6576485ea7d125b428ee) by [pukkandan](https://github.com/pukkandan) (With fixes in [b5f61b6](https://github.com/yt-dlp/yt-dlp/commit/b5f61b69d4561b81fc98c226b176f0c15493e688))
+- [Relax validation for numeric format filters](https://github.com/yt-dlp/yt-dlp/commit/c3f624ef0a5d7a6ae1c5ffeb243087e9fc7d79dc) by [pukkandan](https://github.com/pukkandan)
+- [Support decoding multiple content encodings](https://github.com/yt-dlp/yt-dlp/commit/daafbf49b3482edae4d70dd37070be99742a926e) ([#7142](https://github.com/yt-dlp/yt-dlp/issues/7142)) by [coletdjnz](https://github.com/coletdjnz)
+- [Support loading info.json with a list at its root](https://github.com/yt-dlp/yt-dlp/commit/ab1de9cb1e39cf421c2b7dc6756c6ff1955bb313) by [pukkandan](https://github.com/pukkandan)
+- [Workaround erroneous urllib Windows proxy parsing](https://github.com/yt-dlp/yt-dlp/commit/3f66b6fe50f8d5b545712f8b19d5ae62f5373980) ([#7092](https://github.com/yt-dlp/yt-dlp/issues/7092)) by [coletdjnz](https://github.com/coletdjnz)
+- **cookies**
+ - [Defer extraction of v11 key from keyring](https://github.com/yt-dlp/yt-dlp/commit/9b7a48abd1b187eae1e3f6c9839c47d43ccec00b) by [Grub4K](https://github.com/Grub4K)
+ - [Move `YoutubeDLCookieJar` to cookies module](https://github.com/yt-dlp/yt-dlp/commit/b87e01c123fd560b6a674ce00f45a9459d82d98a) ([#7091](https://github.com/yt-dlp/yt-dlp/issues/7091)) by [coletdjnz](https://github.com/coletdjnz)
+ - [Support custom Safari cookies path](https://github.com/yt-dlp/yt-dlp/commit/a58182b75a05fe0a10c5e94a536711d3ade19c20) ([#6783](https://github.com/yt-dlp/yt-dlp/issues/6783)) by [NextFire](https://github.com/NextFire)
+ - [Update for chromium changes](https://github.com/yt-dlp/yt-dlp/commit/b38d4c941d1993ab27e4c0f8e024e23c2ec0f8f8) ([#6897](https://github.com/yt-dlp/yt-dlp/issues/6897)) by [mbway](https://github.com/mbway)
+- **Cryptodome**: [Fix `__bool__`](https://github.com/yt-dlp/yt-dlp/commit/98ac902c4979e4529b166e873473bef42baa2e3e) by [pukkandan](https://github.com/pukkandan)
+- **jsinterp**
+ - [Do not compile regex](https://github.com/yt-dlp/yt-dlp/commit/7aeda6cc9e73ada0b0a0b6a6748c66bef63a20a8) by [pukkandan](https://github.com/pukkandan)
+ - [Fix division](https://github.com/yt-dlp/yt-dlp/commit/b4a252fba81f53631c07ca40ce7583f5d19a8a36) ([#7279](https://github.com/yt-dlp/yt-dlp/issues/7279)) by [bashonly](https://github.com/bashonly)
+ - [Fix global object extraction](https://github.com/yt-dlp/yt-dlp/commit/01aba2519a0884ef17d5f85608dbd2a455577147) by [pukkandan](https://github.com/pukkandan)
+ - [Handle `NaN` in bitwise operators](https://github.com/yt-dlp/yt-dlp/commit/1d7656184c6b8aa46b29149893894b3c24f1df00) by [pukkandan](https://github.com/pukkandan)
+ - [Handle negative numbers better](https://github.com/yt-dlp/yt-dlp/commit/7cf51f21916292cd80bdeceb37489f5322f166dd) by [pukkandan](https://github.com/pukkandan)
+- **outtmpl**
+ - [Allow `\n` in replacements and default](https://github.com/yt-dlp/yt-dlp/commit/78fde6e3398ff11e5d383a66b28664badeab5180) by [pukkandan](https://github.com/pukkandan)
+ - [Fix some minor bugs](https://github.com/yt-dlp/yt-dlp/commit/ebe1b4e34f43c3acad30e4bcb8484681a030c114) by [pukkandan](https://github.com/pukkandan) (With fixes in [1619ab3](https://github.com/yt-dlp/yt-dlp/commit/1619ab3e67d8dc4f86fc7ed292c79345bc0d91a0))
+ - [Support `str.format` syntax inside replacements](https://github.com/yt-dlp/yt-dlp/commit/ec9311c41b111110bc52cfbd6ea682c6fb23f77a) by [pukkandan](https://github.com/pukkandan)
+- **update**
+ - [Better error handling](https://github.com/yt-dlp/yt-dlp/commit/d2e84d5eb01c66fc5304e8566348d65a7be24ed7) by [pukkandan](https://github.com/pukkandan)
+ - [Do not restart into versions without `--update-to`](https://github.com/yt-dlp/yt-dlp/commit/02948a17d903f544363bb20b51a6d8baed7bba08) by [pukkandan](https://github.com/pukkandan)
+ - [Implement `--update-to` repo](https://github.com/yt-dlp/yt-dlp/commit/665472a7de3880578c0b7b3f95c71570c056368e) by [Grub4K](https://github.com/Grub4K), [pukkandan](https://github.com/pukkandan)
+- **upstream**
+ - [Merged with youtube-dl 07af47](https://github.com/yt-dlp/yt-dlp/commit/42f2d40b475db66486a4b4fe5b56751a640db5db) by [pukkandan](https://github.com/pukkandan)
+ - [Merged with youtube-dl d1c6c5](https://github.com/yt-dlp/yt-dlp/commit/4823ec9f461512daa1b8ab362893bb86a6320b26) by [pukkandan](https://github.com/pukkandan) (With fixes in [edbe5b5](https://github.com/yt-dlp/yt-dlp/commit/edbe5b589dd0860a67b4e03f58db3cd2539d91c2) by [bashonly](https://github.com/bashonly))
+- **utils**
+ - `FormatSorter`: [Improve `size` and `br`](https://github.com/yt-dlp/yt-dlp/commit/eedda5252c05327748dede204a8fccafa0288118) by [pukkandan](https://github.com/pukkandan), [u-spec-png](https://github.com/u-spec-png)
+ - `js_to_json`: [Implement template strings](https://github.com/yt-dlp/yt-dlp/commit/0898c5c8ccadfc404472456a7a7751b72afebadd) ([#6623](https://github.com/yt-dlp/yt-dlp/issues/6623)) by [Grub4K](https://github.com/Grub4K)
+ - `locked_file`: [Fix for virtiofs](https://github.com/yt-dlp/yt-dlp/commit/45998b3e371b819ce0dbe50da703809a048cc2fe) ([#6840](https://github.com/yt-dlp/yt-dlp/issues/6840)) by [brandon-dacrib](https://github.com/brandon-dacrib)
+ - `strftime_or_none`: [Handle negative timestamps](https://github.com/yt-dlp/yt-dlp/commit/a35af4306d24c56c6358f89cdf204860d1cd62b4) by [dirkf](https://github.com/dirkf), [pukkandan](https://github.com/pukkandan)
+ - `traverse_obj`
+ - [Allow iterables in traversal](https://github.com/yt-dlp/yt-dlp/commit/21b5ec86c2c37d10c5bb97edd7051d3aac16bb3e) ([#6902](https://github.com/yt-dlp/yt-dlp/issues/6902)) by [Grub4K](https://github.com/Grub4K) (see the sketch after this list)
+ - [More fixes](https://github.com/yt-dlp/yt-dlp/commit/b079c26f0af8085bccdadc72c61c8164ca5ab0f8) ([#6959](https://github.com/yt-dlp/yt-dlp/issues/6959)) by [Grub4K](https://github.com/Grub4K)
+ - `write_string`: [Fix noconsole behavior](https://github.com/yt-dlp/yt-dlp/commit/3b479100df02e20dd949e046003ae96ddbfced57) by [Grub4K](https://github.com/Grub4K)
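+
+A small sketch of the `traverse_obj` iterables change referenced above (names are illustrative; behavior as documented for the `...` branching key):
+
+```python
+from yt_dlp.utils import traverse_obj
+
+# `...` now branches over arbitrary iterables such as generators, not just
+# lists/tuples; items missing the key are skipped silently.
+formats = ({'url': f'https://cdn.example/{i}'} for i in range(2))
+urls = traverse_obj({'formats': formats}, ('formats', ..., 'url'))
+assert urls == ['https://cdn.example/0', 'https://cdn.example/1']
+```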
+
+#### Extractor changes
+- [Do not exit early for unsuitable `url_result`](https://github.com/yt-dlp/yt-dlp/commit/baa922b5c74b10e3b86ff5e6cf6529b3aae8efab) by [pukkandan](https://github.com/pukkandan)
+- [Do not warn for invalid chapter data in description](https://github.com/yt-dlp/yt-dlp/commit/84ffeb7d5e72e3829319ba7720a8480fc4c7503b) by [pukkandan](https://github.com/pukkandan)
+- [Extract more metadata from ISM](https://github.com/yt-dlp/yt-dlp/commit/f68434cc74cfd3db01b266476a2eac8329fbb267) by [pukkandan](https://github.com/pukkandan)
+- **abematv**: [Add fallback for title and description extraction and extract more metadata](https://github.com/yt-dlp/yt-dlp/commit/c449c0655d7c8549e6e1389c26b628053b253d39) ([#6994](https://github.com/yt-dlp/yt-dlp/issues/6994)) by [Lesmiscore](https://github.com/Lesmiscore)
+- **acast**: [Support embeds](https://github.com/yt-dlp/yt-dlp/commit/c91ac833ea99b00506e470a44cf930e4e23378c9) ([#7212](https://github.com/yt-dlp/yt-dlp/issues/7212)) by [pabs3](https://github.com/pabs3)
+- **adobepass**: [Handle `Charter_Direct` MSO as `Spectrum`](https://github.com/yt-dlp/yt-dlp/commit/ea0570820336a0fe9c3b530d1b0d1e59313274f4) ([#6824](https://github.com/yt-dlp/yt-dlp/issues/6824)) by [bashonly](https://github.com/bashonly)
+- **aeonco**: [Support Youtube embeds](https://github.com/yt-dlp/yt-dlp/commit/ed81b74802b4247ee8d9dc0ef87eb52baefede1c) ([#6591](https://github.com/yt-dlp/yt-dlp/issues/6591)) by [alexklapheke](https://github.com/alexklapheke)
+- **afreecatv**: [Fix extractor](https://github.com/yt-dlp/yt-dlp/commit/fdd69db38924c38194ef236b26325d66ac815c88) ([#6283](https://github.com/yt-dlp/yt-dlp/issues/6283)) by [blmarket](https://github.com/blmarket)
+- **ARDBetaMediathek**: [Add thumbnail](https://github.com/yt-dlp/yt-dlp/commit/f78eb41e1c0f1dcdb10317358a26bf541dc7ee15) ([#6890](https://github.com/yt-dlp/yt-dlp/issues/6890)) by [StefanLobbenmeier](https://github.com/StefanLobbenmeier)
+- **bibeltv**: [Fix extraction, support live streams and series](https://github.com/yt-dlp/yt-dlp/commit/4ad58667c102bd82a7c4cca8aa395ec1682e3b4c) ([#6505](https://github.com/yt-dlp/yt-dlp/issues/6505)) by [flashdagger](https://github.com/flashdagger)
+- **bilibili**
+ - [Support festival videos](https://github.com/yt-dlp/yt-dlp/commit/ab29e47029e2f5b48abbbab78e82faf7cf6e9506) ([#6547](https://github.com/yt-dlp/yt-dlp/issues/6547)) by [qbnu](https://github.com/qbnu)
+ - SpaceVideo: [Extract signature](https://github.com/yt-dlp/yt-dlp/commit/6f10cdcf7eeaeae5b75e0a4428cd649c156a2d83) ([#7149](https://github.com/yt-dlp/yt-dlp/issues/7149)) by [elyse0](https://github.com/elyse0)
+- **biliIntl**: [Add comment extraction](https://github.com/yt-dlp/yt-dlp/commit/b093c38cc9f26b59a8504211d792f053142c847d) ([#6079](https://github.com/yt-dlp/yt-dlp/issues/6079)) by [HobbyistDev](https://github.com/HobbyistDev)
+- **bitchute**: [Add more fallback subdomains](https://github.com/yt-dlp/yt-dlp/commit/0c4e0fbcade0fc92d14c2a6d63e360fe067f6192) ([#6907](https://github.com/yt-dlp/yt-dlp/issues/6907)) by [Neurognostic](https://github.com/Neurognostic)
+- **booyah**: [Remove extractor](https://github.com/yt-dlp/yt-dlp/commit/f7f7a877bf8e87fd4eb0ad2494ad948ca7691114) by [pukkandan](https://github.com/pukkandan)
+- **BrainPOP**: [Add extractors](https://github.com/yt-dlp/yt-dlp/commit/979568f26ece80bca72b48f0dd57d676e431059a) ([#6106](https://github.com/yt-dlp/yt-dlp/issues/6106)) by [MinePlayersPE](https://github.com/MinePlayersPE)
+- **bravotv**
+ - [Detect DRM](https://github.com/yt-dlp/yt-dlp/commit/1fe5bf240e6ade487d18079a62aa36bcc440a27a) ([#7171](https://github.com/yt-dlp/yt-dlp/issues/7171)) by [bashonly](https://github.com/bashonly)
+ - [Fix extractor](https://github.com/yt-dlp/yt-dlp/commit/06966cb8966b9aa4f60ab9c44c182a057d4ca3a3) ([#6568](https://github.com/yt-dlp/yt-dlp/issues/6568)) by [bashonly](https://github.com/bashonly)
+- **camfm**: [Add extractors](https://github.com/yt-dlp/yt-dlp/commit/4cbfa570a1b9bd65b0f48770693377e8d842dcb0) ([#7083](https://github.com/yt-dlp/yt-dlp/issues/7083)) by [garret1317](https://github.com/garret1317)
+- **cbc**
+ - [Fix live extractor, playlist `_VALID_URL`](https://github.com/yt-dlp/yt-dlp/commit/7a7b1376fbce0067cf37566bb47131bc0022638d) ([#6625](https://github.com/yt-dlp/yt-dlp/issues/6625)) by [makew0rld](https://github.com/makew0rld)
+ - [Ignore 426 from API](https://github.com/yt-dlp/yt-dlp/commit/4afb208cf07b59291ae3b0c4efc83945ee5b8812) ([#6781](https://github.com/yt-dlp/yt-dlp/issues/6781)) by [jo-nike](https://github.com/jo-nike)
+ - gem: [Update `_VALID_URL`](https://github.com/yt-dlp/yt-dlp/commit/871c907454693940cb56906ed9ea49fcb7154829) ([#6499](https://github.com/yt-dlp/yt-dlp/issues/6499)) by [makeworld-the-better-one](https://github.com/makeworld-the-better-one)
+- **cbs**: [Add `ParamountPressExpress` extractor](https://github.com/yt-dlp/yt-dlp/commit/44369c9afa996e14e9f466754481d878811b5b4a) ([#6604](https://github.com/yt-dlp/yt-dlp/issues/6604)) by [bashonly](https://github.com/bashonly)
+- **cbsnews**: [Overhaul extractors](https://github.com/yt-dlp/yt-dlp/commit/f6e43d6fa9804c24525e1fed0a87782754dab7ed) ([#6681](https://github.com/yt-dlp/yt-dlp/issues/6681)) by [bashonly](https://github.com/bashonly)
+- **chilloutzone**: [Fix extractor](https://github.com/yt-dlp/yt-dlp/commit/6f4fc5660f40f3458882a8f51601eae4af7be609) ([#6445](https://github.com/yt-dlp/yt-dlp/issues/6445)) by [bashonly](https://github.com/bashonly)
+- **clipchamp**: [Add extractor](https://github.com/yt-dlp/yt-dlp/commit/2f07c4c1da4361af213e5791279b9d152d2e4ce3) ([#6978](https://github.com/yt-dlp/yt-dlp/issues/6978)) by [bashonly](https://github.com/bashonly)
+- **comedycentral**: [Add support for movies](https://github.com/yt-dlp/yt-dlp/commit/66468bbf49562ff82670cbbd456c5e8448a6df34) ([#7108](https://github.com/yt-dlp/yt-dlp/issues/7108)) by [sqrtNOT](https://github.com/sqrtNOT)
+- **crtvg**: [Add extractor](https://github.com/yt-dlp/yt-dlp/commit/26c517b29c8727e47948d6fff749d5297f0efb60) ([#7168](https://github.com/yt-dlp/yt-dlp/issues/7168)) by [ItzMaxTV](https://github.com/ItzMaxTV)
+- **crunchyroll**: [Rework with support for movies, music and artists](https://github.com/yt-dlp/yt-dlp/commit/032de83ea9ff2f4977d9c71a93bbc1775597b762) ([#6237](https://github.com/yt-dlp/yt-dlp/issues/6237)) by [Grub4K](https://github.com/Grub4K)
+- **dacast**: [Add extractors](https://github.com/yt-dlp/yt-dlp/commit/c25cac2f8e5fbac2737a426d7778fd2f0efc5381) ([#6896](https://github.com/yt-dlp/yt-dlp/issues/6896)) by [bashonly](https://github.com/bashonly)
+- **daftsex**: [Update domain and embed player url](https://github.com/yt-dlp/yt-dlp/commit/fc5a7f9b27d2a89b1f3ca7d33a95301c21d832cd) ([#5966](https://github.com/yt-dlp/yt-dlp/issues/5966)) by [JChris246](https://github.com/JChris246)
+- **DigitalConcertHall**: [Support films](https://github.com/yt-dlp/yt-dlp/commit/55ed4ff73487feb3177b037dfc2ea527e777da3e) ([#7202](https://github.com/yt-dlp/yt-dlp/issues/7202)) by [ItzMaxTV](https://github.com/ItzMaxTV)
+- **discogs**: [Add extractor](https://github.com/yt-dlp/yt-dlp/commit/6daaf21092888beff11b807cd46f832f1f9c46a0) ([#6624](https://github.com/yt-dlp/yt-dlp/issues/6624)) by [rjy](https://github.com/rjy)
+- **dlf**: [Add extractors](https://github.com/yt-dlp/yt-dlp/commit/b423b6a48e0b19260bc95ab7d72d2138d7f124dc) ([#6697](https://github.com/yt-dlp/yt-dlp/issues/6697)) by [nick-cd](https://github.com/nick-cd)
+- **drtv**: [Fix radio page extraction](https://github.com/yt-dlp/yt-dlp/commit/9a06b7b1891b48cebbe275652ae8025a36d97d97) ([#6552](https://github.com/yt-dlp/yt-dlp/issues/6552)) by [viktor-enzell](https://github.com/viktor-enzell)
+- **Dumpert**: [Fix m3u8 and support new URL pattern](https://github.com/yt-dlp/yt-dlp/commit/f8ae441501596733e2b967430471643a1d7cacb8) ([#6091](https://github.com/yt-dlp/yt-dlp/issues/6091)) by [DataGhost](https://github.com/DataGhost), [pukkandan](https://github.com/pukkandan)
+- **elevensports**: [Add extractor](https://github.com/yt-dlp/yt-dlp/commit/ecfe47973f6603b5367fe2cc3c65274627d94516) ([#7172](https://github.com/yt-dlp/yt-dlp/issues/7172)) by [ItzMaxTV](https://github.com/ItzMaxTV)
+- **ettutv**: [Add extractor](https://github.com/yt-dlp/yt-dlp/commit/83465fc4100a2fb2c188898fbc2f3021f6a9b4dd) ([#6579](https://github.com/yt-dlp/yt-dlp/issues/6579)) by [elyse0](https://github.com/elyse0)
+- **europarl**: [Rewrite extractor](https://github.com/yt-dlp/yt-dlp/commit/03789976d301eaed3e957dbc041573098f6af059) ([#7114](https://github.com/yt-dlp/yt-dlp/issues/7114)) by [HobbyistDev](https://github.com/HobbyistDev)
+- **eurosport**: [Improve `_VALID_URL`](https://github.com/yt-dlp/yt-dlp/commit/45e87ea106ad37b2a002663fa30ee41ce97b16cd) ([#7076](https://github.com/yt-dlp/yt-dlp/issues/7076)) by [HobbyistDev](https://github.com/HobbyistDev)
+- **facebook**: [Fix metadata extraction](https://github.com/yt-dlp/yt-dlp/commit/3b52a606881e6adadc33444abdeacce562b79330) ([#6856](https://github.com/yt-dlp/yt-dlp/issues/6856)) by [ringus1](https://github.com/ringus1)
+- **foxnews**: [Fix extractors](https://github.com/yt-dlp/yt-dlp/commit/97d60ad8cd6c99f01e463a9acfce8693aff2a609) ([#7222](https://github.com/yt-dlp/yt-dlp/issues/7222)) by [bashonly](https://github.com/bashonly)
+- **funker530**: [Add extractor](https://github.com/yt-dlp/yt-dlp/commit/cab94a0cd8b6d3fffed5a6faff030274adbed182) ([#7291](https://github.com/yt-dlp/yt-dlp/issues/7291)) by [Cyberes](https://github.com/Cyberes)
+- **generic**
+ - [Accept values for `fragment_query`, `variant_query`](https://github.com/yt-dlp/yt-dlp/commit/5cc0a8fd2e9fec50026fb92170b57993af939e4a) ([#6600](https://github.com/yt-dlp/yt-dlp/issues/6600)) by [bashonly](https://github.com/bashonly) (With fixes in [9bfe0d1](https://github.com/yt-dlp/yt-dlp/commit/9bfe0d15bd7dbdc6b0e6378fa9f5e2e289b2373b))
+ - [Add extractor-args `hls_key`, `variant_query`](https://github.com/yt-dlp/yt-dlp/commit/c2e0fc40a73dd85ab3920f977f579d475e66ef59) ([#6567](https://github.com/yt-dlp/yt-dlp/issues/6567)) by [bashonly](https://github.com/bashonly)
+ - [Attempt to detect live HLS](https://github.com/yt-dlp/yt-dlp/commit/93e7c6995e07dafb9dcc06c0d06acf6c5bdfecc5) ([#6775](https://github.com/yt-dlp/yt-dlp/issues/6775)) by [bashonly](https://github.com/bashonly)
+- **genius**: [Add support for articles](https://github.com/yt-dlp/yt-dlp/commit/460da07439718d9af1e3661da2a23e05a913a2e6) ([#6474](https://github.com/yt-dlp/yt-dlp/issues/6474)) by [bashonly](https://github.com/bashonly)
+- **globalplayer**: [Add extractors](https://github.com/yt-dlp/yt-dlp/commit/30647668a92a0ca5cd108776804baac0996bd9f7) ([#6903](https://github.com/yt-dlp/yt-dlp/issues/6903)) by [garret1317](https://github.com/garret1317)
+- **gmanetwork**: [Add extractor](https://github.com/yt-dlp/yt-dlp/commit/2d97d154fe4fb84fe2ed3a4e1ed5819e89b71e88) ([#5945](https://github.com/yt-dlp/yt-dlp/issues/5945)) by [HobbyistDev](https://github.com/HobbyistDev)
+- **gronkh**: [Extract duration and chapters](https://github.com/yt-dlp/yt-dlp/commit/9c92b803fa24e48543ce969468d5404376e315b7) ([#6817](https://github.com/yt-dlp/yt-dlp/issues/6817)) by [satan1st](https://github.com/satan1st)
+- **hentaistigma**: [Remove extractor](https://github.com/yt-dlp/yt-dlp/commit/04f8018a0544736a18494bc3899d06b05b78fae6) by [pukkandan](https://github.com/pukkandan)
+- **hidive**: [Fix login](https://github.com/yt-dlp/yt-dlp/commit/e6ab678e36c40ded0aae305bbb866cdab554d417) by [pukkandan](https://github.com/pukkandan)
+- **hollywoodreporter**: [Add extractors](https://github.com/yt-dlp/yt-dlp/commit/6bdb64e2a2a6d504d8ce1dc830fbfb8a7f199c63) ([#6614](https://github.com/yt-dlp/yt-dlp/issues/6614)) by [bashonly](https://github.com/bashonly)
+- **hotstar**: [Support `/shows/` URLs](https://github.com/yt-dlp/yt-dlp/commit/7f8ddebbb51c9fd4a347306332a718ba41b371b8) ([#7225](https://github.com/yt-dlp/yt-dlp/issues/7225)) by [bashonly](https://github.com/bashonly)
+- **hrefli**: [Add extractor](https://github.com/yt-dlp/yt-dlp/commit/7e35526d5b970a034b9d76215ee3e4bd7631edcd) ([#6762](https://github.com/yt-dlp/yt-dlp/issues/6762)) by [selfisekai](https://github.com/selfisekai)
+- **idolplus**: [Add extractor](https://github.com/yt-dlp/yt-dlp/commit/5c14b213679ed4401288bdc86ae696932e219222) ([#6732](https://github.com/yt-dlp/yt-dlp/issues/6732)) by [ping](https://github.com/ping)
+- **iq**: [Set more language codes](https://github.com/yt-dlp/yt-dlp/commit/2d5cae9636714ff922d28c548c349d5f2b48f317) ([#6476](https://github.com/yt-dlp/yt-dlp/issues/6476)) by [D0LLYNH0](https://github.com/D0LLYNH0)
+- **iwara**
+ - [Accept old URLs](https://github.com/yt-dlp/yt-dlp/commit/ab92d8651c48d247dfb7d3f0a824cc986e47c7ed) by [Lesmiscore](https://github.com/Lesmiscore)
+ - [Fix authentication](https://github.com/yt-dlp/yt-dlp/commit/0a5d7c39e17bb9bd50c9db42bcad40eb82d7f784) ([#7137](https://github.com/yt-dlp/yt-dlp/issues/7137)) by [toomyzoom](https://github.com/toomyzoom)
+ - [Fix format sorting](https://github.com/yt-dlp/yt-dlp/commit/56793f74c36899742d7abd52afb0deca97d469e1) ([#6651](https://github.com/yt-dlp/yt-dlp/issues/6651)) by [hasezoey](https://github.com/hasezoey)
+ - [Fix typo](https://github.com/yt-dlp/yt-dlp/commit/d1483ec693c79f0b4ddf493870bcb840aca4da08) by [Lesmiscore](https://github.com/Lesmiscore)
+ - [Implement login](https://github.com/yt-dlp/yt-dlp/commit/21b9413cf7dd4830b2ece57af21589dd4538fc52) ([#6721](https://github.com/yt-dlp/yt-dlp/issues/6721)) by [toomyzoom](https://github.com/toomyzoom)
+ - [Overhaul extractors](https://github.com/yt-dlp/yt-dlp/commit/c14af7a741931b364bab3d9546c0f4359f318f8c) ([#6557](https://github.com/yt-dlp/yt-dlp/issues/6557)) by [Lesmiscore](https://github.com/Lesmiscore)
+ - [Report private videos](https://github.com/yt-dlp/yt-dlp/commit/95a383be1b6fb00c92ee3fb091732c4f6009acb6) ([#6641](https://github.com/yt-dlp/yt-dlp/issues/6641)) by [Lesmiscore](https://github.com/Lesmiscore)
+- **JStream**: [Add extractor](https://github.com/yt-dlp/yt-dlp/commit/3459d3c5af3b2572ed51e8ecfda6c11022a838c6) ([#6252](https://github.com/yt-dlp/yt-dlp/issues/6252)) by [Lesmiscore](https://github.com/Lesmiscore)
+- **jwplatform**: [Update `_extract_embed_urls`](https://github.com/yt-dlp/yt-dlp/commit/cf9fd52fabe71d6e7c30d3ea525029ffa561fc9c) ([#6383](https://github.com/yt-dlp/yt-dlp/issues/6383)) by [carusocr](https://github.com/carusocr)
+- **kick**: [Make initial request non-fatal](https://github.com/yt-dlp/yt-dlp/commit/0a6918a4a1431960181d8c50e0bbbcb0afbaff9a) by [bashonly](https://github.com/bashonly)
+- **LastFM**: [Rewrite playlist extraction](https://github.com/yt-dlp/yt-dlp/commit/026435714cb7c39613a0d7d2acd15d3823b78d94) ([#6379](https://github.com/yt-dlp/yt-dlp/issues/6379)) by [hatienl0i261299](https://github.com/hatienl0i261299), [pukkandan](https://github.com/pukkandan)
+- **lbry**: [Extract original quality formats](https://github.com/yt-dlp/yt-dlp/commit/44c0d66442b568d9e1359e669d8b029b08a77fa7) ([#7257](https://github.com/yt-dlp/yt-dlp/issues/7257)) by [bashonly](https://github.com/bashonly)
+- **line**: [Remove extractors](https://github.com/yt-dlp/yt-dlp/commit/faa0332ed69e070cf3bd31390589a596e962f392) ([#6734](https://github.com/yt-dlp/yt-dlp/issues/6734)) by [sian1468](https://github.com/sian1468)
+- **livestream**: [Support videos with account id](https://github.com/yt-dlp/yt-dlp/commit/bfdf144c7e5d7a93fbfa9d8e65598c72bf2b542a) ([#6324](https://github.com/yt-dlp/yt-dlp/issues/6324)) by [theperfectpunk](https://github.com/theperfectpunk)
+- **medaltv**: [Fix clips](https://github.com/yt-dlp/yt-dlp/commit/1e3c2b6ec28d7ab5e31341fa93c47b65be4fbff4) ([#6502](https://github.com/yt-dlp/yt-dlp/issues/6502)) by [xenova](https://github.com/xenova)
+- **mediastream**: [Improve `WinSports` and embed extraction](https://github.com/yt-dlp/yt-dlp/commit/03025b6e105139d01cd415ddc51fd692957fd2ba) ([#6426](https://github.com/yt-dlp/yt-dlp/issues/6426)) by [bashonly](https://github.com/bashonly)
+- **mgtv**: [Fix formats extraction](https://github.com/yt-dlp/yt-dlp/commit/59d9fe08312bbb76ee26238d207a8ca35410a48d) ([#7234](https://github.com/yt-dlp/yt-dlp/issues/7234)) by [bashonly](https://github.com/bashonly)
+- **Mzaalo**: [Add extractor](https://github.com/yt-dlp/yt-dlp/commit/dc3c44f349ba85af320e706e2a27ad81a78b1c6e) ([#7163](https://github.com/yt-dlp/yt-dlp/issues/7163)) by [ItzMaxTV](https://github.com/ItzMaxTV)
+- **nbc**: [Fix `NBCStations` direct mp4 formats](https://github.com/yt-dlp/yt-dlp/commit/9be0fe1fd967f62cbf3c60bd14e1021a70abc147) ([#6637](https://github.com/yt-dlp/yt-dlp/issues/6637)) by [bashonly](https://github.com/bashonly)
+- **nebula**: [Add `beta.nebula.tv`](https://github.com/yt-dlp/yt-dlp/commit/cbfe2e5cbe0f4649a91e323a82b8f5f774f36662) ([#6516](https://github.com/yt-dlp/yt-dlp/issues/6516)) by [unbeatable-101](https://github.com/unbeatable-101)
+- **nekohacker**: [Add extractor](https://github.com/yt-dlp/yt-dlp/commit/489f51279d00318018478fd7461eddbe3b45297e) ([#7003](https://github.com/yt-dlp/yt-dlp/issues/7003)) by [hasezoey](https://github.com/hasezoey)
+- **nhk**
+ - [Add `NhkRadiru` extractor](https://github.com/yt-dlp/yt-dlp/commit/8f0be90ecb3b8d862397177bb226f17b245ef933) ([#6819](https://github.com/yt-dlp/yt-dlp/issues/6819)) by [garret1317](https://github.com/garret1317)
+ - [Fix API extraction](https://github.com/yt-dlp/yt-dlp/commit/f41b949a2ef646fbc36375febbe3f0c19d742c0f) ([#7180](https://github.com/yt-dlp/yt-dlp/issues/7180)) by [menschel](https://github.com/menschel), [sjthespian](https://github.com/sjthespian)
+ - `NhkRadiruLive`: [Add extractor](https://github.com/yt-dlp/yt-dlp/commit/81c8b9bdd9841b72cbfc1bbff9dab5fb4aa038b0) ([#7332](https://github.com/yt-dlp/yt-dlp/issues/7332)) by [garret1317](https://github.com/garret1317)
+- **niconico**
+ - [Download comments from the new endpoint](https://github.com/yt-dlp/yt-dlp/commit/52ecc33e221f7de7eb6fed6c22489f0c5fdd2c6d) ([#6773](https://github.com/yt-dlp/yt-dlp/issues/6773)) by [Lesmiscore](https://github.com/Lesmiscore)
+ - live: [Add extractor](https://github.com/yt-dlp/yt-dlp/commit/f8f9250fe280d37f0988646cd5cc0072f4d33a6d) ([#5764](https://github.com/yt-dlp/yt-dlp/issues/5764)) by [Lesmiscore](https://github.com/Lesmiscore)
+ - series: [Fix extraction](https://github.com/yt-dlp/yt-dlp/commit/c86e433c35fe5da6cb29f3539eef97497f84ed38) ([#6898](https://github.com/yt-dlp/yt-dlp/issues/6898)) by [sqrtNOT](https://github.com/sqrtNOT)
+- **nubilesporn**: [Add extractor](https://github.com/yt-dlp/yt-dlp/commit/d4e6ef40772e0560a8ed33b844ef7549e86837be) ([#6231](https://github.com/yt-dlp/yt-dlp/issues/6231)) by [permunkle](https://github.com/permunkle)
+- **odnoklassniki**: [Fix formats extraction](https://github.com/yt-dlp/yt-dlp/commit/1a2eb5bda51d8b7a78a65acebf72a0dcf9da196b) ([#7217](https://github.com/yt-dlp/yt-dlp/issues/7217)) by [bashonly](https://github.com/bashonly)
+- **opencast**
+ - [Add ltitools to `_VALID_URL`](https://github.com/yt-dlp/yt-dlp/commit/3588be59cee429a0ab5c4ceb2f162298bb44147d) ([#6371](https://github.com/yt-dlp/yt-dlp/issues/6371)) by [C0D3D3V](https://github.com/C0D3D3V)
+ - [Fix format bug](https://github.com/yt-dlp/yt-dlp/commit/89dbf0848370deaa55af88c3593a2a264124caf5) ([#6512](https://github.com/yt-dlp/yt-dlp/issues/6512)) by [C0D3D3V](https://github.com/C0D3D3V)
+- **owncloud**: [Add extractor](https://github.com/yt-dlp/yt-dlp/commit/c6d4b82a8b8bce59b1c9ce5e6d349ea428dac0a7) ([#6533](https://github.com/yt-dlp/yt-dlp/issues/6533)) by [C0D3D3V](https://github.com/C0D3D3V)
+- **Parler**: [Rewrite extractor](https://github.com/yt-dlp/yt-dlp/commit/80ea6d3dea8483cddd39fc89b5ee1fc06670c33c) ([#6446](https://github.com/yt-dlp/yt-dlp/issues/6446)) by [JChris246](https://github.com/JChris246)
+- **pgatour**: [Add extractor](https://github.com/yt-dlp/yt-dlp/commit/3ae182ad89e1427ff7b1684d6a44ff93fa857a0c) ([#6613](https://github.com/yt-dlp/yt-dlp/issues/6613)) by [bashonly](https://github.com/bashonly)
+- **playsuisse**: [Support new URL format](https://github.com/yt-dlp/yt-dlp/commit/94627c5dde12a72766bdba36e056916c29c40ed1) ([#6528](https://github.com/yt-dlp/yt-dlp/issues/6528)) by [sbor23](https://github.com/sbor23)
+- **polskieradio**: [Improve extractors](https://github.com/yt-dlp/yt-dlp/commit/738c90a463257634455ada3e5c18b714c531dede) ([#5948](https://github.com/yt-dlp/yt-dlp/issues/5948)) by [selfisekai](https://github.com/selfisekai)
+- **pornez**: [Support new URL formats](https://github.com/yt-dlp/yt-dlp/commit/cbdf9408e6f1e35e98fd6477b3d6902df5b8a47f) ([#6792](https://github.com/yt-dlp/yt-dlp/issues/6792)) by [zhgwn](https://github.com/zhgwn)
+- **pornhub**: [Set access cookies to fix extraction](https://github.com/yt-dlp/yt-dlp/commit/62beefa818c75c20b6941389bb197051554a5d41) ([#6685](https://github.com/yt-dlp/yt-dlp/issues/6685)) by [arobase-che](https://github.com/arobase-che), [Schmoaaaaah](https://github.com/Schmoaaaaah)
+- **rai**: [Rewrite extractors](https://github.com/yt-dlp/yt-dlp/commit/c6d3f81a4077aaf9cffc6aa2d0dec92f38e74bb0) ([#5940](https://github.com/yt-dlp/yt-dlp/issues/5940)) by [danog](https://github.com/danog), [nixxo](https://github.com/nixxo)
+- **recurbate**: [Add extractor](https://github.com/yt-dlp/yt-dlp/commit/c2502cfed91415c7ccfff925fd3404d230046484) ([#6297](https://github.com/yt-dlp/yt-dlp/issues/6297)) by [mrscrapy](https://github.com/mrscrapy)
+- **reddit**
+ - [Add login support](https://github.com/yt-dlp/yt-dlp/commit/4d9280c9c853733534dda60486fa949bcca36c9e) ([#6950](https://github.com/yt-dlp/yt-dlp/issues/6950)) by [bashonly](https://github.com/bashonly)
+ - [Support cookies and short URLs](https://github.com/yt-dlp/yt-dlp/commit/7a6f6f24592a8065376f11a58e44878807732cf6) ([#6825](https://github.com/yt-dlp/yt-dlp/issues/6825)) by [bashonly](https://github.com/bashonly)
+- **rokfin**: [Reconstruct manifest URL](https://github.com/yt-dlp/yt-dlp/commit/7a6c8a0807941dd24fbf0d6172e811884f98e027) ([#6507](https://github.com/yt-dlp/yt-dlp/issues/6507)) by [vampirefrog](https://github.com/vampirefrog)
+- **rottentomatoes**: [Fix extractor](https://github.com/yt-dlp/yt-dlp/commit/2d306c03d6f2697fcbabb7da35aa62cc078359d3) ([#6844](https://github.com/yt-dlp/yt-dlp/issues/6844)) by [JChris246](https://github.com/JChris246)
+- **rozhlas**
+ - [Extract manifest formats](https://github.com/yt-dlp/yt-dlp/commit/e4cf7741f9302b3faa092962f2895b55cb3d89bb) ([#6590](https://github.com/yt-dlp/yt-dlp/issues/6590)) by [bashonly](https://github.com/bashonly)
+ - `MujRozhlas`: [Add extractor](https://github.com/yt-dlp/yt-dlp/commit/c2b801fea59628d5c873e06a0727fbf2051bbd1f) ([#7129](https://github.com/yt-dlp/yt-dlp/issues/7129)) by [stanoarn](https://github.com/stanoarn)
+- **rtvc**: [Add extractors](https://github.com/yt-dlp/yt-dlp/commit/9b30cd3dfce83c2f0201b28a7a3ef44ab9722664) ([#6578](https://github.com/yt-dlp/yt-dlp/issues/6578)) by [elyse0](https://github.com/elyse0)
+- **rumble**
+ - [Detect timeline format](https://github.com/yt-dlp/yt-dlp/commit/78bc1868ff3352108ab2911033d1ac67a55f151e) by [pukkandan](https://github.com/pukkandan)
+ - [Fix videos without quality selection](https://github.com/yt-dlp/yt-dlp/commit/6994afc030d2a786d8032075ed71a14d7eac5a4f) by [pukkandan](https://github.com/pukkandan)
+- **sbs**: [Overhaul extractor for new API](https://github.com/yt-dlp/yt-dlp/commit/6a765f135ccb654861336ea27a2c1c24ea8e286f) ([#6839](https://github.com/yt-dlp/yt-dlp/issues/6839)) by [bashonly](https://github.com/bashonly), [dirkf](https://github.com/dirkf), [vidiot720](https://github.com/vidiot720)
+- **shemaroome**: [Pass `stream_key` header to downloader](https://github.com/yt-dlp/yt-dlp/commit/7bc92517463f5766e9d9b92c3823b5cf403c0e3d) ([#7224](https://github.com/yt-dlp/yt-dlp/issues/7224)) by [bashonly](https://github.com/bashonly)
+- **sonyliv**: [Fix login with token](https://github.com/yt-dlp/yt-dlp/commit/4815d35c191e7d375b94492a6486dd2ba43a8954) ([#7223](https://github.com/yt-dlp/yt-dlp/issues/7223)) by [bashonly](https://github.com/bashonly)
+- **stageplus**: [Add extractor](https://github.com/yt-dlp/yt-dlp/commit/e5265dc6517478e589ee3c1ff0cb19bdf4e35ce1) ([#6838](https://github.com/yt-dlp/yt-dlp/issues/6838)) by [bashonly](https://github.com/bashonly)
+- **stripchat**: [Fix extractor](https://github.com/yt-dlp/yt-dlp/commit/f9213f8a2d7ba46b912afe1dd3ce6bb700a33d72) ([#7306](https://github.com/yt-dlp/yt-dlp/issues/7306)) by [foreignBlade](https://github.com/foreignBlade)
+- **substack**: [Fix extraction](https://github.com/yt-dlp/yt-dlp/commit/12037d8b0a578fcc78a5c8f98964e48ee6060e25) ([#7218](https://github.com/yt-dlp/yt-dlp/issues/7218)) by [bashonly](https://github.com/bashonly)
+- **sverigesradio**: [Support slug URLs](https://github.com/yt-dlp/yt-dlp/commit/5ee9a7d6e18ceea956e831994cf11c423979354f) ([#7220](https://github.com/yt-dlp/yt-dlp/issues/7220)) by [bashonly](https://github.com/bashonly)
+- **tagesschau**: [Fix single audio URLs](https://github.com/yt-dlp/yt-dlp/commit/af7585c824a1e405bd8afa46d87b4be322edc93c) ([#6626](https://github.com/yt-dlp/yt-dlp/issues/6626)) by [flashdagger](https://github.com/flashdagger)
+- **teamcoco**: [Fix extractor](https://github.com/yt-dlp/yt-dlp/commit/c459d45dd4d417fb80a52e1a04e607776a44baa4) ([#6437](https://github.com/yt-dlp/yt-dlp/issues/6437)) by [bashonly](https://github.com/bashonly)
+- **telecaribe**: [Expand livestream support](https://github.com/yt-dlp/yt-dlp/commit/69b2f838d3d3e37dc17367ef64d978db1bea45cf) ([#6601](https://github.com/yt-dlp/yt-dlp/issues/6601)) by [bashonly](https://github.com/bashonly)
+- **tencent**: [Fix fatal metadata extraction](https://github.com/yt-dlp/yt-dlp/commit/971d901d129403e875a04dd92109507a03fbc070) ([#7219](https://github.com/yt-dlp/yt-dlp/issues/7219)) by [bashonly](https://github.com/bashonly)
+- **thesun**: [Update `_VALID_URL`](https://github.com/yt-dlp/yt-dlp/commit/0181b9a1b31db3fde943f7cd3fe9662f23bff292) ([#6522](https://github.com/yt-dlp/yt-dlp/issues/6522)) by [hatienl0i261299](https://github.com/hatienl0i261299)
+- **tiktok**
+ - [Extract 1080p adaptive formats](https://github.com/yt-dlp/yt-dlp/commit/c2a1bdb00931969193f2a31ea27b9c66a07aaec2) ([#7228](https://github.com/yt-dlp/yt-dlp/issues/7228)) by [bashonly](https://github.com/bashonly)
+ - [Fix and improve metadata extraction](https://github.com/yt-dlp/yt-dlp/commit/925936908a3c3ee0e508621db14696b9f6a8b563) ([#6777](https://github.com/yt-dlp/yt-dlp/issues/6777)) by [bashonly](https://github.com/bashonly)
+ - [Fix mp3 formats](https://github.com/yt-dlp/yt-dlp/commit/8ceb07e870424c219dced8f4348729553f05c5cc) ([#6615](https://github.com/yt-dlp/yt-dlp/issues/6615)) by [bashonly](https://github.com/bashonly)
+ - [Fix resolution extraction](https://github.com/yt-dlp/yt-dlp/commit/ab6057ec80aa75db6303b8206916d00c376c622c) ([#7237](https://github.com/yt-dlp/yt-dlp/issues/7237)) by [puc9](https://github.com/puc9)
+ - [Improve `TikTokLive` extractor](https://github.com/yt-dlp/yt-dlp/commit/216bcb66d7dce0762767d751dad10650cb57da9d) ([#6520](https://github.com/yt-dlp/yt-dlp/issues/6520)) by [bashonly](https://github.com/bashonly)
+- **triller**: [Support short URLs, detect removed videos](https://github.com/yt-dlp/yt-dlp/commit/33b737bedf8383c0d00d4e1d06a5273dcdfdb756) ([#6636](https://github.com/yt-dlp/yt-dlp/issues/6636)) by [bashonly](https://github.com/bashonly)
+- **tv4**: [Fix extractor](https://github.com/yt-dlp/yt-dlp/commit/125ffaa1737dd04716f2f6fbb0595ad3eb7a4b1c) ([#5649](https://github.com/yt-dlp/yt-dlp/issues/5649)) by [dirkf](https://github.com/dirkf), [TxI5](https://github.com/TxI5)
+- **tvp**: [Use new API](https://github.com/yt-dlp/yt-dlp/commit/0c7ce146e4d2a84e656d78f6857952bfd25ab389) ([#6989](https://github.com/yt-dlp/yt-dlp/issues/6989)) by [selfisekai](https://github.com/selfisekai)
+- **tvplay**: [Remove outdated domains](https://github.com/yt-dlp/yt-dlp/commit/937264419f9bf375d5656785ae6e53282587c15d) ([#7106](https://github.com/yt-dlp/yt-dlp/issues/7106)) by [ivanskodje](https://github.com/ivanskodje)
+- **twitch**
+ - [Extract original size thumbnail](https://github.com/yt-dlp/yt-dlp/commit/80b732b7a9585b2a61e456dc0d2d014a439cbaee) ([#6629](https://github.com/yt-dlp/yt-dlp/issues/6629)) by [JC-Chung](https://github.com/JC-Chung)
+ - [Fix `is_live`](https://github.com/yt-dlp/yt-dlp/commit/0551511b45f7847f40e4314aa9e624e80d086539) ([#6500](https://github.com/yt-dlp/yt-dlp/issues/6500)) by [elyse0](https://github.com/elyse0)
+ - [Support mobile clips](https://github.com/yt-dlp/yt-dlp/commit/02312c03cf53eb1da24c9ad022ee79af26060733) ([#6699](https://github.com/yt-dlp/yt-dlp/issues/6699)) by [bepvte](https://github.com/bepvte)
+ - [Update `_CLIENT_ID` and add extractor-arg](https://github.com/yt-dlp/yt-dlp/commit/01231feb142e80828985aabdec04ac608e3d43e2) ([#7200](https://github.com/yt-dlp/yt-dlp/issues/7200)) by [bashonly](https://github.com/bashonly)
+ - vod: [Support links from schedule tab](https://github.com/yt-dlp/yt-dlp/commit/dbce5afa6bb61f6272ade613f2e9a3d66b88c7ea) ([#7071](https://github.com/yt-dlp/yt-dlp/issues/7071)) by [falbrechtskirchinger](https://github.com/falbrechtskirchinger)
+- **twitter**
+ - [Add login support](https://github.com/yt-dlp/yt-dlp/commit/d1795f4a6af99c976c9d3ea2dabe5cf4f8965d3c) ([#7258](https://github.com/yt-dlp/yt-dlp/issues/7258)) by [bashonly](https://github.com/bashonly)
+ - [Default to GraphQL, handle auth errors](https://github.com/yt-dlp/yt-dlp/commit/147e62fc584c3ea6fdb09bb7a47905df68553a22) ([#6957](https://github.com/yt-dlp/yt-dlp/issues/6957)) by [bashonly](https://github.com/bashonly)
+ - spaces: [Add `release_timestamp`](https://github.com/yt-dlp/yt-dlp/commit/1c16d9df5330819cc79ad588b24aa5b72765c168) ([#7186](https://github.com/yt-dlp/yt-dlp/issues/7186)) by [CeruleanSky](https://github.com/CeruleanSky)
+- **urplay**: [Extract all subtitles](https://github.com/yt-dlp/yt-dlp/commit/7bcd4813215ac98daa4949af2ffc677c78307a38) ([#7309](https://github.com/yt-dlp/yt-dlp/issues/7309)) by [hoaluvn](https://github.com/hoaluvn)
+- **voot**: [Fix extractor](https://github.com/yt-dlp/yt-dlp/commit/4f7b11cc1c1cebf598107e00cd7295588ed484da) ([#7227](https://github.com/yt-dlp/yt-dlp/issues/7227)) by [bashonly](https://github.com/bashonly)
+- **vrt**: [Overhaul extractors](https://github.com/yt-dlp/yt-dlp/commit/1a7dcca378e80a387923ee05c250d8ba122441c6) ([#6244](https://github.com/yt-dlp/yt-dlp/issues/6244)) by [bashonly](https://github.com/bashonly), [bergoid](https://github.com/bergoid), [jeroenj](https://github.com/jeroenj)
+- **weverse**: [Add extractors](https://github.com/yt-dlp/yt-dlp/commit/b844a3f8b16500663e7ab6c6ec061cc9b30f71ac) ([#6711](https://github.com/yt-dlp/yt-dlp/issues/6711)) by [bashonly](https://github.com/bashonly) (With fixes in [fd5d93f](https://github.com/yt-dlp/yt-dlp/commit/fd5d93f7040f9776fd541f4e4079dad7d3b3fb4f))
+- **wevidi**: [Add extractor](https://github.com/yt-dlp/yt-dlp/commit/1ea15603d852971ed7d92f4de12808b27b3d9370) ([#6868](https://github.com/yt-dlp/yt-dlp/issues/6868)) by [truedread](https://github.com/truedread)
+- **weyyak**: [Add extractor](https://github.com/yt-dlp/yt-dlp/commit/6dc00acf0f1f1107a626c21befd1691403e6aeeb) ([#7124](https://github.com/yt-dlp/yt-dlp/issues/7124)) by [ItzMaxTV](https://github.com/ItzMaxTV)
+- **whyp**: [Add extractor](https://github.com/yt-dlp/yt-dlp/commit/2c566ed14101673c651c08c306c30fa5b4010b85) ([#6803](https://github.com/yt-dlp/yt-dlp/issues/6803)) by [CoryTibbettsDev](https://github.com/CoryTibbettsDev)
+- **wrestleuniverse**
+ - [Fix cookies support](https://github.com/yt-dlp/yt-dlp/commit/c8561c6d03f025268d6d3972abeb47987c8d7cbb) by [bashonly](https://github.com/bashonly)
+ - [Fix extraction, add login](https://github.com/yt-dlp/yt-dlp/commit/ef8fb7f029b816dfc95600727d84400591a3b5c5) ([#6982](https://github.com/yt-dlp/yt-dlp/issues/6982)) by [bashonly](https://github.com/bashonly), [Grub4K](https://github.com/Grub4K)
+- **wykop**: [Add extractors](https://github.com/yt-dlp/yt-dlp/commit/aed945e1b9b7d3af2a907e1a12e6508cc81d6a20) ([#6140](https://github.com/yt-dlp/yt-dlp/issues/6140)) by [selfisekai](https://github.com/selfisekai)
+- **ximalaya**: [Sort playlist entries](https://github.com/yt-dlp/yt-dlp/commit/8790ea7b2536332777bce68590386b1aa935fac7) ([#7292](https://github.com/yt-dlp/yt-dlp/issues/7292)) by [linsui](https://github.com/linsui)
+- **YahooGyaOIE, YahooGyaOPlayerIE**: [Delete extractors due to website closure](https://github.com/yt-dlp/yt-dlp/commit/68be95bd0ca3f76aa63c9812935bd826b3a42e53) ([#6218](https://github.com/yt-dlp/yt-dlp/issues/6218)) by [Lesmiscore](https://github.com/Lesmiscore)
+- **yappy**: YappyProfile: [Add extractor](https://github.com/yt-dlp/yt-dlp/commit/6f69101dc912690338d32e2aab085c32e44eba3f) ([#7346](https://github.com/yt-dlp/yt-dlp/issues/7346)) by [7vlad7](https://github.com/7vlad7)
+- **youku**: [Improve error message](https://github.com/yt-dlp/yt-dlp/commit/ef0848abd425dfda6db62baa8d72897eefb0007f) ([#6690](https://github.com/yt-dlp/yt-dlp/issues/6690)) by [carusocr](https://github.com/carusocr)
+- **youporn**: [Extract m3u8 formats](https://github.com/yt-dlp/yt-dlp/commit/ddae33754ae1f32dd9c64cf895c47d20f6b5f336) by [pukkandan](https://github.com/pukkandan)
+- **youtube**
+ - [Add client name to `format_note` when `-v`](https://github.com/yt-dlp/yt-dlp/commit/c795c39f27244cbce846067891827e4847036441) ([#6254](https://github.com/yt-dlp/yt-dlp/issues/6254)) by [Lesmiscore](https://github.com/Lesmiscore), [pukkandan](https://github.com/pukkandan)
+ - [Add extractor-arg `include_duplicate_formats`](https://github.com/yt-dlp/yt-dlp/commit/86cb922118b236306310a72657f70426c20e28bb) by [pukkandan](https://github.com/pukkandan)
+ - [Bypass throttling for `-f17`](https://github.com/yt-dlp/yt-dlp/commit/c9abebb851e6188cb34b9eb744c1863dd46af919) by [pukkandan](https://github.com/pukkandan)
+ - [Construct fragment list lazily](https://github.com/yt-dlp/yt-dlp/commit/2a23d92d9ec44a0168079e38bcf3d383e5c4c7bb) by [pukkandan](https://github.com/pukkandan) (With fixes in [e389d17](https://github.com/yt-dlp/yt-dlp/commit/e389d172b6f42e4f332ae679dc48543fb7b9b61d))
+ - [Define strict uploader metadata mapping](https://github.com/yt-dlp/yt-dlp/commit/7666b93604b97e9ada981c6b04ccf5605dd1bd44) ([#6384](https://github.com/yt-dlp/yt-dlp/issues/6384)) by [coletdjnz](https://github.com/coletdjnz)
+ - [Determine audio language using automatic captions](https://github.com/yt-dlp/yt-dlp/commit/ff9b0e071ffae5543cc309e6f9e647ac51e5846e) by [pukkandan](https://github.com/pukkandan)
+ - [Extract `channel_is_verified`](https://github.com/yt-dlp/yt-dlp/commit/8213ce28a485e200f6a7e1af1434a987c8e702bd) ([#7213](https://github.com/yt-dlp/yt-dlp/issues/7213)) by [coletdjnz](https://github.com/coletdjnz)
+ - [Extract `heatmap` data](https://github.com/yt-dlp/yt-dlp/commit/5caf30dbc34f10b0be60676fece635b5c59f0d72) ([#7100](https://github.com/yt-dlp/yt-dlp/issues/7100)) by [tntmod54321](https://github.com/tntmod54321)
+ - [Extract more metadata for comments](https://github.com/yt-dlp/yt-dlp/commit/c35448b7b14113b35c4415dbfbf488c4731f006f) ([#7179](https://github.com/yt-dlp/yt-dlp/issues/7179)) by [coletdjnz](https://github.com/coletdjnz)
+ - [Extract uploader metadata for feed/playlist items](https://github.com/yt-dlp/yt-dlp/commit/93e12ed76ef49252dc6869b59d21d0777e5e11af) by [coletdjnz](https://github.com/coletdjnz)
+ - [Fix comment loop detection for pinned comments](https://github.com/yt-dlp/yt-dlp/commit/141a8dff98874a426d7fbe772e0a8421bb42656f) ([#6714](https://github.com/yt-dlp/yt-dlp/issues/6714)) by [coletdjnz](https://github.com/coletdjnz)
+ - [Fix continuation loop with no comments](https://github.com/yt-dlp/yt-dlp/commit/18f8fba7c89a87f99cc3313a1795848867e84fff) ([#7148](https://github.com/yt-dlp/yt-dlp/issues/7148)) by [coletdjnz](https://github.com/coletdjnz)
+ - [Fix parsing `comment_count`](https://github.com/yt-dlp/yt-dlp/commit/071670cbeaa01ddf2cc20a95ae6da25f8f086431) ([#6523](https://github.com/yt-dlp/yt-dlp/issues/6523)) by [nick-cd](https://github.com/nick-cd)
+ - [Handle incomplete initial data from watch page](https://github.com/yt-dlp/yt-dlp/commit/607510b9f2f67bfe7d33d74031a5c1fe22a24862) ([#6510](https://github.com/yt-dlp/yt-dlp/issues/6510)) by [coletdjnz](https://github.com/coletdjnz)
+ - [Ignore wrong fps of some formats](https://github.com/yt-dlp/yt-dlp/commit/97afb093d4cbe5df889145afa5f9ede4535e93e4) by [pukkandan](https://github.com/pukkandan)
+ - [Misc cleanup](https://github.com/yt-dlp/yt-dlp/commit/14a14335b280766fbf5a469ae26836d6c1fe450a) by [coletdjnz](https://github.com/coletdjnz)
+ - [Prioritize premium formats](https://github.com/yt-dlp/yt-dlp/commit/51a07b0dca4c079d58311c19b6d1c097c24bb021) by [pukkandan](https://github.com/pukkandan)
+ - [Revert default formats to `https`](https://github.com/yt-dlp/yt-dlp/commit/c6786ff3baaf72a5baa4d56d34058e54cbcf8ceb) by [pukkandan](https://github.com/pukkandan)
+ - [Support podcasts and releases tabs](https://github.com/yt-dlp/yt-dlp/commit/447afb9eaa65bc677e3245c83e53a8e69c174a3c) by [coletdjnz](https://github.com/coletdjnz)
+ - [Support shorter relative time format](https://github.com/yt-dlp/yt-dlp/commit/2fb35f6004c7625f0dd493da4a5abf0690f7777c) ([#7191](https://github.com/yt-dlp/yt-dlp/issues/7191)) by [coletdjnz](https://github.com/coletdjnz)
+ - music_search_url: [Extract title](https://github.com/yt-dlp/yt-dlp/commit/69a40e4a7f6caa5662527ebd2f3c4e8aa02857a2) ([#7102](https://github.com/yt-dlp/yt-dlp/issues/7102)) by [kangalio](https://github.com/kangalio)
+- **zaiko**
+ - [Add extractor](https://github.com/yt-dlp/yt-dlp/commit/345b4c0aedd9d19898ce00d5cef35fe0d277a052) ([#7254](https://github.com/yt-dlp/yt-dlp/issues/7254)) by [c-basalt](https://github.com/c-basalt)
+ - ZaikoETicket: [Add extractor](https://github.com/yt-dlp/yt-dlp/commit/5cc09c004bd5edbbada9b041c08a720cadc4f4df) ([#7347](https://github.com/yt-dlp/yt-dlp/issues/7347)) by [pzhlkj6612](https://github.com/pzhlkj6612)
+- **zdf**: [Fix formats extraction](https://github.com/yt-dlp/yt-dlp/commit/ee0ed0338df328cd986f97315c8162b5a151476d) by [bashonly](https://github.com/bashonly)
+- **zee5**: [Fix extraction of new content](https://github.com/yt-dlp/yt-dlp/commit/9d7fde89a40360396f0baa2ee8bf507f92108b32) ([#7280](https://github.com/yt-dlp/yt-dlp/issues/7280)) by [bashonly](https://github.com/bashonly)
+- **zingmp3**: [Fix and improve extractors](https://github.com/yt-dlp/yt-dlp/commit/17d7ca84ea723c20668bd9bfa938be7ea0e64f6b) ([#6367](https://github.com/yt-dlp/yt-dlp/issues/6367)) by [hatienl0i261299](https://github.com/hatienl0i261299)
+- **zoom**
+ - [Fix extractor](https://github.com/yt-dlp/yt-dlp/commit/79c77e85b70ae3b9942d5a88c14d021a9bd24222) ([#6741](https://github.com/yt-dlp/yt-dlp/issues/6741)) by [shreyasminocha](https://github.com/shreyasminocha)
+ - [Fix share URL extraction](https://github.com/yt-dlp/yt-dlp/commit/90c1f5120694105496a6ad9e3ecfc6c25de6cae1) ([#6789](https://github.com/yt-dlp/yt-dlp/issues/6789)) by [bashonly](https://github.com/bashonly)
+
+#### Downloader changes
+- **curl**: [Fix progress reporting](https://github.com/yt-dlp/yt-dlp/commit/66aeaac9aa30b5959069ba84e53a5508232deb38) by [pukkandan](https://github.com/pukkandan)
+- **fragment**: [Do not sleep between fragments](https://github.com/yt-dlp/yt-dlp/commit/424f3bf03305088df6e01d62f7311be8601ad3f4) by [pukkandan](https://github.com/pukkandan)
+
+#### Postprocessor changes
+- [Fix chapters if duration is not extracted](https://github.com/yt-dlp/yt-dlp/commit/01ddec7e661bf90dc4c34e6924eb9d7629886cef) ([#6037](https://github.com/yt-dlp/yt-dlp/issues/6037)) by [bashonly](https://github.com/bashonly)
+- [Print newline for `--progress-template`](https://github.com/yt-dlp/yt-dlp/commit/13ff78095372fd98900a32572cf817994c07ccb5) by [pukkandan](https://github.com/pukkandan)
+- **EmbedThumbnail, FFmpegMetadata**: [Fix error on attaching thumbnails and info json for mkv/mka](https://github.com/yt-dlp/yt-dlp/commit/0f0875ed555514f32522a0f30554fb08825d5124) ([#6647](https://github.com/yt-dlp/yt-dlp/issues/6647)) by [Lesmiscore](https://github.com/Lesmiscore)
+- **FFmpegFixupM3u8PP**: [Check audio codec before fixup](https://github.com/yt-dlp/yt-dlp/commit/3f7e2bd80e3c5d8a1682f20a1b245fcd974f295d) ([#6778](https://github.com/yt-dlp/yt-dlp/issues/6778)) by [bashonly](https://github.com/bashonly)
+- **FixupDuplicateMoov**: [Fix bug in triggering](https://github.com/yt-dlp/yt-dlp/commit/26010b5cec50193b98ad7845d1d77450f9f14c2b) by [pukkandan](https://github.com/pukkandan)
+
+#### Misc. changes
+- [Add automatic duplicate issue detection](https://github.com/yt-dlp/yt-dlp/commit/15b2d3db1d40b0437fca79d8874d392aa54b3cdd) by [pukkandan](https://github.com/pukkandan)
+- **build**
+ - [Fix macOS target](https://github.com/yt-dlp/yt-dlp/commit/44a79958f0b596ee71e1eb25f158610aada29d1b) by [Grub4K](https://github.com/Grub4K)
+ - [Implement build verification using `--update-to`](https://github.com/yt-dlp/yt-dlp/commit/b73193c99aa23b135732408a5fcf655c68d731c6) by [bashonly](https://github.com/bashonly), [Grub4K](https://github.com/Grub4K)
+ - [Pin `pyinstaller` version for MacOS](https://github.com/yt-dlp/yt-dlp/commit/427a8fafbb0e18c28d0ed7960be838d7b26b88d3) by [pukkandan](https://github.com/pukkandan)
+ - [Various build workflow improvements](https://github.com/yt-dlp/yt-dlp/commit/c4efa0aefec8daef1de62fd1693f13edf3c8b03c) by [bashonly](https://github.com/bashonly), [Grub4K](https://github.com/Grub4K)
+- **cleanup**
+ - Miscellaneous
+ - [6f2287c](https://github.com/yt-dlp/yt-dlp/commit/6f2287cb18cbfb27518f068d868fa9390fee78ad) by [pukkandan](https://github.com/pukkandan)
+ - [ad54c91](https://github.com/yt-dlp/yt-dlp/commit/ad54c9130e793ce433bf9da334fa80df9f3aee58) by [freezboltz](https://github.com/freezboltz), [mikf](https://github.com/mikf), [pukkandan](https://github.com/pukkandan)
+- **cleanup, utils**: [Split into submodules](https://github.com/yt-dlp/yt-dlp/commit/69bec6730ec9d724bcedeab199d9d684d61423ba) ([#7090](https://github.com/yt-dlp/yt-dlp/issues/7090)) by [coletdjnz](https://github.com/coletdjnz), [pukkandan](https://github.com/pukkandan)
+- **cli_to_api**: [Add script](https://github.com/yt-dlp/yt-dlp/commit/46f1370e9af6f8af8762f67e27e5acb8f0c48a47) by [pukkandan](https://github.com/pukkandan)
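+  A usage sketch: pass ordinary yt-dlp CLI options and the script prints the corresponding `YoutubeDL` API options (the flags shown are arbitrary examples; see the script itself for details):
+  ```
+  $ python devscripts/cli_to_api.py -f bestvideo+bestaudio --no-mtime
+  ```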
+- **devscripts**: `make_changelog`: [Various improvements](https://github.com/yt-dlp/yt-dlp/commit/23c39a4beadee382060bb47fdaa21316ca707d38) by [Grub4K](https://github.com/Grub4K)
+- **docs**: [Misc improvements](https://github.com/yt-dlp/yt-dlp/commit/c8bc203fbf3bb09914e53f0833eed622ab7edbb9) by [pukkandan](https://github.com/pukkandan)
+
+### 2023.03.04
+
+#### Extractor changes
+- **bilibili**: [Fix for downloading wrong subtitles](https://github.com/yt-dlp/yt-dlp/commit/8a83baaf218ab89e6e7faa76b7c7be3a2ec19e3a) ([#6358](https://github.com/yt-dlp/yt-dlp/issues/6358)) by [LXYan2333](https://github.com/LXYan2333)
+- **ESPNcricinfo**: [Handle new URL pattern](https://github.com/yt-dlp/yt-dlp/commit/640c934823fc2d1ec77ec932566078014058635f) ([#6321](https://github.com/yt-dlp/yt-dlp/issues/6321)) by [venkata-krishnas](https://github.com/venkata-krishnas)
+- **lefigaro**: [Add extractors](https://github.com/yt-dlp/yt-dlp/commit/eb8fd6d044e8926532772b72be0645c6b8ecb3aa) ([#6309](https://github.com/yt-dlp/yt-dlp/issues/6309)) by [elyse0](https://github.com/elyse0)
+- **lumni**: [Add extractor](https://github.com/yt-dlp/yt-dlp/commit/1f8489cccbdc6e96027ef527b88717458f0900e8) ([#6302](https://github.com/yt-dlp/yt-dlp/issues/6302)) by [carusocr](https://github.com/carusocr)
+- **Prankcast**: [Fix tags](https://github.com/yt-dlp/yt-dlp/commit/ed4cc4ea793314c50ae3f82e98248c1de1c25694) ([#6316](https://github.com/yt-dlp/yt-dlp/issues/6316)) by [columndeeply](https://github.com/columndeeply)
+- **rutube**: [Extract chapters from description](https://github.com/yt-dlp/yt-dlp/commit/22ccd5420b3eb0782776071f12cccd1fedaa1fd0) ([#6345](https://github.com/yt-dlp/yt-dlp/issues/6345)) by [mushbite](https://github.com/mushbite)
+- **SportDeutschland**: [Rewrite extractor](https://github.com/yt-dlp/yt-dlp/commit/45db357289b4e1eec09093c8bc5446520378f426) by [pukkandan](https://github.com/pukkandan)
+- **telecaribe**: [Add extractor](https://github.com/yt-dlp/yt-dlp/commit/b40471282286bd2b09c485bf79afd271d229272c) ([#6311](https://github.com/yt-dlp/yt-dlp/issues/6311)) by [elyse0](https://github.com/elyse0)
+- **tubetugraz**: [Support `--twofactor` (#6424)](https://github.com/yt-dlp/yt-dlp/commit/f44cb4e77bb9be8be291d02ab6f79dc0b4c0d4a1) ([#6427](https://github.com/yt-dlp/yt-dlp/issues/6427)) by [Ferdi265](https://github.com/Ferdi265)
+- **tunein**: [Fix extractors](https://github.com/yt-dlp/yt-dlp/commit/46580ced56c90b559885aded6aa8f46f20a9cdce) ([#6310](https://github.com/yt-dlp/yt-dlp/issues/6310)) by [elyse0](https://github.com/elyse0)
+- **twitch**: [Update for GraphQL API changes](https://github.com/yt-dlp/yt-dlp/commit/4a6272c6d1bff89969b67cd22b26ebe6d7e72279) ([#6318](https://github.com/yt-dlp/yt-dlp/issues/6318)) by [elyse0](https://github.com/elyse0)
+- **twitter**: [Fix retweet extraction](https://github.com/yt-dlp/yt-dlp/commit/cf605226521e99c89fc8dff26a319025810e63a0) ([#6422](https://github.com/yt-dlp/yt-dlp/issues/6422)) by [selfisekai](https://github.com/selfisekai)
+- **xvideos**: quickies: [Add extractor](https://github.com/yt-dlp/yt-dlp/commit/283a0b5bc511f3b350eead4488158f50c20ec526) ([#6414](https://github.com/yt-dlp/yt-dlp/issues/6414)) by [Yakabuff](https://github.com/Yakabuff)
+
+#### Misc. changes
+- **build**
+  - [Fix publishing to PyPI and homebrew](https://github.com/yt-dlp/yt-dlp/commit/55676fe498345a389a2539d8baaba958d6d61c3e) by [bashonly](https://github.com/bashonly)
+  - [Only archive if `vars.ARCHIVE_REPO` is set](https://github.com/yt-dlp/yt-dlp/commit/08ff6d59f97b5f5f0128f6bf6fbef56fd836cc52) by [Grub4K](https://github.com/Grub4K)
+- **cleanup**: Miscellaneous: [392389b](https://github.com/yt-dlp/yt-dlp/commit/392389b7df7b818f794b231f14dc396d4875fbad) by [pukkandan](https://github.com/pukkandan)
+- **devscripts**: `make_changelog`: [Stop at `Release ...` commit](https://github.com/yt-dlp/yt-dlp/commit/7accdd9845fe7ce9d0aa5a9d16faaa489c1294eb) by [pukkandan](https://github.com/pukkandan)
+
+### 2023.03.03
+
+#### Important changes
+- **A new release type has been added!**
+ * [`nightly`](https://github.com/yt-dlp/yt-dlp/releases/tag/nightly) builds will be made after each push, containing the latest fixes (but also possibly bugs).
+ * When using `--update`/`-U`, a release binary will only update within its current channel (either `stable` or `nightly`).
+ * The `--update-to` option has been added, allowing the user more control over program upgrades (or downgrades).
+ * `--update-to` can change the release channel (`stable`, `nightly`) and also upgrade or downgrade to specific tags.
+ * **Usage**: `--update-to CHANNEL`, `--update-to TAG`, `--update-to CHANNEL@TAG`
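+   For example (a usage sketch; the tags shown are release tags from this changelog):
+   ```
+   yt-dlp --update-to nightly             # switch to the nightly channel and update to its latest release
+   yt-dlp --update-to 2023.01.06          # upgrade/downgrade to a specific tag within the current channel
+   yt-dlp --update-to stable@2023.02.17   # specify both the channel and the tag
+   ```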
+- **YouTube throttling fixes!**
+
+#### Core changes
+- [Add option `--break-match-filters`](https://github.com/yt-dlp/yt-dlp/commit/fe2ce85aff0aa03735fc0152bb8cb9c3d4ef0753) by [pukkandan](https://github.com/pukkandan)
+- [Fix `--break-on-existing` with `--lazy-playlist`](https://github.com/yt-dlp/yt-dlp/commit/d21056f4cf0a1623daa107f9181074f5725ac436) by [pukkandan](https://github.com/pukkandan)
+- **dependencies**: [Simplify `Cryptodome`](https://github.com/yt-dlp/yt-dlp/commit/65f6e807804d2af5e00f2aecd72bfc43af19324a) by [pukkandan](https://github.com/pukkandan)
+- **jsinterp**: [Handle `Date` at epoch 0](https://github.com/yt-dlp/yt-dlp/commit/9acf1ee25f7ad3920ede574a9de95b8c18626af4) by [pukkandan](https://github.com/pukkandan)
+- **plugins**: [Don't look in `.egg` directories](https://github.com/yt-dlp/yt-dlp/commit/b059188383eee4fa336ef728dda3ff4bb7335625) by [pukkandan](https://github.com/pukkandan)
+- **update**: [Add option `--update-to`, including to nightly](https://github.com/yt-dlp/yt-dlp/commit/77df20f14cc9ed41dfe3a1fe2d77fd27f5365a94) ([#6220](https://github.com/yt-dlp/yt-dlp/issues/6220)) by [bashonly](https://github.com/bashonly), [Grub4K](https://github.com/Grub4K), [pukkandan](https://github.com/pukkandan)
+- **utils**
+  - `LenientJSONDecoder`: [Parse unclosed objects](https://github.com/yt-dlp/yt-dlp/commit/cc09083636ce21e58ff74f45eac2dbda507462b0) by [pukkandan](https://github.com/pukkandan)
+  - `Popen`: [Shim undocumented `text_mode` property](https://github.com/yt-dlp/yt-dlp/commit/da8e2912b165005f76779a115a071cd6132ceedf) by [Grub4K](https://github.com/Grub4K)
+
+#### Extractor changes
+- [Fix DRM detection in m3u8](https://github.com/yt-dlp/yt-dlp/commit/43a3eaf96393b712d60cbcf5c6cb1e90ed7f42f5) by [pukkandan](https://github.com/pukkandan)
+- **generic**
+  - [Detect manifest links via extension](https://github.com/yt-dlp/yt-dlp/commit/b38cae49e6f4849c8ee2a774bdc3c1c647ae5f0e) by [bashonly](https://github.com/bashonly)
+  - [Handle basic-auth when checking redirects](https://github.com/yt-dlp/yt-dlp/commit/8e9fe43cd393e69fa49b3d842aa3180c1d105b8f) by [pukkandan](https://github.com/pukkandan)
+- **GoogleDrive**: [Fix some audio](https://github.com/yt-dlp/yt-dlp/commit/4d248e29d20d983ededab0b03d4fe69dff9eb4ed) by [pukkandan](https://github.com/pukkandan)
+- **iprima**: [Fix extractor](https://github.com/yt-dlp/yt-dlp/commit/9fddc12ab022a31754e0eaa358fc4e1dfa974587) ([#6291](https://github.com/yt-dlp/yt-dlp/issues/6291)) by [std-move](https://github.com/std-move)
+- **mediastream**: [Improve WinSports support](https://github.com/yt-dlp/yt-dlp/commit/2d5a8c5db2bd4ff1c2e45e00cd890a10f8ffca9e) ([#6401](https://github.com/yt-dlp/yt-dlp/issues/6401)) by [bashonly](https://github.com/bashonly)
+- **ntvru**: [Extract HLS and DASH formats](https://github.com/yt-dlp/yt-dlp/commit/77d6d136468d0c23c8e79bc937898747804f585a) ([#6403](https://github.com/yt-dlp/yt-dlp/issues/6403)) by [bashonly](https://github.com/bashonly)
+- **tencent**: [Add more formats and info](https://github.com/yt-dlp/yt-dlp/commit/18d295c9e0f95adc179eef345b7af64d6372db78) ([#5950](https://github.com/yt-dlp/yt-dlp/issues/5950)) by [Hill-98](https://github.com/Hill-98)
+- **yle_areena**: [Extract non-Kaltura videos](https://github.com/yt-dlp/yt-dlp/commit/40d77d89027cd0e0ce31d22aec81db3e1d433900) ([#6402](https://github.com/yt-dlp/yt-dlp/issues/6402)) by [bashonly](https://github.com/bashonly)
+- **youtube**
+  - [Construct dash formats with `range` query](https://github.com/yt-dlp/yt-dlp/commit/5038f6d713303e0967d002216e7a88652401c22a) by [pukkandan](https://github.com/pukkandan) (With fixes in [f34804b](https://github.com/yt-dlp/yt-dlp/commit/f34804b2f920f62a6e893a14a9e2a2144b14dd23) by [bashonly](https://github.com/bashonly), [coletdjnz](https://github.com/coletdjnz))
+  - [Detect and break on looping comments](https://github.com/yt-dlp/yt-dlp/commit/7f51861b1820c37b157a239b1fe30628d907c034) ([#6301](https://github.com/yt-dlp/yt-dlp/issues/6301)) by [coletdjnz](https://github.com/coletdjnz)
+  - [Extract channel `view_count` when `/about` tab is passed](https://github.com/yt-dlp/yt-dlp/commit/31e183557fcd1b937582f9429f29207c1261f501) by [pukkandan](https://github.com/pukkandan)
+
+#### Misc. changes
+- **build**
+  - [Add `cffi` as a dependency for `yt_dlp_linux`](https://github.com/yt-dlp/yt-dlp/commit/776d1c3f0c9b00399896dd2e40e78e9a43218109) by [bashonly](https://github.com/bashonly)
+  - [Automated builds and nightly releases](https://github.com/yt-dlp/yt-dlp/commit/29cb20bd563c02671b31dd840139e93dd37150a1) ([#6220](https://github.com/yt-dlp/yt-dlp/issues/6220)) by [bashonly](https://github.com/bashonly), [Grub4K](https://github.com/Grub4K) (With fixes in [bfc861a](https://github.com/yt-dlp/yt-dlp/commit/bfc861a91ee65c9b0ac169754f512e052c6827cf) by [pukkandan](https://github.com/pukkandan))
+  - [Sign SHA files and release public key](https://github.com/yt-dlp/yt-dlp/commit/12647e03d417feaa9ea6a458bea5ebd747494a53) by [Grub4K](https://github.com/Grub4K)
+- **cleanup**
+  - [Fix `Changelog`](https://github.com/yt-dlp/yt-dlp/commit/17ca19ab60a6a13eb8a629c51442b5248b0d8394) by [pukkandan](https://github.com/pukkandan)
+  - jsinterp: [Give functions names to help debugging](https://github.com/yt-dlp/yt-dlp/commit/b2e0343ba0fc5d8702e90f6ba2b71358e2677e0b) by [pukkandan](https://github.com/pukkandan)
+  - Miscellaneous: [4815bbf](https://github.com/yt-dlp/yt-dlp/commit/4815bbfc41cf641e4a0650289dbff968cb3bde76), [5b28cef](https://github.com/yt-dlp/yt-dlp/commit/5b28cef72db3b531680d89c121631c73ae05354f) by [pukkandan](https://github.com/pukkandan)
+- **devscripts**: [Script to generate changelog](https://github.com/yt-dlp/yt-dlp/commit/d400e261cf029a3f20d364113b14de973be75404) ([#6220](https://github.com/yt-dlp/yt-dlp/issues/6220)) by [Grub4K](https://github.com/Grub4K) (With fixes in [9344964](https://github.com/yt-dlp/yt-dlp/commit/93449642815a6973a4b09b289982ca7e1f961b5f))
+
+### 2023.02.17
+
+* Merge youtube-dl: Up to [commit/2dd6c6e](https://github.com/ytdl-org/youtube-dl/commit/2dd6c6e)
+* Fix `--concat-playlist`
+* Imply `--no-progress` when `--print`
+* Improve default subtitle language selection by [sdht0](https://github.com/sdht0)
+* Make `title` completely non-fatal
+* Sanitize formats before sorting by [pukkandan](https://github.com/pukkandan)
+* Support module level `__bool__` and `property`
+* [dependencies] Standardize `Cryptodome` imports
+* [hls] Allow extractors to provide AES key by [Grub4K](https://github.com/Grub4K), [bashonly](https://github.com/bashonly)
+* [ExtractAudio] Handle outtmpl without ext by [carusocr](https://github.com/carusocr)
+* [extractor/common] Fix `_search_nuxt_data` by [LowSuggestion912](https://github.com/LowSuggestion912)
+* [extractor/generic] Avoid catastrophic backtracking in KVS regex by [bashonly](https://github.com/bashonly)
+* [jsinterp] Support `if` statements
+* [plugins] Fix zip search paths
+* [utils] `traverse_obj`: Various improvements by [Grub4K](https://github.com/Grub4K)
+* [utils] `traverse_obj`: Fix more bugs
+* [utils] `traverse_obj`: Fix several behavioral problems by [Grub4K](https://github.com/Grub4K)
+* [utils] Don't use Content-length with encoding by [felixonmars](https://github.com/felixonmars)
+* [utils] Fix `time_seconds` to use the provided TZ by [Grub4K](https://github.com/Grub4K), [Lesmiscore](https://github.com/Lesmiscore)
+* [utils] Fix race condition in `make_dir` by [aionescu](https://github.com/aionescu)
+* [utils] Use local kernel32 for file locking on Windows by [Grub4K](https://github.com/Grub4K)
+* [compat_utils] Improve `passthrough_module`
+* [compat_utils] Simplify `EnhancedModule`
+* [build] Update pyinstaller
+* [pyinst] Fix for pyinstaller 5.8
+* [devscripts] Provide `pyinstaller` hooks
+* [devscripts/pyinstaller] Analyze sub-modules of `Cryptodome`
+* [cleanup] Misc fixes and cleanup
+* [extractor/anchorfm] Add episode extractor by [HobbyistDev](https://github.com/HobbyistDev), [bashonly](https://github.com/bashonly)
+* [extractor/boxcast] Add extractor by [HobbyistDev](https://github.com/HobbyistDev)
+* [extractor/ebay] Add extractor by [JChris246](https://github.com/JChris246)
+* [extractor/hypergryph] Add extractor by [HobbyistDev](https://github.com/HobbyistDev), [bashonly](https://github.com/bashonly)
+* [extractor/NZOnScreen] Add extractor by [gregsadetsky](https://github.com/gregsadetsky), [pukkandan](https://github.com/pukkandan)
+* [extractor/rozhlas] Add extractor RozhlasVltavaIE by [amra](https://github.com/amra)
+* [extractor/tempo] Add IVXPlayer extractor by [HobbyistDev](https://github.com/HobbyistDev)
+* [extractor/txxx] Add extractors by [chio0hai](https://github.com/chio0hai)
+* [extractor/vocaroo] Add extractor by [SuperSonicHub1](https://github.com/SuperSonicHub1), [qbnu](https://github.com/qbnu)
+* [extractor/wrestleuniverse] Add extractors by [Grub4K](https://github.com/Grub4K), [bashonly](https://github.com/bashonly)
+* [extractor/yappy] Add extractor by [HobbyistDev](https://github.com/HobbyistDev), [dirkf](https://github.com/dirkf)
+* [extractor/youtube] **Fix `uploader_id` extraction** by [bashonly](https://github.com/bashonly)
+* [extractor/youtube] Add hyperpipe instances by [Generator](https://github.com/Generator)
+* [extractor/youtube] Handle `consent.youtube`
+* [extractor/youtube] Support `/live/` URL
+* [extractor/youtube] Update invidious and piped instances by [rohieb](https://github.com/rohieb)
+* [extractor/91porn] Fix title and comment extraction by [pmitchell86](https://github.com/pmitchell86)
+* [extractor/AbemaTV] Cache user token whenever appropriate by [Lesmiscore](https://github.com/Lesmiscore)
+* [extractor/bfmtv] Support `rmc` prefix by [carusocr](https://github.com/carusocr)
+* [extractor/biliintl] Add intro and ending chapters by [HobbyistDev](https://github.com/HobbyistDev)
+* [extractor/clyp] Support `wav` by [qulaz](https://github.com/qulaz)
+* [extractor/crunchyroll] Add intro chapter by [ByteDream](https://github.com/ByteDream)
+* [extractor/crunchyroll] Better message for premium videos
+* [extractor/crunchyroll] Fix incorrect premium-only error by [Grub4K](https://github.com/Grub4K)
+* [extractor/DouyuTV] Use new API by [hatienl0i261299](https://github.com/hatienl0i261299)
+* [extractor/embedly] Embedded links may be for other extractors
+* [extractor/freesound] Workaround invalid URL in webpage by [rebane2001](https://github.com/rebane2001)
+* [extractor/GoPlay] Use new API by [jeroenj](https://github.com/jeroenj)
+* [extractor/Hidive] Fix subtitles and age-restriction by [chexxor](https://github.com/chexxor)
+* [extractor/huya] Support HD streams by [felixonmars](https://github.com/felixonmars)
+* [extractor/moviepilot] Fix extractor by [panatexxa](https://github.com/panatexxa)
+* [extractor/nbc] Fix `NBC` and `NBCStations` extractors by [bashonly](https://github.com/bashonly)
+* [extractor/nbc] Fix XML parsing by [bashonly](https://github.com/bashonly)
+* [extractor/nebula] Remove broken cookie support by [hheimbuerger](https://github.com/hheimbuerger)
+* [extractor/nfl] Add `NFLPlus` extractors by [bashonly](https://github.com/bashonly)
+* [extractor/niconico] Add support for like history by [Matumo](https://github.com/Matumo), [pukkandan](https://github.com/pukkandan)
+* [extractor/nitter] Update instance list by [OIRNOIR](https://github.com/OIRNOIR)
+* [extractor/npo] Fix extractor and add HD support by [seproDev](https://github.com/seproDev)
+* [extractor/odkmedia] Add `OnDemandChinaEpisodeIE` by [HobbyistDev](https://github.com/HobbyistDev), [pukkandan](https://github.com/pukkandan)
+* [extractor/pornez] Handle relative URLs in iframe by [JChris246](https://github.com/JChris246)
+* [extractor/radiko] Fix format sorting for Time Free by [road-master](https://github.com/road-master)
+* [extractor/rcs] Fix extractors by [nixxo](https://github.com/nixxo), [pukkandan](https://github.com/pukkandan)
+* [extractor/reddit] Support user posts by [OMEGARAZER](https://github.com/OMEGARAZER)
+* [extractor/rumble] Fix format sorting by [pukkandan](https://github.com/pukkandan)
+* [extractor/servus] Rewrite extractor by [Ashish0804](https://github.com/Ashish0804), [FrankZ85](https://github.com/FrankZ85), [StefanLobbenmeier](https://github.com/StefanLobbenmeier)
+* [extractor/slideslive] Fix slides and chapters/duration by [bashonly](https://github.com/bashonly)
+* [extractor/SportDeutschland] Fix extractor by [FriedrichRehren](https://github.com/FriedrichRehren)
+* [extractor/Stripchat] Fix extractor by [JChris246](https://github.com/JChris246), [bashonly](https://github.com/bashonly)
+* [extractor/tnaflix] Fix extractor by [bashonly](https://github.com/bashonly), [oxamun](https://github.com/oxamun)
+* [extractor/tvp] Support `stream.tvp.pl` by [selfisekai](https://github.com/selfisekai)
+* [extractor/twitter] Fix `--no-playlist` and add media `view_count` when using GraphQL by [Grub4K](https://github.com/Grub4K)
+* [extractor/twitter] Fix graphql extraction on some tweets by [selfisekai](https://github.com/selfisekai)
+* [extractor/vimeo] Fix `playerConfig` extraction by [LeoniePhiline](https://github.com/LeoniePhiline), [bashonly](https://github.com/bashonly)
+* [extractor/viu] Add `ViuOTTIndonesiaIE` extractor by [HobbyistDev](https://github.com/HobbyistDev)
+* [extractor/vk] Fix playlists for new API by [the-marenga](https://github.com/the-marenga)
+* [extractor/vlive] Replace with `VLiveWebArchiveIE` by [seproDev](https://github.com/seproDev)
+* [extractor/ximalaya] Update album `_VALID_URL` by [carusocr](https://github.com/carusocr)
+* [extractor/zdf] Use android API endpoint for UHD downloads by [seproDev](https://github.com/seproDev)
+* [extractor/drtv] Fix bug in [ab4cbef](https://github.com/yt-dlp/yt-dlp/commit/ab4cbef) by [bashonly](https://github.com/bashonly)
+
+### 2023.01.06
+
+* Fix config locations by [Grub4K](https://github.com/Grub4K), [coletdjnz](https://github.com/coletdjnz), [pukkandan](https://github.com/pukkandan)
+* [downloader/aria2c] Disable native progress
+* [utils] `mimetype2ext`: `weba` is not standard
+* [utils] `windows_enable_vt_mode`: Better error handling
+* [build] Add minimal `pyproject.toml`
+* [update] Fix updater file removal on Windows by [Grub4K](https://github.com/Grub4K)
+* [cleanup] Misc fixes and cleanup
+* [extractor/aitube] Add extractor by [HobbyistDev](https://github.com/HobbyistDev)
+* [extractor/drtv] Add series extractors by [FrederikNS](https://github.com/FrederikNS)
+* [extractor/volejtv] Add extractor by [HobbyistDev](https://github.com/HobbyistDev)
+* [extractor/xanimu] Add extractor by [JChris246](https://github.com/JChris246)
+* [extractor/youtube] Retry manifest refresh for live-from-start by [mzhou](https://github.com/mzhou)
+* [extractor/biliintl] Add `/media` to `VALID_URL` by [HobbyistDev](https://github.com/HobbyistDev)
+* [extractor/biliIntl] Add fallback to `video_data` by [HobbyistDev](https://github.com/HobbyistDev)
+* [extractor/crunchyroll:show] Add `language` to entries by [Chrissi2812](https://github.com/Chrissi2812)
+* [extractor/joj] Fix extractor by [OndrejBakan](https://github.com/OndrejBakan), [pukkandan](https://github.com/pukkandan)
+* [extractor/nbc] Update graphql query by [jacobtruman](https://github.com/jacobtruman)
+* [extractor/reddit] Add subreddit as `channel_id` by [gschizas](https://github.com/gschizas)
+* [extractor/tiktok] Add `TikTokLive` extractor by [JC-Chung](https://github.com/JC-Chung)
+
+### 2023.01.02
+
+* **Improve plugin architecture** by [Grub4K](https://github.com/Grub4K), [coletdjnz](https://github.com/coletdjnz), [flashdagger](https://github.com/flashdagger), [pukkandan](https://github.com/pukkandan)
+ * Plugins can be loaded in any distribution of yt-dlp (binary, pip, source, etc.) and can be distributed and installed as packages. See [the readme](https://github.com/yt-dlp/yt-dlp/tree/05997b6e98e638d97d409c65bb5eb86da68f3b64#plugins) for more information
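+   A minimal sketch of the layout such a plugin package can use (names are illustrative; see the linked readme for the authoritative details):
+   ```
+   yt_dlp_plugins/          # namespace package searched by yt-dlp
+       extractor/
+           sample.py        # defines extractor classes, e.g. SamplePluginIE
+   ```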
+* Add `--compat-options 2021,2022`
+ * This allows devs to change defaults and make other potentially breaking changes more easily. If you need everything to work exactly as-is, put `--compat-options 2022` in your config to guard against future compat changes.
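+   For example, a minimal configuration sketch (assuming a standard config file location):
+   ```
+   # yt-dlp.conf
+   --compat-options 2022
+   ```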
+* [downloader/aria2c] Native progress for aria2c via RPC by [Lesmiscore](https://github.com/Lesmiscore), [pukkandan](https://github.com/pukkandan)
+* Merge youtube-dl: Up to [commit/195f22f](https://github.com/ytdl-org/youtube-dl/commit/195f22f6) by [Grub4K](https://github.com/Grub4K), [pukkandan](https://github.com/pukkandan)
+* Add pre-processor stage `video`
+* Let `--parse/replace-in-metadata` run at any post-processing stage
+* Add `--enable-file-urls` by [coletdjnz](https://github.com/coletdjnz)
+* Add new field `aspect_ratio`
+* Add `ac4` to known codecs
+* Add `weba` to known extensions
+* [FFmpegVideoConvertor] Add `gif` to `--recode-video`
+* Add message when there are no subtitles/thumbnails
+* Deprioritize HEVC-over-FLV formats by [Lesmiscore](https://github.com/Lesmiscore)
+* Make early reject of `--match-filter` stricter
+* Fix `--cookies-from-browser` CLI parsing
+* Fix `original_url` in playlists
+* Fix bug in writing playlist info-json
+* Fix bugs in `PlaylistEntries`
+* [downloader/ffmpeg] Fix headers for video+audio formats by [Grub4K](https://github.com/Grub4K), [bashonly](https://github.com/bashonly)
+* [extractor] Add a way to distinguish IEs that return only videos
+* [extractor] Implement universal format sorting and deprecate `_sort_formats`
+* [extractor] Let `_extract_format` functions obey `--ignore-no-formats`
+* [extractor/generic] Add `fragment_query` extractor arg for DASH and HLS by [bashonly](https://github.com/bashonly), [pukkandan](https://github.com/pukkandan)
+* [extractor/generic] Decode unicode-escaped embed URLs by [bashonly](https://github.com/bashonly)
+* [extractor/generic] Don't report redirect to https
+* [extractor/generic] Fix JSON LD manifest extraction by [bashonly](https://github.com/bashonly), [pukkandan](https://github.com/pukkandan)
+* [extractor/generic] Use `Accept-Encoding: identity` for initial request by [coletdjnz](https://github.com/coletdjnz)
+* [FormatSort] Add `mov` to `vext`
+* [jsinterp] Escape regex that looks like nested set
+* [webvtt] Handle premature EOF by [flashdagger](https://github.com/flashdagger)
+* [utils] `classproperty`: Add cache support
+* [utils] `get_exe_version`: Detect broken executables by [dirkf](https://github.com/dirkf), [pukkandan](https://github.com/pukkandan)
+* [utils] `js_to_json`: Fix bug in [f55523c](https://github.com/yt-dlp/yt-dlp/commit/f55523c) by [ChillingPepper](https://github.com/ChillingPepper), [pukkandan](https://github.com/pukkandan)
+* [utils] Make `ExtractorError` mutable
+* [utils] Move `FileDownloader.parse_bytes` into utils
+* [utils] Move format sorting code into `utils`
+* [utils] `windows_enable_vt_mode`: Proper implementation by [Grub4K](https://github.com/Grub4K)
+* [update] Work around [#5632](https://github.com/yt-dlp/yt-dlp/issues/5632)
+* [docs] Improvements
+* [cleanup] Misc fixes and cleanup
+* [cleanup] Use `random.choices` by [freezboltz](https://github.com/freezboltz)
+* [extractor/airtv] Add extractor by [HobbyistDev](https://github.com/HobbyistDev)
+* [extractor/amazonminitv] Add extractors by [GautamMKGarg](https://github.com/GautamMKGarg), [nyuszika7h](https://github.com/nyuszika7h)
+* [extractor/beatbump] Add extractors by [Bobscorn](https://github.com/Bobscorn), [pukkandan](https://github.com/pukkandan)
+* [extractor/europarl] Add EuroParlWebstream extractor by [HobbyistDev](https://github.com/HobbyistDev)
+* [extractor/kanal2] Add extractor by [bashonly](https://github.com/bashonly), [glensc](https://github.com/glensc), [pukkandan](https://github.com/pukkandan)
+* [extractor/kankanews] Add extractor by [synthpop123](https://github.com/synthpop123)
+* [extractor/kick] Add extractor by [bashonly](https://github.com/bashonly)
+* [extractor/mediastream] Add extractor by [HobbyistDev](https://github.com/HobbyistDev), [elyse0](https://github.com/elyse0)
+* [extractor/noice] Add NoicePodcast extractor by [HobbyistDev](https://github.com/HobbyistDev)
+* [extractor/oneplace] Add OnePlacePodcast extractor by [HobbyistDev](https://github.com/HobbyistDev)
+* [extractor/rumble] Add RumbleIE extractor by [flashdagger](https://github.com/flashdagger)
+* [extractor/screencastify] Add extractor by [bashonly](https://github.com/bashonly)
+* [extractor/trtcocuk] Add extractor by [HobbyistDev](https://github.com/HobbyistDev)
+* [extractor/Veoh] Add user extractor by [tntmod54321](https://github.com/tntmod54321)
+* [extractor/videoken] Add extractors by [bashonly](https://github.com/bashonly)
+* [extractor/webcamerapl] Add extractor by [milkknife](https://github.com/milkknife)
+* [extractor/amazon] Add `AmazonReviews` extractor by [bashonly](https://github.com/bashonly)
+* [extractor/netverse] Add `NetverseSearch` extractor by [HobbyistDev](https://github.com/HobbyistDev)
+* [extractor/vimeo] Add `VimeoProIE` by [bashonly](https://github.com/bashonly), [pukkandan](https://github.com/pukkandan)
+* [extractor/xiami] Remove extractors by [synthpop123](https://github.com/synthpop123)
+* [extractor/youtube] Add `piped.video` by [Bnyro](https://github.com/Bnyro)
+* [extractor/youtube] Consider language in format de-duplication
+* [extractor/youtube] Extract DRC formats
+* [extractor/youtube] Fix `ytuser:`
+* [extractor/youtube] Fix bug in handling of music URLs
+* [extractor/youtube] Subtitles cannot be translated to `und`
+* [extractor/youtube:tab] Extract metadata from channel items by [coletdjnz](https://github.com/coletdjnz)
+* [extractor/ARD] Add vtt subtitles by [CapacitorSet](https://github.com/CapacitorSet)
+* [extractor/ArteTV] Extract chapters by [bashonly](https://github.com/bashonly), [iw0nderhow](https://github.com/iw0nderhow)
+* [extractor/bandcamp] Add `album_artist` by [stelcodes](https://github.com/stelcodes)
+* [extractor/bilibili] Fix `--no-playlist` for anthology
+* [extractor/bilibili] Improve `_VALID_URL` by [skbeh](https://github.com/skbeh)
+* [extractor/biliintl:series] Make partial download of series faster
+* [extractor/BiliLive] Fix extractor
+* [extractor/brightcove] Add `BrightcoveNewBaseIE` and fix embed extraction
+* [extractor/cda] Support premium and misc improvements by [selfisekai](https://github.com/selfisekai)
+* [extractor/ciscowebex] Support password-protected videos by [damianoamatruda](https://github.com/damianoamatruda)
+* [extractor/curiositystream] Fix auth by [mnn](https://github.com/mnn)
+* [extractor/embedly] Handle vimeo embeds
+* [extractor/fifa] Fix Preplay extraction by [dirkf](https://github.com/dirkf)
+* [extractor/foxsports] Fix extractor by [bashonly](https://github.com/bashonly)
+* [extractor/gronkh] Fix `_VALID_URL` by [muddi900](https://github.com/muddi900)
+* [extractor/hotstar] Improve format metadata
+* [extractor/iqiyi] Fix `Iq` JS regex by [bashonly](https://github.com/bashonly)
+* [extractor/la7] Improve extractor by [nixxo](https://github.com/nixxo)
+* [extractor/mediaset] Better embed detection and error messages by [nixxo](https://github.com/nixxo)
+* [extractor/mixch] Support `--wait-for-video`
+* [extractor/naver] Improve `_VALID_URL` for `NaverNowIE` by [bashonly](https://github.com/bashonly)
+* [extractor/naver] Treat fan subtitles as a separate language
+* [extractor/netverse] Extract comments by [HobbyistDev](https://github.com/HobbyistDev)
+* [extractor/nosnl] Add support for /video by [HobbyistDev](https://github.com/HobbyistDev)
+* [extractor/odnoklassniki] Extract subtitles by [bashonly](https://github.com/bashonly)
+* [extractor/pinterest] Fix extractor by [bashonly](https://github.com/bashonly)
+* [extractor/plutotv] Fix videos with non-zero start by [digitall](https://github.com/digitall)
+* [extractor/polskieradio] Adapt to next.js redesigns by [selfisekai](https://github.com/selfisekai)
+* [extractor/reddit] Add vcodec to fallback format by [chengzhicn](https://github.com/chengzhicn)
+* [extractor/reddit] Extract crossposted media by [bashonly](https://github.com/bashonly)
+* [extractor/reddit] Extract video embeds in text posts by [bashonly](https://github.com/bashonly)
+* [extractor/rutube] Support private videos by [mexus](https://github.com/mexus)
+* [extractor/sibnet] Separate from VKIE
+* [extractor/slideslive] Fix extractor by [Grub4K](https://github.com/Grub4K), [bashonly](https://github.com/bashonly)
+* [extractor/slideslive] Support embeds and slides by [Grub4K](https://github.com/Grub4K), [bashonly](https://github.com/bashonly), [pukkandan](https://github.com/pukkandan)
+* [extractor/soundcloud] Support user permalink by [nosoop](https://github.com/nosoop)
+* [extractor/spankbang] Fix extractor by [JChris246](https://github.com/JChris246)
+* [extractor/stv] Detect DRM
+* [extractor/swearnet] Fix description bug
+* [extractor/tencent] Fix geo-restricted video by [elyse0](https://github.com/elyse0)
+* [extractor/tiktok] Fix subs, `DouyinIE`, improve `_VALID_URL` by [bashonly](https://github.com/bashonly)
+* [extractor/tiktok] Update `_VALID_URL`, add `api_hostname` arg by [bashonly](https://github.com/bashonly)
+* [extractor/tiktok] Update API hostname by [redraskal](https://github.com/redraskal)
+* [extractor/twitcasting] Fix videos with password by [Spicadox](https://github.com/Spicadox), [bashonly](https://github.com/bashonly)
+* [extractor/twitter] Heed `--no-playlist` for multi-video tweets by [Grub4K](https://github.com/Grub4K), [bashonly](https://github.com/bashonly)
+* [extractor/twitter] Refresh guest token when expired by [Grub4K](https://github.com/Grub4K), [bashonly](https://github.com/bashonly)
+* [extractor/twitter:spaces] Add `Referer` to m3u8 by [nixxo](https://github.com/nixxo)
+* [extractor/udemy] Fix lectures that have no URL and detect DRM
+* [extractor/unsupported] Add more URLs
+* [extractor/urplay] Add support for audio-only formats by [barsnick](https://github.com/barsnick)
+* [extractor/wistia] Improve extension detection by [Grub4K](https://github.com/Grub4K), [bashonly](https://github.com/bashonly), [pukkandan](https://github.com/pukkandan)
+* [extractor/yle_areena] Support restricted videos by [docbender](https://github.com/docbender)
+* [extractor/youku] Fix extractor by [KurtBestor](https://github.com/KurtBestor)
+* [extractor/youporn] Fix metadata by [marieell](https://github.com/marieell)
+* [extractor/redgifs] Fix bug in [8c188d5](https://github.com/yt-dlp/yt-dlp/commit/8c188d5d09177ed213a05c900d3523867c5897fd)
+
+
+### 2022.11.11
+
+* Merge youtube-dl: Upto [commit/de39d12](https://github.com/ytdl-org/youtube-dl/commit/de39d128)
+* Backport SSL configuration from Python 3.10 by [coletdjnz](https://github.com/coletdjnz)
+* Do more processing in `--flat-playlist`
+* Fix `--list` options not implying `-s` in some cases by [Grub4K](https://github.com/Grub4K), [bashonly](https://github.com/bashonly)
+* Fix end time of clips by [cruel-efficiency](https://github.com/cruel-efficiency)
+* Fix for `formats=None`
+* Write API params in debug head
+* [outtmpl] Ensure ASCII in json and add option for Unicode
+* [SponsorBlock] Add `type` field, obey `--retry-sleep extractor`, relax duration check for large segments
+* [SponsorBlock] **Support `chapter` category** by [ajayyy](https://github.com/ajayyy), [pukkandan](https://github.com/pukkandan)
+* [ThumbnailsConvertor] Fix filename escaping by [dirkf](https://github.com/dirkf), [pukkandan](https://github.com/pukkandan)
+* [ModifyChapters] Handle the entire video being marked for removal
+* [embedthumbnail] Fix thumbnail name in mp3 by [How-Bout-No](https://github.com/How-Bout-No)
+* [downloader/fragment] HLS download can continue without the first fragment
+* [cookies] Improve `LenientSimpleCookie` by [Grub4K](https://github.com/Grub4K)
+* [jsinterp] Improve separating regex
+* [extractor/common] Fix `fatal=False` for `_search_nuxt_data`
+* [extractor/common] Improve `_generic_title`
+* [extractor/common] Fix `json_ld` type checks by [Grub4K](https://github.com/Grub4K)
+* [extractor/generic] Separate embed extraction into own function
+* [extractor/generic:quoted-html] Add extractor by [coletdjnz](https://github.com/coletdjnz), [pukkandan](https://github.com/pukkandan)
+* [extractor/unsupported] Raise error on known DRM-only sites by [coletdjnz](https://github.com/coletdjnz)
+* [utils] `js_to_json`: Improve escape handling by [Grub4K](https://github.com/Grub4K)
+* [utils] `strftime_or_none`: Workaround Python bug on Windows
+* [utils] `traverse_obj`: Always return list when branching, allow `re.Match` objects by [Grub4K](https://github.com/Grub4K)
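+
+  A minimal sketch of the two behaviours named above (the data and patterns are illustrative):
+  ```python
+  import re
+
+  from yt_dlp.utils import traverse_obj
+
+  data = {'formats': [{'url': 'https://a.example/v.mp4'}, {'ext': 'mp4'}]}
+  # Branching with `...` now always yields a list, even for a single match
+  print(traverse_obj(data, ('formats', ..., 'url')))  # ['https://a.example/v.mp4']
+
+  # `re.Match` objects can be traversed by group name (or index)
+  mobj = re.search(r'video-(?P<id>\d+)', 'video-42')
+  print(traverse_obj(mobj, 'id'))  # '42'
+  ```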
+* [build, test] Harden workflows' security by [sashashura](https://github.com/sashashura)
+* [build] `py2exe`: Migrate to freeze API by [SG5](https://github.com/SG5), [pukkandan](https://github.com/pukkandan)
+* [build] Create `armv7l` and `aarch64` releases by [MrOctopus](https://github.com/MrOctopus), [pukkandan](https://github.com/pukkandan)
+* [build] Make linux binary truly standalone using `conda` by [mlampe](https://github.com/mlampe)
+* [build] Replace `set-output` with `GITHUB_OUTPUT` by [Lesmiscore](https://github.com/Lesmiscore)
+* [update] Use error code `100` for update errors
+* [compat] Fix `shutil.move` in restricted ACL mode on BSD by [ClosedPort22](https://github.com/ClosedPort22), [pukkandan](https://github.com/pukkandan)
+* [docs, devscripts] Document `pyinst`'s argument passthrough by [jahway603](https://github.com/jahway603)
+* [test] Allow `extract_flat` in download tests by [coletdjnz](https://github.com/coletdjnz), [pukkandan](https://github.com/pukkandan)
+* [cleanup] Misc fixes and cleanup by [pukkandan](https://github.com/pukkandan), [Alienmaster](https://github.com/Alienmaster)
+* [extractor/aeon] Add extractor by [DoubleCouponDay](https://github.com/DoubleCouponDay)
+* [extractor/agora] Add extractors by [selfisekai](https://github.com/selfisekai)
+* [extractor/camsoda] Add extractor by [zulaport](https://github.com/zulaport)
+* [extractor/cinetecamilano] Add extractor by [timendum](https://github.com/timendum)
+* [extractor/deuxm] Add extractors by [CrankDatSouljaBoy](https://github.com/CrankDatSouljaBoy)
+* [extractor/genius] Add extractors by [bashonly](https://github.com/bashonly)
+* [extractor/japandiet] Add extractors by [Lesmiscore](https://github.com/Lesmiscore)
+* [extractor/listennotes] Add extractor by [lksj](https://github.com/lksj), [pukkandan](https://github.com/pukkandan)
+* [extractor/nos.nl] Add extractor by [HobbyistDev](https://github.com/HobbyistDev)
+* [extractor/oftv] Add extractors by [DoubleCouponDay](https://github.com/DoubleCouponDay)
+* [extractor/podbayfm] Add extractor by [schnusch](https://github.com/schnusch)
+* [extractor/qingting] Add extractor by [bashonly](https://github.com/bashonly), [changren-wcr](https://github.com/changren-wcr)
+* [extractor/screen9] Add extractor by [tpikonen](https://github.com/tpikonen)
+* [extractor/swearnet] Add extractor by [HobbyistDev](https://github.com/HobbyistDev)
+* [extractor/YleAreena] Add extractor by [pukkandan](https://github.com/pukkandan), [vitkhab](https://github.com/vitkhab)
+* [extractor/zeenews] Add extractor by [m4tu4g](https://github.com/m4tu4g), [pukkandan](https://github.com/pukkandan)
+* [extractor/youtube:tab] **Update tab handling for redesign** by [coletdjnz](https://github.com/coletdjnz), [pukkandan](https://github.com/pukkandan)
+ * Channel URLs download all uploads of the channel as multiple playlists, separated by tab
+* [extractor/youtube] Differentiate between no comments and disabled comments by [coletdjnz](https://github.com/coletdjnz)
+* [extractor/youtube] Extract `concurrent_view_count` for livestreams by [coletdjnz](https://github.com/coletdjnz)
+* [extractor/youtube] Fix `duration` for premieres by [nosoop](https://github.com/nosoop)
+* [extractor/youtube] Fix `live_status` by [coletdjnz](https://github.com/coletdjnz), [pukkandan](https://github.com/pukkandan)
+* [extractor/youtube] Ignore incomplete data error for comment replies by [coletdjnz](https://github.com/coletdjnz)
+* [extractor/youtube] Improve chapter parsing from description
+* [extractor/youtube] Mark videos as fully watched by [bsun0000](https://github.com/bsun0000)
+* [extractor/youtube] Update piped instances by [Generator](https://github.com/Generator)
+* [extractor/youtube] Update playlist metadata extraction for new layout by [coletdjnz](https://github.com/coletdjnz)
+* [extractor/youtube:tab] Fix video metadata from tabs by [coletdjnz](https://github.com/coletdjnz)
+* [extractor/youtube:tab] Let `approximate_date` return timestamp
+* [extractor/americastestkitchen] Fix extractor by [bashonly](https://github.com/bashonly)
+* [extractor/bbc] Support onion domains by [DoubleCouponDay](https://github.com/DoubleCouponDay)
+* [extractor/bilibili] Add chapters and misc cleanup by [lockmatrix](https://github.com/lockmatrix), [pukkandan](https://github.com/pukkandan)
+* [extractor/bilibili] Fix BilibiliIE and Bangumi extractors by [lockmatrix](https://github.com/lockmatrix), [pukkandan](https://github.com/pukkandan)
+* [extractor/bitchute] Better error for geo-restricted videos by [flashdagger](https://github.com/flashdagger)
+* [extractor/bitchute] Improve `BitChuteChannelIE` by [flashdagger](https://github.com/flashdagger), [pukkandan](https://github.com/pukkandan)
+* [extractor/bitchute] Simplify extractor by [flashdagger](https://github.com/flashdagger), [pukkandan](https://github.com/pukkandan)
+* [extractor/cda] Support login through API by [selfisekai](https://github.com/selfisekai)
+* [extractor/crunchyroll] Beta is now the only layout by [tejing1](https://github.com/tejing1)
+* [extractor/detik] Avoid unnecessary extraction
+* [extractor/doodstream] Remove extractor
+* [extractor/dplay] Add MotorTrendOnDemand extractor by [bashonly](https://github.com/bashonly)
+* [extractor/epoch] Support videos without data-trailer by [gibson042](https://github.com/gibson042), [pukkandan](https://github.com/pukkandan)
+* [extractor/fox] Extract thumbnail by [vitkhab](https://github.com/vitkhab)
+* [extractor/foxnews] Add `FoxNewsVideo` extractor
+* [extractor/hotstar] Add season support by [m4tu4g](https://github.com/m4tu4g)
+* [extractor/hotstar] Refactor v1 API calls
+* [extractor/iprima] Make json+ld non-fatal by [bashonly](https://github.com/bashonly)
+* [extractor/iq] Increase phantomjs timeout
+* [extractor/kaltura] Support playlists by [jwoglom](https://github.com/jwoglom), [pukkandan](https://github.com/pukkandan)
+* [extractor/lbry] Authenticate with cookies by [flashdagger](https://github.com/flashdagger)
+* [extractor/livestreamfails] Support posts by [invertico](https://github.com/invertico)
+* [extractor/mlb] Add `MLBArticle` extractor by [HobbyistDev](https://github.com/HobbyistDev)
+* [extractor/mxplayer] Improve extractor by [m4tu4g](https://github.com/m4tu4g)
+* [extractor/niconico] Always use HTTPS for requests
+* [extractor/nzherald] Support new video embed by [coletdjnz](https://github.com/coletdjnz)
+* [extractor/odnoklassniki] Support boosty.to embeds by [Lesmiscore](https://github.com/Lesmiscore), [megapro17](https://github.com/megapro17), [pukkandan](https://github.com/pukkandan)
+* [extractor/paramountplus] Update API token by [bashonly](https://github.com/bashonly)
+* [extractor/reddit] Add fallback format by [bashonly](https://github.com/bashonly)
+* [extractor/redgifs] Fix extractors by [bashonly](https://github.com/bashonly), [pukkandan](https://github.com/pukkandan)
+* [extractor/redgifs] Refresh auth token for 401 by [endotronic](https://github.com/endotronic), [pukkandan](https://github.com/pukkandan)
+* [extractor/rumble] Add HLS formats and extract more metadata by [flashdagger](https://github.com/flashdagger)
+* [extractor/sbs] Improve `_VALID_URL` by [bashonly](https://github.com/bashonly)
+* [extractor/skyit] Fix extractors by [nixxo](https://github.com/nixxo)
+* [extractor/stripchat] Fix hostname for HLS stream by [zulaport](https://github.com/zulaport)
+* [extractor/stripchat] Improve error message by [freezboltz](https://github.com/freezboltz)
+* [extractor/telegram] Add playlist support and more metadata by [bashonly](https://github.com/bashonly), [bsun0000](https://github.com/bsun0000)
+* [extractor/Tnaflix] Fix for HTTP 500 by [SG5](https://github.com/SG5), [pukkandan](https://github.com/pukkandan)
+* [extractor/tubitv] Better DRM detection by [bashonly](https://github.com/bashonly)
+* [extractor/tvp] Update extractors by [selfisekai](https://github.com/selfisekai)
+* [extractor/twitcasting] Fix `data-movie-playlist` extraction by [Lesmiscore](https://github.com/Lesmiscore)
+* [extractor/twitter] Add onion site to `_VALID_URL` by [DoubleCouponDay](https://github.com/DoubleCouponDay)
+* [extractor/twitter] Add Spaces extractor and GraphQL API by [Grub4K](https://github.com/Grub4K), [bashonly](https://github.com/bashonly), [nixxo](https://github.com/nixxo), [pukkandan](https://github.com/pukkandan)
+* [extractor/twitter] Support multi-video posts by [Grub4K](https://github.com/Grub4K)
+* [extractor/uktvplay] Fix `_VALID_URL`
+* [extractor/viu] Support subtitles of on-screen text by [tkgmomosheep](https://github.com/tkgmomosheep)
+* [extractor/VK] Fix playlist URLs by [the-marenga](https://github.com/the-marenga)
+* [extractor/vlive] Extract `release_timestamp`
+* [extractor/voot] Improve `_VALID_URL` by [freezboltz](https://github.com/freezboltz)
+* [extractor/wordpress:mb.miniAudioPlayer] Add embed extractor by [coletdjnz](https://github.com/coletdjnz)
+* [extractor/YoutubeWebArchive] Improve metadata extraction by [coletdjnz](https://github.com/coletdjnz)
+* [extractor/zee5] Improve `_VALID_URL` by [m4tu4g](https://github.com/m4tu4g)
+* [extractor/zenyandex] Fix extractors by [lksj](https://github.com/lksj), [puc9](https://github.com/puc9), [pukkandan](https://github.com/pukkandan)
+
+
+### 2022.10.04
+
+* Allow a `set` to be passed as `download_archive` by [pukkandan](https://github.com/pukkandan), [bashonly](https://github.com/bashonly)
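+
+  A minimal embedding sketch of this (the URL and archive entry are placeholders; archive entries use the usual `<extractor> <id>` form):
+  ```python
+  import yt_dlp
+
+  archive = {'youtube dQw4w9WgXcQ'}  # in-memory archive; nothing is written to disk
+  with yt_dlp.YoutubeDL({'download_archive': archive}) as ydl:
+      # Already present in the archive, so the download is skipped
+      ydl.download(['https://www.youtube.com/watch?v=dQw4w9WgXcQ'])
+  ```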
+* Allow open ranges for time ranges by [Lesmiscore](https://github.com/Lesmiscore)
+* Allow plugin extractors to replace the built-in ones
+* Don't download the entire video when no `--download-sections` match
+* Fix `--config-location -`
+* Improve [5736d79](https://github.com/yt-dlp/yt-dlp/pull/5044/commits/5736d79172c47ff84740d5720467370a560febad)
+* Fix for when playlists don't have `webpage_url`
+* Support environment variables in `--ffmpeg-location`
+* Workaround `libc_ver` not being available on Windows Store version of Python
+* [outtmpl] Curly braces to filter keys by [pukkandan](https://github.com/pukkandan)
+* [outtmpl] Make `%s` work in strftime formatting for all systems
+* [jsinterp] Workaround operator associativity issue
+* [cookies] Let `_get_mac_keyring_password` fail gracefully
+* [cookies] Parse cookies leniently by [Grub4K](https://github.com/Grub4K)
+* [phantomjs] Fix bug in [587021c](https://github.com/yt-dlp/yt-dlp/commit/587021cd9f717181b44e881941aca3f8d753758b) by [elyse0](https://github.com/elyse0)
+* [downloader/aria2c] Fix filename containing leading whitespace by [std-move](https://github.com/std-move)
+* [downloader/ism] Support ec-3 codec by [nixxo](https://github.com/nixxo)
+* [extractor] Fix `fatal=False` in `RetryManager`
+* [extractor] Improve json-ld extraction
+* [extractor] Make `_search_json` able to parse lists
+* [extractor] Escape `%` in `representation_id` of m3u8
+* [extractor/generic] Pass through referer from json-ld
+* [utils] `base_url`: URL paths can contain `&` by [elyse0](https://github.com/elyse0)
+* [utils] `js_to_json`: Improve
+* [utils] `Popen.run`: Fix default return in binary mode
+* [utils] `traverse_obj`: Rewrite, document and add tests by [Grub4K](https://github.com/Grub4K)
+* [devscripts] `make_lazy_extractors`: Fix for Docker by [josanabr](https://github.com/josanabr)
+* [docs] Misc improvements
+* [cleanup] Misc fixes and cleanup by [pukkandan](https://github.com/pukkandan), [gamer191](https://github.com/gamer191)
+* [extractor/24tv.ua] Add extractors by [coletdjnz](https://github.com/coletdjnz)
+* [extractor/BerufeTV] Add extractor by [Fabi019](https://github.com/Fabi019)
+* [extractor/booyah] Add extractor by [HobbyistDev](https://github.com/HobbyistDev), [elyse0](https://github.com/elyse0)
+* [extractor/bundesliga] Add extractor by [Fabi019](https://github.com/Fabi019)
+* [extractor/GoPlay] Add extractor by [CNugteren](https://github.com/CNugteren), [basrieter](https://github.com/basrieter), [jeroenj](https://github.com/jeroenj)
+* [extractor/iltalehti] Add extractor by [tpikonen](https://github.com/tpikonen)
+* [extractor/IsraelNationalNews] Add extractor by [Bobscorn](https://github.com/Bobscorn)
+* [extractor/mediaworksnzvod] Add extractor by [coletdjnz](https://github.com/coletdjnz)
+* [extractor/MicrosoftEmbed] Add extractor by [DoubleCouponDay](https://github.com/DoubleCouponDay)
+* [extractor/nbc] Add NBCStations extractor by [bashonly](https://github.com/bashonly)
+* [extractor/onenewsnz] Add extractor by [coletdjnz](https://github.com/coletdjnz)
+* [extractor/prankcast] Add extractor by [HobbyistDev](https://github.com/HobbyistDev), [columndeeply](https://github.com/columndeeply)
+* [extractor/Smotrim] Add extractor by [Lesmiscore](https://github.com/Lesmiscore), [nikita-moor](https://github.com/nikita-moor)
+* [extractor/tencent] Add Iflix extractor by [elyse0](https://github.com/elyse0)
+* [extractor/unscripted] Add extractor by [HobbyistDev](https://github.com/HobbyistDev)
+* [extractor/adobepass] Add MSO AlticeOne (Optimum TV) by [CplPwnies](https://github.com/CplPwnies)
+* [extractor/youtube] **Download `post_live` videos from start** by [Lesmiscore](https://github.com/Lesmiscore), [pukkandan](https://github.com/pukkandan)
+* [extractor/youtube] Add support for Shorts audio pivot feed by [coletdjnz](https://github.com/coletdjnz), [pukkandan](https://github.com/pukkandan)
+* [extractor/youtube] Detect `lazy-load-for-videos` embeds
+* [extractor/youtube] Do not warn on duplicate chapters
+* [extractor/youtube] Fix video like count extraction by [coletdjnz](https://github.com/coletdjnz)
+* [extractor/youtube] Support changing extraction language by [coletdjnz](https://github.com/coletdjnz)
+* [extractor/youtube:tab] Improve continuation items extraction
+* [extractor/youtube:tab] Support `reporthistory` page
+* [extractor/amazonstore] Fix JSON extraction by [coletdjnz](https://github.com/coletdjnz), [pukkandan](https://github.com/pukkandan)
+* [extractor/amazonstore] Retry to avoid captcha page by [Lesmiscore](https://github.com/Lesmiscore)
+* [extractor/animeondemand] Remove extractor by [TokyoBlackHole](https://github.com/TokyoBlackHole)
+* [extractor/anvato] Fix extractor and refactor by [bashonly](https://github.com/bashonly)
+* [extractor/artetv] Remove duplicate stream urls by [Grub4K](https://github.com/Grub4K)
+* [extractor/audioboom] Support direct URLs and refactor by [pukkandan](https://github.com/pukkandan), [tpikonen](https://github.com/tpikonen)
+* [extractor/bandcamp] Extract `uploader_url`
+* [extractor/bilibili] Add space.bilibili extractors by [lockmatrix](https://github.com/lockmatrix)
+* [extractor/BilibiliSpace] Fix extractor and better error message by [lockmatrix](https://github.com/lockmatrix)
+* [extractor/BiliIntl] Support uppercase lang in `_VALID_URL` by [coletdjnz](https://github.com/coletdjnz)
+* [extractor/BiliIntlSeries] Fix `_VALID_URL`
+* [extractor/bongacams] Update `_VALID_URL` by [0xGodspeed](https://github.com/0xGodspeed)
+* [extractor/crunchyroll:beta] Improve handling of hardsubs by [Grub4K](https://github.com/Grub4K)
+* [extractor/detik] Generalize extractors by [HobbyistDev](https://github.com/HobbyistDev), [coletdjnz](https://github.com/coletdjnz)
+* [extractor/dplay:italy] Add default authentication by [Timendum](https://github.com/Timendum)
+* [extractor/heise] Fix extractor by [coletdjnz](https://github.com/coletdjnz)
+* [extractor/holodex] Fix `_VALID_URL` by [LiviaMedeiros](https://github.com/LiviaMedeiros)
+* [extractor/hrfensehen] Fix extractor by [snapdgn](https://github.com/snapdgn)
+* [extractor/hungama] Add subtitle by [GautamMKGarg](https://github.com/GautamMKGarg), [pukkandan](https://github.com/pukkandan)
+* [extractor/instagram] Extract more metadata by [pritam20ps05](https://github.com/pritam20ps05)
+* [extractor/JWPlatform] Fix extractor by [coletdjnz](https://github.com/coletdjnz)
+* [extractor/malltv] Fix video_id extraction by [HobbyistDev](https://github.com/HobbyistDev)
+* [extractor/MLBTV] Detect live streams
+* [extractor/motorsport] Support native embeds
+* [extractor/Mxplayer] Fix extractor by [itachi-19](https://github.com/itachi-19)
+* [extractor/nebula] Add nebula.tv by [tannertechnology](https://github.com/tannertechnology)
+* [extractor/nfl] Fix extractor by [bashonly](https://github.com/bashonly)
+* [extractor/ondemandkorea] Update `jw_config` regex by [julien-hadleyjack](https://github.com/julien-hadleyjack)
+* [extractor/paramountplus] Better DRM detection by [bashonly](https://github.com/bashonly)
+* [extractor/patreon] Sort formats
+* [extractor/rcs] Fix embed extraction by [coletdjnz](https://github.com/coletdjnz)
+* [extractor/redgifs] Fix extractor by [jhwgh1968](https://github.com/jhwgh1968)
+* [extractor/rutube] Fix `_EMBED_REGEX` by [coletdjnz](https://github.com/coletdjnz)
+* [extractor/RUTV] Fix warnings for livestreams by [Lesmiscore](https://github.com/Lesmiscore)
+* [extractor/soundcloud:search] More metadata in `--flat-playlist` by [SuperSonicHub1](https://github.com/SuperSonicHub1)
+* [extractor/telegraaf] Use mobile GraphQL API endpoint by [coletdjnz](https://github.com/coletdjnz)
+* [extractor/tennistv] Fix timestamp by [zenerdi0de](https://github.com/zenerdi0de)
+* [extractor/tiktok] Fix TikTokIE by [bashonly](https://github.com/bashonly)
+* [extractor/triller] Fix auth token by [bashonly](https://github.com/bashonly)
+* [extractor/trovo] Fix extractors by [Mehavoid](https://github.com/Mehavoid)
+* [extractor/tv2] Support new url format by [tobi1805](https://github.com/tobi1805)
+* [extractor/web.archive:youtube] Fix `_YT_INITIAL_PLAYER_RESPONSE_RE`
+* [extractor/wistia] Add support for channels by [coletdjnz](https://github.com/coletdjnz)
+* [extractor/wistia] Match IDs in embed URLs by [bashonly](https://github.com/bashonly)
+* [extractor/wordpress:playlist] Add generic embed extractor by [coletdjnz](https://github.com/coletdjnz)
+* [extractor/yandexvideopreview] Update `_VALID_URL` by [Grub4K](https://github.com/Grub4K)
+* [extractor/zee5] Fix `_VALID_URL` by [m4tu4g](https://github.com/m4tu4g)
+* [extractor/zee5] Generate device ids by [freezboltz](https://github.com/freezboltz)
+
+
+### 2022.09.01
+
+* Add option `--use-extractors`
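+
+  For embedding, this maps to the `allowed_extractors` key, a list of regexes matched against extractor names; a sketch assuming that mapping:
+  ```python
+  import yt_dlp
+
+  # Only try the YouTube extractors, plus the generic fallback
+  opts = {'allowed_extractors': ['youtube.*', 'generic']}
+  with yt_dlp.YoutubeDL(opts) as ydl:
+      info = ydl.extract_info(
+          'https://www.youtube.com/watch?v=dQw4w9WgXcQ', download=False)
+  ```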
+* Merge youtube-dl: Upto [commit/ed5c44e](https://github.com/ytdl-org/youtube-dl/commit/ed5c44e7)
+* Add yt-dlp version to infojson
+* Fix `--break-per-url --max-downloads`
+* Fix bug in `--alias`
+* [cookies] Support firefox container in `--cookies-from-browser` by [bashonly](https://github.com/bashonly), [coletdjnz](https://github.com/coletdjnz), [pukkandan](https://github.com/pukkandan)
+* [downloader/external] Smarter detection of executable
+* [extractor/generic] Don't return JW player without formats
+* [FormatSort] Fix `aext` for `--prefer-free-formats`
+* [jsinterp] Various improvements by [pukkandan](https://github.com/pukkandan), [dirkf](https://github.com/dirkf), [elyse0](https://github.com/elyse0)
+* [cache] Mechanism to invalidate old cache
+* [utils] Add `deprecation_warning`
+* [utils] Add `orderedSet_from_options`
+* [utils] `Popen`: Restore `LD_LIBRARY_PATH` when using PyInstaller by [Lesmiscore](https://github.com/Lesmiscore)
+* [build] `make tar` should not follow `DESTDIR` by [satan1st](https://github.com/satan1st)
+* [build] Update pyinstaller by [shirt-dev](https://github.com/shirt-dev)
+* [test] Fix `test_youtube_signature`
+* [cleanup] Misc fixes and cleanup by [DavidH-2022](https://github.com/DavidH-2022), [MrRawes](https://github.com/MrRawes), [pukkandan](https://github.com/pukkandan)
+* [extractor/epoch] Add extractor by [tejasa97](https://github.com/tejasa97)
+* [extractor/eurosport] Add extractor by [HobbyistDev](https://github.com/HobbyistDev)
+* [extractor/IslamChannel] Add extractors by [Lesmiscore](https://github.com/Lesmiscore)
+* [extractor/newspicks] Add extractor by [Lesmiscore](https://github.com/Lesmiscore)
+* [extractor/triller] Add extractor by [bashonly](https://github.com/bashonly)
+* [extractor/VQQ] Add extractors by [elyse0](https://github.com/elyse0)
+* [extractor/youtube] Improvements to nsig extraction
+* [extractor/youtube] Fix bug in format sorting
+* [extractor/youtube] Update iOS Innertube clients by [SamantazFox](https://github.com/SamantazFox)
+* [extractor/youtube] Use device-specific user agent by [coletdjnz](https://github.com/coletdjnz)
+* [extractor/youtube] Add `--compat-option no-youtube-prefer-utc-upload-date` by [coletdjnz](https://github.com/coletdjnz)
+* [extractor/arte] Bug fix by [cgrigis](https://github.com/cgrigis)
+* [extractor/bilibili] Extract `flac` with premium account by [jackyyf](https://github.com/jackyyf)
+* [extractor/BiliBiliSearch] Don't sort by date
+* [extractor/BiliBiliSearch] Fix infinite loop
+* [extractor/bitchute] Mark errors as expected
+* [extractor/crunchyroll:beta] Use anonymous access by [tejing1](https://github.com/tejing1)
+* [extractor/huya] Fix stream extraction by [ohaiibuzzle](https://github.com/ohaiibuzzle)
+* [extractor/medaltv] Fix extraction by [xenova](https://github.com/xenova)
+* [extractor/mediaset] Fix embed extraction
+* [extractor/mixcloud] All formats are audio-only
+* [extractor/rtbf] Fix jwt extraction by [elyse0](https://github.com/elyse0)
+* [extractor/screencastomatic] Support `--video-password` by [shreyasminocha](https://github.com/shreyasminocha)
+* [extractor/stripchat] Don't modify input URL by [dfaker](https://github.com/dfaker)
+* [extractor/uktv] Improve `_VALID_URL` by [dirkf](https://github.com/dirkf)
+* [extractor/vimeo:user] Fix `_VALID_URL`
+
+
+### 2022.08.19
+
+* Fix bug in `--download-archive`
+* [jsinterp] **Fix for new youtube players** and related improvements by [dirkf](https://github.com/dirkf), [pukkandan](https://github.com/pukkandan)
+* [phantomjs] Add function to execute JS without a DOM by [MinePlayersPE](https://github.com/MinePlayersPE), [pukkandan](https://github.com/pukkandan)
+* [build] Exclude devscripts from installs by [Lesmiscore](https://github.com/Lesmiscore)
+* [cleanup] Misc fixes and cleanup
+* [extractor/youtube] **Add fallback to phantomjs** for nsig
+* [extractor/youtube] Fix error reporting of "Incomplete data"
+* [extractor/youtube] Improve format sorting for iOS formats
+* [extractor/youtube] Improve signature caching
+* [extractor/instagram] Fix extraction by [bashonly](https://github.com/bashonly), [pritam20ps05](https://github.com/pritam20ps05)
+* [extractor/rai] Minor fix by [nixxo](https://github.com/nixxo)
+* [extractor/rtbf] Fix stream extractor by [elyse0](https://github.com/elyse0)
+* [extractor/SovietsCloset] Fix extractor by [ChillingPepper](https://github.com/ChillingPepper)
+* [extractor/zattoo] Fix Zattoo resellers by [goggle](https://github.com/goggle)
+
+### 2022.08.14
+
+* Merge youtube-dl: Upto [commit/d231b56](https://github.com/ytdl-org/youtube-dl/commit/d231b56)
+* [jsinterp] Handle **new youtube signature functions**
+* [jsinterp] Truncate error messages
+* [extractor] Fix format sorting of `channels`
+* [ffmpeg] Disable avconv unless `--prefer-avconv`
+* [ffmpeg] Smarter detection of ffprobe filename
+* [embedthumbnail] Detect `libatomicparsley.so`
+* [ThumbnailsConvertor] Fix conversion after `fixup_webp`
+* [utils] Fix `get_compatible_ext`
+* [build] Fix changelog
+* [update] Set executable bit-mask by [pukkandan](https://github.com/pukkandan), [Lesmiscore](https://github.com/Lesmiscore)
+* [devscripts] Fix import
+* [docs] Consistent use of `e.g.` by [Lesmiscore](https://github.com/Lesmiscore)
+* [cleanup] Misc fixes and cleanup
+* [extractor/moview] Add extractor by [HobbyistDev](https://github.com/HobbyistDev)
+* [extractor/parler] Add extractor by [palewire](https://github.com/palewire)
+* [extractor/patreon] Ignore erroneous media attachments by [coletdjnz](https://github.com/coletdjnz)
+* [extractor/truth] Add extractor by [palewire](https://github.com/palewire)
+* [extractor/aenetworks] Add formats parameter by [jacobtruman](https://github.com/jacobtruman)
+* [extractor/crunchyroll] Improve `_VALID_URL`s
+* [extractor/doodstream] Add `wf` domain by [aldoridhoni](https://github.com/aldoridhoni)
+* [extractor/facebook] Add reel support by [bashonly](https://github.com/bashonly)
+* [extractor/MLB] New extractor by [ischmidt20](https://github.com/ischmidt20)
+* [extractor/rai] Misc fixes by [nixxo](https://github.com/nixxo)
+* [extractor/toggo] Improve `_VALID_URL` by [masta79](https://github.com/masta79)
+* [extractor/tubitv] Extract additional formats by [shirt-dev](https://github.com/shirt-dev)
+* [extractor/zattoo] Potential fix for resellers
+
+
+### 2022.08.08
+
+* **Remove Python 3.6 support**
+* Determine merge container better by [pukkandan](https://github.com/pukkandan), [selfisekai](https://github.com/selfisekai)
+* Framework for embed detection by [coletdjnz](https://github.com/coletdjnz), [pukkandan](https://github.com/pukkandan)
+* Merge youtube-dl: Upto [commit/adb5294](https://github.com/ytdl-org/youtube-dl/commit/adb5294)
+* `--compat-option no-live-chat` should disable danmaku
+* Fix misleading DRM message
+* Import ctypes only when necessary
+* Minor bugfixes
+* Reject entire playlists faster with `--match-filter`
+* Remove filtered entries from `-J`
+* Standardize retry mechanism
+* Validate `--merge-output-format`
+* [downloader] Add average speed to final progress line
+* [extractor] Add field `audio_channels`
+* [extractor] Support multiple archive ids for one video
+* [ffmpeg] Set `ffmpeg_location` in a contextvar
+* [FFmpegThumbnailsConvertor] Fix conversion from GIF
+* [MetadataParser] Don't set `None` when the field didn't match
+* [outtmpl] Smarter replacing of unsupported characters
+* [outtmpl] Treat empty values as None in filenames
+* [utils] `sanitize_open`: Allow any IO stream as stdout
+* [build, devscripts] Add devscript to set a build variant
+* [build] Improve build process by [shirt-dev](https://github.com/shirt-dev)
+* [build] Update pyinstaller
+* [devscripts] Create `utils` and refactor
+* [docs] Clarify `best*`
+* [docs] Fix bug report issue template
+* [docs] Fix capitalization in references by [christoph-heinrich](https://github.com/christoph-heinrich)
+* [cleanup, mhtml] Use imghdr
+* [cleanup, utils] Consolidate known media extensions
+* [cleanup] Misc fixes and cleanup
+* [extractor/angel] Add extractor by [AxiosDeminence](https://github.com/AxiosDeminence)
+* [extractor/dplay] Add MotorTrend extractor by [Sipherdrakon](https://github.com/Sipherdrakon)
+* [extractor/harpodeon] Add extractor by [eren-kemer](https://github.com/eren-kemer)
+* [extractor/holodex] Add extractor by [pukkandan](https://github.com/pukkandan), [sqrtNOT](https://github.com/sqrtNOT)
+* [extractor/kompas] Add extractor by [HobbyistDev](https://github.com/HobbyistDev)
+* [extractor/rai] Add raisudtirol extractor by [nixxo](https://github.com/nixxo)
+* [extractor/tempo] Add extractor by [HobbyistDev](https://github.com/HobbyistDev)
+* [extractor/youtube] **Fixes for third party client detection** by [coletdjnz](https://github.com/coletdjnz)
+* [extractor/youtube] Add `live_status=post_live` by [lazypete365](https://github.com/lazypete365)
+* [extractor/youtube] Extract more format info
+* [extractor/youtube] Parse translated subtitles only when requested
+* [extractor/youtube, extractor/twitch] Allow waiting for channels to become live
+* [extractor/youtube, webvtt] Extract auto-subs from livestream VODs by [fstirlitz](https://github.com/fstirlitz), [pukkandan](https://github.com/pukkandan)
+* [extractor/AbemaTVTitle] Implement paging by [Lesmiscore](https://github.com/Lesmiscore)
+* [extractor/archiveorg] Improve handling of formats by [coletdjnz](https://github.com/coletdjnz), [pukkandan](https://github.com/pukkandan)
+* [extractor/arte] Fix title extraction
+* [extractor/arte] **Move to v2 API** by [fstirlitz](https://github.com/fstirlitz), [pukkandan](https://github.com/pukkandan)
+* [extractor/bbc] Fix news articles by [ajj8](https://github.com/ajj8)
+* [extractor/camtasia] Separate into own extractor by [coletdjnz](https://github.com/coletdjnz)
+* [extractor/cloudflarestream] Fix video_id padding by [haobinliang](https://github.com/haobinliang)
+* [extractor/crunchyroll] Fix conversion of thumbnail from GIF
+* [extractor/crunchyroll] Handle missing metadata correctly by [Burve](https://github.com/Burve), [pukkandan](https://github.com/pukkandan)
+* [extractor/crunchyroll:beta] Extract timestamp and fix tests by [tejing1](https://github.com/tejing1)
+* [extractor/crunchyroll:beta] Use streams API by [tejing1](https://github.com/tejing1)
+* [extractor/doodstream] Support more domains by [Galiley](https://github.com/Galiley)
+* [extractor/ESPN] Extract duration by [ischmidt20](https://github.com/ischmidt20)
+* [extractor/FIFA] Change API endpoint by [Bricio](https://github.com/Bricio), [yashkc2025](https://github.com/yashkc2025)
+* [extractor/globo:article] Remove false positives by [Bricio](https://github.com/Bricio)
+* [extractor/Go] Extract timestamp by [ischmidt20](https://github.com/ischmidt20)
+* [extractor/hidive] Fix cookie login when netrc is also given by [winterbird-code](https://github.com/winterbird-code)
+* [extractor/html5] Separate into own extractor by [coletdjnz](https://github.com/coletdjnz), [pukkandan](https://github.com/pukkandan)
+* [extractor/ina] Improve extractor by [elyse0](https://github.com/elyse0)
+* [extractor/NaverNow] Change endpoint by [ping](https://github.com/ping)
+* [extractor/ninegag] Extract uploader by [DjesonPV](https://github.com/DjesonPV)
+* [extractor/NovaPlay] Fix extractor by [Bojidarist](https://github.com/Bojidarist)
+* [extractor/orf:radio] Rewrite extractors
+* [extractor/patreon] Fix and improve extractors by [coletdjnz](https://github.com/coletdjnz), [pukkandan](https://github.com/pukkandan)
+* [extractor/rai] Fix RaiNews extraction by [nixxo](https://github.com/nixxo)
+* [extractor/redbee] Unify and update extractors by [elyse0](https://github.com/elyse0)
+* [extractor/stripchat] Fix `_VALID_URL` by [freezboltz](https://github.com/freezboltz)
+* [extractor/tubi] Exclude playlists from playlist entries by [sqrtNOT](https://github.com/sqrtNOT)
+* [extractor/tviplayer] Improve `_VALID_URL` by [HobbyistDev](https://github.com/HobbyistDev)
+* [extractor/twitch] Extract chapters for single chapter VODs by [mpeter50](https://github.com/mpeter50)
+* [extractor/vgtv] Support tv.vg.no by [sqrtNOT](https://github.com/sqrtNOT)
+* [extractor/vidio] Support embed link by [HobbyistDev](https://github.com/HobbyistDev)
+* [extractor/vk] Fix extractor by [Mehavoid](https://github.com/Mehavoid)
+* [extractor/WASDTV:record] Fix `_VALID_URL`
+* [extractor/xfileshare] Add Referer by [Galiley](https://github.com/Galiley)
+* [extractor/YahooJapanNews] Fix extractor by [Lesmiscore](https://github.com/Lesmiscore)
+* [extractor/yandexmusic] Extract higher quality format
+* [extractor/zee5] Update Device ID by [m4tu4g](https://github.com/m4tu4g)
+
+
+### 2022.07.18
+
+* Allow users to specify encoding in each config file by [Lesmiscore](https://github.com/Lesmiscore)
+* Discard infodict from memory if no longer needed
+* Do not allow extractors to return `None`
+* Do not load system certificates when `certifi` is used
+* Fix rounding of integers in format table
+* Improve chapter sanitization
+* Skip some fixup if remux/recode is needed by [Lesmiscore](https://github.com/Lesmiscore)
+* Support `--no-progress` for `--wait-for-video`
+* Fix bug in [612f2be](https://github.com/yt-dlp/yt-dlp/commit/612f2be5d3924540158dfbe5f25d841f04cff8c6)
+* [outtmpl] Add alternate form `h` for HTML escaping
+* [aes] Add multiple padding modes in CBC by [elyse0](https://github.com/elyse0)
+* [extractor/common] Passthrough `errnote=False` to parsers
+* [extractor/generic] Remove HEAD request
+* [http] Ensure the file handle is always closed
+* [ModifyChapters] Modify duration in infodict
+* [options] Fix aliases to `--config-location`
+* [utils] Fix `get_domain`
+* [build] Consistent order for lazy extractors by [lamby](https://github.com/lamby)
+* [build] Fix architecture suffix of executables by [odo2063](https://github.com/odo2063)
+* [build] Improve `setup.py`
+* [update] Do not check `_update_spec` when up to date
+* [update] Prepare to remove Python 3.6 support
+* [compat] Let PyInstaller detect `_legacy` module
+* [devscripts/update-formulae] Do not change dependency section
+* [test] Split download tests so they can be more easily run in CI
+* [docs] Improve docstring of `download_ranges` by [FirefoxMetzger](https://github.com/FirefoxMetzger)
+* [docs] Improve issue templates
+* [build] Fix bug in [6d916fe](https://github.com/yt-dlp/yt-dlp/commit/6d916fe709a38e8c4c69b73843acf170b5165931)
+* [cleanup, utils] Refactor `parse_codecs`
+* [cleanup] Misc fixes and cleanup
+* [extractor/acfun] Add extractors by [lockmatrix](https://github.com/lockmatrix)
+* [extractor/Audiodraft] Add extractors by [Ashish0804](https://github.com/Ashish0804), [fstirlitz](https://github.com/fstirlitz)
+* [extractor/cellebrite] Add extractor by [HobbyistDev](https://github.com/HobbyistDev)
+* [extractor/detik] Add extractor by [HobbyistDev](https://github.com/HobbyistDev)
+* [extractor/hytale] Add extractor by [llamasblade](https://github.com/llamasblade), [pukkandan](https://github.com/pukkandan)
+* [extractor/liputan6] Add extractor by [HobbyistDev](https://github.com/HobbyistDev)
+* [extractor/mocha] Add extractor by [HobbyistDev](https://github.com/HobbyistDev)
+* [extractor/rtl.lu] Add extractor by [HobbyistDev](https://github.com/HobbyistDev)
+* [extractor/rtvsl] Add extractor by [iw0nderhow](https://github.com/iw0nderhow), [pukkandan](https://github.com/pukkandan)
+* [extractor/StarTrek] Add extractor by [scy](https://github.com/scy)
+* [extractor/syvdk] Add extractor by [misaelaguayo](https://github.com/misaelaguayo)
+* [extractor/theholetv] Add extractor by [dosy4ev](https://github.com/dosy4ev)
+* [extractor/TubeTuGraz] Add extractor by [Ferdi265](https://github.com/Ferdi265), [pukkandan](https://github.com/pukkandan)
+* [extractor/tviplayer] Add extractor by [HobbyistDev](https://github.com/HobbyistDev)
+* [extractor/wetv] Add extractors by [elyse0](https://github.com/elyse0)
+* [extractor/wikimedia] Add extractor by [EhtishamSabir](https://github.com/EhtishamSabir), [pukkandan](https://github.com/pukkandan)
+* [extractor/youtube] Fix duration check for post-live manifestless mode
+* [extractor/youtube] More metadata for storyboards by [ftk](https://github.com/ftk)
+* [extractor/bigo] Fix extractor by [Lesmiscore](https://github.com/Lesmiscore)
+* [extractor/BiliIntl] Fix subtitle extraction by [MinePlayersPE](https://github.com/MinePlayersPE)
+* [extractor/crunchyroll] Improve `_VALID_URL`
+* [extractor/fifa] Fix extractor by [ischmidt20](https://github.com/ischmidt20)
+* [extractor/instagram] Fix post/story extractors by [pritam20ps05](https://github.com/pritam20ps05), [pukkandan](https://github.com/pukkandan)
+* [extractor/iq] Set language correctly for Korean subtitles
+* [extractor/MangoTV] Fix subtitle languages
+* [extractor/Netverse] Improve playlist extractor by [HobbyistDev](https://github.com/HobbyistDev)
+* [extractor/philharmoniedeparis] Fix extractor by [sqrtNOT](https://github.com/sqrtNOT)
+* [extractor/Trovo] Fix extractor by [u-spec-png](https://github.com/u-spec-png)
+* [extractor/twitch] Support storyboards for VODs by [ftk](https://github.com/ftk)
+* [extractor/WatchESPN] Improve `_VALID_URL` by [IONECarter](https://github.com/IONECarter), [dirkf](https://github.com/dirkf)
+* [extractor/WSJArticle] Fix video id extraction by [sqrtNOT](https://github.com/sqrtNOT)
+* [extractor/Ximalaya] Fix extractors by [lockmatrix](https://github.com/lockmatrix)
+* [cleanup, extractor/youtube] Fix tests by [sheerluck](https://github.com/sheerluck)
+
+
+### 2022.06.29
+
+* Fix `--downloader native`
+* Fix `section_end` of clips
+* Fix playlist error handling
+* Sanitize `chapters`
+* [extractor] Fix `_create_request` when headers is None
+* [extractor] Fix empty `BaseURL` in MPD
+* [ffmpeg] Write full output to debug on error
+* [hls] Warn user when trying to download live HLS
+* [options] Fix `parse_known_args` for `--`
+* [utils] Fix inconsistent default handling between HTTP and HTTPS requests by [coletdjnz](https://github.com/coletdjnz)
+* [build] Draft release until complete
+* [build] Fix release tag commit
+* [build] Standalone x64 builds for MacOS 10.9 by [StefanLobbenmeier](https://github.com/StefanLobbenmeier)
+* [update] Ability to set a maximum version for specific variants
+* [compat] Fix `compat.WINDOWS_VT_MODE`
+* [compat] Remove deprecated functions from core code
+* [compat] Remove more functions
+* [cleanup, extractor] Reduce direct use of `_downloader`
+* [cleanup] Consistent style for file heads
+* [cleanup] Fix some typos by [crazymoose77756](https://github.com/crazymoose77756)
+* [cleanup] Misc fixes and cleanup
+* [extractor/Scrolller] Add extractor by [LunarFang416](https://github.com/LunarFang416)
+* [extractor/ViMP] Add playlist extractor by [FestplattenSchnitzel](https://github.com/FestplattenSchnitzel)
+* [extractor/fuyin] Add extractor by [HobbyistDev](https://github.com/HobbyistDev)
+* [extractor/livestreamfails] Add extractor by [nomevi](https://github.com/nomevi)
+* [extractor/premiershiprugby] Add extractor by [HobbyistDev](https://github.com/HobbyistDev)
+* [extractor/steam] Add broadcast extractor by [HobbyistDev](https://github.com/HobbyistDev)
+* [extractor/youtube] Mark videos as fully watched by [Brett824](https://github.com/Brett824)
+* [extractor/CWTV] Extract thumbnail by [ischmidt20](https://github.com/ischmidt20)
+* [extractor/ViMP] Add thumbnail and support more sites by [FestplattenSchnitzel](https://github.com/FestplattenSchnitzel)
+* [extractor/dropout] Support cookies and login only as needed by [pingiun](https://github.com/pingiun), [pukkandan](https://github.com/pukkandan)
+* [extractor/ertflix] Improve `_VALID_URL`
+* [extractor/lbry] Use HEAD request for redirect URL by [flashdagger](https://github.com/flashdagger)
+* [extractor/mediaset] Improve `_VALID_URL`
+* [extractor/npr] Implement [e50c350](https://github.com/yt-dlp/yt-dlp/commit/e50c3500b43d80e4492569c4b4523c4379c6fbb2) differently
+* [extractor/tennistv] Rewrite extractor by [pukkandan](https://github.com/pukkandan), [zenerdi0de](https://github.com/zenerdi0de)
+
+### 2022.06.22.1
+
+* [build] Fix updating homebrew formula
+
+### 2022.06.22
+
+* [**Deprecate support for Python 3.6**](https://github.com/yt-dlp/yt-dlp/issues/3764#issuecomment-1154051119)
+* **Add option `--download-sections` to download video partially**
+ * Chapter regex and time ranges are accepted, e.g. `--download-sections *1:10-2:20`
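+
+  For embedding, the same behaviour is exposed through the `download_ranges` option; a sketch assuming the `download_range_func` helper (chapter regexes first, then `(start, end)` ranges in seconds):
+  ```python
+  import yt_dlp
+  from yt_dlp.utils import download_range_func
+
+  opts = {
+      # Rough equivalent of `--download-sections "*1:10-2:20"` (70s to 140s)
+      'download_ranges': download_range_func(None, [(70, 140)]),
+  }
+  with yt_dlp.YoutubeDL(opts) as ydl:
+      ydl.download(['https://www.youtube.com/watch?v=dQw4w9WgXcQ'])
+  ```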
+* Add option `--alias`
+* Add option `--lazy-playlist` to process entries as they are received
+* Add option `--retry-sleep`
+* Add slicing notation to `--playlist-items`
+ * Adds support for negative indices and step
+ * Add `-I` as alias for `--playlist-items`
+ * Makes `--playlist-start`, `--playlist-end`, `--playlist-reverse`, `--no-playlist-reverse` redundant
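+
+  A few illustrative specs under the new notation, using the embedding counterpart `playlist_items` (assuming the usual CLI-to-API mapping; the playlist URL is a placeholder):
+  ```python
+  import yt_dlp
+
+  # '-I :10'  -> first 10 entries
+  # '-I -5:'  -> last 5 entries (negative indices)
+  # '-I ::2'  -> every other entry (step)
+  opts = {'playlist_items': '::2'}
+  with yt_dlp.YoutubeDL(opts) as ydl:
+      ydl.download(['https://www.youtube.com/playlist?list=PL_placeholder'])
+  ```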
+* `--config-location -` to provide options interactively
+* [build] Add Linux standalone builds
+* [update] Self-restart after update
+* Merge youtube-dl: Upto [commit/8a158a9](https://github.com/ytdl-org/youtube-dl/commit/8a158a9)
+* Add `--no-update`
+* Allow extractors to specify `section_start`/`section_end` for clips
+* Do not print progress to `stderr` with `-q`
+* Ensure pre-processor errors do not block video download
+* Fix `--simulate --max-downloads`
+* Improve error handling of bad config files
+* Return an error code if update fails
+* Fix bug in [3a408f9](https://github.com/yt-dlp/yt-dlp/commit/3a408f9d199127ca2626359e21a866a09ab236b3)
+* [ExtractAudio] Allow conditional conversion
+* [ModifyChapters] Fix repeated removal of small segments
+* [ThumbnailsConvertor] Allow conditional conversion
+* [cookies] Detect profiles for cygwin/BSD by [moench-tegeder](https://github.com/moench-tegeder)
+* [dash] Show fragment count with `--live-from-start` by [flashdagger](https://github.com/flashdagger)
+* [extractor] Add `_search_json` by [coletdjnz](https://github.com/coletdjnz), [pukkandan](https://github.com/pukkandan)
+* [extractor] Add `default` parameter to `_search_json` by [coletdjnz](https://github.com/coletdjnz), [pukkandan](https://github.com/pukkandan)
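+
+  A rough sketch of how an extractor might call `_search_json` (the site, pattern and fields are hypothetical):
+  ```python
+  from yt_dlp.extractor.common import InfoExtractor
+
+  class ExampleIE(InfoExtractor):  # hypothetical extractor, for illustration only
+      _VALID_URL = r'https?://example\.invalid/video/(?P<id>\d+)'
+
+      def _real_extract(self, url):
+          video_id = self._match_id(url)
+          webpage = self._download_webpage(url, video_id)
+          # Scans for the first JSON object following the given pattern
+          data = self._search_json(
+              r'window\.__INITIAL_STATE__\s*=', webpage, 'initial state',
+              video_id, default={})
+          return {'id': video_id, 'title': data.get('title')}
+  ```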
+* [extractor] Add dev option `--load-pages`
+* [extractor] Handle `json_ld` with multiple `@type`s
+* [extractor] Import `_ALL_CLASSES` lazily
+* [extractor] Recognize `src` attribute from HTML5 media elements by [Lesmiscore](https://github.com/Lesmiscore)
+* [extractor/generic] Revert [e6ae51c](https://github.com/yt-dlp/yt-dlp/commit/e6ae51c123897927eb3c9899923d8ffd31c7f85d)
+* [f4m] Bugfix
+* [ffmpeg] Check version lazily
+* [jsinterp] Some optimizations and refactoring by [dirkf](https://github.com/dirkf), [pukkandan](https://github.com/pukkandan)
+* [utils] Improve performance using `functools.cache`
+* [utils] Send HTTP/1.1 ALPN extension by [coletdjnz](https://github.com/coletdjnz)
+* [utils] `ExtractorError`: Fix `exc_info`
+* [utils] `ISO3166Utils`: Add `EU` and `AP`
+* [utils] `Popen`: Refactor to use contextmanager
+* [utils] `locked_file`: Fix for PyPy on Windows
+* [update] Expose more functionality to API
+* [update] Use `.git` folder to distinguish `source`/`unknown`
+* [compat] Add `functools.cached_property`
+* [test] Fix `FakeYDL` signatures by [coletdjnz](https://github.com/coletdjnz)
+* [docs] Improvements
+* [cleanup, ExtractAudio] Refactor
+* [cleanup, downloader] Refactor `report_progress`
+* [cleanup, extractor] Refactor `_download_...` methods
+* [cleanup, extractor] Rename `extractors.py` to `_extractors.py`
+* [cleanup, utils] Don't use kwargs for `format_field`
+* [cleanup, build] Refactor
+* [cleanup, docs] Re-indent "Usage and Options" section
+* [cleanup] Deprecate `YoutubeDL.parse_outtmpl`
+* [cleanup] Misc fixes and cleanup by [Lesmiscore](https://github.com/Lesmiscore), [MrRawes](https://github.com/MrRawes), [christoph-heinrich](https://github.com/christoph-heinrich), [flashdagger](https://github.com/flashdagger), [gamer191](https://github.com/gamer191), [kwconder](https://github.com/kwconder), [pukkandan](https://github.com/pukkandan)
+* [extractor/DailyWire] Add extractors by [HobbyistDev](https://github.com/HobbyistDev), [pukkandan](https://github.com/pukkandan)
+* [extractor/fourzerostudio] Add extractors by [Lesmiscore](https://github.com/Lesmiscore)
+* [extractor/GoogleDrive] Add folder extractor by [evansp](https://github.com/evansp), [pukkandan](https://github.com/pukkandan)
+* [extractor/MirrorCoUK] Add extractor by [LunarFang416](https://github.com/LunarFang416), [pukkandan](https://github.com/pukkandan)
+* [extractor/atscaleconfevent] Add extractor by [Ashish0804](https://github.com/Ashish0804)
+* [extractor/freetv] Add extractor by [elyse0](https://github.com/elyse0)
+* [extractor/ixigua] Add extractor by [HobbyistDev](https://github.com/HobbyistDev)
+* [extractor/kicker.de] Add extractor by [HobbyistDev](https://github.com/HobbyistDev)
+* [extractor/netverse] Add extractors by [HobbyistDev](https://github.com/HobbyistDev), [pukkandan](https://github.com/pukkandan)
+* [extractor/playsuisse] Add extractor by [pukkandan](https://github.com/pukkandan), [sbor23](https://github.com/sbor23)
+* [extractor/substack] Add extractor by [elyse0](https://github.com/elyse0)
+* [extractor/youtube] **Support downloading clips**
+* [extractor/youtube] Add `innertube_host` and `innertube_key` extractor args by [coletdjnz](https://github.com/coletdjnz)
+* [extractor/youtube] Add warning for PostLiveDvr
+* [extractor/youtube] Bring back `_extract_chapters_from_description`
+* [extractor/youtube] Extract `comment_count` from webpage
+* [extractor/youtube] Fix `:ytnotifications` extractor by [coletdjnz](https://github.com/coletdjnz)
+* [extractor/youtube] Fix initial player response extraction by [coletdjnz](https://github.com/coletdjnz), [pukkandan](https://github.com/pukkandan)
+* [extractor/youtube] Fix live chat for videos with content warning by [coletdjnz](https://github.com/coletdjnz)
+* [extractor/youtube] Make signature extraction non-fatal
+* [extractor/youtube:tab] Detect `videoRenderer` in `_post_thread_continuation_entries`
+* [extractor/BiliIntl] Fix metadata extraction
+* [extractor/BiliIntl] Fix subtitle extraction by [HobbyistDev](https://github.com/HobbyistDev)
+* [extractor/FranceCulture] Fix extractor by [aurelg](https://github.com/aurelg), [pukkandan](https://github.com/pukkandan)
+* [extractor/PokemonSoundLibrary] Remove extractor by [Lesmiscore](https://github.com/Lesmiscore)
+* [extractor/StreamCZ] Fix extractor by [adamanldo](https://github.com/adamanldo), [dirkf](https://github.com/dirkf)
+* [extractor/WatchESPN] Support free videos and BAM_DTC by [ischmidt20](https://github.com/ischmidt20)
+* [extractor/animelab] Remove extractor by [gamer191](https://github.com/gamer191)
+* [extractor/bloomberg] Change playback endpoint by [m4tu4g](https://github.com/m4tu4g)
+* [extractor/ccc] Extract view_count by [vkorablin](https://github.com/vkorablin)
+* [extractor/crunchyroll:beta] Fix extractor after API change by [Burve](https://github.com/Burve), [tejing1](https://github.com/tejing1)
+* [extractor/curiositystream] Get `auth_token` from cookie by [mnn](https://github.com/mnn)
+* [extractor/digitalconcerthall] Fix extractor by [ZhymabekRoman](https://github.com/ZhymabekRoman)
+* [extractor/dropbox] Extract the correct `mountComponent`
+* [extractor/dropout] Login is not mandatory
+* [extractor/duboku] Fix for hostname change by [mozbugbox](https://github.com/mozbugbox)
+* [extractor/espn] Add `WatchESPN` extractor by [ischmidt20](https://github.com/ischmidt20), [pukkandan](https://github.com/pukkandan)
+* [extractor/expressen] Fix extractor by [aejdl](https://github.com/aejdl)
+* [extractor/foxnews] Update embed extraction by [elyse0](https://github.com/elyse0)
+* [extractor/ina] Fix extractor by [elyse0](https://github.com/elyse0)
+* [extractor/iwara:user] Make paging better by [Lesmiscore](https://github.com/Lesmiscore)
+* [extractor/jwplatform] Look for `data-video-jw-id`
+* [extractor/lbry] Update livestream API by [flashdagger](https://github.com/flashdagger)
+* [extractor/mediaset] Improve `_VALID_URL`
+* [extractor/naver] Add `navernow` extractor by [ping](https://github.com/ping)
+* [extractor/niconico:series] Fix extractor by [sqrtNOT](https://github.com/sqrtNOT)
+* [extractor/npr] Use stream url from json-ld by [r5d](https://github.com/r5d)
+* [extractor/pornhub] Extract `uploader_id` field by [Lesmiscore](https://github.com/Lesmiscore)
+* [extractor/radiofrance] Add more radios by [bubbleguuum](https://github.com/bubbleguuum)
+* [extractor/rumble] Detect JS embed
+* [extractor/rumble] Extract subtitles by [fstirlitz](https://github.com/fstirlitz)
+* [extractor/southpark] Add `southpark.lat` extractor by [darkxex](https://github.com/darkxex)
+* [extractor/spotify:show] Fix extractor
+* [extractor/tiktok] Detect embeds
+* [extractor/tiktok] Extract `SIGI_STATE` by [dirkf](https://github.com/dirkf), [pukkandan](https://github.com/pukkandan), [sulyi](https://github.com/sulyi)
+* [extractor/tver] Fix extractor by [Lesmiscore](https://github.com/Lesmiscore)
+* [extractor/vevo] Fix extractor by [Lesmiscore](https://github.com/Lesmiscore)
+* [extractor/yahoo:gyao] Fix extractor
+* [extractor/zattoo] Fix live streams by [miseran](https://github.com/miseran)
+* [extractor/zdf] Improve format sorting by [elyse0](https://github.com/elyse0)
+
+
+### 2022.05.18
+
+* Add support for SSL client certificate authentication by [coletdjnz](https://github.com/coletdjnz), [dirkf](https://github.com/dirkf)
+ * Adds `--client-certificate`, `--client-certificate-key`, `--client-certificate-password`
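+
+  The embedding equivalents, assuming the documented parameter names (paths and password are placeholders):
+  ```python
+  import yt_dlp
+
+  opts = {
+      'client_certificate': '/path/to/client.crt',
+      # Key/password are only needed if the certificate file does not
+      # already bundle an unencrypted private key
+      'client_certificate_key': '/path/to/client.key',
+      'client_certificate_password': 'hunter2',
+  }
+  with yt_dlp.YoutubeDL(opts) as ydl:
+      ydl.download(['https://example.invalid/protected/video'])
+  ```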
+* Add `--match-filter -` to interactively ask for each video
+* `--max-downloads` should obey `--break-per-input`
+* Allow use of weaker ciphers with `--legacy-server-connect`
+* Don't imply `-s` for later stages of `-O`
+* Fix `--date today`
+* Fix `--skip-unavailable-fragments`
+* Fix color in `-q -F`
+* Fix redirect HTTP method handling by [coletdjnz](https://github.com/coletdjnz)
+* Improve `--clean-infojson`
+* Remove warning for videos with an empty title
+* Run `FFmpegFixupM3u8PP` for live-streams if needed
+* Show name of downloader in verbose log
+* [cookies] Allow `cookiefile` to be a text stream
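+
+  A minimal sketch: any text stream in Netscape cookie-jar format should now be accepted, not just a filename (the URL is a placeholder):
+  ```python
+  import io
+
+  import yt_dlp
+
+  cookie_data = io.StringIO('# Netscape HTTP Cookie File\n')  # placeholder jar
+  with yt_dlp.YoutubeDL({'cookiefile': cookie_data}) as ydl:
+      ydl.download(['https://example.invalid/members-only/video'])
+  ```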
+* [cookies] Report progress when importing cookies
+* [downloader/ffmpeg] Specify headers for each URL by [elyse0](https://github.com/elyse0)
+* [fragment] Do not change chunk-size when `--test`
+* [fragment] Make single thread download work for `--live-from-start` by [Lesmiscore](https://github.com/Lesmiscore)
+* [hls] Fix `byte_range` for `EXT-X-MAP` fragment by [fstirlitz](https://github.com/fstirlitz)
+* [http] Fix retrying on read timeout by [coletdjnz](https://github.com/coletdjnz)
+* [ffmpeg] Fix features detection
+* [EmbedSubtitle] Enable for more video extensions
+* [EmbedThumbnail] Disable thumbnail conversion for mkv by [evansp](https://github.com/evansp)
+* [EmbedThumbnail] Do not obey `-k`
+* [EmbedThumbnail] Do not remove id3v1 tags
+* [FFmpegMetadata] Remove `\0` from metadata
+* [FFmpegMetadata] Remove filename from attached info-json
+* [FixupM3u8] Obey `--hls-prefer-mpegts`
+* [Sponsorblock] Don't crash when duration is unknown
+* [XAttrMetadata] Refactor and document dependencies
+* [extractor] Document netrc machines
+* [extractor] Update `manifest_url`s after redirect by [elyse0](https://github.com/elyse0)
+* [extractor] Update dash `manifest_url` after redirects by [elyse0](https://github.com/elyse0)
+* [extractor] Use `classmethod`/`property` where possible
+* [generic] Refactor `_extract_rss`
+* [utils] `is_html`: Handle double BOM
+* [utils] `locked_file`: Ignore illegal seek on `truncate` by [jakeogh](https://github.com/jakeogh)
+* [utils] `sanitize_path`: Fix when path is empty string
+* [utils] `write_string`: Work around newline issue in `conhost`
+* [utils] `certifi`: Make sure the pem file exists
+* [utils] Fix `WebSocketsWrapper`
+* [utils] `locked_file`: Do not give executable bits for newly created files by [Lesmiscore](https://github.com/Lesmiscore)
+* [utils] `YoutubeDLCookieJar`: Detect and reject JSON file by [Lesmiscore](https://github.com/Lesmiscore)
+* [test] Convert warnings into errors and fix some existing warnings by [fstirlitz](https://github.com/fstirlitz)
+* [dependencies] Create module with all dependency imports
+* [compat] Split into sub-modules by [fstirlitz](https://github.com/fstirlitz), [pukkandan](https://github.com/pukkandan)
+* [compat] Implement `compat.imghdr`
+* [build] Add `make uninstall` by [MrRawes](https://github.com/MrRawes)
+* [build] Avoid use of `install -D`
+* [build] Fix `Makefile` by [putnam](https://github.com/putnam)
+* [build] Fix `--onedir` on macOS
+* [build] Add more test-runners
+* [cleanup] Deprecate some compat vars by [fstirlitz](https://github.com/fstirlitz), [pukkandan](https://github.com/pukkandan)
+* [cleanup] Remove unused code paths, extractors, scripts and tests by [fstirlitz](https://github.com/fstirlitz)
+* [cleanup] Upgrade syntax (`pyupgrade`) and sort imports (`isort`)
+* [cleanup, docs, build] Misc fixes
+* [BilibiliLive] Add extractor by [HE7086](https://github.com/HE7086), [pukkandan](https://github.com/pukkandan)
+* [Fifa] Add extractor by [Bricio](https://github.com/Bricio)
+* [goodgame] Add extractor by [nevack](https://github.com/nevack)
+* [gronkh] Add playlist extractors by [hatienl0i261299](https://github.com/hatienl0i261299)
+* [icareus] Add extractor by [tpikonen](https://github.com/tpikonen), [pukkandan](https://github.com/pukkandan)
+* [iwara] Add playlist extractors by [i6t](https://github.com/i6t)
+* [Likee] Add extractor by [hatienl0i261299](https://github.com/hatienl0i261299)
+* [masters] Add extractor by [m4tu4g](https://github.com/m4tu4g)
+* [nebula] Add support for subscriptions by [hheimbuerger](https://github.com/hheimbuerger)
+* [Podchaser] Add extractors by [connercsbn](https://github.com/connercsbn)
+* [rokfin:search] Add extractor by [P-reducible](https://github.com/P-reducible), [pukkandan](https://github.com/pukkandan)
+* [youtube] Add `:ytnotifications` extractor by [krichbanana](https://github.com/krichbanana)
+* [youtube] Add YoutubeStoriesIE (`ytstories:<channel UCID>`) by [coletdjnz](https://github.com/coletdjnz)
+* [ZingMp3] Add chart and user extractors by [hatienl0i261299](https://github.com/hatienl0i261299)
+* [adn] Update AES key by [elyse0](https://github.com/elyse0)
+* [adobepass] Allow cookies for authenticating MSO
+* [bandcamp] Exclude merch links by [Yipten](https://github.com/Yipten)
+* [chingari] Fix archiving and tests
+* [DRTV] Improve `_VALID_URL` by [vertan](https://github.com/vertan)
+* [facebook] Improve thumbnail extraction by [Wikidepia](https://github.com/Wikidepia)
+* [fc2] Stop heartbeating once FFmpeg finishes by [Lesmiscore](https://github.com/Lesmiscore)
+* [Gofile] Fix extraction and support password-protected links by [mehq](https://github.com/mehq)
+* [hotstar, cleanup] Refactor extractors
+* [InfoQ] Don't fail on missing audio format by [evansp](https://github.com/evansp)
+* [Jamendo] Extract more metadata by [evansp](https://github.com/evansp)
+* [kaltura] Update API calls by [flashdagger](https://github.com/flashdagger)
+* [KhanAcademy] Fix extractor by [rand-net](https://github.com/rand-net)
+* [LCI] Fix extractor by [MarwenDallel](https://github.com/MarwenDallel)
+* [lrt] Support livestreams by [GiedriusS](https://github.com/GiedriusS)
+* [niconico] Set `expected_protocol` to a public field
+* [Niconico] Support 2FA by [ekangmonyet](https://github.com/ekangmonyet)
+* [Olympics] Fix format extension
+* [openrec:movie] Enable fallback for /movie/ URLs
+* [PearVideo] Add fallback for formats by [hatienl0i261299](https://github.com/hatienl0i261299)
+* [radiko] Fix extractor by [Lesmiscore](https://github.com/Lesmiscore)
+* [rai] Add `release_year`
+* [reddit] Prevent infinite loop
+* [rokfin] Implement login by [P-reducible](https://github.com/P-reducible), [pukkandan](https://github.com/pukkandan)
+* [ruutu] Support hs.fi embeds by [tpikonen](https://github.com/tpikonen), [pukkandan](https://github.com/pukkandan)
+* [spotify] Detect iframe embeds by [fstirlitz](https://github.com/fstirlitz)
+* [telegram] Fix metadata extraction
+* [tmz, cleanup] Update tests by [diegorodriguezv](https://github.com/diegorodriguezv)
+* [toggo] Fix `_VALID_URL` by [ca-za](https://github.com/ca-za)
+* [trovo] Update to new API by [nyuszika7h](https://github.com/nyuszika7h)
+* [TVer] Improve extraction by [Lesmiscore](https://github.com/Lesmiscore)
+* [twitcasting] Pass headers for each format by [Lesmiscore](https://github.com/Lesmiscore)
+* [VideocampusSachsen] Improve extractor by [FestplattenSchnitzel](https://github.com/FestplattenSchnitzel)
+* [vimeo] Fix extractors
+* [wat] Fix extraction of multi-language videos and subtitles by [elyse0](https://github.com/elyse0)
+* [wistia] Fix `_VALID_URL` by [dirkf](https://github.com/dirkf)
+* [youtube, cleanup] Minor refactoring by [coletdjnz](https://github.com/coletdjnz), [pukkandan](https://github.com/pukkandan)
+* [youtube] Add Piped instance URLs by [JordanWeatherby](https://github.com/JordanWeatherby)
+* [youtube] Deprioritize auto-generated thumbnails
+* [youtube] Deprioritize format 22 (often damaged)
+* [youtube] Fix episode metadata extraction
+* [zee5] Fix extractor by [Ashish0804](https://github.com/Ashish0804)
+* [zingmp3, cleanup] Refactor extractors
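+
+As a rough usage sketch of the client-certificate support above when embedding: the new CLI flags should map to similarly named `YoutubeDL` params. The param names are an assumption based on the flag names, and the paths/URL are placeholders:
+
+```python
+# Sketch: mTLS authentication when embedding yt-dlp.
+# Param names are assumed to mirror the CLI flags; paths/URL are placeholders.
+import yt_dlp
+
+ydl_opts = {
+    'client_certificate': '/path/to/client.crt',
+    'client_certificate_key': '/path/to/client.key',
+    'client_certificate_password': 'hunter2',  # only needed for an encrypted key
+}
+with yt_dlp.YoutubeDL(ydl_opts) as ydl:
+    ydl.download(['https://example.com/video'])
+```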
+
+
+### 2022.04.08
+
+* Use certificates from `certifi` if installed by [coletdjnz](https://github.com/coletdjnz)
+* Treat multiple `--match-filters` as OR
+* File locking improvements:
+ * Do not lock downloading file on Windows
+ * Do not prevent download if locking is unsupported
+ * Do not truncate files before locking by [jakeogh](https://github.com/jakeogh), [pukkandan](https://github.com/pukkandan)
+ * Fix non-blocking non-exclusive lock
+* De-prioritize automatic-subtitles when no `--sub-lang` is given
+* Exit after `--dump-user-agent`
+* Fallback to video-only format when selecting by extension
+* Fix `--abort-on-error` for subtitles
+* Fix `--no-overwrite` for playlist infojson
+* Fix `--print` with `--ignore-no-formats` when url is `None` by [flashdagger](https://github.com/flashdagger)
+* Fix `--sleep-interval`
+* Fix `--throttled-rate`
+* Fix `autonumber`
+* Fix case of `http_headers`
+* Fix filepath sanitization in `--print-to-file`
+* Handle float in `--wait-for-video`
+* Ignore `mhtml` formats from `-f mergeall`
+* Ignore format-specific fields in initial pass of `--match-filter`
+* Protect stdout from unexpected progress and console-title
+* Remove `Accept-Encoding` header from `std_headers` by [coletdjnz](https://github.com/coletdjnz)
+* Remove incorrect warning for `--dateafter`
+* Show warning when all media formats have DRM
+* [downloader] Fix invocation of `HttpieFD`
+* [http] Fix #3215
+* [http] Reject broken range before request by [Lesmiscore](https://github.com/Lesmiscore), [Jules-A](https://github.com/Jules-A), [pukkandan](https://github.com/pukkandan)
+* [fragment] Read downloaded fragments only when needed by [Lesmiscore](https://github.com/Lesmiscore)
+* [http] Retry on more errors by [coletdjnz](https://github.com/coletdjnz)
+* [mhtml] Fix fragments with absolute urls by [coletdjnz](https://github.com/coletdjnz)
+* [extractor] Add `_perform_login` function (see the sketch at the end of this section)
+* [extractor] Allow control characters inside json
+* [extractor] Support merging subtitles with data by [coletdjnz](https://github.com/coletdjnz)
+* [generic] Extract subtitles from video.js by [Lesmiscore](https://github.com/Lesmiscore)
+* [ffmpeg] Cache version data
+* [FFmpegConcat] Ensure final directory exists
+* [FfmpegMetadata] Write id3v1 tags
+* [FFmpegVideoConvertor] Add more formats to `--remux-video`
+* [FFmpegVideoConvertor] Ensure all streams are copied
+* [MetadataParser] Validate outtmpl early
+* [outtmpl] Fix replacement/default when used with alternate
+* [outtmpl] Limit changes during sanitization
+* [phantomjs] Fix bug
+* [test] Add `test_locked_file`
+* [utils] `format_decimal_suffix`: Fix for very large numbers by [s0u1h](https://github.com/s0u1h)
+* [utils] `traverse_obj`: Allow filtering by value
+* [utils] Add `filter_dict`, `get_first`, `try_call`
+* [utils] ExtractorError: Fix for older Python versions
+* [utils] WebSocketsWrapper: Allow omitting `__enter__` invocation by [Lesmiscore](https://github.com/Lesmiscore)
+* [docs] Add an `.editorconfig` file by [fstirlitz](https://github.com/fstirlitz)
+* [docs] Clarify the exact `BSD` license of dependencies by [MrRawes](https://github.com/MrRawes)
+* [docs] Minor improvements by [pukkandan](https://github.com/pukkandan), [cffswb](https://github.com/cffswb), [danielyli](https://github.com/danielyli)
+* [docs] Remove readthedocs
+* [build] Add `requirements.txt` to pip distributions
+* [cleanup, postprocessor] Create `_download_json`
+* [cleanup, vimeo] Fix tests
+* [cleanup] Misc fixes and minor cleanup
+* [cleanup] Use `_html_extract_title`
+* [AfreecaTV] Add `AfreecaTVUserIE` by [hatienl0i261299](https://github.com/hatienl0i261299)
+* [arte] Add `format_note` to m3u8 formats
+* [azmedien] Add TVO Online to supported hosts by [1-Byte](https://github.com/1-Byte)
+* [BanBye] Add extractor by [mehq](https://github.com/mehq)
+* [bilibili] Fix extraction of title with quotes by [dzek69](https://github.com/dzek69)
+* [Craftsy] Add extractor by [Bricio](https://github.com/Bricio)
+* [Cybrary] Add extractor by [aaearon](https://github.com/aaearon)
+* [Huya] Add extractor by [hatienl0i261299](https://github.com/hatienl0i261299)
+* [ITProTV] Add extractor by [aaearon](https://github.com/aaearon)
+* [Jable] Add extractors by [mehq](https://github.com/mehq)
+* [LastFM] Add extractors by [mehq](https://github.com/mehq)
+* [Moviepilot] Add extractor by [panatexxa](https://github.com/panatexxa)
+* [panopto] Add extractors by [coletdjnz](https://github.com/coletdjnz), [kmark](https://github.com/kmark)
+* [PokemonSoundLibrary] Add extractor by [Lesmiscore](https://github.com/Lesmiscore)
+* [WasdTV] Add extractor by [un-def](https://github.com/un-def), [hatienl0i261299](https://github.com/hatienl0i261299)
+* [adobepass] Fix Suddenlink MSO by [CplPwnies](https://github.com/CplPwnies)
+* [afreecatv] Match new vod url by [wlritchi](https://github.com/wlritchi)
+* [AZMedien] Support `tv.telezueri.ch` by [goggle](https://github.com/goggle)
+* [BiliIntl] Support user-generated videos by [wlritchi](https://github.com/wlritchi)
+* [BRMediathek] Fix `_VALID_URL`
+* [crunchyroll:playlist] Implement beta API by [tejing1](https://github.com/tejing1)
+* [crunchyroll] Fix inheritance
+* [daftsex] Fix extractor by [Soebb](https://github.com/Soebb)
+* [dailymotion] Support `geo.dailymotion.com` by [hatienl0i261299](https://github.com/hatienl0i261299)
+* [ellentube] Extract subtitles from manifest
+* [elonet] Rewrite extractor by [Fam0r](https://github.com/Fam0r), [pukkandan](https://github.com/pukkandan)
+* [fptplay] Fix metadata extraction by [hatienl0i261299](https://github.com/hatienl0i261299)
+* [FranceCulture] Support playlists by [bohwaz](https://github.com/bohwaz)
+* [go, viu] Extract subtitles from the m3u8 manifest by [fstirlitz](https://github.com/fstirlitz)
+* [Imdb] Improve extractor by [hatienl0i261299](https://github.com/hatienl0i261299)
+* [MangoTV] Improve extractor by [hatienl0i261299](https://github.com/hatienl0i261299)
+* [Nebula] Fix bug in 52efa4b31200119adaa8acf33e50b84fcb6948f0
+* [niconico] Fix extraction of thumbnails and uploader (#3266)
+* [niconico] Rewrite NiconicoIE by [Lesmiscore](https://github.com/Lesmiscore)
+* [nitter] Minor fixes and update instance list by [foghawk](https://github.com/foghawk)
+* [NRK] Extract timestamp by [hatienl0i261299](https://github.com/hatienl0i261299)
+* [openrec] Download archived livestreams by [Lesmiscore](https://github.com/Lesmiscore)
+* [openrec] Refactor extractors by [Lesmiscore](https://github.com/Lesmiscore)
+* [panopto] Improve subtitle extraction and support slides by [coletdjnz](https://github.com/coletdjnz)
+* [ParamountPlus, CBS] Change `_VALID_URL` by [Sipherdrakon](https://github.com/Sipherdrakon)
+* [ParamountPlusSeries] Support multiple pages by [dodrian](https://github.com/dodrian)
+* [Piapro] Extract description with break lines by [Lesmiscore](https://github.com/Lesmiscore)
+* [rai] Fix extraction of HTTP formats by [nixxo](https://github.com/nixxo)
+* [rumble] Unescape title
+* [RUTV] Fix format sorting by [Lesmiscore](https://github.com/Lesmiscore)
+* [ruutu] Detect embeds by [tpikonen](https://github.com/tpikonen)
+* [tenplay] Improve extractor by [aarubui](https://github.com/aarubui)
+* [TikTok] Fix URLs with user id by [hatienl0i261299](https://github.com/hatienl0i261299)
+* [TikTokVM] Fix redirect to user URL
+* [TVer] Fix extractor by [Lesmiscore](https://github.com/Lesmiscore)
+* [TVer] Support landing page by [vvto33](https://github.com/vvto33)
+* [twitcasting] Don't return multi_video for archive with single hls manifest by [Lesmiscore](https://github.com/Lesmiscore)
+* [veo] Fix `_VALID_URL`
+* [Veo] Fix extractor by [i6t](https://github.com/i6t)
+* [viki] Don't attempt to modify URLs with signature by [nyuszika7h](https://github.com/nyuszika7h)
+* [viu] Fix bypass for preview by [zackmark29](https://github.com/zackmark29)
+* [viu] Fix extractor by [zackmark29](https://github.com/zackmark29), [pukkandan](https://github.com/pukkandan)
+* [web.archive:youtube] Make CDX API requests non-fatal by [coletdjnz](https://github.com/coletdjnz)
+* [wget] Fix proxy by [kikuyan](https://github.com/kikuyan), [coletdjnz](https://github.com/coletdjnz)
+* [xnxx] Add `xnxx3.com` by [rozari0](https://github.com/rozari0)
+* [youtube] **Add new age-gate bypass** by [zerodytrash](https://github.com/zerodytrash), [pukkandan](https://github.com/pukkandan)
+* [youtube] Add extractor-arg to skip auto-translated subs
+* [youtube] Avoid false positives when detecting damaged formats
+* [youtube] Detect DRM better by [shirt](https://github.com/shirt-dev)
+* [youtube] Fix auto-translated automatic captions
+* [youtube] Fix pagination of `membership` tab
+* [youtube] Fix uploader for collaborative playlists by [coletdjnz](https://github.com/coletdjnz)
+* [youtube] Improve video upload date handling by [coletdjnz](https://github.com/coletdjnz)
+* [youtube:api] Prefer minified JSON response by [coletdjnz](https://github.com/coletdjnz)
+* [youtube:search] Support hashtag entries by [coletdjnz](https://github.com/coletdjnz)
+* [youtube:tab] Fix duration extraction for shorts by [coletdjnz](https://github.com/coletdjnz)
+* [youtube:tab] Minor improvements
+* [youtube:tab] Return shorts url if video is a short by [coletdjnz](https://github.com/coletdjnz)
+* [Zattoo] Fix extractors by [goggle](https://github.com/goggle)
+* [Zingmp3] Fix signature by [hatienl0i261299](https://github.com/hatienl0i261299)
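+
+A minimal sketch of the `_perform_login` hook mentioned above: an extractor declares `_NETRC_MACHINE` and implements `_perform_login`, which the base class invokes when credentials are supplied. `ExampleIE`, the machine name, the login endpoint and the form fields are all hypothetical:
+
+```python
+from yt_dlp.extractor.common import InfoExtractor
+from yt_dlp.utils import urlencode_postdata
+
+class ExampleIE(InfoExtractor):  # hypothetical extractor
+    _NETRC_MACHINE = 'example'   # hypothetical netrc machine name
+
+    def _perform_login(self, username, password):
+        # Called automatically before extraction when credentials are available
+        self._download_webpage(
+            'https://example.com/login', None, note='Logging in',  # hypothetical endpoint
+            data=urlencode_postdata({'user': username, 'pass': password}))
+
+    def _real_extract(self, url):
+        ...
+```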
+
+
+### 2022.03.08.1
+
+* [cleanup] Refactor `__init__.py`
+* [build] Fix bug
+
+### 2022.03.08
+
+* Merge youtube-dl: Up to [commit/6508688](https://github.com/ytdl-org/youtube-dl/commit/6508688e88c83bb811653083db9351702cd39a6a) (except NDR)
+* Add regex operator and quoting to format filters by [lukasfink1](https://github.com/lukasfink1) (see the sketch at the end of this section)
+* Add brotli content-encoding support by [coletdjnz](https://github.com/coletdjnz)
+* Add pre-processor stage `after_filter`
+* Better error message when no `--live-from-start` format
+* Create necessary directories for `--print-to-file`
+* Fill more fields for playlists by [Lesmiscore](https://github.com/Lesmiscore)
+* Fix `-all` for `--sub-langs`
+* Fix doubling of `video_id` in `ExtractorError`
+* Fix for when stdout/stderr encoding is `None`
+* Handle negative duration from extractor
+* Implement `--add-header` without modifying `std_headers`
+* Obey `--abort-on-error` for "ffmpeg not installed"
+* Set `webpage_url_...` from `webpage_url` and not input URL
+* Tolerate failure to `--write-link` due to unknown URL
+* [aria2c] Add `--http-accept-gzip=true`
+* [build] Update pyinstaller to 4.10 by [shirt](https://github.com/shirt-dev)
+* [cookies] Update MacOS12 `Cookies.binarycookies` location by [mdpauley](https://github.com/mdpauley)
+* [devscripts] Improve `prepare_manpage`
+* [downloader] Do not use aria2c for non-native `m3u8`
+* [downloader] Obey `--file-access-retries` when deleting/renaming by [ehoogeveen-medweb](https://github.com/ehoogeveen-medweb)
+* [extractor] Allow `http_headers` to be specified for `thumbnails`
+* [extractor] Extract subtitles from manifests for vimeo, globo, kaltura, svt by [fstirlitz](https://github.com/fstirlitz)
+* [extractor] Fix for manifests without period duration by [dirkf](https://github.com/dirkf), [pukkandan](https://github.com/pukkandan)
+* [extractor] Support `--mark-watched` without `_NETRC_MACHINE` by [coletdjnz](https://github.com/coletdjnz)
+* [FFmpegConcat] Abort on `--simulate`
+* [FormatSort] Consider `acodec`=`ogg` as `vorbis`
+* [fragment] Fix bugs around resuming with Range by [Lesmiscore](https://github.com/Lesmiscore)
+* [fragment] Improve `--live-from-start` for YouTube livestreams by [Lesmiscore](https://github.com/Lesmiscore)
+* [generic] Pass referer to extracted formats
+* [generic] Set rss `guid` as video id by [Bricio](https://github.com/Bricio)
+* [options] Better ambiguous option resolution
+* [options] Rename `--clean-infojson` to `--clean-info-json`
+* [SponsorBlock] Fixes for highlight and "full video labels" by [nihil-admirari](https://github.com/nihil-admirari)
+* [SponsorBlock] Minor fixes by [nihil-admirari](https://github.com/nihil-admirari)
+* [utils] Better traceback for `ExtractorError`
+* [utils] Fix file locking for AOSP by [jakeogh](https://github.com/jakeogh)
+* [utils] Improve file locking
+* [utils] OnDemandPagedList: Do not download pages after error
+* [utils] render_table: Fix character calculation for removing extra gap by [Lesmiscore](https://github.com/Lesmiscore)
+* [utils] Use `locked_file` for `sanitize_open` by [jakeogh](https://github.com/jakeogh)
+* [utils] Validate `DateRange` input
+* [utils] WebSockets wrapper for non-async functions by [Lesmiscore](https://github.com/Lesmiscore)
+* [cleanup] Don't pass protocol to `_extract_m3u8_formats` for live videos
+* [cleanup] Remove extractors for some dead websites by [marieell](https://github.com/marieell)
+* [cleanup, docs] Misc cleanup
+* [AbemaTV] Add extractors by [Lesmiscore](https://github.com/Lesmiscore)
+* [adobepass] Add Suddenlink MSO by [CplPwnies](https://github.com/CplPwnies)
+* [ant1newsgr] Add extractor by [zmousm](https://github.com/zmousm)
+* [bigo] Add extractor by [Lesmiscore](https://github.com/Lesmiscore)
+* [Caltrans] Add extractor by [Bricio](https://github.com/Bricio)
+* [daystar] Add extractor by [hatienl0i261299](https://github.com/hatienl0i261299)
+* [fc2:live] Add extractor by [Lesmiscore](https://github.com/Lesmiscore)
+* [fptplay] Add extractor by [hatienl0i261299](https://github.com/hatienl0i261299)
+* [murrtube] Add extractor by [cyberfox1691](https://github.com/cyberfox1691)
+* [nfb] Add extractor by [ofkz](https://github.com/ofkz)
+* [niconico] Add playlist extractors and refactor by [Lesmiscore](https://github.com/Lesmiscore)
+* [peekvids] Add extractor by [schn0sch](https://github.com/schn0sch)
+* [piapro] Add extractor by [pycabbage](https://github.com/pycabbage), [Lesmiscore](https://github.com/Lesmiscore)
+* [rokfin] Add extractor by [P-reducible](https://github.com/P-reducible), [pukkandan](https://github.com/pukkandan)
+* [rokfin] Add stack and channel extractors by [P-reducible](https://github.com/P-reducible), [pukkandan](https://github.com/pukkandan)
+* [ruv.is] Add extractor by [iw0nderhow](https://github.com/iw0nderhow)
+* [telegram] Add extractor by [hatienl0i261299](https://github.com/hatienl0i261299)
+* [VideocampusSachsen] Add extractors by [FestplattenSchnitzel](https://github.com/FestplattenSchnitzel)
+* [xinpianchang] Add extractor by [hatienl0i261299](https://github.com/hatienl0i261299)
+* [abc] Support 1080p by [Ronnnny](https://github.com/Ronnnny)
+* [afreecatv] Support password-protected livestreams by [wlritchi](https://github.com/wlritchi)
+* [ard] Fix valid URL
+* [ATVAt] Detect geo-restriction by [marieell](https://github.com/marieell)
+* [bandcamp] Detect acodec
+* [bandcamp] Fix user URLs by [lyz-code](https://github.com/lyz-code)
+* [bbc] Fix extraction of news articles by [ajj8](https://github.com/ajj8)
+* [beeg] Fix extractor by [Bricio](https://github.com/Bricio)
+* [bigo] Fix extractor to not use `form_params`
+* [Bilibili] Pass referer for all formats by [blackgear](https://github.com/blackgear)
+* [Biqle] Fix extractor by [Bricio](https://github.com/Bricio)
+* [ccma] Fix timestamp parsing by [nyuszika7h](https://github.com/nyuszika7h)
+* [crunchyroll] Better error reporting on login failure by [tejing1](https://github.com/tejing1)
+* [cspan] Add support for C-SPAN congress videos by [Grabien](https://github.com/Grabien)
+* [dropbox] Fix regex by [zenerdi0de](https://github.com/zenerdi0de)
+* [fc2] Fix extraction by [Lesmiscore](https://github.com/Lesmiscore)
+* [fujitv] Extract resolution for free sources by [YuenSzeHong](https://github.com/YuenSzeHong)
+* [Gettr] Add `GettrStreamingIE` by [i6t](https://github.com/i6t)
+* [Gettr] Fix formats order by [i6t](https://github.com/i6t)
+* [Gettr] Improve extractor by [i6t](https://github.com/i6t)
+* [globo] Expand valid URL by [Bricio](https://github.com/Bricio)
+* [lbry] Fix `--ignore-no-formats-error`
+* [manyvids] Extract `uploader` by [regarten](https://github.com/regarten)
+* [mildom] Fix linter errors
+* [mildom] Rework extractors by [Lesmiscore](https://github.com/Lesmiscore)
+* [mirrativ] Cleanup extractor code by [Lesmiscore](https://github.com/Lesmiscore)
+* [nhk] Add support for NHK for School by [Lesmiscore](https://github.com/Lesmiscore)
+* [niconico:tag] Add support for searching tags
+* [nrk] Add fallback API
+* [peekvids] Use JSON-LD by [schn0sch](https://github.com/schn0sch)
+* [peertube] Add media.fsfe.org by [mxmehl](https://github.com/mxmehl)
+* [rtvs] Fix extractor by [Bricio](https://github.com/Bricio)
+* [spiegel] Fix `_VALID_URL`
+* [ThumbnailsConvertor] Support `webp`
+* [tiktok] Fix `vm.tiktok`/`vt.tiktok` URLs
+* [tubitv] Fix/improve TV series extraction by [bbepis](https://github.com/bbepis)
+* [tumblr] Fix extractor by [foghawk](https://github.com/foghawk)
+* [twitcasting] Add fallback for finding running live by [Lesmiscore](https://github.com/Lesmiscore)
+* [TwitCasting] Check for password protection by [Lesmiscore](https://github.com/Lesmiscore)
+* [twitcasting] Fix extraction by [Lesmiscore](https://github.com/Lesmiscore)
+* [twitch] Fix field name of `view_count`
+* [twitter] Fix for private videos by [iphoting](https://github.com/iphoting)
+* [washingtonpost] Fix extractor by [Bricio](https://github.com/Bricio)
+* [youtube:tab] Add `approximate_date` extractor-arg
+* [youtube:tab] Follow redirect to regional channel by [coletdjnz](https://github.com/coletdjnz)
+* [youtube:tab] Reject webpage data if redirected to home page
+* [youtube] De-prioritize potentially damaged formats
+* [youtube] Differentiate descriptive audio by language code
+* [youtube] Ensure subtitle urls are absolute by [coletdjnz](https://github.com/coletdjnz)
+* [youtube] Escape possible `$` in `_extract_n_function_name` regex by [Lesmiscore](https://github.com/Lesmiscore)
+* [youtube] Fix automatic captions
+* [youtube] Fix n-sig extraction for phone player JS by [MinePlayersPE](https://github.com/MinePlayersPE)
+* [youtube] Further de-prioritize 3gp format
+* [youtube] Label original auto-subs
+* [youtube] Prefer UTC upload date for videos by [coletdjnz](https://github.com/coletdjnz)
+* [zaq1] Remove dead extractor by [marieell](https://github.com/marieell)
+* [zee5] Support web-series by [Aniruddh-J](https://github.com/Aniruddh-J)
+* [zingmp3] Fix extractor by [hatienl0i261299](https://github.com/hatienl0i261299)
+* [zoom] Add support for screen cast by [Mipsters](https://github.com/Mipsters)
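+
+For the "regex operator and quoting in format filters" entry above, a usage sketch. The `~=` spelling and the quoted pattern are my reading of that entry, and the URL is a placeholder:
+
+```python
+# Sketch: select only formats whose format_id matches a regex,
+# using the (assumed) '~=' operator with a quoted value.
+import yt_dlp
+
+ydl_opts = {'format': "best[format_id~='^hls-']"}
+with yt_dlp.YoutubeDL(ydl_opts) as ydl:
+    ydl.download(['https://example.com/video'])  # placeholder URL
+```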
+
+
+### 2022.02.04
+
+* [youtube:search] Fix extractor by [coletdjnz](https://github.com/coletdjnz)
+* [youtube:search] Add tests
+* [twitcasting] Enforce UTF-8 for POST payload by [Lesmiscore](https://github.com/Lesmiscore)
+* [mediaset] Fix extractor by [nixxo](https://github.com/nixxo)
+* [websocket] Make syntax error in `websockets` module non-fatal
+
+### 2022.02.03
+
+* Merge youtube-dl: Up to [commit/78ce962](https://github.com/ytdl-org/youtube-dl/commit/78ce962f4fe020994c216dd2671546fbe58a5c67)
+* Add option `--print-to-file`
+* Make nested `--config-locations` relative to parent file
+* Ensure `_type` is present in `info.json`
+* Fix `--compat-options list-formats`
+* Fix/improve `InAdvancePagedList`
+* [downloader/ffmpeg] Handle unknown formats better
+* [outtmpl] Handle `-o ""` better
+* [outtmpl] Handle hard-coded file extension better
+* [extractor] Add convenience function `_yes_playlist`
+* [extractor] Allow non-fatal `title` extraction
+* [extractor] Extract video inside `Article` json_ld
+* [generic] Allow further processing of json_ld URL
+* [cookies] Fix keyring selection for unsupported desktops
+* [utils] Strip double spaces in `clean_html` by [dirkf](https://github.com/dirkf)
+* [aes] Add `unpad_pkcs7` (see the sketch at the end of this section)
+* [test] Fix `test_youtube_playlist_noplaylist`
+* [docs,cleanup] Misc cleanup
+* [dplay] Add extractors for site changes by [Sipherdrakon](https://github.com/Sipherdrakon)
+* [ertgr] Add extractors by [zmousm](https://github.com/zmousm), [dirkf](https://github.com/dirkf)
+* [Musicdex] Add extractors by [Ashish0804](https://github.com/Ashish0804)
+* [YandexVideoPreview] Add extractor by [KiberInfinity](https://github.com/KiberInfinity)
+* [youtube] Add extractor `YoutubeMusicSearchURLIE`
+* [archive.org] Ignore unnecessary files
+* [Bilibili] Add 8k support by [u-spec-png](https://github.com/u-spec-png)
+* [bilibili] Fix extractor, make anthology title non-fatal
+* [CAM4] Add thumbnail extraction by [alerikaisattera](https://github.com/alerikaisattera)
+* [cctv] De-prioritize sample format
+* [crunchyroll:beta] Add cookies support by [tejing1](https://github.com/tejing1)
+* [crunchyroll] Fix login by [tejing1](https://github.com/tejing1)
+* [doodstream] Fix extractor
+* [fc2] Fix extraction by [Lesmiscore](https://github.com/Lesmiscore)
+* [FFmpegConcat] Abort on `--skip-download` and download errors
+* [Fujitv] Extract metadata and support premium by [YuenSzeHong](https://github.com/YuenSzeHong)
+* [globo] Fix extractor by [Bricio](https://github.com/Bricio)
+* [glomex] Simplify embed detection
+* [GoogleSearch] Fix extractor
+* [Instagram] Fix extraction when logged in by [MinePlayersPE](https://github.com/MinePlayersPE)
+* [iq.com] Add VIP support by [MinePlayersPE](https://github.com/MinePlayersPE)
+* [mildom] Fix extractor by [lazypete365](https://github.com/lazypete365)
+* [MySpass] Fix video url processing by [trassshhub](https://github.com/trassshhub)
+* [Odnoklassniki] Improve embedded players extraction by [KiberInfinity](https://github.com/KiberInfinity)
+* [orf:tvthek] Extract playlists lazily and obey `--no-playlist`
+* [Pladform] Fix redirection to external player by [KiberInfinity](https://github.com/KiberInfinity)
+* [ThisOldHouse] Improve Premium URL check by [Ashish0804](https://github.com/Ashish0804)
+* [TikTok] Iterate through app versions by [MinePlayersPE](https://github.com/MinePlayersPE)
+* [tumblr] Fix 403 errors and handle vimeo embeds by [foghawk](https://github.com/foghawk)
+* [viki] Fix "Bad request" for manifest by [nyuszika7h](https://github.com/nyuszika7h)
+* [Vimm] Add recording extractor by [alerikaisattera](https://github.com/alerikaisattera)
+* [web.archive:youtube] Add `ytarchive:` prefix and misc cleanup
+* [youtube:api] Do not use seek when reading HTTPError response by [coletdjnz](https://github.com/coletdjnz)
+* [youtube] Fix n-sig for player e06dea74
+* [youtube, cleanup] Misc fixes and cleanup
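+
+For the `unpad_pkcs7` entry above, a conceptual sketch of what PKCS#7 unpadding does. This illustrates the technique only; it is not necessarily the exact signature of the helper in `yt_dlp.aes`:
+
+```python
+def unpad_pkcs7(data: bytes) -> bytes:
+    """Strip PKCS#7 padding: the last byte gives the pad length, and the
+    padding consists of that many copies of that same byte value."""
+    pad_len = data[-1]
+    if not 1 <= pad_len <= len(data) or data[-pad_len:] != bytes([pad_len]) * pad_len:
+        raise ValueError('invalid PKCS#7 padding')
+    return data[:-pad_len]
+
+assert unpad_pkcs7(b'hello\x03\x03\x03') == b'hello'
+```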
+
+
+### 2022.01.21
+
+* Add option `--concat-playlist` to **concat videos in a playlist**
+* Allow **multiple and nested configuration files**
+* Add more post-processing stages (`after_video`, `playlist`) (see the sketch at the end of this section)
+* Allow `--exec` to be run at any post-processing stage (Deprecates `--exec-before-download`)
+* Allow `--print` to be run at any post-processing stage
+* Allow listing formats, thumbnails, subtitles using `--print` by [pukkandan](https://github.com/pukkandan), [Zirro](https://github.com/Zirro)
+* Add fields `video_autonumber`, `modified_date`, `modified_timestamp`, `playlist_count`, `channel_follower_count`
+* Add key `requested_downloads` in the root `info_dict`
+* Write `download_archive` only after all formats are downloaded
+* [FfmpegMetadata] Allow setting metadata of individual streams using `meta<n>_` prefix
+* Add option `--legacy-server-connect` by [xtkoba](https://github.com/xtkoba)
+* Allow escaped `,` in `--extractor-args`
+* Allow unicode characters in `info.json`
+* Check for existing thumbnail/subtitle in final directory
+* Don't treat empty containers as `None` in `sanitize_info`
+* Fix `-s --ignore-no-formats --force-write-archive`
+* Fix live title for multiple formats
+* List playlist thumbnails in `--list-thumbnails`
+* Raise error if subtitle download fails
+* [cookies] Fix bug when keyring is unspecified
+* [ffmpeg] Ignore unknown streams, standardize use of `-map 0`
+* [outtmpl] Alternate form for `D` and fix suffix's case
+* [utils] Add `Sec-Fetch-Mode` to `std_headers`
+* [utils] Fix `format_bytes` output for Bytes by [pukkandan](https://github.com/pukkandan), [mdawar](https://github.com/mdawar)
+* [utils] Handle `ss:xxx` in `parse_duration`
+* [utils] Improve parsing for nested HTML elements by [zmousm](https://github.com/zmousm), [pukkandan](https://github.com/pukkandan)
+* [utils] Use key `None` in `traverse_obj` to return as-is
+* [extractor] Detect more subtitle codecs in MPD manifests by [fstirlitz](https://github.com/fstirlitz)
+* [extractor] Extract chapters from JSON-LD by [iw0nderhow](https://github.com/iw0nderhow), [pukkandan](https://github.com/pukkandan)
+* [extractor] Extract thumbnails from JSON-LD by [nixxo](https://github.com/nixxo)
+* [extractor] Improve `url_result` and related
+* [generic] Improve KVS player extraction by [trassshhub](https://github.com/trassshhub)
+* [build] Reduce dependency on third party workflows
+* [extractor,cleanup] Use `_search_nextjs_data`, `format_field`
+* [cleanup] Minor fixes and cleanup
+* [docs] Improvements
+* [test] Fix TestVerboseOutput
+* [afreecatv] Add livestreams extractor by [wlritchi](https://github.com/wlritchi)
+* [callin] Add extractor by [foghawk](https://github.com/foghawk)
+* [CrowdBunker] Add extractors by [Ashish0804](https://github.com/Ashish0804)
+* [daftsex] Add extractors by [k3ns1n](https://github.com/k3ns1n)
+* [digitalconcerthall] Add extractor by [teridon](https://github.com/teridon)
+* [Drooble] Add extractor by [u-spec-png](https://github.com/u-spec-png)
+* [EuropeanTour] Add extractor by [Ashish0804](https://github.com/Ashish0804)
+* [iq.com] Add extractors by [MinePlayersPE](https://github.com/MinePlayersPE)
+* [KelbyOne] Add extractor by [Ashish0804](https://github.com/Ashish0804)
+* [LnkIE] Add extractor by [Ashish0804](https://github.com/Ashish0804)
+* [MainStreaming] Add extractor by [coletdjnz](https://github.com/coletdjnz)
+* [megatvcom] Add extractors by [zmousm](https://github.com/zmousm)
+* [Newsy] Add extractor by [Ashish0804](https://github.com/Ashish0804)
+* [noodlemagazine] Add extractor by [trassshhub](https://github.com/trassshhub)
+* [PokerGo] Add extractors by [Ashish0804](https://github.com/Ashish0804)
+* [Pornez] Add extractor by [mozlima](https://github.com/mozlima)
+* [PRX] Add extractors by [coletdjnz](https://github.com/coletdjnz)
+* [RTNews] Add extractor by [Ashish0804](https://github.com/Ashish0804)
+* [Rule34video] Add extractor by [trassshhub](https://github.com/trassshhub)
+* [tvopengr] Add extractors by [zmousm](https://github.com/zmousm)
+* [Vimm] Add extractor by [alerikaisattera](https://github.com/alerikaisattera)
+* [glomex] Add extractors by [zmousm](https://github.com/zmousm)
+* [instagram] Add story/highlight extractor by [u-spec-png](https://github.com/u-spec-png)
+* [openrec] Add movie extractor by [Lesmiscore](https://github.com/Lesmiscore)
+* [rai] Add Raiplaysound extractors by [nixxo](https://github.com/nixxo), [pukkandan](https://github.com/pukkandan)
+* [aparat] Fix extractor
+* [ard] Extract subtitles by [fstirlitz](https://github.com/fstirlitz)
+* [BiliIntl] Add login by [MinePlayersPE](https://github.com/MinePlayersPE)
+* [CeskaTelevize] Use `http` for manifests
+* [CTVNewsIE] Add fallback for video search by [Ashish0804](https://github.com/Ashish0804)
+* [dplay] Migrate DiscoveryPlusItaly to DiscoveryPlus by [timendum](https://github.com/timendum)
+* [dplay] Re-structure DiscoveryPlus extractors
+* [Dropbox] Support password protected files and more formats by [zenerdi0de](https://github.com/zenerdi0de)
+* [facebook] Fix extraction from groups
+* [facebook] Improve title and uploader extraction
+* [facebook] Parse dash manifests
+* [fox] Extract m3u8 from preview by [ischmidt20](https://github.com/ischmidt20)
+* [funk] Support origin URLs
+* [gfycat] Fix `uploader`
+* [gfycat] Support embeds by [coletdjnz](https://github.com/coletdjnz)
+* [hotstar] Add extractor args to ignore tags by [Ashish0804](https://github.com/Ashish0804)
+* [hrfernsehen] Fix ardloader extraction by [CreaValix](https://github.com/CreaValix)
+* [instagram] Fix username extraction for stories and highlights by [nyuszika7h](https://github.com/nyuszika7h)
+* [kakao] Detect geo-restriction
+* [line] Remove `tv.line.me` by [sian1468](https://github.com/sian1468)
+* [mixch] Add `MixchArchiveIE` by [Lesmiscore](https://github.com/Lesmiscore)
+* [mixcloud] Detect restrictions by [llacb47](https://github.com/llacb47)
+* [NBCSports] Fix extraction of platform URLs by [ischmidt20](https://github.com/ischmidt20)
+* [Nexx] Extract more metadata by [MinePlayersPE](https://github.com/MinePlayersPE)
+* [Nexx] Support 3q CDN by [MinePlayersPE](https://github.com/MinePlayersPE)
+* [pbs] De-prioritize AD formats
+* [PornHub,YouTube] Refresh onion addresses by [unit193](https://github.com/unit193)
+* [RedBullTV] Parse subtitles from manifest by [Ashish0804](https://github.com/Ashish0804)
+* [streamcz] Fix extractor by [arkamar](https://github.com/arkamar), [pukkandan](https://github.com/pukkandan)
+* [Ted] Rewrite extractor by [pukkandan](https://github.com/pukkandan), [trassshhub](https://github.com/trassshhub)
+* [Theta] Fix valid URL by [alerikaisattera](https://github.com/alerikaisattera)
+* [ThisOldHouseIE] Add support for premium videos by [Ashish0804](https://github.com/Ashish0804)
+* [TikTok] Fix extraction for sigi-based webpages, add API fallback by [MinePlayersPE](https://github.com/MinePlayersPE)
+* [TikTok] Pass cookies to formats, and misc fixes by [MinePlayersPE](https://github.com/MinePlayersPE)
+* [TikTok] Extract captions, user thumbnail by [MinePlayersPE](https://github.com/MinePlayersPE)
+* [TikTok] Change app version by [MinePlayersPE](https://github.com/MinePlayersPE), [llacb47](https://github.com/llacb47)
+* [TVer] Extract message for unaired live by [Lesmiscore](https://github.com/Lesmiscore)
+* [twitcasting] Refactor extractor by [Lesmiscore](https://github.com/Lesmiscore)
+* [twitter] Fix video in quoted tweets
+* [veoh] Improve extractor by [foghawk](https://github.com/foghawk)
+* [vk] Capture `clip` URLs
+* [vk] Fix VKUserVideosIE by [Ashish0804](https://github.com/Ashish0804)
+* [vk] Improve `_VALID_URL` by [k3ns1n](https://github.com/k3ns1n)
+* [VrtNU] Handle empty title by [pgaig](https://github.com/pgaig)
+* [XVideos] Check HLS formats by [MinePlayersPE](https://github.com/MinePlayersPE)
+* [yahoo:gyao] Improve playlist handling by [hyano](https://github.com/hyano)
+* [youtube:tab] Extract more playlist metadata by [coletdjnz](https://github.com/coletdjnz), [pukkandan](https://github.com/pukkandan)
+* [youtube:tab] Raise error on tab redirect by [krichbanana](https://github.com/krichbanana), [coletdjnz](https://github.com/coletdjnz)
+* [youtube] Update Innertube clients by [coletdjnz](https://github.com/coletdjnz)
+* [youtube] Detect live-stream embeds
+* [youtube] Do not return `upload_date` for playlists
+* [youtube] Extract channel subscriber count by [coletdjnz](https://github.com/coletdjnz)
+* [youtube] Make invalid storyboard URL non-fatal
+* [youtube] Enforce UTC, update innertube clients and tests by [coletdjnz](https://github.com/coletdjnz)
+* [zdf] Add chapter extraction by [iw0nderhow](https://github.com/iw0nderhow)
+* [zee5] Add geo-bypass
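+
+A sketch of how the extra post-processing stages listed at the top of this section can be used when embedding: `YoutubeDL.add_post_processor` takes a `when` argument naming the stage. Treat the exact set of stage names accepted here as an assumption; the URL is a placeholder:
+
+```python
+import yt_dlp
+from yt_dlp.postprocessor import PostProcessor
+
+class AnnouncePP(PostProcessor):
+    def run(self, info):
+        self.to_screen(f'Finished all formats of {info.get("title")!r}')
+        return [], info  # (files_to_delete, updated info dict)
+
+with yt_dlp.YoutubeDL() as ydl:
+    # 'after_video' is one of the stages added in this release
+    ydl.add_post_processor(AnnouncePP(), when='after_video')
+    ydl.download(['https://example.com/video'])  # placeholder URL
+```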
+
+
+### 2021.12.27
+
+* Avoid recursion error when re-extracting info
+* [ffmpeg] Fix position of `--ppa`
+* [aria2c] Don't show progress when `--no-progress`
+* [cookies] Support other keyrings by [mbway](https://github.com/mbway)
+* [EmbedThumbnail] Prefer AtomicParsley over ffmpeg if available
+* [generic] Fix HTTP KVS Player by [git-anony-mouse](https://github.com/git-anony-mouse)
+* [ThumbnailsConvertor] Fix for when there are no thumbnails
+* [docs] Add examples for using `TYPES:` in `-P`/`-o`
+* [PixivSketch] Add extractors by [nao20010128nao](https://github.com/nao20010128nao)
+* [tiktok] Add music, sticker and tag IEs by [MinePlayersPE](https://github.com/MinePlayersPE)
+* [BiliIntl] Fix extractor by [MinePlayersPE](https://github.com/MinePlayersPE)
+* [CBC] Fix URL regex
+* [tiktok] Fix `extractor_key` used in archive
+* [youtube] **End `live-from-start` properly when stream ends with 403**
+* [Zee5] Fix `_VALID_URL` for TV shows by [Ashish0804](https://github.com/Ashish0804)
+
+### 2021.12.25
+
+* [dash,youtube] **Download live from start to end** by [nao20010128nao](https://github.com/nao20010128nao), [pukkandan](https://github.com/pukkandan)
+ * Add option `--live-from-start` to enable downloading live videos from start (see the usage sketch at the end of this section)
+ * Add key `is_from_start` in formats to identify formats (of live videos) that download from start
+ * [dash] Create protocol `http_dash_segments_generator` that allows a function to be passed instead of fragments
+ * [fragment] Allow multiple live dash formats to download simultaneously
+ * [youtube] Implement fragment re-fetching for the live dash formats
+ * [youtube] Re-extract dash manifest every 5 hours (manifest expires in 6 hours)
+ * [postprocessor/ffmpeg] Add `FFmpegFixupDuplicateMoovPP` to fixup duplicated moov atoms
+ * Known issues:
+ * Ctrl+C doesn't work on Windows when downloading multiple formats
+ * If video becomes private, download hangs
+* [SponsorBlock] Add `Filler` and `Highlight` categories by [nihil-admirari](https://github.com/nihil-admirari), [pukkandan](https://github.com/pukkandan)
+ * Change `--sponsorblock-cut all` to `--sponsorblock-cut default` if you do not want filler sections to be removed
+* Add field `webpage_url_domain`
+* Add interactive format selection with `-f -`
+* Add option `--file-access-retries` by [ehoogeveen-medweb](https://github.com/ehoogeveen-medweb)
+* [outtmpl] Add alternate forms `S`, `D` and improve `id` detection
+* [outtmpl] Add operator `&` for replacement text by [PilzAdam](https://github.com/PilzAdam)
+* [EmbedSubtitle] Disable duration check temporarily
+* [extractor] Add `_search_nuxt_data` by [nao20010128nao](https://github.com/nao20010128nao)
+* [extractor] Ignore errors in comment extraction when `-i` is given
+* [extractor] Standardize `_live_title`
+* [FormatSort] Prevent incorrect deprecation warning
+* [generic] Extract m3u8 formats from JSON-LD
+* [postprocessor/ffmpeg] Always add `faststart`
+* [utils] Fix parsing `YYYYMMDD` dates in Nov/Dec by [wlritchi](https://github.com/wlritchi)
+* [utils] Improve `parse_count`
+* [utils] Update `std_headers` by [kikuyan](https://github.com/kikuyan), [fstirlitz](https://github.com/fstirlitz)
+* [lazy_extractors] Fix for search IEs
+* [extractor] Support default implicit graph in JSON-LD by [zmousm](https://github.com/zmousm)
+* Allow `--no-write-thumbnail` to override `--write-all-thumbnail`
+* Fix `--throttled-rate`
+* Fix control characters being printed to `--console-title`
+* Fix PostProcessor hooks not registered for some PPs
+* Pre-process when using `--flat-playlist`
+* Remove known invalid thumbnails from `info_dict`
+* Add warning when using `-f best`
+* Use `parse_duration` for `--wait-for-video` and some minor fix
+* [test/download] Add more fields
+* [test/download] Ignore field `webpage_url_domain` by [std-move](https://github.com/std-move)
+* [compat] Suppress errors in enabling VT mode
+* [docs] Improve manpage format by [iw0nderhow](https://github.com/iw0nderhow), [pukkandan](https://github.com/pukkandan)
+* [docs,cleanup] Minor fixes and cleanup
+* [cleanup] Fix some typos by [unit193](https://github.com/unit193)
+* [ABC:iview] Add show extractor by [pabs3](https://github.com/pabs3)
+* [dropout] Add extractor by [TwoThousandHedgehogs](https://github.com/TwoThousandHedgehogs), [pukkandan](https://github.com/pukkandan)
+* [GameJolt] Add extractors by [MinePlayersPE](https://github.com/MinePlayersPE)
+* [gofile] Add extractor by [Jertzukka](https://github.com/Jertzukka), [Ashish0804](https://github.com/Ashish0804)
+* [hse] Add extractors by [cypheron](https://github.com/cypheron), [pukkandan](https://github.com/pukkandan)
+* [NateTV] Add NateIE and NateProgramIE by [Ashish0804](https://github.com/Ashish0804), [Hyeeji](https://github.com/Hyeeji)
+* [OpenCast] Add extractors by [bwildenhain](https://github.com/bwildenhain), [C0D3D3V](https://github.com/C0D3D3V)
+* [rtve] Add `RTVEAudioIE` by [kebianizao](https://github.com/kebianizao)
+* [Rutube] Add RutubeChannelIE by [Ashish0804](https://github.com/Ashish0804)
+* [skeb] Add extractor by [nao20010128nao](https://github.com/nao20010128nao)
+* [soundcloud] Add related tracks extractor by [Lapin0t](https://github.com/Lapin0t)
+* [toggo] Add extractor by [nyuszika7h](https://github.com/nyuszika7h)
+* [TrueID] Add extractor by [MinePlayersPE](https://github.com/MinePlayersPE)
+* [audiomack] Update album and song `_VALID_URL` by [abdullah-if](https://github.com/abdullah-if), [dirkf](https://github.com/dirkf)
+* [CBC Gem] Extract 1080p formats by [DavidSkrundz](https://github.com/DavidSkrundz)
+* [ceskatelevize] Fetch iframe from nextJS data by [mkubecek](https://github.com/mkubecek)
+* [crackle] Look for non-DRM formats by [raleeper](https://github.com/raleeper)
+* [dplay] Temporary fix for `discoveryplus.com/it`
+* [DiscoveryPlusShowBaseIE] Yield actual video ID by [Ashish0804](https://github.com/Ashish0804)
+* [Facebook] Handle redirect URLs
+* [fujitv] Extract 1080p from `tv_android` m3u8 by [YuenSzeHong](https://github.com/YuenSzeHong)
+* [gronkh] Support new URL pattern by [Sematre](https://github.com/Sematre)
+* [instagram] Expand valid URL by [u-spec-png](https://github.com/u-spec-png)
+* [Instagram] Try bypassing login wall with embed page by [MinePlayersPE](https://github.com/MinePlayersPE)
+* [Jamendo] Fix use of `_VALID_URL_RE` by [jaller94](https://github.com/jaller94)
+* [LBRY] Support livestreams by [Ashish0804](https://github.com/Ashish0804), [pukkandan](https://github.com/pukkandan)
+* [NJPWWorld] Extract formats from m3u8 by [aarubui](https://github.com/aarubui)
+* [NovaEmbed] Update player regex by [std-move](https://github.com/std-move)
+* [npr] Make SMIL extraction non-fatal by [r5d](https://github.com/r5d)
+* [ntvcojp] Extract NUXT data by [nao20010128nao](https://github.com/nao20010128nao)
+* [ok.ru] Add mobile fallback by [nao20010128nao](https://github.com/nao20010128nao)
+* [olympics] Add uploader and cleanup by [u-spec-png](https://github.com/u-spec-png)
+* [ondemandkorea] Update `jw_config` regex by [julien-hadleyjack](https://github.com/julien-hadleyjack)
+* [PlutoTV] Expand `_VALID_URL`
+* [RaiNews] Fix extractor by [nixxo](https://github.com/nixxo)
+* [RCTIPlusSeries] Lazy extraction and video type selection by [MinePlayersPE](https://github.com/MinePlayersPE)
+* [redtube] Handle formats delivered inside a JSON by [dirkf](https://github.com/dirkf), [nixxo](https://github.com/nixxo)
+* [SonyLiv] Add OTP login support by [Ashish0804](https://github.com/Ashish0804)
+* [Steam] Fix extractor by [u-spec-png](https://github.com/u-spec-png)
+* [TikTok] Pass cookies to mobile API by [MinePlayersPE](https://github.com/MinePlayersPE)
+* [trovo] Fix inheritance of `TrovoChannelBaseIE`
+* [TVer] Extract better thumbnails by [YuenSzeHong](https://github.com/YuenSzeHong)
+* [vimeo] Extract chapters
+* [web.archive:youtube] Improve metadata extraction by [coletdjnz](https://github.com/coletdjnz)
+* [youtube:comments] Add more options for limiting number of comments extracted by [coletdjnz](https://github.com/coletdjnz)
+* [youtube:tab] Extract more metadata from feeds/channels/playlists by [coletdjnz](https://github.com/coletdjnz)
+* [youtube:tab] Extract video thumbnails from playlist by [coletdjnz](https://github.com/coletdjnz), [pukkandan](https://github.com/pukkandan)
+* [youtube:tab] Ignore query when redirecting channel to playlist and cleanup of related code
+* [youtube] Fix `ytsearchdate`
+* [zdf] Support videos with different ptmd location by [iw0nderhow](https://github.com/iw0nderhow)
+* [zee5] Support /episodes in URL
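+
+A usage sketch for the `--live-from-start` feature described at the top of this section, assuming the flag maps to a `live_from_start` param in the embedding API (the URL is a placeholder):
+
+```python
+# Sketch: download a live stream from its start rather than the live edge.
+# Assumes --live-from-start maps to the 'live_from_start' param.
+import yt_dlp
+
+ydl_opts = {'live_from_start': True}
+with yt_dlp.YoutubeDL(ydl_opts) as ydl:
+    ydl.download(['https://example.com/live'])  # placeholder live URL
+```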
+
+
+### 2021.12.01
+
+* **Add option `--wait-for-video` to wait for scheduled streams**
+* Add option `--break-per-input` to apply `--break-on...` options to each input URL
+* Add option `--embed-info-json` to embed info.json in mkv
+* Add compat-option `embed-metadata`
+* Allow using a custom format selector through API (see the sketch at the end of this section)
+* [AES] Add ECB mode by [nao20010128nao](https://github.com/nao20010128nao)
+* [build] Fix MacOS build
+* [build] Save Git HEAD at release alongside version info
+* [build] Use `workflow_dispatch` for release
+* [downloader/ffmpeg] Fix for direct videos inside mpd manifests
+* [downloader] Add colors to download progress
+* [EmbedSubtitles] Slightly relax duration check and related cleanup
+* [ExtractAudio] Fix conversion to `wav` and `vorbis`
+* [ExtractAudio] Support `alac`
+* [extractor] Extract `average_rating` from JSON-LD
+* [FixupM3u8] Fixup MPEG-TS in MP4 container
+* [generic] Support mpd manifests without extension by [shirt](https://github.com/shirt-dev)
+* [hls] Better FairPlay DRM detection by [nyuszika7h](https://github.com/nyuszika7h)
+* [jsinterp] Fix splice to handle float (for youtube js player f1ca6900)
+* [utils] Allow alignment in `render_table` and add tests
+* [utils] Fix `PagedList`
+* [utils] Fix error when copying `LazyList`
+* Clarify video/audio-only formats in `-F`
+* Ensure directory exists when checking formats
+* Ensure path for link files exists by [Zirro](https://github.com/Zirro)
+* Ensure same config file is not loaded multiple times
+* Fix `postprocessor_hooks`
+* Fix `--break-on-archive` when pre-checking
+* Fix `--check-formats` for `mhtml`
+* Fix `--load-info-json` of playlists with failed entries
+* Fix `--trim-filename` when filename has `.`
+* Fix bug in parsing `--add-header`
+* Fix error in `report_unplayable_conflict` by [shirt](https://github.com/shirt-dev)
+* Fix writing playlist infojson with `--no-clean-infojson`
+* Validate `--geo-bypass-country`
+* [blogger] Add extractor by [pabs3](https://github.com/pabs3)
+* [breitbart] Add extractor by [Grabien](https://github.com/Grabien)
+* [CableAV] Add extractor by [j54vc1bk](https://github.com/j54vc1bk)
+* [CanalAlpha] Add extractor by [Ashish0804](https://github.com/Ashish0804)
+* [CozyTV] Add extractor by [Ashish0804](https://github.com/Ashish0804)
+* [CPTwentyFour] Add extractor by [Ashish0804](https://github.com/Ashish0804)
+* [DiscoveryPlus] Add `DiscoveryPlusItalyShowIE` by [Ashish0804](https://github.com/Ashish0804)
+* [ESPNCricInfo] Add extractor by [Ashish0804](https://github.com/Ashish0804)
+* [LinkedIn] Add extractor by [u-spec-png](https://github.com/u-spec-png)
+* [mixch] Add extractor by [nao20010128nao](https://github.com/nao20010128nao)
+* [nebula] Add `NebulaCollectionIE` and rewrite extractor by [hheimbuerger](https://github.com/hheimbuerger)
+* [OneFootball] Add extractor by [Ashish0804](https://github.com/Ashish0804)
+* [peer.tv] Add extractor by [u-spec-png](https://github.com/u-spec-png)
+* [radiozet] Add extractor by [0xA7404A](https://github.com/0xA7404A) (Aurora)
+* [redgifs] Add extractor by [chio0hai](https://github.com/chio0hai)
+* [RedGifs] Add Search and User extractors by [Deer-Spangle](https://github.com/Deer-Spangle)
+* [rtrfm] Add extractor by [pabs3](https://github.com/pabs3)
+* [Streamff] Add extractor by [cntrl-s](https://github.com/cntrl-s)
+* [Stripchat] Add extractor by [zulaport](https://github.com/zulaport)
+* [Aljazeera] Fix extractor by [u-spec-png](https://github.com/u-spec-png)
+* [AmazonStoreIE] Fix regex to not match vdp urls by [Ashish0804](https://github.com/Ashish0804)
+* [ARDBetaMediathek] Handle new URLs
+* [bbc] Get all available formats by [nyuszika7h](https://github.com/nyuszika7h)
+* [Bilibili] Fix title extraction by [u-spec-png](https://github.com/u-spec-png)
+* [CBC Gem] Fix for shows that don't have all seasons by [makeworld-the-better-one](https://github.com/makeworld-the-better-one)
+* [curiositystream] Add more metadata
+* [CuriosityStream] Fix series
+* [DiscoveryPlus] Rewrite extractors by [Ashish0804](https://github.com/Ashish0804), [pukkandan](https://github.com/pukkandan)
+* [HotStar] Set language field from tags by [Ashish0804](https://github.com/Ashish0804)
+* [instagram, cleanup] Refactor extractors
+* [Instagram] Display more login errors by [MinePlayersPE](https://github.com/MinePlayersPE)
+* [itv] Fix extractor by [staubichsauger](https://github.com/staubichsauger), [pukkandan](https://github.com/pukkandan)
+* [mediaklikk] Expand valid URL
+* [MTV] Improve mgid extraction by [Sipherdrakon](https://github.com/Sipherdrakon), [kikuyan](https://github.com/kikuyan)
+* [nexx] Better error message for unsupported format
+* [NovaEmbed] Fix extractor by [pukkandan](https://github.com/pukkandan), [std-move](https://github.com/std-move)
+* [PatreonUser] Do not capture RSS URLs
+* [Reddit] Add support for 1080p videos by [xenova](https://github.com/xenova)
+* [RoosterTeethSeries] Fix for multiple pages by [MinePlayersPE](https://github.com/MinePlayersPE)
+* [sbs] Fix for movies and livestreams
+* [Senate.gov] Add SenateGovIE and fix SenateISVPIE by [Grabien](https://github.com/Grabien), [pukkandan](https://github.com/pukkandan)
+* [soundcloud:search] Fix pagination
+* [tiktok:user] Set `webpage_url` correctly
+* [Tokentube] Fix description by [u-spec-png](https://github.com/u-spec-png)
+* [trovo] Fix extractor by [nyuszika7h](https://github.com/nyuszika7h)
+* [tv2] Expand valid URL
+* [Tvplayhome] Fix extractor by [pukkandan](https://github.com/pukkandan), [18928172992817182](https://github.com/18928172992817182)
+* [Twitch:vod] Add chapters by [mpeter50](https://github.com/mpeter50)
+* [twitch:vod] Extract live status by [DEvmIb](https://github.com/DEvmIb)
+* [VidLii] Add 720p support by [mrpapersonic](https://github.com/mrpapersonic)
+* [vimeo] Add fallback for config URL
+* [vimeo] Sort http formats higher
+* [WDR] Expand valid URL
+* [willow] Add extractor by [aarubui](https://github.com/aarubui)
+* [xvideos] Detect embed URLs by [4a1e2y5](https://github.com/4a1e2y5)
+* [xvideos] Fix extractor by [Yakabuff](https://github.com/Yakabuff)
+* [youtube, cleanup] Reorganize Tab and Search extractor inheritances
+* [youtube:search_url] Add playlist/channel support
+* [youtube] Add `default` player client by [coletdjnz](https://github.com/coletdjnz)
+* [youtube] Add storyboard formats
+* [youtube] Decrypt n-sig for URLs with `ratebypass`
+* [youtube] Minor improvement to format sorting
+* [cleanup] Add deprecation warnings
+* [cleanup] Refactor `JSInterpreter._seperate`
+* [cleanup] Remove some unnecessary groups in regexes by [Ashish0804](https://github.com/Ashish0804)
+* [cleanup] Misc cleanup
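+
+For "Allow using a custom format selector through API" above: the `format` param can be a callable that receives a context containing the candidate `formats` and yields the selection. A sketch along the lines of the project's embedding example; the selection logic is illustrative only:
+
+```python
+import yt_dlp
+
+def format_selector(ctx):
+    """Pick the best video-only stream and a matching audio stream."""
+    formats = ctx.get('formats')[::-1]  # formats are sorted worst to best
+    best_video = next(f for f in formats
+                      if f['vcodec'] != 'none' and f['acodec'] == 'none')
+    audio_ext = {'mp4': 'm4a', 'webm': 'webm'}[best_video['ext']]
+    best_audio = next(f for f in formats
+                      if f['acodec'] != 'none' and f['vcodec'] == 'none'
+                      and f['ext'] == audio_ext)
+    yield {
+        'format_id': f'{best_video["format_id"]}+{best_audio["format_id"]}',
+        'ext': best_video['ext'],
+        'requested_formats': [best_video, best_audio],
+        'protocol': f'{best_video["protocol"]}+{best_audio["protocol"]}',
+    }
+
+with yt_dlp.YoutubeDL({'format': format_selector}) as ydl:
+    ydl.download(['https://example.com/video'])  # placeholder URL
+```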
+
+
+### 2021.11.10.1
+
+* Temporarily disable MacOS build
+
+### 2021.11.10
+
+* [youtube] **Fix throttling by decrypting n-sig**
+* Merge extractors from [haruhi-dl](https://git.sakamoto.pl/laudom/haruhi-dl) by [selfisekai](https://github.com/selfisekai)
+ * [extractor] Add `_search_nextjs_data`
+ * [tvp] Fix extractors
+ * [tvp] Add TVPStreamIE
+ * [wppilot] Add extractors
+ * [polskieradio] Add extractors
+ * [radiokapital] Add extractors
+ * [polsatgo] Add extractor by [selfisekai](https://github.com/selfisekai), [sdomi](https://github.com/sdomi)
+* Separate `--check-all-formats` from `--check-formats`
+* Approximate filesize from bitrate
+* Don't create console in `windows_enable_vt_mode`
+* Fix bug in `--load-infojson` of playlists
+* [minicurses] Add colors to `-F` and standardize color-printing code
+* [outtmpl] Add type `link` for internet shortcut files
+* [outtmpl] Add alternate forms for `q` and `j`
+* [outtmpl] Do not traverse `None`
+* [fragment] Fix progress display in fragmented downloads
+* [downloader/ffmpeg] Fix vtt download with ffmpeg
+* [ffmpeg] Detect presence of setts and libavformat version
+* [ExtractAudio] Rescale `--audio-quality` correctly by [CrypticSignal](https://github.com/CrypticSignal), [pukkandan](https://github.com/pukkandan)
+* [ExtractAudio] Use `libfdk_aac` if available by [CrypticSignal](https://github.com/CrypticSignal)
+* [FormatSort] `eac3` is better than `ac3`
+* [FormatSort] Fix some fields' defaults
+* [generic] Detect more json_ld
+* [generic] Parse jwplayer with only the JSON URL
+* [extractor] Add keyword automatically to SearchIE descriptions
+* [extractor] Fix some errors being converted to `ExtractorError`
+* [utils] Add `join_nonempty` (see the sketch at the end of this section)
+* [utils] Add `jwt_decode_hs256` by [Ashish0804](https://github.com/Ashish0804)
+* [utils] Create `DownloadCancelled` exception
+* [utils] Parse `vp09` as vp9
+* [utils] Sanitize URL when determining protocol
+* [test/download] Fallback test to `bv`
+* [docs] Minor documentation improvements
+* [cleanup] Improvements to error and debug messages
+* [cleanup] Minor fixes and cleanup
+* [3speak] Add extractors by [Ashish0804](https://github.com/Ashish0804)
+* [AmazonStore] Add extractor by [Ashish0804](https://github.com/Ashish0804)
+* [Gab] Add extractor by [u-spec-png](https://github.com/u-spec-png)
+* [mediaset] Add playlist support by [nixxo](https://github.com/nixxo)
+* [MLSSoccer] Add extractor by [Ashish0804](https://github.com/Ashish0804)
+* [N1] Add support for nova.rs by [u-spec-png](https://github.com/u-spec-png)
+* [PlanetMarathi] Add extractor by [Ashish0804](https://github.com/Ashish0804)
+* [RaiplayRadio] Add extractors by [frafra](https://github.com/frafra)
+* [roosterteeth] Add series extractor
+* [sky] Add `SkyNewsStoryIE` by [ajj8](https://github.com/ajj8)
+* [youtube] Fix sorting for some videos
+* [youtube] Populate `thumbnail` with the best "known" thumbnail
+* [youtube] Refactor itag processing
+* [youtube] Remove unnecessary no-playlist warning
+* [youtube:tab] Add Invidious list for playlists/channels by [rhendric](https://github.com/rhendric)
+* [Bilibili:comments] Fix infinite loop by [u-spec-png](https://github.com/u-spec-png)
+* [ceskatelevize] Fix extractor by [flashdagger](https://github.com/flashdagger)
+* [Coub] Fix media format identification by [wlritchi](https://github.com/wlritchi)
+* [crunchyroll] Add extractor-args `language` and `hardsub`
+* [DiscoveryPlus] Allow language codes in URL
+* [imdb] Fix thumbnail by [ozburo](https://github.com/ozburo)
+* [instagram] Add IOS URL support by [u-spec-png](https://github.com/u-spec-png)
+* [instagram] Improve login code by [u-spec-png](https://github.com/u-spec-png)
+* [Instagram] Improve metadata extraction by [u-spec-png](https://github.com/u-spec-png)
+* [iPrima] Fix extractor by [stanoarn](https://github.com/stanoarn)
+* [itv] Add support for ITV News by [ajj8](https://github.com/ajj8)
+* [la7] Fix extractor by [nixxo](https://github.com/nixxo)
+* [linkedin] Don't login multiple times
+* [mtv] Fix some videos by [Sipherdrakon](https://github.com/Sipherdrakon)
+* [Newgrounds] Fix description by [u-spec-png](https://github.com/u-spec-png)
+* [Nrk] Minor fixes by [fractalf](https://github.com/fractalf)
+* [Olympics] Fix extractor by [u-spec-png](https://github.com/u-spec-png)
+* [piksel] Fix sorting
+* [twitter] Do not sort by codec
+* [viewlift] Add cookie-based login and series support by [Ashish0804](https://github.com/Ashish0804), [pukkandan](https://github.com/pukkandan)
+* [vimeo] Detect source extension and misc cleanup by [flashdagger](https://github.com/flashdagger)
+* [vimeo] Fix ondemand videos and direct URLs with hash
+* [vk] Fix login and add subtitles by [kaz-us](https://github.com/kaz-us)
+* [VLive] Add upload_date and thumbnail by [Ashish0804](https://github.com/Ashish0804)
+* [VRT] Fix login by [pgaig](https://github.com/pgaig)
+* [Vupload] Fix extractor by [u-spec-png](https://github.com/u-spec-png)
+* [wakanim] Add support for MPD manifests by [nyuszika7h](https://github.com/nyuszika7h)
+* [wakanim] Detect geo-restriction by [nyuszika7h](https://github.com/nyuszika7h)
+* [ZenYandex] Fix extractor by [u-spec-png](https://github.com/u-spec-png)
+
+
+### 2021.10.22
+
+* [build] Improvements
+ * Build standalone MacOS packages by [smplayer-dev](https://github.com/smplayer-dev)
+ * Release Windows exe built with `py2exe`
+ * Enable lazy-extractors in releases
+ * Set env var `YTDLP_NO_LAZY_EXTRACTORS` to forcefully disable this (experimental)
+ * Clean up error reporting in update
+ * Refactor `pyinst.py`, misc cleanup and improve docs
+* [docs] Migrate issues to use forms by [Ashish0804](https://github.com/Ashish0804)
+* [downloader] **Fix slow progress hooks**
+ * This was causing HLS/DASH downloads to be extremely slow in some situations
+* [downloader/ffmpeg] Improve simultaneous download and merge
+* [EmbedMetadata] Allow overwriting all default metadata with `meta_default` key
+* [ModifyChapters] Add ability for `--remove-chapters` to remove sections by timestamp
+* [utils] Allow duration strings in `--match-filter`
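+
+A rough sketch of the two entries above, with `URL` as a placeholder and assuming the `*` prefix marks a timestamp range rather than a chapter regex:
+
+```shell
+# Remove a section of the video by timestamp range
+yt-dlp --remove-chapters "*10:15-15:00" URL
+
+# Duration strings can now be used directly in the filter
+yt-dlp --match-filter "duration < 10:00" URL
+```
+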
+* Add HDR information to formats
+* Add negative option `--no-batch-file` by [Zirro](https://github.com/Zirro)
+* Calculate more fields for merged formats
+* Do not verify thumbnail URLs unless `--check-formats` is specified
+* Don't create console for subprocesses on Windows
+* Fix `--restrict-filename` when used with default template
+* Fix `check_formats` output being written to stdout when `-qv`
+* Fix bug in storyboards
+* Fix conflict between `id` and `ext` in format selection
+* Fix verbose header not showing custom configs
+* Load archive only after printing verbose header
+* Make `duration_string` and `resolution` available in --match-filter
+* Re-implement deprecated option `--id`
+* Reduce default `--socket-timeout`
+* Write verbose header to logger
+* [outtmpl] Fix bug in expanding environment variables
+* [cookies] Local State should be opened as utf-8
+* [extractor,utils] Detect more codecs/mimetypes
+* [extractor] Detect `EXT-X-KEY` Apple FairPlay
+* [utils] Use `importlib` to load plugins by [sulyi](https://github.com/sulyi)
+* [http] Retry on socket timeout and show the last encountered error
+* [fragment] Print error message when skipping fragment
+* [aria2c] Fix `--skip-unavailable-fragment`
+* [SponsorBlock] Obey `extractor-retries` and `sleep-requests`
+* [Merger] Do not add `aac_adtstoasc` to non-hls audio
+* [ModifyChapters] Do not mutate original chapters by [nihil-admirari](https://github.com/nihil-admirari)
+* [devscripts/run_tests] Use markers to filter tests by [sulyi](https://github.com/sulyi)
+* [7plus] Add cookie based authentication by [nyuszika7h](https://github.com/nyuszika7h)
+* [AdobePass] Fix RCN MSO by [jfogelman](https://github.com/jfogelman)
+* [CBC] Fix Gem livestream by [makeworld-the-better-one](https://github.com/makeworld-the-better-one)
+* [CBC] Support CBC Gem member content by [makeworld-the-better-one](https://github.com/makeworld-the-better-one)
+* [crunchyroll] Add season to flat-playlist
+* [crunchyroll] Add support for `beta.crunchyroll` URLs and fix series URLs with language code
+* [EUScreen] Add Extractor by [Ashish0804](https://github.com/Ashish0804)
+* [Gronkh] Add extractor by [Ashish0804](https://github.com/Ashish0804)
+* [hidive] Fix typo
+* [Hotstar] Mention Dynamic Range in `format_id` by [Ashish0804](https://github.com/Ashish0804)
+* [Hotstar] Raise appropriate error for DRM
+* [instagram] Add login by [u-spec-png](https://github.com/u-spec-png)
+* [instagram] Show appropriate error when login is needed
+* [microsoftstream] Add extractor by [damianoamatruda](https://github.com/damianoamatruda), [nixklai](https://github.com/nixklai)
+* [on24] Add extractor by [damianoamatruda](https://github.com/damianoamatruda)
+* [patreon] Fix vimeo player regex by [zenerdi0de](https://github.com/zenerdi0de)
+* [SkyNewsAU] Add extractor by [Ashish0804](https://github.com/Ashish0804)
+* [tagesschau] Fix extractor by [u-spec-png](https://github.com/u-spec-png)
+* [tbs] Add tbs live streams by [llacb47](https://github.com/llacb47)
+* [tiktok] Fix typo and update tests
+* [trovo] Support channel clips and VODs by [Ashish0804](https://github.com/Ashish0804)
+* [Viafree] Add support for Finland by [18928172992817182](https://github.com/18928172992817182)
+* [vimeo] Fix embedded `player.vimeo`
+* [vlive:channel] Fix extraction by [kikuyan](https://github.com/kikuyan), [pukkandan](https://github.com/pukkandan)
+* [youtube] Add auto-translated subtitles
+* [youtube] Expose different formats with same itag
+* [youtube:comments] Fix for new layout by [coletdjnz](https://github.com/coletdjnz)
+* [cleanup] Cleanup bilibili code by [pukkandan](https://github.com/pukkandan), [u-spec-png](https://github.com/u-spec-png)
+* [cleanup] Remove broken youtube login code
+* [cleanup] Standardize timestamp formatting code
+* [cleanup] Generalize `getcomments` implementation for extractors
+* [cleanup] Simplify search extractors code
+* [cleanup] misc
+
+
+### 2021.10.10
+
+* [downloader/ffmpeg] Fix bug in initializing `FFmpegPostProcessor`
+* [minicurses] Fix when printing to file
+* [downloader] Fix throttledratelimit
+* [francetv] Fix extractor by [fstirlitz](https://github.com/fstirlitz), [sarnoud](https://github.com/sarnoud)
+* [NovaPlay] Add extractor by [Bojidarist](https://github.com/Bojidarist)
+* [ffmpeg] Revert "Set max probesize" - No longer needed
+* [docs] Remove incorrect dependency on VC++10
+* [build] Allow releasing without a changelog
+
+### 2021.10.09
+
+* Improved progress reporting
+ * Separate `--console-title` and `--no-progress`
+ * Add option `--progress` to show progress-bar even in quiet mode
+ * Fix and refactor `minicurses` and use it for all progress reporting
+ * Standardize use of terminal sequences and enable color support for Windows 10
+ * Add option `--progress-template` to customize progress-bar and console-title
+ * Add postprocessor hooks and progress reporting
+* [postprocessor] Add plugin support with option `--use-postprocessor`
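+
+A minimal sketch of invoking a plugin postprocessor; `MyPluginPP` is a hypothetical plugin name, and the `when=` argument is assumed to control at which stage it runs:
+
+```shell
+# MyPluginPP is a hypothetical postprocessor plugin; arguments are key=value pairs
+yt-dlp --use-postprocessor "MyPluginPP:when=before_dl" URL
+```
+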
+* [extractor] Extract storyboards from SMIL manifests by [fstirlitz](https://github.com/fstirlitz)
+* [outtmpl] Alternate form of format type `l` for `\n` delimited list
+* [outtmpl] Format type `U` for unicode normalization
+* [outtmpl] Allow empty output template to skip a type of file
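+
+Hedged examples of the three template changes above (`URL` is a placeholder; the `#` flag, `U` type, and empty per-type template are shown as described in the entries):
+
+```shell
+# Alternate form of `l`: newline-delimited instead of comma-delimited
+yt-dlp --print "%(tags)#l" URL
+
+# `U`: unicode-normalize the value before use
+yt-dlp -o "%(title)U.%(ext)s" URL
+
+# An empty template for a file type should skip writing that file
+yt-dlp --write-thumbnail -o "thumbnail:" URL
+```
+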
+* Merge webm formats into mkv if thumbnails are to be embedded
+* [adobepass] Add RCN as MSO by [jfogelman](https://github.com/jfogelman)
+* [ciscowebex] Add extractor by [damianoamatruda](https://github.com/damianoamatruda)
+* [Gettr] Add extractor by [i6t](https://github.com/i6t)
+* [GoPro] Add extractor by [i6t](https://github.com/i6t)
+* [N1] Add extractor by [u-spec-png](https://github.com/u-spec-png)
+* [Theta] Add video extractor by [alerikaisattera](https://github.com/alerikaisattera)
+* [Veo] Add extractor by [i6t](https://github.com/i6t)
+* [Vupload] Add extractor by [u-spec-png](https://github.com/u-spec-png)
+* [bbc] Extract better quality videos by [ajj8](https://github.com/ajj8)
+* [Bilibili] Add subtitle converter by [u-spec-png](https://github.com/u-spec-png)
+* [CBC] Cleanup tests by [makeworld-the-better-one](https://github.com/makeworld-the-better-one)
+* [Douyin] Rewrite extractor by [MinePlayersPE](https://github.com/MinePlayersPE)
+* [Funimation] Fix for /v/ urls by [pukkandan](https://github.com/pukkandan), [Jules-A](https://github.com/Jules-A)
+* [Funimation] Sort formats according to the relevant extractor-args
+* [Hidive] Fix duplicate and incorrect formats
+* [HotStarSeries] Fix cookies by [Ashish0804](https://github.com/Ashish0804)
+* [LinkedInLearning] Add subtitles by [Ashish0804](https://github.com/Ashish0804)
+* [Mediaite] Relax valid url by [coletdjnz](https://github.com/coletdjnz)
+* [Newgrounds] Add age_limit and fix duration by [u-spec-png](https://github.com/u-spec-png)
+* [Newgrounds] Fix view count on songs by [u-spec-png](https://github.com/u-spec-png)
+* [parliamentlive.tv] Fix extractor by [u-spec-png](https://github.com/u-spec-png)
+* [PolskieRadio] Fix extractors by [jakubadamw](https://github.com/jakubadamw), [u-spec-png](https://github.com/u-spec-png)
+* [reddit] Add embedded url by [u-spec-png](https://github.com/u-spec-png)
+* [reddit] Fix 429 by generating a random `reddit_session` by [AjaxGb](https://github.com/AjaxGb)
+* [Rumble] Add RumbleChannelIE by [Ashish0804](https://github.com/Ashish0804)
+* [soundcloud:playlist] Detect last page correctly
+* [SovietsCloset] Add duration from m3u8 by [ChillingPepper](https://github.com/ChillingPepper)
+* [Streamable] Add codecs by [u-spec-png](https://github.com/u-spec-png)
+* [vidme] Remove extractor by [alerikaisattera](https://github.com/alerikaisattera)
+* [youtube:tab] Fallback to API when webpage fails to download by [coletdjnz](https://github.com/coletdjnz)
+* [youtube] Fix non-fatal errors in fetching player
+* Fix `--flat-playlist` when neither IE nor id is known
+* Fix `-f mp4` behaving differently from youtube-dl
+* Workaround for bug in `ssl.SSLContext.load_default_certs`
+* [aes] Improve performance slightly by [sulyi](https://github.com/sulyi)
+* [cookies] Fix keyring fallback by [mbway](https://github.com/mbway)
+* [embedsubtitle] Fix error when duration is unknown
+* [ffmpeg] Fix error when subtitle file is missing
+* [ffmpeg] Set max probesize to workaround AAC HLS stream issues by [shirt](https://github.com/shirt-dev)
+* [FixupM3u8] Remove redundant run when merging is needed
+* [hls] Fix decryption issues by [shirt](https://github.com/shirt-dev), [pukkandan](https://github.com/pukkandan)
+* [http] Respect user-provided chunk size over extractor's
+* [utils] Let traverse_obj accept functions as keys
+* [docs] Add note about our custom ffmpeg builds
+* [docs] Write embedding and contributing documentation by [pukkandan](https://github.com/pukkandan), [timethrow](https://github.com/timethrow)
+* [update] Check for new version even if not updateable
+* [build] Add more files to the tarball
+* [build] Allow building with py2exe (and misc fixes)
+* [build] Use pycryptodomex by [shirt](https://github.com/shirt-dev), [pukkandan](https://github.com/pukkandan)
+* [cleanup] Some minor refactoring, improve docs and misc cleanup
+
+
+### 2021.09.25
+
+* Add new option `--netrc-location`
+* [outtmpl] Allow alternate fields using `,`
+* [outtmpl] Add format type `B` to treat the value as bytes, e.g. to limit the filename to a certain number of bytes
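+
+A sketch of the two template additions above, assuming a placeholder `URL`:
+
+```shell
+# Fall back from `title` to `id` if `title` is empty
+yt-dlp -o "%(title,id)s.%(ext)s" URL
+
+# Limit the title to 200 bytes rather than 200 characters
+yt-dlp -o "%(title).200B.%(ext)s" URL
+```
+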
+* Separate the options `--ignore-errors` and `--no-abort-on-error`
+* Basic framework for simultaneous download of multiple formats by [nao20010128nao](https://github.com/nao20010128nao)
+* [17live] Add 17.live extractor by [nao20010128nao](https://github.com/nao20010128nao)
+* [bilibili] Add BiliIntlIE and BiliIntlSeriesIE by [Ashish0804](https://github.com/Ashish0804)
+* [CAM4] Add extractor by [alerikaisattera](https://github.com/alerikaisattera)
+* [Chingari] Add extractors by [Ashish0804](https://github.com/Ashish0804)
+* [CGTN] Add extractor by [chao813](https://github.com/chao813)
+* [damtomo] Add extractor by [nao20010128nao](https://github.com/nao20010128nao)
+* [gotostage] Add extractor by [poschi3](https://github.com/poschi3)
+* [Koo] Add extractor by [Ashish0804](https://github.com/Ashish0804)
+* [Mediaite] Add Extractor by [Ashish0804](https://github.com/Ashish0804)
+* [Mediaklikk] Add Extractor by [tmarki](https://github.com/tmarki), [mrx23dot](https://github.com/mrx23dot), [coletdjnz](https://github.com/coletdjnz)
+* [MuseScore] Add Extractor by [Ashish0804](https://github.com/Ashish0804)
+* [Newgrounds] Add NewgroundsUserIE and improve extractor by [u-spec-png](https://github.com/u-spec-png)
+* [nzherald] Add NZHeraldIE by [coletdjnz](https://github.com/coletdjnz)
+* [Olympics] Add replay extractor by [Ashish0804](https://github.com/Ashish0804)
+* [Peertube] Add channel and playlist extractors by [u-spec-png](https://github.com/u-spec-png)
+* [radlive] Add extractor by [nyuszika7h](https://github.com/nyuszika7h)
+* [SovietsCloset] Add extractor by [ChillingPepper](https://github.com/ChillingPepper)
+* [Streamanity] Add Extractor by [alerikaisattera](https://github.com/alerikaisattera)
+* [Theta] Add extractor by [alerikaisattera](https://github.com/alerikaisattera)
+* [Yandex] Add ZenYandexIE and ZenYandexChannelIE by [Ashish0804](https://github.com/Ashish0804)
+* [9Now] Handle episodes of series by [dalanmiller](https://github.com/dalanmiller)
+* [AnimalPlanet] Fix extractor by [Sipherdrakon](https://github.com/Sipherdrakon)
+* [Arte] Improve description extraction by [renalid](https://github.com/renalid)
+* [atv.at] Use jwt for API by [NeroBurner](https://github.com/NeroBurner)
+* [brightcove] Extract subtitles from manifests
+* [CBC] Fix CBC Gem extractors by [makeworld-the-better-one](https://github.com/makeworld-the-better-one)
+* [cbs] Report appropriate error for DRM
+* [comedycentral] Support `collection-playlist` by [nixxo](https://github.com/nixxo)
+* [DIYNetwork] Support new format by [Sipherdrakon](https://github.com/Sipherdrakon)
+* [downloader/niconico] Pass custom headers by [nao20010128nao](https://github.com/nao20010128nao)
+* [dw] Fix extractor
+* [Fancode] Fix live streams by [zenerdi0de](https://github.com/zenerdi0de)
+* [funimation] Fix for locations outside US by [Jules-A](https://github.com/Jules-A), [pukkandan](https://github.com/pukkandan)
+* [globo] Fix GloboIE by [Ashish0804](https://github.com/Ashish0804)
+* [HiDive] Fix extractor by [Ashish0804](https://github.com/Ashish0804)
+* [Hotstar] Add referer for subs by [Ashish0804](https://github.com/Ashish0804)
+* [itv] Fix extractor, add subtitles and thumbnails by [coletdjnz](https://github.com/coletdjnz), [sleaux-meaux](https://github.com/sleaux-meaux), [Vangelis66](https://github.com/Vangelis66)
+* [lbry] Show error message from API response
+* [Mxplayer] Use mobile API by [Ashish0804](https://github.com/Ashish0804)
+* [NDR] Rewrite NDRIE by [Ashish0804](https://github.com/Ashish0804)
+* [Nuvid] Fix extractor by [u-spec-png](https://github.com/u-spec-png)
+* [Oreilly] Handle new web url by [MKSherbini](https://github.com/MKSherbini)
+* [pbs] Fix subtitle extraction by [coletdjnz](https://github.com/coletdjnz), [gesa](https://github.com/gesa), [raphaeldore](https://github.com/raphaeldore)
+* [peertube] Update instances by [u-spec-png](https://github.com/u-spec-png)
+* [plutotv] Fix extractor for URLs with `/en`
+* [reddit] Workaround for 429 by redirecting to old.reddit.com
+* [redtube] Fix exts
+* [soundcloud] Make playlist extraction lazy
+* [soundcloud] Retry playlist pages on `502` error and update `_CLIENT_ID`
+* [southpark] Fix SouthParkDE by [coletdjnz](https://github.com/coletdjnz)
+* [SovietsCloset] Fix playlists for games with only named categories by [ConquerorDopy](https://github.com/ConquerorDopy)
+* [SpankBang] Fix uploader by [f4pp3rk1ng](https://github.com/f4pp3rk1ng), [coletdjnz](https://github.com/coletdjnz)
+* [tiktok] Use API to fetch higher quality video by [MinePlayersPE](https://github.com/MinePlayersPE), [llacb47](https://github.com/llacb47)
+* [TikTokUser] Fix extractor using mobile API by [MinePlayersPE](https://github.com/MinePlayersPE), [llacb47](https://github.com/llacb47)
+* [videa] Fix some extraction errors by [nyuszika7h](https://github.com/nyuszika7h)
+* [VrtNU] Handle login errors by [llacb47](https://github.com/llacb47)
+* [vrv] Don't raise error when thumbnails are missing
+* [youtube] Cleanup authentication code by [coletdjnz](https://github.com/coletdjnz)
+* [youtube] Fix `--mark-watched` with `--cookies-from-browser`
+* [youtube] Improvements to JS player extraction and add extractor-args to skip it by [coletdjnz](https://github.com/coletdjnz)
+* [youtube] Retry on 'Unknown Error' by [coletdjnz](https://github.com/coletdjnz)
+* [youtube] Return full URL instead of just ID
+* [youtube] Warn when trying to download clips
+* [zdf] Improve format sorting
+* [zype] Extract subtitles from the m3u8 manifest by [fstirlitz](https://github.com/fstirlitz)
+* Allow `--force-write-archive` to work with `--flat-playlist`
+* Download subtitles in order of `--sub-langs`
+* Allow `0` in `--playlist-items`
+* Handle more playlist errors with `-i`
+* Fix `--no-get-comments`
+* Fix `extra_info` being reused across runs
+* Fix compat options `no-direct-merge` and `playlist-index`
+* Dump files should obey `--trim-filename` by [sulyi](https://github.com/sulyi)
+* [aes] Add `aes_gcm_decrypt_and_verify` by [sulyi](https://github.com/sulyi), [pukkandan](https://github.com/pukkandan)
+* [aria2c] Fix IV for some AES-128 streams by [shirt](https://github.com/shirt-dev)
+* [compat] Don't ignore `HOME` (if set) on windows
+* [cookies] Make browser names case insensitive
+* [cookies] Print warning for cookie decoding error only once
+* [extractor] Fix root-relative URLs in MPD by [DigitalDJ](https://github.com/DigitalDJ)
+* [ffmpeg] Add `aac_adtstoasc` when merging if needed
+* [fragment,aria2c] Generalize and refactor some code
+* [fragment] Avoid repeated request for AES key
+* [fragment] Fix range header when using `-N` and media sequence by [shirt](https://github.com/shirt-dev)
+* [hls,aes] Fallback to native implementation for AES-CBC and detect `Cryptodome` in addition to `Crypto`
+* [hls] Byterange + AES128 is supported by native downloader
+* [ModifyChapters] Improve sponsor chapter merge algorithm by [nihil-admirari](https://github.com/nihil-admirari)
+* [ModifyChapters] Minor fixes
+* [WebVTT] Adjust parser to accommodate PBS subtitles
+* [utils] Improve `extract_timezone` by [dirkf](https://github.com/dirkf)
+* [options] Fix `--no-config` and refactor reading of config files
+* [options] Strip spaces and ignore empty entries in list-like switches
+* [test/cookies] Improve logging
+* [build] Automate more of the release process by [animelover1984](https://github.com/animelover1984), [pukkandan](https://github.com/pukkandan)
+* [build] Fix sha256 by [nihil-admirari](https://github.com/nihil-admirari)
+* [build] Bring back brew taps by [nao20010128nao](https://github.com/nao20010128nao)
+* [build] Provide `--onedir` zip for Windows
+* [cleanup,docs] Add deprecation warning in docs for some counter-intuitive behaviour
+* [cleanup] Fix line endings for `nebula.py` by [glenn-slayden](https://github.com/glenn-slayden)
+* [cleanup] Improve `make clean-test` by [sulyi](https://github.com/sulyi)
+* [cleanup] Misc
+
+
+### 2021.09.02
+
+* **Native SponsorBlock** implementation by [nihil-admirari](https://github.com/nihil-admirari), [pukkandan](https://github.com/pukkandan)
+ * `--sponsorblock-remove CATS` removes specified chapters from file
+ * `--sponsorblock-mark CATS` marks the specified sponsor sections as chapters
+ * `--sponsorblock-chapter-title TMPL` to specify sponsor chapter template
+ * `--sponsorblock-api URL` to use a different API
+ * No re-encoding is done unless `--force-keyframes-at-cuts` is used
+ * The fetched sponsor sections are written to the infojson
+ * Deprecates: `--sponskrub`, `--no-sponskrub`, `--sponskrub-cut`, `--no-sponskrub-cut`, `--sponskrub-force`, `--no-sponskrub-force`, `--sponskrub-location`, `--sponskrub-args`
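+
+For illustration, two typical invocations of the options listed above (`URL` is a placeholder; the chapter-title template mirrors the option's assumed default form):
+
+```shell
+# Cut sponsor and self-promotion segments out of the file
+yt-dlp --sponsorblock-remove sponsor,selfpromo URL
+
+# Mark all known categories as chapters, with a custom chapter-title template
+yt-dlp --sponsorblock-mark all --sponsorblock-chapter-title "[SponsorBlock]: %(category_names)l" URL
+```
+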
+* Split `--embed-chapters` from `--embed-metadata` (it still implies the former by default)
+* Add option `--remove-chapters` to remove arbitrary chapters by [nihil-admirari](https://github.com/nihil-admirari), [pukkandan](https://github.com/pukkandan)
+* Add option `--force-keyframes-at-cuts` for more accurate cuts when removing and splitting chapters by [nihil-admirari](https://github.com/nihil-admirari)
+* Let `--match-filter` reject entries early
+ * Makes redundant: `--match-title`, `--reject-title`, `--min-views`, `--max-views`
+* [lazy_extractor] Improvements (It now passes all tests)
+ * Bugfix for when plugin directory doesn't exist by [kidonng](https://github.com/kidonng)
+ * Create instance only after pre-checking archive
+ * Import actual class if an attribute is accessed
+ * Fix `suitable` and add flake8 test
+* [downloader/ffmpeg] Experimental support for DASH manifests (including live)
+ * Your ffmpeg must have [this patch](https://github.com/FFmpeg/FFmpeg/commit/3249c757aed678780e22e99a1a49f4672851bca9) applied for YouTube DASH to work
+* [downloader/ffmpeg] Allow passing custom arguments before `-i`
+* [BannedVideo] Add extractor by [smege1001](https://github.com/smege1001), [blackjack4494](https://github.com/blackjack4494), [pukkandan](https://github.com/pukkandan)
+* [bilibili] Add category extractor by [animelover1984](https://github.com/animelover1984)
+* [Epicon] Add extractors by [Ashish0804](https://github.com/Ashish0804)
+* [filmmodu] Add extractor by [mzbaulhaque](https://github.com/mzbaulhaque)
+* [GabTV] Add extractor by [Ashish0804](https://github.com/Ashish0804)
+* [Hungama] Fix `HungamaSongIE` and add `HungamaAlbumPlaylistIE` by [Ashish0804](https://github.com/Ashish0804)
+* [ManotoTV] Add new extractors by [tandy1000](https://github.com/tandy1000)
+* [Niconico] Add Search extractors by [animelover1984](https://github.com/animelover1984), [pukkandan](https://github.com/pukkandan)
+* [Patreon] Add `PatreonUserIE` by [zenerdi0de](https://github.com/zenerdi0de)
+* [peloton] Add extractor by [IONECarter](https://github.com/IONECarter), [capntrips](https://github.com/capntrips), [pukkandan](https://github.com/pukkandan)
+* [ProjectVeritas] Add extractor by [Ashish0804](https://github.com/Ashish0804)
+* [radiko] Add extractors by [nao20010128nao](https://github.com/nao20010128nao)
+* [StarTV] Add extractor for `startv.com.tr` by [mrfade](https://github.com/mrfade), [coletdjnz](https://github.com/coletdjnz)
+* [tiktok] Add `TikTokUserIE` by [Ashish0804](https://github.com/Ashish0804), [pukkandan](https://github.com/pukkandan)
+* [Tokentube] Add extractor by [u-spec-png](https://github.com/u-spec-png)
+* [TV2Hu] Fix `TV2HuIE` and add `TV2HuSeriesIE` by [Ashish0804](https://github.com/Ashish0804)
+* [voicy] Add extractor by [nao20010128nao](https://github.com/nao20010128nao)
+* [adobepass] Fix Verizon SAML login by [nyuszika7h](https://github.com/nyuszika7h), [ParadoxGBB](https://github.com/ParadoxGBB)
+* [afreecatv] Fix adult VODs by [wlritchi](https://github.com/wlritchi)
+* [afreecatv] Tolerate failure to parse date string by [wlritchi](https://github.com/wlritchi)
+* [aljazeera] Fix extractor by [MinePlayersPE](https://github.com/MinePlayersPE)
+* [ATV.at] Fix extractor for ATV.at by [NeroBurner](https://github.com/NeroBurner), [coletdjnz](https://github.com/coletdjnz)
+* [bitchute] Fix test by [mahanstreamer](https://github.com/mahanstreamer)
+* [camtube] Remove obsolete extractor by [alerikaisattera](https://github.com/alerikaisattera)
+* [CDA] Add more formats by [u-spec-png](https://github.com/u-spec-png)
+* [eroprofile] Fix page skipping in albums by [jhwgh1968](https://github.com/jhwgh1968)
+* [facebook] Fix format sorting
+* [facebook] Fix metadata extraction by [kikuyan](https://github.com/kikuyan)
+* [facebook] Update onion URL by [Derkades](https://github.com/Derkades)
+* [HearThisAtIE] Fix extractor by [Ashish0804](https://github.com/Ashish0804)
+* [instagram] Add referrer to prevent throttling by [u-spec-png](https://github.com/u-spec-png), [kikuyan](https://github.com/kikuyan)
+* [iwara.tv] Extract more metadata by [BunnyHelp](https://github.com/BunnyHelp)
+* [iwara] Add thumbnail by [i6t](https://github.com/i6t)
+* [kakao] Fix extractor
+* [mediaset] Fix extraction for some videos by [nyuszika7h](https://github.com/nyuszika7h)
+* [Motherless] Fix extractor by [coletdjnz](https://github.com/coletdjnz)
+* [Nova] Fix extractor by [std-move](https://github.com/std-move)
+* [ParamountPlus] Fix geo verification by [shirt](https://github.com/shirt-dev)
+* [peertube] Handle new video URL format by [Chocobozzz](https://github.com/Chocobozzz)
+* [pornhub] Separate and fix playlist extractor by [mzbaulhaque](https://github.com/mzbaulhaque)
+* [reddit] Fix for quarantined subreddits by [ouwou](https://github.com/ouwou)
+* [ShemarooMe] Fix extractor by [Ashish0804](https://github.com/Ashish0804)
+* [soundcloud] Refetch `client_id` on 403
+* [tiktok] Fix metadata extraction
+* [TV2] Fix extractor by [Ashish0804](https://github.com/Ashish0804)
+* [tv5mondeplus] Fix extractor by [korli](https://github.com/korli)
+* [VH1,TVLand] Fix extractors by [Sipherdrakon](https://github.com/Sipherdrakon)
+* [Viafree] Fix extractor and extract subtitles by [coletdjnz](https://github.com/coletdjnz)
+* [XHamster] Extract `uploader_id` by [octotherp](https://github.com/octotherp)
+* [youtube] Add `shorts` to `_VALID_URL`
+* [youtube] Add av01 itags to known formats list by [blackjack4494](https://github.com/blackjack4494)
+* [youtube] Extract error messages from HTTPError response by [coletdjnz](https://github.com/coletdjnz)
+* [youtube] Fix subtitle names
+* [youtube] Prefer audio stream that YouTube considers default
+* [youtube] Remove annotations and deprecate `--write-annotations` by [coletdjnz](https://github.com/coletdjnz)
+* [Zee5] Fix extractor and add subtitles by [Ashish0804](https://github.com/Ashish0804)
+* [aria2c] Obey `--rate-limit`
+* [EmbedSubtitle] Continue even if some files are missing
+* [extractor] Better error message for DRM
+* [extractor] Common function `_match_valid_url`
+* [extractor] Show video id in error messages if possible
+* [FormatSort] Remove priority of `lang`
+* [options] Add `_set_from_options_callback`
+* [SubtitleConvertor] Fix bug during subtitle conversion
+* [utils] Add `parse_qs`
+* [webvtt] Fix timestamp overflow adjustment by [fstirlitz](https://github.com/fstirlitz)
+* Bugfix for `--replace-in-metadata`
+* Don't try to merge with final extension
+* Fix `--force-overwrites` when using `-k`
+* Fix `--no-prefer-free-formats` by [CeruleanSky](https://github.com/CeruleanSky)
+* Fix `-F` for extractors that directly return url
+* Fix `-J` when there are failed videos
+* Fix `extra_info` being reused across runs
+* Fix `playlist_index` not obeying `playlist_start` and add tests
+* Fix resuming of single formats when using `--no-part`
+* Revert erroneous use of the `Content-Length` header by [fstirlitz](https://github.com/fstirlitz)
+* Use `os.replace` where applicable by [paulwrubel](https://github.com/paulwrubel)
+* [build] Add homebrew taps `yt-dlp/taps/yt-dlp` by [nao20010128nao](https://github.com/nao20010128nao)
+* [build] Fix bug in making `yt-dlp.tar.gz`
+* [docs] Fix some typos by [pukkandan](https://github.com/pukkandan), [zootedb0t](https://github.com/zootedb0t)
+* [cleanup] Replace improper use of tab in trovo by [glenn-slayden](https://github.com/glenn-slayden)
+
+
+### 2021.08.10
+
+* Add option `--replace-in-metadata`
+* Add option `--no-simulate` to not simulate even when `--print` or `--list...` are used - Deprecates `--print-json`
+* Allow entire infodict to be printed using `%()s` - makes `--dump-json` redundant
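+
+A sketch of how these combine (`URL` is a placeholder); `%()j` is assumed to serialize the whole infodict via the `j` (json) format type:
+
+```shell
+# Print the entire infodict as JSON, similar in spirit to --dump-json
+yt-dlp --print "%()j" URL
+
+# --no-simulate: print the title but still perform the download
+yt-dlp --no-simulate --print title URL
+```
+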
+* Allow multiple `--exec` and `--exec-before-download`
+* Add regex to `--match-filter`
+* Add all format filtering operators also to `--match-filter` by [max-te](https://github.com/max-te)
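+
+Hedged examples of the extended filter syntax (`URL` is a placeholder):
+
+```shell
+# `~=` matches a string field against a regex
+yt-dlp --match-filter "title ~= (?i)\bremix\b" URL
+
+# Numeric comparison operators from format filtering, combined with `&`
+yt-dlp --match-filter "like_count > 100 & duration < 600" URL
+```
+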
+* Add compat-option `no-keep-subs`
+* [adobepass] Add MSO Cablevision by [Jessecar96](https://github.com/Jessecar96)
+* [BandCamp] Add BandcampMusicIE by [Ashish0804](https://github.com/Ashish0804)
+* [blackboardcollaborate] Add new extractor by [mzbaulhaque](https://github.com/mzbaulhaque)
+* [eroprofile] Add album downloader by [jhwgh1968](https://github.com/jhwgh1968)
+* [mirrativ] Add extractors by [nao20010128nao](https://github.com/nao20010128nao)
+* [openrec] Add extractors by [nao20010128nao](https://github.com/nao20010128nao)
+* [nbcolympics:stream] Fix extractor by [nchilada](https://github.com/nchilada), [pukkandan](https://github.com/pukkandan)
+* [nbcolympics] Update extractor for 2020 olympics by [wesnm](https://github.com/wesnm)
+* [paramountplus] Separate extractor and fix some titles by [shirt](https://github.com/shirt-dev), [pukkandan](https://github.com/pukkandan)
+* [RCTIPlus] Support events and TV by [MinePlayersPE](https://github.com/MinePlayersPE)
+* [Newgrounds] Improve extractor and fix playlist by [u-spec-png](https://github.com/u-spec-png)
+* [aenetworks] Update `_THEPLATFORM_KEY` and `_THEPLATFORM_SECRET` by [wesnm](https://github.com/wesnm)
+* [crunchyroll] Fix thumbnail by [funniray](https://github.com/funniray)
+* [HotStar] Use API for metadata and extract subtitles by [Ashish0804](https://github.com/Ashish0804)
+* [instagram] Fix comments extraction by [u-spec-png](https://github.com/u-spec-png)
+* [peertube] Fix videos without description by [u-spec-png](https://github.com/u-spec-png)
+* [twitch:clips] Extract `display_id` by [dirkf](https://github.com/dirkf)
+* [viki] Print error message from API request
+* [Vine] Remove invalid formats by [u-spec-png](https://github.com/u-spec-png)
+* [VrtNU] Fix XSRF token by [pgaig](https://github.com/pgaig)
+* [vrv] Fix thumbnail extraction by [funniray](https://github.com/funniray)
+* [youtube] Add extractor-arg `include-live-dash` to show live dash formats
+* [youtube] Improve signature function detection by [PSlava](https://github.com/PSlava)
+* [youtube] Raise appropriate error when API pages can't be downloaded
+* Ensure `_write_ytdl_file` closes file handle on error
+* Fix `--compat-options filename` by [stdedos](https://github.com/stdedos)
+* Fix issues with infodict sanitization
+* Fix resuming when using `--no-part`
+* Fix wrong extension for intermediate files
+* Handle `BrokenPipeError` by [kikuyan](https://github.com/kikuyan)
+* Show libraries present in verbose header
+* [extractor] Detect `sttp` as subtitles in MPD by [fstirlitz](https://github.com/fstirlitz)
+* [extractor] Reset non-repeating warnings per video
+* [ffmpeg] Fix streaming `mp4` to `stdout`
+* [ffmpeg] Allow `--ffmpeg-location` to be a file with a different name
+* [utils] Fix `InAdvancePagedList.__getitem__`
+* [utils] Fix `traverse_obj` depth when `is_user_input`
+* [webvtt] Merge daisy-chained duplicate cues by [fstirlitz](https://github.com/fstirlitz)
+* [build] Use custom build of `pyinstaller` by [shirt](https://github.com/shirt-dev)
+* [tests:download] Add batch testing for extractors (`test_YourExtractor_all`)
+* [docs] Document which fields `--add-metadata` adds to the file
+* [docs] Fix some mistakes and improve doc
+* [cleanup] Misc code cleanup
+
+
+### 2021.08.02
+
+* Add logo, banner and donate links
+* [outtmpl] Expand and escape environment variables
+* [outtmpl] Add format types `j` (json), `l` (comma delimited list), `q` (quoted for terminal)
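+
+A quick sketch of the three format types (`URL` is a placeholder):
+
+```shell
+yt-dlp --print "%(formats)j" URL      # value serialized as JSON
+yt-dlp --print "%(tags)l" URL         # comma-delimited list
+yt-dlp --exec "echo %(title)q" URL    # value quoted for the terminal
+```
+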
+* [downloader] Allow streaming some unmerged formats to stdout using ffmpeg
+* [youtube] **Age-gate bypass**
+ * Add `agegate` clients by [pukkandan](https://github.com/pukkandan), [MinePlayersPE](https://github.com/MinePlayersPE)
+ * Add `thirdParty` to agegate clients to bypass more videos
+ * Simplify client definitions, expose `embedded` clients
+ * Improve age-gate detection by [coletdjnz](https://github.com/coletdjnz)
+ * Fix default global API key by [coletdjnz](https://github.com/coletdjnz)
+ * Add `creator` clients for age-gate bypass using unverified accounts by [zerodytrash](https://github.com/zerodytrash), [coletdjnz](https://github.com/coletdjnz), [pukkandan](https://github.com/pukkandan)
+* [adobepass] Add MSO Sling TV by [wesnm](https://github.com/wesnm)
+* [CBS] Add ParamountPlusSeriesIE by [Ashish0804](https://github.com/Ashish0804)
+* [dplay] Add `ScienceChannelIE` by [Sipherdrakon](https://github.com/Sipherdrakon)
+* [UtreonIE] Add extractor by [Ashish0804](https://github.com/Ashish0804)
+* [youtube] Add `mweb` client by [coletdjnz](https://github.com/coletdjnz)
+* [youtube] Add `player_client=all`
+* [youtube] Force `hl=en` for comments by [coletdjnz](https://github.com/coletdjnz)
+* [youtube] Fix format sorting when using alternate clients
+* [youtube] Misc cleanup by [pukkandan](https://github.com/pukkandan), [coletdjnz](https://github.com/coletdjnz)
+* [youtube] Extract SAPISID only once
+* [CBS] Add fallback by [llacb47](https://github.com/llacb47), [pukkandan](https://github.com/pukkandan)
+* [Hotstar] Support cookies by [Ashish0804](https://github.com/Ashish0804)
+* [HotStarSeriesIE] Fix regex by [Ashish0804](https://github.com/Ashish0804)
+* [bilibili] Improve `_VALID_URL`
+* [mediaset] Fix extraction by [nixxo](https://github.com/nixxo)
+* [Mxplayer] Add h265 formats by [Ashish0804](https://github.com/Ashish0804)
+* [RCTIPlus] Remove PhantomJS dependency by [MinePlayersPE](https://github.com/MinePlayersPE)
+* [tenplay] Add MA15+ age limit by [pento](https://github.com/pento)
+* [vidio] Fix login error detection by [MinePlayersPE](https://github.com/MinePlayersPE)
+* [vimeo] Better extraction of original file by [Ashish0804](https://github.com/Ashish0804)
+* [generic] Support KVS player (replaces ThisVidIE) by [rigstot](https://github.com/rigstot)
+* Add compat-option `no-clean-infojson`
+* Remove `asr` appearing twice in `-F`
+* Set `home:` as the default key for `-P`
+* [utils] Fix slicing of reversed `LazyList`
+* [FormatSort] Fix bug for audio with unknown codec
+* [test:download] Support testing with `ignore_no_formats_error`
+* [cleanup] Refactor some code
+
+
+### 2021.07.24
+
+* [youtube:tab] Extract video duration early
+* [downloader] Pass `info_dict` to `progress_hook`s
+* [youtube] Fix age-gated videos for API clients when cookies are supplied by [coletdjnz](https://github.com/coletdjnz)
+* [youtube] Disable `get_video_info` age-gate workaround - This endpoint seems to be completely dead
+* [youtube] Try all clients even if age-gated
+* [youtube] Fix subtitles only being extracted from the first client
+* [youtube] Simplify `_get_text`
+* [cookies] Bugfix for Microsoft Edge on macOS
+* [cookies] Handle `sqlite` `ImportError` gracefully by [mbway](https://github.com/mbway)
+* [cookies] Handle errors when importing `keyring`
+
+### 2021.07.21
+
+* **Add option `--cookies-from-browser`** to load cookies from a browser by [mbway](https://github.com/mbway)
+ * Usage: `--cookies-from-browser BROWSER[:PROFILE_NAME_OR_PATH]`
+ * Also added `--no-cookies-from-browser`
+ * To decrypt chromium cookies, `keyring` is needed for UNIX and `pycryptodome` for Windows
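+
+Typical invocations, following the usage shown above (`URL` is a placeholder):
+
+```shell
+yt-dlp --cookies-from-browser firefox URL
+
+# A profile name or path may follow the browser name
+yt-dlp --cookies-from-browser "chrome:Profile 1" URL
+```
+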
+* Add option `--exec-before-download`
+* Add field `live_status`
+* [FFmpegMetadata] Add language of each stream and some refactoring
+* [douyin] Add extractor by [pukkandan](https://github.com/pukkandan), [pyx](https://github.com/pyx)
+* [pornflip] Add extractor by [mzbaulhaque](https://github.com/mzbaulhaque)
+* **[youtube] Extract data from multiple clients** by [pukkandan](https://github.com/pukkandan), [coletdjnz](https://github.com/coletdjnz)
+ * `player_client` now accepts multiple clients
+ * Default `player_client` = `android,web`
+ * This uses twice as many requests, but avoids throttling for most videos while also not losing any formats
+ * Music clients can be specifically requested and are enabled by default for `music.youtube.com` URLs
+ * Added `player_client=ios` (Known issue: formats from ios are not sorted correctly)
+ * Add age-gate bypass for android and ios clients
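+
+A sketch of the `player_client` usage described above (`URL` is a placeholder):
+
+```shell
+# Request a single client
+yt-dlp --extractor-args "youtube:player_client=android" URL
+
+# Multiple clients can be combined (the new default is android,web)
+yt-dlp --extractor-args "youtube:player_client=android,web" URL
+```
+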
+* [youtube] Extract more thumbnails
+ * The thumbnail URLs are hard-coded and their actual existence is tested lazily
+ * Added option `--no-check-formats` to not test them
+* [youtube] Misc fixes
+ * Improve extraction of livestream metadata by [pukkandan](https://github.com/pukkandan), [krichbanana](https://github.com/krichbanana)
+ * Hide live dash formats since they can't be downloaded anyway
+ * Fix authentication when using multiple accounts by [coletdjnz](https://github.com/coletdjnz)
+ * Fix controversial videos when requested via API by [coletdjnz](https://github.com/coletdjnz)
+ * Fix session index extraction and headers for non-web player clients by [coletdjnz](https://github.com/coletdjnz)
+ * Make `--extractor-retries` work for more errors
+ * Fix sorting of 3gp format
+ * Sanity check `chapters` (and refactor related code)
+ * Make `parse_time_text` and `_extract_chapters` non-fatal
+ * Misc cleanup and bug fixes by [coletdjnz](https://github.com/coletdjnz)
+* [youtube:tab] Fix channels tab
+* [youtube:tab] Extract playlist availability by [coletdjnz](https://github.com/coletdjnz)
+* **[youtube:comments] Move comment extraction to new API** by [coletdjnz](https://github.com/coletdjnz)
+ * Adds extractor-args `comment_sort` (`top`/`new`), `max_comments`, `max_comment_depth`
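+
+A hedged example, assuming `;` separates multiple extractor-args as in later documentation (`URL` is a placeholder):
+
+```shell
+yt-dlp --write-comments --extractor-args "youtube:comment_sort=new;max_comments=100" URL
+```
+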
+* [youtube:comments] Fix `is_favorited`, improve `like_count` parsing by [coletdjnz](https://github.com/coletdjnz)
+* [BravoTV] Improve metadata extraction by [kevinoconnor7](https://github.com/kevinoconnor7)
+* [crunchyroll:playlist] Force http
+* [yahoo:gyao:player] Relax `_VALID_URL` by [nao20010128nao](https://github.com/nao20010128nao)
+* [nebula] Authentication via tokens from cookie jar by [hheimbuerger](https://github.com/hheimbuerger), [TpmKranz](https://github.com/TpmKranz)
+* [RTP] Fix extraction and add subtitles by [fstirlitz](https://github.com/fstirlitz)
+* [viki] Rewrite extractors and add extractor-arg `video_types` to `vikichannel` by [zackmark29](https://github.com/zackmark29), [pukkandan](https://github.com/pukkandan)
+* [vlive] Extract thumbnail directly in addition to the one from Naver
+* [generic] Extract previously missed subtitles by [fstirlitz](https://github.com/fstirlitz)
+* [generic] Extract everything in the SMIL manifest and detect discarded subtitles by [fstirlitz](https://github.com/fstirlitz)
+* [embedthumbnail] Fix `_get_thumbnail_resolution`
+* [metadatafromfield] Do not detect numbers as field names
+* Fix selectors `all`, `mergeall` and add tests
+* Errors in playlist extraction should obey `--ignore-errors`
+* Fix bug where `original_url` was not propagated when `_type`=`url`
+* Revert "Merge webm formats into mkv if thumbnails are to be embedded (#173)"
+ * This was wrongly checking for `write_thumbnail`
+* Improve `extractor_args` parsing
+* Rename `NOTE` in `-F` to `MORE INFO` since it is often confused with `format_note`
+* Add `only_once` param for `write_debug` and `report_warning`
+* [extractor] Allow extracting multiple groups in `_search_regex` by [fstirlitz](https://github.com/fstirlitz)
+* [utils] Improve `traverse_obj`
+* [utils] Add `variadic`
+* [utils] Improve `js_to_json` comment regex by [fstirlitz](https://github.com/fstirlitz)
+* [webvtt] Fix timestamps
+* [compat] Remove unnecessary code
+* [docs] Fix default of multistreams
+
+
+### 2021.07.07
+
+* Merge youtube-dl: Upto [commit/a803582](https://github.com/ytdl-org/youtube-dl/commit/a8035827177d6b59aca03bd717acb6a9bdd75ada)
+* Add `--extractor-args` to pass some extractor-specific arguments. See [readme](https://github.com/yt-dlp/yt-dlp#extractor-arguments)
+ * Add extractor option `skip` for `youtube`, e.g. `--extractor-args youtube:skip=hls,dash`
+ * Deprecates `--youtube-skip-dash-manifest`, `--youtube-skip-hls-manifest`, `--youtube-include-dash-manifest`, `--youtube-include-hls-manifest`
+* Allow `--list...` options to work with `--print`, `--quiet` and other `--list...` options
+* [youtube] Use `player` API for additional video extraction requests by [coletdjnz](https://github.com/coletdjnz)
+ * **Fixes youtube premium music** (format 141) extraction
+ * Adds extractor option `player_client` = `web`/`android`
+ * **`--extractor-args youtube:player_client=android` works around the throttling** for the time-being
+ * Adds extractor option `player_skip=config`
+ * Adds age-gate fallback using embedded client
+* [youtube] Choose correct Live chat API for upcoming streams by [krichbanana](https://github.com/krichbanana)
+* [youtube] Fix subtitle names for age-gated videos
+* [youtube:comments] Fix error handling and add `itct` to params by [coletdjnz](https://github.com/coletdjnz)
+* [youtube_live_chat] Fix download with cookies by [siikamiika](https://github.com/siikamiika)
+* [youtube_live_chat] use `clickTrackingParams` by [siikamiika](https://github.com/siikamiika)
+* [Funimation] Rewrite extractor
+ * Add `FunimationShowIE` by [Mevious](https://github.com/Mevious)
+ * **Treat the different versions of an episode as different formats of a single video**
+ * This changes the video `id` and will break existing archives
+ * Compat option `seperate-video-versions` to fall back to old behavior including using the old video ids
+ * Support direct `/player/` URL
+ * Extractor options `language` and `version` to pre-select them during extraction
+ * These options may be removed in the future if we can extract all formats without additional network requests
+ * Do not rely on these for format selection and use `-f` filters instead
+* [AdobePass] Add Spectrum MSO by [kevinoconnor7](https://github.com/kevinoconnor7), [ohmybahgosh](https://github.com/ohmybahgosh)
+* [facebook] Extract description and fix title
+* [fancode] Fix extraction, support live and allow login with refresh token by [zenerdi0de](https://github.com/zenerdi0de)
+* [plutotv] Improve `_VALID_URL`
+* [RCTIPlus] Add extractor by [MinePlayersPE](https://github.com/MinePlayersPE)
+* [Soundcloud] Allow login using oauth token by [blackjack4494](https://github.com/blackjack4494)
+* [TBS] Support livestreams by [llacb47](https://github.com/llacb47)
+* [videa] Fix extraction by [nyuszika7h](https://github.com/nyuszika7h)
+* [yahoo] Fix extraction by [llacb47](https://github.com/llacb47), [pukkandan](https://github.com/pukkandan)
+* Process videos when using `--ignore-no-formats-error` by [krichbanana](https://github.com/krichbanana)
+* Fix `--throttled-rate` when using `--load-info-json`
+* Fix `--flat-playlist` when entry has no `ie_key`
+* Fix `check_formats` catching `ExtractorError` instead of `DownloadError`
+* Fix deprecated option `--list-formats-old`
+* [downloader/ffmpeg] Fix `--ppa` when using simultaneous download
+* [extractor] Prevent unnecessary download of hls manifests and refactor `hls_split_discontinuity`
+* [fragment] Handle status of download and errors in threads correctly; and minor refactoring
+* [thumbnailsconvertor] Treat `jpeg` as `jpg`
+* [utils] Fix issues with `LazyList` reversal
+* [extractor] Allow extractors to set their own login hint
+* [cleanup] Simplify format selector code with `LazyList` and `yield from`
+* [cleanup] Clean `extractor.common._merge_subtitles` signature
+* [cleanup] Fix some typos
+
+
+### 2021.06.23
+
+* Merge youtube-dl: Upto [commit/379f52a](https://github.com/ytdl-org/youtube-dl/commit/379f52a4954013767219d25099cce9e0f9401961)
+* **Add option `--throttled-rate`** below which video data is re-extracted
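+
+For example, using the standard rate syntax and a placeholder `URL`:
+
+```shell
+# Re-extract video data whenever the download speed drops below 100 KiB/s
+yt-dlp --throttled-rate 100K URL
+```
+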
+* [fragment] **Merge during download for `-N`**, and refactor `hls`/`dash`
+* [websockets] Add `WebSocketFragmentFD` by [nao20010128nao](https://github.com/nao20010128nao), [pukkandan](https://github.com/pukkandan)
+* Allow `images` formats in addition to video/audio
+* [downloader/mhtml] Add new downloader for slideshows/storyboards by [fstirlitz](https://github.com/fstirlitz)
+* [youtube] Temporary **fix for age-gate**
+* [youtube] Support ongoing live chat by [siikamiika](https://github.com/siikamiika)
+* [youtube] Improve SAPISID cookie handling by [coletdjnz](https://github.com/coletdjnz)
+* [youtube] Login is not needed for `:ytrec`
+* [youtube] Non-fatal alert reporting for unavailable videos page by [coletdjnz](https://github.com/coletdjnz)
+* [twitcasting] Websocket support by [nao20010128nao](https://github.com/nao20010128nao)
+* [mediasite] Extract slides by [fstirlitz](https://github.com/fstirlitz)
+* [funimation] Extract subtitles
+* [pornhub] Extract `cast`
+* [hotstar] Use server time for authentication instead of local time
+* [EmbedThumbnail] Fix for already downloaded thumbnail
+* [EmbedThumbnail] Add compat-option `embed-thumbnail-atomicparsley`
+* Expand `--check-formats` to thumbnails
+* Fix id sanitization in filenames
+* Skip fixup of existing files and add `--fixup force` to force it
+* Better error handling of syntax errors in `-f`
+* Use `NamedTemporaryFile` for `--check-formats`
+* [aria2c] Lower `--min-split-size` for HTTP downloads
+* [options] Rename `--add-metadata` to `--embed-metadata`
+* [utils] Improve `LazyList` and add tests
+* [build] Build Windows x86 version with py3.7 and remove redundant tests by [pukkandan](https://github.com/pukkandan), [shirt](https://github.com/shirt-dev)
+* [docs] Clarify that `--embed-metadata` embeds chapter markers
+* [cleanup] Refactor fixup
+
+
+### 2021.06.09
+
+* Fix bug where `%(field)d` in filename template throws error
+* [outtmpl] Improve offset parsing
+* [test] More rigorous tests for `prepare_filename`
+
+### 2021.06.08
+
+* Remove support for obsolete Python versions: Only 3.6+ is now supported
+* Merge youtube-dl: Upto [commit/c2350ca](https://github.com/ytdl-org/youtube-dl/commit/c2350cac243ba1ec1586fe85b0d62d1b700047a2)
+* [hls] Fix decryption for multithreaded downloader
+* [extractor] Fix pre-checking archive for some extractors
+* [extractor] Fix FourCC fallback when parsing ISM by [fstirlitz](https://github.com/fstirlitz)
+* [twitcasting] Add TwitCastingUserIE, TwitCastingLiveIE by [pukkandan](https://github.com/pukkandan), [nao20010128nao](https://github.com/nao20010128nao)
+* [vidio] Add VidioPremierIE and VidioLiveIE by [MinePlayersPE](https://github.com/MinePlayersPE)
+* [viki] Fix extraction from [ytdl-org/youtube-dl@59e583f](https://github.com/ytdl-org/youtube-dl/commit/59e583f7e8530ca92776c866897d895c072e2a82)
+* [youtube] Support shorts URL
+* [zoom] Extract transcripts as subtitles
+* Add field `original_url` with the user-inputted URL
+* Fix and refactor `prepare_outtmpl`
+* Make more fields available for `--print` when used with `--flat-playlist`
+* [utils] Generalize `traverse_dict` to `traverse_obj`
+* [downloader/ffmpeg] Hide FFmpeg banner unless in verbose mode by [fstirlitz](https://github.com/fstirlitz)
+* [build] Release `yt-dlp.tar.gz`
+* [build,update] Add GNU-style SHA512 and prepare updater for similar SHA256 by [nihil-admirari](https://github.com/nihil-admirari)
+* [pyinst] Show Python version in exe metadata by [nihil-admirari](https://github.com/nihil-admirari)
+* [docs] Improve documentation of dependencies
+* [cleanup] Mark unused files
+* [cleanup] Point all shebang to `python3` by [fstirlitz](https://github.com/fstirlitz)
+* [cleanup] Remove duplicate file `trovolive.py`
+
+
+### 2021.06.01
+
+* Merge youtube-dl: Upto [commit/d495292](https://github.com/ytdl-org/youtube-dl/commit/d495292852b6c2f1bd58bc2141ff2b0265c952cf)
+* Pre-check archive and filters during playlist extraction
+* Handle Basic Auth `user:pass` in URLs by [hhirtz](https://github.com/hhirtz) and [pukkandan](https://github.com/pukkandan)
+* [archiveorg] Add YoutubeWebArchiveIE by [coletdjnz](https://github.com/coletdjnz) and [alex-gedeon](https://github.com/alex-gedeon)
+* [fancode] Add extractor by [rhsmachine](https://github.com/rhsmachine)
+* [patreon] Support vimeo embeds by [rhsmachine](https://github.com/rhsmachine)
+* [Saitosan] Add new extractor by [llacb47](https://github.com/llacb47)
+* [ShemarooMe] Add extractor by [Ashish0804](https://github.com/Ashish0804) and [pukkandan](https://github.com/pukkandan)
+* [telemundo] Add extractor by [king-millez](https://github.com/king-millez)
+* [SonyLIV] Add SonyLIVSeriesIE and subtitle support by [Ashish0804](https://github.com/Ashish0804)
+* [Hotstar] Add HotStarSeriesIE by [Ashish0804](https://github.com/Ashish0804)
+* [Voot] Add VootSeriesIE by [Ashish0804](https://github.com/Ashish0804)
+* [vidio] Support login and premium videos by [MinePlayersPE](https://github.com/MinePlayersPE)
+* [fragment] When using `-N`, do not keep the fragment content in memory
+* [ffmpeg] Download and merge in a single step if possible
+* [ThumbnailsConvertor] Support conversion to `png` and make it the default by [louie-github](https://github.com/louie-github)
+* [VideoConvertor] Generalize with remuxer and allow conditional recoding
+* [EmbedThumbnail] Embed in `mp4`/`m4a` using mutagen by [tripulse](https://github.com/tripulse) and [pukkandan](https://github.com/pukkandan)
+* [EmbedThumbnail] Embed if any thumbnail was downloaded, not just the best
+* [EmbedThumbnail] Correctly escape filename
+* [update] Replace self without launching a subprocess on Windows
+* [update] Block further updates for unsupported systems
+* Refactor `__process_playlist` by creating `LazyList`
+* Write messages to `stderr` when both `quiet` and `verbose`
+* Sanitize and sort playlist thumbnails
+* Remove `None` values from `info.json`
+* [extractor] Always prefer native hls downloader by default
+* [extractor] Skip subtitles without URI in m3u8 manifests by [hheimbuerger](https://github.com/hheimbuerger)
+* [extractor] Functions to parse `socket.io` response as `json` by [pukkandan](https://github.com/pukkandan) and [llacb47](https://github.com/llacb47)
+* [extractor] Allow `note=False` when extracting manifests
+* [utils] Escape URLs in `sanitized_Request`, not `sanitize_url`
+* [hls] Disable external downloader for `webvtt`
+* [youtube] `/live` URLs should raise error if channel is not live
+* [youtube] Bug fixes
+* [zee5] Fix m3u8 formats' extension
+* [ard] Allow URLs without `-` before id by [olifre](https://github.com/olifre)
+* [cleanup] `YoutubeDL._match_entry`
+* [cleanup] Refactor updater
+* [cleanup] Refactor ffmpeg convertors
+* [cleanup] setup.py
+
+
+### 2021.05.20
+
+* **Youtube improvements**:
+ * Support youtube music `MP`, `VL` and `browse` pages
+ * Extract more formats for youtube music by [craftingmod](https://github.com/craftingmod), [coletdjnz](https://github.com/coletdjnz) and [pukkandan](https://github.com/pukkandan)
+ * Extract multiple subtitles in same language by [pukkandan](https://github.com/pukkandan) and [tpikonen](https://github.com/tpikonen)
+ * Redirect channels that don't have a `videos` tab to their `UU` playlists
+ * Support in-channel search
+ * Sort audio-only formats correctly
+ * Always extract `maxresdefault` thumbnail
+ * Extract audio language
+ * Add subtitle language names by [nixxo](https://github.com/nixxo) and [tpikonen](https://github.com/tpikonen)
+ * Show alerts only from the final webpage
+ * Add `html5=1` param to `get_video_info` page requests by [coletdjnz](https://github.com/coletdjnz)
+ * Better message when login required
+* **Add option `--print`**: to print any field/template
+ * Makes redundant: `--get-description`, `--get-duration`, `--get-filename`, `--get-format`, `--get-id`, `--get-thumbnail`, `--get-title`, `--get-url`
+* Field `additional_urls` to download additional videos from metadata using [`--parse-metadata`](https://github.com/yt-dlp/yt-dlp#modifying-metadata)
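+
+A sketch with a hypothetical regex; any URLs captured into the `additional_urls` group are queued for download (`URL` is a placeholder):
+
+```shell
+# Hypothetical pattern: pull extra video links out of the description
+yt-dlp --parse-metadata "description:(?P<additional_urls>https?://\S+)" URL
+```
+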
+* Merge youtube-dl: Upto [commit/dfbbe29](https://github.com/ytdl-org/youtube-dl/commit/dfbbe2902fc67f0f93ee47a8077c148055c67a9b)
+* Write thumbnail of playlist and add `pl_thumbnail` outtmpl key
+* [embedthumbnail] Add `flac` support and refactor `mutagen` code by [pukkandan](https://github.com/pukkandan) and [tripulse](https://github.com/tripulse)
+* [audius:artist] Add extractor by [king-millez](https://github.com/king-millez)
+* [parlview] Add extractor by [king-millez](https://github.com/king-millez)
+* [tenplay] Fix extractor by [king-millez](https://github.com/king-millez)
+* [rmcdecouverte] Generalize `_VALID_URL`
+* Add compat-option `no-attach-infojson`
+* Add field `name` for subtitles
+* Ensure `post_extract` and `pre_process` only run once
+* Fix `--check-formats` when there is network error
+* Standardize `write_debug` and `get_param`
+* [options] Alias `--write-comments`, `--no-write-comments`
+* [options] Refactor callbacks
+* [test:download] Only extract enough videos for `playlist_mincount`
+* [extractor] Bugfix for when `compat_opts` is not given
+* [build] Fix x86 build by [shirt](https://github.com/shirt-dev)
+* [cleanup] Code formatting, youtube tests and readme
+
+### 2021.05.11
+* **Deprecate support for Python versions < 3.6**
+* **Subtitle extraction from manifests** by [fstirlitz](https://github.com/fstirlitz). See [be6202f](https://github.com/yt-dlp/yt-dlp/commit/be6202f12b97858b9d716e608394b51065d0419f) for details
+* **Improve output template:**
+ * Allow slicing lists/strings using `field.start:end:step`
+ * A field can also be used as offset like `field1+num+field2`
+ * A default value can be given using `field|default`
+ * Prevent invalid fields from causing errors
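+
+Hedged examples of the template features listed above, with `URL` as a placeholder (the slice spelling `title.:50` is assumed from the `field.start:end:step` description):
+
+```shell
+# Slice the title to its first 50 characters
+yt-dlp -o "%(title.:50)s.%(ext)s" URL
+
+# Fall back to a default when the field is empty
+yt-dlp -o "%(uploader|Unknown)s - %(title)s.%(ext)s" URL
+```
+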
+* **Merge youtube-dl**: Upto [commit/a726009](https://github.com/ytdl-org/youtube-dl/commit/a7260099873acc6dc7d76cafad2f6b139087afd0)
+* **Remove options** `-l`, `-t`, `-A` completely and disable `--auto-number`, `--title`, `--literal`, `--id`
+* [Plugins] Prioritize plugins over standard extractors and prevent plugins from overwriting the standard extractor classes
+* [downloader] Fix `quiet` and `to_stderr`
+* [fragment] Ensure the file is closed on error
+* [fragment] Make sure first segment is not skipped
+* [aria2c] Fix whitespace being stripped off
+* [embedthumbnail] Fix bug where jpeg thumbnails were converted again
+* [FormatSort] Fix for when some formats have quality and others don't
+* [utils] Add `network_exceptions`
+* [utils] Escape URL while sanitizing
+* [ukcolumn] Add Extractor
+* [whowatch] Add extractor by [nao20010128nao](https://github.com/nao20010128nao)
+* [CBS] Improve `_VALID_URL` to support movies
+* [crackle] Improve extraction
+* [curiositystream] Fix collections
+* [francetvinfo] Improve video id extraction
+* [generic] Respect the encoding in manifest
+* [limelight] Obey `allow_unplayable_formats`
+* [mediasite] Generalize URL pattern by [fstirlitz](https://github.com/fstirlitz)
+* [mxplayer] Add MxplayerShowIE by [Ashish0804](https://github.com/Ashish0804)
+* [nebula] Move to nebula.app by [Lamieur](https://github.com/Lamieur)
+* [niconico] Fix HLS formats by [CXwudi](https://github.com/CXwudi), [tsukumijima](https://github.com/tsukumijima), [nao20010128nao](https://github.com/nao20010128nao) and [pukkandan](https://github.com/pukkandan)
+* [niconico] Fix title and thumbnail extraction by [CXwudi](https://github.com/CXwudi)
+* [plutotv] Extract subtitles from manifests
+* [plutotv] Fix format extraction for some urls
+* [rmcdecouverte] Improve `_VALID_URL`
+* [sonyliv] Fix `title` and `series` extraction by [Ashish0804](https://github.com/Ashish0804)
+* [tubi] Raise "no video formats" error when video url is empty
+* [youtube:tab] Detect playlists inside community posts
+* [youtube] Add `oembed` to reserved names
+* [zee5] Fix extraction for some URLs by [Hadi0609](https://github.com/Hadi0609)
+* [zee5] Fix py2 compatibility
+* Fix `playlist_index` and add `playlist_autonumber`. See [#302](https://github.com/yt-dlp/yt-dlp/issues/302) for details
+* Add experimental option `--check-formats` to test the URLs before format selection
+* Option `--compat-options` to revert [some of yt-dlp's changes](https://github.com/yt-dlp/yt-dlp#differences-in-default-behavior)
+ * Deprecates `--list-formats-as-table`, `--list-formats-old`
+* Fix number of digits in `%(playlist_index)s`
+* Fix case sensitivity of format selector
+* Revert "[core] be able to hand over id and title using url_result"
+* Do not strip out whitespaces in `-o` and `-P`
+* Fix `preload_download_archive` writing verbose message to `stdout`
+* Move option warnings to `YoutubeDL` so that they obey `--no-warnings` and can output colors
+* Py2 compatibility for `FileNotFoundError`
+
+
+### 2021.04.22
+* **Improve output template:**
+ * Objects can be traversed like `%(field.key1.key2)s`
+ * An offset can be added to numeric fields as `%(field+N)s`
+ * Deprecates `--autonumber-start`
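+
+Hedged sketches of object traversal and numeric offsets (`URL` is a placeholder; the exact field paths are illustrative assumptions):
+
+```shell
+# Traverse into nested objects
+yt-dlp --get-filename -o "%(subtitles.en.-1.ext)s" URL
+
+# Add a numeric offset to a field
+yt-dlp -o "%(playlist_index+10)s - %(title)s.%(ext)s" URL
+```
+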
+* **Improve `--sub-langs`:**
+ * Treat `--sub-langs` entries as regex
+ * `all` can be used to refer to all the subtitles
+ * Language codes can be prefixed with `-` to exclude them
+ * Deprecates `--all-subs`
+* Add option `--ignore-no-formats-error` to ignore the "no video format" and similar errors
+* Add option `--skip-playlist-after-errors` to skip the rest of a playlist after a given number of errors are encountered
+* Merge youtube-dl: Up to [commit/7e8b3f9](https://github.com/ytdl-org/youtube-dl/commit/7e8b3f9439ebefb3a3a4e5da9c0bd2b595976438)
+* [downloader] Fix bug in downloader selection
+* [BilibiliChannel] Fix pagination by [nao20010128nao](https://github.com/nao20010128nao) and [pukkandan](https://github.com/pukkandan)
+* [rai] Add support for http formats by [nixxo](https://github.com/nixxo)
+* [TubiTv] Add TubiTvShowIE by [Ashish0804](https://github.com/Ashish0804)
+* [twitcasting] Fix extractor
+* [viu:ott] Fix extractor and support series by [lkho](https://github.com/lkho) and [pukkandan](https://github.com/pukkandan)
+* [youtube:tab] Show unavailable videos in playlists by [coletdjnz](https://github.com/coletdjnz)
+* [youtube:tab] Reload with unavailable videos for all playlists
+* [youtube] Ignore invalid stretch ratio
+* [youtube] Improve channel syncid extraction to support ytcfg by [coletdjnz](https://github.com/coletdjnz)
+* [youtube] Standardize API calls for tabs, mixes and search by [coletdjnz](https://github.com/coletdjnz)
+* [youtube] Bugfix in `_extract_ytcfg`
+* [mildom:user:vod] Download only the necessary number of pages
+* [mildom] Remove proxy completely by [fstirlitz](https://github.com/fstirlitz)
+* [go] Fix `_VALID_URL`
+* [MetadataFromField] Improve regex and add tests
+* [Exec] Ensure backward compatibility when the command contains `%`
+* [extractor] Fix inconsistent use of `report_warning`
+* Ensure `mergeall` selects best format when multistreams are disabled
+* Improve the yt-dlp.sh script by [fstirlitz](https://github.com/fstirlitz)
+* [lazy_extractor] Do not load plugins
+* [ci] Disable fail-fast
+* [docs] Clarify which deprecated options still work
+* [docs] Fix typos
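+
+For example, combining field traversal with the new subtitle-language matching (`URL` is a placeholder; the traversed field is only illustrative):
+
+```sh
+# Name the file after the first format's id, and fetch all English subtitles
+# (regex match) while excluding live chat
+yt-dlp -o "%(id)s.%(formats.0.format_id)s.%(ext)s" --write-subs --sub-langs "en.*,-live_chat" URL
+```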
+
+
+### 2021.04.11
+* Add option `--convert-thumbnails` (only jpg currently supported)
+* Format selector `mergeall` to download and merge all formats
+* Pass any field to `--exec` using a syntax similar to the output template
+* Choose the downloader for each protocol using `--downloader PROTO:NAME` (see the example after this list)
+ * Alias `--downloader` for `--external-downloader`
+ * Added `native` as an option for the downloader
+* Merge youtube-dl: Up to [commit/4fb25ff](https://github.com/ytdl-org/youtube-dl/commit/4fb25ff5a3be5206bb72e5c4046715b1529fb2c7) (except vimeo)
+* [DiscoveryPlusIndia] Add DiscoveryPlusIndiaShowIE by [Ashish0804](https://github.com/Ashish0804)
+* [NFHSNetwork] Add extractor by [llacb47](https://github.com/llacb47)
+* [nebula] Add extractor (watchnebula.com) by [hheimbuerger](https://github.com/hheimbuerger)
+* [nitter] Fix extraction of reply tweets and update instance list by [B0pol](https://github.com/B0pol)
+* [nitter] Fix thumbnails by [B0pol](https://github.com/B0pol)
+* [youtube] Fix thumbnail URL
+* [youtube] Parse API parameters from initial webpage by [coletdjnz](https://github.com/coletdjnz)
+* [youtube] Extract comments' approximate timestamp by [coletdjnz](https://github.com/coletdjnz)
+* [youtube] Fix alert extraction
+* [bilibili] Fix uploader
+* [utils] Add `datetime_from_str` and `datetime_add_months` by [coletdjnz](https://github.com/coletdjnz)
+* Run some `postprocessors` before actual download
+* Improve argument parsing for `-P`, `-o`, `-S`
+* Fix some `m3u8` not obeying `--allow-unplayable-formats`
+* Fix default of `dynamic_mpd`
+* Deprecate `--all-formats`, `--include-ads`, `--hls-prefer-native`, `--hls-prefer-ffmpeg`
+* [docs] Improvements
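+
+For instance, to mix downloaders per protocol (`URL` is a placeholder):
+
+```sh
+# Use aria2c by default, but the native downloader for DASH and HLS manifests
+yt-dlp --downloader aria2c --downloader "dash,m3u8:native" URL
+```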
+
+### 2021.04.03
+* Merge youtube-dl: Up to [commit/654b4f4](https://github.com/ytdl-org/youtube-dl/commit/654b4f4ff2718f38b3182c1188c5d569c14cc70a)
+* Ability to set a specific field in the file's metadata using `--parse-metadata` (see the example after this list)
+* Ability to select the n-th best format, e.g. `-f bv*.2`
+* [DiscoveryPlus] Add discoveryplus.in
+* [la7] Add podcasts and podcast playlists by [nixxo](https://github.com/nixxo)
+* [mildom] Update extractor with current proxy by [nao20010128nao](https://github.com/nao20010128nao)
+* [ard:mediathek] Fix video id extraction
+* [generic] Detect Invidious' link element
+* [youtube] Show premium state in `availability` by [coletdjnz](https://github.com/coletdjnz)
+* [viewsource] Add extractor to handle `view-source:`
+* [sponskrub] Run before embedding thumbnail
+* [docs] Improve `--parse-metadata` documentation
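+
+A short sketch of both features together (`URL` is a placeholder; the regex simply captures the whole description):
+
+```sh
+# Set the "comment" metadata field from the description, and pick the
+# second-best video format together with the best audio
+yt-dlp --parse-metadata "description:(?s)(?P<meta_comment>.+)" -f "bv*.2+ba" URL
+```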
+
+
+### 2021.03.24.1
+* Revert [commit/8562218](https://github.com/ytdl-org/youtube-dl/commit/8562218350a79d4709da8593bb0c538aa0824acf)
+
+### 2021.03.24
+* Merge youtube-dl: Up to 2021.03.25 ([commit/8562218](https://github.com/ytdl-org/youtube-dl/commit/8562218350a79d4709da8593bb0c538aa0824acf))
+* Parse metadata from multiple fields using `--parse-metadata`
+* Ability to load a playlist infojson using `--load-info-json` (see the example after this list)
+* Write current epoch to infojson when using `--no-clean-infojson`
+* [youtube_live_chat] fix bug when trying to set cookies
+* [niconico] Fix for when logged in by [CXwudi](https://github.com/CXwudi) and [xtkoba](https://github.com/xtkoba)
+* [linuxacademy] Fix login
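+
+For example, a playlist infojson written by an earlier run can be fed back in (`playlist.info.json` is a placeholder filename):
+
+```sh
+# Re-run a download from previously extracted playlist metadata
+yt-dlp --load-info-json playlist.info.json
+```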
+
+
+### 2021.03.21
+* Merge youtube-dl: Up to [commit/7e79ba7](https://github.com/ytdl-org/youtube-dl/commit/7e79ba7dd6e6649dd2ce3a74004b2044f2182881)
+* Option `--no-clean-infojson` to keep private keys in the infojson
+* [aria2c] Support retry/abort unavailable fragments by [damianoamatruda](https://github.com/damianoamatruda)
+* [aria2c] Better default arguments
+* [movefiles] Fix bugs and make more robust
+* [formatSort] Fix `quality` being ignored
+* [splitchapters] Fix for older ffmpeg
+* [sponskrub] Pass proxy to sponskrub
+* Make sure `post_hook` gets the final filename
+* Recursively remove any private keys from infojson
+* Embed video URL metadata inside `mp4` by [damianoamatruda](https://github.com/damianoamatruda) and [pukkandan](https://github.com/pukkandan)
+* Merge `webm` formats into `mkv` if thumbnails are to be embedded by [damianoamatruda](https://github.com/damianoamatruda)
+* Use headers and cookies when downloading subtitles by [damianoamatruda](https://github.com/damianoamatruda)
+* Parse resolution in info dictionary by [damianoamatruda](https://github.com/damianoamatruda)
+* More consistent warning messages by [damianoamatruda](https://github.com/damianoamatruda) and [pukkandan](https://github.com/pukkandan)
+* [docs] Add deprecated options and aliases in readme
+* [docs] Fix some minor mistakes
+
+* [niconico] Partial fix adapted from [animelover1984/youtube-dl@b5eff52](https://github.com/animelover1984/youtube-dl/commit/b5eff52dd9ed5565672ea1694b38c9296db3fade) (login and smile formats still don't work)
+* [niconico] Add user extractor by [animelover1984](https://github.com/animelover1984)
+* [bilibili] Add anthology support by [animelover1984](https://github.com/animelover1984)
+* [amcnetworks] Fix extractor by [2ShedsJackson](https://github.com/2ShedsJackson)
+* [stitcher] Merge from youtube-dl by [nixxo](https://github.com/nixxo)
+* [rcs] Improved extraction by [nixxo](https://github.com/nixxo)
+* [linuxacademy] Improve regex
+* [youtube] Show if video is `private`, `unlisted` etc in info (`availability`) by [coletdjnz](https://github.com/coletdjnz) and [pukkandan](https://github.com/pukkandan)
+* [youtube] bugfix for channel playlist extraction
+* [nbc] Improve metadata extraction by [2ShedsJackson](https://github.com/2ShedsJackson)
+
+
+### 2021.03.15
+* **Split video by chapters** using option `--split-chapters` (see the example after this list)
+ * The output template for the split files can be set with `-o`/`-P` using the prefix `chapter:`
+ * Additional keys `section_title`, `section_number`, `section_start`, `section_end` are available in the output template
+* **Parallel fragment downloads** by [shirt](https://github.com/shirt-dev) (also shown in the example after this list)
+ * Use option `--concurrent-fragments` (`-N`) to set the number of threads (default 1)
+* Merge youtube-dl: Up to [commit/3be0980](https://github.com/ytdl-org/youtube-dl/commit/3be098010f667b14075e3dfad1e74e5e2becc8ea)
+* [zee5] Add Show Extractor by [Ashish0804](https://github.com/Ashish0804) and [pukkandan](https://github.com/pukkandan)
+* [rai] Fix DRM check by [nixxo](https://github.com/nixxo)
+* [wimtv] Add extractor by [nixxo](https://github.com/nixxo)
+* [mtv] Add mtv.it and extract series metadata by [nixxo](https://github.com/nixxo)
+* [pluto.tv] Add extractor by [kevinoconnor7](https://github.com/kevinoconnor7)
+* [youtube] Rewrite comment extraction by [coletdjnz](https://github.com/coletdjnz)
+* [embedthumbnail] Set mtime correctly
+* Refactor some postprocessor/downloader code by [pukkandan](https://github.com/pukkandan) and [shirt](https://github.com/shirt-dev)
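+
+Both features in one illustrative command (`URL` is a placeholder):
+
+```sh
+# Download fragments on 4 threads, then split the result along its chapters
+yt-dlp -N 4 --split-chapters -o "chapter:%(title)s - %(section_number)s %(section_title)s.%(ext)s" URL
+```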
+
+
+### 2021.03.07
+* [youtube] Fix history, mixes, community pages and trending by [pukkandan](https://github.com/pukkandan) and [coletdjnz](https://github.com/coletdjnz)
+* [youtube] Fix private feeds/playlists on multi-channel accounts by [coletdjnz](https://github.com/coletdjnz)
+* [youtube] Extract alerts from continuation by [coletdjnz](https://github.com/coletdjnz)
+* [cbs] Add support for ParamountPlus by [shirt](https://github.com/shirt-dev)
+* [mxplayer] Rewrite extractor with show support by [pukkandan](https://github.com/pukkandan) and [Ashish0804](https://github.com/Ashish0804)
+* [gedi] Improvements from youtube-dl by [nixxo](https://github.com/nixxo)
+* [vimeo] Fix videos with password by [teesid](https://github.com/teesid)
+* [lbry] Support `lbry://` url by [nixxo](https://github.com/nixxo)
+* [bilibili] Change `Accept` header by [pukkandan](https://github.com/pukkandan) and [animelover1984](https://github.com/animelover1984)
+* [trovo] Pass origin header
+* [rai] Check for DRM by [nixxo](https://github.com/nixxo)
+* [downloader] Fix bug for `ffmpeg`/`httpie`
+* [update] Fix updater removing the executable bit on some UNIX distros
+* [update] Fix current build hash for UNIX
+* [docs] Include wget/curl/aria2c install instructions for Unix by [Ashish0804](https://github.com/Ashish0804)
+* Fix some videos downloading with `m3u8` extension
+* Remove "fixup is ignored" warning when fixup wasn't passed by user
+
+
+### 2021.03.03.2
+* [build] Fix bug
+
+### 2021.03.03
+* [youtube] Use new browse API for continuation page extraction by [coletdjnz](https://github.com/coletdjnz) and [pukkandan](https://github.com/pukkandan)
+* Fix HLS playlist downloading by [shirt](https://github.com/shirt-dev)
+* Merge youtube-dl: Up to [2021.03.03](https://github.com/ytdl-org/youtube-dl/releases/tag/2021.03.03)
+* [mtv] Fix extractor
+* [nick] Fix extractor by [DennyDai](https://github.com/DennyDai)
+* [mxplayer] Add new extractor by [codeasashu](https://github.com/codeasashu)
+* [youtube] Throw error when `--extractor-retries` are exhausted
+* Reduce default of `--extractor-retries` to 3
+* Fix packaging bugs by [hseg](https://github.com/hseg)
+
+
+### 2021.03.01
+* Allow specifying path in `--external-downloader`
+* Add option `--sleep-requests` to sleep between requests
+* Add option `--extractor-retries` to retry on known extractor errors
+* Extract comments only when needed
+* `--get-comments` doesn't imply `--write-info-json` if `-J`, `-j` or `--print-json` are used
+* Fix `get_executable_path` by [shirt](https://github.com/shirt-dev)
+* [youtube] Retry on more known errors than just HTTP-5xx
+* [youtube] Fix inconsistent `webpage_url`
+* [tennistv] Fix format sorting
+* [bilibiliaudio] Recognize the file as audio-only
+* [hrfernsehen] Fix wrong import
+* [viki] Fix viki play pass authentication by [RobinD42](https://github.com/RobinD42)
+* [readthedocs] Improvements by [shirt](https://github.com/shirt-dev)
+* [hls] Fix bug with m3u8 format extraction
+* [hls] Enable `--hls-use-mpegts` by default when downloading live-streams
+* [embedthumbnail] Fix bug with deleting original thumbnail
+* [build] Fix completion paths, zsh pip completion install by [hseg](https://github.com/hseg)
+* [ci] Disable download tests unless specifically invoked
+* Cleanup some code and fix typos
+
+
+### 2021.02.24
+* Moved project to an organization [yt-dlp](https://github.com/yt-dlp)
+* **Completely changed project name to yt-dlp** by [Pccode66](https://github.com/Pccode66) and [pukkandan](https://github.com/pukkandan)
+ * Also, `youtube-dlc` config files are no longer loaded
+* Merge youtube-dl: Up to [commit/4460329](https://github.com/ytdl-org/youtube-dl/commit/44603290e5002153f3ebad6230cc73aef42cc2cd) (except tmz, gedi)
+* [Readthedocs](https://yt-dlp.readthedocs.io) support by [shirt](https://github.com/shirt-dev)
+* [youtube] Show if video was a live stream in info (`was_live`)
+* [Zee5] Add new extractor by [Ashish0804](https://github.com/Ashish0804) and [pukkandan](https://github.com/pukkandan)
+* [jwplatform] Add support for `hyland.com`
+* [tennistv] Fix extractor
+* [hls] Support media initialization by [shirt](https://github.com/shirt-dev)
+* [hls] Added option `--hls-split-discontinuity` to better support media discontinuity by [shirt](https://github.com/shirt-dev)
+* [ffmpeg] Allow passing custom arguments before `-i` using the `--ppa "ffmpeg_i1:ARGS"` syntax (see the example after this list)
+* Fix `--windows-filenames` removing `/` from UNIX paths
+* [hls] Show warning if pycryptodome is not found
+* [docs] Improvements
+ * Fix documentation of `Extractor Options`
+ * Document `all` in format selection
+ * Document `playable_in_embed` in output templates
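+
+For example, to feed ffmpeg an input-side option during post-processing (`URL` is a placeholder; `-hwaccel auto` is only an illustrative input flag):
+
+```sh
+# Pass "-hwaccel auto" before the first input file of the ffmpeg invocation
+yt-dlp --ppa "ffmpeg_i1:-hwaccel auto" URL
+```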
+
+
+### 2021.02.19
+* Merge youtube-dl: Up to [commit/cf2dbec](https://github.com/ytdl-org/youtube-dl/commit/cf2dbec6301177a1fddf72862de05fa912d9869d) (except kakao)
+* [viki] Fix extractor
+* [niconico] Extract `channel` and `channel_id` by [kurumigi](https://github.com/kurumigi)
+* [youtube] Multiple page support for hashtag URLs
+* [youtube] Add more invidious instances
+* [youtube] Fix comment extraction when comment text is empty
+* Option `--windows-filenames` to force use of Windows-compatible filenames
+* [ExtractAudio] Bugfix
+* Don't raise `parser.error` when exiting for update
+* [MoveFiles] Fix for when merger can't run
+* Changed `--trim-file-name` to `--trim-filenames` to be similar to related options
+* Format Sort improvements:
+ * Prefer `vp9.2` over other `vp9` codecs
+ * Remove forced priority of `quality`
+ * Remove unnecessary `field_preference` and misuse of `preference` from extractors
+* Build improvements:
+ * Fix hash output by [shirt](https://github.com/shirt-dev)
+ * Lock Python package versions for x86 and use `wheels` by [shirt](https://github.com/shirt-dev)
+ * Exclude `vcruntime140.dll` from UPX by [jbruchon](https://github.com/jbruchon)
+ * Set version number based on UTC time, not local time
+ * Publish on PyPI only if token is set
+* [docs] Better document `--prefer-free-formats` and add `--no-prefer-free-format`
+
+
+### 2021.02.15
+* Merge youtube-dl: Up to [2021.02.10](https://github.com/ytdl-org/youtube-dl/releases/tag/2021.02.10) (except archive.org)
+* [niconico] Improved extraction and support encrypted/SMILE movies by [kurumigi](https://github.com/kurumigi), [tsukumijima](https://github.com/tsukumijima), [bbepis](https://github.com/bbepis), [pukkandan](https://github.com/pukkandan)
+* Fix HLS AES-128 with multiple keys in external downloaders by [shirt](https://github.com/shirt-dev)
+* [youtube_live_chat] Fix by using POST API by [siikamiika](https://github.com/siikamiika)
+* [rumble] Add support for video page
+* Option `--allow-unplayable-formats` to allow downloading unplayable video formats
+* [ExtractAudio] Don't re-encode when file is already in a common audio format
+* [youtube] Fix search continuations
+* [youtube] Fix for new accounts
+* Improve build/updater: by [pukkandan](https://github.com/pukkandan) and [shirt](https://github.com/shirt-dev)
+ * Fix SHA256 calculation in build and implement hash checking for updater
+ * Exit immediately in Windows once the update process starts
+ * Fix updater for `x86.exe`
+ * Updater looks for both `yt-dlp` and `youtube-dlc` in releases for future-proofing
+ * Change optional dependency to `pycryptodome`
+* Fix issue with unicode filenames in aria2c by [shirt](https://github.com/shirt-dev)
+* Fix `allow_playlist_files` not being correctly passed through
+* Fix for empty HTTP head requests by [shirt](https://github.com/shirt-dev)
+* Fix `get_executable_path` in UNIX
+* [sponskrub] Print ffmpeg output and errors to terminal
+* `__real_download` should be false when ffmpeg is unavailable and nothing is downloaded
+* Show `exe`/`zip`/`source` and 32/64-bit in verbose message
+
+
+### 2021.02.09
+* **aria2c support for DASH/HLS**: by [shirt](https://github.com/shirt-dev)
+* **Implement Updater** (`-U`) by [shirt](https://github.com/shirt-dev)
+* [youtube] Fix comment extraction
+* [youtube_live_chat] Improve extraction
+* [youtube] Fix for channel URLs sometimes not downloading all pages
+* [aria2c] Changed default arguments to `--console-log-level=warn --summary-interval=0 --file-allocation=none -x16 -j16 -s16`
+* Add fallback for thumbnails
+* [embedthumbnail] Keep original thumbnail after conversion if `write_thumbnail` is given
+* [embedsubtitle] Keep original subtitle after conversion if `write_subtitles` is given
+* [pyinst.py] Move back to root dir
+* [youtube] Simplified renderer parsing and bugfixes
+* [movefiles] Fix compatibility with python2
+* [remuxvideo] Fix validation of conditional remux
+* [sponskrub] Don't raise error when the video does not exist
+* [docs] Crypto is an optional dependency
+
+
+### 2021.02.04
+* Merge youtube-dl: Up to [2021.02.04.1](https://github.com/ytdl-org/youtube-dl/releases/tag/2021.02.04.1)
+* **Date/time formatting in output template:**
+ * You can use [`strftime`](https://docs.python.org/3/library/datetime.html#strftime-and-strptime-format-codes) to format date/time fields. Example: `%(upload_date>%Y-%m-%d)s`
+* **Multiple output templates** (see the example after this list):
+ * Separate output templates can be given for the different metadata files by using `-o TYPE:TEMPLATE`
+ * The allowed types are: `subtitle|thumbnail|description|annotation|infojson|pl_description|pl_infojson`
+* [youtube] More metadata extraction for channel/playlist URLs (channel, uploader, thumbnail, tags)
+* New option `--no-write-playlist-metafiles` to prevent writing playlist metadata files
+* [audius] Fix extractor
+* [youtube_live_chat] Fix `parse_yt_initial_data` and add `fragment_retries`
+* [postprocessor] Raise errors correctly
+* [metadatafromtitle] Fix bug when extracting data from numeric fields
+* Fix issue with overwriting files
+* Fix "Default format spec" appearing in quiet mode
+* [FormatSort] Allow user to prefer av01 over vp9 (The default is still vp9)
+* [FormatSort] Fix bug where `quality` had higher priority than `hasvid`
+* [pyinst] Automatically detect Python architecture and working directory
+* Strip out internal fields such as `_filename` from infojson
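+
+A sketch combining both template features (`URL` is a placeholder; the directory layout is illustrative):
+
+```sh
+# Date-stamped video filename, with a separate template for the thumbnail file
+yt-dlp -o "%(upload_date>%Y-%m-%d)s %(title)s.%(ext)s" \
+       -o "thumbnail:thumbs/%(title)s.%(ext)s" --write-thumbnail URL
+```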
+
+
+### 2021.01.29
+* **Features from [animelover1984/youtube-dl](https://github.com/animelover1984/youtube-dl)**: by [animelover1984](https://github.com/animelover1984) and [bbepis](https://github.com/bbepis)
+ * Add `--get-comments`
+ * [youtube] Extract comments
+ * [bilibili] Added BiliBiliSearchIE, BilibiliChannelIE
+ * [bilibili] Extract comments
+ * [bilibili] Better video extraction
+ * Write playlist data to infojson
+ * [FFmpegMetadata] Embed infojson inside the video
+ * [EmbedThumbnail] Try embedding in mp4 using ffprobe and `-disposition`
+ * [EmbedThumbnail] Treat mka like mkv and mov like mp4
+ * [EmbedThumbnail] Embed in ogg/opus
+ * [VideoRemuxer] Conditionally remux video
+ * [VideoRemuxer] Add `-movflags +faststart` when remuxing to mp4
+ * [ffmpeg] Print entire stderr in verbose when there is an error
+ * [EmbedSubtitle] Warn when embedding ass in mp4
+ * [anvato] Use NFLTokenGenerator if possible
+* **Parse additional metadata**: New option `--parse-metadata` to extract additional metadata from existing fields
+ * The extracted fields can be used in `--output`
+ * Deprecated `--metadata-from-title`
+* [Audius] Add extractor
+* [youtube] Extract playlist description and write it to `.description` file
+* Detect existing files even when using `recode`/`remux` (`extract-audio` is partially fixed)
+* Fix wrong user config from v2021.01.24
+* [youtube] Report error message from youtube as error instead of warning
+* [FormatSort] Fix some fields not sorting from v2021.01.24
+* [postprocessor] Deprecate `avconv`/`avprobe`. All current functionality is left untouched, but don't expect any new features to work with avconv
+* [postprocessor] fix `write_debug` to not throw error when there is no `_downloader`
+* [movefiles] Don't give "can't find" warning when move is unnecessary
+* Refactor `update-version`, `pyinst.py` and related files
+* [ffmpeg] Document more formats that are supported for remux/recode
+
+
+### 2021.01.24
+* Merge youtube-dl: Up to [2021.01.24](https://github.com/ytdl-org/youtube-dl/releases/tag/2021.01.16)
+* Plugin support ([documentation](https://github.com/yt-dlp/yt-dlp#plugins))
+* **Multiple paths**: New option `-P`/`--paths` to give different paths for different types of files (see the example after this list)
+ * The syntax is `-P "type:path" -P "type:path"`
+ * Valid types are: home, temp, description, annotation, subtitle, infojson, thumbnail
+ * Additionally, the configuration file is taken from the home directory or current directory
+* Allow passing different arguments to different external downloaders
+* [mildom] Add extractor by [nao20010128nao](https://github.com/nao20010128nao)
+* Warn when using old style `--external-downloader-args` and `--post-processor-args`
+* Fix `--no-overwrite` when using `--write-link`
+* [sponskrub] Output `unrecognized argument` error message correctly
+* [cbs] Make failure to extract title non-fatal
+* Fix typecasting when pre-checking archive
+* Fix issue with setting title on UNIX
+* Deprecate redundant aliases in `formatSort`. The aliases remain functional for backward compatibility, but will be left undocumented
+* [tests] Fix test_post_hooks
+* [tests] Split core and download tests
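+
+For instance (`URL` is a placeholder):
+
+```sh
+# Keep intermediate files in /tmp but move finished downloads to ~/Videos
+yt-dlp -P "home:~/Videos" -P "temp:/tmp" URL
+```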
+
+
+### 2021.01.20
+* [TrovoLive] Add extractor (only VODs)
+* [pokemon] Add `/#/player` URLs
+* Improved parsing of multiple postprocessor-args, add `--ppa` as alias
+* [EmbedThumbnail] Simplify embedding in mkv
+* [sponskrub] Encode filenames correctly, better debug output and error message
+* [readme] Cleanup options
+
+
+### 2021.01.16
+* Merge youtube-dl: Up to [2021.01.16](https://github.com/ytdl-org/youtube-dl/releases/tag/2021.01.16)
+* **Configuration files** (see the example after this list):
+ * Portable configuration file: `./yt-dlp.conf`
+ * Allow the configuration files to be named `yt-dlp` instead of `youtube-dlc`. See [this](https://github.com/yt-dlp/yt-dlp#configuration) for details
+* Add PyPI release
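+
+A minimal sketch of a portable configuration (the option choices are illustrative and `URL` is a placeholder):
+
+```sh
+# yt-dlp picks up ./yt-dlp.conf from the working directory automatically
+cat > ./yt-dlp.conf <<'EOF'
+-o %(title)s.%(ext)s
+--write-thumbnail
+EOF
+yt-dlp URL
+```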
+
+
+### 2021.01.14
+* Added option `--break-on-reject`
+* [roosterteeth.com] Fix for bonus episodes by [Zocker1999NET](https://github.com/Zocker1999NET)
+* [tiktok] Fix for when share_info is empty
+* [EmbedThumbnail] Fix bug due to incorrect function name
+* [docs] Changed sponskrub links to point to [yt-dlp/SponSkrub](https://github.com/yt-dlp/SponSkrub) since I am now providing both Linux and Windows releases
+* [docs] Change all links to correctly point to new fork URL
+* [docs] Fix typos
+
+
+### 2021.01.12
+* [roosterteeth.com] Add subtitle support by [samiksome](https://github.com/samiksome)
+* Added `--force-overwrites`, `--no-force-overwrites` by [alxnull](https://github.com/alxnull)
+* Changed fork name to `yt-dlp`
+* Fix typos by [FelixFrog](https://github.com/FelixFrog)
+* [ci] Option to skip
+* [changelog] Added unreleased changes in blackjack4494/yt-dlc
+
+
+### 2021.01.10
+* [archive.org] Fix extractor and add support for audio and playlists by [wporr](https://github.com/wporr)
+* [Animelab] Added by [mariuszskon](https://github.com/mariuszskon)
+* [youtube:search] Fix view_count by [ohnonot](https://github.com/ohnonot)
+* [youtube] Show if video is embeddable in info (`playable_in_embed`)
+* Update version badge automatically in README
+* Enable `test_youtube_search_matching`
+* Create `to_screen` and similar functions in postprocessor/common
+
+
+### 2021.01.09
+* [youtube] Fix bug in automatic caption extraction
+* Add `post_hooks` to YoutubeDL by [alexmerkel](https://github.com/alexmerkel)
+* Batch file enumeration improvements by [glenn-slayden](https://github.com/glenn-slayden)
+* Stop immediately when reaching `--max-downloads` by [glenn-slayden](https://github.com/glenn-slayden)
+* Fix incorrect ANSI sequence for restoring console-window title by [glenn-slayden](https://github.com/glenn-slayden)
+* Kill child processes when yt-dlc is killed by [Unrud](https://github.com/Unrud)
+
+
+### 2021.01.08
+* Merge youtube-dl: Up to [2021.01.08](https://github.com/ytdl-org/youtube-dl/releases/tag/2021.01.08) except stitcher ([1](https://github.com/ytdl-org/youtube-dl/commit/bb38a1215718cdf36d73ff0a7830a64cd9fa37cc), [2](https://github.com/ytdl-org/youtube-dl/commit/a563c97c5cddf55f8989ed7ea8314ef78e30107f))
+* Moved changelog to separate file
+
+
+### 2021.01.07-1
+* [Akamai] fix by [nixxo](https://github.com/nixxo)
+* [Tiktok] merge youtube-dl tiktok extractor by [GreyAlien502](https://github.com/GreyAlien502)
+* [vlive] add support for playlists by [kyuyeunk](https://github.com/kyuyeunk)
+* [youtube_live_chat] make sure playerOffsetMs is positive by [siikamiika](https://github.com/siikamiika)
+* Ignore extra data streams in ffmpeg by [jbruchon](https://github.com/jbruchon)
+* Allow passing different arguments to different postprocessors using `--postprocessor-args`
+* Deprecated `--sponskrub-args`. The same can now be done using `--postprocessor-args "sponskrub:<args>"`
+* [CI] Split tests into core-test and full-test
+
+
+### 2021.01.07
+* Removed priority of `av01` codec in `-S` since most devices don't support it yet
+* Added `duration_string` to be used in `--output`
+* Created First Release
+
+
+### 2021.01.05-1
+* **Changed defaults:**
+ * Enabled `--ignore`
+ * Disabled `--video-multistreams` and `--audio-multistreams`
+ * Changed default format selection to `bv*+ba/b` when `--audio-multistreams` is disabled
+ * Changed default format sort order to `res,fps,codec,size,br,asr,proto,ext,has_audio,source,format_id`
+ * Changed `webm` to be more preferable than `flv` in format sorting
+ * Changed default output template to `%(title)s [%(id)s].%(ext)s`
+ * Enabled `--list-formats-as-table`
+
+
+### 2021.01.05
+* **Format Sort:** Added `--format-sort` (`-S`), `--format-sort-force` (`--S-force`) - See [Sorting Formats](README.md#sorting-formats) for details; an example follows this list
+* **Format Selection:** See [Format Selection](README.md#format-selection) for details
+ * New format selectors: `best*`, `worst*`, `bestvideo*`, `bestaudio*`, `worstvideo*`, `worstaudio*`
+ * Changed video format sorting to show video only files and video+audio files together
+ * Added `--video-multistreams`, `--no-video-multistreams`, `--audio-multistreams`, `--no-audio-multistreams`
+ * Added `b`,`w`,`v`,`a` as aliases for `best`, `worst`, `video` and `audio` respectively
+* Shortcut Options: Added `--write-link`, `--write-url-link`, `--write-webloc-link`, `--write-desktop-link` by [h-h-h-h](https://github.com/h-h-h-h) - See [Internet Shortcut Options](README.md#internet-shortcut-options) for details
+* **Sponskrub integration:** Added `--sponskrub`, `--sponskrub-cut`, `--sponskrub-force`, `--sponskrub-location`, `--sponskrub-args` - See [SponSkrub Options](README.md#sponskrub-sponsorblock-options) for details
+* Added `--force-download-archive` (`--force-write-archive`) by [h-h-h-h](https://github.com/h-h-h-h)
+* Added `--list-formats-as-table`, `--list-formats-old`
+* **Negative Options:** Makes it possible to negate most boolean options by adding `no-` to the switch. Useful when you want to reverse an option that is defined in a config file (also shown in the example after this list)
+ * Added `--no-ignore-dynamic-mpd`, `--no-allow-dynamic-mpd`, `--allow-dynamic-mpd`, `--youtube-include-hls-manifest`, `--no-youtube-include-hls-manifest`, `--no-youtube-skip-hls-manifest`, `--no-download`, `--no-download-archive`, `--resize-buffer`, `--part`, `--mtime`, `--no-keep-fragments`, `--no-cookies`, `--no-write-annotations`, `--no-write-info-json`, `--no-write-description`, `--no-write-thumbnail`, `--youtube-include-dash-manifest`, `--post-overwrites`, `--no-keep-video`, `--no-embed-subs`, `--no-embed-thumbnail`, `--no-add-metadata`, `--no-include-ads`, `--no-write-sub`, `--no-write-auto-sub`, `--no-playlist-reverse`, `--no-restrict-filenames`, `--youtube-include-dash-manifest`, `--no-format-sort-force`, `--flat-videos`, `--no-list-formats-as-table`, `--no-sponskrub`, `--no-sponskrub-cut`, `--no-sponskrub-force`
+ * Renamed: `--write-subs`, `--no-write-subs`, `--no-write-auto-subs`, `--write-auto-subs`. Note that these can still be used without the ending "s"
+* Relaxed validation for format filters so that any arbitrary field can be used
+* Fix for embedding thumbnail in mp3 by [pauldubois98](https://github.com/pauldubois98) ([ytdl-org/youtube-dl#21569](https://github.com/ytdl-org/youtube-dl/pull/21569))
+* Make Twitch Video ID output from Playlist and VOD extractor the same. This is only a temporary fix
+* Merge youtube-dl: Up to [2021.01.03](https://github.com/ytdl-org/youtube-dl/commit/8e953dcbb10a1a42f4e12e4e132657cb0100a1f8) - See [blackjack4494/yt-dlc#280](https://github.com/blackjack4494/yt-dlc/pull/280) for details
+ * Extractors [tiktok](https://github.com/ytdl-org/youtube-dl/commit/fb626c05867deab04425bad0c0b16b55473841a2) and [hotstar](https://github.com/ytdl-org/youtube-dl/commit/bb38a1215718cdf36d73ff0a7830a64cd9fa37cc) have not been merged
+* Cleaned up the fork for public use
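+
+For example, sorting and the new negated switches can be combined (`URL` is a placeholder; the sort fields are illustrative):
+
+```sh
+# Prefer resolutions up to 1080p, then higher fps; override a config-file
+# "--sponskrub" with its negated counterpart
+yt-dlp -S "res:1080,fps" --no-sponskrub URL
+```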
+
+
+**Note**: All uncredited changes above this point are authored by [pukkandan](https://github.com/pukkandan)
+
+### Unreleased changes in [blackjack4494/yt-dlc](https://github.com/blackjack4494/yt-dlc)
+* Updated to youtube-dl release 2020.11.26 by [pukkandan](https://github.com/pukkandan)
+* Youtube improvements by [pukkandan](https://github.com/pukkandan)
+ * Implemented all Youtube Feeds (ytfav, ytwatchlater, ytsubs, ythistory, ytrec) and SearchURL
+ * Fix some improper Youtube URLs
+ * Redirect channel home to /video
+ * Print youtube's warning message
+ * Handle Multiple pages for feeds better
+* [youtube] Fix ytsearch not returning results sometimes due to promoted content by [coletdjnz](https://github.com/coletdjnz)
+* [youtube] Temporary fix for automatic captions - disable json3 by [blackjack4494](https://github.com/blackjack4494)
+* Add --break-on-existing by [gergesh](https://github.com/gergesh)
+* Pre-check video IDs in the archive before downloading by [pukkandan](https://github.com/pukkandan)
+* [bitwave.tv] New extractor by [lorpus](https://github.com/lorpus)
+* [Gedi] Add extractor by [nixxo](https://github.com/nixxo)
+* [Rcs] Add new extractor by [nixxo](https://github.com/nixxo)
+* [skyit] New skyitalia extractor by [nixxo](https://github.com/nixxo)
+* [france.tv] Fix thumbnail URL by [renalid](https://github.com/renalid)
+* [ina] support mobile links by [B0pol](https://github.com/B0pol)
+* [instagram] Fix thumbnail extractor by [nao20010128nao](https://github.com/nao20010128nao)
+* [SouthparkDe] Support for English URLs by [xypwn](https://github.com/xypwn)
+* [spreaker] fix SpreakerShowIE test URL by [pukkandan](https://github.com/pukkandan)
+* [Vlive] Fix playlist handling when downloading a channel by [kyuyeunk](https://github.com/kyuyeunk)
+* [tmz] Fix extractor by [diegorodriguezv](https://github.com/diegorodriguezv)
+* [ITV] BTCC URL update by [WolfganP](https://github.com/WolfganP)
+* [generic] Detect embedded bitchute videos by [pukkandan](https://github.com/pukkandan)
+* [generic] Extract embedded youtube and twitter videos by [diegorodriguezv](https://github.com/diegorodriguezv)
+* [ffmpeg] Ensure all streams are copied by [pukkandan](https://github.com/pukkandan)
+* [embedthumbnail] Fix for os.rename error by [pukkandan](https://github.com/pukkandan)
+* make_win.bat: don't use UPX to pack vcruntime140.dll by [jbruchon](https://github.com/jbruchon)
+
+
+### Changelog of [blackjack4494/yt-dlc](https://github.com/blackjack4494/yt-dlc) till release 2020.11.11-3
+
+**Note**: This was constructed from the merge commit messages and may not be entirely accurate
+
+* [bandcamp] fix failing test. remove subclass hack by [insaneracist](https://github.com/insaneracist)
+* [bandcamp] restore album downloads by [insaneracist](https://github.com/insaneracist)
+* [francetv] fix extractor by [Surkal](https://github.com/Surkal)
+* [gdcvault] fix extractor by [blackjack4494](https://github.com/blackjack4494)
+* [hotstar] Move to API v1 by [theincognito-inc](https://github.com/theincognito-inc)
+* [hrfernsehen] add extractor by [blocktrron](https://github.com/blocktrron)
+* [kakao] new apis by [blackjack4494](https://github.com/blackjack4494)
+* [la7] fix missing protocol by [nixxo](https://github.com/nixxo)
+* [mailru] removed escaped braces, use urljoin, added tests by [nixxo](https://github.com/nixxo)
+* [MTV/Nick] universal mgid extractor + fix nick.de feed by [blackjack4494](https://github.com/blackjack4494)
+* [mtv] Fix a missing match_id by [nixxo](https://github.com/nixxo)
+* [Mtv] updated extractor logic & more by [blackjack4494](https://github.com/blackjack4494)
+* [ndr] support Daserste ndr by [blackjack4494](https://github.com/blackjack4494)
+* [Netzkino] Only use video id to find metadata by [TobiX](https://github.com/TobiX)
+* [newgrounds] fix: video download by [insaneracist](https://github.com/insaneracist)
+* [nitter] Add new extractor by [B0pol](https://github.com/B0pol)
+* [soundcloud] Resolve audio/x-wav by [tfvlrue](https://github.com/tfvlrue)
+* [soundcloud] sets pattern and tests by [blackjack4494](https://github.com/blackjack4494)
+* [SouthparkDE/MTV] another mgid extraction (mtv_base) feed url updated by [blackjack4494](https://github.com/blackjack4494)
+* [StoryFire] Add new extractor by [sgstair](https://github.com/sgstair)
+* [twitch] by [geauxlo](https://github.com/geauxlo)
+* [videa] Adapt to updates by [adrianheine](https://github.com/adrianheine)
+* [Viki] subtitles, formats by [blackjack4494](https://github.com/blackjack4494)
+* [vlive] fix extractor for revamped website by [exwm](https://github.com/exwm)
+* [xtube] fix extractor by [insaneracist](https://github.com/insaneracist)
+* [youtube] Convert subs when download is skipped by [blackjack4494](https://github.com/blackjack4494)
+* [youtube] Fix age gate detection by [random-nick](https://github.com/random-nick)
+* [youtube] fix yt-only playback when age restricted/gated - requires cookies by [blackjack4494](https://github.com/blackjack4494)
+* [youtube] fix: extract artist metadata from ytInitialData by [insaneracist](https://github.com/insaneracist)
+* [youtube] fix: extract mix playlist ids from ytInitialData by [insaneracist](https://github.com/insaneracist)
+* [youtube] fix: mix playlist title by [insaneracist](https://github.com/insaneracist)
+* [youtube] fix: Youtube Music playlists by [insaneracist](https://github.com/insaneracist)
+* [Youtube] Fixed problem with new youtube player by [peet1993](https://github.com/peet1993)
+* [zoom] Fix url parsing for url's containing /share/ and dots by [Romern](https://github.com/Romern)
+* [zoom] new extractor by [insaneracist](https://github.com/insaneracist)
+* abc by [adrianheine](https://github.com/adrianheine)
+* Added Comcast_SSO fix by [merval](https://github.com/merval)
+* Added DRM logic to brightcove by [merval](https://github.com/merval)
+* Added regex for ABC.com site. by [kucksdorfs](https://github.com/kucksdorfs)
+* alura by [hugohaa](https://github.com/hugohaa)
+* Arbitrary merges by [fstirlitz](https://github.com/fstirlitz)
+* ard.py_add_playlist_support by [martin54](https://github.com/martin54)
+* Bugfix/youtube/chapters fix extractor by [gschizas](https://github.com/gschizas)
+* bugfix_youtube_like_extraction by [RedpointsBots](https://github.com/RedpointsBots)
+* Create build workflow by [blackjack4494](https://github.com/blackjack4494)
+* deezer by [LucBerge](https://github.com/LucBerge)
+* Detect embedded bitchute videos by [pukkandan](https://github.com/pukkandan)
+* Don't install tests by [l29ah](https://github.com/l29ah)
+* Don't try to embed/convert json subtitles generated by [youtube](https://github.com/youtube) livechat by [pukkandan](https://github.com/pukkandan)
+* Doodstream by [sxvghd](https://github.com/sxvghd)
+* duboku by [lkho](https://github.com/lkho)
+* elonet by [tpikonen](https://github.com/tpikonen)
+* ext/remuxe-video by [Zocker1999NET](https://github.com/Zocker1999NET)
+* fall-back to the old way to fetch subtitles, if needed by [RobinD42](https://github.com/RobinD42)
+* feature_subscriber_count by [RedpointsBots](https://github.com/RedpointsBots)
+* Fix external downloader when there is no http_header by [pukkandan](https://github.com/pukkandan)
+* Fix issue triggered by [tubeup](https://github.com/tubeup) by [nsapa](https://github.com/nsapa)
+* Fix YoutubePlaylistsIE by [ZenulAbidin](https://github.com/ZenulAbidin)
+* fix-mitele by [DjMoren](https://github.com/DjMoren)
+* fix/google-drive-cookie-issue by [legraphista](https://github.com/legraphista)
+* fix_tiktok by [mervel-mervel](https://github.com/mervel-mervel)
+* Fixed problem with JS player URL by [peet1993](https://github.com/peet1993)
+* fixYTSearch by [xarantolus](https://github.com/xarantolus)
+* FliegendeWurst-3sat-zdf-merger-bugfix-feature
+* gilou-bandcamp_update
+* implement ThisVid extractor by [rigstot](https://github.com/rigstot)
+* JensTimmerman-patch-1 by [JensTimmerman](https://github.com/JensTimmerman)
+* Keep download archive in memory for better performance by [jbruchon](https://github.com/jbruchon)
+* la7-fix by [iamleot](https://github.com/iamleot)
+* magenta by [adrianheine](https://github.com/adrianheine)
+* Merge 26564 from [adrianheine](https://github.com/adrianheine)
+* Merge code from [ddland](https://github.com/ddland)
+* Merge code from [nixxo](https://github.com/nixxo)
+* Merge code from [ssaqua](https://github.com/ssaqua)
+* Merge code from [zubearc](https://github.com/zubearc)
+* mkvthumbnail by [MrDoritos](https://github.com/MrDoritos)
+* myvideo_ge by [fonkap](https://github.com/fonkap)
+* naver by [SeonjaeHyeon](https://github.com/SeonjaeHyeon)
+* ondemandkorea by [julien-hadleyjack](https://github.com/julien-hadleyjack)
+* rai-update by [iamleot](https://github.com/iamleot)
+* RFC: youtube: Polymer UI and JSON endpoints for playlists by [wlritchi](https://github.com/wlritchi)
+* rutv by [adrianheine](https://github.com/adrianheine)
+* Sc extractor web auth by [blackjack4494](https://github.com/blackjack4494)
+* Switch from binary search tree to Python sets by [jbruchon](https://github.com/jbruchon)
+* tiktok by [skyme5](https://github.com/skyme5)
+* tvnow by [TinyToweringTree](https://github.com/TinyToweringTree)
+* twitch-fix by [lel-amri](https://github.com/lel-amri)
+* Twitter shortener by [blackjack4494](https://github.com/blackjack4494)
+* Update README.md by [JensTimmerman](https://github.com/JensTimmerman)
+* Update to reflect website changes. by [amigatomte](https://github.com/amigatomte)
+* use webarchive to fix a dead link in README by [B0pol](https://github.com/B0pol)
+* Viki the second by [blackjack4494](https://github.com/blackjack4494)
+* wdr-subtitles by [mrtnmtth](https://github.com/mrtnmtth)
+* Webpfix by [alexmerkel](https://github.com/alexmerkel)
+* Youtube live chat by [siikamiika](https://github.com/siikamiika)
diff --git a/Collaborators.md b/Collaborators.md
new file mode 100644
index 0000000..894a853
--- /dev/null
+++ b/Collaborators.md
@@ -0,0 +1,63 @@
+# Collaborators
+
+This is a list of the collaborators of the project and their major contributions. See the [Changelog](Changelog.md) for more details.
+
+You can also find lists of all [contributors of yt-dlp](CONTRIBUTORS) and [authors of youtube-dl](https://github.com/ytdl-org/youtube-dl/blob/master/AUTHORS)
+
+
+## [pukkandan](https://github.com/pukkandan)
+
+[![ko-fi](https://img.shields.io/badge/_-Ko--fi-red.svg?logo=kofi&labelColor=555555&style=for-the-badge)](https://ko-fi.com/pukkandan)
+[![gh-sponsor](https://img.shields.io/badge/_-Github-white.svg?logo=github&labelColor=555555&style=for-the-badge)](https://github.com/sponsors/pukkandan)
+
+* Owner of the fork
+
+
+
+## [shirt](https://github.com/shirt-dev)
+
+[![ko-fi](https://img.shields.io/badge/_-Ko--fi-red.svg?logo=kofi&labelColor=555555&style=for-the-badge)](https://ko-fi.com/shirt)
+
+* Multithreading (`-N`) and aria2c support for fragment downloads
+* Support for media initialization and discontinuity in HLS
+* The self-updater (`-U`)
+
+
+
+## [coletdjnz](https://github.com/coletdjnz)
+
+[![gh-sponsor](https://img.shields.io/badge/_-Github-white.svg?logo=github&labelColor=555555&style=for-the-badge)](https://github.com/sponsors/coletdjnz)
+
+* Improved plugin architecture
+* Rewrote the networking infrastructure, implemented support for `requests`
+* YouTube improvements including: age-gate bypass, private playlists, multiple-clients (to avoid throttling) and a lot of under-the-hood improvements
+* Added support for new websites YoutubeWebArchive, MainStreaming, PRX, nzherald, Mediaklikk, StarTV etc
+* Improved/fixed support for Patreon, panopto, gfycat, itv, pbs, SouthParkDE etc
+
+
+
+## [Ashish0804](https://github.com/Ashish0804) <sub><sup>[Inactive]</sup></sub>
+
+[![ko-fi](https://img.shields.io/badge/_-Ko--fi-red.svg?logo=kofi&labelColor=555555&style=for-the-badge)](https://ko-fi.com/ashish0804)
+
+* Added support for new websites BiliIntl, DiscoveryPlusIndia, OlympicsReplay, PlanetMarathi, ShemarooMe, Utreon, Zee5 etc
+* Added playlist/series downloads for Hotstar, ParamountPlus, Rumble, SonyLIV, Trovo, TubiTv, Voot etc
+* Improved/fixed support for HiDive, HotStar, Hungama, LBRY, LinkedInLearning, Mxplayer, SonyLiv, TV2, Vimeo, VLive etc
+
+
+## [bashonly](https://github.com/bashonly)
+
+* `--update-to`, self-updater rewrite, automated/nightly/master releases
+* `--cookies-from-browser` support for Firefox containers, external downloader cookie handling overhaul
+* Added support for new websites like Dacast, Kick, NBCStations, Triller, VideoKen, Weverse, WrestleUniverse etc
+* Improved/fixed support for Anvato, Brightcove, Reddit, SlidesLive, TikTok, Twitter, Vimeo etc
+
+
+## [Grub4K](https://github.com/Grub4K)
+
+[![gh-sponsor](https://img.shields.io/badge/_-Github-white.svg?logo=github&labelColor=555555&style=for-the-badge)](https://github.com/sponsors/Grub4K) [![ko-fi](https://img.shields.io/badge/_-Ko--fi-red.svg?logo=kofi&labelColor=555555&style=for-the-badge)](https://ko-fi.com/Grub4K)
+
+* `--update-to`, self-updater rewrite, automated/nightly/master releases
+* Reworked internals like `traverse_obj`, various core refactors and bug fixes
+* Implemented proper progress reporting for parallel downloads
+* Improved/fixed/added Bundestag, crunchyroll, pr0gramm, Twitter, WrestleUniverse etc
diff --git a/LICENSE b/LICENSE
new file mode 100644
index 0000000..68a49da
--- /dev/null
+++ b/LICENSE
@@ -0,0 +1,24 @@
+This is free and unencumbered software released into the public domain.
+
+Anyone is free to copy, modify, publish, use, compile, sell, or
+distribute this software, either in source code form or as a compiled
+binary, for any purpose, commercial or non-commercial, and by any
+means.
+
+In jurisdictions that recognize copyright laws, the author or authors
+of this software dedicate any and all copyright interest in the
+software to the public domain. We make this dedication for the benefit
+of the public at large and to the detriment of our heirs and
+successors. We intend this dedication to be an overt act of
+relinquishment in perpetuity of all present and future rights to this
+software under copyright law.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR
+OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+OTHER DEALINGS IN THE SOFTWARE.
+
+For more information, please refer to <http://unlicense.org/>
diff --git a/Makefile b/Makefile
new file mode 100644
index 0000000..9344003
--- /dev/null
+++ b/Makefile
@@ -0,0 +1,160 @@
+all: lazy-extractors yt-dlp doc pypi-files
+clean: clean-test clean-dist
+clean-all: clean clean-cache
+completions: completion-bash completion-fish completion-zsh
+doc: README.md CONTRIBUTING.md issuetemplates supportedsites
+ot: offlinetest
+tar: yt-dlp.tar.gz
+
+# Keep this list in sync with pyproject.toml includes/artifacts
+# intended use: when building a source distribution,
+# make pypi-files && python3 -m build -sn .
+pypi-files: AUTHORS Changelog.md LICENSE README.md README.txt supportedsites \
+ completions yt-dlp.1 pyproject.toml setup.cfg devscripts/* test/*
+
+.PHONY: all clean install test tar pypi-files completions ot offlinetest codetest supportedsites
+
+clean-test:
+ rm -rf test/testdata/sigs/player-*.js tmp/ *.annotations.xml *.aria2 *.description *.dump *.frag \
+ *.frag.aria2 *.frag.urls *.info.json *.live_chat.json *.meta *.part* *.tmp *.temp *.unknown_video *.ytdl \
+ *.3gp *.ape *.ass *.avi *.desktop *.f4v *.flac *.flv *.gif *.jpeg *.jpg *.lrc *.m4a *.m4v *.mhtml *.mkv *.mov *.mp3 *.mp4 \
+ *.mpg *.mpga *.oga *.ogg *.opus *.png *.sbv *.srt *.ssa *.swf *.swp *.tt *.ttml *.url *.vtt *.wav *.webloc *.webm *.webp
+clean-dist:
+ rm -rf yt-dlp.1.temp.md yt-dlp.1 README.txt MANIFEST build/ dist/ .coverage cover/ yt-dlp.tar.gz completions/ \
+ yt_dlp/extractor/lazy_extractors.py *.spec CONTRIBUTING.md.tmp yt-dlp yt-dlp.exe yt_dlp.egg-info/ AUTHORS
+clean-cache:
+ find . \( \
+ -type d -name .pytest_cache -o -type d -name __pycache__ -o -name "*.pyc" -o -name "*.class" \
+ \) -prune -exec rm -rf {} \;
+
+completion-bash: completions/bash/yt-dlp
+completion-fish: completions/fish/yt-dlp.fish
+completion-zsh: completions/zsh/_yt-dlp
+lazy-extractors: yt_dlp/extractor/lazy_extractors.py
+
+PREFIX ?= /usr/local
+BINDIR ?= $(PREFIX)/bin
+MANDIR ?= $(PREFIX)/man
+SHAREDIR ?= $(PREFIX)/share
+PYTHON ?= /usr/bin/env python3
+GNUTAR ?= tar
+
+# set markdown input format to "markdown-smart" for pandoc version 2+ and to "markdown" for pandoc prior to version 2
+PANDOC_VERSION_CMD = pandoc -v 2>/dev/null | head -n1 | cut -d' ' -f2 | head -c1
+PANDOC_VERSION != $(PANDOC_VERSION_CMD)
+PANDOC_VERSION ?= $(shell $(PANDOC_VERSION_CMD))
+MARKDOWN_CMD = if [ "$(PANDOC_VERSION)" = "1" -o "$(PANDOC_VERSION)" = "0" ]; then echo markdown; else echo markdown-smart; fi
+MARKDOWN != $(MARKDOWN_CMD)
+MARKDOWN ?= $(shell $(MARKDOWN_CMD))
+
+install: lazy-extractors yt-dlp yt-dlp.1 completions
+ mkdir -p $(DESTDIR)$(BINDIR)
+ install -m755 yt-dlp $(DESTDIR)$(BINDIR)/yt-dlp
+ mkdir -p $(DESTDIR)$(MANDIR)/man1
+ install -m644 yt-dlp.1 $(DESTDIR)$(MANDIR)/man1/yt-dlp.1
+ mkdir -p $(DESTDIR)$(SHAREDIR)/bash-completion/completions
+ install -m644 completions/bash/yt-dlp $(DESTDIR)$(SHAREDIR)/bash-completion/completions/yt-dlp
+ mkdir -p $(DESTDIR)$(SHAREDIR)/zsh/site-functions
+ install -m644 completions/zsh/_yt-dlp $(DESTDIR)$(SHAREDIR)/zsh/site-functions/_yt-dlp
+ mkdir -p $(DESTDIR)$(SHAREDIR)/fish/vendor_completions.d
+ install -m644 completions/fish/yt-dlp.fish $(DESTDIR)$(SHAREDIR)/fish/vendor_completions.d/yt-dlp.fish
+
+uninstall:
+ rm -f $(DESTDIR)$(BINDIR)/yt-dlp
+ rm -f $(DESTDIR)$(MANDIR)/man1/yt-dlp.1
+ rm -f $(DESTDIR)$(SHAREDIR)/bash-completion/completions/yt-dlp
+ rm -f $(DESTDIR)$(SHAREDIR)/zsh/site-functions/_yt-dlp
+ rm -f $(DESTDIR)$(SHAREDIR)/fish/vendor_completions.d/yt-dlp.fish
+
+codetest:
+ flake8 .
+
+test:
+ $(PYTHON) -m pytest
+ $(MAKE) codetest
+
+offlinetest: codetest
+ $(PYTHON) -m pytest -k "not download"
+
+CODE_FOLDERS_CMD = find yt_dlp -type f -name '__init__.py' | sed 's,/__init__.py,,' | grep -v '/__' | sort
+CODE_FOLDERS != $(CODE_FOLDERS_CMD)
+CODE_FOLDERS ?= $(shell $(CODE_FOLDERS_CMD))
+CODE_FILES_CMD = for f in $(CODE_FOLDERS) ; do echo "$$f" | sed 's,$$,/*.py,' ; done
+CODE_FILES != $(CODE_FILES_CMD)
+CODE_FILES ?= $(shell $(CODE_FILES_CMD))
+yt-dlp: $(CODE_FILES)
+ mkdir -p zip
+ for d in $(CODE_FOLDERS) ; do \
+ mkdir -p zip/$$d ;\
+ cp -pPR $$d/*.py zip/$$d/ ;\
+ done
+ (cd zip && touch -t 200001010101 $(CODE_FILES))
+ mv zip/yt_dlp/__main__.py zip/
+ (cd zip && zip -q ../yt-dlp $(CODE_FILES) __main__.py)
+ rm -rf zip
+ echo '#!$(PYTHON)' > yt-dlp
+ cat yt-dlp.zip >> yt-dlp
+ rm yt-dlp.zip
+ chmod a+x yt-dlp
+
+README.md: $(CODE_FILES) devscripts/make_readme.py
+ COLUMNS=80 $(PYTHON) yt_dlp/__main__.py --ignore-config --help | $(PYTHON) devscripts/make_readme.py
+
+CONTRIBUTING.md: README.md devscripts/make_contributing.py
+ $(PYTHON) devscripts/make_contributing.py README.md CONTRIBUTING.md
+
+issuetemplates: devscripts/make_issue_template.py .github/ISSUE_TEMPLATE_tmpl/1_broken_site.yml .github/ISSUE_TEMPLATE_tmpl/2_site_support_request.yml .github/ISSUE_TEMPLATE_tmpl/3_site_feature_request.yml .github/ISSUE_TEMPLATE_tmpl/4_bug_report.yml .github/ISSUE_TEMPLATE_tmpl/5_feature_request.yml yt_dlp/version.py
+ $(PYTHON) devscripts/make_issue_template.py .github/ISSUE_TEMPLATE_tmpl/1_broken_site.yml .github/ISSUE_TEMPLATE/1_broken_site.yml
+ $(PYTHON) devscripts/make_issue_template.py .github/ISSUE_TEMPLATE_tmpl/2_site_support_request.yml .github/ISSUE_TEMPLATE/2_site_support_request.yml
+ $(PYTHON) devscripts/make_issue_template.py .github/ISSUE_TEMPLATE_tmpl/3_site_feature_request.yml .github/ISSUE_TEMPLATE/3_site_feature_request.yml
+ $(PYTHON) devscripts/make_issue_template.py .github/ISSUE_TEMPLATE_tmpl/4_bug_report.yml .github/ISSUE_TEMPLATE/4_bug_report.yml
+ $(PYTHON) devscripts/make_issue_template.py .github/ISSUE_TEMPLATE_tmpl/5_feature_request.yml .github/ISSUE_TEMPLATE/5_feature_request.yml
+ $(PYTHON) devscripts/make_issue_template.py .github/ISSUE_TEMPLATE_tmpl/6_question.yml .github/ISSUE_TEMPLATE/6_question.yml
+
+supportedsites:
+ $(PYTHON) devscripts/make_supportedsites.py supportedsites.md
+
+README.txt: README.md
+ pandoc -f $(MARKDOWN) -t plain README.md -o README.txt
+
+yt-dlp.1: README.md devscripts/prepare_manpage.py
+ $(PYTHON) devscripts/prepare_manpage.py yt-dlp.1.temp.md
+ pandoc -s -f $(MARKDOWN) -t man yt-dlp.1.temp.md -o yt-dlp.1
+ rm -f yt-dlp.1.temp.md
+
+completions/bash/yt-dlp: $(CODE_FILES) devscripts/bash-completion.in
+ mkdir -p completions/bash
+ $(PYTHON) devscripts/bash-completion.py
+
+completions/zsh/_yt-dlp: $(CODE_FILES) devscripts/zsh-completion.in
+ mkdir -p completions/zsh
+ $(PYTHON) devscripts/zsh-completion.py
+
+completions/fish/yt-dlp.fish: $(CODE_FILES) devscripts/fish-completion.in
+ mkdir -p completions/fish
+ $(PYTHON) devscripts/fish-completion.py
+
+_EXTRACTOR_FILES_CMD = find yt_dlp/extractor -name '*.py' -and -not -name 'lazy_extractors.py'
+_EXTRACTOR_FILES != $(_EXTRACTOR_FILES_CMD)
+_EXTRACTOR_FILES ?= $(shell $(_EXTRACTOR_FILES_CMD))
+yt_dlp/extractor/lazy_extractors.py: devscripts/make_lazy_extractors.py devscripts/lazy_load_template.py $(_EXTRACTOR_FILES)
+ $(PYTHON) devscripts/make_lazy_extractors.py $@
+
+yt-dlp.tar.gz: all
+ @$(GNUTAR) -czf yt-dlp.tar.gz --transform "s|^|yt-dlp/|" --owner 0 --group 0 \
+ --exclude '*.DS_Store' \
+ --exclude '*.kate-swp' \
+ --exclude '*.pyc' \
+ --exclude '*.pyo' \
+ --exclude '*~' \
+ --exclude '__pycache__' \
+ --exclude '.pytest_cache' \
+ --exclude '.git' \
+ -- \
+ README.md supportedsites.md Changelog.md LICENSE \
+ CONTRIBUTING.md Collaborators.md CONTRIBUTORS AUTHORS \
+ Makefile yt-dlp.1 README.txt completions .gitignore \
+ setup.cfg yt-dlp yt_dlp pyproject.toml devscripts test
+
+AUTHORS:
+ git shortlog -s -n HEAD | cut -f2 | sort > AUTHORS
diff --git a/README.md b/README.md
new file mode 100644
index 0000000..1e108a2
--- /dev/null
+++ b/README.md
@@ -0,0 +1,2317 @@
+<!-- MANPAGE: BEGIN EXCLUDED SECTION -->
+<div align="center">
+
+[![YT-DLP](https://raw.githubusercontent.com/yt-dlp/yt-dlp/master/.github/banner.svg)](#readme)
+
+[![Release version](https://img.shields.io/github/v/release/yt-dlp/yt-dlp?color=brightgreen&label=Download&style=for-the-badge)](#installation "Installation")
+[![PyPi](https://img.shields.io/badge/-PyPi-blue.svg?logo=pypi&labelColor=555555&style=for-the-badge)](https://pypi.org/project/yt-dlp "PyPi")
+[![Donate](https://img.shields.io/badge/_-Donate-red.svg?logo=githubsponsors&labelColor=555555&style=for-the-badge)](Collaborators.md#collaborators "Donate")
+[![Matrix](https://img.shields.io/matrix/yt-dlp:matrix.org?color=brightgreen&labelColor=555555&label=&logo=element&style=for-the-badge)](https://matrix.to/#/#yt-dlp:matrix.org "Matrix")
+[![Discord](https://img.shields.io/discord/807245652072857610?color=blue&labelColor=555555&label=&logo=discord&style=for-the-badge)](https://discord.gg/H5MNcFW63r "Discord")
+[![Supported Sites](https://img.shields.io/badge/-Supported_Sites-brightgreen.svg?style=for-the-badge)](supportedsites.md "Supported Sites")
+[![License: Unlicense](https://img.shields.io/badge/-Unlicense-blue.svg?style=for-the-badge)](LICENSE "License")
+[![CI Status](https://img.shields.io/github/actions/workflow/status/yt-dlp/yt-dlp/core.yml?branch=master&label=Tests&style=for-the-badge)](https://github.com/yt-dlp/yt-dlp/actions "CI Status")
+[![Commits](https://img.shields.io/github/commit-activity/m/yt-dlp/yt-dlp?label=commits&style=for-the-badge)](https://github.com/yt-dlp/yt-dlp/commits "Commit History")
+[![Last Commit](https://img.shields.io/github/last-commit/yt-dlp/yt-dlp/master?label=&style=for-the-badge&display_timestamp=committer)](https://github.com/yt-dlp/yt-dlp/pulse/monthly "Last activity")
+
+</div>
+<!-- MANPAGE: END EXCLUDED SECTION -->
+
+yt-dlp is a [youtube-dl](https://github.com/ytdl-org/youtube-dl) fork based on the now inactive [youtube-dlc](https://github.com/blackjack4494/yt-dlc). The main focus of this project is adding new features and patches while also keeping up to date with the original project
+
+<!-- MANPAGE: MOVE "USAGE AND OPTIONS" SECTION HERE -->
+
+<!-- MANPAGE: BEGIN EXCLUDED SECTION -->
+* [INSTALLATION](#installation)
+ * [Detailed instructions](https://github.com/yt-dlp/yt-dlp/wiki/Installation)
+ * [Release Files](#release-files)
+ * [Update](#update)
+ * [Dependencies](#dependencies)
+ * [Compile](#compile)
+* [USAGE AND OPTIONS](#usage-and-options)
+ * [General Options](#general-options)
+ * [Network Options](#network-options)
+ * [Geo-restriction](#geo-restriction)
+ * [Video Selection](#video-selection)
+ * [Download Options](#download-options)
+ * [Filesystem Options](#filesystem-options)
+ * [Thumbnail Options](#thumbnail-options)
+ * [Internet Shortcut Options](#internet-shortcut-options)
+ * [Verbosity and Simulation Options](#verbosity-and-simulation-options)
+ * [Workarounds](#workarounds)
+ * [Video Format Options](#video-format-options)
+ * [Subtitle Options](#subtitle-options)
+ * [Authentication Options](#authentication-options)
+ * [Post-processing Options](#post-processing-options)
+ * [SponsorBlock Options](#sponsorblock-options)
+ * [Extractor Options](#extractor-options)
+* [CONFIGURATION](#configuration)
+ * [Configuration file encoding](#configuration-file-encoding)
+ * [Authentication with netrc](#authentication-with-netrc)
+ * [Notes about environment variables](#notes-about-environment-variables)
+* [OUTPUT TEMPLATE](#output-template)
+ * [Output template examples](#output-template-examples)
+* [FORMAT SELECTION](#format-selection)
+ * [Filtering Formats](#filtering-formats)
+ * [Sorting Formats](#sorting-formats)
+ * [Format Selection examples](#format-selection-examples)
+* [MODIFYING METADATA](#modifying-metadata)
+ * [Modifying metadata examples](#modifying-metadata-examples)
+* [EXTRACTOR ARGUMENTS](#extractor-arguments)
+* [PLUGINS](#plugins)
+ * [Installing Plugins](#installing-plugins)
+ * [Developing Plugins](#developing-plugins)
+* [EMBEDDING YT-DLP](#embedding-yt-dlp)
+ * [Embedding examples](#embedding-examples)
+* [CHANGES FROM YOUTUBE-DL](#changes-from-youtube-dl)
+ * [New features](#new-features)
+ * [Differences in default behavior](#differences-in-default-behavior)
+ * [Deprecated options](#deprecated-options)
+* [CONTRIBUTING](CONTRIBUTING.md#contributing-to-yt-dlp)
+ * [Opening an Issue](CONTRIBUTING.md#opening-an-issue)
+ * [Developer Instructions](CONTRIBUTING.md#developer-instructions)
+* [WIKI](https://github.com/yt-dlp/yt-dlp/wiki)
+ * [FAQ](https://github.com/yt-dlp/yt-dlp/wiki/FAQ)
+<!-- MANPAGE: END EXCLUDED SECTION -->
+
+
+# INSTALLATION
+
+<!-- MANPAGE: BEGIN EXCLUDED SECTION -->
+[![Windows](https://img.shields.io/badge/-Windows_x64-blue.svg?style=for-the-badge&logo=windows)](https://github.com/yt-dlp/yt-dlp/releases/latest/download/yt-dlp.exe)
+[![Unix](https://img.shields.io/badge/-Linux/BSD-red.svg?style=for-the-badge&logo=linux)](https://github.com/yt-dlp/yt-dlp/releases/latest/download/yt-dlp)
+[![MacOS](https://img.shields.io/badge/-MacOS-lightblue.svg?style=for-the-badge&logo=apple)](https://github.com/yt-dlp/yt-dlp/releases/latest/download/yt-dlp_macos)
+[![PyPi](https://img.shields.io/badge/-PyPi-blue.svg?logo=pypi&labelColor=555555&style=for-the-badge)](https://pypi.org/project/yt-dlp)
+[![Source Tarball](https://img.shields.io/badge/-Source_tar-green.svg?style=for-the-badge)](https://github.com/yt-dlp/yt-dlp/releases/latest/download/yt-dlp.tar.gz)
+[![Other variants](https://img.shields.io/badge/-Other-grey.svg?style=for-the-badge)](#release-files)
+[![All versions](https://img.shields.io/badge/-All_Versions-lightgrey.svg?style=for-the-badge)](https://github.com/yt-dlp/yt-dlp/releases)
+<!-- MANPAGE: END EXCLUDED SECTION -->
+
+You can install yt-dlp using [the binaries](#release-files), [pip](https://pypi.org/project/yt-dlp) or a third-party package manager. See [the wiki](https://github.com/yt-dlp/yt-dlp/wiki/Installation) for detailed instructions
+
+
+<!-- MANPAGE: BEGIN EXCLUDED SECTION -->
+## RELEASE FILES
+
+#### Recommended
+
+File|Description
+:---|:---
+[yt-dlp](https://github.com/yt-dlp/yt-dlp/releases/latest/download/yt-dlp)|Platform-independent [zipimport](https://docs.python.org/3/library/zipimport.html) binary. Needs Python (recommended for **Linux/BSD**)
+[yt-dlp.exe](https://github.com/yt-dlp/yt-dlp/releases/latest/download/yt-dlp.exe)|Windows (Win7 SP1+) standalone x64 binary (recommended for **Windows**)
+[yt-dlp_macos](https://github.com/yt-dlp/yt-dlp/releases/latest/download/yt-dlp_macos)|Universal MacOS (10.15+) standalone executable (recommended for **MacOS**)
+
+#### Alternatives
+
+File|Description
+:---|:---
+[yt-dlp_x86.exe](https://github.com/yt-dlp/yt-dlp/releases/latest/download/yt-dlp_x86.exe)|Windows (Win7 SP1+) standalone x86 (32-bit) binary
+[yt-dlp_min.exe](https://github.com/yt-dlp/yt-dlp/releases/latest/download/yt-dlp_min.exe)|Windows (Win7 SP1+) standalone x64 binary built with `py2exe`<br/> ([Not recommended](#standalone-py2exe-builds-windows))
+[yt-dlp_linux](https://github.com/yt-dlp/yt-dlp/releases/latest/download/yt-dlp_linux)|Linux standalone x64 binary
+[yt-dlp_linux.zip](https://github.com/yt-dlp/yt-dlp/releases/latest/download/yt-dlp_linux.zip)|Unpackaged Linux executable (no auto-update)
+[yt-dlp_linux_armv7l](https://github.com/yt-dlp/yt-dlp/releases/latest/download/yt-dlp_linux_armv7l)|Linux standalone armv7l (32-bit) binary
+[yt-dlp_linux_aarch64](https://github.com/yt-dlp/yt-dlp/releases/latest/download/yt-dlp_linux_aarch64)|Linux standalone aarch64 (64-bit) binary
+[yt-dlp_win.zip](https://github.com/yt-dlp/yt-dlp/releases/latest/download/yt-dlp_win.zip)|Unpackaged Windows executable (no auto-update)
+[yt-dlp_macos.zip](https://github.com/yt-dlp/yt-dlp/releases/latest/download/yt-dlp_macos.zip)|Unpackaged MacOS (10.15+) executable (no auto-update)
+[yt-dlp_macos_legacy](https://github.com/yt-dlp/yt-dlp/releases/latest/download/yt-dlp_macos_legacy)|MacOS (10.9+) standalone x64 executable
+
+#### Misc
+
+File|Description
+:---|:---
+[yt-dlp.tar.gz](https://github.com/yt-dlp/yt-dlp/releases/latest/download/yt-dlp.tar.gz)|Source tarball
+[SHA2-512SUMS](https://github.com/yt-dlp/yt-dlp/releases/latest/download/SHA2-512SUMS)|GNU-style SHA512 sums
+[SHA2-512SUMS.sig](https://github.com/yt-dlp/yt-dlp/releases/latest/download/SHA2-512SUMS.sig)|GPG signature file for SHA512 sums
+[SHA2-256SUMS](https://github.com/yt-dlp/yt-dlp/releases/latest/download/SHA2-256SUMS)|GNU-style SHA256 sums
+[SHA2-256SUMS.sig](https://github.com/yt-dlp/yt-dlp/releases/latest/download/SHA2-256SUMS.sig)|GPG signature file for SHA256 sums
+
+The public key that can be used to verify the GPG signatures is [available here](https://github.com/yt-dlp/yt-dlp/blob/master/public.key)
+Example usage:
+```
+curl -L https://github.com/yt-dlp/yt-dlp/raw/master/public.key | gpg --import
+gpg --verify SHA2-256SUMS.sig SHA2-256SUMS
+gpg --verify SHA2-512SUMS.sig SHA2-512SUMS
+```
+<!-- MANPAGE: END EXCLUDED SECTION -->
+
+**Note**: The manpages, shell completion (autocomplete) files etc. are available inside the [source tarball](https://github.com/yt-dlp/yt-dlp/releases/latest/download/yt-dlp.tar.gz)
+
+
+## UPDATE
+You can use `yt-dlp -U` to update if you are using the [release binaries](#release-files)
+
+If you [installed with pip](https://github.com/yt-dlp/yt-dlp/wiki/Installation#with-pip), simply re-run the same command that was used to install the program
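+
+E.g. if you originally installed with `python3 -m pip install yt-dlp`, the corresponding update command is:
+```
+python3 -m pip install -U yt-dlp
+```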
+
+For other third-party package managers, see [the wiki](https://github.com/yt-dlp/yt-dlp/wiki/Installation#third-party-package-managers) or refer to their documentation
+
+<a id="update-channels"></a>
+
+There are currently three release channels for binaries: `stable`, `nightly` and `master`.
+
+* `stable` is the default channel, and many of its changes have been tested by users of the `nightly` and `master` channels.
+* The `nightly` channel has releases scheduled to build every day around midnight UTC, for a snapshot of the project's new patches and changes. This is the **recommended channel for regular users** of yt-dlp. The `nightly` releases are available from [yt-dlp/yt-dlp-nightly-builds](https://github.com/yt-dlp/yt-dlp-nightly-builds/releases) or as development releases of the `yt-dlp` PyPI package (which can be installed with pip's `--pre` flag).
+* The `master` channel features releases that are built after each push to the master branch, and these will have the very latest fixes and additions, but may also be more prone to regressions. They are available from [yt-dlp/yt-dlp-master-builds](https://github.com/yt-dlp/yt-dlp-master-builds/releases).
+
+When using `--update`/`-U`, a release binary will only update to its current channel.
+`--update-to CHANNEL` can be used to switch to a different channel when a newer version is available. `--update-to [CHANNEL@]TAG` can also be used to upgrade or downgrade to specific tags from a channel.
+
+You may also use `--update-to <repository>` (`<owner>/<repository>`) to update to a channel on a completely different repository. Be careful with what repository you are updating to, though; no verification is done for binaries from different repositories
+
+Example usage:
+* `yt-dlp --update-to master` switch to the `master` channel and update to its latest release
+* `yt-dlp --update-to stable@2023.07.06` upgrade/downgrade to the `stable` channel release tagged `2023.07.06`
+* `yt-dlp --update-to 2023.10.07` upgrade/downgrade to tag `2023.10.07` if it exists on the current channel
+* `yt-dlp --update-to example/yt-dlp@2023.09.24` upgrade/downgrade to the release from the `example/yt-dlp` repository, tag `2023.09.24`
+
+**Important**: Any user experiencing an issue with the `stable` release should install or update to the `nightly` release before submitting a bug report:
+```
+# To update to nightly from stable executable/binary:
+yt-dlp --update-to nightly
+
+# To install nightly with pip:
+python3 -m pip install -U --pre yt-dlp[default]
+```
+
+## DEPENDENCIES
+Python versions 3.8+ (CPython and PyPy) are supported. Other versions and implementations may or may not work correctly.
+
+<!-- Python 3.5+ uses VC++14 and it is already embedded in the binary created
+<!x-- https://www.microsoft.com/en-us/download/details.aspx?id=26999 --x>
+On windows, [Microsoft Visual C++ 2010 SP1 Redistributable Package (x86)](https://download.microsoft.com/download/1/6/5/165255E7-1014-4D0A-B094-B6A430A6BFFC/vcredist_x86.exe) is also necessary to run yt-dlp. You probably already have this, but if the executable throws an error due to missing `MSVCR100.dll` you need to install it manually.
+-->
+
+While all the other dependencies are optional, `ffmpeg` and `ffprobe` are highly recommended
+
+### Strongly recommended
+
+* [**ffmpeg** and **ffprobe**](https://www.ffmpeg.org) - Required for [merging separate video and audio files](#format-selection) as well as for various [post-processing](#post-processing-options) tasks. License [depends on the build](https://www.ffmpeg.org/legal.html)
+
+ There are bugs in ffmpeg that cause various issues when used alongside yt-dlp. Since ffmpeg is such an important dependency, we provide [custom builds](https://github.com/yt-dlp/FFmpeg-Builds#ffmpeg-static-auto-builds) with patches for some of these issues at [yt-dlp/FFmpeg-Builds](https://github.com/yt-dlp/FFmpeg-Builds). See [the readme](https://github.com/yt-dlp/FFmpeg-Builds#patches-applied) for details on the specific issues solved by these builds
+
+ **Important**: What you need is ffmpeg *binary*, **NOT** [the Python package of the same name](https://pypi.org/project/ffmpeg)
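+
+ If the ffmpeg you want is not the first one in `PATH` (e.g. one of the patched builds above; the path below is illustrative), you can point yt-dlp at it explicitly with `--ffmpeg-location`:
+ ```
+ yt-dlp --ffmpeg-location /opt/ffmpeg/bin/ffmpeg "https://some/video"
+ ```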
+
+### Networking
+* [**certifi**](https://github.com/certifi/python-certifi)\* - Provides Mozilla's root certificate bundle. Licensed under [MPLv2](https://github.com/certifi/python-certifi/blob/master/LICENSE)
+* [**brotli**](https://github.com/google/brotli)\* or [**brotlicffi**](https://github.com/python-hyper/brotlicffi) - [Brotli](https://en.wikipedia.org/wiki/Brotli) content encoding support. Both licensed under MIT <sup>[1](https://github.com/google/brotli/blob/master/LICENSE) [2](https://github.com/python-hyper/brotlicffi/blob/master/LICENSE) </sup>
+* [**websockets**](https://github.com/aaugustin/websockets)\* - For downloading over websocket. Licensed under [BSD-3-Clause](https://github.com/aaugustin/websockets/blob/main/LICENSE)
+* [**requests**](https://github.com/psf/requests)\* - HTTP library. For HTTPS proxy and persistent connections support. Licensed under [Apache-2.0](https://github.com/psf/requests/blob/main/LICENSE)
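+
+These are all installable with pip; e.g. the `default` extra of the PyPI package (also used in the `nightly` install example above) includes the recommended ones:
+```
+python3 -m pip install -U "yt-dlp[default]"
+```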
+
+### Metadata
+
+* [**mutagen**](https://github.com/quodlibet/mutagen)\* - For `--embed-thumbnail` in certain formats. Licensed under [GPLv2+](https://github.com/quodlibet/mutagen/blob/master/COPYING)
+* [**AtomicParsley**](https://github.com/wez/atomicparsley) - For `--embed-thumbnail` in `mp4`/`m4a` files when `mutagen`/`ffmpeg` cannot. Licensed under [GPLv2+](https://github.com/wez/atomicparsley/blob/master/COPYING)
+* [**xattr**](https://github.com/xattr/xattr), [**pyxattr**](https://github.com/iustin/pyxattr) or [**setfattr**](http://savannah.nongnu.org/projects/attr) - For writing xattr metadata (`--xattr`) on **Mac** and **BSD**. Licensed under [MIT](https://github.com/xattr/xattr/blob/master/LICENSE.txt), [LGPL2.1](https://github.com/iustin/pyxattr/blob/master/COPYING) and [GPLv2+](http://git.savannah.nongnu.org/cgit/attr.git/tree/doc/COPYING) respectively
+
+### Misc
+
+* [**pycryptodomex**](https://github.com/Legrandin/pycryptodome)\* - For decrypting AES-128 HLS streams and various other data. Licensed under [BSD-2-Clause](https://github.com/Legrandin/pycryptodome/blob/master/LICENSE.rst)
+* [**phantomjs**](https://github.com/ariya/phantomjs) - Used in extractors where javascript needs to be run. Licensed under [BSD-3-Clause](https://github.com/ariya/phantomjs/blob/master/LICENSE.BSD)
+* [**secretstorage**](https://github.com/mitya57/secretstorage)\* - For `--cookies-from-browser` to access the **Gnome** keyring while decrypting cookies of **Chromium**-based browsers on **Linux**. Licensed under [BSD-3-Clause](https://github.com/mitya57/secretstorage/blob/master/LICENSE)
+* Any external downloader that you want to use with `--downloader`
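+
+E.g. to use aria2c for plain http/ftp downloads while keeping the native downloader for dash/m3u8 (the same combination shown under `--downloader` below):
+```
+yt-dlp --downloader aria2c --downloader "dash,m3u8:native" "https://some/video"
+```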
+
+### Deprecated
+
+* [**avconv** and **avprobe**](https://www.libav.org) - Now **deprecated** alternative to ffmpeg. License [depends on the build](https://libav.org/legal)
+* [**sponskrub**](https://github.com/faissaloo/SponSkrub) - For using the now **deprecated** [sponskrub options](#sponskrub-options). Licensed under [GPLv3+](https://github.com/faissaloo/SponSkrub/blob/master/LICENCE.md)
+* [**rtmpdump**](http://rtmpdump.mplayerhq.hu) - For downloading `rtmp` streams. ffmpeg can be used instead with `--downloader ffmpeg`. Licensed under [GPLv2+](http://rtmpdump.mplayerhq.hu)
+* [**mplayer**](http://mplayerhq.hu/design7/info.html) or [**mpv**](https://mpv.io) - For downloading `rtsp`/`mms` streams. ffmpeg can be used instead with `--downloader ffmpeg`. Licensed under [GPLv2+](https://github.com/mpv-player/mpv/blob/master/Copyright)
+
+To use or redistribute the dependencies, you must agree to their respective licensing terms.
+
+The standalone release binaries are built with the Python interpreter and the packages marked with **\*** included.
+
+If you do not have the necessary dependencies for a task you are attempting, yt-dlp will warn you. All the currently available dependencies are visible at the top of the `--verbose` output
+
+
+## COMPILE
+
+### Standalone PyInstaller Builds
+To build the standalone executable, you must have Python and `pyinstaller` (plus any of yt-dlp's [optional dependencies](#dependencies) if needed). The executable will be built for the same CPU architecture as the Python used.
+
+You can run the following commands:
+
+```
+python3 devscripts/install_deps.py --include pyinstaller
+python3 devscripts/make_lazy_extractors.py
+python3 -m bundle.pyinstaller
+```
+
+On some systems, you may need to use `py` or `python` instead of `python3`.
+
+`python -m bundle.pyinstaller` accepts any arguments that can be passed to `pyinstaller`, such as `--onefile/-F` or `--onedir/-D`, which are further [documented here](https://pyinstaller.org/en/stable/usage.html#what-to-generate).
+
+**Note**: PyInstaller versions below 4.4 [do not support](https://github.com/pyinstaller/pyinstaller#requirements-and-tested-platforms) Python installed from the Windows store without using a virtual environment.
+
+**Important**: Running `pyinstaller` directly **instead of** using `python -m bundle.pyinstaller` is **not** officially supported. This may or may not work correctly.
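+
+E.g. to pass `--onedir` through for a one-folder build:
+```
+python3 -m bundle.pyinstaller --onedir
+```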
+
+### Platform-independent Binary (UNIX)
+You will need the build tools `python` (3.8+), `zip`, `make` (GNU), `pandoc`\* and `pytest`\*.
+
+After installing these, simply run `make`.
+
+You can also run `make yt-dlp` instead to compile only the binary without updating any of the additional files. (The build tools marked with **\*** are not needed for this)
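+
+In short:
+```
+make          # build the binary and the additional files
+make yt-dlp   # build only the binary
+```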
+
+### Standalone Py2Exe Builds (Windows)
+
+While we provide the option to build with [py2exe](https://www.py2exe.org), it is recommended to build [using PyInstaller](#standalone-pyinstaller-builds) instead, since the py2exe builds **cannot contain `pycryptodomex`/`certifi` and need VC++14** on the target computer to run.
+
+If you wish to build it anyway, install Python (if it is not already installed), then run the following commands:
+
+```
+py devscripts/install_deps.py --include py2exe
+py devscripts/make_lazy_extractors.py
+py -m bundle.py2exe
+```
+
+### Related scripts
+
+* **`devscripts/install_deps.py`** - Install dependencies for yt-dlp.
+* **`devscripts/update-version.py`** - Update the version number based on current date.
+* **`devscripts/set-variant.py`** - Set the build variant of the executable.
+* **`devscripts/make_changelog.py`** - Create a markdown changelog using short commit messages and update `CONTRIBUTORS` file.
+* **`devscripts/make_lazy_extractors.py`** - Create lazy extractors. Running this before building the binaries (any variant) will improve their startup performance. Set the environment variable `YTDLP_NO_LAZY_EXTRACTORS=1` if you wish to forcefully disable lazy extractor loading.
+
+Note: See their `--help` for more info.
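+
+E.g. to build with lazy extractor loading forcefully disabled:
+```
+YTDLP_NO_LAZY_EXTRACTORS=1 python3 -m bundle.pyinstaller
+```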
+
+### Forking the project
+If you fork the project on GitHub, you can run your fork's [build workflow](.github/workflows/build.yml) to automatically build the selected version(s) as artifacts. Alternatively, you can run the [release workflow](.github/workflows/release.yml) or enable the [nightly workflow](.github/workflows/release-nightly.yml) to create full (pre-)releases.
+
+# USAGE AND OPTIONS
+
+<!-- MANPAGE: BEGIN EXCLUDED SECTION -->
+ yt-dlp [OPTIONS] [--] URL [URL...]
+
+`Ctrl+F` is your friend :D
+<!-- MANPAGE: END EXCLUDED SECTION -->
+
+<!-- Auto generated -->
+## General Options:
+ -h, --help Print this help text and exit
+ --version Print program version and exit
+ -U, --update Update this program to the latest version
+ --no-update Do not check for updates (default)
+ --update-to [CHANNEL]@[TAG] Upgrade/downgrade to a specific version.
+ CHANNEL can be a repository as well. CHANNEL
+ and TAG default to "stable" and "latest"
+ respectively if omitted; See "UPDATE" for
+ details. Supported channels: stable,
+ nightly, master
+ -i, --ignore-errors Ignore download and postprocessing errors.
+ The download will be considered successful
+ even if the postprocessing fails
+ --no-abort-on-error Continue with next video on download errors;
+ e.g. to skip unavailable videos in a
+ playlist (default)
+ --abort-on-error Abort downloading of further videos if an
+ error occurs (Alias: --no-ignore-errors)
+ --dump-user-agent Display the current user-agent and exit
+ --list-extractors List all supported extractors and exit
+ --extractor-descriptions Output descriptions of all supported
+ extractors and exit
+ --use-extractors NAMES Extractor names to use separated by commas.
+ You can also use regexes, "all", "default"
+ and "end" (end URL matching); e.g. --ies
+ "holodex.*,end,youtube". Prefix the name
+ with a "-" to exclude it, e.g. --ies
+ default,-generic. Use --list-extractors for
+ a list of extractor names. (Alias: --ies)
+ --default-search PREFIX Use this prefix for unqualified URLs. E.g.
+ "gvsearch2:python" downloads two videos from
+ google videos for the search term "python".
+ Use the value "auto" to let yt-dlp guess
+ ("auto_warning" to emit a warning when
+ guessing). "error" just throws an error. The
+ default value "fixup_error" repairs broken
+ URLs, but emits an error if this is not
+ possible instead of searching
+ --ignore-config Don't load any more configuration files
+ except those given to --config-locations.
+ For backward compatibility, if this option
+ is found inside the system configuration
+ file, the user configuration is not loaded.
+ (Alias: --no-config)
+ --no-config-locations Do not load any custom configuration files
+ (default). When given inside a configuration
+ file, ignore all previous --config-locations
+ defined in the current file
+ --config-locations PATH Location of the main configuration file;
+ either the path to the config or its
+ containing directory ("-" for stdin). Can be
+ used multiple times and inside other
+ configuration files
+ --flat-playlist Do not extract the videos of a playlist,
+ only list them
+ --no-flat-playlist Fully extract the videos of a playlist
+ (default)
+ --live-from-start Download livestreams from the start.
+ Currently only supported for YouTube
+ (Experimental)
+ --no-live-from-start Download livestreams from the current time
+ (default)
+ --wait-for-video MIN[-MAX] Wait for scheduled streams to become
+ available. Pass the minimum number of
+ seconds (or range) to wait between retries
+ --no-wait-for-video Do not wait for scheduled streams (default)
+ --mark-watched Mark videos watched (even with --simulate)
+ --no-mark-watched Do not mark videos watched (default)
+ --color [STREAM:]POLICY Whether to emit color codes in output,
+ optionally prefixed by the STREAM (stdout or
+ stderr) to apply the setting to. Can be one
+ of "always", "auto" (default), "never", or
+ "no_color" (use non color terminal
+ sequences). Can be used multiple times
+ --compat-options OPTS Options that can help keep compatibility
+ with youtube-dl or youtube-dlc
+ configurations by reverting some of the
+ changes made in yt-dlp. See "Differences in
+ default behavior" for details
+ --alias ALIASES OPTIONS Create aliases for an option string. Unless
+ an alias starts with a dash "-", it is
+ prefixed with "--". Arguments are parsed
+ according to the Python string formatting
+ mini-language. E.g. --alias get-audio,-X
+ "-S=aext:{0},abr -x --audio-format {0}"
+ creates options "--get-audio" and "-X" that
+ takes an argument (ARG0) and expands to
+ "-S=aext:ARG0,abr -x --audio-format ARG0".
+ All defined aliases are listed in the --help
+ output. Alias options can trigger more
+ aliases; so be careful to avoid defining
+ recursive options. As a safety measure, each
+ alias may be triggered a maximum of 100
+ times. This option can be used multiple times
+
+## Network Options:
+ --proxy URL Use the specified HTTP/HTTPS/SOCKS proxy. To
+ enable SOCKS proxy, specify a proper scheme,
+ e.g. socks5://user:pass@127.0.0.1:1080/.
+ Pass in an empty string (--proxy "") for
+ direct connection
+ --socket-timeout SECONDS Time to wait before giving up, in seconds
+ --source-address IP Client-side IP address to bind to
+ -4, --force-ipv4 Make all connections via IPv4
+ -6, --force-ipv6 Make all connections via IPv6
+ --enable-file-urls Enable file:// URLs. This is disabled by
+ default for security reasons.
+
+## Geo-restriction:
+ --geo-verification-proxy URL Use this proxy to verify the IP address for
+ some geo-restricted sites. The default proxy
+ specified by --proxy (or none, if the option
+ is not present) is used for the actual
+ downloading
+ --xff VALUE How to fake X-Forwarded-For HTTP header to
+ try bypassing geographic restriction. One of
+ "default" (only when known to be useful),
+ "never", an IP block in CIDR notation, or a
+ two-letter ISO 3166-2 country code
+
+## Video Selection:
+ -I, --playlist-items ITEM_SPEC Comma separated playlist_index of the items
+ to download. You can specify a range using
+ "[START]:[STOP][:STEP]". For backward
+ compatibility, START-STOP is also supported.
+ Use negative indices to count from the right
+ and negative STEP to download in reverse
+ order. E.g. "-I 1:3,7,-5::2" used on a
+ playlist of size 15 will download the items
+ at index 1,2,3,7,11,13,15
+ --min-filesize SIZE Abort download if filesize is smaller than
+ SIZE, e.g. 50k or 44.6M
+ --max-filesize SIZE Abort download if filesize is larger than
+ SIZE, e.g. 50k or 44.6M
+ --date DATE Download only videos uploaded on this date.
+ The date can be "YYYYMMDD" or in the format
+ [now|today|yesterday][-N[day|week|month|year]].
+ E.g. "--date today-2weeks" downloads only
+ videos uploaded on the same day two weeks ago
+ --datebefore DATE Download only videos uploaded on or before
+ this date. The date formats accepted are the
+ same as --date
+ --dateafter DATE Download only videos uploaded on or after
+ this date. The date formats accepted are the
+ same as --date
+ --match-filters FILTER Generic video filter. Any "OUTPUT TEMPLATE"
+ field can be compared with a number or a
+ string using the operators defined in
+ "Filtering Formats". You can also simply
+ specify a field to match if the field is
+ present, use "!field" to check if the field
+ is not present, and "&" to check multiple
+ conditions. Use a "\" to escape "&" or
+ quotes if needed. If used multiple times,
+ the filter matches if at least one of the
+ conditions is met. E.g. --match-filter
+ !is_live --match-filter "like_count>?100 &
+ description~='(?i)\bcats \& dogs\b'" matches
+ only videos that are not live OR those that
+ have a like count more than 100 (or the like
+ field is not available) and also has a
+ description that contains the phrase "cats &
+ dogs" (caseless). Use "--match-filter -" to
+ interactively ask whether to download each
+ video
+ --no-match-filters Do not use any --match-filter (default)
+ --break-match-filters FILTER Same as "--match-filters" but stops the
+ download process when a video is rejected
+ --no-break-match-filters Do not use any --break-match-filters (default)
+ --no-playlist Download only the video, if the URL refers
+ to a video and a playlist
+ --yes-playlist Download the playlist, if the URL refers to
+ a video and a playlist
+ --age-limit YEARS Download only videos suitable for the given
+ age
+ --download-archive FILE Download only videos not listed in the
+ archive file. Record the IDs of all
+ downloaded videos in it
+ --no-download-archive Do not use archive file (default)
+ --max-downloads NUMBER Abort after downloading NUMBER files
+ --break-on-existing Stop the download process when encountering
+ a file that is in the archive
+ --break-per-input Alters --max-downloads, --break-on-existing,
+ --break-match-filter, and autonumber to
+ reset per input URL
+ --no-break-per-input --break-on-existing and similar options
+ terminate the entire download queue
+ --skip-playlist-after-errors N Number of allowed failures until the rest of
+ the playlist is skipped
+
+## Download Options:
+ -N, --concurrent-fragments N Number of fragments of a dash/hlsnative
+ video that should be downloaded concurrently
+ (default is 1)
+ -r, --limit-rate RATE Maximum download rate in bytes per second,
+ e.g. 50K or 4.2M
+ --throttled-rate RATE Minimum download rate in bytes per second
+ below which throttling is assumed and the
+ video data is re-extracted, e.g. 100K
+ -R, --retries RETRIES Number of retries (default is 10), or
+ "infinite"
+ --file-access-retries RETRIES Number of times to retry on file access
+ error (default is 3), or "infinite"
+ --fragment-retries RETRIES Number of retries for a fragment (default is
+ 10), or "infinite" (DASH, hlsnative and ISM)
+ --retry-sleep [TYPE:]EXPR Time to sleep between retries in seconds
+ (optionally) prefixed by the type of retry
+ (http (default), fragment, file_access,
+ extractor) to apply the sleep to. EXPR can
+ be a number, linear=START[:END[:STEP=1]] or
+ exp=START[:END[:BASE=2]]. This option can be
+ used multiple times to set the sleep for the
+ different retry types, e.g. --retry-sleep
+ linear=1::2 --retry-sleep fragment:exp=1:20
+ --skip-unavailable-fragments Skip unavailable fragments for DASH,
+ hlsnative and ISM downloads (default)
+ (Alias: --no-abort-on-unavailable-fragments)
+ --abort-on-unavailable-fragments
+ Abort download if a fragment is unavailable
+ (Alias: --no-skip-unavailable-fragments)
+ --keep-fragments Keep downloaded fragments on disk after
+ downloading is finished
+ --no-keep-fragments Delete downloaded fragments after
+ downloading is finished (default)
+ --buffer-size SIZE Size of download buffer, e.g. 1024 or 16K
+ (default is 1024)
+ --resize-buffer The buffer size is automatically resized
+ from an initial value of --buffer-size
+ (default)
+ --no-resize-buffer Do not automatically adjust the buffer size
+ --http-chunk-size SIZE Size of a chunk for chunk-based HTTP
+ downloading, e.g. 10485760 or 10M (default
+ is disabled). May be useful for bypassing
+ bandwidth throttling imposed by a webserver
+ (experimental)
+ --playlist-random Download playlist videos in random order
+ --lazy-playlist Process entries in the playlist as they are
+ received. This disables n_entries,
+ --playlist-random and --playlist-reverse
+ --no-lazy-playlist Process videos in the playlist only after
+ the entire playlist is parsed (default)
+ --xattr-set-filesize Set file xattribute ytdl.filesize with
+ expected file size
+ --hls-use-mpegts Use the mpegts container for HLS videos;
+ allowing some players to play the video
+ while downloading, and reducing the chance
+ of file corruption if download is
+ interrupted. This is enabled by default for
+ live streams
+ --no-hls-use-mpegts Do not use the mpegts container for HLS
+ videos. This is default when not downloading
+ live streams
+ --download-sections REGEX Download only chapters that match the
+ regular expression. A "*" prefix denotes
+ time-range instead of chapter. Negative
+ timestamps are calculated from the end.
+ "*from-url" can be used to download between
+ the "start_time" and "end_time" extracted
+ from the URL. Needs ffmpeg. This option can
+ be used multiple times to download multiple
+ sections, e.g. --download-sections
+ "*10:15-inf" --download-sections "intro"
+ --downloader [PROTO:]NAME Name or path of the external downloader to
+ use (optionally) prefixed by the protocols
+ (http, ftp, m3u8, dash, rtsp, rtmp, mms) to
+ use it for. Currently supports native,
+ aria2c, avconv, axel, curl, ffmpeg, httpie,
+ wget. You can use this option multiple times
+ to set different downloaders for different
+ protocols. E.g. --downloader aria2c
+ --downloader "dash,m3u8:native" will use
+ aria2c for http/ftp downloads, and the
+ native downloader for dash/m3u8 downloads
+ (Alias: --external-downloader)
+ --downloader-args NAME:ARGS Give these arguments to the external
+ downloader. Specify the downloader name and
+ the arguments separated by a colon ":". For
+ ffmpeg, arguments can be passed to different
+ positions using the same syntax as
+ --postprocessor-args. You can use this
+ option multiple times to give different
+ arguments to different downloaders (Alias:
+ --external-downloader-args)
+
+## Filesystem Options:
+ -a, --batch-file FILE File containing URLs to download ("-" for
+ stdin), one URL per line. Lines starting
+ with "#", ";" or "]" are considered as
+ comments and ignored
+ --no-batch-file Do not read URLs from batch file (default)
+ -P, --paths [TYPES:]PATH The paths where the files should be
+ downloaded. Specify the type of file and the
+ path separated by a colon ":". All the same
+ TYPES as --output are supported.
+ Additionally, you can also provide "home"
+ (default) and "temp" paths. All intermediary
+ files are first downloaded to the temp path
+ and then the final files are moved over to
+ the home path after download is finished.
+ This option is ignored if --output is an
+ absolute path
+ -o, --output [TYPES:]TEMPLATE Output filename template; see "OUTPUT
+ TEMPLATE" for details
+ --output-na-placeholder TEXT Placeholder for unavailable fields in
+ --output (default: "NA")
+ --restrict-filenames Restrict filenames to only ASCII characters,
+ and avoid "&" and spaces in filenames
+ --no-restrict-filenames Allow Unicode characters, "&" and spaces in
+ filenames (default)
+ --windows-filenames Force filenames to be Windows-compatible
+ --no-windows-filenames Make filenames Windows-compatible only if
+ using Windows (default)
+ --trim-filenames LENGTH Limit the filename length (excluding
+ extension) to the specified number of
+ characters
+ -w, --no-overwrites Do not overwrite any files
+ --force-overwrites Overwrite all video and metadata files. This
+ option includes --no-continue
+ --no-force-overwrites Do not overwrite the video, but overwrite
+ related files (default)
+ -c, --continue Resume partially downloaded files/fragments
+ (default)
+ --no-continue Do not resume partially downloaded
+ fragments. If the file is not fragmented,
+ restart download of the entire file
+ --part Use .part files instead of writing directly
+ into output file (default)
+ --no-part Do not use .part files - write directly into
+ output file
+ --mtime Use the Last-modified header to set the file
+ modification time (default)
+ --no-mtime Do not use the Last-modified header to set
+ the file modification time
+ --write-description Write video description to a .description file
+ --no-write-description Do not write video description (default)
+ --write-info-json Write video metadata to a .info.json file
+ (this may contain personal information)
+ --no-write-info-json Do not write video metadata (default)
+ --write-playlist-metafiles Write playlist metadata in addition to the
+ video metadata when using --write-info-json,
+ --write-description etc. (default)
+ --no-write-playlist-metafiles Do not write playlist metadata when using
+ --write-info-json, --write-description etc.
+ --clean-info-json Remove some internal metadata such as
+ filenames from the infojson (default)
+ --no-clean-info-json Write all fields to the infojson
+ --write-comments Retrieve video comments to be placed in the
+ infojson. The comments are fetched even
+ without this option if the extraction is
+ known to be quick (Alias: --get-comments)
+ --no-write-comments Do not retrieve video comments unless the
+ extraction is known to be quick (Alias:
+ --no-get-comments)
+ --load-info-json FILE JSON file containing the video information
+ (created with the "--write-info-json" option)
+ --cookies FILE Netscape formatted file to read cookies from
+ and dump cookie jar in
+ --no-cookies Do not read/dump cookies from/to file
+ (default)
+ --cookies-from-browser BROWSER[+KEYRING][:PROFILE][::CONTAINER]
+ The name of the browser to load cookies
+ from. Currently supported browsers are:
+ brave, chrome, chromium, edge, firefox,
+ opera, safari, vivaldi. Optionally, the
+ KEYRING used for decrypting Chromium cookies
+ on Linux, the name/path of the PROFILE to
+ load cookies from, and the CONTAINER name
+ (if Firefox) ("none" for no container) can
+ be given with their respective separators.
+ By default, all containers of the most
+ recently accessed profile are used.
+ Currently supported keyrings are: basictext,
+ gnomekeyring, kwallet, kwallet5, kwallet6
+ --no-cookies-from-browser Do not load cookies from browser (default)
+ --cache-dir DIR Location in the filesystem where yt-dlp can
+ store some downloaded information (such as
+ client ids and signatures) permanently. By
+ default ${XDG_CACHE_HOME}/yt-dlp
+ --no-cache-dir Disable filesystem caching
+ --rm-cache-dir Delete all filesystem cache files
+
+## Thumbnail Options:
+ --write-thumbnail Write thumbnail image to disk
+ --no-write-thumbnail Do not write thumbnail image to disk (default)
+ --write-all-thumbnails Write all thumbnail image formats to disk
+ --list-thumbnails List available thumbnails of each video.
+ Simulate unless --no-simulate is used
+
+## Internet Shortcut Options:
+ --write-link Write an internet shortcut file, depending
+ on the current platform (.url, .webloc or
+ .desktop). The URL may be cached by the OS
+ --write-url-link Write a .url Windows internet shortcut. The
+ OS caches the URL based on the file path
+ --write-webloc-link Write a .webloc macOS internet shortcut
+ --write-desktop-link Write a .desktop Linux internet shortcut
+
+## Verbosity and Simulation Options:
+ -q, --quiet Activate quiet mode. If used with --verbose,
+ print the log to stderr
+ --no-quiet Deactivate quiet mode. (Default)
+ --no-warnings Ignore warnings
+ -s, --simulate Do not download the video and do not write
+ anything to disk
+ --no-simulate Download the video even if printing/listing
+ options are used
+ --ignore-no-formats-error Ignore "No video formats" error. Useful for
+ extracting metadata even if the videos are
+ not actually available for download
+ (experimental)
+ --no-ignore-no-formats-error Throw error when no downloadable video
+ formats are found (default)
+ --skip-download Do not download the video but write all
+ related files (Alias: --no-download)
+ -O, --print [WHEN:]TEMPLATE Field name or output template to print to
+ screen, optionally prefixed with when to
+ print it, separated by a ":". Supported
+ values of "WHEN" are the same as that of
+ --use-postprocessor (default: video).
+ Implies --quiet. Implies --simulate unless
+ --no-simulate or later stages of WHEN are
+ used. This option can be used multiple times
+ --print-to-file [WHEN:]TEMPLATE FILE
+ Append given template to the file. The
+ values of WHEN and TEMPLATE are same as that
+ of --print. FILE uses the same syntax as the
+ output template. This option can be used
+ multiple times
+ -j, --dump-json Quiet, but print JSON information for each
+ video. Simulate unless --no-simulate is
+ used. See "OUTPUT TEMPLATE" for a
+ description of available keys
+ -J, --dump-single-json Quiet, but print JSON information for each
+ url or infojson passed. Simulate unless
+ --no-simulate is used. If the URL refers to
+ a playlist, the whole playlist information
+ is dumped in a single line
+ --force-write-archive Force download archive entries to be written
+ as far as no errors occur, even if -s or
+ another simulation option is used (Alias:
+ --force-download-archive)
+ --newline Output progress bar as new lines
+ --no-progress Do not print progress bar
+ --progress Show progress bar, even if in quiet mode
+ --console-title Display progress in console titlebar
+ --progress-template [TYPES:]TEMPLATE
+ Template for progress outputs, optionally
+ prefixed with one of "download:" (default),
+ "download-title:" (the console title),
+ "postprocess:", or "postprocess-title:".
+ The video's fields are accessible under the
+ "info" key and the progress attributes are
+ accessible under "progress" key. E.g.
+ --console-title --progress-template
+ "download-title:%(info.id)s-%(progress.eta)s"
+ -v, --verbose Print various debugging information
+ --dump-pages Print downloaded pages encoded using base64
+ to debug problems (very verbose)
+ --write-pages Write downloaded intermediary pages to files
+ in the current directory to debug problems
+ --print-traffic Display sent and read HTTP traffic
+
+## Workarounds:
+ --encoding ENCODING Force the specified encoding (experimental)
+ --legacy-server-connect Explicitly allow HTTPS connection to servers
+ that do not support RFC 5746 secure
+ renegotiation
+ --no-check-certificates Suppress HTTPS certificate validation
+ --prefer-insecure Use an unencrypted connection to retrieve
+ information about the video (Currently
+ supported only for YouTube)
+ --add-headers FIELD:VALUE Specify a custom HTTP header and its value,
+ separated by a colon ":". You can use this
+ option multiple times
+ --bidi-workaround Work around terminals that lack
+ bidirectional text support. Requires bidiv
+ or fribidi executable in PATH
+ --sleep-requests SECONDS Number of seconds to sleep between requests
+ during data extraction
+ --sleep-interval SECONDS Number of seconds to sleep before each
+ download. This is the minimum time to sleep
+ when used along with --max-sleep-interval
+ (Alias: --min-sleep-interval)
+ --max-sleep-interval SECONDS Maximum number of seconds to sleep. Can only
+ be used along with --min-sleep-interval
+ --sleep-subtitles SECONDS Number of seconds to sleep before each
+ subtitle download
+
+## Video Format Options:
+ -f, --format FORMAT Video format code, see "FORMAT SELECTION"
+ for more details
+ -S, --format-sort SORTORDER Sort the formats by the fields given, see
+ "Sorting Formats" for more details
+ --format-sort-force Force user specified sort order to have
+ precedence over all fields, see "Sorting
+ Formats" for more details (Alias: --S-force)
+ --no-format-sort-force Some fields have precedence over the user
+ specified sort order (default)
+ --video-multistreams Allow multiple video streams to be merged
+ into a single file
+ --no-video-multistreams Only one video stream is downloaded for each
+ output file (default)
+ --audio-multistreams Allow multiple audio streams to be merged
+ into a single file
+ --no-audio-multistreams Only one audio stream is downloaded for each
+ output file (default)
+ --prefer-free-formats Prefer video formats with free containers
+ over non-free ones of same quality. Use with
+ "-S ext" to strictly prefer free containers
+ irrespective of quality
+ --no-prefer-free-formats Don't give any special preference to free
+ containers (default)
+ --check-formats Make sure formats are selected only from
+ those that are actually downloadable
+ --check-all-formats Check all formats for whether they are
+ actually downloadable
+ --no-check-formats Do not check that the formats are actually
+ downloadable
+ -F, --list-formats List available formats of each video.
+ Simulate unless --no-simulate is used
+ --merge-output-format FORMAT Containers that may be used when merging
+ formats, separated by "/", e.g. "mp4/mkv".
+ Ignored if no merge is required. (currently
+ supported: avi, flv, mkv, mov, mp4, webm)
+
+## Subtitle Options:
+ --write-subs Write subtitle file
+ --no-write-subs Do not write subtitle file (default)
+ --write-auto-subs Write automatically generated subtitle file
+ (Alias: --write-automatic-subs)
+ --no-write-auto-subs Do not write auto-generated subtitles
+ (default) (Alias: --no-write-automatic-subs)
+ --list-subs List available subtitles of each video.
+ Simulate unless --no-simulate is used
+ --sub-format FORMAT Subtitle format; accepts formats preference,
+ e.g. "srt" or "ass/srt/best"
+ --sub-langs LANGS Languages of the subtitles to download (can
+ be regex) or "all" separated by commas, e.g.
+ --sub-langs "en.*,ja". You can prefix the
+ language code with a "-" to exclude it from
+ the requested languages, e.g. --sub-langs
+ all,-live_chat. Use --list-subs for a list
+ of available language tags
+
+## Authentication Options:
+ -u, --username USERNAME Login with this account ID
+ -p, --password PASSWORD Account password. If this option is left
+ out, yt-dlp will ask interactively
+ -2, --twofactor TWOFACTOR Two-factor authentication code
+ -n, --netrc Use .netrc authentication data
+ --netrc-location PATH Location of .netrc authentication data;
+ either the path or its containing directory.
+ Defaults to ~/.netrc
+ --netrc-cmd NETRC_CMD Command to execute to get the credentials
+ for an extractor.
+ --video-password PASSWORD Video-specific password
+ --ap-mso MSO Adobe Pass multiple-system operator (TV
+ provider) identifier, use --ap-list-mso for
+ a list of available MSOs
+ --ap-username USERNAME Multiple-system operator account login
+ --ap-password PASSWORD Multiple-system operator account password.
+ If this option is left out, yt-dlp will ask
+ interactively
+ --ap-list-mso List all supported multiple-system operators
+ --client-certificate CERTFILE Path to client certificate file in PEM
+ format. May include the private key
+ --client-certificate-key KEYFILE
+ Path to private key file for client
+ certificate
+ --client-certificate-password PASSWORD
+ Password for client certificate private key,
+ if encrypted. If not provided, and the key
+ is encrypted, yt-dlp will ask interactively
+
+## Post-Processing Options:
+ -x, --extract-audio Convert video files to audio-only files
+ (requires ffmpeg and ffprobe)
+ --audio-format FORMAT Format to convert the audio to when -x is
+ used. (currently supported: best (default),
+ aac, alac, flac, m4a, mp3, opus, vorbis,
+ wav). You can specify multiple rules using
+ similar syntax as --remux-video
+ --audio-quality QUALITY Specify ffmpeg audio quality to use when
+ converting the audio with -x. Insert a value
+ between 0 (best) and 10 (worst) for VBR or a
+ specific bitrate like 128K (default 5)
+ --remux-video FORMAT Remux the video into another container if
+ necessary (currently supported: avi, flv,
+ gif, mkv, mov, mp4, webm, aac, aiff, alac,
+ flac, m4a, mka, mp3, ogg, opus, vorbis,
+ wav). If target container does not support
+ the video/audio codec, remuxing will fail.
+ You can specify multiple rules; e.g.
+ "aac>m4a/mov>mp4/mkv" will remux aac to m4a,
+ mov to mp4 and anything else to mkv
+ --recode-video FORMAT Re-encode the video into another format if
+ necessary. The syntax and supported formats
+ are the same as --remux-video
+ --postprocessor-args NAME:ARGS Give these arguments to the postprocessors.
+ Specify the postprocessor/executable name
+ and the arguments separated by a colon ":"
+ to give the argument to the specified
+ postprocessor/executable. Supported PP are:
+ Merger, ModifyChapters, SplitChapters,
+ ExtractAudio, VideoRemuxer, VideoConvertor,
+ Metadata, EmbedSubtitle, EmbedThumbnail,
+ SubtitlesConvertor, ThumbnailsConvertor,
+ FixupStretched, FixupM4a, FixupM3u8,
+ FixupTimestamp and FixupDuration. The
+ supported executables are: AtomicParsley,
+ FFmpeg and FFprobe. You can also specify
+ "PP+EXE:ARGS" to give the arguments to the
+ specified executable only when being used by
+ the specified postprocessor. Additionally,
+ for ffmpeg/ffprobe, "_i"/"_o" can be
+ appended to the prefix optionally followed
+ by a number to pass the argument before the
+ specified input/output file, e.g. --ppa
+ "Merger+ffmpeg_i1:-v quiet". You can use
+ this option multiple times to give different
+ arguments to different postprocessors.
+ (Alias: --ppa)
+ -k, --keep-video Keep the intermediate video file on disk
+ after post-processing
+ --no-keep-video Delete the intermediate video file after
+ post-processing (default)
+ --post-overwrites Overwrite post-processed files (default)
+ --no-post-overwrites Do not overwrite post-processed files
+ --embed-subs Embed subtitles in the video (only for mp4,
+ webm and mkv videos)
+ --no-embed-subs Do not embed subtitles (default)
+ --embed-thumbnail Embed thumbnail in the video as cover art
+ --no-embed-thumbnail Do not embed thumbnail (default)
+ --embed-metadata Embed metadata to the video file. Also
+ embeds chapters/infojson if present unless
+ --no-embed-chapters/--no-embed-info-json are
+ used (Alias: --add-metadata)
+ --no-embed-metadata Do not add metadata to file (default)
+ (Alias: --no-add-metadata)
+ --embed-chapters Add chapter markers to the video file
+ (Alias: --add-chapters)
+ --no-embed-chapters Do not add chapter markers (default) (Alias:
+ --no-add-chapters)
+ --embed-info-json Embed the infojson as an attachment to
+ mkv/mka video files
+ --no-embed-info-json Do not embed the infojson as an attachment
+ to the video file
+ --parse-metadata [WHEN:]FROM:TO
+ Parse additional metadata like title/artist
+ from other fields; see "MODIFYING METADATA"
+ for details. Supported values of "WHEN" are
+ the same as that of --use-postprocessor
+ (default: pre_process)
+ --replace-in-metadata [WHEN:]FIELDS REGEX REPLACE
+ Replace text in a metadata field using the
+ given regex. This option can be used
+ multiple times. Supported values of "WHEN"
+ are the same as that of --use-postprocessor
+ (default: pre_process)
+ --xattrs Write metadata to the video file's xattrs
+ (using dublin core and xdg standards)
+ --concat-playlist POLICY Concatenate videos in a playlist. One of
+ "never", "always", or "multi_video"
+ (default; only when the videos form a single
+ show). All the video files must have same
+ codecs and number of streams to be
+ concatable. The "pl_video:" prefix can be
+ used with "--paths" and "--output" to set
+ the output filename for the concatenated
+ files. See "OUTPUT TEMPLATE" for details
+ --fixup POLICY Automatically correct known faults of the
+ file. One of never (do nothing), warn (only
+ emit a warning), detect_or_warn (the
+ default; fix file if we can, warn
+ otherwise), force (try fixing even if file
+ already exists)
+ --ffmpeg-location PATH Location of the ffmpeg binary; either the
+ path to the binary or its containing directory
+ --exec [WHEN:]CMD Execute a command, optionally prefixed with
+ when to execute it, separated by a ":".
+ Supported values of "WHEN" are the same as
+ that of --use-postprocessor (default:
+ after_move). Same syntax as the output
+ template can be used to pass any field as
+ arguments to the command. If no fields are
+ passed, %(filepath,_filename|)q is appended
+ to the end of the command. This option can
+ be used multiple times
+ --no-exec Remove any previously defined --exec
+ --convert-subs FORMAT Convert the subtitles to another format
+ (currently supported: ass, lrc, srt, vtt)
+ (Alias: --convert-subtitles)
+ --convert-thumbnails FORMAT Convert the thumbnails to another format
+ (currently supported: jpg, png, webp). You
+ can specify multiple rules using similar
+ syntax as --remux-video
+ --split-chapters Split video into multiple files based on
+ internal chapters. The "chapter:" prefix can
+ be used with "--paths" and "--output" to set
+ the output filename for the split files. See
+ "OUTPUT TEMPLATE" for details
+ --no-split-chapters Do not split video based on chapters (default)
+ --remove-chapters REGEX Remove chapters whose title matches the
+ given regular expression. The syntax is the
+ same as --download-sections. This option can
+ be used multiple times
+ --no-remove-chapters Do not remove any chapters from the file
+ (default)
+ --force-keyframes-at-cuts Force keyframes at cuts when
+ downloading/splitting/removing sections.
+ This is slow due to needing a re-encode, but
+ the resulting video may have fewer artifacts
+ around the cuts
+ --no-force-keyframes-at-cuts Do not force keyframes around the chapters
+ when cutting/splitting (default)
+ --use-postprocessor NAME[:ARGS]
+ The (case sensitive) name of plugin
+ postprocessors to be enabled, and
+ (optionally) arguments to be passed to it,
+ separated by a colon ":". ARGS are a
+ semicolon ";" delimited list of NAME=VALUE.
+ The "when" argument determines when the
+ postprocessor is invoked. It can be one of
+ "pre_process" (after video extraction),
+ "after_filter" (after video passes filter),
+ "video" (after --format; before
+ --print/--output), "before_dl" (before each
+ video download), "post_process" (after each
+ video download; default), "after_move"
+ (after moving the video file to its final
+ location), "after_video" (after downloading
+ and processing all formats of a video), or
+ "playlist" (at end of playlist). This option
+ can be used multiple times to add different
+ postprocessors
+
+## SponsorBlock Options:
+Make chapter entries for, or remove various segments (sponsor,
+ introductions, etc.) from downloaded YouTube videos using the
+ [SponsorBlock API](https://sponsor.ajay.app)
+
+ --sponsorblock-mark CATS SponsorBlock categories to create chapters
+ for, separated by commas. Available
+ categories are sponsor, intro, outro,
+ selfpromo, preview, filler, interaction,
+ music_offtopic, poi_highlight, chapter, all
+ and default (=all). You can prefix the
+ category with a "-" to exclude it. See [1]
+ for description of the categories. E.g.
+ --sponsorblock-mark all,-preview
+ [1] https://wiki.sponsor.ajay.app/w/Segment_Categories
+ --sponsorblock-remove CATS SponsorBlock categories to be removed from
+ the video file, separated by commas. If a
+ category is present in both mark and remove,
+ remove takes precedence. The syntax and
+ available categories are the same as for
+ --sponsorblock-mark except that "default"
+ refers to "all,-filler" and poi_highlight,
+ chapter are not available
+ --sponsorblock-chapter-title TEMPLATE
+ An output template for the title of the
+ SponsorBlock chapters created by
+ --sponsorblock-mark. The only available
+ fields are start_time, end_time, category,
+ categories, name, category_names. Defaults
+ to "[SponsorBlock]: %(category_names)l"
+ --no-sponsorblock Disable both --sponsorblock-mark and
+ --sponsorblock-remove
+ --sponsorblock-api URL SponsorBlock API location, defaults to
+ https://sponsor.ajay.app
+
+## Extractor Options:
+ --extractor-retries RETRIES Number of retries for known extractor errors
+ (default is 3), or "infinite"
+ --allow-dynamic-mpd Process dynamic DASH manifests (default)
+ (Alias: --no-ignore-dynamic-mpd)
+ --ignore-dynamic-mpd Do not process dynamic DASH manifests
+ (Alias: --no-allow-dynamic-mpd)
+ --hls-split-discontinuity Split HLS playlists to different formats at
+ discontinuities such as ad breaks
+ --no-hls-split-discontinuity Do not split HLS playlists to different
+ formats at discontinuities such as ad breaks
+ (default)
+ --extractor-args IE_KEY:ARGS Pass ARGS arguments to the IE_KEY extractor.
+ See "EXTRACTOR ARGUMENTS" for details. You
+ can use this option multiple times to give
+ arguments for different extractors
+
+# CONFIGURATION
+
+You can configure yt-dlp by placing any supported command line option to a configuration file. The configuration is loaded from the following locations:
+
+1. **Main Configuration**:
+ * The file given to `--config-location`
+1. **Portable Configuration**: (Recommended for portable installations)
+ * If using a binary, `yt-dlp.conf` in the same directory as the binary
+ * If running from source-code, `yt-dlp.conf` in the parent directory of `yt_dlp`
+1. **Home Configuration**:
+ * `yt-dlp.conf` in the home path given to `-P`
+ * If `-P` is not given, the current directory is searched
+1. **User Configuration**:
+ * `${XDG_CONFIG_HOME}/yt-dlp.conf`
+ * `${XDG_CONFIG_HOME}/yt-dlp/config` (recommended on Linux/macOS)
+ * `${XDG_CONFIG_HOME}/yt-dlp/config.txt`
+ * `${APPDATA}/yt-dlp.conf`
+ * `${APPDATA}/yt-dlp/config` (recommended on Windows)
+ * `${APPDATA}/yt-dlp/config.txt`
+ * `~/yt-dlp.conf`
+ * `~/yt-dlp.conf.txt`
+ * `~/.yt-dlp/config`
+ * `~/.yt-dlp/config.txt`
+
+ See also: [Notes about environment variables](#notes-about-environment-variables)
+1. **System Configuration**:
+ * `/etc/yt-dlp.conf`
+ * `/etc/yt-dlp/config`
+ * `/etc/yt-dlp/config.txt`
+
+E.g. with the following configuration file, yt-dlp will always extract the audio, not copy the mtime, use a proxy, and save all videos under the `YouTube` directory in your home directory:
+```
+# Lines starting with # are comments
+
+# Always extract audio
+-x
+
+# Do not copy the mtime
+--no-mtime
+
+# Use this proxy
+--proxy 127.0.0.1:3128
+
+# Save all videos under YouTube directory in your home directory
+-o ~/YouTube/%(title)s.%(ext)s
+```
+
+**Note**: Options in the configuration file are just the same options, a.k.a. switches, used in regular command line calls; thus there **must be no whitespace** after `-` or `--`, e.g. `-o` or `--proxy` but not `- o` or `-- proxy`. They must also be quoted when necessary, as if it were a UNIX shell.
+
+You can use `--ignore-config` if you want to disable all configuration files for a particular yt-dlp run. If `--ignore-config` is found inside any configuration file, no further configuration will be loaded. For example, having the option in the portable configuration file prevents loading of home, user, and system configurations. Additionally, (for backward compatibility) if `--ignore-config` is found inside the system configuration file, the user configuration is not loaded.
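+
+E.g. to run with one specific configuration file and nothing else (the file name is illustrative):
+```
+yt-dlp --ignore-config --config-locations ~/yt-dlp-music.conf "https://some/video"
+```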
+
+### Configuration file encoding
+
+The configuration files are decoded according to the UTF BOM if present, and in the system locale's encoding otherwise.
+
+If you want your file to be decoded differently, add `# coding: ENCODING` to the beginning of the file (e.g. `# coding: shift-jis`). There must be no characters before that, even spaces or BOM.
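+
+E.g. the first line of a Shift JIS encoded configuration file:
+```
+# coding: shift-jis
+```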
+
+### Authentication with netrc
+
+You may also want to configure automatic credentials storage for extractors that support authentication (by providing login and password with `--username` and `--password`), so that you do not need to pass credentials as command line arguments on every yt-dlp execution and do not leave plain text passwords in the shell command history. You can achieve this using a [`.netrc` file](https://stackoverflow.com/tags/.netrc/info) on a per-extractor basis. For that you will need to create a `.netrc` file in `--netrc-location` and restrict permissions to read/write by only you:
+```
+touch ${HOME}/.netrc
+chmod a-rwx,u+rw ${HOME}/.netrc
+```
+After that you can add credentials for an extractor in the following format, where *extractor* is the name of the extractor in lowercase:
+```
+machine <extractor> login <username> password <password>
+```
+E.g.
+```
+machine youtube login myaccount@gmail.com password my_youtube_password
+machine twitch login my_twitch_account_name password my_twitch_password
+```
+To activate authentication with the `.netrc` file you should pass `--netrc` to yt-dlp or place it in the [configuration file](#configuration).
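+
+E.g.
+```
+yt-dlp --netrc "https://www.youtube.com/watch?v=BaW_jenozKc"
+```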
+
+The default location of the .netrc file is `~` (see below).
+
+As an alternative to using the `.netrc` file, which has the disadvantage of keeping your passwords in a plain text file, you can configure a custom shell command to provide the credentials for an extractor. This is done with the `--netrc-cmd` parameter: the command must output the credentials in the netrc format and return `0` on success; any other exit code is treated as an error. Any `{}` in the command is replaced by the name of the extractor, making it possible to select the credentials for the right extractor.
+
+E.g. to use an encrypted `.netrc` file stored as `.authinfo.gpg`:
+```
+yt-dlp --netrc-cmd 'gpg --decrypt ~/.authinfo.gpg' https://www.youtube.com/watch?v=BaW_jenozKc
+```
+
+
+### Notes about environment variables
+* Environment variables are normally specified as `${VARIABLE}`/`$VARIABLE` on UNIX and `%VARIABLE%` on Windows, but are always shown as `${VARIABLE}` in this documentation
+* yt-dlp also allows using UNIX-style variables on Windows for path-like options, e.g. `--output` and `--config-location` (see the example after this list)
+* If unset, `${XDG_CONFIG_HOME}` defaults to `~/.config` and `${XDG_CACHE_HOME}` to `~/.cache`
+* On Windows, `~` points to `${HOME}` if present; or, `${USERPROFILE}` or `${HOMEDRIVE}${HOMEPATH}` otherwise
+* On Windows, `${USERPROFILE}` generally points to `C:\Users\<user name>` and `${APPDATA}` to `${USERPROFILE}\AppData\Roaming`
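+
+E.g. on Windows, both of the following should be equivalent:
+```
+yt-dlp -P "%USERPROFILE%\Videos" "https://some/video"
+yt-dlp -P "${USERPROFILE}/Videos" "https://some/video"
+```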
+
+# OUTPUT TEMPLATE
+
+The `-o` option is used to indicate a template for the output file names, while the `-P` option is used to specify the path each type of file should be saved to.
+
+<!-- MANPAGE: BEGIN EXCLUDED SECTION -->
+**tl;dr:** [navigate me to examples](#output-template-examples).
+<!-- MANPAGE: END EXCLUDED SECTION -->
+
+The simplest usage of `-o` is not to set any template arguments when downloading a single file, like in `yt-dlp -o funny_video.flv "https://some/video"` (hard-coding the file extension like this is _not_ recommended and could break some post-processing).
+
+It may however also contain special sequences that will be replaced when downloading each video. The special sequences may be formatted according to [Python string formatting operations](https://docs.python.org/3/library/stdtypes.html#printf-style-string-formatting), e.g. `%(NAME)s` or `%(NAME)05d`. To clarify, that is a percent symbol followed by a name in parentheses, followed by formatting operations.
+
+The field names themselves (the part inside the parentheses) can also have some special formatting:
+
+1. **Object traversal**: The dictionaries and lists available in metadata can be traversed by using a dot `.` separator; e.g. `%(tags.0)s`, `%(subtitles.en.-1.ext)s`. You can do Python slicing with colon `:`; e.g. `%(id.3:7:-1)s`, `%(formats.:.format_id)s`. Curly braces `{}` can be used to build dictionaries with only specific keys; e.g. `%(formats.:.{format_id,height})#j`. An empty field name `%()s` refers to the entire infodict; e.g. `%(.{id,title})s`. Note that not all the fields that become available using this method are listed below. Use `-j` to see such fields
+
+1. **Arithmetic**: Simple arithmetic can be done on numeric fields using `+`, `-` and `*`. E.g. `%(playlist_index+10)03d`, `%(n_entries+1-playlist_index)d`
+
+1. **Date/time Formatting**: Date/time fields can be formatted according to [strftime formatting](https://docs.python.org/3/library/datetime.html#strftime-and-strptime-format-codes) by specifying it separated from the field name using a `>`. E.g. `%(duration>%H-%M-%S)s`, `%(upload_date>%Y-%m-%d)s`, `%(epoch-3600>%H-%M-%S)s`
+
+1. **Alternatives**: Alternate fields can be specified, separated by a `,`. E.g. `%(release_date>%Y,upload_date>%Y|Unknown)s`
+
+1. **Replacement**: A replacement value can be specified using a `&` separator according to the [`str.format` mini-language](https://docs.python.org/3/library/string.html#format-specification-mini-language). If the field is *not* empty, this replacement value will be used instead of the actual field content. This is done after alternate fields are considered; thus the replacement is used if *any* of the alternative fields is *not* empty. E.g. `%(chapters&has chapters|no chapters)s`, `%(title&TITLE={:>20}|NO TITLE)s`
+
+1. **Default**: A literal default value can be specified for when the field is empty using a `|` separator. This overrides `--output-na-placeholder`. E.g. `%(uploader|Unknown)s`
+
+1. **More Conversions**: In addition to the normal format types `diouxXeEfFgGcrs`, yt-dlp additionally supports converting to `B` = **B**ytes, `j` = **j**son (flag `#` for pretty-printing, `+` for Unicode), `h` = HTML escaping, `l` = a comma separated **l**ist (flag `#` for `\n` newline-separated), `q` = a string **q**uoted for the terminal (flag `#` to split a list into different arguments), `D` = add **D**ecimal suffixes (e.g. 10M) (flag `#` to use 1024 as factor), and `S` = **S**anitize as filename (flag `#` for restricted)
+
+1. **Unicode normalization**: The format type `U` can be used for NFC [Unicode normalization](https://docs.python.org/3/library/unicodedata.html#unicodedata.normalize). The alternate form flag (`#`) changes the normalization to NFD and the conversion flag `+` can be used for NFKC/NFKD compatibility equivalence normalization. E.g. `%(title)+.100U` is NFKC
+
+To summarize, the general syntax for a field is:
+```
+%(name[.keys][addition][>strf][,alternate][&replacement][|default])[flags][width][.precision][length]type
+```
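+
+For instance, `%(upload_date>%Y|Unknown)s - %(title).80s [%(id)s].%(ext)s` combines date/time formatting, a default value and a precision limit in a single template (an illustrative combination of the features above).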
+
+Additionally, you can set different output templates for the various metadata files separately from the general output template by specifying the type of file followed by the template separated by a colon `:`. The different file types supported are `subtitle`, `thumbnail`, `description`, `annotation` (deprecated), `infojson`, `link`, `pl_thumbnail`, `pl_description`, `pl_infojson`, `chapter`, `pl_video`. E.g. `-o "%(title)s.%(ext)s" -o "thumbnail:%(title)s\%(title)s.%(ext)s"` will put the thumbnails in a folder with the same name as the video. If any of the templates is empty, that type of file will not be written. E.g. `--write-thumbnail -o "thumbnail:"` will write thumbnails only for playlists and not for videos.
+
+<a id="outtmpl-postprocess-note"></a>
+
+**Note**: Due to post-processing (i.e. merging etc.), the actual output filename might differ. Use `--print after_move:filepath` to get the name after all post-processing is complete.
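+
+For example, to download a video and then print the final path of the merged file once all post-processing is complete (the URL is a placeholder):
+```
+yt-dlp --print after_move:filepath -o "%(title)s.%(ext)s" "https://some/video"
+```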
+
+The available fields are:
+
+ - `id` (string): Video identifier
+ - `title` (string): Video title
+ - `fulltitle` (string): Video title ignoring live timestamp and generic title
+ - `ext` (string): Video filename extension
+ - `alt_title` (string): A secondary title of the video
+ - `description` (string): The description of the video
+ - `display_id` (string): An alternative identifier for the video
+ - `uploader` (string): Full name of the video uploader
+ - `uploader_id` (string): Nickname or id of the video uploader
+ - `uploader_url` (string): URL to the video uploader's profile
+ - `license` (string): License name the video is licensed under
+ - `creators` (list): The creators of the video
+ - `creator` (string): The creators of the video; comma-separated
+ - `timestamp` (numeric): UNIX timestamp of the moment the video became available
+ - `upload_date` (string): Video upload date in UTC (YYYYMMDD)
+ - `release_timestamp` (numeric): UNIX timestamp of the moment the video was released
+ - `release_date` (string): The date (YYYYMMDD) when the video was released in UTC
+ - `release_year` (numeric): Year (YYYY) when the video or album was released
+ - `modified_timestamp` (numeric): UNIX timestamp of the moment the video was last modified
+ - `modified_date` (string): The date (YYYYMMDD) when the video was last modified in UTC
+ - `channel` (string): Full name of the channel the video is uploaded on
+ - `channel_id` (string): Id of the channel
+ - `channel_url` (string): URL of the channel
+ - `channel_follower_count` (numeric): Number of followers of the channel
+ - `channel_is_verified` (boolean): Whether the channel is verified on the platform
+ - `location` (string): Physical location where the video was filmed
+ - `duration` (numeric): Length of the video in seconds
+ - `duration_string` (string): Length of the video (HH:mm:ss)
+ - `view_count` (numeric): How many users have watched the video on the platform
+ - `concurrent_view_count` (numeric): How many users are currently watching the video on the platform
+ - `like_count` (numeric): Number of positive ratings of the video
+ - `dislike_count` (numeric): Number of negative ratings of the video
+ - `repost_count` (numeric): Number of reposts of the video
+ - `average_rating` (numeric): Average rating given by users, the scale used depends on the webpage
+ - `comment_count` (numeric): Number of comments on the video (For some extractors, comments are only downloaded at the end, and so this field cannot be used)
+ - `age_limit` (numeric): Age restriction for the video (years)
+ - `live_status` (string): One of "not_live", "is_live", "is_upcoming", "was_live", "post_live" (was live, but VOD is not yet processed)
+ - `is_live` (boolean): Whether this video is a live stream or a fixed-length video
+ - `was_live` (boolean): Whether this video was originally a live stream
+ - `playable_in_embed` (string): Whether this video is allowed to play in embedded players on other sites
+ - `availability` (string): Whether the video is "private", "premium_only", "subscriber_only", "needs_auth", "unlisted" or "public"
+ - `media_type` (string): The type of media as classified by the site, e.g. "episode", "clip", "trailer"
+ - `start_time` (numeric): Time in seconds where the reproduction should start, as specified in the URL
+ - `end_time` (numeric): Time in seconds where the reproduction should end, as specified in the URL
+ - `extractor` (string): Name of the extractor
+ - `extractor_key` (string): Key name of the extractor
+ - `epoch` (numeric): Unix epoch of when the information extraction was completed
+ - `autonumber` (numeric): Number that will be increased with each download, starting at `--autonumber-start`, padded with leading zeros to 5 digits
+ - `video_autonumber` (numeric): Number that will be increased with each video
+ - `n_entries` (numeric): Total number of extracted items in the playlist
+ - `playlist_id` (string): Identifier of the playlist that contains the video
+ - `playlist_title` (string): Name of the playlist that contains the video
+ - `playlist` (string): `playlist_id` or `playlist_title`
+ - `playlist_count` (numeric): Total number of items in the playlist. May not be known if entire playlist is not extracted
+ - `playlist_index` (numeric): Index of the video in the playlist padded with leading zeros according to the final index
+ - `playlist_autonumber` (numeric): Position of the video in the playlist download queue padded with leading zeros according to the total length of the playlist
+ - `playlist_uploader` (string): Full name of the playlist uploader
+ - `playlist_uploader_id` (string): Nickname or id of the playlist uploader
+ - `webpage_url` (string): A URL to the video webpage which, if given to yt-dlp, should allow you to get the same result again
+ - `webpage_url_basename` (string): The basename of the webpage URL
+ - `webpage_url_domain` (string): The domain of the webpage URL
+ - `original_url` (string): The URL given by the user (or same as `webpage_url` for playlist entries)
+ - `categories` (list): List of categories the video belongs to
+ - `tags` (list): List of tags assigned to the video
+ - `cast` (list): List of cast members
+
+All the fields in [Filtering Formats](#filtering-formats) can also be used
+
+Available for the video that belongs to some logical chapter or section:
+
+ - `chapter` (string): Name or title of the chapter the video belongs to
+ - `chapter_number` (numeric): Number of the chapter the video belongs to
+ - `chapter_id` (string): Id of the chapter the video belongs to
+
+Available for the video that is an episode of some series or programme:
+
+ - `series` (string): Title of the series or programme the video episode belongs to
+ - `series_id` (string): Id of the series or programme the video episode belongs to
+ - `season` (string): Title of the season the video episode belongs to
+ - `season_number` (numeric): Number of the season the video episode belongs to
+ - `season_id` (string): Id of the season the video episode belongs to
+ - `episode` (string): Title of the video episode
+ - `episode_number` (numeric): Number of the video episode within a season
+ - `episode_id` (string): Id of the video episode
+
+Available for the media that is a track or a part of a music album:
+
+ - `track` (string): Title of the track
+ - `track_number` (numeric): Number of the track within an album or a disc
+ - `track_id` (string): Id of the track
+ - `artists` (list): Artist(s) of the track
+ - `artist` (string): Artist(s) of the track; comma-separated
+ - `genres` (list): Genre(s) of the track
+ - `genre` (string): Genre(s) of the track; comma-separated
+ - `composers` (list): Composer(s) of the piece
+ - `composer` (string): Composer(s) of the piece; comma-separated
+ - `album` (string): Title of the album the track belongs to
+ - `album_type` (string): Type of the album
+ - `album_artists` (list): All artists who appear on the album
+ - `album_artist` (string): All artists who appear on the album; comma-separated
+ - `disc_number` (numeric): Number of the disc or other physical medium the track belongs to
+
+Available only when using `--download-sections`, and with the `chapter:` prefix when using `--split-chapters` for videos with internal chapters:
+
+ - `section_title` (string): Title of the chapter
+ - `section_number` (numeric): Number of the chapter within the file
+ - `section_start` (numeric): Start time of the chapter in seconds
+ - `section_end` (numeric): End time of the chapter in seconds
+
+Available only when used in `--print`:
+
+ - `urls` (string): The URLs of all requested formats, one on each line
+ - `filename` (string): Name of the video file. Note that the [actual filename may differ](#outtmpl-postprocess-note)
+ - `formats_table` (table): The video format table as printed by `--list-formats`
+ - `thumbnails_table` (table): The thumbnail format table as printed by `--list-thumbnails`
+ - `subtitles_table` (table): The subtitle format table as printed by `--list-subs`
+ - `automatic_captions_table` (table): The automatic subtitle format table as printed by `--list-subs`
+
+ Available only after the video is downloaded (`post_process`/`after_move`):
+
+ - `filepath` (string): Actual path of the downloaded video file
+
+Available only in `--sponsorblock-chapter-title`:
+
+ - `start_time` (numeric): Start time of the chapter in seconds
+ - `end_time` (numeric): End time of the chapter in seconds
+ - `categories` (list): The [SponsorBlock categories](https://wiki.sponsor.ajay.app/w/Types#Category) the chapter belongs to
+ - `category` (string): The smallest SponsorBlock category the chapter belongs to
+ - `category_names` (list): Friendly names of the categories
+ - `name` (string): Friendly name of the smallest category
+ - `type` (string): The [SponsorBlock action type](https://wiki.sponsor.ajay.app/w/Types#Action_Type) of the chapter
+
+Each aforementioned sequence when referenced in an output template will be replaced by the actual value corresponding to the sequence name. E.g. for `-o %(title)s-%(id)s.%(ext)s` and an mp4 video with title `yt-dlp test video` and id `BaW_jenozKc`, this will result in a `yt-dlp test video-BaW_jenozKc.mp4` file created in the current directory.
+
+**Note**: Some of the sequences are not guaranteed to be present, since they depend on the metadata obtained by a particular extractor. Such sequences will be replaced with the placeholder value provided with `--output-na-placeholder` (`NA` by default).
+
+**Tip**: Look at the `-j` output to identify which fields are available for the particular URL
+
+For numeric sequences you can use [numeric related formatting](https://docs.python.org/3/library/stdtypes.html#printf-style-string-formatting); e.g. `%(view_count)05d` will result in a string with view count padded with zeros up to 5 characters, like in `00042`.
+
+Output templates can also contain an arbitrary hierarchical path, e.g. `-o "%(playlist)s/%(playlist_index)s - %(title)s.%(ext)s"`, which will result in downloading each video into a directory corresponding to this path template. Any missing directory will be automatically created for you.
+
+To use percent literals in an output template use `%%`. To output to stdout use `-o -`.
+
+The current default template is `%(title)s [%(id)s].%(ext)s`.
+
+In some cases, you don't want special characters such as 中, spaces, or &, e.g. when transferring the downloaded filename to a Windows system or passing the filename through an 8-bit-unsafe channel. In these cases, add the `--restrict-filenames` flag to get a shorter title.
+
+#### Output template examples
+
+```bash
+$ yt-dlp --print filename -o "test video.%(ext)s" BaW_jenozKc
+test video.webm # Literal name with correct extension
+
+$ yt-dlp --print filename -o "%(title)s.%(ext)s" BaW_jenozKc
+youtube-dl test video ''_ä↭𝕐.webm # All kinds of weird characters
+
+$ yt-dlp --print filename -o "%(title)s.%(ext)s" BaW_jenozKc --restrict-filenames
+youtube-dl_test_video_.webm # Restricted file name
+
+# Download YouTube playlist videos in separate directory indexed by video order in a playlist
+$ yt-dlp -o "%(playlist)s/%(playlist_index)s - %(title)s.%(ext)s" "https://www.youtube.com/playlist?list=PLwiyx1dc3P2JR9N8gQaQN_BCvlSlap7re"
+
+# Download YouTube playlist videos in separate directories according to their uploaded year
+$ yt-dlp -o "%(upload_date>%Y)s/%(title)s.%(ext)s" "https://www.youtube.com/playlist?list=PLwiyx1dc3P2JR9N8gQaQN_BCvlSlap7re"
+
+# Prefix playlist index with " - " separator, but only if it is available
+$ yt-dlp -o "%(playlist_index&{} - |)s%(title)s.%(ext)s" BaW_jenozKc "https://www.youtube.com/user/TheLinuxFoundation/playlists"
+
+# Download all playlists of YouTube channel/user keeping each playlist in separate directory:
+$ yt-dlp -o "%(uploader)s/%(playlist)s/%(playlist_index)s - %(title)s.%(ext)s" "https://www.youtube.com/user/TheLinuxFoundation/playlists"
+
+# Download Udemy course keeping each chapter in separate directory under MyVideos directory in your home
+$ yt-dlp -u user -p password -P "~/MyVideos" -o "%(playlist)s/%(chapter_number)s - %(chapter)s/%(title)s.%(ext)s" "https://www.udemy.com/java-tutorial"
+
+# Download entire series season keeping each series and each season in separate directory under C:/MyVideos
+$ yt-dlp -P "C:/MyVideos" -o "%(series)s/%(season_number)s - %(season)s/%(episode_number)s - %(episode)s.%(ext)s" "https://videomore.ru/kino_v_detalayah/5_sezon/367617"
+
+# Download video as "C:\MyVideos\uploader\title.ext", subtitles as "C:\MyVideos\subs\uploader\title.ext"
+# and put all temporary files in "C:\MyVideos\tmp"
+$ yt-dlp -P "C:/MyVideos" -P "temp:tmp" -P "subtitle:subs" -o "%(uploader)s/%(title)s.%(ext)s" BaW_jenoz --write-subs
+
+# Download video as "C:\MyVideos\uploader\title.ext" and subtitles as "C:\MyVideos\uploader\subs\title.ext"
+$ yt-dlp -P "C:/MyVideos" -o "%(uploader)s/%(title)s.%(ext)s" -o "subtitle:%(uploader)s/subs/%(title)s.%(ext)s" BaW_jenozKc --write-subs
+
+# Stream the video being downloaded to stdout
+$ yt-dlp -o - BaW_jenozKc
+```
+
+# FORMAT SELECTION
+
+By default, yt-dlp tries to download the best available quality if you **don't** pass any options.
+This is generally equivalent to using `-f bestvideo*+bestaudio/best`. However, if multiple audio streams are enabled (`--audio-multistreams`), the default format changes to `-f bestvideo+bestaudio/best`. Similarly, if ffmpeg is unavailable, or if you use yt-dlp to stream to `stdout` (`-o -`), the default becomes `-f best/bestvideo+bestaudio`.
+
+**Deprecation warning**: Latest versions of yt-dlp can stream multiple formats to stdout simultaneously using ffmpeg. So, in future versions, the default for this will be set to `-f bv*+ba/b`, similar to normal downloads. If you want to preserve the `-f b/bv+ba` setting, it is recommended to explicitly specify it in the configuration options.
+
+The general syntax for format selection is `-f FORMAT` (or `--format FORMAT`) where `FORMAT` is a *selector expression*, i.e. an expression that describes the format or formats you would like to download.
+
+<!-- MANPAGE: BEGIN EXCLUDED SECTION -->
+**tl;dr:** [navigate me to examples](#format-selection-examples).
+<!-- MANPAGE: END EXCLUDED SECTION -->
+
+The simplest case is requesting a specific format; e.g. with `-f 22` you can download the format with format code equal to 22. You can get the list of available format codes for a particular video using `--list-formats` or `-F`. Note that these format codes are extractor specific.
+
+You can also use a file extension (currently `3gp`, `aac`, `flv`, `m4a`, `mp3`, `mp4`, `ogg`, `wav`, `webm` are supported) to download the best quality format of a particular file extension served as a single file, e.g. `-f webm` will download the best quality format with the `webm` extension served as a single file.
+
+You can use `-f -` to interactively provide the format selector *for each video*
+
+You can also use special names to select particular edge case formats:
+
+ - `all`: Select **all formats** separately
+ - `mergeall`: Select and **merge all formats** (Must be used with `--audio-multistreams`, `--video-multistreams` or both)
+ - `b*`, `best*`: Select the best quality format that **contains either** a video or an audio or both (i.e. `vcodec!=none or acodec!=none`)
+ - `b`, `best`: Select the best quality format that **contains both** video and audio. Equivalent to `best*[vcodec!=none][acodec!=none]`
+ - `bv`, `bestvideo`: Select the best quality **video-only** format. Equivalent to `best*[acodec=none]`
+ - `bv*`, `bestvideo*`: Select the best quality format that **contains video**. It may also contain audio. Equivalent to `best*[vcodec!=none]`
+ - `ba`, `bestaudio`: Select the best quality **audio-only** format. Equivalent to `best*[vcodec=none]`
+ - `ba*`, `bestaudio*`: Select the best quality format that **contains audio**. It may also contain video. Equivalent to `best*[acodec!=none]` ([Do not use!](https://github.com/yt-dlp/yt-dlp/issues/979#issuecomment-919629354))
+ - `w*`, `worst*`: Select the worst quality format that contains either a video or an audio
+ - `w`, `worst`: Select the worst quality format that contains both video and audio. Equivalent to `worst*[vcodec!=none][acodec!=none]`
+ - `wv`, `worstvideo`: Select the worst quality video-only format. Equivalent to `worst*[acodec=none]`
+ - `wv*`, `worstvideo*`: Select the worst quality format that contains video. It may also contain audio. Equivalent to `worst*[vcodec!=none]`
+ - `wa`, `worstaudio`: Select the worst quality audio-only format. Equivalent to `worst*[vcodec=none]`
+ - `wa*`, `worstaudio*`: Select the worst quality format that contains audio. It may also contain video. Equivalent to `worst*[acodec!=none]`
+
+For example, to download the worst quality video-only format you can use `-f worstvideo`. It is however recommended not to use `worst` and related options. When your format selector is `worst`, the format which is worst in all respects is selected. Most of the time, what you actually want is the video with the smallest filesize instead. So it is generally better to use `-S +size` or more rigorously, `-S +size,+br,+res,+fps` instead of `-f worst`. See [Sorting Formats](#sorting-formats) for more details.
+
+You can select the n'th best format of a type by using `best<type>.<n>`. For example, `best.2` will select the 2nd best combined format. Similarly, `bv*.3` will select the 3rd best format that contains a video stream.
+
+If you want to download multiple videos, and they don't have the same formats available, you can specify the order of preference using slashes. Note that formats on the left hand side are preferred; e.g. `-f 22/17/18` will download format 22 if it's available, otherwise it will download format 17 if it's available, otherwise it will download format 18 if it's available, otherwise it will complain that no suitable formats are available for download.
+
+If you want to download several formats of the same video, use a comma as a separator, e.g. `-f 22,17,18` will download all three of these formats, provided they are available. Or a more sophisticated example combined with the precedence feature: `-f 136/137/mp4/bestvideo,140/m4a/bestaudio`.
+
+You can merge the video and audio of multiple formats into a single file using `-f <format1>+<format2>+...` (requires ffmpeg installed); e.g. `-f bestvideo+bestaudio` will download the best video-only format, the best audio-only format and mux them together with ffmpeg.
+
+**Deprecation warning**: Since the *below* described behavior is complex and counter-intuitive, this will be removed and multistreams will be enabled by default in the future. A new operator will instead be added to limit formats to single audio/video.
+
+Unless `--video-multistreams` is used, all formats with a video stream except the first one are ignored. Similarly, unless `--audio-multistreams` is used, all formats with an audio stream except the first one are ignored. E.g. `-f bestvideo+best+bestaudio --video-multistreams --audio-multistreams` will download and merge all 3 given formats. The resulting file will have 2 video streams and 2 audio streams. But `-f bestvideo+best+bestaudio --no-video-multistreams` will download and merge only `bestvideo` and `bestaudio`. `best` is ignored since another format containing a video stream (`bestvideo`) has already been selected. The order of the formats is therefore important. `-f best+bestaudio --no-audio-multistreams` will download only `best` while `-f bestaudio+best --no-audio-multistreams` will ignore `best` and download only `bestaudio`.
+
+## Filtering Formats
+
+You can also filter the video formats by putting a condition in brackets, as in `-f "best[height=720]"` (or `-f "[filesize>10M]"` since filters without a selector are interpreted as `best`).
+
+The following numeric meta fields can be used with comparisons `<`, `<=`, `>`, `>=`, `=` (equals), `!=` (not equals):
+
+ - `filesize`: The number of bytes, if known in advance
+ - `filesize_approx`: An estimate for the number of bytes
+ - `width`: Width of the video, if known
+ - `height`: Height of the video, if known
+ - `aspect_ratio`: Aspect ratio of the video, if known
+ - `tbr`: Average bitrate of audio and video in KBit/s
+ - `abr`: Average audio bitrate in KBit/s
+ - `vbr`: Average video bitrate in KBit/s
+ - `asr`: Audio sampling rate in Hertz
+ - `fps`: Frame rate
+ - `audio_channels`: The number of audio channels
+ - `stretched_ratio`: `width:height` of the video's pixels, if not square
+
+String filtering also works, with the comparisons `=` (equals), `^=` (starts with), `$=` (ends with), `*=` (contains) and `~=` (matches regex), on the following string meta fields:
+
+ - `url`: Video URL
+ - `ext`: File extension
+ - `acodec`: Name of the audio codec in use
+ - `vcodec`: Name of the video codec in use
+ - `container`: Name of the container format
+ - `protocol`: The protocol that will be used for the actual download, lower-case (`http`, `https`, `rtsp`, `rtmp`, `rtmpe`, `mms`, `f4m`, `ism`, `http_dash_segments`, `m3u8`, or `m3u8_native`)
+ - `language`: Language code
+ - `dynamic_range`: The dynamic range of the video
+ - `format_id`: A short description of the format
+ - `format`: A human-readable description of the format
+ - `format_note`: Additional info about the format
+ - `resolution`: Textual description of width and height
+
+Any string comparison may be prefixed with negation `!` in order to produce an opposite comparison, e.g. `!*=` (does not contain). The comparand of a string comparison needs to be quoted with either double or single quotes if it contains spaces or special characters other than `._-`.
+
+**Note**: None of the aforementioned meta fields are guaranteed to be present since this solely depends on the metadata obtained by the particular extractor, i.e. the metadata offered by the website. Any other field made available by the extractor can also be used for filtering.
+
+Formats for which the value is not known are excluded unless you put a question mark (`?`) after the operator. You can combine format filters, so `-f "bv[height<=?720][tbr>500]"` selects up to 720p videos (or videos where the height is not known) with a bitrate of at least 500 KBit/s. You can also use the filters with `all` to download all formats that satisfy the filter, e.g. `-f "all[vcodec=none]"` selects all audio-only formats.
+
+Format selectors can also be grouped using parentheses; e.g. `-f "(mp4,webm)[height<480]"` will download the best pre-merged mp4 and webm formats with a height lower than 480.
+
+## Sorting Formats
+
+You can change the criteria for being considered the `best` by using `-S` (`--format-sort`). The general format for this is `--format-sort field1,field2...`.
+
+The available fields are:
+
+ - `hasvid`: Gives priority to formats that have a video stream
+ - `hasaud`: Gives priority to formats that have an audio stream
+ - `ie_pref`: The format preference
+ - `lang`: The language preference
+ - `quality`: The quality of the format
+ - `source`: The preference of the source
+ - `proto`: Protocol used for download (`https`/`ftps` > `http`/`ftp` > `m3u8_native`/`m3u8` > `http_dash_segments` > `websocket_frag` > `mms`/`rtsp` > `f4f`/`f4m`)
+ - `vcodec`: Video Codec (`av01` > `vp9.2` > `vp9` > `h265` > `h264` > `vp8` > `h263` > `theora` > other)
+ - `acodec`: Audio Codec (`flac`/`alac` > `wav`/`aiff` > `opus` > `vorbis` > `aac` > `mp4a` > `mp3` > `ac4` > `eac3` > `ac3` > `dts` > other)
+ - `codec`: Equivalent to `vcodec,acodec`
+ - `vext`: Video Extension (`mp4` > `mov` > `webm` > `flv` > other). If `--prefer-free-formats` is used, `webm` is preferred.
+ - `aext`: Audio Extension (`m4a` > `aac` > `mp3` > `ogg` > `opus` > `webm` > other). If `--prefer-free-formats` is used, the order changes to `ogg` > `opus` > `webm` > `mp3` > `m4a` > `aac`
+ - `ext`: Equivalent to `vext,aext`
+ - `filesize`: Exact filesize, if known in advance
+ - `fs_approx`: Approximate filesize
+ - `size`: Exact filesize if available, otherwise approximate filesize
+ - `height`: Height of video
+ - `width`: Width of video
+ - `res`: Video resolution, calculated as the smallest dimension
+ - `fps`: Framerate of video
+ - `hdr`: The dynamic range of the video (`DV` > `HDR12` > `HDR10+` > `HDR10` > `HLG` > `SDR`)
+ - `channels`: The number of audio channels
+ - `tbr`: Total average bitrate in KBit/s
+ - `vbr`: Average video bitrate in KBit/s
+ - `abr`: Average audio bitrate in KBit/s
+ - `br`: Average bitrate in KBit/s, `tbr`/`vbr`/`abr`
+ - `asr`: Audio sample rate in Hz
+
+**Deprecation warning**: Many of these fields have (currently undocumented) aliases that may be removed in a future version. It is recommended to use only the documented field names.
+
+All fields, unless specified otherwise, are sorted in descending order. To reverse this, prefix the field with a `+`. E.g. `+res` prefers the format with the smallest resolution. Additionally, you can suffix a preferred value for the fields, separated by a `:`. E.g. `res:720` prefers larger videos, but no larger than 720p, and the smallest video if there are no videos less than 720p. For `codec` and `ext`, you can provide two preferred values, the first for video and the second for audio. E.g. `+codec:avc:m4a` (equivalent to `+vcodec:avc,+acodec:m4a`) sets the video codec preference to `h264` > `h265` > `vp9` > `vp9.2` > `av01` > `vp8` > `h263` > `theora` and the audio codec preference to `mp4a` > `aac` > `vorbis` > `opus` > `mp3` > `ac3` > `dts`. You can also make the sorting prefer the value nearest to the one provided by using `~` as the delimiter. E.g. `filesize~1G` prefers the format with the filesize closest to 1 GiB.
+
+The fields `hasvid` and `ie_pref` are always given highest priority in sorting, irrespective of the user-defined order. This behaviour can be changed by using `--format-sort-force`. Apart from these, the default order used is: `lang,quality,res,fps,hdr:12,vcodec:vp9.2,channels,acodec,size,br,asr,proto,ext,hasaud,source,id`. The extractors may override this default order, but they cannot override the user-provided order.
+
+Note that the default has `vcodec:vp9.2`; i.e. `av1` is not preferred. Similarly, the default for hdr is `hdr:12`; i.e. Dolby Vision is not preferred. These choices are made since DV and AV1 formats are not yet fully compatible with most devices. This may be changed in the future as more devices become capable of smoothly playing back these formats.
+
+If your format selector is `worst`, the last item is selected after sorting. This means it will select the format that is worst in all respects. Most of the time, what you actually want is the video with the smallest filesize instead. So it is generally better to use `-f best -S +size,+br,+res,+fps`.
+
+**Tip**: You can use `-v -F` to see how the formats have been sorted (worst to best).
+
+## Format Selection examples
+
+```bash
+# Download and merge the best video-only format and the best audio-only format,
+# or download the best combined format if video-only format is not available
+$ yt-dlp -f "bv+ba/b"
+
+# Download best format that contains video,
+# and if it doesn't already have an audio stream, merge it with best audio-only format
+$ yt-dlp -f "bv*+ba/b"
+
+# Same as above
+$ yt-dlp
+
+# Download the best video-only format and the best audio-only format without merging them
+# For this case, an output template should be used since
+# by default, bestvideo and bestaudio will have the same file name.
+$ yt-dlp -f "bv,ba" -o "%(title)s.f%(format_id)s.%(ext)s"
+
+# Download and merge the best format that has a video stream,
+# and all audio-only formats into one file
+$ yt-dlp -f "bv*+mergeall[vcodec=none]" --audio-multistreams
+
+# Download and merge the best format that has a video stream,
+# and the best 2 audio-only formats into one file
+$ yt-dlp -f "bv*+ba+ba.2" --audio-multistreams
+
+
+# The following examples show the old method (without -S) of format selection
+# and how to use -S to achieve a similar but (generally) better result
+
+# Download the worst video available (old method)
+$ yt-dlp -f "wv*+wa/w"
+
+# Download the best video available but with the smallest resolution
+$ yt-dlp -S "+res"
+
+# Download the smallest video available
+$ yt-dlp -S "+size,+br"
+
+
+
+# Download the best mp4 video available, or the best video if no mp4 available
+$ yt-dlp -f "bv*[ext=mp4]+ba[ext=m4a]/b[ext=mp4] / bv*+ba/b"
+
+# Download the best video with the best extension
+# (For video, mp4 > mov > webm > flv. For audio, m4a > aac > mp3 ...)
+$ yt-dlp -S "ext"
+
+
+
+# Download the best video available but no better than 480p,
+# or the worst video if there is no video under 480p
+$ yt-dlp -f "bv*[height<=480]+ba/b[height<=480] / wv*+ba/w"
+
+# Download the best video available with the largest height but no better than 480p,
+# or the best video with the smallest resolution if there is no video under 480p
+$ yt-dlp -S "height:480"
+
+# Download the best video available with the largest resolution but no better than 480p,
+# or the best video with the smallest resolution if there is no video under 480p
+# Resolution is determined by using the smallest dimension.
+# So this works correctly for vertical videos as well
+$ yt-dlp -S "res:480"
+
+
+
+# Download the best video (that also has audio) but no bigger than 50 MB,
+# or the worst video (that also has audio) if there is no video under 50 MB
+$ yt-dlp -f "b[filesize<50M] / w"
+
+# Download largest video (that also has audio) but no bigger than 50 MB,
+# or the smallest video (that also has audio) if there is no video under 50 MB
+$ yt-dlp -f "b" -S "filesize:50M"
+
+# Download best video (that also has audio) that is closest in size to 50 MB
+$ yt-dlp -f "b" -S "filesize~50M"
+
+
+
+# Download best video available via direct link over HTTP/HTTPS protocol,
+# or the best video available via any protocol if there is no such video
+$ yt-dlp -f "(bv*+ba/b)[protocol^=http][protocol!*=dash] / (bv*+ba/b)"
+
+# Download best video available via the best protocol
+# (https/ftps > http/ftp > m3u8_native > m3u8 > http_dash_segments ...)
+$ yt-dlp -S "proto"
+
+
+
+# Download the best video with either h264 or h265 codec,
+# or the best video if there is no such video
+$ yt-dlp -f "(bv*[vcodec~='^((he|a)vc|h26[45])']+ba) / (bv*+ba/b)"
+
+# Download the best video with best codec no better than h264,
+# or the best video with worst codec if there is no such video
+$ yt-dlp -S "codec:h264"
+
+# Download the best video with worst codec no worse than h264,
+# or the best video with best codec if there is no such video
+$ yt-dlp -S "+codec:h264"
+
+
+
+# More complex examples
+
+# Download the best video no better than 720p preferring framerate greater than 30,
+# or the worst video (still preferring framerate greater than 30) if there is no such video
+$ yt-dlp -f "((bv*[fps>30]/bv*)[height<=720]/(wv*[fps>30]/wv*)) + ba / (b[fps>30]/b)[height<=720]/(w[fps>30]/w)"
+
+# Download the video with the largest resolution no better than 720p,
+# or the video with the smallest resolution available if there is no such video,
+# preferring larger framerate for formats with the same resolution
+$ yt-dlp -S "res:720,fps"
+
+
+
+# Download the video with smallest resolution no worse than 480p,
+# or the video with the largest resolution available if there is no such video,
+# preferring better codec and then larger total bitrate for the same resolution
+$ yt-dlp -S "+res:480,codec,br"
+```
+
+# MODIFYING METADATA
+
+The metadata obtained by the extractors can be modified by using `--parse-metadata` and `--replace-in-metadata`.
+
+`--replace-in-metadata FIELDS REGEX REPLACE` is used to replace text in any metadata field using [Python regular expression](https://docs.python.org/3/library/re.html#regular-expression-syntax). [Backreferences](https://docs.python.org/3/library/re.html?highlight=backreferences#re.sub) can be used in the replace string for advanced use.
+
+The general syntax of `--parse-metadata FROM:TO` is to give the name of a field or an [output template](#output-template) to extract data from, and the format to interpret it as, separated by a colon `:`. Either a [Python regular expression](https://docs.python.org/3/library/re.html#regular-expression-syntax) with named capture groups, a single field name, or a similar syntax to the [output template](#output-template) (only `%(field)s` formatting is supported) can be used for `TO`. The option can be used multiple times to parse and modify various fields.
+
+Note that these options preserve their relative order, allowing replacements to be made in parsed fields and vice versa. Also, any field thus created can be used in the [output template](#output-template) and will also affect the media file's metadata added when using `--embed-metadata`.
+
+This option also has a few special uses:
+
+* You can download an additional URL based on the metadata of the currently downloaded video. To do this, set the field `additional_urls` to the URL that you want to download. E.g. `--parse-metadata "description:(?P<additional_urls>https?://www\.vimeo\.com/\d+)"` will download the first vimeo video found in the description
+
+* You can use this to change the metadata that is embedded in the media file. To do this, set the value of the corresponding field with a `meta_` prefix. For example, any value you set to the `meta_description` field will be added to the `description` field in the file - you can use this to set a different "description" and "synopsis". To modify the metadata of individual streams, use the `meta<n>_` prefix (e.g. `meta1_language`). Any value set to the `meta_` field will overwrite all default values.
+
+**Note**: Metadata modification happens before format selection, post-extraction and other post-processing operations. Some fields may be added or changed during these steps, overriding your changes.
+
+For reference, these are the fields yt-dlp adds by default to the file metadata:
+
+Metadata fields | From
+:--------------------------|:------------------------------------------------
+`title` | `track` or `title`
+`date` | `upload_date`
+`description`, `synopsis` | `description`
+`purl`, `comment` | `webpage_url`
+`track` | `track_number`
+`artist` | `artist`, `artists`, `creator`, `creators`, `uploader` or `uploader_id`
+`composer` | `composer` or `composers`
+`genre` | `genre` or `genres`
+`album` | `album`
+`album_artist` | `album_artist` or `album_artists`
+`disc` | `disc_number`
+`show` | `series`
+`season_number` | `season_number`
+`episode_id` | `episode` or `episode_id`
+`episode_sort` | `episode_number`
+`language` of each stream | the format's `language`
+
+**Note**: The file format may not support some of these fields
+
+
+## Modifying metadata examples
+
+```bash
+# Interpret the title as "Artist - Title"
+$ yt-dlp --parse-metadata "title:%(artist)s - %(title)s"
+
+# Regex example
+$ yt-dlp --parse-metadata "description:Artist - (?P<artist>.+)"
+
+# Set title as "Series name S01E05"
+$ yt-dlp --parse-metadata "%(series)s S%(season_number)02dE%(episode_number)02d:%(title)s"
+
+# Prioritize uploader as the "artist" field in video metadata
+$ yt-dlp --parse-metadata "%(uploader|)s:%(meta_artist)s" --embed-metadata
+
+# Set "comment" field in video metadata using description instead of webpage_url,
+# handling multiple lines correctly
+$ yt-dlp --parse-metadata "description:(?s)(?P<meta_comment>.+)" --embed-metadata
+
+# Do not set any "synopsis" in the video metadata
+$ yt-dlp --parse-metadata ":(?P<meta_synopsis>)"
+
+# Remove "formats" field from the infojson by setting it to an empty string
+$ yt-dlp --parse-metadata "video::(?P<formats>)" --write-info-json
+
+# Replace all spaces and "_" in title and uploader with a `-`
+$ yt-dlp --replace-in-metadata "title,uploader" "[ _]" "-"
+```
+
+# EXTRACTOR ARGUMENTS
+
+Some extractors accept additional arguments which can be passed using `--extractor-args KEY:ARGS`. `ARGS` is a `;` (semicolon) separated string of `ARG=VAL1,VAL2`. E.g. `--extractor-args "youtube:player-client=android_embedded,web;include_live_dash" --extractor-args "funimation:version=uncut"`
+
+Note: In CLI, `ARG` can use `-` instead of `_`; e.g. `youtube:player-client` becomes `youtube:player_client`
+
+The following extractors use this feature:
+
+#### youtube
+* `lang`: Prefer translated metadata (`title`, `description` etc.) of this language code (case-sensitive). By default, the video's primary language metadata is preferred, with a fallback to `en` translated. See [youtube.py](https://github.com/yt-dlp/yt-dlp/blob/c26f9b991a0681fd3ea548d535919cec1fbbd430/yt_dlp/extractor/youtube.py#L381-L390) for the list of supported content language codes
+* `skip`: One or more of `hls`, `dash` or `translated_subs` to skip extraction of the m3u8 manifests, dash manifests and [auto-translated subtitles](https://github.com/yt-dlp/yt-dlp/issues/4090#issuecomment-1158102032) respectively
+* `player_client`: Clients to extract video data from. The main clients are `web`, `android` and `ios` with variants `_music`, `_embedded`, `_embedscreen`, `_creator` (e.g. `web_embedded`); and `mweb`, `mweb_embedscreen` and `tv_embedded` (agegate bypass) with no variants. By default, `ios,android,web` is used, but `tv_embedded` and `creator` variants are added as required for age-gated videos. Similarly, the music variants are added for `music.youtube.com` urls. You can use `all` to use all the clients, and `default` for the default clients.
+* `player_skip`: Skip some network requests that are generally needed for robust extraction. One or more of `configs` (skip client configs), `webpage` (skip initial webpage), `js` (skip js player). While these options can help reduce the number of requests needed or avoid some rate-limiting, they could cause some issues. See [#860](https://github.com/yt-dlp/yt-dlp/pull/860) for more details
+* `player_params`: YouTube player parameters to use for player requests. Will overwrite any default ones set by yt-dlp.
+* `comment_sort`: `top` or `new` (default) - choose comment sorting mode (on YouTube's side)
+* `max_comments`: Limit the amount of comments to gather. Comma-separated list of integers representing `max-comments,max-parents,max-replies,max-replies-per-thread`. Default is `all,all,all,all`
+ * E.g. `all,all,1000,10` will get a maximum of 1000 replies total, with up to 10 replies per thread. `1000,all,100` will get a maximum of 1000 comments, with a maximum of 100 replies total
+* `formats`: Change the types of formats to return. `dashy` (convert HTTP to DASH), `duplicate` (identical content but different URLs or protocol; includes `dashy`), `incomplete` (cannot be downloaded completely - live dash and post-live m3u8)
+* `innertube_host`: Innertube API host to use for all API requests; e.g. `studio.youtube.com`, `youtubei.googleapis.com`. Note that cookies exported from one subdomain will not work on others
+* `innertube_key`: Innertube API key to use for all API requests
+* `raise_incomplete_data`: `Incomplete Data Received` raises an error instead of reporting a warning
+
+#### youtubetab (YouTube playlists, channels, feeds, etc.)
+* `skip`: One or more of `webpage` (skip initial webpage download), `authcheck` (allow the download of playlists requiring authentication when no initial webpage is downloaded. This may cause unwanted behavior, see [#1122](https://github.com/yt-dlp/yt-dlp/pull/1122) for more details)
+* `approximate_date`: Extract approximate `upload_date` and `timestamp` in flat-playlist. This may cause date-based filters to be slightly off
+
+#### generic
+* `fragment_query`: Pass through any query in mpd/m3u8 manifest URLs to their fragments if no value is provided, or else apply the query string given as `fragment_query=VALUE`. Does not apply to ffmpeg
+* `variant_query`: Pass through the master m3u8 URL query to its variant playlist URLs if no value is provided, or else apply the query string given as `variant_query=VALUE`
+* `hls_key`: An HLS AES-128 key URI *or* key (as hex), and optionally the IV (as hex), in the form of `(URI|KEY)[,IV]`; e.g. `generic:hls_key=ABCDEF1234567980,0xFEDCBA0987654321`. Passing any of these values will force usage of the native HLS downloader and override the corresponding values found in the m3u8 playlist
+* `is_live`: Bypass live HLS detection and manually set `live_status` - a value of `false` will set `not_live`, any other value (or no value) will set `is_live`
+
+#### funimation
+* `language`: Audio languages to extract, e.g. `funimation:language=english,japanese`
+* `version`: The video version to extract - `uncut` or `simulcast`
+
+#### crunchyrollbeta (Crunchyroll)
+* `format`: Which stream type(s) to extract (default: `adaptive_hls`). Potentially useful values include `adaptive_hls`, `adaptive_dash`, `vo_adaptive_hls`, `vo_adaptive_dash`, `download_hls`, `download_dash`, `multitrack_adaptive_hls_v2`
+* `hardsub`: Preference order for which hardsub versions to extract, or `all` (default: `None` = no hardsubs), e.g. `crunchyrollbeta:hardsub=en-US,None`
+
+#### vikichannel
+* `video_types`: Types of videos to download - one or more of `episodes`, `movies`, `clips`, `trailers`
+
+#### niconico
+* `segment_duration`: Segment duration in milliseconds for HLS-DMC formats. Use it at your own risk since this feature **may result in your account termination.**
+
+#### youtubewebarchive
+* `check_all`: Try to check more at the cost of more requests. One or more of `thumbnails`, `captures`
+
+#### gamejolt
+* `comment_sort`: `hot` (default), `you` (cookies needed), `top`, `new` - choose comment sorting mode (on GameJolt's side)
+
+#### hotstar
+* `res`: Resolution to ignore - one or more of `sd`, `hd`, `fhd`
+* `vcodec`: Video codec to ignore - one or more of `h264`, `h265`, `dvh265`
+* `dr`: Dynamic range to ignore - one or more of `sdr`, `hdr10`, `dv`
+
+#### niconicochannelplus
+* `max_comments`: Maximum number of comments to extract - default is `120`
+
+#### tiktok
+* `api_hostname`: Hostname to use for mobile API requests, e.g. `api-h2.tiktokv.com`
+* `app_version`: App version to call mobile APIs with - should be set along with `manifest_app_version`, e.g. `20.2.1`
+* `manifest_app_version`: Numeric app version to call mobile APIs with, e.g. `221`
+
+#### rokfinchannel
+* `tab`: Which tab to download - one of `new`, `top`, `videos`, `podcasts`, `streams`, `stacks`
+
+#### twitter
+* `api`: Select one of `graphql` (default), `legacy` or `syndication` as the API for tweet extraction. Has no effect if logged in
+
+#### stacommu, wrestleuniverse
+* `device_id`: UUID value assigned by the website and used to enforce device limits for paid livestream content. Can be found in browser local storage
+
+#### twitch
+* `client_id`: Client ID value to be sent with GraphQL requests, e.g. `twitch:client_id=kimne78kx3ncx6brgo4mv6wki5h1ko`
+
+#### nhkradirulive (NHK らじる★らじる LIVE)
+* `area`: Which regional variation to extract. Valid areas are: `sapporo`, `sendai`, `tokyo`, `nagoya`, `osaka`, `hiroshima`, `matsuyama`, `fukuoka`. Defaults to `tokyo`
+
+#### nflplusreplay
+* `type`: Type(s) of game replays to extract. Valid types are: `full_game`, `full_game_spanish`, `condensed_game` and `all_22`. You can use `all` to extract all available replay types, which is the default
+
+#### jiosaavn
+* `bitrate`: Audio bitrates to request. One or more of `16`, `32`, `64`, `128`, `320`. Default is `128,320`
+
+**Note**: These options may be changed/removed in the future without concern for backward compatibility
+
+<!-- MANPAGE: MOVE "INSTALLATION" SECTION HERE -->
+
+
+# PLUGINS
+
+Note that **all** plugins are imported even if not invoked, and that **there are no checks** performed on plugin code. **Use plugins at your own risk and only if you trust the code!**
+
+Plugins can be of `<type>`s `extractor` or `postprocessor`.
+- Extractor plugins do not need to be enabled from the CLI and are automatically invoked when the input URL is suitable for them.
+- Extractor plugins take priority over builtin extractors.
+- Postprocessor plugins can be invoked using `--use-postprocessor NAME`, as shown below.
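+
+For example, a postprocessor plugin class named `MyPluginPP` (a hypothetical name) would typically be enabled with `yt-dlp --use-postprocessor MyPlugin`, i.e. the class name without the `PP` suffix.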
+
+
+Plugins are loaded from the namespace packages `yt_dlp_plugins.extractor` and `yt_dlp_plugins.postprocessor`.
+
+In other words, the file structure on the disk looks something like:
+
+ yt_dlp_plugins/
+ extractor/
+ myplugin.py
+ postprocessor/
+ myplugin.py
+
+yt-dlp looks for these `yt_dlp_plugins` namespace folders in many locations (see below) and loads in plugins from **all** of them.
+
+See the [wiki for some known plugins](https://github.com/yt-dlp/yt-dlp/wiki/Plugins)
+
+## Installing Plugins
+
+Plugins can be installed using various methods and locations.
+
+1. **Configuration directories**:
+ Plugin packages (containing a `yt_dlp_plugins` namespace folder) can be dropped into the following standard [configuration locations](#configuration):
+ * **User Plugins**
+ * `${XDG_CONFIG_HOME}/yt-dlp/plugins/<package name>/yt_dlp_plugins/` (recommended on Linux/macOS)
+ * `${XDG_CONFIG_HOME}/yt-dlp-plugins/<package name>/yt_dlp_plugins/`
+ * `${APPDATA}/yt-dlp/plugins/<package name>/yt_dlp_plugins/` (recommended on Windows)
+ * `${APPDATA}/yt-dlp-plugins/<package name>/yt_dlp_plugins/`
+ * `~/.yt-dlp/plugins/<package name>/yt_dlp_plugins/`
+ * `~/yt-dlp-plugins/<package name>/yt_dlp_plugins/`
+ * **System Plugins**
+ * `/etc/yt-dlp/plugins/<package name>/yt_dlp_plugins/`
+ * `/etc/yt-dlp-plugins/<package name>/yt_dlp_plugins/`
+2. **Executable location**: Plugin packages can similarly be installed in a `yt-dlp-plugins` directory under the executable location (recommended for portable installations):
+ * Binary: if the executable is at `<root-dir>/yt-dlp.exe`, use `<root-dir>/yt-dlp-plugins/<package name>/yt_dlp_plugins/`
+ * Source: if the entry point is at `<root-dir>/yt_dlp/__main__.py`, use `<root-dir>/yt-dlp-plugins/<package name>/yt_dlp_plugins/`
+
+3. **pip and other locations in `PYTHONPATH`**
+ * Plugin packages can be installed and managed using `pip`. See [yt-dlp-sample-plugins](https://github.com/yt-dlp/yt-dlp-sample-plugins) for an example.
+ * Note: plugin files across plugin packages installed with pip must have unique filenames.
+ * Any path in `PYTHONPATH` is searched for the `yt_dlp_plugins` namespace folder.
+ * Note: This does not apply to PyInstaller/py2exe builds.
+
+
+`.zip`, `.egg` and `.whl` archives containing a `yt_dlp_plugins` namespace folder in their root are also supported as plugin packages.
+* e.g. `${XDG_CONFIG_HOME}/yt-dlp/plugins/mypluginpkg.zip` where `mypluginpkg.zip` contains `yt_dlp_plugins/<type>/myplugin.py`
+
+Run yt-dlp with `--verbose` to check if the plugin has been loaded.
+
+## Developing Plugins
+
+See the [yt-dlp-sample-plugins](https://github.com/yt-dlp/yt-dlp-sample-plugins) repo for a template plugin package and the [Plugin Development](https://github.com/yt-dlp/yt-dlp/wiki/Plugin-Development) section of the wiki for a plugin development guide.
+
+All public classes with a name ending in `IE`/`PP` are imported from each file for extractors and postprocessors respectively. This respects the underscore prefix (e.g. `_MyBasePluginIE` is private) and `__all__`. Modules can similarly be excluded by prefixing the module name with an underscore (e.g. `_myplugin.py`).
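+
+A minimal sketch of an extractor plugin following these conventions (all file, class and URL names here are hypothetical):
+
+```python
+# yt_dlp_plugins/extractor/myplugin.py
+from yt_dlp.extractor.common import InfoExtractor
+
+
+class _MyBasePluginIE(InfoExtractor):  # underscore prefix: private, not loaded as a plugin
+    pass
+
+
+class MyPluginIE(_MyBasePluginIE):  # public name ending in "IE": loaded as an extractor
+    _VALID_URL = r'https?://example\.com/video/(?P<id>\d+)'
+
+    def _real_extract(self, url):
+        video_id = self._match_id(url)
+        return {
+            'id': video_id,
+            'title': f'Example video {video_id}',
+            # hypothetical direct media URL
+            'url': f'https://example.com/media/{video_id}.mp4',
+        }
+```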
+
+To replace an existing extractor with a subclass of one, set the `plugin_name` class keyword argument (e.g. `class MyPluginIE(ABuiltInIE, plugin_name='myplugin')` will replace `ABuiltInIE` with `MyPluginIE`). Since the extractor replaces the parent, you should exclude the subclass extractor from being imported separately by making it private using one of the methods described above.
+
+If you are a plugin author, add [yt-dlp-plugins](https://github.com/topics/yt-dlp-plugins) as a topic to your repository for discoverability.
+
+See the [Developer Instructions](https://github.com/yt-dlp/yt-dlp/blob/master/CONTRIBUTING.md#developer-instructions) on how to write and test an extractor.
+
+# EMBEDDING YT-DLP
+
+yt-dlp makes the best effort to be a good command-line program, and thus should be callable from any programming language.
+
+Your program should avoid parsing the normal stdout since it may change in future versions. Instead, it should use options such as `-J`, `--print`, `--progress-template`, `--exec` etc. to create console output that you can reliably reproduce and parse.
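+
+For example, to emit one easily parseable line per video instead of scraping the regular output (the URL is a placeholder):
+```
+yt-dlp --print "%(id)s %(title)s" "https://some/video"
+```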
+
+From a Python program, you can embed yt-dlp in a more powerful fashion, like this:
+
+```python
+from yt_dlp import YoutubeDL
+
+URLS = ['https://www.youtube.com/watch?v=BaW_jenozKc']
+with YoutubeDL() as ydl:
+ ydl.download(URLS)
+```
+
+Most likely, you'll want to use various options. For a list of options available, have a look at [`yt_dlp/YoutubeDL.py`](yt_dlp/YoutubeDL.py#L183) or `help(yt_dlp.YoutubeDL)` in a Python shell. If you are already familiar with the CLI, you can use [`devscripts/cli_to_api.py`](https://github.com/yt-dlp/yt-dlp/blob/master/devscripts/cli_to_api.py) to translate any CLI switches to `YoutubeDL` params.
+
+**Tip**: If you are porting your code from youtube-dl to yt-dlp, one important point to look out for is that we do not guarantee the return value of `YoutubeDL.extract_info` to be JSON-serializable, or even to be a dictionary. It will be dictionary-like, but if you want to ensure it is a serializable dictionary, pass it through `YoutubeDL.sanitize_info` as shown in the [example below](#extracting-information)
+
+## Embedding examples
+
+#### Extracting information
+
+```python
+import json
+import yt_dlp
+
+URL = 'https://www.youtube.com/watch?v=BaW_jenozKc'
+
+# ℹ️ See help(yt_dlp.YoutubeDL) for a list of available options and public functions
+ydl_opts = {}
+with yt_dlp.YoutubeDL(ydl_opts) as ydl:
+ info = ydl.extract_info(URL, download=False)
+
+ # ℹ️ ydl.sanitize_info makes the info json-serializable
+ print(json.dumps(ydl.sanitize_info(info)))
+```
+#### Download using an info-json
+
+```python
+import yt_dlp
+
+INFO_FILE = 'path/to/video.info.json'
+
+with yt_dlp.YoutubeDL() as ydl:
+ error_code = ydl.download_with_info_file(INFO_FILE)
+
+print('Some videos failed to download' if error_code
+ else 'All videos successfully downloaded')
+```
+
+#### Extract audio
+
+```python
+import yt_dlp
+
+URLS = ['https://www.youtube.com/watch?v=BaW_jenozKc']
+
+ydl_opts = {
+ 'format': 'm4a/bestaudio/best',
+ # ℹ️ See help(yt_dlp.postprocessor) for a list of available Postprocessors and their arguments
+ 'postprocessors': [{ # Extract audio using ffmpeg
+ 'key': 'FFmpegExtractAudio',
+ 'preferredcodec': 'm4a',
+ }]
+}
+
+with yt_dlp.YoutubeDL(ydl_opts) as ydl:
+ error_code = ydl.download(URLS)
+```
+
+#### Filter videos
+
+```python
+import yt_dlp
+
+URLS = ['https://www.youtube.com/watch?v=BaW_jenozKc']
+
+def longer_than_a_minute(info, *, incomplete):
+ """Download only videos longer than a minute (or with unknown duration)"""
+ duration = info.get('duration')
+ if duration and duration < 60:
+ return 'The video is too short'
+
+ydl_opts = {
+ 'match_filter': longer_than_a_minute,
+}
+
+with yt_dlp.YoutubeDL(ydl_opts) as ydl:
+ error_code = ydl.download(URLS)
+```
+
+#### Adding logger and progress hook
+
+```python
+import yt_dlp
+
+URLS = ['https://www.youtube.com/watch?v=BaW_jenozKc']
+
+class MyLogger:
+ def debug(self, msg):
+ # For compatibility with youtube-dl, both debug and info are passed into debug
+ # You can distinguish them by the prefix '[debug] '
+ if msg.startswith('[debug] '):
+ pass
+ else:
+ self.info(msg)
+
+ def info(self, msg):
+ pass
+
+ def warning(self, msg):
+ pass
+
+ def error(self, msg):
+ print(msg)
+
+
+# ℹ️ See "progress_hooks" in help(yt_dlp.YoutubeDL)
+def my_hook(d):
+ if d['status'] == 'finished':
+ print('Done downloading, now post-processing ...')
+
+
+ydl_opts = {
+ 'logger': MyLogger(),
+ 'progress_hooks': [my_hook],
+}
+
+with yt_dlp.YoutubeDL(ydl_opts) as ydl:
+ ydl.download(URLS)
+```
+
+#### Add a custom PostProcessor
+
+```python
+import yt_dlp
+
+URLS = ['https://www.youtube.com/watch?v=BaW_jenozKc']
+
+# ℹ️ See help(yt_dlp.postprocessor.PostProcessor)
+class MyCustomPP(yt_dlp.postprocessor.PostProcessor):
+    def run(self, info):
+        self.to_screen('Doing stuff')
+        return [], info
+
+
+with yt_dlp.YoutubeDL() as ydl:
+    # ℹ️ "when" can take any value in yt_dlp.utils.POSTPROCESS_WHEN
+    ydl.add_post_processor(MyCustomPP(), when='pre_process')
+    ydl.download(URLS)
+```
+
+
+#### Use a custom format selector
+
+```python
+import yt_dlp
+
+URLS = ['https://www.youtube.com/watch?v=BaW_jenozKc']
+
+def format_selector(ctx):
+    """ Select the best video and the best audio that won't result in an mkv.
+    NOTE: This is just an example and does not handle all cases """
+
+    # formats are already sorted worst to best
+    formats = ctx.get('formats')[::-1]
+
+    # acodec='none' means there is no audio
+    best_video = next(f for f in formats
+                      if f['vcodec'] != 'none' and f['acodec'] == 'none')
+
+    # find compatible audio extension
+    audio_ext = {'mp4': 'm4a', 'webm': 'webm'}[best_video['ext']]
+    # vcodec='none' means there is no video
+    best_audio = next(f for f in formats if (
+        f['acodec'] != 'none' and f['vcodec'] == 'none' and f['ext'] == audio_ext))
+
+    # These are the minimum required fields for a merged format
+    yield {
+        'format_id': f'{best_video["format_id"]}+{best_audio["format_id"]}',
+        'ext': best_video['ext'],
+        'requested_formats': [best_video, best_audio],
+        # Must be + separated list of protocols
+        'protocol': f'{best_video["protocol"]}+{best_audio["protocol"]}'
+    }
+
+
+ydl_opts = {
+    'format': format_selector,
+}
+
+with yt_dlp.YoutubeDL(ydl_opts) as ydl:
+    ydl.download(URLS)
+```
+
+
+# CHANGES FROM YOUTUBE-DL
+
+### New features
+
+* Forked from [**yt-dlc@f9401f2**](https://github.com/blackjack4494/yt-dlc/commit/f9401f2a91987068139c5f757b12fc711d4c0cee) and merged with [**youtube-dl@be008e6**](https://github.com/ytdl-org/youtube-dl/commit/be008e657d79832642e2158557c899249c9e31cd) ([exceptions](https://github.com/yt-dlp/yt-dlp/issues/21))
+
+* **[SponsorBlock Integration](#sponsorblock-options)**: You can mark/remove sponsor sections in YouTube videos by utilizing the [SponsorBlock](https://sponsor.ajay.app) API
+
+* **[Format Sorting](#sorting-formats)**: The default format sorting options have been changed so that higher resolution and better codecs will be now preferred instead of simply using larger bitrate. Furthermore, you can now specify the sort order using `-S`. This allows for much easier format selection than what is possible by simply using `--format` ([examples](#format-selection-examples))
+
+* **Merged with animelover1984/youtube-dl**: You get most of the features and improvements from [animelover1984/youtube-dl](https://github.com/animelover1984/youtube-dl) including `--write-comments`, `BiliBiliSearch`, `BilibiliChannel`, Embedding thumbnail in mp4/ogg/opus, playlist infojson etc. Note that NicoNico livestreams are not available. See [#31](https://github.com/yt-dlp/yt-dlp/pull/31) for details.
+
+* **YouTube improvements**:
+ * Supports Clips, Stories (`ytstories:<channel UCID>`), Search (including filters)**\***, YouTube Music Search, Channel-specific search, Search prefixes (`ytsearch:`, `ytsearchdate:`)**\***, Mixes, and Feeds (`:ytfav`, `:ytwatchlater`, `:ytsubs`, `:ythistory`, `:ytrec`, `:ytnotif`)
+ * Fix for [n-sig based throttling](https://github.com/ytdl-org/youtube-dl/issues/29326) **\***
+ * Supports some (but not all) age-gated content without cookies
+ * Download livestreams from the start using `--live-from-start` (*experimental*)
+ * Channel URLs download all uploads of the channel, including shorts and live
+
+* **Cookies from browser**: Cookies can be automatically extracted from all major web browsers using `--cookies-from-browser BROWSER[+KEYRING][:PROFILE][::CONTAINER]`
+
+* **Download time range**: Videos can be downloaded partially based on either timestamps or chapters using `--download-sections`
+
+* **Split video by chapters**: Videos can be split into multiple files based on chapters using `--split-chapters`
+
+* **Multi-threaded fragment downloads**: Download multiple fragments of m3u8/mpd videos in parallel. Use `--concurrent-fragments` (`-N`) option to set the number of threads used
+
+* **Aria2c with HLS/DASH**: You can use `aria2c` as the external downloader for DASH(mpd) and HLS(m3u8) formats
+
+* **New and fixed extractors**: Many new extractors have been added and a lot of existing ones have been fixed. See the [changelog](Changelog.md) or the [list of supported sites](supportedsites.md)
+
+* **New MSOs**: Philo, Spectrum, SlingTV, Cablevision, RCN etc.
+
+* **Subtitle extraction from manifests**: Subtitles can be extracted from streaming media manifests. See [commit/be6202f](https://github.com/yt-dlp/yt-dlp/commit/be6202f12b97858b9d716e608394b51065d0419f) for details
+
+* **Multiple paths and output templates**: You can give different [output templates](#output-template) and download paths for different types of files. You can also set a temporary path where intermediary files are downloaded to using `--paths` (`-P`)
+
+* **Portable Configuration**: Configuration files are automatically loaded from the home and root directories. See [CONFIGURATION](#configuration) for details
+
+* **Output template improvements**: Output templates can now have date-time formatting, numeric offsets, object traversal etc. See [output template](#output-template) for details. Even more advanced operations can also be done with the help of `--parse-metadata` and `--replace-in-metadata`
+
+* **Other new options**: Many new options have been added such as `--alias`, `--print`, `--concat-playlist`, `--wait-for-video`, `--retry-sleep`, `--sleep-requests`, `--convert-thumbnails`, `--force-download-archive`, `--force-overwrites`, `--break-match-filter` etc
+
+* **Improvements**: Regex and other operators in `--format`/`--match-filter`, multiple `--postprocessor-args` and `--downloader-args`, faster archive checking, more [format selection options](#format-selection), merge multi-video/audio, multiple `--config-locations`, `--exec` at different stages, etc
+
+* **Plugins**: Extractors and PostProcessors can be loaded from an external file. See [plugins](#plugins) for details
+
+* **Self updater**: The releases can be updated using `yt-dlp -U`, and downgraded using `--update-to` if required
+
+* **Automated builds**: [Nightly/master builds](#update-channels) can be used with `--update-to nightly` and `--update-to master`
+
+See [changelog](Changelog.md) or [commits](https://github.com/yt-dlp/yt-dlp/commits) for the full list of changes
+
+Features marked with a **\*** have been back-ported to youtube-dl
+
+### Differences in default behavior
+
+Some of yt-dlp's default options are different from those of youtube-dl and youtube-dlc:
+
+* yt-dlp supports only [Python 3.8+](## "Windows 7"), and *may* remove support for more versions as they [become EOL](https://devguide.python.org/versions/#python-release-cycle); while [youtube-dl still supports Python 2.6+ and 3.2+](https://github.com/ytdl-org/youtube-dl/issues/30568#issue-1118238743)
+* The options `--auto-number` (`-A`), `--title` (`-t`) and `--literal` (`-l`) no longer work. See [removed options](#Removed) for details
+* `avconv` is not supported as an alternative to `ffmpeg`
+* yt-dlp stores config files in slightly different locations to youtube-dl. See [CONFIGURATION](#configuration) for a list of correct locations
+* The default [output template](#output-template) is `%(title)s [%(id)s].%(ext)s`. There is no real reason for this change. This was changed before yt-dlp was ever made public and now there are no plans to change it back to `%(title)s-%(id)s.%(ext)s`. Instead, you may use `--compat-options filename`
+* The default [format sorting](#sorting-formats) is different from youtube-dl and prefers higher resolution and better codecs rather than higher bitrates. You can use the `--format-sort` option to change this to any order you prefer, or use `--compat-options format-sort` to use youtube-dl's sorting order
+* The default format selector is `bv*+ba/b`. This means that if a combined video + audio format that is better than the best video-only format is found, the former will be preferred. Use `-f bv+ba/b` or `--compat-options format-spec` to revert this
+* Unlike youtube-dlc, yt-dlp does not allow merging multiple audio/video streams into one file by default (since this conflicts with the use of `-f bv*+ba`). If needed, this feature must be enabled using `--audio-multistreams` and `--video-multistreams`. You can also use `--compat-options multistreams` to enable both
+* `--no-abort-on-error` is enabled by default. Use `--abort-on-error` or `--compat-options abort-on-error` to abort on errors instead
+* When writing metadata files such as thumbnails, description or infojson, the same information (if available) is also written for playlists. Use `--no-write-playlist-metafiles` or `--compat-options no-playlist-metafiles` to not write these files
+* `--add-metadata` attaches the `infojson` to `mkv` files in addition to writing the metadata when used with `--write-info-json`. Use `--no-embed-info-json` or `--compat-options no-attach-info-json` to revert this
+* Some metadata are embedded into different fields when using `--add-metadata` as compared to youtube-dl. Most notably, `comment` field contains the `webpage_url` and `synopsis` contains the `description`. You can [use `--parse-metadata`](#modifying-metadata) to modify this to your liking or use `--compat-options embed-metadata` to revert this
+* `playlist_index` behaves differently when used with options like `--playlist-reverse` and `--playlist-items`. See [#302](https://github.com/yt-dlp/yt-dlp/issues/302) for details. You can use `--compat-options playlist-index` if you want to keep the earlier behavior
+* The output of `-F` is listed in a new format. Use `--compat-options list-formats` to revert this
+* Live chats (if available) are considered as subtitles. Use `--sub-langs all,-live_chat` to download all subtitles except live chat. You can also use `--compat-options no-live-chat` to prevent any live chat/danmaku from downloading
+* YouTube channel URLs download all uploads of the channel. To download only the videos in a specific tab, pass the tab's URL. If the channel does not show the requested tab, an error will be raised. Also, `/live` URLs raise an error if there are no live videos instead of silently downloading the entire channel. You may use `--compat-options no-youtube-channel-redirect` to revert all these redirections
+* Unavailable videos are also listed for YouTube playlists. Use `--compat-options no-youtube-unavailable-videos` to remove this
+* The upload dates extracted from YouTube are in UTC [when available](https://github.com/yt-dlp/yt-dlp/blob/89e4d86171c7b7c997c77d4714542e0383bf0db0/yt_dlp/extractor/youtube.py#L3898-L3900). Use `--compat-options no-youtube-prefer-utc-upload-date` to prefer the non-UTC upload date.
+* If `ffmpeg` is used as the downloader, the downloading and merging of formats happen in a single step when possible. Use `--compat-options no-direct-merge` to revert this
+* Thumbnail embedding in `mp4` is done with mutagen if possible. Use `--compat-options embed-thumbnail-atomicparsley` to force the use of AtomicParsley instead
+* Some internal metadata such as filenames are removed by default from the infojson. Use `--no-clean-infojson` or `--compat-options no-clean-infojson` to revert this
+* When `--embed-subs` and `--write-subs` are used together, the subtitles are written to disk and also embedded in the media file. You can use just `--embed-subs` to embed the subs and automatically delete the separate file. See [#630 (comment)](https://github.com/yt-dlp/yt-dlp/issues/630#issuecomment-893659460) for more info. `--compat-options no-keep-subs` can be used to revert this
+* `certifi` will be used for SSL root certificates, if installed. If you want to use system certificates (e.g. self-signed), use `--compat-options no-certifi`
+* yt-dlp's sanitization of invalid characters in filenames is different/smarter than in youtube-dl. You can use `--compat-options filename-sanitization` to revert to youtube-dl's behavior
+* ~~yt-dlp tries to parse the external downloader outputs into the standard progress output if possible (Currently implemented: [aria2c](https://github.com/yt-dlp/yt-dlp/issues/5931)). You can use `--compat-options no-external-downloader-progress` to get the downloader output as-is~~
+* yt-dlp versions between 2021.09.01 and 2023.01.02 applied `--match-filter` to nested playlists. This was an unintentional side-effect of [8f18ac](https://github.com/yt-dlp/yt-dlp/commit/8f18aca8717bb0dd49054555af8d386e5eda3a88) and is fixed in [d7b460](https://github.com/yt-dlp/yt-dlp/commit/d7b460d0e5fc710950582baed2e3fc616ed98a80). Use `--compat-options playlist-match-filter` to revert this
+* yt-dlp versions between 2021.11.10 and 2023.06.21 estimated `filesize_approx` values for fragmented/manifest formats. This was added for convenience in [f2fe69](https://github.com/yt-dlp/yt-dlp/commit/f2fe69c7b0d208bdb1f6292b4ae92bc1e1a7444a), but was reverted in [0dff8e](https://github.com/yt-dlp/yt-dlp/commit/0dff8e4d1e6e9fb938f4256ea9af7d81f42fd54f) due to the potentially extreme inaccuracy of the estimated values. Use `--compat-options manifest-filesize-approx` to keep extracting the estimated values
+* yt-dlp uses modern http client backends such as `requests`. Use `--compat-options prefer-legacy-http-handler` to prefer the legacy http handler (`urllib`) for standard http requests.
+* The sub-modules `swfinterp` and `casefold` have been removed.
+
+For ease of use, a few more compat options are available:
+
+* `--compat-options all`: Use all compat options (Do NOT use)
+* `--compat-options youtube-dl`: Same as `--compat-options all,-multistreams,-playlist-match-filter,-manifest-filesize-approx`
+* `--compat-options youtube-dlc`: Same as `--compat-options all,-no-live-chat,-no-youtube-channel-redirect,-playlist-match-filter,-manifest-filesize-approx`
+* `--compat-options 2021`: Same as `--compat-options 2022,no-certifi,filename-sanitization,no-youtube-prefer-utc-upload-date`
+* `--compat-options 2022`: Same as `--compat-options 2023,playlist-match-filter,no-external-downloader-progress,prefer-legacy-http-handler,manifest-filesize-approx`
+* `--compat-options 2023`: Currently does nothing. Use this to enable all future compat options
+
+### Deprecated options
+
+These are all the deprecated options and the current alternatives to achieve the same effect
+
+#### Almost redundant options
+While these options are almost the same as their new counterparts, there are some differences that prevent them from being completely redundant
+
+    -j, --dump-json                   --print "%()j"
+    -F, --list-formats                --print formats_table
+    --list-thumbnails                 --print thumbnails_table --print playlist:thumbnails_table
+    --list-subs                       --print automatic_captions_table --print subtitles_table
+
+#### Redundant options
+While these options are redundant, they are still expected to be used due to their ease of use
+
+    --get-description                 --print description
+    --get-duration                    --print duration_string
+    --get-filename                    --print filename
+    --get-format                      --print format
+    --get-id                          --print id
+    --get-thumbnail                   --print thumbnail
+    -e, --get-title                   --print title
+    -g, --get-url                     --print urls
+    --match-title REGEX               --match-filter "title ~= (?i)REGEX"
+    --reject-title REGEX              --match-filter "title !~= (?i)REGEX"
+    --min-views COUNT                 --match-filter "view_count >=? COUNT"
+    --max-views COUNT                 --match-filter "view_count <=? COUNT"
+    --break-on-reject                 Use --break-match-filter
+    --user-agent UA                   --add-header "User-Agent:UA"
+    --referer URL                     --add-header "Referer:URL"
+    --playlist-start NUMBER           -I NUMBER:
+    --playlist-end NUMBER             -I :NUMBER
+    --playlist-reverse                -I ::-1
+    --no-playlist-reverse             Default
+    --no-colors                       --color no_color
+
+#### Not recommended
+While these options still work, their use is not recommended since there are other alternatives to achieve the same result
+
+    --force-generic-extractor         --ies generic,default
+    --exec-before-download CMD        --exec "before_dl:CMD"
+    --no-exec-before-download         --no-exec
+    --all-formats                     -f all
+    --all-subs                        --sub-langs all --write-subs
+    --print-json                      -j --no-simulate
+    --autonumber-size NUMBER          Use string formatting, e.g. %(autonumber)03d
+    --autonumber-start NUMBER         Use internal field formatting like %(autonumber+NUMBER)s
+    --id                              -o "%(id)s.%(ext)s"
+    --metadata-from-title FORMAT      --parse-metadata "%(title)s:FORMAT"
+    --hls-prefer-native               --downloader "m3u8:native"
+    --hls-prefer-ffmpeg               --downloader "m3u8:ffmpeg"
+    --list-formats-old                --compat-options list-formats (Alias: --no-list-formats-as-table)
+    --list-formats-as-table           --compat-options -list-formats [Default] (Alias: --no-list-formats-old)
+    --youtube-skip-dash-manifest      --extractor-args "youtube:skip=dash" (Alias: --no-youtube-include-dash-manifest)
+    --youtube-skip-hls-manifest       --extractor-args "youtube:skip=hls" (Alias: --no-youtube-include-hls-manifest)
+    --youtube-include-dash-manifest   Default (Alias: --no-youtube-skip-dash-manifest)
+    --youtube-include-hls-manifest    Default (Alias: --no-youtube-skip-hls-manifest)
+    --geo-bypass                      --xff "default"
+    --no-geo-bypass                   --xff "never"
+    --geo-bypass-country CODE         --xff CODE
+    --geo-bypass-ip-block IP_BLOCK    --xff IP_BLOCK
+
+#### Developer options
+These options are not intended to be used by the end-user
+
+    --test                            Download only part of video for testing extractors
+    --load-pages                      Load pages dumped by --write-pages
+    --youtube-print-sig-code          For testing youtube signatures
+    --allow-unplayable-formats        List unplayable formats also
+    --no-allow-unplayable-formats     Default
+
+#### Old aliases
+These are aliases that are no longer documented for various reasons
+
+    --avconv-location                 --ffmpeg-location
+    --clean-infojson                  --clean-info-json
+    --cn-verification-proxy URL       --geo-verification-proxy URL
+    --dump-headers                    --print-traffic
+    --dump-intermediate-pages         --dump-pages
+    --force-write-download-archive    --force-write-archive
+    --load-info                       --load-info-json
+    --no-clean-infojson               --no-clean-info-json
+    --no-split-tracks                 --no-split-chapters
+    --no-write-srt                    --no-write-subs
+    --prefer-unsecure                 --prefer-insecure
+    --rate-limit RATE                 --limit-rate RATE
+    --split-tracks                    --split-chapters
+    --srt-lang LANGS                  --sub-langs LANGS
+    --trim-file-names LENGTH          --trim-filenames LENGTH
+    --write-srt                       --write-subs
+    --yes-overwrites                  --force-overwrites
+
+#### Sponskrub Options
+Support for [SponSkrub](https://github.com/faissaloo/SponSkrub) has been deprecated in favor of the `--sponsorblock` options
+
+    --sponskrub                       --sponsorblock-mark all
+    --no-sponskrub                    --no-sponsorblock
+    --sponskrub-cut                   --sponsorblock-remove all
+    --no-sponskrub-cut                --sponsorblock-remove -all
+    --sponskrub-force                 Not applicable
+    --no-sponskrub-force              Not applicable
+    --sponskrub-location              Not applicable
+    --sponskrub-args                  Not applicable
+
+#### No longer supported
+These options may no longer work as intended
+
+    --prefer-avconv                   avconv is not officially supported by yt-dlp (Alias: --no-prefer-ffmpeg)
+    --prefer-ffmpeg                   Default (Alias: --no-prefer-avconv)
+    -C, --call-home                   Not implemented
+    --no-call-home                    Default
+    --include-ads                     No longer supported
+    --no-include-ads                  Default
+    --write-annotations               No supported site has annotations now
+    --no-write-annotations            Default
+    --compat-options seperate-video-versions  No longer needed
+
+#### Removed
+These options had been deprecated since 2014 and have now been entirely removed
+
+    -A, --auto-number                 -o "%(autonumber)s-%(id)s.%(ext)s"
+    -t, -l, --title, --literal        -o "%(title)s-%(id)s.%(ext)s"
+
+
+# CONTRIBUTING
+See [CONTRIBUTING.md](CONTRIBUTING.md#contributing-to-yt-dlp) for instructions on [Opening an Issue](CONTRIBUTING.md#opening-an-issue) and [Contributing code to the project](CONTRIBUTING.md#developer-instructions)
+
+# WIKI
+See the [Wiki](https://github.com/yt-dlp/yt-dlp/wiki) for more information
diff --git a/bundle/__init__.py b/bundle/__init__.py
new file mode 100644
index 0000000..e69de29
--- /dev/null
+++ b/bundle/__init__.py
diff --git a/bundle/py2exe.py b/bundle/py2exe.py
new file mode 100755
index 0000000..ccb52ea
--- /dev/null
+++ b/bundle/py2exe.py
@@ -0,0 +1,59 @@
+#!/usr/bin/env python3
+
+# Allow execution from anywhere
+import os
+import sys
+
+sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
+
+import warnings
+
+from py2exe import freeze
+
+from devscripts.utils import read_version
+
+VERSION = read_version()
+
+
+def main():
+    warnings.warn(
+        'py2exe builds do not support pycryptodomex and need VC++14 to run. '
+        'It is recommended to run "pyinst.py" to build using pyinstaller instead')
+
+ freeze(
+ console=[{
+ 'script': './yt_dlp/__main__.py',
+ 'dest_base': 'yt-dlp',
+ 'icon_resources': [(1, 'devscripts/logo.ico')],
+ }],
+ version_info={
+ 'version': VERSION,
+ 'description': 'A youtube-dl fork with additional features and patches',
+ 'comments': 'Official repository: <https://github.com/yt-dlp/yt-dlp>',
+ 'product_name': 'yt-dlp',
+ 'product_version': VERSION,
+ },
+ options={
+ 'bundle_files': 0,
+ 'compressed': 1,
+ 'optimize': 2,
+ 'dist_dir': './dist',
+ 'excludes': [
+ # py2exe cannot import Crypto
+ 'Crypto',
+ 'Cryptodome',
+ # py2exe appears to confuse this with our socks library.
+ # We don't use pysocks and urllib3.contrib.socks would fail to import if tried.
+ 'urllib3.contrib.socks'
+ ],
+ 'dll_excludes': ['w9xpopen.exe', 'crypt32.dll'],
+ # Modules that are only imported dynamically must be added here
+ 'includes': ['yt_dlp.compat._legacy', 'yt_dlp.compat._deprecated',
+ 'yt_dlp.utils._legacy', 'yt_dlp.utils._deprecated'],
+ },
+ zipfile=None,
+ )
+
+
+if __name__ == '__main__':
+ main()
diff --git a/bundle/pyinstaller.py b/bundle/pyinstaller.py
new file mode 100755
index 0000000..db9dbfd
--- /dev/null
+++ b/bundle/pyinstaller.py
@@ -0,0 +1,132 @@
+#!/usr/bin/env python3
+
+# Allow direct execution
+import os
+import sys
+
+sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
+
+import platform
+
+from PyInstaller.__main__ import run as run_pyinstaller
+
+from devscripts.utils import read_version
+
+OS_NAME, MACHINE, ARCH = sys.platform, platform.machine().lower(), platform.architecture()[0][:2]
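+# On x86 machines, keep the 'x86' suffix only for 32-bit builds; 64-bit x86 builds get no machine suffix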
+if MACHINE in ('x86', 'x86_64', 'amd64', 'i386', 'i686'):
+ MACHINE = 'x86' if ARCH == '32' else ''
+
+
+def main():
+ opts, version = parse_options(), read_version()
+
+ onedir = '--onedir' in opts or '-D' in opts
+ if not onedir and '-F' not in opts and '--onefile' not in opts:
+ opts.append('--onefile')
+
+ name, final_file = exe(onedir)
+ print(f'Building yt-dlp v{version} for {OS_NAME} {platform.machine()} with options {opts}')
+ print('Remember to update the version using "devscripts/update-version.py"')
+ if not os.path.isfile('yt_dlp/extractor/lazy_extractors.py'):
+ print('WARNING: Building without lazy_extractors. Run '
+ '"devscripts/make_lazy_extractors.py" to build lazy extractors', file=sys.stderr)
+ print(f'Destination: {final_file}\n')
+
+ opts = [
+ f'--name={name}',
+ '--icon=devscripts/logo.ico',
+ '--upx-exclude=vcruntime140.dll',
+ '--noconfirm',
+ '--additional-hooks-dir=yt_dlp/__pyinstaller',
+ *opts,
+ 'yt_dlp/__main__.py',
+ ]
+
+ print(f'Running PyInstaller with {opts}')
+ run_pyinstaller(opts)
+ set_version_info(final_file, version)
+
+
+def parse_options():
+ # Compatibility with older arguments
+ opts = sys.argv[1:]
+ if opts[0:1] in (['32'], ['64']):
+ if ARCH != opts[0]:
+ raise Exception(f'{opts[0]}bit executable cannot be built on a {ARCH}bit system')
+ opts = opts[1:]
+ return opts
+
+
+def exe(onedir):
+ """@returns (name, path)"""
+ name = '_'.join(filter(None, (
+ 'yt-dlp',
+ {'win32': '', 'darwin': 'macos'}.get(OS_NAME, OS_NAME),
+ MACHINE,
+ )))
+ return name, ''.join(filter(None, (
+ 'dist/',
+ onedir and f'{name}/',
+ name,
+ OS_NAME == 'win32' and '.exe'
+ )))
+
+
+def version_to_list(version):
+ version_list = version.split('.')
+ return list(map(int, version_list)) + [0] * (4 - len(version_list))
+
+
+def set_version_info(exe, version):
+ if OS_NAME == 'win32':
+ windows_set_version(exe, version)
+
+
+def windows_set_version(exe, version):
+ from PyInstaller.utils.win32.versioninfo import (
+ FixedFileInfo,
+ StringFileInfo,
+ StringStruct,
+ StringTable,
+ VarFileInfo,
+ VarStruct,
+ VSVersionInfo,
+ )
+
+ try:
+ from PyInstaller.utils.win32.versioninfo import SetVersion
+ except ImportError: # Pyinstaller >= 5.8
+ from PyInstaller.utils.win32.versioninfo import write_version_info_to_executable as SetVersion
+
+ version_list = version_to_list(version)
+ suffix = MACHINE and f'_{MACHINE}'
+ SetVersion(exe, VSVersionInfo(
+ ffi=FixedFileInfo(
+ filevers=version_list,
+ prodvers=version_list,
+ mask=0x3F,
+ flags=0x0,
+ OS=0x4,
+ fileType=0x1,
+ subtype=0x0,
+ date=(0, 0),
+ ),
+ kids=[
+ StringFileInfo([StringTable('040904B0', [
+ StringStruct('Comments', 'yt-dlp%s Command Line Interface' % suffix),
+ StringStruct('CompanyName', 'https://github.com/yt-dlp'),
+ StringStruct('FileDescription', 'yt-dlp%s' % (MACHINE and f' ({MACHINE})')),
+ StringStruct('FileVersion', version),
+ StringStruct('InternalName', f'yt-dlp{suffix}'),
+ StringStruct('LegalCopyright', 'pukkandan.ytdlp@gmail.com | UNLICENSE'),
+ StringStruct('OriginalFilename', f'yt-dlp{suffix}.exe'),
+ StringStruct('ProductName', f'yt-dlp{suffix}'),
+ StringStruct(
+ 'ProductVersion', f'{version}{suffix} on Python {platform.python_version()}'),
+ ])]), VarFileInfo([VarStruct('Translation', [0, 1200])])
+ ]
+ ))
+
+
+if __name__ == '__main__':
+ main()
diff --git a/devscripts/__init__.py b/devscripts/__init__.py
new file mode 100644
index 0000000..e69de29
--- /dev/null
+++ b/devscripts/__init__.py
diff --git a/devscripts/bash-completion.in b/devscripts/bash-completion.in
new file mode 100644
index 0000000..21f5279
--- /dev/null
+++ b/devscripts/bash-completion.in
@@ -0,0 +1,29 @@
+__yt_dlp()
+{
+ local cur prev opts fileopts diropts keywords
+ COMPREPLY=()
+ cur="${COMP_WORDS[COMP_CWORD]}"
+ prev="${COMP_WORDS[COMP_CWORD-1]}"
+ opts="{{flags}}"
+ keywords=":ytfavorites :ytrecommended :ytsubscriptions :ytwatchlater :ythistory"
+ fileopts="-a|--batch-file|--download-archive|--cookies|--load-info"
+ diropts="--cache-dir"
+
+ if [[ ${prev} =~ ${fileopts} ]]; then
+ COMPREPLY=( $(compgen -f -- ${cur}) )
+ return 0
+ elif [[ ${prev} =~ ${diropts} ]]; then
+ COMPREPLY=( $(compgen -d -- ${cur}) )
+ return 0
+ fi
+
+ if [[ ${cur} =~ : ]]; then
+ COMPREPLY=( $(compgen -W "${keywords}" -- ${cur}) )
+ return 0
+ elif [[ ${cur} == * ]] ; then
+ COMPREPLY=( $(compgen -W "${opts}" -- ${cur}) )
+ return 0
+ fi
+}
+
+complete -F __yt_dlp yt-dlp
diff --git a/devscripts/bash-completion.py b/devscripts/bash-completion.py
new file mode 100755
index 0000000..9b4a9d4
--- /dev/null
+++ b/devscripts/bash-completion.py
@@ -0,0 +1,31 @@
+#!/usr/bin/env python3
+
+# Allow direct execution
+import os
+import sys
+
+sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
+
+
+import yt_dlp
+
+BASH_COMPLETION_FILE = "completions/bash/yt-dlp"
+BASH_COMPLETION_TEMPLATE = "devscripts/bash-completion.in"
+
+
+def build_completion(opt_parser):
+ opts_flag = []
+ for group in opt_parser.option_groups:
+ for option in group.option_list:
+            # collect every option's long flag
+            opts_flag.append(option.get_opt_string())
+ with open(BASH_COMPLETION_TEMPLATE) as f:
+ template = f.read()
+ with open(BASH_COMPLETION_FILE, "w") as f:
+        # substitute the {{flags}} placeholder with the space-separated option list
+        filled_template = template.replace("{{flags}}", " ".join(opts_flag))
+ f.write(filled_template)
+
+
+parser = yt_dlp.parseOpts(ignore_config_files=True)[0]
+build_completion(parser)
diff --git a/devscripts/changelog_override.json b/devscripts/changelog_override.json
new file mode 100644
index 0000000..2a34ad0
--- /dev/null
+++ b/devscripts/changelog_override.json
@@ -0,0 +1,130 @@
+[
+ {
+ "action": "add",
+ "when": "29cb20bd563c02671b31dd840139e93dd37150a1",
+ "short": "[priority] **A new release type has been added!**\n * [`nightly`](https://github.com/yt-dlp/yt-dlp/releases/tag/nightly) builds will be made after each push, containing the latest fixes (but also possibly bugs).\n * When using `--update`/`-U`, a release binary will only update to its current channel (either `stable` or `nightly`).\n * The `--update-to` option has been added allowing the user more control over program upgrades (or downgrades).\n * `--update-to` can change the release channel (`stable`, `nightly`) and also upgrade or downgrade to specific tags.\n * **Usage**: `--update-to CHANNEL`, `--update-to TAG`, `--update-to CHANNEL@TAG`"
+ },
+ {
+ "action": "add",
+ "when": "5038f6d713303e0967d002216e7a88652401c22a",
+ "short": "[priority] **YouTube throttling fixes!**"
+ },
+ {
+ "action": "remove",
+ "when": "2e023649ea4e11151545a34dc1360c114981a236"
+ },
+ {
+ "action": "add",
+ "when": "01aba2519a0884ef17d5f85608dbd2a455577147",
+ "short": "[priority] YouTube: Improved throttling and signature fixes"
+ },
+ {
+ "action": "change",
+ "when": "c86e433c35fe5da6cb29f3539eef97497f84ed38",
+ "short": "[extractor/niconico:series] Fix extraction (#6898)",
+ "authors": ["sqrtNOT"]
+ },
+ {
+ "action": "change",
+ "when": "69a40e4a7f6caa5662527ebd2f3c4e8aa02857a2",
+ "short": "[extractor/youtube:music_search_url] Extract title (#7102)",
+ "authors": ["kangalio"]
+ },
+ {
+ "action": "change",
+ "when": "8417f26b8a819cd7ffcd4e000ca3e45033e670fb",
+ "short": "Add option `--color` (#6904)",
+ "authors": ["Grub4K"]
+ },
+ {
+ "action": "change",
+ "when": "b4e0d75848e9447cee2cd3646ce54d4744a7ff56",
+ "short": "Improve `--download-sections`\n - Support negative time-ranges\n - Add `*from-url` to obey time-ranges in URL",
+ "authors": ["pukkandan"]
+ },
+ {
+ "action": "change",
+ "when": "1e75d97db21152acc764b30a688e516f04b8a142",
+ "short": "[extractor/youtube] Add `ios` to default clients used\n - IOS is affected neither by 403 nor by nsig so helps mitigate them preemptively\n - IOS also has higher bit-rate 'premium' formats though they are not labeled as such",
+ "authors": ["pukkandan"]
+ },
+ {
+ "action": "change",
+ "when": "f2ff0f6f1914b82d4a51681a72cc0828115dcb4a",
+ "short": "[extractor/motherless] Add gallery support, fix groups (#7211)",
+ "authors": ["rexlambert22", "Ti4eeT4e"]
+ },
+ {
+ "action": "change",
+ "when": "a4486bfc1dc7057efca9dd3fe70d7fa25c56f700",
+ "short": "[misc] Revert \"Add automatic duplicate issue detection\"",
+ "authors": ["pukkandan"]
+ },
+ {
+ "action": "add",
+ "when": "1ceb657bdd254ad961489e5060f2ccc7d556b729",
+ "short": "[priority] Security: [[CVE-2023-35934](https://cve.mitre.org/cgi-bin/cvename.cgi?name=CVE-2023-35934)] Fix [Cookie leak](https://github.com/yt-dlp/yt-dlp/security/advisories/GHSA-v8mc-9377-rwjj)\n - `--add-header Cookie:` is deprecated and auto-scoped to input URL domains\n - Cookies are scoped when passed to external downloaders\n - Add `cookies` field to info.json and deprecate `http_headers.Cookie`"
+ },
+ {
+ "action": "change",
+ "when": "b03fa7834579a01cc5fba48c0e73488a16683d48",
+ "short": "[ie/twitter] Revert 92315c03774cfabb3a921884326beb4b981f786b",
+ "authors": ["pukkandan"]
+ },
+ {
+ "action": "change",
+ "when": "fcd6a76adc49d5cd8783985c7ce35384b72e545f",
+ "short": "[test] Add tests for socks proxies (#7908)",
+ "authors": ["coletdjnz"]
+ },
+ {
+ "action": "change",
+ "when": "4bf912282a34b58b6b35d8f7e6be535770c89c76",
+ "short": "[rh:urllib] Remove dot segments during URL normalization (#7662)",
+ "authors": ["coletdjnz"]
+ },
+ {
+ "action": "change",
+ "when": "59e92b1f1833440bb2190f847eb735cf0f90bc85",
+ "short": "[rh:urllib] Simplify gzip decoding (#7611)",
+ "authors": ["Grub4K"]
+ },
+ {
+ "action": "add",
+ "when": "c1d71d0d9f41db5e4306c86af232f5f6220a130b",
+ "short": "[priority] **The minimum *recommended* Python version has been raised to 3.8**\nSince Python 3.7 has reached end-of-life, support for it will be dropped soon. [Read more](https://github.com/yt-dlp/yt-dlp/issues/7803)"
+ },
+ {
+ "action": "add",
+ "when": "61bdf15fc7400601c3da1aa7a43917310a5bf391",
+ "short": "[priority] Security: [[CVE-2023-40581](https://cve.mitre.org/cgi-bin/cvename.cgi?name=CVE-2023-40581)] [Prevent RCE when using `--exec` with `%q` on Windows](https://github.com/yt-dlp/yt-dlp/security/advisories/GHSA-42h4-v29r-42qg)\n - The shell escape function is now using `\"\"` instead of `\\\"`.\n - `utils.Popen` has been patched to properly quote commands."
+ },
+ {
+ "action": "change",
+ "when": "8a8b54523addf46dfd50ef599761a81bc22362e6",
+ "short": "[rh:requests] Add handler for `requests` HTTP library (#3668)\n\n\tAdds support for HTTPS proxies and persistent connections (keep-alive)",
+ "authors": ["bashonly", "coletdjnz", "Grub4K"]
+ },
+ {
+ "action": "add",
+ "when": "1d03633c5a1621b9f3a756f0a4f9dc61fab3aeaa",
+ "short": "[priority] **The release channels have been adjusted!**\n\t* [`master`](https://github.com/yt-dlp/yt-dlp-master-builds) builds are made after each push, containing the latest fixes (but also possibly bugs). This was previously the `nightly` channel.\n\t* [`nightly`](https://github.com/yt-dlp/yt-dlp-nightly-builds) builds are now made once a day, if there were any changes."
+ },
+ {
+ "action": "add",
+ "when": "f04b5bedad7b281bee9814686bba1762bae092eb",
+ "short": "[priority] Security: [[CVE-2023-46121](https://cve.mitre.org/cgi-bin/cvename.cgi?name=CVE-2023-46121)] Patch [Generic Extractor MITM Vulnerability via Arbitrary Proxy Injection](https://github.com/yt-dlp/yt-dlp/security/advisories/GHSA-3ch3-jhc6-5r8x)\n\t- Disallow smuggling of arbitrary `http_headers`; extractors now only use specific headers"
+ },
+ {
+ "action": "change",
+ "when": "15f22b4880b6b3f71f350c64d70976ae65b9f1ca",
+ "short": "[webvtt] Allow spaces before newlines for CueBlock (#7681)",
+ "authors": ["TSRBerry"]
+ },
+ {
+ "action": "change",
+ "when": "4ce57d3b873c2887814cbec03d029533e82f7db5",
+ "short": "[ie] Support multi-period MPD streams (#6654)",
+ "authors": ["alard", "pukkandan"]
+ }
+]
diff --git a/devscripts/changelog_override.schema.json b/devscripts/changelog_override.schema.json
new file mode 100644
index 0000000..9bd747b
--- /dev/null
+++ b/devscripts/changelog_override.schema.json
@@ -0,0 +1,96 @@
+{
+ "$schema": "http://json-schema.org/draft/2020-12/schema",
+ "type": "array",
+ "uniqueItems": true,
+ "items": {
+ "type": "object",
+ "oneOf": [
+ {
+ "type": "object",
+ "properties": {
+ "action": {
+ "enum": [
+ "add"
+ ]
+ },
+ "when": {
+ "type": "string",
+ "pattern": "^([0-9a-f]{40}|\\d{4}\\.\\d{2}\\.\\d{2})$"
+ },
+ "hash": {
+ "type": "string",
+ "pattern": "^[0-9a-f]{40}$"
+ },
+ "short": {
+ "type": "string"
+ },
+ "authors": {
+ "type": "array",
+ "items": {
+ "type": "string"
+ }
+ }
+ },
+ "required": [
+ "action",
+ "short"
+ ]
+ },
+ {
+ "type": "object",
+ "properties": {
+ "action": {
+ "enum": [
+ "remove"
+ ]
+ },
+ "when": {
+ "type": "string",
+ "pattern": "^([0-9a-f]{40}|\\d{4}\\.\\d{2}\\.\\d{2})$"
+ },
+ "hash": {
+ "type": "string",
+ "pattern": "^[0-9a-f]{40}$"
+ }
+ },
+ "required": [
+ "action",
+ "hash"
+ ]
+ },
+ {
+ "type": "object",
+ "properties": {
+ "action": {
+ "enum": [
+ "change"
+ ]
+ },
+ "when": {
+ "type": "string",
+ "pattern": "^([0-9a-f]{40}|\\d{4}\\.\\d{2}\\.\\d{2})$"
+ },
+ "hash": {
+ "type": "string",
+ "pattern": "^[0-9a-f]{40}$"
+ },
+ "short": {
+ "type": "string"
+ },
+ "authors": {
+ "type": "array",
+ "items": {
+ "type": "string"
+ }
+ }
+ },
+ "required": [
+ "action",
+ "hash",
+ "short",
+ "authors"
+ ]
+ }
+ ]
+ }
+}
diff --git a/devscripts/check-porn.py b/devscripts/check-porn.py
new file mode 100644
index 0000000..fc72c30
--- /dev/null
+++ b/devscripts/check-porn.py
@@ -0,0 +1,61 @@
+#!/usr/bin/env python3
+"""
+This script employs a VERY basic heuristic ('porn' in webpage.lower()) to check
+whether we are failing to 'age_limit'-tag some porn site
+
+A second approach relies on a list of porn domains; to activate it,
+pass the list filename as the only argument
+"""
+
+# Allow direct execution
+import os
+import sys
+
+sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
+
+
+import urllib.parse
+import urllib.request
+
+from test.helper import gettestcases
+
+if len(sys.argv) > 1:
+ METHOD = 'LIST'
+    LIST = open(sys.argv[1], encoding='utf8').read().strip()
+else:
+    METHOD = 'HEURISTIC'
+
+for test in gettestcases():
+    if METHOD == 'HEURISTIC':
+ try:
+ webpage = urllib.request.urlopen(test['url'], timeout=10).read()
+ except Exception:
+ print('\nFail: {}'.format(test['name']))
+ continue
+
+ webpage = webpage.decode('utf8', 'replace')
+
+ RESULT = 'porn' in webpage.lower()
+
+ elif METHOD == 'LIST':
+ domain = urllib.parse.urlparse(test['url']).netloc
+ if not domain:
+ print('\nFail: {}'.format(test['name']))
+ continue
+ domain = '.'.join(domain.split('.')[-2:])
+
+ RESULT = ('.' + domain + '\n' in LIST or '\n' + domain + '\n' in LIST)
+
+ if RESULT and ('info_dict' not in test or 'age_limit' not in test['info_dict']
+ or test['info_dict']['age_limit'] != 18):
+ print('\nPotential missing age_limit check: {}'.format(test['name']))
+
+ elif not RESULT and ('info_dict' in test and 'age_limit' in test['info_dict']
+ and test['info_dict']['age_limit'] == 18):
+ print('\nPotential false negative: {}'.format(test['name']))
+
+ else:
+ sys.stdout.write('.')
+ sys.stdout.flush()
+
+print()
diff --git a/devscripts/cli_to_api.py b/devscripts/cli_to_api.py
new file mode 100644
index 0000000..2aa51eb
--- /dev/null
+++ b/devscripts/cli_to_api.py
@@ -0,0 +1,48 @@
+# Allow direct execution
+import os
+import sys
+
+sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
+
+import yt_dlp
+import yt_dlp.options
+
+create_parser = yt_dlp.options.create_parser
+
+
+def parse_patched_options(opts):
+ patched_parser = create_parser()
+ patched_parser.defaults.update({
+ 'ignoreerrors': False,
+ 'retries': 0,
+ 'fragment_retries': 0,
+ 'extract_flat': False,
+ 'concat_playlist': 'never',
+ })
+ yt_dlp.options.create_parser = lambda: patched_parser
+ try:
+ return yt_dlp.parse_options(opts)
+ finally:
+ yt_dlp.options.create_parser = create_parser
+
+
+default_opts = parse_patched_options([]).ydl_opts
+
+
+def cli_to_api(opts, cli_defaults=False):
+ opts = (yt_dlp.parse_options if cli_defaults else parse_patched_options)(opts).ydl_opts
+
+ diff = {k: v for k, v in opts.items() if default_opts[k] != v}
+ if 'postprocessors' in diff:
+ diff['postprocessors'] = [pp for pp in diff['postprocessors']
+ if pp not in default_opts['postprocessors']]
+ return diff
+
+
+if __name__ == '__main__':
+ from pprint import pprint
+
+ print('\nThe arguments passed translate to:\n')
+ pprint(cli_to_api(sys.argv[1:]))
+ print('\nCombining these with the CLI defaults gives:\n')
+ pprint(cli_to_api(sys.argv[1:], True))
diff --git a/devscripts/fish-completion.in b/devscripts/fish-completion.in
new file mode 100644
index 0000000..32938fb
--- /dev/null
+++ b/devscripts/fish-completion.in
@@ -0,0 +1,5 @@
+
+{{commands}}
+
+
+complete --command yt-dlp --arguments ":ytfavorites :ytrecommended :ytsubscriptions :ytwatchlater :ythistory"
diff --git a/devscripts/fish-completion.py b/devscripts/fish-completion.py
new file mode 100755
index 0000000..5d2f68a
--- /dev/null
+++ b/devscripts/fish-completion.py
@@ -0,0 +1,52 @@
+#!/usr/bin/env python3
+
+# Allow direct execution
+import os
+import sys
+
+sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
+
+
+import optparse
+
+import yt_dlp
+from yt_dlp.utils import shell_quote
+
+FISH_COMPLETION_FILE = 'completions/fish/yt-dlp.fish'
+FISH_COMPLETION_TEMPLATE = 'devscripts/fish-completion.in'
+
+EXTRA_ARGS = {
+ 'remux-video': ['--arguments', 'mp4 mkv', '--exclusive'],
+ 'recode-video': ['--arguments', 'mp4 flv ogg webm mkv', '--exclusive'],
+
+ # Options that need a file parameter
+ 'download-archive': ['--require-parameter'],
+ 'cookies': ['--require-parameter'],
+ 'load-info': ['--require-parameter'],
+ 'batch-file': ['--require-parameter'],
+}
+
+
+def build_completion(opt_parser):
+ commands = []
+
+ for group in opt_parser.option_groups:
+ for option in group.option_list:
+ long_option = option.get_opt_string().strip('-')
+ complete_cmd = ['complete', '--command', 'yt-dlp', '--long-option', long_option]
+ if option._short_opts:
+ complete_cmd += ['--short-option', option._short_opts[0].strip('-')]
+ if option.help != optparse.SUPPRESS_HELP:
+ complete_cmd += ['--description', option.help]
+ complete_cmd.extend(EXTRA_ARGS.get(long_option, []))
+ commands.append(shell_quote(complete_cmd))
+
+ with open(FISH_COMPLETION_TEMPLATE) as f:
+ template = f.read()
+ filled_template = template.replace('{{commands}}', '\n'.join(commands))
+ with open(FISH_COMPLETION_FILE, 'w') as f:
+ f.write(filled_template)
+
+
+parser = yt_dlp.parseOpts(ignore_config_files=True)[0]
+build_completion(parser)
diff --git a/devscripts/generate_aes_testdata.py b/devscripts/generate_aes_testdata.py
new file mode 100644
index 0000000..7f3c88b
--- /dev/null
+++ b/devscripts/generate_aes_testdata.py
@@ -0,0 +1,46 @@
+#!/usr/bin/env python3
+
+# Allow direct execution
+import os
+import sys
+
+sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
+
+
+import codecs
+import subprocess
+
+from yt_dlp.aes import aes_encrypt, key_expansion
+from yt_dlp.utils import intlist_to_bytes
+
+secret_msg = b'Secret message goes here'
+
+
+def hex_str(int_list):
+ return codecs.encode(intlist_to_bytes(int_list), 'hex')
+
+
+def openssl_encode(algo, key, iv):
+ cmd = ['openssl', 'enc', '-e', '-' + algo, '-K', hex_str(key), '-iv', hex_str(iv)]
+ prog = subprocess.Popen(cmd, stdin=subprocess.PIPE, stdout=subprocess.PIPE)
+ out, _ = prog.communicate(secret_msg)
+ return out
+
+
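+# 16-byte (128-bit) key and IV, given as lists of ints as expected by yt_dlp.aes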
+iv = key = [0x20, 0x15] + 14 * [0]
+
+r = openssl_encode('aes-128-cbc', key, iv)
+print('aes_cbc_decrypt')
+print(repr(r))
+
+password = key
+new_key = aes_encrypt(password, key_expansion(password))
+r = openssl_encode('aes-128-ctr', new_key, iv)
+print('aes_decrypt_text 16')
+print(repr(r))
+
+password = key + 16 * [0]
+new_key = aes_encrypt(password, key_expansion(password)) * (32 // 16)
+r = openssl_encode('aes-256-ctr', new_key, iv)
+print('aes_decrypt_text 32')
+print(repr(r))
diff --git a/devscripts/install_deps.py b/devscripts/install_deps.py
new file mode 100755
index 0000000..889d9ab
--- /dev/null
+++ b/devscripts/install_deps.py
@@ -0,0 +1,73 @@
+#!/usr/bin/env python3
+
+# Allow execution from anywhere
+import os
+import sys
+
+sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
+
+import argparse
+import re
+import subprocess
+
+from devscripts.tomlparse import parse_toml
+from devscripts.utils import read_file
+
+
+def parse_args():
+ parser = argparse.ArgumentParser(description='Install dependencies for yt-dlp')
+ parser.add_argument(
+ 'input', nargs='?', metavar='TOMLFILE', default='pyproject.toml', help='Input file (default: %(default)s)')
+ parser.add_argument(
+ '-e', '--exclude', metavar='DEPENDENCY', action='append', help='Exclude a dependency')
+ parser.add_argument(
+ '-i', '--include', metavar='GROUP', action='append', help='Include an optional dependency group')
+ parser.add_argument(
+ '-o', '--only-optional', action='store_true', help='Only install optional dependencies')
+ parser.add_argument(
+ '-p', '--print', action='store_true', help='Only print a requirements.txt to stdout')
+ parser.add_argument(
+ '-u', '--user', action='store_true', help='Install with pip as --user')
+ return parser.parse_args()
+
+
+def main():
+ args = parse_args()
+ project_table = parse_toml(read_file(args.input))['project']
+ optional_groups = project_table['optional-dependencies']
+ excludes = args.exclude or []
+
+ deps = []
+ if not args.only_optional: # `-o` should exclude 'dependencies' and the 'default' group
+ deps.extend(project_table['dependencies'])
+ if 'default' not in excludes: # `--exclude default` should exclude entire 'default' group
+ deps.extend(optional_groups['default'])
+
+ def name(dependency):
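+        # Reduce a requirement string to its bare package name, e.g. 'requests>=2.31.0' -> 'requests'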
+ return re.match(r'[\w-]+', dependency)[0].lower()
+
+ target_map = {name(dep): dep for dep in deps}
+
+ for include in filter(None, map(optional_groups.get, args.include or [])):
+ target_map.update(zip(map(name, include), include))
+
+ for exclude in map(name, excludes):
+ target_map.pop(exclude, None)
+
+ targets = list(target_map.values())
+
+ if args.print:
+ for target in targets:
+ print(target)
+ return
+
+ pip_args = [sys.executable, '-m', 'pip', 'install', '-U']
+ if args.user:
+ pip_args.append('--user')
+ pip_args.extend(targets)
+
+ return subprocess.call(pip_args)
+
+
+if __name__ == '__main__':
+ sys.exit(main())
diff --git a/devscripts/lazy_load_template.py b/devscripts/lazy_load_template.py
new file mode 100644
index 0000000..6f52165
--- /dev/null
+++ b/devscripts/lazy_load_template.py
@@ -0,0 +1,39 @@
+import importlib
+import random
+import re
+
+from ..utils import (
+ age_restricted,
+ bug_reports_message,
+ classproperty,
+ variadic,
+ write_string,
+)
+
+# These bloat the lazy_extractors, so allow them to pass through silently
+ALLOWED_CLASSMETHODS = {'extract_from_webpage', 'get_testcases', 'get_webpage_testcases'}
+_WARNED = False
+
+
+class LazyLoadMetaClass(type):
+ def __getattr__(cls, name):
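+        # Any attribute the lazy stub cannot serve falls through to the real class, triggering its import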
+ global _WARNED
+ if ('_real_class' not in cls.__dict__
+ and name not in ALLOWED_CLASSMETHODS and not _WARNED):
+ _WARNED = True
+ write_string('WARNING: Falling back to normal extractor since lazy extractor '
+ f'{cls.__name__} does not have attribute {name}{bug_reports_message()}\n')
+ return getattr(cls.real_class, name)
+
+
+class LazyLoadExtractor(metaclass=LazyLoadMetaClass):
+ @classproperty
+ def real_class(cls):
+ if '_real_class' not in cls.__dict__:
+ cls._real_class = getattr(importlib.import_module(cls._module), cls.__name__)
+ return cls._real_class
+
+ def __new__(cls, *args, **kwargs):
+ instance = cls.real_class.__new__(cls.real_class)
+ instance.__init__(*args, **kwargs)
+ return instance
diff --git a/devscripts/logo.ico b/devscripts/logo.ico
new file mode 100644
index 0000000..5503a43
--- /dev/null
+++ b/devscripts/logo.ico
Binary files differ
diff --git a/devscripts/make_changelog.py b/devscripts/make_changelog.py
new file mode 100644
index 0000000..faab5fa
--- /dev/null
+++ b/devscripts/make_changelog.py
@@ -0,0 +1,503 @@
+from __future__ import annotations
+
+# Allow direct execution
+import os
+import sys
+
+sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
+
+import enum
+import itertools
+import json
+import logging
+import re
+from collections import defaultdict
+from dataclasses import dataclass
+from functools import lru_cache
+from pathlib import Path
+
+from devscripts.utils import read_file, run_process, write_file
+
+BASE_URL = 'https://github.com'
+LOCATION_PATH = Path(__file__).parent
+HASH_LENGTH = 7
+
+logger = logging.getLogger(__name__)
+
+
+class CommitGroup(enum.Enum):
+ PRIORITY = 'Important'
+ CORE = 'Core'
+ EXTRACTOR = 'Extractor'
+ DOWNLOADER = 'Downloader'
+ POSTPROCESSOR = 'Postprocessor'
+ NETWORKING = 'Networking'
+ MISC = 'Misc.'
+
+ @classmethod
+ @lru_cache
+ def subgroup_lookup(cls):
+ return {
+ name: group
+ for group, names in {
+ cls.MISC: {
+ 'build',
+ 'ci',
+ 'cleanup',
+ 'devscripts',
+ 'docs',
+ 'test',
+ },
+ cls.NETWORKING: {
+ 'rh',
+ },
+ }.items()
+ for name in names
+ }
+
+ @classmethod
+ @lru_cache
+ def group_lookup(cls):
+ result = {
+ 'fd': cls.DOWNLOADER,
+ 'ie': cls.EXTRACTOR,
+ 'pp': cls.POSTPROCESSOR,
+ 'upstream': cls.CORE,
+ }
+ result.update({item.name.lower(): item for item in iter(cls)})
+ return result
+
+ @classmethod
+ def get(cls, value: str) -> tuple[CommitGroup | None, str | None]:
+ group, _, subgroup = (group.strip().lower() for group in value.partition('/'))
+
+ result = cls.group_lookup().get(group)
+ if not result:
+ if subgroup:
+ return None, value
+ subgroup = group
+ result = cls.subgroup_lookup().get(subgroup)
+
+ return result, subgroup or None
+
+
+@dataclass
+class Commit:
+ hash: str | None
+ short: str
+ authors: list[str]
+
+ def __str__(self):
+ result = f'{self.short!r}'
+
+ if self.hash:
+ result += f' ({self.hash[:HASH_LENGTH]})'
+
+ if self.authors:
+ authors = ', '.join(self.authors)
+ result += f' by {authors}'
+
+ return result
+
+
+@dataclass
+class CommitInfo:
+ details: str | None
+ sub_details: tuple[str, ...]
+ message: str
+ issues: list[str]
+ commit: Commit
+ fixes: list[Commit]
+
+ def key(self):
+ return ((self.details or '').lower(), self.sub_details, self.message)
+
+
+def unique(items):
+ return sorted({item.strip().lower(): item for item in items if item}.values())
+
+
+class Changelog:
+ MISC_RE = re.compile(r'(?:^|\b)(?:lint(?:ing)?|misc|format(?:ting)?|fixes)(?:\b|$)', re.IGNORECASE)
+ ALWAYS_SHOWN = (CommitGroup.PRIORITY,)
+
+ def __init__(self, groups, repo, collapsible=False):
+ self._groups = groups
+ self._repo = repo
+ self._collapsible = collapsible
+
+ def __str__(self):
+ return '\n'.join(self._format_groups(self._groups)).replace('\t', ' ')
+
+ def _format_groups(self, groups):
+ first = True
+ for item in CommitGroup:
+ if self._collapsible and item not in self.ALWAYS_SHOWN and first:
+ first = False
+ yield '\n<details><summary><h3>Changelog</h3></summary>\n'
+
+ group = groups[item]
+ if group:
+ yield self.format_module(item.value, group)
+
+ if self._collapsible:
+ yield '\n</details>'
+
+ def format_module(self, name, group):
+ result = f'\n#### {name} changes\n' if name else '\n'
+ return result + '\n'.join(self._format_group(group))
+
+ def _format_group(self, group):
+ sorted_group = sorted(group, key=CommitInfo.key)
+ detail_groups = itertools.groupby(sorted_group, lambda item: (item.details or '').lower())
+ for _, items in detail_groups:
+ items = list(items)
+ details = items[0].details
+
+ if details == 'cleanup':
+ items = self._prepare_cleanup_misc_items(items)
+
+ prefix = '-'
+ if details:
+ if len(items) == 1:
+ prefix = f'- **{details}**:'
+ else:
+ yield f'- **{details}**'
+ prefix = '\t-'
+
+ sub_detail_groups = itertools.groupby(items, lambda item: tuple(map(str.lower, item.sub_details)))
+ for sub_details, entries in sub_detail_groups:
+ if not sub_details:
+ for entry in entries:
+ yield f'{prefix} {self.format_single_change(entry)}'
+ continue
+
+ entries = list(entries)
+ sub_prefix = f'{prefix} {", ".join(entries[0].sub_details)}'
+ if len(entries) == 1:
+ yield f'{sub_prefix}: {self.format_single_change(entries[0])}'
+ continue
+
+ yield sub_prefix
+ for entry in entries:
+ yield f'\t{prefix} {self.format_single_change(entry)}'
+
+ def _prepare_cleanup_misc_items(self, items):
+ cleanup_misc_items = defaultdict(list)
+ sorted_items = []
+ for item in items:
+ if self.MISC_RE.search(item.message):
+ cleanup_misc_items[tuple(item.commit.authors)].append(item)
+ else:
+ sorted_items.append(item)
+
+ for commit_infos in cleanup_misc_items.values():
+ sorted_items.append(CommitInfo(
+ 'cleanup', ('Miscellaneous',), ', '.join(
+ self._format_message_link(None, info.commit.hash)
+ for info in sorted(commit_infos, key=lambda item: item.commit.hash or '')),
+ [], Commit(None, '', commit_infos[0].commit.authors), []))
+
+ return sorted_items
+
+ def format_single_change(self, info: CommitInfo):
+ message, sep, rest = info.message.partition('\n')
+ if '[' not in message:
+ # If the message doesn't already contain markdown links, try to add a link to the commit
+ message = self._format_message_link(message, info.commit.hash)
+
+ if info.issues:
+ message = f'{message} ({self._format_issues(info.issues)})'
+
+ if info.commit.authors:
+ message = f'{message} by {self._format_authors(info.commit.authors)}'
+
+ if info.fixes:
+ fix_message = ', '.join(f'{self._format_message_link(None, fix.hash)}' for fix in info.fixes)
+
+ authors = sorted({author for fix in info.fixes for author in fix.authors}, key=str.casefold)
+ if authors != info.commit.authors:
+ fix_message = f'{fix_message} by {self._format_authors(authors)}'
+
+ message = f'{message} (With fixes in {fix_message})'
+
+ return message if not sep else f'{message}{sep}{rest}'
+
+ def _format_message_link(self, message, hash):
+ assert message or hash, 'Improperly defined commit message or override'
+ message = message if message else hash[:HASH_LENGTH]
+ return f'[{message}]({self.repo_url}/commit/{hash})' if hash else message
+
+ def _format_issues(self, issues):
+ return ', '.join(f'[#{issue}]({self.repo_url}/issues/{issue})' for issue in issues)
+
+ @staticmethod
+ def _format_authors(authors):
+ return ', '.join(f'[{author}]({BASE_URL}/{author})' for author in authors)
+
+ @property
+ def repo_url(self):
+ return f'{BASE_URL}/{self._repo}'
+
+
+class CommitRange:
+ COMMAND = 'git'
+ COMMIT_SEPARATOR = '-----'
+
+ AUTHOR_INDICATOR_RE = re.compile(r'Authored by:? ', re.IGNORECASE)
+ MESSAGE_RE = re.compile(r'''
+ (?:\[(?P<prefix>[^\]]+)\]\ )?
+ (?:(?P<sub_details>`?[\w.-]+`?): )?
+ (?P<message>.+?)
+ (?:\ \((?P<issues>\#\d+(?:,\ \#\d+)*)\))?
+ ''', re.VERBOSE | re.DOTALL)
+ EXTRACTOR_INDICATOR_RE = re.compile(r'(?:Fix|Add)\s+Extractors?', re.IGNORECASE)
+ REVERT_RE = re.compile(r'(?:\[[^\]]+\]\s+)?(?i:Revert)\s+([\da-f]{40})')
+ FIXES_RE = re.compile(r'(?i:Fix(?:es)?(?:\s+bugs?)?(?:\s+in|\s+for)?|Revert|Improve)\s+([\da-f]{40})')
+ UPSTREAM_MERGE_RE = re.compile(r'Update to ytdl-commit-([\da-f]+)')
+
+ def __init__(self, start, end, default_author=None):
+ self._start, self._end = start, end
+ self._commits, self._fixes = self._get_commits_and_fixes(default_author)
+ self._commits_added = []
+
+ def __iter__(self):
+ return iter(itertools.chain(self._commits.values(), self._commits_added))
+
+ def __len__(self):
+ return len(self._commits) + len(self._commits_added)
+
+ def __contains__(self, commit):
+ if isinstance(commit, Commit):
+ if not commit.hash:
+ return False
+ commit = commit.hash
+
+ return commit in self._commits
+
+ def _get_commits_and_fixes(self, default_author):
+ result = run_process(
+ self.COMMAND, 'log', f'--format=%H%n%s%n%b%n{self.COMMIT_SEPARATOR}',
+ f'{self._start}..{self._end}' if self._start else self._end).stdout
+
+ commits, reverts = {}, {}
+ fixes = defaultdict(list)
+ lines = iter(result.splitlines(False))
+ for i, commit_hash in enumerate(lines):
+ short = next(lines)
+ skip = short.startswith('Release ') or short == '[version] update'
+
+ authors = [default_author] if default_author else []
+ for line in iter(lambda: next(lines), self.COMMIT_SEPARATOR):
+ match = self.AUTHOR_INDICATOR_RE.match(line)
+ if match:
+ authors = sorted(map(str.strip, line[match.end():].split(',')), key=str.casefold)
+
+ commit = Commit(commit_hash, short, authors)
+ if skip and (self._start or not i):
+ logger.debug(f'Skipped commit: {commit}')
+ continue
+ elif skip:
+ logger.debug(f'Reached Release commit, breaking: {commit}')
+ break
+
+ revert_match = self.REVERT_RE.fullmatch(commit.short)
+ if revert_match:
+ reverts[revert_match.group(1)] = commit
+ continue
+
+ fix_match = self.FIXES_RE.search(commit.short)
+ if fix_match:
+ commitish = fix_match.group(1)
+ fixes[commitish].append(commit)
+
+ commits[commit.hash] = commit
+
+ for commitish, revert_commit in reverts.items():
+ reverted = commits.pop(commitish, None)
+ if reverted:
+ logger.debug(f'{commitish} fully reverted {reverted}')
+ else:
+ commits[revert_commit.hash] = revert_commit
+
+ for commitish, fix_commits in fixes.items():
+ if commitish in commits:
+ hashes = ', '.join(commit.hash[:HASH_LENGTH] for commit in fix_commits)
+ logger.info(f'Found fix(es) for {commitish[:HASH_LENGTH]}: {hashes}')
+ for fix_commit in fix_commits:
+ del commits[fix_commit.hash]
+ else:
+ logger.debug(f'Commit with fixes not in changes: {commitish[:HASH_LENGTH]}')
+
+ return commits, fixes
+
+ def apply_overrides(self, overrides):
+ for override in overrides:
+ when = override.get('when')
+ if when and when not in self and when != self._start:
+ logger.debug(f'Ignored {when!r} override')
+ continue
+
+ override_hash = override.get('hash') or when
+ if override['action'] == 'add':
+ commit = Commit(override.get('hash'), override['short'], override.get('authors') or [])
+ logger.info(f'ADD {commit}')
+ self._commits_added.append(commit)
+
+ elif override['action'] == 'remove':
+ if override_hash in self._commits:
+ logger.info(f'REMOVE {self._commits[override_hash]}')
+ del self._commits[override_hash]
+
+ elif override['action'] == 'change':
+ if override_hash not in self._commits:
+ continue
+ commit = Commit(override_hash, override['short'], override.get('authors') or [])
+ logger.info(f'CHANGE {self._commits[commit.hash]} -> {commit}')
+ self._commits[commit.hash] = commit
+
+ self._commits = {key: value for key, value in reversed(self._commits.items())}
+
+ def groups(self):
+ group_dict = defaultdict(list)
+ for commit in self:
+ upstream_re = self.UPSTREAM_MERGE_RE.search(commit.short)
+ if upstream_re:
+ commit.short = f'[upstream] Merged with youtube-dl {upstream_re.group(1)}'
+
+ match = self.MESSAGE_RE.fullmatch(commit.short)
+ if not match:
+ logger.error(f'Error parsing short commit message: {commit.short!r}')
+ continue
+
+ prefix, sub_details_alt, message, issues = match.groups()
+ issues = [issue.strip()[1:] for issue in issues.split(',')] if issues else []
+
+ if prefix:
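+                # Each comma-separated prefix part resolves to (group, details,
+                # sub_details); the first non-None group wins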
+ groups, details, sub_details = zip(*map(self.details_from_prefix, prefix.split(',')))
+ group = next(iter(filter(None, groups)), None)
+ details = ', '.join(unique(details))
+ sub_details = list(itertools.chain.from_iterable(sub_details))
+ else:
+ group = CommitGroup.CORE
+ details = None
+ sub_details = []
+
+ if sub_details_alt:
+ sub_details.append(sub_details_alt)
+ sub_details = tuple(unique(sub_details))
+
+ if not group:
+ if self.EXTRACTOR_INDICATOR_RE.search(commit.short):
+ group = CommitGroup.EXTRACTOR
+ logger.error(f'Assuming [ie] group for {commit.short!r}')
+ else:
+ group = CommitGroup.CORE
+
+ commit_info = CommitInfo(
+ details, sub_details, message.strip(),
+ issues, commit, self._fixes[commit.hash])
+
+ logger.debug(f'Resolved {commit.short!r} to {commit_info!r}')
+ group_dict[group].append(commit_info)
+
+ return group_dict
+
+ @staticmethod
+ def details_from_prefix(prefix):
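+        # e.g. an 'ie/youtube:tab' prefix would resolve to roughly
+        # (CommitGroup.EXTRACTOR, 'youtube', ['tab']) (illustrative)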
+ if not prefix:
+ return CommitGroup.CORE, None, ()
+
+ prefix, *sub_details = prefix.split(':')
+
+ group, details = CommitGroup.get(prefix)
+ if group is CommitGroup.PRIORITY and details:
+ details = details.partition('/')[2].strip()
+
+ if details and '/' in details:
+ logger.error(f'Prefix is overnested, using first part: {prefix}')
+ details = details.partition('/')[0].strip()
+
+ if details == 'common':
+ details = None
+ elif group is CommitGroup.NETWORKING and details == 'rh':
+ details = 'Request Handler'
+
+ return group, details, sub_details
+
+
+def get_new_contributors(contributors_path, commits):
+ contributors = set()
+ if contributors_path.exists():
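+        # Entries look like "name (details)"; joint credits are joined with "/"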
+ for line in read_file(contributors_path).splitlines():
+ author, _, _ = line.strip().partition(' (')
+ authors = author.split('/')
+ contributors.update(map(str.casefold, authors))
+
+ new_contributors = set()
+ for commit in commits:
+ for author in commit.authors:
+ author_folded = author.casefold()
+ if author_folded not in contributors:
+ contributors.add(author_folded)
+ new_contributors.add(author)
+
+ return sorted(new_contributors, key=str.casefold)
+
+
+if __name__ == '__main__':
+ import argparse
+
+ parser = argparse.ArgumentParser(
+ description='Create a changelog markdown from a git commit range')
+ parser.add_argument(
+ 'commitish', default='HEAD', nargs='?',
+ help='The commitish to create the range from (default: %(default)s)')
+ parser.add_argument(
+ '-v', '--verbosity', action='count', default=0,
+ help='increase verbosity (can be used twice)')
+ parser.add_argument(
+ '-c', '--contributors', action='store_true',
+ help='update CONTRIBUTORS file (default: %(default)s)')
+ parser.add_argument(
+ '--contributors-path', type=Path, default=LOCATION_PATH.parent / 'CONTRIBUTORS',
+ help='path to the CONTRIBUTORS file')
+ parser.add_argument(
+ '--no-override', action='store_true',
+ help='skip override json in commit generation (default: %(default)s)')
+ parser.add_argument(
+ '--override-path', type=Path, default=LOCATION_PATH / 'changelog_override.json',
+ help='path to the changelog_override.json file')
+ parser.add_argument(
+ '--default-author', default='pukkandan',
+        help='the author to use without an author indicator (default: %(default)s)')
+ parser.add_argument(
+ '--repo', default='yt-dlp/yt-dlp',
+ help='the github repository to use for the operations (default: %(default)s)')
+ parser.add_argument(
+ '--collapsible', action='store_true',
+ help='make changelog collapsible (default: %(default)s)')
+ args = parser.parse_args()
+
+ logging.basicConfig(
+ datefmt='%Y-%m-%d %H-%M-%S', format='{asctime} | {levelname:<8} | {message}',
+ level=logging.WARNING - 10 * args.verbosity, style='{', stream=sys.stderr)
+
+ commits = CommitRange(None, args.commitish, args.default_author)
+
+ if not args.no_override:
+ if args.override_path.exists():
+ overrides = json.loads(read_file(args.override_path))
+ commits.apply_overrides(overrides)
+ else:
+ logger.warning(f'File {args.override_path.as_posix()} does not exist')
+
+ logger.info(f'Loaded {len(commits)} commits')
+
+ new_contributors = get_new_contributors(args.contributors_path, commits)
+ if new_contributors:
+ if args.contributors:
+ write_file(args.contributors_path, '\n'.join(new_contributors) + '\n', mode='a')
+ logger.info(f'New contributors: {", ".join(new_contributors)}')
+
+ print(Changelog(commits.groups(), args.repo, args.collapsible))
diff --git a/devscripts/make_contributing.py b/devscripts/make_contributing.py
new file mode 100755
index 0000000..a06f8a6
--- /dev/null
+++ b/devscripts/make_contributing.py
@@ -0,0 +1,32 @@
+#!/usr/bin/env python3
+
+import optparse
+import re
+
+
+def main():
+    return  # This is unused in yt-dlp
+
+ parser = optparse.OptionParser(usage='%prog INFILE OUTFILE')
+ options, args = parser.parse_args()
+ if len(args) != 2:
+ parser.error('Expected an input and an output filename')
+
+ infile, outfile = args
+
+ with open(infile, encoding='utf-8') as inf:
+ readme = inf.read()
+
+ bug_text = re.search(
+ r'(?s)#\s*BUGS\s*[^\n]*\s*(.*?)#\s*COPYRIGHT', readme).group(1)
+ dev_text = re.search(
+ r'(?s)(#\s*DEVELOPER INSTRUCTIONS.*?)#\s*EMBEDDING yt-dlp', readme).group(1)
+
+ out = bug_text + dev_text
+
+ with open(outfile, 'w', encoding='utf-8') as outf:
+ outf.write(out)
+
+
+if __name__ == '__main__':
+ main()
diff --git a/devscripts/make_issue_template.py b/devscripts/make_issue_template.py
new file mode 100644
index 0000000..a5d59f3
--- /dev/null
+++ b/devscripts/make_issue_template.py
@@ -0,0 +1,72 @@
+#!/usr/bin/env python3
+
+# Allow direct execution
+import os
+import sys
+
+sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
+
+
+import re
+
+from devscripts.utils import get_filename_args, read_file, write_file
+
+VERBOSE_TMPL = '''
+ - type: checkboxes
+ id: verbose
+ attributes:
+ label: Provide verbose output that clearly demonstrates the problem
+ options:
+ - label: Run **your** yt-dlp command with **-vU** flag added (`yt-dlp -vU <your command line>`)
+ required: true
+ - label: "If using API, add `'verbose': True` to `YoutubeDL` params instead"
+ required: false
+ - label: Copy the WHOLE output (starting with `[debug] Command-line config`) and insert it below
+ required: true
+ - type: textarea
+ id: log
+ attributes:
+ label: Complete Verbose Output
+ description: |
+ It should start like this:
+ placeholder: |
+ [debug] Command-line config: ['-vU', 'https://www.youtube.com/watch?v=BaW_jenozKc']
+ [debug] Encodings: locale cp65001, fs utf-8, pref cp65001, out utf-8, error utf-8, screen utf-8
+ [debug] yt-dlp version nightly@... from yt-dlp/yt-dlp [b634ba742] (win_exe)
+ [debug] Python 3.8.10 (CPython 64bit) - Windows-10-10.0.22000-SP0
+ [debug] exe versions: ffmpeg N-106550-g072101bd52-20220410 (fdk,setts), ffprobe N-106624-g391ce570c8-20220415, phantomjs 2.1.1
+ [debug] Optional libraries: Cryptodome-3.15.0, brotli-1.0.9, certifi-2022.06.15, mutagen-1.45.1, sqlite3-2.6.0, websockets-10.3
+ [debug] Proxy map: {}
+ [debug] Request Handlers: urllib, requests
+ [debug] Loaded 1893 extractors
+ [debug] Fetching release info: https://api.github.com/repos/yt-dlp/yt-dlp-nightly-builds/releases/latest
+ yt-dlp is up to date (nightly@... from yt-dlp/yt-dlp-nightly-builds)
+ [youtube] Extracting URL: https://www.youtube.com/watch?v=BaW_jenozKc
+ <more lines>
+ render: shell
+ validations:
+ required: true
+'''.strip()
+
+NO_SKIP = '''
+ - type: checkboxes
+ attributes:
+ label: DO NOT REMOVE OR SKIP THE ISSUE TEMPLATE
+ description: Fill all fields even if you think it is irrelevant for the issue
+ options:
+ - label: I understand that I will be **blocked** if I *intentionally* remove or skip any mandatory\\* field
+ required: true
+'''.strip()
+
+
+def main():
+ fields = {'no_skip': NO_SKIP}
+ fields['verbose'] = VERBOSE_TMPL % fields
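+    # Derive an optional variant of the verbose block by dropping `required: true`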
+ fields['verbose_optional'] = re.sub(r'(\n\s+validations:)?\n\s+required: true', '', fields['verbose'])
+
+ infile, outfile = get_filename_args(has_infile=True)
+ write_file(outfile, read_file(infile) % fields)
+
+
+if __name__ == '__main__':
+ main()
diff --git a/devscripts/make_lazy_extractors.py b/devscripts/make_lazy_extractors.py
new file mode 100644
index 0000000..d74ea20
--- /dev/null
+++ b/devscripts/make_lazy_extractors.py
@@ -0,0 +1,132 @@
+#!/usr/bin/env python3
+
+# Allow direct execution
+import os
+import shutil
+import sys
+
+sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
+
+
+from inspect import getsource
+
+from devscripts.utils import get_filename_args, read_file, write_file
+
+NO_ATTR = object()  # Sentinel: the class does not define this attribute
+STATIC_CLASS_PROPERTIES = [
+ 'IE_NAME', '_ENABLED', '_VALID_URL', # Used for URL matching
+ '_WORKING', 'IE_DESC', '_NETRC_MACHINE', 'SEARCH_KEY', # Used for --extractor-descriptions
+ 'age_limit', # Used for --age-limit (evaluated)
+ '_RETURN_TYPE', # Accessed in CLI only with instance (evaluated)
+]
+CLASS_METHODS = [
+ 'ie_key', 'suitable', '_match_valid_url', # Used for URL matching
+ 'working', 'get_temp_id', '_match_id', # Accessed just before instance creation
+ 'description', # Used for --extractor-descriptions
+ 'is_suitable', # Used for --age-limit
+ 'supports_login', 'is_single_video', # Accessed in CLI only with instance
+]
+IE_TEMPLATE = '''
+class {name}({bases}):
+ _module = {module!r}
+'''
+MODULE_TEMPLATE = read_file('devscripts/lazy_load_template.py')
+
+
+def main():
+ lazy_extractors_filename = get_filename_args(default_outfile='yt_dlp/extractor/lazy_extractors.py')
+ if os.path.exists(lazy_extractors_filename):
+ os.remove(lazy_extractors_filename)
+
+ _ALL_CLASSES = get_all_ies() # Must be before import
+
+ import yt_dlp.plugins
+ from yt_dlp.extractor.common import InfoExtractor, SearchInfoExtractor
+
+ # Filter out plugins
+ _ALL_CLASSES = [cls for cls in _ALL_CLASSES if not cls.__module__.startswith(f'{yt_dlp.plugins.PACKAGE_NAME}.')]
+
+ DummyInfoExtractor = type('InfoExtractor', (InfoExtractor,), {'IE_NAME': NO_ATTR})
+ module_src = '\n'.join((
+ MODULE_TEMPLATE,
+ ' _module = None',
+ *extra_ie_code(DummyInfoExtractor),
+ '\nclass LazyLoadSearchExtractor(LazyLoadExtractor):\n pass\n',
+ *build_ies(_ALL_CLASSES, (InfoExtractor, SearchInfoExtractor), DummyInfoExtractor),
+ ))
+
+ write_file(lazy_extractors_filename, f'{module_src}\n')
+
+
+def get_all_ies():
+ PLUGINS_DIRNAME = 'ytdlp_plugins'
+ BLOCKED_DIRNAME = f'{PLUGINS_DIRNAME}_blocked'
+ if os.path.exists(PLUGINS_DIRNAME):
+ # os.rename cannot be used, e.g. in Docker. See https://github.com/yt-dlp/yt-dlp/pull/4958
+ shutil.move(PLUGINS_DIRNAME, BLOCKED_DIRNAME)
+ try:
+ from yt_dlp.extractor.extractors import _ALL_CLASSES
+ finally:
+ if os.path.exists(BLOCKED_DIRNAME):
+ shutil.move(BLOCKED_DIRNAME, PLUGINS_DIRNAME)
+ return _ALL_CLASSES
+
+
+def extra_ie_code(ie, base=None):
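+    # Yield class properties and method sources that differ from the base, so
+    # the lazy class matches URLs and reports metadata like the real extractor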
+ for var in STATIC_CLASS_PROPERTIES:
+ val = getattr(ie, var)
+ if val != (getattr(base, var) if base else NO_ATTR):
+ yield f' {var} = {val!r}'
+ yield ''
+
+ for name in CLASS_METHODS:
+ f = getattr(ie, name)
+ if not base or f.__func__ != getattr(base, name).__func__:
+ yield getsource(f)
+
+
+def build_ies(ies, bases, attr_base):
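+    # Emit lazy classes in dependency order; only the original extractors (not
+    # helper bases pulled in by sort_ies) end up in _ALL_CLASSES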
+ names = []
+ for ie in sort_ies(ies, bases):
+ yield build_lazy_ie(ie, ie.__name__, attr_base)
+ if ie in ies:
+ names.append(ie.__name__)
+
+ yield f'\n_ALL_CLASSES = [{", ".join(names)}]'
+
+
+def sort_ies(ies, ignored_bases):
+ """find the correct sorting and add the required base classes so that subclasses can be correctly created"""
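+    # Repeatedly scan: pull any not-yet-emitted base class to the front first,
+    # then emit classes whose bases have all been emitted (a topological sort)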
+ classes, returned_classes = ies[:-1], set()
+ assert ies[-1].__name__ == 'GenericIE', 'Last IE must be GenericIE'
+ while classes:
+ for c in classes[:]:
+ bases = set(c.__bases__) - {object, *ignored_bases}
+ restart = False
+ for b in sorted(bases, key=lambda x: x.__name__):
+ if b not in classes and b not in returned_classes:
+ assert b.__name__ != 'GenericIE', 'Cannot inherit from GenericIE'
+ classes.insert(0, b)
+ restart = True
+ if restart:
+ break
+ if bases <= returned_classes:
+ yield c
+ returned_classes.add(c)
+ classes.remove(c)
+ break
+ yield ies[-1]
+
+
+def build_lazy_ie(ie, name, attr_base):
+ bases = ', '.join({
+ 'InfoExtractor': 'LazyLoadExtractor',
+ 'SearchInfoExtractor': 'LazyLoadSearchExtractor',
+ }.get(base.__name__, base.__name__) for base in ie.__bases__)
+
+ s = IE_TEMPLATE.format(name=name, module=ie.__module__, bases=bases)
+ return s + '\n'.join(extra_ie_code(ie, attr_base))
+
+
+if __name__ == '__main__':
+ main()
diff --git a/devscripts/make_readme.py b/devscripts/make_readme.py
new file mode 100755
index 0000000..2270b31
--- /dev/null
+++ b/devscripts/make_readme.py
@@ -0,0 +1,93 @@
+#!/usr/bin/env python3
+
+"""
+yt-dlp --help | make_readme.py
+This must be run in a console of the correct width
+"""
+
+# Allow direct execution
+import os
+import sys
+
+sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
+
+
+import functools
+import re
+
+from devscripts.utils import read_file, write_file
+
+README_FILE = 'README.md'
+
+OPTIONS_START = 'General Options:'
+OPTIONS_END = 'CONFIGURATION'
+EPILOG_START = 'See full documentation'
+ALLOWED_OVERSHOOT = 2
+
+DISABLE_PATCH = object()  # Sentinel: a patch starting with this is skipped
+
+
+def take_section(text, start=None, end=None, *, shift=0):
+ return text[
+ text.index(start) + shift if start else None:
+ text.index(end) + shift if end else None
+ ]
+
+
+def apply_patch(text, patch):
+ return text if patch[0] is DISABLE_PATCH else re.sub(*patch, text)
+
+
+options = take_section(sys.stdin.read(), f'\n {OPTIONS_START}', f'\n{EPILOG_START}', shift=1)
+
+max_width = max(map(len, options.split('\n')))
+switch_col_width = len(re.search(r'(?m)^\s{5,}', options).group())
+delim = f'\n{" " * switch_col_width}'
+
+PATCHES = (
+ ( # Standardize `--update` message
+ r'(?m)^( -U, --update\s+).+(\n \s.+)*$',
+ r'\1Update this program to the latest version',
+ ),
+ ( # Headings
+ r'(?m)^ (\w.+\n)( (?=\w))?',
+ r'## \1'
+ ),
+ ( # Fixup `--date` formatting
+ rf'(?m)( --date DATE.+({delim}[^\[]+)*)\[.+({delim}.+)*$',
+ (rf'\1[now|today|yesterday][-N[day|week|month|year]].{delim}'
+ f'E.g. "--date today-2weeks" downloads only{delim}'
+ 'videos uploaded on the same day two weeks ago'),
+ ),
+ ( # Do not split URLs
+ rf'({delim[:-1]})? (?P<label>\[\S+\] )?(?P<url>https?({delim})?:({delim})?/({delim})?/(({delim})?\S+)+)\s',
+ lambda mobj: ''.join((delim, mobj.group('label') or '', re.sub(r'\s+', '', mobj.group('url')), '\n'))
+ ),
+ ( # Do not split "words"
+ rf'(?m)({delim}\S+)+$',
+ lambda mobj: ''.join((delim, mobj.group(0).replace(delim, '')))
+ ),
+ ( # Allow overshooting last line
+ rf'(?m)^(?P<prev>.+)${delim}(?P<current>.+)$(?!{delim})',
+ lambda mobj: (mobj.group().replace(delim, ' ')
+ if len(mobj.group()) - len(delim) + 1 <= max_width + ALLOWED_OVERSHOOT
+ else mobj.group())
+ ),
+ ( # Avoid newline when a space is available b/w switch and description
+ DISABLE_PATCH, # This creates issues with prepare_manpage
+ r'(?m)^(\s{4}-.{%d})(%s)' % (switch_col_width - 6, delim),
+ r'\1 '
+ ),
+ ( # Replace brackets with a Markdown link
+ r'SponsorBlock API \((http.+)\)',
+ r'[SponsorBlock API](\1)'
+ ),
+)
+
+readme = read_file(README_FILE)
+
+write_file(README_FILE, ''.join((
+ take_section(readme, end=f'## {OPTIONS_START}'),
+ functools.reduce(apply_patch, PATCHES, options),
+ take_section(readme, f'# {OPTIONS_END}'),
+)))
diff --git a/devscripts/make_supportedsites.py b/devscripts/make_supportedsites.py
new file mode 100644
index 0000000..01548ef
--- /dev/null
+++ b/devscripts/make_supportedsites.py
@@ -0,0 +1,20 @@
+#!/usr/bin/env python3
+
+# Allow direct execution
+import os
+import sys
+
+sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
+
+
+from devscripts.utils import get_filename_args, write_file
+from yt_dlp.extractor import list_extractor_classes
+
+
+def main():
+ out = '\n'.join(ie.description() for ie in list_extractor_classes() if ie.IE_DESC is not False)
+ write_file(get_filename_args(), f'# Supported sites\n{out}\n')
+
+
+if __name__ == '__main__':
+ main()
diff --git a/devscripts/prepare_manpage.py b/devscripts/prepare_manpage.py
new file mode 100644
index 0000000..9b12e71
--- /dev/null
+++ b/devscripts/prepare_manpage.py
@@ -0,0 +1,97 @@
+#!/usr/bin/env python3
+
+# Allow direct execution
+import os
+import sys
+
+sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
+
+
+import os.path
+import re
+
+from devscripts.utils import (
+ compose_functions,
+ get_filename_args,
+ read_file,
+ write_file,
+)
+
+ROOT_DIR = os.path.dirname(os.path.dirname(os.path.abspath(__file__)))
+README_FILE = os.path.join(ROOT_DIR, 'README.md')
+
+PREFIX = r'''%yt-dlp(1)
+
+# NAME
+
+yt\-dlp \- A youtube-dl fork with additional features and patches
+
+# SYNOPSIS
+
+**yt-dlp** \[OPTIONS\] URL \[URL...\]
+
+# DESCRIPTION
+
+'''
+
+
+def filter_excluded_sections(readme):
+ EXCLUDED_SECTION_BEGIN_STRING = re.escape('<!-- MANPAGE: BEGIN EXCLUDED SECTION -->')
+ EXCLUDED_SECTION_END_STRING = re.escape('<!-- MANPAGE: END EXCLUDED SECTION -->')
+ return re.sub(
+ rf'(?s){EXCLUDED_SECTION_BEGIN_STRING}.+?{EXCLUDED_SECTION_END_STRING}\n',
+ '', readme)
+
+
+def move_sections(readme):
+ MOVE_TAG_TEMPLATE = '<!-- MANPAGE: MOVE "%s" SECTION HERE -->'
+ sections = re.findall(r'(?m)^%s$' % (
+ re.escape(MOVE_TAG_TEMPLATE).replace(r'\%', '%') % '(.+)'), readme)
+
+ for section_name in sections:
+ move_tag = MOVE_TAG_TEMPLATE % section_name
+ if readme.count(move_tag) > 1:
+ raise Exception(f'There is more than one occurrence of "{move_tag}". This is unexpected')
+
+ sections = re.findall(rf'(?sm)(^# {re.escape(section_name)}.+?)(?=^# )', readme)
+ if len(sections) < 1:
+ raise Exception(f'The section {section_name} does not exist')
+ elif len(sections) > 1:
+            raise Exception(f'There are multiple occurrences of section {section_name}; this is unhandled')
+
+ readme = readme.replace(sections[0], '', 1).replace(move_tag, sections[0], 1)
+ return readme
+
+
+def filter_options(readme):
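+    # Rewrite the options table as Pandoc definition lists, e.g.
+    # "    -U, --update    Update ..." -> "\n-U, --update\n: Update ...\n"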
+ section = re.search(r'(?sm)^# USAGE AND OPTIONS\n.+?(?=^# )', readme).group(0)
+ options = '# OPTIONS\n'
+ for line in section.split('\n')[1:]:
+ mobj = re.fullmatch(r'''(?x)
+ \s{4}(?P<opt>-(?:,\s|[^\s])+)
+ (?:\s(?P<meta>(?:[^\s]|\s(?!\s))+))?
+ (\s{2,}(?P<desc>.+))?
+ ''', line)
+ if not mobj:
+ options += f'{line.lstrip()}\n'
+ continue
+ option, metavar, description = mobj.group('opt', 'meta', 'desc')
+
+ # Pandoc's definition_lists. See http://pandoc.org/README.html
+ option = f'{option} *{metavar}*' if metavar else option
+ description = f'{description}\n' if description else ''
+ options += f'\n{option}\n: {description}'
+ continue
+
+ return readme.replace(section, options, 1)
+
+
+TRANSFORM = compose_functions(filter_excluded_sections, move_sections, filter_options)
+
+
+def main():
+ write_file(get_filename_args(), PREFIX + TRANSFORM(read_file(README_FILE)))
+
+
+if __name__ == '__main__':
+ main()
diff --git a/devscripts/run_tests.bat b/devscripts/run_tests.bat
new file mode 100644
index 0000000..57b1f4b
--- /dev/null
+++ b/devscripts/run_tests.bat
@@ -0,0 +1,4 @@
+@echo off
+
+>&2 echo run_tests.bat is deprecated. Please use `devscripts/run_tests.py` instead
+python %~dp0run_tests.py %~1
diff --git a/devscripts/run_tests.py b/devscripts/run_tests.py
new file mode 100755
index 0000000..6d638a9
--- /dev/null
+++ b/devscripts/run_tests.py
@@ -0,0 +1,71 @@
+#!/usr/bin/env python3
+
+import argparse
+import functools
+import os
+import re
+import subprocess
+import sys
+from pathlib import Path
+
+
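+# Strip a trailing "IE" from test names, e.g. "YoutubeIE_1" -> "Youtube_1"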
+fix_test_name = functools.partial(re.compile(r'IE(_all|_\d+)?$').sub, r'\1')
+
+
+def parse_args():
+ parser = argparse.ArgumentParser(description='Run selected yt-dlp tests')
+ parser.add_argument(
+        'test', help='an extractor test, or one of "core" or "download"', nargs='*')
+ parser.add_argument(
+ '-k', help='run a test matching EXPRESSION. Same as "pytest -k"', metavar='EXPRESSION')
+ return parser.parse_args()
+
+
+def run_tests(*tests, pattern=None, ci=False):
+ run_core = 'core' in tests or (not pattern and not tests)
+ run_download = 'download' in tests
+ tests = list(map(fix_test_name, tests))
+
+ arguments = ['pytest', '-Werror', '--tb=short']
+ if ci:
+ arguments.append('--color=yes')
+ if run_core:
+ arguments.extend(['-m', 'not download'])
+ elif run_download:
+ arguments.extend(['-m', 'download'])
+ elif pattern:
+ arguments.extend(['-k', pattern])
+ else:
+ arguments.extend(
+ f'test/test_download.py::TestDownload::test_{test}' for test in tests)
+
+ print(f'Running {arguments}', flush=True)
+ try:
+ return subprocess.call(arguments)
+ except FileNotFoundError:
+ pass
+
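+    # pytest is unavailable; fall back to the stdlib unittest runner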
+ arguments = [sys.executable, '-Werror', '-m', 'unittest']
+ if run_core:
+ print('"pytest" needs to be installed to run core tests', file=sys.stderr, flush=True)
+ return 1
+ elif run_download:
+ arguments.append('test.test_download')
+ elif pattern:
+ arguments.extend(['-k', pattern])
+ else:
+ arguments.extend(
+ f'test.test_download.TestDownload.test_{test}' for test in tests)
+
+ print(f'Running {arguments}', flush=True)
+ return subprocess.call(arguments)
+
+
+if __name__ == '__main__':
+ try:
+ args = parse_args()
+
+ os.chdir(Path(__file__).parent.parent)
+ sys.exit(run_tests(*args.test, pattern=args.k, ci=bool(os.getenv('CI'))))
+ except KeyboardInterrupt:
+ pass
diff --git a/devscripts/run_tests.sh b/devscripts/run_tests.sh
new file mode 100755
index 0000000..123ceb1
--- /dev/null
+++ b/devscripts/run_tests.sh
@@ -0,0 +1,4 @@
+#!/usr/bin/env sh
+
+>&2 echo 'run_tests.sh is deprecated. Please use `devscripts/run_tests.py` instead'
+python3 devscripts/run_tests.py "$1"
diff --git a/devscripts/set-variant.py b/devscripts/set-variant.py
new file mode 100644
index 0000000..10341e7
--- /dev/null
+++ b/devscripts/set-variant.py
@@ -0,0 +1,36 @@
+#!/usr/bin/env python3
+
+# Allow direct execution
+import os
+import sys
+
+sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
+
+
+import argparse
+import functools
+import re
+
+from devscripts.utils import compose_functions, read_file, write_file
+
+VERSION_FILE = 'yt_dlp/version.py'
+
+
+def parse_options():
+ parser = argparse.ArgumentParser(description='Set the build variant of the package')
+ parser.add_argument('variant', help='Name of the variant')
+ parser.add_argument('-M', '--update-message', default=None, help='Message to show in -U')
+ return parser.parse_args()
+
+
+def property_setter(name, value):
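+    # Build a function that rewrites the `NAME = ...` assignment line in version.py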
+ return functools.partial(re.sub, rf'(?m)^{name}\s*=\s*.+$', f'{name} = {value!r}')
+
+
+opts = parse_options()
+transform = compose_functions(
+ property_setter('VARIANT', opts.variant),
+ property_setter('UPDATE_HINT', opts.update_message)
+)
+
+write_file(VERSION_FILE, transform(read_file(VERSION_FILE)))
diff --git a/devscripts/tomlparse.py b/devscripts/tomlparse.py
new file mode 100755
index 0000000..85ac4ee
--- /dev/null
+++ b/devscripts/tomlparse.py
@@ -0,0 +1,189 @@
+#!/usr/bin/env python3
+
+"""
+Simple parser for spec compliant toml files
+
+A simple toml parser for files that comply with the spec.
+Should only be used to parse `pyproject.toml` for `install_deps.py`.
+
+IMPORTANT: INVALID FILES OR MULTILINE STRINGS ARE NOT SUPPORTED!
+"""
+
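+# Illustrative usage (hypothetical file path):
+#
+#     with open('pyproject.toml', encoding='utf-8') as f:
+#         print(parse_toml(f.read())['project']['name'])
+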
+from __future__ import annotations
+
+import datetime
+import json
+import re
+
+WS = r'(?:[\ \t]*)'
+STRING_RE = re.compile(r'"(?:\\.|[^\\"\n])*"|\'[^\'\n]*\'')
+SINGLE_KEY_RE = re.compile(rf'{STRING_RE.pattern}|[A-Za-z0-9_-]+')
+KEY_RE = re.compile(rf'{WS}(?:{SINGLE_KEY_RE.pattern}){WS}(?:\.{WS}(?:{SINGLE_KEY_RE.pattern}){WS})*')
+EQUALS_RE = re.compile(rf'={WS}')
+WS_RE = re.compile(WS)
+
+_SUBTABLE = rf'(?P<subtable>^\[(?P<is_list>\[)?(?P<path>{KEY_RE.pattern})\]\]?)'
+EXPRESSION_RE = re.compile(rf'^(?:{_SUBTABLE}|{KEY_RE.pattern}=)', re.MULTILINE)
+
+LIST_WS_RE = re.compile(rf'{WS}((#[^\n]*)?\n{WS})*')
+LEFTOVER_VALUE_RE = re.compile(r'[^,}\]\t\n#]+')
+
+
+def parse_key(value: str):
+ for match in SINGLE_KEY_RE.finditer(value):
+ if match[0][0] == '"':
+ yield json.loads(match[0])
+ elif match[0][0] == '\'':
+ yield match[0][1:-1]
+ else:
+ yield match[0]
+
+
+def get_target(root: dict, paths: list[str], is_list=False):
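+    # Walk (and create) nested tables along `paths`, returning the innermost
+    # dict; with is_list, the final segment is an array of tables ([[...]])
+    # and a fresh table is appended to it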
+ target = root
+
+ for index, key in enumerate(paths, 1):
+ use_list = is_list and index == len(paths)
+ result = target.get(key)
+ if result is None:
+ result = [] if use_list else {}
+ target[key] = result
+
+ if isinstance(result, dict):
+ target = result
+ elif use_list:
+ target = {}
+ result.append(target)
+ else:
+ target = result[-1]
+
+ assert isinstance(target, dict)
+ return target
+
+
+def parse_enclosed(data: str, index: int, end: str, ws_re: re.Pattern):
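+    # Generator protocol: yields (True, index) at each element position until
+    # `end` is reached, and the caller sends back the index after the parsed
+    # value; finally yields (False, index) just past the closing delimiter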
+ index += 1
+
+ if match := ws_re.match(data, index):
+ index = match.end()
+
+ while data[index] != end:
+ index = yield True, index
+
+ if match := ws_re.match(data, index):
+ index = match.end()
+
+ if data[index] == ',':
+ index += 1
+
+ if match := ws_re.match(data, index):
+ index = match.end()
+
+ assert data[index] == end
+ yield False, index + 1
+
+
+def parse_value(data: str, index: int):
+ if data[index] == '[':
+ result = []
+
+ indices = parse_enclosed(data, index, ']', LIST_WS_RE)
+ valid, index = next(indices)
+ while valid:
+ index, value = parse_value(data, index)
+ result.append(value)
+ valid, index = indices.send(index)
+
+ return index, result
+
+ if data[index] == '{':
+ result = {}
+
+ indices = parse_enclosed(data, index, '}', WS_RE)
+ valid, index = next(indices)
+ while valid:
+ valid, index = indices.send(parse_kv_pair(data, index, result))
+
+ return index, result
+
+ if match := STRING_RE.match(data, index):
+ return match.end(), json.loads(match[0]) if match[0][0] == '"' else match[0][1:-1]
+
+ match = LEFTOVER_VALUE_RE.match(data, index)
+ assert match
+ value = match[0].strip()
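+    # Coerce bare values: numbers, ISO times/dates, then booleans; fall back
+    # to the raw string if nothing matches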
+ for func in [
+ int,
+ float,
+ datetime.time.fromisoformat,
+ datetime.date.fromisoformat,
+ datetime.datetime.fromisoformat,
+ {'true': True, 'false': False}.get,
+ ]:
+ try:
+ value = func(value)
+ break
+ except Exception:
+ pass
+
+ return match.end(), value
+
+
+def parse_kv_pair(data: str, index: int, target: dict):
+ match = KEY_RE.match(data, index)
+ if not match:
+ return None
+
+ *keys, key = parse_key(match[0])
+
+ match = EQUALS_RE.match(data, match.end())
+ assert match
+ index = match.end()
+
+ index, value = parse_value(data, index)
+ get_target(target, keys)[key] = value
+ return index
+
+
+def parse_toml(data: str):
+ root = {}
+ target = root
+
+ index = 0
+ while True:
+ match = EXPRESSION_RE.search(data, index)
+ if not match:
+ break
+
+ if match.group('subtable'):
+ index = match.end()
+ path, is_list = match.group('path', 'is_list')
+ target = get_target(root, list(parse_key(path)), bool(is_list))
+ continue
+
+ index = parse_kv_pair(data, match.start(), target)
+ assert index is not None
+
+ return root
+
+
+def main():
+ import argparse
+ from pathlib import Path
+
+ parser = argparse.ArgumentParser()
+ parser.add_argument('infile', type=Path, help='The TOML file to read as input')
+ args = parser.parse_args()
+
+ with args.infile.open('r', encoding='utf-8') as file:
+ data = file.read()
+
+ def default(obj):
+ if isinstance(obj, (datetime.date, datetime.time, datetime.datetime)):
+ return obj.isoformat()
+
+ print(json.dumps(parse_toml(data), default=default))
+
+
+if __name__ == '__main__':
+ main()
diff --git a/devscripts/update-version.py b/devscripts/update-version.py
new file mode 100644
index 0000000..da54a6a
--- /dev/null
+++ b/devscripts/update-version.py
@@ -0,0 +1,82 @@
+#!/usr/bin/env python3
+
+# Allow direct execution
+import os
+import sys
+
+sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
+
+
+import argparse
+import contextlib
+from datetime import datetime, timezone
+
+from devscripts.utils import read_version, run_process, write_file
+
+
+def get_new_version(version, revision):
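+    # e.g. with a current version of 2024.03.10 and no arguments, a rebuild on
+    # the same day yields 2024.03.10.1 (the revision auto-increments)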
+ if not version:
+ version = datetime.now(timezone.utc).strftime('%Y.%m.%d')
+
+ if revision:
+ assert revision.isdecimal(), 'Revision must be a number'
+ else:
+ old_version = read_version().split('.')
+ if version.split('.') == old_version[:3]:
+ revision = str(int((old_version + [0])[3]) + 1)
+
+ return f'{version}.{revision}' if revision else version
+
+
+def get_git_head():
+ with contextlib.suppress(Exception):
+ return run_process('git', 'rev-parse', 'HEAD').stdout.strip()
+
+
+VERSION_TEMPLATE = '''\
+# Autogenerated by devscripts/update-version.py
+
+__version__ = {version!r}
+
+RELEASE_GIT_HEAD = {git_head!r}
+
+VARIANT = None
+
+UPDATE_HINT = None
+
+CHANNEL = {channel!r}
+
+ORIGIN = {origin!r}
+
+_pkg_version = {package_version!r}
+'''
+
+if __name__ == '__main__':
+ parser = argparse.ArgumentParser(description='Update the version.py file')
+ parser.add_argument(
+ '-c', '--channel', default='stable',
+ help='Select update channel (default: %(default)s)')
+ parser.add_argument(
+ '-r', '--origin', default='local',
+ help='Select origin/repository (default: %(default)s)')
+ parser.add_argument(
+ '-s', '--suffix', default='',
+ help='Add an alphanumeric suffix to the package version, e.g. "dev"')
+ parser.add_argument(
+ '-o', '--output', default='yt_dlp/version.py',
+ help='The output file to write to (default: %(default)s)')
+ parser.add_argument(
+ 'version', nargs='?', default=None,
+ help='A version or revision to use instead of generating one')
+ args = parser.parse_args()
+
+ git_head = get_git_head()
+ version = (
+ args.version if args.version and '.' in args.version
+ else get_new_version(None, args.version))
+ write_file(args.output, VERSION_TEMPLATE.format(
+ version=version, git_head=git_head, channel=args.channel, origin=args.origin,
+ package_version=f'{version}{args.suffix}'))
+
+ print(f'version={version} ({args.channel}), head={git_head}')
diff --git a/devscripts/utils.py b/devscripts/utils.py
new file mode 100644
index 0000000..a952c9f
--- /dev/null
+++ b/devscripts/utils.py
@@ -0,0 +1,47 @@
+import argparse
+import functools
+import subprocess
+
+
+def read_file(fname):
+ with open(fname, encoding='utf-8') as f:
+ return f.read()
+
+
+def write_file(fname, content, mode='w'):
+ with open(fname, mode, encoding='utf-8') as f:
+ return f.write(content)
+
+
+def read_version(fname='yt_dlp/version.py', varname='__version__'):
+ """Get the version without importing the package"""
+ items = {}
+ exec(compile(read_file(fname), fname, 'exec'), items)
+ return items[varname]
+
+
+def get_filename_args(has_infile=False, default_outfile=None):
+ parser = argparse.ArgumentParser()
+ if has_infile:
+ parser.add_argument('infile', help='Input file')
+ kwargs = {'nargs': '?', 'default': default_outfile} if default_outfile else {}
+ parser.add_argument('outfile', **kwargs, help='Output file')
+
+ opts = parser.parse_args()
+ if has_infile:
+ return opts.infile, opts.outfile
+ return opts.outfile
+
+
+def compose_functions(*functions):
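+    # Left-to-right composition: compose_functions(f, g)(x) == g(f(x))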
+ return lambda x: functools.reduce(lambda y, f: f(y), functions, x)
+
+
+def run_process(*args, **kwargs):
+ kwargs.setdefault('text', True)
+ kwargs.setdefault('check', True)
+ kwargs.setdefault('capture_output', True)
+ if kwargs['text']:
+ kwargs.setdefault('encoding', 'utf-8')
+ kwargs.setdefault('errors', 'replace')
+ return subprocess.run(args, **kwargs)
diff --git a/devscripts/zsh-completion.in b/devscripts/zsh-completion.in
new file mode 100644
index 0000000..9117d33
--- /dev/null
+++ b/devscripts/zsh-completion.in
@@ -0,0 +1,30 @@
+#compdef yt-dlp
+
+__yt_dlp() {
+ local curcontext="$curcontext" fileopts diropts cur prev
+ typeset -A opt_args
+ fileopts="{{fileopts}}"
+ diropts="{{diropts}}"
+ cur=$words[CURRENT]
+ case $cur in
+ :)
+ _arguments '*: :(::ytfavorites ::ytrecommended ::ytsubscriptions ::ytwatchlater ::ythistory)'
+ ;;
+ *)
+ prev=$words[CURRENT-1]
+ if [[ ${prev} =~ ${fileopts} ]]; then
+ _path_files
+ elif [[ ${prev} =~ ${diropts} ]]; then
+ _path_files -/
+ elif [[ ${prev} == "--remux-video" ]]; then
+ _arguments '*: :(mp4 mkv)'
+ elif [[ ${prev} == "--recode-video" ]]; then
+ _arguments '*: :(mp4 flv ogg webm mkv)'
+ else
+ _arguments '*: :({{flags}})'
+ fi
+ ;;
+ esac
+}
+
+__yt_dlp \ No newline at end of file
diff --git a/devscripts/zsh-completion.py b/devscripts/zsh-completion.py
new file mode 100755
index 0000000..267af5f
--- /dev/null
+++ b/devscripts/zsh-completion.py
@@ -0,0 +1,50 @@
+#!/usr/bin/env python3
+
+# Allow direct execution
+import os
+import sys
+
+sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
+
+
+import yt_dlp
+
+ZSH_COMPLETION_FILE = "completions/zsh/_yt-dlp"
+ZSH_COMPLETION_TEMPLATE = "devscripts/zsh-completion.in"
+
+
+def build_completion(opt_parser):
+ opts = [opt for group in opt_parser.option_groups
+ for opt in group.option_list]
+ opts_file = [opt for opt in opts if opt.metavar == "FILE"]
+ opts_dir = [opt for opt in opts if opt.metavar == "DIR"]
+
+ fileopts = []
+ for opt in opts_file:
+ if opt._short_opts:
+ fileopts.extend(opt._short_opts)
+ if opt._long_opts:
+ fileopts.extend(opt._long_opts)
+
+ diropts = []
+ for opt in opts_dir:
+ if opt._short_opts:
+ diropts.extend(opt._short_opts)
+ if opt._long_opts:
+ diropts.extend(opt._long_opts)
+
+ flags = [opt.get_opt_string() for opt in opts]
+
+ with open(ZSH_COMPLETION_TEMPLATE) as f:
+ template = f.read()
+
+ template = template.replace("{{fileopts}}", "|".join(fileopts))
+ template = template.replace("{{diropts}}", "|".join(diropts))
+ template = template.replace("{{flags}}", " ".join(flags))
+
+ with open(ZSH_COMPLETION_FILE, "w") as f:
+ f.write(template)
+
+
+parser = yt_dlp.parseOpts(ignore_config_files=True)[0]
+build_completion(parser)
diff --git a/public.key b/public.key
new file mode 100644
index 0000000..b3af31e
--- /dev/null
+++ b/public.key
@@ -0,0 +1,29 @@
+-----BEGIN PGP PUBLIC KEY BLOCK-----
+
+mQINBGP78C4BEAD0rF9zjGPAt0thlt5C1ebzccAVX7Nb1v+eqQjk+WEZdTETVCg3
+WAM5ngArlHdm/fZqzUgO+pAYrB60GKeg7ffUDf+S0XFKEZdeRLYeAaqqKhSibVal
+DjvOBOztu3W607HLETQAqA7wTPuIt2WqmpL60NIcyr27LxqmgdN3mNvZ2iLO+bP0
+nKR/C+PgE9H4ytywDa12zMx6PmZCnVOOOu6XZEFmdUxxdQ9fFDqd9LcBKY2LDOcS
+Yo1saY0YWiZWHtzVoZu1kOzjnS5Fjq/yBHJLImDH7pNxHm7s/PnaurpmQFtDFruk
+t+2lhDnpKUmGr/I/3IHqH/X+9nPoS4uiqQ5HpblB8BK+4WfpaiEg75LnvuOPfZIP
+KYyXa/0A7QojMwgOrD88ozT+VCkKkkJ+ijXZ7gHNjmcBaUdKK7fDIEOYI63Lyc6Q
+WkGQTigFffSUXWHDCO9aXNhP3ejqFWgGMtCUsrbkcJkWuWY7q5ARy/05HbSM3K4D
+U9eqtnxmiV1WQ8nXuI9JgJQRvh5PTkny5LtxqzcmqvWO9TjHBbrs14BPEO9fcXxK
+L/CFBbzXDSvvAgArdqqlMoncQ/yicTlfL6qzJ8EKFiqW14QMTdAn6SuuZTodXCTi
+InwoT7WjjuFPKKdvfH1GP4bnqdzTnzLxCSDIEtfyfPsIX+9GI7Jkk/zZjQARAQAB
+tDdTaW1vbiBTYXdpY2tpICh5dC1kbHAgc2lnbmluZyBrZXkpIDxjb250YWN0QGdy
+dWI0ay54eXo+iQJOBBMBCgA4FiEErAy75oSNaoc0ZK9OV89lkztadYEFAmP78C4C
+GwMFCwkIBwIGFQoJCAsCBBYCAwECHgECF4AACgkQV89lkztadYEVqQ//cW7TxhXg
+7Xbh2EZQzXml0egn6j8QaV9KzGragMiShrlvTO2zXfLXqyizrFP4AspgjSn/4NrI
+8mluom+Yi+qr7DXT4BjQqIM9y3AjwZPdywe912Lxcw52NNoPZCm24I9T7ySc8lmR
+FQvZC0w4H/VTNj/2lgJ1dwMflpwvNRiWa5YzcFGlCUeDIPskLx9++AJE+xwU3LYm
+jQQsPBqpHHiTBEJzMLl+rfd9Fg4N+QNzpFkTDW3EPerLuvJniSBBwZthqxeAtw4M
+UiAXh6JvCc2hJkKCoygRfM281MeolvmsGNyQm+axlB0vyldiPP6BnaRgZlx+l6MU
+cPqgHblb7RW5j9lfr6OYL7SceBIHNv0CFrt1OnkGo/tVMwcs8LH3Ae4a7UJlIceL
+V54aRxSsZU7w4iX+PB79BWkEsQzwKrUuJVOeL4UDwWajp75OFaUqbS/slDDVXvK5
+OIeuth3mA/adjdvgjPxhRQjA3l69rRWIJDrqBSHldmRsnX6cvXTDy8wSXZgy51lP
+m4IVLHnCy9m4SaGGoAsfTZS0cC9FgjUIyTyrq9M67wOMpUxnuB0aRZgJE1DsI23E
+qdvcSNVlO+39xM/KPWUEh6b83wMn88QeW+DCVGWACQq5N3YdPnAJa50617fGbY6I
+gXIoRHXkDqe23PZ/jURYCv0sjVtjPoVC+bg=
+=bJkn
+-----END PGP PUBLIC KEY BLOCK-----
diff --git a/pyinst.py b/pyinst.py
new file mode 100755
index 0000000..4a8ed2d
--- /dev/null
+++ b/pyinst.py
@@ -0,0 +1,17 @@
+#!/usr/bin/env python3
+
+# Allow execution from anywhere
+import os
+import sys
+
+sys.path.insert(0, os.path.dirname(os.path.abspath(__file__)))
+
+import warnings
+
+from bundle.pyinstaller import main
+
+warnings.warn(DeprecationWarning('`pyinst.py` is deprecated and will be removed in a future version. '
+ 'Use `bundle.pyinstaller` instead'))
+
+if __name__ == '__main__':
+ main()
diff --git a/pyproject.toml b/pyproject.toml
new file mode 100644
index 0000000..64504ff
--- /dev/null
+++ b/pyproject.toml
@@ -0,0 +1,120 @@
+[build-system]
+requires = ["hatchling"]
+build-backend = "hatchling.build"
+
+[project]
+name = "yt-dlp"
+maintainers = [
+ {name = "pukkandan", email = "pukkandan.ytdlp@gmail.com"},
+ {name = "Grub4K", email = "contact@grub4k.xyz"},
+ {name = "bashonly", email = "bashonly@protonmail.com"},
+ {name = "coletdjnz", email = "coletdjnz@protonmail.com"},
+]
+description = "A youtube-dl fork with additional features and patches"
+readme = "README.md"
+requires-python = ">=3.8"
+keywords = [
+ "youtube-dl",
+ "video-downloader",
+ "youtube-downloader",
+ "sponsorblock",
+ "youtube-dlc",
+ "yt-dlp",
+]
+license = {file = "LICENSE"}
+classifiers = [
+ "Topic :: Multimedia :: Video",
+ "Development Status :: 5 - Production/Stable",
+ "Environment :: Console",
+ "Programming Language :: Python",
+ "Programming Language :: Python :: 3 :: Only",
+ "Programming Language :: Python :: 3.8",
+ "Programming Language :: Python :: 3.9",
+ "Programming Language :: Python :: 3.10",
+ "Programming Language :: Python :: 3.11",
+ "Programming Language :: Python :: 3.12",
+ "Programming Language :: Python :: Implementation",
+ "Programming Language :: Python :: Implementation :: CPython",
+ "Programming Language :: Python :: Implementation :: PyPy",
+ "License :: OSI Approved :: The Unlicense (Unlicense)",
+ "Operating System :: OS Independent",
+]
+dynamic = ["version"]
+dependencies = [
+ "brotli; implementation_name=='cpython'",
+ "brotlicffi; implementation_name!='cpython'",
+ "certifi",
+ "mutagen",
+ "pycryptodomex",
+ "requests>=2.31.0,<3",
+ "urllib3>=1.26.17,<3",
+ "websockets>=12.0",
+]
+
+[project.optional-dependencies]
+default = []
+secretstorage = [
+ "cffi",
+ "secretstorage",
+]
+build = [
+ "build",
+ "hatchling",
+ "pip",
+ "wheel",
+]
+dev = [
+ "flake8",
+ "isort",
+ "pytest",
+]
+pyinstaller = ["pyinstaller>=6.3"]
+py2exe = ["py2exe>=0.12"]
+
+[project.urls]
+Documentation = "https://github.com/yt-dlp/yt-dlp#readme"
+Repository = "https://github.com/yt-dlp/yt-dlp"
+Tracker = "https://github.com/yt-dlp/yt-dlp/issues"
+Funding = "https://github.com/yt-dlp/yt-dlp/blob/master/Collaborators.md#collaborators"
+
+[project.scripts]
+yt-dlp = "yt_dlp:main"
+
+[project.entry-points.pyinstaller40]
+hook-dirs = "yt_dlp.__pyinstaller:get_hook_dirs"
+
+[tool.hatch.build.targets.sdist]
+include = [
+ "/yt_dlp",
+ "/devscripts",
+ "/test",
+ "/.gitignore", # included by default, needed for auto-excludes
+ "/Changelog.md",
+ "/LICENSE", # included as license
+ "/pyproject.toml", # included by default
+ "/README.md", # included as readme
+ "/setup.cfg",
+ "/supportedsites.md",
+]
+artifacts = [
+ "/yt_dlp/extractor/lazy_extractors.py",
+ "/completions",
+ "/AUTHORS", # included by default
+ "/README.txt",
+ "/yt-dlp.1",
+]
+
+[tool.hatch.build.targets.wheel]
+packages = ["yt_dlp"]
+artifacts = ["/yt_dlp/extractor/lazy_extractors.py"]
+
+[tool.hatch.build.targets.wheel.shared-data]
+"completions/bash/yt-dlp" = "share/bash-completion/completions/yt-dlp"
+"completions/zsh/_yt-dlp" = "share/zsh/site-functions/_yt-dlp"
+"completions/fish/yt-dlp.fish" = "share/fish/vendor_completions.d/yt-dlp.fish"
+"README.txt" = "share/doc/yt_dlp/README.txt"
+"yt-dlp.1" = "share/man/man1/yt-dlp.1"
+
+[tool.hatch.version]
+path = "yt_dlp/version.py"
+pattern = "_pkg_version = '(?P<version>[^']+)'"
diff --git a/setup.cfg b/setup.cfg
new file mode 100644
index 0000000..aeb4cee
--- /dev/null
+++ b/setup.cfg
@@ -0,0 +1,45 @@
+[flake8]
+exclude = build,venv,.tox,.git,.pytest_cache
+ignore = E402,E501,E731,E741,W503
+max_line_length = 120
+per_file_ignores =
+ devscripts/lazy_load_template.py: F401
+
+
+[autoflake]
+ignore-init-module-imports = true
+ignore-pass-after-docstring = true
+remove-all-unused-imports = true
+remove-duplicate-keys = true
+remove-unused-variables = true
+
+
+[tool:pytest]
+addopts = -ra -v --strict-markers
+markers =
+ download
+
+
+[tox:tox]
+skipsdist = true
+envlist = py{38,39,310,311,312},pypy{38,39,310}
+skip_missing_interpreters = true
+
+[testenv] # tox
+deps =
+ pytest
+commands = pytest {posargs:"-m not download"}
+passenv = HOME # For test_compat_expanduser
+setenv =
+ # PYTHONWARNINGS = error # Catches PIP's warnings too
+
+
+[isort]
+py_version = 38
+multi_line_output = VERTICAL_HANGING_INDENT
+line_length = 80
+reverse_relative = true
+ensure_newline_before_comments = true
+include_trailing_comma = true
+known_first_party =
+ test
diff --git a/setup.py b/setup.py
new file mode 100755
index 0000000..8d1e6d1
--- /dev/null
+++ b/setup.py
@@ -0,0 +1,36 @@
+#!/usr/bin/env python3
+
+# Allow execution from anywhere
+import os
+import sys
+
+sys.path.insert(0, os.path.dirname(os.path.abspath(__file__)))
+
+import warnings
+
+
+if sys.argv[1:2] == ['py2exe']:
+ warnings.warn(DeprecationWarning('`setup.py py2exe` is deprecated and will be removed in a future version. '
+ 'Use `bundle.py2exe` instead'))
+
+ import bundle.py2exe
+
+ bundle.py2exe.main()
+
+elif 'build_lazy_extractors' in sys.argv:
+ warnings.warn(DeprecationWarning('`setup.py build_lazy_extractors` is deprecated and will be removed in a future version. '
+ 'Use `devscripts.make_lazy_extractors` instead'))
+
+ import subprocess
+
+ os.chdir(sys.path[0])
+ print('running build_lazy_extractors')
+ subprocess.run([sys.executable, 'devscripts/make_lazy_extractors.py'])
+
+else:
+
+    print(
+        'ERROR: Building by calling `setup.py` is deprecated. '
+        'Use a build frontend like `build` instead. '
+        'Refer to https://build.pypa.io for more info', file=sys.stderr)
+ sys.exit(1)
diff --git a/supportedsites.md b/supportedsites.md
new file mode 100644
index 0000000..a4b2d57
--- /dev/null
+++ b/supportedsites.md
@@ -0,0 +1,1794 @@
+# Supported sites
+ - **17live**
+ - **17live:clip**
+ - **1News**: 1news.co.nz article videos
+ - **1tv**: Первый канал
+ - **20min**
+ - **23video**
+ - **247sports**: (**Currently broken**)
+ - **24tv.ua**
+ - **3qsdn**: 3Q SDN
+ - **3sat**
+ - **4tube**
+ - **56.com**
+ - **6play**
+ - **7plus**
+ - **8tracks**
+ - **91porn**
+ - **9c9media**
+ - **9gag**: 9GAG
+ - **9News**
+ - **9now.com.au**
+ - **abc.net.au**
+ - **abc.net.au:iview**
+ - **abc.net.au:​iview:showseries**
+ - **abcnews**
+ - **abcnews:video**
+ - **abcotvs**: ABC Owned Television Stations
+ - **abcotvs:clips**
+ - **AbemaTV**: [*abematv*](## "netrc machine")
+ - **AbemaTVTitle**: [*abematv*](## "netrc machine")
+ - **AcademicEarth:Course**
+ - **acast**
+ - **acast:channel**
+ - **AcFunBangumi**
+ - **AcFunVideo**
+ - **ADN**: [*animationdigitalnetwork*](## "netrc machine") Animation Digital Network
+ - **ADNSeason**: [*animationdigitalnetwork*](## "netrc machine") Animation Digital Network
+ - **AdobeConnect**
+ - **adobetv**
+ - **adobetv:channel**
+ - **adobetv:embed**
+ - **adobetv:show**
+ - **adobetv:video**
+ - **AdultSwim**
+ - **aenetworks**: A+E Networks: A&E, Lifetime, History.com, FYI Network and History Vault
+ - **aenetworks:collection**
+ - **aenetworks:show**
+ - **AeonCo**
+ - **afreecatv**: [*afreecatv*](## "netrc machine") afreecatv.com
+ - **afreecatv:live**: [*afreecatv*](## "netrc machine") afreecatv.com
+ - **afreecatv:user**
+ - **AirTV**
+ - **AitubeKZVideo**
+ - **AliExpressLive**
+ - **AlJazeera**
+ - **Allocine**
+ - **Allstar**
+ - **AllstarProfile**
+ - **AlphaPorno**
+ - **Alsace20TV**
+ - **Alsace20TVEmbed**
+ - **altcensored**
+ - **altcensored:channel**
+ - **Alura**: [*alura*](## "netrc machine")
+ - **AluraCourse**: [*aluracourse*](## "netrc machine")
+ - **AmadeusTV**
+ - **Amara**
+ - **AmazonMiniTV**
+ - **amazonminitv:season**: Amazon MiniTV Season, "minitv:season:" prefix
+ - **amazonminitv:series**: Amazon MiniTV Series, "minitv:series:" prefix
+ - **AmazonReviews**
+ - **AmazonStore**
+ - **AMCNetworks**
+ - **AmericasTestKitchen**
+ - **AmericasTestKitchenSeason**
+ - **AmHistoryChannel**
+ - **AnchorFMEpisode**
+ - **anderetijden**: npo.nl, ntr.nl, omroepwnl.nl, zapp.nl and npo3.nl
+ - **Angel**
+ - **AnimalPlanet**
+ - **ant1newsgr:article**: ant1news.gr articles
+ - **ant1newsgr:embed**: ant1news.gr embedded videos
+ - **antenna:watch**: antenna.gr and ant1news.gr videos
+ - **Anvato**
+ - **aol.com**: Yahoo screen and movies (**Currently broken**)
+ - **APA**
+ - **Aparat**
+ - **AppleConnect**
+ - **AppleDaily**: 臺灣蘋果日報
+ - **ApplePodcasts**
+ - **appletrailers**
+ - **appletrailers:section**
+ - **archive.org**: archive.org video and audio
+ - **ArcPublishing**
+ - **ARD**
+ - **ARDMediathek**
+ - **ARDMediathekCollection**
+ - **Arkena**
+ - **Art19**
+ - **Art19Show**
+ - **arte.sky.it**
+ - **ArteTV**
+ - **ArteTVCategory**
+ - **ArteTVEmbed**
+ - **ArteTVPlaylist**
+ - **asobichannel**: ASOBI CHANNEL
+ - **asobichannel:tag**: ASOBI CHANNEL
+ - **AtresPlayer**: [*atresplayer*](## "netrc machine")
+ - **AtScaleConfEvent**
+ - **ATVAt**
+ - **AudiMedia**
+ - **AudioBoom**
+ - **Audiodraft:custom**
+ - **Audiodraft:generic**
+ - **audiomack**
+ - **audiomack:album**
+ - **Audius**: Audius.co
+ - **audius:artist**: Audius.co profile/artist pages
+ - **audius:playlist**: Audius.co playlists
+ - **audius:track**: Audius track ID or API link. Prepend with "audius:"
+ - **AWAAN**
+ - **awaan:live**
+ - **awaan:season**
+ - **awaan:video**
+ - **axs.tv**
+ - **AZMedien**: AZ Medien videos
+ - **BaiduVideo**: 百度视频
+ - **BanBye**
+ - **BanByeChannel**
+ - **bandaichannel**
+ - **Bandcamp**
+ - **Bandcamp:album**
+ - **Bandcamp:user**
+ - **Bandcamp:weekly**
+ - **BannedVideo**
+ - **bbc**: [*bbc*](## "netrc machine") BBC
+ - **bbc.co.uk**: [*bbc*](## "netrc machine") BBC iPlayer
+ - **bbc.co.uk:article**: BBC articles
+ - **bbc.co.uk:​iplayer:episodes**
+ - **bbc.co.uk:​iplayer:group**
+ - **bbc.co.uk:playlist**
+ - **BBVTV**: [*bbvtv*](## "netrc machine")
+ - **BBVTVLive**: [*bbvtv*](## "netrc machine")
+ - **BBVTVRecordings**: [*bbvtv*](## "netrc machine")
+ - **BeatBumpPlaylist**
+ - **BeatBumpVideo**
+ - **Beatport**
+ - **Beeg**
+ - **BehindKink**: (**Currently broken**)
+ - **Bellator**
+ - **BellMedia**
+ - **BerufeTV**
+ - **Bet**: (**Currently broken**)
+ - **bfi:player**: (**Currently broken**)
+ - **bfmtv**
+ - **bfmtv:article**
+ - **bfmtv:live**
+ - **bibeltv:live**: BibelTV live program
+ - **bibeltv:series**: BibelTV series playlist
+ - **bibeltv:video**: BibelTV single video
+ - **Bigflix**
+ - **Bigo**
+ - **Bild**: Bild.de
+ - **BiliBili**
+ - **Bilibili category extractor**
+ - **BilibiliAudio**
+ - **BilibiliAudioAlbum**
+ - **BiliBiliBangumi**
+ - **BiliBiliBangumiMedia**
+ - **BiliBiliBangumiSeason**
+ - **BilibiliCheese**
+ - **BilibiliCheeseSeason**
+ - **BilibiliCollectionList**
+ - **BilibiliFavoritesList**
+ - **BiliBiliPlayer**
+ - **BilibiliPlaylist**
+ - **BiliBiliSearch**: Bilibili video search; "bilisearch:" prefix
+ - **BilibiliSeriesList**
+ - **BilibiliSpaceAudio**
+ - **BilibiliSpaceVideo**
+ - **BilibiliWatchlater**
+ - **BiliIntl**: [*biliintl*](## "netrc machine")
+ - **biliIntl:series**: [*biliintl*](## "netrc machine")
+ - **BiliLive**
+ - **BioBioChileTV**
+ - **Biography**
+ - **BitChute**
+ - **BitChuteChannel**
+ - **BlackboardCollaborate**
+ - **BleacherReport**: (**Currently broken**)
+ - **BleacherReportCMS**: (**Currently broken**)
+ - **blerp**
+ - **blogger.com**
+ - **Bloomberg**
+ - **BokeCC**
+ - **BongaCams**
+ - **Boosty**
+ - **BostonGlobe**
+ - **Box**
+ - **BoxCastVideo**
+ - **Bpb**: Bundeszentrale für politische Bildung
+ - **BR**: Bayerischer Rundfunk (**Currently broken**)
+ - **BrainPOP**: [*brainpop*](## "netrc machine")
+ - **BrainPOPELL**: [*brainpop*](## "netrc machine")
+ - **BrainPOPEsp**: [*brainpop*](## "netrc machine") BrainPOP Español
+ - **BrainPOPFr**: [*brainpop*](## "netrc machine") BrainPOP Français
+ - **BrainPOPIl**: [*brainpop*](## "netrc machine") BrainPOP Hebrew
+ - **BrainPOPJr**: [*brainpop*](## "netrc machine")
+ - **BravoTV**
+ - **BreitBart**
+ - **brightcove:legacy**
+ - **brightcove:new**
+ - **Brilliantpala:Classes**: [*brilliantpala*](## "netrc machine") VoD on classes.brilliantpala.org
+ - **Brilliantpala:Elearn**: [*brilliantpala*](## "netrc machine") VoD on elearn.brilliantpala.org
+ - **bt:article**: Bergens Tidende Articles
+ - **bt:vestlendingen**: Bergens Tidende - Vestlendingen
+ - **Bundesliga**
+ - **Bundestag**
+ - **BusinessInsider**
+ - **BuzzFeed**
+ - **BYUtv**: (**Currently broken**)
+ - **CableAV**
+ - **Callin**
+ - **Caltrans**
+ - **CAM4**
+ - **Camdemy**
+ - **CamdemyFolder**
+ - **CamFMEpisode**
+ - **CamFMShow**
+ - **CamModels**
+ - **Camsoda**
+ - **CamtasiaEmbed**
+ - **Canal1**
+ - **CanalAlpha**
+ - **canalc2.tv**
+ - **Canalplus**: mycanal.fr and piwiplus.fr
+ - **CaracolTvPlay**: [*caracoltv-play*](## "netrc machine")
+ - **CartoonNetwork**
+ - **cbc.ca**
+ - **cbc.ca:player**
+ - **cbc.ca:​player:playlist**
+ - **CBS**: (**Currently broken**)
+ - **CBSLocal**
+ - **CBSLocalArticle**
+ - **CBSLocalLive**
+ - **cbsnews**: CBS News
+ - **cbsnews:embed**
+ - **cbsnews:live**: CBS News Livestream
+ - **cbsnews:livevideo**: CBS News Live Videos
+ - **cbssports**: (**Currently broken**)
+ - **cbssports:embed**: (**Currently broken**)
+ - **CCMA**
+ - **CCTV**: 央视网
+ - **CDA**: [*cdapl*](## "netrc machine")
+ - **Cellebrite**
+ - **CeskaTelevize**
+ - **CGTN**
+ - **CharlieRose**
+ - **Chaturbate**
+ - **Chilloutzone**
+ - **chzzk:live**
+ - **chzzk:video**
+ - **cielotv.it**
+ - **Cinemax**: (**Currently broken**)
+ - **CinetecaMilano**
+ - **Cineverse**
+ - **CineverseDetails**
+ - **CiscoLiveSearch**
+ - **CiscoLiveSession**
+ - **ciscowebex**: Cisco Webex
+ - **CJSW**
+ - **Clipchamp**
+ - **Clippit**
+ - **ClipRs**: (**Currently broken**)
+ - **ClipYouEmbed**
+ - **CloserToTruth**: (**Currently broken**)
+ - **CloudflareStream**
+ - **CloudyCDN**
+ - **Clubic**: (**Currently broken**)
+ - **Clyp**
+ - **cmt.com**: (**Currently broken**)
+ - **CNBCVideo**
+ - **CNN**
+ - **CNNArticle**
+ - **CNNBlogs**
+ - **CNNIndonesia**
+ - **ComedyCentral**
+ - **ComedyCentralTV**
+ - **ConanClassic**
+ - **CondeNast**: Condé Nast media group: Allure, Architectural Digest, Ars Technica, Bon Appétit, Brides, Condé Nast, Condé Nast Traveler, Details, Epicurious, GQ, Glamour, Golf Digest, SELF, Teen Vogue, The New Yorker, Vanity Fair, Vogue, W Magazine, WIRED
+ - **CONtv**
+ - **CookingChannel**
+ - **Corus**
+ - **Coub**
+ - **CozyTV**
+ - **cp24**
+ - **cpac**
+ - **cpac:playlist**
+ - **Cracked**
+ - **Crackle**
+ - **Craftsy**
+ - **CrooksAndLiars**
+ - **CrowdBunker**
+ - **CrowdBunkerChannel**
+ - **Crtvg**
+ - **crunchyroll**: [*crunchyroll*](## "netrc machine")
+ - **crunchyroll:artist**: [*crunchyroll*](## "netrc machine")
+ - **crunchyroll:music**: [*crunchyroll*](## "netrc machine")
+ - **crunchyroll:playlist**: [*crunchyroll*](## "netrc machine")
+ - **CSpan**: C-SPAN
+ - **CSpanCongress**
+ - **CtsNews**: 華視新聞
+ - **CTV**
+ - **CTVNews**
+ - **cu.ntv.co.jp**: Nippon Television Network
+ - **CultureUnplugged**
+ - **curiositystream**: [*curiositystream*](## "netrc machine")
+ - **curiositystream:collections**: [*curiositystream*](## "netrc machine")
+ - **curiositystream:series**: [*curiositystream*](## "netrc machine")
+ - **CWTV**
+ - **Cybrary**: [*cybrary*](## "netrc machine")
+ - **CybraryCourse**: [*cybrary*](## "netrc machine")
+ - **DacastPlaylist**
+ - **DacastVOD**
+ - **DagelijkseKost**: dagelijksekost.een.be
+ - **DailyMail**
+ - **dailymotion**: [*dailymotion*](## "netrc machine")
+ - **dailymotion:playlist**: [*dailymotion*](## "netrc machine")
+ - **dailymotion:search**: [*dailymotion*](## "netrc machine")
+ - **dailymotion:user**: [*dailymotion*](## "netrc machine")
+ - **DailyWire**
+ - **DailyWirePodcast**
+ - **damtomo:record**
+ - **damtomo:video**
+ - **daum.net**
+ - **daum.net:clip**
+ - **daum.net:playlist**
+ - **daum.net:user**
+ - **daystar:clip**
+ - **DBTV**
+ - **DctpTv**
+ - **DeezerAlbum**
+ - **DeezerPlaylist**
+ - **democracynow**
+ - **DestinationAmerica**
+ - **DetikEmbed**
+ - **DeuxM**
+ - **DeuxMNews**
+ - **DHM**: Filmarchiv - Deutsches Historisches Museum (**Currently broken**)
+ - **DigitalConcertHall**: [*digitalconcerthall*](## "netrc machine") DigitalConcertHall extractor
+ - **DigitallySpeaking**
+ - **Digiteka**
+ - **DiscogsReleasePlaylist**
+ - **Discovery**
+ - **DiscoveryLife**
+ - **DiscoveryNetworksDe**
+ - **DiscoveryPlus**
+ - **DiscoveryPlusIndia**
+ - **DiscoveryPlusIndiaShow**
+ - **DiscoveryPlusItaly**
+ - **DiscoveryPlusItalyShow**
+ - **Disney**
+ - **DIYNetwork**
+ - **dlf**
+ - **dlf:corpus**: DLF Multi-feed Archives
+ - **dlive:stream**
+ - **dlive:vod**
+ - **Douyin**
+ - **DouyuShow**
+ - **DouyuTV**: 斗鱼直播
+ - **DPlay**
+ - **DRBonanza**
+ - **Drooble**
+ - **Dropbox**
+ - **Dropout**: [*dropout*](## "netrc machine")
+ - **DropoutSeason**
+ - **DrTuber**
+ - **drtv**
+ - **drtv:live**
+ - **drtv:season**
+ - **drtv:series**
+ - **DTube**: (**Currently broken**)
+ - **duboku**: www.duboku.io
+ - **duboku:list**: www.duboku.io entire series
+ - **Dumpert**
+ - **Duoplay**
+ - **dvtv**: http://video.aktualne.cz/
+ - **dw**: (**Currently broken**)
+ - **dw:article**: (**Currently broken**)
+ - **EaglePlatform**
+ - **EbaumsWorld**
+ - **Ebay**
+ - **egghead:course**: egghead.io course
+ - **egghead:lesson**: egghead.io lesson
+ - **EinsUndEinsTV**: [*1und1tv*](## "netrc machine")
+ - **EinsUndEinsTVLive**: [*1und1tv*](## "netrc machine")
+ - **EinsUndEinsTVRecordings**: [*1und1tv*](## "netrc machine")
+ - **Einthusan**
+ - **eitb.tv**
+ - **ElementorEmbed**
+ - **Elonet**
+ - **ElPais**: El País
+ - **ElTreceTV**: El Trece TV (Argentina)
+ - **Embedly**
+ - **EMPFlix**
+ - **Epicon**
+ - **EpiconSeries**
+ - **EpidemicSound**
+ - **eplus**: [*eplus*](## "netrc machine") e+ (イープラス)
+ - **Epoch**
+ - **Eporner**
+ - **Erocast**
+ - **EroProfile**: [*eroprofile*](## "netrc machine")
+ - **EroProfile:album**
+ - **ERRJupiter**
+ - **ertflix**: ERTFLIX videos
+ - **ertflix:codename**: ERTFLIX videos by codename
+ - **ertwebtv:embed**: ert.gr webtv embedded videos
+ - **ESPN**
+ - **ESPNArticle**
+ - **ESPNCricInfo**
+ - **EttuTv**
+ - **Europa**: (**Currently broken**)
+ - **EuroParlWebstream**
+ - **EuropeanTour**
+ - **Eurosport**
+ - **EUScreen**
+ - **EWETV**: [*ewetv*](## "netrc machine")
+ - **EWETVLive**: [*ewetv*](## "netrc machine")
+ - **EWETVRecordings**: [*ewetv*](## "netrc machine")
+ - **Expressen**
+ - **EyedoTV**
+ - **facebook**: [*facebook*](## "netrc machine")
+ - **facebook:ads**
+ - **facebook:reel**
+ - **FacebookPluginsVideo**
+ - **fancode:live**: [*fancode*](## "netrc machine") (**Currently broken**)
+ - **fancode:vod**: [*fancode*](## "netrc machine") (**Currently broken**)
+ - **faz.net**
+ - **fc2**: [*fc2*](## "netrc machine")
+ - **fc2:embed**
+ - **fc2:live**
+ - **Fczenit**
+ - **Fifa**
+ - **filmon**
+ - **filmon:channel**
+ - **Filmweb**
+ - **FiveThirtyEight**
+ - **FiveTV**
+ - **FlexTV**
+ - **Flickr**
+ - **Floatplane**
+ - **FloatplaneChannel**
+ - **Folketinget**: Folketinget (ft.dk; Danish parliament)
+ - **FoodNetwork**
+ - **FootyRoom**
+ - **Formula1**
+ - **FOX**
+ - **FOX9**
+ - **FOX9News**
+ - **foxnews**: Fox News and Fox Business Video
+ - **foxnews:article**
+ - **FoxNewsVideo**
+ - **FoxSports**
+ - **fptplay**: fptplay.vn
+ - **FranceCulture**
+ - **FranceInter**
+ - **FranceTV**
+ - **francetvinfo.fr**
+ - **FranceTVSite**
+ - **Freesound**
+ - **freespeech.org**
+ - **freetv:series**
+ - **FreeTvMovies**
+ - **FrontendMasters**: [*frontendmasters*](## "netrc machine")
+ - **FrontendMastersCourse**: [*frontendmasters*](## "netrc machine")
+ - **FrontendMastersLesson**: [*frontendmasters*](## "netrc machine")
+ - **FujiTVFODPlus7**
+ - **Funimation**: [*funimation*](## "netrc machine")
+ - **funimation:page**: [*funimation*](## "netrc machine")
+ - **funimation:show**: [*funimation*](## "netrc machine")
+ - **Funk**
+ - **Funker530**
+ - **Fux**
+ - **FuyinTV**
+ - **Gab**
+ - **GabTV**
+ - **Gaia**: [*gaia*](## "netrc machine")
+ - **GameJolt**
+ - **GameJoltCommunity**
+ - **GameJoltGame**
+ - **GameJoltGameSoundtrack**
+ - **GameJoltSearch**
+ - **GameJoltUser**
+ - **GameSpot**
+ - **GameStar**
+ - **Gaskrank**
+ - **Gazeta**: (**Currently broken**)
+ - **GDCVault**: [*gdcvault*](## "netrc machine") (**Currently broken**)
+ - **GediDigital**
+ - **gem.cbc.ca**: [*cbcgem*](## "netrc machine")
+ - **gem.cbc.ca:live**
+ - **gem.cbc.ca:playlist**
+ - **Genius**
+ - **GeniusLyrics**
+ - **GetCourseRu**: [*getcourseru*](## "netrc machine")
+ - **GetCourseRuPlayer**
+ - **Gettr**
+ - **GettrStreaming**
+ - **GiantBomb**
+ - **GlattvisionTV**: [*glattvisiontv*](## "netrc machine")
+ - **GlattvisionTVLive**: [*glattvisiontv*](## "netrc machine")
+ - **GlattvisionTVRecordings**: [*glattvisiontv*](## "netrc machine")
+ - **Glide**: Glide mobile video messages (glide.me)
+ - **GlobalCyclingNetworkPlus**
+ - **GlobalPlayerAudio**
+ - **GlobalPlayerAudioEpisode**
+ - **GlobalPlayerLive**
+ - **GlobalPlayerLivePlaylist**
+ - **GlobalPlayerVideo**
+ - **Globo**: [*globo*](## "netrc machine")
+ - **GloboArticle**
+ - **glomex**: Glomex videos
+ - **glomex:embed**: Glomex embedded videos
+ - **GMANetworkVideo**
+ - **Go**
+ - **GoDiscovery**
+ - **GodTube**: (**Currently broken**)
+ - **Gofile**
+ - **Golem**
+ - **goodgame:stream**
+ - **google:podcasts**
+ - **google:​podcasts:feed**
+ - **GoogleDrive**
+ - **GoogleDrive:Folder**
+ - **GoPlay**: [*goplay*](## "netrc machine")
+ - **GoPro**
+ - **Goshgay**
+ - **GoToStage**
+ - **GPUTechConf**
+ - **Gronkh**
+ - **gronkh:feed**
+ - **gronkh:vods**
+ - **Groupon**
+ - **Harpodeon**
+ - **hbo**
+ - **HearThisAt**
+ - **Heise**
+ - **HellPorno**
+ - **hetklokhuis**
+ - **hgtv.com:show**
+ - **HGTVDe**
+ - **HGTVUsa**
+ - **HiDive**: [*hidive*](## "netrc machine")
+ - **HistoricFilms**
+ - **history:player**
+ - **history:topic**: History.com Topic
+ - **HitRecord**
+ - **hketv**: 香港教育局教育電視 (HKETV) Educational Television, Hong Kong Educational Bureau
+ - **HollywoodReporter**
+ - **HollywoodReporterPlaylist**
+ - **Holodex**
+ - **HotNewHipHop**: (**Currently broken**)
+ - **hotstar**
+ - **hotstar:playlist**
+ - **hotstar:season**
+ - **hotstar:series**
+ - **hrfernsehen**
+ - **HRTi**: [*hrti*](## "netrc machine")
+ - **HRTiPlaylist**: [*hrti*](## "netrc machine")
+ - **HSEProduct**
+ - **HSEShow**
+ - **html5**
+ - **Huajiao**: 花椒直播
+ - **HuffPost**: Huffington Post
+ - **Hungama**
+ - **HungamaAlbumPlaylist**
+ - **HungamaSong**
+ - **huya:live**: huya.com
+ - **Hypem**
+ - **Hytale**
+ - **Icareus**
+ - **IdolPlus**
+ - **iflix:episode**
+ - **IflixSeries**
+ - **ign.com**
+ - **IGNArticle**
+ - **IGNVideo**
+ - **iheartradio**
+ - **iheartradio:podcast**
+ - **IlPost**
+ - **Iltalehti**
+ - **imdb**: Internet Movie Database trailers
+ - **imdb:list**: Internet Movie Database lists
+ - **Imgur**
+ - **imgur:album**
+ - **imgur:gallery**
+ - **Ina**
+ - **Inc**
+ - **IndavideoEmbed**
+ - **InfoQ**
+ - **Instagram**: [*instagram*](## "netrc machine")
+ - **instagram:story**: [*instagram*](## "netrc machine")
+ - **instagram:tag**: [*instagram*](## "netrc machine") Instagram hashtag search URLs
+ - **instagram:user**: [*instagram*](## "netrc machine") Instagram user profile (**Currently broken**)
+ - **InstagramIOS**: IOS instagram:// URL
+ - **Internazionale**
+ - **InternetVideoArchive**
+ - **InvestigationDiscovery**
+ - **IPrima**: [*iprima*](## "netrc machine")
+ - **IPrimaCNN**
+ - **iq.com**: International version of iQiyi
+ - **iq.com:album**
+ - **iqiyi**: [*iqiyi*](## "netrc machine") 爱奇艺
+ - **IslamChannel**
+ - **IslamChannelSeries**
+ - **IsraelNationalNews**
+ - **ITProTV**
+ - **ITProTVCourse**
+ - **ITV**
+ - **ITVBTCC**
+ - **ivi**: ivi.ru
+ - **ivi:compilation**: ivi.ru compilations
+ - **ivideon**: Ivideon TV
+ - **IVXPlayer**
+ - **iwara**: [*iwara*](## "netrc machine")
+ - **iwara:playlist**: [*iwara*](## "netrc machine")
+ - **iwara:user**: [*iwara*](## "netrc machine")
+ - **Ixigua**
+ - **Izlesene**
+ - **Jable**
+ - **JablePlaylist**
+ - **Jamendo**
+ - **JamendoAlbum**
+ - **JeuxVideo**: (**Currently broken**)
+ - **JioSaavnAlbum**
+ - **JioSaavnSong**
+ - **Joj**
+ - **JoqrAg**: 超!A&G+ 文化放送 (f.k.a. AGQR) Nippon Cultural Broadcasting, Inc. (JOQR)
+ - **Jove**
+ - **JStream**
+ - **JTBC**: jtbc.co.kr
+ - **JTBC:program**
+ - **JWPlatform**
+ - **Kakao**
+ - **Kaltura**
+ - **KankaNews**: (**Currently broken**)
+ - **Karaoketv**
+ - **Katsomo**: (**Currently broken**)
+ - **KelbyOne**: (**Currently broken**)
+ - **Ketnet**
+ - **khanacademy**
+ - **khanacademy:unit**
+ - **Kick**
+ - **Kicker**
+ - **KickStarter**
+ - **KickVOD**
+ - **kinja:embed**
+ - **KinoPoisk**
+ - **Kommunetv**
+ - **KompasVideo**
+ - **Koo**: (**Currently broken**)
+ - **KrasView**: Красвью (**Currently broken**)
+ - **KTH**
+ - **Ku6**
+ - **KukuluLive**
+ - **kuwo:album**: 酷我音乐 - 专辑 (**Currently broken**)
+ - **kuwo:category**: 酷我音乐 - 分类 (**Currently broken**)
+ - **kuwo:chart**: 酷我音乐 - 排行榜 (**Currently broken**)
+ - **kuwo:mv**: 酷我音乐 - MV (**Currently broken**)
+ - **kuwo:singer**: 酷我音乐 - 歌手 (**Currently broken**)
+ - **kuwo:song**: 酷我音乐 (**Currently broken**)
+ - **la7.it**
+ - **la7.it:​pod:episode**
+ - **la7.it:podcast**
+ - **LastFM**
+ - **LastFMPlaylist**
+ - **LastFMUser**
+ - **LaXarxaMes**: [*laxarxames*](## "netrc machine")
+ - **lbry**
+ - **lbry:channel**
+ - **lbry:playlist**
+ - **LCI**
+ - **Lcp**
+ - **LcpPlay**
+ - **Le**: 乐视网
+ - **Lecture2Go**: (**Currently broken**)
+ - **Lecturio**: [*lecturio*](## "netrc machine")
+ - **LecturioCourse**: [*lecturio*](## "netrc machine")
+ - **LecturioDeCourse**: [*lecturio*](## "netrc machine")
+ - **LeFigaroVideoEmbed**
+ - **LeFigaroVideoSection**
+ - **LEGO**
+ - **Lemonde**
+ - **Lenta**: (**Currently broken**)
+ - **LePlaylist**
+ - **LetvCloud**: 乐视云
+ - **Libsyn**
+ - **life**: Life.ru
+ - **life:embed**
+ - **likee**
+ - **likee:user**
+ - **limelight**
+ - **limelight:channel**
+ - **limelight:channel_list**
+ - **LinkedIn**: [*linkedin*](## "netrc machine")
+ - **linkedin:learning**: [*linkedin*](## "netrc machine")
+ - **linkedin:​learning:course**: [*linkedin*](## "netrc machine")
+ - **Liputan6**
+ - **ListenNotes**
+ - **LiTV**
+ - **LiveJournal**
+ - **livestream**
+ - **livestream:original**
+ - **Livestreamfails**
+ - **Lnk**
+ - **LnkGo**
+ - **loc**: Library of Congress
+ - **LoveHomePorn**
+ - **LRTStream**
+ - **LRTVOD**
+ - **LSMLREmbed**
+ - **LSMLTVEmbed**
+ - **LSMReplay**
+ - **Lumni**
+ - **lynda**: [*lynda*](## "netrc machine") lynda.com videos
+ - **lynda:course**: [*lynda*](## "netrc machine") lynda.com online courses
+ - **maariv.co.il**
+ - **MagellanTV**
+ - **MagentaMusik**
+ - **mailru**: Видео@Mail.Ru
+ - **mailru:music**: Музыка@Mail.Ru
+ - **mailru:​music:search**: Музыка@Mail.Ru
+ - **MainStreaming**: MainStreaming Player
+ - **mangomolo:live**
+ - **mangomolo:video**
+ - **MangoTV**: 芒果TV
+ - **ManotoTV**: Manoto TV (Episode)
+ - **ManotoTVLive**: Manoto TV (Live)
+ - **ManotoTVShow**: Manoto TV (Show)
+ - **ManyVids**: (**Currently broken**)
+ - **MaoriTV**
+ - **Markiza**: (**Currently broken**)
+ - **MarkizaPage**: (**Currently broken**)
+ - **massengeschmack.tv**
+ - **Masters**
+ - **MatchTV**
+ - **MBN**: mbn.co.kr (매일방송)
+ - **MDR**: MDR.DE and KiKA
+ - **MedalTV**
+ - **media.ccc.de**
+ - **media.ccc.de:lists**
+ - **Mediaite**
+ - **MediaKlikk**
+ - **Medialaan**
+ - **Mediaset**
+ - **MediasetShow**
+ - **Mediasite**
+ - **MediasiteCatalog**
+ - **MediasiteNamedCatalog**
+ - **MediaStream**
+ - **MediaWorksNZVOD**
+ - **Medici**
+ - **megaphone.fm**: megaphone.fm embedded players
+ - **megatvcom**: megatv.com videos
+ - **megatvcom:embed**: megatv.com embedded videos
+ - **Meipai**: 美拍
+ - **MelonVOD**
+ - **Metacritic**
+ - **mewatch**
+ - **MicrosoftEmbed**
+ - **microsoftstream**: Microsoft Stream
+ - **mildom**: Record an ongoing live stream by a specific user on Mildom
+ - **mildom:clip**: Clip in Mildom
+ - **mildom:​user:vod**: Download all VODs from a specific user on Mildom
+ - **mildom:vod**: VOD in Mildom
+ - **minds**
+ - **minds:channel**
+ - **minds:group**
+ - **Minoto**
+ - **mirrativ**
+ - **mirrativ:user**
+ - **MirrorCoUK**
+ - **MiTele**: mitele.es
+ - **mixch**
+ - **mixch:archive**
+ - **mixcloud**
+ - **mixcloud:playlist**
+ - **mixcloud:user**
+ - **MLB**
+ - **MLBArticle**
+ - **MLBTV**: [*mlb*](## "netrc machine")
+ - **MLBVideo**
+ - **MLSSoccer**
+ - **MNetTV**: [*mnettv*](## "netrc machine")
+ - **MNetTVLive**: [*mnettv*](## "netrc machine")
+ - **MNetTVRecordings**: [*mnettv*](## "netrc machine")
+ - **MochaVideo**
+ - **Mojvideo**
+ - **Monstercat**
+ - **MonsterSirenHypergryphMusic**
+ - **Motherless**
+ - **MotherlessGallery**
+ - **MotherlessGroup**
+ - **MotherlessUploader**
+ - **Motorsport**: motorsport.com (**Currently broken**)
+ - **MotorTrend**
+ - **MotorTrendOnDemand**
+ - **MovieFap**
+ - **Moviepilot**
+ - **MoviewPlay**
+ - **Moviezine**
+ - **MovingImage**
+ - **MSN**: (**Currently broken**)
+ - **mtg**: MTG services
+ - **mtv**
+ - **mtv.de**: (**Currently broken**)
+ - **mtv.it**
+ - **mtv.it:programma**
+ - **mtv:video**
+ - **mtvjapan**
+ - **mtvservices:embedded**
+ - **MTVUutisetArticle**: (**Currently broken**)
+ - **MuenchenTV**: münchen.tv (**Currently broken**)
+ - **MujRozhlas**
+ - **Murrtube**: (**Currently broken**)
+ - **MurrtubeUser**: Murrtube user profile (**Currently broken**)
+ - **MuseAI**
+ - **MuseScore**
+ - **MusicdexAlbum**
+ - **MusicdexArtist**
+ - **MusicdexPlaylist**
+ - **MusicdexSong**
+ - **mva**: Microsoft Virtual Academy videos
+ - **mva:course**: Microsoft Virtual Academy courses
+ - **Mx3**
+ - **Mx3Neo**
+ - **Mx3Volksmusik**
+ - **Mxplayer**
+ - **MxplayerShow**
+ - **MySpace**
+ - **MySpace:album**
+ - **MySpass**
+ - **MyVideoGe**
+ - **MyVidster**
+ - **Mzaalo**
+ - **n-tv.de**
+ - **N1Info:article**
+ - **N1InfoAsset**
+ - **Nate**
+ - **NateProgram**
+ - **natgeo:video**
+ - **NationalGeographicTV**
+ - **Naver**
+ - **Naver:live**
+ - **navernow**
+ - **nba**
+ - **nba:channel**
+ - **nba:embed**
+ - **nba:watch**
+ - **nba:​watch:collection**
+ - **nba:​watch:embed**
+ - **NBC**
+ - **NBCNews**
+ - **nbcolympics**
+ - **nbcolympics:stream**
+ - **NBCSports**
+ - **NBCSportsStream**
+ - **NBCSportsVPlayer**
+ - **NBCStations**
+ - **ndr**: NDR.de - Norddeutscher Rundfunk
+ - **ndr:embed**
+ - **ndr:​embed:base**
+ - **NDTV**: (**Currently broken**)
+ - **nebula:channel**: [*watchnebula*](## "netrc machine")
+ - **nebula:media**: [*watchnebula*](## "netrc machine")
+ - **nebula:subscriptions**: [*watchnebula*](## "netrc machine")
+ - **nebula:video**: [*watchnebula*](## "netrc machine")
+ - **NekoHacker**
+ - **NerdCubedFeed**
+ - **netease:album**: 网易云音乐 - 专辑
+ - **netease:djradio**: 网易云音乐 - 电台
+ - **netease:mv**: 网易云音乐 - MV
+ - **netease:playlist**: 网易云音乐 - 歌单
+ - **netease:program**: 网易云音乐 - 电台节目
+ - **netease:singer**: 网易云音乐 - 歌手
+ - **netease:song**: 网易云音乐
+ - **NetPlusTV**: [*netplus*](## "netrc machine")
+ - **NetPlusTVLive**: [*netplus*](## "netrc machine")
+ - **NetPlusTVRecordings**: [*netplus*](## "netrc machine")
+ - **Netverse**
+ - **NetversePlaylist**
+ - **NetverseSearch**: "netsearch:" prefix
+ - **Netzkino**: (**Currently broken**)
+ - **Newgrounds**: [*newgrounds*](## "netrc machine")
+ - **Newgrounds:playlist**
+ - **Newgrounds:user**
+ - **NewsPicks**
+ - **Newsy**
+ - **NextMedia**: 蘋果日報
+ - **NextMediaActionNews**: 蘋果日報 - 動新聞
+ - **NextTV**: 壹電視 (**Currently broken**)
+ - **Nexx**
+ - **NexxEmbed**
+ - **nfb**: nfb.ca and onf.ca films and episodes
+ - **nfb:series**: nfb.ca and onf.ca series
+ - **NFHSNetwork**
+ - **nfl.com**
+ - **nfl.com:article**
+ - **nfl.com:​plus:episode**
+ - **nfl.com:​plus:replay**
+ - **NhkForSchoolBangumi**
+ - **NhkForSchoolProgramList**
+ - **NhkForSchoolSubject**: Portal page for each school subject, like Japanese (kokugo, 国語) or math (sansuu/suugaku or 算数・数学)
+ - **NhkRadioNewsPage**
+ - **NhkRadiru**: NHK らじる (Radiru/Rajiru)
+ - **NhkRadiruLive**
+ - **NhkVod**
+ - **NhkVodProgram**
+ - **nhl.com**
+ - **nick.com**
+ - **nick.de**
+ - **nickelodeon:br**
+ - **nickelodeonru**
+ - **niconico**: [*niconico*](## "netrc machine") ニコニコ動画
+ - **niconico:history**: NicoNico user history or likes. Requires cookies.
+ - **niconico:live**: ニコニコ生放送
+ - **niconico:playlist**
+ - **niconico:series**
+ - **niconico:tag**: NicoNico video tag URLs
+ - **NiconicoChannelPlus**: ニコニコチャンネルプラス
+ - **NiconicoChannelPlus:​channel:lives**: ニコニコチャンネルプラス - チャンネル - ライブリスト. nicochannel.jp/channel/lives
+ - **NiconicoChannelPlus:​channel:videos**: ニコニコチャンネルプラス - チャンネル - 動画リスト. nicochannel.jp/channel/videos
+ - **NiconicoUser**
+ - **nicovideo:search**: Nico video search; "nicosearch:" prefix
+ - **nicovideo:​search:date**: Nico video search, newest first; "nicosearchdate:" prefix
+ - **nicovideo:search_url**: Nico video search URLs
+ - **NinaProtocol**
+ - **Nintendo**
+ - **Nitter**
+ - **njoy**: N-JOY
+ - **njoy:embed**
+ - **NobelPrize**: (**Currently broken**)
+ - **NoicePodcast**
+ - **NonkTube**
+ - **NoodleMagazine**
+ - **Noovo**
+ - **NOSNLArticle**
+ - **Nova**: TN.cz, Prásk.tv, Nova.cz, Novaplus.cz, FANDA.tv, Krásná.cz and Doma.cz
+ - **NovaEmbed**
+ - **NovaPlay**
+ - **nowness**
+ - **nowness:playlist**
+ - **nowness:series**
+ - **Noz**: (**Currently broken**)
+ - **npo**: npo.nl, ntr.nl, omroepwnl.nl, zapp.nl and npo3.nl
+ - **npo.nl:live**
+ - **npo.nl:radio**
+ - **npo.nl:​radio:fragment**
+ - **Npr**
+ - **NRK**
+ - **NRKPlaylist**
+ - **NRKRadioPodkast**
+ - **NRKSkole**: NRK Skole
+ - **NRKTV**: NRK TV and NRK Radio
+ - **NRKTVDirekte**: NRK TV Direkte and NRK Radio Direkte
+ - **NRKTVEpisode**
+ - **NRKTVEpisodes**
+ - **NRKTVSeason**
+ - **NRKTVSeries**
+ - **NRLTV**: (**Currently broken**)
+ - **ntv.ru**
+ - **NubilesPorn**: [*nubiles-porn*](## "netrc machine")
+ - **nuum:live**
+ - **nuum:media**
+ - **nuum:tab**
+ - **Nuvid**
+ - **NYTimes**
+ - **NYTimesArticle**
+ - **NYTimesCookingGuide**
+ - **NYTimesCookingRecipe**
+ - **nzherald**
+ - **NZOnScreen**
+ - **NZZ**
+ - **ocw.mit.edu**
+ - **Odnoklassniki**
+ - **OfTV**
+ - **OfTVPlaylist**
+ - **OktoberfestTV**
+ - **OlympicsReplay**
+ - **on24**: ON24
+ - **OnDemandChinaEpisode**
+ - **OnDemandKorea**
+ - **OnDemandKoreaProgram**
+ - **OneFootball**
+ - **OnePlacePodcast**
+ - **onet.pl**
+ - **onet.tv**
+ - **onet.tv:channel**
+ - **OnetMVP**
+ - **OnionStudios**
+ - **Opencast**
+ - **OpencastPlaylist**
+ - **openrec**
+ - **openrec:capture**
+ - **openrec:movie**
+ - **OraTV**
+ - **orf:​fm4:story**: fm4.orf.at stories
+ - **orf:iptv**: iptv.ORF.at
+ - **orf:on**
+ - **orf:podcast**
+ - **orf:radio**
+ - **orf:tvthek**: ORF TVthek
+ - **OsnatelTV**: [*osnateltv*](## "netrc machine")
+ - **OsnatelTVLive**: [*osnateltv*](## "netrc machine")
+ - **OsnatelTVRecordings**: [*osnateltv*](## "netrc machine")
+ - **OutsideTV**
+ - **OwnCloud**
+ - **PacktPub**: [*packtpub*](## "netrc machine")
+ - **PacktPubCourse**
+ - **PalcoMP3:artist**
+ - **PalcoMP3:song**
+ - **PalcoMP3:video**
+ - **Panopto**
+ - **PanoptoList**
+ - **PanoptoPlaylist**
+ - **ParamountNetwork**
+ - **ParamountPlus**
+ - **ParamountPlusSeries**
+ - **ParamountPressExpress**
+ - **Parler**: Posts on parler.com
+ - **parliamentlive.tv**: UK parliament videos
+ - **Parlview**: (**Currently broken**)
+ - **Patreon**
+ - **PatreonCampaign**
+ - **pbs**: Public Broadcasting Service (PBS) and member stations: PBS: Public Broadcasting Service, APT - Alabama Public Television (WBIQ), GPB/Georgia Public Broadcasting (WGTV), Mississippi Public Broadcasting (WMPN), Nashville Public Television (WNPT), WFSU-TV (WFSU), WSRE (WSRE), WTCI (WTCI), WPBA/Channel 30 (WPBA), Alaska Public Media (KAKM), Arizona PBS (KAET), KNME-TV/Channel 5 (KNME), Vegas PBS (KLVX), AETN/ARKANSAS ETV NETWORK (KETS), KET (WKLE), WKNO/Channel 10 (WKNO), LPB/LOUISIANA PUBLIC BROADCASTING (WLPB), OETA (KETA), Ozarks Public Television (KOZK), WSIU Public Broadcasting (WSIU), KEET TV (KEET), KIXE/Channel 9 (KIXE), KPBS San Diego (KPBS), KQED (KQED), KVIE Public Television (KVIE), PBS SoCal/KOCE (KOCE), ValleyPBS (KVPT), CONNECTICUT PUBLIC TELEVISION (WEDH), KNPB Channel 5 (KNPB), SOPTV (KSYS), Rocky Mountain PBS (KRMA), KENW-TV3 (KENW), KUED Channel 7 (KUED), Wyoming PBS (KCWC), Colorado Public Television / KBDI 12 (KBDI), KBYU-TV (KBYU), Thirteen/WNET New York (WNET), WGBH/Channel 2 (WGBH), WGBY (WGBY), NJTV Public Media NJ (WNJT), WLIW21 (WLIW), mpt/Maryland Public Television (WMPB), WETA Television and Radio (WETA), WHYY (WHYY), PBS 39 (WLVT), WVPT - Your Source for PBS and More! (WVPT), Howard University Television (WHUT), WEDU PBS (WEDU), WGCU Public Media (WGCU), WPBT2 (WPBT), WUCF TV (WUCF), WUFT/Channel 5 (WUFT), WXEL/Channel 42 (WXEL), WLRN/Channel 17 (WLRN), WUSF Public Broadcasting (WUSF), ETV (WRLK), UNC-TV (WUNC), PBS Hawaii - Oceanic Cable Channel 10 (KHET), Idaho Public Television (KAID), KSPS (KSPS), OPB (KOPB), KWSU/Channel 10 & KTNW/Channel 31 (KWSU), WILL-TV (WILL), Network Knowledge - WSEC/Springfield (WSEC), WTTW11 (WTTW), Iowa Public Television/IPTV (KDIN), Nine Network (KETC), PBS39 Fort Wayne (WFWA), WFYI Indianapolis (WFYI), Milwaukee Public Television (WMVS), WNIN (WNIN), WNIT Public Television (WNIT), WPT (WPNE), WVUT/Channel 22 (WVUT), WEIU/Channel 51 (WEIU), WQPT-TV (WQPT), WYCC PBS Chicago (WYCC), WIPB-TV (WIPB), WTIU (WTIU), CET (WCET), ThinkTVNetwork (WPTD), WBGU-TV (WBGU), WGVU TV (WGVU), NET1 (KUON), Pioneer Public Television (KWCM), SDPB Television (KUSD), TPT (KTCA), KSMQ (KSMQ), KPTS/Channel 8 (KPTS), KTWU/Channel 11 (KTWU), East Tennessee PBS (WSJK), WCTE-TV (WCTE), WLJT, Channel 11 (WLJT), WOSU TV (WOSU), WOUB/WOUC (WOUB), WVPB (WVPB), WKYU-PBS (WKYU), KERA 13 (KERA), MPBN (WCBB), Mountain Lake PBS (WCFE), NHPTV (WENH), Vermont PBS (WETK), witf (WITF), WQED Multimedia (WQED), WMHT Educational Telecommunications (WMHT), Q-TV (WDCQ), WTVS Detroit Public TV (WTVS), CMU Public Television (WCMU), WKAR-TV (WKAR), WNMU-TV Public TV 13 (WNMU), WDSE - WRPT (WDSE), WGTE TV (WGTE), Lakeland Public Television (KAWE), KMOS-TV - Channels 6.1, 6.2 and 6.3 (KMOS), MontanaPBS (KUSM), KRWG/Channel 22 (KRWG), KACV (KACV), KCOS/Channel 13 (KCOS), WCNY/Channel 24 (WCNY), WNED (WNED), WPBS (WPBS), WSKG Public TV (WSKG), WXXI (WXXI), WPSU (WPSU), WVIA Public Media Studios (WVIA), WTVI (WTVI), Western Reserve PBS (WNEO), WVIZ/PBS ideastream (WVIZ), KCTS 9 (KCTS), Basin PBS (KPBT), KUHT / Channel 8 (KUHT), KLRN (KLRN), KLRU (KLRU), WTJX Channel 12 (WTJX), WCVE PBS (WCVE), KBTC Public Television (KBTC)
+ - **PBSKids**
+ - **PearVideo**
+ - **PeekVids**
+ - **peer.tv**
+ - **PeerTube**
+ - **PeerTube:Playlist**
+ - **peloton**: [*peloton*](## "netrc machine")
+ - **peloton:live**: Peloton Live
+ - **PerformGroup**
+ - **periscope**: Periscope
+ - **periscope:user**: Periscope user videos
+ - **PGATour**
+ - **PhilharmonieDeParis**: Philharmonie de Paris
+ - **phoenix.de**
+ - **Photobucket**
+ - **Piapro**: [*piapro*](## "netrc machine")
+ - **PIAULIZAPortal**: ulizaportal.jp - PIA LIVE STREAM
+ - **Picarto**
+ - **PicartoVod**
+ - **Piksel**
+ - **Pinkbike**
+ - **Pinterest**
+ - **PinterestCollection**
+ - **pixiv:sketch**
+ - **pixiv:​sketch:user**
+ - **Pladform**
+ - **PlanetMarathi**
+ - **Platzi**: [*platzi*](## "netrc machine")
+ - **PlatziCourse**: [*platzi*](## "netrc machine")
+ - **player.sky.it**
+ - **playeur**
+ - **PlayPlusTV**: [*playplustv*](## "netrc machine")
+ - **PlaySuisse**: [*playsuisse*](## "netrc machine")
+ - **Playtvak**: Playtvak.cz, iDNES.cz and Lidovky.cz
+ - **PlayVids**
+ - **Playwire**
+ - **pluralsight**: [*pluralsight*](## "netrc machine")
+ - **pluralsight:course**
+ - **PlutoTV**: (**Currently broken**)
+ - **PodbayFM**
+ - **PodbayFMChannel**
+ - **Podchaser**
+ - **podomatic**: (**Currently broken**)
+ - **Pokemon**
+ - **PokemonWatch**
+ - **PokerGo**: [*pokergo*](## "netrc machine")
+ - **PokerGoCollection**: [*pokergo*](## "netrc machine")
+ - **PolsatGo**
+ - **PolskieRadio**
+ - **polskieradio:audition**
+ - **polskieradio:category**
+ - **polskieradio:legacy**
+ - **polskieradio:player**
+ - **polskieradio:podcast**
+ - **polskieradio:​podcast:list**
+ - **Popcorntimes**
+ - **PopcornTV**
+ - **Pornbox**
+ - **PornerBros**
+ - **PornFlip**
+ - **PornHub**: [*pornhub*](## "netrc machine") PornHub and Thumbzilla
+ - **PornHubPagedVideoList**: [*pornhub*](## "netrc machine")
+ - **PornHubPlaylist**: [*pornhub*](## "netrc machine")
+ - **PornHubUser**: [*pornhub*](## "netrc machine")
+ - **PornHubUserVideosUpload**: [*pornhub*](## "netrc machine")
+ - **Pornotube**
+ - **PornoVoisines**: (**Currently broken**)
+ - **PornoXO**: (**Currently broken**)
+ - **PornTop**
+ - **PornTube**
+ - **Pr0gramm**
+ - **PrankCast**
+ - **PrankCastPost**
+ - **PremiershipRugby**
+ - **PressTV**
+ - **ProjectVeritas**: (**Currently broken**)
+ - **prosiebensat1**: ProSiebenSat.1 Digital
+ - **PRXAccount**
+ - **PRXSeries**
+ - **prxseries:search**: PRX Series Search; "prxseries:" prefix
+ - **prxstories:search**: PRX Stories Search; "prxstories:" prefix
+ - **PRXStory**
+ - **puhutv**
+ - **puhutv:serie**
+ - **Puls4**
+ - **Pyvideo**
+ - **QDance**: [*qdance*](## "netrc machine")
+ - **QingTing**
+ - **qqmusic**: QQ音乐
+ - **qqmusic:album**: QQ音乐 - 专辑
+ - **qqmusic:playlist**: QQ音乐 - 歌单
+ - **qqmusic:singer**: QQ音乐 - 歌手
+ - **qqmusic:toplist**: QQ音乐 - 排行榜
+ - **QuantumTV**: [*quantumtv*](## "netrc machine")
+ - **QuantumTVLive**: [*quantumtv*](## "netrc machine")
+ - **QuantumTVRecordings**: [*quantumtv*](## "netrc machine")
+ - **Qub**
+ - **R7**: (**Currently broken**)
+ - **R7Article**: (**Currently broken**)
+ - **Radiko**
+ - **RadikoRadio**
+ - **radio.de**: (**Currently broken**)
+ - **radiocanada**
+ - **radiocanada:audiovideo**
+ - **RadioComercial**
+ - **RadioComercialPlaylist**
+ - **radiofrance**
+ - **RadioFranceLive**
+ - **RadioFrancePodcast**
+ - **RadioFranceProfile**
+ - **RadioFranceProgramSchedule**
+ - **RadioJavan**: (**Currently broken**)
+ - **radiokapital**
+ - **radiokapital:show**
+ - **RadioZetPodcast**
+ - **radlive**
+ - **radlive:channel**
+ - **radlive:season**
+ - **Rai**
+ - **RaiCultura**
+ - **RaiNews**
+ - **RaiPlay**
+ - **RaiPlayLive**
+ - **RaiPlayPlaylist**
+ - **RaiPlaySound**
+ - **RaiPlaySoundLive**
+ - **RaiPlaySoundPlaylist**
+ - **RaiSudtirol**
+ - **RayWenderlich**
+ - **RayWenderlichCourse**
+ - **RbgTum**
+ - **RbgTumCourse**
+ - **RbgTumNewCourse**
+ - **RCS**
+ - **RCSEmbeds**
+ - **RCSVarious**
+ - **RCTIPlus**
+ - **RCTIPlusSeries**
+ - **RCTIPlusTV**
+ - **RDS**: RDS.ca (**Currently broken**)
+ - **RedBull**
+ - **RedBullEmbed**
+ - **RedBullTV**
+ - **RedBullTVRrnContent**
+ - **redcdnlivx**
+ - **Reddit**: [*reddit*](## "netrc machine")
+ - **RedGifs**
+ - **RedGifsSearch**: Redgifs search
+ - **RedGifsUser**: Redgifs user
+ - **RedTube**
+ - **RENTV**: (**Currently broken**)
+ - **RENTVArticle**: (**Currently broken**)
+ - **Restudy**: (**Currently broken**)
+ - **Reuters**: (**Currently broken**)
+ - **ReverbNation**
+ - **RheinMainTV**
+ - **RideHome**
+ - **RinseFM**
+ - **RinseFMArtistPlaylist**
+ - **RMCDecouverte**
+ - **RockstarGames**: (**Currently broken**)
+ - **Rokfin**: [*rokfin*](## "netrc machine")
+ - **rokfin:channel**: Rokfin Channels
+ - **rokfin:search**: Rokfin Search; "rkfnsearch:" prefix
+ - **rokfin:stack**: Rokfin Stacks
+ - **RoosterTeeth**: [*roosterteeth*](## "netrc machine")
+ - **RoosterTeethSeries**: [*roosterteeth*](## "netrc machine")
+ - **RottenTomatoes**
+ - **Rozhlas**
+ - **RozhlasVltava**
+ - **RTBF**: [*rtbf*](## "netrc machine") (**Currently broken**)
+ - **RTDocumentry**
+ - **RTDocumentryPlaylist**
+ - **rte**: Raidió Teilifís Éireann TV
+ - **rte:radio**: Raidió Teilifís Éireann radio
+ - **rtl.lu:article**
+ - **rtl.lu:tele-vod**
+ - **rtl.nl**: rtl.nl and rtlxl.nl
+ - **rtl2**
+ - **RTLLuLive**
+ - **RTLLuRadio**
+ - **RTNews**
+ - **RTP**
+ - **RTRFM**
+ - **RTS**: RTS.ch (**Currently broken**)
+ - **RTVCKaltura**
+ - **RTVCPlay**
+ - **RTVCPlayEmbed**
+ - **rtve.es:alacarta**: RTVE a la carta
+ - **rtve.es:audio**: RTVE audio
+ - **rtve.es:infantil**: RTVE infantil
+ - **rtve.es:live**: RTVE.es live streams
+ - **rtve.es:television**
+ - **RTVS**
+ - **rtvslo.si**
+ - **RudoVideo**
+ - **Rule34Video**
+ - **Rumble**
+ - **RumbleChannel**
+ - **RumbleEmbed**
+ - **Ruptly**
+ - **rutube**: Rutube videos
+ - **rutube:channel**: Rutube channel
+ - **rutube:embed**: Rutube embedded videos
+ - **rutube:movie**: Rutube movies
+ - **rutube:person**: Rutube person videos
+ - **rutube:playlist**: Rutube playlists
+ - **rutube:tags**: Rutube tags
+ - **RUTV**: RUTV.RU
+ - **Ruutu**
+ - **Ruv**
+ - **ruv.is:spila**
+ - **S4C**
+ - **S4CSeries**
+ - **safari**: [*safari*](## "netrc machine") safaribooksonline.com online video
+ - **safari:api**: [*safari*](## "netrc machine")
+ - **safari:course**: [*safari*](## "netrc machine") safaribooksonline.com online courses
+ - **Saitosan**: (**Currently broken**)
+ - **SAKTV**: [*saktv*](## "netrc machine")
+ - **SAKTVLive**: [*saktv*](## "netrc machine")
+ - **SAKTVRecordings**: [*saktv*](## "netrc machine")
+ - **SaltTV**: [*salttv*](## "netrc machine")
+ - **SaltTVLive**: [*salttv*](## "netrc machine")
+ - **SaltTVRecordings**: [*salttv*](## "netrc machine")
+ - **SampleFocus**
+ - **Sangiin**: 参議院インターネット審議中継 (archive)
+ - **Sapo**: SAPO Vídeos
+ - **SBS**: sbs.com.au
+ - **sbs.co.kr**
+ - **sbs.co.kr:allvod_program**
+ - **sbs.co.kr:programs_vod**
+ - **schooltv**
+ - **ScienceChannel**
+ - **screen.yahoo:search**: Yahoo screen search; "yvsearch:" prefix
+ - **Screen9**
+ - **Screencast**
+ - **Screencastify**
+ - **ScreencastOMatic**
+ - **ScrippsNetworks**
+ - **scrippsnetworks:watch**
+ - **Scrolller**
+ - **SCTE**: [*scte*](## "netrc machine") (**Currently broken**)
+ - **SCTECourse**: [*scte*](## "netrc machine") (**Currently broken**)
+ - **sejm**
+ - **SenalColombiaLive**: (**Currently broken**)
+ - **SenateGov**
+ - **SenateISVP**
+ - **SendtoNews**: (**Currently broken**)
+ - **Servus**
+ - **Sexu**: (**Currently broken**)
+ - **SeznamZpravy**
+ - **SeznamZpravyArticle**
+ - **Shahid**: [*shahid*](## "netrc machine")
+ - **ShahidShow**
+ - **ShareVideosEmbed**
+ - **ShemarooMe**
+ - **ShowRoomLive**
+ - **ShugiinItvLive**: 衆議院インターネット審議中継
+ - **ShugiinItvLiveRoom**: 衆議院インターネット審議中継 (中継)
+ - **ShugiinItvVod**: 衆議院インターネット審議中継 (ビデオライブラリ)
+ - **SibnetEmbed**
+ - **simplecast**
+ - **simplecast:episode**
+ - **simplecast:podcast**
+ - **Sina**
+ - **Skeb**
+ - **sky.it**
+ - **sky:news**
+ - **sky:​news:story**
+ - **sky:sports**
+ - **sky:​sports:news**
+ - **SkylineWebcams**: (**Currently broken**)
+ - **skynewsarabia:article**: (**Currently broken**)
+ - **skynewsarabia:video**: (**Currently broken**)
+ - **SkyNewsAU**
+ - **Slideshare**
+ - **SlidesLive**
+ - **Slutload**
+ - **Smotrim**
+ - **Snotr**
+ - **Sohu**
+ - **SohuV**
+ - **SonyLIV**: [*sonyliv*](## "netrc machine")
+ - **SonyLIVSeries**
+ - **soundcloud**: [*soundcloud*](## "netrc machine")
+ - **soundcloud:playlist**: [*soundcloud*](## "netrc machine")
+ - **soundcloud:related**: [*soundcloud*](## "netrc machine")
+ - **soundcloud:search**: [*soundcloud*](## "netrc machine") Soundcloud search; "scsearch:" prefix
+ - **soundcloud:set**: [*soundcloud*](## "netrc machine")
+ - **soundcloud:trackstation**: [*soundcloud*](## "netrc machine")
+ - **soundcloud:user**: [*soundcloud*](## "netrc machine")
+ - **soundcloud:​user:permalink**: [*soundcloud*](## "netrc machine")
+ - **SoundcloudEmbed**
+ - **soundgasm**
+ - **soundgasm:profile**
+ - **southpark.cc.com**
+ - **southpark.cc.com:español**
+ - **southpark.de**
+ - **southpark.lat**
+ - **southpark.nl**
+ - **southparkstudios.dk**
+ - **SovietsCloset**
+ - **SovietsClosetPlaylist**
+ - **SpankBang**
+ - **SpankBangPlaylist**
+ - **Spiegel**
+ - **Sport5**
+ - **SportBox**
+ - **SportDeutschland**
+ - **spotify**: Spotify episodes (**Currently broken**)
+ - **spotify:show**: Spotify shows (**Currently broken**)
+ - **Spreaker**
+ - **SpreakerPage**
+ - **SpreakerShow**
+ - **SpreakerShowPage**
+ - **SpringboardPlatform**
+ - **Sprout**
+ - **sr:mediathek**: Saarländischer Rundfunk (**Currently broken**)
+ - **SRGSSR**
+ - **SRGSSRPlay**: srf.ch, rts.ch, rsi.ch, rtr.ch and swissinfo.ch play sites
+ - **StacommuLive**: [*stacommu*](## "netrc machine")
+ - **StacommuVOD**: [*stacommu*](## "netrc machine")
+ - **StagePlusVODConcert**: [*stageplus*](## "netrc machine")
+ - **stanfordoc**: Stanford Open ClassRoom
+ - **StarTrek**: (**Currently broken**)
+ - **startv**
+ - **Steam**
+ - **SteamCommunityBroadcast**
+ - **Stitcher**
+ - **StitcherShow**
+ - **StoryFire**
+ - **StoryFireSeries**
+ - **StoryFireUser**
+ - **Streamable**
+ - **StreamCZ**
+ - **StreetVoice**
+ - **StretchInternet**
+ - **Stripchat**
+ - **stv:player**
+ - **Substack**
+ - **SunPorno**
+ - **sverigesradio:episode**
+ - **sverigesradio:publication**
+ - **SVT**
+ - **SVTPage**
+ - **SVTPlay**: SVT Play and Öppet arkiv
+ - **SVTSeries**
+ - **SwearnetEpisode**
+ - **Syfy**: (**Currently broken**)
+ - **SYVDK**
+ - **SztvHu**
+ - **t-online.de**: (**Currently broken**)
+ - **Tagesschau**: (**Currently broken**)
+ - **Tass**: (**Currently broken**)
+ - **TBS**
+ - **TBSJPEpisode**
+ - **TBSJPPlaylist**
+ - **TBSJPProgram**
+ - **Teachable**: [*teachable*](## "netrc machine") (**Currently broken**)
+ - **TeachableCourse**: [*teachable*](## "netrc machine")
+ - **teachertube**: teachertube.com videos (**Currently broken**)
+ - **teachertube:​user:collection**: teachertube.com user and collection videos (**Currently broken**)
+ - **TeachingChannel**: (**Currently broken**)
+ - **Teamcoco**
+ - **TeamTreeHouse**: [*teamtreehouse*](## "netrc machine")
+ - **techtv.mit.edu**
+ - **TedEmbed**
+ - **TedPlaylist**
+ - **TedSeries**
+ - **TedTalk**
+ - **Tele13**
+ - **Tele5**: (**Currently broken**)
+ - **TeleBruxelles**
+ - **TelecaribePlay**
+ - **Telecinco**: telecinco.es, cuatro.com and mediaset.es
+ - **Telegraaf**
+ - **telegram:embed**
+ - **TeleMB**: (**Currently broken**)
+ - **Telemundo**: (**Currently broken**)
+ - **TeleQuebec**
+ - **TeleQuebecEmission**
+ - **TeleQuebecLive**
+ - **TeleQuebecSquat**
+ - **TeleQuebecVideo**
+ - **TeleTask**: (**Currently broken**)
+ - **Telewebion**
+ - **Tempo**
+ - **TennisTV**: [*tennistv*](## "netrc machine")
+ - **TenPlay**: [*10play*](## "netrc machine")
+ - **TenPlaySeason**
+ - **TF1**
+ - **TFO**
+ - **theatercomplextown:ppv**: [*theatercomplextown*](## "netrc machine")
+ - **theatercomplextown:vod**: [*theatercomplextown*](## "netrc machine")
+ - **TheGuardianPodcast**
+ - **TheGuardianPodcastPlaylist**
+ - **TheHoleTv**
+ - **TheIntercept**
+ - **ThePlatform**
+ - **ThePlatformFeed**
+ - **TheStar**
+ - **TheSun**
+ - **TheWeatherChannel**
+ - **ThisAmericanLife**
+ - **ThisOldHouse**: [*thisoldhouse*](## "netrc machine")
+ - **ThisVid**
+ - **ThisVidMember**
+ - **ThisVidPlaylist**
+ - **ThreeSpeak**
+ - **ThreeSpeakUser**
+ - **TikTok**
+ - **tiktok:effect**: (**Currently broken**)
+ - **tiktok:live**
+ - **tiktok:sound**: (**Currently broken**)
+ - **tiktok:tag**: (**Currently broken**)
+ - **tiktok:user**: (**Currently broken**)
+ - **TLC**
+ - **TMZ**
+ - **TNAFlix**
+ - **TNAFlixNetworkEmbed**
+ - **toggle**
+ - **toggo**
+ - **tokfm:audition**
+ - **tokfm:podcast**
+ - **ToonGoggles**
+ - **tou.tv**: [*toutv*](## "netrc machine")
+ - **Toypics**: Toypics video (**Currently broken**)
+ - **ToypicsUser**: Toypics user profile (**Currently broken**)
+ - **TrailerAddict**: (**Currently broken**)
+ - **TravelChannel**
+ - **Triller**: [*triller*](## "netrc machine")
+ - **TrillerShort**
+ - **TrillerUser**: [*triller*](## "netrc machine")
+ - **Trovo**
+ - **TrovoChannelClip**: All Clips of a trovo.live channel; "trovoclip:" prefix
+ - **TrovoChannelVod**: All VODs of a trovo.live channel; "trovovod:" prefix
+ - **TrovoVod**
+ - **TrtCocukVideo**
+ - **TrtWorld**
+ - **TrueID**
+ - **TruNews**
+ - **Truth**
+ - **TruTV**
+ - **Tube8**: (**Currently broken**)
+ - **TubeTuGraz**: [*tubetugraz*](## "netrc machine") tube.tugraz.at
+ - **TubeTuGrazSeries**: [*tubetugraz*](## "netrc machine")
+ - **TubiTv**: [*tubitv*](## "netrc machine")
+ - **TubiTvShow**
+ - **Tumblr**: [*tumblr*](## "netrc machine")
+ - **TuneInPodcast**
+ - **TuneInPodcastEpisode**
+ - **TuneInStation**
+ - **tv.dfb.de**
+ - **TV2**
+ - **TV2Article**
+ - **TV2DK**
+ - **TV2DKBornholmPlay**
+ - **tv2play.hu**
+ - **tv2playseries.hu**
+ - **TV4**: tv4.se and tv4play.se
+ - **TV5MondePlus**: TV5MONDE+
+ - **tv5unis**
+ - **tv5unis:video**
+ - **tv8.it**
+ - **TVA**
+ - **TVANouvelles**
+ - **TVANouvellesArticle**
+ - **TVC**
+ - **TVCArticle**
+ - **TVer**
+ - **tvigle**: Интернет-телевидение Tvigle.ru
+ - **TVIPlayer**
+ - **tvland.com**
+ - **TVN24**: (**Currently broken**)
+ - **TVNoe**: (**Currently broken**)
+ - **tvopengr:embed**: tvopen.gr embedded videos
+ - **tvopengr:watch**: tvopen.gr (and ethnos.gr) videos
+ - **tvp**: Telewizja Polska
+ - **tvp:embed**: Telewizja Polska
+ - **tvp:stream**
+ - **tvp:vod**
+ - **tvp:​vod:series**
+ - **TVPlayer**
+ - **TVPlayHome**
+ - **Tweakers**
+ - **TwitCasting**
+ - **TwitCastingLive**
+ - **TwitCastingUser**
+ - **twitch:clips**: [*twitch*](## "netrc machine")
+ - **twitch:stream**: [*twitch*](## "netrc machine")
+ - **twitch:vod**: [*twitch*](## "netrc machine")
+ - **TwitchCollection**: [*twitch*](## "netrc machine")
+ - **TwitchVideos**: [*twitch*](## "netrc machine")
+ - **TwitchVideosClips**: [*twitch*](## "netrc machine")
+ - **TwitchVideosCollections**: [*twitch*](## "netrc machine")
+ - **twitter**: [*twitter*](## "netrc machine")
+ - **twitter:amplify**: [*twitter*](## "netrc machine")
+ - **twitter:broadcast**: [*twitter*](## "netrc machine")
+ - **twitter:card**
+ - **twitter:shortener**: [*twitter*](## "netrc machine")
+ - **twitter:spaces**: [*twitter*](## "netrc machine")
+ - **Txxx**
+ - **udemy**: [*udemy*](## "netrc machine")
+ - **udemy:course**: [*udemy*](## "netrc machine")
+ - **UDNEmbed**: 聯合影音
+ - **UFCArabia**: [*ufcarabia*](## "netrc machine")
+ - **UFCTV**: [*ufctv*](## "netrc machine")
+ - **ukcolumn**: (**Currently broken**)
+ - **UKTVPlay**
+ - **umg:de**: Universal Music Deutschland (**Currently broken**)
+ - **Unistra**
+ - **Unity**: (**Currently broken**)
+ - **uol.com.br**
+ - **uplynk**
+ - **uplynk:preplay**
+ - **Urort**: NRK P3 Urørt (**Currently broken**)
+ - **URPlay**
+ - **USANetwork**
+ - **USAToday**
+ - **ustream**
+ - **ustream:channel**
+ - **ustudio**
+ - **ustudio:embed**
+ - **Varzesh3**: (**Currently broken**)
+ - **Vbox7**
+ - **Veo**
+ - **Veoh**
+ - **veoh:user**
+ - **Vesti**: Вести.Ru (**Currently broken**)
+ - **Vevo**
+ - **VevoPlaylist**
+ - **VGTV**: VGTV, BTTV, FTV, Aftenposten and Aftonbladet
+ - **vh1.com**
+ - **vhx:embed**: [*vimeo*](## "netrc machine")
+ - **vice**
+ - **vice:article**
+ - **vice:show**
+ - **Viddler**
+ - **Videa**
+ - **video.arnes.si**: Arnes Video
+ - **video.google:search**: Google Video search; "gvsearch:" prefix
+ - **video.sky.it**
+ - **video.sky.it:live**
+ - **VideoDetective**
+ - **videofy.me**: (**Currently broken**)
+ - **VideoKen**
+ - **VideoKenCategory**
+ - **VideoKenPlayer**
+ - **VideoKenPlaylist**
+ - **VideoKenTopic**
+ - **videomore**
+ - **videomore:season**
+ - **videomore:video**
+ - **VideoPress**
+ - **Vidio**: [*vidio*](## "netrc machine")
+ - **VidioLive**: [*vidio*](## "netrc machine")
+ - **VidioPremier**: [*vidio*](## "netrc machine")
+ - **VidLii**
+ - **Vidly**
+ - **viewlift**
+ - **viewlift:embed**
+ - **Viidea**
+ - **viki**: [*viki*](## "netrc machine")
+ - **viki:channel**: [*viki*](## "netrc machine")
+ - **vimeo**: [*vimeo*](## "netrc machine")
+ - **vimeo:album**: [*vimeo*](## "netrc machine")
+ - **vimeo:channel**: [*vimeo*](## "netrc machine")
+ - **vimeo:group**: [*vimeo*](## "netrc machine")
+ - **vimeo:likes**: [*vimeo*](## "netrc machine") Vimeo user likes
+ - **vimeo:ondemand**: [*vimeo*](## "netrc machine")
+ - **vimeo:pro**: [*vimeo*](## "netrc machine")
+ - **vimeo:review**: [*vimeo*](## "netrc machine") Review pages on vimeo
+ - **vimeo:user**: [*vimeo*](## "netrc machine")
+ - **vimeo:watchlater**: [*vimeo*](## "netrc machine") Vimeo watch later list, ":vimeowatchlater" keyword (requires authentication)
+ - **Vimm:recording**
+ - **Vimm:stream**
+ - **ViMP**
+ - **ViMP:Playlist**
+ - **Vine**
+ - **vine:user**
+ - **Viously**
+ - **Viqeo**: (**Currently broken**)
+ - **Viu**
+ - **viu:ott**: [*viu*](## "netrc machine")
+ - **viu:playlist**
+ - **ViuOTTIndonesia**
+ - **vk**: [*vk*](## "netrc machine") VK
+ - **vk:uservideos**: [*vk*](## "netrc machine") VK - User's Videos
+ - **vk:wallpost**: [*vk*](## "netrc machine")
+ - **VKPlay**
+ - **VKPlayLive**
+ - **vm.tiktok**
+ - **Vocaroo**
+ - **VODPl**
+ - **VODPlatform**
+ - **voicy**: (**Currently broken**)
+ - **voicy:channel**: (**Currently broken**)
+ - **VolejTV**
+ - **Voot**: [*voot*](## "netrc machine") (**Currently broken**)
+ - **VootSeries**: [*voot*](## "netrc machine") (**Currently broken**)
+ - **VoxMedia**
+ - **VoxMediaVolume**
+ - **vpro**: npo.nl, ntr.nl, omroepwnl.nl, zapp.nl and npo3.nl
+ - **vqq:series**
+ - **vqq:video**
+ - **VRT**: VRT NWS, Flanders News, Flandern Info and Sporza
+ - **VrtNU**: [*vrtnu*](## "netrc machine") VRT MAX
+ - **VTM**: (**Currently broken**)
+ - **VTXTV**: [*vtxtv*](## "netrc machine")
+ - **VTXTVLive**: [*vtxtv*](## "netrc machine")
+ - **VTXTVRecordings**: [*vtxtv*](## "netrc machine")
+ - **VuClip**
+ - **VVVVID**
+ - **VVVVIDShow**
+ - **Walla**
+ - **WalyTV**: [*walytv*](## "netrc machine")
+ - **WalyTVLive**: [*walytv*](## "netrc machine")
+ - **WalyTVRecordings**: [*walytv*](## "netrc machine")
+ - **washingtonpost**
+ - **washingtonpost:article**
+ - **wat.tv**
+ - **WatchESPN**
+ - **WDR**
+ - **wdr:mobile**: (**Currently broken**)
+ - **WDRElefant**
+ - **WDRPage**
+ - **web.archive:youtube**: web.archive.org saved youtube videos, "ytarchive:" prefix
+ - **Webcamerapl**
+ - **Webcaster**
+ - **WebcasterFeed**
+ - **WebOfStories**
+ - **WebOfStoriesPlaylist**
+ - **Weibo**
+ - **WeiboUser**
+ - **WeiboVideo**
+ - **WeiqiTV**: WQTV (**Currently broken**)
+ - **wetv:episode**
+ - **WeTvSeries**
+ - **Weverse**: [*weverse*](## "netrc machine")
+ - **WeverseLive**: [*weverse*](## "netrc machine")
+ - **WeverseLiveTab**: [*weverse*](## "netrc machine")
+ - **WeverseMedia**: [*weverse*](## "netrc machine")
+ - **WeverseMediaTab**: [*weverse*](## "netrc machine")
+ - **WeverseMoment**: [*weverse*](## "netrc machine")
+ - **WeVidi**
+ - **Weyyak**
+ - **whowatch**
+ - **Whyp**
+ - **wikimedia.org**
+ - **Wimbledon**
+ - **WimTV**
+ - **WinSportsVideo**
+ - **Wistia**
+ - **WistiaChannel**
+ - **WistiaPlaylist**
+ - **wnl**: npo.nl, ntr.nl, omroepwnl.nl, zapp.nl and npo3.nl
+ - **wordpress:mb.miniAudioPlayer**
+ - **wordpress:playlist**
+ - **WorldStarHipHop**
+ - **wppilot**
+ - **wppilot:channels**
+ - **WrestleUniversePPV**: [*wrestleuniverse*](## "netrc machine")
+ - **WrestleUniverseVOD**: [*wrestleuniverse*](## "netrc machine")
+ - **WSJ**: Wall Street Journal
+ - **WSJArticle**
+ - **WWE**
+ - **wyborcza:video**
+ - **WyborczaPodcast**
+ - **wykop:dig**
+ - **wykop:​dig:comment**
+ - **wykop:post**
+ - **wykop:​post:comment**
+ - **Xanimu**
+ - **XboxClips**
+ - **XFileShare**: XFileShare based sites: Aparat, ClipWatching, GoUnlimited, GoVid, HolaVid, Streamty, TheVideoBee, Uqload, VidBom, vidlo, VidLocker, VidShare, VUp, WolfStream, XVideoSharing
+ - **XHamster**
+ - **XHamsterEmbed**
+ - **XHamsterUser**
+ - **ximalaya**: 喜马拉雅FM
+ - **ximalaya:album**: 喜马拉雅FM 专辑
+ - **xinpianchang**: xinpianchang.com (**Currently broken**)
+ - **XMinus**: (**Currently broken**)
+ - **XNXX**
+ - **Xstream**
+ - **XVideos**
+ - **xvideos:quickies**
+ - **XXXYMovies**
+ - **Yahoo**: Yahoo screen and movies
+ - **yahoo:japannews**: Yahoo! Japan News
+ - **YandexDisk**
+ - **yandexmusic:album**: Яндекс.Музыка - Альбом
+ - **yandexmusic:​artist:albums**: Яндекс.Музыка - Артист - Альбомы
+ - **yandexmusic:​artist:tracks**: Яндекс.Музыка - Артист - Треки
+ - **yandexmusic:playlist**: Яндекс.Музыка - Плейлист
+ - **yandexmusic:track**: Яндекс.Музыка - Трек
+ - **YandexVideo**
+ - **YandexVideoPreview**
+ - **YapFiles**: (**Currently broken**)
+ - **Yappy**: (**Currently broken**)
+ - **YappyProfile**
+ - **YleAreena**
+ - **YouJizz**
+ - **youku**: 优酷
+ - **youku:show**
+ - **YouNowChannel**
+ - **YouNowLive**
+ - **YouNowMoment**
+ - **YouPorn**
+ - **YourPorn**
+ - **YourUpload**
+ - **youtube**: YouTube
+ - **youtube:clip**
+ - **youtube:favorites**: YouTube liked videos; ":ytfav" keyword (requires cookies)
+ - **youtube:history**: YouTube watch history; ":ythis" keyword (requires cookies)
+ - **youtube:​music:search_url**: YouTube music search URLs with selectable sections, e.g. #songs
+ - **youtube:notif**: YouTube notifications; ":ytnotif" keyword (requires cookies)
+ - **youtube:playlist**: YouTube playlists
+ - **youtube:recommended**: YouTube recommended videos; ":ytrec" keyword
+ - **youtube:search**: YouTube search; "ytsearch:" prefix
+ - **youtube:​search:date**: YouTube search, newest videos first; "ytsearchdate:" prefix
+ - **youtube:search_url**: YouTube search URLs with sorting and filter support
+ - **youtube:​shorts:pivot:audio**: YouTube Shorts audio pivot (Shorts using audio of a given video)
+ - **youtube:subscriptions**: YouTube subscriptions feed; ":ytsubs" keyword (requires cookies)
+ - **youtube:tab**: YouTube Tabs
+ - **youtube:user**: YouTube user videos; "ytuser:" prefix
+ - **youtube:watchlater**: YouTube watch later list; ":ytwatchlater" keyword (requires cookies)
+ - **YoutubeLivestreamEmbed**: YouTube livestream embeds
+ - **YoutubeYtBe**: youtu.be
+ - **Zaiko**
+ - **ZaikoETicket**
+ - **Zapiks**
+ - **Zattoo**: [*zattoo*](## "netrc machine")
+ - **ZattooLive**: [*zattoo*](## "netrc machine")
+ - **ZattooMovies**: [*zattoo*](## "netrc machine")
+ - **ZattooRecordings**: [*zattoo*](## "netrc machine")
+ - **ZDF**
+ - **ZDFChannel**
+ - **Zee5**: [*zee5*](## "netrc machine")
+ - **zee5:series**
+ - **ZeeNews**: (**Currently broken**)
+ - **ZenPorn**
+ - **ZenYandex**
+ - **ZenYandexChannel**
+ - **ZetlandDKArticle**
+ - **Zhihu**
+ - **zingmp3**: zingmp3.vn
+ - **zingmp3:album**
+ - **zingmp3:chart-home**
+ - **zingmp3:chart-music-video**
+ - **zingmp3:hub**
+ - **zingmp3:liveradio**
+ - **zingmp3:podcast**
+ - **zingmp3:podcast-episode**
+ - **zingmp3:user**
+ - **zingmp3:week-chart**
+ - **zoom**
+ - **Zype**
+ - **generic**: Generic downloader that works on some sites
diff --git a/test/__init__.py b/test/__init__.py
new file mode 100644
index 0000000..e69de29
--- /dev/null
+++ b/test/__init__.py
diff --git a/test/conftest.py b/test/conftest.py
new file mode 100644
index 0000000..2fbc269
--- /dev/null
+++ b/test/conftest.py
@@ -0,0 +1,26 @@
+import functools
+import inspect
+
+import pytest
+
+from yt_dlp.networking import RequestHandler
+from yt_dlp.networking.common import _REQUEST_HANDLERS
+from yt_dlp.utils._utils import _YDLLogger as FakeLogger
+
+
+@pytest.fixture
+def handler(request):
+ RH_KEY = request.param
+ if inspect.isclass(RH_KEY) and issubclass(RH_KEY, RequestHandler):
+ handler = RH_KEY
+ elif RH_KEY in _REQUEST_HANDLERS:
+ handler = _REQUEST_HANDLERS[RH_KEY]
+ else:
+ pytest.skip(f'{RH_KEY} request handler is not available')
+
+ return functools.partial(handler, logger=FakeLogger)
+
+
+def validate_and_send(rh, req):
+ rh.validate(req)
+ return rh.send(req)
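+
+
+# Illustrative usage sketch (the test name and URL are hypothetical, not part
+# of this suite; Request comes from yt_dlp.networking): tests parametrize the
+# fixture indirectly, construct a handler instance, and send requests through
+# validate_and_send().
+#
+#   @pytest.mark.parametrize('handler', ['Urllib'], indirect=True)
+#   def test_example(handler):
+#       with handler() as rh:
+#           validate_and_send(rh, Request('http://127.0.0.1:8080'))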
diff --git a/test/helper.py b/test/helper.py
new file mode 100644
index 0000000..7760fd8
--- /dev/null
+++ b/test/helper.py
@@ -0,0 +1,340 @@
+import errno
+import hashlib
+import json
+import os.path
+import re
+import ssl
+import sys
+import types
+
+import yt_dlp.extractor
+from yt_dlp import YoutubeDL
+from yt_dlp.compat import compat_os_name
+from yt_dlp.utils import preferredencoding, try_call, write_string, find_available_port
+
+if 'pytest' in sys.modules:
+ import pytest
+ is_download_test = pytest.mark.download
+else:
+ def is_download_test(testClass):
+ return testClass
+
+
+def get_params(override=None):
+ PARAMETERS_FILE = os.path.join(os.path.dirname(os.path.abspath(__file__)),
+ 'parameters.json')
+ LOCAL_PARAMETERS_FILE = os.path.join(os.path.dirname(os.path.abspath(__file__)),
+ 'local_parameters.json')
+ with open(PARAMETERS_FILE, encoding='utf-8') as pf:
+ parameters = json.load(pf)
+ if os.path.exists(LOCAL_PARAMETERS_FILE):
+ with open(LOCAL_PARAMETERS_FILE, encoding='utf-8') as pf:
+ parameters.update(json.load(pf))
+ if override:
+ parameters.update(override)
+ return parameters
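+
+
+# Example (hypothetical values): a test/local_parameters.json containing
+# {"username": "me", "password": "hunter2"} overrides the checked-in defaults
+# from test/parameters.json, and an explicit `override` dict wins over both.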
+
+
+def try_rm(filename):
+ """ Remove a file if it exists """
+ try:
+ os.remove(filename)
+ except OSError as ose:
+ if ose.errno != errno.ENOENT:
+ raise
+
+
+def report_warning(message, *args, **kwargs):
+ '''
+    Print the message to stderr; it will be prefixed with 'WARNING:'.
+    If stderr is a TTY, the 'WARNING:' prefix will be colored.
+ '''
+ if sys.stderr.isatty() and compat_os_name != 'nt':
+ _msg_header = '\033[0;33mWARNING:\033[0m'
+ else:
+ _msg_header = 'WARNING:'
+ output = f'{_msg_header} {message}\n'
+ if 'b' in getattr(sys.stderr, 'mode', ''):
+ output = output.encode(preferredencoding())
+ sys.stderr.write(output)
+
+
+class FakeYDL(YoutubeDL):
+ def __init__(self, override=None):
+        # Different instances of the downloader can't share the same dictionary:
+        # some tests set the "sublang" parameter, which would break the md5 checks.
+ params = get_params(override=override)
+ super().__init__(params, auto_init=False)
+ self.result = []
+
+ def to_screen(self, s, *args, **kwargs):
+ print(s)
+
+ def trouble(self, s, *args, **kwargs):
+ raise Exception(s)
+
+ def download(self, x):
+ self.result.append(x)
+
+ def expect_warning(self, regex):
+ # Silence an expected warning matching a regex
+ old_report_warning = self.report_warning
+
+ def report_warning(self, message, *args, **kwargs):
+ if re.match(regex, message):
+ return
+ old_report_warning(message, *args, **kwargs)
+ self.report_warning = types.MethodType(report_warning, self)
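+
+
+# Minimal usage sketch (illustrative; SomeIE is a stand-in for a real
+# extractor class): FakeYDL records download() arguments in .result and turns
+# any trouble() call into an exception, so tests fail loudly instead of
+# logging errors.
+#
+#   ydl = FakeYDL({'writesubtitles': True})
+#   ie = SomeIE(ydl)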
+
+
+def gettestcases(include_onlymatching=False):
+ for ie in yt_dlp.extractor.gen_extractors():
+ yield from ie.get_testcases(include_onlymatching)
+
+
+def getwebpagetestcases():
+ for ie in yt_dlp.extractor.gen_extractors():
+ for tc in ie.get_webpage_testcases():
+ tc.setdefault('add_ie', []).append('Generic')
+ yield tc
+
+
+md5 = lambda s: hashlib.md5(s.encode()).hexdigest()
+
+
+def expect_value(self, got, expected, field):
+ if isinstance(expected, str) and expected.startswith('re:'):
+ match_str = expected[len('re:'):]
+ match_rex = re.compile(match_str)
+
+ self.assertTrue(
+ isinstance(got, str),
+ f'Expected a {str.__name__} object, but got {type(got).__name__} for field {field}')
+ self.assertTrue(
+ match_rex.match(got),
+ f'field {field} (value: {got!r}) should match {match_str!r}')
+ elif isinstance(expected, str) and expected.startswith('startswith:'):
+ start_str = expected[len('startswith:'):]
+ self.assertTrue(
+ isinstance(got, str),
+ f'Expected a {str.__name__} object, but got {type(got).__name__} for field {field}')
+ self.assertTrue(
+ got.startswith(start_str),
+ f'field {field} (value: {got!r}) should start with {start_str!r}')
+ elif isinstance(expected, str) and expected.startswith('contains:'):
+ contains_str = expected[len('contains:'):]
+ self.assertTrue(
+ isinstance(got, str),
+ f'Expected a {str.__name__} object, but got {type(got).__name__} for field {field}')
+ self.assertTrue(
+ contains_str in got,
+ f'field {field} (value: {got!r}) should contain {contains_str!r}')
+ elif isinstance(expected, type):
+ self.assertTrue(
+ isinstance(got, expected),
+ f'Expected type {expected!r} for field {field}, but got value {got!r} of type {type(got)!r}')
+ elif isinstance(expected, dict) and isinstance(got, dict):
+ expect_dict(self, got, expected)
+ elif isinstance(expected, list) and isinstance(got, list):
+ self.assertEqual(
+ len(expected), len(got),
+            'Expected a list of length %d, but got a list of length %d for field %s' % (
+ len(expected), len(got), field))
+ for index, (item_got, item_expected) in enumerate(zip(got, expected)):
+ type_got = type(item_got)
+ type_expected = type(item_expected)
+ self.assertEqual(
+ type_expected, type_got,
+ 'Type mismatch for list item at index %d for field %s, expected %r, got %r' % (
+ index, field, type_expected, type_got))
+ expect_value(self, item_got, item_expected, field)
+ else:
+ if isinstance(expected, str) and expected.startswith('md5:'):
+ self.assertTrue(
+ isinstance(got, str),
+ f'Expected field {field} to be a unicode object, but got value {got!r} of type {type(got)!r}')
+ got = 'md5:' + md5(got)
+ elif isinstance(expected, str) and re.match(r'^(?:min|max)?count:\d+', expected):
+ self.assertTrue(
+ isinstance(got, (list, dict)),
+ f'Expected field {field} to be a list or a dict, but it is of type {type(got).__name__}')
+ op, _, expected_num = expected.partition(':')
+ expected_num = int(expected_num)
+ if op == 'mincount':
+ assert_func = assertGreaterEqual
+                msg_tmpl = 'Expected at least %d items in field %s, but only got %d'
+            elif op == 'maxcount':
+                assert_func = assertLessEqual
+                msg_tmpl = 'Expected at most %d items in field %s, but got %d'
+ elif op == 'count':
+ assert_func = assertEqual
+ msg_tmpl = 'Expected exactly %d items in field %s, but got %d'
+ else:
+ assert False
+ assert_func(
+ self, len(got), expected_num,
+ msg_tmpl % (expected_num, field, len(got)))
+ return
+ self.assertEqual(
+ expected, got,
+ f'Invalid value for field {field}, expected {expected!r}, got {got!r}')
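+
+
+# Summary of the expected-value mini-language handled above:
+#   're:<pattern>'     -> the value must match the regex
+#   'startswith:<s>'   -> the value must start with <s>
+#   'contains:<s>'     -> the value must contain <s>
+#   'md5:<hash>'       -> the md5 of the value must equal <hash>
+#   'count:N', 'mincount:N', 'maxcount:N' -> length checks on lists/dicts
+#   a type object      -> isinstance() check
+#   a dict or list     -> compared recursively / element-wise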
+
+
+def expect_dict(self, got_dict, expected_dict):
+ for info_field, expected in expected_dict.items():
+ got = got_dict.get(info_field)
+ expect_value(self, got, expected, info_field)
+
+
+def sanitize_got_info_dict(got_dict):
+ IGNORED_FIELDS = (
+ *YoutubeDL._format_fields,
+
+ # Lists
+ 'formats', 'thumbnails', 'subtitles', 'automatic_captions', 'comments', 'entries',
+
+ # Auto-generated
+ 'autonumber', 'playlist', 'format_index', 'video_ext', 'audio_ext', 'duration_string', 'epoch', 'n_entries',
+ 'fulltitle', 'extractor', 'extractor_key', 'filename', 'filepath', 'infojson_filename', 'original_url',
+
+ # Only live_status needs to be checked
+ 'is_live', 'was_live',
+ )
+
+ IGNORED_PREFIXES = ('', 'playlist', 'requested', 'webpage')
+
+ def sanitize(key, value):
+ if isinstance(value, str) and len(value) > 100 and key != 'thumbnail':
+ return f'md5:{md5(value)}'
+ elif isinstance(value, list) and len(value) > 10:
+ return f'count:{len(value)}'
+ elif key.endswith('_count') and isinstance(value, int):
+ return int
+ return value
+
+ test_info_dict = {
+ key: sanitize(key, value) for key, value in got_dict.items()
+ if value is not None and key not in IGNORED_FIELDS and (
+ not any(key.startswith(f'{prefix}_') for prefix in IGNORED_PREFIXES)
+ or key == '_old_archive_ids')
+ }
+
+ # display_id may be generated from id
+ if test_info_dict.get('display_id') == test_info_dict.get('id'):
+ test_info_dict.pop('display_id')
+
+ # Remove deprecated fields
+ for old in YoutubeDL._deprecated_multivalue_fields.keys():
+ test_info_dict.pop(old, None)
+
+ # release_year may be generated from release_date
+ if try_call(lambda: test_info_dict['release_year'] == int(test_info_dict['release_date'][:4])):
+ test_info_dict.pop('release_year')
+
+ # Check url for flat entries
+ if got_dict.get('_type', 'video') != 'video' and got_dict.get('url'):
+ test_info_dict['url'] = got_dict['url']
+
+ return test_info_dict
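+
+
+# Illustrative effect (hypothetical values): a >100-character 'description'
+# collapses to 'md5:...', a list with more than 10 entries becomes 'count:N',
+# 'view_count': 123 becomes the type `int`, and fields such as 'formats' are
+# dropped entirely, keeping test definitions small and stable across runs.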
+
+
+def expect_info_dict(self, got_dict, expected_dict):
+ expect_dict(self, got_dict, expected_dict)
+ # Check for the presence of mandatory fields
+ if got_dict.get('_type') not in ('playlist', 'multi_video'):
+ mandatory_fields = ['id', 'title']
+ if expected_dict.get('ext'):
+ mandatory_fields.extend(('url', 'ext'))
+ for key in mandatory_fields:
+ self.assertTrue(got_dict.get(key), 'Missing mandatory field %s' % key)
+ # Check for mandatory fields that are automatically set by YoutubeDL
+ if got_dict.get('_type', 'video') == 'video':
+ for key in ['webpage_url', 'extractor', 'extractor_key']:
+ self.assertTrue(got_dict.get(key), 'Missing field: %s' % key)
+
+ test_info_dict = sanitize_got_info_dict(got_dict)
+
+ missing_keys = set(test_info_dict.keys()) - set(expected_dict.keys())
+ if missing_keys:
+ def _repr(v):
+ if isinstance(v, str):
+ return "'%s'" % v.replace('\\', '\\\\').replace("'", "\\'").replace('\n', '\\n')
+ elif isinstance(v, type):
+ return v.__name__
+ else:
+ return repr(v)
+ info_dict_str = ''.join(
+ f' {_repr(k)}: {_repr(v)},\n'
+ for k, v in test_info_dict.items() if k not in missing_keys)
+ if info_dict_str:
+ info_dict_str += '\n'
+ info_dict_str += ''.join(
+ f' {_repr(k)}: {_repr(test_info_dict[k])},\n'
+ for k in missing_keys)
+ info_dict_str = '\n\'info_dict\': {\n' + info_dict_str + '},\n'
+ write_string(info_dict_str.replace('\n', '\n '), out=sys.stderr)
+ self.assertFalse(
+ missing_keys,
+ 'Missing keys in test definition: %s' % (
+ ', '.join(sorted(missing_keys))))
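+
+
+# Note: when keys are missing from a test definition, the block above writes a
+# ready-to-paste 'info_dict': {...} snippet to stderr before failing, so the
+# test can be updated by copying the printed output.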
+
+
+def assertRegexpMatches(self, text, regexp, msg=None):
+ if hasattr(self, 'assertRegexp'):
+ return self.assertRegexp(text, regexp, msg)
+ else:
+ m = re.match(regexp, text)
+ if not m:
+            note = 'Regexp didn\'t match: %r not found' % regexp
+ if len(text) < 1000:
+ note += ' in %r' % text
+ if msg is None:
+ msg = note
+ else:
+ msg = note + ', ' + msg
+ self.assertTrue(m, msg)
+
+
+def assertGreaterEqual(self, got, expected, msg=None):
+ if not (got >= expected):
+ if msg is None:
+ msg = f'{got!r} not greater than or equal to {expected!r}'
+ self.assertTrue(got >= expected, msg)
+
+
+def assertLessEqual(self, got, expected, msg=None):
+ if not (got <= expected):
+ if msg is None:
+ msg = f'{got!r} not less than or equal to {expected!r}'
+ self.assertTrue(got <= expected, msg)
+
+
+def assertEqual(self, got, expected, msg=None):
+ if not (got == expected):
+ if msg is None:
+ msg = f'{got!r} not equal to {expected!r}'
+ self.assertTrue(got == expected, msg)
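+
+
+# The assert* helpers above take `self` explicitly so that expect_value() can
+# call them as plain module-level functions with a unittest.TestCase instance.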
+
+
+def expect_warnings(ydl, warnings_re):
+ real_warning = ydl.report_warning
+
+ def _report_warning(w, *args, **kwargs):
+ if not any(re.search(w_re, w) for w_re in warnings_re):
+ real_warning(w, *args, **kwargs)
+
+ ydl.report_warning = _report_warning
+
+
+def http_server_port(httpd):
+ if os.name == 'java' and isinstance(httpd.socket, ssl.SSLSocket):
+ # In Jython SSLSocket is not a subclass of socket.socket
+ sock = httpd.socket.sock
+ else:
+ sock = httpd.socket
+ return sock.getsockname()[1]
+
+
+def verify_address_availability(address):
+ if find_available_port(address) is None:
+ pytest.skip(f'Unable to bind to source address {address} (address may not exist)')
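+
+
+# Note: verify_address_availability() relies on pytest.skip(); the conditional
+# import near the top of this file only binds pytest when running under pytest.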
diff --git a/test/parameters.json b/test/parameters.json
new file mode 100644
index 0000000..8789ce1
--- /dev/null
+++ b/test/parameters.json
@@ -0,0 +1,49 @@
+{
+ "check_formats": false,
+ "consoletitle": false,
+ "continuedl": true,
+ "forcedescription": false,
+ "forcefilename": false,
+ "forceformat": false,
+ "forcethumbnail": false,
+ "forcetitle": false,
+ "forceurl": false,
+ "force_write_download_archive": false,
+ "format": "b/bv",
+ "ignoreerrors": false,
+ "listformats": null,
+ "logtostderr": false,
+ "matchtitle": null,
+ "max_downloads": null,
+ "overwrites": null,
+ "nopart": false,
+ "noprogress": false,
+ "outtmpl": "%(id)s.%(ext)s",
+ "password": null,
+ "playliststart": 1,
+ "prefer_free_formats": false,
+ "quiet": false,
+ "ratelimit": null,
+ "rejecttitle": null,
+ "retries": 10,
+ "simulate": false,
+ "subtitleslang": null,
+ "subtitlesformat": "best",
+ "test": true,
+ "updatetime": true,
+ "usenetrc": false,
+ "username": null,
+ "verbose": true,
+ "writedescription": false,
+ "writeinfojson": true,
+ "writeannotations": false,
+ "writelink": false,
+ "writeurllink": false,
+ "writewebloclink": false,
+ "writedesktoplink": false,
+ "writesubtitles": false,
+ "allsubtitles": false,
+ "listsubtitles": false,
+ "fixup": "never",
+ "allow_playlist_files": false
+}
diff --git a/test/test_InfoExtractor.py b/test/test_InfoExtractor.py
new file mode 100644
index 0000000..b7dee49
--- /dev/null
+++ b/test/test_InfoExtractor.py
@@ -0,0 +1,1911 @@
+#!/usr/bin/env python3
+
+# Allow direct execution
+import os
+import sys
+import unittest
+
+sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
+
+
+import http.server
+import threading
+
+from test.helper import FakeYDL, expect_dict, expect_value, http_server_port
+from yt_dlp.compat import compat_etree_fromstring
+from yt_dlp.extractor import YoutubeIE, get_info_extractor
+from yt_dlp.extractor.common import InfoExtractor
+from yt_dlp.utils import (
+ ExtractorError,
+ RegexNotFoundError,
+ encode_data_uri,
+ strip_jsonp,
+)
+
+TEAPOT_RESPONSE_STATUS = 418
+TEAPOT_RESPONSE_BODY = "<h1>418 I'm a teapot</h1>"
+
+
+class InfoExtractorTestRequestHandler(http.server.BaseHTTPRequestHandler):
+ def log_message(self, format, *args):
+ pass
+
+ def do_GET(self):
+ if self.path == '/teapot':
+ self.send_response(TEAPOT_RESPONSE_STATUS)
+ self.send_header('Content-Type', 'text/html; charset=utf-8')
+ self.end_headers()
+ self.wfile.write(TEAPOT_RESPONSE_BODY.encode())
+ else:
+ assert False
+
+
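+# The extractor-side _sort_formats helper was deprecated upstream; this
+# test-only shim forwards to the downloader's sort_formats so the assertions
+# below can keep calling it.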
+class DummyIE(InfoExtractor):
+ def _sort_formats(self, formats, field_preference=()):
+ self._downloader.sort_formats(
+ {'formats': formats, '_format_sort_fields': field_preference})
+
+
+class TestInfoExtractor(unittest.TestCase):
+ def setUp(self):
+ self.ie = DummyIE(FakeYDL())
+
+ def test_ie_key(self):
+ self.assertEqual(get_info_extractor(YoutubeIE.ie_key()), YoutubeIE)
+
+ def test_html_search_regex(self):
+ html = '<p id="foo">Watch this <a href="http://www.youtube.com/watch?v=BaW_jenozKc">video</a></p>'
+ search = lambda re, *args: self.ie._html_search_regex(re, html, *args)
+ self.assertEqual(search(r'<p id="foo">(.+?)</p>', 'foo'), 'Watch this video')
+
+ def test_opengraph(self):
+ ie = self.ie
+ html = '''
+ <meta name="og:title" content='Foo'/>
+ <meta content="Some video's description " name="og:description"/>
+ <meta property='og:image' content='http://domain.com/pic.jpg?key1=val1&amp;key2=val2'/>
+ <meta content='application/x-shockwave-flash' property='og:video:type'>
+ <meta content='Foo' property=og:foobar>
+ <meta name="og:test1" content='foo > < bar'/>
+ <meta name="og:test2" content="foo >//< bar"/>
+ <meta property=og-test3 content='Ill-formatted opengraph'/>
+ <meta property=og:test4 content=unquoted-value/>
+ '''
+ self.assertEqual(ie._og_search_title(html), 'Foo')
+ self.assertEqual(ie._og_search_description(html), 'Some video\'s description ')
+ self.assertEqual(ie._og_search_thumbnail(html), 'http://domain.com/pic.jpg?key1=val1&key2=val2')
+ self.assertEqual(ie._og_search_video_url(html, default=None), None)
+ self.assertEqual(ie._og_search_property('foobar', html), 'Foo')
+ self.assertEqual(ie._og_search_property('test1', html), 'foo > < bar')
+ self.assertEqual(ie._og_search_property('test2', html), 'foo >//< bar')
+ self.assertEqual(ie._og_search_property('test3', html), 'Ill-formatted opengraph')
+ self.assertEqual(ie._og_search_property(('test0', 'test1'), html), 'foo > < bar')
+ self.assertRaises(RegexNotFoundError, ie._og_search_property, 'test0', html, None, fatal=True)
+ self.assertRaises(RegexNotFoundError, ie._og_search_property, ('test0', 'test00'), html, None, fatal=True)
+ self.assertEqual(ie._og_search_property('test4', html), 'unquoted-value')
+
+ def test_html_search_meta(self):
+ ie = self.ie
+ html = '''
+ <meta name="a" content="1" />
+ <meta name='b' content='2'>
+ <meta name="c" content='3'>
+ <meta name=d content='4'>
+ <meta property="e" content='5' >
+ <meta content="6" name="f">
+ '''
+
+ self.assertEqual(ie._html_search_meta('a', html), '1')
+ self.assertEqual(ie._html_search_meta('b', html), '2')
+ self.assertEqual(ie._html_search_meta('c', html), '3')
+ self.assertEqual(ie._html_search_meta('d', html), '4')
+ self.assertEqual(ie._html_search_meta('e', html), '5')
+ self.assertEqual(ie._html_search_meta('f', html), '6')
+ self.assertEqual(ie._html_search_meta(('a', 'b', 'c'), html), '1')
+ self.assertEqual(ie._html_search_meta(('c', 'b', 'a'), html), '3')
+ self.assertEqual(ie._html_search_meta(('z', 'x', 'c'), html), '3')
+ self.assertRaises(RegexNotFoundError, ie._html_search_meta, 'z', html, None, fatal=True)
+ self.assertRaises(RegexNotFoundError, ie._html_search_meta, ('z', 'x'), html, None, fatal=True)
+
+ def test_search_json_ld_realworld(self):
+ _TESTS = [
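+ # Each case is (html, expected info dict, kwargs passed to _search_json_ld)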
+ # https://github.com/ytdl-org/youtube-dl/issues/23306
+ (
+ r'''<script type="application/ld+json">
+{
+"@context": "http://schema.org/",
+"@type": "VideoObject",
+"name": "1 On 1 With Kleio",
+"url": "https://www.eporner.com/hd-porn/xN49A1cT3eB/1-On-1-With-Kleio/",
+"duration": "PT0H12M23S",
+"thumbnailUrl": ["https://static-eu-cdn.eporner.com/thumbs/static4/7/78/780/780814/9_360.jpg", "https://imggen.eporner.com/780814/1920/1080/9.jpg"],
+"contentUrl": "https://gvideo.eporner.com/xN49A1cT3eB/xN49A1cT3eB.mp4",
+"embedUrl": "https://www.eporner.com/embed/xN49A1cT3eB/1-On-1-With-Kleio/",
+"image": "https://static-eu-cdn.eporner.com/thumbs/static4/7/78/780/780814/9_360.jpg",
+"width": "1920",
+"height": "1080",
+"encodingFormat": "mp4",
+"bitrate": "6617kbps",
+"isFamilyFriendly": "False",
+"description": "Kleio Valentien",
+"uploadDate": "2015-12-05T21:24:35+01:00",
+"interactionStatistic": {
+"@type": "InteractionCounter",
+"interactionType": { "@type": "http://schema.org/WatchAction" },
+"userInteractionCount": 1120958
+}, "aggregateRating": {
+"@type": "AggregateRating",
+"ratingValue": "88",
+"ratingCount": "630",
+"bestRating": "100",
+"worstRating": "0"
+}, "actor": [{
+"@type": "Person",
+"name": "Kleio Valentien",
+"url": "https://www.eporner.com/pornstar/kleio-valentien/"
+}]}
+ </script>''',
+ {
+ 'title': '1 On 1 With Kleio',
+ 'description': 'Kleio Valentien',
+ 'url': 'https://gvideo.eporner.com/xN49A1cT3eB/xN49A1cT3eB.mp4',
+ 'timestamp': 1449347075,
+ 'duration': 743.0,
+ 'view_count': 1120958,
+ 'width': 1920,
+ 'height': 1080,
+ },
+ {},
+ ),
+ (
+ r'''<script type="application/ld+json">
+ {
+ "@context": "https://schema.org",
+ "@graph": [
+ {
+ "@type": "NewsArticle",
+ "mainEntityOfPage": {
+ "@type": "WebPage",
+ "@id": "https://www.ant1news.gr/Society/article/620286/symmoria-anilikon-dikigoros-thymaton-ithelan-na-toys-apoteleiosoyn"
+ },
+ "headline": "Συμμορία ανηλίκων – δικηγόρος θυμάτων: ήθελαν να τους αποτελειώσουν",
+ "name": "Συμμορία ανηλίκων – δικηγόρος θυμάτων: ήθελαν να τους αποτελειώσουν",
+ "description": "Τα παιδιά δέχθηκαν την επίθεση επειδή αρνήθηκαν να γίνουν μέλη της συμμορίας, ανέφερε ο Γ. Ζαχαρόπουλος.",
+ "image": {
+ "@type": "ImageObject",
+ "url": "https://ant1media.azureedge.net/imgHandler/1100/a635c968-be71-447c-bf9c-80d843ece21e.jpg",
+ "width": 1100,
+ "height": 756 },
+ "datePublished": "2021-11-10T08:50:00+03:00",
+ "dateModified": "2021-11-10T08:52:53+03:00",
+ "author": {
+ "@type": "Person",
+ "@id": "https://www.ant1news.gr/",
+ "name": "Ant1news",
+ "image": "https://www.ant1news.gr/images/logo-e5d7e4b3e714c88e8d2eca96130142f6.png",
+ "url": "https://www.ant1news.gr/"
+ },
+ "publisher": {
+ "@type": "Organization",
+ "@id": "https://www.ant1news.gr#publisher",
+ "name": "Ant1news",
+ "url": "https://www.ant1news.gr",
+ "logo": {
+ "@type": "ImageObject",
+ "url": "https://www.ant1news.gr/images/logo-e5d7e4b3e714c88e8d2eca96130142f6.png",
+ "width": 400,
+ "height": 400 },
+ "sameAs": [
+ "https://www.facebook.com/Ant1news.gr",
+ "https://twitter.com/antennanews",
+ "https://www.youtube.com/channel/UC0smvAbfczoN75dP0Hw4Pzw",
+ "https://www.instagram.com/ant1news/"
+ ]
+ },
+
+ "keywords": "μαχαίρωμα,συμμορία ανηλίκων,ΕΙΔΗΣΕΙΣ,ΕΙΔΗΣΕΙΣ ΣΗΜΕΡΑ,ΝΕΑ,Κοινωνία - Ant1news",
+
+
+ "articleSection": "Κοινωνία"
+ }
+ ]
+ }
+ </script>''',
+ {
+ 'timestamp': 1636523400,
+ 'title': 'md5:91fe569e952e4d146485740ae927662b',
+ },
+ {'expected_type': 'NewsArticle'},
+ ),
+ (
+ r'''<script type="application/ld+json">
+ {"url":"/vrtnu/a-z/het-journaal/2021/het-journaal-het-journaal-19u-20211231/",
+ "name":"Het journaal 19u",
+ "description":"Het journaal 19u van vrijdag 31 december 2021.",
+ "potentialAction":{"url":"https://vrtnu.page.link/pfVy6ihgCAJKgHqe8","@type":"ShareAction"},
+ "mainEntityOfPage":{"@id":"1640092242445","@type":"WebPage"},
+ "publication":[{
+ "startDate":"2021-12-31T19:00:00.000+01:00",
+ "endDate":"2022-01-30T23:55:00.000+01:00",
+ "publishedBy":{"name":"een","@type":"Organization"},
+ "publishedOn":{"url":"https://www.vrt.be/vrtnu/","name":"VRT NU","@type":"BroadcastService"},
+ "@id":"pbs-pub-3a7ec233-da95-4c1e-9b2b-cf5fdfebcbe8",
+ "@type":"BroadcastEvent"
+ }],
+ "video":{
+ "name":"Het journaal - Aflevering 365 (Seizoen 2021)",
+ "description":"Het journaal 19u van vrijdag 31 december 2021. Bekijk aflevering 365 van seizoen 2021 met VRT NU via de site of app.",
+ "thumbnailUrl":"//images.vrt.be/width1280/2021/12/31/80d5ed00-6a64-11ec-b07d-02b7b76bf47f.jpg",
+ "expires":"2022-01-30T23:55:00.000+01:00",
+ "hasPart":[
+ {"name":"Explosie Turnhout","startOffset":70,"@type":"Clip"},
+ {"name":"Jaarwisseling","startOffset":440,"@type":"Clip"},
+ {"name":"Natuurbranden Colorado","startOffset":1179,"@type":"Clip"},
+ {"name":"Klimaatverandering","startOffset":1263,"@type":"Clip"},
+ {"name":"Zacht weer","startOffset":1367,"@type":"Clip"},
+ {"name":"Financiële balans","startOffset":1383,"@type":"Clip"},
+ {"name":"Club Brugge","startOffset":1484,"@type":"Clip"},
+ {"name":"Mentale gezondheid bij topsporters","startOffset":1575,"@type":"Clip"},
+ {"name":"Olympische Winterspelen","startOffset":1728,"@type":"Clip"},
+ {"name":"Sober oudjaar in Nederland","startOffset":1873,"@type":"Clip"}
+ ],
+ "duration":"PT34M39.23S",
+ "uploadDate":"2021-12-31T19:00:00.000+01:00",
+ "@id":"vid-9457d0c6-b8ac-4aba-b5e1-15aa3a3295b5",
+ "@type":"VideoObject"
+ },
+ "genre":["Nieuws en actua"],
+ "episodeNumber":365,
+ "partOfSeries":{"name":"Het journaal","@id":"222831405527","@type":"TVSeries"},
+ "partOfSeason":{"name":"Seizoen 2021","@id":"961809365527","@type":"TVSeason"},
+ "@context":"https://schema.org","@id":"961685295527","@type":"TVEpisode"}</script>
+ ''',
+ {
+ 'chapters': [
+ {"title": "Explosie Turnhout", "start_time": 70, "end_time": 440},
+ {"title": "Jaarwisseling", "start_time": 440, "end_time": 1179},
+ {"title": "Natuurbranden Colorado", "start_time": 1179, "end_time": 1263},
+ {"title": "Klimaatverandering", "start_time": 1263, "end_time": 1367},
+ {"title": "Zacht weer", "start_time": 1367, "end_time": 1383},
+ {"title": "Financiële balans", "start_time": 1383, "end_time": 1484},
+ {"title": "Club Brugge", "start_time": 1484, "end_time": 1575},
+ {"title": "Mentale gezondheid bij topsporters", "start_time": 1575, "end_time": 1728},
+ {"title": "Olympische Winterspelen", "start_time": 1728, "end_time": 1873},
+ {"title": "Sober oudjaar in Nederland", "start_time": 1873, "end_time": 2079.23}
+ ],
+ 'title': 'Het journaal - Aflevering 365 (Seizoen 2021)'
+ }, {}
+ ),
+ (
+ # test multiple thumbnails in a list
+ r'''
+<script type="application/ld+json">
+{"@context":"https://schema.org",
+"@type":"VideoObject",
+"thumbnailUrl":["https://www.rainews.it/cropgd/640x360/dl/img/2021/12/30/1640886376927_GettyImages.jpg"]}
+</script>''',
+ {
+ 'thumbnails': [{'url': 'https://www.rainews.it/cropgd/640x360/dl/img/2021/12/30/1640886376927_GettyImages.jpg'}],
+ },
+ {},
+ ),
+ (
+ # test single thumbnail
+ r'''
+<script type="application/ld+json">
+{"@context":"https://schema.org",
+"@type":"VideoObject",
+"thumbnailUrl":"https://www.rainews.it/cropgd/640x360/dl/img/2021/12/30/1640886376927_GettyImages.jpg"}
+</script>''',
+ {
+ 'thumbnails': [{'url': 'https://www.rainews.it/cropgd/640x360/dl/img/2021/12/30/1640886376927_GettyImages.jpg'}],
+ },
+ {},
+ )
+ ]
+ for html, expected_dict, search_json_ld_kwargs in _TESTS:
+ expect_dict(
+ self,
+ self.ie._search_json_ld(html, None, **search_json_ld_kwargs),
+ expected_dict
+ )
+
+ def test_download_json(self):
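+ # data: URIs let these cases run without any network access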
+ uri = encode_data_uri(b'{"foo": "blah"}', 'application/json')
+ self.assertEqual(self.ie._download_json(uri, None), {'foo': 'blah'})
+ uri = encode_data_uri(b'callback({"foo": "blah"})', 'application/javascript')
+ self.assertEqual(self.ie._download_json(uri, None, transform_source=strip_jsonp), {'foo': 'blah'})
+ uri = encode_data_uri(b'{"foo": invalid}', 'application/json')
+ self.assertRaises(ExtractorError, self.ie._download_json, uri, None)
+ self.assertEqual(self.ie._download_json(uri, None, fatal=False), None)
+
+ def test_parse_html5_media_entries(self):
+ # inline video tag
+ expect_dict(
+ self,
+ self.ie._parse_html5_media_entries(
+ 'https://127.0.0.1/video.html',
+ r'<html><video src="/vid.mp4" /></html>', None)[0],
+ {
+ 'formats': [{
+ 'url': 'https://127.0.0.1/vid.mp4',
+ }],
+ })
+
+ # from https://www.r18.com/
+ # with kbps in label
+ expect_dict(
+ self,
+ self.ie._parse_html5_media_entries(
+ 'https://www.r18.com/',
+ r'''
+ <video id="samplevideo_amateur" class="js-samplevideo video-js vjs-default-skin vjs-big-play-centered" controls preload="auto" width="400" height="225" poster="//pics.r18.com/digital/amateur/mgmr105/mgmr105jp.jpg">
+ <source id="video_source" src="https://awscc3001.r18.com/litevideo/freepv/m/mgm/mgmr105/mgmr105_sm_w.mp4" type="video/mp4" res="240" label="300kbps">
+ <source id="video_source" src="https://awscc3001.r18.com/litevideo/freepv/m/mgm/mgmr105/mgmr105_dm_w.mp4" type="video/mp4" res="480" label="1000kbps">
+ <source id="video_source" src="https://awscc3001.r18.com/litevideo/freepv/m/mgm/mgmr105/mgmr105_dmb_w.mp4" type="video/mp4" res="740" label="1500kbps">
+ <p>Your browser does not support the video tag.</p>
+ </video>
+ ''', None)[0],
+ {
+ 'formats': [{
+ 'url': 'https://awscc3001.r18.com/litevideo/freepv/m/mgm/mgmr105/mgmr105_sm_w.mp4',
+ 'ext': 'mp4',
+ 'format_id': '300kbps',
+ 'height': 240,
+ 'tbr': 300,
+ }, {
+ 'url': 'https://awscc3001.r18.com/litevideo/freepv/m/mgm/mgmr105/mgmr105_dm_w.mp4',
+ 'ext': 'mp4',
+ 'format_id': '1000kbps',
+ 'height': 480,
+ 'tbr': 1000,
+ }, {
+ 'url': 'https://awscc3001.r18.com/litevideo/freepv/m/mgm/mgmr105/mgmr105_dmb_w.mp4',
+ 'ext': 'mp4',
+ 'format_id': '1500kbps',
+ 'height': 740,
+ 'tbr': 1500,
+ }],
+ 'thumbnail': '//pics.r18.com/digital/amateur/mgmr105/mgmr105jp.jpg'
+ })
+
+ # from https://www.csfd.cz/
+ # with width and height
+ expect_dict(
+ self,
+ self.ie._parse_html5_media_entries(
+ 'https://www.csfd.cz/',
+ r'''
+ <video width="770" height="328" preload="none" controls poster="https://img.csfd.cz/files/images/film/video/preview/163/344/163344118_748d20.png?h360" >
+ <source src="https://video.csfd.cz/files/videos/157/750/157750813/163327358_eac647.mp4" type="video/mp4" width="640" height="360">
+ <source src="https://video.csfd.cz/files/videos/157/750/157750813/163327360_3d2646.mp4" type="video/mp4" width="1280" height="720">
+ <source src="https://video.csfd.cz/files/videos/157/750/157750813/163327356_91f258.mp4" type="video/mp4" width="1920" height="1080">
+ <source src="https://video.csfd.cz/files/videos/157/750/157750813/163327359_962b4a.webm" type="video/webm" width="640" height="360">
+ <source src="https://video.csfd.cz/files/videos/157/750/157750813/163327361_6feee0.webm" type="video/webm" width="1280" height="720">
+ <source src="https://video.csfd.cz/files/videos/157/750/157750813/163327357_8ab472.webm" type="video/webm" width="1920" height="1080">
+ <track src="https://video.csfd.cz/files/subtitles/163/344/163344115_4c388b.srt" type="text/x-srt" kind="subtitles" srclang="cs" label="cs">
+ </video>
+ ''', None)[0],
+ {
+ 'formats': [{
+ 'url': 'https://video.csfd.cz/files/videos/157/750/157750813/163327358_eac647.mp4',
+ 'ext': 'mp4',
+ 'width': 640,
+ 'height': 360,
+ }, {
+ 'url': 'https://video.csfd.cz/files/videos/157/750/157750813/163327360_3d2646.mp4',
+ 'ext': 'mp4',
+ 'width': 1280,
+ 'height': 720,
+ }, {
+ 'url': 'https://video.csfd.cz/files/videos/157/750/157750813/163327356_91f258.mp4',
+ 'ext': 'mp4',
+ 'width': 1920,
+ 'height': 1080,
+ }, {
+ 'url': 'https://video.csfd.cz/files/videos/157/750/157750813/163327359_962b4a.webm',
+ 'ext': 'webm',
+ 'width': 640,
+ 'height': 360,
+ }, {
+ 'url': 'https://video.csfd.cz/files/videos/157/750/157750813/163327361_6feee0.webm',
+ 'ext': 'webm',
+ 'width': 1280,
+ 'height': 720,
+ }, {
+ 'url': 'https://video.csfd.cz/files/videos/157/750/157750813/163327357_8ab472.webm',
+ 'ext': 'webm',
+ 'width': 1920,
+ 'height': 1080,
+ }],
+ 'subtitles': {
+ 'cs': [{'url': 'https://video.csfd.cz/files/subtitles/163/344/163344115_4c388b.srt'}]
+ },
+ 'thumbnail': 'https://img.csfd.cz/files/images/film/video/preview/163/344/163344118_748d20.png?h360'
+ })
+
+ # from https://tamasha.com/v/Kkdjw
+ # with height in label
+ expect_dict(
+ self,
+ self.ie._parse_html5_media_entries(
+ 'https://tamasha.com/v/Kkdjw',
+ r'''
+ <video crossorigin="anonymous">
+ <source src="https://s-v2.tamasha.com/statics/videos_file/19/8f/Kkdjw_198feff8577d0057536e905cce1fb61438dd64e0_n_240.mp4" type="video/mp4" label="AUTO" res="0"/>
+ <source src="https://s-v2.tamasha.com/statics/videos_file/19/8f/Kkdjw_198feff8577d0057536e905cce1fb61438dd64e0_n_240.mp4" type="video/mp4"
+ label="240p" res="240"/>
+ <source src="https://s-v2.tamasha.com/statics/videos_file/20/00/Kkdjw_200041c66f657fc967db464d156eafbc1ed9fe6f_n_144.mp4" type="video/mp4"
+ label="144p" res="144"/>
+ </video>
+ ''', None)[0],
+ {
+ 'formats': [{
+ 'url': 'https://s-v2.tamasha.com/statics/videos_file/19/8f/Kkdjw_198feff8577d0057536e905cce1fb61438dd64e0_n_240.mp4',
+ }, {
+ 'url': 'https://s-v2.tamasha.com/statics/videos_file/19/8f/Kkdjw_198feff8577d0057536e905cce1fb61438dd64e0_n_240.mp4',
+ 'ext': 'mp4',
+ 'format_id': '240p',
+ 'height': 240,
+ }, {
+ 'url': 'https://s-v2.tamasha.com/statics/videos_file/20/00/Kkdjw_200041c66f657fc967db464d156eafbc1ed9fe6f_n_144.mp4',
+ 'ext': 'mp4',
+ 'format_id': '144p',
+ 'height': 144,
+ }]
+ })
+
+ # from https://www.directvnow.com
+ # with data-src
+ expect_dict(
+ self,
+ self.ie._parse_html5_media_entries(
+ 'https://www.directvnow.com',
+ r'''
+ <video id="vid1" class="header--video-masked active" muted playsinline>
+ <source data-src="https://cdn.directv.com/content/dam/dtv/prod/website_directvnow-international/videos/DTVN_hdr_HBO_v3.mp4" type="video/mp4" />
+ </video>
+ ''', None)[0],
+ {
+ 'formats': [{
+ 'ext': 'mp4',
+ 'url': 'https://cdn.directv.com/content/dam/dtv/prod/website_directvnow-international/videos/DTVN_hdr_HBO_v3.mp4',
+ }]
+ })
+
+ # from https://www.klarna.com/uk/
+ # with data-video-src
+ expect_dict(
+ self,
+ self.ie._parse_html5_media_entries(
+ 'https://www.directvnow.com',
+ r'''
+ <video loop autoplay muted class="responsive-video block-kl__video video-on-medium">
+ <source src="" data-video-desktop data-video-src="https://www.klarna.com/uk/wp-content/uploads/sites/11/2019/01/KL062_Smooth3_0_DogWalking_5s_920x080_.mp4" type="video/mp4" />
+ </video>
+ ''', None)[0],
+ {
+ 'formats': [{
+ 'url': 'https://www.klarna.com/uk/wp-content/uploads/sites/11/2019/01/KL062_Smooth3_0_DogWalking_5s_920x080_.mp4',
+ 'ext': 'mp4',
+ }],
+ })
+
+ # from https://0000.studio/
+ # with type attribute but without extension in URL
+ expect_dict(
+ self,
+ self.ie._parse_html5_media_entries(
+ 'https://0000.studio',
+ r'''
+ <video src="https://d1ggyt9m8pwf3g.cloudfront.net/protected/ap-northeast-1:1864af40-28d5-492b-b739-b32314b1a527/archive/clip/838db6a7-8973-4cd6-840d-8517e4093c92"
+ controls="controls" type="video/mp4" preload="metadata" autoplay="autoplay" playsinline class="object-contain">
+ </video>
+ ''', None)[0],
+ {
+ 'formats': [{
+ 'url': 'https://d1ggyt9m8pwf3g.cloudfront.net/protected/ap-northeast-1:1864af40-28d5-492b-b739-b32314b1a527/archive/clip/838db6a7-8973-4cd6-840d-8517e4093c92',
+ 'ext': 'mp4',
+ }],
+ })
+
+ def test_extract_jwplayer_data_realworld(self):
+ # from http://www.suffolk.edu/sjc/
+ expect_dict(
+ self,
+ self.ie._extract_jwplayer_data(r'''
+ <script type='text/javascript'>
+ jwplayer('my-video').setup({
+ file: 'rtmp://192.138.214.154/live/sjclive',
+ fallback: 'true',
+ width: '95%',
+ aspectratio: '16:9',
+ primary: 'flash',
+ mediaid:'XEgvuql4'
+ });
+ </script>
+ ''', None, require_title=False),
+ {
+ 'id': 'XEgvuql4',
+ 'formats': [{
+ 'url': 'rtmp://192.138.214.154/live/sjclive',
+ 'ext': 'flv'
+ }]
+ })
+
+ # from https://www.pornoxo.com/videos/7564/striptease-from-sexy-secretary/
+ expect_dict(
+ self,
+ self.ie._extract_jwplayer_data(r'''
+<script type="text/javascript">
+ jwplayer("mediaplayer").setup({
+ 'videoid': "7564",
+ 'width': "100%",
+ 'aspectratio': "16:9",
+ 'stretching': "exactfit",
+ 'autostart': 'false',
+ 'flashplayer': "https://t04.vipstreamservice.com/jwplayer/v5.10/player.swf",
+ 'file': "https://cdn.pornoxo.com/key=MF+oEbaxqTKb50P-w9G3nA,end=1489689259,ip=104.199.146.27/ip=104.199.146.27/speed=6573765/buffer=3.0/2009-12/4b2157147afe5efa93ce1978e0265289c193874e02597.flv",
+ 'image': "https://t03.vipstreamservice.com/thumbs/pxo-full/2009-12/14/a4b2157147afe5efa93ce1978e0265289c193874e02597.flv-full-13.jpg",
+ 'filefallback': "https://cdn.pornoxo.com/key=9ZPsTR5EvPLQrBaak2MUGA,end=1489689259,ip=104.199.146.27/ip=104.199.146.27/speed=6573765/buffer=3.0/2009-12/m_4b2157147afe5efa93ce1978e0265289c193874e02597.mp4",
+ 'logo.hide': true,
+ 'skin': "https://t04.vipstreamservice.com/jwplayer/skin/modieus-blk.zip",
+ 'plugins': "https://t04.vipstreamservice.com/jwplayer/dock/dockableskinnableplugin.swf",
+ 'dockableskinnableplugin.piclink': "/index.php?key=ajax-videothumbsn&vid=7564&data=2009-12--14--4b2157147afe5efa93ce1978e0265289c193874e02597.flv--17370",
+ 'controlbar': 'bottom',
+ 'modes': [
+ {type: 'flash', src: 'https://t04.vipstreamservice.com/jwplayer/v5.10/player.swf'}
+ ],
+ 'provider': 'http'
+ });
+ //noinspection JSAnnotator
+ invideo.setup({
+ adsUrl: "/banner-iframe/?zoneId=32",
+ adsUrl2: "",
+ autostart: false
+ });
+</script>
+ ''', 'dummy', require_title=False),
+ {
+ 'thumbnail': 'https://t03.vipstreamservice.com/thumbs/pxo-full/2009-12/14/a4b2157147afe5efa93ce1978e0265289c193874e02597.flv-full-13.jpg',
+ 'formats': [{
+ 'url': 'https://cdn.pornoxo.com/key=MF+oEbaxqTKb50P-w9G3nA,end=1489689259,ip=104.199.146.27/ip=104.199.146.27/speed=6573765/buffer=3.0/2009-12/4b2157147afe5efa93ce1978e0265289c193874e02597.flv',
+ 'ext': 'flv'
+ }]
+ })
+
+ # from http://www.indiedb.com/games/king-machine/videos
+ expect_dict(
+ self,
+ self.ie._extract_jwplayer_data(r'''
+<script>
+jwplayer("mediaplayer").setup({"abouttext":"Visit Indie DB","aboutlink":"http:\/\/www.indiedb.com\/","displaytitle":false,"autostart":false,"repeat":false,"title":"king machine trailer 1","sharing":{"link":"http:\/\/www.indiedb.com\/games\/king-machine\/videos\/king-machine-trailer-1","code":"<iframe width=\"560\" height=\"315\" src=\"http:\/\/www.indiedb.com\/media\/iframe\/1522983\" frameborder=\"0\" allowfullscreen><\/iframe><br><a href=\"http:\/\/www.indiedb.com\/games\/king-machine\/videos\/king-machine-trailer-1\">king machine trailer 1 - Indie DB<\/a>"},"related":{"file":"http:\/\/rss.indiedb.com\/media\/recommended\/1522983\/feed\/rss.xml","dimensions":"160x120","onclick":"link"},"sources":[{"file":"http:\/\/cdn.dbolical.com\/cache\/videos\/games\/1\/50\/49678\/encode_mp4\/king-machine-trailer.mp4","label":"360p SD","default":"true"},{"file":"http:\/\/cdn.dbolical.com\/cache\/videos\/games\/1\/50\/49678\/encode720p_mp4\/king-machine-trailer.mp4","label":"720p HD"}],"image":"http:\/\/media.indiedb.com\/cache\/images\/games\/1\/50\/49678\/thumb_620x2000\/king-machine-trailer.mp4.jpg","advertising":{"client":"vast","tag":"http:\/\/ads.intergi.com\/adrawdata\/3.0\/5205\/4251742\/0\/1013\/ADTECH;cors=yes;width=560;height=315;referring_url=http:\/\/www.indiedb.com\/games\/king-machine\/videos\/king-machine-trailer-1;content_url=http:\/\/www.indiedb.com\/games\/king-machine\/videos\/king-machine-trailer-1;media_id=1522983;title=king+machine+trailer+1;device=__DEVICE__;model=__MODEL__;os=Windows+OS;osversion=__OSVERSION__;ua=__UA__;ip=109.171.17.81;uniqueid=1522983;tags=__TAGS__;number=58cac25928151;time=1489683033"},"width":620,"height":349}).once("play", function(event) {
+ videoAnalytics("play");
+}).once("complete", function(event) {
+ videoAnalytics("completed");
+});
+</script>
+ ''', 'dummy'),
+ {
+ 'title': 'king machine trailer 1',
+ 'thumbnail': 'http://media.indiedb.com/cache/images/games/1/50/49678/thumb_620x2000/king-machine-trailer.mp4.jpg',
+ 'formats': [{
+ 'url': 'http://cdn.dbolical.com/cache/videos/games/1/50/49678/encode_mp4/king-machine-trailer.mp4',
+ 'height': 360,
+ 'ext': 'mp4'
+ }, {
+ 'url': 'http://cdn.dbolical.com/cache/videos/games/1/50/49678/encode720p_mp4/king-machine-trailer.mp4',
+ 'height': 720,
+ 'ext': 'mp4'
+ }]
+ })
+
+ def test_parse_m3u8_formats(self):
+ _TEST_CASES = [
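+ # (fixture under test/testdata/m3u8/, manifest URL, expected formats, expected subtitles)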
+ (
+ # https://github.com/ytdl-org/youtube-dl/issues/11995
+ # http://teamcoco.com/video/clueless-gamer-super-bowl-for-honor
+ 'img_bipbop_adv_example_fmp4',
+ 'https://devstreaming-cdn.apple.com/videos/streaming/examples/img_bipbop_adv_example_fmp4/master.m3u8',
+ [{
+ 'format_id': 'aud1-English',
+ 'url': 'https://devstreaming-cdn.apple.com/videos/streaming/examples/img_bipbop_adv_example_fmp4/a1/prog_index.m3u8',
+ 'manifest_url': 'https://devstreaming-cdn.apple.com/videos/streaming/examples/img_bipbop_adv_example_fmp4/master.m3u8',
+ 'language': 'en',
+ 'ext': 'mp4',
+ 'protocol': 'm3u8_native',
+ 'audio_ext': 'mp4',
+ }, {
+ 'format_id': 'aud2-English',
+ 'url': 'https://devstreaming-cdn.apple.com/videos/streaming/examples/img_bipbop_adv_example_fmp4/a2/prog_index.m3u8',
+ 'manifest_url': 'https://devstreaming-cdn.apple.com/videos/streaming/examples/img_bipbop_adv_example_fmp4/master.m3u8',
+ 'language': 'en',
+ 'ext': 'mp4',
+ 'protocol': 'm3u8_native',
+ 'audio_ext': 'mp4',
+ }, {
+ 'format_id': 'aud3-English',
+ 'url': 'https://devstreaming-cdn.apple.com/videos/streaming/examples/img_bipbop_adv_example_fmp4/a3/prog_index.m3u8',
+ 'manifest_url': 'https://devstreaming-cdn.apple.com/videos/streaming/examples/img_bipbop_adv_example_fmp4/master.m3u8',
+ 'language': 'en',
+ 'ext': 'mp4',
+ 'protocol': 'm3u8_native',
+ 'audio_ext': 'mp4',
+ }, {
+ 'format_id': '530',
+ 'url': 'https://devstreaming-cdn.apple.com/videos/streaming/examples/img_bipbop_adv_example_fmp4/v2/prog_index.m3u8',
+ 'manifest_url': 'https://devstreaming-cdn.apple.com/videos/streaming/examples/img_bipbop_adv_example_fmp4/master.m3u8',
+ 'ext': 'mp4',
+ 'protocol': 'm3u8_native',
+ 'width': 480,
+ 'height': 270,
+ 'vcodec': 'avc1.640015',
+ }, {
+ 'format_id': '561',
+ 'url': 'https://devstreaming-cdn.apple.com/videos/streaming/examples/img_bipbop_adv_example_fmp4/v2/prog_index.m3u8',
+ 'manifest_url': 'https://devstreaming-cdn.apple.com/videos/streaming/examples/img_bipbop_adv_example_fmp4/master.m3u8',
+ 'ext': 'mp4',
+ 'protocol': 'm3u8_native',
+ 'width': 480,
+ 'height': 270,
+ 'vcodec': 'avc1.640015',
+ }, {
+ 'format_id': '753',
+ 'url': 'https://devstreaming-cdn.apple.com/videos/streaming/examples/img_bipbop_adv_example_fmp4/v2/prog_index.m3u8',
+ 'manifest_url': 'https://devstreaming-cdn.apple.com/videos/streaming/examples/img_bipbop_adv_example_fmp4/master.m3u8',
+ 'ext': 'mp4',
+ 'protocol': 'm3u8_native',
+ 'width': 480,
+ 'height': 270,
+ 'vcodec': 'avc1.640015',
+ }, {
+ 'format_id': '895',
+ 'url': 'https://devstreaming-cdn.apple.com/videos/streaming/examples/img_bipbop_adv_example_fmp4/v3/prog_index.m3u8',
+ 'manifest_url': 'https://devstreaming-cdn.apple.com/videos/streaming/examples/img_bipbop_adv_example_fmp4/master.m3u8',
+ 'ext': 'mp4',
+ 'protocol': 'm3u8_native',
+ 'width': 640,
+ 'height': 360,
+ 'vcodec': 'avc1.64001e',
+ }, {
+ 'format_id': '926',
+ 'url': 'https://devstreaming-cdn.apple.com/videos/streaming/examples/img_bipbop_adv_example_fmp4/v3/prog_index.m3u8',
+ 'manifest_url': 'https://devstreaming-cdn.apple.com/videos/streaming/examples/img_bipbop_adv_example_fmp4/master.m3u8',
+ 'ext': 'mp4',
+ 'protocol': 'm3u8_native',
+ 'width': 640,
+ 'height': 360,
+ 'vcodec': 'avc1.64001e',
+ }, {
+ 'format_id': '1118',
+ 'url': 'https://devstreaming-cdn.apple.com/videos/streaming/examples/img_bipbop_adv_example_fmp4/v3/prog_index.m3u8',
+ 'manifest_url': 'https://devstreaming-cdn.apple.com/videos/streaming/examples/img_bipbop_adv_example_fmp4/master.m3u8',
+ 'ext': 'mp4',
+ 'protocol': 'm3u8_native',
+ 'width': 640,
+ 'height': 360,
+ 'vcodec': 'avc1.64001e',
+ }, {
+ 'format_id': '1265',
+ 'url': 'https://devstreaming-cdn.apple.com/videos/streaming/examples/img_bipbop_adv_example_fmp4/v4/prog_index.m3u8',
+ 'manifest_url': 'https://devstreaming-cdn.apple.com/videos/streaming/examples/img_bipbop_adv_example_fmp4/master.m3u8',
+ 'ext': 'mp4',
+ 'protocol': 'm3u8_native',
+ 'width': 768,
+ 'height': 432,
+ 'vcodec': 'avc1.64001e',
+ }, {
+ 'format_id': '1295',
+ 'url': 'https://devstreaming-cdn.apple.com/videos/streaming/examples/img_bipbop_adv_example_fmp4/v4/prog_index.m3u8',
+ 'manifest_url': 'https://devstreaming-cdn.apple.com/videos/streaming/examples/img_bipbop_adv_example_fmp4/master.m3u8',
+ 'ext': 'mp4',
+ 'protocol': 'm3u8_native',
+ 'width': 768,
+ 'height': 432,
+ 'vcodec': 'avc1.64001e',
+ }, {
+ 'format_id': '1487',
+ 'url': 'https://devstreaming-cdn.apple.com/videos/streaming/examples/img_bipbop_adv_example_fmp4/v4/prog_index.m3u8',
+ 'manifest_url': 'https://devstreaming-cdn.apple.com/videos/streaming/examples/img_bipbop_adv_example_fmp4/master.m3u8',
+ 'ext': 'mp4',
+ 'protocol': 'm3u8_native',
+ 'width': 768,
+ 'height': 432,
+ 'vcodec': 'avc1.64001e',
+ }, {
+ 'format_id': '2168',
+ 'url': 'https://devstreaming-cdn.apple.com/videos/streaming/examples/img_bipbop_adv_example_fmp4/v5/prog_index.m3u8',
+ 'manifest_url': 'https://devstreaming-cdn.apple.com/videos/streaming/examples/img_bipbop_adv_example_fmp4/master.m3u8',
+ 'ext': 'mp4',
+ 'protocol': 'm3u8_native',
+ 'width': 960,
+ 'height': 540,
+ 'vcodec': 'avc1.640020',
+ }, {
+ 'format_id': '2198',
+ 'url': 'https://devstreaming-cdn.apple.com/videos/streaming/examples/img_bipbop_adv_example_fmp4/v5/prog_index.m3u8',
+ 'manifest_url': 'https://devstreaming-cdn.apple.com/videos/streaming/examples/img_bipbop_adv_example_fmp4/master.m3u8',
+ 'ext': 'mp4',
+ 'protocol': 'm3u8_native',
+ 'width': 960,
+ 'height': 540,
+ 'vcodec': 'avc1.640020',
+ }, {
+ 'format_id': '2390',
+ 'url': 'https://devstreaming-cdn.apple.com/videos/streaming/examples/img_bipbop_adv_example_fmp4/v5/prog_index.m3u8',
+ 'manifest_url': 'https://devstreaming-cdn.apple.com/videos/streaming/examples/img_bipbop_adv_example_fmp4/master.m3u8',
+ 'ext': 'mp4',
+ 'protocol': 'm3u8_native',
+ 'width': 960,
+ 'height': 540,
+ 'vcodec': 'avc1.640020',
+ }, {
+ 'format_id': '3168',
+ 'url': 'https://devstreaming-cdn.apple.com/videos/streaming/examples/img_bipbop_adv_example_fmp4/v6/prog_index.m3u8',
+ 'manifest_url': 'https://devstreaming-cdn.apple.com/videos/streaming/examples/img_bipbop_adv_example_fmp4/master.m3u8',
+ 'ext': 'mp4',
+ 'protocol': 'm3u8_native',
+ 'width': 1280,
+ 'height': 720,
+ 'vcodec': 'avc1.640020',
+ }, {
+ 'format_id': '3199',
+ 'url': 'https://devstreaming-cdn.apple.com/videos/streaming/examples/img_bipbop_adv_example_fmp4/v6/prog_index.m3u8',
+ 'manifest_url': 'https://devstreaming-cdn.apple.com/videos/streaming/examples/img_bipbop_adv_example_fmp4/master.m3u8',
+ 'ext': 'mp4',
+ 'protocol': 'm3u8_native',
+ 'width': 1280,
+ 'height': 720,
+ 'vcodec': 'avc1.640020',
+ }, {
+ 'format_id': '3391',
+ 'url': 'https://devstreaming-cdn.apple.com/videos/streaming/examples/img_bipbop_adv_example_fmp4/v6/prog_index.m3u8',
+ 'manifest_url': 'https://devstreaming-cdn.apple.com/videos/streaming/examples/img_bipbop_adv_example_fmp4/master.m3u8',
+ 'ext': 'mp4',
+ 'protocol': 'm3u8_native',
+ 'width': 1280,
+ 'height': 720,
+ 'vcodec': 'avc1.640020',
+ }, {
+ 'format_id': '4670',
+ 'url': 'https://devstreaming-cdn.apple.com/videos/streaming/examples/img_bipbop_adv_example_fmp4/v7/prog_index.m3u8',
+ 'manifest_url': 'https://devstreaming-cdn.apple.com/videos/streaming/examples/img_bipbop_adv_example_fmp4/master.m3u8',
+ 'ext': 'mp4',
+ 'protocol': 'm3u8_native',
+ 'width': 1920,
+ 'height': 1080,
+ 'vcodec': 'avc1.64002a',
+ }, {
+ 'format_id': '4701',
+ 'url': 'https://devstreaming-cdn.apple.com/videos/streaming/examples/img_bipbop_adv_example_fmp4/v7/prog_index.m3u8',
+ 'manifest_url': 'https://devstreaming-cdn.apple.com/videos/streaming/examples/img_bipbop_adv_example_fmp4/master.m3u8',
+ 'ext': 'mp4',
+ 'protocol': 'm3u8_native',
+ 'width': 1920,
+ 'height': 1080,
+ 'vcodec': 'avc1.64002a',
+ }, {
+ 'format_id': '4893',
+ 'url': 'https://devstreaming-cdn.apple.com/videos/streaming/examples/img_bipbop_adv_example_fmp4/v7/prog_index.m3u8',
+ 'manifest_url': 'https://devstreaming-cdn.apple.com/videos/streaming/examples/img_bipbop_adv_example_fmp4/master.m3u8',
+ 'ext': 'mp4',
+ 'protocol': 'm3u8_native',
+ 'width': 1920,
+ 'height': 1080,
+ 'vcodec': 'avc1.64002a',
+ }, {
+ 'format_id': '6170',
+ 'url': 'https://devstreaming-cdn.apple.com/videos/streaming/examples/img_bipbop_adv_example_fmp4/v8/prog_index.m3u8',
+ 'manifest_url': 'https://devstreaming-cdn.apple.com/videos/streaming/examples/img_bipbop_adv_example_fmp4/master.m3u8',
+ 'ext': 'mp4',
+ 'protocol': 'm3u8_native',
+ 'width': 1920,
+ 'height': 1080,
+ 'vcodec': 'avc1.64002a',
+ }, {
+ 'format_id': '6200',
+ 'url': 'https://devstreaming-cdn.apple.com/videos/streaming/examples/img_bipbop_adv_example_fmp4/v8/prog_index.m3u8',
+ 'manifest_url': 'https://devstreaming-cdn.apple.com/videos/streaming/examples/img_bipbop_adv_example_fmp4/master.m3u8',
+ 'ext': 'mp4',
+ 'protocol': 'm3u8_native',
+ 'width': 1920,
+ 'height': 1080,
+ 'vcodec': 'avc1.64002a',
+ }, {
+ 'format_id': '6392',
+ 'url': 'https://devstreaming-cdn.apple.com/videos/streaming/examples/img_bipbop_adv_example_fmp4/v8/prog_index.m3u8',
+ 'manifest_url': 'https://devstreaming-cdn.apple.com/videos/streaming/examples/img_bipbop_adv_example_fmp4/master.m3u8',
+ 'ext': 'mp4',
+ 'protocol': 'm3u8_native',
+ 'width': 1920,
+ 'height': 1080,
+ 'vcodec': 'avc1.64002a',
+ }, {
+ 'format_id': '7968',
+ 'url': 'https://devstreaming-cdn.apple.com/videos/streaming/examples/img_bipbop_adv_example_fmp4/v9/prog_index.m3u8',
+ 'manifest_url': 'https://devstreaming-cdn.apple.com/videos/streaming/examples/img_bipbop_adv_example_fmp4/master.m3u8',
+ 'ext': 'mp4',
+ 'protocol': 'm3u8_native',
+ 'width': 1920,
+ 'height': 1080,
+ 'vcodec': 'avc1.64002a',
+ }, {
+ 'format_id': '7998',
+ 'url': 'https://devstreaming-cdn.apple.com/videos/streaming/examples/img_bipbop_adv_example_fmp4/v9/prog_index.m3u8',
+ 'manifest_url': 'https://devstreaming-cdn.apple.com/videos/streaming/examples/img_bipbop_adv_example_fmp4/master.m3u8',
+ 'ext': 'mp4',
+ 'protocol': 'm3u8_native',
+ 'width': 1920,
+ 'height': 1080,
+ 'vcodec': 'avc1.64002a',
+ }, {
+ 'format_id': '8190',
+ 'url': 'https://devstreaming-cdn.apple.com/videos/streaming/examples/img_bipbop_adv_example_fmp4/v9/prog_index.m3u8',
+ 'manifest_url': 'https://devstreaming-cdn.apple.com/videos/streaming/examples/img_bipbop_adv_example_fmp4/master.m3u8',
+ 'ext': 'mp4',
+ 'protocol': 'm3u8_native',
+ 'width': 1920,
+ 'height': 1080,
+ 'vcodec': 'avc1.64002a',
+ }],
+ {}
+ ),
+ (
+ 'bipbop_16x9',
+ 'https://devstreaming-cdn.apple.com/videos/streaming/examples/bipbop_16x9/bipbop_16x9_variant.m3u8',
+ [{
+ 'format_id': 'bipbop_audio-BipBop Audio 2',
+ 'format_index': None,
+ 'url': 'https://devstreaming-cdn.apple.com/videos/streaming/examples/bipbop_16x9/alternate_audio_aac/prog_index.m3u8',
+ 'manifest_url': 'https://devstreaming-cdn.apple.com/videos/streaming/examples/bipbop_16x9/bipbop_16x9_variant.m3u8',
+ 'language': 'eng',
+ 'ext': 'mp4',
+ 'protocol': 'm3u8_native',
+ 'preference': None,
+ 'quality': None,
+ 'vcodec': 'none',
+ 'audio_ext': 'mp4',
+ 'video_ext': 'none',
+ }, {
+ 'format_id': '41',
+ 'format_index': None,
+ 'url': 'https://devstreaming-cdn.apple.com/videos/streaming/examples/bipbop_16x9/gear0/prog_index.m3u8',
+ 'manifest_url': 'https://devstreaming-cdn.apple.com/videos/streaming/examples/bipbop_16x9/bipbop_16x9_variant.m3u8',
+ 'tbr': 41.457,
+ 'ext': 'mp4',
+ 'fps': None,
+ 'protocol': 'm3u8_native',
+ 'preference': None,
+ 'quality': None,
+ 'vcodec': 'none',
+ 'acodec': 'mp4a.40.2',
+ 'audio_ext': 'mp4',
+ 'video_ext': 'none',
+ 'abr': 41.457,
+ }, {
+ 'format_id': '263',
+ 'format_index': None,
+ 'url': 'https://devstreaming-cdn.apple.com/videos/streaming/examples/bipbop_16x9/gear1/prog_index.m3u8',
+ 'manifest_url': 'https://devstreaming-cdn.apple.com/videos/streaming/examples/bipbop_16x9/bipbop_16x9_variant.m3u8',
+ 'tbr': 263.851,
+ 'ext': 'mp4',
+ 'fps': None,
+ 'protocol': 'm3u8_native',
+ 'preference': None,
+ 'quality': None,
+ 'width': 416,
+ 'height': 234,
+ 'vcodec': 'avc1.4d400d',
+ 'acodec': 'mp4a.40.2',
+ 'video_ext': 'mp4',
+ 'audio_ext': 'none',
+ }, {
+ 'format_id': '577',
+ 'format_index': None,
+ 'url': 'https://devstreaming-cdn.apple.com/videos/streaming/examples/bipbop_16x9/gear2/prog_index.m3u8',
+ 'manifest_url': 'https://devstreaming-cdn.apple.com/videos/streaming/examples/bipbop_16x9/bipbop_16x9_variant.m3u8',
+ 'tbr': 577.61,
+ 'ext': 'mp4',
+ 'fps': None,
+ 'protocol': 'm3u8_native',
+ 'preference': None,
+ 'quality': None,
+ 'width': 640,
+ 'height': 360,
+ 'vcodec': 'avc1.4d401e',
+ 'acodec': 'mp4a.40.2',
+ 'video_ext': 'mp4',
+ 'audio_ext': 'none',
+ }, {
+ 'format_id': '915',
+ 'format_index': None,
+ 'url': 'https://devstreaming-cdn.apple.com/videos/streaming/examples/bipbop_16x9/gear3/prog_index.m3u8',
+ 'manifest_url': 'https://devstreaming-cdn.apple.com/videos/streaming/examples/bipbop_16x9/bipbop_16x9_variant.m3u8',
+ 'tbr': 915.905,
+ 'ext': 'mp4',
+ 'fps': None,
+ 'protocol': 'm3u8_native',
+ 'preference': None,
+ 'quality': None,
+ 'width': 960,
+ 'height': 540,
+ 'vcodec': 'avc1.4d401f',
+ 'acodec': 'mp4a.40.2',
+ 'video_ext': 'mp4',
+ 'audio_ext': 'none',
+ }, {
+ 'format_id': '1030',
+ 'format_index': None,
+ 'url': 'https://devstreaming-cdn.apple.com/videos/streaming/examples/bipbop_16x9/gear4/prog_index.m3u8',
+ 'manifest_url': 'https://devstreaming-cdn.apple.com/videos/streaming/examples/bipbop_16x9/bipbop_16x9_variant.m3u8',
+ 'tbr': 1030.138,
+ 'ext': 'mp4',
+ 'fps': None,
+ 'protocol': 'm3u8_native',
+ 'preference': None,
+ 'quality': None,
+ 'width': 1280,
+ 'height': 720,
+ 'vcodec': 'avc1.4d401f',
+ 'acodec': 'mp4a.40.2',
+ 'video_ext': 'mp4',
+ 'audio_ext': 'none',
+ }, {
+ 'format_id': '1924',
+ 'format_index': None,
+ 'url': 'https://devstreaming-cdn.apple.com/videos/streaming/examples/bipbop_16x9/gear5/prog_index.m3u8',
+ 'manifest_url': 'https://devstreaming-cdn.apple.com/videos/streaming/examples/bipbop_16x9/bipbop_16x9_variant.m3u8',
+ 'tbr': 1924.009,
+ 'ext': 'mp4',
+ 'fps': None,
+ 'protocol': 'm3u8_native',
+ 'preference': None,
+ 'quality': None,
+ 'width': 1920,
+ 'height': 1080,
+ 'vcodec': 'avc1.4d401f',
+ 'acodec': 'mp4a.40.2',
+ 'video_ext': 'mp4',
+ 'audio_ext': 'none',
+ }],
+ {
+ 'en': [{
+ 'url': 'https://devstreaming-cdn.apple.com/videos/streaming/examples/bipbop_16x9/subtitles/eng/prog_index.m3u8',
+ 'ext': 'vtt',
+ 'protocol': 'm3u8_native'
+ }, {
+ 'url': 'https://devstreaming-cdn.apple.com/videos/streaming/examples/bipbop_16x9/subtitles/eng_forced/prog_index.m3u8',
+ 'ext': 'vtt',
+ 'protocol': 'm3u8_native'
+ }],
+ 'fr': [{
+ 'url': 'https://devstreaming-cdn.apple.com/videos/streaming/examples/bipbop_16x9/subtitles/fra/prog_index.m3u8',
+ 'ext': 'vtt',
+ 'protocol': 'm3u8_native'
+ }, {
+ 'url': 'https://devstreaming-cdn.apple.com/videos/streaming/examples/bipbop_16x9/subtitles/fra_forced/prog_index.m3u8',
+ 'ext': 'vtt',
+ 'protocol': 'm3u8_native'
+ }],
+ 'es': [{
+ 'url': 'https://devstreaming-cdn.apple.com/videos/streaming/examples/bipbop_16x9/subtitles/spa/prog_index.m3u8',
+ 'ext': 'vtt',
+ 'protocol': 'm3u8_native'
+ }, {
+ 'url': 'https://devstreaming-cdn.apple.com/videos/streaming/examples/bipbop_16x9/subtitles/spa_forced/prog_index.m3u8',
+ 'ext': 'vtt',
+ 'protocol': 'm3u8_native'
+ }],
+ 'ja': [{
+ 'url': 'https://devstreaming-cdn.apple.com/videos/streaming/examples/bipbop_16x9/subtitles/jpn/prog_index.m3u8',
+ 'ext': 'vtt',
+ 'protocol': 'm3u8_native'
+ }, {
+ 'url': 'https://devstreaming-cdn.apple.com/videos/streaming/examples/bipbop_16x9/subtitles/jpn_forced/prog_index.m3u8',
+ 'ext': 'vtt',
+ 'protocol': 'm3u8_native'
+ }],
+ }
+ ),
+ ]
+
+ for m3u8_file, m3u8_url, expected_formats, expected_subs in _TEST_CASES:
+ with open('./test/testdata/m3u8/%s.m3u8' % m3u8_file, encoding='utf-8') as f:
+ formats, subs = self.ie._parse_m3u8_formats_and_subtitles(
+ f.read(), m3u8_url, ext='mp4')
+ self.ie._sort_formats(formats)
+ expect_value(self, formats, expected_formats, None)
+ expect_value(self, subs, expected_subs, None)
+
+ def test_parse_mpd_formats(self):
+ _TEST_CASES = [
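+ # (fixture under test/testdata/mpd/, mpd_url, mpd_base_url, expected formats, expected subtitles)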
+ (
+ # https://github.com/ytdl-org/youtube-dl/issues/13919
+ # Also tests duplicate representation ids, see
+ # https://github.com/ytdl-org/youtube-dl/issues/15111
+ 'float_duration',
+ 'http://unknown/manifest.mpd', # mpd_url
+ None, # mpd_base_url
+ [{
+ 'manifest_url': 'http://unknown/manifest.mpd',
+ 'ext': 'm4a',
+ 'format_id': '318597',
+ 'format_note': 'DASH audio',
+ 'protocol': 'http_dash_segments',
+ 'acodec': 'mp4a.40.2',
+ 'vcodec': 'none',
+ 'tbr': 61.587,
+ }, {
+ 'manifest_url': 'http://unknown/manifest.mpd',
+ 'ext': 'mp4',
+ 'format_id': '318597',
+ 'format_note': 'DASH video',
+ 'protocol': 'http_dash_segments',
+ 'acodec': 'none',
+ 'vcodec': 'avc1.42001f',
+ 'tbr': 318.597,
+ 'width': 340,
+ 'height': 192,
+ }, {
+ 'manifest_url': 'http://unknown/manifest.mpd',
+ 'ext': 'mp4',
+ 'format_id': '638590',
+ 'format_note': 'DASH video',
+ 'protocol': 'http_dash_segments',
+ 'acodec': 'none',
+ 'vcodec': 'avc1.42001f',
+ 'tbr': 638.59,
+ 'width': 512,
+ 'height': 288,
+ }, {
+ 'manifest_url': 'http://unknown/manifest.mpd',
+ 'ext': 'mp4',
+ 'format_id': '1022565',
+ 'format_note': 'DASH video',
+ 'protocol': 'http_dash_segments',
+ 'acodec': 'none',
+ 'vcodec': 'avc1.4d001f',
+ 'tbr': 1022.565,
+ 'width': 688,
+ 'height': 384,
+ }, {
+ 'manifest_url': 'http://unknown/manifest.mpd',
+ 'ext': 'mp4',
+ 'format_id': '2046506',
+ 'format_note': 'DASH video',
+ 'protocol': 'http_dash_segments',
+ 'acodec': 'none',
+ 'vcodec': 'avc1.4d001f',
+ 'tbr': 2046.506,
+ 'width': 1024,
+ 'height': 576,
+ }, {
+ 'manifest_url': 'http://unknown/manifest.mpd',
+ 'ext': 'mp4',
+ 'format_id': '3998017',
+ 'format_note': 'DASH video',
+ 'protocol': 'http_dash_segments',
+ 'acodec': 'none',
+ 'vcodec': 'avc1.640029',
+ 'tbr': 3998.017,
+ 'width': 1280,
+ 'height': 720,
+ }, {
+ 'manifest_url': 'http://unknown/manifest.mpd',
+ 'ext': 'mp4',
+ 'format_id': '5997485',
+ 'format_note': 'DASH video',
+ 'protocol': 'http_dash_segments',
+ 'acodec': 'none',
+ 'vcodec': 'avc1.640032',
+ 'tbr': 5997.485,
+ 'width': 1920,
+ 'height': 1080,
+ }],
+ {},
+ ), (
+ # https://github.com/ytdl-org/youtube-dl/pull/14844
+ 'urls_only',
+ 'http://unknown/manifest.mpd', # mpd_url
+ None, # mpd_base_url
+ [{
+ 'manifest_url': 'http://unknown/manifest.mpd',
+ 'ext': 'mp4',
+ 'format_id': 'h264_aac_144p_m4s',
+ 'format_note': 'DASH video',
+ 'protocol': 'http_dash_segments',
+ 'acodec': 'mp4a.40.2',
+ 'vcodec': 'avc3.42c01e',
+ 'tbr': 200,
+ 'width': 256,
+ 'height': 144,
+ }, {
+ 'manifest_url': 'http://unknown/manifest.mpd',
+ 'ext': 'mp4',
+ 'format_id': 'h264_aac_240p_m4s',
+ 'format_note': 'DASH video',
+ 'protocol': 'http_dash_segments',
+ 'acodec': 'mp4a.40.2',
+ 'vcodec': 'avc3.42c01e',
+ 'tbr': 400,
+ 'width': 424,
+ 'height': 240,
+ }, {
+ 'manifest_url': 'http://unknown/manifest.mpd',
+ 'ext': 'mp4',
+ 'format_id': 'h264_aac_360p_m4s',
+ 'format_note': 'DASH video',
+ 'protocol': 'http_dash_segments',
+ 'acodec': 'mp4a.40.2',
+ 'vcodec': 'avc3.42c01e',
+ 'tbr': 800,
+ 'width': 640,
+ 'height': 360,
+ }, {
+ 'manifest_url': 'http://unknown/manifest.mpd',
+ 'ext': 'mp4',
+ 'format_id': 'h264_aac_480p_m4s',
+ 'format_note': 'DASH video',
+ 'protocol': 'http_dash_segments',
+ 'acodec': 'mp4a.40.2',
+ 'vcodec': 'avc3.42c01e',
+ 'tbr': 1200,
+ 'width': 856,
+ 'height': 480,
+ }, {
+ 'manifest_url': 'http://unknown/manifest.mpd',
+ 'ext': 'mp4',
+ 'format_id': 'h264_aac_576p_m4s',
+ 'format_note': 'DASH video',
+ 'protocol': 'http_dash_segments',
+ 'acodec': 'mp4a.40.2',
+ 'vcodec': 'avc3.42c01e',
+ 'tbr': 1600,
+ 'width': 1024,
+ 'height': 576,
+ }, {
+ 'manifest_url': 'http://unknown/manifest.mpd',
+ 'ext': 'mp4',
+ 'format_id': 'h264_aac_720p_m4s',
+ 'format_note': 'DASH video',
+ 'protocol': 'http_dash_segments',
+ 'acodec': 'mp4a.40.2',
+ 'vcodec': 'avc3.42c01e',
+ 'tbr': 2400,
+ 'width': 1280,
+ 'height': 720,
+ }, {
+ 'manifest_url': 'http://unknown/manifest.mpd',
+ 'ext': 'mp4',
+ 'format_id': 'h264_aac_1080p_m4s',
+ 'format_note': 'DASH video',
+ 'protocol': 'http_dash_segments',
+ 'acodec': 'mp4a.40.2',
+ 'vcodec': 'avc3.42c01e',
+ 'tbr': 4400,
+ 'width': 1920,
+ 'height': 1080,
+ }],
+ {},
+ ), (
+ # https://github.com/ytdl-org/youtube-dl/issues/20346
+ # Media considered unfragmented even though it contains
+ # Initialization tag
+ 'unfragmented',
+ 'https://v.redd.it/hw1x7rcg7zl21/DASHPlaylist.mpd', # mpd_url
+ 'https://v.redd.it/hw1x7rcg7zl21', # mpd_base_url
+ [{
+ 'url': 'https://v.redd.it/hw1x7rcg7zl21/audio',
+ 'manifest_url': 'https://v.redd.it/hw1x7rcg7zl21/DASHPlaylist.mpd',
+ 'ext': 'm4a',
+ 'format_id': 'AUDIO-1',
+ 'format_note': 'DASH audio',
+ 'container': 'm4a_dash',
+ 'acodec': 'mp4a.40.2',
+ 'vcodec': 'none',
+ 'tbr': 129.87,
+ 'asr': 48000,
+ }, {
+ 'url': 'https://v.redd.it/hw1x7rcg7zl21/DASH_240',
+ 'manifest_url': 'https://v.redd.it/hw1x7rcg7zl21/DASHPlaylist.mpd',
+ 'ext': 'mp4',
+ 'format_id': 'VIDEO-2',
+ 'format_note': 'DASH video',
+ 'container': 'mp4_dash',
+ 'acodec': 'none',
+ 'vcodec': 'avc1.4d401e',
+ 'tbr': 608.0,
+ 'width': 240,
+ 'height': 240,
+ 'fps': 30,
+ }, {
+ 'url': 'https://v.redd.it/hw1x7rcg7zl21/DASH_360',
+ 'manifest_url': 'https://v.redd.it/hw1x7rcg7zl21/DASHPlaylist.mpd',
+ 'ext': 'mp4',
+ 'format_id': 'VIDEO-1',
+ 'format_note': 'DASH video',
+ 'container': 'mp4_dash',
+ 'acodec': 'none',
+ 'vcodec': 'avc1.4d401e',
+ 'tbr': 804.261,
+ 'width': 360,
+ 'height': 360,
+ 'fps': 30,
+ }],
+ {},
+ ), (
+ 'subtitles',
+ 'https://sdn-global-streaming-cache-3qsdn.akamaized.net/stream/3144/files/17/07/672975/3144-kZT4LWMQw6Rh7Kpd.ism/manifest.mpd',
+ 'https://sdn-global-streaming-cache-3qsdn.akamaized.net/stream/3144/files/17/07/672975/3144-kZT4LWMQw6Rh7Kpd.ism/',
+ [{
+ 'format_id': 'audio=128001',
+ 'manifest_url': 'https://sdn-global-streaming-cache-3qsdn.akamaized.net/stream/3144/files/17/07/672975/3144-kZT4LWMQw6Rh7Kpd.ism/manifest.mpd',
+ 'ext': 'm4a',
+ 'tbr': 128.001,
+ 'asr': 48000,
+ 'format_note': 'DASH audio',
+ 'container': 'm4a_dash',
+ 'vcodec': 'none',
+ 'acodec': 'mp4a.40.2',
+ 'url': 'https://sdn-global-streaming-cache-3qsdn.akamaized.net/stream/3144/files/17/07/672975/3144-kZT4LWMQw6Rh7Kpd.ism/manifest.mpd',
+ 'fragment_base_url': 'https://sdn-global-streaming-cache-3qsdn.akamaized.net/stream/3144/files/17/07/672975/3144-kZT4LWMQw6Rh7Kpd.ism/dash/',
+ 'protocol': 'http_dash_segments',
+ 'audio_ext': 'm4a',
+ 'video_ext': 'none',
+ 'abr': 128.001,
+ }, {
+ 'format_id': 'video=100000',
+ 'manifest_url': 'https://sdn-global-streaming-cache-3qsdn.akamaized.net/stream/3144/files/17/07/672975/3144-kZT4LWMQw6Rh7Kpd.ism/manifest.mpd',
+ 'ext': 'mp4',
+ 'width': 336,
+ 'height': 144,
+ 'tbr': 100,
+ 'format_note': 'DASH video',
+ 'container': 'mp4_dash',
+ 'vcodec': 'avc1.4D401F',
+ 'acodec': 'none',
+ 'url': 'https://sdn-global-streaming-cache-3qsdn.akamaized.net/stream/3144/files/17/07/672975/3144-kZT4LWMQw6Rh7Kpd.ism/manifest.mpd',
+ 'fragment_base_url': 'https://sdn-global-streaming-cache-3qsdn.akamaized.net/stream/3144/files/17/07/672975/3144-kZT4LWMQw6Rh7Kpd.ism/dash/',
+ 'protocol': 'http_dash_segments',
+ 'video_ext': 'mp4',
+ 'audio_ext': 'none',
+ 'vbr': 100,
+ }, {
+ 'format_id': 'video=326000',
+ 'manifest_url': 'https://sdn-global-streaming-cache-3qsdn.akamaized.net/stream/3144/files/17/07/672975/3144-kZT4LWMQw6Rh7Kpd.ism/manifest.mpd',
+ 'ext': 'mp4',
+ 'width': 562,
+ 'height': 240,
+ 'tbr': 326,
+ 'format_note': 'DASH video',
+ 'container': 'mp4_dash',
+ 'vcodec': 'avc1.4D401F',
+ 'acodec': 'none',
+ 'url': 'https://sdn-global-streaming-cache-3qsdn.akamaized.net/stream/3144/files/17/07/672975/3144-kZT4LWMQw6Rh7Kpd.ism/manifest.mpd',
+ 'fragment_base_url': 'https://sdn-global-streaming-cache-3qsdn.akamaized.net/stream/3144/files/17/07/672975/3144-kZT4LWMQw6Rh7Kpd.ism/dash/',
+ 'protocol': 'http_dash_segments',
+ 'video_ext': 'mp4',
+ 'audio_ext': 'none',
+ 'vbr': 326,
+ }, {
+ 'format_id': 'video=698000',
+ 'manifest_url': 'https://sdn-global-streaming-cache-3qsdn.akamaized.net/stream/3144/files/17/07/672975/3144-kZT4LWMQw6Rh7Kpd.ism/manifest.mpd',
+ 'ext': 'mp4',
+ 'width': 844,
+ 'height': 360,
+ 'tbr': 698,
+ 'format_note': 'DASH video',
+ 'container': 'mp4_dash',
+ 'vcodec': 'avc1.4D401F',
+ 'acodec': 'none',
+ 'url': 'https://sdn-global-streaming-cache-3qsdn.akamaized.net/stream/3144/files/17/07/672975/3144-kZT4LWMQw6Rh7Kpd.ism/manifest.mpd',
+ 'fragment_base_url': 'https://sdn-global-streaming-cache-3qsdn.akamaized.net/stream/3144/files/17/07/672975/3144-kZT4LWMQw6Rh7Kpd.ism/dash/',
+ 'protocol': 'http_dash_segments',
+ 'video_ext': 'mp4',
+ 'audio_ext': 'none',
+ 'vbr': 698,
+ }, {
+ 'format_id': 'video=1493000',
+ 'manifest_url': 'https://sdn-global-streaming-cache-3qsdn.akamaized.net/stream/3144/files/17/07/672975/3144-kZT4LWMQw6Rh7Kpd.ism/manifest.mpd',
+ 'ext': 'mp4',
+ 'width': 1126,
+ 'height': 480,
+ 'tbr': 1493,
+ 'format_note': 'DASH video',
+ 'container': 'mp4_dash',
+ 'vcodec': 'avc1.4D401F',
+ 'acodec': 'none',
+ 'url': 'https://sdn-global-streaming-cache-3qsdn.akamaized.net/stream/3144/files/17/07/672975/3144-kZT4LWMQw6Rh7Kpd.ism/manifest.mpd',
+ 'fragment_base_url': 'https://sdn-global-streaming-cache-3qsdn.akamaized.net/stream/3144/files/17/07/672975/3144-kZT4LWMQw6Rh7Kpd.ism/dash/',
+ 'protocol': 'http_dash_segments',
+ 'video_ext': 'mp4',
+ 'audio_ext': 'none',
+ 'vbr': 1493,
+ }, {
+ 'format_id': 'video=4482000',
+ 'manifest_url': 'https://sdn-global-streaming-cache-3qsdn.akamaized.net/stream/3144/files/17/07/672975/3144-kZT4LWMQw6Rh7Kpd.ism/manifest.mpd',
+ 'ext': 'mp4',
+ 'width': 1688,
+ 'height': 720,
+ 'tbr': 4482,
+ 'format_note': 'DASH video',
+ 'container': 'mp4_dash',
+ 'vcodec': 'avc1.4D401F',
+ 'acodec': 'none',
+ 'url': 'https://sdn-global-streaming-cache-3qsdn.akamaized.net/stream/3144/files/17/07/672975/3144-kZT4LWMQw6Rh7Kpd.ism/manifest.mpd',
+ 'fragment_base_url': 'https://sdn-global-streaming-cache-3qsdn.akamaized.net/stream/3144/files/17/07/672975/3144-kZT4LWMQw6Rh7Kpd.ism/dash/',
+ 'protocol': 'http_dash_segments',
+ 'video_ext': 'mp4',
+ 'audio_ext': 'none',
+ 'vbr': 4482,
+ }],
+ {
+ 'en': [
+ {
+ 'ext': 'mp4',
+ 'manifest_url': 'https://sdn-global-streaming-cache-3qsdn.akamaized.net/stream/3144/files/17/07/672975/3144-kZT4LWMQw6Rh7Kpd.ism/manifest.mpd',
+ 'url': 'https://sdn-global-streaming-cache-3qsdn.akamaized.net/stream/3144/files/17/07/672975/3144-kZT4LWMQw6Rh7Kpd.ism/manifest.mpd',
+ 'fragment_base_url': 'https://sdn-global-streaming-cache-3qsdn.akamaized.net/stream/3144/files/17/07/672975/3144-kZT4LWMQw6Rh7Kpd.ism/dash/',
+ 'protocol': 'http_dash_segments',
+ }
+ ]
+ },
+ )
+ ]
+
+ for mpd_file, mpd_url, mpd_base_url, expected_formats, expected_subtitles in _TEST_CASES:
+ with open('./test/testdata/mpd/%s.mpd' % mpd_file, encoding='utf-8') as f:
+ formats, subtitles = self.ie._parse_mpd_formats_and_subtitles(
+ compat_etree_fromstring(f.read().encode()),
+ mpd_base_url=mpd_base_url, mpd_url=mpd_url)
+ self.ie._sort_formats(formats)
+ expect_value(self, formats, expected_formats, None)
+ expect_value(self, subtitles, expected_subtitles, None)
+
+ def test_parse_ism_formats(self):
+ _TEST_CASES = [
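+ # (fixture under test/testdata/ism/, Manifest URL, expected formats, expected subtitles)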
+ (
+ 'sintel',
+ 'https://sdn-global-streaming-cache-3qsdn.akamaized.net/stream/3144/files/17/07/672975/3144-kZT4LWMQw6Rh7Kpd.ism/Manifest',
+ [{
+ 'format_id': 'audio-128',
+ 'url': 'https://sdn-global-streaming-cache-3qsdn.akamaized.net/stream/3144/files/17/07/672975/3144-kZT4LWMQw6Rh7Kpd.ism/Manifest',
+ 'manifest_url': 'https://sdn-global-streaming-cache-3qsdn.akamaized.net/stream/3144/files/17/07/672975/3144-kZT4LWMQw6Rh7Kpd.ism/Manifest',
+ 'ext': 'isma',
+ 'tbr': 128,
+ 'asr': 48000,
+ 'vcodec': 'none',
+ 'acodec': 'AACL',
+ 'protocol': 'ism',
+ 'audio_channels': 2,
+ '_download_params': {
+ 'stream_type': 'audio',
+ 'duration': 8880746666,
+ 'timescale': 10000000,
+ 'width': 0,
+ 'height': 0,
+ 'fourcc': 'AACL',
+ 'codec_private_data': '1190',
+ 'sampling_rate': 48000,
+ 'channels': 2,
+ 'bits_per_sample': 16,
+ 'nal_unit_length_field': 4
+ },
+ }, {
+ 'format_id': 'video-100',
+ 'url': 'https://sdn-global-streaming-cache-3qsdn.akamaized.net/stream/3144/files/17/07/672975/3144-kZT4LWMQw6Rh7Kpd.ism/Manifest',
+ 'manifest_url': 'https://sdn-global-streaming-cache-3qsdn.akamaized.net/stream/3144/files/17/07/672975/3144-kZT4LWMQw6Rh7Kpd.ism/Manifest',
+ 'ext': 'ismv',
+ 'width': 336,
+ 'height': 144,
+ 'tbr': 100,
+ 'vcodec': 'AVC1',
+ 'acodec': 'none',
+ 'protocol': 'ism',
+ '_download_params': {
+ 'stream_type': 'video',
+ 'duration': 8880746666,
+ 'timescale': 10000000,
+ 'width': 336,
+ 'height': 144,
+ 'fourcc': 'AVC1',
+ 'codec_private_data': '00000001674D401FDA0544EFFC2D002CBC40000003004000000C03C60CA80000000168EF32C8',
+ 'channels': 2,
+ 'bits_per_sample': 16,
+ 'nal_unit_length_field': 4
+ },
+ }, {
+ 'format_id': 'video-326',
+ 'url': 'https://sdn-global-streaming-cache-3qsdn.akamaized.net/stream/3144/files/17/07/672975/3144-kZT4LWMQw6Rh7Kpd.ism/Manifest',
+ 'manifest_url': 'https://sdn-global-streaming-cache-3qsdn.akamaized.net/stream/3144/files/17/07/672975/3144-kZT4LWMQw6Rh7Kpd.ism/Manifest',
+ 'ext': 'ismv',
+ 'width': 562,
+ 'height': 240,
+ 'tbr': 326,
+ 'vcodec': 'AVC1',
+ 'acodec': 'none',
+ 'protocol': 'ism',
+ '_download_params': {
+ 'stream_type': 'video',
+ 'duration': 8880746666,
+ 'timescale': 10000000,
+ 'width': 562,
+ 'height': 240,
+ 'fourcc': 'AVC1',
+ 'codec_private_data': '00000001674D401FDA0241FE23FFC3BC83BA44000003000400000300C03C60CA800000000168EF32C8',
+ 'channels': 2,
+ 'bits_per_sample': 16,
+ 'nal_unit_length_field': 4
+ },
+ }, {
+ 'format_id': 'video-698',
+ 'url': 'https://sdn-global-streaming-cache-3qsdn.akamaized.net/stream/3144/files/17/07/672975/3144-kZT4LWMQw6Rh7Kpd.ism/Manifest',
+ 'manifest_url': 'https://sdn-global-streaming-cache-3qsdn.akamaized.net/stream/3144/files/17/07/672975/3144-kZT4LWMQw6Rh7Kpd.ism/Manifest',
+ 'ext': 'ismv',
+ 'width': 844,
+ 'height': 360,
+ 'tbr': 698,
+ 'vcodec': 'AVC1',
+ 'acodec': 'none',
+ 'protocol': 'ism',
+ '_download_params': {
+ 'stream_type': 'video',
+ 'duration': 8880746666,
+ 'timescale': 10000000,
+ 'width': 844,
+ 'height': 360,
+ 'fourcc': 'AVC1',
+ 'codec_private_data': '00000001674D401FDA0350BFB97FF06AF06AD1000003000100000300300F1832A00000000168EF32C8',
+ 'channels': 2,
+ 'bits_per_sample': 16,
+ 'nal_unit_length_field': 4
+ },
+ }, {
+ 'format_id': 'video-1493',
+ 'url': 'https://sdn-global-streaming-cache-3qsdn.akamaized.net/stream/3144/files/17/07/672975/3144-kZT4LWMQw6Rh7Kpd.ism/Manifest',
+ 'manifest_url': 'https://sdn-global-streaming-cache-3qsdn.akamaized.net/stream/3144/files/17/07/672975/3144-kZT4LWMQw6Rh7Kpd.ism/Manifest',
+ 'ext': 'ismv',
+ 'width': 1126,
+ 'height': 480,
+ 'tbr': 1493,
+ 'vcodec': 'AVC1',
+ 'acodec': 'none',
+ 'protocol': 'ism',
+ '_download_params': {
+ 'stream_type': 'video',
+ 'duration': 8880746666,
+ 'timescale': 10000000,
+ 'width': 1126,
+ 'height': 480,
+ 'fourcc': 'AVC1',
+ 'codec_private_data': '00000001674D401FDA011C3DE6FFF0D890D871000003000100000300300F1832A00000000168EF32C8',
+ 'channels': 2,
+ 'bits_per_sample': 16,
+ 'nal_unit_length_field': 4
+ },
+ }, {
+ 'format_id': 'video-4482',
+ 'url': 'https://sdn-global-streaming-cache-3qsdn.akamaized.net/stream/3144/files/17/07/672975/3144-kZT4LWMQw6Rh7Kpd.ism/Manifest',
+ 'manifest_url': 'https://sdn-global-streaming-cache-3qsdn.akamaized.net/stream/3144/files/17/07/672975/3144-kZT4LWMQw6Rh7Kpd.ism/Manifest',
+ 'ext': 'ismv',
+ 'width': 1688,
+ 'height': 720,
+ 'tbr': 4482,
+ 'vcodec': 'AVC1',
+ 'acodec': 'none',
+ 'protocol': 'ism',
+ '_download_params': {
+ 'stream_type': 'video',
+ 'duration': 8880746666,
+ 'timescale': 10000000,
+ 'width': 1688,
+ 'height': 720,
+ 'fourcc': 'AVC1',
+ 'codec_private_data': '00000001674D401FDA01A816F97FFC1ABC1AB440000003004000000C03C60CA80000000168EF32C8',
+ 'channels': 2,
+ 'bits_per_sample': 16,
+ 'nal_unit_length_field': 4
+ },
+ }],
+ {
+ 'eng': [
+ {
+ 'ext': 'ismt',
+ 'protocol': 'ism',
+ 'url': 'https://sdn-global-streaming-cache-3qsdn.akamaized.net/stream/3144/files/17/07/672975/3144-kZT4LWMQw6Rh7Kpd.ism/Manifest',
+ 'manifest_url': 'https://sdn-global-streaming-cache-3qsdn.akamaized.net/stream/3144/files/17/07/672975/3144-kZT4LWMQw6Rh7Kpd.ism/Manifest',
+ '_download_params': {
+ 'stream_type': 'text',
+ 'duration': 8880746666,
+ 'timescale': 10000000,
+ 'fourcc': 'TTML',
+ 'codec_private_data': ''
+ }
+ }
+ ]
+ },
+ ),
+ (
+ 'ec-3_test',
+ 'https://smstr01.dmm.t-online.de/smooth24/smoothstream_m1/streaming/sony/9221438342941275747/636887760842957027/25_km_h-Trailer-9221571562372022953_deu_20_1300k_HD_H_264_ISMV.ism/Manifest',
+ [{
+ 'format_id': 'audio_deu-127',
+ 'url': 'https://smstr01.dmm.t-online.de/smooth24/smoothstream_m1/streaming/sony/9221438342941275747/636887760842957027/25_km_h-Trailer-9221571562372022953_deu_20_1300k_HD_H_264_ISMV.ism/Manifest',
+ 'manifest_url': 'https://smstr01.dmm.t-online.de/smooth24/smoothstream_m1/streaming/sony/9221438342941275747/636887760842957027/25_km_h-Trailer-9221571562372022953_deu_20_1300k_HD_H_264_ISMV.ism/Manifest',
+ 'ext': 'isma',
+ 'tbr': 127,
+ 'asr': 48000,
+ 'vcodec': 'none',
+ 'acodec': 'AACL',
+ 'protocol': 'ism',
+ 'language': 'deu',
+ 'audio_channels': 2,
+ '_download_params': {
+ 'stream_type': 'audio',
+ 'duration': 370000000,
+ 'timescale': 10000000,
+ 'width': 0,
+ 'height': 0,
+ 'fourcc': 'AACL',
+ 'language': 'deu',
+ 'codec_private_data': '1190',
+ 'sampling_rate': 48000,
+ 'channels': 2,
+ 'bits_per_sample': 16,
+ 'nal_unit_length_field': 4
+ },
+ }, {
+ 'format_id': 'audio_deu_1-224',
+ 'url': 'https://smstr01.dmm.t-online.de/smooth24/smoothstream_m1/streaming/sony/9221438342941275747/636887760842957027/25_km_h-Trailer-9221571562372022953_deu_20_1300k_HD_H_264_ISMV.ism/Manifest',
+ 'manifest_url': 'https://smstr01.dmm.t-online.de/smooth24/smoothstream_m1/streaming/sony/9221438342941275747/636887760842957027/25_km_h-Trailer-9221571562372022953_deu_20_1300k_HD_H_264_ISMV.ism/Manifest',
+ 'ext': 'isma',
+ 'tbr': 224,
+ 'asr': 48000,
+ 'vcodec': 'none',
+ 'acodec': 'EC-3',
+ 'protocol': 'ism',
+ 'language': 'deu',
+ 'audio_channels': 6,
+ '_download_params': {
+ 'stream_type': 'audio',
+ 'duration': 370000000,
+ 'timescale': 10000000,
+ 'width': 0,
+ 'height': 0,
+ 'fourcc': 'EC-3',
+ 'language': 'deu',
+ 'codec_private_data': '00063F000000AF87FBA7022DFB42A4D405CD93843BDD0700200F00',
+ 'sampling_rate': 48000,
+ 'channels': 6,
+ 'bits_per_sample': 16,
+ 'nal_unit_length_field': 4
+ },
+ }, {
+ 'format_id': 'video_deu-23',
+ 'url': 'https://smstr01.dmm.t-online.de/smooth24/smoothstream_m1/streaming/sony/9221438342941275747/636887760842957027/25_km_h-Trailer-9221571562372022953_deu_20_1300k_HD_H_264_ISMV.ism/Manifest',
+ 'manifest_url': 'https://smstr01.dmm.t-online.de/smooth24/smoothstream_m1/streaming/sony/9221438342941275747/636887760842957027/25_km_h-Trailer-9221571562372022953_deu_20_1300k_HD_H_264_ISMV.ism/Manifest',
+ 'ext': 'ismv',
+ 'width': 384,
+ 'height': 216,
+ 'tbr': 23,
+ 'vcodec': 'AVC1',
+ 'acodec': 'none',
+ 'protocol': 'ism',
+ 'language': 'deu',
+ '_download_params': {
+ 'stream_type': 'video',
+ 'duration': 370000000,
+ 'timescale': 10000000,
+ 'width': 384,
+ 'height': 216,
+ 'fourcc': 'AVC1',
+ 'language': 'deu',
+ 'codec_private_data': '000000016742C00CDB06077E5C05A808080A00000300020000030009C0C02EE0177CC6300F142AE00000000168CA8DC8',
+ 'channels': 2,
+ 'bits_per_sample': 16,
+ 'nal_unit_length_field': 4
+ },
+ }, {
+ 'format_id': 'video_deu-403',
+ 'url': 'https://smstr01.dmm.t-online.de/smooth24/smoothstream_m1/streaming/sony/9221438342941275747/636887760842957027/25_km_h-Trailer-9221571562372022953_deu_20_1300k_HD_H_264_ISMV.ism/Manifest',
+ 'manifest_url': 'https://smstr01.dmm.t-online.de/smooth24/smoothstream_m1/streaming/sony/9221438342941275747/636887760842957027/25_km_h-Trailer-9221571562372022953_deu_20_1300k_HD_H_264_ISMV.ism/Manifest',
+ 'ext': 'ismv',
+ 'width': 400,
+ 'height': 224,
+ 'tbr': 403,
+ 'vcodec': 'AVC1',
+ 'acodec': 'none',
+ 'protocol': 'ism',
+ 'language': 'deu',
+ '_download_params': {
+ 'stream_type': 'video',
+ 'duration': 370000000,
+ 'timescale': 10000000,
+ 'width': 400,
+ 'height': 224,
+ 'fourcc': 'AVC1',
+ 'language': 'deu',
+ 'codec_private_data': '00000001674D4014E98323B602D4040405000003000100000300320F1429380000000168EAECF2',
+ 'channels': 2,
+ 'bits_per_sample': 16,
+ 'nal_unit_length_field': 4
+ },
+ }, {
+ 'format_id': 'video_deu-680',
+ 'url': 'https://smstr01.dmm.t-online.de/smooth24/smoothstream_m1/streaming/sony/9221438342941275747/636887760842957027/25_km_h-Trailer-9221571562372022953_deu_20_1300k_HD_H_264_ISMV.ism/Manifest',
+ 'manifest_url': 'https://smstr01.dmm.t-online.de/smooth24/smoothstream_m1/streaming/sony/9221438342941275747/636887760842957027/25_km_h-Trailer-9221571562372022953_deu_20_1300k_HD_H_264_ISMV.ism/Manifest',
+ 'ext': 'ismv',
+ 'width': 640,
+ 'height': 360,
+ 'tbr': 680,
+ 'vcodec': 'AVC1',
+ 'acodec': 'none',
+ 'protocol': 'ism',
+ 'language': 'deu',
+ '_download_params': {
+ 'stream_type': 'video',
+ 'duration': 370000000,
+ 'timescale': 10000000,
+ 'width': 640,
+ 'height': 360,
+ 'fourcc': 'AVC1',
+ 'language': 'deu',
+ 'codec_private_data': '00000001674D401EE981405FF2E02D4040405000000300100000030320F162D3800000000168EAECF2',
+ 'channels': 2,
+ 'bits_per_sample': 16,
+ 'nal_unit_length_field': 4
+ },
+ }, {
+ 'format_id': 'video_deu-1253',
+ 'url': 'https://smstr01.dmm.t-online.de/smooth24/smoothstream_m1/streaming/sony/9221438342941275747/636887760842957027/25_km_h-Trailer-9221571562372022953_deu_20_1300k_HD_H_264_ISMV.ism/Manifest',
+ 'manifest_url': 'https://smstr01.dmm.t-online.de/smooth24/smoothstream_m1/streaming/sony/9221438342941275747/636887760842957027/25_km_h-Trailer-9221571562372022953_deu_20_1300k_HD_H_264_ISMV.ism/Manifest',
+ 'ext': 'ismv',
+ 'width': 640,
+ 'height': 360,
+ 'tbr': 1253,
+ 'vcodec': 'AVC1',
+ 'acodec': 'none',
+ 'protocol': 'ism',
+ 'vbr': 1253,
+ 'language': 'deu',
+ '_download_params': {
+ 'stream_type': 'video',
+ 'duration': 370000000,
+ 'timescale': 10000000,
+ 'width': 640,
+ 'height': 360,
+ 'fourcc': 'AVC1',
+ 'language': 'deu',
+ 'codec_private_data': '00000001674D401EE981405FF2E02D4040405000000300100000030320F162D3800000000168EAECF2',
+ 'channels': 2,
+ 'bits_per_sample': 16,
+ 'nal_unit_length_field': 4
+ },
+ }, {
+ 'format_id': 'video_deu-2121',
+ 'url': 'https://smstr01.dmm.t-online.de/smooth24/smoothstream_m1/streaming/sony/9221438342941275747/636887760842957027/25_km_h-Trailer-9221571562372022953_deu_20_1300k_HD_H_264_ISMV.ism/Manifest',
+ 'manifest_url': 'https://smstr01.dmm.t-online.de/smooth24/smoothstream_m1/streaming/sony/9221438342941275747/636887760842957027/25_km_h-Trailer-9221571562372022953_deu_20_1300k_HD_H_264_ISMV.ism/Manifest',
+ 'ext': 'ismv',
+ 'width': 768,
+ 'height': 432,
+ 'tbr': 2121,
+ 'vcodec': 'AVC1',
+ 'acodec': 'none',
+ 'protocol': 'ism',
+ 'language': 'deu',
+ '_download_params': {
+ 'stream_type': 'video',
+ 'duration': 370000000,
+ 'timescale': 10000000,
+ 'width': 768,
+ 'height': 432,
+ 'fourcc': 'AVC1',
+ 'language': 'deu',
+ 'codec_private_data': '00000001674D401EECA0601BD80B50101014000003000400000300C83C58B6580000000168E93B3C80',
+ 'channels': 2,
+ 'bits_per_sample': 16,
+ 'nal_unit_length_field': 4
+ },
+ }, {
+ 'format_id': 'video_deu-3275',
+ 'url': 'https://smstr01.dmm.t-online.de/smooth24/smoothstream_m1/streaming/sony/9221438342941275747/636887760842957027/25_km_h-Trailer-9221571562372022953_deu_20_1300k_HD_H_264_ISMV.ism/Manifest',
+ 'manifest_url': 'https://smstr01.dmm.t-online.de/smooth24/smoothstream_m1/streaming/sony/9221438342941275747/636887760842957027/25_km_h-Trailer-9221571562372022953_deu_20_1300k_HD_H_264_ISMV.ism/Manifest',
+ 'ext': 'ismv',
+ 'width': 1280,
+ 'height': 720,
+ 'tbr': 3275,
+ 'vcodec': 'AVC1',
+ 'acodec': 'none',
+ 'protocol': 'ism',
+ 'language': 'deu',
+ '_download_params': {
+ 'stream_type': 'video',
+ 'duration': 370000000,
+ 'timescale': 10000000,
+ 'width': 1280,
+ 'height': 720,
+ 'fourcc': 'AVC1',
+ 'language': 'deu',
+ 'codec_private_data': '00000001674D4020ECA02802DD80B501010140000003004000000C83C60C65800000000168E93B3C80',
+ 'channels': 2,
+ 'bits_per_sample': 16,
+ 'nal_unit_length_field': 4
+ },
+ }, {
+ 'format_id': 'video_deu-5300',
+ 'url': 'https://smstr01.dmm.t-online.de/smooth24/smoothstream_m1/streaming/sony/9221438342941275747/636887760842957027/25_km_h-Trailer-9221571562372022953_deu_20_1300k_HD_H_264_ISMV.ism/Manifest',
+ 'manifest_url': 'https://smstr01.dmm.t-online.de/smooth24/smoothstream_m1/streaming/sony/9221438342941275747/636887760842957027/25_km_h-Trailer-9221571562372022953_deu_20_1300k_HD_H_264_ISMV.ism/Manifest',
+ 'ext': 'ismv',
+ 'width': 1920,
+ 'height': 1080,
+ 'tbr': 5300,
+ 'vcodec': 'AVC1',
+ 'acodec': 'none',
+ 'protocol': 'ism',
+ 'language': 'deu',
+ '_download_params': {
+ 'stream_type': 'video',
+ 'duration': 370000000,
+ 'timescale': 10000000,
+ 'width': 1920,
+ 'height': 1080,
+ 'fourcc': 'AVC1',
+ 'language': 'deu',
+ 'codec_private_data': '00000001674D4028ECA03C0113F2E02D4040405000000300100000030320F18319600000000168E93B3C80',
+ 'channels': 2,
+ 'bits_per_sample': 16,
+ 'nal_unit_length_field': 4
+ },
+ }, {
+ 'format_id': 'video_deu-8079',
+ 'url': 'https://smstr01.dmm.t-online.de/smooth24/smoothstream_m1/streaming/sony/9221438342941275747/636887760842957027/25_km_h-Trailer-9221571562372022953_deu_20_1300k_HD_H_264_ISMV.ism/Manifest',
+ 'manifest_url': 'https://smstr01.dmm.t-online.de/smooth24/smoothstream_m1/streaming/sony/9221438342941275747/636887760842957027/25_km_h-Trailer-9221571562372022953_deu_20_1300k_HD_H_264_ISMV.ism/Manifest',
+ 'ext': 'ismv',
+ 'width': 1920,
+ 'height': 1080,
+ 'tbr': 8079,
+ 'vcodec': 'AVC1',
+ 'acodec': 'none',
+ 'protocol': 'ism',
+ 'language': 'deu',
+ '_download_params': {
+ 'stream_type': 'video',
+ 'duration': 370000000,
+ 'timescale': 10000000,
+ 'width': 1920,
+ 'height': 1080,
+ 'fourcc': 'AVC1',
+ 'language': 'deu',
+ 'codec_private_data': '00000001674D4028ECA03C0113F2E02D4040405000000300100000030320F18319600000000168E93B3C80',
+ 'channels': 2,
+ 'bits_per_sample': 16,
+ 'nal_unit_length_field': 4
+ },
+ }],
+ {},
+ ),
+ ]
+
+ for ism_file, ism_url, expected_formats, expected_subtitles in _TEST_CASES:
+ with open('./test/testdata/ism/%s.Manifest' % ism_file, encoding='utf-8') as f:
+ formats, subtitles = self.ie._parse_ism_formats_and_subtitles(
+ compat_etree_fromstring(f.read().encode()), ism_url=ism_url)
+ self.ie._sort_formats(formats)
+ expect_value(self, formats, expected_formats, None)
+ expect_value(self, subtitles, expected_subtitles, None)
+
+ def test_parse_f4m_formats(self):
+ _TEST_CASES = [
+ (
+ # https://github.com/ytdl-org/youtube-dl/issues/14660
+ 'custom_base_url',
+ 'http://api.new.livestream.com/accounts/6115179/events/6764928/videos/144884262.f4m',
+ [{
+ 'manifest_url': 'http://api.new.livestream.com/accounts/6115179/events/6764928/videos/144884262.f4m',
+ 'ext': 'flv',
+ 'format_id': '2148',
+ 'protocol': 'f4m',
+ 'tbr': 2148,
+ 'width': 1280,
+ 'height': 720,
+ }]
+ ),
+ ]
+
+ for f4m_file, f4m_url, expected_formats in _TEST_CASES:
+ with open('./test/testdata/f4m/%s.f4m' % f4m_file, encoding='utf-8') as f:
+ formats = self.ie._parse_f4m_formats(
+ compat_etree_fromstring(f.read().encode()),
+ f4m_url, None)
+ self.ie._sort_formats(formats)
+ expect_value(self, formats, expected_formats, None)
+
+ def test_parse_xspf(self):
+ _TEST_CASES = [
+ (
+ 'foo_xspf',
+ 'https://example.org/src/foo_xspf.xspf',
+ [{
+ 'id': 'foo_xspf',
+ 'title': 'Pandemonium',
+ 'description': 'Visit http://bigbrother404.bandcamp.com',
+ 'duration': 202.416,
+ 'formats': [{
+ 'manifest_url': 'https://example.org/src/foo_xspf.xspf',
+ 'url': 'https://example.org/src/cd1/track%201.mp3',
+ }],
+ }, {
+ 'id': 'foo_xspf',
+ 'title': 'Final Cartridge (Nichico Twelve Remix)',
+ 'description': 'Visit http://bigbrother404.bandcamp.com',
+ 'duration': 255.857,
+ 'formats': [{
+ 'manifest_url': 'https://example.org/src/foo_xspf.xspf',
+ 'url': 'https://example.org/%E3%83%88%E3%83%A9%E3%83%83%E3%82%AF%E3%80%80%EF%BC%92.mp3',
+ }],
+ }, {
+ 'id': 'foo_xspf',
+ 'title': 'Rebuilding Nightingale',
+ 'description': 'Visit http://bigbrother404.bandcamp.com',
+ 'duration': 287.915,
+ 'formats': [{
+ 'manifest_url': 'https://example.org/src/foo_xspf.xspf',
+ 'url': 'https://example.org/src/track3.mp3',
+ }, {
+ 'manifest_url': 'https://example.org/src/foo_xspf.xspf',
+ 'url': 'https://example.com/track3.mp3',
+ }]
+ }]
+ ),
+ ]
+
+ for xspf_file, xspf_url, expected_entries in _TEST_CASES:
+ with open('./test/testdata/xspf/%s.xspf' % xspf_file, encoding='utf-8') as f:
+ entries = self.ie._parse_xspf(
+ compat_etree_fromstring(f.read().encode()),
+ xspf_file, xspf_url=xspf_url, xspf_base_url=xspf_url)
+ expect_value(self, entries, expected_entries, None)
+ for i in range(len(entries)):
+ expect_dict(self, entries[i], expected_entries[i])
+
+ def test_response_with_expected_status_returns_content(self):
+ # Checks for mitigations against the effects of
+ # <https://bugs.python.org/issue15002> that affect Python 3.4.1+, which
+ # manifest as `_download_webpage`, `_download_xml`, `_download_json`,
+ # or the underlying `_download_webpage_handle` returning no content
+ # when a response matches `expected_status`.
+
+ httpd = http.server.HTTPServer(
+ ('127.0.0.1', 0), InfoExtractorTestRequestHandler)
+ port = http_server_port(httpd)
+ server_thread = threading.Thread(target=httpd.serve_forever)
+ server_thread.daemon = True
+ server_thread.start()
+
+ (content, urlh) = self.ie._download_webpage_handle(
+ 'http://127.0.0.1:%d/teapot' % port, None,
+ expected_status=TEAPOT_RESPONSE_STATUS)
+ self.assertEqual(content, TEAPOT_RESPONSE_BODY)
+
+
+if __name__ == '__main__':
+ unittest.main()
diff --git a/test/test_YoutubeDL.py b/test/test_YoutubeDL.py
new file mode 100644
index 0000000..6be47af
--- /dev/null
+++ b/test/test_YoutubeDL.py
@@ -0,0 +1,1346 @@
+#!/usr/bin/env python3
+
+# Allow direct execution
+import os
+import sys
+import unittest
+
+sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
+
+
+import copy
+import json
+
+from test.helper import FakeYDL, assertRegexpMatches, try_rm
+from yt_dlp import YoutubeDL
+from yt_dlp.compat import compat_os_name
+from yt_dlp.extractor import YoutubeIE
+from yt_dlp.extractor.common import InfoExtractor
+from yt_dlp.postprocessor.common import PostProcessor
+from yt_dlp.utils import (
+ ExtractorError,
+ LazyList,
+ OnDemandPagedList,
+ int_or_none,
+ match_filter_func,
+)
+from yt_dlp.utils.traversal import traverse_obj
+
+TEST_URL = 'http://localhost/sample.mp4'
+
+
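+# YDL stubs out actual downloading: it records each selected format's
+# info_dict and any screen messages, so the format-selection tests below
+# can assert on what *would* have been downloaded.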
+class YDL(FakeYDL):
+ def __init__(self, *args, **kwargs):
+ super().__init__(*args, **kwargs)
+ self.downloaded_info_dicts = []
+ self.msgs = []
+
+ def process_info(self, info_dict):
+ self.downloaded_info_dicts.append(info_dict.copy())
+
+ def to_screen(self, msg, *args, **kwargs):
+ self.msgs.append(msg)
+
+ def dl(self, *args, **kwargs):
+ assert False, 'Downloader must not be invoked for test_YoutubeDL'
+
+
+def _make_result(formats, **kwargs):
+ res = {
+ 'formats': formats,
+ 'id': 'testid',
+ 'title': 'testtitle',
+ 'extractor': 'testex',
+ 'extractor_key': 'TestEx',
+ 'webpage_url': 'http://example.com/watch?v=shenanigans',
+ }
+ res.update(**kwargs)
+ return res
+
+
+class TestFormatSelection(unittest.TestCase):
+ def test_prefer_free_formats(self):
+ # Same resolution => download webm
+ ydl = YDL()
+ ydl.params['prefer_free_formats'] = True
+ formats = [
+ {'ext': 'webm', 'height': 460, 'url': TEST_URL},
+ {'ext': 'mp4', 'height': 460, 'url': TEST_URL},
+ ]
+ info_dict = _make_result(formats)
+ ydl.sort_formats(info_dict)
+ ydl.process_ie_result(info_dict)
+ downloaded = ydl.downloaded_info_dicts[0]
+ self.assertEqual(downloaded['ext'], 'webm')
+
+ # Different resolution => download best quality (mp4)
+ ydl = YDL()
+ ydl.params['prefer_free_formats'] = True
+ formats = [
+ {'ext': 'webm', 'height': 720, 'url': TEST_URL},
+ {'ext': 'mp4', 'height': 1080, 'url': TEST_URL},
+ ]
+ info_dict['formats'] = formats
+ ydl.sort_formats(info_dict)
+ ydl.process_ie_result(info_dict)
+ downloaded = ydl.downloaded_info_dicts[0]
+ self.assertEqual(downloaded['ext'], 'mp4')
+
+ # No prefer_free_formats => prefer mp4 and webm
+ ydl = YDL()
+ ydl.params['prefer_free_formats'] = False
+ formats = [
+ {'ext': 'webm', 'height': 720, 'url': TEST_URL},
+ {'ext': 'mp4', 'height': 720, 'url': TEST_URL},
+ {'ext': 'flv', 'height': 720, 'url': TEST_URL},
+ ]
+ info_dict['formats'] = formats
+ ydl.sort_formats(info_dict)
+ ydl.process_ie_result(info_dict)
+ downloaded = ydl.downloaded_info_dicts[0]
+ self.assertEqual(downloaded['ext'], 'mp4')
+
+ ydl = YDL()
+ ydl.params['prefer_free_formats'] = False
+ formats = [
+ {'ext': 'flv', 'height': 720, 'url': TEST_URL},
+ {'ext': 'webm', 'height': 720, 'url': TEST_URL},
+ ]
+ info_dict['formats'] = formats
+ ydl.sort_formats(info_dict)
+ ydl.process_ie_result(info_dict)
+ downloaded = ydl.downloaded_info_dicts[0]
+ self.assertEqual(downloaded['ext'], 'webm')
+
+ def test_format_selection(self):
+ formats = [
+ {'format_id': '35', 'ext': 'mp4', 'preference': 0, 'url': TEST_URL},
+ {'format_id': 'example-with-dashes', 'ext': 'webm', 'preference': 1, 'url': TEST_URL},
+ {'format_id': '45', 'ext': 'webm', 'preference': 2, 'url': TEST_URL},
+ {'format_id': '47', 'ext': 'webm', 'preference': 3, 'url': TEST_URL},
+ {'format_id': '2', 'ext': 'flv', 'preference': 4, 'url': TEST_URL},
+ ]
+ info_dict = _make_result(formats)
+
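+ # Helper: apply the given format spec and assert the format_ids
+ # selected, in download order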
+ def test(inp, *expected, multi=False):
+ ydl = YDL({
+ 'format': inp,
+ 'allow_multiple_video_streams': multi,
+ 'allow_multiple_audio_streams': multi,
+ })
+ ydl.process_ie_result(info_dict.copy())
+ downloaded = map(lambda x: x['format_id'], ydl.downloaded_info_dicts)
+ self.assertEqual(list(downloaded), list(expected))
+
+ test('20/47', '47')
+ test('20/71/worst', '35')
+ test(None, '2')
+ test('webm/mp4', '47')
+ test('3gp/40/mp4', '35')
+ test('example-with-dashes', 'example-with-dashes')
+ test('all', '2', '47', '45', 'example-with-dashes', '35')
+ test('mergeall', '2+47+45+example-with-dashes+35', multi=True)
+ # See: https://github.com/yt-dlp/yt-dlp/pull/8797
+ test('7_a/worst', '35')
+
+ def test_format_selection_audio(self):
+ formats = [
+ {'format_id': 'audio-low', 'ext': 'webm', 'preference': 1, 'vcodec': 'none', 'url': TEST_URL},
+ {'format_id': 'audio-mid', 'ext': 'webm', 'preference': 2, 'vcodec': 'none', 'url': TEST_URL},
+ {'format_id': 'audio-high', 'ext': 'flv', 'preference': 3, 'vcodec': 'none', 'url': TEST_URL},
+ {'format_id': 'vid', 'ext': 'mp4', 'preference': 4, 'url': TEST_URL},
+ ]
+ info_dict = _make_result(formats)
+
+ ydl = YDL({'format': 'bestaudio'})
+ ydl.process_ie_result(info_dict.copy())
+ downloaded = ydl.downloaded_info_dicts[0]
+ self.assertEqual(downloaded['format_id'], 'audio-high')
+
+ ydl = YDL({'format': 'worstaudio'})
+ ydl.process_ie_result(info_dict.copy())
+ downloaded = ydl.downloaded_info_dicts[0]
+ self.assertEqual(downloaded['format_id'], 'audio-low')
+
+ formats = [
+ {'format_id': 'vid-low', 'ext': 'mp4', 'preference': 1, 'url': TEST_URL},
+ {'format_id': 'vid-high', 'ext': 'mp4', 'preference': 2, 'url': TEST_URL},
+ ]
+ info_dict = _make_result(formats)
+
+ ydl = YDL({'format': 'bestaudio/worstaudio/best'})
+ ydl.process_ie_result(info_dict.copy())
+ downloaded = ydl.downloaded_info_dicts[0]
+ self.assertEqual(downloaded['format_id'], 'vid-high')
+
+ def test_format_selection_audio_exts(self):
+ formats = [
+ {'format_id': 'mp3-64', 'ext': 'mp3', 'abr': 64, 'url': 'http://_', 'vcodec': 'none'},
+ {'format_id': 'ogg-64', 'ext': 'ogg', 'abr': 64, 'url': 'http://_', 'vcodec': 'none'},
+ {'format_id': 'aac-64', 'ext': 'aac', 'abr': 64, 'url': 'http://_', 'vcodec': 'none'},
+ {'format_id': 'mp3-32', 'ext': 'mp3', 'abr': 32, 'url': 'http://_', 'vcodec': 'none'},
+ {'format_id': 'aac-32', 'ext': 'aac', 'abr': 32, 'url': 'http://_', 'vcodec': 'none'},
+ ]
+
+ info_dict = _make_result(formats)
+ ydl = YDL({'format': 'best'})
+ ydl.sort_formats(info_dict)
+ ydl.process_ie_result(copy.deepcopy(info_dict))
+ downloaded = ydl.downloaded_info_dicts[0]
+ self.assertEqual(downloaded['format_id'], 'aac-64')
+
+ ydl = YDL({'format': 'mp3'})
+ ydl.sort_formats(info_dict)
+ ydl.process_ie_result(copy.deepcopy(info_dict))
+ downloaded = ydl.downloaded_info_dicts[0]
+ self.assertEqual(downloaded['format_id'], 'mp3-64')
+
+ ydl = YDL({'prefer_free_formats': True})
+ ydl.sort_formats(info_dict)
+ ydl.process_ie_result(copy.deepcopy(info_dict))
+ downloaded = ydl.downloaded_info_dicts[0]
+ self.assertEqual(downloaded['format_id'], 'ogg-64')
+
+ def test_format_selection_video(self):
+ formats = [
+ {'format_id': 'dash-video-low', 'ext': 'mp4', 'preference': 1, 'acodec': 'none', 'url': TEST_URL},
+ {'format_id': 'dash-video-high', 'ext': 'mp4', 'preference': 2, 'acodec': 'none', 'url': TEST_URL},
+ {'format_id': 'vid', 'ext': 'mp4', 'preference': 3, 'url': TEST_URL},
+ ]
+ info_dict = _make_result(formats)
+
+ ydl = YDL({'format': 'bestvideo'})
+ ydl.process_ie_result(info_dict.copy())
+ downloaded = ydl.downloaded_info_dicts[0]
+ self.assertEqual(downloaded['format_id'], 'dash-video-high')
+
+ ydl = YDL({'format': 'worstvideo'})
+ ydl.process_ie_result(info_dict.copy())
+ downloaded = ydl.downloaded_info_dicts[0]
+ self.assertEqual(downloaded['format_id'], 'dash-video-low')
+
+ ydl = YDL({'format': 'bestvideo[format_id^=dash][format_id$=low]'})
+ ydl.process_ie_result(info_dict.copy())
+ downloaded = ydl.downloaded_info_dicts[0]
+ self.assertEqual(downloaded['format_id'], 'dash-video-low')
+
+ formats = [
+ {'format_id': 'vid-vcodec-dot', 'ext': 'mp4', 'preference': 1, 'vcodec': 'avc1.123456', 'acodec': 'none', 'url': TEST_URL},
+ ]
+ info_dict = _make_result(formats)
+
+ ydl = YDL({'format': 'bestvideo[vcodec=avc1.123456]'})
+ ydl.process_ie_result(info_dict.copy())
+ downloaded = ydl.downloaded_info_dicts[0]
+ self.assertEqual(downloaded['format_id'], 'vid-vcodec-dot')
+
+ def test_format_selection_string_ops(self):
+ formats = [
+ {'format_id': 'abc-cba', 'ext': 'mp4', 'url': TEST_URL},
+ {'format_id': 'zxc-cxz', 'ext': 'webm', 'url': TEST_URL},
+ ]
+ info_dict = _make_result(formats)
+
+ # equals (=)
+ ydl = YDL({'format': '[format_id=abc-cba]'})
+ ydl.process_ie_result(info_dict.copy())
+ downloaded = ydl.downloaded_info_dicts[0]
+ self.assertEqual(downloaded['format_id'], 'abc-cba')
+
+ # does not equal (!=)
+ ydl = YDL({'format': '[format_id!=abc-cba]'})
+ ydl.process_ie_result(info_dict.copy())
+ downloaded = ydl.downloaded_info_dicts[0]
+ self.assertEqual(downloaded['format_id'], 'zxc-cxz')
+
+ ydl = YDL({'format': '[format_id!=abc-cba][format_id!=zxc-cxz]'})
+ self.assertRaises(ExtractorError, ydl.process_ie_result, info_dict.copy())
+
+ # starts with (^=)
+ ydl = YDL({'format': '[format_id^=abc]'})
+ ydl.process_ie_result(info_dict.copy())
+ downloaded = ydl.downloaded_info_dicts[0]
+ self.assertEqual(downloaded['format_id'], 'abc-cba')
+
+ # does not start with (!^=)
+ ydl = YDL({'format': '[format_id!^=abc]'})
+ ydl.process_ie_result(info_dict.copy())
+ downloaded = ydl.downloaded_info_dicts[0]
+ self.assertEqual(downloaded['format_id'], 'zxc-cxz')
+
+ ydl = YDL({'format': '[format_id!^=abc][format_id!^=zxc]'})
+ self.assertRaises(ExtractorError, ydl.process_ie_result, info_dict.copy())
+
+ # ends with ($=)
+ ydl = YDL({'format': '[format_id$=cba]'})
+ ydl.process_ie_result(info_dict.copy())
+ downloaded = ydl.downloaded_info_dicts[0]
+ self.assertEqual(downloaded['format_id'], 'abc-cba')
+
+ # does not end with (!$=)
+ ydl = YDL({'format': '[format_id!$=cba]'})
+ ydl.process_ie_result(info_dict.copy())
+ downloaded = ydl.downloaded_info_dicts[0]
+ self.assertEqual(downloaded['format_id'], 'zxc-cxz')
+
+ ydl = YDL({'format': '[format_id!$=cba][format_id!$=cxz]'})
+ self.assertRaises(ExtractorError, ydl.process_ie_result, info_dict.copy())
+
+ # contains (*=)
+ ydl = YDL({'format': '[format_id*=bc-cb]'})
+ ydl.process_ie_result(info_dict.copy())
+ downloaded = ydl.downloaded_info_dicts[0]
+ self.assertEqual(downloaded['format_id'], 'abc-cba')
+
+ # does not contain (!*=)
+ ydl = YDL({'format': '[format_id!*=bc-cb]'})
+ ydl.process_ie_result(info_dict.copy())
+ downloaded = ydl.downloaded_info_dicts[0]
+ self.assertEqual(downloaded['format_id'], 'zxc-cxz')
+
+ ydl = YDL({'format': '[format_id!*=abc][format_id!*=zxc]'})
+ self.assertRaises(ExtractorError, ydl.process_ie_result, info_dict.copy())
+
+ ydl = YDL({'format': '[format_id!*=-]'})
+ self.assertRaises(ExtractorError, ydl.process_ie_result, info_dict.copy())
+
+ def test_youtube_format_selection(self):
+ # FIXME: Rewrite in accordance with the new format sorting options
+ return
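+ # NB: everything below is intentionally unreachable until the rewrite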
+
+ order = [
+ '38', '37', '46', '22', '45', '35', '44', '18', '34', '43', '6', '5', '17', '36', '13',
+ # Apple HTTP Live Streaming
+ '96', '95', '94', '93', '92', '132', '151',
+ # 3D
+ '85', '84', '102', '83', '101', '82', '100',
+ # Dash video
+ '137', '248', '136', '247', '135', '246',
+ '245', '244', '134', '243', '133', '242', '160',
+ # Dash audio
+ '141', '172', '140', '171', '139',
+ ]
+
+ def format_info(f_id):
+ info = YoutubeIE._formats[f_id].copy()
+
+ # XXX: In real cases InfoExtractor._parse_mpd_formats() fills up 'acodec'
+ # and 'vcodec', but in tests this information has been incomplete since
+ # commit a6c2c24479e5f4827ceb06f64d855329c0a6f593;
+ # test_YoutubeDL.test_youtube_format_selection is broken without
+ # this fix
+ if 'acodec' in info and 'vcodec' not in info:
+ info['vcodec'] = 'none'
+ elif 'vcodec' in info and 'acodec' not in info:
+ info['acodec'] = 'none'
+
+ info['format_id'] = f_id
+ info['url'] = 'url:' + f_id
+ return info
+ formats_order = [format_info(f_id) for f_id in order]
+
+ info_dict = _make_result(list(formats_order), extractor='youtube')
+ ydl = YDL({'format': 'bestvideo+bestaudio'})
+ ydl.sort_formats(info_dict)
+ ydl.process_ie_result(info_dict)
+ downloaded = ydl.downloaded_info_dicts[0]
+ self.assertEqual(downloaded['format_id'], '248+172')
+ self.assertEqual(downloaded['ext'], 'mp4')
+
+ info_dict = _make_result(list(formats_order), extractor='youtube')
+ ydl = YDL({'format': 'bestvideo[height>=999999]+bestaudio/best'})
+ ydl.sort_formats(info_dict)
+ ydl.process_ie_result(info_dict)
+ downloaded = ydl.downloaded_info_dicts[0]
+ self.assertEqual(downloaded['format_id'], '38')
+
+ info_dict = _make_result(list(formats_order), extractor='youtube')
+ ydl = YDL({'format': 'bestvideo/best,bestaudio'})
+ ydl.sort_formats(info_dict)
+ ydl.process_ie_result(info_dict)
+ downloaded_ids = [info['format_id'] for info in ydl.downloaded_info_dicts]
+ self.assertEqual(downloaded_ids, ['137', '141'])
+
+ info_dict = _make_result(list(formats_order), extractor='youtube')
+ ydl = YDL({'format': '(bestvideo[ext=mp4],bestvideo[ext=webm])+bestaudio'})
+ ydl.sort_formats(info_dict)
+ ydl.process_ie_result(info_dict)
+ downloaded_ids = [info['format_id'] for info in ydl.downloaded_info_dicts]
+ self.assertEqual(downloaded_ids, ['137+141', '248+141'])
+
+ info_dict = _make_result(list(formats_order), extractor='youtube')
+ ydl = YDL({'format': '(bestvideo[ext=mp4],bestvideo[ext=webm])[height<=720]+bestaudio'})
+ ydl.sort_formats(info_dict)
+ ydl.process_ie_result(info_dict)
+ downloaded_ids = [info['format_id'] for info in ydl.downloaded_info_dicts]
+ self.assertEqual(downloaded_ids, ['136+141', '247+141'])
+
+ info_dict = _make_result(list(formats_order), extractor='youtube')
+ ydl = YDL({'format': '(bestvideo[ext=none]/bestvideo[ext=webm])+bestaudio'})
+ ydl.sort_formats(info_dict)
+ ydl.process_ie_result(info_dict)
+ downloaded_ids = [info['format_id'] for info in ydl.downloaded_info_dicts]
+ self.assertEqual(downloaded_ids, ['248+141'])
+
+ for f1, f2 in zip(formats_order, formats_order[1:]):
+ info_dict = _make_result([f1, f2], extractor='youtube')
+ ydl = YDL({'format': 'best/bestvideo'})
+ ydl.sort_formats(info_dict)
+ ydl.process_ie_result(info_dict)
+ downloaded = ydl.downloaded_info_dicts[0]
+ self.assertEqual(downloaded['format_id'], f1['format_id'])
+
+ info_dict = _make_result([f2, f1], extractor='youtube')
+ ydl = YDL({'format': 'best/bestvideo'})
+ ydl.sort_formats(info_dict)
+ ydl.process_ie_result(info_dict)
+ downloaded = ydl.downloaded_info_dicts[0]
+ self.assertEqual(downloaded['format_id'], f1['format_id'])
+
+ def test_audio_only_extractor_format_selection(self):
+ # For extractors with incomplete formats (all formats are audio-only or
+ # video-only), best and worst should fall back to the corresponding best/worst
+ # video-only or audio-only formats (as per
+ # https://github.com/ytdl-org/youtube-dl/pull/5556)
+ formats = [
+ {'format_id': 'low', 'ext': 'mp3', 'preference': 1, 'vcodec': 'none', 'url': TEST_URL},
+ {'format_id': 'high', 'ext': 'mp3', 'preference': 2, 'vcodec': 'none', 'url': TEST_URL},
+ ]
+ info_dict = _make_result(formats)
+
+ ydl = YDL({'format': 'best'})
+ ydl.process_ie_result(info_dict.copy())
+ downloaded = ydl.downloaded_info_dicts[0]
+ self.assertEqual(downloaded['format_id'], 'high')
+
+ ydl = YDL({'format': 'worst'})
+ ydl.process_ie_result(info_dict.copy())
+ downloaded = ydl.downloaded_info_dicts[0]
+ self.assertEqual(downloaded['format_id'], 'low')
+
+ def test_format_not_available(self):
+ formats = [
+ {'format_id': 'regular', 'ext': 'mp4', 'height': 360, 'url': TEST_URL},
+ {'format_id': 'video', 'ext': 'mp4', 'height': 720, 'acodec': 'none', 'url': TEST_URL},
+ ]
+ info_dict = _make_result(formats)
+
+ # This must fail since the complete (video+audio) format does not match
+ # the filter, and the extractor does not provide incomplete-only formats
+ # (i.e. only video-only or audio-only ones).
+ ydl = YDL({'format': 'best[height>360]'})
+ self.assertRaises(ExtractorError, ydl.process_ie_result, info_dict.copy())
+
+ def test_format_selection_issue_10083(self):
+ # See https://github.com/ytdl-org/youtube-dl/issues/10083
+ formats = [
+ {'format_id': 'regular', 'height': 360, 'url': TEST_URL},
+ {'format_id': 'video', 'height': 720, 'acodec': 'none', 'url': TEST_URL},
+ {'format_id': 'audio', 'vcodec': 'none', 'url': TEST_URL},
+ ]
+ info_dict = _make_result(formats)
+
+ ydl = YDL({'format': 'best[height>360]/bestvideo[height>360]+bestaudio'})
+ ydl.process_ie_result(info_dict.copy())
+ self.assertEqual(ydl.downloaded_info_dicts[0]['format_id'], 'video+audio')
+
+ def test_invalid_format_specs(self):
+ def assert_syntax_error(format_spec):
+ self.assertRaises(SyntaxError, YDL, {'format': format_spec})
+
+ assert_syntax_error('bestvideo,,best')
+ assert_syntax_error('+bestaudio')
+ assert_syntax_error('bestvideo+')
+ assert_syntax_error('/')
+ assert_syntax_error('[720<height]')
+
+ def test_format_filtering(self):
+ formats = [
+ {'format_id': 'A', 'filesize': 500, 'width': 1000},
+ {'format_id': 'B', 'filesize': 1000, 'width': 500},
+ {'format_id': 'C', 'filesize': 1000, 'width': 400},
+ {'format_id': 'D', 'filesize': 2000, 'width': 600},
+ {'format_id': 'E', 'filesize': 3000},
+ {'format_id': 'F'},
+ {'format_id': 'G', 'filesize': 1000000},
+ ]
+ for f in formats:
+ f['url'] = 'http://_/'
+ f['ext'] = 'unknown'
+ info_dict = _make_result(formats, _format_sort_fields=('id', ))
+
+ ydl = YDL({'format': 'best[filesize<3000]'})
+ ydl.process_ie_result(info_dict)
+ downloaded = ydl.downloaded_info_dicts[0]
+ self.assertEqual(downloaded['format_id'], 'D')
+
+ ydl = YDL({'format': 'best[filesize<=3000]'})
+ ydl.process_ie_result(info_dict)
+ downloaded = ydl.downloaded_info_dicts[0]
+ self.assertEqual(downloaded['format_id'], 'E')
+
+ ydl = YDL({'format': 'best[filesize <= ? 3000]'})
+ ydl.process_ie_result(info_dict)
+ downloaded = ydl.downloaded_info_dicts[0]
+ self.assertEqual(downloaded['format_id'], 'F')
+
+ ydl = YDL({'format': 'best [filesize = 1000] [width>450]'})
+ ydl.process_ie_result(info_dict)
+ downloaded = ydl.downloaded_info_dicts[0]
+ self.assertEqual(downloaded['format_id'], 'B')
+
+ ydl = YDL({'format': 'best [filesize = 1000] [width!=450]'})
+ ydl.process_ie_result(info_dict)
+ downloaded = ydl.downloaded_info_dicts[0]
+ self.assertEqual(downloaded['format_id'], 'C')
+
+ ydl = YDL({'format': '[filesize>?1]'})
+ ydl.process_ie_result(info_dict)
+ downloaded = ydl.downloaded_info_dicts[0]
+ self.assertEqual(downloaded['format_id'], 'G')
+
+ ydl = YDL({'format': '[filesize<1M]'})
+ ydl.process_ie_result(info_dict)
+ downloaded = ydl.downloaded_info_dicts[0]
+ self.assertEqual(downloaded['format_id'], 'E')
+
+ ydl = YDL({'format': '[filesize<1MiB]'})
+ ydl.process_ie_result(info_dict)
+ downloaded = ydl.downloaded_info_dicts[0]
+ self.assertEqual(downloaded['format_id'], 'G')
+
+ ydl = YDL({'format': 'all[width>=400][width<=600]'})
+ ydl.process_ie_result(info_dict)
+ downloaded_ids = [info['format_id'] for info in ydl.downloaded_info_dicts]
+ self.assertEqual(downloaded_ids, ['D', 'C', 'B'])
+
+ ydl = YDL({'format': 'best[height<40]'})
+ try:
+ ydl.process_ie_result(info_dict)
+ except ExtractorError:
+ pass
+ self.assertEqual(ydl.downloaded_info_dicts, [])
+
+ def test_default_format_spec(self):
+ ydl = YDL({'simulate': True})
+ self.assertEqual(ydl._default_format_spec({}), 'bestvideo*+bestaudio/best')
+
+ ydl = YDL({})
+ self.assertEqual(ydl._default_format_spec({'is_live': True}), 'best/bestvideo+bestaudio')
+
+ ydl = YDL({'simulate': True})
+ self.assertEqual(ydl._default_format_spec({'is_live': True}), 'bestvideo*+bestaudio/best')
+
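+ # '-' means downloading to stdout, where separate video and audio
+ # streams cannot be merged, so a single-stream spec is expected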
+ ydl = YDL({'outtmpl': '-'})
+ self.assertEqual(ydl._default_format_spec({}), 'best/bestvideo+bestaudio')
+
+ ydl = YDL({})
+ self.assertEqual(ydl._default_format_spec({}, download=False), 'bestvideo*+bestaudio/best')
+ self.assertEqual(ydl._default_format_spec({'is_live': True}), 'best/bestvideo+bestaudio')
+
+
+class TestYoutubeDL(unittest.TestCase):
+ def test_subtitles(self):
+ def s_formats(lang, autocaption=False):
+ return [{
+ 'ext': ext,
+ 'url': f'http://localhost/video.{lang}.{ext}',
+ '_auto': autocaption,
+ } for ext in ['vtt', 'srt', 'ass']]
+ subtitles = {l: s_formats(l) for l in ['en', 'fr', 'es']}
+ auto_captions = {l: s_formats(l, True) for l in ['it', 'pt', 'es']}
+ info_dict = {
+ 'id': 'test',
+ 'title': 'Test',
+ 'url': 'http://localhost/video.mp4',
+ 'subtitles': subtitles,
+ 'automatic_captions': auto_captions,
+ 'extractor': 'TEST',
+ 'webpage_url': 'http://example.com/watch?v=shenanigans',
+ }
+
+ def get_info(params=None):
+ # Avoid the shared-mutable-default pitfall; behaviour is unchanged
+ params = dict(params or {})
+ params.setdefault('simulate', True)
+ ydl = YDL(params)
+ ydl.report_warning = lambda *args, **kwargs: None
+ return ydl.process_video_result(info_dict, download=False)
+
+ result = get_info()
+ self.assertFalse(result.get('requested_subtitles'))
+ self.assertEqual(result['subtitles'], subtitles)
+ self.assertEqual(result['automatic_captions'], auto_captions)
+
+ result = get_info({'writesubtitles': True})
+ subs = result['requested_subtitles']
+ self.assertTrue(subs)
+ self.assertEqual(set(subs.keys()), {'en'})
+ self.assertTrue(subs['en'].get('data') is None)
+ self.assertEqual(subs['en']['ext'], 'ass')
+
+ result = get_info({'writesubtitles': True, 'subtitlesformat': 'foo/srt'})
+ subs = result['requested_subtitles']
+ self.assertEqual(subs['en']['ext'], 'srt')
+
+ result = get_info({'writesubtitles': True, 'subtitleslangs': ['es', 'fr', 'it']})
+ subs = result['requested_subtitles']
+ self.assertTrue(subs)
+ self.assertEqual(set(subs.keys()), {'es', 'fr'})
+
+ result = get_info({'writesubtitles': True, 'subtitleslangs': ['all', '-en']})
+ subs = result['requested_subtitles']
+ self.assertTrue(subs)
+ self.assertEqual(set(subs.keys()), {'es', 'fr'})
+
+ result = get_info({'writesubtitles': True, 'subtitleslangs': ['en', 'fr', '-en']})
+ subs = result['requested_subtitles']
+ self.assertTrue(subs)
+ self.assertEqual(set(subs.keys()), {'fr'})
+
+ result = get_info({'writesubtitles': True, 'subtitleslangs': ['-en', 'en']})
+ subs = result['requested_subtitles']
+ self.assertTrue(subs)
+ self.assertEqual(set(subs.keys()), {'en'})
+
+ result = get_info({'writesubtitles': True, 'subtitleslangs': ['e.+']})
+ subs = result['requested_subtitles']
+ self.assertTrue(subs)
+ self.assertEqual(set(subs.keys()), {'es', 'en'})
+
+ result = get_info({'writesubtitles': True, 'writeautomaticsub': True, 'subtitleslangs': ['es', 'pt']})
+ subs = result['requested_subtitles']
+ self.assertTrue(subs)
+ self.assertEqual(set(subs.keys()), {'es', 'pt'})
+ self.assertFalse(subs['es']['_auto'])
+ self.assertTrue(subs['pt']['_auto'])
+
+ result = get_info({'writeautomaticsub': True, 'subtitleslangs': ['es', 'pt']})
+ subs = result['requested_subtitles']
+ self.assertTrue(subs)
+ self.assertEqual(set(subs.keys()), {'es', 'pt'})
+ self.assertTrue(subs['es']['_auto'])
+ self.assertTrue(subs['pt']['_auto'])
+
+ def test_add_extra_info(self):
+ test_dict = {
+ 'extractor': 'Foo',
+ }
+ extra_info = {
+ 'extractor': 'Bar',
+ 'playlist': 'funny videos',
+ }
+ YDL.add_extra_info(test_dict, extra_info)
+ self.assertEqual(test_dict['extractor'], 'Foo')
+ self.assertEqual(test_dict['playlist'], 'funny videos')
+
+ outtmpl_info = {
+ 'id': '1234',
+ 'ext': 'mp4',
+ 'width': None,
+ 'height': 1080,
+ 'filesize': 1024,
+ 'title1': '$PATH',
+ 'title2': '%PATH%',
+ 'title3': 'foo/bar\\test',
+ 'title4': 'foo "bar" test',
+ 'title5': 'áéí 𝐀',
+ 'timestamp': 1618488000,
+ 'duration': 100000,
+ 'playlist_index': 1,
+ 'playlist_autonumber': 2,
+ '__last_playlist_index': 100,
+ 'n_entries': 10,
+ 'formats': [
+ {'id': 'id 1', 'height': 1080, 'width': 1920},
+ {'id': 'id 2', 'height': 720},
+ {'id': 'id 3'}
+ ]
+ }
+
+ def test_prepare_outtmpl_and_filename(self):
+ def test(tmpl, expected, *, info=None, **params):
+ params['outtmpl'] = tmpl
+ ydl = FakeYDL(params)
+ ydl._num_downloads = 1
+ self.assertEqual(ydl.validate_outtmpl(tmpl), None)
+
+ out = ydl.evaluate_outtmpl(tmpl, info or self.outtmpl_info)
+ fname = ydl.prepare_filename(info or self.outtmpl_info)
+
+ if not isinstance(expected, (list, tuple)):
+ expected = (expected, expected)
+ for (name, got), expect in zip((('outtmpl', out), ('filename', fname)), expected):
+ if callable(expect):
+ self.assertTrue(expect(got), f'Wrong {name} from {tmpl}')
+ elif expect is not None:
+ self.assertEqual(got, expect, f'Wrong {name} from {tmpl}')
+
+ # Side-effects
+ original_infodict = dict(self.outtmpl_info)
+ test('foo.bar', 'foo.bar')
+ original_infodict['epoch'] = self.outtmpl_info.get('epoch')
+ self.assertTrue(isinstance(original_infodict['epoch'], int))
+ test('%(epoch)d', int_or_none)
+ self.assertEqual(original_infodict, self.outtmpl_info)
+
+ # Auto-generated fields
+ test('%(id)s.%(ext)s', '1234.mp4')
+ test('%(duration_string)s', ('27:46:40', '27-46-40'))
+ test('%(resolution)s', '1080p')
+ test('%(playlist_index|)s', '001')
+ test('%(playlist_index&{}!)s', '1!')
+ test('%(playlist_autonumber)s', '02')
+ test('%(autonumber)s', '00001')
+ test('%(autonumber+2)03d', '005', autonumber_start=3)
+ test('%(autonumber)s', '001', autonumber_size=3)
+
+ # Escaping %
+ test('%', '%')
+ test('%%', '%')
+ test('%%%%', '%%')
+ test('%s', '%s')
+ test('%%%s', '%%s')
+ test('%d', '%d')
+ test('%abc%', '%abc%')
+ test('%%(width)06d.%(ext)s', '%(width)06d.mp4')
+ test('%%%(height)s', '%1080')
+ test('%(width)06d.%(ext)s', 'NA.mp4')
+ test('%(width)06d.%%(ext)s', 'NA.%(ext)s')
+ test('%%(width)06d.%(ext)s', '%(width)06d.mp4')
+
+ # ID sanitization
+ test('%(id)s', '_abcd', info={'id': '_abcd'})
+ test('%(some_id)s', '_abcd', info={'some_id': '_abcd'})
+ test('%(formats.0.id)s', '_abcd', info={'formats': [{'id': '_abcd'}]})
+ test('%(id)s', '-abcd', info={'id': '-abcd'})
+ test('%(id)s', '.abcd', info={'id': '.abcd'})
+ test('%(id)s', 'ab__cd', info={'id': 'ab__cd'})
+ test('%(id)s', ('ab:cd', 'ab:cd'), info={'id': 'ab:cd'})
+ test('%(id.0)s', '-', info={'id': '--'})
+
+ # Invalid templates
+ self.assertTrue(isinstance(YoutubeDL.validate_outtmpl('%(title)'), ValueError))
+ test('%(invalid@tmpl|def)s', 'none', outtmpl_na_placeholder='none')
+ test('%(..)s', 'NA')
+ test('%(formats.{id)s', 'NA')
+
+ # Entire info_dict
+ def expect_same_infodict(out):
+ got_dict = json.loads(out)
+ for info_field, expected in self.outtmpl_info.items():
+ self.assertEqual(got_dict.get(info_field), expected, info_field)
+ return True
+
+ test('%()j', (expect_same_infodict, None))
+
+ # NA placeholder
+ NA_TEST_OUTTMPL = '%(uploader_date)s-%(width)d-%(x|def)s-%(id)s.%(ext)s'
+ test(NA_TEST_OUTTMPL, 'NA-NA-def-1234.mp4')
+ test(NA_TEST_OUTTMPL, 'none-none-def-1234.mp4', outtmpl_na_placeholder='none')
+ test(NA_TEST_OUTTMPL, '--def-1234.mp4', outtmpl_na_placeholder='')
+ test('%(non_existent.0)s', 'NA')
+
+ # String formatting
+ FMT_TEST_OUTTMPL = '%%(height)%s.%%(ext)s'
+ test(FMT_TEST_OUTTMPL % 's', '1080.mp4')
+ test(FMT_TEST_OUTTMPL % 'd', '1080.mp4')
+ test(FMT_TEST_OUTTMPL % '6d', ' 1080.mp4')
+ test(FMT_TEST_OUTTMPL % '-6d', '1080 .mp4')
+ test(FMT_TEST_OUTTMPL % '06d', '001080.mp4')
+ test(FMT_TEST_OUTTMPL % ' 06d', ' 01080.mp4')
+ test(FMT_TEST_OUTTMPL % ' 06d', ' 01080.mp4')
+ test(FMT_TEST_OUTTMPL % '0 6d', ' 01080.mp4')
+ test(FMT_TEST_OUTTMPL % '0 6d', ' 01080.mp4')
+ test(FMT_TEST_OUTTMPL % ' 0 6d', ' 01080.mp4')
+
+ # Type casting
+ test('%(id)d', '1234')
+ test('%(height)c', '1')
+ test('%(ext)c', 'm')
+ test('%(id)d %(id)r', "1234 '1234'")
+ test('%(id)r %(height)r', "'1234' 1080")
+ test('%(title5)a %(height)a', (R"'\xe1\xe9\xed \U0001d400' 1080", None))
+ test('%(ext)s-%(ext|def)d', 'mp4-def')
+ test('%(width|0)04d', '0')
+ test('a%(width|b)d', 'ab', outtmpl_na_placeholder='none')
+
+ FORMATS = self.outtmpl_info['formats']
+
+ # Custom type casting
+ test('%(formats.:.id)l', 'id 1, id 2, id 3')
+ test('%(formats.:.id)#l', ('id 1\nid 2\nid 3', 'id 1 id 2 id 3'))
+ test('%(ext)l', 'mp4')
+ test('%(formats.:.id) 18l', ' id 1, id 2, id 3')
+ test('%(formats)j', (json.dumps(FORMATS), None))
+ test('%(formats)#j', (
+ json.dumps(FORMATS, indent=4),
+ json.dumps(FORMATS, indent=4).replace(':', '：').replace('"', '＂').replace('\n', ' ')
+ ))
+ test('%(title5).3B', 'á')
+ test('%(title5)U', 'áéí 𝐀')
+ test('%(title5)#U', 'a\u0301e\u0301i\u0301 𝐀')
+ test('%(title5)+U', 'áéí A')
+ test('%(title5)+#U', 'a\u0301e\u0301i\u0301 A')
+ test('%(height)D', '1k')
+ test('%(filesize)#D', '1Ki')
+ test('%(height)5.2D', ' 1.08k')
+ test('%(title4)#S', 'foo_bar_test')
+ test('%(title4).10S', ('foo "bar" ', 'foo "bar"' + ('#' if compat_os_name == 'nt' else ' ')))
+ if compat_os_name == 'nt':
+ test('%(title4)q', ('"foo ""bar"" test"', None))
+ test('%(formats.:.id)#q', ('"id 1" "id 2" "id 3"', None))
+ test('%(formats.0.id)#q', ('"id 1"', None))
+ else:
+ test('%(title4)q', ('\'foo "bar" test\'', '\'foo "bar" test\''))
+ test('%(formats.:.id)#q', "'id 1' 'id 2' 'id 3'")
+ test('%(formats.0.id)#q', "'id 1'")
+
+ # Internal formatting
+ test('%(timestamp-1000>%H-%M-%S)s', '11-43-20')
+ test('%(title|%)s %(title|%%)s', '% %%')
+ test('%(id+1-height+3)05d', '00158')
+ test('%(width+100)05d', 'NA')
+ test('%(filesize*8)d', '8192')
+ test('%(formats.0) 15s', ('% 15s' % FORMATS[0], None))
+ test('%(formats.0)r', (repr(FORMATS[0]), None))
+ test('%(height.0)03d', '001')
+ test('%(-height.0)04d', '-001')
+ test('%(formats.-1.id)s', FORMATS[-1]['id'])
+ test('%(formats.0.id.-1)d', FORMATS[0]['id'][-1])
+ test('%(formats.3)s', 'NA')
+ test('%(formats.:2:-1)r', repr(FORMATS[:2:-1]))
+ test('%(formats.0.id.-1+id)f', '1235.000000')
+ test('%(formats.0.id.-1+formats.1.id.-1)d', '3')
+ out = json.dumps([{'id': f['id'], 'height.:2': str(f['height'])[:2]}
+ if 'height' in f else {'id': f['id']}
+ for f in FORMATS])
+ test('%(formats.:.{id,height.:2})j', (out, None))
+ test('%(formats.:.{id,height}.id)l', ', '.join(f['id'] for f in FORMATS))
+ test('%(.{id,title})j', ('{"id": "1234"}', '{＂id＂： ＂1234＂}'))
+
+ # Alternates
+ test('%(title,id)s', '1234')
+ test('%(width-100,height+20|def)d', '1100')
+ test('%(width-100,height+width|def)s', 'def')
+ test('%(timestamp-x>%H\\,%M\\,%S,timestamp>%H\\,%M\\,%S)s', '12,00,00')
+
+ # Replacement
+ test('%(id&foo)s.bar', 'foo.bar')
+ test('%(title&foo)s.bar', 'NA.bar')
+ test('%(title&foo|baz)s.bar', 'baz.bar')
+ test('%(x,id&foo|baz)s.bar', 'foo.bar')
+ test('%(x,title&foo|baz)s.bar', 'baz.bar')
+ test('%(id&a\nb|)s', ('a\nb', 'a b'))
+ test('%(id&hi {:>10} {}|)s', 'hi 1234 1234')
+ test(R'%(id&{0} {}|)s', 'NA')
+ test(R'%(id&{0.1}|)s', 'NA')
+ test('%(height&{:,d})S', '1,080')
+
+ # Laziness
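+ # The generator raises once iterated past what the template needs,
+ # proving that the LazyList is not evaluated eagerly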
+ def gen():
+ yield from range(5)
+ raise self.assertTrue(False, 'LazyList should not be evaluated till here')
+ test('%(key.4)s', '4', info={'key': LazyList(gen())})
+
+ # Empty filename
+ test('%(foo|)s-%(bar|)s.%(ext)s', '-.mp4')
+ # test('%(foo|)s.%(ext)s', ('.mp4', '_.mp4')) # fixme
+ # test('%(foo|)s', ('', '_')) # fixme
+
+ # Environment variable expansion for prepare_filename
+ os.environ['__yt_dlp_var'] = 'expanded'
+ envvar = '%__yt_dlp_var%' if compat_os_name == 'nt' else '$__yt_dlp_var'
+ test(envvar, (envvar, 'expanded'))
+ if compat_os_name == 'nt':
+ test('%s%', ('%s%', '%s%'))
+ os.environ['s'] = 'expanded'
+ test('%s%', ('%s%', 'expanded')) # %s% should be expanded before escaping %s
+ os.environ['(test)s'] = 'expanded'
+ test('%(test)s%', ('NA%', 'expanded')) # Environment should take priority over template
+
+ # Path expansion and escaping
+ test('Hello %(title1)s', 'Hello $PATH')
+ test('Hello %(title2)s', 'Hello %PATH%')
+ test('%(title3)s', ('foo/bar\\test', 'foo⧸bar⧹test'))
+ test('folder/%(title3)s', ('folder/foo/bar\\test', 'folder%sfoo⧸bar⧹test' % os.path.sep))
+
+ def test_format_note(self):
+ ydl = YoutubeDL()
+ self.assertEqual(ydl._format_note({}), '')
+ assertRegexpMatches(self, ydl._format_note({
+ 'vbr': 10,
+ }), r'^\s*10k$')
+ assertRegexpMatches(self, ydl._format_note({
+ 'fps': 30,
+ }), r'^30fps$')
+
+ def test_postprocessors(self):
+ filename = 'post-processor-testfile.mp4'
+ audiofile = filename + '.mp3'
+
+ class SimplePP(PostProcessor):
+ def run(self, info):
+ with open(audiofile, 'w') as f:
+ f.write('EXAMPLE')
+ return [info['filepath']], info
+
+ def run_pp(params, PP):
+ with open(filename, 'w') as f:
+ f.write('EXAMPLE')
+ ydl = YoutubeDL(params)
+ ydl.add_post_processor(PP())
+ ydl.post_process(filename, {'filepath': filename})
+
+ run_pp({'keepvideo': True}, SimplePP)
+ self.assertTrue(os.path.exists(filename), '%s doesn\'t exist' % filename)
+ self.assertTrue(os.path.exists(audiofile), '%s doesn\'t exist' % audiofile)
+ os.unlink(filename)
+ os.unlink(audiofile)
+
+ run_pp({'keepvideo': False}, SimplePP)
+ self.assertFalse(os.path.exists(filename), '%s exists' % filename)
+ self.assertTrue(os.path.exists(audiofile), '%s doesn\'t exist' % audiofile)
+ os.unlink(audiofile)
+
+ class ModifierPP(PostProcessor):
+ def run(self, info):
+ with open(info['filepath'], 'w') as f:
+ f.write('MODIFIED')
+ return [], info
+
+ run_pp({'keepvideo': False}, ModifierPP)
+ self.assertTrue(os.path.exists(filename), '%s doesn\'t exist' % filename)
+ os.unlink(filename)
+
+ def test_match_filter(self):
+ first = {
+ 'id': '1',
+ 'url': TEST_URL,
+ 'title': 'one',
+ 'extractor': 'TEST',
+ 'duration': 30,
+ 'filesize': 10 * 1024,
+ 'playlist_id': '42',
+ 'uploader': "變態妍字幕版 太妍 тест",
+ 'creator': "тест ' 123 ' тест--",
+ 'webpage_url': 'http://example.com/watch?v=shenanigans',
+ }
+ second = {
+ 'id': '2',
+ 'url': TEST_URL,
+ 'title': 'two',
+ 'extractor': 'TEST',
+ 'duration': 10,
+ 'description': 'foo',
+ 'filesize': 5 * 1024,
+ 'playlist_id': '43',
+ 'uploader': "тест 123",
+ 'webpage_url': 'http://example.com/watch?v=SHENANIGANS',
+ }
+ videos = [first, second]
+
+ def get_videos(filter_=None):
+ ydl = YDL({'match_filter': filter_, 'simulate': True})
+ for v in videos:
+ ydl.process_ie_result(v.copy(), download=True)
+ return [v['id'] for v in ydl.downloaded_info_dicts]
+
+ res = get_videos()
+ self.assertEqual(res, ['1', '2'])
+
+ def f(v, incomplete):
+ if v['id'] == '1':
+ return None
+ else:
+ return 'Video id is not 1'
+ res = get_videos(f)
+ self.assertEqual(res, ['1'])
+
+ f = match_filter_func('duration < 30')
+ res = get_videos(f)
+ self.assertEqual(res, ['2'])
+
+ f = match_filter_func('description = foo')
+ res = get_videos(f)
+ self.assertEqual(res, ['2'])
+
+ f = match_filter_func('description =? foo')
+ res = get_videos(f)
+ self.assertEqual(res, ['1', '2'])
+
+ f = match_filter_func('filesize > 5KiB')
+ res = get_videos(f)
+ self.assertEqual(res, ['1'])
+
+ f = match_filter_func('playlist_id = 42')
+ res = get_videos(f)
+ self.assertEqual(res, ['1'])
+
+ f = match_filter_func('uploader = "變態妍字幕版 太妍 тест"')
+ res = get_videos(f)
+ self.assertEqual(res, ['1'])
+
+ f = match_filter_func('uploader != "變態妍字幕版 太妍 тест"')
+ res = get_videos(f)
+ self.assertEqual(res, ['2'])
+
+ f = match_filter_func('creator = "тест \' 123 \' тест--"')
+ res = get_videos(f)
+ self.assertEqual(res, ['1'])
+
+ f = match_filter_func("creator = 'тест \\' 123 \\' тест--'")
+ res = get_videos(f)
+ self.assertEqual(res, ['1'])
+
+ f = match_filter_func(r"creator = 'тест \' 123 \' тест--' & duration > 30")
+ res = get_videos(f)
+ self.assertEqual(res, [])
+
+ def test_playlist_items_selection(self):
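+ # Ten playlist entries indexed 1..10, served in pages of three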
+ INDICES, PAGE_SIZE = list(range(1, 11)), 3
+
+ def entry(i, evaluated):
+ evaluated.append(i)
+ return {
+ 'id': str(i),
+ 'title': str(i),
+ 'url': TEST_URL,
+ }
+
+ def pagedlist_entries(evaluated):
+ def page_func(n):
+ start = PAGE_SIZE * n
+ for i in INDICES[start: start + PAGE_SIZE]:
+ yield entry(i, evaluated)
+ return OnDemandPagedList(page_func, PAGE_SIZE)
+
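+ # 1-based page number that contains entry i (ceiling division)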
+ def page_num(i):
+ return (i + PAGE_SIZE - 1) // PAGE_SIZE
+
+ def generator_entries(evaluated):
+ for i in INDICES:
+ yield entry(i, evaluated)
+
+ def list_entries(evaluated):
+ return list(generator_entries(evaluated))
+
+ def lazylist_entries(evaluated):
+ return LazyList(generator_entries(evaluated))
+
+ def get_downloaded_info_dicts(params, entries):
+ ydl = YDL(params)
+ ydl.process_ie_result({
+ '_type': 'playlist',
+ 'id': 'test',
+ 'extractor': 'test:playlist',
+ 'extractor_key': 'test:playlist',
+ 'webpage_url': 'http://example.com',
+ 'entries': entries,
+ })
+ return ydl.downloaded_info_dicts
+
+ def test_selection(params, expected_ids, evaluate_all=False):
+ expected_ids = list(expected_ids)
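+ # Lazy sources should only be evaluated as far as needed: a
+ # generator up to the highest requested index, a PagedList only
+ # for the pages spanning the requested entries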
+ if evaluate_all:
+ generator_eval = pagedlist_eval = INDICES
+ elif not expected_ids:
+ generator_eval = pagedlist_eval = []
+ else:
+ generator_eval = INDICES[0: max(expected_ids)]
+ pagedlist_eval = INDICES[PAGE_SIZE * page_num(min(expected_ids)) - PAGE_SIZE:
+ PAGE_SIZE * page_num(max(expected_ids))]
+
+ for name, func, expected_eval in (
+ ('list', list_entries, INDICES),
+ ('Generator', generator_entries, generator_eval),
+ # ('LazyList', lazylist_entries, generator_eval), # Generator and LazyList follow the exact same code path
+ ('PagedList', pagedlist_entries, pagedlist_eval),
+ ):
+ evaluated = []
+ entries = func(evaluated)
+ results = [(v['playlist_autonumber'] - 1, (int(v['id']), v['playlist_index']))
+ for v in get_downloaded_info_dicts(params, entries)]
+ self.assertEqual(results, list(enumerate(zip(expected_ids, expected_ids))), f'Entries of {name} for {params}')
+ self.assertEqual(sorted(evaluated), expected_eval, f'Evaluation of {name} for {params}')
+
+ test_selection({}, INDICES)
+ test_selection({'playlistend': 20}, INDICES, True)
+ test_selection({'playlistend': 2}, INDICES[:2])
+ test_selection({'playliststart': 11}, [], True)
+ test_selection({'playliststart': 2}, INDICES[1:])
+ test_selection({'playlist_items': '2-4'}, INDICES[1:4])
+ test_selection({'playlist_items': '2,4'}, [2, 4])
+ test_selection({'playlist_items': '20'}, [], True)
+ test_selection({'playlist_items': '0'}, [])
+
+ # Tests for https://github.com/ytdl-org/youtube-dl/issues/10591
+ test_selection({'playlist_items': '2-4,3-4,3'}, [2, 3, 4])
+ test_selection({'playlist_items': '4,2'}, [4, 2])
+
+ # Tests for https://github.com/yt-dlp/yt-dlp/issues/720
+ # https://github.com/yt-dlp/yt-dlp/issues/302
+ test_selection({'playlistreverse': True}, INDICES[::-1])
+ test_selection({'playliststart': 2, 'playlistreverse': True}, INDICES[:0:-1])
+ test_selection({'playlist_items': '2,4', 'playlistreverse': True}, [4, 2])
+ test_selection({'playlist_items': '4,2'}, [4, 2])
+
+ # Tests for --playlist-items start:end:step
+ test_selection({'playlist_items': ':'}, INDICES, True)
+ test_selection({'playlist_items': '::1'}, INDICES, True)
+ test_selection({'playlist_items': '::-1'}, INDICES[::-1], True)
+ test_selection({'playlist_items': ':6'}, INDICES[:6])
+ test_selection({'playlist_items': ':-6'}, INDICES[:-5], True)
+ test_selection({'playlist_items': '-1:6:-2'}, INDICES[:4:-2], True)
+ test_selection({'playlist_items': '9:-6:-2'}, INDICES[8:3:-2], True)
+
+ test_selection({'playlist_items': '1:inf:2'}, INDICES[::2], True)
+ test_selection({'playlist_items': '-2:inf'}, INDICES[-2:], True)
+ test_selection({'playlist_items': ':inf:-1'}, [], True)
+ test_selection({'playlist_items': '0-2:2'}, [2])
+ test_selection({'playlist_items': '1-:2'}, INDICES[::2], True)
+ test_selection({'playlist_items': '0--2:2'}, INDICES[1:-1:2], True)
+
+ test_selection({'playlist_items': '10::3'}, [10], True)
+ test_selection({'playlist_items': '-1::3'}, [10], True)
+ test_selection({'playlist_items': '11::3'}, [], True)
+ test_selection({'playlist_items': '-15::2'}, INDICES[1::2], True)
+ test_selection({'playlist_items': '-15::15'}, [], True)
+
+ def test_do_not_override_ie_key_in_url_transparent(self):
+ ydl = YDL()
+
+ class Foo1IE(InfoExtractor):
+ _VALID_URL = r'foo1:'
+
+ def _real_extract(self, url):
+ return {
+ '_type': 'url_transparent',
+ 'url': 'foo2:',
+ 'ie_key': 'Foo2',
+ 'title': 'foo1 title',
+ 'id': 'foo1_id',
+ }
+
+ class Foo2IE(InfoExtractor):
+ _VALID_URL = r'foo2:'
+
+ def _real_extract(self, url):
+ return {
+ '_type': 'url',
+ 'url': 'foo3:',
+ 'ie_key': 'Foo3',
+ }
+
+ class Foo3IE(InfoExtractor):
+ _VALID_URL = r'foo3:'
+
+ def _real_extract(self, url):
+ return _make_result([{'url': TEST_URL}], title='foo3 title')
+
+ ydl.add_info_extractor(Foo1IE(ydl))
+ ydl.add_info_extractor(Foo2IE(ydl))
+ ydl.add_info_extractor(Foo3IE(ydl))
+ ydl.extract_info('foo1:')
+ downloaded = ydl.downloaded_info_dicts[0]
+ self.assertEqual(downloaded['url'], TEST_URL)
+ self.assertEqual(downloaded['title'], 'foo1 title')
+ self.assertEqual(downloaded['id'], 'testid')
+ self.assertEqual(downloaded['extractor'], 'testex')
+ self.assertEqual(downloaded['extractor_key'], 'TestEx')
+
+ # Test case for https://github.com/ytdl-org/youtube-dl/issues/27064
+ def test_ignoreerrors_for_playlist_with_url_transparent_iterable_entries(self):
+
+ class _YDL(YDL):
+ def __init__(self, *args, **kwargs):
+ super().__init__(*args, **kwargs)
+
+ def trouble(self, s, tb=None):
+ pass
+
+ ydl = _YDL({
+ 'format': 'extra',
+ 'ignoreerrors': True,
+ })
+
+ class VideoIE(InfoExtractor):
+ _VALID_URL = r'video:(?P<id>\d+)'
+
+ def _real_extract(self, url):
+ video_id = self._match_id(url)
+ formats = [{
+ 'format_id': 'default',
+ 'url': 'url:',
+ }]
+ if video_id == '0':
+ raise ExtractorError('foo')
+ if video_id == '2':
+ formats.append({
+ 'format_id': 'extra',
+ 'url': TEST_URL,
+ })
+ return {
+ 'id': video_id,
+ 'title': 'Video %s' % video_id,
+ 'formats': formats,
+ }
+
+ class PlaylistIE(InfoExtractor):
+ _VALID_URL = r'playlist:'
+
+ def _entries(self):
+ for n in range(3):
+ video_id = str(n)
+ yield {
+ '_type': 'url_transparent',
+ 'ie_key': VideoIE.ie_key(),
+ 'id': video_id,
+ 'url': 'video:%s' % video_id,
+ 'title': 'Video Transparent %s' % video_id,
+ }
+
+ def _real_extract(self, url):
+ return self.playlist_result(self._entries())
+
+ ydl.add_info_extractor(VideoIE(ydl))
+ ydl.add_info_extractor(PlaylistIE(ydl))
+ info = ydl.extract_info('playlist:')
+ entries = info['entries']
+ self.assertEqual(len(entries), 3)
+ self.assertTrue(entries[0] is None)
+ self.assertTrue(entries[1] is None)
+ self.assertEqual(len(ydl.downloaded_info_dicts), 1)
+ downloaded = ydl.downloaded_info_dicts[0]
+ entries[2].pop('requested_downloads', None)
+ self.assertEqual(entries[2], downloaded)
+ self.assertEqual(downloaded['url'], TEST_URL)
+ self.assertEqual(downloaded['title'], 'Video Transparent 2')
+ self.assertEqual(downloaded['id'], '2')
+ self.assertEqual(downloaded['extractor'], 'Video')
+ self.assertEqual(downloaded['extractor_key'], 'Video')
+
+ def test_header_cookies(self):
+ from http.cookiejar import Cookie
+
+ ydl = FakeYDL()
+ ydl.report_warning = lambda *_, **__: None
+
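+ # Positional http.cookiejar.Cookie arguments, in order: version, name,
+ # value, port, port_specified, domain, domain_specified,
+ # domain_initial_dot, path, path_specified, secure, expires, discard,
+ # comment, comment_url, rest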
+ def cookie(name, value, version=None, domain='', path='', secure=False, expires=None):
+ return Cookie(
+ version or 0, name, value, None, False,
+ domain, bool(domain), bool(domain), path, bool(path),
+ secure, expires, False, None, None, rest={})
+
+ _test_url = 'https://yt.dlp/test'
+
+ def test(encoded_cookies, cookies, *, headers=False, round_trip=None, error_re=None):
+ def _test():
+ ydl.cookiejar.clear()
+ ydl._load_cookies(encoded_cookies, autoscope=headers)
+ if headers:
+ ydl._apply_header_cookies(_test_url)
+ data = {'url': _test_url}
+ ydl._calc_headers(data)
+ self.assertCountEqual(
+ map(vars, ydl.cookiejar), map(vars, cookies),
+ 'Extracted cookiejar.Cookie is not the same')
+ if not headers:
+ self.assertEqual(
+ data.get('cookies'), round_trip or encoded_cookies,
+ 'Cookie is not the same as round trip')
+ ydl.__dict__['_YoutubeDL__header_cookies'] = []
+
+ with self.subTest(msg=encoded_cookies):
+ if not error_re:
+ _test()
+ return
+ with self.assertRaisesRegex(Exception, error_re):
+ _test()
+
+ test('test=value; Domain=.yt.dlp', [cookie('test', 'value', domain='.yt.dlp')])
+ test('test=value', [cookie('test', 'value')], error_re=r'Unscoped cookies are not allowed')
+ test('cookie1=value1; Domain=.yt.dlp; Path=/test; cookie2=value2; Domain=.yt.dlp; Path=/', [
+ cookie('cookie1', 'value1', domain='.yt.dlp', path='/test'),
+ cookie('cookie2', 'value2', domain='.yt.dlp', path='/')])
+ test('test=value; Domain=.yt.dlp; Path=/test; Secure; Expires=9999999999', [
+ cookie('test', 'value', domain='.yt.dlp', path='/test', secure=True, expires=9999999999)])
+ test('test="value; "; path=/test; domain=.yt.dlp', [
+ cookie('test', 'value; ', domain='.yt.dlp', path='/test')],
+ round_trip='test="value\\073 "; Domain=.yt.dlp; Path=/test')
+ test('name=; Domain=.yt.dlp', [cookie('name', '', domain='.yt.dlp')],
+ round_trip='name=""; Domain=.yt.dlp')
+
+ test('test=value', [cookie('test', 'value', domain='.yt.dlp')], headers=True)
+ test('cookie1=value; Domain=.yt.dlp; cookie2=value', [], headers=True, error_re=r'Invalid syntax')
+ ydl.deprecated_feature = ydl.report_error
+ test('test=value', [], headers=True, error_re=r'Passing cookies as a header is a potential security risk')
+
+ def test_infojson_cookies(self):
+ TEST_FILE = 'test_infojson_cookies.info.json'
+ TEST_URL = 'https://example.com/example.mp4'
+ COOKIES = 'a=b; Domain=.example.com; c=d; Domain=.example.com'
+ COOKIE_HEADER = {'Cookie': 'a=b; c=d'}
+
+ ydl = FakeYDL()
+ ydl.process_info = lambda x: ydl._write_info_json('test', x, TEST_FILE)
+
+ def make_info(info_header_cookies=False, fmts_header_cookies=False, cookies_field=False):
+ fmt = {'url': TEST_URL}
+ if fmts_header_cookies:
+ fmt['http_headers'] = COOKIE_HEADER
+ if cookies_field:
+ fmt['cookies'] = COOKIES
+ return _make_result([fmt], http_headers=COOKIE_HEADER if info_header_cookies else None)
+
+ def test(initial_info, note):
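+ # Process the info_dict, dump it to the info.json, reload it, and
+ # process it again; the cookiejar must be populated at both stages,
+ # and every result must expose a 'cookies' field while the raw
+ # Cookie header is stripped out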
+ result = {}
+ result['processed'] = ydl.process_ie_result(initial_info)
+ self.assertTrue(ydl.cookiejar.get_cookies_for_url(TEST_URL),
+ msg=f'No cookies set in cookiejar after initial process when {note}')
+ ydl.cookiejar.clear()
+ with open(TEST_FILE) as infojson:
+ result['loaded'] = ydl.sanitize_info(json.load(infojson), True)
+ result['final'] = ydl.process_ie_result(result['loaded'].copy(), download=False)
+ self.assertTrue(ydl.cookiejar.get_cookies_for_url(TEST_URL),
+ msg=f'No cookies set in cookiejar after final process when {note}')
+ ydl.cookiejar.clear()
+ for key in ('processed', 'loaded', 'final'):
+ info = result[key]
+ self.assertIsNone(
+ traverse_obj(info, ((None, ('formats', 0)), 'http_headers', 'Cookie'), casesense=False, get_all=False),
+ msg=f'Cookie header not removed in {key} result when {note}')
+ self.assertEqual(
+ traverse_obj(info, ((None, ('formats', 0)), 'cookies'), get_all=False), COOKIES,
+ msg=f'No cookies field found in {key} result when {note}')
+
+ test({'url': TEST_URL, 'http_headers': COOKIE_HEADER, 'id': '1', 'title': 'x'}, 'no formats field')
+ test(make_info(info_header_cookies=True), 'info_dict header cookies')
+ test(make_info(fmts_header_cookies=True), 'format header cookies')
+ test(make_info(info_header_cookies=True, fmts_header_cookies=True), 'info_dict and format header cookies')
+ test(make_info(info_header_cookies=True, fmts_header_cookies=True, cookies_field=True), 'all cookies fields')
+ test(make_info(cookies_field=True), 'cookies format field')
+ test({'url': TEST_URL, 'cookies': COOKIES, 'id': '1', 'title': 'x'}, 'info_dict cookies field only')
+
+ try_rm(TEST_FILE)
+
+ def test_add_headers_cookie(self):
+ def check_for_cookie_header(result):
+ return traverse_obj(result, ((None, ('formats', 0)), 'http_headers', 'Cookie'), casesense=False, get_all=False)
+
+ ydl = FakeYDL({'http_headers': {'Cookie': 'a=b'}})
+ ydl._apply_header_cookies(_make_result([])['webpage_url']) # Scope to input webpage URL: .example.com
+
+ fmt = {'url': 'https://example.com/video.mp4'}
+ result = ydl.process_ie_result(_make_result([fmt]), download=False)
+ self.assertIsNone(check_for_cookie_header(result), msg='http_headers cookies in result info_dict')
+ self.assertEqual(result.get('cookies'), 'a=b; Domain=.example.com', msg='No cookies were set in cookies field')
+ self.assertIn('a=b', ydl.cookiejar.get_cookie_header(fmt['url']), msg='No cookies were set in cookiejar')
+
+ fmt = {'url': 'https://wrong.com/video.mp4'}
+ result = ydl.process_ie_result(_make_result([fmt]), download=False)
+ self.assertIsNone(check_for_cookie_header(result), msg='http_headers cookies for wrong domain')
+ self.assertFalse(result.get('cookies'), msg='Cookies set in cookies field for wrong domain')
+ self.assertFalse(ydl.cookiejar.get_cookie_header(fmt['url']), msg='Cookies set in cookiejar for wrong domain')
+
+
+if __name__ == '__main__':
+ unittest.main()
diff --git a/test/test_YoutubeDLCookieJar.py b/test/test_YoutubeDLCookieJar.py
new file mode 100644
index 0000000..fdb9bae
--- /dev/null
+++ b/test/test_YoutubeDLCookieJar.py
@@ -0,0 +1,66 @@
+#!/usr/bin/env python3
+
+# Allow direct execution
+import os
+import sys
+import unittest
+
+sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
+
+
+import re
+import tempfile
+
+from yt_dlp.cookies import YoutubeDLCookieJar
+
+
+class TestYoutubeDLCookieJar(unittest.TestCase):
+ def test_keep_session_cookies(self):
+ cookiejar = YoutubeDLCookieJar('./test/testdata/cookies/session_cookies.txt')
+ cookiejar.load()
+ tf = tempfile.NamedTemporaryFile(delete=False)
+ try:
+ cookiejar.save(filename=tf.name)
+ temp = tf.read().decode()
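+ # Netscape cookie file columns: domain, subdomain flag, path,
+ # secure flag, expiry, name, value; session cookies are persisted
+ # with an expiry of 0 so they survive the save/load round trip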
+ self.assertTrue(re.search(
+ r'www\.foobar\.foobar\s+FALSE\s+/\s+TRUE\s+0\s+YoutubeDLExpiresEmpty\s+YoutubeDLExpiresEmptyValue', temp))
+ self.assertTrue(re.search(
+ r'www\.foobar\.foobar\s+FALSE\s+/\s+TRUE\s+0\s+YoutubeDLExpires0\s+YoutubeDLExpires0Value', temp))
+ finally:
+ tf.close()
+ os.remove(tf.name)
+
+ def test_strip_httponly_prefix(self):
+ cookiejar = YoutubeDLCookieJar('./test/testdata/cookies/httponly_cookies.txt')
+ cookiejar.load()
+
+ def assert_cookie_has_value(key):
+ self.assertEqual(cookiejar._cookies['www.foobar.foobar']['/'][key].value, key + '_VALUE')
+
+ assert_cookie_has_value('HTTPONLY_COOKIE')
+ assert_cookie_has_value('JS_ACCESSIBLE_COOKIE')
+
+ def test_malformed_cookies(self):
+ cookiejar = YoutubeDLCookieJar('./test/testdata/cookies/malformed_cookies.txt')
+ cookiejar.load()
+ # Cookies should be empty since all malformed cookie file entries
+ # will be ignored
+ self.assertFalse(cookiejar._cookies)
+
+ def test_get_cookie_header(self):
+ cookiejar = YoutubeDLCookieJar('./test/testdata/cookies/httponly_cookies.txt')
+ cookiejar.load()
+ header = cookiejar.get_cookie_header('https://www.foobar.foobar')
+ self.assertIn('HTTPONLY_COOKIE', header)
+
+ def test_get_cookies_for_url(self):
+ cookiejar = YoutubeDLCookieJar('./test/testdata/cookies/session_cookies.txt')
+ cookiejar.load()
+ cookies = cookiejar.get_cookies_for_url('https://www.foobar.foobar/')
+ self.assertEqual(len(cookies), 2)
+ cookies = cookiejar.get_cookies_for_url('https://foobar.foobar/')
+ self.assertFalse(cookies)
+
+
+if __name__ == '__main__':
+ unittest.main()
diff --git a/test/test_aes.py b/test/test_aes.py
new file mode 100644
index 0000000..a26abfd
--- /dev/null
+++ b/test/test_aes.py
@@ -0,0 +1,152 @@
+#!/usr/bin/env python3
+
+# Allow direct execution
+import os
+import sys
+import unittest
+
+sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
+
+
+import base64
+
+from yt_dlp.aes import (
+ aes_cbc_decrypt,
+ aes_cbc_decrypt_bytes,
+ aes_cbc_encrypt,
+ aes_ctr_decrypt,
+ aes_ctr_encrypt,
+ aes_decrypt,
+ aes_decrypt_text,
+ aes_ecb_decrypt,
+ aes_ecb_encrypt,
+ aes_encrypt,
+ aes_gcm_decrypt_and_verify,
+ aes_gcm_decrypt_and_verify_bytes,
+ key_expansion,
+ pad_block,
+)
+from yt_dlp.dependencies import Cryptodome
+from yt_dlp.utils import bytes_to_intlist, intlist_to_bytes
+
+# The encrypted test data below can be generated with 'devscripts/generate_aes_testdata.py'
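+# (presumably run as `python devscripts/generate_aes_testdata.py` from the
+# repository root)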
+
+
+class TestAES(unittest.TestCase):
+ def setUp(self):
+ self.key = self.iv = [0x20, 0x15] + 14 * [0]
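+ # 16-byte key and IV given as int lists; the pure-Python helpers in
+ # yt_dlp.aes operate on lists of ints, hence the bytes_to_intlist/
+ # intlist_to_bytes conversions throughout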
+ self.secret_msg = b'Secret message goes here'
+
+ def test_encrypt(self):
+ msg = b'message'
+ key = list(range(16))
+ encrypted = aes_encrypt(bytes_to_intlist(msg), key)
+ decrypted = intlist_to_bytes(aes_decrypt(encrypted, key))
+ self.assertEqual(decrypted, msg)
+
+ def test_cbc_decrypt(self):
+ data = b'\x97\x92+\xe5\x0b\xc3\x18\x91ky9m&\xb3\xb5@\xe6\x27\xc2\x96.\xc8u\x88\xab9-[\x9e|\xf1\xcd'
+ decrypted = intlist_to_bytes(aes_cbc_decrypt(bytes_to_intlist(data), self.key, self.iv))
+ self.assertEqual(decrypted.rstrip(b'\x08'), self.secret_msg)
+ if Cryptodome.AES:
+ decrypted = aes_cbc_decrypt_bytes(data, intlist_to_bytes(self.key), intlist_to_bytes(self.iv))
+ self.assertEqual(decrypted.rstrip(b'\x08'), self.secret_msg)
+
+ def test_cbc_encrypt(self):
+ data = bytes_to_intlist(self.secret_msg)
+ encrypted = intlist_to_bytes(aes_cbc_encrypt(data, self.key, self.iv))
+ self.assertEqual(
+ encrypted,
+ b'\x97\x92+\xe5\x0b\xc3\x18\x91ky9m&\xb3\xb5@\xe6\'\xc2\x96.\xc8u\x88\xab9-[\x9e|\xf1\xcd')
+
+ def test_ctr_decrypt(self):
+ data = bytes_to_intlist(b'\x03\xc7\xdd\xd4\x8e\xb3\xbc\x1a*O\xdc1\x12+8Aio\xd1z\xb5#\xaf\x08')
+ decrypted = intlist_to_bytes(aes_ctr_decrypt(data, self.key, self.iv))
+ self.assertEqual(decrypted.rstrip(b'\x08'), self.secret_msg)
+
+ def test_ctr_encrypt(self):
+ data = bytes_to_intlist(self.secret_msg)
+ encrypted = intlist_to_bytes(aes_ctr_encrypt(data, self.key, self.iv))
+ self.assertEqual(
+ encrypted,
+ b'\x03\xc7\xdd\xd4\x8e\xb3\xbc\x1a*O\xdc1\x12+8Aio\xd1z\xb5#\xaf\x08')
+
+ def test_gcm_decrypt(self):
+ data = b'\x159Y\xcf5eud\x90\x9c\x85&]\x14\x1d\x0f.\x08\xb4T\xe4/\x17\xbd'
+ authentication_tag = b'\xe8&I\x80rI\x07\x9d}YWuU@:e'
+
+ decrypted = intlist_to_bytes(aes_gcm_decrypt_and_verify(
+ bytes_to_intlist(data), self.key, bytes_to_intlist(authentication_tag), self.iv[:12]))
+ self.assertEqual(decrypted.rstrip(b'\x08'), self.secret_msg)
+ if Cryptodome.AES:
+ decrypted = aes_gcm_decrypt_and_verify_bytes(
+ data, intlist_to_bytes(self.key), authentication_tag, intlist_to_bytes(self.iv[:12]))
+ self.assertEqual(decrypted.rstrip(b'\x08'), self.secret_msg)
+
+ def test_decrypt_text(self):
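+ # aes_decrypt_text expects base64(8-byte nonce + CTR ciphertext) and
+ # derives the key from the password; the last argument selects the
+ # key size in bytes (16 for AES-128, 32 for AES-256)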
+ password = intlist_to_bytes(self.key).decode()
+ encrypted = base64.b64encode(
+ intlist_to_bytes(self.iv[:8])
+ + b'\x17\x15\x93\xab\x8d\x80V\xcdV\xe0\t\xcdo\xc2\xa5\xd8ksM\r\xe27N\xae'
+ ).decode()
+ decrypted = aes_decrypt_text(encrypted, password, 16)
+ self.assertEqual(decrypted, self.secret_msg)
+
+ password = intlist_to_bytes(self.key).decode()
+ encrypted = base64.b64encode(
+ intlist_to_bytes(self.iv[:8])
+ + b'\x0b\xe6\xa4\xd9z\x0e\xb8\xb9\xd0\xd4i_\x85\x1d\x99\x98_\xe5\x80\xe7.\xbf\xa5\x83'
+ ).decode()
+ decrypted = aes_decrypt_text(encrypted, password, 32)
+ self.assertEqual(decrypted, self.secret_msg)
+
+ def test_ecb_encrypt(self):
+ data = bytes_to_intlist(self.secret_msg)
+ encrypted = intlist_to_bytes(aes_ecb_encrypt(data, self.key))
+ self.assertEqual(
+ encrypted,
+ b'\xaa\x86]\x81\x97>\x02\x92\x9d\x1bR[[L/u\xd3&\xd1(h\xde{\x81\x94\xba\x02\xae\xbd\xa6\xd0:')
+
+ def test_ecb_decrypt(self):
+ data = bytes_to_intlist(b'\xaa\x86]\x81\x97>\x02\x92\x9d\x1bR[[L/u\xd3&\xd1(h\xde{\x81\x94\xba\x02\xae\xbd\xa6\xd0:')
+ decrypted = intlist_to_bytes(aes_ecb_decrypt(data, self.key, self.iv))
+ self.assertEqual(decrypted.rstrip(b'\x08'), self.secret_msg)
+
+ def test_key_expansion(self):
+ key = '4f6bdaa39e2f8cb07f5e722d9edef314'
+
+ self.assertEqual(key_expansion(bytes_to_intlist(bytearray.fromhex(key))), [
+ 0x4F, 0x6B, 0xDA, 0xA3, 0x9E, 0x2F, 0x8C, 0xB0, 0x7F, 0x5E, 0x72, 0x2D, 0x9E, 0xDE, 0xF3, 0x14,
+ 0x53, 0x66, 0x20, 0xA8, 0xCD, 0x49, 0xAC, 0x18, 0xB2, 0x17, 0xDE, 0x35, 0x2C, 0xC9, 0x2D, 0x21,
+ 0x8C, 0xBE, 0xDD, 0xD9, 0x41, 0xF7, 0x71, 0xC1, 0xF3, 0xE0, 0xAF, 0xF4, 0xDF, 0x29, 0x82, 0xD5,
+ 0x2D, 0xAD, 0xDE, 0x47, 0x6C, 0x5A, 0xAF, 0x86, 0x9F, 0xBA, 0x00, 0x72, 0x40, 0x93, 0x82, 0xA7,
+ 0xF9, 0xBE, 0x82, 0x4E, 0x95, 0xE4, 0x2D, 0xC8, 0x0A, 0x5E, 0x2D, 0xBA, 0x4A, 0xCD, 0xAF, 0x1D,
+ 0x54, 0xC7, 0x26, 0x98, 0xC1, 0x23, 0x0B, 0x50, 0xCB, 0x7D, 0x26, 0xEA, 0x81, 0xB0, 0x89, 0xF7,
+ 0x93, 0x60, 0x4E, 0x94, 0x52, 0x43, 0x45, 0xC4, 0x99, 0x3E, 0x63, 0x2E, 0x18, 0x8E, 0xEA, 0xD9,
+ 0xCA, 0xE7, 0x7B, 0x39, 0x98, 0xA4, 0x3E, 0xFD, 0x01, 0x9A, 0x5D, 0xD3, 0x19, 0x14, 0xB7, 0x0A,
+ 0xB0, 0x4E, 0x1C, 0xED, 0x28, 0xEA, 0x22, 0x10, 0x29, 0x70, 0x7F, 0xC3, 0x30, 0x64, 0xC8, 0xC9,
+ 0xE8, 0xA6, 0xC1, 0xE9, 0xC0, 0x4C, 0xE3, 0xF9, 0xE9, 0x3C, 0x9C, 0x3A, 0xD9, 0x58, 0x54, 0xF3,
+ 0xB4, 0x86, 0xCC, 0xDC, 0x74, 0xCA, 0x2F, 0x25, 0x9D, 0xF6, 0xB3, 0x1F, 0x44, 0xAE, 0xE7, 0xEC])
+
+ def test_pad_block(self):
+ block = [0x21, 0xA0, 0x43, 0xFF]
+
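+ # 4 data bytes in a 16-byte AES block leave 12 bytes of padding:
+ # PKCS#7 repeats the pad length (0x0C), ISO 7816-4 writes 0x80 then
+ # zeros, 'whitespace' uses spaces and 'zero' uses NUL bytes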
+ self.assertEqual(pad_block(block, 'pkcs7'),
+ block + [0x0C, 0x0C, 0x0C, 0x0C, 0x0C, 0x0C, 0x0C, 0x0C, 0x0C, 0x0C, 0x0C, 0x0C])
+
+ self.assertEqual(pad_block(block, 'iso7816'),
+ block + [0x80, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00])
+
+ self.assertEqual(pad_block(block, 'whitespace'),
+ block + [0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20])
+
+ self.assertEqual(pad_block(block, 'zero'),
+ block + [0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00])
+
+ block = list(range(16))
+ for mode in ('pkcs7', 'iso7816', 'whitespace', 'zero'):
+ self.assertEqual(pad_block(block, mode), block, mode)
+
+
+if __name__ == '__main__':
+ unittest.main()
diff --git a/test/test_age_restriction.py b/test/test_age_restriction.py
new file mode 100644
index 0000000..6810759
--- /dev/null
+++ b/test/test_age_restriction.py
@@ -0,0 +1,55 @@
+#!/usr/bin/env python3
+
+# Allow direct execution
+import os
+import sys
+import unittest
+
+sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
+
+
+from test.helper import is_download_test, try_rm
+from yt_dlp import YoutubeDL
+from yt_dlp.utils import DownloadError
+
+
+def _download_restricted(url, filename, age):
+ """ Returns true if the file has been downloaded """
+
+ params = {
+ 'age_limit': age,
+ 'skip_download': True,
+ 'writeinfojson': True,
+ 'outtmpl': '%(id)s.%(ext)s',
+ }
+ ydl = YoutubeDL(params)
+ ydl.add_default_info_extractors()
+ json_filename = os.path.splitext(filename)[0] + '.info.json'
+ try_rm(json_filename)
+ try:
+ ydl.download([url])
+ except DownloadError:
+ pass
+ else:
+ return os.path.exists(json_filename)
+ finally:
+ try_rm(json_filename)
+
+
+@is_download_test
+class TestAgeRestriction(unittest.TestCase):
+ def _assert_restricted(self, url, filename, age, old_age=None):
+ self.assertTrue(_download_restricted(url, filename, old_age))
+ self.assertFalse(_download_restricted(url, filename, age))
+
+ def test_youtube(self):
+ self._assert_restricted('HtVdAasjOgU', 'HtVdAasjOgU.mp4', 10)
+
+ def test_youporn(self):
+ self._assert_restricted(
+ 'https://www.youporn.com/watch/16715086/sex-ed-in-detention-18-asmr/',
+ '16715086.mp4', 2, old_age=25)
+
+
+if __name__ == '__main__':
+ unittest.main()
diff --git a/test/test_all_urls.py b/test/test_all_urls.py
new file mode 100644
index 0000000..848c96f
--- /dev/null
+++ b/test/test_all_urls.py
@@ -0,0 +1,122 @@
+#!/usr/bin/env python3
+
+# Allow direct execution
+import os
+import sys
+import unittest
+
+sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
+
+
+import collections
+
+from test.helper import gettestcases
+from yt_dlp.extractor import FacebookIE, YoutubeIE, gen_extractors
+
+
+class TestAllURLsMatching(unittest.TestCase):
+ def setUp(self):
+ self.ies = gen_extractors()
+
+ def matching_ies(self, url):
+ return [ie.IE_NAME for ie in self.ies if ie.suitable(url) and ie.IE_NAME != 'generic']
+
+ def assertMatch(self, url, ie_list):
+ self.assertEqual(self.matching_ies(url), ie_list)
+
+ def test_youtube_playlist_matching(self):
+ assertPlaylist = lambda url: self.assertMatch(url, ['youtube:playlist'])
+ assertTab = lambda url: self.assertMatch(url, ['youtube:tab'])
+ assertPlaylist('ECUl4u3cNGP61MdtwGTqZA0MreSaDybji8')
+ assertPlaylist('UUBABnxM4Ar9ten8Mdjj1j0Q') # 585
+ assertPlaylist('PL63F0C78739B09958')
+ assertTab('https://www.youtube.com/AsapSCIENCE')
+ assertTab('https://www.youtube.com/embedded')
+ assertTab('https://www.youtube.com/playlist?list=UUBABnxM4Ar9ten8Mdjj1j0Q')
+ assertTab('https://www.youtube.com/playlist?list=PLwP_SiAcdui0KVebT0mU9Apz359a4ubsC')
+ assertTab('https://www.youtube.com/watch?v=AV6J6_AeFEQ&playnext=1&list=PL4023E734DA416012') # 668
+ self.assertFalse('youtube:playlist' in self.matching_ies('PLtS2H6bU1M'))
+ # Top tracks
+ assertTab('https://www.youtube.com/playlist?list=MCUS.20142101')
+
+ def test_youtube_matching(self):
+ self.assertTrue(YoutubeIE.suitable('PLtS2H6bU1M'))
+ self.assertFalse(YoutubeIE.suitable('https://www.youtube.com/watch?v=AV6J6_AeFEQ&playnext=1&list=PL4023E734DA416012')) # 668
+ self.assertMatch('http://youtu.be/BaW_jenozKc', ['youtube'])
+ # self.assertMatch('http://www.youtube.com/v/BaW_jenozKc', ['youtube']) # /v/ is no longer valid
+ self.assertMatch('https://youtube.googleapis.com/v/BaW_jenozKc', ['youtube'])
+ self.assertMatch('http://www.cleanvideosearch.com/media/action/yt/watch?videoId=8v_4O44sfjM', ['youtube'])
+
+ def test_youtube_channel_matching(self):
+ assertChannel = lambda url: self.assertMatch(url, ['youtube:tab'])
+ assertChannel('https://www.youtube.com/channel/HCtnHdj3df7iM')
+ assertChannel('https://www.youtube.com/channel/HCtnHdj3df7iM?feature=gb_ch_rec')
+ assertChannel('https://www.youtube.com/channel/HCtnHdj3df7iM/videos')
+
+ def test_youtube_user_matching(self):
+ self.assertMatch('http://www.youtube.com/NASAgovVideo/videos', ['youtube:tab'])
+
+ def test_youtube_feeds(self):
+ self.assertMatch('https://www.youtube.com/feed/library', ['youtube:tab'])
+ self.assertMatch('https://www.youtube.com/feed/history', ['youtube:tab'])
+ self.assertMatch('https://www.youtube.com/feed/watch_later', ['youtube:tab'])
+ self.assertMatch('https://www.youtube.com/feed/subscriptions', ['youtube:tab'])
+
+ def test_youtube_search_matching(self):
+ self.assertMatch('http://www.youtube.com/results?search_query=making+mustard', ['youtube:search_url'])
+ self.assertMatch('https://www.youtube.com/results?baz=bar&search_query=youtube-dl+test+video&filters=video&lclk=video', ['youtube:search_url'])
+
+ def test_facebook_matching(self):
+ self.assertTrue(FacebookIE.suitable('https://www.facebook.com/Shiniknoh#!/photo.php?v=10153317450565268'))
+ self.assertTrue(FacebookIE.suitable('https://www.facebook.com/cindyweather?fref=ts#!/photo.php?v=10152183998945793'))
+
+ def test_no_duplicates(self):
+ ies = gen_extractors()
+ for tc in gettestcases(include_onlymatching=True):
+ url = tc['url']
+ for ie in ies:
+ if type(ie).__name__ in ('GenericIE', tc['name'] + 'IE'):
+ self.assertTrue(ie.suitable(url), f'{type(ie).__name__} should match URL {url!r}')
+ else:
+ self.assertFalse(
+ ie.suitable(url),
+ f'{type(ie).__name__} should not match URL {url!r}. That URL belongs to {tc["name"]}.')
+
+ def test_keywords(self):
+ self.assertMatch(':ytsubs', ['youtube:subscriptions'])
+ self.assertMatch(':ytsubscriptions', ['youtube:subscriptions'])
+ self.assertMatch(':ythistory', ['youtube:history'])
+
+ def test_vimeo_matching(self):
+ self.assertMatch('https://vimeo.com/channels/tributes', ['vimeo:channel'])
+ self.assertMatch('https://vimeo.com/channels/31259', ['vimeo:channel'])
+ self.assertMatch('https://vimeo.com/channels/31259/53576664', ['vimeo'])
+ self.assertMatch('https://vimeo.com/user7108434', ['vimeo:user'])
+ self.assertMatch('https://vimeo.com/user7108434/videos', ['vimeo:user'])
+ self.assertMatch('https://vimeo.com/user21297594/review/75524534/3c257a1b5d', ['vimeo:review'])
+
+ # https://github.com/ytdl-org/youtube-dl/issues/1930
+ def test_soundcloud_not_matching_sets(self):
+ self.assertMatch('http://soundcloud.com/floex/sets/gone-ep', ['soundcloud:set'])
+
+ def test_tumblr(self):
+ self.assertMatch('http://tatianamaslanydaily.tumblr.com/post/54196191430/orphan-black-dvd-extra-behind-the-scenes', ['Tumblr'])
+ self.assertMatch('http://tatianamaslanydaily.tumblr.com/post/54196191430', ['Tumblr'])
+
+ def test_pbs(self):
+ # https://github.com/ytdl-org/youtube-dl/issues/2350
+ self.assertMatch('http://video.pbs.org/viralplayer/2365173446/', ['pbs'])
+ self.assertMatch('http://video.pbs.org/widget/partnerplayer/980042464/', ['pbs'])
+
+ def test_no_duplicated_ie_names(self):
+ name_accu = collections.defaultdict(list)
+ for ie in self.ies:
+ name_accu[ie.IE_NAME.lower()].append(type(ie).__name__)
+ for (ie_name, ie_list) in name_accu.items():
+ self.assertEqual(
+ len(ie_list), 1,
+ f'Multiple extractors with the same IE_NAME "{ie_name}" ({", ".join(ie_list)})')
+
+
+if __name__ == '__main__':
+ unittest.main()
diff --git a/test/test_cache.py b/test/test_cache.py
new file mode 100644
index 0000000..ce1624b
--- /dev/null
+++ b/test/test_cache.py
@@ -0,0 +1,57 @@
+#!/usr/bin/env python3
+
+# Allow direct execution
+import os
+import sys
+import unittest
+
+sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
+
+
+import shutil
+
+from test.helper import FakeYDL
+from yt_dlp.cache import Cache
+
+
+def _is_empty(d):
+ return not bool(os.listdir(d))
+
+
+def _mkdir(d):
+ if not os.path.exists(d):
+ os.mkdir(d)
+
+
+class TestCache(unittest.TestCase):
+ def setUp(self):
+ TEST_DIR = os.path.dirname(os.path.abspath(__file__))
+ TESTDATA_DIR = os.path.join(TEST_DIR, 'testdata')
+ _mkdir(TESTDATA_DIR)
+ self.test_dir = os.path.join(TESTDATA_DIR, 'cache_test')
+ self.tearDown()
+
+ def tearDown(self):
+ if os.path.exists(self.test_dir):
+ shutil.rmtree(self.test_dir)
+
+ def test_cache(self):
+ ydl = FakeYDL({
+ 'cachedir': self.test_dir,
+ })
+ c = Cache(ydl)
+ obj = {'x': 1, 'y': ['ä', '\\a', True]}
+ self.assertEqual(c.load('test_cache', 'k.'), None)
+ c.store('test_cache', 'k.', obj)
+ self.assertEqual(c.load('test_cache', 'k2'), None)
+ self.assertFalse(_is_empty(self.test_dir))
+ self.assertEqual(c.load('test_cache', 'k.'), obj)
+ self.assertEqual(c.load('test_cache', 'y'), None)
+ self.assertEqual(c.load('test_cache2', 'k.'), None)
+ c.remove()
+ self.assertFalse(os.path.exists(self.test_dir))
+ self.assertEqual(c.load('test_cache', 'k.'), None)
+
+
+if __name__ == '__main__':
+ unittest.main()
diff --git a/test/test_compat.py b/test/test_compat.py
new file mode 100644
index 0000000..71ca7f9
--- /dev/null
+++ b/test/test_compat.py
@@ -0,0 +1,105 @@
+#!/usr/bin/env python3
+
+# Allow direct execution
+import os
+import sys
+import unittest
+
+sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
+
+
+import struct
+
+from yt_dlp import compat
+from yt_dlp.compat import urllib # isort: split
+from yt_dlp.compat import (
+ compat_etree_fromstring,
+ compat_expanduser,
+ compat_urllib_parse_unquote,
+ compat_urllib_parse_urlencode,
+)
+from yt_dlp.compat.urllib.request import getproxies
+
+
+class TestCompat(unittest.TestCase):
+ def test_compat_passthrough(self):
+ with self.assertWarns(DeprecationWarning):
+ compat.compat_basestring
+
+ with self.assertWarns(DeprecationWarning):
+ compat.WINDOWS_VT_MODE
+
+ self.assertEqual(urllib.request.getproxies, getproxies)
+
+ with self.assertWarns(DeprecationWarning):
+ compat.compat_pycrypto_AES # Must not raise error
+
+ def test_compat_expanduser(self):
+ old_home = os.environ.get('HOME')
+ test_str = R'C:\Documents and Settings\тест\Application Data'
+ try:
+ os.environ['HOME'] = test_str
+ self.assertEqual(compat_expanduser('~'), test_str)
+ finally:
+ os.environ['HOME'] = old_home or ''
+
+ def test_compat_urllib_parse_unquote(self):
+ self.assertEqual(compat_urllib_parse_unquote('abc%20def'), 'abc def')
+ self.assertEqual(compat_urllib_parse_unquote('%7e/abc+def'), '~/abc+def')
+ self.assertEqual(compat_urllib_parse_unquote(''), '')
+ self.assertEqual(compat_urllib_parse_unquote('%'), '%')
+ self.assertEqual(compat_urllib_parse_unquote('%%'), '%%')
+ self.assertEqual(compat_urllib_parse_unquote('%%%'), '%%%')
+ self.assertEqual(compat_urllib_parse_unquote('%2F'), '/')
+ self.assertEqual(compat_urllib_parse_unquote('%2f'), '/')
+ self.assertEqual(compat_urllib_parse_unquote('%E6%B4%A5%E6%B3%A2'), '津波')
+ self.assertEqual(
+ compat_urllib_parse_unquote('''<meta property="og:description" content="%E2%96%81%E2%96%82%E2%96%83%E2%96%84%25%E2%96%85%E2%96%86%E2%96%87%E2%96%88" />
+%<a href="https://ar.wikipedia.org/wiki/%D8%AA%D8%B3%D9%88%D9%86%D8%A7%D9%85%D9%8A">%a'''),
+ '''<meta property="og:description" content="▁▂▃▄%▅▆▇█" />
+%<a href="https://ar.wikipedia.org/wiki/تسونامي">%a''')
+ self.assertEqual(
+ compat_urllib_parse_unquote('''%28%5E%E2%97%A3_%E2%97%A2%5E%29%E3%81%A3%EF%B8%BB%E3%83%87%E2%95%90%E4%B8%80 %E2%87%80 %E2%87%80 %E2%87%80 %E2%87%80 %E2%87%80 %E2%86%B6%I%Break%25Things%'''),
+ '''(^◣_◢^)っ︻デ═一 ⇀ ⇀ ⇀ ⇀ ⇀ ↶%I%Break%Things%''')
+
+ def test_compat_urllib_parse_unquote_plus(self):
+ self.assertEqual(urllib.parse.unquote_plus('abc%20def'), 'abc def')
+ self.assertEqual(urllib.parse.unquote_plus('%7e/abc+def'), '~/abc def')
+
+ def test_compat_urllib_parse_urlencode(self):
+ self.assertEqual(compat_urllib_parse_urlencode({'abc': 'def'}), 'abc=def')
+ self.assertEqual(compat_urllib_parse_urlencode({'abc': b'def'}), 'abc=def')
+ self.assertEqual(compat_urllib_parse_urlencode({b'abc': 'def'}), 'abc=def')
+ self.assertEqual(compat_urllib_parse_urlencode({b'abc': b'def'}), 'abc=def')
+ self.assertEqual(compat_urllib_parse_urlencode([('abc', 'def')]), 'abc=def')
+ self.assertEqual(compat_urllib_parse_urlencode([('abc', b'def')]), 'abc=def')
+ self.assertEqual(compat_urllib_parse_urlencode([(b'abc', 'def')]), 'abc=def')
+ self.assertEqual(compat_urllib_parse_urlencode([(b'abc', b'def')]), 'abc=def')
+
+ def test_compat_etree_fromstring(self):
+ xml = '''
+ <root foo="bar" spam="中文">
+ <normal>foo</normal>
+ <chinese>中文</chinese>
+ <foo><bar>spam</bar></foo>
+ </root>
+ '''
+ doc = compat_etree_fromstring(xml.encode())
+ self.assertTrue(isinstance(doc.attrib['foo'], str))
+ self.assertTrue(isinstance(doc.attrib['spam'], str))
+ self.assertTrue(isinstance(doc.find('normal').text, str))
+ self.assertTrue(isinstance(doc.find('chinese').text, str))
+ self.assertTrue(isinstance(doc.find('foo/bar').text, str))
+
+ def test_compat_etree_fromstring_doctype(self):
+ xml = '''<?xml version="1.0"?>
+<!DOCTYPE smil PUBLIC "-//W3C//DTD SMIL 2.0//EN" "http://www.w3.org/2001/SMIL20/SMIL20.dtd">
+<smil xmlns="http://www.w3.org/2001/SMIL20/Language"></smil>'''
+ compat_etree_fromstring(xml)
+
+ def test_struct_unpack(self):
+ self.assertEqual(struct.unpack('!B', b'\x00'), (0,))
+
+
+if __name__ == '__main__':
+ unittest.main()
diff --git a/test/test_config.py b/test/test_config.py
new file mode 100644
index 0000000..a393b65
--- /dev/null
+++ b/test/test_config.py
@@ -0,0 +1,227 @@
+#!/usr/bin/env python3
+
+# Allow direct execution
+import os
+import sys
+import unittest
+import unittest.mock
+
+sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
+
+import contextlib
+import itertools
+from pathlib import Path
+
+from yt_dlp.compat import compat_expanduser
+from yt_dlp.options import create_parser, parseOpts
+from yt_dlp.utils import Config, get_executable_path
+
+ENVIRON_DEFAULTS = {
+ 'HOME': None,
+ 'XDG_CONFIG_HOME': '/_xdg_config_home/',
+ 'USERPROFILE': 'C:/Users/testing/',
+ 'APPDATA': 'C:/Users/testing/AppData/Roaming/',
+ 'HOMEDRIVE': 'C:/',
+ 'HOMEPATH': 'Users/testing/',
+}
+
+
+@contextlib.contextmanager
+def set_environ(**kwargs):
+ saved_environ = os.environ.copy()
+
+ for name, value in {**ENVIRON_DEFAULTS, **kwargs}.items():
+ if value is None:
+ os.environ.pop(name, None)
+ else:
+ os.environ[name] = value
+
+ yield
+
+ os.environ.clear()
+ os.environ.update(saved_environ)
+
+
+def _generate_expected_groups():
+ xdg_config_home = os.getenv('XDG_CONFIG_HOME') or compat_expanduser('~/.config')
+ appdata_dir = os.getenv('appdata')
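+ # os.environ is case-insensitive on Windows, so 'appdata' resolves
+ # APPDATA there; on POSIX it is simply absent and the APPDATA
+ # locations are skipped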
+ home_dir = compat_expanduser('~')
+ return {
+ 'Portable': [
+ Path(get_executable_path(), 'yt-dlp.conf'),
+ ],
+ 'Home': [
+ Path('yt-dlp.conf'),
+ ],
+ 'User': [
+ Path(xdg_config_home, 'yt-dlp.conf'),
+ Path(xdg_config_home, 'yt-dlp', 'config'),
+ Path(xdg_config_home, 'yt-dlp', 'config.txt'),
+ *((
+ Path(appdata_dir, 'yt-dlp.conf'),
+ Path(appdata_dir, 'yt-dlp', 'config'),
+ Path(appdata_dir, 'yt-dlp', 'config.txt'),
+ ) if appdata_dir else ()),
+ Path(home_dir, 'yt-dlp.conf'),
+ Path(home_dir, 'yt-dlp.conf.txt'),
+ Path(home_dir, '.yt-dlp', 'config'),
+ Path(home_dir, '.yt-dlp', 'config.txt'),
+ ],
+ 'System': [
+ Path('/etc/yt-dlp.conf'),
+ Path('/etc/yt-dlp/config'),
+ Path('/etc/yt-dlp/config.txt'),
+ ]
+ }
+
+
+class TestConfig(unittest.TestCase):
+ maxDiff = None
+
+ @set_environ()
+ def test_config__ENVIRON_DEFAULTS_sanity(self):
+ expected = make_expected()
+ self.assertCountEqual(
+ set(expected), expected,
+ 'ENVIRON_DEFAULTS produces non-unique names')
+
+ def test_config_all_environ_values(self):
+ for name, value in ENVIRON_DEFAULTS.items():
+ for new_value in (None, '', '.', value or '/some/dir'):
+ with set_environ(**{name: new_value}):
+ self._simple_grouping_test()
+
+ def test_config_default_expected_locations(self):
+ files, _ = self._simple_config_test()
+ self.assertEqual(
+ files, make_expected(),
+ 'Not all expected locations have been checked')
+
+ def test_config_default_grouping(self):
+ self._simple_grouping_test()
+
+ def _simple_grouping_test(self):
+ expected_groups = make_expected_groups()
+ for name, group in expected_groups.items():
+ for index, existing_path in enumerate(group):
+ result, opts = self._simple_config_test(existing_path)
+ expected = expected_from_expected_groups(expected_groups, existing_path)
+ self.assertEqual(
+ result, expected,
+ f'The checked locations do not match the expected ones ({name}, {index})')
+ self.assertEqual(
+ opts.outtmpl['default'], '1',
+ f'The used result value was incorrect ({name}, {index})')
+
+ def _simple_config_test(self, *stop_paths):
+ encountered = 0
+ paths = []
+
+ def read_file(filename, default=[]):
+ nonlocal encountered
+ path = Path(filename)
+ paths.append(path)
+ if path in stop_paths:
+ encountered += 1
+ return ['-o', f'{encountered}']
+
+ with ConfigMock(read_file):
+ _, opts, _ = parseOpts([], False)
+
+ return paths, opts
+
+ @set_environ()
+ def test_config_early_exit_commandline(self):
+ self._early_exit_test(0, '--ignore-config')
+
+ @set_environ()
+ def test_config_early_exit_files(self):
+ for index, _ in enumerate(make_expected(), 1):
+ self._early_exit_test(index)
+
+ def _early_exit_test(self, allowed_reads, *args):
+ reads = 0
+
+ def read_file(filename, default=[]):
+ nonlocal reads
+ reads += 1
+
+ if reads > allowed_reads:
+ self.fail('The remaining config was not ignored')
+ elif reads == allowed_reads:
+ return ['--ignore-config']
+
+ with ConfigMock(read_file):
+ parseOpts(args, False)
+
+ @set_environ()
+ def test_config_override_commandline(self):
+ self._override_test(0, '-o', 'pass')
+
+ @set_environ()
+ def test_config_override_files(self):
+ for index, _ in enumerate(make_expected(), 1):
+ self._override_test(index)
+
+ def _override_test(self, start_index, *args):
+ index = 0
+
+ def read_file(filename, default=[]):
+ nonlocal index
+ index += 1
+
+ if index > start_index:
+ return ['-o', 'fail']
+ elif index == start_index:
+ return ['-o', 'pass']
+
+ with ConfigMock(read_file):
+ _, opts, _ = parseOpts(args, False)
+
+ self.assertEqual(
+ opts.outtmpl['default'], 'pass',
+ 'The earlier group did not override the later ones')
+
+
+@contextlib.contextmanager
+def ConfigMock(read_file=None):
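+ # Patch yt_dlp.options.Config so parseOpts consults the supplied
+ # read_file callback instead of reading real config files from disk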
+ with unittest.mock.patch('yt_dlp.options.Config') as mock:
+ mock.return_value = Config(create_parser())
+ if read_file is not None:
+ mock.read_file = read_file
+
+ yield mock
+
+
+def make_expected(*filepaths):
+ return expected_from_expected_groups(_generate_expected_groups(), *filepaths)
+
+
+def make_expected_groups(*filepaths):
+ return _filter_expected_groups(_generate_expected_groups(), filepaths)
+
+
+def expected_from_expected_groups(expected_groups, *filepaths):
+ return list(itertools.chain.from_iterable(
+ _filter_expected_groups(expected_groups, filepaths).values()))
+
+
+def _filter_expected_groups(expected, filepaths):
+ if not filepaths:
+ return expected
+
+ result = {}
+ for group, paths in expected.items():
+ new_paths = []
+ for path in paths:
+ new_paths.append(path)
+ if path in filepaths:
+ break
+
+ result[group] = new_paths
+
+ return result
+
+
+if __name__ == '__main__':
+ unittest.main()
diff --git a/test/test_cookies.py b/test/test_cookies.py
new file mode 100644
index 0000000..5282ef6
--- /dev/null
+++ b/test/test_cookies.py
@@ -0,0 +1,306 @@
+import unittest
+from datetime import datetime, timezone
+
+from yt_dlp import cookies
+from yt_dlp.cookies import (
+ LenientSimpleCookie,
+ LinuxChromeCookieDecryptor,
+ MacChromeCookieDecryptor,
+ WindowsChromeCookieDecryptor,
+ _get_linux_desktop_environment,
+ _LinuxDesktopEnvironment,
+ parse_safari_cookies,
+ pbkdf2_sha1,
+)
+
+
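+# Logger that escalates warnings and errors to exceptions, so any problem
+# during cookie extraction fails the test immediately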
+class Logger:
+ def debug(self, message, *args, **kwargs):
+ print(f'[verbose] {message}')
+
+ def info(self, message, *args, **kwargs):
+ print(message)
+
+ def warning(self, message, *args, **kwargs):
+ self.error(message)
+
+ def error(self, message, *args, **kwargs):
+ raise Exception(message)
+
+
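+# Minimal stand-in for a monkeypatching fixture: temporarily replaces module
+# attributes inside a with-block and restores the originals on exit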
+class MonkeyPatch:
+ def __init__(self, module, temporary_values):
+ self._module = module
+ self._temporary_values = temporary_values
+ self._backup_values = {}
+
+ def __enter__(self):
+ for name, temp_value in self._temporary_values.items():
+ self._backup_values[name] = getattr(self._module, name)
+ setattr(self._module, name, temp_value)
+
+ def __exit__(self, exc_type, exc_val, exc_tb):
+ for name, backup_value in self._backup_values.items():
+ setattr(self._module, name, backup_value)
+
+
+class TestCookies(unittest.TestCase):
+ def test_get_desktop_environment(self):
+ """ based on https://chromium.googlesource.com/chromium/src/+/refs/heads/main/base/nix/xdg_util_unittest.cc """
+ test_cases = [
+ ({}, _LinuxDesktopEnvironment.OTHER),
+ ({'DESKTOP_SESSION': 'my_custom_de'}, _LinuxDesktopEnvironment.OTHER),
+ ({'XDG_CURRENT_DESKTOP': 'my_custom_de'}, _LinuxDesktopEnvironment.OTHER),
+
+ ({'DESKTOP_SESSION': 'gnome'}, _LinuxDesktopEnvironment.GNOME),
+ ({'DESKTOP_SESSION': 'mate'}, _LinuxDesktopEnvironment.GNOME),
+ ({'DESKTOP_SESSION': 'kde4'}, _LinuxDesktopEnvironment.KDE4),
+ ({'DESKTOP_SESSION': 'kde'}, _LinuxDesktopEnvironment.KDE3),
+ ({'DESKTOP_SESSION': 'xfce'}, _LinuxDesktopEnvironment.XFCE),
+
+ ({'GNOME_DESKTOP_SESSION_ID': 1}, _LinuxDesktopEnvironment.GNOME),
+ ({'KDE_FULL_SESSION': 1}, _LinuxDesktopEnvironment.KDE3),
+ ({'KDE_FULL_SESSION': 1, 'DESKTOP_SESSION': 'kde4'}, _LinuxDesktopEnvironment.KDE4),
+
+ ({'XDG_CURRENT_DESKTOP': 'X-Cinnamon'}, _LinuxDesktopEnvironment.CINNAMON),
+ ({'XDG_CURRENT_DESKTOP': 'Deepin'}, _LinuxDesktopEnvironment.DEEPIN),
+ ({'XDG_CURRENT_DESKTOP': 'GNOME'}, _LinuxDesktopEnvironment.GNOME),
+ ({'XDG_CURRENT_DESKTOP': 'GNOME:GNOME-Classic'}, _LinuxDesktopEnvironment.GNOME),
+ ({'XDG_CURRENT_DESKTOP': 'GNOME : GNOME-Classic'}, _LinuxDesktopEnvironment.GNOME),
+
+ ({'XDG_CURRENT_DESKTOP': 'Unity', 'DESKTOP_SESSION': 'gnome-fallback'}, _LinuxDesktopEnvironment.GNOME),
+ ({'XDG_CURRENT_DESKTOP': 'KDE', 'KDE_SESSION_VERSION': '5'}, _LinuxDesktopEnvironment.KDE5),
+ ({'XDG_CURRENT_DESKTOP': 'KDE', 'KDE_SESSION_VERSION': '6'}, _LinuxDesktopEnvironment.KDE6),
+ ({'XDG_CURRENT_DESKTOP': 'KDE'}, _LinuxDesktopEnvironment.KDE4),
+ ({'XDG_CURRENT_DESKTOP': 'Pantheon'}, _LinuxDesktopEnvironment.PANTHEON),
+ ({'XDG_CURRENT_DESKTOP': 'UKUI'}, _LinuxDesktopEnvironment.UKUI),
+ ({'XDG_CURRENT_DESKTOP': 'Unity'}, _LinuxDesktopEnvironment.UNITY),
+ ({'XDG_CURRENT_DESKTOP': 'Unity:Unity7'}, _LinuxDesktopEnvironment.UNITY),
+ ({'XDG_CURRENT_DESKTOP': 'Unity:Unity8'}, _LinuxDesktopEnvironment.UNITY),
+ ]
+
+ for env, expected_desktop_environment in test_cases:
+ self.assertEqual(_get_linux_desktop_environment(env, Logger()), expected_desktop_environment)
+
+ def test_chrome_cookie_decryptor_linux_derive_key(self):
+ key = LinuxChromeCookieDecryptor.derive_key(b'abc')
+ self.assertEqual(key, b'7\xa1\xec\xd4m\xfcA\xc7\xb19Z\xd0\x19\xdcM\x17')
+
+ def test_chrome_cookie_decryptor_mac_derive_key(self):
+ key = MacChromeCookieDecryptor.derive_key(b'abc')
+ self.assertEqual(key, b'Y\xe2\xc0\xd0P\xf6\xf4\xe1l\xc1\x8cQ\xcb|\xcdY')
+
+ def test_chrome_cookie_decryptor_linux_v10(self):
+ with MonkeyPatch(cookies, {'_get_linux_keyring_password': lambda *args, **kwargs: b''}):
+ encrypted_value = b'v10\xccW%\xcd\xe6\xe6\x9fM" \xa7\xb0\xca\xe4\x07\xd6'
+ value = 'USD'
+ decryptor = LinuxChromeCookieDecryptor('Chrome', Logger())
+ self.assertEqual(decryptor.decrypt(encrypted_value), value)
+
+ def test_chrome_cookie_decryptor_linux_v11(self):
+ with MonkeyPatch(cookies, {'_get_linux_keyring_password': lambda *args, **kwargs: b''}):
+ encrypted_value = b'v11#\x81\x10>`w\x8f)\xc0\xb2\xc1\r\xf4\x1al\xdd\x93\xfd\xf8\xf8N\xf2\xa9\x83\xf1\xe9o\x0elVQd'
+ value = 'tz=Europe.London'
+ decryptor = LinuxChromeCookieDecryptor('Chrome', Logger())
+ self.assertEqual(decryptor.decrypt(encrypted_value), value)
+
+ def test_chrome_cookie_decryptor_windows_v10(self):
+ with MonkeyPatch(cookies, {
+ '_get_windows_v10_key': lambda *args, **kwargs: b'Y\xef\xad\xad\xeerp\xf0Y\xe6\x9b\x12\xc2<z\x16]\n\xbb\xb8\xcb\xd7\x9bA\xc3\x14e\x99{\xd6\xf4&'
+ }):
+ encrypted_value = b'v10T\xb8\xf3\xb8\x01\xa7TtcV\xfc\x88\xb8\xb8\xef\x05\xb5\xfd\x18\xc90\x009\xab\xb1\x893\x85)\x87\xe1\xa9-\xa3\xad='
+ value = '32101439'
+ decryptor = WindowsChromeCookieDecryptor('', Logger())
+ self.assertEqual(decryptor.decrypt(encrypted_value), value)
+
+ def test_chrome_cookie_decryptor_mac_v10(self):
+ with MonkeyPatch(cookies, {'_get_mac_keyring_password': lambda *args, **kwargs: b'6eIDUdtKAacvlHwBVwvg/Q=='}):
+ encrypted_value = b'v10\xb3\xbe\xad\xa1[\x9fC\xa1\x98\xe0\x9a\x01\xd9\xcf\xbfc'
+ value = '2021-06-01-22'
+ decryptor = MacChromeCookieDecryptor('', Logger())
+ self.assertEqual(decryptor.decrypt(encrypted_value), value)
+
+ def test_safari_cookie_parsing(self):
+ cookies = \
+ b'cook\x00\x00\x00\x01\x00\x00\x00i\x00\x00\x01\x00\x01\x00\x00\x00\x10\x00\x00\x00\x00\x00\x00\x00Y' \
+ b'\x00\x00\x00\x00\x00\x00\x00 \x00\x00\x00\x00\x00\x00\x008\x00\x00\x00B\x00\x00\x00F\x00\x00\x00H' \
+ b'\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x80\x03\xa5>\xc3A\x00\x00\x80\xc3\x07:\xc3A' \
+ b'localhost\x00foo\x00/\x00test%20%3Bcookie\x00\x00\x00\x054\x07\x17 \x05\x00\x00\x00Kbplist00\xd1\x01' \
+ b'\x02_\x10\x18NSHTTPCookieAcceptPolicy\x10\x02\x08\x0b&\x00\x00\x00\x00\x00\x00\x01\x01\x00\x00\x00' \
+ b'\x00\x00\x00\x00\x03\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00('
+
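+ # A minimal Cookies.binarycookies blob: the 'cook' magic, a single
+ # page holding one cookie for localhost, and the trailing
+ # NSHTTPCookieAcceptPolicy binary plist footer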
+ jar = parse_safari_cookies(cookies)
+ self.assertEqual(len(jar), 1)
+ cookie = list(jar)[0]
+ self.assertEqual(cookie.domain, 'localhost')
+ self.assertEqual(cookie.port, None)
+ self.assertEqual(cookie.path, '/')
+ self.assertEqual(cookie.name, 'foo')
+ self.assertEqual(cookie.value, 'test%20%3Bcookie')
+ self.assertFalse(cookie.secure)
+ expected_expiration = datetime(2021, 6, 18, 21, 39, 19, tzinfo=timezone.utc)
+ self.assertEqual(cookie.expires, int(expected_expiration.timestamp()))
+
+ def test_pbkdf2_sha1(self):
+ key = pbkdf2_sha1(b'peanuts', b' ' * 16, 1, 16)
+ self.assertEqual(key, b'g\xe1\x8e\x0fQ\x1c\x9b\xf3\xc9`!\xaa\x90\xd9\xd34')
+
+
+class TestLenientSimpleCookie(unittest.TestCase):
+ def _run_tests(self, *cases):
+ for message, raw_cookie, expected in cases:
+ cookie = LenientSimpleCookie(raw_cookie)
+
+ with self.subTest(message, expected=expected):
+ self.assertEqual(cookie.keys(), expected.keys(), message)
+
+ for key, expected_value in expected.items():
+ morsel = cookie[key]
+ if isinstance(expected_value, tuple):
+ expected_value, expected_attributes = expected_value
+ else:
+ expected_attributes = {}
+
+ attributes = {
+ key: value
+ for key, value in dict(morsel).items()
+ if value != ""
+ }
+ self.assertEqual(attributes, expected_attributes, message)
+
+ self.assertEqual(morsel.value, expected_value, message)
+
+ def test_parsing(self):
+ self._run_tests(
+ # Copied from https://github.com/python/cpython/blob/v3.10.7/Lib/test/test_http_cookies.py
+ (
+ "Test basic cookie",
+ "chips=ahoy; vienna=finger",
+ {"chips": "ahoy", "vienna": "finger"},
+ ),
+ (
+ "Test quoted cookie",
+ 'keebler="E=mc2; L=\\"Loves\\"; fudge=\\012;"',
+ {"keebler": 'E=mc2; L="Loves"; fudge=\012;'},
+ ),
+ (
+ "Allow '=' in an unquoted value",
+ "keebler=E=mc2",
+ {"keebler": "E=mc2"},
+ ),
+ (
+ "Allow cookies with ':' in their name",
+ "key:term=value:term",
+ {"key:term": "value:term"},
+ ),
+ (
+ "Allow '[' and ']' in cookie values",
+ "a=b; c=[; d=r; f=h",
+ {"a": "b", "c": "[", "d": "r", "f": "h"},
+ ),
+ (
+ "Test basic cookie attributes",
+ 'Customer="WILE_E_COYOTE"; Version=1; Path=/acme',
+ {"Customer": ("WILE_E_COYOTE", {"version": "1", "path": "/acme"})},
+ ),
+ (
+ "Test flag only cookie attributes",
+ 'Customer="WILE_E_COYOTE"; HttpOnly; Secure',
+ {"Customer": ("WILE_E_COYOTE", {"httponly": True, "secure": True})},
+ ),
+ (
+ "Test flag only attribute with values",
+ "eggs=scrambled; httponly=foo; secure=bar; Path=/bacon",
+ {"eggs": ("scrambled", {"httponly": "foo", "secure": "bar", "path": "/bacon"})},
+ ),
+ (
+ "Test special case for 'expires' attribute, 4 digit year",
+ 'Customer="W"; expires=Wed, 01 Jan 2010 00:00:00 GMT',
+ {"Customer": ("W", {"expires": "Wed, 01 Jan 2010 00:00:00 GMT"})},
+ ),
+ (
+ "Test special case for 'expires' attribute, 2 digit year",
+ 'Customer="W"; expires=Wed, 01 Jan 98 00:00:00 GMT',
+ {"Customer": ("W", {"expires": "Wed, 01 Jan 98 00:00:00 GMT"})},
+ ),
+ (
+ "Test extra spaces in keys and values",
+ "eggs = scrambled ; secure ; path = bar ; foo=foo ",
+ {"eggs": ("scrambled", {"secure": True, "path": "bar"}), "foo": "foo"},
+ ),
+ (
+ "Test quoted attributes",
+ 'Customer="WILE_E_COYOTE"; Version="1"; Path="/acme"',
+ {"Customer": ("WILE_E_COYOTE", {"version": "1", "path": "/acme"})}
+ ),
+ # Our own tests that CPython passes
+ (
+ "Allow ';' in quoted value",
+ 'chips="a;hoy"; vienna=finger',
+ {"chips": "a;hoy", "vienna": "finger"},
+ ),
+ (
+ "Keep only the last set value",
+ "a=c; a=b",
+ {"a": "b"},
+ ),
+ )
+
+ def test_lenient_parsing(self):
+ self._run_tests(
+ (
+ "Ignore and try to skip invalid cookies",
+ 'chips={"ahoy;": 1}; vienna="finger;"',
+ {"vienna": "finger;"},
+ ),
+ (
+ "Ignore cookies without a name",
+ "a=b; unnamed; c=d",
+ {"a": "b", "c": "d"},
+ ),
+ (
+ "Ignore '\"' cookie without name",
+ 'a=b; "; c=d',
+ {"a": "b", "c": "d"},
+ ),
+ (
+ "Skip all space separated values",
+ "x a=b c=d x; e=f",
+ {"a": "b", "c": "d", "e": "f"},
+ ),
+ (
+ "Skip all space separated values",
+ 'x a=b; data={"complex": "json", "with": "key=value"}; x c=d x',
+ {"a": "b", "c": "d"},
+ ),
+ (
+ "Expect quote mending",
+ 'a=b; invalid="; c=d',
+ {"a": "b", "c": "d"},
+ ),
+ (
+ "Reset morsel after invalid to not capture attributes",
+ "a=b; invalid; Version=1; c=d",
+ {"a": "b", "c": "d"},
+ ),
+ (
+ "Reset morsel after invalid to not capture attributes",
+ "a=b; $invalid; $Version=1; c=d",
+ {"a": "b", "c": "d"},
+ ),
+ (
+ "Continue after non-flag attribute without value",
+ "a=b; path; Version=1; c=d",
+ {"a": "b", "c": "d"},
+ ),
+ (
+ "Allow cookie attributes with `$` prefix",
+ 'Customer="WILE_E_COYOTE"; $Version=1; $Secure; $Path=/acme',
+ {"Customer": ("WILE_E_COYOTE", {"version": "1", "secure": True, "path": "/acme"})},
+ ),
+ (
+ "Invalid Morsel keys should not result in an error",
+ "Key=Value; [Invalid]=Value; Another=Value",
+ {"Key": "Value", "Another": "Value"},
+ ),
+ )
diff --git a/test/test_download.py b/test/test_download.py
new file mode 100755
index 0000000..2530792
--- /dev/null
+++ b/test/test_download.py
@@ -0,0 +1,314 @@
+#!/usr/bin/env python3
+
+# Allow direct execution
+import os
+import sys
+import unittest
+
+sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
+
+
+import collections
+import hashlib
+import json
+
+from test.helper import (
+ assertGreaterEqual,
+ expect_info_dict,
+ expect_warnings,
+ get_params,
+ gettestcases,
+ getwebpagetestcases,
+ is_download_test,
+ report_warning,
+ try_rm,
+)
+
+import yt_dlp.YoutubeDL # isort: split
+from yt_dlp.extractor import get_info_extractor
+from yt_dlp.networking.exceptions import HTTPError, TransportError
+from yt_dlp.utils import (
+ DownloadError,
+ ExtractorError,
+ UnavailableVideoError,
+ YoutubeDLError,
+ format_bytes,
+ join_nonempty,
+)
+
+RETRIES = 3
+
+
+class YoutubeDL(yt_dlp.YoutubeDL):
+ def __init__(self, *args, **kwargs):
+ self.to_stderr = self.to_screen
+ self.processed_info_dicts = []
+ super().__init__(*args, **kwargs)
+
+ def report_warning(self, message, *args, **kwargs):
+ # Don't accept warnings during tests
+ raise ExtractorError(message)
+
+ def process_info(self, info_dict):
+ self.processed_info_dicts.append(info_dict.copy())
+ return super().process_info(info_dict)
+
+
+def _file_md5(fn):
+ with open(fn, 'rb') as f:
+ return hashlib.md5(f.read()).hexdigest()
+
+
+normal_test_cases = gettestcases()
+webpage_test_cases = getwebpagetestcases()
+tests_counter = collections.defaultdict(collections.Counter)
+
+
+@is_download_test
+class TestDownload(unittest.TestCase):
+ # Parallel testing in nosetests. See
+ # http://nose.readthedocs.org/en/latest/doc_tests/test_multiprocess/multiprocess.html
+ _multiprocess_shared_ = True
+
+ maxDiff = None
+
+ COMPLETED_TESTS = {}
+
+ def __str__(self):
+ """Identify each test with the `add_ie` attribute, if available."""
+ cls, add_ie = type(self), getattr(self, self._testMethodName).add_ie
+ return f'{self._testMethodName} ({cls.__module__}.{cls.__name__}){f" [{add_ie}]" if add_ie else ""}:'
+
+
+# Dynamically generate tests
+
+def generator(test_case, tname):
+ def test_template(self):
+ if self.COMPLETED_TESTS.get(tname):
+ return
+ self.COMPLETED_TESTS[tname] = True
+ ie = yt_dlp.extractor.get_info_extractor(test_case['name'])()
+ other_ies = [get_info_extractor(ie_key)() for ie_key in test_case.get('add_ie', [])]
+ is_playlist = any(k.startswith('playlist') for k in test_case)
+ test_cases = test_case.get(
+ 'playlist', [] if is_playlist else [test_case])
+
+ def print_skipping(reason):
+ print('Skipping %s: %s' % (test_case['name'], reason))
+ self.skipTest(reason)
+
+ if not ie.working():
+ print_skipping('IE marked as not _WORKING')
+
+ for tc in test_cases:
+ if tc.get('expected_exception'):
+ continue
+ info_dict = tc.get('info_dict', {})
+ params = tc.get('params', {})
+ if not info_dict.get('id'):
+ raise Exception(f'Test {tname} definition incorrect - "id" key is not present')
+ elif not info_dict.get('ext') and info_dict.get('_type', 'video') == 'video':
+ if params.get('skip_download') and params.get('ignore_no_formats_error'):
+ continue
+ raise Exception(f'Test {tname} definition incorrect - "ext" key must be present to define the output file')
+
+ if 'skip' in test_case:
+ print_skipping(test_case['skip'])
+
+ for other_ie in other_ies:
+ if not other_ie.working():
+ print_skipping('test depends on %sIE, marked as not WORKING' % other_ie.ie_key())
+
+ params = get_params(test_case.get('params', {}))
+ params['outtmpl'] = tname + '_' + params['outtmpl']
+ if is_playlist and 'playlist' not in test_case:
+ params.setdefault('extract_flat', 'in_playlist')
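+ # Limit extraction to what the test checks: playlistend defaults to
+ # playlist_mincount, else playlist_count + 1, else -1 (no limit)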
+ params.setdefault('playlistend', test_case.get(
+ 'playlist_mincount', test_case.get('playlist_count', -2) + 1))
+ params.setdefault('skip_download', True)
+
+ ydl = YoutubeDL(params, auto_init=False)
+ ydl.add_default_info_extractors()
+ finished_hook_called = set()
+
+ def _hook(status):
+ if status['status'] == 'finished':
+ finished_hook_called.add(status['filename'])
+ ydl.add_progress_hook(_hook)
+ expect_warnings(ydl, test_case.get('expected_warnings', []))
+
+ def get_tc_filename(tc):
+ return ydl.prepare_filename(dict(tc.get('info_dict', {})))
+
+ res_dict = None
+
+ def match_exception(err):
+ expected_exception = test_case.get('expected_exception')
+ if not expected_exception:
+ return False
+ if err.__class__.__name__ == expected_exception:
+ return True
+ for exc in err.exc_info:
+ if exc.__class__.__name__ == expected_exception:
+ return True
+ return False
+
+ def try_rm_tcs_files(tcs=None):
+ if tcs is None:
+ tcs = test_cases
+ for tc in tcs:
+ tc_filename = get_tc_filename(tc)
+ try_rm(tc_filename)
+ try_rm(tc_filename + '.part')
+ try_rm(os.path.splitext(tc_filename)[0] + '.info.json')
+ try_rm_tcs_files()
+ try:
+ try_num = 1
+ while True:
+ try:
+ # We're not using .download here since that is just a shim
+ # for outside error handling, and returns the exit code
+ # instead of the result dict.
+ res_dict = ydl.extract_info(
+ test_case['url'],
+ force_generic_extractor=params.get('force_generic_extractor', False))
+ except (DownloadError, ExtractorError) as err:
+ # Check if the exception is not a network related one
+ if not isinstance(err.exc_info[1], (TransportError, UnavailableVideoError)) or (isinstance(err.exc_info[1], HTTPError) and err.exc_info[1].status == 503):
+ if match_exception(err):
+ return
+ err.msg = f'{getattr(err, "msg", err)} ({tname})'
+ raise
+
+ if try_num == RETRIES:
+ report_warning('%s failed due to network errors, skipping...' % tname)
+ return
+
+ print(f'Retrying: {try_num} failed tries\n\n##########\n\n')
+
+ try_num += 1
+ except YoutubeDLError as err:
+ if match_exception(err):
+ return
+ raise
+ else:
+ break
+
+ if is_playlist:
+ self.assertTrue(res_dict['_type'] in ['playlist', 'multi_video'])
+ self.assertTrue('entries' in res_dict)
+ expect_info_dict(self, res_dict, test_case.get('info_dict', {}))
+
+ if 'playlist_mincount' in test_case:
+ assertGreaterEqual(
+ self,
+ len(res_dict['entries']),
+ test_case['playlist_mincount'],
+ 'Expected at least %d in playlist %s, but got only %d' % (
+ test_case['playlist_mincount'], test_case['url'],
+ len(res_dict['entries'])))
+ if 'playlist_count' in test_case:
+ self.assertEqual(
+ len(res_dict['entries']),
+ test_case['playlist_count'],
+ 'Expected %d entries in playlist %s, but got %d.' % (
+ test_case['playlist_count'],
+ test_case['url'],
+ len(res_dict['entries']),
+ ))
+ if 'playlist_duration_sum' in test_case:
+ got_duration = sum(e['duration'] for e in res_dict['entries'])
+ self.assertEqual(
+ test_case['playlist_duration_sum'], got_duration)
+
+ # Generalize both playlists and single videos to a unified format
+ # for simplicity
+ if 'entries' not in res_dict:
+ res_dict['entries'] = [res_dict]
+
+ for tc_num, tc in enumerate(test_cases):
+ tc_res_dict = res_dict['entries'][tc_num]
+ # First, check test cases' data against extracted data alone
+ expect_info_dict(self, tc_res_dict, tc.get('info_dict', {}))
+ if tc_res_dict.get('_type', 'video') != 'video':
+ continue
+ # Now, check downloaded file consistency
+ tc_filename = get_tc_filename(tc)
+ if not test_case.get('params', {}).get('skip_download', False):
+ self.assertTrue(os.path.exists(tc_filename), msg='Missing file ' + tc_filename)
+ self.assertTrue(tc_filename in finished_hook_called)
+ expected_minsize = tc.get('file_minsize', 10000)
+ if expected_minsize is not None:
+ if params.get('test'):
+ expected_minsize = max(expected_minsize, 10000)
+ got_fsize = os.path.getsize(tc_filename)
+ assertGreaterEqual(
+ self, got_fsize, expected_minsize,
+ 'Expected %s to be at least %s, but it\'s only %s ' %
+ (tc_filename, format_bytes(expected_minsize),
+ format_bytes(got_fsize)))
+ if 'md5' in tc:
+ md5_for_file = _file_md5(tc_filename)
+ self.assertEqual(tc['md5'], md5_for_file)
+ # Finally, check test cases' data again but this time against
+ # extracted data from info JSON file written during processing
+ info_json_fn = os.path.splitext(tc_filename)[0] + '.info.json'
+ self.assertTrue(
+ os.path.exists(info_json_fn),
+ 'Missing info file %s' % info_json_fn)
+ with open(info_json_fn, encoding='utf-8') as infof:
+ info_dict = json.load(infof)
+ expect_info_dict(self, info_dict, tc.get('info_dict', {}))
+ finally:
+ try_rm_tcs_files()
+ if is_playlist and res_dict is not None and res_dict.get('entries'):
+ # Remove all other files that may have been extracted if the
+ # extractor returns full results even with extract_flat
+ res_tcs = [{'info_dict': e} for e in res_dict['entries']]
+ try_rm_tcs_files(res_tcs)
+ ydl.close()
+ return test_template
+
+
+# And add them to TestDownload
+def inject_tests(test_cases, label=''):
+ for test_case in test_cases:
+ name = test_case['name']
+ tname = join_nonempty('test', name, label, tests_counter[name][label], delim='_')
+ tests_counter[name][label] += 1
+
+ test_method = generator(test_case, tname)
+ test_method.__name__ = tname
+ test_method.add_ie = ','.join(test_case.get('add_ie', []))
+ setattr(TestDownload, test_method.__name__, test_method)
+
+
+inject_tests(normal_test_cases)
+
+# TODO: disable redirection to the IE to ensure we are actually testing the webpage extraction
+inject_tests(webpage_test_cases, 'webpage')
+
+
+def batch_generator(name):
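+ # Runs every generated test for a single extractor under one
+ # test_<name>_all method, reporting (but tolerating) individual skips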
+ def test_template(self):
+ for label, num_tests in tests_counter[name].items():
+ for i in range(num_tests):
+ test_name = join_nonempty('test', name, label, i, delim='_')
+ try:
+ getattr(self, test_name)()
+ except unittest.SkipTest:
+ print(f'Skipped {test_name}')
+
+ return test_template
+
+
+for name in tests_counter:
+ test_method = batch_generator(name)
+ test_method.__name__ = f'test_{name}_all'
+ test_method.add_ie = ''
+ setattr(TestDownload, test_method.__name__, test_method)
+del test_method
+
+
+if __name__ == '__main__':
+ unittest.main()
diff --git a/test/test_downloader_external.py b/test/test_downloader_external.py
new file mode 100644
index 0000000..62f7d45
--- /dev/null
+++ b/test/test_downloader_external.py
@@ -0,0 +1,139 @@
+#!/usr/bin/env python3
+
+# Allow direct execution
+import os
+import sys
+import unittest
+
+sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
+
+import http.cookiejar
+
+from test.helper import FakeYDL
+from yt_dlp.downloader.external import (
+ Aria2cFD,
+ AxelFD,
+ CurlFD,
+ FFmpegFD,
+ HttpieFD,
+ WgetFD,
+)
+
+TEST_COOKIE = {
+ 'version': 0,
+ 'name': 'test',
+ 'value': 'ytdlp',
+ 'port': None,
+ 'port_specified': False,
+ 'domain': '.example.com',
+ 'domain_specified': True,
+ 'domain_initial_dot': False,
+ 'path': '/',
+ 'path_specified': True,
+ 'secure': False,
+ 'expires': None,
+ 'discard': False,
+ 'comment': None,
+ 'comment_url': None,
+ 'rest': {},
+}
+
+TEST_INFO = {'url': 'http://www.example.com/'}
+
+
+class TestHttpieFD(unittest.TestCase):
+ def test_make_cmd(self):
+ with FakeYDL() as ydl:
+ downloader = HttpieFD(ydl, {})
+ self.assertEqual(
+ downloader._make_cmd('test', TEST_INFO),
+ ['http', '--download', '--output', 'test', 'http://www.example.com/'])
+
+ # Test cookie header is added
+ ydl.cookiejar.set_cookie(http.cookiejar.Cookie(**TEST_COOKIE))
+ self.assertEqual(
+ downloader._make_cmd('test', TEST_INFO),
+ ['http', '--download', '--output', 'test', 'http://www.example.com/', 'Cookie:test=ytdlp'])
+
+
+class TestAxelFD(unittest.TestCase):
+ def test_make_cmd(self):
+ with FakeYDL() as ydl:
+ downloader = AxelFD(ydl, {})
+ self.assertEqual(
+ downloader._make_cmd('test', TEST_INFO),
+ ['axel', '-o', 'test', '--', 'http://www.example.com/'])
+
+ # Test cookie header is added
+ ydl.cookiejar.set_cookie(http.cookiejar.Cookie(**TEST_COOKIE))
+ self.assertEqual(
+ downloader._make_cmd('test', TEST_INFO),
+ ['axel', '-o', 'test', '-H', 'Cookie: test=ytdlp', '--max-redirect=0', '--', 'http://www.example.com/'])
+
+
+class TestWgetFD(unittest.TestCase):
+ def test_make_cmd(self):
+ with FakeYDL() as ydl:
+ downloader = WgetFD(ydl, {})
+ self.assertNotIn('--load-cookies', downloader._make_cmd('test', TEST_INFO))
+ # Test cookiejar tempfile arg is added
+ ydl.cookiejar.set_cookie(http.cookiejar.Cookie(**TEST_COOKIE))
+ self.assertIn('--load-cookies', downloader._make_cmd('test', TEST_INFO))
+
+
+class TestCurlFD(unittest.TestCase):
+ def test_make_cmd(self):
+ with FakeYDL() as ydl:
+ downloader = CurlFD(ydl, {})
+ self.assertNotIn('--cookie', downloader._make_cmd('test', TEST_INFO))
+ # Test cookie header is added
+ ydl.cookiejar.set_cookie(http.cookiejar.Cookie(**TEST_COOKIE))
+ self.assertIn('--cookie', downloader._make_cmd('test', TEST_INFO))
+ self.assertIn('test=ytdlp', downloader._make_cmd('test', TEST_INFO))
+
+
+class TestAria2cFD(unittest.TestCase):
+ def test_make_cmd(self):
+ with FakeYDL() as ydl:
+ downloader = Aria2cFD(ydl, {})
+ downloader._make_cmd('test', TEST_INFO)
+ self.assertFalse(hasattr(downloader, '_cookies_tempfile'))
+
+ # Test cookiejar tempfile arg is added
+ ydl.cookiejar.set_cookie(http.cookiejar.Cookie(**TEST_COOKIE))
+ cmd = downloader._make_cmd('test', TEST_INFO)
+ self.assertIn(f'--load-cookies={downloader._cookies_tempfile}', cmd)
+
+
+@unittest.skipUnless(FFmpegFD.available(), 'ffmpeg not found')
+class TestFFmpegFD(unittest.TestCase):
+ _args = []
+
+ def _test_cmd(self, args):
+ self._args = args
+
+ def test_make_cmd(self):
+ with FakeYDL() as ydl:
+ downloader = FFmpegFD(ydl, {})
+ downloader._debug_cmd = self._test_cmd
+
+ downloader._call_downloader('test', {**TEST_INFO, 'ext': 'mp4'})
+ self.assertEqual(self._args, [
+ 'ffmpeg', '-y', '-hide_banner', '-i', 'http://www.example.com/',
+ '-c', 'copy', '-f', 'mp4', 'file:test'])
+
+ # Test cookies arg is added
+ ydl.cookiejar.set_cookie(http.cookiejar.Cookie(**TEST_COOKIE))
+ downloader._call_downloader('test', {**TEST_INFO, 'ext': 'mp4'})
+ self.assertEqual(self._args, [
+ 'ffmpeg', '-y', '-hide_banner', '-cookies', 'test=ytdlp; path=/; domain=.example.com;\r\n',
+ '-i', 'http://www.example.com/', '-c', 'copy', '-f', 'mp4', 'file:test'])
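+
+ # Note: ffmpeg's `-cookies` option takes newline-delimited Set-Cookie-style
+ # records ("name=value; path=...; domain=...;"), which is why the expected
+ # argument above ends with "\r\n".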
+
+ # Test with non-URL input (ffmpeg reads from stdin '-' for websockets)
+ downloader._call_downloader('test', {'url': 'x', 'ext': 'mp4'})
+ self.assertEqual(self._args, [
+ 'ffmpeg', '-y', '-hide_banner', '-i', 'x', '-c', 'copy', '-f', 'mp4', 'file:test'])
+
+
+if __name__ == '__main__':
+ unittest.main()
diff --git a/test/test_downloader_http.py b/test/test_downloader_http.py
new file mode 100644
index 0000000..099ec2f
--- /dev/null
+++ b/test/test_downloader_http.py
@@ -0,0 +1,106 @@
+#!/usr/bin/env python3
+
+# Allow direct execution
+import os
+import sys
+import unittest
+
+sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
+
+
+import http.server
+import re
+import threading
+
+from test.helper import http_server_port, try_rm
+from yt_dlp import YoutubeDL
+from yt_dlp.downloader.http import HttpFD
+from yt_dlp.utils import encodeFilename
+from yt_dlp.utils._utils import _YDLLogger as FakeLogger
+
+TEST_DIR = os.path.dirname(os.path.abspath(__file__))
+
+
+TEST_SIZE = 10 * 1024
+
+
+class HTTPTestRequestHandler(http.server.BaseHTTPRequestHandler):
+ def log_message(self, format, *args):
+ pass
+
+ def send_content_range(self, total=None):
+ range_header = self.headers.get('Range')
+ start = end = None
+ if range_header:
+ mobj = re.search(r'^bytes=(\d+)-(\d+)', range_header)
+ if mobj:
+ start = int(mobj.group(1))
+ end = int(mobj.group(2))
+ valid_range = start is not None and end is not None
+ if valid_range:
+ content_range = 'bytes %d-%d' % (start, end)
+ if total:
+ content_range += '/%d' % total
+ self.send_header('Content-Range', content_range)
+ return (end - start + 1) if valid_range else total
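+
+ # Worked example: for "Range: bytes=500-999" with total=10240, this sends
+ # "Content-Range: bytes 500-999/10240" and returns 500 (end - start + 1).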
+
+ def serve(self, range=True, content_length=True):
+ self.send_response(200)
+ self.send_header('Content-Type', 'video/mp4')
+ size = TEST_SIZE
+ if range:
+ size = self.send_content_range(TEST_SIZE)
+ if content_length:
+ self.send_header('Content-Length', size)
+ self.end_headers()
+ self.wfile.write(b'#' * size)
+
+ def do_GET(self):
+ if self.path == '/regular':
+ self.serve()
+ elif self.path == '/no-content-length':
+ self.serve(content_length=False)
+ elif self.path == '/no-range':
+ self.serve(range=False)
+ elif self.path == '/no-range-no-content-length':
+ self.serve(range=False, content_length=False)
+ else:
+ assert False
+
+
+class TestHttpFD(unittest.TestCase):
+ def setUp(self):
+ self.httpd = http.server.HTTPServer(
+ ('127.0.0.1', 0), HTTPTestRequestHandler)
+ self.port = http_server_port(self.httpd)
+ self.server_thread = threading.Thread(target=self.httpd.serve_forever)
+ self.server_thread.daemon = True
+ self.server_thread.start()
+
+ def download(self, params, ep):
+ params['logger'] = FakeLogger()
+ ydl = YoutubeDL(params)
+ downloader = HttpFD(ydl, params)
+ filename = 'testfile.mp4'
+ try_rm(encodeFilename(filename))
+ self.assertTrue(downloader.real_download(filename, {
+ 'url': 'http://127.0.0.1:%d/%s' % (self.port, ep),
+ }), ep)
+ self.assertEqual(os.path.getsize(encodeFilename(filename)), TEST_SIZE, ep)
+ try_rm(encodeFilename(filename))
+
+ def download_all(self, params):
+ for ep in ('regular', 'no-content-length', 'no-range', 'no-range-no-content-length'):
+ self.download(params, ep)
+
+ def test_regular(self):
+ self.download_all({})
+
+ def test_chunked(self):
+ self.download_all({
+ 'http_chunk_size': 1000,
+ })
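+
+ # With http_chunk_size=1000 and TEST_SIZE=10240, HttpFD should fetch the
+ # file in successive Range requests of at most 1000 bytes each
+ # (bytes=0-999, 1000-1999, ..., 10000-10239), i.e. roughly 11 requests.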
+
+
+if __name__ == '__main__':
+ unittest.main()
diff --git a/test/test_execution.py b/test/test_execution.py
new file mode 100644
index 0000000..c6ee9cf
--- /dev/null
+++ b/test/test_execution.py
@@ -0,0 +1,60 @@
+#!/usr/bin/env python3
+
+# Allow direct execution
+import os
+import sys
+import unittest
+
+sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
+
+
+import contextlib
+import subprocess
+
+from yt_dlp.utils import Popen
+
+rootDir = os.path.dirname(os.path.dirname(os.path.abspath(__file__)))
+LAZY_EXTRACTORS = 'yt_dlp/extractor/lazy_extractors.py'
+
+
+class TestExecution(unittest.TestCase):
+ def run_yt_dlp(self, exe=(sys.executable, 'yt_dlp/__main__.py'), opts=('--version', )):
+ stdout, stderr, returncode = Popen.run(
+ [*exe, '--ignore-config', *opts], cwd=rootDir, text=True, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
+ print(stderr, file=sys.stderr)
+ self.assertEqual(returncode, 0)
+ return stdout.strip(), stderr.strip()
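+
+ # `Popen.run` is yt-dlp's subprocess wrapper; as the unpacking above shows,
+ # it returns (stdout, stderr, returncode) in a single call.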
+
+ def test_main_exec(self):
+ self.run_yt_dlp()
+
+ def test_import(self):
+ self.run_yt_dlp(exe=(sys.executable, '-c', 'import yt_dlp'))
+
+ def test_module_exec(self):
+ self.run_yt_dlp(exe=(sys.executable, '-m', 'yt_dlp'))
+
+ def test_cmdline_umlauts(self):
+ _, stderr = self.run_yt_dlp(opts=('ä', '--version'))
+ self.assertFalse(stderr)
+
+ def test_lazy_extractors(self):
+ try:
+ subprocess.check_call([sys.executable, 'devscripts/make_lazy_extractors.py', LAZY_EXTRACTORS],
+ cwd=rootDir, stdout=subprocess.DEVNULL)
+ self.assertTrue(os.path.exists(LAZY_EXTRACTORS))
+
+ _, stderr = self.run_yt_dlp(opts=('-s', 'test:'))
+ # `MIN_RECOMMENDED` emits a deprecated feature warning for deprecated Python versions
+ if stderr and stderr.startswith('Deprecated Feature: Support for Python'):
+ stderr = ''
+ self.assertFalse(stderr)
+
+ subprocess.check_call([sys.executable, 'test/test_all_urls.py'], cwd=rootDir, stdout=subprocess.DEVNULL)
+ finally:
+ with contextlib.suppress(OSError):
+ os.remove(LAZY_EXTRACTORS)
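+
+ # make_lazy_extractors.py generates yt_dlp/extractor/lazy_extractors.py,
+ # which lets `import yt_dlp` resolve extractor names without importing
+ # every extractor module up front. The same script can be run manually:
+ # python devscripts/make_lazy_extractors.py yt_dlp/extractor/lazy_extractors.py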
+
+
+if __name__ == '__main__':
+ unittest.main()
diff --git a/test/test_iqiyi_sdk_interpreter.py b/test/test_iqiyi_sdk_interpreter.py
new file mode 100644
index 0000000..47c632a
--- /dev/null
+++ b/test/test_iqiyi_sdk_interpreter.py
@@ -0,0 +1,44 @@
+#!/usr/bin/env python3
+
+# Allow direct execution
+import os
+import sys
+import unittest
+
+sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
+
+
+from test.helper import FakeYDL, is_download_test
+from yt_dlp.extractor import IqiyiIE
+
+
+class WarningLogger:
+ def __init__(self):
+ self.messages = []
+
+ def warning(self, msg):
+ self.messages.append(msg)
+
+ def debug(self, msg):
+ pass
+
+ def error(self, msg):
+ pass
+
+
+@is_download_test
+class TestIqiyiSDKInterpreter(unittest.TestCase):
+ def test_iqiyi_sdk_interpreter(self):
+ '''
+ Test the functionality of IqiyiSDKInterpreter by trying to log in
+
+ If `sign` is incorrect, the /validate call throws an HTTP 556 error
+ '''
+ logger = WarningLogger()
+ ie = IqiyiIE(FakeYDL({'logger': logger}))
+ ie._perform_login('foo', 'bar')
+ self.assertIn('unable to log in:', logger.messages[0])
+
+
+if __name__ == '__main__':
+ unittest.main()
diff --git a/test/test_jsinterp.py b/test/test_jsinterp.py
new file mode 100644
index 0000000..86928a6
--- /dev/null
+++ b/test/test_jsinterp.py
@@ -0,0 +1,380 @@
+#!/usr/bin/env python3
+
+# Allow direct execution
+import os
+import sys
+import unittest
+
+sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
+
+import math
+
+from yt_dlp.jsinterp import JS_Undefined, JSInterpreter
+
+
+class NaN:
+ pass
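+# Sentinel for expected-NaN results: float('nan') never compares equal to
+# anything (including itself), so assertEqual cannot be used; _test() checks
+# math.isnan() instead whenever this class is passed as `expected`.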
+
+
+class TestJSInterpreter(unittest.TestCase):
+ def _test(self, jsi_or_code, expected, func='f', args=()):
+ if isinstance(jsi_or_code, str):
+ jsi_or_code = JSInterpreter(jsi_or_code)
+ got = jsi_or_code.call_function(func, *args)
+ if expected is NaN:
+ self.assertTrue(math.isnan(got), f'{got} is not NaN')
+ else:
+ self.assertEqual(got, expected)
+
+ def test_basic(self):
+ jsi = JSInterpreter('function f(){;}')
+ self.assertEqual(repr(jsi.extract_function('f')), 'F<f>')
+ self._test(jsi, None)
+
+ self._test('function f(){return 42;}', 42)
+ self._test('function f(){42}', None)
+ self._test('var f = function(){return 42;}', 42)
+
+ def test_add(self):
+ self._test('function f(){return 42 + 7;}', 49)
+ self._test('function f(){return 42 + undefined;}', NaN)
+ self._test('function f(){return 42 + null;}', 42)
+
+ def test_sub(self):
+ self._test('function f(){return 42 - 7;}', 35)
+ self._test('function f(){return 42 - undefined;}', NaN)
+ self._test('function f(){return 42 - null;}', 42)
+
+ def test_mul(self):
+ self._test('function f(){return 42 * 7;}', 294)
+ self._test('function f(){return 42 * undefined;}', NaN)
+ self._test('function f(){return 42 * null;}', 0)
+
+ def test_div(self):
+ jsi = JSInterpreter('function f(a, b){return a / b;}')
+ self._test(jsi, NaN, args=(0, 0))
+ self._test(jsi, NaN, args=(JS_Undefined, 1))
+ self._test(jsi, float('inf'), args=(2, 0))
+ self._test(jsi, 0, args=(0, 3))
+
+ def test_mod(self):
+ self._test('function f(){return 42 % 7;}', 0)
+ self._test('function f(){return 42 % 0;}', NaN)
+ self._test('function f(){return 42 % undefined;}', NaN)
+
+ def test_exp(self):
+ self._test('function f(){return 42 ** 2;}', 1764)
+ self._test('function f(){return 42 ** undefined;}', NaN)
+ self._test('function f(){return 42 ** null;}', 1)
+ self._test('function f(){return undefined ** 42;}', NaN)
+
+ def test_calc(self):
+ self._test('function f(a){return 2*a+1;}', 7, args=[3])
+
+ def test_empty_return(self):
+ self._test('function f(){return; y()}', None)
+
+ def test_morespace(self):
+ self._test('function f (a) { return 2 * a + 1 ; }', 7, args=[3])
+ self._test('function f () { x = 2 ; return x; }', 2)
+
+ def test_strange_chars(self):
+ self._test('function $_xY1 ($_axY1) { var $_axY2 = $_axY1 + 1; return $_axY2; }',
+ 21, args=[20], func='$_xY1')
+
+ def test_operators(self):
+ self._test('function f(){return 1 << 5;}', 32)
+ self._test('function f(){return 2 ** 5}', 32)
+ self._test('function f(){return 19 & 21;}', 17)
+ self._test('function f(){return 11 >> 2;}', 2)
+ self._test('function f(){return []? 2+3: 4;}', 5)
+ self._test('function f(){return 1 == 2}', False)
+ self._test('function f(){return 0 && 1 || 2;}', 2)
+ self._test('function f(){return 0 ?? 42;}', 0)
+ self._test('function f(){return "life, the universe and everything" < 42;}', False)
+
+ def test_array_access(self):
+ self._test('function f(){var x = [1,2,3]; x[0] = 4; x[0] = 5; x[2.0] = 7; return x;}', [5, 2, 7])
+
+ def test_parens(self):
+ self._test('function f(){return (1) + (2) * ((( (( (((((3)))))) )) ));}', 7)
+ self._test('function f(){return (1 + 2) * 3;}', 9)
+
+ def test_quotes(self):
+ self._test(R'function f(){return "a\"\\("}', R'a"\(')
+
+ def test_assignments(self):
+ self._test('function f(){var x = 20; x = 30 + 1; return x;}', 31)
+ self._test('function f(){var x = 20; x += 30 + 1; return x;}', 51)
+ self._test('function f(){var x = 20; x -= 30 + 1; return x;}', -11)
+
+ @unittest.skip('Not implemented')
+ def test_comments(self):
+ self._test('''
+ function f() {
+ var x = /* 1 + */ 2;
+ var y = /* 30
+ * 40 */ 50;
+ return x + y;
+ }
+ ''', 52)
+
+ self._test('''
+ function f() {
+ var x = "/*";
+ var y = 1 /* comment */ + 2;
+ return y;
+ }
+ ''', 3)
+
+ def test_precedence(self):
+ self._test('''
+ function f() {
+ var a = [10, 20, 30, 40, 50];
+ var b = 6;
+ a[0]=a[b%a.length];
+ return a;
+ }
+ ''', [20, 20, 30, 40, 50])
+
+ def test_builtins(self):
+ self._test('function f() { return NaN }', NaN)
+
+ def test_date(self):
+ self._test('function f() { return new Date("Wednesday 31 December 1969 18:01:26 MDT") - 0; }', 86000)
+
+ jsi = JSInterpreter('function f(dt) { return new Date(dt) - 0; }')
+ self._test(jsi, 86000, args=['Wednesday 31 December 1969 18:01:26 MDT'])
+ self._test(jsi, 86000, args=['12/31/1969 18:01:26 MDT']) # m/d/y
+ self._test(jsi, 0, args=['1 January 1970 00:00:00 UTC'])
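+
+ # Sanity check of the 86000 ms expectation: 18:01:26 MDT (UTC-6) on
+ # 1969-12-31 is 00:01:26 UTC on 1970-01-01, i.e. 86 seconds after the
+ # Unix epoch; `new Date(...) - 0` coerces the date to epoch milliseconds.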
+
+ def test_call(self):
+ jsi = JSInterpreter('''
+ function x() { return 2; }
+ function y(a) { return x() + (a?a:0); }
+ function z() { return y(3); }
+ ''')
+ self._test(jsi, 5, func='z')
+ self._test(jsi, 2, func='y')
+
+ def test_if(self):
+ self._test('''
+ function f() {
+ let a = 9;
+ if (0==0) {a++}
+ return a
+ }
+ ''', 10)
+
+ self._test('''
+ function f() {
+ if (0==0) {return 10}
+ }
+ ''', 10)
+
+ self._test('''
+ function f() {
+ if (0!=0) {return 1}
+ else {return 10}
+ }
+ ''', 10)
+
+ """ # Unsupported
+ self._test('''
+ function f() {
+ if (0!=0) {return 1}
+ else if (1==0) {return 2}
+ else {return 10}
+ }
+ ''', 10)
+ """
+
+ def test_for_loop(self):
+ self._test('function f() { a=0; for (i=0; i-10; i++) {a++} return a }', 10)
+
+ def test_switch(self):
+ jsi = JSInterpreter('''
+ function f(x) { switch(x){
+ case 1:x+=1;
+ case 2:x+=2;
+ case 3:x+=3;break;
+ case 4:x+=4;
+ default:x=0;
+ } return x }
+ ''')
+ self._test(jsi, 7, args=[1])
+ self._test(jsi, 6, args=[3])
+ self._test(jsi, 0, args=[5])
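+
+ # Fallthrough trace: f(1) runs cases 1 through 3 (1+1+2+3 = 7) and breaks;
+ # f(3) runs only case 3 (3+3 = 6); f(5) matches no case, so `default`
+ # resets x to 0.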
+
+ def test_switch_default(self):
+ jsi = JSInterpreter('''
+ function f(x) { switch(x){
+ case 2: x+=2;
+ default: x-=1;
+ case 5:
+ case 6: x+=6;
+ case 0: break;
+ case 1: x+=1;
+ } return x }
+ ''')
+ self._test(jsi, 2, args=[1])
+ self._test(jsi, 11, args=[5])
+ self._test(jsi, 14, args=[9])
+
+ def test_try(self):
+ self._test('function f() { try{return 10} catch(e){return 5} }', 10)
+
+ def test_catch(self):
+ self._test('function f() { try{throw 10} catch(e){return 5} }', 5)
+
+ def test_finally(self):
+ self._test('function f() { try{throw 10} finally {return 42} }', 42)
+ self._test('function f() { try{throw 10} catch(e){return 5} finally {return 42} }', 42)
+
+ def test_nested_try(self):
+ self._test('''
+ function f() {try {
+ try{throw 10} finally {throw 42}
+ } catch(e){return 5} }
+ ''', 5)
+
+ def test_for_loop_continue(self):
+ self._test('function f() { a=0; for (i=0; i-10; i++) { continue; a++ } return a }', 0)
+
+ def test_for_loop_break(self):
+ self._test('function f() { a=0; for (i=0; i-10; i++) { break; a++ } return a }', 0)
+
+ def test_for_loop_try(self):
+ self._test('''
+ function f() {
+ for (i=0; i-10; i++) { try { if (i == 5) throw i} catch {return 10} finally {break} };
+ return 42 }
+ ''', 42)
+
+ def test_literal_list(self):
+ self._test('function f() { return [1, 2, "asdf", [5, 6, 7]][3] }', [5, 6, 7])
+
+ def test_comma(self):
+ self._test('function f() { a=5; a -= 1, a+=3; return a }', 7)
+ self._test('function f() { a=5; return (a -= 1, a+=3, a); }', 7)
+ self._test('function f() { return (l=[0,1,2,3], function(a, b){return a+b})((l[1], l[2]), l[3]) }', 5)
+
+ def test_void(self):
+ self._test('function f() { return void 42; }', None)
+
+ def test_return_function(self):
+ jsi = JSInterpreter('''
+ function f() { return [1, function(){return 1}][1] }
+ ''')
+ self.assertEqual(jsi.call_function('f')([]), 1)
+
+ def test_null(self):
+ self._test('function f() { return null; }', None)
+ self._test('function f() { return [null > 0, null < 0, null == 0, null === 0]; }',
+ [False, False, False, False])
+ self._test('function f() { return [null >= 0, null <= 0]; }', [True, True])
+
+ def test_undefined(self):
+ self._test('function f() { return undefined === undefined; }', True)
+ self._test('function f() { return undefined; }', JS_Undefined)
+ self._test('function f() {return undefined ?? 42; }', 42)
+ self._test('function f() { let v; return v; }', JS_Undefined)
+ self._test('function f() { let v; return v**0; }', 1)
+ self._test('function f() { let v; return [v>42, v<=42, v&&42, 42&&v]; }',
+ [False, False, JS_Undefined, JS_Undefined])
+
+ self._test('''
+ function f() { return [
+ undefined === undefined,
+ undefined == undefined,
+ undefined == null,
+ undefined < undefined,
+ undefined > undefined,
+ undefined === 0,
+ undefined == 0,
+ undefined < 0,
+ undefined > 0,
+ undefined >= 0,
+ undefined <= 0,
+ undefined > null,
+ undefined < null,
+ undefined === null
+ ]; }
+ ''', list(map(bool, (1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0))))
+
+ jsi = JSInterpreter('''
+ function f() { let v; return [42+v, v+42, v**42, 42**v, 0**v]; }
+ ''')
+ for y in jsi.call_function('f'):
+ self.assertTrue(math.isnan(y))
+
+ def test_object(self):
+ self._test('function f() { return {}; }', {})
+ self._test('function f() { let a = {m1: 42, m2: 0 }; return [a["m1"], a.m2]; }', [42, 0])
+ self._test('function f() { let a; return a?.qq; }', JS_Undefined)
+ self._test('function f() { let a = {m1: 42, m2: 0 }; return a?.qq; }', JS_Undefined)
+
+ def test_regex(self):
+ self._test('function f() { let a=/,,[/,913,/](,)}/; }', None)
+ self._test('function f() { let a=/,,[/,913,/](,)}/; return a; }', R'/,,[/,913,/](,)}/0')
+
+ R''' # We are not compiling regex
+ jsi = JSInterpreter('function f() { let a=/,,[/,913,/](,)}/; return a; }')
+ self.assertIsInstance(jsi.call_function('f'), re.Pattern)
+
+ jsi = JSInterpreter('function f() { let a=/,,[/,913,/](,)}/i; return a; }')
+ self.assertEqual(jsi.call_function('f').flags & re.I, re.I)
+
+ jsi = JSInterpreter(R'function f() { let a=/,][}",],()}(\[)/; return a; }')
+ self.assertEqual(jsi.call_function('f').pattern, r',][}",],()}(\[)')
+
+ jsi = JSInterpreter(R'function f() { let a=[/[)\\]/]; return a[0]; }')
+ self.assertEqual(jsi.call_function('f').pattern, r'[)\\]')
+ '''
+
+ @unittest.skip('Not implemented')
+ def test_replace(self):
+ self._test('function f() { let a="data-name".replace("data-", ""); return a }',
+ 'name')
+ self._test('function f() { let a="data-name".replace(new RegExp("^.+-"), ""); return a; }',
+ 'name')
+ self._test('function f() { let a="data-name".replace(/^.+-/, ""); return a; }',
+ 'name')
+ self._test('function f() { let a="data-name".replace(/a/g, "o"); return a; }',
+ 'doto-nome')
+ self._test('function f() { let a="data-name".replaceAll("a", "o"); return a; }',
+ 'doto-nome')
+
+ def test_char_code_at(self):
+ jsi = JSInterpreter('function f(i){return "test".charCodeAt(i)}')
+ self._test(jsi, 116, args=[0])
+ self._test(jsi, 101, args=[1])
+ self._test(jsi, 115, args=[2])
+ self._test(jsi, 116, args=[3])
+ self._test(jsi, None, args=[4])
+ self._test(jsi, 116, args=['not_a_number'])
+
+ def test_bitwise_operators_overflow(self):
+ self._test('function f(){return -524999584 << 5}', 379882496)
+ self._test('function f(){return 1236566549 << 5}', 915423904)
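+
+ # JS `<<` truncates its result to a signed 32-bit integer:
+ # -524999584 * 32 = -16799986688 = 379882496 (mod 2**32), and
+ # 1236566549 * 32 = 39570129568 = 915423904 (mod 2**32).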
+
+ def test_bitwise_operators_typecast(self):
+ self._test('function f(){return null << 5}', 0)
+ self._test('function f(){return undefined >> 5}', 0)
+ self._test('function f(){return 42 << NaN}', 42)
+
+ def test_negative(self):
+ self._test('function f(){return 2 * -2.0 ;}', -4)
+ self._test('function f(){return 2 - - -2 ;}', 0)
+ self._test('function f(){return 2 - - - -2 ;}', 4)
+ self._test('function f(){return 2 - + + - -2;}', 0)
+ self._test('function f(){return 2 + - + - -2;}', 0)
+
+ @unittest.skip('Not implemented')
+ def test_packed(self):
+ jsi = JSInterpreter('''function f(p,a,c,k,e,d){while(c--)if(k[c])p=p.replace(new RegExp('\\b'+c.toString(a)+'\\b','g'),k[c]);return p}''')
+ self.assertEqual(jsi.call_function('f', '''h 7=g("1j");7.7h({7g:[{33:"w://7f-7e-7d-7c.v.7b/7a/79/78/77/76.74?t=73&s=2s&e=72&f=2t&71=70.0.0.1&6z=6y&6x=6w"}],6v:"w://32.v.u/6u.31",16:"r%",15:"r%",6t:"6s",6r:"",6q:"l",6p:"l",6o:"6n",6m:\'6l\',6k:"6j",9:[{33:"/2u?b=6i&n=50&6h=w://32.v.u/6g.31",6f:"6e"}],1y:{6d:1,6c:\'#6b\',6a:\'#69\',68:"67",66:30,65:r,},"64":{63:"%62 2m%m%61%5z%5y%5x.u%5w%5v%5u.2y%22 2k%m%1o%22 5t%m%1o%22 5s%m%1o%22 2j%m%5r%22 16%m%5q%22 15%m%5p%22 5o%2z%5n%5m%2z",5l:"w://v.u/d/1k/5k.2y",5j:[]},\'5i\':{"5h":"5g"},5f:"5e",5d:"w://v.u",5c:{},5b:l,1x:[0.25,0.50,0.75,1,1.25,1.5,2]});h 1m,1n,5a;h 59=0,58=0;h 7=g("1j");h 2x=0,57=0,56=0;$.55({54:{\'53-52\':\'2i-51\'}});7.j(\'4z\',6(x){c(5>0&&x.1l>=5&&1n!=1){1n=1;$(\'q.4y\').4x(\'4w\')}});7.j(\'13\',6(x){2x=x.1l});7.j(\'2g\',6(x){2w(x)});7.j(\'4v\',6(){$(\'q.2v\').4u()});6 2w(x){$(\'q.2v\').4t();c(1m)19;1m=1;17=0;c(4s.4r===l){17=1}$.4q(\'/2u?b=4p&2l=1k&4o=2t-4n-4m-2s-4l&4k=&4j=&4i=&17=\'+17,6(2r){$(\'#4h\').4g(2r)});$(\'.3-8-4f-4e:4d("4c")\').2h(6(e){2q();g().4b(0);g().4a(l)});6 2q(){h $14=$("<q />").2p({1l:"49",16:"r%",15:"r%",48:0,2n:0,2o:47,46:"45(10%, 10%, 10%, 0.4)","44-43":"42"});$("<41 />").2p({16:"60%",15:"60%",2o:40,"3z-2n":"3y"}).3x({\'2m\':\'/?b=3w&2l=1k\',\'2k\':\'0\',\'2j\':\'2i\'}).2f($14);$14.2h(6(){$(3v).3u();g().2g()});$14.2f($(\'#1j\'))}g().13(0);}6 3t(){h 9=7.1b(2e);2d.2c(9);c(9.n>1){1r(i=0;i<9.n;i++){c(9[i].1a==2e){2d.2c(\'!!=\'+i);7.1p(i)}}}}7.j(\'3s\',6(){g().1h("/2a/3r.29","3q 10 28",6(){g().13(g().27()+10)},"2b");$("q[26=2b]").23().21(\'.3-20-1z\');g().1h("/2a/3p.29","3o 10 28",6(){h 12=g().27()-10;c(12<0)12=0;g().13(12)},"24");$("q[26=24]").23().21(\'.3-20-1z\');});6 1i(){}7.j(\'3n\',6(){1i()});7.j(\'3m\',6(){1i()});7.j("k",6(y){h 9=7.1b();c(9.n<2)19;$(\'.3-8-3l-3k\').3j(6(){$(\'#3-8-a-k\').1e(\'3-8-a-z\');$(\'.3-a-k\').p(\'o-1f\',\'11\')});7.1h("/3i/3h.3g","3f 3e",6(){$(\'.3-1w\').3d(\'3-8-1v\');$(\'.3-8-1y, .3-8-1x\').p(\'o-1g\',\'11\');c($(\'.3-1w\').3c(\'3-8-1v\')){$(\'.3-a-k\').p(\'o-1g\',\'l\');$(\'.3-a-k\').p(\'o-1f\',\'l\');$(\'.3-8-a\').1e(\'3-8-a-z\');$(\'.3-8-a:1u\').3b(\'3-8-a-z\')}3a{$(\'.3-a-k\').p(\'o-1g\',\'11\');$(\'.3-a-k\').p(\'o-1f\',\'11\');$(\'.3-8-a:1u\').1e(\'3-8-a-z\')}},"39");7.j("38",6(y){1d.37(\'1c\',y.9[y.36].1a)});c(1d.1t(\'1c\')){35("1s(1d.1t(\'1c\'));",34)}});h 18;6 1s(1q){h 9=7.1b();c(9.n>1){1r(i=0;i<9.n;i++){c(9[i].1a==1q){c(i==18){19}18=i;7.1p(i)}}}}',36,270,'|||jw|||function|player|settings|tracks|submenu||if||||jwplayer|var||on|audioTracks|true|3D|length|aria|attr|div|100|||sx|filemoon|https||event|active||false|tt|seek|dd|height|width|adb|current_audio|return|name|getAudioTracks|default_audio|localStorage|removeClass|expanded|checked|addButton|callMeMaybe|vplayer|0fxcyc2ajhp1|position|vvplay|vvad|220|setCurrentAudioTrack|audio_name|for|audio_set|getItem|last|open|controls|playbackRates|captions|rewind|icon|insertAfter||detach|ff00||button|getPosition|sec|png|player8|ff11|log|console|track_name|appendTo|play|click|no|scrolling|frameborder|file_code|src|top|zIndex|css|showCCform|data|1662367683|383371|dl|video_ad|doPlay|prevt|mp4|3E||jpg|thumbs|file|300|setTimeout|currentTrack|setItem|audioTrackChanged|dualSound|else|addClass|hasClass|toggleClass|Track|Audio|svg|dualy|images|mousedown|buttons|topbar|playAttemptFailed|beforePlay|Rewind|fr|Forward|ff|ready|set_audio_track|remove|this|upload_srt|prop|50px|margin|1000001|iframe|center|align|text|rgba|background|1000000|left|absolute|pause|setCurrentCaptions|Upload|contains|item|content|html|fviews|referer|prem|embed|3e57249ef633e0d03bf76ceb8d8a4b65|216|83|hash|view|get|TokenZir|window|hide|show|complete|slow|fadeIn|video_ad_fadein|time||cache|Cache|Content|headers|ajaxSetup|v2done|tott|vastdone2|vastdone1|vvbefore|playbackRateControls|cast|aboutlink|FileMoon|abouttext|UHD|1870|qualityLabels|sites|GNOME_POWER|link|2Fiframe|3C|allowfullscreen|22360|22640|22no|marginheight|marginwidth|2FGNOME_POWER|2F0fxcyc2ajhp1|2Fe|2Ffilemoon|2F|3A||22https|3Ciframe|code|sharing|fontOpacity|backgroundOpacity|Tahoma|fontFamily|303030|backgroundColor|FFFFFF|color|userFontScale|thumbnails|kind|0fxcyc2ajhp10000|url|get_slides|start|startparam|none|preload|html5|primary|hlshtml|androidhls|duration|uniform|stretching|0fxcyc2ajhp1_xt|image|2048|sp|6871|asn|127|srv|43200|_g3XlBcu2lmD9oDexD2NLWSmah2Nu3XcDrl93m9PwXY|m3u8||master|0fxcyc2ajhp1_x|00076|01|hls2|to|s01|delivery|storage|moon|sources|setup'''.split('|')))
+
+
+if __name__ == '__main__':
+ unittest.main()
diff --git a/test/test_netrc.py b/test/test_netrc.py
new file mode 100644
index 0000000..dc708d9
--- /dev/null
+++ b/test/test_netrc.py
@@ -0,0 +1,28 @@
+#!/usr/bin/env python3
+
+# Allow direct execution
+import os
+import sys
+import unittest
+
+sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
+
+
+from yt_dlp.extractor import gen_extractor_classes
+from yt_dlp.extractor.common import InfoExtractor
+
+NO_LOGIN = InfoExtractor._perform_login
+
+
+class TestNetRc(unittest.TestCase):
+ def test_netrc_present(self):
+ for ie in gen_extractor_classes():
+ if ie._perform_login is NO_LOGIN:
+ continue
+ self.assertTrue(
+ ie._NETRC_MACHINE,
+ 'Extractor %s supports login, but is missing a _NETRC_MACHINE property' % ie.IE_NAME)
+
+
+if __name__ == '__main__':
+ unittest.main()
diff --git a/test/test_networking.py b/test/test_networking.py
new file mode 100644
index 0000000..628f1f1
--- /dev/null
+++ b/test/test_networking.py
@@ -0,0 +1,1631 @@
+#!/usr/bin/env python3
+
+# Allow direct execution
+import os
+import sys
+
+import pytest
+
+sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
+
+import gzip
+import http.client
+import http.cookiejar
+import http.server
+import io
+import logging
+import pathlib
+import random
+import ssl
+import tempfile
+import threading
+import time
+import urllib.error
+import urllib.request
+import warnings
+import zlib
+from email.message import Message
+from http.cookiejar import CookieJar
+
+from test.helper import FakeYDL, http_server_port, verify_address_availability
+from yt_dlp.cookies import YoutubeDLCookieJar
+from yt_dlp.dependencies import brotli, requests, urllib3
+from yt_dlp.networking import (
+ HEADRequest,
+ PUTRequest,
+ Request,
+ RequestDirector,
+ RequestHandler,
+ Response,
+)
+from yt_dlp.networking._urllib import UrllibRH
+from yt_dlp.networking.exceptions import (
+ CertificateVerifyError,
+ HTTPError,
+ IncompleteRead,
+ NoSupportingHandlers,
+ ProxyError,
+ RequestError,
+ SSLError,
+ TransportError,
+ UnsupportedRequest,
+)
+from yt_dlp.utils._utils import _YDLLogger as FakeLogger
+from yt_dlp.utils.networking import HTTPHeaderDict
+
+from test.conftest import validate_and_send
+
+TEST_DIR = os.path.dirname(os.path.abspath(__file__))
+
+
+def _build_proxy_handler(name):
+ class HTTPTestRequestHandler(http.server.BaseHTTPRequestHandler):
+ proxy_name = name
+
+ def log_message(self, format, *args):
+ pass
+
+ def do_GET(self):
+ self.send_response(200)
+ self.send_header('Content-Type', 'text/plain; charset=utf-8')
+ self.end_headers()
+ self.wfile.write(f'{self.proxy_name}: {self.path}'.encode())
+ return HTTPTestRequestHandler
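+
+# A client speaking to an HTTP proxy puts the absolute URI on the request
+# line (e.g. "GET http://foo.com/bar HTTP/1.1"), so `self.path` above is the
+# full target URL; echoing "<proxy_name>: <url>" lets the tests assert which
+# proxy served a given request.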
+
+
+class HTTPTestRequestHandler(http.server.BaseHTTPRequestHandler):
+ protocol_version = 'HTTP/1.1'
+
+ def log_message(self, format, *args):
+ pass
+
+ def _headers(self):
+ payload = str(self.headers).encode()
+ self.send_response(200)
+ self.send_header('Content-Type', 'application/json')
+ self.send_header('Content-Length', str(len(payload)))
+ self.end_headers()
+ self.wfile.write(payload)
+
+ def _redirect(self):
+ self.send_response(int(self.path[len('/redirect_'):]))
+ self.send_header('Location', '/method')
+ self.send_header('Content-Length', '0')
+ self.end_headers()
+
+ def _method(self, method, payload=None):
+ self.send_response(200)
+ self.send_header('Content-Length', str(len(payload or '')))
+ self.send_header('Method', method)
+ self.end_headers()
+ if payload:
+ self.wfile.write(payload)
+
+ def _status(self, status):
+ payload = f'<html>{status} NOT FOUND</html>'.encode()
+ self.send_response(int(status))
+ self.send_header('Content-Type', 'text/html; charset=utf-8')
+ self.send_header('Content-Length', str(len(payload)))
+ self.end_headers()
+ self.wfile.write(payload)
+
+ def _read_data(self):
+ if 'Content-Length' in self.headers:
+ return self.rfile.read(int(self.headers['Content-Length']))
+
+ def do_POST(self):
+ data = self._read_data() + str(self.headers).encode()
+ if self.path.startswith('/redirect_'):
+ self._redirect()
+ elif self.path.startswith('/method'):
+ self._method('POST', data)
+ elif self.path.startswith('/headers'):
+ self._headers()
+ else:
+ self._status(404)
+
+ def do_HEAD(self):
+ if self.path.startswith('/redirect_'):
+ self._redirect()
+ elif self.path.startswith('/method'):
+ self._method('HEAD')
+ else:
+ self._status(404)
+
+ def do_PUT(self):
+ data = self._read_data() + str(self.headers).encode()
+ if self.path.startswith('/redirect_'):
+ self._redirect()
+ elif self.path.startswith('/method'):
+ self._method('PUT', data)
+ else:
+ self._status(404)
+
+ def do_GET(self):
+ if self.path == '/video.html':
+ payload = b'<html><video src="/vid.mp4" /></html>'
+ self.send_response(200)
+ self.send_header('Content-Type', 'text/html; charset=utf-8')
+ self.send_header('Content-Length', str(len(payload)))
+ self.end_headers()
+ self.wfile.write(payload)
+ elif self.path == '/vid.mp4':
+ payload = b'\x00\x00\x00\x00\x20\x66\x74[video]'
+ self.send_response(200)
+ self.send_header('Content-Type', 'video/mp4')
+ self.send_header('Content-Length', str(len(payload)))
+ self.end_headers()
+ self.wfile.write(payload)
+ elif self.path == '/%E4%B8%AD%E6%96%87.html':
+ payload = b'<html><video src="/vid.mp4" /></html>'
+ self.send_response(200)
+ self.send_header('Content-Type', 'text/html; charset=utf-8')
+ self.send_header('Content-Length', str(len(payload)))
+ self.end_headers()
+ self.wfile.write(payload)
+ elif self.path == '/%c7%9f':
+ payload = b'<html><video src="/vid.mp4" /></html>'
+ self.send_response(200)
+ self.send_header('Content-Type', 'text/html; charset=utf-8')
+ self.send_header('Content-Length', str(len(payload)))
+ self.end_headers()
+ self.wfile.write(payload)
+ elif self.path.startswith('/redirect_loop'):
+ self.send_response(301)
+ self.send_header('Location', self.path)
+ self.send_header('Content-Length', '0')
+ self.end_headers()
+ elif self.path == '/redirect_dotsegments':
+ self.send_response(301)
+ # redirect to /headers but with dot segments before
+ self.send_header('Location', '/a/b/./../../headers')
+ self.send_header('Content-Length', '0')
+ self.end_headers()
+ elif self.path == '/redirect_dotsegments_absolute':
+ self.send_response(301)
+ # redirect to /headers but with dot segments before - absolute url
+ self.send_header('Location', f'http://127.0.0.1:{http_server_port(self.server)}/a/b/./../../headers')
+ self.send_header('Content-Length', '0')
+ self.end_headers()
+ elif self.path.startswith('/redirect_'):
+ self._redirect()
+ elif self.path.startswith('/method'):
+ self._method('GET', str(self.headers).encode())
+ elif self.path.startswith('/headers'):
+ self._headers()
+ elif self.path.startswith('/308-to-headers'):
+ self.send_response(308)
+ self.send_header('Location', '/headers')
+ self.send_header('Content-Length', '0')
+ self.end_headers()
+ elif self.path == '/trailing_garbage':
+ payload = b'<html><video src="/vid.mp4" /></html>'
+ self.send_response(200)
+ self.send_header('Content-Type', 'text/html; charset=utf-8')
+ self.send_header('Content-Encoding', 'gzip')
+ buf = io.BytesIO()
+ with gzip.GzipFile(fileobj=buf, mode='wb') as f:
+ f.write(payload)
+ compressed = buf.getvalue() + b'trailing garbage'
+ self.send_header('Content-Length', str(len(compressed)))
+ self.end_headers()
+ self.wfile.write(compressed)
+ elif self.path == '/302-non-ascii-redirect':
+ new_url = f'http://127.0.0.1:{http_server_port(self.server)}/中文.html'
+ self.send_response(301)
+ self.send_header('Location', new_url)
+ self.send_header('Content-Length', '0')
+ self.end_headers()
+ elif self.path == '/content-encoding':
+ encodings = self.headers.get('ytdl-encoding', '')
+ payload = b'<html><video src="/vid.mp4" /></html>'
+ for encoding in filter(None, (e.strip() for e in encodings.split(','))):
+ if encoding == 'br' and brotli:
+ payload = brotli.compress(payload)
+ elif encoding == 'gzip':
+ buf = io.BytesIO()
+ with gzip.GzipFile(fileobj=buf, mode='wb') as f:
+ f.write(payload)
+ payload = buf.getvalue()
+ elif encoding == 'deflate':
+ payload = zlib.compress(payload)
+ elif encoding == 'unsupported':
+ payload = b'raw'
+ break
+ else:
+ self._status(415)
+ return
+ self.send_response(200)
+ self.send_header('Content-Encoding', encodings)
+ self.send_header('Content-Length', str(len(payload)))
+ self.end_headers()
+ self.wfile.write(payload)
+ elif self.path.startswith('/gen_'):
+ payload = b'<html></html>'
+ self.send_response(int(self.path[len('/gen_'):]))
+ self.send_header('Content-Type', 'text/html; charset=utf-8')
+ self.send_header('Content-Length', str(len(payload)))
+ self.end_headers()
+ self.wfile.write(payload)
+ elif self.path.startswith('/incompleteread'):
+ payload = b'<html></html>'
+ self.send_response(200)
+ self.send_header('Content-Type', 'text/html; charset=utf-8')
+ self.send_header('Content-Length', '234234')
+ self.end_headers()
+ self.wfile.write(payload)
+ self.finish()
+ elif self.path.startswith('/timeout_'):
+ time.sleep(int(self.path[len('/timeout_'):]))
+ self._headers()
+ elif self.path == '/source_address':
+ payload = str(self.client_address[0]).encode()
+ self.send_response(200)
+ self.send_header('Content-Type', 'text/html; charset=utf-8')
+ self.send_header('Content-Length', str(len(payload)))
+ self.end_headers()
+ self.wfile.write(payload)
+ self.finish()
+ else:
+ self._status(404)
+
+ def send_header(self, keyword, value):
+ """
+ Forcibly allow the HTTP server to send non-percent-encoded non-ASCII characters in headers.
+ This is against what is defined in RFC 3986; however, we need to test that we support this,
+ since some sites incorrectly do this.
+ """
+ if keyword.lower() == 'connection':
+ return super().send_header(keyword, value)
+
+ if not hasattr(self, '_headers_buffer'):
+ self._headers_buffer = []
+
+ self._headers_buffer.append(f'{keyword}: {value}\r\n'.encode())
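+
+ # The /302-non-ascii-redirect path above relies on this override to emit a
+ # raw, non-percent-encoded UTF-8 header such as
+ # "Location: http://127.0.0.1:<port>/中文.html".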
+
+
+class TestRequestHandlerBase:
+ @classmethod
+ def setup_class(cls):
+ cls.http_httpd = http.server.ThreadingHTTPServer(
+ ('127.0.0.1', 0), HTTPTestRequestHandler)
+ cls.http_port = http_server_port(cls.http_httpd)
+ cls.http_server_thread = threading.Thread(target=cls.http_httpd.serve_forever)
+ # FIXME: we should probably stop the http server thread after each test
+ # See: https://github.com/yt-dlp/yt-dlp/pull/7094#discussion_r1199746041
+ cls.http_server_thread.daemon = True
+ cls.http_server_thread.start()
+
+ # HTTPS server
+ certfn = os.path.join(TEST_DIR, 'testcert.pem')
+ cls.https_httpd = http.server.ThreadingHTTPServer(
+ ('127.0.0.1', 0), HTTPTestRequestHandler)
+ sslctx = ssl.SSLContext(ssl.PROTOCOL_TLS_SERVER)
+ sslctx.load_cert_chain(certfn, None)
+ cls.https_httpd.socket = sslctx.wrap_socket(cls.https_httpd.socket, server_side=True)
+ cls.https_port = http_server_port(cls.https_httpd)
+ cls.https_server_thread = threading.Thread(target=cls.https_httpd.serve_forever)
+ cls.https_server_thread.daemon = True
+ cls.https_server_thread.start()
+
+
+class TestHTTPRequestHandler(TestRequestHandlerBase):
+ @pytest.mark.parametrize('handler', ['Urllib', 'Requests'], indirect=True)
+ def test_verify_cert(self, handler):
+ with handler() as rh:
+ with pytest.raises(CertificateVerifyError):
+ validate_and_send(rh, Request(f'https://127.0.0.1:{self.https_port}/headers'))
+
+ with handler(verify=False) as rh:
+ r = validate_and_send(rh, Request(f'https://127.0.0.1:{self.https_port}/headers'))
+ assert r.status == 200
+ r.close()
+
+ @pytest.mark.parametrize('handler', ['Urllib', 'Requests'], indirect=True)
+ def test_ssl_error(self, handler):
+ # HTTPS server with too old TLS version
+ # XXX: is there a better way to test this than to create a new server?
+ https_httpd = http.server.ThreadingHTTPServer(
+ ('127.0.0.1', 0), HTTPTestRequestHandler)
+ sslctx = ssl.SSLContext(ssl.PROTOCOL_TLS_SERVER)
+ https_httpd.socket = sslctx.wrap_socket(https_httpd.socket, server_side=True)
+ https_port = http_server_port(https_httpd)
+ https_server_thread = threading.Thread(target=https_httpd.serve_forever)
+ https_server_thread.daemon = True
+ https_server_thread.start()
+
+ with handler(verify=False) as rh:
+ with pytest.raises(SSLError, match=r'ssl(?:v3|/tls) alert handshake failure') as exc_info:
+ validate_and_send(rh, Request(f'https://127.0.0.1:{https_port}/headers'))
+ assert not issubclass(exc_info.type, CertificateVerifyError)
+
+ @pytest.mark.parametrize('handler', ['Urllib', 'Requests'], indirect=True)
+ def test_percent_encode(self, handler):
+ with handler() as rh:
+ # Unicode characters should be encoded with uppercase percent-encoding
+ res = validate_and_send(rh, Request(f'http://127.0.0.1:{self.http_port}/中文.html'))
+ assert res.status == 200
+ res.close()
+ # don't normalize existing percent encodings
+ res = validate_and_send(rh, Request(f'http://127.0.0.1:{self.http_port}/%c7%9f'))
+ assert res.status == 200
+ res.close()
+
+ @pytest.mark.parametrize('handler', ['Urllib', 'Requests'], indirect=True)
+ @pytest.mark.parametrize('path', [
+ '/a/b/./../../headers',
+ '/redirect_dotsegments',
+ # https://github.com/yt-dlp/yt-dlp/issues/9020
+ '/redirect_dotsegments_absolute',
+ ])
+ def test_remove_dot_segments(self, handler, path):
+ with handler(verbose=True) as rh:
+ # This isn't a comprehensive test,
+ # but it should be enough to check whether the handler is removing dot segments in required scenarios
+ res = validate_and_send(rh, Request(f'http://127.0.0.1:{self.http_port}{path}'))
+ assert res.status == 200
+ assert res.url == f'http://127.0.0.1:{self.http_port}/headers'
+ res.close()
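+
+ # RFC 3986 sect. 5.2.4 trace for '/a/b/./../../headers': dropping './'
+ # gives '/a/b/../../headers'; each '../' then pops one segment ('b', then
+ # 'a'), leaving '/headers', which is what the assertion above expects.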
+
+ @pytest.mark.parametrize('handler', ['Urllib', 'Requests'], indirect=True)
+ def test_unicode_path_redirection(self, handler):
+ with handler() as rh:
+ r = validate_and_send(rh, Request(f'http://127.0.0.1:{self.http_port}/302-non-ascii-redirect'))
+ assert r.url == f'http://127.0.0.1:{self.http_port}/%E4%B8%AD%E6%96%87.html'
+ r.close()
+
+ @pytest.mark.parametrize('handler', ['Urllib', 'Requests'], indirect=True)
+ def test_raise_http_error(self, handler):
+ with handler() as rh:
+ for bad_status in (400, 500, 599, 302):
+ with pytest.raises(HTTPError):
+ validate_and_send(rh, Request('http://127.0.0.1:%d/gen_%d' % (self.http_port, bad_status)))
+
+ # Should not raise an error
+ validate_and_send(rh, Request('http://127.0.0.1:%d/gen_200' % self.http_port)).close()
+
+ @pytest.mark.parametrize('handler', ['Urllib', 'Requests'], indirect=True)
+ def test_response_url(self, handler):
+ with handler() as rh:
+ # The response URL should be that of the last URL in the redirect chain
+ res = validate_and_send(rh, Request(f'http://127.0.0.1:{self.http_port}/redirect_301'))
+ assert res.url == f'http://127.0.0.1:{self.http_port}/method'
+ res.close()
+ res2 = validate_and_send(rh, Request(f'http://127.0.0.1:{self.http_port}/gen_200'))
+ assert res2.url == f'http://127.0.0.1:{self.http_port}/gen_200'
+ res2.close()
+
+ @pytest.mark.parametrize('handler', ['Urllib', 'Requests'], indirect=True)
+ def test_redirect(self, handler):
+ with handler() as rh:
+ def do_req(redirect_status, method, assert_no_content=False):
+ data = b'testdata' if method in ('POST', 'PUT') else None
+ res = validate_and_send(
+ rh, Request(f'http://127.0.0.1:{self.http_port}/redirect_{redirect_status}', method=method, data=data))
+
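+ # /method echoes the request body followed by the request headers; if
+ # the redirect downgraded the method (e.g. POST -> GET), no body was
+ # re-sent, so the first read() returns header bytes, which are moved
+ # into `headers` below.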
+ headers = b''
+ data_sent = b''
+ if data is not None:
+ data_sent += res.read(len(data))
+ if data_sent != data:
+ headers += data_sent
+ data_sent = b''
+
+ headers += res.read()
+
+ if assert_no_content or data is None:
+ assert b'Content-Type' not in headers
+ assert b'Content-Length' not in headers
+ else:
+ assert b'Content-Type' in headers
+ assert b'Content-Length' in headers
+
+ return data_sent.decode(), res.headers.get('method', '')
+
+ # A 303 must either use GET or HEAD for subsequent request
+ assert do_req(303, 'POST', True) == ('', 'GET')
+ assert do_req(303, 'HEAD') == ('', 'HEAD')
+
+ assert do_req(303, 'PUT', True) == ('', 'GET')
+
+ # 301 and 302 turn POST only into a GET
+ assert do_req(301, 'POST', True) == ('', 'GET')
+ assert do_req(301, 'HEAD') == ('', 'HEAD')
+ assert do_req(302, 'POST', True) == ('', 'GET')
+ assert do_req(302, 'HEAD') == ('', 'HEAD')
+
+ assert do_req(301, 'PUT') == ('testdata', 'PUT')
+ assert do_req(302, 'PUT') == ('testdata', 'PUT')
+
+ # 307 and 308 should not change method
+ for m in ('POST', 'PUT'):
+ assert do_req(307, m) == ('testdata', m)
+ assert do_req(308, m) == ('testdata', m)
+
+ assert do_req(307, 'HEAD') == ('', 'HEAD')
+ assert do_req(308, 'HEAD') == ('', 'HEAD')
+
+ # These should not redirect and instead raise an HTTPError
+ for code in (300, 304, 305, 306):
+ with pytest.raises(HTTPError):
+ do_req(code, 'GET')
+
+ @pytest.mark.parametrize('handler', ['Urllib', 'Requests'], indirect=True)
+ def test_request_cookie_header(self, handler):
+ # We should accept a Cookie header passed in as a normal header and handle it appropriately.
+ with handler() as rh:
+ # Specified Cookie header should be used
+ res = validate_and_send(
+ rh, Request(
+ f'http://127.0.0.1:{self.http_port}/headers',
+ headers={'Cookie': 'test=test'})).read().decode()
+ assert 'Cookie: test=test' in res
+
+ # Specified Cookie header should be removed on any redirect
+ res = validate_and_send(
+ rh, Request(
+ f'http://127.0.0.1:{self.http_port}/308-to-headers',
+ headers={'Cookie': 'test=test'})).read().decode()
+ assert 'Cookie: test=test' not in res
+
+ # Specified Cookie header should override global cookiejar for that request
+ cookiejar = YoutubeDLCookieJar()
+ cookiejar.set_cookie(http.cookiejar.Cookie(
+ version=0, name='test', value='ytdlp', port=None, port_specified=False,
+ domain='127.0.0.1', domain_specified=True, domain_initial_dot=False, path='/',
+ path_specified=True, secure=False, expires=None, discard=False, comment=None,
+ comment_url=None, rest={}))
+
+ with handler(cookiejar=cookiejar) as rh:
+ data = validate_and_send(
+ rh, Request(f'http://127.0.0.1:{self.http_port}/headers', headers={'cookie': 'test=test'})).read()
+ assert b'Cookie: test=ytdlp' not in data
+ assert b'Cookie: test=test' in data
+
+ @pytest.mark.parametrize('handler', ['Urllib', 'Requests'], indirect=True)
+ def test_redirect_loop(self, handler):
+ with handler() as rh:
+ with pytest.raises(HTTPError, match='redirect loop'):
+ validate_and_send(rh, Request(f'http://127.0.0.1:{self.http_port}/redirect_loop'))
+
+ @pytest.mark.parametrize('handler', ['Urllib', 'Requests'], indirect=True)
+ def test_incompleteread(self, handler):
+ with handler(timeout=2) as rh:
+ with pytest.raises(IncompleteRead):
+ validate_and_send(rh, Request('http://127.0.0.1:%d/incompleteread' % self.http_port)).read()
+
+ @pytest.mark.parametrize('handler', ['Urllib', 'Requests'], indirect=True)
+ def test_cookies(self, handler):
+ cookiejar = YoutubeDLCookieJar()
+ cookiejar.set_cookie(http.cookiejar.Cookie(
+ 0, 'test', 'ytdlp', None, False, '127.0.0.1', True,
+ False, '/headers', True, False, None, False, None, None, {}))
+
+ with handler(cookiejar=cookiejar) as rh:
+ data = validate_and_send(rh, Request(f'http://127.0.0.1:{self.http_port}/headers')).read()
+ assert b'Cookie: test=ytdlp' in data
+
+ # Per request
+ with handler() as rh:
+ data = validate_and_send(
+ rh, Request(f'http://127.0.0.1:{self.http_port}/headers', extensions={'cookiejar': cookiejar})).read()
+ assert b'Cookie: test=ytdlp' in data
+
+ @pytest.mark.parametrize('handler', ['Urllib', 'Requests'], indirect=True)
+ def test_headers(self, handler):
+
+ with handler(headers=HTTPHeaderDict({'test1': 'test', 'test2': 'test2'})) as rh:
+ # Global Headers
+ data = validate_and_send(rh, Request(f'http://127.0.0.1:{self.http_port}/headers')).read()
+ assert b'Test1: test' in data
+
+ # Per request headers, merged with global
+ data = validate_and_send(rh, Request(
+ f'http://127.0.0.1:{self.http_port}/headers', headers={'test2': 'changed', 'test3': 'test3'})).read()
+ assert b'Test1: test' in data
+ assert b'Test2: changed' in data
+ assert b'Test2: test2' not in data
+ assert b'Test3: test3' in data
+
+ @pytest.mark.parametrize('handler', ['Urllib', 'Requests'], indirect=True)
+ def test_timeout(self, handler):
+ with handler() as rh:
+ # Default timeout is 20 seconds, so this should go through
+ validate_and_send(
+ rh, Request(f'http://127.0.0.1:{self.http_port}/timeout_3'))
+
+ with handler(timeout=0.5) as rh:
+ with pytest.raises(TransportError):
+ validate_and_send(
+ rh, Request(f'http://127.0.0.1:{self.http_port}/timeout_1'))
+
+ # Per request timeout, should override handler timeout
+ validate_and_send(
+ rh, Request(f'http://127.0.0.1:{self.http_port}/timeout_1', extensions={'timeout': 4}))
+
+ @pytest.mark.parametrize('handler', ['Urllib', 'Requests'], indirect=True)
+ def test_source_address(self, handler):
+ source_address = f'127.0.0.{random.randint(5, 255)}'
+ # on some systems, the loopback addresses we need for testing may not be available
+ # see: https://github.com/yt-dlp/yt-dlp/issues/8890
+ verify_address_availability(source_address)
+ with handler(source_address=source_address) as rh:
+ data = validate_and_send(
+ rh, Request(f'http://127.0.0.1:{self.http_port}/source_address')).read().decode()
+ assert source_address == data
+
+ @pytest.mark.parametrize('handler', ['Urllib', 'Requests'], indirect=True)
+ def test_gzip_trailing_garbage(self, handler):
+ with handler() as rh:
+ data = validate_and_send(rh, Request(f'http://localhost:{self.http_port}/trailing_garbage')).read().decode()
+ assert data == '<html><video src="/vid.mp4" /></html>'
+
+ @pytest.mark.parametrize('handler', ['Urllib', 'Requests'], indirect=True)
+ @pytest.mark.skipif(not brotli, reason='brotli support is not installed')
+ def test_brotli(self, handler):
+ with handler() as rh:
+ res = validate_and_send(
+ rh, Request(
+ f'http://127.0.0.1:{self.http_port}/content-encoding',
+ headers={'ytdl-encoding': 'br'}))
+ assert res.headers.get('Content-Encoding') == 'br'
+ assert res.read() == b'<html><video src="/vid.mp4" /></html>'
+
+ @pytest.mark.parametrize('handler', ['Urllib', 'Requests'], indirect=True)
+ def test_deflate(self, handler):
+ with handler() as rh:
+ res = validate_and_send(
+ rh, Request(
+ f'http://127.0.0.1:{self.http_port}/content-encoding',
+ headers={'ytdl-encoding': 'deflate'}))
+ assert res.headers.get('Content-Encoding') == 'deflate'
+ assert res.read() == b'<html><video src="/vid.mp4" /></html>'
+
+ @pytest.mark.parametrize('handler', ['Urllib', 'Requests'], indirect=True)
+ def test_gzip(self, handler):
+ with handler() as rh:
+ res = validate_and_send(
+ rh, Request(
+ f'http://127.0.0.1:{self.http_port}/content-encoding',
+ headers={'ytdl-encoding': 'gzip'}))
+ assert res.headers.get('Content-Encoding') == 'gzip'
+ assert res.read() == b'<html><video src="/vid.mp4" /></html>'
+
+ @pytest.mark.parametrize('handler', ['Urllib', 'Requests'], indirect=True)
+ def test_multiple_encodings(self, handler):
+ with handler() as rh:
+ for pair in ('gzip,deflate', 'deflate, gzip', 'gzip, gzip', 'deflate, deflate'):
+ res = validate_and_send(
+ rh, Request(
+ f'http://127.0.0.1:{self.http_port}/content-encoding',
+ headers={'ytdl-encoding': pair}))
+ assert res.headers.get('Content-Encoding') == pair
+ assert res.read() == b'<html><video src="/vid.mp4" /></html>'
+
+ @pytest.mark.parametrize('handler', ['Urllib', 'Requests'], indirect=True)
+ def test_unsupported_encoding(self, handler):
+ with handler() as rh:
+ res = validate_and_send(
+ rh, Request(
+ f'http://127.0.0.1:{self.http_port}/content-encoding',
+ headers={'ytdl-encoding': 'unsupported'}))
+ assert res.headers.get('Content-Encoding') == 'unsupported'
+ assert res.read() == b'raw'
+
+ @pytest.mark.parametrize('handler', ['Urllib', 'Requests'], indirect=True)
+ def test_read(self, handler):
+ with handler() as rh:
+ res = validate_and_send(
+ rh, Request(f'http://127.0.0.1:{self.http_port}/headers'))
+ assert res.readable()
+ assert res.read(1) == b'H'
+ assert res.read(3) == b'ost'
+
+
+class TestHTTPProxy(TestRequestHandlerBase):
+ @classmethod
+ def setup_class(cls):
+ super().setup_class()
+ # HTTP Proxy server
+ cls.proxy = http.server.ThreadingHTTPServer(
+ ('127.0.0.1', 0), _build_proxy_handler('normal'))
+ cls.proxy_port = http_server_port(cls.proxy)
+ cls.proxy_thread = threading.Thread(target=cls.proxy.serve_forever)
+ cls.proxy_thread.daemon = True
+ cls.proxy_thread.start()
+
+ # Geo proxy server
+ cls.geo_proxy = http.server.ThreadingHTTPServer(
+ ('127.0.0.1', 0), _build_proxy_handler('geo'))
+ cls.geo_port = http_server_port(cls.geo_proxy)
+ cls.geo_proxy_thread = threading.Thread(target=cls.geo_proxy.serve_forever)
+ cls.geo_proxy_thread.daemon = True
+ cls.geo_proxy_thread.start()
+
+ @pytest.mark.parametrize('handler', ['Urllib', 'Requests'], indirect=True)
+ def test_http_proxy(self, handler):
+ http_proxy = f'http://127.0.0.1:{self.proxy_port}'
+ geo_proxy = f'http://127.0.0.1:{self.geo_port}'
+
+ # Test global http proxy
+ # Test per request http proxy
+ # Test per request http proxy disables proxy
+ url = 'http://foo.com/bar'
+
+ # Global HTTP proxy
+ with handler(proxies={'http': http_proxy}) as rh:
+ res = validate_and_send(rh, Request(url)).read().decode()
+ assert res == f'normal: {url}'
+
+ # Per request proxy overrides global
+ res = validate_and_send(rh, Request(url, proxies={'http': geo_proxy})).read().decode()
+ assert res == f'geo: {url}'
+
+ # and setting to None disables all proxies for that request
+ real_url = f'http://127.0.0.1:{self.http_port}/headers'
+ res = validate_and_send(
+ rh, Request(real_url, proxies={'http': None})).read().decode()
+ assert res != f'normal: {real_url}'
+ assert 'Accept' in res
+
+ @pytest.mark.parametrize('handler', ['Urllib', 'Requests'], indirect=True)
+ def test_noproxy(self, handler):
+ with handler(proxies={'proxy': f'http://127.0.0.1:{self.proxy_port}'}) as rh:
+ # NO_PROXY
+ for no_proxy in (f'127.0.0.1:{self.http_port}', '127.0.0.1', 'localhost'):
+ nop_response = validate_and_send(
+ rh, Request(f'http://127.0.0.1:{self.http_port}/headers', proxies={'no': no_proxy})).read().decode(
+ 'utf-8')
+ assert 'Accept' in nop_response
+
+ @pytest.mark.parametrize('handler', ['Urllib', 'Requests'], indirect=True)
+ def test_allproxy(self, handler):
+ url = 'http://foo.com/bar'
+ with handler() as rh:
+ response = validate_and_send(rh, Request(url, proxies={'all': f'http://127.0.0.1:{self.proxy_port}'})).read().decode(
+ 'utf-8')
+ assert response == f'normal: {url}'
+
+ @pytest.mark.parametrize('handler', ['Urllib', 'Requests'], indirect=True)
+ def test_http_proxy_with_idn(self, handler):
+ with handler(proxies={
+ 'http': f'http://127.0.0.1:{self.proxy_port}',
+ }) as rh:
+ url = 'http://中文.tw/'
+ response = rh.send(Request(url)).read().decode()
+ # b'xn--fiq228c' is '中文'.encode('idna')
+ assert response == 'normal: http://xn--fiq228c.tw/'
+
+
+class TestClientCertificate:
+
+ @classmethod
+ def setup_class(cls):
+ certfn = os.path.join(TEST_DIR, 'testcert.pem')
+ cls.certdir = os.path.join(TEST_DIR, 'testdata', 'certificate')
+ cacertfn = os.path.join(cls.certdir, 'ca.crt')
+ cls.httpd = http.server.ThreadingHTTPServer(('127.0.0.1', 0), HTTPTestRequestHandler)
+ sslctx = ssl.SSLContext(ssl.PROTOCOL_TLS_SERVER)
+ sslctx.verify_mode = ssl.CERT_REQUIRED
+ sslctx.load_verify_locations(cafile=cacertfn)
+ sslctx.load_cert_chain(certfn, None)
+ cls.httpd.socket = sslctx.wrap_socket(cls.httpd.socket, server_side=True)
+ cls.port = http_server_port(cls.httpd)
+ cls.server_thread = threading.Thread(target=cls.httpd.serve_forever)
+ cls.server_thread.daemon = True
+ cls.server_thread.start()
+
+ def _run_test(self, handler, **handler_kwargs):
+ with handler(
+ # Disable client-side validation of the (unacceptable) self-signed testcert.pem;
+ # the check under test happens on the server side, so it is unaffected
+ verify=False,
+ **handler_kwargs,
+ ) as rh:
+ validate_and_send(rh, Request(f'https://127.0.0.1:{self.port}/video.html')).read().decode()
+
+ @pytest.mark.parametrize('handler', ['Urllib', 'Requests'], indirect=True)
+ def test_certificate_combined_nopass(self, handler):
+ self._run_test(handler, client_cert={
+ 'client_certificate': os.path.join(self.certdir, 'clientwithkey.crt'),
+ })
+
+ @pytest.mark.parametrize('handler', ['Urllib', 'Requests'], indirect=True)
+ def test_certificate_nocombined_nopass(self, handler):
+ self._run_test(handler, client_cert={
+ 'client_certificate': os.path.join(self.certdir, 'client.crt'),
+ 'client_certificate_key': os.path.join(self.certdir, 'client.key'),
+ })
+
+ @pytest.mark.parametrize('handler', ['Urllib', 'Requests'], indirect=True)
+ def test_certificate_combined_pass(self, handler):
+ self._run_test(handler, client_cert={
+ 'client_certificate': os.path.join(self.certdir, 'clientwithencryptedkey.crt'),
+ 'client_certificate_password': 'foobar',
+ })
+
+ @pytest.mark.parametrize('handler', ['Urllib', 'Requests'], indirect=True)
+ def test_certificate_nocombined_pass(self, handler):
+ self._run_test(handler, client_cert={
+ 'client_certificate': os.path.join(self.certdir, 'client.crt'),
+ 'client_certificate_key': os.path.join(self.certdir, 'clientencrypted.key'),
+ 'client_certificate_password': 'foobar',
+ })
+
+
+class TestRequestHandlerMisc:
+ """Misc generic tests for request handlers, not related to request or validation testing"""
+ @pytest.mark.parametrize('handler,logger_name', [
+ ('Requests', 'urllib3'),
+ ('Websockets', 'websockets.client'),
+ ('Websockets', 'websockets.server')
+ ], indirect=['handler'])
+ def test_remove_logging_handler(self, handler, logger_name):
+ # Ensure any logging handlers, which may contain a YoutubeDL instance,
+ # are removed when we close the request handler
+ # See: https://github.com/yt-dlp/yt-dlp/issues/8922
+ logging_handlers = logging.getLogger(logger_name).handlers
+ before_count = len(logging_handlers)
+ rh = handler()
+ assert len(logging_handlers) == before_count + 1
+ rh.close()
+ assert len(logging_handlers) == before_count
+
+
+class TestUrllibRequestHandler(TestRequestHandlerBase):
+ @pytest.mark.parametrize('handler', ['Urllib'], indirect=True)
+ def test_file_urls(self, handler):
+ # See https://github.com/ytdl-org/youtube-dl/issues/8227
+ tf = tempfile.NamedTemporaryFile(delete=False)
+ tf.write(b'foobar')
+ tf.close()
+ req = Request(pathlib.Path(tf.name).as_uri())
+ with handler() as rh:
+ with pytest.raises(UnsupportedRequest):
+ rh.validate(req)
+
+ # Test that urllib never loaded FileHandler
+ with pytest.raises(TransportError):
+ rh.send(req)
+
+ with handler(enable_file_urls=True) as rh:
+ res = validate_and_send(rh, req)
+ assert res.read() == b'foobar'
+ res.close()
+
+ os.unlink(tf.name)
+
+ @pytest.mark.parametrize('handler', ['Urllib'], indirect=True)
+ def test_http_error_returns_content(self, handler):
+ # urllib's HTTPError will try to close the underlying response if the reference to the HTTPError object is lost
+ def get_response():
+ with handler() as rh:
+ # headers url
+ try:
+ validate_and_send(rh, Request(f'http://127.0.0.1:{self.http_port}/gen_404'))
+ except HTTPError as e:
+ return e.response
+
+ assert get_response().read() == b'<html></html>'
+
+ @pytest.mark.parametrize('handler', ['Urllib'], indirect=True)
+ def test_verify_cert_error_text(self, handler):
+ # Check the output of the error message
+ with handler() as rh:
+ with pytest.raises(
+ CertificateVerifyError,
+ match=r'\[SSL: CERTIFICATE_VERIFY_FAILED\] certificate verify failed: self.signed certificate'
+ ):
+ validate_and_send(rh, Request(f'https://127.0.0.1:{self.https_port}/headers'))
+
+ @pytest.mark.parametrize('handler', ['Urllib'], indirect=True)
+ @pytest.mark.parametrize('req,match,version_check', [
+ # https://github.com/python/cpython/blob/987b712b4aeeece336eed24fcc87a950a756c3e2/Lib/http/client.py#L1256
+ # bpo-39603: Check implemented in 3.7.9+, 3.8.5+
+ (
+ Request('http://127.0.0.1', method='GET\n'),
+ 'method can\'t contain control characters',
+ lambda v: v < (3, 7, 9) or (3, 8, 0) <= v < (3, 8, 5)
+ ),
+ # https://github.com/python/cpython/blob/987b712b4aeeece336eed24fcc87a950a756c3e2/Lib/http/client.py#L1265
+ # bpo-38576: Check implemented in 3.7.8+, 3.8.3+
+ (
+ Request('http://127.0.0. 1', method='GET'),
+ 'URL can\'t contain control characters',
+ lambda v: v < (3, 7, 8) or (3, 8, 0) <= v < (3, 8, 3)
+ ),
+ # https://github.com/python/cpython/blob/987b712b4aeeece336eed24fcc87a950a756c3e2/Lib/http/client.py#L1288C31-L1288C50
+ (Request('http://127.0.0.1', headers={'foo\n': 'bar'}), 'Invalid header name', None),
+ ])
+ def test_httplib_validation_errors(self, handler, req, match, version_check):
+ if version_check and version_check(sys.version_info):
+ pytest.skip(f'Python {sys.version} does not have the required validation for this test.')
+
+ with handler() as rh:
+ with pytest.raises(RequestError, match=match) as exc_info:
+ validate_and_send(rh, req)
+ assert not isinstance(exc_info.value, TransportError)
+
+
+@pytest.mark.parametrize('handler', ['Requests'], indirect=True)
+class TestRequestsRequestHandler(TestRequestHandlerBase):
+ @pytest.mark.parametrize('raised,expected', [
+ (lambda: requests.exceptions.ConnectTimeout(), TransportError),
+ (lambda: requests.exceptions.ReadTimeout(), TransportError),
+ (lambda: requests.exceptions.Timeout(), TransportError),
+ (lambda: requests.exceptions.ConnectionError(), TransportError),
+ (lambda: requests.exceptions.ProxyError(), ProxyError),
+ (lambda: requests.exceptions.SSLError('12[CERTIFICATE_VERIFY_FAILED]34'), CertificateVerifyError),
+ (lambda: requests.exceptions.SSLError(), SSLError),
+ (lambda: requests.exceptions.InvalidURL(), RequestError),
+ (lambda: requests.exceptions.InvalidHeader(), RequestError),
+ # catch-all: https://github.com/psf/requests/blob/main/src/requests/adapters.py#L535
+ (lambda: urllib3.exceptions.HTTPError(), TransportError),
+ (lambda: requests.exceptions.RequestException(), RequestError)
+ # (lambda: requests.exceptions.TooManyRedirects(), HTTPError) - Needs a response object
+ ])
+ def test_request_error_mapping(self, handler, monkeypatch, raised, expected):
+ with handler() as rh:
+ def mock_get_instance(*args, **kwargs):
+ class MockSession:
+ def request(self, *args, **kwargs):
+ raise raised()
+ return MockSession()
+
+ monkeypatch.setattr(rh, '_get_instance', mock_get_instance)
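+ # the mocked session raises before any network I/O, so only the error-mapping path is exercised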
+
+ with pytest.raises(expected) as exc_info:
+ rh.send(Request('http://fake'))
+
+ assert exc_info.type is expected
+
+ @pytest.mark.parametrize('raised,expected,match', [
+ (lambda: urllib3.exceptions.SSLError(), SSLError, None),
+ (lambda: urllib3.exceptions.TimeoutError(), TransportError, None),
+ (lambda: urllib3.exceptions.ReadTimeoutError(None, None, None), TransportError, None),
+ (lambda: urllib3.exceptions.ProtocolError(), TransportError, None),
+ (lambda: urllib3.exceptions.DecodeError(), TransportError, None),
+ (lambda: urllib3.exceptions.HTTPError(), TransportError, None), # catch-all
+ (
+ lambda: urllib3.exceptions.ProtocolError('error', http.client.IncompleteRead(partial=b'abc', expected=4)),
+ IncompleteRead,
+ '3 bytes read, 4 more expected'
+ ),
+ (
+ lambda: urllib3.exceptions.ProtocolError('error', urllib3.exceptions.IncompleteRead(partial=3, expected=5)),
+ IncompleteRead,
+ '3 bytes read, 5 more expected'
+ ),
+ ])
+ def test_response_error_mapping(self, handler, monkeypatch, raised, expected, match):
+ from requests.models import Response as RequestsResponse
+ from urllib3.response import HTTPResponse as Urllib3Response
+
+ from yt_dlp.networking._requests import RequestsResponseAdapter
+ requests_res = RequestsResponse()
+ requests_res.raw = Urllib3Response(body=b'', status=200)
+ res = RequestsResponseAdapter(requests_res)
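+ # errors raised while reading through the adapter should be mapped to yt-dlp networking exceptions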
+
+ def mock_read(*args, **kwargs):
+ raise raised()
+ monkeypatch.setattr(res.fp, 'read', mock_read)
+
+ with pytest.raises(expected, match=match) as exc_info:
+ res.read()
+
+ assert exc_info.type is expected
+
+ def test_close(self, handler, monkeypatch):
+ rh = handler()
+ session = rh._get_instance(cookiejar=rh.cookiejar)
+ called = False
+ original_close = session.close
+
+ def mock_close(*args, **kwargs):
+ nonlocal called
+ called = True
+ return original_close(*args, **kwargs)
+
+ monkeypatch.setattr(session, 'close', mock_close)
+ rh.close()
+ assert called
+
+
+def run_validation(handler, error, req, **handler_kwargs):
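+ # expect rh.validate(req) to raise `error`; when `error` is falsy, validation must succeed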
+ with handler(**handler_kwargs) as rh:
+ if error:
+ with pytest.raises(error):
+ rh.validate(req)
+ else:
+ rh.validate(req)
+
+
+class TestRequestHandlerValidation:
+
+ class ValidationRH(RequestHandler):
+ def _send(self, request):
+ raise RequestError('test')
+
+ class NoCheckRH(ValidationRH):
+ _SUPPORTED_FEATURES = None
+ _SUPPORTED_PROXY_SCHEMES = None
+ _SUPPORTED_URL_SCHEMES = None
+
+ def _check_extensions(self, extensions):
+ extensions.clear()
+
+ class HTTPSupportedRH(ValidationRH):
+ _SUPPORTED_URL_SCHEMES = ('http',)
+
+ URL_SCHEME_TESTS = [
+ # handler, [(url scheme, expected to fail, handler kwargs)]
+ ('Urllib', [
+ ('http', False, {}),
+ ('https', False, {}),
+ ('data', False, {}),
+ ('ftp', False, {}),
+ ('file', UnsupportedRequest, {}),
+ ('file', False, {'enable_file_urls': True}),
+ ]),
+ ('Requests', [
+ ('http', False, {}),
+ ('https', False, {}),
+ ]),
+ ('Websockets', [
+ ('ws', False, {}),
+ ('wss', False, {}),
+ ]),
+ (NoCheckRH, [('http', False, {})]),
+ (ValidationRH, [('http', UnsupportedRequest, {})])
+ ]
+
+ PROXY_SCHEME_TESTS = [
+ # handler, request scheme, [(proxy scheme, expected to fail)]
+ ('Urllib', 'http', [
+ ('http', False),
+ ('https', UnsupportedRequest),
+ ('socks4', False),
+ ('socks4a', False),
+ ('socks5', False),
+ ('socks5h', False),
+ ('socks', UnsupportedRequest),
+ ]),
+ ('Requests', 'http', [
+ ('http', False),
+ ('https', False),
+ ('socks4', False),
+ ('socks4a', False),
+ ('socks5', False),
+ ('socks5h', False),
+ ]),
+ (NoCheckRH, 'http', [('http', False)]),
+ (HTTPSupportedRH, 'http', [('http', UnsupportedRequest)]),
+ ('Websockets', 'ws', [('http', UnsupportedRequest)]),
+ ]
+
+ PROXY_KEY_TESTS = [
+ # handler, [(proxy key, expected to fail)]
+ ('Urllib', [
+ ('all', False),
+ ('unrelated', False),
+ ]),
+ ('Requests', [
+ ('all', False),
+ ('unrelated', False),
+ ]),
+ (NoCheckRH, [('all', False)]),
+ (HTTPSupportedRH, [('all', UnsupportedRequest)]),
+ (HTTPSupportedRH, [('no', UnsupportedRequest)]),
+ ]
+
+ EXTENSION_TESTS = [
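+ # handler, url scheme, [(extensions, expected to fail)]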
+ ('Urllib', 'http', [
+ ({'cookiejar': 'notacookiejar'}, AssertionError),
+ ({'cookiejar': YoutubeDLCookieJar()}, False),
+ ({'cookiejar': CookieJar()}, AssertionError),
+ ({'timeout': 1}, False),
+ ({'timeout': 'notatimeout'}, AssertionError),
+ ({'unsupported': 'value'}, UnsupportedRequest),
+ ]),
+ ('Requests', 'http', [
+ ({'cookiejar': 'notacookiejar'}, AssertionError),
+ ({'cookiejar': YoutubeDLCookieJar()}, False),
+ ({'timeout': 1}, False),
+ ({'timeout': 'notatimeout'}, AssertionError),
+ ({'unsupported': 'value'}, UnsupportedRequest),
+ ]),
+ (NoCheckRH, 'http', [
+ ({'cookiejar': 'notacookiejar'}, False),
+ ({'somerandom': 'test'}, False), # but any extension is allowed through
+ ]),
+ ('Websockets', 'ws', [
+ ({'cookiejar': YoutubeDLCookieJar()}, False),
+ ({'timeout': 2}, False),
+ ]),
+ ]
+
+ @pytest.mark.parametrize('handler,scheme,fail,handler_kwargs', [
+ (handler_tests[0], scheme, fail, handler_kwargs)
+ for handler_tests in URL_SCHEME_TESTS
+ for scheme, fail, handler_kwargs in handler_tests[1]
+ ], indirect=['handler'])
+ def test_url_scheme(self, handler, scheme, fail, handler_kwargs):
+ run_validation(handler, fail, Request(f'{scheme}://'), **(handler_kwargs or {}))
+
+ @pytest.mark.parametrize('handler,fail', [('Urllib', False), ('Requests', False)], indirect=['handler'])
+ def test_no_proxy(self, handler, fail):
+ run_validation(handler, fail, Request('http://', proxies={'no': '127.0.0.1,github.com'}))
+ run_validation(handler, fail, Request('http://'), proxies={'no': '127.0.0.1,github.com'})
+
+ @pytest.mark.parametrize('handler,proxy_key,fail', [
+ (handler_tests[0], proxy_key, fail)
+ for handler_tests in PROXY_KEY_TESTS
+ for proxy_key, fail in handler_tests[1]
+ ], indirect=['handler'])
+ def test_proxy_key(self, handler, proxy_key, fail):
+ run_validation(handler, fail, Request('http://', proxies={proxy_key: 'http://example.com'}))
+ run_validation(handler, fail, Request('http://'), proxies={proxy_key: 'http://example.com'})
+
+ @pytest.mark.parametrize('handler,req_scheme,scheme,fail', [
+ (handler_tests[0], handler_tests[1], scheme, fail)
+ for handler_tests in PROXY_SCHEME_TESTS
+ for scheme, fail in handler_tests[2]
+ ], indirect=['handler'])
+ def test_proxy_scheme(self, handler, req_scheme, scheme, fail):
+ run_validation(handler, fail, Request(f'{req_scheme}://', proxies={req_scheme: f'{scheme}://example.com'}))
+ run_validation(handler, fail, Request(f'{req_scheme}://'), proxies={req_scheme: f'{scheme}://example.com'})
+
+ @pytest.mark.parametrize('handler', ['Urllib', HTTPSupportedRH, 'Requests'], indirect=True)
+ def test_empty_proxy(self, handler):
+ run_validation(handler, False, Request('http://', proxies={'http': None}))
+ run_validation(handler, False, Request('http://'), proxies={'http': None})
+
+ @pytest.mark.parametrize('proxy_url', ['//example.com', 'example.com', '127.0.0.1', '/a/b/c'])
+ @pytest.mark.parametrize('handler', ['Urllib', 'Requests'], indirect=True)
+ def test_invalid_proxy_url(self, handler, proxy_url):
+ run_validation(handler, UnsupportedRequest, Request('http://', proxies={'http': proxy_url}))
+
+ @pytest.mark.parametrize('handler,scheme,extensions,fail', [
+ (handler_tests[0], handler_tests[1], extensions, fail)
+ for handler_tests in EXTENSION_TESTS
+ for extensions, fail in handler_tests[2]
+ ], indirect=['handler'])
+ def test_extension(self, handler, scheme, extensions, fail):
+ run_validation(
+ handler, fail, Request(f'{scheme}://', extensions=extensions))
+
+ def test_invalid_request_type(self):
+ rh = self.ValidationRH(logger=FakeLogger())
+ for method in (rh.validate, rh.send):
+ with pytest.raises(TypeError, match='Expected an instance of Request'):
+ method('not a request')
+
+
+class FakeResponse(Response):
+ def __init__(self, request):
+ # XXX: we could make request part of standard response interface
+ self.request = request
+ super().__init__(fp=io.BytesIO(b''), headers={}, url=request.url)
+
+
+class FakeRH(RequestHandler):
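+ # Accepts any request; ssl:// URLs raise SSLError with the remainder of the URL as the message,
+ # everything else gets a FakeResponse echoing the request back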
+
+ def _validate(self, request):
+ return
+
+ def _send(self, request: Request):
+ if request.url.startswith('ssl://'):
+ raise SSLError(request.url[len('ssl://'):])
+ return FakeResponse(request)
+
+
+class FakeRHYDL(FakeYDL):
+ def __init__(self, *args, **kwargs):
+ super().__init__(*args, **kwargs)
+ self._request_director = self.build_request_director([FakeRH])
+
+
+class AllUnsupportedRHYDL(FakeYDL):
+
+ def __init__(self, *args, **kwargs):
+
+ class UnsupportedRH(RequestHandler):
+ def _send(self, request: Request):
+ pass
+
+ _SUPPORTED_FEATURES = ()
+ _SUPPORTED_PROXY_SCHEMES = ()
+ _SUPPORTED_URL_SCHEMES = ()
+
+ super().__init__(*args, **kwargs)
+ self._request_director = self.build_request_director([UnsupportedRH])
+
+
+class TestRequestDirector:
+
+ def test_handler_operations(self):
+ director = RequestDirector(logger=FakeLogger())
+ handler = FakeRH(logger=FakeLogger())
+ director.add_handler(handler)
+ assert director.handlers.get(FakeRH.RH_KEY) is handler
+
+ # Adding a handler with the same RH_KEY should overwrite the existing one
+ handler2 = FakeRH(logger=FakeLogger())
+ director.add_handler(handler2)
+ assert director.handlers.get(FakeRH.RH_KEY) is not handler
+ assert director.handlers.get(FakeRH.RH_KEY) is handler2
+ assert len(director.handlers) == 1
+
+ class AnotherFakeRH(FakeRH):
+ pass
+ director.add_handler(AnotherFakeRH(logger=FakeLogger()))
+ assert len(director.handlers) == 2
+ assert director.handlers.get(AnotherFakeRH.RH_KEY).RH_KEY == AnotherFakeRH.RH_KEY
+
+ director.handlers.pop(FakeRH.RH_KEY, None)
+ assert director.handlers.get(FakeRH.RH_KEY) is None
+ assert len(director.handlers) == 1
+
+ # RequestErrors should pass through
+ with pytest.raises(SSLError):
+ director.send(Request('ssl://something'))
+
+ def test_send(self):
+ director = RequestDirector(logger=FakeLogger())
+ with pytest.raises(RequestError):
+ director.send(Request('any://'))
+ director.add_handler(FakeRH(logger=FakeLogger()))
+ assert isinstance(director.send(Request('http://')), FakeResponse)
+
+ def test_unsupported_handlers(self):
+ class SupportedRH(RequestHandler):
+ _SUPPORTED_URL_SCHEMES = ['http']
+
+ def _send(self, request: Request):
+ return Response(fp=io.BytesIO(b'supported'), headers={}, url=request.url)
+
+ director = RequestDirector(logger=FakeLogger())
+ director.add_handler(SupportedRH(logger=FakeLogger()))
+ director.add_handler(FakeRH(logger=FakeLogger()))
+
+ # First should take preference
+ assert director.send(Request('http://')).read() == b'supported'
+ assert director.send(Request('any://')).read() == b''
+
+ director.handlers.pop(FakeRH.RH_KEY)
+ with pytest.raises(NoSupportingHandlers):
+ director.send(Request('any://'))
+
+ def test_unexpected_error(self):
+ director = RequestDirector(logger=FakeLogger())
+
+ class UnexpectedRH(FakeRH):
+ def _send(self, request: Request):
+ raise TypeError('something')
+
+ director.add_handler(UnexpectedRH(logger=FakeLogger()))
+ with pytest.raises(NoSupportingHandlers, match=r'1 unexpected error'):
+ director.send(Request('any://'))
+
+ director.handlers.clear()
+ assert len(director.handlers) == 0
+
+ # Should not be fatal
+ director.add_handler(FakeRH(logger=FakeLogger()))
+ director.add_handler(UnexpectedRH(logger=FakeLogger()))
+ assert director.send(Request('any://'))
+
+ def test_preference(self):
+ director = RequestDirector(logger=FakeLogger())
+ director.add_handler(FakeRH(logger=FakeLogger()))
+
+ class SomeRH(RequestHandler):
+ _SUPPORTED_URL_SCHEMES = ['http']
+
+ def _send(self, request: Request):
+ return Response(fp=io.BytesIO(b'supported'), headers={}, url=request.url)
+
+ def some_preference(rh, request):
+ return (0 if not isinstance(rh, SomeRH)
+ else 100 if 'prefer' in request.headers
+ else -1)
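+ # higher preference values are tried first; the negative value deprioritizes SomeRH
+ # unless the request opts in via the 'prefer' header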
+
+ director.add_handler(SomeRH(logger=FakeLogger()))
+ director.preferences.add(some_preference)
+
+ assert director.send(Request('http://')).read() == b''
+ assert director.send(Request('http://', headers={'prefer': '1'})).read() == b'supported'
+
+ def test_close(self, monkeypatch):
+ director = RequestDirector(logger=FakeLogger())
+ director.add_handler(FakeRH(logger=FakeLogger()))
+ called = False
+
+ def mock_close(*args, **kwargs):
+ nonlocal called
+ called = True
+
+ monkeypatch.setattr(director.handlers[FakeRH.RH_KEY], 'close', mock_close)
+ director.close()
+ assert called
+
+
+# XXX: do we want to move this to test_YoutubeDL.py?
+class TestYoutubeDLNetworking:
+
+ @staticmethod
+ def build_handler(ydl, handler: RequestHandler = FakeRH):
+ return ydl.build_request_director([handler]).handlers.get(handler.RH_KEY)
+
+ def test_compat_opener(self):
+ with FakeYDL() as ydl:
+ with warnings.catch_warnings():
+ warnings.simplefilter('ignore', category=DeprecationWarning)
+ assert isinstance(ydl._opener, urllib.request.OpenerDirector)
+
+ @pytest.mark.parametrize('proxy,expected', [
+ ('http://127.0.0.1:8080', {'all': 'http://127.0.0.1:8080'}),
+ ('', {'all': '__noproxy__'}),
+ (None, {'http': 'http://127.0.0.1:8081', 'https': 'http://127.0.0.1:8081'}) # from the env; the HTTP proxy is also applied to https
+ ])
+ def test_proxy(self, proxy, expected):
+ old_http_proxy = os.environ.get('HTTP_PROXY')
+ try:
+ os.environ['HTTP_PROXY'] = 'http://127.0.0.1:8081' # ensure that provided proxies override env
+ with FakeYDL({'proxy': proxy}) as ydl:
+ assert ydl.proxies == expected
+ finally:
+ if old_http_proxy:
+ os.environ['HTTP_PROXY'] = old_http_proxy
+ else:
+ os.environ.pop('HTTP_PROXY', None) # don't leak the test value into other tests
+
+ def test_compat_request(self):
+ with FakeRHYDL() as ydl:
+ assert ydl.urlopen('test://')
+ urllib_req = urllib.request.Request('http://foo.bar', data=b'test', method='PUT', headers={'X-Test': '1'})
+ urllib_req.add_unredirected_header('Cookie', 'bob=bob')
+ urllib_req.timeout = 2
+ with warnings.catch_warnings():
+ warnings.simplefilter('ignore', category=DeprecationWarning)
+ req = ydl.urlopen(urllib_req).request
+ assert req.url == urllib_req.get_full_url()
+ assert req.data == urllib_req.data
+ assert req.method == urllib_req.get_method()
+ assert 'X-Test' in req.headers
+ assert 'Cookie' in req.headers
+ assert req.extensions.get('timeout') == 2
+
+ with pytest.raises(AssertionError):
+ ydl.urlopen(None)
+
+ def test_extract_basic_auth(self):
+ with FakeRHYDL() as ydl:
+ res = ydl.urlopen(Request('http://user:pass@foo.bar'))
+ assert res.request.headers['Authorization'] == 'Basic dXNlcjpwYXNz'
+
+ def test_sanitize_url(self):
+ with FakeRHYDL() as ydl:
+ res = ydl.urlopen(Request('httpss://foo.bar'))
+ assert res.request.url == 'https://foo.bar'
+
+ def test_file_urls_error(self):
+ # use urllib handler
+ with FakeYDL() as ydl:
+ with pytest.raises(RequestError, match=r'file:// URLs are disabled by default'):
+ ydl.urlopen('file://')
+
+ @pytest.mark.parametrize('scheme', ['ws', 'wss'])
+ def test_websocket_unavailable_error(self, scheme):
+ with AllUnsupportedRHYDL() as ydl:
+ with pytest.raises(RequestError, match=r'This request requires WebSocket support'):
+ ydl.urlopen(f'{scheme}://')
+
+ def test_legacy_server_connect_error(self):
+ with FakeRHYDL() as ydl:
+ for error in ('UNSAFE_LEGACY_RENEGOTIATION_DISABLED', 'SSLV3_ALERT_HANDSHAKE_FAILURE'):
+ with pytest.raises(RequestError, match=r'Try using --legacy-server-connect'):
+ ydl.urlopen(f'ssl://{error}')
+
+ with pytest.raises(SSLError, match='testerror'):
+ ydl.urlopen('ssl://testerror')
+
+ @pytest.mark.parametrize('proxy_key,proxy_url,expected', [
+ ('http', '__noproxy__', None),
+ ('no', '127.0.0.1,foo.bar', '127.0.0.1,foo.bar'),
+ ('https', 'example.com', 'http://example.com'),
+ ('https', '//example.com', 'http://example.com'),
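+ # socks5 is rewritten to socks5h so that DNS resolution happens on the proxy: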
+ ('https', 'socks5://example.com', 'socks5h://example.com'),
+ ('http', 'socks://example.com', 'socks4://example.com'),
+ ('http', 'socks4://example.com', 'socks4://example.com'),
+ ('unrelated', '/bad/proxy', '/bad/proxy'), # clean_proxies should ignore bad proxies
+ ])
+ def test_clean_proxy(self, proxy_key, proxy_url, expected):
+ # proxies should be cleaned in urlopen()
+ with FakeRHYDL() as ydl:
+ req = ydl.urlopen(Request('test://', proxies={proxy_key: proxy_url})).request
+ assert req.proxies[proxy_key] == expected
+
+ # and should also be cleaned when building the handler
+ env_key = f'{proxy_key.upper()}_PROXY'
+ old_env_proxy = os.environ.get(env_key)
+ try:
+ os.environ[env_key] = proxy_url # ensure that provided proxies override env
+ with FakeYDL() as ydl:
+ rh = self.build_handler(ydl)
+ assert rh.proxies[proxy_key] == expected
+ finally:
+ if old_env_proxy:
+ os.environ[env_key] = old_env_proxy
+ else:
+ os.environ.pop(env_key, None) # restore the unset state
+
+ def test_clean_proxy_header(self):
+ with FakeRHYDL() as ydl:
+ req = ydl.urlopen(Request('test://', headers={'ytdl-request-proxy': '//foo.bar'})).request
+ assert 'ytdl-request-proxy' not in req.headers
+ assert req.proxies == {'all': 'http://foo.bar'}
+
+ with FakeYDL({'http_headers': {'ytdl-request-proxy': '//foo.bar'}}) as ydl:
+ rh = self.build_handler(ydl)
+ assert 'ytdl-request-proxy' not in rh.headers
+ assert rh.proxies == {'all': 'http://foo.bar'}
+
+ def test_clean_header(self):
+ with FakeRHYDL() as ydl:
+ res = ydl.urlopen(Request('test://', headers={'Youtubedl-no-compression': True}))
+ assert 'Youtubedl-no-compression' not in res.request.headers
+ assert res.request.headers.get('Accept-Encoding') == 'identity'
+
+ with FakeYDL({'http_headers': {'Youtubedl-no-compression': True}}) as ydl:
+ rh = self.build_handler(ydl)
+ assert 'Youtubedl-no-compression' not in rh.headers
+ assert rh.headers.get('Accept-Encoding') == 'identity'
+
+ with FakeYDL({'http_headers': {'Ytdl-socks-proxy': 'socks://localhost:1080'}}) as ydl:
+ rh = self.build_handler(ydl)
+ assert 'Ytdl-socks-proxy' not in rh.headers
+
+ def test_build_handler_params(self):
+ with FakeYDL({
+ 'http_headers': {'test': 'testtest'},
+ 'socket_timeout': 2,
+ 'proxy': 'http://127.0.0.1:8080',
+ 'source_address': '127.0.0.45',
+ 'debug_printtraffic': True,
+ 'compat_opts': ['no-certifi'],
+ 'nocheckcertificate': True,
+ 'legacyserverconnect': True,
+ }) as ydl:
+ rh = self.build_handler(ydl)
+ assert rh.headers.get('test') == 'testtest'
+ assert 'Accept' in rh.headers # ensure std_headers are still there
+ assert rh.timeout == 2
+ assert rh.proxies.get('all') == 'http://127.0.0.1:8080'
+ assert rh.source_address == '127.0.0.45'
+ assert rh.verbose is True
+ assert rh.prefer_system_certs is True
+ assert rh.verify is False
+ assert rh.legacy_ssl_support is True
+
+ @pytest.mark.parametrize('ydl_params', [
+ {'client_certificate': 'fakecert.crt'},
+ {'client_certificate': 'fakecert.crt', 'client_certificate_key': 'fakekey.key'},
+ {'client_certificate': 'fakecert.crt', 'client_certificate_key': 'fakekey.key', 'client_certificate_password': 'foobar'},
+ {'client_certificate_key': 'fakekey.key', 'client_certificate_password': 'foobar'},
+ ])
+ def test_client_certificate(self, ydl_params):
+ with FakeYDL(ydl_params) as ydl:
+ rh = self.build_handler(ydl)
+ assert rh._client_cert == ydl_params # XXX: Too bound to implementation
+
+ def test_urllib_file_urls(self):
+ with FakeYDL({'enable_file_urls': False}) as ydl:
+ rh = self.build_handler(ydl, UrllibRH)
+ assert rh.enable_file_urls is False
+
+ with FakeYDL({'enable_file_urls': True}) as ydl:
+ rh = self.build_handler(ydl, UrllibRH)
+ assert rh.enable_file_urls is True
+
+ def test_compat_opt_prefer_urllib(self):
+ # This assumes urllib only has a preference when this compat opt is given
+ with FakeYDL({'compat_opts': ['prefer-legacy-http-handler']}) as ydl:
+ director = ydl.build_request_director([UrllibRH])
+ assert len(director.preferences) == 1
+ assert director.preferences.pop()(UrllibRH, None)
+
+
+class TestRequest:
+
+ def test_query(self):
+ req = Request('http://example.com?q=something', query={'v': 'xyz'})
+ assert req.url == 'http://example.com?q=something&v=xyz'
+
+ req.update(query={'v': '123'})
+ assert req.url == 'http://example.com?q=something&v=123'
+ req.update(url='http://example.com', query={'v': 'xyz'})
+ assert req.url == 'http://example.com?v=xyz'
+
+ def test_method(self):
+ req = Request('http://example.com')
+ assert req.method == 'GET'
+ req.data = b'test'
+ assert req.method == 'POST'
+ req.data = None
+ assert req.method == 'GET'
+ req.data = b'test2'
+ req.method = 'PUT'
+ assert req.method == 'PUT'
+ req.data = None
+ assert req.method == 'PUT'
+ with pytest.raises(TypeError):
+ req.method = 1
+
+ def test_request_helpers(self):
+ assert HEADRequest('http://example.com').method == 'HEAD'
+ assert PUTRequest('http://example.com').method == 'PUT'
+
+ def test_headers(self):
+ req = Request('http://example.com', headers={'tesT': 'test'})
+ assert req.headers == HTTPHeaderDict({'test': 'test'})
+ req.update(headers={'teSt2': 'test2'})
+ assert req.headers == HTTPHeaderDict({'test': 'test', 'test2': 'test2'})
+
+ req.headers = new_headers = HTTPHeaderDict({'test': 'test'})
+ assert req.headers == HTTPHeaderDict({'test': 'test'})
+ assert req.headers is new_headers
+
+ # test that assigning a plain dict converts it to a case-insensitive dict
+ req.headers = new_headers = {'test2': 'test2'}
+ assert isinstance(req.headers, HTTPHeaderDict)
+ assert req.headers is not new_headers
+
+ with pytest.raises(TypeError):
+ req.headers = None
+
+ def test_data_type(self):
+ req = Request('http://example.com')
+ assert req.data is None
+ # test bytes is allowed
+ req.data = b'test'
+ assert req.data == b'test'
+ # test iterable of bytes is allowed
+ i = [b'test', b'test2']
+ req.data = i
+ assert req.data == i
+
+ # test file-like object is allowed
+ f = io.BytesIO(b'test')
+ req.data = f
+ assert req.data == f
+
+ # common mistake: str is not allowed
+ with pytest.raises(TypeError):
+ req.data = 'test'
+ assert req.data != 'test'
+
+ # common mistake: dict is not allowed
+ with pytest.raises(TypeError):
+ req.data = {'test': 'test'}
+ assert req.data != {'test': 'test'}
+
+ def test_content_length_header(self):
+ req = Request('http://example.com', headers={'Content-Length': '0'}, data=b'')
+ assert req.headers.get('Content-Length') == '0'
+
+ req.data = b'test'
+ assert 'Content-Length' not in req.headers
+
+ req = Request('http://example.com', headers={'Content-Length': '10'})
+ assert 'Content-Length' not in req.headers
+
+ def test_content_type_header(self):
+ req = Request('http://example.com', headers={'Content-Type': 'test'}, data=b'test')
+ assert req.headers.get('Content-Type') == 'test'
+ req.data = b'test2'
+ assert req.headers.get('Content-Type') == 'test'
+ req.data = None
+ assert 'Content-Type' not in req.headers
+ req.data = b'test3'
+ assert req.headers.get('Content-Type') == 'application/x-www-form-urlencoded'
+
+ def test_update_req(self):
+ req = Request('http://example.com')
+ assert req.data is None
+ assert req.method == 'GET'
+ assert 'Content-Type' not in req.headers
+ # Test that zero-byte payloads will be sent
+ req.update(data=b'')
+ assert req.data == b''
+ assert req.method == 'POST'
+ assert req.headers.get('Content-Type') == 'application/x-www-form-urlencoded'
+
+ def test_proxies(self):
+ req = Request(url='http://example.com', proxies={'http': 'http://127.0.0.1:8080'})
+ assert req.proxies == {'http': 'http://127.0.0.1:8080'}
+
+ def test_extensions(self):
+ req = Request(url='http://example.com', extensions={'timeout': 2})
+ assert req.extensions == {'timeout': 2}
+
+ def test_copy(self):
+ req = Request(
+ url='http://example.com',
+ extensions={'cookiejar': CookieJar()},
+ headers={'Accept-Encoding': 'br'},
+ proxies={'http': 'http://127.0.0.1'},
+ data=[b'123']
+ )
+ req_copy = req.copy()
+ assert req_copy is not req
+ assert req_copy.url == req.url
+ assert req_copy.headers == req.headers
+ assert req_copy.headers is not req.headers
+ assert req_copy.proxies == req.proxies
+ assert req_copy.proxies is not req.proxies
+
+ # Data cannot be copied; the copy shares the same object
+ assert req_copy.data == req.data
+ assert req_copy.data is req.data
+
+ # Shallow copy extensions
+ assert req_copy.extensions is not req.extensions
+ assert req_copy.extensions['cookiejar'] == req.extensions['cookiejar']
+
+ # Subclasses are copied by default
+ class AnotherRequest(Request):
+ pass
+
+ req = AnotherRequest(url='http://127.0.0.1')
+ assert isinstance(req.copy(), AnotherRequest)
+
+ def test_url(self):
+ req = Request(url='https://фtest.example.com/ some spaceв?ä=c')
+ assert req.url == 'https://xn--test-z6d.example.com/%20some%20space%D0%B2?%C3%A4=c'
+
+ assert Request(url='//example.com').url == 'http://example.com'
+
+ with pytest.raises(TypeError):
+ Request(url='https://').url = None
+
+
+class TestResponse:
+
+ @pytest.mark.parametrize('reason,status,expected', [
+ ('custom', 200, 'custom'),
+ (None, 404, 'Not Found'), # fallback status
+ ('', 403, 'Forbidden'),
+ (None, 999, None)
+ ])
+ def test_reason(self, reason, status, expected):
+ res = Response(io.BytesIO(b''), url='test://', headers={}, status=status, reason=reason)
+ assert res.reason == expected
+
+ def test_headers(self):
+ headers = Message()
+ headers.add_header('Test', 'test')
+ headers.add_header('Test', 'test2')
+ headers.add_header('content-encoding', 'br')
+ res = Response(io.BytesIO(b''), headers=headers, url='test://')
+ assert res.headers.get_all('test') == ['test', 'test2']
+ assert 'Content-Encoding' in res.headers
+
+ def test_get_header(self):
+ headers = Message()
+ headers.add_header('Set-Cookie', 'cookie1')
+ headers.add_header('Set-cookie', 'cookie2')
+ headers.add_header('Test', 'test')
+ headers.add_header('Test', 'test2')
+ res = Response(io.BytesIO(b''), headers=headers, url='test://')
+ assert res.get_header('test') == 'test, test2'
+ assert res.get_header('set-Cookie') == 'cookie1'
+ assert res.get_header('notexist', 'default') == 'default'
+
+ def test_compat(self):
+ res = Response(io.BytesIO(b''), url='test://', status=404, headers={'test': 'test'})
+ with warnings.catch_warnings():
+ warnings.simplefilter('ignore', category=DeprecationWarning)
+ assert res.code == res.getcode() == res.status
+ assert res.geturl() == res.url
+ assert res.info() is res.headers
+ assert res.getheader('test') == res.get_header('test')
diff --git a/test/test_networking_utils.py b/test/test_networking_utils.py
new file mode 100644
index 0000000..b7b7143
--- /dev/null
+++ b/test/test_networking_utils.py
@@ -0,0 +1,208 @@
+#!/usr/bin/env python3
+
+# Allow direct execution
+import os
+import sys
+
+import pytest
+
+sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
+
+import io
+import random
+import ssl
+
+from yt_dlp.cookies import YoutubeDLCookieJar
+from yt_dlp.dependencies import certifi
+from yt_dlp.networking import Response
+from yt_dlp.networking._helper import (
+ InstanceStoreMixin,
+ add_accept_encoding_header,
+ get_redirect_method,
+ make_socks_proxy_opts,
+ select_proxy,
+ ssl_load_certs,
+)
+from yt_dlp.networking.exceptions import (
+ HTTPError,
+ IncompleteRead,
+)
+from yt_dlp.socks import ProxyType
+from yt_dlp.utils.networking import HTTPHeaderDict
+
+TEST_DIR = os.path.dirname(os.path.abspath(__file__))
+
+
+class TestNetworkingUtils:
+
+ def test_select_proxy(self):
+ proxies = {
+ 'all': 'socks5://example.com',
+ 'http': 'http://example.com:1080',
+ 'no': 'bypass.example.com,yt-dl.org'
+ }
+
+ assert select_proxy('https://example.com', proxies) == proxies['all']
+ assert select_proxy('http://example.com', proxies) == proxies['http']
+ assert select_proxy('http://bypass.example.com', proxies) is None
+ assert select_proxy('https://yt-dl.org', proxies) is None
+
+ @pytest.mark.parametrize('socks_proxy,expected', [
+ ('socks5h://example.com', {
+ 'proxytype': ProxyType.SOCKS5,
+ 'addr': 'example.com',
+ 'port': 1080,
+ 'rdns': True,
+ 'username': None,
+ 'password': None
+ }),
+ ('socks5://user:@example.com:5555', {
+ 'proxytype': ProxyType.SOCKS5,
+ 'addr': 'example.com',
+ 'port': 5555,
+ 'rdns': False,
+ 'username': 'user',
+ 'password': ''
+ }),
+ ('socks4://u%40ser:pa%20ss@127.0.0.1:1080', {
+ 'proxytype': ProxyType.SOCKS4,
+ 'addr': '127.0.0.1',
+ 'port': 1080,
+ 'rdns': False,
+ 'username': 'u@ser',
+ 'password': 'pa ss'
+ }),
+ ('socks4a://:pa%20ss@127.0.0.1', {
+ 'proxytype': ProxyType.SOCKS4A,
+ 'addr': '127.0.0.1',
+ 'port': 1080,
+ 'rdns': True,
+ 'username': '',
+ 'password': 'pa ss'
+ })
+ ])
+ def test_make_socks_proxy_opts(self, socks_proxy, expected):
+ assert make_socks_proxy_opts(socks_proxy) == expected
+
+ def test_make_socks_proxy_unknown(self):
+ with pytest.raises(ValueError, match='Unknown SOCKS proxy version: socks'):
+ make_socks_proxy_opts('socks://127.0.0.1')
+
+ @pytest.mark.skipif(not certifi, reason='certifi is not installed')
+ def test_load_certifi(self):
+ context_certifi = ssl.SSLContext(ssl.PROTOCOL_TLS_CLIENT)
+ context_certifi.load_verify_locations(cafile=certifi.where())
+ context = ssl.SSLContext(ssl.PROTOCOL_TLS_CLIENT)
+ ssl_load_certs(context, use_certifi=True)
+ assert context.get_ca_certs() == context_certifi.get_ca_certs()
+
+ context_default = ssl.SSLContext(ssl.PROTOCOL_TLS_CLIENT)
+ context_default.load_default_certs()
+ context = ssl.SSLContext(ssl.PROTOCOL_TLS_CLIENT)
+ ssl_load_certs(context, use_certifi=False)
+ assert context.get_ca_certs() == context_default.get_ca_certs()
+
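+ # if the system store already matches certifi's, the assertions above cannot tell the two code paths apart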
+ if context_default.get_ca_certs() == context_certifi.get_ca_certs():
+ pytest.skip('System uses certifi as default. The test is not valid')
+
+ @pytest.mark.parametrize('method,status,expected', [
+ ('GET', 303, 'GET'),
+ ('HEAD', 303, 'HEAD'),
+ ('PUT', 303, 'GET'),
+ ('POST', 301, 'GET'),
+ ('HEAD', 301, 'HEAD'),
+ ('POST', 302, 'GET'),
+ ('HEAD', 302, 'HEAD'),
+ ('PUT', 302, 'PUT'),
+ ('POST', 308, 'POST'),
+ ('POST', 307, 'POST'),
+ ('HEAD', 308, 'HEAD'),
+ ('HEAD', 307, 'HEAD'),
+ ])
+ def test_get_redirect_method(self, method, status, expected):
+ assert get_redirect_method(method, status) == expected
+
+ @pytest.mark.parametrize('headers,supported_encodings,expected', [
+ ({'Accept-Encoding': 'br'}, ['gzip', 'br'], {'Accept-Encoding': 'br'}),
+ ({}, ['gzip', 'br'], {'Accept-Encoding': 'gzip, br'}),
+ ({'Content-type': 'application/json'}, [], {'Content-type': 'application/json', 'Accept-Encoding': 'identity'}),
+ ])
+ def test_add_accept_encoding_header(self, headers, supported_encodings, expected):
+ headers = HTTPHeaderDict(headers)
+ add_accept_encoding_header(headers, supported_encodings)
+ assert headers == HTTPHeaderDict(expected)
+
+
+class TestInstanceStoreMixin:
+
+ class FakeInstanceStoreMixin(InstanceStoreMixin):
+ def _create_instance(self, **kwargs):
+ return random.randint(0, 1000000)
+
+ def _close_instance(self, instance):
+ pass
+
+ def test_mixin(self):
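+ # instances are cached per unique kwargs: equal (even nested) kwargs must return the same instance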
+ mixin = self.FakeInstanceStoreMixin()
+ assert mixin._get_instance(d={'a': 1, 'b': 2, 'c': {'d', 4}}) == mixin._get_instance(d={'a': 1, 'b': 2, 'c': {'d', 4}})
+
+ assert mixin._get_instance(d={'a': 1, 'b': 2, 'c': {'e', 4}}) != mixin._get_instance(d={'a': 1, 'b': 2, 'c': {'d', 4}})
+
+ assert mixin._get_instance(d={'a': 1, 'b': 2, 'c': {'d', 4}}) != mixin._get_instance(d={'a': 1, 'b': 2, 'g': {'d', 4}})
+
+ assert mixin._get_instance(d={'a': 1}, e=[1, 2, 3]) == mixin._get_instance(d={'a': 1}, e=[1, 2, 3])
+
+ assert mixin._get_instance(d={'a': 1}, e=[1, 2, 3]) != mixin._get_instance(d={'a': 1}, e=[1, 2, 3, 4])
+
+ cookiejar = YoutubeDLCookieJar()
+ assert mixin._get_instance(b=[1, 2], c=cookiejar) == mixin._get_instance(b=[1, 2], c=cookiejar)
+
+ assert mixin._get_instance(b=[1, 2], c=cookiejar) != mixin._get_instance(b=[1, 2], c=YoutubeDLCookieJar())
+
+ # Different order
+ assert mixin._get_instance(c=cookiejar, b=[1, 2]) == mixin._get_instance(b=[1, 2], c=cookiejar)
+
+ m = mixin._get_instance(t=1234)
+ assert mixin._get_instance(t=1234) == m
+ mixin._clear_instances()
+ assert mixin._get_instance(t=1234) != m
+
+
+class TestNetworkingExceptions:
+
+ @staticmethod
+ def create_response(status):
+ return Response(fp=io.BytesIO(b'test'), url='http://example.com', headers={'tesT': 'test'}, status=status)
+
+ def test_http_error(self):
+
+ response = self.create_response(403)
+ error = HTTPError(response)
+
+ assert error.status == 403
+ assert str(error) == error.msg == 'HTTP Error 403: Forbidden'
+ assert error.reason == response.reason
+ assert error.response is response
+
+ data = error.response.read()
+ assert data == b'test'
+ assert repr(error) == '<HTTPError 403: Forbidden>'
+
+ def test_redirect_http_error(self):
+ response = self.create_response(301)
+ error = HTTPError(response, redirect_loop=True)
+ assert str(error) == error.msg == 'HTTP Error 301: Moved Permanently (redirect loop detected)'
+ assert error.reason == 'Moved Permanently'
+
+ def test_incomplete_read_error(self):
+ error = IncompleteRead(4, 3, cause='test')
+ assert isinstance(error, IncompleteRead)
+ assert repr(error) == '<IncompleteRead: 4 bytes read, 3 more expected>'
+ assert str(error) == error.msg == '4 bytes read, 3 more expected'
+ assert error.partial == 4
+ assert error.expected == 3
+ assert error.cause == 'test'
+
+ error = IncompleteRead(3)
+ assert repr(error) == '<IncompleteRead: 3 bytes read>'
+ assert str(error) == '3 bytes read'
diff --git a/test/test_overwrites.py b/test/test_overwrites.py
new file mode 100644
index 0000000..6954c07
--- /dev/null
+++ b/test/test_overwrites.py
@@ -0,0 +1,54 @@
+#!/usr/bin/env python3
+
+# Allow direct execution
+import os
+import sys
+import unittest
+
+sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
+
+
+import subprocess
+
+from test.helper import is_download_test, try_rm
+
+root_dir = os.path.dirname(os.path.dirname(os.path.abspath(__file__)))
+download_file = os.path.join(root_dir, 'test.webm')
+
+
+@is_download_test
+class TestOverwrites(unittest.TestCase):
+ def setUp(self):
+ # create an empty file
+ open(download_file, 'a').close()
+
+ def test_default_overwrites(self):
+ outp = subprocess.Popen(
+ [
+ sys.executable, 'yt_dlp/__main__.py',
+ '-o', 'test.webm',
+ 'https://www.youtube.com/watch?v=jNQXAC9IVRw'
+ ], cwd=root_dir, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
+ sout, serr = outp.communicate()
+ self.assertIn(b'has already been downloaded', sout)
+ # if the file has no content, it has not been redownloaded
+ self.assertLess(os.path.getsize(download_file), 1)
+
+ def test_yes_overwrites(self):
+ outp = subprocess.Popen(
+ [
+ sys.executable, 'yt_dlp/__main__.py', '--yes-overwrites',
+ '-o', 'test.webm',
+ 'https://www.youtube.com/watch?v=jNQXAC9IVRw'
+ ], cwd=root_dir, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
+ sout, serr = outp.communicate()
+ self.assertNotIn(b'has already been downloaded', sout)
+ # the file is redownloaded, so it should no longer be empty
+ self.assertGreater(os.path.getsize(download_file), 1)
+
+ def tearDown(self):
+ try_rm(os.path.join(root_dir, 'test.webm'))
+
+
+if __name__ == '__main__':
+ unittest.main()
diff --git a/test/test_plugins.py b/test/test_plugins.py
new file mode 100644
index 0000000..6cde579
--- /dev/null
+++ b/test/test_plugins.py
@@ -0,0 +1,73 @@
+import importlib
+import os
+import shutil
+import sys
+import unittest
+from pathlib import Path
+
+sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
+TEST_DATA_DIR = Path(os.path.dirname(os.path.abspath(__file__)), 'testdata')
+sys.path.append(str(TEST_DATA_DIR))
+importlib.invalidate_caches()
+
+from yt_dlp.plugins import PACKAGE_NAME, directories, load_plugins
+
+
+class TestPlugins(unittest.TestCase):
+
+ TEST_PLUGIN_DIR = TEST_DATA_DIR / PACKAGE_NAME
+
+ def test_directories_containing_plugins(self):
+ self.assertIn(self.TEST_PLUGIN_DIR, map(Path, directories()))
+
+ def test_extractor_classes(self):
+ for module_name in tuple(sys.modules):
+ if module_name.startswith(f'{PACKAGE_NAME}.extractor'):
+ del sys.modules[module_name]
+ plugins_ie = load_plugins('extractor', 'IE')
+
+ self.assertIn(f'{PACKAGE_NAME}.extractor.normal', sys.modules.keys())
+ self.assertIn('NormalPluginIE', plugins_ie.keys())
+
+ # don't load modules with underscore prefix
+ self.assertNotIn(
+ f'{PACKAGE_NAME}.extractor._ignore', sys.modules,
+ 'loaded module beginning with underscore')
+ self.assertNotIn('IgnorePluginIE', plugins_ie.keys())
+
+ # Don't load extractors with underscore prefix
+ self.assertNotIn('_IgnoreUnderscorePluginIE', plugins_ie.keys())
+
+ # Don't load extractors not specified in __all__ (if supplied)
+ self.assertNotIn('IgnoreNotInAllPluginIE', plugins_ie.keys())
+ self.assertIn('InAllPluginIE', plugins_ie.keys())
+
+ def test_postprocessor_classes(self):
+ plugins_pp = load_plugins('postprocessor', 'PP')
+ self.assertIn('NormalPluginPP', plugins_pp.keys())
+
+ def test_importing_zipped_module(self):
+ zip_path = TEST_DATA_DIR / 'zipped_plugins.zip'
+ shutil.make_archive(str(zip_path)[:-4], 'zip', str(zip_path)[:-4])
+ sys.path.append(str(zip_path)) # add zip to search paths
+ importlib.invalidate_caches() # reset the import caches
+
+ try:
+ for plugin_type in ('extractor', 'postprocessor'):
+ package = importlib.import_module(f'{PACKAGE_NAME}.{plugin_type}')
+ self.assertIn(zip_path / PACKAGE_NAME / plugin_type, map(Path, package.__path__))
+
+ plugins_ie = load_plugins('extractor', 'IE')
+ self.assertIn('ZippedPluginIE', plugins_ie.keys())
+
+ plugins_pp = load_plugins('postprocessor', 'PP')
+ self.assertIn('ZippedPluginPP', plugins_pp.keys())
+
+ finally:
+ sys.path.remove(str(zip_path))
+ os.remove(zip_path)
+ importlib.invalidate_caches() # reset the import caches
+
+
+if __name__ == '__main__':
+ unittest.main()
diff --git a/test/test_post_hooks.py b/test/test_post_hooks.py
new file mode 100644
index 0000000..3778d17
--- /dev/null
+++ b/test/test_post_hooks.py
@@ -0,0 +1,70 @@
+#!/usr/bin/env python3
+
+# Allow direct execution
+import os
+import sys
+import unittest
+
+sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
+
+
+from test.helper import get_params, is_download_test, try_rm
+import yt_dlp.YoutubeDL # isort: split
+from yt_dlp.utils import DownloadError
+
+
+class YoutubeDL(yt_dlp.YoutubeDL):
+ def __init__(self, *args, **kwargs):
+ super().__init__(*args, **kwargs)
+ self.to_stderr = self.to_screen
+
+
+TEST_ID = 'gr51aVj-mLg'
+EXPECTED_NAME = 'gr51aVj-mLg'
+
+
+@is_download_test
+class TestPostHooks(unittest.TestCase):
+ def setUp(self):
+ self.stored_name_1 = None
+ self.stored_name_2 = None
+ self.params = get_params({
+ 'skip_download': False,
+ 'writeinfojson': False,
+ 'quiet': True,
+ 'verbose': False,
+ 'cachedir': False,
+ })
+ self.files = []
+
+ def test_post_hooks(self):
+ self.params['post_hooks'] = [self.hook_one, self.hook_two]
+ ydl = YoutubeDL(self.params)
+ ydl.download([TEST_ID])
+ self.assertEqual(self.stored_name_1, EXPECTED_NAME, 'Not the expected name from hook 1')
+ self.assertEqual(self.stored_name_2, EXPECTED_NAME, 'Not the expected name from hook 2')
+
+ def test_post_hook_exception(self):
+ self.params['post_hooks'] = [self.hook_three]
+ ydl = YoutubeDL(self.params)
+ self.assertRaises(DownloadError, ydl.download, [TEST_ID])
+
+ def hook_one(self, filename):
+ self.stored_name_1, _ = os.path.splitext(os.path.basename(filename))
+ self.files.append(filename)
+
+ def hook_two(self, filename):
+ self.stored_name_2, _ = os.path.splitext(os.path.basename(filename))
+ self.files.append(filename)
+
+ def hook_three(self, filename):
+ self.files.append(filename)
+ raise Exception('Test exception for \'%s\'' % filename)
+
+ def tearDown(self):
+ for f in self.files:
+ try_rm(f)
+
+
+if __name__ == '__main__':
+ unittest.main()
diff --git a/test/test_postprocessors.py b/test/test_postprocessors.py
new file mode 100644
index 0000000..52e5587
--- /dev/null
+++ b/test/test_postprocessors.py
@@ -0,0 +1,579 @@
+#!/usr/bin/env python3
+
+# Allow direct execution
+import os
+import sys
+import unittest
+
+sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
+
+
+from yt_dlp import YoutubeDL
+from yt_dlp.compat import compat_shlex_quote
+from yt_dlp.postprocessor import (
+ ExecPP,
+ FFmpegThumbnailsConvertorPP,
+ MetadataFromFieldPP,
+ MetadataParserPP,
+ ModifyChaptersPP,
+ SponsorBlockPP,
+)
+
+
+class TestMetadataFromField(unittest.TestCase):
+
+ def test_format_to_regex(self):
+ self.assertEqual(
+ MetadataParserPP.format_to_regex('%(title)s - %(artist)s'),
+ r'(?P<title>.+)\ \-\ (?P<artist>.+)')
+ self.assertEqual(MetadataParserPP.format_to_regex(r'(?P<x>.+)'), r'(?P<x>.+)')
+
+ def test_field_to_template(self):
+ self.assertEqual(MetadataParserPP.field_to_template('title'), '%(title)s')
+ self.assertEqual(MetadataParserPP.field_to_template('1'), '1')
+ self.assertEqual(MetadataParserPP.field_to_template('foo bar'), 'foo bar')
+ self.assertEqual(MetadataParserPP.field_to_template(' literal'), ' literal')
+
+ def test_metadatafromfield(self):
+ self.assertEqual(
+ MetadataFromFieldPP.to_action('%(title)s \\: %(artist)s:%(title)s : %(artist)s'),
+ (MetadataParserPP.Actions.INTERPRET, '%(title)s : %(artist)s', '%(title)s : %(artist)s'))
+
+
+class TestConvertThumbnail(unittest.TestCase):
+ def test_escaping(self):
+ pp = FFmpegThumbnailsConvertorPP()
+ if not pp.available:
+ print('Skipping: ffmpeg not found')
+ return
+
+ file = 'test/testdata/thumbnails/foo %d bar/foo_%d.{}'
+ tests = (('webp', 'png'), ('png', 'jpg'))
+
+ for inp, out in tests:
+ out_file = file.format(out)
+ if os.path.exists(out_file):
+ os.remove(out_file)
+ pp.convert_thumbnail(file.format(inp), out)
+ assert os.path.exists(out_file)
+
+ for _, out in tests:
+ os.remove(file.format(out))
+
+
+class TestExec(unittest.TestCase):
+ def test_parse_cmd(self):
+ pp = ExecPP(YoutubeDL(), '')
+ info = {'filepath': 'file name'}
+ cmd = 'echo %s' % compat_shlex_quote(info['filepath'])
+
+ self.assertEqual(pp.parse_cmd('echo', info), cmd)
+ self.assertEqual(pp.parse_cmd('echo {}', info), cmd)
+ self.assertEqual(pp.parse_cmd('echo %(filepath)q', info), cmd)
+
+
+class TestModifyChaptersPP(unittest.TestCase):
+ def setUp(self):
+ self._pp = ModifyChaptersPP(YoutubeDL())
+
+ @staticmethod
+ def _sponsor_chapter(start, end, cat, remove=False, title=None):
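+ # builds a chapter dict in the shape SponsorBlockPP produces ('_categories' entries plus an optional 'remove' flag)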
+ if title is None:
+ title = SponsorBlockPP.CATEGORIES[cat]
+ return {
+ 'start_time': start,
+ 'end_time': end,
+ '_categories': [(cat, start, end, title)],
+ **({'remove': True} if remove else {}),
+ }
+
+ @staticmethod
+ def _chapter(start, end, title=None, remove=False):
+ c = {'start_time': start, 'end_time': end}
+ if title is not None:
+ c['title'] = title
+ if remove:
+ c['remove'] = True
+ return c
+
+ def _chapters(self, ends, titles):
+ self.assertEqual(len(ends), len(titles))
+ start = 0
+ chapters = []
+ for e, t in zip(ends, titles):
+ chapters.append(self._chapter(start, e, t))
+ start = e
+ return chapters
+
+ def _remove_marked_arrange_sponsors_test_impl(
+ self, chapters, expected_chapters, expected_removed):
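+ # runs the PP's core algorithm and compares both the arranged chapters and the removed cuts against expectations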
+ actual_chapters, actual_removed = (
+ self._pp._remove_marked_arrange_sponsors(chapters))
+ for c in actual_removed:
+ c.pop('title', None)
+ c.pop('_categories', None)
+ actual_chapters = [{
+ 'start_time': c['start_time'],
+ 'end_time': c['end_time'],
+ 'title': c['title'],
+ } for c in actual_chapters]
+ self.assertSequenceEqual(expected_chapters, actual_chapters)
+ self.assertSequenceEqual(expected_removed, actual_removed)
+
+ def test_remove_marked_arrange_sponsors_CanGetThroughUnaltered(self):
+ chapters = self._chapters([10, 20, 30, 40], ['c1', 'c2', 'c3', 'c4'])
+ self._remove_marked_arrange_sponsors_test_impl(chapters, chapters, [])
+
+ def test_remove_marked_arrange_sponsors_ChapterWithSponsors(self):
+ chapters = self._chapters([70], ['c']) + [
+ self._sponsor_chapter(10, 20, 'sponsor'),
+ self._sponsor_chapter(30, 40, 'preview'),
+ self._sponsor_chapter(50, 60, 'filler')]
+ expected = self._chapters(
+ [10, 20, 30, 40, 50, 60, 70],
+ ['c', '[SponsorBlock]: Sponsor', 'c', '[SponsorBlock]: Preview/Recap',
+ 'c', '[SponsorBlock]: Filler Tangent', 'c'])
+ self._remove_marked_arrange_sponsors_test_impl(chapters, expected, [])
+
+ def test_remove_marked_arrange_sponsors_SponsorBlockChapters(self):
+ chapters = self._chapters([70], ['c']) + [
+ self._sponsor_chapter(10, 20, 'chapter', title='sb c1'),
+ self._sponsor_chapter(15, 16, 'chapter', title='sb c2'),
+ self._sponsor_chapter(30, 40, 'preview'),
+ self._sponsor_chapter(50, 60, 'filler')]
+ expected = self._chapters(
+ [10, 15, 16, 20, 30, 40, 50, 60, 70],
+ ['c', '[SponsorBlock]: sb c1', '[SponsorBlock]: sb c1, sb c2', '[SponsorBlock]: sb c1',
+ 'c', '[SponsorBlock]: Preview/Recap',
+ 'c', '[SponsorBlock]: Filler Tangent', 'c'])
+ self._remove_marked_arrange_sponsors_test_impl(chapters, expected, [])
+
+ def test_remove_marked_arrange_sponsors_UniqueNamesForOverlappingSponsors(self):
+ chapters = self._chapters([120], ['c']) + [
+ self._sponsor_chapter(10, 45, 'sponsor'), self._sponsor_chapter(20, 40, 'selfpromo'),
+ self._sponsor_chapter(50, 70, 'sponsor'), self._sponsor_chapter(60, 85, 'selfpromo'),
+ self._sponsor_chapter(90, 120, 'selfpromo'), self._sponsor_chapter(100, 110, 'sponsor')]
+ expected = self._chapters(
+ [10, 20, 40, 45, 50, 60, 70, 85, 90, 100, 110, 120],
+ ['c', '[SponsorBlock]: Sponsor', '[SponsorBlock]: Sponsor, Unpaid/Self Promotion',
+ '[SponsorBlock]: Sponsor',
+ 'c', '[SponsorBlock]: Sponsor', '[SponsorBlock]: Sponsor, Unpaid/Self Promotion',
+ '[SponsorBlock]: Unpaid/Self Promotion',
+ 'c', '[SponsorBlock]: Unpaid/Self Promotion', '[SponsorBlock]: Unpaid/Self Promotion, Sponsor',
+ '[SponsorBlock]: Unpaid/Self Promotion'])
+ self._remove_marked_arrange_sponsors_test_impl(chapters, expected, [])
+
+ def test_remove_marked_arrange_sponsors_ChapterWithCuts(self):
+ cuts = [self._chapter(10, 20, remove=True),
+ self._sponsor_chapter(30, 40, 'sponsor', remove=True),
+ self._chapter(50, 60, remove=True)]
+ chapters = self._chapters([70], ['c']) + cuts
+ self._remove_marked_arrange_sponsors_test_impl(
+ chapters, self._chapters([40], ['c']), cuts)
+
+ def test_remove_marked_arrange_sponsors_ChapterWithSponsorsAndCuts(self):
+ chapters = self._chapters([70], ['c']) + [
+ self._sponsor_chapter(10, 20, 'sponsor'),
+ self._sponsor_chapter(30, 40, 'selfpromo', remove=True),
+ self._sponsor_chapter(50, 60, 'interaction')]
+ expected = self._chapters([10, 20, 40, 50, 60],
+ ['c', '[SponsorBlock]: Sponsor', 'c',
+ '[SponsorBlock]: Interaction Reminder', 'c'])
+ self._remove_marked_arrange_sponsors_test_impl(
+ chapters, expected, [self._chapter(30, 40, remove=True)])
+
+ def test_remove_marked_arrange_sponsors_ChapterWithSponsorCutInTheMiddle(self):
+ cuts = [self._sponsor_chapter(20, 30, 'selfpromo', remove=True),
+ self._chapter(40, 50, remove=True)]
+ chapters = self._chapters([70], ['c']) + [self._sponsor_chapter(10, 60, 'sponsor')] + cuts
+ expected = self._chapters(
+ [10, 40, 50], ['c', '[SponsorBlock]: Sponsor', 'c'])
+ self._remove_marked_arrange_sponsors_test_impl(chapters, expected, cuts)
+
+ def test_remove_marked_arrange_sponsors_ChapterWithCutHidingSponsor(self):
+ cuts = [self._sponsor_chapter(20, 50, 'selfpromo', remove=True)]
+ chapters = self._chapters([60], ['c']) + [
+ self._sponsor_chapter(10, 20, 'intro'),
+ self._sponsor_chapter(30, 40, 'sponsor'),
+ self._sponsor_chapter(50, 60, 'outro'),
+ ] + cuts
+ expected = self._chapters(
+ [10, 20, 30], ['c', '[SponsorBlock]: Intermission/Intro Animation', '[SponsorBlock]: Endcards/Credits'])
+ self._remove_marked_arrange_sponsors_test_impl(chapters, expected, cuts)
+
+ def test_remove_marked_arrange_sponsors_ChapterWithAdjacentSponsors(self):
+ chapters = self._chapters([70], ['c']) + [
+ self._sponsor_chapter(10, 20, 'sponsor'),
+ self._sponsor_chapter(20, 30, 'selfpromo'),
+ self._sponsor_chapter(30, 40, 'interaction')]
+ expected = self._chapters(
+ [10, 20, 30, 40, 70],
+ ['c', '[SponsorBlock]: Sponsor', '[SponsorBlock]: Unpaid/Self Promotion',
+ '[SponsorBlock]: Interaction Reminder', 'c'])
+ self._remove_marked_arrange_sponsors_test_impl(chapters, expected, [])
+
+ def test_remove_marked_arrange_sponsors_ChapterWithAdjacentCuts(self):
+ chapters = self._chapters([70], ['c']) + [
+ self._sponsor_chapter(10, 20, 'sponsor'),
+ self._sponsor_chapter(20, 30, 'interaction', remove=True),
+ self._chapter(30, 40, remove=True),
+ self._sponsor_chapter(40, 50, 'selfpromo', remove=True),
+ self._sponsor_chapter(50, 60, 'interaction')]
+ expected = self._chapters([10, 20, 30, 40],
+ ['c', '[SponsorBlock]: Sponsor',
+ '[SponsorBlock]: Interaction Reminder', 'c'])
+ self._remove_marked_arrange_sponsors_test_impl(
+ chapters, expected, [self._chapter(20, 50, remove=True)])
+
+ def test_remove_marked_arrange_sponsors_ChapterWithOverlappingSponsors(self):
+ chapters = self._chapters([70], ['c']) + [
+ self._sponsor_chapter(10, 30, 'sponsor'),
+ self._sponsor_chapter(20, 50, 'selfpromo'),
+ self._sponsor_chapter(40, 60, 'interaction')]
+ expected = self._chapters(
+ [10, 20, 30, 40, 50, 60, 70],
+ ['c', '[SponsorBlock]: Sponsor', '[SponsorBlock]: Sponsor, Unpaid/Self Promotion',
+ '[SponsorBlock]: Unpaid/Self Promotion', '[SponsorBlock]: Unpaid/Self Promotion, Interaction Reminder',
+ '[SponsorBlock]: Interaction Reminder', 'c'])
+ self._remove_marked_arrange_sponsors_test_impl(chapters, expected, [])
+
+ def test_remove_marked_arrange_sponsors_ChapterWithOverlappingCuts(self):
+ chapters = self._chapters([70], ['c']) + [
+ self._sponsor_chapter(10, 30, 'sponsor', remove=True),
+ self._sponsor_chapter(20, 50, 'selfpromo', remove=True),
+ self._sponsor_chapter(40, 60, 'interaction', remove=True)]
+ self._remove_marked_arrange_sponsors_test_impl(
+ chapters, self._chapters([20], ['c']), [self._chapter(10, 60, remove=True)])
+
+ def test_remove_marked_arrange_sponsors_ChapterWithRunsOfOverlappingSponsors(self):
+ chapters = self._chapters([170], ['c']) + [
+ self._sponsor_chapter(0, 30, 'intro'),
+ self._sponsor_chapter(20, 50, 'sponsor'),
+ self._sponsor_chapter(40, 60, 'selfpromo'),
+ self._sponsor_chapter(70, 90, 'sponsor'),
+ self._sponsor_chapter(80, 100, 'sponsor'),
+ self._sponsor_chapter(90, 110, 'sponsor'),
+ self._sponsor_chapter(120, 140, 'selfpromo'),
+ self._sponsor_chapter(130, 160, 'interaction'),
+ self._sponsor_chapter(150, 170, 'outro')]
+ expected = self._chapters(
+ [20, 30, 40, 50, 60, 70, 110, 120, 130, 140, 150, 160, 170],
+ ['[SponsorBlock]: Intermission/Intro Animation', '[SponsorBlock]: Intermission/Intro Animation, Sponsor', '[SponsorBlock]: Sponsor',
+ '[SponsorBlock]: Sponsor, Unpaid/Self Promotion', '[SponsorBlock]: Unpaid/Self Promotion', 'c',
+ '[SponsorBlock]: Sponsor', 'c', '[SponsorBlock]: Unpaid/Self Promotion',
+ '[SponsorBlock]: Unpaid/Self Promotion, Interaction Reminder',
+ '[SponsorBlock]: Interaction Reminder',
+ '[SponsorBlock]: Interaction Reminder, Endcards/Credits', '[SponsorBlock]: Endcards/Credits'])
+ self._remove_marked_arrange_sponsors_test_impl(chapters, expected, [])
+
+ def test_remove_marked_arrange_sponsors_ChapterWithRunsOfOverlappingCuts(self):
+ chapters = self._chapters([170], ['c']) + [
+ self._chapter(0, 30, remove=True),
+ self._sponsor_chapter(20, 50, 'sponsor', remove=True),
+ self._chapter(40, 60, remove=True),
+ self._sponsor_chapter(70, 90, 'sponsor', remove=True),
+ self._chapter(80, 100, remove=True),
+ self._chapter(90, 110, remove=True),
+ self._sponsor_chapter(120, 140, 'sponsor', remove=True),
+ self._sponsor_chapter(130, 160, 'selfpromo', remove=True),
+ self._chapter(150, 170, remove=True)]
+ expected_cuts = [self._chapter(0, 60, remove=True),
+ self._chapter(70, 110, remove=True),
+ self._chapter(120, 170, remove=True)]
+ self._remove_marked_arrange_sponsors_test_impl(
+ chapters, self._chapters([20], ['c']), expected_cuts)
+
+ def test_remove_marked_arrange_sponsors_OverlappingSponsorsDifferentTitlesAfterCut(self):
+ chapters = self._chapters([60], ['c']) + [
+ self._sponsor_chapter(10, 60, 'sponsor'),
+ self._sponsor_chapter(10, 40, 'intro'),
+ self._sponsor_chapter(30, 50, 'interaction'),
+ self._sponsor_chapter(30, 50, 'selfpromo', remove=True),
+ self._sponsor_chapter(40, 50, 'interaction'),
+ self._sponsor_chapter(50, 60, 'outro')]
+ expected = self._chapters(
+ [10, 30, 40], ['c', '[SponsorBlock]: Sponsor, Intermission/Intro Animation', '[SponsorBlock]: Sponsor, Endcards/Credits'])
+ self._remove_marked_arrange_sponsors_test_impl(
+ chapters, expected, [self._chapter(30, 50, remove=True)])
+
+ def test_remove_marked_arrange_sponsors_SponsorsNoLongerOverlapAfterCut(self):
+ chapters = self._chapters([70], ['c']) + [
+ self._sponsor_chapter(10, 30, 'sponsor'),
+ self._sponsor_chapter(20, 50, 'interaction'),
+ self._sponsor_chapter(30, 50, 'selfpromo', remove=True),
+ self._sponsor_chapter(40, 60, 'sponsor'),
+ self._sponsor_chapter(50, 60, 'interaction')]
+ expected = self._chapters(
+ [10, 20, 40, 50], ['c', '[SponsorBlock]: Sponsor',
+ '[SponsorBlock]: Sponsor, Interaction Reminder', 'c'])
+ self._remove_marked_arrange_sponsors_test_impl(
+ chapters, expected, [self._chapter(30, 50, remove=True)])
+
+ def test_remove_marked_arrange_sponsors_SponsorsStillOverlapAfterCut(self):
+ chapters = self._chapters([70], ['c']) + [
+ self._sponsor_chapter(10, 60, 'sponsor'),
+ self._sponsor_chapter(20, 60, 'interaction'),
+ self._sponsor_chapter(30, 50, 'selfpromo', remove=True)]
+ expected = self._chapters(
+ [10, 20, 40, 50], ['c', '[SponsorBlock]: Sponsor',
+ '[SponsorBlock]: Sponsor, Interaction Reminder', 'c'])
+ self._remove_marked_arrange_sponsors_test_impl(
+ chapters, expected, [self._chapter(30, 50, remove=True)])
+
+ def test_remove_marked_arrange_sponsors_ChapterWithRunsOfOverlappingSponsorsAndCuts(self):
+ chapters = self._chapters([200], ['c']) + [
+ self._sponsor_chapter(10, 40, 'sponsor'),
+ self._sponsor_chapter(10, 30, 'intro'),
+ self._chapter(20, 30, remove=True),
+ self._sponsor_chapter(30, 40, 'selfpromo'),
+ self._sponsor_chapter(50, 70, 'sponsor'),
+ self._sponsor_chapter(60, 80, 'interaction'),
+ self._chapter(70, 80, remove=True),
+ self._sponsor_chapter(70, 90, 'sponsor'),
+ self._sponsor_chapter(80, 100, 'interaction'),
+ self._sponsor_chapter(120, 170, 'selfpromo'),
+ self._sponsor_chapter(130, 180, 'outro'),
+ self._chapter(140, 150, remove=True),
+ self._chapter(150, 160, remove=True)]
+ expected = self._chapters(
+ [10, 20, 30, 40, 50, 70, 80, 100, 110, 130, 140, 160],
+ ['c', '[SponsorBlock]: Sponsor, Intermission/Intro Animation', '[SponsorBlock]: Sponsor, Unpaid/Self Promotion',
+ 'c', '[SponsorBlock]: Sponsor', '[SponsorBlock]: Sponsor, Interaction Reminder',
+ '[SponsorBlock]: Interaction Reminder', 'c', '[SponsorBlock]: Unpaid/Self Promotion',
+ '[SponsorBlock]: Unpaid/Self Promotion, Endcards/Credits', '[SponsorBlock]: Endcards/Credits', 'c'])
+ expected_cuts = [self._chapter(20, 30, remove=True),
+ self._chapter(70, 80, remove=True),
+ self._chapter(140, 160, remove=True)]
+ self._remove_marked_arrange_sponsors_test_impl(chapters, expected, expected_cuts)
+
+ def test_remove_marked_arrange_sponsors_SponsorOverlapsMultipleChapters(self):
+ chapters = (self._chapters([20, 40, 60, 80, 100], ['c1', 'c2', 'c3', 'c4', 'c5'])
+ + [self._sponsor_chapter(10, 90, 'sponsor')])
+ expected = self._chapters([10, 90, 100], ['c1', '[SponsorBlock]: Sponsor', 'c5'])
+ self._remove_marked_arrange_sponsors_test_impl(chapters, expected, [])
+
+ def test_remove_marked_arrange_sponsors_CutOverlapsMultipleChapters(self):
+ cuts = [self._chapter(10, 90, remove=True)]
+ chapters = self._chapters([20, 40, 60, 80, 100], ['c1', 'c2', 'c3', 'c4', 'c5']) + cuts
+ expected = self._chapters([10, 20], ['c1', 'c5'])
+ self._remove_marked_arrange_sponsors_test_impl(chapters, expected, cuts)
+
+ def test_remove_marked_arrange_sponsors_SponsorsWithinSomeChaptersAndOverlappingOthers(self):
+ chapters = (self._chapters([10, 40, 60, 80], ['c1', 'c2', 'c3', 'c4'])
+ + [self._sponsor_chapter(20, 30, 'sponsor'),
+ self._sponsor_chapter(50, 70, 'selfpromo')])
+ expected = self._chapters([10, 20, 30, 40, 50, 70, 80],
+ ['c1', 'c2', '[SponsorBlock]: Sponsor', 'c2', 'c3',
+ '[SponsorBlock]: Unpaid/Self Promotion', 'c4'])
+ self._remove_marked_arrange_sponsors_test_impl(chapters, expected, [])
+
+ def test_remove_marked_arrange_sponsors_CutsWithinSomeChaptersAndOverlappingOthers(self):
+ cuts = [self._chapter(20, 30, remove=True), self._chapter(50, 70, remove=True)]
+ chapters = self._chapters([10, 40, 60, 80], ['c1', 'c2', 'c3', 'c4']) + cuts
+ expected = self._chapters([10, 30, 40, 50], ['c1', 'c2', 'c3', 'c4'])
+ self._remove_marked_arrange_sponsors_test_impl(chapters, expected, cuts)
+
+ def test_remove_marked_arrange_sponsors_ChaptersAfterLastSponsor(self):
+ chapters = (self._chapters([20, 40, 50, 60], ['c1', 'c2', 'c3', 'c4'])
+ + [self._sponsor_chapter(10, 30, 'music_offtopic')])
+ expected = self._chapters(
+ [10, 30, 40, 50, 60],
+ ['c1', '[SponsorBlock]: Non-Music Section', 'c2', 'c3', 'c4'])
+ self._remove_marked_arrange_sponsors_test_impl(chapters, expected, [])
+
+ def test_remove_marked_arrange_sponsors_ChaptersAfterLastCut(self):
+ cuts = [self._chapter(10, 30, remove=True)]
+ chapters = self._chapters([20, 40, 50, 60], ['c1', 'c2', 'c3', 'c4']) + cuts
+ expected = self._chapters([10, 20, 30, 40], ['c1', 'c2', 'c3', 'c4'])
+ self._remove_marked_arrange_sponsors_test_impl(chapters, expected, cuts)
+
+ def test_remove_marked_arrange_sponsors_SponsorStartsAtChapterStart(self):
+ chapters = (self._chapters([10, 20, 40], ['c1', 'c2', 'c3'])
+ + [self._sponsor_chapter(20, 30, 'sponsor')])
+ expected = self._chapters([10, 20, 30, 40], ['c1', 'c2', '[SponsorBlock]: Sponsor', 'c3'])
+ self._remove_marked_arrange_sponsors_test_impl(chapters, expected, [])
+
+ def test_remove_marked_arrange_sponsors_CutStartsAtChapterStart(self):
+ cuts = [self._chapter(20, 30, remove=True)]
+ chapters = self._chapters([10, 20, 40], ['c1', 'c2', 'c3']) + cuts
+ expected = self._chapters([10, 20, 30], ['c1', 'c2', 'c3'])
+ self._remove_marked_arrange_sponsors_test_impl(chapters, expected, cuts)
+
+ def test_remove_marked_arrange_sponsors_SponsorEndsAtChapterEnd(self):
+ chapters = (self._chapters([10, 30, 40], ['c1', 'c2', 'c3'])
+ + [self._sponsor_chapter(20, 30, 'sponsor')])
+ expected = self._chapters([10, 20, 30, 40], ['c1', 'c2', '[SponsorBlock]: Sponsor', 'c3'])
+ self._remove_marked_arrange_sponsors_test_impl(chapters, expected, [])
+
+ def test_remove_marked_arrange_sponsors_CutEndsAtChapterEnd(self):
+ cuts = [self._chapter(20, 30, remove=True)]
+ chapters = self._chapters([10, 30, 40], ['c1', 'c2', 'c3']) + cuts
+ expected = self._chapters([10, 20, 30], ['c1', 'c2', 'c3'])
+ self._remove_marked_arrange_sponsors_test_impl(chapters, expected, cuts)
+
+ def test_remove_marked_arrange_sponsors_SponsorCoincidesWithChapters(self):
+ chapters = (self._chapters([10, 20, 30, 40], ['c1', 'c2', 'c3', 'c4'])
+ + [self._sponsor_chapter(10, 30, 'sponsor')])
+ expected = self._chapters([10, 30, 40], ['c1', '[SponsorBlock]: Sponsor', 'c4'])
+ self._remove_marked_arrange_sponsors_test_impl(chapters, expected, [])
+
+ def test_remove_marked_arrange_sponsors_CutCoincidesWithChapters(self):
+ cuts = [self._chapter(10, 30, remove=True)]
+ chapters = self._chapters([10, 20, 30, 40], ['c1', 'c2', 'c3', 'c4']) + cuts
+ expected = self._chapters([10, 20], ['c1', 'c4'])
+ self._remove_marked_arrange_sponsors_test_impl(chapters, expected, cuts)
+
+ def test_remove_marked_arrange_sponsors_SponsorsAtVideoBoundaries(self):
+ chapters = (self._chapters([20, 40, 60], ['c1', 'c2', 'c3'])
+ + [self._sponsor_chapter(0, 10, 'intro'), self._sponsor_chapter(50, 60, 'outro')])
+ expected = self._chapters(
+ [10, 20, 40, 50, 60], ['[SponsorBlock]: Intermission/Intro Animation', 'c1', 'c2', 'c3', '[SponsorBlock]: Endcards/Credits'])
+ self._remove_marked_arrange_sponsors_test_impl(chapters, expected, [])
+
+ def test_remove_marked_arrange_sponsors_CutsAtVideoBoundaries(self):
+ cuts = [self._chapter(0, 10, remove=True), self._chapter(50, 60, remove=True)]
+ chapters = self._chapters([20, 40, 60], ['c1', 'c2', 'c3']) + cuts
+ expected = self._chapters([10, 30, 40], ['c1', 'c2', 'c3'])
+ self._remove_marked_arrange_sponsors_test_impl(chapters, expected, cuts)
+
+ def test_remove_marked_arrange_sponsors_SponsorsOverlapChaptersAtVideoBoundaries(self):
+ chapters = (self._chapters([10, 40, 50], ['c1', 'c2', 'c3'])
+ + [self._sponsor_chapter(0, 20, 'intro'), self._sponsor_chapter(30, 50, 'outro')])
+ expected = self._chapters(
+ [20, 30, 50], ['[SponsorBlock]: Intermission/Intro Animation', 'c2', '[SponsorBlock]: Endcards/Credits'])
+ self._remove_marked_arrange_sponsors_test_impl(chapters, expected, [])
+
+ def test_remove_marked_arrange_sponsors_CutsOverlapChaptersAtVideoBoundaries(self):
+ cuts = [self._chapter(0, 20, remove=True), self._chapter(30, 50, remove=True)]
+ chapters = self._chapters([10, 40, 50], ['c1', 'c2', 'c3']) + cuts
+ expected = self._chapters([10], ['c2'])
+ self._remove_marked_arrange_sponsors_test_impl(chapters, expected, cuts)
+
+ def test_remove_marked_arrange_sponsors_EverythingSponsored(self):
+ chapters = (self._chapters([10, 20, 30, 40], ['c1', 'c2', 'c3', 'c4'])
+ + [self._sponsor_chapter(0, 20, 'intro'), self._sponsor_chapter(20, 40, 'outro')])
+ expected = self._chapters([20, 40], ['[SponsorBlock]: Intermission/Intro Animation', '[SponsorBlock]: Endcards/Credits'])
+ self._remove_marked_arrange_sponsors_test_impl(chapters, expected, [])
+
+ def test_remove_marked_arrange_sponsors_EverythingCut(self):
+ cuts = [self._chapter(0, 20, remove=True), self._chapter(20, 40, remove=True)]
+ chapters = self._chapters([10, 20, 30, 40], ['c1', 'c2', 'c3', 'c4']) + cuts
+ self._remove_marked_arrange_sponsors_test_impl(
+ chapters, [], [self._chapter(0, 40, remove=True)])
+
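+    # The following tests cover 'tiny' chapters, i.e. chapters shorter than
+    # the postprocessor's minimum-duration threshold: tiny chapters from the
+    # original chapter list are kept, while tiny leftovers produced by cuts
+    # and sponsor overlaps are merged into a neighbour or dropped.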
+ def test_remove_marked_arrange_sponsors_TinyChaptersInTheOriginalArePreserved(self):
+ chapters = self._chapters([0.1, 0.2, 0.3, 0.4], ['c1', 'c2', 'c3', 'c4'])
+ self._remove_marked_arrange_sponsors_test_impl(chapters, chapters, [])
+
+ def test_remove_marked_arrange_sponsors_TinySponsorsAreIgnored(self):
+ chapters = [self._sponsor_chapter(0, 0.1, 'intro'), self._chapter(0.1, 0.2, 'c1'),
+ self._sponsor_chapter(0.2, 0.3, 'sponsor'), self._chapter(0.3, 0.4, 'c2'),
+ self._sponsor_chapter(0.4, 0.5, 'outro')]
+ self._remove_marked_arrange_sponsors_test_impl(
+ chapters, self._chapters([0.3, 0.5], ['c1', 'c2']), [])
+
+ def test_remove_marked_arrange_sponsors_TinyChaptersResultingFromCutsAreIgnored(self):
+ cuts = [self._chapter(1.5, 2.5, remove=True)]
+ chapters = self._chapters([2, 3, 3.5], ['c1', 'c2', 'c3']) + cuts
+ self._remove_marked_arrange_sponsors_test_impl(
+ chapters, self._chapters([2, 2.5], ['c1', 'c3']), cuts)
+
+ def test_remove_marked_arrange_sponsors_SingleTinyChapterIsPreserved(self):
+ cuts = [self._chapter(0.5, 2, remove=True)]
+ chapters = self._chapters([2], ['c']) + cuts
+ self._remove_marked_arrange_sponsors_test_impl(
+ chapters, self._chapters([0.5], ['c']), cuts)
+
+ def test_remove_marked_arrange_sponsors_TinyChapterAtTheStartPrependedToTheNext(self):
+ cuts = [self._chapter(0.5, 2, remove=True)]
+ chapters = self._chapters([2, 4], ['c1', 'c2']) + cuts
+ self._remove_marked_arrange_sponsors_test_impl(
+ chapters, self._chapters([2.5], ['c2']), cuts)
+
+ def test_remove_marked_arrange_sponsors_TinyChaptersResultingFromSponsorOverlapAreIgnored(self):
+ chapters = self._chapters([1, 3, 4], ['c1', 'c2', 'c3']) + [
+ self._sponsor_chapter(1.5, 2.5, 'sponsor')]
+ self._remove_marked_arrange_sponsors_test_impl(
+ chapters, self._chapters([1.5, 2.5, 4], ['c1', '[SponsorBlock]: Sponsor', 'c3']), [])
+
+ def test_remove_marked_arrange_sponsors_TinySponsorsOverlapsAreIgnored(self):
+ chapters = self._chapters([2, 3, 5], ['c1', 'c2', 'c3']) + [
+ self._sponsor_chapter(1, 3, 'sponsor'),
+ self._sponsor_chapter(2.5, 4, 'selfpromo')
+ ]
+ self._remove_marked_arrange_sponsors_test_impl(
+ chapters, self._chapters([1, 3, 4, 5], [
+ 'c1', '[SponsorBlock]: Sponsor', '[SponsorBlock]: Unpaid/Self Promotion', 'c3']), [])
+
+ def test_remove_marked_arrange_sponsors_TinySponsorsPrependedToTheNextSponsor(self):
+ chapters = self._chapters([4], ['c']) + [
+ self._sponsor_chapter(1.5, 2, 'sponsor'),
+ self._sponsor_chapter(2, 4, 'selfpromo')
+ ]
+ self._remove_marked_arrange_sponsors_test_impl(
+ chapters, self._chapters([1.5, 4], ['c', '[SponsorBlock]: Unpaid/Self Promotion']), [])
+
+ def test_remove_marked_arrange_sponsors_SmallestSponsorInTheOverlapGetsNamed(self):
+ self._pp._sponsorblock_chapter_title = '[SponsorBlock]: %(name)s'
+ chapters = self._chapters([10], ['c']) + [
+ self._sponsor_chapter(2, 8, 'sponsor'),
+ self._sponsor_chapter(4, 6, 'selfpromo')
+ ]
+ self._remove_marked_arrange_sponsors_test_impl(
+ chapters, self._chapters([2, 4, 6, 8, 10], [
+ 'c', '[SponsorBlock]: Sponsor', '[SponsorBlock]: Unpaid/Self Promotion',
+ '[SponsorBlock]: Sponsor', 'c'
+ ]), [])
+
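+    # _make_concat_opts turns the chapters to cut into an ffconcat script of
+    # the segments to keep: a chunk with no inpoint starts at the beginning
+    # of the file, and a chunk with no outpoint runs to the end of the file.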
+ def test_make_concat_opts_CommonCase(self):
+ sponsor_chapters = [self._chapter(1, 2, 's1'), self._chapter(10, 20, 's2')]
+ expected = '''ffconcat version 1.0
+file 'file:test'
+outpoint 1.000000
+file 'file:test'
+inpoint 2.000000
+outpoint 10.000000
+file 'file:test'
+inpoint 20.000000
+'''
+ opts = self._pp._make_concat_opts(sponsor_chapters, 30)
+ self.assertEqual(expected, ''.join(self._pp._concat_spec(['test'] * len(opts), opts)))
+
+ def test_make_concat_opts_NoZeroDurationChunkAtVideoStart(self):
+ sponsor_chapters = [self._chapter(0, 1, 's1'), self._chapter(10, 20, 's2')]
+ expected = '''ffconcat version 1.0
+file 'file:test'
+inpoint 1.000000
+outpoint 10.000000
+file 'file:test'
+inpoint 20.000000
+'''
+ opts = self._pp._make_concat_opts(sponsor_chapters, 30)
+ self.assertEqual(expected, ''.join(self._pp._concat_spec(['test'] * len(opts), opts)))
+
+ def test_make_concat_opts_NoZeroDurationChunkAtVideoEnd(self):
+ sponsor_chapters = [self._chapter(1, 2, 's1'), self._chapter(10, 20, 's2')]
+ expected = '''ffconcat version 1.0
+file 'file:test'
+outpoint 1.000000
+file 'file:test'
+inpoint 2.000000
+outpoint 10.000000
+'''
+ opts = self._pp._make_concat_opts(sponsor_chapters, 20)
+ self.assertEqual(expected, ''.join(self._pp._concat_spec(['test'] * len(opts), opts)))
+
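+    # ffconcat file names use shell-style single quoting: each embedded
+    # quote is written as '\'' (close the string, emit an escaped literal
+    # quote, reopen the string), as the expected values below show.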
+ def test_quote_for_concat_RunsOfQuotes(self):
+ self.assertEqual(
+ r"'special '\'' '\'\''characters'\'\'\''galore'",
+ self._pp._quote_for_ffmpeg("special ' ''characters'''galore"))
+
+ def test_quote_for_concat_QuotesAtStart(self):
+ self.assertEqual(
+ r"\'\'\''special '\'' characters '\'' galore'",
+ self._pp._quote_for_ffmpeg("'''special ' characters ' galore"))
+
+ def test_quote_for_concat_QuotesAtEnd(self):
+ self.assertEqual(
+ r"'special '\'' characters '\'' galore'\'\'\'",
+ self._pp._quote_for_ffmpeg("special ' characters ' galore'''"))
diff --git a/test/test_socks.py b/test/test_socks.py
new file mode 100644
index 0000000..cb22b61
--- /dev/null
+++ b/test/test_socks.py
@@ -0,0 +1,477 @@
+#!/usr/bin/env python3
+# Allow direct execution
+import os
+import sys
+import threading
+import unittest
+
+import pytest
+
+sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
+
+import abc
+import contextlib
+import enum
+import functools
+import http.server
+import json
+import random
+import socket
+import struct
+import time
+from socketserver import (
+ BaseRequestHandler,
+ StreamRequestHandler,
+ ThreadingTCPServer,
+)
+
+from test.helper import http_server_port, verify_address_availability
+from yt_dlp.networking import Request
+from yt_dlp.networking.exceptions import ProxyError, TransportError
+from yt_dlp.socks import (
+ SOCKS4_REPLY_VERSION,
+ SOCKS4_VERSION,
+ SOCKS5_USER_AUTH_SUCCESS,
+ SOCKS5_USER_AUTH_VERSION,
+ SOCKS5_VERSION,
+ Socks5AddressType,
+ Socks5Auth,
+)
+
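+# RFC 1929 treats any non-zero STATUS byte in the subnegotiation reply as
+# failure; yt_dlp.socks only exports the success value, so a failure value
+# is defined locally for the tests.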
+SOCKS5_USER_AUTH_FAILURE = 0x1
+
+
+class Socks4CD(enum.IntEnum):
+ REQUEST_GRANTED = 90
+ REQUEST_REJECTED_OR_FAILED = 91
+ REQUEST_REJECTED_CANNOT_CONNECT_TO_IDENTD = 92
+ REQUEST_REJECTED_DIFFERENT_USERID = 93
+
+
+class Socks5Reply(enum.IntEnum):
+ SUCCEEDED = 0x0
+ GENERAL_FAILURE = 0x1
+ CONNECTION_NOT_ALLOWED = 0x2
+ NETWORK_UNREACHABLE = 0x3
+ HOST_UNREACHABLE = 0x4
+ CONNECTION_REFUSED = 0x5
+ TTL_EXPIRED = 0x6
+ COMMAND_NOT_SUPPORTED = 0x7
+ ADDRESS_TYPE_NOT_SUPPORTED = 0x8
+
+
+class SocksTestRequestHandler(BaseRequestHandler):
+
+ def __init__(self, *args, socks_info=None, **kwargs):
+ self.socks_info = socks_info
+ super().__init__(*args, **kwargs)
+
+
+class SocksProxyHandler(BaseRequestHandler):
+ def __init__(self, request_handler_class, socks_server_kwargs, *args, **kwargs):
+ self.socks_kwargs = socks_server_kwargs or {}
+ self.request_handler_class = request_handler_class
+ super().__init__(*args, **kwargs)
+
+
+class Socks5ProxyHandler(StreamRequestHandler, SocksProxyHandler):
+
+ # SOCKS5 protocol https://tools.ietf.org/html/rfc1928
+ # SOCKS5 username/password authentication https://tools.ietf.org/html/rfc1929
+
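+    # handle() walks the client through the SOCKS5 handshake:
+    #   1. greeting:       VER NMETHODS METHODS...            (RFC 1928, sect. 3)
+    #   2. method select:  VER METHOD
+    #   3. optional username/password subnegotiation          (RFC 1929)
+    #   4. request:        VER CMD RSV ATYP DST.ADDR DST.PORT
+    #   5. reply:          VER REP RSV ATYP BND.ADDR BND.PORT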
+ def handle(self):
+ sleep = self.socks_kwargs.get('sleep')
+ if sleep:
+ time.sleep(sleep)
+ version, nmethods = self.connection.recv(2)
+ assert version == SOCKS5_VERSION
+ methods = list(self.connection.recv(nmethods))
+
+ auth = self.socks_kwargs.get('auth')
+
+ if auth is not None and Socks5Auth.AUTH_USER_PASS not in methods:
+ self.connection.sendall(struct.pack('!BB', SOCKS5_VERSION, Socks5Auth.AUTH_NO_ACCEPTABLE))
+ self.server.close_request(self.request)
+ return
+
+ elif Socks5Auth.AUTH_USER_PASS in methods:
+ self.connection.sendall(struct.pack("!BB", SOCKS5_VERSION, Socks5Auth.AUTH_USER_PASS))
+
+ _, user_len = struct.unpack('!BB', self.connection.recv(2))
+ username = self.connection.recv(user_len).decode()
+ pass_len = ord(self.connection.recv(1))
+ password = self.connection.recv(pass_len).decode()
+
+ if username == auth[0] and password == auth[1]:
+ self.connection.sendall(struct.pack('!BB', SOCKS5_USER_AUTH_VERSION, SOCKS5_USER_AUTH_SUCCESS))
+ else:
+ self.connection.sendall(struct.pack('!BB', SOCKS5_USER_AUTH_VERSION, SOCKS5_USER_AUTH_FAILURE))
+ self.server.close_request(self.request)
+ return
+
+ elif Socks5Auth.AUTH_NONE in methods:
+ self.connection.sendall(struct.pack('!BB', SOCKS5_VERSION, Socks5Auth.AUTH_NONE))
+ else:
+ self.connection.sendall(struct.pack('!BB', SOCKS5_VERSION, Socks5Auth.AUTH_NO_ACCEPTABLE))
+ self.server.close_request(self.request)
+ return
+
+ version, command, _, address_type = struct.unpack('!BBBB', self.connection.recv(4))
+ socks_info = {
+ 'version': version,
+ 'auth_methods': methods,
+ 'command': command,
+ 'client_address': self.client_address,
+ 'ipv4_address': None,
+ 'domain_address': None,
+ 'ipv6_address': None,
+ }
+ if address_type == Socks5AddressType.ATYP_IPV4:
+ socks_info['ipv4_address'] = socket.inet_ntoa(self.connection.recv(4))
+ elif address_type == Socks5AddressType.ATYP_DOMAINNAME:
+ socks_info['domain_address'] = self.connection.recv(ord(self.connection.recv(1))).decode()
+ elif address_type == Socks5AddressType.ATYP_IPV6:
+ socks_info['ipv6_address'] = socket.inet_ntop(socket.AF_INET6, self.connection.recv(16))
+        else:
+            self.server.close_request(self.request)
+            return
+
+ socks_info['port'] = struct.unpack('!H', self.connection.recv(2))[0]
+
+ # dummy response, the returned IP is just a placeholder
+ self.connection.sendall(struct.pack(
+ '!BBBBIH', SOCKS5_VERSION, self.socks_kwargs.get('reply', Socks5Reply.SUCCEEDED), 0x0, 0x1, 0x7f000001, 40000))
+
+ self.request_handler_class(self.request, self.client_address, self.server, socks_info=socks_info)
+
+
+class Socks4ProxyHandler(StreamRequestHandler, SocksProxyHandler):
+
+ # SOCKS4 protocol http://www.openssh.com/txt/socks4.protocol
+ # SOCKS4A protocol http://www.openssh.com/txt/socks4a.protocol
+
+ def _read_until_null(self):
+ return b''.join(iter(functools.partial(self.connection.recv, 1), b'\x00'))
+
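+    # SOCKS4a convention: a client that cannot resolve the destination host
+    # sends a destination IP of 0.0.0.x (x non-zero) and appends the
+    # null-terminated hostname after the null-terminated userid.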
+ def handle(self):
+ sleep = self.socks_kwargs.get('sleep')
+ if sleep:
+ time.sleep(sleep)
+ socks_info = {
+ 'version': SOCKS4_VERSION,
+ 'command': None,
+ 'client_address': self.client_address,
+ 'ipv4_address': None,
+ 'port': None,
+ 'domain_address': None,
+ }
+ version, command, dest_port, dest_ip = struct.unpack('!BBHI', self.connection.recv(8))
+ socks_info['port'] = dest_port
+ socks_info['command'] = command
+ if version != SOCKS4_VERSION:
+ self.server.close_request(self.request)
+ return
+ use_remote_dns = False
+ if 0x0 < dest_ip <= 0xFF:
+ use_remote_dns = True
+ else:
+ socks_info['ipv4_address'] = socket.inet_ntoa(struct.pack("!I", dest_ip))
+
+ user_id = self._read_until_null().decode()
+ if user_id != (self.socks_kwargs.get('user_id') or ''):
+ self.connection.sendall(struct.pack(
+ '!BBHI', SOCKS4_REPLY_VERSION, Socks4CD.REQUEST_REJECTED_DIFFERENT_USERID, 0x00, 0x00000000))
+ self.server.close_request(self.request)
+ return
+
+ if use_remote_dns:
+ socks_info['domain_address'] = self._read_until_null().decode()
+
+ # dummy response, the returned IP is just a placeholder
+ self.connection.sendall(
+ struct.pack(
+ '!BBHI', SOCKS4_REPLY_VERSION,
+ self.socks_kwargs.get('cd_reply', Socks4CD.REQUEST_GRANTED), 40000, 0x7f000001))
+
+ self.request_handler_class(self.request, self.client_address, self.server, socks_info=socks_info)
+
+
+class IPv6ThreadingTCPServer(ThreadingTCPServer):
+ address_family = socket.AF_INET6
+
+
+class SocksHTTPTestRequestHandler(http.server.BaseHTTPRequestHandler, SocksTestRequestHandler):
+ def do_GET(self):
+ if self.path == '/socks_info':
+ payload = json.dumps(self.socks_info.copy())
+ self.send_response(200)
+ self.send_header('Content-Type', 'application/json; charset=utf-8')
+ self.send_header('Content-Length', str(len(payload)))
+ self.end_headers()
+ self.wfile.write(payload.encode())
+
+
+class SocksWebSocketTestRequestHandler(SocksTestRequestHandler):
+ def handle(self):
+ import websockets.sync.server
+ protocol = websockets.ServerProtocol()
+ connection = websockets.sync.server.ServerConnection(socket=self.request, protocol=protocol, close_timeout=0)
+ connection.handshake()
+ connection.send(json.dumps(self.socks_info))
+ connection.close()
+
+
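+# Spin up a disposable SOCKS test server on an ephemeral port in a daemon
+# thread and yield its address as 'host:port' (IPv6 hosts are bracketed).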
+@contextlib.contextmanager
+def socks_server(socks_server_class, request_handler, bind_ip=None, **socks_server_kwargs):
+ server = server_thread = None
+ try:
+ bind_address = bind_ip or '127.0.0.1'
+ server_type = ThreadingTCPServer if '.' in bind_address else IPv6ThreadingTCPServer
+ server = server_type(
+ (bind_address, 0), functools.partial(socks_server_class, request_handler, socks_server_kwargs))
+ server_port = http_server_port(server)
+ server_thread = threading.Thread(target=server.serve_forever)
+ server_thread.daemon = True
+ server_thread.start()
+ if '.' not in bind_address:
+ yield f'[{bind_address}]:{server_port}'
+ else:
+ yield f'{bind_address}:{server_port}'
+    finally:
+        if server:
+            server.shutdown()
+            server.server_close()
+        if server_thread:
+            server_thread.join(2.0)
+
+
+class SocksProxyTestContext(abc.ABC):
+ REQUEST_HANDLER_CLASS = None
+
+ def socks_server(self, server_class, *args, **kwargs):
+ return socks_server(server_class, self.REQUEST_HANDLER_CLASS, *args, **kwargs)
+
+ @abc.abstractmethod
+ def socks_info_request(self, handler, target_domain=None, target_port=None, **req_kwargs) -> dict:
+ """return a dict of socks_info"""
+
+
+class HTTPSocksTestProxyContext(SocksProxyTestContext):
+ REQUEST_HANDLER_CLASS = SocksHTTPTestRequestHandler
+
+ def socks_info_request(self, handler, target_domain=None, target_port=None, **req_kwargs):
+ request = Request(f'http://{target_domain or "127.0.0.1"}:{target_port or "40000"}/socks_info', **req_kwargs)
+ handler.validate(request)
+ return json.loads(handler.send(request).read().decode())
+
+
+class WebSocketSocksTestProxyContext(SocksProxyTestContext):
+ REQUEST_HANDLER_CLASS = SocksWebSocketTestRequestHandler
+
+ def socks_info_request(self, handler, target_domain=None, target_port=None, **req_kwargs):
+ request = Request(f'ws://{target_domain or "127.0.0.1"}:{target_port or "40000"}', **req_kwargs)
+ handler.validate(request)
+ ws = handler.send(request)
+ ws.send('socks_info')
+ socks_info = ws.recv()
+ ws.close()
+ return json.loads(socks_info)
+
+
+CTX_MAP = {
+ 'http': HTTPSocksTestProxyContext,
+ 'ws': WebSocketSocksTestProxyContext,
+}
+
+
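+# 'ctx' is an indirect fixture: the 'http'/'ws' values in each test's
+# @pytest.mark.parametrize(..., indirect=True) are routed through this
+# fixture and mapped to the proxy test context matching the request handler.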
+@pytest.fixture(scope='module')
+def ctx(request):
+ return CTX_MAP[request.param]()
+
+
+class TestSocks4Proxy:
+ @pytest.mark.parametrize('handler,ctx', [('Urllib', 'http'), ('Requests', 'http'), ('Websockets', 'ws')], indirect=True)
+ def test_socks4_no_auth(self, handler, ctx):
+ with handler() as rh:
+ with ctx.socks_server(Socks4ProxyHandler) as server_address:
+ response = ctx.socks_info_request(
+ rh, proxies={'all': f'socks4://{server_address}'})
+ assert response['version'] == 4
+
+ @pytest.mark.parametrize('handler,ctx', [('Urllib', 'http'), ('Requests', 'http'), ('Websockets', 'ws')], indirect=True)
+ def test_socks4_auth(self, handler, ctx):
+ with handler() as rh:
+ with ctx.socks_server(Socks4ProxyHandler, user_id='user') as server_address:
+ with pytest.raises(ProxyError):
+ ctx.socks_info_request(rh, proxies={'all': f'socks4://{server_address}'})
+ response = ctx.socks_info_request(
+ rh, proxies={'all': f'socks4://user:@{server_address}'})
+ assert response['version'] == 4
+
+ @pytest.mark.parametrize('handler,ctx', [('Urllib', 'http'), ('Requests', 'http'), ('Websockets', 'ws')], indirect=True)
+ def test_socks4a_ipv4_target(self, handler, ctx):
+ with ctx.socks_server(Socks4ProxyHandler) as server_address:
+ with handler(proxies={'all': f'socks4a://{server_address}'}) as rh:
+ response = ctx.socks_info_request(rh, target_domain='127.0.0.1')
+ assert response['version'] == 4
+ assert (response['ipv4_address'] == '127.0.0.1') != (response['domain_address'] == '127.0.0.1')
+
+ @pytest.mark.parametrize('handler,ctx', [('Urllib', 'http'), ('Requests', 'http'), ('Websockets', 'ws')], indirect=True)
+ def test_socks4a_domain_target(self, handler, ctx):
+ with ctx.socks_server(Socks4ProxyHandler) as server_address:
+ with handler(proxies={'all': f'socks4a://{server_address}'}) as rh:
+ response = ctx.socks_info_request(rh, target_domain='localhost')
+ assert response['version'] == 4
+ assert response['ipv4_address'] is None
+ assert response['domain_address'] == 'localhost'
+
+ @pytest.mark.parametrize('handler,ctx', [('Urllib', 'http'), ('Requests', 'http'), ('Websockets', 'ws')], indirect=True)
+ def test_ipv4_client_source_address(self, handler, ctx):
+ with ctx.socks_server(Socks4ProxyHandler) as server_address:
+ source_address = f'127.0.0.{random.randint(5, 255)}'
+ verify_address_availability(source_address)
+ with handler(proxies={'all': f'socks4://{server_address}'},
+ source_address=source_address) as rh:
+ response = ctx.socks_info_request(rh)
+ assert response['client_address'][0] == source_address
+ assert response['version'] == 4
+
+ @pytest.mark.parametrize('handler,ctx', [('Urllib', 'http'), ('Requests', 'http'), ('Websockets', 'ws')], indirect=True)
+ @pytest.mark.parametrize('reply_code', [
+ Socks4CD.REQUEST_REJECTED_OR_FAILED,
+ Socks4CD.REQUEST_REJECTED_CANNOT_CONNECT_TO_IDENTD,
+ Socks4CD.REQUEST_REJECTED_DIFFERENT_USERID,
+ ])
+ def test_socks4_errors(self, handler, ctx, reply_code):
+ with ctx.socks_server(Socks4ProxyHandler, cd_reply=reply_code) as server_address:
+ with handler(proxies={'all': f'socks4://{server_address}'}) as rh:
+ with pytest.raises(ProxyError):
+ ctx.socks_info_request(rh)
+
+ @pytest.mark.parametrize('handler,ctx', [('Urllib', 'http'), ('Requests', 'http'), ('Websockets', 'ws')], indirect=True)
+ def test_ipv6_socks4_proxy(self, handler, ctx):
+ with ctx.socks_server(Socks4ProxyHandler, bind_ip='::1') as server_address:
+ with handler(proxies={'all': f'socks4://{server_address}'}) as rh:
+ response = ctx.socks_info_request(rh, target_domain='127.0.0.1')
+ assert response['client_address'][0] == '::1'
+ assert response['ipv4_address'] == '127.0.0.1'
+ assert response['version'] == 4
+
+ @pytest.mark.parametrize('handler,ctx', [('Urllib', 'http'), ('Requests', 'http'), ('Websockets', 'ws')], indirect=True)
+ def test_timeout(self, handler, ctx):
+ with ctx.socks_server(Socks4ProxyHandler, sleep=2) as server_address:
+ with handler(proxies={'all': f'socks4://{server_address}'}, timeout=0.5) as rh:
+ with pytest.raises(TransportError):
+ ctx.socks_info_request(rh)
+
+
+class TestSocks5Proxy:
+
+ @pytest.mark.parametrize('handler,ctx', [('Urllib', 'http'), ('Requests', 'http'), ('Websockets', 'ws')], indirect=True)
+ def test_socks5_no_auth(self, handler, ctx):
+ with ctx.socks_server(Socks5ProxyHandler) as server_address:
+ with handler(proxies={'all': f'socks5://{server_address}'}) as rh:
+ response = ctx.socks_info_request(rh)
+ assert response['auth_methods'] == [0x0]
+ assert response['version'] == 5
+
+ @pytest.mark.parametrize('handler,ctx', [('Urllib', 'http'), ('Requests', 'http'), ('Websockets', 'ws')], indirect=True)
+ def test_socks5_user_pass(self, handler, ctx):
+ with ctx.socks_server(Socks5ProxyHandler, auth=('test', 'testpass')) as server_address:
+ with handler() as rh:
+ with pytest.raises(ProxyError):
+ ctx.socks_info_request(rh, proxies={'all': f'socks5://{server_address}'})
+
+ response = ctx.socks_info_request(
+ rh, proxies={'all': f'socks5://test:testpass@{server_address}'})
+
+ assert response['auth_methods'] == [Socks5Auth.AUTH_NONE, Socks5Auth.AUTH_USER_PASS]
+ assert response['version'] == 5
+
+ @pytest.mark.parametrize('handler,ctx', [('Urllib', 'http'), ('Requests', 'http'), ('Websockets', 'ws')], indirect=True)
+ def test_socks5_ipv4_target(self, handler, ctx):
+ with ctx.socks_server(Socks5ProxyHandler) as server_address:
+ with handler(proxies={'all': f'socks5://{server_address}'}) as rh:
+ response = ctx.socks_info_request(rh, target_domain='127.0.0.1')
+ assert response['ipv4_address'] == '127.0.0.1'
+ assert response['version'] == 5
+
+ @pytest.mark.parametrize('handler,ctx', [('Urllib', 'http'), ('Requests', 'http'), ('Websockets', 'ws')], indirect=True)
+ def test_socks5_domain_target(self, handler, ctx):
+ with ctx.socks_server(Socks5ProxyHandler) as server_address:
+ with handler(proxies={'all': f'socks5://{server_address}'}) as rh:
+ response = ctx.socks_info_request(rh, target_domain='localhost')
+ assert (response['ipv4_address'] == '127.0.0.1') != (response['ipv6_address'] == '::1')
+ assert response['version'] == 5
+
+ @pytest.mark.parametrize('handler,ctx', [('Urllib', 'http'), ('Requests', 'http'), ('Websockets', 'ws')], indirect=True)
+ def test_socks5h_domain_target(self, handler, ctx):
+ with ctx.socks_server(Socks5ProxyHandler) as server_address:
+ with handler(proxies={'all': f'socks5h://{server_address}'}) as rh:
+ response = ctx.socks_info_request(rh, target_domain='localhost')
+ assert response['ipv4_address'] is None
+ assert response['domain_address'] == 'localhost'
+ assert response['version'] == 5
+
+ @pytest.mark.parametrize('handler,ctx', [('Urllib', 'http'), ('Requests', 'http'), ('Websockets', 'ws')], indirect=True)
+ def test_socks5h_ip_target(self, handler, ctx):
+ with ctx.socks_server(Socks5ProxyHandler) as server_address:
+ with handler(proxies={'all': f'socks5h://{server_address}'}) as rh:
+ response = ctx.socks_info_request(rh, target_domain='127.0.0.1')
+ assert response['ipv4_address'] == '127.0.0.1'
+ assert response['domain_address'] is None
+ assert response['version'] == 5
+
+ @pytest.mark.parametrize('handler,ctx', [('Urllib', 'http'), ('Requests', 'http'), ('Websockets', 'ws')], indirect=True)
+ def test_socks5_ipv6_destination(self, handler, ctx):
+ with ctx.socks_server(Socks5ProxyHandler) as server_address:
+ with handler(proxies={'all': f'socks5://{server_address}'}) as rh:
+ response = ctx.socks_info_request(rh, target_domain='[::1]')
+ assert response['ipv6_address'] == '::1'
+ assert response['version'] == 5
+
+ @pytest.mark.parametrize('handler,ctx', [('Urllib', 'http'), ('Requests', 'http'), ('Websockets', 'ws')], indirect=True)
+ def test_ipv6_socks5_proxy(self, handler, ctx):
+ with ctx.socks_server(Socks5ProxyHandler, bind_ip='::1') as server_address:
+ with handler(proxies={'all': f'socks5://{server_address}'}) as rh:
+ response = ctx.socks_info_request(rh, target_domain='127.0.0.1')
+ assert response['client_address'][0] == '::1'
+ assert response['ipv4_address'] == '127.0.0.1'
+ assert response['version'] == 5
+
+ # XXX: is there any feasible way of testing IPv6 source addresses?
+ # Same would go for non-proxy source_address test...
+ @pytest.mark.parametrize('handler,ctx', [('Urllib', 'http'), ('Requests', 'http'), ('Websockets', 'ws')], indirect=True)
+ def test_ipv4_client_source_address(self, handler, ctx):
+ with ctx.socks_server(Socks5ProxyHandler) as server_address:
+ source_address = f'127.0.0.{random.randint(5, 255)}'
+ verify_address_availability(source_address)
+ with handler(proxies={'all': f'socks5://{server_address}'}, source_address=source_address) as rh:
+ response = ctx.socks_info_request(rh)
+ assert response['client_address'][0] == source_address
+ assert response['version'] == 5
+
+ @pytest.mark.parametrize('handler,ctx', [('Urllib', 'http'), ('Requests', 'http'), ('Websockets', 'ws')], indirect=True)
+ @pytest.mark.parametrize('reply_code', [
+ Socks5Reply.GENERAL_FAILURE,
+ Socks5Reply.CONNECTION_NOT_ALLOWED,
+ Socks5Reply.NETWORK_UNREACHABLE,
+ Socks5Reply.HOST_UNREACHABLE,
+ Socks5Reply.CONNECTION_REFUSED,
+ Socks5Reply.TTL_EXPIRED,
+ Socks5Reply.COMMAND_NOT_SUPPORTED,
+ Socks5Reply.ADDRESS_TYPE_NOT_SUPPORTED,
+ ])
+ def test_socks5_errors(self, handler, ctx, reply_code):
+ with ctx.socks_server(Socks5ProxyHandler, reply=reply_code) as server_address:
+ with handler(proxies={'all': f'socks5://{server_address}'}) as rh:
+ with pytest.raises(ProxyError):
+ ctx.socks_info_request(rh)
+
+ @pytest.mark.parametrize('handler,ctx', [('Urllib', 'http'), ('Websockets', 'ws')], indirect=True)
+ def test_timeout(self, handler, ctx):
+ with ctx.socks_server(Socks5ProxyHandler, sleep=2) as server_address:
+ with handler(proxies={'all': f'socks5://{server_address}'}, timeout=1) as rh:
+ with pytest.raises(TransportError):
+ ctx.socks_info_request(rh)
+
+
+if __name__ == '__main__':
+ unittest.main()
diff --git a/test/test_subtitles.py b/test/test_subtitles.py
new file mode 100644
index 0000000..5736289
--- /dev/null
+++ b/test/test_subtitles.py
@@ -0,0 +1,452 @@
+#!/usr/bin/env python3
+
+# Allow direct execution
+import os
+import sys
+import unittest
+
+sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
+
+
+from test.helper import FakeYDL, is_download_test, md5
+from yt_dlp.extractor import (
+ NPOIE,
+ NRKTVIE,
+ PBSIE,
+ CeskaTelevizeIE,
+ ComedyCentralIE,
+ DailymotionIE,
+ DemocracynowIE,
+ LyndaIE,
+ RaiPlayIE,
+ RTVEALaCartaIE,
+ TedTalkIE,
+ ThePlatformFeedIE,
+ ThePlatformIE,
+ VikiIE,
+ VimeoIE,
+ WallaIE,
+ YoutubeIE,
+)
+
+
+@is_download_test
+class BaseTestSubtitles(unittest.TestCase):
+ url = None
+ IE = None
+
+ def setUp(self):
+ self.DL = FakeYDL()
+ self.ie = self.IE()
+ self.DL.add_info_extractor(self.ie)
+ if not self.IE.working():
+            print(f'Skipping: {self.IE.ie_key()} marked as not _WORKING')
+ self.skipTest('IE marked as not _WORKING')
+
+    def getInfoDict(self):
+        return self.DL.extract_info(self.url, download=False)
+
+ def getSubtitles(self):
+ info_dict = self.getInfoDict()
+ subtitles = info_dict['requested_subtitles']
+ if not subtitles:
+ return subtitles
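+        # Extractors may provide only a URL for a track; fetch the data on
+        # demand so the md5-based assertions in the tests can run.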
+ for sub_info in subtitles.values():
+ if sub_info.get('data') is None:
+ uf = self.DL.urlopen(sub_info['url'])
+ sub_info['data'] = uf.read().decode()
+        return {lang: sub_info['data'] for lang, sub_info in subtitles.items()}
+
+
+@is_download_test
+class TestYoutubeSubtitles(BaseTestSubtitles):
+ # Available subtitles for QRS8MkLhQmM:
+ # Language formats
+ # ru vtt, ttml, srv3, srv2, srv1, json3
+ # fr vtt, ttml, srv3, srv2, srv1, json3
+ # en vtt, ttml, srv3, srv2, srv1, json3
+ # nl vtt, ttml, srv3, srv2, srv1, json3
+ # de vtt, ttml, srv3, srv2, srv1, json3
+ # ko vtt, ttml, srv3, srv2, srv1, json3
+ # it vtt, ttml, srv3, srv2, srv1, json3
+ # zh-Hant vtt, ttml, srv3, srv2, srv1, json3
+ # hi vtt, ttml, srv3, srv2, srv1, json3
+ # pt-BR vtt, ttml, srv3, srv2, srv1, json3
+ # es-MX vtt, ttml, srv3, srv2, srv1, json3
+ # ja vtt, ttml, srv3, srv2, srv1, json3
+ # pl vtt, ttml, srv3, srv2, srv1, json3
+ url = 'QRS8MkLhQmM'
+ IE = YoutubeIE
+
+ def test_youtube_allsubtitles(self):
+ self.DL.params['writesubtitles'] = True
+ self.DL.params['allsubtitles'] = True
+ subtitles = self.getSubtitles()
+        self.assertEqual(len(subtitles), 13)
+ self.assertEqual(md5(subtitles['en']), 'ae1bd34126571a77aabd4d276b28044d')
+ self.assertEqual(md5(subtitles['it']), '0e0b667ba68411d88fd1c5f4f4eab2f9')
+ for lang in ['fr', 'de']:
+            self.assertIsNotNone(subtitles.get(lang), f'Subtitles for {lang!r} not extracted')
+
+ def _test_subtitles_format(self, fmt, md5_hash, lang='en'):
+ self.DL.params['writesubtitles'] = True
+ self.DL.params['subtitlesformat'] = fmt
+ subtitles = self.getSubtitles()
+ self.assertEqual(md5(subtitles[lang]), md5_hash)
+
+ def test_youtube_subtitles_ttml_format(self):
+ self._test_subtitles_format('ttml', 'c97ddf1217390906fa9fbd34901f3da2')
+
+ def test_youtube_subtitles_vtt_format(self):
+ self._test_subtitles_format('vtt', 'ae1bd34126571a77aabd4d276b28044d')
+
+ def test_youtube_subtitles_json3_format(self):
+ self._test_subtitles_format('json3', '688dd1ce0981683867e7fe6fde2a224b')
+
+ def _test_automatic_captions(self, url, lang):
+ self.url = url
+ self.DL.params['writeautomaticsub'] = True
+ self.DL.params['subtitleslangs'] = [lang]
+ subtitles = self.getSubtitles()
+        self.assertIsNotNone(subtitles[lang])
+
+ def test_youtube_automatic_captions(self):
+ # Available automatic captions for 8YoUxe5ncPo:
+ # Language formats (all in vtt, ttml, srv3, srv2, srv1, json3)
+ # gu, zh-Hans, zh-Hant, gd, ga, gl, lb, la, lo, tt, tr,
+ # lv, lt, tk, th, tg, te, fil, haw, yi, ceb, yo, de, da,
+ # el, eo, en, eu, et, es, ru, rw, ro, bn, be, bg, uk, jv,
+ # bs, ja, or, xh, co, ca, cy, cs, ps, pt, pa, vi, pl, hy,
+ # hr, ht, hu, hmn, hi, ha, mg, uz, ml, mn, mi, mk, ur,
+ # mt, ms, mr, ug, ta, my, af, sw, is, am,
+        # it, iw, sv, ar,  (the 'it' track is the one requested below)
+ # su, zu, az, id, ig, nl, no, ne, ny, fr, ku, fy, fa, fi,
+ # ka, kk, sr, sq, ko, kn, km, st, sk, si, so, sn, sm, sl,
+ # ky, sd
+ # ...
+ self._test_automatic_captions('8YoUxe5ncPo', 'it')
+
+ @unittest.skip('Video unavailable')
+ def test_youtube_translated_subtitles(self):
+ # This video has a subtitles track, which can be translated (#4555)
+ self._test_automatic_captions('Ky9eprVWzlI', 'it')
+
+ def test_youtube_nosubtitles(self):
+ self.DL.expect_warning('video doesn\'t have subtitles')
+        # 8YoUxe5ncPo has automatic captions (listed above) but no manual subtitles
+ self.url = '8YoUxe5ncPo'
+ self.DL.params['writesubtitles'] = True
+ self.DL.params['allsubtitles'] = True
+ subtitles = self.getSubtitles()
+ self.assertFalse(subtitles)
+
+
+@is_download_test
+class TestDailymotionSubtitles(BaseTestSubtitles):
+ url = 'http://www.dailymotion.com/video/xczg00'
+ IE = DailymotionIE
+
+ def test_allsubtitles(self):
+ self.DL.params['writesubtitles'] = True
+ self.DL.params['allsubtitles'] = True
+ subtitles = self.getSubtitles()
+        self.assertGreaterEqual(len(subtitles), 6)
+ self.assertEqual(md5(subtitles['en']), '976553874490cba125086bbfea3ff76f')
+ self.assertEqual(md5(subtitles['fr']), '594564ec7d588942e384e920e5341792')
+        for lang in ['es', 'fr', 'de']:
+            self.assertIsNotNone(subtitles.get(lang), f'Subtitles for {lang!r} not extracted')
+
+ def test_nosubtitles(self):
+ self.DL.expect_warning('video doesn\'t have subtitles')
+ self.url = 'http://www.dailymotion.com/video/x12u166_le-zapping-tele-star-du-08-aout-2013_tv'
+ self.DL.params['writesubtitles'] = True
+ self.DL.params['allsubtitles'] = True
+ subtitles = self.getSubtitles()
+ self.assertFalse(subtitles)
+
+
+@is_download_test
+@unittest.skip('IE broken')
+class TestTedSubtitles(BaseTestSubtitles):
+ url = 'http://www.ted.com/talks/dan_dennett_on_our_consciousness.html'
+ IE = TedTalkIE
+
+ def test_allsubtitles(self):
+ self.DL.params['writesubtitles'] = True
+ self.DL.params['allsubtitles'] = True
+ subtitles = self.getSubtitles()
+        self.assertGreaterEqual(len(subtitles), 28)
+ self.assertEqual(md5(subtitles['en']), '4262c1665ff928a2dada178f62cb8d14')
+ self.assertEqual(md5(subtitles['fr']), '66a63f7f42c97a50f8c0e90bc7797bb5')
+        for lang in ['es', 'fr', 'de']:
+            self.assertIsNotNone(subtitles.get(lang), f'Subtitles for {lang!r} not extracted')
+
+
+@is_download_test
+class TestVimeoSubtitles(BaseTestSubtitles):
+ url = 'http://vimeo.com/76979871'
+ IE = VimeoIE
+
+ def test_allsubtitles(self):
+ self.DL.params['writesubtitles'] = True
+ self.DL.params['allsubtitles'] = True
+ subtitles = self.getSubtitles()
+ self.assertEqual(set(subtitles.keys()), {'de', 'en', 'es', 'fr'})
+ self.assertEqual(md5(subtitles['en']), '386cbc9320b94e25cb364b97935e5dd1')
+ self.assertEqual(md5(subtitles['fr']), 'c9b69eef35bc6641c0d4da8a04f9dfac')
+
+ def test_nosubtitles(self):
+ self.DL.expect_warning('video doesn\'t have subtitles')
+ self.url = 'http://vimeo.com/68093876'
+ self.DL.params['writesubtitles'] = True
+ self.DL.params['allsubtitles'] = True
+ subtitles = self.getSubtitles()
+ self.assertFalse(subtitles)
+
+
+@is_download_test
+@unittest.skip('IE broken')
+class TestWallaSubtitles(BaseTestSubtitles):
+ url = 'http://vod.walla.co.il/movie/2705958/the-yes-men'
+ IE = WallaIE
+
+ def test_allsubtitles(self):
+ self.DL.expect_warning('Automatic Captions not supported by this server')
+ self.DL.params['writesubtitles'] = True
+ self.DL.params['allsubtitles'] = True
+ subtitles = self.getSubtitles()
+ self.assertEqual(set(subtitles.keys()), {'heb'})
+ self.assertEqual(md5(subtitles['heb']), 'e758c5d7cb982f6bef14f377ec7a3920')
+
+ def test_nosubtitles(self):
+ self.DL.expect_warning('video doesn\'t have subtitles')
+ self.url = 'http://vod.walla.co.il/movie/2642630/one-direction-all-for-one'
+ self.DL.params['writesubtitles'] = True
+ self.DL.params['allsubtitles'] = True
+ subtitles = self.getSubtitles()
+ self.assertFalse(subtitles)
+
+
+@is_download_test
+@unittest.skip('IE broken')
+class TestCeskaTelevizeSubtitles(BaseTestSubtitles):
+ url = 'http://www.ceskatelevize.cz/ivysilani/10600540290-u6-uzasny-svet-techniky'
+ IE = CeskaTelevizeIE
+
+ def test_allsubtitles(self):
+ self.DL.expect_warning('Automatic Captions not supported by this server')
+ self.DL.params['writesubtitles'] = True
+ self.DL.params['allsubtitles'] = True
+ subtitles = self.getSubtitles()
+ self.assertEqual(set(subtitles.keys()), {'cs'})
+        self.assertGreater(len(subtitles['cs']), 20000)
+
+ def test_nosubtitles(self):
+ self.DL.expect_warning('video doesn\'t have subtitles')
+ self.url = 'http://www.ceskatelevize.cz/ivysilani/ivysilani/10441294653-hyde-park-civilizace/214411058091220'
+ self.DL.params['writesubtitles'] = True
+ self.DL.params['allsubtitles'] = True
+ subtitles = self.getSubtitles()
+ self.assertFalse(subtitles)
+
+
+@is_download_test
+@unittest.skip('IE broken')
+class TestLyndaSubtitles(BaseTestSubtitles):
+ url = 'http://www.lynda.com/Bootstrap-tutorials/Using-exercise-files/110885/114408-4.html'
+ IE = LyndaIE
+
+ def test_allsubtitles(self):
+ self.DL.params['writesubtitles'] = True
+ self.DL.params['allsubtitles'] = True
+ subtitles = self.getSubtitles()
+ self.assertEqual(set(subtitles.keys()), {'en'})
+ self.assertEqual(md5(subtitles['en']), '09bbe67222259bed60deaa26997d73a7')
+
+
+@is_download_test
+@unittest.skip('IE broken')
+class TestNPOSubtitles(BaseTestSubtitles):
+ url = 'http://www.npo.nl/nos-journaal/28-08-2014/POW_00722860'
+ IE = NPOIE
+
+ def test_allsubtitles(self):
+ self.DL.params['writesubtitles'] = True
+ self.DL.params['allsubtitles'] = True
+ subtitles = self.getSubtitles()
+ self.assertEqual(set(subtitles.keys()), {'nl'})
+ self.assertEqual(md5(subtitles['nl']), 'fc6435027572b63fb4ab143abd5ad3f4')
+
+
+@is_download_test
+@unittest.skip('IE broken')
+class TestMTVSubtitles(BaseTestSubtitles):
+ url = 'http://www.cc.com/video-clips/p63lk0/adam-devine-s-house-party-chasing-white-swans'
+ IE = ComedyCentralIE
+
+ def getInfoDict(self):
+ return super().getInfoDict()['entries'][0]
+
+ def test_allsubtitles(self):
+ self.DL.params['writesubtitles'] = True
+ self.DL.params['allsubtitles'] = True
+ subtitles = self.getSubtitles()
+ self.assertEqual(set(subtitles.keys()), {'en'})
+ self.assertEqual(md5(subtitles['en']), '78206b8d8a0cfa9da64dc026eea48961')
+
+
+@is_download_test
+class TestNRKSubtitles(BaseTestSubtitles):
+ url = 'http://tv.nrk.no/serie/ikke-gjoer-dette-hjemme/DMPV73000411/sesong-2/episode-1'
+ IE = NRKTVIE
+
+ def test_allsubtitles(self):
+ self.DL.params['writesubtitles'] = True
+ self.DL.params['allsubtitles'] = True
+ subtitles = self.getSubtitles()
+ self.assertEqual(set(subtitles.keys()), {'nb-ttv'})
+ self.assertEqual(md5(subtitles['nb-ttv']), '67e06ff02d0deaf975e68f6cb8f6a149')
+
+
+@is_download_test
+class TestRaiPlaySubtitles(BaseTestSubtitles):
+ IE = RaiPlayIE
+
+ def test_subtitles_key(self):
+ self.url = 'http://www.raiplay.it/video/2014/04/Report-del-07042014-cb27157f-9dd0-4aee-b788-b1f67643a391.html'
+ self.DL.params['writesubtitles'] = True
+ self.DL.params['allsubtitles'] = True
+ subtitles = self.getSubtitles()
+ self.assertEqual(set(subtitles.keys()), {'it'})
+ self.assertEqual(md5(subtitles['it']), 'b1d90a98755126b61e667567a1f6680a')
+
+ def test_subtitles_array_key(self):
+ self.url = 'https://www.raiplay.it/video/2020/12/Report---04-01-2021-2e90f1de-8eee-4de4-ac0e-78d21db5b600.html'
+ self.DL.params['writesubtitles'] = True
+ self.DL.params['allsubtitles'] = True
+ subtitles = self.getSubtitles()
+ self.assertEqual(set(subtitles.keys()), {'it'})
+ self.assertEqual(md5(subtitles['it']), '4b3264186fbb103508abe5311cfcb9cd')
+
+
+@is_download_test
+@unittest.skip('IE broken - DRM only')
+class TestVikiSubtitles(BaseTestSubtitles):
+ url = 'http://www.viki.com/videos/1060846v-punch-episode-18'
+ IE = VikiIE
+
+ def test_allsubtitles(self):
+ self.DL.params['writesubtitles'] = True
+ self.DL.params['allsubtitles'] = True
+ subtitles = self.getSubtitles()
+ self.assertEqual(set(subtitles.keys()), {'en'})
+ self.assertEqual(md5(subtitles['en']), '53cb083a5914b2d84ef1ab67b880d18a')
+
+
+@is_download_test
+class TestThePlatformSubtitles(BaseTestSubtitles):
+ # from http://www.3playmedia.com/services-features/tools/integrations/theplatform/
+ # (see http://theplatform.com/about/partners/type/subtitles-closed-captioning/)
+ url = 'theplatform:JFUjUE1_ehvq'
+ IE = ThePlatformIE
+
+ def test_allsubtitles(self):
+ self.DL.params['writesubtitles'] = True
+ self.DL.params['allsubtitles'] = True
+ subtitles = self.getSubtitles()
+ self.assertEqual(set(subtitles.keys()), {'en'})
+ self.assertEqual(md5(subtitles['en']), '97e7670cbae3c4d26ae8bcc7fdd78d4b')
+
+
+@is_download_test
+@unittest.skip('IE broken')
+class TestThePlatformFeedSubtitles(BaseTestSubtitles):
+ url = 'http://feed.theplatform.com/f/7wvmTC/msnbc_video-p-test?form=json&pretty=true&range=-40&byGuid=n_hardball_5biden_140207'
+ IE = ThePlatformFeedIE
+
+ def test_allsubtitles(self):
+ self.DL.params['writesubtitles'] = True
+ self.DL.params['allsubtitles'] = True
+ subtitles = self.getSubtitles()
+ self.assertEqual(set(subtitles.keys()), {'en'})
+ self.assertEqual(md5(subtitles['en']), '48649a22e82b2da21c9a67a395eedade')
+
+
+@is_download_test
+class TestRtveSubtitles(BaseTestSubtitles):
+ url = 'http://www.rtve.es/alacarta/videos/los-misterios-de-laura/misterios-laura-capitulo-32-misterio-del-numero-17-2-parte/2428621/'
+ IE = RTVEALaCartaIE
+
+    @unittest.skip('Only available from Spain')
+    def test_allsubtitles(self):
+ self.DL.params['writesubtitles'] = True
+ self.DL.params['allsubtitles'] = True
+ subtitles = self.getSubtitles()
+ self.assertEqual(set(subtitles.keys()), {'es'})
+ self.assertEqual(md5(subtitles['es']), '69e70cae2d40574fb7316f31d6eb7fca')
+
+
+@is_download_test
+class TestDemocracynowSubtitles(BaseTestSubtitles):
+ url = 'http://www.democracynow.org/shows/2015/7/3'
+ IE = DemocracynowIE
+
+ def test_allsubtitles(self):
+ self.DL.params['writesubtitles'] = True
+ self.DL.params['allsubtitles'] = True
+ subtitles = self.getSubtitles()
+ self.assertEqual(set(subtitles.keys()), {'en'})
+ self.assertEqual(md5(subtitles['en']), 'a3cc4c0b5eadd74d9974f1c1f5101045')
+
+ def test_subtitles_in_page(self):
+ self.url = 'http://www.democracynow.org/2015/7/3/this_flag_comes_down_today_bree'
+ self.DL.params['writesubtitles'] = True
+ self.DL.params['allsubtitles'] = True
+ subtitles = self.getSubtitles()
+ self.assertEqual(set(subtitles.keys()), {'en'})
+ self.assertEqual(md5(subtitles['en']), 'a3cc4c0b5eadd74d9974f1c1f5101045')
+
+
+@is_download_test
+class TestPBSSubtitles(BaseTestSubtitles):
+ url = 'https://www.pbs.org/video/how-fantasy-reflects-our-world-picecq/'
+ IE = PBSIE
+
+ def test_allsubtitles(self):
+ self.DL.params['writesubtitles'] = True
+ self.DL.params['allsubtitles'] = True
+ subtitles = self.getSubtitles()
+ self.assertEqual(set(subtitles.keys()), {'en'})
+
+ def test_subtitles_dfxp_format(self):
+ self.DL.params['writesubtitles'] = True
+ self.DL.params['subtitlesformat'] = 'dfxp'
+ subtitles = self.getSubtitles()
+ self.assertIn(md5(subtitles['en']), ['643b034254cdc3768ff1e750b6b5873b'])
+
+ def test_subtitles_vtt_format(self):
+ self.DL.params['writesubtitles'] = True
+ self.DL.params['subtitlesformat'] = 'vtt'
+ subtitles = self.getSubtitles()
+ self.assertIn(
+ md5(subtitles['en']), ['937a05711555b165d4c55a9667017045', 'f49ea998d6824d94959c8152a368ff73'])
+
+ def test_subtitles_srt_format(self):
+ self.DL.params['writesubtitles'] = True
+ self.DL.params['subtitlesformat'] = 'srt'
+ subtitles = self.getSubtitles()
+ self.assertIn(md5(subtitles['en']), ['2082c21b43759d9bf172931b2f2ca371'])
+
+ def test_subtitles_sami_format(self):
+ self.DL.params['writesubtitles'] = True
+ self.DL.params['subtitlesformat'] = 'sami'
+ subtitles = self.getSubtitles()
+ self.assertIn(md5(subtitles['en']), ['4256b16ac7da6a6780fafd04294e85cd'])
+
+
+if __name__ == '__main__':
+ unittest.main()
diff --git a/test/test_update.py b/test/test_update.py
new file mode 100644
index 0000000..bc13956
--- /dev/null
+++ b/test/test_update.py
@@ -0,0 +1,228 @@
+#!/usr/bin/env python3
+
+# Allow direct execution
+import os
+import sys
+import unittest
+
+sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
+
+
+from test.helper import FakeYDL, report_warning
+from yt_dlp.update import UpdateInfo, Updater
+
+
+# XXX: Keep in sync with yt_dlp.update.UPDATE_SOURCES
+TEST_UPDATE_SOURCES = {
+ 'stable': 'yt-dlp/yt-dlp',
+ 'nightly': 'yt-dlp/yt-dlp-nightly-builds',
+ 'master': 'yt-dlp/yt-dlp-master-builds',
+}
+
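+# Canned GitHub releases API responses, keyed by '<repo>/latest' or
+# '<repo>/tags/<tag>'. Where target_commitish is not a commit hash, the
+# query_update tests below expect the commit to be resolved from the
+# 'Generated from: .../commit/<hash>' line in the release body.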
+TEST_API_DATA = {
+ 'yt-dlp/yt-dlp/latest': {
+ 'tag_name': '2023.12.31',
+ 'target_commitish': 'bbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbb',
+ 'name': 'yt-dlp 2023.12.31',
+ 'body': 'BODY',
+ },
+ 'yt-dlp/yt-dlp-nightly-builds/latest': {
+ 'tag_name': '2023.12.31.123456',
+ 'target_commitish': 'master',
+ 'name': 'yt-dlp nightly 2023.12.31.123456',
+ 'body': 'Generated from: https://github.com/yt-dlp/yt-dlp/commit/cccccccccccccccccccccccccccccccccccccccc',
+ },
+ 'yt-dlp/yt-dlp-master-builds/latest': {
+ 'tag_name': '2023.12.31.987654',
+ 'target_commitish': 'master',
+ 'name': 'yt-dlp master 2023.12.31.987654',
+ 'body': 'Generated from: https://github.com/yt-dlp/yt-dlp/commit/dddddddddddddddddddddddddddddddddddddddd',
+ },
+ 'yt-dlp/yt-dlp/tags/testing': {
+ 'tag_name': 'testing',
+ 'target_commitish': '9999999999999999999999999999999999999999',
+ 'name': 'testing',
+ 'body': 'BODY',
+ },
+ 'fork/yt-dlp/latest': {
+ 'tag_name': '2050.12.31',
+ 'target_commitish': 'eeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeee',
+ 'name': '2050.12.31',
+ 'body': 'BODY',
+ },
+ 'fork/yt-dlp/tags/pr0000': {
+ 'tag_name': 'pr0000',
+ 'target_commitish': 'ffffffffffffffffffffffffffffffffffffffff',
+ 'name': 'pr1234 2023.11.11.000000',
+ 'body': 'BODY',
+ },
+ 'fork/yt-dlp/tags/pr1234': {
+ 'tag_name': 'pr1234',
+ 'target_commitish': '0000000000000000000000000000000000000000',
+ 'name': 'pr1234 2023.12.31.555555',
+ 'body': 'BODY',
+ },
+ 'fork/yt-dlp/tags/pr9999': {
+ 'tag_name': 'pr9999',
+ 'target_commitish': '1111111111111111111111111111111111111111',
+ 'name': 'pr9999',
+ 'body': 'BODY',
+ },
+ 'fork/yt-dlp-satellite/tags/pr987': {
+ 'tag_name': 'pr987',
+ 'target_commitish': 'master',
+ 'name': 'pr987',
+ 'body': 'Generated from: https://github.com/yt-dlp/yt-dlp/commit/2222222222222222222222222222222222222222',
+ },
+}
+
+TEST_LOCKFILE_COMMENT = '# This file is used for regulating self-update'
+
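+# Lockfile format: 'lock <version> <identifier regex>' pins builds whose
+# identifier matches the regex to at most <version>; 'lockV2' lines
+# additionally scope the pin to an '<owner>/<repo>' source.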
+TEST_LOCKFILE_V1 = r'''%s
+lock 2022.08.18.36 .+ Python 3\.6
+lock 2023.11.16 (?!win_x86_exe).+ Python 3\.7
+lock 2023.11.16 win_x86_exe .+ Windows-(?:Vista|2008Server)
+''' % TEST_LOCKFILE_COMMENT
+
+TEST_LOCKFILE_V2_TMPL = r'''%s
+lockV2 yt-dlp/yt-dlp 2022.08.18.36 .+ Python 3\.6
+lockV2 yt-dlp/yt-dlp 2023.11.16 (?!win_x86_exe).+ Python 3\.7
+lockV2 yt-dlp/yt-dlp 2023.11.16 win_x86_exe .+ Windows-(?:Vista|2008Server)
+lockV2 yt-dlp/yt-dlp-nightly-builds 2023.11.15.232826 (?!win_x86_exe).+ Python 3\.7
+lockV2 yt-dlp/yt-dlp-nightly-builds 2023.11.15.232826 win_x86_exe .+ Windows-(?:Vista|2008Server)
+lockV2 yt-dlp/yt-dlp-master-builds 2023.11.15.232812 (?!win_x86_exe).+ Python 3\.7
+lockV2 yt-dlp/yt-dlp-master-builds 2023.11.15.232812 win_x86_exe .+ Windows-(?:Vista|2008Server)
+'''
+
+TEST_LOCKFILE_V2 = TEST_LOCKFILE_V2_TMPL % TEST_LOCKFILE_COMMENT
+
+TEST_LOCKFILE_ACTUAL = TEST_LOCKFILE_V2_TMPL % TEST_LOCKFILE_V1.rstrip('\n')
+
+TEST_LOCKFILE_FORK = r'''%s# Test if a fork blocks updates to non-numeric tags
+lockV2 fork/yt-dlp pr0000 .+ Python 3\.6
+lockV2 fork/yt-dlp pr1234 (?!win_x86_exe).+ Python 3\.7
+lockV2 fork/yt-dlp pr1234 win_x86_exe .+ Windows-(?:Vista|2008Server)
+lockV2 fork/yt-dlp pr9999 .+ Python 3\.11
+''' % TEST_LOCKFILE_ACTUAL
+
+
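+# Updater with all network access stubbed out: the update spec comes from
+# TEST_LOCKFILE_ACTUAL and the releases API from TEST_API_DATA.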
+class FakeUpdater(Updater):
+ current_version = '2022.01.01'
+ current_commit = 'aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa'
+
+ _channel = 'stable'
+ _origin = 'yt-dlp/yt-dlp'
+ _update_sources = TEST_UPDATE_SOURCES
+
+ def _download_update_spec(self, *args, **kwargs):
+ return TEST_LOCKFILE_ACTUAL
+
+ def _call_api(self, tag):
+ tag = f'tags/{tag}' if tag != 'latest' else tag
+ return TEST_API_DATA[f'{self.requested_repo}/{tag}']
+
+ def _report_error(self, msg, *args, **kwargs):
+ report_warning(msg)
+
+
+class TestUpdate(unittest.TestCase):
+ maxDiff = None
+
+ def test_update_spec(self):
+ ydl = FakeYDL()
+ updater = FakeUpdater(ydl, 'stable')
+
+ def test(lockfile, identifier, input_tag, expect_tag, exact=False, repo='yt-dlp/yt-dlp'):
+ updater._identifier = identifier
+ updater._exact = exact
+ updater.requested_repo = repo
+ result = updater._process_update_spec(lockfile, input_tag)
+ self.assertEqual(
+ result, expect_tag,
+ f'{identifier!r} requesting {repo}@{input_tag} (exact={exact}) '
+ f'returned {result!r} instead of {expect_tag!r}')
+
+ for lockfile in (TEST_LOCKFILE_V1, TEST_LOCKFILE_V2, TEST_LOCKFILE_ACTUAL, TEST_LOCKFILE_FORK):
+ # Normal operation
+ test(lockfile, 'zip Python 3.12.0', '2023.12.31', '2023.12.31')
+ test(lockfile, 'zip stable Python 3.12.0', '2023.12.31', '2023.12.31', exact=True)
+ # Python 3.6 --update should update only to its lock
+ test(lockfile, 'zip Python 3.6.0', '2023.11.16', '2022.08.18.36')
+ # --update-to an exact version later than the lock should return None
+ test(lockfile, 'zip stable Python 3.6.0', '2023.11.16', None, exact=True)
+ # Python 3.7 should be able to update to its lock
+ test(lockfile, 'zip Python 3.7.0', '2023.11.16', '2023.11.16')
+ test(lockfile, 'zip stable Python 3.7.1', '2023.11.16', '2023.11.16', exact=True)
+ # Non-win_x86_exe builds on py3.7 must be locked
+ test(lockfile, 'zip Python 3.7.1', '2023.12.31', '2023.11.16')
+ test(lockfile, 'zip stable Python 3.7.1', '2023.12.31', None, exact=True)
+ test( # Windows Vista w/ win_x86_exe must be locked
+ lockfile, 'win_x86_exe stable Python 3.7.9 (CPython x86 32bit) - Windows-Vista-6.0.6003-SP2',
+ '2023.12.31', '2023.11.16')
+ test( # Windows 2008Server w/ win_x86_exe must be locked
+ lockfile, 'win_x86_exe Python 3.7.9 (CPython x86 32bit) - Windows-2008Server',
+ '2023.12.31', None, exact=True)
+ test( # Windows 7 w/ win_x86_exe py3.7 build should be able to update beyond lock
+ lockfile, 'win_x86_exe stable Python 3.7.9 (CPython x86 32bit) - Windows-7-6.1.7601-SP1',
+ '2023.12.31', '2023.12.31')
+ test( # Windows 8.1 w/ '2008Server' in platform string should be able to update beyond lock
+ lockfile, 'win_x86_exe Python 3.7.9 (CPython x86 32bit) - Windows-post2008Server-6.2.9200',
+ '2023.12.31', '2023.12.31', exact=True)
+
+ # Forks can block updates to non-numeric tags rather than lock
+ test(TEST_LOCKFILE_FORK, 'zip Python 3.6.3', 'pr0000', None, repo='fork/yt-dlp')
+ test(TEST_LOCKFILE_FORK, 'zip stable Python 3.7.4', 'pr0000', 'pr0000', repo='fork/yt-dlp')
+ test(TEST_LOCKFILE_FORK, 'zip stable Python 3.7.4', 'pr1234', None, repo='fork/yt-dlp')
+ test(TEST_LOCKFILE_FORK, 'zip Python 3.8.1', 'pr1234', 'pr1234', repo='fork/yt-dlp', exact=True)
+ test(
+ TEST_LOCKFILE_FORK, 'win_x86_exe stable Python 3.7.9 (CPython x86 32bit) - Windows-Vista-6.0.6003-SP2',
+ 'pr1234', None, repo='fork/yt-dlp')
+ test(
+ TEST_LOCKFILE_FORK, 'win_x86_exe stable Python 3.7.9 (CPython x86 32bit) - Windows-7-6.1.7601-SP1',
+ '2023.12.31', '2023.12.31', repo='fork/yt-dlp')
+ test(TEST_LOCKFILE_FORK, 'zip Python 3.11.2', 'pr9999', None, repo='fork/yt-dlp', exact=True)
+ test(TEST_LOCKFILE_FORK, 'zip stable Python 3.12.0', 'pr9999', 'pr9999', repo='fork/yt-dlp')
+
+ def test_query_update(self):
+ ydl = FakeYDL()
+
+ def test(target, expected, current_version=None, current_commit=None, identifier=None):
+ updater = FakeUpdater(ydl, target)
+ if current_version:
+ updater.current_version = current_version
+ if current_commit:
+ updater.current_commit = current_commit
+ updater._identifier = identifier or 'zip'
+ update_info = updater.query_update(_output=True)
+ self.assertDictEqual(
+ update_info.__dict__ if update_info else {}, expected.__dict__ if expected else {})
+
+ test('yt-dlp/yt-dlp@latest', UpdateInfo(
+ '2023.12.31', version='2023.12.31', requested_version='2023.12.31', commit='b' * 40))
+ test('yt-dlp/yt-dlp-nightly-builds@latest', UpdateInfo(
+ '2023.12.31.123456', version='2023.12.31.123456', requested_version='2023.12.31.123456', commit='c' * 40))
+ test('yt-dlp/yt-dlp-master-builds@latest', UpdateInfo(
+ '2023.12.31.987654', version='2023.12.31.987654', requested_version='2023.12.31.987654', commit='d' * 40))
+ test('fork/yt-dlp@latest', UpdateInfo(
+ '2050.12.31', version='2050.12.31', requested_version='2050.12.31', commit='e' * 40))
+ test('fork/yt-dlp@pr0000', UpdateInfo(
+ 'pr0000', version='2023.11.11.000000', requested_version='2023.11.11.000000', commit='f' * 40))
+ test('fork/yt-dlp@pr1234', UpdateInfo(
+ 'pr1234', version='2023.12.31.555555', requested_version='2023.12.31.555555', commit='0' * 40))
+ test('fork/yt-dlp@pr9999', UpdateInfo(
+ 'pr9999', version=None, requested_version=None, commit='1' * 40))
+ test('fork/yt-dlp-satellite@pr987', UpdateInfo(
+ 'pr987', version=None, requested_version=None, commit='2' * 40))
+ test('yt-dlp/yt-dlp', None, current_version='2024.01.01')
+ test('stable', UpdateInfo(
+ '2023.12.31', version='2023.12.31', requested_version='2023.12.31', commit='b' * 40))
+ test('nightly', UpdateInfo(
+ '2023.12.31.123456', version='2023.12.31.123456', requested_version='2023.12.31.123456', commit='c' * 40))
+ test('master', UpdateInfo(
+ '2023.12.31.987654', version='2023.12.31.987654', requested_version='2023.12.31.987654', commit='d' * 40))
+ test('testing', None, current_commit='9' * 40)
+ test('testing', UpdateInfo('testing', commit='9' * 40))
+
+
+if __name__ == '__main__':
+ unittest.main()
diff --git a/test/test_utils.py b/test/test_utils.py
new file mode 100644
index 0000000..a3073f0
--- /dev/null
+++ b/test/test_utils.py
@@ -0,0 +1,2457 @@
+#!/usr/bin/env python3
+
+# Allow direct execution
+import os
+import re
+import sys
+import unittest
+import warnings
+
+sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
+
+
+import contextlib
+import io
+import itertools
+import json
+import subprocess
+import xml.etree.ElementTree
+
+from yt_dlp.compat import (
+ compat_etree_fromstring,
+ compat_HTMLParseError,
+ compat_os_name,
+)
+from yt_dlp.utils import (
+ Config,
+ DateRange,
+ ExtractorError,
+ InAdvancePagedList,
+ LazyList,
+ OnDemandPagedList,
+ Popen,
+ age_restricted,
+ args_to_str,
+ base_url,
+ caesar,
+ clean_html,
+ clean_podcast_url,
+ cli_bool_option,
+ cli_option,
+ cli_valueless_option,
+ date_from_str,
+ datetime_from_str,
+ detect_exe_version,
+ determine_ext,
+ determine_file_encoding,
+ dfxp2srt,
+ dict_get,
+ encode_base_n,
+ encode_compat_str,
+ encodeFilename,
+ expand_path,
+ extract_attributes,
+ extract_basic_auth,
+ find_xpath_attr,
+ fix_xml_ampersands,
+ float_or_none,
+ format_bytes,
+ get_compatible_ext,
+ get_element_by_attribute,
+ get_element_by_class,
+ get_element_html_by_attribute,
+ get_element_html_by_class,
+ get_element_text_and_html_by_tag,
+ get_elements_by_attribute,
+ get_elements_by_class,
+ get_elements_html_by_attribute,
+ get_elements_html_by_class,
+ get_elements_text_and_html_by_attribute,
+ int_or_none,
+ intlist_to_bytes,
+ iri_to_uri,
+ is_html,
+ js_to_json,
+ limit_length,
+ locked_file,
+ lowercase_escape,
+ match_str,
+ merge_dicts,
+ mimetype2ext,
+ month_by_name,
+ multipart_encode,
+ ohdave_rsa_encrypt,
+ orderedSet,
+ parse_age_limit,
+ parse_bitrate,
+ parse_codecs,
+ parse_count,
+ parse_dfxp_time_expr,
+ parse_duration,
+ parse_filesize,
+ parse_iso8601,
+ parse_qs,
+ parse_resolution,
+ pkcs1pad,
+ prepend_extension,
+ read_batch_urls,
+ remove_end,
+ remove_quotes,
+ remove_start,
+ render_table,
+ replace_extension,
+ rot47,
+ sanitize_filename,
+ sanitize_path,
+ sanitize_url,
+ shell_quote,
+ smuggle_url,
+ str_or_none,
+ str_to_int,
+ strip_jsonp,
+ strip_or_none,
+ subtitles_filename,
+ timeconvert,
+ traverse_obj,
+ try_call,
+ unescapeHTML,
+ unified_strdate,
+ unified_timestamp,
+ unsmuggle_url,
+ update_url_query,
+ uppercase_escape,
+ url_basename,
+ url_or_none,
+ urlencode_postdata,
+ urljoin,
+ urshift,
+ variadic,
+ version_tuple,
+ xpath_attr,
+ xpath_element,
+ xpath_text,
+ xpath_with_ns,
+)
+from yt_dlp.utils.networking import (
+ HTTPHeaderDict,
+ escape_rfc3986,
+ normalize_url,
+ remove_dot_segments,
+)
+
+
+class TestUtil(unittest.TestCase):
+ def test_timeconvert(self):
+ self.assertTrue(timeconvert('') is None)
+ self.assertTrue(timeconvert('bougrg') is None)
+
+ def test_sanitize_filename(self):
+ self.assertEqual(sanitize_filename(''), '')
+ self.assertEqual(sanitize_filename('abc'), 'abc')
+ self.assertEqual(sanitize_filename('abc_d-e'), 'abc_d-e')
+
+ self.assertEqual(sanitize_filename('123'), '123')
+
+ self.assertEqual('abc⧸de', sanitize_filename('abc/de'))
+ self.assertFalse('/' in sanitize_filename('abc/de///'))
+
+ self.assertEqual('abc_de', sanitize_filename('abc/<>\\*|de', is_id=False))
+ self.assertEqual('xxx', sanitize_filename('xxx/<>\\*|', is_id=False))
+ self.assertEqual('yes no', sanitize_filename('yes? no', is_id=False))
+ self.assertEqual('this - that', sanitize_filename('this: that', is_id=False))
+
+ self.assertEqual(sanitize_filename('AT&T'), 'AT&T')
+ aumlaut = 'ä'
+ self.assertEqual(sanitize_filename(aumlaut), aumlaut)
+ tests = '\u043a\u0438\u0440\u0438\u043b\u043b\u0438\u0446\u0430'
+ self.assertEqual(sanitize_filename(tests), tests)
+
+ self.assertEqual(
+ sanitize_filename('New World record at 0:12:34'),
+ 'New World record at 0_12_34')
+
+ self.assertEqual(sanitize_filename('--gasdgf'), '--gasdgf')
+ self.assertEqual(sanitize_filename('--gasdgf', is_id=True), '--gasdgf')
+ self.assertEqual(sanitize_filename('--gasdgf', is_id=False), '_-gasdgf')
+ self.assertEqual(sanitize_filename('.gasdgf'), '.gasdgf')
+ self.assertEqual(sanitize_filename('.gasdgf', is_id=True), '.gasdgf')
+ self.assertEqual(sanitize_filename('.gasdgf', is_id=False), 'gasdgf')
+
+ forbidden = '"\0\\/'
+ for fc in forbidden:
+ for fbc in forbidden:
+ self.assertTrue(fbc not in sanitize_filename(fc))
+
+ def test_sanitize_filename_restricted(self):
+ self.assertEqual(sanitize_filename('abc', restricted=True), 'abc')
+ self.assertEqual(sanitize_filename('abc_d-e', restricted=True), 'abc_d-e')
+
+ self.assertEqual(sanitize_filename('123', restricted=True), '123')
+
+ self.assertEqual('abc_de', sanitize_filename('abc/de', restricted=True))
+ self.assertFalse('/' in sanitize_filename('abc/de///', restricted=True))
+
+ self.assertEqual('abc_de', sanitize_filename('abc/<>\\*|de', restricted=True))
+ self.assertEqual('xxx', sanitize_filename('xxx/<>\\*|', restricted=True))
+ self.assertEqual('yes_no', sanitize_filename('yes? no', restricted=True))
+ self.assertEqual('this_-_that', sanitize_filename('this: that', restricted=True))
+
+ tests = 'aäb\u4e2d\u56fd\u7684c'
+ self.assertEqual(sanitize_filename(tests, restricted=True), 'aab_c')
+ self.assertTrue(sanitize_filename('\xf6', restricted=True) != '') # No empty filename
+
+ forbidden = '"\0\\/&!: \'\t\n()[]{}$;`^,#'
+ for fc in forbidden:
+ for fbc in forbidden:
+ self.assertTrue(fbc not in sanitize_filename(fc, restricted=True))
+
+ # Handle a common case more neatly
+ self.assertEqual(sanitize_filename('\u5927\u58f0\u5e26 - Song', restricted=True), 'Song')
+ self.assertEqual(sanitize_filename('\u603b\u7edf: Speech', restricted=True), 'Speech')
+        # ...but make sure the file name is never empty
+ self.assertTrue(sanitize_filename('-', restricted=True) != '')
+ self.assertTrue(sanitize_filename(':', restricted=True) != '')
+
+ self.assertEqual(sanitize_filename(
+ 'ÂÃÄÀÁÅÆÇÈÉÊËÌÍÎÏÐÑÒÓÔÕÖŐØŒÙÚÛÜŰÝÞßàáâãäåæçèéêëìíîïðñòóôõöőøœùúûüűýþÿ', restricted=True),
+ 'AAAAAAAECEEEEIIIIDNOOOOOOOOEUUUUUYTHssaaaaaaaeceeeeiiiionooooooooeuuuuuythy')
+
+ def test_sanitize_ids(self):
+ self.assertEqual(sanitize_filename('_n_cd26wFpw', is_id=True), '_n_cd26wFpw')
+ self.assertEqual(sanitize_filename('_BD_eEpuzXw', is_id=True), '_BD_eEpuzXw')
+ self.assertEqual(sanitize_filename('N0Y__7-UOdI', is_id=True), 'N0Y__7-UOdI')
+
+ def test_sanitize_path(self):
+ if sys.platform != 'win32':
+ return
+
+ self.assertEqual(sanitize_path('abc'), 'abc')
+ self.assertEqual(sanitize_path('abc/def'), 'abc\\def')
+ self.assertEqual(sanitize_path('abc\\def'), 'abc\\def')
+ self.assertEqual(sanitize_path('abc|def'), 'abc#def')
+ self.assertEqual(sanitize_path('<>:"|?*'), '#######')
+ self.assertEqual(sanitize_path('C:/abc/def'), 'C:\\abc\\def')
+ self.assertEqual(sanitize_path('C?:/abc/def'), 'C##\\abc\\def')
+
+ self.assertEqual(sanitize_path('\\\\?\\UNC\\ComputerName\\abc'), '\\\\?\\UNC\\ComputerName\\abc')
+ self.assertEqual(sanitize_path('\\\\?\\UNC/ComputerName/abc'), '\\\\?\\UNC\\ComputerName\\abc')
+
+ self.assertEqual(sanitize_path('\\\\?\\C:\\abc'), '\\\\?\\C:\\abc')
+ self.assertEqual(sanitize_path('\\\\?\\C:/abc'), '\\\\?\\C:\\abc')
+ self.assertEqual(sanitize_path('\\\\?\\C:\\ab?c\\de:f'), '\\\\?\\C:\\ab#c\\de#f')
+ self.assertEqual(sanitize_path('\\\\?\\C:\\abc'), '\\\\?\\C:\\abc')
+
+ self.assertEqual(
+ sanitize_path('youtube/%(uploader)s/%(autonumber)s-%(title)s-%(upload_date)s.%(ext)s'),
+ 'youtube\\%(uploader)s\\%(autonumber)s-%(title)s-%(upload_date)s.%(ext)s')
+
+ self.assertEqual(
+ sanitize_path('youtube/TheWreckingYard ./00001-Not bad, Especially for Free! (1987 Yamaha 700)-20141116.mp4.part'),
+ 'youtube\\TheWreckingYard #\\00001-Not bad, Especially for Free! (1987 Yamaha 700)-20141116.mp4.part')
+ self.assertEqual(sanitize_path('abc/def...'), 'abc\\def..#')
+ self.assertEqual(sanitize_path('abc.../def'), 'abc..#\\def')
+ self.assertEqual(sanitize_path('abc.../def...'), 'abc..#\\def..#')
+
+ self.assertEqual(sanitize_path('../abc'), '..\\abc')
+ self.assertEqual(sanitize_path('../../abc'), '..\\..\\abc')
+ self.assertEqual(sanitize_path('./abc'), 'abc')
+ self.assertEqual(sanitize_path('./../abc'), '..\\abc')
+
+ def test_sanitize_url(self):
+ self.assertEqual(sanitize_url('//foo.bar'), 'http://foo.bar')
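+        # Common scheme misspellings such as 'httpss' and 'rmtps' are corrected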
+ self.assertEqual(sanitize_url('httpss://foo.bar'), 'https://foo.bar')
+ self.assertEqual(sanitize_url('rmtps://foo.bar'), 'rtmps://foo.bar')
+ self.assertEqual(sanitize_url('https://foo.bar'), 'https://foo.bar')
+ self.assertEqual(sanitize_url('foo bar'), 'foo bar')
+
+ def test_expand_path(self):
+ def env(var):
+ return f'%{var}%' if sys.platform == 'win32' else f'${var}'
+
+ os.environ['yt_dlp_EXPATH_PATH'] = 'expanded'
+ self.assertEqual(expand_path(env('yt_dlp_EXPATH_PATH')), 'expanded')
+
+ old_home = os.environ.get('HOME')
+ test_str = R'C:\Documents and Settings\тест\Application Data'
+ try:
+ os.environ['HOME'] = test_str
+ self.assertEqual(expand_path(env('HOME')), os.getenv('HOME'))
+ self.assertEqual(expand_path('~'), os.getenv('HOME'))
+ self.assertEqual(
+ expand_path('~/%s' % env('yt_dlp_EXPATH_PATH')),
+ '%s/expanded' % os.getenv('HOME'))
+ finally:
+ os.environ['HOME'] = old_home or ''
+
+ def test_prepend_extension(self):
+ self.assertEqual(prepend_extension('abc.ext', 'temp'), 'abc.temp.ext')
+ self.assertEqual(prepend_extension('abc.ext', 'temp', 'ext'), 'abc.temp.ext')
+ self.assertEqual(prepend_extension('abc.unexpected_ext', 'temp', 'ext'), 'abc.unexpected_ext.temp')
+ self.assertEqual(prepend_extension('abc', 'temp'), 'abc.temp')
+ self.assertEqual(prepend_extension('.abc', 'temp'), '.abc.temp')
+ self.assertEqual(prepend_extension('.abc.ext', 'temp'), '.abc.temp.ext')
+
+ def test_replace_extension(self):
+ self.assertEqual(replace_extension('abc.ext', 'temp'), 'abc.temp')
+ self.assertEqual(replace_extension('abc.ext', 'temp', 'ext'), 'abc.temp')
+ self.assertEqual(replace_extension('abc.unexpected_ext', 'temp', 'ext'), 'abc.unexpected_ext.temp')
+ self.assertEqual(replace_extension('abc', 'temp'), 'abc.temp')
+ self.assertEqual(replace_extension('.abc', 'temp'), '.abc.temp')
+ self.assertEqual(replace_extension('.abc.ext', 'temp'), '.abc.temp')
+
+ def test_subtitles_filename(self):
+ self.assertEqual(subtitles_filename('abc.ext', 'en', 'vtt'), 'abc.en.vtt')
+ self.assertEqual(subtitles_filename('abc.ext', 'en', 'vtt', 'ext'), 'abc.en.vtt')
+ self.assertEqual(subtitles_filename('abc.unexpected_ext', 'en', 'vtt', 'ext'), 'abc.unexpected_ext.en.vtt')
+
+ def test_remove_start(self):
+ self.assertEqual(remove_start(None, 'A - '), None)
+ self.assertEqual(remove_start('A - B', 'A - '), 'B')
+ self.assertEqual(remove_start('B - A', 'A - '), 'B - A')
+
+ def test_remove_end(self):
+ self.assertEqual(remove_end(None, ' - B'), None)
+ self.assertEqual(remove_end('A - B', ' - B'), 'A')
+ self.assertEqual(remove_end('B - A', ' - B'), 'B - A')
+
+ def test_remove_quotes(self):
+ self.assertEqual(remove_quotes(None), None)
+ self.assertEqual(remove_quotes('"'), '"')
+ self.assertEqual(remove_quotes("'"), "'")
+ self.assertEqual(remove_quotes(';'), ';')
+ self.assertEqual(remove_quotes('";'), '";')
+ self.assertEqual(remove_quotes('""'), '')
+ self.assertEqual(remove_quotes('";"'), ';')
+
+ def test_ordered_set(self):
+ self.assertEqual(orderedSet([1, 1, 2, 3, 4, 4, 5, 6, 7, 3, 5]), [1, 2, 3, 4, 5, 6, 7])
+ self.assertEqual(orderedSet([]), [])
+ self.assertEqual(orderedSet([1]), [1])
+        # Keep the list ordered
+ self.assertEqual(orderedSet([135, 1, 1, 1]), [135, 1])
+
+ def test_unescape_html(self):
+ self.assertEqual(unescapeHTML('%20;'), '%20;')
+ self.assertEqual(unescapeHTML('&#x2F;'), '/')
+ self.assertEqual(unescapeHTML('&#47;'), '/')
+ self.assertEqual(unescapeHTML('&eacute;'), 'é')
+ self.assertEqual(unescapeHTML('&#2013266066;'), '&#2013266066;')
+ self.assertEqual(unescapeHTML('&a&quot;'), '&a"')
+ # HTML5 entities
+ self.assertEqual(unescapeHTML('&period;&apos;'), '.\'')
+
+ def test_date_from_str(self):
+ self.assertEqual(date_from_str('yesterday'), date_from_str('now-1day'))
+ self.assertEqual(date_from_str('now+7day'), date_from_str('now+1week'))
+ self.assertEqual(date_from_str('now+14day'), date_from_str('now+2week'))
+ self.assertEqual(date_from_str('20200229+365day'), date_from_str('20200229+1year'))
+ self.assertEqual(date_from_str('20210131+28day'), date_from_str('20210131+1month'))
+
+ def test_datetime_from_str(self):
+ self.assertEqual(datetime_from_str('yesterday', precision='day'), datetime_from_str('now-1day', precision='auto'))
+ self.assertEqual(datetime_from_str('now+7day', precision='day'), datetime_from_str('now+1week', precision='auto'))
+ self.assertEqual(datetime_from_str('now+14day', precision='day'), datetime_from_str('now+2week', precision='auto'))
+ self.assertEqual(datetime_from_str('20200229+365day', precision='day'), datetime_from_str('20200229+1year', precision='auto'))
+ self.assertEqual(datetime_from_str('20210131+28day', precision='day'), datetime_from_str('20210131+1month', precision='auto'))
+ self.assertEqual(datetime_from_str('20210131+59day', precision='day'), datetime_from_str('20210131+2month', precision='auto'))
+ self.assertEqual(datetime_from_str('now+1day', precision='hour'), datetime_from_str('now+24hours', precision='auto'))
+ self.assertEqual(datetime_from_str('now+23hours', precision='hour'), datetime_from_str('now+23hours', precision='auto'))
+
+ def test_daterange(self):
+ _20century = DateRange("19000101", "20000101")
+ self.assertFalse("17890714" in _20century)
+ _ac = DateRange("00010101")
+ self.assertTrue("19690721" in _ac)
+        _first_millennium = DateRange(end="10000101")
+        self.assertTrue("07110427" in _first_millennium)
+
+ def test_unified_dates(self):
+ self.assertEqual(unified_strdate('December 21, 2010'), '20101221')
+ self.assertEqual(unified_strdate('8/7/2009'), '20090708')
+ self.assertEqual(unified_strdate('Dec 14, 2012'), '20121214')
+ self.assertEqual(unified_strdate('2012/10/11 01:56:38 +0000'), '20121011')
+ self.assertEqual(unified_strdate('1968 12 10'), '19681210')
+ self.assertEqual(unified_strdate('1968-12-10'), '19681210')
+ self.assertEqual(unified_strdate('31-07-2022 20:00'), '20220731')
+ self.assertEqual(unified_strdate('28/01/2014 21:00:00 +0100'), '20140128')
+ self.assertEqual(
+ unified_strdate('11/26/2014 11:30:00 AM PST', day_first=False),
+ '20141126')
+ self.assertEqual(
+ unified_strdate('2/2/2015 6:47:40 PM', day_first=False),
+ '20150202')
+ self.assertEqual(unified_strdate('Feb 14th 2016 5:45PM'), '20160214')
+ self.assertEqual(unified_strdate('25-09-2014'), '20140925')
+ self.assertEqual(unified_strdate('27.02.2016 17:30'), '20160227')
+ self.assertEqual(unified_strdate('UNKNOWN DATE FORMAT'), None)
+ self.assertEqual(unified_strdate('Feb 7, 2016 at 6:35 pm'), '20160207')
+ self.assertEqual(unified_strdate('July 15th, 2013'), '20130715')
+ self.assertEqual(unified_strdate('September 1st, 2013'), '20130901')
+ self.assertEqual(unified_strdate('Sep 2nd, 2013'), '20130902')
+ self.assertEqual(unified_strdate('November 3rd, 2019'), '20191103')
+ self.assertEqual(unified_strdate('October 23rd, 2005'), '20051023')
+
+ def test_unified_timestamps(self):
+ self.assertEqual(unified_timestamp('December 21, 2010'), 1292889600)
+ self.assertEqual(unified_timestamp('8/7/2009'), 1247011200)
+ self.assertEqual(unified_timestamp('Dec 14, 2012'), 1355443200)
+ self.assertEqual(unified_timestamp('2012/10/11 01:56:38 +0000'), 1349920598)
+ self.assertEqual(unified_timestamp('1968 12 10'), -33436800)
+ self.assertEqual(unified_timestamp('1968-12-10'), -33436800)
+ self.assertEqual(unified_timestamp('28/01/2014 21:00:00 +0100'), 1390939200)
+ self.assertEqual(
+ unified_timestamp('11/26/2014 11:30:00 AM PST', day_first=False),
+ 1417001400)
+ self.assertEqual(
+ unified_timestamp('2/2/2015 6:47:40 PM', day_first=False),
+ 1422902860)
+ self.assertEqual(unified_timestamp('Feb 14th 2016 5:45PM'), 1455471900)
+ self.assertEqual(unified_timestamp('25-09-2014'), 1411603200)
+ self.assertEqual(unified_timestamp('27.02.2016 17:30'), 1456594200)
+ self.assertEqual(unified_timestamp('UNKNOWN DATE FORMAT'), None)
+ self.assertEqual(unified_timestamp('May 16, 2016 11:15 PM'), 1463440500)
+ self.assertEqual(unified_timestamp('Feb 7, 2016 at 6:35 pm'), 1454870100)
+ self.assertEqual(unified_timestamp('2017-03-30T17:52:41Q'), 1490896361)
+ self.assertEqual(unified_timestamp('Sep 11, 2013 | 5:49 AM'), 1378878540)
+ self.assertEqual(unified_timestamp('December 15, 2017 at 7:49 am'), 1513324140)
+ self.assertEqual(unified_timestamp('2018-03-14T08:32:43.1493874+00:00'), 1521016363)
+
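+        # Timezone handling: EDT is UTC-4, so 1969-12-31 20:00:01 EDT is one second past the Unix epoch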
+ self.assertEqual(unified_timestamp('December 31 1969 20:00:01 EDT'), 1)
+ self.assertEqual(unified_timestamp('Wednesday 31 December 1969 18:01:26 MDT'), 86)
+ self.assertEqual(unified_timestamp('12/31/1969 20:01:18 EDT', False), 78)
+
+ def test_determine_ext(self):
+ self.assertEqual(determine_ext('http://example.com/foo/bar.mp4/?download'), 'mp4')
+ self.assertEqual(determine_ext('http://example.com/foo/bar/?download', None), None)
+ self.assertEqual(determine_ext('http://example.com/foo/bar.nonext/?download', None), None)
+ self.assertEqual(determine_ext('http://example.com/foo/bar/mp4?download', None), None)
+ self.assertEqual(determine_ext('http://example.com/foo/bar.m3u8//?download'), 'm3u8')
+ self.assertEqual(determine_ext('foobar', None), None)
+
+ def test_find_xpath_attr(self):
+ testxml = '''<root>
+ <node/>
+ <node x="a"/>
+ <node x="a" y="c" />
+ <node x="b" y="d" />
+ <node x="" />
+ </root>'''
+ doc = compat_etree_fromstring(testxml)
+
+ self.assertEqual(find_xpath_attr(doc, './/fourohfour', 'n'), None)
+ self.assertEqual(find_xpath_attr(doc, './/fourohfour', 'n', 'v'), None)
+ self.assertEqual(find_xpath_attr(doc, './/node', 'n'), None)
+ self.assertEqual(find_xpath_attr(doc, './/node', 'n', 'v'), None)
+ self.assertEqual(find_xpath_attr(doc, './/node', 'x'), doc[1])
+ self.assertEqual(find_xpath_attr(doc, './/node', 'x', 'a'), doc[1])
+ self.assertEqual(find_xpath_attr(doc, './/node', 'x', 'b'), doc[3])
+ self.assertEqual(find_xpath_attr(doc, './/node', 'y'), doc[2])
+ self.assertEqual(find_xpath_attr(doc, './/node', 'y', 'c'), doc[2])
+ self.assertEqual(find_xpath_attr(doc, './/node', 'y', 'd'), doc[3])
+ self.assertEqual(find_xpath_attr(doc, './/node', 'x', ''), doc[4])
+
+ def test_xpath_with_ns(self):
+ testxml = '''<root xmlns:media="http://example.com/">
+ <media:song>
+ <media:author>The Author</media:author>
+ <url>http://server.com/download.mp3</url>
+ </media:song>
+ </root>'''
+ doc = compat_etree_fromstring(testxml)
+ find = lambda p: doc.find(xpath_with_ns(p, {'media': 'http://example.com/'}))
+ self.assertTrue(find('media:song') is not None)
+ self.assertEqual(find('media:song/media:author').text, 'The Author')
+ self.assertEqual(find('media:song/url').text, 'http://server.com/download.mp3')
+
+ def test_xpath_element(self):
+ doc = xml.etree.ElementTree.Element('root')
+ div = xml.etree.ElementTree.SubElement(doc, 'div')
+ p = xml.etree.ElementTree.SubElement(div, 'p')
+ p.text = 'Foo'
+ self.assertEqual(xpath_element(doc, 'div/p'), p)
+ self.assertEqual(xpath_element(doc, ['div/p']), p)
+ self.assertEqual(xpath_element(doc, ['div/bar', 'div/p']), p)
+ self.assertEqual(xpath_element(doc, 'div/bar', default='default'), 'default')
+ self.assertEqual(xpath_element(doc, ['div/bar'], default='default'), 'default')
+ self.assertTrue(xpath_element(doc, 'div/bar') is None)
+ self.assertTrue(xpath_element(doc, ['div/bar']) is None)
+ self.assertTrue(xpath_element(doc, ['div/bar'], 'div/baz') is None)
+ self.assertRaises(ExtractorError, xpath_element, doc, 'div/bar', fatal=True)
+ self.assertRaises(ExtractorError, xpath_element, doc, ['div/bar'], fatal=True)
+ self.assertRaises(ExtractorError, xpath_element, doc, ['div/bar', 'div/baz'], fatal=True)
+
+ def test_xpath_text(self):
+ testxml = '''<root>
+ <div>
+ <p>Foo</p>
+ </div>
+ </root>'''
+ doc = compat_etree_fromstring(testxml)
+ self.assertEqual(xpath_text(doc, 'div/p'), 'Foo')
+ self.assertEqual(xpath_text(doc, 'div/bar', default='default'), 'default')
+ self.assertTrue(xpath_text(doc, 'div/bar') is None)
+ self.assertRaises(ExtractorError, xpath_text, doc, 'div/bar', fatal=True)
+
+ def test_xpath_attr(self):
+ testxml = '''<root>
+ <div>
+ <p x="a">Foo</p>
+ </div>
+ </root>'''
+ doc = compat_etree_fromstring(testxml)
+ self.assertEqual(xpath_attr(doc, 'div/p', 'x'), 'a')
+ self.assertEqual(xpath_attr(doc, 'div/bar', 'x'), None)
+ self.assertEqual(xpath_attr(doc, 'div/p', 'y'), None)
+ self.assertEqual(xpath_attr(doc, 'div/bar', 'x', default='default'), 'default')
+ self.assertEqual(xpath_attr(doc, 'div/p', 'y', default='default'), 'default')
+ self.assertRaises(ExtractorError, xpath_attr, doc, 'div/bar', 'x', fatal=True)
+ self.assertRaises(ExtractorError, xpath_attr, doc, 'div/p', 'y', fatal=True)
+
+ def test_smuggle_url(self):
+ data = {"ö": "ö", "abc": [3]}
+ url = 'https://foo.bar/baz?x=y#a'
+ smug_url = smuggle_url(url, data)
+ unsmug_url, unsmug_data = unsmuggle_url(smug_url)
+ self.assertEqual(url, unsmug_url)
+ self.assertEqual(data, unsmug_data)
+
+ res_url, res_data = unsmuggle_url(url)
+ self.assertEqual(res_url, url)
+ self.assertEqual(res_data, None)
+
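+        # Smuggling an already-smuggled URL merges the payloads; unsmuggling returns the original URL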
+ smug_url = smuggle_url(url, {'a': 'b'})
+ smug_smug_url = smuggle_url(smug_url, {'c': 'd'})
+ res_url, res_data = unsmuggle_url(smug_smug_url)
+ self.assertEqual(res_url, url)
+ self.assertEqual(res_data, {'a': 'b', 'c': 'd'})
+
+ def test_shell_quote(self):
+ args = ['ffmpeg', '-i', encodeFilename('ñ€ß\'.mp4')]
+ self.assertEqual(
+ shell_quote(args),
+ """ffmpeg -i 'ñ€ß'"'"'.mp4'""" if compat_os_name != 'nt' else '''ffmpeg -i "ñ€ß'.mp4"''')
+
+ def test_float_or_none(self):
+ self.assertEqual(float_or_none('42.42'), 42.42)
+ self.assertEqual(float_or_none('42'), 42.0)
+ self.assertEqual(float_or_none(''), None)
+ self.assertEqual(float_or_none(None), None)
+ self.assertEqual(float_or_none([]), None)
+ self.assertEqual(float_or_none(set()), None)
+
+ def test_int_or_none(self):
+ self.assertEqual(int_or_none('42'), 42)
+ self.assertEqual(int_or_none(''), None)
+ self.assertEqual(int_or_none(None), None)
+ self.assertEqual(int_or_none([]), None)
+ self.assertEqual(int_or_none(set()), None)
+
+ def test_str_to_int(self):
+ self.assertEqual(str_to_int('123,456'), 123456)
+ self.assertEqual(str_to_int('123.456'), 123456)
+ self.assertEqual(str_to_int(523), 523)
+ self.assertEqual(str_to_int('noninteger'), None)
+ self.assertEqual(str_to_int([]), None)
+
+ def test_url_basename(self):
+ self.assertEqual(url_basename('http://foo.de/'), '')
+ self.assertEqual(url_basename('http://foo.de/bar/baz'), 'baz')
+ self.assertEqual(url_basename('http://foo.de/bar/baz?x=y'), 'baz')
+ self.assertEqual(url_basename('http://foo.de/bar/baz#x=y'), 'baz')
+ self.assertEqual(url_basename('http://foo.de/bar/baz/'), 'baz')
+ self.assertEqual(
+ url_basename('http://media.w3.org/2010/05/sintel/trailer.mp4'),
+ 'trailer.mp4')
+
+ def test_base_url(self):
+ self.assertEqual(base_url('http://foo.de/'), 'http://foo.de/')
+ self.assertEqual(base_url('http://foo.de/bar'), 'http://foo.de/')
+ self.assertEqual(base_url('http://foo.de/bar/'), 'http://foo.de/bar/')
+ self.assertEqual(base_url('http://foo.de/bar/baz'), 'http://foo.de/bar/')
+ self.assertEqual(base_url('http://foo.de/bar/baz?x=z/x/c'), 'http://foo.de/bar/')
+ self.assertEqual(base_url('http://foo.de/bar/baz&x=z&w=y/x/c'), 'http://foo.de/bar/baz&x=z&w=y/x/')
+
+ def test_urljoin(self):
+ self.assertEqual(urljoin('http://foo.de/', '/a/b/c.txt'), 'http://foo.de/a/b/c.txt')
+ self.assertEqual(urljoin(b'http://foo.de/', '/a/b/c.txt'), 'http://foo.de/a/b/c.txt')
+ self.assertEqual(urljoin('http://foo.de/', b'/a/b/c.txt'), 'http://foo.de/a/b/c.txt')
+ self.assertEqual(urljoin(b'http://foo.de/', b'/a/b/c.txt'), 'http://foo.de/a/b/c.txt')
+ self.assertEqual(urljoin('//foo.de/', '/a/b/c.txt'), '//foo.de/a/b/c.txt')
+ self.assertEqual(urljoin('http://foo.de/', 'a/b/c.txt'), 'http://foo.de/a/b/c.txt')
+ self.assertEqual(urljoin('http://foo.de', '/a/b/c.txt'), 'http://foo.de/a/b/c.txt')
+ self.assertEqual(urljoin('http://foo.de', 'a/b/c.txt'), 'http://foo.de/a/b/c.txt')
+ self.assertEqual(urljoin('http://foo.de/', 'http://foo.de/a/b/c.txt'), 'http://foo.de/a/b/c.txt')
+ self.assertEqual(urljoin('http://foo.de/', '//foo.de/a/b/c.txt'), '//foo.de/a/b/c.txt')
+ self.assertEqual(urljoin(None, 'http://foo.de/a/b/c.txt'), 'http://foo.de/a/b/c.txt')
+ self.assertEqual(urljoin(None, '//foo.de/a/b/c.txt'), '//foo.de/a/b/c.txt')
+ self.assertEqual(urljoin('', 'http://foo.de/a/b/c.txt'), 'http://foo.de/a/b/c.txt')
+ self.assertEqual(urljoin(['foobar'], 'http://foo.de/a/b/c.txt'), 'http://foo.de/a/b/c.txt')
+ self.assertEqual(urljoin('http://foo.de/', None), None)
+ self.assertEqual(urljoin('http://foo.de/', ''), None)
+ self.assertEqual(urljoin('http://foo.de/', ['foobar']), None)
+ self.assertEqual(urljoin('http://foo.de/a/b/c.txt', '.././../d.txt'), 'http://foo.de/d.txt')
+ self.assertEqual(urljoin('http://foo.de/a/b/c.txt', 'rtmp://foo.de'), 'rtmp://foo.de')
+ self.assertEqual(urljoin(None, 'rtmp://foo.de'), 'rtmp://foo.de')
+
+ def test_url_or_none(self):
+ self.assertEqual(url_or_none(None), None)
+ self.assertEqual(url_or_none(''), None)
+ self.assertEqual(url_or_none('foo'), None)
+ self.assertEqual(url_or_none('http://foo.de'), 'http://foo.de')
+ self.assertEqual(url_or_none('https://foo.de'), 'https://foo.de')
+ self.assertEqual(url_or_none('http$://foo.de'), None)
+ self.assertEqual(url_or_none('http://foo.de'), 'http://foo.de')
+ self.assertEqual(url_or_none('//foo.de'), '//foo.de')
+ self.assertEqual(url_or_none('s3://foo.de'), None)
+ self.assertEqual(url_or_none('rtmpte://foo.de'), 'rtmpte://foo.de')
+ self.assertEqual(url_or_none('mms://foo.de'), 'mms://foo.de')
+ self.assertEqual(url_or_none('rtspu://foo.de'), 'rtspu://foo.de')
+ self.assertEqual(url_or_none('ftps://foo.de'), 'ftps://foo.de')
+
+ def test_parse_age_limit(self):
+ self.assertEqual(parse_age_limit(None), None)
+ self.assertEqual(parse_age_limit(False), None)
+ self.assertEqual(parse_age_limit('invalid'), None)
+ self.assertEqual(parse_age_limit(0), 0)
+ self.assertEqual(parse_age_limit(18), 18)
+ self.assertEqual(parse_age_limit(21), 21)
+ self.assertEqual(parse_age_limit(22), None)
+ self.assertEqual(parse_age_limit('18'), 18)
+ self.assertEqual(parse_age_limit('18+'), 18)
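+        # MPAA and US TV Parental Guidelines labels map to their minimum viewer ages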
+ self.assertEqual(parse_age_limit('PG-13'), 13)
+ self.assertEqual(parse_age_limit('TV-14'), 14)
+ self.assertEqual(parse_age_limit('TV-MA'), 17)
+ self.assertEqual(parse_age_limit('TV14'), 14)
+ self.assertEqual(parse_age_limit('TV_G'), 0)
+
+ def test_parse_duration(self):
+ self.assertEqual(parse_duration(None), None)
+ self.assertEqual(parse_duration(False), None)
+ self.assertEqual(parse_duration('invalid'), None)
+ self.assertEqual(parse_duration('1'), 1)
+ self.assertEqual(parse_duration('1337:12'), 80232)
+ self.assertEqual(parse_duration('9:12:43'), 33163)
+ self.assertEqual(parse_duration('12:00'), 720)
+ self.assertEqual(parse_duration('00:01:01'), 61)
+ self.assertEqual(parse_duration('x:y'), None)
+ self.assertEqual(parse_duration('3h11m53s'), 11513)
+ self.assertEqual(parse_duration('3h 11m 53s'), 11513)
+ self.assertEqual(parse_duration('3 hours 11 minutes 53 seconds'), 11513)
+ self.assertEqual(parse_duration('3 hours 11 mins 53 secs'), 11513)
+ self.assertEqual(parse_duration('3 hours, 11 minutes, 53 seconds'), 11513)
+ self.assertEqual(parse_duration('3 hours, 11 mins, 53 secs'), 11513)
+ self.assertEqual(parse_duration('62m45s'), 3765)
+ self.assertEqual(parse_duration('6m59s'), 419)
+ self.assertEqual(parse_duration('49s'), 49)
+ self.assertEqual(parse_duration('0h0m0s'), 0)
+ self.assertEqual(parse_duration('0m0s'), 0)
+ self.assertEqual(parse_duration('0s'), 0)
+ self.assertEqual(parse_duration('01:02:03.05'), 3723.05)
+ self.assertEqual(parse_duration('T30M38S'), 1838)
+ self.assertEqual(parse_duration('5 s'), 5)
+ self.assertEqual(parse_duration('3 min'), 180)
+ self.assertEqual(parse_duration('2.5 hours'), 9000)
+ self.assertEqual(parse_duration('02:03:04'), 7384)
+ self.assertEqual(parse_duration('01:02:03:04'), 93784)
+ self.assertEqual(parse_duration('1 hour 3 minutes'), 3780)
+ self.assertEqual(parse_duration('87 Min.'), 5220)
+ self.assertEqual(parse_duration('PT1H0.040S'), 3600.04)
+ self.assertEqual(parse_duration('PT00H03M30SZ'), 210)
+ self.assertEqual(parse_duration('P0Y0M0DT0H4M20.880S'), 260.88)
+ self.assertEqual(parse_duration('01:02:03:050'), 3723.05)
+ self.assertEqual(parse_duration('103:050'), 103.05)
+ self.assertEqual(parse_duration('1HR 3MIN'), 3780)
+ self.assertEqual(parse_duration('2hrs 3mins'), 7380)
+
+ def test_fix_xml_ampersands(self):
+ self.assertEqual(
+ fix_xml_ampersands('"&x=y&z=a'), '"&amp;x=y&amp;z=a')
+ self.assertEqual(
+ fix_xml_ampersands('"&amp;x=y&wrong;&z=a'),
+ '"&amp;x=y&amp;wrong;&amp;z=a')
+ self.assertEqual(
+ fix_xml_ampersands('&amp;&apos;&gt;&lt;&quot;'),
+ '&amp;&apos;&gt;&lt;&quot;')
+ self.assertEqual(
+ fix_xml_ampersands('&#1234;&#x1abC;'), '&#1234;&#x1abC;')
+ self.assertEqual(fix_xml_ampersands('&#&#'), '&amp;#&amp;#')
+
+ def test_paged_list(self):
+ def testPL(size, pagesize, sliceargs, expected):
+ def get_page(pagenum):
+ firstid = pagenum * pagesize
+ upto = min(size, pagenum * pagesize + pagesize)
+ yield from range(firstid, upto)
+
+ pl = OnDemandPagedList(get_page, pagesize)
+ got = pl.getslice(*sliceargs)
+ self.assertEqual(got, expected)
+
+ iapl = InAdvancePagedList(get_page, size // pagesize + 1, pagesize)
+ got = iapl.getslice(*sliceargs)
+ self.assertEqual(got, expected)
+
+ testPL(5, 2, (), [0, 1, 2, 3, 4])
+ testPL(5, 2, (1,), [1, 2, 3, 4])
+ testPL(5, 2, (2,), [2, 3, 4])
+ testPL(5, 2, (4,), [4])
+ testPL(5, 2, (0, 3), [0, 1, 2])
+ testPL(5, 2, (1, 4), [1, 2, 3])
+ testPL(5, 2, (2, 99), [2, 3, 4])
+ testPL(5, 2, (20, 99), [])
+
+ def test_read_batch_urls(self):
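+        # The UTF-8 BOM and comment lines (starting with '#' or ';') must be ignored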
+ f = io.StringIO('''\xef\xbb\xbf foo
+ bar\r
+ baz
+ # More after this line\r
+ ; or after this
+ bam''')
+ self.assertEqual(read_batch_urls(f), ['foo', 'bar', 'baz', 'bam'])
+
+ def test_urlencode_postdata(self):
+ data = urlencode_postdata({'username': 'foo@bar.com', 'password': '1234'})
+ self.assertTrue(isinstance(data, bytes))
+
+ def test_update_url_query(self):
+ self.assertEqual(parse_qs(update_url_query(
+ 'http://example.com/path', {'quality': ['HD'], 'format': ['mp4']})),
+ parse_qs('http://example.com/path?quality=HD&format=mp4'))
+ self.assertEqual(parse_qs(update_url_query(
+ 'http://example.com/path', {'system': ['LINUX', 'WINDOWS']})),
+ parse_qs('http://example.com/path?system=LINUX&system=WINDOWS'))
+ self.assertEqual(parse_qs(update_url_query(
+ 'http://example.com/path', {'fields': 'id,formats,subtitles'})),
+ parse_qs('http://example.com/path?fields=id,formats,subtitles'))
+ self.assertEqual(parse_qs(update_url_query(
+ 'http://example.com/path', {'fields': ('id,formats,subtitles', 'thumbnails')})),
+ parse_qs('http://example.com/path?fields=id,formats,subtitles&fields=thumbnails'))
+ self.assertEqual(parse_qs(update_url_query(
+ 'http://example.com/path?manifest=f4m', {'manifest': []})),
+ parse_qs('http://example.com/path'))
+ self.assertEqual(parse_qs(update_url_query(
+ 'http://example.com/path?system=LINUX&system=WINDOWS', {'system': 'LINUX'})),
+ parse_qs('http://example.com/path?system=LINUX'))
+ self.assertEqual(parse_qs(update_url_query(
+ 'http://example.com/path', {'fields': b'id,formats,subtitles'})),
+ parse_qs('http://example.com/path?fields=id,formats,subtitles'))
+ self.assertEqual(parse_qs(update_url_query(
+ 'http://example.com/path', {'width': 1080, 'height': 720})),
+ parse_qs('http://example.com/path?width=1080&height=720'))
+ self.assertEqual(parse_qs(update_url_query(
+ 'http://example.com/path', {'bitrate': 5020.43})),
+ parse_qs('http://example.com/path?bitrate=5020.43'))
+ self.assertEqual(parse_qs(update_url_query(
+ 'http://example.com/path', {'test': '第二行тест'})),
+ parse_qs('http://example.com/path?test=%E7%AC%AC%E4%BA%8C%E8%A1%8C%D1%82%D0%B5%D1%81%D1%82'))
+
+ def test_multipart_encode(self):
+ self.assertEqual(
+ multipart_encode({b'field': b'value'}, boundary='AAAAAA')[0],
+ b'--AAAAAA\r\nContent-Disposition: form-data; name="field"\r\n\r\nvalue\r\n--AAAAAA--\r\n')
+ self.assertEqual(
+ multipart_encode({'欄位'.encode(): '值'.encode()}, boundary='AAAAAA')[0],
+ b'--AAAAAA\r\nContent-Disposition: form-data; name="\xe6\xac\x84\xe4\xbd\x8d"\r\n\r\n\xe5\x80\xbc\r\n--AAAAAA--\r\n')
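+        # A boundary string that also occurs in the payload must be rejected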
+ self.assertRaises(
+ ValueError, multipart_encode, {b'field': b'value'}, boundary='value')
+
+ def test_dict_get(self):
+ FALSE_VALUES = {
+ 'none': None,
+ 'false': False,
+ 'zero': 0,
+ 'empty_string': '',
+ 'empty_list': [],
+ }
+ d = FALSE_VALUES.copy()
+ d['a'] = 42
+ self.assertEqual(dict_get(d, 'a'), 42)
+ self.assertEqual(dict_get(d, 'b'), None)
+ self.assertEqual(dict_get(d, 'b', 42), 42)
+ self.assertEqual(dict_get(d, ('a', )), 42)
+ self.assertEqual(dict_get(d, ('b', 'a', )), 42)
+ self.assertEqual(dict_get(d, ('b', 'c', 'a', 'd', )), 42)
+ self.assertEqual(dict_get(d, ('b', 'c', )), None)
+ self.assertEqual(dict_get(d, ('b', 'c', ), 42), 42)
+ for key, false_value in FALSE_VALUES.items():
+ self.assertEqual(dict_get(d, ('b', 'c', key, )), None)
+ self.assertEqual(dict_get(d, ('b', 'c', key, ), skip_false_values=False), false_value)
+
+ def test_merge_dicts(self):
+ self.assertEqual(merge_dicts({'a': 1}, {'b': 2}), {'a': 1, 'b': 2})
+ self.assertEqual(merge_dicts({'a': 1}, {'a': 2}), {'a': 1})
+ self.assertEqual(merge_dicts({'a': 1}, {'a': None}), {'a': 1})
+ self.assertEqual(merge_dicts({'a': 1}, {'a': ''}), {'a': 1})
+ self.assertEqual(merge_dicts({'a': 1}, {}), {'a': 1})
+ self.assertEqual(merge_dicts({'a': None}, {'a': 1}), {'a': 1})
+ self.assertEqual(merge_dicts({'a': ''}, {'a': 1}), {'a': ''})
+ self.assertEqual(merge_dicts({'a': ''}, {'a': 'abc'}), {'a': 'abc'})
+ self.assertEqual(merge_dicts({'a': None}, {'a': ''}, {'a': 'abc'}), {'a': 'abc'})
+
+ def test_encode_compat_str(self):
+ self.assertEqual(encode_compat_str(b'\xd1\x82\xd0\xb5\xd1\x81\xd1\x82', 'utf-8'), 'тест')
+ self.assertEqual(encode_compat_str('тест', 'utf-8'), 'тест')
+
+ def test_parse_iso8601(self):
+ self.assertEqual(parse_iso8601('2014-03-23T23:04:26+0100'), 1395612266)
+ self.assertEqual(parse_iso8601('2014-03-23T22:04:26+0000'), 1395612266)
+ self.assertEqual(parse_iso8601('2014-03-23T22:04:26Z'), 1395612266)
+ self.assertEqual(parse_iso8601('2014-03-23T22:04:26.1234Z'), 1395612266)
+ self.assertEqual(parse_iso8601('2015-09-29T08:27:31.727'), 1443515251)
+ self.assertEqual(parse_iso8601('2015-09-29T08-27-31.727'), None)
+
+ def test_strip_jsonp(self):
+ stripped = strip_jsonp('cb ([ {"id":"532cb",\n\n\n"x":\n3}\n]\n);')
+ d = json.loads(stripped)
+ self.assertEqual(d, [{"id": "532cb", "x": 3}])
+
+ stripped = strip_jsonp('parseMetadata({"STATUS":"OK"})\n\n\n//epc')
+ d = json.loads(stripped)
+ self.assertEqual(d, {'STATUS': 'OK'})
+
+ stripped = strip_jsonp('ps.embedHandler({"status": "success"});')
+ d = json.loads(stripped)
+ self.assertEqual(d, {'status': 'success'})
+
+ stripped = strip_jsonp('window.cb && window.cb({"status": "success"});')
+ d = json.loads(stripped)
+ self.assertEqual(d, {'status': 'success'})
+
+ stripped = strip_jsonp('window.cb && cb({"status": "success"});')
+ d = json.loads(stripped)
+ self.assertEqual(d, {'status': 'success'})
+
+ stripped = strip_jsonp('({"status": "success"});')
+ d = json.loads(stripped)
+ self.assertEqual(d, {'status': 'success'})
+
+ def test_strip_or_none(self):
+ self.assertEqual(strip_or_none(' abc'), 'abc')
+ self.assertEqual(strip_or_none('abc '), 'abc')
+ self.assertEqual(strip_or_none(' abc '), 'abc')
+ self.assertEqual(strip_or_none('\tabc\t'), 'abc')
+ self.assertEqual(strip_or_none('\n\tabc\n\t'), 'abc')
+ self.assertEqual(strip_or_none('abc'), 'abc')
+ self.assertEqual(strip_or_none(''), '')
+ self.assertEqual(strip_or_none(None), None)
+ self.assertEqual(strip_or_none(42), None)
+ self.assertEqual(strip_or_none([]), None)
+
+ def test_uppercase_escape(self):
+ self.assertEqual(uppercase_escape('aä'), 'aä')
+ self.assertEqual(uppercase_escape('\\U0001d550'), '𝕐')
+
+ def test_lowercase_escape(self):
+ self.assertEqual(lowercase_escape('aä'), 'aä')
+ self.assertEqual(lowercase_escape('\\u0026'), '&')
+
+ def test_limit_length(self):
+ self.assertEqual(limit_length(None, 12), None)
+ self.assertEqual(limit_length('foo', 12), 'foo')
+ self.assertTrue(
+ limit_length('foo bar baz asd', 12).startswith('foo bar'))
+ self.assertTrue('...' in limit_length('foo bar baz asd', 12))
+
+ def test_mimetype2ext(self):
+ self.assertEqual(mimetype2ext(None), None)
+ self.assertEqual(mimetype2ext('video/x-flv'), 'flv')
+ self.assertEqual(mimetype2ext('application/x-mpegURL'), 'm3u8')
+ self.assertEqual(mimetype2ext('text/vtt'), 'vtt')
+ self.assertEqual(mimetype2ext('text/vtt;charset=utf-8'), 'vtt')
+ self.assertEqual(mimetype2ext('text/html; charset=utf-8'), 'html')
+ self.assertEqual(mimetype2ext('audio/x-wav'), 'wav')
+ self.assertEqual(mimetype2ext('audio/x-wav;codec=pcm'), 'wav')
+
+ def test_month_by_name(self):
+ self.assertEqual(month_by_name(None), None)
+ self.assertEqual(month_by_name('December', 'en'), 12)
+ self.assertEqual(month_by_name('décembre', 'fr'), 12)
+ self.assertEqual(month_by_name('December'), 12)
+ self.assertEqual(month_by_name('décembre'), None)
+ self.assertEqual(month_by_name('Unknown', 'unknown'), None)
+
+ def test_parse_codecs(self):
+ self.assertEqual(parse_codecs(''), {})
+ self.assertEqual(parse_codecs('avc1.77.30, mp4a.40.2'), {
+ 'vcodec': 'avc1.77.30',
+ 'acodec': 'mp4a.40.2',
+ 'dynamic_range': None,
+ })
+ self.assertEqual(parse_codecs('mp4a.40.2'), {
+ 'vcodec': 'none',
+ 'acodec': 'mp4a.40.2',
+ 'dynamic_range': None,
+ })
+ self.assertEqual(parse_codecs('mp4a.40.5,avc1.42001e'), {
+ 'vcodec': 'avc1.42001e',
+ 'acodec': 'mp4a.40.5',
+ 'dynamic_range': None,
+ })
+ self.assertEqual(parse_codecs('avc3.640028'), {
+ 'vcodec': 'avc3.640028',
+ 'acodec': 'none',
+ 'dynamic_range': None,
+ })
+ self.assertEqual(parse_codecs(', h264,,newcodec,aac'), {
+ 'vcodec': 'h264',
+ 'acodec': 'aac',
+ 'dynamic_range': None,
+ })
+ self.assertEqual(parse_codecs('av01.0.05M.08'), {
+ 'vcodec': 'av01.0.05M.08',
+ 'acodec': 'none',
+ 'dynamic_range': None,
+ })
+ self.assertEqual(parse_codecs('vp9.2'), {
+ 'vcodec': 'vp9.2',
+ 'acodec': 'none',
+ 'dynamic_range': 'HDR10',
+ })
+ self.assertEqual(parse_codecs('av01.0.12M.10.0.110.09.16.09.0'), {
+ 'vcodec': 'av01.0.12M.10.0.110.09.16.09.0',
+ 'acodec': 'none',
+ 'dynamic_range': 'HDR10',
+ })
+ self.assertEqual(parse_codecs('dvhe'), {
+ 'vcodec': 'dvhe',
+ 'acodec': 'none',
+ 'dynamic_range': 'DV',
+ })
+ self.assertEqual(parse_codecs('theora, vorbis'), {
+ 'vcodec': 'theora',
+ 'acodec': 'vorbis',
+ 'dynamic_range': None,
+ })
+ self.assertEqual(parse_codecs('unknownvcodec, unknownacodec'), {
+ 'vcodec': 'unknownvcodec',
+ 'acodec': 'unknownacodec',
+ })
+ self.assertEqual(parse_codecs('unknown'), {})
+
+ def test_escape_rfc3986(self):
+ reserved = "!*'();:@&=+$,/?#[]"
+ unreserved = 'ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-_.~'
+ self.assertEqual(escape_rfc3986(reserved), reserved)
+ self.assertEqual(escape_rfc3986(unreserved), unreserved)
+ self.assertEqual(escape_rfc3986('тест'), '%D1%82%D0%B5%D1%81%D1%82')
+ self.assertEqual(escape_rfc3986('%D1%82%D0%B5%D1%81%D1%82'), '%D1%82%D0%B5%D1%81%D1%82')
+ self.assertEqual(escape_rfc3986('foo bar'), 'foo%20bar')
+ self.assertEqual(escape_rfc3986('foo%20bar'), 'foo%20bar')
+
+ def test_normalize_url(self):
+ self.assertEqual(
+ normalize_url('http://wowza.imust.org/srv/vod/telemb/new/UPLOAD/UPLOAD/20224_IncendieHavré_FD.mp4'),
+ 'http://wowza.imust.org/srv/vod/telemb/new/UPLOAD/UPLOAD/20224_IncendieHavre%CC%81_FD.mp4'
+ )
+ self.assertEqual(
+ normalize_url('http://www.ardmediathek.de/tv/Sturm-der-Liebe/Folge-2036-Zu-Mann-und-Frau-erklärt/Das-Erste/Video?documentId=22673108&bcastId=5290'),
+ 'http://www.ardmediathek.de/tv/Sturm-der-Liebe/Folge-2036-Zu-Mann-und-Frau-erkl%C3%A4rt/Das-Erste/Video?documentId=22673108&bcastId=5290'
+ )
+ self.assertEqual(
+ normalize_url('http://тест.рф/фрагмент'),
+ 'http://xn--e1aybc.xn--p1ai/%D1%84%D1%80%D0%B0%D0%B3%D0%BC%D0%B5%D0%BD%D1%82'
+ )
+ self.assertEqual(
+ normalize_url('http://тест.рф/абв?абв=абв#абв'),
+ 'http://xn--e1aybc.xn--p1ai/%D0%B0%D0%B1%D0%B2?%D0%B0%D0%B1%D0%B2=%D0%B0%D0%B1%D0%B2#%D0%B0%D0%B1%D0%B2'
+ )
+ self.assertEqual(normalize_url('http://vimeo.com/56015672#at=0'), 'http://vimeo.com/56015672#at=0')
+
+ self.assertEqual(normalize_url('http://www.example.com/../a/b/../c/./d.html'), 'http://www.example.com/a/c/d.html')
+
+ def test_remove_dot_segments(self):
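+        # The first two cases are the examples from RFC 3986, section 5.2.4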
+ self.assertEqual(remove_dot_segments('/a/b/c/./../../g'), '/a/g')
+ self.assertEqual(remove_dot_segments('mid/content=5/../6'), 'mid/6')
+ self.assertEqual(remove_dot_segments('/ad/../cd'), '/cd')
+ self.assertEqual(remove_dot_segments('/ad/../cd/'), '/cd/')
+ self.assertEqual(remove_dot_segments('/..'), '/')
+ self.assertEqual(remove_dot_segments('/./'), '/')
+ self.assertEqual(remove_dot_segments('/./a'), '/a')
+ self.assertEqual(remove_dot_segments('/abc/./.././d/././e/.././f/./../../ghi'), '/ghi')
+ self.assertEqual(remove_dot_segments('/'), '/')
+ self.assertEqual(remove_dot_segments('/t'), '/t')
+ self.assertEqual(remove_dot_segments('t'), 't')
+ self.assertEqual(remove_dot_segments(''), '')
+ self.assertEqual(remove_dot_segments('/../a/b/c'), '/a/b/c')
+ self.assertEqual(remove_dot_segments('../a'), 'a')
+ self.assertEqual(remove_dot_segments('./a'), 'a')
+ self.assertEqual(remove_dot_segments('.'), '')
+ self.assertEqual(remove_dot_segments('////'), '////')
+
+ def test_js_to_json_vars_strings(self):
+ self.assertDictEqual(
+ json.loads(js_to_json(
+ '''{
+ 'null': a,
+ 'nullStr': b,
+ 'true': c,
+ 'trueStr': d,
+ 'false': e,
+ 'falseStr': f,
+ 'unresolvedVar': g,
+ }''',
+ {
+ 'a': 'null',
+ 'b': '"null"',
+ 'c': 'true',
+ 'd': '"true"',
+ 'e': 'false',
+ 'f': '"false"',
+ 'g': 'var',
+ }
+ )),
+ {
+ 'null': None,
+ 'nullStr': 'null',
+ 'true': True,
+ 'trueStr': 'true',
+ 'false': False,
+ 'falseStr': 'false',
+ 'unresolvedVar': 'var'
+ }
+ )
+
+ self.assertDictEqual(
+ json.loads(js_to_json(
+ '''{
+ 'int': a,
+ 'intStr': b,
+ 'float': c,
+ 'floatStr': d,
+ }''',
+ {
+ 'a': '123',
+ 'b': '"123"',
+ 'c': '1.23',
+ 'd': '"1.23"',
+ }
+ )),
+ {
+ 'int': 123,
+ 'intStr': '123',
+ 'float': 1.23,
+ 'floatStr': '1.23',
+ }
+ )
+
+ self.assertDictEqual(
+ json.loads(js_to_json(
+ '''{
+ 'object': a,
+ 'objectStr': b,
+ 'array': c,
+ 'arrayStr': d,
+ }''',
+ {
+ 'a': '{}',
+ 'b': '"{}"',
+ 'c': '[]',
+ 'd': '"[]"',
+ }
+ )),
+ {
+ 'object': {},
+ 'objectStr': '{}',
+ 'array': [],
+ 'arrayStr': '[]',
+ }
+ )
+
+ def test_js_to_json_realworld(self):
+ inp = '''{
+ 'clip':{'provider':'pseudo'}
+ }'''
+ self.assertEqual(js_to_json(inp), '''{
+ "clip":{"provider":"pseudo"}
+ }''')
+ json.loads(js_to_json(inp))
+
+ inp = '''{
+ 'playlist':[{'controls':{'all':null}}]
+ }'''
+ self.assertEqual(js_to_json(inp), '''{
+ "playlist":[{"controls":{"all":null}}]
+ }''')
+
+ inp = '''"The CW\\'s \\'Crazy Ex-Girlfriend\\'"'''
+ self.assertEqual(js_to_json(inp), '''"The CW's 'Crazy Ex-Girlfriend'"''')
+
+ inp = '"SAND Number: SAND 2013-7800P\\nPresenter: Tom Russo\\nHabanero Software Training - Xyce Software\\nXyce, Sandia\\u0027s"'
+ json_code = js_to_json(inp)
+ self.assertEqual(json.loads(json_code), json.loads(inp))
+
+ inp = '''{
+ 0:{src:'skipped', type: 'application/dash+xml'},
+ 1:{src:'skipped', type: 'application/vnd.apple.mpegURL'},
+ }'''
+ self.assertEqual(js_to_json(inp), '''{
+ "0":{"src":"skipped", "type": "application/dash+xml"},
+ "1":{"src":"skipped", "type": "application/vnd.apple.mpegURL"}
+ }''')
+
+ inp = '''{"foo":101}'''
+ self.assertEqual(js_to_json(inp), '''{"foo":101}''')
+
+ inp = '''{"duration": "00:01:07"}'''
+ self.assertEqual(js_to_json(inp), '''{"duration": "00:01:07"}''')
+
+ inp = '''{segments: [{"offset":-3.885780586188048e-16,"duration":39.75000000000001}]}'''
+ self.assertEqual(js_to_json(inp), '''{"segments": [{"offset":-3.885780586188048e-16,"duration":39.75000000000001}]}''')
+
+ def test_js_to_json_edgecases(self):
+ on = js_to_json("{abc_def:'1\\'\\\\2\\\\\\'3\"4'}")
+ self.assertEqual(json.loads(on), {"abc_def": "1'\\2\\'3\"4"})
+
+ on = js_to_json('{"abc": true}')
+ self.assertEqual(json.loads(on), {'abc': True})
+
+ # Ignore JavaScript code as well
+ on = js_to_json('''{
+ "x": 1,
+ y: "a",
+ z: some.code
+ }''')
+ d = json.loads(on)
+ self.assertEqual(d['x'], 1)
+ self.assertEqual(d['y'], 'a')
+
+        # Just drop the '!' prefix for now, even though this results in a wrong value
+ on = js_to_json('''{
+ a: !0,
+ b: !1,
+ c: !!0,
+ d: !!42.42,
+ e: !!![],
+ f: !"abc",
+ g: !"",
+ !42: 42
+ }''')
+ self.assertEqual(json.loads(on), {
+ 'a': 0,
+ 'b': 1,
+ 'c': 0,
+ 'd': 42.42,
+ 'e': [],
+ 'f': "abc",
+ 'g': "",
+ '42': 42
+ })
+
+ on = js_to_json('["abc", "def",]')
+ self.assertEqual(json.loads(on), ['abc', 'def'])
+
+ on = js_to_json('[/*comment\n*/"abc"/*comment\n*/,/*comment\n*/"def",/*comment\n*/]')
+ self.assertEqual(json.loads(on), ['abc', 'def'])
+
+ on = js_to_json('[//comment\n"abc" //comment\n,//comment\n"def",//comment\n]')
+ self.assertEqual(json.loads(on), ['abc', 'def'])
+
+ on = js_to_json('{"abc": "def",}')
+ self.assertEqual(json.loads(on), {'abc': 'def'})
+
+ on = js_to_json('{/*comment\n*/"abc"/*comment\n*/:/*comment\n*/"def"/*comment\n*/,/*comment\n*/}')
+ self.assertEqual(json.loads(on), {'abc': 'def'})
+
+ on = js_to_json('{ 0: /* " \n */ ",]" , }')
+ self.assertEqual(json.loads(on), {'0': ',]'})
+
+ on = js_to_json('{ /*comment\n*/0/*comment\n*/: /* " \n */ ",]" , }')
+ self.assertEqual(json.loads(on), {'0': ',]'})
+
+ on = js_to_json('{ 0: // comment\n1 }')
+ self.assertEqual(json.loads(on), {'0': 1})
+
+ on = js_to_json(r'["<p>x<\/p>"]')
+ self.assertEqual(json.loads(on), ['<p>x</p>'])
+
+ on = js_to_json(r'["\xaa"]')
+ self.assertEqual(json.loads(on), ['\u00aa'])
+
+ on = js_to_json("['a\\\nb']")
+ self.assertEqual(json.loads(on), ['ab'])
+
+ on = js_to_json("/*comment\n*/[/*comment\n*/'a\\\nb'/*comment\n*/]/*comment\n*/")
+ self.assertEqual(json.loads(on), ['ab'])
+
+ on = js_to_json('{0xff:0xff}')
+ self.assertEqual(json.loads(on), {'255': 255})
+
+ on = js_to_json('{/*comment\n*/0xff/*comment\n*/:/*comment\n*/0xff/*comment\n*/}')
+ self.assertEqual(json.loads(on), {'255': 255})
+
+ on = js_to_json('{077:077}')
+ self.assertEqual(json.loads(on), {'63': 63})
+
+ on = js_to_json('{/*comment\n*/077/*comment\n*/:/*comment\n*/077/*comment\n*/}')
+ self.assertEqual(json.loads(on), {'63': 63})
+
+ on = js_to_json('{42:42}')
+ self.assertEqual(json.loads(on), {'42': 42})
+
+ on = js_to_json('{/*comment\n*/42/*comment\n*/:/*comment\n*/42/*comment\n*/}')
+ self.assertEqual(json.loads(on), {'42': 42})
+
+ on = js_to_json('{42:4.2e1}')
+ self.assertEqual(json.loads(on), {'42': 42.0})
+
+ on = js_to_json('{ "0x40": "0x40" }')
+ self.assertEqual(json.loads(on), {'0x40': '0x40'})
+
+ on = js_to_json('{ "040": "040" }')
+ self.assertEqual(json.loads(on), {'040': '040'})
+
+ on = js_to_json('[1,//{},\n2]')
+ self.assertEqual(json.loads(on), [1, 2])
+
+ on = js_to_json(R'"\^\$\#"')
+ self.assertEqual(json.loads(on), R'^$#', msg='Unnecessary escapes should be stripped')
+
+ on = js_to_json('\'"\\""\'')
+        self.assertEqual(json.loads(on), '"""', msg='Unnecessarily escaped quotes should be re-escaped for JSON')
+
+ on = js_to_json('[new Date("spam"), \'("eggs")\']')
+ self.assertEqual(json.loads(on), ['spam', '("eggs")'], msg='Date regex should match a single string')
+
+ def test_js_to_json_malformed(self):
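+        # Bare identifiers stuck to numbers are quoted as strings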
+ self.assertEqual(js_to_json('42a1'), '42"a1"')
+ self.assertEqual(js_to_json('42a-1'), '42"a"-1')
+
+ def test_js_to_json_template_literal(self):
+ self.assertEqual(js_to_json('`Hello ${name}`', {'name': '"world"'}), '"Hello world"')
+ self.assertEqual(js_to_json('`${name}${name}`', {'name': '"X"'}), '"XX"')
+ self.assertEqual(js_to_json('`${name}${name}`', {'name': '5'}), '"55"')
+ self.assertEqual(js_to_json('`${name}"${name}"`', {'name': '5'}), '"5\\"5\\""')
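+        # An unresolved template variable falls back to its own name as a string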
+ self.assertEqual(js_to_json('`${name}`', {}), '"name"')
+
+ def test_js_to_json_common_constructors(self):
+ self.assertEqual(json.loads(js_to_json('new Map([["a", 5]])')), {'a': 5})
+ self.assertEqual(json.loads(js_to_json('Array(5, 10)')), [5, 10])
+ self.assertEqual(json.loads(js_to_json('new Array(15,5)')), [15, 5])
+ self.assertEqual(json.loads(js_to_json('new Map([Array(5, 10),new Array(15,5)])')), {'5': 10, '15': 5})
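+        # new Date(...) constructors reduce to their string argument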
+ self.assertEqual(json.loads(js_to_json('new Date("123")')), "123")
+ self.assertEqual(json.loads(js_to_json('new Date(\'2023-10-19\')')), "2023-10-19")
+
+ def test_extract_attributes(self):
+ self.assertEqual(extract_attributes('<e x="y">'), {'x': 'y'})
+ self.assertEqual(extract_attributes("<e x='y'>"), {'x': 'y'})
+ self.assertEqual(extract_attributes('<e x=y>'), {'x': 'y'})
+ self.assertEqual(extract_attributes('<e x="a \'b\' c">'), {'x': "a 'b' c"})
+ self.assertEqual(extract_attributes('<e x=\'a "b" c\'>'), {'x': 'a "b" c'})
+ self.assertEqual(extract_attributes('<e x="&#121;">'), {'x': 'y'})
+ self.assertEqual(extract_attributes('<e x="&#x79;">'), {'x': 'y'})
+ self.assertEqual(extract_attributes('<e x="&amp;">'), {'x': '&'}) # XML
+ self.assertEqual(extract_attributes('<e x="&quot;">'), {'x': '"'})
+ self.assertEqual(extract_attributes('<e x="&pound;">'), {'x': '£'}) # HTML 3.2
+ self.assertEqual(extract_attributes('<e x="&lambda;">'), {'x': 'λ'}) # HTML 4.0
+ self.assertEqual(extract_attributes('<e x="&foo">'), {'x': '&foo'})
+ self.assertEqual(extract_attributes('<e x="\'">'), {'x': "'"})
+ self.assertEqual(extract_attributes('<e x=\'"\'>'), {'x': '"'})
+ self.assertEqual(extract_attributes('<e x >'), {'x': None})
+ self.assertEqual(extract_attributes('<e x=y a>'), {'x': 'y', 'a': None})
+ self.assertEqual(extract_attributes('<e x= y>'), {'x': 'y'})
+ self.assertEqual(extract_attributes('<e x=1 y=2 x=3>'), {'y': '2', 'x': '3'})
+ self.assertEqual(extract_attributes('<e \nx=\ny\n>'), {'x': 'y'})
+ self.assertEqual(extract_attributes('<e \nx=\n"y"\n>'), {'x': 'y'})
+ self.assertEqual(extract_attributes("<e \nx=\n'y'\n>"), {'x': 'y'})
+ self.assertEqual(extract_attributes('<e \nx="\ny\n">'), {'x': '\ny\n'})
+ self.assertEqual(extract_attributes('<e CAPS=x>'), {'caps': 'x'}) # Names lowercased
+ self.assertEqual(extract_attributes('<e x=1 X=2>'), {'x': '2'})
+ self.assertEqual(extract_attributes('<e X=1 x=2>'), {'x': '2'})
+ self.assertEqual(extract_attributes('<e _:funny-name1=1>'), {'_:funny-name1': '1'})
+ self.assertEqual(extract_attributes('<e x="Fáilte 世界 \U0001f600">'), {'x': 'Fáilte 世界 \U0001f600'})
+ self.assertEqual(extract_attributes('<e x="décompose&#769;">'), {'x': 'décompose\u0301'})
+ # "Narrow" Python builds don't support unicode code points outside BMP.
+ try:
+ chr(0x10000)
+ supports_outside_bmp = True
+ except ValueError:
+ supports_outside_bmp = False
+ if supports_outside_bmp:
+ self.assertEqual(extract_attributes('<e x="Smile &#128512;!">'), {'x': 'Smile \U0001f600!'})
+        # Malformed HTML should not break attribute extraction on older Python versions
+ self.assertEqual(extract_attributes('<mal"formed/>'), {})
+
+ def test_clean_html(self):
+ self.assertEqual(clean_html('a:\nb'), 'a: b')
+ self.assertEqual(clean_html('a:\n "b"'), 'a: "b"')
+ self.assertEqual(clean_html('a<br>\xa0b'), 'a\nb')
+
+ def test_intlist_to_bytes(self):
+ self.assertEqual(
+ intlist_to_bytes([0, 1, 127, 128, 255]),
+ b'\x00\x01\x7f\x80\xff')
+
+ def test_args_to_str(self):
+ self.assertEqual(
+ args_to_str(['foo', 'ba/r', '-baz', '2 be', '']),
+ 'foo ba/r -baz \'2 be\' \'\'' if compat_os_name != 'nt' else 'foo ba/r -baz "2 be" ""'
+ )
+
+ def test_parse_filesize(self):
+ self.assertEqual(parse_filesize(None), None)
+ self.assertEqual(parse_filesize(''), None)
+ self.assertEqual(parse_filesize('91 B'), 91)
+ self.assertEqual(parse_filesize('foobar'), None)
+ self.assertEqual(parse_filesize('2 MiB'), 2097152)
+ self.assertEqual(parse_filesize('5 GB'), 5000000000)
+ self.assertEqual(parse_filesize('1.2Tb'), 1200000000000)
+ self.assertEqual(parse_filesize('1.2tb'), 1200000000000)
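+        # A comma is also accepted as the decimal separator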
+ self.assertEqual(parse_filesize('1,24 KB'), 1240)
+ self.assertEqual(parse_filesize('1,24 kb'), 1240)
+ self.assertEqual(parse_filesize('8.5 megabytes'), 8500000)
+
+ def test_parse_count(self):
+ self.assertEqual(parse_count(None), None)
+ self.assertEqual(parse_count(''), None)
+ self.assertEqual(parse_count('0'), 0)
+ self.assertEqual(parse_count('1000'), 1000)
+ self.assertEqual(parse_count('1.000'), 1000)
+ self.assertEqual(parse_count('1.1k'), 1100)
+ self.assertEqual(parse_count('1.1 k'), 1100)
+ self.assertEqual(parse_count('1,1 k'), 1100)
+ self.assertEqual(parse_count('1.1kk'), 1100000)
+ self.assertEqual(parse_count('1.1kk '), 1100000)
+ self.assertEqual(parse_count('1,1kk'), 1100000)
+ self.assertEqual(parse_count('100 views'), 100)
+ self.assertEqual(parse_count('1,100 views'), 1100)
+ self.assertEqual(parse_count('1.1kk views'), 1100000)
+ self.assertEqual(parse_count('10M views'), 10000000)
+ self.assertEqual(parse_count('has 10M views'), 10000000)
+
+ def test_parse_resolution(self):
+ self.assertEqual(parse_resolution(None), {})
+ self.assertEqual(parse_resolution(''), {})
+ self.assertEqual(parse_resolution(' 1920x1080'), {'width': 1920, 'height': 1080})
+ self.assertEqual(parse_resolution('1920×1080 '), {'width': 1920, 'height': 1080})
+ self.assertEqual(parse_resolution('1920 x 1080'), {'width': 1920, 'height': 1080})
+ self.assertEqual(parse_resolution('720p'), {'height': 720})
+ self.assertEqual(parse_resolution('4k'), {'height': 2160})
+ self.assertEqual(parse_resolution('8K'), {'height': 4320})
+ self.assertEqual(parse_resolution('pre_1920x1080_post'), {'width': 1920, 'height': 1080})
+ self.assertEqual(parse_resolution('ep1x2'), {})
+ self.assertEqual(parse_resolution('1920, 1080'), {'width': 1920, 'height': 1080})
+
+ def test_parse_bitrate(self):
+ self.assertEqual(parse_bitrate(None), None)
+ self.assertEqual(parse_bitrate(''), None)
+ self.assertEqual(parse_bitrate('300kbps'), 300)
+ self.assertEqual(parse_bitrate('1500kbps'), 1500)
+ self.assertEqual(parse_bitrate('300 kbps'), 300)
+
+ def test_version_tuple(self):
+ self.assertEqual(version_tuple('1'), (1,))
+ self.assertEqual(version_tuple('10.23.344'), (10, 23, 344))
+ self.assertEqual(version_tuple('10.1-6'), (10, 1, 6)) # avconv style
+
+ def test_detect_exe_version(self):
+ self.assertEqual(detect_exe_version('''ffmpeg version 1.2.1
+built on May 27 2013 08:37:26 with gcc 4.7 (Debian 4.7.3-4)
+configuration: --prefix=/usr --extra-'''), '1.2.1')
+ self.assertEqual(detect_exe_version('''ffmpeg version N-63176-g1fb4685
+built on May 15 2014 22:09:06 with gcc 4.8.2 (GCC)'''), 'N-63176-g1fb4685')
+ self.assertEqual(detect_exe_version('''X server found. dri2 connection failed!
+Trying to open render node...
+Success at /dev/dri/renderD128.
+ffmpeg version 2.4.4 Copyright (c) 2000-2014 the FFmpeg ...'''), '2.4.4')
+
+ def test_age_restricted(self):
+ self.assertFalse(age_restricted(None, 10)) # unrestricted content
+ self.assertFalse(age_restricted(1, None)) # unrestricted policy
+ self.assertFalse(age_restricted(8, 10))
+ self.assertTrue(age_restricted(18, 14))
+ self.assertFalse(age_restricted(18, 18))
+
+ def test_is_html(self):
+ self.assertFalse(is_html(b'\x49\x44\x43<html'))
+ self.assertTrue(is_html(b'<!DOCTYPE foo>\xaaa'))
+ self.assertTrue(is_html( # UTF-8 with BOM
+ b'\xef\xbb\xbf<!DOCTYPE foo>\xaaa'))
+ self.assertTrue(is_html( # UTF-16-LE
+ b'\xff\xfe<\x00h\x00t\x00m\x00l\x00>\x00\xe4\x00'
+ ))
+ self.assertTrue(is_html( # UTF-16-BE
+ b'\xfe\xff\x00<\x00h\x00t\x00m\x00l\x00>\x00\xe4'
+ ))
+ self.assertTrue(is_html( # UTF-32-BE
+ b'\x00\x00\xFE\xFF\x00\x00\x00<\x00\x00\x00h\x00\x00\x00t\x00\x00\x00m\x00\x00\x00l\x00\x00\x00>\x00\x00\x00\xe4'))
+ self.assertTrue(is_html( # UTF-32-LE
+ b'\xFF\xFE\x00\x00<\x00\x00\x00h\x00\x00\x00t\x00\x00\x00m\x00\x00\x00l\x00\x00\x00>\x00\x00\x00\xe4\x00\x00\x00'))
+
+ def test_render_table(self):
+ self.assertEqual(
+ render_table(
+ ['a', 'empty', 'bcd'],
+ [[123, '', 4], [9999, '', 51]]),
+ 'a empty bcd\n'
+ '123 4\n'
+ '9999 51')
+
+ self.assertEqual(
+ render_table(
+ ['a', 'empty', 'bcd'],
+ [[123, '', 4], [9999, '', 51]],
+ hide_empty=True),
+ 'a bcd\n'
+ '123 4\n'
+ '9999 51')
+
+ self.assertEqual(
+ render_table(
+ ['\ta', 'bcd'],
+ [['1\t23', 4], ['\t9999', 51]]),
+ ' a bcd\n'
+ '1 23 4\n'
+ '9999 51')
+
+ self.assertEqual(
+ render_table(
+ ['a', 'bcd'],
+ [[123, 4], [9999, 51]],
+ delim='-'),
+ 'a bcd\n'
+ '--------\n'
+ '123 4\n'
+ '9999 51')
+
+ self.assertEqual(
+ render_table(
+ ['a', 'bcd'],
+ [[123, 4], [9999, 51]],
+ delim='-', extra_gap=2),
+ 'a bcd\n'
+ '----------\n'
+ '123 4\n'
+ '9999 51')
+
+ def test_match_str(self):
+ # Unary
+ self.assertFalse(match_str('xy', {'x': 1200}))
+ self.assertTrue(match_str('!xy', {'x': 1200}))
+ self.assertTrue(match_str('x', {'x': 1200}))
+ self.assertFalse(match_str('!x', {'x': 1200}))
+ self.assertTrue(match_str('x', {'x': 0}))
+ self.assertTrue(match_str('is_live', {'is_live': True}))
+ self.assertFalse(match_str('is_live', {'is_live': False}))
+ self.assertFalse(match_str('is_live', {'is_live': None}))
+ self.assertFalse(match_str('is_live', {}))
+ self.assertFalse(match_str('!is_live', {'is_live': True}))
+ self.assertTrue(match_str('!is_live', {'is_live': False}))
+ self.assertTrue(match_str('!is_live', {'is_live': None}))
+ self.assertTrue(match_str('!is_live', {}))
+ self.assertTrue(match_str('title', {'title': 'abc'}))
+ self.assertTrue(match_str('title', {'title': ''}))
+ self.assertFalse(match_str('!title', {'title': 'abc'}))
+ self.assertFalse(match_str('!title', {'title': ''}))
+
+ # Numeric
+ self.assertFalse(match_str('x>0', {'x': 0}))
+ self.assertFalse(match_str('x>0', {}))
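+        # The '?' suffix makes a comparison succeed when the field is missing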
+ self.assertTrue(match_str('x>?0', {}))
+ self.assertTrue(match_str('x>1K', {'x': 1200}))
+ self.assertFalse(match_str('x>2K', {'x': 1200}))
+ self.assertTrue(match_str('x>=1200 & x < 1300', {'x': 1200}))
+ self.assertFalse(match_str('x>=1100 & x < 1200', {'x': 1200}))
+ self.assertTrue(match_str('x > 1:0:0', {'x': 3700}))
+
+ # String
+ self.assertFalse(match_str('y=a212', {'y': 'foobar42'}))
+ self.assertTrue(match_str('y=foobar42', {'y': 'foobar42'}))
+ self.assertFalse(match_str('y!=foobar42', {'y': 'foobar42'}))
+ self.assertTrue(match_str('y!=foobar2', {'y': 'foobar42'}))
+ self.assertTrue(match_str('y^=foo', {'y': 'foobar42'}))
+ self.assertFalse(match_str('y!^=foo', {'y': 'foobar42'}))
+ self.assertFalse(match_str('y^=bar', {'y': 'foobar42'}))
+ self.assertTrue(match_str('y!^=bar', {'y': 'foobar42'}))
+ self.assertRaises(ValueError, match_str, 'x^=42', {'x': 42})
+ self.assertTrue(match_str('y*=bar', {'y': 'foobar42'}))
+ self.assertFalse(match_str('y!*=bar', {'y': 'foobar42'}))
+ self.assertFalse(match_str('y*=baz', {'y': 'foobar42'}))
+ self.assertTrue(match_str('y!*=baz', {'y': 'foobar42'}))
+ self.assertTrue(match_str('y$=42', {'y': 'foobar42'}))
+ self.assertFalse(match_str('y$=43', {'y': 'foobar42'}))
+
+ # And
+ self.assertFalse(match_str(
+ 'like_count > 100 & dislike_count <? 50 & description',
+ {'like_count': 90, 'description': 'foo'}))
+ self.assertTrue(match_str(
+ 'like_count > 100 & dislike_count <? 50 & description',
+ {'like_count': 190, 'description': 'foo'}))
+ self.assertFalse(match_str(
+ 'like_count > 100 & dislike_count <? 50 & description',
+ {'like_count': 190, 'dislike_count': 60, 'description': 'foo'}))
+ self.assertFalse(match_str(
+ 'like_count > 100 & dislike_count <? 50 & description',
+ {'like_count': 190, 'dislike_count': 10}))
+
+ # Regex
+ self.assertTrue(match_str(r'x~=\bbar', {'x': 'foo bar'}))
+ self.assertFalse(match_str(r'x~=\bbar.+', {'x': 'foo bar'}))
+ self.assertFalse(match_str(r'x~=^FOO', {'x': 'foo bar'}))
+ self.assertTrue(match_str(r'x~=(?i)^FOO', {'x': 'foo bar'}))
+
+ # Quotes
+ self.assertTrue(match_str(r'x^="foo"', {'x': 'foo "bar"'}))
+ self.assertFalse(match_str(r'x^="foo "', {'x': 'foo "bar"'}))
+ self.assertFalse(match_str(r'x$="bar"', {'x': 'foo "bar"'}))
+ self.assertTrue(match_str(r'x$=" \"bar\""', {'x': 'foo "bar"'}))
+
+ # Escaping &
+ self.assertFalse(match_str(r'x=foo & bar', {'x': 'foo & bar'}))
+ self.assertTrue(match_str(r'x=foo \& bar', {'x': 'foo & bar'}))
+ self.assertTrue(match_str(r'x=foo \& bar & x^=foo', {'x': 'foo & bar'}))
+ self.assertTrue(match_str(r'x="foo \& bar" & x^=foo', {'x': 'foo & bar'}))
+
+ # Example from docs
+ self.assertTrue(match_str(
+ r"!is_live & like_count>?100 & description~='(?i)\bcats \& dogs\b'",
+ {'description': 'Raining Cats & Dogs'}))
+
+ # Incomplete
+ self.assertFalse(match_str('id!=foo', {'id': 'foo'}, True))
+ self.assertTrue(match_str('x', {'id': 'foo'}, True))
+ self.assertTrue(match_str('!x', {'id': 'foo'}, True))
+ self.assertFalse(match_str('x', {'id': 'foo'}, False))
+
+ def test_parse_dfxp_time_expr(self):
+ self.assertEqual(parse_dfxp_time_expr(None), None)
+ self.assertEqual(parse_dfxp_time_expr(''), None)
+ self.assertEqual(parse_dfxp_time_expr('0.1'), 0.1)
+ self.assertEqual(parse_dfxp_time_expr('0.1s'), 0.1)
+ self.assertEqual(parse_dfxp_time_expr('00:00:01'), 1.0)
+ self.assertEqual(parse_dfxp_time_expr('00:00:01.100'), 1.1)
+ self.assertEqual(parse_dfxp_time_expr('00:00:01:100'), 1.1)
+
+ def test_dfxp2srt(self):
+ dfxp_data = '''<?xml version="1.0" encoding="UTF-8"?>
+ <tt xmlns="http://www.w3.org/ns/ttml" xml:lang="en" xmlns:tts="http://www.w3.org/ns/ttml#parameter">
+ <body>
+ <div xml:lang="en">
+ <p begin="0" end="1">The following line contains Chinese characters and special symbols</p>
+ <p begin="1" end="2">第二行<br/>♪♪</p>
+ <p begin="2" dur="1"><span>Third<br/>Line</span></p>
+ <p begin="3" end="-1">Lines with invalid timestamps are ignored</p>
+ <p begin="-1" end="-1">Ignore, two</p>
+ <p begin="3" dur="-1">Ignored, three</p>
+ </div>
+ </body>
+ </tt>'''.encode()
+ srt_data = '''1
+00:00:00,000 --> 00:00:01,000
+The following line contains Chinese characters and special symbols
+
+2
+00:00:01,000 --> 00:00:02,000
+第二行
+♪♪
+
+3
+00:00:02,000 --> 00:00:03,000
+Third
+Line
+
+'''
+ self.assertEqual(dfxp2srt(dfxp_data), srt_data)
+
+ dfxp_data_no_default_namespace = b'''<?xml version="1.0" encoding="UTF-8"?>
+ <tt xml:lang="en" xmlns:tts="http://www.w3.org/ns/ttml#parameter">
+ <body>
+ <div xml:lang="en">
+ <p begin="0" end="1">The first line</p>
+ </div>
+ </body>
+ </tt>'''
+ srt_data = '''1
+00:00:00,000 --> 00:00:01,000
+The first line
+
+'''
+ self.assertEqual(dfxp2srt(dfxp_data_no_default_namespace), srt_data)
+
+ dfxp_data_with_style = b'''<?xml version="1.0" encoding="utf-8"?>
+<tt xmlns="http://www.w3.org/2006/10/ttaf1" xmlns:ttp="http://www.w3.org/2006/10/ttaf1#parameter" ttp:timeBase="media" xmlns:tts="http://www.w3.org/2006/10/ttaf1#style" xml:lang="en" xmlns:ttm="http://www.w3.org/2006/10/ttaf1#metadata">
+ <head>
+ <styling>
+ <style id="s2" style="s0" tts:color="cyan" tts:fontWeight="bold" />
+ <style id="s1" style="s0" tts:color="yellow" tts:fontStyle="italic" />
+ <style id="s3" style="s0" tts:color="lime" tts:textDecoration="underline" />
+ <style id="s0" tts:backgroundColor="black" tts:fontStyle="normal" tts:fontSize="16" tts:fontFamily="sansSerif" tts:color="white" />
+ </styling>
+ </head>
+ <body tts:textAlign="center" style="s0">
+ <div>
+ <p begin="00:00:02.08" id="p0" end="00:00:05.84">default style<span tts:color="red">custom style</span></p>
+ <p style="s2" begin="00:00:02.08" id="p0" end="00:00:05.84"><span tts:color="lime">part 1<br /></span><span tts:color="cyan">part 2</span></p>
+ <p style="s3" begin="00:00:05.84" id="p1" end="00:00:09.56">line 3<br />part 3</p>
+ <p style="s1" tts:textDecoration="underline" begin="00:00:09.56" id="p2" end="00:00:12.36"><span style="s2" tts:color="lime">inner<br /> </span>style</p>
+ </div>
+ </body>
+</tt>'''
+ srt_data = '''1
+00:00:02,080 --> 00:00:05,840
+<font color="white" face="sansSerif" size="16">default style<font color="red">custom style</font></font>
+
+2
+00:00:02,080 --> 00:00:05,840
+<b><font color="cyan" face="sansSerif" size="16"><font color="lime">part 1
+</font>part 2</font></b>
+
+3
+00:00:05,840 --> 00:00:09,560
+<u><font color="lime">line 3
+part 3</font></u>
+
+4
+00:00:09,560 --> 00:00:12,360
+<i><u><font color="yellow"><font color="lime">inner
+ </font>style</font></u></i>
+
+'''
+ self.assertEqual(dfxp2srt(dfxp_data_with_style), srt_data)
+
+ dfxp_data_non_utf8 = '''<?xml version="1.0" encoding="UTF-16"?>
+ <tt xmlns="http://www.w3.org/ns/ttml" xml:lang="en" xmlns:tts="http://www.w3.org/ns/ttml#parameter">
+ <body>
+ <div xml:lang="en">
+ <p begin="0" end="1">Line 1</p>
+ <p begin="1" end="2">第二行</p>
+ </div>
+ </body>
+ </tt>'''.encode('utf-16')
+ srt_data = '''1
+00:00:00,000 --> 00:00:01,000
+Line 1
+
+2
+00:00:01,000 --> 00:00:02,000
+第二行
+
+'''
+ self.assertEqual(dfxp2srt(dfxp_data_non_utf8), srt_data)
+
+ def test_cli_option(self):
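+ # cli_option emits ['--flag', str(value)] when the param is set,
+ # and nothing when it is missing or None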
+ self.assertEqual(cli_option({'proxy': '127.0.0.1:3128'}, '--proxy', 'proxy'), ['--proxy', '127.0.0.1:3128'])
+ self.assertEqual(cli_option({'proxy': None}, '--proxy', 'proxy'), [])
+ self.assertEqual(cli_option({}, '--proxy', 'proxy'), [])
+ self.assertEqual(cli_option({'retries': 10}, '--retries', 'retries'), ['--retries', '10'])
+
+ def test_cli_valueless_option(self):
+ self.assertEqual(cli_valueless_option(
+ {'downloader': 'external'}, '--external-downloader', 'downloader', 'external'), ['--external-downloader'])
+ self.assertEqual(cli_valueless_option(
+ {'downloader': 'internal'}, '--external-downloader', 'downloader', 'external'), [])
+ self.assertEqual(cli_valueless_option(
+ {'nocheckcertificate': True}, '--no-check-certificate', 'nocheckcertificate'), ['--no-check-certificate'])
+ self.assertEqual(cli_valueless_option(
+ {'nocheckcertificate': False}, '--no-check-certificate', 'nocheckcertificate'), [])
+ self.assertEqual(cli_valueless_option(
+ {'checkcertificate': True}, '--no-check-certificate', 'checkcertificate', False), [])
+ self.assertEqual(cli_valueless_option(
+ {'checkcertificate': False}, '--no-check-certificate', 'checkcertificate', False), ['--no-check-certificate'])
+
+ def test_cli_bool_option(self):
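+ # cli_bool_option maps a boolean param to true/false CLI arguments;
+ # an optional separator joins flag and value into a single argument,
+ # and an absent param produces no arguments at all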
+ self.assertEqual(
+ cli_bool_option(
+ {'nocheckcertificate': True}, '--no-check-certificate', 'nocheckcertificate'),
+ ['--no-check-certificate', 'true'])
+ self.assertEqual(
+ cli_bool_option(
+ {'nocheckcertificate': True}, '--no-check-certificate', 'nocheckcertificate', separator='='),
+ ['--no-check-certificate=true'])
+ self.assertEqual(
+ cli_bool_option(
+ {'nocheckcertificate': True}, '--check-certificate', 'nocheckcertificate', 'false', 'true'),
+ ['--check-certificate', 'false'])
+ self.assertEqual(
+ cli_bool_option(
+ {'nocheckcertificate': True}, '--check-certificate', 'nocheckcertificate', 'false', 'true', '='),
+ ['--check-certificate=false'])
+ self.assertEqual(
+ cli_bool_option(
+ {'nocheckcertificate': False}, '--check-certificate', 'nocheckcertificate', 'false', 'true'),
+ ['--check-certificate', 'true'])
+ self.assertEqual(
+ cli_bool_option(
+ {'nocheckcertificate': False}, '--check-certificate', 'nocheckcertificate', 'false', 'true', '='),
+ ['--check-certificate=true'])
+ self.assertEqual(
+ cli_bool_option(
+ {}, '--check-certificate', 'nocheckcertificate', 'false', 'true', '='),
+ [])
+
+ def test_ohdave_rsa_encrypt(self):
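+ # ohdave_rsa_encrypt performs raw (unpadded) RSA, i.e. modular
+ # exponentiation of the message, and returns the result as hex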
+ N = 0xab86b6371b5318aaa1d3c9e612a9f1264f372323c8c0f19875b5fc3b3fd3afcc1e5bec527aa94bfa85bffc157e4245aebda05389a5357b75115ac94f074aefcd
+ e = 65537
+
+ self.assertEqual(
+ ohdave_rsa_encrypt(b'aa111222', e, N),
+ '726664bd9a23fd0c70f9f1b84aab5e3905ce1e45a584e9cbcf9bcc7510338fc1986d6c599ff990d923aa43c51c0d9013cd572e13bc58f4ae48f2ed8c0b0ba881')
+
+ def test_pkcs1pad(self):
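+ # PKCS#1 v1.5 block layout: 0x00 0x02 <non-zero random padding> 0x00 <data>;
+ # only the fixed prefix and the trailing data are deterministic,
+ # so those are what is asserted below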
+ data = [1, 2, 3]
+ padded_data = pkcs1pad(data, 32)
+ self.assertEqual(padded_data[:2], [0, 2])
+ self.assertEqual(padded_data[28:], [0, 1, 2, 3])
+
+ self.assertRaises(ValueError, pkcs1pad, data, 8)
+
+ def test_encode_base_n(self):
+ self.assertEqual(encode_base_n(0, 30), '0')
+ self.assertEqual(encode_base_n(80, 30), '2k')
+
+ custom_table = '9876543210ZYXWVUTSRQPONMLKJIHGFEDCBA'
+ self.assertEqual(encode_base_n(0, 30, custom_table), '9')
+ self.assertEqual(encode_base_n(80, 30, custom_table), '7P')
+
+ self.assertRaises(ValueError, encode_base_n, 0, 70)
+ self.assertRaises(ValueError, encode_base_n, 0, 60, custom_table)
+
+ def test_caesar(self):
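+ # caesar() shifts each character through the supplied alphabet;
+ # characters not in the alphabet (e.g. 'xyz' below) pass through unchanged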
+ self.assertEqual(caesar('ace', 'abcdef', 2), 'cea')
+ self.assertEqual(caesar('cea', 'abcdef', -2), 'ace')
+ self.assertEqual(caesar('ace', 'abcdef', -2), 'eac')
+ self.assertEqual(caesar('eac', 'abcdef', 2), 'ace')
+ self.assertEqual(caesar('ace', 'abcdef', 0), 'ace')
+ self.assertEqual(caesar('xyz', 'abcdef', 2), 'xyz')
+ self.assertEqual(caesar('abc', 'acegik', 2), 'ebg')
+ self.assertEqual(caesar('ebg', 'acegik', -2), 'abc')
+
+ def test_rot47(self):
+ self.assertEqual(rot47('yt-dlp'), r'JE\5=A')
+ self.assertEqual(rot47('YT-DLP'), r'*%\s{!')
+
+ def test_urshift(self):
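+ # urshift emulates JavaScript's unsigned right shift (>>>),
+ # treating negative numbers as their unsigned 32-bit representation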
+ self.assertEqual(urshift(3, 1), 1)
+ self.assertEqual(urshift(-3, 1), 2147483646)
+
+ GET_ELEMENT_BY_CLASS_TEST_STRING = '''
+ <span class="foo bar">nice</span>
+ '''
+
+ def test_get_element_by_class(self):
+ html = self.GET_ELEMENT_BY_CLASS_TEST_STRING
+
+ self.assertEqual(get_element_by_class('foo', html), 'nice')
+ self.assertEqual(get_element_by_class('no-such-class', html), None)
+
+ def test_get_element_html_by_class(self):
+ html = self.GET_ELEMENT_BY_CLASS_TEST_STRING
+
+ self.assertEqual(get_element_html_by_class('foo', html), html.strip())
+ self.assertEqual(get_element_html_by_class('no-such-class', html), None)
+
+ GET_ELEMENT_BY_ATTRIBUTE_TEST_STRING = '''
+ <div itemprop="author" itemscope>foo</div>
+ '''
+
+ def test_get_element_by_attribute(self):
+ html = self.GET_ELEMENT_BY_CLASS_TEST_STRING
+
+ self.assertEqual(get_element_by_attribute('class', 'foo bar', html), 'nice')
+ self.assertEqual(get_element_by_attribute('class', 'foo', html), None)
+ self.assertEqual(get_element_by_attribute('class', 'no-such-foo', html), None)
+
+ html = self.GET_ELEMENT_BY_ATTRIBUTE_TEST_STRING
+
+ self.assertEqual(get_element_by_attribute('itemprop', 'author', html), 'foo')
+
+ def test_get_element_html_by_attribute(self):
+ html = self.GET_ELEMENT_BY_CLASS_TEST_STRING
+
+ self.assertEqual(get_element_html_by_attribute('class', 'foo bar', html), html.strip())
+ self.assertEqual(get_element_html_by_attribute('class', 'foo', html), None)
+ self.assertEqual(get_element_html_by_attribute('class', 'no-such-foo', html), None)
+
+ html = self.GET_ELEMENT_BY_ATTRIBUTE_TEST_STRING
+
+ self.assertEqual(get_element_html_by_attribute('itemprop', 'author', html), html.strip())
+
+ GET_ELEMENTS_BY_CLASS_TEST_STRING = '''
+ <span class="foo bar">nice</span><span class="foo bar">also nice</span>
+ '''
+ GET_ELEMENTS_BY_CLASS_RES = ['<span class="foo bar">nice</span>', '<span class="foo bar">also nice</span>']
+
+ def test_get_elements_by_class(self):
+ html = self.GET_ELEMENTS_BY_CLASS_TEST_STRING
+
+ self.assertEqual(get_elements_by_class('foo', html), ['nice', 'also nice'])
+ self.assertEqual(get_elements_by_class('no-such-class', html), [])
+
+ def test_get_elements_html_by_class(self):
+ html = self.GET_ELEMENTS_BY_CLASS_TEST_STRING
+
+ self.assertEqual(get_elements_html_by_class('foo', html), self.GET_ELEMENTS_BY_CLASS_RES)
+ self.assertEqual(get_elements_html_by_class('no-such-class', html), [])
+
+ def test_get_elements_by_attribute(self):
+ html = self.GET_ELEMENTS_BY_CLASS_TEST_STRING
+
+ self.assertEqual(get_elements_by_attribute('class', 'foo bar', html), ['nice', 'also nice'])
+ self.assertEqual(get_elements_by_attribute('class', 'foo', html), [])
+ self.assertEqual(get_elements_by_attribute('class', 'no-such-foo', html), [])
+
+ def test_get_elements_html_by_attribute(self):
+ html = self.GET_ELEMENTS_BY_CLASS_TEST_STRING
+
+ self.assertEqual(get_elements_html_by_attribute('class', 'foo bar', html), self.GET_ELEMENTS_BY_CLASS_RES)
+ self.assertEqual(get_elements_html_by_attribute('class', 'foo', html), [])
+ self.assertEqual(get_elements_html_by_attribute('class', 'no-such-foo', html), [])
+
+ def test_get_elements_text_and_html_by_attribute(self):
+ html = self.GET_ELEMENTS_BY_CLASS_TEST_STRING
+
+ self.assertEqual(
+ list(get_elements_text_and_html_by_attribute('class', 'foo bar', html)),
+ list(zip(['nice', 'also nice'], self.GET_ELEMENTS_BY_CLASS_RES)))
+ self.assertEqual(list(get_elements_text_and_html_by_attribute('class', 'foo', html)), [])
+ self.assertEqual(list(get_elements_text_and_html_by_attribute('class', 'no-such-foo', html)), [])
+
+ self.assertEqual(list(get_elements_text_and_html_by_attribute(
+ 'class', 'foo', '<a class="foo">nice</a><span class="foo">nice</span>', tag='a')), [('nice', '<a class="foo">nice</a>')])
+
+ GET_ELEMENT_BY_TAG_TEST_STRING = '''
+ random text lorem ipsum</p>
+ <div>
+ this should be returned
+ <span>this should also be returned</span>
+ <div>
+ this should also be returned
+ </div>
+ closing tag above should not trick, so this should also be returned
+ </div>
+ but this text should not be returned
+ '''
+ GET_ELEMENT_BY_TAG_RES_OUTERDIV_HTML = GET_ELEMENT_BY_TAG_TEST_STRING.strip()[32:276]
+ GET_ELEMENT_BY_TAG_RES_OUTERDIV_TEXT = GET_ELEMENT_BY_TAG_RES_OUTERDIV_HTML[5:-6]
+ GET_ELEMENT_BY_TAG_RES_INNERSPAN_HTML = GET_ELEMENT_BY_TAG_TEST_STRING.strip()[78:119]
+ GET_ELEMENT_BY_TAG_RES_INNERSPAN_TEXT = GET_ELEMENT_BY_TAG_RES_INNERSPAN_HTML[6:-7]
+
+ def test_get_element_text_and_html_by_tag(self):
+ html = self.GET_ELEMENT_BY_TAG_TEST_STRING
+
+ self.assertEqual(
+ get_element_text_and_html_by_tag('div', html),
+ (self.GET_ELEMENT_BY_TAG_RES_OUTERDIV_TEXT, self.GET_ELEMENT_BY_TAG_RES_OUTERDIV_HTML))
+ self.assertEqual(
+ get_element_text_and_html_by_tag('span', html),
+ (self.GET_ELEMENT_BY_TAG_RES_INNERSPAN_TEXT, self.GET_ELEMENT_BY_TAG_RES_INNERSPAN_HTML))
+ self.assertRaises(compat_HTMLParseError, get_element_text_and_html_by_tag, 'article', html)
+
+ def test_iri_to_uri(self):
+ self.assertEqual(
+ iri_to_uri('https://www.google.com/search?q=foo&ie=utf-8&oe=utf-8&client=firefox-b'),
+ 'https://www.google.com/search?q=foo&ie=utf-8&oe=utf-8&client=firefox-b') # Same
+ self.assertEqual(
+ iri_to_uri('https://www.google.com/search?q=Käsesoßenrührlöffel'), # German for cheese sauce stirring spoon
+ 'https://www.google.com/search?q=K%C3%A4seso%C3%9Fenr%C3%BChrl%C3%B6ffel')
+ self.assertEqual(
+ iri_to_uri('https://www.google.com/search?q=lt<+gt>+eq%3D+amp%26+percent%25+hash%23+colon%3A+tilde~#trash=?&garbage=#'),
+ 'https://www.google.com/search?q=lt%3C+gt%3E+eq%3D+amp%26+percent%25+hash%23+colon%3A+tilde~#trash=?&garbage=#')
+ self.assertEqual(
+ iri_to_uri('http://правозащита38.рф/category/news/'),
+ 'http://xn--38-6kcaak9aj5chl4a3g.xn--p1ai/category/news/')
+ self.assertEqual(
+ iri_to_uri('http://www.правозащита38.рф/category/news/'),
+ 'http://www.xn--38-6kcaak9aj5chl4a3g.xn--p1ai/category/news/')
+ self.assertEqual(
+ iri_to_uri('https://i❤.ws/emojidomain/👍👏🤝💪'),
+ 'https://xn--i-7iq.ws/emojidomain/%F0%9F%91%8D%F0%9F%91%8F%F0%9F%A4%9D%F0%9F%92%AA')
+ self.assertEqual(
+ iri_to_uri('http://日本語.jp/'),
+ 'http://xn--wgv71a119e.jp/')
+ self.assertEqual(
+ iri_to_uri('http://导航.中国/'),
+ 'http://xn--fet810g.xn--fiqs8s/')
+
+ def test_clean_podcast_url(self):
+ self.assertEqual(clean_podcast_url('https://www.podtrac.com/pts/redirect.mp3/chtbl.com/track/5899E/traffic.megaphone.fm/HSW7835899191.mp3'), 'https://traffic.megaphone.fm/HSW7835899191.mp3')
+ self.assertEqual(clean_podcast_url('https://play.podtrac.com/npr-344098539/edge1.pod.npr.org/anon.npr-podcasts/podcast/npr/waitwait/2020/10/20201003_waitwait_wwdtmpodcast201003-015621a5-f035-4eca-a9a1-7c118d90bc3c.mp3'), 'https://edge1.pod.npr.org/anon.npr-podcasts/podcast/npr/waitwait/2020/10/20201003_waitwait_wwdtmpodcast201003-015621a5-f035-4eca-a9a1-7c118d90bc3c.mp3')
+ self.assertEqual(clean_podcast_url('https://pdst.fm/e/2.gum.fm/chtbl.com/track/chrt.fm/track/34D33/pscrb.fm/rss/p/traffic.megaphone.fm/ITLLC7765286967.mp3?updated=1687282661'), 'https://traffic.megaphone.fm/ITLLC7765286967.mp3?updated=1687282661')
+ self.assertEqual(clean_podcast_url('https://pdst.fm/e/https://mgln.ai/e/441/www.buzzsprout.com/1121972/13019085-ep-252-the-deep-life-stack.mp3'), 'https://www.buzzsprout.com/1121972/13019085-ep-252-the-deep-life-stack.mp3')
+
+ def test_LazyList(self):
+ it = list(range(10))
+
+ self.assertEqual(list(LazyList(it)), it)
+ self.assertEqual(LazyList(it).exhaust(), it)
+ self.assertEqual(LazyList(it)[5], it[5])
+
+ self.assertEqual(LazyList(it)[5:], it[5:])
+ self.assertEqual(LazyList(it)[:5], it[:5])
+ self.assertEqual(LazyList(it)[::2], it[::2])
+ self.assertEqual(LazyList(it)[1::2], it[1::2])
+ self.assertEqual(LazyList(it)[5::-1], it[5::-1])
+ self.assertEqual(LazyList(it)[6:2:-2], it[6:2:-2])
+ self.assertEqual(LazyList(it)[::-1], it[::-1])
+
+ self.assertTrue(LazyList(it))
+ self.assertFalse(LazyList(range(0)))
+ self.assertEqual(len(LazyList(it)), len(it))
+ self.assertEqual(repr(LazyList(it)), repr(it))
+ self.assertEqual(str(LazyList(it)), str(it))
+
+ self.assertEqual(list(LazyList(it, reverse=True)), it[::-1])
+ self.assertEqual(list(reversed(LazyList(it))[::-1]), it)
+ self.assertEqual(list(reversed(LazyList(it))[1:3:7]), it[::-1][1:3:7])
+
+ def test_LazyList_laziness(self):
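+ # LazyList consumes its iterable only as far as indexing requires;
+ # _cache reflects how many items have been materialized so far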
+
+ def test(ll, idx, val, cache):
+ self.assertEqual(ll[idx], val)
+ self.assertEqual(ll._cache, list(cache))
+
+ ll = LazyList(range(10))
+ test(ll, 0, 0, range(1))
+ test(ll, 5, 5, range(6))
+ test(ll, -3, 7, range(10))
+
+ ll = LazyList(range(10), reverse=True)
+ test(ll, -1, 0, range(1))
+ test(ll, 3, 6, range(10))
+
+ ll = LazyList(itertools.count())
+ test(ll, 10, 10, range(11))
+ ll = reversed(ll)
+ test(ll, -15, 14, range(15))
+
+ def test_format_bytes(self):
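+ # format_bytes uses binary (1024-based) units, so 1000 stays in plain
+ # bytes and the scale tops out at YiB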
+ self.assertEqual(format_bytes(0), '0.00B')
+ self.assertEqual(format_bytes(1000), '1000.00B')
+ self.assertEqual(format_bytes(1024), '1.00KiB')
+ self.assertEqual(format_bytes(1024**2), '1.00MiB')
+ self.assertEqual(format_bytes(1024**3), '1.00GiB')
+ self.assertEqual(format_bytes(1024**4), '1.00TiB')
+ self.assertEqual(format_bytes(1024**5), '1.00PiB')
+ self.assertEqual(format_bytes(1024**6), '1.00EiB')
+ self.assertEqual(format_bytes(1024**7), '1.00ZiB')
+ self.assertEqual(format_bytes(1024**8), '1.00YiB')
+ self.assertEqual(format_bytes(1024**9), '1024.00YiB')
+
+ def test_hide_login_info(self):
+ self.assertEqual(Config.hide_login_info(['-u', 'foo', '-p', 'bar']),
+ ['-u', 'PRIVATE', '-p', 'PRIVATE'])
+ self.assertEqual(Config.hide_login_info(['-u']), ['-u'])
+ self.assertEqual(Config.hide_login_info(['-u', 'foo', '-u', 'bar']),
+ ['-u', 'PRIVATE', '-u', 'PRIVATE'])
+ self.assertEqual(Config.hide_login_info(['--username=foo']),
+ ['--username=PRIVATE'])
+
+ def test_locked_file(self):
+ TEXT = 'test_locked_file\n'
+ FILE = 'test_locked_file.ytdl'
+ MODES = 'war' # Order is important: write first, then append, then read everything back
+
+ try:
+ for lock_mode in MODES:
+ with locked_file(FILE, lock_mode, False) as f:
+ if lock_mode == 'r':
+ self.assertEqual(f.read(), TEXT * 2, 'Wrong file content')
+ else:
+ f.write(TEXT)
+ for test_mode in MODES:
+ testing_write = test_mode != 'r'
+ try:
+ with locked_file(FILE, test_mode, False):
+ pass
+ except (BlockingIOError, PermissionError):
+ if not testing_write: # FIXME
+ print(f'Known issue: Exclusive lock ({lock_mode}) blocks read access ({test_mode})')
+ continue
+ self.assertTrue(testing_write, f'{test_mode} is blocked by {lock_mode}')
+ else:
+ self.assertFalse(testing_write, f'{test_mode} is not blocked by {lock_mode}')
+ finally:
+ with contextlib.suppress(OSError):
+ os.remove(FILE)
+
+ def test_determine_file_encoding(self):
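+ # The encoding is taken from a BOM if present, otherwise from a
+ # '# coding: ...' declaration on the first line; the second element
+ # of the result is the number of BOM bytes to skip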
+ self.assertEqual(determine_file_encoding(b''), (None, 0))
+ self.assertEqual(determine_file_encoding(b'--verbose -x --audio-format mkv\n'), (None, 0))
+
+ self.assertEqual(determine_file_encoding(b'\xef\xbb\xbf'), ('utf-8', 3))
+ self.assertEqual(determine_file_encoding(b'\x00\x00\xfe\xff'), ('utf-32-be', 4))
+ self.assertEqual(determine_file_encoding(b'\xff\xfe'), ('utf-16-le', 2))
+
+ self.assertEqual(determine_file_encoding(b'\xff\xfe# coding: utf-8\n--verbose'), ('utf-16-le', 2))
+
+ self.assertEqual(determine_file_encoding(b'# coding: utf-8\n--verbose'), ('utf-8', 0))
+ self.assertEqual(determine_file_encoding(b'# coding: someencodinghere-12345\n--verbose'), ('someencodinghere-12345', 0))
+
+ self.assertEqual(determine_file_encoding(b'#coding:utf-8\n--verbose'), ('utf-8', 0))
+ self.assertEqual(determine_file_encoding(b'# coding: utf-8 \r\n--verbose'), ('utf-8', 0))
+
+ self.assertEqual(determine_file_encoding('# coding: utf-32-be'.encode('utf-32-be')), ('utf-32-be', 0))
+ self.assertEqual(determine_file_encoding('# coding: utf-16-le'.encode('utf-16-le')), ('utf-16-le', 0))
+
+ def test_get_compatible_ext(self):
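+ # get_compatible_ext picks a container suited to the given codecs and
+ # stream extensions, falling back to mkv when the combination is not
+ # known to fit a more specific container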
+ self.assertEqual(get_compatible_ext(
+ vcodecs=[None], acodecs=[None, None], vexts=['mp4'], aexts=['m4a', 'm4a']), 'mkv')
+ self.assertEqual(get_compatible_ext(
+ vcodecs=[None], acodecs=[None], vexts=['flv'], aexts=['flv']), 'flv')
+
+ self.assertEqual(get_compatible_ext(
+ vcodecs=[None], acodecs=[None], vexts=['mp4'], aexts=['m4a']), 'mp4')
+ self.assertEqual(get_compatible_ext(
+ vcodecs=[None], acodecs=[None], vexts=['mp4'], aexts=['webm']), 'mkv')
+ self.assertEqual(get_compatible_ext(
+ vcodecs=[None], acodecs=[None], vexts=['webm'], aexts=['m4a']), 'mkv')
+ self.assertEqual(get_compatible_ext(
+ vcodecs=[None], acodecs=[None], vexts=['webm'], aexts=['webm']), 'webm')
+ self.assertEqual(get_compatible_ext(
+ vcodecs=[None], acodecs=[None], vexts=['webm'], aexts=['weba']), 'webm')
+
+ self.assertEqual(get_compatible_ext(
+ vcodecs=['h264'], acodecs=['mp4a'], vexts=['mov'], aexts=['m4a']), 'mp4')
+ self.assertEqual(get_compatible_ext(
+ vcodecs=['av01.0.12M.08'], acodecs=['opus'], vexts=['mp4'], aexts=['webm']), 'webm')
+
+ self.assertEqual(get_compatible_ext(
+ vcodecs=['vp9'], acodecs=['opus'], vexts=['webm'], aexts=['webm'], preferences=['flv', 'mp4']), 'mp4')
+ self.assertEqual(get_compatible_ext(
+ vcodecs=['av1'], acodecs=['mp4a'], vexts=['webm'], aexts=['m4a'], preferences=('webm', 'mkv')), 'mkv')
+
+ def test_try_call(self):
+ def total(*x, **kwargs):
+ return sum(x) + sum(kwargs.values())
+
+ self.assertEqual(try_call(None), None,
+ msg='not a fn should give None')
+ self.assertEqual(try_call(lambda: 1), 1,
+ msg='int fn with no expected_type should give int')
+ self.assertEqual(try_call(lambda: 1, expected_type=int), 1,
+ msg='int fn with expected_type int should give int')
+ self.assertEqual(try_call(lambda: 1, expected_type=dict), None,
+ msg='int fn with wrong expected_type should give None')
+ self.assertEqual(try_call(total, args=(0, 1, 0, ), expected_type=int), 1,
+ msg='fn should accept arglist')
+ self.assertEqual(try_call(total, kwargs={'a': 0, 'b': 1, 'c': 0}, expected_type=int), 1,
+ msg='fn should accept kwargs')
+ self.assertEqual(try_call(lambda: 1, expected_type=dict), None,
+ msg='int fn with wrong expected_type should give None')
+ self.assertEqual(try_call(lambda x: {}, total, args=(42, ), expected_type=int), 42,
+ msg='expect first int result with expected_type int')
+
+ def test_variadic(self):
+ self.assertEqual(variadic(None), (None, ))
+ self.assertEqual(variadic('spam'), ('spam', ))
+ self.assertEqual(variadic('spam', allowed_types=dict), 'spam')
+ with warnings.catch_warnings():
+ warnings.simplefilter('ignore')
+ self.assertEqual(variadic('spam', allowed_types=[dict]), 'spam')
+
+ def test_traverse_obj(self):
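+ # traverse_obj walks nested dicts/sequences along a path of keys;
+ # branching keys (tuples, `...`, functions) collect matches into a list,
+ # while dict keys build a new dict from sub-paths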
+ _TEST_DATA = {
+ 100: 100,
+ 1.2: 1.2,
+ 'str': 'str',
+ 'None': None,
+ '...': ...,
+ 'urls': [
+ {'index': 0, 'url': 'https://www.example.com/0'},
+ {'index': 1, 'url': 'https://www.example.com/1'},
+ ],
+ 'data': (
+ {'index': 2},
+ {'index': 3},
+ ),
+ 'dict': {},
+ }
+
+ # Test base functionality
+ self.assertEqual(traverse_obj(_TEST_DATA, ('str',)), 'str',
+ msg='allow tuple path')
+ self.assertEqual(traverse_obj(_TEST_DATA, ['str']), 'str',
+ msg='allow list path')
+ self.assertEqual(traverse_obj(_TEST_DATA, (value for value in ("str",))), 'str',
+ msg='allow iterable path')
+ self.assertEqual(traverse_obj(_TEST_DATA, 'str'), 'str',
+ msg='single items should be treated as a path')
+ self.assertEqual(traverse_obj(_TEST_DATA, None), _TEST_DATA)
+ self.assertEqual(traverse_obj(_TEST_DATA, 100), 100)
+ self.assertEqual(traverse_obj(_TEST_DATA, 1.2), 1.2)
+
+ # Test Ellipsis behavior
+ self.assertCountEqual(traverse_obj(_TEST_DATA, ...),
+ (item for item in _TEST_DATA.values() if item not in (None, {})),
+ msg='`...` should give all non-discarded values')
+ self.assertCountEqual(traverse_obj(_TEST_DATA, ('urls', 0, ...)), _TEST_DATA['urls'][0].values(),
+ msg='`...` selection for dicts should select all values')
+ self.assertEqual(traverse_obj(_TEST_DATA, (..., ..., 'url')),
+ ['https://www.example.com/0', 'https://www.example.com/1'],
+ msg='nested `...` queries should work')
+ self.assertCountEqual(traverse_obj(_TEST_DATA, (..., ..., 'index')), range(4),
+ msg='`...` query result should be flattened')
+ self.assertEqual(traverse_obj(iter(range(4)), ...), list(range(4)),
+ msg='`...` should accept iterables')
+
+ # Test function as key
+ self.assertEqual(traverse_obj(_TEST_DATA, lambda x, y: x == 'urls' and isinstance(y, list)),
+ [_TEST_DATA['urls']],
+ msg='function as query key should perform a filter based on (key, value)')
+ self.assertCountEqual(traverse_obj(_TEST_DATA, lambda _, x: isinstance(x[0], str)), {'str'},
+ msg='exceptions in the query function should be caught')
+ self.assertEqual(traverse_obj(iter(range(4)), lambda _, x: x % 2 == 0), [0, 2],
+ msg='function key should accept iterables')
+ if __debug__:
+ with self.assertRaises(Exception, msg='Wrong function signature should raise in debug'):
+ traverse_obj(_TEST_DATA, lambda a: ...)
+ with self.assertRaises(Exception, msg='Wrong function signature should raise in debug'):
+ traverse_obj(_TEST_DATA, lambda a, b, c: ...)
+
+ # Test set as key (transformation/type, like `expected_type`)
+ self.assertEqual(traverse_obj(_TEST_DATA, (..., {str.upper}, )), ['STR'],
+ msg='Function in set should be a transformation')
+ self.assertEqual(traverse_obj(_TEST_DATA, (..., {str})), ['str'],
+ msg='Type in set should be a type filter')
+ self.assertEqual(traverse_obj(_TEST_DATA, {dict}), _TEST_DATA,
+ msg='A single set should be wrapped into a path')
+ self.assertEqual(traverse_obj(_TEST_DATA, (..., {str.upper})), ['STR'],
+ msg='Transformation function should not raise')
+ self.assertEqual(traverse_obj(_TEST_DATA, (..., {str_or_none})),
+ [item for item in map(str_or_none, _TEST_DATA.values()) if item is not None],
+ msg='Function in set should be a transformation')
+ self.assertEqual(traverse_obj(_TEST_DATA, ('fail', {lambda _: 'const'})), 'const',
+ msg='Function in set should always be called')
+ if __debug__:
+ with self.assertRaises(Exception, msg='Sets with length != 1 should raise in debug'):
+ traverse_obj(_TEST_DATA, set())
+ with self.assertRaises(Exception, msg='Sets with length != 1 should raise in debug'):
+ traverse_obj(_TEST_DATA, {str.upper, str})
+
+ # Test `slice` as a key
+ _SLICE_DATA = [0, 1, 2, 3, 4]
+ self.assertEqual(traverse_obj(_TEST_DATA, ('dict', slice(1))), None,
+ msg='slice on a dictionary should not throw')
+ self.assertEqual(traverse_obj(_SLICE_DATA, slice(1)), _SLICE_DATA[:1],
+ msg='slice key should apply slice to sequence')
+ self.assertEqual(traverse_obj(_SLICE_DATA, slice(1, 2)), _SLICE_DATA[1:2],
+ msg='slice key should apply slice to sequence')
+ self.assertEqual(traverse_obj(_SLICE_DATA, slice(1, 4, 2)), _SLICE_DATA[1:4:2],
+ msg='slice key should apply slice to sequence')
+
+ # Test alternative paths
+ self.assertEqual(traverse_obj(_TEST_DATA, 'fail', 'str'), 'str',
+ msg='multiple `paths` should be treated as alternative paths')
+ self.assertEqual(traverse_obj(_TEST_DATA, 'str', 100), 'str',
+ msg='alternatives should exit early')
+ self.assertEqual(traverse_obj(_TEST_DATA, 'fail', 'fail'), None,
+ msg='alternatives should return `default` if exhausted')
+ self.assertEqual(traverse_obj(_TEST_DATA, (..., 'fail'), 100), 100,
+ msg='alternatives should track their own branching return')
+ self.assertEqual(traverse_obj(_TEST_DATA, ('dict', ...), ('data', ...)), list(_TEST_DATA['data']),
+ msg='alternatives on empty objects should search further')
+
+ # Test branch and path nesting
+ self.assertEqual(traverse_obj(_TEST_DATA, ('urls', (3, 0), 'url')), ['https://www.example.com/0'],
+ msg='tuple as key should be treated as branches')
+ self.assertEqual(traverse_obj(_TEST_DATA, ('urls', [3, 0], 'url')), ['https://www.example.com/0'],
+ msg='list as key should be treated as branches')
+ self.assertEqual(traverse_obj(_TEST_DATA, ('urls', ((1, 'fail'), (0, 'url')))), ['https://www.example.com/0'],
+ msg='double nesting in path should be treated as paths')
+ self.assertEqual(traverse_obj(['0', [1, 2]], [(0, 1), 0]), [1],
+ msg='do not fail early on branching')
+ self.assertCountEqual(traverse_obj(_TEST_DATA, ('urls', ((1, ('fail', 'url')), (0, 'url')))),
+ ['https://www.example.com/0', 'https://www.example.com/1'],
+ msg='triple nesting in path should be treated as branches')
+ self.assertEqual(traverse_obj(_TEST_DATA, ('urls', ('fail', (..., 'url')))),
+ ['https://www.example.com/0', 'https://www.example.com/1'],
+ msg='ellipsis as branch path start gets flattened')
+
+ # Test dictionary as key
+ self.assertEqual(traverse_obj(_TEST_DATA, {0: 100, 1: 1.2}), {0: 100, 1: 1.2},
+ msg='dict key should result in a dict with the same keys')
+ self.assertEqual(traverse_obj(_TEST_DATA, {0: ('urls', 0, 'url')}),
+ {0: 'https://www.example.com/0'},
+ msg='dict key should allow paths')
+ self.assertEqual(traverse_obj(_TEST_DATA, {0: ('urls', (3, 0), 'url')}),
+ {0: ['https://www.example.com/0']},
+ msg='tuple in dict path should be treated as branches')
+ self.assertEqual(traverse_obj(_TEST_DATA, {0: ('urls', ((1, 'fail'), (0, 'url')))}),
+ {0: ['https://www.example.com/0']},
+ msg='double nesting in dict path should be treated as paths')
+ self.assertEqual(traverse_obj(_TEST_DATA, {0: ('urls', ((1, ('fail', 'url')), (0, 'url')))}),
+ {0: ['https://www.example.com/1', 'https://www.example.com/0']},
+ msg='triple nesting in dict path should be treated as branches')
+ self.assertEqual(traverse_obj(_TEST_DATA, {0: 'fail'}), {},
+ msg='remove `None` values when top level dict key fails')
+ self.assertEqual(traverse_obj(_TEST_DATA, {0: 'fail'}, default=...), {0: ...},
+ msg='use `default` if the key fails and a `default` is given')
+ self.assertEqual(traverse_obj(_TEST_DATA, {0: 'dict'}), {},
+ msg='remove empty values when dict key')
+ self.assertEqual(traverse_obj(_TEST_DATA, {0: 'dict'}, default=...), {0: ...},
+ msg='use `default` when the dict key fails and a `default` is given')
+ self.assertEqual(traverse_obj(_TEST_DATA, {0: {0: 'fail'}}), {},
+ msg='remove empty values when nested dict key fails')
+ self.assertEqual(traverse_obj(None, {0: 'fail'}), {},
+ msg='default to dict if pruned')
+ self.assertEqual(traverse_obj(None, {0: 'fail'}, default=...), {0: ...},
+ msg='default to dict if pruned and default is given')
+ self.assertEqual(traverse_obj(_TEST_DATA, {0: {0: 'fail'}}, default=...), {0: {0: ...}},
+ msg='use nested `default` when a nested dict key fails and a `default` is given')
+ self.assertEqual(traverse_obj(_TEST_DATA, {0: ('dict', ...)}), {},
+ msg='remove key if branch in dict key not successful')
+
+ # Testing default parameter behavior
+ _DEFAULT_DATA = {'None': None, 'int': 0, 'list': []}
+ self.assertEqual(traverse_obj(_DEFAULT_DATA, 'fail'), None,
+ msg='default value should be `None`')
+ self.assertEqual(traverse_obj(_DEFAULT_DATA, 'fail', 'fail', default=...), ...,
+ msg='chained fails should result in default')
+ self.assertEqual(traverse_obj(_DEFAULT_DATA, 'None', 'int'), 0,
+ msg='should not short-circuit on `None`')
+ self.assertEqual(traverse_obj(_DEFAULT_DATA, 'fail', default=1), 1,
+ msg='invalid dict key should result in `default`')
+ self.assertEqual(traverse_obj(_DEFAULT_DATA, 'None', default=1), 1,
+ msg='`None` is a deliberate sentinel and should become `default`')
+ self.assertEqual(traverse_obj(_DEFAULT_DATA, ('list', 10)), None,
+ msg='`IndexError` should result in `default`')
+ self.assertEqual(traverse_obj(_DEFAULT_DATA, (..., 'fail'), default=1), 1,
+ msg='if branched but not successful return `default` if defined, not `[]`')
+ self.assertEqual(traverse_obj(_DEFAULT_DATA, (..., 'fail'), default=None), None,
+ msg='if branched but not successful return `default` even if `default` is `None`')
+ self.assertEqual(traverse_obj(_DEFAULT_DATA, (..., 'fail')), [],
+ msg='if branched but not successful return `[]`, not `default`')
+ self.assertEqual(traverse_obj(_DEFAULT_DATA, ('list', ...)), [],
+ msg='if branched but object is empty return `[]`, not `default`')
+ self.assertEqual(traverse_obj(None, ...), [],
+ msg='if branched but object is `None` return `[]`, not `default`')
+ self.assertEqual(traverse_obj({0: None}, (0, ...)), [],
+ msg='if branched but state is `None` return `[]`, not `default`')
+
+ branching_paths = [
+ ('fail', ...),
+ (..., 'fail'),
+ 100 * ('fail',) + (...,),
+ (...,) + 100 * ('fail',),
+ ]
+ for branching_path in branching_paths:
+ self.assertEqual(traverse_obj({}, branching_path), [],
+ msg='if the branching path matches nothing, return `[]` (not `default`)')
+ self.assertEqual(traverse_obj({}, 'fail', branching_path), [],
+ msg='if branching in last alternative and previous did not match, return `[]` (not `default`)')
+ self.assertEqual(traverse_obj({0: 'x'}, 0, branching_path), 'x',
+ msg='if branching in last alternative and previous did match, return single value')
+ self.assertEqual(traverse_obj({0: 'x'}, branching_path, 0), 'x',
+ msg='if branching in first alternative and non-branching path does match, return single value')
+ self.assertEqual(traverse_obj({}, branching_path, 'fail'), None,
+ msg='if branching in first alternative and non-branching path does not match, return `default`')
+
+ # Testing expected_type behavior
+ _EXPECTED_TYPE_DATA = {'str': 'str', 'int': 0}
+ self.assertEqual(traverse_obj(_EXPECTED_TYPE_DATA, 'str', expected_type=str),
+ 'str', msg='accept matching `expected_type` type')
+ self.assertEqual(traverse_obj(_EXPECTED_TYPE_DATA, 'str', expected_type=int),
+ None, msg='reject non matching `expected_type` type')
+ self.assertEqual(traverse_obj(_EXPECTED_TYPE_DATA, 'int', expected_type=lambda x: str(x)),
+ '0', msg='transform type using type function')
+ self.assertEqual(traverse_obj(_EXPECTED_TYPE_DATA, 'str', expected_type=lambda _: 1 / 0),
+ None, msg='wrap `expected_type` function in try_call')
+ self.assertEqual(traverse_obj(_EXPECTED_TYPE_DATA, ..., expected_type=str),
+ ['str'], msg='eliminate items that expected_type fails on')
+ self.assertEqual(traverse_obj(_TEST_DATA, {0: 100, 1: 1.2}, expected_type=int),
+ {0: 100}, msg='type as expected_type should filter dict values')
+ self.assertEqual(traverse_obj(_TEST_DATA, {0: 100, 1: 1.2, 2: 'None'}, expected_type=str_or_none),
+ {0: '100', 1: '1.2'}, msg='function as expected_type should transform dict values')
+ self.assertEqual(traverse_obj(_TEST_DATA, ({0: 1.2}, 0, {int_or_none}), expected_type=int),
+ 1, msg='expected_type should not filter non-final dict values')
+ self.assertEqual(traverse_obj(_TEST_DATA, {0: {0: 100, 1: 'str'}}, expected_type=int),
+ {0: {0: 100}}, msg='expected_type should transform deep dict values')
+ self.assertEqual(traverse_obj(_TEST_DATA, [({0: '...'}, {0: '...'})], expected_type=type(...)),
+ [{0: ...}, {0: ...}], msg='expected_type should transform branched dict values')
+ self.assertEqual(traverse_obj({1: {3: 4}}, [(1, 2), 3], expected_type=int),
+ [4], msg='expected_type regression for type matching in tuple branching')
+ self.assertEqual(traverse_obj(_TEST_DATA, ['data', ...], expected_type=int),
+ [], msg='expected_type regression for type matching in dict result')
+
+ # Test get_all behavior
+ _GET_ALL_DATA = {'key': [0, 1, 2]}
+ self.assertEqual(traverse_obj(_GET_ALL_DATA, ('key', ...), get_all=False), 0,
+ msg='if not `get_all`, return only first matching value')
+ self.assertEqual(traverse_obj(_GET_ALL_DATA, ..., get_all=False), [0, 1, 2],
+ msg='do not overflatten if not `get_all`')
+
+ # Test casesense behavior
+ _CASESENSE_DATA = {
+ 'KeY': 'value0',
+ 0: {
+ 'KeY': 'value1',
+ 0: {'KeY': 'value2'},
+ },
+ }
+ self.assertEqual(traverse_obj(_CASESENSE_DATA, 'key'), None,
+ msg='dict keys should be case-sensitive by default')
+ self.assertEqual(traverse_obj(_CASESENSE_DATA, 'keY',
+ casesense=False), 'value0',
+ msg='allow non-matching key case if `casesense` is disabled')
+ self.assertEqual(traverse_obj(_CASESENSE_DATA, (0, ('keY',)),
+ casesense=False), ['value1'],
+ msg='allow non-matching key case in branch if `casesense` is disabled')
+ self.assertEqual(traverse_obj(_CASESENSE_DATA, (0, ((0, 'keY'),)),
+ casesense=False), ['value2'],
+ msg='allow non-matching key case in branch path if `casesense` is disabled')
+
+ # Test traverse_string behavior
+ _TRAVERSE_STRING_DATA = {'str': 'str', 1.2: 1.2}
+ self.assertEqual(traverse_obj(_TRAVERSE_STRING_DATA, ('str', 0)), None,
+ msg='do not traverse into string if not `traverse_string`')
+ self.assertEqual(traverse_obj(_TRAVERSE_STRING_DATA, ('str', 0),
+ traverse_string=True), 's',
+ msg='traverse into string if `traverse_string`')
+ self.assertEqual(traverse_obj(_TRAVERSE_STRING_DATA, (1.2, 1),
+ traverse_string=True), '.',
+ msg='traverse into converted data if `traverse_string`')
+ self.assertEqual(traverse_obj(_TRAVERSE_STRING_DATA, ('str', ...),
+ traverse_string=True), 'str',
+ msg='`...` should result in string (same value) if `traverse_string`')
+ self.assertEqual(traverse_obj(_TRAVERSE_STRING_DATA, ('str', slice(0, None, 2)),
+ traverse_string=True), 'sr',
+ msg='`slice` should result in string if `traverse_string`')
+ self.assertEqual(traverse_obj(_TRAVERSE_STRING_DATA, ('str', lambda i, v: i or v == "s"),
+ traverse_string=True), 'str',
+ msg='function should result in string if `traverse_string`')
+ self.assertEqual(traverse_obj(_TRAVERSE_STRING_DATA, ('str', (0, 2)),
+ traverse_string=True), ['s', 'r'],
+ msg='branching should result in list if `traverse_string`')
+ self.assertEqual(traverse_obj({}, (0, ...), traverse_string=True), [],
+ msg='branching should result in list if `traverse_string`')
+ self.assertEqual(traverse_obj({}, (0, lambda x, y: True), traverse_string=True), [],
+ msg='branching should result in list if `traverse_string`')
+ self.assertEqual(traverse_obj({}, (0, slice(1)), traverse_string=True), [],
+ msg='branching should result in list if `traverse_string`')
+
+ # Test re.Match as input obj
+ mobj = re.fullmatch(r'0(12)(?P<group>3)(4)?', '0123')
+ self.assertEqual(traverse_obj(mobj, ...), [x for x in mobj.groups() if x is not None],
+ msg='`...` on a `re.Match` should give its non-`None` `groups()`')
+ self.assertEqual(traverse_obj(mobj, lambda k, _: k in (0, 2)), ['0123', '3'],
+ msg='function on a `re.Match` should receive (group number, value), starting at group 0')
+ self.assertEqual(traverse_obj(mobj, 'group'), '3',
+ msg='str key on a `re.Match` should give group with that name')
+ self.assertEqual(traverse_obj(mobj, 2), '3',
+ msg='int key on a `re.Match` should give the group with that index')
+ self.assertEqual(traverse_obj(mobj, 'gRoUp', casesense=False), '3',
+ msg='str key on a `re.Match` should respect casesense')
+ self.assertEqual(traverse_obj(mobj, 'fail'), None,
+ msg='failing str key on a `re.Match` should return `default`')
+ self.assertEqual(traverse_obj(mobj, 'gRoUpS', casesense=False), None,
+ msg='failing str key on a `re.Match` should return `default`')
+ self.assertEqual(traverse_obj(mobj, 8), None,
+ msg='failing int key on a `re.Match` should return `default`')
+ self.assertEqual(traverse_obj(mobj, lambda k, _: k in (0, 'group')), ['0123', '3'],
+ msg='function on a `re.Match` should give group name as well')
+
+ # Test xml.etree.ElementTree.Element as input obj
+ etree = xml.etree.ElementTree.fromstring('''<?xml version="1.0"?>
+ <data>
+ <country name="Liechtenstein">
+ <rank>1</rank>
+ <year>2008</year>
+ <gdppc>141100</gdppc>
+ <neighbor name="Austria" direction="E"/>
+ <neighbor name="Switzerland" direction="W"/>
+ </country>
+ <country name="Singapore">
+ <rank>4</rank>
+ <year>2011</year>
+ <gdppc>59900</gdppc>
+ <neighbor name="Malaysia" direction="N"/>
+ </country>
+ <country name="Panama">
+ <rank>68</rank>
+ <year>2011</year>
+ <gdppc>13600</gdppc>
+ <neighbor name="Costa Rica" direction="W"/>
+ <neighbor name="Colombia" direction="E"/>
+ </country>
+ </data>''')
+ self.assertEqual(traverse_obj(etree, ''), etree,
+ msg='empty str key should return the element itself')
+ self.assertEqual(traverse_obj(etree, 'country'), list(etree),
+ msg='str key should yield all children with that tag name')
+ self.assertEqual(traverse_obj(etree, ...), list(etree),
+ msg='`...` as key should return all children')
+ self.assertEqual(traverse_obj(etree, lambda _, x: x[0].text == '4'), [etree[1]],
+ msg='function as key should get element as value')
+ self.assertEqual(traverse_obj(etree, lambda i, _: i == 1), [etree[1]],
+ msg='function as key should get index as key')
+ self.assertEqual(traverse_obj(etree, 0), etree[0],
+ msg='int key should return the nth child')
+ self.assertEqual(traverse_obj(etree, './/neighbor/@name'),
+ ['Austria', 'Switzerland', 'Malaysia', 'Costa Rica', 'Colombia'],
+ msg='`@<attribute>` at end of path should give that attribute')
+ self.assertEqual(traverse_obj(etree, '//neighbor/@fail'), [None, None, None, None, None],
+ msg='`@<nonexistent>` at end of path should give `None`')
+ self.assertEqual(traverse_obj(etree, ('//neighbor/@', 2)), {'name': 'Malaysia', 'direction': 'N'},
+ msg='`@` should give the full attribute dict')
+ self.assertEqual(traverse_obj(etree, '//year/text()'), ['2008', '2011', '2011'],
+ msg='`text()` at end of path should give the inner text')
+ self.assertEqual(traverse_obj(etree, '//*[@direction]/@direction'), ['E', 'W', 'N', 'W', 'E'],
+ msg='full Python xpath features should be supported')
+ self.assertEqual(traverse_obj(etree, (0, '@name')), 'Liechtenstein',
+ msg='special transformations should act on current element')
+ self.assertEqual(traverse_obj(etree, ('country', 0, ..., 'text()', {int_or_none})), [1, 2008, 141100],
+ msg='special transformations should act on current element')
+
+ def test_http_header_dict(self):
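+ # HTTPHeaderDict normalizes header names to Title-Case, coerces values
+ # to str and matches keys case-insensitively on lookup and deletion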
+ headers = HTTPHeaderDict()
+ headers['ytdl-test'] = b'0'
+ self.assertEqual(list(headers.items()), [('Ytdl-Test', '0')])
+ headers['ytdl-test'] = 1
+ self.assertEqual(list(headers.items()), [('Ytdl-Test', '1')])
+ headers['Ytdl-test'] = '2'
+ self.assertEqual(list(headers.items()), [('Ytdl-Test', '2')])
+ self.assertTrue('ytDl-Test' in headers)
+ self.assertEqual(str(headers), str(dict(headers)))
+ self.assertEqual(repr(headers), str(dict(headers)))
+
+ headers.update({'X-dlp': 'data'})
+ self.assertEqual(set(headers.items()), {('Ytdl-Test', '2'), ('X-Dlp', 'data')})
+ self.assertEqual(dict(headers), {'Ytdl-Test': '2', 'X-Dlp': 'data'})
+ self.assertEqual(len(headers), 2)
+ self.assertEqual(headers.copy(), headers)
+ headers2 = HTTPHeaderDict({'X-dlp': 'data3'}, **headers, **{'X-dlp': 'data2'})
+ self.assertEqual(set(headers2.items()), {('Ytdl-Test', '2'), ('X-Dlp', 'data2')})
+ self.assertEqual(len(headers2), 2)
+ headers2.clear()
+ self.assertEqual(len(headers2), 0)
+
+ # ensure the later value wins when duplicate headers are supplied
+ headers3 = HTTPHeaderDict({'Ytdl-TeSt': 1}, {'Ytdl-test': 2})
+ self.assertEqual(set(headers3.items()), {('Ytdl-Test', '2')})
+ del headers3['ytdl-tesT']
+ self.assertEqual(dict(headers3), {})
+
+ headers4 = HTTPHeaderDict({'ytdl-test': 'data;'})
+ self.assertEqual(set(headers4.items()), {('Ytdl-Test', 'data;')})
+
+ # leading/trailing whitespace in values (a common mistake) should be stripped
+ # https://github.com/yt-dlp/yt-dlp/issues/8729
+ headers5 = HTTPHeaderDict({'ytdl-test': ' data; '})
+ self.assertEqual(set(headers5.items()), {('Ytdl-Test', 'data;')})
+
+ def test_extract_basic_auth(self):
+ assert extract_basic_auth('http://:foo.bar') == ('http://:foo.bar', None)
+ assert extract_basic_auth('http://foo.bar') == ('http://foo.bar', None)
+ assert extract_basic_auth('http://@foo.bar') == ('http://foo.bar', 'Basic Og==')
+ assert extract_basic_auth('http://:pass@foo.bar') == ('http://foo.bar', 'Basic OnBhc3M=')
+ assert extract_basic_auth('http://user:@foo.bar') == ('http://foo.bar', 'Basic dXNlcjo=')
+ assert extract_basic_auth('http://user:pass@foo.bar') == ('http://foo.bar', 'Basic dXNlcjpwYXNz')
+
+ @unittest.skipUnless(compat_os_name == 'nt', 'Only relevant on Windows')
+ def test_Popen_windows_escaping(self):
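+ # Arguments must be quoted so that cmd.exe neither splits on special
+ # characters nor applies delayed expansion to '!'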
+ def run_shell(args):
+ stdout, stderr, error = Popen.run(
+ args, text=True, shell=True, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
+ assert not stderr
+ assert not error
+ return stdout
+
+ # Test escaping
+ assert run_shell(['echo', 'test"&']) == '"test""&"\n'
+ # Test if delayed expansion is disabled
+ assert run_shell(['echo', '^!']) == '"^!"\n'
+ assert run_shell('echo "^!"') == '"^!"\n'
+
+
+if __name__ == '__main__':
+ unittest.main()
diff --git a/test/test_verbose_output.py b/test/test_verbose_output.py
new file mode 100644
index 0000000..21ce10a
--- /dev/null
+++ b/test/test_verbose_output.py
@@ -0,0 +1,75 @@
+#!/usr/bin/env python3
+
+# Allow direct execution
+import os
+import sys
+import unittest
+
+sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
+
+
+import subprocess
+
+rootDir = os.path.dirname(os.path.dirname(os.path.abspath(__file__)))
+
+
+class TestVerboseOutput(unittest.TestCase):
+ def test_private_info_arg(self):
+ outp = subprocess.Popen(
+ [
+ sys.executable, 'yt_dlp/__main__.py',
+ '-v', '--ignore-config',
+ '--username', 'johnsmith@gmail.com',
+ '--password', 'my_secret_password',
+ ], cwd=rootDir, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
+ sout, serr = outp.communicate()
+ self.assertTrue(b'--username' in serr)
+ self.assertTrue(b'johnsmith' not in serr)
+ self.assertTrue(b'--password' in serr)
+ self.assertTrue(b'my_secret_password' not in serr)
+
+ def test_private_info_shortarg(self):
+ outp = subprocess.Popen(
+ [
+ sys.executable, 'yt_dlp/__main__.py',
+ '-v', '--ignore-config',
+ '-u', 'johnsmith@gmail.com',
+ '-p', 'my_secret_password',
+ ], cwd=rootDir, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
+ sout, serr = outp.communicate()
+ self.assertTrue(b'-u' in serr)
+ self.assertTrue(b'johnsmith' not in serr)
+ self.assertTrue(b'-p' in serr)
+ self.assertTrue(b'my_secret_password' not in serr)
+
+ def test_private_info_eq(self):
+ outp = subprocess.Popen(
+ [
+ sys.executable, 'yt_dlp/__main__.py',
+ '-v', '--ignore-config',
+ '--username=johnsmith@gmail.com',
+ '--password=my_secret_password',
+ ], cwd=rootDir, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
+ sout, serr = outp.communicate()
+ self.assertTrue(b'--username' in serr)
+ self.assertTrue(b'johnsmith' not in serr)
+ self.assertTrue(b'--password' in serr)
+ self.assertTrue(b'my_secret_password' not in serr)
+
+ def test_private_info_shortarg_eq(self):
+ outp = subprocess.Popen(
+ [
+ sys.executable, 'yt_dlp/__main__.py',
+ '-v', '--ignore-config',
+ '-u=johnsmith@gmail.com',
+ '-p=my_secret_password',
+ ], cwd=rootDir, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
+ sout, serr = outp.communicate()
+ self.assertTrue(b'-u' in serr)
+ self.assertTrue(b'johnsmith' not in serr)
+ self.assertTrue(b'-p' in serr)
+ self.assertTrue(b'my_secret_password' not in serr)
+
+
+if __name__ == '__main__':
+ unittest.main()
diff --git a/test/test_websockets.py b/test/test_websockets.py
new file mode 100644
index 0000000..13b3a1e
--- /dev/null
+++ b/test/test_websockets.py
@@ -0,0 +1,383 @@
+#!/usr/bin/env python3
+
+# Allow direct execution
+import os
+import sys
+
+import pytest
+
+from test.helper import verify_address_availability
+
+sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
+
+import http.client
+import http.cookiejar
+import http.server
+import json
+import random
+import ssl
+import threading
+
+from yt_dlp import socks
+from yt_dlp.cookies import YoutubeDLCookieJar
+from yt_dlp.dependencies import websockets
+from yt_dlp.networking import Request
+from yt_dlp.networking.exceptions import (
+ CertificateVerifyError,
+ HTTPError,
+ ProxyError,
+ RequestError,
+ SSLError,
+ TransportError,
+)
+from yt_dlp.utils.networking import HTTPHeaderDict
+
+from test.conftest import validate_and_send
+
+TEST_DIR = os.path.dirname(os.path.abspath(__file__))
+
+
+def websocket_handler(websocket):
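+ # Minimal echo handler for the tests: a few magic messages trigger
+ # introspection replies (headers, path, source_address, type checks);
+ # anything else is echoed back verbatim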
+ for message in websocket:
+ if isinstance(message, bytes):
+ if message == b'bytes':
+ return websocket.send('2')
+ elif isinstance(message, str):
+ if message == 'headers':
+ return websocket.send(json.dumps(dict(websocket.request.headers)))
+ elif message == 'path':
+ return websocket.send(websocket.request.path)
+ elif message == 'source_address':
+ return websocket.send(websocket.remote_address[0])
+ elif message == 'str':
+ return websocket.send('1')
+ return websocket.send(message)
+
+
+def process_request(self, request):
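+ # Paths of the form '/gen_<status>' synthesize a handshake response
+ # with that HTTP status so error handling can be exercised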
+ if request.path.startswith('/gen_'):
+ status = http.HTTPStatus(int(request.path[5:]))
+ if status.value == 300: # only 300 is answered with a Location header; other statuses are rejected below
+ return websockets.http11.Response(
+ status.value, status.phrase, websockets.datastructures.Headers([('Location', '/')]), b'')
+ return self.protocol.reject(status.value, status.phrase)
+ return self.protocol.accept(request)
+
+
+def create_websocket_server(**ws_kwargs):
+ import websockets.sync.server
+ wsd = websockets.sync.server.serve(websocket_handler, '127.0.0.1', 0, process_request=process_request, **ws_kwargs)
+ ws_port = wsd.socket.getsockname()[1]
+ ws_server_thread = threading.Thread(target=wsd.serve_forever)
+ ws_server_thread.daemon = True
+ ws_server_thread.start()
+ return ws_server_thread, ws_port
+
+
+def create_ws_websocket_server():
+ return create_websocket_server()
+
+
+def create_wss_websocket_server():
+ certfn = os.path.join(TEST_DIR, 'testcert.pem')
+ sslctx = ssl.SSLContext(ssl.PROTOCOL_TLS_SERVER)
+ sslctx.load_cert_chain(certfn, None)
+ return create_websocket_server(ssl_context=sslctx)
+
+
+MTLS_CERT_DIR = os.path.join(TEST_DIR, 'testdata', 'certificate')
+
+
+def create_mtls_wss_websocket_server():
+ certfn = os.path.join(TEST_DIR, 'testcert.pem')
+ cacertfn = os.path.join(MTLS_CERT_DIR, 'ca.crt')
+
+ sslctx = ssl.SSLContext(ssl.PROTOCOL_TLS_SERVER)
+ sslctx.verify_mode = ssl.CERT_REQUIRED
+ sslctx.load_verify_locations(cafile=cacertfn)
+ sslctx.load_cert_chain(certfn, None)
+
+ return create_websocket_server(ssl_context=sslctx)
+
+
+@pytest.mark.skipif(not websockets, reason='websockets must be installed to test websocket request handlers')
+class TestWebSocketRequestHandlerConformance:
+ @classmethod
+ def setup_class(cls):
+ cls.ws_thread, cls.ws_port = create_ws_websocket_server()
+ cls.ws_base_url = f'ws://127.0.0.1:{cls.ws_port}'
+
+ cls.wss_thread, cls.wss_port = create_wss_websocket_server()
+ cls.wss_base_url = f'wss://127.0.0.1:{cls.wss_port}'
+
+ cls.bad_wss_thread, cls.bad_wss_port = create_websocket_server(ssl_context=ssl.SSLContext(ssl.PROTOCOL_TLS_SERVER))
+ cls.bad_wss_host = f'wss://127.0.0.1:{cls.bad_wss_port}'
+
+ cls.mtls_wss_thread, cls.mtls_wss_port = create_mtls_wss_websocket_server()
+ cls.mtls_wss_base_url = f'wss://127.0.0.1:{cls.mtls_wss_port}'
+
+ @pytest.mark.parametrize('handler', ['Websockets'], indirect=True)
+ def test_basic_websockets(self, handler):
+ with handler() as rh:
+ ws = validate_and_send(rh, Request(self.ws_base_url))
+ assert 'upgrade' in ws.headers
+ assert ws.status == 101
+ ws.send('foo')
+ assert ws.recv() == 'foo'
+ ws.close()
+
+ # https://www.rfc-editor.org/rfc/rfc6455.html#section-5.6
+ @pytest.mark.parametrize('msg,opcode', [('str', 1), (b'bytes', 2)])
+ @pytest.mark.parametrize('handler', ['Websockets'], indirect=True)
+ def test_send_types(self, handler, msg, opcode):
+ with handler() as rh:
+ ws = validate_and_send(rh, Request(self.ws_base_url))
+ ws.send(msg)
+ assert int(ws.recv()) == opcode
+ ws.close()
+
+ @pytest.mark.parametrize('handler', ['Websockets'], indirect=True)
+ def test_verify_cert(self, handler):
+ with handler() as rh:
+ with pytest.raises(CertificateVerifyError):
+ validate_and_send(rh, Request(self.wss_base_url))
+
+ with handler(verify=False) as rh:
+ ws = validate_and_send(rh, Request(self.wss_base_url))
+ assert ws.status == 101
+ ws.close()
+
+ @pytest.mark.parametrize('handler', ['Websockets'], indirect=True)
+ def test_ssl_error(self, handler):
+ with handler(verify=False) as rh:
+ with pytest.raises(SSLError, match=r'ssl(?:v3|/tls) alert handshake failure') as exc_info:
+ validate_and_send(rh, Request(self.bad_wss_host))
+ assert not issubclass(exc_info.type, CertificateVerifyError)
+
+ @pytest.mark.parametrize('handler', ['Websockets'], indirect=True)
+ @pytest.mark.parametrize('path,expected', [
+ # Unicode characters should be encoded with uppercase percent-encoding
+ ('/中文', '/%E4%B8%AD%E6%96%87'),
+ # don't normalize existing percent encodings
+ ('/%c7%9f', '/%c7%9f'),
+ ])
+ def test_percent_encode(self, handler, path, expected):
+ with handler() as rh:
+ ws = validate_and_send(rh, Request(f'{self.ws_base_url}{path}'))
+ ws.send('path')
+ assert ws.recv() == expected
+ assert ws.status == 101
+ ws.close()
+
+ @pytest.mark.parametrize('handler', ['Websockets'], indirect=True)
+ def test_remove_dot_segments(self, handler):
+ with handler() as rh:
+ # This isn't a comprehensive test,
+ # but it should be enough to check whether the handler is removing dot segments
+ ws = validate_and_send(rh, Request(f'{self.ws_base_url}/a/b/./../../test'))
+ assert ws.status == 101
+ ws.send('path')
+ assert ws.recv() == '/test'
+ ws.close()
+
+ # We are restricted to known HTTP status codes in http.HTTPStatus
+ # Redirects are not supported for websockets
+ @pytest.mark.parametrize('handler', ['Websockets'], indirect=True)
+ @pytest.mark.parametrize('status', (200, 204, 301, 302, 303, 400, 500, 511))
+ def test_raise_http_error(self, handler, status):
+ with handler() as rh:
+ with pytest.raises(HTTPError) as exc_info:
+ validate_and_send(rh, Request(f'{self.ws_base_url}/gen_{status}'))
+ assert exc_info.value.status == status
+
+ @pytest.mark.parametrize('handler', ['Websockets'], indirect=True)
+ @pytest.mark.parametrize('params,extensions', [
+ ({'timeout': sys.float_info.min}, {}),
+ ({}, {'timeout': sys.float_info.min}),
+ ])
+ def test_timeout(self, handler, params, extensions):
+ with handler(**params) as rh:
+ with pytest.raises(TransportError):
+ validate_and_send(rh, Request(self.ws_base_url, extensions=extensions))
+
+ @pytest.mark.parametrize('handler', ['Websockets'], indirect=True)
+ def test_cookies(self, handler):
+ cookiejar = YoutubeDLCookieJar()
+ cookiejar.set_cookie(http.cookiejar.Cookie(
+ version=0, name='test', value='ytdlp', port=None, port_specified=False,
+ domain='127.0.0.1', domain_specified=True, domain_initial_dot=False, path='/',
+ path_specified=True, secure=False, expires=None, discard=False, comment=None,
+ comment_url=None, rest={}))
+
+ with handler(cookiejar=cookiejar) as rh:
+ ws = validate_and_send(rh, Request(self.ws_base_url))
+ ws.send('headers')
+ assert json.loads(ws.recv())['cookie'] == 'test=ytdlp'
+ ws.close()
+
+ with handler() as rh:
+ ws = validate_and_send(rh, Request(self.ws_base_url))
+ ws.send('headers')
+ assert 'cookie' not in json.loads(ws.recv())
+ ws.close()
+
+ ws = validate_and_send(rh, Request(self.ws_base_url, extensions={'cookiejar': cookiejar}))
+ ws.send('headers')
+ assert json.loads(ws.recv())['cookie'] == 'test=ytdlp'
+ ws.close()
+
+ @pytest.mark.parametrize('handler', ['Websockets'], indirect=True)
+ def test_source_address(self, handler):
+ source_address = f'127.0.0.{random.randint(5, 255)}'
+ verify_address_availability(source_address)
+ with handler(source_address=source_address) as rh:
+ ws = validate_and_send(rh, Request(self.ws_base_url))
+ ws.send('source_address')
+ assert source_address == ws.recv()
+ ws.close()
+
+ @pytest.mark.parametrize('handler', ['Websockets'], indirect=True)
+ def test_response_url(self, handler):
+ with handler() as rh:
+ url = f'{self.ws_base_url}/something'
+ ws = validate_and_send(rh, Request(url))
+ assert ws.url == url
+ ws.close()
+
+ @pytest.mark.parametrize('handler', ['Websockets'], indirect=True)
+ def test_request_headers(self, handler):
+ with handler(headers=HTTPHeaderDict({'test1': 'test', 'test2': 'test2'})) as rh:
+ # Global Headers
+ ws = validate_and_send(rh, Request(self.ws_base_url))
+ ws.send('headers')
+ headers = HTTPHeaderDict(json.loads(ws.recv()))
+ assert headers['test1'] == 'test'
+ ws.close()
+
+ # Per request headers, merged with global
+ ws = validate_and_send(rh, Request(
+ self.ws_base_url, headers={'test2': 'changed', 'test3': 'test3'}))
+ ws.send('headers')
+ headers = HTTPHeaderDict(json.loads(ws.recv()))
+ assert headers['test1'] == 'test'
+ assert headers['test2'] == 'changed'
+ assert headers['test3'] == 'test3'
+ ws.close()
+
+ @pytest.mark.parametrize('client_cert', (
+ {'client_certificate': os.path.join(MTLS_CERT_DIR, 'clientwithkey.crt')},
+ {
+ 'client_certificate': os.path.join(MTLS_CERT_DIR, 'client.crt'),
+ 'client_certificate_key': os.path.join(MTLS_CERT_DIR, 'client.key'),
+ },
+ {
+ 'client_certificate': os.path.join(MTLS_CERT_DIR, 'clientwithencryptedkey.crt'),
+ 'client_certificate_password': 'foobar',
+ },
+ {
+ 'client_certificate': os.path.join(MTLS_CERT_DIR, 'client.crt'),
+ 'client_certificate_key': os.path.join(MTLS_CERT_DIR, 'clientencrypted.key'),
+ 'client_certificate_password': 'foobar',
+ }
+ ))
+ @pytest.mark.parametrize('handler', ['Websockets'], indirect=True)
+ def test_mtls(self, handler, client_cert):
+ with handler(
+            # Disable client-side validation of the unacceptable self-signed testcert.pem.
+            # The test exercises a check on the server side, so it is unaffected.
+ verify=False,
+ client_cert=client_cert
+ ) as rh:
+ validate_and_send(rh, Request(self.mtls_wss_base_url)).close()
+
+
+def create_fake_ws_connection(raised):
+ import websockets.sync.client
+
+ class FakeWsConnection(websockets.sync.client.ClientConnection):
+        def __init__(self, *args, **kwargs):
+            # Intentionally skip super().__init__(); only the attributes that
+            # these tests touch need to be faked
+            class FakeResponse:
+                body = b''
+                headers = {}
+                status_code = 101
+                reason_phrase = 'test'
+
+            self.response = FakeResponse()
+
+ def send(self, *args, **kwargs):
+ raise raised()
+
+ def recv(self, *args, **kwargs):
+ raise raised()
+
+ def close(self, *args, **kwargs):
+ return
+
+ return FakeWsConnection()
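+
+# Usage sketch (as in the error-mapping tests below): wrap a fake connection in
+# the response adapter and assert that the raised error is mapped, e.g.
+#   ws = WebsocketsResponseAdapter(create_fake_ws_connection(TimeoutError), url='ws://fake-url')
+#   with pytest.raises(TransportError):
+#       ws.recv()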
+
+
+@pytest.mark.parametrize('handler', ['Websockets'], indirect=True)
+class TestWebsocketsRequestHandler:
+ @pytest.mark.parametrize('raised,expected', [
+ # https://websockets.readthedocs.io/en/stable/reference/exceptions.html
+ (lambda: websockets.exceptions.InvalidURI(msg='test', uri='test://'), RequestError),
+ # Requires a response object. Should be covered by HTTP error tests.
+ # (lambda: websockets.exceptions.InvalidStatus(), TransportError),
+ (lambda: websockets.exceptions.InvalidHandshake(), TransportError),
+ # These are subclasses of InvalidHandshake
+ (lambda: websockets.exceptions.InvalidHeader(name='test'), TransportError),
+ (lambda: websockets.exceptions.NegotiationError(), TransportError),
+ # Catch-all
+ (lambda: websockets.exceptions.WebSocketException(), TransportError),
+ (lambda: TimeoutError(), TransportError),
+ # These may be raised by our create_connection implementation, which should also be caught
+ (lambda: OSError(), TransportError),
+ (lambda: ssl.SSLError(), SSLError),
+ (lambda: ssl.SSLCertVerificationError(), CertificateVerifyError),
+ (lambda: socks.ProxyError(), ProxyError),
+ ])
+ def test_request_error_mapping(self, handler, monkeypatch, raised, expected):
+ import websockets.sync.client
+
+ import yt_dlp.networking._websockets
+ with handler() as rh:
+ def fake_connect(*args, **kwargs):
+ raise raised()
+ monkeypatch.setattr(yt_dlp.networking._websockets, 'create_connection', lambda *args, **kwargs: None)
+ monkeypatch.setattr(websockets.sync.client, 'connect', fake_connect)
+ with pytest.raises(expected) as exc_info:
+ rh.send(Request('ws://fake-url'))
+ assert exc_info.type is expected
+
+ @pytest.mark.parametrize('raised,expected,match', [
+ # https://websockets.readthedocs.io/en/stable/reference/sync/client.html#websockets.sync.client.ClientConnection.send
+ (lambda: websockets.exceptions.ConnectionClosed(None, None), TransportError, None),
+ (lambda: RuntimeError(), TransportError, None),
+ (lambda: TimeoutError(), TransportError, None),
+ (lambda: TypeError(), RequestError, None),
+ (lambda: socks.ProxyError(), ProxyError, None),
+ # Catch-all
+ (lambda: websockets.exceptions.WebSocketException(), TransportError, None),
+ ])
+ def test_ws_send_error_mapping(self, handler, monkeypatch, raised, expected, match):
+ from yt_dlp.networking._websockets import WebsocketsResponseAdapter
+ ws = WebsocketsResponseAdapter(create_fake_ws_connection(raised), url='ws://fake-url')
+ with pytest.raises(expected, match=match) as exc_info:
+ ws.send('test')
+ assert exc_info.type is expected
+
+ @pytest.mark.parametrize('raised,expected,match', [
+ # https://websockets.readthedocs.io/en/stable/reference/sync/client.html#websockets.sync.client.ClientConnection.recv
+ (lambda: websockets.exceptions.ConnectionClosed(None, None), TransportError, None),
+ (lambda: RuntimeError(), TransportError, None),
+ (lambda: TimeoutError(), TransportError, None),
+ (lambda: socks.ProxyError(), ProxyError, None),
+ # Catch-all
+ (lambda: websockets.exceptions.WebSocketException(), TransportError, None),
+ ])
+ def test_ws_recv_error_mapping(self, handler, monkeypatch, raised, expected, match):
+ from yt_dlp.networking._websockets import WebsocketsResponseAdapter
+ ws = WebsocketsResponseAdapter(create_fake_ws_connection(raised), url='ws://fake-url')
+ with pytest.raises(expected, match=match) as exc_info:
+ ws.recv()
+ assert exc_info.type is expected
diff --git a/test/test_write_annotations.py.disabled b/test/test_write_annotations.py.disabled
new file mode 100644
index 0000000..c7cf199
--- /dev/null
+++ b/test/test_write_annotations.py.disabled
@@ -0,0 +1,77 @@
+#!/usr/bin/env python3
+
+# Allow direct execution
+import os
+import sys
+import unittest
+
+sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
+
+
+import xml.etree.ElementTree
+
+import yt_dlp.extractor
+import yt_dlp.YoutubeDL
+from test.helper import get_params, is_download_test, try_rm
+
+
+class YoutubeDL(yt_dlp.YoutubeDL):
+ def __init__(self, *args, **kwargs):
+ super().__init__(*args, **kwargs)
+ self.to_stderr = self.to_screen
+
+
+params = get_params({
+ 'writeannotations': True,
+ 'skip_download': True,
+ 'writeinfojson': False,
+ 'format': 'flv',
+})
+
+
+TEST_ID = 'gr51aVj-mLg'
+ANNOTATIONS_FILE = TEST_ID + '.annotations.xml'
+EXPECTED_ANNOTATIONS = ['Speech bubble', 'Note', 'Title', 'Spotlight', 'Label']
+
+
+@is_download_test
+class TestAnnotations(unittest.TestCase):
+ def setUp(self):
+ # Clear old files
+ self.tearDown()
+
+ def test_info_json(self):
+ expected = list(EXPECTED_ANNOTATIONS) # Two annotations could have the same text.
+ ie = yt_dlp.extractor.YoutubeIE()
+ ydl = YoutubeDL(params)
+ ydl.add_info_extractor(ie)
+ ydl.download([TEST_ID])
+ self.assertTrue(os.path.exists(ANNOTATIONS_FILE))
+ annoxml = None
+ with open(ANNOTATIONS_FILE, encoding='utf-8') as annof:
+ annoxml = xml.etree.ElementTree.parse(annof)
+ self.assertTrue(annoxml is not None, 'Failed to parse annotations XML')
+ root = annoxml.getroot()
+ self.assertEqual(root.tag, 'document')
+ annotationsTag = root.find('annotations')
+ self.assertEqual(annotationsTag.tag, 'annotations')
+ annotations = annotationsTag.findall('annotation')
+
+ # Not all the annotations have TEXT children and the annotations are returned unsorted.
+ for a in annotations:
+ self.assertEqual(a.tag, 'annotation')
+ if a.get('type') == 'text':
+ textTag = a.find('TEXT')
+ text = textTag.text
+                self.assertTrue(text in expected)  # assertIn was only added in Python 2.7
+                # Remove the first occurrence; there could be more than one annotation with the same text
+                expected.remove(text)
+ # We should have seen (and removed) all the expected annotation texts.
+ self.assertEqual(len(expected), 0, 'Not all expected annotations were found.')
+
+ def tearDown(self):
+ try_rm(ANNOTATIONS_FILE)
+
+
+if __name__ == '__main__':
+ unittest.main()
diff --git a/test/test_youtube_lists.py b/test/test_youtube_lists.py
new file mode 100644
index 0000000..b3f323e
--- /dev/null
+++ b/test/test_youtube_lists.py
@@ -0,0 +1,71 @@
+#!/usr/bin/env python3
+
+# Allow direct execution
+import os
+import sys
+import unittest
+
+sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
+
+
+from test.helper import FakeYDL, is_download_test
+from yt_dlp.extractor import YoutubeIE, YoutubeTabIE
+from yt_dlp.utils import ExtractorError
+
+
+@is_download_test
+class TestYoutubeLists(unittest.TestCase):
+ def assertIsPlaylist(self, info):
+ """Make sure the info has '_type' set to 'playlist'"""
+ self.assertEqual(info['_type'], 'playlist')
+
+ def test_youtube_playlist_noplaylist(self):
+ dl = FakeYDL()
+ dl.params['noplaylist'] = True
+ ie = YoutubeTabIE(dl)
+ result = ie.extract('https://www.youtube.com/watch?v=OmJ-4B-mS-Y&list=PLydZ2Hrp_gPRJViZjLFKaBMgCQOYEEkyp&index=2')
+ self.assertEqual(result['_type'], 'url')
+ self.assertEqual(result['ie_key'], YoutubeIE.ie_key())
+ self.assertEqual(YoutubeIE.extract_id(result['url']), 'OmJ-4B-mS-Y')
+
+ def test_youtube_mix(self):
+ dl = FakeYDL()
+ ie = YoutubeTabIE(dl)
+ result = ie.extract('https://www.youtube.com/watch?v=tyITL_exICo&list=RDCLAK5uy_kLWIr9gv1XLlPbaDS965-Db4TrBoUTxQ8')
+ entries = list(result['entries'])
+        self.assertGreaterEqual(len(entries), 50)
+ original_video = entries[0]
+ self.assertEqual(original_video['id'], 'tyITL_exICo')
+
+ def test_youtube_flat_playlist_extraction(self):
+ dl = FakeYDL()
+ dl.params['extract_flat'] = True
+ ie = YoutubeTabIE(dl)
+ result = ie.extract('https://www.youtube.com/playlist?list=PL4lCao7KL_QFVb7Iudeipvc2BCavECqzc')
+ self.assertIsPlaylist(result)
+ entries = list(result['entries'])
+        self.assertEqual(len(entries), 1)
+ video = entries[0]
+ self.assertEqual(video['_type'], 'url')
+ self.assertEqual(video['ie_key'], 'Youtube')
+ self.assertEqual(video['id'], 'BaW_jenozKc')
+ self.assertEqual(video['url'], 'https://www.youtube.com/watch?v=BaW_jenozKc')
+ self.assertEqual(video['title'], 'youtube-dl test video "\'/\\ä↭𝕐')
+ self.assertEqual(video['duration'], 10)
+ self.assertEqual(video['uploader'], 'Philipp Hagemeister')
+
+ def test_youtube_channel_no_uploads(self):
+ dl = FakeYDL()
+ dl.params['extract_flat'] = True
+ ie = YoutubeTabIE(dl)
+ # no uploads
+ with self.assertRaisesRegex(ExtractorError, r'no uploads'):
+ ie.extract('https://www.youtube.com/channel/UC2yXPzFejc422buOIzn_0CA')
+
+ # no uploads and no UCID given
+ with self.assertRaisesRegex(ExtractorError, r'no uploads'):
+ ie.extract('https://www.youtube.com/news')
+
+
+if __name__ == '__main__':
+ unittest.main()
diff --git a/test/test_youtube_misc.py b/test/test_youtube_misc.py
new file mode 100644
index 0000000..81be5d3
--- /dev/null
+++ b/test/test_youtube_misc.py
@@ -0,0 +1,26 @@
+#!/usr/bin/env python3
+
+# Allow direct execution
+import os
+import sys
+import unittest
+
+sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
+
+
+from yt_dlp.extractor import YoutubeIE
+
+
+class TestYoutubeMisc(unittest.TestCase):
+ def test_youtube_extract(self):
+        def assertExtractId(url, video_id):
+            self.assertEqual(YoutubeIE.extract_id(url), video_id)
+
+ assertExtractId('http://www.youtube.com/watch?&v=BaW_jenozKc', 'BaW_jenozKc')
+ assertExtractId('https://www.youtube.com/watch?&v=BaW_jenozKc', 'BaW_jenozKc')
+ assertExtractId('https://www.youtube.com/watch?feature=player_embedded&v=BaW_jenozKc', 'BaW_jenozKc')
+ assertExtractId('https://www.youtube.com/watch_popup?v=BaW_jenozKc', 'BaW_jenozKc')
+ assertExtractId('http://www.youtube.com/watch?v=BaW_jenozKcsharePLED17F32AD9753930', 'BaW_jenozKc')
+ assertExtractId('BaW_jenozKc', 'BaW_jenozKc')
+
+
+if __name__ == '__main__':
+ unittest.main()
diff --git a/test/test_youtube_signature.py b/test/test_youtube_signature.py
new file mode 100644
index 0000000..c559284
--- /dev/null
+++ b/test/test_youtube_signature.py
@@ -0,0 +1,253 @@
+#!/usr/bin/env python3
+
+# Allow direct execution
+import os
+import sys
+import unittest
+
+sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
+
+
+import contextlib
+import re
+import string
+import urllib.request
+
+from test.helper import FakeYDL, is_download_test
+from yt_dlp.extractor import YoutubeIE
+from yt_dlp.jsinterp import JSInterpreter
+
+_SIG_TESTS = [
+ (
+ 'https://s.ytimg.com/yts/jsbin/html5player-vflHOr_nV.js',
+ 86,
+ '>=<;:/.-[+*)(\'&%$#"!ZYX0VUTSRQPONMLKJIHGFEDCBA\\yxwvutsrqponmlkjihgfedcba987654321',
+ ),
+ (
+ 'https://s.ytimg.com/yts/jsbin/html5player-vfldJ8xgI.js',
+ 85,
+ '3456789a0cdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRS[UVWXYZ!"#$%&\'()*+,-./:;<=>?@',
+ ),
+ (
+ 'https://s.ytimg.com/yts/jsbin/html5player-vfle-mVwz.js',
+ 90,
+ ']\\[@?>=<;:/.-,+*)(\'&%$#"hZYXWVUTSRQPONMLKJIHGFEDCBAzyxwvutsrqponmlkjiagfedcb39876',
+ ),
+ (
+ 'https://s.ytimg.com/yts/jsbin/html5player-en_US-vfl0Cbn9e.js',
+ 84,
+ 'O1I3456789abcde0ghijklmnopqrstuvwxyzABCDEFGHfJKLMN2PQRSTUVW@YZ!"#$%&\'()*+,-./:;<=',
+ ),
+ (
+ 'https://s.ytimg.com/yts/jsbin/html5player-en_US-vflXGBaUN.js',
+ '2ACFC7A61CA478CD21425E5A57EBD73DDC78E22A.2094302436B2D377D14A3BBA23022D023B8BC25AA',
+ 'A52CB8B320D22032ABB3A41D773D2B6342034902.A22E87CDD37DBE75A5E52412DC874AC16A7CFCA2',
+ ),
+ (
+ 'https://s.ytimg.com/yts/jsbin/html5player-en_US-vflBb0OQx.js',
+ 84,
+ '123456789abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQ0STUVWXYZ!"#$%&\'()*+,@./:;<=>'
+ ),
+ (
+ 'https://s.ytimg.com/yts/jsbin/html5player-en_US-vfl9FYC6l.js',
+ 83,
+ '123456789abcdefghijklmnopqr0tuvwxyzABCDETGHIJKLMNOPQRS>UVWXYZ!"#$%&\'()*+,-./:;<=F'
+ ),
+ (
+ 'https://s.ytimg.com/yts/jsbin/html5player-en_US-vflCGk6yw/html5player.js',
+ '4646B5181C6C3020DF1D9C7FCFEA.AD80ABF70C39BD369CCCAE780AFBB98FA6B6CB42766249D9488C288',
+ '82C8849D94266724DC6B6AF89BBFA087EACCD963.B93C07FBA084ACAEFCF7C9D1FD0203C6C1815B6B'
+ ),
+ (
+ 'https://s.ytimg.com/yts/jsbin/html5player-en_US-vflKjOTVq/html5player.js',
+ '312AA52209E3623129A412D56A40F11CB0AF14AE.3EE09501CB14E3BCDC3B2AE808BF3F1D14E7FBF12',
+ '112AA5220913623229A412D56A40F11CB0AF14AE.3EE0950FCB14EEBCDC3B2AE808BF331D14E7FBF3',
+ ),
+ (
+ 'https://www.youtube.com/s/player/6ed0d907/player_ias.vflset/en_US/base.js',
+ '2aq0aqSyOoJXtK73m-uME_jv7-pT15gOFC02RFkGMqWpzEICs69VdbwQ0LDp1v7j8xx92efCJlYFYb1sUkkBSPOlPmXgIARw8JQ0qOAOAA',
+ 'AOq0QJ8wRAIgXmPlOPSBkkUs1bYFYlJCfe29xx8j7v1pDL2QwbdV96sCIEzpWqMGkFR20CFOg51Tp-7vj_EMu-m37KtXJoOySqa0',
+ ),
+]
+
+_NSIG_TESTS = [
+ (
+ 'https://www.youtube.com/s/player/7862ca1f/player_ias.vflset/en_US/base.js',
+ 'X_LCxVDjAavgE5t', 'yxJ1dM6iz5ogUg',
+ ),
+ (
+ 'https://www.youtube.com/s/player/9216d1f7/player_ias.vflset/en_US/base.js',
+ 'SLp9F5bwjAdhE9F-', 'gWnb9IK2DJ8Q1w',
+ ),
+ (
+ 'https://www.youtube.com/s/player/f8cb7a3b/player_ias.vflset/en_US/base.js',
+ 'oBo2h5euWy6osrUt', 'ivXHpm7qJjJN',
+ ),
+ (
+ 'https://www.youtube.com/s/player/2dfe380c/player_ias.vflset/en_US/base.js',
+ 'oBo2h5euWy6osrUt', '3DIBbn3qdQ',
+ ),
+ (
+ 'https://www.youtube.com/s/player/f1ca6900/player_ias.vflset/en_US/base.js',
+ 'cu3wyu6LQn2hse', 'jvxetvmlI9AN9Q',
+ ),
+ (
+ 'https://www.youtube.com/s/player/8040e515/player_ias.vflset/en_US/base.js',
+ 'wvOFaY-yjgDuIEg5', 'HkfBFDHmgw4rsw',
+ ),
+ (
+ 'https://www.youtube.com/s/player/e06dea74/player_ias.vflset/en_US/base.js',
+ 'AiuodmaDDYw8d3y4bf', 'ankd8eza2T6Qmw',
+ ),
+ (
+ 'https://www.youtube.com/s/player/5dd88d1d/player-plasma-ias-phone-en_US.vflset/base.js',
+ 'kSxKFLeqzv_ZyHSAt', 'n8gS8oRlHOxPFA',
+ ),
+ (
+ 'https://www.youtube.com/s/player/324f67b9/player_ias.vflset/en_US/base.js',
+ 'xdftNy7dh9QGnhW', '22qLGxrmX8F1rA',
+ ),
+ (
+ 'https://www.youtube.com/s/player/4c3f79c5/player_ias.vflset/en_US/base.js',
+ 'TDCstCG66tEAO5pR9o', 'dbxNtZ14c-yWyw',
+ ),
+ (
+ 'https://www.youtube.com/s/player/c81bbb4a/player_ias.vflset/en_US/base.js',
+ 'gre3EcLurNY2vqp94', 'Z9DfGxWP115WTg',
+ ),
+ (
+ 'https://www.youtube.com/s/player/1f7d5369/player_ias.vflset/en_US/base.js',
+ 'batNX7sYqIJdkJ', 'IhOkL_zxbkOZBw',
+ ),
+ (
+ 'https://www.youtube.com/s/player/009f1d77/player_ias.vflset/en_US/base.js',
+ '5dwFHw8aFWQUQtffRq', 'audescmLUzI3jw',
+ ),
+ (
+ 'https://www.youtube.com/s/player/dc0c6770/player_ias.vflset/en_US/base.js',
+ '5EHDMgYLV6HPGk_Mu-kk', 'n9lUJLHbxUI0GQ',
+ ),
+ (
+ 'https://www.youtube.com/s/player/113ca41c/player_ias.vflset/en_US/base.js',
+ 'cgYl-tlYkhjT7A', 'hI7BBr2zUgcmMg',
+ ),
+ (
+ 'https://www.youtube.com/s/player/c57c113c/player_ias.vflset/en_US/base.js',
+ 'M92UUMHa8PdvPd3wyM', '3hPqLJsiNZx7yA',
+ ),
+ (
+ 'https://www.youtube.com/s/player/5a3b6271/player_ias.vflset/en_US/base.js',
+ 'B2j7f_UPT4rfje85Lu_e', 'm5DmNymaGQ5RdQ',
+ ),
+ (
+ 'https://www.youtube.com/s/player/7a062b77/player_ias.vflset/en_US/base.js',
+ 'NRcE3y3mVtm_cV-W', 'VbsCYUATvqlt5w',
+ ),
+ (
+ 'https://www.youtube.com/s/player/dac945fd/player_ias.vflset/en_US/base.js',
+ 'o8BkRxXhuYsBCWi6RplPdP', '3Lx32v_hmzTm6A',
+ ),
+ (
+ 'https://www.youtube.com/s/player/6f20102c/player_ias.vflset/en_US/base.js',
+ 'lE8DhoDmKqnmJJ', 'pJTTX6XyJP2BYw',
+ ),
+ (
+ 'https://www.youtube.com/s/player/cfa9e7cb/player_ias.vflset/en_US/base.js',
+ 'aCi3iElgd2kq0bxVbQ', 'QX1y8jGb2IbZ0w',
+ ),
+ (
+ 'https://www.youtube.com/s/player/8c7583ff/player_ias.vflset/en_US/base.js',
+ '1wWCVpRR96eAmMI87L', 'KSkWAVv1ZQxC3A',
+ ),
+ (
+ 'https://www.youtube.com/s/player/b7910ca8/player_ias.vflset/en_US/base.js',
+ '_hXMCwMt9qE310D', 'LoZMgkkofRMCZQ',
+ ),
+]
+
+
+@is_download_test
+class TestPlayerInfo(unittest.TestCase):
+ def test_youtube_extract_player_info(self):
+ PLAYER_URLS = (
+ ('https://www.youtube.com/s/player/4c3f79c5/player_ias.vflset/en_US/base.js', '4c3f79c5'),
+ ('https://www.youtube.com/s/player/64dddad9/player_ias.vflset/en_US/base.js', '64dddad9'),
+ ('https://www.youtube.com/s/player/64dddad9/player_ias.vflset/fr_FR/base.js', '64dddad9'),
+ ('https://www.youtube.com/s/player/64dddad9/player-plasma-ias-phone-en_US.vflset/base.js', '64dddad9'),
+ ('https://www.youtube.com/s/player/64dddad9/player-plasma-ias-phone-de_DE.vflset/base.js', '64dddad9'),
+ ('https://www.youtube.com/s/player/64dddad9/player-plasma-ias-tablet-en_US.vflset/base.js', '64dddad9'),
+ # obsolete
+ ('https://www.youtube.com/yts/jsbin/player_ias-vfle4-e03/en_US/base.js', 'vfle4-e03'),
+ ('https://www.youtube.com/yts/jsbin/player_ias-vfl49f_g4/en_US/base.js', 'vfl49f_g4'),
+ ('https://www.youtube.com/yts/jsbin/player_ias-vflCPQUIL/en_US/base.js', 'vflCPQUIL'),
+ ('https://www.youtube.com/yts/jsbin/player-vflzQZbt7/en_US/base.js', 'vflzQZbt7'),
+ ('https://www.youtube.com/yts/jsbin/player-en_US-vflaxXRn1/base.js', 'vflaxXRn1'),
+ ('https://s.ytimg.com/yts/jsbin/html5player-en_US-vflXGBaUN.js', 'vflXGBaUN'),
+ ('https://s.ytimg.com/yts/jsbin/html5player-en_US-vflKjOTVq/html5player.js', 'vflKjOTVq'),
+ )
+ for player_url, expected_player_id in PLAYER_URLS:
+ player_id = YoutubeIE._extract_player_info(player_url)
+ self.assertEqual(player_id, expected_player_id)
+
+
+@is_download_test
+class TestSignature(unittest.TestCase):
+ def setUp(self):
+ TEST_DIR = os.path.dirname(os.path.abspath(__file__))
+ self.TESTDATA_DIR = os.path.join(TEST_DIR, 'testdata/sigs')
+ if not os.path.exists(self.TESTDATA_DIR):
+ os.mkdir(self.TESTDATA_DIR)
+
+    def tearDown(self):
+        with contextlib.suppress(OSError):
+            for f in os.listdir(self.TESTDATA_DIR):
+                # listdir() returns bare filenames, so join with the directory
+                os.remove(os.path.join(self.TESTDATA_DIR, f))
+
+
+def t_factory(name, sig_func, url_pattern):
+ def make_tfunc(url, sig_input, expected_sig):
+ m = url_pattern.match(url)
+ assert m, '%r should follow URL format' % url
+ test_id = m.group('id')
+
+ def test_func(self):
+ basename = f'player-{name}-{test_id}.js'
+ fn = os.path.join(self.TESTDATA_DIR, basename)
+
+ if not os.path.exists(fn):
+ urllib.request.urlretrieve(url, fn)
+ with open(fn, encoding='utf-8') as testf:
+ jscode = testf.read()
+ self.assertEqual(sig_func(jscode, sig_input), expected_sig)
+
+ test_func.__name__ = f'test_{name}_js_{test_id}'
+ setattr(TestSignature, test_func.__name__, test_func)
+ return make_tfunc
+
+
+def signature(jscode, sig_input):
+ func = YoutubeIE(FakeYDL())._parse_sig_js(jscode)
+    src_sig = (
+        string.printable[:sig_input]
+        if isinstance(sig_input, int) else sig_input)
+ return func(src_sig)
+
+
+def n_sig(jscode, sig_input):
+ funcname = YoutubeIE(FakeYDL())._extract_n_function_name(jscode)
+ return JSInterpreter(jscode).call_function(funcname, sig_input)
+
+
+make_sig_test = t_factory(
+ 'signature', signature, re.compile(r'.*(?:-|/player/)(?P<id>[a-zA-Z0-9_-]+)(?:/.+\.js|(?:/watch_as3|/html5player)?\.[a-z]+)$'))
+for test_spec in _SIG_TESTS:
+ make_sig_test(*test_spec)
+
+make_nsig_test = t_factory(
+    'nsig', n_sig, re.compile(r'.+/player/(?P<id>[a-zA-Z0-9_-]+)/.+\.js$'))
+for test_spec in _NSIG_TESTS:
+ make_nsig_test(*test_spec)
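+
+# To cover a new player build, add a (url, input, expected) tuple to _SIG_TESTS
+# or _NSIG_TESTS above and a test method is generated automatically.
+# Hypothetical example (the player id and values are placeholders):
+#   (
+#       'https://www.youtube.com/s/player/00000000/player_ias.vflset/en_US/base.js',
+#       'some-n-param-input', 'expected-output',
+#   ),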
+
+
+if __name__ == '__main__':
+ unittest.main()
diff --git a/test/testcert.pem b/test/testcert.pem
new file mode 100644
index 0000000..b3e0f00
--- /dev/null
+++ b/test/testcert.pem
@@ -0,0 +1,52 @@
+-----BEGIN PRIVATE KEY-----
+MIIEvQIBADANBgkqhkiG9w0BAQEFAASCBKcwggSjAgEAAoIBAQDMF0bAzaHAdIyB
+HRmnIp4vv40lGqEePmWqicCl0QZ0wsb5dNysSxSa7330M2QeQopGfdaUYF1uTcNp
+Qx6ECgBSfg+RrOBI7r/u4F+sKX8MUXVaf/5QoBUrGNGSn/pp7HMGOuQqO6BVg4+h
+A1ySSwUG8mZItLRry1ISyErmW8b9xlqfd97uLME/5tX+sMelRFjUbAx8A4CK58Ev
+mMguHVTlXzx5RMdYcf1VScYcjlV/qA45uzP8zwI5aigfcmUD+tbGuQRhKxUhmw0J
+aobtOR6+JSOAULW5gYa/egE4dWLwbyM6b6eFbdnjlQzEA1EW7ChMPAW/Mo83KyiP
+tKMCSQulAgMBAAECggEALCfBDAexPjU5DNoh6bIorUXxIJzxTNzNHCdvgbCGiA54
+BBKPh8s6qwazpnjT6WQWDIg/O5zZufqjE4wM9x4+0Zoqfib742ucJO9wY4way6x4
+Clt0xzbLPabB+MoZ4H7ip+9n2+dImhe7pGdYyOHoNYeOL57BBi1YFW42Hj6u/8pd
+63YCXisto3Rz1YvRQVjwsrS+cRKZlzAFQRviL30jav7Wh1aWEfcXxjj4zhm8pJdk
+ITGtq6howz57M0NtX6hZnfe8ywzTnDFIGKIMA2cYHuYJcBh9bc4tCGubTvTKK9UE
+8fM+f6UbfGqfpKCq1mcgs0XMoFDSzKS9+mSJn0+5JQKBgQD+OCKaeH3Yzw5zGnlw
+XuQfMJGNcgNr+ImjmvzUAC2fAZUJLAcQueE5kzMv5Fmd+EFE2CEX1Vit3tg0SXvA
+G+bq609doILHMA03JHnV1npO/YNIhG3AAtJlKYGxQNfWH9mflYj9mEui8ZFxG52o
+zWhHYuifOjjZszUR+/eio6NPzwKBgQDNhUBTrT8LIX4SE/EFUiTlYmWIvOMgXYvN
+8Cm3IRNQ/yyphZaXEU0eJzfX5uCDfSVOgd6YM/2pRah+t+1Hvey4H8e0GVTu5wMP
+gkkqwKPGIR1YOmlw6ippqwvoJD7LuYrm6Q4D6e1PvkjwCq6lEndrOPmPrrXNd0JJ
+XO60y3U2SwKBgQDLkyZarryQXxcCI6Q10Tc6pskYDMIit095PUbTeiUOXNT9GE28
+Hi32ziLCakk9kCysNasii81MxtQ54tJ/f5iGbNMMddnkKl2a19Hc5LjjAm4cJzg/
+98KGEhvyVqvAo5bBDZ06/rcrD+lZOzUglQS5jcIcqCIYa0LHWQ/wJLxFzwKBgFcZ
+1SRhdSmDfUmuF+S4ZpistflYjC3IV5rk4NkS9HvMWaJS0nqdw4A3AMzItXgkjq4S
+DkOVLTkTI5Do5HAWRv/VwC5M2hkR4NMu1VGAKSisGiKtRsirBWSZMEenLNHshbjN
+Jrpz5rZ4H7NT46ZkCCZyFBpX4gb9NyOedjA7Via3AoGARF8RxbYjnEGGFuhnbrJB
+FTPR0vaL4faY3lOgRZ8jOG9V2c9Hzi/y8a8TU4C11jnJSDqYCXBTd5XN28npYxtD
+pjRsCwy6ze+yvYXPO7C978eMG3YRyj366NXUxnXN59ibwe/lxi2OD9z8J1LEdF6z
+VJua1Wn8HKxnXMI61DhTCSo=
+-----END PRIVATE KEY-----
+-----BEGIN CERTIFICATE-----
+MIIEEzCCAvugAwIBAgIJAK1haYi6gmSKMA0GCSqGSIb3DQEBCwUAMIGeMQswCQYD
+VQQGEwJERTEMMAoGA1UECAwDTlJXMRQwEgYDVQQHDAtEdWVzc2VsZG9yZjEbMBkG
+A1UECgwSeW91dHViZS1kbCBwcm9qZWN0MRkwFwYDVQQLDBB5b3V0dWJlLWRsIHRl
+c3RzMRIwEAYDVQQDDAlsb2NhbGhvc3QxHzAdBgkqhkiG9w0BCQEWEHBoaWhhZ0Bw
+aGloYWcuZGUwIBcNMTUwMTMwMDExNTA4WhgPMjExNTAxMDYwMTE1MDhaMIGeMQsw
+CQYDVQQGEwJERTEMMAoGA1UECAwDTlJXMRQwEgYDVQQHDAtEdWVzc2VsZG9yZjEb
+MBkGA1UECgwSeW91dHViZS1kbCBwcm9qZWN0MRkwFwYDVQQLDBB5b3V0dWJlLWRs
+IHRlc3RzMRIwEAYDVQQDDAlsb2NhbGhvc3QxHzAdBgkqhkiG9w0BCQEWEHBoaWhh
+Z0BwaGloYWcuZGUwggEiMA0GCSqGSIb3DQEBAQUAA4IBDwAwggEKAoIBAQDMF0bA
+zaHAdIyBHRmnIp4vv40lGqEePmWqicCl0QZ0wsb5dNysSxSa7330M2QeQopGfdaU
+YF1uTcNpQx6ECgBSfg+RrOBI7r/u4F+sKX8MUXVaf/5QoBUrGNGSn/pp7HMGOuQq
+O6BVg4+hA1ySSwUG8mZItLRry1ISyErmW8b9xlqfd97uLME/5tX+sMelRFjUbAx8
+A4CK58EvmMguHVTlXzx5RMdYcf1VScYcjlV/qA45uzP8zwI5aigfcmUD+tbGuQRh
+KxUhmw0JaobtOR6+JSOAULW5gYa/egE4dWLwbyM6b6eFbdnjlQzEA1EW7ChMPAW/
+Mo83KyiPtKMCSQulAgMBAAGjUDBOMB0GA1UdDgQWBBTBUZoqhQkzHQ6xNgZfFxOd
+ZEVt8TAfBgNVHSMEGDAWgBTBUZoqhQkzHQ6xNgZfFxOdZEVt8TAMBgNVHRMEBTAD
+AQH/MA0GCSqGSIb3DQEBCwUAA4IBAQCUOCl3T/J9B08Z+ijfOJAtkbUaEHuVZb4x
+5EpZSy2ZbkLvtsftMFieHVNXn9dDswQc5qjYStCC4o60LKw4M6Y63FRsAZ/DNaqb
+PY3jyCyuugZ8/sNf50vHYkAcF7SQYqOQFQX4TQsNUk2xMJIt7H0ErQFmkf/u3dg6
+cy89zkT462IwxzSG7NNhIlRkL9o5qg+Y1mF9eZA1B0rcL6hO24PPTHOd90HDChBu
+SZ6XMi/LzYQSTf0Vg2R+uMIVlzSlkdcZ6sqVnnqeLL8dFyIa4e9sj/D4ZCYP8Mqe
+Z73H5/NNhmwCHRqVUTgm307xblQaWGhwAiDkaRvRW2aJQ0qGEdZK
+-----END CERTIFICATE-----
diff --git a/test/testdata/certificate/ca.crt b/test/testdata/certificate/ca.crt
new file mode 100644
index 0000000..ddf7be7
--- /dev/null
+++ b/test/testdata/certificate/ca.crt
@@ -0,0 +1,10 @@
+-----BEGIN CERTIFICATE-----
+MIIBfDCCASOgAwIBAgIUUgngoxFpuWft8gjj3uEFoqJyoJowCgYIKoZIzj0EAwIw
+FDESMBAGA1UEAwwJeXRkbHB0ZXN0MB4XDTIyMDQxNTAzMDEwMVoXDTM4MTAxNTAz
+MDEwMVowFDESMBAGA1UEAwwJeXRkbHB0ZXN0MFkwEwYHKoZIzj0CAQYIKoZIzj0D
+AQcDQgAEcTaKMtIn2/1kgid1zXFpLm87FMT5PP3/bltKVVH3DLO//0kUslCHYxFU
+KpcCfVt9aueRyUFi1TNkkkEZ9D6fbqNTMFEwHQYDVR0OBBYEFBdY2rVNLFGM6r1F
+iuamNDaiq0QoMB8GA1UdIwQYMBaAFBdY2rVNLFGM6r1FiuamNDaiq0QoMA8GA1Ud
+EwEB/wQFMAMBAf8wCgYIKoZIzj0EAwIDRwAwRAIgXJg2jio1kow2g/iP54Qq+iI2
+m4EAvZiY0Im/Ni3PHawCIC6KCl6QcHANbeq8ckOXNGusjl6OWhvEM3uPBPhqskq1
+-----END CERTIFICATE-----
diff --git a/test/testdata/certificate/ca.key b/test/testdata/certificate/ca.key
new file mode 100644
index 0000000..38920d5
--- /dev/null
+++ b/test/testdata/certificate/ca.key
@@ -0,0 +1,5 @@
+-----BEGIN EC PRIVATE KEY-----
+MHcCAQEEIG2L1bHdl3PnaLiJ7Zm8aAGCj4GiVbSbXQcrJAdL+yqOoAoGCCqGSM49
+AwEHoUQDQgAEcTaKMtIn2/1kgid1zXFpLm87FMT5PP3/bltKVVH3DLO//0kUslCH
+YxFUKpcCfVt9aueRyUFi1TNkkkEZ9D6fbg==
+-----END EC PRIVATE KEY-----
diff --git a/test/testdata/certificate/ca.srl b/test/testdata/certificate/ca.srl
new file mode 100644
index 0000000..de2d1ea
--- /dev/null
+++ b/test/testdata/certificate/ca.srl
@@ -0,0 +1 @@
+4A260C33C4D34612646E6321E1E767DF1A95EF0B
diff --git a/test/testdata/certificate/client.crt b/test/testdata/certificate/client.crt
new file mode 100644
index 0000000..874622f
--- /dev/null
+++ b/test/testdata/certificate/client.crt
@@ -0,0 +1,9 @@
+-----BEGIN CERTIFICATE-----
+MIIBIzCBygIUSiYMM8TTRhJkbmMh4edn3xqV7wswCgYIKoZIzj0EAwIwFDESMBAG
+A1UEAwwJeXRkbHB0ZXN0MB4XDTIyMDQxNTAzMDEyN1oXDTM4MTAxNTAzMDEyN1ow
+FTETMBEGA1UEAwwKeXRkbHB0ZXN0MjBZMBMGByqGSM49AgEGCCqGSM49AwEHA0IA
+BKREKVDWfLKZknzYg+BUkmTn43f2pl/LNSyKPtXo/UV7hhp6JXIq3ZuZ7rubyuMS
+XNuH+2Cl9msSpJB2LhJs5kcwCgYIKoZIzj0EAwIDSAAwRQIhAMRr46vO25/5nUhD
+aHp4L67AeSvrjvSFHfubyD3Kr5dwAiA8EfOgVxc8Qh6ozTcbXO/WnBfS48ZFRSQY
+D0dB8M1kJw==
+-----END CERTIFICATE-----
diff --git a/test/testdata/certificate/client.csr b/test/testdata/certificate/client.csr
new file mode 100644
index 0000000..2d5d7a5
--- /dev/null
+++ b/test/testdata/certificate/client.csr
@@ -0,0 +1,7 @@
+-----BEGIN CERTIFICATE REQUEST-----
+MIHQMHcCAQAwFTETMBEGA1UEAwwKeXRkbHB0ZXN0MjBZMBMGByqGSM49AgEGCCqG
+SM49AwEHA0IABKREKVDWfLKZknzYg+BUkmTn43f2pl/LNSyKPtXo/UV7hhp6JXIq
+3ZuZ7rubyuMSXNuH+2Cl9msSpJB2LhJs5kegADAKBggqhkjOPQQDAgNJADBGAiEA
+1LZ72mtPmVxhGtdMvpZ0fyA68H2RC5IMHpLq18T55UcCIQDKpkXXVTvAzS0JioCq
+6kiYq8Oxx6ZMoI+11k75/Kip1g==
+-----END CERTIFICATE REQUEST-----
diff --git a/test/testdata/certificate/client.key b/test/testdata/certificate/client.key
new file mode 100644
index 0000000..e47389b
--- /dev/null
+++ b/test/testdata/certificate/client.key
@@ -0,0 +1,5 @@
+-----BEGIN EC PRIVATE KEY-----
+MHcCAQEEIAW6h9hwT0Aha+JBukgmHnrKRPoqPNWYA86ic0UaKHs8oAoGCCqGSM49
+AwEHoUQDQgAEpEQpUNZ8spmSfNiD4FSSZOfjd/amX8s1LIo+1ej9RXuGGnolcird
+m5nuu5vK4xJc24f7YKX2axKkkHYuEmzmRw==
+-----END EC PRIVATE KEY-----
diff --git a/test/testdata/certificate/clientencrypted.key b/test/testdata/certificate/clientencrypted.key
new file mode 100644
index 0000000..0baee37
--- /dev/null
+++ b/test/testdata/certificate/clientencrypted.key
@@ -0,0 +1,8 @@
+-----BEGIN EC PRIVATE KEY-----
+Proc-Type: 4,ENCRYPTED
+DEK-Info: AES-256-CBC,4B39160146F15544922E553E08299A35
+
+96A7/iBkIfTVb8r2812ued2pS49FfVY4Ppz/45OGF0uFayMtMl8/GuEBCamuhFXS
+rnOOpco96TTeeKZHqR45wnf4tgHM8IjoQ6H0EX3lVF19OHnArAgrGYtohWUGSyGn
+IgLJFdUewIjdI7XApTJprQFE5E2tETXFA95mCz88u1c=
+-----END EC PRIVATE KEY-----
diff --git a/test/testdata/certificate/clientwithencryptedkey.crt b/test/testdata/certificate/clientwithencryptedkey.crt
new file mode 100644
index 0000000..f357e4c
--- /dev/null
+++ b/test/testdata/certificate/clientwithencryptedkey.crt
@@ -0,0 +1,17 @@
+-----BEGIN CERTIFICATE-----
+MIIBIzCBygIUSiYMM8TTRhJkbmMh4edn3xqV7wswCgYIKoZIzj0EAwIwFDESMBAG
+A1UEAwwJeXRkbHB0ZXN0MB4XDTIyMDQxNTAzMDEyN1oXDTM4MTAxNTAzMDEyN1ow
+FTETMBEGA1UEAwwKeXRkbHB0ZXN0MjBZMBMGByqGSM49AgEGCCqGSM49AwEHA0IA
+BKREKVDWfLKZknzYg+BUkmTn43f2pl/LNSyKPtXo/UV7hhp6JXIq3ZuZ7rubyuMS
+XNuH+2Cl9msSpJB2LhJs5kcwCgYIKoZIzj0EAwIDSAAwRQIhAMRr46vO25/5nUhD
+aHp4L67AeSvrjvSFHfubyD3Kr5dwAiA8EfOgVxc8Qh6ozTcbXO/WnBfS48ZFRSQY
+D0dB8M1kJw==
+-----END CERTIFICATE-----
+-----BEGIN EC PRIVATE KEY-----
+Proc-Type: 4,ENCRYPTED
+DEK-Info: AES-256-CBC,4B39160146F15544922E553E08299A35
+
+96A7/iBkIfTVb8r2812ued2pS49FfVY4Ppz/45OGF0uFayMtMl8/GuEBCamuhFXS
+rnOOpco96TTeeKZHqR45wnf4tgHM8IjoQ6H0EX3lVF19OHnArAgrGYtohWUGSyGn
+IgLJFdUewIjdI7XApTJprQFE5E2tETXFA95mCz88u1c=
+-----END EC PRIVATE KEY-----
diff --git a/test/testdata/certificate/clientwithkey.crt b/test/testdata/certificate/clientwithkey.crt
new file mode 100644
index 0000000..942f6e2
--- /dev/null
+++ b/test/testdata/certificate/clientwithkey.crt
@@ -0,0 +1,14 @@
+-----BEGIN CERTIFICATE-----
+MIIBIzCBygIUSiYMM8TTRhJkbmMh4edn3xqV7wswCgYIKoZIzj0EAwIwFDESMBAG
+A1UEAwwJeXRkbHB0ZXN0MB4XDTIyMDQxNTAzMDEyN1oXDTM4MTAxNTAzMDEyN1ow
+FTETMBEGA1UEAwwKeXRkbHB0ZXN0MjBZMBMGByqGSM49AgEGCCqGSM49AwEHA0IA
+BKREKVDWfLKZknzYg+BUkmTn43f2pl/LNSyKPtXo/UV7hhp6JXIq3ZuZ7rubyuMS
+XNuH+2Cl9msSpJB2LhJs5kcwCgYIKoZIzj0EAwIDSAAwRQIhAMRr46vO25/5nUhD
+aHp4L67AeSvrjvSFHfubyD3Kr5dwAiA8EfOgVxc8Qh6ozTcbXO/WnBfS48ZFRSQY
+D0dB8M1kJw==
+-----END CERTIFICATE-----
+-----BEGIN EC PRIVATE KEY-----
+MHcCAQEEIAW6h9hwT0Aha+JBukgmHnrKRPoqPNWYA86ic0UaKHs8oAoGCCqGSM49
+AwEHoUQDQgAEpEQpUNZ8spmSfNiD4FSSZOfjd/amX8s1LIo+1ej9RXuGGnolcird
+m5nuu5vK4xJc24f7YKX2axKkkHYuEmzmRw==
+-----END EC PRIVATE KEY-----
diff --git a/test/testdata/certificate/instructions.md b/test/testdata/certificate/instructions.md
new file mode 100644
index 0000000..b0e3fbd
--- /dev/null
+++ b/test/testdata/certificate/instructions.md
@@ -0,0 +1,19 @@
+# Generate certificates for client cert tests
+
+## CA
+```sh
+openssl ecparam -name prime256v1 -genkey -noout -out ca.key
+openssl req -new -x509 -sha256 -days 6027 -key ca.key -out ca.crt -subj "/CN=ytdlptest"
+```
+
+## Client
+```sh
+openssl ecparam -name prime256v1 -genkey -noout -out client.key
+openssl ec -in client.key -out clientencrypted.key -passout pass:foobar -aes256
+openssl req -new -sha256 -key client.key -out client.csr -subj "/CN=ytdlptest2"
+openssl x509 -req -in client.csr -CA ca.crt -CAkey ca.key -CAcreateserial -out client.crt -days 6027 -sha256
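+# Optional sanity check (not required by the tests) that the signed client
+# certificate chains to the CA:
+openssl verify -CAfile ca.crt client.crt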
+cp client.crt clientwithkey.crt
+cp client.crt clientwithencryptedkey.crt
+cat client.key >> clientwithkey.crt
+cat clientencrypted.key >> clientwithencryptedkey.crt
+```
\ No newline at end of file
diff --git a/test/testdata/cookies/httponly_cookies.txt b/test/testdata/cookies/httponly_cookies.txt
new file mode 100644
index 0000000..c46541d
--- /dev/null
+++ b/test/testdata/cookies/httponly_cookies.txt
@@ -0,0 +1,6 @@
+# Netscape HTTP Cookie File
+# http://curl.haxx.se/rfc/cookie_spec.html
+# This is a generated file! Do not edit.
+
+#HttpOnly_www.foobar.foobar FALSE / TRUE 2147483647 HTTPONLY_COOKIE HTTPONLY_COOKIE_VALUE
+www.foobar.foobar FALSE / TRUE 2147483647 JS_ACCESSIBLE_COOKIE JS_ACCESSIBLE_COOKIE_VALUE
diff --git a/test/testdata/cookies/malformed_cookies.txt b/test/testdata/cookies/malformed_cookies.txt
new file mode 100644
index 0000000..17bc403
--- /dev/null
+++ b/test/testdata/cookies/malformed_cookies.txt
@@ -0,0 +1,9 @@
+# Netscape HTTP Cookie File
+# http://curl.haxx.se/rfc/cookie_spec.html
+# This is a generated file! Do not edit.
+
+# Cookie file entry with invalid number of fields - 6 instead of 7
+www.foobar.foobar FALSE / FALSE 0 COOKIE
+
+# Cookie file entry with an invalid 'expires at' value
+www.foobar.foobar FALSE / FALSE 1.7976931348623157e+308 COOKIE VALUE
diff --git a/test/testdata/cookies/session_cookies.txt b/test/testdata/cookies/session_cookies.txt
new file mode 100644
index 0000000..f6996f0
--- /dev/null
+++ b/test/testdata/cookies/session_cookies.txt
@@ -0,0 +1,6 @@
+# Netscape HTTP Cookie File
+# http://curl.haxx.se/rfc/cookie_spec.html
+# This is a generated file! Do not edit.
+
+www.foobar.foobar FALSE / TRUE YoutubeDLExpiresEmpty YoutubeDLExpiresEmptyValue
+www.foobar.foobar FALSE / TRUE 0 YoutubeDLExpires0 YoutubeDLExpires0Value
diff --git a/test/testdata/f4m/custom_base_url.f4m b/test/testdata/f4m/custom_base_url.f4m
new file mode 100644
index 0000000..74e1539
--- /dev/null
+++ b/test/testdata/f4m/custom_base_url.f4m
@@ -0,0 +1,10 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<manifest xmlns="http://ns.adobe.com/f4m/1.0">
+ <streamType>recorded</streamType>
+ <baseURL>http://vod.livestream.com/events/0000000000673980/</baseURL>
+ <duration>269.293</duration>
+ <bootstrapInfo profile="named" id="bootstrap_1">AAAAm2Fic3QAAAAAAAAAAQAAAAPoAAAAAAAEG+0AAAAAAAAAAAAAAAAAAQAAABlhc3J0AAAAAAAAAAABAAAAAQAAAC4BAAAAVmFmcnQAAAAAAAAD6AAAAAAEAAAAAQAAAAAAAAAAAAAXcAAAAC0AAAAAAAQHQAAAE5UAAAAuAAAAAAAEGtUAAAEYAAAAAAAAAAAAAAAAAAAAAAA=</bootstrapInfo>
+ <media url="b90f532f-b0f6-4f4e-8289-706d490b2fd8_2292" bootstrapInfoId="bootstrap_1" bitrate="2148" width="1280" height="720" videoCodec="avc1.4d401f" audioCodec="mp4a.40.2">
+ <metadata>AgAKb25NZXRhRGF0YQgAAAAIAAhkdXJhdGlvbgBAcNSwIMSbpgAFd2lkdGgAQJQAAAAAAAAABmhlaWdodABAhoAAAAAAAAAJZnJhbWVyYXRlAEA4/7DoLwW3AA12aWRlb2RhdGFyYXRlAECe1DLgjcobAAx2aWRlb2NvZGVjaWQAQBwAAAAAAAAADWF1ZGlvZGF0YXJhdGUAQGSimlvaPKQADGF1ZGlvY29kZWNpZABAJAAAAAAAAAAACQ==</metadata>
+ </media>
+</manifest>
diff --git a/test/testdata/ism/ec-3_test.Manifest b/test/testdata/ism/ec-3_test.Manifest
new file mode 100644
index 0000000..45f95de
--- /dev/null
+++ b/test/testdata/ism/ec-3_test.Manifest
@@ -0,0 +1 @@
+<?xml version="1.0" encoding="utf-8"?><!--Transformed by VSMT using XSL stylesheet for rule Identity--><!-- Created with Unified Streaming Platform (version=1.10.12-18737) --><SmoothStreamingMedia MajorVersion="2" MinorVersion="0" TimeScale="10000000" Duration="370000000"><StreamIndex Type="audio" QualityLevels="1" TimeScale="10000000" Language="deu" Name="audio_deu" Chunks="19" Url="QualityLevels({bitrate})/Fragments(audio_deu={start time})?noStreamProfile=1"><QualityLevel Index="0" Bitrate="127802" CodecPrivateData="1190" SamplingRate="48000" Channels="2" BitsPerSample="16" PacketSize="4" AudioTag="255" FourCC="AACL" /><c t="0" d="20053333" /><c d="20053334" /><c d="20053333" /><c d="19840000" /><c d="20053333" /><c d="20053334" /><c d="20053333" /><c d="19840000" /><c d="20053333" /><c d="20053334" /><c d="20053333" /><c d="19840000" /><c d="20053333" /><c d="20053334" /><c d="20053333" /><c d="19840000" /><c d="20053333" /><c d="20053334" /><c d="7253333" /></StreamIndex><StreamIndex Type="audio" QualityLevels="1" TimeScale="10000000" Language="deu" Name="audio_deu_1" Chunks="19" Url="QualityLevels({bitrate})/Fragments(audio_deu_1={start time})?noStreamProfile=1"><QualityLevel Index="0" Bitrate="224000" CodecPrivateData="00063F000000AF87FBA7022DFB42A4D405CD93843BDD0700200F00" FourCCData="0700200F00" SamplingRate="48000" Channels="6" BitsPerSample="16" PacketSize="896" AudioTag="65534" FourCC="EC-3" /><c t="0" d="20160000" /><c d="19840000" /><c d="20160000" /><c d="19840000" /><c d="20160000" /><c d="19840000" /><c d="20160000" /><c d="19840000" /><c d="20160000" /><c d="19840000" /><c d="20160000" /><c d="19840000" /><c d="20160000" /><c d="19840000" /><c d="20160000" /><c d="19840000" /><c d="20160000" /><c d="19840000" /><c d="8320000" /></StreamIndex><StreamIndex Type="video" QualityLevels="8" TimeScale="10000000" Language="deu" Name="video_deu" Chunks="19" Url="QualityLevels({bitrate})/Fragments(video_deu={start time})?noStreamProfile=1" MaxWidth="1920" MaxHeight="1080" DisplayWidth="1920" DisplayHeight="1080"><QualityLevel Index="0" Bitrate="23909" CodecPrivateData="000000016742C00CDB06077E5C05A808080A00000300020000030009C0C02EE0177CC6300F142AE00000000168CA8DC8" MaxWidth="384" MaxHeight="216" FourCC="AVC1" /><QualityLevel Index="1" Bitrate="403188" CodecPrivateData="00000001674D4014E98323B602D4040405000003000100000300320F1429380000000168EAECF2" MaxWidth="400" MaxHeight="224" FourCC="AVC1" /><QualityLevel Index="2" Bitrate="680365" CodecPrivateData="00000001674D401EE981405FF2E02D4040405000000300100000030320F162D3800000000168EAECF2" MaxWidth="640" MaxHeight="360" FourCC="AVC1" /><QualityLevel Index="3" Bitrate="1253465" CodecPrivateData="00000001674D401EE981405FF2E02D4040405000000300100000030320F162D3800000000168EAECF2" MaxWidth="640" MaxHeight="360" FourCC="AVC1" /><QualityLevel Index="4" Bitrate="2121558" CodecPrivateData="00000001674D401EECA0601BD80B50101014000003000400000300C83C58B6580000000168E93B3C80" MaxWidth="768" MaxHeight="432" FourCC="AVC1" /><QualityLevel Index="5" Bitrate="3275545" CodecPrivateData="00000001674D4020ECA02802DD80B501010140000003004000000C83C60C65800000000168E93B3C80" MaxWidth="1280" MaxHeight="720" FourCC="AVC1" /><QualityLevel Index="6" Bitrate="5300196" CodecPrivateData="00000001674D4028ECA03C0113F2E02D4040405000000300100000030320F18319600000000168E93B3C80" MaxWidth="1920" MaxHeight="1080" FourCC="AVC1" /><QualityLevel Index="7" Bitrate="8079312" CodecPrivateData="00000001674D4028ECA03C0113F2E02D4040405000000300100000030320F18319600000000168E93B3C80" MaxWidth="1920" MaxHeight="1080" FourCC="AVC1" /><c t="0" d="20000000" /><c d="20000000" /><c d="20000000" /><c d="20000000" /><c d="20000000" /><c d="20000000" /><c d="20000000" /><c d="20000000" /><c d="20000000" /><c d="20000000" /><c d="20000000" /><c d="20000000" /><c d="20000000" /><c d="20000000" /><c d="20000000" /><c d="20000000" /><c d="20000000" /><c d="20000000" /><c d="10000000" /></StreamIndex></SmoothStreamingMedia>
\ No newline at end of file
diff --git a/test/testdata/ism/sintel.Manifest b/test/testdata/ism/sintel.Manifest
new file mode 100644
index 0000000..2ff8c24
--- /dev/null
+++ b/test/testdata/ism/sintel.Manifest
@@ -0,0 +1,988 @@
+<?xml version="1.0" encoding="utf-8"?>
+<!-- Created with Unified Streaming Platform (version=1.10.18-20255) -->
+<SmoothStreamingMedia
+ MajorVersion="2"
+ MinorVersion="0"
+ TimeScale="10000000"
+ Duration="8880746666">
+ <StreamIndex
+ Type="audio"
+ QualityLevels="1"
+ TimeScale="10000000"
+ Name="audio"
+ Chunks="445"
+ Url="QualityLevels({bitrate})/Fragments(audio={start time})">
+ <QualityLevel
+ Index="0"
+ Bitrate="128001"
+ CodecPrivateData="1190"
+ SamplingRate="48000"
+ Channels="2"
+ BitsPerSample="16"
+ PacketSize="4"
+ AudioTag="255"
+ FourCC="AACL" />
+ <c t="0" d="20053333" />
+ <c d="20053333" />
+ <c d="20053334" />
+ <c d="19840000" />
+ <c d="20053333" />
+ <c d="20053333" />
+ <c d="20053334" />
+ <c d="19840000" />
+ <c d="20053333" />
+ <c d="20053333" />
+ <c d="20053334" />
+ <c d="19840000" />
+ <c d="20053333" />
+ <c d="20053333" />
+ <c d="20053334" />
+ <c d="19840000" />
+ <c d="20053333" />
+ <c d="20053333" />
+ <c d="20053334" />
+ <c d="19840000" />
+ <c d="20053333" />
+ <c d="20053333" />
+ <c d="20053334" />
+ <c d="19840000" />
+ <c d="20053333" />
+ <c d="20053333" />
+ <c d="20053334" />
+ <c d="19840000" />
+ <c d="20053333" />
+ <c d="20053333" />
+ <c d="20053334" />
+ <c d="19840000" />
+ <c d="20053333" />
+ <c d="20053333" />
+ <c d="20053334" />
+ <c d="19840000" />
+ <c d="20053333" />
+ <c d="20053333" />
+ <c d="20053334" />
+ <c d="19840000" />
+ <c d="20053333" />
+ <c d="20053333" />
+ <c d="20053334" />
+ <c d="19840000" />
+ <c d="20053333" />
+ <c d="20053333" />
+ <c d="20053334" />
+ <c d="19840000" />
+ <c d="20053333" />
+ <c d="20053333" />
+ <c d="20053334" />
+ <c d="19840000" />
+ <c d="20053333" />
+ <c d="20053333" />
+ <c d="20053334" />
+ <c d="19840000" />
+ <c d="20053333" />
+ <c d="20053333" />
+ <c d="20053334" />
+ <c d="19840000" />
+ <c d="20053333" />
+ <c d="20053333" />
+ <c d="20053334" />
+ <c d="19840000" />
+ <c d="20053333" />
+ <c d="20053333" />
+ <c d="20053334" />
+ <c d="19840000" />
+ <c d="20053333" />
+ <c d="20053333" />
+ <c d="20053334" />
+ <c d="19840000" />
+ <c d="20053333" />
+ <c d="20053333" />
+ <c d="20053334" />
+ <c d="19840000" />
+ <c d="20053333" />
+ <c d="20053333" />
+ <c d="20053334" />
+ <c d="19840000" />
+ <c d="20053333" />
+ <c d="20053333" />
+ <c d="20053334" />
+ <c d="19840000" />
+ <c d="20053333" />
+ <c d="20053333" />
+ <c d="20053334" />
+ <c d="19840000" />
+ <c d="20053333" />
+ <c d="20053333" />
+ <c d="20053334" />
+ <c d="19840000" />
+ <c d="20053333" />
+ <c d="20053333" />
+ <c d="20053334" />
+ <c d="19840000" />
+ <c d="20053333" />
+ <c d="20053333" />
+ <c d="20053334" />
+ <c d="19840000" />
+ <c d="20053333" />
+ <c d="20053333" />
+ <c d="20053334" />
+ <c d="19840000" />
+ <c d="20053333" />
+ <c d="20053333" />
+ <c d="20053334" />
+ <c d="19840000" />
+ <c d="20053333" />
+ <c d="20053333" />
+ <c d="20053334" />
+ <c d="19840000" />
+ <c d="20053333" />
+ <c d="20053333" />
+ <c d="20053334" />
+ <c d="19840000" />
+ <c d="20053333" />
+ <c d="20053333" />
+ <c d="20053334" />
+ <c d="19840000" />
+ <c d="20053333" />
+ <c d="20053333" />
+ <c d="20053334" />
+ <c d="19840000" />
+ <c d="20053333" />
+ <c d="20053333" />
+ <c d="20053334" />
+ <c d="19840000" />
+ <c d="20053333" />
+ <c d="20053333" />
+ <c d="20053334" />
+ <c d="19840000" />
+ <c d="20053333" />
+ <c d="20053333" />
+ <c d="20053334" />
+ <c d="19840000" />
+ <c d="20053333" />
+ <c d="20053333" />
+ <c d="20053334" />
+ <c d="19840000" />
+ <c d="20053333" />
+ <c d="20053333" />
+ <c d="20053334" />
+ <c d="19840000" />
+ <c d="20053333" />
+ <c d="20053333" />
+ <c d="20053334" />
+ <c d="19840000" />
+ <c d="20053333" />
+ <c d="20053333" />
+ <c d="20053334" />
+ <c d="19840000" />
+ <c d="20053333" />
+ <c d="20053333" />
+ <c d="20053334" />
+ <c d="19840000" />
+ <c d="20053333" />
+ <c d="20053333" />
+ <c d="20053334" />
+ <c d="19840000" />
+ <c d="20053333" />
+ <c d="20053333" />
+ <c d="20053334" />
+ <c d="19840000" />
+ <c d="20053333" />
+ <c d="20053333" />
+ <c d="20053334" />
+ <c d="19840000" />
+ <c d="20053333" />
+ <c d="20053333" />
+ <c d="20053334" />
+ <c d="19840000" />
+ <c d="20053333" />
+ <c d="20053333" />
+ <c d="20053334" />
+ <c d="19840000" />
+ <c d="20053333" />
+ <c d="20053333" />
+ <c d="20053334" />
+ <c d="19840000" />
+ <c d="20053333" />
+ <c d="20053333" />
+ <c d="20053334" />
+ <c d="19840000" />
+ <c d="20053333" />
+ <c d="20053333" />
+ <c d="20053334" />
+ <c d="19840000" />
+ <c d="20053333" />
+ <c d="20053333" />
+ <c d="20053334" />
+ <c d="19840000" />
+ <c d="20053333" />
+ <c d="20053333" />
+ <c d="20053334" />
+ <c d="19840000" />
+ <c d="20053333" />
+ <c d="20053333" />
+ <c d="20053334" />
+ <c d="19840000" />
+ <c d="20053333" />
+ <c d="20053333" />
+ <c d="20053334" />
+ <c d="19840000" />
+ <c d="20053333" />
+ <c d="20053333" />
+ <c d="20053334" />
+ <c d="19840000" />
+ <c d="20053333" />
+ <c d="20053333" />
+ <c d="20053334" />
+ <c d="19840000" />
+ <c d="20053333" />
+ <c d="20053333" />
+ <c d="20053334" />
+ <c d="19840000" />
+ <c d="20053333" />
+ <c d="20053333" />
+ <c d="20053334" />
+ <c d="19840000" />
+ <c d="20053333" />
+ <c d="20053333" />
+ <c d="20053334" />
+ <c d="19840000" />
+ <c d="20053333" />
+ <c d="20053333" />
+ <c d="20053334" />
+ <c d="19840000" />
+ <c d="20053333" />
+ <c d="20053333" />
+ <c d="20053334" />
+ <c d="19840000" />
+ <c d="20053333" />
+ <c d="20053333" />
+ <c d="20053334" />
+ <c d="19840000" />
+ <c d="20053333" />
+ <c d="20053333" />
+ <c d="20053334" />
+ <c d="19840000" />
+ <c d="20053333" />
+ <c d="20053333" />
+ <c d="20053334" />
+ <c d="19840000" />
+ <c d="20053333" />
+ <c d="20053333" />
+ <c d="20053334" />
+ <c d="19840000" />
+ <c d="20053333" />
+ <c d="20053333" />
+ <c d="20053334" />
+ <c d="19840000" />
+ <c d="20053333" />
+ <c d="20053333" />
+ <c d="20053334" />
+ <c d="19840000" />
+ <c d="20053333" />
+ <c d="20053333" />
+ <c d="20053334" />
+ <c d="19840000" />
+ <c d="20053333" />
+ <c d="20053333" />
+ <c d="20053334" />
+ <c d="19840000" />
+ <c d="20053333" />
+ <c d="20053333" />
+ <c d="20053334" />
+ <c d="19840000" />
+ <c d="20053333" />
+ <c d="20053333" />
+ <c d="20053334" />
+ <c d="19840000" />
+ <c d="20053333" />
+ <c d="20053333" />
+ <c d="20053334" />
+ <c d="19840000" />
+ <c d="20053333" />
+ <c d="20053333" />
+ <c d="20053334" />
+ <c d="19840000" />
+ <c d="20053333" />
+ <c d="20053333" />
+ <c d="20053334" />
+ <c d="19840000" />
+ <c d="20053333" />
+ <c d="20053333" />
+ <c d="20053334" />
+ <c d="19840000" />
+ <c d="20053333" />
+ <c d="20053333" />
+ <c d="20053334" />
+ <c d="19840000" />
+ <c d="20053333" />
+ <c d="20053333" />
+ <c d="20053334" />
+ <c d="19840000" />
+ <c d="20053333" />
+ <c d="20053333" />
+ <c d="20053334" />
+ <c d="19840000" />
+ <c d="20053333" />
+ <c d="20053333" />
+ <c d="20053334" />
+ <c d="19840000" />
+ <c d="20053333" />
+ <c d="20053333" />
+ <c d="20053334" />
+ <c d="19840000" />
+ <c d="20053333" />
+ <c d="20053333" />
+ <c d="20053334" />
+ <c d="19840000" />
+ <c d="20053333" />
+ <c d="20053333" />
+ <c d="20053334" />
+ <c d="19840000" />
+ <c d="20053333" />
+ <c d="20053333" />
+ <c d="20053334" />
+ <c d="19840000" />
+ <c d="20053333" />
+ <c d="20053333" />
+ <c d="20053334" />
+ <c d="19840000" />
+ <c d="20053333" />
+ <c d="20053333" />
+ <c d="20053334" />
+ <c d="19840000" />
+ <c d="20053333" />
+ <c d="20053333" />
+ <c d="20053334" />
+ <c d="19840000" />
+ <c d="20053333" />
+ <c d="20053333" />
+ <c d="20053334" />
+ <c d="19840000" />
+ <c d="20053333" />
+ <c d="20053333" />
+ <c d="20053334" />
+ <c d="19840000" />
+ <c d="20053333" />
+ <c d="20053333" />
+ <c d="20053334" />
+ <c d="19840000" />
+ <c d="20053333" />
+ <c d="20053333" />
+ <c d="20053334" />
+ <c d="19840000" />
+ <c d="20053333" />
+ <c d="20053333" />
+ <c d="20053334" />
+ <c d="19840000" />
+ <c d="20053333" />
+ <c d="20053333" />
+ <c d="20053334" />
+ <c d="19840000" />
+ <c d="20053333" />
+ <c d="20053333" />
+ <c d="20053334" />
+ <c d="19840000" />
+ <c d="20053333" />
+ <c d="20053333" />
+ <c d="20053334" />
+ <c d="19840000" />
+ <c d="20053333" />
+ <c d="20053333" />
+ <c d="20053334" />
+ <c d="19840000" />
+ <c d="20053333" />
+ <c d="20053333" />
+ <c d="20053334" />
+ <c d="19840000" />
+ <c d="20053333" />
+ <c d="20053333" />
+ <c d="20053334" />
+ <c d="19840000" />
+ <c d="20053333" />
+ <c d="20053333" />
+ <c d="20053334" />
+ <c d="19840000" />
+ <c d="20053333" />
+ <c d="20053333" />
+ <c d="20053334" />
+ <c d="19840000" />
+ <c d="20053333" />
+ <c d="20053333" />
+ <c d="20053334" />
+ <c d="19840000" />
+ <c d="20053333" />
+ <c d="20053333" />
+ <c d="20053334" />
+ <c d="19840000" />
+ <c d="20053333" />
+ <c d="20053333" />
+ <c d="20053334" />
+ <c d="19840000" />
+ <c d="20053333" />
+ <c d="20053333" />
+ <c d="20053334" />
+ <c d="19840000" />
+ <c d="20053333" />
+ <c d="20053333" />
+ <c d="20053334" />
+ <c d="19840000" />
+ <c d="20053333" />
+ <c d="20053333" />
+ <c d="20053334" />
+ <c d="19840000" />
+ <c d="20053333" />
+ <c d="20053333" />
+ <c d="20053334" />
+ <c d="19840000" />
+ <c d="20053333" />
+ <c d="20053333" />
+ <c d="20053334" />
+ <c d="19840000" />
+ <c d="20053333" />
+ <c d="20053333" />
+ <c d="20053334" />
+ <c d="19840000" />
+ <c d="20053333" />
+ <c d="20053333" />
+ <c d="20053334" />
+ <c d="19840000" />
+ <c d="20053333" />
+ <c d="20053333" />
+ <c d="20053334" />
+ <c d="19840000" />
+ <c d="20053333" />
+ <c d="20053333" />
+ <c d="20053334" />
+ <c d="19840000" />
+ <c d="20053333" />
+ <c d="20053333" />
+ <c d="20053334" />
+ <c d="19840000" />
+ <c d="20053333" />
+ <c d="20053333" />
+ <c d="20053334" />
+ <c d="19840000" />
+ <c d="20053333" />
+ <c d="20053333" />
+ <c d="20053334" />
+ <c d="19840000" />
+ <c d="746666" />
+ </StreamIndex>
+ <StreamIndex
+ Type="text"
+ QualityLevels="1"
+ TimeScale="10000000"
+ Language="eng"
+ Subtype="CAPT"
+ Name="textstream_eng"
+ Chunks="11"
+ Url="QualityLevels({bitrate})/Fragments(textstream_eng={start time})">
+ <QualityLevel
+ Index="0"
+ Bitrate="1000"
+ CodecPrivateData=""
+ FourCC="TTML" />
+ <c t="0" d="600000000" />
+ <c d="600000000" />
+ <c d="600000000" />
+ <c d="600000000" />
+ <c d="600000000" />
+ <c d="600000000" />
+ <c d="600000000" />
+ <c d="600000000" />
+ <c d="600000000" />
+ <c d="600000000" />
+ <c d="240000000" />
+ </StreamIndex>
+ <StreamIndex
+ Type="video"
+ QualityLevels="5"
+ TimeScale="10000000"
+ Name="video"
+ Chunks="444"
+ Url="QualityLevels({bitrate})/Fragments(video={start time})"
+ MaxWidth="1688"
+ MaxHeight="720"
+ DisplayWidth="1689"
+ DisplayHeight="720">
+ <QualityLevel
+ Index="0"
+ Bitrate="100000"
+ CodecPrivateData="00000001674D401FDA0544EFFC2D002CBC40000003004000000C03C60CA80000000168EF32C8"
+ MaxWidth="336"
+ MaxHeight="144"
+ FourCC="AVC1" />
+ <QualityLevel
+ Index="1"
+ Bitrate="326000"
+ CodecPrivateData="00000001674D401FDA0241FE23FFC3BC83BA44000003000400000300C03C60CA800000000168EF32C8"
+ MaxWidth="562"
+ MaxHeight="240"
+ FourCC="AVC1" />
+ <QualityLevel
+ Index="2"
+ Bitrate="698000"
+ CodecPrivateData="00000001674D401FDA0350BFB97FF06AF06AD1000003000100000300300F1832A00000000168EF32C8"
+ MaxWidth="844"
+ MaxHeight="360"
+ FourCC="AVC1" />
+ <QualityLevel
+ Index="3"
+ Bitrate="1493000"
+ CodecPrivateData="00000001674D401FDA011C3DE6FFF0D890D871000003000100000300300F1832A00000000168EF32C8"
+ MaxWidth="1126"
+ MaxHeight="480"
+ FourCC="AVC1" />
+ <QualityLevel
+ Index="4"
+ Bitrate="4482000"
+ CodecPrivateData="00000001674D401FDA01A816F97FFC1ABC1AB440000003004000000C03C60CA80000000168EF32C8"
+ MaxWidth="1688"
+ MaxHeight="720"
+ FourCC="AVC1" />
+ <c t="0" d="20000000" />
+ <c d="20000000" />
+ <c d="20000000" />
+ <c d="20000000" />
+ <c d="20000000" />
+ <c d="20000000" />
+ <c d="20000000" />
+ <c d="20000000" />
+ <c d="20000000" />
+ <c d="20000000" />
+ <c d="20000000" />
+ <c d="20000000" />
+ <c d="20000000" />
+ <c d="20000000" />
+ <c d="20000000" />
+ <c d="20000000" />
+ <c d="20000000" />
+ <c d="20000000" />
+ <c d="20000000" />
+ <c d="20000000" />
+ <c d="20000000" />
+ <c d="20000000" />
+ <c d="20000000" />
+ <c d="20000000" />
+ <c d="20000000" />
+ <c d="20000000" />
+ <c d="20000000" />
+ <c d="20000000" />
+ <c d="20000000" />
+ <c d="20000000" />
+ <c d="20000000" />
+ <c d="20000000" />
+ <c d="20000000" />
+ <c d="20000000" />
+ <c d="20000000" />
+ <c d="20000000" />
+ <c d="20000000" />
+ <c d="20000000" />
+ <c d="20000000" />
+ <c d="20000000" />
+ <c d="20000000" />
+ <c d="20000000" />
+ <c d="20000000" />
+ <c d="20000000" />
+ <c d="20000000" />
+ <c d="20000000" />
+ <c d="20000000" />
+ <c d="20000000" />
+ <c d="20000000" />
+ <c d="20000000" />
+ <c d="20000000" />
+ <c d="20000000" />
+ <c d="20000000" />
+ <c d="20000000" />
+ <c d="20000000" />
+ <c d="20000000" />
+ <c d="20000000" />
+ <c d="20000000" />
+ <c d="20000000" />
+ <c d="20000000" />
+ <c d="20000000" />
+ <c d="20000000" />
+ <c d="20000000" />
+ <c d="20000000" />
+ <c d="20000000" />
+ <c d="20000000" />
+ <c d="20000000" />
+ <c d="20000000" />
+ <c d="20000000" />
+ <c d="20000000" />
+ <c d="20000000" />
+ <c d="20000000" />
+ <c d="20000000" />
+ <c d="20000000" />
+ <c d="20000000" />
+ <c d="20000000" />
+ <c d="20000000" />
+ <c d="20000000" />
+ <c d="20000000" />
+ <c d="20000000" />
+ <c d="20000000" />
+ <c d="20000000" />
+ <c d="20000000" />
+ <c d="20000000" />
+ <c d="20000000" />
+ <c d="20000000" />
+ <c d="20000000" />
+ <c d="20000000" />
+ <c d="20000000" />
+ <c d="20000000" />
+ <c d="20000000" />
+ <c d="20000000" />
+ <c d="20000000" />
+ <c d="20000000" />
+ <c d="20000000" />
+ <c d="20000000" />
+ <c d="20000000" />
+ <c d="20000000" />
+ <c d="20000000" />
+ <c d="20000000" />
+ <c d="20000000" />
+ <c d="20000000" />
+ <c d="20000000" />
+ <c d="20000000" />
+ <c d="20000000" />
+ <c d="20000000" />
+ <c d="20000000" />
+ <c d="20000000" />
+ <c d="20000000" />
+ <c d="20000000" />
+ <c d="20000000" />
+ <c d="20000000" />
+ <c d="20000000" />
+ <c d="20000000" />
+ <c d="20000000" />
+ <c d="20000000" />
+ <c d="20000000" />
+ <c d="20000000" />
+ <c d="20000000" />
+ <c d="20000000" />
+ <c d="20000000" />
+ <c d="20000000" />
+ <c d="20000000" />
+ <c d="20000000" />
+ <c d="20000000" />
+ <c d="20000000" />
+ <c d="20000000" />
+ <c d="20000000" />
+ <c d="20000000" />
+ <c d="20000000" />
+ <c d="20000000" />
+ <c d="20000000" />
+ <c d="20000000" />
+ <c d="20000000" />
+ <c d="20000000" />
+ <c d="20000000" />
+ <c d="20000000" />
+ <c d="20000000" />
+ <c d="20000000" />
+ <c d="20000000" />
+ <c d="20000000" />
+ <c d="20000000" />
+ <c d="20000000" />
+ <c d="20000000" />
+ <c d="20000000" />
+ <c d="20000000" />
+ <c d="20000000" />
+ <c d="20000000" />
+ <c d="20000000" />
+ <c d="20000000" />
+ <c d="20000000" />
+ <c d="20000000" />
+ <c d="20000000" />
+ <c d="20000000" />
+ <c d="20000000" />
+ <c d="20000000" />
+ <c d="20000000" />
+ <c d="20000000" />
+ <c d="20000000" />
+ <c d="20000000" />
+ <c d="20000000" />
+ <c d="20000000" />
+ <c d="20000000" />
+ <c d="20000000" />
+ <c d="20000000" />
+ <c d="20000000" />
+ <c d="20000000" />
+ <c d="20000000" />
+ <c d="20000000" />
+ <c d="20000000" />
+ <c d="20000000" />
+ <c d="20000000" />
+ <c d="20000000" />
+ <c d="20000000" />
+ <c d="20000000" />
+ <c d="20000000" />
+ <c d="20000000" />
+ <c d="20000000" />
+ <c d="20000000" />
+ <c d="20000000" />
+ <c d="20000000" />
+ <c d="20000000" />
+ <c d="20000000" />
+ <c d="20000000" />
+ <c d="20000000" />
+ <c d="20000000" />
+ <c d="20000000" />
+ <c d="20000000" />
+ <c d="20000000" />
+ <c d="20000000" />
+ <c d="20000000" />
+ <c d="20000000" />
+ <c d="20000000" />
+ <c d="20000000" />
+ <c d="20000000" />
+ <c d="20000000" />
+ <c d="20000000" />
+ <c d="20000000" />
+ <c d="20000000" />
+ <c d="20000000" />
+ <c d="20000000" />
+ <c d="20000000" />
+ <c d="20000000" />
+ <c d="20000000" />
+ <c d="20000000" />
+ <c d="20000000" />
+ <c d="20000000" />
+ <c d="20000000" />
+ <c d="20000000" />
+ <c d="20000000" />
+ <c d="20000000" />
+ <c d="20000000" />
+ <c d="20000000" />
+ <c d="20000000" />
+ <c d="20000000" />
+ <c d="20000000" />
+ <c d="20000000" />
+ <c d="20000000" />
+ <c d="20000000" />
+ <c d="20000000" />
+ <c d="20000000" />
+ <c d="20000000" />
+ <c d="20000000" />
+ <c d="20000000" />
+ <c d="20000000" />
+ <c d="20000000" />
+ <c d="20000000" />
+ <c d="20000000" />
+ <c d="20000000" />
+ <c d="20000000" />
+ <c d="20000000" />
+ <c d="20000000" />
+ <c d="20000000" />
+ <c d="20000000" />
+ <c d="20000000" />
+ <c d="20000000" />
+ <c d="20000000" />
+ <c d="20000000" />
+ <c d="20000000" />
+ <c d="20000000" />
+ <c d="20000000" />
+ <c d="20000000" />
+ <c d="20000000" />
+ <c d="20000000" />
+ <c d="20000000" />
+ <c d="20000000" />
+ <c d="20000000" />
+ <c d="20000000" />
+ <c d="20000000" />
+ <c d="20000000" />
+ <c d="20000000" />
+ <c d="20000000" />
+ <c d="20000000" />
+ <c d="20000000" />
+ <c d="20000000" />
+ <c d="20000000" />
+ <c d="20000000" />
+ <c d="20000000" />
+ <c d="20000000" />
+ <c d="20000000" />
+ <c d="20000000" />
+ <c d="20000000" />
+ <c d="20000000" />
+ <c d="20000000" />
+ <c d="20000000" />
+ <c d="20000000" />
+ <c d="20000000" />
+ <c d="20000000" />
+ <c d="20000000" />
+ <c d="20000000" />
+ <c d="20000000" />
+ <c d="20000000" />
+ <c d="20000000" />
+ <c d="20000000" />
+ <c d="20000000" />
+ <c d="20000000" />
+ <c d="20000000" />
+ <c d="20000000" />
+ <c d="20000000" />
+ <c d="20000000" />
+ <c d="20000000" />
+ <c d="20000000" />
+ <c d="20000000" />
+ <c d="20000000" />
+ <c d="20000000" />
+ <c d="20000000" />
+ <c d="20000000" />
+ <c d="20000000" />
+ <c d="20000000" />
+ <c d="20000000" />
+ <c d="20000000" />
+ <c d="20000000" />
+ <c d="20000000" />
+ <c d="20000000" />
+ <c d="20000000" />
+ <c d="20000000" />
+ <c d="20000000" />
+ <c d="20000000" />
+ <c d="20000000" />
+ <c d="20000000" />
+ <c d="20000000" />
+ <c d="20000000" />
+ <c d="20000000" />
+ <c d="20000000" />
+ <c d="20000000" />
+ <c d="20000000" />
+ <c d="20000000" />
+ <c d="20000000" />
+ <c d="20000000" />
+ <c d="20000000" />
+ <c d="20000000" />
+ <c d="20000000" />
+ <c d="20000000" />
+ <c d="20000000" />
+ <c d="20000000" />
+ <c d="20000000" />
+ <c d="20000000" />
+ <c d="20000000" />
+ <c d="20000000" />
+ <c d="20000000" />
+ <c d="20000000" />
+ <c d="20000000" />
+ <c d="20000000" />
+ <c d="20000000" />
+ <c d="20000000" />
+ <c d="20000000" />
+ <c d="20000000" />
+ <c d="20000000" />
+ <c d="20000000" />
+ <c d="20000000" />
+ <c d="20000000" />
+ <c d="20000000" />
+ <c d="20000000" />
+ <c d="20000000" />
+ <c d="20000000" />
+ <c d="20000000" />
+ <c d="20000000" />
+ <c d="20000000" />
+ <c d="20000000" />
+ <c d="20000000" />
+ <c d="20000000" />
+ <c d="20000000" />
+ <c d="20000000" />
+ <c d="20000000" />
+ <c d="20000000" />
+ <c d="20000000" />
+ <c d="20000000" />
+ <c d="20000000" />
+ <c d="20000000" />
+ <c d="20000000" />
+ <c d="20000000" />
+ <c d="20000000" />
+ <c d="20000000" />
+ <c d="20000000" />
+ <c d="20000000" />
+ <c d="20000000" />
+ <c d="20000000" />
+ <c d="20000000" />
+ <c d="20000000" />
+ <c d="20000000" />
+ <c d="20000000" />
+ <c d="20000000" />
+ <c d="20000000" />
+ <c d="20000000" />
+ <c d="20000000" />
+ <c d="20000000" />
+ <c d="20000000" />
+ <c d="20000000" />
+ <c d="20000000" />
+ <c d="20000000" />
+ <c d="20000000" />
+ <c d="20000000" />
+ <c d="20000000" />
+ <c d="20000000" />
+ <c d="20000000" />
+ <c d="20000000" />
+ <c d="20000000" />
+ <c d="20000000" />
+ <c d="20000000" />
+ <c d="20000000" />
+ <c d="20000000" />
+ <c d="20000000" />
+ <c d="20000000" />
+ <c d="20000000" />
+ <c d="20000000" />
+ <c d="20000000" />
+ <c d="20000000" />
+ <c d="20000000" />
+ <c d="20000000" />
+ <c d="20000000" />
+ <c d="20000000" />
+ <c d="20000000" />
+ <c d="20000000" />
+ <c d="20000000" />
+ <c d="20000000" />
+ <c d="20000000" />
+ <c d="20000000" />
+ <c d="20000000" />
+ <c d="20000000" />
+ <c d="20000000" />
+ <c d="20000000" />
+ <c d="20000000" />
+ <c d="20000000" />
+ <c d="20000000" />
+ <c d="20000000" />
+ <c d="20000000" />
+ <c d="20000000" />
+ <c d="20000000" />
+ <c d="20000000" />
+ <c d="20000000" />
+ <c d="20000000" />
+ <c d="20000000" />
+ <c d="20000000" />
+ <c d="20000000" />
+ <c d="20000000" />
+ <c d="20000000" />
+ <c d="20000000" />
+ <c d="20000000" />
+ <c d="20000000" />
+ <c d="20000000" />
+ <c d="20000000" />
+ <c d="20000000" />
+ <c d="20000000" />
+ <c d="20000000" />
+ <c d="20000000" />
+ <c d="20000000" />
+ <c d="20000000" />
+ <c d="20000000" />
+ <c d="20000000" />
+ <c d="20000000" />
+ <c d="20000000" />
+ <c d="20000000" />
+ <c d="20000000" />
+ <c d="20000000" />
+ <c d="20000000" />
+ <c d="20000000" />
+ <c d="20000000" />
+ <c d="20000000" />
+ <c d="20000000" />
+ <c d="20000000" />
+ <c d="20000000" />
+ <c d="20000000" />
+ <c d="20000000" />
+ <c d="20000000" />
+ </StreamIndex>
+</SmoothStreamingMedia>
diff --git a/test/testdata/m3u8/bipbop_16x9.m3u8 b/test/testdata/m3u8/bipbop_16x9.m3u8
new file mode 100644
index 0000000..1ce87dd
--- /dev/null
+++ b/test/testdata/m3u8/bipbop_16x9.m3u8
@@ -0,0 +1,38 @@
+#EXTM3U
+
+#EXT-X-MEDIA:TYPE=AUDIO,GROUP-ID="bipbop_audio",LANGUAGE="eng",NAME="BipBop Audio 1",AUTOSELECT=YES,DEFAULT=YES
+#EXT-X-MEDIA:TYPE=AUDIO,GROUP-ID="bipbop_audio",LANGUAGE="eng",NAME="BipBop Audio 2",AUTOSELECT=NO,DEFAULT=NO,URI="alternate_audio_aac/prog_index.m3u8"
+
+
+#EXT-X-MEDIA:TYPE=SUBTITLES,GROUP-ID="subs",NAME="English",DEFAULT=YES,AUTOSELECT=YES,FORCED=NO,LANGUAGE="en",CHARACTERISTICS="public.accessibility.transcribes-spoken-dialog, public.accessibility.describes-music-and-sound",URI="subtitles/eng/prog_index.m3u8"
+#EXT-X-MEDIA:TYPE=SUBTITLES,GROUP-ID="subs",NAME="English (Forced)",DEFAULT=NO,AUTOSELECT=NO,FORCED=YES,LANGUAGE="en",URI="subtitles/eng_forced/prog_index.m3u8"
+#EXT-X-MEDIA:TYPE=SUBTITLES,GROUP-ID="subs",NAME="Français",DEFAULT=NO,AUTOSELECT=YES,FORCED=NO,LANGUAGE="fr",CHARACTERISTICS="public.accessibility.transcribes-spoken-dialog, public.accessibility.describes-music-and-sound",URI="subtitles/fra/prog_index.m3u8"
+#EXT-X-MEDIA:TYPE=SUBTITLES,GROUP-ID="subs",NAME="Français (Forced)",DEFAULT=NO,AUTOSELECT=NO,FORCED=YES,LANGUAGE="fr",URI="subtitles/fra_forced/prog_index.m3u8"
+#EXT-X-MEDIA:TYPE=SUBTITLES,GROUP-ID="subs",NAME="Español",DEFAULT=NO,AUTOSELECT=YES,FORCED=NO,LANGUAGE="es",CHARACTERISTICS="public.accessibility.transcribes-spoken-dialog, public.accessibility.describes-music-and-sound",URI="subtitles/spa/prog_index.m3u8"
+#EXT-X-MEDIA:TYPE=SUBTITLES,GROUP-ID="subs",NAME="Español (Forced)",DEFAULT=NO,AUTOSELECT=NO,FORCED=YES,LANGUAGE="es",URI="subtitles/spa_forced/prog_index.m3u8"
+#EXT-X-MEDIA:TYPE=SUBTITLES,GROUP-ID="subs",NAME="日本語",DEFAULT=NO,AUTOSELECT=YES,FORCED=NO,LANGUAGE="ja",CHARACTERISTICS="public.accessibility.transcribes-spoken-dialog, public.accessibility.describes-music-and-sound",URI="subtitles/jpn/prog_index.m3u8"
+#EXT-X-MEDIA:TYPE=SUBTITLES,GROUP-ID="subs",NAME="日本語 (Forced)",DEFAULT=NO,AUTOSELECT=NO,FORCED=YES,LANGUAGE="ja",URI="subtitles/jpn_forced/prog_index.m3u8"
+
+
+#EXT-X-STREAM-INF:BANDWIDTH=263851,CODECS="mp4a.40.2, avc1.4d400d",RESOLUTION=416x234,AUDIO="bipbop_audio",SUBTITLES="subs"
+gear1/prog_index.m3u8
+#EXT-X-I-FRAME-STREAM-INF:BANDWIDTH=28451,CODECS="avc1.4d400d",URI="gear1/iframe_index.m3u8"
+
+#EXT-X-STREAM-INF:BANDWIDTH=577610,CODECS="mp4a.40.2, avc1.4d401e",RESOLUTION=640x360,AUDIO="bipbop_audio",SUBTITLES="subs"
+gear2/prog_index.m3u8
+#EXT-X-I-FRAME-STREAM-INF:BANDWIDTH=181534,CODECS="avc1.4d401e",URI="gear2/iframe_index.m3u8"
+
+#EXT-X-STREAM-INF:BANDWIDTH=915905,CODECS="mp4a.40.2, avc1.4d401f",RESOLUTION=960x540,AUDIO="bipbop_audio",SUBTITLES="subs"
+gear3/prog_index.m3u8
+#EXT-X-I-FRAME-STREAM-INF:BANDWIDTH=297056,CODECS="avc1.4d401f",URI="gear3/iframe_index.m3u8"
+
+#EXT-X-STREAM-INF:BANDWIDTH=1030138,CODECS="mp4a.40.2, avc1.4d401f",RESOLUTION=1280x720,AUDIO="bipbop_audio",SUBTITLES="subs"
+gear4/prog_index.m3u8
+#EXT-X-I-FRAME-STREAM-INF:BANDWIDTH=339492,CODECS="avc1.4d401f",URI="gear4/iframe_index.m3u8"
+
+#EXT-X-STREAM-INF:BANDWIDTH=1924009,CODECS="mp4a.40.2, avc1.4d401f",RESOLUTION=1920x1080,AUDIO="bipbop_audio",SUBTITLES="subs"
+gear5/prog_index.m3u8
+#EXT-X-I-FRAME-STREAM-INF:BANDWIDTH=669554,CODECS="avc1.4d401f",URI="gear5/iframe_index.m3u8"
+
+#EXT-X-STREAM-INF:BANDWIDTH=41457,CODECS="mp4a.40.2",AUDIO="bipbop_audio",SUBTITLES="subs"
+gear0/prog_index.m3u8
diff --git a/test/testdata/m3u8/img_bipbop_adv_example_fmp4.m3u8 b/test/testdata/m3u8/img_bipbop_adv_example_fmp4.m3u8
new file mode 100644
index 0000000..620ce04
--- /dev/null
+++ b/test/testdata/m3u8/img_bipbop_adv_example_fmp4.m3u8
@@ -0,0 +1,76 @@
+#EXTM3U
+#EXT-X-VERSION:6
+#EXT-X-INDEPENDENT-SEGMENTS
+
+
+#EXT-X-STREAM-INF:AVERAGE-BANDWIDTH=2168183,BANDWIDTH=2177116,CODECS="avc1.640020,mp4a.40.2",RESOLUTION=960x540,FRAME-RATE=60.000,CLOSED-CAPTIONS="cc1",AUDIO="aud1",SUBTITLES="sub1"
+v5/prog_index.m3u8
+#EXT-X-STREAM-INF:AVERAGE-BANDWIDTH=7968416,BANDWIDTH=8001098,CODECS="avc1.64002a,mp4a.40.2",RESOLUTION=1920x1080,FRAME-RATE=60.000,CLOSED-CAPTIONS="cc1",AUDIO="aud1",SUBTITLES="sub1"
+v9/prog_index.m3u8
+#EXT-X-STREAM-INF:AVERAGE-BANDWIDTH=6170000,BANDWIDTH=6312875,CODECS="avc1.64002a,mp4a.40.2",RESOLUTION=1920x1080,FRAME-RATE=60.000,CLOSED-CAPTIONS="cc1",AUDIO="aud1",SUBTITLES="sub1"
+v8/prog_index.m3u8
+#EXT-X-STREAM-INF:AVERAGE-BANDWIDTH=4670769,BANDWIDTH=4943747,CODECS="avc1.64002a,mp4a.40.2",RESOLUTION=1920x1080,FRAME-RATE=60.000,CLOSED-CAPTIONS="cc1",AUDIO="aud1",SUBTITLES="sub1"
+v7/prog_index.m3u8
+#EXT-X-STREAM-INF:AVERAGE-BANDWIDTH=3168702,BANDWIDTH=3216424,CODECS="avc1.640020,mp4a.40.2",RESOLUTION=1280x720,FRAME-RATE=60.000,CLOSED-CAPTIONS="cc1",AUDIO="aud1",SUBTITLES="sub1"
+v6/prog_index.m3u8
+#EXT-X-STREAM-INF:AVERAGE-BANDWIDTH=1265132,BANDWIDTH=1268994,CODECS="avc1.64001e,mp4a.40.2",RESOLUTION=768x432,FRAME-RATE=30.000,CLOSED-CAPTIONS="cc1",AUDIO="aud1",SUBTITLES="sub1"
+v4/prog_index.m3u8
+#EXT-X-STREAM-INF:AVERAGE-BANDWIDTH=895755,BANDWIDTH=902298,CODECS="avc1.64001e,mp4a.40.2",RESOLUTION=640x360,FRAME-RATE=30.000,CLOSED-CAPTIONS="cc1",AUDIO="aud1",SUBTITLES="sub1"
+v3/prog_index.m3u8
+#EXT-X-STREAM-INF:AVERAGE-BANDWIDTH=530721,BANDWIDTH=541052,CODECS="avc1.640015,mp4a.40.2",RESOLUTION=480x270,FRAME-RATE=30.000,CLOSED-CAPTIONS="cc1",AUDIO="aud1",SUBTITLES="sub1"
+v2/prog_index.m3u8
+
+
+#EXT-X-STREAM-INF:AVERAGE-BANDWIDTH=2390686,BANDWIDTH=2399619,CODECS="avc1.640020,ac-3",RESOLUTION=960x540,FRAME-RATE=60.000,CLOSED-CAPTIONS="cc1",AUDIO="aud2",SUBTITLES="sub1"
+v5/prog_index.m3u8
+#EXT-X-STREAM-INF:AVERAGE-BANDWIDTH=8190919,BANDWIDTH=8223601,CODECS="avc1.64002a,ac-3",RESOLUTION=1920x1080,FRAME-RATE=60.000,CLOSED-CAPTIONS="cc1",AUDIO="aud2",SUBTITLES="sub1"
+v9/prog_index.m3u8
+#EXT-X-STREAM-INF:AVERAGE-BANDWIDTH=6392503,BANDWIDTH=6535378,CODECS="avc1.64002a,ac-3",RESOLUTION=1920x1080,FRAME-RATE=60.000,CLOSED-CAPTIONS="cc1",AUDIO="aud2",SUBTITLES="sub1"
+v8/prog_index.m3u8
+#EXT-X-STREAM-INF:AVERAGE-BANDWIDTH=4893272,BANDWIDTH=5166250,CODECS="avc1.64002a,ac-3",RESOLUTION=1920x1080,FRAME-RATE=60.000,CLOSED-CAPTIONS="cc1",AUDIO="aud2",SUBTITLES="sub1"
+v7/prog_index.m3u8
+#EXT-X-STREAM-INF:AVERAGE-BANDWIDTH=3391205,BANDWIDTH=3438927,CODECS="avc1.640020,ac-3",RESOLUTION=1280x720,FRAME-RATE=60.000,CLOSED-CAPTIONS="cc1",AUDIO="aud2",SUBTITLES="sub1"
+v6/prog_index.m3u8
+#EXT-X-STREAM-INF:AVERAGE-BANDWIDTH=1487635,BANDWIDTH=1491497,CODECS="avc1.64001e,ac-3",RESOLUTION=768x432,FRAME-RATE=30.000,CLOSED-CAPTIONS="cc1",AUDIO="aud2",SUBTITLES="sub1"
+v4/prog_index.m3u8
+#EXT-X-STREAM-INF:AVERAGE-BANDWIDTH=1118258,BANDWIDTH=1124801,CODECS="avc1.64001e,ac-3",RESOLUTION=640x360,FRAME-RATE=30.000,CLOSED-CAPTIONS="cc1",AUDIO="aud2",SUBTITLES="sub1"
+v3/prog_index.m3u8
+#EXT-X-STREAM-INF:AVERAGE-BANDWIDTH=753224,BANDWIDTH=763555,CODECS="avc1.640015,ac-3",RESOLUTION=480x270,FRAME-RATE=30.000,CLOSED-CAPTIONS="cc1",AUDIO="aud2",SUBTITLES="sub1"
+v2/prog_index.m3u8
+
+
+#EXT-X-STREAM-INF:AVERAGE-BANDWIDTH=2198686,BANDWIDTH=2207619,CODECS="avc1.640020,ec-3",RESOLUTION=960x540,FRAME-RATE=60.000,CLOSED-CAPTIONS="cc1",AUDIO="aud3",SUBTITLES="sub1"
+v5/prog_index.m3u8
+#EXT-X-STREAM-INF:AVERAGE-BANDWIDTH=7998919,BANDWIDTH=8031601,CODECS="avc1.64002a,ec-3",RESOLUTION=1920x1080,FRAME-RATE=60.000,CLOSED-CAPTIONS="cc1",AUDIO="aud3",SUBTITLES="sub1"
+v9/prog_index.m3u8
+#EXT-X-STREAM-INF:AVERAGE-BANDWIDTH=6200503,BANDWIDTH=6343378,CODECS="avc1.64002a,ec-3",RESOLUTION=1920x1080,FRAME-RATE=60.000,CLOSED-CAPTIONS="cc1",AUDIO="aud3",SUBTITLES="sub1"
+v8/prog_index.m3u8
+#EXT-X-STREAM-INF:AVERAGE-BANDWIDTH=4701272,BANDWIDTH=4974250,CODECS="avc1.64002a,ec-3",RESOLUTION=1920x1080,FRAME-RATE=60.000,CLOSED-CAPTIONS="cc1",AUDIO="aud3",SUBTITLES="sub1"
+v7/prog_index.m3u8
+#EXT-X-STREAM-INF:AVERAGE-BANDWIDTH=3199205,BANDWIDTH=3246927,CODECS="avc1.640020,ec-3",RESOLUTION=1280x720,FRAME-RATE=60.000,CLOSED-CAPTIONS="cc1",AUDIO="aud3",SUBTITLES="sub1"
+v6/prog_index.m3u8
+#EXT-X-STREAM-INF:AVERAGE-BANDWIDTH=1295635,BANDWIDTH=1299497,CODECS="avc1.64001e,ec-3",RESOLUTION=768x432,FRAME-RATE=30.000,CLOSED-CAPTIONS="cc1",AUDIO="aud3",SUBTITLES="sub1"
+v4/prog_index.m3u8
+#EXT-X-STREAM-INF:AVERAGE-BANDWIDTH=926258,BANDWIDTH=932801,CODECS="avc1.64001e,ec-3",RESOLUTION=640x360,FRAME-RATE=30.000,CLOSED-CAPTIONS="cc1",AUDIO="aud3",SUBTITLES="sub1"
+v3/prog_index.m3u8
+#EXT-X-STREAM-INF:AVERAGE-BANDWIDTH=561224,BANDWIDTH=571555,CODECS="avc1.640015,ec-3",RESOLUTION=480x270,FRAME-RATE=30.000,CLOSED-CAPTIONS="cc1",AUDIO="aud3",SUBTITLES="sub1"
+v2/prog_index.m3u8
+
+
+#EXT-X-I-FRAME-STREAM-INF:AVERAGE-BANDWIDTH=183689,BANDWIDTH=187492,CODECS="avc1.64002a",RESOLUTION=1920x1080,URI="v7/iframe_index.m3u8"
+#EXT-X-I-FRAME-STREAM-INF:AVERAGE-BANDWIDTH=132672,BANDWIDTH=136398,CODECS="avc1.640020",RESOLUTION=1280x720,URI="v6/iframe_index.m3u8"
+#EXT-X-I-FRAME-STREAM-INF:AVERAGE-BANDWIDTH=97767,BANDWIDTH=101378,CODECS="avc1.640020",RESOLUTION=960x540,URI="v5/iframe_index.m3u8"
+#EXT-X-I-FRAME-STREAM-INF:AVERAGE-BANDWIDTH=75722,BANDWIDTH=77818,CODECS="avc1.64001e",RESOLUTION=768x432,URI="v4/iframe_index.m3u8"
+#EXT-X-I-FRAME-STREAM-INF:AVERAGE-BANDWIDTH=63522,BANDWIDTH=65091,CODECS="avc1.64001e",RESOLUTION=640x360,URI="v3/iframe_index.m3u8"
+#EXT-X-I-FRAME-STREAM-INF:AVERAGE-BANDWIDTH=39678,BANDWIDTH=40282,CODECS="avc1.640015",RESOLUTION=480x270,URI="v2/iframe_index.m3u8"
+
+
+#EXT-X-MEDIA:TYPE=AUDIO,GROUP-ID="aud1",LANGUAGE="en",NAME="English",AUTOSELECT=YES,DEFAULT=YES,CHANNELS="2",URI="a1/prog_index.m3u8"
+#EXT-X-MEDIA:TYPE=AUDIO,GROUP-ID="aud2",LANGUAGE="en",NAME="English",AUTOSELECT=YES,DEFAULT=YES,CHANNELS="6",URI="a2/prog_index.m3u8"
+#EXT-X-MEDIA:TYPE=AUDIO,GROUP-ID="aud3",LANGUAGE="en",NAME="English",AUTOSELECT=YES,DEFAULT=YES,CHANNELS="6",URI="a3/prog_index.m3u8"
+
+
+#EXT-X-MEDIA:TYPE=CLOSED-CAPTIONS,GROUP-ID="cc1",LANGUAGE="en",NAME="English",AUTOSELECT=YES,DEFAULT=YES,INSTREAM-ID="CC1"
+
+
+#EXT-X-MEDIA:TYPE=SUBTITLES,GROUP-ID="sub1",LANGUAGE="en",NAME="English",AUTOSELECT=YES,DEFAULT=YES,FORCED=NO,URI="s1/en/prog_index.m3u8"
diff --git a/test/testdata/mpd/float_duration.mpd b/test/testdata/mpd/float_duration.mpd
new file mode 100644
index 0000000..8dc1d2d
--- /dev/null
+++ b/test/testdata/mpd/float_duration.mpd
@@ -0,0 +1,18 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<MPD xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xmlns="urn:mpeg:dash:schema:mpd:2011" type="static" minBufferTime="PT2S" profiles="urn:mpeg:dash:profile:isoff-on-demand:2011" mediaPresentationDuration="PT6014S">
+ <Period bitstreamSwitching="true">
+ <AdaptationSet mimeType="audio/mp4" codecs="mp4a.40.2" startWithSAP="1" segmentAlignment="true">
+ <SegmentTemplate timescale="1000000" presentationTimeOffset="0" initialization="ai_$RepresentationID$.mp4d" media="a_$RepresentationID$_$Number$.mp4d" duration="2000000.0" startNumber="0"></SegmentTemplate>
+ <Representation id="318597" bandwidth="61587"></Representation>
+ </AdaptationSet>
+ <AdaptationSet mimeType="video/mp4" startWithSAP="1" segmentAlignment="true">
+ <SegmentTemplate timescale="1000000" presentationTimeOffset="0" initialization="vi_$RepresentationID$.mp4d" media="v_$RepresentationID$_$Number$.mp4d" duration="2000000.0" startNumber="0"></SegmentTemplate>
+ <Representation id="318597" codecs="avc1.42001f" width="340" height="192" bandwidth="318597"></Representation>
+ <Representation id="638590" codecs="avc1.42001f" width="512" height="288" bandwidth="638590"></Representation>
+ <Representation id="1022565" codecs="avc1.4d001f" width="688" height="384" bandwidth="1022565"></Representation>
+ <Representation id="2046506" codecs="avc1.4d001f" width="1024" height="576" bandwidth="2046506"></Representation>
+ <Representation id="3998017" codecs="avc1.640029" width="1280" height="720" bandwidth="3998017"></Representation>
+ <Representation id="5997485" codecs="avc1.640032" width="1920" height="1080" bandwidth="5997485"></Representation>
+ </AdaptationSet>
+ </Period>
+</MPD>
\ No newline at end of file
diff --git a/test/testdata/mpd/subtitles.mpd b/test/testdata/mpd/subtitles.mpd
new file mode 100644
index 0000000..6f948ad
--- /dev/null
+++ b/test/testdata/mpd/subtitles.mpd
@@ -0,0 +1,351 @@
+<?xml version="1.0" encoding="utf-8"?>
+<!-- Created with Unified Streaming Platform (version=1.10.18-20255) -->
+<MPD
+ xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
+ xmlns="urn:mpeg:dash:schema:mpd:2011"
+ xsi:schemaLocation="urn:mpeg:dash:schema:mpd:2011 http://standards.iso.org/ittf/PubliclyAvailableStandards/MPEG-DASH_schema_files/DASH-MPD.xsd"
+ type="static"
+ mediaPresentationDuration="PT14M48S"
+ maxSegmentDuration="PT1M"
+ minBufferTime="PT10S"
+ profiles="urn:mpeg:dash:profile:isoff-live:2011">
+ <Period
+ id="1"
+ duration="PT14M48S">
+ <BaseURL>dash/</BaseURL>
+ <AdaptationSet
+ id="1"
+ group="1"
+ contentType="audio"
+ segmentAlignment="true"
+ audioSamplingRate="48000"
+ mimeType="audio/mp4"
+ codecs="mp4a.40.2"
+ startWithSAP="1">
+ <AudioChannelConfiguration
+ schemeIdUri="urn:mpeg:dash:23003:3:audio_channel_configuration:2011"
+ value="2" />
+ <Role schemeIdUri="urn:mpeg:dash:role:2011" value="main" />
+ <SegmentTemplate
+ timescale="48000"
+ initialization="3144-kZT4LWMQw6Rh7Kpd-$RepresentationID$.dash"
+ media="3144-kZT4LWMQw6Rh7Kpd-$RepresentationID$-$Time$.dash">
+ <SegmentTimeline>
+ <S t="0" d="96256" r="2" />
+ <S d="95232" />
+ <S d="96256" r="2" />
+ <S d="95232" />
+ <S d="96256" r="2" />
+ <S d="95232" />
+ <S d="96256" r="2" />
+ <S d="95232" />
+ <S d="96256" r="2" />
+ <S d="95232" />
+ <S d="96256" r="2" />
+ <S d="95232" />
+ <S d="96256" r="2" />
+ <S d="95232" />
+ <S d="96256" r="2" />
+ <S d="95232" />
+ <S d="96256" r="2" />
+ <S d="95232" />
+ <S d="96256" r="2" />
+ <S d="95232" />
+ <S d="96256" r="2" />
+ <S d="95232" />
+ <S d="96256" r="2" />
+ <S d="95232" />
+ <S d="96256" r="2" />
+ <S d="95232" />
+ <S d="96256" r="2" />
+ <S d="95232" />
+ <S d="96256" r="2" />
+ <S d="95232" />
+ <S d="96256" r="2" />
+ <S d="95232" />
+ <S d="96256" r="2" />
+ <S d="95232" />
+ <S d="96256" r="2" />
+ <S d="95232" />
+ <S d="96256" r="2" />
+ <S d="95232" />
+ <S d="96256" r="2" />
+ <S d="95232" />
+ <S d="96256" r="2" />
+ <S d="95232" />
+ <S d="96256" r="2" />
+ <S d="95232" />
+ <S d="96256" r="2" />
+ <S d="95232" />
+ <S d="96256" r="2" />
+ <S d="95232" />
+ <S d="96256" r="2" />
+ <S d="95232" />
+ <S d="96256" r="2" />
+ <S d="95232" />
+ <S d="96256" r="2" />
+ <S d="95232" />
+ <S d="96256" r="2" />
+ <S d="95232" />
+ <S d="96256" r="2" />
+ <S d="95232" />
+ <S d="96256" r="2" />
+ <S d="95232" />
+ <S d="96256" r="2" />
+ <S d="95232" />
+ <S d="96256" r="2" />
+ <S d="95232" />
+ <S d="96256" r="2" />
+ <S d="95232" />
+ <S d="96256" r="2" />
+ <S d="95232" />
+ <S d="96256" r="2" />
+ <S d="95232" />
+ <S d="96256" r="2" />
+ <S d="95232" />
+ <S d="96256" r="2" />
+ <S d="95232" />
+ <S d="96256" r="2" />
+ <S d="95232" />
+ <S d="96256" r="2" />
+ <S d="95232" />
+ <S d="96256" r="2" />
+ <S d="95232" />
+ <S d="96256" r="2" />
+ <S d="95232" />
+ <S d="96256" r="2" />
+ <S d="95232" />
+ <S d="96256" r="2" />
+ <S d="95232" />
+ <S d="96256" r="2" />
+ <S d="95232" />
+ <S d="96256" r="2" />
+ <S d="95232" />
+ <S d="96256" r="2" />
+ <S d="95232" />
+ <S d="96256" r="2" />
+ <S d="95232" />
+ <S d="96256" r="2" />
+ <S d="95232" />
+ <S d="96256" r="2" />
+ <S d="95232" />
+ <S d="96256" r="2" />
+ <S d="95232" />
+ <S d="96256" r="2" />
+ <S d="95232" />
+ <S d="96256" r="2" />
+ <S d="95232" />
+ <S d="96256" r="2" />
+ <S d="95232" />
+ <S d="96256" r="2" />
+ <S d="95232" />
+ <S d="96256" r="2" />
+ <S d="95232" />
+ <S d="96256" r="2" />
+ <S d="95232" />
+ <S d="96256" r="2" />
+ <S d="95232" />
+ <S d="96256" r="2" />
+ <S d="95232" />
+ <S d="96256" r="2" />
+ <S d="95232" />
+ <S d="96256" r="2" />
+ <S d="95232" />
+ <S d="96256" r="2" />
+ <S d="95232" />
+ <S d="96256" r="2" />
+ <S d="95232" />
+ <S d="96256" r="2" />
+ <S d="95232" />
+ <S d="96256" r="2" />
+ <S d="95232" />
+ <S d="96256" r="2" />
+ <S d="95232" />
+ <S d="96256" r="2" />
+ <S d="95232" />
+ <S d="96256" r="2" />
+ <S d="95232" />
+ <S d="96256" r="2" />
+ <S d="95232" />
+ <S d="96256" r="2" />
+ <S d="95232" />
+ <S d="96256" r="2" />
+ <S d="95232" />
+ <S d="96256" r="2" />
+ <S d="95232" />
+ <S d="96256" r="2" />
+ <S d="95232" />
+ <S d="96256" r="2" />
+ <S d="95232" />
+ <S d="96256" r="2" />
+ <S d="95232" />
+ <S d="96256" r="2" />
+ <S d="95232" />
+ <S d="96256" r="2" />
+ <S d="95232" />
+ <S d="96256" r="2" />
+ <S d="95232" />
+ <S d="96256" r="2" />
+ <S d="95232" />
+ <S d="96256" r="2" />
+ <S d="95232" />
+ <S d="96256" r="2" />
+ <S d="95232" />
+ <S d="96256" r="2" />
+ <S d="95232" />
+ <S d="96256" r="2" />
+ <S d="95232" />
+ <S d="96256" r="2" />
+ <S d="95232" />
+ <S d="96256" r="2" />
+ <S d="95232" />
+ <S d="96256" r="2" />
+ <S d="95232" />
+ <S d="96256" r="2" />
+ <S d="95232" />
+ <S d="96256" r="2" />
+ <S d="95232" />
+ <S d="96256" r="2" />
+ <S d="95232" />
+ <S d="96256" r="2" />
+ <S d="95232" />
+ <S d="96256" r="2" />
+ <S d="95232" />
+ <S d="96256" r="2" />
+ <S d="95232" />
+ <S d="96256" r="2" />
+ <S d="95232" />
+ <S d="96256" r="2" />
+ <S d="95232" />
+ <S d="96256" r="2" />
+ <S d="95232" />
+ <S d="96256" r="2" />
+ <S d="95232" />
+ <S d="96256" r="2" />
+ <S d="95232" />
+ <S d="96256" r="2" />
+ <S d="95232" />
+ <S d="96256" r="2" />
+ <S d="95232" />
+ <S d="96256" r="2" />
+ <S d="95232" />
+ <S d="96256" r="2" />
+ <S d="95232" />
+ <S d="96256" r="2" />
+ <S d="95232" />
+ <S d="96256" r="2" />
+ <S d="95232" />
+ <S d="96256" r="2" />
+ <S d="95232" />
+ <S d="96256" r="2" />
+ <S d="95232" />
+ <S d="96256" r="2" />
+ <S d="95232" />
+ <S d="96256" r="2" />
+ <S d="95232" />
+ <S d="96256" r="2" />
+ <S d="95232" />
+ <S d="96256" r="2" />
+ <S d="95232" />
+ <S d="96256" r="2" />
+ <S d="95232" />
+ <S d="96256" r="2" />
+ <S d="95232" />
+ <S d="96256" r="2" />
+ <S d="95232" />
+ <S d="3584" />
+ </SegmentTimeline>
+ </SegmentTemplate>
+ <Representation
+ id="audio=128001"
+ bandwidth="128001">
+ </Representation>
+ </AdaptationSet>
+ <AdaptationSet
+ id="2"
+ group="3"
+ contentType="text"
+ lang="en"
+ mimeType="application/mp4"
+ codecs="stpp"
+ startWithSAP="1">
+ <Role schemeIdUri="urn:mpeg:dash:role:2011" value="subtitle" />
+ <SegmentTemplate
+ timescale="1000"
+ initialization="3144-kZT4LWMQw6Rh7Kpd-$RepresentationID$.dash"
+ media="3144-kZT4LWMQw6Rh7Kpd-$RepresentationID$-$Time$.dash">
+ <SegmentTimeline>
+ <S t="0" d="60000" r="9" />
+ <S d="24000" />
+ </SegmentTimeline>
+ </SegmentTemplate>
+ <Representation
+ id="textstream_eng=1000"
+ bandwidth="1000">
+ </Representation>
+ </AdaptationSet>
+ <AdaptationSet
+ id="3"
+ group="2"
+ contentType="video"
+ par="960:409"
+ minBandwidth="100000"
+ maxBandwidth="4482000"
+ maxWidth="1689"
+ maxHeight="720"
+ segmentAlignment="true"
+ mimeType="video/mp4"
+ codecs="avc1.4D401F"
+ startWithSAP="1">
+ <Role schemeIdUri="urn:mpeg:dash:role:2011" value="main" />
+ <SegmentTemplate
+ timescale="12288"
+ initialization="3144-kZT4LWMQw6Rh7Kpd-$RepresentationID$.dash"
+ media="3144-kZT4LWMQw6Rh7Kpd-$RepresentationID$-$Time$.dash">
+ <SegmentTimeline>
+ <S t="0" d="24576" r="443" />
+ </SegmentTimeline>
+ </SegmentTemplate>
+ <Representation
+ id="video=100000"
+ bandwidth="100000"
+ width="336"
+ height="144"
+ sar="2880:2863"
+ scanType="progressive">
+ </Representation>
+ <Representation
+ id="video=326000"
+ bandwidth="326000"
+ width="562"
+ height="240"
+ sar="115200:114929"
+ scanType="progressive">
+ </Representation>
+ <Representation
+ id="video=698000"
+ bandwidth="698000"
+ width="844"
+ height="360"
+ sar="86400:86299"
+ scanType="progressive">
+ </Representation>
+ <Representation
+ id="video=1493000"
+ bandwidth="1493000"
+ width="1126"
+ height="480"
+ sar="230400:230267"
+ scanType="progressive">
+ </Representation>
+ <Representation
+ id="video=4482000"
+ bandwidth="4482000"
+ width="1688"
+ height="720"
+ sar="86400:86299"
+ scanType="progressive">
+ </Representation>
+ </AdaptationSet>
+ </Period>
+</MPD>
diff --git a/test/testdata/mpd/unfragmented.mpd b/test/testdata/mpd/unfragmented.mpd
new file mode 100644
index 0000000..5a3720b
--- /dev/null
+++ b/test/testdata/mpd/unfragmented.mpd
@@ -0,0 +1,28 @@
+<?xml version="1.0" encoding="UTF-8" standalone="yes"?>
+<MPD mediaPresentationDuration="PT54.915S" minBufferTime="PT1.500S" profiles="urn:mpeg:dash:profile:isoff-on-demand:2011" type="static" xmlns="urn:mpeg:dash:schema:mpd:2011">
+ <Period duration="PT54.915S">
+ <AdaptationSet segmentAlignment="true" subsegmentAlignment="true" subsegmentStartsWithSAP="1">
+ <Representation bandwidth="804261" codecs="avc1.4d401e" frameRate="30" height="360" id="VIDEO-1" mimeType="video/mp4" startWithSAP="1" width="360">
+ <BaseURL>DASH_360</BaseURL>
+ <SegmentBase indexRange="915-1114" indexRangeExact="true">
+ <Initialization range="0-914"/>
+ </SegmentBase>
+ </Representation>
+ <Representation bandwidth="608000" codecs="avc1.4d401e" frameRate="30" height="240" id="VIDEO-2" mimeType="video/mp4" startWithSAP="1" width="240">
+ <BaseURL>DASH_240</BaseURL>
+ <SegmentBase indexRange="913-1112" indexRangeExact="true">
+ <Initialization range="0-912"/>
+ </SegmentBase>
+ </Representation>
+ </AdaptationSet>
+ <AdaptationSet>
+ <Representation audioSamplingRate="48000" bandwidth="129870" codecs="mp4a.40.2" id="AUDIO-1" mimeType="audio/mp4" startWithSAP="1">
+ <AudioChannelConfiguration schemeIdUri="urn:mpeg:dash:23003:3:audio_channel_configuration:2011" value="2"/>
+ <BaseURL>audio</BaseURL>
+ <SegmentBase indexRange="832-1007" indexRangeExact="true">
+ <Initialization range="0-831"/>
+ </SegmentBase>
+ </Representation>
+ </AdaptationSet>
+ </Period>
+</MPD>
diff --git a/test/testdata/mpd/urls_only.mpd b/test/testdata/mpd/urls_only.mpd
new file mode 100644
index 0000000..2b9d595
--- /dev/null
+++ b/test/testdata/mpd/urls_only.mpd
@@ -0,0 +1,218 @@
+<?xml version="1.0" ?>
+<MPD maxSegmentDuration="PT0H0M10.000S" mediaPresentationDuration="PT0H4M1.728S" minBufferTime="PT1.500S" profiles="urn:mpeg:dash:profile:isoff-main:2011" type="static" xmlns="urn:mpeg:dash:schema:mpd:2011">
+ <Period duration="PT0H4M1.728S">
+ <AdaptationSet bitstreamSwitching="true" lang="und" maxHeight="1080" maxWidth="1920" par="16:9" segmentAlignment="true">
+ <ContentComponent contentType="video" id="1"/>
+ <Representation audioSamplingRate="44100" bandwidth="200000" codecs="avc3.42c01e,mp4a.40.2" frameRate="25" height="144" id="h264_aac_144p_m4s" mimeType="video/mp4" sar="1:1" startWithSAP="1" width="256">
+ <SegmentList duration="10000" timescale="1000">
+ <Initialization sourceURL="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_144p_m4s/init/432f65a0.mp4"/>
+ <SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_144p_m4s/0/432f65a0.m4s"/>
+ <SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_144p_m4s/1/432f65a0.m4s"/>
+ <SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_144p_m4s/2/432f65a0.m4s"/>
+ <SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_144p_m4s/3/432f65a0.m4s"/>
+ <SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_144p_m4s/4/432f65a0.m4s"/>
+ <SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_144p_m4s/5/432f65a0.m4s"/>
+ <SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_144p_m4s/6/432f65a0.m4s"/>
+ <SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_144p_m4s/7/432f65a0.m4s"/>
+ <SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_144p_m4s/8/432f65a0.m4s"/>
+ <SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_144p_m4s/9/432f65a0.m4s"/>
+ <SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_144p_m4s/10/432f65a0.m4s"/>
+ <SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_144p_m4s/11/432f65a0.m4s"/>
+ <SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_144p_m4s/12/432f65a0.m4s"/>
+ <SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_144p_m4s/13/432f65a0.m4s"/>
+ <SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_144p_m4s/14/432f65a0.m4s"/>
+ <SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_144p_m4s/15/432f65a0.m4s"/>
+ <SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_144p_m4s/16/432f65a0.m4s"/>
+ <SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_144p_m4s/17/432f65a0.m4s"/>
+ <SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_144p_m4s/18/432f65a0.m4s"/>
+ <SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_144p_m4s/19/432f65a0.m4s"/>
+ <SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_144p_m4s/20/432f65a0.m4s"/>
+ <SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_144p_m4s/21/432f65a0.m4s"/>
+ <SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_144p_m4s/22/432f65a0.m4s"/>
+ <SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_144p_m4s/23/432f65a0.m4s"/>
+ <SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_144p_m4s/24/432f65a0.m4s"/>
+ </SegmentList>
+ </Representation>
+ <Representation audioSamplingRate="44100" bandwidth="400000" codecs="avc3.42c01e,mp4a.40.2" frameRate="25" height="240" id="h264_aac_240p_m4s" mimeType="video/mp4" sar="160:159" startWithSAP="1" width="424">
+ <SegmentList duration="10000" timescale="1000">
+ <Initialization sourceURL="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_240p_m4s/init/432f65a0.mp4"/>
+ <SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_240p_m4s/0/432f65a0.m4s"/>
+ <SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_240p_m4s/1/432f65a0.m4s"/>
+ <SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_240p_m4s/2/432f65a0.m4s"/>
+ <SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_240p_m4s/3/432f65a0.m4s"/>
+ <SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_240p_m4s/4/432f65a0.m4s"/>
+ <SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_240p_m4s/5/432f65a0.m4s"/>
+ <SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_240p_m4s/6/432f65a0.m4s"/>
+ <SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_240p_m4s/7/432f65a0.m4s"/>
+ <SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_240p_m4s/8/432f65a0.m4s"/>
+ <SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_240p_m4s/9/432f65a0.m4s"/>
+ <SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_240p_m4s/10/432f65a0.m4s"/>
+ <SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_240p_m4s/11/432f65a0.m4s"/>
+ <SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_240p_m4s/12/432f65a0.m4s"/>
+ <SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_240p_m4s/13/432f65a0.m4s"/>
+ <SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_240p_m4s/14/432f65a0.m4s"/>
+ <SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_240p_m4s/15/432f65a0.m4s"/>
+ <SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_240p_m4s/16/432f65a0.m4s"/>
+ <SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_240p_m4s/17/432f65a0.m4s"/>
+ <SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_240p_m4s/18/432f65a0.m4s"/>
+ <SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_240p_m4s/19/432f65a0.m4s"/>
+ <SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_240p_m4s/20/432f65a0.m4s"/>
+ <SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_240p_m4s/21/432f65a0.m4s"/>
+ <SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_240p_m4s/22/432f65a0.m4s"/>
+ <SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_240p_m4s/23/432f65a0.m4s"/>
+ <SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_240p_m4s/24/432f65a0.m4s"/>
+ </SegmentList>
+ </Representation>
+ <Representation audioSamplingRate="44100" bandwidth="800000" codecs="avc3.42c01e,mp4a.40.2" frameRate="25" height="360" id="h264_aac_360p_m4s" mimeType="video/mp4" sar="1:1" startWithSAP="1" width="640">
+ <SegmentList duration="10000" timescale="1000">
+ <Initialization sourceURL="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_360p_m4s/init/432f65a0.mp4"/>
+ <SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_360p_m4s/0/432f65a0.m4s"/>
+ <SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_360p_m4s/1/432f65a0.m4s"/>
+ <SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_360p_m4s/2/432f65a0.m4s"/>
+ <SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_360p_m4s/3/432f65a0.m4s"/>
+ <SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_360p_m4s/4/432f65a0.m4s"/>
+ <SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_360p_m4s/5/432f65a0.m4s"/>
+ <SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_360p_m4s/6/432f65a0.m4s"/>
+ <SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_360p_m4s/7/432f65a0.m4s"/>
+ <SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_360p_m4s/8/432f65a0.m4s"/>
+ <SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_360p_m4s/9/432f65a0.m4s"/>
+ <SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_360p_m4s/10/432f65a0.m4s"/>
+ <SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_360p_m4s/11/432f65a0.m4s"/>
+ <SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_360p_m4s/12/432f65a0.m4s"/>
+ <SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_360p_m4s/13/432f65a0.m4s"/>
+ <SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_360p_m4s/14/432f65a0.m4s"/>
+ <SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_360p_m4s/15/432f65a0.m4s"/>
+ <SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_360p_m4s/16/432f65a0.m4s"/>
+ <SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_360p_m4s/17/432f65a0.m4s"/>
+ <SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_360p_m4s/18/432f65a0.m4s"/>
+ <SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_360p_m4s/19/432f65a0.m4s"/>
+ <SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_360p_m4s/20/432f65a0.m4s"/>
+ <SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_360p_m4s/21/432f65a0.m4s"/>
+ <SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_360p_m4s/22/432f65a0.m4s"/>
+ <SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_360p_m4s/23/432f65a0.m4s"/>
+ <SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_360p_m4s/24/432f65a0.m4s"/>
+ </SegmentList>
+ </Representation>
+ <Representation audioSamplingRate="44100" bandwidth="1200000" codecs="avc3.42c01e,mp4a.40.2" frameRate="25" height="480" id="h264_aac_480p_m4s" mimeType="video/mp4" sar="320:321" startWithSAP="1" width="856">
+ <SegmentList duration="10000" timescale="1000">
+ <Initialization sourceURL="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_480p_m4s/init/432f65a0.mp4"/>
+ <SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_480p_m4s/0/432f65a0.m4s"/>
+ <SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_480p_m4s/1/432f65a0.m4s"/>
+ <SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_480p_m4s/2/432f65a0.m4s"/>
+ <SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_480p_m4s/3/432f65a0.m4s"/>
+ <SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_480p_m4s/4/432f65a0.m4s"/>
+ <SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_480p_m4s/5/432f65a0.m4s"/>
+ <SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_480p_m4s/6/432f65a0.m4s"/>
+ <SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_480p_m4s/7/432f65a0.m4s"/>
+ <SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_480p_m4s/8/432f65a0.m4s"/>
+ <SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_480p_m4s/9/432f65a0.m4s"/>
+ <SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_480p_m4s/10/432f65a0.m4s"/>
+ <SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_480p_m4s/11/432f65a0.m4s"/>
+ <SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_480p_m4s/12/432f65a0.m4s"/>
+ <SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_480p_m4s/13/432f65a0.m4s"/>
+ <SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_480p_m4s/14/432f65a0.m4s"/>
+ <SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_480p_m4s/15/432f65a0.m4s"/>
+ <SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_480p_m4s/16/432f65a0.m4s"/>
+ <SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_480p_m4s/17/432f65a0.m4s"/>
+ <SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_480p_m4s/18/432f65a0.m4s"/>
+ <SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_480p_m4s/19/432f65a0.m4s"/>
+ <SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_480p_m4s/20/432f65a0.m4s"/>
+ <SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_480p_m4s/21/432f65a0.m4s"/>
+ <SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_480p_m4s/22/432f65a0.m4s"/>
+ <SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_480p_m4s/23/432f65a0.m4s"/>
+ <SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_480p_m4s/24/432f65a0.m4s"/>
+ </SegmentList>
+ </Representation>
+ <Representation audioSamplingRate="44100" bandwidth="1600000" codecs="avc3.42c01e,mp4a.40.2" frameRate="25" height="576" id="h264_aac_576p_m4s" mimeType="video/mp4" sar="1:1" startWithSAP="1" width="1024">
+ <SegmentList duration="10000" timescale="1000">
+ <Initialization sourceURL="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_576p_m4s/init/432f65a0.mp4"/>
+ <SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_576p_m4s/0/432f65a0.m4s"/>
+ <SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_576p_m4s/1/432f65a0.m4s"/>
+ <SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_576p_m4s/2/432f65a0.m4s"/>
+ <SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_576p_m4s/3/432f65a0.m4s"/>
+ <SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_576p_m4s/4/432f65a0.m4s"/>
+ <SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_576p_m4s/5/432f65a0.m4s"/>
+ <SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_576p_m4s/6/432f65a0.m4s"/>
+ <SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_576p_m4s/7/432f65a0.m4s"/>
+ <SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_576p_m4s/8/432f65a0.m4s"/>
+ <SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_576p_m4s/9/432f65a0.m4s"/>
+ <SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_576p_m4s/10/432f65a0.m4s"/>
+ <SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_576p_m4s/11/432f65a0.m4s"/>
+ <SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_576p_m4s/12/432f65a0.m4s"/>
+ <SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_576p_m4s/13/432f65a0.m4s"/>
+ <SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_576p_m4s/14/432f65a0.m4s"/>
+ <SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_576p_m4s/15/432f65a0.m4s"/>
+ <SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_576p_m4s/16/432f65a0.m4s"/>
+ <SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_576p_m4s/17/432f65a0.m4s"/>
+ <SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_576p_m4s/18/432f65a0.m4s"/>
+ <SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_576p_m4s/19/432f65a0.m4s"/>
+ <SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_576p_m4s/20/432f65a0.m4s"/>
+ <SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_576p_m4s/21/432f65a0.m4s"/>
+ <SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_576p_m4s/22/432f65a0.m4s"/>
+ <SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_576p_m4s/23/432f65a0.m4s"/>
+ <SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_576p_m4s/24/432f65a0.m4s"/>
+ </SegmentList>
+ </Representation>
+ <Representation audioSamplingRate="44100" bandwidth="2400000" codecs="avc3.42c01e,mp4a.40.2" frameRate="25" height="720" id="h264_aac_720p_m4s" mimeType="video/mp4" sar="1:1" startWithSAP="1" width="1280">
+ <SegmentList duration="10000" timescale="1000">
+ <Initialization sourceURL="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_720p_m4s/init/432f65a0.mp4"/>
+ <SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_720p_m4s/0/432f65a0.m4s"/>
+ <SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_720p_m4s/1/432f65a0.m4s"/>
+ <SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_720p_m4s/2/432f65a0.m4s"/>
+ <SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_720p_m4s/3/432f65a0.m4s"/>
+ <SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_720p_m4s/4/432f65a0.m4s"/>
+ <SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_720p_m4s/5/432f65a0.m4s"/>
+ <SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_720p_m4s/6/432f65a0.m4s"/>
+ <SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_720p_m4s/7/432f65a0.m4s"/>
+ <SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_720p_m4s/8/432f65a0.m4s"/>
+ <SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_720p_m4s/9/432f65a0.m4s"/>
+ <SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_720p_m4s/10/432f65a0.m4s"/>
+ <SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_720p_m4s/11/432f65a0.m4s"/>
+ <SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_720p_m4s/12/432f65a0.m4s"/>
+ <SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_720p_m4s/13/432f65a0.m4s"/>
+ <SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_720p_m4s/14/432f65a0.m4s"/>
+ <SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_720p_m4s/15/432f65a0.m4s"/>
+ <SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_720p_m4s/16/432f65a0.m4s"/>
+ <SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_720p_m4s/17/432f65a0.m4s"/>
+ <SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_720p_m4s/18/432f65a0.m4s"/>
+ <SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_720p_m4s/19/432f65a0.m4s"/>
+ <SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_720p_m4s/20/432f65a0.m4s"/>
+ <SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_720p_m4s/21/432f65a0.m4s"/>
+ <SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_720p_m4s/22/432f65a0.m4s"/>
+ <SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_720p_m4s/23/432f65a0.m4s"/>
+ <SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_720p_m4s/24/432f65a0.m4s"/>
+ </SegmentList>
+ </Representation>
+ <Representation audioSamplingRate="44100" bandwidth="4400000" codecs="avc3.42c01e,mp4a.40.2" frameRate="25" height="1080" id="h264_aac_1080p_m4s" mimeType="video/mp4" sar="1:1" startWithSAP="1" width="1920">
+ <SegmentList duration="10000" timescale="1000">
+ <Initialization sourceURL="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_1080p_m4s/init/432f65a0.mp4"/>
+ <SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_1080p_m4s/0/432f65a0.m4s"/>
+ <SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_1080p_m4s/1/432f65a0.m4s"/>
+ <SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_1080p_m4s/2/432f65a0.m4s"/>
+ <SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_1080p_m4s/3/432f65a0.m4s"/>
+ <SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_1080p_m4s/4/432f65a0.m4s"/>
+ <SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_1080p_m4s/5/432f65a0.m4s"/>
+ <SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_1080p_m4s/6/432f65a0.m4s"/>
+ <SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_1080p_m4s/7/432f65a0.m4s"/>
+ <SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_1080p_m4s/8/432f65a0.m4s"/>
+ <SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_1080p_m4s/9/432f65a0.m4s"/>
+ <SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_1080p_m4s/10/432f65a0.m4s"/>
+ <SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_1080p_m4s/11/432f65a0.m4s"/>
+ <SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_1080p_m4s/12/432f65a0.m4s"/>
+ <SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_1080p_m4s/13/432f65a0.m4s"/>
+ <SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_1080p_m4s/14/432f65a0.m4s"/>
+ <SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_1080p_m4s/15/432f65a0.m4s"/>
+ <SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_1080p_m4s/16/432f65a0.m4s"/>
+ <SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_1080p_m4s/17/432f65a0.m4s"/>
+ <SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_1080p_m4s/18/432f65a0.m4s"/>
+ <SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_1080p_m4s/19/432f65a0.m4s"/>
+ <SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_1080p_m4s/20/432f65a0.m4s"/>
+ <SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_1080p_m4s/21/432f65a0.m4s"/>
+ <SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_1080p_m4s/22/432f65a0.m4s"/>
+ <SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_1080p_m4s/23/432f65a0.m4s"/>
+ <SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_1080p_m4s/24/432f65a0.m4s"/>
+ </SegmentList>
+ </Representation>
+ </AdaptationSet>
+ </Period>
+</MPD>
diff --git a/test/testdata/thumbnails/foo %d bar/foo_%d.webp b/test/testdata/thumbnails/foo %d bar/foo_%d.webp
new file mode 100644
index 0000000..d64d083
--- /dev/null
+++ b/test/testdata/thumbnails/foo %d bar/foo_%d.webp
Binary files differ
diff --git a/test/testdata/xspf/foo_xspf.xspf b/test/testdata/xspf/foo_xspf.xspf
new file mode 100644
index 0000000..b7f0086
--- /dev/null
+++ b/test/testdata/xspf/foo_xspf.xspf
@@ -0,0 +1,34 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<playlist version="1" xmlns="http://xspf.org/ns/0/">
+ <date>2018-03-09T18:01:43Z</date>
+ <trackList>
+ <track>
+ <location>cd1/track%201.mp3</location>
+ <title>Pandemonium</title>
+ <creator>Foilverb</creator>
+ <annotation>Visit http://bigbrother404.bandcamp.com</annotation>
+ <album>Pandemonium EP</album>
+ <trackNum>1</trackNum>
+ <duration>202416</duration>
+ </track>
+ <track>
+ <location>../%E3%83%88%E3%83%A9%E3%83%83%E3%82%AF%E3%80%80%EF%BC%92.mp3</location>
+ <title>Final Cartridge (Nichico Twelve Remix)</title>
+ <annotation>Visit http://bigbrother404.bandcamp.com</annotation>
+ <creator>Foilverb</creator>
+ <album>Pandemonium EP</album>
+ <trackNum>2</trackNum>
+ <duration>255857</duration>
+ </track>
+ <track>
+ <location>track3.mp3</location>
+ <location>https://example.com/track3.mp3</location>
+ <title>Rebuilding Nightingale</title>
+ <annotation>Visit http://bigbrother404.bandcamp.com</annotation>
+ <creator>Foilverb</creator>
+ <album>Pandemonium EP</album>
+ <trackNum>3</trackNum>
+ <duration>287915</duration>
+ </track>
+ </trackList>
+</playlist>
diff --git a/test/testdata/yt_dlp_plugins/extractor/_ignore.py b/test/testdata/yt_dlp_plugins/extractor/_ignore.py
new file mode 100644
index 0000000..57faf75
--- /dev/null
+++ b/test/testdata/yt_dlp_plugins/extractor/_ignore.py
@@ -0,0 +1,5 @@
+from yt_dlp.extractor.common import InfoExtractor
+
+
+class IgnorePluginIE(InfoExtractor):
+ pass
diff --git a/test/testdata/yt_dlp_plugins/extractor/ignore.py b/test/testdata/yt_dlp_plugins/extractor/ignore.py
new file mode 100644
index 0000000..816a16a
--- /dev/null
+++ b/test/testdata/yt_dlp_plugins/extractor/ignore.py
@@ -0,0 +1,12 @@
+from yt_dlp.extractor.common import InfoExtractor
+
+
+class IgnoreNotInAllPluginIE(InfoExtractor):
+ pass
+
+
+class InAllPluginIE(InfoExtractor):
+ pass
+
+
+__all__ = ['InAllPluginIE']
diff --git a/test/testdata/yt_dlp_plugins/extractor/normal.py b/test/testdata/yt_dlp_plugins/extractor/normal.py
new file mode 100644
index 0000000..b09009b
--- /dev/null
+++ b/test/testdata/yt_dlp_plugins/extractor/normal.py
@@ -0,0 +1,9 @@
+from yt_dlp.extractor.common import InfoExtractor
+
+
+class NormalPluginIE(InfoExtractor):
+ pass
+
+
+class _IgnoreUnderscorePluginIE(InfoExtractor):
+ pass
diff --git a/test/testdata/yt_dlp_plugins/postprocessor/normal.py b/test/testdata/yt_dlp_plugins/postprocessor/normal.py
new file mode 100644
index 0000000..315b85a
--- /dev/null
+++ b/test/testdata/yt_dlp_plugins/postprocessor/normal.py
@@ -0,0 +1,5 @@
+from yt_dlp.postprocessor.common import PostProcessor
+
+
+class NormalPluginPP(PostProcessor):
+ pass
diff --git a/test/testdata/zipped_plugins/yt_dlp_plugins/extractor/zipped.py b/test/testdata/zipped_plugins/yt_dlp_plugins/extractor/zipped.py
new file mode 100644
index 0000000..01542e0
--- /dev/null
+++ b/test/testdata/zipped_plugins/yt_dlp_plugins/extractor/zipped.py
@@ -0,0 +1,5 @@
+from yt_dlp.extractor.common import InfoExtractor
+
+
+class ZippedPluginIE(InfoExtractor):
+ pass
diff --git a/test/testdata/zipped_plugins/yt_dlp_plugins/postprocessor/zipped.py b/test/testdata/zipped_plugins/yt_dlp_plugins/postprocessor/zipped.py
new file mode 100644
index 0000000..223822b
--- /dev/null
+++ b/test/testdata/zipped_plugins/yt_dlp_plugins/postprocessor/zipped.py
@@ -0,0 +1,5 @@
+from yt_dlp.postprocessor.common import PostProcessor
+
+
+class ZippedPluginPP(PostProcessor):
+ pass
diff --git a/yt-dlp.cmd b/yt-dlp.cmd
new file mode 100644
index 0000000..5537e0e
--- /dev/null
+++ b/yt-dlp.cmd
@@ -0,0 +1 @@
+@py -Werror -Xdev "%~dp0yt_dlp\__main__.py" %*
diff --git a/yt-dlp.sh b/yt-dlp.sh
new file mode 100755
index 0000000..ce74df8
--- /dev/null
+++ b/yt-dlp.sh
@@ -0,0 +1,2 @@
+#!/usr/bin/env sh
+exec "${PYTHON:-python3}" -Werror -Xdev "$(dirname "$(realpath "$0")")/yt_dlp/__main__.py" "$@"
diff --git a/yt_dlp/YoutubeDL.py b/yt_dlp/YoutubeDL.py
new file mode 100644
index 0000000..c34d97b
--- /dev/null
+++ b/yt_dlp/YoutubeDL.py
@@ -0,0 +1,4339 @@
+import collections
+import contextlib
+import copy
+import datetime
+import errno
+import fileinput
+import http.cookiejar
+import io
+import itertools
+import json
+import locale
+import operator
+import os
+import random
+import re
+import shutil
+import string
+import subprocess
+import sys
+import tempfile
+import time
+import tokenize
+import traceback
+import unicodedata
+
+from .cache import Cache
+from .compat import functools, urllib # isort: split
+from .compat import compat_os_name, compat_shlex_quote, urllib_req_to_req
+from .cookies import LenientSimpleCookie, load_cookies
+from .downloader import FFmpegFD, get_suitable_downloader, shorten_protocol_name
+from .downloader.rtmp import rtmpdump_version
+from .extractor import gen_extractor_classes, get_info_extractor
+from .extractor.common import UnsupportedURLIE
+from .extractor.openload import PhantomJSwrapper
+from .minicurses import format_text
+from .networking import HEADRequest, Request, RequestDirector
+from .networking.common import _REQUEST_HANDLERS, _RH_PREFERENCES
+from .networking.exceptions import (
+ HTTPError,
+ NoSupportingHandlers,
+ RequestError,
+ SSLError,
+ network_exceptions,
+)
+from .plugins import directories as plugin_directories
+from .postprocessor import _PLUGIN_CLASSES as plugin_pps
+from .postprocessor import (
+ EmbedThumbnailPP,
+ FFmpegFixupDuplicateMoovPP,
+ FFmpegFixupDurationPP,
+ FFmpegFixupM3u8PP,
+ FFmpegFixupM4aPP,
+ FFmpegFixupStretchedPP,
+ FFmpegFixupTimestampPP,
+ FFmpegMergerPP,
+ FFmpegPostProcessor,
+ FFmpegVideoConvertorPP,
+ MoveFilesAfterDownloadPP,
+ get_postprocessor,
+)
+from .postprocessor.ffmpeg import resolve_mapping as resolve_recode_mapping
+from .update import (
+ REPOSITORY,
+ _get_system_deprecation,
+ _make_label,
+ current_git_head,
+ detect_variant,
+)
+from .utils import (
+ DEFAULT_OUTTMPL,
+ IDENTITY,
+ LINK_TEMPLATES,
+ MEDIA_EXTENSIONS,
+ NO_DEFAULT,
+ NUMBER_RE,
+ OUTTMPL_TYPES,
+ POSTPROCESS_WHEN,
+ STR_FORMAT_RE_TMPL,
+ STR_FORMAT_TYPES,
+ ContentTooShortError,
+ DateRange,
+ DownloadCancelled,
+ DownloadError,
+ EntryNotInPlaylist,
+ ExistingVideoReached,
+ ExtractorError,
+ FormatSorter,
+ GeoRestrictedError,
+ ISO3166Utils,
+ LazyList,
+ MaxDownloadsReached,
+ Namespace,
+ PagedList,
+ PlaylistEntries,
+ Popen,
+ PostProcessingError,
+ ReExtractInfo,
+ RejectedVideoReached,
+ SameFileError,
+ UnavailableVideoError,
+ UserNotLive,
+ age_restricted,
+ args_to_str,
+ bug_reports_message,
+ date_from_str,
+ deprecation_warning,
+ determine_ext,
+ determine_protocol,
+ encode_compat_str,
+ encodeFilename,
+ error_to_compat_str,
+ escapeHTML,
+ expand_path,
+ extract_basic_auth,
+ filter_dict,
+ float_or_none,
+ format_bytes,
+ format_decimal_suffix,
+ format_field,
+ formatSeconds,
+ get_compatible_ext,
+ get_domain,
+ int_or_none,
+ iri_to_uri,
+ is_path_like,
+ join_nonempty,
+ locked_file,
+ make_archive_id,
+ make_dir,
+ number_of_digits,
+ orderedSet,
+ orderedSet_from_options,
+ parse_filesize,
+ preferredencoding,
+ prepend_extension,
+ remove_terminal_sequences,
+ render_table,
+ replace_extension,
+ sanitize_filename,
+ sanitize_path,
+ sanitize_url,
+ str_or_none,
+ strftime_or_none,
+ subtitles_filename,
+ supports_terminal_sequences,
+ system_identifier,
+ timetuple_from_msec,
+ to_high_limit_path,
+ traverse_obj,
+ try_call,
+ try_get,
+ url_basename,
+ variadic,
+ version_tuple,
+ windows_enable_vt_mode,
+ write_json_file,
+ write_string,
+)
+from .utils._utils import _YDLLogger
+from .utils.networking import (
+ HTTPHeaderDict,
+ clean_headers,
+ clean_proxies,
+ std_headers,
+)
+from .version import CHANNEL, ORIGIN, RELEASE_GIT_HEAD, VARIANT, __version__
+
+if compat_os_name == 'nt':
+ import ctypes
+
+
+class YoutubeDL:
+ """YoutubeDL class.
+
+    YoutubeDL objects are the ones responsible for downloading the
+    actual video file and writing it to disk if the user has requested
+    it, among some other tasks. In most cases there should be one per
+    program. Since, given a video URL, the downloader doesn't know how
+    to extract all the needed information (a task that InfoExtractors
+    do), it has to pass the URL to one of them.
+
+    For this, YoutubeDL objects have a method that allows
+    InfoExtractors to be registered in a given order. When it is passed
+    a URL, the YoutubeDL object hands it to the first InfoExtractor it
+    finds that reports being able to handle it. The InfoExtractor extracts
+    all the information about the video or videos the URL refers to, and
+    YoutubeDL processes the extracted information, possibly using a File
+    Downloader to download the video.
+
+ YoutubeDL objects accept a lot of parameters. In order not to saturate
+ the object constructor with arguments, it receives a dictionary of
+ options instead. These options are available through the params
+ attribute for the InfoExtractors to use. The YoutubeDL also
+    registers itself as the downloader in charge of the InfoExtractors
+ that are added to it, so this is a "mutual registration".
+
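+    A minimal usage sketch (the URL below is illustrative; the options
+    dict accepts the keys documented under "Available options"):
+
+        from yt_dlp import YoutubeDL
+
+        # Extract metadata only, without downloading the media file
+        with YoutubeDL({'quiet': True}) as ydl:
+            info = ydl.extract_info('https://example.com/watch?v=xyz',
+                                    download=False)
+            print(info.get('title'))
+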
+ Available options:
+
+ username: Username for authentication purposes.
+ password: Password for authentication purposes.
+ videopassword: Password for accessing a video.
+ ap_mso: Adobe Pass multiple-system operator identifier.
+ ap_username: Multiple-system operator account username.
+ ap_password: Multiple-system operator account password.
+ usenetrc: Use netrc for authentication instead.
+ netrc_location: Location of the netrc file. Defaults to ~/.netrc.
+ netrc_cmd: Use a shell command to get credentials
+ verbose: Print additional info to stdout.
+ quiet: Do not print messages to stdout.
+ no_warnings: Do not print out anything for warnings.
+ forceprint: A dict with keys WHEN mapped to a list of templates to
+ print to stdout. The allowed keys are video or any of the
+ items in utils.POSTPROCESS_WHEN.
+ For compatibility, a single list is also accepted
+ print_to_file: A dict with keys WHEN (same as forceprint) mapped to
+ a list of tuples with (template, filename)
+ forcejson: Force printing info_dict as JSON.
+ dump_single_json: Force printing the info_dict of the whole playlist
+ (or video) as a single JSON line.
+ force_write_download_archive: Force writing download archive regardless
+ of 'skip_download' or 'simulate'.
+ simulate: Do not download the video files. If unset (or None),
+ simulate only if listsubtitles, listformats or list_thumbnails is used
+ format: Video format code. see "FORMAT SELECTION" for more details.
+ You can also pass a function. The function takes 'ctx' as
+ argument and returns the formats to download.
+ See "build_format_selector" for an implementation
+ allow_unplayable_formats: Allow unplayable formats to be extracted and downloaded.
+ ignore_no_formats_error: Ignore "No video formats" error. Useful for
+ extracting metadata even if the video is not actually
+ available for download (experimental)
+ format_sort: A list of fields by which to sort the video formats.
+ See "Sorting Formats" for more details.
+ format_sort_force: Force the given format_sort. see "Sorting Formats"
+ for more details.
+ prefer_free_formats: Whether to prefer video formats with free containers
+ over non-free ones of same quality.
+ allow_multiple_video_streams: Allow multiple video streams to be merged
+ into a single file
+ allow_multiple_audio_streams: Allow multiple audio streams to be merged
+ into a single file
+ check_formats: Whether to test if the formats are downloadable.
+ Can be True (check all), False (check none),
+ 'selected' (check selected formats),
+ or None (check only if requested by extractor)
+ paths: Dictionary of output paths. The allowed keys are 'home',
+ 'temp' and the keys of OUTTMPL_TYPES (in utils/_utils.py)
+ outtmpl: Dictionary of templates for output names. Allowed keys
+ are 'default' and the keys of OUTTMPL_TYPES (in utils/_utils.py).
+ For compatibility with youtube-dl, a single string can also be used
+ outtmpl_na_placeholder: Placeholder for unavailable meta fields.
+ restrictfilenames: Do not allow "&" and spaces in file names
+ trim_file_name: Limit length of filename (extension excluded)
+ windowsfilenames: Force the filenames to be windows compatible
+ ignoreerrors: Do not stop on download/postprocessing errors.
+ Can be 'only_download' to ignore only download errors.
+ Default is 'only_download' for CLI, but False for API
+ skip_playlist_after_errors: Number of allowed failures until the rest of
+ the playlist is skipped
+ allowed_extractors: List of regexes to match against extractor names that are allowed
+ overwrites: Overwrite all video and metadata files if True,
+ overwrite only non-video files if None
+ and don't overwrite any file if False
+ playlist_items: Specific indices of playlist to download.
+ playlistrandom: Download playlist items in random order.
+ lazy_playlist: Process playlist entries as they are received.
+ matchtitle: Download only matching titles.
+ rejecttitle: Reject downloads for matching titles.
+ logger: Log messages to a logging.Logger instance.
+ logtostderr: Print everything to stderr instead of stdout.
+ consoletitle: Display progress in console window's titlebar.
+ writedescription: Write the video description to a .description file
+ writeinfojson: Write the video description to a .info.json file
+ clean_infojson: Remove internal metadata from the infojson
+ getcomments: Extract video comments. This will not be written to disk
+ unless writeinfojson is also given
+ writeannotations: Write the video annotations to a .annotations.xml file
+ writethumbnail: Write the thumbnail image to a file
+ allow_playlist_files: Whether to write playlists' description, infojson, etc.
+ also to disk when using the 'write*' options
+ write_all_thumbnails: Write all thumbnail formats to files
+ writelink: Write an internet shortcut file, depending on the
+ current platform (.url/.webloc/.desktop)
+ writeurllink: Write a Windows internet shortcut file (.url)
+ writewebloclink: Write a macOS internet shortcut file (.webloc)
+ writedesktoplink: Write a Linux internet shortcut file (.desktop)
+ writesubtitles: Write the video subtitles to a file
+ writeautomaticsub: Write the automatically generated subtitles to a file
+ listsubtitles: Lists all available subtitles for the video
+ subtitlesformat: The format code for subtitles
+ subtitleslangs: List of languages of the subtitles to download (can be regex).
+ The list may contain "all" to refer to all the available
+ subtitles. The language can be prefixed with a "-" to
+ exclude it from the requested languages, e.g. ['all', '-live_chat']
+ keepvideo: Keep the video file after post-processing
+ daterange: A utils.DateRange object, download only if the upload_date is in the range.
+ skip_download: Skip the actual download of the video file
+ cachedir: Location of the cache files in the filesystem.
+ False to disable filesystem cache.
+ noplaylist: Download single video instead of a playlist if in doubt.
+ age_limit: An integer representing the user's age in years.
+ Unsuitable videos for the given age are skipped.
+ min_views: An integer representing the minimum view count the video
+ must have in order to not be skipped.
+ Videos without view count information are always
+ downloaded. None for no limit.
+ max_views: An integer representing the maximum view count.
+ Videos that are more popular than that are not
+ downloaded.
+ Videos without view count information are always
+ downloaded. None for no limit.
+ download_archive: A set, or the name of a file where all downloads are recorded.
+ Videos already present in the file are not downloaded again.
+ break_on_existing: Stop the download process after attempting to download a
+ file that is in the archive.
+ break_per_url: Whether break_on_reject and break_on_existing
+ should act on each input URL rather than on the entire queue
+ cookiefile: File name or text stream from where cookies should be read and dumped to
+ cookiesfrombrowser: A tuple containing the name of the browser, the profile
+ name/path from where cookies are loaded, the name of the keyring,
+ and the container name, e.g. ('chrome', ) or
+ ('vivaldi', 'default', 'BASICTEXT') or ('firefox', 'default', None, 'Meta')
+ legacyserverconnect: Explicitly allow HTTPS connection to servers that do not
+ support RFC 5746 secure renegotiation
+ nocheckcertificate: Do not verify SSL certificates
+ client_certificate: Path to client certificate file in PEM format. May include the private key
+ client_certificate_key: Path to private key file for client certificate
+ client_certificate_password: Password for client certificate private key, if encrypted.
+ If not provided and the key is encrypted, yt-dlp will ask interactively
+ prefer_insecure: Use HTTP instead of HTTPS to retrieve information.
+ (Only supported by some extractors)
+ enable_file_urls: Enable file:// URLs. This is disabled by default for security reasons.
+ http_headers: A dictionary of custom headers to be used for all requests
+ proxy: URL of the proxy server to use
+ geo_verification_proxy: URL of the proxy to use for IP address verification
+ on geo-restricted sites.
+ socket_timeout: Time to wait for unresponsive hosts, in seconds
+ bidi_workaround: Work around buggy terminals without bidirectional text
+ support, using fribidi
+ debug_printtraffic: Print out sent and received HTTP traffic
+ default_search: Prepend this string if an input URL is not valid.
+ 'auto' for elaborate guessing
+ encoding: Use this encoding instead of the system-specified.
+ extract_flat: Whether to resolve and process url_results further
+ * False: Always process. Default for API
+ * True: Never process
+ * 'in_playlist': Do not process inside playlist/multi_video
+ * 'discard': Always process, but don't return the result
+ from inside playlist/multi_video
+ * 'discard_in_playlist': Same as "discard", but only for
+ playlists (not multi_video). Default for CLI
+ wait_for_video: If given, wait for scheduled streams to become available.
+ The value should be a tuple containing the range
+ (min_secs, max_secs) to wait between retries
+ postprocessors: A list of dictionaries, each with an entry
+ * key: The name of the postprocessor. See
+ yt_dlp/postprocessor/__init__.py for a list.
+ * when: When to run the postprocessor. Allowed values are
+ the entries of utils.POSTPROCESS_WHEN
+ Assumed to be 'post_process' if not given
+ progress_hooks: A list of functions that get called on download
+ progress, with a dictionary with the entries
+ * status: One of "downloading", "error", or "finished".
+ Check this first and ignore unknown values.
+ * info_dict: The extracted info_dict
+
+ If status is one of "downloading", or "finished", the
+ following properties may also be present:
+ * filename: The final filename (always present)
+ * tmpfilename: The filename we're currently writing to
+ * downloaded_bytes: Bytes on disk
+ * total_bytes: Size of the whole file, None if unknown
+ * total_bytes_estimate: Guess of the eventual file size,
+ None if unavailable.
+ * elapsed: The number of seconds since download started.
+ * eta: The estimated time in seconds, None if unknown
+ * speed: The download speed in bytes/second, None if
+ unknown
+ * fragment_index: The counter of the currently
+ downloaded video fragment.
+ * fragment_count: The number of fragments (= individual
+ files that will be merged)
+
+ Progress hooks are guaranteed to be called at least once
+ (with status "finished") if the download is successful.
+ postprocessor_hooks: A list of functions that get called on postprocessing
+ progress, with a dictionary with the entries
+ * status: One of "started", "processing", or "finished".
+ Check this first and ignore unknown values.
+ * postprocessor: Name of the postprocessor
+ * info_dict: The extracted info_dict
+
+ Progress hooks are guaranteed to be called at least twice
+ (with status "started" and "finished") if the processing is successful.
+ merge_output_format: "/" separated list of extensions to use when merging formats.
+ final_ext: Expected final extension; used to detect when the file was
+ already downloaded and converted
+ fixup: Automatically correct known faults of the file.
+ One of:
+ - "never": do nothing
+ - "warn": only emit a warning
+ - "detect_or_warn": check whether we can do anything
+ about it, warn otherwise (default)
+ source_address: Client-side IP address to bind to.
+ sleep_interval_requests: Number of seconds to sleep between requests
+ during extraction
+ sleep_interval: Number of seconds to sleep before each download when
+ used alone or a lower bound of a range for randomized
+ sleep before each download (minimum possible number
+ of seconds to sleep) when used along with
+ max_sleep_interval.
+ max_sleep_interval: Upper bound of a range for randomized sleep before each
+ download (maximum possible number of seconds to sleep).
+ Must only be used along with sleep_interval.
+ Actual sleep time will be a random float from range
+ [sleep_interval; max_sleep_interval].
+ sleep_interval_subtitles: Number of seconds to sleep before each subtitle download
+ listformats: Print an overview of available video formats and exit.
+ list_thumbnails: Print a table of all thumbnails and exit.
+ match_filter: A function that gets called for every video with the signature
+ (info_dict, *, incomplete: bool) -> Optional[str]
+ For backward compatibility with youtube-dl, the signature
+ (info_dict) -> Optional[str] is also allowed.
+ - If it returns a message, the video is ignored.
+ - If it returns None, the video is downloaded.
+ - If it returns utils.NO_DEFAULT, the user is interactively
+ asked whether to download the video.
+ - Raise utils.DownloadCancelled(msg) to abort remaining
+ downloads when a video is rejected.
+ match_filter_func in utils/_utils.py is one example of this.
+ color: A dictionary with output stream names as keys
+ and their respective color policy as values.
+ Can also just be a single color policy,
+ in which case it applies to all outputs.
+ Valid stream names are 'stdout' and 'stderr'.
+ Valid color policies are one of 'always', 'auto', 'no_color' or 'never'.
+ geo_bypass: Bypass geographic restriction via faking X-Forwarded-For
+ HTTP header
+ geo_bypass_country:
+ Two-letter ISO 3166-1 alpha-2 country code that will be used for
+ explicit geographic restriction bypassing via faking
+ X-Forwarded-For HTTP header
+ geo_bypass_ip_block:
+ IP range in CIDR notation that will be used similarly to
+ geo_bypass_country
+ external_downloader: A dictionary of protocol keys and the executable of the
+ external downloader to use for it. The allowed protocols
+ are default|http|ftp|m3u8|dash|rtsp|rtmp|mms.
+ Set the value to 'native' to use the native downloader
+ compat_opts: Compatibility options. See "Differences in default behavior".
+ The following options do not work when used through the API:
+ filename, abort-on-error, multistreams, no-live-chat, format-sort,
+ no-clean-infojson, no-playlist-metafiles, no-keep-subs, no-attach-info-json.
+ Refer to __init__.py for their implementation
+ progress_template: Dictionary of templates for progress outputs.
+ Allowed keys are 'download', 'postprocess',
+ 'download-title' (console title) and 'postprocess-title'.
+ The template is mapped on a dictionary with keys 'progress' and 'info'
+ retry_sleep_functions: Dictionary of functions that take the number of attempts
+ as argument and returns the time to sleep in seconds.
+ Allowed keys are 'http', 'fragment', 'file_access'
+ download_ranges: A callback function that gets called for every video with
+ the signature (info_dict, ydl) -> Iterable[Section].
+ Only the returned sections will be downloaded.
+ Each Section is a dict with the following keys:
+ * start_time: Start time of the section in seconds
+ * end_time: End time of the section in seconds
+ * title: Section title (Optional)
+ * index: Section number (Optional)
+ force_keyframes_at_cuts: Re-encode the video when downloading ranges to get precise cuts
+ noprogress: Do not print the progress bar
+ live_from_start: Whether to download livestream videos from the start
+
+ The following parameters are not used by YoutubeDL itself, they are used by
+ the downloader (see yt_dlp/downloader/common.py):
+ nopart, updatetime, buffersize, ratelimit, throttledratelimit, min_filesize,
+ max_filesize, test, noresizebuffer, retries, file_access_retries, fragment_retries,
+ continuedl, xattr_set_filesize, hls_use_mpegts, http_chunk_size,
+ external_downloader_args, concurrent_fragment_downloads.
+
+ The following options are used by the post processors:
+ ffmpeg_location: Location of the ffmpeg/avconv binary; either the path
+ to the binary or its containing directory.
+ postprocessor_args: A dictionary of postprocessor/executable keys (in lower case)
+ and a list of additional command-line arguments for the
+ postprocessor/executable. The dict can also have "PP+EXE" keys
+ which are used when the given exe is used by the given PP.
+ Use 'default' as the name for arguments to be passed to all PPs.
+ For compatibility with youtube-dl, a single list of args
+ can also be used
+
+ The following options are used by the extractors:
+ extractor_retries: Number of times to retry for known errors (default: 3)
+ dynamic_mpd: Whether to process dynamic DASH manifests (default: True)
+ hls_split_discontinuity: Split HLS playlists to different formats at
+ discontinuities such as ad breaks (default: False)
+ extractor_args: A dictionary of arguments to be passed to the extractors.
+ See "EXTRACTOR ARGUMENTS" for details.
+ E.g. {'youtube': {'skip': ['dash', 'hls']}}
+ mark_watched: Mark videos watched (even with --simulate). Only for YouTube
+
+ The following options are deprecated and may be removed in the future:
+
+ break_on_reject: Stop the download process when encountering a video that
+ has been filtered out.
+ - `raise DownloadCancelled(msg)` in match_filter instead
+ force_generic_extractor: Force downloader to use the generic extractor
+ - Use allowed_extractors = ['generic', 'default']
+ playliststart: - Use playlist_items
+ Playlist item to start at.
+ playlistend: - Use playlist_items
+ Playlist item to end at.
+ playlistreverse: - Use playlist_items
+ Download playlist items in reverse order.
+ forceurl: - Use forceprint
+ Force printing final URL.
+ forcetitle: - Use forceprint
+ Force printing title.
+ forceid: - Use forceprint
+ Force printing ID.
+ forcethumbnail: - Use forceprint
+ Force printing thumbnail URL.
+ forcedescription: - Use forceprint
+ Force printing description.
+ forcefilename: - Use forceprint
+ Force printing final filename.
+ forceduration: - Use forceprint
+ Force printing duration.
+ allsubtitles: - Use subtitleslangs = ['all']
+ Downloads all the subtitles of the video
+ (requires writesubtitles or writeautomaticsub)
+ include_ads: - Doesn't work
+ Download ads as well
+ call_home: - Not implemented
+ Boolean, true iff we are allowed to contact the
+ yt-dlp servers for debugging.
+ post_hooks: - Register a custom postprocessor
+ A list of functions that get called as the final step
+ for each video file, after all postprocessors have been
+ called. The filename will be passed as the only argument.
+ hls_prefer_native: - Use external_downloader = {'m3u8': 'native'} or {'m3u8': 'ffmpeg'}.
+ Use the native HLS downloader instead of ffmpeg/avconv
+ if True, otherwise use ffmpeg/avconv if False, otherwise
+ use downloader suggested by extractor if None.
+ prefer_ffmpeg: - avconv support is deprecated
+ If False, use avconv instead of ffmpeg if both are available,
+ otherwise prefer ffmpeg.
+ youtube_include_dash_manifest: - Use extractor_args
+ If True (default), DASH manifests and related
+ data will be downloaded and processed by extractor.
+ You can reduce network I/O by disabling it if you don't
+ care about DASH. (only for youtube)
+ youtube_include_hls_manifest: - Use extractor_args
+ If True (default), HLS manifests and related
+ data will be downloaded and processed by extractor.
+ You can reduce network I/O by disabling it if you don't
+ care about HLS. (only for youtube)
+ no_color: Same as `color='no_color'`
+ no_overwrites: Same as `overwrites=False`
+ """
+
+ _NUMERIC_FIELDS = {
+ 'width', 'height', 'asr', 'audio_channels', 'fps',
+ 'tbr', 'abr', 'vbr', 'filesize', 'filesize_approx',
+ 'timestamp', 'release_timestamp',
+ 'duration', 'view_count', 'like_count', 'dislike_count', 'repost_count',
+ 'average_rating', 'comment_count', 'age_limit',
+ 'start_time', 'end_time',
+ 'chapter_number', 'season_number', 'episode_number',
+ 'track_number', 'disc_number', 'release_year',
+ }
+
+ _format_fields = {
+ # NB: Keep in sync with the docstring of extractor/common.py
+ 'url', 'manifest_url', 'manifest_stream_number', 'ext', 'format', 'format_id', 'format_note',
+ 'width', 'height', 'aspect_ratio', 'resolution', 'dynamic_range', 'tbr', 'abr', 'acodec', 'asr', 'audio_channels',
+ 'vbr', 'fps', 'vcodec', 'container', 'filesize', 'filesize_approx', 'rows', 'columns',
+ 'player_url', 'protocol', 'fragment_base_url', 'fragments', 'is_from_start', 'is_dash_periods', 'request_data',
+ 'preference', 'language', 'language_preference', 'quality', 'source_preference', 'cookies',
+ 'http_headers', 'stretched_ratio', 'no_resume', 'has_drm', 'extra_param_to_segment_url', 'hls_aes', 'downloader_options',
+ 'page_url', 'app', 'play_path', 'tc_url', 'flash_version', 'rtmp_live', 'rtmp_conn', 'rtmp_protocol', 'rtmp_real_time'
+ }
+ _deprecated_multivalue_fields = {
+ 'album_artist': 'album_artists',
+ 'artist': 'artists',
+ 'composer': 'composers',
+ 'creator': 'creators',
+ 'genre': 'genres',
+ }
+ _format_selection_exts = {
+ 'audio': set(MEDIA_EXTENSIONS.common_audio),
+ 'video': set(MEDIA_EXTENSIONS.common_video + ('3gp', )),
+ 'storyboards': set(MEDIA_EXTENSIONS.storyboards),
+ }
+
+ def __init__(self, params=None, auto_init=True):
+ """Create a FileDownloader object with the given options.
+ @param auto_init Whether to load the default extractors and print header (if verbose).
+ Set to 'no_verbose_header' to not print the header
+ """
+ if params is None:
+ params = {}
+ self.params = params
+ self._ies = {}
+ self._ies_instances = {}
+ self._pps = {k: [] for k in POSTPROCESS_WHEN}
+ self._printed_messages = set()
+ self._first_webpage_request = True
+ self._post_hooks = []
+ self._progress_hooks = []
+ self._postprocessor_hooks = []
+ self._download_retcode = 0
+ self._num_downloads = 0
+ self._num_videos = 0
+ self._playlist_level = 0
+ self._playlist_urls = set()
+ self.cache = Cache(self)
+ self.__header_cookies = []
+
+ stdout = sys.stderr if self.params.get('logtostderr') else sys.stdout
+ self._out_files = Namespace(
+ out=stdout,
+ error=sys.stderr,
+ screen=sys.stderr if self.params.get('quiet') else stdout,
+ console=None if compat_os_name == 'nt' else next(
+ filter(supports_terminal_sequences, (sys.stderr, sys.stdout)), None)
+ )
+
+ try:
+ windows_enable_vt_mode()
+ except Exception as e:
+ self.write_debug(f'Failed to enable VT mode: {e}')
+
+ if self.params.get('no_color'):
+ if self.params.get('color') is not None:
+ self.params.setdefault('_warnings', []).append(
+ 'Overwriting params from "color" with "no_color"')
+ self.params['color'] = 'no_color'
+
+ term_allow_color = os.getenv('TERM', '').lower() != 'dumb'
+ no_color = bool(os.getenv('NO_COLOR'))
+
+ def process_color_policy(stream):
+ stream_name = {sys.stdout: 'stdout', sys.stderr: 'stderr'}[stream]
+ policy = traverse_obj(self.params, ('color', (stream_name, None), {str}), get_all=False)
+ if policy in ('auto', None):
+ if term_allow_color and supports_terminal_sequences(stream):
+ return 'no_color' if no_color else True
+ return False
+ assert policy in ('always', 'never', 'no_color'), policy
+ return {'always': True, 'never': False}.get(policy, policy)
+
+ self._allow_colors = Namespace(**{
+ name: process_color_policy(stream)
+ for name, stream in self._out_files.items_ if name != 'console'
+ })
+
+ system_deprecation = _get_system_deprecation()
+ if system_deprecation:
+ self.deprecated_feature(system_deprecation.replace('\n', '\n '))
+
+ if self.params.get('allow_unplayable_formats'):
+ self.report_warning(
+ f'You have asked for {self._format_err("UNPLAYABLE", self.Styles.EMPHASIS)} formats to be listed/downloaded. '
+ 'This is a developer option intended for debugging. \n'
+ ' If you experience any issues while using this option, '
+ f'{self._format_err("DO NOT", self.Styles.ERROR)} open a bug report')
+
+ if self.params.get('bidi_workaround', False):
+ try:
+ import pty
+ master, slave = pty.openpty()
+ width = shutil.get_terminal_size().columns
+ width_args = [] if width is None else ['-w', str(width)]
+ sp_kwargs = {'stdin': subprocess.PIPE, 'stdout': slave, 'stderr': self._out_files.error}
+ try:
+ self._output_process = Popen(['bidiv'] + width_args, **sp_kwargs)
+ except OSError:
+ self._output_process = Popen(['fribidi', '-c', 'UTF-8'] + width_args, **sp_kwargs)
+ self._output_channel = os.fdopen(master, 'rb')
+ except OSError as ose:
+ if ose.errno == errno.ENOENT:
+ self.report_warning(
+ 'Could not find fribidi executable, ignoring --bidi-workaround. '
+ 'Make sure that fribidi is an executable file in one of the directories in your $PATH.')
+ else:
+ raise
+
+ self.params['compat_opts'] = set(self.params.get('compat_opts', ()))
+ self.params['http_headers'] = HTTPHeaderDict(std_headers, self.params.get('http_headers'))
+ self._load_cookies(self.params['http_headers'].get('Cookie')) # compat
+ self.params['http_headers'].pop('Cookie', None)
+
+ if auto_init and auto_init != 'no_verbose_header':
+ self.print_debug_header()
+
+ def check_deprecated(param, option, suggestion):
+ if self.params.get(param) is not None:
+ self.report_warning(f'{option} is deprecated. Use {suggestion} instead')
+ return True
+ return False
+
+ if check_deprecated('cn_verification_proxy', '--cn-verification-proxy', '--geo-verification-proxy'):
+ if self.params.get('geo_verification_proxy') is None:
+ self.params['geo_verification_proxy'] = self.params['cn_verification_proxy']
+
+ check_deprecated('autonumber', '--auto-number', '-o "%(autonumber)s-%(title)s.%(ext)s"')
+ check_deprecated('usetitle', '--title', '-o "%(title)s-%(id)s.%(ext)s"')
+ check_deprecated('useid', '--id', '-o "%(id)s.%(ext)s"')
+
+ for msg in self.params.get('_warnings', []):
+ self.report_warning(msg)
+ for msg in self.params.get('_deprecation_warnings', []):
+ self.deprecated_feature(msg)
+
+ if 'list-formats' in self.params['compat_opts']:
+ self.params['listformats_table'] = False
+
+ if 'overwrites' not in self.params and self.params.get('nooverwrites') is not None:
+ # nooverwrites was unnecessarily changed to overwrites
+ # in 0c3d0f51778b153f65c21906031c2e091fcfb641
+ # This ensures compatibility with both keys
+ self.params['overwrites'] = not self.params['nooverwrites']
+ elif self.params.get('overwrites') is None:
+ self.params.pop('overwrites', None)
+ else:
+ self.params['nooverwrites'] = not self.params['overwrites']
+
+ if self.params.get('simulate') is None and any((
+ self.params.get('list_thumbnails'),
+ self.params.get('listformats'),
+ self.params.get('listsubtitles'),
+ )):
+ self.params['simulate'] = 'list_only'
+
+ self.params.setdefault('forceprint', {})
+ self.params.setdefault('print_to_file', {})
+
+ # Compatibility with older syntax
+ if not isinstance(params['forceprint'], dict):
+ self.params['forceprint'] = {'video': params['forceprint']}
+
+ if auto_init:
+ self.add_default_info_extractors()
+
+ if (sys.platform != 'win32'
+ and sys.getfilesystemencoding() in ['ascii', 'ANSI_X3.4-1968']
+ and not self.params.get('restrictfilenames', False)):
+ # Unicode filesystem API will throw errors (#1474, #13027)
+ self.report_warning(
+ 'Assuming --restrict-filenames since file system encoding '
+ 'cannot encode all characters. '
+ 'Set the LC_ALL environment variable to fix this.')
+ self.params['restrictfilenames'] = True
+
+ self._parse_outtmpl()
+
+ # Creating format selector here allows us to catch syntax errors before the extraction
+ self.format_selector = (
+ self.params.get('format') if self.params.get('format') in (None, '-')
+ else self.params['format'] if callable(self.params['format'])
+ else self.build_format_selector(self.params['format']))
+
+ hooks = {
+ 'post_hooks': self.add_post_hook,
+ 'progress_hooks': self.add_progress_hook,
+ 'postprocessor_hooks': self.add_postprocessor_hook,
+ }
+ for opt, fn in hooks.items():
+ for ph in self.params.get(opt, []):
+ fn(ph)
+
+ for pp_def_raw in self.params.get('postprocessors', []):
+ pp_def = dict(pp_def_raw)
+ when = pp_def.pop('when', 'post_process')
+ self.add_post_processor(
+ get_postprocessor(pp_def.pop('key'))(self, **pp_def),
+ when=when)
+
+ def preload_download_archive(fn):
+ """Preload the archive, if any is specified"""
+ archive = set()
+ if fn is None:
+ return archive
+ elif not is_path_like(fn):
+ return fn
+
+ self.write_debug(f'Loading archive file {fn!r}')
+ try:
+ with locked_file(fn, 'r', encoding='utf-8') as archive_file:
+ for line in archive_file:
+ archive.add(line.strip())
+ except OSError as ioe:
+ if ioe.errno != errno.ENOENT:
+ raise
+ return archive
+
+ self.archive = preload_download_archive(self.params.get('download_archive'))
+
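+ # Illustrative note (not in the original source): per the 'download_archive'
+ # option documented above, preload_download_archive accepts either a file path
+ # or a pre-built set of "extractor id" entries, e.g. (values hypothetical):
+ #
+ #     YoutubeDL({'download_archive': 'archive.txt'})               # loaded into a set
+ #     YoutubeDL({'download_archive': {'youtube placeholder-id'}})  # used as-is
+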
+ def warn_if_short_id(self, argv):
+ # short YouTube ID starting with dash?
+ idxs = [
+ i for i, a in enumerate(argv)
+ if re.match(r'^-[0-9A-Za-z_-]{10}$', a)]
+ if idxs:
+ correct_argv = (
+ ['yt-dlp']
+ + [a for i, a in enumerate(argv) if i not in idxs]
+ + ['--'] + [argv[i] for i in idxs]
+ )
+ self.report_warning(
+ 'Long argument string detected. '
+ 'Use -- to separate parameters and URLs, like this:\n%s' %
+ args_to_str(correct_argv))
+
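+ # Illustrative (hypothetical ID): `yt-dlp -abcde12345` would be parsed as an
+ # option string, so warn_if_short_id above suggests `yt-dlp -- -abcde12345`,
+ # which makes the dash-prefixed video ID unambiguous.
+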
+ def add_info_extractor(self, ie):
+ """Add an InfoExtractor object to the end of the list."""
+ ie_key = ie.ie_key()
+ self._ies[ie_key] = ie
+ if not isinstance(ie, type):
+ self._ies_instances[ie_key] = ie
+ ie.set_downloader(self)
+
+ def get_info_extractor(self, ie_key):
+ """
+ Get an instance of an IE with name ie_key. It will first try to get one
+ from the _ies list; if there is no instance, it will create a new one
+ and add it to the extractor list.
+ """
+ ie = self._ies_instances.get(ie_key)
+ if ie is None:
+ ie = get_info_extractor(ie_key)()
+ self.add_info_extractor(ie)
+ return ie
+
+ def add_default_info_extractors(self):
+ """
+ Add the InfoExtractors returned by gen_extractors to the end of the list
+ """
+ all_ies = {ie.IE_NAME.lower(): ie for ie in gen_extractor_classes()}
+ all_ies['end'] = UnsupportedURLIE()
+ try:
+ ie_names = orderedSet_from_options(
+ self.params.get('allowed_extractors', ['default']), {
+ 'all': list(all_ies),
+ 'default': [name for name, ie in all_ies.items() if ie._ENABLED],
+ }, use_regex=True)
+ except re.error as e:
+ raise ValueError(f'Wrong regex for allowed_extractors: {e.pattern}')
+ for name in ie_names:
+ self.add_info_extractor(all_ies[name])
+ self.write_debug(f'Loaded {len(ie_names)} extractors')
+
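+ # Illustrative sketch of 'allowed_extractors' (regexes matched against
+ # lowercased extractor names by add_default_info_extractors above;
+ # 'default' and 'all' are keyword groups):
+ #
+ #     YoutubeDL({'allowed_extractors': ['youtube.*', 'default']})
+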
+ def add_post_processor(self, pp, when='post_process'):
+ """Add a PostProcessor object to the end of the chain."""
+ assert when in POSTPROCESS_WHEN, f'Invalid when={when}'
+ self._pps[when].append(pp)
+ pp.set_downloader(self)
+
+ def add_post_hook(self, ph):
+ """Add the post hook"""
+ self._post_hooks.append(ph)
+
+ def add_progress_hook(self, ph):
+ """Add the download progress hook"""
+ self._progress_hooks.append(ph)
+
+ def add_postprocessor_hook(self, ph):
+ """Add the postprocessing progress hook"""
+ self._postprocessor_hooks.append(ph)
+ for pps in self._pps.values():
+ for pp in pps:
+ pp.add_progress_hook(ph)
+
+ def _bidi_workaround(self, message):
+ if not hasattr(self, '_output_channel'):
+ return message
+
+ assert hasattr(self, '_output_process')
+ assert isinstance(message, str)
+ line_count = message.count('\n') + 1
+ self._output_process.stdin.write((message + '\n').encode())
+ self._output_process.stdin.flush()
+ res = ''.join(self._output_channel.readline().decode()
+ for _ in range(line_count))
+ return res[:-len('\n')]
+
+ def _write_string(self, message, out=None, only_once=False):
+ if only_once:
+ if message in self._printed_messages:
+ return
+ self._printed_messages.add(message)
+ write_string(message, out=out, encoding=self.params.get('encoding'))
+
+ def to_stdout(self, message, skip_eol=False, quiet=None):
+ """Print message to stdout"""
+ if quiet is not None:
+ self.deprecation_warning('"YoutubeDL.to_stdout" no longer accepts the argument quiet. '
+ 'Use "YoutubeDL.to_screen" instead')
+ if skip_eol is not False:
+ self.deprecation_warning('"YoutubeDL.to_stdout" no longer accepts the argument skip_eol. '
+ 'Use "YoutubeDL.to_screen" instead')
+ self._write_string(f'{self._bidi_workaround(message)}\n', self._out_files.out)
+
+ def to_screen(self, message, skip_eol=False, quiet=None, only_once=False):
+ """Print message to screen if not in quiet mode"""
+ if self.params.get('logger'):
+ self.params['logger'].debug(message)
+ return
+ if (self.params.get('quiet') if quiet is None else quiet) and not self.params.get('verbose'):
+ return
+ self._write_string(
+ '%s%s' % (self._bidi_workaround(message), ('' if skip_eol else '\n')),
+ self._out_files.screen, only_once=only_once)
+
+ def to_stderr(self, message, only_once=False):
+ """Print message to stderr"""
+ assert isinstance(message, str)
+ if self.params.get('logger'):
+ self.params['logger'].error(message)
+ else:
+ self._write_string(f'{self._bidi_workaround(message)}\n', self._out_files.error, only_once=only_once)
+
+ def _send_console_code(self, code):
+ if compat_os_name == 'nt' or not self._out_files.console:
+ return
+ self._write_string(code, self._out_files.console)
+
+ def to_console_title(self, message):
+ if not self.params.get('consoletitle', False):
+ return
+ message = remove_terminal_sequences(message)
+ if compat_os_name == 'nt':
+ if ctypes.windll.kernel32.GetConsoleWindow():
+ # c_wchar_p() might not be necessary if `message` is
+ # already of type unicode()
+ ctypes.windll.kernel32.SetConsoleTitleW(ctypes.c_wchar_p(message))
+ else:
+ self._send_console_code(f'\033]0;{message}\007')
+
+ def save_console_title(self):
+ if not self.params.get('consoletitle') or self.params.get('simulate'):
+ return
+ self._send_console_code('\033[22;0t') # Save the title on stack
+
+ def restore_console_title(self):
+ if not self.params.get('consoletitle') or self.params.get('simulate'):
+ return
+ self._send_console_code('\033[23;0t') # Restore the title from stack
+
+ def __enter__(self):
+ self.save_console_title()
+ return self
+
+ def save_cookies(self):
+ if self.params.get('cookiefile') is not None:
+ self.cookiejar.save()
+
+ def __exit__(self, *args):
+ self.restore_console_title()
+ self.close()
+
+ def close(self):
+ self.save_cookies()
+ if '_request_director' in self.__dict__:
+ self._request_director.close()
+ del self._request_director
+
+ def trouble(self, message=None, tb=None, is_error=True):
+ """Determine action to take when a download problem appears.
+
+ Depending on whether the downloader has been configured to ignore
+ download errors, this method may throw an exception (after printing
+ the message) when errors are found.
+
+ @param tb If given, is additional traceback information
+ @param is_error Whether to raise an error according to ignoreerrors
+ """
+ if message is not None:
+ self.to_stderr(message)
+ if self.params.get('verbose'):
+ if tb is None:
+ if sys.exc_info()[0]: # if .trouble has been called from an except block
+ tb = ''
+ if hasattr(sys.exc_info()[1], 'exc_info') and sys.exc_info()[1].exc_info[0]:
+ tb += ''.join(traceback.format_exception(*sys.exc_info()[1].exc_info))
+ tb += encode_compat_str(traceback.format_exc())
+ else:
+ tb_data = traceback.format_list(traceback.extract_stack())
+ tb = ''.join(tb_data)
+ if tb:
+ self.to_stderr(tb)
+ if not is_error:
+ return
+ if not self.params.get('ignoreerrors'):
+ if sys.exc_info()[0] and hasattr(sys.exc_info()[1], 'exc_info') and sys.exc_info()[1].exc_info[0]:
+ exc_info = sys.exc_info()[1].exc_info
+ else:
+ exc_info = sys.exc_info()
+ raise DownloadError(message, exc_info)
+ self._download_retcode = 1
+
+ Styles = Namespace(
+ HEADERS='yellow',
+ EMPHASIS='light blue',
+ FILENAME='green',
+ ID='green',
+ DELIM='blue',
+ ERROR='red',
+ BAD_FORMAT='light red',
+ WARNING='yellow',
+ SUPPRESS='light black',
+ )
+
+ def _format_text(self, handle, allow_colors, text, f, fallback=None, *, test_encoding=False):
+ text = str(text)
+ if test_encoding:
+ original_text = text
+ # handle.encoding can be None. See https://github.com/yt-dlp/yt-dlp/issues/2711
+ encoding = self.params.get('encoding') or getattr(handle, 'encoding', None) or 'ascii'
+ text = text.encode(encoding, 'ignore').decode(encoding)
+ if fallback is not None and text != original_text:
+ text = fallback
+ return format_text(text, f) if allow_colors is True else text if fallback is None else fallback
+
+ def _format_out(self, *args, **kwargs):
+ return self._format_text(self._out_files.out, self._allow_colors.out, *args, **kwargs)
+
+ def _format_screen(self, *args, **kwargs):
+ return self._format_text(self._out_files.screen, self._allow_colors.screen, *args, **kwargs)
+
+ def _format_err(self, *args, **kwargs):
+ return self._format_text(self._out_files.error, self._allow_colors.error, *args, **kwargs)
+
+ def report_warning(self, message, only_once=False):
+ '''
+ Print the message to stderr; it will be prefixed with 'WARNING:'.
+ If stderr is a tty file, the 'WARNING:' will be colored.
+ '''
+ if self.params.get('logger') is not None:
+ self.params['logger'].warning(message)
+ else:
+ if self.params.get('no_warnings'):
+ return
+ self.to_stderr(f'{self._format_err("WARNING:", self.Styles.WARNING)} {message}', only_once)
+
+ def deprecation_warning(self, message, *, stacklevel=0):
+ deprecation_warning(
+ message, stacklevel=stacklevel + 1, printer=self.report_error, is_error=False)
+
+ def deprecated_feature(self, message):
+ if self.params.get('logger') is not None:
+ self.params['logger'].warning(f'Deprecated Feature: {message}')
+ self.to_stderr(f'{self._format_err("Deprecated Feature:", self.Styles.ERROR)} {message}', True)
+
+ def report_error(self, message, *args, **kwargs):
+ '''
+ Do the same as trouble, but prefix the message with 'ERROR:', colored
+ in red if stderr is a tty file.
+ '''
+ self.trouble(f'{self._format_err("ERROR:", self.Styles.ERROR)} {message}', *args, **kwargs)
+
+ def write_debug(self, message, only_once=False):
+ '''Log a debug message, or print it to stderr'''
+ if not self.params.get('verbose', False):
+ return
+ message = f'[debug] {message}'
+ if self.params.get('logger'):
+ self.params['logger'].debug(message)
+ else:
+ self.to_stderr(message, only_once)
+
+ def report_file_already_downloaded(self, file_name):
+ """Report file has already been fully downloaded."""
+ try:
+ self.to_screen('[download] %s has already been downloaded' % file_name)
+ except UnicodeEncodeError:
+ self.to_screen('[download] The file has already been downloaded')
+
+ def report_file_delete(self, file_name):
+ """Report that existing file will be deleted."""
+ try:
+ self.to_screen('Deleting existing file %s' % file_name)
+ except UnicodeEncodeError:
+ self.to_screen('Deleting existing file')
+
+ def raise_no_formats(self, info, forced=False, *, msg=None):
+ has_drm = info.get('_has_drm')
+ ignored, expected = self.params.get('ignore_no_formats_error'), bool(msg)
+ msg = msg or has_drm and 'This video is DRM protected' or 'No video formats found!'
+ if forced or not ignored:
+ raise ExtractorError(msg, video_id=info['id'], ie=info['extractor'],
+ expected=has_drm or ignored or expected)
+ else:
+ self.report_warning(msg)
+
+ def parse_outtmpl(self):
+ self.deprecation_warning('"YoutubeDL.parse_outtmpl" is deprecated and may be removed in a future version')
+ self._parse_outtmpl()
+ return self.params['outtmpl']
+
+ def _parse_outtmpl(self):
+ sanitize = IDENTITY
+ if self.params.get('restrictfilenames'): # Remove spaces in the default template
+ sanitize = lambda x: x.replace(' - ', ' ').replace(' ', '-')
+
+ outtmpl = self.params.setdefault('outtmpl', {})
+ if not isinstance(outtmpl, dict):
+ self.params['outtmpl'] = outtmpl = {'default': outtmpl}
+ outtmpl.update({k: sanitize(v) for k, v in DEFAULT_OUTTMPL.items() if outtmpl.get(k) is None})
+
+ def get_output_path(self, dir_type='', filename=None):
+ paths = self.params.get('paths', {})
+ assert isinstance(paths, dict), '"paths" parameter must be a dictionary'
+ path = os.path.join(
+ expand_path(paths.get('home', '').strip()),
+ expand_path(paths.get(dir_type, '').strip()) if dir_type else '',
+ filename or '')
+ return sanitize_path(path, force=self.params.get('windowsfilenames'))
+
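+ # Illustrative (hypothetical values): with params
+ # {'paths': {'home': '/media/videos', 'temp': 'tmp'}},
+ # get_output_path('temp', 'clip.mp4') above joins the pieces into
+ # '/media/videos/tmp/clip.mp4' before sanitization.
+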
+ @staticmethod
+ def _outtmpl_expandpath(outtmpl):
+ # expand_path translates '%%' into '%' and '$$' into '$'
+ # respectively, which is not what we want since we need to keep
+ # '%%' intact for the template dict substitution step. Work around
+ # this with a boundary-like separator hack.
+ sep = ''.join(random.choices(string.ascii_letters, k=32))
+ outtmpl = outtmpl.replace('%%', f'%{sep}%').replace('$$', f'${sep}$')
+
+ # outtmpl should be expand_path'ed before template dict substitution
+ # because meta fields may contain env variables we don't want to
+ # be expanded. E.g. for outtmpl "%(title)s.%(ext)s" and
+ # title "Hello $PATH", we don't want `$PATH` to be expanded.
+ return expand_path(outtmpl).replace(sep, '')
+
+ @staticmethod
+ def escape_outtmpl(outtmpl):
+ ''' Escape any remaining strings like %s, %abc% etc. '''
+ return re.sub(
+ STR_FORMAT_RE_TMPL.format('', '(?![%(\0])'),
+ lambda mobj: ('' if mobj.group('has_key') else '%') + mobj.group(0),
+ outtmpl)
+
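+ # Illustrative: escape_outtmpl('100% %(title)s') -> '100%% %(title)s';
+ # the stray '%' is doubled while the keyed '%(title)s' field is left intact.
+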
+ @classmethod
+ def validate_outtmpl(cls, outtmpl):
+ ''' @return None or Exception object '''
+ outtmpl = re.sub(
+ STR_FORMAT_RE_TMPL.format('[^)]*', '[ljhqBUDS]'),
+ lambda mobj: f'{mobj.group(0)[:-1]}s',
+ cls._outtmpl_expandpath(outtmpl))
+ try:
+ cls.escape_outtmpl(outtmpl) % collections.defaultdict(int)
+ return None
+ except ValueError as err:
+ return err
+
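+ # Illustrative: validate_outtmpl('%(title)s.%(ext)s') returns None (valid),
+ # while a malformed template such as '%(title' returns the ValueError
+ # raised by the substitution above.
+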
+ @staticmethod
+ def _copy_infodict(info_dict):
+ info_dict = dict(info_dict)
+ info_dict.pop('__postprocessors', None)
+ info_dict.pop('__pending_error', None)
+ return info_dict
+
+ def prepare_outtmpl(self, outtmpl, info_dict, sanitize=False):
+ """ Make the outtmpl and info_dict suitable for substitution: ydl.escape_outtmpl(outtmpl) % info_dict
+ @param sanitize Whether to sanitize the output as a filename.
+ For backward compatibility, a function can also be passed
+ """
+
+ info_dict.setdefault('epoch', int(time.time())) # keep epoch consistent once set
+
+ info_dict = self._copy_infodict(info_dict)
+ info_dict['duration_string'] = ( # %(duration>%H-%M-%S)s is wrong if duration > 24hrs
+ formatSeconds(info_dict['duration'], '-' if sanitize else ':')
+ if info_dict.get('duration', None) is not None
+ else None)
+ info_dict['autonumber'] = int(self.params.get('autonumber_start', 1) - 1 + self._num_downloads)
+ info_dict['video_autonumber'] = self._num_videos
+ if info_dict.get('resolution') is None:
+ info_dict['resolution'] = self.format_resolution(info_dict, default=None)
+
+ # For fields playlist_index, playlist_autonumber and autonumber convert all occurrences
+ # of %(field)s to %(field)0Nd for backward compatibility
+ field_size_compat_map = {
+ 'playlist_index': number_of_digits(info_dict.get('__last_playlist_index') or 0),
+ 'playlist_autonumber': number_of_digits(info_dict.get('n_entries') or 0),
+ 'autonumber': self.params.get('autonumber_size') or 5,
+ }
+
+ TMPL_DICT = {}
+ EXTERNAL_FORMAT_RE = re.compile(STR_FORMAT_RE_TMPL.format('[^)]*', f'[{STR_FORMAT_TYPES}ljhqBUDS]'))
+ MATH_FUNCTIONS = {
+ '+': float.__add__,
+ '-': float.__sub__,
+ '*': float.__mul__,
+ }
+ # Field is of the form key1.key2...
+ # where keys (except first) can be string, int, slice or "{field, ...}"
+ FIELD_INNER_RE = r'(?:\w+|%(num)s|%(num)s?(?::%(num)s?){1,2})' % {'num': r'(?:-?\d+)'}
+ FIELD_RE = r'\w*(?:\.(?:%(inner)s|{%(field)s(?:,%(field)s)*}))*' % {
+ 'inner': FIELD_INNER_RE,
+ 'field': rf'\w*(?:\.{FIELD_INNER_RE})*'
+ }
+ MATH_FIELD_RE = rf'(?:{FIELD_RE}|-?{NUMBER_RE})'
+ MATH_OPERATORS_RE = r'(?:%s)' % '|'.join(map(re.escape, MATH_FUNCTIONS.keys()))
+ INTERNAL_FORMAT_RE = re.compile(rf'''(?xs)
+ (?P<negate>-)?
+ (?P<fields>{FIELD_RE})
+ (?P<maths>(?:{MATH_OPERATORS_RE}{MATH_FIELD_RE})*)
+ (?:>(?P<strf_format>.+?))?
+ (?P<remaining>
+ (?P<alternate>(?<!\\),[^|&)]+)?
+ (?:&(?P<replacement>.*?))?
+ (?:\|(?P<default>.*?))?
+ )$''')
+
+ def _from_user_input(field):
+ if field == ':':
+ return ...
+ elif ':' in field:
+ return slice(*map(int_or_none, field.split(':')))
+ elif int_or_none(field) is not None:
+ return int(field)
+ return field
+
+ def _traverse_infodict(fields):
+ fields = [f for x in re.split(r'\.({.+?})\.?', fields)
+ for f in ([x] if x.startswith('{') else x.split('.'))]
+ for i in (0, -1):
+ if fields and not fields[i]:
+ fields.pop(i)
+
+ for i, f in enumerate(fields):
+ if not f.startswith('{'):
+ fields[i] = _from_user_input(f)
+ continue
+ assert f.endswith('}'), f'No closing brace for {f} in {fields}'
+ fields[i] = {k: list(map(_from_user_input, k.split('.'))) for k in f[1:-1].split(',')}
+
+ return traverse_obj(info_dict, fields, traverse_string=True)
+
+ def get_value(mdict):
+ # Object traversal
+ value = _traverse_infodict(mdict['fields'])
+ # Negative
+ if mdict['negate']:
+ value = float_or_none(value)
+ if value is not None:
+ value *= -1
+ # Do maths
+ offset_key = mdict['maths']
+ if offset_key:
+ value = float_or_none(value)
+ operator = None
+ while offset_key:
+ item = re.match(
+ MATH_FIELD_RE if operator else MATH_OPERATORS_RE,
+ offset_key).group(0)
+ offset_key = offset_key[len(item):]
+ if operator is None:
+ operator = MATH_FUNCTIONS[item]
+ continue
+ item, multiplier = (item[1:], -1) if item[0] == '-' else (item, 1)
+ offset = float_or_none(item)
+ if offset is None:
+ offset = float_or_none(_traverse_infodict(item))
+ try:
+ value = operator(value, multiplier * offset)
+ except (TypeError, ZeroDivisionError):
+ return None
+ operator = None
+ # Datetime formatting
+ if mdict['strf_format']:
+ value = strftime_or_none(value, mdict['strf_format'].replace('\\,', ','))
+
+ # XXX: Workaround for https://github.com/yt-dlp/yt-dlp/issues/4485
+ if sanitize and value == '':
+ value = None
+ return value
+
+ na = self.params.get('outtmpl_na_placeholder', 'NA')
+
+ def filename_sanitizer(key, value, restricted=self.params.get('restrictfilenames')):
+ return sanitize_filename(str(value), restricted=restricted, is_id=(
+ bool(re.search(r'(^|[_.])id(\.|$)', key))
+ if 'filename-sanitization' in self.params['compat_opts']
+ else NO_DEFAULT))
+
+ sanitizer = sanitize if callable(sanitize) else filename_sanitizer
+ sanitize = bool(sanitize)
+
+ def _dumpjson_default(obj):
+ if isinstance(obj, (set, LazyList)):
+ return list(obj)
+ return repr(obj)
+
+ class _ReplacementFormatter(string.Formatter):
+ def get_field(self, field_name, args, kwargs):
+ if field_name.isdigit():
+ return args[0], -1
+ raise ValueError('Unsupported field')
+
+ replacement_formatter = _ReplacementFormatter()
+
+ def create_key(outer_mobj):
+ if not outer_mobj.group('has_key'):
+ return outer_mobj.group(0)
+ key = outer_mobj.group('key')
+ mobj = re.match(INTERNAL_FORMAT_RE, key)
+ value, replacement, default, last_field = None, None, na, ''
+ while mobj:
+ mobj = mobj.groupdict()
+ default = mobj['default'] if mobj['default'] is not None else default
+ value = get_value(mobj)
+ last_field, replacement = mobj['fields'], mobj['replacement']
+ if value is None and mobj['alternate']:
+ mobj = re.match(INTERNAL_FORMAT_RE, mobj['remaining'][1:])
+ else:
+ break
+
+ if None not in (value, replacement):
+ try:
+ value = replacement_formatter.format(replacement, value)
+ except ValueError:
+ value, default = None, na
+
+ fmt = outer_mobj.group('format')
+ if fmt == 's' and last_field in field_size_compat_map.keys() and isinstance(value, int):
+ fmt = f'0{field_size_compat_map[last_field]:d}d'
+
+ flags = outer_mobj.group('conversion') or ''
+ str_fmt = f'{fmt[:-1]}s'
+ if value is None:
+ value, fmt = default, 's'
+ elif fmt[-1] == 'l': # list
+ delim = '\n' if '#' in flags else ', '
+ value, fmt = delim.join(map(str, variadic(value, allowed_types=(str, bytes)))), str_fmt
+ elif fmt[-1] == 'j': # json
+ value, fmt = json.dumps(
+ value, default=_dumpjson_default,
+ indent=4 if '#' in flags else None, ensure_ascii='+' not in flags), str_fmt
+ elif fmt[-1] == 'h': # html
+ value, fmt = escapeHTML(str(value)), str_fmt
+ elif fmt[-1] == 'q': # quoted
+ value = map(str, variadic(value) if '#' in flags else [value])
+ value, fmt = ' '.join(map(compat_shlex_quote, value)), str_fmt
+ elif fmt[-1] == 'B': # bytes
+ value = f'%{str_fmt}'.encode() % str(value).encode()
+ value, fmt = value.decode('utf-8', 'ignore'), 's'
+ elif fmt[-1] == 'U': # unicode normalized
+ value, fmt = unicodedata.normalize(
+ # "+" = compatibility equivalence, "#" = NFD
+ 'NF%s%s' % ('K' if '+' in flags else '', 'D' if '#' in flags else 'C'),
+ value), str_fmt
+ elif fmt[-1] == 'D': # decimal suffix
+ num_fmt, fmt = fmt[:-1].replace('#', ''), 's'
+ value = format_decimal_suffix(value, f'%{num_fmt}f%s' if num_fmt else '%d%s',
+ factor=1024 if '#' in flags else 1000)
+ elif fmt[-1] == 'S': # filename sanitization
+ value, fmt = filename_sanitizer(last_field, value, restricted='#' in flags), str_fmt
+ elif fmt[-1] == 'c':
+ if value:
+ value = str(value)[0]
+ else:
+ fmt = str_fmt
+ elif fmt[-1] not in 'rsa': # numeric
+ value = float_or_none(value)
+ if value is None:
+ value, fmt = default, 's'
+
+ if sanitize:
+ # If value is an object, sanitize might convert it to a string
+ # So we convert it to repr first
+ if fmt[-1] == 'r':
+ value, fmt = repr(value), str_fmt
+ elif fmt[-1] == 'a':
+ value, fmt = ascii(value), str_fmt
+ if fmt[-1] in 'csra':
+ value = sanitizer(last_field, value)
+
+ key = '%s\0%s' % (key.replace('%', '%\0'), outer_mobj.group('format'))
+ TMPL_DICT[key] = value
+ return '{prefix}%({key}){fmt}'.format(key=key, fmt=fmt, prefix=outer_mobj.group('prefix'))
+
+ return EXTERNAL_FORMAT_RE.sub(create_key, outtmpl), TMPL_DICT
+
+ def evaluate_outtmpl(self, outtmpl, info_dict, *args, **kwargs):
+ outtmpl, info_dict = self.prepare_outtmpl(outtmpl, info_dict, *args, **kwargs)
+ return self.escape_outtmpl(outtmpl) % info_dict
+
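+ # Illustrative sketch of the template syntax implemented above (hypothetical
+ # ydl instance and info values):
+ #
+ #     info = {'title': 'Demo', 'tags': ['a', 'b'], 'ext': 'mp4'}
+ #     ydl.evaluate_outtmpl('%(title)s.%(ext)s', info)     # -> 'Demo.mp4'
+ #     ydl.evaluate_outtmpl('%(tags.0)s', info)            # -> 'a' (dot traversal)
+ #     ydl.evaluate_outtmpl('%(uploader|unknown)s', info)  # -> 'unknown' ('|' default)
+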
+ def _prepare_filename(self, info_dict, *, outtmpl=None, tmpl_type=None):
+ assert None in (outtmpl, tmpl_type), 'outtmpl and tmpl_type are mutually exclusive'
+ if outtmpl is None:
+ outtmpl = self.params['outtmpl'].get(tmpl_type or 'default', self.params['outtmpl']['default'])
+ try:
+ outtmpl = self._outtmpl_expandpath(outtmpl)
+ filename = self.evaluate_outtmpl(outtmpl, info_dict, True)
+ if not filename:
+ return None
+
+ if tmpl_type in ('', 'temp'):
+ final_ext, ext = self.params.get('final_ext'), info_dict.get('ext')
+ if final_ext and ext and final_ext != ext and filename.endswith(f'.{final_ext}'):
+ filename = replace_extension(filename, ext, final_ext)
+ elif tmpl_type:
+ force_ext = OUTTMPL_TYPES[tmpl_type]
+ if force_ext:
+ filename = replace_extension(filename, force_ext, info_dict.get('ext'))
+
+ # https://github.com/blackjack4494/youtube-dlc/issues/85
+ trim_file_name = self.params.get('trim_file_name', False)
+ if trim_file_name:
+ no_ext, *ext = filename.rsplit('.', 2)
+ filename = join_nonempty(no_ext[:trim_file_name], *ext, delim='.')
+
+ return filename
+ except ValueError as err:
+ self.report_error('Error in output template: ' + str(err) + ' (encoding: ' + repr(preferredencoding()) + ')')
+ return None
+
+ def prepare_filename(self, info_dict, dir_type='', *, outtmpl=None, warn=False):
+ """Generate the output filename"""
+ if outtmpl:
+ assert not dir_type, 'outtmpl and dir_type are mutually exclusive'
+ dir_type = None
+ filename = self._prepare_filename(info_dict, tmpl_type=dir_type, outtmpl=outtmpl)
+ if not filename and dir_type not in ('', 'temp'):
+ return ''
+
+ if warn:
+ if not self.params.get('paths'):
+ pass
+ elif filename == '-':
+ self.report_warning('--paths is ignored when outputting to stdout', only_once=True)
+ elif os.path.isabs(filename):
+ self.report_warning('--paths is ignored since an absolute path is given in output template', only_once=True)
+ if filename == '-' or not filename:
+ return filename
+
+ return self.get_output_path(dir_type, filename)
+
+ def _match_entry(self, info_dict, incomplete=False, silent=False):
+ """Returns None if the file should be downloaded"""
+ _type = 'video' if 'playlist-match-filter' in self.params['compat_opts'] else info_dict.get('_type', 'video')
+ assert incomplete or _type == 'video', 'Only video result can be considered complete'
+
+ video_title = info_dict.get('title', info_dict.get('id', 'entry'))
+
+ def check_filter():
+ if _type in ('playlist', 'multi_video'):
+ return
+ elif _type in ('url', 'url_transparent') and not try_call(
+ lambda: self.get_info_extractor(info_dict['ie_key']).is_single_video(info_dict['url'])):
+ return
+
+ if 'title' in info_dict:
+ # This can happen when we're just evaluating the playlist
+ title = info_dict['title']
+ matchtitle = self.params.get('matchtitle', False)
+ if matchtitle:
+ if not re.search(matchtitle, title, re.IGNORECASE):
+ return '"' + title + '" title did not match pattern "' + matchtitle + '"'
+ rejecttitle = self.params.get('rejecttitle', False)
+ if rejecttitle:
+ if re.search(rejecttitle, title, re.IGNORECASE):
+ return '"' + title + '" title matched reject pattern "' + rejecttitle + '"'
+
+ date = info_dict.get('upload_date')
+ if date is not None:
+ dateRange = self.params.get('daterange', DateRange())
+ if date not in dateRange:
+ return f'{date_from_str(date).isoformat()} upload date is not in range {dateRange}'
+ view_count = info_dict.get('view_count')
+ if view_count is not None:
+ min_views = self.params.get('min_views')
+ if min_views is not None and view_count < min_views:
+ return 'Skipping %s, because it has not reached minimum view count (%d/%d)' % (video_title, view_count, min_views)
+ max_views = self.params.get('max_views')
+ if max_views is not None and view_count > max_views:
+ return 'Skipping %s, because it has exceeded the maximum view count (%d/%d)' % (video_title, view_count, max_views)
+ if age_restricted(info_dict.get('age_limit'), self.params.get('age_limit')):
+ return 'Skipping "%s" because it is age restricted' % video_title
+
+ match_filter = self.params.get('match_filter')
+ if match_filter is None:
+ return None
+
+ cancelled = None
+ try:
+ try:
+ ret = match_filter(info_dict, incomplete=incomplete)
+ except TypeError:
+ # For backward compatibility
+ ret = None if incomplete else match_filter(info_dict)
+ except DownloadCancelled as err:
+ if err.msg is not NO_DEFAULT:
+ raise
+ ret, cancelled = err.msg, err
+
+ if ret is NO_DEFAULT:
+ while True:
+ filename = self._format_screen(self.prepare_filename(info_dict), self.Styles.FILENAME)
+ reply = input(self._format_screen(
+ f'Download "{filename}"? (Y/n): ', self.Styles.EMPHASIS)).lower().strip()
+ if reply in {'y', ''}:
+ return None
+ elif reply == 'n':
+ if cancelled:
+ raise type(cancelled)(f'Skipping {video_title}')
+ return f'Skipping {video_title}'
+ return ret
+
+ if self.in_download_archive(info_dict):
+ reason = ''.join((
+ format_field(info_dict, 'id', f'{self._format_screen("%s", self.Styles.ID)}: '),
+ format_field(info_dict, 'title', f'{self._format_screen("%s", self.Styles.EMPHASIS)} '),
+ 'has already been recorded in the archive'))
+ break_opt, break_err = 'break_on_existing', ExistingVideoReached
+ else:
+ try:
+ reason = check_filter()
+ except DownloadCancelled as e:
+ reason, break_opt, break_err = e.msg, 'match_filter', type(e)
+ else:
+ break_opt, break_err = 'break_on_reject', RejectedVideoReached
+ if reason is not None:
+ if not silent:
+ self.to_screen('[download] ' + reason)
+ if self.params.get(break_opt, False):
+ raise break_err()
+ return reason
+
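+ # Illustrative match_filter sketch (hypothetical), following the signature
+ # documented in the class docstring:
+ #
+ #     def longer_than_a_minute(info_dict, *, incomplete):
+ #         if not incomplete and (info_dict.get('duration') or 0) < 60:
+ #             return 'Video is shorter than a minute'  # message => skipped
+ #         return None                                  # None => downloaded
+ #
+ #     YoutubeDL({'match_filter': longer_than_a_minute})
+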
+ @staticmethod
+ def add_extra_info(info_dict, extra_info):
+ '''Set the keys from extra_info in the info dict if they are missing'''
+ for key, value in extra_info.items():
+ info_dict.setdefault(key, value)
+
+ def extract_info(self, url, download=True, ie_key=None, extra_info=None,
+ process=True, force_generic_extractor=False):
+ """
+ Extract and return the information dictionary of the URL
+
+ Arguments:
+ @param url URL to extract
+
+ Keyword arguments:
+ @param download Whether to download videos
+ @param process Whether to resolve all unresolved references (URLs, playlist items).
+ Must be True for download to work
+ @param ie_key Use only the extractor with this key
+
+ @param extra_info Dictionary containing the extra values to add to the info (For internal use only)
+ @force_generic_extractor Force using the generic extractor (Deprecated; use ie_key='Generic')
+ """
+
+ if extra_info is None:
+ extra_info = {}
+
+ if not ie_key and force_generic_extractor:
+ ie_key = 'Generic'
+
+ if ie_key:
+ ies = {ie_key: self._ies[ie_key]} if ie_key in self._ies else {}
+ else:
+ ies = self._ies
+
+ for key, ie in ies.items():
+ if not ie.suitable(url):
+ continue
+
+ if not ie.working():
+ self.report_warning('The program functionality for this site has been marked as broken, '
+ 'and will probably not work.')
+
+ temp_id = ie.get_temp_id(url)
+ if temp_id is not None and self.in_download_archive({'id': temp_id, 'ie_key': key}):
+ self.to_screen(f'[download] {self._format_screen(temp_id, self.Styles.ID)}: '
+ 'has already been recorded in the archive')
+ if self.params.get('break_on_existing', False):
+ raise ExistingVideoReached()
+ break
+ return self.__extract_info(url, self.get_info_extractor(key), download, extra_info, process)
+ else:
+ extractors_restricted = self.params.get('allowed_extractors') not in (None, ['default'])
+ self.report_error(f'No suitable extractor{format_field(ie_key, None, " (%s)")} found for URL {url}',
+ tb=False if extractors_restricted else None)
+
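+ # Illustrative (hypothetical ydl instance, placeholder URL): fetch metadata
+ # only, without downloading:
+ #
+ #     info = ydl.extract_info('https://example.com/video', download=False)
+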
+ def _handle_extraction_exceptions(func):
+ @functools.wraps(func)
+ def wrapper(self, *args, **kwargs):
+ while True:
+ try:
+ return func(self, *args, **kwargs)
+ except (DownloadCancelled, LazyList.IndexError, PagedList.IndexError):
+ raise
+ except ReExtractInfo as e:
+ if e.expected:
+ self.to_screen(f'{e}; Re-extracting data')
+ else:
+ self.to_stderr('\r')
+ self.report_warning(f'{e}; Re-extracting data')
+ continue
+ except GeoRestrictedError as e:
+ msg = e.msg
+ if e.countries:
+ msg += '\nThis video is available in %s.' % ', '.join(
+ map(ISO3166Utils.short2full, e.countries))
+ msg += '\nYou might want to use a VPN or a proxy server (with --proxy) to work around this.'
+ self.report_error(msg)
+ except ExtractorError as e: # An error we somewhat expected
+ self.report_error(str(e), e.format_traceback())
+ except Exception as e:
+ if self.params.get('ignoreerrors'):
+ self.report_error(str(e), tb=encode_compat_str(traceback.format_exc()))
+ else:
+ raise
+ break
+ return wrapper
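+ # Applied as a decorator below (see __extract_info and __process_iterable_entry):
+ # ReExtractInfo restarts the wrapped call, DownloadCancelled and index errors
+ # propagate, and other errors are swallowed when ignoreerrors is set.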
+
+ def _wait_for_video(self, ie_result=None):
+ ie_result = ie_result or {}
+ if (not self.params.get('wait_for_video')
+ or ie_result.get('_type', 'video') != 'video'
+ or ie_result.get('formats') or ie_result.get('url')):
+ return
+
+ format_dur = lambda dur: '%02d:%02d:%02d' % timetuple_from_msec(dur * 1000)[:-1]
+ last_msg = ''
+
+ def progress(msg):
+ nonlocal last_msg
+ full_msg = f'{msg}\n'
+ if not self.params.get('noprogress'):
+ full_msg = msg + ' ' * (len(last_msg) - len(msg)) + '\r'
+ elif last_msg:
+ return
+ self.to_screen(full_msg, skip_eol=True)
+ last_msg = msg
+
+ min_wait, max_wait = self.params.get('wait_for_video')
+ diff = try_get(ie_result, lambda x: x['release_timestamp'] - time.time())
+ if diff is None and ie_result.get('live_status') == 'is_upcoming':
+ diff = round(random.uniform(min_wait, max_wait) if (max_wait and min_wait) else (max_wait or min_wait), 0)
+ self.report_warning('Release time of video is not known')
+ elif ie_result and (diff or 0) <= 0:
+ self.report_warning('Video should already be available according to extracted info')
+ diff = min(max(diff or 0, min_wait or 0), max_wait or float('inf'))
+ self.to_screen(f'[wait] Waiting for {format_dur(diff)} - Press Ctrl+C to try now')
+
+ wait_till = time.time() + diff
+ try:
+ while True:
+ diff = wait_till - time.time()
+ if diff <= 0:
+ progress('')
+ raise ReExtractInfo('[wait] Wait period ended', expected=True)
+ progress(f'[wait] Remaining time until next attempt: {self._format_screen(format_dur(diff), self.Styles.EMPHASIS)}')
+ time.sleep(1)
+ except KeyboardInterrupt:
+ progress('')
+ raise ReExtractInfo('[wait] Interrupted by user', expected=True)
+ except BaseException as e:
+ if not isinstance(e, ReExtractInfo):
+ self.to_screen('')
+ raise
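+ # Illustrative behaviour, assuming params {'wait_for_video': (60, 600)}: a video
+ # with a known release_timestamp waits until that time (clamped to 60-600s from
+ # now), while an upcoming live with no release time waits a random 60-600s.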
+
+ def _load_cookies(self, data, *, autoscope=True):
+ """Loads cookies from a `Cookie` header
+
+ This tries to work around the security vulnerability of passing cookies to every domain.
+ See: https://github.com/yt-dlp/yt-dlp/security/advisories/GHSA-v8mc-9377-rwjj
+
+ @param data The Cookie header as string to load the cookies from
+ @param autoscope If `False`, scope cookies using Set-Cookie syntax and error for cookie without domains
+ If `True`, save cookies for later to be stored in the jar with a limited scope
+ If a URL, save cookies in the jar with the domain of the URL
+ """
+ for cookie in LenientSimpleCookie(data).values():
+ if autoscope and any(cookie.values()):
+ raise ValueError('Invalid syntax in Cookie Header')
+
+ domain = cookie.get('domain') or ''
+ expiry = cookie.get('expires')
+ if expiry == '': # 0 is valid
+ expiry = None
+ prepared_cookie = http.cookiejar.Cookie(
+ cookie.get('version') or 0, cookie.key, cookie.value, None, False,
+ domain, True, True, cookie.get('path') or '', bool(cookie.get('path')),
+ cookie.get('secure') or False, expiry, False, None, None, {})
+
+ if domain:
+ self.cookiejar.set_cookie(prepared_cookie)
+ elif autoscope is True:
+ self.deprecated_feature(
+ 'Passing cookies as a header is a potential security risk; '
+ 'they will be scoped to the domain of the downloaded URLs. '
+ 'Please consider loading cookies from a file or browser instead.')
+ self.__header_cookies.append(prepared_cookie)
+ elif autoscope:
+ self.report_warning(
+ 'The extractor result contains an unscoped cookie as an HTTP header. '
+ f'If you are using yt-dlp with an input URL{bug_reports_message(before=",")}',
+ only_once=True)
+ self._apply_header_cookies(autoscope, [prepared_cookie])
+ else:
+ self.report_error('Unscoped cookies are not allowed; please specify some sort of scoping',
+ tb=False, is_error=False)
+
+ def _apply_header_cookies(self, url, cookies=None):
+ """Applies stray header cookies to the provided url
+
+ This loads header cookies and scopes them to the domain provided in `url`.
+ While this is not ideal, it helps reduce the risk of them being sent
+ to an unintended destination while mostly maintaining compatibility.
+ """
+ parsed = urllib.parse.urlparse(url)
+ if not parsed.hostname:
+ return
+
+ for cookie in map(copy.copy, cookies or self.__header_cookies):
+ cookie.domain = f'.{parsed.hostname}'
+ self.cookiejar.set_cookie(cookie)
+
+ @_handle_extraction_exceptions
+ def __extract_info(self, url, ie, download, extra_info, process):
+ self._apply_header_cookies(url)
+
+ try:
+ ie_result = ie.extract(url)
+ except UserNotLive as e:
+ if process:
+ if self.params.get('wait_for_video'):
+ self.report_warning(e)
+ self._wait_for_video()
+ raise
+ if ie_result is None: # Finished already (backwards compatibility; listformats and friends should be moved here)
+ self.report_warning(f'Extractor {ie.IE_NAME} returned nothing{bug_reports_message()}')
+ return
+ if isinstance(ie_result, list):
+ # Backwards compatibility: old IE result format
+ ie_result = {
+ '_type': 'compat_list',
+ 'entries': ie_result,
+ }
+ if extra_info.get('original_url'):
+ ie_result.setdefault('original_url', extra_info['original_url'])
+ self.add_default_extra_info(ie_result, ie, url)
+ if process:
+ self._wait_for_video(ie_result)
+ return self.process_ie_result(ie_result, download, extra_info)
+ else:
+ return ie_result
+
+ def add_default_extra_info(self, ie_result, ie, url):
+ if url is not None:
+ self.add_extra_info(ie_result, {
+ 'webpage_url': url,
+ 'original_url': url,
+ })
+ webpage_url = ie_result.get('webpage_url')
+ if webpage_url:
+ self.add_extra_info(ie_result, {
+ 'webpage_url_basename': url_basename(webpage_url),
+ 'webpage_url_domain': get_domain(webpage_url),
+ })
+ if ie is not None:
+ self.add_extra_info(ie_result, {
+ 'extractor': ie.IE_NAME,
+ 'extractor_key': ie.ie_key(),
+ })
+
+ def process_ie_result(self, ie_result, download=True, extra_info=None):
+ """
+ Take the result of the ie (may be modified) and resolve all unresolved
+ references (URLs, playlist items).
+
+ It will also download the videos if 'download' is true.
+ Returns the resolved ie_result.
+ """
+ if extra_info is None:
+ extra_info = {}
+ result_type = ie_result.get('_type', 'video')
+
+ if result_type in ('url', 'url_transparent'):
+ ie_result['url'] = sanitize_url(
+ ie_result['url'], scheme='http' if self.params.get('prefer_insecure') else 'https')
+ if ie_result.get('original_url') and not extra_info.get('original_url'):
+ extra_info = {'original_url': ie_result['original_url'], **extra_info}
+
+ extract_flat = self.params.get('extract_flat', False)
+ if ((extract_flat == 'in_playlist' and 'playlist' in extra_info)
+ or extract_flat is True):
+ info_copy = ie_result.copy()
+ ie = try_get(ie_result.get('ie_key'), self.get_info_extractor)
+ if ie and not ie_result.get('id'):
+ info_copy['id'] = ie.get_temp_id(ie_result['url'])
+ self.add_default_extra_info(info_copy, ie, ie_result['url'])
+ self.add_extra_info(info_copy, extra_info)
+ info_copy, _ = self.pre_process(info_copy)
+ self._fill_common_fields(info_copy, False)
+ self.__forced_printings(info_copy)
+ self._raise_pending_errors(info_copy)
+ if self.params.get('force_write_download_archive', False):
+ self.record_download_archive(info_copy)
+ return ie_result
+
+ if result_type == 'video':
+ self.add_extra_info(ie_result, extra_info)
+ ie_result = self.process_video_result(ie_result, download=download)
+ self._raise_pending_errors(ie_result)
+ additional_urls = (ie_result or {}).get('additional_urls')
+ if additional_urls:
+ # TODO: Improve MetadataParserPP to allow setting a list
+ if isinstance(additional_urls, str):
+ additional_urls = [additional_urls]
+ self.to_screen(
+ '[info] %s: %d additional URL(s) requested' % (ie_result['id'], len(additional_urls)))
+ self.write_debug('Additional URLs: "%s"' % '", "'.join(additional_urls))
+ ie_result['additional_entries'] = [
+ self.extract_info(
+ url, download, extra_info=extra_info,
+ force_generic_extractor=self.params.get('force_generic_extractor'))
+ for url in additional_urls
+ ]
+ return ie_result
+ elif result_type == 'url':
+ # We have to add extra_info to the results because it may be
+ # contained in a playlist
+ return self.extract_info(
+ ie_result['url'], download,
+ ie_key=ie_result.get('ie_key'),
+ extra_info=extra_info)
+ elif result_type == 'url_transparent':
+ # Use the information from the embedding page
+ info = self.extract_info(
+ ie_result['url'], ie_key=ie_result.get('ie_key'),
+ extra_info=extra_info, download=False, process=False)
+
+ # extract_info may return None when ignoreerrors is enabled and
+ # extraction failed with an error, don't crash and return early
+ # in this case
+ if not info:
+ return info
+
+ exempted_fields = {'_type', 'url', 'ie_key'}
+ if not ie_result.get('section_end') and ie_result.get('section_start') is None:
+ # For video clips, the id etc of the clip extractor should be used
+ exempted_fields |= {'id', 'extractor', 'extractor_key'}
+
+ new_result = info.copy()
+ new_result.update(filter_dict(ie_result, lambda k, v: v is not None and k not in exempted_fields))
+
+ # Extracted info may not be a video result (i.e.
+ # info.get('_type', 'video') != 'video') but rather a url or
+ # url_transparent. In such cases, the outer metadata (from ie_result)
+ # should be propagated to the inner one (info). For this to happen,
+ # the _type of info should be overridden with url_transparent. This
+ # fixes the issue from https://github.com/ytdl-org/youtube-dl/pull/11163.
+ if new_result.get('_type') == 'url':
+ new_result['_type'] = 'url_transparent'
+
+ return self.process_ie_result(
+ new_result, download=download, extra_info=extra_info)
+ elif result_type in ('playlist', 'multi_video'):
+ # Protect from infinite recursion due to recursively nested playlists
+ # (see https://github.com/ytdl-org/youtube-dl/issues/27833)
+ webpage_url = ie_result.get('webpage_url') # Playlists may not have a webpage_url
+ if webpage_url and webpage_url in self._playlist_urls:
+ self.to_screen(
+ '[download] Skipping already downloaded playlist: %s'
+ % (ie_result.get('title') or ie_result.get('id')))
+ return
+
+ self._playlist_level += 1
+ self._playlist_urls.add(webpage_url)
+ self._fill_common_fields(ie_result, False)
+ self._sanitize_thumbnails(ie_result)
+ try:
+ return self.__process_playlist(ie_result, download)
+ finally:
+ self._playlist_level -= 1
+ if not self._playlist_level:
+ self._playlist_urls.clear()
+ elif result_type == 'compat_list':
+ self.report_warning(
+ 'Extractor %s returned a compat_list result. '
+ 'It needs to be updated.' % ie_result.get('extractor'))
+
+ def _fixup(r):
+ self.add_extra_info(r, {
+ 'extractor': ie_result['extractor'],
+ 'webpage_url': ie_result['webpage_url'],
+ 'webpage_url_basename': url_basename(ie_result['webpage_url']),
+ 'webpage_url_domain': get_domain(ie_result['webpage_url']),
+ 'extractor_key': ie_result['extractor_key'],
+ })
+ return r
+ ie_result['entries'] = [
+ self.process_ie_result(_fixup(r), download, extra_info)
+ for r in ie_result['entries']
+ ]
+ return ie_result
+ else:
+ raise Exception('Invalid result type: %s' % result_type)
+
+ def _ensure_dir_exists(self, path):
+ return make_dir(path, self.report_error)
+
+ @staticmethod
+ def _playlist_infodict(ie_result, strict=False, **kwargs):
+ info = {
+ 'playlist_count': ie_result.get('playlist_count'),
+ 'playlist': ie_result.get('title') or ie_result.get('id'),
+ 'playlist_id': ie_result.get('id'),
+ 'playlist_title': ie_result.get('title'),
+ 'playlist_uploader': ie_result.get('uploader'),
+ 'playlist_uploader_id': ie_result.get('uploader_id'),
+ **kwargs,
+ }
+ if strict:
+ return info
+ if ie_result.get('webpage_url'):
+ info.update({
+ 'webpage_url': ie_result['webpage_url'],
+ 'webpage_url_basename': url_basename(ie_result['webpage_url']),
+ 'webpage_url_domain': get_domain(ie_result['webpage_url']),
+ })
+ return {
+ **info,
+ 'playlist_index': 0,
+ '__last_playlist_index': max(ie_result.get('requested_entries') or (0, 0)),
+ 'extractor': ie_result['extractor'],
+ 'extractor_key': ie_result['extractor_key'],
+ }
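+ # Sketch of the strict output for an ie_result like {'id': 'PL1', 'title': 'My mix'}:
+ #   {'playlist_count': None, 'playlist': 'My mix', 'playlist_id': 'PL1',
+ #    'playlist_title': 'My mix', 'playlist_uploader': None, ...}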
+
+ def __process_playlist(self, ie_result, download):
+ """Process each entry in the playlist"""
+ assert ie_result['_type'] in ('playlist', 'multi_video')
+
+ common_info = self._playlist_infodict(ie_result, strict=True)
+ title = common_info.get('playlist') or '<Untitled>'
+ if self._match_entry(common_info, incomplete=True) is not None:
+ return
+ self.to_screen(f'[download] Downloading {ie_result["_type"]}: {title}')
+
+ all_entries = PlaylistEntries(self, ie_result)
+ entries = orderedSet(all_entries.get_requested_items(), lazy=True)
+
+ lazy = self.params.get('lazy_playlist')
+ if lazy:
+ resolved_entries, n_entries = [], 'N/A'
+ ie_result['requested_entries'], ie_result['entries'] = None, None
+ else:
+ entries = resolved_entries = list(entries)
+ n_entries = len(resolved_entries)
+ ie_result['requested_entries'], ie_result['entries'] = tuple(zip(*resolved_entries)) or ([], [])
+ if not ie_result.get('playlist_count'):
+ # Better to do this after potentially exhausting entries
+ ie_result['playlist_count'] = all_entries.get_full_count()
+
+ extra = self._playlist_infodict(ie_result, n_entries=int_or_none(n_entries))
+ ie_copy = collections.ChainMap(ie_result, extra)
+
+ _infojson_written = False
+ write_playlist_files = self.params.get('allow_playlist_files', True)
+ if write_playlist_files and self.params.get('list_thumbnails'):
+ self.list_thumbnails(ie_result)
+ if write_playlist_files and not self.params.get('simulate'):
+ _infojson_written = self._write_info_json(
+ 'playlist', ie_result, self.prepare_filename(ie_copy, 'pl_infojson'))
+ if _infojson_written is None:
+ return
+ if self._write_description('playlist', ie_result,
+ self.prepare_filename(ie_copy, 'pl_description')) is None:
+ return
+ # TODO: This should be passed to ThumbnailsConvertor if necessary
+ self._write_thumbnails('playlist', ie_result, self.prepare_filename(ie_copy, 'pl_thumbnail'))
+
+ if lazy:
+ if self.params.get('playlistreverse') or self.params.get('playlistrandom'):
+ self.report_warning('playlistreverse and playlistrandom are not supported with lazy_playlist', only_once=True)
+ elif self.params.get('playlistreverse'):
+ entries.reverse()
+ elif self.params.get('playlistrandom'):
+ random.shuffle(entries)
+
+ self.to_screen(f'[{ie_result["extractor"]}] Playlist {title}: Downloading {n_entries} items'
+ f'{format_field(ie_result, "playlist_count", " of %s")}')
+
+ keep_resolved_entries = self.params.get('extract_flat') != 'discard'
+ if self.params.get('extract_flat') == 'discard_in_playlist':
+ keep_resolved_entries = ie_result['_type'] != 'playlist'
+ if keep_resolved_entries:
+ self.write_debug('The information of all playlist entries will be held in memory')
+
+ failures = 0
+ max_failures = self.params.get('skip_playlist_after_errors') or float('inf')
+ for i, (playlist_index, entry) in enumerate(entries):
+ if lazy:
+ resolved_entries.append((playlist_index, entry))
+ if not entry:
+ continue
+
+ entry['__x_forwarded_for_ip'] = ie_result.get('__x_forwarded_for_ip')
+ if not lazy and 'playlist-index' in self.params['compat_opts']:
+ playlist_index = ie_result['requested_entries'][i]
+
+ entry_copy = collections.ChainMap(entry, {
+ **common_info,
+ 'n_entries': int_or_none(n_entries),
+ 'playlist_index': playlist_index,
+ 'playlist_autonumber': i + 1,
+ })
+
+ if self._match_entry(entry_copy, incomplete=True) is not None:
+ # For compatibility with youtube-dl. See https://github.com/yt-dlp/yt-dlp/issues/4369
+ resolved_entries[i] = (playlist_index, NO_DEFAULT)
+ continue
+
+ self.to_screen('[download] Downloading item %s of %s' % (
+ self._format_screen(i + 1, self.Styles.ID), self._format_screen(n_entries, self.Styles.EMPHASIS)))
+
+ entry_result = self.__process_iterable_entry(entry, download, collections.ChainMap({
+ 'playlist_index': playlist_index,
+ 'playlist_autonumber': i + 1,
+ }, extra))
+ if not entry_result:
+ failures += 1
+ if failures >= max_failures:
+ self.report_error(
+ f'Skipping the remaining entries in playlist "{title}" since {failures} items failed extraction')
+ break
+ if keep_resolved_entries:
+ resolved_entries[i] = (playlist_index, entry_result)
+
+ # Update with processed data
+ ie_result['entries'] = [e for _, e in resolved_entries if e is not NO_DEFAULT]
+ ie_result['requested_entries'] = [i for i, e in resolved_entries if e is not NO_DEFAULT]
+ if ie_result['requested_entries'] == try_call(lambda: list(range(1, ie_result['playlist_count'] + 1))):
+ # Do not set for full playlist
+ ie_result.pop('requested_entries')
+
+ # Write the updated info to json
+ if _infojson_written is True and self._write_info_json(
+ 'updated playlist', ie_result,
+ self.prepare_filename(ie_copy, 'pl_infojson'), overwrite=True) is None:
+ return
+
+ ie_result = self.run_all_pps('playlist', ie_result)
+ self.to_screen(f'[download] Finished downloading playlist: {title}')
+ return ie_result
+
+ @_handle_extraction_exceptions
+ def __process_iterable_entry(self, entry, download, extra_info):
+ return self.process_ie_result(
+ entry, download=download, extra_info=extra_info)
+
+ def _build_format_filter(self, filter_spec):
+ " Returns a function to filter the formats according to the filter_spec "
+
+ OPERATORS = {
+ '<': operator.lt,
+ '<=': operator.le,
+ '>': operator.gt,
+ '>=': operator.ge,
+ '=': operator.eq,
+ '!=': operator.ne,
+ }
+ operator_rex = re.compile(r'''(?x)\s*
+ (?P<key>[\w.-]+)\s*
+ (?P<op>%s)(?P<none_inclusive>\s*\?)?\s*
+ (?P<value>[0-9.]+(?:[kKmMgGtTpPeEzZyY]i?[Bb]?)?)\s*
+ ''' % '|'.join(map(re.escape, OPERATORS.keys())))
+ m = operator_rex.fullmatch(filter_spec)
+ if m:
+ try:
+ comparison_value = int(m.group('value'))
+ except ValueError:
+ comparison_value = parse_filesize(m.group('value'))
+ if comparison_value is None:
+ comparison_value = parse_filesize(m.group('value') + 'B')
+ if comparison_value is None:
+ raise ValueError(
+ 'Invalid value %r in format specification %r' % (
+ m.group('value'), filter_spec))
+ op = OPERATORS[m.group('op')]
+
+ if not m:
+ STR_OPERATORS = {
+ '=': operator.eq,
+ '^=': lambda attr, value: attr.startswith(value),
+ '$=': lambda attr, value: attr.endswith(value),
+ '*=': lambda attr, value: value in attr,
+ '~=': lambda attr, value: value.search(attr) is not None
+ }
+ str_operator_rex = re.compile(r'''(?x)\s*
+ (?P<key>[a-zA-Z0-9._-]+)\s*
+ (?P<negation>!\s*)?(?P<op>%s)\s*(?P<none_inclusive>\?\s*)?
+ (?P<quote>["'])?
+ (?P<value>(?(quote)(?:(?!(?P=quote))[^\\]|\\.)+|[\w.-]+))
+ (?(quote)(?P=quote))\s*
+ ''' % '|'.join(map(re.escape, STR_OPERATORS.keys())))
+ m = str_operator_rex.fullmatch(filter_spec)
+ if m:
+ if m.group('op') == '~=':
+ comparison_value = re.compile(m.group('value'))
+ else:
+ comparison_value = re.sub(r'''\\([\\"'])''', r'\1', m.group('value'))
+ str_op = STR_OPERATORS[m.group('op')]
+ if m.group('negation'):
+ op = lambda attr, value: not str_op(attr, value)
+ else:
+ op = str_op
+
+ if not m:
+ raise SyntaxError('Invalid filter specification %r' % filter_spec)
+
+ def _filter(f):
+ actual_value = f.get(m.group('key'))
+ if actual_value is None:
+ return m.group('none_inclusive')
+ return op(actual_value, comparison_value)
+ return _filter
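+ # Examples of specs the two grammars above accept (sketch): numeric —
+ # 'height<=720', 'filesize>10M', 'fps!=30'; string — 'ext=mp4', 'ext!=webm',
+ # 'format_note~=(?i)premium'. A '?' after the operator (e.g. 'height<=?720')
+ # also keeps formats where the field is unknown.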
+
+ def _check_formats(self, formats):
+ for f in formats:
+ self.to_screen('[info] Testing format %s' % f['format_id'])
+ path = self.get_output_path('temp')
+ if not self._ensure_dir_exists(f'{path}/'):
+ continue
+ temp_file = tempfile.NamedTemporaryFile(suffix='.tmp', delete=False, dir=path or None)
+ temp_file.close()
+ try:
+ success, _ = self.dl(temp_file.name, f, test=True)
+ except (DownloadError, OSError, ValueError) + network_exceptions:
+ success = False
+ finally:
+ if os.path.exists(temp_file.name):
+ try:
+ os.remove(temp_file.name)
+ except OSError:
+ self.report_warning('Unable to delete temporary file "%s"' % temp_file.name)
+ if success:
+ yield f
+ else:
+ self.to_screen('[info] Unable to download format %s. Skipping...' % f['format_id'])
+
+ def _default_format_spec(self, info_dict, download=True):
+
+ def can_merge():
+ merger = FFmpegMergerPP(self)
+ return merger.available and merger.can_merge()
+
+ prefer_best = (
+ not self.params.get('simulate')
+ and download
+ and (
+ not can_merge()
+ or info_dict.get('is_live') and not self.params.get('live_from_start')
+ or self.params['outtmpl']['default'] == '-'))
+ compat = (
+ prefer_best
+ or self.params.get('allow_multiple_audio_streams', False)
+ or 'format-spec' in self.params['compat_opts'])
+
+ return (
+ 'best/bestvideo+bestaudio' if prefer_best
+ else 'bestvideo*+bestaudio/best' if not compat
+ else 'bestvideo+bestaudio/best')
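+ # Sketch of the outcomes above: when actually downloading, streaming to stdout
+ # (outtmpl '-'), a live stream without live_from_start, or an unavailable ffmpeg
+ # merger all select 'best/bestvideo+bestaudio'; otherwise the default is
+ # 'bestvideo*+bestaudio/best' (or 'bestvideo+bestaudio/best' in compat mode).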
+
+ def build_format_selector(self, format_spec):
+ def syntax_error(note, start):
+ message = (
+ 'Invalid format specification: '
+ '{}\n\t{}\n\t{}^'.format(note, format_spec, ' ' * start[1]))
+ return SyntaxError(message)
+
+ PICKFIRST = 'PICKFIRST'
+ MERGE = 'MERGE'
+ SINGLE = 'SINGLE'
+ GROUP = 'GROUP'
+ FormatSelector = collections.namedtuple('FormatSelector', ['type', 'selector', 'filters'])
+
+ allow_multiple_streams = {'audio': self.params.get('allow_multiple_audio_streams', False),
+ 'video': self.params.get('allow_multiple_video_streams', False)}
+
+ def _parse_filter(tokens):
+ filter_parts = []
+ for type, string_, start, _, _ in tokens:
+ if type == tokenize.OP and string_ == ']':
+ return ''.join(filter_parts)
+ else:
+ filter_parts.append(string_)
+
+ def _remove_unused_ops(tokens):
+ # Remove operators that we don't use and join them with the surrounding strings.
+ # E.g. 'mp4' '-' 'baseline' '-' '16x9' is converted to 'mp4-baseline-16x9'
+ ALLOWED_OPS = ('/', '+', ',', '(', ')')
+ last_string, last_start, last_end, last_line = None, None, None, None
+ for type, string_, start, end, line in tokens:
+ if type == tokenize.OP and string_ == '[':
+ if last_string:
+ yield tokenize.NAME, last_string, last_start, last_end, last_line
+ last_string = None
+ yield type, string_, start, end, line
+ # everything inside brackets will be handled by _parse_filter
+ for type, string_, start, end, line in tokens:
+ yield type, string_, start, end, line
+ if type == tokenize.OP and string_ == ']':
+ break
+ elif type == tokenize.OP and string_ in ALLOWED_OPS:
+ if last_string:
+ yield tokenize.NAME, last_string, last_start, last_end, last_line
+ last_string = None
+ yield type, string_, start, end, line
+ elif type in [tokenize.NAME, tokenize.NUMBER, tokenize.OP]:
+ if not last_string:
+ last_string = string_
+ last_start = start
+ last_end = end
+ else:
+ last_string += string_
+ if last_string:
+ yield tokenize.NAME, last_string, last_start, last_end, last_line
+
+ def _parse_format_selection(tokens, inside_merge=False, inside_choice=False, inside_group=False):
+ selectors = []
+ current_selector = None
+ for type, string_, start, _, _ in tokens:
+ # ENCODING is only defined in Python 3.x
+ if type == getattr(tokenize, 'ENCODING', None):
+ continue
+ elif type in [tokenize.NAME, tokenize.NUMBER]:
+ current_selector = FormatSelector(SINGLE, string_, [])
+ elif type == tokenize.OP:
+ if string_ == ')':
+ if not inside_group:
+ # ')' will be handled by the parentheses group
+ tokens.restore_last_token()
+ break
+ elif inside_merge and string_ in ['/', ',']:
+ tokens.restore_last_token()
+ break
+ elif inside_choice and string_ == ',':
+ tokens.restore_last_token()
+ break
+ elif string_ == ',':
+ if not current_selector:
+ raise syntax_error('"," must follow a format selector', start)
+ selectors.append(current_selector)
+ current_selector = None
+ elif string_ == '/':
+ if not current_selector:
+ raise syntax_error('"/" must follow a format selector', start)
+ first_choice = current_selector
+ second_choice = _parse_format_selection(tokens, inside_choice=True)
+ current_selector = FormatSelector(PICKFIRST, (first_choice, second_choice), [])
+ elif string_ == '[':
+ if not current_selector:
+ current_selector = FormatSelector(SINGLE, 'best', [])
+ format_filter = _parse_filter(tokens)
+ current_selector.filters.append(format_filter)
+ elif string_ == '(':
+ if current_selector:
+ raise syntax_error('Unexpected "("', start)
+ group = _parse_format_selection(tokens, inside_group=True)
+ current_selector = FormatSelector(GROUP, group, [])
+ elif string_ == '+':
+ if not current_selector:
+ raise syntax_error('Unexpected "+"', start)
+ selector_1 = current_selector
+ selector_2 = _parse_format_selection(tokens, inside_merge=True)
+ if not selector_2:
+ raise syntax_error('Expected a selector', start)
+ current_selector = FormatSelector(MERGE, (selector_1, selector_2), [])
+ else:
+ raise syntax_error(f'Operator not recognized: "{string_}"', start)
+ elif type == tokenize.ENDMARKER:
+ break
+ if current_selector:
+ selectors.append(current_selector)
+ return selectors
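+ # e.g. 'bv*+ba/b' yields a PICKFIRST of (MERGE of 'bv*' and 'ba') falling back
+ # to 'b' — a sketch of the selector tree built above.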
+
+ def _merge(formats_pair):
+ format_1, format_2 = formats_pair
+
+ formats_info = []
+ formats_info.extend(format_1.get('requested_formats', (format_1,)))
+ formats_info.extend(format_2.get('requested_formats', (format_2,)))
+
+ if not allow_multiple_streams['video'] or not allow_multiple_streams['audio']:
+ get_no_more = {'video': False, 'audio': False}
+ for (i, fmt_info) in enumerate(formats_info):
+ if fmt_info.get('acodec') == fmt_info.get('vcodec') == 'none':
+ formats_info.pop(i)
+ continue
+ for aud_vid in ['audio', 'video']:
+ if not allow_multiple_streams[aud_vid] and fmt_info.get(aud_vid[0] + 'codec') != 'none':
+ if get_no_more[aud_vid]:
+ formats_info.pop(i)
+ break
+ get_no_more[aud_vid] = True
+
+ if len(formats_info) == 1:
+ return formats_info[0]
+
+ video_fmts = [fmt_info for fmt_info in formats_info if fmt_info.get('vcodec') != 'none']
+ audio_fmts = [fmt_info for fmt_info in formats_info if fmt_info.get('acodec') != 'none']
+
+ the_only_video = video_fmts[0] if len(video_fmts) == 1 else None
+ the_only_audio = audio_fmts[0] if len(audio_fmts) == 1 else None
+
+ output_ext = get_compatible_ext(
+ vcodecs=[f.get('vcodec') for f in video_fmts],
+ acodecs=[f.get('acodec') for f in audio_fmts],
+ vexts=[f['ext'] for f in video_fmts],
+ aexts=[f['ext'] for f in audio_fmts],
+ preferences=(try_call(lambda: self.params['merge_output_format'].split('/'))
+ or self.params.get('prefer_free_formats') and ('webm', 'mkv')))
+
+ filtered = lambda *keys: filter(None, (traverse_obj(fmt, *keys) for fmt in formats_info))
+
+ new_dict = {
+ 'requested_formats': formats_info,
+ 'format': '+'.join(filtered('format')),
+ 'format_id': '+'.join(filtered('format_id')),
+ 'ext': output_ext,
+ 'protocol': '+'.join(map(determine_protocol, formats_info)),
+ 'language': '+'.join(orderedSet(filtered('language'))) or None,
+ 'format_note': '+'.join(orderedSet(filtered('format_note'))) or None,
+ 'filesize_approx': sum(filtered('filesize', 'filesize_approx')) or None,
+ 'tbr': sum(filtered('tbr', 'vbr', 'abr')),
+ }
+
+ if the_only_video:
+ new_dict.update({
+ 'width': the_only_video.get('width'),
+ 'height': the_only_video.get('height'),
+ 'resolution': the_only_video.get('resolution') or self.format_resolution(the_only_video),
+ 'fps': the_only_video.get('fps'),
+ 'dynamic_range': the_only_video.get('dynamic_range'),
+ 'vcodec': the_only_video.get('vcodec'),
+ 'vbr': the_only_video.get('vbr'),
+ 'stretched_ratio': the_only_video.get('stretched_ratio'),
+ 'aspect_ratio': the_only_video.get('aspect_ratio'),
+ })
+
+ if the_only_audio:
+ new_dict.update({
+ 'acodec': the_only_audio.get('acodec'),
+ 'abr': the_only_audio.get('abr'),
+ 'asr': the_only_audio.get('asr'),
+ 'audio_channels': the_only_audio.get('audio_channels')
+ })
+
+ return new_dict
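+ # Sketch: merging a video-only format {'format_id': '137', 'ext': 'mp4',
+ # 'vcodec': 'avc1', 'acodec': 'none'} with an audio-only {'format_id': '140',
+ # 'ext': 'm4a', 'acodec': 'mp4a', 'vcodec': 'none'} produces format_id
+ # '137+140', an ext chosen by get_compatible_ext, and summed tbr/filesize.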
+
+ def _check_formats(formats):
+ if self.params.get('check_formats') == 'selected':
+ yield from self._check_formats(formats)
+ return
+ elif (self.params.get('check_formats') is not None
+ or self.params.get('allow_unplayable_formats')):
+ yield from formats
+ return
+
+ for f in formats:
+ if f.get('has_drm') or f.get('__needs_testing'):
+ yield from self._check_formats([f])
+ else:
+ yield f
+
+ def _build_selector_function(selector):
+ if isinstance(selector, list): # ,
+ fs = [_build_selector_function(s) for s in selector]
+
+ def selector_function(ctx):
+ for f in fs:
+ yield from f(ctx)
+ return selector_function
+
+ elif selector.type == GROUP: # ()
+ selector_function = _build_selector_function(selector.selector)
+
+ elif selector.type == PICKFIRST: # /
+ fs = [_build_selector_function(s) for s in selector.selector]
+
+ def selector_function(ctx):
+ for f in fs:
+ picked_formats = list(f(ctx))
+ if picked_formats:
+ return picked_formats
+ return []
+
+ elif selector.type == MERGE: # +
+ selector_1, selector_2 = map(_build_selector_function, selector.selector)
+
+ def selector_function(ctx):
+ for pair in itertools.product(selector_1(ctx), selector_2(ctx)):
+ yield _merge(pair)
+
+ elif selector.type == SINGLE: # atom
+ format_spec = selector.selector or 'best'
+
+ # TODO: Add allvideo, allaudio etc by generalizing the code with best/worst selector
+ if format_spec == 'all':
+ def selector_function(ctx):
+ yield from _check_formats(ctx['formats'][::-1])
+ elif format_spec == 'mergeall':
+ def selector_function(ctx):
+ formats = list(_check_formats(
+ f for f in ctx['formats'] if f.get('vcodec') != 'none' or f.get('acodec') != 'none'))
+ if not formats:
+ return
+ merged_format = formats[-1]
+ for f in formats[-2::-1]:
+ merged_format = _merge((merged_format, f))
+ yield merged_format
+
+ else:
+ format_fallback, separate_fallback, format_reverse, format_idx = False, None, True, 1
+ mobj = re.match(
+ r'(?P<bw>best|worst|b|w)(?P<type>video|audio|v|a)?(?P<mod>\*)?(?:\.(?P<n>[1-9]\d*))?$',
+ format_spec)
+ if mobj is not None:
+ format_idx = int_or_none(mobj.group('n'), default=1)
+ format_reverse = mobj.group('bw')[0] == 'b'
+ format_type = (mobj.group('type') or [None])[0]
+ not_format_type = {'v': 'a', 'a': 'v'}.get(format_type)
+ format_modified = mobj.group('mod') is not None
+
+ format_fallback = not format_type and not format_modified # for b, w
+ _filter_f = (
+ (lambda f: f.get('%scodec' % format_type) != 'none')
+ if format_type and format_modified # bv*, ba*, wv*, wa*
+ else (lambda f: f.get('%scodec' % not_format_type) == 'none')
+ if format_type # bv, ba, wv, wa
+ else (lambda f: f.get('vcodec') != 'none' and f.get('acodec') != 'none')
+ if not format_modified # b, w
+ else lambda f: True) # b*, w*
+ filter_f = lambda f: _filter_f(f) and (
+ f.get('vcodec') != 'none' or f.get('acodec') != 'none')
+ else:
+ if format_spec in self._format_selection_exts['audio']:
+ filter_f = lambda f: f.get('ext') == format_spec and f.get('acodec') != 'none'
+ elif format_spec in self._format_selection_exts['video']:
+ filter_f = lambda f: f.get('ext') == format_spec and f.get('acodec') != 'none' and f.get('vcodec') != 'none'
+ separate_fallback = lambda f: f.get('ext') == format_spec and f.get('vcodec') != 'none'
+ elif format_spec in self._format_selection_exts['storyboards']:
+ filter_f = lambda f: f.get('ext') == format_spec and f.get('acodec') == 'none' and f.get('vcodec') == 'none'
+ else:
+ filter_f = lambda f: f.get('format_id') == format_spec # id
+
+ def selector_function(ctx):
+ formats = list(ctx['formats'])
+ matches = list(filter(filter_f, formats)) if filter_f is not None else formats
+ if not matches:
+ if format_fallback and ctx['incomplete_formats']:
+ # for extractors with incomplete formats (audio only (soundcloud)
+ # or video only (imgur)) best/worst will fall back to
+ # best/worst {video,audio}-only format
+ matches = list(filter(lambda f: f.get('vcodec') != 'none' or f.get('acodec') != 'none', formats))
+ elif separate_fallback and not ctx['has_merged_format']:
+ # for compatibility with youtube-dl when there is no pre-merged format
+ matches = list(filter(separate_fallback, formats))
+ matches = LazyList(_check_formats(matches[::-1 if format_reverse else 1]))
+ try:
+ yield matches[format_idx - 1]
+ except LazyList.IndexError:
+ return
+
+ filters = [self._build_format_filter(f) for f in selector.filters]
+
+ def final_selector(ctx):
+ ctx_copy = dict(ctx)
+ for _filter in filters:
+ ctx_copy['formats'] = list(filter(_filter, ctx_copy['formats']))
+ return selector_function(ctx_copy)
+ return final_selector
+
+ # HACK: Python 3.12 changed the underlying parser, rendering '7_a' invalid
+ # Prefix numbers with random letters to avoid it being classified as a number
+ # See: https://github.com/yt-dlp/yt-dlp/pull/8797
+ # TODO: Implement parser not reliant on tokenize.tokenize
+ prefix = ''.join(random.choices(string.ascii_letters, k=32))
+ stream = io.BytesIO(re.sub(r'\d[_\d]*', rf'{prefix}\g<0>', format_spec).encode())
+ try:
+ tokens = list(_remove_unused_ops(
+ token._replace(string=token.string.replace(prefix, ''))
+ for token in tokenize.tokenize(stream.readline)))
+ except tokenize.TokenError:
+ raise syntax_error('Missing closing/opening brackets or parenthesis', (0, len(format_spec)))
+
+ class TokenIterator:
+ def __init__(self, tokens):
+ self.tokens = tokens
+ self.counter = 0
+
+ def __iter__(self):
+ return self
+
+ def __next__(self):
+ if self.counter >= len(self.tokens):
+ raise StopIteration()
+ value = self.tokens[self.counter]
+ self.counter += 1
+ return value
+
+ next = __next__
+
+ def restore_last_token(self):
+ self.counter -= 1
+
+ parsed_selector = _parse_format_selection(iter(TokenIterator(tokens)))
+ return _build_selector_function(parsed_selector)
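+ # Usage sketch: selector = self.build_format_selector('bv[height<=?720]+ba/b');
+ # calling selector({'formats': ..., 'has_merged_format': ...,
+ # 'incomplete_formats': ...}) yields the chosen format dicts (see
+ # process_video_result below for the actual ctx construction).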
+
+ def _calc_headers(self, info_dict, load_cookies=False):
+ res = HTTPHeaderDict(self.params['http_headers'], info_dict.get('http_headers'))
+ clean_headers(res)
+
+ if load_cookies: # For --load-info-json
+ self._load_cookies(res.get('Cookie'), autoscope=info_dict['url']) # compat
+ self._load_cookies(info_dict.get('cookies'), autoscope=False)
+ # The `Cookie` header is removed to prevent leaks and unscoped cookies.
+ # See: https://github.com/yt-dlp/yt-dlp/security/advisories/GHSA-v8mc-9377-rwjj
+ res.pop('Cookie', None)
+ cookies = self.cookiejar.get_cookies_for_url(info_dict['url'])
+ if cookies:
+ encoder = LenientSimpleCookie()
+ values = []
+ for cookie in cookies:
+ _, value = encoder.value_encode(cookie.value)
+ values.append(f'{cookie.name}={value}')
+ if cookie.domain:
+ values.append(f'Domain={cookie.domain}')
+ if cookie.path:
+ values.append(f'Path={cookie.path}')
+ if cookie.secure:
+ values.append('Secure')
+ if cookie.expires:
+ values.append(f'Expires={cookie.expires}')
+ if cookie.version:
+ values.append(f'Version={cookie.version}')
+ info_dict['cookies'] = '; '.join(values)
+
+ if 'X-Forwarded-For' not in res:
+ x_forwarded_for_ip = info_dict.get('__x_forwarded_for_ip')
+ if x_forwarded_for_ip:
+ res['X-Forwarded-For'] = x_forwarded_for_ip
+
+ return res
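+ # Sketch: a jar cookie sid=abc scoped to .example.com with path '/' is
+ # serialized into info_dict['cookies'] as 'sid=abc; Domain=.example.com; Path=/',
+ # while any raw Cookie header is dropped from the returned headers.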
+
+ def _calc_cookies(self, url):
+ self.deprecation_warning('"YoutubeDL._calc_cookies" is deprecated and may be removed in a future version')
+ return self.cookiejar.get_cookie_header(url)
+
+ def _sort_thumbnails(self, thumbnails):
+ thumbnails.sort(key=lambda t: (
+ t.get('preference') if t.get('preference') is not None else -1,
+ t.get('width') if t.get('width') is not None else -1,
+ t.get('height') if t.get('height') is not None else -1,
+ t.get('id') if t.get('id') is not None else '',
+ t.get('url')))
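+ # Sorting is ascending, so the "best" thumbnail ends up last; e.g. an entry
+ # without width/height (treated as -1) sorts before one with width 1280.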
+
+ def _sanitize_thumbnails(self, info_dict):
+ thumbnails = info_dict.get('thumbnails')
+ if thumbnails is None:
+ thumbnail = info_dict.get('thumbnail')
+ if thumbnail:
+ info_dict['thumbnails'] = thumbnails = [{'url': thumbnail}]
+ if not thumbnails:
+ return
+
+ def check_thumbnails(thumbnails):
+ for t in thumbnails:
+ self.to_screen(f'[info] Testing thumbnail {t["id"]}')
+ try:
+ self.urlopen(HEADRequest(t['url']))
+ except network_exceptions as err:
+ self.to_screen(f'[info] Unable to connect to thumbnail {t["id"]} URL {t["url"]!r} - {err}. Skipping...')
+ continue
+ yield t
+
+ self._sort_thumbnails(thumbnails)
+ for i, t in enumerate(thumbnails):
+ if t.get('id') is None:
+ t['id'] = '%d' % i
+ if t.get('width') and t.get('height'):
+ t['resolution'] = '%dx%d' % (t['width'], t['height'])
+ t['url'] = sanitize_url(t['url'])
+
+ if self.params.get('check_formats') is True:
+ info_dict['thumbnails'] = LazyList(check_thumbnails(thumbnails[::-1]), reverse=True)
+ else:
+ info_dict['thumbnails'] = thumbnails
+
+ def _fill_common_fields(self, info_dict, final=True):
+ # TODO: move sanitization here
+ if final:
+ title = info_dict['fulltitle'] = info_dict.get('title')
+ if not title:
+ if title == '':
+ self.write_debug('Extractor gave empty title. Creating a generic title')
+ else:
+ self.report_warning('Extractor failed to obtain "title". Creating a generic title instead')
+ info_dict['title'] = f'{info_dict["extractor"].replace(":", "-")} video #{info_dict["id"]}'
+
+ if info_dict.get('duration') is not None:
+ info_dict['duration_string'] = formatSeconds(info_dict['duration'])
+
+ for ts_key, date_key in (
+ ('timestamp', 'upload_date'),
+ ('release_timestamp', 'release_date'),
+ ('modified_timestamp', 'modified_date'),
+ ):
+ if info_dict.get(date_key) is None and info_dict.get(ts_key) is not None:
+ # Working around out-of-range timestamp values (e.g. negative ones on Windows,
+ # see http://bugs.python.org/issue1646728)
+ with contextlib.suppress(ValueError, OverflowError, OSError):
+ upload_date = datetime.datetime.fromtimestamp(info_dict[ts_key], datetime.timezone.utc)
+ info_dict[date_key] = upload_date.strftime('%Y%m%d')
+
+ if not info_dict.get('release_year'):
+ info_dict['release_year'] = traverse_obj(info_dict, ('release_date', {lambda x: int(x[:4])}))
+
+ live_keys = ('is_live', 'was_live')
+ live_status = info_dict.get('live_status')
+ if live_status is None:
+ for key in live_keys:
+ if info_dict.get(key) is False:
+ continue
+ if info_dict.get(key):
+ live_status = key
+ break
+ if all(info_dict.get(key) is False for key in live_keys):
+ live_status = 'not_live'
+ if live_status:
+ info_dict['live_status'] = live_status
+ for key in live_keys:
+ if info_dict.get(key) is None:
+ info_dict[key] = (live_status == key)
+ if live_status == 'post_live':
+ info_dict['was_live'] = True
+
+ # Auto generate title fields corresponding to the *_number fields when missing
+ # in order to always have clean titles. This is very common for TV series.
+ for field in ('chapter', 'season', 'episode'):
+ if final and info_dict.get('%s_number' % field) is not None and not info_dict.get(field):
+ info_dict[field] = '%s %d' % (field.capitalize(), info_dict['%s_number' % field])
+
+ for old_key, new_key in self._deprecated_multivalue_fields.items():
+ if new_key in info_dict and old_key in info_dict:
+ if '_version' not in info_dict: # HACK: Do not warn when using --load-info-json
+ self.deprecation_warning(f'Do not return {old_key!r} when {new_key!r} is present')
+ elif old_value := info_dict.get(old_key):
+ info_dict[new_key] = old_value.split(', ')
+ elif new_value := info_dict.get(new_key):
+ info_dict[old_key] = ', '.join(v.replace(',', '\N{FULLWIDTH COMMA}') for v in new_value)
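+ # Sketch, assuming a deprecated pair like ('artist', 'artists') in
+ # _deprecated_multivalue_fields: 'artist': 'A, B' is split into
+ # 'artists': ['A', 'B'], and the reverse direction joins with ', '.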
+
+ def _raise_pending_errors(self, info):
+ err = info.pop('__pending_error', None)
+ if err:
+ self.report_error(err, tb=False)
+
+ def sort_formats(self, info_dict):
+ formats = self._get_formats(info_dict)
+ formats.sort(key=FormatSorter(
+ self, info_dict.get('_format_sort_fields') or []).calculate_preference)
+
+ def process_video_result(self, info_dict, download=True):
+ assert info_dict.get('_type', 'video') == 'video'
+ self._num_videos += 1
+
+ if 'id' not in info_dict:
+ raise ExtractorError('Missing "id" field in extractor result', ie=info_dict['extractor'])
+ elif not info_dict.get('id'):
+ raise ExtractorError('Extractor failed to obtain "id"', ie=info_dict['extractor'])
+
+ def report_force_conversion(field, field_not, conversion):
+ self.report_warning(
+ '"%s" field is not %s - forcing %s conversion, there is an error in extractor'
+ % (field, field_not, conversion))
+
+ def sanitize_string_field(info, string_field):
+ field = info.get(string_field)
+ if field is None or isinstance(field, str):
+ return
+ report_force_conversion(string_field, 'a string', 'string')
+ info[string_field] = str(field)
+
+ def sanitize_numeric_fields(info):
+ for numeric_field in self._NUMERIC_FIELDS:
+ field = info.get(numeric_field)
+ if field is None or isinstance(field, (int, float)):
+ continue
+ report_force_conversion(numeric_field, 'numeric', 'int')
+ info[numeric_field] = int_or_none(field)
+
+ sanitize_string_field(info_dict, 'id')
+ sanitize_numeric_fields(info_dict)
+ if info_dict.get('section_end') and info_dict.get('section_start') is not None:
+ info_dict['duration'] = round(info_dict['section_end'] - info_dict['section_start'], 3)
+ if (info_dict.get('duration') or 0) <= 0 and info_dict.pop('duration', None):
+ self.report_warning('"duration" field is negative, there is an error in extractor')
+
+ chapters = info_dict.get('chapters') or []
+ if chapters and chapters[0].get('start_time'):
+ chapters.insert(0, {'start_time': 0})
+
+ dummy_chapter = {'end_time': 0, 'start_time': info_dict.get('duration')}
+ for idx, (prev, current, next_) in enumerate(zip(
+ (dummy_chapter, *chapters), chapters, (*chapters[1:], dummy_chapter)), 1):
+ if current.get('start_time') is None:
+ current['start_time'] = prev.get('end_time')
+ if not current.get('end_time'):
+ current['end_time'] = next_.get('start_time')
+ if not current.get('title'):
+ current['title'] = f'<Untitled Chapter {idx}>'
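+ # e.g. chapters [{'title': 'Intro'}, {'start_time': 60}] with duration 120
+ # become [{'start_time': 0, 'end_time': 60, 'title': 'Intro'},
+ # {'start_time': 60, 'end_time': 120, 'title': '<Untitled Chapter 2>'}] (sketch).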
+
+ if 'playlist' not in info_dict:
+ # It isn't part of a playlist
+ info_dict['playlist'] = None
+ info_dict['playlist_index'] = None
+
+ self._sanitize_thumbnails(info_dict)
+
+ thumbnail = info_dict.get('thumbnail')
+ thumbnails = info_dict.get('thumbnails')
+ if thumbnail:
+ info_dict['thumbnail'] = sanitize_url(thumbnail)
+ elif thumbnails:
+ info_dict['thumbnail'] = thumbnails[-1]['url']
+
+ if info_dict.get('display_id') is None and 'id' in info_dict:
+ info_dict['display_id'] = info_dict['id']
+
+ self._fill_common_fields(info_dict)
+
+ for cc_kind in ('subtitles', 'automatic_captions'):
+ cc = info_dict.get(cc_kind)
+ if cc:
+ for _, subtitle in cc.items():
+ for subtitle_format in subtitle:
+ if subtitle_format.get('url'):
+ subtitle_format['url'] = sanitize_url(subtitle_format['url'])
+ if subtitle_format.get('ext') is None:
+ subtitle_format['ext'] = determine_ext(subtitle_format['url']).lower()
+
+ automatic_captions = info_dict.get('automatic_captions')
+ subtitles = info_dict.get('subtitles')
+
+ info_dict['requested_subtitles'] = self.process_subtitles(
+ info_dict['id'], subtitles, automatic_captions)
+
+ formats = self._get_formats(info_dict)
+
+ # Backward compatibility with InfoExtractor._sort_formats
+ field_preference = (formats or [{}])[0].pop('__sort_fields', None)
+ if field_preference:
+ info_dict['_format_sort_fields'] = field_preference
+
+ info_dict['_has_drm'] = any( # or None ensures --clean-infojson removes it
+ f.get('has_drm') and f['has_drm'] != 'maybe' for f in formats) or None
+ if not self.params.get('allow_unplayable_formats'):
+ formats = [f for f in formats if not f.get('has_drm') or f['has_drm'] == 'maybe']
+
+ if formats and all(f.get('acodec') == f.get('vcodec') == 'none' for f in formats):
+ msg = 'Only images are available for download. Use --list-formats to see them'
+ if info_dict['_has_drm']:
+ msg = f'This video is DRM protected and {msg[0].lower()}{msg[1:]}'
+ self.report_warning(msg)
+
+ get_from_start = not info_dict.get('is_live') or bool(self.params.get('live_from_start'))
+ if not get_from_start:
+ info_dict['title'] += ' ' + datetime.datetime.now().strftime('%Y-%m-%d %H:%M')
+ if info_dict.get('is_live') and formats:
+ formats = [f for f in formats if bool(f.get('is_from_start')) == get_from_start]
+ if get_from_start and not formats:
+ self.raise_no_formats(info_dict, msg=(
+ '--live-from-start is passed, but there are no formats that can be downloaded from the start. '
+ 'If you want to download from the current time, use --no-live-from-start'))
+
+ def is_wellformed(f):
+ url = f.get('url')
+ if not url:
+ self.report_warning(
+ '"url" field is missing or empty - skipping format, '
+ 'there is an error in extractor')
+ return False
+ if isinstance(url, bytes):
+ sanitize_string_field(f, 'url')
+ return True
+
+ # Filter out malformed formats for better extraction robustness
+ formats = list(filter(is_wellformed, formats or []))
+
+ if not formats:
+ self.raise_no_formats(info_dict)
+
+ for format in formats:
+ sanitize_string_field(format, 'format_id')
+ sanitize_numeric_fields(format)
+ format['url'] = sanitize_url(format['url'])
+ if format.get('ext') is None:
+ format['ext'] = determine_ext(format['url']).lower()
+ if format.get('protocol') is None:
+ format['protocol'] = determine_protocol(format)
+ if format.get('resolution') is None:
+ format['resolution'] = self.format_resolution(format, default=None)
+ if format.get('dynamic_range') is None and format.get('vcodec') != 'none':
+ format['dynamic_range'] = 'SDR'
+ if format.get('aspect_ratio') is None:
+ format['aspect_ratio'] = try_call(lambda: round(format['width'] / format['height'], 2))
+ # For fragmented formats, "tbr" is often max bitrate and not average
+ if (('manifest-filesize-approx' in self.params['compat_opts'] or not format.get('manifest_url'))
+ and info_dict.get('duration') and format.get('tbr')
+ and not format.get('filesize') and not format.get('filesize_approx')):
+ format['filesize_approx'] = int(info_dict['duration'] * format['tbr'] * (1024 / 8))
+ format['http_headers'] = self._calc_headers(collections.ChainMap(format, info_dict), load_cookies=True)
+
+ # Safeguard against old/insecure infojson when using --load-info-json
+ if info_dict.get('http_headers'):
+ info_dict['http_headers'] = HTTPHeaderDict(info_dict['http_headers'])
+ info_dict['http_headers'].pop('Cookie', None)
+
+ # This is copied to http_headers by the above _calc_headers and can now be removed
+ if '__x_forwarded_for_ip' in info_dict:
+ del info_dict['__x_forwarded_for_ip']
+
+ self.sort_formats({
+ 'formats': formats,
+ '_format_sort_fields': info_dict.get('_format_sort_fields')
+ })
+
+ # Sanitize and group by format_id
+ formats_dict = {}
+ for i, format in enumerate(formats):
+ if not format.get('format_id'):
+ format['format_id'] = str(i)
+ else:
+ # Sanitize format_id from characters used in format selector expression
+ format['format_id'] = re.sub(r'[\s,/+\[\]()]', '_', format['format_id'])
+ formats_dict.setdefault(format['format_id'], []).append(format)
+
+ # Make sure all formats have unique format_id
+ common_exts = set(itertools.chain(*self._format_selection_exts.values()))
+ for format_id, ambiguous_formats in formats_dict.items():
+ ambiguous_id = len(ambiguous_formats) > 1
+ for i, format in enumerate(ambiguous_formats):
+ if ambiguous_id:
+ format['format_id'] = '%s-%d' % (format_id, i)
+ # Ensure there is no conflict between id and ext in format selection
+ # See https://github.com/yt-dlp/yt-dlp/issues/1282
+ if format['format_id'] != format['ext'] and format['format_id'] in common_exts:
+ format['format_id'] = 'f%s' % format['format_id']
+
+ if format.get('format') is None:
+ format['format'] = '{id} - {res}{note}'.format(
+ id=format['format_id'],
+ res=self.format_resolution(format),
+ note=format_field(format, 'format_note', ' (%s)'),
+ )
+
+ if self.params.get('check_formats') is True:
+ formats = LazyList(self._check_formats(formats[::-1]), reverse=True)
+
+ if not formats or formats[0] is not info_dict:
+ # Only set the 'formats' field if the original info_dict lists them;
+ # otherwise we end up with a circular reference: the first (and only)
+ # element of the 'formats' field in info_dict would be info_dict itself,
+ # which can't be exported to json
+ info_dict['formats'] = formats
+
+ info_dict, _ = self.pre_process(info_dict)
+
+ if self._match_entry(info_dict, incomplete=self._format_fields) is not None:
+ return info_dict
+
+ self.post_extract(info_dict)
+ info_dict, _ = self.pre_process(info_dict, 'after_filter')
+
+ # The pre-processors may have modified the formats
+ formats = self._get_formats(info_dict)
+
+ list_only = self.params.get('simulate') == 'list_only'
+ interactive_format_selection = not list_only and self.format_selector == '-'
+ if self.params.get('list_thumbnails'):
+ self.list_thumbnails(info_dict)
+ if self.params.get('listsubtitles'):
+ if 'automatic_captions' in info_dict:
+ self.list_subtitles(
+ info_dict['id'], automatic_captions, 'automatic captions')
+ self.list_subtitles(info_dict['id'], subtitles, 'subtitles')
+ if self.params.get('listformats') or interactive_format_selection:
+ self.list_formats(info_dict)
+ if list_only:
+ # Without this printing, -F --print-json will not work
+ self.__forced_printings(info_dict)
+ return info_dict
+
+ format_selector = self.format_selector
+ while True:
+ if interactive_format_selection:
+ req_format = input(self._format_screen('\nEnter format selector ', self.Styles.EMPHASIS)
+ + '(Press ENTER for default, or Ctrl+C to quit)'
+ + self._format_screen(': ', self.Styles.EMPHASIS))
+ try:
+ format_selector = self.build_format_selector(req_format) if req_format else None
+ except SyntaxError as err:
+ self.report_error(err, tb=False, is_error=False)
+ continue
+
+ if format_selector is None:
+ req_format = self._default_format_spec(info_dict, download=download)
+ self.write_debug(f'Default format spec: {req_format}')
+ format_selector = self.build_format_selector(req_format)
+
+ formats_to_download = list(format_selector({
+ 'formats': formats,
+ 'has_merged_format': any('none' not in (f.get('acodec'), f.get('vcodec')) for f in formats),
+ 'incomplete_formats': (all(f.get('vcodec') == 'none' for f in formats) # No formats with video
+ or all(f.get('acodec') == 'none' for f in formats)), # OR, No formats with audio
+ }))
+ if interactive_format_selection and not formats_to_download:
+ self.report_error('Requested format is not available', tb=False, is_error=False)
+ continue
+ break
+
+ if not formats_to_download:
+ if not self.params.get('ignore_no_formats_error'):
+ raise ExtractorError(
+ 'Requested format is not available. Use --list-formats for a list of available formats',
+ expected=True, video_id=info_dict['id'], ie=info_dict['extractor'])
+ self.report_warning('Requested format is not available')
+ # Process what we can, even without any available formats.
+ formats_to_download = [{}]
+
+ requested_ranges = tuple(self.params.get('download_ranges', lambda *_: [{}])(info_dict, self))
+ best_format, downloaded_formats = formats_to_download[-1], []
+ if download:
+ if best_format and requested_ranges:
+ def to_screen(*msg):
+ self.to_screen(f'[info] {info_dict["id"]}: {" ".join(", ".join(variadic(m)) for m in msg)}')
+
+ to_screen(f'Downloading {len(formats_to_download)} format(s):',
+ (f['format_id'] for f in formats_to_download))
+ if requested_ranges != ({}, ):
+ to_screen(f'Downloading {len(requested_ranges)} time ranges:',
+ (f'{c["start_time"]:.1f}-{c["end_time"]:.1f}' for c in requested_ranges))
+ max_downloads_reached = False
+
+ for fmt, chapter in itertools.product(formats_to_download, requested_ranges):
+ new_info = self._copy_infodict(info_dict)
+ new_info.update(fmt)
+ offset, duration = info_dict.get('section_start') or 0, info_dict.get('duration') or float('inf')
+ end_time = offset + min(chapter.get('end_time', duration), duration)
+ # duration may not be accurate, so allow deviations of <1 sec
+ if end_time == float('inf') or end_time > offset + duration + 1:
+ end_time = None
+ if chapter or offset:
+ new_info.update({
+ 'section_start': offset + chapter.get('start_time', 0),
+ 'section_end': end_time,
+ 'section_title': chapter.get('title'),
+ 'section_number': chapter.get('index'),
+ })
+ downloaded_formats.append(new_info)
+ try:
+ self.process_info(new_info)
+ except MaxDownloadsReached:
+ max_downloads_reached = True
+ self._raise_pending_errors(new_info)
+ # Remove copied info
+ for key, val in tuple(new_info.items()):
+ if info_dict.get(key) == val:
+ new_info.pop(key)
+ if max_downloads_reached:
+ break
+
+ write_archive = {f.get('__write_download_archive', False) for f in downloaded_formats}
+ assert write_archive.issubset({True, False, 'ignore'})
+ if True in write_archive and False not in write_archive:
+ self.record_download_archive(info_dict)
+
+ info_dict['requested_downloads'] = downloaded_formats
+ info_dict = self.run_all_pps('after_video', info_dict)
+ if max_downloads_reached:
+ raise MaxDownloadsReached()
+
+ # We update the info dict with the selected best quality format (backwards compatibility)
+ info_dict.update(best_format)
+ return info_dict
+
+ def process_subtitles(self, video_id, normal_subtitles, automatic_captions):
+ """Select the requested subtitles and their format"""
+ available_subs, normal_sub_langs = {}, []
+ if normal_subtitles and self.params.get('writesubtitles'):
+ available_subs.update(normal_subtitles)
+ normal_sub_langs = tuple(normal_subtitles.keys())
+ if automatic_captions and self.params.get('writeautomaticsub'):
+ for lang, cap_info in automatic_captions.items():
+ if lang not in available_subs:
+ available_subs[lang] = cap_info
+
+ if not available_subs or (
+ not self.params.get('writesubtitles')
+ and not self.params.get('writeautomaticsub')):
+ return None
+
+ all_sub_langs = tuple(available_subs.keys())
+ if self.params.get('allsubtitles', False):
+ requested_langs = all_sub_langs
+ elif self.params.get('subtitleslangs', False):
+ try:
+ requested_langs = orderedSet_from_options(
+ self.params.get('subtitleslangs'), {'all': all_sub_langs}, use_regex=True)
+ except re.error as e:
+ raise ValueError(f'Wrong regex for subtitleslangs: {e.pattern}')
+ else:
+ requested_langs = LazyList(itertools.chain(
+ ['en'] if 'en' in normal_sub_langs else [],
+ filter(lambda f: f.startswith('en'), normal_sub_langs),
+ ['en'] if 'en' in all_sub_langs else [],
+ filter(lambda f: f.startswith('en'), all_sub_langs),
+ normal_sub_langs, all_sub_langs,
+ ))[:1]
+ if requested_langs:
+ self.to_screen(f'[info] {video_id}: Downloading subtitles: {", ".join(requested_langs)}')
+
+ formats_query = self.params.get('subtitlesformat', 'best')
+ formats_preference = formats_query.split('/') if formats_query else []
+ subs = {}
+ for lang in requested_langs:
+ formats = available_subs.get(lang)
+ if formats is None:
+ self.report_warning(f'{lang} subtitles not available for {video_id}')
+ continue
+ for ext in formats_preference:
+ if ext == 'best':
+ f = formats[-1]
+ break
+ matches = list(filter(lambda f: f['ext'] == ext, formats))
+ if matches:
+ f = matches[-1]
+ break
+ else:
+ f = formats[-1]
+ self.report_warning(
+ 'No subtitle format found matching "%s" for language %s, '
+ 'using %s' % (formats_query, lang, f['ext']))
+ subs[lang] = f
+ return subs
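+ # Sketch of the default selection above: with available languages
+ # ('de', 'en-US') and no subtitleslangs/allsubtitles, requested_langs
+ # becomes ['en-US'] (English variants win, else the first available language).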
+
+ def _forceprint(self, key, info_dict):
+ if info_dict is None:
+ return
+ info_copy = info_dict.copy()
+ info_copy.setdefault('filename', self.prepare_filename(info_dict))
+ if info_dict.get('requested_formats') is not None:
+ # For RTMP URLs, also include the playpath
+ info_copy['urls'] = '\n'.join(f['url'] + f.get('play_path', '') for f in info_dict['requested_formats'])
+ elif info_dict.get('url'):
+ info_copy['urls'] = info_dict['url'] + info_dict.get('play_path', '')
+ info_copy['formats_table'] = self.render_formats_table(info_dict)
+ info_copy['thumbnails_table'] = self.render_thumbnails_table(info_dict)
+ info_copy['subtitles_table'] = self.render_subtitles_table(info_dict.get('id'), info_dict.get('subtitles'))
+ info_copy['automatic_captions_table'] = self.render_subtitles_table(info_dict.get('id'), info_dict.get('automatic_captions'))
+
+ def format_tmpl(tmpl):
+ mobj = re.fullmatch(r'([\w.:,]|-\d|(?P<dict>{([\w.:,]|-\d)+}))+=?', tmpl)
+ if not mobj:
+ return tmpl
+
+ fmt = '%({})s'
+ if tmpl.startswith('{'):
+ tmpl, fmt = f'.{tmpl}', '%({})j'
+ if tmpl.endswith('='):
+ tmpl, fmt = tmpl[:-1], '{0} = %({0})#j'
+ return '\n'.join(map(fmt.format, [tmpl] if mobj.group('dict') else tmpl.split(',')))
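+ # e.g. format_tmpl('title,id') -> '%(title)s\n%(id)s' and
+ # format_tmpl('duration=') -> 'duration = %(duration)#j' (sketch);
+ # anything not matching the regex is passed through as a raw template.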
+
+ for tmpl in self.params['forceprint'].get(key, []):
+ self.to_stdout(self.evaluate_outtmpl(format_tmpl(tmpl), info_copy))
+
+ for tmpl, file_tmpl in self.params['print_to_file'].get(key, []):
+ filename = self.prepare_filename(info_dict, outtmpl=file_tmpl)
+ tmpl = format_tmpl(tmpl)
+ self.to_screen(f'[info] Writing {tmpl!r} to: {filename}')
+ if self._ensure_dir_exists(filename):
+ with open(filename, 'a', encoding='utf-8', newline='') as f:
+ f.write(self.evaluate_outtmpl(tmpl, info_copy) + os.linesep)
+
+ return info_copy
+
+ def __forced_printings(self, info_dict, filename=None, incomplete=True):
+ if (self.params.get('forcejson')
+ or self.params['forceprint'].get('video')
+ or self.params['print_to_file'].get('video')):
+ self.post_extract(info_dict)
+ if filename:
+ info_dict['filename'] = filename
+ info_copy = self._forceprint('video', info_dict)
+
+ def print_field(field, actual_field=None, optional=False):
+ if actual_field is None:
+ actual_field = field
+ if self.params.get(f'force{field}') and (
+ info_copy.get(field) is not None or (not optional and not incomplete)):
+ self.to_stdout(info_copy[actual_field])
+
+ print_field('title')
+ print_field('id')
+ print_field('url', 'urls')
+ print_field('thumbnail', optional=True)
+ print_field('description', optional=True)
+ print_field('filename')
+ if self.params.get('forceduration') and info_copy.get('duration') is not None:
+ self.to_stdout(formatSeconds(info_copy['duration']))
+ print_field('format')
+
+ if self.params.get('forcejson'):
+ self.to_stdout(json.dumps(self.sanitize_info(info_dict)))
+
+ def dl(self, name, info, subtitle=False, test=False):
+ if not info.get('url'):
+ self.raise_no_formats(info, True)
+
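+ # Test mode (used e.g. by --check-formats) downloads only a small portion
+ # of the file, quietly, without progress output or .part/.ytdl files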
+ if test:
+ verbose = self.params.get('verbose')
+ params = {
+ 'test': True,
+ 'quiet': self.params.get('quiet') or not verbose,
+ 'verbose': verbose,
+ 'noprogress': not verbose,
+ 'nopart': True,
+ 'skip_unavailable_fragments': False,
+ 'keep_fragments': False,
+ 'overwrites': True,
+ '_no_ytdl_file': True,
+ }
+ else:
+ params = self.params
+ fd = get_suitable_downloader(info, params, to_stdout=(name == '-'))(self, params)
+ if not test:
+ for ph in self._progress_hooks:
+ fd.add_progress_hook(ph)
+ urls = '", "'.join(
+ (f['url'].split(',')[0] + ',<data>' if f['url'].startswith('data:') else f['url'])
+ for f in info.get('requested_formats', []) or [info])
+ self.write_debug(f'Invoking {fd.FD_NAME} downloader on "{urls}"')
+
+ # Note: Ideally, info should be deep-copied so that hooks cannot modify it,
+ # but it may contain objects that are not deep-copyable
+ new_info = self._copy_infodict(info)
+ if new_info.get('http_headers') is None:
+ new_info['http_headers'] = self._calc_headers(new_info)
+ return fd.download(name, new_info, subtitle)
+
+ def existing_file(self, filepaths, *, default_overwrite=True):
+ existing_files = list(filter(os.path.exists, orderedSet(filepaths)))
+ if existing_files and not self.params.get('overwrites', default_overwrite):
+ return existing_files[0]
+
+ for file in existing_files:
+ self.report_file_delete(file)
+ os.remove(file)
+ return None
+
+ def process_info(self, info_dict):
+ """Process a single resolved IE result. (Modifies it in-place)"""
+
+ assert info_dict.get('_type', 'video') == 'video'
+ original_infodict = info_dict
+
+ if 'format' not in info_dict and 'ext' in info_dict:
+ info_dict['format'] = info_dict['ext']
+
+ if self._match_entry(info_dict) is not None:
+ info_dict['__write_download_archive'] = 'ignore'
+ return
+
+ # Does nothing under normal operation - for backward compatibility of process_info
+ self.post_extract(info_dict)
+
+ def replace_info_dict(new_info):
+ nonlocal info_dict
+ if new_info == info_dict:
+ return
+ info_dict.clear()
+ info_dict.update(new_info)
+
+ new_info, _ = self.pre_process(info_dict, 'video')
+ replace_info_dict(new_info)
+ self._num_downloads += 1
+
+ # info_dict['_filename'] needs to be set for backward compatibility
+ info_dict['_filename'] = full_filename = self.prepare_filename(info_dict, warn=True)
+ temp_filename = self.prepare_filename(info_dict, 'temp')
+ files_to_move = {}
+
+ # Forced printings
+ self.__forced_printings(info_dict, full_filename, incomplete=('format' not in info_dict))
+
+ def check_max_downloads():
+ if self._num_downloads >= float(self.params.get('max_downloads') or 'inf'):
+ raise MaxDownloadsReached()
+
+ if self.params.get('simulate'):
+ info_dict['__write_download_archive'] = self.params.get('force_write_download_archive')
+ check_max_downloads()
+ return
+
+ if full_filename is None:
+ return
+ if not self._ensure_dir_exists(encodeFilename(full_filename)):
+ return
+ if not self._ensure_dir_exists(encodeFilename(temp_filename)):
+ return
+
+ if self._write_description('video', info_dict,
+ self.prepare_filename(info_dict, 'description')) is None:
+ return
+
+ sub_files = self._write_subtitles(info_dict, temp_filename)
+ if sub_files is None:
+ return
+ files_to_move.update(dict(sub_files))
+
+ thumb_files = self._write_thumbnails(
+ 'video', info_dict, temp_filename, self.prepare_filename(info_dict, 'thumbnail'))
+ if thumb_files is None:
+ return
+ files_to_move.update(dict(thumb_files))
+
+ infofn = self.prepare_filename(info_dict, 'infojson')
+ _infojson_written = self._write_info_json('video', info_dict, infofn)
+ if _infojson_written:
+ info_dict['infojson_filename'] = infofn
+ # For backward compatibility, even though it was a private field
+ info_dict['__infojson_filename'] = infofn
+ elif _infojson_written is None:
+ return
+
+ # Note: Annotations are deprecated
+ annofn = None
+ if self.params.get('writeannotations', False):
+ annofn = self.prepare_filename(info_dict, 'annotation')
+ if annofn:
+ if not self._ensure_dir_exists(encodeFilename(annofn)):
+ return
+ if not self.params.get('overwrites', True) and os.path.exists(encodeFilename(annofn)):
+ self.to_screen('[info] Video annotations are already present')
+ elif not info_dict.get('annotations'):
+ self.report_warning('There are no annotations to write.')
+ else:
+ try:
+ self.to_screen('[info] Writing video annotations to: ' + annofn)
+ with open(encodeFilename(annofn), 'w', encoding='utf-8') as annofile:
+ annofile.write(info_dict['annotations'])
+ except (KeyError, TypeError):
+ self.report_warning('There are no annotations to write.')
+ except OSError:
+ self.report_error('Cannot write annotations file: ' + annofn)
+ return
+
+ # Write internet shortcut files
+ def _write_link_file(link_type):
+ url = try_get(info_dict['webpage_url'], iri_to_uri)
+ if not url:
+ self.report_warning(
+ f'Cannot write internet shortcut file because the actual URL of "{info_dict["webpage_url"]}" is unknown')
+ return True
+ linkfn = replace_extension(self.prepare_filename(info_dict, 'link'), link_type, info_dict.get('ext'))
+ if not self._ensure_dir_exists(encodeFilename(linkfn)):
+ return False
+ if not self.params.get('overwrites', True) and os.path.exists(encodeFilename(linkfn)):
+ self.to_screen(f'[info] Internet shortcut (.{link_type}) is already present')
+ return True
+ try:
+ self.to_screen(f'[info] Writing internet shortcut (.{link_type}) to: {linkfn}')
+ with open(encodeFilename(to_high_limit_path(linkfn)), 'w', encoding='utf-8',
+ newline='\r\n' if link_type == 'url' else '\n') as linkfile:
+ template_vars = {'url': url}
+ if link_type == 'desktop':
+ template_vars['filename'] = linkfn[:-(len(link_type) + 1)]
+ linkfile.write(LINK_TEMPLATES[link_type] % template_vars)
+ except OSError:
+ self.report_error(f'Cannot write internet shortcut {linkfn}')
+ return False
+ return True
+
+ write_links = {
+ 'url': self.params.get('writeurllink'),
+ 'webloc': self.params.get('writewebloclink'),
+ 'desktop': self.params.get('writedesktoplink'),
+ }
+ if self.params.get('writelink'):
+ link_type = ('webloc' if sys.platform == 'darwin'
+ else 'desktop' if sys.platform.startswith('linux')
+ else 'url')
+ write_links[link_type] = True
+
+ if any(should_write and not _write_link_file(link_type)
+ for link_type, should_write in write_links.items()):
+ return
+
+ new_info, files_to_move = self.pre_process(info_dict, 'before_dl', files_to_move)
+ replace_info_dict(new_info)
+
+ if self.params.get('skip_download'):
+ info_dict['filepath'] = temp_filename
+ info_dict['__finaldir'] = os.path.dirname(os.path.abspath(encodeFilename(full_filename)))
+ info_dict['__files_to_move'] = files_to_move
+ replace_info_dict(self.run_pp(MoveFilesAfterDownloadPP(self, False), info_dict))
+ info_dict['__write_download_archive'] = self.params.get('force_write_download_archive')
+ else:
+ # Download
+ info_dict.setdefault('__postprocessors', [])
+ try:
+
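+ # Look for an already-downloaded file under both the final extension
+ # (after --remux-video/--recode-video, i.e. final_ext) and the original
+ # one, preferring the converted name for each candidate path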
+ def existing_video_file(*filepaths):
+ ext = info_dict.get('ext')
+ converted = lambda file: replace_extension(file, self.params.get('final_ext') or ext, ext)
+ file = self.existing_file(itertools.chain(*zip(map(converted, filepaths), filepaths)),
+ default_overwrite=False)
+ if file:
+ info_dict['ext'] = os.path.splitext(file)[1][1:]
+ return file
+
+ fd, success = None, True
+ if info_dict.get('protocol') or info_dict.get('url'):
+ fd = get_suitable_downloader(info_dict, self.params, to_stdout=temp_filename == '-')
+ if fd != FFmpegFD and 'no-direct-merge' not in self.params['compat_opts'] and (
+ info_dict.get('section_start') or info_dict.get('section_end')):
+ msg = ('This format cannot be partially downloaded' if FFmpegFD.available()
+ else 'You have requested downloading the video partially, but ffmpeg is not installed')
+ self.report_error(f'{msg}. Aborting')
+ return
+
+ if info_dict.get('requested_formats') is not None:
+ old_ext = info_dict['ext']
+ if self.params.get('merge_output_format') is None:
+ if (info_dict['ext'] == 'webm'
+ and info_dict.get('thumbnails')
+ # check with type instead of pp_key, __name__, or isinstance
+ # since we don't want any custom PPs to trigger this
+ and any(type(pp) == EmbedThumbnailPP for pp in self._pps['post_process'])): # noqa: E721
+ info_dict['ext'] = 'mkv'
+ self.report_warning(
+ 'webm doesn\'t support embedding a thumbnail, mkv will be used')
+ new_ext = info_dict['ext']
+
+ def correct_ext(filename, ext=new_ext):
+ if filename == '-':
+ return filename
+ filename_real_ext = os.path.splitext(filename)[1][1:]
+ filename_wo_ext = (
+ os.path.splitext(filename)[0]
+ if filename_real_ext in (old_ext, new_ext)
+ else filename)
+ return f'{filename_wo_ext}.{ext}'
+
+ # Ensure filename always has a correct extension for successful merge
+ full_filename = correct_ext(full_filename)
+ temp_filename = correct_ext(temp_filename)
+ dl_filename = existing_video_file(full_filename, temp_filename)
+
+ info_dict['__real_download'] = False
+ # NOTE: Copy so that original format dicts are not modified
+ info_dict['requested_formats'] = list(map(dict, info_dict['requested_formats']))
+
+ merger = FFmpegMergerPP(self)
+ downloaded = []
+ if dl_filename is not None:
+ self.report_file_already_downloaded(dl_filename)
+ elif fd:
+ for f in info_dict['requested_formats'] if fd != FFmpegFD else []:
+ f['filepath'] = fname = prepend_extension(
+ correct_ext(temp_filename, info_dict['ext']),
+ 'f%s' % f['format_id'], info_dict['ext'])
+ downloaded.append(fname)
+ info_dict['url'] = '\n'.join(f['url'] for f in info_dict['requested_formats'])
+ success, real_download = self.dl(temp_filename, info_dict)
+ info_dict['__real_download'] = real_download
+ else:
+ if self.params.get('allow_unplayable_formats'):
+ self.report_warning(
+ 'You have requested merging of multiple formats '
+ 'while also allowing unplayable formats to be downloaded. '
+ 'The formats won\'t be merged to prevent data corruption.')
+ elif not merger.available:
+ msg = 'You have requested merging of multiple formats but ffmpeg is not installed'
+ if not self.params.get('ignoreerrors'):
+ self.report_error(f'{msg}. Aborting due to --abort-on-error')
+ return
+ self.report_warning(f'{msg}. The formats won\'t be merged')
+
+ if temp_filename == '-':
+ reason = ('using a downloader other than ffmpeg' if FFmpegFD.can_merge_formats(info_dict, self.params)
+ else 'but the formats are incompatible for simultaneous download' if merger.available
+ else 'but ffmpeg is not installed')
+ self.report_warning(
+ f'You have requested downloading multiple formats to stdout {reason}. '
+ 'The formats will be streamed one after the other')
+ fname = temp_filename
+ for f in info_dict['requested_formats']:
+ new_info = dict(info_dict)
+ del new_info['requested_formats']
+ new_info.update(f)
+ if temp_filename != '-':
+ fname = prepend_extension(
+ correct_ext(temp_filename, new_info['ext']),
+ 'f%s' % f['format_id'], new_info['ext'])
+ if not self._ensure_dir_exists(fname):
+ return
+ f['filepath'] = fname
+ downloaded.append(fname)
+ partial_success, real_download = self.dl(fname, new_info)
+ info_dict['__real_download'] = info_dict['__real_download'] or real_download
+ success = success and partial_success
+
+ if downloaded and merger.available and not self.params.get('allow_unplayable_formats'):
+ info_dict['__postprocessors'].append(merger)
+ info_dict['__files_to_merge'] = downloaded
+ # Even if there were no downloads, the merge happens only now, so it counts as a real download
+ info_dict['__real_download'] = True
+ else:
+ for file in downloaded:
+ files_to_move[file] = None
+ else:
+ # Just a single file
+ dl_filename = existing_video_file(full_filename, temp_filename)
+ if dl_filename is None or dl_filename == temp_filename:
+ # dl_filename == temp_filename could mean that the file was partially downloaded with --no-part.
+ # So we should try to resume the download
+ success, real_download = self.dl(temp_filename, info_dict)
+ info_dict['__real_download'] = real_download
+ else:
+ self.report_file_already_downloaded(dl_filename)
+
+ dl_filename = dl_filename or temp_filename
+ info_dict['__finaldir'] = os.path.dirname(os.path.abspath(encodeFilename(full_filename)))
+
+ except network_exceptions as err:
+ self.report_error('unable to download video data: %s' % error_to_compat_str(err))
+ return
+ except OSError as err:
+ raise UnavailableVideoError(err)
+ except (ContentTooShortError, ) as err:
+ self.report_error(f'content too short (expected {err.expected} bytes and served {err.downloaded})')
+ return
+
+ self._raise_pending_errors(info_dict)
+ if success and full_filename != '-':
+
+ def fixup():
+ do_fixup = True
+ fixup_policy = self.params.get('fixup')
+ vid = info_dict['id']
+
+ if fixup_policy in ('ignore', 'never'):
+ return
+ elif fixup_policy == 'warn':
+ do_fixup = 'warn'
+ elif fixup_policy != 'force':
+ assert fixup_policy in ('detect_or_warn', None)
+ if not info_dict.get('__real_download'):
+ do_fixup = False
+
+ def ffmpeg_fixup(cndn, msg, cls):
+ if not (do_fixup and cndn):
+ return
+ elif do_fixup == 'warn':
+ self.report_warning(f'{vid}: {msg}')
+ return
+ pp = cls(self)
+ if pp.available:
+ info_dict['__postprocessors'].append(pp)
+ else:
+ self.report_warning(f'{vid}: {msg}. Install ffmpeg to fix this automatically')
+
+ stretched_ratio = info_dict.get('stretched_ratio')
+ ffmpeg_fixup(stretched_ratio not in (1, None),
+ f'Non-uniform pixel ratio {stretched_ratio}',
+ FFmpegFixupStretchedPP)
+
+ downloader = get_suitable_downloader(info_dict, self.params) if 'protocol' in info_dict else None
+ downloader = downloader.FD_NAME if downloader else None
+
+ ext = info_dict.get('ext')
+ postprocessed_by_ffmpeg = info_dict.get('requested_formats') or any((
+ isinstance(pp, FFmpegVideoConvertorPP)
+ and resolve_recode_mapping(ext, pp.mapping)[0] not in (ext, None)
+ ) for pp in self._pps['post_process'])
+
+ if not postprocessed_by_ffmpeg:
+ ffmpeg_fixup(fd != FFmpegFD and ext == 'm4a'
+ and info_dict.get('container') == 'm4a_dash',
+ 'writing DASH m4a. Only some players support this container',
+ FFmpegFixupM4aPP)
+ ffmpeg_fixup(downloader == 'hlsnative' and not self.params.get('hls_use_mpegts')
+ or info_dict.get('is_live') and self.params.get('hls_use_mpegts') is None,
+ 'Possible MPEG-TS in MP4 container or malformed AAC timestamps',
+ FFmpegFixupM3u8PP)
+ ffmpeg_fixup(downloader == 'dashsegments'
+ and (info_dict.get('is_live') or info_dict.get('is_dash_periods')),
+ 'Possible duplicate MOOV atoms', FFmpegFixupDuplicateMoovPP)
+
+ ffmpeg_fixup(downloader == 'web_socket_fragment', 'Malformed timestamps detected', FFmpegFixupTimestampPP)
+ ffmpeg_fixup(downloader == 'web_socket_fragment', 'Malformed duration detected', FFmpegFixupDurationPP)
+
+ fixup()
+ try:
+ replace_info_dict(self.post_process(dl_filename, info_dict, files_to_move))
+ except PostProcessingError as err:
+ self.report_error('Postprocessing: %s' % str(err))
+ return
+ try:
+ for ph in self._post_hooks:
+ ph(info_dict['filepath'])
+ except Exception as err:
+ self.report_error('post hooks: %s' % str(err))
+ return
+ info_dict['__write_download_archive'] = True
+
+ assert info_dict is original_infodict # Make sure the info_dict was modified in-place
+ if self.params.get('force_write_download_archive'):
+ info_dict['__write_download_archive'] = True
+ check_max_downloads()
+
+ def __download_wrapper(self, func):
+ @functools.wraps(func)
+ def wrapper(*args, **kwargs):
+ try:
+ res = func(*args, **kwargs)
+ except UnavailableVideoError as e:
+ self.report_error(e)
+ except DownloadCancelled as e:
+ self.to_screen(f'[info] {e}')
+ if not self.params.get('break_per_url'):
+ raise
+ self._num_downloads = 0
+ else:
+ if self.params.get('dump_single_json', False):
+ self.post_extract(res)
+ self.to_stdout(json.dumps(self.sanitize_info(res)))
+ return wrapper
+
+ def download(self, url_list):
+ """Download a given list of URLs."""
+ url_list = variadic(url_list) # Passing a single URL is a common mistake
+ outtmpl = self.params['outtmpl']['default']
+ if (len(url_list) > 1
+ and outtmpl != '-'
+ and '%' not in outtmpl
+ and self.params.get('max_downloads') != 1):
+ raise SameFileError(outtmpl)
+
+ for url in url_list:
+ self.__download_wrapper(self.extract_info)(
+ url, force_generic_extractor=self.params.get('force_generic_extractor', False))
+
+ return self._download_retcode
+
+ def download_with_info_file(self, info_filename):
+ with contextlib.closing(fileinput.FileInput(
+ [info_filename], mode='r',
+ openhook=fileinput.hook_encoded('utf-8'))) as f:
+ # FileInput doesn't have a read method, we can't call json.load
+ infos = [self.sanitize_info(info, self.params.get('clean_infojson', True))
+ for info in variadic(json.loads('\n'.join(f)))]
+ for info in infos:
+ try:
+ self.__download_wrapper(self.process_ie_result)(info, download=True)
+ except (DownloadError, EntryNotInPlaylist, ReExtractInfo) as e:
+ if not isinstance(e, EntryNotInPlaylist):
+ self.to_stderr('\r')
+ webpage_url = info.get('webpage_url')
+ if webpage_url is None:
+ raise
+ self.report_warning(f'The info failed to download: {e}; trying with URL {webpage_url}')
+ self.download([webpage_url])
+ except ExtractorError as e:
+ self.report_error(e)
+ return self._download_retcode
+
+ @staticmethod
+ def sanitize_info(info_dict, remove_private_keys=False):
+ ''' Sanitize the infodict for converting to json '''
+ if info_dict is None:
+ return info_dict
+ info_dict.setdefault('epoch', int(time.time()))
+ info_dict.setdefault('_type', 'video')
+ info_dict.setdefault('_version', {
+ 'version': __version__,
+ 'current_git_head': current_git_head(),
+ 'release_git_head': RELEASE_GIT_HEAD,
+ 'repository': ORIGIN,
+ })
+
+ if remove_private_keys:
+ reject = lambda k, v: v is None or k.startswith('__') or k in {
+ 'requested_downloads', 'requested_formats', 'requested_subtitles', 'requested_entries',
+ 'entries', 'filepath', '_filename', 'filename', 'infojson_filename', 'original_url',
+ 'playlist_autonumber',
+ }
+ else:
+ reject = lambda k, v: False
+
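+ # Anything that is not JSON-serializable (e.g. extractor objects left in
+ # the infodict) is replaced by its repr() so that json.dumps cannot fail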
+ def filter_fn(obj):
+ if isinstance(obj, dict):
+ return {k: filter_fn(v) for k, v in obj.items() if not reject(k, v)}
+ elif isinstance(obj, (list, tuple, set, LazyList)):
+ return list(map(filter_fn, obj))
+ elif obj is None or isinstance(obj, (str, int, float, bool)):
+ return obj
+ else:
+ return repr(obj)
+
+ return filter_fn(info_dict)
+
+ @staticmethod
+ def filter_requested_info(info_dict, actually_filter=True):
+ ''' Alias of sanitize_info for backward compatibility '''
+ return YoutubeDL.sanitize_info(info_dict, actually_filter)
+
+ def _delete_downloaded_files(self, *files_to_delete, info={}, msg=None):
+ for filename in set(filter(None, files_to_delete)):
+ if msg:
+ self.to_screen(msg % filename)
+ try:
+ os.remove(filename)
+ except OSError:
+ self.report_warning(f'Unable to delete file {filename}')
+ if filename in info.get('__files_to_move', []): # NB: Delete even if None
+ del info['__files_to_move'][filename]
+
+ @staticmethod
+ def post_extract(info_dict):
+ def actual_post_extract(info_dict):
+ if info_dict.get('_type') in ('playlist', 'multi_video'):
+ for video_dict in info_dict.get('entries') or []:
+ actual_post_extract(video_dict or {})
+ return
+
+ post_extractor = info_dict.pop('__post_extractor', None) or (lambda: {})
+ info_dict.update(post_extractor())
+
+ actual_post_extract(info_dict or {})
+
+ def run_pp(self, pp, infodict):
+ files_to_delete = []
+ if '__files_to_move' not in infodict:
+ infodict['__files_to_move'] = {}
+ try:
+ files_to_delete, infodict = pp.run(infodict)
+ except PostProcessingError as e:
+ # Must be True and not 'only_download'
+ if self.params.get('ignoreerrors') is True:
+ self.report_error(e)
+ return infodict
+ raise
+
+ if not files_to_delete:
+ return infodict
+ if self.params.get('keepvideo', False):
+ for f in files_to_delete:
+ infodict['__files_to_move'].setdefault(f, '')
+ else:
+ self._delete_downloaded_files(
+ *files_to_delete, info=infodict, msg='Deleting original file %s (pass -k to keep)')
+ return infodict
+
+ def run_all_pps(self, key, info, *, additional_pps=None):
+ if key != 'video':
+ self._forceprint(key, info)
+ for pp in (additional_pps or []) + self._pps[key]:
+ info = self.run_pp(pp, info)
+ return info
+
+ def pre_process(self, ie_info, key='pre_process', files_to_move=None):
+ info = dict(ie_info)
+ info['__files_to_move'] = files_to_move or {}
+ try:
+ info = self.run_all_pps(key, info)
+ except PostProcessingError as err:
+ msg = f'Preprocessing: {err}'
+ info.setdefault('__pending_error', msg)
+ self.report_error(msg, is_error=False)
+ return info, info.pop('__files_to_move', None)
+
+ def post_process(self, filename, info, files_to_move=None):
+ """Run all the postprocessors on the given file."""
+ info['filepath'] = filename
+ info['__files_to_move'] = files_to_move or {}
+ info = self.run_all_pps('post_process', info, additional_pps=info.get('__postprocessors'))
+ info = self.run_pp(MoveFilesAfterDownloadPP(self), info)
+ del info['__files_to_move']
+ return self.run_all_pps('after_move', info)
+
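+ # Download-archive entries have the form '<extractor key, lowercased> <video id>',
+ # e.g. 'youtube dQw4w9WgXcQ' (see make_archive_id in utils)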
+ def _make_archive_id(self, info_dict):
+ video_id = info_dict.get('id')
+ if not video_id:
+ return
+ # Future-proof against any change in letter case
+ # and keep backwards compatibility with prior versions
+ extractor = info_dict.get('extractor_key') or info_dict.get('ie_key') # key in a playlist
+ if extractor is None:
+ url = str_or_none(info_dict.get('url'))
+ if not url:
+ return
+ # Try to find matching extractor for the URL and take its ie_key
+ for ie_key, ie in self._ies.items():
+ if ie.suitable(url):
+ extractor = ie_key
+ break
+ else:
+ return
+ return make_archive_id(extractor, video_id)
+
+ def in_download_archive(self, info_dict):
+ if not self.archive:
+ return False
+
+ vid_ids = [self._make_archive_id(info_dict)]
+ vid_ids.extend(info_dict.get('_old_archive_ids') or [])
+ return any(id_ in self.archive for id_ in vid_ids)
+
+ def record_download_archive(self, info_dict):
+ fn = self.params.get('download_archive')
+ if fn is None:
+ return
+ vid_id = self._make_archive_id(info_dict)
+ assert vid_id
+
+ self.write_debug(f'Adding to archive: {vid_id}')
+ if is_path_like(fn):
+ with locked_file(fn, 'a', encoding='utf-8') as archive_file:
+ archive_file.write(vid_id + '\n')
+ self.archive.add(vid_id)
+
+ @staticmethod
+ def format_resolution(format, default='unknown'):
+ if format.get('vcodec') == 'none' and format.get('acodec') != 'none':
+ return 'audio only'
+ if format.get('resolution') is not None:
+ return format['resolution']
+ if format.get('width') and format.get('height'):
+ return '%dx%d' % (format['width'], format['height'])
+ elif format.get('height'):
+ return '%sp' % format['height']
+ elif format.get('width'):
+ return '%dx?' % format['width']
+ return default
+
+ def _list_format_headers(self, *headers):
+ if self.params.get('listformats_table', True) is not False:
+ return [self._format_out(header, self.Styles.HEADERS) for header in headers]
+ return headers
+
+ def _format_note(self, fdict):
+ res = ''
+ if fdict.get('ext') in ['f4f', 'f4m']:
+ res += '(unsupported)'
+ if fdict.get('language'):
+ if res:
+ res += ' '
+ res += '[%s]' % fdict['language']
+ if fdict.get('format_note') is not None:
+ if res:
+ res += ' '
+ res += fdict['format_note']
+ if fdict.get('tbr') is not None:
+ if res:
+ res += ', '
+ res += '%4dk' % fdict['tbr']
+ if fdict.get('container') is not None:
+ if res:
+ res += ', '
+ res += '%s container' % fdict['container']
+ if (fdict.get('vcodec') is not None
+ and fdict.get('vcodec') != 'none'):
+ if res:
+ res += ', '
+ res += fdict['vcodec']
+ if fdict.get('vbr') is not None:
+ res += '@'
+ elif fdict.get('vbr') is not None and fdict.get('abr') is not None:
+ res += 'video@'
+ if fdict.get('vbr') is not None:
+ res += '%4dk' % fdict['vbr']
+ if fdict.get('fps') is not None:
+ if res:
+ res += ', '
+ res += '%sfps' % fdict['fps']
+ if fdict.get('acodec') is not None:
+ if res:
+ res += ', '
+ if fdict['acodec'] == 'none':
+ res += 'video only'
+ else:
+ res += '%-5s' % fdict['acodec']
+ elif fdict.get('abr') is not None:
+ if res:
+ res += ', '
+ res += 'audio'
+ if fdict.get('abr') is not None:
+ res += '@%3dk' % fdict['abr']
+ if fdict.get('asr') is not None:
+ res += ' (%5dHz)' % fdict['asr']
+ if fdict.get('filesize') is not None:
+ if res:
+ res += ', '
+ res += format_bytes(fdict['filesize'])
+ elif fdict.get('filesize_approx') is not None:
+ if res:
+ res += ', '
+ res += '~' + format_bytes(fdict['filesize_approx'])
+ return res
+
+ def _get_formats(self, info_dict):
+ if info_dict.get('formats') is None:
+ if info_dict.get('url') and info_dict.get('_type', 'video') == 'video':
+ return [info_dict]
+ return []
+ return info_dict['formats']
+
+ def render_formats_table(self, info_dict):
+ formats = self._get_formats(info_dict)
+ if not formats:
+ return
+ if self.params.get('listformats_table', True) is False:
+ table = [
+ [
+ format_field(f, 'format_id'),
+ format_field(f, 'ext'),
+ self.format_resolution(f),
+ self._format_note(f)
+ ] for f in formats if (f.get('preference') or 0) >= -1000]
+ return render_table(['format code', 'extension', 'resolution', 'note'], table, extra_gap=1)
+
+ def simplified_codec(f, field):
+ assert field in ('acodec', 'vcodec')
+ codec = f.get(field)
+ if not codec:
+ return 'unknown'
+ elif codec != 'none':
+ return '.'.join(codec.split('.')[:4])
+
+ if field == 'vcodec' and f.get('acodec') == 'none':
+ return 'images'
+ elif field == 'acodec' and f.get('vcodec') == 'none':
+ return ''
+ return self._format_out('audio only' if field == 'vcodec' else 'video only',
+ self.Styles.SUPPRESS)
+
+ delim = self._format_out('\u2502', self.Styles.DELIM, '|', test_encoding=True)
+ table = [
+ [
+ self._format_out(format_field(f, 'format_id'), self.Styles.ID),
+ format_field(f, 'ext'),
+ format_field(f, func=self.format_resolution, ignore=('audio only', 'images')),
+ format_field(f, 'fps', '\t%d', func=round),
+ format_field(f, 'dynamic_range', '%s', ignore=(None, 'SDR')).replace('HDR', ''),
+ format_field(f, 'audio_channels', '\t%s'),
+ delim, (
+ format_field(f, 'filesize', ' \t%s', func=format_bytes)
+ or format_field(f, 'filesize_approx', '≈\t%s', func=format_bytes)
+ or format_field(try_call(lambda: format_bytes(int(info_dict['duration'] * f['tbr'] * (1024 / 8)))),
+ None, self._format_out('~\t%s', self.Styles.SUPPRESS))),
+ format_field(f, 'tbr', '\t%dk', func=round),
+ shorten_protocol_name(f.get('protocol', '')),
+ delim,
+ simplified_codec(f, 'vcodec'),
+ format_field(f, 'vbr', '\t%dk', func=round),
+ simplified_codec(f, 'acodec'),
+ format_field(f, 'abr', '\t%dk', func=round),
+ format_field(f, 'asr', '\t%s', func=format_decimal_suffix),
+ join_nonempty(format_field(f, 'language', '[%s]'), join_nonempty(
+ self._format_out('UNSUPPORTED', self.Styles.BAD_FORMAT) if f.get('ext') in ('f4f', 'f4m') else None,
+ (self._format_out('Maybe DRM', self.Styles.WARNING) if f.get('has_drm') == 'maybe'
+ else self._format_out('DRM', self.Styles.BAD_FORMAT) if f.get('has_drm') else None),
+ format_field(f, 'format_note'),
+ format_field(f, 'container', ignore=(None, f.get('ext'))),
+ delim=', '), delim=' '),
+ ] for f in formats if f.get('preference') is None or f['preference'] >= -1000]
+ header_line = self._list_format_headers(
+ 'ID', 'EXT', 'RESOLUTION', '\tFPS', 'HDR', 'CH', delim, '\tFILESIZE', '\tTBR', 'PROTO',
+ delim, 'VCODEC', '\tVBR', 'ACODEC', '\tABR', '\tASR', 'MORE INFO')
+
+ return render_table(
+ header_line, table, hide_empty=True,
+ delim=self._format_out('\u2500', self.Styles.DELIM, '-', test_encoding=True))
+
+ def render_thumbnails_table(self, info_dict):
+ thumbnails = list(info_dict.get('thumbnails') or [])
+ if not thumbnails:
+ return None
+ return render_table(
+ self._list_format_headers('ID', 'Width', 'Height', 'URL'),
+ [[t.get('id'), t.get('width') or 'unknown', t.get('height') or 'unknown', t['url']] for t in thumbnails])
+
+ def render_subtitles_table(self, video_id, subtitles):
+ def _row(lang, formats):
+ exts, names = zip(*((f['ext'], f.get('name') or 'unknown') for f in reversed(formats)))
+ if len(set(names)) == 1:
+ names = [] if names[0] == 'unknown' else names[:1]
+ return [lang, ', '.join(names), ', '.join(exts)]
+
+ if not subtitles:
+ return None
+ return render_table(
+ self._list_format_headers('Language', 'Name', 'Formats'),
+ [_row(lang, formats) for lang, formats in subtitles.items()],
+ hide_empty=True)
+
+ def __list_table(self, video_id, name, func, *args):
+ table = func(*args)
+ if not table:
+ self.to_screen(f'{video_id} has no {name}')
+ return
+ self.to_screen(f'[info] Available {name} for {video_id}:')
+ self.to_stdout(table)
+
+ def list_formats(self, info_dict):
+ self.__list_table(info_dict['id'], 'formats', self.render_formats_table, info_dict)
+
+ def list_thumbnails(self, info_dict):
+ self.__list_table(info_dict['id'], 'thumbnails', self.render_thumbnails_table, info_dict)
+
+ def list_subtitles(self, video_id, subtitles, name='subtitles'):
+ self.__list_table(video_id, name, self.render_subtitles_table, video_id, subtitles)
+
+ def print_debug_header(self):
+ if not self.params.get('verbose'):
+ return
+
+ from . import _IN_CLI # Must be delayed import
+
+ # These imports can be slow. So import them only as needed
+ from .extractor.extractors import _LAZY_LOADER
+ from .extractor.extractors import (
+ _PLUGIN_CLASSES as plugin_ies,
+ _PLUGIN_OVERRIDES as plugin_ie_overrides
+ )
+
+ def get_encoding(stream):
+ ret = str(getattr(stream, 'encoding', 'missing (%s)' % type(stream).__name__))
+ additional_info = []
+ if os.environ.get('TERM', '').lower() == 'dumb':
+ additional_info.append('dumb')
+ if not supports_terminal_sequences(stream):
+ from .utils import WINDOWS_VT_MODE # Must be imported locally
+ additional_info.append('No VT' if WINDOWS_VT_MODE is False else 'No ANSI')
+ if additional_info:
+ ret = f'{ret} ({",".join(additional_info)})'
+ return ret
+
+ encoding_str = 'Encodings: locale %s, fs %s, pref %s, %s' % (
+ locale.getpreferredencoding(),
+ sys.getfilesystemencoding(),
+ self.get_encoding(),
+ ', '.join(
+ f'{key} {get_encoding(stream)}' for key, stream in self._out_files.items_
+ if stream is not None and key != 'console')
+ )
+
+ logger = self.params.get('logger')
+ if logger:
+ write_debug = lambda msg: logger.debug(f'[debug] {msg}')
+ write_debug(encoding_str)
+ else:
+ write_string(f'[debug] {encoding_str}\n', encoding=None)
+ write_debug = lambda msg: self._write_string(f'[debug] {msg}\n')
+
+ source = detect_variant()
+ if VARIANT not in (None, 'pip'):
+ source += '*'
+ klass = type(self)
+ write_debug(join_nonempty(
+ f'{REPOSITORY.rpartition("/")[2]} version',
+ _make_label(ORIGIN, CHANNEL.partition('@')[2] or __version__, __version__),
+ f'[{RELEASE_GIT_HEAD[:9]}]' if RELEASE_GIT_HEAD else '',
+ '' if source == 'unknown' else f'({source})',
+ '' if _IN_CLI else 'API' if klass == YoutubeDL else f'API:{self.__module__}.{klass.__qualname__}',
+ delim=' '))
+
+ if not _IN_CLI:
+ write_debug(f'params: {self.params}')
+
+ if not _LAZY_LOADER:
+ if os.environ.get('YTDLP_NO_LAZY_EXTRACTORS'):
+ write_debug('Lazy loading extractors is forcibly disabled')
+ else:
+ write_debug('Lazy loading extractors is disabled')
+ if self.params['compat_opts']:
+ write_debug('Compatibility options: %s' % ', '.join(self.params['compat_opts']))
+
+ if current_git_head():
+ write_debug(f'Git HEAD: {current_git_head()}')
+ write_debug(system_identifier())
+
+ exe_versions, ffmpeg_features = FFmpegPostProcessor.get_versions_and_features(self)
+ ffmpeg_features = {key for key, val in ffmpeg_features.items() if val}
+ if ffmpeg_features:
+ exe_versions['ffmpeg'] += ' (%s)' % ','.join(sorted(ffmpeg_features))
+
+ exe_versions['rtmpdump'] = rtmpdump_version()
+ exe_versions['phantomjs'] = PhantomJSwrapper._version()
+ exe_str = ', '.join(
+ f'{exe} {v}' for exe, v in sorted(exe_versions.items()) if v
+ ) or 'none'
+ write_debug('exe versions: %s' % exe_str)
+
+ from .compat.compat_utils import get_package_info
+ from .dependencies import available_dependencies
+
+ write_debug('Optional libraries: %s' % (', '.join(sorted({
+ join_nonempty(*get_package_info(m)) for m in available_dependencies.values()
+ })) or 'none'))
+
+ write_debug(f'Proxy map: {self.proxies}')
+ write_debug(f'Request Handlers: {", ".join(rh.RH_NAME for rh in self._request_director.handlers.values())}')
+ for plugin_type, plugins in {'Extractor': plugin_ies, 'Post-Processor': plugin_pps}.items():
+ display_list = ['%s%s' % (
+ klass.__name__, '' if klass.__name__ == name else f' as {name}')
+ for name, klass in plugins.items()]
+ if plugin_type == 'Extractor':
+ display_list.extend(f'{plugins[-1].IE_NAME.partition("+")[2]} ({parent.__name__})'
+ for parent, plugins in plugin_ie_overrides.items())
+ if not display_list:
+ continue
+ write_debug(f'{plugin_type} Plugins: {", ".join(sorted(display_list))}')
+
+ plugin_dirs = plugin_directories()
+ if plugin_dirs:
+ write_debug(f'Plugin directories: {plugin_dirs}')
+
+ # Not implemented
+ if False and self.params.get('call_home'):
+ ipaddr = self.urlopen('https://yt-dl.org/ip').read().decode()
+ write_debug('Public IP address: %s' % ipaddr)
+ latest_version = self.urlopen(
+ 'https://yt-dl.org/latest/version').read().decode()
+ if version_tuple(latest_version) > version_tuple(__version__):
+ self.report_warning(
+ 'You are using an outdated version (newest version: %s)! '
+ 'See https://yt-dl.org/update if you need help updating.' %
+ latest_version)
+
+ @functools.cached_property
+ def proxies(self):
+ """Global proxy configuration"""
+ opts_proxy = self.params.get('proxy')
+ if opts_proxy is not None:
+ if opts_proxy == '':
+ opts_proxy = '__noproxy__'
+ proxies = {'all': opts_proxy}
+ else:
+ proxies = urllib.request.getproxies()
+ # compat. Set HTTPS_PROXY to __noproxy__ to revert
+ if 'http' in proxies and 'https' not in proxies:
+ proxies['https'] = proxies['http']
+
+ return proxies
+
+ @functools.cached_property
+ def cookiejar(self):
+ """Global cookiejar instance"""
+ return load_cookies(
+ self.params.get('cookiefile'), self.params.get('cookiesfrombrowser'), self)
+
+ @property
+ def _opener(self):
+ """
+ Get a urllib OpenerDirector from the Urllib handler (deprecated).
+ """
+ self.deprecation_warning('YoutubeDL._opener is deprecated, use YoutubeDL.urlopen()')
+ handler = self._request_director.handlers['Urllib']
+ return handler._get_instance(cookiejar=self.cookiejar, proxies=self.proxies)
+
+ def urlopen(self, req):
+ """ Start an HTTP download """
+ if isinstance(req, str):
+ req = Request(req)
+ elif isinstance(req, urllib.request.Request):
+ self.deprecation_warning(
+ 'Passing a urllib.request.Request object to YoutubeDL.urlopen() is deprecated. '
+ 'Use yt_dlp.networking.common.Request instead.')
+ req = urllib_req_to_req(req)
+ assert isinstance(req, Request)
+
+ # compat: Assume user:pass url params are basic auth
+ url, basic_auth_header = extract_basic_auth(req.url)
+ if basic_auth_header:
+ req.headers['Authorization'] = basic_auth_header
+ req.url = sanitize_url(url)
+
+ clean_proxies(proxies=req.proxies, headers=req.headers)
+ clean_headers(req.headers)
+
+ try:
+ return self._request_director.send(req)
+ except NoSupportingHandlers as e:
+ for ue in e.unsupported_errors:
+ # FIXME: This depends on the order of errors.
+ if not (ue.handler and ue.msg):
+ continue
+ if ue.handler.RH_KEY == 'Urllib' and 'unsupported url scheme: "file"' in ue.msg.lower():
+ raise RequestError(
+ 'file:// URLs are disabled by default in yt-dlp for security reasons. '
+ 'Use --enable-file-urls to enable at your own risk.', cause=ue) from ue
+ if 'unsupported proxy type: "https"' in ue.msg.lower():
+ raise RequestError(
+ 'To use an HTTPS proxy for this request, one of the following dependencies needs to be installed: requests')
+
+ elif (
+ re.match(r'unsupported url scheme: "wss?"', ue.msg.lower())
+ and 'websockets' not in self._request_director.handlers
+ ):
+ raise RequestError(
+ 'This request requires WebSocket support. '
+ 'Ensure one of the following dependencies is installed: websockets',
+ cause=ue) from ue
+ raise
+ except SSLError as e:
+ if 'UNSAFE_LEGACY_RENEGOTIATION_DISABLED' in str(e):
+ raise RequestError('UNSAFE_LEGACY_RENEGOTIATION_DISABLED: Try using --legacy-server-connect', cause=e) from e
+ elif 'SSLV3_ALERT_HANDSHAKE_FAILURE' in str(e):
+ raise RequestError(
+ 'SSLV3_ALERT_HANDSHAKE_FAILURE: The server may not support the current cipher list. '
+ 'Try using --legacy-server-connect', cause=e) from e
+ raise
+
+ def build_request_director(self, handlers, preferences=None):
+ logger = _YDLLogger(self)
+ headers = self.params['http_headers'].copy()
+ proxies = self.proxies.copy()
+ clean_headers(headers)
+ clean_proxies(proxies, headers)
+
+ director = RequestDirector(logger=logger, verbose=self.params.get('debug_printtraffic'))
+ for handler in handlers:
+ director.add_handler(handler(
+ logger=logger,
+ headers=headers,
+ cookiejar=self.cookiejar,
+ proxies=proxies,
+ prefer_system_certs='no-certifi' in self.params['compat_opts'],
+ verify=not self.params.get('nocheckcertificate'),
+ **traverse_obj(self.params, {
+ 'verbose': 'debug_printtraffic',
+ 'source_address': 'source_address',
+ 'timeout': 'socket_timeout',
+ 'legacy_ssl_support': 'legacyserverconnect',
+ 'enable_file_urls': 'enable_file_urls',
+ 'client_cert': {
+ 'client_certificate': 'client_certificate',
+ 'client_certificate_key': 'client_certificate_key',
+ 'client_certificate_password': 'client_certificate_password',
+ },
+ }),
+ ))
+ director.preferences.update(preferences or [])
+ if 'prefer-legacy-http-handler' in self.params['compat_opts']:
+ director.preferences.add(lambda rh, _: 500 if rh.RH_KEY == 'Urllib' else 0)
+ return director
+
+ @functools.cached_property
+ def _request_director(self):
+ return self.build_request_director(_REQUEST_HANDLERS.values(), _RH_PREFERENCES)
+
+ def encode(self, s):
+ if isinstance(s, bytes):
+ return s # Already encoded
+
+ try:
+ return s.encode(self.get_encoding())
+ except UnicodeEncodeError as err:
+ err.reason = err.reason + '. Check your system encoding configuration or use the --encoding option.'
+ raise
+
+ def get_encoding(self):
+ encoding = self.params.get('encoding')
+ if encoding is None:
+ encoding = preferredencoding()
+ return encoding
+
+ def _write_info_json(self, label, ie_result, infofn, overwrite=None):
+ ''' Write infojson and return True = written, 'exists' = already exists, False = skip, None = error '''
+ if overwrite is None:
+ overwrite = self.params.get('overwrites', True)
+ if not self.params.get('writeinfojson'):
+ return False
+ elif not infofn:
+ self.write_debug(f'Skipping writing {label} infojson')
+ return False
+ elif not self._ensure_dir_exists(infofn):
+ return None
+ elif not overwrite and os.path.exists(infofn):
+ self.to_screen(f'[info] {label.title()} metadata is already present')
+ return 'exists'
+
+ self.to_screen(f'[info] Writing {label} metadata as JSON to: {infofn}')
+ try:
+ write_json_file(self.sanitize_info(ie_result, self.params.get('clean_infojson', True)), infofn)
+ return True
+ except OSError:
+ self.report_error(f'Cannot write {label} metadata to JSON file {infofn}')
+ return None
+
+ def _write_description(self, label, ie_result, descfn):
+ ''' Write description and return True = written, False = skip, None = error '''
+ if not self.params.get('writedescription'):
+ return False
+ elif not descfn:
+ self.write_debug(f'Skipping writing {label} description')
+ return False
+ elif not self._ensure_dir_exists(descfn):
+ return None
+ elif not self.params.get('overwrites', True) and os.path.exists(descfn):
+ self.to_screen(f'[info] {label.title()} description is already present')
+ elif ie_result.get('description') is None:
+ self.to_screen(f'[info] There\'s no {label} description to write')
+ return False
+ else:
+ try:
+ self.to_screen(f'[info] Writing {label} description to: {descfn}')
+ with open(encodeFilename(descfn), 'w', encoding='utf-8') as descfile:
+ descfile.write(ie_result['description'])
+ except OSError:
+ self.report_error(f'Cannot write {label} description file {descfn}')
+ return None
+ return True
+
+ def _write_subtitles(self, info_dict, filename):
+ ''' Write subtitles to file and return list of (sub_filename, final_sub_filename); or None if error'''
+ ret = []
+ subtitles = info_dict.get('requested_subtitles')
+ if not (self.params.get('writesubtitles') or self.params.get('writeautomaticsub')):
+ # Subtitle download errors are already handled as non-fatal in the relevant IE,
+ # so this silently continues when used with an IE that lacks subtitle support
+ return ret
+ elif not subtitles:
+ self.to_screen('[info] There are no subtitles for the requested languages')
+ return ret
+ sub_filename_base = self.prepare_filename(info_dict, 'subtitle')
+ if not sub_filename_base:
+ self.to_screen('[info] Skipping writing video subtitles')
+ return ret
+
+ for sub_lang, sub_info in subtitles.items():
+ sub_format = sub_info['ext']
+ sub_filename = subtitles_filename(filename, sub_lang, sub_format, info_dict.get('ext'))
+ sub_filename_final = subtitles_filename(sub_filename_base, sub_lang, sub_format, info_dict.get('ext'))
+ existing_sub = self.existing_file((sub_filename_final, sub_filename))
+ if existing_sub:
+ self.to_screen(f'[info] Video subtitle {sub_lang}.{sub_format} is already present')
+ sub_info['filepath'] = existing_sub
+ ret.append((existing_sub, sub_filename_final))
+ continue
+
+ self.to_screen(f'[info] Writing video subtitles to: {sub_filename}')
+ if sub_info.get('data') is not None:
+ try:
+ # Use newline='' to prevent conversion of newline characters
+ # See https://github.com/ytdl-org/youtube-dl/issues/10268
+ with open(sub_filename, 'w', encoding='utf-8', newline='') as subfile:
+ subfile.write(sub_info['data'])
+ sub_info['filepath'] = sub_filename
+ ret.append((sub_filename, sub_filename_final))
+ continue
+ except OSError:
+ self.report_error(f'Cannot write video subtitles file {sub_filename}')
+ return None
+
+ try:
+ sub_copy = sub_info.copy()
+ sub_copy.setdefault('http_headers', info_dict.get('http_headers'))
+ self.dl(sub_filename, sub_copy, subtitle=True)
+ sub_info['filepath'] = sub_filename
+ ret.append((sub_filename, sub_filename_final))
+ except (DownloadError, ExtractorError, IOError, OSError, ValueError) + network_exceptions as err:
+ msg = f'Unable to download video subtitles for {sub_lang!r}: {err}'
+ if self.params.get('ignoreerrors') is not True: # False or 'only_download'
+ if not self.params.get('ignoreerrors'):
+ self.report_error(msg)
+ raise DownloadError(msg)
+ self.report_warning(msg)
+ return ret
+
+ def _write_thumbnails(self, label, info_dict, filename, thumb_filename_base=None):
+ ''' Write thumbnails to file and return list of (thumb_filename, final_thumb_filename); or None if error '''
+ write_all = self.params.get('write_all_thumbnails', False)
+ thumbnails, ret = [], []
+ if write_all or self.params.get('writethumbnail', False):
+ thumbnails = info_dict.get('thumbnails') or []
+ if not thumbnails:
+ self.to_screen(f'[info] There are no {label} thumbnails to download')
+ return ret
+ multiple = write_all and len(thumbnails) > 1
+
+ if thumb_filename_base is None:
+ thumb_filename_base = filename
+ if thumbnails and not thumb_filename_base:
+ self.write_debug(f'Skipping writing {label} thumbnail')
+ return ret
+
+ if thumbnails and not self._ensure_dir_exists(filename):
+ return None
+
+ for idx, t in list(enumerate(thumbnails))[::-1]:
+ thumb_ext = (f'{t["id"]}.' if multiple else '') + determine_ext(t['url'], 'jpg')
+ thumb_display_id = f'{label} thumbnail {t["id"]}'
+ thumb_filename = replace_extension(filename, thumb_ext, info_dict.get('ext'))
+ thumb_filename_final = replace_extension(thumb_filename_base, thumb_ext, info_dict.get('ext'))
+
+ existing_thumb = self.existing_file((thumb_filename_final, thumb_filename))
+ if existing_thumb:
+ self.to_screen('[info] %s is already present' % (
+ thumb_display_id if multiple else f'{label} thumbnail').capitalize())
+ t['filepath'] = existing_thumb
+ ret.append((existing_thumb, thumb_filename_final))
+ else:
+ self.to_screen(f'[info] Downloading {thumb_display_id} ...')
+ try:
+ uf = self.urlopen(Request(t['url'], headers=t.get('http_headers', {})))
+ self.to_screen(f'[info] Writing {thumb_display_id} to: {thumb_filename}')
+ with open(encodeFilename(thumb_filename), 'wb') as thumbf:
+ shutil.copyfileobj(uf, thumbf)
+ ret.append((thumb_filename, thumb_filename_final))
+ t['filepath'] = thumb_filename
+ except network_exceptions as err:
+ if isinstance(err, HTTPError) and err.status == 404:
+ self.to_screen(f'[info] {thumb_display_id.title()} does not exist')
+ else:
+ self.report_warning(f'Unable to download {thumb_display_id}: {err}')
+ thumbnails.pop(idx)
+ if ret and not write_all:
+ break
+ return ret
diff --git a/yt_dlp/__init__.py b/yt_dlp/__init__.py
new file mode 100644
index 0000000..aeea262
--- /dev/null
+++ b/yt_dlp/__init__.py
@@ -0,0 +1,1054 @@
+import sys
+
+if sys.version_info < (3, 8):
+ raise ImportError(
+ f'You are using an unsupported version of Python. Only Python versions 3.8 and above are supported by yt-dlp') # noqa: F541
+
+__license__ = 'The Unlicense'
+
+import collections
+import getpass
+import itertools
+import optparse
+import os
+import re
+import traceback
+
+from .compat import compat_os_name, compat_shlex_quote
+from .cookies import SUPPORTED_BROWSERS, SUPPORTED_KEYRINGS
+from .downloader.external import get_external_downloader
+from .extractor import list_extractor_classes
+from .extractor.adobepass import MSO_INFO
+from .options import parseOpts
+from .postprocessor import (
+ FFmpegExtractAudioPP,
+ FFmpegMergerPP,
+ FFmpegPostProcessor,
+ FFmpegSubtitlesConvertorPP,
+ FFmpegThumbnailsConvertorPP,
+ FFmpegVideoConvertorPP,
+ FFmpegVideoRemuxerPP,
+ MetadataFromFieldPP,
+ MetadataParserPP,
+)
+from .update import Updater
+from .utils import (
+ NO_DEFAULT,
+ POSTPROCESS_WHEN,
+ DateRange,
+ DownloadCancelled,
+ DownloadError,
+ FormatSorter,
+ GeoUtils,
+ PlaylistEntries,
+ SameFileError,
+ decodeOption,
+ download_range_func,
+ expand_path,
+ float_or_none,
+ format_field,
+ int_or_none,
+ match_filter_func,
+ parse_bytes,
+ parse_duration,
+ preferredencoding,
+ read_batch_urls,
+ read_stdin,
+ render_table,
+ setproctitle,
+ traverse_obj,
+ variadic,
+ write_string,
+)
+from .utils.networking import std_headers
+from .YoutubeDL import YoutubeDL
+
+_IN_CLI = False
+
+
+def _exit(status=0, *args):
+ for msg in args:
+ sys.stderr.write(msg)
+ raise SystemExit(status)
+
+
+def get_urls(urls, batchfile, verbose):
+ """
+ @param verbose -1: quiet, 0: normal, 1: verbose
+ """
+ batch_urls = []
+ if batchfile is not None:
+ try:
+ batch_urls = read_batch_urls(
+ read_stdin(None if verbose == -1 else 'URLs') if batchfile == '-'
+ else open(expand_path(batchfile), encoding='utf-8', errors='ignore'))
+ if verbose == 1:
+ write_string('[debug] Batch file urls: ' + repr(batch_urls) + '\n')
+ except OSError:
+ _exit(f'ERROR: batch file {batchfile} could not be read')
+ _enc = preferredencoding()
+ return [
+ url.strip().decode(_enc, 'ignore') if isinstance(url, bytes) else url.strip()
+ for url in batch_urls + urls]
+
+
+def print_extractor_information(opts, urls):
+ out = ''
+ if opts.list_extractors:
+ # Importing GenericIE is currently slow since it imports YoutubeIE
+ from .extractor.generic import GenericIE
+
+ urls = dict.fromkeys(urls, False)
+ for ie in list_extractor_classes(opts.age_limit):
+ out += ie.IE_NAME + (' (CURRENTLY BROKEN)' if not ie.working() else '') + '\n'
+ if ie == GenericIE:
+ matched_urls = [url for url, matched in urls.items() if not matched]
+ else:
+ matched_urls = tuple(filter(ie.suitable, urls.keys()))
+ urls.update(dict.fromkeys(matched_urls, True))
+ out += ''.join(f' {url}\n' for url in matched_urls)
+ elif opts.list_extractor_descriptions:
+ _SEARCHES = ('cute kittens', 'slithering pythons', 'falling cat', 'angry poodle', 'purple fish', 'running tortoise', 'sleeping bunny', 'burping cow')
+ out = '\n'.join(
+ ie.description(markdown=False, search_examples=_SEARCHES)
+ for ie in list_extractor_classes(opts.age_limit) if ie.working() and ie.IE_DESC is not False)
+ elif opts.ap_list_mso:
+ out = 'Supported TV Providers:\n%s\n' % render_table(
+ ['mso', 'mso name'],
+ [[mso_id, mso_info['name']] for mso_id, mso_info in MSO_INFO.items()])
+ else:
+ return False
+ write_string(out, out=sys.stdout)
+ return True
+
+
+def set_compat_opts(opts):
+ def _unused_compat_opt(name):
+ if name not in opts.compat_opts:
+ return False
+ opts.compat_opts.discard(name)
+ opts.compat_opts.update(['*%s' % name])
+ return True
+
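+ # Returns True if the compat option took effect, False if it was ignored
+ # because the user explicitly set the underlying option, and None when
+ # neither was given and the regular default is applied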
+ def set_default_compat(compat_name, opt_name, default=True, remove_compat=True):
+ attr = getattr(opts, opt_name)
+ if compat_name in opts.compat_opts:
+ if attr is None:
+ setattr(opts, opt_name, not default)
+ return True
+ else:
+ if remove_compat:
+ _unused_compat_opt(compat_name)
+ return False
+ elif attr is None:
+ setattr(opts, opt_name, default)
+ return None
+
+ set_default_compat('abort-on-error', 'ignoreerrors', 'only_download')
+ set_default_compat('no-playlist-metafiles', 'allow_playlist_files')
+ set_default_compat('no-clean-infojson', 'clean_infojson')
+ if 'no-attach-info-json' in opts.compat_opts:
+ if opts.embed_infojson:
+ _unused_compat_opt('no-attach-info-json')
+ else:
+ opts.embed_infojson = False
+ if 'format-sort' in opts.compat_opts:
+ opts.format_sort.extend(FormatSorter.ytdl_default)
+ _video_multistreams_set = set_default_compat('multistreams', 'allow_multiple_video_streams', False, remove_compat=False)
+ _audio_multistreams_set = set_default_compat('multistreams', 'allow_multiple_audio_streams', False, remove_compat=False)
+ if _video_multistreams_set is False and _audio_multistreams_set is False:
+ _unused_compat_opt('multistreams')
+ if 'filename' in opts.compat_opts:
+ if opts.outtmpl.get('default') is None:
+ opts.outtmpl.update({'default': '%(title)s-%(id)s.%(ext)s'})
+ else:
+ _unused_compat_opt('filename')
+
+
+def validate_options(opts):
+ def validate(cndn, name, value=None, msg=None):
+ if cndn:
+ return True
+ raise ValueError((msg or 'invalid {name} "{value}" given').format(name=name, value=value))
+
+ def validate_in(name, value, items, msg=None):
+ return validate(value is None or value in items, name, value, msg)
+
+ def validate_regex(name, value, regex):
+ return validate(value is None or re.match(regex, value), name, value)
+
+ def validate_positive(name, value, strict=False):
+ return validate(value is None or value > 0 or (not strict and value == 0),
+ name, value, '{name} "{value}" must be positive' + ('' if strict else ' or 0'))
+
+ def validate_minmax(min_val, max_val, min_name, max_name=None):
+ if max_val is None or min_val is None or max_val >= min_val:
+ return
+ if not max_name:
+ min_name, max_name = f'min {min_name}', f'max {min_name}'
+ raise ValueError(f'{max_name} "{max_val}" must be greater than or equal to {min_name} "{min_val}"')
+
+ # Usernames and passwords
+ validate(sum(map(bool, (opts.usenetrc, opts.netrc_cmd, opts.username))) <= 1, '.netrc',
+ msg='{name}, netrc command and username/password are mutually exclusive options')
+ validate(opts.password is None or opts.username is not None, 'account username', msg='{name} missing')
+ validate(opts.ap_password is None or opts.ap_username is not None,
+ 'TV Provider account username', msg='{name} missing')
+ validate_in('TV Provider', opts.ap_mso, MSO_INFO,
+ 'Unsupported {name} "{value}", use --ap-list-mso to get a list of supported TV Providers')
+
+ # Numbers
+ validate_positive('autonumber start', opts.autonumber_start)
+ validate_positive('autonumber size', opts.autonumber_size, True)
+ validate_positive('concurrent fragments', opts.concurrent_fragment_downloads, True)
+ validate_positive('playlist start', opts.playliststart, True)
+ if opts.playlistend != -1:
+ validate_minmax(opts.playliststart, opts.playlistend, 'playlist start', 'playlist end')
+
+ # Time ranges
+ validate_positive('subtitles sleep interval', opts.sleep_interval_subtitles)
+ validate_positive('requests sleep interval', opts.sleep_interval_requests)
+ validate_positive('sleep interval', opts.sleep_interval)
+ validate_positive('max sleep interval', opts.max_sleep_interval)
+ if opts.sleep_interval is None:
+ validate(
+ opts.max_sleep_interval is None, 'min sleep interval',
+ msg='{name} must be specified; use --min-sleep-interval')
+ elif opts.max_sleep_interval is None:
+ opts.max_sleep_interval = opts.sleep_interval
+ else:
+ validate_minmax(opts.sleep_interval, opts.max_sleep_interval, 'sleep interval')
+
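+ # --wait-for-video accepts 'MIN' or 'MIN-MAX' durations; padding the split
+ # result with [None] makes max_wait come out as None when no range is given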
+ if opts.wait_for_video is not None:
+ min_wait, max_wait, *_ = map(parse_duration, opts.wait_for_video.split('-', 1) + [None])
+ validate(min_wait is not None and not (max_wait is None and '-' in opts.wait_for_video),
+ 'time range to wait for video', opts.wait_for_video)
+ validate_minmax(min_wait, max_wait, 'time range to wait for video')
+ opts.wait_for_video = (min_wait, max_wait)
+
+ # Format sort
+ for f in opts.format_sort:
+ validate_regex('format sorting', f, FormatSorter.regex)
+
+ # Postprocessor formats
+ validate_regex('merge output format', opts.merge_output_format,
+ r'({0})(/({0}))*'.format('|'.join(map(re.escape, FFmpegMergerPP.SUPPORTED_EXTS))))
+ validate_regex('audio format', opts.audioformat, FFmpegExtractAudioPP.FORMAT_RE)
+ validate_in('subtitle format', opts.convertsubtitles, FFmpegSubtitlesConvertorPP.SUPPORTED_EXTS)
+ validate_regex('thumbnail format', opts.convertthumbnails, FFmpegThumbnailsConvertorPP.FORMAT_RE)
+ validate_regex('recode video format', opts.recodevideo, FFmpegVideoConvertorPP.FORMAT_RE)
+ validate_regex('remux video format', opts.remuxvideo, FFmpegVideoRemuxerPP.FORMAT_RE)
+ if opts.audioquality:
+ opts.audioquality = opts.audioquality.strip('k').strip('K')
+ # int_or_none prevents inf, nan
+ validate_positive('audio quality', int_or_none(float_or_none(opts.audioquality), default=0))
+
+ # Retries
+ def parse_retries(name, value):
+ if value is None:
+ return None
+ elif value in ('inf', 'infinite'):
+ return float('inf')
+ try:
+ return int(value)
+ except (TypeError, ValueError):
+ validate(False, f'{name} retry count', value)
+
+ opts.retries = parse_retries('download', opts.retries)
+ opts.fragment_retries = parse_retries('fragment', opts.fragment_retries)
+ opts.extractor_retries = parse_retries('extractor', opts.extractor_retries)
+ opts.file_access_retries = parse_retries('file access', opts.file_access_retries)
+
+ # Retry sleep function
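+ # Expressions have the form '[linear|exp=]START[:LIMIT][:STEP]', e.g.:
+ # '5' -> a constant 5s between retries
+ # 'linear=1::2' -> 1, 3, 5, 7, ... seconds
+ # 'exp=1:20' -> 1, 2, 4, 8, 16, 20, 20, ... seconds (capped at 20)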
+ def parse_sleep_func(expr):
+ NUMBER_RE = r'\d+(?:\.\d+)?'
+ op, start, limit, step, *_ = tuple(re.fullmatch(
+ rf'(?:(linear|exp)=)?({NUMBER_RE})(?::({NUMBER_RE})?)?(?::({NUMBER_RE}))?',
+ expr.strip()).groups()) + (None, None)
+
+ if op == 'exp':
+ return lambda n: min(float(start) * (float(step or 2) ** n), float(limit or 'inf'))
+ else:
+ default_step = start if op or limit else 0
+ return lambda n: min(float(start) + float(step or default_step) * n, float(limit or 'inf'))
+
+ for key, expr in opts.retry_sleep.copy().items(): # copy: entries may be deleted during iteration
+ if not expr:
+ del opts.retry_sleep[key]
+ continue
+ try:
+ opts.retry_sleep[key] = parse_sleep_func(expr)
+ except AttributeError:
+ raise ValueError(f'invalid {key} retry sleep expression {expr!r}')
+
+ # Bytes
+ def validate_bytes(name, value):
+ if value is None:
+ return None
+ numeric_limit = parse_bytes(value)
+ validate(numeric_limit is not None, 'rate limit', value)
+ return numeric_limit
+
+ opts.ratelimit = validate_bytes('rate limit', opts.ratelimit)
+ opts.throttledratelimit = validate_bytes('throttled rate limit', opts.throttledratelimit)
+ opts.min_filesize = validate_bytes('min filesize', opts.min_filesize)
+ opts.max_filesize = validate_bytes('max filesize', opts.max_filesize)
+ opts.buffersize = validate_bytes('buffer size', opts.buffersize)
+ opts.http_chunk_size = validate_bytes('http chunk size', opts.http_chunk_size)
+
+ # Output templates
+ def validate_outtmpl(tmpl, msg):
+ err = YoutubeDL.validate_outtmpl(tmpl)
+ if err:
+ raise ValueError(f'invalid {msg} "{tmpl}": {err}')
+
+ for k, tmpl in opts.outtmpl.items():
+ validate_outtmpl(tmpl, f'{k} output template')
+ for type_, tmpl_list in opts.forceprint.items():
+ for tmpl in tmpl_list:
+ validate_outtmpl(tmpl, f'{type_} print template')
+ for type_, tmpl_list in opts.print_to_file.items():
+ for tmpl, file in tmpl_list:
+ validate_outtmpl(tmpl, f'{type_} print to file template')
+ validate_outtmpl(file, f'{type_} print to file filename')
+ validate_outtmpl(opts.sponsorblock_chapter_title, 'SponsorBlock chapter title')
+ for k, tmpl in opts.progress_template.items():
+ k = f'{k[:-6]} console title' if '-title' in k else f'{k} progress'
+ validate_outtmpl(tmpl, f'{k} template')
+
+ outtmpl_default = opts.outtmpl.get('default')
+ if outtmpl_default == '':
+ opts.skip_download = None
+ del opts.outtmpl['default']
+
+ def parse_chapters(name, value, advanced=False):
+ parse_timestamp = lambda x: float('inf') if x in ('inf', 'infinite') else parse_duration(x)
+ TIMESTAMP_RE = r'''(?x)(?:
+ (?P<start_sign>-?)(?P<start>[^-]+)
+ )?\s*-\s*(?:
+ (?P<end_sign>-?)(?P<end>[^-]+)
+ )?'''
+
+ chapters, ranges, from_url = [], [], False
+ for regex in value or []:
+ if advanced and regex == '*from-url':
+ from_url = True
+ continue
+ elif not regex.startswith('*'):
+ try:
+ chapters.append(re.compile(regex))
+ except re.error as err:
+ raise ValueError(f'invalid {name} regex "{regex}" - {err}')
+ continue
+
+ for range_ in map(str.strip, regex[1:].split(',')):
+ mobj = range_ != '-' and re.fullmatch(TIMESTAMP_RE, range_)
+ dur = mobj and [parse_timestamp(mobj.group('start') or '0'), parse_timestamp(mobj.group('end') or 'inf')]
+ signs = mobj and (mobj.group('start_sign'), mobj.group('end_sign'))
+
+ err = None
+ if None in (dur or [None]):
+ err = 'Must be of the form "*start-end"'
+ elif not advanced and any(signs):
+ err = 'Negative timestamps are not allowed'
+ else:
+ dur[0] *= -1 if signs[0] else 1
+ dur[1] *= -1 if signs[1] else 1
+ if dur[1] == float('-inf'):
+ err = '"-inf" is not a valid end'
+ if err:
+ raise ValueError(f'invalid {name} time range "{regex}". {err}')
+ ranges.append(dur)
+
+ return chapters, ranges, from_url
+
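+ # Illustrative: parse_chapters('--remove-chapters', ['intro', '*0-10.5'])
+ # returns roughly ([re.compile('intro')], [[0, 10.5]], False), i.e. one
+ # chapter-title regex plus one 0s-10.5s time range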
+ opts.remove_chapters, opts.remove_ranges, _ = parse_chapters('--remove-chapters', opts.remove_chapters)
+ opts.download_ranges = download_range_func(*parse_chapters('--download-sections', opts.download_ranges, True))
+
+ # Cookies from browser
+ if opts.cookiesfrombrowser:
+ container = None
+ mobj = re.fullmatch(r'''(?x)
+ (?P<name>[^+:]+)
+ (?:\s*\+\s*(?P<keyring>[^:]+))?
+ (?:\s*:\s*(?!:)(?P<profile>.+?))?
+ (?:\s*::\s*(?P<container>.+))?
+ ''', opts.cookiesfrombrowser)
+ if mobj is None:
+ raise ValueError(f'invalid cookies from browser arguments: {opts.cookiesfrombrowser}')
+ browser_name, keyring, profile, container = mobj.group('name', 'keyring', 'profile', 'container')
+ browser_name = browser_name.lower()
+ if browser_name not in SUPPORTED_BROWSERS:
+ raise ValueError(f'unsupported browser specified for cookies: "{browser_name}". '
+ f'Supported browsers are: {", ".join(sorted(SUPPORTED_BROWSERS))}')
+ if keyring is not None:
+ keyring = keyring.upper()
+ if keyring not in SUPPORTED_KEYRINGS:
+ raise ValueError(f'unsupported keyring specified for cookies: "{keyring}". '
+ f'Supported keyrings are: {", ".join(sorted(SUPPORTED_KEYRINGS))}')
+ opts.cookiesfrombrowser = (browser_name, profile, keyring, container)
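+ # Illustrative: 'firefox+gnomekeyring:Work::PersonalContainer' parses to
+ # ('firefox', 'Work', 'GNOMEKEYRING', 'PersonalContainer')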
+
+ # MetadataParser
+ def metadataparser_actions(f):
+ if isinstance(f, str):
+ cmd = '--parse-metadata %s' % compat_shlex_quote(f)
+ try:
+ actions = [MetadataFromFieldPP.to_action(f)]
+ except Exception as err:
+ raise ValueError(f'{cmd} is invalid; {err}')
+ else:
+ cmd = '--replace-in-metadata %s' % ' '.join(map(compat_shlex_quote, f))
+ actions = ((MetadataParserPP.Actions.REPLACE, x, *f[1:]) for x in f[0].split(','))
+
+ for action in actions:
+ try:
+ MetadataParserPP.validate_action(*action)
+ except Exception as err:
+ raise ValueError(f'{cmd} is invalid; {err}')
+ yield action
+
+ if opts.metafromtitle is not None:
+ opts.parse_metadata.setdefault('pre_process', []).append('title:%s' % opts.metafromtitle)
+ opts.parse_metadata = {
+ k: list(itertools.chain(*map(metadataparser_actions, v)))
+ for k, v in opts.parse_metadata.items()
+ }
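+ # Illustrative: --parse-metadata 'title:%(artist)s - %(title)s' yields roughly
+ # one interpret action that extracts `artist` and `title` from the title field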
+
+ # Other options
+ if opts.playlist_items is not None:
+ try:
+ tuple(PlaylistEntries.parse_playlist_items(opts.playlist_items))
+ except Exception as err:
+ raise ValueError(f'Invalid playlist-items {opts.playlist_items!r}: {err}')
+
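+ # Illustrative: '--xff US' (two characters) sets geo_bypass_country, while
+ # '--xff 198.51.100.0/24' sets geo_bypass_ip_block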
+ opts.geo_bypass_country, opts.geo_bypass_ip_block = None, None
+ if opts.geo_bypass.lower() not in ('default', 'never'):
+ try:
+ GeoUtils.random_ipv4(opts.geo_bypass)
+ except Exception:
+ raise ValueError(f'Unsupported --xff "{opts.geo_bypass}"')
+ if len(opts.geo_bypass) == 2:
+ opts.geo_bypass_country = opts.geo_bypass
+ else:
+ opts.geo_bypass_ip_block = opts.geo_bypass
+ opts.geo_bypass = opts.geo_bypass.lower() != 'never'
+
+ opts.match_filter = match_filter_func(opts.match_filter, opts.breaking_match_filter)
+
+ if opts.download_archive is not None:
+ opts.download_archive = expand_path(opts.download_archive)
+
+ if opts.ffmpeg_location is not None:
+ opts.ffmpeg_location = expand_path(opts.ffmpeg_location)
+
+ if opts.user_agent is not None:
+ opts.headers.setdefault('User-Agent', opts.user_agent)
+ if opts.referer is not None:
+ opts.headers.setdefault('Referer', opts.referer)
+
+ if opts.no_sponsorblock:
+ opts.sponsorblock_mark = opts.sponsorblock_remove = set()
+
+ default_downloader = None
+ for proto, path in opts.external_downloader.items():
+ if path == 'native':
+ continue
+ ed = get_external_downloader(path)
+ if ed is None:
+ raise ValueError(
+ f'No such {format_field(proto, None, "%s ", ignore="default")}external downloader "{path}"')
+ elif ed and proto == 'default':
+ default_downloader = ed.get_basename()
+
+ for policy in opts.color.values():
+ if policy not in ('always', 'auto', 'no_color', 'never'):
+ raise ValueError(f'"{policy}" is not a valid color policy')
+
+ warnings, deprecation_warnings = [], []
+
+ # Common mistake: -f best
+ if opts.format == 'best':
+ warnings.append('.\n '.join((
+ '"-f best" selects the best pre-merged format which is often not the best option',
+ 'To let yt-dlp download and merge the best available formats, simply do not pass any format selection',
+ 'If you know what you are doing and want only the best pre-merged format, use "-f b" instead to suppress this warning')))
+
+ # --(postprocessor/downloader)-args without name
+ def report_args_compat(name, value, key1, key2=None, where=None):
+ if key1 in value and key2 not in value:
+ warnings.append(f'{name.title()} arguments given without specifying name. '
+ f'The arguments will be given to {where or f"all {name}s"}')
+ return True
+ return False
+
+ if report_args_compat('external downloader', opts.external_downloader_args,
+ 'default', where=default_downloader) and default_downloader:
+ # Compat with youtube-dl's behavior. See https://github.com/ytdl-org/youtube-dl/commit/49c5293014bc11ec8c009856cd63cffa6296c1e1
+ opts.external_downloader_args.setdefault(default_downloader, opts.external_downloader_args.pop('default'))
+
+ if report_args_compat('post-processor', opts.postprocessor_args, 'default-compat', 'default'):
+ opts.postprocessor_args['default'] = opts.postprocessor_args.pop('default-compat')
+ opts.postprocessor_args.setdefault('sponskrub', [])
+
+ def report_conflict(arg1, opt1, arg2='--allow-unplayable-formats', opt2='allow_unplayable_formats',
+ val1=NO_DEFAULT, val2=NO_DEFAULT, default=False):
+ if val2 is NO_DEFAULT:
+ val2 = getattr(opts, opt2)
+ if not val2:
+ return
+
+ if val1 is NO_DEFAULT:
+ val1 = getattr(opts, opt1)
+ if val1:
+ warnings.append(f'{arg1} is ignored since {arg2} was given')
+ setattr(opts, opt1, default)
+
+ # Conflicting options
+ report_conflict('--playlist-reverse', 'playlist_reverse', '--playlist-random', 'playlist_random')
+ report_conflict('--playlist-reverse', 'playlist_reverse', '--lazy-playlist', 'lazy_playlist')
+ report_conflict('--playlist-random', 'playlist_random', '--lazy-playlist', 'lazy_playlist')
+ report_conflict('--dateafter', 'dateafter', '--date', 'date', default=None)
+ report_conflict('--datebefore', 'datebefore', '--date', 'date', default=None)
+ report_conflict('--exec-before-download', 'exec_before_dl_cmd',
+ '"--exec before_dl:"', 'exec_cmd', val2=opts.exec_cmd.get('before_dl'))
+ report_conflict('--id', 'useid', '--output', 'outtmpl', val2=opts.outtmpl.get('default'))
+ report_conflict('--remux-video', 'remuxvideo', '--recode-video', 'recodevideo')
+ report_conflict('--sponskrub', 'sponskrub', '--remove-chapters', 'remove_chapters')
+ report_conflict('--sponskrub', 'sponskrub', '--sponsorblock-mark', 'sponsorblock_mark')
+ report_conflict('--sponskrub', 'sponskrub', '--sponsorblock-remove', 'sponsorblock_remove')
+ report_conflict('--sponskrub-cut', 'sponskrub_cut', '--split-chapter', 'split_chapters',
+ val1=opts.sponskrub and opts.sponskrub_cut)
+
+ # Conflicts with --allow-unplayable-formats
+ report_conflict('--embed-metadata', 'addmetadata')
+ report_conflict('--embed-chapters', 'addchapters')
+ report_conflict('--embed-info-json', 'embed_infojson')
+ report_conflict('--embed-subs', 'embedsubtitles')
+ report_conflict('--embed-thumbnail', 'embedthumbnail')
+ report_conflict('--extract-audio', 'extractaudio')
+ report_conflict('--fixup', 'fixup', val1=opts.fixup not in (None, 'never', 'ignore'), default='never')
+ report_conflict('--recode-video', 'recodevideo')
+ report_conflict('--remove-chapters', 'remove_chapters', default=[])
+ report_conflict('--remux-video', 'remuxvideo')
+ report_conflict('--sponskrub', 'sponskrub')
+ report_conflict('--sponsorblock-remove', 'sponsorblock_remove', default=set())
+ report_conflict('--xattrs', 'xattrs')
+
+ # Fully deprecated options
+ def report_deprecation(val, old, new=None):
+ if not val:
+ return
+ deprecation_warnings.append(
+ f'{old} is deprecated and may be removed in a future version. Use {new} instead' if new
+ else f'{old} is deprecated and may not work as expected')
+
+ report_deprecation(opts.sponskrub, '--sponskrub', '--sponsorblock-mark or --sponsorblock-remove')
+ report_deprecation(not opts.prefer_ffmpeg, '--prefer-avconv', 'ffmpeg')
+ # report_deprecation(opts.include_ads, '--include-ads') # We may re-implement this in future
+ # report_deprecation(opts.call_home, '--call-home') # We may re-implement this in future
+ # report_deprecation(opts.writeannotations, '--write-annotations') # It's just that no website has it
+
+ # Dependent options
+ opts.date = DateRange.day(opts.date) if opts.date else DateRange(opts.dateafter, opts.datebefore)
+
+ if opts.exec_before_dl_cmd:
+ opts.exec_cmd['before_dl'] = opts.exec_before_dl_cmd
+
+ if opts.useid: # --id is not deprecated in youtube-dl
+ opts.outtmpl['default'] = '%(id)s.%(ext)s'
+
+ if opts.overwrites: # --force-overwrites implies --no-continue
+ opts.continue_dl = False
+
+ if (opts.addmetadata or opts.sponsorblock_mark) and opts.addchapters is None:
+ # Add chapters when adding metadata or marking sponsors
+ opts.addchapters = True
+
+ if opts.extractaudio and not opts.keepvideo and opts.format is None:
+ # Do not unnecessarily download audio
+ opts.format = 'bestaudio/best'
+
+ if opts.getcomments and opts.writeinfojson is None and not opts.embed_infojson:
+ # If JSON is not printed anywhere, but comments are requested, save it to file
+ if not opts.dumpjson or opts.print_json or opts.dump_single_json:
+ opts.writeinfojson = True
+
+ if opts.allsubtitles and not (opts.embedsubtitles or opts.writeautomaticsub):
+ # --all-sub automatically sets --write-sub if --write-auto-sub is not given
+ opts.writesubtitles = True
+
+ if opts.addmetadata and opts.embed_infojson is None:
+ # If embedding metadata and infojson is present, embed it
+ opts.embed_infojson = 'if_exists'
+
+ # Ask for passwords
+ if opts.username is not None and opts.password is None:
+ opts.password = getpass.getpass('Type account password and press [Return]: ')
+ if opts.ap_username is not None and opts.ap_password is None:
+ opts.ap_password = getpass.getpass('Type TV provider account password and press [Return]: ')
+
+ return warnings, deprecation_warnings
+
+
+def get_postprocessors(opts):
+ yield from opts.add_postprocessors
+
+ for when, actions in opts.parse_metadata.items():
+ yield {
+ 'key': 'MetadataParser',
+ 'actions': actions,
+ 'when': when
+ }
+ sponsorblock_query = opts.sponsorblock_mark | opts.sponsorblock_remove
+ if sponsorblock_query:
+ yield {
+ 'key': 'SponsorBlock',
+ 'categories': sponsorblock_query,
+ 'api': opts.sponsorblock_api,
+ 'when': 'after_filter'
+ }
+ if opts.convertsubtitles:
+ yield {
+ 'key': 'FFmpegSubtitlesConvertor',
+ 'format': opts.convertsubtitles,
+ 'when': 'before_dl'
+ }
+ if opts.convertthumbnails:
+ yield {
+ 'key': 'FFmpegThumbnailsConvertor',
+ 'format': opts.convertthumbnails,
+ 'when': 'before_dl'
+ }
+ if opts.extractaudio:
+ yield {
+ 'key': 'FFmpegExtractAudio',
+ 'preferredcodec': opts.audioformat,
+ 'preferredquality': opts.audioquality,
+ 'nopostoverwrites': opts.nopostoverwrites,
+ }
+ if opts.remuxvideo:
+ yield {
+ 'key': 'FFmpegVideoRemuxer',
+ 'preferedformat': opts.remuxvideo,
+ }
+ if opts.recodevideo:
+ yield {
+ 'key': 'FFmpegVideoConvertor',
+ 'preferedformat': opts.recodevideo,
+ }
+ # If ModifyChapters is going to remove chapters, subtitles must already be in the container.
+ if opts.embedsubtitles:
+ keep_subs = 'no-keep-subs' not in opts.compat_opts
+ yield {
+ 'key': 'FFmpegEmbedSubtitle',
+ # already_have_subtitle = True prevents the file from being deleted after embedding
+ 'already_have_subtitle': opts.writesubtitles and keep_subs
+ }
+ if not opts.writeautomaticsub and keep_subs:
+ opts.writesubtitles = True
+
+ # ModifyChapters must run before FFmpegMetadataPP
+ if opts.remove_chapters or sponsorblock_query:
+ yield {
+ 'key': 'ModifyChapters',
+ 'remove_chapters_patterns': opts.remove_chapters,
+ 'remove_sponsor_segments': opts.sponsorblock_remove,
+ 'remove_ranges': opts.remove_ranges,
+ 'sponsorblock_chapter_title': opts.sponsorblock_chapter_title,
+ 'force_keyframes': opts.force_keyframes_at_cuts
+ }
+ # FFmpegMetadataPP should be run after FFmpegVideoConvertorPP and
+ # FFmpegExtractAudioPP as containers before conversion may not support
+ # metadata (3gp, webm, etc.)
+ # By default ffmpeg preserves metadata applicable for both
+ # source and target containers. From this point the container won't change,
+ # so metadata can be added here.
+ if opts.addmetadata or opts.addchapters or opts.embed_infojson:
+ yield {
+ 'key': 'FFmpegMetadata',
+ 'add_chapters': opts.addchapters,
+ 'add_metadata': opts.addmetadata,
+ 'add_infojson': opts.embed_infojson,
+ }
+ # Deprecated
+ # This should be above EmbedThumbnail since sponskrub removes the thumbnail attachment
+ # but must be below EmbedSubtitle and FFmpegMetadata
+ # See https://github.com/yt-dlp/yt-dlp/issues/204 , https://github.com/faissaloo/SponSkrub/issues/29
+ # If opts.sponskrub is None, sponskrub is used, but it silently fails if the executable can't be found
+ if opts.sponskrub is not False:
+ yield {
+ 'key': 'SponSkrub',
+ 'path': opts.sponskrub_path,
+ 'args': opts.sponskrub_args,
+ 'cut': opts.sponskrub_cut,
+ 'force': opts.sponskrub_force,
+ 'ignoreerror': opts.sponskrub is None,
+ '_from_cli': True,
+ }
+ if opts.embedthumbnail:
+ yield {
+ 'key': 'EmbedThumbnail',
+ # already_have_thumbnail = True prevents the file from being deleted after embedding
+ 'already_have_thumbnail': opts.writethumbnail
+ }
+ if not opts.writethumbnail:
+ opts.writethumbnail = True
+ opts.outtmpl['pl_thumbnail'] = ''
+ if opts.split_chapters:
+ yield {
+ 'key': 'FFmpegSplitChapters',
+ 'force_keyframes': opts.force_keyframes_at_cuts,
+ }
+ # XAttrMetadataPP should be run after post-processors that may change file contents
+ if opts.xattrs:
+ yield {'key': 'XAttrMetadata'}
+ if opts.concat_playlist != 'never':
+ yield {
+ 'key': 'FFmpegConcat',
+ 'only_multi_video': opts.concat_playlist != 'always',
+ 'when': 'playlist',
+ }
+ # Exec must be the last PP of each category
+ for when, exec_cmd in opts.exec_cmd.items():
+ yield {
+ 'key': 'Exec',
+ 'exec_cmd': exec_cmd,
+ 'when': when,
+ }
+
+
+ParsedOptions = collections.namedtuple('ParsedOptions', ('parser', 'options', 'urls', 'ydl_opts'))
+
+
+def parse_options(argv=None):
+ """@returns ParsedOptions(parser, opts, urls, ydl_opts)"""
+ parser, opts, urls = parseOpts(argv)
+ urls = get_urls(urls, opts.batchfile, -1 if opts.quiet and not opts.verbose else opts.verbose)
+
+ set_compat_opts(opts)
+ try:
+ warnings, deprecation_warnings = validate_options(opts)
+ except ValueError as err:
+ parser.error(f'{err}\n')
+
+ postprocessors = list(get_postprocessors(opts))
+
+ print_only = bool(opts.forceprint) and all(k not in opts.forceprint for k in POSTPROCESS_WHEN[3:])
+ any_getting = any(getattr(opts, k) for k in (
+ 'dumpjson', 'dump_single_json', 'getdescription', 'getduration', 'getfilename',
+ 'getformat', 'getid', 'getthumbnail', 'gettitle', 'geturl'
+ ))
+ if opts.quiet is None:
+ opts.quiet = any_getting or opts.print_json or bool(opts.forceprint)
+
+ playlist_pps = [pp for pp in postprocessors if pp.get('when') == 'playlist']
+ write_playlist_infojson = (opts.writeinfojson and not opts.clean_infojson
+ and opts.allow_playlist_files and opts.outtmpl.get('pl_infojson') != '')
+ if not any((
+ opts.extract_flat,
+ opts.dump_single_json,
+ opts.forceprint.get('playlist'),
+ opts.print_to_file.get('playlist'),
+ write_playlist_infojson,
+ )):
+ if not playlist_pps:
+ opts.extract_flat = 'discard'
+ elif playlist_pps == [{'key': 'FFmpegConcat', 'only_multi_video': True, 'when': 'playlist'}]:
+ opts.extract_flat = 'discard_in_playlist'
+
+ final_ext = (
+ opts.recodevideo if opts.recodevideo in FFmpegVideoConvertorPP.SUPPORTED_EXTS
+ else opts.remuxvideo if opts.remuxvideo in FFmpegVideoRemuxerPP.SUPPORTED_EXTS
+ else opts.audioformat if (opts.extractaudio and opts.audioformat in FFmpegExtractAudioPP.SUPPORTED_EXTS)
+ else None)
+
+ return ParsedOptions(parser, opts, urls, {
+ 'usenetrc': opts.usenetrc,
+ 'netrc_location': opts.netrc_location,
+ 'netrc_cmd': opts.netrc_cmd,
+ 'username': opts.username,
+ 'password': opts.password,
+ 'twofactor': opts.twofactor,
+ 'videopassword': opts.videopassword,
+ 'ap_mso': opts.ap_mso,
+ 'ap_username': opts.ap_username,
+ 'ap_password': opts.ap_password,
+ 'client_certificate': opts.client_certificate,
+ 'client_certificate_key': opts.client_certificate_key,
+ 'client_certificate_password': opts.client_certificate_password,
+ 'quiet': opts.quiet,
+ 'no_warnings': opts.no_warnings,
+ 'forceurl': opts.geturl,
+ 'forcetitle': opts.gettitle,
+ 'forceid': opts.getid,
+ 'forcethumbnail': opts.getthumbnail,
+ 'forcedescription': opts.getdescription,
+ 'forceduration': opts.getduration,
+ 'forcefilename': opts.getfilename,
+ 'forceformat': opts.getformat,
+ 'forceprint': opts.forceprint,
+ 'print_to_file': opts.print_to_file,
+ 'forcejson': opts.dumpjson or opts.print_json,
+ 'dump_single_json': opts.dump_single_json,
+ 'force_write_download_archive': opts.force_write_download_archive,
+ 'simulate': (print_only or any_getting or None) if opts.simulate is None else opts.simulate,
+ 'skip_download': opts.skip_download,
+ 'format': opts.format,
+ 'allow_unplayable_formats': opts.allow_unplayable_formats,
+ 'ignore_no_formats_error': opts.ignore_no_formats_error,
+ 'format_sort': opts.format_sort,
+ 'format_sort_force': opts.format_sort_force,
+ 'allow_multiple_video_streams': opts.allow_multiple_video_streams,
+ 'allow_multiple_audio_streams': opts.allow_multiple_audio_streams,
+ 'check_formats': opts.check_formats,
+ 'listformats': opts.listformats,
+ 'listformats_table': opts.listformats_table,
+ 'outtmpl': opts.outtmpl,
+ 'outtmpl_na_placeholder': opts.outtmpl_na_placeholder,
+ 'paths': opts.paths,
+ 'autonumber_size': opts.autonumber_size,
+ 'autonumber_start': opts.autonumber_start,
+ 'restrictfilenames': opts.restrictfilenames,
+ 'windowsfilenames': opts.windowsfilenames,
+ 'ignoreerrors': opts.ignoreerrors,
+ 'force_generic_extractor': opts.force_generic_extractor,
+ 'allowed_extractors': opts.allowed_extractors or ['default'],
+ 'ratelimit': opts.ratelimit,
+ 'throttledratelimit': opts.throttledratelimit,
+ 'overwrites': opts.overwrites,
+ 'retries': opts.retries,
+ 'file_access_retries': opts.file_access_retries,
+ 'fragment_retries': opts.fragment_retries,
+ 'extractor_retries': opts.extractor_retries,
+ 'retry_sleep_functions': opts.retry_sleep,
+ 'skip_unavailable_fragments': opts.skip_unavailable_fragments,
+ 'keep_fragments': opts.keep_fragments,
+ 'concurrent_fragment_downloads': opts.concurrent_fragment_downloads,
+ 'buffersize': opts.buffersize,
+ 'noresizebuffer': opts.noresizebuffer,
+ 'http_chunk_size': opts.http_chunk_size,
+ 'continuedl': opts.continue_dl,
+ 'noprogress': opts.quiet if opts.noprogress is None else opts.noprogress,
+ 'progress_with_newline': opts.progress_with_newline,
+ 'progress_template': opts.progress_template,
+ 'playliststart': opts.playliststart,
+ 'playlistend': opts.playlistend,
+ 'playlistreverse': opts.playlist_reverse,
+ 'playlistrandom': opts.playlist_random,
+ 'lazy_playlist': opts.lazy_playlist,
+ 'noplaylist': opts.noplaylist,
+ 'logtostderr': opts.outtmpl.get('default') == '-',
+ 'consoletitle': opts.consoletitle,
+ 'nopart': opts.nopart,
+ 'updatetime': opts.updatetime,
+ 'writedescription': opts.writedescription,
+ 'writeannotations': opts.writeannotations,
+ 'writeinfojson': opts.writeinfojson,
+ 'allow_playlist_files': opts.allow_playlist_files,
+ 'clean_infojson': opts.clean_infojson,
+ 'getcomments': opts.getcomments,
+ 'writethumbnail': opts.writethumbnail is True,
+ 'write_all_thumbnails': opts.writethumbnail == 'all',
+ 'writelink': opts.writelink,
+ 'writeurllink': opts.writeurllink,
+ 'writewebloclink': opts.writewebloclink,
+ 'writedesktoplink': opts.writedesktoplink,
+ 'writesubtitles': opts.writesubtitles,
+ 'writeautomaticsub': opts.writeautomaticsub,
+ 'allsubtitles': opts.allsubtitles,
+ 'listsubtitles': opts.listsubtitles,
+ 'subtitlesformat': opts.subtitlesformat,
+ 'subtitleslangs': opts.subtitleslangs,
+ 'matchtitle': decodeOption(opts.matchtitle),
+ 'rejecttitle': decodeOption(opts.rejecttitle),
+ 'max_downloads': opts.max_downloads,
+ 'prefer_free_formats': opts.prefer_free_formats,
+ 'trim_file_name': opts.trim_file_name,
+ 'verbose': opts.verbose,
+ 'dump_intermediate_pages': opts.dump_intermediate_pages,
+ 'write_pages': opts.write_pages,
+ 'load_pages': opts.load_pages,
+ 'test': opts.test,
+ 'keepvideo': opts.keepvideo,
+ 'min_filesize': opts.min_filesize,
+ 'max_filesize': opts.max_filesize,
+ 'min_views': opts.min_views,
+ 'max_views': opts.max_views,
+ 'daterange': opts.date,
+ 'cachedir': opts.cachedir,
+ 'youtube_print_sig_code': opts.youtube_print_sig_code,
+ 'age_limit': opts.age_limit,
+ 'download_archive': opts.download_archive,
+ 'break_on_existing': opts.break_on_existing,
+ 'break_on_reject': opts.break_on_reject,
+ 'break_per_url': opts.break_per_url,
+ 'skip_playlist_after_errors': opts.skip_playlist_after_errors,
+ 'cookiefile': opts.cookiefile,
+ 'cookiesfrombrowser': opts.cookiesfrombrowser,
+ 'legacyserverconnect': opts.legacy_server_connect,
+ 'nocheckcertificate': opts.no_check_certificate,
+ 'prefer_insecure': opts.prefer_insecure,
+ 'enable_file_urls': opts.enable_file_urls,
+ 'http_headers': opts.headers,
+ 'proxy': opts.proxy,
+ 'socket_timeout': opts.socket_timeout,
+ 'bidi_workaround': opts.bidi_workaround,
+ 'debug_printtraffic': opts.debug_printtraffic,
+ 'prefer_ffmpeg': opts.prefer_ffmpeg,
+ 'include_ads': opts.include_ads,
+ 'default_search': opts.default_search,
+ 'dynamic_mpd': opts.dynamic_mpd,
+ 'extractor_args': opts.extractor_args,
+ 'youtube_include_dash_manifest': opts.youtube_include_dash_manifest,
+ 'youtube_include_hls_manifest': opts.youtube_include_hls_manifest,
+ 'encoding': opts.encoding,
+ 'extract_flat': opts.extract_flat,
+ 'live_from_start': opts.live_from_start,
+ 'wait_for_video': opts.wait_for_video,
+ 'mark_watched': opts.mark_watched,
+ 'merge_output_format': opts.merge_output_format,
+ 'final_ext': final_ext,
+ 'postprocessors': postprocessors,
+ 'fixup': opts.fixup,
+ 'source_address': opts.source_address,
+ 'call_home': opts.call_home,
+ 'sleep_interval_requests': opts.sleep_interval_requests,
+ 'sleep_interval': opts.sleep_interval,
+ 'max_sleep_interval': opts.max_sleep_interval,
+ 'sleep_interval_subtitles': opts.sleep_interval_subtitles,
+ 'external_downloader': opts.external_downloader,
+ 'download_ranges': opts.download_ranges,
+ 'force_keyframes_at_cuts': opts.force_keyframes_at_cuts,
+ 'list_thumbnails': opts.list_thumbnails,
+ 'playlist_items': opts.playlist_items,
+ 'xattr_set_filesize': opts.xattr_set_filesize,
+ 'match_filter': opts.match_filter,
+ 'color': opts.color,
+ 'ffmpeg_location': opts.ffmpeg_location,
+ 'hls_prefer_native': opts.hls_prefer_native,
+ 'hls_use_mpegts': opts.hls_use_mpegts,
+ 'hls_split_discontinuity': opts.hls_split_discontinuity,
+ 'external_downloader_args': opts.external_downloader_args,
+ 'postprocessor_args': opts.postprocessor_args,
+ 'cn_verification_proxy': opts.cn_verification_proxy,
+ 'geo_verification_proxy': opts.geo_verification_proxy,
+ 'geo_bypass': opts.geo_bypass,
+ 'geo_bypass_country': opts.geo_bypass_country,
+ 'geo_bypass_ip_block': opts.geo_bypass_ip_block,
+ '_warnings': warnings,
+ '_deprecation_warnings': deprecation_warnings,
+ 'compat_opts': opts.compat_opts,
+ })
+
+
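+# Illustrative library usage of the above (assuming a reachable URL):
+# parser, opts, urls, ydl_opts = parse_options(['-f', 'bestaudio', 'https://example.com/video'])
+# with YoutubeDL(ydl_opts) as ydl: ydl.download(urls)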
+def _real_main(argv=None):
+ setproctitle('yt-dlp')
+
+ parser, opts, all_urls, ydl_opts = parse_options(argv)
+
+ # Dump user agent
+ if opts.dump_user_agent:
+ ua = traverse_obj(opts.headers, 'User-Agent', casesense=False, default=std_headers['User-Agent'])
+ write_string(f'{ua}\n', out=sys.stdout)
+ return
+
+ if print_extractor_information(opts, all_urls):
+ return
+
+ # We may need ffmpeg_location without having access to the YoutubeDL instance
+ # See https://github.com/yt-dlp/yt-dlp/issues/2191
+ if opts.ffmpeg_location:
+ FFmpegPostProcessor._ffmpeg_location.set(opts.ffmpeg_location)
+
+ with YoutubeDL(ydl_opts) as ydl:
+ pre_process = opts.update_self or opts.rm_cachedir
+ actual_use = all_urls or opts.load_info_filename
+
+ if opts.rm_cachedir:
+ ydl.cache.remove()
+
+ try:
+ updater = Updater(ydl, opts.update_self)
+ if opts.update_self and updater.update() and actual_use:
+ if updater.cmd:
+ return updater.restart()
+ # This code is reachable only for zip variant in py < 3.10
+ # It makes sense to exit here, but the old behavior is to continue
+ ydl.report_warning('Restart yt-dlp to use the updated version')
+ # return 100, 'ERROR: The program must exit for the update to complete'
+ except Exception:
+ traceback.print_exc()
+ ydl._download_retcode = 100
+
+ if not actual_use:
+ if pre_process:
+ return ydl._download_retcode
+
+ args = sys.argv[1:] if argv is None else argv
+ ydl.warn_if_short_id(args)
+
+ # Show a useful error message and wait for keypress if not launched from shell on Windows
+ if not args and compat_os_name == 'nt' and getattr(sys, 'frozen', False):
+ import ctypes.wintypes
+ import msvcrt
+
+ kernel32 = ctypes.WinDLL('Kernel32')
+
+ buffer = (1 * ctypes.wintypes.DWORD)()
+ attached_processes = kernel32.GetConsoleProcessList(buffer, 1)
+ # If we only have a single process attached, then the executable was double clicked
+ # When using `pyinstaller` with `--onefile`, two processes get attached
+ is_onefile = hasattr(sys, '_MEIPASS') and os.path.basename(sys._MEIPASS).startswith('_MEI')
+ if attached_processes == 1 or is_onefile and attached_processes == 2:
+ print(parser._generate_error_message(
+ 'Do not double-click the executable, instead call it from a command line.\n'
+ 'Please read the README for further information on how to use yt-dlp: '
+ 'https://github.com/yt-dlp/yt-dlp#readme'))
+ msvcrt.getch()
+ _exit(2)
+ parser.error(
+ 'You must provide at least one URL.\n'
+ 'Type yt-dlp --help to see a list of all options.')
+
+ parser.destroy()
+ try:
+ if opts.load_info_filename is not None:
+ if all_urls:
+ ydl.report_warning('URLs are ignored due to --load-info-json')
+ return ydl.download_with_info_file(expand_path(opts.load_info_filename))
+ else:
+ return ydl.download(all_urls)
+ except DownloadCancelled:
+ ydl.to_screen('Aborting remaining downloads')
+ return 101
+
+
+def main(argv=None):
+ global _IN_CLI
+ _IN_CLI = True
+ try:
+ _exit(*variadic(_real_main(argv)))
+ except DownloadError:
+ _exit(1)
+ except SameFileError as e:
+ _exit(f'ERROR: {e}')
+ except KeyboardInterrupt:
+ _exit('\nERROR: Interrupted by user')
+ except BrokenPipeError as e:
+ # https://docs.python.org/3/library/signal.html#note-on-sigpipe
+ devnull = os.open(os.devnull, os.O_WRONLY)
+ os.dup2(devnull, sys.stdout.fileno())
+ _exit(f'\nERROR: {e}')
+ except optparse.OptParseError as e:
+ _exit(2, f'\n{e}')
+
+
+from .extractor import gen_extractors, list_extractors
+
+__all__ = [
+ 'main',
+ 'YoutubeDL',
+ 'parse_options',
+ 'gen_extractors',
+ 'list_extractors',
+]
diff --git a/yt_dlp/__main__.py b/yt_dlp/__main__.py
new file mode 100644
index 0000000..06c3920
--- /dev/null
+++ b/yt_dlp/__main__.py
@@ -0,0 +1,17 @@
+#!/usr/bin/env python3
+
+# Execute with
+# $ python3 -m yt_dlp
+
+import sys
+
+if __package__ is None and not getattr(sys, 'frozen', False):
+ # direct call of __main__.py
+ import os.path
+ path = os.path.realpath(os.path.abspath(__file__))
+ sys.path.insert(0, os.path.dirname(os.path.dirname(path)))
+
+import yt_dlp
+
+if __name__ == '__main__':
+ yt_dlp.main()
diff --git a/yt_dlp/__pyinstaller/__init__.py b/yt_dlp/__pyinstaller/__init__.py
new file mode 100644
index 0000000..1c52aad
--- /dev/null
+++ b/yt_dlp/__pyinstaller/__init__.py
@@ -0,0 +1,5 @@
+import os
+
+
+def get_hook_dirs():
+ return [os.path.dirname(__file__)]
diff --git a/yt_dlp/__pyinstaller/hook-yt_dlp.py b/yt_dlp/__pyinstaller/hook-yt_dlp.py
new file mode 100644
index 0000000..7c3dbfb
--- /dev/null
+++ b/yt_dlp/__pyinstaller/hook-yt_dlp.py
@@ -0,0 +1,34 @@
+import sys
+
+from PyInstaller.utils.hooks import collect_submodules
+
+
+def pycryptodome_module():
+ try:
+ import Cryptodome # noqa: F401
+ except ImportError:
+ try:
+ import Crypto # noqa: F401
+ print('WARNING: Using Crypto since Cryptodome is not available. '
+ 'Install with: python3 -m pip install pycryptodomex', file=sys.stderr)
+ return 'Crypto'
+ except ImportError:
+ pass
+ return 'Cryptodome'
+
+
+def get_hidden_imports():
+ yield from ('yt_dlp.compat._legacy', 'yt_dlp.compat._deprecated')
+ yield from ('yt_dlp.utils._legacy', 'yt_dlp.utils._deprecated')
+ yield pycryptodome_module()
+ # Only `websockets` is required, others are collected just in case
+ for module in ('websockets', 'requests', 'urllib3'):
+ yield from collect_submodules(module)
+ # These are auto-detected, but explicitly add them just in case
+ yield from ('mutagen', 'brotli', 'certifi', 'secretstorage')
+
+
+hiddenimports = list(get_hidden_imports())
+print(f'Adding imports: {hiddenimports}')
+
+excludedimports = ['youtube_dl', 'youtube_dlc', 'test', 'ytdlp_plugins', 'devscripts', 'bundle']
diff --git a/yt_dlp/aes.py b/yt_dlp/aes.py
new file mode 100644
index 0000000..b3a383c
--- /dev/null
+++ b/yt_dlp/aes.py
@@ -0,0 +1,567 @@
+import base64
+from math import ceil
+
+from .compat import compat_ord
+from .dependencies import Cryptodome
+from .utils import bytes_to_intlist, intlist_to_bytes
+
+if Cryptodome.AES:
+ def aes_cbc_decrypt_bytes(data, key, iv):
+ """ Decrypt bytes with AES-CBC using pycryptodome """
+ return Cryptodome.AES.new(key, Cryptodome.AES.MODE_CBC, iv).decrypt(data)
+
+ def aes_gcm_decrypt_and_verify_bytes(data, key, tag, nonce):
+ """ Decrypt bytes with AES-GCM using pycryptodome """
+ return Cryptodome.AES.new(key, Cryptodome.AES.MODE_GCM, nonce).decrypt_and_verify(data, tag)
+
+else:
+ def aes_cbc_decrypt_bytes(data, key, iv):
+ """ Decrypt bytes with AES-CBC using native implementation since pycryptodome is unavailable """
+ return intlist_to_bytes(aes_cbc_decrypt(*map(bytes_to_intlist, (data, key, iv))))
+
+ def aes_gcm_decrypt_and_verify_bytes(data, key, tag, nonce):
+ """ Decrypt bytes with AES-GCM using native implementation since pycryptodome is unavailable """
+ return intlist_to_bytes(aes_gcm_decrypt_and_verify(*map(bytes_to_intlist, (data, key, tag, nonce))))
+
+
+def aes_cbc_encrypt_bytes(data, key, iv, **kwargs):
+ return intlist_to_bytes(aes_cbc_encrypt(*map(bytes_to_intlist, (data, key, iv)), **kwargs))
+
+
+BLOCK_SIZE_BYTES = 16
+
+
+def unpad_pkcs7(data):
+ return data[:-compat_ord(data[-1])]
+
+
+def pkcs7_padding(data):
+ """
+ PKCS#7 padding
+
+ @param {int[]} data cleartext
+ @returns {int[]} padded data
+ """
+
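+ # e.g. (illustrative, BLOCK_SIZE_BYTES == 16):
+ # pkcs7_padding([1, 2, 3]) -> [1, 2, 3] + [13] * 13,
+ # and a full 16-byte block gains a whole extra block of value 16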
+ remaining_length = BLOCK_SIZE_BYTES - len(data) % BLOCK_SIZE_BYTES
+ return data + [remaining_length] * remaining_length
+
+
+def pad_block(block, padding_mode):
+ """
+ Pad a block with the given padding mode
+ @param {int[]} block block to pad
+ @param padding_mode padding mode
+ """
+ padding_size = BLOCK_SIZE_BYTES - len(block)
+
+ PADDING_BYTE = {
+ 'pkcs7': padding_size,
+ 'iso7816': 0x0,
+ 'whitespace': 0x20,
+ 'zero': 0x0,
+ }
+
+ if padding_size < 0:
+ raise ValueError('Block size exceeded')
+ elif padding_mode not in PADDING_BYTE:
+ raise NotImplementedError(f'Padding mode {padding_mode} is not implemented')
+
+ if padding_mode == 'iso7816' and padding_size:
+ block = block + [0x80] # NB: not +=, which would mutate the caller's list
+ padding_size -= 1
+
+ return block + [PADDING_BYTE[padding_mode]] * padding_size
+
+
+def aes_ecb_encrypt(data, key, iv=None):
+ """
+ Encrypt with aes in ECB mode. Using PKCS#7 padding
+
+ @param {int[]} data cleartext
+ @param {int[]} key 16/24/32-Byte cipher key
+ @param {int[]} iv Unused for this mode
+ @returns {int[]} encrypted data
+ """
+ expanded_key = key_expansion(key)
+ block_count = ceil(len(data) / BLOCK_SIZE_BYTES)
+
+ encrypted_data = []
+ for i in range(block_count):
+ block = data[i * BLOCK_SIZE_BYTES: (i + 1) * BLOCK_SIZE_BYTES]
+ encrypted_data += aes_encrypt(pkcs7_padding(block), expanded_key)
+
+ return encrypted_data
+
+
+def aes_ecb_decrypt(data, key, iv=None):
+ """
+ Decrypt with aes in ECB mode
+
+ @param {int[]} data cipher
+ @param {int[]} key 16/24/32-Byte cipher key
+ @param {int[]} iv Unused for this mode
+ @returns {int[]} decrypted data
+ """
+ expanded_key = key_expansion(key)
+ block_count = ceil(len(data) / BLOCK_SIZE_BYTES)
+
+ decrypted_data = []
+ for i in range(block_count):
+ block = data[i * BLOCK_SIZE_BYTES: (i + 1) * BLOCK_SIZE_BYTES]
+ decrypted_data += aes_decrypt(block, expanded_key)
+ decrypted_data = decrypted_data[:len(data)]
+
+ return decrypted_data
+
+
+def aes_ctr_decrypt(data, key, iv):
+ """
+ Decrypt with aes in counter mode
+
+ @param {int[]} data cipher
+ @param {int[]} key 16/24/32-Byte cipher key
+ @param {int[]} iv 16-Byte initialization vector
+ @returns {int[]} decrypted data
+ """
+ return aes_ctr_encrypt(data, key, iv)
+
+
+def aes_ctr_encrypt(data, key, iv):
+ """
+ Encrypt with aes in counter mode
+
+ @param {int[]} data cleartext
+ @param {int[]} key 16/24/32-Byte cipher key
+ @param {int[]} iv 16-Byte initialization vector
+ @returns {int[]} encrypted data
+ """
+ expanded_key = key_expansion(key)
+ block_count = ceil(len(data) / BLOCK_SIZE_BYTES)
+ counter = iter_vector(iv)
+
+ encrypted_data = []
+ for i in range(block_count):
+ counter_block = next(counter)
+ block = data[i * BLOCK_SIZE_BYTES: (i + 1) * BLOCK_SIZE_BYTES]
+ block += [0] * (BLOCK_SIZE_BYTES - len(block))
+
+ cipher_counter_block = aes_encrypt(counter_block, expanded_key)
+ encrypted_data += xor(block, cipher_counter_block)
+ encrypted_data = encrypted_data[:len(data)]
+
+ return encrypted_data
+
+
+def aes_cbc_decrypt(data, key, iv):
+ """
+ Decrypt with aes in CBC mode
+
+ @param {int[]} data cipher
+ @param {int[]} key 16/24/32-Byte cipher key
+ @param {int[]} iv 16-Byte IV
+ @returns {int[]} decrypted data
+ """
+ expanded_key = key_expansion(key)
+ block_count = ceil(len(data) / BLOCK_SIZE_BYTES)
+
+ decrypted_data = []
+ previous_cipher_block = iv
+ for i in range(block_count):
+ block = data[i * BLOCK_SIZE_BYTES: (i + 1) * BLOCK_SIZE_BYTES]
+ block += [0] * (BLOCK_SIZE_BYTES - len(block))
+
+ decrypted_block = aes_decrypt(block, expanded_key)
+ decrypted_data += xor(decrypted_block, previous_cipher_block)
+ previous_cipher_block = block
+ decrypted_data = decrypted_data[:len(data)]
+
+ return decrypted_data
+
+
+def aes_cbc_encrypt(data, key, iv, *, padding_mode='pkcs7'):
+ """
+ Encrypt with aes in CBC mode
+
+ @param {int[]} data cleartext
+ @param {int[]} key 16/24/32-Byte cipher key
+ @param {int[]} iv 16-Byte IV
+ @param padding_mode Padding mode to use
+ @returns {int[]} encrypted data
+ """
+ expanded_key = key_expansion(key)
+ block_count = ceil(len(data) / BLOCK_SIZE_BYTES)
+
+ encrypted_data = []
+ previous_cipher_block = iv
+ for i in range(block_count):
+ block = data[i * BLOCK_SIZE_BYTES: (i + 1) * BLOCK_SIZE_BYTES]
+ block = pad_block(block, padding_mode)
+
+ mixed_block = xor(block, previous_cipher_block)
+
+ encrypted_block = aes_encrypt(mixed_block, expanded_key)
+ encrypted_data += encrypted_block
+
+ previous_cipher_block = encrypted_block
+
+ return encrypted_data
+
+
+def aes_gcm_decrypt_and_verify(data, key, tag, nonce):
+ """
+ Decrypt with aes in GCM mode and check authenticity using the tag
+
+ @param {int[]} data cipher
+ @param {int[]} key 16-Byte cipher key
+ @param {int[]} tag authentication tag
+ @param {int[]} nonce IV (recommended 12-Byte)
+ @returns {int[]} decrypted data
+ """
+
+ # XXX: check aes, gcm param
+
+ hash_subkey = aes_encrypt([0] * BLOCK_SIZE_BYTES, key_expansion(key))
+
+ if len(nonce) == 12:
+ j0 = nonce + [0, 0, 0, 1]
+ else:
+ fill = (BLOCK_SIZE_BYTES - (len(nonce) % BLOCK_SIZE_BYTES)) % BLOCK_SIZE_BYTES + 8
+ ghash_in = nonce + [0] * fill + bytes_to_intlist((8 * len(nonce)).to_bytes(8, 'big'))
+ j0 = ghash(hash_subkey, ghash_in)
+
+ # TODO: add nonce support to aes_ctr_decrypt
+
+ # nonce_ctr = j0[:12]
+ iv_ctr = inc(j0)
+
+ decrypted_data = aes_ctr_decrypt(data, key, iv_ctr + [0] * (BLOCK_SIZE_BYTES - len(iv_ctr)))
+ pad_len = (BLOCK_SIZE_BYTES - len(data) % BLOCK_SIZE_BYTES) % BLOCK_SIZE_BYTES
+ s_tag = ghash(
+ hash_subkey,
+ data
+ + [0] * pad_len # pad ciphertext to a block boundary (no-op when already aligned)
+ + bytes_to_intlist((0 * 8).to_bytes(8, 'big') # length of associated data
+ + ((len(data) * 8).to_bytes(8, 'big'))) # length of data
+ )
+
+ if tag != aes_ctr_encrypt(s_tag, key, j0):
+ raise ValueError("Mismatching authentication tag")
+
+ return decrypted_data
+
+
+def aes_encrypt(data, expanded_key):
+ """
+ Encrypt one block with aes
+
+ @param {int[]} data 16-Byte state
+ @param {int[]} expanded_key 176/208/240-Byte expanded key
+ @returns {int[]} 16-Byte cipher
+ """
+ rounds = len(expanded_key) // BLOCK_SIZE_BYTES - 1
+
+ data = xor(data, expanded_key[:BLOCK_SIZE_BYTES])
+ for i in range(1, rounds + 1):
+ data = sub_bytes(data)
+ data = shift_rows(data)
+ if i != rounds:
+ data = list(iter_mix_columns(data, MIX_COLUMN_MATRIX))
+ data = xor(data, expanded_key[i * BLOCK_SIZE_BYTES: (i + 1) * BLOCK_SIZE_BYTES])
+
+ return data
+
+
+def aes_decrypt(data, expanded_key):
+ """
+ Decrypt one block with aes
+
+ @param {int[]} data 16-Byte cipher
+ @param {int[]} expanded_key 176/208/240-Byte expanded key
+ @returns {int[]} 16-Byte state
+ """
+ rounds = len(expanded_key) // BLOCK_SIZE_BYTES - 1
+
+ for i in range(rounds, 0, -1):
+ data = xor(data, expanded_key[i * BLOCK_SIZE_BYTES: (i + 1) * BLOCK_SIZE_BYTES])
+ if i != rounds:
+ data = list(iter_mix_columns(data, MIX_COLUMN_MATRIX_INV))
+ data = shift_rows_inv(data)
+ data = sub_bytes_inv(data)
+ data = xor(data, expanded_key[:BLOCK_SIZE_BYTES])
+
+ return data
+
+
+def aes_decrypt_text(data, password, key_size_bytes):
+ """
+ Decrypt text
+ - The first 8 Bytes of decoded 'data' are the 8 high Bytes of the counter
+ - The cipher key is derived by encrypting the first 16 Bytes of 'password'
+ with the first 'key_size_bytes' Bytes of 'password' (zero-padded if necessary)
+ - Mode of operation is 'counter'
+
+ @param {str} data Base64 encoded string
+ @param {str,unicode} password Password (will be encoded with utf-8)
+ @param {int} key_size_bytes Possible values: 16 for 128-Bit, 24 for 192-Bit or 32 for 256-Bit
+ @returns {bytes} Decrypted data
+ """
+ NONCE_LENGTH_BYTES = 8
+
+ data = bytes_to_intlist(base64.b64decode(data))
+ password = bytes_to_intlist(password.encode())
+
+ key = password[:key_size_bytes] + [0] * (key_size_bytes - len(password))
+ key = aes_encrypt(key[:BLOCK_SIZE_BYTES], key_expansion(key)) * (key_size_bytes // BLOCK_SIZE_BYTES)
+
+ nonce = data[:NONCE_LENGTH_BYTES]
+ cipher = data[NONCE_LENGTH_BYTES:]
+
+ decrypted_data = aes_ctr_decrypt(cipher, key, nonce + [0] * (BLOCK_SIZE_BYTES - NONCE_LENGTH_BYTES))
+ plaintext = intlist_to_bytes(decrypted_data)
+
+ return plaintext
+
+
+RCON = (0x8d, 0x01, 0x02, 0x04, 0x08, 0x10, 0x20, 0x40, 0x80, 0x1b, 0x36)
+SBOX = (0x63, 0x7C, 0x77, 0x7B, 0xF2, 0x6B, 0x6F, 0xC5, 0x30, 0x01, 0x67, 0x2B, 0xFE, 0xD7, 0xAB, 0x76,
+ 0xCA, 0x82, 0xC9, 0x7D, 0xFA, 0x59, 0x47, 0xF0, 0xAD, 0xD4, 0xA2, 0xAF, 0x9C, 0xA4, 0x72, 0xC0,
+ 0xB7, 0xFD, 0x93, 0x26, 0x36, 0x3F, 0xF7, 0xCC, 0x34, 0xA5, 0xE5, 0xF1, 0x71, 0xD8, 0x31, 0x15,
+ 0x04, 0xC7, 0x23, 0xC3, 0x18, 0x96, 0x05, 0x9A, 0x07, 0x12, 0x80, 0xE2, 0xEB, 0x27, 0xB2, 0x75,
+ 0x09, 0x83, 0x2C, 0x1A, 0x1B, 0x6E, 0x5A, 0xA0, 0x52, 0x3B, 0xD6, 0xB3, 0x29, 0xE3, 0x2F, 0x84,
+ 0x53, 0xD1, 0x00, 0xED, 0x20, 0xFC, 0xB1, 0x5B, 0x6A, 0xCB, 0xBE, 0x39, 0x4A, 0x4C, 0x58, 0xCF,
+ 0xD0, 0xEF, 0xAA, 0xFB, 0x43, 0x4D, 0x33, 0x85, 0x45, 0xF9, 0x02, 0x7F, 0x50, 0x3C, 0x9F, 0xA8,
+ 0x51, 0xA3, 0x40, 0x8F, 0x92, 0x9D, 0x38, 0xF5, 0xBC, 0xB6, 0xDA, 0x21, 0x10, 0xFF, 0xF3, 0xD2,
+ 0xCD, 0x0C, 0x13, 0xEC, 0x5F, 0x97, 0x44, 0x17, 0xC4, 0xA7, 0x7E, 0x3D, 0x64, 0x5D, 0x19, 0x73,
+ 0x60, 0x81, 0x4F, 0xDC, 0x22, 0x2A, 0x90, 0x88, 0x46, 0xEE, 0xB8, 0x14, 0xDE, 0x5E, 0x0B, 0xDB,
+ 0xE0, 0x32, 0x3A, 0x0A, 0x49, 0x06, 0x24, 0x5C, 0xC2, 0xD3, 0xAC, 0x62, 0x91, 0x95, 0xE4, 0x79,
+ 0xE7, 0xC8, 0x37, 0x6D, 0x8D, 0xD5, 0x4E, 0xA9, 0x6C, 0x56, 0xF4, 0xEA, 0x65, 0x7A, 0xAE, 0x08,
+ 0xBA, 0x78, 0x25, 0x2E, 0x1C, 0xA6, 0xB4, 0xC6, 0xE8, 0xDD, 0x74, 0x1F, 0x4B, 0xBD, 0x8B, 0x8A,
+ 0x70, 0x3E, 0xB5, 0x66, 0x48, 0x03, 0xF6, 0x0E, 0x61, 0x35, 0x57, 0xB9, 0x86, 0xC1, 0x1D, 0x9E,
+ 0xE1, 0xF8, 0x98, 0x11, 0x69, 0xD9, 0x8E, 0x94, 0x9B, 0x1E, 0x87, 0xE9, 0xCE, 0x55, 0x28, 0xDF,
+ 0x8C, 0xA1, 0x89, 0x0D, 0xBF, 0xE6, 0x42, 0x68, 0x41, 0x99, 0x2D, 0x0F, 0xB0, 0x54, 0xBB, 0x16)
+SBOX_INV = (0x52, 0x09, 0x6a, 0xd5, 0x30, 0x36, 0xa5, 0x38, 0xbf, 0x40, 0xa3, 0x9e, 0x81, 0xf3, 0xd7, 0xfb,
+ 0x7c, 0xe3, 0x39, 0x82, 0x9b, 0x2f, 0xff, 0x87, 0x34, 0x8e, 0x43, 0x44, 0xc4, 0xde, 0xe9, 0xcb,
+ 0x54, 0x7b, 0x94, 0x32, 0xa6, 0xc2, 0x23, 0x3d, 0xee, 0x4c, 0x95, 0x0b, 0x42, 0xfa, 0xc3, 0x4e,
+ 0x08, 0x2e, 0xa1, 0x66, 0x28, 0xd9, 0x24, 0xb2, 0x76, 0x5b, 0xa2, 0x49, 0x6d, 0x8b, 0xd1, 0x25,
+ 0x72, 0xf8, 0xf6, 0x64, 0x86, 0x68, 0x98, 0x16, 0xd4, 0xa4, 0x5c, 0xcc, 0x5d, 0x65, 0xb6, 0x92,
+ 0x6c, 0x70, 0x48, 0x50, 0xfd, 0xed, 0xb9, 0xda, 0x5e, 0x15, 0x46, 0x57, 0xa7, 0x8d, 0x9d, 0x84,
+ 0x90, 0xd8, 0xab, 0x00, 0x8c, 0xbc, 0xd3, 0x0a, 0xf7, 0xe4, 0x58, 0x05, 0xb8, 0xb3, 0x45, 0x06,
+ 0xd0, 0x2c, 0x1e, 0x8f, 0xca, 0x3f, 0x0f, 0x02, 0xc1, 0xaf, 0xbd, 0x03, 0x01, 0x13, 0x8a, 0x6b,
+ 0x3a, 0x91, 0x11, 0x41, 0x4f, 0x67, 0xdc, 0xea, 0x97, 0xf2, 0xcf, 0xce, 0xf0, 0xb4, 0xe6, 0x73,
+ 0x96, 0xac, 0x74, 0x22, 0xe7, 0xad, 0x35, 0x85, 0xe2, 0xf9, 0x37, 0xe8, 0x1c, 0x75, 0xdf, 0x6e,
+ 0x47, 0xf1, 0x1a, 0x71, 0x1d, 0x29, 0xc5, 0x89, 0x6f, 0xb7, 0x62, 0x0e, 0xaa, 0x18, 0xbe, 0x1b,
+ 0xfc, 0x56, 0x3e, 0x4b, 0xc6, 0xd2, 0x79, 0x20, 0x9a, 0xdb, 0xc0, 0xfe, 0x78, 0xcd, 0x5a, 0xf4,
+ 0x1f, 0xdd, 0xa8, 0x33, 0x88, 0x07, 0xc7, 0x31, 0xb1, 0x12, 0x10, 0x59, 0x27, 0x80, 0xec, 0x5f,
+ 0x60, 0x51, 0x7f, 0xa9, 0x19, 0xb5, 0x4a, 0x0d, 0x2d, 0xe5, 0x7a, 0x9f, 0x93, 0xc9, 0x9c, 0xef,
+ 0xa0, 0xe0, 0x3b, 0x4d, 0xae, 0x2a, 0xf5, 0xb0, 0xc8, 0xeb, 0xbb, 0x3c, 0x83, 0x53, 0x99, 0x61,
+ 0x17, 0x2b, 0x04, 0x7e, 0xba, 0x77, 0xd6, 0x26, 0xe1, 0x69, 0x14, 0x63, 0x55, 0x21, 0x0c, 0x7d)
+MIX_COLUMN_MATRIX = ((0x2, 0x3, 0x1, 0x1),
+ (0x1, 0x2, 0x3, 0x1),
+ (0x1, 0x1, 0x2, 0x3),
+ (0x3, 0x1, 0x1, 0x2))
+MIX_COLUMN_MATRIX_INV = ((0xE, 0xB, 0xD, 0x9),
+ (0x9, 0xE, 0xB, 0xD),
+ (0xD, 0x9, 0xE, 0xB),
+ (0xB, 0xD, 0x9, 0xE))
+RIJNDAEL_EXP_TABLE = (0x01, 0x03, 0x05, 0x0F, 0x11, 0x33, 0x55, 0xFF, 0x1A, 0x2E, 0x72, 0x96, 0xA1, 0xF8, 0x13, 0x35,
+ 0x5F, 0xE1, 0x38, 0x48, 0xD8, 0x73, 0x95, 0xA4, 0xF7, 0x02, 0x06, 0x0A, 0x1E, 0x22, 0x66, 0xAA,
+ 0xE5, 0x34, 0x5C, 0xE4, 0x37, 0x59, 0xEB, 0x26, 0x6A, 0xBE, 0xD9, 0x70, 0x90, 0xAB, 0xE6, 0x31,
+ 0x53, 0xF5, 0x04, 0x0C, 0x14, 0x3C, 0x44, 0xCC, 0x4F, 0xD1, 0x68, 0xB8, 0xD3, 0x6E, 0xB2, 0xCD,
+ 0x4C, 0xD4, 0x67, 0xA9, 0xE0, 0x3B, 0x4D, 0xD7, 0x62, 0xA6, 0xF1, 0x08, 0x18, 0x28, 0x78, 0x88,
+ 0x83, 0x9E, 0xB9, 0xD0, 0x6B, 0xBD, 0xDC, 0x7F, 0x81, 0x98, 0xB3, 0xCE, 0x49, 0xDB, 0x76, 0x9A,
+ 0xB5, 0xC4, 0x57, 0xF9, 0x10, 0x30, 0x50, 0xF0, 0x0B, 0x1D, 0x27, 0x69, 0xBB, 0xD6, 0x61, 0xA3,
+ 0xFE, 0x19, 0x2B, 0x7D, 0x87, 0x92, 0xAD, 0xEC, 0x2F, 0x71, 0x93, 0xAE, 0xE9, 0x20, 0x60, 0xA0,
+ 0xFB, 0x16, 0x3A, 0x4E, 0xD2, 0x6D, 0xB7, 0xC2, 0x5D, 0xE7, 0x32, 0x56, 0xFA, 0x15, 0x3F, 0x41,
+ 0xC3, 0x5E, 0xE2, 0x3D, 0x47, 0xC9, 0x40, 0xC0, 0x5B, 0xED, 0x2C, 0x74, 0x9C, 0xBF, 0xDA, 0x75,
+ 0x9F, 0xBA, 0xD5, 0x64, 0xAC, 0xEF, 0x2A, 0x7E, 0x82, 0x9D, 0xBC, 0xDF, 0x7A, 0x8E, 0x89, 0x80,
+ 0x9B, 0xB6, 0xC1, 0x58, 0xE8, 0x23, 0x65, 0xAF, 0xEA, 0x25, 0x6F, 0xB1, 0xC8, 0x43, 0xC5, 0x54,
+ 0xFC, 0x1F, 0x21, 0x63, 0xA5, 0xF4, 0x07, 0x09, 0x1B, 0x2D, 0x77, 0x99, 0xB0, 0xCB, 0x46, 0xCA,
+ 0x45, 0xCF, 0x4A, 0xDE, 0x79, 0x8B, 0x86, 0x91, 0xA8, 0xE3, 0x3E, 0x42, 0xC6, 0x51, 0xF3, 0x0E,
+ 0x12, 0x36, 0x5A, 0xEE, 0x29, 0x7B, 0x8D, 0x8C, 0x8F, 0x8A, 0x85, 0x94, 0xA7, 0xF2, 0x0D, 0x17,
+ 0x39, 0x4B, 0xDD, 0x7C, 0x84, 0x97, 0xA2, 0xFD, 0x1C, 0x24, 0x6C, 0xB4, 0xC7, 0x52, 0xF6, 0x01)
+RIJNDAEL_LOG_TABLE = (0x00, 0x00, 0x19, 0x01, 0x32, 0x02, 0x1a, 0xc6, 0x4b, 0xc7, 0x1b, 0x68, 0x33, 0xee, 0xdf, 0x03,
+ 0x64, 0x04, 0xe0, 0x0e, 0x34, 0x8d, 0x81, 0xef, 0x4c, 0x71, 0x08, 0xc8, 0xf8, 0x69, 0x1c, 0xc1,
+ 0x7d, 0xc2, 0x1d, 0xb5, 0xf9, 0xb9, 0x27, 0x6a, 0x4d, 0xe4, 0xa6, 0x72, 0x9a, 0xc9, 0x09, 0x78,
+ 0x65, 0x2f, 0x8a, 0x05, 0x21, 0x0f, 0xe1, 0x24, 0x12, 0xf0, 0x82, 0x45, 0x35, 0x93, 0xda, 0x8e,
+ 0x96, 0x8f, 0xdb, 0xbd, 0x36, 0xd0, 0xce, 0x94, 0x13, 0x5c, 0xd2, 0xf1, 0x40, 0x46, 0x83, 0x38,
+ 0x66, 0xdd, 0xfd, 0x30, 0xbf, 0x06, 0x8b, 0x62, 0xb3, 0x25, 0xe2, 0x98, 0x22, 0x88, 0x91, 0x10,
+ 0x7e, 0x6e, 0x48, 0xc3, 0xa3, 0xb6, 0x1e, 0x42, 0x3a, 0x6b, 0x28, 0x54, 0xfa, 0x85, 0x3d, 0xba,
+ 0x2b, 0x79, 0x0a, 0x15, 0x9b, 0x9f, 0x5e, 0xca, 0x4e, 0xd4, 0xac, 0xe5, 0xf3, 0x73, 0xa7, 0x57,
+ 0xaf, 0x58, 0xa8, 0x50, 0xf4, 0xea, 0xd6, 0x74, 0x4f, 0xae, 0xe9, 0xd5, 0xe7, 0xe6, 0xad, 0xe8,
+ 0x2c, 0xd7, 0x75, 0x7a, 0xeb, 0x16, 0x0b, 0xf5, 0x59, 0xcb, 0x5f, 0xb0, 0x9c, 0xa9, 0x51, 0xa0,
+ 0x7f, 0x0c, 0xf6, 0x6f, 0x17, 0xc4, 0x49, 0xec, 0xd8, 0x43, 0x1f, 0x2d, 0xa4, 0x76, 0x7b, 0xb7,
+ 0xcc, 0xbb, 0x3e, 0x5a, 0xfb, 0x60, 0xb1, 0x86, 0x3b, 0x52, 0xa1, 0x6c, 0xaa, 0x55, 0x29, 0x9d,
+ 0x97, 0xb2, 0x87, 0x90, 0x61, 0xbe, 0xdc, 0xfc, 0xbc, 0x95, 0xcf, 0xcd, 0x37, 0x3f, 0x5b, 0xd1,
+ 0x53, 0x39, 0x84, 0x3c, 0x41, 0xa2, 0x6d, 0x47, 0x14, 0x2a, 0x9e, 0x5d, 0x56, 0xf2, 0xd3, 0xab,
+ 0x44, 0x11, 0x92, 0xd9, 0x23, 0x20, 0x2e, 0x89, 0xb4, 0x7c, 0xb8, 0x26, 0x77, 0x99, 0xe3, 0xa5,
+ 0x67, 0x4a, 0xed, 0xde, 0xc5, 0x31, 0xfe, 0x18, 0x0d, 0x63, 0x8c, 0x80, 0xc0, 0xf7, 0x70, 0x07)
+
+
+def key_expansion(data):
+ """
+ Generate key schedule
+
+ @param {int[]} data 16/24/32-Byte cipher key
+ @returns {int[]} 176/208/240-Byte expanded key
+ """
+ data = data[:] # copy
+ rcon_iteration = 1
+ key_size_bytes = len(data)
+ expanded_key_size_bytes = (key_size_bytes // 4 + 7) * BLOCK_SIZE_BYTES
+
+ while len(data) < expanded_key_size_bytes:
+ temp = data[-4:]
+ temp = key_schedule_core(temp, rcon_iteration)
+ rcon_iteration += 1
+ data += xor(temp, data[-key_size_bytes: 4 - key_size_bytes])
+
+ for _ in range(3):
+ temp = data[-4:]
+ data += xor(temp, data[-key_size_bytes: 4 - key_size_bytes])
+
+ if key_size_bytes == 32:
+ temp = data[-4:]
+ temp = sub_bytes(temp)
+ data += xor(temp, data[-key_size_bytes: 4 - key_size_bytes])
+
+ for _ in range(3 if key_size_bytes == 32 else 2 if key_size_bytes == 24 else 0):
+ temp = data[-4:]
+ data += xor(temp, data[-key_size_bytes: 4 - key_size_bytes])
+ data = data[:expanded_key_size_bytes]
+
+ return data
+
+
+def iter_vector(iv):
+ while True:
+ yield iv
+ iv = inc(iv)
+
+
+def sub_bytes(data):
+ return [SBOX[x] for x in data]
+
+
+def sub_bytes_inv(data):
+ return [SBOX_INV[x] for x in data]
+
+
+def rotate(data):
+ return data[1:] + [data[0]]
+
+
+def key_schedule_core(data, rcon_iteration):
+ data = rotate(data)
+ data = sub_bytes(data)
+ data[0] = data[0] ^ RCON[rcon_iteration]
+
+ return data
+
+
+def xor(data1, data2):
+ return [x ^ y for x, y in zip(data1, data2)]
+
+
+def iter_mix_columns(data, matrix):
+ for i in (0, 4, 8, 12):
+ for row in matrix:
+ mixed = 0
+ for j in range(4):
+ # in GF(2^8), addition and subtraction are both xor
+ mixed ^= (0 if data[i:i + 4][j] == 0 or row[j] == 0 else
+ RIJNDAEL_EXP_TABLE[(RIJNDAEL_LOG_TABLE[data[i + j]] + RIJNDAEL_LOG_TABLE[row[j]]) % 0xFF])
+ yield mixed
+
+
+def shift_rows(data):
+ return [data[((column + row) & 0b11) * 4 + row] for column in range(4) for row in range(4)]
+
+
+def shift_rows_inv(data):
+ return [data[((column - row) & 0b11) * 4 + row] for column in range(4) for row in range(4)]
+
+
+def shift_block(data):
+ data_shifted = []
+
+ bit = 0
+ for n in data:
+ if bit:
+ n |= 0x100
+ bit = n & 1
+ n >>= 1
+ data_shifted.append(n)
+
+ return data_shifted
+
+
+def inc(data):
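+ # big-endian increment with wrap-around, e.g. inc([0, 0, 255]) -> [0, 1, 0] (illustrative)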
+ data = data[:] # copy
+ for i in range(len(data) - 1, -1, -1):
+ if data[i] == 255:
+ data[i] = 0
+ else:
+ data[i] = data[i] + 1
+ break
+ return data
+
+
+def block_product(block_x, block_y):
+ # NIST SP 800-38D, Algorithm 1
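+ # i.e. multiplication in GF(2^128) with the GCM reduction polynomial
+ # x^128 + x^7 + x^2 + x + 1, whose high byte is the 0xE1 in block_r below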
+
+ if len(block_x) != BLOCK_SIZE_BYTES or len(block_y) != BLOCK_SIZE_BYTES:
+ raise ValueError("Length of blocks need to be %d bytes" % BLOCK_SIZE_BYTES)
+
+ block_r = [0xE1] + [0] * (BLOCK_SIZE_BYTES - 1)
+ block_v = block_y[:]
+ block_z = [0] * BLOCK_SIZE_BYTES
+
+ for i in block_x:
+ for bit in range(7, -1, -1):
+ if i & (1 << bit):
+ block_z = xor(block_z, block_v)
+
+ do_xor = block_v[-1] & 1
+ block_v = shift_block(block_v)
+ if do_xor:
+ block_v = xor(block_v, block_r)
+
+ return block_z
+
+
+def ghash(subkey, data):
+ # NIST SP 800-38D, Algorithm 2
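+ # computed iteratively as Y_0 = 0, Y_i = (Y_{i-1} xor X_i) * H over GF(2^128)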
+
+ if len(data) % BLOCK_SIZE_BYTES:
+ raise ValueError("Length of data should be %d bytes" % BLOCK_SIZE_BYTES)
+
+ last_y = [0] * BLOCK_SIZE_BYTES
+ for i in range(0, len(data), BLOCK_SIZE_BYTES):
+ block = data[i: i + BLOCK_SIZE_BYTES]
+ last_y = block_product(xor(last_y, block), subkey)
+
+ return last_y
+
+
+__all__ = [
+ 'aes_cbc_decrypt',
+ 'aes_cbc_decrypt_bytes',
+ 'aes_ctr_decrypt',
+ 'aes_decrypt_text',
+ 'aes_decrypt',
+ 'aes_ecb_decrypt',
+ 'aes_gcm_decrypt_and_verify',
+ 'aes_gcm_decrypt_and_verify_bytes',
+
+ 'aes_cbc_encrypt',
+ 'aes_cbc_encrypt_bytes',
+ 'aes_ctr_encrypt',
+ 'aes_ecb_encrypt',
+ 'aes_encrypt',
+
+ 'key_expansion',
+ 'pad_block',
+ 'pkcs7_padding',
+ 'unpad_pkcs7',
+]
diff --git a/yt_dlp/cache.py b/yt_dlp/cache.py
new file mode 100644
index 0000000..9dd4f2f
--- /dev/null
+++ b/yt_dlp/cache.py
@@ -0,0 +1,91 @@
+import contextlib
+import json
+import os
+import re
+import shutil
+import traceback
+import urllib.parse
+
+from .utils import expand_path, traverse_obj, version_tuple, write_json_file
+from .version import __version__
+
+
+class Cache:
+ def __init__(self, ydl):
+ self._ydl = ydl
+
+ def _get_root_dir(self):
+ res = self._ydl.params.get('cachedir')
+ if res is None:
+ cache_root = os.getenv('XDG_CACHE_HOME', '~/.cache')
+ res = os.path.join(cache_root, 'yt-dlp')
+ return expand_path(res)
+
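+ # Illustrative: ('youtube-sigfuncs', 'js_abc_107', 'json') maps to
+ # ~/.cache/yt-dlp/youtube-sigfuncs/js_abc_107.json on XDG-style systems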
+ def _get_cache_fn(self, section, key, dtype):
+ assert re.match(r'^[\w.-]+$', section), f'invalid section {section!r}'
+ key = urllib.parse.quote(key, safe='').replace('%', ',') # encode non-ascii characters
+ return os.path.join(self._get_root_dir(), section, f'{key}.{dtype}')
+
+ @property
+ def enabled(self):
+ return self._ydl.params.get('cachedir') is not False
+
+ def store(self, section, key, data, dtype='json'):
+ assert dtype in ('json',)
+
+ if not self.enabled:
+ return
+
+ fn = self._get_cache_fn(section, key, dtype)
+ try:
+ os.makedirs(os.path.dirname(fn), exist_ok=True)
+ self._ydl.write_debug(f'Saving {section}.{key} to cache')
+ write_json_file({'yt-dlp_version': __version__, 'data': data}, fn)
+ except Exception:
+ tb = traceback.format_exc()
+ self._ydl.report_warning(f'Writing cache to {fn!r} failed: {tb}')
+
+ def _validate(self, data, min_ver):
+ version = traverse_obj(data, 'yt-dlp_version')
+ if not version: # Backward compatibility
+ data, version = {'data': data}, '2022.08.19'
+ if not min_ver or version_tuple(version) >= version_tuple(min_ver):
+ return data['data']
+ self._ydl.write_debug(f'Discarding old cache from version {version} (needs {min_ver})')
+
+ def load(self, section, key, dtype='json', default=None, *, min_ver=None):
+ assert dtype in ('json',)
+
+ if not self.enabled:
+ return default
+
+ cache_fn = self._get_cache_fn(section, key, dtype)
+ with contextlib.suppress(OSError):
+ try:
+ with open(cache_fn, encoding='utf-8') as cachef:
+ self._ydl.write_debug(f'Loading {section}.{key} from cache')
+ return self._validate(json.load(cachef), min_ver)
+ except (ValueError, KeyError):
+ try:
+ file_size = os.path.getsize(cache_fn)
+ except OSError as oe:
+ file_size = str(oe)
+ self._ydl.report_warning(f'Cache retrieval from {cache_fn} failed ({file_size})')
+
+ return default
+
+ def remove(self):
+ if not self.enabled:
+ self._ydl.to_screen('Cache is disabled (Did you combine --no-cache-dir and --rm-cache-dir?)')
+ return
+
+ cachedir = self._get_root_dir()
+ if not any((term in cachedir) for term in ('cache', 'tmp')):
+ raise Exception('Not removing directory %s - this does not look like a cache dir' % cachedir)
+
+ self._ydl.to_screen(
+ 'Removing cache dir %s .' % cachedir, skip_eol=True)
+ if os.path.exists(cachedir):
+ self._ydl.to_screen('.', skip_eol=True)
+ shutil.rmtree(cachedir)
+ self._ydl.to_screen('.')
diff --git a/yt_dlp/compat/__init__.py b/yt_dlp/compat/__init__.py
new file mode 100644
index 0000000..5ad5c70
--- /dev/null
+++ b/yt_dlp/compat/__init__.py
@@ -0,0 +1,79 @@
+import os
+import sys
+import xml.etree.ElementTree as etree
+
+from .compat_utils import passthrough_module
+
+passthrough_module(__name__, '._deprecated')
+del passthrough_module
+
+
+# HTMLParseError was deprecated in Python 3.3 and removed in Python 3.5.
+# A dummy exception is kept here so that exception handling stays compatible
+# and uniform across Python versions
+class compat_HTMLParseError(ValueError):
+ pass
+
+
+class _TreeBuilder(etree.TreeBuilder):
+ def doctype(self, name, pubid, system):
+ pass
+
+
+def compat_etree_fromstring(text):
+ return etree.XML(text, parser=etree.XMLParser(target=_TreeBuilder()))
+
+
+compat_os_name = os._name if os.name == 'java' else os.name
+
+
+if compat_os_name == 'nt':
+ def compat_shlex_quote(s):
+ import re
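+ # leave simple tokens untouched; otherwise escape embedded double quotes by
+ # doubling them (cmd.exe convention) and wrap the result in '"' via .join('""')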
+ return s if re.match(r'^[-_\w./]+$', s) else s.replace('"', '""').join('""')
+else:
+ from shlex import quote as compat_shlex_quote # noqa: F401
+
+
+def compat_ord(c):
+ return c if isinstance(c, int) else ord(c)
+
+
+if compat_os_name == 'nt' and sys.version_info < (3, 8):
+ # os.path.realpath on Windows does not follow symbolic links
+ # prior to Python 3.8 (see https://bugs.python.org/issue9949)
+ def compat_realpath(path):
+ while os.path.islink(path):
+ path = os.path.abspath(os.readlink(path))
+ return os.path.realpath(path)
+else:
+ compat_realpath = os.path.realpath
+
+
+# Python 3.8+ does not honor %HOME% on windows, but this breaks compatibility with youtube-dl
+# See https://github.com/yt-dlp/yt-dlp/issues/792
+# https://docs.python.org/3/library/os.path.html#os.path.expanduser
+if compat_os_name in ('nt', 'ce'):
+ def compat_expanduser(path):
+ HOME = os.environ.get('HOME')
+ if not HOME:
+ return os.path.expanduser(path)
+ elif not path.startswith('~'):
+ return path
+ i = path.replace('\\', '/', 1).find('/') # ~user
+ if i < 0:
+ i = len(path)
+ userhome = os.path.join(os.path.dirname(HOME), path[1:i]) if i > 1 else HOME
+ return userhome + path[i:]
+else:
+ compat_expanduser = os.path.expanduser
+
+
+def urllib_req_to_req(urllib_request):
+ """Convert urllib Request to a networking Request"""
+ from ..networking import Request
+ from ..utils.networking import HTTPHeaderDict
+ return Request(
+ urllib_request.get_full_url(), data=urllib_request.data, method=urllib_request.get_method(),
+ headers=HTTPHeaderDict(urllib_request.headers, urllib_request.unredirected_hdrs),
+ extensions={'timeout': urllib_request.timeout} if hasattr(urllib_request, 'timeout') else None)
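+
+
+# A sketch of typical usage (hypothetical URL, not part of the original code):
+#   legacy = urllib.request.Request('https://example.com', headers={'X-Test': '1'})
+#   req = urllib_req_to_req(legacy)  # -> yt_dlp.networking.Request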
diff --git a/yt_dlp/compat/_deprecated.py b/yt_dlp/compat/_deprecated.py
new file mode 100644
index 0000000..607bae9
--- /dev/null
+++ b/yt_dlp/compat/_deprecated.py
@@ -0,0 +1,23 @@
+"""Deprecated - New code should avoid these"""
+import warnings
+
+from .compat_utils import passthrough_module
+
+# XXX: Implement this the same way as other DeprecationWarnings without circular import
+passthrough_module(__name__, '.._legacy', callback=lambda attr: warnings.warn(
+ DeprecationWarning(f'{__name__}.{attr} is deprecated'), stacklevel=6))
+del passthrough_module
+
+import base64
+import urllib.error
+import urllib.parse
+
+compat_str = str
+
+compat_b64decode = base64.b64decode
+
+compat_urlparse = urllib.parse
+compat_parse_qs = urllib.parse.parse_qs
+compat_urllib_parse_unquote = urllib.parse.unquote
+compat_urllib_parse_urlencode = urllib.parse.urlencode
+compat_urllib_parse_urlparse = urllib.parse.urlparse
diff --git a/yt_dlp/compat/_legacy.py b/yt_dlp/compat/_legacy.py
new file mode 100644
index 0000000..7ea5d08
--- /dev/null
+++ b/yt_dlp/compat/_legacy.py
@@ -0,0 +1,108 @@
+""" Do not use! """
+
+import base64
+import collections
+import ctypes
+import getpass
+import html.entities
+import html.parser
+import http.client
+import http.cookiejar
+import http.cookies
+import http.server
+import itertools
+import os
+import shlex
+import shutil
+import socket
+import struct
+import subprocess
+import tokenize
+import urllib.error
+import urllib.parse
+import urllib.request
+import xml.etree.ElementTree as etree
+
+# isort: split
+import asyncio # noqa: F401
+import re # noqa: F401
+from asyncio import run as compat_asyncio_run # noqa: F401
+from re import Pattern as compat_Pattern # noqa: F401
+from re import match as compat_Match # noqa: F401
+
+from . import compat_expanduser, compat_HTMLParseError, compat_realpath
+from .compat_utils import passthrough_module
+from ..dependencies import brotli as compat_brotli # noqa: F401
+from ..dependencies import websockets as compat_websockets # noqa: F401
+from ..dependencies.Cryptodome import AES as compat_pycrypto_AES # noqa: F401
+from ..networking.exceptions import HTTPError as compat_HTTPError # noqa: F401
+
+passthrough_module(__name__, '...utils', ('WINDOWS_VT_MODE', 'windows_enable_vt_mode'))
+
+
+# compat_ctypes_WINFUNCTYPE = ctypes.WINFUNCTYPE
+# will not work, since ctypes.WINFUNCTYPE does not exist on UNIX machines
+def compat_ctypes_WINFUNCTYPE(*args, **kwargs):
+ return ctypes.WINFUNCTYPE(*args, **kwargs)
+
+
+def compat_setenv(key, value, env=os.environ):
+ env[key] = value
+
+
+compat_base64_b64decode = base64.b64decode
+compat_basestring = str
+compat_casefold = str.casefold
+compat_chr = chr
+compat_collections_abc = collections.abc
+compat_cookiejar = compat_http_cookiejar = http.cookiejar
+compat_cookiejar_Cookie = compat_http_cookiejar_Cookie = http.cookiejar.Cookie
+compat_cookies = compat_http_cookies = http.cookies
+compat_cookies_SimpleCookie = compat_http_cookies_SimpleCookie = http.cookies.SimpleCookie
+compat_etree_Element = compat_xml_etree_ElementTree_Element = etree.Element
+compat_etree_register_namespace = compat_xml_etree_register_namespace = etree.register_namespace
+compat_filter = filter
+compat_get_terminal_size = shutil.get_terminal_size
+compat_getenv = os.getenv
+compat_getpass = compat_getpass_getpass = getpass.getpass
+compat_html_entities = html.entities
+compat_html_entities_html5 = html.entities.html5
+compat_html_parser_HTMLParseError = compat_HTMLParseError
+compat_HTMLParser = compat_html_parser_HTMLParser = html.parser.HTMLParser
+compat_http_client = http.client
+compat_http_server = http.server
+compat_input = input
+compat_integer_types = (int, )
+compat_itertools_count = itertools.count
+compat_kwargs = lambda kwargs: kwargs
+compat_map = map
+compat_numeric_types = (int, float, complex)
+compat_os_path_expanduser = compat_expanduser
+compat_os_path_realpath = compat_realpath
+compat_print = print
+compat_shlex_split = shlex.split
+compat_socket_create_connection = socket.create_connection
+compat_Struct = struct.Struct
+compat_struct_pack = struct.pack
+compat_struct_unpack = struct.unpack
+compat_subprocess_get_DEVNULL = lambda: subprocess.DEVNULL
+compat_tokenize_tokenize = tokenize.tokenize
+compat_urllib_error = urllib.error
+compat_urllib_HTTPError = compat_HTTPError
+compat_urllib_parse = urllib.parse
+compat_urllib_parse_parse_qs = urllib.parse.parse_qs
+compat_urllib_parse_quote = urllib.parse.quote
+compat_urllib_parse_quote_plus = urllib.parse.quote_plus
+compat_urllib_parse_unquote_plus = urllib.parse.unquote_plus
+compat_urllib_parse_unquote_to_bytes = urllib.parse.unquote_to_bytes
+compat_urllib_parse_urlunparse = urllib.parse.urlunparse
+compat_urllib_request = urllib.request
+compat_urllib_request_DataHandler = urllib.request.DataHandler
+compat_urllib_response = urllib.response
+compat_urlretrieve = compat_urllib_request_urlretrieve = urllib.request.urlretrieve
+compat_xml_parse_error = compat_xml_etree_ElementTree_ParseError = etree.ParseError
+compat_xpath = lambda xpath: xpath
+compat_zip = zip
+workaround_optparse_bug9161 = lambda: None
+
+legacy = []
diff --git a/yt_dlp/compat/compat_utils.py b/yt_dlp/compat/compat_utils.py
new file mode 100644
index 0000000..d62b7d0
--- /dev/null
+++ b/yt_dlp/compat/compat_utils.py
@@ -0,0 +1,83 @@
+import collections
+import contextlib
+import functools
+import importlib
+import sys
+import types
+
+_NO_ATTRIBUTE = object()
+
+_Package = collections.namedtuple('Package', ('name', 'version'))
+
+
+def get_package_info(module):
+ return _Package(
+ name=getattr(module, '_yt_dlp__identifier', module.__name__),
+ version=str(next(filter(None, (
+ getattr(module, attr, None)
+ for attr in ('_yt_dlp__version', '__version__', 'version_string', 'version')
+ )), None)))
+
+
+def _is_package(module):
+ return '__path__' in vars(module)
+
+
+def _is_dunder(name):
+ return name.startswith('__') and name.endswith('__')
+
+
+class EnhancedModule(types.ModuleType):
+ def __bool__(self):
+ return vars(self).get('__bool__', lambda: True)()
+
+ def __getattribute__(self, attr):
+ try:
+ ret = super().__getattribute__(attr)
+ except AttributeError:
+ if _is_dunder(attr):
+ raise
+ getter = getattr(self, '__getattr__', None)
+ if not getter:
+ raise
+ ret = getter(attr)
+ return ret.fget() if isinstance(ret, property) else ret
+
+
+def passthrough_module(parent, child, allowed_attributes=(..., ), *, callback=lambda _: None):
+ """Passthrough parent module into a child module, creating the parent if necessary"""
+ def __getattr__(attr):
+ if _is_package(parent):
+ with contextlib.suppress(ModuleNotFoundError):
+ return importlib.import_module(f'.{attr}', parent.__name__)
+
+ ret = from_child(attr)
+ if ret is _NO_ATTRIBUTE:
+ raise AttributeError(f'module {parent.__name__} has no attribute {attr}')
+ callback(attr)
+ return ret
+
+ @functools.lru_cache(maxsize=None)
+ def from_child(attr):
+ nonlocal child
+ if attr not in allowed_attributes:
+ if ... not in allowed_attributes or _is_dunder(attr):
+ return _NO_ATTRIBUTE
+
+ if isinstance(child, str):
+ child = importlib.import_module(child, parent.__name__)
+
+ if _is_package(child):
+ with contextlib.suppress(ImportError):
+ return passthrough_module(f'{parent.__name__}.{attr}',
+ importlib.import_module(f'.{attr}', child.__name__))
+
+ with contextlib.suppress(AttributeError):
+ return getattr(child, attr)
+
+ return _NO_ATTRIBUTE
+
+ parent = sys.modules.get(parent, types.ModuleType(parent))
+ parent.__class__ = EnhancedModule
+ parent.__getattr__ = __getattr__
+ return parent
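+
+
+# A minimal sketch of how this is used elsewhere in the codebase
+# (mirrors yt_dlp/compat/__init__.py):
+#   passthrough_module(__name__, '._deprecated')
+#   del passthrough_module
+# Attribute lookups that miss on the parent module are then resolved against
+# the child module, optionally triggering the deprecation callback.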
diff --git a/yt_dlp/compat/functools.py b/yt_dlp/compat/functools.py
new file mode 100644
index 0000000..36c9836
--- /dev/null
+++ b/yt_dlp/compat/functools.py
@@ -0,0 +1,12 @@
+# flake8: noqa: F405
+from functools import * # noqa: F403
+
+from .compat_utils import passthrough_module
+
+passthrough_module(__name__, 'functools')
+del passthrough_module
+
+try:
+ cache # >= 3.9
+except NameError:
+ cache = lru_cache(maxsize=None)
diff --git a/yt_dlp/compat/imghdr.py b/yt_dlp/compat/imghdr.py
new file mode 100644
index 0000000..5d64ab0
--- /dev/null
+++ b/yt_dlp/compat/imghdr.py
@@ -0,0 +1,16 @@
+tests = {
+    'webp': lambda h: h[0:4] == b'RIFF' and h[8:12] == b'WEBP',
+ 'png': lambda h: h[:8] == b'\211PNG\r\n\032\n',
+ 'jpeg': lambda h: h[6:10] in (b'JFIF', b'Exif'),
+ 'gif': lambda h: h[:6] in (b'GIF87a', b'GIF89a'),
+}
+
+
+def what(file=None, h=None):
+ """Detect format of image (Currently supports jpeg, png, webp, gif only)
+ Ref: https://github.com/python/cpython/blob/3.10/Lib/imghdr.py
+ """
+ if h is None:
+ with open(file, 'rb') as f:
+ h = f.read(12)
+ return next((type_ for type_, test in tests.items() if test(h)), None)
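+
+
+# Illustrative usage (the PNG signature below is real; the filename is hypothetical):
+#   what(h=b'\x89PNG\r\n\x1a\n\x00\x00\x00\rIHDR')  # -> 'png'
+#   what('thumbnail.webp')  # reads the first 12 bytes of the file instead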
diff --git a/yt_dlp/compat/shutil.py b/yt_dlp/compat/shutil.py
new file mode 100644
index 0000000..23239d5
--- /dev/null
+++ b/yt_dlp/compat/shutil.py
@@ -0,0 +1,30 @@
+# flake8: noqa: F405
+from shutil import * # noqa: F403
+
+from .compat_utils import passthrough_module
+
+passthrough_module(__name__, 'shutil')
+del passthrough_module
+
+
+import sys
+
+if sys.platform.startswith('freebsd'):
+ import errno
+ import os
+ import shutil
+
+ # Workaround for PermissionError when using restricted ACL mode on FreeBSD
+ def copy2(src, dst, *args, **kwargs):
+ if os.path.isdir(dst):
+ dst = os.path.join(dst, os.path.basename(src))
+ shutil.copyfile(src, dst, *args, **kwargs)
+ try:
+ shutil.copystat(src, dst, *args, **kwargs)
+ except PermissionError as e:
+ if e.errno != getattr(errno, 'EPERM', None):
+ raise
+ return dst
+
+ def move(*args, copy_function=copy2, **kwargs):
+ return shutil.move(*args, copy_function=copy_function, **kwargs)
diff --git a/yt_dlp/compat/types.py b/yt_dlp/compat/types.py
new file mode 100644
index 0000000..4aa3b0e
--- /dev/null
+++ b/yt_dlp/compat/types.py
@@ -0,0 +1,13 @@
+# flake8: noqa: F405
+from types import * # noqa: F403
+
+from .compat_utils import passthrough_module
+
+passthrough_module(__name__, 'types')
+del passthrough_module
+
+try:
+ # NB: pypy has builtin NoneType, so checking NameError won't work
+ from types import NoneType # >= 3.10
+except ImportError:
+ NoneType = type(None)
diff --git a/yt_dlp/compat/urllib/__init__.py b/yt_dlp/compat/urllib/__init__.py
new file mode 100644
index 0000000..9084b3c
--- /dev/null
+++ b/yt_dlp/compat/urllib/__init__.py
@@ -0,0 +1,10 @@
+# flake8: noqa: F405
+from urllib import * # noqa: F403
+
+del request # noqa: F821
+from . import request # noqa: F401
+
+from ..compat_utils import passthrough_module
+
+passthrough_module(__name__, 'urllib')
+del passthrough_module
diff --git a/yt_dlp/compat/urllib/request.py b/yt_dlp/compat/urllib/request.py
new file mode 100644
index 0000000..ad9fa83
--- /dev/null
+++ b/yt_dlp/compat/urllib/request.py
@@ -0,0 +1,40 @@
+# flake8: noqa: F405
+from urllib.request import * # noqa: F403
+
+from ..compat_utils import passthrough_module
+
+passthrough_module(__name__, 'urllib.request')
+del passthrough_module
+
+
+from .. import compat_os_name
+
+if compat_os_name == 'nt':
+ # On older Python versions, proxies are extracted from Windows registry erroneously. [1]
+ # If the https proxy in the registry does not have a scheme, urllib will incorrectly add https:// to it. [2]
+    # It is unlikely that the user has actually set it to be https, so it should be safe to downgrade
+ # it to http on these older Python versions to avoid issues
+ # This also applies for ftp proxy type, as ftp:// proxy scheme is not supported.
+ # 1: https://github.com/python/cpython/issues/86793
+ # 2: https://github.com/python/cpython/blob/51f1ae5ceb0673316c4e4b0175384e892e33cc6e/Lib/urllib/request.py#L2683-L2698
+ import sys
+ from urllib.request import getproxies_environment, getproxies_registry
+
+ def getproxies_registry_patched():
+ proxies = getproxies_registry()
+ if (
+ sys.version_info >= (3, 10, 5) # https://docs.python.org/3.10/whatsnew/changelog.html#python-3-10-5-final
+ or (3, 9, 13) <= sys.version_info < (3, 10) # https://docs.python.org/3.9/whatsnew/changelog.html#python-3-9-13-final
+ ):
+ return proxies
+
+ for scheme in ('https', 'ftp'):
+ if scheme in proxies and proxies[scheme].startswith(f'{scheme}://'):
+ proxies[scheme] = 'http' + proxies[scheme][len(scheme):]
+
+ return proxies
+
+ def getproxies():
+ return getproxies_environment() or getproxies_registry_patched()
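+
+    # For example (illustrative values): a registry-derived entry reported as
+    #   proxies['https'] == 'https://127.0.0.1:8080'
+    # is downgraded to 'http://127.0.0.1:8080' on the affected Python versions.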
+
+del compat_os_name
diff --git a/yt_dlp/cookies.py b/yt_dlp/cookies.py
new file mode 100644
index 0000000..28d174a
--- /dev/null
+++ b/yt_dlp/cookies.py
@@ -0,0 +1,1346 @@
+import base64
+import collections
+import contextlib
+import glob
+import http.cookiejar
+import http.cookies
+import io
+import json
+import os
+import re
+import shutil
+import struct
+import subprocess
+import sys
+import tempfile
+import time
+import urllib.request
+from datetime import datetime, timedelta, timezone
+from enum import Enum, auto
+from hashlib import pbkdf2_hmac
+
+from .aes import (
+ aes_cbc_decrypt_bytes,
+ aes_gcm_decrypt_and_verify_bytes,
+ unpad_pkcs7,
+)
+from .compat import functools # isort: split
+from .compat import compat_os_name
+from .dependencies import (
+ _SECRETSTORAGE_UNAVAILABLE_REASON,
+ secretstorage,
+ sqlite3,
+)
+from .minicurses import MultilinePrinter, QuietMultilinePrinter
+from .utils import (
+ DownloadError,
+ Popen,
+ error_to_str,
+ expand_path,
+ is_path_like,
+ sanitize_url,
+ str_or_none,
+ try_call,
+ write_string,
+)
+from .utils._utils import _YDLLogger
+from .utils.networking import normalize_url
+
+CHROMIUM_BASED_BROWSERS = {'brave', 'chrome', 'chromium', 'edge', 'opera', 'vivaldi'}
+SUPPORTED_BROWSERS = CHROMIUM_BASED_BROWSERS | {'firefox', 'safari'}
+
+
+class YDLLogger(_YDLLogger):
+ def warning(self, message, only_once=False): # compat
+ return super().warning(message, once=only_once)
+
+ class ProgressBar(MultilinePrinter):
+ _DELAY, _timer = 0.1, 0
+
+ def print(self, message):
+ if time.time() - self._timer > self._DELAY:
+ self.print_at_line(f'[Cookies] {message}', 0)
+ self._timer = time.time()
+
+ def progress_bar(self):
+ """Return a context manager with a print method. (Optional)"""
+ # Do not print to files/pipes, loggers, or when --no-progress is used
+ if not self._ydl or self._ydl.params.get('noprogress') or self._ydl.params.get('logger'):
+ return
+ file = self._ydl._out_files.error
+ try:
+ if not file.isatty():
+ return
+ except BaseException:
+ return
+ return self.ProgressBar(file, preserve_output=False)
+
+
+def _create_progress_bar(logger):
+ if hasattr(logger, 'progress_bar'):
+ printer = logger.progress_bar()
+ if printer:
+ return printer
+ printer = QuietMultilinePrinter()
+ printer.print = lambda _: None
+ return printer
+
+
+def load_cookies(cookie_file, browser_specification, ydl):
+ cookie_jars = []
+ if browser_specification is not None:
+ browser_name, profile, keyring, container = _parse_browser_specification(*browser_specification)
+ cookie_jars.append(
+ extract_cookies_from_browser(browser_name, profile, YDLLogger(ydl), keyring=keyring, container=container))
+
+ if cookie_file is not None:
+ is_filename = is_path_like(cookie_file)
+ if is_filename:
+ cookie_file = expand_path(cookie_file)
+
+ jar = YoutubeDLCookieJar(cookie_file)
+ if not is_filename or os.access(cookie_file, os.R_OK):
+ jar.load()
+ cookie_jars.append(jar)
+
+ return _merge_cookie_jars(cookie_jars)
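+
+
+# A sketch of typical usage (hypothetical arguments):
+#   jar = load_cookies('cookies.txt', ('firefox',), ydl)
+# merges cookies extracted from the browser with those read from the file.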
+
+
+def extract_cookies_from_browser(browser_name, profile=None, logger=YDLLogger(), *, keyring=None, container=None):
+ if browser_name == 'firefox':
+ return _extract_firefox_cookies(profile, container, logger)
+ elif browser_name == 'safari':
+ return _extract_safari_cookies(profile, logger)
+ elif browser_name in CHROMIUM_BASED_BROWSERS:
+ return _extract_chrome_cookies(browser_name, profile, keyring, logger)
+ else:
+ raise ValueError(f'unknown browser: {browser_name}')
+
+
+def _extract_firefox_cookies(profile, container, logger):
+ logger.info('Extracting cookies from firefox')
+ if not sqlite3:
+ logger.warning('Cannot extract cookies from firefox without sqlite3 support. '
+ 'Please use a Python interpreter compiled with sqlite3 support')
+ return YoutubeDLCookieJar()
+
+ if profile is None:
+ search_roots = list(_firefox_browser_dirs())
+ elif _is_path(profile):
+ search_roots = [profile]
+ else:
+ search_roots = [os.path.join(path, profile) for path in _firefox_browser_dirs()]
+ search_root = ', '.join(map(repr, search_roots))
+
+ cookie_database_path = _newest(_firefox_cookie_dbs(search_roots))
+ if cookie_database_path is None:
+ raise FileNotFoundError(f'could not find firefox cookies database in {search_root}')
+ logger.debug(f'Extracting cookies from: "{cookie_database_path}"')
+
+ container_id = None
+ if container not in (None, 'none'):
+ containers_path = os.path.join(os.path.dirname(cookie_database_path), 'containers.json')
+ if not os.path.isfile(containers_path) or not os.access(containers_path, os.R_OK):
+ raise FileNotFoundError(f'could not read containers.json in {search_root}')
+ with open(containers_path, encoding='utf8') as containers:
+ identities = json.load(containers).get('identities', [])
+ container_id = next((context.get('userContextId') for context in identities if container in (
+ context.get('name'),
+ try_call(lambda: re.fullmatch(r'userContext([^\.]+)\.label', context['l10nID']).group())
+ )), None)
+ if not isinstance(container_id, int):
+ raise ValueError(f'could not find firefox container "{container}" in containers.json')
+
+ with tempfile.TemporaryDirectory(prefix='yt_dlp') as tmpdir:
+ cursor = None
+ try:
+ cursor = _open_database_copy(cookie_database_path, tmpdir)
+ if isinstance(container_id, int):
+ logger.debug(
+ f'Only loading cookies from firefox container "{container}", ID {container_id}')
+ cursor.execute(
+ 'SELECT host, name, value, path, expiry, isSecure FROM moz_cookies WHERE originAttributes LIKE ? OR originAttributes LIKE ?',
+ (f'%userContextId={container_id}', f'%userContextId={container_id}&%'))
+ elif container == 'none':
+ logger.debug('Only loading cookies not belonging to any container')
+ cursor.execute(
+ 'SELECT host, name, value, path, expiry, isSecure FROM moz_cookies WHERE NOT INSTR(originAttributes,"userContextId=")')
+ else:
+ cursor.execute('SELECT host, name, value, path, expiry, isSecure FROM moz_cookies')
+ jar = YoutubeDLCookieJar()
+ with _create_progress_bar(logger) as progress_bar:
+ table = cursor.fetchall()
+ total_cookie_count = len(table)
+ for i, (host, name, value, path, expiry, is_secure) in enumerate(table):
+ progress_bar.print(f'Loading cookie {i: 6d}/{total_cookie_count: 6d}')
+ cookie = http.cookiejar.Cookie(
+ version=0, name=name, value=value, port=None, port_specified=False,
+ domain=host, domain_specified=bool(host), domain_initial_dot=host.startswith('.'),
+ path=path, path_specified=bool(path), secure=is_secure, expires=expiry, discard=False,
+ comment=None, comment_url=None, rest={})
+ jar.set_cookie(cookie)
+ logger.info(f'Extracted {len(jar)} cookies from firefox')
+ return jar
+ finally:
+ if cursor is not None:
+ cursor.connection.close()
+
+
+def _firefox_browser_dirs():
+ if sys.platform in ('cygwin', 'win32'):
+ yield os.path.expandvars(R'%APPDATA%\Mozilla\Firefox\Profiles')
+
+ elif sys.platform == 'darwin':
+ yield os.path.expanduser('~/Library/Application Support/Firefox/Profiles')
+
+ else:
+ yield from map(os.path.expanduser, ('~/.mozilla/firefox', '~/snap/firefox/common/.mozilla/firefox'))
+
+
+def _firefox_cookie_dbs(roots):
+ for root in map(os.path.abspath, roots):
+ for pattern in ('', '*/', 'Profiles/*/'):
+ yield from glob.iglob(os.path.join(root, pattern, 'cookies.sqlite'))
+
+
+def _get_chromium_based_browser_settings(browser_name):
+ # https://chromium.googlesource.com/chromium/src/+/HEAD/docs/user_data_dir.md
+ if sys.platform in ('cygwin', 'win32'):
+ appdata_local = os.path.expandvars('%LOCALAPPDATA%')
+ appdata_roaming = os.path.expandvars('%APPDATA%')
+ browser_dir = {
+ 'brave': os.path.join(appdata_local, R'BraveSoftware\Brave-Browser\User Data'),
+ 'chrome': os.path.join(appdata_local, R'Google\Chrome\User Data'),
+ 'chromium': os.path.join(appdata_local, R'Chromium\User Data'),
+ 'edge': os.path.join(appdata_local, R'Microsoft\Edge\User Data'),
+ 'opera': os.path.join(appdata_roaming, R'Opera Software\Opera Stable'),
+ 'vivaldi': os.path.join(appdata_local, R'Vivaldi\User Data'),
+ }[browser_name]
+
+ elif sys.platform == 'darwin':
+ appdata = os.path.expanduser('~/Library/Application Support')
+ browser_dir = {
+ 'brave': os.path.join(appdata, 'BraveSoftware/Brave-Browser'),
+ 'chrome': os.path.join(appdata, 'Google/Chrome'),
+ 'chromium': os.path.join(appdata, 'Chromium'),
+ 'edge': os.path.join(appdata, 'Microsoft Edge'),
+ 'opera': os.path.join(appdata, 'com.operasoftware.Opera'),
+ 'vivaldi': os.path.join(appdata, 'Vivaldi'),
+ }[browser_name]
+
+ else:
+ config = _config_home()
+ browser_dir = {
+ 'brave': os.path.join(config, 'BraveSoftware/Brave-Browser'),
+ 'chrome': os.path.join(config, 'google-chrome'),
+ 'chromium': os.path.join(config, 'chromium'),
+ 'edge': os.path.join(config, 'microsoft-edge'),
+ 'opera': os.path.join(config, 'opera'),
+ 'vivaldi': os.path.join(config, 'vivaldi'),
+ }[browser_name]
+
+ # Linux keyring names can be determined by snooping on dbus while opening the browser in KDE:
+ # dbus-monitor "interface='org.kde.KWallet'" "type=method_return"
+ keyring_name = {
+ 'brave': 'Brave',
+ 'chrome': 'Chrome',
+ 'chromium': 'Chromium',
+ 'edge': 'Microsoft Edge' if sys.platform == 'darwin' else 'Chromium',
+ 'opera': 'Opera' if sys.platform == 'darwin' else 'Chromium',
+ 'vivaldi': 'Vivaldi' if sys.platform == 'darwin' else 'Chrome',
+ }[browser_name]
+
+ browsers_without_profiles = {'opera'}
+
+ return {
+ 'browser_dir': browser_dir,
+ 'keyring_name': keyring_name,
+ 'supports_profiles': browser_name not in browsers_without_profiles
+ }
+
+
+def _extract_chrome_cookies(browser_name, profile, keyring, logger):
+ logger.info(f'Extracting cookies from {browser_name}')
+
+ if not sqlite3:
+ logger.warning(f'Cannot extract cookies from {browser_name} without sqlite3 support. '
+ 'Please use a Python interpreter compiled with sqlite3 support')
+ return YoutubeDLCookieJar()
+
+ config = _get_chromium_based_browser_settings(browser_name)
+
+ if profile is None:
+ search_root = config['browser_dir']
+ elif _is_path(profile):
+ search_root = profile
+ config['browser_dir'] = os.path.dirname(profile) if config['supports_profiles'] else profile
+ else:
+ if config['supports_profiles']:
+ search_root = os.path.join(config['browser_dir'], profile)
+ else:
+ logger.error(f'{browser_name} does not support profiles')
+ search_root = config['browser_dir']
+
+ cookie_database_path = _newest(_find_files(search_root, 'Cookies', logger))
+ if cookie_database_path is None:
+ raise FileNotFoundError(f'could not find {browser_name} cookies database in "{search_root}"')
+ logger.debug(f'Extracting cookies from: "{cookie_database_path}"')
+
+ decryptor = get_cookie_decryptor(config['browser_dir'], config['keyring_name'], logger, keyring=keyring)
+
+ with tempfile.TemporaryDirectory(prefix='yt_dlp') as tmpdir:
+ cursor = None
+ try:
+ cursor = _open_database_copy(cookie_database_path, tmpdir)
+ cursor.connection.text_factory = bytes
+ column_names = _get_column_names(cursor, 'cookies')
+ secure_column = 'is_secure' if 'is_secure' in column_names else 'secure'
+ cursor.execute(f'SELECT host_key, name, value, encrypted_value, path, expires_utc, {secure_column} FROM cookies')
+ jar = YoutubeDLCookieJar()
+ failed_cookies = 0
+ unencrypted_cookies = 0
+ with _create_progress_bar(logger) as progress_bar:
+ table = cursor.fetchall()
+ total_cookie_count = len(table)
+ for i, line in enumerate(table):
+ progress_bar.print(f'Loading cookie {i: 6d}/{total_cookie_count: 6d}')
+ is_encrypted, cookie = _process_chrome_cookie(decryptor, *line)
+ if not cookie:
+ failed_cookies += 1
+ continue
+ elif not is_encrypted:
+ unencrypted_cookies += 1
+ jar.set_cookie(cookie)
+ if failed_cookies > 0:
+ failed_message = f' ({failed_cookies} could not be decrypted)'
+ else:
+ failed_message = ''
+ logger.info(f'Extracted {len(jar)} cookies from {browser_name}{failed_message}')
+ counts = decryptor._cookie_counts.copy()
+ counts['unencrypted'] = unencrypted_cookies
+ logger.debug(f'cookie version breakdown: {counts}')
+ return jar
+ except PermissionError as error:
+ if compat_os_name == 'nt' and error.errno == 13:
+ message = 'Could not copy Chrome cookie database. See https://github.com/yt-dlp/yt-dlp/issues/7271 for more info'
+ logger.error(message)
+ raise DownloadError(message) # force exit
+ raise
+ finally:
+ if cursor is not None:
+ cursor.connection.close()
+
+
+def _process_chrome_cookie(decryptor, host_key, name, value, encrypted_value, path, expires_utc, is_secure):
+ host_key = host_key.decode()
+ name = name.decode()
+ value = value.decode()
+ path = path.decode()
+ is_encrypted = not value and encrypted_value
+
+ if is_encrypted:
+ value = decryptor.decrypt(encrypted_value)
+ if value is None:
+ return is_encrypted, None
+
+ return is_encrypted, http.cookiejar.Cookie(
+ version=0, name=name, value=value, port=None, port_specified=False,
+ domain=host_key, domain_specified=bool(host_key), domain_initial_dot=host_key.startswith('.'),
+ path=path, path_specified=bool(path), secure=is_secure, expires=expires_utc, discard=False,
+ comment=None, comment_url=None, rest={})
+
+
+class ChromeCookieDecryptor:
+ """
+ Overview:
+
+ Linux:
+ - cookies are either v10 or v11
+ - v10: AES-CBC encrypted with a fixed key
+ - also attempts empty password if decryption fails
+ - v11: AES-CBC encrypted with an OS protected key (keyring)
+ - also attempts empty password if decryption fails
+        - v11 keys can be stored in various places depending on the active desktop environment [2]
+
+ Mac:
+ - cookies are either v10 or not v10
+        - v10: AES-CBC encrypted with an OS protected key (keyring) and more key derivation iterations than Linux
+ - not v10: 'old data' stored as plaintext
+
+ Windows:
+ - cookies are either v10 or not v10
+ - v10: AES-GCM encrypted with a key which is encrypted with DPAPI
+ - not v10: encrypted with DPAPI
+
+ Sources:
+ - [1] https://chromium.googlesource.com/chromium/src/+/refs/heads/main/components/os_crypt/
+ - [2] https://chromium.googlesource.com/chromium/src/+/refs/heads/main/components/os_crypt/sync/key_storage_linux.cc
+ - KeyStorageLinux::CreateService
+ """
+
+ _cookie_counts = {}
+
+ def decrypt(self, encrypted_value):
+        raise NotImplementedError('Must be implemented by subclasses')
+
+
+def get_cookie_decryptor(browser_root, browser_keyring_name, logger, *, keyring=None):
+ if sys.platform == 'darwin':
+ return MacChromeCookieDecryptor(browser_keyring_name, logger)
+ elif sys.platform in ('win32', 'cygwin'):
+ return WindowsChromeCookieDecryptor(browser_root, logger)
+ return LinuxChromeCookieDecryptor(browser_keyring_name, logger, keyring=keyring)
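+
+
+# A sketch of typical usage (hypothetical path and values):
+#   decryptor = get_cookie_decryptor('/path/to/User Data', 'Chrome', logger)
+#   plaintext = decryptor.decrypt(encrypted_value)  # str, or None on failure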
+
+
+class LinuxChromeCookieDecryptor(ChromeCookieDecryptor):
+ def __init__(self, browser_keyring_name, logger, *, keyring=None):
+ self._logger = logger
+ self._v10_key = self.derive_key(b'peanuts')
+ self._empty_key = self.derive_key(b'')
+ self._cookie_counts = {'v10': 0, 'v11': 0, 'other': 0}
+ self._browser_keyring_name = browser_keyring_name
+ self._keyring = keyring
+
+ @functools.cached_property
+ def _v11_key(self):
+ password = _get_linux_keyring_password(self._browser_keyring_name, self._keyring, self._logger)
+ return None if password is None else self.derive_key(password)
+
+ @staticmethod
+ def derive_key(password):
+ # values from
+ # https://chromium.googlesource.com/chromium/src/+/refs/heads/main/components/os_crypt/sync/os_crypt_linux.cc
+ return pbkdf2_sha1(password, salt=b'saltysalt', iterations=1, key_length=16)
+
+ def decrypt(self, encrypted_value):
+        """
+        Following the same approach as the fix in [1]: if cookies fail to decrypt, attempt to
+        decrypt them with an empty password. The failure detection is not the same as what
+        Chromium uses, so the results won't be perfect.
+
+ References:
+ - [1] https://chromium.googlesource.com/chromium/src/+/bbd54702284caca1f92d656fdcadf2ccca6f4165%5E%21/
+ - a bugfix to try an empty password as a fallback
+ """
+ version = encrypted_value[:3]
+ ciphertext = encrypted_value[3:]
+
+ if version == b'v10':
+ self._cookie_counts['v10'] += 1
+ return _decrypt_aes_cbc_multi(ciphertext, (self._v10_key, self._empty_key), self._logger)
+
+ elif version == b'v11':
+ self._cookie_counts['v11'] += 1
+ if self._v11_key is None:
+ self._logger.warning('cannot decrypt v11 cookies: no key found', only_once=True)
+ return None
+ return _decrypt_aes_cbc_multi(ciphertext, (self._v11_key, self._empty_key), self._logger)
+
+ else:
+ self._logger.warning(f'unknown cookie version: "{version}"', only_once=True)
+ self._cookie_counts['other'] += 1
+ return None
+
+
+class MacChromeCookieDecryptor(ChromeCookieDecryptor):
+ def __init__(self, browser_keyring_name, logger):
+ self._logger = logger
+ password = _get_mac_keyring_password(browser_keyring_name, logger)
+ self._v10_key = None if password is None else self.derive_key(password)
+ self._cookie_counts = {'v10': 0, 'other': 0}
+
+ @staticmethod
+ def derive_key(password):
+ # values from
+ # https://chromium.googlesource.com/chromium/src/+/refs/heads/main/components/os_crypt/sync/os_crypt_mac.mm
+ return pbkdf2_sha1(password, salt=b'saltysalt', iterations=1003, key_length=16)
+
+ def decrypt(self, encrypted_value):
+ version = encrypted_value[:3]
+ ciphertext = encrypted_value[3:]
+
+ if version == b'v10':
+ self._cookie_counts['v10'] += 1
+ if self._v10_key is None:
+ self._logger.warning('cannot decrypt v10 cookies: no key found', only_once=True)
+ return None
+
+ return _decrypt_aes_cbc_multi(ciphertext, (self._v10_key,), self._logger)
+
+ else:
+ self._cookie_counts['other'] += 1
+ # other prefixes are considered 'old data' which were stored as plaintext
+ # https://chromium.googlesource.com/chromium/src/+/refs/heads/main/components/os_crypt/sync/os_crypt_mac.mm
+ return encrypted_value
+
+
+class WindowsChromeCookieDecryptor(ChromeCookieDecryptor):
+ def __init__(self, browser_root, logger):
+ self._logger = logger
+ self._v10_key = _get_windows_v10_key(browser_root, logger)
+ self._cookie_counts = {'v10': 0, 'other': 0}
+
+ def decrypt(self, encrypted_value):
+ version = encrypted_value[:3]
+ ciphertext = encrypted_value[3:]
+
+ if version == b'v10':
+ self._cookie_counts['v10'] += 1
+ if self._v10_key is None:
+ self._logger.warning('cannot decrypt v10 cookies: no key found', only_once=True)
+ return None
+
+ # https://chromium.googlesource.com/chromium/src/+/refs/heads/main/components/os_crypt/sync/os_crypt_win.cc
+ # kNonceLength
+ nonce_length = 96 // 8
+ # boringssl
+ # EVP_AEAD_AES_GCM_TAG_LEN
+ authentication_tag_length = 16
+
+ raw_ciphertext = ciphertext
+ nonce = raw_ciphertext[:nonce_length]
+ ciphertext = raw_ciphertext[nonce_length:-authentication_tag_length]
+ authentication_tag = raw_ciphertext[-authentication_tag_length:]
+
+ return _decrypt_aes_gcm(ciphertext, self._v10_key, nonce, authentication_tag, self._logger)
+
+ else:
+ self._cookie_counts['other'] += 1
+ # any other prefix means the data is DPAPI encrypted
+ # https://chromium.googlesource.com/chromium/src/+/refs/heads/main/components/os_crypt/sync/os_crypt_win.cc
+ return _decrypt_windows_dpapi(encrypted_value, self._logger).decode()
+
+
+def _extract_safari_cookies(profile, logger):
+ if sys.platform != 'darwin':
+ raise ValueError(f'unsupported platform: {sys.platform}')
+
+ if profile:
+ cookies_path = os.path.expanduser(profile)
+ if not os.path.isfile(cookies_path):
+ raise FileNotFoundError('custom safari cookies database not found')
+
+ else:
+ cookies_path = os.path.expanduser('~/Library/Cookies/Cookies.binarycookies')
+
+ if not os.path.isfile(cookies_path):
+ logger.debug('Trying secondary cookie location')
+ cookies_path = os.path.expanduser('~/Library/Containers/com.apple.Safari/Data/Library/Cookies/Cookies.binarycookies')
+ if not os.path.isfile(cookies_path):
+ raise FileNotFoundError('could not find safari cookies database')
+
+ with open(cookies_path, 'rb') as f:
+ cookies_data = f.read()
+
+ jar = parse_safari_cookies(cookies_data, logger=logger)
+ logger.info(f'Extracted {len(jar)} cookies from safari')
+ return jar
+
+
+class ParserError(Exception):
+ pass
+
+
+class DataParser:
+ def __init__(self, data, logger):
+ self._data = data
+ self.cursor = 0
+ self._logger = logger
+
+ def read_bytes(self, num_bytes):
+ if num_bytes < 0:
+ raise ParserError(f'invalid read of {num_bytes} bytes')
+ end = self.cursor + num_bytes
+ if end > len(self._data):
+ raise ParserError('reached end of input')
+ data = self._data[self.cursor:end]
+ self.cursor = end
+ return data
+
+ def expect_bytes(self, expected_value, message):
+ value = self.read_bytes(len(expected_value))
+ if value != expected_value:
+ raise ParserError(f'unexpected value: {value} != {expected_value} ({message})')
+
+ def read_uint(self, big_endian=False):
+ data_format = '>I' if big_endian else '<I'
+ return struct.unpack(data_format, self.read_bytes(4))[0]
+
+ def read_double(self, big_endian=False):
+ data_format = '>d' if big_endian else '<d'
+ return struct.unpack(data_format, self.read_bytes(8))[0]
+
+ def read_cstring(self):
+ buffer = []
+ while True:
+ c = self.read_bytes(1)
+ if c == b'\x00':
+ return b''.join(buffer).decode()
+ else:
+ buffer.append(c)
+
+ def skip(self, num_bytes, description='unknown'):
+ if num_bytes > 0:
+ self._logger.debug(f'skipping {num_bytes} bytes ({description}): {self.read_bytes(num_bytes)!r}')
+ elif num_bytes < 0:
+ raise ParserError(f'invalid skip of {num_bytes} bytes')
+
+ def skip_to(self, offset, description='unknown'):
+ self.skip(offset - self.cursor, description)
+
+ def skip_to_end(self, description='unknown'):
+ self.skip_to(len(self._data), description)
+
+
+def _mac_absolute_time_to_posix(timestamp):
+ return int((datetime(2001, 1, 1, 0, 0, tzinfo=timezone.utc) + timedelta(seconds=timestamp)).timestamp())
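+
+
+# Mac absolute time counts seconds from 2001-01-01 00:00 UTC, so for example
+# _mac_absolute_time_to_posix(0) == 978307200 (the POSIX timestamp of that epoch).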
+
+
+def _parse_safari_cookies_header(data, logger):
+ p = DataParser(data, logger)
+ p.expect_bytes(b'cook', 'database signature')
+ number_of_pages = p.read_uint(big_endian=True)
+ page_sizes = [p.read_uint(big_endian=True) for _ in range(number_of_pages)]
+ return page_sizes, p.cursor
+
+
+def _parse_safari_cookies_page(data, jar, logger):
+ p = DataParser(data, logger)
+ p.expect_bytes(b'\x00\x00\x01\x00', 'page signature')
+ number_of_cookies = p.read_uint()
+ record_offsets = [p.read_uint() for _ in range(number_of_cookies)]
+ if number_of_cookies == 0:
+ logger.debug(f'a cookies page of size {len(data)} has no cookies')
+ return
+
+ p.skip_to(record_offsets[0], 'unknown page header field')
+
+ with _create_progress_bar(logger) as progress_bar:
+ for i, record_offset in enumerate(record_offsets):
+ progress_bar.print(f'Loading cookie {i: 6d}/{number_of_cookies: 6d}')
+ p.skip_to(record_offset, 'space between records')
+ record_length = _parse_safari_cookies_record(data[record_offset:], jar, logger)
+ p.read_bytes(record_length)
+ p.skip_to_end('space in between pages')
+
+
+def _parse_safari_cookies_record(data, jar, logger):
+ p = DataParser(data, logger)
+ record_size = p.read_uint()
+ p.skip(4, 'unknown record field 1')
+ flags = p.read_uint()
+ is_secure = bool(flags & 0x0001)
+ p.skip(4, 'unknown record field 2')
+ domain_offset = p.read_uint()
+ name_offset = p.read_uint()
+ path_offset = p.read_uint()
+ value_offset = p.read_uint()
+ p.skip(8, 'unknown record field 3')
+ expiration_date = _mac_absolute_time_to_posix(p.read_double())
+ _creation_date = _mac_absolute_time_to_posix(p.read_double()) # noqa: F841
+
+ try:
+ p.skip_to(domain_offset)
+ domain = p.read_cstring()
+
+ p.skip_to(name_offset)
+ name = p.read_cstring()
+
+ p.skip_to(path_offset)
+ path = p.read_cstring()
+
+ p.skip_to(value_offset)
+ value = p.read_cstring()
+ except UnicodeDecodeError:
+ logger.warning('failed to parse Safari cookie because UTF-8 decoding failed', only_once=True)
+ return record_size
+
+ p.skip_to(record_size, 'space at the end of the record')
+
+ cookie = http.cookiejar.Cookie(
+ version=0, name=name, value=value, port=None, port_specified=False,
+ domain=domain, domain_specified=bool(domain), domain_initial_dot=domain.startswith('.'),
+ path=path, path_specified=bool(path), secure=is_secure, expires=expiration_date, discard=False,
+ comment=None, comment_url=None, rest={})
+ jar.set_cookie(cookie)
+ return record_size
+
+
+def parse_safari_cookies(data, jar=None, logger=YDLLogger()):
+ """
+ References:
+ - https://github.com/libyal/dtformats/blob/main/documentation/Safari%20Cookies.asciidoc
+        - this data appears to be out of date, but the important parts of the database structure are the same
+ - there are a few bytes here and there which are skipped during parsing
+ """
+ if jar is None:
+ jar = YoutubeDLCookieJar()
+ page_sizes, body_start = _parse_safari_cookies_header(data, logger)
+ p = DataParser(data[body_start:], logger)
+ for page_size in page_sizes:
+ _parse_safari_cookies_page(p.read_bytes(page_size), jar, logger)
+ p.skip_to_end('footer')
+ return jar
+
+
+class _LinuxDesktopEnvironment(Enum):
+ """
+ https://chromium.googlesource.com/chromium/src/+/refs/heads/main/base/nix/xdg_util.h
+ DesktopEnvironment
+ """
+ OTHER = auto()
+ CINNAMON = auto()
+ DEEPIN = auto()
+ GNOME = auto()
+ KDE3 = auto()
+ KDE4 = auto()
+ KDE5 = auto()
+ KDE6 = auto()
+ PANTHEON = auto()
+ UKUI = auto()
+ UNITY = auto()
+ XFCE = auto()
+ LXQT = auto()
+
+
+class _LinuxKeyring(Enum):
+ """
+ https://chromium.googlesource.com/chromium/src/+/refs/heads/main/components/os_crypt/sync/key_storage_util_linux.h
+ SelectedLinuxBackend
+ """
+ KWALLET = auto() # KDE4
+ KWALLET5 = auto()
+ KWALLET6 = auto()
+ GNOMEKEYRING = auto()
+ BASICTEXT = auto()
+
+
+SUPPORTED_KEYRINGS = _LinuxKeyring.__members__.keys()
+
+
+def _get_linux_desktop_environment(env, logger):
+ """
+ https://chromium.googlesource.com/chromium/src/+/refs/heads/main/base/nix/xdg_util.cc
+ GetDesktopEnvironment
+ """
+ xdg_current_desktop = env.get('XDG_CURRENT_DESKTOP', None)
+ desktop_session = env.get('DESKTOP_SESSION', None)
+ if xdg_current_desktop is not None:
+ xdg_current_desktop = xdg_current_desktop.split(':')[0].strip()
+
+ if xdg_current_desktop == 'Unity':
+ if desktop_session is not None and 'gnome-fallback' in desktop_session:
+ return _LinuxDesktopEnvironment.GNOME
+ else:
+ return _LinuxDesktopEnvironment.UNITY
+ elif xdg_current_desktop == 'Deepin':
+ return _LinuxDesktopEnvironment.DEEPIN
+ elif xdg_current_desktop == 'GNOME':
+ return _LinuxDesktopEnvironment.GNOME
+ elif xdg_current_desktop == 'X-Cinnamon':
+ return _LinuxDesktopEnvironment.CINNAMON
+ elif xdg_current_desktop == 'KDE':
+ kde_version = env.get('KDE_SESSION_VERSION', None)
+ if kde_version == '5':
+ return _LinuxDesktopEnvironment.KDE5
+ elif kde_version == '6':
+ return _LinuxDesktopEnvironment.KDE6
+ elif kde_version == '4':
+ return _LinuxDesktopEnvironment.KDE4
+ else:
+ logger.info(f'unknown KDE version: "{kde_version}". Assuming KDE4')
+ return _LinuxDesktopEnvironment.KDE4
+ elif xdg_current_desktop == 'Pantheon':
+ return _LinuxDesktopEnvironment.PANTHEON
+ elif xdg_current_desktop == 'XFCE':
+ return _LinuxDesktopEnvironment.XFCE
+ elif xdg_current_desktop == 'UKUI':
+ return _LinuxDesktopEnvironment.UKUI
+ elif xdg_current_desktop == 'LXQt':
+ return _LinuxDesktopEnvironment.LXQT
+ else:
+ logger.info(f'XDG_CURRENT_DESKTOP is set to an unknown value: "{xdg_current_desktop}"')
+
+ elif desktop_session is not None:
+ if desktop_session == 'deepin':
+ return _LinuxDesktopEnvironment.DEEPIN
+ elif desktop_session in ('mate', 'gnome'):
+ return _LinuxDesktopEnvironment.GNOME
+ elif desktop_session in ('kde4', 'kde-plasma'):
+ return _LinuxDesktopEnvironment.KDE4
+ elif desktop_session == 'kde':
+ if 'KDE_SESSION_VERSION' in env:
+ return _LinuxDesktopEnvironment.KDE4
+ else:
+ return _LinuxDesktopEnvironment.KDE3
+ elif 'xfce' in desktop_session or desktop_session == 'xubuntu':
+ return _LinuxDesktopEnvironment.XFCE
+ elif desktop_session == 'ukui':
+ return _LinuxDesktopEnvironment.UKUI
+ else:
+ logger.info(f'DESKTOP_SESSION is set to an unknown value: "{desktop_session}"')
+
+ else:
+ if 'GNOME_DESKTOP_SESSION_ID' in env:
+ return _LinuxDesktopEnvironment.GNOME
+ elif 'KDE_FULL_SESSION' in env:
+ if 'KDE_SESSION_VERSION' in env:
+ return _LinuxDesktopEnvironment.KDE4
+ else:
+ return _LinuxDesktopEnvironment.KDE3
+ return _LinuxDesktopEnvironment.OTHER
+
+
+def _choose_linux_keyring(logger):
+ """
+ SelectBackend in [1]
+
+ There is currently support for forcing chromium to use BASIC_TEXT by creating a file called
+ `Disable Local Encryption` [1] in the user data dir. The function to write this file (`WriteBackendUse()` [1])
+ does not appear to be called anywhere other than in tests, so the user would have to create this file manually
+ and so would be aware enough to tell yt-dlp to use the BASIC_TEXT keyring.
+
+ References:
+ - [1] https://chromium.googlesource.com/chromium/src/+/refs/heads/main/components/os_crypt/sync/key_storage_util_linux.cc
+ """
+ desktop_environment = _get_linux_desktop_environment(os.environ, logger)
+ logger.debug(f'detected desktop environment: {desktop_environment.name}')
+ if desktop_environment == _LinuxDesktopEnvironment.KDE4:
+ linux_keyring = _LinuxKeyring.KWALLET
+ elif desktop_environment == _LinuxDesktopEnvironment.KDE5:
+ linux_keyring = _LinuxKeyring.KWALLET5
+ elif desktop_environment == _LinuxDesktopEnvironment.KDE6:
+ linux_keyring = _LinuxKeyring.KWALLET6
+ elif desktop_environment in (
+ _LinuxDesktopEnvironment.KDE3, _LinuxDesktopEnvironment.LXQT, _LinuxDesktopEnvironment.OTHER
+ ):
+ linux_keyring = _LinuxKeyring.BASICTEXT
+ else:
+ linux_keyring = _LinuxKeyring.GNOMEKEYRING
+ return linux_keyring
+
+
+def _get_kwallet_network_wallet(keyring, logger):
+ """ The name of the wallet used to store network passwords.
+
+ https://chromium.googlesource.com/chromium/src/+/refs/heads/main/components/os_crypt/sync/kwallet_dbus.cc
+ KWalletDBus::NetworkWallet
+ which does a dbus call to the following function:
+ https://api.kde.org/frameworks/kwallet/html/classKWallet_1_1Wallet.html
+ Wallet::NetworkWallet
+ """
+ default_wallet = 'kdewallet'
+ try:
+ if keyring == _LinuxKeyring.KWALLET:
+ service_name = 'org.kde.kwalletd'
+ wallet_path = '/modules/kwalletd'
+ elif keyring == _LinuxKeyring.KWALLET5:
+ service_name = 'org.kde.kwalletd5'
+ wallet_path = '/modules/kwalletd5'
+ elif keyring == _LinuxKeyring.KWALLET6:
+ service_name = 'org.kde.kwalletd6'
+ wallet_path = '/modules/kwalletd6'
+ else:
+ raise ValueError(keyring)
+
+ stdout, _, returncode = Popen.run([
+ 'dbus-send', '--session', '--print-reply=literal',
+ f'--dest={service_name}',
+ wallet_path,
+ 'org.kde.KWallet.networkWallet'
+ ], text=True, stdout=subprocess.PIPE, stderr=subprocess.DEVNULL)
+
+ if returncode:
+ logger.warning('failed to read NetworkWallet')
+ return default_wallet
+ else:
+ logger.debug(f'NetworkWallet = "{stdout.strip()}"')
+ return stdout.strip()
+ except Exception as e:
+ logger.warning(f'exception while obtaining NetworkWallet: {e}')
+ return default_wallet
+
+
+def _get_kwallet_password(browser_keyring_name, keyring, logger):
+ logger.debug(f'using kwallet-query to obtain password from {keyring.name}')
+
+ if shutil.which('kwallet-query') is None:
+        logger.error('kwallet-query command not found. KWallet and kwallet-query '
+                     'must be installed to read from KWallet. kwallet-query should be '
+                     'included in the kwallet package for your distribution')
+ return b''
+
+ network_wallet = _get_kwallet_network_wallet(keyring, logger)
+
+ try:
+ stdout, _, returncode = Popen.run([
+ 'kwallet-query',
+ '--read-password', f'{browser_keyring_name} Safe Storage',
+ '--folder', f'{browser_keyring_name} Keys',
+ network_wallet
+ ], stdout=subprocess.PIPE, stderr=subprocess.DEVNULL)
+
+ if returncode:
+ logger.error(f'kwallet-query failed with return code {returncode}. '
+ 'Please consult the kwallet-query man page for details')
+ return b''
+ else:
+ if stdout.lower().startswith(b'failed to read'):
+ logger.debug('failed to read password from kwallet. Using empty string instead')
+                    # This sometimes occurs in KDE because Chrome does not check hasEntry and instead
+                    # just tries to read the value (for which kwallet returns an empty string), whereas
+                    # kwallet-query checks hasEntry. To verify this, run
+                    # dbus-monitor "interface='org.kde.KWallet'" "type=method_return"
+                    # while starting chrome.
+                    # This was later identified as a bug and fixed in
+ # https://chromium.googlesource.com/chromium/src/+/bbd54702284caca1f92d656fdcadf2ccca6f4165%5E%21/#F0
+ # https://chromium.googlesource.com/chromium/src/+/5463af3c39d7f5b6d11db7fbd51e38cc1974d764
+ return b''
+ else:
+ logger.debug('password found')
+ return stdout.rstrip(b'\n')
+ except Exception as e:
+ logger.warning(f'exception running kwallet-query: {error_to_str(e)}')
+ return b''
+
+
+def _get_gnome_keyring_password(browser_keyring_name, logger):
+ if not secretstorage:
+ logger.error(f'secretstorage not available {_SECRETSTORAGE_UNAVAILABLE_REASON}')
+ return b''
+    # The GNOME keyring does not seem to organise keys in the same way as KWallet.
+    # Using `dbus-monitor` during startup, it can be observed that Chromium lists all keys
+    # and presumably searches for its key in the list. It appears that we must do the same.
+ # https://github.com/jaraco/keyring/issues/556
+ with contextlib.closing(secretstorage.dbus_init()) as con:
+ col = secretstorage.get_default_collection(con)
+ for item in col.get_all_items():
+ if item.get_label() == f'{browser_keyring_name} Safe Storage':
+ return item.get_secret()
+ else:
+ logger.error('failed to read from keyring')
+ return b''
+
+
+def _get_linux_keyring_password(browser_keyring_name, keyring, logger):
+ # note: chrome/chromium can be run with the following flags to determine which keyring backend
+ # it has chosen to use
+ # chromium --enable-logging=stderr --v=1 2>&1 | grep key_storage_
+ # Chromium supports a flag: --password-store=<basic|gnome|kwallet> so the automatic detection
+ # will not be sufficient in all cases.
+
+ keyring = _LinuxKeyring[keyring] if keyring else _choose_linux_keyring(logger)
+ logger.debug(f'Chosen keyring: {keyring.name}')
+
+ if keyring in (_LinuxKeyring.KWALLET, _LinuxKeyring.KWALLET5, _LinuxKeyring.KWALLET6):
+ return _get_kwallet_password(browser_keyring_name, keyring, logger)
+ elif keyring == _LinuxKeyring.GNOMEKEYRING:
+ return _get_gnome_keyring_password(browser_keyring_name, logger)
+ elif keyring == _LinuxKeyring.BASICTEXT:
+ # when basic text is chosen, all cookies are stored as v10 (so no keyring password is required)
+ return None
+ assert False, f'Unknown keyring {keyring}'
+
+
+def _get_mac_keyring_password(browser_keyring_name, logger):
+ logger.debug('using find-generic-password to obtain password from OSX keychain')
+ try:
+ stdout, _, returncode = Popen.run(
+ ['security', 'find-generic-password',
+ '-w', # write password to stdout
+ '-a', browser_keyring_name, # match 'account'
+ '-s', f'{browser_keyring_name} Safe Storage'], # match 'service'
+ stdout=subprocess.PIPE, stderr=subprocess.DEVNULL)
+ if returncode:
+ logger.warning('find-generic-password failed')
+ return None
+ return stdout.rstrip(b'\n')
+ except Exception as e:
+ logger.warning(f'exception running find-generic-password: {error_to_str(e)}')
+ return None
+
+
+def _get_windows_v10_key(browser_root, logger):
+ """
+ References:
+ - [1] https://chromium.googlesource.com/chromium/src/+/refs/heads/main/components/os_crypt/sync/os_crypt_win.cc
+ """
+ path = _newest(_find_files(browser_root, 'Local State', logger))
+ if path is None:
+ logger.error('could not find local state file')
+ return None
+ logger.debug(f'Found local state file at "{path}"')
+ with open(path, encoding='utf8') as f:
+ data = json.load(f)
+ try:
+ # kOsCryptEncryptedKeyPrefName in [1]
+ base64_key = data['os_crypt']['encrypted_key']
+ except KeyError:
+ logger.error('no encrypted key in Local State')
+ return None
+ encrypted_key = base64.b64decode(base64_key)
+ # kDPAPIKeyPrefix in [1]
+ prefix = b'DPAPI'
+ if not encrypted_key.startswith(prefix):
+ logger.error('invalid key')
+ return None
+ return _decrypt_windows_dpapi(encrypted_key[len(prefix):], logger)
+
+
+def pbkdf2_sha1(password, salt, iterations, key_length):
+ return pbkdf2_hmac('sha1', password, salt, iterations, key_length)
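+
+
+# For example, the fixed Linux v10 key above is derived as
+#   pbkdf2_sha1(b'peanuts', salt=b'saltysalt', iterations=1, key_length=16)
+# (see LinuxChromeCookieDecryptor.derive_key).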
+
+
+def _decrypt_aes_cbc_multi(ciphertext, keys, logger, initialization_vector=b' ' * 16):
+ for key in keys:
+ plaintext = unpad_pkcs7(aes_cbc_decrypt_bytes(ciphertext, key, initialization_vector))
+ try:
+ return plaintext.decode()
+ except UnicodeDecodeError:
+ pass
+ logger.warning('failed to decrypt cookie (AES-CBC) because UTF-8 decoding failed. Possibly the key is wrong?', only_once=True)
+ return None
+
+
+def _decrypt_aes_gcm(ciphertext, key, nonce, authentication_tag, logger):
+ try:
+ plaintext = aes_gcm_decrypt_and_verify_bytes(ciphertext, key, authentication_tag, nonce)
+ except ValueError:
+ logger.warning('failed to decrypt cookie (AES-GCM) because the MAC check failed. Possibly the key is wrong?', only_once=True)
+ return None
+
+ try:
+ return plaintext.decode()
+ except UnicodeDecodeError:
+ logger.warning('failed to decrypt cookie (AES-GCM) because UTF-8 decoding failed. Possibly the key is wrong?', only_once=True)
+ return None
+
+
+def _decrypt_windows_dpapi(ciphertext, logger):
+ """
+ References:
+ - https://docs.microsoft.com/en-us/windows/win32/api/dpapi/nf-dpapi-cryptunprotectdata
+ """
+
+ import ctypes
+ import ctypes.wintypes
+
+ class DATA_BLOB(ctypes.Structure):
+ _fields_ = [('cbData', ctypes.wintypes.DWORD),
+ ('pbData', ctypes.POINTER(ctypes.c_char))]
+
+ buffer = ctypes.create_string_buffer(ciphertext)
+ blob_in = DATA_BLOB(ctypes.sizeof(buffer), buffer)
+ blob_out = DATA_BLOB()
+ ret = ctypes.windll.crypt32.CryptUnprotectData(
+ ctypes.byref(blob_in), # pDataIn
+ None, # ppszDataDescr: human readable description of pDataIn
+ None, # pOptionalEntropy: salt?
+ None, # pvReserved: must be NULL
+ None, # pPromptStruct: information about prompts to display
+ 0, # dwFlags
+ ctypes.byref(blob_out) # pDataOut
+ )
+ if not ret:
+ logger.warning('failed to decrypt with DPAPI', only_once=True)
+ return None
+
+ result = ctypes.string_at(blob_out.pbData, blob_out.cbData)
+ ctypes.windll.kernel32.LocalFree(blob_out.pbData)
+ return result
+
+
+def _config_home():
+ return os.environ.get('XDG_CONFIG_HOME', os.path.expanduser('~/.config'))
+
+
+def _open_database_copy(database_path, tmpdir):
+ # cannot open sqlite databases if they are already in use (e.g. by the browser)
+ database_copy_path = os.path.join(tmpdir, 'temporary.sqlite')
+ shutil.copy(database_path, database_copy_path)
+ conn = sqlite3.connect(database_copy_path)
+ return conn.cursor()
+
+
+def _get_column_names(cursor, table_name):
+ table_info = cursor.execute(f'PRAGMA table_info({table_name})').fetchall()
+ return [row[1].decode() for row in table_info]
+
+
+def _newest(files):
+ return max(files, key=lambda path: os.lstat(path).st_mtime, default=None)
+
+
+def _find_files(root, filename, logger):
+    # yield all candidate files; the caller picks the most recently used one via _newest()
+ i = 0
+ with _create_progress_bar(logger) as progress_bar:
+ for curr_root, _, files in os.walk(root):
+ for file in files:
+ i += 1
+ progress_bar.print(f'Searching for "{filename}": {i: 6d} files searched')
+ if file == filename:
+ yield os.path.join(curr_root, file)
+
+
+def _merge_cookie_jars(jars):
+ output_jar = YoutubeDLCookieJar()
+ for jar in jars:
+ for cookie in jar:
+ output_jar.set_cookie(cookie)
+ if jar.filename is not None:
+ output_jar.filename = jar.filename
+ return output_jar
+
+
+def _is_path(value):
+ return any(sep in value for sep in (os.path.sep, os.path.altsep) if sep)
+
+
+def _parse_browser_specification(browser_name, profile=None, keyring=None, container=None):
+ if browser_name not in SUPPORTED_BROWSERS:
+ raise ValueError(f'unsupported browser: "{browser_name}"')
+ if keyring not in (None, *SUPPORTED_KEYRINGS):
+ raise ValueError(f'unsupported keyring: "{keyring}"')
+ if profile is not None and _is_path(expand_path(profile)):
+ profile = expand_path(profile)
+ return browser_name, profile, keyring, container
+
+
+class LenientSimpleCookie(http.cookies.SimpleCookie):
+ """More lenient version of http.cookies.SimpleCookie"""
+ # From https://github.com/python/cpython/blob/v3.10.7/Lib/http/cookies.py
+ # We use Morsel's legal key chars to avoid errors on setting values
+ _LEGAL_KEY_CHARS = r'\w\d' + re.escape('!#$%&\'*+-.:^_`|~')
+ _LEGAL_VALUE_CHARS = _LEGAL_KEY_CHARS + re.escape('(),/<=>?@[]{}')
+
+ _RESERVED = {
+ "expires",
+ "path",
+ "comment",
+ "domain",
+ "max-age",
+ "secure",
+ "httponly",
+ "version",
+ "samesite",
+ }
+
+ _FLAGS = {"secure", "httponly"}
+
+ # Added 'bad' group to catch the remaining value
+ _COOKIE_PATTERN = re.compile(r"""
+ \s* # Optional whitespace at start of cookie
+ (?P<key> # Start of group 'key'
+ [""" + _LEGAL_KEY_CHARS + r"""]+?# Any word of at least one letter
+ ) # End of group 'key'
+ ( # Optional group: there may not be a value.
+ \s*=\s* # Equal Sign
+ ( # Start of potential value
+ (?P<val> # Start of group 'val'
+ "(?:[^\\"]|\\.)*" # Any doublequoted string
+ | # or
+ \w{3},\s[\w\d\s-]{9,11}\s[\d:]{8}\sGMT # Special case for "expires" attr
+ | # or
+ [""" + _LEGAL_VALUE_CHARS + r"""]* # Any word or empty string
+ ) # End of group 'val'
+ | # or
+ (?P<bad>(?:\\;|[^;])*?) # 'bad' group fallback for invalid values
+ ) # End of potential value
+ )? # End of optional value group
+ \s* # Any number of spaces.
+ (\s+|;|$) # Ending either at space, semicolon, or EOS.
+ """, re.ASCII | re.VERBOSE)
+
+ def load(self, data):
+ # Workaround for https://github.com/yt-dlp/yt-dlp/issues/4776
+ if not isinstance(data, str):
+ return super().load(data)
+
+ morsel = None
+ for match in self._COOKIE_PATTERN.finditer(data):
+ if match.group('bad'):
+ morsel = None
+ continue
+
+ key, value = match.group('key', 'val')
+
+ is_attribute = False
+ if key.startswith('$'):
+ key = key[1:]
+ is_attribute = True
+
+ lower_key = key.lower()
+ if lower_key in self._RESERVED:
+ if morsel is None:
+ continue
+
+ if value is None:
+ if lower_key not in self._FLAGS:
+ morsel = None
+ continue
+ value = True
+ else:
+ value, _ = self.value_decode(value)
+
+ morsel[key] = value
+
+ elif is_attribute:
+ morsel = None
+
+ elif value is not None:
+ morsel = self.get(key, http.cookies.Morsel())
+ real_value, coded_value = self.value_decode(value)
+ morsel.set(key, real_value, coded_value)
+ self[key] = morsel
+
+ else:
+ morsel = None
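+
+    # A minimal illustration (hypothetical header): the stdlib parser stops at the
+    # first token it cannot parse, whereas this version skips it and continues, so
+    #   LenientSimpleCookie().load('foo=bar; malformed\x00attr; baz=qux')
+    # still yields both the 'foo' and 'baz' morsels.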
+
+
+class YoutubeDLCookieJar(http.cookiejar.MozillaCookieJar):
+ """
+ See [1] for cookie file format.
+
+ 1. https://curl.haxx.se/docs/http-cookies.html
+ """
+ _HTTPONLY_PREFIX = '#HttpOnly_'
+ _ENTRY_LEN = 7
+ _HEADER = '''# Netscape HTTP Cookie File
+# This file is generated by yt-dlp. Do not edit.
+
+'''
+ _CookieFileEntry = collections.namedtuple(
+ 'CookieFileEntry',
+ ('domain_name', 'include_subdomains', 'path', 'https_only', 'expires_at', 'name', 'value'))
+
+ def __init__(self, filename=None, *args, **kwargs):
+ super().__init__(None, *args, **kwargs)
+ if is_path_like(filename):
+ filename = os.fspath(filename)
+ self.filename = filename
+
+ @staticmethod
+ def _true_or_false(cndn):
+ return 'TRUE' if cndn else 'FALSE'
+
+ @contextlib.contextmanager
+ def open(self, file, *, write=False):
+ if is_path_like(file):
+ with open(file, 'w' if write else 'r', encoding='utf-8') as f:
+ yield f
+ else:
+ if write:
+ file.truncate(0)
+ yield file
+
+ def _really_save(self, f, ignore_discard, ignore_expires):
+ now = time.time()
+ for cookie in self:
+ if (not ignore_discard and cookie.discard
+ or not ignore_expires and cookie.is_expired(now)):
+ continue
+ name, value = cookie.name, cookie.value
+ if value is None:
+ # cookies.txt regards 'Set-Cookie: foo' as a cookie
+ # with no name, whereas http.cookiejar regards it as a
+ # cookie with no value.
+ name, value = '', name
+ f.write('%s\n' % '\t'.join((
+ cookie.domain,
+ self._true_or_false(cookie.domain.startswith('.')),
+ cookie.path,
+ self._true_or_false(cookie.secure),
+ str_or_none(cookie.expires, default=''),
+ name, value
+ )))
+
+ def save(self, filename=None, ignore_discard=True, ignore_expires=True):
+ """
+ Save cookies to a file.
+ Code is taken from CPython 3.6
+ https://github.com/python/cpython/blob/8d999cbf4adea053be6dbb612b9844635c4dfb8e/Lib/http/cookiejar.py#L2091-L2117 """
+
+ if filename is None:
+ if self.filename is not None:
+ filename = self.filename
+ else:
+ raise ValueError(http.cookiejar.MISSING_FILENAME_TEXT)
+
+ # Store session cookies with `expires` set to 0 instead of an empty string
+ for cookie in self:
+ if cookie.expires is None:
+ cookie.expires = 0
+
+ with self.open(filename, write=True) as f:
+ f.write(self._HEADER)
+ self._really_save(f, ignore_discard, ignore_expires)
+
+ def load(self, filename=None, ignore_discard=True, ignore_expires=True):
+ """Load cookies from a file."""
+ if filename is None:
+ if self.filename is not None:
+ filename = self.filename
+ else:
+ raise ValueError(http.cookiejar.MISSING_FILENAME_TEXT)
+
+ def prepare_line(line):
+ if line.startswith(self._HTTPONLY_PREFIX):
+ line = line[len(self._HTTPONLY_PREFIX):]
+ # comments and empty lines are fine
+ if line.startswith('#') or not line.strip():
+ return line
+ cookie_list = line.split('\t')
+ if len(cookie_list) != self._ENTRY_LEN:
+ raise http.cookiejar.LoadError('invalid length %d' % len(cookie_list))
+ cookie = self._CookieFileEntry(*cookie_list)
+ if cookie.expires_at and not cookie.expires_at.isdigit():
+ raise http.cookiejar.LoadError('invalid expires at %s' % cookie.expires_at)
+ return line
+
+ cf = io.StringIO()
+ with self.open(filename) as f:
+ for line in f:
+ try:
+ cf.write(prepare_line(line))
+ except http.cookiejar.LoadError as e:
+ if f'{line.strip()} '[0] in '[{"':
+ raise http.cookiejar.LoadError(
+ 'Cookies file must be Netscape formatted, not JSON. See '
+ 'https://github.com/yt-dlp/yt-dlp/wiki/FAQ#how-do-i-pass-cookies-to-yt-dlp')
+ write_string(f'WARNING: skipping cookie file entry due to {e}: {line!r}\n')
+ continue
+ cf.seek(0)
+ self._really_load(cf, filename, ignore_discard, ignore_expires)
+ # Session cookies are denoted by either `expires` field set to
+ # an empty string or 0. MozillaCookieJar only recognizes the former
+ # (see [1]). So we need to force the latter to be recognized as session
+ # cookies on our own.
+ # Session cookies may be important for cookie-based authentication,
+ # e.g. when a user does not tick the 'Remember me' checkbox while
+ # logging in to a site, some important cookies are stored as session
+ # cookies, and not recognizing them results in a failed login.
+ # 1. https://bugs.python.org/issue17164
+ for cookie in self:
+ # Treat `expires=0` cookies as session cookies
+ if cookie.expires == 0:
+ cookie.expires = None
+ cookie.discard = True
+
+ def get_cookie_header(self, url):
+ """Generate a Cookie HTTP header for a given url"""
+ cookie_req = urllib.request.Request(normalize_url(sanitize_url(url)))
+ self.add_cookie_header(cookie_req)
+ return cookie_req.get_header('Cookie')
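+
+ # Example (illustrative; the file name and cookie are made up):
+ #   jar = YoutubeDLCookieJar('cookies.txt')
+ #   jar.load()
+ #   jar.get_cookie_header('https://example.com/')  # -> 'sid=abc123' or None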
+
+ def get_cookies_for_url(self, url):
+ """Generate a list of Cookie objects for a given url"""
+ # Policy `_now` attribute must be set before calling `_cookies_for_request`
+ # Ref: https://github.com/python/cpython/blob/3.7/Lib/http/cookiejar.py#L1360
+ self._policy._now = self._now = int(time.time())
+ return self._cookies_for_request(urllib.request.Request(normalize_url(sanitize_url(url))))
+
+ def clear(self, *args, **kwargs):
+ with contextlib.suppress(KeyError):
+ return super().clear(*args, **kwargs)
diff --git a/yt_dlp/dependencies/Cryptodome.py b/yt_dlp/dependencies/Cryptodome.py
new file mode 100644
index 0000000..2cfa4c9
--- /dev/null
+++ b/yt_dlp/dependencies/Cryptodome.py
@@ -0,0 +1,38 @@
+from ..compat.compat_utils import passthrough_module
+
+try:
+ import Cryptodome as _parent
+except ImportError:
+ try:
+ import Crypto as _parent
+ except (ImportError, SyntaxError): # Old Crypto gives SyntaxError in newer Python
+ _parent = passthrough_module(__name__, 'no_Cryptodome')
+ __bool__ = lambda: False
+
+del passthrough_module
+
+__version__ = ''
+AES = PKCS1_v1_5 = Blowfish = PKCS1_OAEP = SHA1 = CMAC = RSA = None
+try:
+ if _parent.__name__ == 'Cryptodome':
+ from Cryptodome import __version__
+ from Cryptodome.Cipher import AES, PKCS1_OAEP, Blowfish, PKCS1_v1_5
+ from Cryptodome.Hash import CMAC, SHA1
+ from Cryptodome.PublicKey import RSA
+ elif _parent.__name__ == 'Crypto':
+ from Crypto import __version__
+ from Crypto.Cipher import AES, PKCS1_OAEP, Blowfish, PKCS1_v1_5 # noqa: F401
+ from Crypto.Hash import CMAC, SHA1 # noqa: F401
+ from Crypto.PublicKey import RSA # noqa: F401
+except ImportError:
+ __version__ = f'broken {__version__}'.strip()
+
+
+_yt_dlp__identifier = _parent.__name__
+if AES and _yt_dlp__identifier == 'Crypto':
+ try:
+ # In pycrypto, mode defaults to ECB. See:
+ # https://www.pycryptodome.org/en/latest/src/vs_pycrypto.html#:~:text=not%20have%20ECB%20as%20default%20mode
+ AES.new(b'abcdefghijklmnop')
+ except TypeError:
+ _yt_dlp__identifier = 'pycrypto'
diff --git a/yt_dlp/dependencies/__init__.py b/yt_dlp/dependencies/__init__.py
new file mode 100644
index 0000000..9e3f907
--- /dev/null
+++ b/yt_dlp/dependencies/__init__.py
@@ -0,0 +1,92 @@
+# flake8: noqa: F401
+"""Imports all optional dependencies for the project.
+An attribute "_yt_dlp__identifier" may be inserted into the module if it uses an ambiguous namespace"""
+
+try:
+ import brotlicffi as brotli
+except ImportError:
+ try:
+ import brotli
+ except ImportError:
+ brotli = None
+
+
+try:
+ import certifi
+except ImportError:
+ certifi = None
+else:
+ from os.path import exists as _path_exists
+
+ # The certificate bundle may not be included in the executable
+ if not _path_exists(certifi.where()):
+ certifi = None
+
+
+try:
+ import mutagen
+except ImportError:
+ mutagen = None
+
+
+secretstorage = None
+try:
+ import secretstorage
+ _SECRETSTORAGE_UNAVAILABLE_REASON = None
+except ImportError:
+ _SECRETSTORAGE_UNAVAILABLE_REASON = (
+ 'as the `secretstorage` module is not installed. '
+ 'Please install by running `python3 -m pip install secretstorage`')
+except Exception as _err:
+ _SECRETSTORAGE_UNAVAILABLE_REASON = f'as the `secretstorage` module could not be initialized. {_err}'
+
+
+try:
+ import sqlite3
+ # We need to get the underlying `sqlite` version, see https://github.com/yt-dlp/yt-dlp/issues/8152
+ sqlite3._yt_dlp__version = sqlite3.sqlite_version
+except ImportError:
+ # although sqlite3 is part of the standard library, it is possible to compile Python without
+ # sqlite support. See: https://github.com/yt-dlp/yt-dlp/issues/544
+ sqlite3 = None
+
+
+try:
+ import websockets
+except ImportError:
+ websockets = None
+
+try:
+ import urllib3
+except ImportError:
+ urllib3 = None
+
+try:
+ import requests
+except ImportError:
+ requests = None
+
+try:
+ import xattr # xattr or pyxattr
+except ImportError:
+ xattr = None
+else:
+ if hasattr(xattr, 'set'): # pyxattr
+ xattr._yt_dlp__identifier = 'pyxattr'
+
+
+from . import Cryptodome
+
+all_dependencies = {k: v for k, v in globals().items() if not k.startswith('_')}
+available_dependencies = {k: v for k, v in all_dependencies.items() if v}
+
+
+# Deprecated
+Cryptodome_AES = Cryptodome.AES
+
+
+__all__ = [
+ 'all_dependencies',
+ 'available_dependencies',
+ *all_dependencies.keys(),
+]
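+
+# Example (illustrative): callers import optional dependencies from this module
+# instead of importing the packages directly, so a missing package degrades
+# gracefully instead of raising at import time:
+#
+#   from yt_dlp.dependencies import sqlite3
+#   if sqlite3 is None:
+#       ...  # e.g. skip browser-cookie extraction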
diff --git a/yt_dlp/downloader/__init__.py b/yt_dlp/downloader/__init__.py
new file mode 100644
index 0000000..51a9f28
--- /dev/null
+++ b/yt_dlp/downloader/__init__.py
@@ -0,0 +1,131 @@
+from ..utils import NO_DEFAULT, determine_protocol
+
+
+def get_suitable_downloader(info_dict, params={}, default=NO_DEFAULT, protocol=None, to_stdout=False):
+ info_dict['protocol'] = determine_protocol(info_dict)
+ info_copy = info_dict.copy()
+ info_copy['to_stdout'] = to_stdout
+
+ protocols = (protocol or info_copy['protocol']).split('+')
+ downloaders = [_get_suitable_downloader(info_copy, proto, params, default) for proto in protocols]
+
+ if set(downloaders) == {FFmpegFD} and FFmpegFD.can_merge_formats(info_copy, params):
+ return FFmpegFD
+ elif (set(downloaders) == {DashSegmentsFD}
+ and not (to_stdout and len(protocols) > 1)
+ and set(protocols) == {'http_dash_segments_generator'}):
+ return DashSegmentsFD
+ elif len(downloaders) == 1:
+ return downloaders[0]
+ return None
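+
+# Example (illustrative): a plain HTTPS URL resolves to protocol 'https', which
+# has no entry in PROTOCOL_MAP below, so the default HttpFD is returned;
+# compound protocols such as 'm3u8+http_dash_segments' are split on '+' and
+# each part is resolved separately:
+#
+#   get_suitable_downloader({'url': 'https://example.com/v.mp4'})  # -> HttpFD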
+
+
+# Some of these require get_suitable_downloader
+from .common import FileDownloader
+from .dash import DashSegmentsFD
+from .external import FFmpegFD, get_external_downloader
+from .f4m import F4mFD
+from .fc2 import FC2LiveFD
+from .hls import HlsFD
+from .http import HttpFD
+from .ism import IsmFD
+from .mhtml import MhtmlFD
+from .niconico import NiconicoDmcFD, NiconicoLiveFD
+from .rtmp import RtmpFD
+from .rtsp import RtspFD
+from .websocket import WebSocketFragmentFD
+from .youtube_live_chat import YoutubeLiveChatFD
+
+PROTOCOL_MAP = {
+ 'rtmp': RtmpFD,
+ 'rtmpe': RtmpFD,
+ 'rtmp_ffmpeg': FFmpegFD,
+ 'm3u8_native': HlsFD,
+ 'm3u8': FFmpegFD,
+ 'mms': RtspFD,
+ 'rtsp': RtspFD,
+ 'f4m': F4mFD,
+ 'http_dash_segments': DashSegmentsFD,
+ 'http_dash_segments_generator': DashSegmentsFD,
+ 'ism': IsmFD,
+ 'mhtml': MhtmlFD,
+ 'niconico_dmc': NiconicoDmcFD,
+ 'niconico_live': NiconicoLiveFD,
+ 'fc2_live': FC2LiveFD,
+ 'websocket_frag': WebSocketFragmentFD,
+ 'youtube_live_chat': YoutubeLiveChatFD,
+ 'youtube_live_chat_replay': YoutubeLiveChatFD,
+}
+
+
+def shorten_protocol_name(proto, simplify=False):
+ short_protocol_names = {
+ 'm3u8_native': 'm3u8',
+ 'm3u8': 'm3u8F',
+ 'rtmp_ffmpeg': 'rtmpF',
+ 'http_dash_segments': 'dash',
+ 'http_dash_segments_generator': 'dashG',
+ 'niconico_dmc': 'dmc',
+ 'websocket_frag': 'WSfrag',
+ }
+ if simplify:
+ short_protocol_names.update({
+ 'https': 'http',
+ 'ftps': 'ftp',
+ 'm3u8': 'm3u8', # Reverse above m3u8 mapping
+ 'm3u8_native': 'm3u8',
+ 'http_dash_segments_generator': 'dash',
+ 'rtmp_ffmpeg': 'rtmp',
+ 'm3u8_frag_urls': 'm3u8',
+ 'dash_frag_urls': 'dash',
+ })
+ return short_protocol_names.get(proto, proto)
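+
+# Example (illustrative):
+#   shorten_protocol_name('http_dash_segments')   # -> 'dash'
+#   shorten_protocol_name('m3u8')                 # -> 'm3u8F' (the ffmpeg variant)
+#   shorten_protocol_name('m3u8', simplify=True)  # -> 'm3u8'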
+
+
+def _get_suitable_downloader(info_dict, protocol, params, default):
+ """Get the downloader class that can handle the info dict."""
+ if default is NO_DEFAULT:
+ default = HttpFD
+
+ if (info_dict.get('section_start') or info_dict.get('section_end')) and FFmpegFD.can_download(info_dict):
+ return FFmpegFD
+
+ info_dict['protocol'] = protocol
+ downloaders = params.get('external_downloader')
+ external_downloader = (
+ downloaders if isinstance(downloaders, str) or downloaders is None
+ else downloaders.get(shorten_protocol_name(protocol, True), downloaders.get('default')))
+
+ if external_downloader is None:
+ if info_dict['to_stdout'] and FFmpegFD.can_merge_formats(info_dict, params):
+ return FFmpegFD
+ elif external_downloader.lower() != 'native':
+ ed = get_external_downloader(external_downloader)
+ if ed.can_download(info_dict, external_downloader):
+ return ed
+
+ if protocol == 'http_dash_segments':
+ if info_dict.get('is_live') and (external_downloader or '').lower() != 'native':
+ return FFmpegFD
+
+ if protocol in ('m3u8', 'm3u8_native'):
+ if info_dict.get('is_live'):
+ return FFmpegFD
+ elif (external_downloader or '').lower() == 'native':
+ return HlsFD
+ elif protocol == 'm3u8_native' and get_suitable_downloader(
+ info_dict, params, None, protocol='m3u8_frag_urls', to_stdout=info_dict['to_stdout']):
+ return HlsFD
+ elif params.get('hls_prefer_native') is True:
+ return HlsFD
+ elif params.get('hls_prefer_native') is False:
+ return FFmpegFD
+
+ return PROTOCOL_MAP.get(protocol, default)
+
+
+__all__ = [
+ 'FileDownloader',
+ 'get_suitable_downloader',
+ 'shorten_protocol_name',
+]
diff --git a/yt_dlp/downloader/common.py b/yt_dlp/downloader/common.py
new file mode 100644
index 0000000..b71d7ee
--- /dev/null
+++ b/yt_dlp/downloader/common.py
@@ -0,0 +1,486 @@
+import contextlib
+import errno
+import functools
+import os
+import random
+import re
+import time
+
+from ..minicurses import (
+ BreaklineStatusPrinter,
+ MultilineLogger,
+ MultilinePrinter,
+ QuietMultilinePrinter,
+)
+from ..utils import (
+ IDENTITY,
+ NO_DEFAULT,
+ LockingUnsupportedError,
+ Namespace,
+ RetryManager,
+ classproperty,
+ decodeArgument,
+ deprecation_warning,
+ encodeFilename,
+ format_bytes,
+ join_nonempty,
+ parse_bytes,
+ remove_start,
+ sanitize_open,
+ shell_quote,
+ timeconvert,
+ timetuple_from_msec,
+ try_call,
+)
+
+
+class FileDownloader:
+ """File Downloader class.
+
+ File downloader objects are the ones responsible for downloading the
+ actual video file and writing it to disk.
+
+ File downloaders accept a lot of parameters. In order not to saturate
+ the object constructor with arguments, it receives a dictionary of
+ options instead.
+
+ Available options:
+
+ verbose: Print additional info to stdout.
+ quiet: Do not print messages to stdout.
+ ratelimit: Download speed limit, in bytes/sec.
+ throttledratelimit: Assume the download is being throttled below this speed (bytes/sec)
+ retries: Number of times to retry for expected network errors.
+ Default is 0 for API, but 10 for CLI
+ file_access_retries: Number of times to retry on file access error (default: 3)
+ buffersize: Size of download buffer in bytes.
+ noresizebuffer: Do not automatically resize the download buffer.
+ continuedl: Try to continue downloads if possible.
+ noprogress: Do not print the progress bar.
+ nopart: Do not use temporary .part files.
+ updatetime: Use the Last-modified header to set output file timestamps.
+ test: Download only first bytes to test the downloader.
+ min_filesize: Skip files smaller than this size
+ max_filesize: Skip files larger than this size
+ xattr_set_filesize: Set ytdl.filesize user xattribute with expected size.
+ external_downloader_args: A dictionary of downloader keys (in lower case)
+ and a list of additional command-line arguments for the
+ executable. Use 'default' as the name for arguments to be
+ passed to all downloaders. For compatibility with youtube-dl,
+ a single list of args can also be used
+ hls_use_mpegts: Use the mpegts container for HLS videos.
+ http_chunk_size: Size of a chunk for chunk-based HTTP downloading. May be
+ useful for bypassing bandwidth throttling imposed by
+ a webserver (experimental)
+ progress_template: See YoutubeDL.py
+ retry_sleep_functions: See YoutubeDL.py
+
+ Subclasses of this one must re-define the real_download method.
+ """
+
+ _TEST_FILE_SIZE = 10241
+ params = None
+
+ def __init__(self, ydl, params):
+ """Create a FileDownloader object with the given options."""
+ self._set_ydl(ydl)
+ self._progress_hooks = []
+ self.params = params
+ self._prepare_multiline_status()
+ self.add_progress_hook(self.report_progress)
+
+ def _set_ydl(self, ydl):
+ self.ydl = ydl
+
+ for func in (
+ 'deprecation_warning',
+ 'deprecated_feature',
+ 'report_error',
+ 'report_file_already_downloaded',
+ 'report_warning',
+ 'to_console_title',
+ 'to_stderr',
+ 'trouble',
+ 'write_debug',
+ ):
+ if not hasattr(self, func):
+ setattr(self, func, getattr(ydl, func))
+
+ def to_screen(self, *args, **kwargs):
+ self.ydl.to_screen(*args, quiet=self.params.get('quiet'), **kwargs)
+
+ __to_screen = to_screen
+
+ @classproperty
+ def FD_NAME(cls):
+ return re.sub(r'(?<=[a-z])(?=[A-Z])', '_', cls.__name__[:-2]).lower()
+
+ @staticmethod
+ def format_seconds(seconds):
+ if seconds is None:
+ return ' Unknown'
+ time = timetuple_from_msec(seconds * 1000)
+ if time.hours > 99:
+ return '--:--:--'
+ return '%02d:%02d:%02d' % time[:-1]
+
+ @classmethod
+ def format_eta(cls, seconds):
+ return f'{remove_start(cls.format_seconds(seconds), "00:"):>8s}'
+
+ @staticmethod
+ def calc_percent(byte_counter, data_len):
+ if data_len is None:
+ return None
+ return float(byte_counter) / float(data_len) * 100.0
+
+ @staticmethod
+ def format_percent(percent):
+ return ' N/A%' if percent is None else f'{percent:>5.1f}%'
+
+ @classmethod
+ def calc_eta(cls, start_or_rate, now_or_remaining, total=NO_DEFAULT, current=NO_DEFAULT):
+ if total is NO_DEFAULT:
+ rate, remaining = start_or_rate, now_or_remaining
+ if None in (rate, remaining):
+ return None
+ return int(float(remaining) / rate)
+
+ start, now = start_or_rate, now_or_remaining
+ if total is None:
+ return None
+ if now is None:
+ now = time.time()
+ rate = cls.calc_speed(start, now, current)
+ return rate and int((float(total) - float(current)) / rate)
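+
+ # Example (illustrative): both calling conventions are supported:
+ #   calc_eta(1000, 5000)         # rate, remaining            -> 5 (seconds)
+ #   calc_eta(0, 10, 2000, 1000)  # start, now, total, current -> 10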
+
+ @staticmethod
+ def calc_speed(start, now, bytes):
+ dif = now - start
+ if bytes == 0 or dif < 0.001: # One millisecond
+ return None
+ return float(bytes) / dif
+
+ @staticmethod
+ def format_speed(speed):
+ return ' Unknown B/s' if speed is None else f'{format_bytes(speed):>10s}/s'
+
+ @staticmethod
+ def format_retries(retries):
+ return 'inf' if retries == float('inf') else int(retries)
+
+ @staticmethod
+ def filesize_or_none(unencoded_filename):
+ if os.path.isfile(unencoded_filename):
+ return os.path.getsize(unencoded_filename)
+ return 0
+
+ @staticmethod
+ def best_block_size(elapsed_time, bytes):
+ new_min = max(bytes / 2.0, 1.0)
+ new_max = min(max(bytes * 2.0, 1.0), 4194304) # Do not surpass 4 MB
+ if elapsed_time < 0.001:
+ return int(new_max)
+ rate = bytes / elapsed_time
+ if rate > new_max:
+ return int(new_max)
+ if rate < new_min:
+ return int(new_min)
+ return int(rate)
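+
+ # Example (illustrative): 8192 bytes read in 4 ms implies ~2 MB/s, more than
+ # double the previous block, so the block size doubles (capped at 4 MiB):
+ #   best_block_size(0.004, 8192)  # -> 16384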
+
+ @staticmethod
+ def parse_bytes(bytestr):
+ """Parse a string indicating a byte quantity into an integer."""
+ deprecation_warning('yt_dlp.FileDownloader.parse_bytes is deprecated and '
+ 'may be removed in the future. Use yt_dlp.utils.parse_bytes instead')
+ return parse_bytes(bytestr)
+
+ def slow_down(self, start_time, now, byte_counter):
+ """Sleep if the download speed is over the rate limit."""
+ rate_limit = self.params.get('ratelimit')
+ if rate_limit is None or byte_counter == 0:
+ return
+ if now is None:
+ now = time.time()
+ elapsed = now - start_time
+ if elapsed <= 0.0:
+ return
+ speed = float(byte_counter) / elapsed
+ if speed > rate_limit:
+ sleep_time = float(byte_counter) / rate_limit - elapsed
+ if sleep_time > 0:
+ time.sleep(sleep_time)
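+
+ # Example (illustrative): with ratelimit=1000 B/s, 5000 bytes downloaded in
+ # 2 s (2500 B/s) triggers a sleep of 5000/1000 - 2 = 3 s, which brings the
+ # average rate back down to the limit.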
+
+ def temp_name(self, filename):
+ """Returns a temporary filename for the given filename."""
+ if self.params.get('nopart', False) or filename == '-' or \
+ (os.path.exists(encodeFilename(filename)) and not os.path.isfile(encodeFilename(filename))):
+ return filename
+ return filename + '.part'
+
+ def undo_temp_name(self, filename):
+ if filename.endswith('.part'):
+ return filename[:-len('.part')]
+ return filename
+
+ def ytdl_filename(self, filename):
+ return filename + '.ytdl'
+
+ def wrap_file_access(action, *, fatal=False):
+ def error_callback(err, count, retries, *, fd):
+ return RetryManager.report_retry(
+ err, count, retries, info=fd.__to_screen,
+ warn=lambda e: (time.sleep(0.01), fd.to_screen(f'[download] Unable to {action} file: {e}')),
+ error=None if fatal else lambda e: fd.report_error(f'Unable to {action} file: {e}'),
+ sleep_func=fd.params.get('retry_sleep_functions', {}).get('file_access'))
+
+ def wrapper(self, func, *args, **kwargs):
+ for retry in RetryManager(self.params.get('file_access_retries', 3), error_callback, fd=self):
+ try:
+ return func(self, *args, **kwargs)
+ except OSError as err:
+ if err.errno in (errno.EACCES, errno.EINVAL):
+ retry.error = err
+ continue
+ retry.error_callback(err, 1, 0)
+
+ return functools.partial(functools.partialmethod, wrapper)
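+
+ # Note: wrap_file_access(action) builds a decorator; functools.partialmethod
+ # binds the decorated function as 'func' of wrapper(), so every call is
+ # retried on EACCES/EINVAL according to the 'file_access_retries' parameter.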
+
+ @wrap_file_access('open', fatal=True)
+ def sanitize_open(self, filename, open_mode):
+ f, filename = sanitize_open(filename, open_mode)
+ if not getattr(f, 'locked', None):
+ self.write_debug(f'{LockingUnsupportedError.msg}. Proceeding without locking', only_once=True)
+ return f, filename
+
+ @wrap_file_access('remove')
+ def try_remove(self, filename):
+ if os.path.isfile(filename):
+ os.remove(filename)
+
+ @wrap_file_access('rename')
+ def try_rename(self, old_filename, new_filename):
+ if old_filename == new_filename:
+ return
+ os.replace(old_filename, new_filename)
+
+ def try_utime(self, filename, last_modified_hdr):
+ """Try to set the last-modified time of the given file."""
+ if last_modified_hdr is None:
+ return
+ if not os.path.isfile(encodeFilename(filename)):
+ return
+ timestr = last_modified_hdr
+ filetime = timeconvert(timestr)
+ if filetime is None:
+ return filetime
+ # Ignore obviously invalid dates
+ if filetime == 0:
+ return
+ with contextlib.suppress(Exception):
+ os.utime(filename, (time.time(), filetime))
+ return filetime
+
+ def report_destination(self, filename):
+ """Report destination filename."""
+ self.to_screen('[download] Destination: ' + filename)
+
+ def _prepare_multiline_status(self, lines=1):
+ if self.params.get('noprogress'):
+ self._multiline = QuietMultilinePrinter()
+ elif self.ydl.params.get('logger'):
+ self._multiline = MultilineLogger(self.ydl.params['logger'], lines)
+ elif self.params.get('progress_with_newline'):
+ self._multiline = BreaklineStatusPrinter(self.ydl._out_files.out, lines)
+ else:
+ self._multiline = MultilinePrinter(self.ydl._out_files.out, lines, not self.params.get('quiet'))
+ self._multiline.allow_colors = self.ydl._allow_colors.out and self.ydl._allow_colors.out != 'no_color'
+ self._multiline._HAVE_FULLCAP = self.ydl._allow_colors.out
+
+ def _finish_multiline_status(self):
+ self._multiline.end()
+
+ ProgressStyles = Namespace(
+ downloaded_bytes='light blue',
+ percent='light blue',
+ eta='yellow',
+ speed='green',
+ elapsed='bold white',
+ total_bytes='',
+ total_bytes_estimate='',
+ )
+
+ def _report_progress_status(self, s, default_template):
+ for name, style in self.ProgressStyles.items_:
+ name = f'_{name}_str'
+ if name not in s:
+ continue
+ s[name] = self._format_progress(s[name], style)
+ s['_default_template'] = default_template % s
+
+ progress_dict = s.copy()
+ progress_dict.pop('info_dict')
+ progress_dict = {'info': s['info_dict'], 'progress': progress_dict}
+
+ progress_template = self.params.get('progress_template', {})
+ self._multiline.print_at_line(self.ydl.evaluate_outtmpl(
+ progress_template.get('download') or '[download] %(progress._default_template)s',
+ progress_dict), s.get('progress_idx') or 0)
+ self.to_console_title(self.ydl.evaluate_outtmpl(
+ progress_template.get('download-title') or 'yt-dlp %(progress._default_template)s',
+ progress_dict))
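+
+ # Example (illustrative): a custom template supplied via
+ #   params['progress_template'] = {
+ #       'download': '[dl] %(progress.downloaded_bytes)s of %(progress.total_bytes)s'}
+ # is evaluated against the {'info': ..., 'progress': ...} dict built above.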
+
+ def _format_progress(self, *args, **kwargs):
+ return self.ydl._format_text(
+ self._multiline.stream, self._multiline.allow_colors, *args, **kwargs)
+
+ def report_progress(self, s):
+ def with_fields(*tups, default=''):
+ for *fields, tmpl in tups:
+ if all(s.get(f) is not None for f in fields):
+ return tmpl
+ return default
+
+ _format_bytes = lambda k: f'{format_bytes(s.get(k)):>10s}'
+
+ if s['status'] == 'finished':
+ if self.params.get('noprogress'):
+ self.to_screen('[download] Download completed')
+ speed = try_call(lambda: s['total_bytes'] / s['elapsed'])
+ s.update({
+ 'speed': speed,
+ '_speed_str': self.format_speed(speed).strip(),
+ '_total_bytes_str': _format_bytes('total_bytes'),
+ '_elapsed_str': self.format_seconds(s.get('elapsed')),
+ '_percent_str': self.format_percent(100),
+ })
+ self._report_progress_status(s, join_nonempty(
+ '100%%',
+ with_fields(('total_bytes', 'of %(_total_bytes_str)s')),
+ with_fields(('elapsed', 'in %(_elapsed_str)s')),
+ with_fields(('speed', 'at %(_speed_str)s')),
+ delim=' '))
+
+ if s['status'] != 'downloading':
+ return
+
+ s.update({
+ '_eta_str': self.format_eta(s.get('eta')).strip(),
+ '_speed_str': self.format_speed(s.get('speed')),
+ '_percent_str': self.format_percent(try_call(
+ lambda: 100 * s['downloaded_bytes'] / s['total_bytes'],
+ lambda: 100 * s['downloaded_bytes'] / s['total_bytes_estimate'],
+ lambda: s['downloaded_bytes'] == 0 and 0)),
+ '_total_bytes_str': _format_bytes('total_bytes'),
+ '_total_bytes_estimate_str': _format_bytes('total_bytes_estimate'),
+ '_downloaded_bytes_str': _format_bytes('downloaded_bytes'),
+ '_elapsed_str': self.format_seconds(s.get('elapsed')),
+ })
+
+ msg_template = with_fields(
+ ('total_bytes', '%(_percent_str)s of %(_total_bytes_str)s at %(_speed_str)s ETA %(_eta_str)s'),
+ ('total_bytes_estimate', '%(_percent_str)s of ~%(_total_bytes_estimate_str)s at %(_speed_str)s ETA %(_eta_str)s'),
+ ('downloaded_bytes', 'elapsed', '%(_downloaded_bytes_str)s at %(_speed_str)s (%(_elapsed_str)s)'),
+ ('downloaded_bytes', '%(_downloaded_bytes_str)s at %(_speed_str)s'),
+ default='%(_percent_str)s at %(_speed_str)s ETA %(_eta_str)s')
+
+ msg_template += with_fields(
+ ('fragment_index', 'fragment_count', ' (frag %(fragment_index)s/%(fragment_count)s)'),
+ ('fragment_index', ' (frag %(fragment_index)s)'))
+ self._report_progress_status(s, msg_template)
+
+ def report_resuming_byte(self, resume_len):
+ """Report attempt to resume at given byte."""
+ self.to_screen('[download] Resuming download at byte %s' % resume_len)
+
+ def report_retry(self, err, count, retries, frag_index=NO_DEFAULT, fatal=True):
+ """Report retry"""
+ is_frag = False if frag_index is NO_DEFAULT else 'fragment'
+ RetryManager.report_retry(
+ err, count, retries, info=self.__to_screen,
+ warn=lambda msg: self.__to_screen(f'[download] Got error: {msg}'),
+ error=IDENTITY if not fatal else lambda e: self.report_error(f'\r[download] Got error: {e}'),
+ sleep_func=self.params.get('retry_sleep_functions', {}).get(is_frag or 'http'),
+ suffix=f'fragment{"s" if frag_index is None else f" {frag_index}"}' if is_frag else None)
+
+ def report_unable_to_resume(self):
+ """Report it was impossible to resume download."""
+ self.to_screen('[download] Unable to resume')
+
+ @staticmethod
+ def supports_manifest(manifest):
+ """ Whether the downloader can download the fragments from the manifest.
+ Redefine in subclasses if needed. """
+ pass
+
+ def download(self, filename, info_dict, subtitle=False):
+ """Download to a filename using the info from info_dict
+ Return True on success and False otherwise
+ """
+ nooverwrites_and_exists = (
+ not self.params.get('overwrites', True)
+ and os.path.exists(encodeFilename(filename))
+ )
+
+ if not hasattr(filename, 'write'):
+ continuedl_and_exists = (
+ self.params.get('continuedl', True)
+ and os.path.isfile(encodeFilename(filename))
+ and not self.params.get('nopart', False)
+ )
+
+ # Check file already present
+ if filename != '-' and (nooverwrites_and_exists or continuedl_and_exists):
+ self.report_file_already_downloaded(filename)
+ self._hook_progress({
+ 'filename': filename,
+ 'status': 'finished',
+ 'total_bytes': os.path.getsize(encodeFilename(filename)),
+ }, info_dict)
+ self._finish_multiline_status()
+ return True, False
+
+ if subtitle:
+ sleep_interval = self.params.get('sleep_interval_subtitles') or 0
+ else:
+ min_sleep_interval = self.params.get('sleep_interval') or 0
+ sleep_interval = random.uniform(
+ min_sleep_interval, self.params.get('max_sleep_interval') or min_sleep_interval)
+ if sleep_interval > 0:
+ self.to_screen(f'[download] Sleeping {sleep_interval:.2f} seconds ...')
+ time.sleep(sleep_interval)
+
+ ret = self.real_download(filename, info_dict)
+ self._finish_multiline_status()
+ return ret, True
+
+ def real_download(self, filename, info_dict):
+ """Real download process. Redefine in subclasses."""
+ raise NotImplementedError('This method must be implemented by subclasses')
+
+ def _hook_progress(self, status, info_dict):
+ # Ideally we want to make a copy of the dict, but that is too slow
+ status['info_dict'] = info_dict
+ # youtube-dl passes the same status object to all the hooks.
+ # Some third-party scripts seem to rely on this, so keep this
+ # behavior if possible
+ for ph in self._progress_hooks:
+ ph(status)
+
+ def add_progress_hook(self, ph):
+ # See YoutubeDL.py (search for progress_hooks) for a description of
+ # this interface
+ self._progress_hooks.append(ph)
+
+ def _debug_cmd(self, args, exe=None):
+ if not self.params.get('verbose', False):
+ return
+
+ str_args = [decodeArgument(a) for a in args]
+
+ if exe is None:
+ exe = os.path.basename(str_args[0])
+
+ self.write_debug(f'{exe} command line: {shell_quote(str_args)}')
diff --git a/yt_dlp/downloader/dash.py b/yt_dlp/downloader/dash.py
new file mode 100644
index 0000000..afc79b6
--- /dev/null
+++ b/yt_dlp/downloader/dash.py
@@ -0,0 +1,90 @@
+import time
+import urllib.parse
+
+from . import get_suitable_downloader
+from .fragment import FragmentFD
+from ..utils import update_url_query, urljoin
+
+
+class DashSegmentsFD(FragmentFD):
+ """
+ Download segments in a DASH manifest. External downloaders can take over
+ the fragment downloads by supporting the 'dash_frag_urls' protocol
+ """
+
+ FD_NAME = 'dashsegments'
+
+ def real_download(self, filename, info_dict):
+ if 'http_dash_segments_generator' in info_dict['protocol'].split('+'):
+ real_downloader = None # No external FD can support --live-from-start
+ else:
+ if info_dict.get('is_live'):
+ self.report_error('Live DASH videos are not supported')
+ real_downloader = get_suitable_downloader(
+ info_dict, self.params, None, protocol='dash_frag_urls', to_stdout=(filename == '-'))
+
+ real_start = time.time()
+
+ requested_formats = [{**info_dict, **fmt} for fmt in info_dict.get('requested_formats', [])]
+ args = []
+ for fmt in requested_formats or [info_dict]:
+ try:
+ fragment_count = 1 if self.params.get('test') else len(fmt['fragments'])
+ except TypeError:
+ fragment_count = None
+ ctx = {
+ 'filename': fmt.get('filepath') or filename,
+ 'live': 'is_from_start' if fmt.get('is_from_start') else fmt.get('is_live'),
+ 'total_frags': fragment_count,
+ }
+
+ if real_downloader:
+ self._prepare_external_frag_download(ctx)
+ else:
+ self._prepare_and_start_frag_download(ctx, fmt)
+ ctx['start'] = real_start
+
+ extra_query = None
+ extra_param_to_segment_url = info_dict.get('extra_param_to_segment_url')
+ if extra_param_to_segment_url:
+ extra_query = urllib.parse.parse_qs(extra_param_to_segment_url)
+
+ fragments_to_download = self._get_fragments(fmt, ctx, extra_query)
+
+ if real_downloader:
+ self.to_screen(
+ f'[{self.FD_NAME}] Fragment downloads will be delegated to {real_downloader.get_basename()}')
+ info_dict['fragments'] = list(fragments_to_download)
+ fd = real_downloader(self.ydl, self.params)
+ return fd.real_download(filename, info_dict)
+
+ args.append([ctx, fragments_to_download, fmt])
+
+ return self.download_and_append_fragments_multiple(*args, is_fatal=lambda idx: idx == 0)
+
+ def _resolve_fragments(self, fragments, ctx):
+ fragments = fragments(ctx) if callable(fragments) else fragments
+ return [next(iter(fragments))] if self.params.get('test') else fragments
+
+ def _get_fragments(self, fmt, ctx, extra_query):
+ fragment_base_url = fmt.get('fragment_base_url')
+ fragments = self._resolve_fragments(fmt['fragments'], ctx)
+
+ frag_index = 0
+ for i, fragment in enumerate(fragments):
+ frag_index += 1
+ if frag_index <= ctx['fragment_index']:
+ continue
+ fragment_url = fragment.get('url')
+ if not fragment_url:
+ assert fragment_base_url
+ fragment_url = urljoin(fragment_base_url, fragment['path'])
+ if extra_query:
+ fragment_url = update_url_query(fragment_url, extra_query)
+
+ yield {
+ 'frag_index': frag_index,
+ 'fragment_count': fragment.get('fragment_count'),
+ 'index': i,
+ 'url': fragment_url,
+ }
diff --git a/yt_dlp/downloader/external.py b/yt_dlp/downloader/external.py
new file mode 100644
index 0000000..ce5eeb0
--- /dev/null
+++ b/yt_dlp/downloader/external.py
@@ -0,0 +1,664 @@
+import enum
+import json
+import os
+import re
+import subprocess
+import sys
+import tempfile
+import time
+import uuid
+
+from .fragment import FragmentFD
+from ..compat import functools
+from ..networking import Request
+from ..postprocessor.ffmpeg import EXT_TO_OUT_FORMATS, FFmpegPostProcessor
+from ..utils import (
+ Popen,
+ RetryManager,
+ _configuration_args,
+ check_executable,
+ classproperty,
+ cli_bool_option,
+ cli_option,
+ cli_valueless_option,
+ determine_ext,
+ encodeArgument,
+ encodeFilename,
+ find_available_port,
+ remove_end,
+ traverse_obj,
+)
+
+
+class Features(enum.Enum):
+ TO_STDOUT = enum.auto()
+ MULTIPLE_FORMATS = enum.auto()
+
+
+class ExternalFD(FragmentFD):
+ SUPPORTED_PROTOCOLS = ('http', 'https', 'ftp', 'ftps')
+ SUPPORTED_FEATURES = ()
+ _CAPTURE_STDERR = True
+
+ def real_download(self, filename, info_dict):
+ self.report_destination(filename)
+ tmpfilename = self.temp_name(filename)
+ self._cookies_tempfile = None
+
+ try:
+ started = time.time()
+ retval = self._call_downloader(tmpfilename, info_dict)
+ except KeyboardInterrupt:
+ if not info_dict.get('is_live'):
+ raise
+ # Cancelling a live stream download should be considered a correct
+ # and expected termination, so all postprocessing should still
+ # take place
+ retval = 0
+ self.to_screen('[%s] Interrupted by user' % self.get_basename())
+ finally:
+ if self._cookies_tempfile:
+ self.try_remove(self._cookies_tempfile)
+
+ if retval == 0:
+ status = {
+ 'filename': filename,
+ 'status': 'finished',
+ 'elapsed': time.time() - started,
+ }
+ if filename != '-':
+ fsize = os.path.getsize(encodeFilename(tmpfilename))
+ self.try_rename(tmpfilename, filename)
+ status.update({
+ 'downloaded_bytes': fsize,
+ 'total_bytes': fsize,
+ })
+ self._hook_progress(status, info_dict)
+ return True
+ else:
+ self.to_stderr('\n')
+ self.report_error('%s exited with code %d' % (
+ self.get_basename(), retval))
+ return False
+
+ @classmethod
+ def get_basename(cls):
+ return cls.__name__[:-2].lower()
+
+ @classproperty
+ def EXE_NAME(cls):
+ return cls.get_basename()
+
+ @functools.cached_property
+ def exe(self):
+ return self.EXE_NAME
+
+ @classmethod
+ def available(cls, path=None):
+ path = check_executable(
+ cls.EXE_NAME if path in (None, cls.get_basename()) else path,
+ [cls.AVAILABLE_OPT])
+ if not path:
+ return False
+ cls.exe = path
+ return path
+
+ @classmethod
+ def supports(cls, info_dict):
+ return all((
+ not info_dict.get('to_stdout') or Features.TO_STDOUT in cls.SUPPORTED_FEATURES,
+ '+' not in info_dict['protocol'] or Features.MULTIPLE_FORMATS in cls.SUPPORTED_FEATURES,
+ not traverse_obj(info_dict, ('hls_aes', ...), 'extra_param_to_segment_url'),
+ all(proto in cls.SUPPORTED_PROTOCOLS for proto in info_dict['protocol'].split('+')),
+ ))
+
+ @classmethod
+ def can_download(cls, info_dict, path=None):
+ return cls.available(path) and cls.supports(info_dict)
+
+ def _option(self, command_option, param):
+ return cli_option(self.params, command_option, param)
+
+ def _bool_option(self, command_option, param, true_value='true', false_value='false', separator=None):
+ return cli_bool_option(self.params, command_option, param, true_value, false_value, separator)
+
+ def _valueless_option(self, command_option, param, expected_value=True):
+ return cli_valueless_option(self.params, command_option, param, expected_value)
+
+ def _configuration_args(self, keys=None, *args, **kwargs):
+ return _configuration_args(
+ self.get_basename(), self.params.get('external_downloader_args'), self.EXE_NAME,
+ keys, *args, **kwargs)
+
+ def _write_cookies(self):
+ if not self.ydl.cookiejar.filename:
+ tmp_cookies = tempfile.NamedTemporaryFile(suffix='.cookies', delete=False)
+ tmp_cookies.close()
+ self._cookies_tempfile = tmp_cookies.name
+ self.to_screen(f'[download] Writing temporary cookies file to "{self._cookies_tempfile}"')
+ # real_download resets _cookies_tempfile; if it's None then save() will write to cookiejar.filename
+ self.ydl.cookiejar.save(self._cookies_tempfile)
+ return self.ydl.cookiejar.filename or self._cookies_tempfile
+
+ def _call_downloader(self, tmpfilename, info_dict):
+ """ Either overwrite this or implement _make_cmd """
+ cmd = [encodeArgument(a) for a in self._make_cmd(tmpfilename, info_dict)]
+
+ self._debug_cmd(cmd)
+
+ if 'fragments' not in info_dict:
+ _, stderr, returncode = self._call_process(cmd, info_dict)
+ if returncode and stderr:
+ self.to_stderr(stderr)
+ return returncode
+
+ skip_unavailable_fragments = self.params.get('skip_unavailable_fragments', True)
+
+ retry_manager = RetryManager(self.params.get('fragment_retries'), self.report_retry,
+ frag_index=None, fatal=not skip_unavailable_fragments)
+ for retry in retry_manager:
+ _, stderr, returncode = self._call_process(cmd, info_dict)
+ if not returncode:
+ break
+ # TODO: Decide whether to retry based on error code
+ # https://aria2.github.io/manual/en/html/aria2c.html#exit-status
+ if stderr:
+ self.to_stderr(stderr)
+ retry.error = Exception()
+ continue
+ if not skip_unavailable_fragments and retry_manager.error:
+ return -1
+
+ decrypt_fragment = self.decrypter(info_dict)
+ dest, _ = self.sanitize_open(tmpfilename, 'wb')
+ for frag_index, fragment in enumerate(info_dict['fragments']):
+ fragment_filename = '%s-Frag%d' % (tmpfilename, frag_index)
+ try:
+ src, _ = self.sanitize_open(fragment_filename, 'rb')
+ except OSError as err:
+ if skip_unavailable_fragments and frag_index > 1:
+ self.report_skip_fragment(frag_index, err)
+ continue
+ self.report_error(f'Unable to open fragment {frag_index}; {err}')
+ return -1
+ dest.write(decrypt_fragment(fragment, src.read()))
+ src.close()
+ if not self.params.get('keep_fragments', False):
+ self.try_remove(encodeFilename(fragment_filename))
+ dest.close()
+ self.try_remove(encodeFilename('%s.frag.urls' % tmpfilename))
+ return 0
+
+ def _call_process(self, cmd, info_dict):
+ return Popen.run(cmd, text=True, stderr=subprocess.PIPE if self._CAPTURE_STDERR else None)
+
+
+class CurlFD(ExternalFD):
+ AVAILABLE_OPT = '-V'
+ _CAPTURE_STDERR = False # curl writes the progress to stderr
+
+ def _make_cmd(self, tmpfilename, info_dict):
+ cmd = [self.exe, '--location', '-o', tmpfilename, '--compressed']
+ cookie_header = self.ydl.cookiejar.get_cookie_header(info_dict['url'])
+ if cookie_header:
+ cmd += ['--cookie', cookie_header]
+ if info_dict.get('http_headers') is not None:
+ for key, val in info_dict['http_headers'].items():
+ cmd += ['--header', f'{key}: {val}']
+
+ cmd += self._bool_option('--continue-at', 'continuedl', '-', '0')
+ cmd += self._valueless_option('--silent', 'noprogress')
+ cmd += self._valueless_option('--verbose', 'verbose')
+ cmd += self._option('--limit-rate', 'ratelimit')
+ retry = self._option('--retry', 'retries')
+ if len(retry) == 2:
+ if retry[1] in ('inf', 'infinite'):
+ retry[1] = '2147483647'
+ cmd += retry
+ cmd += self._option('--max-filesize', 'max_filesize')
+ cmd += self._option('--interface', 'source_address')
+ cmd += self._option('--proxy', 'proxy')
+ cmd += self._valueless_option('--insecure', 'nocheckcertificate')
+ cmd += self._configuration_args()
+ cmd += ['--', info_dict['url']]
+ return cmd
+
+
+class AxelFD(ExternalFD):
+ AVAILABLE_OPT = '-V'
+
+ def _make_cmd(self, tmpfilename, info_dict):
+ cmd = [self.exe, '-o', tmpfilename]
+ if info_dict.get('http_headers') is not None:
+ for key, val in info_dict['http_headers'].items():
+ cmd += ['-H', f'{key}: {val}']
+ cookie_header = self.ydl.cookiejar.get_cookie_header(info_dict['url'])
+ if cookie_header:
+ cmd += ['-H', f'Cookie: {cookie_header}', '--max-redirect=0']
+ cmd += self._configuration_args()
+ cmd += ['--', info_dict['url']]
+ return cmd
+
+
+class WgetFD(ExternalFD):
+ AVAILABLE_OPT = '--version'
+
+ def _make_cmd(self, tmpfilename, info_dict):
+ cmd = [self.exe, '-O', tmpfilename, '-nv', '--compression=auto']
+ if self.ydl.cookiejar.get_cookie_header(info_dict['url']):
+ cmd += ['--load-cookies', self._write_cookies()]
+ if info_dict.get('http_headers') is not None:
+ for key, val in info_dict['http_headers'].items():
+ cmd += ['--header', f'{key}: {val}']
+ cmd += self._option('--limit-rate', 'ratelimit')
+ retry = self._option('--tries', 'retries')
+ if len(retry) == 2:
+ if retry[1] in ('inf', 'infinite'):
+ retry[1] = '0'
+ cmd += retry
+ cmd += self._option('--bind-address', 'source_address')
+ proxy = self.params.get('proxy')
+ if proxy:
+ for var in ('http_proxy', 'https_proxy'):
+ cmd += ['--execute', f'{var}={proxy}']
+ cmd += self._valueless_option('--no-check-certificate', 'nocheckcertificate')
+ cmd += self._configuration_args()
+ cmd += ['--', info_dict['url']]
+ return cmd
+
+
+class Aria2cFD(ExternalFD):
+ AVAILABLE_OPT = '-v'
+ SUPPORTED_PROTOCOLS = ('http', 'https', 'ftp', 'ftps', 'dash_frag_urls', 'm3u8_frag_urls')
+
+ @staticmethod
+ def supports_manifest(manifest):
+ UNSUPPORTED_FEATURES = [
+ r'#EXT-X-BYTERANGE', # playlists composed of byte ranges of media files [1]
+ # 1. https://tools.ietf.org/html/draft-pantos-http-live-streaming-17#section-4.3.2.2
+ ]
+ check_results = (not re.search(feature, manifest) for feature in UNSUPPORTED_FEATURES)
+ return all(check_results)
+
+ @staticmethod
+ def _aria2c_filename(fn):
+ return fn if os.path.isabs(fn) else f'.{os.path.sep}{fn}'
+
+ def _call_downloader(self, tmpfilename, info_dict):
+ # FIXME: Disabled due to https://github.com/yt-dlp/yt-dlp/issues/5931
+ if False and 'no-external-downloader-progress' not in self.params.get('compat_opts', []):
+ info_dict['__rpc'] = {
+ 'port': find_available_port() or 19190,
+ 'secret': str(uuid.uuid4()),
+ }
+ return super()._call_downloader(tmpfilename, info_dict)
+
+ def _make_cmd(self, tmpfilename, info_dict):
+ cmd = [self.exe, '-c', '--no-conf',
+ '--console-log-level=warn', '--summary-interval=0', '--download-result=hide',
+ '--http-accept-gzip=true', '--file-allocation=none', '-x16', '-j16', '-s16']
+ if 'fragments' in info_dict:
+ cmd += ['--allow-overwrite=true', '--allow-piece-length-change=true']
+ else:
+ cmd += ['--min-split-size', '1M']
+
+ if self.ydl.cookiejar.get_cookie_header(info_dict['url']):
+ cmd += [f'--load-cookies={self._write_cookies()}']
+ if info_dict.get('http_headers') is not None:
+ for key, val in info_dict['http_headers'].items():
+ cmd += ['--header', f'{key}: {val}']
+ cmd += self._option('--max-overall-download-limit', 'ratelimit')
+ cmd += self._option('--interface', 'source_address')
+ cmd += self._option('--all-proxy', 'proxy')
+ cmd += self._bool_option('--check-certificate', 'nocheckcertificate', 'false', 'true', '=')
+ cmd += self._bool_option('--remote-time', 'updatetime', 'true', 'false', '=')
+ cmd += self._bool_option('--show-console-readout', 'noprogress', 'false', 'true', '=')
+ cmd += self._configuration_args()
+
+ if '__rpc' in info_dict:
+ cmd += [
+ '--enable-rpc',
+ f'--rpc-listen-port={info_dict["__rpc"]["port"]}',
+ f'--rpc-secret={info_dict["__rpc"]["secret"]}']
+
+ # aria2c strips out spaces from the beginning/end of filenames and paths.
+ # We work around this issue by adding a "./" to the beginning of the
+ # filename and relative path, and adding a "/" at the end of the path.
+ # See: https://github.com/yt-dlp/yt-dlp/issues/276
+ # https://github.com/ytdl-org/youtube-dl/issues/20312
+ # https://github.com/aria2/aria2/issues/1373
+ dn = os.path.dirname(tmpfilename)
+ if dn:
+ cmd += ['--dir', self._aria2c_filename(dn) + os.path.sep]
+ if 'fragments' not in info_dict:
+ cmd += ['--out', self._aria2c_filename(os.path.basename(tmpfilename))]
+ cmd += ['--auto-file-renaming=false']
+
+ if 'fragments' in info_dict:
+ cmd += ['--uri-selector=inorder']
+ url_list_file = '%s.frag.urls' % tmpfilename
+ url_list = []
+ for frag_index, fragment in enumerate(info_dict['fragments']):
+ fragment_filename = '%s-Frag%d' % (os.path.basename(tmpfilename), frag_index)
+ url_list.append('%s\n\tout=%s' % (fragment['url'], self._aria2c_filename(fragment_filename)))
+ stream, _ = self.sanitize_open(url_list_file, 'wb')
+ stream.write('\n'.join(url_list).encode())
+ stream.close()
+ cmd += ['-i', self._aria2c_filename(url_list_file)]
+ else:
+ cmd += ['--', info_dict['url']]
+ return cmd
+
+ def aria2c_rpc(self, rpc_port, rpc_secret, method, params=()):
+ # Does not actually need to be UUID, just unique
+ sanitycheck = str(uuid.uuid4())
+ d = json.dumps({
+ 'jsonrpc': '2.0',
+ 'id': sanitycheck,
+ 'method': method,
+ 'params': [f'token:{rpc_secret}', *params],
+ }).encode('utf-8')
+ request = Request(
+ f'http://localhost:{rpc_port}/jsonrpc',
+ data=d, headers={
+ 'Content-Type': 'application/json',
+ 'Content-Length': f'{len(d)}',
+ }, proxies={'all': None})
+ with self.ydl.urlopen(request) as r:
+ resp = json.load(r)
+ assert resp.get('id') == sanitycheck, 'Something went wrong with RPC server'
+ return resp['result']
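+
+ # Example (illustrative; port and secret are whatever was stored in
+ # info_dict['__rpc']):
+ #   self.aria2c_rpc(19190, secret, 'aria2.tellActive')  # -> list of status dicts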
+
+ def _call_process(self, cmd, info_dict):
+ if '__rpc' not in info_dict:
+ return super()._call_process(cmd, info_dict)
+
+ send_rpc = functools.partial(self.aria2c_rpc, info_dict['__rpc']['port'], info_dict['__rpc']['secret'])
+ started = time.time()
+
+ fragmented = 'fragments' in info_dict
+ frag_count = len(info_dict['fragments']) if fragmented else 1
+ status = {
+ 'filename': info_dict.get('_filename'),
+ 'status': 'downloading',
+ 'elapsed': 0,
+ 'downloaded_bytes': 0,
+ 'fragment_count': frag_count if fragmented else None,
+ 'fragment_index': 0 if fragmented else None,
+ }
+ self._hook_progress(status, info_dict)
+
+ def get_stat(key, *obj, average=False):
+ val = tuple(filter(None, map(float, traverse_obj(obj, (..., ..., key))))) or [0]
+ return sum(val) / (len(val) if average else 1)
+
+ with Popen(cmd, text=True, stdout=subprocess.DEVNULL, stderr=subprocess.PIPE) as p:
+ # Add a small sleep so that the RPC client can receive the response;
+ # otherwise the connection stalls indefinitely
+ time.sleep(0.2)
+ retval = p.poll()
+ while retval is None:
+ # We don't use tellStatus as we won't know the GID without reading stdout
+ # Ref: https://aria2.github.io/manual/en/html/aria2c.html#aria2.tellActive
+ active = send_rpc('aria2.tellActive')
+ completed = send_rpc('aria2.tellStopped', [0, frag_count])
+
+ downloaded = get_stat('totalLength', completed) + get_stat('completedLength', active)
+ speed = get_stat('downloadSpeed', active)
+ total = frag_count * get_stat('totalLength', active, completed, average=True)
+ if total < downloaded:
+ total = None
+
+ status.update({
+ 'downloaded_bytes': int(downloaded),
+ 'speed': speed,
+ 'total_bytes': None if fragmented else total,
+ 'total_bytes_estimate': total,
+ 'eta': (total - downloaded) / (speed or 1),
+ 'fragment_index': min(frag_count, len(completed) + 1) if fragmented else None,
+ 'elapsed': time.time() - started
+ })
+ self._hook_progress(status, info_dict)
+
+ if not active and len(completed) >= frag_count:
+ send_rpc('aria2.shutdown')
+ retval = p.wait()
+ break
+
+ time.sleep(0.1)
+ retval = p.poll()
+
+ return '', p.stderr.read(), retval
+
+
+class HttpieFD(ExternalFD):
+ AVAILABLE_OPT = '--version'
+ EXE_NAME = 'http'
+
+ def _make_cmd(self, tmpfilename, info_dict):
+ cmd = ['http', '--download', '--output', tmpfilename, info_dict['url']]
+
+ if info_dict.get('http_headers') is not None:
+ for key, val in info_dict['http_headers'].items():
+ cmd += [f'{key}:{val}']
+
+ # httpie 3.1.0+ removes the Cookie header on redirect, so this should be safe for now. [1]
+ # If we ever need cookie handling for redirects, we can export the cookiejar into a session. [2]
+ # 1: https://github.com/httpie/httpie/security/advisories/GHSA-9w4w-cpc8-h2fq
+ # 2: https://httpie.io/docs/cli/sessions
+ cookie_header = self.ydl.cookiejar.get_cookie_header(info_dict['url'])
+ if cookie_header:
+ cmd += [f'Cookie:{cookie_header}']
+ return cmd
+
+
+class FFmpegFD(ExternalFD):
+ SUPPORTED_PROTOCOLS = ('http', 'https', 'ftp', 'ftps', 'm3u8', 'm3u8_native', 'rtsp', 'rtmp', 'rtmp_ffmpeg', 'mms', 'http_dash_segments')
+ SUPPORTED_FEATURES = (Features.TO_STDOUT, Features.MULTIPLE_FORMATS)
+
+ @classmethod
+ def available(cls, path=None):
+ # TODO: Fix path for ffmpeg
+ # Fixme: This may be wrong when --ffmpeg-location is used
+ return FFmpegPostProcessor().available
+
+ def on_process_started(self, proc, stdin):
+ """ Override this in subclasses """
+ pass
+
+ @classmethod
+ def can_merge_formats(cls, info_dict, params):
+ return (
+ info_dict.get('requested_formats')
+ and info_dict.get('protocol')
+ and not params.get('allow_unplayable_formats')
+ and 'no-direct-merge' not in params.get('compat_opts', [])
+ and cls.can_download(info_dict))
+
+ def _call_downloader(self, tmpfilename, info_dict):
+ ffpp = FFmpegPostProcessor(downloader=self)
+ if not ffpp.available:
+ self.report_error('m3u8 download detected but ffmpeg could not be found. Please install')
+ return False
+ ffpp.check_version()
+
+ args = [ffpp.executable, '-y']
+
+ for log_level in ('quiet', 'verbose'):
+ if self.params.get(log_level, False):
+ args += ['-loglevel', log_level]
+ break
+ if not self.params.get('verbose'):
+ args += ['-hide_banner']
+
+ args += traverse_obj(info_dict, ('downloader_options', 'ffmpeg_args'), default=[])
+
+ # These exist only for compatibility. Extractors should use
+ # info_dict['downloader_options']['ffmpeg_args'] instead
+ args += info_dict.get('_ffmpeg_args') or []
+ seekable = info_dict.get('_seekable')
+ if seekable is not None:
+ # setting -seekable prevents ffmpeg from guessing if the server
+ # supports seeking (by adding the header `Range: bytes=0-`), which
+ # can cause problems in some cases
+ # https://github.com/ytdl-org/youtube-dl/issues/11800#issuecomment-275037127
+ # http://trac.ffmpeg.org/ticket/6125#comment:10
+ args += ['-seekable', '1' if seekable else '0']
+
+ env = None
+ proxy = self.params.get('proxy')
+ if proxy:
+ if not re.match(r'^[\da-zA-Z]+://', proxy):
+ proxy = 'http://%s' % proxy
+
+ if proxy.startswith('socks'):
+ self.report_warning(
+ '%s does not support SOCKS proxies. Downloading is likely to fail. '
+ 'Consider adding --hls-prefer-native to your command.' % self.get_basename())
+
+ # Since December 2015 ffmpeg supports -http_proxy option (see
+ # http://git.videolan.org/?p=ffmpeg.git;a=commit;h=b4eb1f29ebddd60c41a2eb39f5af701e38e0d3fd)
+ # We could switch to the following code if we are able to detect version properly
+ # args += ['-http_proxy', proxy]
+ env = os.environ.copy()
+ env['HTTP_PROXY'] = proxy
+ env['http_proxy'] = proxy
+
+ protocol = info_dict.get('protocol')
+
+ if protocol == 'rtmp':
+ player_url = info_dict.get('player_url')
+ page_url = info_dict.get('page_url')
+ app = info_dict.get('app')
+ play_path = info_dict.get('play_path')
+ tc_url = info_dict.get('tc_url')
+ flash_version = info_dict.get('flash_version')
+ live = info_dict.get('rtmp_live', False)
+ conn = info_dict.get('rtmp_conn')
+ if player_url is not None:
+ args += ['-rtmp_swfverify', player_url]
+ if page_url is not None:
+ args += ['-rtmp_pageurl', page_url]
+ if app is not None:
+ args += ['-rtmp_app', app]
+ if play_path is not None:
+ args += ['-rtmp_playpath', play_path]
+ if tc_url is not None:
+ args += ['-rtmp_tcurl', tc_url]
+ if flash_version is not None:
+ args += ['-rtmp_flashver', flash_version]
+ if live:
+ args += ['-rtmp_live', 'live']
+ if isinstance(conn, list):
+ for entry in conn:
+ args += ['-rtmp_conn', entry]
+ elif isinstance(conn, str):
+ args += ['-rtmp_conn', conn]
+
+ start_time, end_time = info_dict.get('section_start') or 0, info_dict.get('section_end')
+
+ selected_formats = info_dict.get('requested_formats') or [info_dict]
+ for i, fmt in enumerate(selected_formats):
+ is_http = re.match(r'^https?://', fmt['url'])
+ cookies = self.ydl.cookiejar.get_cookies_for_url(fmt['url']) if is_http else []
+ if cookies:
+ args.extend(['-cookies', ''.join(
+ f'{cookie.name}={cookie.value}; path={cookie.path}; domain={cookie.domain};\r\n'
+ for cookie in cookies)])
+ if fmt.get('http_headers') and is_http:
+ # Trailing \r\n after each HTTP header is important to prevent warning from ffmpeg/avconv:
+ # [http @ 00000000003d2fa0] No trailing CRLF found in HTTP header.
+ args.extend(['-headers', ''.join(f'{key}: {val}\r\n' for key, val in fmt['http_headers'].items())])
+
+ if start_time:
+ args += ['-ss', str(start_time)]
+ if end_time:
+ args += ['-t', str(end_time - start_time)]
+
+ args += self._configuration_args((f'_i{i + 1}', '_i')) + ['-i', fmt['url']]
+
+ if not (start_time or end_time) or not self.params.get('force_keyframes_at_cuts'):
+ args += ['-c', 'copy']
+
+ if info_dict.get('requested_formats') or protocol == 'http_dash_segments':
+ for i, fmt in enumerate(selected_formats):
+ stream_number = fmt.get('manifest_stream_number', 0)
+ args.extend(['-map', f'{i}:{stream_number}'])
+
+ if self.params.get('test', False):
+ args += ['-fs', str(self._TEST_FILE_SIZE)]
+
+ ext = info_dict['ext']
+ if protocol in ('m3u8', 'm3u8_native'):
+ use_mpegts = (tmpfilename == '-') or self.params.get('hls_use_mpegts')
+ if use_mpegts is None:
+ use_mpegts = info_dict.get('is_live')
+ if use_mpegts:
+ args += ['-f', 'mpegts']
+ else:
+ args += ['-f', 'mp4']
+ if (ffpp.basename == 'ffmpeg' and ffpp._features.get('needs_adtstoasc')) and (not info_dict.get('acodec') or info_dict['acodec'].split('.')[0] in ('aac', 'mp4a')):
+ args += ['-bsf:a', 'aac_adtstoasc']
+ elif protocol == 'rtmp':
+ args += ['-f', 'flv']
+ elif ext == 'mp4' and tmpfilename == '-':
+ args += ['-f', 'mpegts']
+ elif ext == 'unknown_video':
+ ext = determine_ext(remove_end(tmpfilename, '.part'))
+ if ext == 'unknown_video':
+ self.report_warning(
+ 'The video format is unknown and cannot be downloaded by ffmpeg. '
+ 'Explicitly set the extension in the filename to attempt download in that format')
+ else:
+ self.report_warning(f'The video format is unknown. Trying to download as {ext} according to the filename')
+ args += ['-f', EXT_TO_OUT_FORMATS.get(ext, ext)]
+ else:
+ args += ['-f', EXT_TO_OUT_FORMATS.get(ext, ext)]
+
+ args += self._configuration_args(('_o1', '_o', ''))
+
+ args = [encodeArgument(opt) for opt in args]
+ args.append(encodeFilename(ffpp._ffmpeg_filename_argument(tmpfilename), True))
+ self._debug_cmd(args)
+
+ piped = any(fmt['url'] in ('-', 'pipe:') for fmt in selected_formats)
+ with Popen(args, stdin=subprocess.PIPE, env=env) as proc:
+ if piped:
+ self.on_process_started(proc, proc.stdin)
+ try:
+ retval = proc.wait()
+ except BaseException as e:
+ # subprocess.run would send SIGKILL to ffmpeg, leaving the mp4
+ # file unplayable; if we instead ask ffmpeg to quit, it produces a
+ # playable file (this is mostly useful for live streams). Note that
+ # Windows is not affected and produces playable files either way
+ # (see https://github.com/ytdl-org/youtube-dl/issues/8300).
+ if isinstance(e, KeyboardInterrupt) and sys.platform != 'win32' and not piped:
+ proc.communicate_or_kill(b'q')
+ else:
+ proc.kill(timeout=None)
+ raise
+ return retval
+
+
+class AVconvFD(FFmpegFD):
+ pass
+
+
+_BY_NAME = {
+ klass.get_basename(): klass
+ for name, klass in globals().items()
+ if name.endswith('FD') and name not in ('ExternalFD', 'FragmentFD')
+}
+
+
+def list_external_downloaders():
+ return sorted(_BY_NAME.keys())
+
+
+def get_external_downloader(external_downloader):
+ """ Given the name of the executable, see whether we support the given downloader """
+ bn = os.path.splitext(os.path.basename(external_downloader))[0]
+ return _BY_NAME.get(bn) or next((
+ klass for klass in _BY_NAME.values() if klass.EXE_NAME in bn
+ ), None)
diff --git a/yt_dlp/downloader/f4m.py b/yt_dlp/downloader/f4m.py
new file mode 100644
index 0000000..28cbba0
--- /dev/null
+++ b/yt_dlp/downloader/f4m.py
@@ -0,0 +1,427 @@
+import base64
+import io
+import itertools
+import struct
+import time
+import urllib.parse
+
+from .fragment import FragmentFD
+from ..compat import compat_etree_fromstring
+from ..networking.exceptions import HTTPError
+from ..utils import fix_xml_ampersands, xpath_text
+
+
+class DataTruncatedError(Exception):
+ pass
+
+
+class FlvReader(io.BytesIO):
+ """
+ Reader for Flv files
+ The file format is documented in https://www.adobe.com/devnet/f4v.html
+ """
+
+ def read_bytes(self, n):
+ data = self.read(n)
+ if len(data) < n:
+ raise DataTruncatedError(
+ 'FlvReader error: needed %d bytes but got only %d' % (
+ n, len(data)))
+ return data
+
+ # Utility functions for reading numbers and strings
+ def read_unsigned_long_long(self):
+ return struct.unpack('!Q', self.read_bytes(8))[0]
+
+ def read_unsigned_int(self):
+ return struct.unpack('!I', self.read_bytes(4))[0]
+
+ def read_unsigned_char(self):
+ return struct.unpack('!B', self.read_bytes(1))[0]
+
+ def read_string(self):
+ res = b''
+ while True:
+ char = self.read_bytes(1)
+ if char == b'\x00':
+ break
+ res += char
+ return res
+
+ def read_box_info(self):
+ """
+ Read a box and return the info as a tuple: (box_size, box_type, box_data)
+ """
+ real_size = size = self.read_unsigned_int()
+ box_type = self.read_bytes(4)
+ header_end = 8
+ if size == 1:
+ real_size = self.read_unsigned_long_long()
+ header_end = 16
+ return real_size, box_type, self.read_bytes(real_size - header_end)
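+
+ # Example (illustrative): a box is [size:4][type:4][payload]; when size == 1,
+ # an 8-byte extended size follows the type. For a 16-byte box with an 8-byte
+ # payload:
+ #   FlvReader(b'\x00\x00\x00\x10abst' + payload).read_box_info()
+ #   # -> (16, b'abst', payload)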
+
+ def read_asrt(self):
+ # version
+ self.read_unsigned_char()
+ # flags
+ self.read_bytes(3)
+ quality_entry_count = self.read_unsigned_char()
+ # QualityEntryCount
+ for i in range(quality_entry_count):
+ self.read_string()
+
+ segment_run_count = self.read_unsigned_int()
+ segments = []
+ for i in range(segment_run_count):
+ first_segment = self.read_unsigned_int()
+ fragments_per_segment = self.read_unsigned_int()
+ segments.append((first_segment, fragments_per_segment))
+
+ return {
+ 'segment_run': segments,
+ }
+
+ def read_afrt(self):
+ # version
+ self.read_unsigned_char()
+ # flags
+ self.read_bytes(3)
+ # time scale
+ self.read_unsigned_int()
+
+ quality_entry_count = self.read_unsigned_char()
+ # QualitySegmentUrlModifiers
+ for i in range(quality_entry_count):
+ self.read_string()
+
+ fragments_count = self.read_unsigned_int()
+ fragments = []
+ for i in range(fragments_count):
+ first = self.read_unsigned_int()
+ first_ts = self.read_unsigned_long_long()
+ duration = self.read_unsigned_int()
+ if duration == 0:
+ discontinuity_indicator = self.read_unsigned_char()
+ else:
+ discontinuity_indicator = None
+ fragments.append({
+ 'first': first,
+ 'ts': first_ts,
+ 'duration': duration,
+ 'discontinuity_indicator': discontinuity_indicator,
+ })
+
+ return {
+ 'fragments': fragments,
+ }
+
+ def read_abst(self):
+ # version
+ self.read_unsigned_char()
+ # flags
+ self.read_bytes(3)
+
+ self.read_unsigned_int() # BootstrapinfoVersion
+ # Profile,Live,Update,Reserved
+ flags = self.read_unsigned_char()
+ live = flags & 0x20 != 0
+ # time scale
+ self.read_unsigned_int()
+ # CurrentMediaTime
+ self.read_unsigned_long_long()
+ # SmpteTimeCodeOffset
+ self.read_unsigned_long_long()
+
+ self.read_string() # MovieIdentifier
+ server_count = self.read_unsigned_char()
+ # ServerEntryTable
+ for i in range(server_count):
+ self.read_string()
+ quality_count = self.read_unsigned_char()
+ # QualityEntryTable
+ for i in range(quality_count):
+ self.read_string()
+ # DrmData
+ self.read_string()
+ # MetaData
+ self.read_string()
+
+ segments_count = self.read_unsigned_char()
+ segments = []
+ for i in range(segments_count):
+ box_size, box_type, box_data = self.read_box_info()
+ assert box_type == b'asrt'
+ segment = FlvReader(box_data).read_asrt()
+ segments.append(segment)
+ fragments_run_count = self.read_unsigned_char()
+ fragments = []
+ for i in range(fragments_run_count):
+ box_size, box_type, box_data = self.read_box_info()
+ assert box_type == b'afrt'
+ fragments.append(FlvReader(box_data).read_afrt())
+
+ return {
+ 'segments': segments,
+ 'fragments': fragments,
+ 'live': live,
+ }
+
+ def read_bootstrap_info(self):
+ total_size, box_type, box_data = self.read_box_info()
+ assert box_type == b'abst'
+ return FlvReader(box_data).read_abst()
+
+
+def read_bootstrap_info(bootstrap_bytes):
+ return FlvReader(bootstrap_bytes).read_bootstrap_info()
+
+
+def build_fragments_list(boot_info):
+ """ Return a list of (segment, fragment) for each fragment in the video """
+ res = []
+ segment_run_table = boot_info['segments'][0]
+ fragment_run_entry_table = boot_info['fragments'][0]['fragments']
+ first_frag_number = fragment_run_entry_table[0]['first']
+ fragments_counter = itertools.count(first_frag_number)
+ for segment, fragments_count in segment_run_table['segment_run']:
+ # In some live HDS streams (e.g. Rai), `fragments_count` is abnormal
+ # and causes out-of-memory errors. It's OK to change the number of
+ # fragments for live streams, since they are updated periodically
+ if fragments_count == 4294967295 and boot_info['live']:
+ fragments_count = 2
+ for _ in range(fragments_count):
+ res.append((segment, next(fragments_counter)))
+
+ if boot_info['live']:
+ res = res[-2:]
+
+ return res
+
+
+def write_unsigned_int(stream, val):
+ stream.write(struct.pack('!I', val))
+
+
+def write_unsigned_int_24(stream, val):
+ stream.write(struct.pack('!I', val)[1:])
+
+
+def write_flv_header(stream):
+ """Writes the FLV header to stream"""
+ # FLV header
+ stream.write(b'FLV\x01')
+ stream.write(b'\x05')
+ stream.write(b'\x00\x00\x00\x09')
+ stream.write(b'\x00\x00\x00\x00')
+
+
+def write_metadata_tag(stream, metadata):
+ """Writes optional metadata tag to stream"""
+ SCRIPT_TAG = b'\x12'
+ FLV_TAG_HEADER_LEN = 11
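+ # FLV tag layout: 1-byte type (0x12 = script data), 24-bit payload size,
+ # 3-byte timestamp + 1 extended-timestamp byte + 3-byte stream id (all
+ # zero here), the payload, then a 32-bit PreviousTagSize
+ # (11-byte header + payload length)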
+
+ if metadata:
+ stream.write(SCRIPT_TAG)
+ write_unsigned_int_24(stream, len(metadata))
+ stream.write(b'\x00\x00\x00\x00\x00\x00\x00')
+ stream.write(metadata)
+ write_unsigned_int(stream, FLV_TAG_HEADER_LEN + len(metadata))
+
+
+def remove_encrypted_media(media):
+ return list(filter(lambda e: 'drmAdditionalHeaderId' not in e.attrib
+ and 'drmAdditionalHeaderSetId' not in e.attrib,
+ media))
+
+
+def _add_ns(prop, ver=1):
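+ # e.g. _add_ns('media') -> '{http://ns.adobe.com/f4m/1.0}media'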
+ return '{http://ns.adobe.com/f4m/%d.0}%s' % (ver, prop)
+
+
+def get_base_url(manifest):
+ base_url = xpath_text(
+ manifest, [_add_ns('baseURL'), _add_ns('baseURL', 2)],
+ 'base URL', default=None)
+ if base_url:
+ base_url = base_url.strip()
+ return base_url
+
+
+class F4mFD(FragmentFD):
+ """
+ A downloader for f4m manifests or AdobeHDS.
+ """
+
+ def _get_unencrypted_media(self, doc):
+ media = doc.findall(_add_ns('media'))
+ if not media:
+ self.report_error('No media found')
+ if not self.params.get('allow_unplayable_formats'):
+ for e in (doc.findall(_add_ns('drmAdditionalHeader'))
+ + doc.findall(_add_ns('drmAdditionalHeaderSet'))):
+ # If id attribute is missing it's valid for all media nodes
+ # without drmAdditionalHeaderId or drmAdditionalHeaderSetId attribute
+ if 'id' not in e.attrib:
+ self.report_error('Missing ID in f4m DRM')
+ media = remove_encrypted_media(media)
+ if not media:
+ self.report_error('Unsupported DRM')
+ return media
+
+ def _get_bootstrap_from_url(self, bootstrap_url):
+ bootstrap = self.ydl.urlopen(bootstrap_url).read()
+ return read_bootstrap_info(bootstrap)
+
+ def _update_live_fragments(self, bootstrap_url, latest_fragment):
+ fragments_list = []
+ retries = 30
+ while (not fragments_list) and (retries > 0):
+ boot_info = self._get_bootstrap_from_url(bootstrap_url)
+ fragments_list = build_fragments_list(boot_info)
+ fragments_list = [f for f in fragments_list if f[1] > latest_fragment]
+ if not fragments_list:
+ # Retry after a while
+ time.sleep(5.0)
+ retries -= 1
+
+ if not fragments_list:
+ self.report_error('Failed to update fragments')
+
+ return fragments_list
+
+ def _parse_bootstrap_node(self, node, base_url):
+ # Sometimes a non-empty inline bootstrap info is specified along with
+ # a bootstrap url attribute (e.g. the dummy inline bootstrap info in
+ # [1] contains whitespace characters). We prefer the bootstrap url
+ # over the inline bootstrap info when both are present.
+ # 1. http://live-1-1.rutube.ru/stream/1024/HDS/SD/C2NKsS85HQNckgn5HdEmOQ/1454167650/S-s604419906/move/four/dirs/upper/1024-576p.f4m
+ bootstrap_url = node.get('url')
+ if bootstrap_url:
+ bootstrap_url = urllib.parse.urljoin(
+ base_url, bootstrap_url)
+ boot_info = self._get_bootstrap_from_url(bootstrap_url)
+ else:
+ bootstrap_url = None
+ bootstrap = base64.b64decode(node.text)
+ boot_info = read_bootstrap_info(bootstrap)
+ return boot_info, bootstrap_url
+
+ def real_download(self, filename, info_dict):
+ man_url = info_dict['url']
+ requested_bitrate = info_dict.get('tbr')
+ self.to_screen('[%s] Downloading f4m manifest' % self.FD_NAME)
+
+ urlh = self.ydl.urlopen(self._prepare_url(info_dict, man_url))
+ man_url = urlh.url
+ # Some manifests may be malformed, e.g. prosiebensat1 generated manifests
+ # (see https://github.com/ytdl-org/youtube-dl/issues/6215#issuecomment-121704244
+ # and https://github.com/ytdl-org/youtube-dl/issues/7823)
+ manifest = fix_xml_ampersands(urlh.read().decode('utf-8', 'ignore')).strip()
+
+ doc = compat_etree_fromstring(manifest)
+ formats = [(int(f.attrib.get('bitrate', -1)), f)
+ for f in self._get_unencrypted_media(doc)]
+ if requested_bitrate is None or len(formats) == 1:
+ # get the best format
+ formats = sorted(formats, key=lambda f: f[0])
+ rate, media = formats[-1]
+ else:
+ rate, media = list(filter(
+ lambda f: int(f[0]) == requested_bitrate, formats))[0]
+
+ # Prefer baseURL for relative URLs as per 11.2 of F4M 3.0 spec.
+ man_base_url = get_base_url(doc) or man_url
+
+ base_url = urllib.parse.urljoin(man_base_url, media.attrib['url'])
+ bootstrap_node = doc.find(_add_ns('bootstrapInfo'))
+ boot_info, bootstrap_url = self._parse_bootstrap_node(
+ bootstrap_node, man_base_url)
+ live = boot_info['live']
+ metadata_node = media.find(_add_ns('metadata'))
+ if metadata_node is not None:
+ metadata = base64.b64decode(metadata_node.text)
+ else:
+ metadata = None
+
+ fragments_list = build_fragments_list(boot_info)
+ test = self.params.get('test', False)
+ if test:
+ # We only download the first fragment
+ fragments_list = fragments_list[:1]
+ total_frags = len(fragments_list)
+ # For some akamai manifests we'll need to add a query to the fragment url
+ akamai_pv = xpath_text(doc, _add_ns('pv-2.0'))
+
+ ctx = {
+ 'filename': filename,
+ 'total_frags': total_frags,
+ 'live': bool(live),
+ }
+
+ self._prepare_frag_download(ctx)
+
+ dest_stream = ctx['dest_stream']
+
+ if ctx['complete_frags_downloaded_bytes'] == 0:
+ write_flv_header(dest_stream)
+ if not live:
+ write_metadata_tag(dest_stream, metadata)
+
+ base_url_parsed = urllib.parse.urlparse(base_url)
+
+ self._start_frag_download(ctx, info_dict)
+
+ frag_index = 0
+ while fragments_list:
+ seg_i, frag_i = fragments_list.pop(0)
+ frag_index += 1
+ if frag_index <= ctx['fragment_index']:
+ continue
+ name = 'Seg%d-Frag%d' % (seg_i, frag_i)
+ query = []
+ if base_url_parsed.query:
+ query.append(base_url_parsed.query)
+ if akamai_pv:
+ query.append(akamai_pv.strip(';'))
+ if info_dict.get('extra_param_to_segment_url'):
+ query.append(info_dict['extra_param_to_segment_url'])
+ url_parsed = base_url_parsed._replace(path=base_url_parsed.path + name, query='&'.join(query))
+ try:
+ success = self._download_fragment(ctx, url_parsed.geturl(), info_dict)
+ if not success:
+ return False
+ down_data = self._read_fragment(ctx)
+ reader = FlvReader(down_data)
+ while True:
+ try:
+ _, box_type, box_data = reader.read_box_info()
+ except DataTruncatedError:
+ if test:
+ # In tests, segments may be truncated, and thus
+ # FlvReader may not be able to parse the whole
+ # chunk. If so, write the segment as is
+ # See https://github.com/ytdl-org/youtube-dl/issues/9214
+ dest_stream.write(down_data)
+ break
+ raise
+ if box_type == b'mdat':
+ self._append_fragment(ctx, box_data)
+ break
+ except HTTPError as err:
+ if live and (err.status == 404 or err.status == 410):
+ # We didn't keep up with the live window. Continue
+ # with the next available fragment.
+ msg = 'Fragment %d unavailable' % frag_i
+ self.report_warning(msg)
+ fragments_list = []
+ else:
+ raise
+
+ if not fragments_list and not test and live and bootstrap_url:
+ fragments_list = self._update_live_fragments(bootstrap_url, frag_i)
+ total_frags += len(fragments_list)
+ if fragments_list and (fragments_list[0][1] > frag_i + 1):
+ msg = 'Missed %d fragments' % (fragments_list[0][1] - (frag_i + 1))
+ self.report_warning(msg)
+
+ return self._finish_frag_download(ctx, info_dict)
diff --git a/yt_dlp/downloader/fc2.py b/yt_dlp/downloader/fc2.py
new file mode 100644
index 0000000..f9763de
--- /dev/null
+++ b/yt_dlp/downloader/fc2.py
@@ -0,0 +1,46 @@
+import threading
+
+from .common import FileDownloader
+from .external import FFmpegFD
+
+
+class FC2LiveFD(FileDownloader):
+ """
+ Downloads FC2 live streams without being stopped.
+ Note: this is not part of the public API and may be removed without notice.
+ DO NOT USE
+ """
+
+ def real_download(self, filename, info_dict):
+ ws = info_dict['ws']
+
+ heartbeat_lock = threading.Lock()
+ heartbeat_state = [None, 1]
+
+ def heartbeat():
+ if heartbeat_state[1] < 0:
+ return
+
+ try:
+ heartbeat_state[1] += 1
+ ws.send('{"name":"heartbeat","arguments":{},"id":%d}' % heartbeat_state[1])
+ except Exception:
+ self.to_screen('[fc2:live] Heartbeat failed')
+
+ with heartbeat_lock:
+ heartbeat_state[0] = threading.Timer(30, heartbeat)
+ heartbeat_state[0].daemon = True
+ heartbeat_state[0].start()
+
+ heartbeat()
+
+ new_info_dict = info_dict.copy()
+ new_info_dict.update({
+ 'ws': None,
+ 'protocol': 'live_ffmpeg',
+ })
+ try:
+ return FFmpegFD(self.ydl, self.params or {}).download(filename, new_info_dict)
+ finally:
+ # stop heartbeating
+ heartbeat_state[1] = -1
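+ # (the next heartbeat run sees the negative counter and stops re-arming)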
diff --git a/yt_dlp/downloader/fragment.py b/yt_dlp/downloader/fragment.py
new file mode 100644
index 0000000..b4f003d
--- /dev/null
+++ b/yt_dlp/downloader/fragment.py
@@ -0,0 +1,527 @@
+import concurrent.futures
+import contextlib
+import json
+import math
+import os
+import struct
+import time
+
+from .common import FileDownloader
+from .http import HttpFD
+from ..aes import aes_cbc_decrypt_bytes, unpad_pkcs7
+from ..compat import compat_os_name
+from ..networking import Request
+from ..networking.exceptions import HTTPError, IncompleteRead
+from ..utils import DownloadError, RetryManager, encodeFilename, traverse_obj
+from ..utils.networking import HTTPHeaderDict
+from ..utils.progress import ProgressCalculator
+
+
+class HttpQuietDownloader(HttpFD):
+ def to_screen(self, *args, **kwargs):
+ pass
+
+ to_console_title = to_screen
+
+
+class FragmentFD(FileDownloader):
+ """
+ A base file downloader class for fragmented media (e.g. f4m/m3u8 manifests).
+
+ Available options:
+
+ fragment_retries: Number of times to retry a fragment for HTTP error
+ (DASH and hlsnative only). Default is 0 for API, but 10 for CLI
+ skip_unavailable_fragments:
+ Skip unavailable fragments (DASH and hlsnative only)
+ keep_fragments: Keep downloaded fragments on disk after downloading is
+ finished
+ concurrent_fragment_downloads: The number of threads to use for native hls and dash downloads
+ _no_ytdl_file: Don't use .ytdl file
+
+ For each incomplete fragment download, yt-dlp keeps a special bookkeeping
+ file with download state and metadata on disk (in the future such files will
+ be used for any incomplete download handled by yt-dlp). This file is used
+ to properly handle resuming, check download file consistency and detect
+ potential errors. The file has a .ytdl extension and is a standard JSON
+ file with the following format:
+
+ extractor:
+ Dictionary of extractor related data. TBD.
+
+ downloader:
+ Dictionary of downloader related data. May contain following data:
+ current_fragment:
+ Dictionary with current (being downloaded) fragment data:
+ index: 0-based index of current fragment among all fragments
+ fragment_count:
+ Total count of fragments
+
+ This feature is experimental and file format may change in future.
+ """
+
+ def report_retry_fragment(self, err, frag_index, count, retries):
+ self.deprecation_warning('yt_dlp.downloader.FragmentFD.report_retry_fragment is deprecated. '
+ 'Use yt_dlp.downloader.FileDownloader.report_retry instead')
+ return self.report_retry(err, count, retries, frag_index)
+
+ def report_skip_fragment(self, frag_index, err=None):
+ err = f' {err};' if err else ''
+ self.to_screen(f'[download]{err} Skipping fragment {frag_index:d} ...')
+
+ def _prepare_url(self, info_dict, url):
+ headers = info_dict.get('http_headers')
+ return Request(url, None, headers) if headers else url
+
+ def _prepare_and_start_frag_download(self, ctx, info_dict):
+ self._prepare_frag_download(ctx)
+ self._start_frag_download(ctx, info_dict)
+
+ def __do_ytdl_file(self, ctx):
+ return ctx['live'] is not True and ctx['tmpfilename'] != '-' and not self.params.get('_no_ytdl_file')
+
+ def _read_ytdl_file(self, ctx):
+ assert 'ytdl_corrupt' not in ctx
+ stream, _ = self.sanitize_open(self.ytdl_filename(ctx['filename']), 'r')
+ try:
+ ytdl_data = json.loads(stream.read())
+ ctx['fragment_index'] = ytdl_data['downloader']['current_fragment']['index']
+ if 'extra_state' in ytdl_data['downloader']:
+ ctx['extra_state'] = ytdl_data['downloader']['extra_state']
+ except Exception:
+ ctx['ytdl_corrupt'] = True
+ finally:
+ stream.close()
+
+ def _write_ytdl_file(self, ctx):
+ frag_index_stream, _ = self.sanitize_open(self.ytdl_filename(ctx['filename']), 'w')
+ try:
+ downloader = {
+ 'current_fragment': {
+ 'index': ctx['fragment_index'],
+ },
+ }
+ if 'extra_state' in ctx:
+ downloader['extra_state'] = ctx['extra_state']
+ if ctx.get('fragment_count') is not None:
+ downloader['fragment_count'] = ctx['fragment_count']
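+ # resulting file is e.g.:
+ # {"downloader": {"current_fragment": {"index": 12}, "fragment_count": 120}}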
+ frag_index_stream.write(json.dumps({'downloader': downloader}))
+ finally:
+ frag_index_stream.close()
+
+ def _download_fragment(self, ctx, frag_url, info_dict, headers=None, request_data=None):
+ fragment_filename = '%s-Frag%d' % (ctx['tmpfilename'], ctx['fragment_index'])
+ fragment_info_dict = {
+ 'url': frag_url,
+ 'http_headers': headers or info_dict.get('http_headers'),
+ 'request_data': request_data,
+ 'ctx_id': ctx.get('ctx_id'),
+ }
+ frag_resume_len = 0
+ if ctx['dl'].params.get('continuedl', True):
+ frag_resume_len = self.filesize_or_none(self.temp_name(fragment_filename))
+ fragment_info_dict['frag_resume_len'] = ctx['frag_resume_len'] = frag_resume_len
+
+ success, _ = ctx['dl'].download(fragment_filename, fragment_info_dict)
+ if not success:
+ return False
+ if fragment_info_dict.get('filetime'):
+ ctx['fragment_filetime'] = fragment_info_dict.get('filetime')
+ ctx['fragment_filename_sanitized'] = fragment_filename
+ return True
+
+ def _read_fragment(self, ctx):
+ if not ctx.get('fragment_filename_sanitized'):
+ return None
+ try:
+ down, frag_sanitized = self.sanitize_open(ctx['fragment_filename_sanitized'], 'rb')
+ except FileNotFoundError:
+ if ctx.get('live'):
+ return None
+ raise
+ ctx['fragment_filename_sanitized'] = frag_sanitized
+ frag_content = down.read()
+ down.close()
+ return frag_content
+
+ def _append_fragment(self, ctx, frag_content):
+ try:
+ ctx['dest_stream'].write(frag_content)
+ ctx['dest_stream'].flush()
+ finally:
+ if self.__do_ytdl_file(ctx):
+ self._write_ytdl_file(ctx)
+ if not self.params.get('keep_fragments', False):
+ self.try_remove(encodeFilename(ctx['fragment_filename_sanitized']))
+ del ctx['fragment_filename_sanitized']
+
+ def _prepare_frag_download(self, ctx):
+ if not ctx.setdefault('live', False):
+ total_frags_str = '%d' % ctx['total_frags']
+ ad_frags = ctx.get('ad_frags', 0)
+ if ad_frags:
+ total_frags_str += ' (not including %d ad)' % ad_frags
+ else:
+ total_frags_str = 'unknown (live)'
+ self.to_screen(f'[{self.FD_NAME}] Total fragments: {total_frags_str}')
+ self.report_destination(ctx['filename'])
+ dl = HttpQuietDownloader(self.ydl, {
+ **self.params,
+ 'noprogress': True,
+ 'test': False,
+ 'sleep_interval': 0,
+ 'max_sleep_interval': 0,
+ 'sleep_interval_subtitles': 0,
+ })
+ tmpfilename = self.temp_name(ctx['filename'])
+ open_mode = 'wb'
+
+ # Establish possible resume length
+ resume_len = self.filesize_or_none(tmpfilename)
+ if resume_len > 0:
+ open_mode = 'ab'
+
+ # Should be initialized before ytdl file check
+ ctx.update({
+ 'tmpfilename': tmpfilename,
+ 'fragment_index': 0,
+ })
+
+ if self.__do_ytdl_file(ctx):
+ ytdl_file_exists = os.path.isfile(encodeFilename(self.ytdl_filename(ctx['filename'])))
+ continuedl = self.params.get('continuedl', True)
+ if continuedl and ytdl_file_exists:
+ self._read_ytdl_file(ctx)
+ is_corrupt = ctx.get('ytdl_corrupt') is True
+ is_inconsistent = ctx['fragment_index'] > 0 and resume_len == 0
+ if is_corrupt or is_inconsistent:
+ message = (
+ '.ytdl file is corrupt' if is_corrupt else
+ 'Inconsistent state of incomplete fragment download')
+ self.report_warning(
+ '%s. Restarting from the beginning ...' % message)
+ ctx['fragment_index'] = resume_len = 0
+ if 'ytdl_corrupt' in ctx:
+ del ctx['ytdl_corrupt']
+ self._write_ytdl_file(ctx)
+
+ else:
+ if not continuedl:
+ if ytdl_file_exists:
+ self._read_ytdl_file(ctx)
+ ctx['fragment_index'] = resume_len = 0
+ self._write_ytdl_file(ctx)
+ assert ctx['fragment_index'] == 0
+
+ dest_stream, tmpfilename = self.sanitize_open(tmpfilename, open_mode)
+
+ ctx.update({
+ 'dl': dl,
+ 'dest_stream': dest_stream,
+ 'tmpfilename': tmpfilename,
+ # Total complete fragments downloaded so far in bytes
+ 'complete_frags_downloaded_bytes': resume_len,
+ })
+
+ def _start_frag_download(self, ctx, info_dict):
+ resume_len = ctx['complete_frags_downloaded_bytes']
+ total_frags = ctx['total_frags']
+ ctx_id = ctx.get('ctx_id')
+ # Stores the download progress, updated by the progress hook
+ state = {
+ 'status': 'downloading',
+ 'downloaded_bytes': resume_len,
+ 'fragment_index': ctx['fragment_index'],
+ 'fragment_count': total_frags,
+ 'filename': ctx['filename'],
+ 'tmpfilename': ctx['tmpfilename'],
+ }
+
+ ctx['started'] = time.time()
+ progress = ProgressCalculator(resume_len)
+
+ def frag_progress_hook(s):
+ if s['status'] not in ('downloading', 'finished'):
+ return
+
+ if not total_frags and ctx.get('fragment_count'):
+ state['fragment_count'] = ctx['fragment_count']
+
+ if ctx_id is not None and s.get('ctx_id') != ctx_id:
+ return
+
+ state['max_progress'] = ctx.get('max_progress')
+ state['progress_idx'] = ctx.get('progress_idx')
+
+ state['elapsed'] = progress.elapsed
+ frag_total_bytes = s.get('total_bytes') or 0
+ s['fragment_info_dict'] = s.pop('info_dict', {})
+
+ # XXX: Fragment resume is not accounted for here
+ if not ctx['live']:
+ estimated_size = (
+ (ctx['complete_frags_downloaded_bytes'] + frag_total_bytes)
+ / (state['fragment_index'] + 1) * total_frags)
+ progress.total = estimated_size
+ progress.update(s.get('downloaded_bytes'))
+ state['total_bytes_estimate'] = progress.total
+ else:
+ progress.update(s.get('downloaded_bytes'))
+
+ if s['status'] == 'finished':
+ state['fragment_index'] += 1
+ ctx['fragment_index'] = state['fragment_index']
+ progress.thread_reset()
+
+ state['downloaded_bytes'] = ctx['complete_frags_downloaded_bytes'] = progress.downloaded
+ state['speed'] = ctx['speed'] = progress.speed.smooth
+ state['eta'] = progress.eta.smooth
+
+ self._hook_progress(state, info_dict)
+
+ ctx['dl'].add_progress_hook(frag_progress_hook)
+
+ return ctx['started']
+
+ def _finish_frag_download(self, ctx, info_dict):
+ ctx['dest_stream'].close()
+ if self.__do_ytdl_file(ctx):
+ self.try_remove(self.ytdl_filename(ctx['filename']))
+ elapsed = time.time() - ctx['started']
+
+ to_file = ctx['tmpfilename'] != '-'
+ if to_file:
+ downloaded_bytes = self.filesize_or_none(ctx['tmpfilename'])
+ else:
+ downloaded_bytes = ctx['complete_frags_downloaded_bytes']
+
+ if not downloaded_bytes:
+ if to_file:
+ self.try_remove(ctx['tmpfilename'])
+ self.report_error('The downloaded file is empty')
+ return False
+ elif to_file:
+ self.try_rename(ctx['tmpfilename'], ctx['filename'])
+ filetime = ctx.get('fragment_filetime')
+ if self.params.get('updatetime', True) and filetime:
+ with contextlib.suppress(Exception):
+ os.utime(ctx['filename'], (time.time(), filetime))
+
+ self._hook_progress({
+ 'downloaded_bytes': downloaded_bytes,
+ 'total_bytes': downloaded_bytes,
+ 'filename': ctx['filename'],
+ 'status': 'finished',
+ 'elapsed': elapsed,
+ 'ctx_id': ctx.get('ctx_id'),
+ 'max_progress': ctx.get('max_progress'),
+ 'progress_idx': ctx.get('progress_idx'),
+ }, info_dict)
+ return True
+
+ def _prepare_external_frag_download(self, ctx):
+ if 'live' not in ctx:
+ ctx['live'] = False
+ if not ctx['live']:
+ total_frags_str = '%d' % ctx['total_frags']
+ ad_frags = ctx.get('ad_frags', 0)
+ if ad_frags:
+ total_frags_str += ' (not including %d ad)' % ad_frags
+ else:
+ total_frags_str = 'unknown (live)'
+ self.to_screen(f'[{self.FD_NAME}] Total fragments: {total_frags_str}')
+
+ tmpfilename = self.temp_name(ctx['filename'])
+
+ # Should be initialized before ytdl file check
+ ctx.update({
+ 'tmpfilename': tmpfilename,
+ 'fragment_index': 0,
+ })
+
+ def decrypter(self, info_dict):
+ _key_cache = {}
+
+ def _get_key(url):
+ if url not in _key_cache:
+ _key_cache[url] = self.ydl.urlopen(self._prepare_url(info_dict, url)).read()
+ return _key_cache[url]
+
+ def decrypt_fragment(fragment, frag_content):
+ if frag_content is None:
+ return
+ decrypt_info = fragment.get('decrypt_info')
+ if not decrypt_info or decrypt_info['METHOD'] != 'AES-128':
+ return frag_content
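+ # per RFC 8216 (HLS), when no IV is given, the fragment's media sequence
+ # number serves as a 16-byte big-endian IV; e.g. for media_sequence 7:
+ # struct.pack('>8xq', 7) == b'\x00' * 15 + b'\x07'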
+ iv = decrypt_info.get('IV') or struct.pack('>8xq', fragment['media_sequence'])
+ decrypt_info['KEY'] = (decrypt_info.get('KEY')
+ or _get_key(traverse_obj(info_dict, ('hls_aes', 'uri')) or decrypt_info['URI']))
+ # Don't decrypt the content in tests since the data is explicitly truncated and is not padded
+ # to a valid block size (see https://github.com/ytdl-org/youtube-dl/pull/27660). Tests only care
+ # that the correct data was downloaded, not what it decrypts to.
+ if self.params.get('test', False):
+ return frag_content
+ return unpad_pkcs7(aes_cbc_decrypt_bytes(frag_content, decrypt_info['KEY'], iv))
+
+ return decrypt_fragment
+
+ def download_and_append_fragments_multiple(self, *args, **kwargs):
+ '''
+ @params (ctx1, fragments1, info_dict1), (ctx2, fragments2, info_dict2), ...
+ all args must be tuples or lists
+ '''
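+ # e.g. (hypothetical names) downloading video and audio with separate
+ # progress lines:
+ #   self.download_and_append_fragments_multiple(
+ #       (ctx_video, fragments_video, info_video),
+ #       (ctx_audio, fragments_audio, info_audio))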
+ interrupt_trigger = [True]
+ max_progress = len(args)
+ if max_progress == 1:
+ return self.download_and_append_fragments(*args[0], **kwargs)
+ max_workers = self.params.get('concurrent_fragment_downloads', 1)
+ if max_progress > 1:
+ self._prepare_multiline_status(max_progress)
+ is_live = any(traverse_obj(args, (..., 2, 'is_live')))
+
+ def thread_func(idx, ctx, fragments, info_dict, tpe):
+ ctx['max_progress'] = max_progress
+ ctx['progress_idx'] = idx
+ return self.download_and_append_fragments(
+ ctx, fragments, info_dict, **kwargs, tpe=tpe, interrupt_trigger=interrupt_trigger)
+
+ class FTPE(concurrent.futures.ThreadPoolExecutor):
+ # must override __exit__, or else shutdown would wait on the worker threads themselves
+ def __exit__(self, exc_type, exc_val, exc_tb):
+ pass
+
+ if compat_os_name == 'nt':
+ def future_result(future):
+ while True:
+ try:
+ return future.result(0.1)
+ except KeyboardInterrupt:
+ raise
+ except concurrent.futures.TimeoutError:
+ continue
+ else:
+ def future_result(future):
+ return future.result()
+
+ def interrupt_trigger_iter(fg):
+ for f in fg:
+ if not interrupt_trigger[0]:
+ break
+ yield f
+
+ spins = []
+ for idx, (ctx, fragments, info_dict) in enumerate(args):
+ tpe = FTPE(math.ceil(max_workers / max_progress))
+ job = tpe.submit(thread_func, idx, ctx, interrupt_trigger_iter(fragments), info_dict, tpe)
+ spins.append((tpe, job))
+
+ result = True
+ for tpe, job in spins:
+ try:
+ result = result and future_result(job)
+ except KeyboardInterrupt:
+ interrupt_trigger[0] = False
+ finally:
+ tpe.shutdown(wait=True)
+ if not interrupt_trigger[0] and not is_live:
+ raise KeyboardInterrupt()
+ # we expect that the user wants to stop but DOES want the preceding postprocessors to run;
+ # so return an intermediate result here instead of raising KeyboardInterrupt on live streams
+ return result
+
+ def download_and_append_fragments(
+ self, ctx, fragments, info_dict, *, is_fatal=(lambda idx: False),
+ pack_func=(lambda content, idx: content), finish_func=None,
+ tpe=None, interrupt_trigger=(True, )):
+
+ if not self.params.get('skip_unavailable_fragments', True):
+ is_fatal = lambda _: True
+
+ def download_fragment(fragment, ctx):
+ if not interrupt_trigger[0]:
+ return
+
+ frag_index = ctx['fragment_index'] = fragment['frag_index']
+ ctx['last_error'] = None
+ headers = HTTPHeaderDict(info_dict.get('http_headers'))
+ byte_range = fragment.get('byte_range')
+ if byte_range:
+ headers['Range'] = 'bytes=%d-%d' % (byte_range['start'], byte_range['end'] - 1)
+
+ # Never skip the first fragment
+ fatal = is_fatal(fragment.get('index') or (frag_index - 1))
+
+ def error_callback(err, count, retries):
+ if fatal and count > retries:
+ ctx['dest_stream'].close()
+ self.report_retry(err, count, retries, frag_index, fatal)
+ ctx['last_error'] = err
+
+ for retry in RetryManager(self.params.get('fragment_retries'), error_callback):
+ try:
+ ctx['fragment_count'] = fragment.get('fragment_count')
+ if not self._download_fragment(
+ ctx, fragment['url'], info_dict, headers, info_dict.get('request_data')):
+ return
+ except (HTTPError, IncompleteRead) as err:
+ retry.error = err
+ continue
+ except DownloadError: # has own retry settings
+ if fatal:
+ raise
+
+ def append_fragment(frag_content, frag_index, ctx):
+ if frag_content:
+ self._append_fragment(ctx, pack_func(frag_content, frag_index))
+ elif not is_fatal(frag_index - 1):
+ self.report_skip_fragment(frag_index, 'fragment not found')
+ else:
+ ctx['dest_stream'].close()
+ self.report_error(f'fragment {frag_index} not found, unable to continue')
+ return False
+ return True
+
+ decrypt_fragment = self.decrypter(info_dict)
+
+ max_workers = math.ceil(
+ self.params.get('concurrent_fragment_downloads', 1) / ctx.get('max_progress', 1))
+ if max_workers > 1:
+ def _download_fragment(fragment):
+ ctx_copy = ctx.copy()
+ download_fragment(fragment, ctx_copy)
+ return fragment, fragment['frag_index'], ctx_copy.get('fragment_filename_sanitized')
+
+ with tpe or concurrent.futures.ThreadPoolExecutor(max_workers) as pool:
+ try:
+ for fragment, frag_index, frag_filename in pool.map(_download_fragment, fragments):
+ ctx.update({
+ 'fragment_filename_sanitized': frag_filename,
+ 'fragment_index': frag_index,
+ })
+ if not append_fragment(decrypt_fragment(fragment, self._read_fragment(ctx)), frag_index, ctx):
+ return False
+ except KeyboardInterrupt:
+ self._finish_multiline_status()
+ self.report_error(
+ 'Interrupted by user. Waiting for all threads to shutdown...', is_error=False, tb=False)
+ pool.shutdown(wait=False)
+ raise
+ else:
+ for fragment in fragments:
+ if not interrupt_trigger[0]:
+ break
+ try:
+ download_fragment(fragment, ctx)
+ result = append_fragment(
+ decrypt_fragment(fragment, self._read_fragment(ctx)), fragment['frag_index'], ctx)
+ except KeyboardInterrupt:
+ if info_dict.get('is_live'):
+ break
+ raise
+ if not result:
+ return False
+
+ if finish_func is not None:
+ ctx['dest_stream'].write(finish_func())
+ ctx['dest_stream'].flush()
+ return self._finish_frag_download(ctx, info_dict)
diff --git a/yt_dlp/downloader/hls.py b/yt_dlp/downloader/hls.py
new file mode 100644
index 0000000..4ac5d99
--- /dev/null
+++ b/yt_dlp/downloader/hls.py
@@ -0,0 +1,378 @@
+import binascii
+import io
+import re
+import urllib.parse
+
+from . import get_suitable_downloader
+from .external import FFmpegFD
+from .fragment import FragmentFD
+from .. import webvtt
+from ..dependencies import Cryptodome
+from ..utils import (
+ bug_reports_message,
+ parse_m3u8_attributes,
+ remove_start,
+ traverse_obj,
+ update_url_query,
+ urljoin,
+)
+
+
+class HlsFD(FragmentFD):
+ """
+ Download segments in a m3u8 manifest. External downloaders can take over
+ the fragment downloads by supporting the 'm3u8_frag_urls' protocol and
+ re-defining 'supports_manifest' function
+ """
+
+ FD_NAME = 'hlsnative'
+
+ @staticmethod
+ def _has_drm(manifest): # TODO: https://github.com/yt-dlp/yt-dlp/pull/5039
+ return bool(re.search('|'.join((
+ r'#EXT-X-(?:SESSION-)?KEY:.*?URI="skd://', # Apple FairPlay
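+ # (e.g. matched by: #EXT-X-KEY:METHOD=SAMPLE-AES,URI="skd://some-key-id")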
+ r'#EXT-X-(?:SESSION-)?KEY:.*?KEYFORMAT="com\.apple\.streamingkeydelivery"', # Apple FairPlay
+ r'#EXT-X-(?:SESSION-)?KEY:.*?KEYFORMAT="com\.microsoft\.playready"', # Microsoft PlayReady
+ r'#EXT-X-FAXS-CM:', # Adobe Flash Access
+ )), manifest))
+
+ @classmethod
+ def can_download(cls, manifest, info_dict, allow_unplayable_formats=False):
+ UNSUPPORTED_FEATURES = [
+ # r'#EXT-X-BYTERANGE', # playlists composed of byte ranges of media files [2]
+
+ # Live streams heuristic does not always work (e.g. geo restricted to Germany
+ # http://hls-geo.daserste.de/i/videoportal/Film/c_620000/622873/format,716451,716457,716450,716458,716459,.mp4.csmil/index_4_av.m3u8?null=0)
+ # r'#EXT-X-MEDIA-SEQUENCE:(?!0$)', # live streams [3]
+
+ # This heuristic is also incorrect, since segments are not necessarily appended:
+ # Twitch VODs of finished streams have EXT-X-PLAYLIST-TYPE:EVENT even though
+ # no segments will ever be appended to the end of the playlist.
+ # r'#EXT-X-PLAYLIST-TYPE:EVENT', # media segments may be appended to the end of
+ # # event media playlists [4]
+ # r'#EXT-X-MAP:', # media initialization [5]
+ # 1. https://tools.ietf.org/html/draft-pantos-http-live-streaming-17#section-4.3.2.4
+ # 2. https://tools.ietf.org/html/draft-pantos-http-live-streaming-17#section-4.3.2.2
+ # 3. https://tools.ietf.org/html/draft-pantos-http-live-streaming-17#section-4.3.3.2
+ # 4. https://tools.ietf.org/html/draft-pantos-http-live-streaming-17#section-4.3.3.5
+ # 5. https://tools.ietf.org/html/draft-pantos-http-live-streaming-17#section-4.3.2.5
+ ]
+ if not allow_unplayable_formats:
+ UNSUPPORTED_FEATURES += [
+ r'#EXT-X-KEY:METHOD=(?!NONE|AES-128)', # encrypted streams [1], but not necessarily DRM
+ ]
+
+ def check_results():
+ yield not info_dict.get('is_live')
+ for feature in UNSUPPORTED_FEATURES:
+ yield not re.search(feature, manifest)
+ if not allow_unplayable_formats:
+ yield not cls._has_drm(manifest)
+ return all(check_results())
+
+ def real_download(self, filename, info_dict):
+ man_url = info_dict['url']
+ self.to_screen('[%s] Downloading m3u8 manifest' % self.FD_NAME)
+
+ urlh = self.ydl.urlopen(self._prepare_url(info_dict, man_url))
+ man_url = urlh.url
+ s = urlh.read().decode('utf-8', 'ignore')
+
+ can_download, message = self.can_download(s, info_dict, self.params.get('allow_unplayable_formats')), None
+ if can_download:
+ has_ffmpeg = FFmpegFD.available()
+ no_crypto = not Cryptodome.AES and '#EXT-X-KEY:METHOD=AES-128' in s
+ if no_crypto and has_ffmpeg:
+ can_download, message = False, 'The stream has AES-128 encryption and pycryptodomex is not available'
+ elif no_crypto:
+ message = ('The stream has AES-128 encryption and neither ffmpeg nor pycryptodomex are available; '
+ 'Decryption will be performed natively, but will be extremely slow')
+ elif info_dict.get('extractor_key') == 'Generic' and re.search(r'(?m)#EXT-X-MEDIA-SEQUENCE:(?!0$)', s):
+ install_ffmpeg = '' if has_ffmpeg else 'install ffmpeg and '
+ message = ('Live HLS streams are not supported by the native downloader. If this is a livestream, '
+ f'please {install_ffmpeg}add "--downloader ffmpeg --hls-use-mpegts" to your command')
+ if not can_download:
+ if self._has_drm(s) and not self.params.get('allow_unplayable_formats'):
+ if info_dict.get('has_drm') and self.params.get('test'):
+ self.to_screen(f'[{self.FD_NAME}] This format is DRM protected', skip_eol=True)
+ else:
+ self.report_error(
+ 'This format is DRM protected; Try selecting another format with --format or '
+ 'add --check-formats to automatically fallback to the next best format', tb=False)
+ return False
+ message = message or 'Unsupported features have been detected'
+ fd = FFmpegFD(self.ydl, self.params)
+ self.report_warning(f'{message}; extraction will be delegated to {fd.get_basename()}')
+ return fd.real_download(filename, info_dict)
+ elif message:
+ self.report_warning(message)
+
+ is_webvtt = info_dict['ext'] == 'vtt'
+ if is_webvtt:
+ real_downloader = None # Packing the fragments is not currently supported for external downloaders
+ else:
+ real_downloader = get_suitable_downloader(
+ info_dict, self.params, None, protocol='m3u8_frag_urls', to_stdout=(filename == '-'))
+ if real_downloader and not real_downloader.supports_manifest(s):
+ real_downloader = None
+ if real_downloader:
+ self.to_screen(f'[{self.FD_NAME}] Fragment downloads will be delegated to {real_downloader.get_basename()}')
+
+ def is_ad_fragment_start(s):
+ return (s.startswith('#ANVATO-SEGMENT-INFO') and 'type=ad' in s
+ or s.startswith('#UPLYNK-SEGMENT') and s.endswith(',ad'))
+
+ def is_ad_fragment_end(s):
+ return (s.startswith('#ANVATO-SEGMENT-INFO') and 'type=master' in s
+ or s.startswith('#UPLYNK-SEGMENT') and s.endswith(',segment'))
+
+ fragments = []
+
+ media_frags = 0
+ ad_frags = 0
+ ad_frag_next = False
+ for line in s.splitlines():
+ line = line.strip()
+ if not line:
+ continue
+ if line.startswith('#'):
+ if is_ad_fragment_start(line):
+ ad_frag_next = True
+ elif is_ad_fragment_end(line):
+ ad_frag_next = False
+ continue
+ if ad_frag_next:
+ ad_frags += 1
+ continue
+ media_frags += 1
+
+ ctx = {
+ 'filename': filename,
+ 'total_frags': media_frags,
+ 'ad_frags': ad_frags,
+ }
+
+ if real_downloader:
+ self._prepare_external_frag_download(ctx)
+ else:
+ self._prepare_and_start_frag_download(ctx, info_dict)
+
+ extra_state = ctx.setdefault('extra_state', {})
+
+ format_index = info_dict.get('format_index')
+ extra_query = None
+ extra_param_to_segment_url = info_dict.get('extra_param_to_segment_url')
+ if extra_param_to_segment_url:
+ extra_query = urllib.parse.parse_qs(extra_param_to_segment_url)
+ i = 0
+ media_sequence = 0
+ decrypt_info = {'METHOD': 'NONE'}
+ external_aes_key = traverse_obj(info_dict, ('hls_aes', 'key'))
+ if external_aes_key:
+ external_aes_key = binascii.unhexlify(remove_start(external_aes_key, '0x'))
+ assert len(external_aes_key) in (16, 24, 32), 'Invalid length for HLS AES-128 key'
+ external_aes_iv = traverse_obj(info_dict, ('hls_aes', 'iv'))
+ if external_aes_iv:
+ external_aes_iv = binascii.unhexlify(remove_start(external_aes_iv, '0x').zfill(32))
+ byte_range = {}
+ discontinuity_count = 0
+ frag_index = 0
+ ad_frag_next = False
+ for line in s.splitlines():
+ line = line.strip()
+ if line:
+ if not line.startswith('#'):
+ if format_index and discontinuity_count != format_index:
+ continue
+ if ad_frag_next:
+ continue
+ frag_index += 1
+ if frag_index <= ctx['fragment_index']:
+ continue
+ frag_url = urljoin(man_url, line)
+ if extra_query:
+ frag_url = update_url_query(frag_url, extra_query)
+
+ fragments.append({
+ 'frag_index': frag_index,
+ 'url': frag_url,
+ 'decrypt_info': decrypt_info,
+ 'byte_range': byte_range,
+ 'media_sequence': media_sequence,
+ })
+ media_sequence += 1
+
+ elif line.startswith('#EXT-X-MAP'):
+ if format_index and discontinuity_count != format_index:
+ continue
+ if frag_index > 0:
+ self.report_error(
+ 'Initialization fragment found after media fragments, unable to download')
+ return False
+ frag_index += 1
+ map_info = parse_m3u8_attributes(line[11:])
+ frag_url = urljoin(man_url, map_info.get('URI'))
+ if extra_query:
+ frag_url = update_url_query(frag_url, extra_query)
+
+ if map_info.get('BYTERANGE'):
+ splitted_byte_range = map_info.get('BYTERANGE').split('@')
+ sub_range_start = int(splitted_byte_range[1]) if len(splitted_byte_range) == 2 else byte_range['end']
+ byte_range = {
+ 'start': sub_range_start,
+ 'end': sub_range_start + int(splitted_byte_range[0]),
+ }
+
+ fragments.append({
+ 'frag_index': frag_index,
+ 'url': frag_url,
+ 'decrypt_info': decrypt_info,
+ 'byte_range': byte_range,
+ 'media_sequence': media_sequence
+ })
+ media_sequence += 1
+
+ elif line.startswith('#EXT-X-KEY'):
+ decrypt_url = decrypt_info.get('URI')
+ decrypt_info = parse_m3u8_attributes(line[11:])
+ if decrypt_info['METHOD'] == 'AES-128':
+ if external_aes_iv:
+ decrypt_info['IV'] = external_aes_iv
+ elif 'IV' in decrypt_info:
+ decrypt_info['IV'] = binascii.unhexlify(decrypt_info['IV'][2:].zfill(32))
+ if external_aes_key:
+ decrypt_info['KEY'] = external_aes_key
+ else:
+ decrypt_info['URI'] = urljoin(man_url, decrypt_info['URI'])
+ if extra_query:
+ decrypt_info['URI'] = update_url_query(decrypt_info['URI'], extra_query)
+ if decrypt_url != decrypt_info['URI']:
+ decrypt_info['KEY'] = None
+
+ elif line.startswith('#EXT-X-MEDIA-SEQUENCE'):
+ media_sequence = int(line[22:])
+ elif line.startswith('#EXT-X-BYTERANGE'):
+ splitted_byte_range = line[17:].split('@')
+ sub_range_start = int(splitted_byte_range[1]) if len(splitted_byte_range) == 2 else byte_range['end']
+ byte_range = {
+ 'start': sub_range_start,
+ 'end': sub_range_start + int(splitted_byte_range[0]),
+ }
+ elif is_ad_fragment_start(line):
+ ad_frag_next = True
+ elif is_ad_fragment_end(line):
+ ad_frag_next = False
+ elif line.startswith('#EXT-X-DISCONTINUITY'):
+ discontinuity_count += 1
+ i += 1
+
+ # We only download the first fragment during the test
+ if self.params.get('test', False):
+ fragments = [fragments[0] if fragments else None]
+
+ if real_downloader:
+ info_dict['fragments'] = fragments
+ fd = real_downloader(self.ydl, self.params)
+ # TODO: Make progress updates work without hooking twice
+ # for ph in self._progress_hooks:
+ # fd.add_progress_hook(ph)
+ return fd.real_download(filename, info_dict)
+
+ if is_webvtt:
+ def pack_fragment(frag_content, frag_index):
+ output = io.StringIO()
+ adjust = 0
+ overflow = False
+ mpegts_last = None
+ for block in webvtt.parse_fragment(frag_content):
+ if isinstance(block, webvtt.CueBlock):
+ extra_state['webvtt_mpegts_last'] = mpegts_last
+ if overflow:
+ extra_state['webvtt_mpegts_adjust'] += 1
+ overflow = False
+ block.start += adjust
+ block.end += adjust
+
+ dedup_window = extra_state.setdefault('webvtt_dedup_window', [])
+
+ ready = []
+
+ i = 0
+ is_new = True
+ while i < len(dedup_window):
+ wcue = dedup_window[i]
+ wblock = webvtt.CueBlock.from_json(wcue)
+ i += 1
+ if wblock.hinges(block):
+ wcue['end'] = block.end
+ is_new = False
+ continue
+ if wblock == block:
+ is_new = False
+ continue
+ if wblock.end > block.start:
+ continue
+ ready.append(wblock)
+ i -= 1
+ del dedup_window[i]
+
+ if is_new:
+ dedup_window.append(block.as_json)
+ for block in ready:
+ block.write_into(output)
+
+ # we only emit cues once they fall out of the duplicate window
+ continue
+ elif isinstance(block, webvtt.Magic):
+ # take care of MPEG PES timestamp overflow
+ if block.mpegts is None:
+ block.mpegts = 0
+ extra_state.setdefault('webvtt_mpegts_adjust', 0)
+ block.mpegts += extra_state['webvtt_mpegts_adjust'] << 33
+ if block.mpegts < extra_state.get('webvtt_mpegts_last', 0):
+ overflow = True
+ block.mpegts += 1 << 33
+ mpegts_last = block.mpegts
+
+ if frag_index == 1:
+ extra_state['webvtt_mpegts'] = block.mpegts or 0
+ extra_state['webvtt_local'] = block.local or 0
+ # XXX: block.local = block.mpegts = None ?
+ else:
+ if block.mpegts is not None and block.local is not None:
+ adjust = (
+ (block.mpegts - extra_state.get('webvtt_mpegts', 0))
+ - (block.local - extra_state.get('webvtt_local', 0))
+ )
+ continue
+ elif isinstance(block, webvtt.HeaderBlock):
+ if frag_index != 1:
+ # XXX: this should probably be silent as well
+ # or verify that all segments contain the same data
+ self.report_warning(bug_reports_message(
+ 'Discarding a %s block found in the middle of the stream; '
+ 'if the subtitles display incorrectly,'
+ % (type(block).__name__)))
+ continue
+ block.write_into(output)
+
+ return output.getvalue().encode()
+
+ def fin_fragments():
+ dedup_window = extra_state.get('webvtt_dedup_window')
+ if not dedup_window:
+ return b''
+
+ output = io.StringIO()
+ for cue in dedup_window:
+ webvtt.CueBlock.from_json(cue).write_into(output)
+
+ return output.getvalue().encode()
+
+ if len(fragments) == 1:
+ return self.download_and_append_fragments(ctx, fragments, info_dict)
+ else:
+ return self.download_and_append_fragments(
+ ctx, fragments, info_dict, pack_func=pack_fragment, finish_func=fin_fragments)
+ else:
+ return self.download_and_append_fragments(ctx, fragments, info_dict)
diff --git a/yt_dlp/downloader/http.py b/yt_dlp/downloader/http.py
new file mode 100644
index 0000000..693828b
--- /dev/null
+++ b/yt_dlp/downloader/http.py
@@ -0,0 +1,383 @@
+import os
+import random
+import time
+
+from .common import FileDownloader
+from ..networking import Request
+from ..networking.exceptions import (
+ CertificateVerifyError,
+ HTTPError,
+ TransportError,
+)
+from ..utils import (
+ ContentTooShortError,
+ RetryManager,
+ ThrottledDownload,
+ XAttrMetadataError,
+ XAttrUnavailableError,
+ encodeFilename,
+ int_or_none,
+ parse_http_range,
+ try_call,
+ write_xattr,
+)
+from ..utils.networking import HTTPHeaderDict
+
+
+class HttpFD(FileDownloader):
+ def real_download(self, filename, info_dict):
+ url = info_dict['url']
+ request_data = info_dict.get('request_data', None)
+
+ class DownloadContext(dict):
+ __getattr__ = dict.get
+ __setattr__ = dict.__setitem__
+ __delattr__ = dict.__delitem__
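+ # i.e. ctx.resume_len reads ctx['resume_len'] (None when unset), and
+ # attribute assignment/deletion maps to item assignment/deletion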
+
+ ctx = DownloadContext()
+ ctx.filename = filename
+ ctx.tmpfilename = self.temp_name(filename)
+ ctx.stream = None
+
+ # Disable compression
+ headers = HTTPHeaderDict({'Accept-Encoding': 'identity'}, info_dict.get('http_headers'))
+
+ is_test = self.params.get('test', False)
+ chunk_size = self._TEST_FILE_SIZE if is_test else (
+ self.params.get('http_chunk_size')
+ or info_dict.get('downloader_options', {}).get('http_chunk_size')
+ or 0)
+
+ ctx.open_mode = 'wb'
+ ctx.resume_len = 0
+ ctx.block_size = self.params.get('buffersize', 1024)
+ ctx.start_time = time.time()
+
+ # parse given Range
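+ # e.g. parse_http_range('bytes=500-999') -> (500, 999, None) and
+ # parse_http_range('bytes=500-') -> (500, None, None)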
+ req_start, req_end, _ = parse_http_range(headers.get('Range'))
+
+ if self.params.get('continuedl', True):
+ # Establish possible resume length
+ if os.path.isfile(encodeFilename(ctx.tmpfilename)):
+ ctx.resume_len = os.path.getsize(
+ encodeFilename(ctx.tmpfilename))
+
+ ctx.is_resume = ctx.resume_len > 0
+
+ class SucceedDownload(Exception):
+ pass
+
+ class RetryDownload(Exception):
+ def __init__(self, source_error):
+ self.source_error = source_error
+
+ class NextFragment(Exception):
+ pass
+
+ def establish_connection():
+ ctx.chunk_size = (random.randint(int(chunk_size * 0.95), chunk_size)
+ if not is_test and chunk_size else chunk_size)
+ if ctx.resume_len > 0:
+ range_start = ctx.resume_len
+ if req_start is not None:
+ # offset the beginning of Range to be within request
+ range_start += req_start
+ if ctx.is_resume:
+ self.report_resuming_byte(ctx.resume_len)
+ ctx.open_mode = 'ab'
+ elif req_start is not None:
+ range_start = req_start
+ elif ctx.chunk_size > 0:
+ range_start = 0
+ else:
+ range_start = None
+ ctx.is_resume = False
+
+ if ctx.chunk_size:
+ chunk_aware_end = range_start + ctx.chunk_size - 1
+ # we're not allowed to download outside Range
+ range_end = chunk_aware_end if req_end is None else min(chunk_aware_end, req_end)
+ elif req_end is not None:
+ # there's no need for chunked downloads, so download until the end of Range
+ range_end = req_end
+ else:
+ range_end = None
+
+ if try_call(lambda: range_start > range_end):
+ ctx.resume_len = 0
+ ctx.open_mode = 'wb'
+ raise RetryDownload(Exception(f'Conflicting range. (start={range_start} > end={range_end})'))
+
+ if try_call(lambda: range_end >= ctx.content_len):
+ range_end = ctx.content_len - 1
+
+ request = Request(url, request_data, headers)
+ has_range = range_start is not None
+ if has_range:
+ request.headers['Range'] = f'bytes={int(range_start)}-{int_or_none(range_end) or ""}'
+ # Establish connection
+ try:
+ ctx.data = self.ydl.urlopen(request)
+ # When trying to resume, the Content-Range HTTP header of the response has to be
+ # checked against the requested Range HTTP header. This is because some webservers
+ # don't support resuming and serve the whole file with no Content-Range set in
+ # the response despite the requested Range (see
+ # https://github.com/ytdl-org/youtube-dl/issues/6057#issuecomment-126129799)
+ if has_range:
+ content_range = ctx.data.headers.get('Content-Range')
+ content_range_start, content_range_end, content_len = parse_http_range(content_range)
+ # Content-Range is present and matches requested Range, resume is possible
+ if range_start == content_range_start and (
+ # Non-chunked download
+ not ctx.chunk_size
+ # Chunked download and requested piece or
+ # its part is promised to be served
+ or content_range_end == range_end
+ or content_len < range_end):
+ ctx.content_len = content_len
+ if content_len or req_end:
+ ctx.data_len = min(content_len or req_end, req_end or content_len) - (req_start or 0)
+ return
+ # Content-Range is either not present or invalid. Assume the remote webserver is
+ # sending the whole file; resume is not possible, so wipe the local file
+ # and redownload from scratch
+ elif range_start > 0:
+ self.report_unable_to_resume()
+ ctx.resume_len = 0
+ ctx.open_mode = 'wb'
+ ctx.data_len = ctx.content_len = int_or_none(ctx.data.headers.get('Content-length', None))
+ except HTTPError as err:
+ if err.status == 416:
+ # Unable to resume (requested range not satisfiable)
+ try:
+ # Open the connection again without the range header
+ ctx.data = self.ydl.urlopen(
+ Request(url, request_data, headers))
+ content_length = ctx.data.headers['Content-Length']
+ except HTTPError as err:
+ if err.status < 500 or err.status >= 600:
+ raise
+ else:
+ # Examine the reported length
+ if (content_length is not None
+ and (ctx.resume_len - 100 < int(content_length) < ctx.resume_len + 100)):
+ # The file had already been fully downloaded.
+ # Explanation of the above condition: in issue #175 it was revealed that
+ # YouTube sometimes adds or removes a few bytes from the end of the file,
+ # changing the file size slightly and causing problems for some users. So
+ # I decided to implement a suggested change and consider the file
+ # completely downloaded if the file size differs less than 100 bytes from
+ # the one in the hard drive.
+ self.report_file_already_downloaded(ctx.filename)
+ self.try_rename(ctx.tmpfilename, ctx.filename)
+ self._hook_progress({
+ 'filename': ctx.filename,
+ 'status': 'finished',
+ 'downloaded_bytes': ctx.resume_len,
+ 'total_bytes': ctx.resume_len,
+ }, info_dict)
+ raise SucceedDownload()
+ else:
+ # The length does not match, we start the download over
+ self.report_unable_to_resume()
+ ctx.resume_len = 0
+ ctx.open_mode = 'wb'
+ return
+ elif err.status < 500 or err.status >= 600:
+ # Unexpected HTTP error
+ raise
+ raise RetryDownload(err)
+ except CertificateVerifyError:
+ raise
+ except TransportError as err:
+ raise RetryDownload(err)
+
+ def close_stream():
+ if ctx.stream is not None:
+ if not ctx.tmpfilename == '-':
+ ctx.stream.close()
+ ctx.stream = None
+
+ def download():
+ data_len = ctx.data.headers.get('Content-length')
+
+ if ctx.data.headers.get('Content-encoding'):
+ # Content-encoding is present, Content-length is not reliable anymore as we are
+ # doing auto decompression. (See: https://github.com/yt-dlp/yt-dlp/pull/6176)
+ data_len = None
+
+ # Range HTTP header may be ignored/unsupported by a webserver
+ # (e.g. extractor/scivee.py, extractor/bambuser.py).
+ # However, during a test we still want to download just a piece of the file.
+ # To achieve this we limit data_len to _TEST_FILE_SIZE and manually control
+ # block size when downloading a file.
+ if is_test and (data_len is None or int(data_len) > self._TEST_FILE_SIZE):
+ data_len = self._TEST_FILE_SIZE
+
+ if data_len is not None:
+ data_len = int(data_len) + ctx.resume_len
+ min_data_len = self.params.get('min_filesize')
+ max_data_len = self.params.get('max_filesize')
+ if min_data_len is not None and data_len < min_data_len:
+ self.to_screen(
+ f'\r[download] File is smaller than min-filesize ({data_len} bytes < {min_data_len} bytes). Aborting.')
+ return False
+ if max_data_len is not None and data_len > max_data_len:
+ self.to_screen(
+ f'\r[download] File is larger than max-filesize ({data_len} bytes > {max_data_len} bytes). Aborting.')
+ return False
+
+ byte_counter = 0 + ctx.resume_len
+ block_size = ctx.block_size
+ start = time.time()
+
+ # measure time over whole while-loop, so slow_down() and best_block_size() work together properly
+ now = None # needed for slow_down() in the first loop run
+ before = start # start measuring
+
+ def retry(e):
+ close_stream()
+ if ctx.tmpfilename == '-':
+ ctx.resume_len = byte_counter
+ else:
+ try:
+ ctx.resume_len = os.path.getsize(encodeFilename(ctx.tmpfilename))
+ except FileNotFoundError:
+ ctx.resume_len = 0
+ raise RetryDownload(e)
+
+ while True:
+ try:
+ # Download and write
+ data_block = ctx.data.read(block_size if not is_test else min(block_size, data_len - byte_counter))
+ except TransportError as err:
+ retry(err)
+
+ byte_counter += len(data_block)
+
+ # exit loop when download is finished
+ if len(data_block) == 0:
+ break
+
+ # Open destination file just in time
+ if ctx.stream is None:
+ try:
+ ctx.stream, ctx.tmpfilename = self.sanitize_open(
+ ctx.tmpfilename, ctx.open_mode)
+ assert ctx.stream is not None
+ ctx.filename = self.undo_temp_name(ctx.tmpfilename)
+ self.report_destination(ctx.filename)
+ except OSError as err:
+ self.report_error('unable to open for writing: %s' % str(err))
+ return False
+
+ if self.params.get('xattr_set_filesize', False) and data_len is not None:
+ try:
+ write_xattr(ctx.tmpfilename, 'user.ytdl.filesize', str(data_len).encode())
+ except (XAttrUnavailableError, XAttrMetadataError) as err:
+ self.report_error('unable to set filesize xattr: %s' % str(err))
+
+ try:
+ ctx.stream.write(data_block)
+ except OSError as err:
+ self.to_stderr('\n')
+ self.report_error('unable to write data: %s' % str(err))
+ return False
+
+ # Apply rate limit
+ self.slow_down(start, now, byte_counter - ctx.resume_len)
+
+ # end measuring of one loop run
+ now = time.time()
+ after = now
+
+ # Adjust block size
+ if not self.params.get('noresizebuffer', False):
+ block_size = self.best_block_size(after - before, len(data_block))
+
+ before = after
+
+ # Progress message
+ speed = self.calc_speed(start, now, byte_counter - ctx.resume_len)
+ if ctx.data_len is None:
+ eta = None
+ else:
+ eta = self.calc_eta(start, time.time(), ctx.data_len - ctx.resume_len, byte_counter - ctx.resume_len)
+
+ self._hook_progress({
+ 'status': 'downloading',
+ 'downloaded_bytes': byte_counter,
+ 'total_bytes': ctx.data_len,
+ 'tmpfilename': ctx.tmpfilename,
+ 'filename': ctx.filename,
+ 'eta': eta,
+ 'speed': speed,
+ 'elapsed': now - ctx.start_time,
+ 'ctx_id': info_dict.get('ctx_id'),
+ }, info_dict)
+
+ if data_len is not None and byte_counter == data_len:
+ break
+
+ if speed and speed < (self.params.get('throttledratelimit') or 0):
+ # The speed must stay below the limit for 3 seconds
+ # This prevents raising an error when the speed temporarily drops
+ if ctx.throttle_start is None:
+ ctx.throttle_start = now
+ elif now - ctx.throttle_start > 3:
+ if ctx.stream is not None and ctx.tmpfilename != '-':
+ ctx.stream.close()
+ raise ThrottledDownload()
+ elif speed:
+ ctx.throttle_start = None
+
+ if ctx.stream is None:
+ self.to_stderr('\n')
+ self.report_error('Did not get any data blocks')
+ return False
+
+ if not is_test and ctx.chunk_size and ctx.content_len is not None and byte_counter < ctx.content_len:
+ ctx.resume_len = byte_counter
+ raise NextFragment()
+
+ if ctx.tmpfilename != '-':
+ ctx.stream.close()
+
+ if data_len is not None and byte_counter != data_len:
+ err = ContentTooShortError(byte_counter, int(data_len))
+ retry(err)
+
+ self.try_rename(ctx.tmpfilename, ctx.filename)
+
+ # Update file modification time
+ if self.params.get('updatetime', True):
+ info_dict['filetime'] = self.try_utime(ctx.filename, ctx.data.headers.get('last-modified', None))
+
+ self._hook_progress({
+ 'downloaded_bytes': byte_counter,
+ 'total_bytes': byte_counter,
+ 'filename': ctx.filename,
+ 'status': 'finished',
+ 'elapsed': time.time() - ctx.start_time,
+ 'ctx_id': info_dict.get('ctx_id'),
+ }, info_dict)
+
+ return True
+
+ for retry in RetryManager(self.params.get('retries'), self.report_retry):
+ try:
+ establish_connection()
+ return download()
+ except RetryDownload as err:
+ retry.error = err.source_error
+ continue
+ except NextFragment:
+ retry.error = None
+ retry.attempt -= 1
+ continue
+ except SucceedDownload:
+ return True
+ except: # noqa: E722
+ close_stream()
+ raise
+ return False
diff --git a/yt_dlp/downloader/ism.py b/yt_dlp/downloader/ism.py
new file mode 100644
index 0000000..dd688f5
--- /dev/null
+++ b/yt_dlp/downloader/ism.py
@@ -0,0 +1,283 @@
+import binascii
+import io
+import struct
+import time
+
+from .fragment import FragmentFD
+from ..networking.exceptions import HTTPError
+from ..utils import RetryManager
+
+u8 = struct.Struct('>B')
+u88 = struct.Struct('>Bx')
+u16 = struct.Struct('>H')
+u1616 = struct.Struct('>Hxx')
+u32 = struct.Struct('>I')
+u64 = struct.Struct('>Q')
+
+s88 = struct.Struct('>bx')
+s16 = struct.Struct('>h')
+s1616 = struct.Struct('>hxx')
+s32 = struct.Struct('>i')
+
+unity_matrix = (s32.pack(0x10000) + s32.pack(0) * 3) * 2 + s32.pack(0x40000000)
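+# identity transformation matrix: 1.0 on the diagonal in 16.16 fixed point,
+# with the final entry (0x40000000) being 1.0 in 2.30 fixed point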
+
+TRACK_ENABLED = 0x1
+TRACK_IN_MOVIE = 0x2
+TRACK_IN_PREVIEW = 0x4
+
+SELF_CONTAINED = 0x1
+
+
+def box(box_type, payload):
+ return u32.pack(8 + len(payload)) + box_type + payload
+
+
+def full_box(box_type, version, flags, payload):
+ return box(box_type, u8.pack(version) + u32.pack(flags)[1:] + payload)
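+# e.g. box(b'free', b'') == b'\x00\x00\x00\x08free'; full_box() additionally
+# prepends a 1-byte version and a 3-byte flags field to the payload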
+
+
+def write_piff_header(stream, params):
+ track_id = params['track_id']
+ fourcc = params['fourcc']
+ duration = params['duration']
+ timescale = params.get('timescale', 10000000)
+ language = params.get('language', 'und')
+ height = params.get('height', 0)
+ width = params.get('width', 0)
+ stream_type = params['stream_type']
+ creation_time = modification_time = int(time.time())
+
+ ftyp_payload = b'isml' # major brand
+ ftyp_payload += u32.pack(1) # minor version
+ ftyp_payload += b'piff' + b'iso2' # compatible brands
+ stream.write(box(b'ftyp', ftyp_payload)) # File Type Box
+
+ mvhd_payload = u64.pack(creation_time)
+ mvhd_payload += u64.pack(modification_time)
+ mvhd_payload += u32.pack(timescale)
+ mvhd_payload += u64.pack(duration)
+ mvhd_payload += s1616.pack(1) # rate
+ mvhd_payload += s88.pack(1) # volume
+ mvhd_payload += u16.pack(0) # reserved
+ mvhd_payload += u32.pack(0) * 2 # reserved
+ mvhd_payload += unity_matrix
+ mvhd_payload += u32.pack(0) * 6 # pre defined
+ mvhd_payload += u32.pack(0xffffffff) # next track id
+ moov_payload = full_box(b'mvhd', 1, 0, mvhd_payload) # Movie Header Box
+
+ tkhd_payload = u64.pack(creation_time)
+ tkhd_payload += u64.pack(modification_time)
+ tkhd_payload += u32.pack(track_id) # track id
+ tkhd_payload += u32.pack(0) # reserved
+ tkhd_payload += u64.pack(duration)
+ tkhd_payload += u32.pack(0) * 2 # reserved
+ tkhd_payload += s16.pack(0) # layer
+ tkhd_payload += s16.pack(0) # alternate group
+ tkhd_payload += s88.pack(1 if stream_type == 'audio' else 0) # volume
+ tkhd_payload += u16.pack(0) # reserved
+ tkhd_payload += unity_matrix
+ tkhd_payload += u1616.pack(width)
+ tkhd_payload += u1616.pack(height)
+ trak_payload = full_box(b'tkhd', 1, TRACK_ENABLED | TRACK_IN_MOVIE | TRACK_IN_PREVIEW, tkhd_payload) # Track Header Box
+
+ mdhd_payload = u64.pack(creation_time)
+ mdhd_payload += u64.pack(modification_time)
+ mdhd_payload += u32.pack(timescale)
+ mdhd_payload += u64.pack(duration)
+ mdhd_payload += u16.pack(((ord(language[0]) - 0x60) << 10) | ((ord(language[1]) - 0x60) << 5) | (ord(language[2]) - 0x60))
+ mdhd_payload += u16.pack(0) # pre defined
+ mdia_payload = full_box(b'mdhd', 1, 0, mdhd_payload) # Media Header Box
+
+ hdlr_payload = u32.pack(0) # pre defined
+ if stream_type == 'audio': # handler type
+ hdlr_payload += b'soun'
+ hdlr_payload += u32.pack(0) * 3 # reserved
+ hdlr_payload += b'SoundHandler\0' # name
+ elif stream_type == 'video':
+ hdlr_payload += b'vide'
+ hdlr_payload += u32.pack(0) * 3 # reserved
+ hdlr_payload += b'VideoHandler\0' # name
+ elif stream_type == 'text':
+ hdlr_payload += b'subt'
+ hdlr_payload += u32.pack(0) * 3 # reserved
+ hdlr_payload += b'SubtitleHandler\0' # name
+ else:
+ assert False
+ mdia_payload += full_box(b'hdlr', 0, 0, hdlr_payload) # Handler Reference Box
+
+ if stream_type == 'audio':
+ smhd_payload = s88.pack(0) # balance
+ smhd_payload += u16.pack(0) # reserved
+ media_header_box = full_box(b'smhd', 0, 0, smhd_payload) # Sound Media Header
+ elif stream_type == 'video':
+ vmhd_payload = u16.pack(0) # graphics mode
+ vmhd_payload += u16.pack(0) * 3 # opcolor
+ media_header_box = full_box(b'vmhd', 0, 1, vmhd_payload) # Video Media Header
+ elif stream_type == 'text':
+ media_header_box = full_box(b'sthd', 0, 0, b'') # Subtitle Media Header
+ else:
+ assert False
+ minf_payload = media_header_box
+
+ dref_payload = u32.pack(1) # entry count
+ dref_payload += full_box(b'url ', 0, SELF_CONTAINED, b'') # Data Entry URL Box
+ dinf_payload = full_box(b'dref', 0, 0, dref_payload) # Data Reference Box
+ minf_payload += box(b'dinf', dinf_payload) # Data Information Box
+
+ stsd_payload = u32.pack(1) # entry count
+
+ sample_entry_payload = u8.pack(0) * 6 # reserved
+ sample_entry_payload += u16.pack(1) # data reference index
+ if stream_type == 'audio':
+ sample_entry_payload += u32.pack(0) * 2 # reserved
+ sample_entry_payload += u16.pack(params.get('channels', 2))
+ sample_entry_payload += u16.pack(params.get('bits_per_sample', 16))
+ sample_entry_payload += u16.pack(0) # pre defined
+ sample_entry_payload += u16.pack(0) # reserved
+ sample_entry_payload += u1616.pack(params['sampling_rate'])
+
+ if fourcc == 'AACL':
+ sample_entry_box = box(b'mp4a', sample_entry_payload)
+        elif fourcc == 'EC-3':
+ sample_entry_box = box(b'ec-3', sample_entry_payload)
+ elif stream_type == 'video':
+ sample_entry_payload += u16.pack(0) # pre defined
+ sample_entry_payload += u16.pack(0) # reserved
+ sample_entry_payload += u32.pack(0) * 3 # pre defined
+ sample_entry_payload += u16.pack(width)
+ sample_entry_payload += u16.pack(height)
+ sample_entry_payload += u1616.pack(0x48) # horiz resolution 72 dpi
+ sample_entry_payload += u1616.pack(0x48) # vert resolution 72 dpi
+ sample_entry_payload += u32.pack(0) # reserved
+ sample_entry_payload += u16.pack(1) # frame count
+ sample_entry_payload += u8.pack(0) * 32 # compressor name
+ sample_entry_payload += u16.pack(0x18) # depth
+ sample_entry_payload += s16.pack(-1) # pre defined
+
+ codec_private_data = binascii.unhexlify(params['codec_private_data'].encode())
+ if fourcc in ('H264', 'AVC1'):
+ sps, pps = codec_private_data.split(u32.pack(1))[1:]
+ avcc_payload = u8.pack(1) # configuration version
+ avcc_payload += sps[1:4] # avc profile indication + profile compatibility + avc level indication
+ avcc_payload += u8.pack(0xfc | (params.get('nal_unit_length_field', 4) - 1)) # complete representation (1) + reserved (11111) + length size minus one
+ avcc_payload += u8.pack(1) # reserved (0) + number of sps (0000001)
+ avcc_payload += u16.pack(len(sps))
+ avcc_payload += sps
+ avcc_payload += u8.pack(1) # number of pps
+ avcc_payload += u16.pack(len(pps))
+ avcc_payload += pps
+ sample_entry_payload += box(b'avcC', avcc_payload) # AVC Decoder Configuration Record
+ sample_entry_box = box(b'avc1', sample_entry_payload) # AVC Simple Entry
+ else:
+ assert False
+ elif stream_type == 'text':
+ if fourcc == 'TTML':
+ sample_entry_payload += b'http://www.w3.org/ns/ttml\0' # namespace
+ sample_entry_payload += b'\0' # schema location
+            sample_entry_payload += b'\0'  # auxiliary MIME types (??)
+ sample_entry_box = box(b'stpp', sample_entry_payload)
+ else:
+ assert False
+ else:
+ assert False
+ stsd_payload += sample_entry_box
+
+ stbl_payload = full_box(b'stsd', 0, 0, stsd_payload) # Sample Description Box
+
+ stts_payload = u32.pack(0) # entry count
+ stbl_payload += full_box(b'stts', 0, 0, stts_payload) # Decoding Time to Sample Box
+
+ stsc_payload = u32.pack(0) # entry count
+ stbl_payload += full_box(b'stsc', 0, 0, stsc_payload) # Sample To Chunk Box
+
+ stco_payload = u32.pack(0) # entry count
+ stbl_payload += full_box(b'stco', 0, 0, stco_payload) # Chunk Offset Box
+
+ minf_payload += box(b'stbl', stbl_payload) # Sample Table Box
+
+ mdia_payload += box(b'minf', minf_payload) # Media Information Box
+
+ trak_payload += box(b'mdia', mdia_payload) # Media Box
+
+ moov_payload += box(b'trak', trak_payload) # Track Box
+
+ mehd_payload = u64.pack(duration)
+ mvex_payload = full_box(b'mehd', 1, 0, mehd_payload) # Movie Extends Header Box
+
+ trex_payload = u32.pack(track_id) # track id
+ trex_payload += u32.pack(1) # default sample description index
+ trex_payload += u32.pack(0) # default sample duration
+ trex_payload += u32.pack(0) # default sample size
+ trex_payload += u32.pack(0) # default sample flags
+ mvex_payload += full_box(b'trex', 0, 0, trex_payload) # Track Extends Box
+
+ moov_payload += box(b'mvex', mvex_payload) # Movie Extends Box
+ stream.write(box(b'moov', moov_payload)) # Movie Box
+
+
+def extract_box_data(data, box_sequence):
+ data_reader = io.BytesIO(data)
+ while True:
+ box_size = u32.unpack(data_reader.read(4))[0]
+ box_type = data_reader.read(4)
+ if box_type == box_sequence[0]:
+ box_data = data_reader.read(box_size - 8)
+ if len(box_sequence) == 1:
+ return box_data
+ return extract_box_data(box_data, box_sequence[1:])
+ data_reader.seek(box_size - 8, 1)
+
+
+class IsmFD(FragmentFD):
+ """
+    Download segments in an ISM manifest
+ """
+
+ def real_download(self, filename, info_dict):
+ segments = info_dict['fragments'][:1] if self.params.get(
+ 'test', False) else info_dict['fragments']
+
+ ctx = {
+ 'filename': filename,
+ 'total_frags': len(segments),
+ }
+
+ self._prepare_and_start_frag_download(ctx, info_dict)
+
+ extra_state = ctx.setdefault('extra_state', {
+ 'ism_track_written': False,
+ })
+
+ skip_unavailable_fragments = self.params.get('skip_unavailable_fragments', True)
+
+ frag_index = 0
+ for i, segment in enumerate(segments):
+ frag_index += 1
+ if frag_index <= ctx['fragment_index']:
+ continue
+
+ retry_manager = RetryManager(self.params.get('fragment_retries'), self.report_retry,
+ frag_index=frag_index, fatal=not skip_unavailable_fragments)
+ for retry in retry_manager:
+ try:
+ success = self._download_fragment(ctx, segment['url'], info_dict)
+ if not success:
+ return False
+ frag_content = self._read_fragment(ctx)
+
+ if not extra_state['ism_track_written']:
+ tfhd_data = extract_box_data(frag_content, [b'moof', b'traf', b'tfhd'])
+ info_dict['_download_params']['track_id'] = u32.unpack(tfhd_data[4:8])[0]
+ write_piff_header(ctx['dest_stream'], info_dict['_download_params'])
+ extra_state['ism_track_written'] = True
+ self._append_fragment(ctx, frag_content)
+ except HTTPError as err:
+ retry.error = err
+ continue
+
+ if retry_manager.error:
+ if not skip_unavailable_fragments:
+ return False
+ self.report_skip_fragment(frag_index)
+
+ return self._finish_frag_download(ctx, info_dict)
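
Everything in this file reduces to the two helpers at the top: box() prefixes a payload with its 32-bit size and FourCC, and full_box() adds the version/flags header, so ISO BMFF trees are built by plain concatenation and unwound again by extract_box_data. A small round-trip sketch using this module's own helpers, mirroring how IsmFD recovers the track id from a fragment's tfhd:

from yt_dlp.downloader.ism import box, extract_box_data, full_box, u32

# A 'moof' wrapping a 'traf' wrapping a 'tfhd' full box with track id 1
tfhd = full_box(b'tfhd', 0, 0, u32.pack(1))
moof = box(b'moof', box(b'traf', tfhd))

# extract_box_data walks the sequence and returns the innermost box's data;
# the first 4 bytes of a full box's data are its version and flags
tfhd_data = extract_box_data(moof, [b'moof', b'traf', b'tfhd'])
assert u32.unpack(tfhd_data[4:8])[0] == 1
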
diff --git a/yt_dlp/downloader/mhtml.py b/yt_dlp/downloader/mhtml.py
new file mode 100644
index 0000000..d977dce
--- /dev/null
+++ b/yt_dlp/downloader/mhtml.py
@@ -0,0 +1,189 @@
+import io
+import quopri
+import re
+import uuid
+
+from .fragment import FragmentFD
+from ..compat import imghdr
+from ..utils import escapeHTML, formatSeconds, srt_subtitles_timecode, urljoin
+from ..version import __version__ as YT_DLP_VERSION
+
+
+class MhtmlFD(FragmentFD):
+ _STYLESHEET = """\
+html, body {
+ margin: 0;
+ padding: 0;
+ height: 100vh;
+}
+
+html {
+ overflow-y: scroll;
+ scroll-snap-type: y mandatory;
+}
+
+body {
+ scroll-snap-type: y mandatory;
+ display: flex;
+ flex-flow: column;
+}
+
+body > figure {
+ max-width: 100vw;
+ max-height: 100vh;
+ scroll-snap-align: center;
+}
+
+body > figure > figcaption {
+ text-align: center;
+ height: 2.5em;
+}
+
+body > figure > img {
+ display: block;
+ margin: auto;
+ max-width: 100%;
+ max-height: calc(100vh - 5em);
+}
+"""
+ _STYLESHEET = re.sub(r'\s+', ' ', _STYLESHEET)
+ _STYLESHEET = re.sub(r'\B \B|(?<=[\w\-]) (?=[^\w\-])|(?<=[^\w\-]) (?=[\w\-])', '', _STYLESHEET)
+
+ @staticmethod
+ def _escape_mime(s):
+ return '=?utf-8?Q?' + (b''.join(
+ bytes((b,)) if b >= 0x20 else b'=%02X' % b
+ for b in quopri.encodestring(s.encode(), header=True)
+ )).decode('us-ascii') + '?='
+
+ def _gen_cid(self, i, fragment, frag_boundary):
+ return '%u.%s@yt-dlp.github.io.invalid' % (i, frag_boundary)
+
+ def _gen_stub(self, *, fragments, frag_boundary, title):
+ output = io.StringIO()
+
+ output.write((
+ '<!DOCTYPE html>'
+ '<html>'
+ '<head>'
+ '' '<meta name="generator" content="yt-dlp {version}">'
+ '' '<title>{title}</title>'
+ '' '<style>{styles}</style>'
+ '<body>'
+ ).format(
+ version=escapeHTML(YT_DLP_VERSION),
+ styles=self._STYLESHEET,
+ title=escapeHTML(title)
+ ))
+
+ t0 = 0
+ for i, frag in enumerate(fragments):
+ output.write('<figure>')
+ try:
+ t1 = t0 + frag['duration']
+ output.write((
+ '<figcaption>Slide #{num}: {t0} – {t1} (duration: {duration})</figcaption>'
+ ).format(
+ num=i + 1,
+ t0=srt_subtitles_timecode(t0),
+ t1=srt_subtitles_timecode(t1),
+ duration=formatSeconds(frag['duration'], msec=True)
+ ))
+ except (KeyError, ValueError, TypeError):
+ t1 = None
+ output.write((
+ '<figcaption>Slide #{num}</figcaption>'
+ ).format(num=i + 1))
+ output.write('<img src="cid:{cid}">'.format(
+ cid=self._gen_cid(i, frag, frag_boundary)))
+ output.write('</figure>')
+ t0 = t1
+
+ return output.getvalue()
+
+ def real_download(self, filename, info_dict):
+ fragment_base_url = info_dict.get('fragment_base_url')
+ fragments = info_dict['fragments'][:1] if self.params.get(
+ 'test', False) else info_dict['fragments']
+ title = info_dict.get('title', info_dict['format_id'])
+ origin = info_dict.get('webpage_url', info_dict['url'])
+
+ ctx = {
+ 'filename': filename,
+ 'total_frags': len(fragments),
+ }
+
+ self._prepare_and_start_frag_download(ctx, info_dict)
+
+ extra_state = ctx.setdefault('extra_state', {
+ 'header_written': False,
+ 'mime_boundary': str(uuid.uuid4()).replace('-', ''),
+ })
+
+ frag_boundary = extra_state['mime_boundary']
+
+ if not extra_state['header_written']:
+ stub = self._gen_stub(
+ fragments=fragments,
+ frag_boundary=frag_boundary,
+ title=title
+ )
+
+ ctx['dest_stream'].write((
+ 'MIME-Version: 1.0\r\n'
+ 'From: <nowhere@yt-dlp.github.io.invalid>\r\n'
+ 'To: <nowhere@yt-dlp.github.io.invalid>\r\n'
+ 'Subject: {title}\r\n'
+ 'Content-type: multipart/related; '
+ '' 'boundary="{boundary}"; '
+ '' 'type="text/html"\r\n'
+ 'X.yt-dlp.Origin: {origin}\r\n'
+ '\r\n'
+ '--{boundary}\r\n'
+ 'Content-Type: text/html; charset=utf-8\r\n'
+ 'Content-Length: {length}\r\n'
+ '\r\n'
+ '{stub}\r\n'
+ ).format(
+ origin=origin,
+ boundary=frag_boundary,
+ length=len(stub),
+ title=self._escape_mime(title),
+ stub=stub
+ ).encode())
+ extra_state['header_written'] = True
+
+ for i, fragment in enumerate(fragments):
+ if (i + 1) <= ctx['fragment_index']:
+ continue
+
+ fragment_url = fragment.get('url')
+ if not fragment_url:
+ assert fragment_base_url
+ fragment_url = urljoin(fragment_base_url, fragment['path'])
+
+ success = self._download_fragment(ctx, fragment_url, info_dict)
+ if not success:
+ continue
+ frag_content = self._read_fragment(ctx)
+
+ frag_header = io.BytesIO()
+ frag_header.write(
+ b'--%b\r\n' % frag_boundary.encode('us-ascii'))
+ frag_header.write(
+ b'Content-ID: <%b>\r\n' % self._gen_cid(i, fragment, frag_boundary).encode('us-ascii'))
+ frag_header.write(
+ b'Content-type: %b\r\n' % f'image/{imghdr.what(h=frag_content) or "jpeg"}'.encode())
+ frag_header.write(
+ b'Content-length: %u\r\n' % len(frag_content))
+ frag_header.write(
+ b'Content-location: %b\r\n' % fragment_url.encode('us-ascii'))
+ frag_header.write(
+ b'X.yt-dlp.Duration: %f\r\n' % fragment['duration'])
+ frag_header.write(b'\r\n')
+ self._append_fragment(
+ ctx, frag_header.getvalue() + frag_content + b'\r\n')
+
+ ctx['dest_stream'].write(
+ b'--%b--\r\n\r\n' % frag_boundary.encode('us-ascii'))
+ return self._finish_frag_download(ctx, info_dict)
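
The output of MhtmlFD is a single multipart/related MIME message: an HTML stub that references every slide through a cid: URL, followed by one image part per fragment, all separated by the UUID-derived boundary. Non-ASCII titles are folded into an RFC 2047 encoded-word by _escape_mime; an illustrative call (expected output shown as a comment):

from yt_dlp.downloader.mhtml import MhtmlFD

# Spaces become '_' and non-ASCII bytes become =XX quoted-printable escapes
MhtmlFD._escape_mime('Naïve title')
# => '=?utf-8?Q?Na=C3=AFve_title?='
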
diff --git a/yt_dlp/downloader/niconico.py b/yt_dlp/downloader/niconico.py
new file mode 100644
index 0000000..fef8bff
--- /dev/null
+++ b/yt_dlp/downloader/niconico.py
@@ -0,0 +1,140 @@
+import json
+import threading
+import time
+
+from . import get_suitable_downloader
+from .common import FileDownloader
+from .external import FFmpegFD
+from ..networking import Request
+from ..utils import DownloadError, str_or_none, try_get
+
+
+class NiconicoDmcFD(FileDownloader):
+ """ Downloading niconico douga from DMC with heartbeat """
+
+ def real_download(self, filename, info_dict):
+ from ..extractor.niconico import NiconicoIE
+
+ self.to_screen('[%s] Downloading from DMC' % self.FD_NAME)
+ ie = NiconicoIE(self.ydl)
+ info_dict, heartbeat_info_dict = ie._get_heartbeat_info(info_dict)
+
+ fd = get_suitable_downloader(info_dict, params=self.params)(self.ydl, self.params)
+
+ success = download_complete = False
+ timer = [None]
+ heartbeat_lock = threading.Lock()
+ heartbeat_url = heartbeat_info_dict['url']
+ heartbeat_data = heartbeat_info_dict['data'].encode()
+ heartbeat_interval = heartbeat_info_dict.get('interval', 30)
+
+ request = Request(heartbeat_url, heartbeat_data)
+
+ def heartbeat():
+ try:
+ self.ydl.urlopen(request).read()
+ except Exception:
+ self.to_screen('[%s] Heartbeat failed' % self.FD_NAME)
+
+ with heartbeat_lock:
+ if not download_complete:
+ timer[0] = threading.Timer(heartbeat_interval, heartbeat)
+ timer[0].start()
+
+ heartbeat_info_dict['ping']()
+ self.to_screen('[%s] Heartbeat with %d second interval ...' % (self.FD_NAME, heartbeat_interval))
+ try:
+ heartbeat()
+ if type(fd).__name__ == 'HlsFD':
+ info_dict.update(ie._extract_m3u8_formats(info_dict['url'], info_dict['id'])[0])
+ success = fd.real_download(filename, info_dict)
+ finally:
+ if heartbeat_lock:
+ with heartbeat_lock:
+ timer[0].cancel()
+ download_complete = True
+ return success
+
+
+class NiconicoLiveFD(FileDownloader):
+ """ Downloads niconico live without being stopped """
+
+ def real_download(self, filename, info_dict):
+ video_id = info_dict['video_id']
+ ws_url = info_dict['url']
+ ws_extractor = info_dict['ws']
+ ws_origin_host = info_dict['origin']
+ live_quality = info_dict.get('live_quality', 'high')
+ live_latency = info_dict.get('live_latency', 'high')
+ dl = FFmpegFD(self.ydl, self.params or {})
+
+ new_info_dict = info_dict.copy()
+ new_info_dict.update({
+ 'protocol': 'm3u8',
+ })
+
+ def communicate_ws(reconnect):
+ if reconnect:
+ ws = self.ydl.urlopen(Request(ws_url, headers={'Origin': f'https://{ws_origin_host}'}))
+ if self.ydl.params.get('verbose', False):
+ self.to_screen('[debug] Sending startWatching request')
+ ws.send(json.dumps({
+ 'type': 'startWatching',
+ 'data': {
+ 'stream': {
+ 'quality': live_quality,
+ 'protocol': 'hls+fmp4',
+ 'latency': live_latency,
+ 'chasePlay': False
+ },
+ 'room': {
+ 'protocol': 'webSocket',
+ 'commentable': True
+ },
+ 'reconnect': True,
+ }
+ }))
+ else:
+ ws = ws_extractor
+ with ws:
+ while True:
+ recv = ws.recv()
+ if not recv:
+ continue
+ data = json.loads(recv)
+ if not data or not isinstance(data, dict):
+ continue
+ if data.get('type') == 'ping':
+ # pong back
+ ws.send(r'{"type":"pong"}')
+ ws.send(r'{"type":"keepSeat"}')
+ elif data.get('type') == 'disconnect':
+ self.write_debug(data)
+ return True
+ elif data.get('type') == 'error':
+ self.write_debug(data)
+ message = try_get(data, lambda x: x['body']['code'], str) or recv
+ return DownloadError(message)
+ elif self.ydl.params.get('verbose', False):
+ if len(recv) > 100:
+ recv = recv[:100] + '...'
+ self.to_screen('[debug] Server said: %s' % recv)
+
+ def ws_main():
+ reconnect = False
+ while True:
+ try:
+ ret = communicate_ws(reconnect)
+ if ret is True:
+ return
+ except BaseException as e:
+                    self.to_screen('[%s] %s: Connection error occurred, reconnecting after 10 seconds: %s' % ('niconico:live', video_id, str_or_none(e)))
+ time.sleep(10)
+ continue
+ finally:
+ reconnect = True
+
+ thread = threading.Thread(target=ws_main, daemon=True)
+ thread.start()
+
+ return dl.download(filename, new_info_dict)
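
NiconicoDmcFD above keeps the DMC session alive by re-arming a threading.Timer after every heartbeat, under a lock so that no new timer is scheduled once the download has finished. The same pattern in isolation (a sketch; ping stands in for the session-refresh request):

import threading

def start_heartbeat(ping, interval=30):
    lock = threading.Lock()
    state = {'stopped': False, 'timer': None}

    def beat():
        ping()
        with lock:
            if not state['stopped']:   # do not re-arm after stop()
                state['timer'] = threading.Timer(interval, beat)
                state['timer'].start()

    def stop():
        with lock:
            state['stopped'] = True
            if state['timer'] is not None:
                state['timer'].cancel()

    beat()   # first beat fires immediately, as in NiconicoDmcFD
    return stop
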
diff --git a/yt_dlp/downloader/rtmp.py b/yt_dlp/downloader/rtmp.py
new file mode 100644
index 0000000..0e09525
--- /dev/null
+++ b/yt_dlp/downloader/rtmp.py
@@ -0,0 +1,213 @@
+import os
+import re
+import subprocess
+import time
+
+from .common import FileDownloader
+from ..utils import (
+ Popen,
+ check_executable,
+ encodeArgument,
+ encodeFilename,
+ get_exe_version,
+)
+
+
+def rtmpdump_version():
+ return get_exe_version(
+ 'rtmpdump', ['--help'], r'(?i)RTMPDump\s*v?([0-9a-zA-Z._-]+)')
+
+
+class RtmpFD(FileDownloader):
+ def real_download(self, filename, info_dict):
+ def run_rtmpdump(args):
+ start = time.time()
+ resume_percent = None
+ resume_downloaded_data_len = None
+ proc = Popen(args, stderr=subprocess.PIPE)
+ cursor_in_new_line = True
+ proc_stderr_closed = False
+ try:
+ while not proc_stderr_closed:
+ # read line from stderr
+ line = ''
+ while True:
+ char = proc.stderr.read(1)
+ if not char:
+ proc_stderr_closed = True
+ break
+ if char in [b'\r', b'\n']:
+ break
+ line += char.decode('ascii', 'replace')
+ if not line:
+ # proc_stderr_closed is True
+ continue
+ mobj = re.search(r'([0-9]+\.[0-9]{3}) kB / [0-9]+\.[0-9]{2} sec \(([0-9]{1,2}\.[0-9])%\)', line)
+ if mobj:
+ downloaded_data_len = int(float(mobj.group(1)) * 1024)
+ percent = float(mobj.group(2))
+ if not resume_percent:
+ resume_percent = percent
+ resume_downloaded_data_len = downloaded_data_len
+ time_now = time.time()
+ eta = self.calc_eta(start, time_now, 100 - resume_percent, percent - resume_percent)
+ speed = self.calc_speed(start, time_now, downloaded_data_len - resume_downloaded_data_len)
+ data_len = None
+ if percent > 0:
+ data_len = int(downloaded_data_len * 100 / percent)
+ self._hook_progress({
+ 'status': 'downloading',
+ 'downloaded_bytes': downloaded_data_len,
+ 'total_bytes_estimate': data_len,
+ 'tmpfilename': tmpfilename,
+ 'filename': filename,
+ 'eta': eta,
+ 'elapsed': time_now - start,
+ 'speed': speed,
+ }, info_dict)
+ cursor_in_new_line = False
+ else:
+ # no percent for live streams
+ mobj = re.search(r'([0-9]+\.[0-9]{3}) kB / [0-9]+\.[0-9]{2} sec', line)
+ if mobj:
+ downloaded_data_len = int(float(mobj.group(1)) * 1024)
+ time_now = time.time()
+ speed = self.calc_speed(start, time_now, downloaded_data_len)
+ self._hook_progress({
+ 'downloaded_bytes': downloaded_data_len,
+ 'tmpfilename': tmpfilename,
+ 'filename': filename,
+ 'status': 'downloading',
+ 'elapsed': time_now - start,
+ 'speed': speed,
+ }, info_dict)
+ cursor_in_new_line = False
+ elif self.params.get('verbose', False):
+ if not cursor_in_new_line:
+ self.to_screen('')
+ cursor_in_new_line = True
+ self.to_screen('[rtmpdump] ' + line)
+ if not cursor_in_new_line:
+ self.to_screen('')
+ return proc.wait()
+ except BaseException: # Including KeyboardInterrupt
+ proc.kill(timeout=None)
+ raise
+
+ url = info_dict['url']
+ player_url = info_dict.get('player_url')
+ page_url = info_dict.get('page_url')
+ app = info_dict.get('app')
+ play_path = info_dict.get('play_path')
+ tc_url = info_dict.get('tc_url')
+ flash_version = info_dict.get('flash_version')
+ live = info_dict.get('rtmp_live', False)
+ conn = info_dict.get('rtmp_conn')
+ protocol = info_dict.get('rtmp_protocol')
+ real_time = info_dict.get('rtmp_real_time', False)
+ no_resume = info_dict.get('no_resume', False)
+ continue_dl = self.params.get('continuedl', True)
+
+ self.report_destination(filename)
+ tmpfilename = self.temp_name(filename)
+ test = self.params.get('test', False)
+
+ # Check for rtmpdump first
+ if not check_executable('rtmpdump', ['-h']):
+            self.report_error('RTMP download detected but "rtmpdump" could not be run. Please install it')
+ return False
+
+ # Download using rtmpdump. rtmpdump returns exit code 2 when
+ # the connection was interrupted and resuming appears to be
+ # possible. This is part of rtmpdump's normal usage, AFAIK.
+ basic_args = [
+ 'rtmpdump', '--verbose', '-r', url,
+ '-o', tmpfilename]
+ if player_url is not None:
+ basic_args += ['--swfVfy', player_url]
+ if page_url is not None:
+ basic_args += ['--pageUrl', page_url]
+ if app is not None:
+ basic_args += ['--app', app]
+ if play_path is not None:
+ basic_args += ['--playpath', play_path]
+ if tc_url is not None:
+ basic_args += ['--tcUrl', tc_url]
+ if test:
+ basic_args += ['--stop', '1']
+ if flash_version is not None:
+ basic_args += ['--flashVer', flash_version]
+ if live:
+ basic_args += ['--live']
+ if isinstance(conn, list):
+ for entry in conn:
+ basic_args += ['--conn', entry]
+ elif isinstance(conn, str):
+ basic_args += ['--conn', conn]
+ if protocol is not None:
+ basic_args += ['--protocol', protocol]
+ if real_time:
+ basic_args += ['--realtime']
+
+ args = basic_args
+ if not no_resume and continue_dl and not live:
+ args += ['--resume']
+ if not live and continue_dl:
+ args += ['--skip', '1']
+
+ args = [encodeArgument(a) for a in args]
+
+ self._debug_cmd(args, exe='rtmpdump')
+
+ RD_SUCCESS = 0
+ RD_FAILED = 1
+ RD_INCOMPLETE = 2
+ RD_NO_CONNECT = 3
+
+ started = time.time()
+
+ try:
+ retval = run_rtmpdump(args)
+ except KeyboardInterrupt:
+ if not info_dict.get('is_live'):
+ raise
+ retval = RD_SUCCESS
+ self.to_screen('\n[rtmpdump] Interrupted by user')
+
+ if retval == RD_NO_CONNECT:
+ self.report_error('[rtmpdump] Could not connect to RTMP server.')
+ return False
+
+ while retval in (RD_INCOMPLETE, RD_FAILED) and not test and not live:
+ prevsize = os.path.getsize(encodeFilename(tmpfilename))
+ self.to_screen('[rtmpdump] Downloaded %s bytes' % prevsize)
+ time.sleep(5.0) # This seems to be needed
+ args = basic_args + ['--resume']
+ if retval == RD_FAILED:
+ args += ['--skip', '1']
+ args = [encodeArgument(a) for a in args]
+ retval = run_rtmpdump(args)
+ cursize = os.path.getsize(encodeFilename(tmpfilename))
+ if prevsize == cursize and retval == RD_FAILED:
+ break
+            # Some rtmp streams seem to abort after ~ 99.8%. Don't complain for those
+ if prevsize == cursize and retval == RD_INCOMPLETE and cursize > 1024:
+ self.to_screen('[rtmpdump] Could not download the whole video. This can happen for some advertisements.')
+ retval = RD_SUCCESS
+ break
+ if retval == RD_SUCCESS or (test and retval == RD_INCOMPLETE):
+ fsize = os.path.getsize(encodeFilename(tmpfilename))
+ self.to_screen('[rtmpdump] Downloaded %s bytes' % fsize)
+ self.try_rename(tmpfilename, filename)
+ self._hook_progress({
+ 'downloaded_bytes': fsize,
+ 'total_bytes': fsize,
+ 'filename': filename,
+ 'status': 'finished',
+ 'elapsed': time.time() - started,
+ }, info_dict)
+ return True
+ else:
+ self.to_stderr('\n')
+ self.report_error('rtmpdump exited with code %d' % retval)
+ return False
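
run_rtmpdump above drives progress reporting by scraping rtmpdump's stderr: the regex captures the downloaded kB and a percentage, from which byte counts and a total-size estimate are derived. Applied to a sample line:

import re

line = '12345.678 kB / 67.89 sec (42.3%)'
mobj = re.search(r'([0-9]+\.[0-9]{3}) kB / [0-9]+\.[0-9]{2} sec \(([0-9]{1,2}\.[0-9])%\)', line)
downloaded = int(float(mobj.group(1)) * 1024)     # bytes so far
percent = float(mobj.group(2))
total_estimate = int(downloaded * 100 / percent)  # same estimate as the hook above
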
diff --git a/yt_dlp/downloader/rtsp.py b/yt_dlp/downloader/rtsp.py
new file mode 100644
index 0000000..e89269f
--- /dev/null
+++ b/yt_dlp/downloader/rtsp.py
@@ -0,0 +1,42 @@
+import os
+import subprocess
+
+from .common import FileDownloader
+from ..utils import check_executable, encodeFilename
+
+
+class RtspFD(FileDownloader):
+ def real_download(self, filename, info_dict):
+ url = info_dict['url']
+ self.report_destination(filename)
+ tmpfilename = self.temp_name(filename)
+
+ if check_executable('mplayer', ['-h']):
+ args = [
+ 'mplayer', '-really-quiet', '-vo', 'null', '-vc', 'dummy',
+ '-dumpstream', '-dumpfile', tmpfilename, url]
+ elif check_executable('mpv', ['-h']):
+ args = [
+ 'mpv', '-really-quiet', '--vo=null', '--stream-dump=' + tmpfilename, url]
+ else:
+ self.report_error('MMS or RTSP download detected but neither "mplayer" nor "mpv" could be run. Please install one')
+ return False
+
+ self._debug_cmd(args)
+
+ retval = subprocess.call(args)
+ if retval == 0:
+ fsize = os.path.getsize(encodeFilename(tmpfilename))
+ self.to_screen(f'\r[{args[0]}] {fsize} bytes')
+ self.try_rename(tmpfilename, filename)
+ self._hook_progress({
+ 'downloaded_bytes': fsize,
+ 'total_bytes': fsize,
+ 'filename': filename,
+ 'status': 'finished',
+ }, info_dict)
+ return True
+ else:
+ self.to_stderr('\n')
+ self.report_error('%s exited with code %d' % (args[0], retval))
+ return False
diff --git a/yt_dlp/downloader/websocket.py b/yt_dlp/downloader/websocket.py
new file mode 100644
index 0000000..6837ff1
--- /dev/null
+++ b/yt_dlp/downloader/websocket.py
@@ -0,0 +1,53 @@
+import asyncio
+import contextlib
+import os
+import signal
+import threading
+
+from .common import FileDownloader
+from .external import FFmpegFD
+from ..dependencies import websockets
+
+
+class FFmpegSinkFD(FileDownloader):
+ """ A sink to ffmpeg for downloading fragments in any form """
+
+ def real_download(self, filename, info_dict):
+ info_copy = info_dict.copy()
+ info_copy['url'] = '-'
+
+ async def call_conn(proc, stdin):
+ try:
+ await self.real_connection(stdin, info_dict)
+ except OSError:
+ pass
+ finally:
+ with contextlib.suppress(OSError):
+ stdin.flush()
+ stdin.close()
+ os.kill(os.getpid(), signal.SIGINT)
+
+ class FFmpegStdinFD(FFmpegFD):
+ @classmethod
+ def get_basename(cls):
+ return FFmpegFD.get_basename()
+
+ def on_process_started(self, proc, stdin):
+ thread = threading.Thread(target=asyncio.run, daemon=True, args=(call_conn(proc, stdin), ))
+ thread.start()
+
+ return FFmpegStdinFD(self.ydl, self.params or {}).download(filename, info_copy)
+
+ async def real_connection(self, sink, info_dict):
+ """ Override this in subclasses """
+ raise NotImplementedError('This method must be implemented by subclasses')
+
+
+class WebSocketFragmentFD(FFmpegSinkFD):
+ async def real_connection(self, sink, info_dict):
+ async with websockets.connect(info_dict['url'], extra_headers=info_dict.get('http_headers', {})) as ws:
+ while True:
+ recv = await ws.recv()
+ if isinstance(recv, str):
+ recv = recv.encode('utf8')
+ sink.write(recv)
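
FFmpegSinkFD never writes the output file itself: it points FFmpegFD at url '-', feeds ffmpeg's stdin from a thread running the asyncio coroutine, and sends SIGINT to its own process once the source dries up so the ffmpeg downloader finalizes the file. Stripped of that plumbing, the receive loop amounts to the following (a sketch assuming the websockets package; the URL is a placeholder):

import asyncio
import websockets

async def dump_ws(url, sink):
    # Append every frame in arrival order; text frames are coerced to bytes,
    # as in WebSocketFragmentFD.real_connection above
    async with websockets.connect(url) as ws:
        while True:
            frame = await ws.recv()
            if isinstance(frame, str):
                frame = frame.encode('utf8')
            sink.write(frame)

# with open('dump.bin', 'wb') as f:
#     asyncio.run(dump_ws('wss://example.invalid/stream', f))
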
diff --git a/yt_dlp/downloader/youtube_live_chat.py b/yt_dlp/downloader/youtube_live_chat.py
new file mode 100644
index 0000000..c7a8637
--- /dev/null
+++ b/yt_dlp/downloader/youtube_live_chat.py
@@ -0,0 +1,228 @@
+import json
+import time
+
+from .fragment import FragmentFD
+from ..networking.exceptions import HTTPError
+from ..utils import (
+ RegexNotFoundError,
+ RetryManager,
+ dict_get,
+ int_or_none,
+ try_get,
+)
+from ..utils.networking import HTTPHeaderDict
+
+
+class YoutubeLiveChatFD(FragmentFD):
+ """ Downloads YouTube live chats fragment by fragment """
+
+ def real_download(self, filename, info_dict):
+ video_id = info_dict['video_id']
+ self.to_screen('[%s] Downloading live chat' % self.FD_NAME)
+ if not self.params.get('skip_download') and info_dict['protocol'] == 'youtube_live_chat':
+ self.report_warning('Live chat download runs until the livestream ends. '
+ 'If you wish to download the video simultaneously, run a separate yt-dlp instance')
+
+ test = self.params.get('test', False)
+
+ ctx = {
+ 'filename': filename,
+ 'live': True,
+ 'total_frags': None,
+ }
+
+ from ..extractor.youtube import YoutubeBaseInfoExtractor
+
+ ie = YoutubeBaseInfoExtractor(self.ydl)
+
+ start_time = int(time.time() * 1000)
+
+ def dl_fragment(url, data=None, headers=None):
+ http_headers = HTTPHeaderDict(info_dict.get('http_headers'), headers)
+ return self._download_fragment(ctx, url, info_dict, http_headers, data)
+
+ def parse_actions_replay(live_chat_continuation):
+ offset = continuation_id = click_tracking_params = None
+ processed_fragment = bytearray()
+ for action in live_chat_continuation.get('actions', []):
+ if 'replayChatItemAction' in action:
+ replay_chat_item_action = action['replayChatItemAction']
+ offset = int(replay_chat_item_action['videoOffsetTimeMsec'])
+ processed_fragment.extend(
+ json.dumps(action, ensure_ascii=False).encode() + b'\n')
+ if offset is not None:
+ continuation = try_get(
+ live_chat_continuation,
+ lambda x: x['continuations'][0]['liveChatReplayContinuationData'], dict)
+ if continuation:
+ continuation_id = continuation.get('continuation')
+ click_tracking_params = continuation.get('clickTrackingParams')
+ self._append_fragment(ctx, processed_fragment)
+ return continuation_id, offset, click_tracking_params
+
+ def try_refresh_replay_beginning(live_chat_continuation):
+ # choose the second option that contains the unfiltered live chat replay
+ refresh_continuation = try_get(
+ live_chat_continuation,
+ lambda x: x['header']['liveChatHeaderRenderer']['viewSelector']['sortFilterSubMenuRenderer']['subMenuItems'][1]['continuation']['reloadContinuationData'], dict)
+ if refresh_continuation:
+ # no data yet but required to call _append_fragment
+ self._append_fragment(ctx, b'')
+ refresh_continuation_id = refresh_continuation.get('continuation')
+ offset = 0
+ click_tracking_params = refresh_continuation.get('trackingParams')
+ return refresh_continuation_id, offset, click_tracking_params
+ return parse_actions_replay(live_chat_continuation)
+
+ live_offset = 0
+
+ def parse_actions_live(live_chat_continuation):
+ nonlocal live_offset
+ continuation_id = click_tracking_params = None
+ processed_fragment = bytearray()
+ for action in live_chat_continuation.get('actions', []):
+ timestamp = self.parse_live_timestamp(action)
+ if timestamp is not None:
+ live_offset = timestamp - start_time
+ # compatibility with replay format
+ pseudo_action = {
+ 'replayChatItemAction': {'actions': [action]},
+ 'videoOffsetTimeMsec': str(live_offset),
+ 'isLive': True,
+ }
+ processed_fragment.extend(
+ json.dumps(pseudo_action, ensure_ascii=False).encode() + b'\n')
+ continuation_data_getters = [
+ lambda x: x['continuations'][0]['invalidationContinuationData'],
+ lambda x: x['continuations'][0]['timedContinuationData'],
+ ]
+ continuation_data = try_get(live_chat_continuation, continuation_data_getters, dict)
+ if continuation_data:
+ continuation_id = continuation_data.get('continuation')
+ click_tracking_params = continuation_data.get('clickTrackingParams')
+ timeout_ms = int_or_none(continuation_data.get('timeoutMs'))
+ if timeout_ms is not None:
+ time.sleep(timeout_ms / 1000)
+ self._append_fragment(ctx, processed_fragment)
+ return continuation_id, live_offset, click_tracking_params
+
+ def download_and_parse_fragment(url, frag_index, request_data=None, headers=None):
+ for retry in RetryManager(self.params.get('fragment_retries'), self.report_retry, frag_index=frag_index):
+ try:
+ success = dl_fragment(url, request_data, headers)
+ if not success:
+ return False, None, None, None
+ raw_fragment = self._read_fragment(ctx)
+ try:
+ data = ie.extract_yt_initial_data(video_id, raw_fragment.decode('utf-8', 'replace'))
+ except RegexNotFoundError:
+ data = None
+ if not data:
+ data = json.loads(raw_fragment)
+ live_chat_continuation = try_get(
+ data,
+ lambda x: x['continuationContents']['liveChatContinuation'], dict) or {}
+
+ func = (info_dict['protocol'] == 'youtube_live_chat' and parse_actions_live
+ or frag_index == 1 and try_refresh_replay_beginning
+ or parse_actions_replay)
+ return (True, *func(live_chat_continuation))
+ except HTTPError as err:
+ retry.error = err
+ continue
+ return False, None, None, None
+
+ self._prepare_and_start_frag_download(ctx, info_dict)
+
+ success = dl_fragment(info_dict['url'])
+ if not success:
+ return False
+ raw_fragment = self._read_fragment(ctx)
+ try:
+ data = ie.extract_yt_initial_data(video_id, raw_fragment.decode('utf-8', 'replace'))
+ except RegexNotFoundError:
+ return False
+ continuation_id = try_get(
+ data,
+ lambda x: x['contents']['twoColumnWatchNextResults']['conversationBar']['liveChatRenderer']['continuations'][0]['reloadContinuationData']['continuation'])
+ # no data yet but required to call _append_fragment
+ self._append_fragment(ctx, b'')
+
+ ytcfg = ie.extract_ytcfg(video_id, raw_fragment.decode('utf-8', 'replace'))
+
+ if not ytcfg:
+ return False
+ api_key = try_get(ytcfg, lambda x: x['INNERTUBE_API_KEY'])
+ innertube_context = try_get(ytcfg, lambda x: x['INNERTUBE_CONTEXT'])
+ if not api_key or not innertube_context:
+ return False
+ visitor_data = try_get(innertube_context, lambda x: x['client']['visitorData'], str)
+ if info_dict['protocol'] == 'youtube_live_chat_replay':
+ url = 'https://www.youtube.com/youtubei/v1/live_chat/get_live_chat_replay?key=' + api_key
+ chat_page_url = 'https://www.youtube.com/live_chat_replay?continuation=' + continuation_id
+ elif info_dict['protocol'] == 'youtube_live_chat':
+ url = 'https://www.youtube.com/youtubei/v1/live_chat/get_live_chat?key=' + api_key
+ chat_page_url = 'https://www.youtube.com/live_chat?continuation=' + continuation_id
+
+ frag_index = offset = 0
+ click_tracking_params = None
+ while continuation_id is not None:
+ frag_index += 1
+ request_data = {
+ 'context': innertube_context,
+ 'continuation': continuation_id,
+ }
+ if frag_index > 1:
+ request_data['currentPlayerState'] = {'playerOffsetMs': str(max(offset - 5000, 0))}
+ if click_tracking_params:
+ request_data['context']['clickTracking'] = {'clickTrackingParams': click_tracking_params}
+ headers = ie.generate_api_headers(ytcfg=ytcfg, visitor_data=visitor_data)
+ headers.update({'content-type': 'application/json'})
+ fragment_request_data = json.dumps(request_data, ensure_ascii=False).encode() + b'\n'
+ success, continuation_id, offset, click_tracking_params = download_and_parse_fragment(
+ url, frag_index, fragment_request_data, headers)
+ else:
+ success, continuation_id, offset, click_tracking_params = download_and_parse_fragment(
+ chat_page_url, frag_index)
+ if not success:
+ return False
+ if test:
+ break
+
+ return self._finish_frag_download(ctx, info_dict)
+
+ @staticmethod
+ def parse_live_timestamp(action):
+ action_content = dict_get(
+ action,
+ ['addChatItemAction', 'addLiveChatTickerItemAction', 'addBannerToLiveChatCommand'])
+ if not isinstance(action_content, dict):
+ return None
+ item = dict_get(action_content, ['item', 'bannerRenderer'])
+ if not isinstance(item, dict):
+ return None
+ renderer = dict_get(item, [
+ # text
+ 'liveChatTextMessageRenderer', 'liveChatPaidMessageRenderer',
+ 'liveChatMembershipItemRenderer', 'liveChatPaidStickerRenderer',
+ # ticker
+ 'liveChatTickerPaidMessageItemRenderer',
+ 'liveChatTickerSponsorItemRenderer',
+ # banner
+ 'liveChatBannerRenderer',
+ ])
+ if not isinstance(renderer, dict):
+ return None
+ parent_item_getters = [
+ lambda x: x['showItemEndpoint']['showLiveChatItemEndpoint']['renderer'],
+ lambda x: x['contents'],
+ ]
+ parent_item = try_get(renderer, parent_item_getters, dict)
+ if parent_item:
+ renderer = dict_get(parent_item, [
+ 'liveChatTextMessageRenderer', 'liveChatPaidMessageRenderer',
+ 'liveChatMembershipItemRenderer', 'liveChatPaidStickerRenderer',
+ ])
+ if not isinstance(renderer, dict):
+ return None
+ return int_or_none(renderer.get('timestampUsec'), 1000)
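
Every fragment YoutubeLiveChatFD appends is newline-delimited JSON: one action object per line, with live messages wrapped into the replay shape ('replayChatItemAction' plus 'isLive': True and an offset derived via parse_live_timestamp). A hypothetical reader for the resulting *.live_chat.json file:

import json

def iter_chat_actions(path):
    # Replay actions carry their offset inside 'replayChatItemAction';
    # live pseudo-actions carry 'videoOffsetTimeMsec' beside it
    with open(path, encoding='utf-8') as f:
        for line in f:
            if line.strip():
                yield json.loads(line)
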
diff --git a/yt_dlp/extractor/__init__.py b/yt_dlp/extractor/__init__.py
new file mode 100644
index 0000000..6bfa4bd
--- /dev/null
+++ b/yt_dlp/extractor/__init__.py
@@ -0,0 +1,42 @@
+from ..compat.compat_utils import passthrough_module
+
+passthrough_module(__name__, '.extractors')
+del passthrough_module
+
+
+def gen_extractor_classes():
+ """ Return a list of supported extractors.
+ The order does matter; the first extractor matched is the one handling the URL.
+ """
+ from .extractors import _ALL_CLASSES
+
+ return _ALL_CLASSES
+
+
+def gen_extractors():
+ """ Return a list of an instance of every supported extractor.
+ The order does matter; the first extractor matched is the one handling the URL.
+ """
+ return [klass() for klass in gen_extractor_classes()]
+
+
+def list_extractor_classes(age_limit=None):
+ """Return a list of extractors that are suitable for the given age, sorted by extractor name"""
+ from .generic import GenericIE
+
+ yield from sorted(filter(
+ lambda ie: ie.is_suitable(age_limit) and ie != GenericIE,
+ gen_extractor_classes()), key=lambda ie: ie.IE_NAME.lower())
+ yield GenericIE
+
+
+def list_extractors(age_limit=None):
+ """Return a list of extractor instances that are suitable for the given age, sorted by extractor name"""
+ return [ie() for ie in list_extractor_classes(age_limit)]
+
+
+def get_info_extractor(ie_name):
+ """Returns the info extractor class with the given ie_name"""
+ from . import extractors
+
+ return getattr(extractors, f'{ie_name}IE')
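
Thanks to passthrough_module, nothing under .extractors is imported until an attribute is first resolved, so these helpers stay cheap at startup. Typical lookups (YoutubeIE is a real extractor from the table that follows):

from yt_dlp.extractor import get_info_extractor, list_extractor_classes

YoutubeIE = get_info_extractor('Youtube')   # resolves extractors.YoutubeIE lazily

# All suitable extractors sorted by name, with GenericIE always last
names = [ie.IE_NAME for ie in list_extractor_classes(age_limit=18)]
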
diff --git a/yt_dlp/extractor/_extractors.py b/yt_dlp/extractor/_extractors.py
new file mode 100644
index 0000000..c753655
--- /dev/null
+++ b/yt_dlp/extractor/_extractors.py
@@ -0,0 +1,2493 @@
+# flake8: noqa: F401
+
+from .youtube import ( # Youtube is moved to the top to improve performance
+ YoutubeIE,
+ YoutubeClipIE,
+ YoutubeFavouritesIE,
+ YoutubeNotificationsIE,
+ YoutubeHistoryIE,
+ YoutubeTabIE,
+ YoutubeLivestreamEmbedIE,
+ YoutubePlaylistIE,
+ YoutubeRecommendedIE,
+ YoutubeSearchDateIE,
+ YoutubeSearchIE,
+ YoutubeSearchURLIE,
+ YoutubeMusicSearchURLIE,
+ YoutubeSubscriptionsIE,
+ YoutubeTruncatedIDIE,
+ YoutubeTruncatedURLIE,
+ YoutubeYtBeIE,
+ YoutubeYtUserIE,
+ YoutubeWatchLaterIE,
+ YoutubeShortsAudioPivotIE,
+ YoutubeConsentRedirectIE,
+)
+
+from .abc import (
+ ABCIE,
+ ABCIViewIE,
+ ABCIViewShowSeriesIE,
+)
+from .abcnews import (
+ AbcNewsIE,
+ AbcNewsVideoIE,
+)
+from .abcotvs import (
+ ABCOTVSIE,
+ ABCOTVSClipsIE,
+)
+from .abematv import (
+ AbemaTVIE,
+ AbemaTVTitleIE,
+)
+from .academicearth import AcademicEarthCourseIE
+from .acast import (
+ ACastIE,
+ ACastChannelIE,
+)
+from .acfun import AcFunVideoIE, AcFunBangumiIE
+from .adn import ADNIE, ADNSeasonIE
+from .adobeconnect import AdobeConnectIE
+from .adobetv import (
+ AdobeTVEmbedIE,
+ AdobeTVIE,
+ AdobeTVShowIE,
+ AdobeTVChannelIE,
+ AdobeTVVideoIE,
+)
+from .adultswim import AdultSwimIE
+from .aenetworks import (
+ AENetworksIE,
+ AENetworksCollectionIE,
+ AENetworksShowIE,
+ HistoryTopicIE,
+ HistoryPlayerIE,
+ BiographyIE,
+)
+from .aeonco import AeonCoIE
+from .afreecatv import (
+ AfreecaTVIE,
+ AfreecaTVLiveIE,
+ AfreecaTVUserIE,
+)
+from .agora import (
+ TokFMAuditionIE,
+ TokFMPodcastIE,
+ WyborczaPodcastIE,
+ WyborczaVideoIE,
+)
+from .airtv import AirTVIE
+from .aitube import AitubeKZVideoIE
+from .aljazeera import AlJazeeraIE
+from .allstar import (
+ AllstarIE,
+ AllstarProfileIE,
+)
+from .alphaporno import AlphaPornoIE
+from .altcensored import (
+ AltCensoredIE,
+ AltCensoredChannelIE,
+)
+from .alura import (
+ AluraIE,
+ AluraCourseIE
+)
+from .amadeustv import AmadeusTVIE
+from .amara import AmaraIE
+from .amcnetworks import AMCNetworksIE
+from .amazon import (
+ AmazonStoreIE,
+ AmazonReviewsIE,
+)
+from .amazonminitv import (
+ AmazonMiniTVIE,
+ AmazonMiniTVSeasonIE,
+ AmazonMiniTVSeriesIE,
+)
+from .americastestkitchen import (
+ AmericasTestKitchenIE,
+ AmericasTestKitchenSeasonIE,
+)
+from .anchorfm import AnchorFMEpisodeIE
+from .angel import AngelIE
+from .anvato import AnvatoIE
+from .aol import AolIE
+from .allocine import AllocineIE
+from .aliexpress import AliExpressLiveIE
+from .alsace20tv import (
+ Alsace20TVIE,
+ Alsace20TVEmbedIE,
+)
+from .apa import APAIE
+from .aparat import AparatIE
+from .appleconnect import AppleConnectIE
+from .appletrailers import (
+ AppleTrailersIE,
+ AppleTrailersSectionIE,
+)
+from .applepodcasts import ApplePodcastsIE
+from .archiveorg import (
+ ArchiveOrgIE,
+ YoutubeWebArchiveIE,
+)
+from .arcpublishing import ArcPublishingIE
+from .arkena import ArkenaIE
+from .ard import (
+ ARDBetaMediathekIE,
+ ARDMediathekCollectionIE,
+ ARDIE,
+)
+from .art19 import (
+ Art19IE,
+ Art19ShowIE,
+)
+from .arte import (
+ ArteTVIE,
+ ArteTVEmbedIE,
+ ArteTVPlaylistIE,
+ ArteTVCategoryIE,
+)
+from .arnes import ArnesIE
+from .asobichannel import AsobiChannelIE, AsobiChannelTagURLIE
+from .atresplayer import AtresPlayerIE
+from .atscaleconf import AtScaleConfEventIE
+from .atvat import ATVAtIE
+from .audimedia import AudiMediaIE
+from .audioboom import AudioBoomIE
+from .audiodraft import (
+ AudiodraftCustomIE,
+ AudiodraftGenericIE,
+)
+from .audiomack import AudiomackIE, AudiomackAlbumIE
+from .audius import (
+ AudiusIE,
+ AudiusTrackIE,
+ AudiusPlaylistIE,
+ AudiusProfileIE,
+)
+from .awaan import (
+ AWAANIE,
+ AWAANVideoIE,
+ AWAANLiveIE,
+ AWAANSeasonIE,
+)
+from .axs import AxsIE
+from .azmedien import AZMedienIE
+from .baidu import BaiduVideoIE
+from .banbye import (
+ BanByeIE,
+ BanByeChannelIE,
+)
+from .bandaichannel import BandaiChannelIE
+from .bandcamp import (
+ BandcampIE,
+ BandcampAlbumIE,
+ BandcampWeeklyIE,
+ BandcampUserIE,
+)
+from .bannedvideo import BannedVideoIE
+from .bbc import (
+ BBCCoUkIE,
+ BBCCoUkArticleIE,
+ BBCCoUkIPlayerEpisodesIE,
+ BBCCoUkIPlayerGroupIE,
+ BBCCoUkPlaylistIE,
+ BBCIE,
+)
+from .beeg import BeegIE
+from .behindkink import BehindKinkIE
+from .bellmedia import BellMediaIE
+from .beatbump import (
+ BeatBumpVideoIE,
+ BeatBumpPlaylistIE,
+)
+from .beatport import BeatportIE
+from .berufetv import BerufeTVIE
+from .bet import BetIE
+from .bfi import BFIPlayerIE
+from .bfmtv import (
+ BFMTVIE,
+ BFMTVLiveIE,
+ BFMTVArticleIE,
+)
+from .bibeltv import (
+ BibelTVLiveIE,
+ BibelTVSeriesIE,
+ BibelTVVideoIE,
+)
+from .bigflix import BigflixIE
+from .bigo import BigoIE
+from .bild import BildIE
+from .bilibili import (
+ BiliBiliIE,
+ BiliBiliBangumiIE,
+ BiliBiliBangumiSeasonIE,
+ BiliBiliBangumiMediaIE,
+ BilibiliCheeseIE,
+ BilibiliCheeseSeasonIE,
+ BiliBiliSearchIE,
+ BilibiliCategoryIE,
+ BilibiliAudioIE,
+ BilibiliAudioAlbumIE,
+ BiliBiliPlayerIE,
+ BilibiliSpaceVideoIE,
+ BilibiliSpaceAudioIE,
+ BilibiliCollectionListIE,
+ BilibiliSeriesListIE,
+ BilibiliFavoritesListIE,
+ BilibiliWatchlaterIE,
+ BilibiliPlaylistIE,
+ BiliIntlIE,
+ BiliIntlSeriesIE,
+ BiliLiveIE,
+)
+from .biobiochiletv import BioBioChileTVIE
+from .bitchute import (
+ BitChuteIE,
+ BitChuteChannelIE,
+)
+from .blackboardcollaborate import BlackboardCollaborateIE
+from .bleacherreport import (
+ BleacherReportIE,
+ BleacherReportCMSIE,
+)
+from .blerp import BlerpIE
+from .blogger import BloggerIE
+from .bloomberg import BloombergIE
+from .bokecc import BokeCCIE
+from .bongacams import BongaCamsIE
+from .boosty import BoostyIE
+from .bostonglobe import BostonGlobeIE
+from .box import BoxIE
+from .boxcast import BoxCastVideoIE
+from .bpb import BpbIE
+from .br import BRIE
+from .bravotv import BravoTVIE
+from .brainpop import (
+ BrainPOPIE,
+ BrainPOPJrIE,
+ BrainPOPELLIE,
+ BrainPOPEspIE,
+ BrainPOPFrIE,
+ BrainPOPIlIE,
+)
+from .breitbart import BreitBartIE
+from .brightcove import (
+ BrightcoveLegacyIE,
+ BrightcoveNewIE,
+)
+from .brilliantpala import (
+ BrilliantpalaElearnIE,
+ BrilliantpalaClassesIE,
+)
+from .businessinsider import BusinessInsiderIE
+from .bundesliga import BundesligaIE
+from .bundestag import BundestagIE
+from .buzzfeed import BuzzFeedIE
+from .byutv import BYUtvIE
+from .c56 import C56IE
+from .cableav import CableAVIE
+from .callin import CallinIE
+from .caltrans import CaltransIE
+from .cam4 import CAM4IE
+from .camdemy import (
+ CamdemyIE,
+ CamdemyFolderIE
+)
+from .camfm import (
+ CamFMEpisodeIE,
+ CamFMShowIE
+)
+from .cammodels import CamModelsIE
+from .camsoda import CamsodaIE
+from .camtasia import CamtasiaEmbedIE
+from .canal1 import Canal1IE
+from .canalalpha import CanalAlphaIE
+from .canalplus import CanalplusIE
+from .canalc2 import Canalc2IE
+from .caracoltv import CaracolTvPlayIE
+from .cartoonnetwork import CartoonNetworkIE
+from .cbc import (
+ CBCIE,
+ CBCPlayerIE,
+ CBCPlayerPlaylistIE,
+ CBCGemIE,
+ CBCGemPlaylistIE,
+ CBCGemLiveIE,
+)
+from .cbs import (
+ CBSIE,
+ ParamountPressExpressIE,
+)
+from .cbsnews import (
+ CBSNewsEmbedIE,
+ CBSNewsIE,
+ CBSLocalIE,
+ CBSLocalArticleIE,
+ CBSLocalLiveIE,
+ CBSNewsLiveIE,
+ CBSNewsLiveVideoIE,
+)
+from .cbssports import (
+ CBSSportsEmbedIE,
+ CBSSportsIE,
+ TwentyFourSevenSportsIE,
+)
+from .ccc import (
+ CCCIE,
+ CCCPlaylistIE,
+)
+from .ccma import CCMAIE
+from .cctv import CCTVIE
+from .cda import CDAIE
+from .cellebrite import CellebriteIE
+from .ceskatelevize import CeskaTelevizeIE
+from .cgtn import CGTNIE
+from .charlierose import CharlieRoseIE
+from .chaturbate import ChaturbateIE
+from .chilloutzone import ChilloutzoneIE
+from .chzzk import (
+ CHZZKLiveIE,
+ CHZZKVideoIE,
+)
+from .cinemax import CinemaxIE
+from .cinetecamilano import CinetecaMilanoIE
+from .cineverse import (
+ CineverseIE,
+ CineverseDetailsIE,
+)
+from .ciscolive import (
+ CiscoLiveSessionIE,
+ CiscoLiveSearchIE,
+)
+from .ciscowebex import CiscoWebexIE
+from .cjsw import CJSWIE
+from .clipchamp import ClipchampIE
+from .clippit import ClippitIE
+from .cliprs import ClipRsIE
+from .closertotruth import CloserToTruthIE
+from .cloudflarestream import CloudflareStreamIE
+from .cloudycdn import CloudyCDNIE
+from .clubic import ClubicIE
+from .clyp import ClypIE
+from .cmt import CMTIE
+from .cnbc import (
+ CNBCVideoIE,
+)
+from .cnn import (
+ CNNIE,
+ CNNBlogsIE,
+ CNNArticleIE,
+ CNNIndonesiaIE,
+)
+from .coub import CoubIE
+from .comedycentral import (
+ ComedyCentralIE,
+ ComedyCentralTVIE,
+)
+from .commonmistakes import CommonMistakesIE, UnicodeBOMIE
+from .commonprotocols import (
+ MmsIE,
+ RtmpIE,
+ ViewSourceIE,
+)
+from .condenast import CondeNastIE
+from .contv import CONtvIE
+from .corus import CorusIE
+from .cpac import (
+ CPACIE,
+ CPACPlaylistIE,
+)
+from .cozytv import CozyTVIE
+from .cracked import CrackedIE
+from .crackle import CrackleIE
+from .craftsy import CraftsyIE
+from .crooksandliars import CrooksAndLiarsIE
+from .crowdbunker import (
+ CrowdBunkerIE,
+ CrowdBunkerChannelIE,
+)
+from .crtvg import CrtvgIE
+from .crunchyroll import (
+ CrunchyrollBetaIE,
+ CrunchyrollBetaShowIE,
+ CrunchyrollMusicIE,
+ CrunchyrollArtistIE,
+)
+from .cspan import CSpanIE, CSpanCongressIE
+from .ctsnews import CtsNewsIE
+from .ctv import CTVIE
+from .ctvnews import CTVNewsIE
+from .cultureunplugged import CultureUnpluggedIE
+from .curiositystream import (
+ CuriosityStreamIE,
+ CuriosityStreamCollectionsIE,
+ CuriosityStreamSeriesIE,
+)
+from .cwtv import CWTVIE
+from .cybrary import (
+ CybraryIE,
+ CybraryCourseIE
+)
+from .dacast import (
+ DacastVODIE,
+ DacastPlaylistIE,
+)
+from .dailymail import DailyMailIE
+from .dailymotion import (
+ DailymotionIE,
+ DailymotionPlaylistIE,
+ DailymotionSearchIE,
+ DailymotionUserIE,
+)
+from .dailywire import (
+ DailyWireIE,
+ DailyWirePodcastIE,
+)
+from .damtomo import (
+ DamtomoRecordIE,
+ DamtomoVideoIE,
+)
+from .daum import (
+ DaumIE,
+ DaumClipIE,
+ DaumPlaylistIE,
+ DaumUserIE,
+)
+from .daystar import DaystarClipIE
+from .dbtv import DBTVIE
+from .dctp import DctpTvIE
+from .deezer import (
+ DeezerPlaylistIE,
+ DeezerAlbumIE,
+)
+from .democracynow import DemocracynowIE
+from .detik import DetikEmbedIE
+from .dlf import (
+ DLFIE,
+ DLFCorpusIE,
+)
+from .dfb import DFBIE
+from .dhm import DHMIE
+from .douyutv import (
+ DouyuShowIE,
+ DouyuTVIE,
+)
+from .dplay import (
+ DPlayIE,
+ DiscoveryPlusIE,
+ HGTVDeIE,
+ GoDiscoveryIE,
+ TravelChannelIE,
+ CookingChannelIE,
+ HGTVUsaIE,
+ FoodNetworkIE,
+ InvestigationDiscoveryIE,
+ DestinationAmericaIE,
+ AmHistoryChannelIE,
+ ScienceChannelIE,
+ DIYNetworkIE,
+ DiscoveryLifeIE,
+ AnimalPlanetIE,
+ TLCIE,
+ MotorTrendIE,
+ MotorTrendOnDemandIE,
+ DiscoveryPlusIndiaIE,
+ DiscoveryNetworksDeIE,
+ DiscoveryPlusItalyIE,
+ DiscoveryPlusItalyShowIE,
+ DiscoveryPlusIndiaShowIE,
+ GlobalCyclingNetworkPlusIE,
+)
+from .dreisat import DreiSatIE
+from .drbonanza import DRBonanzaIE
+from .drtuber import DrTuberIE
+from .drtv import (
+ DRTVIE,
+ DRTVLiveIE,
+ DRTVSeasonIE,
+ DRTVSeriesIE,
+)
+from .dtube import DTubeIE
+from .dvtv import DVTVIE
+from .duboku import (
+ DubokuIE,
+ DubokuPlaylistIE
+)
+from .dumpert import DumpertIE
+from .deuxm import (
+ DeuxMIE,
+ DeuxMNewsIE
+)
+from .digitalconcerthall import DigitalConcertHallIE
+from .discogs import DiscogsReleasePlaylistIE
+from .discovery import DiscoveryIE
+from .disney import DisneyIE
+from .dispeak import DigitallySpeakingIE
+from .dropbox import DropboxIE
+from .dropout import (
+ DropoutSeasonIE,
+ DropoutIE
+)
+from .duoplay import DuoplayIE
+from .dw import (
+ DWIE,
+ DWArticleIE,
+)
+from .eagleplatform import EaglePlatformIE, ClipYouEmbedIE
+from .ebaumsworld import EbaumsWorldIE
+from .ebay import EbayIE
+from .egghead import (
+ EggheadCourseIE,
+ EggheadLessonIE,
+)
+from .eighttracks import EightTracksIE
+from .einthusan import EinthusanIE
+from .eitb import EitbIE
+from .elementorembed import ElementorEmbedIE
+from .elonet import ElonetIE
+from .elpais import ElPaisIE
+from .eltrecetv import ElTreceTVIE
+from .embedly import EmbedlyIE
+from .epicon import (
+ EpiconIE,
+ EpiconSeriesIE,
+)
+from .epidemicsound import EpidemicSoundIE
+from .eplus import EplusIbIE
+from .epoch import EpochIE
+from .eporner import EpornerIE
+from .erocast import ErocastIE
+from .eroprofile import (
+ EroProfileIE,
+ EroProfileAlbumIE,
+)
+from .err import ERRJupiterIE
+from .ertgr import (
+ ERTFlixCodenameIE,
+ ERTFlixIE,
+ ERTWebtvEmbedIE,
+)
+from .espn import (
+ ESPNIE,
+ WatchESPNIE,
+ ESPNArticleIE,
+ FiveThirtyEightIE,
+ ESPNCricInfoIE,
+)
+from .ettutv import EttuTvIE
+from .europa import EuropaIE, EuroParlWebstreamIE
+from .europeantour import EuropeanTourIE
+from .eurosport import EurosportIE
+from .euscreen import EUScreenIE
+from .expressen import ExpressenIE
+from .eyedotv import EyedoTVIE
+from .facebook import (
+ FacebookIE,
+ FacebookPluginsVideoIE,
+ FacebookRedirectURLIE,
+ FacebookReelIE,
+ FacebookAdsIE,
+)
+from .fancode import (
+ FancodeVodIE,
+ FancodeLiveIE
+)
+
+from .faz import FazIE
+from .fc2 import (
+ FC2IE,
+ FC2EmbedIE,
+ FC2LiveIE,
+)
+from .fczenit import FczenitIE
+from .fifa import FifaIE
+from .filmon import (
+ FilmOnIE,
+ FilmOnChannelIE,
+)
+from .filmweb import FilmwebIE
+from .firsttv import FirstTVIE
+from .fivetv import FiveTVIE
+from .flextv import FlexTVIE
+from .flickr import FlickrIE
+from .floatplane import (
+ FloatplaneIE,
+ FloatplaneChannelIE,
+)
+from .folketinget import FolketingetIE
+from .footyroom import FootyRoomIE
+from .formula1 import Formula1IE
+from .fourtube import (
+ FourTubeIE,
+ PornTubeIE,
+ PornerBrosIE,
+ FuxIE,
+)
+from .fox import FOXIE
+from .fox9 import (
+ FOX9IE,
+ FOX9NewsIE,
+)
+from .foxnews import (
+ FoxNewsIE,
+ FoxNewsArticleIE,
+ FoxNewsVideoIE,
+)
+from .foxsports import FoxSportsIE
+from .fptplay import FptplayIE
+from .franceinter import FranceInterIE
+from .francetv import (
+ FranceTVIE,
+ FranceTVSiteIE,
+ FranceTVInfoIE,
+)
+from .freesound import FreesoundIE
+from .freespeech import FreespeechIE
+from .frontendmasters import (
+ FrontendMastersIE,
+ FrontendMastersLessonIE,
+ FrontendMastersCourseIE
+)
+from .freetv import (
+ FreeTvIE,
+ FreeTvMoviesIE,
+)
+from .fujitv import FujiTVFODPlus7IE
+from .funimation import (
+ FunimationIE,
+ FunimationPageIE,
+ FunimationShowIE,
+)
+from .funk import FunkIE
+from .funker530 import Funker530IE
+from .fuyintv import FuyinTVIE
+from .gab import (
+ GabTVIE,
+ GabIE,
+)
+from .gaia import GaiaIE
+from .gamejolt import (
+ GameJoltIE,
+ GameJoltUserIE,
+ GameJoltGameIE,
+ GameJoltGameSoundtrackIE,
+ GameJoltCommunityIE,
+ GameJoltSearchIE,
+)
+from .gamespot import GameSpotIE
+from .gamestar import GameStarIE
+from .gaskrank import GaskrankIE
+from .gazeta import GazetaIE
+from .gdcvault import GDCVaultIE
+from .gedidigital import GediDigitalIE
+from .generic import GenericIE
+from .genius import (
+ GeniusIE,
+ GeniusLyricsIE,
+)
+from .getcourseru import (
+ GetCourseRuPlayerIE,
+ GetCourseRuIE
+)
+from .gettr import (
+ GettrIE,
+ GettrStreamingIE,
+)
+from .giantbomb import GiantBombIE
+from .glide import GlideIE
+from .globalplayer import (
+ GlobalPlayerLiveIE,
+ GlobalPlayerLivePlaylistIE,
+ GlobalPlayerAudioIE,
+ GlobalPlayerAudioEpisodeIE,
+ GlobalPlayerVideoIE
+)
+from .globo import (
+ GloboIE,
+ GloboArticleIE,
+)
+from .gmanetwork import GMANetworkVideoIE
+from .go import GoIE
+from .godtube import GodTubeIE
+from .gofile import GofileIE
+from .golem import GolemIE
+from .goodgame import GoodGameIE
+from .googledrive import (
+ GoogleDriveIE,
+ GoogleDriveFolderIE,
+)
+from .googlepodcasts import (
+ GooglePodcastsIE,
+ GooglePodcastsFeedIE,
+)
+from .googlesearch import GoogleSearchIE
+from .gopro import GoProIE
+from .goplay import GoPlayIE
+from .goshgay import GoshgayIE
+from .gotostage import GoToStageIE
+from .gputechconf import GPUTechConfIE
+from .gronkh import (
+ GronkhIE,
+ GronkhFeedIE,
+ GronkhVodsIE
+)
+from .groupon import GrouponIE
+from .harpodeon import HarpodeonIE
+from .hbo import HBOIE
+from .hearthisat import HearThisAtIE
+from .heise import HeiseIE
+from .hellporno import HellPornoIE
+from .hgtv import HGTVComShowIE
+from .hketv import HKETVIE
+from .hidive import HiDiveIE
+from .historicfilms import HistoricFilmsIE
+from .hitrecord import HitRecordIE
+from .hollywoodreporter import (
+ HollywoodReporterIE,
+ HollywoodReporterPlaylistIE,
+)
+from .holodex import HolodexIE
+from .hotnewhiphop import HotNewHipHopIE
+from .hotstar import (
+ HotStarIE,
+ HotStarPrefixIE,
+ HotStarPlaylistIE,
+ HotStarSeasonIE,
+ HotStarSeriesIE,
+)
+from .hrefli import HrefLiRedirectIE
+from .hrfensehen import HRFernsehenIE
+from .hrti import (
+ HRTiIE,
+ HRTiPlaylistIE,
+)
+from .hse import (
+ HSEShowIE,
+ HSEProductIE,
+)
+from .genericembeds import (
+ HTML5MediaEmbedIE,
+ QuotedHTMLIE,
+)
+from .huajiao import HuajiaoIE
+from .huya import HuyaLiveIE
+from .huffpost import HuffPostIE
+from .hungama import (
+ HungamaIE,
+ HungamaSongIE,
+ HungamaAlbumPlaylistIE,
+)
+from .hypem import HypemIE
+from .hypergryph import MonsterSirenHypergryphMusicIE
+from .hytale import HytaleIE
+from .icareus import IcareusIE
+from .ichinanalive import (
+ IchinanaLiveIE,
+ IchinanaLiveClipIE,
+)
+from .idolplus import IdolPlusIE
+from .ign import (
+ IGNIE,
+ IGNVideoIE,
+ IGNArticleIE,
+)
+from .iheart import (
+ IHeartRadioIE,
+ IHeartRadioPodcastIE,
+)
+from .ilpost import IlPostIE
+from .iltalehti import IltalehtiIE
+from .imdb import (
+ ImdbIE,
+ ImdbListIE
+)
+from .imgur import (
+ ImgurIE,
+ ImgurAlbumIE,
+ ImgurGalleryIE,
+)
+from .ina import InaIE
+from .inc import IncIE
+from .indavideo import IndavideoEmbedIE
+from .infoq import InfoQIE
+from .instagram import (
+ InstagramIE,
+ InstagramIOSIE,
+ InstagramUserIE,
+ InstagramTagIE,
+ InstagramStoryIE,
+)
+from .internazionale import InternazionaleIE
+from .internetvideoarchive import InternetVideoArchiveIE
+from .iprima import (
+ IPrimaIE,
+ IPrimaCNNIE
+)
+from .iqiyi import (
+ IqiyiIE,
+ IqIE,
+ IqAlbumIE
+)
+from .islamchannel import (
+ IslamChannelIE,
+ IslamChannelSeriesIE,
+)
+from .israelnationalnews import IsraelNationalNewsIE
+from .itprotv import (
+ ITProTVIE,
+ ITProTVCourseIE
+)
+from .itv import (
+ ITVIE,
+ ITVBTCCIE,
+)
+from .ivi import (
+ IviIE,
+ IviCompilationIE
+)
+from .ivideon import IvideonIE
+from .iwara import (
+ IwaraIE,
+ IwaraPlaylistIE,
+ IwaraUserIE,
+)
+from .ixigua import IxiguaIE
+from .izlesene import IzleseneIE
+from .jable import (
+ JableIE,
+ JablePlaylistIE,
+)
+from .jamendo import (
+ JamendoIE,
+ JamendoAlbumIE,
+)
+from .japandiet import (
+ ShugiinItvLiveIE,
+ ShugiinItvLiveRoomIE,
+ ShugiinItvVodIE,
+ SangiinInstructionIE,
+ SangiinIE,
+)
+from .jeuxvideo import JeuxVideoIE
+from .jiosaavn import (
+ JioSaavnSongIE,
+ JioSaavnAlbumIE,
+)
+from .jove import JoveIE
+from .joj import JojIE
+from .joqrag import JoqrAgIE
+from .jstream import JStreamIE
+from .jtbc import (
+ JTBCIE,
+ JTBCProgramIE,
+)
+from .jwplatform import JWPlatformIE
+from .kakao import KakaoIE
+from .kaltura import KalturaIE
+from .kankanews import KankaNewsIE
+from .karaoketv import KaraoketvIE
+from .kelbyone import KelbyOneIE
+from .khanacademy import (
+ KhanAcademyIE,
+ KhanAcademyUnitIE,
+)
+from .kick import (
+ KickIE,
+ KickVODIE,
+)
+from .kicker import KickerIE
+from .kickstarter import KickStarterIE
+from .kinja import KinjaEmbedIE
+from .kinopoisk import KinoPoiskIE
+from .kommunetv import KommunetvIE
+from .kompas import KompasVideoIE
+from .koo import KooIE
+from .kth import KTHIE
+from .krasview import KrasViewIE
+from .ku6 import Ku6IE
+from .kukululive import KukuluLiveIE
+from .kuwo import (
+ KuwoIE,
+ KuwoAlbumIE,
+ KuwoChartIE,
+ KuwoSingerIE,
+ KuwoCategoryIE,
+ KuwoMvIE,
+)
+from .la7 import (
+ LA7IE,
+ LA7PodcastEpisodeIE,
+ LA7PodcastIE,
+)
+from .lastfm import (
+ LastFMIE,
+ LastFMPlaylistIE,
+ LastFMUserIE,
+)
+from .laxarxames import LaXarxaMesIE
+from .lbry import (
+ LBRYIE,
+ LBRYChannelIE,
+ LBRYPlaylistIE,
+)
+from .lci import LCIIE
+from .lcp import (
+ LcpPlayIE,
+ LcpIE,
+)
+from .lecture2go import Lecture2GoIE
+from .lecturio import (
+ LecturioIE,
+ LecturioCourseIE,
+ LecturioDeCourseIE,
+)
+from .leeco import (
+ LeIE,
+ LePlaylistIE,
+ LetvCloudIE,
+)
+from .lefigaro import (
+ LeFigaroVideoEmbedIE,
+ LeFigaroVideoSectionIE,
+)
+from .lego import LEGOIE
+from .lemonde import LemondeIE
+from .lenta import LentaIE
+from .libraryofcongress import LibraryOfCongressIE
+from .libsyn import LibsynIE
+from .lifenews import (
+ LifeNewsIE,
+ LifeEmbedIE,
+)
+from .likee import (
+ LikeeIE,
+ LikeeUserIE
+)
+from .limelight import (
+ LimelightMediaIE,
+ LimelightChannelIE,
+ LimelightChannelListIE,
+)
+from .linkedin import (
+ LinkedInIE,
+ LinkedInLearningIE,
+ LinkedInLearningCourseIE,
+)
+from .liputan6 import Liputan6IE
+from .listennotes import ListenNotesIE
+from .litv import LiTVIE
+from .livejournal import LiveJournalIE
+from .livestream import (
+ LivestreamIE,
+ LivestreamOriginalIE,
+ LivestreamShortenerIE,
+)
+from .livestreamfails import LivestreamfailsIE
+from .lnkgo import (
+ LnkGoIE,
+ LnkIE,
+)
+from .lovehomeporn import LoveHomePornIE
+from .lrt import (
+ LRTVODIE,
+ LRTStreamIE
+)
+from .lsm import (
+ LSMLREmbedIE,
+ LSMLTVEmbedIE,
+ LSMReplayIE
+)
+from .lumni import LumniIE
+from .lynda import (
+ LyndaIE,
+ LyndaCourseIE
+)
+from .maariv import MaarivIE
+from .magellantv import MagellanTVIE
+from .magentamusik import MagentaMusikIE
+from .mailru import (
+ MailRuIE,
+ MailRuMusicIE,
+ MailRuMusicSearchIE,
+)
+from .mainstreaming import MainStreamingIE
+from .mangomolo import (
+ MangomoloVideoIE,
+ MangomoloLiveIE,
+)
+from .manoto import (
+ ManotoTVIE,
+ ManotoTVShowIE,
+ ManotoTVLiveIE,
+)
+from .manyvids import ManyVidsIE
+from .maoritv import MaoriTVIE
+from .markiza import (
+ MarkizaIE,
+ MarkizaPageIE,
+)
+from .massengeschmacktv import MassengeschmackTVIE
+from .masters import MastersIE
+from .matchtv import MatchTVIE
+from .mbn import MBNIE
+from .mdr import MDRIE
+from .medaltv import MedalTVIE
+from .mediaite import MediaiteIE
+from .mediaklikk import MediaKlikkIE
+from .mediaset import (
+ MediasetIE,
+ MediasetShowIE,
+)
+from .mediasite import (
+ MediasiteIE,
+ MediasiteCatalogIE,
+ MediasiteNamedCatalogIE,
+)
+from .mediastream import (
+ MediaStreamIE,
+ WinSportsVideoIE,
+)
+from .mediaworksnz import MediaWorksNZVODIE
+from .medici import MediciIE
+from .megaphone import MegaphoneIE
+from .meipai import MeipaiIE
+from .melonvod import MelonVODIE
+from .metacritic import MetacriticIE
+from .mgtv import MGTVIE
+from .microsoftstream import MicrosoftStreamIE
+from .microsoftvirtualacademy import (
+ MicrosoftVirtualAcademyIE,
+ MicrosoftVirtualAcademyCourseIE,
+)
+from .microsoftembed import MicrosoftEmbedIE
+from .mildom import (
+ MildomIE,
+ MildomVodIE,
+ MildomClipIE,
+ MildomUserVodIE,
+)
+from .minds import (
+ MindsIE,
+ MindsChannelIE,
+ MindsGroupIE,
+)
+from .minoto import MinotoIE
+from .mirrativ import (
+ MirrativIE,
+ MirrativUserIE,
+)
+from .mirrorcouk import MirrorCoUKIE
+from .mit import TechTVMITIE, OCWMITIE
+from .mitele import MiTeleIE
+from .mixch import (
+ MixchIE,
+ MixchArchiveIE,
+)
+from .mixcloud import (
+ MixcloudIE,
+ MixcloudUserIE,
+ MixcloudPlaylistIE,
+)
+from .mlb import (
+ MLBIE,
+ MLBVideoIE,
+ MLBTVIE,
+ MLBArticleIE,
+)
+from .mlssoccer import MLSSoccerIE
+from .mocha import MochaVideoIE
+from .mojvideo import MojvideoIE
+from .monstercat import MonstercatIE
+from .motherless import (
+ MotherlessIE,
+ MotherlessGroupIE,
+ MotherlessGalleryIE,
+ MotherlessUploaderIE,
+)
+from .motorsport import MotorsportIE
+from .moviepilot import MoviepilotIE
+from .moview import MoviewPlayIE
+from .moviezine import MoviezineIE
+from .movingimage import MovingImageIE
+from .msn import MSNIE
+from .mtv import (
+ MTVIE,
+ MTVVideoIE,
+ MTVServicesEmbeddedIE,
+ MTVDEIE,
+ MTVJapanIE,
+ MTVItaliaIE,
+ MTVItaliaProgrammaIE,
+)
+from .muenchentv import MuenchenTVIE
+from .murrtube import MurrtubeIE, MurrtubeUserIE
+from .museai import MuseAIIE
+from .musescore import MuseScoreIE
+from .musicdex import (
+ MusicdexSongIE,
+ MusicdexAlbumIE,
+ MusicdexArtistIE,
+ MusicdexPlaylistIE,
+)
+from .mx3 import (
+ Mx3IE,
+ Mx3NeoIE,
+ Mx3VolksmusikIE,
+)
+from .mxplayer import (
+ MxplayerIE,
+ MxplayerShowIE,
+)
+from .myspace import MySpaceIE, MySpaceAlbumIE
+from .myspass import MySpassIE
+from .myvideoge import MyVideoGeIE
+from .myvidster import MyVidsterIE
+from .mzaalo import MzaaloIE
+from .n1 import (
+ N1InfoAssetIE,
+ N1InfoIIE,
+)
+from .nate import (
+ NateIE,
+ NateProgramIE,
+)
+from .nationalgeographic import (
+ NationalGeographicVideoIE,
+ NationalGeographicTVIE,
+)
+from .naver import (
+ NaverIE,
+ NaverLiveIE,
+ NaverNowIE,
+)
+from .nba import (
+ NBAWatchEmbedIE,
+ NBAWatchIE,
+ NBAWatchCollectionIE,
+ NBAEmbedIE,
+ NBAIE,
+ NBAChannelIE,
+)
+from .nbc import (
+ NBCIE,
+ NBCNewsIE,
+ NBCOlympicsIE,
+ NBCOlympicsStreamIE,
+ NBCSportsIE,
+ NBCSportsStreamIE,
+ NBCSportsVPlayerIE,
+ NBCStationsIE,
+)
+from .ndr import (
+ NDRIE,
+ NJoyIE,
+ NDREmbedBaseIE,
+ NDREmbedIE,
+ NJoyEmbedIE,
+)
+from .ndtv import NDTVIE
+from .nebula import (
+ NebulaIE,
+ NebulaClassIE,
+ NebulaSubscriptionsIE,
+ NebulaChannelIE,
+)
+from .nekohacker import NekoHackerIE
+from .nerdcubed import NerdCubedFeedIE
+from .netzkino import NetzkinoIE
+from .neteasemusic import (
+ NetEaseMusicIE,
+ NetEaseMusicAlbumIE,
+ NetEaseMusicSingerIE,
+ NetEaseMusicListIE,
+ NetEaseMusicMvIE,
+ NetEaseMusicProgramIE,
+ NetEaseMusicDjRadioIE,
+)
+from .netverse import (
+ NetverseIE,
+ NetversePlaylistIE,
+ NetverseSearchIE,
+)
+from .newgrounds import (
+ NewgroundsIE,
+ NewgroundsPlaylistIE,
+ NewgroundsUserIE,
+)
+from .newspicks import NewsPicksIE
+from .newsy import NewsyIE
+from .nextmedia import (
+ NextMediaIE,
+ NextMediaActionNewsIE,
+ AppleDailyIE,
+ NextTVIE,
+)
+from .nexx import (
+ NexxIE,
+ NexxEmbedIE,
+)
+from .nfb import (
+ NFBIE,
+ NFBSeriesIE,
+)
+from .nfhsnetwork import NFHSNetworkIE
+from .nfl import (
+ NFLIE,
+ NFLArticleIE,
+ NFLPlusEpisodeIE,
+ NFLPlusReplayIE,
+)
+from .nhk import (
+ NhkVodIE,
+ NhkVodProgramIE,
+ NhkForSchoolBangumiIE,
+ NhkForSchoolSubjectIE,
+ NhkForSchoolProgramListIE,
+ NhkRadioNewsPageIE,
+ NhkRadiruIE,
+ NhkRadiruLiveIE,
+)
+from .nhl import NHLIE
+from .nick import (
+ NickIE,
+ NickBrIE,
+ NickDeIE,
+ NickRuIE,
+)
+from .niconico import (
+ NiconicoIE,
+ NiconicoPlaylistIE,
+ NiconicoUserIE,
+ NiconicoSeriesIE,
+ NiconicoHistoryIE,
+ NicovideoSearchDateIE,
+ NicovideoSearchIE,
+ NicovideoSearchURLIE,
+ NicovideoTagURLIE,
+ NiconicoLiveIE,
+)
+from .ninaprotocol import NinaProtocolIE
+from .ninecninemedia import (
+ NineCNineMediaIE,
+ CPTwentyFourIE,
+)
+from .niconicochannelplus import (
+ NiconicoChannelPlusIE,
+ NiconicoChannelPlusChannelVideosIE,
+ NiconicoChannelPlusChannelLivesIE,
+)
+from .ninegag import NineGagIE
+from .ninenews import NineNewsIE
+from .ninenow import NineNowIE
+from .nintendo import NintendoIE
+from .nitter import NitterIE
+from .nobelprize import NobelPrizeIE
+from .noice import NoicePodcastIE
+from .nonktube import NonkTubeIE
+from .noodlemagazine import NoodleMagazineIE
+from .noovo import NoovoIE
+from .nosnl import NOSNLArticleIE
+from .nova import (
+ NovaEmbedIE,
+ NovaIE,
+)
+from .novaplay import NovaPlayIE
+from .nowness import (
+ NownessIE,
+ NownessPlaylistIE,
+ NownessSeriesIE,
+)
+from .noz import NozIE
+from .npo import (
+ AndereTijdenIE,
+ NPOIE,
+ NPOLiveIE,
+ NPORadioIE,
+ NPORadioFragmentIE,
+ SchoolTVIE,
+ HetKlokhuisIE,
+ VPROIE,
+ WNLIE,
+)
+from .npr import NprIE
+from .nrk import (
+ NRKIE,
+ NRKPlaylistIE,
+ NRKSkoleIE,
+ NRKTVIE,
+ NRKTVDirekteIE,
+ NRKRadioPodkastIE,
+ NRKTVEpisodeIE,
+ NRKTVEpisodesIE,
+ NRKTVSeasonIE,
+ NRKTVSeriesIE,
+)
+from .nrl import NRLTVIE
+from .ntvcojp import NTVCoJpCUIE
+from .ntvde import NTVDeIE
+from .ntvru import NTVRuIE
+from .nubilesporn import NubilesPornIE
+from .nytimes import (
+ NYTimesIE,
+ NYTimesArticleIE,
+ NYTimesCookingIE,
+ NYTimesCookingRecipeIE,
+)
+from .nuum import (
+ NuumLiveIE,
+ NuumTabIE,
+ NuumMediaIE,
+)
+from .nuvid import NuvidIE
+from .nzherald import NZHeraldIE
+from .nzonscreen import NZOnScreenIE
+from .nzz import NZZIE
+from .odkmedia import OnDemandChinaEpisodeIE
+from .odnoklassniki import OdnoklassnikiIE
+from .oftv import (
+ OfTVIE,
+ OfTVPlaylistIE
+)
+from .oktoberfesttv import OktoberfestTVIE
+from .olympics import OlympicsReplayIE
+from .on24 import On24IE
+from .ondemandkorea import (
+ OnDemandKoreaIE,
+ OnDemandKoreaProgramIE,
+)
+from .onefootball import OneFootballIE
+from .onenewsnz import OneNewsNZIE
+from .oneplace import OnePlacePodcastIE
+from .onet import (
+ OnetIE,
+ OnetChannelIE,
+ OnetMVPIE,
+ OnetPlIE,
+)
+from .onionstudios import OnionStudiosIE
+from .opencast import (
+ OpencastIE,
+ OpencastPlaylistIE,
+)
+from .openrec import (
+ OpenRecIE,
+ OpenRecCaptureIE,
+ OpenRecMovieIE,
+)
+from .ora import OraTVIE
+from .orf import (
+ ORFTVthekIE,
+ ORFFM4StoryIE,
+ ORFONIE,
+ ORFRadioIE,
+ ORFPodcastIE,
+ ORFIPTVIE,
+)
+from .outsidetv import OutsideTVIE
+from .owncloud import OwnCloudIE
+from .packtpub import (
+ PacktPubIE,
+ PacktPubCourseIE,
+)
+from .palcomp3 import (
+ PalcoMP3IE,
+ PalcoMP3ArtistIE,
+ PalcoMP3VideoIE,
+)
+from .panopto import (
+ PanoptoIE,
+ PanoptoListIE,
+ PanoptoPlaylistIE
+)
+from .paramountplus import (
+ ParamountPlusIE,
+ ParamountPlusSeriesIE,
+)
+from .parler import ParlerIE
+from .parlview import ParlviewIE
+from .patreon import (
+ PatreonIE,
+ PatreonCampaignIE
+)
+from .pbs import PBSIE, PBSKidsIE
+from .pearvideo import PearVideoIE
+from .peekvids import PeekVidsIE, PlayVidsIE
+from .peertube import (
+ PeerTubeIE,
+ PeerTubePlaylistIE,
+)
+from .peertv import PeerTVIE
+from .peloton import (
+ PelotonIE,
+ PelotonLiveIE
+)
+from .performgroup import PerformGroupIE
+from .periscope import (
+ PeriscopeIE,
+ PeriscopeUserIE,
+)
+from .pgatour import PGATourIE
+from .philharmoniedeparis import PhilharmonieDeParisIE
+from .phoenix import PhoenixIE
+from .photobucket import PhotobucketIE
+from .piapro import PiaproIE
+from .piaulizaportal import PIAULIZAPortalIE
+from .picarto import (
+ PicartoIE,
+ PicartoVodIE,
+)
+from .piksel import PikselIE
+from .pinkbike import PinkbikeIE
+from .pinterest import (
+ PinterestIE,
+ PinterestCollectionIE,
+)
+from .pixivsketch import (
+ PixivSketchIE,
+ PixivSketchUserIE,
+)
+from .pladform import PladformIE
+from .planetmarathi import PlanetMarathiIE
+from .platzi import (
+ PlatziIE,
+ PlatziCourseIE,
+)
+from .playplustv import PlayPlusTVIE
+from .playsuisse import PlaySuisseIE
+from .playtvak import PlaytvakIE
+from .playwire import PlaywireIE
+from .plutotv import PlutoTVIE
+from .pluralsight import (
+ PluralsightIE,
+ PluralsightCourseIE,
+)
+from .podbayfm import PodbayFMIE, PodbayFMChannelIE
+from .podchaser import PodchaserIE
+from .podomatic import PodomaticIE
+from .pokemon import (
+ PokemonIE,
+ PokemonWatchIE,
+)
+from .pokergo import (
+ PokerGoIE,
+ PokerGoCollectionIE,
+)
+from .polsatgo import PolsatGoIE
+from .polskieradio import (
+ PolskieRadioIE,
+ PolskieRadioLegacyIE,
+ PolskieRadioAuditionIE,
+ PolskieRadioCategoryIE,
+ PolskieRadioPlayerIE,
+ PolskieRadioPodcastIE,
+ PolskieRadioPodcastListIE,
+)
+from .popcorntimes import PopcorntimesIE
+from .popcorntv import PopcornTVIE
+from .porn91 import Porn91IE
+from .pornbox import PornboxIE
+from .pornflip import PornFlipIE
+from .pornhub import (
+ PornHubIE,
+ PornHubUserIE,
+ PornHubPlaylistIE,
+ PornHubPagedVideoListIE,
+ PornHubUserVideosUploadIE,
+)
+from .pornotube import PornotubeIE
+from .pornovoisines import PornoVoisinesIE
+from .pornoxo import PornoXOIE
+from .puhutv import (
+ PuhuTVIE,
+ PuhuTVSerieIE,
+)
+from .pr0gramm import Pr0grammIE
+from .prankcast import PrankCastIE, PrankCastPostIE
+from .premiershiprugby import PremiershipRugbyIE
+from .presstv import PressTVIE
+from .projectveritas import ProjectVeritasIE
+from .prosiebensat1 import ProSiebenSat1IE
+from .prx import (
+ PRXStoryIE,
+ PRXSeriesIE,
+ PRXAccountIE,
+ PRXStoriesSearchIE,
+ PRXSeriesSearchIE
+)
+from .puls4 import Puls4IE
+from .pyvideo import PyvideoIE
+from .qdance import QDanceIE
+from .qingting import QingTingIE
+from .qqmusic import (
+ QQMusicIE,
+ QQMusicSingerIE,
+ QQMusicAlbumIE,
+ QQMusicToplistIE,
+ QQMusicPlaylistIE,
+)
+from .r7 import (
+ R7IE,
+ R7ArticleIE,
+)
+from .radiko import RadikoIE, RadikoRadioIE
+from .radiocanada import (
+ RadioCanadaIE,
+ RadioCanadaAudioVideoIE,
+)
+from .radiocomercial import (
+ RadioComercialIE,
+ RadioComercialPlaylistIE,
+)
+from .radiode import RadioDeIE
+from .radiojavan import RadioJavanIE
+from .radiofrance import (
+ FranceCultureIE,
+ RadioFranceIE,
+ RadioFranceLiveIE,
+ RadioFrancePodcastIE,
+ RadioFranceProfileIE,
+ RadioFranceProgramScheduleIE,
+)
+from .radiozet import RadioZetPodcastIE
+from .radiokapital import (
+ RadioKapitalIE,
+ RadioKapitalShowIE,
+)
+from .radlive import (
+ RadLiveIE,
+ RadLiveChannelIE,
+ RadLiveSeasonIE,
+)
+from .rai import (
+ RaiIE,
+ RaiCulturaIE,
+ RaiPlayIE,
+ RaiPlayLiveIE,
+ RaiPlayPlaylistIE,
+ RaiPlaySoundIE,
+ RaiPlaySoundLiveIE,
+ RaiPlaySoundPlaylistIE,
+ RaiNewsIE,
+ RaiSudtirolIE,
+)
+from .raywenderlich import (
+ RayWenderlichIE,
+ RayWenderlichCourseIE,
+)
+from .rbgtum import (
+ RbgTumIE,
+ RbgTumCourseIE,
+ RbgTumNewCourseIE,
+)
+from .rcs import (
+ RCSIE,
+ RCSEmbedsIE,
+ RCSVariousIE,
+)
+from .rcti import (
+ RCTIPlusIE,
+ RCTIPlusSeriesIE,
+ RCTIPlusTVIE,
+)
+from .rds import RDSIE
+from .redbee import ParliamentLiveUKIE, RTBFIE
+from .redbulltv import (
+ RedBullTVIE,
+ RedBullEmbedIE,
+ RedBullTVRrnContentIE,
+ RedBullIE,
+)
+from .reddit import RedditIE
+from .redge import RedCDNLivxIE
+from .redgifs import (
+ RedGifsIE,
+ RedGifsSearchIE,
+ RedGifsUserIE,
+)
+from .redtube import RedTubeIE
+from .rentv import (
+ RENTVIE,
+ RENTVArticleIE,
+)
+from .restudy import RestudyIE
+from .reuters import ReutersIE
+from .reverbnation import ReverbNationIE
+from .rheinmaintv import RheinMainTVIE
+from .ridehome import RideHomeIE
+from .rinsefm import (
+ RinseFMIE,
+ RinseFMArtistPlaylistIE,
+)
+from .rmcdecouverte import RMCDecouverteIE
+from .rockstargames import RockstarGamesIE
+from .rokfin import (
+ RokfinIE,
+ RokfinStackIE,
+ RokfinChannelIE,
+ RokfinSearchIE,
+)
+from .roosterteeth import RoosterTeethIE, RoosterTeethSeriesIE
+from .rottentomatoes import RottenTomatoesIE
+from .rozhlas import (
+ RozhlasIE,
+ RozhlasVltavaIE,
+ MujRozhlasIE,
+)
+from .rte import RteIE, RteRadioIE
+from .rtlnl import (
+ RtlNlIE,
+ RTLLuTeleVODIE,
+ RTLLuArticleIE,
+ RTLLuLiveIE,
+ RTLLuRadioIE,
+)
+from .rtl2 import RTL2IE
+from .rtnews import (
+ RTNewsIE,
+ RTDocumentryIE,
+ RTDocumentryPlaylistIE,
+ RuptlyIE,
+)
+from .rtp import RTPIE
+from .rtrfm import RTRFMIE
+from .rts import RTSIE
+from .rtvcplay import (
+ RTVCPlayIE,
+ RTVCPlayEmbedIE,
+ RTVCKalturaIE,
+)
+from .rtve import (
+ RTVEALaCartaIE,
+ RTVEAudioIE,
+ RTVELiveIE,
+ RTVEInfantilIE,
+ RTVETelevisionIE,
+)
+from .rtvs import RTVSIE
+from .rtvslo import RTVSLOIE
+from .rule34video import Rule34VideoIE
+from .rumble import (
+ RumbleEmbedIE,
+ RumbleIE,
+ RumbleChannelIE,
+)
+from .rudovideo import RudoVideoIE
+from .rutube import (
+ RutubeIE,
+ RutubeChannelIE,
+ RutubeEmbedIE,
+ RutubeMovieIE,
+ RutubePersonIE,
+ RutubePlaylistIE,
+ RutubeTagsIE,
+)
+from .glomex import (
+ GlomexIE,
+ GlomexEmbedIE,
+)
+from .megatvcom import (
+ MegaTVComIE,
+ MegaTVComEmbedIE,
+)
+from .antenna import (
+ AntennaGrWatchIE,
+ Ant1NewsGrArticleIE,
+ Ant1NewsGrEmbedIE,
+)
+from .rutv import RUTVIE
+from .ruutu import RuutuIE
+from .ruv import (
+ RuvIE,
+ RuvSpilaIE
+)
+from .s4c import (
+ S4CIE,
+ S4CSeriesIE
+)
+from .safari import (
+ SafariIE,
+ SafariApiIE,
+ SafariCourseIE,
+)
+from .saitosan import SaitosanIE
+from .samplefocus import SampleFocusIE
+from .sapo import SapoIE
+from .sbs import SBSIE
+from .sbscokr import (
+ SBSCoKrIE,
+ SBSCoKrAllvodProgramIE,
+ SBSCoKrProgramsVodIE,
+)
+from .screen9 import Screen9IE
+from .screencast import ScreencastIE
+from .screencastify import ScreencastifyIE
+from .screencastomatic import ScreencastOMaticIE
+from .scrippsnetworks import (
+ ScrippsNetworksWatchIE,
+ ScrippsNetworksIE,
+)
+from .scte import (
+ SCTEIE,
+ SCTECourseIE,
+)
+from .scrolller import ScrolllerIE
+from .sejmpl import SejmIE
+from .senalcolombia import SenalColombiaLiveIE
+from .senategov import SenateISVPIE, SenateGovIE
+from .sendtonews import SendtoNewsIE
+from .servus import ServusIE
+from .sevenplus import SevenPlusIE
+from .sexu import SexuIE
+from .seznamzpravy import (
+ SeznamZpravyIE,
+ SeznamZpravyArticleIE,
+)
+from .shahid import (
+ ShahidIE,
+ ShahidShowIE,
+)
+from .sharevideos import ShareVideosEmbedIE
+from .sibnet import SibnetEmbedIE
+from .shemaroome import ShemarooMeIE
+from .showroomlive import ShowRoomLiveIE
+from .simplecast import (
+ SimplecastIE,
+ SimplecastEpisodeIE,
+ SimplecastPodcastIE,
+)
+from .sina import SinaIE
+from .sixplay import SixPlayIE
+from .skeb import SkebIE
+from .skyit import (
+ SkyItPlayerIE,
+ SkyItVideoIE,
+ SkyItVideoLiveIE,
+ SkyItIE,
+ SkyItArteIE,
+ CieloTVItIE,
+ TV8ItIE,
+)
+from .skylinewebcams import SkylineWebcamsIE
+from .skynewsarabia import (
+ SkyNewsArabiaIE,
+ SkyNewsArabiaArticleIE,
+)
+from .skynewsau import SkyNewsAUIE
+from .sky import (
+ SkyNewsIE,
+ SkyNewsStoryIE,
+ SkySportsIE,
+ SkySportsNewsIE,
+)
+from .slideshare import SlideshareIE
+from .slideslive import SlidesLiveIE
+from .slutload import SlutloadIE
+from .smotrim import SmotrimIE
+from .snotr import SnotrIE
+from .sohu import (
+ SohuIE,
+ SohuVIE,
+)
+from .sonyliv import (
+ SonyLIVIE,
+ SonyLIVSeriesIE,
+)
+from .soundcloud import (
+ SoundcloudEmbedIE,
+ SoundcloudIE,
+ SoundcloudSetIE,
+ SoundcloudRelatedIE,
+ SoundcloudUserIE,
+ SoundcloudUserPermalinkIE,
+ SoundcloudTrackStationIE,
+ SoundcloudPlaylistIE,
+ SoundcloudSearchIE,
+)
+from .soundgasm import (
+ SoundgasmIE,
+ SoundgasmProfileIE
+)
+from .southpark import (
+ SouthParkIE,
+ SouthParkDeIE,
+ SouthParkDkIE,
+ SouthParkEsIE,
+ SouthParkLatIE,
+ SouthParkNlIE
+)
+from .sovietscloset import (
+ SovietsClosetIE,
+ SovietsClosetPlaylistIE
+)
+from .spankbang import (
+ SpankBangIE,
+ SpankBangPlaylistIE,
+)
+from .spiegel import SpiegelIE
+from .spike import (
+ BellatorIE,
+ ParamountNetworkIE,
+)
+from .stageplus import StagePlusVODConcertIE
+from .startrek import StarTrekIE
+from .stitcher import (
+ StitcherIE,
+ StitcherShowIE,
+)
+from .sport5 import Sport5IE
+from .sportbox import SportBoxIE
+from .sportdeutschland import SportDeutschlandIE
+from .spotify import (
+ SpotifyIE,
+ SpotifyShowIE,
+)
+from .spreaker import (
+ SpreakerIE,
+ SpreakerPageIE,
+ SpreakerShowIE,
+ SpreakerShowPageIE,
+)
+from .springboardplatform import SpringboardPlatformIE
+from .sprout import SproutIE
+from .srgssr import (
+ SRGSSRIE,
+ SRGSSRPlayIE,
+)
+from .srmediathek import SRMediathekIE
+from .stacommu import (
+ StacommuLiveIE,
+ StacommuVODIE,
+ TheaterComplexTownVODIE,
+ TheaterComplexTownPPVIE,
+)
+from .stanfordoc import StanfordOpenClassroomIE
+from .startv import StarTVIE
+from .steam import (
+ SteamIE,
+ SteamCommunityBroadcastIE,
+)
+from .storyfire import (
+ StoryFireIE,
+ StoryFireUserIE,
+ StoryFireSeriesIE,
+)
+from .streamable import StreamableIE
+from .streamcz import StreamCZIE
+from .streetvoice import StreetVoiceIE
+from .stretchinternet import StretchInternetIE
+from .stripchat import StripchatIE
+from .stv import STVPlayerIE
+from .substack import SubstackIE
+from .sunporno import SunPornoIE
+from .sverigesradio import (
+ SverigesRadioEpisodeIE,
+ SverigesRadioPublicationIE,
+)
+from .svt import (
+ SVTIE,
+ SVTPageIE,
+ SVTPlayIE,
+ SVTSeriesIE,
+)
+from .swearnet import SwearnetEpisodeIE
+from .syvdk import SYVDKIE
+from .syfy import SyfyIE
+from .sztvhu import SztvHuIE
+from .tagesschau import TagesschauIE
+from .tass import TassIE
+from .tbs import TBSIE
+from .tbsjp import (
+ TBSJPEpisodeIE,
+ TBSJPProgramIE,
+ TBSJPPlaylistIE,
+)
+from .teachable import (
+ TeachableIE,
+ TeachableCourseIE,
+)
+from .teachertube import (
+ TeacherTubeIE,
+ TeacherTubeUserIE,
+)
+from .teachingchannel import TeachingChannelIE
+from .teamcoco import (
+ TeamcocoIE,
+ ConanClassicIE,
+)
+from .teamtreehouse import TeamTreeHouseIE
+from .ted import (
+ TedEmbedIE,
+ TedPlaylistIE,
+ TedSeriesIE,
+ TedTalkIE,
+)
+from .tele5 import Tele5IE
+from .tele13 import Tele13IE
+from .telebruxelles import TeleBruxellesIE
+from .telecaribe import TelecaribePlayIE
+from .telecinco import TelecincoIE
+from .telegraaf import TelegraafIE
+from .telegram import TelegramEmbedIE
+from .telemb import TeleMBIE
+from .telemundo import TelemundoIE
+from .telequebec import (
+ TeleQuebecIE,
+ TeleQuebecSquatIE,
+ TeleQuebecEmissionIE,
+ TeleQuebecLiveIE,
+ TeleQuebecVideoIE,
+)
+from .teletask import TeleTaskIE
+from .telewebion import TelewebionIE
+from .tempo import TempoIE, IVXPlayerIE
+from .tencent import (
+ IflixEpisodeIE,
+ IflixSeriesIE,
+ VQQSeriesIE,
+ VQQVideoIE,
+ WeTvEpisodeIE,
+ WeTvSeriesIE,
+)
+from .tennistv import TennisTVIE
+from .tenplay import (
+ TenPlayIE,
+ TenPlaySeasonIE,
+)
+from .testurl import TestURLIE
+from .tf1 import TF1IE
+from .tfo import TFOIE
+from .theguardian import (
+ TheGuardianPodcastIE,
+ TheGuardianPodcastPlaylistIE,
+)
+from .theholetv import TheHoleTvIE
+from .theintercept import TheInterceptIE
+from .theplatform import (
+ ThePlatformIE,
+ ThePlatformFeedIE,
+)
+from .thestar import TheStarIE
+from .thesun import TheSunIE
+from .theweatherchannel import TheWeatherChannelIE
+from .thisamericanlife import ThisAmericanLifeIE
+from .thisoldhouse import ThisOldHouseIE
+from .thisvid import (
+ ThisVidIE,
+ ThisVidMemberIE,
+ ThisVidPlaylistIE,
+)
+from .threespeak import (
+ ThreeSpeakIE,
+ ThreeSpeakUserIE,
+)
+from .threeqsdn import ThreeQSDNIE
+from .tiktok import (
+ TikTokIE,
+ TikTokUserIE,
+ TikTokSoundIE,
+ TikTokEffectIE,
+ TikTokTagIE,
+ TikTokVMIE,
+ TikTokLiveIE,
+ DouyinIE,
+)
+from .tmz import TMZIE
+from .tnaflix import (
+ TNAFlixNetworkEmbedIE,
+ TNAFlixIE,
+ EMPFlixIE,
+ MovieFapIE,
+)
+from .toggle import (
+ ToggleIE,
+ MeWatchIE,
+)
+from .toggo import ToggoIE
+from .tonline import TOnlineIE
+from .toongoggles import ToonGogglesIE
+from .toutv import TouTvIE
+from .toypics import ToypicsUserIE, ToypicsIE
+from .traileraddict import TrailerAddictIE
+from .triller import (
+ TrillerIE,
+ TrillerUserIE,
+ TrillerShortIE,
+)
+from .trovo import (
+ TrovoIE,
+ TrovoVodIE,
+ TrovoChannelVodIE,
+ TrovoChannelClipIE,
+)
+from .trtcocuk import TrtCocukVideoIE
+from .trtworld import TrtWorldIE
+from .trueid import TrueIDIE
+from .trunews import TruNewsIE
+from .truth import TruthIE
+from .trutv import TruTVIE
+from .tube8 import Tube8IE
+from .tubetugraz import TubeTuGrazIE, TubeTuGrazSeriesIE
+from .tubitv import (
+ TubiTvIE,
+ TubiTvShowIE,
+)
+from .tumblr import TumblrIE
+from .tunein import (
+ TuneInStationIE,
+ TuneInPodcastIE,
+ TuneInPodcastEpisodeIE,
+ TuneInShortenerIE,
+)
+from .tv2 import (
+ TV2IE,
+ TV2ArticleIE,
+ KatsomoIE,
+ MTVUutisetArticleIE,
+)
+from .tv24ua import TV24UAVideoIE
+from .tv2dk import (
+ TV2DKIE,
+ TV2DKBornholmPlayIE,
+)
+from .tv2hu import (
+ TV2HuIE,
+ TV2HuSeriesIE,
+)
+from .tv4 import TV4IE
+from .tv5mondeplus import TV5MondePlusIE
+from .tv5unis import (
+ TV5UnisVideoIE,
+ TV5UnisIE,
+)
+from .tva import (
+ TVAIE,
+ QubIE,
+)
+from .tvanouvelles import (
+ TVANouvellesIE,
+ TVANouvellesArticleIE,
+)
+from .tvc import (
+ TVCIE,
+ TVCArticleIE,
+)
+from .tver import TVerIE
+from .tvigle import TvigleIE
+from .tviplayer import TVIPlayerIE
+from .tvland import TVLandIE
+from .tvn24 import TVN24IE
+from .tvnoe import TVNoeIE
+from .tvopengr import (
+ TVOpenGrWatchIE,
+ TVOpenGrEmbedIE,
+)
+from .tvp import (
+ TVPEmbedIE,
+ TVPIE,
+ TVPStreamIE,
+ TVPVODSeriesIE,
+ TVPVODVideoIE,
+)
+from .tvplay import (
+ TVPlayIE,
+ TVPlayHomeIE,
+)
+from .tvplayer import TVPlayerIE
+from .tweakers import TweakersIE
+from .twentymin import TwentyMinutenIE
+from .twentythreevideo import TwentyThreeVideoIE
+from .twitcasting import (
+ TwitCastingIE,
+ TwitCastingLiveIE,
+ TwitCastingUserIE,
+)
+from .twitch import (
+ TwitchVodIE,
+ TwitchCollectionIE,
+ TwitchVideosIE,
+ TwitchVideosClipsIE,
+ TwitchVideosCollectionsIE,
+ TwitchStreamIE,
+ TwitchClipsIE,
+)
+from .twitter import (
+ TwitterCardIE,
+ TwitterIE,
+ TwitterAmplifyIE,
+ TwitterBroadcastIE,
+ TwitterSpacesIE,
+ TwitterShortenerIE,
+)
+from .txxx import (
+ TxxxIE,
+ PornTopIE,
+)
+from .udemy import (
+ UdemyIE,
+ UdemyCourseIE
+)
+from .udn import UDNEmbedIE
+from .ufctv import (
+ UFCTVIE,
+ UFCArabiaIE,
+)
+from .ukcolumn import UkColumnIE
+from .uktvplay import UKTVPlayIE
+from .digiteka import DigitekaIE
+from .dlive import (
+ DLiveVODIE,
+ DLiveStreamIE,
+)
+from .drooble import DroobleIE
+from .umg import UMGDeIE
+from .unistra import UnistraIE
+from .unity import UnityIE
+from .unsupported import KnownDRMIE, KnownPiracyIE
+from .uol import UOLIE
+from .uplynk import (
+ UplynkIE,
+ UplynkPreplayIE,
+)
+from .urort import UrortIE
+from .urplay import URPlayIE
+from .usanetwork import USANetworkIE
+from .usatoday import USATodayIE
+from .ustream import UstreamIE, UstreamChannelIE
+from .ustudio import (
+ UstudioIE,
+ UstudioEmbedIE,
+)
+from .utreon import UtreonIE
+from .varzesh3 import Varzesh3IE
+from .vbox7 import Vbox7IE
+from .veo import VeoIE
+from .veoh import (
+ VeohIE,
+ VeohUserIE
+)
+from .vesti import VestiIE
+from .vevo import (
+ VevoIE,
+ VevoPlaylistIE,
+)
+from .vgtv import (
+ BTArticleIE,
+ BTVestlendingenIE,
+ VGTVIE,
+)
+from .vh1 import VH1IE
+from .vice import (
+ ViceIE,
+ ViceArticleIE,
+ ViceShowIE,
+)
+from .viddler import ViddlerIE
+from .videa import VideaIE
+from .videocampus_sachsen import (
+ VideocampusSachsenIE,
+ ViMPPlaylistIE,
+)
+from .videodetective import VideoDetectiveIE
+from .videofyme import VideofyMeIE
+from .videoken import (
+ VideoKenIE,
+ VideoKenPlayerIE,
+ VideoKenPlaylistIE,
+ VideoKenCategoryIE,
+ VideoKenTopicIE,
+)
+from .videomore import (
+ VideomoreIE,
+ VideomoreVideoIE,
+ VideomoreSeasonIE,
+)
+from .videopress import VideoPressIE
+from .vidio import (
+ VidioIE,
+ VidioPremierIE,
+ VidioLiveIE
+)
+from .vidlii import VidLiiIE
+from .vidly import VidlyIE
+from .viewlift import (
+ ViewLiftIE,
+ ViewLiftEmbedIE,
+)
+from .viidea import ViideaIE
+from .vimeo import (
+ VimeoIE,
+ VimeoAlbumIE,
+ VimeoChannelIE,
+ VimeoGroupsIE,
+ VimeoLikesIE,
+ VimeoOndemandIE,
+ VimeoProIE,
+ VimeoReviewIE,
+ VimeoUserIE,
+ VimeoWatchLaterIE,
+ VHXEmbedIE,
+)
+from .vimm import (
+ VimmIE,
+ VimmRecordingIE,
+)
+from .vine import (
+ VineIE,
+ VineUserIE,
+)
+from .viki import (
+ VikiIE,
+ VikiChannelIE,
+)
+from .viously import ViouslyIE
+from .viqeo import ViqeoIE
+from .viu import (
+ ViuIE,
+ ViuPlaylistIE,
+ ViuOTTIE,
+ ViuOTTIndonesiaIE,
+)
+from .vk import (
+ VKIE,
+ VKUserVideosIE,
+ VKWallPostIE,
+ VKPlayIE,
+ VKPlayLiveIE,
+)
+from .vocaroo import VocarooIE
+from .vodpl import VODPlIE
+from .vodplatform import VODPlatformIE
+from .voicy import (
+ VoicyIE,
+ VoicyChannelIE,
+)
+from .volejtv import VolejTVIE
+from .voot import (
+ VootIE,
+ VootSeriesIE,
+)
+from .voxmedia import (
+ VoxMediaVolumeIE,
+ VoxMediaIE,
+)
+from .vrt import (
+ VRTIE,
+ VrtNUIE,
+ KetnetIE,
+ DagelijkseKostIE,
+)
+from .vtm import VTMIE
+from .medialaan import MedialaanIE
+from .vuclip import VuClipIE
+from .vvvvid import (
+ VVVVIDIE,
+ VVVVIDShowIE,
+)
+from .walla import WallaIE
+from .washingtonpost import (
+ WashingtonPostIE,
+ WashingtonPostArticleIE,
+)
+from .wat import WatIE
+from .wdr import (
+ WDRIE,
+ WDRPageIE,
+ WDRElefantIE,
+ WDRMobileIE,
+)
+from .webcamerapl import WebcameraplIE
+from .webcaster import (
+ WebcasterIE,
+ WebcasterFeedIE,
+)
+from .webofstories import (
+ WebOfStoriesIE,
+ WebOfStoriesPlaylistIE,
+)
+from .weibo import (
+ WeiboIE,
+ WeiboVideoIE,
+ WeiboUserIE,
+)
+from .weiqitv import WeiqiTVIE
+from .weverse import (
+ WeverseIE,
+ WeverseMediaIE,
+ WeverseMomentIE,
+ WeverseLiveTabIE,
+ WeverseMediaTabIE,
+ WeverseLiveIE,
+)
+from .wevidi import WeVidiIE
+from .weyyak import WeyyakIE
+from .whyp import WhypIE
+from .wikimedia import WikimediaIE
+from .wimbledon import WimbledonIE
+from .wimtv import WimTVIE
+from .whowatch import WhoWatchIE
+from .wistia import (
+ WistiaIE,
+ WistiaPlaylistIE,
+ WistiaChannelIE,
+)
+from .wordpress import (
+ WordpressPlaylistEmbedIE,
+ WordpressMiniAudioPlayerEmbedIE,
+)
+from .worldstarhiphop import WorldStarHipHopIE
+from .wppilot import (
+ WPPilotIE,
+ WPPilotChannelsIE,
+)
+from .wrestleuniverse import (
+ WrestleUniverseVODIE,
+ WrestleUniversePPVIE,
+)
+from .wsj import (
+ WSJIE,
+ WSJArticleIE,
+)
+from .wwe import WWEIE
+from .wykop import (
+ WykopDigIE,
+ WykopDigCommentIE,
+ WykopPostIE,
+ WykopPostCommentIE,
+)
+from .xanimu import XanimuIE
+from .xboxclips import XboxClipsIE
+from .xfileshare import XFileShareIE
+from .xhamster import (
+ XHamsterIE,
+ XHamsterEmbedIE,
+ XHamsterUserIE,
+)
+from .ximalaya import (
+ XimalayaIE,
+ XimalayaAlbumIE
+)
+from .xinpianchang import XinpianchangIE
+from .xminus import XMinusIE
+from .xnxx import XNXXIE
+from .xstream import XstreamIE
+from .xvideos import (
+ XVideosIE,
+ XVideosQuickiesIE
+)
+from .xxxymovies import XXXYMoviesIE
+from .yahoo import (
+ YahooIE,
+ YahooSearchIE,
+ YahooJapanNewsIE,
+)
+from .yandexdisk import YandexDiskIE
+from .yandexmusic import (
+ YandexMusicTrackIE,
+ YandexMusicAlbumIE,
+ YandexMusicPlaylistIE,
+ YandexMusicArtistTracksIE,
+ YandexMusicArtistAlbumsIE,
+)
+from .yandexvideo import (
+ YandexVideoIE,
+ YandexVideoPreviewIE,
+ ZenYandexIE,
+ ZenYandexChannelIE,
+)
+from .yapfiles import YapFilesIE
+from .yappy import (
+ YappyIE,
+ YappyProfileIE,
+)
+from .yle_areena import YleAreenaIE
+from .youjizz import YouJizzIE
+from .youku import (
+ YoukuIE,
+ YoukuShowIE,
+)
+from .younow import (
+ YouNowLiveIE,
+ YouNowChannelIE,
+ YouNowMomentIE,
+)
+from .youporn import YouPornIE
+from .yourporn import YourPornIE
+from .yourupload import YourUploadIE
+from .zaiko import (
+ ZaikoIE,
+ ZaikoETicketIE,
+)
+from .zapiks import ZapiksIE
+from .zattoo import (
+ BBVTVIE,
+ BBVTVLiveIE,
+ BBVTVRecordingsIE,
+ EinsUndEinsTVIE,
+ EinsUndEinsTVLiveIE,
+ EinsUndEinsTVRecordingsIE,
+ EWETVIE,
+ EWETVLiveIE,
+ EWETVRecordingsIE,
+ GlattvisionTVIE,
+ GlattvisionTVLiveIE,
+ GlattvisionTVRecordingsIE,
+ MNetTVIE,
+ MNetTVLiveIE,
+ MNetTVRecordingsIE,
+ NetPlusTVIE,
+ NetPlusTVLiveIE,
+ NetPlusTVRecordingsIE,
+ OsnatelTVIE,
+ OsnatelTVLiveIE,
+ OsnatelTVRecordingsIE,
+ QuantumTVIE,
+ QuantumTVLiveIE,
+ QuantumTVRecordingsIE,
+ SaltTVIE,
+ SaltTVLiveIE,
+ SaltTVRecordingsIE,
+ SAKTVIE,
+ SAKTVLiveIE,
+ SAKTVRecordingsIE,
+ VTXTVIE,
+ VTXTVLiveIE,
+ VTXTVRecordingsIE,
+ WalyTVIE,
+ WalyTVLiveIE,
+ WalyTVRecordingsIE,
+ ZattooIE,
+ ZattooLiveIE,
+ ZattooMoviesIE,
+ ZattooRecordingsIE,
+)
+from .zdf import ZDFIE, ZDFChannelIE
+from .zee5 import (
+ Zee5IE,
+ Zee5SeriesIE,
+)
+from .zeenews import ZeeNewsIE
+from .zenporn import ZenPornIE
+from .zetland import ZetlandDKArticleIE
+from .zhihu import ZhihuIE
+from .zingmp3 import (
+ ZingMp3IE,
+ ZingMp3AlbumIE,
+ ZingMp3ChartHomeIE,
+ ZingMp3WeekChartIE,
+ ZingMp3ChartMusicVideoIE,
+ ZingMp3UserIE,
+ ZingMp3HubIE,
+ ZingMp3LiveRadioIE,
+ ZingMp3PodcastEpisodeIE,
+ ZingMp3PodcastIE,
+)
+from .zoom import ZoomIE
+from .zype import ZypeIE
diff --git a/yt_dlp/extractor/abc.py b/yt_dlp/extractor/abc.py
new file mode 100644
index 0000000..b217422
--- /dev/null
+++ b/yt_dlp/extractor/abc.py
@@ -0,0 +1,421 @@
+import hashlib
+import hmac
+import re
+import time
+
+from .common import InfoExtractor
+from ..compat import compat_str
+from ..utils import (
+ dict_get,
+ ExtractorError,
+ js_to_json,
+ int_or_none,
+ parse_iso8601,
+ str_or_none,
+ traverse_obj,
+ try_get,
+ unescapeHTML,
+ update_url_query,
+ url_or_none,
+)
+
+
+class ABCIE(InfoExtractor):
+ IE_NAME = 'abc.net.au'
+ _VALID_URL = r'https?://(?:www\.)?abc\.net\.au/(?:news|btn)/(?:[^/]+/){1,4}(?P<id>\d{5,})'
+
+ _TESTS = [{
+ 'url': 'http://www.abc.net.au/news/2014-11-05/australia-to-staff-ebola-treatment-centre-in-sierra-leone/5868334',
+ 'md5': 'cb3dd03b18455a661071ee1e28344d9f',
+ 'info_dict': {
+ 'id': '5868334',
+ 'ext': 'mp4',
+ 'title': 'Australia to help staff Ebola treatment centre in Sierra Leone',
+ 'description': 'md5:809ad29c67a05f54eb41f2a105693a67',
+ },
+ 'skip': 'this video has expired',
+ }, {
+ 'url': 'http://www.abc.net.au/news/2015-08-17/warren-entsch-introduces-same-sex-marriage-bill/6702326',
+ 'md5': '4ebd61bdc82d9a8b722f64f1f4b4d121',
+ 'info_dict': {
+ 'id': 'NvqvPeNZsHU',
+ 'ext': 'mp4',
+ 'upload_date': '20150816',
+ 'uploader': 'ABC News (Australia)',
+ 'description': 'Government backbencher Warren Entsch introduces a cross-party sponsored bill to legalise same-sex marriage, saying the bill is designed to promote "an inclusive Australia, not a divided one.". Read more here: http://ab.co/1Mwc6ef',
+ 'uploader_id': 'NewsOnABC',
+ 'title': 'Marriage Equality: Warren Entsch introduces same sex marriage bill',
+ },
+ 'add_ie': ['Youtube'],
+ 'skip': 'Not accessible from Travis CI server',
+ }, {
+ 'url': 'http://www.abc.net.au/news/2015-10-23/nab-lifts-interest-rates-following-westpac-and-cba/6880080',
+ 'md5': 'b96eee7c9edf4fc5a358a0252881cc1f',
+ 'info_dict': {
+ 'id': '6880080',
+ 'ext': 'mp3',
+ 'title': 'NAB lifts interest rates, following Westpac and CBA',
+ 'description': 'md5:f13d8edc81e462fce4a0437c7dc04728',
+ },
+ }, {
+ 'url': 'http://www.abc.net.au/news/2015-10-19/6866214',
+ 'only_matching': True,
+ }, {
+ 'url': 'https://www.abc.net.au/btn/classroom/wwi-centenary/10527914',
+ 'info_dict': {
+ 'id': '10527914',
+ 'ext': 'mp4',
+ 'title': 'WWI Centenary',
+ 'description': 'md5:c2379ec0ca84072e86b446e536954546',
+ }
+ }, {
+ 'url': 'https://www.abc.net.au/news/programs/the-world/2020-06-10/black-lives-matter-protests-spawn-support-for/12342074',
+ 'info_dict': {
+ 'id': '12342074',
+ 'ext': 'mp4',
+ 'title': 'Black Lives Matter protests spawn support for Papuans in Indonesia',
+ 'description': 'md5:2961a17dc53abc558589ccd0fb8edd6f',
+ }
+ }, {
+ 'url': 'https://www.abc.net.au/btn/newsbreak/btn-newsbreak-20200814/12560476',
+ 'info_dict': {
+ 'id': 'tDL8Ld4dK_8',
+ 'ext': 'mp4',
+ 'title': 'Fortnite Banned From Apple and Google App Stores',
+ 'description': 'md5:a6df3f36ce8f816b74af4bd6462f5651',
+ 'upload_date': '20200813',
+ 'uploader': 'Behind the News',
+ 'uploader_id': 'behindthenews',
+ }
+ }, {
+ 'url': 'https://www.abc.net.au/news/2023-06-25/wagner-boss-orders-troops-back-to-bases-to-avoid-bloodshed/102520540',
+ 'info_dict': {
+ 'id': '102520540',
+ 'title': 'Wagner Group retreating from Russia, leader Prigozhin to move to Belarus',
+ 'ext': 'mp4',
+ 'description': 'Wagner troops leave Rostov-on-Don and\xa0Yevgeny Prigozhin will move to Belarus under a deal brokered by Belarusian President Alexander Lukashenko to end the mutiny.',
+ 'thumbnail': 'https://live-production.wcms.abc-cdn.net.au/0c170f5b57f0105c432f366c0e8e267b?impolicy=wcms_crop_resize&cropH=2813&cropW=5000&xPos=0&yPos=249&width=862&height=485',
+ }
+ }]
+
+ def _real_extract(self, url):
+ video_id = self._match_id(url)
+ webpage = self._download_webpage(url, video_id)
+
+ mobj = re.search(r'<a\s+href="(?P<url>[^"]+)"\s+data-duration="\d+"\s+title="Download audio directly">', webpage)
+ if mobj:
+ urls_info = mobj.groupdict()
+ youtube = False
+ video = False
+ else:
+ mobj = re.search(r'<a href="(?P<url>http://www\.youtube\.com/watch\?v=[^"]+)"><span><strong>External Link:</strong>',
+ webpage)
+ if mobj is None:
+ mobj = re.search(r'<iframe width="100%" src="(?P<url>//www\.youtube-nocookie\.com/embed/[^?"]+)', webpage)
+ if mobj:
+ urls_info = mobj.groupdict()
+ youtube = True
+ video = True
+
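+ # Fall back to inline player data: renditions are embedded as a JSON array,
+ # either under a "sources"/"files"/"renditions" key or in an
+ # inline(Video|Audio|YouTube)Data.push() call that also names the media type.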
+ if mobj is None:
+ mobj = re.search(r'(?P<type>)"(?:sources|files|renditions)":\s*(?P<json_data>\[[^\]]+\])', webpage)
+ if mobj is None:
+ mobj = re.search(
+ r'inline(?P<type>Video|Audio|YouTube)Data\.push\((?P<json_data>[^)]+)\);',
+ webpage)
+ if mobj is None:
+ expired = self._html_search_regex(r'(?s)class="expired-(?:video|audio)".+?<span>(.+?)</span>', webpage, 'expired', None)
+ if expired:
+ raise ExtractorError('%s said: %s' % (self.IE_NAME, expired), expected=True)
+ raise ExtractorError('Unable to extract video urls')
+
+ urls_info = self._parse_json(
+ mobj.group('json_data'), video_id, transform_source=js_to_json)
+ youtube = mobj.group('type') == 'YouTube'
+ video = mobj.group('type') == 'Video' or traverse_obj(
+ urls_info, (0, ('contentType', 'MIMEType')), get_all=False) == 'video/mp4'
+
+ if not isinstance(urls_info, list):
+ urls_info = [urls_info]
+
+ if youtube:
+ return self.playlist_result([
+ self.url_result(url_info['url']) for url_info in urls_info])
+
+ formats = []
+ for url_info in urls_info:
+ height = int_or_none(url_info.get('height'))
+ bitrate = int_or_none(url_info.get('bitrate'))
+ width = int_or_none(url_info.get('width'))
+ format_id = None
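+ # Rendition filenames end in either `_<height>.mp4` or `_<bitrate>k.mp4`;
+ # use whichever suffix is present to fill in missing metadata.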
+ mobj = re.search(r'_(?:(?P<height>\d+)|(?P<bitrate>\d+)k)\.mp4$', url_info['url'])
+ if mobj:
+ height_from_url = mobj.group('height')
+ if height_from_url:
+ height = height or int_or_none(height_from_url)
+ width = width or int_or_none(url_info.get('label'))
+ else:
+ bitrate = bitrate or int_or_none(mobj.group('bitrate'))
+ format_id = str_or_none(url_info.get('label'))
+ formats.append({
+ 'url': url_info['url'],
+ 'vcodec': url_info.get('codec') if video else 'none',
+ 'width': width,
+ 'height': height,
+ 'tbr': bitrate,
+ 'filesize': int_or_none(url_info.get('filesize')),
+ 'format_id': format_id
+ })
+
+ return {
+ 'id': video_id,
+ 'title': self._og_search_title(webpage),
+ 'formats': formats,
+ 'description': self._og_search_description(webpage),
+ 'thumbnail': self._og_search_thumbnail(webpage),
+ }
+
+
+class ABCIViewIE(InfoExtractor):
+ IE_NAME = 'abc.net.au:iview'
+ _VALID_URL = r'https?://iview\.abc\.net\.au/(?:[^/]+/)*video/(?P<id>[^/?#]+)'
+ _GEO_COUNTRIES = ['AU']
+
+ _TESTS = [{
+ 'url': 'https://iview.abc.net.au/show/utopia/series/1/video/CO1211V001S00',
+ 'md5': '52a942bfd7a0b79a6bfe9b4ce6c9d0ed',
+ 'info_dict': {
+ 'id': 'CO1211V001S00',
+ 'ext': 'mp4',
+ 'title': 'Series 1 Ep 1 Wood For The Trees',
+ 'series': 'Utopia',
+ 'description': 'md5:0cfb2c183c1b952d1548fd65c8a95c00',
+ 'upload_date': '20230726',
+ 'uploader_id': 'abc1',
+ 'series_id': 'CO1211V',
+ 'episode_id': 'CO1211V001S00',
+ 'season_number': 1,
+ 'season': 'Season 1',
+ 'episode_number': 1,
+ 'episode': 'Wood For The Trees',
+ 'thumbnail': 'https://cdn.iview.abc.net.au/thumbs/i/co/CO1211V001S00_5ad8353f4df09_1280.jpg',
+ 'timestamp': 1690403700,
+ },
+ 'params': {
+ 'skip_download': True,
+ },
+ }, {
+ 'note': 'No episode name',
+ 'url': 'https://iview.abc.net.au/show/gruen/series/11/video/LE1927H001S00',
+ 'md5': '67715ce3c78426b11ba167d875ac6abf',
+ 'info_dict': {
+ 'id': 'LE1927H001S00',
+ 'ext': 'mp4',
+ 'title': 'Series 11 Ep 1',
+ 'series': 'Gruen',
+ 'description': 'md5:52cc744ad35045baf6aded2ce7287f67',
+ 'upload_date': '20190925',
+ 'uploader_id': 'abc1',
+ 'series_id': 'LE1927H',
+ 'episode_id': 'LE1927H001S00',
+ 'season_number': 11,
+ 'season': 'Season 11',
+ 'episode_number': 1,
+ 'episode': 'Episode 1',
+ 'thumbnail': 'https://cdn.iview.abc.net.au/thumbs/i/le/LE1927H001S00_5d954fbd79e25_1280.jpg',
+ 'timestamp': 1569445289,
+ },
+ 'expected_warnings': ['Ignoring subtitle tracks found in the HLS manifest'],
+ 'params': {
+ 'skip_download': True,
+ },
+ }, {
+ 'note': 'No episode number',
+ 'url': 'https://iview.abc.net.au/show/four-corners/series/2022/video/NC2203H039S00',
+ 'md5': '77cb7d8434440e3b28fbebe331c2456a',
+ 'info_dict': {
+ 'id': 'NC2203H039S00',
+ 'ext': 'mp4',
+ 'title': 'Series 2022 Locking Up Kids',
+ 'series': 'Four Corners',
+ 'description': 'md5:54829ca108846d1a70e1fcce2853e720',
+ 'upload_date': '20221114',
+ 'uploader_id': 'abc1',
+ 'series_id': 'NC2203H',
+ 'episode_id': 'NC2203H039S00',
+ 'season_number': 2022,
+ 'season': 'Season 2022',
+ 'episode': 'Locking Up Kids',
+ 'thumbnail': 'https://cdn.iview.abc.net.au/thumbs/i/nc/NC2203H039S00_636d8a0944a22_1920.jpg',
+ 'timestamp': 1668460497,
+ },
+ 'expected_warnings': ['Ignoring subtitle tracks found in the HLS manifest'],
+ 'params': {
+ 'skip_download': True,
+ },
+ }, {
+ 'note': 'No episode name or number',
+ 'url': 'https://iview.abc.net.au/show/landline/series/2021/video/RF2004Q043S00',
+ 'md5': '2e17dec06b13cc81dc119d2565289396',
+ 'info_dict': {
+ 'id': 'RF2004Q043S00',
+ 'ext': 'mp4',
+ 'title': 'Series 2021',
+ 'series': 'Landline',
+ 'description': 'md5:c9f30d9c0c914a7fd23842f6240be014',
+ 'upload_date': '20211205',
+ 'uploader_id': 'abc1',
+ 'series_id': 'RF2004Q',
+ 'episode_id': 'RF2004Q043S00',
+ 'season_number': 2021,
+ 'season': 'Season 2021',
+ 'thumbnail': 'https://cdn.iview.abc.net.au/thumbs/i/rf/RF2004Q043S00_61a950639dbc0_1920.jpg',
+ 'timestamp': 1638710705,
+ },
+ 'expected_warnings': ['Ignoring subtitle tracks found in the HLS manifest'],
+ 'params': {
+ 'skip_download': True,
+ },
+ }]
+
+ def _real_extract(self, url):
+ video_id = self._match_id(url)
+ video_params = self._download_json(
+ 'https://iview.abc.net.au/api/programs/' + video_id, video_id)
+ title = unescapeHTML(video_params.get('title') or video_params['seriesTitle'])
+ stream = next(s for s in video_params['playlist'] if s.get('type') in ('program', 'livestream'))
+
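+ # Stream URLs must be signed: the request path (timestamp plus episode
+ # house number) is HMAC-SHA256'd with a static key, and the token returned
+ # by /auth/hls/sign is attached to the manifest URL as `hdnea` below.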
+ house_number = video_params.get('episodeHouseNumber') or video_id
+ path = '/auth/hls/sign?ts={0}&hn={1}&d=android-tablet'.format(
+ int(time.time()), house_number)
+ sig = hmac.new(
+ b'android.content.res.Resources',
+ path.encode('utf-8'), hashlib.sha256).hexdigest()
+ token = self._download_webpage(
+ 'http://iview.abc.net.au{0}&sig={1}'.format(path, sig), video_id)
+
+ def tokenize_url(url, token):
+ return update_url_query(url, {
+ 'hdnea': token,
+ })
+
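+ # Walk the quality ladder from highest to lowest and stop at the first
+ # variant whose signed manifest yields usable formats.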
+ formats = []
+ for sd in ('1080', '720', 'sd', 'sd-low'):
+ sd_url = try_get(
+ stream, lambda x: x['streams']['hls'][sd], compat_str)
+ if not sd_url:
+ continue
+ formats = self._extract_m3u8_formats(
+ tokenize_url(sd_url, token), video_id, 'mp4',
+ entry_protocol='m3u8_native', m3u8_id='hls', fatal=False)
+ if formats:
+ break
+
+ subtitles = {}
+ src_vtt = stream.get('captions', {}).get('src-vtt')
+ if src_vtt:
+ subtitles['en'] = [{
+ 'url': src_vtt,
+ 'ext': 'vtt',
+ }]
+
+ is_live = video_params.get('livestream') == '1'
+
+ return {
+ 'id': video_id,
+ 'title': title,
+ 'description': video_params.get('description'),
+ 'thumbnail': video_params.get('thumbnail'),
+ 'duration': int_or_none(video_params.get('eventDuration')),
+ 'timestamp': parse_iso8601(video_params.get('pubDate'), ' '),
+ 'series': unescapeHTML(video_params.get('seriesTitle')),
+ 'series_id': video_params.get('seriesHouseNumber') or video_id[:7],
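+ # Titles follow the pattern "Series <n> Ep <m> <episode name>"; the
+ # season/episode metadata below is parsed out of the title string.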
+ 'season_number': int_or_none(self._search_regex(
+ r'\bSeries\s+(\d+)\b', title, 'season number', default=None)),
+ 'episode_number': int_or_none(self._search_regex(
+ r'\bEp\s+(\d+)\b', title, 'episode number', default=None)),
+ 'episode_id': house_number,
+ 'episode': self._search_regex(
+ r'^(?:Series\s+\d+)?\s*(?:Ep\s+\d+)?\s*(.*)$', title, 'episode', default='') or None,
+ 'uploader_id': video_params.get('channel'),
+ 'formats': formats,
+ 'subtitles': subtitles,
+ 'is_live': is_live,
+ }
+
+
+class ABCIViewShowSeriesIE(InfoExtractor):
+ IE_NAME = 'abc.net.au:iview:showseries'
+ _VALID_URL = r'https?://iview\.abc\.net\.au/show/(?P<id>[^/]+)(?:/series/\d+)?$'
+ _GEO_COUNTRIES = ['AU']
+
+ _TESTS = [{
+ 'url': 'https://iview.abc.net.au/show/upper-middle-bogan',
+ 'info_dict': {
+ 'id': '124870-1',
+ 'title': 'Series 1',
+ 'description': 'md5:93119346c24a7c322d446d8eece430ff',
+ 'series': 'Upper Middle Bogan',
+ 'season': 'Series 1',
+ 'thumbnail': r're:^https?://cdn\.iview\.abc\.net\.au/thumbs/.*\.jpg$'
+ },
+ 'playlist_count': 8,
+ }, {
+ 'url': 'https://iview.abc.net.au/show/upper-middle-bogan',
+ 'info_dict': {
+ 'id': 'CO1108V001S00',
+ 'ext': 'mp4',
+ 'title': 'Series 1 Ep 1 I\'m A Swan',
+ 'description': 'md5:7b676758c1de11a30b79b4d301e8da93',
+ 'series': 'Upper Middle Bogan',
+ 'uploader_id': 'abc1',
+ 'upload_date': '20210630',
+ 'timestamp': 1625036400,
+ },
+ 'params': {
+ 'noplaylist': True,
+ 'skip_download': 'm3u8',
+ },
+ }, {
+ # 'videoEpisodes' is a dict with `items` key
+ 'url': 'https://iview.abc.net.au/show/7-30-mark-humphries-satire',
+ 'info_dict': {
+ 'id': '178458-0',
+ 'title': 'Episodes',
+ 'description': 'Satirist Mark Humphries brings his unique perspective on current political events for 7.30.',
+ 'series': '7.30 Mark Humphries Satire',
+ 'season': 'Episodes',
+ 'thumbnail': r're:^https?://cdn\.iview\.abc\.net\.au/thumbs/.*\.jpg$'
+ },
+ 'playlist_count': 15,
+ }]
+
+ def _real_extract(self, url):
+ show_id = self._match_id(url)
+ webpage = self._download_webpage(url, show_id)
+ webpage_data = self._search_regex(
+ r'window\.__INITIAL_STATE__\s*=\s*[\'"](.+?)[\'"]\s*;',
+ webpage, 'initial state')
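+ # __INITIAL_STATE__ is embedded as a quoted, backslash-escaped JS string;
+ # HTML-unescape and unicode-unescape it before parsing as JSON.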
+ video_data = self._parse_json(
+ unescapeHTML(webpage_data).encode('utf-8').decode('unicode_escape'), show_id)
+ video_data = video_data['route']['pageData']['_embedded']
+
+ highlight = try_get(video_data, lambda x: x['highlightVideo']['shareUrl'])
+ if not self._yes_playlist(show_id, bool(highlight), video_label='highlight video'):
+ return self.url_result(highlight, ie=ABCIViewIE.ie_key())
+
+ series = video_data['selectedSeries']
+ return {
+ '_type': 'playlist',
+ 'entries': [self.url_result(episode_url, ABCIViewIE)
+ for episode_url in traverse_obj(series, (
+ '_embedded', 'videoEpisodes', (None, 'items'), ..., 'shareUrl', {url_or_none}))],
+ 'id': series.get('id'),
+ 'title': dict_get(series, ('title', 'displaySubtitle')),
+ 'description': series.get('description'),
+ 'series': dict_get(series, ('showTitle', 'displayTitle')),
+ 'season': dict_get(series, ('title', 'displaySubtitle')),
+ 'thumbnail': traverse_obj(
+ series, 'thumbnail', ('images', lambda _, v: v['name'] == 'seriesThumbnail', 'url'), get_all=False),
+ }
diff --git a/yt_dlp/extractor/abcnews.py b/yt_dlp/extractor/abcnews.py
new file mode 100644
index 0000000..a57295b
--- /dev/null
+++ b/yt_dlp/extractor/abcnews.py
@@ -0,0 +1,153 @@
+from .amp import AMPIE
+from .common import InfoExtractor
+from ..utils import (
+ parse_duration,
+ parse_iso8601,
+ try_get,
+)
+
+
+class AbcNewsVideoIE(AMPIE):
+ IE_NAME = 'abcnews:video'
+ _VALID_URL = r'''(?x)
+ https?://
+ (?:
+ abcnews\.go\.com/
+ (?:
+ (?:[^/]+/)*video/(?P<display_id>[0-9a-z-]+)-|
+ video/(?:embed|itemfeed)\?.*?\bid=
+ )|
+ fivethirtyeight\.abcnews\.go\.com/video/embed/\d+/
+ )
+ (?P<id>\d+)
+ '''
+
+ _TESTS = [{
+ 'url': 'http://abcnews.go.com/ThisWeek/video/week-exclusive-irans-foreign-minister-zarif-20411932',
+ 'info_dict': {
+ 'id': '20411932',
+ 'ext': 'mp4',
+ 'display_id': 'week-exclusive-irans-foreign-minister-zarif',
+ 'title': '\'This Week\' Exclusive: Iran\'s Foreign Minister Zarif',
+ 'description': 'George Stephanopoulos goes one-on-one with Iranian Foreign Minister Dr. Javad Zarif.',
+ 'duration': 180,
+ 'thumbnail': r're:^https?://.*\.jpg$',
+ 'timestamp': 1380454200,
+ 'upload_date': '20130929',
+ },
+ 'params': {
+ # m3u8 download
+ 'skip_download': True,
+ },
+ }, {
+ 'url': 'http://abcnews.go.com/video/embed?id=46979033',
+ 'only_matching': True,
+ }, {
+ 'url': 'http://abcnews.go.com/2020/video/2020-husband-stands-teacher-jail-student-affairs-26119478',
+ 'only_matching': True,
+ }, {
+ 'url': 'http://abcnews.go.com/video/itemfeed?id=46979033',
+ 'only_matching': True,
+ }, {
+ 'url': 'https://abcnews.go.com/GMA/News/video/history-christmas-story-67894761',
+ 'only_matching': True,
+ }]
+
+ def _real_extract(self, url):
+ mobj = self._match_valid_url(url)
+ display_id = mobj.group('display_id')
+ video_id = mobj.group('id')
+ info_dict = self._extract_feed_info(
+ 'http://abcnews.go.com/video/itemfeed?id=%s' % video_id)
+ info_dict.update({
+ 'id': video_id,
+ 'display_id': display_id,
+ })
+ return info_dict
+
+
+class AbcNewsIE(InfoExtractor):
+ IE_NAME = 'abcnews'
+ _VALID_URL = r'https?://abcnews\.go\.com/(?:[^/]+/)+(?P<display_id>[0-9a-z-]+)/story\?id=(?P<id>\d+)'
+
+ _TESTS = [{
+ # Youtube Embeds
+ 'url': 'https://abcnews.go.com/Entertainment/peter-billingsley-child-actor-christmas-story-hollywood-power/story?id=51286501',
+ 'info_dict': {
+ 'id': '51286501',
+ 'title': "Peter Billingsley: From child actor in 'A Christmas Story' to Hollywood power player",
+ 'description': 'Billingsley went from a child actor to Hollywood power player.',
+ },
+ 'playlist_count': 5,
+ }, {
+ 'url': 'http://abcnews.go.com/Entertainment/justin-timberlake-performs-stop-feeling-eurovision-2016/story?id=39125818',
+ 'info_dict': {
+ 'id': '38897857',
+ 'ext': 'mp4',
+ 'title': 'Justin Timberlake Drops Hints For Secret Single',
+ 'description': 'Lara Spencer reports the buzziest stories of the day in "GMA" Pop News.',
+ 'upload_date': '20160505',
+ 'timestamp': 1462442280,
+ },
+ 'params': {
+ # m3u8 download
+ 'skip_download': True,
+ # The embedded YouTube video is blocked due to copyright issues
+ 'playlist_items': '1',
+ },
+ 'add_ie': ['AbcNewsVideo'],
+ }, {
+ # inline.type == 'video'
+ 'url': 'http://abcnews.go.com/Technology/exclusive-apple-ceo-tim-cook-iphone-cracking-software/story?id=37173343',
+ 'only_matching': True,
+ }]
+
+ def _real_extract(self, url):
+ story_id = self._match_id(url)
+ webpage = self._download_webpage(url, story_id)
+ story = self._parse_json(self._search_regex(
+ r"window\['__abcnews__'\]\s*=\s*({.+?});",
+ webpage, 'data'), story_id)['page']['content']['story']['everscroll'][0]
+ article_contents = story.get('articleContents') or {}
+
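+ # Yield the featured video first (if present), then every inline embed:
+ # iframes go through the generic URL resolver, while inline videos are
+ # routed to AbcNewsVideoIE via the /video/embed endpoint.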
+ def entries():
+ featured_video = story.get('featuredVideo') or {}
+ feed = try_get(featured_video, lambda x: x['video']['feed'])
+ if feed:
+ yield {
+ '_type': 'url',
+ 'id': featured_video.get('id'),
+ 'title': featured_video.get('name'),
+ 'url': feed,
+ 'thumbnail': featured_video.get('images'),
+ 'description': featured_video.get('description'),
+ 'timestamp': parse_iso8601(featured_video.get('uploadDate')),
+ 'duration': parse_duration(featured_video.get('duration')),
+ 'ie_key': AbcNewsVideoIE.ie_key(),
+ }
+
+ for inline in (article_contents.get('inlines') or []):
+ inline_type = inline.get('type')
+ if inline_type == 'iframe':
+ iframe_url = try_get(inline, lambda x: x['attrs']['src'])
+ if iframe_url:
+ yield self.url_result(iframe_url)
+ elif inline_type == 'video':
+ video_id = inline.get('id')
+ if video_id:
+ yield {
+ '_type': 'url',
+ 'id': video_id,
+ 'url': 'http://abcnews.go.com/video/embed?id=' + video_id,
+ 'thumbnail': inline.get('imgSrc') or inline.get('imgDefault'),
+ 'description': inline.get('description'),
+ 'duration': parse_duration(inline.get('duration')),
+ 'ie_key': AbcNewsVideoIE.ie_key(),
+ }
+
+ return self.playlist_result(
+ entries(), story_id, article_contents.get('headline'),
+ article_contents.get('subHead'))
diff --git a/yt_dlp/extractor/abcotvs.py b/yt_dlp/extractor/abcotvs.py
new file mode 100644
index 0000000..6dca19d
--- /dev/null
+++ b/yt_dlp/extractor/abcotvs.py
@@ -0,0 +1,130 @@
+from .common import InfoExtractor
+from ..compat import compat_str
+from ..utils import (
+ dict_get,
+ int_or_none,
+ try_get,
+)
+
+
+class ABCOTVSIE(InfoExtractor):
+ IE_NAME = 'abcotvs'
+ IE_DESC = 'ABC Owned Television Stations'
+ _VALID_URL = r'https?://(?P<site>abc(?:7(?:news|ny|chicago)?|11|13|30)|6abc)\.com(?:(?:/[^/]+)*/(?P<display_id>[^/]+))?/(?P<id>\d+)'
+ _TESTS = [
+ {
+ 'url': 'http://abc7news.com/entertainment/east-bay-museum-celebrates-vintage-synthesizers/472581/',
+ 'info_dict': {
+ 'id': '472548',
+ 'display_id': 'east-bay-museum-celebrates-vintage-synthesizers',
+ 'ext': 'mp4',
+ 'title': 'East Bay museum celebrates synthesized music',
+ 'description': 'md5:24ed2bd527096ec2a5c67b9d5a9005f3',
+ 'thumbnail': r're:^https?://.*\.jpg$',
+ 'timestamp': 1421118520,
+ 'upload_date': '20150113',
+ },
+ 'params': {
+ # m3u8 download
+ 'skip_download': True,
+ },
+ },
+ {
+ 'url': 'http://abc7news.com/472581',
+ 'only_matching': True,
+ },
+ {
+ 'url': 'https://6abc.com/man-75-killed-after-being-struck-by-vehicle-in-chester/5725182/',
+ 'only_matching': True,
+ },
+ ]
+ _SITE_MAP = {
+ '6abc': 'wpvi',
+ 'abc11': 'wtvd',
+ 'abc13': 'ktrk',
+ 'abc30': 'kfsn',
+ 'abc7': 'kabc',
+ 'abc7chicago': 'wls',
+ 'abc7news': 'kgo',
+ 'abc7ny': 'wabc',
+ }
+
+ def _real_extract(self, url):
+ site, display_id, video_id = self._match_valid_url(url).groups()
+ display_id = display_id or video_id
+ station = self._SITE_MAP[site]
+
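+ # The content API is keyed by station call sign; the clip is either
+ # nested under featuredMedia or is the response payload itself.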
+ data = self._download_json(
+ 'https://api.abcotvs.com/v2/content', display_id, query={
+ 'id': video_id,
+ 'key': 'otv.web.%s.story' % station,
+ 'station': station,
+ })['data']
+ video = try_get(data, lambda x: x['featuredMedia']['video'], dict) or data
+ video_id = compat_str(dict_get(video, ('id', 'publishedKey'), video_id))
+ title = video.get('title') or video['linkText']
+
+ formats = []
+ m3u8_url = video.get('m3u8')
+ if m3u8_url:
+ formats = self._extract_m3u8_formats(
+ video['m3u8'].split('?')[0], display_id, 'mp4', m3u8_id='hls', fatal=False)
+ mp4_url = video.get('mp4')
+ if mp4_url:
+ formats.append({
+ 'abr': 128,
+ 'format_id': 'https',
+ 'height': 360,
+ 'url': mp4_url,
+ 'width': 640,
+ })
+
+ image = video.get('image') or {}
+
+ return {
+ 'id': video_id,
+ 'display_id': display_id,
+ 'title': title,
+ 'description': dict_get(video, ('description', 'caption'), try_get(video, lambda x: x['meta']['description'])),
+ 'thumbnail': dict_get(image, ('source', 'dynamicSource')),
+ 'timestamp': int_or_none(video.get('date')),
+ 'duration': int_or_none(video.get('length')),
+ 'formats': formats,
+ }
+
+
+class ABCOTVSClipsIE(InfoExtractor):
+ IE_NAME = 'abcotvs:clips'
+ _VALID_URL = r'https?://clips\.abcotvs\.com/(?:[^/]+/)*video/(?P<id>\d+)'
+ _TEST = {
+ 'url': 'https://clips.abcotvs.com/kabc/video/214814',
+ 'info_dict': {
+ 'id': '214814',
+ 'ext': 'mp4',
+ 'title': 'SpaceX launch pad explosion destroys rocket, satellite',
+ 'description': 'md5:9f186e5ad8f490f65409965ee9c7be1b',
+ 'upload_date': '20160901',
+ 'timestamp': 1472756695,
+ },
+ 'params': {
+ # m3u8 download
+ 'skip_download': True,
+ },
+ }
+
+ def _real_extract(self, url):
+ video_id = self._match_id(url)
+ video_data = self._download_json('https://clips.abcotvs.com/vogo/video/getByIds?ids=' + video_id, video_id)['results'][0]
+ title = video_data['title']
+ formats = self._extract_m3u8_formats(
+ video_data['videoURL'].split('?')[0], video_id, 'mp4')
+
+ return {
+ 'id': video_id,
+ 'title': title,
+ 'description': video_data.get('description'),
+ 'thumbnail': video_data.get('thumbnailURL'),
+ 'duration': int_or_none(video_data.get('duration')),
+ 'timestamp': int_or_none(video_data.get('pubDate')),
+ 'formats': formats,
+ }
diff --git a/yt_dlp/extractor/abematv.py b/yt_dlp/extractor/abematv.py
new file mode 100644
index 0000000..fee7375
--- /dev/null
+++ b/yt_dlp/extractor/abematv.py
@@ -0,0 +1,484 @@
+import base64
+import binascii
+import functools
+import hashlib
+import hmac
+import io
+import json
+import re
+import struct
+import time
+import urllib.parse
+import urllib.request
+import urllib.response
+import uuid
+
+from .common import InfoExtractor
+from ..aes import aes_ecb_decrypt
+from ..utils import (
+ ExtractorError,
+ bytes_to_intlist,
+ decode_base_n,
+ int_or_none,
+ intlist_to_bytes,
+ OnDemandPagedList,
+ time_seconds,
+ traverse_obj,
+ update_url_query,
+)
+
+
+def add_opener(ydl, handler): # FIXME: Create proper API in .networking
+ """Add a handler for opening URLs, like _download_webpage"""
+ # https://github.com/python/cpython/blob/main/Lib/urllib/request.py#L426
+ # https://github.com/python/cpython/blob/main/Lib/urllib/request.py#L605
+ rh = ydl._request_director.handlers['Urllib']
+ if 'abematv-license' in rh._SUPPORTED_URL_SCHEMES:
+ return
+ headers = ydl.params['http_headers'].copy()
+ proxies = ydl.proxies.copy()
+ clean_proxies(proxies, headers)
+ opener = rh._get_instance(cookiejar=ydl.cookiejar, proxies=proxies)
+ assert isinstance(opener, urllib.request.OpenerDirector)
+ opener.add_handler(handler)
+ rh._SUPPORTED_URL_SCHEMES = (*rh._SUPPORTED_URL_SCHEMES, 'abematv-license')
+
+
+class AbemaLicenseHandler(urllib.request.BaseHandler):
+ handler_order = 499
+ STRTABLE = '123456789ABCDEFGHJKLMNPQRSTUVWXYZabcdefghijkmnopqrstuvwxyz'
+ HKEY = b'3AF0298C219469522A313570E8583005A642E73EDD58E3EA2FB7339D3DF1597E'
+
+ def __init__(self, ie: 'AbemaTVIE'):
+ # the protocol that this should really handle is 'abematv-license://'
+ # abematv_license_open is just a placeholder for development purposes
+ # ref. https://github.com/python/cpython/blob/f4c03484da59049eb62a9bf7777b963e2267d187/Lib/urllib/request.py#L510
+ setattr(self, 'abematv-license_open', getattr(self, 'abematv_license_open', None))
+ self.ie = ie
+
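+    # Exchange a playback ticket for the video key: the license server returns
+    # a base-N encoded 128-bit value (the encrypted key), which is decrypted
+    # with AES-ECB under a key derived via HMAC-SHA256(HKEY, cid + device_id)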
+ def _get_videokey_from_ticket(self, ticket):
+ to_show = self.ie.get_param('verbose', False)
+ media_token = self.ie._get_media_token(to_show=to_show)
+
+ license_response = self.ie._download_json(
+ 'https://license.abema.io/abematv-hls', None, note='Requesting playback license' if to_show else False,
+ query={'t': media_token},
+ data=json.dumps({
+ 'kv': 'a',
+ 'lt': ticket
+ }).encode('utf-8'),
+ headers={
+ 'Content-Type': 'application/json',
+ })
+
+ res = decode_base_n(license_response['k'], table=self.STRTABLE)
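+        # split the 128-bit result into two big-endian 64-bit words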
+ encvideokey = bytes_to_intlist(struct.pack('>QQ', res >> 64, res & 0xffffffffffffffff))
+
+ h = hmac.new(
+ binascii.unhexlify(self.HKEY),
+ (license_response['cid'] + self.ie._DEVICE_ID).encode('utf-8'),
+ digestmod=hashlib.sha256)
+ enckey = bytes_to_intlist(h.digest())
+
+ return intlist_to_bytes(aes_ecb_decrypt(encvideokey, enckey))
+
+ def abematv_license_open(self, url):
+ url = url.get_full_url() if isinstance(url, urllib.request.Request) else url
+ ticket = urllib.parse.urlparse(url).netloc
+ response_data = self._get_videokey_from_ticket(ticket)
+ return urllib.response.addinfourl(io.BytesIO(response_data), headers={
+ 'Content-Length': str(len(response_data)),
+ }, url=url, code=200)
+
+
+class AbemaTVBaseIE(InfoExtractor):
+ _NETRC_MACHINE = 'abematv'
+
+ _USERTOKEN = None
+ _DEVICE_ID = None
+ _MEDIATOKEN = None
+
+ _SECRETKEY = b'v+Gjs=25Aw5erR!J8ZuvRrCx*rGswhB&qdHd_SYerEWdU&a?3DzN9BRbp5KwY4hEmcj5#fykMjJ=AuWz5GSMY-d@H7DMEh3M@9n2G552Us$$k9cD=3TxwWe86!x#Zyhe'
+
+ @classmethod
+ def _generate_aks(cls, deviceid):
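+        # derive the applicationKeySecret: fold the device ID and the next-hour
+        # timestamp into a chain of HMAC-SHA256 digests keyed with _SECRETKEY;
+        # the iteration counts depend on the current month, day and hour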
+ deviceid = deviceid.encode('utf-8')
+        # round up to the next full hour (i.e. drop minutes and seconds)
+ ts_1hour = int((time_seconds() // 3600 + 1) * 3600)
+ time_struct = time.gmtime(ts_1hour)
+ ts_1hour_str = str(ts_1hour).encode('utf-8')
+
+ tmp = None
+
+ def mix_once(nonce):
+ nonlocal tmp
+ h = hmac.new(cls._SECRETKEY, digestmod=hashlib.sha256)
+ h.update(nonce)
+ tmp = h.digest()
+
+ def mix_tmp(count):
+ nonlocal tmp
+            for _ in range(count):
+ mix_once(tmp)
+
+ def mix_twist(nonce):
+ nonlocal tmp
+ mix_once(base64.urlsafe_b64encode(tmp).rstrip(b'=') + nonce)
+
+ mix_once(cls._SECRETKEY)
+ mix_tmp(time_struct.tm_mon)
+ mix_twist(deviceid)
+ mix_tmp(time_struct.tm_mday % 5)
+ mix_twist(ts_1hour_str)
+ mix_tmp(time_struct.tm_hour % 5)
+
+ return base64.urlsafe_b64encode(tmp).rstrip(b'=').decode('utf-8')
+
+ def _get_device_token(self):
+ if self._USERTOKEN:
+ return self._USERTOKEN
+
+ add_opener(self._downloader, AbemaLicenseHandler(self))
+
+ username, _ = self._get_login_info()
+ auth_cache = username and self.cache.load(self._NETRC_MACHINE, username, min_ver='2024.01.19')
+ AbemaTVBaseIE._USERTOKEN = auth_cache and auth_cache.get('usertoken')
+ if AbemaTVBaseIE._USERTOKEN:
+ # try authentication with locally stored token
+ try:
+ AbemaTVBaseIE._DEVICE_ID = auth_cache.get('device_id')
+ self._get_media_token(True)
+                return self._USERTOKEN
+ except ExtractorError as e:
+ self.report_warning(f'Failed to login with cached user token; obtaining a fresh one ({e})')
+
+ AbemaTVBaseIE._DEVICE_ID = str(uuid.uuid4())
+ aks = self._generate_aks(self._DEVICE_ID)
+ user_data = self._download_json(
+ 'https://api.abema.io/v1/users', None, note='Authorizing',
+ data=json.dumps({
+ 'deviceId': self._DEVICE_ID,
+ 'applicationKeySecret': aks,
+ }).encode('utf-8'),
+ headers={
+ 'Content-Type': 'application/json',
+ })
+ AbemaTVBaseIE._USERTOKEN = user_data['token']
+
+ return self._USERTOKEN
+
+ def _get_media_token(self, invalidate=False, to_show=True):
+ if not invalidate and self._MEDIATOKEN:
+ return self._MEDIATOKEN
+
+ AbemaTVBaseIE._MEDIATOKEN = self._download_json(
+ 'https://api.abema.io/v1/media/token', None, note='Fetching media token' if to_show else False,
+ query={
+ 'osName': 'android',
+ 'osVersion': '6.0.1',
+ 'osLang': 'ja_JP',
+ 'osTimezone': 'Asia/Tokyo',
+ 'appId': 'tv.abema',
+ 'appVersion': '3.27.1'
+ }, headers={
+ 'Authorization': f'bearer {self._get_device_token()}',
+ })['token']
+
+ return self._MEDIATOKEN
+
+ def _perform_login(self, username, password):
+ self._get_device_token()
+ if self.cache.load(self._NETRC_MACHINE, username, min_ver='2024.01.19') and self._get_media_token():
+ self.write_debug('Skipping logging in')
+ return
+
+        if '@' in username:  # loose check; don't strictly validate that it's an email address
+ ep, method = 'user/email', 'email'
+ else:
+ ep, method = 'oneTimePassword', 'userId'
+
+ login_response = self._download_json(
+ f'https://api.abema.io/v1/auth/{ep}', None, note='Logging in',
+ data=json.dumps({
+ method: username,
+ 'password': password
+ }).encode('utf-8'), headers={
+ 'Authorization': f'bearer {self._get_device_token()}',
+ 'Origin': 'https://abema.tv',
+ 'Referer': 'https://abema.tv/',
+ 'Content-Type': 'application/json',
+ })
+
+ AbemaTVBaseIE._USERTOKEN = login_response['token']
+ self._get_media_token(True)
+ auth_cache = {
+ 'device_id': AbemaTVBaseIE._DEVICE_ID,
+ 'usertoken': AbemaTVBaseIE._USERTOKEN,
+ }
+ self.cache.store(self._NETRC_MACHINE, username, auth_cache)
+
+ def _call_api(self, endpoint, video_id, query=None, note='Downloading JSON metadata'):
+ return self._download_json(
+ f'https://api.abema.io/{endpoint}', video_id, query=query or {},
+ note=note,
+ headers={
+ 'Authorization': f'bearer {self._get_device_token()}',
+ })
+
+ def _extract_breadcrumb_list(self, webpage, video_id):
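+        # find the BreadcrumbList JSON-LD embedded after the page navigation and
+        # return its item names (e.g. Home > genre > series > episode title)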
+ for jld in re.finditer(
+ r'(?is)</span></li></ul><script[^>]+type=(["\']?)application/ld\+json\1[^>]*>(?P<json_ld>.+?)</script>',
+ webpage):
+ jsonld = self._parse_json(jld.group('json_ld'), video_id, fatal=False)
+ if traverse_obj(jsonld, '@type') != 'BreadcrumbList':
+ continue
+ items = traverse_obj(jsonld, ('itemListElement', ..., 'name'))
+ if items:
+ return items
+ return []
+
+
+class AbemaTVIE(AbemaTVBaseIE):
+ _VALID_URL = r'https?://abema\.tv/(?P<type>now-on-air|video/episode|channels/.+?/slots)/(?P<id>[^?/]+)'
+ _TESTS = [{
+ 'url': 'https://abema.tv/video/episode/194-25_s2_p1',
+ 'info_dict': {
+ 'id': '194-25_s2_p1',
+ 'title': '第1話 「チーズケーキ」 「モーニング再び」',
+ 'series': '異世界食堂2',
+ 'season': 'シーズン2',
+ 'season_number': 2,
+ 'episode': '第1話 「チーズケーキ」 「モーニング再び」',
+ 'episode_number': 1,
+ },
+ 'skip': 'expired',
+ }, {
+ 'url': 'https://abema.tv/channels/anime-live2/slots/E8tvAnMJ7a9a5d',
+ 'info_dict': {
+ 'id': 'E8tvAnMJ7a9a5d',
+ 'title': 'ゆるキャン△ SEASON2 全話一挙【無料ビデオ72時間】',
+ 'series': 'ゆるキャン△ SEASON2',
+ 'episode': 'ゆるキャン△ SEASON2 全話一挙【無料ビデオ72時間】',
+ 'season_number': 2,
+ 'episode_number': 1,
+ 'description': 'md5:9c5a3172ae763278f9303922f0ea5b17',
+ },
+ 'skip': 'expired',
+ }, {
+ 'url': 'https://abema.tv/video/episode/87-877_s1282_p31047',
+ 'info_dict': {
+            'id': '87-877_s1282_p31047',
+ 'title': '第5話『光射す』',
+ 'description': 'md5:56d4fc1b4f7769ded5f923c55bb4695d',
+ 'thumbnail': r're:https://hayabusa\.io/.+',
+ 'series': '相棒',
+ 'episode': '第5話『光射す』',
+ },
+ 'skip': 'expired',
+ }, {
+ 'url': 'https://abema.tv/now-on-air/abema-anime',
+ 'info_dict': {
+ 'id': 'abema-anime',
+ # this varies
+ # 'title': '女子高生の無駄づかい 全話一挙【無料ビデオ72時間】',
+ 'description': 'md5:55f2e61f46a17e9230802d7bcc913d5f',
+ 'is_live': True,
+ },
+ 'skip': 'Not supported until yt-dlp implements native live downloader OR AbemaTV can start a local HTTP server',
+ }]
+ _TIMETABLE = None
+
+ def _real_extract(self, url):
+        # Starting a download from an infojson written by this extractor is
+        # undefined behavior and will never be fixed; downloads must be triggered
+        # by specifying the URL directly (unless extractors gain a pre-download hook)
+ video_id, video_type = self._match_valid_url(url).group('id', 'type')
+ headers = {
+ 'Authorization': 'Bearer ' + self._get_device_token(),
+ }
+ video_type = video_type.split('/')[-1]
+
+ webpage = self._download_webpage(url, video_id)
+ canonical_url = self._search_regex(
+ r'<link\s+rel="canonical"\s*href="(.+?)"', webpage, 'canonical URL',
+ default=url)
+ info = self._search_json_ld(webpage, video_id, default={})
+
+ title = self._search_regex(
+ r'<span\s*class=".+?EpisodeTitleBlock__title">(.+?)</span>', webpage, 'title', default=None)
+ if not title:
+ jsonld = None
+ for jld in re.finditer(
+ r'(?is)<span\s*class="com-m-Thumbnail__image">(?:</span>)?<script[^>]+type=(["\']?)application/ld\+json\1[^>]*>(?P<json_ld>.+?)</script>',
+ webpage):
+ jsonld = self._parse_json(jld.group('json_ld'), video_id, fatal=False)
+ if jsonld:
+ break
+ if jsonld:
+ title = jsonld.get('caption')
+ if not title and video_type == 'now-on-air':
+ if not self._TIMETABLE:
+                # cache the timetable because the response is ~5 MiB in size (!!)
+ self._TIMETABLE = self._download_json(
+ 'https://api.abema.io/v1/timetable/dataSet?debug=false', video_id,
+ headers=headers)
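+            # slot startAt/endAt appear to be JST-based epoch values, hence the +9h offset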
+ now = time_seconds(hours=9)
+ for slot in self._TIMETABLE.get('slots', []):
+ if slot.get('channelId') != video_id:
+ continue
+ if slot['startAt'] <= now and now < slot['endAt']:
+ title = slot['title']
+ break
+
+ # read breadcrumb on top of page
+ breadcrumb = self._extract_breadcrumb_list(webpage, video_id)
+ if breadcrumb:
+            # the breadcrumb list reads e.g. (cf. the first test of this IE):
+            # Home > Anime (genre) > Isekai Shokudo 2 (series name) > Episode 1 "Cheese cakes" "Morning again" (episode title)
+            # so the last two entries are the series and episode names
+ info['series'] = breadcrumb[-2]
+ info['episode'] = breadcrumb[-1]
+ if not title:
+ title = info['episode']
+
+ description = self._html_search_regex(
+ (r'<p\s+class="com-video-EpisodeDetailsBlock__content"><span\s+class=".+?">(.+?)</span></p><div',
+ r'<span\s+class=".+?SlotSummary.+?">(.+?)</span></div><div',),
+ webpage, 'description', default=None, group=1)
+ if not description:
+ og_desc = self._html_search_meta(
+ ('description', 'og:description', 'twitter:description'), webpage)
+ if og_desc:
+ description = re.sub(r'''(?sx)
+ ^(.+?)(?:
+ アニメの動画を無料で見るならABEMA!| # anime
+ 等、.+ # applies for most of categories
+ )?
+ ''', r'\1', og_desc)
+
+ # canonical URL may contain season and episode number
+ mobj = re.search(r's(\d+)_p(\d+)$', canonical_url)
+ if mobj:
+ seri = int_or_none(mobj.group(1), default=float('inf'))
+ epis = int_or_none(mobj.group(2), default=float('inf'))
+ info['season_number'] = seri if seri < 100 else None
+            # some anime, like Detective Conan (though not available on AbemaTV),
+            # have more than 1000 episodes (1026 as of 2021-11-15)
+ info['episode_number'] = epis if epis < 2000 else None
+
+ is_live, m3u8_url = False, None
+ if video_type == 'now-on-air':
+ is_live = True
+ channel_url = 'https://api.abema.io/v1/channels'
+ if video_id == 'news-global':
+ channel_url = update_url_query(channel_url, {'division': '1'})
+ onair_channels = self._download_json(channel_url, video_id)
+ for ch in onair_channels['channels']:
+ if video_id == ch['id']:
+ m3u8_url = ch['playback']['hls']
+ break
+ else:
+ raise ExtractorError(f'Cannot find on-air {video_id} channel.', expected=True)
+ elif video_type == 'episode':
+ api_response = self._download_json(
+ f'https://api.abema.io/v1/video/programs/{video_id}', video_id,
+ note='Checking playability',
+ headers=headers)
+ ondemand_types = traverse_obj(api_response, ('terms', ..., 'onDemandType'))
+ if 3 not in ondemand_types:
+ # cannot acquire decryption key for these streams
+ self.report_warning('This is a premium-only stream')
+ info.update(traverse_obj(api_response, {
+ 'series': ('series', 'title'),
+ 'season': ('season', 'name'),
+ 'season_number': ('season', 'sequence'),
+ 'episode_number': ('episode', 'number'),
+ }))
+ if not title:
+ title = traverse_obj(api_response, ('episode', 'title'))
+ if not description:
+ description = traverse_obj(api_response, ('episode', 'content'))
+
+ m3u8_url = f'https://vod-abematv.akamaized.net/program/{video_id}/playlist.m3u8'
+ elif video_type == 'slots':
+ api_response = self._download_json(
+ f'https://api.abema.io/v1/media/slots/{video_id}', video_id,
+ note='Checking playability',
+ headers=headers)
+ if not traverse_obj(api_response, ('slot', 'flags', 'timeshiftFree'), default=False):
+ self.report_warning('This is a premium-only stream')
+
+ m3u8_url = f'https://vod-abematv.akamaized.net/slot/{video_id}/playlist.m3u8'
+ else:
+ raise ExtractorError('Unreachable')
+
+ if is_live:
+ self.report_warning("This is a livestream; yt-dlp doesn't support downloading natively, but FFmpeg cannot handle m3u8 manifests from AbemaTV")
+ self.report_warning('Please consider using Streamlink to download these streams (https://github.com/streamlink/streamlink)')
+ formats = self._extract_m3u8_formats(
+ m3u8_url, video_id, ext='mp4', live=is_live)
+
+ info.update({
+ 'id': video_id,
+ 'title': title,
+ 'description': description,
+ 'formats': formats,
+ 'is_live': is_live,
+ })
+ return info
+
+
+class AbemaTVTitleIE(AbemaTVBaseIE):
+ _VALID_URL = r'https?://abema\.tv/video/title/(?P<id>[^?/]+)'
+ _PAGE_SIZE = 25
+
+ _TESTS = [{
+ 'url': 'https://abema.tv/video/title/90-1597',
+ 'info_dict': {
+ 'id': '90-1597',
+ 'title': 'シャッフルアイランド',
+ },
+ 'playlist_mincount': 2,
+ }, {
+ 'url': 'https://abema.tv/video/title/193-132',
+ 'info_dict': {
+ 'id': '193-132',
+ 'title': '真心が届く~僕とスターのオフィス・ラブ!?~',
+ },
+ 'playlist_mincount': 16,
+ }, {
+ 'url': 'https://abema.tv/video/title/25-102',
+ 'info_dict': {
+ 'id': '25-102',
+ 'title': 'ソードアート・オンライン アリシゼーション',
+ },
+ 'playlist_mincount': 24,
+ }]
+
+ def _fetch_page(self, playlist_id, series_version, page):
+ programs = self._call_api(
+ f'v1/video/series/{playlist_id}/programs', playlist_id,
+ note=f'Downloading page {page + 1}',
+ query={
+ 'seriesVersion': series_version,
+ 'offset': str(page * self._PAGE_SIZE),
+ 'order': 'seq',
+ 'limit': str(self._PAGE_SIZE),
+ })
+ yield from (
+ self.url_result(f'https://abema.tv/video/episode/{x}')
+ for x in traverse_obj(programs, ('programs', ..., 'id')))
+
+ def _entries(self, playlist_id, series_version):
+ return OnDemandPagedList(
+ functools.partial(self._fetch_page, playlist_id, series_version),
+ self._PAGE_SIZE)
+
+ def _real_extract(self, url):
+ playlist_id = self._match_id(url)
+ series_info = self._call_api(f'v1/video/series/{playlist_id}', playlist_id)
+
+ return self.playlist_result(
+ self._entries(playlist_id, series_info['version']), playlist_id=playlist_id,
+ playlist_title=series_info.get('title'),
+ playlist_description=series_info.get('content'))
diff --git a/yt_dlp/extractor/academicearth.py b/yt_dlp/extractor/academicearth.py
new file mode 100644
index 0000000..d9691cb
--- /dev/null
+++ b/yt_dlp/extractor/academicearth.py
@@ -0,0 +1,39 @@
+import re
+
+from .common import InfoExtractor
+
+
+class AcademicEarthCourseIE(InfoExtractor):
+ _VALID_URL = r'^https?://(?:www\.)?academicearth\.org/playlists/(?P<id>[^?#/]+)'
+ IE_NAME = 'AcademicEarth:Course'
+ _TEST = {
+ 'url': 'http://academicearth.org/playlists/laws-of-nature/',
+ 'info_dict': {
+ 'id': 'laws-of-nature',
+ 'title': 'Laws of Nature',
+ 'description': 'Introduce yourself to the laws of nature with these free online college lectures from Yale, Harvard, and MIT.',
+ },
+ 'playlist_count': 3,
+ }
+
+ def _real_extract(self, url):
+ playlist_id = self._match_id(url)
+
+ webpage = self._download_webpage(url, playlist_id)
+ title = self._html_search_regex(
+ r'<h1 class="playlist-name"[^>]*?>(.*?)</h1>', webpage, 'title')
+ description = self._html_search_regex(
+ r'<p class="excerpt"[^>]*?>(.*?)</p>',
+ webpage, 'description', fatal=False)
+ urls = re.findall(
+ r'<li class="lecture-preview">\s*?<a target="_blank" href="([^"]+)">',
+ webpage)
+ entries = [self.url_result(u) for u in urls]
+
+ return {
+ '_type': 'playlist',
+ 'id': playlist_id,
+ 'title': title,
+ 'description': description,
+ 'entries': entries,
+ }
diff --git a/yt_dlp/extractor/acast.py b/yt_dlp/extractor/acast.py
new file mode 100644
index 0000000..427d04c
--- /dev/null
+++ b/yt_dlp/extractor/acast.py
@@ -0,0 +1,143 @@
+from .common import InfoExtractor
+from ..utils import (
+ clean_html,
+ clean_podcast_url,
+ int_or_none,
+ parse_iso8601,
+)
+
+
+class ACastBaseIE(InfoExtractor):
+ def _extract_episode(self, episode, show_info):
+ title = episode['title']
+ info = {
+ 'id': episode['id'],
+ 'display_id': episode.get('episodeUrl'),
+ 'url': clean_podcast_url(episode['url']),
+ 'title': title,
+ 'description': clean_html(episode.get('description') or episode.get('summary')),
+ 'thumbnail': episode.get('image'),
+ 'timestamp': parse_iso8601(episode.get('publishDate')),
+ 'duration': int_or_none(episode.get('duration')),
+ 'filesize': int_or_none(episode.get('contentLength')),
+ 'season_number': int_or_none(episode.get('season')),
+ 'episode': title,
+ 'episode_number': int_or_none(episode.get('episode')),
+ }
+ info.update(show_info)
+ return info
+
+ def _extract_show_info(self, show):
+ return {
+ 'creator': show.get('author'),
+ 'series': show.get('title'),
+ }
+
+ def _call_api(self, path, video_id, query=None):
+ return self._download_json(
+ 'https://feeder.acast.com/api/v1/shows/' + path, video_id, query=query)
+
+
+class ACastIE(ACastBaseIE):
+ IE_NAME = 'acast'
+ _VALID_URL = r'''(?x:
+ https?://
+ (?:
+ (?:(?:embed|www)\.)?acast\.com/|
+ play\.acast\.com/s/
+ )
+ (?P<channel>[^/]+)/(?P<id>[^/#?"]+)
+ )'''
+ _EMBED_REGEX = [rf'(?x)<iframe[^>]+\bsrc=[\'"](?P<url>{_VALID_URL})']
+ _TESTS = [{
+ 'url': 'https://www.acast.com/sparpodcast/2.raggarmordet-rosterurdetforflutna',
+ 'info_dict': {
+ 'id': '2a92b283-1a75-4ad8-8396-499c641de0d9',
+ 'ext': 'mp3',
+ 'title': '2. Raggarmordet - Röster ur det förflutna',
+ 'description': 'md5:013959207e05011ad14a222cf22278cc',
+ 'timestamp': 1477346700,
+ 'upload_date': '20161024',
+ 'duration': 2766,
+ 'creator': 'Third Ear Studio',
+ 'series': 'Spår',
+ 'episode': '2. Raggarmordet - Röster ur det förflutna',
+ 'thumbnail': 'https://assets.pippa.io/shows/616ebe1886d7b1398620b943/616ebe33c7e6e70013cae7da.jpg',
+ 'episode_number': 2,
+ 'display_id': '2.raggarmordet-rosterurdetforflutna',
+ 'season_number': 4,
+ 'season': 'Season 4',
+ }
+ }, {
+ 'url': 'http://embed.acast.com/adambuxton/ep.12-adam-joeschristmaspodcast2015',
+ 'only_matching': True,
+ }, {
+ 'url': 'https://play.acast.com/s/rattegangspodden/s04e09styckmordetihelenelund-del2-2',
+ 'only_matching': True,
+ }, {
+ 'url': 'https://play.acast.com/s/sparpodcast/2a92b283-1a75-4ad8-8396-499c641de0d9',
+ 'only_matching': True,
+ }]
+ _WEBPAGE_TESTS = [{
+ 'url': 'https://ausi.anu.edu.au/news/democracy-sausage-episode-can-labor-be-long-form-government',
+ 'info_dict': {
+ 'id': '646c68fb21fbf20011e9c651',
+ 'ext': 'mp3',
+ 'creator': 'The Australian National University',
+ 'display_id': 'can-labor-be-a-long-form-government',
+ 'duration': 2618,
+ 'thumbnail': 'https://assets.pippa.io/shows/6113e8578b4903809f16f7e5/1684821529295-515b9520db9ce53275b995eb302f941c.jpeg',
+ 'title': 'Can Labor be a long-form government?',
+ 'episode': 'Can Labor be a long-form government?',
+ 'upload_date': '20230523',
+ 'series': 'Democracy Sausage with Mark Kenny',
+ 'timestamp': 1684826362,
+ 'description': 'md5:feabe1fc5004c78ee59c84a46bf4ba16',
+ }
+ }]
+
+ def _real_extract(self, url):
+ channel, display_id = self._match_valid_url(url).groups()
+ episode = self._call_api(
+ '%s/episodes/%s' % (channel, display_id),
+ display_id, {'showInfo': 'true'})
+ return self._extract_episode(
+ episode, self._extract_show_info(episode.get('show') or {}))
+
+
+class ACastChannelIE(ACastBaseIE):
+ IE_NAME = 'acast:channel'
+ _VALID_URL = r'''(?x)
+ https?://
+ (?:
+ (?:www\.)?acast\.com/|
+ play\.acast\.com/s/
+ )
+ (?P<id>[^/#?]+)
+ '''
+ _TESTS = [{
+ 'url': 'https://www.acast.com/todayinfocus',
+ 'info_dict': {
+ 'id': '4efc5294-5385-4847-98bd-519799ce5786',
+ 'title': 'Today in Focus',
+ 'description': 'md5:c09ce28c91002ce4ffce71d6504abaae',
+ },
+ 'playlist_mincount': 200,
+ }, {
+ 'url': 'http://play.acast.com/s/ft-banking-weekly',
+ 'only_matching': True,
+ }]
+
+ @classmethod
+ def suitable(cls, url):
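+        # episode URLs also match the broader channel pattern, so defer to ACastIE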
+        return False if ACastIE.suitable(url) else super().suitable(url)
+
+ def _real_extract(self, url):
+ show_slug = self._match_id(url)
+ show = self._call_api(show_slug, show_slug)
+ show_info = self._extract_show_info(show)
+ entries = []
+ for episode in (show.get('episodes') or []):
+ entries.append(self._extract_episode(episode, show_info))
+ return self.playlist_result(
+ entries, show.get('id'), show.get('title'), show.get('description'))
diff --git a/yt_dlp/extractor/acfun.py b/yt_dlp/extractor/acfun.py
new file mode 100644
index 0000000..c3b4f43
--- /dev/null
+++ b/yt_dlp/extractor/acfun.py
@@ -0,0 +1,200 @@
+from .common import InfoExtractor
+from ..utils import (
+    float_or_none,
+    format_field,
+    int_or_none,
+    parse_codecs,
+    parse_qs,
+    str_or_none,
+    traverse_obj,
+)
+
+
+class AcFunVideoBaseIE(InfoExtractor):
+ def _extract_metadata(self, video_id, video_info):
+ playjson = self._parse_json(video_info['ksPlayJson'], video_id)
+
+ formats, subtitles = [], {}
+ for video in traverse_obj(playjson, ('adaptationSet', 0, 'representation')):
+ fmts, subs = self._extract_m3u8_formats_and_subtitles(video['url'], video_id, 'mp4', fatal=False)
+ formats.extend(fmts)
+ self._merge_subtitles(subs, target=subtitles)
+ for f in fmts:
+ f.update({
+ 'fps': float_or_none(video.get('frameRate')),
+ 'width': int_or_none(video.get('width')),
+ 'height': int_or_none(video.get('height')),
+ 'tbr': float_or_none(video.get('avgBitrate')),
+ **parse_codecs(video.get('codecs', ''))
+ })
+
+ return {
+ 'id': video_id,
+ 'formats': formats,
+ 'subtitles': subtitles,
+ 'duration': float_or_none(video_info.get('durationMillis'), 1000),
+ 'timestamp': int_or_none(video_info.get('uploadTime'), 1000),
+ 'http_headers': {'Referer': 'https://www.acfun.cn/'},
+ }
+
+
+class AcFunVideoIE(AcFunVideoBaseIE):
+ _VALID_URL = r'https?://www\.acfun\.cn/v/ac(?P<id>[_\d]+)'
+
+ _TESTS = [{
+ 'url': 'https://www.acfun.cn/v/ac35457073',
+ 'info_dict': {
+ 'id': '35457073',
+ 'ext': 'mp4',
+ 'duration': 174.208,
+ 'timestamp': 1656403967,
+ 'title': '1 8 岁 现 状',
+ 'description': '“赶紧回去!班主任查班了!”',
+ 'uploader': '锤子game',
+ 'uploader_id': '51246077',
+ 'thumbnail': r're:^https?://.*\.(jpg|jpeg)',
+ 'upload_date': '20220628',
+ 'like_count': int,
+ 'view_count': int,
+ 'comment_count': int,
+ 'tags': list,
+ },
+ }, {
+ # example for len(video_list) > 1
+ 'url': 'https://www.acfun.cn/v/ac35468952_2',
+ 'info_dict': {
+ 'id': '35468952_2',
+ 'ext': 'mp4',
+ 'title': '【动画剧集】Rocket & Groot Season 1(2022)/火箭浣熊与格鲁特第1季 P02 S01E02 十拿九穩',
+ 'duration': 90.459,
+ 'uploader': '比令',
+ 'uploader_id': '37259967',
+ 'upload_date': '20220629',
+ 'timestamp': 1656479962,
+ 'tags': list,
+ 'like_count': int,
+ 'view_count': int,
+ 'comment_count': int,
+ 'thumbnail': r're:^https?://.*\.(jpg|jpeg)',
+ 'description': 'md5:67583aaf3a0f933bd606bc8a2d3ebb17',
+ }
+ }]
+
+ def _real_extract(self, url):
+ video_id = self._match_id(url)
+
+ webpage = self._download_webpage(url, video_id)
+        json_all = self._search_json(r'window\.videoInfo\s*=', webpage, 'videoInfo', video_id)
+
+ title = json_all.get('title')
+ video_list = json_all.get('videoList') or []
+ video_internal_id = traverse_obj(json_all, ('currentVideoInfo', 'id'))
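+        # multi-part videos (URLs like ac<id>_<part>): append the 1-based part index and part title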
+ if video_internal_id and len(video_list) > 1:
+ part_idx, part_video_info = next(
+ (idx + 1, v) for (idx, v) in enumerate(video_list)
+ if v['id'] == video_internal_id)
+ title = f'{title} P{part_idx:02d} {part_video_info["title"]}'
+
+ return {
+ **self._extract_metadata(video_id, json_all['currentVideoInfo']),
+ 'title': title,
+ 'thumbnail': json_all.get('coverUrl'),
+ 'description': json_all.get('description'),
+ 'uploader': traverse_obj(json_all, ('user', 'name')),
+ 'uploader_id': traverse_obj(json_all, ('user', 'href')),
+ 'tags': traverse_obj(json_all, ('tagList', ..., 'name')),
+ 'view_count': int_or_none(json_all.get('viewCount')),
+ 'like_count': int_or_none(json_all.get('likeCountShow')),
+ 'comment_count': int_or_none(json_all.get('commentCountShow')),
+ }
+
+
+class AcFunBangumiIE(AcFunVideoBaseIE):
+ _VALID_URL = r'https?://www\.acfun\.cn/bangumi/(?P<id>aa[_\d]+)'
+
+ _TESTS = [{
+ 'url': 'https://www.acfun.cn/bangumi/aa6002917_36188_1745457?ac=2',
+ 'info_dict': {
+ 'id': 'aa6002917_36188_1745457__2',
+ 'ext': 'mp4',
+ 'title': '【7月】租借女友 水原千鹤角色曲『DATE』特别PV',
+ 'upload_date': '20200916',
+ 'timestamp': 1600243813,
+ 'duration': 92.091,
+ },
+ }, {
+ 'url': 'https://www.acfun.cn/bangumi/aa5023171_36188_1750645',
+ 'info_dict': {
+ 'id': 'aa5023171_36188_1750645',
+ 'ext': 'mp4',
+ 'title': '红孩儿之趴趴蛙寻石记 第5话 ',
+ 'duration': 760.0,
+ 'season': '红孩儿之趴趴蛙寻石记',
+ 'season_id': '5023171',
+ 'season_number': 1, # series has only 1 season
+ 'episode': 'Episode 5',
+ 'episode_number': 5,
+ 'upload_date': '20181223',
+ 'timestamp': 1545552185,
+ 'thumbnail': r're:^https?://.*\.(jpg|jpeg|png)',
+ 'comment_count': int,
+ },
+ }, {
+ 'url': 'https://www.acfun.cn/bangumi/aa6065485_36188_1885061',
+ 'info_dict': {
+ 'id': 'aa6065485_36188_1885061',
+ 'ext': 'mp4',
+ 'title': '叽歪老表(第二季) 第5话 坚不可摧',
+ 'season': '叽歪老表(第二季)',
+ 'season_number': 2,
+ 'season_id': '6065485',
+ 'episode': '坚不可摧',
+ 'episode_number': 5,
+ 'upload_date': '20220324',
+ 'timestamp': 1648082786,
+ 'duration': 105.002,
+ 'thumbnail': r're:^https?://.*\.(jpg|jpeg|png)',
+ 'comment_count': int,
+ },
+ }]
+
+ def _real_extract(self, url):
+ video_id = self._match_id(url)
+ ac_idx = parse_qs(url).get('ac', [None])[-1]
+ video_id = f'{video_id}{format_field(ac_idx, None, "__%s")}'
+
+ webpage = self._download_webpage(url, video_id)
+        json_bangumi_data = self._search_json(r'window\.bangumiData\s*=', webpage, 'bangumiData', video_id)
+
+ if ac_idx:
+ video_info = json_bangumi_data['hlVideoInfo']
+ return {
+ **self._extract_metadata(video_id, video_info),
+ 'title': video_info.get('title'),
+ }
+
+ video_info = json_bangumi_data['currentVideoInfo']
+
+ season_id = json_bangumi_data.get('bangumiId')
+ season_number = season_id and next((
+ idx for idx, v in enumerate(json_bangumi_data.get('relatedBangumis') or [], 1)
+ if v.get('id') == season_id), 1)
+
+ json_bangumi_list = self._search_json(
+ r'window\.bangumiList\s*=', webpage, 'bangumiList', video_id, fatal=False)
+ video_internal_id = int_or_none(traverse_obj(json_bangumi_data, ('currentVideoInfo', 'id')))
+        # json_bangumi_list may be None (fatal=False above), so traverse it safely
+        episode_number = video_internal_id and next((
+            idx for idx, v in enumerate(traverse_obj(json_bangumi_list, 'items') or [], 1)
+            if v.get('videoId') == video_internal_id), None)
+
+ return {
+ **self._extract_metadata(video_id, video_info),
+ 'title': json_bangumi_data.get('showTitle'),
+ 'thumbnail': json_bangumi_data.get('image'),
+ 'season': json_bangumi_data.get('bangumiTitle'),
+ 'season_id': str_or_none(season_id),
+ 'season_number': season_number,
+ 'episode': json_bangumi_data.get('title'),
+ 'episode_number': episode_number,
+ 'comment_count': int_or_none(json_bangumi_data.get('commentCount')),
+ }
diff --git a/yt_dlp/extractor/adn.py b/yt_dlp/extractor/adn.py
new file mode 100644
index 0000000..898d372
--- /dev/null
+++ b/yt_dlp/extractor/adn.py
@@ -0,0 +1,335 @@
+import base64
+import binascii
+import json
+import os
+import random
+import time
+
+from .common import InfoExtractor
+from ..aes import aes_cbc_decrypt_bytes, unpad_pkcs7
+from ..compat import compat_b64decode
+from ..networking.exceptions import HTTPError
+from ..utils import (
+ ass_subtitles_timecode,
+ bytes_to_intlist,
+ bytes_to_long,
+ ExtractorError,
+ float_or_none,
+ int_or_none,
+ intlist_to_bytes,
+ long_to_bytes,
+ parse_iso8601,
+ pkcs1pad,
+    str_or_none,
+    strip_or_none,
+ try_get,
+ unified_strdate,
+ urlencode_postdata,
+)
+from ..utils.traversal import traverse_obj
+
+
+class ADNBaseIE(InfoExtractor):
+ IE_DESC = 'Animation Digital Network'
+ _NETRC_MACHINE = 'animationdigitalnetwork'
+ _BASE = 'animationdigitalnetwork.fr'
+ _API_BASE_URL = f'https://gw.api.{_BASE}/'
+ _PLAYER_BASE_URL = f'{_API_BASE_URL}player/'
+ _HEADERS = {}
+ _LOGIN_ERR_MESSAGE = 'Unable to log in'
+ _RSA_KEY = (0x9B42B08905199A5CCE2026274399CA560ECB209EE9878A708B1C0812E1BB8CB5D1FB7441861147C1A1F2F3A0476DD63A9CAC20D3E983613346850AA6CB38F16DC7D720FD7D86FC6E5B3D5BBC72E14CD0BF9E869F2CEA2CCAD648F1DCE38F1FF916CEFB2D339B64AA0264372344BC775E265E8A852F88144AB0BD9AA06C1A4ABB, 65537)
+ _POS_ALIGN_MAP = {
+ 'start': 1,
+ 'end': 3,
+ }
+ _LINE_ALIGN_MAP = {
+ 'middle': 8,
+ 'end': 4,
+ }
+
+
+class ADNIE(ADNBaseIE):
+ _VALID_URL = r'https?://(?:www\.)?(?:animation|anime)digitalnetwork\.(?P<lang>fr|de)/video/[^/?#]+/(?P<id>\d+)'
+ _TESTS = [{
+ 'url': 'https://animationdigitalnetwork.fr/video/fruits-basket/9841-episode-1-a-ce-soir',
+ 'md5': '1c9ef066ceb302c86f80c2b371615261',
+ 'info_dict': {
+ 'id': '9841',
+ 'ext': 'mp4',
+ 'title': 'Fruits Basket - Episode 1',
+ 'description': 'md5:14be2f72c3c96809b0ca424b0097d336',
+ 'series': 'Fruits Basket',
+ 'duration': 1437,
+ 'release_date': '20190405',
+ 'comment_count': int,
+ 'average_rating': float,
+ 'season_number': 1,
+ 'episode': 'À ce soir !',
+ 'episode_number': 1,
+ 'thumbnail': str,
+ 'season': 'Season 1',
+ },
+ 'skip': 'Only available in French and German speaking Europe',
+ }, {
+ 'url': 'http://animedigitalnetwork.fr/video/blue-exorcist-kyoto-saga/7778-episode-1-debut-des-hostilites',
+ 'only_matching': True,
+ }, {
+ 'url': 'https://animationdigitalnetwork.de/video/the-eminence-in-shadow/23550-folge-1',
+ 'md5': '5c5651bf5791fa6fcd7906012b9d94e8',
+ 'info_dict': {
+ 'id': '23550',
+ 'ext': 'mp4',
+ 'episode_number': 1,
+ 'duration': 1417,
+ 'release_date': '20231004',
+ 'series': 'The Eminence in Shadow',
+ 'season_number': 2,
+ 'episode': str,
+ 'title': str,
+ 'thumbnail': str,
+ 'season': 'Season 2',
+ 'comment_count': int,
+ 'average_rating': float,
+ 'description': str,
+ },
+ # 'skip': 'Only available in French and German speaking Europe',
+ }]
+
+ def _get_subtitles(self, sub_url, video_id):
+ if not sub_url:
+ return None
+
+ enc_subtitles = self._download_webpage(
+ sub_url, video_id, 'Downloading subtitles location', fatal=False) or '{}'
+ subtitle_location = (self._parse_json(enc_subtitles, video_id, fatal=False) or {}).get('location')
+ if subtitle_location:
+ enc_subtitles = self._download_webpage(
+ subtitle_location, video_id, 'Downloading subtitles data',
+ fatal=False, headers={'Origin': 'https://' + self._BASE})
+ if not enc_subtitles:
+ return None
+
+ # http://animationdigitalnetwork.fr/components/com_vodvideo/videojs/adn-vjs.min.js
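+        # layout: the first 24 base64 chars are the IV; the remainder is the
+        # AES-CBC ciphertext. The key is the random hex half sent in the links
+        # request (self._K) concatenated with a static hex suffix.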
+ dec_subtitles = unpad_pkcs7(aes_cbc_decrypt_bytes(
+ compat_b64decode(enc_subtitles[24:]),
+ binascii.unhexlify(self._K + '7fac1178830cfe0c'),
+ compat_b64decode(enc_subtitles[:24])))
+ subtitles_json = self._parse_json(dec_subtitles.decode(), None, fatal=False)
+ if not subtitles_json:
+ return None
+
+ subtitles = {}
+ for sub_lang, sub in subtitles_json.items():
+ ssa = '''[Script Info]
+ScriptType:V4.00
+[V4 Styles]
+Format: Name,Fontname,Fontsize,PrimaryColour,SecondaryColour,TertiaryColour,BackColour,Bold,Italic,BorderStyle,Outline,Shadow,Alignment,MarginL,MarginR,MarginV,AlphaLevel,Encoding
+Style: Default,Arial,18,16777215,16777215,16777215,0,-1,0,1,1,0,2,20,20,20,0,0
+[Events]
+Format: Marked,Start,End,Style,Name,MarginL,MarginR,MarginV,Effect,Text'''
+ for current in sub:
+ start, end, text, line_align, position_align = (
+ float_or_none(current.get('startTime')),
+ float_or_none(current.get('endTime')),
+ current.get('text'), current.get('lineAlign'),
+ current.get('positionAlign'))
+ if start is None or end is None or text is None:
+ continue
+ alignment = self._POS_ALIGN_MAP.get(position_align, 2) + self._LINE_ALIGN_MAP.get(line_align, 0)
+ ssa += os.linesep + 'Dialogue: Marked=0,%s,%s,Default,,0,0,0,,%s%s' % (
+ ass_subtitles_timecode(start),
+ ass_subtitles_timecode(end),
+ '{\\a%d}' % alignment if alignment != 2 else '',
+ text.replace('\n', '\\N').replace('<i>', '{\\i1}').replace('</i>', '{\\i0}'))
+
+ if sub_lang == 'vostf':
+ sub_lang = 'fr'
+ elif sub_lang == 'vostde':
+ sub_lang = 'de'
+ subtitles.setdefault(sub_lang, []).extend([{
+ 'ext': 'json',
+ 'data': json.dumps(sub),
+ }, {
+ 'ext': 'ssa',
+ 'data': ssa,
+ }])
+ return subtitles
+
+ def _perform_login(self, username, password):
+ try:
+ access_token = (self._download_json(
+ self._API_BASE_URL + 'authentication/login', None,
+ 'Logging in', self._LOGIN_ERR_MESSAGE, fatal=False,
+ data=urlencode_postdata({
+ 'password': password,
+ 'rememberMe': False,
+ 'source': 'Web',
+ 'username': username,
+ })) or {}).get('accessToken')
+ if access_token:
+ self._HEADERS = {'authorization': 'Bearer ' + access_token}
+ except ExtractorError as e:
+ message = None
+ if isinstance(e.cause, HTTPError) and e.cause.status == 401:
+ resp = self._parse_json(
+ e.cause.response.read().decode(), None, fatal=False) or {}
+ message = resp.get('message') or resp.get('code')
+ self.report_warning(message or self._LOGIN_ERR_MESSAGE)
+
+ def _real_extract(self, url):
+ lang, video_id = self._match_valid_url(url).group('lang', 'id')
+ video_base_url = self._PLAYER_BASE_URL + 'video/%s/' % video_id
+ player = self._download_json(
+ video_base_url + 'configuration', video_id,
+ 'Downloading player config JSON metadata',
+ headers=self._HEADERS)['player']
+ options = player['options']
+
+ user = options['user']
+ if not user.get('hasAccess'):
+ start_date = traverse_obj(options, ('video', 'startDate', {str}))
+ if (parse_iso8601(start_date) or 0) > time.time():
+ raise ExtractorError(f'This video is not available yet. Release date: {start_date}', expected=True)
+ self.raise_login_required('This video requires a subscription', method='password')
+
+ token = self._download_json(
+ user.get('refreshTokenUrl') or (self._PLAYER_BASE_URL + 'refresh/token'),
+ video_id, 'Downloading access token', headers={
+ 'X-Player-Refresh-Token': user['refreshToken'],
+ }, data=b'')['token']
+
+ links_url = try_get(options, lambda x: x['video']['url']) or (video_base_url + 'link')
+ self._K = ''.join(random.choices('0123456789abcdef', k=16))
+ message = bytes_to_intlist(json.dumps({
+ 'k': self._K,
+ 't': token,
+ }))
+
+ # Sometimes authentication fails for no good reason, retry with
+ # a different random padding
+ links_data = None
+ for _ in range(3):
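+            # RSA-encrypt the PKCS#1-padded JSON payload with the hardcoded public
+            # key (n, e) from _RSA_KEY; the result is sent as the X-Player-Token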
+ padded_message = intlist_to_bytes(pkcs1pad(message, 128))
+ n, e = self._RSA_KEY
+ encrypted_message = long_to_bytes(pow(bytes_to_long(padded_message), e, n))
+ authorization = base64.b64encode(encrypted_message).decode()
+
+ try:
+ links_data = self._download_json(
+ links_url, video_id, 'Downloading links JSON metadata', headers={
+ 'X-Player-Token': authorization,
+ 'X-Target-Distribution': lang,
+ **self._HEADERS
+ }, query={
+ 'freeWithAds': 'true',
+ 'adaptive': 'false',
+ 'withMetadata': 'true',
+ 'source': 'Web'
+ })
+ break
+ except ExtractorError as e:
+                if not isinstance(e.cause, HTTPError):
+                    raise
+
+ if e.cause.status == 401:
+ # This usually goes away with a different random pkcs1pad, so retry
+ continue
+
+ error = self._parse_json(e.cause.response.read(), video_id)
+ message = error.get('message')
+                if e.cause.status == 403 and error.get('code') == 'player-bad-geolocation-country':
+ self.raise_geo_restricted(msg=message)
+ raise ExtractorError(message)
+ else:
+ raise ExtractorError('Giving up retrying')
+
+ links = links_data.get('links') or {}
+ metas = links_data.get('metadata') or {}
+ sub_url = (links.get('subtitles') or {}).get('all')
+ video_info = links_data.get('video') or {}
+ title = metas['title']
+
+ formats = []
+ for format_id, qualities in (links.get('streaming') or {}).items():
+ if not isinstance(qualities, dict):
+ continue
+ for quality, load_balancer_url in qualities.items():
+ load_balancer_data = self._download_json(
+ load_balancer_url, video_id,
+ 'Downloading %s %s JSON metadata' % (format_id, quality),
+ fatal=False) or {}
+ m3u8_url = load_balancer_data.get('location')
+ if not m3u8_url:
+ continue
+ m3u8_formats = self._extract_m3u8_formats(
+ m3u8_url, video_id, 'mp4', 'm3u8_native',
+ m3u8_id=format_id, fatal=False)
+ if format_id == 'vf':
+ for f in m3u8_formats:
+ f['language'] = 'fr'
+ elif format_id == 'vde':
+ for f in m3u8_formats:
+ f['language'] = 'de'
+ formats.extend(m3u8_formats)
+
+ if not formats:
+ self.raise_login_required('This video requires a subscription', method='password')
+
+ video = (self._download_json(
+ self._API_BASE_URL + 'video/%s' % video_id, video_id,
+ 'Downloading additional video metadata', fatal=False) or {}).get('video') or {}
+ show = video.get('show') or {}
+
+ return {
+ 'id': video_id,
+ 'title': title,
+ 'description': strip_or_none(metas.get('summary') or video.get('summary')),
+ 'thumbnail': video_info.get('image') or player.get('image'),
+ 'formats': formats,
+ 'subtitles': self.extract_subtitles(sub_url, video_id),
+ 'episode': metas.get('subtitle') or video.get('name'),
+ 'episode_number': int_or_none(video.get('shortNumber')),
+ 'series': show.get('title'),
+ 'season_number': int_or_none(video.get('season')),
+ 'duration': int_or_none(video_info.get('duration') or video.get('duration')),
+ 'release_date': unified_strdate(video.get('releaseDate')),
+ 'average_rating': float_or_none(video.get('rating') or metas.get('rating')),
+ 'comment_count': int_or_none(video.get('commentsCount')),
+ }
+
+
+class ADNSeasonIE(ADNBaseIE):
+ _VALID_URL = r'https?://(?:www\.)?(?:animation|anime)digitalnetwork\.(?P<lang>fr|de)/video/(?P<id>[^/?#]+)/?(?:$|[#?])'
+ _TESTS = [{
+ 'url': 'https://animationdigitalnetwork.fr/video/tokyo-mew-mew-new',
+ 'playlist_count': 12,
+ 'info_dict': {
+ 'id': '911',
+ 'title': 'Tokyo Mew Mew New',
+ },
+        # 'skip': 'Only available in French and German speaking Europe',
+ }]
+
+ def _real_extract(self, url):
+ lang, video_show_slug = self._match_valid_url(url).group('lang', 'id')
+ show = self._download_json(
+ f'{self._API_BASE_URL}show/{video_show_slug}/', video_show_slug,
+ 'Downloading show JSON metadata', headers=self._HEADERS)['show']
+ show_id = str(show['id'])
+ episodes = self._download_json(
+ f'{self._API_BASE_URL}video/show/{show_id}', video_show_slug,
+ 'Downloading episode list', headers={
+ 'X-Target-Distribution': lang,
+ **self._HEADERS
+ }, query={
+ 'order': 'asc',
+ 'limit': '-1',
+ })
+
+ def entries():
+ for episode_id in traverse_obj(episodes, ('videos', ..., 'id', {str_or_none})):
+ yield self.url_result(
+ f'https://animationdigitalnetwork.{lang}/video/{video_show_slug}/{episode_id}',
+ ADNIE, episode_id)
+
+ return self.playlist_result(entries(), show_id, show.get('title'))
diff --git a/yt_dlp/extractor/adobeconnect.py b/yt_dlp/extractor/adobeconnect.py
new file mode 100644
index 0000000..8963b12
--- /dev/null
+++ b/yt_dlp/extractor/adobeconnect.py
@@ -0,0 +1,34 @@
+from .common import InfoExtractor
+from ..compat import (
+ compat_parse_qs,
+ compat_urlparse,
+)
+
+
+class AdobeConnectIE(InfoExtractor):
+ _VALID_URL = r'https?://\w+\.adobeconnect\.com/(?P<id>[\w-]+)'
+
+ def _real_extract(self, url):
+ video_id = self._match_id(url)
+ webpage = self._download_webpage(url, video_id)
+ title = self._html_extract_title(webpage)
+ qs = compat_parse_qs(self._search_regex(r"swfUrl\s*=\s*'([^']+)'", webpage, 'swf url').split('?')[1])
+ is_live = qs.get('isLive', ['false'])[0] == 'true'
+ formats = []
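+        # each conString is an RTMP connection string; expose one format per connection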
+ for con_string in qs['conStrings'][0].split(','):
+ formats.append({
+ 'format_id': con_string.split('://')[0],
+ 'app': compat_urlparse.quote('?' + con_string.split('?')[1] + 'flvplayerapp/' + qs['appInstance'][0]),
+ 'ext': 'flv',
+ 'play_path': 'mp4:' + qs['streamName'][0],
+ 'rtmp_conn': 'S:' + qs['ticket'][0],
+ 'rtmp_live': is_live,
+ 'url': con_string,
+ })
+
+ return {
+ 'id': video_id,
+ 'title': title,
+ 'formats': formats,
+ 'is_live': is_live,
+ }
diff --git a/yt_dlp/extractor/adobepass.py b/yt_dlp/extractor/adobepass.py
new file mode 100644
index 0000000..5eed0ca
--- /dev/null
+++ b/yt_dlp/extractor/adobepass.py
@@ -0,0 +1,1778 @@
+import getpass
+import json
+import re
+import time
+import xml.etree.ElementTree as etree
+
+from .common import InfoExtractor
+from ..compat import compat_urlparse
+from ..networking.exceptions import HTTPError
+from ..utils import (
+ NO_DEFAULT,
+ ExtractorError,
+ unescapeHTML,
+ unified_timestamp,
+ urlencode_postdata,
+)
+
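+# Maps each MSO (TV provider) id to its display name and the login-form field
+# names used during the Adobe Pass authentication flow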
+MSO_INFO = {
+ 'DTV': {
+ 'name': 'DIRECTV',
+ 'username_field': 'username',
+ 'password_field': 'password',
+ },
+ 'ATT': {
+ 'name': 'AT&T U-verse',
+ 'username_field': 'userid',
+ 'password_field': 'password',
+ },
+ 'ATTOTT': {
+ 'name': 'DIRECTV NOW',
+ 'username_field': 'email',
+ 'password_field': 'loginpassword',
+ },
+ 'RCN': {
+ 'name': 'RCN',
+ 'username_field': 'username',
+ 'password_field': 'password',
+ },
+ 'Rogers': {
+ 'name': 'Rogers',
+ 'username_field': 'UserName',
+ 'password_field': 'UserPassword',
+ },
+ 'Comcast_SSO': {
+ 'name': 'Comcast XFINITY',
+ 'username_field': 'user',
+ 'password_field': 'passwd',
+ },
+ 'TWC': {
+ 'name': 'Time Warner Cable | Spectrum',
+ 'username_field': 'Ecom_User_ID',
+ 'password_field': 'Ecom_Password',
+ },
+ 'Brighthouse': {
+ 'name': 'Bright House Networks | Spectrum',
+ 'username_field': 'j_username',
+ 'password_field': 'j_password',
+ },
+ 'Charter_Direct': {
+ 'name': 'Charter Spectrum',
+ 'username_field': 'IDToken1',
+ 'password_field': 'IDToken2',
+ },
+ 'Spectrum': {
+ 'name': 'Spectrum',
+ 'username_field': 'IDToken1',
+ 'password_field': 'IDToken2',
+ },
+ 'Philo': {
+ 'name': 'Philo',
+ 'username_field': 'ident'
+ },
+ 'Verizon': {
+ 'name': 'Verizon FiOS',
+ 'username_field': 'IDToken1',
+ 'password_field': 'IDToken2',
+ },
+ 'Cablevision': {
+ 'name': 'Optimum/Cablevision',
+ 'username_field': 'j_username',
+ 'password_field': 'j_password',
+ },
+ 'thr030': {
+ 'name': '3 Rivers Communications'
+ },
+ 'com140': {
+ 'name': 'Access Montana'
+ },
+ 'acecommunications': {
+ 'name': 'AcenTek'
+ },
+ 'acm010': {
+ 'name': 'Acme Communications'
+ },
+ 'ada020': {
+ 'name': 'Adams Cable Service'
+ },
+ 'alb020': {
+ 'name': 'Albany Mutual Telephone'
+ },
+ 'algona': {
+ 'name': 'Algona Municipal Utilities'
+ },
+ 'allwest': {
+ 'name': 'All West Communications'
+ },
+ 'all025': {
+ 'name': 'Allen\'s Communications'
+ },
+ 'spl010': {
+ 'name': 'Alliance Communications'
+ },
+ 'all070': {
+ 'name': 'ALLO Communications'
+ },
+ 'alpine': {
+ 'name': 'Alpine Communications'
+ },
+ 'hun015': {
+ 'name': 'American Broadband'
+ },
+ 'nwc010': {
+ 'name': 'American Broadband Missouri'
+ },
+ 'com130-02': {
+ 'name': 'American Community Networks'
+ },
+ 'com130-01': {
+ 'name': 'American Warrior Networks'
+ },
+ 'tom020': {
+ 'name': 'Amherst Telephone/Tomorrow Valley'
+ },
+ 'tvc020': {
+ 'name': 'Andycable'
+ },
+ 'arkwest': {
+ 'name': 'Arkwest Communications'
+ },
+ 'art030': {
+ 'name': 'Arthur Mutual Telephone Company'
+ },
+ 'arvig': {
+ 'name': 'Arvig'
+ },
+ 'nttcash010': {
+ 'name': 'Ashland Home Net'
+ },
+ 'astound': {
+ 'name': 'Astound (now Wave)'
+ },
+ 'dix030': {
+ 'name': 'ATC Broadband'
+ },
+ 'ara010': {
+ 'name': 'ATC Communications'
+ },
+ 'she030-02': {
+ 'name': 'Ayersville Communications'
+ },
+ 'baldwin': {
+ 'name': 'Baldwin Lightstream'
+ },
+ 'bal040': {
+ 'name': 'Ballard TV'
+ },
+ 'cit025': {
+ 'name': 'Bardstown Cable TV'
+ },
+ 'bay030': {
+ 'name': 'Bay Country Communications'
+ },
+ 'tel095': {
+ 'name': 'Beaver Creek Cooperative Telephone'
+ },
+ 'bea020': {
+ 'name': 'Beaver Valley Cable'
+ },
+ 'bee010': {
+ 'name': 'Bee Line Cable'
+ },
+ 'wir030': {
+ 'name': 'Beehive Broadband'
+ },
+ 'bra020': {
+ 'name': 'BELD'
+ },
+ 'bel020': {
+ 'name': 'Bellevue Municipal Cable'
+ },
+ 'vol040-01': {
+ 'name': 'Ben Lomand Connect / BLTV'
+ },
+ 'bev010': {
+ 'name': 'BEVCOMM'
+ },
+ 'big020': {
+ 'name': 'Big Sandy Broadband'
+ },
+ 'ble020': {
+ 'name': 'Bledsoe Telephone Cooperative'
+ },
+ 'bvt010': {
+ 'name': 'Blue Valley Tele-Communications'
+ },
+ 'bra050': {
+ 'name': 'Brandenburg Telephone Co.'
+ },
+ 'bte010': {
+ 'name': 'Bristol Tennessee Essential Services'
+ },
+ 'annearundel': {
+ 'name': 'Broadstripe'
+ },
+ 'btc010': {
+ 'name': 'BTC Communications'
+ },
+ 'btc040': {
+ 'name': 'BTC Vision - Nahunta'
+ },
+ 'bul010': {
+ 'name': 'Bulloch Telephone Cooperative'
+ },
+ 'but010': {
+ 'name': 'Butler-Bremer Communications'
+ },
+ 'tel160-csp': {
+ 'name': 'C Spire SNAP'
+ },
+ 'csicable': {
+ 'name': 'Cable Services Inc.'
+ },
+ 'cableamerica': {
+ 'name': 'CableAmerica'
+ },
+ 'cab038': {
+ 'name': 'CableSouth Media 3'
+ },
+ 'weh010-camtel': {
+ 'name': 'Cam-Tel Company'
+ },
+ 'car030': {
+ 'name': 'Cameron Communications'
+ },
+ 'canbytel': {
+ 'name': 'Canby Telcom'
+ },
+ 'crt020': {
+ 'name': 'CapRock Tv'
+ },
+ 'car050': {
+ 'name': 'Carnegie Cable'
+ },
+ 'cas': {
+ 'name': 'CAS Cable'
+ },
+ 'casscomm': {
+ 'name': 'CASSCOMM'
+ },
+ 'mid180-02': {
+ 'name': 'Catalina Broadband Solutions'
+ },
+ 'cccomm': {
+ 'name': 'CC Communications'
+ },
+ 'nttccde010': {
+ 'name': 'CDE Lightband'
+ },
+ 'cfunet': {
+ 'name': 'Cedar Falls Utilities'
+ },
+ 'dem010-01': {
+ 'name': 'Celect-Bloomer Telephone Area'
+ },
+ 'dem010-02': {
+ 'name': 'Celect-Bruce Telephone Area'
+ },
+ 'dem010-03': {
+ 'name': 'Celect-Citizens Connected Area'
+ },
+ 'dem010-04': {
+ 'name': 'Celect-Elmwood/Spring Valley Area'
+ },
+ 'dem010-06': {
+ 'name': 'Celect-Mosaic Telecom'
+ },
+ 'dem010-05': {
+ 'name': 'Celect-West WI Telephone Area'
+ },
+ 'net010-02': {
+ 'name': 'Cellcom/Nsight Telservices'
+ },
+ 'cen100': {
+ 'name': 'CentraCom'
+ },
+ 'nttccst010': {
+ 'name': 'Central Scott / CSTV'
+ },
+ 'cha035': {
+ 'name': 'Chaparral CableVision'
+ },
+ 'cha050': {
+ 'name': 'Chariton Valley Communication Corporation, Inc.'
+ },
+ 'cha060': {
+ 'name': 'Chatmoss Cablevision'
+ },
+ 'nttcche010': {
+ 'name': 'Cherokee Communications'
+ },
+ 'che050': {
+ 'name': 'Chesapeake Bay Communications'
+ },
+ 'cimtel': {
+ 'name': 'Cim-Tel Cable, LLC.'
+ },
+ 'cit180': {
+ 'name': 'Citizens Cablevision - Floyd, VA'
+ },
+ 'cit210': {
+ 'name': 'Citizens Cablevision, Inc.'
+ },
+ 'cit040': {
+ 'name': 'Citizens Fiber'
+ },
+ 'cit250': {
+ 'name': 'Citizens Mutual'
+ },
+ 'war040': {
+ 'name': 'Citizens Telephone Corporation'
+ },
+ 'wat025': {
+ 'name': 'City Of Monroe'
+ },
+ 'wadsworth': {
+ 'name': 'CityLink'
+ },
+ 'nor100': {
+ 'name': 'CL Tel'
+ },
+ 'cla010': {
+ 'name': 'Clarence Telephone and Cedar Communications'
+ },
+ 'ser060': {
+ 'name': 'Clear Choice Communications'
+ },
+ 'tac020': {
+ 'name': 'Click! Cable TV'
+ },
+ 'war020': {
+ 'name': 'CLICK1.NET'
+ },
+ 'cml010': {
+ 'name': 'CML Telephone Cooperative Association'
+ },
+ 'cns': {
+ 'name': 'CNS'
+ },
+ 'com160': {
+ 'name': 'Co-Mo Connect'
+ },
+ 'coa020': {
+ 'name': 'Coast Communications'
+ },
+ 'coa030': {
+ 'name': 'Coaxial Cable TV'
+ },
+ 'mid055': {
+ 'name': 'Cobalt TV (Mid-State Community TV)'
+ },
+ 'col070': {
+ 'name': 'Columbia Power & Water Systems'
+ },
+ 'col080': {
+ 'name': 'Columbus Telephone'
+ },
+ 'nor105': {
+ 'name': 'Communications 1 Cablevision, Inc.'
+ },
+ 'com150': {
+ 'name': 'Community Cable & Broadband'
+ },
+ 'com020': {
+ 'name': 'Community Communications Company'
+ },
+ 'coy010': {
+ 'name': 'commZoom'
+ },
+ 'com025': {
+ 'name': 'Complete Communication Services'
+ },
+ 'cat020': {
+ 'name': 'Comporium'
+ },
+ 'com071': {
+ 'name': 'ComSouth Telesys'
+ },
+ 'consolidatedcable': {
+ 'name': 'Consolidated'
+ },
+ 'conwaycorp': {
+ 'name': 'Conway Corporation'
+ },
+ 'coo050': {
+ 'name': 'Coon Valley Telecommunications Inc'
+ },
+ 'coo080': {
+ 'name': 'Cooperative Telephone Company'
+ },
+ 'cpt010': {
+ 'name': 'CP-TEL'
+ },
+ 'cra010': {
+ 'name': 'Craw-Kan Telephone'
+ },
+ 'crestview': {
+ 'name': 'Crestview Cable Communications'
+ },
+ 'cross': {
+ 'name': 'Cross TV'
+ },
+ 'cro030': {
+ 'name': 'Crosslake Communications'
+ },
+ 'ctc040': {
+ 'name': 'CTC - Brainerd MN'
+ },
+ 'phe030': {
+ 'name': 'CTV-Beam - East Alabama'
+ },
+ 'cun010': {
+ 'name': 'Cunningham Telephone & Cable'
+ },
+ 'dpc010': {
+ 'name': 'D & P Communications'
+ },
+ 'dak030': {
+ 'name': 'Dakota Central Telecommunications'
+ },
+ 'nttcdel010': {
+ 'name': 'Delcambre Telephone LLC'
+ },
+ 'tel160-del': {
+ 'name': 'Delta Telephone Company'
+ },
+ 'sal040': {
+ 'name': 'DiamondNet'
+ },
+ 'ind060-dc': {
+ 'name': 'Direct Communications'
+ },
+ 'doy010': {
+ 'name': 'Doylestown Cable TV'
+ },
+ 'dic010': {
+ 'name': 'DRN'
+ },
+ 'dtc020': {
+ 'name': 'DTC'
+ },
+ 'dtc010': {
+ 'name': 'DTC Cable (Delhi)'
+ },
+ 'dum010': {
+ 'name': 'Dumont Telephone Company'
+ },
+ 'dun010': {
+ 'name': 'Dunkerton Telephone Cooperative'
+ },
+ 'cci010': {
+ 'name': 'Duo County Telecom'
+ },
+ 'eagle': {
+ 'name': 'Eagle Communications'
+ },
+ 'weh010-east': {
+ 'name': 'East Arkansas Cable TV'
+ },
+ 'eatel': {
+ 'name': 'EATEL Video, LLC'
+ },
+ 'ell010': {
+ 'name': 'ECTA'
+ },
+ 'emerytelcom': {
+ 'name': 'Emery Telcom Video LLC'
+ },
+ 'nor200': {
+ 'name': 'Empire Access'
+ },
+ 'endeavor': {
+ 'name': 'Endeavor Communications'
+ },
+ 'sun045': {
+ 'name': 'Enhanced Telecommunications Corporation'
+ },
+ 'mid030': {
+ 'name': 'enTouch'
+ },
+ 'epb020': {
+ 'name': 'EPB Smartnet'
+ },
+ 'jea010': {
+ 'name': 'EPlus Broadband'
+ },
+ 'com065': {
+ 'name': 'ETC'
+ },
+ 'ete010': {
+ 'name': 'Etex Communications'
+ },
+ 'fbc-tele': {
+ 'name': 'F&B Communications'
+ },
+ 'fal010': {
+ 'name': 'Falcon Broadband'
+ },
+ 'fam010': {
+ 'name': 'FamilyView CableVision'
+ },
+ 'far020': {
+ 'name': 'Farmers Mutual Telephone Company'
+ },
+ 'fay010': {
+ 'name': 'Fayetteville Public Utilities'
+ },
+ 'sal060': {
+ 'name': 'fibrant'
+ },
+ 'fid010': {
+ 'name': 'Fidelity Communications'
+ },
+ 'for030': {
+ 'name': 'FJ Communications'
+ },
+ 'fli020': {
+ 'name': 'Flint River Communications'
+ },
+ 'far030': {
+ 'name': 'FMT - Jesup'
+ },
+ 'foo010': {
+ 'name': 'Foothills Communications'
+ },
+ 'for080': {
+ 'name': 'Forsyth CableNet'
+ },
+ 'fbcomm': {
+ 'name': 'Frankfort Plant Board'
+ },
+ 'tel160-fra': {
+ 'name': 'Franklin Telephone Company'
+ },
+ 'nttcftc010': {
+ 'name': 'FTC'
+ },
+ 'fullchannel': {
+ 'name': 'Full Channel, Inc.'
+ },
+ 'gar040': {
+ 'name': 'Gardonville Cooperative Telephone Association'
+ },
+ 'gbt010': {
+ 'name': 'GBT Communications, Inc.'
+ },
+ 'tec010': {
+ 'name': 'Genuine Telecom'
+ },
+ 'clr010': {
+ 'name': 'Giant Communications'
+ },
+ 'gla010': {
+ 'name': 'Glasgow EPB'
+ },
+ 'gle010': {
+ 'name': 'Glenwood Telecommunications'
+ },
+ 'gra060': {
+ 'name': 'GLW Broadband Inc.'
+ },
+ 'goldenwest': {
+ 'name': 'Golden West Cablevision'
+ },
+ 'vis030': {
+ 'name': 'Grantsburg Telcom'
+ },
+ 'gpcom': {
+ 'name': 'Great Plains Communications'
+ },
+ 'gri010': {
+ 'name': 'Gridley Cable Inc'
+ },
+ 'hbc010': {
+ 'name': 'H&B Cable Services'
+ },
+ 'hae010': {
+ 'name': 'Haefele TV Inc.'
+ },
+ 'htc010': {
+ 'name': 'Halstad Telephone Company'
+ },
+ 'har005': {
+ 'name': 'Harlan Municipal Utilities'
+ },
+ 'har020': {
+ 'name': 'Hart Communications'
+ },
+ 'ced010': {
+ 'name': 'Hartelco TV'
+ },
+ 'hea040': {
+ 'name': 'Heart of Iowa Communications Cooperative'
+ },
+ 'htc020': {
+ 'name': 'Hickory Telephone Company'
+ },
+ 'nttchig010': {
+ 'name': 'Highland Communication Services'
+ },
+ 'hig030': {
+ 'name': 'Highland Media'
+ },
+ 'spc010': {
+ 'name': 'Hilliary Communications'
+ },
+ 'hin020': {
+ 'name': 'Hinton CATV Co.'
+ },
+ 'hometel': {
+ 'name': 'HomeTel Entertainment, Inc.'
+ },
+ 'hoodcanal': {
+ 'name': 'Hood Canal Communications'
+ },
+ 'weh010-hope': {
+ 'name': 'Hope - Prescott Cable TV'
+ },
+ 'horizoncable': {
+ 'name': 'Horizon Cable TV, Inc.'
+ },
+ 'hor040': {
+ 'name': 'Horizon Chillicothe Telephone'
+ },
+ 'htc030': {
+ 'name': 'HTC Communications Co. - IL'
+ },
+ 'htccomm': {
+ 'name': 'HTC Communications, Inc. - IA'
+ },
+ 'wal005': {
+ 'name': 'Huxley Communications'
+ },
+ 'imon': {
+ 'name': 'ImOn Communications'
+ },
+ 'ind040': {
+ 'name': 'Independence Telecommunications'
+ },
+ 'rrc010': {
+ 'name': 'Inland Networks'
+ },
+ 'stc020': {
+ 'name': 'Innovative Cable TV St Croix'
+ },
+ 'car100': {
+ 'name': 'Innovative Cable TV St Thomas-St John'
+ },
+ 'icc010': {
+ 'name': 'Inside Connect Cable'
+ },
+ 'int100': {
+ 'name': 'Integra Telecom'
+ },
+ 'int050': {
+ 'name': 'Interstate Telecommunications Coop'
+ },
+ 'irv010': {
+ 'name': 'Irvine Cable'
+ },
+ 'k2c010': {
+ 'name': 'K2 Communications'
+ },
+ 'kal010': {
+ 'name': 'Kalida Telephone Company, Inc.'
+ },
+ 'kal030': {
+ 'name': 'Kalona Cooperative Telephone Company'
+ },
+ 'kmt010': {
+ 'name': 'KMTelecom'
+ },
+ 'kpu010': {
+ 'name': 'KPU Telecommunications'
+ },
+ 'kuh010': {
+ 'name': 'Kuhn Communications, Inc.'
+ },
+ 'lak130': {
+ 'name': 'Lakeland Communications'
+ },
+ 'lan010': {
+ 'name': 'Langco'
+ },
+ 'lau020': {
+ 'name': 'Laurel Highland Total Communications, Inc.'
+ },
+ 'leh010': {
+ 'name': 'Lehigh Valley Cooperative Telephone'
+ },
+ 'bra010': {
+ 'name': 'Limestone Cable/Bracken Cable'
+ },
+ 'loc020': {
+ 'name': 'LISCO'
+ },
+ 'lit020': {
+ 'name': 'Litestream'
+ },
+ 'tel140': {
+ 'name': 'LivCom'
+ },
+ 'loc010': {
+ 'name': 'LocalTel Communications'
+ },
+ 'weh010-longview': {
+ 'name': 'Longview - Kilgore Cable TV'
+ },
+ 'lon030': {
+ 'name': 'Lonsdale Video Ventures, LLC'
+ },
+ 'lns010': {
+ 'name': 'Lost Nation-Elwood Telephone Co.'
+ },
+ 'nttclpc010': {
+ 'name': 'LPC Connect'
+ },
+ 'lumos': {
+ 'name': 'Lumos Networks'
+ },
+ 'madison': {
+ 'name': 'Madison Communications'
+ },
+ 'mad030': {
+ 'name': 'Madison County Cable Inc.'
+ },
+ 'nttcmah010': {
+ 'name': 'Mahaska Communication Group'
+ },
+ 'mar010': {
+ 'name': 'Marne & Elk Horn Telephone Company'
+ },
+ 'mcc040': {
+ 'name': 'McClure Telephone Co.'
+ },
+ 'mctv': {
+ 'name': 'MCTV'
+ },
+ 'merrimac': {
+ 'name': 'Merrimac Communications Ltd.'
+ },
+ 'metronet': {
+ 'name': 'Metronet'
+ },
+ 'mhtc': {
+ 'name': 'MHTC'
+ },
+ 'midhudson': {
+ 'name': 'Mid-Hudson Cable'
+ },
+ 'midrivers': {
+ 'name': 'Mid-Rivers Communications'
+ },
+ 'mid045': {
+ 'name': 'Midstate Communications'
+ },
+ 'mil080': {
+ 'name': 'Milford Communications'
+ },
+ 'min030': {
+ 'name': 'MINET'
+ },
+ 'nttcmin010': {
+ 'name': 'Minford TV'
+ },
+ 'san040-02': {
+ 'name': 'Mitchell Telecom'
+ },
+ 'mlg010': {
+ 'name': 'MLGC'
+ },
+ 'mon060': {
+ 'name': 'Mon-Cre TVE'
+ },
+ 'mou110': {
+ 'name': 'Mountain Telephone'
+ },
+ 'mou050': {
+ 'name': 'Mountain Village Cable'
+ },
+ 'mtacomm': {
+ 'name': 'MTA Communications, LLC'
+ },
+ 'mtc010': {
+ 'name': 'MTC Cable'
+ },
+ 'med040': {
+ 'name': 'MTC Technologies'
+ },
+ 'man060': {
+ 'name': 'MTCC'
+ },
+ 'mtc030': {
+ 'name': 'MTCO Communications'
+ },
+ 'mul050': {
+ 'name': 'Mulberry Telecommunications'
+ },
+ 'mur010': {
+ 'name': 'Murray Electric System'
+ },
+ 'musfiber': {
+ 'name': 'MUS FiberNET'
+ },
+ 'mpw': {
+ 'name': 'Muscatine Power & Water'
+ },
+ 'nttcsli010': {
+ 'name': 'myEVTV.com'
+ },
+ 'nor115': {
+ 'name': 'NCC'
+ },
+ 'nor260': {
+ 'name': 'NDTC'
+ },
+ 'nctc': {
+ 'name': 'Nebraska Central Telecom, Inc.'
+ },
+ 'nel020': {
+ 'name': 'Nelsonville TV Cable'
+ },
+ 'nem010': {
+ 'name': 'Nemont'
+ },
+ 'new075': {
+ 'name': 'New Hope Telephone Cooperative'
+ },
+ 'nor240': {
+ 'name': 'NICP'
+ },
+ 'cic010': {
+ 'name': 'NineStar Connect'
+ },
+ 'nktelco': {
+ 'name': 'NKTelco'
+ },
+ 'nortex': {
+ 'name': 'Nortex Communications'
+ },
+ 'nor140': {
+ 'name': 'North Central Telephone Cooperative'
+ },
+ 'nor030': {
+ 'name': 'Northland Communications'
+ },
+ 'nor075': {
+ 'name': 'Northwest Communications'
+ },
+ 'nor125': {
+ 'name': 'Norwood Light Broadband'
+ },
+ 'net010': {
+ 'name': 'Nsight Telservices'
+ },
+ 'dur010': {
+ 'name': 'Ntec'
+ },
+ 'nts010': {
+ 'name': 'NTS Communications'
+ },
+ 'new045': {
+ 'name': 'NU-Telecom'
+ },
+ 'nulink': {
+ 'name': 'NuLink'
+ },
+ 'jam030': {
+ 'name': 'NVC'
+ },
+ 'far035': {
+ 'name': 'OmniTel Communications'
+ },
+ 'onesource': {
+ 'name': 'OneSource Communications'
+ },
+ 'cit230': {
+ 'name': 'Opelika Power Services'
+ },
+ 'daltonutilities': {
+ 'name': 'OptiLink'
+ },
+ 'mid140': {
+ 'name': 'OPTURA'
+ },
+ 'ote010': {
+ 'name': 'OTEC Communication Company'
+ },
+ 'cci020': {
+ 'name': 'Packerland Broadband'
+ },
+ 'pan010': {
+ 'name': 'Panora Telco/Guthrie Center Communications'
+ },
+ 'otter': {
+ 'name': 'Park Region Telephone & Otter Tail Telcom'
+ },
+ 'mid050': {
+ 'name': 'Partner Communications Cooperative'
+ },
+ 'fib010': {
+ 'name': 'Pathway'
+ },
+ 'paulbunyan': {
+ 'name': 'Paul Bunyan Communications'
+ },
+ 'pem020': {
+ 'name': 'Pembroke Telephone Company'
+ },
+ 'mck010': {
+ 'name': 'Peoples Rural Telephone Cooperative'
+ },
+ 'pul010': {
+ 'name': 'PES Energize'
+ },
+ 'phi010': {
+ 'name': 'Philippi Communications System'
+ },
+ 'phonoscope': {
+ 'name': 'Phonoscope Cable'
+ },
+ 'pin070': {
+ 'name': 'Pine Belt Communications, Inc.'
+ },
+ 'weh010-pine': {
+ 'name': 'Pine Bluff Cable TV'
+ },
+ 'pin060': {
+ 'name': 'Pineland Telephone Cooperative'
+ },
+ 'cam010': {
+ 'name': 'Pinpoint Communications'
+ },
+ 'pio060': {
+ 'name': 'Pioneer Broadband'
+ },
+ 'pioncomm': {
+ 'name': 'Pioneer Communications'
+ },
+ 'pioneer': {
+ 'name': 'Pioneer DTV'
+ },
+ 'pla020': {
+ 'name': 'Plant TiftNet, Inc.'
+ },
+ 'par010': {
+ 'name': 'PLWC'
+ },
+ 'pro035': {
+ 'name': 'PMT'
+ },
+ 'vik011': {
+ 'name': 'Polar Cablevision'
+ },
+ 'pottawatomie': {
+ 'name': 'Pottawatomie Telephone Co.'
+ },
+ 'premiercomm': {
+ 'name': 'Premier Communications'
+ },
+ 'psc010': {
+ 'name': 'PSC'
+ },
+ 'pan020': {
+ 'name': 'PTCI'
+ },
+ 'qco010': {
+ 'name': 'QCOL'
+ },
+ 'qua010': {
+ 'name': 'Quality Cablevision'
+ },
+ 'rad010': {
+ 'name': 'Radcliffe Telephone Company'
+ },
+ 'car040': {
+ 'name': 'Rainbow Communications'
+ },
+ 'rai030': {
+ 'name': 'Rainier Connect'
+ },
+ 'ral010': {
+ 'name': 'Ralls Technologies'
+ },
+ 'rct010': {
+ 'name': 'RC Technologies'
+ },
+ 'red040': {
+ 'name': 'Red River Communications'
+ },
+ 'ree010': {
+ 'name': 'Reedsburg Utility Commission'
+ },
+ 'mol010': {
+ 'name': 'Reliance Connects- Oregon'
+ },
+ 'res020': {
+ 'name': 'Reserve Telecommunications'
+ },
+ 'weh010-resort': {
+ 'name': 'Resort TV Cable'
+ },
+ 'rld010': {
+ 'name': 'Richland Grant Telephone Cooperative, Inc.'
+ },
+ 'riv030': {
+ 'name': 'River Valley Telecommunications Coop'
+ },
+ 'rockportcable': {
+ 'name': 'Rock Port Cablevision'
+ },
+ 'rsf010': {
+ 'name': 'RS Fiber'
+ },
+ 'rtc': {
+ 'name': 'RTC Communication Corp'
+ },
+ 'res040': {
+ 'name': 'RTC-Reservation Telephone Coop.'
+ },
+ 'rte010': {
+ 'name': 'RTEC Communications'
+ },
+ 'stc010': {
+ 'name': 'S&T'
+ },
+ 'san020': {
+ 'name': 'San Bruno Cable TV'
+ },
+ 'san040-01': {
+ 'name': 'Santel'
+ },
+ 'sav010': {
+ 'name': 'SCI Broadband-Savage Communications Inc.'
+ },
+ 'sco050': {
+ 'name': 'Scottsboro Electric Power Board'
+ },
+ 'scr010': {
+ 'name': 'Scranton Telephone Company'
+ },
+ 'selco': {
+ 'name': 'SELCO'
+ },
+ 'she010': {
+ 'name': 'Shentel'
+ },
+ 'she030': {
+ 'name': 'Sherwood Mutual Telephone Association, Inc.'
+ },
+ 'ind060-ssc': {
+ 'name': 'Silver Star Communications'
+ },
+ 'sjoberg': {
+ 'name': 'Sjoberg\'s Inc.'
+ },
+ 'sou025': {
+ 'name': 'SKT'
+ },
+ 'sky050': {
+ 'name': 'SkyBest TV'
+ },
+ 'nttcsmi010': {
+ 'name': 'Smithville Communications'
+ },
+ 'woo010': {
+ 'name': 'Solarus'
+ },
+ 'sou075': {
+ 'name': 'South Central Rural Telephone Cooperative'
+ },
+ 'sou065': {
+ 'name': 'South Holt Cablevision, Inc.'
+ },
+ 'sou035': {
+ 'name': 'South Slope Cooperative Communications'
+ },
+ 'spa020': {
+ 'name': 'Spanish Fork Community Network'
+ },
+ 'spe010': {
+ 'name': 'Spencer Municipal Utilities'
+ },
+ 'spi005': {
+ 'name': 'Spillway Communications, Inc.'
+ },
+ 'srt010': {
+ 'name': 'SRT'
+ },
+ 'cccsmc010': {
+ 'name': 'St. Maarten Cable TV'
+ },
+ 'sta025': {
+ 'name': 'Star Communications'
+ },
+ 'sco020': {
+ 'name': 'STE'
+ },
+ 'uin010': {
+ 'name': 'STRATA Networks'
+ },
+ 'sum010': {
+ 'name': 'Sumner Cable TV'
+ },
+ 'pie010': {
+ 'name': 'Surry TV/PCSI TV'
+ },
+ 'swa010': {
+ 'name': 'Swayzee Communications'
+ },
+ 'sweetwater': {
+ 'name': 'Sweetwater Cable Television Co'
+ },
+ 'weh010-talequah': {
+ 'name': 'Tahlequah Cable TV'
+ },
+ 'tct': {
+ 'name': 'TCT'
+ },
+ 'tel050': {
+ 'name': 'Tele-Media Company'
+ },
+ 'com050': {
+ 'name': 'The Community Agency'
+ },
+ 'thr020': {
+ 'name': 'Three River'
+ },
+ 'cab140': {
+ 'name': 'Town & Country Technologies'
+ },
+ 'tra010': {
+ 'name': 'Trans-Video'
+ },
+ 'tre010': {
+ 'name': 'Trenton TV Cable Company'
+ },
+ 'tcc': {
+ 'name': 'Tri County Communications Cooperative'
+ },
+ 'tri025': {
+ 'name': 'TriCounty Telecom'
+ },
+ 'tri110': {
+ 'name': 'TrioTel Communications, Inc.'
+ },
+ 'tro010': {
+ 'name': 'Troy Cablevision, Inc.'
+ },
+ 'tsc': {
+ 'name': 'TSC'
+ },
+ 'cit220': {
+ 'name': 'Tullahoma Utilities Board'
+ },
+ 'tvc030': {
+ 'name': 'TV Cable of Rensselaer'
+ },
+ 'tvc015': {
+ 'name': 'TVC Cable'
+ },
+ 'cab180': {
+ 'name': 'TVision'
+ },
+ 'twi040': {
+ 'name': 'Twin Lakes'
+ },
+ 'tvtinc': {
+ 'name': 'Twin Valley'
+ },
+ 'uis010': {
+ 'name': 'Union Telephone Company'
+ },
+ 'uni110': {
+ 'name': 'United Communications - TN'
+ },
+ 'uni120': {
+ 'name': 'United Services'
+ },
+ 'uss020': {
+ 'name': 'US Sonet'
+ },
+ 'cab060': {
+ 'name': 'USA Communications'
+ },
+ 'she005': {
+ 'name': 'USA Communications/Shellsburg, IA'
+ },
+ 'val040': {
+ 'name': 'Valley TeleCom Group'
+ },
+ 'val025': {
+ 'name': 'Valley Telecommunications'
+ },
+ 'val030': {
+ 'name': 'Valparaiso Broadband'
+ },
+ 'cla050': {
+ 'name': 'Vast Broadband'
+ },
+ 'sul015': {
+ 'name': 'Venture Communications Cooperative, Inc.'
+ },
+ 'ver025': {
+ 'name': 'Vernon Communications Co-op'
+ },
+ 'weh010-vicksburg': {
+ 'name': 'Vicksburg Video'
+ },
+ 'vis070': {
+ 'name': 'Vision Communications'
+ },
+ 'volcanotel': {
+ 'name': 'Volcano Vision, Inc.'
+ },
+ 'vol040-02': {
+ 'name': 'VolFirst / BLTV'
+ },
+ 'ver070': {
+ 'name': 'VTel'
+ },
+ 'nttcvtx010': {
+ 'name': 'VTX1'
+ },
+ 'bci010-02': {
+ 'name': 'Vyve Broadband'
+ },
+ 'wab020': {
+ 'name': 'Wabash Mutual Telephone'
+ },
+ 'waitsfield': {
+ 'name': 'Waitsfield Cable'
+ },
+ 'wal010': {
+ 'name': 'Walnut Communications'
+ },
+ 'wavebroadband': {
+ 'name': 'Wave'
+ },
+ 'wav030': {
+ 'name': 'Waverly Communications Utility'
+ },
+ 'wbi010': {
+ 'name': 'WBI'
+ },
+ 'web020': {
+ 'name': 'Webster-Calhoun Cooperative Telephone Association'
+ },
+ 'wes005': {
+ 'name': 'West Alabama TV Cable'
+ },
+ 'carolinata': {
+ 'name': 'West Carolina Communications'
+ },
+ 'wct010': {
+ 'name': 'West Central Telephone Association'
+ },
+ 'wes110': {
+ 'name': 'West River Cooperative Telephone Company'
+ },
+ 'ani030': {
+ 'name': 'WesTel Systems'
+ },
+ 'westianet': {
+ 'name': 'Western Iowa Networks'
+ },
+ 'nttcwhi010': {
+ 'name': 'Whidbey Telecom'
+ },
+ 'weh010-white': {
+ 'name': 'White County Cable TV'
+ },
+ 'wes130': {
+ 'name': 'Wiatel'
+ },
+ 'wik010': {
+ 'name': 'Wiktel'
+ },
+ 'wil070': {
+ 'name': 'Wilkes Communications, Inc./RiverStreet Networks'
+ },
+ 'wil015': {
+ 'name': 'Wilson Communications'
+ },
+ 'win010': {
+ 'name': 'Windomnet/SMBS'
+ },
+ 'win090': {
+ 'name': 'Windstream Cable TV'
+ },
+ 'wcta': {
+ 'name': 'Winnebago Cooperative Telecom Association'
+ },
+ 'wtc010': {
+ 'name': 'WTC'
+ },
+ 'wil040': {
+ 'name': 'WTC Communications, Inc.'
+ },
+ 'wya010': {
+ 'name': 'Wyandotte Cable'
+ },
+ 'hin020-02': {
+ 'name': 'X-Stream Services'
+ },
+ 'xit010': {
+ 'name': 'XIT Communications'
+ },
+ 'yel010': {
+ 'name': 'Yelcot Communications'
+ },
+ 'mid180-01': {
+ 'name': 'yondoo'
+ },
+ 'cou060': {
+ 'name': 'Zito Media'
+ },
+ 'slingtv': {
+ 'name': 'Sling TV',
+ 'username_field': 'username',
+ 'password_field': 'password',
+ },
+ 'Suddenlink': {
+ 'name': 'Suddenlink',
+ 'username_field': 'username',
+ 'password_field': 'password',
+ },
+ 'AlticeOne': {
+ 'name': 'Optimum TV',
+ 'username_field': 'j_username',
+ 'password_field': 'j_password',
+ },
+}
+
+
+class AdobePassIE(InfoExtractor): # XXX: Conventionally, base classes should end with BaseIE/InfoExtractor
+ _SERVICE_PROVIDER_TEMPLATE = 'https://sp.auth.adobe.com/adobe-services/%s'
+ _USER_AGENT = 'Mozilla/5.0 (X11; Linux i686; rv:47.0) Gecko/20100101 Firefox/47.0'
+ _MVPD_CACHE = 'ap-mvpd'
+
+ _DOWNLOADING_LOGIN_PAGE = 'Downloading Provider Login Page'
+
+ def _download_webpage_handle(self, *args, **kwargs):
+ headers = self.geo_verification_headers()
+ headers.update(kwargs.get('headers', {}))
+ kwargs['headers'] = headers
+ return super(AdobePassIE, self)._download_webpage_handle(
+ *args, **kwargs)
+
+ @staticmethod
+ def _get_mvpd_resource(provider_id, title, guid, rating):
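+        # Build the MRSS (media RSS) resource descriptor that Adobe Pass
+        # expects: a <channel> carrying the provider id and a single <item>
+        # with the title, guid and v-chip rating of the protected video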
+ channel = etree.Element('channel')
+ channel_title = etree.SubElement(channel, 'title')
+ channel_title.text = provider_id
+ item = etree.SubElement(channel, 'item')
+ resource_title = etree.SubElement(item, 'title')
+ resource_title.text = title
+ resource_guid = etree.SubElement(item, 'guid')
+ resource_guid.text = guid
+ resource_rating = etree.SubElement(item, 'media:rating')
+ resource_rating.attrib = {'scheme': 'urn:v-chip'}
+ resource_rating.text = rating
+ return '<rss version="2.0" xmlns:media="http://search.yahoo.com/mrss/">' + etree.tostring(channel).decode() + '</rss>'
+
+ def _extract_mvpd_auth(self, url, video_id, requestor_id, resource):
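+        # Adobe Pass flow: obtain an authentication (authn) token for the
+        # requestor, exchange it for an authorization (authz) token for the
+        # resource, then trade that for a short-lived media token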
+ def xml_text(xml_str, tag):
+ return self._search_regex(
+ '<%s>(.+?)</%s>' % (tag, tag), xml_str, tag)
+
+ def is_expired(token, date_ele):
+ token_expires = unified_timestamp(re.sub(r'[_ ]GMT', '', xml_text(token, date_ele)))
+ return token_expires and token_expires <= int(time.time())
+
+ def post_form(form_page_res, note, data={}):
+ form_page, urlh = form_page_res
+ post_url = self._html_search_regex(r'<form[^>]+action=(["\'])(?P<url>.+?)\1', form_page, 'post url', group='url')
+ if not re.match(r'https?://', post_url):
+ post_url = compat_urlparse.urljoin(urlh.url, post_url)
+ form_data = self._hidden_inputs(form_page)
+ form_data.update(data)
+ return self._download_webpage_handle(
+ post_url, video_id, note, data=urlencode_postdata(form_data), headers={
+ 'Content-Type': 'application/x-www-form-urlencoded',
+ })
+
+ def raise_mvpd_required():
+ raise ExtractorError(
+ 'This video is only available for users of participating TV providers. '
+ 'Use --ap-mso to specify Adobe Pass Multiple-system operator Identifier '
+ 'and --ap-username and --ap-password or --netrc to provide account credentials.', expected=True)
+
+ def extract_redirect_url(html, url=None, fatal=False):
+ # TODO: eliminate code duplication with generic extractor and move
+ # redirection code into _download_webpage_handle
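+            # Matches the URL part of a <meta http-equiv="refresh"> tag,
+            # e.g. content="0; url='https://example.com/'"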
+ REDIRECT_REGEX = r'[0-9]{,2};\s*(?:URL|url)=\'?([^\'"]+)'
+ redirect_url = self._search_regex(
+ r'(?i)<meta\s+(?=(?:[a-z-]+="[^"]+"\s+)*http-equiv="refresh")'
+ r'(?:[a-z-]+="[^"]+"\s+)*?content="%s' % REDIRECT_REGEX,
+ html, 'meta refresh redirect',
+ default=NO_DEFAULT if fatal else None, fatal=fatal)
+ if not redirect_url:
+ return None
+ if url:
+ redirect_url = compat_urlparse.urljoin(url, unescapeHTML(redirect_url))
+ return redirect_url
+
+ mvpd_headers = {
+ 'ap_42': 'anonymous',
+ 'ap_11': 'Linux i686',
+ 'ap_z': self._USER_AGENT,
+ 'User-Agent': self._USER_AGENT,
+ }
+
+ guid = xml_text(resource, 'guid') if '<' in resource else resource
+ count = 0
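+        # A <pendingLogout> response invalidates the cached tokens, so allow
+        # one retry with a fresh cache before giving up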
+ while count < 2:
+ requestor_info = self.cache.load(self._MVPD_CACHE, requestor_id) or {}
+ authn_token = requestor_info.get('authn_token')
+ if authn_token and is_expired(authn_token, 'simpleTokenExpires'):
+ authn_token = None
+ if not authn_token:
+ mso_id = self.get_param('ap_mso')
+ if mso_id:
+ username, password = self._get_login_info('ap_username', 'ap_password', mso_id)
+ if not username or not password:
+ raise_mvpd_required()
+ mso_info = MSO_INFO[mso_id]
+
+ provider_redirect_page_res = self._download_webpage_handle(
+ self._SERVICE_PROVIDER_TEMPLATE % 'authenticate/saml', video_id,
+ 'Downloading Provider Redirect Page', query={
+ 'noflash': 'true',
+ 'mso_id': mso_id,
+ 'requestor_id': requestor_id,
+ 'no_iframe': 'false',
+ 'domain_name': 'adobe.com',
+ 'redirect_url': url,
+ })
+ elif not self._cookies_passed:
+ raise_mvpd_required()
+
+ if not mso_id:
+ pass
+ elif mso_id == 'Comcast_SSO':
+ # Comcast page flow varies by video site and whether you
+ # are on Comcast's network.
+ provider_redirect_page, urlh = provider_redirect_page_res
+ if 'automatically signing you in' in provider_redirect_page:
+ oauth_redirect_url = self._html_search_regex(
+ r'window\.location\s*=\s*[\'"]([^\'"]+)',
+ provider_redirect_page, 'oauth redirect')
+ self._download_webpage(
+ oauth_redirect_url, video_id, 'Confirming auto login')
+ elif 'automatically signed in with' in provider_redirect_page:
+                        # Comcast seems to be rolling out a new way of automatically signing in customers
+ oauth_redirect_url = self._html_search_regex(
+ r'continue:\s*"(https://oauth\.xfinity\.com/oauth/authorize\?.+)"', provider_redirect_page,
+ 'oauth redirect (signed)')
+ # Just need to process the request. No useful data comes back
+ self._download_webpage(oauth_redirect_url, video_id, 'Confirming auto login')
+ else:
+ if '<form name="signin"' in provider_redirect_page:
+ provider_login_page_res = provider_redirect_page_res
+ elif 'http-equiv="refresh"' in provider_redirect_page:
+ oauth_redirect_url = extract_redirect_url(
+ provider_redirect_page, fatal=True)
+ provider_login_page_res = self._download_webpage_handle(
+ oauth_redirect_url, video_id,
+ self._DOWNLOADING_LOGIN_PAGE)
+ else:
+ provider_login_page_res = post_form(
+ provider_redirect_page_res,
+ self._DOWNLOADING_LOGIN_PAGE)
+
+ mvpd_confirm_page_res = post_form(
+ provider_login_page_res, 'Logging in', {
+ mso_info['username_field']: username,
+ mso_info['password_field']: password,
+ })
+ mvpd_confirm_page, urlh = mvpd_confirm_page_res
+ if '<button class="submit" value="Resume">Resume</button>' in mvpd_confirm_page:
+ post_form(mvpd_confirm_page_res, 'Confirming Login')
+ elif mso_id == 'Philo':
+                # Philo has a unique authentication method
+ self._download_webpage(
+ 'https://idp.philo.com/auth/init/login_code', video_id, 'Requesting auth code', data=urlencode_postdata({
+ 'ident': username,
+ 'device': 'web',
+ 'send_confirm_link': False,
+ 'send_token': True
+ }))
+ philo_code = getpass.getpass('Type auth code you have received [Return]: ')
+ self._download_webpage(
+ 'https://idp.philo.com/auth/update/login_code', video_id, 'Submitting token', data=urlencode_postdata({
+ 'token': philo_code
+ }))
+ mvpd_confirm_page_res = self._download_webpage_handle('https://idp.philo.com/idp/submit', video_id, 'Confirming Philo Login')
+ post_form(mvpd_confirm_page_res, 'Confirming Login')
+ elif mso_id == 'Verizon':
+ # In general, if you're connecting from a Verizon-assigned IP,
+ # you will not actually pass your credentials.
+ provider_redirect_page, urlh = provider_redirect_page_res
+                # From a non-Verizon IP the page still gave 'Please wait', but N == Y was observed; this still needs to be tested from a Verizon IP
+ if 'Please wait ...' in provider_redirect_page and '\'N\'== "Y"' not in provider_redirect_page:
+ saml_redirect_url = self._html_search_regex(
+ r'self\.parent\.location=(["\'])(?P<url>.+?)\1',
+ provider_redirect_page,
+ 'SAML Redirect URL', group='url')
+ saml_login_page = self._download_webpage(
+ saml_redirect_url, video_id,
+ 'Downloading SAML Login Page')
+ elif 'Verizon FiOS - sign in' in provider_redirect_page:
+ # FXNetworks from non-Verizon IP
+ saml_login_page_res = post_form(
+ provider_redirect_page_res, 'Logging in', {
+ mso_info['username_field']: username,
+ mso_info['password_field']: password,
+ })
+ saml_login_page, urlh = saml_login_page_res
+ if 'Please try again.' in saml_login_page:
+ raise ExtractorError(
+ 'We\'re sorry, but either the User ID or Password entered is not correct.')
+ else:
+ # ABC from non-Verizon IP
+ saml_redirect_url = self._html_search_regex(
+ r'var\surl\s*=\s*(["\'])(?P<url>.+?)\1',
+ provider_redirect_page,
+ 'SAML Redirect URL', group='url')
+ saml_redirect_url = saml_redirect_url.replace(r'\/', '/')
+ saml_redirect_url = saml_redirect_url.replace(r'\-', '-')
+ saml_redirect_url = saml_redirect_url.replace(r'\x26', '&')
+ saml_login_page = self._download_webpage(
+ saml_redirect_url, video_id,
+ 'Downloading SAML Login Page')
+ saml_login_page, urlh = post_form(
+ [saml_login_page, saml_redirect_url], 'Logging in', {
+ mso_info['username_field']: username,
+ mso_info['password_field']: password,
+ })
+ if 'Please try again.' in saml_login_page:
+ raise ExtractorError(
+ 'Failed to login, incorrect User ID or Password.')
+ saml_login_url = self._search_regex(
+ r'xmlHttp\.open\("POST"\s*,\s*(["\'])(?P<url>.+?)\1',
+ saml_login_page, 'SAML Login URL', group='url')
+ saml_response_json = self._download_json(
+ saml_login_url, video_id, 'Downloading SAML Response',
+ headers={'Content-Type': 'text/xml'})
+ self._download_webpage(
+ saml_response_json['targetValue'], video_id,
+ 'Confirming Login', data=urlencode_postdata({
+ 'SAMLResponse': saml_response_json['SAMLResponse'],
+ 'RelayState': saml_response_json['RelayState']
+ }), headers={
+ 'Content-Type': 'application/x-www-form-urlencoded'
+ })
+ elif mso_id in ('Spectrum', 'Charter_Direct'):
+                # Spectrum's login form is dynamically loaded via JS, so we need to hardcode the flow
+ # as a one-off implementation.
+ provider_redirect_page, urlh = provider_redirect_page_res
+ provider_login_page_res = post_form(
+ provider_redirect_page_res, self._DOWNLOADING_LOGIN_PAGE)
+ saml_login_page, urlh = provider_login_page_res
+ relay_state = self._search_regex(
+ r'RelayState\s*=\s*"(?P<relay>.+?)";',
+ saml_login_page, 'RelayState', group='relay')
+ saml_request = self._search_regex(
+ r'SAMLRequest\s*=\s*"(?P<saml_request>.+?)";',
+ saml_login_page, 'SAMLRequest', group='saml_request')
+ login_json = {
+ mso_info['username_field']: username,
+ mso_info['password_field']: password,
+ 'RelayState': relay_state,
+ 'SAMLRequest': saml_request,
+ }
+ saml_response_json = self._download_json(
+ 'https://tveauthn.spectrum.net/tveauthentication/api/v1/manualAuth', video_id,
+ 'Downloading SAML Response',
+ data=json.dumps(login_json).encode(),
+ headers={
+ 'Content-Type': 'application/json',
+ 'Accept': 'application/json',
+ })
+ self._download_webpage(
+ saml_response_json['SAMLRedirectUri'], video_id,
+ 'Confirming Login', data=urlencode_postdata({
+ 'SAMLResponse': saml_response_json['SAMLResponse'],
+ 'RelayState': relay_state,
+ }), headers={
+ 'Content-Type': 'application/x-www-form-urlencoded'
+ })
+ elif mso_id == 'slingtv':
+                # SlingTV uses meta-refresh based authentication, but also
+                # inspects the tab history to count how many times the
+                # browser has been on the page
+
+ first_bookend_page, urlh = provider_redirect_page_res
+
+ hidden_data = self._hidden_inputs(first_bookend_page)
+ hidden_data['history'] = 1
+
+ provider_login_page_res = self._download_webpage_handle(
+ urlh.url, video_id, 'Sending first bookend',
+ query=hidden_data)
+
+ provider_association_redirect, urlh = post_form(
+ provider_login_page_res, 'Logging in', {
+ mso_info['username_field']: username,
+ mso_info['password_field']: password
+ })
+
+ provider_refresh_redirect_url = extract_redirect_url(
+ provider_association_redirect, url=urlh.url)
+
+ last_bookend_page, urlh = self._download_webpage_handle(
+ provider_refresh_redirect_url, video_id,
+ 'Downloading Auth Association Redirect Page')
+ hidden_data = self._hidden_inputs(last_bookend_page)
+ hidden_data['history'] = 3
+
+ mvpd_confirm_page_res = self._download_webpage_handle(
+ urlh.url, video_id, 'Sending final bookend',
+ query=hidden_data)
+
+ post_form(mvpd_confirm_page_res, 'Confirming Login')
+ elif mso_id == 'Suddenlink':
+ # Suddenlink is similar to SlingTV in using a tab history count and a meta refresh,
+                # but they also do a dynamic redirect using JavaScript that has to be followed as well
+ first_bookend_page, urlh = post_form(
+ provider_redirect_page_res, 'Pressing Continue...')
+
+ hidden_data = self._hidden_inputs(first_bookend_page)
+ hidden_data['history_val'] = 1
+
+ provider_login_redirect_page_res = self._download_webpage_handle(
+ urlh.url, video_id, 'Sending First Bookend',
+ query=hidden_data)
+
+ provider_login_redirect_page, urlh = provider_login_redirect_page_res
+
+                # Some website partners seem not to have the extra ajaxurl
+                # redirect step, so we check whether the login prompt is
+                # already present
+ if 'id="password" type="password" name="password"' in provider_login_redirect_page:
+ provider_login_page_res = provider_login_redirect_page_res
+ else:
+ provider_tryauth_url = self._html_search_regex(
+ r'url:\s*[\'"]([^\'"]+)', provider_login_redirect_page, 'ajaxurl')
+ provider_tryauth_page = self._download_webpage(
+ provider_tryauth_url, video_id, 'Submitting TryAuth',
+ query=hidden_data)
+
+ provider_login_page_res = self._download_webpage_handle(
+ f'https://authorize.suddenlink.net/saml/module.php/authSynacor/login.php?AuthState={provider_tryauth_page}',
+ video_id, 'Getting Login Page',
+ query=hidden_data)
+
+ provider_association_redirect, urlh = post_form(
+ provider_login_page_res, 'Logging in', {
+ mso_info['username_field']: username,
+ mso_info['password_field']: password
+ })
+
+ provider_refresh_redirect_url = extract_redirect_url(
+ provider_association_redirect, url=urlh.url)
+
+ last_bookend_page, urlh = self._download_webpage_handle(
+ provider_refresh_redirect_url, video_id,
+ 'Downloading Auth Association Redirect Page')
+
+ hidden_data = self._hidden_inputs(last_bookend_page)
+ hidden_data['history_val'] = 3
+
+ mvpd_confirm_page_res = self._download_webpage_handle(
+ urlh.url, video_id, 'Sending Final Bookend',
+ query=hidden_data)
+
+ post_form(mvpd_confirm_page_res, 'Confirming Login')
+ else:
+ # Some providers (e.g. DIRECTV NOW) have another meta refresh
+ # based redirect that should be followed.
+ provider_redirect_page, urlh = provider_redirect_page_res
+ provider_refresh_redirect_url = extract_redirect_url(
+ provider_redirect_page, url=urlh.url)
+ if provider_refresh_redirect_url:
+ provider_redirect_page_res = self._download_webpage_handle(
+ provider_refresh_redirect_url, video_id,
+ 'Downloading Provider Redirect Page (meta refresh)')
+ provider_login_page_res = post_form(
+ provider_redirect_page_res, self._DOWNLOADING_LOGIN_PAGE)
+ form_data = {
+ mso_info.get('username_field', 'username'): username,
+ mso_info.get('password_field', 'password'): password
+ }
+ if mso_id in ('Cablevision', 'AlticeOne'):
+ form_data['_eventId_proceed'] = ''
+ mvpd_confirm_page_res = post_form(provider_login_page_res, 'Logging in', form_data)
+ if mso_id != 'Rogers':
+ post_form(mvpd_confirm_page_res, 'Confirming Login')
+
+ try:
+ session = self._download_webpage(
+ self._SERVICE_PROVIDER_TEMPLATE % 'session', video_id,
+ 'Retrieving Session', data=urlencode_postdata({
+ '_method': 'GET',
+ 'requestor_id': requestor_id,
+ }), headers=mvpd_headers)
+ except ExtractorError as e:
+ if not mso_id and isinstance(e.cause, HTTPError) and e.cause.status == 401:
+ raise_mvpd_required()
+ raise
+ if '<pendingLogout' in session:
+ self.cache.store(self._MVPD_CACHE, requestor_id, {})
+ count += 1
+ continue
+ authn_token = unescapeHTML(xml_text(session, 'authnToken'))
+ requestor_info['authn_token'] = authn_token
+ self.cache.store(self._MVPD_CACHE, requestor_id, requestor_info)
+
+ authz_token = requestor_info.get(guid)
+ if authz_token and is_expired(authz_token, 'simpleTokenTTL'):
+ authz_token = None
+ if not authz_token:
+ authorize = self._download_webpage(
+ self._SERVICE_PROVIDER_TEMPLATE % 'authorize', video_id,
+ 'Retrieving Authorization Token', data=urlencode_postdata({
+ 'resource_id': resource,
+ 'requestor_id': requestor_id,
+ 'authentication_token': authn_token,
+ 'mso_id': xml_text(authn_token, 'simpleTokenMsoID'),
+ 'userMeta': '1',
+ }), headers=mvpd_headers)
+ if '<pendingLogout' in authorize:
+ self.cache.store(self._MVPD_CACHE, requestor_id, {})
+ count += 1
+ continue
+ if '<error' in authorize:
+ raise ExtractorError(xml_text(authorize, 'details'), expected=True)
+ authz_token = unescapeHTML(xml_text(authorize, 'authzToken'))
+ requestor_info[guid] = authz_token
+ self.cache.store(self._MVPD_CACHE, requestor_id, requestor_info)
+
+ mvpd_headers.update({
+ 'ap_19': xml_text(authn_token, 'simpleSamlNameID'),
+ 'ap_23': xml_text(authn_token, 'simpleSamlSessionIndex'),
+ })
+
+ short_authorize = self._download_webpage(
+ self._SERVICE_PROVIDER_TEMPLATE % 'shortAuthorize',
+ video_id, 'Retrieving Media Token', data=urlencode_postdata({
+ 'authz_token': authz_token,
+ 'requestor_id': requestor_id,
+ 'session_guid': xml_text(authn_token, 'simpleTokenAuthenticationGuid'),
+ 'hashed_guid': 'false',
+ }), headers=mvpd_headers)
+ if '<pendingLogout' in short_authorize:
+ self.cache.store(self._MVPD_CACHE, requestor_id, {})
+ count += 1
+ continue
+ return short_authorize
diff --git a/yt_dlp/extractor/adobetv.py b/yt_dlp/extractor/adobetv.py
new file mode 100644
index 0000000..d1525a1
--- /dev/null
+++ b/yt_dlp/extractor/adobetv.py
@@ -0,0 +1,286 @@
+import functools
+import re
+
+from .common import InfoExtractor
+from ..compat import compat_str
+from ..utils import (
+ float_or_none,
+ int_or_none,
+ ISO639Utils,
+ join_nonempty,
+ OnDemandPagedList,
+ parse_duration,
+ str_or_none,
+ str_to_int,
+ unified_strdate,
+)
+
+
+class AdobeTVBaseIE(InfoExtractor):
+ def _call_api(self, path, video_id, query, note=None):
+ return self._download_json(
+ 'http://tv.adobe.com/api/v4/' + path,
+ video_id, note, query=query)['data']
+
+ def _parse_subtitles(self, video_data, url_key):
+ subtitles = {}
+ for translation in video_data.get('translations', []):
+ vtt_path = translation.get(url_key)
+ if not vtt_path:
+ continue
+ lang = translation.get('language_w3c') or ISO639Utils.long2short(translation['language_medium'])
+ subtitles.setdefault(lang, []).append({
+ 'ext': 'vtt',
+ 'url': vtt_path,
+ })
+ return subtitles
+
+ def _parse_video_data(self, video_data):
+ video_id = compat_str(video_data['id'])
+ title = video_data['title']
+
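+        # Only add the original s3:// upload once, even if several sources
+        # reference the same original_filename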
+ s3_extracted = False
+ formats = []
+ for source in video_data.get('videos', []):
+ source_url = source.get('url')
+ if not source_url:
+ continue
+ f = {
+ 'format_id': source.get('quality_level'),
+ 'fps': int_or_none(source.get('frame_rate')),
+ 'height': int_or_none(source.get('height')),
+ 'tbr': int_or_none(source.get('video_data_rate')),
+ 'width': int_or_none(source.get('width')),
+ 'url': source_url,
+ }
+ original_filename = source.get('original_filename')
+ if original_filename:
+ if not (f.get('height') and f.get('width')):
+ mobj = re.search(r'_(\d+)x(\d+)', original_filename)
+ if mobj:
+ f.update({
+ 'height': int(mobj.group(2)),
+ 'width': int(mobj.group(1)),
+ })
+ if original_filename.startswith('s3://') and not s3_extracted:
+ formats.append({
+ 'format_id': 'original',
+ 'quality': 1,
+ 'url': original_filename.replace('s3://', 'https://s3.amazonaws.com/'),
+ })
+ s3_extracted = True
+ formats.append(f)
+
+ return {
+ 'id': video_id,
+ 'title': title,
+ 'description': video_data.get('description'),
+ 'thumbnail': video_data.get('thumbnail'),
+ 'upload_date': unified_strdate(video_data.get('start_date')),
+ 'duration': parse_duration(video_data.get('duration')),
+ 'view_count': str_to_int(video_data.get('playcount')),
+ 'formats': formats,
+ 'subtitles': self._parse_subtitles(video_data, 'vtt'),
+ }
+
+
+class AdobeTVEmbedIE(AdobeTVBaseIE):
+ IE_NAME = 'adobetv:embed'
+ _VALID_URL = r'https?://tv\.adobe\.com/embed/\d+/(?P<id>\d+)'
+ _TEST = {
+ 'url': 'https://tv.adobe.com/embed/22/4153',
+ 'md5': 'c8c0461bf04d54574fc2b4d07ac6783a',
+ 'info_dict': {
+ 'id': '4153',
+ 'ext': 'flv',
+ 'title': 'Creating Graphics Optimized for BlackBerry',
+ 'description': 'md5:eac6e8dced38bdaae51cd94447927459',
+ 'thumbnail': r're:https?://.*\.jpg$',
+ 'upload_date': '20091109',
+ 'duration': 377,
+ 'view_count': int,
+ },
+ }
+
+ def _real_extract(self, url):
+ video_id = self._match_id(url)
+
+ video_data = self._call_api(
+ 'episode/' + video_id, video_id, {'disclosure': 'standard'})[0]
+ return self._parse_video_data(video_data)
+
+
+class AdobeTVIE(AdobeTVBaseIE):
+ IE_NAME = 'adobetv'
+ _VALID_URL = r'https?://tv\.adobe\.com/(?:(?P<language>fr|de|es|jp)/)?watch/(?P<show_urlname>[^/]+)/(?P<id>[^/]+)'
+
+ _TEST = {
+ 'url': 'http://tv.adobe.com/watch/the-complete-picture-with-julieanne-kost/quick-tip-how-to-draw-a-circle-around-an-object-in-photoshop/',
+ 'md5': '9bc5727bcdd55251f35ad311ca74fa1e',
+ 'info_dict': {
+ 'id': '10981',
+ 'ext': 'mp4',
+ 'title': 'Quick Tip - How to Draw a Circle Around an Object in Photoshop',
+ 'description': 'md5:99ec318dc909d7ba2a1f2b038f7d2311',
+ 'thumbnail': r're:https?://.*\.jpg$',
+ 'upload_date': '20110914',
+ 'duration': 60,
+ 'view_count': int,
+ },
+ }
+
+ def _real_extract(self, url):
+ language, show_urlname, urlname = self._match_valid_url(url).groups()
+ if not language:
+ language = 'en'
+
+ video_data = self._call_api(
+ 'episode/get', urlname, {
+ 'disclosure': 'standard',
+ 'language': language,
+ 'show_urlname': show_urlname,
+ 'urlname': urlname,
+ })[0]
+ return self._parse_video_data(video_data)
+
+
+class AdobeTVPlaylistBaseIE(AdobeTVBaseIE):
+ _PAGE_SIZE = 25
+
+ def _fetch_page(self, display_id, query, page):
+ page += 1
+ query['page'] = page
+ for element_data in self._call_api(
+ self._RESOURCE, display_id, query, 'Download Page %d' % page):
+ yield self._process_data(element_data)
+
+ def _extract_playlist_entries(self, display_id, query):
+ return OnDemandPagedList(functools.partial(
+ self._fetch_page, display_id, query), self._PAGE_SIZE)
+
+
+class AdobeTVShowIE(AdobeTVPlaylistBaseIE):
+ IE_NAME = 'adobetv:show'
+ _VALID_URL = r'https?://tv\.adobe\.com/(?:(?P<language>fr|de|es|jp)/)?show/(?P<id>[^/]+)'
+
+ _TEST = {
+ 'url': 'http://tv.adobe.com/show/the-complete-picture-with-julieanne-kost',
+ 'info_dict': {
+ 'id': '36',
+ 'title': 'The Complete Picture with Julieanne Kost',
+ 'description': 'md5:fa50867102dcd1aa0ddf2ab039311b27',
+ },
+ 'playlist_mincount': 136,
+ }
+ _RESOURCE = 'episode'
+ _process_data = AdobeTVBaseIE._parse_video_data
+
+ def _real_extract(self, url):
+ language, show_urlname = self._match_valid_url(url).groups()
+ if not language:
+ language = 'en'
+ query = {
+ 'disclosure': 'standard',
+ 'language': language,
+ 'show_urlname': show_urlname,
+ }
+
+ show_data = self._call_api(
+ 'show/get', show_urlname, query)[0]
+
+ return self.playlist_result(
+ self._extract_playlist_entries(show_urlname, query),
+ str_or_none(show_data.get('id')),
+ show_data.get('show_name'),
+ show_data.get('show_description'))
+
+
+class AdobeTVChannelIE(AdobeTVPlaylistBaseIE):
+ IE_NAME = 'adobetv:channel'
+ _VALID_URL = r'https?://tv\.adobe\.com/(?:(?P<language>fr|de|es|jp)/)?channel/(?P<id>[^/]+)(?:/(?P<category_urlname>[^/]+))?'
+
+ _TEST = {
+ 'url': 'http://tv.adobe.com/channel/development',
+ 'info_dict': {
+ 'id': 'development',
+ },
+ 'playlist_mincount': 96,
+ }
+ _RESOURCE = 'show'
+
+ def _process_data(self, show_data):
+ return self.url_result(
+ show_data['url'], 'AdobeTVShow', str_or_none(show_data.get('id')))
+
+ def _real_extract(self, url):
+ language, channel_urlname, category_urlname = self._match_valid_url(url).groups()
+ if not language:
+ language = 'en'
+ query = {
+ 'channel_urlname': channel_urlname,
+ 'language': language,
+ }
+ if category_urlname:
+ query['category_urlname'] = category_urlname
+
+ return self.playlist_result(
+ self._extract_playlist_entries(channel_urlname, query),
+ channel_urlname)
+
+
+class AdobeTVVideoIE(AdobeTVBaseIE):
+ IE_NAME = 'adobetv:video'
+ _VALID_URL = r'https?://video\.tv\.adobe\.com/v/(?P<id>\d+)'
+ _EMBED_REGEX = [r'<iframe[^>]+src=[\'"](?P<url>(?:https?:)?//video\.tv\.adobe\.com/v/\d+[^"]+)[\'"]']
+
+ _TEST = {
+ # From https://helpx.adobe.com/acrobat/how-to/new-experience-acrobat-dc.html?set=acrobat--get-started--essential-beginners
+ 'url': 'https://video.tv.adobe.com/v/2456/',
+ 'md5': '43662b577c018ad707a63766462b1e87',
+ 'info_dict': {
+ 'id': '2456',
+ 'ext': 'mp4',
+ 'title': 'New experience with Acrobat DC',
+ 'description': 'New experience with Acrobat DC',
+ 'duration': 248.667,
+ },
+ }
+
+ def _real_extract(self, url):
+ video_id = self._match_id(url)
+ webpage = self._download_webpage(url, video_id)
+
+ video_data = self._parse_json(self._search_regex(
+ r'var\s+bridge\s*=\s*([^;]+);', webpage, 'bridged data'), video_id)
+ title = video_data['title']
+
+ formats = []
+ sources = video_data.get('sources') or []
+ for source in sources:
+ source_src = source.get('src')
+ if not source_src:
+ continue
+ formats.append({
+ 'filesize': int_or_none(source.get('kilobytes') or None, invscale=1000),
+ 'format_id': join_nonempty(source.get('format'), source.get('label')),
+ 'height': int_or_none(source.get('height') or None),
+ 'tbr': int_or_none(source.get('bitrate') or None),
+ 'width': int_or_none(source.get('width') or None),
+ 'url': source_src,
+ })
+
+        # The duration varies among formats, in both the metadata and the
+        # downloaded files, so just pick the maximum
+ duration = max(filter(None, [
+ float_or_none(source.get('duration'), scale=1000)
+ for source in sources]))
+
+ return {
+ 'id': video_id,
+ 'formats': formats,
+ 'title': title,
+ 'description': video_data.get('description'),
+ 'thumbnail': video_data.get('video', {}).get('poster'),
+ 'duration': duration,
+ 'subtitles': self._parse_subtitles(video_data, 'vttPath'),
+ }
diff --git a/yt_dlp/extractor/adultswim.py b/yt_dlp/extractor/adultswim.py
new file mode 100644
index 0000000..d807c41
--- /dev/null
+++ b/yt_dlp/extractor/adultswim.py
@@ -0,0 +1,198 @@
+import json
+
+from .turner import TurnerBaseIE
+from ..utils import (
+ determine_ext,
+ float_or_none,
+ int_or_none,
+ mimetype2ext,
+ parse_age_limit,
+ parse_iso8601,
+ strip_or_none,
+ try_get,
+)
+
+
+class AdultSwimIE(TurnerBaseIE):
+ _VALID_URL = r'https?://(?:www\.)?adultswim\.com/videos/(?P<show_path>[^/?#]+)(?:/(?P<episode_path>[^/?#]+))?'
+
+ _TESTS = [{
+ 'url': 'http://adultswim.com/videos/rick-and-morty/pilot',
+ 'info_dict': {
+ 'id': 'rQxZvXQ4ROaSOqq-or2Mow',
+ 'ext': 'mp4',
+ 'title': 'Rick and Morty - Pilot',
+ 'description': 'Rick moves in with his daughter\'s family and establishes himself as a bad influence on his grandson, Morty.',
+ 'timestamp': 1543294800,
+ 'upload_date': '20181127',
+ },
+ 'params': {
+ # m3u8 download
+ 'skip_download': True,
+ },
+ 'expected_warnings': ['Unable to download f4m manifest'],
+ }, {
+ 'url': 'http://www.adultswim.com/videos/tim-and-eric-awesome-show-great-job/dr-steve-brule-for-your-wine/',
+ 'info_dict': {
+ 'id': 'sY3cMUR_TbuE4YmdjzbIcQ',
+ 'ext': 'mp4',
+ 'title': 'Tim and Eric Awesome Show Great Job! - Dr. Steve Brule, For Your Wine',
+ 'description': 'Dr. Brule reports live from Wine Country with a special report on wines. \nWatch Tim and Eric Awesome Show Great Job! episode #20, "Embarrassed" on Adult Swim.',
+ 'upload_date': '20080124',
+ 'timestamp': 1201150800,
+ },
+ 'params': {
+ # m3u8 download
+ 'skip_download': True,
+ },
+ 'skip': '404 Not Found',
+ }, {
+ 'url': 'http://www.adultswim.com/videos/decker/inside-decker-a-new-hero/',
+ 'info_dict': {
+ 'id': 'I0LQFQkaSUaFp8PnAWHhoQ',
+ 'ext': 'mp4',
+ 'title': 'Decker - Inside Decker: A New Hero',
+ 'description': 'The guys recap the conclusion of the season. They announce a new hero, take a peek into the Victorville Film Archive and welcome back the talented James Dean.',
+ 'timestamp': 1469480460,
+ 'upload_date': '20160725',
+ },
+ 'params': {
+ # m3u8 download
+ 'skip_download': True,
+ },
+ 'expected_warnings': ['Unable to download f4m manifest'],
+ }, {
+ 'url': 'http://www.adultswim.com/videos/attack-on-titan',
+ 'info_dict': {
+ 'id': 'attack-on-titan',
+ 'title': 'Attack on Titan',
+ 'description': 'md5:41caa9416906d90711e31dc00cb7db7e',
+ },
+ 'playlist_mincount': 12,
+ }, {
+ 'url': 'http://www.adultswim.com/videos/streams/williams-stream',
+ 'info_dict': {
+ 'id': 'd8DEBj7QRfetLsRgFnGEyg',
+ 'ext': 'mp4',
+ 'title': r're:^Williams Stream \d{4}-\d{2}-\d{2} \d{2}:\d{2}$',
+ 'description': 'original programming',
+ },
+ 'params': {
+ # m3u8 download
+ 'skip_download': True,
+ },
+ 'skip': '404 Not Found',
+ }]
+
+ def _real_extract(self, url):
+ show_path, episode_path = self._match_valid_url(url).groups()
+ display_id = episode_path or show_path
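+        # Build a GraphQL query against the Adult Swim API: for an episode,
+        # request the video fields directly; for a show, request the list of
+        # episode slugs to be extracted as a playlist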
+ query = '''query {
+ getShowBySlug(slug:"%s") {
+ %%s
+ }
+}''' % show_path
+ if episode_path:
+ query = query % '''title
+ getVideoBySlug(slug:"%s") {
+ _id
+ auth
+ description
+ duration
+ episodeNumber
+ launchDate
+ mediaID
+ seasonNumber
+ poster
+ title
+ tvRating
+ }''' % episode_path
+ else:
+ query = query % '''metaDescription
+ title
+ videos(first:1000,sort:["episode_number"]) {
+ edges {
+ node {
+ _id
+ slug
+ }
+ }
+ }'''
+ show_data = self._download_json(
+ 'https://www.adultswim.com/api/search', display_id,
+ data=json.dumps({'query': query}).encode(),
+ headers={'Content-Type': 'application/json'})['data']['getShowBySlug']
+ if episode_path:
+ video_data = show_data['getVideoBySlug']
+ video_id = video_data['_id']
+ episode_title = title = video_data['title']
+ series = show_data.get('title')
+ if series:
+ title = '%s - %s' % (series, title)
+ info = {
+ 'id': video_id,
+ 'title': title,
+ 'description': strip_or_none(video_data.get('description')),
+ 'duration': float_or_none(video_data.get('duration')),
+ 'formats': [],
+ 'subtitles': {},
+ 'age_limit': parse_age_limit(video_data.get('tvRating')),
+ 'thumbnail': video_data.get('poster'),
+ 'timestamp': parse_iso8601(video_data.get('launchDate')),
+ 'series': series,
+ 'season_number': int_or_none(video_data.get('seasonNumber')),
+ 'episode': episode_title,
+ 'episode_number': int_or_none(video_data.get('episodeNumber')),
+ }
+
+ auth = video_data.get('auth')
+ media_id = video_data.get('mediaID')
+ if media_id:
+ info.update(self._extract_ngtv_info(media_id, {
+ # CDN_TOKEN_APP_ID from:
+ # https://d2gg02c3xr550i.cloudfront.net/assets/asvp.e9c8bef24322d060ef87.bundle.js
+ 'appId': 'eyJhbGciOiJIUzI1NiIsInR5cCI6IkpXVCJ9.eyJhcHBJZCI6ImFzLXR2ZS1kZXNrdG9wLXB0enQ2bSIsInByb2R1Y3QiOiJ0dmUiLCJuZXR3b3JrIjoiYXMiLCJwbGF0Zm9ybSI6ImRlc2t0b3AiLCJpYXQiOjE1MzI3MDIyNzl9.BzSCk-WYOZ2GMCIaeVb8zWnzhlgnXuJTCu0jGp_VaZE',
+ }, {
+ 'url': url,
+ 'site_name': 'AdultSwim',
+ 'auth_required': auth,
+ }))
+
+ if not auth:
+ extract_data = self._download_json(
+ 'https://www.adultswim.com/api/shows/v1/videos/' + video_id,
+ video_id, query={'fields': 'stream'}, fatal=False) or {}
+ assets = try_get(extract_data, lambda x: x['data']['video']['stream']['assets'], list) or []
+ for asset in assets:
+ asset_url = asset.get('url')
+ if not asset_url:
+ continue
+ ext = determine_ext(asset_url, mimetype2ext(asset.get('mime_type')))
+ if ext == 'm3u8':
+ fmts, subs = self._extract_m3u8_formats_and_subtitles(
+ asset_url, video_id, 'mp4', m3u8_id='hls', fatal=False)
+ info['formats'].extend(fmts)
+ self._merge_subtitles(subs, target=info['subtitles'])
+ elif ext == 'f4m':
+ continue
+ # info['formats'].extend(self._extract_f4m_formats(
+ # asset_url, video_id, f4m_id='hds', fatal=False))
+ elif ext in ('scc', 'ttml', 'vtt'):
+ info['subtitles'].setdefault('en', []).append({
+ 'url': asset_url,
+ })
+
+ return info
+ else:
+ entries = []
+ for edge in show_data.get('videos', {}).get('edges', []):
+ video = edge.get('node') or {}
+ slug = video.get('slug')
+ if not slug:
+ continue
+ entries.append(self.url_result(
+ 'http://adultswim.com/videos/%s/%s' % (show_path, slug),
+ 'AdultSwim', video.get('_id')))
+ return self.playlist_result(
+ entries, show_path, show_data.get('title'),
+ strip_or_none(show_data.get('metaDescription')))
diff --git a/yt_dlp/extractor/aenetworks.py b/yt_dlp/extractor/aenetworks.py
new file mode 100644
index 0000000..ab4b6c0
--- /dev/null
+++ b/yt_dlp/extractor/aenetworks.py
@@ -0,0 +1,369 @@
+from .theplatform import ThePlatformIE
+from ..utils import (
+ ExtractorError,
+ GeoRestrictedError,
+ int_or_none,
+ remove_start,
+ traverse_obj,
+ update_url_query,
+ urlencode_postdata,
+)
+
+
+class AENetworksBaseIE(ThePlatformIE): # XXX: Do not subclass from concrete IE
+ _BASE_URL_REGEX = r'''(?x)https?://
+ (?:(?:www|play|watch)\.)?
+ (?P<domain>
+ (?:history(?:vault)?|aetv|mylifetime|lifetimemovieclub)\.com|
+ fyi\.tv
+ )/'''
+ _THEPLATFORM_KEY = '43jXaGRQud'
+ _THEPLATFORM_SECRET = 'S10BPXHMlb'
+ _DOMAIN_MAP = {
+ 'history.com': ('HISTORY', 'history'),
+ 'aetv.com': ('AETV', 'aetv'),
+ 'mylifetime.com': ('LIFETIME', 'lifetime'),
+ 'lifetimemovieclub.com': ('LIFETIMEMOVIECLUB', 'lmc'),
+ 'fyi.tv': ('FYI', 'fyi'),
+ 'historyvault.com': (None, 'historyvault'),
+ 'biography.com': (None, 'biography'),
+ }
+
+ def _extract_aen_smil(self, smil_url, video_id, auth=None):
+ query = {
+ 'mbr': 'true',
+ 'formats': 'M3U+none,MPEG-DASH+none,MPEG4,MP3',
+ }
+ if auth:
+ query['auth'] = auth
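+        # Try several assetTypes/switch combinations in turn; keep the last
+        # error so it can be raised if none of them yield any formats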
+ TP_SMIL_QUERY = [{
+ 'assetTypes': 'high_video_ak',
+ 'switch': 'hls_high_ak',
+ }, {
+ 'assetTypes': 'high_video_s3',
+ }, {
+ 'assetTypes': 'high_video_s3',
+ 'switch': 'hls_high_fastly',
+ }]
+ formats = []
+ subtitles = {}
+ last_e = None
+ for q in TP_SMIL_QUERY:
+ q.update(query)
+ m_url = update_url_query(smil_url, q)
+ m_url = self._sign_url(m_url, self._THEPLATFORM_KEY, self._THEPLATFORM_SECRET)
+ try:
+ tp_formats, tp_subtitles = self._extract_theplatform_smil(
+ m_url, video_id, 'Downloading %s SMIL data' % (q.get('switch') or q['assetTypes']))
+ except ExtractorError as e:
+ if isinstance(e, GeoRestrictedError):
+ raise
+ last_e = e
+ continue
+ formats.extend(tp_formats)
+ subtitles = self._merge_subtitles(subtitles, tp_subtitles)
+ if last_e and not formats:
+ raise last_e
+ return {
+ 'id': video_id,
+ 'formats': formats,
+ 'subtitles': subtitles,
+ }
+
+ def _extract_aetn_info(self, domain, filter_key, filter_value, url):
+ requestor_id, brand = self._DOMAIN_MAP[domain]
+ result = self._download_json(
+ 'https://feeds.video.aetnd.com/api/v2/%s/videos' % brand,
+ filter_value, query={'filter[%s]' % filter_key: filter_value})
+ result = traverse_obj(
+ result, ('results',
+ lambda k, v: k == 0 and v[filter_key] == filter_value),
+ get_all=False)
+ if not result:
+ raise ExtractorError('Show not found in A&E feed (too new?)', expected=True,
+ video_id=remove_start(filter_value, '/'))
+ title = result['title']
+ video_id = result['id']
+ media_url = result['publicUrl']
+ theplatform_metadata = self._download_theplatform_metadata(self._search_regex(
+ r'https?://link\.theplatform\.com/s/([^?]+)', media_url, 'theplatform_path'), video_id)
+ info = self._parse_theplatform_metadata(theplatform_metadata)
+ auth = None
+ if theplatform_metadata.get('AETN$isBehindWall'):
+ resource = self._get_mvpd_resource(
+ requestor_id, theplatform_metadata['title'],
+ theplatform_metadata.get('AETN$PPL_pplProgramId') or theplatform_metadata.get('AETN$PPL_pplProgramId_OLD'),
+ traverse_obj(theplatform_metadata, ('ratings', 0, 'rating')))
+ auth = self._extract_mvpd_auth(
+ url, video_id, requestor_id, resource)
+ info.update(self._extract_aen_smil(media_url, video_id, auth))
+ info.update({
+ 'title': title,
+ 'series': result.get('seriesName'),
+ 'season_number': int_or_none(result.get('tvSeasonNumber')),
+ 'episode_number': int_or_none(result.get('tvSeasonEpisodeNumber')),
+ })
+ return info
+
+
+class AENetworksIE(AENetworksBaseIE):
+ IE_NAME = 'aenetworks'
+ IE_DESC = 'A+E Networks: A&E, Lifetime, History.com, FYI Network and History Vault'
+ _VALID_URL = AENetworksBaseIE._BASE_URL_REGEX + r'''(?P<id>
+ shows/[^/]+/season-\d+/episode-\d+|
+ (?:
+ (?:movie|special)s/[^/]+|
+ (?:shows/[^/]+/)?videos
+ )/[^/?#&]+
+ )'''
+ _TESTS = [{
+ 'url': 'http://www.history.com/shows/mountain-men/season-1/episode-1',
+ 'info_dict': {
+ 'id': '22253814',
+ 'ext': 'mp4',
+ 'title': 'Winter Is Coming',
+ 'description': 'md5:a40e370925074260b1c8a633c632c63a',
+ 'timestamp': 1338306241,
+ 'upload_date': '20120529',
+ 'uploader': 'AENE-NEW',
+ 'duration': 2592.0,
+ 'thumbnail': r're:^https?://.*\.jpe?g$',
+ 'chapters': 'count:5',
+ 'tags': 'count:14',
+ 'categories': ['Mountain Men'],
+ 'episode_number': 1,
+ 'episode': 'Episode 1',
+ 'season': 'Season 1',
+ 'season_number': 1,
+ 'series': 'Mountain Men',
+ },
+ 'params': {
+ # m3u8 download
+ 'skip_download': True,
+ },
+ 'add_ie': ['ThePlatform'],
+ 'skip': 'Geo-restricted - This content is not available in your location.'
+ }, {
+ 'url': 'http://www.aetv.com/shows/duck-dynasty/season-9/episode-1',
+ 'info_dict': {
+ 'id': '600587331957',
+ 'ext': 'mp4',
+ 'title': 'Inlawful Entry',
+ 'description': 'md5:57c12115a2b384d883fe64ca50529e08',
+ 'timestamp': 1452634428,
+ 'upload_date': '20160112',
+ 'uploader': 'AENE-NEW',
+ 'duration': 1277.695,
+ 'thumbnail': r're:^https?://.*\.jpe?g$',
+ 'chapters': 'count:4',
+ 'tags': 'count:23',
+ 'episode': 'Episode 1',
+ 'episode_number': 1,
+ 'season': 'Season 9',
+ 'season_number': 9,
+ 'series': 'Duck Dynasty',
+ },
+ 'params': {
+ # m3u8 download
+ 'skip_download': True,
+ },
+ 'add_ie': ['ThePlatform'],
+ 'skip': 'This video is only available for users of participating TV providers.',
+ }, {
+ 'url': 'http://www.fyi.tv/shows/tiny-house-nation/season-1/episode-8',
+ 'only_matching': True
+ }, {
+ 'url': 'http://www.mylifetime.com/shows/project-runway-junior/season-1/episode-6',
+ 'only_matching': True
+ }, {
+ 'url': 'http://www.mylifetime.com/movies/center-stage-on-pointe/full-movie',
+ 'only_matching': True
+ }, {
+ 'url': 'https://watch.lifetimemovieclub.com/movies/10-year-reunion/full-movie',
+ 'only_matching': True
+ }, {
+ 'url': 'http://www.history.com/specials/sniper-into-the-kill-zone/full-special',
+ 'only_matching': True
+ }, {
+ 'url': 'https://www.aetv.com/specials/hunting-jonbenets-killer-the-untold-story/preview-hunting-jonbenets-killer-the-untold-story',
+ 'only_matching': True
+ }, {
+ 'url': 'http://www.history.com/videos/history-of-valentines-day',
+ 'only_matching': True
+ }, {
+ 'url': 'https://play.aetv.com/shows/duck-dynasty/videos/best-of-duck-dynasty-getting-quack-in-shape',
+ 'only_matching': True
+ }]
+
+ def _real_extract(self, url):
+ domain, canonical = self._match_valid_url(url).groups()
+ return self._extract_aetn_info(domain, 'canonical', '/' + canonical, url)
+
+
+class AENetworksListBaseIE(AENetworksBaseIE):
+ def _call_api(self, resource, slug, brand, fields):
+ return self._download_json(
+ 'https://yoga.appsvcs.aetnd.com/graphql',
+ slug, query={'brand': brand}, data=urlencode_postdata({
+ 'query': '''{
+ %s(slug: "%s") {
+ %s
+ }
+}''' % (resource, slug, fields),
+ }))['data'][resource]
+
+ def _real_extract(self, url):
+ domain, slug = self._match_valid_url(url).groups()
+ _, brand = self._DOMAIN_MAP[domain]
+ playlist = self._call_api(self._RESOURCE, slug, brand, self._FIELDS)
+ base_url = 'http://watch.%s' % domain
+
+ entries = []
+ for item in (playlist.get(self._ITEMS_KEY) or []):
+ doc = self._get_doc(item)
+ canonical = doc.get('canonical')
+ if not canonical:
+ continue
+ entries.append(self.url_result(
+ base_url + canonical, AENetworksIE.ie_key(), doc.get('id')))
+
+ description = None
+ if self._PLAYLIST_DESCRIPTION_KEY:
+ description = playlist.get(self._PLAYLIST_DESCRIPTION_KEY)
+
+ return self.playlist_result(
+ entries, playlist.get('id'),
+ playlist.get(self._PLAYLIST_TITLE_KEY), description)
+
+
+class AENetworksCollectionIE(AENetworksListBaseIE):
+ IE_NAME = 'aenetworks:collection'
+ _VALID_URL = AENetworksBaseIE._BASE_URL_REGEX + r'(?:[^/]+/)*(?:list|collections)/(?P<id>[^/?#&]+)/?(?:[?#&]|$)'
+ _TESTS = [{
+ 'url': 'https://watch.historyvault.com/list/america-the-story-of-us',
+ 'info_dict': {
+ 'id': '282',
+ 'title': 'America The Story of Us',
+ },
+ 'playlist_mincount': 12,
+ }, {
+ 'url': 'https://watch.historyvault.com/shows/america-the-story-of-us-2/season-1/list/america-the-story-of-us',
+ 'only_matching': True
+ }, {
+ 'url': 'https://www.historyvault.com/collections/mysteryquest',
+ 'only_matching': True
+ }]
+ _RESOURCE = 'list'
+ _ITEMS_KEY = 'items'
+ _PLAYLIST_TITLE_KEY = 'display_title'
+ _PLAYLIST_DESCRIPTION_KEY = None
+ _FIELDS = '''id
+ display_title
+ items {
+ ... on ListVideoItem {
+ doc {
+ canonical
+ id
+ }
+ }
+ }'''
+
+ def _get_doc(self, item):
+ return item.get('doc') or {}
+
+
+class AENetworksShowIE(AENetworksListBaseIE):
+ IE_NAME = 'aenetworks:show'
+ _VALID_URL = AENetworksBaseIE._BASE_URL_REGEX + r'shows/(?P<id>[^/?#&]+)/?(?:[?#&]|$)'
+ _TESTS = [{
+ 'url': 'http://www.history.com/shows/ancient-aliens',
+ 'info_dict': {
+ 'id': 'SERIES1574',
+ 'title': 'Ancient Aliens',
+ 'description': 'md5:3f6d74daf2672ff3ae29ed732e37ea7f',
+ },
+ 'playlist_mincount': 150,
+ }]
+ _RESOURCE = 'series'
+ _ITEMS_KEY = 'episodes'
+ _PLAYLIST_TITLE_KEY = 'title'
+ _PLAYLIST_DESCRIPTION_KEY = 'description'
+ _FIELDS = '''description
+ id
+ title
+ episodes {
+ canonical
+ id
+ }'''
+
+ def _get_doc(self, item):
+ return item
+
+
+class HistoryTopicIE(AENetworksBaseIE):
+ IE_NAME = 'history:topic'
+ IE_DESC = 'History.com Topic'
+ _VALID_URL = r'https?://(?:www\.)?history\.com/topics/[^/]+/(?P<id>[\w+-]+?)-video'
+ _TESTS = [{
+ 'url': 'https://www.history.com/topics/valentines-day/history-of-valentines-day-video',
+ 'info_dict': {
+ 'id': '40700995724',
+ 'ext': 'mp4',
+ 'title': "History of Valentine’s Day",
+ 'description': 'md5:7b57ea4829b391995b405fa60bd7b5f7',
+ 'timestamp': 1375819729,
+ 'upload_date': '20130806',
+ 'uploader': 'AENE-NEW',
+ },
+ 'params': {
+ # m3u8 download
+ 'skip_download': True,
+ },
+ 'add_ie': ['ThePlatform'],
+ }]
+
+ def _real_extract(self, url):
+ display_id = self._match_id(url)
+ return self.url_result(
+ 'http://www.history.com/videos/' + display_id,
+ AENetworksIE.ie_key())
+
+
+class HistoryPlayerIE(AENetworksBaseIE):
+ IE_NAME = 'history:player'
+ _VALID_URL = r'https?://(?:www\.)?(?P<domain>(?:history|biography)\.com)/player/(?P<id>\d+)'
+ _TESTS = []
+
+ def _real_extract(self, url):
+ domain, video_id = self._match_valid_url(url).groups()
+ return self._extract_aetn_info(domain, 'id', video_id, url)
+
+
+class BiographyIE(AENetworksBaseIE):
+ _VALID_URL = r'https?://(?:www\.)?biography\.com/video/(?P<id>[^/?#&]+)'
+ _TESTS = [{
+ 'url': 'https://www.biography.com/video/vincent-van-gogh-full-episode-2075049808',
+ 'info_dict': {
+ 'id': '30322987',
+ 'ext': 'mp4',
+ 'title': 'Vincent Van Gogh - Full Episode',
+ 'description': 'A full biography about the most influential 20th century painter, Vincent Van Gogh.',
+ 'timestamp': 1311970571,
+ 'upload_date': '20110729',
+ 'uploader': 'AENE-NEW',
+ },
+ 'params': {
+ # m3u8 download
+ 'skip_download': True,
+ },
+ 'add_ie': ['ThePlatform'],
+ 'skip': '404 Not Found',
+ }]
+
+ def _real_extract(self, url):
+ display_id = self._match_id(url)
+ webpage = self._download_webpage(url, display_id)
+ player_url = self._search_regex(
+ r'<phoenix-iframe[^>]+src="(%s)' % HistoryPlayerIE._VALID_URL,
+ webpage, 'player URL')
+ return self.url_result(player_url, HistoryPlayerIE.ie_key())
diff --git a/yt_dlp/extractor/aeonco.py b/yt_dlp/extractor/aeonco.py
new file mode 100644
index 0000000..390eae3
--- /dev/null
+++ b/yt_dlp/extractor/aeonco.py
@@ -0,0 +1,74 @@
+from .common import InfoExtractor
+from .vimeo import VimeoIE
+from ..utils import ExtractorError, traverse_obj, url_or_none
+
+
+class AeonCoIE(InfoExtractor):
+ _VALID_URL = r'https?://(?:www\.)?aeon\.co/videos/(?P<id>[^/?]+)'
+ _TESTS = [{
+ 'url': 'https://aeon.co/videos/raw-solar-storm-footage-is-the-punk-rock-antidote-to-sleek-james-webb-imagery',
+ 'md5': 'e5884d80552c9b6ea8d268a258753362',
+ 'info_dict': {
+ 'id': '1284717',
+ 'ext': 'mp4',
+ 'title': 'Brilliant Noise',
+ 'thumbnail': 'https://i.vimeocdn.com/video/21006315-1a1e49da8b07fd908384a982b4ba9ff0268c509a474576ebdf7b1392f4acae3b-d_960',
+ 'uploader': 'Semiconductor',
+ 'uploader_id': 'semiconductor',
+ 'uploader_url': 'https://vimeo.com/semiconductor',
+ 'duration': 348
+ }
+ }, {
+ 'url': 'https://aeon.co/videos/dazzling-timelapse-shows-how-microbes-spoil-our-food-and-sometimes-enrich-it',
+ 'md5': '03582d795382e49f2fd0b427b55de409',
+ 'info_dict': {
+ 'id': '759576926',
+ 'ext': 'mp4',
+ 'title': 'Wrought',
+ 'thumbnail': 'https://i.vimeocdn.com/video/1525599692-84614af88e446612f49ca966cf8f80eab2c73376bedd80555741c521c26f9a3e-d_1280',
+ 'uploader': 'Aeon Video',
+ 'uploader_id': 'aeonvideo',
+ 'uploader_url': 'https://vimeo.com/aeonvideo',
+ 'duration': 1344
+ }
+ }, {
+ 'url': 'https://aeon.co/videos/chew-over-the-prisoners-dilemma-and-see-if-you-can-find-the-rational-path-out',
+ 'md5': '1cfda0bf3ae24df17d00f2c0cb6cc21b',
+ 'info_dict': {
+ 'id': 'emyi4z-O0ls',
+ 'ext': 'mp4',
+ 'title': 'How to outsmart the Prisoner’s Dilemma - Lucas Husted',
+ 'thumbnail': 'https://i.ytimg.com/vi_webp/emyi4z-O0ls/maxresdefault.webp',
+ 'uploader': 'TED-Ed',
+ 'uploader_id': '@TEDEd',
+ 'uploader_url': 'https://www.youtube.com/@TEDEd',
+ 'duration': 344,
+ 'upload_date': '20200827',
+ 'channel_id': 'UCsooa4yRKGN_zEE8iknghZA',
+ 'playable_in_embed': True,
+ 'description': 'md5:c0959524f08cb60f96fd010f3dfb17f3',
+ 'categories': ['Education'],
+ 'like_count': int,
+ 'channel': 'TED-Ed',
+ 'chapters': 'count:7',
+ 'channel_url': 'https://www.youtube.com/channel/UCsooa4yRKGN_zEE8iknghZA',
+ 'tags': 'count:26',
+ 'availability': 'public',
+ 'channel_follower_count': int,
+ 'view_count': int,
+ 'age_limit': 0,
+ 'live_status': 'not_live',
+ 'comment_count': int,
+ },
+ }]
+
+ def _real_extract(self, url):
+ video_id = self._match_id(url)
+ webpage = self._download_webpage(url, video_id)
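+        # Take the embedUrl of the first VideoObject in the page's JSON-LD;
+        # Aeon hosts its videos on Vimeo or YouTube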
+ embed_url = traverse_obj(self._yield_json_ld(webpage, video_id), (
+ lambda _, v: v['@type'] == 'VideoObject', 'embedUrl', {url_or_none}), get_all=False)
+ if not embed_url:
+ raise ExtractorError('No embed URL found in webpage')
+ if 'player.vimeo.com' in embed_url:
+ embed_url = VimeoIE._smuggle_referrer(embed_url, 'https://aeon.co/')
+ return self.url_result(embed_url)
diff --git a/yt_dlp/extractor/afreecatv.py b/yt_dlp/extractor/afreecatv.py
new file mode 100644
index 0000000..3d26d9c
--- /dev/null
+++ b/yt_dlp/extractor/afreecatv.py
@@ -0,0 +1,484 @@
+import functools
+import re
+
+from .common import InfoExtractor
+from ..utils import (
+ ExtractorError,
+ OnDemandPagedList,
+ date_from_str,
+ determine_ext,
+ int_or_none,
+ qualities,
+ traverse_obj,
+ unified_strdate,
+ unified_timestamp,
+ update_url_query,
+ url_or_none,
+ urlencode_postdata,
+ xpath_text,
+)
+
+
+class AfreecaTVIE(InfoExtractor):
+ IE_NAME = 'afreecatv'
+ IE_DESC = 'afreecatv.com'
+ _VALID_URL = r'''(?x)
+ https?://
+ (?:
+ (?:(?:live|afbbs|www)\.)?afreeca(?:tv)?\.com(?::\d+)?
+ (?:
+ /app/(?:index|read_ucc_bbs)\.cgi|
+ /player/[Pp]layer\.(?:swf|html)
+ )\?.*?\bnTitleNo=|
+ vod\.afreecatv\.com/(PLAYER/STATION|player)/
+ )
+ (?P<id>\d+)
+ '''
+ _NETRC_MACHINE = 'afreecatv'
+ _TESTS = [{
+ 'url': 'http://live.afreecatv.com:8079/app/index.cgi?szType=read_ucc_bbs&szBjId=dailyapril&nStationNo=16711924&nBbsNo=18605867&nTitleNo=36164052&szSkin=',
+ 'md5': 'f72c89fe7ecc14c1b5ce506c4996046e',
+ 'info_dict': {
+ 'id': '36164052',
+ 'ext': 'mp4',
+ 'title': '데일리 에이프릴 요정들의 시상식!',
+ 'thumbnail': 're:^https?://(?:video|st)img.afreecatv.com/.*$',
+ 'uploader': 'dailyapril',
+ 'uploader_id': 'dailyapril',
+ 'upload_date': '20160503',
+ },
+ 'skip': 'Video is gone',
+ }, {
+ 'url': 'http://afbbs.afreecatv.com:8080/app/read_ucc_bbs.cgi?nStationNo=16711924&nTitleNo=36153164&szBjId=dailyapril&nBbsNo=18605867',
+ 'info_dict': {
+ 'id': '36153164',
+ 'title': "BJ유트루와 함께하는 '팅커벨 메이크업!'",
+ 'thumbnail': 're:^https?://(?:video|st)img.afreecatv.com/.*$',
+ 'uploader': 'dailyapril',
+ 'uploader_id': 'dailyapril',
+ },
+ 'playlist_count': 2,
+ 'playlist': [{
+ 'md5': 'd8b7c174568da61d774ef0203159bf97',
+ 'info_dict': {
+ 'id': '36153164_1',
+ 'ext': 'mp4',
+ 'title': "BJ유트루와 함께하는 '팅커벨 메이크업!'",
+ 'upload_date': '20160502',
+ },
+ }, {
+ 'md5': '58f2ce7f6044e34439ab2d50612ab02b',
+ 'info_dict': {
+ 'id': '36153164_2',
+ 'ext': 'mp4',
+ 'title': "BJ유트루와 함께하는 '팅커벨 메이크업!'",
+ 'upload_date': '20160502',
+ },
+ }],
+ 'skip': 'Video is gone',
+ }, {
+        # non-standard key
+ 'url': 'http://vod.afreecatv.com/PLAYER/STATION/20515605',
+ 'info_dict': {
+ 'id': '20170411_BE689A0E_190960999_1_2_h',
+ 'ext': 'mp4',
+ 'title': '혼자사는여자집',
+ 'thumbnail': 're:^https?://(?:video|st)img.afreecatv.com/.*$',
+ 'uploader': '♥이슬이',
+ 'uploader_id': 'dasl8121',
+ 'upload_date': '20170411',
+ 'duration': 213,
+ },
+ 'params': {
+ 'skip_download': True,
+ },
+ }, {
+ # adult content
+ 'url': 'https://vod.afreecatv.com/player/97267690',
+ 'info_dict': {
+ 'id': '20180327_27901457_202289533_1',
+ 'ext': 'mp4',
+ 'title': '[생]빨개요♥ (part 1)',
+ 'thumbnail': 're:^https?://(?:video|st)img.afreecatv.com/.*$',
+ 'uploader': '[SA]서아',
+ 'uploader_id': 'bjdyrksu',
+ 'upload_date': '20180327',
+ 'duration': 3601,
+ },
+ 'params': {
+ 'skip_download': True,
+ },
+ 'skip': 'The VOD does not exist',
+ }, {
+ 'url': 'http://www.afreecatv.com/player/Player.swf?szType=szBjId=djleegoon&nStationNo=11273158&nBbsNo=13161095&nTitleNo=36327652',
+ 'only_matching': True,
+ }, {
+ 'url': 'https://vod.afreecatv.com/player/96753363',
+ 'info_dict': {
+ 'id': '20230108_9FF5BEE1_244432674_1',
+ 'ext': 'mp4',
+ 'uploader_id': 'rlantnghks',
+ 'uploader': '페이즈으',
+ 'duration': 10840,
+ 'thumbnail': 'http://videoimg.afreecatv.com/php/SnapshotLoad.php?rowKey=20230108_9FF5BEE1_244432674_1_r',
+ 'upload_date': '20230108',
+ 'title': '젠지 페이즈',
+ },
+ 'params': {
+ 'skip_download': True,
+ },
+ }]
+
+ @staticmethod
+ def parse_video_key(key):
+ video_key = {}
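+        # VOD file keys look like '<YYYYMMDD>_<hash>_<part>'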
+ m = re.match(r'^(?P<upload_date>\d{8})_\w+_(?P<part>\d+)$', key)
+ if m:
+ video_key['upload_date'] = m.group('upload_date')
+ video_key['part'] = int(m.group('part'))
+ return video_key
+
+ def _perform_login(self, username, password):
+ login_form = {
+ 'szWork': 'login',
+ 'szType': 'json',
+ 'szUid': username,
+ 'szPassword': password,
+ 'isSaveId': 'false',
+ 'szScriptVar': 'oLoginRet',
+ 'szAction': '',
+ }
+
+ response = self._download_json(
+ 'https://login.afreecatv.com/app/LoginAction.php', None,
+ 'Logging in', data=urlencode_postdata(login_form))
+
+ _ERRORS = {
+ -4: 'Your account has been suspended due to a violation of our terms and policies.',
+ -5: 'https://member.afreecatv.com/app/user_delete_progress.php',
+ -6: 'https://login.afreecatv.com/membership/changeMember.php',
+ -8: "Hello! AfreecaTV here.\nThe username you have entered belongs to \n an account that requires a legal guardian's consent. \nIf you wish to use our services without restriction, \nplease make sure to go through the necessary verification process.",
+ -9: 'https://member.afreecatv.com/app/pop_login_block.php',
+ -11: 'https://login.afreecatv.com/afreeca/second_login.php',
+ -12: 'https://member.afreecatv.com/app/user_security.php',
+ 0: 'The username does not exist or you have entered the wrong password.',
+ -1: 'The username does not exist or you have entered the wrong password.',
+ -3: 'You have entered your username/password incorrectly.',
+ -7: 'You cannot use your Global AfreecaTV account to access Korean AfreecaTV.',
+ -10: 'Sorry for the inconvenience. \nYour account has been blocked due to an unauthorized access. \nPlease contact our Help Center for assistance.',
+ -32008: 'You have failed to log in. Please contact our Help Center.',
+ }
+
+ result = int_or_none(response.get('RESULT'))
+ if result != 1:
+ error = _ERRORS.get(result, 'You have failed to log in.')
+ raise ExtractorError(
+                'Unable to log in: %s said: %s' % (self.IE_NAME, error),
+ expected=True)
+
+ def _real_extract(self, url):
+ video_id = self._match_id(url)
+
+ partial_view = False
+ adult_view = False
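+        # up to two attempts: a first response flagged PARTIAL_ADULT or ADULT
+        # triggers one retry with the matching query parameter set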
+ for _ in range(2):
+ data = self._download_json(
+ 'https://api.m.afreecatv.com/station/video/a/view',
+ video_id, headers={'Referer': url}, data=urlencode_postdata({
+ 'nTitleNo': video_id,
+ 'nApiLevel': 10,
+ }))['data']
+ if traverse_obj(data, ('code', {int})) == -6221:
+ raise ExtractorError('The VOD does not exist', expected=True)
+ query = {
+ 'nTitleNo': video_id,
+ 'nStationNo': data['station_no'],
+ 'nBbsNo': data['bbs_no'],
+ }
+ if partial_view:
+ query['partialView'] = 'SKIP_ADULT'
+ if adult_view:
+ query['adultView'] = 'ADULT_VIEW'
+ video_xml = self._download_xml(
+ 'http://afbbs.afreecatv.com:8080/api/video/get_video_info.php',
+                video_id, 'Downloading video info XML%s'
+                % (' (skipping adult)' if partial_view else ''),
+                'Unable to download video info XML', headers={
+ 'Referer': url,
+ }, query=query)
+
+ flag = xpath_text(video_xml, './track/flag', 'flag', default=None)
+            if flag == 'SUCCEED':
+ break
+ if flag == 'PARTIAL_ADULT':
+ self.report_warning(
+ 'In accordance with local laws and regulations, underage users are restricted from watching adult content. '
+ 'Only content suitable for all ages will be downloaded. '
+ 'Provide account credentials if you wish to download restricted content.')
+ partial_view = True
+ continue
+ elif flag == 'ADULT':
+ if not adult_view:
+ adult_view = True
+ continue
+ error = 'Only users older than 19 are able to watch this video. Provide account credentials to download this content.'
+ else:
+ error = flag
+ raise ExtractorError(
+ '%s said: %s' % (self.IE_NAME, error), expected=True)
+ else:
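+            # for/else: neither attempt produced a SUCCEED flag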
+ raise ExtractorError('Unable to download video info')
+
+ video_element = video_xml.findall('./track/video')[-1]
+ if video_element is None or video_element.text is None:
+ raise ExtractorError(
+ 'Video %s does not exist' % video_id, expected=True)
+
+ video_url = video_element.text.strip()
+
+ title = xpath_text(video_xml, './track/title', 'title', fatal=True)
+
+ uploader = xpath_text(video_xml, './track/nickname', 'uploader')
+ uploader_id = xpath_text(video_xml, './track/bj_id', 'uploader id')
+ duration = int_or_none(xpath_text(
+ video_xml, './track/duration', 'duration'))
+ thumbnail = xpath_text(video_xml, './track/titleImage', 'thumbnail')
+
+ common_entry = {
+ 'uploader': uploader,
+ 'uploader_id': uploader_id,
+ 'thumbnail': thumbnail,
+ }
+
+ info = common_entry.copy()
+ info.update({
+ 'id': video_id,
+ 'title': title,
+ 'duration': duration,
+ })
+
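+        # multi-part VODs carry no URL on the <video> element itself; each part
+        # is a child <file> element, returned together as a multi_video entry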
+ if not video_url:
+ entries = []
+ file_elements = video_element.findall('./file')
+ one = len(file_elements) == 1
+ for file_num, file_element in enumerate(file_elements, start=1):
+ file_url = url_or_none(file_element.text)
+ if not file_url:
+ continue
+ key = file_element.get('key', '')
+ upload_date = unified_strdate(self._search_regex(
+ r'^(\d{8})_', key, 'upload date', default=None))
+ if upload_date is not None:
+ # sometimes the upload date isn't included in the file name
+ # instead, another random ID is, which may parse as a valid
+ # date but be wildly out of a reasonable range
+ parsed_date = date_from_str(upload_date)
+ if parsed_date.year < 2000 or parsed_date.year >= 2100:
+ upload_date = None
+ file_duration = int_or_none(file_element.get('duration'))
+                format_id = key or '%s_%s' % (video_id, file_num)
+ if determine_ext(file_url) == 'm3u8':
+ formats = self._extract_m3u8_formats(
+ file_url, video_id, 'mp4', entry_protocol='m3u8_native',
+ m3u8_id='hls',
+ note='Downloading part %d m3u8 information' % file_num)
+ else:
+ formats = [{
+ 'url': file_url,
+ 'format_id': 'http',
+ }]
+ if not formats and not self.get_param('ignore_no_formats'):
+ continue
+ file_info = common_entry.copy()
+ file_info.update({
+ 'id': format_id,
+ 'title': title if one else '%s (part %d)' % (title, file_num),
+ 'upload_date': upload_date,
+ 'duration': file_duration,
+ 'formats': formats,
+ })
+ entries.append(file_info)
+ entries_info = info.copy()
+ entries_info.update({
+ '_type': 'multi_video',
+ 'entries': entries,
+ })
+ return entries_info
+
+ info = {
+ 'id': video_id,
+ 'title': title,
+ 'uploader': uploader,
+ 'uploader_id': uploader_id,
+ 'duration': duration,
+ 'thumbnail': thumbnail,
+ }
+
+ if determine_ext(video_url) == 'm3u8':
+ info['formats'] = self._extract_m3u8_formats(
+ video_url, video_id, 'mp4', entry_protocol='m3u8_native',
+ m3u8_id='hls')
+ else:
+ app, playpath = video_url.split('mp4:')
+ info.update({
+ 'url': app,
+ 'ext': 'flv',
+ 'play_path': 'mp4:' + playpath,
+ 'rtmp_live': True, # downloading won't end without this
+ })
+
+ return info
+
+
+class AfreecaTVLiveIE(AfreecaTVIE):  # XXX: Do not subclass from concrete IE
+
+ IE_NAME = 'afreecatv:live'
+ _VALID_URL = r'https?://play\.afreeca(?:tv)?\.com/(?P<id>[^/]+)(?:/(?P<bno>\d+))?'
+ _TESTS = [{
+ 'url': 'https://play.afreecatv.com/pyh3646/237852185',
+ 'info_dict': {
+ 'id': '237852185',
+ 'ext': 'mp4',
+ 'title': '【 우루과이 오늘은 무슨일이? 】',
+ 'uploader': '박진우[JINU]',
+ 'uploader_id': 'pyh3646',
+ 'timestamp': 1640661495,
+ 'is_live': True,
+ },
+ 'skip': 'Livestream has ended',
+ }, {
+ 'url': 'http://play.afreeca.com/pyh3646/237852185',
+ 'only_matching': True,
+ }, {
+ 'url': 'http://play.afreeca.com/pyh3646',
+ 'only_matching': True,
+ }]
+
+ _LIVE_API_URL = 'https://live.afreecatv.com/afreeca/player_live_api.php'
+
+ _QUALITIES = ('sd', 'hd', 'hd2k', 'original')
+
+ def _real_extract(self, url):
+ broadcaster_id, broadcast_no = self._match_valid_url(url).group('id', 'bno')
+ password = self.get_param('videopassword')
+
+ info = self._download_json(self._LIVE_API_URL, broadcaster_id, fatal=False,
+ data=urlencode_postdata({'bid': broadcaster_id})) or {}
+ channel_info = info.get('CHANNEL') or {}
+ broadcaster_id = channel_info.get('BJID') or broadcaster_id
+ broadcast_no = channel_info.get('BNO') or broadcast_no
+ password_protected = channel_info.get('BPWD')
+ if not broadcast_no:
+ raise ExtractorError(f'Unable to extract broadcast number ({broadcaster_id} may not be live)', expected=True)
+ if password_protected == 'Y' and password is None:
+ raise ExtractorError(
+ 'This livestream is protected by a password, use the --video-password option',
+ expected=True)
+
+ formats = []
+ quality_key = qualities(self._QUALITIES)
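+        # each quality needs its own access token (AID); token and stream-URL
+        # requests are non-fatal, so unavailable qualities are simply skipped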
+ for quality_str in self._QUALITIES:
+ params = {
+ 'bno': broadcast_no,
+ 'stream_type': 'common',
+ 'type': 'aid',
+ 'quality': quality_str,
+ }
+ if password is not None:
+ params['pwd'] = password
+ aid_response = self._download_json(
+ self._LIVE_API_URL, broadcast_no, fatal=False,
+ data=urlencode_postdata(params),
+ note=f'Downloading access token for {quality_str} stream',
+ errnote=f'Unable to download access token for {quality_str} stream')
+ aid = traverse_obj(aid_response, ('CHANNEL', 'AID'))
+ if not aid:
+ continue
+
+ stream_base_url = channel_info.get('RMD') or 'https://livestream-manager.afreecatv.com'
+ stream_info = self._download_json(
+ f'{stream_base_url}/broad_stream_assign.html', broadcast_no, fatal=False,
+ query={
+ 'return_type': channel_info.get('CDN', 'gcp_cdn'),
+ 'broad_key': f'{broadcast_no}-common-{quality_str}-hls',
+ },
+ note=f'Downloading metadata for {quality_str} stream',
+ errnote=f'Unable to download metadata for {quality_str} stream') or {}
+
+ if stream_info.get('view_url'):
+ formats.append({
+ 'format_id': quality_str,
+ 'url': update_url_query(stream_info['view_url'], {'aid': aid}),
+ 'ext': 'mp4',
+ 'protocol': 'm3u8',
+ 'quality': quality_key(quality_str),
+ })
+
+ station_info = self._download_json(
+ 'https://st.afreecatv.com/api/get_station_status.php', broadcast_no,
+ query={'szBjId': broadcaster_id}, fatal=False,
+ note='Downloading channel metadata', errnote='Unable to download channel metadata') or {}
+
+ return {
+ 'id': broadcast_no,
+ 'title': channel_info.get('TITLE') or station_info.get('station_title'),
+ 'uploader': channel_info.get('BJNICK') or station_info.get('station_name'),
+ 'uploader_id': broadcaster_id,
+ 'timestamp': unified_timestamp(station_info.get('broad_start')),
+ 'formats': formats,
+ 'is_live': True,
+ }
+
+
+class AfreecaTVUserIE(InfoExtractor):
+ IE_NAME = 'afreecatv:user'
+ _VALID_URL = r'https?://bj\.afreeca(?:tv)?\.com/(?P<id>[^/]+)/vods/?(?P<slug_type>[^/]+)?'
+ _TESTS = [{
+ 'url': 'https://bj.afreecatv.com/ryuryu24/vods/review',
+ 'info_dict': {
+ '_type': 'playlist',
+ 'id': 'ryuryu24',
+ 'title': 'ryuryu24 - review',
+ },
+ 'playlist_count': 218,
+ }, {
+ 'url': 'https://bj.afreecatv.com/parang1995/vods/highlight',
+ 'info_dict': {
+ '_type': 'playlist',
+ 'id': 'parang1995',
+ 'title': 'parang1995 - highlight',
+ },
+ 'playlist_count': 997,
+ }, {
+ 'url': 'https://bj.afreecatv.com/ryuryu24/vods',
+ 'info_dict': {
+ '_type': 'playlist',
+ 'id': 'ryuryu24',
+ 'title': 'ryuryu24 - all',
+ },
+ 'playlist_count': 221,
+ }, {
+ 'url': 'https://bj.afreecatv.com/ryuryu24/vods/balloonclip',
+ 'info_dict': {
+ '_type': 'playlist',
+ 'id': 'ryuryu24',
+ 'title': 'ryuryu24 - balloonclip',
+ },
+ 'playlist_count': 0,
+ }]
+ _PER_PAGE = 60
+
+ def _fetch_page(self, user_id, user_type, page):
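+        # OnDemandPagedList passes 0-based page numbers; the API expects 1-based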
+ page += 1
+ info = self._download_json(f'https://bjapi.afreecatv.com/api/{user_id}/vods/{user_type}', user_id,
+ query={'page': page, 'per_page': self._PER_PAGE, 'orderby': 'reg_date'},
+ note=f'Downloading {user_type} video page {page}')
+ for item in info['data']:
+ yield self.url_result(
+ f'https://vod.afreecatv.com/player/{item["title_no"]}/', AfreecaTVIE, item['title_no'])
+
+ def _real_extract(self, url):
+ user_id, user_type = self._match_valid_url(url).group('id', 'slug_type')
+ user_type = user_type or 'all'
+ entries = OnDemandPagedList(functools.partial(self._fetch_page, user_id, user_type), self._PER_PAGE)
+ return self.playlist_result(entries, user_id, f'{user_id} - {user_type}')
diff --git a/yt_dlp/extractor/agora.py b/yt_dlp/extractor/agora.py
new file mode 100644
index 0000000..abb2d3f
--- /dev/null
+++ b/yt_dlp/extractor/agora.py
@@ -0,0 +1,251 @@
+import functools
+import uuid
+
+from .common import InfoExtractor
+from ..utils import (
+ ExtractorError,
+ OnDemandPagedList,
+ int_or_none,
+ month_by_name,
+ parse_duration,
+ try_call,
+)
+
+
+class WyborczaVideoIE(InfoExtractor):
+    # this id is not an article id; it has to be extracted from the article
+ _VALID_URL = r'(?:wyborcza:video:|https?://wyborcza\.pl/(?:api-)?video/)(?P<id>\d+)'
+ IE_NAME = 'wyborcza:video'
+ _TESTS = [{
+ 'url': 'wyborcza:video:26207634',
+ 'info_dict': {
+ 'id': '26207634',
+ 'ext': 'mp4',
+ 'title': '- Polska w 2020 r. jest innym państwem niż w 2015 r. Nie zmieniła się konstytucja, ale jest to już inny ustrój - mówi Adam Bodnar',
+ 'description': ' ',
+ 'uploader': 'Dorota Roman',
+ 'duration': 2474,
+ 'thumbnail': r're:https://.+\.jpg',
+ },
+ }, {
+ 'url': 'https://wyborcza.pl/video/26207634',
+ 'only_matching': True,
+ }, {
+ 'url': 'https://wyborcza.pl/api-video/26207634',
+ 'only_matching': True,
+ }]
+
+ def _real_extract(self, url):
+ video_id = self._match_id(url)
+ meta = self._download_json(f'https://wyborcza.pl/api-video/{video_id}', video_id)
+
+ formats = []
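+        # the API may return an http:// redirector; upgrade it to https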
+ base_url = meta['redirector'].replace('http://', 'https://') + meta['basePath']
+ for quality in ('standard', 'high'):
+ if not meta['files'].get(quality):
+ continue
+ formats.append({
+ 'url': base_url + meta['files'][quality],
+ 'height': int_or_none(
+ self._search_regex(
+ r'p(\d+)[a-z]+\.mp4$', meta['files'][quality],
+ 'mp4 video height', default=None)),
+ 'format_id': quality,
+ })
+ if meta['files'].get('dash'):
+ formats.extend(self._extract_mpd_formats(base_url + meta['files']['dash'], video_id))
+
+ return {
+ 'id': video_id,
+ 'formats': formats,
+ 'title': meta.get('title'),
+ 'description': meta.get('lead'),
+ 'uploader': meta.get('signature'),
+ 'thumbnail': meta.get('imageUrl'),
+ 'duration': meta.get('duration'),
+ }
+
+
+class WyborczaPodcastIE(InfoExtractor):
+ _VALID_URL = r'''(?x)
+ https?://(?:www\.)?(?:
+ wyborcza\.pl/podcast(?:/0,172673\.html)?|
+ wysokieobcasy\.pl/wysokie-obcasy/0,176631\.html
+ )(?:\?(?:[^&#]+?&)*podcast=(?P<id>\d+))?
+ '''
+ _TESTS = [{
+ 'url': 'https://wyborcza.pl/podcast/0,172673.html?podcast=100720#S.main_topic-K.C-B.6-L.1.podcast',
+ 'info_dict': {
+ 'id': '100720',
+ 'ext': 'mp3',
+ 'title': 'Cyfrodziewczyny. Kim były pionierki polskiej informatyki ',
+ 'uploader': 'Michał Nogaś ',
+ 'upload_date': '20210117',
+ 'description': 'md5:49f0a06ffc4c1931210d3ab1416a651d',
+ 'duration': 3684.0,
+ 'thumbnail': r're:https://.+\.jpg',
+ },
+ }, {
+ 'url': 'https://www.wysokieobcasy.pl/wysokie-obcasy/0,176631.html?podcast=100673',
+ 'info_dict': {
+ 'id': '100673',
+ 'ext': 'mp3',
+ 'title': 'Czym jest ubóstwo menstruacyjne i dlaczego dotyczy każdej i każdego z nas?',
+ 'uploader': 'Agnieszka Urazińska ',
+ 'upload_date': '20210115',
+ 'description': 'md5:c161dc035f8dbb60077011fc41274899',
+ 'duration': 1803.0,
+ 'thumbnail': r're:https://.+\.jpg',
+ },
+ }, {
+ 'url': 'https://wyborcza.pl/podcast',
+ 'info_dict': {
+ 'id': '334',
+ 'title': 'Gościnnie: Wyborcza, 8:10',
+ 'series': 'Gościnnie: Wyborcza, 8:10',
+ },
+ 'playlist_mincount': 370,
+ }, {
+ 'url': 'https://www.wysokieobcasy.pl/wysokie-obcasy/0,176631.html',
+ 'info_dict': {
+ 'id': '395',
+ 'title': 'Gościnnie: Wysokie Obcasy',
+ 'series': 'Gościnnie: Wysokie Obcasy',
+ },
+ 'playlist_mincount': 12,
+ }]
+
+ def _real_extract(self, url):
+ podcast_id = self._match_id(url)
+
+ if not podcast_id: # playlist
+ podcast_id = '395' if 'wysokieobcasy.pl/' in url else '334'
+ return self.url_result(TokFMAuditionIE._create_url(podcast_id), TokFMAuditionIE, podcast_id)
+
+ meta = self._download_json('https://wyborcza.pl/api/podcast', podcast_id,
+ query={'guid': podcast_id, 'type': 'wo' if 'wysokieobcasy.pl/' in url else None})
+
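+        # publishedDate is a Polish-locale string (e.g. '17 stycznia 2021');
+        # rebuild a YYYYMMDD date from its parts via month_by_name(lang='pl')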
+ day, month, year = self._search_regex(r'^(\d\d?) (\w+) (\d{4})$', meta.get('publishedDate'),
+ 'upload date', group=(1, 2, 3), default=(None, None, None))
+ return {
+ 'id': podcast_id,
+ 'url': meta['url'],
+ 'title': meta.get('title'),
+ 'description': meta.get('description'),
+ 'thumbnail': meta.get('imageUrl'),
+ 'duration': parse_duration(meta.get('duration')),
+ 'uploader': meta.get('author'),
+ 'upload_date': try_call(lambda: f'{year}{month_by_name(month, lang="pl"):0>2}{day:0>2}'),
+ }
+
+
+class TokFMPodcastIE(InfoExtractor):
+ _VALID_URL = r'(?:https?://audycje\.tokfm\.pl/podcast/|tokfm:podcast:)(?P<id>\d+),?'
+ IE_NAME = 'tokfm:podcast'
+ _TESTS = [{
+ 'url': 'https://audycje.tokfm.pl/podcast/91275,-Systemowy-rasizm-Czy-zamieszki-w-USA-po-morderstwie-w-Minneapolis-doprowadza-do-zmian-w-sluzbach-panstwowych',
+ 'info_dict': {
+ 'id': '91275',
+ 'ext': 'aac',
+ 'title': 'md5:a9b15488009065556900169fb8061cce',
+ 'episode': 'md5:a9b15488009065556900169fb8061cce',
+ 'series': 'Analizy',
+ },
+ }]
+
+ def _real_extract(self, url):
+ media_id = self._match_id(url)
+
+        # if this endpoint breaks, the API below can be used instead,
+        # but it returns a lot of useless data:
+ # https://api.podcast.radioagora.pl/api4/getPodcasts?podcast_id=100091&with_guests=true&with_leaders_for_mobile=true
+ metadata = self._download_json(
+ f'https://audycje.tokfm.pl/getp/3{media_id}', media_id, 'Downloading podcast metadata')
+ if not metadata:
+ raise ExtractorError('No such podcast', expected=True)
+ metadata = metadata[0]
+
+ formats = []
+ for ext in ('aac', 'mp3'):
+ url_data = self._download_json(
+ f'https://api.podcast.radioagora.pl/api4/getSongUrl?podcast_id={media_id}&device_id={uuid.uuid4()}&ppre=false&audio={ext}',
+ media_id, 'Downloading podcast %s URL' % ext)
+ # prevents inserting the mp3 (default) multiple times
+ if 'link_ssl' in url_data and f'.{ext}' in url_data['link_ssl']:
+ formats.append({
+ 'url': url_data['link_ssl'],
+ 'ext': ext,
+ 'vcodec': 'none',
+ 'acodec': ext,
+ })
+
+ return {
+ 'id': media_id,
+ 'formats': formats,
+ 'title': metadata.get('podcast_name'),
+ 'series': metadata.get('series_name'),
+ 'episode': metadata.get('podcast_name'),
+ }
+
+
+class TokFMAuditionIE(InfoExtractor):
+ _VALID_URL = r'(?:https?://audycje\.tokfm\.pl/audycja/|tokfm:audition:)(?P<id>\d+),?'
+ IE_NAME = 'tokfm:audition'
+ _TESTS = [{
+ 'url': 'https://audycje.tokfm.pl/audycja/218,Analizy',
+ 'info_dict': {
+ 'id': '218',
+ 'title': 'Analizy',
+ 'series': 'Analizy',
+ },
+ 'playlist_count': 1635,
+ }]
+
+ _PAGE_SIZE = 30
+ _HEADERS = {
+ 'User-Agent': 'Mozilla/5.0 (Linux; Android 9; Redmi 3S Build/PQ3A.190801.002; wv) AppleWebKit/537.36 (KHTML, like Gecko) Version/4.0 Chrome/87.0.4280.101 Mobile Safari/537.36',
+ }
+
+ @staticmethod
+ def _create_url(id):
+ return f'https://audycje.tokfm.pl/audycja/{id}'
+
+ def _real_extract(self, url):
+ audition_id = self._match_id(url)
+
+ data = self._download_json(
+ f'https://api.podcast.radioagora.pl/api4/getSeries?series_id={audition_id}',
+ audition_id, 'Downloading audition metadata', headers=self._HEADERS)
+ if not data:
+ raise ExtractorError('No such audition', expected=True)
+ data = data[0]
+
+ entries = OnDemandPagedList(functools.partial(
+ self._fetch_page, audition_id, data), self._PAGE_SIZE)
+
+ return {
+ '_type': 'playlist',
+ 'id': audition_id,
+ 'title': data.get('series_name'),
+ 'series': data.get('series_name'),
+ 'entries': entries,
+ }
+
+ def _fetch_page(self, audition_id, data, page):
+ for retry in self.RetryManager():
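+            # the API occasionally returns an empty list for a valid page; retry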
+ podcast_page = self._download_json(
+ f'https://api.podcast.radioagora.pl/api4/getPodcasts?series_id={audition_id}&limit=30&offset={page}&with_guests=true&with_leaders_for_mobile=true',
+ audition_id, f'Downloading podcast list page {page + 1}', headers=self._HEADERS)
+ if not podcast_page:
+ retry.error = ExtractorError('Agora returned empty page', expected=True)
+
+ for podcast in podcast_page:
+ yield {
+ '_type': 'url_transparent',
+ 'url': podcast['podcast_sharing_url'],
+ 'ie_key': TokFMPodcastIE.ie_key(),
+ 'title': podcast.get('podcast_name'),
+ 'episode': podcast.get('podcast_name'),
+ 'description': podcast.get('podcast_description'),
+ 'timestamp': int_or_none(podcast.get('podcast_timestamp')),
+ 'series': data.get('series_name'),
+ }
diff --git a/yt_dlp/extractor/airtv.py b/yt_dlp/extractor/airtv.py
new file mode 100644
index 0000000..0b73a96
--- /dev/null
+++ b/yt_dlp/extractor/airtv.py
@@ -0,0 +1,96 @@
+from .common import InfoExtractor
+from .youtube import YoutubeIE
+from ..utils import (
+ determine_ext,
+ int_or_none,
+ mimetype2ext,
+ parse_iso8601,
+ traverse_obj
+)
+
+
+class AirTVIE(InfoExtractor):
+ _VALID_URL = r'https?://www\.air\.tv/watch\?v=(?P<id>\w+)'
+ _TESTS = [{
+ # without youtube_id
+ 'url': 'https://www.air.tv/watch?v=W87jcWleSn2hXZN47zJZsQ',
+ 'info_dict': {
+ 'id': 'W87jcWleSn2hXZN47zJZsQ',
+ 'ext': 'mp4',
+ 'release_date': '20221003',
+ 'release_timestamp': 1664792603,
+ 'channel_id': 'vgfManQlRQKgoFQ8i8peFQ',
+ 'title': 'md5:c12d49ed367c3dadaa67659aff43494c',
+ 'upload_date': '20221003',
+ 'duration': 151,
+ 'view_count': int,
+ 'thumbnail': 'https://cdn-sp-gcs.air.tv/videos/W/8/W87jcWleSn2hXZN47zJZsQ/b13fc56464f47d9d62a36d110b9b5a72-4096x2160_9.jpg',
+ 'timestamp': 1664792603,
+ }
+ }, {
+ # with youtube_id
+ 'url': 'https://www.air.tv/watch?v=sv57EC8tRXG6h8dNXFUU1Q',
+ 'info_dict': {
+ 'id': '2ZTqmpee-bQ',
+ 'ext': 'mp4',
+ 'comment_count': int,
+ 'tags': 'count:11',
+ 'channel_follower_count': int,
+ 'like_count': int,
+ 'uploader': 'Newsflare',
+ 'thumbnail': 'https://i.ytimg.com/vi_webp/2ZTqmpee-bQ/maxresdefault.webp',
+ 'availability': 'public',
+ 'title': 'Geese Chase Alligator Across Golf Course',
+ 'uploader_id': 'NewsflareBreaking',
+ 'channel_url': 'https://www.youtube.com/channel/UCzSSoloGEz10HALUAbYhngQ',
+ 'description': 'md5:99b21d9cea59330149efbd9706e208f5',
+ 'age_limit': 0,
+ 'channel_id': 'UCzSSoloGEz10HALUAbYhngQ',
+ 'uploader_url': 'http://www.youtube.com/user/NewsflareBreaking',
+ 'view_count': int,
+ 'categories': ['News & Politics'],
+ 'live_status': 'not_live',
+ 'playable_in_embed': True,
+ 'channel': 'Newsflare',
+ 'duration': 37,
+ 'upload_date': '20180511',
+ }
+ }]
+
+ def _get_formats_and_subtitle(self, json_data, video_id):
+ formats, subtitles = [], {}
+ for source in traverse_obj(json_data, 'sources', 'sources_desktop', ...):
+ ext = determine_ext(source.get('src'), mimetype2ext(source.get('type')))
+ if ext == 'm3u8':
+ fmts, subs = self._extract_m3u8_formats_and_subtitles(source.get('src'), video_id)
+ formats.extend(fmts)
+ self._merge_subtitles(subs, target=subtitles)
+ else:
+ formats.append({'url': source.get('src'), 'ext': ext})
+ return formats, subtitles
+
+ def _real_extract(self, url):
+ display_id = self._match_id(url)
+ webpage = self._download_webpage(url, display_id)
+
+ nextjs_json = self._search_nextjs_data(webpage, display_id)['props']['pageProps']['initialState']['videos'][display_id]
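+        # videos sourced from YouTube carry a youtube_id; delegate those to YoutubeIE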
+ if nextjs_json.get('youtube_id'):
+ return self.url_result(
+ f'https://www.youtube.com/watch?v={nextjs_json.get("youtube_id")}', YoutubeIE)
+
+ formats, subtitles = self._get_formats_and_subtitle(nextjs_json, display_id)
+ return {
+ 'id': display_id,
+ 'title': nextjs_json.get('title') or self._html_search_meta('og:title', webpage),
+ 'formats': formats,
+ 'subtitles': subtitles,
+ 'description': nextjs_json.get('description') or None,
+ 'duration': int_or_none(nextjs_json.get('duration')),
+ 'thumbnails': [
+ {'url': thumbnail}
+ for thumbnail in traverse_obj(nextjs_json, ('default_thumbnails', ...))],
+ 'channel_id': traverse_obj(nextjs_json, 'channel', 'channel_slug'),
+ 'timestamp': parse_iso8601(nextjs_json.get('created')),
+ 'release_timestamp': parse_iso8601(nextjs_json.get('published')),
+ 'view_count': int_or_none(nextjs_json.get('views')),
+ }
diff --git a/yt_dlp/extractor/aitube.py b/yt_dlp/extractor/aitube.py
new file mode 100644
index 0000000..89a6450
--- /dev/null
+++ b/yt_dlp/extractor/aitube.py
@@ -0,0 +1,60 @@
+from .common import InfoExtractor
+from ..utils import int_or_none, merge_dicts
+
+
+class AitubeKZVideoIE(InfoExtractor):
+ _VALID_URL = r'https?://aitube\.kz/(?:video|embed/)\?(?:[^\?]+)?id=(?P<id>[\w-]+)'
+ _TESTS = [{
+        # id parameter as the first query parameter
+ 'url': 'https://aitube.kz/video?id=9291d29b-c038-49a1-ad42-3da2051d353c&playlistId=d55b1f5f-ef2a-4f23-b646-2a86275b86b7&season=1',
+ 'info_dict': {
+ 'id': '9291d29b-c038-49a1-ad42-3da2051d353c',
+ 'ext': 'mp4',
+ 'duration': 2174.0,
+ 'channel_id': '94962f73-013b-432c-8853-1bd78ca860fe',
+ 'like_count': int,
+ 'channel': 'ASTANA TV',
+ 'comment_count': int,
+ 'view_count': int,
+ 'description': 'Смотреть любимые сериалы и видео, поделиться видео и сериалами с друзьями и близкими',
+ 'thumbnail': 'https://cdn.static02.aitube.kz/kz.aitudala.aitube.staticaccess/files/ddf2a2ff-bee3-409b-b5f2-2a8202bba75b',
+ 'upload_date': '20221102',
+ 'timestamp': 1667370519,
+ 'title': 'Ангел хранитель 1 серия',
+ 'channel_follower_count': int,
+ }
+ }, {
+ # embed url
+ 'url': 'https://aitube.kz/embed/?id=9291d29b-c038-49a1-ad42-3da2051d353c',
+ 'only_matching': True,
+ }, {
+        # id parameter is not the first query parameter
+ 'url': 'https://aitube.kz/video?season=1&id=9291d29b-c038-49a1-ad42-3da2051d353c&playlistId=d55b1f5f-ef2a-4f23-b646-2a86275b86b7',
+ 'only_matching': True,
+ }]
+
+ def _real_extract(self, url):
+ video_id = self._match_id(url)
+ webpage = self._download_webpage(url, video_id)
+
+ nextjs_data = self._search_nextjs_data(webpage, video_id)['props']['pageProps']['videoInfo']
+ json_ld_data = self._search_json_ld(webpage, video_id)
+
+ formats, subtitles = self._extract_m3u8_formats_and_subtitles(
+ f'https://api-http.aitube.kz/kz.aitudala.aitube.staticaccess/video/{video_id}/video', video_id)
+
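+        # merge_dicts prefers earlier values: the Next.js payload wins and
+        # JSON-LD only fills in fields that are still missing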
+ return merge_dicts({
+ 'id': video_id,
+ 'title': nextjs_data.get('title') or self._html_search_meta(['name', 'og:title'], webpage),
+ 'description': nextjs_data.get('description'),
+ 'formats': formats,
+ 'subtitles': subtitles,
+ 'view_count': (nextjs_data.get('viewCount')
+ or int_or_none(self._html_search_meta('ya:ovs:views_total', webpage))),
+ 'like_count': nextjs_data.get('likeCount'),
+ 'channel': nextjs_data.get('channelTitle'),
+ 'channel_id': nextjs_data.get('channelId'),
+ 'thumbnail': nextjs_data.get('coverUrl'),
+ 'comment_count': nextjs_data.get('commentCount'),
+ 'channel_follower_count': int_or_none(nextjs_data.get('channelSubscriberCount')),
+ }, json_ld_data)
diff --git a/yt_dlp/extractor/aliexpress.py b/yt_dlp/extractor/aliexpress.py
new file mode 100644
index 0000000..2e83f2e
--- /dev/null
+++ b/yt_dlp/extractor/aliexpress.py
@@ -0,0 +1,50 @@
+from .common import InfoExtractor
+from ..compat import compat_str
+from ..utils import (
+ float_or_none,
+ try_get,
+)
+
+
+class AliExpressLiveIE(InfoExtractor):
+ _VALID_URL = r'https?://live\.aliexpress\.com/live/(?P<id>\d+)'
+ _TEST = {
+ 'url': 'https://live.aliexpress.com/live/2800002704436634',
+ 'md5': 'e729e25d47c5e557f2630eaf99b740a5',
+ 'info_dict': {
+ 'id': '2800002704436634',
+ 'ext': 'mp4',
+ 'title': 'CASIMA7.22',
+ 'thumbnail': r're:https?://.*\.jpg',
+ 'uploader': 'CASIMA Official Store',
+ 'timestamp': 1500717600,
+ 'upload_date': '20170722',
+ },
+ }
+
+ def _real_extract(self, url):
+ video_id = self._match_id(url)
+
+ webpage = self._download_webpage(url, video_id)
+
+ data = self._parse_json(
+ self._search_regex(
+ r'(?s)runParams\s*=\s*({.+?})\s*;?\s*var',
+ webpage, 'runParams'),
+ video_id)
+
+ title = data['title']
+
+ formats = self._extract_m3u8_formats(
+ data['replyStreamUrl'], video_id, 'mp4',
+ entry_protocol='m3u8_native', m3u8_id='hls')
+
+ return {
+ 'id': video_id,
+ 'title': title,
+ 'thumbnail': data.get('coverUrl'),
+ 'uploader': try_get(
+ data, lambda x: x['followBar']['name'], compat_str),
+ 'timestamp': float_or_none(data.get('startTimeLong'), scale=1000),
+ 'formats': formats,
+ }
diff --git a/yt_dlp/extractor/aljazeera.py b/yt_dlp/extractor/aljazeera.py
new file mode 100644
index 0000000..124bab0
--- /dev/null
+++ b/yt_dlp/extractor/aljazeera.py
@@ -0,0 +1,83 @@
+import json
+
+from .common import InfoExtractor
+from ..utils import (
+ try_get,
+)
+
+
+class AlJazeeraIE(InfoExtractor):
+ _VALID_URL = r'https?://(?P<base>\w+\.aljazeera\.\w+)/(?P<type>programs?/[^/]+|(?:feature|video|new)s)?/\d{4}/\d{1,2}/\d{1,2}/(?P<id>[^/?&#]+)'
+
+ _TESTS = [{
+ 'url': 'https://balkans.aljazeera.net/videos/2021/11/6/pojedini-domovi-u-sarajevu-jos-pod-vodom-mjestanima-se-dostavlja-hrana',
+ 'info_dict': {
+ 'id': '6280641530001',
+ 'ext': 'mp4',
+ 'title': 'Pojedini domovi u Sarajevu još pod vodom, mještanima se dostavlja hrana',
+ 'timestamp': 1636219149,
+ 'description': 'U sarajevskim naseljima Rajlovac i Reljevo stambeni objekti, ali i industrijska postrojenja i dalje su pod vodom.',
+ 'upload_date': '20211106',
+ }
+ }, {
+ 'url': 'https://balkans.aljazeera.net/videos/2021/11/6/djokovic-usao-u-finale-mastersa-u-parizu',
+ 'info_dict': {
+ 'id': '6280654936001',
+ 'ext': 'mp4',
+ 'title': 'Đoković ušao u finale Mastersa u Parizu',
+ 'timestamp': 1636221686,
+ 'description': 'Novak Đoković je u polufinalu Mastersa u Parizu nakon preokreta pobijedio Poljaka Huberta Hurkacza.',
+ 'upload_date': '20211106',
+ },
+ }]
+    BRIGHTCOVE_URL_RE = r'https?://players\.brightcove\.net/(?P<account>\d+)/(?P<player_id>[a-zA-Z0-9]+)_(?P<embed>[^/]+)/index\.html\?videoId=(?P<id>\d+)'
+
+ def _real_extract(self, url):
+ base, post_type, id = self._match_valid_url(url).groups()
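+        # each Al Jazeera site maps to its own WordPress instance, passed to the
+        # GraphQL endpoint via the wp-site query parameter and header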
+ wp = {
+ 'balkans.aljazeera.net': 'ajb',
+ 'chinese.aljazeera.net': 'chinese',
+ 'mubasher.aljazeera.net': 'ajm',
+ }.get(base) or 'aje'
+ post_type = {
+ 'features': 'post',
+ 'program': 'episode',
+ 'programs': 'episode',
+ 'videos': 'video',
+ 'news': 'news',
+ }[post_type.split('/')[0]]
+ video = self._download_json(
+ f'https://{base}/graphql', id, query={
+ 'wp-site': wp,
+ 'operationName': 'ArchipelagoSingleArticleQuery',
+ 'variables': json.dumps({
+ 'name': id,
+ 'postType': post_type,
+ }),
+ }, headers={
+ 'wp-site': wp,
+ })
+ video = try_get(video, lambda x: x['data']['article']['video']) or {}
+ video_id = video.get('id')
+ account = video.get('accountId') or '911432371001'
+ player_id = video.get('playerId') or 'csvTfAlKW'
+ embed = 'default'
+
+ if video_id is None:
+ webpage = self._download_webpage(url, id)
+
+ account, player_id, embed, video_id = self._search_regex(self.BRIGHTCOVE_URL_RE, webpage, 'video id',
+ group=(1, 2, 3, 4), default=(None, None, None, None))
+
+ if video_id is None:
+ return {
+ '_type': 'url_transparent',
+ 'url': url,
+ 'ie_key': 'Generic'
+ }
+
+ return {
+ '_type': 'url_transparent',
+ 'url': f'https://players.brightcove.net/{account}/{player_id}_{embed}/index.html?videoId={video_id}',
+ 'ie_key': 'BrightcoveNew'
+ }
diff --git a/yt_dlp/extractor/allocine.py b/yt_dlp/extractor/allocine.py
new file mode 100644
index 0000000..2d342cf
--- /dev/null
+++ b/yt_dlp/extractor/allocine.py
@@ -0,0 +1,125 @@
+from .common import InfoExtractor
+from ..compat import compat_str
+from ..utils import (
+ int_or_none,
+ qualities,
+ remove_end,
+ strip_or_none,
+ try_get,
+ unified_timestamp,
+ url_basename,
+)
+
+
+class AllocineIE(InfoExtractor):
+ _VALID_URL = r'https?://(?:www\.)?allocine\.fr/(?:article|video|film)/(?:fichearticle_gen_carticle=|player_gen_cmedia=|fichefilm_gen_cfilm=|video-)(?P<id>[0-9]+)(?:\.html)?'
+
+ _TESTS = [{
+ 'url': 'http://www.allocine.fr/article/fichearticle_gen_carticle=18635087.html',
+ 'md5': '0c9fcf59a841f65635fa300ac43d8269',
+ 'info_dict': {
+ 'id': '19546517',
+ 'display_id': '18635087',
+ 'ext': 'mp4',
+ 'title': 'Astérix - Le Domaine des Dieux Teaser VF',
+ 'description': 'md5:4a754271d9c6f16c72629a8a993ee884',
+ 'thumbnail': r're:http://.*\.jpg',
+ 'duration': 39,
+ 'timestamp': 1404273600,
+ 'upload_date': '20140702',
+ 'view_count': int,
+ },
+ }, {
+ 'url': 'http://www.allocine.fr/video/player_gen_cmedia=19540403&cfilm=222257.html',
+ 'md5': 'd0cdce5d2b9522ce279fdfec07ff16e0',
+ 'info_dict': {
+ 'id': '19540403',
+ 'display_id': '19540403',
+ 'ext': 'mp4',
+ 'title': 'Planes 2 Bande-annonce VF',
+ 'description': 'Regardez la bande annonce du film Planes 2 (Planes 2 Bande-annonce VF). Planes 2, un film de Roberts Gannaway',
+ 'thumbnail': r're:http://.*\.jpg',
+ 'duration': 69,
+ 'timestamp': 1385659800,
+ 'upload_date': '20131128',
+ 'view_count': int,
+ },
+ }, {
+ 'url': 'http://www.allocine.fr/video/player_gen_cmedia=19544709&cfilm=181290.html',
+ 'md5': '101250fb127ef9ca3d73186ff22a47ce',
+ 'info_dict': {
+ 'id': '19544709',
+ 'display_id': '19544709',
+ 'ext': 'mp4',
+ 'title': 'Dragons 2 - Bande annonce finale VF',
+ 'description': 'md5:6cdd2d7c2687d4c6aafe80a35e17267a',
+ 'thumbnail': r're:http://.*\.jpg',
+ 'duration': 144,
+ 'timestamp': 1397589900,
+ 'upload_date': '20140415',
+ 'view_count': int,
+ },
+ }, {
+ 'url': 'http://www.allocine.fr/video/video-19550147/',
+ 'md5': '3566c0668c0235e2d224fd8edb389f67',
+ 'info_dict': {
+ 'id': '19550147',
+ 'ext': 'mp4',
+ 'title': 'Faux Raccord N°123 - Les gaffes de Cliffhanger',
+ 'description': 'md5:bc734b83ffa2d8a12188d9eb48bb6354',
+ 'thumbnail': r're:http://.*\.jpg',
+ },
+ }]
+
+ def _real_extract(self, url):
+ display_id = self._match_id(url)
+
+ webpage = self._download_webpage(url, display_id)
+
+ formats = []
+ quality = qualities(['ld', 'md', 'hd'])
+
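+        # newer pages embed the player config in a data-model attribute; older
+        # player pages fall back to the AcVisiondata API keyed by the media id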
+ model = self._html_search_regex(
+ r'data-model="([^"]+)"', webpage, 'data model', default=None)
+ if model:
+ model_data = self._parse_json(model, display_id)
+ video = model_data['videos'][0]
+ title = video['title']
+ for video_url in video['sources'].values():
+ video_id, format_id = url_basename(video_url).split('_')[:2]
+ formats.append({
+ 'format_id': format_id,
+ 'quality': quality(format_id),
+ 'url': video_url,
+ })
+ duration = int_or_none(video.get('duration'))
+ view_count = int_or_none(video.get('view_count'))
+ timestamp = unified_timestamp(try_get(
+ video, lambda x: x['added_at']['date'], compat_str))
+ else:
+ video_id = display_id
+ media_data = self._download_json(
+ 'http://www.allocine.fr/ws/AcVisiondataV5.ashx?media=%s' % video_id, display_id)
+ title = remove_end(strip_or_none(self._html_extract_title(webpage), ' - AlloCiné'))
+ for key, value in media_data['video'].items():
+ if not key.endswith('Path'):
+ continue
+ format_id = key[:-len('Path')]
+ formats.append({
+ 'format_id': format_id,
+ 'quality': quality(format_id),
+ 'url': value,
+ })
+ duration, view_count, timestamp = [None] * 3
+
+ return {
+ 'id': video_id,
+ 'display_id': display_id,
+ 'title': title,
+ 'description': self._og_search_description(webpage),
+ 'thumbnail': self._og_search_thumbnail(webpage),
+ 'duration': duration,
+ 'timestamp': timestamp,
+ 'view_count': view_count,
+ 'formats': formats,
+ }
diff --git a/yt_dlp/extractor/allstar.py b/yt_dlp/extractor/allstar.py
new file mode 100644
index 0000000..87219f2
--- /dev/null
+++ b/yt_dlp/extractor/allstar.py
@@ -0,0 +1,253 @@
+import functools
+import json
+
+from .common import InfoExtractor
+from ..utils import (
+ ExtractorError,
+ OnDemandPagedList,
+ int_or_none,
+ join_nonempty,
+ parse_qs,
+ urljoin,
+)
+from ..utils.traversal import traverse_obj
+
+
+_FIELDS = '''
+ _id
+ clipImageSource
+ clipImageThumb
+ clipLink
+ clipTitle
+ createdDate
+ shareId
+ user { _id }
+ username
+ views'''
+
+_EXTRA_FIELDS = '''
+ clipLength
+ clipSizeBytes'''
+
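+# the GraphQL documents below interpolate the shared field fragments above;
+# montage queries omit the clip-only length/size fields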
+_QUERIES = {
+ 'clip': '''query ($id: String!) {
+ video: getClip(clipIdentifier: $id) {
+ %s %s
+ }
+ }''' % (_FIELDS, _EXTRA_FIELDS),
+ 'montage': '''query ($id: String!) {
+ video: getMontage(clipIdentifier: $id) {
+ %s
+ }
+ }''' % _FIELDS,
+ 'Clips': '''query ($page: Int!, $user: String!, $game: Int) {
+ videos: clips(search: createdDate, page: $page, user: $user, mobile: false, game: $game) {
+ data { %s %s }
+ }
+ }''' % (_FIELDS, _EXTRA_FIELDS),
+ 'Montages': '''query ($page: Int!, $user: String!) {
+ videos: montages(search: createdDate, page: $page, user: $user) {
+ data { %s }
+ }
+ }''' % _FIELDS,
+ 'Mobile Clips': '''query ($page: Int!, $user: String!) {
+ videos: clips(search: createdDate, page: $page, user: $user, mobile: true) {
+ data { %s %s }
+ }
+ }''' % (_FIELDS, _EXTRA_FIELDS),
+}
+
+
+class AllstarBaseIE(InfoExtractor):
+ @staticmethod
+ def _parse_video_data(video_data):
+ def media_url_or_none(path):
+ return urljoin('https://media.allstar.gg/', path)
+
+ info = traverse_obj(video_data, {
+ 'id': ('_id', {str}),
+ 'display_id': ('shareId', {str}),
+ 'title': ('clipTitle', {str}),
+ 'url': ('clipLink', {media_url_or_none}),
+ 'thumbnails': (('clipImageThumb', 'clipImageSource'), {'url': {media_url_or_none}}),
+ 'duration': ('clipLength', {int_or_none}),
+ 'filesize': ('clipSizeBytes', {int_or_none}),
+ 'timestamp': ('createdDate', {functools.partial(int_or_none, scale=1000)}),
+ 'uploader': ('username', {str}),
+ 'uploader_id': ('user', '_id', {str}),
+ 'view_count': ('views', {int_or_none}),
+ })
+
+ if info.get('id') and info.get('url'):
+ basename = 'clip' if '/clips/' in info['url'] else 'montage'
+ info['webpage_url'] = f'https://allstar.gg/{basename}?{basename}={info["id"]}'
+
+ info.update({
+ 'extractor_key': AllstarIE.ie_key(),
+ 'extractor': AllstarIE.IE_NAME,
+ 'uploader_url': urljoin('https://allstar.gg/u/', info.get('uploader_id')),
+ })
+
+ return info
+
+ def _call_api(self, query, variables, path, video_id=None, note=None):
+ response = self._download_json(
+ 'https://a1.allstar.gg/graphql', video_id, note=note,
+ headers={'content-type': 'application/json'},
+ data=json.dumps({'variables': variables, 'query': query}).encode())
+
+ errors = traverse_obj(response, ('errors', ..., 'message', {str}))
+ if errors:
+ raise ExtractorError('; '.join(errors))
+
+ return traverse_obj(response, path)
+
+
+class AllstarIE(AllstarBaseIE):
+ _VALID_URL = r'https?://(?:www\.)?allstar\.gg/(?P<type>(?:clip|montage))\?(?P=type)=(?P<id>[^/?#&]+)'
+
+ _TESTS = [{
+ 'url': 'https://allstar.gg/clip?clip=64482c2da9eec30008a67d1b',
+ 'info_dict': {
+ 'id': '64482c2da9eec30008a67d1b',
+ 'title': '4K on Inferno',
+ 'url': 'md5:66befb5381eef0c9456026386c25fa55',
+ 'thumbnail': r're:https://media\.allstar\.gg/.+\.(?:png|jpg)$',
+ 'uploader': 'chrk.',
+ 'ext': 'mp4',
+ 'duration': 20,
+ 'filesize': 21199257,
+ 'timestamp': 1682451501,
+ 'uploader_id': '62b8bdfc9021052f7905882d',
+ 'uploader_url': 'https://allstar.gg/u/62b8bdfc9021052f7905882d',
+ 'upload_date': '20230425',
+ 'view_count': int,
+ }
+ }, {
+ 'url': 'https://allstar.gg/clip?clip=8LJLY4JKB',
+ 'info_dict': {
+ 'id': '64a1ec6b887f4c0008dc50b8',
+ 'display_id': '8LJLY4JKB',
+ 'title': 'AK-47 3K on Mirage',
+ 'url': 'md5:dde224fd12f035c0e2529a4ae34c4283',
+ 'ext': 'mp4',
+ 'thumbnail': r're:https://media\.allstar\.gg/.+\.(?:png|jpg)$',
+ 'duration': 16,
+ 'filesize': 30175859,
+ 'timestamp': 1688333419,
+ 'uploader': 'cherokee',
+ 'uploader_id': '62b8bdfc9021052f7905882d',
+ 'uploader_url': 'https://allstar.gg/u/62b8bdfc9021052f7905882d',
+ 'upload_date': '20230702',
+ 'view_count': int,
+ }
+ }, {
+ 'url': 'https://allstar.gg/montage?montage=643e64089da7e9363e1fa66c',
+ 'info_dict': {
+ 'id': '643e64089da7e9363e1fa66c',
+ 'display_id': 'APQLGM2IMXW',
+ 'title': 'cherokee Rapid Fire Snipers Montage',
+ 'url': 'md5:a3ee356022115db2b27c81321d195945',
+ 'thumbnail': r're:https://media\.allstar\.gg/.+\.(?:png|jpg)$',
+ 'ext': 'mp4',
+ 'timestamp': 1681810448,
+ 'uploader': 'cherokee',
+ 'uploader_id': '62b8bdfc9021052f7905882d',
+ 'uploader_url': 'https://allstar.gg/u/62b8bdfc9021052f7905882d',
+ 'upload_date': '20230418',
+ 'view_count': int,
+ }
+ }, {
+ 'url': 'https://allstar.gg/montage?montage=RILJMH6QOS',
+ 'info_dict': {
+ 'id': '64a2697372ce3703de29e868',
+ 'display_id': 'RILJMH6QOS',
+ 'title': 'cherokee Rapid Fire Snipers Montage',
+ 'url': 'md5:d5672e6f88579730c2310a80fdbc4030',
+ 'thumbnail': r're:https://media\.allstar\.gg/.+\.(?:png|jpg)$',
+ 'ext': 'mp4',
+ 'timestamp': 1688365434,
+ 'uploader': 'cherokee',
+ 'uploader_id': '62b8bdfc9021052f7905882d',
+ 'uploader_url': 'https://allstar.gg/u/62b8bdfc9021052f7905882d',
+ 'upload_date': '20230703',
+ 'view_count': int,
+ }
+ }]
+
+ def _real_extract(self, url):
+ query_id, video_id = self._match_valid_url(url).group('type', 'id')
+
+ return self._parse_video_data(
+ self._call_api(
+ _QUERIES.get(query_id), {'id': video_id}, ('data', 'video'), video_id))
+
+
+class AllstarProfileIE(AllstarBaseIE):
+ _VALID_URL = r'https?://(?:www\.)?allstar\.gg/(?:profile\?user=|u/)(?P<id>[^/?#&]+)'
+
+ _TESTS = [{
+ 'url': 'https://allstar.gg/profile?user=62b8bdfc9021052f7905882d',
+ 'info_dict': {
+ 'id': '62b8bdfc9021052f7905882d-clips',
+ 'title': 'cherokee - Clips',
+ },
+ 'playlist_mincount': 15
+ }, {
+ 'url': 'https://allstar.gg/u/cherokee?game=730&view=Clips',
+ 'info_dict': {
+ 'id': '62b8bdfc9021052f7905882d-clips-730',
+ 'title': 'cherokee - Clips - 730',
+ },
+ 'playlist_mincount': 15
+ }, {
+ 'url': 'https://allstar.gg/u/62b8bdfc9021052f7905882d?view=Montages',
+ 'info_dict': {
+ 'id': '62b8bdfc9021052f7905882d-montages',
+ 'title': 'cherokee - Montages',
+ },
+ 'playlist_mincount': 4
+ }, {
+ 'url': 'https://allstar.gg/profile?user=cherokee&view=Mobile Clips',
+ 'info_dict': {
+ 'id': '62b8bdfc9021052f7905882d-mobile',
+ 'title': 'cherokee - Mobile Clips',
+ },
+ 'playlist_mincount': 1
+ }]
+
+ _PAGE_SIZE = 10
+
+ def _get_page(self, user_id, display_id, game, query, page_num):
+ page_num += 1
+
+ for video_data in self._call_api(
+ query, {
+ 'user': user_id,
+ 'page': page_num,
+ 'game': game,
+ }, ('data', 'videos', 'data'), display_id, f'Downloading page {page_num}'):
+ yield self._parse_video_data(video_data)
+
+ def _real_extract(self, url):
+ display_id = self._match_id(url)
+ profile_data = self._download_json(
+ urljoin('https://api.allstar.gg/v1/users/profile/', display_id), display_id)
+        user_id = traverse_obj(profile_data, ('data', '_id', {str}))
+ if not user_id:
+ raise ExtractorError('Unable to extract the user id')
+
+        username = traverse_obj(profile_data, ('data', 'profile', 'username', {str}))
+ url_query = parse_qs(url)
+ game = traverse_obj(url_query, ('game', 0, {int_or_none}))
+ query_id = traverse_obj(url_query, ('view', 0), default='Clips')
+
+ if query_id not in ('Clips', 'Montages', 'Mobile Clips'):
+ raise ExtractorError(f'Unsupported playlist URL type {query_id!r}')
+
+ return self.playlist_result(
+ OnDemandPagedList(
+ functools.partial(
+ self._get_page, user_id, display_id, game, _QUERIES.get(query_id)), self._PAGE_SIZE),
+ playlist_id=join_nonempty(user_id, query_id.lower().split()[0], game),
+ playlist_title=join_nonempty((username or display_id), query_id, game, delim=' - '))
diff --git a/yt_dlp/extractor/alphaporno.py b/yt_dlp/extractor/alphaporno.py
new file mode 100644
index 0000000..8d5b472
--- /dev/null
+++ b/yt_dlp/extractor/alphaporno.py
@@ -0,0 +1,75 @@
+from .common import InfoExtractor
+from ..utils import (
+    int_or_none,
+    parse_duration,
+    parse_filesize,
+    parse_iso8601,
+)
+
+
+class AlphaPornoIE(InfoExtractor):
+ _VALID_URL = r'https?://(?:www\.)?alphaporno\.com/videos/(?P<id>[^/]+)'
+ _TEST = {
+ 'url': 'http://www.alphaporno.com/videos/sensual-striptease-porn-with-samantha-alexandra/',
+ 'md5': 'feb6d3bba8848cd54467a87ad34bd38e',
+ 'info_dict': {
+ 'id': '258807',
+ 'display_id': 'sensual-striptease-porn-with-samantha-alexandra',
+ 'ext': 'mp4',
+ 'title': 'Sensual striptease porn with Samantha Alexandra',
+ 'thumbnail': r're:https?://.*\.jpg$',
+ 'timestamp': 1418694611,
+ 'upload_date': '20141216',
+ 'duration': 387,
+ 'filesize_approx': 54120000,
+ 'tbr': 1145,
+ 'categories': list,
+ 'age_limit': 18,
+ }
+ }
+
+ def _real_extract(self, url):
+ display_id = self._match_id(url)
+
+ webpage = self._download_webpage(url, display_id)
+
+ video_id = self._search_regex(
+ r"video_id\s*:\s*'([^']+)'", webpage, 'video id', default=None)
+
+ video_url = self._search_regex(
+ r"video_url\s*:\s*'([^']+)'", webpage, 'video url')
+ ext = self._html_search_meta(
+ 'encodingFormat', webpage, 'ext', default='.mp4')[1:]
+
+ title = self._search_regex(
+ [r'<meta content="([^"]+)" itemprop="description">',
+ r'class="title" itemprop="name">([^<]+)<'],
+ webpage, 'title')
+ thumbnail = self._html_search_meta('thumbnail', webpage, 'thumbnail')
+ timestamp = parse_iso8601(self._html_search_meta(
+ 'uploadDate', webpage, 'upload date'))
+ duration = parse_duration(self._html_search_meta(
+ 'duration', webpage, 'duration'))
+ filesize_approx = parse_filesize(self._html_search_meta(
+ 'contentSize', webpage, 'file size'))
+ bitrate = int_or_none(self._html_search_meta(
+ 'bitrate', webpage, 'bitrate'))
+ categories = self._html_search_meta(
+ 'keywords', webpage, 'categories', default='').split(',')
+
+ age_limit = self._rta_search(webpage)
+
+ return {
+ 'id': video_id,
+ 'display_id': display_id,
+ 'url': video_url,
+ 'ext': ext,
+ 'title': title,
+ 'thumbnail': thumbnail,
+ 'timestamp': timestamp,
+ 'duration': duration,
+ 'filesize_approx': filesize_approx,
+ 'tbr': bitrate,
+ 'categories': categories,
+ 'age_limit': age_limit,
+ }
diff --git a/yt_dlp/extractor/alsace20tv.py b/yt_dlp/extractor/alsace20tv.py
new file mode 100644
index 0000000..ea3332e
--- /dev/null
+++ b/yt_dlp/extractor/alsace20tv.py
@@ -0,0 +1,83 @@
+from .common import InfoExtractor
+from ..utils import (
+ clean_html,
+ dict_get,
+ get_element_by_class,
+ int_or_none,
+ unified_strdate,
+ url_or_none,
+)
+
+
+class Alsace20TVBaseIE(InfoExtractor):
+ def _extract_video(self, video_id, url=None):
+ info = self._download_json(
+ 'https://www.alsace20.tv/visionneuse/visio_v9_js.php?key=%s&habillage=0&mode=html' % (video_id, ),
+ video_id) or {}
+ title = info.get('titre')
+
+ formats = []
+ for res, fmt_url in (info.get('files') or {}).items():
+ formats.extend(
+ self._extract_smil_formats(fmt_url, video_id, fatal=False)
+ if '/smil:_' in fmt_url
+ else self._extract_mpd_formats(fmt_url, video_id, mpd_id=res, fatal=False))
+
+ webpage = (url and self._download_webpage(url, video_id, fatal=False)) or ''
+ thumbnail = url_or_none(dict_get(info, ('image', 'preview', )) or self._og_search_thumbnail(webpage))
+ upload_date = self._search_regex(r'/(\d{6})_', thumbnail, 'upload_date', default=None)
+ upload_date = unified_strdate('20%s-%s-%s' % (upload_date[:2], upload_date[2:4], upload_date[4:])) if upload_date else None
+ return {
+ 'id': video_id,
+ 'title': title,
+ 'formats': formats,
+ 'description': clean_html(get_element_by_class('wysiwyg', webpage)),
+ 'upload_date': upload_date,
+ 'thumbnail': thumbnail,
+ 'duration': int_or_none(self._og_search_property('video:duration', webpage) if webpage else None),
+ 'view_count': int_or_none(info.get('nb_vues')),
+ }
+
+
+class Alsace20TVIE(Alsace20TVBaseIE):
+ _VALID_URL = r'https?://(?:www\.)?alsace20\.tv/(?:[\w-]+/)+[\w-]+-(?P<id>[\w]+)'
+ _TESTS = [{
+ 'url': 'https://www.alsace20.tv/VOD/Actu/JT/Votre-JT-jeudi-3-fevrier-lyNHCXpYJh.html',
+ 'info_dict': {
+ 'id': 'lyNHCXpYJh',
+ 'ext': 'mp4',
+ 'description': 'md5:fc0bc4a0692d3d2dba4524053de4c7b7',
+ 'title': 'Votre JT du jeudi 3 février',
+ 'upload_date': '20220203',
+ 'thumbnail': r're:https?://.+\.jpg',
+ 'duration': 1073,
+ 'view_count': int,
+ },
+ }]
+
+ def _real_extract(self, url):
+ video_id = self._match_id(url)
+ return self._extract_video(video_id, url)
+
+
+class Alsace20TVEmbedIE(Alsace20TVBaseIE):
+ _VALID_URL = r'https?://(?:www\.)?alsace20\.tv/emb/(?P<id>[\w]+)'
+ _TESTS = [{
+ 'url': 'https://www.alsace20.tv/emb/lyNHCXpYJh',
+ # 'md5': 'd91851bf9af73c0ad9b2cdf76c127fbb',
+ 'info_dict': {
+ 'id': 'lyNHCXpYJh',
+ 'ext': 'mp4',
+ 'title': 'Votre JT du jeudi 3 février',
+ 'upload_date': '20220203',
+ 'thumbnail': r're:https?://.+\.jpg',
+ 'view_count': int,
+ },
+ 'params': {
+ 'format': 'bestvideo',
+ },
+ }]
+
+ def _real_extract(self, url):
+ video_id = self._match_id(url)
+ return self._extract_video(video_id)
diff --git a/yt_dlp/extractor/altcensored.py b/yt_dlp/extractor/altcensored.py
new file mode 100644
index 0000000..6878918
--- /dev/null
+++ b/yt_dlp/extractor/altcensored.py
@@ -0,0 +1,104 @@
+import re
+
+from .archiveorg import ArchiveOrgIE
+from .common import InfoExtractor
+from ..utils import (
+ InAdvancePagedList,
+ clean_html,
+ int_or_none,
+ orderedSet,
+ str_to_int,
+ urljoin,
+)
+
+
+class AltCensoredIE(InfoExtractor):
+ IE_NAME = 'altcensored'
+ _VALID_URL = r'https?://(?:www\.)?altcensored\.com/(?:watch\?v=|embed/)(?P<id>[^/?#]+)'
+ _TESTS = [{
+ 'url': 'https://www.altcensored.com/watch?v=k0srjLSkga8',
+ 'info_dict': {
+ 'id': 'youtube-k0srjLSkga8',
+ 'ext': 'webm',
+ 'title': "QUELLES SONT LES CONSÉQUENCES DE L'HYPERSEXUALISATION DE LA SOCIÉTÉ ?",
+ 'display_id': 'k0srjLSkga8.webm',
+ 'release_date': '20180403',
+ 'creators': ['Virginie Vota'],
+ 'release_year': 2018,
+ 'upload_date': '20230318',
+ 'uploader': 'admin@altcensored.com',
+ 'description': 'md5:0b38a8fc04103579d5c1db10a247dc30',
+ 'timestamp': 1679161343,
+ 'track': 'k0srjLSkga8',
+ 'duration': 926.09,
+ 'thumbnail': 'https://archive.org/download/youtube-k0srjLSkga8/youtube-k0srjLSkga8.thumbs/k0srjLSkga8_000925.jpg',
+ 'view_count': int,
+ 'categories': ['News & Politics'],
+ }
+ }]
+
+ def _real_extract(self, url):
+ video_id = self._match_id(url)
+ webpage = self._download_webpage(url, video_id)
+ category = clean_html(self._html_search_regex(
+ r'<a href="/category/\d+">([^<]+)</a>', webpage, 'category', default=None))
+
+ return {
+ '_type': 'url_transparent',
+ 'url': f'https://archive.org/details/youtube-{video_id}',
+ 'ie_key': ArchiveOrgIE.ie_key(),
+ 'view_count': str_to_int(self._html_search_regex(
+ r'YouTube Views:(?:\s|&nbsp;)*([\d,]+)', webpage, 'view count', default=None)),
+ 'categories': [category] if category else None,
+ }
+
+
+class AltCensoredChannelIE(InfoExtractor):
+ IE_NAME = 'altcensored:channel'
+ _VALID_URL = r'https?://(?:www\.)?altcensored\.com/channel/(?!page|table)(?P<id>[^/?#]+)'
+ _PAGE_SIZE = 24
+ _TESTS = [{
+ 'url': 'https://www.altcensored.com/channel/UCFPTO55xxHqFqkzRZHu4kcw',
+ 'info_dict': {
+ 'title': 'Virginie Vota',
+ 'id': 'UCFPTO55xxHqFqkzRZHu4kcw',
+ },
+ 'playlist_count': 85,
+ }, {
+ 'url': 'https://altcensored.com/channel/UC9CcJ96HKMWn0LZlcxlpFTw',
+ 'info_dict': {
+ 'title': 'yukikaze775',
+ 'id': 'UC9CcJ96HKMWn0LZlcxlpFTw',
+ },
+ 'playlist_count': 4,
+ }, {
+ 'url': 'https://altcensored.com/channel/UCfYbb7nga6-icsFWWgS-kWw',
+ 'info_dict': {
+ 'title': 'Mister Metokur',
+ 'id': 'UCfYbb7nga6-icsFWWgS-kWw',
+ },
+ 'playlist_count': 121,
+ }]
+
+ def _real_extract(self, url):
+ channel_id = self._match_id(url)
+ webpage = self._download_webpage(
+ url, channel_id, 'Download channel webpage', 'Unable to get channel webpage')
+ title = self._html_search_meta('altcen_title', webpage, 'title', fatal=False)
+ page_count = int_or_none(self._html_search_regex(
+ r'<a[^>]+href="/channel/[\w-]+/page/(\d+)">(?:\1)</a>',
+ webpage, 'page count', default='1'))
+
+ def page_func(page_num):
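+            # page_func receives 0-based page numbers; the site paginates from 1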
+ page_num += 1
+ webpage = self._download_webpage(
+ f'https://altcensored.com/channel/{channel_id}/page/{page_num}',
+ channel_id, note=f'Downloading page {page_num}')
+
+ items = re.findall(r'<a[^>]+href="(/watch\?v=[^"]+)', webpage)
+ return [self.url_result(urljoin('https://www.altcensored.com', path), AltCensoredIE)
+ for path in orderedSet(items)]
+
+ return self.playlist_result(
+ InAdvancePagedList(page_func, page_count, self._PAGE_SIZE),
+ playlist_id=channel_id, playlist_title=title)
diff --git a/yt_dlp/extractor/alura.py b/yt_dlp/extractor/alura.py
new file mode 100644
index 0000000..bfe066b
--- /dev/null
+++ b/yt_dlp/extractor/alura.py
@@ -0,0 +1,167 @@
+import re
+
+from .common import InfoExtractor
+from ..compat import compat_urlparse
+from ..utils import (
+    ExtractorError,
+    clean_html,
+    int_or_none,
+    urlencode_postdata,
+    urljoin,
+)
+
+
+class AluraIE(InfoExtractor):
+ _VALID_URL = r'https?://(?:cursos\.)?alura\.com\.br/course/(?P<course_name>[^/]+)/task/(?P<id>\d+)'
+ _LOGIN_URL = 'https://cursos.alura.com.br/loginForm?urlAfterLogin=/loginForm'
+ _VIDEO_URL = 'https://cursos.alura.com.br/course/%s/task/%s/video'
+ _NETRC_MACHINE = 'alura'
+    _TESTS = [{
+        'url': 'https://cursos.alura.com.br/course/clojure-mutabilidade-com-atoms-e-refs/task/60095',
+        'info_dict': {
+            'id': '60095',
+            'ext': 'mp4',
+            'title': 'Referências, ref-set e alter',
+        },
+        'skip': 'Requires alura account credentials',
+    }, {
+        # URL without video
+        'url': 'https://cursos.alura.com.br/course/clojure-mutabilidade-com-atoms-e-refs/task/60098',
+        'only_matching': True,
+    }, {
+        'url': 'https://cursos.alura.com.br/course/fundamentos-market-digital/task/55219',
+        'only_matching': True,
+    }]
+
+    def _real_extract(self, url):
+        course, video_id = self._match_valid_url(url).group('course_name', 'id')
+        video_url = self._VIDEO_URL % (course, video_id)
+
+ video_dict = self._download_json(video_url, video_id, 'Searching for videos')
+
+ if video_dict:
+ webpage = self._download_webpage(url, video_id)
+ video_title = clean_html(self._search_regex(
+ r'<span[^>]+class=(["\'])task-body-header-title-text\1[^>]*>(?P<title>[^<]+)',
+ webpage, 'title', group='title'))
+
+ formats = []
+ for video_obj in video_dict:
+ video_url_m3u8 = video_obj.get('link')
+ video_format = self._extract_m3u8_formats(
+ video_url_m3u8, None, 'mp4', entry_protocol='m3u8_native',
+ m3u8_id='hls', fatal=False)
+ for f in video_format:
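+                    # infer the height from the '-<res>.mp4' suffix of the
+                    # variant URL when the HLS manifest does not provide it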
+ m = re.search(r'^[\w \W]*-(?P<res>\w*).mp4[\W \w]*', f['url'])
+ if m:
+ if not f.get('height'):
+ f['height'] = int('720' if m.group('res') == 'hd' else '480')
+ formats.extend(video_format)
+
+ return {
+ 'id': video_id,
+ 'title': video_title,
+ "formats": formats
+ }
+
+    def _perform_login(self, username, password):
+ login_page = self._download_webpage(
+ self._LOGIN_URL, None, 'Downloading login popup')
+
+ def is_logged(webpage):
+ return any(re.search(p, webpage) for p in (
+ r'href=[\"|\']?/signout[\"|\']',
+ r'>Logout<'))
+
+ # already logged in
+ if is_logged(login_page):
+ return
+
+ login_form = self._hidden_inputs(login_page)
+
+ login_form.update({
+ 'username': username,
+ 'password': password,
+ })
+
+ post_url = self._search_regex(
+ r'<form[^>]+class=["|\']signin-form["|\'] action=["|\'](?P<url>.+?)["|\']', login_page,
+ 'post url', default=self._LOGIN_URL, group='url')
+
+ if not post_url.startswith('http'):
+ post_url = compat_urlparse.urljoin(self._LOGIN_URL, post_url)
+
+ response = self._download_webpage(
+ post_url, None, 'Logging in',
+ data=urlencode_postdata(login_form),
+ headers={'Content-Type': 'application/x-www-form-urlencoded'})
+
+ if not is_logged(response):
+ error = self._html_search_regex(
+ r'(?s)<p[^>]+class="alert-message[^"]*">(.+?)</p>',
+ response, 'error message', default=None)
+ if error:
+                raise ExtractorError('Unable to log in: %s' % error, expected=True)
+ raise ExtractorError('Unable to log in')
+
+
+class AluraCourseIE(AluraIE):  # XXX: Do not subclass from concrete IE
+
+ _VALID_URL = r'https?://(?:cursos\.)?alura\.com\.br/course/(?P<id>[^/]+)'
+ _LOGIN_URL = 'https://cursos.alura.com.br/loginForm?urlAfterLogin=/loginForm'
+ _NETRC_MACHINE = 'aluracourse'
+ _TESTS = [{
+ 'url': 'https://cursos.alura.com.br/course/clojure-mutabilidade-com-atoms-e-refs',
+ 'only_matching': True,
+ }]
+
+ @classmethod
+ def suitable(cls, url):
+        return False if AluraIE.suitable(url) else super().suitable(url)
+
+    def _real_extract(self, url):
+ course_path = self._match_id(url)
+ webpage = self._download_webpage(url, course_path)
+
+ course_title = self._search_regex(
+ r'<h1.*?>(.*?)<strong>(?P<course_title>.*?)</strong></h[0-9]>', webpage,
+ 'course title', default=course_path, group='course_title')
+
+ entries = []
+ if webpage:
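+            # walk each course section page and collect its video task links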
+ for path in re.findall(r'<a\b(?=[^>]* class="[^"]*(?<=[" ])courseSectionList-section[" ])(?=[^>]* href="([^"]*))', webpage):
+ page_url = urljoin(url, path)
+ section_path = self._download_webpage(page_url, course_path)
+ for path_video in re.findall(r'<a\b(?=[^>]* class="[^"]*(?<=[" ])task-menu-nav-item-link-VIDEO[" ])(?=[^>]* href="([^"]*))', section_path):
+ chapter = clean_html(
+ self._search_regex(
+ r'<h3[^>]+class=(["\'])task-menu-section-title-text\1[^>]*>(?P<chapter>[^<]+)',
+ section_path,
+ 'chapter',
+ group='chapter'))
+
+ chapter_number = int_or_none(
+ self._search_regex(
+ r'<span[^>]+class=(["\'])task-menu-section-title-number[^>]*>(.*?)<strong>(?P<chapter_number>[^<]+)</strong>',
+ section_path,
+ 'chapter number',
+ group='chapter_number'))
+ video_url = urljoin(url, path_video)
+
+ entry = {
+ '_type': 'url_transparent',
+ 'id': self._match_id(video_url),
+ 'url': video_url,
+                    'ie_key': self.ie_key(),
+ 'chapter': chapter,
+                    'chapter_number': chapter_number,
+ }
+ entries.append(entry)
+ return self.playlist_result(entries, course_path, course_title)
diff --git a/yt_dlp/extractor/amadeustv.py b/yt_dlp/extractor/amadeustv.py
new file mode 100644
index 0000000..2f5ca91
--- /dev/null
+++ b/yt_dlp/extractor/amadeustv.py
@@ -0,0 +1,77 @@
+from .common import InfoExtractor
+from ..utils import (
+ ExtractorError,
+ float_or_none,
+ int_or_none,
+ parse_iso8601,
+ url_or_none,
+)
+from ..utils.traversal import traverse_obj
+
+
+class AmadeusTVIE(InfoExtractor):
+ _VALID_URL = r'https?://(?:www\.)?amadeus\.tv/library/(?P<id>[\da-f]+)'
+ _TESTS = [{
+ 'url': 'http://www.amadeus.tv/library/65091a87ff85af59d9fc54c3',
+ 'info_dict': {
+ 'id': '5576678021301411311',
+ 'ext': 'mp4',
+ 'title': 'Jieon Park - 第五届珠海莫扎特国际青少年音乐周小提琴C组第三轮',
+ 'thumbnail': 'http://1253584441.vod2.myqcloud.com/a0046a27vodtransbj1253584441/7db4af535576678021301411311/coverBySnapshot_10_0.jpg',
+ 'duration': 1264.8,
+ 'upload_date': '20230918',
+ 'timestamp': 1695034800,
+ 'display_id': '65091a87ff85af59d9fc54c3',
+ 'view_count': int,
+ 'description': 'md5:a0357b9c215489e2067cbae0b777bb95',
+ }
+ }]
+
+ def _real_extract(self, url):
+ display_id = self._match_id(url)
+ webpage = self._download_webpage(url, display_id)
+
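+        # The page is a Nuxt.js app; the ID of the actual video (a Tencent
+        # Cloud VOD file ID consumed by the qcloud playinfo API below) is
+        # buried in its __NUXT__ payload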
+ nuxt_data = self._search_nuxt_data(webpage, display_id, traverse=('fetch', '0'))
+ video_id = traverse_obj(nuxt_data, ('item', 'video', {str}))
+
+ if not video_id:
+ raise ExtractorError('Unable to extract actual video ID')
+
+ video_data = self._download_json(
+ f'http://playvideo.qcloud.com/getplayinfo/v2/1253584441/{video_id}',
+ video_id, headers={'Referer': 'http://www.amadeus.tv/'})
+
+ formats = []
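+        # Consider both the original source (videoInfo.sourceVideo) and every
+        # transcoded rendition (videoInfo.transcodeList), keeping dicts only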
+ for video in traverse_obj(video_data, ('videoInfo', ('sourceVideo', ('transcodeList', ...)), {dict})):
+ if not url_or_none(video.get('url')):
+ continue
+ formats.append({
+ **traverse_obj(video, {
+ 'url': 'url',
+ 'format_id': ('definition', {lambda x: f'http-{x or "0"}'}),
+ 'width': ('width', {int_or_none}),
+ 'height': ('height', {int_or_none}),
+ 'filesize': (('totalSize', 'size'), {int_or_none}),
+ 'vcodec': ('videoStreamList', 0, 'codec'),
+ 'acodec': ('audioStreamList', 0, 'codec'),
+ 'fps': ('videoStreamList', 0, 'fps', {float_or_none}),
+ }, get_all=False),
+ 'http_headers': {'Referer': 'http://www.amadeus.tv/'},
+ })
+
+ return {
+ 'id': video_id,
+ 'display_id': display_id,
+ 'formats': formats,
+ **traverse_obj(video_data, {
+ 'title': ('videoInfo', 'basicInfo', 'name', {str}),
+ 'thumbnail': ('coverInfo', 'coverUrl', {url_or_none}),
+ 'duration': ('videoInfo', 'sourceVideo', ('floatDuration', 'duration'), {float_or_none}),
+ }, get_all=False),
+ **traverse_obj(nuxt_data, ('item', {
+ 'title': (('title', 'title_en', 'title_cn'), {str}),
+ 'description': (('description', 'description_en', 'description_cn'), {str}),
+ 'timestamp': ('date', {parse_iso8601}),
+ 'view_count': ('view', {int_or_none}),
+ }), get_all=False),
+ }
diff --git a/yt_dlp/extractor/amara.py b/yt_dlp/extractor/amara.py
new file mode 100644
index 0000000..5018710
--- /dev/null
+++ b/yt_dlp/extractor/amara.py
@@ -0,0 +1,100 @@
+from .common import InfoExtractor
+from .vimeo import VimeoIE
+from .youtube import YoutubeIE
+from ..utils import (
+ int_or_none,
+ parse_iso8601,
+ update_url_query,
+)
+
+
+class AmaraIE(InfoExtractor):
+ _VALID_URL = r'https?://(?:www\.)?amara\.org/(?:\w+/)?videos/(?P<id>\w+)'
+ _TESTS = [{
+ # Youtube
+ 'url': 'https://amara.org/en/videos/jVx79ZKGK1ky/info/why-jury-trials-are-becoming-less-common/?tab=video',
+ 'md5': 'ea10daf2b6154b8c1ecf9922aca5e8ae',
+ 'info_dict': {
+ 'id': 'h6ZuVdvYnfE',
+ 'ext': 'mp4',
+ 'title': 'Why jury trials are becoming less common',
+ 'description': 'md5:a61811c319943960b6ab1c23e0cbc2c1',
+ 'thumbnail': r're:^https?://.*\.jpg$',
+ 'subtitles': dict,
+ 'upload_date': '20160813',
+ 'uploader': 'PBS NewsHour',
+ 'uploader_id': 'PBSNewsHour',
+ 'timestamp': 1549639570,
+ }
+ }, {
+ # Vimeo
+ 'url': 'https://amara.org/en/videos/kYkK1VUTWW5I/info/vimeo-at-ces-2011',
+ 'md5': '99392c75fa05d432a8f11df03612195e',
+ 'info_dict': {
+ 'id': '18622084',
+ 'ext': 'mov',
+ 'title': 'Vimeo at CES 2011!',
+ 'description': 'md5:d41d8cd98f00b204e9800998ecf8427e',
+ 'thumbnail': r're:^https?://.*\.jpg$',
+ 'subtitles': dict,
+ 'timestamp': 1294763658,
+ 'upload_date': '20110111',
+ 'uploader': 'Sam Morrill',
+ 'uploader_id': 'sammorrill'
+ }
+ }, {
+ # Direct Link
+ 'url': 'https://amara.org/en/videos/s8KL7I3jLmh6/info/the-danger-of-a-single-story/',
+ 'md5': 'd3970f08512738ee60c5807311ff5d3f',
+ 'info_dict': {
+ 'id': 's8KL7I3jLmh6',
+ 'ext': 'mp4',
+ 'title': 'The danger of a single story',
+ 'description': 'md5:d769b31139c3b8bb5be9177f62ea3f23',
+ 'thumbnail': r're:^https?://.*\.jpg$',
+ 'subtitles': dict,
+ 'upload_date': '20091007',
+ 'timestamp': 1254942511,
+ }
+ }]
+
+ def _real_extract(self, url):
+ video_id = self._match_id(url)
+ meta = self._download_json(
+ 'https://amara.org/api/videos/%s/' % video_id,
+ video_id, query={'format': 'json'})
+ title = meta['title']
+ video_url = meta['all_urls'][0]
+
+ subtitles = {}
+ for language in (meta.get('languages') or []):
+ subtitles_uri = language.get('subtitles_uri')
+ if not (subtitles_uri and language.get('published')):
+ continue
+ subtitle = subtitles.setdefault(language.get('code') or 'en', [])
+ for f in ('json', 'srt', 'vtt'):
+ subtitle.append({
+ 'ext': f,
+ 'url': update_url_query(subtitles_uri, {'format': f}),
+ })
+
+ info = {
+ 'url': video_url,
+ 'id': video_id,
+ 'subtitles': subtitles,
+ 'title': title,
+ 'description': meta.get('description'),
+ 'thumbnail': meta.get('thumbnail'),
+ 'duration': int_or_none(meta.get('duration')),
+ 'timestamp': parse_iso8601(meta.get('created')),
+ }
+
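+        # If the primary URL belongs to YouTube or Vimeo, delegate extraction
+        # to the matching extractor while keeping Amara's own metadata
+        # (notably the subtitles) via url_transparent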
+ for ie in (YoutubeIE, VimeoIE):
+ if ie.suitable(video_url):
+ info.update({
+ '_type': 'url_transparent',
+ 'ie_key': ie.ie_key(),
+ })
+ break
+
+ return info
diff --git a/yt_dlp/extractor/amazon.py b/yt_dlp/extractor/amazon.py
new file mode 100644
index 0000000..a03f983
--- /dev/null
+++ b/yt_dlp/extractor/amazon.py
@@ -0,0 +1,170 @@
+import re
+
+from .common import InfoExtractor
+from ..utils import (
+ ExtractorError,
+ clean_html,
+ float_or_none,
+ get_element_by_attribute,
+ get_element_by_class,
+ int_or_none,
+ js_to_json,
+ traverse_obj,
+ url_or_none,
+)
+
+
+class AmazonStoreIE(InfoExtractor):
+ _VALID_URL = r'https?://(?:www\.)?amazon\.(?:[a-z]{2,3})(?:\.[a-z]{2})?/(?:[^/]+/)?(?:dp|gp/product)/(?P<id>[^/&#$?]+)'
+
+ _TESTS = [{
+ 'url': 'https://www.amazon.co.uk/dp/B098XNCHLD/',
+ 'info_dict': {
+ 'id': 'B098XNCHLD',
+ 'title': str,
+ },
+ 'playlist_mincount': 1,
+ 'playlist': [{
+ 'info_dict': {
+ 'id': 'A1F83G8C2ARO7P',
+ 'ext': 'mp4',
+ 'title': 'mcdodo usb c cable 100W 5a',
+ 'thumbnail': r're:^https?://.*\.jpg$',
+ 'duration': 34,
+ },
+ }],
+ 'expected_warnings': ['Unable to extract data'],
+ }, {
+ 'url': 'https://www.amazon.in/Sony-WH-1000XM4-Cancelling-Headphones-Bluetooth/dp/B0863TXGM3',
+ 'info_dict': {
+ 'id': 'B0863TXGM3',
+ 'title': str,
+ },
+ 'playlist_mincount': 4,
+ 'expected_warnings': ['Unable to extract data'],
+ }, {
+ 'url': 'https://www.amazon.com/dp/B0845NXCXF/',
+ 'info_dict': {
+ 'id': 'B0845NXCXF',
+ 'title': str,
+ },
+        'playlist_mincount': 1,
+ 'expected_warnings': ['Unable to extract data'],
+ }, {
+ 'url': 'https://www.amazon.es/Samsung-Smartphone-s-AMOLED-Quad-c%C3%A1mara-espa%C3%B1ola/dp/B08WX337PQ',
+ 'info_dict': {
+ 'id': 'B08WX337PQ',
+ 'title': str,
+ },
+ 'playlist_mincount': 1,
+ 'expected_warnings': ['Unable to extract data'],
+ }]
+
+ def _real_extract(self, url):
+        playlist_id = self._match_id(url)
+
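+        # Amazon intermittently serves the product page without the embedded
+        # video data (hence the "Unable to extract data" warnings expected by
+        # the tests above), so retry the download a few times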
+ for retry in self.RetryManager():
+            webpage = self._download_webpage(url, playlist_id)
+ try:
+ data_json = self._search_json(
+                    r'var\s?obj\s?=\s?jQuery\.parseJSON\(\'', webpage, 'data', playlist_id,
+ transform_source=js_to_json)
+ except ExtractorError as e:
+ retry.error = e
+
+ entries = [{
+ 'id': video['marketPlaceID'],
+ 'url': video['url'],
+ 'title': video.get('title'),
+ 'thumbnail': video.get('thumbUrl') or video.get('thumb'),
+ 'duration': video.get('durationSeconds'),
+ 'height': int_or_none(video.get('videoHeight')),
+ 'width': int_or_none(video.get('videoWidth')),
+ } for video in (data_json.get('videos') or []) if video.get('isVideo') and video.get('url')]
+        return self.playlist_result(entries, playlist_id=playlist_id, playlist_title=data_json.get('title'))
+
+
+class AmazonReviewsIE(InfoExtractor):
+ _VALID_URL = r'https?://(?:www\.)?amazon\.(?:[a-z]{2,3})(?:\.[a-z]{2})?/gp/customer-reviews/(?P<id>[^/&#$?]+)'
+ _TESTS = [{
+ 'url': 'https://www.amazon.com/gp/customer-reviews/R10VE9VUSY19L3/ref=cm_cr_arp_d_rvw_ttl',
+ 'info_dict': {
+ 'id': 'R10VE9VUSY19L3',
+ 'ext': 'mp4',
+ 'title': 'Get squad #Suspicious',
+ 'description': 'md5:7012695052f440a1e064e402d87e0afb',
+ 'uploader': 'Kimberly Cronkright',
+ 'average_rating': 1.0,
+ 'thumbnail': r're:^https?://.*\.jpg$',
+ },
+ 'expected_warnings': ['Review body was not found in webpage'],
+ }, {
+ 'url': 'https://www.amazon.com/gp/customer-reviews/R10VE9VUSY19L3/ref=cm_cr_arp_d_rvw_ttl?language=es_US',
+ 'info_dict': {
+ 'id': 'R10VE9VUSY19L3',
+ 'ext': 'mp4',
+ 'title': 'Get squad #Suspicious',
+ 'description': 'md5:7012695052f440a1e064e402d87e0afb',
+ 'uploader': 'Kimberly Cronkright',
+ 'average_rating': 1.0,
+ 'thumbnail': r're:^https?://.*\.jpg$',
+ },
+ 'expected_warnings': ['Review body was not found in webpage'],
+ }, {
+ 'url': 'https://www.amazon.in/gp/customer-reviews/RV1CO8JN5VGXV/',
+ 'info_dict': {
+ 'id': 'RV1CO8JN5VGXV',
+ 'ext': 'mp4',
+ 'title': 'Not sure about its durability',
+ 'description': 'md5:1a252c106357f0a3109ebf37d2e87494',
+ 'uploader': 'Shoaib Gulzar',
+ 'average_rating': 2.0,
+ 'thumbnail': r're:^https?://.*\.jpg$',
+ },
+ 'expected_warnings': ['Review body was not found in webpage'],
+ }]
+
+ def _real_extract(self, url):
+ video_id = self._match_id(url)
+
+ for retry in self.RetryManager():
+ webpage = self._download_webpage(url, video_id)
+ review_body = get_element_by_attribute('data-hook', 'review-body', webpage)
+ if not review_body:
+ retry.error = ExtractorError('Review body was not found in webpage', expected=True)
+
+ formats, subtitles = [], {}
+
+ manifest_url = self._search_regex(
+ r'data-video-url="([^"]+)"', review_body, 'm3u8 url', default=None)
+ if url_or_none(manifest_url):
+ fmts, subtitles = self._extract_m3u8_formats_and_subtitles(
+ manifest_url, video_id, 'mp4', fatal=False)
+ formats.extend(fmts)
+
+ video_url = self._search_regex(
+ r'<input[^>]+\bvalue="([^"]+)"[^>]+\bclass="video-url"', review_body, 'mp4 url', default=None)
+ if url_or_none(video_url):
+ formats.append({
+ 'url': video_url,
+ 'ext': 'mp4',
+ 'format_id': 'http-mp4',
+ })
+
+ if not formats:
+ self.raise_no_formats('No video found for this customer review', expected=True)
+
+ return {
+ 'id': video_id,
+ 'title': (clean_html(get_element_by_attribute('data-hook', 'review-title', webpage))
+ or self._html_extract_title(webpage)),
+ 'description': clean_html(traverse_obj(re.findall(
+ r'<span(?:\s+class="cr-original-review-content")?>(.+?)</span>', review_body), -1)),
+ 'uploader': clean_html(get_element_by_class('a-profile-name', webpage)),
+ 'average_rating': float_or_none(clean_html(get_element_by_attribute(
+ 'data-hook', 'review-star-rating', webpage) or '').partition(' ')[0]),
+ 'thumbnail': self._search_regex(
+ r'data-thumbnail-url="([^"]+)"', review_body, 'thumbnail', default=None),
+ 'formats': formats,
+ 'subtitles': subtitles,
+ }
diff --git a/yt_dlp/extractor/amazonminitv.py b/yt_dlp/extractor/amazonminitv.py
new file mode 100644
index 0000000..2c71c5e
--- /dev/null
+++ b/yt_dlp/extractor/amazonminitv.py
@@ -0,0 +1,294 @@
+import json
+
+from .common import InfoExtractor
+from ..utils import ExtractorError, int_or_none, traverse_obj, try_get
+
+
+class AmazonMiniTVBaseIE(InfoExtractor):
+ def _real_initialize(self):
+ self._download_webpage(
+ 'https://www.amazon.in/minitv', None,
+ note='Fetching guest session cookies')
+ AmazonMiniTVBaseIE.session_id = self._get_cookies('https://www.amazon.in')['session-id'].value
+
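+    # The MiniTV backend exposes two endpoints: "prs" (GET with query
+    # parameters, returns playback info) and "graphql" (POST with a GraphQL
+    # payload, returns metadata); passing `data` selects the latter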
+ def _call_api(self, asin, data=None, note=None):
+ device = {'clientId': 'ATVIN', 'deviceLocale': 'en_GB'}
+ if data:
+ data['variables'].update({
+ 'contentType': 'VOD',
+ 'sessionIdToken': self.session_id,
+ **device,
+ })
+
+ resp = self._download_json(
+ f'https://www.amazon.in/minitv/api/web/{"graphql" if data else "prs"}',
+ asin, note=note, headers={
+ 'Content-Type': 'application/json',
+ 'currentpageurl': '/',
+ 'currentplatform': 'dWeb'
+ }, data=json.dumps(data).encode() if data else None,
+ query=None if data else {
+ 'deviceType': 'A1WMMUXPCUJL4N',
+ 'contentId': asin,
+ **device,
+ })
+
+ if resp.get('errors'):
+ raise ExtractorError(f'MiniTV said: {resp["errors"][0]["message"]}')
+ elif not data:
+ return resp
+ return resp['data'][data['operationName']]
+
+
+class AmazonMiniTVIE(AmazonMiniTVBaseIE):
+ _VALID_URL = r'(?:https?://(?:www\.)?amazon\.in/minitv/tp/|amazonminitv:(?:amzn1\.dv\.gti\.)?)(?P<id>[a-f0-9-]+)'
+ _TESTS = [{
+ 'url': 'https://www.amazon.in/minitv/tp/75fe3a75-b8fe-4499-8100-5c9424344840?referrer=https%3A%2F%2Fwww.amazon.in%2Fminitv',
+ 'info_dict': {
+ 'id': 'amzn1.dv.gti.75fe3a75-b8fe-4499-8100-5c9424344840',
+ 'ext': 'mp4',
+ 'title': 'May I Kiss You?',
+ 'language': 'Hindi',
+ 'thumbnail': r're:^https?://.*\.(?:jpg|png)$',
+ 'description': 'md5:a549bfc747973e04feb707833474e59d',
+ 'release_timestamp': 1644710400,
+ 'release_date': '20220213',
+ 'duration': 846,
+ 'chapters': 'count:2',
+ 'series': 'Couple Goals',
+ 'series_id': 'amzn1.dv.gti.56521d46-b040-4fd5-872e-3e70476a04b0',
+ 'season': 'Season 3',
+ 'season_number': 3,
+ 'season_id': 'amzn1.dv.gti.20331016-d9b9-4968-b991-c89fa4927a36',
+ 'episode': 'May I Kiss You?',
+ 'episode_number': 2,
+ 'episode_id': 'amzn1.dv.gti.75fe3a75-b8fe-4499-8100-5c9424344840',
+ },
+ }, {
+ 'url': 'https://www.amazon.in/minitv/tp/280d2564-584f-452f-9c98-7baf906e01ab?referrer=https%3A%2F%2Fwww.amazon.in%2Fminitv',
+ 'info_dict': {
+ 'id': 'amzn1.dv.gti.280d2564-584f-452f-9c98-7baf906e01ab',
+ 'ext': 'mp4',
+ 'title': 'Jahaan',
+ 'language': 'Hindi',
+ 'thumbnail': r're:^https?://.*\.(?:jpg|png)',
+ 'description': 'md5:05eb765a77bf703f322f120ec6867339',
+ 'release_timestamp': 1647475200,
+ 'release_date': '20220317',
+ 'duration': 783,
+ 'chapters': [],
+ },
+ }, {
+ 'url': 'https://www.amazon.in/minitv/tp/280d2564-584f-452f-9c98-7baf906e01ab',
+ 'only_matching': True,
+ }, {
+ 'url': 'amazonminitv:amzn1.dv.gti.280d2564-584f-452f-9c98-7baf906e01ab',
+ 'only_matching': True,
+ }, {
+ 'url': 'amazonminitv:280d2564-584f-452f-9c98-7baf906e01ab',
+ 'only_matching': True,
+ }]
+
+ _GRAPHQL_QUERY_CONTENT = '''
+query content($sessionIdToken: String!, $deviceLocale: String, $contentId: ID!, $contentType: ContentType!, $clientId: String) {
+ content(
+ applicationContextInput: {deviceLocale: $deviceLocale, sessionIdToken: $sessionIdToken, clientId: $clientId}
+ contentId: $contentId
+ contentType: $contentType
+ ) {
+ contentId
+ name
+ ... on Episode {
+ contentId
+ vodType
+ name
+ images
+ description {
+ synopsis
+ contentLengthInSeconds
+ }
+ publicReleaseDateUTC
+ audioTracks
+ seasonId
+ seriesId
+ seriesName
+ seasonNumber
+ episodeNumber
+ timecode {
+ endCreditsTime
+ }
+ }
+ ... on MovieContent {
+ contentId
+ vodType
+ name
+ description {
+ synopsis
+ contentLengthInSeconds
+ }
+ images
+ publicReleaseDateUTC
+ audioTracks
+ }
+ }
+}'''
+
+ def _real_extract(self, url):
+ asin = f'amzn1.dv.gti.{self._match_id(url)}'
+ prs = self._call_api(asin, note='Downloading playback info')
+
+ formats, subtitles = [], {}
+ for type_, asset in prs['playbackAssets'].items():
+ if not traverse_obj(asset, 'manifestUrl'):
+ continue
+ if type_ == 'hls':
+ m3u8_fmts, m3u8_subs = self._extract_m3u8_formats_and_subtitles(
+ asset['manifestUrl'], asin, ext='mp4', entry_protocol='m3u8_native',
+ m3u8_id=type_, fatal=False)
+ formats.extend(m3u8_fmts)
+ subtitles = self._merge_subtitles(subtitles, m3u8_subs)
+ elif type_ == 'dash':
+ mpd_fmts, mpd_subs = self._extract_mpd_formats_and_subtitles(
+ asset['manifestUrl'], asin, mpd_id=type_, fatal=False)
+ formats.extend(mpd_fmts)
+ subtitles = self._merge_subtitles(subtitles, mpd_subs)
+ else:
+ self.report_warning(f'Unknown asset type: {type_}')
+
+ title_info = self._call_api(
+ asin, note='Downloading title info', data={
+ 'operationName': 'content',
+ 'variables': {'contentId': asin},
+ 'query': self._GRAPHQL_QUERY_CONTENT,
+ })
+ credits_time = try_get(title_info, lambda x: x['timecode']['endCreditsTime'] / 1000)
+ is_episode = title_info.get('vodType') == 'EPISODE'
+
+ return {
+ 'id': asin,
+ 'title': title_info.get('name'),
+ 'formats': formats,
+ 'subtitles': subtitles,
+ 'language': traverse_obj(title_info, ('audioTracks', 0)),
+ 'thumbnails': [{
+ 'id': type_,
+ 'url': url,
+ } for type_, url in (title_info.get('images') or {}).items()],
+ 'description': traverse_obj(title_info, ('description', 'synopsis')),
+ 'release_timestamp': int_or_none(try_get(title_info, lambda x: x['publicReleaseDateUTC'] / 1000)),
+ 'duration': traverse_obj(title_info, ('description', 'contentLengthInSeconds')),
+ 'chapters': [{
+ 'start_time': credits_time,
+ 'title': 'End Credits',
+ }] if credits_time else [],
+ 'series': title_info.get('seriesName'),
+ 'series_id': title_info.get('seriesId'),
+ 'season_number': title_info.get('seasonNumber'),
+ 'season_id': title_info.get('seasonId'),
+ 'episode': title_info.get('name') if is_episode else None,
+ 'episode_number': title_info.get('episodeNumber'),
+ 'episode_id': asin if is_episode else None,
+ }
+
+
+class AmazonMiniTVSeasonIE(AmazonMiniTVBaseIE):
+ IE_NAME = 'amazonminitv:season'
+ _VALID_URL = r'amazonminitv:season:(?:amzn1\.dv\.gti\.)?(?P<id>[a-f0-9-]+)'
+ IE_DESC = 'Amazon MiniTV Season, "minitv:season:" prefix'
+ _TESTS = [{
+ 'url': 'amazonminitv:season:amzn1.dv.gti.0aa996eb-6a1b-4886-a342-387fbd2f1db0',
+ 'playlist_mincount': 6,
+ 'info_dict': {
+ 'id': 'amzn1.dv.gti.0aa996eb-6a1b-4886-a342-387fbd2f1db0',
+ },
+ }, {
+ 'url': 'amazonminitv:season:0aa996eb-6a1b-4886-a342-387fbd2f1db0',
+ 'only_matching': True,
+ }]
+
+ _GRAPHQL_QUERY = '''
+query getEpisodes($sessionIdToken: String!, $clientId: String, $episodeOrSeasonId: ID!, $deviceLocale: String) {
+ getEpisodes(
+ applicationContextInput: {sessionIdToken: $sessionIdToken, deviceLocale: $deviceLocale, clientId: $clientId}
+ episodeOrSeasonId: $episodeOrSeasonId
+ ) {
+ episodes {
+ ... on Episode {
+ contentId
+ name
+ images
+ seriesName
+ seasonId
+ seriesId
+ seasonNumber
+ episodeNumber
+ description {
+ synopsis
+ contentLengthInSeconds
+ }
+ publicReleaseDateUTC
+ }
+ }
+ }
+}
+'''
+
+ def _entries(self, asin):
+ season_info = self._call_api(
+ asin, note='Downloading season info', data={
+ 'operationName': 'getEpisodes',
+ 'variables': {'episodeOrSeasonId': asin},
+ 'query': self._GRAPHQL_QUERY,
+ })
+
+ for episode in season_info['episodes']:
+ yield self.url_result(
+ f'amazonminitv:{episode["contentId"]}', AmazonMiniTVIE, episode['contentId'])
+
+ def _real_extract(self, url):
+ asin = f'amzn1.dv.gti.{self._match_id(url)}'
+ return self.playlist_result(self._entries(asin), asin)
+
+
+class AmazonMiniTVSeriesIE(AmazonMiniTVBaseIE):
+ IE_NAME = 'amazonminitv:series'
+ _VALID_URL = r'amazonminitv:series:(?:amzn1\.dv\.gti\.)?(?P<id>[a-f0-9-]+)'
+ IE_DESC = 'Amazon MiniTV Series, "minitv:series:" prefix'
+ _TESTS = [{
+ 'url': 'amazonminitv:series:amzn1.dv.gti.56521d46-b040-4fd5-872e-3e70476a04b0',
+ 'playlist_mincount': 3,
+ 'info_dict': {
+ 'id': 'amzn1.dv.gti.56521d46-b040-4fd5-872e-3e70476a04b0',
+ },
+ }, {
+ 'url': 'amazonminitv:series:56521d46-b040-4fd5-872e-3e70476a04b0',
+ 'only_matching': True,
+ }]
+
+ _GRAPHQL_QUERY = '''
+query getSeasons($sessionIdToken: String!, $deviceLocale: String, $episodeOrSeasonOrSeriesId: ID!, $clientId: String) {
+ getSeasons(
+ applicationContextInput: {deviceLocale: $deviceLocale, sessionIdToken: $sessionIdToken, clientId: $clientId}
+ episodeOrSeasonOrSeriesId: $episodeOrSeasonOrSeriesId
+ ) {
+ seasons {
+ seasonId
+ }
+ }
+}
+'''
+
+ def _entries(self, asin):
+ season_info = self._call_api(
+ asin, note='Downloading series info', data={
+ 'operationName': 'getSeasons',
+ 'variables': {'episodeOrSeasonOrSeriesId': asin},
+ 'query': self._GRAPHQL_QUERY,
+ })
+
+ for season in season_info['seasons']:
+ yield self.url_result(f'amazonminitv:season:{season["seasonId"]}', AmazonMiniTVSeasonIE, season['seasonId'])
+
+ def _real_extract(self, url):
+ asin = f'amzn1.dv.gti.{self._match_id(url)}'
+ return self.playlist_result(self._entries(asin), asin)
diff --git a/yt_dlp/extractor/amcnetworks.py b/yt_dlp/extractor/amcnetworks.py
new file mode 100644
index 0000000..10bd021
--- /dev/null
+++ b/yt_dlp/extractor/amcnetworks.py
@@ -0,0 +1,147 @@
+import re
+
+from .theplatform import ThePlatformIE
+from ..utils import (
+ int_or_none,
+ parse_age_limit,
+ try_get,
+ update_url_query,
+)
+
+
+class AMCNetworksIE(ThePlatformIE): # XXX: Do not subclass from concrete IE
+ _VALID_URL = r'https?://(?:www\.)?(?P<site>amc|bbcamerica|ifc|(?:we|sundance)tv)\.com/(?P<id>(?:movies|shows(?:/[^/]+)+)/[^/?#&]+)'
+ _TESTS = [{
+ 'url': 'https://www.bbcamerica.com/shows/the-graham-norton-show/videos/tina-feys-adorable-airline-themed-family-dinner--51631',
+ 'info_dict': {
+ 'id': '4Lq1dzOnZGt0',
+ 'ext': 'mp4',
+ 'title': "The Graham Norton Show - Season 28 - Tina Fey's Adorable Airline-Themed Family Dinner",
+ 'description': "It turns out child stewardesses are very generous with the wine! All-new episodes of 'The Graham Norton Show' premiere Fridays at 11/10c on BBC America.",
+ 'upload_date': '20201120',
+ 'timestamp': 1605904350,
+ 'uploader': 'AMCN',
+ },
+ 'params': {
+ # m3u8 download
+ 'skip_download': True,
+ },
+ 'skip': '404 Not Found',
+ }, {
+ 'url': 'http://www.bbcamerica.com/shows/the-hunt/full-episodes/season-1/episode-01-the-hardest-challenge',
+ 'only_matching': True,
+ }, {
+ 'url': 'http://www.amc.com/shows/preacher/full-episodes/season-01/episode-00/pilot',
+ 'only_matching': True,
+ }, {
+ 'url': 'http://www.wetv.com/shows/million-dollar-matchmaker/season-01/episode-06-the-dumped-dj-and-shallow-hal',
+ 'only_matching': True,
+ }, {
+ 'url': 'http://www.ifc.com/movies/chaos',
+ 'only_matching': True,
+ }, {
+ 'url': 'http://www.bbcamerica.com/shows/doctor-who/full-episodes/the-power-of-the-daleks/episode-01-episode-1-color-version',
+ 'only_matching': True,
+ }, {
+ 'url': 'http://www.wetv.com/shows/mama-june-from-not-to-hot/full-episode/season-01/thin-tervention',
+ 'only_matching': True,
+ }, {
+ 'url': 'http://www.wetv.com/shows/la-hair/videos/season-05/episode-09-episode-9-2/episode-9-sneak-peek-3',
+ 'only_matching': True,
+ }, {
+ 'url': 'https://www.sundancetv.com/shows/riviera/full-episodes/season-1/episode-01-episode-1',
+ 'only_matching': True,
+ }]
+ _REQUESTOR_ID_MAP = {
+ 'amc': 'AMC',
+ 'bbcamerica': 'BBCA',
+ 'ifc': 'IFC',
+ 'sundancetv': 'SUNDANCE',
+ 'wetv': 'WETV',
+ }
+
+ def _real_extract(self, url):
+ site, display_id = self._match_valid_url(url).groups()
+ requestor_id = self._REQUESTOR_ID_MAP[site]
+ page_data = self._download_json(
+ 'https://content-delivery-gw.svc.ds.amcn.com/api/v2/content/amcn/%s/url/%s'
+ % (requestor_id.lower(), display_id), display_id)['data']
+ properties = page_data.get('properties') or {}
+ query = {
+ 'mbr': 'true',
+ 'manifest': 'm3u',
+ }
+
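+        # Scan the page's component tree for video-player blocks; each one
+        # carries a ThePlatform release PID from which the media URL is built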
+ video_player_count = 0
+ try:
+ for v in page_data['children']:
+ if v.get('type') == 'video-player':
+                    release_pid = v['properties']['currentVideo']['meta']['releasePid']
+                    tp_path = 'M_UwQC/' + release_pid
+ media_url = 'https://link.theplatform.com/s/' + tp_path
+ video_player_count += 1
+ except KeyError:
+ pass
+ if video_player_count > 1:
+ self.report_warning(
+ 'The JSON data has %d video players. Only one will be extracted' % video_player_count)
+
+ # Fall back to videoPid if releasePid not found.
+ # TODO: Fall back to videoPid if releasePid manifest uses DRM.
+ if not video_player_count:
+ tp_path = 'M_UwQC/media/' + properties['videoPid']
+ media_url = 'https://link.theplatform.com/s/' + tp_path
+
+ theplatform_metadata = self._download_theplatform_metadata(tp_path, display_id)
+ info = self._parse_theplatform_metadata(theplatform_metadata)
+ video_id = theplatform_metadata['pid']
+ title = theplatform_metadata['title']
+ rating = try_get(
+ theplatform_metadata, lambda x: x['ratings'][0]['rating'])
+ video_category = properties.get('videoCategory')
+ if video_category and video_category.endswith('-Auth'):
+ resource = self._get_mvpd_resource(
+ requestor_id, title, video_id, rating)
+ query['auth'] = self._extract_mvpd_auth(
+ url, video_id, requestor_id, resource)
+ media_url = update_url_query(media_url, query)
+ formats, subtitles = self._extract_theplatform_smil(
+ media_url, video_id)
+
+ thumbnails = []
+ thumbnail_urls = [properties.get('imageDesktop')]
+ if 'thumbnail' in info:
+ thumbnail_urls.append(info.pop('thumbnail'))
+ for thumbnail_url in thumbnail_urls:
+ if not thumbnail_url:
+ continue
+ mobj = re.search(r'(\d+)x(\d+)', thumbnail_url)
+ thumbnails.append({
+ 'url': thumbnail_url,
+ 'width': int(mobj.group(1)) if mobj else None,
+ 'height': int(mobj.group(2)) if mobj else None,
+ })
+
+ info.update({
+ 'age_limit': parse_age_limit(rating),
+ 'formats': formats,
+ 'id': video_id,
+ 'subtitles': subtitles,
+ 'thumbnails': thumbnails,
+ })
+ ns_keys = theplatform_metadata.get('$xmlns', {}).keys()
+ if ns_keys:
+ ns = list(ns_keys)[0]
+ episode = theplatform_metadata.get(ns + '$episodeTitle') or None
+ episode_number = int_or_none(
+ theplatform_metadata.get(ns + '$episode'))
+ season_number = int_or_none(
+ theplatform_metadata.get(ns + '$season'))
+ series = theplatform_metadata.get(ns + '$show') or None
+ info.update({
+ 'episode': episode,
+ 'episode_number': episode_number,
+ 'season_number': season_number,
+ 'series': series,
+ })
+ return info
diff --git a/yt_dlp/extractor/americastestkitchen.py b/yt_dlp/extractor/americastestkitchen.py
new file mode 100644
index 0000000..e889458
--- /dev/null
+++ b/yt_dlp/extractor/americastestkitchen.py
@@ -0,0 +1,215 @@
+import json
+
+from .common import InfoExtractor
+from ..utils import (
+ clean_html,
+ int_or_none,
+ try_get,
+ unified_strdate,
+ unified_timestamp,
+)
+
+
+class AmericasTestKitchenIE(InfoExtractor):
+ _VALID_URL = r'https?://(?:www\.)?(?:americastestkitchen|cooks(?:country|illustrated))\.com/(?:cooks(?:country|illustrated)/)?(?P<resource_type>episode|videos)/(?P<id>\d+)'
+ _TESTS = [{
+ 'url': 'https://www.americastestkitchen.com/episode/582-weeknight-japanese-suppers',
+ 'md5': 'b861c3e365ac38ad319cfd509c30577f',
+ 'info_dict': {
+ 'id': '5b400b9ee338f922cb06450c',
+ 'title': 'Japanese Suppers',
+ 'ext': 'mp4',
+ 'display_id': 'weeknight-japanese-suppers',
+ 'description': 'md5:64e606bfee910627efc4b5f050de92b3',
+ 'timestamp': 1523304000,
+ 'upload_date': '20180409',
+ 'release_date': '20180409',
+ 'series': 'America\'s Test Kitchen',
+ 'season': 'Season 18',
+ 'episode': 'Japanese Suppers',
+ 'season_number': 18,
+ 'episode_number': 15,
+ 'duration': 1376,
+ 'thumbnail': r're:^https?://',
+ 'average_rating': 0,
+ 'view_count': int,
+ },
+ 'params': {
+ 'skip_download': True,
+ },
+ }, {
+ # Metadata parsing behaves differently for newer episodes (705) as opposed to older episodes (582 above)
+ 'url': 'https://www.americastestkitchen.com/episode/705-simple-chicken-dinner',
+ 'md5': '06451608c57651e985a498e69cec17e5',
+ 'info_dict': {
+ 'id': '5fbe8c61bda2010001c6763b',
+ 'title': 'Simple Chicken Dinner',
+ 'ext': 'mp4',
+ 'display_id': 'atktv_2103_simple-chicken-dinner_full-episode_web-mp4',
+ 'description': 'md5:eb68737cc2fd4c26ca7db30139d109e7',
+ 'timestamp': 1610737200,
+ 'upload_date': '20210115',
+ 'release_date': '20210115',
+ 'series': 'America\'s Test Kitchen',
+ 'season': 'Season 21',
+ 'episode': 'Simple Chicken Dinner',
+ 'season_number': 21,
+ 'episode_number': 3,
+ 'duration': 1397,
+ 'thumbnail': r're:^https?://',
+ 'view_count': int,
+ 'average_rating': 0,
+ },
+ 'params': {
+ 'skip_download': True,
+ },
+ }, {
+ 'url': 'https://www.americastestkitchen.com/videos/3420-pan-seared-salmon',
+ 'only_matching': True,
+ }, {
+ 'url': 'https://www.americastestkitchen.com/cookscountry/episode/564-when-only-chocolate-will-do',
+ 'only_matching': True,
+ }, {
+ 'url': 'https://www.americastestkitchen.com/cooksillustrated/videos/4478-beef-wellington',
+ 'only_matching': True,
+ }, {
+ 'url': 'https://www.cookscountry.com/episode/564-when-only-chocolate-will-do',
+ 'only_matching': True,
+ }, {
+ 'url': 'https://www.cooksillustrated.com/videos/4478-beef-wellington',
+ 'only_matching': True,
+ }]
+
+ def _real_extract(self, url):
+ resource_type, video_id = self._match_valid_url(url).groups()
+ is_episode = resource_type == 'episode'
+ if is_episode:
+ resource_type = 'episodes'
+
+ resource = self._download_json(
+ 'https://www.americastestkitchen.com/api/v6/%s/%s' % (resource_type, video_id), video_id)
+ video = resource['video'] if is_episode else resource
+ episode = resource if is_episode else resource.get('episode') or {}
+
+ return {
+ '_type': 'url_transparent',
+ 'url': 'https://player.zype.com/embed/%s.js?api_key=jZ9GUhRmxcPvX7M3SlfejB6Hle9jyHTdk2jVxG7wOHPLODgncEKVdPYBhuz9iWXQ' % video['zypeId'],
+ 'ie_key': 'Zype',
+ 'description': clean_html(video.get('description')),
+ 'timestamp': unified_timestamp(video.get('publishDate')),
+ 'release_date': unified_strdate(video.get('publishDate')),
+ 'episode_number': int_or_none(episode.get('number')),
+ 'season_number': int_or_none(episode.get('season')),
+ 'series': try_get(episode, lambda x: x['show']['title']),
+ 'episode': episode.get('title'),
+ }
+
+
+class AmericasTestKitchenSeasonIE(InfoExtractor):
+ _VALID_URL = r'https?://(?:www\.)?(?P<show>americastestkitchen|(?P<cooks>cooks(?:country|illustrated)))\.com(?:(?:/(?P<show2>cooks(?:country|illustrated)))?(?:/?$|(?<!ated)(?<!ated\.com)/episodes/browse/season_(?P<season>\d+)))'
+ _TESTS = [{
+ # ATK Season
+ 'url': 'https://www.americastestkitchen.com/episodes/browse/season_1',
+ 'info_dict': {
+ 'id': 'season_1',
+ 'title': 'Season 1',
+ },
+ 'playlist_count': 13,
+ }, {
+ # Cooks Country Season
+ 'url': 'https://www.americastestkitchen.com/cookscountry/episodes/browse/season_12',
+ 'info_dict': {
+ 'id': 'season_12',
+ 'title': 'Season 12',
+ },
+ 'playlist_count': 13,
+ }, {
+ # America's Test Kitchen Series
+ 'url': 'https://www.americastestkitchen.com/',
+ 'info_dict': {
+ 'id': 'americastestkitchen',
+ 'title': 'America\'s Test Kitchen',
+ },
+ 'playlist_count': 558,
+ }, {
+ # Cooks Country Series
+ 'url': 'https://www.americastestkitchen.com/cookscountry',
+ 'info_dict': {
+ 'id': 'cookscountry',
+ 'title': 'Cook\'s Country',
+ },
+ 'playlist_count': 199,
+ }, {
+ 'url': 'https://www.americastestkitchen.com/cookscountry/',
+ 'only_matching': True,
+ }, {
+ 'url': 'https://www.cookscountry.com/episodes/browse/season_12',
+ 'only_matching': True,
+ }, {
+ 'url': 'https://www.cookscountry.com',
+ 'only_matching': True,
+ }, {
+ 'url': 'https://www.americastestkitchen.com/cooksillustrated/',
+ 'only_matching': True,
+ }, {
+ 'url': 'https://www.cooksillustrated.com',
+ 'only_matching': True,
+ }]
+
+ def _real_extract(self, url):
+        season_number, show, show2 = self._match_valid_url(url).group('season', 'show', 'show2')
+        show_path = ('/' + show2) if show2 else ''
+        show = show2 or show
+ season_number = int_or_none(season_number)
+
+ slug, title = {
+ 'americastestkitchen': ('atk', 'America\'s Test Kitchen'),
+ 'cookscountry': ('cco', 'Cook\'s Country'),
+ 'cooksillustrated': ('cio', 'Cook\'s Illustrated'),
+ }[show]
+
+ facet_filters = [
+ 'search_document_klass:episode',
+ 'search_show_slug:' + slug,
+ ]
+
+ if season_number:
+ playlist_id = 'season_%d' % season_number
+ playlist_title = 'Season %d' % season_number
+ facet_filters.append('search_season_list:' + playlist_title)
+ else:
+ playlist_id = show
+ playlist_title = title
+
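+        # Episode listings are served by Algolia; filter its search index down
+        # to the requested show (and season, if any) and fetch everything in a
+        # single page of results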
+ season_search = self._download_json(
+ 'https://y1fnzxui30-dsn.algolia.net/1/indexes/everest_search_%s_season_desc_production' % slug,
+ playlist_id, headers={
+ 'Origin': 'https://www.americastestkitchen.com',
+ 'X-Algolia-API-Key': '8d504d0099ed27c1b73708d22871d805',
+ 'X-Algolia-Application-Id': 'Y1FNZXUI30',
+ }, query={
+ 'facetFilters': json.dumps(facet_filters),
+ 'attributesToRetrieve': 'description,search_%s_episode_number,search_document_date,search_url,title,search_atk_episode_season' % slug,
+ 'attributesToHighlight': '',
+ 'hitsPerPage': 1000,
+ })
+
+ def entries():
+ for episode in (season_search.get('hits') or []):
+ search_url = episode.get('search_url') # always formatted like '/episode/123-title-of-episode'
+ if not search_url:
+ continue
+ yield {
+ '_type': 'url',
+ 'url': f'https://www.americastestkitchen.com{show_path or ""}{search_url}',
+ 'id': try_get(episode, lambda e: e['objectID'].split('_')[-1]),
+ 'title': episode.get('title'),
+ 'description': episode.get('description'),
+ 'timestamp': unified_timestamp(episode.get('search_document_date')),
+ 'season_number': season_number,
+ 'episode_number': int_or_none(episode.get('search_%s_episode_number' % slug)),
+ 'ie_key': AmericasTestKitchenIE.ie_key(),
+ }
+
+ return self.playlist_result(
+ entries(), playlist_id, playlist_title)
diff --git a/yt_dlp/extractor/amp.py b/yt_dlp/extractor/amp.py
new file mode 100644
index 0000000..0d259c5
--- /dev/null
+++ b/yt_dlp/extractor/amp.py
@@ -0,0 +1,101 @@
+from .common import InfoExtractor
+from ..utils import (
+ determine_ext,
+ ExtractorError,
+ int_or_none,
+ mimetype2ext,
+ parse_iso8601,
+ strip_jsonp,
+ unified_timestamp,
+ url_or_none,
+)
+
+
+class AMPIE(InfoExtractor): # XXX: Conventionally, base classes should end with BaseIE/InfoExtractor
+ # parse Akamai Adaptive Media Player feed
+ def _extract_feed_info(self, url):
+ feed = self._download_json(
+ url, None, 'Downloading Akamai AMP feed',
+ 'Unable to download Akamai AMP feed', transform_source=strip_jsonp)
+ item = feed.get('channel', {}).get('item')
+ if not item:
+ raise ExtractorError('%s said: %s' % (self.IE_NAME, feed['error']))
+
+ video_id = item['guid']
+
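+        # media-* fields may be nested inside a media-group element, sit
+        # directly on the item, or appear under their unprefixed name;
+        # check all three locations in that order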
+ def get_media_node(name, default=None):
+ media_name = 'media-%s' % name
+ media_group = item.get('media-group') or item
+ return media_group.get(media_name) or item.get(media_name) or item.get(name, default)
+
+ thumbnails = []
+ media_thumbnail = get_media_node('thumbnail')
+ if media_thumbnail:
+ if isinstance(media_thumbnail, dict):
+ media_thumbnail = [media_thumbnail]
+ for thumbnail_data in media_thumbnail:
+ thumbnail = thumbnail_data.get('@attributes', {})
+ thumbnail_url = url_or_none(thumbnail.get('url'))
+ if not thumbnail_url:
+ continue
+ thumbnails.append({
+ 'url': self._proto_relative_url(thumbnail_url, 'http:'),
+ 'width': int_or_none(thumbnail.get('width')),
+ 'height': int_or_none(thumbnail.get('height')),
+ })
+
+ subtitles = {}
+ media_subtitle = get_media_node('subTitle')
+ if media_subtitle:
+ if isinstance(media_subtitle, dict):
+ media_subtitle = [media_subtitle]
+ for subtitle_data in media_subtitle:
+ subtitle = subtitle_data.get('@attributes', {})
+ subtitle_href = url_or_none(subtitle.get('href'))
+ if not subtitle_href:
+ continue
+ subtitles.setdefault(subtitle.get('lang') or 'en', []).append({
+ 'url': subtitle_href,
+ 'ext': mimetype2ext(subtitle.get('type')) or determine_ext(subtitle_href),
+ })
+
+ formats = []
+ media_content = get_media_node('content')
+ if isinstance(media_content, dict):
+ media_content = [media_content]
+ for media_data in media_content:
+ media = media_data.get('@attributes', {})
+ media_url = url_or_none(media.get('url'))
+ if not media_url:
+ continue
+ ext = mimetype2ext(media.get('type')) or determine_ext(media_url)
+ if ext == 'f4m':
+ formats.extend(self._extract_f4m_formats(
+ media_url + '?hdcore=3.4.0&plugin=aasp-3.4.0.132.124',
+ video_id, f4m_id='hds', fatal=False))
+ elif ext == 'm3u8':
+ fmts, subs = self._extract_m3u8_formats_and_subtitles(
+ media_url, video_id, 'mp4', m3u8_id='hls', fatal=False)
+ formats.extend(fmts)
+ self._merge_subtitles(subs, target=subtitles)
+ else:
+ formats.append({
+ 'format_id': media_data.get('media-category', {}).get('@attributes', {}).get('label'),
+ 'url': media_url,
+ 'tbr': int_or_none(media.get('bitrate')),
+ 'filesize': int_or_none(media.get('fileSize')),
+ 'ext': ext,
+ })
+
+        timestamp = unified_timestamp(item.get('pubDate')) or parse_iso8601(item.get('dc-date'))
+
+ return {
+ 'id': video_id,
+ 'title': get_media_node('title'),
+ 'description': get_media_node('description'),
+ 'thumbnails': thumbnails,
+ 'timestamp': timestamp,
+ 'duration': int_or_none(media_content[0].get('@attributes', {}).get('duration')),
+ 'subtitles': subtitles,
+ 'formats': formats,
+ }
diff --git a/yt_dlp/extractor/anchorfm.py b/yt_dlp/extractor/anchorfm.py
new file mode 100644
index 0000000..52f2ad0
--- /dev/null
+++ b/yt_dlp/extractor/anchorfm.py
@@ -0,0 +1,98 @@
+from .common import InfoExtractor
+from ..utils import (
+ clean_html,
+ float_or_none,
+ int_or_none,
+ str_or_none,
+ traverse_obj,
+ unified_timestamp
+)
+
+
+class AnchorFMEpisodeIE(InfoExtractor):
+ _VALID_URL = r'https?://anchor\.fm/(?P<channel_name>\w+)/(?:embed/)?episodes/[\w-]+-(?P<episode_id>\w+)'
+ _EMBED_REGEX = [rf'<iframe[^>]+\bsrc=[\'"](?P<url>{_VALID_URL})']
+ _TESTS = [{
+ 'url': 'https://anchor.fm/lovelyti/episodes/Chrisean-Rock-takes-to-twitter-to-announce-shes-pregnant--Blueface-denies-he-is-the-father-e1tpt3d',
+ 'info_dict': {
+ 'id': 'e1tpt3d',
+ 'ext': 'mp3',
+ 'title': ' Chrisean Rock takes to twitter to announce she\'s pregnant, Blueface denies he is the father!',
+ 'description': 'md5:207d167de3e28ceb4ddc1ebf5a30044c',
+ 'thumbnail': 'https://s3-us-west-2.amazonaws.com/anchor-generated-image-bank/production/podcast_uploaded_nologo/1034827/1034827-1658438968460-5f3bfdf3601e8.jpg',
+ 'duration': 624.718,
+ 'uploader': 'Lovelyti ',
+ 'uploader_id': '991541',
+ 'channel': 'lovelyti',
+ 'modified_date': '20230121',
+ 'modified_timestamp': 1674285178,
+ 'release_date': '20230121',
+ 'release_timestamp': 1674285179,
+ 'episode_id': 'e1tpt3d',
+ }
+ }, {
+ # embed url
+ 'url': 'https://anchor.fm/apakatatempo/embed/episodes/S2E75-Perang-Bintang-di-Balik-Kasus-Ferdy-Sambo-dan-Ismail-Bolong-e1shjqd',
+ 'info_dict': {
+ 'id': 'e1shjqd',
+ 'ext': 'mp3',
+ 'title': 'S2E75 Perang Bintang di Balik Kasus Ferdy Sambo dan Ismail Bolong',
+ 'description': 'md5:9e95ad9293bf00178bf8d33e9cb92c41',
+ 'duration': 1042.008,
+ 'thumbnail': 'https://s3-us-west-2.amazonaws.com/anchor-generated-image-bank/production/podcast_uploaded_episode400/2627805/2627805-1671590688729-4db3882ac9e4b.jpg',
+ 'release_date': '20221221',
+ 'release_timestamp': 1671595916,
+ 'modified_date': '20221221',
+ 'modified_timestamp': 1671590834,
+ 'channel': 'apakatatempo',
+ 'uploader': 'Podcast Tempo',
+ 'uploader_id': '2585461',
+ 'season': 'Season 2',
+ 'season_number': 2,
+ 'episode_id': 'e1shjqd',
+ }
+ }]
+
+ _WEBPAGE_TESTS = [{
+ 'url': 'https://podcast.tempo.co/podcast/192/perang-bintang-di-balik-kasus-ferdy-sambo-dan-ismail-bolong',
+ 'info_dict': {
+ 'id': 'e1shjqd',
+ 'ext': 'mp3',
+ 'release_date': '20221221',
+ 'duration': 1042.008,
+ 'season': 'Season 2',
+ 'modified_timestamp': 1671590834,
+ 'uploader_id': '2585461',
+ 'modified_date': '20221221',
+ 'description': 'md5:9e95ad9293bf00178bf8d33e9cb92c41',
+ 'season_number': 2,
+ 'title': 'S2E75 Perang Bintang di Balik Kasus Ferdy Sambo dan Ismail Bolong',
+ 'release_timestamp': 1671595916,
+ 'episode_id': 'e1shjqd',
+ 'thumbnail': 'https://s3-us-west-2.amazonaws.com/anchor-generated-image-bank/production/podcast_uploaded_episode400/2627805/2627805-1671590688729-4db3882ac9e4b.jpg',
+ 'uploader': 'Podcast Tempo',
+ 'channel': 'apakatatempo',
+ }
+ }]
+
+ def _real_extract(self, url):
+ channel_name, episode_id = self._match_valid_url(url).group('channel_name', 'episode_id')
+ api_data = self._download_json(f'https://anchor.fm/api/v3/episodes/{episode_id}', episode_id)
+
+ return {
+ 'id': episode_id,
+ 'title': traverse_obj(api_data, ('episode', 'title')),
+ 'url': traverse_obj(api_data, ('episode', 'episodeEnclosureUrl'), ('episodeAudios', 0, 'url')),
+ 'ext': 'mp3',
+ 'vcodec': 'none',
+ 'thumbnail': traverse_obj(api_data, ('episode', 'episodeImage')),
+ 'description': clean_html(traverse_obj(api_data, ('episode', ('description', 'descriptionPreview')), get_all=False)),
+ 'duration': float_or_none(traverse_obj(api_data, ('episode', 'duration')), 1000),
+ 'modified_timestamp': unified_timestamp(traverse_obj(api_data, ('episode', 'modified'))),
+ 'release_timestamp': int_or_none(traverse_obj(api_data, ('episode', 'publishOnUnixTimestamp'))),
+ 'episode_id': episode_id,
+ 'uploader': traverse_obj(api_data, ('creator', 'name')),
+ 'uploader_id': str_or_none(traverse_obj(api_data, ('creator', 'userId'))),
+ 'season_number': int_or_none(traverse_obj(api_data, ('episode', 'podcastSeasonNumber'))),
+ 'channel': channel_name or traverse_obj(api_data, ('creator', 'vanitySlug')),
+ }
diff --git a/yt_dlp/extractor/angel.py b/yt_dlp/extractor/angel.py
new file mode 100644
index 0000000..306b365
--- /dev/null
+++ b/yt_dlp/extractor/angel.py
@@ -0,0 +1,56 @@
+import re
+
+from .common import InfoExtractor
+from ..utils import merge_dicts, url_or_none
+
+
+class AngelIE(InfoExtractor):
+ _VALID_URL = r'https?://(?:www\.)?angel\.com/watch/(?P<series>[^/?#]+)/episode/(?P<id>[\w-]+)/season-(?P<season_number>\d+)/episode-(?P<episode_number>\d+)/(?P<title>[^/?#]+)'
+ _TESTS = [{
+ 'url': 'https://www.angel.com/watch/tuttle-twins/episode/2f3d0382-ea82-4cdc-958e-84fbadadc710/season-1/episode-1/when-laws-give-you-lemons',
+ 'md5': '4734e5cfdd64a568e837246aa3eaa524',
+ 'info_dict': {
+ 'id': '2f3d0382-ea82-4cdc-958e-84fbadadc710',
+ 'ext': 'mp4',
+ 'title': 'Tuttle Twins Season 1, Episode 1: When Laws Give You Lemons',
+ 'description': 'md5:73b704897c20ab59c433a9c0a8202d5e',
+ 'thumbnail': r're:^https?://images.angelstudios.com/image/upload/angel-app/.*$',
+ 'duration': 1359.0
+ }
+ }, {
+ 'url': 'https://www.angel.com/watch/the-chosen/episode/8dfb714d-bca5-4812-8125-24fb9514cd10/season-1/episode-1/i-have-called-you-by-name',
+ 'md5': 'e4774bad0a5f0ad2e90d175cafdb797d',
+ 'info_dict': {
+ 'id': '8dfb714d-bca5-4812-8125-24fb9514cd10',
+ 'ext': 'mp4',
+ 'title': 'The Chosen Season 1, Episode 1: I Have Called You By Name',
+ 'description': 'md5:aadfb4827a94415de5ff6426e6dee3be',
+ 'thumbnail': r're:^https?://images.angelstudios.com/image/upload/angel-app/.*$',
+ 'duration': 3276.0
+ }
+ }]
+
+ def _real_extract(self, url):
+ video_id = self._match_id(url)
+ webpage = self._download_webpage(url, video_id)
+
+ json_ld = self._search_json_ld(webpage, video_id)
+
+ formats, subtitles = self._extract_m3u8_formats_and_subtitles(
+ json_ld.pop('url'), video_id, note='Downloading HD m3u8 information')
+
+ info_dict = {
+ 'id': video_id,
+ 'title': self._og_search_title(webpage),
+ 'description': self._og_search_description(webpage),
+ 'formats': formats,
+ 'subtitles': subtitles
+ }
+
+ # Angel uses cloudinary in the background and supports image transformations.
+ # We remove these transformations and return the source file
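+        # e.g. .../image/upload/c_fill,w_300/angel-app/foo.jpg
+        #   -> .../image/upload/angel-app/foo.jpg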
+        base_thumbnail_url = url_or_none(self._og_search_thumbnail(webpage)) or json_ld.pop('thumbnails', None)
+ if base_thumbnail_url:
+ info_dict['thumbnail'] = re.sub(r'(/upload)/.+(/angel-app/.+)$', r'\1\2', base_thumbnail_url)
+
+ return merge_dicts(info_dict, json_ld)
diff --git a/yt_dlp/extractor/antenna.py b/yt_dlp/extractor/antenna.py
new file mode 100644
index 0000000..2929d65
--- /dev/null
+++ b/yt_dlp/extractor/antenna.py
@@ -0,0 +1,143 @@
+import urllib.parse
+
+from .common import InfoExtractor
+from ..networking import HEADRequest
+from ..utils import (
+ ExtractorError,
+ determine_ext,
+ make_archive_id,
+ scale_thumbnails_to_max_format_width,
+)
+
+
+class AntennaBaseIE(InfoExtractor):
+ def _download_and_extract_api_data(self, video_id, netloc, cid=None):
+ info = self._download_json(f'{self.http_scheme()}//{netloc}{self._API_PATH}',
+ video_id, query={'cid': cid or video_id})
+ if not info.get('url'):
+ raise ExtractorError(f'No source found for {video_id}')
+
+ ext = determine_ext(info['url'])
+ if ext == 'm3u8':
+ formats, subs = self._extract_m3u8_formats_and_subtitles(info['url'], video_id, 'mp4')
+ else:
+ formats, subs = [{'url': info['url'], 'format_id': ext}], {}
+
+ thumbnails = scale_thumbnails_to_max_format_width(
+ formats, [{'url': info['thumb']}], r'(?<=/imgHandler/)\d+') if info.get('thumb') else []
+ return {
+ 'id': video_id,
+ 'title': info.get('title'),
+ 'thumbnails': thumbnails,
+ 'formats': formats,
+ 'subtitles': subs,
+ }
+
+
+class AntennaGrWatchIE(AntennaBaseIE):
+ IE_NAME = 'antenna:watch'
+ IE_DESC = 'antenna.gr and ant1news.gr videos'
+ _VALID_URL = r'https?://(?P<netloc>(?:www\.)?(?:antenna|ant1news)\.gr)/watch/(?P<id>\d+)/'
+ _API_PATH = '/templates/data/player'
+
+ _TESTS = [{
+ 'url': 'https://www.ant1news.gr/watch/1506168/ant1-news-09112021-stis-18-45',
+ 'md5': 'c472d9dd7cd233c63aff2ea42201cda6',
+ 'info_dict': {
+ 'id': '1506168',
+ 'ext': 'mp4',
+ 'title': 'md5:0ad00fa66ecf8aa233d26ab0dba7514a',
+ 'description': 'md5:18665af715a6dcfeac1d6153a44f16b0',
+ 'thumbnail': r're:https://ant1media\.azureedge\.net/imgHandler/\d+/26d46bf6-8158-4f02-b197-7096c714b2de\.jpg',
+ },
+ }, {
+ 'url': 'https://www.antenna.gr/watch/1643812/oi-prodotes-epeisodio-01',
+ 'md5': '8f6f7dd3b1dba4d835ba990e25f31243',
+ 'info_dict': {
+ 'id': '1643812',
+ 'ext': 'mp4',
+ 'format_id': 'mp4',
+ 'title': 'ΟΙ ΠΡΟΔΟΤΕΣ – ΕΠΕΙΣΟΔΙΟ 01',
+ 'thumbnail': r're:https://ant1media\.azureedge\.net/imgHandler/\d+/b3d63096-e72d-43c4-87a0-00d4363d242f\.jpg',
+ },
+ }]
+
+ def _real_extract(self, url):
+ video_id, netloc = self._match_valid_url(url).group('id', 'netloc')
+ webpage = self._download_webpage(url, video_id)
+ info = self._download_and_extract_api_data(video_id, netloc)
+ info['description'] = self._og_search_description(webpage, default=None)
+ info['_old_archive_ids'] = [make_archive_id('Ant1NewsGrWatch', video_id)]
+ return info
+
+
+class Ant1NewsGrArticleIE(AntennaBaseIE):
+ IE_NAME = 'ant1newsgr:article'
+ IE_DESC = 'ant1news.gr articles'
+ _VALID_URL = r'https?://(?:www\.)?ant1news\.gr/[^/]+/article/(?P<id>\d+)/'
+
+ _TESTS = [{
+ 'url': 'https://www.ant1news.gr/afieromata/article/549468/o-tzeims-mpont-sta-meteora-oi-apeiles-kai-o-xesikomos-ton-kalogeron',
+ 'md5': '57eb8d12181f0fa2b14b0b138e1de9b6',
+ 'info_dict': {
+ 'id': '_xvg/m_cmbatw=',
+ 'ext': 'mp4',
+ 'title': 'md5:a93e8ecf2e4073bfdffcb38f59945411',
+ 'timestamp': 1666166520,
+ 'upload_date': '20221019',
+ 'thumbnail': 'https://ant1media.azureedge.net/imgHandler/1920/756206d2-d640-40e2-b201-3555abdfc0db.jpg',
+ },
+ }, {
+ 'url': 'https://ant1news.gr/Society/article/620286/symmoria-anilikon-dikigoros-thymaton-ithelan-na-toys-apoteleiosoyn',
+ 'info_dict': {
+ 'id': '620286',
+ 'title': 'md5:91fe569e952e4d146485740ae927662b',
+ },
+ 'playlist_mincount': 2,
+ 'params': {
+ 'skip_download': True,
+ },
+ }]
+
+ def _real_extract(self, url):
+ video_id = self._match_id(url)
+ webpage = self._download_webpage(url, video_id)
+ info = self._search_json_ld(webpage, video_id, expected_type='NewsArticle')
+ embed_urls = list(Ant1NewsGrEmbedIE._extract_embed_urls(url, webpage))
+ if not embed_urls:
+            raise ExtractorError(f'No videos found for {video_id}', expected=True)
+ return self.playlist_from_matches(
+ embed_urls, video_id, info.get('title'), ie=Ant1NewsGrEmbedIE.ie_key(),
+ video_kwargs={'url_transparent': True, 'timestamp': info.get('timestamp')})
+
+
+class Ant1NewsGrEmbedIE(AntennaBaseIE):
+ IE_NAME = 'ant1newsgr:embed'
+ IE_DESC = 'ant1news.gr embedded videos'
+ _BASE_PLAYER_URL_RE = r'(?:https?:)?//(?:[a-zA-Z0-9\-]+\.)?(?:antenna|ant1news)\.gr/templates/pages/player'
+ _VALID_URL = rf'{_BASE_PLAYER_URL_RE}\?([^#]+&)?cid=(?P<id>[^#&]+)'
+ _EMBED_REGEX = [rf'<iframe[^>]+?src=(?P<_q1>["\'])(?P<url>{_BASE_PLAYER_URL_RE}\?(?:(?!(?P=_q1)).)+)(?P=_q1)']
+ _API_PATH = '/templates/data/jsonPlayer'
+
+ _TESTS = [{
+ 'url': 'https://www.antenna.gr/templates/pages/player?cid=3f_li_c_az_jw_y_u=&w=670&h=377',
+ 'md5': 'dfc58c3a11a5a9aad2ba316ed447def3',
+ 'info_dict': {
+ 'id': '3f_li_c_az_jw_y_u=',
+ 'ext': 'mp4',
+ 'title': 'md5:a30c93332455f53e1e84ae0724f0adf7',
+ 'thumbnail': 'https://ant1media.azureedge.net/imgHandler/640/bbe31201-3f09-4a4e-87f5-8ad2159fffe2.jpg',
+ },
+ }]
+
+ def _real_extract(self, url):
+ video_id = self._match_id(url)
+
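+        # The player URL can redirect between the antenna.gr and ant1news.gr
+        # domains; resolve it with a HEAD request so the API host and cid
+        # below match the canonical player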
+ canonical_url = self._request_webpage(
+ HEADRequest(url), video_id,
+            note='Resolving canonical player URL',
+            errnote='Could not resolve canonical player URL').url
+ _, netloc, _, _, query, _ = urllib.parse.urlparse(canonical_url)
+ cid = urllib.parse.parse_qs(query)['cid'][0]
+
+ return self._download_and_extract_api_data(video_id, netloc, cid=cid)
diff --git a/yt_dlp/extractor/anvato.py b/yt_dlp/extractor/anvato.py
new file mode 100644
index 0000000..0df5033
--- /dev/null
+++ b/yt_dlp/extractor/anvato.py
@@ -0,0 +1,404 @@
+import base64
+import hashlib
+import json
+import random
+import re
+import time
+
+from .common import InfoExtractor
+from ..aes import aes_encrypt
+from ..utils import (
+ bytes_to_intlist,
+ determine_ext,
+ int_or_none,
+ intlist_to_bytes,
+ join_nonempty,
+ smuggle_url,
+ strip_jsonp,
+ traverse_obj,
+ unescapeHTML,
+ unsmuggle_url,
+)
+
+
+def md5_text(s):
+ return hashlib.md5(str(s).encode()).hexdigest()
+
+
+class AnvatoIE(InfoExtractor):
+ _VALID_URL = r'anvato:(?P<access_key_or_mcp>[^:]+):(?P<id>\d+)'
+
+ _API_BASE_URL = 'https://tkx.mp.lura.live/rest/v2'
+ _ANVP_RE = r'<script[^>]+\bdata-anvp\s*=\s*(["\'])(?P<anvp>(?:(?!\1).)+)\1'
+ _AUTH_KEY = b'\x31\xc2\x42\x84\x9e\x73\xa0\xce' # from anvplayer.min.js
+
+ _TESTS = [{
+ # from https://www.nfl.com/videos/baker-mayfield-s-game-changing-plays-from-3-td-game-week-14
+ 'url': 'anvato:GXvEgwyJeWem8KCYXfeoHWknwP48Mboj:899441',
+ 'md5': '921919dab3cd0b849ff3d624831ae3e2',
+ 'info_dict': {
+ 'id': '899441',
+ 'ext': 'mp4',
+ 'title': 'Baker Mayfield\'s game-changing plays from 3-TD game Week 14',
+ 'description': 'md5:85e05a3cc163f8c344340f220521136d',
+ 'upload_date': '20201215',
+ 'timestamp': 1608009755,
+ 'thumbnail': r're:^https?://.*\.jpg',
+ 'uploader': 'NFL',
+ 'tags': ['Baltimore Ravens at Cleveland Browns (2020-REG-14)', 'Baker Mayfield', 'Game Highlights',
+ 'Player Highlights', 'Cleveland Browns', 'league'],
+ 'duration': 157,
+ 'categories': ['Entertainment', 'Game', 'Highlights'],
+ },
+ }, {
+ # from https://ktla.com/news/99-year-old-woman-learns-to-fly-in-torrance-checks-off-bucket-list-dream/
+ 'url': 'anvato:X8POa4zpGZMmeiq0wqiO8IP5rMqQM9VN:8032455',
+ 'md5': '837718bcfb3a7778d022f857f7a9b19e',
+ 'info_dict': {
+ 'id': '8032455',
+ 'ext': 'mp4',
+ 'title': '99-year-old woman learns to fly plane in Torrance, checks off bucket list dream',
+ 'description': 'md5:0a12bab8159445e78f52a297a35c6609',
+ 'upload_date': '20220928',
+ 'timestamp': 1664408881,
+ 'thumbnail': r're:^https?://.*\.jpg',
+ 'uploader': 'LIN',
+ 'tags': ['video', 'news', '5live'],
+ 'duration': 155,
+ 'categories': ['News'],
+ },
+ }]
+
+ # Copied from anvplayer.min.js
+ _ANVACK_TABLE = {
+ 'nbcu_nbcd_desktop_web_prod_93d8ead38ce2024f8f544b78306fbd15895ae5e6': 'NNemUkySjxLyPTKvZRiGntBIjEyK8uqicjMakIaQ',
+ 'nbcu_nbcd_desktop_web_qa_1a6f01bdd0dc45a439043b694c8a031d': 'eSxJUbA2UUKBTXryyQ2d6NuM8oEqaPySvaPzfKNA',
+ 'nbcu_nbcd_desktop_web_acc_eb2ff240a5d4ae9a63d4c297c32716b6c523a129': '89JR3RtUGbvKuuJIiKOMK0SoarLb5MUx8v89RcbP',
+ 'nbcu_nbcd_watchvod_web_prod_e61107507180976724ec8e8319fe24ba5b4b60e1': 'Uc7dFt7MJ9GsBWB5T7iPvLaMSOt8BBxv4hAXk5vv',
+ 'nbcu_nbcd_watchvod_web_qa_42afedba88a36203db5a4c09a5ba29d045302232': 'T12oDYVFP2IaFvxkmYMy5dKxswpLHtGZa4ZAXEi7',
+ 'nbcu_nbcd_watchvod_web_acc_9193214448e2e636b0ffb78abacfd9c4f937c6ca': 'MmobcxUxMedUpohNWwXaOnMjlbiyTOBLL6d46ZpR',
+ 'nbcu_local_monitor_web_acc_f998ad54eaf26acd8ee033eb36f39a7b791c6335': 'QvfIoPYrwsjUCcASiw3AIkVtQob2LtJHfidp9iWg',
+ 'nbcu_cable_monitor_web_acc_a413759603e8bedfcd3c61b14767796e17834077': 'uwVPJLShvJWSs6sWEIuVem7MTF8A4IknMMzIlFto',
+ 'nbcu_nbcd_mcpstage_web_qa_4c43a8f6e95a88dbb40276c0630ba9f693a63a4e': 'PxVYZVwjhgd5TeoPRxL3whssb5OUPnM3zyAzq8GY',
+ 'nbcu_comcast_comcast_web_prod_074080762ad4ce956b26b43fb22abf153443a8c4': 'afnaRZfDyg1Z3WZHdupKfy6xrbAG2MHqe3VfuSwh',
+ 'nbcu_comcast_comcast_web_qa_706103bb93ead3ef70b1de12a0e95e3c4481ade0': 'DcjsVbX9b3uoPlhdriIiovgFQZVxpISZwz0cx1ZK',
+ 'nbcu_comcast_comcastcable_web_prod_669f04817536743563d7331c9293e59fbdbe3d07': '0RwMN2cWy10qhAhOscq3eK7aEe0wqnKt3vJ0WS4D',
+ 'nbcu_comcast_comcastcable_web_qa_3d9d2d66219094127f0f6b09cc3c7bb076e3e1ca': '2r8G9DEya7PCqBceKZgrn2XkXgASjwLMuaFE1Aad',
+ 'hearst_hearst_demo_web_stage_960726dfef3337059a01a78816e43b29ec04dfc7': 'cuZBPXTR6kSdoTCVXwk5KGA8rk3NrgGn4H6e9Dsp',
+ 'anvato_mcpqa_demo_web_stage_18b55e00db5a13faa8d03ae6e41f6f5bcb15b922': 'IOaaLQ8ymqVyem14QuAvE5SndQynTcH5CrLkU2Ih',
+ 'anvato_nextmedia_demo_web_stage_9787d56a02ff6b9f43e9a2b0920d8ca88beb5818': 'Pqu9zVzI1ApiIzbVA3VkGBEQHvdKSUuKpD6s2uaR',
+ 'anvato_scripps_app_web_prod_0837996dbe373629133857ae9eb72e740424d80a': 'du1ccmn7RxzgizwbWU7hyUaGodNlJn7HtXI0WgXW',
+ 'anvato_scripps_app_web_stage_360797e00fe2826be142155c4618cc52fce6c26c': '2PMrQ0BRoqCWl7nzphj0GouIMEh2mZYivAT0S1Su',
+ 'fs2go_fs2go_go_all_prod_21934911ccfafc03a075894ead2260d11e2ddd24': 'RcuHlKikW2IJw6HvVoEkqq2UsuEJlbEl11pWXs4Q',
+ 'fs2go_fs2go_go_web_prod_ead4b0eec7460c1a07783808db21b49cf1f2f9a7': '4K0HTT2u1zkQA2MaGaZmkLa1BthGSBdr7jllrhk5',
+ 'fs2go_fs2go_go_web_stage_407585454a4400355d4391691c67f361': 'ftnc37VKRJBmHfoGGi3kT05bHyeJzilEzhKJCyl3',
+ 'fs2go_fs2go_go_android_stage_44b714db6f8477f29afcba15a41e1d30': 'CtxpPvVpo6AbZGomYUhkKs7juHZwNml9b9J0J2gI',
+ 'anvato_cbslocal_app_web_prod_547f3e49241ef0e5d30c79b2efbca5d92c698f67': 'Pw0XX5KBDsyRnPS0R2JrSrXftsy8Jnz5pAjaYC8s',
+ 'anvato_cbslocal_app_web_stage_547a5f096594cd3e00620c6f825cad1096d28c80': '37OBUhX2uwNyKhhrNzSSNHSRPZpApC3trdqDBpuz',
+ 'fs2go_att_att_web_prod_1042dddd089a05438b6a08f972941176f699ffd8': 'JLcF20JwYvpv6uAGcLWIaV12jKwaL1R8us4b6Zkg',
+ 'fs2go_att_att_web_stage_807c5001955fc114a3331fe027ddc76e': 'gbu1oO1y0JiOFh4SUipt86P288JHpyjSqolrrT1x',
+ 'fs2go_fs2go_tudor_web_prod_a7dd8e5a7cdc830cae55eae6f3e9fee5ee49eb9b': 'ipcp87VCEZXPPe868j3orLqzc03oTy7DXsGkAXXH',
+ 'anvato_mhz_app_web_prod_b808218b30de7fdf60340cbd9831512bc1bf6d37': 'Stlm5Gs6BEhJLRTZHcNquyzxGqr23EuFmE5DCgjX',
+ 'fs2go_charter_charter_web_stage_c2c6e5a68375a1bf00fff213d3ff8f61a835a54c': 'Lz4hbJp1fwL6jlcz4M2PMzghM4jp4aAmybtT5dPc',
+ 'fs2go_charter_charter_web_prod_ebfe3b10f1af215a7321cd3d629e0b81dfa6fa8c': 'vUJsK345A1bVmyYDRhZX0lqFIgVXuqhmuyp1EtPK',
+ 'anvato_epfox_app_web_prod_b3373168e12f423f41504f207000188daf88251b': 'GDKq1ixvX3MoBNdU5IOYmYa2DTUXYOozPjrCJnW7',
+ 'anvato_epfox_app_web_stage_a3c2ce60f8f83ef374a88b68ee73a950f8ab87ce': '2jz2NH4BsXMaDsoJ5qkHMbcczAfIReo2eFYuVC1C',
+ 'fs2go_verizon_verizon_web_stage_08e6df0354a4803f1b1f2428b5a9a382e8dbcd62': 'rKTVapNaAcmnUbGL4ZcuOoY4SE7VmZSQsblPFr7e',
+ 'fs2go_verizon_verizon_web_prod_f909564cb606eff1f731b5e22e0928676732c445': 'qLSUuHerM3u9eNPzaHyUK52obai5MvE4XDJfqYe1',
+ 'fs2go_foxcom_synd_web_stage_f7b9091f00ea25a4fdaaae77fca5b54cdc7e7043': '96VKF2vLd24fFiDfwPFpzM5llFN4TiIGAlodE0Re',
+ 'fs2go_foxcom_synd_web_prod_0f2cdd64d87e4ab6a1d54aada0ff7a7c8387a064': 'agiPjbXEyEZUkbuhcnmVPhe9NNVbDjCFq2xkcx51',
+ 'anvato_own_app_web_stage_1214ade5d28422c4dae9d03c1243aba0563c4dba': 'mzhamNac3swG4WsJAiUTacnGIODi6SWeVWk5D7ho',
+ 'anvato_own_app_web_prod_944e162ed927ec3e9ed13eb68ed2f1008ee7565e': '9TSxh6G2TXOLBoYm9ro3LdNjjvnXpKb8UR8KoIP9',
+ 'anvato_scripps_app_ftv_prod_a10a10468edd5afb16fb48171c03b956176afad1': 'COJ2i2UIPK7xZqIWswxe7FaVBOVgRkP1F6O6qGoH',
+ 'anvato_scripps_app_ftv_stage_77d3ad2bdb021ec37ca2e35eb09acd396a974c9a': 'Q7nnopNLe2PPfGLOTYBqxSaRpl209IhqaEuDZi1F',
+ 'anvato_univision_app_web_stage_551236ef07a0e17718c3995c35586b5ed8cb5031': 'D92PoLS6UitwxDRA191HUGT9OYcOjV6mPMa5wNyo',
+ 'anvato_univision_app_web_prod_039a5c0a6009e637ae8ac906718a79911e0e65e1': '5mVS5u4SQjtw6NGw2uhMbKEIONIiLqRKck5RwQLR',
+ 'nbcu_cnbc_springfield_ios_prod_670207fae43d6e9a94c351688851a2ce': 'M7fqCCIP9lW53oJbHs19OlJlpDrVyc2OL8gNeuTa',
+ 'nbcu_cnbc_springfieldvod_ios_prod_7a5f04b1ceceb0e9c9e2264a44aa236e08e034c2': 'Yia6QbJahW0S7K1I0drksimhZb4UFq92xLBmmMvk',
+ 'anvato_cox_app_web_prod_ce45cda237969f93e7130f50ee8bb6280c1484ab': 'cc0miZexpFtdoqZGvdhfXsLy7FXjRAOgb9V0f5fZ',
+ 'anvato_cox_app_web_stage_c23dbe016a8e9d8c7101d10172b92434f6088bf9': 'yivU3MYHd2eDZcOfmLbINVtqxyecKTOp8OjOuoGJ',
+ 'anvato_chnzero_app_web_stage_b1164d1352b579e792e542fddf13ee34c0eeb46b': 'A76QkXMmVH8lTCfU15xva1mZnSVcqeY4Xb22Kp7m',
+ 'anvato_chnzero_app_web_prod_253d358928dc08ec161eda2389d53707288a730c': 'OA5QI3ZWZZkdtUEDqh28AH8GedsF6FqzJI32596b',
+ 'anvato_discovery_vodpoc_web_stage_9fa7077b5e8af1f8355f65d4fb8d2e0e9d54e2b7': 'q3oT191tTQ5g3JCP67PkjLASI9s16DuWZ6fYmry3',
+ 'anvato_discovery_vodpoc_web_prod_688614983167a1af6cdf6d76343fda10a65223c1': 'qRvRQCTVHd0VVOHsMvvfidyWmlYVrTbjby7WqIuK',
+ 'nbcu_cnbc_springfieldvod_ftv_stage_826040aad1925a46ac5dfb4b3c5143e648c6a30d': 'JQaSb5a8Tz0PT4ti329DNmzDO30TnngTHmvX8Vua',
+ 'nbcu_cnbc_springfield_ftv_stage_826040aad1925a46ac5dfb4b3c5143e648c6a30d': 'JQaSb5a8Tz0PT4ti329DNmzDO30TnngTHmvX8Vua',
+ 'nbcu_nbcd_capture_web_stage_4dd9d585bfb984ebf856dee35db027b2465cc4ae': '0j1Ov4Vopyi2HpBZJYdL2m8ERJVGYh3nNpzPiO8F',
+ 'nbcu_nbcd_watch3_android_prod_7712ca5fcf1c22f19ec1870a9650f9c37db22dcf': '3LN2UB3rPUAMu7ZriWkHky9vpLMXYha8JbSnxBlx',
+ 'nbcu_nbcd_watchvod3_android_prod_0910a3a4692d57c0b5ff4316075bc5d096be45b9': 'mJagcQ2II30vUOAauOXne7ERwbf5S9nlB3IP17lQ',
+ 'anvato_scripps_app_atv_prod_790deda22e16e71e83df58f880cd389908a45d52': 'CB6trI1mpoDIM5o54DNTsji90NDBQPZ4z4RqBNSH',
+ 'nbcu_nbcd_watchv4_android_prod_ff67cef9cb409158c6f8c3533edddadd0b750507': 'j8CHQCUWjlYERj4NFRmUYOND85QNbHViH09UwuKm',
+ 'nbcu_nbcd_watchvodv4_android_prod_a814d781609989dea6a629d50ae4c7ad8cc8e907': 'rkVnUXxdA9rawVLUlDQtMue9Y4Q7lFEaIotcUhjt',
+ 'rvVKpA50qlOPLFxMjrCGf5pdkdQDm7qn': '1J7ZkY5Qz5lMLi93QOH9IveE7EYB3rLl',
+ 'nbcu_dtv_local_web_prod_b266cf49defe255fd4426a97e27c09e513e9f82f': 'HuLnJDqzLa4saCzYMJ79zDRSQpEduw1TzjMNQu2b',
+ 'nbcu_att_local_web_prod_4cef038b2d969a6b7d700a56a599040b6a619f67': 'Q0Em5VDc2KpydUrVwzWRXAwoNBulWUxCq2faK0AV',
+ 'nbcu_dish_local_web_prod_c56dcaf2da2e9157a4266c82a78195f1dd570f6b': 'bC1LWmRz9ayj2AlzizeJ1HuhTfIaJGsDBnZNgoRg',
+ 'nbcu_verizon_local_web_prod_88bebd2ce006d4ed980de8133496f9a74cb9b3e1': 'wzhDKJZpgvUSS1EQvpCQP8Q59qVzcPixqDGJefSk',
+ 'nbcu_charter_local_web_prod_9ad90f7fc4023643bb718f0fe0fd5beea2382a50': 'PyNbxNhEWLzy1ZvWEQelRuIQY88Eub7xbSVRMdfT',
+ 'nbcu_suddenlink_local_web_prod_20fb711725cac224baa1c1cb0b1c324d25e97178': '0Rph41lPXZbb3fqeXtHjjbxfSrNbtZp1Ygq7Jypa',
+ 'nbcu_wow_local_web_prod_652d9ce4f552d9c2e7b5b1ed37b8cb48155174ad': 'qayIBZ70w1dItm2zS42AptXnxW15mkjRrwnBjMPv',
+ 'nbcu_centurylink_local_web_prod_2034402b029bf3e837ad46814d9e4b1d1345ccd5': 'StePcPMkjsX51PcizLdLRMzxMEl5k2FlsMLUNV4k',
+ 'nbcu_atlanticbrd_local_web_prod_8d5f5ecbf7f7b2f5e6d908dd75d90ae3565f682e': 'NtYLb4TFUS0pRs3XTkyO5sbVGYjVf17bVbjaGscI',
+ 'nbcu_nbcd_watchvod_web_dev_08bc05699be47c4f31d5080263a8cfadc16d0f7c': 'hwxi2dgDoSWgfmVVXOYZm14uuvku4QfopstXckhr',
+ 'anvato_nextmedia_app_web_prod_a4fa8c7204aa65e71044b57aaf63711980cfe5a0': 'tQN1oGPYY1nM85rJYePWGcIb92TG0gSqoVpQTWOw',
+ 'anvato_mcp_lin_web_prod_4c36fbfd4d8d8ecae6488656e21ac6d1ac972749': 'GUXNf5ZDX2jFUpu4WT2Go4DJ5nhUCzpnwDRRUx1K',
+ 'anvato_mcp_univision_web_prod_37fe34850c99a3b5cdb71dab10a417dd5cdecafa': 'bLDYF8JqfG42b7bwKEgQiU9E2LTIAtnKzSgYpFUH',
+ 'anvato_mcp_fs2go_web_prod_c7b90a93e171469cdca00a931211a2f556370d0a': 'icgGoYGipQMMSEvhplZX1pwbN69srwKYWksz3xWK',
+ 'anvato_mcp_sps_web_prod_54bdc90dd6ba21710e9f7074338365bba28da336': 'fA2iQdI7RDpynqzQYIpXALVS83NTPr8LLFK4LFsu',
+ 'anvato_mcp_anv_web_prod_791407490f4c1ef2a4bcb21103e0cb1bcb3352b3': 'rMOUZqe9lwcGq2mNgG3EDusm6lKgsUnczoOX3mbg',
+ 'anvato_mcp_gray_web_prod_4c10f067c393ed8fc453d3930f8ab2b159973900': 'rMOUZqe9lwcGq2mNgG3EDusm6lKgsUnczoOX3mbg',
+ 'anvato_mcp_hearst_web_prod_5356c3de0fc7c90a3727b4863ca7fec3a4524a99': 'P3uXJ0fXXditBPCGkfvlnVScpPEfKmc64Zv7ZgbK',
+ 'anvato_mcp_cbs_web_prod_02f26581ff80e5bda7aad28226a8d369037f2cbe': 'mGPvo5ZA5SgjOFAPEPXv7AnOpFUICX8hvFQVz69n',
+ 'anvato_mcp_telemundo_web_prod_c5278d51ad46fda4b6ca3d0ea44a7846a054f582': 'qyT6PXXLjVNCrHaRVj0ugAhalNRS7Ee9BP7LUokD',
+ 'nbcu_nbcd_watchvodv4_web_stage_4108362fba2d4ede21f262fea3c4162cbafd66c7': 'DhaU5lj0W2gEdcSSsnxURq8t7KIWtJfD966crVDk',
+ 'anvato_scripps_app_ios_prod_409c41960c60b308db43c3cc1da79cab9f1c3d93': 'WPxj5GraLTkYCyj3M7RozLqIycjrXOEcDGFMIJPn',
+ 'EZqvRyKBJLrgpClDPDF8I7Xpdp40Vx73': '4OxGd2dEakylntVKjKF0UK9PDPYB6A9W',
+ 'M2v78QkpleXm9hPp9jUXI63x5vA6BogR': 'ka6K32k7ZALmpINkjJUGUo0OE42Md1BQ',
+ 'nbcu_nbcd_desktop_web_prod_93d8ead38ce2024f8f544b78306fbd15895ae5e6_secure': 'NNemUkySjxLyPTKvZRiGntBIjEyK8uqicjMakIaQ',
+ 'X8POa4zPPaKVZHqmWjuEzfP31b1QM9VN': 'Dn5vOY9ooDw7VSl9qztjZI5o0g08mA0z',
+ 'M2v78QkBMpNJlSPp9diX5F2PBmBy6Bog': 'ka6K32kyo7nDZfNkjQCGWf1lpApXMd1B',
+ 'bvJ0dQpav07l0hG5JgfVLF2dv1vARwpP': 'BzoQW24GrJZoJfmNodiJKSPeB9B8NOxj',
+ 'lxQMLg2XZKuEZaWgsqubBxV9INZ6bryY': 'Vm2Mx6noKds9jB71h6urazwlTG3m9x8l',
+ '04EnjvXeoSmkbJ9ckPs7oY0mcxv7PlyN': 'aXERQP9LMfQVlEDsgGs6eEA1SWznAQ8P',
+ 'mQbO2ge6BFRWVPYCYpU06YvNt80XLvAX': 'E2BV1NGmasN5v7eujECVPJgwflnLPm2A',
+ 'g43oeBzJrCml7o6fa5fRL1ErCdeD8z4K': 'RX34mZ6zVH4Nr6whbxIGLv9WSbxEKo8V',
+ 'VQrDJoP7mtdBzkxhXbSPwGB1coeElk4x': 'j2VejQx0VFKQepAF7dI0mJLKtOVJE18z',
+ 'WxA5NzLRjCrmq0NUgaU5pdMDuZO7RJ4w': 'lyY5ADLKaIOLEgAsGQCveEMAcqnx3rY9',
+ 'M4lpMXB71ie0PjMCjdFzVXq0SeRVqz49': 'n2zVkOqaLIv3GbLfBjcwW51LcveWOZ2e',
+ 'dyDZGEqN8u8nkJZcJns0oxYmtP7KbGAn': 'VXOEqQW9BtEVLajfZQSLEqxgS5B7qn2D',
+ 'E7QNjrVY5u5mGvgu67IoDgV1CjEND8QR': 'rz8AaDmdKIkLmPNhB5ILPJnjS5PnlL8d',
+ 'a4zrqjoKlfzg0dwHEWtP31VqcLBpjm4g': 'LY9J16gwETdGWa3hjBu5o0RzuoQDjqXQ',
+ 'dQP5BZroMsMVLO1hbmT5r2Enu86GjxA6': '7XR3oOdbPF6x3PRFLDCq9RkgsRjAo48V',
+ 'M4lKNBO1NFe0PjMCj1tzVXq0SeRVqzA9': 'n2zoRqGLRUv3GbLfBmTwW51LcveWOZYe',
+ 'nAZ7MZdpGCGg1pqFEbsoJOz2C60mv143': 'dYJgdqA9aT4yojETqGi7yNgoFADxqmXP',
+ '3y1MERYgOuE9NzbFgwhV6Wv2F0YKvbyz': '081xpZDQgC4VadLTavhWQxrku56DAgXV',
+ 'bmQvmEXr5HWklBMCZOcpE2Z3HBYwqGyl': 'zxXPbVNyMiMAZldhr9FkOmA0fl4aKr2v',
+ 'wA7oDNYldfr6050Hwxi52lPZiVlB86Ap': 'ZYK16aA7ni0d3l3c34uwpxD7CbReMm8Q',
+ 'g43MbKMWmFml7o7sJoSRkXxZiXRvJ3QK': 'RX3oBJonvs4Nr6rUWBCGn3matRGqJPXV',
+ 'mA9VdlqpLS0raGaSDvtoqNrBTzb8XY4q': '0XN4OjBD3fnW7r7IbmtJB4AyfOmlrE2r',
+ 'mAajOwgkGt17oGoFmEuklMP9H0GnW54d': 'lXbBLPGyzikNGeGujAuAJGjZiwLRxyXR',
+ 'vy8vjJ9kbUwrRqRu59Cj5dWZfzYErlAb': 'K8l7gpwaGcBpnAnCLNCmPZRdin3eaQX0',
+ 'xQMWBpR8oHEZaWaSMGUb0avOHjLVYn4Y': 'm2MrN4vEaf9jB7BFy5Srb40jTrN67AYl',
+ 'xyKEmVO3miRr6D6UVkt7oB8jtD6aJEAv': 'g2ddDebqDfqdgKgswyUKwGjbTWwzq923',
+ '7Qk0wa2D9FjKapacoJF27aLvUDKkLGA0': 'b2kgBEkephJaMkMTL7s1PLe4Ua6WyP2P',
+ '3QLg6nqmNTJ5VvVTo7f508LPidz1xwyY': 'g2L1GgpraipmAOAUqmIbBnPxHOmw4MYa',
+ '3y1B7zZjXTE9NZNSzZSVNPZaTNLjo6Qz': '081b5G6wzH4VagaURmcWbN5mT4JGEe2V',
+ 'lAqnwvkw6SG6D8DSqmUg6DRLUp0w3G4x': 'O2pbP0xPDFNJjpjIEvcdryOJtpkVM4X5',
+ 'awA7xd1N0Hr6050Hw2c52lPZiVlB864p': 'GZYKpn4aoT0d3l3c3PiwpxD7CbReMmXQ',
+ 'jQVqPLl9YHL1WGWtR1HDgWBGT63qRNyV': '6X03ne6vrU4oWyWUN7tQVoajikxJR3Ye',
+ 'GQRMR8mL7uZK797t7xH3eNzPIP5dOny1': 'm2vqPWGd4U31zWzSyasDRAoMT1PKRp8o',
+ 'zydq9RdmRhXLkNkfNoTJlMzaF0lWekQB': '3X7LnvE7vH5nkEkSqLiey793Un7dLB8e',
+ 'VQrDzwkB2IdBzjzu9MHPbEYkSB50gR4x': 'j2VebLzoKUKQeEesmVh0gM1eIp9jKz8z',
+ 'mAa2wMamBs17oGoFmktklMP9H0GnW54d': 'lXbgP74xZTkNGeGujVUAJGjZiwLRxy8R',
+ '7yjB6ZLG6sW8R6RF2xcan1KGfJ5dNoyd': 'wXQkPorvPHZ45N5t4Jf6qwg5Tp4xvw29',
+ 'a4zPpNeWGuzg0m0iX3tPeanGSkRKWXQg': 'LY9oa3QAyHdGW9Wu3Ri5JGeEik7l1N8Q',
+ 'k2rneA2M38k25cXDwwSknTJlxPxQLZ6M': '61lyA2aEVDzklfdwmmh31saPxQx2VRjp',
+ 'bK9Zk4OvPnvxduLgxvi8VUeojnjA02eV': 'o5jANYjbeMb4nfBaQvcLAt1jzLzYx6ze',
+ '5VD6EydM3R9orHmNMGInGCJwbxbQvGRw': 'w3zjmX7g4vnxzCxElvUEOiewkokXprkZ',
+ '70X35QbVYVYNPUmP9YfbzI06YqYQk2R1': 'vG4Aj2BMjMjoztB7zeFOnCVPJpJ8lMOa',
+ '26qYwQVG9p1Bks2GgBckjfDJOXOAMgG1': 'r4ev9X0mv5zqJc0yk5IBDcQOwZw8mnwQ',
+ 'rvVKpA56MBXWlSxMw3cobT5pdkd4Dm7q': '1J7ZkY53pZ645c93owcLZuveE7E8B3rL',
+ 'qN1zdy1zlYL23IWZGWtDvfV6WeWQWkJo': 'qN1zdy1zlYL23IWZGWtDvfV6WeWQWkJo',
+ 'jdKqRGF16dKsBviMDae7IGDl7oTjEbVV': 'Q09l7vhlNxPFErIOK6BVCe7KnwUW5DVV',
+ '3QLkogW1OUJ5VvPsrDH56DY2u7lgZWyY': 'g2LRE1V9espmAOPhE4ubj4ZdUA57yDXa',
+ 'wyJvWbXGBSdbkEzhv0CW8meou82aqRy8': 'M2wolPvyBIpQGkbT4juedD4ruzQGdK2y',
+ '7QkdZrzEkFjKap6IYDU2PB0oCNZORmA0': 'b2kN1l96qhJaMkPs9dt1lpjBfwqZoA8P',
+ 'pvA05113MHG1w3JTYxc6DVlRCjErVz4O': 'gQXeAbblBUnDJ7vujbHvbRd1cxlz3AXO',
+ 'mA9blJDZwT0raG1cvkuoeVjLC7ZWd54q': '0XN9jRPwMHnW7rvumgfJZOD9CJgVkWYr',
+ '5QwRN5qKJTvGKlDTmnf7xwNZcjRmvEy9': 'R2GP6LWBJU1QlnytwGt0B9pytWwAdDYy',
+ 'eyn5rPPbkfw2KYxH32fG1q58CbLJzM40': 'p2gyqooZnS56JWeiDgfmOy1VugOQEBXn',
+ '3BABn3b5RfPJGDwilbHe7l82uBoR05Am': '7OYZG7KMVhbPdKJS3xcWEN3AuDlLNmXj',
+ 'xA5zNGXD3HrmqMlF6OS5pdMDuZO7RJ4w': 'yY5DAm6r1IOLE3BCVMFveEMAcqnx3r29',
+ 'g43PgW3JZfml7o6fDEURL1ErCdeD8zyK': 'RX3aQn1zrS4Nr6whDgCGLv9WSbxEKo2V',
+ 'lAqp8WbGgiG6D8LTKJcg3O72CDdre1Qx': 'O2pnm6473HNJjpKuVosd3vVeh975yrX5',
+ 'wyJbYEDxKSdbkJ6S6RhW8meou82aqRy8': 'M2wPm7EgRSpQGlAh70CedD4ruzQGdKYy',
+ 'M4lgW28nLCe0PVdtaXszVXq0SeRVqzA9': 'n2zmJvg4jHv3G0ETNgiwW51LcveWOZ8e',
+ '5Qw3OVvp9FvGKlDTmOC7xwNZcjRmvEQ9': 'R2GzDdml9F1Qlnytw9s0B9pytWwAdD8y',
+ 'vy8a98X7zCwrRqbHrLUjYzwDiK2b70Qb': 'K8lVwzyjZiBpnAaSGeUmnAgxuGOBxmY0',
+ 'g4eGjJLLoiqRD3Pf9oT5O03LuNbLRDQp': '6XqD59zzpfN4EwQuaGt67qNpSyRBlnYy',
+ 'g43OPp9boIml7o6fDOIRL1ErCdeD8z4K': 'RX33alNB4s4Nr6whDPUGLv9WSbxEKoXV',
+ 'xA2ng9OkBcGKzDbTkKsJlx7dUK8R3dA5': 'z2aPnJvzBfObkwGC3vFaPxeBhxoMqZ8K',
+ 'xyKEgBajZuRr6DEC0Kt7XpD1cnNW9gAv': 'g2ddlEBvRsqdgKaI4jUK9PrgfMexGZ23',
+ 'BAogww51jIMa2JnH1BcYpXM5F658RNAL': 'rYWDmm0KptlkGv4FGJFMdZmjs9RDE6XR',
+ 'BAokpg62VtMa2JnH1mHYpXM5F658RNAL': 'rYWryDnlNslkGv4FG4HMdZmjs9RDE62R',
+ 'a4z1Px5e2hzg0m0iMMCPeanGSkRKWXAg': 'LY9eorNQGUdGW9WuKKf5JGeEik7l1NYQ',
+ 'kAx69R58kF9nY5YcdecJdl2pFXP53WyX': 'gXyRxELpbfPvLeLSaRil0mp6UEzbZJ8L',
+ 'BAoY13nwViMa2J2uo2cY6BlETgmdwryL': 'rYWwKzJmNFlkGvGtNoUM9bzwIJVzB1YR',
+ }
+
+ _MCP_TO_ACCESS_KEY_TABLE = {
+ 'qa': 'anvato_mcpqa_demo_web_stage_18b55e00db5a13faa8d03ae6e41f6f5bcb15b922',
+ 'lin': 'anvato_mcp_lin_web_prod_4c36fbfd4d8d8ecae6488656e21ac6d1ac972749',
+ 'univison': 'anvato_mcp_univision_web_prod_37fe34850c99a3b5cdb71dab10a417dd5cdecafa',
+ 'uni': 'anvato_mcp_univision_web_prod_37fe34850c99a3b5cdb71dab10a417dd5cdecafa',
+ 'dev': 'anvato_mcp_fs2go_web_prod_c7b90a93e171469cdca00a931211a2f556370d0a',
+ 'sps': 'anvato_mcp_sps_web_prod_54bdc90dd6ba21710e9f7074338365bba28da336',
+ 'spsstg': 'anvato_mcp_sps_web_prod_54bdc90dd6ba21710e9f7074338365bba28da336',
+ 'anv': 'anvato_mcp_anv_web_prod_791407490f4c1ef2a4bcb21103e0cb1bcb3352b3',
+ 'gray': 'anvato_mcp_gray_web_prod_4c10f067c393ed8fc453d3930f8ab2b159973900',
+ 'hearst': 'anvato_mcp_hearst_web_prod_5356c3de0fc7c90a3727b4863ca7fec3a4524a99',
+ 'cbs': 'anvato_mcp_cbs_web_prod_02f26581ff80e5bda7aad28226a8d369037f2cbe',
+ 'telemundo': 'anvato_mcp_telemundo_web_prod_c5278d51ad46fda4b6ca3d0ea44a7846a054f582'
+ }
+
+ def _generate_nfl_token(self, anvack, mcp_id):
+ reroute = self._download_json(
+ 'https://api.nfl.com/v1/reroute', mcp_id, data=b'grant_type=client_credentials',
+ headers={'X-Domain-Id': 100}, note='Fetching token info')
+ token_type = reroute.get('token_type') or 'Bearer'
+ auth_token = f'{token_type} {reroute["access_token"]}'
+ response = self._download_json(
+ 'https://api.nfl.com/v3/shield/', mcp_id, data=json.dumps({
+ 'query': '''{
+ viewer {
+ mediaToken(anvack: "%s", id: %s) {
+ token
+ }
+ }
+}''' % (anvack, mcp_id),
+ }).encode(), headers={
+ 'Authorization': auth_token,
+ 'Content-Type': 'application/json',
+ }, note='Fetching NFL API token')
+ return traverse_obj(response, ('data', 'viewer', 'mediaToken', 'token'))
+
+ _TOKEN_GENERATORS = {
+ 'GXvEgwyJeWem8KCYXfeoHWknwP48Mboj': _generate_nfl_token,
+ }
+
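+    # Illustrative sketch (our annotation, not part of the upstream code): the
+    # values in _TOKEN_GENERATORS are plain functions stored in a class-level
+    # dict, so _get_video_json below must pass the instance explicitly, e.g.:
+    #
+    #   generator = self._TOKEN_GENERATORS.get(access_key)
+    #   if generator:
+    #       token = generator(self, access_key, video_id)  # anvack, mcp_id
+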
+ def _server_time(self, access_key, video_id):
+ return int_or_none(traverse_obj(self._download_json(
+ f'{self._API_BASE_URL}/server_time', video_id, query={'anvack': access_key},
+ note='Fetching server time', fatal=False), 'server_time')) or int(time.time())
+
+ def _get_video_json(self, access_key, video_id, extracted_token):
+ # See et() in anvplayer.min.js, which is an alias of getVideoJSON()
+ video_data_url = f'{self._API_BASE_URL}/mcp/video/{video_id}?anvack={access_key}'
+ server_time = self._server_time(access_key, video_id)
+ input_data = f'{server_time}~{md5_text(video_data_url)}~{md5_text(server_time)}'
+
+ auth_secret = intlist_to_bytes(aes_encrypt(
+ bytes_to_intlist(input_data[:64]), bytes_to_intlist(self._AUTH_KEY)))
+ query = {
+ 'X-Anvato-Adst-Auth': base64.b64encode(auth_secret).decode('ascii'),
+ 'rtyp': 'fp',
+ }
+ anvrid = md5_text(time.time() * 1000 * random.random())[:30]
+ api = {
+ 'anvrid': anvrid,
+ 'anvts': server_time,
+ }
+ if extracted_token is not None:
+ api['anvstk2'] = extracted_token
+ elif self._TOKEN_GENERATORS.get(access_key) is not None:
+ api['anvstk2'] = self._TOKEN_GENERATORS[access_key](self, access_key, video_id)
+ elif self._ANVACK_TABLE.get(access_key) is not None:
+ api['anvstk'] = md5_text(f'{access_key}|{anvrid}|{server_time}|{self._ANVACK_TABLE[access_key]}')
+ else:
+ api['anvstk2'] = 'default'
+
+ return self._download_json(
+ video_data_url, video_id, transform_source=strip_jsonp, query=query,
+ data=json.dumps({'api': api}, separators=(',', ':')).encode('utf-8'))
+
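+    # Illustrative sketch (our annotation; stdlib-only and not used at runtime)
+    # of the request signing done in _get_video_json above. yt-dlp's md5_text()
+    # is effectively hashlib.md5(str(x).encode()).hexdigest(); the key/secret
+    # pair below is hypothetical:
+    #
+    #   import hashlib
+    #   import random
+    #   import time
+    #
+    #   def md5_text(value):
+    #       return hashlib.md5(str(value).encode()).hexdigest()
+    #
+    #   access_key, secret = 'example_anvack', 'example_secret'  # hypothetical
+    #   server_time = int(time.time())
+    #   anvrid = md5_text(time.time() * 1000 * random.random())[:30]  # random request id
+    #   anvstk = md5_text(f'{access_key}|{anvrid}|{server_time}|{secret}')
+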
+ def _get_anvato_videos(self, access_key, video_id, token):
+ video_data = self._get_video_json(access_key, video_id, token)
+
+        formats = []
+        vtt_subs, hls_subs = {}, {}
+ for published_url in video_data['published_urls']:
+ video_url = published_url.get('embed_url')
+ if not video_url:
+ continue
+ media_format = published_url.get('format')
+ ext = determine_ext(video_url)
+
+ if ext == 'smil' or media_format == 'smil':
+ formats.extend(self._extract_smil_formats(video_url, video_id))
+ continue
+
+ tbr = int_or_none(published_url.get('kbps'))
+ a_format = {
+ 'url': video_url,
+ 'format_id': join_nonempty('http', published_url.get('cdn_name')).lower(),
+ 'tbr': tbr or None,
+ }
+
+ if media_format == 'vtt':
+ _, vtt_subs = self._extract_m3u8_formats_and_subtitles(
+ video_url, video_id, m3u8_id='vtt', fatal=False)
+ continue
+ elif media_format == 'm3u8' and tbr is not None:
+ a_format.update({
+ 'format_id': join_nonempty('hls', tbr),
+ 'ext': 'mp4',
+ })
+ elif media_format == 'm3u8-variant' or ext == 'm3u8':
+                # For some videos, the initial m3u8 URL returns JSON instead of the manifest
+ manifest_json = self._download_json(
+ video_url, video_id, note='Downloading manifest JSON', fatal=False)
+ if manifest_json:
+ video_url = manifest_json.get('master_m3u8')
+ if not video_url:
+ continue
+ hls_fmts, hls_subs = self._extract_m3u8_formats_and_subtitles(
+ video_url, video_id, ext='mp4', m3u8_id='hls', fatal=False)
+ formats.extend(hls_fmts)
+ continue
+ elif ext == 'mp3' or media_format == 'mp3':
+ a_format['vcodec'] = 'none'
+ else:
+ a_format.update({
+ 'width': int_or_none(published_url.get('width')),
+ 'height': int_or_none(published_url.get('height')),
+ })
+ formats.append(a_format)
+
+ subtitles = {}
+ for caption in video_data.get('captions', []):
+ a_caption = {
+ 'url': caption['url'],
+ 'ext': 'tt' if caption.get('format') == 'SMPTE-TT' else None
+ }
+ subtitles.setdefault(caption['language'], []).append(a_caption)
+ subtitles = self._merge_subtitles(subtitles, hls_subs, vtt_subs)
+
+ return {
+ 'id': video_id,
+ 'formats': formats,
+ 'title': video_data.get('def_title'),
+ 'description': video_data.get('def_description'),
+ 'tags': video_data.get('def_tags', '').split(','),
+ 'categories': video_data.get('categories'),
+ 'thumbnail': video_data.get('src_image_url') or video_data.get('thumbnail'),
+ 'timestamp': int_or_none(video_data.get(
+ 'ts_published') or video_data.get('ts_added')),
+ 'uploader': video_data.get('mcp_id'),
+ 'duration': int_or_none(video_data.get('duration')),
+ 'subtitles': subtitles,
+ }
+
+ @classmethod
+ def _extract_from_webpage(cls, url, webpage):
+ for mobj in re.finditer(cls._ANVP_RE, webpage):
+ anvplayer_data = unescapeHTML(json.loads(mobj.group('anvp'))) or {}
+ video_id, access_key = anvplayer_data.get('video'), anvplayer_data.get('accessKey')
+ if not access_key:
+ access_key = cls._MCP_TO_ACCESS_KEY_TABLE.get((anvplayer_data.get('mcp') or '').lower())
+ if not (video_id or '').isdigit() or not access_key:
+ continue
+ url = f'anvato:{access_key}:{video_id}'
+ if anvplayer_data.get('token'):
+ url = smuggle_url(url, {'token': anvplayer_data['token']})
+ yield cls.url_result(url, AnvatoIE, video_id)
+
+ def _real_extract(self, url):
+ url, smuggled_data = unsmuggle_url(url, {})
+ self._initialize_geo_bypass({
+ 'countries': smuggled_data.get('geo_countries'),
+ })
+
+ access_key, video_id = self._match_valid_url(url).group('access_key_or_mcp', 'id')
+ if access_key not in self._ANVACK_TABLE:
+ access_key = self._MCP_TO_ACCESS_KEY_TABLE.get(access_key) or access_key
+ return self._get_anvato_videos(access_key, video_id, smuggled_data.get('token'))
diff --git a/yt_dlp/extractor/aol.py b/yt_dlp/extractor/aol.py
new file mode 100644
index 0000000..455f667
--- /dev/null
+++ b/yt_dlp/extractor/aol.py
@@ -0,0 +1,133 @@
+import re
+
+from .yahoo import YahooIE
+from ..utils import (
+ ExtractorError,
+ int_or_none,
+ parse_qs,
+ url_or_none,
+)
+
+
+class AolIE(YahooIE): # XXX: Do not subclass from concrete IE
+ _WORKING = False
+ IE_NAME = 'aol.com'
+ _VALID_URL = r'(?:aol-video:|https?://(?:www\.)?aol\.(?:com|ca|co\.uk|de|jp)/video/(?:[^/]+/)*)(?P<id>\d{9}|[0-9a-f]{24}|[0-9a-f]{8}-(?:[0-9a-f]{4}-){3}[0-9a-f]{12})'
+
+ _TESTS = [{
+ # video with 5min ID
+ 'url': 'https://www.aol.com/video/view/u-s--official-warns-of-largest-ever-irs-phone-scam/518167793/',
+ 'md5': '18ef68f48740e86ae94b98da815eec42',
+ 'info_dict': {
+ 'id': '518167793',
+ 'ext': 'mp4',
+ 'title': 'U.S. Official Warns Of \'Largest Ever\' IRS Phone Scam',
+ 'description': 'A major phone scam has cost thousands of taxpayers more than $1 million, with less than a month until income tax returns are due to the IRS.',
+ 'timestamp': 1395405060,
+ 'upload_date': '20140321',
+ 'uploader': 'Newsy Studio',
+ },
+ 'params': {
+ # m3u8 download
+ 'skip_download': True,
+ }
+ }, {
+ # video with vidible ID
+ 'url': 'https://www.aol.com/video/view/netflix-is-raising-rates/5707d6b8e4b090497b04f706/',
+ 'info_dict': {
+ 'id': '5707d6b8e4b090497b04f706',
+ 'ext': 'mp4',
+ 'title': 'Netflix is Raising Rates',
+ 'description': 'Netflix is rewarding millions of it’s long-standing members with an increase in cost. Veuer’s Carly Figueroa has more.',
+ 'upload_date': '20160408',
+ 'timestamp': 1460123280,
+ 'uploader': 'Veuer',
+ },
+ 'params': {
+ # m3u8 download
+ 'skip_download': True,
+ }
+ }, {
+ 'url': 'https://www.aol.com/video/view/park-bench-season-2-trailer/559a1b9be4b0c3bfad3357a7/',
+ 'only_matching': True,
+ }, {
+ 'url': 'https://www.aol.com/video/view/donald-trump-spokeswoman-tones-down-megyn-kelly-attacks/519442220/',
+ 'only_matching': True,
+ }, {
+ 'url': 'aol-video:5707d6b8e4b090497b04f706',
+ 'only_matching': True,
+ }, {
+ 'url': 'https://www.aol.com/video/playlist/PL8245/5ca79d19d21f1a04035db606/',
+ 'only_matching': True,
+ }, {
+ 'url': 'https://www.aol.ca/video/view/u-s-woman-s-family-arrested-for-murder-first-pinned-on-panhandler-police/5c7ccf45bc03931fa04b2fe1/',
+ 'only_matching': True,
+ }, {
+ 'url': 'https://www.aol.co.uk/video/view/-one-dead-and-22-hurt-in-bus-crash-/5cb3a6f3d21f1a072b457347/',
+ 'only_matching': True,
+ }, {
+ 'url': 'https://www.aol.de/video/view/eva-braun-privataufnahmen-von-hitlers-geliebter-werden-digitalisiert/5cb2d49de98ab54c113d3d5d/',
+ 'only_matching': True,
+ }, {
+ 'url': 'https://www.aol.jp/video/playlist/5a28e936a1334d000137da0c/5a28f3151e642219fde19831/',
+ 'only_matching': True,
+ }, {
+ # Yahoo video
+ 'url': 'https://www.aol.com/video/play/991e6700-ac02-11ea-99ff-357400036f61/24bbc846-3e30-3c46-915e-fe8ccd7fcc46/',
+ 'only_matching': True,
+ }]
+
+ def _real_extract(self, url):
+ video_id = self._match_id(url)
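+        # IDs containing a '-' are Yahoo-style UUIDs (see the final test case
+        # above), so those are delegated to the Yahoo extractor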
+ if '-' in video_id:
+ return self._extract_yahoo_video(video_id, 'us')
+
+ response = self._download_json(
+ 'https://feedapi.b2c.on.aol.com/v1.0/app/videos/aolon/%s/details' % video_id,
+ video_id)['response']
+ if response['statusText'] != 'Ok':
+ raise ExtractorError('%s said: %s' % (self.IE_NAME, response['statusText']), expected=True)
+
+ video_data = response['data']
+ formats = []
+ m3u8_url = url_or_none(video_data.get('videoMasterPlaylist'))
+ if m3u8_url:
+ formats.extend(self._extract_m3u8_formats(
+ m3u8_url, video_id, 'mp4', m3u8_id='hls', fatal=False))
+ for rendition in video_data.get('renditions', []):
+ video_url = url_or_none(rendition.get('url'))
+ if not video_url:
+ continue
+ ext = rendition.get('format')
+ if ext == 'm3u8':
+ formats.extend(self._extract_m3u8_formats(
+ video_url, video_id, 'mp4', m3u8_id='hls', fatal=False))
+ else:
+ f = {
+ 'url': video_url,
+ 'format_id': rendition.get('quality'),
+ }
+ mobj = re.search(r'(\d+)x(\d+)', video_url)
+ if mobj:
+ f.update({
+ 'width': int(mobj.group(1)),
+ 'height': int(mobj.group(2)),
+ })
+ else:
+ qs = parse_qs(video_url)
+ f.update({
+ 'width': int_or_none(qs.get('w', [None])[0]),
+ 'height': int_or_none(qs.get('h', [None])[0]),
+ })
+ formats.append(f)
+
+ return {
+ 'id': video_id,
+ 'title': video_data['title'],
+ 'duration': int_or_none(video_data.get('duration')),
+ 'timestamp': int_or_none(video_data.get('publishDate')),
+ 'view_count': int_or_none(video_data.get('views')),
+ 'description': video_data.get('description'),
+ 'uploader': video_data.get('videoOwner'),
+ 'formats': formats,
+ }
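+
+# Illustrative sketch (our annotation; stdlib-only) of the two ways the loop
+# above recovers a rendition's dimensions: first from a '<width>x<height>'
+# segment in the URL path, then from 'w'/'h' query parameters. The URLs are
+# hypothetical:
+#
+#   import re
+#   from urllib.parse import parse_qs, urlparse
+#
+#   def dimensions(video_url):
+#       mobj = re.search(r'(\d+)x(\d+)', video_url)
+#       if mobj:
+#           return int(mobj.group(1)), int(mobj.group(2))
+#       qs = parse_qs(urlparse(video_url).query)
+#       w, h = qs.get('w', [None])[0], qs.get('h', [None])[0]
+#       return (int(w) if w else None, int(h) if h else None)
+#
+#   dimensions('https://example.com/video_640x360.mp4')       # -> (640, 360)
+#   dimensions('https://example.com/video.mp4?w=1280&h=720')  # -> (1280, 720)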
diff --git a/yt_dlp/extractor/apa.py b/yt_dlp/extractor/apa.py
new file mode 100644
index 0000000..1ea0b1d
--- /dev/null
+++ b/yt_dlp/extractor/apa.py
@@ -0,0 +1,82 @@
+from .common import InfoExtractor
+from ..utils import (
+ determine_ext,
+ int_or_none,
+ url_or_none,
+)
+
+
+class APAIE(InfoExtractor):
+ _VALID_URL = r'(?P<base_url>https?://[^/]+\.apa\.at)/embed/(?P<id>[\da-f]{8}-[\da-f]{4}-[\da-f]{4}-[\da-f]{4}-[\da-f]{12})'
+ _EMBED_REGEX = [r'<iframe[^>]+\bsrc=(["\'])(?P<url>(?:https?:)?//[^/]+\.apa\.at/embed/[\da-f]{8}-[\da-f]{4}-[\da-f]{4}-[\da-f]{4}-[\da-f]{12}.*?)\1']
+ _TESTS = [{
+ 'url': 'http://uvp.apa.at/embed/293f6d17-692a-44e3-9fd5-7b178f3a1029',
+ 'md5': '2b12292faeb0a7d930c778c7a5b4759b',
+ 'info_dict': {
+ 'id': '293f6d17-692a-44e3-9fd5-7b178f3a1029',
+ 'ext': 'mp4',
+ 'title': '293f6d17-692a-44e3-9fd5-7b178f3a1029',
+ 'thumbnail': r're:^https?://.*\.jpg$',
+ },
+ }, {
+ 'url': 'https://uvp-apapublisher.sf.apa.at/embed/2f94e9e6-d945-4db2-9548-f9a41ebf7b78',
+ 'only_matching': True,
+ }, {
+ 'url': 'http://uvp-rma.sf.apa.at/embed/70404cca-2f47-4855-bbb8-20b1fae58f76',
+ 'only_matching': True,
+ }, {
+ 'url': 'http://uvp-kleinezeitung.sf.apa.at/embed/f1c44979-dba2-4ebf-b021-e4cf2cac3c81',
+ 'only_matching': True,
+ }]
+
+ def _real_extract(self, url):
+ mobj = self._match_valid_url(url)
+ video_id, base_url = mobj.group('id', 'base_url')
+
+ webpage = self._download_webpage(
+ '%s/player/%s' % (base_url, video_id), video_id)
+
+ jwplatform_id = self._search_regex(
+ r'media[iI]d\s*:\s*["\'](?P<id>[a-zA-Z0-9]{8})', webpage,
+ 'jwplatform id', default=None)
+
+ if jwplatform_id:
+ return self.url_result(
+ 'jwplatform:' + jwplatform_id, ie='JWPlatform',
+ video_id=video_id)
+
+ def extract(field, name=None):
+ return self._search_regex(
+ r'\b%s["\']\s*:\s*(["\'])(?P<value>(?:(?!\1).)+)\1' % field,
+ webpage, name or field, default=None, group='value')
+
+ title = extract('title') or video_id
+ description = extract('description')
+ thumbnail = extract('poster', 'thumbnail')
+
+ formats = []
+ for format_id in ('hls', 'progressive'):
+ source_url = url_or_none(extract(format_id))
+ if not source_url:
+ continue
+ ext = determine_ext(source_url)
+ if ext == 'm3u8':
+ formats.extend(self._extract_m3u8_formats(
+ source_url, video_id, 'mp4', entry_protocol='m3u8_native',
+ m3u8_id='hls', fatal=False))
+ else:
+ height = int_or_none(self._search_regex(
+ r'(\d+)\.mp4', source_url, 'height', default=None))
+ formats.append({
+ 'url': source_url,
+ 'format_id': format_id,
+ 'height': height,
+ })
+
+ return {
+ 'id': video_id,
+ 'title': title,
+ 'description': description,
+ 'thumbnail': thumbnail,
+ 'formats': formats,
+ }
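+
+# Illustrative sketch (our annotation) of the quoted-value regex used by the
+# extract() helper above, run against a hypothetical player config blob:
+#
+#   import re
+#
+#   webpage = '"title": "Some clip", "poster": "https://example.com/p.jpg"'
+#
+#   def extract(field):
+#       m = re.search(r'\b%s["\']\s*:\s*(["\'])(?P<value>(?:(?!\1).)+)\1' % field, webpage)
+#       return m.group('value') if m else None
+#
+#   extract('title')   # -> 'Some clip'
+#   extract('poster')  # -> 'https://example.com/p.jpg'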
diff --git a/yt_dlp/extractor/aparat.py b/yt_dlp/extractor/aparat.py
new file mode 100644
index 0000000..4a989d8
--- /dev/null
+++ b/yt_dlp/extractor/aparat.py
@@ -0,0 +1,88 @@
+from .common import InfoExtractor
+from ..utils import (
+ get_element_by_id,
+ int_or_none,
+ merge_dicts,
+ mimetype2ext,
+ url_or_none,
+)
+
+
+class AparatIE(InfoExtractor):
+ _VALID_URL = r'https?://(?:www\.)?aparat\.com/(?:v/|video/video/embed/videohash/)(?P<id>[a-zA-Z0-9]+)'
+ _EMBED_REGEX = [r'<iframe .*?src="(?P<url>http://www\.aparat\.com/video/[^"]+)"']
+
+ _TESTS = [{
+ 'url': 'http://www.aparat.com/v/wP8On',
+ 'md5': '131aca2e14fe7c4dcb3c4877ba300c89',
+ 'info_dict': {
+ 'id': 'wP8On',
+ 'ext': 'mp4',
+ 'title': 'تیم گلکسی 11 - زومیت',
+ 'description': 'md5:096bdabcdcc4569f2b8a5e903a3b3028',
+ 'duration': 231,
+ 'timestamp': 1387394859,
+ 'upload_date': '20131218',
+ 'view_count': int,
+ },
+ }, {
+ # multiple formats
+ 'url': 'https://www.aparat.com/v/8dflw/',
+ 'only_matching': True,
+ }]
+
+ def _parse_options(self, webpage, video_id, fatal=True):
+ return self._parse_json(self._search_regex(
+ r'options\s*=\s*({.+?})\s*;', webpage, 'options', default='{}'), video_id)
+
+ def _real_extract(self, url):
+ video_id = self._match_id(url)
+
+        # The regular webpage, if available, provides more metadata
+ webpage = self._download_webpage(url, video_id, fatal=False)
+ options = self._parse_options(webpage, video_id, fatal=False)
+
+ if not options:
+ webpage = self._download_webpage(
+ 'http://www.aparat.com/video/video/embed/vt/frame/showvideo/yes/videohash/' + video_id,
+ video_id, 'Downloading embed webpage')
+ options = self._parse_options(webpage, video_id)
+
+ formats = []
+ for sources in (options.get('multiSRC') or []):
+ for item in sources:
+ if not isinstance(item, dict):
+ continue
+ file_url = url_or_none(item.get('src'))
+ if not file_url:
+ continue
+ item_type = item.get('type')
+ if item_type == 'application/vnd.apple.mpegurl':
+ formats.extend(self._extract_m3u8_formats(
+ file_url, video_id, 'mp4',
+ entry_protocol='m3u8_native', m3u8_id='hls',
+ fatal=False))
+ else:
+ ext = mimetype2ext(item.get('type'))
+ label = item.get('label')
+ formats.append({
+ 'url': file_url,
+ 'ext': ext,
+ 'format_id': 'http-%s' % (label or ext),
+ 'height': int_or_none(self._search_regex(
+ r'(\d+)[pP]', label or '', 'height',
+ default=None)),
+ })
+
+ info = self._search_json_ld(webpage, video_id, default={})
+
+ if not info.get('title'):
+ info['title'] = get_element_by_id('videoTitle', webpage) or \
+ self._html_search_meta(['og:title', 'twitter:title', 'DC.Title', 'title'], webpage, fatal=True)
+
+ return merge_dicts(info, {
+ 'id': video_id,
+ 'thumbnail': url_or_none(options.get('poster')),
+ 'duration': int_or_none(options.get('duration')),
+ 'formats': formats,
+ })
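+
+# Illustrative sketch (our annotation; stdlib-only) of the options blob parsed
+# above: multiSRC is a list of source groups, each a list of
+# {'src', 'type', 'label'} dicts. The page content is hypothetical:
+#
+#   import json
+#   import re
+#
+#   webpage = 'var options = {"multiSRC": [[{"src": "https://example.com/720.mp4", "type": "video/mp4", "label": "720p"}]]};'
+#   options = json.loads(re.search(r'options\s*=\s*({.+?})\s*;', webpage).group(1))
+#   for sources in options['multiSRC']:
+#       for item in sources:
+#           height = re.search(r'(\d+)[pP]', item.get('label') or '')
+#           print(item['src'], height and int(height.group(1)))  # -> https://example.com/720.mp4 720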
diff --git a/yt_dlp/extractor/appleconnect.py b/yt_dlp/extractor/appleconnect.py
new file mode 100644
index 0000000..d00b0f9
--- /dev/null
+++ b/yt_dlp/extractor/appleconnect.py
@@ -0,0 +1,50 @@
+from .common import InfoExtractor
+from ..utils import (
+    ExtractorError,
+    str_to_int,
+)
+
+
+class AppleConnectIE(InfoExtractor):
+ _VALID_URL = r'https?://itunes\.apple\.com/\w{0,2}/?post/(?:id)?sa\.(?P<id>[\w-]+)'
+ _TESTS = [{
+ 'url': 'https://itunes.apple.com/us/post/idsa.4ab17a39-2720-11e5-96c5-a5b38f6c42d3',
+ 'md5': 'c1d41f72c8bcaf222e089434619316e4',
+ 'info_dict': {
+ 'id': '4ab17a39-2720-11e5-96c5-a5b38f6c42d3',
+ 'ext': 'm4v',
+ 'title': 'Energy',
+ 'uploader': 'Drake',
+ 'thumbnail': r're:^https?://.*\.jpg$',
+ 'upload_date': '20150710',
+ 'timestamp': 1436545535,
+ },
+ }, {
+ 'url': 'https://itunes.apple.com/us/post/sa.0fe0229f-2457-11e5-9f40-1bb645f2d5d9',
+ 'only_matching': True,
+ }]
+
+ def _real_extract(self, url):
+ video_id = self._match_id(url)
+ webpage = self._download_webpage(url, video_id)
+
+ try:
+ video_json = self._html_search_regex(
+ r'class="auc-video-data">(\{.*?\})', webpage, 'json')
+ except ExtractorError:
+ raise ExtractorError('This post doesn\'t contain a video', expected=True)
+
+ video_data = self._parse_json(video_json, video_id)
+ timestamp = str_to_int(self._html_search_regex(r'data-timestamp="(\d+)"', webpage, 'timestamp'))
+ like_count = str_to_int(self._html_search_regex(r'(\d+) Loves', webpage, 'like count', default=None))
+
+ return {
+ 'id': video_id,
+ 'url': video_data['sslSrc'],
+ 'title': video_data['title'],
+ 'description': video_data['description'],
+ 'uploader': video_data['artistName'],
+ 'thumbnail': video_data['artworkUrl'],
+ 'timestamp': timestamp,
+ 'like_count': like_count,
+ }
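+
+# Illustrative sketch (our annotation) of the markup _real_extract() above
+# targets; all values are hypothetical:
+#
+#   <div class="auc-video-data">{"sslSrc": "https://example.com/v.m4v", "title": "Energy", "description": "...", "artistName": "Drake", "artworkUrl": "https://example.com/art.jpg"}</div>
+#   <span data-timestamp="1436545535">...</span>
+#   <span>1234 Loves</span>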
diff --git a/yt_dlp/extractor/applepodcasts.py b/yt_dlp/extractor/applepodcasts.py
new file mode 100644
index 0000000..49bbeab
--- /dev/null
+++ b/yt_dlp/extractor/applepodcasts.py
@@ -0,0 +1,85 @@
+from .common import InfoExtractor
+from ..utils import (
+ clean_html,
+ clean_podcast_url,
+ get_element_by_class,
+ int_or_none,
+ parse_iso8601,
+ try_get,
+)
+
+
+class ApplePodcastsIE(InfoExtractor):
+ _VALID_URL = r'https?://podcasts\.apple\.com/(?:[^/]+/)?podcast(?:/[^/]+){1,2}.*?\bi=(?P<id>\d+)'
+ _TESTS = [{
+ 'url': 'https://podcasts.apple.com/us/podcast/207-whitney-webb-returns/id1135137367?i=1000482637777',
+ 'md5': '41dc31cd650143e530d9423b6b5a344f',
+ 'info_dict': {
+ 'id': '1000482637777',
+ 'ext': 'mp3',
+ 'title': '207 - Whitney Webb Returns',
+ 'description': 'md5:75ef4316031df7b41ced4e7b987f79c6',
+ 'upload_date': '20200705',
+ 'timestamp': 1593932400,
+ 'duration': 6454,
+ 'series': 'The Tim Dillon Show',
+ 'thumbnail': 're:.+[.](png|jpe?g|webp)',
+ }
+ }, {
+ 'url': 'https://podcasts.apple.com/podcast/207-whitney-webb-returns/id1135137367?i=1000482637777',
+ 'only_matching': True,
+ }, {
+ 'url': 'https://podcasts.apple.com/podcast/207-whitney-webb-returns?i=1000482637777',
+ 'only_matching': True,
+ }, {
+ 'url': 'https://podcasts.apple.com/podcast/id1135137367?i=1000482637777',
+ 'only_matching': True,
+ }]
+
+ def _real_extract(self, url):
+ episode_id = self._match_id(url)
+ webpage = self._download_webpage(url, episode_id)
+ episode_data = {}
+ ember_data = {}
+        # new page type as of 2021-11
+ amp_data = self._parse_json(self._search_regex(
+ r'(?s)id="shoebox-media-api-cache-amp-podcasts"[^>]*>\s*({.+?})\s*<',
+ webpage, 'AMP data', default='{}'), episode_id, fatal=False) or {}
+ amp_data = try_get(amp_data,
+ lambda a: self._parse_json(
+ next(a[x] for x in iter(a) if episode_id in x),
+ episode_id),
+ dict) or {}
+ amp_data = amp_data.get('d') or []
+ episode_data = try_get(
+ amp_data,
+ lambda a: next(x for x in a
+ if x['type'] == 'podcast-episodes' and x['id'] == episode_id),
+ dict)
+ if not episode_data:
+            # try the pre-2021-11 page type; TODO: consider deleting if no longer used
+ ember_data = self._parse_json(self._search_regex(
+ r'(?s)id="shoebox-ember-data-store"[^>]*>\s*({.+?})\s*<',
+ webpage, 'ember data'), episode_id) or {}
+ ember_data = ember_data.get(episode_id) or ember_data
+ episode_data = try_get(ember_data, lambda x: x['data'], dict)
+ episode = episode_data['attributes']
+ description = episode.get('description') or {}
+
+ series = None
+ for inc in (amp_data or ember_data.get('included') or []):
+ if inc.get('type') == 'media/podcast':
+ series = try_get(inc, lambda x: x['attributes']['name'])
+ series = series or clean_html(get_element_by_class('podcast-header__identity', webpage))
+
+ return {
+ 'id': episode_id,
+ 'title': episode.get('name'),
+ 'url': clean_podcast_url(episode['assetUrl']),
+ 'description': description.get('standard') or description.get('short'),
+ 'timestamp': parse_iso8601(episode.get('releaseDateTime')),
+ 'duration': int_or_none(episode.get('durationInMilliseconds'), 1000),
+ 'series': series,
+ 'thumbnail': self._og_search_thumbnail(webpage),
+ 'vcodec': 'none',
+ }
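+
+# Illustrative sketch (our annotation; stdlib-only) of the shoebox AMP cache
+# shape unpacked above: the outer JSON maps cache keys to JSON *strings*, and
+# the entry whose key contains the episode id holds a {'d': [...]} resource
+# list. All values are hypothetical:
+#
+#   import json
+#
+#   episode_id = '1000482637777'
+#   amp_cache = {'podcasts.cached:' + episode_id: json.dumps({'d': [
+#       {'type': 'podcast-episodes', 'id': episode_id,
+#        'attributes': {'name': 'Episode title', 'assetUrl': 'https://example.com/e.mp3'}},
+#   ]})}
+#   inner = json.loads(next(v for k, v in amp_cache.items() if episode_id in k))
+#   episode = next(x for x in inner['d']
+#                  if x['type'] == 'podcast-episodes' and x['id'] == episode_id)
+#   print(episode['attributes']['assetUrl'])  # -> https://example.com/e.mp3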
diff --git a/yt_dlp/extractor/appletrailers.py b/yt_dlp/extractor/appletrailers.py
new file mode 100644
index 0000000..2e0b0a8
--- /dev/null
+++ b/yt_dlp/extractor/appletrailers.py
@@ -0,0 +1,278 @@
+import json
+import re
+
+from .common import InfoExtractor
+from ..compat import compat_urlparse
+from ..utils import (
+ int_or_none,
+ parse_duration,
+ unified_strdate,
+)
+
+
+class AppleTrailersIE(InfoExtractor):
+ IE_NAME = 'appletrailers'
+ _VALID_URL = r'https?://(?:www\.|movie)?trailers\.apple\.com/(?:trailers|ca)/(?P<company>[^/]+)/(?P<movie>[^/]+)'
+ _TESTS = [{
+ 'url': 'http://trailers.apple.com/trailers/wb/manofsteel/',
+ 'info_dict': {
+ 'id': '5111',
+ 'title': 'Man of Steel',
+ },
+ 'playlist': [
+ {
+ 'md5': 'd97a8e575432dbcb81b7c3acb741f8a8',
+ 'info_dict': {
+ 'id': 'manofsteel-trailer4',
+ 'ext': 'mov',
+ 'duration': 111,
+ 'title': 'Trailer 4',
+ 'upload_date': '20130523',
+ 'uploader_id': 'wb',
+ },
+ },
+ {
+ 'md5': 'b8017b7131b721fb4e8d6f49e1df908c',
+ 'info_dict': {
+ 'id': 'manofsteel-trailer3',
+ 'ext': 'mov',
+ 'duration': 182,
+ 'title': 'Trailer 3',
+ 'upload_date': '20130417',
+ 'uploader_id': 'wb',
+ },
+ },
+ {
+ 'md5': 'd0f1e1150989b9924679b441f3404d48',
+ 'info_dict': {
+ 'id': 'manofsteel-trailer',
+ 'ext': 'mov',
+ 'duration': 148,
+ 'title': 'Trailer',
+ 'upload_date': '20121212',
+ 'uploader_id': 'wb',
+ },
+ },
+ {
+ 'md5': '5fe08795b943eb2e757fa95cb6def1cb',
+ 'info_dict': {
+ 'id': 'manofsteel-teaser',
+ 'ext': 'mov',
+ 'duration': 93,
+ 'title': 'Teaser',
+ 'upload_date': '20120721',
+ 'uploader_id': 'wb',
+ },
+ },
+ ]
+ }, {
+ 'url': 'http://trailers.apple.com/trailers/magnolia/blackthorn/',
+ 'info_dict': {
+ 'id': '4489',
+ 'title': 'Blackthorn',
+ },
+ 'playlist_mincount': 2,
+ 'expected_warnings': ['Unable to download JSON metadata'],
+ }, {
+ # json data only available from http://trailers.apple.com/trailers/feeds/data/15881.json
+ 'url': 'http://trailers.apple.com/trailers/fox/kungfupanda3/',
+ 'info_dict': {
+ 'id': '15881',
+ 'title': 'Kung Fu Panda 3',
+ },
+ 'playlist_mincount': 4,
+ }, {
+ 'url': 'http://trailers.apple.com/ca/metropole/autrui/',
+ 'only_matching': True,
+ }, {
+ 'url': 'http://movietrailers.apple.com/trailers/focus_features/kuboandthetwostrings/',
+ 'only_matching': True,
+ }]
+
+    _JSON_RE = r'iTunes\.playURL\((.*?)\);'
+
+ def _real_extract(self, url):
+ mobj = self._match_valid_url(url)
+ movie = mobj.group('movie')
+ uploader_id = mobj.group('company')
+
+ webpage = self._download_webpage(url, movie)
+ film_id = self._search_regex(r"FilmId\s*=\s*'(\d+)'", webpage, 'film id')
+ film_data = self._download_json(
+ 'http://trailers.apple.com/trailers/feeds/data/%s.json' % film_id,
+ film_id, fatal=False)
+
+ if film_data:
+ entries = []
+ for clip in film_data.get('clips', []):
+ clip_title = clip['title']
+
+ formats = []
+ for version, version_data in clip.get('versions', {}).items():
+ for size, size_data in version_data.get('sizes', {}).items():
+ src = size_data.get('src')
+ if not src:
+ continue
+ formats.append({
+ 'format_id': '%s-%s' % (version, size),
+ 'url': re.sub(r'_(\d+p\.mov)', r'_h\1', src),
+ 'width': int_or_none(size_data.get('width')),
+ 'height': int_or_none(size_data.get('height')),
+ 'language': version[:2],
+ })
+
+ entries.append({
+ 'id': movie + '-' + re.sub(r'[^a-zA-Z0-9]', '', clip_title).lower(),
+ 'formats': formats,
+ 'title': clip_title,
+ 'thumbnail': clip.get('screen') or clip.get('thumb'),
+ 'duration': parse_duration(clip.get('runtime') or clip.get('faded')),
+ 'upload_date': unified_strdate(clip.get('posted')),
+ 'uploader_id': uploader_id,
+ })
+
+ page_data = film_data.get('page', {})
+ return self.playlist_result(entries, film_id, page_data.get('movie_title'))
+
+ playlist_url = compat_urlparse.urljoin(url, 'includes/playlists/itunes.inc')
+
+ def fix_html(s):
+ s = re.sub(r'(?s)<script[^<]*?>.*?</script>', '', s)
+ s = re.sub(r'<img ([^<]*?)/?>', r'<img \1/>', s)
+            # The ' characters in the onClick attributes are not escaped, so the
+            # HTML could not otherwise be parsed as XML, e.g. on pages like:
+            # http://trailers.apple.com/trailers/wb/gravity/
+
+ def _clean_json(m):
+ return 'iTunes.playURL(%s);' % m.group(1).replace('\'', '&#39;')
+ s = re.sub(self._JSON_RE, _clean_json, s)
+ s = '<html>%s</html>' % s
+ return s
+ doc = self._download_xml(playlist_url, movie, transform_source=fix_html)
+
+ playlist = []
+ for li in doc.findall('./div/ul/li'):
+ on_click = li.find('.//a').attrib['onClick']
+ trailer_info_json = self._search_regex(self._JSON_RE,
+ on_click, 'trailer info')
+ trailer_info = json.loads(trailer_info_json)
+ first_url = trailer_info.get('url')
+ if not first_url:
+ continue
+ title = trailer_info['title']
+ video_id = movie + '-' + re.sub(r'[^a-zA-Z0-9]', '', title).lower()
+ thumbnail = li.find('.//img').attrib['src']
+ upload_date = trailer_info['posted'].replace('-', '')
+
+ runtime = trailer_info['runtime']
+ m = re.search(r'(?P<minutes>[0-9]+):(?P<seconds>[0-9]{1,2})', runtime)
+ duration = None
+ if m:
+ duration = 60 * int(m.group('minutes')) + int(m.group('seconds'))
+
+ trailer_id = first_url.split('/')[-1].rpartition('_')[0].lower()
+ settings_json_url = compat_urlparse.urljoin(url, 'includes/settings/%s.json' % trailer_id)
+ settings = self._download_json(settings_json_url, trailer_id, 'Downloading settings json')
+
+ formats = []
+ for format in settings['metadata']['sizes']:
+ # The src is a file pointing to the real video file
+ format_url = re.sub(r'_(\d*p\.mov)', r'_h\1', format['src'])
+ formats.append({
+ 'url': format_url,
+ 'format': format['type'],
+ 'width': int_or_none(format['width']),
+ 'height': int_or_none(format['height']),
+ })
+
+ playlist.append({
+ '_type': 'video',
+ 'id': video_id,
+ 'formats': formats,
+ 'title': title,
+ 'duration': duration,
+ 'thumbnail': thumbnail,
+ 'upload_date': upload_date,
+ 'uploader_id': uploader_id,
+ 'http_headers': {
+ 'User-Agent': 'QuickTime compatible (yt-dlp)',
+ },
+ })
+
+ return {
+ '_type': 'playlist',
+ 'id': movie,
+ 'entries': playlist,
+ }
+
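+# Illustrative sketch (our annotation) of the onClick cleanup performed by
+# fix_html()/_clean_json() above: unescaped apostrophes inside the
+# iTunes.playURL(...) JSON are replaced with &#39; so the page can be parsed
+# as XML. The attribute value is hypothetical:
+#
+#   import re
+#
+#   JSON_RE = r'iTunes\.playURL\((.*?)\);'
+#   s = 'iTunes.playURL({"title": "Europe\'s Most Wanted"});'
+#   s = re.sub(JSON_RE, lambda m: 'iTunes.playURL(%s);' % m.group(1).replace("'", '&#39;'), s)
+#   # -> iTunes.playURL({"title": "Europe&#39;s Most Wanted"});
+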
+
+class AppleTrailersSectionIE(InfoExtractor):
+ IE_NAME = 'appletrailers:section'
+ _SECTIONS = {
+ 'justadded': {
+ 'feed_path': 'just_added',
+ 'title': 'Just Added',
+ },
+ 'exclusive': {
+ 'feed_path': 'exclusive',
+ 'title': 'Exclusive',
+ },
+ 'justhd': {
+ 'feed_path': 'just_hd',
+ 'title': 'Just HD',
+ },
+ 'mostpopular': {
+ 'feed_path': 'most_pop',
+ 'title': 'Most Popular',
+ },
+ 'moviestudios': {
+ 'feed_path': 'studios',
+ 'title': 'Movie Studios',
+ },
+ }
+ _VALID_URL = r'https?://(?:www\.)?trailers\.apple\.com/#section=(?P<id>%s)' % '|'.join(_SECTIONS)
+ _TESTS = [{
+ 'url': 'http://trailers.apple.com/#section=justadded',
+ 'info_dict': {
+ 'title': 'Just Added',
+ 'id': 'justadded',
+ },
+ 'playlist_mincount': 80,
+ }, {
+ 'url': 'http://trailers.apple.com/#section=exclusive',
+ 'info_dict': {
+ 'title': 'Exclusive',
+ 'id': 'exclusive',
+ },
+ 'playlist_mincount': 80,
+ }, {
+ 'url': 'http://trailers.apple.com/#section=justhd',
+ 'info_dict': {
+ 'title': 'Just HD',
+ 'id': 'justhd',
+ },
+ 'playlist_mincount': 80,
+ }, {
+ 'url': 'http://trailers.apple.com/#section=mostpopular',
+ 'info_dict': {
+ 'title': 'Most Popular',
+ 'id': 'mostpopular',
+ },
+ 'playlist_mincount': 30,
+ }, {
+ 'url': 'http://trailers.apple.com/#section=moviestudios',
+ 'info_dict': {
+ 'title': 'Movie Studios',
+ 'id': 'moviestudios',
+ },
+ 'playlist_mincount': 80,
+ }]
+
+ def _real_extract(self, url):
+ section = self._match_id(url)
+ section_data = self._download_json(
+ 'http://trailers.apple.com/trailers/home/feeds/%s.json' % self._SECTIONS[section]['feed_path'],
+ section)
+ entries = [
+ self.url_result('http://trailers.apple.com' + e['location'])
+ for e in section_data]
+ return self.playlist_result(entries, section, self._SECTIONS[section]['title'])
diff --git a/yt_dlp/extractor/archiveorg.py b/yt_dlp/extractor/archiveorg.py
new file mode 100644
index 0000000..41f3a4f
--- /dev/null
+++ b/yt_dlp/extractor/archiveorg.py
@@ -0,0 +1,947 @@
+import json
+import re
+import urllib.parse
+
+from .common import InfoExtractor
+from .youtube import YoutubeBaseInfoExtractor, YoutubeIE
+from ..compat import compat_urllib_parse_unquote
+from ..networking import HEADRequest
+from ..networking.exceptions import HTTPError
+from ..utils import (
+ KNOWN_EXTENSIONS,
+ ExtractorError,
+ bug_reports_message,
+ clean_html,
+ dict_get,
+ extract_attributes,
+ get_element_by_id,
+ int_or_none,
+ join_nonempty,
+ js_to_json,
+ merge_dicts,
+ mimetype2ext,
+ orderedSet,
+ parse_duration,
+ parse_qs,
+ str_or_none,
+ str_to_int,
+ traverse_obj,
+ try_get,
+ unified_strdate,
+ unified_timestamp,
+ url_or_none,
+ urlhandle_detect_ext,
+ variadic,
+)
+
+
+class ArchiveOrgIE(InfoExtractor):
+ IE_NAME = 'archive.org'
+ IE_DESC = 'archive.org video and audio'
+ _VALID_URL = r'https?://(?:www\.)?archive\.org/(?:details|embed)/(?P<id>[^?#]+)(?:[?].*)?$'
+ _TESTS = [{
+ 'url': 'http://archive.org/details/XD300-23_68HighlightsAResearchCntAugHumanIntellect',
+ 'md5': '8af1d4cf447933ed3c7f4871162602db',
+ 'info_dict': {
+ 'id': 'XD300-23_68HighlightsAResearchCntAugHumanIntellect',
+ 'ext': 'ogv',
+ 'title': '1968 Demo - FJCC Conference Presentation Reel #1',
+ 'description': 'md5:da45c349df039f1cc8075268eb1b5c25',
+ 'release_date': '19681210',
+ 'timestamp': 1268695290,
+ 'upload_date': '20100315',
+ 'creators': ['SRI International'],
+ 'uploader': 'laura@archive.org',
+ 'thumbnail': r're:https://archive\.org/download/.*\.jpg',
+ 'display_id': 'XD300-23_68HighlightsAResearchCntAugHumanIntellect.cdr',
+ 'track': 'XD300-23 68HighlightsAResearchCntAugHumanIntellect',
+
+ },
+ }, {
+ 'url': 'https://archive.org/details/Cops1922',
+ 'md5': '0869000b4ce265e8ca62738b336b268a',
+ 'info_dict': {
+ 'id': 'Cops1922',
+ 'ext': 'mp4',
+ 'title': 'Buster Keaton\'s "Cops" (1922)',
+ 'description': 'md5:cd6f9910c35aedd5fc237dbc3957e2ca',
+ 'uploader': 'yorkmba99@hotmail.com',
+ 'timestamp': 1387699629,
+ 'upload_date': '20131222',
+ 'display_id': 'Cops-v2.mp4',
+ 'thumbnail': r're:https://archive\.org/download/.*\.jpg',
+ 'duration': 1091.96,
+ },
+ }, {
+ 'url': 'http://archive.org/embed/XD300-23_68HighlightsAResearchCntAugHumanIntellect',
+ 'only_matching': True,
+ }, {
+ 'url': 'https://archive.org/details/Election_Ads',
+ 'md5': 'eec5cddebd4793c6a653b69c3b11f2e6',
+ 'info_dict': {
+ 'id': 'Election_Ads/Commercial-JFK1960ElectionAdCampaignJingle.mpg',
+ 'title': 'Commercial-JFK1960ElectionAdCampaignJingle.mpg',
+ 'ext': 'mpg',
+ 'thumbnail': r're:https://archive\.org/download/.*\.jpg',
+ 'duration': 59.77,
+ 'display_id': 'Commercial-JFK1960ElectionAdCampaignJingle.mpg',
+ },
+ }, {
+ 'url': 'https://archive.org/details/Election_Ads/Commercial-Nixon1960ElectionAdToughonDefense.mpg',
+ 'md5': 'ea1eed8234e7d4165f38c8c769edef38',
+ 'info_dict': {
+ 'id': 'Election_Ads/Commercial-Nixon1960ElectionAdToughonDefense.mpg',
+ 'title': 'Commercial-Nixon1960ElectionAdToughonDefense.mpg',
+ 'ext': 'mpg',
+ 'timestamp': 1205588045,
+ 'uploader': 'mikedavisstripmaster@yahoo.com',
+ 'description': '1960 Presidential Campaign Election Commercials John F Kennedy, Richard M Nixon',
+ 'upload_date': '20080315',
+ 'display_id': 'Commercial-Nixon1960ElectionAdToughonDefense.mpg',
+ 'duration': 59.51,
+ 'license': 'http://creativecommons.org/licenses/publicdomain/',
+ 'thumbnail': r're:https://archive\.org/download/.*\.jpg',
+ },
+ }, {
+ 'url': 'https://archive.org/details/gd1977-05-08.shure57.stevenson.29303.flac16',
+ 'md5': '7d07ffb42aba6537c28e053efa4b54c9',
+ 'info_dict': {
+ 'id': 'gd1977-05-08.shure57.stevenson.29303.flac16/gd1977-05-08d01t01.flac',
+ 'title': 'Turning',
+ 'ext': 'flac',
+ 'track': 'Turning',
+ 'creators': ['Grateful Dead'],
+ 'display_id': 'gd1977-05-08d01t01.flac',
+ 'track_number': 1,
+ 'album': '1977-05-08 - Barton Hall - Cornell University',
+ 'duration': 39.8,
+ },
+ }, {
+ 'url': 'https://archive.org/details/gd1977-05-08.shure57.stevenson.29303.flac16/gd1977-05-08d01t07.flac',
+ 'md5': 'a07cd8c6ab4ee1560f8a0021717130f3',
+ 'info_dict': {
+ 'id': 'gd1977-05-08.shure57.stevenson.29303.flac16/gd1977-05-08d01t07.flac',
+ 'title': 'Deal',
+ 'ext': 'flac',
+ 'timestamp': 1205895624,
+ 'uploader': 'mvernon54@yahoo.com',
+ 'description': 'md5:6c921464414814720c6593810a5c7e3d',
+ 'upload_date': '20080319',
+ 'location': 'Barton Hall - Cornell University',
+ 'duration': 438.68,
+ 'track': 'Deal',
+ 'creators': ['Grateful Dead'],
+ 'album': '1977-05-08 - Barton Hall - Cornell University',
+ 'release_date': '19770508',
+ 'display_id': 'gd1977-05-08d01t07.flac',
+ 'track_number': 7,
+ },
+ }, {
+ # FIXME: give a better error message than just IndexError when all available formats are restricted
+ 'url': 'https://archive.org/details/lp_the-music-of-russia_various-artists-a-askaryan-alexander-melik',
+ 'md5': '7cb019baa9b332e82ea7c10403acd180',
+ 'info_dict': {
+ 'id': 'lp_the-music-of-russia_various-artists-a-askaryan-alexander-melik/disc1/01.01. Bells Of Rostov.mp3',
+ 'title': 'Bells Of Rostov',
+ 'ext': 'mp3',
+ },
+ 'skip': 'restricted'
+ }, {
+ 'url': 'https://archive.org/details/lp_the-music-of-russia_various-artists-a-askaryan-alexander-melik/disc1/02.02.+Song+And+Chorus+In+The+Polovetsian+Camp+From+%22Prince+Igor%22+(Act+2%2C+Scene+1).mp3',
+ 'md5': '1d0aabe03edca83ca58d9ed3b493a3c3',
+ 'info_dict': {
+ 'id': 'lp_the-music-of-russia_various-artists-a-askaryan-alexander-melik/disc1/02.02. Song And Chorus In The Polovetsian Camp From "Prince Igor" (Act 2, Scene 1).mp3',
+ 'title': 'Song And Chorus In The Polovetsian Camp From "Prince Igor" (Act 2, Scene 1)',
+ 'ext': 'mp3',
+ 'timestamp': 1569662587,
+ 'uploader': 'associate-joygen-odiongan@archive.org',
+ 'description': 'md5:012b2d668ae753be36896f343d12a236',
+ 'upload_date': '20190928',
+ },
+ 'skip': 'restricted'
+ }, {
+ # Original formats are private
+ 'url': 'https://archive.org/details/irelandthemakingofarepublic',
+ 'info_dict': {
+ 'id': 'irelandthemakingofarepublic',
+ 'title': 'Ireland: The Making of a Republic',
+ 'upload_date': '20160610',
+ 'description': 'md5:f70956a156645a658a0dc9513d9e78b7',
+ 'uploader': 'dimitrios@archive.org',
+ 'creators': ['British Broadcasting Corporation', 'Time-Life Films'],
+ 'timestamp': 1465594947,
+ },
+ 'playlist': [
+ {
+ 'md5': '0b211261b26590d49df968f71b90690d',
+ 'info_dict': {
+ 'id': 'irelandthemakingofarepublic/irelandthemakingofarepublicreel1_01.mov',
+ 'ext': 'mp4',
+ 'title': 'irelandthemakingofarepublicreel1_01.mov',
+ 'duration': 130.46,
+ 'thumbnail': 'https://archive.org/download/irelandthemakingofarepublic/irelandthemakingofarepublic.thumbs/irelandthemakingofarepublicreel1_01_000117.jpg',
+ 'display_id': 'irelandthemakingofarepublicreel1_01.mov',
+ },
+ }, {
+ 'md5': '67335ee3b23a0da930841981c1e79b02',
+ 'info_dict': {
+ 'id': 'irelandthemakingofarepublic/irelandthemakingofarepublicreel1_02.mov',
+ 'ext': 'mp4',
+ 'duration': 1395.13,
+ 'title': 'irelandthemakingofarepublicreel1_02.mov',
+ 'display_id': 'irelandthemakingofarepublicreel1_02.mov',
+ 'thumbnail': 'https://archive.org/download/irelandthemakingofarepublic/irelandthemakingofarepublic.thumbs/irelandthemakingofarepublicreel1_02_001374.jpg',
+ },
+ }, {
+ 'md5': 'e470e86787893603f4a341a16c281eb5',
+ 'info_dict': {
+ 'id': 'irelandthemakingofarepublic/irelandthemakingofarepublicreel2.mov',
+ 'ext': 'mp4',
+ 'duration': 1602.67,
+ 'title': 'irelandthemakingofarepublicreel2.mov',
+ 'thumbnail': 'https://archive.org/download/irelandthemakingofarepublic/irelandthemakingofarepublic.thumbs/irelandthemakingofarepublicreel2_001554.jpg',
+ 'display_id': 'irelandthemakingofarepublicreel2.mov',
+ },
+ }
+ ]
+ }]
+
+ @staticmethod
+ def _playlist_data(webpage):
+ element = re.findall(r'''(?xs)
+ <input
+ (?:\s+[a-zA-Z0-9:._-]+(?:=[a-zA-Z0-9:._-]*|="[^"]*"|='[^']*'|))*?
+ \s+class=['"]?js-play8-playlist['"]?
+ (?:\s+[a-zA-Z0-9:._-]+(?:=[a-zA-Z0-9:._-]*|="[^"]*"|='[^']*'|))*?
+ \s*/>
+ ''', webpage)[0]
+
+ return json.loads(extract_attributes(element)['value'])
+
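+    # Illustrative sketch (our annotation) of the embed-player element that
+    # _playlist_data() above targets: a hidden <input> whose value attribute
+    # carries the playlist JSON. The markup is hypothetical; extract_attributes
+    # is yt-dlp's own helper for turning a start tag into an attribute dict:
+    #
+    #   import json
+    #   from yt_dlp.utils import extract_attributes
+    #
+    #   element = '<input class="js-play8-playlist" value=\'[{"orig": "t1.flac", "title": "Turning"}]\'/>'
+    #   playlist = json.loads(extract_attributes(element)['value'])  # -> [{'orig': 't1.flac', ...}]
+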
+ def _real_extract(self, url):
+ video_id = urllib.parse.unquote_plus(self._match_id(url))
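+        # e.g. 'Election_Ads/Commercial-JFK1960ElectionAdCampaignJingle.mpg' splits
+        # into ('Election_Ads', 'Commercial-JFK1960ElectionAdCampaignJingle.mpg'),
+        # while a bare identifier yields entry_id = None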
+ identifier, entry_id = (video_id.split('/', 1) + [None])[:2]
+
+ # Archive.org metadata API doesn't clearly demarcate playlist entries
+ # or subtitle tracks, so we get them from the embeddable player.
+ embed_page = self._download_webpage(f'https://archive.org/embed/{identifier}', identifier)
+ playlist = self._playlist_data(embed_page)
+
+ entries = {}
+ for p in playlist:
+ # If the user specified a playlist entry in the URL, ignore the
+ # rest of the playlist.
+ if entry_id and p['orig'] != entry_id:
+ continue
+
+ entries[p['orig']] = {
+ 'formats': [],
+ 'thumbnails': [],
+ 'artist': p.get('artist'),
+ 'track': p.get('title'),
+ 'subtitles': {},
+ }
+
+ for track in p.get('tracks', []):
+ if track['kind'] != 'subtitles':
+ continue
+                entries[p['orig']]['subtitles'].setdefault(track['label'], []).append({
+                    'url': 'https://archive.org/' + track['file'].lstrip('/'),
+                })
+
+ metadata = self._download_json('http://archive.org/metadata/' + identifier, identifier)
+ m = metadata['metadata']
+ identifier = m['identifier']
+
+ info = {
+ 'id': identifier,
+ 'title': m['title'],
+ 'description': clean_html(m.get('description')),
+ 'uploader': dict_get(m, ['uploader', 'adder']),
+ 'creators': traverse_obj(m, ('creator', {variadic}, {lambda x: x[0] and list(x)})),
+ 'license': m.get('licenseurl'),
+ 'release_date': unified_strdate(m.get('date')),
+ 'timestamp': unified_timestamp(dict_get(m, ['publicdate', 'addeddate'])),
+ 'webpage_url': f'https://archive.org/details/{identifier}',
+ 'location': m.get('venue'),
+ 'release_year': int_or_none(m.get('year'))}
+
+ for f in metadata['files']:
+ if f['name'] in entries:
+ entries[f['name']] = merge_dicts(entries[f['name']], {
+ 'id': identifier + '/' + f['name'],
+ 'title': f.get('title') or f['name'],
+ 'display_id': f['name'],
+ 'description': clean_html(f.get('description')),
+ 'creators': traverse_obj(f, ('creator', {variadic}, {lambda x: x[0] and list(x)})),
+ 'duration': parse_duration(f.get('length')),
+ 'track_number': int_or_none(f.get('track')),
+ 'album': f.get('album'),
+ 'discnumber': int_or_none(f.get('disc')),
+ 'release_year': int_or_none(f.get('year'))})
+ entry = entries[f['name']]
+ elif traverse_obj(f, 'original', expected_type=str) in entries:
+ entry = entries[f['original']]
+ else:
+ continue
+
+ if f.get('format') == 'Thumbnail':
+ entry['thumbnails'].append({
+ 'id': f['name'],
+ 'url': 'https://archive.org/download/' + identifier + '/' + f['name'],
+ 'width': int_or_none(f.get('width')),
+                    'height': int_or_none(f.get('height')),
+ 'filesize': int_or_none(f.get('size'))})
+
+ extension = (f['name'].rsplit('.', 1) + [None])[1]
+
+            # We don't want to skip private formats if the user has access to them.
+            # However, without access to an account with such privileges, we can't
+            # implement or test this, so to be safe we only skip private formats
+            # when no user is logged in.
+ is_logged_in = bool(self._get_cookies('https://archive.org').get('logged-in-sig'))
+ if extension in KNOWN_EXTENSIONS and (not f.get('private') or is_logged_in):
+ entry['formats'].append({
+ 'url': 'https://archive.org/download/' + identifier + '/' + urllib.parse.quote(f['name']),
+ 'format': f.get('format'),
+ 'width': int_or_none(f.get('width')),
+ 'height': int_or_none(f.get('height')),
+ 'filesize': int_or_none(f.get('size')),
+ 'protocol': 'https',
+ 'source_preference': 0 if f.get('source') == 'original' else -1,
+ 'format_note': f.get('source')
+ })
+
+ for entry in entries.values():
+ entry['_format_sort_fields'] = ('source', )
+
+ if len(entries) == 1:
+ # If there's only one item, use it as the main info dict
+ only_video = next(iter(entries.values()))
+ if entry_id:
+ info = merge_dicts(only_video, info)
+ else:
+ info = merge_dicts(info, only_video)
+ else:
+ # Otherwise, we have a playlist.
+ info['_type'] = 'playlist'
+ info['entries'] = list(entries.values())
+
+ if metadata.get('reviews'):
+ info['comments'] = []
+ for review in metadata['reviews']:
+ info['comments'].append({
+ 'id': review.get('review_id'),
+ 'author': review.get('reviewer'),
+                    'text': join_nonempty(review.get('reviewtitle'), review.get('reviewbody'), delim='\n\n'),
+ 'timestamp': unified_timestamp(review.get('createdate')),
+ 'parent': 'root'})
+
+ return info
+
+
+class YoutubeWebArchiveIE(InfoExtractor):
+ IE_NAME = 'web.archive:youtube'
+ IE_DESC = 'web.archive.org saved youtube videos, "ytarchive:" prefix'
+ _VALID_URL = r'''(?x)(?:(?P<prefix>ytarchive:)|
+ (?:https?://)?web\.archive\.org/
+ (?:web/)?(?:(?P<date>[0-9]{14})?[0-9A-Za-z_*]*/)? # /web and the version index is optional
+ (?:https?(?::|%3[Aa])//)?(?:
+ (?:\w+\.)?youtube\.com(?::(?:80|443))?/watch(?:\.php)?(?:\?|%3[fF])(?:[^\#]+(?:&|%26))?v(?:=|%3[dD]) # Youtube URL
+ |(?:wayback-fakeurl\.archive\.org/yt/) # Or the internal fake url
+ )
+ )(?P<id>[0-9A-Za-z_-]{11})
+ (?(prefix)
+ (?::(?P<date2>[0-9]{14}))?$|
+ (?:%26|[#&]|$)
+ )'''
+
+ _TESTS = [
+ {
+ 'url': 'https://web.archive.org/web/20150415002341/https://www.youtube.com/watch?v=aYAGB11YrSs',
+ 'info_dict': {
+ 'id': 'aYAGB11YrSs',
+ 'ext': 'webm',
+ 'title': 'Team Fortress 2 - Sandviches!',
+ 'description': 'md5:4984c0f9a07f349fc5d8e82ab7af4eaf',
+ 'upload_date': '20110926',
+ 'uploader': 'Zeurel',
+ 'channel_id': 'UCukCyHaD-bK3in_pKpfH9Eg',
+ 'duration': 32,
+ 'uploader_id': 'Zeurel',
+ 'uploader_url': 'https://www.youtube.com/user/Zeurel',
+ 'thumbnail': r're:https?://.*\.(jpg|webp)',
+ 'channel_url': 'https://www.youtube.com/channel/UCukCyHaD-bK3in_pKpfH9Eg',
+ }
+ }, {
+ # Internal link
+ 'url': 'https://web.archive.org/web/2oe/http://wayback-fakeurl.archive.org/yt/97t7Xj_iBv0',
+ 'info_dict': {
+ 'id': '97t7Xj_iBv0',
+ 'ext': 'mp4',
+ 'title': 'Why Machines That Bend Are Better',
+ 'description': 'md5:00404df2c632d16a674ff8df1ecfbb6c',
+ 'upload_date': '20190312',
+ 'uploader': 'Veritasium',
+ 'channel_id': 'UCHnyfMqiRRG1u-2MsSQLbXA',
+ 'duration': 771,
+ 'uploader_id': '1veritasium',
+ 'uploader_url': 'https://www.youtube.com/user/1veritasium',
+ 'thumbnail': r're:https?://.*\.(jpg|webp)',
+ 'channel_url': 'https://www.youtube.com/channel/UCHnyfMqiRRG1u-2MsSQLbXA',
+ }
+ }, {
+ # Video from 2012, webm format itag 45. Newest capture is deleted video, with an invalid description.
+ # Should use the date in the link. Title ends with '- Youtube'. Capture has description in eow-description
+ 'url': 'https://web.archive.org/web/20120712231619/http://www.youtube.com/watch?v=AkhihxRKcrs&gl=US&hl=en',
+ 'info_dict': {
+ 'id': 'AkhihxRKcrs',
+ 'ext': 'webm',
+ 'title': 'Limited Run: Mondo\'s Modern Classic 1 of 3 (SDCC 2012)',
+ 'upload_date': '20120712',
+ 'duration': 398,
+ 'description': 'md5:ff4de6a7980cb65d951c2f6966a4f2f3',
+ 'uploader_id': 'machinima',
+ 'uploader_url': 'https://www.youtube.com/user/machinima',
+ 'thumbnail': r're:https?://.*\.(jpg|webp)',
+ 'uploader': 'machinima'
+ }
+ }, {
+ # FLV video. Video file URL does not provide itag information
+ 'url': 'https://web.archive.org/web/20081211103536/http://www.youtube.com/watch?v=jNQXAC9IVRw',
+ 'info_dict': {
+ 'id': 'jNQXAC9IVRw',
+ 'ext': 'flv',
+ 'title': 'Me at the zoo',
+ 'upload_date': '20050423',
+ 'channel_id': 'UC4QobU6STFB0P71PMvOGN5A',
+ 'duration': 19,
+ 'description': 'md5:10436b12e07ac43ff8df65287a56efb4',
+ 'uploader_id': 'jawed',
+ 'uploader_url': 'https://www.youtube.com/user/jawed',
+ 'channel_url': 'https://www.youtube.com/channel/UC4QobU6STFB0P71PMvOGN5A',
+ 'thumbnail': r're:https?://.*\.(jpg|webp)',
+ 'uploader': 'jawed',
+ }
+ }, {
+ 'url': 'https://web.archive.org/web/20110712231407/http://www.youtube.com/watch?v=lTx3G6h2xyA',
+ 'info_dict': {
+ 'id': 'lTx3G6h2xyA',
+ 'ext': 'flv',
+ 'title': 'Madeon - Pop Culture (live mashup)',
+ 'upload_date': '20110711',
+ 'uploader': 'Madeon',
+ 'channel_id': 'UCqMDNf3Pn5L7pcNkuSEeO3w',
+ 'duration': 204,
+ 'description': 'md5:f7535343b6eda34a314eff8b85444680',
+ 'uploader_id': 'itsmadeon',
+ 'uploader_url': 'https://www.youtube.com/user/itsmadeon',
+ 'channel_url': 'https://www.youtube.com/channel/UCqMDNf3Pn5L7pcNkuSEeO3w',
+ 'thumbnail': r're:https?://.*\.(jpg|webp)',
+ }
+ }, {
+ # First capture is of dead video, second is the oldest from CDX response.
+ 'url': 'https://web.archive.org/https://www.youtube.com/watch?v=1JYutPM8O6E',
+ 'info_dict': {
+ 'id': '1JYutPM8O6E',
+ 'ext': 'mp4',
+ 'title': 'Fake Teen Doctor Strikes AGAIN! - Weekly Weird News',
+ 'upload_date': '20160218',
+ 'channel_id': 'UCdIaNUarhzLSXGoItz7BHVA',
+ 'duration': 1235,
+ 'description': 'md5:21032bae736421e89c2edf36d1936947',
+ 'uploader_id': 'MachinimaETC',
+ 'uploader_url': 'https://www.youtube.com/user/MachinimaETC',
+ 'channel_url': 'https://www.youtube.com/channel/UCdIaNUarhzLSXGoItz7BHVA',
+ 'thumbnail': r're:https?://.*\.(jpg|webp)',
+ 'uploader': 'ETC News',
+ }
+ }, {
+ # First capture of dead video, capture date in link links to dead capture.
+ 'url': 'https://web.archive.org/web/20180803221945/https://www.youtube.com/watch?v=6FPhZJGvf4E',
+ 'info_dict': {
+ 'id': '6FPhZJGvf4E',
+ 'ext': 'mp4',
+ 'title': 'WTF: Video Games Still Launch BROKEN?! - T.U.G.S.',
+ 'upload_date': '20160219',
+ 'channel_id': 'UCdIaNUarhzLSXGoItz7BHVA',
+ 'duration': 797,
+ 'description': 'md5:a1dbf12d9a3bd7cb4c5e33b27d77ffe7',
+ 'uploader_id': 'MachinimaETC',
+ 'uploader_url': 'https://www.youtube.com/user/MachinimaETC',
+ 'channel_url': 'https://www.youtube.com/channel/UCdIaNUarhzLSXGoItz7BHVA',
+ 'thumbnail': r're:https?://.*\.(jpg|webp)',
+ 'uploader': 'ETC News',
+ },
+ 'expected_warnings': [
+ r'unable to download capture webpage \(it may not be archived\)'
+ ]
+ }, { # Very old YouTube page, has - YouTube in title.
+ 'url': 'http://web.archive.org/web/20070302011044/http://youtube.com/watch?v=-06-KB9XTzg',
+ 'info_dict': {
+ 'id': '-06-KB9XTzg',
+ 'ext': 'flv',
+ 'title': 'New Coin Hack!! 100% Safe!!'
+ }
+ }, {
+ 'url': 'web.archive.org/https://www.youtube.com/watch?v=dWW7qP423y8',
+ 'info_dict': {
+ 'id': 'dWW7qP423y8',
+ 'ext': 'mp4',
+ 'title': 'It\'s Bootleg AirPods Time.',
+ 'upload_date': '20211021',
+ 'channel_id': 'UC7Jwj9fkrf1adN4fMmTkpug',
+ 'channel_url': 'https://www.youtube.com/channel/UC7Jwj9fkrf1adN4fMmTkpug',
+ 'duration': 810,
+ 'description': 'md5:7b567f898d8237b256f36c1a07d6d7bc',
+ 'thumbnail': r're:https?://.*\.(jpg|webp)',
+ 'uploader': 'DankPods',
+ }
+ }, {
+ # player response contains '};' See: https://github.com/ytdl-org/youtube-dl/issues/27093
+ 'url': 'https://web.archive.org/web/20200827003909if_/http://www.youtube.com/watch?v=6Dh-RL__uN4',
+ 'info_dict': {
+ 'id': '6Dh-RL__uN4',
+ 'ext': 'mp4',
+ 'title': 'bitch lasagna',
+ 'upload_date': '20181005',
+ 'channel_id': 'UC-lHJZR3Gqxm24_Vd_AJ5Yw',
+ 'channel_url': 'https://www.youtube.com/channel/UC-lHJZR3Gqxm24_Vd_AJ5Yw',
+ 'duration': 135,
+ 'description': 'md5:2dbe4051feeff2dab5f41f82bb6d11d0',
+ 'uploader': 'PewDiePie',
+ 'uploader_id': 'PewDiePie',
+ 'uploader_url': 'https://www.youtube.com/user/PewDiePie',
+ 'thumbnail': r're:https?://.*\.(jpg|webp)',
+ }
+ }, {
+ # ~June 2010 Capture. swfconfig
+ 'url': 'https://web.archive.org/web/0/https://www.youtube.com/watch?v=8XeW5ilk-9Y',
+ 'info_dict': {
+ 'id': '8XeW5ilk-9Y',
+ 'ext': 'flv',
+ 'title': 'Story of Stuff, The Critique Part 4 of 4',
+ 'duration': 541,
+ 'description': 'md5:28157da06f2c5e94c97f7f3072509972',
+ 'uploader': 'HowTheWorldWorks',
+ 'uploader_id': 'HowTheWorldWorks',
+ 'thumbnail': r're:https?://.*\.(jpg|webp)',
+ 'uploader_url': 'https://www.youtube.com/user/HowTheWorldWorks',
+ 'upload_date': '20090520',
+ }
+ }, {
+ # Jan 2011: watch-video-date/eow-date surrounded by whitespace
+ 'url': 'https://web.archive.org/web/20110126141719/http://www.youtube.com/watch?v=Q_yjX80U7Yc',
+ 'info_dict': {
+ 'id': 'Q_yjX80U7Yc',
+ 'ext': 'flv',
+ 'title': 'Spray Paint Art by Clay Butler: Purple Fantasy Forest',
+ 'uploader_id': 'claybutlermusic',
+ 'description': 'md5:4595264559e3d0a0ceb3f011f6334543',
+ 'upload_date': '20090803',
+ 'uploader': 'claybutlermusic',
+ 'thumbnail': r're:https?://.*\.(jpg|webp)',
+ 'duration': 132,
+ 'uploader_url': 'https://www.youtube.com/user/claybutlermusic',
+ }
+ }, {
+ # ~May 2009 swfArgs. ytcfg is spread out over various vars
+ 'url': 'https://web.archive.org/web/0/https://www.youtube.com/watch?v=c5uJgG05xUY',
+ 'info_dict': {
+ 'id': 'c5uJgG05xUY',
+ 'ext': 'webm',
+ 'title': 'Story of Stuff, The Critique Part 1 of 4',
+ 'uploader_id': 'HowTheWorldWorks',
+ 'uploader': 'HowTheWorldWorks',
+ 'uploader_url': 'https://www.youtube.com/user/HowTheWorldWorks',
+ 'upload_date': '20090513',
+ 'description': 'md5:4ca77d79538064e41e4cc464e93f44f0',
+ 'thumbnail': r're:https?://.*\.(jpg|webp)',
+ 'duration': 754,
+ }
+ }, {
+        # ~June 2012. Upload date is in another language, so it cannot be extracted.
+ 'url': 'https://web.archive.org/web/20120607174520/http://www.youtube.com/watch?v=xWTLLl-dQaA',
+ 'info_dict': {
+ 'id': 'xWTLLl-dQaA',
+ 'ext': 'mp4',
+ 'title': 'Black Nerd eHarmony Video Bio Parody (SPOOF)',
+ 'uploader_url': 'https://www.youtube.com/user/BlackNerdComedy',
+ 'description': 'md5:e25f0133aaf9e6793fb81c18021d193e',
+ 'uploader_id': 'BlackNerdComedy',
+ 'uploader': 'BlackNerdComedy',
+ 'duration': 182,
+ 'thumbnail': r're:https?://.*\.(jpg|webp)',
+ }
+ }, {
+ # ~July 2013
+ 'url': 'https://web.archive.org/web/*/https://www.youtube.com/watch?v=9eO1aasHyTM',
+ 'info_dict': {
+ 'id': '9eO1aasHyTM',
+ 'ext': 'mp4',
+ 'title': 'Polar-oid',
+ 'description': 'Cameras and bears are dangerous!',
+ 'uploader_url': 'https://www.youtube.com/user/punkybird',
+ 'uploader_id': 'punkybird',
+ 'duration': 202,
+ 'channel_id': 'UC62R2cBezNBOqxSerfb1nMQ',
+ 'channel_url': 'https://www.youtube.com/channel/UC62R2cBezNBOqxSerfb1nMQ',
+ 'upload_date': '20060428',
+ 'uploader': 'punkybird',
+ }
+ }, {
+ # April 2020: Player response in player config
+ 'url': 'https://web.archive.org/web/20200416034815/https://www.youtube.com/watch?v=Cf7vS8jc7dY&gl=US&hl=en',
+ 'info_dict': {
+ 'id': 'Cf7vS8jc7dY',
+ 'ext': 'mp4',
+ 'title': 'A Dramatic Pool Story (by Jamie Spicer-Lewis) - Game Grumps Animated',
+ 'duration': 64,
+ 'upload_date': '20200408',
+ 'uploader_id': 'GameGrumps',
+ 'uploader': 'GameGrumps',
+ 'channel_url': 'https://www.youtube.com/channel/UC9CuvdOVfMPvKCiwdGKL3cQ',
+ 'channel_id': 'UC9CuvdOVfMPvKCiwdGKL3cQ',
+ 'thumbnail': r're:https?://.*\.(jpg|webp)',
+ 'description': 'md5:c625bb3c02c4f5fb4205971e468fa341',
+ 'uploader_url': 'https://www.youtube.com/user/GameGrumps',
+ }
+ }, {
+ # watch7-user-header with yt-user-info
+ 'url': 'ytarchive:kbh4T_b4Ixw:20160307085057',
+ 'info_dict': {
+ 'id': 'kbh4T_b4Ixw',
+ 'ext': 'mp4',
+ 'title': 'Shovel Knight OST - Strike the Earth! Plains of Passage 16 bit SNES style remake / remix',
+ 'channel_url': 'https://www.youtube.com/channel/UCnTaGvsHmMy792DWeT6HbGA',
+ 'uploader': 'Nelward music',
+ 'duration': 213,
+ 'description': 'md5:804b4a9ce37b050a5fefdbb23aeba54d',
+ 'thumbnail': r're:https?://.*\.(jpg|webp)',
+ 'upload_date': '20150503',
+ 'channel_id': 'UCnTaGvsHmMy792DWeT6HbGA',
+ }
+ }, {
+ # April 2012
+ 'url': 'https://web.archive.org/web/0/https://www.youtube.com/watch?v=SOm7mPoPskU',
+ 'info_dict': {
+ 'id': 'SOm7mPoPskU',
+ 'ext': 'mp4',
+ 'title': 'Boyfriend - Justin Bieber Parody',
+ 'uploader_url': 'https://www.youtube.com/user/thecomputernerd01',
+ 'uploader': 'thecomputernerd01',
+ 'thumbnail': r're:https?://.*\.(jpg|webp)',
+ 'description': 'md5:dd7fa635519c2a5b4d566beaecad7491',
+ 'duration': 200,
+ 'upload_date': '20120407',
+ 'uploader_id': 'thecomputernerd01',
+ }
+ }, {
+ 'url': 'https://web.archive.org/web/http://www.youtube.com/watch?v=kH-G_aIBlFw',
+ 'only_matching': True
+ }, {
+ 'url': 'https://web.archive.org/web/20050214000000_if/http://www.youtube.com/watch?v=0altSZ96U4M',
+ 'only_matching': True
+ }, {
+        # Video not archived; the only capture is of the unavailable-video page
+ 'url': 'https://web.archive.org/web/20210530071008/https://www.youtube.com/watch?v=lHJTf93HL1s&spfreload=10',
+ 'only_matching': True
+ }, { # Encoded url
+ 'url': 'https://web.archive.org/web/20120712231619/http%3A//www.youtube.com/watch%3Fgl%3DUS%26v%3DAkhihxRKcrs%26hl%3Den',
+ 'only_matching': True
+ }, {
+ 'url': 'https://web.archive.org/web/20120712231619/http%3A//www.youtube.com/watch%3Fv%3DAkhihxRKcrs%26gl%3DUS%26hl%3Den',
+ 'only_matching': True
+ }, {
+ 'url': 'https://web.archive.org/web/20060527081937/http://www.youtube.com:80/watch.php?v=ELTFsLT73fA&amp;search=soccer',
+ 'only_matching': True
+ }, {
+ 'url': 'https://web.archive.org/http://www.youtube.com:80/watch?v=-05VVye-ffg',
+ 'only_matching': True
+ }, {
+ 'url': 'ytarchive:BaW_jenozKc:20050214000000',
+ 'only_matching': True
+ }, {
+ 'url': 'ytarchive:BaW_jenozKc',
+ 'only_matching': True
+ },
+ ]
+ _YT_INITIAL_DATA_RE = YoutubeBaseInfoExtractor._YT_INITIAL_DATA_RE
+ _YT_INITIAL_PLAYER_RESPONSE_RE = fr'''(?x:
+ (?:window\s*\[\s*["\']ytInitialPlayerResponse["\']\s*\]|ytInitialPlayerResponse)\s*=[(\s]*|
+ {YoutubeBaseInfoExtractor._YT_INITIAL_PLAYER_RESPONSE_RE}
+ )'''
+
+ _YT_DEFAULT_THUMB_SERVERS = ['i.ytimg.com'] # thumbnails most likely archived on these servers
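+    # Expands to img.youtube.com plus i.ytimg.com, s.ytimg.com and their numbered
+    # variants (i1-i4, i9, s1-s4, s9), which have all served thumbnails at some point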
+ _YT_ALL_THUMB_SERVERS = orderedSet(
+ _YT_DEFAULT_THUMB_SERVERS + ['img.youtube.com', *[f'{c}{n or ""}.ytimg.com' for c in ('i', 's') for n in (*range(0, 5), 9)]])
+
+ _WAYBACK_BASE_URL = 'https://web.archive.org/web/%sif_/'
+ _OLDEST_CAPTURE_DATE = 20050214000000
+ _NEWEST_CAPTURE_DATE = 20500101000000
+
+ def _call_cdx_api(self, item_id, url, filters: list = None, collapse: list = None, query: dict = None, note=None, fatal=False):
+ # CDX docs: https://github.com/internetarchive/wayback/blob/master/wayback-cdx-server/README.md
+ query = {
+ 'url': url,
+ 'output': 'json',
+ 'fl': 'original,mimetype,length,timestamp',
+ 'limit': 500,
+ 'filter': ['statuscode:200'] + (filters or []),
+ 'collapse': collapse or [],
+ **(query or {})
+ }
+ res = self._download_json(
+ 'https://web.archive.org/cdx/search/cdx', item_id,
+ note or 'Downloading CDX API JSON', query=query, fatal=fatal)
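+        # A successful response is a list of rows whose first row holds the
+        # requested field names, e.g.:
+        #   [['original', 'mimetype', 'length', 'timestamp'],
+        #    ['http://www.youtube.com/watch?v=...', 'text/html', '12345', '20200701000000']]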
+ if isinstance(res, list) and len(res) >= 2:
+ # format response to make it easier to use
+ return list(dict(zip(res[0], v)) for v in res[1:])
+ elif not isinstance(res, list) or len(res) != 0:
+ self.report_warning('Error while parsing CDX API response' + bug_reports_message())
+
+ def _extract_webpage_title(self, webpage):
+ page_title = self._html_extract_title(webpage, default='')
+ # YouTube video pages appear to always have either 'YouTube -' as prefix or '- YouTube' as suffix.
+ return self._html_search_regex(
+ r'(?:YouTube\s*-\s*(.*)$)|(?:(.*)\s*-\s*YouTube$)',
+ page_title, 'title', default='')
+
+ def _extract_metadata(self, video_id, webpage):
+ search_meta = ((lambda x: self._html_search_meta(x, webpage, default=None)) if webpage else (lambda x: None))
+ player_response = self._search_json(
+ self._YT_INITIAL_PLAYER_RESPONSE_RE, webpage, 'initial player response',
+ video_id, default={})
+ initial_data = self._search_json(
+ self._YT_INITIAL_DATA_RE, webpage, 'initial data', video_id, default={})
+
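+        # Old pages set player configuration through repeated yt.setConfig({...})
+        # calls; merge every such blob into a single dict.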
+ ytcfg = {}
+ for j in re.findall(r'yt\.setConfig\(\s*(?P<json>{\s*(?s:.+?)\s*})\s*\);', webpage): # ~June 2010
+ ytcfg.update(self._parse_json(j, video_id, fatal=False, ignore_extra=True, transform_source=js_to_json, errnote='') or {})
+
+        # XXX: this may also contain a 'ptchn' key
+ player_config = (
+ self._search_json(
+ r'(?:yt\.playerConfig|ytplayer\.config|swfConfig)\s*=',
+ webpage, 'player config', video_id, default=None)
+ or ytcfg.get('PLAYER_CONFIG') or {})
+
+ # XXX: this may also contain a 'creator' key.
+ swf_args = self._search_json(r'swfArgs\s*=', webpage, 'swf config', video_id, default={})
+ if swf_args and not traverse_obj(player_config, ('args',)):
+ player_config['args'] = swf_args
+
+ if not player_response:
+ # April 2020
+ player_response = self._parse_json(
+ traverse_obj(player_config, ('args', 'player_response')) or '{}', video_id, fatal=False)
+
+ initial_data_video = traverse_obj(
+ initial_data, ('contents', 'twoColumnWatchNextResults', 'results', 'results', 'contents', ..., 'videoPrimaryInfoRenderer'),
+ expected_type=dict, get_all=False, default={})
+
+ video_details = traverse_obj(
+ player_response, 'videoDetails', expected_type=dict, get_all=False, default={})
+
+ microformats = traverse_obj(
+ player_response, ('microformat', 'playerMicroformatRenderer'), expected_type=dict, get_all=False, default={})
+
+ video_title = (
+ video_details.get('title')
+ or YoutubeBaseInfoExtractor._get_text(microformats, 'title')
+ or YoutubeBaseInfoExtractor._get_text(initial_data_video, 'title')
+ or traverse_obj(player_config, ('args', 'title'))
+ or self._extract_webpage_title(webpage)
+ or search_meta(['og:title', 'twitter:title', 'title']))
+
+ def id_from_url(url, type_):
+ return self._search_regex(
+ rf'(?:{type_})/([^/#&?]+)', url or '', f'{type_} id', default=None)
+
+ # XXX: would the get_elements_by_... functions be better suited here?
+ _CHANNEL_URL_HREF_RE = r'href="[^"]*(?P<url>https?://www\.youtube\.com/(?:user|channel)/[^"]+)"'
+ uploader_or_channel_url = self._search_regex(
+ [fr'<(?:link\s*itemprop=\"url\"|a\s*id=\"watch-username\").*?\b{_CHANNEL_URL_HREF_RE}>', # @fd05024
+ fr'<div\s*id=\"(?:watch-channel-stats|watch-headline-user-info)\"[^>]*>\s*<a[^>]*\b{_CHANNEL_URL_HREF_RE}'], # ~ May 2009, ~June 2012
+ webpage, 'uploader or channel url', default=None)
+
+ owner_profile_url = url_or_none(microformats.get('ownerProfileUrl')) # @a6211d2
+
+ # Uploader refers to the /user/ id ONLY
+ uploader_id = (
+ id_from_url(owner_profile_url, 'user')
+ or id_from_url(uploader_or_channel_url, 'user')
+ or ytcfg.get('VIDEO_USERNAME'))
+ uploader_url = f'https://www.youtube.com/user/{uploader_id}' if uploader_id else None
+
+ # XXX: do we want to differentiate uploader and channel?
+ uploader = (
+ self._search_regex(
+ [r'<a\s*id="watch-username"[^>]*>\s*<strong>([^<]+)</strong>', # June 2010
+ r'var\s*watchUsername\s*=\s*\'(.+?)\';', # ~May 2009
+ r'<div\s*\bid=\"watch-channel-stats"[^>]*>\s*<a[^>]*>\s*(.+?)\s*</a', # ~May 2009
+ r'<a\s*id="watch-userbanner"[^>]*title="\s*(.+?)\s*"'], # ~June 2012
+ webpage, 'uploader', default=None)
+ or self._html_search_regex(
+ [r'(?s)<div\s*class="yt-user-info".*?<a[^>]*[^>]*>\s*(.*?)\s*</a', # March 2016
+                 r'(?s)<a[^>]*yt-user-name[^>]*>\s*(.*?)\s*</a'], # July 2013
+ get_element_by_id('watch7-user-header', webpage), 'uploader', default=None)
+ or self._html_search_regex(
+ r'<button\s*href="/user/[^>]*>\s*<span[^>]*>\s*(.+?)\s*<', # April 2012
+ get_element_by_id('watch-headline-user-info', webpage), 'uploader', default=None)
+ or traverse_obj(player_config, ('args', 'creator'))
+ or video_details.get('author'))
+
+ channel_id = str_or_none(
+ video_details.get('channelId')
+ or microformats.get('externalChannelId')
+ or search_meta('channelId')
+ or self._search_regex(
+ r'data-channel-external-id=(["\'])(?P<id>(?:(?!\1).)+)\1', # @b45a9e6
+ webpage, 'channel id', default=None, group='id')
+ or id_from_url(owner_profile_url, 'channel')
+ or id_from_url(uploader_or_channel_url, 'channel')
+ or traverse_obj(player_config, ('args', 'ucid')))
+
+ channel_url = f'https://www.youtube.com/channel/{channel_id}' if channel_id else None
+ duration = int_or_none(
+ video_details.get('lengthSeconds')
+ or microformats.get('lengthSeconds')
+ or traverse_obj(player_config, ('args', ('length_seconds', 'l')), get_all=False)
+ or parse_duration(search_meta('duration')))
+ description = (
+ video_details.get('shortDescription')
+ or YoutubeBaseInfoExtractor._get_text(microformats, 'description')
+ or clean_html(get_element_by_id('eow-description', webpage)) # @9e6dd23
+ or search_meta(['description', 'og:description', 'twitter:description']))
+
+ upload_date = unified_strdate(
+ dict_get(microformats, ('uploadDate', 'publishDate'))
+ or search_meta(['uploadDate', 'datePublished'])
+ or self._search_regex(
+ [r'(?s)id="eow-date.*?>\s*(.*?)\s*</span>',
+ r'(?:id="watch-uploader-info".*?>.*?|["\']simpleText["\']\s*:\s*["\'])(?:Published|Uploaded|Streamed live|Started) on (.+?)[<"\']', # @7998520
+ r'class\s*=\s*"(?:watch-video-date|watch-video-added post-date)"[^>]*>\s*([^<]+?)\s*<'], # ~June 2010, ~Jan 2009 (respectively)
+ webpage, 'upload date', default=None))
+
+ return {
+ 'title': video_title,
+ 'description': description,
+ 'upload_date': upload_date,
+ 'uploader': uploader,
+ 'channel_id': channel_id,
+ 'channel_url': channel_url,
+ 'duration': duration,
+ 'uploader_url': uploader_url,
+ 'uploader_id': uploader_id,
+ }
+
+ def _extract_thumbnails(self, video_id):
+ try_all = 'thumbnails' in self._configuration_arg('check_all')
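+        # When the 'check_all' extractor argument includes 'thumbnails', probe every
+        # known thumbnail server and both the jpg and webp variants; otherwise only
+        # query the default server for jpg thumbnails.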
+ thumbnail_base_urls = ['http://{server}/vi{webp}/{video_id}'.format(
+ webp='_webp' if ext == 'webp' else '', video_id=video_id, server=server)
+ for server in (self._YT_ALL_THUMB_SERVERS if try_all else self._YT_DEFAULT_THUMB_SERVERS) for ext in (('jpg', 'webp') if try_all else ('jpg',))]
+
+ thumbnails = []
+ for url in thumbnail_base_urls:
+ response = self._call_cdx_api(
+ video_id, url, filters=['mimetype:image/(?:webp|jpeg)'],
+ collapse=['urlkey'], query={'matchType': 'prefix'})
+ if not response:
+ continue
+ thumbnails.extend(
+ {
+ 'url': (self._WAYBACK_BASE_URL % (int_or_none(thumbnail_dict.get('timestamp')) or self._OLDEST_CAPTURE_DATE)) + thumbnail_dict.get('original'),
+ 'filesize': int_or_none(thumbnail_dict.get('length')),
+ 'preference': int_or_none(thumbnail_dict.get('length'))
+ } for thumbnail_dict in response)
+ if not try_all:
+ break
+
+ self._remove_duplicate_formats(thumbnails)
+ return thumbnails
+
+ def _get_capture_dates(self, video_id, url_date):
+ capture_dates = []
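+        # Captures are tried in order of preference: earliest polymer-UI capture,
+        # the date from the URL, the oldest capture, then hard-coded fallbacks.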
+ # Note: CDX API will not find watch pages with extra params in the url.
+ response = self._call_cdx_api(
+ video_id, f'https://www.youtube.com/watch?v={video_id}',
+ filters=['mimetype:text/html'], collapse=['timestamp:6', 'digest'], query={'matchType': 'prefix'}) or []
+ all_captures = sorted(int_or_none(r['timestamp']) for r in response if int_or_none(r['timestamp']) is not None)
+
+ # Prefer the new polymer UI captures as we support extracting more metadata from them
+        # WBM captures all seem to have switched to this layout around July 2020
+ modern_captures = [x for x in all_captures if x >= 20200701000000]
+ if modern_captures:
+ capture_dates.append(modern_captures[0])
+ capture_dates.append(url_date)
+ if all_captures:
+ capture_dates.append(all_captures[0])
+
+ if 'captures' in self._configuration_arg('check_all'):
+ capture_dates.extend(modern_captures + all_captures)
+
+ # Fallbacks if any of the above fail
+ capture_dates.extend([self._OLDEST_CAPTURE_DATE, self._NEWEST_CAPTURE_DATE])
+ return orderedSet(filter(None, capture_dates))
+
+ def _real_extract(self, url):
+ video_id, url_date, url_date_2 = self._match_valid_url(url).group('id', 'date', 'date2')
+ url_date = url_date or url_date_2
+
+ urlh = None
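+        # web.archive.org appears to resolve this wayback-fakeurl endpoint (via
+        # redirect) to the archived video file for the given YouTube ID, if any.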
+ retry_manager = self.RetryManager(fatal=False)
+ for retry in retry_manager:
+ try:
+ urlh = self._request_webpage(
+ HEADRequest('https://web.archive.org/web/2oe_/http://wayback-fakeurl.archive.org/yt/%s' % video_id),
+ video_id, note='Fetching archived video file url', expected_status=True)
+ except ExtractorError as e:
+ # HTTP Error 404 is expected if the video is not saved.
+ if isinstance(e.cause, HTTPError) and e.cause.status == 404:
+ self.raise_no_formats(
+ 'The requested video is not archived, indexed, or there is an issue with web.archive.org (try again later)', expected=True)
+ else:
+ retry.error = e
+
+ if retry_manager.error:
+ self.raise_no_formats(retry_manager.error, expected=True, video_id=video_id)
+
+ capture_dates = self._get_capture_dates(video_id, int_or_none(url_date))
+ self.write_debug('Captures to try: ' + join_nonempty(*capture_dates, delim=', '))
+ info = {'id': video_id}
+ for capture in capture_dates:
+ webpage = self._download_webpage(
+ (self._WAYBACK_BASE_URL + 'http://www.youtube.com/watch?v=%s') % (capture, video_id),
+ video_id=video_id, fatal=False, errnote='unable to download capture webpage (it may not be archived)',
+ note='Downloading capture webpage')
+ current_info = self._extract_metadata(video_id, webpage or '')
+            # Try to avoid picking up metadata from deleted-video pages
+ if current_info.get('title'):
+ info = merge_dicts(info, current_info)
+ if 'captures' not in self._configuration_arg('check_all'):
+ break
+
+ info['thumbnails'] = self._extract_thumbnails(video_id)
+
+ if urlh:
+ url = compat_urllib_parse_unquote(urlh.url)
+ video_file_url_qs = parse_qs(url)
+ # Attempt to recover any ext & format info from playback url & response headers
+ format = {'url': url, 'filesize': int_or_none(urlh.headers.get('x-archive-orig-content-length'))}
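+            # An 'itag' query parameter identifies a known YouTube format profile,
+            # so reuse YoutubeIE's itag table when one is present.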
+ itag = try_get(video_file_url_qs, lambda x: x['itag'][0])
+ if itag and itag in YoutubeIE._formats:
+ format.update(YoutubeIE._formats[itag])
+ format.update({'format_id': itag})
+ else:
+ mime = try_get(video_file_url_qs, lambda x: x['mime'][0])
+ ext = (mimetype2ext(mime)
+ or urlhandle_detect_ext(urlh)
+ or mimetype2ext(urlh.headers.get('x-archive-guessed-content-type')))
+ format.update({'ext': ext})
+ info['formats'] = [format]
+ if not info.get('duration'):
+ info['duration'] = str_to_int(try_get(video_file_url_qs, lambda x: x['dur'][0]))
+
+ if not info.get('title'):
+ info['title'] = video_id
+ return info
diff --git a/yt_dlp/extractor/arcpublishing.py b/yt_dlp/extractor/arcpublishing.py
new file mode 100644
index 0000000..febd3d2
--- /dev/null
+++ b/yt_dlp/extractor/arcpublishing.py
@@ -0,0 +1,164 @@
+import re
+
+from .common import InfoExtractor
+from ..utils import (
+ extract_attributes,
+ int_or_none,
+ parse_iso8601,
+ try_get,
+)
+
+
+class ArcPublishingIE(InfoExtractor):
+ _UUID_REGEX = r'[\da-f]{8}-(?:[\da-f]{4}-){3}[\da-f]{12}'
+ _VALID_URL = r'arcpublishing:(?P<org>[a-z]+):(?P<id>%s)' % _UUID_REGEX
+ _TESTS = [{
+ # https://www.adn.com/politics/2020/11/02/video-senate-candidates-campaign-in-anchorage-on-eve-of-election-day/
+ 'url': 'arcpublishing:adn:8c99cb6e-b29c-4bc9-9173-7bf9979225ab',
+ 'only_matching': True,
+ }, {
+ # https://www.bostonglobe.com/video/2020/12/30/metro/footage-released-showing-officer-talking-about-striking-protesters-with-car/
+ 'url': 'arcpublishing:bostonglobe:232b7ae6-7d73-432d-bc0a-85dbf0119ab1',
+ 'only_matching': True,
+ }, {
+ # https://www.actionnewsjax.com/video/live-stream/
+ 'url': 'arcpublishing:cmg:cfb1cf1b-3ab5-4d1b-86c5-a5515d311f2a',
+ 'only_matching': True,
+ }, {
+ # https://elcomercio.pe/videos/deportes/deporte-total-futbol-peruano-seleccion-peruana-la-valorizacion-de-los-peruanos-en-el-exterior-tras-un-2020-atipico-nnav-vr-video-noticia/
+ 'url': 'arcpublishing:elcomercio:27a7e1f8-2ec7-4177-874f-a4feed2885b3',
+ 'only_matching': True,
+ }, {
+ # https://www.clickondetroit.com/video/community/2020/05/15/events-surrounding-woodward-dream-cruise-being-canceled/
+ 'url': 'arcpublishing:gmg:c8793fb2-8d44-4242-881e-2db31da2d9fe',
+ 'only_matching': True,
+ }, {
+ # https://www.wabi.tv/video/2020/12/30/trenton-company-making-equipment-pfizer-covid-vaccine/
+ 'url': 'arcpublishing:gray:0b0ba30e-032a-4598-8810-901d70e6033e',
+ 'only_matching': True,
+ }, {
+ # https://www.lateja.cr/el-mundo/video-china-aprueba-con-condiciones-su-primera/dfcbfa57-527f-45ff-a69b-35fe71054143/video/
+ 'url': 'arcpublishing:gruponacion:dfcbfa57-527f-45ff-a69b-35fe71054143',
+ 'only_matching': True,
+ }, {
+ # https://www.fifthdomain.com/video/2018/03/09/is-america-vulnerable-to-a-cyber-attack/
+ 'url': 'arcpublishing:mco:aa0ca6fe-1127-46d4-b32c-be0d6fdb8055',
+ 'only_matching': True,
+ }, {
+ # https://www.vl.no/kultur/2020/12/09/en-melding-fra-en-lytter-endret-julelista-til-lewi-bergrud/
+ 'url': 'arcpublishing:mentormedier:47a12084-650b-4011-bfd0-3699b6947b2d',
+ 'only_matching': True,
+ }, {
+ # https://www.14news.com/2020/12/30/whiskey-theft-caught-camera-henderson-liquor-store/
+ 'url': 'arcpublishing:raycom:b89f61f8-79fa-4c09-8255-e64237119bf7',
+ 'only_matching': True,
+ }, {
+ # https://www.theglobeandmail.com/world/video-ethiopian-woman-who-became-symbol-of-integration-in-italy-killed-on/
+ 'url': 'arcpublishing:tgam:411b34c1-8701-4036-9831-26964711664b',
+ 'only_matching': True,
+ }, {
+ # https://www.pilotonline.com/460f2931-8130-4719-8ea1-ffcb2d7cb685-132.html
+ 'url': 'arcpublishing:tronc:460f2931-8130-4719-8ea1-ffcb2d7cb685',
+ 'only_matching': True,
+ }]
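+    # (org IDs, API host template) pairs; orgs not listed here fall back to the
+    # generic '%s-prod-cdn.video-api.arcpublishing.com/api' template.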
+ _POWA_DEFAULTS = [
+ (['cmg', 'prisa'], '%s-config-prod.api.cdn.arcpublishing.com/video'),
+ ([
+ 'adn', 'advancelocal', 'answers', 'bonnier', 'bostonglobe', 'demo',
+ 'gmg', 'gruponacion', 'infobae', 'mco', 'nzme', 'pmn', 'raycom',
+ 'spectator', 'tbt', 'tgam', 'tronc', 'wapo', 'wweek',
+ ], 'video-api-cdn.%s.arcpublishing.com/api'),
+ ]
+
+ @classmethod
+ def _extract_embed_urls(cls, url, webpage):
+ entries = []
+ # https://arcpublishing.atlassian.net/wiki/spaces/POWA/overview
+ for powa_el in re.findall(r'(<div[^>]+class="[^"]*\bpowa\b[^"]*"[^>]+data-uuid="%s"[^>]*>)' % ArcPublishingIE._UUID_REGEX, webpage):
+ powa = extract_attributes(powa_el) or {}
+ org = powa.get('data-org')
+ uuid = powa.get('data-uuid')
+ if org and uuid:
+ entries.append('arcpublishing:%s:%s' % (org, uuid))
+ return entries
+
+ def _real_extract(self, url):
+ org, uuid = self._match_valid_url(url).groups()
+ for orgs, tmpl in self._POWA_DEFAULTS:
+ if org in orgs:
+ base_api_tmpl = tmpl
+ break
+ else:
+ base_api_tmpl = '%s-prod-cdn.video-api.arcpublishing.com/api'
+ if org == 'wapo':
+ org = 'washpost'
+ video = self._download_json(
+ 'https://%s/v1/ansvideos/findByUuid' % (base_api_tmpl % org),
+ uuid, query={'uuid': uuid})[0]
+ title = video['headlines']['basic']
+ is_live = video.get('status') == 'live'
+
+ urls = []
+ formats = []
+ for s in video.get('streams', []):
+ s_url = s.get('url')
+ if not s_url or s_url in urls:
+ continue
+ urls.append(s_url)
+ stream_type = s.get('stream_type')
+ if stream_type == 'smil':
+ smil_formats = self._extract_smil_formats(
+ s_url, uuid, fatal=False)
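+                # Fix up RTMP formats from the SMIL manifest: split out the app/
+                # play_path and convert the float bitrate into an integer vbr.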
+ for f in smil_formats:
+ if f['url'].endswith('/cfx/st'):
+ f['app'] = 'cfx/st'
+ if not f['play_path'].startswith('mp4:'):
+ f['play_path'] = 'mp4:' + f['play_path']
+ if isinstance(f['tbr'], float):
+                            f['vbr'] = int(f['tbr'] * 1000)
+ del f['tbr']
+ f['format_id'] = 'rtmp-%d' % f['vbr']
+ formats.extend(smil_formats)
+ elif stream_type in ('ts', 'hls'):
+ m3u8_formats = self._extract_m3u8_formats(
+ s_url, uuid, 'mp4', live=is_live, m3u8_id='hls', fatal=False)
+                if all(f.get('acodec') == 'none' for f in m3u8_formats):
+ continue
+ for f in m3u8_formats:
+ height = f.get('height')
+ if not height:
+ continue
+ vbr = self._search_regex(
+ r'[_x]%d[_-](\d+)' % height, f['url'], 'vbr', default=None)
+ if vbr:
+ f['vbr'] = int(vbr)
+ formats.extend(m3u8_formats)
+ else:
+ vbr = int_or_none(s.get('bitrate'))
+ formats.append({
+ 'format_id': '%s-%d' % (stream_type, vbr) if vbr else stream_type,
+ 'vbr': vbr,
+ 'width': int_or_none(s.get('width')),
+ 'height': int_or_none(s.get('height')),
+ 'filesize': int_or_none(s.get('filesize')),
+ 'url': s_url,
+ 'quality': -10,
+ })
+
+ subtitles = {}
+ for subtitle in (try_get(video, lambda x: x['subtitles']['urls'], list) or []):
+ subtitle_url = subtitle.get('url')
+ if subtitle_url:
+ subtitles.setdefault('en', []).append({'url': subtitle_url})
+
+ return {
+ 'id': uuid,
+ 'title': title,
+ 'thumbnail': try_get(video, lambda x: x['promo_image']['url']),
+ 'description': try_get(video, lambda x: x['subheadlines']['basic']),
+ 'formats': formats,
+ 'duration': int_or_none(video.get('duration'), 100),
+ 'timestamp': parse_iso8601(video.get('created_date')),
+ 'subtitles': subtitles,
+ 'is_live': is_live,
+ }
diff --git a/yt_dlp/extractor/ard.py b/yt_dlp/extractor/ard.py
new file mode 100644
index 0000000..46e68d6
--- /dev/null
+++ b/yt_dlp/extractor/ard.py
@@ -0,0 +1,579 @@
+import re
+from functools import partial
+
+from .common import InfoExtractor
+from ..utils import (
+ OnDemandPagedList,
+ bug_reports_message,
+ determine_ext,
+ int_or_none,
+ join_nonempty,
+ jwt_decode_hs256,
+ make_archive_id,
+ parse_duration,
+ parse_iso8601,
+ remove_start,
+ str_or_none,
+ unified_strdate,
+ update_url_query,
+ url_or_none,
+ xpath_text,
+)
+from ..utils.traversal import traverse_obj
+
+
+class ARDMediathekBaseIE(InfoExtractor):
+ _GEO_COUNTRIES = ['DE']
+
+ def _extract_media_info(self, media_info_url, webpage, video_id):
+ media_info = self._download_json(
+ media_info_url, video_id, 'Downloading media JSON')
+ return self._parse_media_info(media_info, video_id, '"fsk"' in webpage)
+
+ def _parse_media_info(self, media_info, video_id, fsk):
+ formats = self._extract_formats(media_info, video_id)
+
+ if not formats:
+ if fsk:
+ self.raise_no_formats(
+ 'This video is only available after 20:00', expected=True)
+ elif media_info.get('_geoblocked'):
+ self.raise_geo_restricted(
+ 'This video is not available due to geoblocking',
+ countries=self._GEO_COUNTRIES, metadata_available=True)
+
+ subtitles = {}
+ subtitle_url = media_info.get('_subtitleUrl')
+ if subtitle_url:
+ subtitles['de'] = [{
+ 'ext': 'ttml',
+ 'url': subtitle_url,
+ }, {
+ 'ext': 'vtt',
+ 'url': subtitle_url.replace('/ebutt/', '/webvtt/') + '.vtt',
+ }]
+
+ return {
+ 'id': video_id,
+ 'duration': int_or_none(media_info.get('_duration')),
+ 'thumbnail': media_info.get('_previewImage'),
+ 'is_live': media_info.get('_isLive') is True,
+ 'formats': formats,
+ 'subtitles': subtitles,
+ }
+
+ def _extract_formats(self, media_info, video_id):
+ type_ = media_info.get('_type')
+ media_array = media_info.get('_mediaArray', [])
+ formats = []
+ for num, media in enumerate(media_array):
+ for stream in media.get('_mediaStreamArray', []):
+ stream_urls = stream.get('_stream')
+ if not stream_urls:
+ continue
+ if not isinstance(stream_urls, list):
+ stream_urls = [stream_urls]
+ quality = stream.get('_quality')
+ server = stream.get('_server')
+ for stream_url in stream_urls:
+ if not url_or_none(stream_url):
+ continue
+ ext = determine_ext(stream_url)
+ if quality != 'auto' and ext in ('f4m', 'm3u8'):
+ continue
+ if ext == 'f4m':
+ formats.extend(self._extract_f4m_formats(
+ update_url_query(stream_url, {
+ 'hdcore': '3.1.1',
+ 'plugin': 'aasp-3.1.1.69.124'
+ }), video_id, f4m_id='hds', fatal=False))
+ elif ext == 'm3u8':
+ formats.extend(self._extract_m3u8_formats(
+ stream_url, video_id, 'mp4', 'm3u8_native',
+ m3u8_id='hls', fatal=False))
+ else:
+ if server and server.startswith('rtmp'):
+ f = {
+ 'url': server,
+ 'play_path': stream_url,
+ 'format_id': 'a%s-rtmp-%s' % (num, quality),
+ }
+ else:
+ f = {
+ 'url': stream_url,
+ 'format_id': 'a%s-%s-%s' % (num, ext, quality)
+ }
+ m = re.search(
+ r'_(?P<width>\d+)x(?P<height>\d+)\.mp4$',
+ stream_url)
+ if m:
+ f.update({
+ 'width': int(m.group('width')),
+ 'height': int(m.group('height')),
+ })
+ if type_ == 'audio':
+ f['vcodec'] = 'none'
+ formats.append(f)
+ return formats
+
+
+class ARDIE(InfoExtractor):
+ _VALID_URL = r'(?P<mainurl>https?://(?:www\.)?daserste\.de/(?:[^/?#&]+/)+(?P<id>[^/?#&]+))\.html'
+ _TESTS = [{
+ # available till 7.12.2023
+ 'url': 'https://www.daserste.de/information/talk/maischberger/videos/maischberger-video-424.html',
+ 'md5': '94812e6438488fb923c361a44469614b',
+ 'info_dict': {
+ 'id': 'maischberger-video-424',
+ 'display_id': 'maischberger-video-424',
+ 'ext': 'mp4',
+ 'duration': 4452.0,
+ 'title': 'maischberger am 07.12.2022',
+ 'upload_date': '20221207',
+ 'thumbnail': r're:^https?://.*\.jpg$',
+ },
+ }, {
+ 'url': 'https://www.daserste.de/information/politik-weltgeschehen/morgenmagazin/videosextern/dominik-kahun-aus-der-nhl-direkt-zur-weltmeisterschaft-100.html',
+ 'only_matching': True,
+ }, {
+ 'url': 'https://www.daserste.de/information/nachrichten-wetter/tagesthemen/videosextern/tagesthemen-17736.html',
+ 'only_matching': True,
+ }, {
+ 'url': 'https://www.daserste.de/unterhaltung/serie/in-aller-freundschaft-die-jungen-aerzte/videos/diversity-tag-sanam-afrashteh100.html',
+ 'only_matching': True,
+ }, {
+ 'url': 'http://www.daserste.de/information/reportage-dokumentation/dokus/videos/die-story-im-ersten-mission-unter-falscher-flagge-100.html',
+ 'only_matching': True,
+ }, {
+ 'url': 'https://www.daserste.de/unterhaltung/serie/in-aller-freundschaft-die-jungen-aerzte/Drehpause-100.html',
+ 'only_matching': True,
+ }, {
+ 'url': 'https://www.daserste.de/unterhaltung/film/filmmittwoch-im-ersten/videos/making-ofwendezeit-video-100.html',
+ 'only_matching': True,
+ }]
+
+ def _real_extract(self, url):
+ mobj = self._match_valid_url(url)
+ display_id = mobj.group('id')
+
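+        # Das Erste exposes the player metadata as XML at '<page URL>~playerXml.xml'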
+ player_url = mobj.group('mainurl') + '~playerXml.xml'
+ doc = self._download_xml(player_url, display_id)
+ video_node = doc.find('./video')
+ upload_date = unified_strdate(xpath_text(
+ video_node, './broadcastDate'))
+ thumbnail = xpath_text(video_node, './/teaserImage//variant/url')
+
+ formats = []
+ for a in video_node.findall('.//asset'):
+ file_name = xpath_text(a, './fileName', default=None)
+ if not file_name:
+ continue
+ format_type = a.attrib.get('type')
+ format_url = url_or_none(file_name)
+ if format_url:
+ ext = determine_ext(file_name)
+ if ext == 'm3u8':
+ formats.extend(self._extract_m3u8_formats(
+ format_url, display_id, 'mp4', entry_protocol='m3u8_native',
+ m3u8_id=format_type or 'hls', fatal=False))
+ continue
+ elif ext == 'f4m':
+ formats.extend(self._extract_f4m_formats(
+ update_url_query(format_url, {'hdcore': '3.7.0'}),
+ display_id, f4m_id=format_type or 'hds', fatal=False))
+ continue
+ f = {
+ 'format_id': format_type,
+ 'width': int_or_none(xpath_text(a, './frameWidth')),
+ 'height': int_or_none(xpath_text(a, './frameHeight')),
+ 'vbr': int_or_none(xpath_text(a, './bitrateVideo')),
+ 'abr': int_or_none(xpath_text(a, './bitrateAudio')),
+ 'vcodec': xpath_text(a, './codecVideo'),
+ 'tbr': int_or_none(xpath_text(a, './totalBitrate')),
+ }
+ server_prefix = xpath_text(a, './serverPrefix', default=None)
+ if server_prefix:
+ f.update({
+ 'url': server_prefix,
+ 'playpath': file_name,
+ })
+ else:
+ if not format_url:
+ continue
+ f['url'] = format_url
+ formats.append(f)
+
+ _SUB_FORMATS = (
+ ('./dataTimedText', 'ttml'),
+ ('./dataTimedTextNoOffset', 'ttml'),
+ ('./dataTimedTextVtt', 'vtt'),
+ )
+
+ subtitles = {}
+ for subsel, subext in _SUB_FORMATS:
+ for node in video_node.findall(subsel):
+ subtitles.setdefault('de', []).append({
+ 'url': node.attrib['url'],
+ 'ext': subext,
+ })
+
+ return {
+ 'id': xpath_text(video_node, './videoId', default=display_id),
+ 'formats': formats,
+ 'subtitles': subtitles,
+ 'display_id': display_id,
+ 'title': video_node.find('./title').text,
+ 'duration': parse_duration(video_node.find('./duration').text),
+ 'upload_date': upload_date,
+ 'thumbnail': thumbnail,
+ }
+
+
+class ARDBetaMediathekIE(InfoExtractor):
+ IE_NAME = 'ARDMediathek'
+ _VALID_URL = r'''(?x)https://
+ (?:(?:beta|www)\.)?ardmediathek\.de/
+ (?:[^/]+/)?
+ (?:player|live|video)/
+ (?:[^?#]+/)?
+ (?P<id>[a-zA-Z0-9]+)
+ /?(?:[?#]|$)'''
+ _GEO_COUNTRIES = ['DE']
+ _TOKEN_URL = 'https://sso.ardmediathek.de/sso/token'
+
+ _TESTS = [{
+ 'url': 'https://www.ardmediathek.de/video/filme-im-mdr/liebe-auf-vier-pfoten/mdr-fernsehen/Y3JpZDovL21kci5kZS9zZW5kdW5nLzI4MjA0MC80MjIwOTEtNDAyNTM0',
+ 'md5': 'b6e8ab03f2bcc6e1f9e6cef25fcc03c4',
+ 'info_dict': {
+ 'display_id': 'Y3JpZDovL21kci5kZS9zZW5kdW5nLzI4MjA0MC80MjIwOTEtNDAyNTM0',
+ 'id': '12939099',
+ 'title': 'Liebe auf vier Pfoten',
+ 'description': r're:^Claudia Schmitt, Anwältin in Salzburg',
+ 'duration': 5222,
+ 'thumbnail': 'https://api.ardmediathek.de/image-service/images/urn:ard:image:aee7cbf8f06de976?w=960&ch=ae4d0f2ee47d8b9b',
+ 'timestamp': 1701343800,
+ 'upload_date': '20231130',
+ 'ext': 'mp4',
+ 'episode': 'Liebe auf vier Pfoten',
+ 'series': 'Filme im MDR',
+ 'age_limit': 0,
+ 'channel': 'MDR',
+ '_old_archive_ids': ['ardbetamediathek Y3JpZDovL21kci5kZS9zZW5kdW5nLzI4MjA0MC80MjIwOTEtNDAyNTM0'],
+ },
+ }, {
+ 'url': 'https://www.ardmediathek.de/mdr/video/die-robuste-roswita/Y3JpZDovL21kci5kZS9iZWl0cmFnL2Ntcy84MWMxN2MzZC0wMjkxLTRmMzUtODk4ZS0wYzhlOWQxODE2NGI/',
+ 'md5': 'a1dc75a39c61601b980648f7c9f9f71d',
+ 'info_dict': {
+ 'display_id': 'die-robuste-roswita',
+ 'id': '78566716',
+ 'title': 'Die robuste Roswita',
+ 'description': r're:^Der Mord.*totgeglaubte Ehefrau Roswita',
+ 'duration': 5316,
+ 'thumbnail': 'https://img.ardmediathek.de/standard/00/78/56/67/84/575672121/16x9/960?mandant=ard',
+ 'timestamp': 1596658200,
+ 'upload_date': '20200805',
+ 'ext': 'mp4',
+ },
+ 'skip': 'Error',
+ }, {
+ 'url': 'https://www.ardmediathek.de/video/tagesschau-oder-tagesschau-20-00-uhr/das-erste/Y3JpZDovL2Rhc2Vyc3RlLmRlL3RhZ2Vzc2NoYXUvZmM4ZDUxMjgtOTE0ZC00Y2MzLTgzNzAtNDZkNGNiZWJkOTll',
+ 'md5': '1e73ded21cb79bac065117e80c81dc88',
+ 'info_dict': {
+ 'id': '10049223',
+ 'ext': 'mp4',
+ 'title': 'tagesschau, 20:00 Uhr',
+ 'timestamp': 1636398000,
+ 'description': 'md5:39578c7b96c9fe50afdf5674ad985e6b',
+ 'upload_date': '20211108',
+ 'display_id': 'Y3JpZDovL2Rhc2Vyc3RlLmRlL3RhZ2Vzc2NoYXUvZmM4ZDUxMjgtOTE0ZC00Y2MzLTgzNzAtNDZkNGNiZWJkOTll',
+ 'duration': 915,
+ 'episode': 'tagesschau, 20:00 Uhr',
+ 'series': 'tagesschau',
+ 'thumbnail': 'https://api.ardmediathek.de/image-service/images/urn:ard:image:fbb21142783b0a49?w=960&ch=ee69108ae344f678',
+ 'channel': 'ARD-Aktuell',
+ '_old_archive_ids': ['ardbetamediathek Y3JpZDovL2Rhc2Vyc3RlLmRlL3RhZ2Vzc2NoYXUvZmM4ZDUxMjgtOTE0ZC00Y2MzLTgzNzAtNDZkNGNiZWJkOTll'],
+ },
+ }, {
+ 'url': 'https://www.ardmediathek.de/video/7-tage/7-tage-unter-harten-jungs/hr-fernsehen/N2I2YmM5MzgtNWFlOS00ZGFlLTg2NzMtYzNjM2JlNjk4MDg3',
+ 'md5': 'c428b9effff18ff624d4f903bda26315',
+ 'info_dict': {
+ 'id': '94834686',
+ 'ext': 'mp4',
+ 'duration': 2700,
+ 'episode': '7 Tage ... unter harten Jungs',
+ 'description': 'md5:0f215470dcd2b02f59f4bd10c963f072',
+ 'upload_date': '20231005',
+ 'timestamp': 1696491171,
+ 'display_id': 'N2I2YmM5MzgtNWFlOS00ZGFlLTg2NzMtYzNjM2JlNjk4MDg3',
+ 'series': '7 Tage ...',
+ 'channel': 'HR',
+ 'thumbnail': 'https://api.ardmediathek.de/image-service/images/urn:ard:image:f6e6d5ffac41925c?w=960&ch=fa32ba69bc87989a',
+ 'title': '7 Tage ... unter harten Jungs',
+ '_old_archive_ids': ['ardbetamediathek N2I2YmM5MzgtNWFlOS00ZGFlLTg2NzMtYzNjM2JlNjk4MDg3'],
+ },
+ }, {
+ 'url': 'https://beta.ardmediathek.de/ard/video/Y3JpZDovL2Rhc2Vyc3RlLmRlL3RhdG9ydC9mYmM4NGM1NC0xNzU4LTRmZGYtYWFhZS0wYzcyZTIxNGEyMDE',
+ 'only_matching': True,
+ }, {
+ 'url': 'https://ardmediathek.de/ard/video/saartalk/saartalk-gesellschaftsgift-haltung-gegen-hass/sr-fernsehen/Y3JpZDovL3NyLW9ubGluZS5kZS9TVF84MTY4MA/',
+ 'only_matching': True,
+ }, {
+ 'url': 'https://www.ardmediathek.de/ard/video/trailer/private-eyes-s01-e01/one/Y3JpZDovL3dkci5kZS9CZWl0cmFnLTE1MTgwYzczLWNiMTEtNGNkMS1iMjUyLTg5MGYzOWQxZmQ1YQ/',
+ 'only_matching': True,
+ }, {
+ 'url': 'https://www.ardmediathek.de/ard/player/Y3JpZDovL3N3ci5kZS9hZXgvbzEwNzE5MTU/',
+ 'only_matching': True,
+ }, {
+ 'url': 'https://www.ardmediathek.de/swr/live/Y3JpZDovL3N3ci5kZS8xMzQ4MTA0Mg',
+ 'only_matching': True,
+ }, {
+ 'url': 'https://www.ardmediathek.de/video/coronavirus-update-ndr-info/astrazeneca-kurz-lockdown-und-pims-syndrom-81/ndr/Y3JpZDovL25kci5kZS84NzE0M2FjNi0wMWEwLTQ5ODEtOTE5NS1mOGZhNzdhOTFmOTI/',
+ 'only_matching': True,
+ }]
+
+ def _extract_episode_info(self, title):
+ patterns = [
+ # Pattern for title like "Homo sapiens (S06/E07) - Originalversion"
+ # from: https://www.ardmediathek.de/one/sendung/doctor-who/Y3JpZDovL3dkci5kZS9vbmUvZG9jdG9yIHdobw
+ r'.*(?P<ep_info> \(S(?P<season_number>\d+)/E(?P<episode_number>\d+)\)).*',
+ # E.g.: title="Fritjof aus Norwegen (2) (AD)"
+ # from: https://www.ardmediathek.de/ard/sammlung/der-krieg-und-ich/68cMkqJdllm639Skj4c7sS/
+ r'.*(?P<ep_info> \((?:Folge |Teil )?(?P<episode_number>\d+)(?:/\d+)?\)).*',
+ r'.*(?P<ep_info>Folge (?P<episode_number>\d+)(?:\:| -|) )\"(?P<episode>.+)\".*',
+ # E.g.: title="Folge 25/42: Symmetrie"
+ # from: https://www.ardmediathek.de/ard/video/grips-mathe/folge-25-42-symmetrie/ard-alpha/Y3JpZDovL2JyLmRlL3ZpZGVvLzMyYzI0ZjczLWQ1N2MtNDAxNC05ZmZhLTFjYzRkZDA5NDU5OQ/
+ # E.g.: title="Folge 1063 - Vertrauen"
+ # from: https://www.ardmediathek.de/ard/sendung/die-fallers/Y3JpZDovL3N3ci5kZS8yMzAyMDQ4/
+ r'.*(?P<ep_info>Folge (?P<episode_number>\d+)(?:/\d+)?(?:\:| -|) ).*',
+ # As a fallback use the full title
+ r'(?P<title>.*)',
+ ]
+
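+        # Try each pattern in order and keep the first match (get_all=False);
+        # its named groups are mapped onto the metadata fields below.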
+ return traverse_obj(patterns, (..., {partial(re.match, string=title)}, {
+ 'season_number': ('season_number', {int_or_none}),
+ 'episode_number': ('episode_number', {int_or_none}),
+ 'episode': ((
+ ('episode', {str_or_none}),
+ ('ep_info', {lambda x: title.replace(x, '')}),
+ ('title', {str}),
+ ), {str.strip}),
+ }), get_all=False)
+
+ def _real_extract(self, url):
+ display_id = self._match_id(url)
+ query = {'embedded': 'false', 'mcV6': 'true'}
+ headers = {}
+
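+        # If an ARD SSO session cookie is present, exchange it for a JWT so that
+        # age-restricted content can be requested on behalf of the account.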
+ if self._get_cookies(self._TOKEN_URL).get('ams'):
+ token = self._download_json(
+ self._TOKEN_URL, display_id, 'Fetching token for age verification',
+ 'Unable to fetch age verification token', fatal=False)
+ id_token = traverse_obj(token, ('idToken', {str}))
+ decoded_token = traverse_obj(id_token, ({jwt_decode_hs256}, {dict}))
+ user_id = traverse_obj(decoded_token, (('user_id', 'sub'), {str}), get_all=False)
+ if not user_id:
+ self.report_warning('Unable to extract token, continuing without authentication')
+ else:
+ headers['x-authorization'] = f'Bearer {id_token}'
+ query['userId'] = user_id
+ if decoded_token.get('age_rating') != 18:
+ self.report_warning('Account is not verified as 18+; video may be unavailable')
+
+ page_data = self._download_json(
+ f'https://api.ardmediathek.de/page-gateway/pages/ard/item/{display_id}',
+ display_id, query=query, headers=headers)
+
+ # For user convenience we use the old contentId instead of the longer crid
+ # Ref: https://github.com/yt-dlp/yt-dlp/issues/8731#issuecomment-1874398283
+ old_id = traverse_obj(page_data, ('tracking', 'atiCustomVars', 'contentId', {int}))
+ if old_id is not None:
+ video_id = str(old_id)
+ archive_ids = [make_archive_id(ARDBetaMediathekIE, display_id)]
+ else:
+ self.report_warning(f'Could not extract contentId{bug_reports_message()}')
+ video_id = display_id
+ archive_ids = None
+
+ player_data = traverse_obj(
+ page_data, ('widgets', lambda _, v: v['type'] in ('player_ondemand', 'player_live'), {dict}), get_all=False)
+ is_live = player_data.get('type') == 'player_live'
+ media_data = traverse_obj(player_data, ('mediaCollection', 'embedded', {dict}))
+
+ if player_data.get('blockedByFsk'):
+ self.raise_login_required('This video is only available for age verified users or after 22:00')
+
+ formats = []
+ subtitles = {}
+ for stream in traverse_obj(media_data, ('streams', ..., {dict})):
+ kind = stream.get('kind')
+ # Prioritize main stream over sign language and others
+ preference = 1 if kind == 'main' else None
+ for media in traverse_obj(stream, ('media', lambda _, v: url_or_none(v['url']))):
+ media_url = media['url']
+
+ audio_kind = traverse_obj(media, (
+ 'audios', 0, 'kind', {str}), default='').replace('standard', '')
+ lang_code = traverse_obj(media, ('audios', 0, 'languageCode', {str})) or 'deu'
+ lang = join_nonempty(lang_code, audio_kind)
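+                    # Plain untranslated German audio (lang == 'deu') is boosted;
+                    # other languages and audio kinds are deprioritized.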
+ language_preference = 10 if lang == 'deu' else -10
+
+ if determine_ext(media_url) == 'm3u8':
+ fmts, subs = self._extract_m3u8_formats_and_subtitles(
+ media_url, video_id, m3u8_id=f'hls-{kind}', preference=preference, fatal=False, live=is_live)
+ for f in fmts:
+ f['language'] = lang
+ f['language_preference'] = language_preference
+ formats.extend(fmts)
+ self._merge_subtitles(subs, target=subtitles)
+ else:
+ formats.append({
+ 'url': media_url,
+ 'format_id': f'http-{kind}',
+ 'preference': preference,
+ 'language': lang,
+ 'language_preference': language_preference,
+ **traverse_obj(media, {
+ 'format_note': ('forcedLabel', {str}),
+ 'width': ('maxHResolutionPx', {int_or_none}),
+ 'height': ('maxVResolutionPx', {int_or_none}),
+ 'vcodec': ('videoCodec', {str}),
+ }),
+ })
+
+ for sub in traverse_obj(media_data, ('subtitles', ..., {dict})):
+ for sources in traverse_obj(sub, ('sources', lambda _, v: url_or_none(v['url']))):
+ subtitles.setdefault(sub.get('languageCode') or 'deu', []).append({
+ 'url': sources['url'],
+ 'ext': {'webvtt': 'vtt', 'ebutt': 'ttml'}.get(sources.get('kind')),
+ })
+
+ age_limit = traverse_obj(page_data, ('fskRating', {lambda x: remove_start(x, 'FSK')}, {int_or_none}))
+ return {
+ 'id': video_id,
+ 'display_id': display_id,
+ 'formats': formats,
+ 'subtitles': subtitles,
+ 'is_live': is_live,
+ 'age_limit': age_limit,
+ **traverse_obj(media_data, ('meta', {
+ 'title': 'title',
+ 'description': 'synopsis',
+ 'timestamp': ('broadcastedOnDateTime', {parse_iso8601}),
+ 'series': 'seriesTitle',
+ 'thumbnail': ('images', 0, 'url', {url_or_none}),
+ 'duration': ('durationSeconds', {int_or_none}),
+ 'channel': 'clipSourceName',
+ })),
+ **self._extract_episode_info(page_data.get('title')),
+ '_old_archive_ids': archive_ids,
+ }
+
+
+class ARDMediathekCollectionIE(InfoExtractor):
+ _VALID_URL = r'''(?x)https://
+ (?:(?:beta|www)\.)?ardmediathek\.de/
+ (?:[^/?#]+/)?
+ (?P<playlist>sendung|serie|sammlung)/
+ (?:(?P<display_id>[^?#]+?)/)?
+ (?P<id>[a-zA-Z0-9]+)
+ (?:/(?P<season>\d+)(?:/(?P<version>OV|AD))?)?/?(?:[?#]|$)'''
+ _GEO_COUNTRIES = ['DE']
+
+ _TESTS = [{
+ 'url': 'https://www.ardmediathek.de/serie/quiz/staffel-1-originalversion/Y3JpZDovL3dkci5kZS9vbmUvcXVpeg/1/OV',
+ 'info_dict': {
+ 'id': 'Y3JpZDovL3dkci5kZS9vbmUvcXVpeg_1_OV',
+ 'display_id': 'quiz/staffel-1-originalversion',
+ 'title': 'Staffel 1 Originalversion',
+ },
+ 'playlist_count': 3,
+ }, {
+ 'url': 'https://www.ardmediathek.de/serie/babylon-berlin/staffel-4-mit-audiodeskription/Y3JpZDovL2Rhc2Vyc3RlLmRlL2JhYnlsb24tYmVybGlu/4/AD',
+ 'info_dict': {
+ 'id': 'Y3JpZDovL2Rhc2Vyc3RlLmRlL2JhYnlsb24tYmVybGlu_4_AD',
+ 'display_id': 'babylon-berlin/staffel-4-mit-audiodeskription',
+ 'title': 'Staffel 4 mit Audiodeskription',
+ },
+ 'playlist_count': 12,
+ }, {
+ 'url': 'https://www.ardmediathek.de/serie/babylon-berlin/staffel-1/Y3JpZDovL2Rhc2Vyc3RlLmRlL2JhYnlsb24tYmVybGlu/1/',
+ 'info_dict': {
+ 'id': 'Y3JpZDovL2Rhc2Vyc3RlLmRlL2JhYnlsb24tYmVybGlu_1',
+ 'display_id': 'babylon-berlin/staffel-1',
+ 'title': 'Staffel 1',
+ },
+ 'playlist_count': 8,
+ }, {
+ 'url': 'https://www.ardmediathek.de/sendung/tatort/Y3JpZDovL2Rhc2Vyc3RlLmRlL3RhdG9ydA',
+ 'info_dict': {
+ 'id': 'Y3JpZDovL2Rhc2Vyc3RlLmRlL3RhdG9ydA',
+ 'display_id': 'tatort',
+ 'title': 'Tatort',
+ },
+ 'playlist_mincount': 500,
+ }, {
+ 'url': 'https://www.ardmediathek.de/sammlung/die-kirche-bleibt-im-dorf/5eOHzt8XB2sqeFXbIoJlg2',
+ 'info_dict': {
+ 'id': '5eOHzt8XB2sqeFXbIoJlg2',
+ 'display_id': 'die-kirche-bleibt-im-dorf',
+ 'title': 'Die Kirche bleibt im Dorf',
+ 'description': 'Die Kirche bleibt im Dorf',
+ },
+ 'playlist_count': 4,
+ }, {
+ # playlist of type 'sendung'
+ 'url': 'https://www.ardmediathek.de/ard/sendung/doctor-who/Y3JpZDovL3dkci5kZS9vbmUvZG9jdG9yIHdobw/',
+ 'only_matching': True,
+ }, {
+ # playlist of type 'serie'
+ 'url': 'https://www.ardmediathek.de/serie/nachtstreife/staffel-1/Y3JpZDovL3N3ci5kZS9zZGIvc3RJZC8xMjQy/1',
+ 'only_matching': True,
+ }, {
+ # playlist of type 'sammlung'
+ 'url': 'https://www.ardmediathek.de/ard/sammlung/team-muenster/5JpTzLSbWUAK8184IOvEir/',
+ 'only_matching': True,
+ }]
+
+ _PAGE_SIZE = 100
+
+ def _real_extract(self, url):
+ playlist_id, display_id, playlist_type, season_number, version = self._match_valid_url(url).group(
+ 'id', 'display_id', 'playlist', 'season', 'version')
+
+ def call_api(page_num):
+ api_path = 'compilations/ard' if playlist_type == 'sammlung' else 'widgets/ard/asset'
+ return self._download_json(
+ f'https://api.ardmediathek.de/page-gateway/{api_path}/{playlist_id}', playlist_id,
+ f'Downloading playlist page {page_num}', query={
+ 'pageNumber': page_num,
+ 'pageSize': self._PAGE_SIZE,
+ **({
+ 'seasoned': 'true',
+ 'seasonNumber': season_number,
+ 'withOriginalversion': 'true' if version == 'OV' else 'false',
+ 'withAudiodescription': 'true' if version == 'AD' else 'false',
+ } if season_number else {}),
+ })
+
+ def fetch_page(page_num):
+ for item in traverse_obj(call_api(page_num), ('teasers', ..., {dict})):
+ item_id = traverse_obj(item, ('links', 'target', ('urlId', 'id')), 'id', get_all=False)
+ if not item_id or item_id == playlist_id:
+ continue
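+                # Compilations nest further collections, so route them back
+                # through this extractor; plain videos go to ARDBetaMediathekIE.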
+ item_mode = 'sammlung' if item.get('type') == 'compilation' else 'video'
+ yield self.url_result(
+ f'https://www.ardmediathek.de/{item_mode}/{item_id}',
+ ie=(ARDMediathekCollectionIE if item_mode == 'sammlung' else ARDBetaMediathekIE),
+ **traverse_obj(item, {
+ 'id': ('id', {str}),
+ 'title': ('longTitle', {str}),
+ 'duration': ('duration', {int_or_none}),
+ 'timestamp': ('broadcastedOn', {parse_iso8601}),
+ }))
+
+ page_data = call_api(0)
+ full_id = join_nonempty(playlist_id, season_number, version, delim='_')
+
+ return self.playlist_result(
+ OnDemandPagedList(fetch_page, self._PAGE_SIZE), full_id, display_id=display_id,
+ title=page_data.get('title'), description=page_data.get('synopsis'))
diff --git a/yt_dlp/extractor/arkena.py b/yt_dlp/extractor/arkena.py
new file mode 100644
index 0000000..de36ec8
--- /dev/null
+++ b/yt_dlp/extractor/arkena.py
@@ -0,0 +1,150 @@
+from .common import InfoExtractor
+from ..utils import (
+ ExtractorError,
+ float_or_none,
+ int_or_none,
+ parse_iso8601,
+ parse_qs,
+ try_get,
+)
+
+
+class ArkenaIE(InfoExtractor):
+ _VALID_URL = r'''(?x)
+ https?://
+ (?:
+ video\.(?:arkena|qbrick)\.com/play2/embed/player\?|
+ play\.arkena\.com/(?:config|embed)/avp/v\d/player/media/(?P<id>[^/]+)/[^/]+/(?P<account_id>\d+)
+ )
+ '''
+ # See https://support.arkena.com/display/PLAY/Ways+to+embed+your+video
+ _EMBED_REGEX = [r'<iframe[^>]+src=(["\'])(?P<url>(?:https?:)?//play\.arkena\.com/embed/avp/.+?)\1']
+ _TESTS = [{
+ 'url': 'https://video.qbrick.com/play2/embed/player?accountId=1034090&mediaId=d8ab4607-00090107-aab86310',
+ 'md5': '97f117754e5f3c020f5f26da4a44ebaf',
+ 'info_dict': {
+ 'id': 'd8ab4607-00090107-aab86310',
+ 'ext': 'mp4',
+ 'title': 'EM_HT20_117_roslund_v2.mp4',
+ 'timestamp': 1608285912,
+ 'upload_date': '20201218',
+ 'duration': 1429.162667,
+ 'subtitles': {
+ 'sv': 'count:3',
+ },
+ },
+ }, {
+ 'url': 'https://play.arkena.com/embed/avp/v2/player/media/b41dda37-d8e7-4d3f-b1b5-9a9db578bdfe/1/129411',
+ 'only_matching': True,
+ }, {
+ 'url': 'https://play.arkena.com/config/avp/v2/player/media/b41dda37-d8e7-4d3f-b1b5-9a9db578bdfe/1/129411/?callbackMethod=jQuery1111023664739129262213_1469227693893',
+ 'only_matching': True,
+ }, {
+ 'url': 'http://play.arkena.com/config/avp/v1/player/media/327336/darkmatter/131064/?callbackMethod=jQuery1111002221189684892677_1469227595972',
+ 'only_matching': True,
+ }, {
+ 'url': 'http://play.arkena.com/embed/avp/v1/player/media/327336/darkmatter/131064/',
+ 'only_matching': True,
+ }, {
+ 'url': 'http://video.arkena.com/play2/embed/player?accountId=472718&mediaId=35763b3b-00090078-bf604299&pageStyling=styled',
+ 'only_matching': True,
+ }]
+
+ def _real_extract(self, url):
+ mobj = self._match_valid_url(url)
+ video_id = mobj.group('id')
+ account_id = mobj.group('account_id')
+
+ # Handle http://video.arkena.com/play2/embed/player URL
+ if not video_id:
+ qs = parse_qs(url)
+ video_id = qs.get('mediaId', [None])[0]
+ account_id = qs.get('accountId', [None])[0]
+ if not video_id or not account_id:
+ raise ExtractorError('Invalid URL', expected=True)
+
+ media = self._download_json(
+ 'https://video.qbrick.com/api/v1/public/accounts/%s/medias/%s' % (account_id, video_id),
+ video_id, query={
+ # https://video.qbrick.com/docs/api/examples/library-api.html
+ 'fields': 'asset/resources/*/renditions/*(height,id,language,links/*(href,mimeType),type,size,videos/*(audios/*(codec,sampleRate),bitrate,codec,duration,height,width),width),created,metadata/*(title,description),tags',
+ })
+ metadata = media.get('metadata') or {}
+ title = metadata['title']
+
+ duration = None
+ formats = []
+ thumbnails = []
+ subtitles = {}
+ for resource in media['asset']['resources']:
+ for rendition in (resource.get('renditions') or []):
+ rendition_type = rendition.get('type')
+ for i, link in enumerate(rendition.get('links') or []):
+ href = link.get('href')
+ if not href:
+ continue
+ if rendition_type == 'image':
+ thumbnails.append({
+ 'filesize': int_or_none(rendition.get('size')),
+ 'height': int_or_none(rendition.get('height')),
+ 'id': rendition.get('id'),
+ 'url': href,
+ 'width': int_or_none(rendition.get('width')),
+ })
+ elif rendition_type == 'subtitle':
+ subtitles.setdefault(rendition.get('language') or 'en', []).append({
+ 'url': href,
+ })
+ elif rendition_type == 'video':
+ f = {
+ 'filesize': int_or_none(rendition.get('size')),
+ 'format_id': rendition.get('id'),
+ 'url': href,
+ }
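+                        # The entries in rendition['videos'] appear to line up with
+                        # rendition['links'], so reuse the link index i here.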
+ video = try_get(rendition, lambda x: x['videos'][i], dict)
+ if video:
+ if not duration:
+ duration = float_or_none(video.get('duration'))
+ f.update({
+ 'height': int_or_none(video.get('height')),
+ 'tbr': int_or_none(video.get('bitrate'), 1000),
+ 'vcodec': video.get('codec'),
+ 'width': int_or_none(video.get('width')),
+ })
+ audio = try_get(video, lambda x: x['audios'][0], dict)
+ if audio:
+ f.update({
+ 'acodec': audio.get('codec'),
+ 'asr': int_or_none(audio.get('sampleRate')),
+ })
+ formats.append(f)
+ elif rendition_type == 'index':
+ mime_type = link.get('mimeType')
+ if mime_type == 'application/smil+xml':
+ formats.extend(self._extract_smil_formats(
+ href, video_id, fatal=False))
+ elif mime_type == 'application/x-mpegURL':
+ formats.extend(self._extract_m3u8_formats(
+ href, video_id, 'mp4', 'm3u8_native',
+ m3u8_id='hls', fatal=False))
+ elif mime_type == 'application/hds+xml':
+ formats.extend(self._extract_f4m_formats(
+ href, video_id, f4m_id='hds', fatal=False))
+                        elif mime_type == 'application/dash+xml':
+                            formats.extend(self._extract_mpd_formats(
+                                href, video_id, mpd_id='dash', fatal=False))
+ elif mime_type == 'application/vnd.ms-sstr+xml':
+ formats.extend(self._extract_ism_formats(
+ href, video_id, ism_id='mss', fatal=False))
+
+ return {
+ 'id': video_id,
+ 'title': title,
+ 'description': metadata.get('description'),
+ 'timestamp': parse_iso8601(media.get('created')),
+ 'thumbnails': thumbnails,
+ 'subtitles': subtitles,
+ 'duration': duration,
+ 'tags': media.get('tags'),
+ 'formats': formats,
+ }
diff --git a/yt_dlp/extractor/arnes.py b/yt_dlp/extractor/arnes.py
new file mode 100644
index 0000000..a493714
--- /dev/null
+++ b/yt_dlp/extractor/arnes.py
@@ -0,0 +1,98 @@
+from .common import InfoExtractor
+from ..compat import (
+ compat_parse_qs,
+ compat_urllib_parse_urlparse,
+)
+from ..utils import (
+    float_or_none,
+    format_field,
+ int_or_none,
+ parse_iso8601,
+ remove_start,
+)
+
+
+class ArnesIE(InfoExtractor):
+ IE_NAME = 'video.arnes.si'
+ IE_DESC = 'Arnes Video'
+ _VALID_URL = r'https?://video\.arnes\.si/(?:[a-z]{2}/)?(?:watch|embed|api/(?:asset|public/video))/(?P<id>[0-9a-zA-Z]{12})'
+ _TESTS = [{
+ 'url': 'https://video.arnes.si/watch/a1qrWTOQfVoU?t=10',
+ 'md5': '4d0f4d0a03571b33e1efac25fd4a065d',
+ 'info_dict': {
+ 'id': 'a1qrWTOQfVoU',
+ 'ext': 'mp4',
+ 'title': 'Linearna neodvisnost, definicija',
+ 'description': 'Linearna neodvisnost, definicija',
+ 'license': 'PRIVATE',
+ 'creator': 'Polona Oblak',
+ 'timestamp': 1585063725,
+ 'upload_date': '20200324',
+ 'channel': 'Polona Oblak',
+ 'channel_id': 'q6pc04hw24cj',
+ 'channel_url': 'https://video.arnes.si/?channel=q6pc04hw24cj',
+ 'duration': 596.75,
+ 'view_count': int,
+ 'tags': ['linearna_algebra'],
+ 'start_time': 10,
+ }
+ }, {
+ 'url': 'https://video.arnes.si/api/asset/s1YjnV7hadlC/play.mp4',
+ 'only_matching': True,
+ }, {
+ 'url': 'https://video.arnes.si/embed/s1YjnV7hadlC',
+ 'only_matching': True,
+ }, {
+ 'url': 'https://video.arnes.si/en/watch/s1YjnV7hadlC',
+ 'only_matching': True,
+ }, {
+ 'url': 'https://video.arnes.si/embed/s1YjnV7hadlC?t=123&hideRelated=1',
+ 'only_matching': True,
+ }, {
+ 'url': 'https://video.arnes.si/api/public/video/s1YjnV7hadlC',
+ 'only_matching': True,
+ }]
+ _BASE_URL = 'https://video.arnes.si'
+
+ def _real_extract(self, url):
+ video_id = self._match_id(url)
+
+ video = self._download_json(
+ self._BASE_URL + '/api/public/video/' + video_id, video_id)['data']
+ title = video['title']
+
+ formats = []
+ for media in (video.get('media') or []):
+ media_url = media.get('url')
+ if not media_url:
+ continue
+ formats.append({
+ 'url': self._BASE_URL + media_url,
+ 'format_id': remove_start(media.get('format'), 'FORMAT_'),
+ 'format_note': media.get('formatTranslation'),
+ 'width': int_or_none(media.get('width')),
+ 'height': int_or_none(media.get('height')),
+ })
+
+ channel = video.get('channel') or {}
+ channel_id = channel.get('url')
+ thumbnail = video.get('thumbnailUrl')
+
+ return {
+ 'id': video_id,
+ 'title': title,
+ 'formats': formats,
+ 'thumbnail': self._BASE_URL + thumbnail,
+ 'description': video.get('description'),
+ 'license': video.get('license'),
+ 'creator': video.get('author'),
+ 'timestamp': parse_iso8601(video.get('creationTime')),
+ 'channel': channel.get('name'),
+ 'channel_id': channel_id,
+ 'channel_url': format_field(channel_id, None, f'{self._BASE_URL}/?channel=%s'),
+ 'duration': float_or_none(video.get('duration'), 1000),
+ 'view_count': int_or_none(video.get('views')),
+ 'tags': video.get('hashtags'),
+ 'start_time': int_or_none(compat_parse_qs(
+ compat_urllib_parse_urlparse(url).query).get('t', [None])[0]),
+ }
diff --git a/yt_dlp/extractor/art19.py b/yt_dlp/extractor/art19.py
new file mode 100644
index 0000000..271c505
--- /dev/null
+++ b/yt_dlp/extractor/art19.py
@@ -0,0 +1,303 @@
+import re
+
+from .common import InfoExtractor
+from ..utils import float_or_none, int_or_none, parse_iso8601, url_or_none
+from ..utils.traversal import traverse_obj
+
+
+class Art19IE(InfoExtractor):
+ _UUID_REGEX = r'[\da-f]{8}-?[\da-f]{4}-?[\da-f]{4}-?[\da-f]{4}-?[\da-f]{12}'
+ _VALID_URL = [
+ rf'https?://(?:www\.)?art19\.com/shows/[^/#?]+/episodes/(?P<id>{_UUID_REGEX})',
+ rf'https?://rss\.art19\.com/episodes/(?P<id>{_UUID_REGEX})\.mp3',
+ ]
+ _EMBED_REGEX = [rf'<iframe[^>]+\bsrc=[\'"](?P<url>{_VALID_URL[0]})']
+
+ _TESTS = [{
+ 'url': 'https://rss.art19.com/episodes/5ba1413c-48b8-472b-9cc3-cfd952340bdb.mp3',
+ 'info_dict': {
+ 'id': '5ba1413c-48b8-472b-9cc3-cfd952340bdb',
+ 'ext': 'mp3',
+ 'title': 'Why Did DeSantis Drop Out?',
+ 'series': 'The Daily Briefing',
+ 'release_timestamp': 1705941275,
+ 'description': 'md5:da38961da4a3f7e419471365e3c6b49f',
+ 'episode': 'Episode 582',
+ 'thumbnail': r're:^https?://content\.production\.cdn\.art19\.com.*\.jpeg$',
+ 'series_id': 'ed52a0ab-08b1-4def-8afc-549e4d93296d',
+ 'upload_date': '20240122',
+ 'timestamp': 1705940815,
+ 'episode_number': 582,
+ 'modified_date': '20240122',
+ 'episode_id': '5ba1413c-48b8-472b-9cc3-cfd952340bdb',
+ 'modified_timestamp': 1705941275,
+ 'release_date': '20240122',
+ 'duration': 527.4,
+ },
+ }, {
+ 'url': 'https://art19.com/shows/scamfluencers/episodes/8319b776-4153-4d22-8630-631f204a03dd',
+ 'info_dict': {
+ 'id': '8319b776-4153-4d22-8630-631f204a03dd',
+ 'ext': 'mp3',
+ 'title': 'Martha Stewart: The Homemaker Hustler Part 2',
+ 'modified_date': '20240116',
+ 'upload_date': '20240105',
+ 'modified_timestamp': 1705435802,
+ 'episode_id': '8319b776-4153-4d22-8630-631f204a03dd',
+ 'series_id': 'd3c9b8ca-26b3-42f4-9bd8-21d1a9031e75',
+ 'thumbnail': r're:^https?://content\.production\.cdn\.art19\.com.*\.jpeg$',
+ 'description': 'md5:4aa7cfd1358dc57e729835bc208d7893',
+ 'release_timestamp': 1705305660,
+ 'release_date': '20240115',
+ 'timestamp': 1704481536,
+ 'episode_number': 88,
+ 'series': 'Scamfluencers',
+ 'duration': 2588.37501,
+ 'episode': 'Episode 88',
+ },
+ }]
+ _WEBPAGE_TESTS = [{
+ 'url': 'https://www.nu.nl/formule-1/6291456/verstappen-wordt-een-synoniem-voor-formule-1.html',
+ 'info_dict': {
+ 'id': '7d42626a-7301-47db-bb8a-3b6f054d77d7',
+ 'ext': 'mp3',
+ 'title': "'Verstappen wordt een synoniem voor Formule 1'",
+ 'season': 'Seizoen 6',
+ 'description': 'md5:39a7159a31c4cda312b2e893bdd5c071',
+ 'episode_id': '7d42626a-7301-47db-bb8a-3b6f054d77d7',
+ 'duration': 3061.82111,
+ 'series_id': '93f4e113-2a60-4609-a564-755058fa40d8',
+ 'release_date': '20231126',
+ 'modified_timestamp': 1701156004,
+ 'thumbnail': r're:^https?://content\.production\.cdn\.art19\.com.*\.jpeg$',
+ 'season_number': 6,
+ 'episode_number': 52,
+ 'modified_date': '20231128',
+ 'upload_date': '20231126',
+ 'timestamp': 1701025981,
+ 'season_id': '36097c1e-7455-490d-a2fe-e2f10b4d5f26',
+ 'series': 'De Boordradio',
+ 'release_timestamp': 1701026308,
+ 'episode': 'Episode 52',
+ },
+ }, {
+ 'url': 'https://www.wishtv.com/podcast-episode/larry-bucshon-announces-retirement-from-congress/',
+ 'info_dict': {
+ 'id': '8da368bd-08d1-46d0-afaa-c134a4af7dc0',
+ 'ext': 'mp3',
+ 'title': 'Larry Bucshon announces retirement from congress',
+ 'upload_date': '20240115',
+ 'episode_number': 148,
+ 'episode': 'Episode 148',
+ 'thumbnail': r're:^https?://content\.production\.cdn\.art19\.com.*\.jpeg$',
+ 'release_date': '20240115',
+ 'timestamp': 1705328205,
+ 'release_timestamp': 1705329275,
+ 'series': 'All INdiana Politics',
+ 'modified_date': '20240117',
+ 'modified_timestamp': 1705458901,
+ 'series_id': 'c4af6c27-b10f-4ff2-9f84-0f407df86ff1',
+ 'episode_id': '8da368bd-08d1-46d0-afaa-c134a4af7dc0',
+ 'description': 'md5:53b5239e4d14973a87125c217c255b2a',
+ 'duration': 1256.18848,
+ },
+ }]
+
+ @classmethod
+ def _extract_embed_urls(cls, url, webpage):
+ yield from super()._extract_embed_urls(url, webpage)
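+        # Embedded web players are assumed to look like (illustrative markup):
+        #   <div class="art19-web-player awp-medium" data-episode-id="<uuid>">
+        # and are rewritten to their rss.art19.com mp3 URLs below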
+ for episode_id in re.findall(
+ rf'<div[^>]+\bclass=[\'"][^\'"]*art19-web-player[^\'"]*[\'"][^>]+\bdata-episode-id=[\'"]({cls._UUID_REGEX})[\'"]', webpage):
+ yield f'https://rss.art19.com/episodes/{episode_id}.mp3'
+
+ def _real_extract(self, url):
+ episode_id = self._match_id(url)
+
+ player_metadata = self._download_json(
+ f'https://art19.com/episodes/{episode_id}', episode_id,
+ note='Downloading player metadata', fatal=False,
+ headers={'Accept': 'application/vnd.art19.v0+json'})
+ rss_metadata = self._download_json(
+ f'https://rss.art19.com/episodes/{episode_id}.json', episode_id, fatal=False,
+ note='Downloading RSS metadata')
+
+ formats = [{
+ 'format_id': 'direct',
+ 'url': f'https://rss.art19.com/episodes/{episode_id}.mp3',
+ 'vcodec': 'none',
+ 'acodec': 'mp3',
+ }]
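+        # The RSS 'media' mapping is assumed to key codec ids to file entries,
+        # e.g. {'mp3': {'url': ...}, 'ogg': {'url': ...}, 'waveform_bin': ...};
+        # 'waveform_bin' is player waveform data rather than audio, so skip it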
+ for fmt_id, fmt_data in traverse_obj(rss_metadata, ('content', 'media', {dict.items}, ...)):
+ if fmt_id == 'waveform_bin':
+ continue
+ fmt_url = traverse_obj(fmt_data, ('url', {url_or_none}))
+ if not fmt_url:
+ continue
+ formats.append({
+ 'format_id': fmt_id,
+ 'url': fmt_url,
+ 'vcodec': 'none',
+ 'acodec': fmt_id,
+ 'quality': -2 if fmt_id == 'ogg' else -1,
+ })
+
+ return {
+ 'id': episode_id,
+ 'formats': formats,
+ **traverse_obj(player_metadata, ('episode', {
+ 'title': ('title', {str}),
+ 'description': ('description_plain', {str}),
+ 'episode_id': ('id', {str}),
+ 'episode_number': ('episode_number', {int_or_none}),
+ 'season_id': ('season_id', {str}),
+ 'series_id': ('series_id', {str}),
+ 'timestamp': ('created_at', {parse_iso8601}),
+ 'release_timestamp': ('released_at', {parse_iso8601}),
+ 'modified_timestamp': ('updated_at', {parse_iso8601})
+ })),
+ **traverse_obj(rss_metadata, ('content', {
+ 'title': ('episode_title', {str}),
+ 'description': ('episode_description_plain', {str}),
+ 'episode_id': ('episode_id', {str}),
+ 'episode_number': ('episode_number', {int_or_none}),
+ 'season': ('season_title', {str}),
+ 'season_id': ('season_id', {str}),
+ 'season_number': ('season_number', {int_or_none}),
+ 'series': ('series_title', {str}),
+ 'series_id': ('series_id', {str}),
+ 'thumbnail': ('cover_image', {url_or_none}),
+ 'duration': ('duration', {float_or_none}),
+ })),
+ }
+
+
+class Art19ShowIE(InfoExtractor):
+ _VALID_URL_BASE = r'https?://(?:www\.)?art19\.com/shows/(?P<id>[\w-]+)(?:/embed)?/?'
+ _VALID_URL = [
+ rf'{_VALID_URL_BASE}(?:$|[#?])',
+ r'https?://rss\.art19\.com/(?P<id>[\w-]+)/?(?:$|[#?])',
+ ]
+ _EMBED_REGEX = [rf'<iframe[^>]+\bsrc=[\'"](?P<url>{_VALID_URL_BASE}[^\'"])']
+
+ _TESTS = [{
+ 'url': 'https://www.art19.com/shows/5898c087-a14f-48dc-b6fc-a2280a1ff6e0/',
+ 'info_dict': {
+ '_type': 'playlist',
+ 'id': '5898c087-a14f-48dc-b6fc-a2280a1ff6e0',
+ 'display_id': 'echt-gebeurd',
+ 'title': 'Echt Gebeurd',
+ 'description': 'md5:5fd11dc80b76e51ffd34b6067fd5e560',
+ 'timestamp': 1492642167,
+ 'upload_date': '20170419',
+ 'modified_timestamp': int,
+ 'modified_date': str,
+ 'tags': 'count:7',
+ },
+ 'playlist_mincount': 425,
+ }, {
+ 'url': 'https://www.art19.com/shows/echt-gebeurd',
+ 'info_dict': {
+ '_type': 'playlist',
+ 'id': '5898c087-a14f-48dc-b6fc-a2280a1ff6e0',
+ 'display_id': 'echt-gebeurd',
+ 'title': 'Echt Gebeurd',
+ 'description': 'md5:5fd11dc80b76e51ffd34b6067fd5e560',
+ 'timestamp': 1492642167,
+ 'upload_date': '20170419',
+ 'modified_timestamp': int,
+ 'modified_date': str,
+ 'tags': 'count:7',
+ },
+ 'playlist_mincount': 425,
+ }, {
+ 'url': 'https://rss.art19.com/scamfluencers',
+ 'info_dict': {
+ '_type': 'playlist',
+ 'id': 'd3c9b8ca-26b3-42f4-9bd8-21d1a9031e75',
+ 'display_id': 'scamfluencers',
+ 'title': 'Scamfluencers',
+ 'description': 'md5:7d239d670c0ced6dadbf71c4caf764b7',
+ 'timestamp': 1647368573,
+ 'upload_date': '20220315',
+ 'modified_timestamp': int,
+ 'modified_date': str,
+ 'tags': [],
+ },
+ 'playlist_mincount': 90,
+ }, {
+ 'url': 'https://art19.com/shows/enthuellt/embed',
+ 'info_dict': {
+ '_type': 'playlist',
+ 'id': 'e2cacf57-bb8a-4263-aa81-719bcdd4f80c',
+ 'display_id': 'enthuellt',
+ 'title': 'Enthüllt',
+ 'description': 'md5:17752246643414a2fd51744fc9a1c08e',
+ 'timestamp': 1601645860,
+ 'upload_date': '20201002',
+ 'modified_timestamp': int,
+ 'modified_date': str,
+ 'tags': 'count:10',
+ },
+ 'playlist_mincount': 10,
+ }]
+ _WEBPAGE_TESTS = [{
+ 'url': 'https://deconstructingyourself.com/deconstructing-yourself-podcast',
+ 'info_dict': {
+ '_type': 'playlist',
+ 'id': 'cfbb9b01-c295-4adb-8726-adde7c03cf21',
+ 'display_id': 'deconstructing-yourself',
+ 'title': 'Deconstructing Yourself',
+ 'description': 'md5:dab5082b28b248a35476abf64768854d',
+ 'timestamp': 1570581181,
+ 'upload_date': '20191009',
+ 'modified_timestamp': int,
+ 'modified_date': str,
+ 'tags': 'count:5',
+ },
+ 'playlist_mincount': 80,
+ }, {
+ 'url': 'https://chicagoreader.com/columns-opinion/podcasts/ben-joravsky-show-podcast-episodes/',
+ 'info_dict': {
+ '_type': 'playlist',
+ 'id': '9dfa2c37-ab87-4c13-8388-4897914313ec',
+ 'display_id': 'the-ben-joravsky-show',
+ 'title': 'The Ben Joravsky Show',
+ 'description': 'md5:c0f3ec0ee0dbea764390e521adc8780a',
+ 'timestamp': 1550875095,
+ 'upload_date': '20190222',
+ 'modified_timestamp': int,
+ 'modified_date': str,
+ 'tags': ['Chicago Politics', 'chicago', 'Ben Joravsky'],
+ },
+ 'playlist_mincount': 1900,
+ }]
+
+ @classmethod
+ def _extract_embed_urls(cls, url, webpage):
+ yield from super()._extract_embed_urls(url, webpage)
+ for series_id in re.findall(
+ r'<div[^>]+\bclass=[\'"][^\'"]*art19-web-player[^\'"]*[\'"][^>]+\bdata-series-id=[\'"]([\w-]+)[\'"]', webpage):
+ yield f'https://art19.com/shows/{series_id}'
+
+ def _real_extract(self, url):
+ series_id = self._match_id(url)
+ series_metadata = self._download_json(
+ f'https://art19.com/series/{series_id}', series_id, note='Downloading series metadata',
+ headers={'Accept': 'application/vnd.art19.v0+json'})
+
+ return {
+ '_type': 'playlist',
+ 'entries': [
+ self.url_result(f'https://rss.art19.com/episodes/{episode_id}.mp3', Art19IE)
+ for episode_id in traverse_obj(series_metadata, ('series', 'episode_ids', ..., {str}))
+ ],
+ **traverse_obj(series_metadata, ('series', {
+ 'id': ('id', {str}),
+ 'display_id': ('slug', {str}),
+ 'title': ('title', {str}),
+ 'description': ('description_plain', {str}),
+ 'timestamp': ('created_at', {parse_iso8601}),
+ 'modified_timestamp': ('updated_at', {parse_iso8601}),
+ })),
+ 'tags': traverse_obj(series_metadata, ('tags', ..., 'name', {str})),
+ }
diff --git a/yt_dlp/extractor/arte.py b/yt_dlp/extractor/arte.py
new file mode 100644
index 0000000..1c180b1
--- /dev/null
+++ b/yt_dlp/extractor/arte.py
@@ -0,0 +1,345 @@
+import re
+
+from .common import InfoExtractor
+from ..utils import (
+ ExtractorError,
+ GeoRestrictedError,
+ int_or_none,
+ parse_iso8601,
+ parse_qs,
+ strip_or_none,
+ traverse_obj,
+ url_or_none,
+)
+
+
+class ArteTVBaseIE(InfoExtractor):
+ _ARTE_LANGUAGES = 'fr|de|en|es|it|pl'
+ _API_BASE = 'https://api.arte.tv/api/player/v2'
+
+
+class ArteTVIE(ArteTVBaseIE):
+ _VALID_URL = r'''(?x)
+ (?:https?://
+ (?:
+ (?:www\.)?arte\.tv/(?P<lang>%(langs)s)/videos|
+ api\.arte\.tv/api/player/v\d+/config/(?P<lang_2>%(langs)s)
+ )
+ |arte://program)
+ /(?P<id>\d{6}-\d{3}-[AF]|LIVE)
+ ''' % {'langs': ArteTVBaseIE._ARTE_LANGUAGES}
+ _TESTS = [{
+ 'url': 'https://www.arte.tv/en/videos/088501-000-A/mexico-stealing-petrol-to-survive/',
+ 'only_matching': True,
+ }, {
+ 'url': 'https://www.arte.tv/pl/videos/100103-000-A/usa-dyskryminacja-na-porodowce/',
+ 'info_dict': {
+ 'id': '100103-000-A',
+ 'title': 'USA: Dyskryminacja na porodówce',
+ 'description': 'md5:242017b7cce59ffae340a54baefcafb1',
+ 'alt_title': 'ARTE Reportage',
+ 'upload_date': '20201103',
+ 'duration': 554,
+ 'thumbnail': r're:https://api-cdn\.arte\.tv/.+940x530',
+ 'timestamp': 1604417980,
+ 'ext': 'mp4',
+ },
+ 'params': {'skip_download': 'm3u8'}
+ }, {
+ 'note': 'No alt_title',
+ 'url': 'https://www.arte.tv/fr/videos/110371-000-A/la-chaleur-supplice-des-arbres-de-rue/',
+ 'only_matching': True,
+ }, {
+ 'url': 'https://api.arte.tv/api/player/v2/config/de/100605-013-A',
+ 'only_matching': True,
+ }, {
+ 'url': 'https://api.arte.tv/api/player/v2/config/de/LIVE',
+ 'only_matching': True,
+ }, {
+ 'url': 'https://www.arte.tv/de/videos/110203-006-A/zaz/',
+ 'only_matching': True,
+ }, {
+ 'note': 'age-restricted',
+ 'url': 'https://www.arte.tv/de/videos/006785-000-A/the-element-of-crime/',
+ 'info_dict': {
+ 'id': '006785-000-A',
+ 'description': 'md5:c2f94fdfefc8a280e4dab68ab96ab0ba',
+ 'title': 'The Element of Crime',
+ 'timestamp': 1696111200,
+ 'duration': 5849,
+ 'thumbnail': 'https://api-cdn.arte.tv/img/v2/image/q82dTTfyuCXupPsGxXsd7B/940x530',
+ 'upload_date': '20230930',
+ 'ext': 'mp4',
+ },
+ }, {
+ 'url': 'https://www.arte.tv/de/videos/085374-003-A/im-hohen-norden-geboren/',
+ 'info_dict': {
+ 'id': '085374-003-A',
+ 'ext': 'mp4',
+ 'description': 'md5:ab79ec7cc472a93164415b4e4916abf9',
+ 'timestamp': 1702872000,
+ 'thumbnail': 'https://api-cdn.arte.tv/img/v2/image/TnyHBfPxv3v2GEY3suXGZP/940x530',
+ 'duration': 2594,
+ 'title': 'Die kurze Zeit der Jugend',
+ 'alt_title': 'Im hohen Norden geboren',
+ 'upload_date': '20231218',
+ 'subtitles': {
+ 'fr': 'mincount:1',
+ 'fr-acc': 'mincount:1',
+ },
+ },
+ }]
+
+ _GEO_BYPASS = True
+
+ _LANG_MAP = { # ISO639 -> French abbreviations
+ 'fr': 'F',
+ 'de': 'A',
+ 'en': 'E[ANG]',
+ 'es': 'E[ESP]',
+ 'it': 'E[ITA]',
+ 'pl': 'E[POL]',
+ # XXX: probably means mixed; <https://www.arte.tv/en/videos/107710-029-A/dispatches-from-ukraine-local-journalists-report/>
+ # uses this code for audio that happens to be in Ukrainian, but the manifest uses the ISO code 'mul' (mixed)
+ 'mul': 'EU',
+ }
+
+ _VERSION_CODE_RE = re.compile(r'''(?x)
+ V
+ (?P<original_voice>O?)
+ (?P<vlang>[FA]|E\[[A-Z]+\]|EU)?
+ (?P<audio_desc>AUD|)
+ (?:
+ (?P<has_sub>-ST)
+ (?P<sdh_sub>M?)
+ (?P<sub_lang>[FA]|E\[[A-Z]+\]|EU)
+ )?
+ ''')
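+    # Example codes (assumed readings, per the regex above):
+    #   'VOF'     -> original voice, French
+    #   'VA-STMA' -> German voice with German hard-of-hearing subtitles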
+
+ # all obtained by exhaustive testing
+ _COUNTRIES_MAP = {
+ 'DE_FR': (
+ 'BL', 'DE', 'FR', 'GF', 'GP', 'MF', 'MQ', 'NC',
+ 'PF', 'PM', 'RE', 'WF', 'YT',
+ ),
+ # with both of the below 'BE' sometimes works, sometimes doesn't
+ 'EUR_DE_FR': (
+ 'AT', 'BL', 'CH', 'DE', 'FR', 'GF', 'GP', 'LI',
+ 'MC', 'MF', 'MQ', 'NC', 'PF', 'PM', 'RE', 'WF',
+ 'YT',
+ ),
+ 'SAT': (
+ 'AD', 'AT', 'AX', 'BG', 'BL', 'CH', 'CY', 'CZ',
+ 'DE', 'DK', 'EE', 'ES', 'FI', 'FR', 'GB', 'GF',
+ 'GR', 'HR', 'HU', 'IE', 'IS', 'IT', 'KN', 'LI',
+ 'LT', 'LU', 'LV', 'MC', 'MF', 'MQ', 'MT', 'NC',
+ 'NL', 'NO', 'PF', 'PL', 'PM', 'PT', 'RE', 'RO',
+ 'SE', 'SI', 'SK', 'SM', 'VA', 'WF', 'YT',
+ ),
+ }
+
+ @staticmethod
+ def _fix_accessible_subs_locale(subs):
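+        # Subtitle variants whose URL ends in '-MAL.m3u8' appear to be the
+        # accessible versions; expose them under a separate '<lang>-acc' key
+        # (cf. the 'fr-acc' subtitles in the test case above)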
+ updated_subs = {}
+ for lang, sub_formats in subs.items():
+ for fmt in sub_formats:
+ if fmt.get('url', '').endswith('-MAL.m3u8'):
+ lang += '-acc'
+ updated_subs.setdefault(lang, []).append(fmt)
+ return updated_subs
+
+ def _real_extract(self, url):
+ mobj = self._match_valid_url(url)
+ video_id = mobj.group('id')
+ lang = mobj.group('lang') or mobj.group('lang_2')
+        language_code = self._LANG_MAP.get(lang)
+
+ config = self._download_json(f'{self._API_BASE}/config/{lang}/{video_id}', video_id, headers={
+ 'x-validated-age': '18'
+ })
+
+ geoblocking = traverse_obj(config, ('data', 'attributes', 'restriction', 'geoblocking')) or {}
+ if geoblocking.get('restrictedArea'):
+ raise GeoRestrictedError(f'Video restricted to {geoblocking["code"]!r}',
+ countries=self._COUNTRIES_MAP.get(geoblocking['code'], ('DE', 'FR')))
+
+ if not traverse_obj(config, ('data', 'attributes', 'rights')):
+ # Eg: https://www.arte.tv/de/videos/097407-215-A/28-minuten
+ # Eg: https://www.arte.tv/es/videos/104351-002-A/serviteur-du-peuple-1-23
+ raise ExtractorError(
+ 'Video is not available in this language edition of Arte or broadcast rights expired', expected=True)
+
+ formats, subtitles = [], {}
+ secondary_formats = []
+ for stream in config['data']['attributes']['streams']:
+ # official player contains code like `e.get("versions")[0].eStat.ml5`
+ stream_version = stream['versions'][0]
+ stream_version_code = stream_version['eStat']['ml5']
+
+ lang_pref = -1
+ m = self._VERSION_CODE_RE.match(stream_version_code)
+ if m:
+                lang_pref = int(''.join('01'[x] for x in (
+                    m.group('vlang') == language_code,  # we prefer voice in the requested language
+                    not m.group('audio_desc'),  # and not the audio description version
+                    bool(m.group('original_voice')),  # but if voice is not in the requested language, at least choose the original voice
+                    m.group('sub_lang') == language_code,  # if subtitles are present, we prefer them in the requested language
+                    not m.group('has_sub'),  # but we prefer no subtitles otherwise
+                    not m.group('sdh_sub'),  # and we prefer non-hard-of-hearing subtitles when subtitles exist
+                )))
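+                # each check above contributes one decimal digit, most significant
+                # first; e.g. a 'VOF' stream requested in French scores 111011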
+
+ short_label = traverse_obj(stream_version, 'shortLabel', expected_type=str, default='?')
+ if 'HLS' in stream['protocol']:
+ fmts, subs = self._extract_m3u8_formats_and_subtitles(
+ stream['url'], video_id=video_id, ext='mp4', m3u8_id=stream_version_code, fatal=False)
+ for fmt in fmts:
+ fmt.update({
+ 'format_note': f'{stream_version.get("label", "unknown")} [{short_label}]',
+ 'language_preference': lang_pref,
+ })
+ if any(map(short_label.startswith, ('cc', 'OGsub'))):
+ secondary_formats.extend(fmts)
+ else:
+ formats.extend(fmts)
+ subs = self._fix_accessible_subs_locale(subs)
+ self._merge_subtitles(subs, target=subtitles)
+
+ elif stream['protocol'] in ('HTTPS', 'RTMP'):
+ formats.append({
+ 'format_id': f'{stream["protocol"]}-{stream_version_code}',
+ 'url': stream['url'],
+ 'format_note': f'{stream_version.get("label", "unknown")} [{short_label}]',
+ 'language_preference': lang_pref,
+ # 'ext': 'mp4', # XXX: may or may not be necessary, at least for HTTPS
+ })
+
+ else:
+ self.report_warning(f'Skipping stream with unknown protocol {stream["protocol"]}')
+
+ formats.extend(secondary_formats)
+ self._remove_duplicate_formats(formats)
+
+ metadata = config['data']['attributes']['metadata']
+
+ return {
+ 'id': metadata['providerId'],
+ 'webpage_url': traverse_obj(metadata, ('link', 'url')),
+ 'title': traverse_obj(metadata, 'subtitle', 'title'),
+ 'alt_title': metadata.get('subtitle') and metadata.get('title'),
+ 'description': metadata.get('description'),
+ 'duration': traverse_obj(metadata, ('duration', 'seconds')),
+ 'language': metadata.get('language'),
+ 'timestamp': traverse_obj(config, ('data', 'attributes', 'rights', 'begin'), expected_type=parse_iso8601),
+ 'is_live': config['data']['attributes'].get('live', False),
+ 'formats': formats,
+ 'subtitles': subtitles,
+ 'thumbnails': [
+ {'url': image['url'], 'id': image.get('caption')}
+ for image in metadata.get('images') or [] if url_or_none(image.get('url'))
+ ],
+ # TODO: chapters may also be in stream['segments']?
+ 'chapters': traverse_obj(config, ('data', 'attributes', 'chapters', 'elements', ..., {
+ 'start_time': 'startTime',
+ 'title': 'title',
+ })) or None,
+ }
+
+
+class ArteTVEmbedIE(InfoExtractor):
+ _VALID_URL = r'https?://(?:www\.)?arte\.tv/player/v\d+/index\.php\?.*?\bjson_url=.+'
+ _EMBED_REGEX = [r'<(?:iframe|script)[^>]+src=(["\'])(?P<url>(?:https?:)?//(?:www\.)?arte\.tv/player/v\d+/index\.php\?.*?\bjson_url=.+?)\1']
+ _TESTS = [{
+ 'url': 'https://www.arte.tv/player/v5/index.php?json_url=https%3A%2F%2Fapi.arte.tv%2Fapi%2Fplayer%2Fv2%2Fconfig%2Fde%2F100605-013-A&lang=de&autoplay=true&mute=0100605-013-A',
+ 'info_dict': {
+ 'id': '100605-013-A',
+ 'ext': 'mp4',
+ 'title': 'United we Stream November Lockdown Edition #13',
+ 'description': 'md5:be40b667f45189632b78c1425c7c2ce1',
+ 'upload_date': '20201116',
+ },
+ 'skip': 'No video available'
+ }, {
+ 'url': 'https://www.arte.tv/player/v3/index.php?json_url=https://api.arte.tv/api/player/v2/config/de/100605-013-A',
+ 'only_matching': True,
+ }]
+
+ def _real_extract(self, url):
+ qs = parse_qs(url)
+ json_url = qs['json_url'][0]
+ video_id = ArteTVIE._match_id(json_url)
+ return self.url_result(
+ json_url, ie=ArteTVIE.ie_key(), video_id=video_id)
+
+
+class ArteTVPlaylistIE(ArteTVBaseIE):
+ _VALID_URL = r'https?://(?:www\.)?arte\.tv/(?P<lang>%s)/videos/(?P<id>RC-\d{6})' % ArteTVBaseIE._ARTE_LANGUAGES
+ _TESTS = [{
+ 'url': 'https://www.arte.tv/en/videos/RC-016954/earn-a-living/',
+ 'only_matching': True,
+ }, {
+ 'url': 'https://www.arte.tv/pl/videos/RC-014123/arte-reportage/',
+ 'playlist_mincount': 100,
+ 'info_dict': {
+ 'description': 'md5:84e7bf1feda248bc325ebfac818c476e',
+ 'id': 'RC-014123',
+ 'title': 'ARTE Reportage - najlepsze reportaże',
+ },
+ }]
+
+ def _real_extract(self, url):
+ lang, playlist_id = self._match_valid_url(url).group('lang', 'id')
+ playlist = self._download_json(
+ f'{self._API_BASE}/playlist/{lang}/{playlist_id}', playlist_id)['data']['attributes']
+
+ entries = [{
+ '_type': 'url_transparent',
+ 'url': video['config']['url'],
+ 'ie_key': ArteTVIE.ie_key(),
+ 'id': video.get('providerId'),
+ 'title': video.get('title'),
+ 'alt_title': video.get('subtitle'),
+ 'thumbnail': url_or_none(traverse_obj(video, ('mainImage', 'url'))),
+ 'duration': int_or_none(traverse_obj(video, ('duration', 'seconds'))),
+ } for video in traverse_obj(playlist, ('items', lambda _, v: v['config']['url']))]
+
+ return self.playlist_result(entries, playlist_id,
+ traverse_obj(playlist, ('metadata', 'title')),
+ traverse_obj(playlist, ('metadata', 'description')))
+
+
+class ArteTVCategoryIE(ArteTVBaseIE):
+ _VALID_URL = r'https?://(?:www\.)?arte\.tv/(?P<lang>%s)/videos/(?P<id>[\w-]+(?:/[\w-]+)*)/?\s*$' % ArteTVBaseIE._ARTE_LANGUAGES
+ _TESTS = [{
+ 'url': 'https://www.arte.tv/en/videos/politics-and-society/',
+ 'info_dict': {
+ 'id': 'politics-and-society',
+ 'title': 'Politics and society',
+ 'description': 'Investigative documentary series, geopolitical analysis, and international commentary',
+ },
+ 'playlist_mincount': 13,
+ }]
+
+ @classmethod
+ def suitable(cls, url):
+ return (
+ not any(ie.suitable(url) for ie in (ArteTVIE, ArteTVPlaylistIE, ))
+ and super().suitable(url))
+
+ def _real_extract(self, url):
+ lang, playlist_id = self._match_valid_url(url).groups()
+ webpage = self._download_webpage(url, playlist_id)
+
+ items = []
+ for video in re.finditer(
+ r'<a\b[^>]*?href\s*=\s*(?P<q>"|\'|\b)(?P<url>https?://www\.arte\.tv/%s/videos/[\w/-]+)(?P=q)' % lang,
+ webpage):
+ video = video.group('url')
+ if video == url:
+ continue
+ if any(ie.suitable(video) for ie in (ArteTVIE, ArteTVPlaylistIE, )):
+ items.append(video)
+
+ title = strip_or_none(self._generic_title('', webpage, default='').rsplit('|', 1)[0]) or None
+
+ return self.playlist_from_matches(items, playlist_id=playlist_id, playlist_title=title,
+ description=self._og_search_description(webpage, default=None))
diff --git a/yt_dlp/extractor/asobichannel.py b/yt_dlp/extractor/asobichannel.py
new file mode 100644
index 0000000..e3479ed
--- /dev/null
+++ b/yt_dlp/extractor/asobichannel.py
@@ -0,0 +1,168 @@
+from .common import InfoExtractor
+from ..utils import (
+ ExtractorError,
+ clean_html,
+ merge_dicts,
+ parse_iso8601,
+ url_or_none,
+)
+from ..utils.traversal import traverse_obj
+
+
+class AsobiChannelBaseIE(InfoExtractor):
+ _MICROCMS_HEADER = {'X-MICROCMS-API-KEY': 'qRaKehul9AHU8KtL0dnq1OCLKnFec6yrbcz3'}
+
+ def _extract_info(self, metadata):
+ return traverse_obj(metadata, {
+ 'id': ('id', {str}),
+ 'title': ('title', {str}),
+ 'description': ('body', {clean_html}),
+ 'thumbnail': ('contents', 'video_thumb', 'url', {url_or_none}),
+ 'timestamp': ('publishedAt', {parse_iso8601}),
+ 'modified_timestamp': ('updatedAt', {parse_iso8601}),
+ 'channel': ('channel', 'name', {str}),
+ 'channel_id': ('channel', 'id', {str}),
+ })
+
+
+class AsobiChannelIE(AsobiChannelBaseIE):
+ IE_NAME = 'asobichannel'
+ IE_DESC = 'ASOBI CHANNEL'
+
+ _VALID_URL = r'https?://asobichannel\.asobistore\.jp/watch/(?P<id>[\w-]+)'
+ _TESTS = [{
+ 'url': 'https://asobichannel.asobistore.jp/watch/1ypp48qd32p',
+ 'md5': '39df74e872afe032c4eb27b89144fc92',
+ 'info_dict': {
+ 'id': '1ypp48qd32p',
+ 'ext': 'mp4',
+ 'title': 'アイドルマスター ミリオンライブ! 765プロch 原っぱ通信 #1',
+ 'description': 'md5:b930bd2199c9b2fd75951ce4aaa7efd2',
+ 'thumbnail': 'https://images.microcms-assets.io/assets/d2420de4b9194e11beb164f99edb1f95/a8e6f84119f54eb9ab4ce16729239905/%E3%82%B5%E3%83%A0%E3%83%8D%20(1).png',
+ 'timestamp': 1697098247,
+ 'upload_date': '20231012',
+ 'modified_timestamp': 1698381162,
+ 'modified_date': '20231027',
+ 'channel': 'アイドルマスター',
+ 'channel_id': 'idolmaster',
+ },
+ }, {
+ 'url': 'https://asobichannel.asobistore.jp/watch/redigiwnjzqj',
+ 'md5': '229fa8fb5c591c75ce8c37a497f113f6',
+ 'info_dict': {
+ 'id': 'redigiwnjzqj',
+ 'ext': 'mp4',
+ 'title': '【おまけ放送】アイドルマスター ミリオンライブ! 765プロch 原っぱ通信 #1',
+ 'description': 'md5:7d9cd35fb54425a6967822bd564ea2d9',
+ 'thumbnail': 'https://images.microcms-assets.io/assets/d2420de4b9194e11beb164f99edb1f95/20e5c1d6184242eebc2512a5dec59bf0/P1_%E5%8E%9F%E3%81%A3%E3%81%B1%E3%82%B5%E3%83%A0%E3%83%8D.png',
+ 'modified_timestamp': 1697797125,
+ 'modified_date': '20231020',
+ 'timestamp': 1697261769,
+ 'upload_date': '20231014',
+ 'channel': 'アイドルマスター',
+ 'channel_id': 'idolmaster',
+ },
+ }]
+
+ _survapi_header = None
+
+ def _real_initialize(self):
+ token = self._download_json(
+ 'https://asobichannel-api.asobistore.jp/api/v1/vspf/token', None,
+ note='Retrieving API token')
+ self._survapi_header = {'Authorization': f'Bearer {token}'}
+
+ def _process_vod(self, video_id, metadata):
+ content_id = metadata['contents']['video_id']
+
+ vod_data = self._download_json(
+ f'https://survapi.channel.or.jp/proxy/v1/contents/{content_id}/get_by_cuid', video_id,
+ headers=self._survapi_header, note='Downloading vod data')
+
+ return {
+ 'formats': self._extract_m3u8_formats(vod_data['ex_content']['streaming_url'], video_id),
+ }
+
+ def _process_live(self, video_id, metadata):
+ content_id = metadata['contents']['video_id']
+ event_data = self._download_json(
+ f'https://survapi.channel.or.jp/ex/events/{content_id}?embed=channel', video_id,
+ headers=self._survapi_header, note='Downloading event data')
+
+ player_type = traverse_obj(event_data, ('data', 'Player_type', {str}))
+ if player_type == 'poster':
+ self.raise_no_formats('Live event has not yet started', expected=True)
+ live_status = 'is_upcoming'
+ formats = []
+ elif player_type == 'player':
+ live_status = 'is_live'
+ formats = self._extract_m3u8_formats(
+ event_data['data']['Channel']['Custom_live_url'], video_id, live=True)
+ else:
+            raise ExtractorError(f'Unsupported player type {player_type!r}')
+
+ return {
+ 'release_timestamp': traverse_obj(metadata, ('period', 'start', {parse_iso8601})),
+ 'live_status': live_status,
+ 'formats': formats,
+ }
+
+ def _real_extract(self, url):
+ video_id = self._match_id(url)
+
+ metadata = self._download_json(
+ f'https://channel.microcms.io/api/v1/media/{video_id}', video_id,
+ headers=self._MICROCMS_HEADER)
+
+ info = self._extract_info(metadata)
+
+ video_type = traverse_obj(metadata, ('contents', 'video_type', 0, {str}))
+ if video_type == 'VOD':
+ return merge_dicts(info, self._process_vod(video_id, metadata))
+ if video_type == 'LIVE':
+ return merge_dicts(info, self._process_live(video_id, metadata))
+
+ raise ExtractorError(f'Unexpected video type {video_type!r}')
+
+
+class AsobiChannelTagURLIE(AsobiChannelBaseIE):
+ IE_NAME = 'asobichannel:tag'
+ IE_DESC = 'ASOBI CHANNEL'
+
+ _VALID_URL = r'https?://asobichannel\.asobistore\.jp/tag/(?P<id>[a-z0-9-_]+)'
+ _TESTS = [{
+ 'url': 'https://asobichannel.asobistore.jp/tag/bjhh-nbcja',
+ 'info_dict': {
+ 'id': 'bjhh-nbcja',
+ 'title': 'アイドルマスター ミリオンライブ! 765プロch 原っぱ通信',
+ },
+ 'playlist_mincount': 16,
+ }, {
+ 'url': 'https://asobichannel.asobistore.jp/tag/hvm5qw3c6od',
+ 'info_dict': {
+ 'id': 'hvm5qw3c6od',
+ 'title': 'アイマスMOIW2023ラジオ',
+ },
+ 'playlist_mincount': 13,
+ }]
+
+ def _real_extract(self, url):
+ tag_id = self._match_id(url)
+ webpage = self._download_webpage(url, tag_id)
+ title = traverse_obj(self._search_nextjs_data(
+ webpage, tag_id, fatal=False), ('props', 'pageProps', 'data', 'name', {str}))
+
+ media = self._download_json(
+ f'https://channel.microcms.io/api/v1/media?limit=999&filters=(tag[contains]{tag_id})',
+ tag_id, headers=self._MICROCMS_HEADER)
+
+ def entries():
+ for metadata in traverse_obj(media, ('contents', lambda _, v: v['id'])):
+ yield {
+ '_type': 'url',
+ 'url': f'https://asobichannel.asobistore.jp/watch/{metadata["id"]}',
+ 'ie_key': AsobiChannelIE.ie_key(),
+ **self._extract_info(metadata),
+ }
+
+ return self.playlist_result(entries(), tag_id, title)
diff --git a/yt_dlp/extractor/atresplayer.py b/yt_dlp/extractor/atresplayer.py
new file mode 100644
index 0000000..3a44e52
--- /dev/null
+++ b/yt_dlp/extractor/atresplayer.py
@@ -0,0 +1,104 @@
+from .common import InfoExtractor
+from ..networking.exceptions import HTTPError
+from ..utils import (
+ ExtractorError,
+ int_or_none,
+ urlencode_postdata,
+)
+
+
+class AtresPlayerIE(InfoExtractor):
+ _VALID_URL = r'https?://(?:www\.)?atresplayer\.com/[^/]+/[^/]+/[^/]+/[^/]+/(?P<display_id>.+?)_(?P<id>[0-9a-f]{24})'
+ _NETRC_MACHINE = 'atresplayer'
+ _TESTS = [
+ {
+ 'url': 'https://www.atresplayer.com/antena3/series/pequenas-coincidencias/temporada-1/capitulo-7-asuntos-pendientes_5d4aa2c57ed1a88fc715a615/',
+ 'info_dict': {
+ 'id': '5d4aa2c57ed1a88fc715a615',
+ 'ext': 'mp4',
+ 'title': 'Capítulo 7: Asuntos pendientes',
+ 'description': 'md5:7634cdcb4d50d5381bedf93efb537fbc',
+ 'duration': 3413,
+ },
+ 'skip': 'This video is only available for registered users'
+ },
+ {
+ 'url': 'https://www.atresplayer.com/lasexta/programas/el-club-de-la-comedia/temporada-4/capitulo-10-especial-solidario-nochebuena_5ad08edf986b2855ed47adc4/',
+ 'only_matching': True,
+ },
+ {
+ 'url': 'https://www.atresplayer.com/antena3/series/el-secreto-de-puente-viejo/el-chico-de-los-tres-lunares/capitulo-977-29-12-14_5ad51046986b2886722ccdea/',
+ 'only_matching': True,
+ },
+ ]
+ _API_BASE = 'https://api.atresplayer.com/'
+
+ def _handle_error(self, e, code):
+ if isinstance(e.cause, HTTPError) and e.cause.status == code:
+ error = self._parse_json(e.cause.response.read(), None)
+ if error.get('error') == 'required_registered':
+ self.raise_login_required()
+ raise ExtractorError(error['error_description'], expected=True)
+ raise
+
+ def _perform_login(self, username, password):
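+        # Login is a multi-step flow: fetch the login page (presumably to set
+        # cookies), POST the credentials to the Atresmedia account API, then
+        # follow the returned targetUrl to complete the session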
+ self._request_webpage(
+ self._API_BASE + 'login', None, 'Downloading login page')
+
+ try:
+ target_url = self._download_json(
+ 'https://account.atresmedia.com/api/login', None,
+ 'Logging in', headers={
+ 'Content-Type': 'application/x-www-form-urlencoded'
+ }, data=urlencode_postdata({
+ 'username': username,
+ 'password': password,
+ }))['targetUrl']
+ except ExtractorError as e:
+ self._handle_error(e, 400)
+
+ self._request_webpage(target_url, None, 'Following Target URL')
+
+ def _real_extract(self, url):
+ display_id, video_id = self._match_valid_url(url).groups()
+
+ try:
+ episode = self._download_json(
+ self._API_BASE + 'client/v1/player/episode/' + video_id, video_id)
+ except ExtractorError as e:
+ self._handle_error(e, 403)
+
+ title = episode['titulo']
+
+ formats = []
+ subtitles = {}
+ for source in episode.get('sources', []):
+ src = source.get('src')
+ if not src:
+ continue
+ src_type = source.get('type')
+            if src_type == 'application/vnd.apple.mpegurl':
+                fmts, subs = self._extract_m3u8_formats_and_subtitles(
+                    src, video_id, 'mp4', 'm3u8_native',
+                    m3u8_id='hls', fatal=False)
+            elif src_type == 'application/dash+xml':
+                fmts, subs = self._extract_mpd_formats_and_subtitles(
+                    src, video_id, mpd_id='dash', fatal=False)
+            else:
+                continue
+            formats.extend(fmts)
+            self._merge_subtitles(subs, target=subtitles)
+
+ heartbeat = episode.get('heartbeat') or {}
+ omniture = episode.get('omniture') or {}
+ get_meta = lambda x: heartbeat.get(x) or omniture.get(x)
+
+ return {
+ 'display_id': display_id,
+ 'id': video_id,
+ 'title': title,
+ 'description': episode.get('descripcion'),
+ 'thumbnail': episode.get('imgPoster'),
+ 'duration': int_or_none(episode.get('duration')),
+ 'formats': formats,
+ 'channel': get_meta('channel'),
+ 'season': get_meta('season'),
+ 'episode_number': int_or_none(get_meta('episodeNumber')),
+ 'subtitles': subtitles,
+ }
diff --git a/yt_dlp/extractor/atscaleconf.py b/yt_dlp/extractor/atscaleconf.py
new file mode 100644
index 0000000..3f7b1e9
--- /dev/null
+++ b/yt_dlp/extractor/atscaleconf.py
@@ -0,0 +1,34 @@
+import re
+
+from .common import InfoExtractor
+
+
+class AtScaleConfEventIE(InfoExtractor):
+ _VALID_URL = r'https?://(?:www\.)?atscaleconference\.com/events/(?P<id>[^/&$?]+)'
+
+ _TESTS = [{
+ 'url': 'https://atscaleconference.com/events/data-scale-spring-2022/',
+ 'playlist_mincount': 13,
+ 'info_dict': {
+ 'id': 'data-scale-spring-2022',
+ 'title': 'Data @Scale Spring 2022',
+ 'description': 'md5:7d7ca1c42ac9c6d8a785092a1aea4b55'
+ },
+ }, {
+ 'url': 'https://atscaleconference.com/events/video-scale-2021/',
+ 'playlist_mincount': 14,
+ 'info_dict': {
+ 'id': 'video-scale-2021',
+ 'title': 'Video @Scale 2021',
+ 'description': 'md5:7d7ca1c42ac9c6d8a785092a1aea4b55'
+ },
+ }]
+
+ def _real_extract(self, url):
+        playlist_id = self._match_id(url)
+        webpage = self._download_webpage(url, playlist_id)
+
+        return self.playlist_from_matches(
+            re.findall(r'data-url\s*=\s*"(https?://(?:www\.)?atscaleconference\.com/videos/[^"]+)"', webpage),
+            ie='Generic', playlist_id=playlist_id,
+ title=self._og_search_title(webpage), description=self._og_search_description(webpage))
diff --git a/yt_dlp/extractor/atvat.py b/yt_dlp/extractor/atvat.py
new file mode 100644
index 0000000..d6ed9e4
--- /dev/null
+++ b/yt_dlp/extractor/atvat.py
@@ -0,0 +1,108 @@
+import datetime
+
+from .common import InfoExtractor
+from ..utils import (
+    ExtractorError,
+    float_or_none,
+    jwt_encode_hs256,
+    try_get,
+)
+
+
+class ATVAtIE(InfoExtractor):
+ _VALID_URL = r'https?://(?:www\.)?atv\.at/tv/(?:[^/]+/){2,3}(?P<id>.*)'
+
+ _TESTS = [{
+ 'url': 'https://www.atv.at/tv/bauer-sucht-frau/staffel-18/bauer-sucht-frau/bauer-sucht-frau-staffel-18-folge-3-die-hofwochen',
+ 'md5': '3c3b4aaca9f63e32b35e04a9c2515903',
+ 'info_dict': {
+ 'id': 'v-ce9cgn1e70n5-1',
+ 'ext': 'mp4',
+ 'title': 'Bauer sucht Frau - Staffel 18 Folge 3 - Die Hofwochen',
+ }
+ }, {
+ 'url': 'https://www.atv.at/tv/bauer-sucht-frau/staffel-18/episode-01/bauer-sucht-frau-staffel-18-vorstellungsfolge-1',
+ 'only_matching': True,
+ }]
+
+ # extracted from bootstrap.js function (search for e.encryption_key and use your browser's debugger)
+ _ACCESS_ID = 'x_atv'
+ _ENCRYPTION_KEY = 'Hohnaekeishoogh2omaeghooquooshia'
+
+ def _extract_video_info(self, url, content, video):
+ clip_id = content.get('splitId', content['id'])
+ formats = []
+ clip_urls = video['urls']
+ for protocol, variant in clip_urls.items():
+ source_url = try_get(variant, lambda x: x['clear']['url'])
+ if not source_url:
+ continue
+ if protocol == 'dash':
+ formats.extend(self._extract_mpd_formats(
+ source_url, clip_id, mpd_id=protocol, fatal=False))
+ elif protocol == 'hls':
+ formats.extend(self._extract_m3u8_formats(
+ source_url, clip_id, 'mp4', 'm3u8_native',
+ m3u8_id=protocol, fatal=False))
+ else:
+ formats.append({
+ 'url': source_url,
+ 'format_id': protocol,
+ })
+
+ return {
+ 'id': clip_id,
+ 'title': content.get('title'),
+ 'duration': float_or_none(content.get('duration')),
+ 'series': content.get('tvShowTitle'),
+ 'formats': formats,
+ }
+
+ def _real_extract(self, url):
+ video_id = self._match_id(url)
+ webpage = self._download_webpage(url, video_id)
+ json_data = self._parse_json(
+ self._search_regex(r'<script id="state" type="text/plain">(.*)</script>', webpage, 'json_data'),
+ video_id=video_id)
+
+ video_title = json_data['views']['default']['page']['title']
+        content_resource = json_data['views']['default']['page']['contentResource']
+        content_id = content_resource[0]['id']
+        content_ids = [{'id': idx, 'subclip_start': content['start'], 'subclip_end': content['end']}
+                       for idx, content in enumerate(content_resource)]
+
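+        # The getsources endpoint expects a short-lived HS256 JWT, signed with
+        # the key above ('kid' set to the access id), whose payload lists the
+        # requested subclips and is only valid around the time of the request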
+ time_of_request = datetime.datetime.now()
+ not_before = time_of_request - datetime.timedelta(minutes=5)
+ expire = time_of_request + datetime.timedelta(minutes=5)
+ payload = {
+ 'content_ids': {
+ content_id: content_ids,
+ },
+ 'secure_delivery': True,
+ 'iat': int(time_of_request.timestamp()),
+ 'nbf': int(not_before.timestamp()),
+ 'exp': int(expire.timestamp()),
+ }
+ jwt_token = jwt_encode_hs256(payload, self._ENCRYPTION_KEY, headers={'kid': self._ACCESS_ID})
+ videos = self._download_json(
+ 'https://vas-v4.p7s1video.net/4.0/getsources',
+ content_id, 'Downloading videos JSON', query={
+ 'token': jwt_token.decode('utf-8')
+ })
+
+ video_id, videos_data = list(videos['data'].items())[0]
+ error_msg = try_get(videos_data, lambda x: x['error']['title'])
+ if error_msg == 'Geo check failed':
+ self.raise_geo_restricted(error_msg)
+ elif error_msg:
+ raise ExtractorError(error_msg)
+ entries = [
+            self._extract_video_info(url, content_resource[video['id']], video)
+ for video in videos_data]
+
+ return {
+ '_type': 'multi_video',
+ 'id': video_id,
+ 'title': video_title,
+ 'entries': entries,
+ }
diff --git a/yt_dlp/extractor/audimedia.py b/yt_dlp/extractor/audimedia.py
new file mode 100644
index 0000000..35114e5
--- /dev/null
+++ b/yt_dlp/extractor/audimedia.py
@@ -0,0 +1,89 @@
+from .common import InfoExtractor
+from ..utils import (
+ int_or_none,
+ parse_iso8601,
+)
+
+
+class AudiMediaIE(InfoExtractor):
+ _VALID_URL = r'https?://(?:www\.)?audi-mediacenter\.com/(?:en|de)/audimediatv/(?:video/)?(?P<id>[^/?#]+)'
+ _TESTS = [{
+ 'url': 'https://www.audi-mediacenter.com/en/audimediatv/60-seconds-of-audi-sport-104-2015-wec-bahrain-rookie-test-1467',
+ 'md5': '79a8b71c46d49042609795ab59779b66',
+ 'info_dict': {
+ 'id': '1565',
+ 'ext': 'mp4',
+ 'title': '60 Seconds of Audi Sport 104/2015 - WEC Bahrain, Rookie Test',
+ 'description': 'md5:60e5d30a78ced725f7b8d34370762941',
+ 'upload_date': '20151124',
+ 'timestamp': 1448354940,
+ 'duration': 74022,
+ 'view_count': int,
+ }
+ }, {
+ 'url': 'https://www.audi-mediacenter.com/en/audimediatv/video/60-seconds-of-audi-sport-104-2015-wec-bahrain-rookie-test-2991',
+ 'only_matching': True,
+ }]
+
+ def _real_extract(self, url):
+ display_id = self._match_id(url)
+ webpage = self._download_webpage(url, display_id)
+
+ raw_payload = self._search_regex([
+ r'class="amtv-embed"[^>]+id="([0-9a-z-]+)"',
+ r'id="([0-9a-z-]+)"[^>]+class="amtv-embed"',
+ r'class=\\"amtv-embed\\"[^>]+id=\\"([0-9a-z-]+)\\"',
+ r'id=\\"([0-9a-z-]+)\\"[^>]+class=\\"amtv-embed\\"',
+ r'id=(?:\\)?"(amtve-[a-z]-\d+-[a-z]{2})',
+ ], webpage, 'raw payload')
+ _, stage_mode, video_id, _ = raw_payload.split('-')
+
+ # TODO: handle s and e stage_mode (live streams and ended live streams)
+ if stage_mode not in ('s', 'e'):
+ video_data = self._download_json(
+ 'https://www.audimedia.tv/api/video/v1/videos/' + video_id,
+ video_id, query={
+ 'embed[]': ['video_versions', 'thumbnail_image'],
+ })['results']
+ formats = []
+
+ stream_url_hls = video_data.get('stream_url_hls')
+ if stream_url_hls:
+ formats.extend(self._extract_m3u8_formats(
+ stream_url_hls, video_id, 'mp4',
+ entry_protocol='m3u8_native', m3u8_id='hls', fatal=False))
+
+ stream_url_hds = video_data.get('stream_url_hds')
+ if stream_url_hds:
+ formats.extend(self._extract_f4m_formats(
+ stream_url_hds + '?hdcore=3.4.0',
+ video_id, f4m_id='hds', fatal=False))
+
+ for video_version in video_data.get('video_versions', []):
+ video_version_url = video_version.get('download_url') or video_version.get('stream_url')
+ if not video_version_url:
+ continue
+ f = {
+ 'url': video_version_url,
+ 'width': int_or_none(video_version.get('width')),
+ 'height': int_or_none(video_version.get('height')),
+ 'abr': int_or_none(video_version.get('audio_bitrate')),
+ 'vbr': int_or_none(video_version.get('video_bitrate')),
+ }
+ bitrate = self._search_regex(r'(\d+)k', video_version_url, 'bitrate', default=None)
+ if bitrate:
+ f.update({
+ 'format_id': 'http-%s' % bitrate,
+ })
+ formats.append(f)
+
+ return {
+ 'id': video_id,
+ 'title': video_data['title'],
+ 'description': video_data.get('subtitle'),
+ 'thumbnail': video_data.get('thumbnail_image', {}).get('file'),
+ 'timestamp': parse_iso8601(video_data.get('publication_date')),
+ 'duration': int_or_none(video_data.get('duration')),
+ 'view_count': int_or_none(video_data.get('view_count')),
+ 'formats': formats,
+ }
diff --git a/yt_dlp/extractor/audioboom.py b/yt_dlp/extractor/audioboom.py
new file mode 100644
index 0000000..a23fcd2
--- /dev/null
+++ b/yt_dlp/extractor/audioboom.py
@@ -0,0 +1,57 @@
+from .common import InfoExtractor
+from ..utils import clean_html, float_or_none, traverse_obj, unescapeHTML
+
+
+class AudioBoomIE(InfoExtractor):
+ _VALID_URL = r'https?://(?:www\.)?audioboom\.com/(?:boos|posts)/(?P<id>[0-9]+)'
+ _TESTS = [{
+ 'url': 'https://audioboom.com/posts/7398103-asim-chaudhry',
+ 'md5': '4d68be11c9f9daf3dab0778ad1e010c3',
+ 'info_dict': {
+ 'id': '7398103',
+ 'ext': 'mp3',
+ 'title': 'Asim Chaudhry',
+ 'description': 'md5:0ed714ae0e81e5d9119cac2f618ad679',
+ 'duration': 4000.99,
+ 'uploader': 'Sue Perkins: An hour or so with...',
+ 'uploader_url': r're:https?://(?:www\.)?audioboom\.com/channel/perkins',
+ }
+ }, { # Direct mp3-file link
+ 'url': 'https://audioboom.com/posts/8128496.mp3',
+ 'md5': 'e329edf304d450def95c7f86a9165ee1',
+ 'info_dict': {
+ 'id': '8128496',
+ 'ext': 'mp3',
+ 'title': 'TCRNo8 / DAILY 03 - In Control',
+ 'description': 'md5:44665f142db74858dfa21c5b34787948',
+ 'duration': 1689.7,
+ 'uploader': 'Lost Dot Podcast: The Trans Pyrenees and Transcontinental Race',
+ 'uploader_url': r're:https?://(?:www\.)?audioboom\.com/channels/5003904',
+ }
+ }, {
+ 'url': 'https://audioboom.com/posts/4279833-3-09-2016-czaban-hour-3?t=0',
+ 'only_matching': True,
+ }]
+
+ def _real_extract(self, url):
+ video_id = self._match_id(url)
+ webpage = self._download_webpage(f'https://audioboom.com/posts/{video_id}', video_id)
+
+ clip_store = self._search_json(
+ r'data-react-class="V5DetailPagePlayer"\s*data-react-props=["\']',
+ webpage, 'clip store', video_id, fatal=False, transform_source=unescapeHTML)
+ clip = traverse_obj(clip_store, ('clips', 0), expected_type=dict) or {}
+
+ return {
+ 'id': video_id,
+ 'url': clip.get('clipURLPriorToLoading') or self._og_search_property('audio', webpage, 'audio url'),
+ 'title': clip.get('title') or self._html_search_meta(['og:title', 'og:audio:title', 'audio_title'], webpage),
+ 'description': (clip.get('description') or clean_html(clip.get('formattedDescription'))
+ or self._og_search_description(webpage)),
+ 'duration': float_or_none(clip.get('duration') or self._html_search_meta('weibo:audio:duration', webpage)),
+ 'uploader': clip.get('author') or self._html_search_meta(
+ ['og:audio:artist', 'twitter:audio:artist_name', 'audio_artist'], webpage, 'uploader'),
+ 'uploader_url': clip.get('author_url') or self._html_search_regex(
+ r'<div class="avatar flex-shrink-0">\s*<a href="(?P<uploader_url>http[^"]+)"',
+ webpage, 'uploader url', fatal=False),
+ }
diff --git a/yt_dlp/extractor/audiodraft.py b/yt_dlp/extractor/audiodraft.py
new file mode 100644
index 0000000..71e5afd
--- /dev/null
+++ b/yt_dlp/extractor/audiodraft.py
@@ -0,0 +1,93 @@
+from .common import InfoExtractor
+from ..utils import int_or_none
+
+
+class AudiodraftBaseIE(InfoExtractor):
+ def _audiodraft_extract_from_id(self, player_entry_id):
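+        # The endpoint is form-POSTed the DOM id of the page's player element,
+        # e.g. 'player_entry_5874' (cf. the regexes in the IEs below)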
+ data_json = self._download_json(
+ 'https://www.audiodraft.com/scripts/general/player/getPlayerInfoNew.php', player_entry_id,
+ headers={
+ 'Content-type': 'application/x-www-form-urlencoded; charset=UTF-8',
+ 'X-Requested-With': 'XMLHttpRequest',
+ }, data=f'id={player_entry_id}'.encode('utf-8'))
+
+ return {
+ 'id': str(data_json['entry_id']),
+ 'title': data_json.get('entry_title'),
+ 'url': data_json['path'],
+ 'vcodec': 'none',
+ 'ext': 'mp3',
+ 'uploader': data_json.get('designer_name'),
+ 'uploader_id': data_json.get('designer_id'),
+ 'webpage_url': data_json.get('entry_url'),
+ 'like_count': int_or_none(data_json.get('entry_likes')),
+ 'average_rating': int_or_none(data_json.get('entry_rating')),
+ }
+
+
+class AudiodraftCustomIE(AudiodraftBaseIE):
+ IE_NAME = 'Audiodraft:custom'
+ _VALID_URL = r'https?://(?:[-\w]+)\.audiodraft\.com/entry/(?P<id>\d+)'
+
+ _TESTS = [{
+ 'url': 'http://nokiatune.audiodraft.com/entry/5874',
+ 'info_dict': {
+ 'id': '9485',
+ 'ext': 'mp3',
+ 'title': 'Hula Hula Calls',
+ 'uploader': 'unclemaki',
+ 'uploader_id': '13512',
+ 'average_rating': 5,
+ 'like_count': int,
+ },
+ }, {
+ 'url': 'http://vikinggrace.audiodraft.com/entry/501',
+ 'info_dict': {
+ 'id': '22241',
+ 'ext': 'mp3',
+ 'title': 'MVG Happy',
+ 'uploader': 'frog',
+ 'uploader_id': '19142',
+ 'average_rating': 5,
+ 'like_count': int,
+ },
+ }, {
+ 'url': 'http://timferriss.audiodraft.com/entry/765',
+ 'info_dict': {
+ 'id': '19710',
+ 'ext': 'mp3',
+ 'title': 'ferris03',
+ 'uploader': 'malex',
+ 'uploader_id': '17335',
+ 'average_rating': 5,
+ 'like_count': int,
+ },
+ }]
+
+ def _real_extract(self, url):
+        video_id = self._match_id(url)
+        webpage = self._download_webpage(url, video_id)
+        player_entry_id = self._search_regex(
+            r'playAudio\(\'(player_entry_\d+)\'\);', webpage, 'play entry id')
+ return self._audiodraft_extract_from_id(player_entry_id)
+
+
+class AudiodraftGenericIE(AudiodraftBaseIE):
+ IE_NAME = 'Audiodraft:generic'
+ _VALID_URL = r'https?://www\.audiodraft\.com/contests/[^/#]+#entries&eid=(?P<id>\d+)'
+
+ _TESTS = [{
+ 'url': 'https://www.audiodraft.com/contests/570-Score-A-Video-Surprise-Us#entries&eid=30138',
+ 'info_dict': {
+ 'id': '30138',
+ 'ext': 'mp3',
+ 'title': 'DROP in sound_V2',
+ 'uploader': 'TiagoSilva',
+ 'uploader_id': '19452',
+ 'average_rating': 4,
+ 'like_count': int,
+ },
+ }]
+
+ def _real_extract(self, url):
+        entry_id = self._match_id(url)
+        return self._audiodraft_extract_from_id(f'player_entry_{entry_id}')
diff --git a/yt_dlp/extractor/audiomack.py b/yt_dlp/extractor/audiomack.py
new file mode 100644
index 0000000..5c4160f
--- /dev/null
+++ b/yt_dlp/extractor/audiomack.py
@@ -0,0 +1,147 @@
+import itertools
+import time
+
+from .common import InfoExtractor
+from .soundcloud import SoundcloudIE
+from ..compat import compat_str
+from ..utils import (
+ ExtractorError,
+ url_basename,
+)
+
+
+class AudiomackIE(InfoExtractor):
+ _VALID_URL = r'https?://(?:www\.)?audiomack\.com/(?:song/|(?=.+/song/))(?P<id>[\w/-]+)'
+ IE_NAME = 'audiomack'
+ _TESTS = [
+ # hosted on audiomack
+ {
+ 'url': 'http://www.audiomack.com/song/roosh-williams/extraordinary',
+            'info_dict': {
+                'id': '310086',
+                'ext': 'mp3',
+                'uploader': 'Roosh Williams',
+                'title': 'Extraordinary',
+            },
+ },
+ # audiomack wrapper around soundcloud song
+ # Needs new test URL.
+ {
+ 'add_ie': ['Soundcloud'],
+ 'url': 'http://www.audiomack.com/song/hip-hop-daily/black-mamba-freestyle',
+ 'info_dict': {
+ 'id': '258901379',
+ 'ext': 'mp3',
+ 'description': 'mamba day freestyle for the legend Kobe Bryant ',
+ 'title': 'Black Mamba Freestyle [Prod. By Danny Wolf]',
+ 'uploader': 'ILOVEMAKONNEN',
+ 'upload_date': '20160414',
+ },
+ 'skip': 'Song has been removed from the site',
+ },
+ ]
+
+ def _real_extract(self, url):
+ # URLs end with [uploader name]/song/[uploader title]
+ # this title is whatever the user types in, and is rarely
+ # the proper song title. Real metadata is in the api response
+ album_url_tag = self._match_id(url).replace('/song/', '/')
+
+ # Request the extended version of the api for extra fields like artist and title
+ api_response = self._download_json(
+ 'http://www.audiomack.com/api/music/url/song/%s?extended=1&_=%d' % (
+ album_url_tag, time.time()),
+ album_url_tag)
+
+ # API is inconsistent with errors
+ if 'url' not in api_response or not api_response['url'] or 'error' in api_response:
+ raise ExtractorError('Invalid url %s' % url)
+
+ # Audiomack wraps a lot of soundcloud tracks in their branded wrapper
+ # if so, pass the work off to the soundcloud extractor
+ if SoundcloudIE.suitable(api_response['url']):
+ return self.url_result(api_response['url'], SoundcloudIE.ie_key())
+
+ return {
+ 'id': compat_str(api_response.get('id', album_url_tag)),
+ 'uploader': api_response.get('artist'),
+ 'title': api_response.get('title'),
+ 'url': api_response['url'],
+ }
+
+
+class AudiomackAlbumIE(InfoExtractor):
+ _VALID_URL = r'https?://(?:www\.)?audiomack\.com/(?:album/|(?=.+/album/))(?P<id>[\w/-]+)'
+ IE_NAME = 'audiomack:album'
+ _TESTS = [
+ # Standard album playlist
+ {
+ 'url': 'http://www.audiomack.com/album/flytunezcom/tha-tour-part-2-mixtape',
+ 'playlist_count': 11,
+            'info_dict': {
+                'id': '812251',
+                'title': 'Tha Tour: Part 2 (Official Mixtape)',
+            },
+ },
+ # Album playlist ripped from fakeshoredrive with no metadata
+ {
+ 'url': 'http://www.audiomack.com/album/fakeshoredrive/ppp-pistol-p-project',
+ 'info_dict': {
+ 'title': 'PPP (Pistol P Project)',
+ 'id': '837572',
+ },
+ 'playlist': [{
+ 'info_dict': {
+ 'title': 'PPP (Pistol P Project) - 8. Real (prod by SYK SENSE )',
+ 'id': '837576',
+ 'ext': 'mp3',
+ 'uploader': 'Lil Herb a.k.a. G Herbo',
+ }
+ }, {
+ 'info_dict': {
+ 'title': 'PPP (Pistol P Project) - 10. 4 Minutes Of Hell Part 4 (prod by DY OF 808 MAFIA)',
+ 'id': '837580',
+ 'ext': 'mp3',
+ 'uploader': 'Lil Herb a.k.a. G Herbo',
+ }
+ }],
+ }
+ ]
+
+ def _real_extract(self, url):
+ # URLs end with [uploader name]/album/[uploader title]
+ # this title is whatever the user types in, and is rarely
+ # the proper song title. Real metadata is in the api response
+ album_url_tag = self._match_id(url).replace('/album/', '/')
+ result = {'_type': 'playlist', 'entries': []}
+        # There is no single endpoint for album metadata - instead it is included/repeated in each song's metadata.
+        # Therefore we don't know how many songs the album has and must loop over tracks until failure
+ for track_no in itertools.count():
+ # Get song's metadata
+ api_response = self._download_json(
+ 'http://www.audiomack.com/api/music/url/album/%s/%d?extended=1&_=%d'
+ % (album_url_tag, track_no, time.time()), album_url_tag,
+ note='Querying song information (%d)' % (track_no + 1))
+
+ # Total failure, only occurs when url is totally wrong
+ # Won't happen in middle of valid playlist (next case)
+ if 'url' not in api_response or 'error' in api_response:
+ raise ExtractorError('Invalid url for track %d of album url %s' % (track_no, url))
+ # URL is good but song id doesn't exist - usually means end of playlist
+ elif not api_response['url']:
+ break
+ else:
+ # Pull out the album metadata and add to result (if it exists)
+ for resultkey, apikey in [('id', 'album_id'), ('title', 'album_title')]:
+ if apikey in api_response and resultkey not in result:
+ result[resultkey] = compat_str(api_response[apikey])
+ song_id = url_basename(api_response['url']).rpartition('.')[0]
+ result['entries'].append({
+ 'id': compat_str(api_response.get('id', song_id)),
+ 'uploader': api_response.get('artist'),
+ 'title': api_response.get('title', song_id),
+ 'url': api_response['url'],
+ })
+ return result
diff --git a/yt_dlp/extractor/audius.py b/yt_dlp/extractor/audius.py
new file mode 100644
index 0000000..6448b44
--- /dev/null
+++ b/yt_dlp/extractor/audius.py
@@ -0,0 +1,271 @@
+import random
+
+from .common import InfoExtractor
+from ..compat import compat_str, compat_urllib_parse_unquote
+from ..utils import ExtractorError, str_or_none, try_get
+
+
+class AudiusBaseIE(InfoExtractor):
+ _API_BASE = None
+ _API_V = '/v1'
+
+ def _get_response_data(self, response):
+ if isinstance(response, dict):
+ response_data = response.get('data')
+ if response_data is not None:
+ return response_data
+ if len(response) == 1 and 'message' in response:
+ raise ExtractorError('API error: %s' % response['message'],
+ expected=True)
+ raise ExtractorError('Unexpected API response')
+
+ def _select_api_base(self):
+ """Selecting one of the currently available API hosts"""
+ response = super(AudiusBaseIE, self)._download_json(
+ 'https://api.audius.co/', None,
+ note='Requesting available API hosts',
+ errnote='Unable to request available API hosts')
+ hosts = self._get_response_data(response)
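+        # 'data' is expected to be a list of currently available API host URLs;
+        # one of them is picked at random below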
+ if isinstance(hosts, list):
+ self._API_BASE = random.choice(hosts)
+ return
+ raise ExtractorError('Unable to get available API hosts')
+
+ @staticmethod
+ def _prepare_url(url, title):
+ """
+        Audius removes forward slashes from the URI but leaves backslashes.
+        The problem is that current versions of Chrome replace backslashes in
+        the address bar with forward slashes, so if you copy the link from
+        there and paste it into yt-dlp, nothing can be downloaded from it,
+        since the Audius API will not be able to resolve the URL
+ """
+ url = compat_urllib_parse_unquote(url)
+ title = compat_urllib_parse_unquote(title)
+ if '/' in title or '%2F' in title:
+ fixed_title = title.replace('/', '%5C').replace('%2F', '%5C')
+ return url.replace(title, fixed_title)
+ return url
+
+ def _api_request(self, path, item_id=None, note='Downloading JSON metadata',
+ errnote='Unable to download JSON metadata',
+ expected_status=None):
+ if self._API_BASE is None:
+ self._select_api_base()
+ try:
+ response = super(AudiusBaseIE, self)._download_json(
+ '%s%s%s' % (self._API_BASE, self._API_V, path), item_id, note=note,
+ errnote=errnote, expected_status=expected_status)
+ except ExtractorError as exc:
+            # some Audius API hosts may not work as expected and return HTML
+ if 'Failed to parse JSON' in compat_str(exc):
+ raise ExtractorError('An error occurred while receiving data. Try again',
+ expected=True)
+ raise exc
+ return self._get_response_data(response)
+
+ def _resolve_url(self, url, item_id):
+ return self._api_request('/resolve?url=%s' % url, item_id,
+ expected_status=404)
+
+
+class AudiusIE(AudiusBaseIE):
+ _VALID_URL = r'''(?x)https?://(?:www\.)?(?:audius\.co/(?P<uploader>[\w\d-]+)(?!/album|/playlist)/(?P<title>\S+))'''
+ IE_DESC = 'Audius.co'
+ _TESTS = [
+ {
+ # URL from Chrome address bar which replace backslash to forward slash
+ 'url': 'https://audius.co/test_acc/t%D0%B5%D0%B5%D0%B5est-1.%5E_%7B%7D/%22%3C%3E.%E2%84%96~%60-198631',
+ 'md5': '92c35d3e754d5a0f17eef396b0d33582',
+ 'info_dict': {
+ 'id': 'xd8gY',
+ 'title': '''Tеееest/ 1.!@#$%^&*()_+=[]{};'\\\":<>,.?/№~`''',
+ 'ext': 'mp3',
+ 'description': 'Description',
+ 'duration': 30,
+ 'track': '''Tеееest/ 1.!@#$%^&*()_+=[]{};'\\\":<>,.?/№~`''',
+ 'artist': 'test',
+ 'genre': 'Electronic',
+ 'thumbnail': r're:https?://.*\.jpg',
+ 'view_count': int,
+ 'like_count': int,
+ 'repost_count': int,
+ }
+ },
+ {
+ # Regular track
+ 'url': 'https://audius.co/voltra/radar-103692',
+ 'md5': '491898a0a8de39f20c5d6a8a80ab5132',
+ 'info_dict': {
+ 'id': 'KKdy2',
+ 'title': 'RADAR',
+ 'ext': 'mp3',
+ 'duration': 318,
+ 'track': 'RADAR',
+ 'artist': 'voltra',
+ 'genre': 'Trance',
+ 'thumbnail': r're:https?://.*\.jpg',
+ 'view_count': int,
+ 'like_count': int,
+ 'repost_count': int,
+ }
+ },
+ ]
+
+    _ARTWORK_MAP = {
+        '150x150': 150,
+        '480x480': 480,
+        '1000x1000': 1000,
+    }
+
+ def _real_extract(self, url):
+ mobj = self._match_valid_url(url)
+ track_id = try_get(mobj, lambda x: x.group('track_id'))
+ if track_id is None:
+ title = mobj.group('title')
+ # uploader = mobj.group('uploader')
+ url = self._prepare_url(url, title)
+ track_data = self._resolve_url(url, title)
+ else: # API link
+ title = None
+ # uploader = None
+ track_data = self._api_request('/tracks/%s' % track_id, track_id)
+
+ if not isinstance(track_data, dict):
+ raise ExtractorError('Unexpected API response')
+
+ track_id = track_data.get('id')
+ if track_id is None:
+ raise ExtractorError('Unable to get ID of the track')
+
+ artworks_data = track_data.get('artwork')
+ thumbnails = []
+ if isinstance(artworks_data, dict):
+ for quality_key, thumbnail_url in artworks_data.items():
+                thumbnail = {'url': thumbnail_url}
+ quality_code = self._ARTWORK_MAP.get(quality_key)
+ if quality_code is not None:
+ thumbnail['preference'] = quality_code
+ thumbnails.append(thumbnail)
+
+ return {
+ 'id': track_id,
+ 'title': track_data.get('title', title),
+ 'url': '%s/v1/tracks/%s/stream' % (self._API_BASE, track_id),
+ 'ext': 'mp3',
+ 'description': track_data.get('description'),
+ 'duration': track_data.get('duration'),
+ 'track': track_data.get('title'),
+ 'artist': try_get(track_data, lambda x: x['user']['name'], compat_str),
+ 'genre': track_data.get('genre'),
+ 'thumbnails': thumbnails,
+ 'view_count': track_data.get('play_count'),
+ 'like_count': track_data.get('favorite_count'),
+ 'repost_count': track_data.get('repost_count'),
+ }
+
+
+class AudiusTrackIE(AudiusIE): # XXX: Do not subclass from concrete IE
+ _VALID_URL = r'''(?x)(?:audius:)(?:https?://(?:www\.)?.+/v1/tracks/)?(?P<track_id>\w+)'''
+ IE_NAME = 'audius:track'
+ IE_DESC = 'Audius track ID or API link. Prepend with "audius:"'
+ _TESTS = [
+ {
+ 'url': 'audius:9RWlo',
+ 'only_matching': True
+ },
+ {
+ 'url': 'audius:http://discoveryprovider.audius.prod-us-west-2.staked.cloud/v1/tracks/9RWlo',
+ 'only_matching': True
+ },
+ ]
+
+
+class AudiusPlaylistIE(AudiusBaseIE):
+ _VALID_URL = r'https?://(?:www\.)?audius\.co/(?P<uploader>[\w\d-]+)/(?:album|playlist)/(?P<title>\S+)'
+ IE_NAME = 'audius:playlist'
+ IE_DESC = 'Audius.co playlists'
+ _TEST = {
+ 'url': 'https://audius.co/test_acc/playlist/test-playlist-22910',
+ 'info_dict': {
+ 'id': 'DNvjN',
+ 'title': 'test playlist',
+ 'description': 'Test description\n\nlol',
+ },
+ 'playlist_count': 175,
+ }
+
+ def _build_playlist(self, tracks):
+ entries = []
+ for track in tracks:
+ if not isinstance(track, dict):
+ raise ExtractorError('Unexpected API response')
+ track_id = str_or_none(track.get('id'))
+ if not track_id:
+ raise ExtractorError('Unable to get track ID from playlist')
+ entries.append(self.url_result(
+ 'audius:%s' % track_id,
+ ie=AudiusTrackIE.ie_key(), video_id=track_id))
+ return entries
+
+ def _real_extract(self, url):
+ self._select_api_base()
+ mobj = self._match_valid_url(url)
+ title = mobj.group('title')
+ # uploader = mobj.group('uploader')
+ url = self._prepare_url(url, title)
+ playlist_response = self._resolve_url(url, title)
+
+ if not isinstance(playlist_response, list) or len(playlist_response) != 1:
+ raise ExtractorError('Unexpected API response')
+
+ playlist_data = playlist_response[0]
+ if not isinstance(playlist_data, dict):
+ raise ExtractorError('Unexpected API response')
+
+ playlist_id = playlist_data.get('id')
+ if playlist_id is None:
+ raise ExtractorError('Unable to get playlist ID')
+
+ playlist_tracks = self._api_request(
+ '/playlists/%s/tracks' % playlist_id,
+ title, note='Downloading playlist tracks metadata',
+ errnote='Unable to download playlist tracks metadata')
+ if not isinstance(playlist_tracks, list):
+ raise ExtractorError('Unexpected API response')
+
+ entries = self._build_playlist(playlist_tracks)
+ return self.playlist_result(entries, playlist_id,
+ playlist_data.get('playlist_name', title),
+ playlist_data.get('description'))
+
+
+class AudiusProfileIE(AudiusPlaylistIE): # XXX: Do not subclass from concrete IE
+ IE_NAME = 'audius:artist'
+ IE_DESC = 'Audius.co profile/artist pages'
+ _VALID_URL = r'https?://(?:www\.)?audius\.co/(?P<id>[^/]+)/?(?:[?#]|$)'
+ _TEST = {
+ 'url': 'https://audius.co/pzl/',
+ 'info_dict': {
+ 'id': 'ezRo7',
+ 'description': 'TAMALE\n\nContact: officialpzl@gmail.com',
+ 'title': 'pzl',
+ },
+ 'playlist_count': 24,
+ }
+
+ def _real_extract(self, url):
+ self._select_api_base()
+ profile_id = self._match_id(url)
+ try:
+ _profile_data = self._api_request('/full/users/handle/' + profile_id, profile_id)
+ except ExtractorError as e:
+ raise ExtractorError('Could not download profile info; ' + str(e))
+ profile_audius_id = _profile_data[0]['id']
+ profile_bio = _profile_data[0].get('bio')
+
+ api_call = self._api_request('/full/users/handle/%s/tracks' % profile_id, profile_id)
+ return self.playlist_result(self._build_playlist(api_call), profile_audius_id, profile_id, profile_bio)
diff --git a/yt_dlp/extractor/awaan.py b/yt_dlp/extractor/awaan.py
new file mode 100644
index 0000000..6fc938d
--- /dev/null
+++ b/yt_dlp/extractor/awaan.py
@@ -0,0 +1,184 @@
+import base64
+
+from .common import InfoExtractor
+from ..compat import (
+ compat_str,
+ compat_urllib_parse_urlencode,
+)
+from ..utils import (
+ format_field,
+ int_or_none,
+ parse_iso8601,
+ smuggle_url,
+ unsmuggle_url,
+ urlencode_postdata,
+)
+
+
+class AWAANIE(InfoExtractor):
+ _VALID_URL = r'https?://(?:www\.)?(?:awaan|dcndigital)\.ae/(?:#/)?show/(?P<show_id>\d+)/[^/]+(?:/(?P<id>\d+)/(?P<season_id>\d+))?'
+
+ def _real_extract(self, url):
+ show_id, video_id, season_id = self._match_valid_url(url).groups()
+ if video_id and int(video_id) > 0:
+ return self.url_result(
+ 'http://awaan.ae/media/%s' % video_id, 'AWAANVideo')
+ elif season_id and int(season_id) > 0:
+ return self.url_result(smuggle_url(
+ 'http://awaan.ae/program/season/%s' % season_id,
+ {'show_id': show_id}), 'AWAANSeason')
+ else:
+ return self.url_result(
+ 'http://awaan.ae/program/%s' % show_id, 'AWAANSeason')
+
+
+class AWAANBaseIE(InfoExtractor):
+ def _parse_video_data(self, video_data, video_id, is_live):
+ title = video_data.get('title_en') or video_data['title_ar']
+ img = video_data.get('img')
+
+ return {
+ 'id': video_id,
+ 'title': title,
+ 'description': video_data.get('description_en') or video_data.get('description_ar'),
+ 'thumbnail': format_field(img, None, 'http://admin.mangomolo.com/analytics/%s'),
+ 'duration': int_or_none(video_data.get('duration')),
+ 'timestamp': parse_iso8601(video_data.get('create_time'), ' '),
+ 'is_live': is_live,
+ 'uploader_id': video_data.get('user_id'),
+ }
+
+
+class AWAANVideoIE(AWAANBaseIE):
+ IE_NAME = 'awaan:video'
+ _VALID_URL = r'https?://(?:www\.)?(?:awaan|dcndigital)\.ae/(?:#/)?(?:video(?:/[^/]+)?|media|catchup/[^/]+/[^/]+)/(?P<id>\d+)'
+ _TESTS = [{
+ 'url': 'http://www.dcndigital.ae/#/video/%D8%B1%D8%AD%D9%84%D8%A9-%D8%A7%D9%84%D8%B9%D9%85%D8%B1-%D8%A7%D9%84%D8%AD%D9%84%D9%82%D8%A9-1/17375',
+ 'md5': '5f61c33bfc7794315c671a62d43116aa',
+ 'info_dict': {
+ 'id': '17375',
+ 'ext': 'mp4',
+ 'title': 'رحلة العمر : الحلقة 1',
+ 'description': 'md5:0156e935d870acb8ef0a66d24070c6d6',
+ 'duration': 2041,
+ 'timestamp': 1227504126,
+ 'upload_date': '20081124',
+ 'uploader_id': '71',
+ },
+ }, {
+ 'url': 'http://awaan.ae/video/26723981/%D8%AF%D8%A7%D8%B1-%D8%A7%D9%84%D8%B3%D9%84%D8%A7%D9%85:-%D8%AE%D9%8A%D8%B1-%D8%AF%D9%88%D8%B1-%D8%A7%D9%84%D8%A3%D9%86%D8%B5%D8%A7%D8%B1',
+ 'only_matching': True,
+ }]
+
+ def _real_extract(self, url):
+ video_id = self._match_id(url)
+
+ video_data = self._download_json(
+ 'http://admin.mangomolo.com/analytics/index.php/plus/video?id=%s' % video_id,
+ video_id, headers={'Origin': 'http://awaan.ae'})
+ info = self._parse_video_data(video_data, video_id, False)
+
+ embed_url = 'http://admin.mangomolo.com/analytics/index.php/customers/embed/video?' + compat_urllib_parse_urlencode({
+ 'id': video_data['id'],
+ 'user_id': video_data['user_id'],
+ 'signature': video_data['signature'],
+ 'countries': 'Q0M=',
+ 'filter': 'DENY',
+ })
+ info.update({
+ '_type': 'url_transparent',
+ 'url': embed_url,
+ 'ie_key': 'MangomoloVideo',
+ })
+ return info
+
+
+class AWAANLiveIE(AWAANBaseIE):
+ IE_NAME = 'awaan:live'
+ _VALID_URL = r'https?://(?:www\.)?(?:awaan|dcndigital)\.ae/(?:#/)?live/(?P<id>\d+)'
+ _TEST = {
+ 'url': 'http://awaan.ae/live/6/dubai-tv',
+ 'info_dict': {
+ 'id': '6',
+ 'ext': 'mp4',
+ 'title': 're:Dubai Al Oula [0-9]{4}-[0-9]{2}-[0-9]{2} [0-9]{2}:[0-9]{2}$',
+ 'upload_date': '20150107',
+ 'timestamp': 1420588800,
+ 'uploader_id': '71',
+ },
+ 'params': {
+ # m3u8 download
+ 'skip_download': True,
+ },
+ }
+
+ def _real_extract(self, url):
+ channel_id = self._match_id(url)
+
+ channel_data = self._download_json(
+ 'http://admin.mangomolo.com/analytics/index.php/plus/getchanneldetails?channel_id=%s' % channel_id,
+ channel_id, headers={'Origin': 'http://awaan.ae'})
+ info = self._parse_video_data(channel_data, channel_id, True)
+
+ embed_url = 'http://admin.mangomolo.com/analytics/index.php/customers/embed/index?' + compat_urllib_parse_urlencode({
+ 'id': base64.b64encode(channel_data['user_id'].encode()).decode(),
+ 'channelid': base64.b64encode(channel_data['id'].encode()).decode(),
+ 'signature': channel_data['signature'],
+ 'countries': 'Q0M=',
+ 'filter': 'DENY',
+ })
+ info.update({
+ '_type': 'url_transparent',
+ 'url': embed_url,
+ 'ie_key': 'MangomoloLive',
+ })
+ return info
+
+
+class AWAANSeasonIE(InfoExtractor):
+ IE_NAME = 'awaan:season'
+ _VALID_URL = r'https?://(?:www\.)?(?:awaan|dcndigital)\.ae/(?:#/)?program/(?:(?P<show_id>\d+)|season/(?P<season_id>\d+))'
+ _TEST = {
+ 'url': 'http://dcndigital.ae/#/program/205024/%D9%85%D8%AD%D8%A7%D8%B6%D8%B1%D8%A7%D8%AA-%D8%A7%D9%84%D8%B4%D9%8A%D8%AE-%D8%A7%D9%84%D8%B4%D8%B9%D8%B1%D8%A7%D9%88%D9%8A',
+ 'info_dict': {
+ 'id': '7910',
+ 'title': 'محاضرات الشيخ الشعراوي',
+ },
+ 'playlist_mincount': 27,
+ }
+
+ def _real_extract(self, url):
+ url, smuggled_data = unsmuggle_url(url, {})
+ show_id, season_id = self._match_valid_url(url).groups()
+
+ data = {}
+ if season_id:
+ data['season'] = season_id
+ show_id = smuggled_data.get('show_id')
+ if show_id is None:
+ season = self._download_json(
+ 'http://admin.mangomolo.com/analytics/index.php/plus/season_info?id=%s' % season_id,
+ season_id, headers={'Origin': 'http://awaan.ae'})
+ show_id = season['id']
+ data['show_id'] = show_id
+ show = self._download_json(
+ 'http://admin.mangomolo.com/analytics/index.php/plus/show',
+ show_id, data=urlencode_postdata(data), headers={
+ 'Origin': 'http://awaan.ae',
+ 'Content-Type': 'application/x-www-form-urlencoded'
+ })
+ if not season_id:
+ season_id = show['default_season']
+ for season in show['seasons']:
+ if season['id'] == season_id:
+ title = season.get('title_en') or season['title_ar']
+
+ entries = []
+ for video in show['videos']:
+ video_id = compat_str(video['id'])
+ entries.append(self.url_result(
+ 'http://awaan.ae/media/%s' % video_id, 'AWAANVideo', video_id))
+
+ return self.playlist_result(entries, season_id, title)
diff --git a/yt_dlp/extractor/aws.py b/yt_dlp/extractor/aws.py
new file mode 100644
index 0000000..c4741a6
--- /dev/null
+++ b/yt_dlp/extractor/aws.py
@@ -0,0 +1,75 @@
+import datetime
+import hashlib
+import hmac
+
+from .common import InfoExtractor
+from ..compat import compat_urllib_parse_urlencode
+
+
+class AWSIE(InfoExtractor): # XXX: Conventionally, base classes should end with BaseIE/InfoExtractor
+ _AWS_ALGORITHM = 'AWS4-HMAC-SHA256'
+ _AWS_REGION = 'us-east-1'
+
+ def _aws_execute_api(self, aws_dict, video_id, query=None):
+ query = query or {}
+ amz_date = datetime.datetime.now(datetime.timezone.utc).strftime('%Y%m%dT%H%M%SZ')
+ date = amz_date[:8]
+ headers = {
+ 'Accept': 'application/json',
+ 'Host': self._AWS_PROXY_HOST,
+ 'X-Amz-Date': amz_date,
+ 'X-Api-Key': self._AWS_API_KEY
+ }
+ session_token = aws_dict.get('session_token')
+ if session_token:
+ headers['X-Amz-Security-Token'] = session_token
+
+ def aws_hash(s):
+ return hashlib.sha256(s.encode('utf-8')).hexdigest()
+
+ # Task 1: http://docs.aws.amazon.com/general/latest/gr/sigv4-create-canonical-request.html
+ canonical_querystring = compat_urllib_parse_urlencode(query)
+ canonical_headers = ''
+ for header_name, header_value in sorted(headers.items()):
+ canonical_headers += '%s:%s\n' % (header_name.lower(), header_value)
+ signed_headers = ';'.join([header.lower() for header in sorted(headers.keys())])
+ canonical_request = '\n'.join([
+ 'GET',
+ aws_dict['uri'],
+ canonical_querystring,
+ canonical_headers,
+ signed_headers,
+ aws_hash('')
+ ])
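+ # A minimal sketch of the canonical request built above for, say,
+ # GET /prod/video/123?lang=en (host and values hypothetical):
+ #
+ # GET
+ # /prod/video/123
+ # lang=en
+ # accept:application/json
+ # host:example.execute-api.us-east-1.amazonaws.com
+ # x-amz-date:20240315T120000Z
+ # x-api-key:<api key>
+ #
+ # accept;host;x-amz-date;x-api-key
+ # <SHA-256 hex digest of the empty payload>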
+
+ # Task 2: http://docs.aws.amazon.com/general/latest/gr/sigv4-create-string-to-sign.html
+ credential_scope_list = [date, self._AWS_REGION, 'execute-api', 'aws4_request']
+ credential_scope = '/'.join(credential_scope_list)
+ string_to_sign = '\n'.join([self._AWS_ALGORITHM, amz_date, credential_scope, aws_hash(canonical_request)])
+
+ # Task 3: http://docs.aws.amazon.com/general/latest/gr/sigv4-calculate-signature.html
+ def aws_hmac(key, msg):
+ return hmac.new(key, msg.encode('utf-8'), hashlib.sha256)
+
+ def aws_hmac_digest(key, msg):
+ return aws_hmac(key, msg).digest()
+
+ def aws_hmac_hexdigest(key, msg):
+ return aws_hmac(key, msg).hexdigest()
+
+ k_signing = ('AWS4' + aws_dict['secret_key']).encode('utf-8')
+ for value in credential_scope_list:
+ k_signing = aws_hmac_digest(k_signing, value)
+
+ signature = aws_hmac_hexdigest(k_signing, string_to_sign)
+
+ # Task 4: http://docs.aws.amazon.com/general/latest/gr/sigv4-add-signature-to-request.html
+ headers['Authorization'] = ', '.join([
+ '%s Credential=%s/%s' % (self._AWS_ALGORITHM, aws_dict['access_key'], credential_scope),
+ 'SignedHeaders=%s' % signed_headers,
+ 'Signature=%s' % signature,
+ ])
+
+ return self._download_json(
+ 'https://%s%s%s' % (self._AWS_PROXY_HOST, aws_dict['uri'], '?' + canonical_querystring if canonical_querystring else ''),
+ video_id, headers=headers)
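+
+ # Usage sketch (hypothetical subclass, for illustration only -- the host, key
+ # and credential values are placeholders, not a real service):
+ #
+ # class SomeServiceIE(AWSIE):
+ # _AWS_PROXY_HOST = 'example.execute-api.us-east-1.amazonaws.com'
+ # _AWS_API_KEY = '<api key>'
+ #
+ # def _real_extract(self, url):
+ # video_id = self._match_id(url)
+ # return self._aws_execute_api({
+ # 'uri': '/prod/video/%s' % video_id,
+ # 'access_key': access_key, # obtained from a session endpoint
+ # 'secret_key': secret_key,
+ # 'session_token': session_token,
+ # }, video_id)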
diff --git a/yt_dlp/extractor/axs.py b/yt_dlp/extractor/axs.py
new file mode 100644
index 0000000..7e91667
--- /dev/null
+++ b/yt_dlp/extractor/axs.py
@@ -0,0 +1,89 @@
+from .common import InfoExtractor
+from ..utils import (
+ float_or_none,
+ js_to_json,
+ parse_iso8601,
+ traverse_obj,
+ url_or_none,
+)
+
+
+class AxsIE(InfoExtractor):
+ IE_NAME = 'axs.tv'
+ _VALID_URL = r'https?://(?:www\.)?axs\.tv/(?:channel/(?:[^/?#]+/)+)?video/(?P<id>[^/?#]+)'
+
+ _TESTS = [{
+ 'url': 'https://www.axs.tv/video/5f4dc776b70e4f1c194f22ef/',
+ 'md5': '8d97736ae8e50c64df528e5e676778cf',
+ 'info_dict': {
+ 'id': '5f4dc776b70e4f1c194f22ef',
+ 'title': 'Small Town',
+ 'ext': 'mp4',
+ 'description': 'md5:e314d28bfaa227a4d7ec965fae19997f',
+ 'upload_date': '20230602',
+ 'timestamp': 1685729564,
+ 'duration': 1284.216,
+ 'series': 'Rock & Roll Road Trip with Sammy Hagar',
+ 'season': 'Season 2',
+ 'season_number': 2,
+ 'episode': '3',
+ 'thumbnail': 'https://images.dotstudiopro.com/5f4e9d330a0c3b295a7e8394',
+ },
+ }, {
+ 'url': 'https://www.axs.tv/channel/rock-star-interview/video/daryl-hall',
+ 'md5': '300ae795cd8f9984652c0949734ffbdc',
+ 'info_dict': {
+ 'id': '5f488148b70e4f392572977c',
+ 'display_id': 'daryl-hall',
+ 'title': 'Daryl Hall',
+ 'ext': 'mp4',
+ 'description': 'md5:e54ecaa0f4b5683fc9259e9e4b196628',
+ 'upload_date': '20230214',
+ 'timestamp': 1676403615,
+ 'duration': 2570.668,
+ 'series': 'The Big Interview with Dan Rather',
+ 'season': 'Season 3',
+ 'season_number': 3,
+ 'episode': '5',
+ 'thumbnail': 'https://images.dotstudiopro.com/5f4d1901f340b50d937cec32',
+ },
+ }]
+
+ def _real_extract(self, url):
+ display_id = self._match_id(url)
+ webpage = self._download_webpage(url, display_id)
+
+ webpage_json_data = self._search_json(
+ r'mountObj\s*=', webpage, 'video ID data', display_id,
+ transform_source=js_to_json)
+ video_id = webpage_json_data['video_id']
+ company_id = webpage_json_data['company_id']
+
+ meta = self._download_json(
+ f'https://api.myspotlight.tv/dotplayer/video/{company_id}/{video_id}',
+ video_id, query={'device_type': 'desktop_web'})['video']
+
+ formats = self._extract_m3u8_formats(
+ meta['video_m3u8'], video_id, 'mp4', m3u8_id='hls')
+
+ subtitles = {}
+ for cc in traverse_obj(meta, ('closeCaption', lambda _, v: url_or_none(v['srtPath']))):
+ subtitles.setdefault(cc.get('srtShortLang') or 'en', []).append(
+ {'ext': cc.get('srtExt'), 'url': cc['srtPath']})
+
+ return {
+ 'id': video_id,
+ 'display_id': display_id,
+ 'formats': formats,
+ **traverse_obj(meta, {
+ 'title': ('title', {str}),
+ 'description': ('description', {str}),
+ 'series': ('seriestitle', {str}),
+ 'season_number': ('season', {int}),
+ 'episode': ('episode', {str}),
+ 'duration': ('duration', {float_or_none}),
+ 'timestamp': ('updated_at', {parse_iso8601}),
+ 'thumbnail': ('thumb', {url_or_none}),
+ }),
+ 'subtitles': subtitles,
+ }
diff --git a/yt_dlp/extractor/azmedien.py b/yt_dlp/extractor/azmedien.py
new file mode 100644
index 0000000..d1686ee
--- /dev/null
+++ b/yt_dlp/extractor/azmedien.py
@@ -0,0 +1,66 @@
+import json
+
+from .common import InfoExtractor
+from .kaltura import KalturaIE
+
+
+class AZMedienIE(InfoExtractor):
+ IE_DESC = 'AZ Medien videos'
+ _VALID_URL = r'''(?x)
+ https?://
+ (?:www\.|tv\.)?
+ (?P<host>
+ telezueri\.ch|
+ telebaern\.tv|
+ telem1\.ch|
+ tvo-online\.ch
+ )/
+ [^/]+/
+ (?P<id>
+ [^/]+-(?P<article_id>\d+)
+ )
+ (?:
+ \#video=
+ (?P<kaltura_id>
+ [_0-9a-z]+
+ )
+ )?
+ '''
+
+ _TESTS = [{
+ 'url': 'https://tv.telezueri.ch/sonntalk/bundesrats-vakanzen-eu-rahmenabkommen-133214569',
+ 'info_dict': {
+ 'id': '1_anruz3wy',
+ 'ext': 'mp4',
+ 'title': 'Bundesrats-Vakanzen / EU-Rahmenabkommen',
+ 'uploader_id': 'TVOnline',
+ 'upload_date': '20180930',
+ 'timestamp': 1538328802,
+ 'view_count': int,
+ 'thumbnail': 'http://cfvod.kaltura.com/p/1719221/sp/171922100/thumbnail/entry_id/1_anruz3wy/version/100031',
+ 'duration': 1930
+ },
+ 'params': {
+ 'skip_download': True,
+ },
+ }, {
+ 'url': 'https://www.telebaern.tv/telebaern-news/montag-1-oktober-2018-ganze-sendung-133531189#video=0_7xjo9lf1',
+ 'only_matching': True
+ }]
+ _API_TEMPL = 'https://www.%s/api/pub/gql/%s/NewsArticleTeaser/a4016f65fe62b81dc6664dd9f4910e4ab40383be'
+ _PARTNER_ID = '1719221'
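+
+ # For example, for host 'telezueri.ch' the template above expands to
+ # https://www.telezueri.ch/api/pub/gql/telezueri/NewsArticleTeaser/a4016f65fe62b81dc6664dd9f4910e4ab40383be
+ # i.e. a persisted GraphQL query addressed by its hash, queried with the article ID.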
+
+ def _real_extract(self, url):
+ host, display_id, article_id, entry_id = self._match_valid_url(url).groups()
+
+ if not entry_id:
+ entry_id = self._download_json(
+ self._API_TEMPL % (host, host.split('.')[0]), display_id, query={
+ 'variables': json.dumps({
+ 'contextId': 'NewsArticle:' + article_id,
+ }),
+ })['data']['context']['mainAsset']['video']['kaltura']['kalturaId']
+
+ return self.url_result(
+ 'kaltura:%s:%s' % (self._PARTNER_ID, entry_id),
+ ie=KalturaIE.ie_key(), video_id=entry_id)
diff --git a/yt_dlp/extractor/baidu.py b/yt_dlp/extractor/baidu.py
new file mode 100644
index 0000000..8786d67
--- /dev/null
+++ b/yt_dlp/extractor/baidu.py
@@ -0,0 +1,51 @@
+from .common import InfoExtractor
+from ..utils import unescapeHTML
+
+
+class BaiduVideoIE(InfoExtractor):
+ IE_DESC = '百度视频'
+ _VALID_URL = r'https?://v\.baidu\.com/(?P<type>[a-z]+)/(?P<id>\d+)\.htm'
+ _TESTS = [{
+ 'url': 'http://v.baidu.com/comic/1069.htm?frp=bdbrand&q=%E4%B8%AD%E5%8D%8E%E5%B0%8F%E5%BD%93%E5%AE%B6',
+ 'info_dict': {
+ 'id': '1069',
+ 'title': '中华小当家 TV版国语',
+ 'description': 'md5:51be07afe461cf99fa61231421b5397c',
+ },
+ 'playlist_count': 52,
+ }, {
+ 'url': 'http://v.baidu.com/show/11595.htm?frp=bdbrand',
+ 'info_dict': {
+ 'id': '11595',
+ 'title': 're:^奔跑吧兄弟',
+ 'description': 'md5:1bf88bad6d850930f542d51547c089b8',
+ },
+ 'playlist_mincount': 12,
+ }]
+
+ def _call_api(self, path, category, playlist_id, note):
+ return self._download_json('http://app.video.baidu.com/%s/?worktype=adnative%s&id=%s' % (
+ path, category, playlist_id), playlist_id, note)
+
+ def _real_extract(self, url):
+ category, playlist_id = self._match_valid_url(url).groups()
+ if category == 'show':
+ category = 'tvshow'
+ if category == 'tv':
+ category = 'tvplay'
+
+ playlist_detail = self._call_api(
+ 'xqinfo', category, playlist_id, 'Downloading playlist JSON metadata')
+
+ playlist_title = playlist_detail['title']
+ playlist_description = unescapeHTML(playlist_detail.get('intro'))
+
+ episodes_detail = self._call_api(
+ 'xqsingle', category, playlist_id, 'Downloading episodes JSON metadata')
+
+ entries = [self.url_result(
+ episode['url'], video_title=episode['title']
+ ) for episode in episodes_detail['videos']]
+
+ return self.playlist_result(
+ entries, playlist_id, playlist_title, playlist_description)
diff --git a/yt_dlp/extractor/banbye.py b/yt_dlp/extractor/banbye.py
new file mode 100644
index 0000000..67af29a
--- /dev/null
+++ b/yt_dlp/extractor/banbye.py
@@ -0,0 +1,168 @@
+import math
+
+from .common import InfoExtractor
+from ..compat import (
+ compat_parse_qs,
+ compat_urllib_parse_urlparse,
+)
+from ..utils import (
+ InAdvancePagedList,
+ format_field,
+ traverse_obj,
+ unified_timestamp,
+)
+
+
+class BanByeBaseIE(InfoExtractor):
+ _API_BASE = 'https://api.banbye.com'
+ _CDN_BASE = 'https://cdn.banbye.com'
+ _VIDEO_BASE = 'https://banbye.com/watch'
+
+ @staticmethod
+ def _extract_playlist_id(url, param='playlist'):
+ return compat_parse_qs(
+ compat_urllib_parse_urlparse(url).query).get(param, [None])[0]
+
+ def _extract_playlist(self, playlist_id):
+ data = self._download_json(f'{self._API_BASE}/playlists/{playlist_id}', playlist_id)
+ return self.playlist_result([
+ self.url_result(f'{self._VIDEO_BASE}/{video_id}', BanByeIE)
+ for video_id in data['videoIds']], playlist_id, data.get('name'))
+
+
+class BanByeIE(BanByeBaseIE):
+ _VALID_URL = r'https?://(?:www\.)?banbye\.com/(?:en/)?watch/(?P<id>[\w-]+)'
+ _TESTS = [{
+ 'url': 'https://banbye.com/watch/v_ytfmvkVYLE8T',
+ 'md5': '2f4ea15c5ca259a73d909b2cfd558eb5',
+ 'info_dict': {
+ 'id': 'v_ytfmvkVYLE8T',
+ 'ext': 'mp4',
+ 'title': 'md5:5ec098f88a0d796f987648de6322ba0f',
+ 'description': 'md5:4d94836e73396bc18ef1fa0f43e5a63a',
+ 'uploader': 'wRealu24',
+ 'channel_id': 'ch_wrealu24',
+ 'channel_url': 'https://banbye.com/channel/ch_wrealu24',
+ 'timestamp': 1647604800,
+ 'upload_date': '20220318',
+ 'duration': 1931,
+ 'thumbnail': r're:https?://.*\.webp',
+ 'tags': 'count:5',
+ 'like_count': int,
+ 'dislike_count': int,
+ 'view_count': int,
+ 'comment_count': int,
+ },
+ }, {
+ 'url': 'https://banbye.com/watch/v_2JjQtqjKUE_F?playlistId=p_Ld82N6gBw_OJ',
+ 'info_dict': {
+ 'title': 'Krzysztof Karoń',
+ 'id': 'p_Ld82N6gBw_OJ',
+ },
+ 'playlist_mincount': 9,
+ }, {
+ 'url': 'https://banbye.com/watch/v_kb6_o1Kyq-CD',
+ 'info_dict': {
+ 'id': 'v_kb6_o1Kyq-CD',
+ 'ext': 'mp4',
+ 'title': 'Co tak naprawdę dzieje się we Francji?! Czy Warszawa a potem cała Polska będzie drugim Paryżem?!🤔🇵🇱',
+ 'description': 'md5:82be4c0e13eae8ea1ca8b9f2e07226a8',
+ 'uploader': 'Marcin Rola - MOIM ZDANIEM!🇵🇱',
+ 'channel_id': 'ch_QgWnHvDG2fo5',
+ 'channel_url': 'https://banbye.com/channel/ch_QgWnHvDG2fo5',
+ 'duration': 597,
+ 'timestamp': 1688642656,
+ 'upload_date': '20230706',
+ 'thumbnail': 'https://cdn.banbye.com/video/v_kb6_o1Kyq-CD/96.webp',
+ 'tags': ['Paryż', 'Francja', 'Polska', 'Imigranci', 'Morawiecki', 'Tusk'],
+ 'like_count': int,
+ 'dislike_count': int,
+ 'view_count': int,
+ 'comment_count': int,
+ },
+ }]
+
+ def _real_extract(self, url):
+ video_id = self._match_id(url)
+ playlist_id = self._extract_playlist_id(url, 'playlistId')
+
+ if self._yes_playlist(playlist_id, video_id):
+ return self._extract_playlist(playlist_id)
+
+ data = self._download_json(f'{self._API_BASE}/videos/{video_id}', video_id)
+ thumbnails = [{
+ 'id': f'{quality}p',
+ 'url': f'{self._CDN_BASE}/video/{video_id}/{quality}.webp',
+ } for quality in [48, 96, 144, 240, 512, 1080]]
+ formats = [{
+ 'format_id': f'http-{quality}p',
+ 'quality': quality,
+ 'url': f'{self._CDN_BASE}/video/{video_id}/{quality}.mp4',
+ } for quality in data['quality']]
+
+ return {
+ 'id': video_id,
+ 'title': data.get('title'),
+ 'description': data.get('desc'),
+ 'uploader': traverse_obj(data, ('channel', 'name')),
+ 'channel_id': data.get('channelId'),
+ 'channel_url': format_field(data, 'channelId', 'https://banbye.com/channel/%s'),
+ 'timestamp': unified_timestamp(data.get('publishedAt')),
+ 'duration': data.get('duration'),
+ 'tags': data.get('tags'),
+ 'formats': formats,
+ 'thumbnails': thumbnails,
+ 'like_count': data.get('likes'),
+ 'dislike_count': data.get('dislikes'),
+ 'view_count': data.get('views'),
+ 'comment_count': data.get('commentCount'),
+ }
+
+
+class BanByeChannelIE(BanByeBaseIE):
+ _VALID_URL = r'https?://(?:www\.)?banbye\.com/(?:en/)?channel/(?P<id>\w+)'
+ _TESTS = [{
+ 'url': 'https://banbye.com/channel/ch_wrealu24',
+ 'info_dict': {
+ 'title': 'wRealu24',
+ 'id': 'ch_wrealu24',
+ 'description': 'md5:da54e48416b74dfdde20a04867c0c2f6',
+ },
+ 'playlist_mincount': 791,
+ }, {
+ 'url': 'https://banbye.com/channel/ch_wrealu24?playlist=p_Ld82N6gBw_OJ',
+ 'info_dict': {
+ 'title': 'Krzysztof Karoń',
+ 'id': 'p_Ld82N6gBw_OJ',
+ },
+ 'playlist_count': 9,
+ }]
+ _PAGE_SIZE = 100
+
+ def _real_extract(self, url):
+ channel_id = self._match_id(url)
+ playlist_id = self._extract_playlist_id(url)
+
+ if playlist_id:
+ return self._extract_playlist(playlist_id)
+
+ def page_func(page_num):
+ data = self._download_json(f'{self._API_BASE}/videos', channel_id, query={
+ 'channelId': channel_id,
+ 'sort': 'new',
+ 'limit': self._PAGE_SIZE,
+ 'offset': page_num * self._PAGE_SIZE,
+ }, note=f'Downloading page {page_num + 1}')
+ return [
+ self.url_result(f"{self._VIDEO_BASE}/{video['_id']}", BanByeIE)
+ for video in data['items']
+ ]
+
+ channel_data = self._download_json(f'{self._API_BASE}/channels/{channel_id}', channel_id)
+ entries = InAdvancePagedList(
+ page_func,
+ math.ceil(channel_data['videoCount'] / self._PAGE_SIZE),
+ self._PAGE_SIZE)
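+ # InAdvancePagedList lazily fetches ceil(videoCount / 100) pages of 100 items
+ # each; e.g. a channel with 791 videos yields 8 page_func calls (offsets 0,
+ # 100, ..., 700), issued only as the entries are actually consumed.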
+
+ return self.playlist_result(
+ entries, channel_id, channel_data.get('name'), channel_data.get('description'))
diff --git a/yt_dlp/extractor/bandaichannel.py b/yt_dlp/extractor/bandaichannel.py
new file mode 100644
index 0000000..d7fcf44
--- /dev/null
+++ b/yt_dlp/extractor/bandaichannel.py
@@ -0,0 +1,33 @@
+from .brightcove import BrightcoveNewBaseIE
+from ..utils import extract_attributes
+
+
+class BandaiChannelIE(BrightcoveNewBaseIE):
+ IE_NAME = 'bandaichannel'
+ _VALID_URL = r'https?://(?:www\.)?b-ch\.com/titles/(?P<id>\d+/\d+)'
+ _TESTS = [{
+ 'url': 'https://www.b-ch.com/titles/514/001',
+ 'md5': 'a0f2d787baa5729bed71108257f613a4',
+ 'info_dict': {
+ 'id': '6128044564001',
+ 'ext': 'mp4',
+ 'title': 'メタルファイターMIKU 第1話',
+ 'timestamp': 1580354056,
+ 'uploader_id': '5797077852001',
+ 'upload_date': '20200130',
+ 'duration': 1387.733,
+ },
+ 'params': {
+ 'skip_download': True,
+ },
+ }]
+
+ def _real_extract(self, url):
+ video_id = self._match_id(url)
+ webpage = self._download_webpage(url, video_id)
+ attrs = extract_attributes(self._search_regex(
+ r'(<video-js[^>]+\bid="bcplayer"[^>]*>)', webpage, 'player'))
+ bc = self._download_json(
+ 'https://pbifcd.b-ch.com/v1/playbackinfo/ST/70/' + attrs['data-info'],
+ video_id, headers={'X-API-KEY': attrs['data-auth'].strip()})['bc']
+ return self._parse_brightcove_metadata(bc, bc['id'])
diff --git a/yt_dlp/extractor/bandcamp.py b/yt_dlp/extractor/bandcamp.py
new file mode 100644
index 0000000..e89b3a6
--- /dev/null
+++ b/yt_dlp/extractor/bandcamp.py
@@ -0,0 +1,485 @@
+import random
+import re
+import time
+
+from .common import InfoExtractor
+from ..compat import compat_str
+from ..utils import (
+ KNOWN_EXTENSIONS,
+ ExtractorError,
+ float_or_none,
+ int_or_none,
+ parse_filesize,
+ str_or_none,
+ try_get,
+ unified_strdate,
+ unified_timestamp,
+ update_url_query,
+ url_or_none,
+ urljoin,
+)
+
+
+class BandcampIE(InfoExtractor):
+ _VALID_URL = r'https?://(?P<uploader>[^/]+)\.bandcamp\.com/track/(?P<id>[^/?#&]+)'
+ _EMBED_REGEX = [r'<meta property="og:url"[^>]*?content="(?P<url>.*?bandcamp\.com.*?)"']
+ _TESTS = [{
+ 'url': 'http://youtube-dl.bandcamp.com/track/youtube-dl-test-song',
+ 'md5': 'c557841d5e50261777a6585648adf439',
+ 'info_dict': {
+ 'id': '1812978515',
+ 'ext': 'mp3',
+ 'title': 'youtube-dl "\'/\\ä↭ - youtube-dl "\'/\\ä↭ - youtube-dl test song "\'/\\ä↭',
+ 'duration': 9.8485,
+ 'uploader': 'youtube-dl "\'/\\ä↭',
+ 'upload_date': '20121129',
+ 'timestamp': 1354224127,
+ 'track': 'youtube-dl "\'/\\ä↭ - youtube-dl test song "\'/\\ä↭',
+ 'album_artist': 'youtube-dl "\'/\\ä↭',
+ 'track_id': '1812978515',
+ 'artist': 'youtube-dl "\'/\\ä↭',
+ 'uploader_url': 'https://youtube-dl.bandcamp.com',
+ 'uploader_id': 'youtube-dl',
+ 'thumbnail': 'https://f4.bcbits.com/img/a3216802731_5.jpg',
+ },
+ 'skip': 'There is a limit of 200 free downloads / month for the test song',
+ }, {
+ # free download
+ 'url': 'http://benprunty.bandcamp.com/track/lanius-battle',
+ 'info_dict': {
+ 'id': '2650410135',
+ 'ext': 'm4a',
+ 'acodec': r're:[fa]lac',
+ 'title': 'Ben Prunty - Lanius (Battle)',
+ 'thumbnail': r're:^https?://.*\.jpg$',
+ 'uploader': 'Ben Prunty',
+ 'timestamp': 1396508491,
+ 'upload_date': '20140403',
+ 'release_timestamp': 1396483200,
+ 'release_date': '20140403',
+ 'duration': 260.877,
+ 'track': 'Lanius (Battle)',
+ 'track_number': 1,
+ 'track_id': '2650410135',
+ 'artist': 'Ben Prunty',
+ 'album_artist': 'Ben Prunty',
+ 'album': 'FTL: Advanced Edition Soundtrack',
+ 'uploader_url': 'https://benprunty.bandcamp.com',
+ 'uploader_id': 'benprunty',
+ },
+ }, {
+ # no free download, mp3 128
+ 'url': 'https://relapsealumni.bandcamp.com/track/hail-to-fire',
+ 'md5': 'fec12ff55e804bb7f7ebeb77a800c8b7',
+ 'info_dict': {
+ 'id': '2584466013',
+ 'ext': 'mp3',
+ 'title': 'Mastodon - Hail to Fire',
+ 'thumbnail': r're:^https?://.*\.jpg$',
+ 'uploader': 'Mastodon',
+ 'timestamp': 1322005399,
+ 'upload_date': '20111122',
+ 'release_timestamp': 1076112000,
+ 'release_date': '20040207',
+ 'duration': 120.79,
+ 'track': 'Hail to Fire',
+ 'track_number': 5,
+ 'track_id': '2584466013',
+ 'artist': 'Mastodon',
+ 'album_artist': 'Mastodon',
+ 'album': 'Call of the Mastodon',
+ 'uploader_url': 'https://relapsealumni.bandcamp.com',
+ 'uploader_id': 'relapsealumni',
+ },
+ }, {
+ # track from compilation album (artist/album_artist difference)
+ 'url': 'https://diskotopia.bandcamp.com/track/safehouse',
+ 'md5': '19c5337bca1428afa54129f86a2f6a69',
+ 'info_dict': {
+ 'id': '1978174799',
+ 'ext': 'mp3',
+ 'title': 'submerse - submerse - Safehouse',
+ 'thumbnail': r're:^https?://.*\.jpg$',
+ 'uploader': 'submerse',
+ 'timestamp': 1480779297,
+ 'upload_date': '20161203',
+ 'release_timestamp': 1481068800,
+ 'release_date': '20161207',
+ 'duration': 154.066,
+ 'track': 'submerse - Safehouse',
+ 'track_number': 3,
+ 'track_id': '1978174799',
+ 'artist': 'submerse',
+ 'album_artist': 'Diskotopia',
+ 'album': 'DSK F/W 2016-2017 Free Compilation',
+ 'uploader_url': 'https://diskotopia.bandcamp.com',
+ 'uploader_id': 'diskotopia',
+ },
+ }]
+
+ def _extract_data_attr(self, webpage, video_id, attr='tralbum', fatal=True):
+ return self._parse_json(self._html_search_regex(
+ r'data-%s=(["\'])({.+?})\1' % attr, webpage,
+ attr + ' data', group=2), video_id, fatal=fatal)
+
+ def _real_extract(self, url):
+ title, uploader = self._match_valid_url(url).group('id', 'uploader')
+ webpage = self._download_webpage(url, title)
+ tralbum = self._extract_data_attr(webpage, title)
+ thumbnail = self._og_search_thumbnail(webpage)
+
+ track_id = None
+ track = None
+ track_number = None
+ duration = None
+
+ formats = []
+ track_info = try_get(tralbum, lambda x: x['trackinfo'][0], dict)
+ if track_info:
+ file_ = track_info.get('file')
+ if isinstance(file_, dict):
+ for format_id, format_url in file_.items():
+ if not url_or_none(format_url):
+ continue
+ ext, abr_str = format_id.split('-', 1)
+ formats.append({
+ 'format_id': format_id,
+ 'url': self._proto_relative_url(format_url, 'http:'),
+ 'ext': ext,
+ 'vcodec': 'none',
+ 'acodec': ext,
+ 'abr': int_or_none(abr_str),
+ })
+ track = track_info.get('title')
+ track_id = str_or_none(
+ track_info.get('track_id') or track_info.get('id'))
+ track_number = int_or_none(track_info.get('track_num'))
+ duration = float_or_none(track_info.get('duration'))
+
+ embed = self._extract_data_attr(webpage, title, 'embed', False)
+ current = tralbum.get('current') or {}
+ artist = embed.get('artist') or current.get('artist') or tralbum.get('artist')
+ album_artist = self._html_search_regex(
+ r'<h3 class="albumTitle">[\S\s]*?by\s*<span>\s*<a href="[^>]+">\s*([^>]+?)\s*</a>',
+ webpage, 'album artist', fatal=False)
+ timestamp = unified_timestamp(
+ current.get('publish_date') or tralbum.get('album_publish_date'))
+
+ download_link = tralbum.get('freeDownloadPage')
+ if download_link:
+ track_id = compat_str(tralbum['id'])
+
+ download_webpage = self._download_webpage(
+ download_link, track_id, 'Downloading free downloads page')
+
+ blob = self._extract_data_attr(download_webpage, track_id, 'blob')
+
+ info = try_get(
+ blob, (lambda x: x['digital_items'][0],
+ lambda x: x['download_items'][0]), dict)
+ if info:
+ downloads = info.get('downloads')
+ if isinstance(downloads, dict):
+ if not track:
+ track = info.get('title')
+ if not artist:
+ artist = info.get('artist')
+ if not thumbnail:
+ thumbnail = info.get('thumb_url')
+
+ download_formats = {}
+ download_formats_list = blob.get('download_formats')
+ if isinstance(download_formats_list, list):
+ for f in download_formats_list:
+ name, ext = f.get('name'), f.get('file_extension')
+ if all(isinstance(x, compat_str) for x in (name, ext)):
+ download_formats[name] = ext.strip('.')
+
+ for format_id, f in downloads.items():
+ format_url = f.get('url')
+ if not format_url:
+ continue
+ # Stat URL generation algorithm is reverse engineered from
+ # download_*_bundle_*.js
+ stat_url = update_url_query(
+ format_url.replace('/download/', '/statdownload/'), {
+ '.rand': int(time.time() * 1000 * random.random()),
+ })
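+ # e.g. (hypothetical URL, for illustration):
+ # https://bandcamp.com/download/track?enc=flac&id=123
+ # -> https://bandcamp.com/statdownload/track?enc=flac&id=123&.rand=1710500000000
+ # The statdownload endpoint replies with JSON wrapped in extra text (hence the
+ # transform_source below); its 'retry_url' is the actual file location.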
+ format_id = f.get('encoding_name') or format_id
+ stat = self._download_json(
+ stat_url, track_id, 'Downloading %s JSON' % format_id,
+ transform_source=lambda s: s[s.index('{'):s.rindex('}') + 1],
+ fatal=False)
+ if not stat:
+ continue
+ retry_url = url_or_none(stat.get('retry_url'))
+ if not retry_url:
+ continue
+ formats.append({
+ 'url': self._proto_relative_url(retry_url, 'http:'),
+ 'ext': download_formats.get(format_id),
+ 'format_id': format_id,
+ 'format_note': f.get('description'),
+ 'filesize': parse_filesize(f.get('size_mb')),
+ 'vcodec': 'none',
+ 'acodec': format_id.split('-')[0],
+ })
+
+ title = '%s - %s' % (artist, track) if artist else track
+
+ if not duration:
+ duration = float_or_none(self._html_search_meta(
+ 'duration', webpage, default=None))
+
+ return {
+ 'id': track_id,
+ 'title': title,
+ 'thumbnail': thumbnail,
+ 'uploader': artist,
+ 'uploader_id': uploader,
+ 'uploader_url': f'https://{uploader}.bandcamp.com',
+ 'timestamp': timestamp,
+ 'release_timestamp': unified_timestamp(tralbum.get('album_release_date')),
+ 'duration': duration,
+ 'track': track,
+ 'track_number': track_number,
+ 'track_id': track_id,
+ 'artist': artist,
+ 'album': embed.get('album_title'),
+ 'album_artist': album_artist,
+ 'formats': formats,
+ }
+
+
+class BandcampAlbumIE(BandcampIE): # XXX: Do not subclass from concrete IE
+ IE_NAME = 'Bandcamp:album'
+ _VALID_URL = r'https?://(?:(?P<subdomain>[^.]+)\.)?bandcamp\.com/album/(?P<id>[^/?#&]+)'
+
+ _TESTS = [{
+ 'url': 'http://blazo.bandcamp.com/album/jazz-format-mixtape-vol-1',
+ 'playlist': [
+ {
+ 'md5': '39bc1eded3476e927c724321ddf116cf',
+ 'info_dict': {
+ 'id': '1353101989',
+ 'ext': 'mp3',
+ 'title': 'Blazo - Intro',
+ 'timestamp': 1311756226,
+ 'upload_date': '20110727',
+ 'uploader': 'Blazo',
+ }
+ },
+ {
+ 'md5': '1a2c32e2691474643e912cc6cd4bffaa',
+ 'info_dict': {
+ 'id': '38097443',
+ 'ext': 'mp3',
+ 'title': 'Blazo - Kero One - Keep It Alive (Blazo remix)',
+ 'timestamp': 1311757238,
+ 'upload_date': '20110727',
+ 'uploader': 'Blazo',
+ }
+ },
+ ],
+ 'info_dict': {
+ 'title': 'Jazz Format Mixtape vol.1',
+ 'id': 'jazz-format-mixtape-vol-1',
+ 'uploader_id': 'blazo',
+ },
+ 'params': {
+ 'playlistend': 2
+ },
+ 'skip': 'Bandcamp imposes download limits.'
+ }, {
+ 'url': 'http://nightbringer.bandcamp.com/album/hierophany-of-the-open-grave',
+ 'info_dict': {
+ 'title': 'Hierophany of the Open Grave',
+ 'uploader_id': 'nightbringer',
+ 'id': 'hierophany-of-the-open-grave',
+ },
+ 'playlist_mincount': 9,
+ }, {
+ # with escaped quote in title
+ 'url': 'https://jstrecords.bandcamp.com/album/entropy-ep',
+ 'info_dict': {
+ 'title': '"Entropy" EP',
+ 'uploader_id': 'jstrecords',
+ 'id': 'entropy-ep',
+ 'description': 'md5:0ff22959c943622972596062f2f366a5',
+ },
+ 'playlist_mincount': 3,
+ }, {
+ # not all tracks have songs
+ 'url': 'https://insulters.bandcamp.com/album/we-are-the-plague',
+ 'info_dict': {
+ 'id': 'we-are-the-plague',
+ 'title': 'WE ARE THE PLAGUE',
+ 'uploader_id': 'insulters',
+ 'description': 'md5:b3cf845ee41b2b1141dc7bde9237255f',
+ },
+ 'playlist_count': 2,
+ }]
+
+ @classmethod
+ def suitable(cls, url):
+ return (False
+ if BandcampWeeklyIE.suitable(url) or BandcampIE.suitable(url)
+ else super().suitable(url))
+
+ def _real_extract(self, url):
+ uploader_id, album_id = self._match_valid_url(url).groups()
+ playlist_id = album_id or uploader_id
+ webpage = self._download_webpage(url, playlist_id)
+ tralbum = self._extract_data_attr(webpage, playlist_id)
+ track_info = tralbum.get('trackinfo')
+ if not track_info:
+ raise ExtractorError('The page doesn\'t contain any tracks')
+ # Only tracks with duration info have songs
+ entries = [
+ self.url_result(
+ urljoin(url, t['title_link']), BandcampIE.ie_key(),
+ str_or_none(t.get('track_id') or t.get('id')), t.get('title'))
+ for t in track_info
+ if t.get('duration')]
+
+ current = tralbum.get('current') or {}
+
+ return {
+ '_type': 'playlist',
+ 'uploader_id': uploader_id,
+ 'id': playlist_id,
+ 'title': current.get('title'),
+ 'description': current.get('about'),
+ 'entries': entries,
+ }
+
+
+class BandcampWeeklyIE(BandcampIE): # XXX: Do not subclass from concrete IE
+ IE_NAME = 'Bandcamp:weekly'
+ _VALID_URL = r'https?://(?:www\.)?bandcamp\.com/?\?(?:.*?&)?show=(?P<id>\d+)'
+ _TESTS = [{
+ 'url': 'https://bandcamp.com/?show=224',
+ 'md5': 'b00df799c733cf7e0c567ed187dea0fd',
+ 'info_dict': {
+ 'id': '224',
+ 'ext': 'opus',
+ 'title': 'BC Weekly April 4th 2017 - Magic Moments',
+ 'description': 'md5:5d48150916e8e02d030623a48512c874',
+ 'duration': 5829.77,
+ 'release_date': '20170404',
+ 'series': 'Bandcamp Weekly',
+ 'episode': 'Magic Moments',
+ 'episode_id': '224',
+ },
+ 'params': {
+ 'format': 'opus-lo',
+ },
+ }, {
+ 'url': 'https://bandcamp.com/?blah/blah@&show=228',
+ 'only_matching': True
+ }]
+
+ def _real_extract(self, url):
+ show_id = self._match_id(url)
+ webpage = self._download_webpage(url, show_id)
+
+ blob = self._extract_data_attr(webpage, show_id, 'blob')
+
+ show = blob['bcw_data'][show_id]
+
+ formats = []
+ for format_id, format_url in show['audio_stream'].items():
+ if not url_or_none(format_url):
+ continue
+ for known_ext in KNOWN_EXTENSIONS:
+ if known_ext in format_id:
+ ext = known_ext
+ break
+ else:
+ ext = None
+ formats.append({
+ 'format_id': format_id,
+ 'url': format_url,
+ 'ext': ext,
+ 'vcodec': 'none',
+ })
+
+ title = show.get('audio_title') or 'Bandcamp Weekly'
+ subtitle = show.get('subtitle')
+ if subtitle:
+ title += ' - %s' % subtitle
+
+ return {
+ 'id': show_id,
+ 'title': title,
+ 'description': show.get('desc') or show.get('short_desc'),
+ 'duration': float_or_none(show.get('audio_duration')),
+ 'is_live': False,
+ 'release_date': unified_strdate(show.get('published_date')),
+ 'series': 'Bandcamp Weekly',
+ 'episode': show.get('subtitle'),
+ 'episode_id': show_id,
+ 'formats': formats
+ }
+
+
+class BandcampUserIE(InfoExtractor):
+ IE_NAME = 'Bandcamp:user'
+ _VALID_URL = r'https?://(?!www\.)(?P<id>[^.]+)\.bandcamp\.com(?:/music)?/?(?:[#?]|$)'
+
+ _TESTS = [{
+ # Type 1 Bandcamp user page.
+ 'url': 'https://adrianvonziegler.bandcamp.com',
+ 'info_dict': {
+ 'id': 'adrianvonziegler',
+ 'title': 'Discography of adrianvonziegler',
+ },
+ 'playlist_mincount': 23,
+ }, {
+ # Bandcamp user page with only one album
+ 'url': 'http://dotscale.bandcamp.com',
+ 'info_dict': {
+ 'id': 'dotscale',
+ 'title': 'Discography of dotscale'
+ },
+ 'playlist_count': 1,
+ }, {
+ # Type 2 Bandcamp user page.
+ 'url': 'https://nightcallofficial.bandcamp.com',
+ 'info_dict': {
+ 'id': 'nightcallofficial',
+ 'title': 'Discography of nightcallofficial',
+ },
+ 'playlist_count': 4,
+ }, {
+ 'url': 'https://steviasphere.bandcamp.com/music',
+ 'playlist_mincount': 47,
+ 'info_dict': {
+ 'id': 'steviasphere',
+ 'title': 'Discography of steviasphere',
+ },
+ }, {
+ 'url': 'https://coldworldofficial.bandcamp.com/music',
+ 'playlist_mincount': 10,
+ 'info_dict': {
+ 'id': 'coldworldofficial',
+ 'title': 'Discography of coldworldofficial',
+ },
+ }, {
+ 'url': 'https://nuclearwarnowproductions.bandcamp.com/music',
+ 'playlist_mincount': 399,
+ 'info_dict': {
+ 'id': 'nuclearwarnowproductions',
+ 'title': 'Discography of nuclearwarnowproductions',
+ },
+ }]
+
+ def _real_extract(self, url):
+ uploader = self._match_id(url)
+ webpage = self._download_webpage(url, uploader)
+
+ discography_data = (re.findall(r'<li data-item-id=["\'][^>]+>\s*<a href=["\'](?![^"\'/]*?/merch)([^"\']+)', webpage)
+ or re.findall(r'<div[^>]+trackTitle["\'][^"\']+["\']([^"\']+)', webpage))
+
+ return self.playlist_from_matches(
+ discography_data, uploader, f'Discography of {uploader}', getter=lambda x: urljoin(url, x))
diff --git a/yt_dlp/extractor/bannedvideo.py b/yt_dlp/extractor/bannedvideo.py
new file mode 100644
index 0000000..51e7220
--- /dev/null
+++ b/yt_dlp/extractor/bannedvideo.py
@@ -0,0 +1,155 @@
+import json
+
+from .common import InfoExtractor
+from ..utils import (
+ float_or_none,
+ int_or_none,
+ try_get,
+ unified_timestamp,
+ url_or_none,
+)
+
+
+class BannedVideoIE(InfoExtractor):
+ _VALID_URL = r'https?://(?:www\.)?banned\.video/watch\?id=(?P<id>[0-9a-f]{24})'
+ _TESTS = [{
+ 'url': 'https://banned.video/watch?id=5e7a859644e02200c6ef5f11',
+ 'md5': '14b6e81d41beaaee2215cd75c6ed56e4',
+ 'info_dict': {
+ 'id': '5e7a859644e02200c6ef5f11',
+ 'ext': 'mp4',
+ 'title': 'China Discovers Origin of Corona Virus: Issues Emergency Statement',
+ 'thumbnail': r're:^https?://(?:www\.)?assets\.infowarsmedia.com/images/',
+ 'description': 'md5:560d96f02abbebe6c6b78b47465f6b28',
+ 'upload_date': '20200324',
+ 'timestamp': 1585087895,
+ }
+ }]
+
+ _GRAPHQL_GETMETADATA_QUERY = '''
+query GetVideoAndComments($id: String!) {
+ getVideo(id: $id) {
+ streamUrl
+ directUrl
+ unlisted
+ live
+ tags {
+ name
+ }
+ title
+ summary
+ playCount
+ largeImage
+ videoDuration
+ channel {
+ _id
+ title
+ }
+ createdAt
+ }
+ getVideoComments(id: $id, limit: 999999, offset: 0) {
+ _id
+ content
+ user {
+ _id
+ username
+ }
+ voteCount {
+ positive
+ }
+ createdAt
+ replyCount
+ }
+}'''
+
+ _GRAPHQL_GETCOMMENTSREPLIES_QUERY = '''
+query GetCommentReplies($id: String!) {
+ getCommentReplies(id: $id, limit: 999999, offset: 0) {
+ _id
+ content
+ user {
+ _id
+ username
+ }
+ voteCount {
+ positive
+ }
+ createdAt
+ replyCount
+ }
+}'''
+
+ _GRAPHQL_QUERIES = {
+ 'GetVideoAndComments': _GRAPHQL_GETMETADATA_QUERY,
+ 'GetCommentReplies': _GRAPHQL_GETCOMMENTSREPLIES_QUERY,
+ }
+
+ def _call_api(self, video_id, object_id, operation, note):
+ return self._download_json(
+ 'https://api.infowarsmedia.com/graphql', video_id, note=note,
+ headers={
+ 'Content-Type': 'application/json; charset=utf-8'
+ }, data=json.dumps({
+ 'variables': {'id': object_id},
+ 'operationName': operation,
+ 'query': self._GRAPHQL_QUERIES[operation]
+ }).encode('utf8')).get('data')
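+
+ # The POST body is a standard GraphQL request, e.g. (illustrative values):
+ # {"variables": {"id": "5e7a859644e02200c6ef5f11"},
+ # "operationName": "GetVideoAndComments",
+ # "query": "query GetVideoAndComments($id: String!) { ... }"}
+ # Only the 'data' member of the JSON response is returned to the caller.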
+
+ def _get_comments(self, video_id, comments, comment_data):
+ yield from comments
+ for comment in comment_data.copy():
+ comment_id = comment.get('_id')
+ if comment.get('replyCount') > 0:
+ reply_json = self._call_api(
+ video_id, comment_id, 'GetCommentReplies',
+ f'Downloading replies for comment {comment_id}')
+ for reply in reply_json.get('getCommentReplies'):
+ yield self._parse_comment(reply, comment_id)
+
+ @staticmethod
+ def _parse_comment(comment_data, parent):
+ return {
+ 'id': comment_data.get('_id'),
+ 'text': comment_data.get('content'),
+ 'author': try_get(comment_data, lambda x: x['user']['username']),
+ 'author_id': try_get(comment_data, lambda x: x['user']['_id']),
+ 'timestamp': unified_timestamp(comment_data.get('createdAt')),
+ 'parent': parent,
+ 'like_count': try_get(comment_data, lambda x: x['voteCount']['positive']),
+ }
+
+ def _real_extract(self, url):
+ video_id = self._match_id(url)
+ video_json = self._call_api(video_id, video_id, 'GetVideoAndComments', 'Downloading video metadata')
+ video_info = video_json['getVideo']
+ is_live = video_info.get('live')
+ comments = [self._parse_comment(comment, 'root') for comment in video_json.get('getVideoComments')]
+
+ formats = [{
+ 'format_id': 'direct',
+ 'quality': 1,
+ 'url': video_info.get('directUrl'),
+ 'ext': 'mp4',
+ }] if url_or_none(video_info.get('directUrl')) else []
+ if video_info.get('streamUrl'):
+ formats.extend(self._extract_m3u8_formats(
+ video_info.get('streamUrl'), video_id, 'mp4',
+ entry_protocol='m3u8_native', m3u8_id='hls', live=True))
+
+ return {
+ 'id': video_id,
+ 'title': video_info.get('title')[:-1],
+ 'formats': formats,
+ 'is_live': is_live,
+ 'description': video_info.get('summary'),
+ 'channel': try_get(video_info, lambda x: x['channel']['title']),
+ 'channel_id': try_get(video_info, lambda x: x['channel']['_id']),
+ 'view_count': int_or_none(video_info.get('playCount')),
+ 'thumbnail': url_or_none(video_info.get('largeImage')),
+ 'duration': float_or_none(video_info.get('videoDuration')),
+ 'timestamp': unified_timestamp(video_info.get('createdAt')),
+ 'tags': [tag.get('name') for tag in video_info.get('tags')],
+ 'availability': self._availability(is_unlisted=video_info.get('unlisted')),
+ 'comments': comments,
+ '__post_extractor': self.extract_comments(video_id, comments, video_json.get('getVideoComments'))
+ }
diff --git a/yt_dlp/extractor/bbc.py b/yt_dlp/extractor/bbc.py
new file mode 100644
index 0000000..015af9e
--- /dev/null
+++ b/yt_dlp/extractor/bbc.py
@@ -0,0 +1,1660 @@
+import functools
+import itertools
+import json
+import re
+import xml.etree.ElementTree
+
+from .common import InfoExtractor
+from ..compat import compat_str, compat_urlparse
+from ..networking.exceptions import HTTPError
+from ..utils import (
+ ExtractorError,
+ OnDemandPagedList,
+ clean_html,
+ dict_get,
+ float_or_none,
+ get_element_by_class,
+ int_or_none,
+ join_nonempty,
+ js_to_json,
+ parse_duration,
+ parse_iso8601,
+ parse_qs,
+ strip_or_none,
+ traverse_obj,
+ try_get,
+ unescapeHTML,
+ unified_timestamp,
+ url_or_none,
+ urlencode_postdata,
+ urljoin,
+)
+
+
+class BBCCoUkIE(InfoExtractor):
+ IE_NAME = 'bbc.co.uk'
+ IE_DESC = 'BBC iPlayer'
+ _ID_REGEX = r'(?:[pbml][\da-z]{7}|w[\da-z]{7,14})'
+ _VALID_URL = r'''(?x)
+ https?://
+ (?:www\.)?bbc\.co\.uk/
+ (?:
+ programmes/(?!articles/)|
+ iplayer(?:/[^/]+)?/(?:episode/|playlist/)|
+ music/(?:clips|audiovideo/popular)[/#]|
+ radio/player/|
+ events/[^/]+/play/[^/]+/
+ )
+ (?P<id>%s)(?!/(?:episodes|broadcasts|clips))
+ ''' % _ID_REGEX
+ _EMBED_REGEX = [r'setPlaylist\("(?P<url>https?://www\.bbc\.co\.uk/iplayer/[^/]+/[\da-z]{8})"\)']
+
+ _LOGIN_URL = 'https://account.bbc.com/signin'
+ _NETRC_MACHINE = 'bbc'
+
+ _MEDIA_SELECTOR_URL_TEMPL = 'https://open.live.bbc.co.uk/mediaselector/6/select/version/2.0/mediaset/%s/vpid/%s'
+ _MEDIA_SETS = [
+ # Provides HQ HLS streams with even better quality than the pc mediaset, but fails
+ # with geolocation in some cases even when the programme is not geo-restricted at all
+ # (e.g. http://www.bbc.co.uk/programmes/b06bp7lf). May also fail with selectionunavailable.
+ 'iptv-all',
+ 'pc',
+ ]
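+ # e.g. the selector URL for the 'iptv-all' mediaset of vpid b039d07m is
+ # https://open.live.bbc.co.uk/mediaselector/6/select/version/2.0/mediaset/iptv-all/vpid/b039d07m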
+
+ _EMP_PLAYLIST_NS = 'http://bbc.co.uk/2008/emp/playlist'
+
+ _TESTS = [
+ {
+ 'url': 'http://www.bbc.co.uk/programmes/b039g8p7',
+ 'info_dict': {
+ 'id': 'b039d07m',
+ 'ext': 'flv',
+ 'title': 'Kaleidoscope, Leonard Cohen',
+ 'description': 'The Canadian poet and songwriter reflects on his musical career.',
+ },
+ 'params': {
+ # rtmp download
+ 'skip_download': True,
+ }
+ },
+ {
+ 'url': 'http://www.bbc.co.uk/iplayer/episode/b00yng5w/The_Man_in_Black_Series_3_The_Printed_Name/',
+ 'info_dict': {
+ 'id': 'b00yng1d',
+ 'ext': 'flv',
+ 'title': 'The Man in Black: Series 3: The Printed Name',
+ 'description': "Mark Gatiss introduces Nicholas Pierpan's chilling tale of a writer's devilish pact with a mysterious man. Stars Ewan Bailey.",
+ 'duration': 1800,
+ },
+ 'params': {
+ # rtmp download
+ 'skip_download': True,
+ },
+ 'skip': 'Episode is no longer available on BBC iPlayer Radio',
+ },
+ {
+ 'url': 'http://www.bbc.co.uk/iplayer/episode/b03vhd1f/The_Voice_UK_Series_3_Blind_Auditions_5/',
+ 'info_dict': {
+ 'id': 'b00yng1d',
+ 'ext': 'flv',
+ 'title': 'The Voice UK: Series 3: Blind Auditions 5',
+ 'description': 'Emma Willis and Marvin Humes present the fifth set of blind auditions in the singing competition, as the coaches continue to build their teams based on voice alone.',
+ 'duration': 5100,
+ },
+ 'params': {
+ # rtmp download
+ 'skip_download': True,
+ },
+ 'skip': 'Currently BBC iPlayer TV programmes are available to play in the UK only',
+ },
+ {
+ 'url': 'http://www.bbc.co.uk/iplayer/episode/p026c7jt/tomorrows-worlds-the-unearthly-history-of-science-fiction-2-invasion',
+ 'info_dict': {
+ 'id': 'b03k3pb7',
+ 'ext': 'flv',
+ 'title': "Tomorrow's Worlds: The Unearthly History of Science Fiction",
+ 'description': '2. Invasion',
+ 'duration': 3600,
+ },
+ 'params': {
+ # rtmp download
+ 'skip_download': True,
+ },
+ 'skip': 'Currently BBC iPlayer TV programmes are available to play in the UK only',
+ }, {
+ 'url': 'http://www.bbc.co.uk/programmes/b04v20dw',
+ 'info_dict': {
+ 'id': 'b04v209v',
+ 'ext': 'flv',
+ 'title': 'Pete Tong, The Essential New Tune Special',
+ 'description': "Pete has a very special mix - all of 2014's Essential New Tunes!",
+ 'duration': 10800,
+ },
+ 'params': {
+ # rtmp download
+ 'skip_download': True,
+ },
+ 'skip': 'Episode is no longer available on BBC iPlayer Radio',
+ }, {
+ 'url': 'http://www.bbc.co.uk/music/clips/p022h44b',
+ 'note': 'Audio',
+ 'info_dict': {
+ 'id': 'p022h44j',
+ 'ext': 'flv',
+ 'title': 'BBC Proms Music Guides, Rachmaninov: Symphonic Dances',
+ 'description': "In this Proms Music Guide, Andrew McGregor looks at Rachmaninov's Symphonic Dances.",
+ 'duration': 227,
+ },
+ 'params': {
+ # rtmp download
+ 'skip_download': True,
+ }
+ }, {
+ 'url': 'http://www.bbc.co.uk/music/clips/p025c0zz',
+ 'note': 'Video',
+ 'info_dict': {
+ 'id': 'p025c103',
+ 'ext': 'flv',
+ 'title': 'Reading and Leeds Festival, 2014, Rae Morris - Closer (Live on BBC Three)',
+ 'description': 'Rae Morris performs Closer for BBC Three at Reading 2014',
+ 'duration': 226,
+ },
+ 'params': {
+ # rtmp download
+ 'skip_download': True,
+ }
+ }, {
+ 'url': 'http://www.bbc.co.uk/iplayer/episode/b054fn09/ad/natural-world-20152016-2-super-powered-owls',
+ 'info_dict': {
+ 'id': 'p02n76xf',
+ 'ext': 'flv',
+ 'title': 'Natural World, 2015-2016: 2. Super Powered Owls',
+ 'description': 'md5:e4db5c937d0e95a7c6b5e654d429183d',
+ 'duration': 3540,
+ },
+ 'params': {
+ # rtmp download
+ 'skip_download': True,
+ },
+ 'skip': 'geolocation',
+ }, {
+ 'url': 'http://www.bbc.co.uk/iplayer/episode/b05zmgwn/royal-academy-summer-exhibition',
+ 'info_dict': {
+ 'id': 'b05zmgw1',
+ 'ext': 'flv',
+ 'description': 'Kirsty Wark and Morgan Quaintance visit the Royal Academy as it prepares for its annual artistic extravaganza, meeting people who have come together to make the show unique.',
+ 'title': 'Royal Academy Summer Exhibition',
+ 'duration': 3540,
+ },
+ 'params': {
+ # rtmp download
+ 'skip_download': True,
+ },
+ 'skip': 'geolocation',
+ }, {
+ # The iptv-all mediaset fails with geolocation although there is no geo
+ # restriction for this programme at all
+ 'url': 'http://www.bbc.co.uk/programmes/b06rkn85',
+ 'info_dict': {
+ 'id': 'b06rkms3',
+ 'ext': 'flv',
+ 'title': "Best of the Mini-Mixes 2015: Part 3, Annie Mac's Friday Night - BBC Radio 1",
+ 'description': "Annie has part three in the Best of the Mini-Mixes 2015, plus the year's Most Played!",
+ },
+ 'params': {
+ # rtmp download
+ 'skip_download': True,
+ },
+ 'skip': 'Now it\'s really geo-restricted',
+ }, {
+ # compact player (https://github.com/ytdl-org/youtube-dl/issues/8147)
+ 'url': 'http://www.bbc.co.uk/programmes/p028bfkf/player',
+ 'info_dict': {
+ 'id': 'p028bfkj',
+ 'ext': 'flv',
+ 'title': 'Extract from BBC documentary Look Stranger - Giant Leeks and Magic Brews',
+ 'description': 'Extract from BBC documentary Look Stranger - Giant Leeks and Magic Brews',
+ },
+ 'params': {
+ # rtmp download
+ 'skip_download': True,
+ },
+ }, {
+ 'url': 'http://www.bbc.co.uk/iplayer/playlist/p01dvks4',
+ 'only_matching': True,
+ }, {
+ 'url': 'http://www.bbc.co.uk/music/clips#p02frcc3',
+ 'only_matching': True,
+ }, {
+ 'url': 'http://www.bbc.co.uk/iplayer/cbeebies/episode/b0480276/bing-14-atchoo',
+ 'only_matching': True,
+ }, {
+ 'url': 'http://www.bbc.co.uk/radio/player/p03cchwf',
+ 'only_matching': True,
+ }, {
+ 'url': 'https://www.bbc.co.uk/music/audiovideo/popular#p055bc55',
+ 'only_matching': True,
+ }, {
+ 'url': 'http://www.bbc.co.uk/programmes/w3csv1y9',
+ 'only_matching': True,
+ }, {
+ 'url': 'https://www.bbc.co.uk/programmes/m00005xn',
+ 'only_matching': True,
+ }, {
+ 'url': 'https://www.bbc.co.uk/programmes/w172w4dww1jqt5s',
+ 'only_matching': True,
+ }]
+
+ def _perform_login(self, username, password):
+ login_page = self._download_webpage(
+ self._LOGIN_URL, None, 'Downloading signin page')
+
+ login_form = self._hidden_inputs(login_page)
+
+ login_form.update({
+ 'username': username,
+ 'password': password,
+ })
+
+ post_url = urljoin(self._LOGIN_URL, self._search_regex(
+ r'<form[^>]+action=(["\'])(?P<url>.+?)\1', login_page,
+ 'post url', default=self._LOGIN_URL, group='url'))
+
+ response, urlh = self._download_webpage_handle(
+ post_url, None, 'Logging in', data=urlencode_postdata(login_form),
+ headers={'Referer': self._LOGIN_URL})
+
+ if self._LOGIN_URL in urlh.url:
+ error = clean_html(get_element_by_class('form-message', response))
+ if error:
+ raise ExtractorError(
+ 'Unable to login: %s' % error, expected=True)
+ raise ExtractorError('Unable to log in')
+
+ class MediaSelectionError(Exception):
+ def __init__(self, id):
+ self.id = id
+
+ def _extract_asx_playlist(self, connection, programme_id):
+ asx = self._download_xml(connection.get('href'), programme_id, 'Downloading ASX playlist')
+ return [ref.get('href') for ref in asx.findall('./Entry/ref')]
+
+ def _extract_items(self, playlist):
+ return playlist.findall('./{%s}item' % self._EMP_PLAYLIST_NS)
+
+ def _extract_medias(self, media_selection):
+ error = media_selection.get('result')
+ if error:
+ raise BBCCoUkIE.MediaSelectionError(error)
+ return media_selection.get('media') or []
+
+ def _extract_connections(self, media):
+ return media.get('connection') or []
+
+ def _get_subtitles(self, media, programme_id):
+ subtitles = {}
+ for connection in self._extract_connections(media):
+ cc_url = url_or_none(connection.get('href'))
+ if not cc_url:
+ continue
+ captions = self._download_xml(
+ cc_url, programme_id, 'Downloading captions', fatal=False)
+ if not isinstance(captions, xml.etree.ElementTree.Element):
+ continue
+ subtitles['en'] = [
+ {
+ 'url': connection.get('href'),
+ 'ext': 'ttml',
+ },
+ ]
+ break
+ return subtitles
+
+ def _raise_extractor_error(self, media_selection_error):
+ raise ExtractorError(
+ '%s returned error: %s' % (self.IE_NAME, media_selection_error.id),
+ expected=True)
+
+ def _download_media_selector(self, programme_id):
+ last_exception = None
+ formats, subtitles = [], {}
+ for media_set in self._MEDIA_SETS:
+ try:
+ fmts, subs = self._download_media_selector_url(
+ self._MEDIA_SELECTOR_URL_TEMPL % (media_set, programme_id), programme_id)
+ formats.extend(fmts)
+ if subs:
+ self._merge_subtitles(subs, target=subtitles)
+ except BBCCoUkIE.MediaSelectionError as e:
+ if e.id in ('notukerror', 'geolocation', 'selectionunavailable'):
+ last_exception = e
+ continue
+ self._raise_extractor_error(e)
+ if last_exception:
+ if formats or subtitles:
+ self.report_warning(f'{self.IE_NAME} returned error: {last_exception.id}')
+ else:
+ self._raise_extractor_error(last_exception)
+ return formats, subtitles
+
+ def _download_media_selector_url(self, url, programme_id=None):
+ media_selection = self._download_json(
+ url, programme_id, 'Downloading media selection JSON',
+ expected_status=(403, 404))
+ return self._process_media_selector(media_selection, programme_id)
+
+ def _process_media_selector(self, media_selection, programme_id):
+ formats = []
+ subtitles = None
+ urls = []
+
+ for media in self._extract_medias(media_selection):
+ kind = media.get('kind')
+ if kind in ('video', 'audio'):
+ bitrate = int_or_none(media.get('bitrate'))
+ encoding = media.get('encoding')
+ width = int_or_none(media.get('width'))
+ height = int_or_none(media.get('height'))
+ file_size = int_or_none(media.get('media_file_size'))
+ for connection in self._extract_connections(media):
+ href = connection.get('href')
+ if href in urls:
+ continue
+ if href:
+ urls.append(href)
+ conn_kind = connection.get('kind')
+ protocol = connection.get('protocol')
+ supplier = connection.get('supplier')
+ transfer_format = connection.get('transferFormat')
+ format_id = supplier or conn_kind or protocol
+ # ASX playlist
+ if supplier == 'asx':
+ for i, ref in enumerate(self._extract_asx_playlist(connection, programme_id)):
+ formats.append({
+ 'url': ref,
+ 'format_id': 'ref%s_%s' % (i, format_id),
+ })
+ elif transfer_format == 'dash':
+ formats.extend(self._extract_mpd_formats(
+ href, programme_id, mpd_id=format_id, fatal=False))
+ elif transfer_format == 'hls':
+ # TODO: let expected_status be passed into _extract_xxx_formats() instead
+ try:
+ fmts = self._extract_m3u8_formats(
+ href, programme_id, ext='mp4', entry_protocol='m3u8_native',
+ m3u8_id=format_id, fatal=False)
+ except ExtractorError as e:
+ if not (isinstance(e.exc_info[1], HTTPError)
+ and e.exc_info[1].status in (403, 404)):
+ raise
+ fmts = []
+ formats.extend(fmts)
+ elif transfer_format == 'hds':
+ formats.extend(self._extract_f4m_formats(
+ href, programme_id, f4m_id=format_id, fatal=False))
+ else:
+ if not supplier and bitrate:
+ format_id += '-%d' % bitrate
+ fmt = {
+ 'format_id': format_id,
+ 'filesize': file_size,
+ }
+ if kind == 'video':
+ fmt.update({
+ 'width': width,
+ 'height': height,
+ 'tbr': bitrate,
+ 'vcodec': encoding,
+ })
+ else:
+ fmt.update({
+ 'abr': bitrate,
+ 'acodec': encoding,
+ 'vcodec': 'none',
+ })
+ if protocol in ('http', 'https'):
+ # Direct link
+ fmt.update({
+ 'url': href,
+ })
+ elif protocol == 'rtmp':
+ application = connection.get('application', 'ondemand')
+ auth_string = connection.get('authString')
+ identifier = connection.get('identifier')
+ server = connection.get('server')
+ fmt.update({
+ 'url': '%s://%s/%s?%s' % (protocol, server, application, auth_string),
+ 'play_path': identifier,
+ 'app': '%s?%s' % (application, auth_string),
+ 'page_url': 'http://www.bbc.co.uk',
+ 'player_url': 'http://www.bbc.co.uk/emp/releases/iplayer/revisions/617463_618125_4/617463_618125_4_emp.swf',
+ 'rtmp_live': False,
+ 'ext': 'flv',
+ })
+ else:
+ continue
+ formats.append(fmt)
+ elif kind == 'captions':
+ subtitles = self.extract_subtitles(media, programme_id)
+ return formats, subtitles
+
+ def _download_playlist(self, playlist_id):
+ try:
+ playlist = self._download_json(
+ 'http://www.bbc.co.uk/programmes/%s/playlist.json' % playlist_id,
+ playlist_id, 'Downloading playlist JSON')
+ formats = []
+ subtitles = {}
+
+ for version in playlist.get('allAvailableVersions', []):
+ smp_config = version['smpConfig']
+ title = smp_config['title']
+ description = smp_config['summary']
+ for item in smp_config['items']:
+ kind = item['kind']
+ if kind not in ('programme', 'radioProgramme'):
+ continue
+ programme_id = item.get('vpid')
+ duration = int_or_none(item.get('duration'))
+ version_formats, version_subtitles = self._download_media_selector(programme_id)
+ types = version['types']
+ for f in version_formats:
+ f['format_note'] = ', '.join(types)
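+                        # De-prioritise audio-described versions during format selection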
+ if any('AudioDescribed' in x for x in types):
+ f['language_preference'] = -10
+ formats += version_formats
+ for tag, subformats in (version_subtitles or {}).items():
+ subtitles.setdefault(tag, []).extend(subformats)
+
+ return programme_id, title, description, duration, formats, subtitles
+ except ExtractorError as ee:
+ if not (isinstance(ee.cause, HTTPError) and ee.cause.status == 404):
+ raise
+
+ # fallback to legacy playlist
+ return self._process_legacy_playlist(playlist_id)
+
+ def _process_legacy_playlist_url(self, url, display_id):
+ playlist = self._download_legacy_playlist_url(url, display_id)
+ return self._extract_from_legacy_playlist(playlist, display_id)
+
+ def _process_legacy_playlist(self, playlist_id):
+ return self._process_legacy_playlist_url(
+ 'http://www.bbc.co.uk/iplayer/playlist/%s' % playlist_id, playlist_id)
+
+ def _download_legacy_playlist_url(self, url, playlist_id=None):
+ return self._download_xml(
+ url, playlist_id, 'Downloading legacy playlist XML')
+
+ def _extract_from_legacy_playlist(self, playlist, playlist_id):
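+        # Legacy EMP playlists signal unavailability with a <noItems reason="..."/> element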
+ no_items = playlist.find('./{%s}noItems' % self._EMP_PLAYLIST_NS)
+ if no_items is not None:
+ reason = no_items.get('reason')
+ if reason == 'preAvailability':
+ msg = 'Episode %s is not yet available' % playlist_id
+ elif reason == 'postAvailability':
+ msg = 'Episode %s is no longer available' % playlist_id
+ elif reason == 'noMedia':
+ msg = 'Episode %s is not currently available' % playlist_id
+ else:
+ msg = 'Episode %s is not available: %s' % (playlist_id, reason)
+ raise ExtractorError(msg, expected=True)
+
+ for item in self._extract_items(playlist):
+ kind = item.get('kind')
+ if kind not in ('programme', 'radioProgramme'):
+ continue
+ title = playlist.find('./{%s}title' % self._EMP_PLAYLIST_NS).text
+ description_el = playlist.find('./{%s}summary' % self._EMP_PLAYLIST_NS)
+ description = description_el.text if description_el is not None else None
+
+ def get_programme_id(item):
+ def get_from_attributes(item):
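+                # vpid-style ids: 'p' or 'b' followed by seven digits or lowercase letters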
+ for p in ('identifier', 'group'):
+ value = item.get(p)
+ if value and re.match(r'^[pb][\da-z]{7}$', value):
+ return value
+            value = get_from_attributes(item)
+            if value:
+                return value
+ mediator = item.find('./{%s}mediator' % self._EMP_PLAYLIST_NS)
+ if mediator is not None:
+ return get_from_attributes(mediator)
+
+ programme_id = get_programme_id(item)
+ duration = int_or_none(item.get('duration'))
+
+ if programme_id:
+ formats, subtitles = self._download_media_selector(programme_id)
+ else:
+ formats, subtitles = self._process_media_selector(item, playlist_id)
+ programme_id = playlist_id
+
+ return programme_id, title, description, duration, formats, subtitles
+
+ def _real_extract(self, url):
+ group_id = self._match_id(url)
+
+ webpage = self._download_webpage(url, group_id, 'Downloading video page')
+
+ error = self._search_regex(
+ r'<div\b[^>]+\bclass=["\'](?:smp|playout)__message delta["\'][^>]*>\s*([^<]+?)\s*<',
+ webpage, 'error', default=None)
+ if error:
+ raise ExtractorError(error, expected=True)
+
+ programme_id = None
+ duration = None
+
+ tviplayer = self._search_regex(
+ r'mediator\.bind\(({.+?})\s*,\s*document\.getElementById',
+ webpage, 'player', default=None)
+
+ if tviplayer:
+ player = self._parse_json(tviplayer, group_id).get('player', {})
+ duration = int_or_none(player.get('duration'))
+ programme_id = player.get('vpid')
+
+ if not programme_id:
+ programme_id = self._search_regex(
+ r'"vpid"\s*:\s*"(%s)"' % self._ID_REGEX, webpage, 'vpid', fatal=False, default=None)
+
+ if programme_id:
+ formats, subtitles = self._download_media_selector(programme_id)
+ title = self._og_search_title(webpage, default=None) or self._html_search_regex(
+ (r'<h2[^>]+id="parent-title"[^>]*>(.+?)</h2>',
+ r'<div[^>]+class="info"[^>]*>\s*<h1>(.+?)</h1>'), webpage, 'title')
+ description = self._search_regex(
+ (r'<p class="[^"]*medium-description[^"]*">([^<]+)</p>',
+ r'<div[^>]+class="info_+synopsis"[^>]*>([^<]+)</div>'),
+ webpage, 'description', default=None)
+ if not description:
+ description = self._html_search_meta('description', webpage)
+ else:
+ programme_id, title, description, duration, formats, subtitles = self._download_playlist(group_id)
+
+ return {
+ 'id': programme_id,
+ 'title': title,
+ 'description': description,
+ 'thumbnail': self._og_search_thumbnail(webpage, default=None),
+ 'duration': duration,
+ 'formats': formats,
+ 'subtitles': subtitles,
+ }
+
+
+class BBCIE(BBCCoUkIE): # XXX: Do not subclass from concrete IE
+ IE_NAME = 'bbc'
+ IE_DESC = 'BBC'
+ _VALID_URL = r'''(?x)
+ https?://(?:www\.)?(?:
+ bbc\.(?:com|co\.uk)|
+ bbcnewsd73hkzno2ini43t4gblxvycyac5aw4gnv7t2rccijh7745uqd\.onion|
+ bbcweb3hytmzhn5d532owbu6oqadra5z3ar726vq5kgwwn6aucdccrad\.onion
+ )/(?:[^/]+/)+(?P<id>[^/#?]+)'''
+
+ _MEDIA_SETS = [
+ 'pc',
+ 'mobile-tablet-main',
+ ]
+
+ _TESTS = [{
+ # article with multiple videos embedded with data-playable containing vpids
+ 'url': 'http://www.bbc.com/news/world-europe-32668511',
+ 'info_dict': {
+ 'id': 'world-europe-32668511',
+ 'title': 'Russia stages massive WW2 parade',
+ 'description': 'md5:00ff61976f6081841f759a08bf78cc9c',
+ },
+ 'playlist_count': 2,
+ }, {
+ # article with multiple videos embedded with data-playable (more videos)
+ 'url': 'http://www.bbc.com/news/business-28299555',
+ 'info_dict': {
+ 'id': 'business-28299555',
+ 'title': 'Farnborough Airshow: Video highlights',
+ 'description': 'BBC reports and video highlights at the Farnborough Airshow.',
+ },
+ 'playlist_count': 9,
+ 'skip': 'Save time',
+ }, {
+ # article with multiple videos embedded with `new SMP()`
+ # broken
+ 'url': 'http://www.bbc.co.uk/blogs/adamcurtis/entries/3662a707-0af9-3149-963f-47bea720b460',
+ 'info_dict': {
+ 'id': '3662a707-0af9-3149-963f-47bea720b460',
+ 'title': 'BUGGER',
+ },
+ 'playlist_count': 18,
+ }, {
+ # single video embedded with data-playable containing vpid
+ 'url': 'http://www.bbc.com/news/world-europe-32041533',
+ 'info_dict': {
+ 'id': 'p02mprgb',
+ 'ext': 'mp4',
+ 'title': 'Aerial footage showed the site of the crash in the Alps - courtesy BFM TV',
+ 'description': 'md5:2868290467291b37feda7863f7a83f54',
+ 'duration': 47,
+ 'timestamp': 1427219242,
+ 'upload_date': '20150324',
+ },
+ 'params': {
+ # rtmp download
+ 'skip_download': True,
+ }
+ }, {
+ # article with single video embedded with data-playable containing XML playlist
+ # with direct video links as progressiveDownloadUrl (for now these are extracted)
+ # and playlist with f4m and m3u8 as streamingUrl
+ 'url': 'http://www.bbc.com/turkce/haberler/2015/06/150615_telabyad_kentin_cogu',
+ 'info_dict': {
+ 'id': '150615_telabyad_kentin_cogu',
+ 'ext': 'mp4',
+ 'title': "YPG: Tel Abyad'ın tamamı kontrolümüzde",
+ 'description': 'md5:33a4805a855c9baf7115fcbde57e7025',
+ 'timestamp': 1434397334,
+ 'upload_date': '20150615',
+ },
+ 'params': {
+ 'skip_download': True,
+ }
+ }, {
+ # single video embedded with data-playable containing XML playlists (regional section)
+ 'url': 'http://www.bbc.com/mundo/video_fotos/2015/06/150619_video_honduras_militares_hospitales_corrupcion_aw',
+ 'info_dict': {
+ 'id': '150619_video_honduras_militares_hospitales_corrupcion_aw',
+ 'ext': 'mp4',
+ 'title': 'Honduras militariza sus hospitales por nuevo escándalo de corrupción',
+ 'description': 'md5:1525f17448c4ee262b64b8f0c9ce66c8',
+ 'timestamp': 1434713142,
+ 'upload_date': '20150619',
+ },
+ 'params': {
+ 'skip_download': True,
+ }
+ }, {
+ # single video from video playlist embedded with vxp-playlist-data JSON
+ 'url': 'http://www.bbc.com/news/video_and_audio/must_see/33376376',
+ 'info_dict': {
+ 'id': 'p02w6qjc',
+ 'ext': 'mp4',
+ 'title': '''Judge Mindy Glazer: "I'm sorry to see you here... I always wondered what happened to you"''',
+ 'duration': 56,
+ 'description': '''Judge Mindy Glazer: "I'm sorry to see you here... I always wondered what happened to you"''',
+ },
+ 'params': {
+ 'skip_download': True,
+ }
+ }, {
+ # single video story with digitalData
+ 'url': 'http://www.bbc.com/travel/story/20150625-sri-lankas-spicy-secret',
+ 'info_dict': {
+ 'id': 'p02q6gc4',
+ 'ext': 'flv',
+ 'title': 'Sri Lanka’s spicy secret',
+ 'description': 'As a new train line to Jaffna opens up the country’s north, travellers can experience a truly distinct slice of Tamil culture.',
+ 'timestamp': 1437674293,
+ 'upload_date': '20150723',
+ },
+ 'params': {
+ # rtmp download
+ 'skip_download': True,
+ }
+ }, {
+ # single video story without digitalData
+ 'url': 'http://www.bbc.com/autos/story/20130513-hyundais-rock-star',
+ 'info_dict': {
+ 'id': 'p018zqqg',
+ 'ext': 'mp4',
+ 'title': 'Hyundai Santa Fe Sport: Rock star',
+ 'description': 'md5:b042a26142c4154a6e472933cf20793d',
+ 'timestamp': 1415867444,
+ 'upload_date': '20141113',
+ },
+ 'params': {
+ # rtmp download
+ 'skip_download': True,
+ }
+ }, {
+ # single video embedded with Morph
+ 'url': 'http://www.bbc.co.uk/sport/live/olympics/36895975',
+ 'info_dict': {
+ 'id': 'p041vhd0',
+ 'ext': 'mp4',
+ 'title': "Nigeria v Japan - Men's First Round",
+ 'description': 'Live coverage of the first round from Group B at the Amazonia Arena.',
+ 'duration': 7980,
+ 'uploader': 'BBC Sport',
+ 'uploader_id': 'bbc_sport',
+ },
+ 'params': {
+ # m3u8 download
+ 'skip_download': True,
+ },
+ 'skip': 'Georestricted to UK',
+ }, {
+ # single video with playlist.sxml URL in playlist param
+ 'url': 'http://www.bbc.com/sport/0/football/33653409',
+ 'info_dict': {
+ 'id': 'p02xycnp',
+ 'ext': 'mp4',
+ 'title': 'Transfers: Cristiano Ronaldo to Man Utd, Arsenal to spend?',
+ 'description': 'BBC Sport\'s David Ornstein has the latest transfer gossip, including rumours of a Manchester United return for Cristiano Ronaldo.',
+ 'duration': 140,
+ },
+ 'params': {
+ # rtmp download
+ 'skip_download': True,
+ }
+ }, {
+ # article with multiple videos embedded with playlist.sxml in playlist param
+ 'url': 'http://www.bbc.com/sport/0/football/34475836',
+ 'info_dict': {
+ 'id': '34475836',
+ 'title': 'Jurgen Klopp: Furious football from a witty and winning coach',
+ 'description': 'Fast-paced football, wit, wisdom and a ready smile - why Liverpool fans should come to love new boss Jurgen Klopp.',
+ },
+ 'playlist_count': 3,
+ }, {
+ # school report article with single video
+ 'url': 'http://www.bbc.co.uk/schoolreport/35744779',
+ 'info_dict': {
+ 'id': '35744779',
+ 'title': 'School which breaks down barriers in Jerusalem',
+ },
+ 'playlist_count': 1,
+ }, {
+ # single video with playlist URL from weather section
+ 'url': 'http://www.bbc.com/weather/features/33601775',
+ 'only_matching': True,
+ }, {
+ # custom redirection to www.bbc.com
+ # also, video with window.__INITIAL_DATA__
+ 'url': 'http://www.bbc.co.uk/news/science-environment-33661876',
+ 'info_dict': {
+ 'id': 'p02xzws1',
+ 'ext': 'mp4',
+ 'title': "Pluto may have 'nitrogen glaciers'",
+ 'description': 'md5:6a95b593f528d7a5f2605221bc56912f',
+ 'thumbnail': r're:https?://.+/.+\.jpg',
+ 'timestamp': 1437785037,
+ 'upload_date': '20150725',
+ },
+ }, {
+ # video with window.__INITIAL_DATA__ and value as JSON string
+ 'url': 'https://www.bbc.com/news/av/world-europe-59468682',
+ 'info_dict': {
+ 'id': 'p0b71qth',
+ 'ext': 'mp4',
+ 'title': 'Why France is making this woman a national hero',
+ 'description': 'md5:7affdfab80e9c3a1f976230a1ff4d5e4',
+ 'thumbnail': r're:https?://.+/.+\.jpg',
+ 'timestamp': 1638230731,
+ 'upload_date': '20211130',
+ },
+ }, {
+ # single video article embedded with data-media-vpid
+ 'url': 'http://www.bbc.co.uk/sport/rowing/35908187',
+ 'only_matching': True,
+ }, {
+ # bbcthreeConfig
+ 'url': 'https://www.bbc.co.uk/bbcthree/clip/73d0bbd0-abc3-4cea-b3c0-cdae21905eb1',
+ 'info_dict': {
+ 'id': 'p06556y7',
+ 'ext': 'mp4',
+ 'title': 'Things Not To Say to people that live on council estates',
+ 'description': "From being labelled a 'chav', to the presumption that they're 'scroungers', people who live on council estates encounter all kinds of prejudices and false assumptions about themselves, their families, and their lifestyles. Here, eight people discuss the common statements, misconceptions, and clichés that they're tired of hearing.",
+ 'duration': 360,
+ 'thumbnail': r're:https?://.+/.+\.jpg',
+ },
+ }, {
+ # window.__PRELOADED_STATE__
+ 'url': 'https://www.bbc.co.uk/radio/play/b0b9z4yl',
+ 'info_dict': {
+ 'id': 'b0b9z4vz',
+ 'ext': 'mp4',
+ 'title': 'Prom 6: An American in Paris and Turangalila',
+ 'description': 'md5:51cf7d6f5c8553f197e58203bc78dff8',
+ 'uploader': 'Radio 3',
+ 'uploader_id': 'bbc_radio_three',
+ },
+ }, {
+ 'url': 'http://www.bbc.co.uk/learningenglish/chinese/features/lingohack/ep-181227',
+ 'info_dict': {
+ 'id': 'p06w9tws',
+ 'ext': 'mp4',
+ 'title': 'md5:2fabf12a726603193a2879a055f72514',
+ 'description': 'Learn English words and phrases from this story',
+ },
+ 'add_ie': [BBCCoUkIE.ie_key()],
+ }, {
+ # BBC Reel
+ 'url': 'https://www.bbc.com/reel/video/p07c6sb6/how-positive-thinking-is-harming-your-happiness',
+ 'info_dict': {
+ 'id': 'p07c6sb9',
+ 'ext': 'mp4',
+ 'title': 'How positive thinking is harming your happiness',
+ 'alt_title': 'The downsides of positive thinking',
+ 'description': 'md5:fad74b31da60d83b8265954ee42d85b4',
+ 'duration': 235,
+ 'thumbnail': r're:https?://.+/p07c9dsr.jpg',
+ 'upload_date': '20190604',
+ 'categories': ['Psychology'],
+ },
+ }, {
+ # BBC Sounds
+ 'url': 'https://www.bbc.co.uk/sounds/play/m001q78b',
+ 'info_dict': {
+ 'id': 'm001q789',
+ 'ext': 'mp4',
+ 'title': 'The Night Tracks Mix - Music for the darkling hour',
+ 'thumbnail': 'https://ichef.bbci.co.uk/images/ic/raw/p0c00hym.jpg',
+ 'chapters': 'count:8',
+ 'description': 'md5:815fb51cbdaa270040aab8145b3f1d67',
+ 'uploader': 'Radio 3',
+ 'duration': 1800,
+ 'uploader_id': 'bbc_radio_three',
+ },
+ }, { # onion routes
+ 'url': 'https://www.bbcnewsd73hkzno2ini43t4gblxvycyac5aw4gnv7t2rccijh7745uqd.onion/news/av/world-europe-63208576',
+ 'only_matching': True,
+ }, {
+ 'url': 'https://www.bbcweb3hytmzhn5d532owbu6oqadra5z3ar726vq5kgwwn6aucdccrad.onion/sport/av/football/63195681',
+ 'only_matching': True,
+ }]
+
+ @classmethod
+ def suitable(cls, url):
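+        # Defer to the more specific BBC extractors whenever their URL patterns match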
+ EXCLUDE_IE = (BBCCoUkIE, BBCCoUkArticleIE, BBCCoUkIPlayerEpisodesIE, BBCCoUkIPlayerGroupIE, BBCCoUkPlaylistIE)
+        return (False if any(ie.suitable(url) for ie in EXCLUDE_IE)
+                else super().suitable(url))
+
+ def _extract_from_media_meta(self, media_meta, video_id):
+ # Direct links to media in media metadata (e.g.
+ # http://www.bbc.com/turkce/haberler/2015/06/150615_telabyad_kentin_cogu)
+ # TODO: there are also f4m and m3u8 streams incorporated in playlist.sxml
+ source_files = media_meta.get('sourceFiles')
+ if source_files:
+ return [{
+ 'url': f['url'],
+ 'format_id': format_id,
+ 'ext': f.get('encoding'),
+ 'tbr': float_or_none(f.get('bitrate'), 1000),
+ 'filesize': int_or_none(f.get('filesize')),
+ } for format_id, f in source_files.items() if f.get('url')], []
+
+ programme_id = media_meta.get('externalId')
+ if programme_id:
+ return self._download_media_selector(programme_id)
+
+ # Process playlist.sxml as legacy playlist
+ href = media_meta.get('href')
+ if href:
+ playlist = self._download_legacy_playlist_url(href)
+ _, _, _, _, formats, subtitles = self._extract_from_legacy_playlist(playlist, video_id)
+ return formats, subtitles
+
+ return [], []
+
+ def _extract_from_playlist_sxml(self, url, playlist_id, timestamp):
+ programme_id, title, description, duration, formats, subtitles = \
+ self._process_legacy_playlist_url(url, playlist_id)
+ return {
+ 'id': programme_id,
+ 'title': title,
+ 'description': description,
+ 'duration': duration,
+ 'timestamp': timestamp,
+ 'formats': formats,
+ 'subtitles': subtitles,
+ }
+
+ def _real_extract(self, url):
+ playlist_id = self._match_id(url)
+
+ webpage = self._download_webpage(url, playlist_id)
+
+ json_ld_info = self._search_json_ld(webpage, playlist_id, default={})
+ timestamp = json_ld_info.get('timestamp')
+
+ playlist_title = json_ld_info.get('title') or re.sub(
+ r'(.+)\s*-\s*BBC.*?$', r'\1', self._generic_title('', webpage, default='')).strip() or None
+
+ playlist_description = json_ld_info.get(
+ 'description') or self._og_search_description(webpage, default=None)
+
+ if not timestamp:
+ timestamp = parse_iso8601(self._search_regex(
+ [r'<meta[^>]+property="article:published_time"[^>]+content="([^"]+)"',
+ r'itemprop="datePublished"[^>]+datetime="([^"]+)"',
+ r'"datePublished":\s*"([^"]+)'],
+ webpage, 'date', default=None))
+
+ entries = []
+
+ # article with multiple videos embedded with playlist.sxml (e.g.
+ # http://www.bbc.com/sport/0/football/34475836)
+ playlists = re.findall(r'<param[^>]+name="playlist"[^>]+value="([^"]+)"', webpage)
+ playlists.extend(re.findall(r'data-media-id="([^"]+/playlist\.sxml)"', webpage))
+ if playlists:
+ entries = [
+ self._extract_from_playlist_sxml(playlist_url, playlist_id, timestamp)
+ for playlist_url in playlists]
+
+ # news article with multiple videos embedded with data-playable
+ data_playables = re.findall(r'data-playable=(["\'])({.+?})\1', webpage)
+ if data_playables:
+ for _, data_playable_json in data_playables:
+ data_playable = self._parse_json(
+ unescapeHTML(data_playable_json), playlist_id, fatal=False)
+ if not data_playable:
+ continue
+ settings = data_playable.get('settings', {})
+ if settings:
+ # data-playable with video vpid in settings.playlistObject.items (e.g.
+ # http://www.bbc.com/news/world-us-canada-34473351)
+ playlist_object = settings.get('playlistObject', {})
+ if playlist_object:
+ items = playlist_object.get('items')
+ if items and isinstance(items, list):
+ title = playlist_object['title']
+ description = playlist_object.get('summary')
+ duration = int_or_none(items[0].get('duration'))
+ programme_id = items[0].get('vpid')
+ formats, subtitles = self._download_media_selector(programme_id)
+ entries.append({
+ 'id': programme_id,
+ 'title': title,
+ 'description': description,
+ 'timestamp': timestamp,
+ 'duration': duration,
+ 'formats': formats,
+ 'subtitles': subtitles,
+ })
+ else:
+                        # data-playable without vpid but with playlist.sxml URLs
+ # in otherSettings.playlist (e.g.
+ # http://www.bbc.com/turkce/multimedya/2015/10/151010_vid_ankara_patlama_ani)
+ playlist = data_playable.get('otherSettings', {}).get('playlist', {})
+ if playlist:
+ entry = None
+ for key in ('streaming', 'progressiveDownload'):
+ playlist_url = playlist.get('%sUrl' % key)
+ if not playlist_url:
+ continue
+ try:
+ info = self._extract_from_playlist_sxml(
+ playlist_url, playlist_id, timestamp)
+ if not entry:
+ entry = info
+ else:
+ entry['title'] = info['title']
+ entry['formats'].extend(info['formats'])
+ except ExtractorError as e:
+                                    # One playlist URL may fail with HTTP 500 while
+                                    # another may work fine (e.g.
+ # http://www.bbc.com/turkce/haberler/2015/06/150615_telabyad_kentin_cogu)
+ if isinstance(e.cause, HTTPError) and e.cause.status == 500:
+ continue
+ raise
+ if entry:
+ entries.append(entry)
+
+ if entries:
+ return self.playlist_result(entries, playlist_id, playlist_title, playlist_description)
+
+ # http://www.bbc.co.uk/learningenglish/chinese/features/lingohack/ep-181227
+ group_id = self._search_regex(
+ r'<div[^>]+\bclass=["\']video["\'][^>]+\bdata-pid=["\'](%s)' % self._ID_REGEX,
+ webpage, 'group id', default=None)
+ if group_id:
+ return self.url_result(
+ 'https://www.bbc.co.uk/programmes/%s' % group_id,
+ ie=BBCCoUkIE.ie_key())
+
+ # single video story (e.g. http://www.bbc.com/travel/story/20150625-sri-lankas-spicy-secret)
+ programme_id = self._search_regex(
+ [r'data-(?:video-player|media)-vpid="(%s)"' % self._ID_REGEX,
+ r'<param[^>]+name="externalIdentifier"[^>]+value="(%s)"' % self._ID_REGEX,
+ r'videoId\s*:\s*["\'](%s)["\']' % self._ID_REGEX],
+ webpage, 'vpid', default=None)
+
+ if programme_id:
+ formats, subtitles = self._download_media_selector(programme_id)
+ # digitalData may be missing (e.g. http://www.bbc.com/autos/story/20130513-hyundais-rock-star)
+ digital_data = self._parse_json(
+ self._search_regex(
+ r'var\s+digitalData\s*=\s*({.+?});?\n', webpage, 'digital data', default='{}'),
+ programme_id, fatal=False)
+ page_info = digital_data.get('page', {}).get('pageInfo', {})
+ title = page_info.get('pageName') or self._og_search_title(webpage)
+ description = page_info.get('description') or self._og_search_description(webpage)
+ timestamp = parse_iso8601(page_info.get('publicationDate')) or timestamp
+ return {
+ 'id': programme_id,
+ 'title': title,
+ 'description': description,
+ 'timestamp': timestamp,
+ 'formats': formats,
+ 'subtitles': subtitles,
+ }
+
+ # bbc reel (e.g. https://www.bbc.com/reel/video/p07c6sb6/how-positive-thinking-is-harming-your-happiness)
+ initial_data = self._parse_json(self._html_search_regex(
+ r'<script[^>]+id=(["\'])initial-data\1[^>]+data-json=(["\'])(?P<json>(?:(?!\2).)+)',
+ webpage, 'initial data', default='{}', group='json'), playlist_id, fatal=False)
+ if initial_data:
+ init_data = try_get(
+ initial_data, lambda x: x['initData']['items'][0], dict) or {}
+ smp_data = init_data.get('smpData') or {}
+ clip_data = try_get(smp_data, lambda x: x['items'][0], dict) or {}
+ version_id = clip_data.get('versionID')
+ if version_id:
+ title = smp_data['title']
+ formats, subtitles = self._download_media_selector(version_id)
+ image_url = smp_data.get('holdingImageURL')
+ display_date = init_data.get('displayDate')
+ topic_title = init_data.get('topicTitle')
+
+ return {
+ 'id': version_id,
+ 'title': title,
+ 'formats': formats,
+ 'alt_title': init_data.get('shortTitle'),
+ 'thumbnail': image_url.replace('$recipe', 'raw') if image_url else None,
+ 'description': smp_data.get('summary') or init_data.get('shortSummary'),
+ 'upload_date': display_date.replace('-', '') if display_date else None,
+ 'subtitles': subtitles,
+ 'duration': int_or_none(clip_data.get('duration')),
+ 'categories': [topic_title] if topic_title else None,
+ }
+
+ # Morph based embed (e.g. http://www.bbc.co.uk/sport/live/olympics/36895975)
+        # Several setPayload calls may be present, but the video always seems
+        # to be related to the first one
+ morph_payload = self._parse_json(
+ self._search_regex(
+ r'Morph\.setPayload\([^,]+,\s*({.+?})\);',
+ webpage, 'morph payload', default='{}'),
+ playlist_id, fatal=False)
+ if morph_payload:
+ components = try_get(morph_payload, lambda x: x['body']['components'], list) or []
+ for component in components:
+ if not isinstance(component, dict):
+ continue
+ lead_media = try_get(component, lambda x: x['props']['leadMedia'], dict)
+ if not lead_media:
+ continue
+ identifiers = lead_media.get('identifiers')
+ if not identifiers or not isinstance(identifiers, dict):
+ continue
+ programme_id = identifiers.get('vpid') or identifiers.get('playablePid')
+ if not programme_id:
+ continue
+ title = lead_media.get('title') or self._og_search_title(webpage)
+ formats, subtitles = self._download_media_selector(programme_id)
+ description = lead_media.get('summary')
+ uploader = lead_media.get('masterBrand')
+ uploader_id = lead_media.get('mid')
+ duration = None
+ duration_d = lead_media.get('duration')
+ if isinstance(duration_d, dict):
+ duration = parse_duration(dict_get(
+ duration_d, ('rawDuration', 'formattedDuration', 'spokenDuration')))
+ return {
+ 'id': programme_id,
+ 'title': title,
+ 'description': description,
+ 'duration': duration,
+ 'uploader': uploader,
+ 'uploader_id': uploader_id,
+ 'formats': formats,
+ 'subtitles': subtitles,
+ }
+
+ preload_state = self._parse_json(self._search_regex(
+ r'window\.__PRELOADED_STATE__\s*=\s*({.+?});', webpage,
+ 'preload state', default='{}'), playlist_id, fatal=False)
+ if preload_state:
+ current_programme = preload_state.get('programmes', {}).get('current') or {}
+ programme_id = current_programme.get('id')
+ if current_programme and programme_id and current_programme.get('type') == 'playable_item':
+ title = current_programme.get('titles', {}).get('tertiary') or playlist_title
+ formats, subtitles = self._download_media_selector(programme_id)
+ synopses = current_programme.get('synopses') or {}
+ network = current_programme.get('network') or {}
+ duration = int_or_none(
+ current_programme.get('duration', {}).get('value'))
+ thumbnail = None
+ image_url = current_programme.get('image_url')
+ if image_url:
+ thumbnail = image_url.replace('{recipe}', 'raw')
+ return {
+ 'id': programme_id,
+ 'title': title,
+ 'description': dict_get(synopses, ('long', 'medium', 'short')),
+ 'thumbnail': thumbnail,
+ 'duration': duration,
+ 'uploader': network.get('short_title'),
+ 'uploader_id': network.get('id'),
+ 'formats': formats,
+ 'subtitles': subtitles,
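+                # One chapter per tracklist entry that has a numeric start offset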
+ 'chapters': traverse_obj(preload_state, (
+ 'tracklist', 'tracks', lambda _, v: float_or_none(v['offset']['start']), {
+ 'title': ('titles', {lambda x: join_nonempty(
+ 'primary', 'secondary', 'tertiary', delim=' - ', from_dict=x)}),
+ 'start_time': ('offset', 'start', {float_or_none}),
+ 'end_time': ('offset', 'end', {float_or_none}),
+ })) or None,
+ }
+
+ bbc3_config = self._parse_json(
+ self._search_regex(
+ r'(?s)bbcthreeConfig\s*=\s*({.+?})\s*;\s*<', webpage,
+ 'bbcthree config', default='{}'),
+ playlist_id, transform_source=js_to_json, fatal=False) or {}
+ payload = bbc3_config.get('payload') or {}
+ if payload:
+ clip = payload.get('currentClip') or {}
+ clip_vpid = clip.get('vpid')
+ clip_title = clip.get('title')
+ if clip_vpid and clip_title:
+ formats, subtitles = self._download_media_selector(clip_vpid)
+ return {
+ 'id': clip_vpid,
+ 'title': clip_title,
+ 'thumbnail': dict_get(clip, ('poster', 'imageUrl')),
+ 'description': clip.get('description'),
+ 'duration': parse_duration(clip.get('duration')),
+ 'formats': formats,
+ 'subtitles': subtitles,
+ }
+ bbc3_playlist = try_get(
+ payload, lambda x: x['content']['bbcMedia']['playlist'],
+ dict)
+ if bbc3_playlist:
+ playlist_title = bbc3_playlist.get('title') or playlist_title
+ thumbnail = bbc3_playlist.get('holdingImageURL')
+ entries = []
+ for bbc3_item in bbc3_playlist['items']:
+ programme_id = bbc3_item.get('versionID')
+ if not programme_id:
+ continue
+ formats, subtitles = self._download_media_selector(programme_id)
+ entries.append({
+ 'id': programme_id,
+ 'title': playlist_title,
+ 'thumbnail': thumbnail,
+ 'timestamp': timestamp,
+ 'formats': formats,
+ 'subtitles': subtitles,
+ })
+ return self.playlist_result(
+ entries, playlist_id, playlist_title, playlist_description)
+
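+        # Newer pages embed window.__INITIAL_DATA__ as a JSON-encoded string that
+        # needs to be JSON-decoded twice; older pages embed the object directly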
+ initial_data = self._search_regex(
+ r'window\.__INITIAL_DATA__\s*=\s*("{.+?}")\s*;', webpage,
+ 'quoted preload state', default=None)
+ if initial_data is None:
+ initial_data = self._search_regex(
+ r'window\.__INITIAL_DATA__\s*=\s*({.+?})\s*;', webpage,
+ 'preload state', default='{}')
+ else:
+ initial_data = self._parse_json(initial_data or '"{}"', playlist_id, fatal=False)
+ initial_data = self._parse_json(initial_data, playlist_id, fatal=False)
+ if initial_data:
+ def parse_media(media):
+ if not media:
+ return
+ for item in (try_get(media, lambda x: x['media']['items'], list) or []):
+ item_id = item.get('id')
+ item_title = item.get('title')
+ if not (item_id and item_title):
+ continue
+ formats, subtitles = self._download_media_selector(item_id)
+ item_desc = None
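+                # Join the text of all summary blocks into the description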
+ blocks = try_get(media, lambda x: x['summary']['blocks'], list)
+ if blocks:
+ summary = []
+ for block in blocks:
+ text = try_get(block, lambda x: x['model']['text'], compat_str)
+ if text:
+ summary.append(text)
+ if summary:
+ item_desc = '\n\n'.join(summary)
+ item_time = None
+ for meta in try_get(media, lambda x: x['metadata']['items'], list) or []:
+ if try_get(meta, lambda x: x['label']) == 'Published':
+ item_time = unified_timestamp(meta.get('timestamp'))
+ break
+ entries.append({
+ 'id': item_id,
+ 'title': item_title,
+ 'thumbnail': item.get('holdingImageUrl'),
+ 'formats': formats,
+ 'subtitles': subtitles,
+ 'timestamp': item_time,
+ 'description': strip_or_none(item_desc),
+ })
+ for resp in (initial_data.get('data') or {}).values():
+ name = resp.get('name')
+ if name == 'media-experience':
+ parse_media(try_get(resp, lambda x: x['data']['initialItem']['mediaItem'], dict))
+ elif name == 'article':
+ for block in (try_get(resp,
+ (lambda x: x['data']['blocks'],
+ lambda x: x['data']['content']['model']['blocks'],),
+ list) or []):
+ if block.get('type') not in ['media', 'video']:
+ continue
+ parse_media(block.get('model'))
+ return self.playlist_result(
+ entries, playlist_id, playlist_title, playlist_description)
+
+ def extract_all(pattern):
+ return list(filter(None, map(
+ lambda s: self._parse_json(s, playlist_id, fatal=False),
+ re.findall(pattern, webpage))))
+
+ # Multiple video article (e.g.
+ # http://www.bbc.co.uk/blogs/adamcurtis/entries/3662a707-0af9-3149-963f-47bea720b460)
+ EMBED_URL = r'https?://(?:www\.)?bbc\.co\.uk/(?:[^/]+/)+%s(?:\b[^"]+)?' % self._ID_REGEX
+ entries = []
+ for match in extract_all(r'new\s+SMP\(({.+?})\)'):
+ embed_url = match.get('playerSettings', {}).get('externalEmbedUrl')
+ if embed_url and re.match(EMBED_URL, embed_url):
+ entries.append(embed_url)
+ entries.extend(re.findall(
+ r'setPlaylist\("(%s)"\)' % EMBED_URL, webpage))
+ if entries:
+ return self.playlist_result(
+ [self.url_result(entry_, 'BBCCoUk') for entry_ in entries],
+ playlist_id, playlist_title, playlist_description)
+
+ # Multiple video article (e.g. http://www.bbc.com/news/world-europe-32668511)
+ medias = extract_all(r"data-media-meta='({[^']+})'")
+
+ if not medias:
+ # Single video article (e.g. http://www.bbc.com/news/video_and_audio/international)
+ media_asset = self._search_regex(
+ r'mediaAssetPage\.init\(\s*({.+?}), "/',
+ webpage, 'media asset', default=None)
+ if media_asset:
+ media_asset_page = self._parse_json(media_asset, playlist_id, fatal=False)
+ medias = []
+ for video in media_asset_page.get('videos', {}).values():
+ medias.extend(video.values())
+
+ if not medias:
+ # Multiple video playlist with single `now playing` entry (e.g.
+ # http://www.bbc.com/news/video_and_audio/must_see/33767813)
+ vxp_playlist = self._parse_json(
+ self._search_regex(
+ r'<script[^>]+class="vxp-playlist-data"[^>]+type="application/json"[^>]*>([^<]+)</script>',
+ webpage, 'playlist data'),
+ playlist_id)
+ playlist_medias = []
+ for item in vxp_playlist:
+ media = item.get('media')
+ if not media:
+ continue
+ playlist_medias.append(media)
+                # Download a single video if we found media whose asset id matches the video id from the URL
+ if item.get('advert', {}).get('assetId') == playlist_id:
+ medias = [media]
+ break
+ # Fallback to the whole playlist
+ if not medias:
+ medias = playlist_medias
+
+ entries = []
+ for num, media_meta in enumerate(medias, start=1):
+ formats, subtitles = self._extract_from_media_meta(media_meta, playlist_id)
+ if not formats and not self.get_param('ignore_no_formats'):
+ continue
+
+ video_id = media_meta.get('externalId')
+ if not video_id:
+ video_id = playlist_id if len(medias) == 1 else '%s-%s' % (playlist_id, num)
+
+ title = media_meta.get('caption')
+ if not title:
+ title = playlist_title if len(medias) == 1 else '%s - Video %s' % (playlist_title, num)
+
+ duration = int_or_none(media_meta.get('durationInSeconds')) or parse_duration(media_meta.get('duration'))
+
+ images = []
+ for image in media_meta.get('images', {}).values():
+ images.extend(image.values())
+ if 'image' in media_meta:
+ images.append(media_meta['image'])
+
+ thumbnails = [{
+ 'url': image.get('href'),
+ 'width': int_or_none(image.get('width')),
+ 'height': int_or_none(image.get('height')),
+ } for image in images]
+
+ entries.append({
+ 'id': video_id,
+ 'title': title,
+ 'thumbnails': thumbnails,
+ 'duration': duration,
+ 'timestamp': timestamp,
+ 'formats': formats,
+ 'subtitles': subtitles,
+ })
+
+ return self.playlist_result(entries, playlist_id, playlist_title, playlist_description)
+
+
+class BBCCoUkArticleIE(InfoExtractor):
+ _VALID_URL = r'https?://(?:www\.)?bbc\.co\.uk/programmes/articles/(?P<id>[a-zA-Z0-9]+)'
+ IE_NAME = 'bbc.co.uk:article'
+ IE_DESC = 'BBC articles'
+
+ _TEST = {
+ 'url': 'http://www.bbc.co.uk/programmes/articles/3jNQLTMrPlYGTBn0WV6M2MS/not-your-typical-role-model-ada-lovelace-the-19th-century-programmer',
+ 'info_dict': {
+ 'id': '3jNQLTMrPlYGTBn0WV6M2MS',
+ 'title': 'Calculating Ada: The Countess of Computing - Not your typical role model: Ada Lovelace the 19th century programmer - BBC Four',
+ 'description': 'Hannah Fry reveals some of her surprising discoveries about Ada Lovelace during filming.',
+ },
+ 'playlist_count': 4,
+ 'add_ie': ['BBCCoUk'],
+ }
+
+ def _real_extract(self, url):
+ playlist_id = self._match_id(url)
+
+ webpage = self._download_webpage(url, playlist_id)
+
+ title = self._og_search_title(webpage)
+ description = self._og_search_description(webpage).strip()
+
+ entries = [self.url_result(programme_url) for programme_url in re.findall(
+ r'<div[^>]+typeof="Clip"[^>]+resource="([^"]+)"', webpage)]
+
+ return self.playlist_result(entries, playlist_id, title, description)
+
+
+class BBCCoUkPlaylistBaseIE(InfoExtractor):
+ def _entries(self, webpage, url, playlist_id):
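+        # An explicit 'page' query parameter pins extraction to that single page;
+        # otherwise follow the 'next' pagination links until they run out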
+ single_page = 'page' in compat_urlparse.parse_qs(
+ compat_urlparse.urlparse(url).query)
+ for page_num in itertools.count(2):
+ for video_id in re.findall(
+ self._VIDEO_ID_TEMPLATE % BBCCoUkIE._ID_REGEX, webpage):
+ yield self.url_result(
+ self._URL_TEMPLATE % video_id, BBCCoUkIE.ie_key())
+ if single_page:
+ return
+ next_page = self._search_regex(
+ r'<li[^>]+class=(["\'])pagination_+next\1[^>]*><a[^>]+href=(["\'])(?P<url>(?:(?!\2).)+)\2',
+ webpage, 'next page url', default=None, group='url')
+ if not next_page:
+ break
+ webpage = self._download_webpage(
+ compat_urlparse.urljoin(url, next_page), playlist_id,
+ 'Downloading page %d' % page_num, page_num)
+
+ def _real_extract(self, url):
+ playlist_id = self._match_id(url)
+
+ webpage = self._download_webpage(url, playlist_id)
+
+ title, description = self._extract_title_and_description(webpage)
+
+ return self.playlist_result(
+ self._entries(webpage, url, playlist_id),
+ playlist_id, title, description)
+
+
+class BBCCoUkIPlayerPlaylistBaseIE(InfoExtractor):
+ _VALID_URL_TMPL = r'https?://(?:www\.)?bbc\.co\.uk/iplayer/%%s/(?P<id>%s)' % BBCCoUkIE._ID_REGEX
+
+ @staticmethod
+ def _get_default(episode, key, default_key='default'):
+ return try_get(episode, lambda x: x[key][default_key])
+
+ def _get_description(self, data):
+ synopsis = data.get(self._DESCRIPTION_KEY) or {}
+ return dict_get(synopsis, ('large', 'medium', 'small'))
+
+ def _fetch_page(self, programme_id, per_page, series_id, page):
+ elements = self._get_elements(self._call_api(
+ programme_id, per_page, page + 1, series_id))
+ for element in elements:
+ episode = self._get_episode(element)
+ episode_id = episode.get('id')
+ if not episode_id:
+ continue
+ thumbnail = None
+ image = self._get_episode_image(episode)
+ if image:
+ thumbnail = image.replace('{recipe}', 'raw')
+ category = self._get_default(episode, 'labels', 'category')
+ yield {
+ '_type': 'url',
+ 'id': episode_id,
+ 'title': self._get_episode_field(episode, 'subtitle'),
+ 'url': 'https://www.bbc.co.uk/iplayer/episode/' + episode_id,
+ 'thumbnail': thumbnail,
+ 'description': self._get_description(episode),
+ 'categories': [category] if category else None,
+ 'series': self._get_episode_field(episode, 'title'),
+ 'ie_key': BBCCoUkIE.ie_key(),
+ }
+
+ def _real_extract(self, url):
+ pid = self._match_id(url)
+ qs = parse_qs(url)
+ series_id = qs.get('seriesId', [None])[0]
+ page = qs.get('page', [None])[0]
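+        # With an explicit ?page=N fetch only that page of 36 items; otherwise
+        # paginate lazily through all episodes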
+ per_page = 36 if page else self._PAGE_SIZE
+ fetch_page = functools.partial(self._fetch_page, pid, per_page, series_id)
+ entries = fetch_page(int(page) - 1) if page else OnDemandPagedList(fetch_page, self._PAGE_SIZE)
+ playlist_data = self._get_playlist_data(self._call_api(pid, 1))
+ return self.playlist_result(
+ entries, pid, self._get_playlist_title(playlist_data),
+ self._get_description(playlist_data))
+
+
+class BBCCoUkIPlayerEpisodesIE(BBCCoUkIPlayerPlaylistBaseIE):
+ IE_NAME = 'bbc.co.uk:iplayer:episodes'
+ _VALID_URL = BBCCoUkIPlayerPlaylistBaseIE._VALID_URL_TMPL % 'episodes'
+ _TESTS = [{
+ 'url': 'http://www.bbc.co.uk/iplayer/episodes/b05rcz9v',
+ 'info_dict': {
+ 'id': 'b05rcz9v',
+ 'title': 'The Disappearance',
+ 'description': 'md5:58eb101aee3116bad4da05f91179c0cb',
+ },
+ 'playlist_mincount': 8,
+ }, {
+ # all seasons
+ 'url': 'https://www.bbc.co.uk/iplayer/episodes/b094m5t9/doctor-foster',
+ 'info_dict': {
+ 'id': 'b094m5t9',
+ 'title': 'Doctor Foster',
+ 'description': 'md5:5aa9195fad900e8e14b52acd765a9fd6',
+ },
+ 'playlist_mincount': 10,
+ }, {
+ # explicit season
+ 'url': 'https://www.bbc.co.uk/iplayer/episodes/b094m5t9/doctor-foster?seriesId=b094m6nv',
+ 'info_dict': {
+ 'id': 'b094m5t9',
+ 'title': 'Doctor Foster',
+ 'description': 'md5:5aa9195fad900e8e14b52acd765a9fd6',
+ },
+ 'playlist_mincount': 5,
+ }, {
+ # all pages
+ 'url': 'https://www.bbc.co.uk/iplayer/episodes/m0004c4v/beechgrove',
+ 'info_dict': {
+ 'id': 'm0004c4v',
+ 'title': 'Beechgrove',
+ 'description': 'Gardening show that celebrates Scottish horticulture and growing conditions.',
+ },
+ 'playlist_mincount': 37,
+ }, {
+ # explicit page
+ 'url': 'https://www.bbc.co.uk/iplayer/episodes/m0004c4v/beechgrove?page=2',
+ 'info_dict': {
+ 'id': 'm0004c4v',
+ 'title': 'Beechgrove',
+ 'description': 'Gardening show that celebrates Scottish horticulture and growing conditions.',
+ },
+ 'playlist_mincount': 1,
+ }]
+ _PAGE_SIZE = 100
+ _DESCRIPTION_KEY = 'synopsis'
+
+ def _get_episode_image(self, episode):
+ return self._get_default(episode, 'image')
+
+ def _get_episode_field(self, episode, field):
+ return self._get_default(episode, field)
+
+ @staticmethod
+ def _get_elements(data):
+ return data['entities']['results']
+
+ @staticmethod
+ def _get_episode(element):
+ return element.get('episode') or {}
+
+ def _call_api(self, pid, per_page, page=1, series_id=None):
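+        # POST to the iBL GraphQL endpoint; the fixed 'id' appears to select a
+        # stored server-side (persisted) query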
+ variables = {
+ 'id': pid,
+ 'page': page,
+ 'perPage': per_page,
+ }
+ if series_id:
+ variables['sliceId'] = series_id
+ return self._download_json(
+ 'https://graph.ibl.api.bbc.co.uk/', pid, headers={
+ 'Content-Type': 'application/json'
+ }, data=json.dumps({
+ 'id': '5692d93d5aac8d796a0305e895e61551',
+ 'variables': variables,
+ }).encode('utf-8'))['data']['programme']
+
+ @staticmethod
+ def _get_playlist_data(data):
+ return data
+
+ def _get_playlist_title(self, data):
+ return self._get_default(data, 'title')
+
+
+class BBCCoUkIPlayerGroupIE(BBCCoUkIPlayerPlaylistBaseIE):
+ IE_NAME = 'bbc.co.uk:iplayer:group'
+ _VALID_URL = BBCCoUkIPlayerPlaylistBaseIE._VALID_URL_TMPL % 'group'
+ _TESTS = [{
+        # Available for over a year, unlike the 30 days for most other programmes
+ 'url': 'http://www.bbc.co.uk/iplayer/group/p02tcc32',
+ 'info_dict': {
+ 'id': 'p02tcc32',
+ 'title': 'Bohemian Icons',
+ 'description': 'md5:683e901041b2fe9ba596f2ab04c4dbe7',
+ },
+ 'playlist_mincount': 10,
+ }, {
+ # all pages
+ 'url': 'https://www.bbc.co.uk/iplayer/group/p081d7j7',
+ 'info_dict': {
+ 'id': 'p081d7j7',
+ 'title': 'Music in Scotland',
+ 'description': 'Perfomances in Scotland and programmes featuring Scottish acts.',
+ },
+ 'playlist_mincount': 47,
+ }, {
+ # explicit page
+ 'url': 'https://www.bbc.co.uk/iplayer/group/p081d7j7?page=2',
+ 'info_dict': {
+ 'id': 'p081d7j7',
+ 'title': 'Music in Scotland',
+ 'description': 'Perfomances in Scotland and programmes featuring Scottish acts.',
+ },
+ 'playlist_mincount': 11,
+ }]
+ _PAGE_SIZE = 200
+ _DESCRIPTION_KEY = 'synopses'
+
+ def _get_episode_image(self, episode):
+ return self._get_default(episode, 'images', 'standard')
+
+ def _get_episode_field(self, episode, field):
+ return episode.get(field)
+
+ @staticmethod
+ def _get_elements(data):
+ return data['elements']
+
+ @staticmethod
+ def _get_episode(element):
+ return element
+
+ def _call_api(self, pid, per_page, page=1, series_id=None):
+ return self._download_json(
+ 'http://ibl.api.bbc.co.uk/ibl/v1/groups/%s/episodes' % pid,
+ pid, query={
+ 'page': page,
+ 'per_page': per_page,
+ })['group_episodes']
+
+ @staticmethod
+ def _get_playlist_data(data):
+ return data['group']
+
+ def _get_playlist_title(self, data):
+ return data.get('title')
+
+
+class BBCCoUkPlaylistIE(BBCCoUkPlaylistBaseIE):
+ IE_NAME = 'bbc.co.uk:playlist'
+ _VALID_URL = r'https?://(?:www\.)?bbc\.co\.uk/programmes/(?P<id>%s)/(?:episodes|broadcasts|clips)' % BBCCoUkIE._ID_REGEX
+ _URL_TEMPLATE = 'http://www.bbc.co.uk/programmes/%s'
+ _VIDEO_ID_TEMPLATE = r'data-pid=["\'](%s)'
+ _TESTS = [{
+ 'url': 'http://www.bbc.co.uk/programmes/b05rcz9v/clips',
+ 'info_dict': {
+ 'id': 'b05rcz9v',
+ 'title': 'The Disappearance - Clips - BBC Four',
+ 'description': 'French thriller serial about a missing teenager.',
+ },
+ 'playlist_mincount': 7,
+ }, {
+ # multipage playlist, explicit page
+ 'url': 'http://www.bbc.co.uk/programmes/b00mfl7n/clips?page=1',
+ 'info_dict': {
+ 'id': 'b00mfl7n',
+ 'title': 'Frozen Planet - Clips - BBC One',
+ 'description': 'md5:65dcbf591ae628dafe32aa6c4a4a0d8c',
+ },
+ 'playlist_mincount': 24,
+ }, {
+ # multipage playlist, all pages
+ 'url': 'http://www.bbc.co.uk/programmes/b00mfl7n/clips',
+ 'info_dict': {
+ 'id': 'b00mfl7n',
+ 'title': 'Frozen Planet - Clips - BBC One',
+ 'description': 'md5:65dcbf591ae628dafe32aa6c4a4a0d8c',
+ },
+ 'playlist_mincount': 142,
+ }, {
+ 'url': 'http://www.bbc.co.uk/programmes/b05rcz9v/broadcasts/2016/06',
+ 'only_matching': True,
+ }, {
+ 'url': 'http://www.bbc.co.uk/programmes/b05rcz9v/clips',
+ 'only_matching': True,
+ }, {
+ 'url': 'http://www.bbc.co.uk/programmes/b055jkys/episodes/player',
+ 'only_matching': True,
+ }]
+
+ def _extract_title_and_description(self, webpage):
+ title = self._og_search_title(webpage, fatal=False)
+ description = self._og_search_description(webpage)
+ return title, description
diff --git a/yt_dlp/extractor/beatbump.py b/yt_dlp/extractor/beatbump.py
new file mode 100644
index 0000000..777a1b3
--- /dev/null
+++ b/yt_dlp/extractor/beatbump.py
@@ -0,0 +1,111 @@
+from .common import InfoExtractor
+from .youtube import YoutubeIE, YoutubeTabIE
+
+
+class BeatBumpVideoIE(InfoExtractor):
+ _VALID_URL = r'https?://beatbump\.(?:ml|io)/listen\?id=(?P<id>[\w-]+)'
+ _TESTS = [{
+ 'url': 'https://beatbump.ml/listen?id=MgNrAu2pzNs',
+ 'md5': '5ff3fff41d3935b9810a9731e485fe66',
+ 'info_dict': {
+ 'id': 'MgNrAu2pzNs',
+ 'ext': 'mp4',
+ 'artist': 'Stephen',
+ 'thumbnail': 'https://i.ytimg.com/vi_webp/MgNrAu2pzNs/maxresdefault.webp',
+ 'channel_url': 'https://www.youtube.com/channel/UC-pWHpBjdGG69N9mM2auIAA',
+ 'upload_date': '20190312',
+ 'categories': ['Music'],
+ 'playable_in_embed': True,
+ 'duration': 169,
+ 'like_count': int,
+ 'alt_title': 'Voyeur Girl',
+ 'view_count': int,
+ 'track': 'Voyeur Girl',
+ 'uploader': 'Stephen',
+ 'title': 'Voyeur Girl',
+ 'channel_follower_count': int,
+ 'age_limit': 0,
+ 'availability': 'public',
+ 'live_status': 'not_live',
+ 'album': 'it\'s too much love to know my dear',
+ 'channel': 'Stephen',
+ 'comment_count': int,
+ 'description': 'md5:7ae382a65843d6df2685993e90a8628f',
+ 'tags': 'count:11',
+ 'creator': 'Stephen',
+ 'channel_id': 'UC-pWHpBjdGG69N9mM2auIAA',
+ 'channel_is_verified': True,
+ 'heatmap': 'count:100',
+ },
+ }, {
+ 'url': 'https://beatbump.io/listen?id=LDGZAprNGWo',
+ 'only_matching': True,
+ }]
+
+ def _real_extract(self, url):
+ id_ = self._match_id(url)
+ return self.url_result(f'https://music.youtube.com/watch?v={id_}', YoutubeIE, id_)
+
+
+class BeatBumpPlaylistIE(InfoExtractor):
+ _VALID_URL = r'https?://beatbump\.(?:ml|io)/(?:release\?id=|artist/|playlist/)(?P<id>[\w-]+)'
+ _TESTS = [{
+ 'url': 'https://beatbump.ml/release?id=MPREb_gTAcphH99wE',
+ 'playlist_count': 50,
+ 'info_dict': {
+ 'id': 'OLAK5uy_l1m0thk3g31NmIIz_vMIbWtyv7eZixlH0',
+ 'availability': 'unlisted',
+ 'view_count': int,
+ 'title': 'Album - Royalty Free Music Library V2 (50 Songs)',
+ 'description': '',
+ 'tags': [],
+ 'modified_date': '20231110',
+ },
+ 'expected_warnings': ['YouTube Music is not directly supported'],
+ }, {
+ 'url': 'https://beatbump.ml/artist/UC_aEa8K-EOJ3D6gOs7HcyNg',
+ 'playlist_mincount': 1,
+ 'params': {'flatplaylist': True},
+ 'info_dict': {
+ 'id': 'UC_aEa8K-EOJ3D6gOs7HcyNg',
+ 'uploader_url': 'https://www.youtube.com/@NoCopyrightSounds',
+ 'channel_url': 'https://www.youtube.com/channel/UC_aEa8K-EOJ3D6gOs7HcyNg',
+ 'uploader_id': '@NoCopyrightSounds',
+ 'channel_follower_count': int,
+ 'title': 'NoCopyrightSounds',
+ 'uploader': 'NoCopyrightSounds',
+ 'description': 'md5:cd4fd53d81d363d05eee6c1b478b491a',
+ 'channel': 'NoCopyrightSounds',
+ 'tags': 'count:65',
+ 'channel_id': 'UC_aEa8K-EOJ3D6gOs7HcyNg',
+ 'channel_is_verified': True,
+ },
+ 'expected_warnings': ['YouTube Music is not directly supported'],
+ }, {
+ 'url': 'https://beatbump.ml/playlist/VLPLRBp0Fe2GpgmgoscNFLxNyBVSFVdYmFkq',
+ 'playlist_mincount': 1,
+ 'params': {'flatplaylist': True},
+ 'info_dict': {
+ 'id': 'PLRBp0Fe2GpgmgoscNFLxNyBVSFVdYmFkq',
+ 'uploader_url': 'https://www.youtube.com/@NoCopyrightSounds',
+ 'description': 'Providing you with copyright free / safe music for gaming, live streaming, studying and more!',
+ 'view_count': int,
+ 'channel_url': 'https://www.youtube.com/channel/UC_aEa8K-EOJ3D6gOs7HcyNg',
+ 'uploader_id': '@NoCopyrightSounds',
+ 'title': 'NCS : All Releases 💿',
+ 'uploader': 'NoCopyrightSounds',
+ 'availability': 'public',
+ 'channel': 'NoCopyrightSounds',
+ 'tags': [],
+ 'modified_date': '20231112',
+ 'channel_id': 'UC_aEa8K-EOJ3D6gOs7HcyNg',
+ },
+ 'expected_warnings': ['YouTube Music is not directly supported'],
+ }, {
+ 'url': 'https://beatbump.io/playlist/VLPLFCHGavqRG-q_2ZhmgU2XB2--ZY6irT1c',
+ 'only_matching': True,
+ }]
+
+ def _real_extract(self, url):
+ id_ = self._match_id(url)
+ return self.url_result(f'https://music.youtube.com/browse/{id_}', YoutubeTabIE, id_)
diff --git a/yt_dlp/extractor/beatport.py b/yt_dlp/extractor/beatport.py
new file mode 100644
index 0000000..0aecbd0
--- /dev/null
+++ b/yt_dlp/extractor/beatport.py
@@ -0,0 +1,97 @@
+import re
+
+from .common import InfoExtractor
+from ..compat import compat_str
+from ..utils import int_or_none
+
+
+class BeatportIE(InfoExtractor):
+ _VALID_URL = r'https?://(?:www\.|pro\.)?beatport\.com/track/(?P<display_id>[^/]+)/(?P<id>[0-9]+)'
+ _TESTS = [{
+ 'url': 'https://beatport.com/track/synesthesia-original-mix/5379371',
+ 'md5': 'b3c34d8639a2f6a7f734382358478887',
+ 'info_dict': {
+ 'id': '5379371',
+ 'display_id': 'synesthesia-original-mix',
+ 'ext': 'mp4',
+ 'title': 'Froxic - Synesthesia (Original Mix)',
+ },
+ }, {
+ 'url': 'https://beatport.com/track/love-and-war-original-mix/3756896',
+ 'md5': 'e44c3025dfa38c6577fbaeb43da43514',
+ 'info_dict': {
+ 'id': '3756896',
+ 'display_id': 'love-and-war-original-mix',
+ 'ext': 'mp3',
+ 'title': 'Wolfgang Gartner - Love & War (Original Mix)',
+ },
+ }, {
+ 'url': 'https://beatport.com/track/birds-original-mix/4991738',
+ 'md5': 'a1fd8e8046de3950fd039304c186c05f',
+ 'info_dict': {
+ 'id': '4991738',
+ 'display_id': 'birds-original-mix',
+ 'ext': 'mp4',
+ 'title': "Tos, Middle Milk, Mumblin' Johnsson - Birds (Original Mix)",
+ }
+ }]
+
+ def _real_extract(self, url):
+ mobj = self._match_valid_url(url)
+ track_id = mobj.group('id')
+ display_id = mobj.group('display_id')
+
+ webpage = self._download_webpage(url, display_id)
+
+ playables = self._parse_json(
+ self._search_regex(
+ r'window\.Playables\s*=\s*({.+?});', webpage,
+ 'playables info', flags=re.DOTALL),
+ track_id)
+
+ track = next(t for t in playables['tracks'] if t['id'] == int(track_id))
+
+        title = ', '.join(a['name'] for a in track['artists']) + ' - ' + track['name']
+ if track['mix']:
+ title += ' (' + track['mix'] + ')'
+
+ formats = []
+ for ext, info in track['preview'].items():
+ if not info['url']:
+ continue
+ fmt = {
+ 'url': info['url'],
+ 'ext': ext,
+ 'format_id': ext,
+ 'vcodec': 'none',
+ }
+ if ext == 'mp3':
+ fmt['acodec'] = 'mp3'
+ fmt['abr'] = 96
+ fmt['asr'] = 44100
+ elif ext == 'mp4':
+ fmt['acodec'] = 'aac'
+ fmt['abr'] = 96
+ fmt['asr'] = 44100
+ formats.append(fmt)
+
+ images = []
+ for name, info in track['images'].items():
+ image_url = info.get('url')
+ if name == 'dynamic' or not image_url:
+ continue
+ image = {
+ 'id': name,
+ 'url': image_url,
+ 'height': int_or_none(info.get('height')),
+ 'width': int_or_none(info.get('width')),
+ }
+ images.append(image)
+
+ return {
+ 'id': compat_str(track.get('id')) or track_id,
+ 'display_id': track.get('slug') or display_id,
+ 'title': title,
+ 'formats': formats,
+ 'thumbnails': images,
+ }
diff --git a/yt_dlp/extractor/beeg.py b/yt_dlp/extractor/beeg.py
new file mode 100644
index 0000000..042b322
--- /dev/null
+++ b/yt_dlp/extractor/beeg.py
@@ -0,0 +1,90 @@
+from .common import InfoExtractor
+
+from ..utils import (
+ int_or_none,
+ str_or_none,
+ traverse_obj,
+ try_get,
+ unified_timestamp,
+)
+
+
+class BeegIE(InfoExtractor):
+ _VALID_URL = r'https?://(?:www\.)?beeg\.(?:com(?:/video)?)/-?(?P<id>\d+)'
+ _TESTS = [{
+ 'url': 'https://beeg.com/-0983946056129650',
+ 'md5': '51d235147c4627cfce884f844293ff88',
+ 'info_dict': {
+ 'id': '0983946056129650',
+ 'ext': 'mp4',
+ 'title': 'sucked cock and fucked in a private plane',
+ 'duration': 927,
+ 'tags': list,
+ 'age_limit': 18,
+ 'upload_date': '20220131',
+ 'timestamp': 1643656455,
+ 'display_id': '2540839',
+ }
+ }, {
+ 'url': 'https://beeg.com/-0599050563103750?t=4-861',
+ 'md5': 'bd8b5ea75134f7f07fad63008db2060e',
+ 'info_dict': {
+ 'id': '0599050563103750',
+ 'ext': 'mp4',
+ 'title': 'Bad Relatives',
+ 'duration': 2060,
+ 'tags': list,
+ 'age_limit': 18,
+ 'description': 'md5:b4fc879a58ae6c604f8f259155b7e3b9',
+ 'timestamp': 1643623200,
+ 'display_id': '2569965',
+ 'upload_date': '20220131',
+ }
+ }, {
+ # api/v6 v2
+ 'url': 'https://beeg.com/1941093077?t=911-1391',
+ 'only_matching': True,
+ }, {
+ # api/v6 v2 w/o t
+ 'url': 'https://beeg.com/1277207756',
+ 'only_matching': True,
+ }]
+
+ def _real_extract(self, url):
+ video_id = self._match_id(url)
+
+ webpage = self._download_webpage(url, video_id)
+
+ video = self._download_json(
+ 'https://store.externulls.com/facts/file/%s' % video_id,
+ video_id, 'Downloading JSON for %s' % video_id)
+
+ fc_facts = video.get('fc_facts')
+ first_fact = {}
+ for fact in fc_facts:
+ if not first_fact or try_get(fact, lambda x: x['id'] < first_fact['id']):
+ first_fact = fact
+
+ resources = traverse_obj(video, ('file', 'hls_resources')) or first_fact.get('hls_resources')
+
+ formats = []
+ for format_id, video_uri in resources.items():
+ if not video_uri:
+ continue
+ height = int_or_none(self._search_regex(r'fl_cdn_(\d+)', format_id, 'height', default=None))
+ current_formats = self._extract_m3u8_formats(f'https://video.beeg.com/{video_uri}', video_id, ext='mp4', m3u8_id=str(height))
+ for f in current_formats:
+ f['height'] = height
+ formats.extend(current_formats)
+
+ return {
+ 'id': video_id,
+ 'display_id': str_or_none(first_fact.get('id')),
+ 'title': traverse_obj(video, ('file', 'stuff', 'sf_name')),
+ 'description': traverse_obj(video, ('file', 'stuff', 'sf_story')),
+ 'timestamp': unified_timestamp(first_fact.get('fc_created')),
+ 'duration': int_or_none(traverse_obj(video, ('file', 'fl_duration'))),
+ 'tags': traverse_obj(video, ('tags', ..., 'tg_name')),
+ 'formats': formats,
+ 'age_limit': self._rta_search(webpage),
+ }
diff --git a/yt_dlp/extractor/behindkink.py b/yt_dlp/extractor/behindkink.py
new file mode 100644
index 0000000..9d2324f
--- /dev/null
+++ b/yt_dlp/extractor/behindkink.py
@@ -0,0 +1,42 @@
+from .common import InfoExtractor
+from ..utils import url_basename
+
+
+class BehindKinkIE(InfoExtractor):
+ _WORKING = False
+ _VALID_URL = r'https?://(?:www\.)?behindkink\.com/(?P<year>[0-9]{4})/(?P<month>[0-9]{2})/(?P<day>[0-9]{2})/(?P<id>[^/#?_]+)'
+ _TEST = {
+ 'url': 'http://www.behindkink.com/2014/12/05/what-are-you-passionate-about-marley-blaze/',
+ 'md5': '507b57d8fdcd75a41a9a7bdb7989c762',
+ 'info_dict': {
+ 'id': '37127',
+ 'ext': 'mp4',
+ 'title': 'What are you passionate about – Marley Blaze',
+ 'description': 'md5:aee8e9611b4ff70186f752975d9b94b4',
+ 'upload_date': '20141205',
+ 'thumbnail': 'http://www.behindkink.com/wp-content/uploads/2014/12/blaze-1.jpg',
+ 'age_limit': 18,
+ }
+ }
+
+ def _real_extract(self, url):
+ mobj = self._match_valid_url(url)
+ display_id = mobj.group('id')
+
+ webpage = self._download_webpage(url, display_id)
+
+ video_url = self._search_regex(
+ r'<source src="([^"]+)"', webpage, 'video URL')
+ video_id = url_basename(video_url).split('_')[0]
+ upload_date = mobj.group('year') + mobj.group('month') + mobj.group('day')
+
+ return {
+ 'id': video_id,
+ 'display_id': display_id,
+ 'url': video_url,
+ 'title': self._og_search_title(webpage),
+ 'thumbnail': self._og_search_thumbnail(webpage),
+ 'description': self._og_search_description(webpage),
+ 'upload_date': upload_date,
+ 'age_limit': 18,
+ }
diff --git a/yt_dlp/extractor/bellmedia.py b/yt_dlp/extractor/bellmedia.py
new file mode 100644
index 0000000..677680b
--- /dev/null
+++ b/yt_dlp/extractor/bellmedia.py
@@ -0,0 +1,91 @@
+from .common import InfoExtractor
+
+
+class BellMediaIE(InfoExtractor):
+ _VALID_URL = r'''(?x)https?://(?:www\.)?
+ (?P<domain>
+ (?:
+ ctv|
+ tsn|
+ bnn(?:bloomberg)?|
+ thecomedynetwork|
+ discovery|
+ discoveryvelocity|
+ sciencechannel|
+ investigationdiscovery|
+ animalplanet|
+ bravo|
+ mtv|
+ space|
+ etalk|
+ marilyn
+ )\.ca|
+ (?:much|cp24)\.com
+ )/.*?(?:\b(?:vid(?:eoid)?|clipId)=|-vid|~|%7E|/(?:episode)?)(?P<id>[0-9]{6,})'''
+ _TESTS = [{
+ 'url': 'https://www.bnnbloomberg.ca/video/david-cockfield-s-top-picks~1403070',
+ 'md5': '3e5b8e38370741d5089da79161646635',
+ 'info_dict': {
+ 'id': '1403070',
+ 'ext': 'flv',
+ 'title': 'David Cockfield\'s Top Picks',
+ 'description': 'md5:810f7f8c6a83ad5b48677c3f8e5bb2c3',
+ 'upload_date': '20180525',
+ 'timestamp': 1527288600,
+ 'season_id': '73997',
+ 'season': '2018',
+ 'thumbnail': 'http://images2.9c9media.com/image_asset/2018_5_25_baf30cbd-b28d-4a18-9903-4bb8713b00f5_PNG_956x536.jpg',
+ 'tags': [],
+ 'categories': ['ETFs'],
+ 'season_number': 8,
+ 'duration': 272.038,
+ 'series': 'Market Call Tonight',
+ },
+ }, {
+ 'url': 'http://www.thecomedynetwork.ca/video/player?vid=923582',
+ 'only_matching': True,
+ }, {
+ 'url': 'http://www.tsn.ca/video/expectations-high-for-milos-raonic-at-us-open~939549',
+ 'only_matching': True,
+ }, {
+ 'url': 'http://www.bnn.ca/video/berman-s-call-part-two-viewer-questions~939654',
+ 'only_matching': True,
+ }, {
+ 'url': 'http://www.ctv.ca/YourMorning/Video/S1E6-Monday-August-29-2016-vid938009',
+ 'only_matching': True,
+ }, {
+ 'url': 'http://www.much.com/shows/atmidnight/episode948007/tuesday-september-13-2016',
+ 'only_matching': True,
+ }, {
+ 'url': 'http://www.much.com/shows/the-almost-impossible-gameshow/928979/episode-6',
+ 'only_matching': True,
+ }, {
+ 'url': 'http://www.ctv.ca/DCs-Legends-of-Tomorrow/Video/S2E11-Turncoat-vid1051430',
+ 'only_matching': True,
+ }, {
+ 'url': 'http://www.etalk.ca/video?videoid=663455',
+ 'only_matching': True,
+ }, {
+ 'url': 'https://www.cp24.com/video?clipId=1982548',
+ 'only_matching': True,
+ }]
+ _DOMAINS = {
+ 'thecomedynetwork': 'comedy',
+ 'discoveryvelocity': 'discvel',
+ 'sciencechannel': 'discsci',
+ 'investigationdiscovery': 'invdisc',
+ 'animalplanet': 'aniplan',
+ 'etalk': 'ctv',
+ 'bnnbloomberg': 'bnn',
+ 'marilyn': 'ctv_marilyn',
+ }
+
+ def _real_extract(self, url):
+ domain, video_id = self._match_valid_url(url).groups()
+ domain = domain.split('.')[0]
+ return {
+ '_type': 'url_transparent',
+ 'id': video_id,
+ 'url': '9c9media:%s_web:%s' % (self._DOMAINS.get(domain, domain), video_id),
+ 'ie_key': 'NineCNineMedia',
+ }
diff --git a/yt_dlp/extractor/berufetv.py b/yt_dlp/extractor/berufetv.py
new file mode 100644
index 0000000..8160cbd
--- /dev/null
+++ b/yt_dlp/extractor/berufetv.py
@@ -0,0 +1,70 @@
+from .common import InfoExtractor
+from ..utils import float_or_none, mimetype2ext, traverse_obj
+
+
+class BerufeTVIE(InfoExtractor):
+ _VALID_URL = r'https?://(?:www\.)?web\.arbeitsagentur\.de/berufetv/[^?#]+/film;filmId=(?P<id>[\w-]+)'
+ _TESTS = [{
+ 'url': 'https://web.arbeitsagentur.de/berufetv/studienberufe/wirtschaftswissenschaften/wirtschaftswissenschaften-volkswirtschaft/film;filmId=DvKC3DUpMKvUZ_6fEnfg3u',
+ 'md5': '041b6432ec8e6838f84a5c30f31cc795',
+ 'info_dict': {
+ 'id': 'DvKC3DUpMKvUZ_6fEnfg3u',
+ 'ext': 'mp4',
+ 'title': 'Volkswirtschaftslehre',
+ 'description': 'md5:6bd87d0c63163480a6489a37526ee1c1',
+ 'categories': ['Studien&shy;beruf'],
+ 'tags': ['Studienfilm'],
+ 'duration': 602.440,
+ 'thumbnail': r're:^https://asset-out-cdn\.video-cdn\.net/private/videos/DvKC3DUpMKvUZ_6fEnfg3u/thumbnails/793063\?quality=thumbnail&__token__=[^\s]+$',
+ }
+ }]
+
+ def _real_extract(self, url):
+ video_id = self._match_id(url)
+
+ movie_metadata = self._download_json(
+ 'https://rest.arbeitsagentur.de/infosysbub/berufetv/pc/v1/film-metadata',
+ video_id, 'Downloading JSON metadata',
+ headers={'X-API-Key': '79089773-4892-4386-86e6-e8503669f426'}, fatal=False)
+
+ meta = traverse_obj(
+ movie_metadata, ('metadaten', lambda _, i: video_id == i['miId']),
+ get_all=False, default={})
+
+ video = self._download_json(
+ f'https://d.video-cdn.net/play/player/8YRzUk6pTzmBdrsLe9Y88W/video/{video_id}',
+ video_id, 'Downloading video JSON')
+
+ formats, subtitles = [], {}
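+ # The "auto" entry is an HLS master playlist; every other key is a single progressive source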
+ for key, source in video['videoSources']['html'].items():
+ if key == 'auto':
+ fmts, subs = self._extract_m3u8_formats_and_subtitles(source[0]['source'], video_id)
+ formats += fmts
+ subtitles = subs
+ else:
+ formats.append({
+ 'url': source[0]['source'],
+ 'ext': mimetype2ext(source[0]['mimeType']),
+ 'format_id': key,
+ })
+
+ for track in video.get('videoTracks') or []:
+ if track.get('type') != 'SUBTITLES':
+ continue
+ subtitles.setdefault(track['language'], []).append({
+ 'url': track['source'],
+ 'name': track.get('label'),
+ 'ext': 'vtt'
+ })
+
+ return {
+ 'id': video_id,
+ 'title': meta.get('titel') or traverse_obj(video, ('videoMetaData', 'title')),
+ 'description': meta.get('beschreibung'),
+ 'thumbnail': meta.get('thumbnail') or f'https://asset-out-cdn.video-cdn.net/private/videos/{video_id}/thumbnails/active',
+ 'duration': float_or_none(video.get('duration'), scale=1000),
+ 'categories': [meta['kategorie']] if meta.get('kategorie') else None,
+ 'tags': meta.get('themengebiete'),
+ 'subtitles': subtitles,
+ 'formats': formats,
+ }
diff --git a/yt_dlp/extractor/bet.py b/yt_dlp/extractor/bet.py
new file mode 100644
index 0000000..cbf3dd0
--- /dev/null
+++ b/yt_dlp/extractor/bet.py
@@ -0,0 +1,79 @@
+from .mtv import MTVServicesInfoExtractor
+from ..utils import unified_strdate
+
+
+class BetIE(MTVServicesInfoExtractor):
+ _WORKING = False
+ _VALID_URL = r'https?://(?:www\.)?bet\.com/(?:[^/]+/)+(?P<id>.+?)\.html'
+ _TESTS = [
+ {
+ 'url': 'http://www.bet.com/news/politics/2014/12/08/in-bet-exclusive-obama-talks-race-and-racism.html',
+ 'info_dict': {
+ 'id': '07e96bd3-8850-3051-b856-271b457f0ab8',
+ 'display_id': 'in-bet-exclusive-obama-talks-race-and-racism',
+ 'ext': 'flv',
+ 'title': 'A Conversation With President Obama',
+ 'description': 'President Obama urges persistence in confronting racism and bias.',
+ 'duration': 1534,
+ 'upload_date': '20141208',
+ 'thumbnail': r're:(?i)^https?://.*\.jpg$',
+ 'subtitles': {
+ 'en': 'mincount:2',
+ }
+ },
+ 'params': {
+ # rtmp download
+ 'skip_download': True,
+ },
+ },
+ {
+ 'url': 'http://www.bet.com/video/news/national/2014/justice-for-ferguson-a-community-reacts.html',
+ 'info_dict': {
+ 'id': '9f516bf1-7543-39c4-8076-dd441b459ba9',
+ 'display_id': 'justice-for-ferguson-a-community-reacts',
+ 'ext': 'flv',
+ 'title': 'Justice for Ferguson: A Community Reacts',
+ 'description': 'A BET News special.',
+ 'duration': 1696,
+ 'upload_date': '20141125',
+ 'thumbnail': r're:(?i)^https?://.*\.jpg$',
+ 'subtitles': {
+ 'en': 'mincount:2',
+ }
+ },
+ 'params': {
+ # rtmp download
+ 'skip_download': True,
+ },
+ }
+ ]
+
+ _FEED_URL = "http://feeds.mtvnservices.com/od/feed/bet-mrss-player"
+
+ def _get_feed_query(self, uri):
+ return {
+ 'uuid': uri,
+ }
+
+ def _extract_mgid(self, webpage):
+ return self._search_regex(r'data-uri="([^"]+)', webpage, 'mgid')
+
+ def _real_extract(self, url):
+ display_id = self._match_id(url)
+
+ webpage = self._download_webpage(url, display_id)
+ mgid = self._extract_mgid(webpage)
+ videos_info = self._get_videos_info(mgid)
+
+ info_dict = videos_info['entries'][0]
+
+ upload_date = unified_strdate(self._html_search_meta('date', webpage))
+ description = self._html_search_meta('description', webpage)
+
+ info_dict.update({
+ 'display_id': display_id,
+ 'description': description,
+ 'upload_date': upload_date,
+ })
+
+ return info_dict
diff --git a/yt_dlp/extractor/bfi.py b/yt_dlp/extractor/bfi.py
new file mode 100644
index 0000000..a6ebfed
--- /dev/null
+++ b/yt_dlp/extractor/bfi.py
@@ -0,0 +1,35 @@
+import re
+
+from .common import InfoExtractor
+from ..utils import extract_attributes
+
+
+class BFIPlayerIE(InfoExtractor):
+ _WORKING = False
+ IE_NAME = 'bfi:player'
+ _VALID_URL = r'https?://player\.bfi\.org\.uk/[^/]+/film/watch-(?P<id>[\w-]+)-online'
+ _TEST = {
+ 'url': 'https://player.bfi.org.uk/free/film/watch-computer-doctor-1974-online',
+ 'md5': 'e8783ebd8e061ec4bc6e9501ed547de8',
+ 'info_dict': {
+ 'id': 'htNnhlZjE60C9VySkQEIBtU-cNV1Xx63',
+ 'ext': 'mp4',
+ 'title': 'Computer Doctor',
+ 'description': 'md5:fb6c240d40c4dbe40428bdd62f78203b',
+ },
+ 'skip': 'BFI Player films cannot be played outside of the UK',
+ }
+
+ def _real_extract(self, url):
+ video_id = self._match_id(url)
+ webpage = self._download_webpage(url, video_id)
+ entries = []
+ for player_el in re.findall(r'(?s)<[^>]+class="player"[^>]*>', webpage):
+ player_attr = extract_attributes(player_el)
+ ooyala_id = player_attr.get('data-video-id')
+ if not ooyala_id:
+ continue
+ entries.append(self.url_result(
+ 'ooyala:' + ooyala_id, 'Ooyala',
+ ooyala_id, player_attr.get('data-label')))
+ return self.playlist_result(entries)
diff --git a/yt_dlp/extractor/bfmtv.py b/yt_dlp/extractor/bfmtv.py
new file mode 100644
index 0000000..c4621ca
--- /dev/null
+++ b/yt_dlp/extractor/bfmtv.py
@@ -0,0 +1,119 @@
+import re
+
+from .common import InfoExtractor
+from ..utils import extract_attributes
+
+
+class BFMTVBaseIE(InfoExtractor):
+ _VALID_URL_BASE = r'https?://(?:www\.|rmc\.)?bfmtv\.com/'
+ _VALID_URL_TMPL = _VALID_URL_BASE + r'(?:[^/]+/)*[^/?&#]+_%s[A-Z]-(?P<id>\d{12})\.html'
+ _VIDEO_BLOCK_REGEX = r'(<div[^>]+class="video_block[^"]*"[^>]*>)'
+ BRIGHTCOVE_URL_TEMPLATE = 'http://players.brightcove.net/%s/%s_default/index.html?videoId=%s'
+
+ def _brightcove_url_result(self, video_id, video_block):
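+ # Fall back to BFMTV's default Brightcove account and player IDs when the video block omits them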
+ account_id = video_block.get('accountid') or '876450612001'
+ player_id = video_block.get('playerid') or 'I2qBTln4u'
+ return self.url_result(
+ self.BRIGHTCOVE_URL_TEMPLATE % (account_id, player_id, video_id),
+ 'BrightcoveNew', video_id)
+
+
+class BFMTVIE(BFMTVBaseIE):
+ IE_NAME = 'bfmtv'
+ _VALID_URL = BFMTVBaseIE._VALID_URL_TMPL % 'V'
+ _TESTS = [{
+ 'url': 'https://www.bfmtv.com/politique/emmanuel-macron-l-islam-est-une-religion-qui-vit-une-crise-aujourd-hui-partout-dans-le-monde_VN-202010020146.html',
+ 'info_dict': {
+ 'id': '6196747868001',
+ 'ext': 'mp4',
+ 'title': 'Emmanuel Macron: "L\'Islam est une religion qui vit une crise aujourd’hui, partout dans le monde"',
+ 'description': 'Le Président s\'exprime sur la question du séparatisme depuis les Mureaux, dans les Yvelines.',
+ 'uploader_id': '876450610001',
+ 'upload_date': '20201002',
+ 'timestamp': 1601629620,
+ 'duration': 44.757,
+ 'tags': ['bfmactu', 'politique'],
+ 'thumbnail': 'https://cf-images.eu-west-1.prod.boltdns.net/v1/static/876450610001/5041f4c1-bc48-4af8-a256-1b8300ad8ef0/cf2f9114-e8e2-4494-82b4-ab794ea4bc7d/1920x1080/match/image.jpg',
+ },
+ }]
+
+ def _real_extract(self, url):
+ bfmtv_id = self._match_id(url)
+ webpage = self._download_webpage(url, bfmtv_id)
+ video_block = extract_attributes(self._search_regex(
+ self._VIDEO_BLOCK_REGEX, webpage, 'video block'))
+ return self._brightcove_url_result(video_block['videoid'], video_block)
+
+
+class BFMTVLiveIE(BFMTVIE): # XXX: Do not subclass from concrete IE
+ IE_NAME = 'bfmtv:live'
+ _VALID_URL = BFMTVBaseIE._VALID_URL_BASE + '(?P<id>(?:[^/]+/)?en-direct)'
+ _TESTS = [{
+ 'url': 'https://www.bfmtv.com/en-direct/',
+ 'info_dict': {
+ 'id': '5615950982001',
+ 'ext': 'mp4',
+ 'title': r're:^le direct BFMTV WEB \d{4}-\d{2}-\d{2} \d{2}:\d{2}$',
+ 'uploader_id': '876450610001',
+ 'upload_date': '20220926',
+ 'timestamp': 1664207191,
+ 'live_status': 'is_live',
+ 'thumbnail': r're:https://.+/image\.jpg',
+ 'tags': [],
+ },
+ 'params': {
+ 'skip_download': True,
+ },
+ }, {
+ 'url': 'https://www.bfmtv.com/economie/en-direct/',
+ 'only_matching': True,
+ }]
+
+
+class BFMTVArticleIE(BFMTVBaseIE):
+ IE_NAME = 'bfmtv:article'
+ _VALID_URL = BFMTVBaseIE._VALID_URL_TMPL % 'A'
+ _TESTS = [{
+ 'url': 'https://www.bfmtv.com/sante/covid-19-un-responsable-de-l-institut-pasteur-se-demande-quand-la-france-va-se-reconfiner_AV-202101060198.html',
+ 'info_dict': {
+ 'id': '202101060198',
+ 'title': 'Covid-19: un responsable de l\'Institut Pasteur se demande "quand la France va se reconfiner"',
+ 'description': 'md5:947974089c303d3ac6196670ae262843',
+ },
+ 'playlist_count': 2,
+ }, {
+ 'url': 'https://www.bfmtv.com/international/pour-bolsonaro-le-bresil-est-en-faillite-mais-il-ne-peut-rien-faire_AD-202101060232.html',
+ 'only_matching': True,
+ }, {
+ 'url': 'https://www.bfmtv.com/sante/covid-19-oui-le-vaccin-de-pfizer-distribue-en-france-a-bien-ete-teste-sur-des-personnes-agees_AN-202101060275.html',
+ 'only_matching': True,
+ }, {
+ 'url': 'https://rmc.bfmtv.com/actualites/societe/transports/ce-n-est-plus-tout-rentable-le-bioethanol-e85-depasse-1eu-le-litre-des-automobilistes-regrettent_AV-202301100268.html',
+ 'info_dict': {
+ 'id': '6318445464112',
+ 'ext': 'mp4',
+ 'title': 'Le plein de bioéthanol fait de plus en plus mal à la pompe',
+ 'uploader_id': '876630703001',
+ 'upload_date': '20230110',
+ 'timestamp': 1673341692,
+ 'duration': 109.269,
+ 'tags': ['rmc', 'show', 'apolline de malherbe', 'info', 'talk', 'matinale', 'radio'],
+ 'thumbnail': 'https://cf-images.eu-west-1.prod.boltdns.net/v1/static/876630703001/5bef74b8-9d5e-4480-a21f-60c2e2480c46/96c88b74-f9db-45e1-8040-e199c5da216c/1920x1080/match/image.jpg'
+ }
+ }]
+
+ def _real_extract(self, url):
+ bfmtv_id = self._match_id(url)
+ webpage = self._download_webpage(url, bfmtv_id)
+
+ entries = []
+ for video_block_el in re.findall(self._VIDEO_BLOCK_REGEX, webpage):
+ video_block = extract_attributes(video_block_el)
+ video_id = video_block.get('videoid')
+ if not video_id:
+ continue
+ entries.append(self._brightcove_url_result(video_id, video_block))
+
+ return self.playlist_result(
+ entries, bfmtv_id, self._og_search_title(webpage, fatal=False),
+ self._html_search_meta(['og:description', 'description'], webpage))
diff --git a/yt_dlp/extractor/bibeltv.py b/yt_dlp/extractor/bibeltv.py
new file mode 100644
index 0000000..34464da
--- /dev/null
+++ b/yt_dlp/extractor/bibeltv.py
@@ -0,0 +1,197 @@
+from functools import partial
+
+from .common import InfoExtractor
+from ..utils import (
+ ExtractorError,
+ clean_html,
+ determine_ext,
+ format_field,
+ int_or_none,
+ js_to_json,
+ orderedSet,
+ parse_iso8601,
+ traverse_obj,
+ url_or_none,
+)
+
+
+class BibelTVBaseIE(InfoExtractor):
+ _GEO_COUNTRIES = ['AT', 'CH', 'DE']
+ _GEO_BYPASS = False
+
+ API_URL = 'https://www.bibeltv.de/mediathek/api'
+ AUTH_TOKEN = 'j88bRXY8DsEqJ9xmTdWhrByVi5Hm'
+
+ def _extract_formats_and_subtitles(self, data, crn_id, *, is_live=False):
+ formats = []
+ subtitles = {}
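+ # Sources may be HLS (m3u8), DASH (mpd) or progressive MP4; dispatch on the URL extension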
+ for media_url in traverse_obj(data, (..., 'src', {url_or_none})):
+ media_ext = determine_ext(media_url)
+ if media_ext == 'm3u8':
+ m3u8_formats, m3u8_subs = self._extract_m3u8_formats_and_subtitles(
+ media_url, crn_id, live=is_live)
+ formats.extend(m3u8_formats)
+ subtitles.update(m3u8_subs)
+ elif media_ext == 'mpd':
+ mpd_formats, mpd_subs = self._extract_mpd_formats_and_subtitles(media_url, crn_id)
+ formats.extend(mpd_formats)
+ subtitles.update(mpd_subs)
+ elif media_ext == 'mp4':
+ formats.append({'url': media_url})
+ else:
+ self.report_warning(f'Unknown format {media_ext!r}')
+
+ return formats, subtitles
+
+ @staticmethod
+ def _extract_base_info(data):
+ return {
+ 'id': data['crn'],
+ **traverse_obj(data, {
+ 'title': 'title',
+ 'description': 'description',
+ 'duration': ('duration', {partial(int_or_none, scale=1000)}),
+ 'timestamp': ('schedulingStart', {parse_iso8601}),
+ 'season_number': 'seasonNumber',
+ 'episode_number': 'episodeNumber',
+ 'view_count': 'viewCount',
+ 'like_count': 'likeCount',
+ }),
+ 'thumbnails': orderedSet(traverse_obj(data, ('images', ..., {
+ 'url': ('url', {url_or_none}),
+ }))),
+ }
+
+ def _extract_url_info(self, data):
+ return {
+ '_type': 'url',
+ 'url': format_field(data, 'slug', 'https://www.bibeltv.de/mediathek/videos/%s'),
+ **self._extract_base_info(data),
+ }
+
+ def _extract_video_info(self, data):
+ crn_id = data['crn']
+
+ if data.get('drm'):
+ self.report_drm(crn_id)
+
+ json_data = self._download_json(
+ format_field(data, 'id', f'{self.API_URL}/video/%s'), crn_id,
+ headers={'Authorization': self.AUTH_TOKEN}, fatal=False,
+ errnote='No formats available') or {}
+
+ formats, subtitles = self._extract_formats_and_subtitles(
+ traverse_obj(json_data, ('video', 'videoUrls', ...)), crn_id)
+
+ return {
+ '_type': 'video',
+ **self._extract_base_info(data),
+ 'formats': formats,
+ 'subtitles': subtitles,
+ }
+
+
+class BibelTVVideoIE(BibelTVBaseIE):
+ IE_DESC = 'BibelTV single video'
+ _VALID_URL = r'https?://(?:www\.)?bibeltv\.de/mediathek/videos/(?P<id>\d+)[\w-]+'
+ IE_NAME = 'bibeltv:video'
+
+ _TESTS = [{
+ 'url': 'https://www.bibeltv.de/mediathek/videos/344436-alte-wege',
+ 'md5': 'ec1c07efe54353780512e8a4103b612e',
+ 'info_dict': {
+ 'id': '344436',
+ 'ext': 'mp4',
+ 'title': 'Alte Wege',
+ 'description': 'md5:2f4eb7294c9797a47b8fd13cccca22e9',
+ 'timestamp': 1677877071,
+ 'duration': 150.0,
+ 'upload_date': '20230303',
+ 'thumbnail': r're:https://bibeltv\.imgix\.net/[\w-]+\.jpg',
+ 'episode': 'Episode 1',
+ 'episode_number': 1,
+ 'view_count': int,
+ 'like_count': int,
+ },
+ 'params': {
+ 'format': '6',
+ },
+ }]
+
+ def _real_extract(self, url):
+ crn_id = self._match_id(url)
+ video_data = traverse_obj(
+ self._search_nextjs_data(self._download_webpage(url, crn_id), crn_id),
+ ('props', 'pageProps', 'videoPageData', 'videos', 0, {dict}))
+ if not video_data:
+ raise ExtractorError('Missing video data.')
+
+ return self._extract_video_info(video_data)
+
+
+class BibelTVSeriesIE(BibelTVBaseIE):
+ IE_DESC = 'BibelTV series playlist'
+ _VALID_URL = r'https?://(?:www\.)?bibeltv\.de/mediathek/serien/(?P<id>\d+)[\w-]+'
+ IE_NAME = 'bibeltv:series'
+
+ _TESTS = [{
+ 'url': 'https://www.bibeltv.de/mediathek/serien/333485-ein-wunder-fuer-jeden-tag',
+ 'playlist_mincount': 400,
+ 'info_dict': {
+ 'id': '333485',
+ 'title': 'Ein Wunder für jeden Tag',
+ 'description': 'Tägliche Kurzandacht mit Déborah Rosenkranz.',
+ },
+ }]
+
+ def _real_extract(self, url):
+ crn_id = self._match_id(url)
+ webpage = self._download_webpage(url, crn_id)
+ nextjs_data = self._search_nextjs_data(webpage, crn_id)
+ series_data = traverse_obj(nextjs_data, ('props', 'pageProps', 'seriePageData', {dict}))
+ if not series_data:
+ raise ExtractorError('Missing series data.')
+
+ return self.playlist_result(
+ traverse_obj(series_data, ('videos', ..., {dict}, {self._extract_url_info})),
+ crn_id, series_data.get('title'), clean_html(series_data.get('description')))
+
+
+class BibelTVLiveIE(BibelTVBaseIE):
+ IE_DESC = 'BibelTV live program'
+ _VALID_URL = r'https?://(?:www\.)?bibeltv\.de/livestreams/(?P<id>[\w-]+)'
+ IE_NAME = 'bibeltv:live'
+
+ _TESTS = [{
+ 'url': 'https://www.bibeltv.de/livestreams/bibeltv/',
+ 'info_dict': {
+ 'id': 'bibeltv',
+ 'ext': 'mp4',
+ 'title': 're:Bibel TV',
+ 'live_status': 'is_live',
+ 'thumbnail': 'https://streampreview.bibeltv.de/bibeltv.webp',
+ },
+ 'params': {'skip_download': 'm3u8'},
+ }, {
+ 'url': 'https://www.bibeltv.de/livestreams/impuls/',
+ 'only_matching': True,
+ }]
+
+ def _real_extract(self, url):
+ stream_id = self._match_id(url)
+ webpage = self._download_webpage(url, stream_id)
+ stream_data = self._search_json(
+ r'\\"video\\":', webpage, 'bibeltvData', stream_id,
+ transform_source=lambda jstring: js_to_json(jstring.replace('\\"', '"')))
+
+ formats, subtitles = self._extract_formats_and_subtitles(
+ traverse_obj(stream_data, ('src', ...)), stream_id, is_live=True)
+
+ return {
+ 'id': stream_id,
+ 'title': stream_data.get('title'),
+ 'thumbnail': stream_data.get('poster'),
+ 'is_live': True,
+ 'formats': formats,
+ 'subtitles': subtitles,
+ }
diff --git a/yt_dlp/extractor/bigflix.py b/yt_dlp/extractor/bigflix.py
new file mode 100644
index 0000000..02d1ba0
--- /dev/null
+++ b/yt_dlp/extractor/bigflix.py
@@ -0,0 +1,73 @@
+import re
+
+from .common import InfoExtractor
+from ..compat import (
+ compat_b64decode,
+ compat_urllib_parse_unquote,
+)
+
+
+class BigflixIE(InfoExtractor):
+ _VALID_URL = r'https?://(?:www\.)?bigflix\.com/.+/(?P<id>[0-9]+)'
+ _TESTS = [{
+ # 2 formats
+ 'url': 'http://www.bigflix.com/Tamil-movies/Drama-movies/Madarasapatinam/16070',
+ 'info_dict': {
+ 'id': '16070',
+ 'ext': 'mp4',
+ 'title': 'Madarasapatinam',
+ 'description': 'md5:9f0470b26a4ba8e824c823b5d95c2f6b',
+ 'formats': 'mincount:2',
+ },
+ 'params': {
+ 'skip_download': True,
+ }
+ }, {
+ # multiple formats
+ 'url': 'http://www.bigflix.com/Malayalam-movies/Drama-movies/Indian-Rupee/15967',
+ 'only_matching': True,
+ }]
+
+ def _real_extract(self, url):
+ video_id = self._match_id(url)
+
+ webpage = self._download_webpage(url, video_id)
+
+ title = self._html_search_regex(
+ r'<div[^>]+class=["\']pagetitle["\'][^>]*>(.+?)</div>',
+ webpage, 'title')
+
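+ # Stream URLs arrive base64-encoded and then percent-encoded; decode_url reverses both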
+ def decode_url(quoted_b64_url):
+ return compat_b64decode(compat_urllib_parse_unquote(
+ quoted_b64_url)).decode('utf-8')
+
+ formats = []
+ for height, encoded_url in re.findall(
+ r'ContentURL_(\d{3,4})[pP][^=]+=([^&]+)', webpage):
+ video_url = decode_url(encoded_url)
+ f = {
+ 'url': video_url,
+ 'format_id': '%sp' % height,
+ 'height': int(height),
+ }
+ if video_url.startswith('rtmp'):
+ f['ext'] = 'flv'
+ formats.append(f)
+
+ file_url = self._search_regex(
+ r'file=([^&]+)', webpage, 'video url', default=None)
+ if file_url:
+ video_url = decode_url(file_url)
+ if all(f['url'] != video_url for f in formats):
+ formats.append({
+ 'url': video_url,
+ })
+
+ description = self._html_search_meta('description', webpage)
+
+ return {
+ 'id': video_id,
+ 'title': title,
+ 'description': description,
+ 'formats': formats
+ }
diff --git a/yt_dlp/extractor/bigo.py b/yt_dlp/extractor/bigo.py
new file mode 100644
index 0000000..acf78e4
--- /dev/null
+++ b/yt_dlp/extractor/bigo.py
@@ -0,0 +1,57 @@
+from .common import InfoExtractor
+from ..utils import ExtractorError, urlencode_postdata
+
+
+class BigoIE(InfoExtractor):
+ _VALID_URL = r'https?://(?:www\.)?bigo\.tv/(?:[a-z]{2,}/)?(?P<id>[^/]+)'
+
+ _TESTS = [{
+ 'url': 'https://www.bigo.tv/ja/221338632',
+ 'info_dict': {
+ 'id': '6576287577575737440',
+ 'title': '土よ〜💁‍♂️ 休憩室/REST room',
+ 'thumbnail': r're:https?://.+',
+ 'uploader': '✨Shin💫',
+ 'uploader_id': '221338632',
+ 'is_live': True,
+ },
+ 'skip': 'livestream',
+ }, {
+ 'url': 'https://www.bigo.tv/th/Tarlerm1304',
+ 'only_matching': True,
+ }, {
+ 'url': 'https://bigo.tv/115976881',
+ 'only_matching': True,
+ }]
+
+ def _real_extract(self, url):
+ user_id = self._match_id(url)
+
+ info_raw = self._download_json(
+ 'https://ta.bigo.tv/official_website/studio/getInternalStudioInfo',
+ user_id, data=urlencode_postdata({'siteId': user_id}),
+ headers={'Accept': 'application/json'})
+
+ if not isinstance(info_raw, dict):
+ raise ExtractorError('Received invalid JSON data')
+ if info_raw.get('code'):
+ raise ExtractorError(
+ 'Bigo says: %s (code %s)' % (info_raw.get('msg'), info_raw.get('code')), expected=True)
+ info = info_raw.get('data') or {}
+
+ if not info.get('alive'):
+ raise ExtractorError('This user is offline.', expected=True)
+
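+ # Live rooms expose a single HLS source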
+ formats, subs = self._extract_m3u8_formats_and_subtitles(
+ info.get('hls_src'), user_id, 'mp4', 'm3u8')
+
+ return {
+ 'id': info.get('roomId') or user_id,
+ 'title': info.get('roomTopic') or info.get('nick_name') or user_id,
+ 'formats': formats,
+ 'subtitles': subs,
+ 'thumbnail': info.get('snapshot'),
+ 'uploader': info.get('nick_name'),
+ 'uploader_id': user_id,
+ 'is_live': True,
+ }
diff --git a/yt_dlp/extractor/bild.py b/yt_dlp/extractor/bild.py
new file mode 100644
index 0000000..eb28932
--- /dev/null
+++ b/yt_dlp/extractor/bild.py
@@ -0,0 +1,63 @@
+from .common import InfoExtractor
+from ..utils import (
+ int_or_none,
+ traverse_obj,
+ unescapeHTML,
+)
+
+
+class BildIE(InfoExtractor):
+ _VALID_URL = r'https?://(?:www\.)?bild\.de/(?:[^/]+/)+(?P<display_id>[^/]+)-(?P<id>\d+)(?:,auto=true)?\.bild\.html'
+ IE_DESC = 'Bild.de'
+ _TESTS = [{
+ 'note': 'static MP4 only',
+ 'url': 'http://www.bild.de/video/clip/apple-ipad-air/das-koennen-die-neuen-ipads-38184146.bild.html',
+ 'md5': 'dd495cbd99f2413502a1713a1156ac8a',
+ 'info_dict': {
+ 'id': '38184146',
+ 'ext': 'mp4',
+ 'title': 'Das können die neuen iPads',
+ 'description': 'md5:a4058c4fa2a804ab59c00d7244bbf62f',
+ 'thumbnail': r're:^https?://.*\.jpg$',
+ 'duration': 196,
+ }
+ }, {
+ 'note': 'static MP4 and HLS',
+ 'url': 'https://www.bild.de/video/clip/news-ausland/deftiger-abgang-vom-10m-turm-bademeister-sorgt-fuer-skandal-85158620.bild.html',
+ 'md5': 'fb0ed4f09c495d4ba7ce2eee0bb90de1',
+ 'info_dict': {
+ 'id': '85158620',
+ 'ext': 'mp4',
+ 'title': 'Der Sprungturm-Skandal',
+ 'description': 'md5:709b543c24dc31bbbffee73bccda34ad',
+ 'thumbnail': r're:^https?://.*\.jpg$',
+ 'duration': 69,
+ }
+ }]
+
+ def _real_extract(self, url):
+ video_id = self._match_id(url)
+
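+ # Replacing the '.bild.html' suffix with ',view=json.bild.html' yields the clip metadata as JSON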
+ video_data = self._download_json(
+ url.split('.bild.html')[0] + ',view=json.bild.html', video_id)
+
+ formats = []
+ for src in traverse_obj(video_data, ('clipList', 0, 'srces', lambda _, v: v['src'])):
+ src_type = src.get('type')
+ if src_type == 'application/x-mpegURL':
+ formats.extend(
+ self._extract_m3u8_formats(
+ src['src'], video_id, 'mp4', m3u8_id='hls', fatal=False))
+ elif src_type == 'video/mp4':
+ formats.append({'url': src['src'], 'format_id': 'http-mp4'})
+ else:
+ self.report_warning(f'Skipping unsupported format type: "{src_type}"')
+
+ return {
+ 'id': video_id,
+ 'title': unescapeHTML(video_data['title']).strip(),
+ 'description': unescapeHTML(video_data.get('description')),
+ 'formats': formats,
+ 'thumbnail': video_data.get('poster'),
+ 'duration': int_or_none(video_data.get('durationSec')),
+ }
diff --git a/yt_dlp/extractor/bilibili.py b/yt_dlp/extractor/bilibili.py
new file mode 100644
index 0000000..fee4b29
--- /dev/null
+++ b/yt_dlp/extractor/bilibili.py
@@ -0,0 +1,2233 @@
+import base64
+import functools
+import hashlib
+import itertools
+import json
+import math
+import re
+import time
+import urllib.parse
+import uuid
+
+from .common import InfoExtractor, SearchInfoExtractor
+from ..dependencies import Cryptodome
+from ..networking.exceptions import HTTPError
+from ..utils import (
+ ExtractorError,
+ GeoRestrictedError,
+ InAdvancePagedList,
+ OnDemandPagedList,
+ bool_or_none,
+ clean_html,
+ determine_ext,
+ filter_dict,
+ float_or_none,
+ format_field,
+ get_element_by_class,
+ int_or_none,
+ join_nonempty,
+ make_archive_id,
+ merge_dicts,
+ mimetype2ext,
+ parse_count,
+ parse_qs,
+ qualities,
+ smuggle_url,
+ srt_subtitles_timecode,
+ str_or_none,
+ traverse_obj,
+ try_call,
+ unified_timestamp,
+ unsmuggle_url,
+ url_or_none,
+ urlencode_postdata,
+ variadic,
+)
+
+
+class BilibiliBaseIE(InfoExtractor):
+ _FORMAT_ID_RE = re.compile(r'-(\d+)\.m4s\?')
+
+ def extract_formats(self, play_info):
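+ # DASH manifests list audio and video renditions separately: collect audio first (including
+ # Dolby and FLAC variants) so that video entries can set 'acodec' accordingly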
+ format_names = {
+ r['quality']: traverse_obj(r, 'new_description', 'display_desc')
+ for r in traverse_obj(play_info, ('support_formats', lambda _, v: v['quality']))
+ }
+
+ audios = traverse_obj(play_info, ('dash', (None, 'dolby'), 'audio', ..., {dict}))
+ flac_audio = traverse_obj(play_info, ('dash', 'flac', 'audio'))
+ if flac_audio:
+ audios.append(flac_audio)
+ formats = [{
+ 'url': traverse_obj(audio, 'baseUrl', 'base_url', 'url'),
+ 'ext': mimetype2ext(traverse_obj(audio, 'mimeType', 'mime_type')),
+ 'acodec': traverse_obj(audio, ('codecs', {str.lower})),
+ 'vcodec': 'none',
+ 'tbr': float_or_none(audio.get('bandwidth'), scale=1000),
+ 'filesize': int_or_none(audio.get('size')),
+ 'format_id': str_or_none(audio.get('id')),
+ } for audio in audios]
+
+ formats.extend({
+ 'url': traverse_obj(video, 'baseUrl', 'base_url', 'url'),
+ 'ext': mimetype2ext(traverse_obj(video, 'mimeType', 'mime_type')),
+ 'fps': float_or_none(traverse_obj(video, 'frameRate', 'frame_rate')),
+ 'width': int_or_none(video.get('width')),
+ 'height': int_or_none(video.get('height')),
+ 'vcodec': video.get('codecs'),
+ 'acodec': 'none' if audios else None,
+ 'dynamic_range': {126: 'DV', 125: 'HDR10'}.get(int_or_none(video.get('id'))),
+ 'tbr': float_or_none(video.get('bandwidth'), scale=1000),
+ 'filesize': int_or_none(video.get('size')),
+ 'quality': int_or_none(video.get('id')),
+ 'format_id': traverse_obj(
+ video, (('baseUrl', 'base_url'), {self._FORMAT_ID_RE.search}, 1),
+ ('id', {str_or_none}), get_all=False),
+ 'format': format_names.get(video.get('id')),
+ } for video in traverse_obj(play_info, ('dash', 'video', ...)))
+
+ missing_formats = format_names.keys() - set(traverse_obj(formats, (..., 'quality')))
+ if missing_formats:
+ self.to_screen(f'Format(s) {", ".join(format_names[i] for i in missing_formats)} are missing; '
+ f'you have to log in or become a premium member to download them. {self._login_hint()}')
+
+ return formats
+
+ def _download_playinfo(self, video_id, cid):
+ return self._download_json(
+ 'https://api.bilibili.com/x/player/playurl', video_id,
+ query={'bvid': video_id, 'cid': cid, 'fnval': 4048},
+ note=f'Downloading video formats for cid {cid}')['data']
+
+ def json2srt(self, json_data):
+ srt_data = ''
+ for idx, line in enumerate(json_data.get('body') or []):
+ srt_data += (f'{idx + 1}\n'
+ f'{srt_subtitles_timecode(line["from"])} --> {srt_subtitles_timecode(line["to"])}\n'
+ f'{line["content"]}\n\n')
+ return srt_data
+
+ def _get_subtitles(self, video_id, cid, aid=None):
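+ # Always expose the danmaku (scrolling comment overlay) XML as a pseudo-subtitle track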
+ subtitles = {
+ 'danmaku': [{
+ 'ext': 'xml',
+ 'url': f'https://comment.bilibili.com/{cid}.xml',
+ }]
+ }
+
+ subtitle_info = traverse_obj(self._download_json(
+ 'https://api.bilibili.com/x/player/v2', video_id,
+ query={'aid': aid, 'cid': cid} if aid else {'bvid': video_id, 'cid': cid},
+ note=f'Extracting subtitle info {cid}'), ('data', 'subtitle'))
+ subs_list = traverse_obj(subtitle_info, ('subtitles', lambda _, v: v['subtitle_url'] and v['lan']))
+ if not subs_list and traverse_obj(subtitle_info, 'allow_submit'):
+ if not self._get_cookies('https://api.bilibili.com').get('SESSDATA'): # no login session cookie
+ self.report_warning(f'CC subtitles (if any) are only visible when logged in. {self._login_hint()}', only_once=True)
+ for s in subs_list:
+ subtitles.setdefault(s['lan'], []).append({
+ 'ext': 'srt',
+ 'data': self.json2srt(self._download_json(s['subtitle_url'], video_id))
+ })
+ return subtitles
+
+ def _get_chapters(self, aid, cid):
+ chapters = aid and cid and self._download_json(
+ 'https://api.bilibili.com/x/player/v2', aid, query={'aid': aid, 'cid': cid},
+ note='Extracting chapters', fatal=False)
+ return traverse_obj(chapters, ('data', 'view_points', ..., {
+ 'title': 'content',
+ 'start_time': 'from',
+ 'end_time': 'to',
+ })) or None
+
+ def _get_comments(self, aid):
+ for idx in itertools.count(1):
+ replies = traverse_obj(
+ self._download_json(
+ f'https://api.bilibili.com/x/v2/reply?pn={idx}&oid={aid}&type=1&jsonp=jsonp&sort=2&_=1567227301685',
+ aid, note=f'Extracting comments from page {idx}', fatal=False),
+ ('data', 'replies'))
+ if not replies:
+ return
+ for children in map(self._get_all_children, replies):
+ yield from children
+
+ def _get_all_children(self, reply):
+ yield {
+ 'author': traverse_obj(reply, ('member', 'uname')),
+ 'author_id': traverse_obj(reply, ('member', 'mid')),
+ 'id': reply.get('rpid'),
+ 'text': traverse_obj(reply, ('content', 'message')),
+ 'timestamp': reply.get('ctime'),
+ 'parent': reply.get('parent') or 'root',
+ }
+ for children in map(self._get_all_children, traverse_obj(reply, ('replies', ...))):
+ yield from children
+
+ def _get_episodes_from_season(self, ss_id, url):
+ season_info = self._download_json(
+ 'https://api.bilibili.com/pgc/web/season/section', ss_id,
+ note='Downloading season info', query={'season_id': ss_id},
+ headers={'Referer': url, **self.geo_verification_headers()})
+
+ for entry in traverse_obj(season_info, (
+ 'result', 'main_section', 'episodes',
+ lambda _, v: url_or_none(v['share_url']) and v['id'])):
+ yield self.url_result(entry['share_url'], BiliBiliBangumiIE, str_or_none(entry.get('id')))
+
+ def _get_divisions(self, video_id, graph_version, edges, edge_id, cid_edges=None):
+ cid_edges = cid_edges or {}
+ division_data = self._download_json(
+ 'https://api.bilibili.com/x/stein/edgeinfo_v2', video_id,
+ query={'graph_version': graph_version, 'edge_id': edge_id, 'bvid': video_id},
+ note=f'Extracting divisions from edge {edge_id}')
+ edges.setdefault(edge_id, {}).update(
+ traverse_obj(division_data, ('data', 'story_list', lambda _, v: v['edge_id'] == edge_id, {
+ 'title': ('title', {str}),
+ 'cid': ('cid', {int_or_none}),
+ }), get_all=False))
+
+ edges[edge_id].update(traverse_obj(division_data, ('data', {
+ 'title': ('title', {str}),
+ 'choices': ('edges', 'questions', ..., 'choices', ..., {
+ 'edge_id': ('id', {int_or_none}),
+ 'cid': ('cid', {int_or_none}),
+ 'text': ('option', {str}),
+ }),
+ })))
+ # use dict to combine edges that use the same video section (same cid)
+ cid_edges.setdefault(edges[edge_id]['cid'], {})[edge_id] = edges[edge_id]
+ for choice in traverse_obj(edges, (edge_id, 'choices', ...)):
+ if choice['edge_id'] not in edges:
+ edges[choice['edge_id']] = {'cid': choice['cid']}
+ self._get_divisions(video_id, graph_version, edges, choice['edge_id'], cid_edges=cid_edges)
+ return cid_edges
+
+ def _get_interactive_entries(self, video_id, cid, metainfo):
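+ # Interactive videos form a graph of divisions; walk it and yield one entry per unique cid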
+ graph_version = traverse_obj(
+ self._download_json(
+ 'https://api.bilibili.com/x/player/wbi/v2', video_id,
+ 'Extracting graph version', query={'bvid': video_id, 'cid': cid}),
+ ('data', 'interaction', 'graph_version', {int_or_none}))
+ cid_edges = self._get_divisions(video_id, graph_version, {1: {'cid': cid}}, 1)
+ for cid, edges in cid_edges.items():
+ play_info = self._download_playinfo(video_id, cid)
+ yield {
+ **metainfo,
+ 'id': f'{video_id}_{cid}',
+ 'title': f'{metainfo.get("title")} - {list(edges.values())[0].get("title")}',
+ 'formats': self.extract_formats(play_info),
+ 'description': f'{json.dumps(edges, ensure_ascii=False)}\n{metainfo.get("description", "")}',
+ 'duration': float_or_none(play_info.get('timelength'), scale=1000),
+ 'subtitles': self.extract_subtitles(video_id, cid),
+ }
+
+
+class BiliBiliIE(BilibiliBaseIE):
+ _VALID_URL = r'https?://(?:www\.)?bilibili\.com/(?:video/|festival/\w+\?(?:[^#]*&)?bvid=)[aAbB][vV](?P<id>[^/?#&]+)'
+
+ _TESTS = [{
+ 'url': 'https://www.bilibili.com/video/BV13x41117TL',
+ 'info_dict': {
+ 'id': 'BV13x41117TL',
+ 'title': '阿滴英文|英文歌分享#6 "Closer',
+ 'ext': 'mp4',
+ 'description': '滴妹今天唱Closer給你聽! 有史以来,被推最多次也是最久的歌曲,其实歌词跟我原本想像差蛮多的,不过还是好听! 微博@阿滴英文',
+ 'uploader_id': '65880958',
+ 'uploader': '阿滴英文',
+ 'thumbnail': r're:^https?://.*\.(jpg|jpeg|png)$',
+ 'duration': 554.117,
+ 'tags': list,
+ 'comment_count': int,
+ 'upload_date': '20170301',
+ 'timestamp': 1488353834,
+ 'like_count': int,
+ 'view_count': int,
+ },
+ }, {
+ 'note': 'old av URL version',
+ 'url': 'http://www.bilibili.com/video/av1074402/',
+ 'info_dict': {
+ 'thumbnail': r're:^https?://.*\.(jpg|jpeg)$',
+ 'ext': 'mp4',
+ 'uploader': '菊子桑',
+ 'uploader_id': '156160',
+ 'id': 'BV11x411K7CN',
+ 'title': '【金坷垃】金泡沫',
+ 'duration': 308.36,
+ 'upload_date': '20140420',
+ 'timestamp': 1397983878,
+ 'description': 'md5:ce18c2a2d2193f0df2917d270f2e5923',
+ 'like_count': int,
+ 'comment_count': int,
+ 'view_count': int,
+ 'tags': list,
+ },
+ 'params': {'skip_download': True},
+ }, {
+ 'note': 'Anthology',
+ 'url': 'https://www.bilibili.com/video/BV1bK411W797',
+ 'info_dict': {
+ 'id': 'BV1bK411W797',
+ 'title': '物语中的人物是如何吐槽自己的OP的'
+ },
+ 'playlist_count': 18,
+ 'playlist': [{
+ 'info_dict': {
+ 'id': 'BV1bK411W797_p1',
+ 'ext': 'mp4',
+ 'title': '物语中的人物是如何吐槽自己的OP的 p01 Staple Stable/战场原+羽川',
+ 'tags': 'count:10',
+ 'timestamp': 1589601697,
+ 'thumbnail': r're:^https?://.*\.(jpg|jpeg|png)$',
+ 'uploader': '打牌还是打桩',
+ 'uploader_id': '150259984',
+ 'like_count': int,
+ 'comment_count': int,
+ 'upload_date': '20200516',
+ 'view_count': int,
+ 'description': 'md5:e3c401cf7bc363118d1783dd74068a68',
+ 'duration': 90.314,
+ }
+ }]
+ }, {
+ 'note': 'Specific page of Anthology',
+ 'url': 'https://www.bilibili.com/video/BV1bK411W797?p=1',
+ 'info_dict': {
+ 'id': 'BV1bK411W797_p1',
+ 'ext': 'mp4',
+ 'title': '物语中的人物是如何吐槽自己的OP的 p01 Staple Stable/战场原+羽川',
+ 'tags': 'count:10',
+ 'timestamp': 1589601697,
+ 'thumbnail': r're:^https?://.*\.(jpg|jpeg|png)$',
+ 'uploader': '打牌还是打桩',
+ 'uploader_id': '150259984',
+ 'like_count': int,
+ 'comment_count': int,
+ 'upload_date': '20200516',
+ 'view_count': int,
+ 'description': 'md5:e3c401cf7bc363118d1783dd74068a68',
+ 'duration': 90.314,
+ }
+ }, {
+ 'note': 'video has subtitles',
+ 'url': 'https://www.bilibili.com/video/BV12N4y1M7rh',
+ 'info_dict': {
+ 'id': 'BV12N4y1M7rh',
+ 'ext': 'mp4',
+ 'title': 'md5:96e8bb42c2b432c0d4ce3434a61479c1',
+ 'tags': list,
+ 'description': 'md5:afde2b7ba9025c01d9e3dde10de221e4',
+ 'duration': 313.557,
+ 'upload_date': '20220709',
+ 'uploader': '小夫太渴',
+ 'timestamp': 1657347907,
+ 'uploader_id': '1326814124',
+ 'comment_count': int,
+ 'view_count': int,
+ 'like_count': int,
+ 'thumbnail': r're:^https?://.*\.(jpg|jpeg|png)$',
+ 'subtitles': 'count:2'
+ },
+ 'params': {'listsubtitles': True},
+ }, {
+ 'url': 'https://www.bilibili.com/video/av8903802/',
+ 'info_dict': {
+ 'id': 'BV13x41117TL',
+ 'ext': 'mp4',
+ 'title': '阿滴英文|英文歌分享#6 "Closer',
+ 'upload_date': '20170301',
+ 'description': 'md5:3b1b9e25b78da4ef87e9b548b88ee76a',
+ 'timestamp': 1488353834,
+ 'uploader_id': '65880958',
+ 'uploader': '阿滴英文',
+ 'thumbnail': r're:^https?://.*\.(jpg|jpeg|png)$',
+ 'duration': 554.117,
+ 'tags': list,
+ 'comment_count': int,
+ 'view_count': int,
+ 'like_count': int,
+ },
+ 'params': {
+ 'skip_download': True,
+ },
+ }, {
+ 'note': 'video has chapter',
+ 'url': 'https://www.bilibili.com/video/BV1vL411G7N7/',
+ 'info_dict': {
+ 'id': 'BV1vL411G7N7',
+ 'ext': 'mp4',
+ 'title': '如何为你的B站视频添加进度条分段',
+ 'timestamp': 1634554558,
+ 'upload_date': '20211018',
+ 'description': 'md5:a9a3d6702b3a94518d419b2e9c320a6d',
+ 'tags': list,
+ 'uploader': '爱喝咖啡的当麻',
+ 'duration': 669.482,
+ 'uploader_id': '1680903',
+ 'chapters': 'count:6',
+ 'comment_count': int,
+ 'view_count': int,
+ 'like_count': int,
+ 'thumbnail': r're:^https?://.*\.(jpg|jpeg|png)$',
+ },
+ 'params': {'skip_download': True},
+ }, {
+ 'note': 'video redirects to festival page',
+ 'url': 'https://www.bilibili.com/video/BV1wP4y1P72h',
+ 'info_dict': {
+ 'id': 'BV1wP4y1P72h',
+ 'ext': 'mp4',
+ 'title': '牛虎年相交之际,一首传统民族打击乐《牛斗虎》祝大家新春快乐,虎年大吉!【bilibili音乐虎闹新春】',
+ 'timestamp': 1643947497,
+ 'upload_date': '20220204',
+ 'description': 'md5:8681a0d4d2c06b4ae27e59c8080a7fe6',
+ 'uploader': '叨叨冯聊音乐',
+ 'duration': 246.719,
+ 'uploader_id': '528182630',
+ 'view_count': int,
+ 'like_count': int,
+ 'thumbnail': r're:^https?://.*\.(jpg|jpeg|png)$',
+ },
+ 'params': {'skip_download': True},
+ }, {
+ 'note': 'newer festival video',
+ 'url': 'https://www.bilibili.com/festival/2023honkaiimpact3gala?bvid=BV1ay4y1d77f',
+ 'info_dict': {
+ 'id': 'BV1ay4y1d77f',
+ 'ext': 'mp4',
+ 'title': '【崩坏3新春剧场】为特别的你送上祝福!',
+ 'timestamp': 1674273600,
+ 'upload_date': '20230121',
+ 'description': 'md5:58af66d15c6a0122dc30c8adfd828dd8',
+ 'uploader': '果蝇轰',
+ 'duration': 1111.722,
+ 'uploader_id': '8469526',
+ 'view_count': int,
+ 'like_count': int,
+ 'thumbnail': r're:^https?://.*\.(jpg|jpeg|png)$',
+ },
+ 'params': {'skip_download': True},
+ }, {
+ 'note': 'interactive/split-path video',
+ 'url': 'https://www.bilibili.com/video/BV1af4y1H7ga/',
+ 'info_dict': {
+ 'id': 'BV1af4y1H7ga',
+ 'title': '【互动游戏】花了大半年时间做的自我介绍~请查收!!',
+ 'timestamp': 1630500414,
+ 'upload_date': '20210901',
+ 'description': 'md5:01113e39ab06e28042d74ac356a08786',
+ 'tags': list,
+ 'uploader': '钉宫妮妮Ninico',
+ 'duration': 1503,
+ 'uploader_id': '8881297',
+ 'comment_count': int,
+ 'view_count': int,
+ 'like_count': int,
+ 'thumbnail': r're:^https?://.*\.(jpg|jpeg|png)$',
+ },
+ 'playlist_count': 33,
+ 'playlist': [{
+ 'info_dict': {
+ 'id': 'BV1af4y1H7ga_400950101',
+ 'ext': 'mp4',
+ 'title': '【互动游戏】花了大半年时间做的自我介绍~请查收!! - 听见猫猫叫~',
+ 'timestamp': 1630500414,
+ 'upload_date': '20210901',
+ 'description': 'md5:db66ac7a2813a94b8291dbce990cc5b2',
+ 'tags': list,
+ 'uploader': '钉宫妮妮Ninico',
+ 'duration': 11.605,
+ 'uploader_id': '8881297',
+ 'comment_count': int,
+ 'view_count': int,
+ 'like_count': int,
+ 'thumbnail': r're:^https?://.*\.(jpg|jpeg|png)$',
+ },
+ }],
+ }, {
+ 'note': '301 redirect to bangumi link',
+ 'url': 'https://www.bilibili.com/video/BV1TE411f7f1',
+ 'info_dict': {
+ 'id': '288525',
+ 'title': '李永乐老师 钱学森弹道和乘波体飞行器是什么?',
+ 'ext': 'mp4',
+ 'series': '我和我的祖国',
+ 'series_id': '4780',
+ 'season': '幕后纪实',
+ 'season_id': '28609',
+ 'season_number': 1,
+ 'episode': '钱学森弹道和乘波体飞行器是什么?',
+ 'episode_id': '288525',
+ 'episode_number': 105,
+ 'duration': 1183.957,
+ 'timestamp': 1571648124,
+ 'upload_date': '20191021',
+ 'thumbnail': r're:^https?://.*\.(jpg|jpeg|png)$',
+ },
+ }, {
+ 'url': 'https://www.bilibili.com/video/BV1jL41167ZG/',
+ 'info_dict': {
+ 'id': 'BV1jL41167ZG',
+ 'title': '一场大火引发的离奇死亡!古典推理经典短篇集《不可能犯罪诊断书》!',
+ 'ext': 'mp4',
+ },
+ 'skip': 'supporter-only video',
+ }, {
+ 'url': 'https://www.bilibili.com/video/BV1Ks411f7aQ/',
+ 'info_dict': {
+ 'id': 'BV1Ks411f7aQ',
+ 'title': '【BD1080P】狼与香辛料I【华盟】',
+ 'ext': 'mp4',
+ },
+ 'skip': 'login required',
+ }, {
+ 'url': 'https://www.bilibili.com/video/BV1GJ411x7h7/',
+ 'info_dict': {
+ 'id': 'BV1GJ411x7h7',
+ 'title': '【官方 MV】Never Gonna Give You Up - Rick Astley',
+ 'ext': 'mp4',
+ },
+ 'skip': 'geo-restricted',
+ }]
+
+ def _real_extract(self, url):
+ video_id = self._match_id(url)
+ webpage, urlh = self._download_webpage_handle(url, video_id)
+ if not self._match_valid_url(urlh.url):
+ return self.url_result(urlh.url)
+
+ initial_state = self._search_json(r'window\.__INITIAL_STATE__\s*=', webpage, 'initial state', video_id)
+
+ is_festival = 'videoData' not in initial_state
+ if is_festival:
+ video_data = initial_state['videoInfo']
+ else:
+ play_info_obj = self._search_json(
+ r'window\.__playinfo__\s*=', webpage, 'play info', video_id, fatal=False)
+ if not play_info_obj:
+ if traverse_obj(initial_state, ('error', 'trueCode')) == -403:
+ self.raise_login_required()
+ if traverse_obj(initial_state, ('error', 'trueCode')) == -404:
+ raise ExtractorError(
+ 'This video may have been deleted or may be geo-restricted. '
+ 'You might want to try a VPN or a proxy server (with --proxy)', expected=True)
+ play_info = traverse_obj(play_info_obj, ('data', {dict}))
+ if not play_info:
+ if traverse_obj(play_info_obj, 'code') == 87007:
+ toast = get_element_by_class('tips-toast', webpage) or ''
+ msg = clean_html(
+ f'{get_element_by_class("belongs-to", toast) or ""},'
+ + (get_element_by_class('level', toast) or ''))
+ raise ExtractorError(
+ f'This is a supporter-only video: {msg}. {self._login_hint()}', expected=True)
+ raise ExtractorError('Failed to extract play info')
+ video_data = initial_state['videoData']
+
+ video_id, title = video_data['bvid'], video_data.get('title')
+
+ # Bilibili anthologies are similar to playlists but all videos share the same video ID as the anthology itself.
+ page_list_json = not is_festival and traverse_obj(
+ self._download_json(
+ 'https://api.bilibili.com/x/player/pagelist', video_id,
+ fatal=False, query={'bvid': video_id, 'jsonp': 'jsonp'},
+ note='Extracting videos in anthology'),
+ 'data', expected_type=list) or []
+ is_anthology = len(page_list_json) > 1
+
+ part_id = int_or_none(parse_qs(url).get('p', [None])[-1])
+ if is_anthology and not part_id and self._yes_playlist(video_id, video_id):
+ return self.playlist_from_matches(
+ page_list_json, video_id, title, ie=BiliBiliIE,
+ getter=lambda entry: f'https://www.bilibili.com/video/{video_id}?p={entry["page"]}')
+
+ if is_anthology:
+ part_id = part_id or 1
+ title += f' p{part_id:02d} {traverse_obj(page_list_json, (part_id - 1, "part")) or ""}'
+
+ aid = video_data.get('aid')
+ old_video_id = format_field(aid, None, f'%s_part{part_id or 1}')
+
+ cid = traverse_obj(video_data, ('pages', part_id - 1, 'cid')) if part_id else video_data.get('cid')
+
+ festival_info = {}
+ if is_festival:
+ play_info = self._download_playinfo(video_id, cid)
+
+ festival_info = traverse_obj(initial_state, {
+ 'uploader': ('videoInfo', 'upName'),
+ 'uploader_id': ('videoInfo', 'upMid', {str_or_none}),
+ 'like_count': ('videoStatus', 'like', {int_or_none}),
+ 'thumbnail': ('sectionEpisodes', lambda _, v: v['bvid'] == video_id, 'cover'),
+ }, get_all=False)
+
+ metainfo = {
+ **traverse_obj(initial_state, {
+ 'uploader': ('upData', 'name'),
+ 'uploader_id': ('upData', 'mid', {str_or_none}),
+ 'like_count': ('videoData', 'stat', 'like', {int_or_none}),
+ 'tags': ('tags', ..., 'tag_name'),
+ 'thumbnail': ('videoData', 'pic', {url_or_none}),
+ }),
+ **festival_info,
+ **traverse_obj(video_data, {
+ 'description': 'desc',
+ 'timestamp': ('pubdate', {int_or_none}),
+ 'view_count': (('viewCount', ('stat', 'view')), {int_or_none}),
+ 'comment_count': ('stat', 'reply', {int_or_none}),
+ }, get_all=False),
+ 'id': f'{video_id}{format_field(part_id, None, "_p%d")}',
+ '_old_archive_ids': [make_archive_id(self, old_video_id)] if old_video_id else None,
+ 'title': title,
+ 'http_headers': {'Referer': url},
+ }
+
+ is_interactive = traverse_obj(video_data, ('rights', 'is_stein_gate'))
+ if is_interactive:
+ return self.playlist_result(
+ self._get_interactive_entries(video_id, cid, metainfo), **metainfo, **{
+ 'duration': traverse_obj(initial_state, ('videoData', 'duration', {int_or_none})),
+ '__post_extractor': self.extract_comments(aid),
+ })
+ else:
+ return {
+ **metainfo,
+ 'duration': float_or_none(play_info.get('timelength'), scale=1000),
+ 'chapters': self._get_chapters(aid, cid),
+ 'subtitles': self.extract_subtitles(video_id, cid),
+ 'formats': self.extract_formats(play_info),
+ '__post_extractor': self.extract_comments(aid),
+ }
+
+
+class BiliBiliBangumiIE(BilibiliBaseIE):
+ _VALID_URL = r'https?://(?:www\.)?bilibili\.com/bangumi/play/ep(?P<id>\d+)'
+
+ _TESTS = [{
+ 'url': 'https://www.bilibili.com/bangumi/play/ep21495/',
+ 'info_dict': {
+ 'id': '21495',
+ 'ext': 'mp4',
+ 'series': '悠久之翼',
+ 'series_id': '774',
+ 'season': '第二季',
+ 'season_id': '1182',
+ 'season_number': 2,
+ 'episode': 'forever/ef',
+ 'episode_id': '21495',
+ 'episode_number': 12,
+ 'title': '12 forever/ef',
+ 'duration': 1420.791,
+ 'timestamp': 1320412200,
+ 'upload_date': '20111104',
+ 'thumbnail': r're:^https?://.*\.(jpg|jpeg|png)$',
+ },
+ }, {
+ 'url': 'https://www.bilibili.com/bangumi/play/ep267851',
+ 'info_dict': {
+ 'id': '267851',
+ 'ext': 'mp4',
+ 'series': '鬼灭之刃',
+ 'series_id': '4358',
+ 'season': '立志篇',
+ 'season_id': '26801',
+ 'season_number': 1,
+ 'episode': '残酷',
+ 'episode_id': '267851',
+ 'episode_number': 1,
+ 'title': '1 残酷',
+ 'duration': 1425.256,
+ 'timestamp': 1554566400,
+ 'upload_date': '20190406',
+ 'thumbnail': r're:^https?://.*\.(jpg|jpeg|png)$'
+ },
+ 'skip': 'Geo-restricted',
+ }, {
+ 'note': 'a making-of which falls outside main section',
+ 'url': 'https://www.bilibili.com/bangumi/play/ep345120',
+ 'info_dict': {
+ 'id': '345120',
+ 'ext': 'mp4',
+ 'series': '鬼灭之刃',
+ 'series_id': '4358',
+ 'season': '立志篇',
+ 'season_id': '26801',
+ 'season_number': 1,
+ 'episode': '炭治郎篇',
+ 'episode_id': '345120',
+ 'episode_number': 27,
+ 'title': '#1 炭治郎篇',
+ 'duration': 1922.129,
+ 'timestamp': 1602853860,
+ 'upload_date': '20201016',
+ 'thumbnail': r're:^https?://.*\.(jpg|jpeg|png)$'
+ },
+ }]
+
+ def _real_extract(self, url):
+ episode_id = self._match_id(url)
+ webpage = self._download_webpage(url, episode_id)
+
+ if '您所在的地区无法观看本片' in webpage:
+ raise GeoRestrictedError('This video is restricted')
+ elif '正在观看预览,大会员免费看全片' in webpage:
+ self.raise_login_required('This video is for premium members only')
+
+ headers = {'Referer': url, **self.geo_verification_headers()}
+ play_info = self._download_json(
+ 'https://api.bilibili.com/pgc/player/web/v2/playurl', episode_id,
+ 'Extracting episode', query={'fnval': '4048', 'ep_id': episode_id},
+ headers=headers)
+ premium_only = play_info.get('code') == -10403
+ play_info = traverse_obj(play_info, ('result', 'video_info', {dict})) or {}
+
+ formats = self.extract_formats(play_info)
+ if not formats and (premium_only or '成为大会员抢先看' in webpage or '开通大会员观看' in webpage):
+ self.raise_login_required('This video is for premium members only')
+
+ bangumi_info = self._download_json(
+ 'https://api.bilibili.com/pgc/view/web/season', episode_id, 'Get episode details',
+ query={'ep_id': episode_id}, headers=headers)['result']
+
+ episode_number, episode_info = next((
+ (idx, ep) for idx, ep in enumerate(traverse_obj(
+ bangumi_info, (('episodes', ('section', ..., 'episodes')), ..., {dict})), 1)
+ if str_or_none(ep.get('id')) == episode_id), (1, {}))
+
+ season_id = bangumi_info.get('season_id')
+ season_number, season_title = season_id and next((
+ (idx + 1, e.get('season_title')) for idx, e in enumerate(
+ traverse_obj(bangumi_info, ('seasons', ...)))
+ if e.get('season_id') == season_id
+ ), (None, None))
+
+ aid = episode_info.get('aid')
+
+ return {
+ 'id': episode_id,
+ 'formats': formats,
+ **traverse_obj(bangumi_info, {
+ 'series': ('series', 'series_title', {str}),
+ 'series_id': ('series', 'series_id', {str_or_none}),
+ 'thumbnail': ('square_cover', {url_or_none}),
+ }),
+ **traverse_obj(episode_info, {
+ 'episode': ('long_title', {str}),
+ 'episode_number': ('title', {int_or_none}, {lambda x: x or episode_number}),
+ 'timestamp': ('pub_time', {int_or_none}),
+ 'title': {lambda v: v and join_nonempty('title', 'long_title', delim=' ', from_dict=v)},
+ }),
+ 'episode_id': episode_id,
+ 'season': str_or_none(season_title),
+ 'season_id': str_or_none(season_id),
+ 'season_number': season_number,
+ 'duration': float_or_none(play_info.get('timelength'), scale=1000),
+ 'subtitles': self.extract_subtitles(episode_id, episode_info.get('cid'), aid=aid),
+ '__post_extractor': self.extract_comments(aid),
+ 'http_headers': headers,
+ }
+
+
+class BiliBiliBangumiMediaIE(BilibiliBaseIE):
+ _VALID_URL = r'https?://(?:www\.)?bilibili\.com/bangumi/media/md(?P<id>\d+)'
+ _TESTS = [{
+ 'url': 'https://www.bilibili.com/bangumi/media/md24097891',
+ 'info_dict': {
+ 'id': '24097891',
+ 'title': 'CAROLE & TUESDAY',
+ 'description': 'md5:42417ad33d1eaa1c93bfd2dd1626b829',
+ },
+ 'playlist_mincount': 25,
+ }, {
+ 'url': 'https://www.bilibili.com/bangumi/media/md1565/',
+ 'info_dict': {
+ 'id': '1565',
+ 'title': '攻壳机动队 S.A.C. 2nd GIG',
+ 'description': 'md5:46cac00bafd645b97f4d6df616fc576d',
+ },
+ 'playlist_count': 26,
+ 'playlist': [{
+ 'info_dict': {
+ 'id': '68540',
+ 'ext': 'mp4',
+ 'series': '攻壳机动队',
+ 'series_id': '1077',
+ 'season': '第二季',
+ 'season_id': '1565',
+ 'season_number': 2,
+ 'episode': '再启动 REEMBODY',
+ 'episode_id': '68540',
+ 'episode_number': 1,
+ 'title': '1 再启动 REEMBODY',
+ 'duration': 1525.777,
+ 'timestamp': 1425074413,
+ 'upload_date': '20150227',
+ 'thumbnail': r're:^https?://.*\.(jpg|jpeg|png)$'
+ },
+ }],
+ }]
+
+ def _real_extract(self, url):
+ media_id = self._match_id(url)
+ webpage = self._download_webpage(url, media_id)
+
+ initial_state = self._search_json(
+ r'window\.__INITIAL_STATE__\s*=', webpage, 'initial_state', media_id)
+ ss_id = initial_state['mediaInfo']['season_id']
+
+ return self.playlist_result(
+ self._get_episodes_from_season(ss_id, url), media_id,
+ **traverse_obj(initial_state, ('mediaInfo', {
+ 'title': ('title', {str}),
+ 'description': ('evaluate', {str}),
+ })))
+
+
+class BiliBiliBangumiSeasonIE(BilibiliBaseIE):
+ _VALID_URL = r'(?x)https?://(?:www\.)?bilibili\.com/bangumi/play/ss(?P<id>\d+)'
+ _TESTS = [{
+ 'url': 'https://www.bilibili.com/bangumi/play/ss26801',
+ 'info_dict': {
+ 'id': '26801',
+ 'title': '鬼灭之刃',
+ 'description': 'md5:e2cc9848b6f69be6db79fc2a82d9661b',
+ },
+ 'playlist_mincount': 26
+ }, {
+ 'url': 'https://www.bilibili.com/bangumi/play/ss2251',
+ 'info_dict': {
+ 'id': '2251',
+ 'title': '玲音',
+ 'description': 'md5:1fd40e3df4c08d4d9d89a6a34844bdc4',
+ },
+ 'playlist_count': 13,
+ 'playlist': [{
+ 'info_dict': {
+ 'id': '50188',
+ 'ext': 'mp4',
+ 'series': '玲音',
+ 'series_id': '1526',
+ 'season': 'TV',
+ 'season_id': '2251',
+ 'season_number': 1,
+ 'episode': 'WEIRD',
+ 'episode_id': '50188',
+ 'episode_number': 1,
+ 'title': '1 WEIRD',
+ 'duration': 1436.992,
+ 'timestamp': 1343185080,
+ 'upload_date': '20120725',
+ 'thumbnail': r're:^https?://.*\.(jpg|jpeg|png)$'
+ },
+ }],
+ }]
+
+ def _real_extract(self, url):
+ ss_id = self._match_id(url)
+ webpage = self._download_webpage(url, ss_id)
+ metainfo = traverse_obj(
+ self._search_json(r'<script[^>]+type="application/ld\+json"[^>]*>', webpage, 'info', ss_id),
+ ('itemListElement', ..., {
+ 'title': ('name', {str}),
+ 'description': ('description', {str}),
+ }), get_all=False)
+
+ return self.playlist_result(self._get_episodes_from_season(ss_id, url), ss_id, **metainfo)
+
+
+class BilibiliCheeseBaseIE(BilibiliBaseIE):
+ _HEADERS = {'Referer': 'https://www.bilibili.com/'}
+
+ def _extract_episode(self, season_info, ep_id):
+ episode_info = traverse_obj(season_info, (
+ 'episodes', lambda _, v: v['id'] == int(ep_id)), get_all=False)
+ aid, cid = episode_info['aid'], episode_info['cid']
+
+ if traverse_obj(episode_info, 'ep_status') == -1:
+ raise ExtractorError('This course episode is not yet available.', expected=True)
+ if not traverse_obj(episode_info, 'playable'):
+ self.raise_login_required('You need to purchase the course to download this episode')
+
+ play_info = self._download_json(
+ 'https://api.bilibili.com/pugv/player/web/playurl', ep_id,
+ query={'avid': aid, 'cid': cid, 'ep_id': ep_id, 'fnval': 16, 'fourk': 1},
+ headers=self._HEADERS, note='Downloading playinfo')['data']
+
+ return {
+ 'id': str_or_none(ep_id),
+ 'episode_id': str_or_none(ep_id),
+ 'formats': self.extract_formats(play_info),
+ 'extractor_key': BilibiliCheeseIE.ie_key(),
+ 'extractor': BilibiliCheeseIE.IE_NAME,
+ 'webpage_url': f'https://www.bilibili.com/cheese/play/ep{ep_id}',
+ **traverse_obj(episode_info, {
+ 'episode': ('title', {str}),
+ 'title': {lambda v: v and join_nonempty('index', 'title', delim=' - ', from_dict=v)},
+ 'alt_title': ('subtitle', {str}),
+ 'duration': ('duration', {int_or_none}),
+ 'episode_number': ('index', {int_or_none}),
+ 'thumbnail': ('cover', {url_or_none}),
+ 'timestamp': ('release_date', {int_or_none}),
+ 'view_count': ('play', {int_or_none}),
+ }),
+ **traverse_obj(season_info, {
+ 'uploader': ('up_info', 'uname', {str}),
+ 'uploader_id': ('up_info', 'mid', {str_or_none}),
+ }),
+ 'subtitles': self.extract_subtitles(ep_id, cid, aid=aid),
+ '__post_extractor': self.extract_comments(aid),
+ 'http_headers': self._HEADERS,
+ }
+
+ def _download_season_info(self, query_key, video_id):
+ return self._download_json(
+ f'https://api.bilibili.com/pugv/view/web/season?{query_key}={video_id}', video_id,
+ headers=self._HEADERS, note='Downloading season info')['data']
+
+
+class BilibiliCheeseIE(BilibiliCheeseBaseIE):
+ _VALID_URL = r'https?://(?:www\.)?bilibili\.com/cheese/play/ep(?P<id>\d+)'
+ _TESTS = [{
+ 'url': 'https://www.bilibili.com/cheese/play/ep229832',
+ 'info_dict': {
+ 'id': '229832',
+ 'ext': 'mp4',
+ 'title': '1 - 课程先导片',
+ 'alt_title': '视频课 · 3分41秒',
+ 'uploader': '马督工',
+ 'uploader_id': '316568752',
+ 'episode': '课程先导片',
+ 'episode_id': '229832',
+ 'episode_number': 1,
+ 'duration': 221,
+ 'timestamp': 1695549606,
+ 'upload_date': '20230924',
+ 'thumbnail': r're:^https?://.*\.(jpg|jpeg|png)$',
+ 'view_count': int,
+ }
+ }]
+
+ def _real_extract(self, url):
+ ep_id = self._match_id(url)
+ return self._extract_episode(self._download_season_info('ep_id', ep_id), ep_id)
+
+
+class BilibiliCheeseSeasonIE(BilibiliCheeseBaseIE):
+ _VALID_URL = r'https?://(?:www\.)?bilibili\.com/cheese/play/ss(?P<id>\d+)'
+ _TESTS = [{
+ 'url': 'https://www.bilibili.com/cheese/play/ss5918',
+ 'info_dict': {
+ 'id': '5918',
+ 'title': '【限时五折】新闻系学不到:马督工教你做自媒体',
+ 'description': '帮普通人建立世界模型,降低人与人的沟通门槛',
+ },
+ 'playlist': [{
+ 'info_dict': {
+ 'id': '229832',
+ 'ext': 'mp4',
+ 'title': '1 - 课程先导片',
+ 'alt_title': '视频课 · 3分41秒',
+ 'uploader': '马督工',
+ 'uploader_id': '316568752',
+ 'episode': '课程先导片',
+ 'episode_id': '229832',
+ 'episode_number': 1,
+ 'duration': 221,
+ 'timestamp': 1695549606,
+ 'upload_date': '20230924',
+ 'thumbnail': r're:^https?://.*\.(jpg|jpeg|png)$',
+ 'view_count': int,
+ }
+ }],
+ 'params': {'playlist_items': '1'},
+ }, {
+ 'url': 'https://www.bilibili.com/cheese/play/ss5918',
+ 'info_dict': {
+ 'id': '5918',
+ 'title': '【限时五折】新闻系学不到:马督工教你做自媒体',
+ 'description': '帮普通人建立世界模型,降低人与人的沟通门槛',
+ },
+ 'playlist_mincount': 5,
+ 'skip': 'paid video in list',
+ }]
+
+ def _get_cheese_entries(self, season_info):
+ for ep_id in traverse_obj(season_info, ('episodes', lambda _, v: v['episode_can_view'], 'id')):
+ yield self._extract_episode(season_info, ep_id)
+
+ def _real_extract(self, url):
+ season_id = self._match_id(url)
+ season_info = self._download_season_info('season_id', season_id)
+
+ return self.playlist_result(
+ self._get_cheese_entries(season_info), season_id,
+ **traverse_obj(season_info, {
+ 'title': ('title', {str}),
+ 'description': ('subtitle', {str}),
+ }))
+
+
+class BilibiliSpaceBaseIE(InfoExtractor):
+ def _extract_playlist(self, fetch_page, get_metadata, get_entries):
+ first_page = fetch_page(0)
+ metadata = get_metadata(first_page)
+
+ paged_list = InAdvancePagedList(
+ lambda idx: get_entries(fetch_page(idx) if idx else first_page),
+ metadata['page_count'], metadata['page_size'])
+
+ return metadata, paged_list
+
+
+class BilibiliSpaceVideoIE(BilibiliSpaceBaseIE):
+ _VALID_URL = r'https?://space\.bilibili\.com/(?P<id>\d+)(?P<video>/video)?/?(?:[?#]|$)'
+ _TESTS = [{
+ 'url': 'https://space.bilibili.com/3985676/video',
+ 'info_dict': {
+ 'id': '3985676',
+ },
+ 'playlist_mincount': 178,
+ }, {
+ 'url': 'https://space.bilibili.com/313580179/video',
+ 'info_dict': {
+ 'id': '313580179',
+ },
+ 'playlist_mincount': 92,
+ }]
+
+ def _extract_signature(self, playlist_id):
+ session_data = self._download_json('https://api.bilibili.com/x/web-interface/nav', playlist_id, fatal=False)
+
+ key_from_url = lambda x: x[x.rfind('/') + 1:].split('.')[0]
+ img_key = traverse_obj(
+ session_data, ('data', 'wbi_img', 'img_url', {key_from_url})) or '34478ba821254d9d93542680e3b86100'
+ sub_key = traverse_obj(
+ session_data, ('data', 'wbi_img', 'sub_url', {key_from_url})) or '7e16a90d190a4355a78fd00b32a38de6'
+
+ session_key = img_key + sub_key
+
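+ # WBI signing: build the mixin key by picking characters of img_key + sub_key at these
+ # fixed positions, keeping only the first 32 characters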
+ signature_values = []
+ for position in (
+ 46, 47, 18, 2, 53, 8, 23, 32, 15, 50, 10, 31, 58, 3, 45, 35, 27, 43, 5, 49, 33, 9, 42, 19, 29, 28, 14, 39,
+ 12, 38, 41, 13, 37, 48, 7, 16, 24, 55, 40, 61, 26, 17, 0, 1, 60, 51, 30, 4, 22, 25, 54, 21, 56, 59, 6, 63,
+ 57, 62, 11, 36, 20, 34, 44, 52
+ ):
+ char_at_position = try_call(lambda: session_key[position])
+ if char_at_position:
+ signature_values.append(char_at_position)
+
+ return ''.join(signature_values)[:32]
+
+ def _real_extract(self, url):
+ playlist_id, is_video_url = self._match_valid_url(url).group('id', 'video')
+ if not is_video_url:
+ self.to_screen('A channel URL was given. Only the channel\'s videos will be downloaded. '
+ 'To download audio, add "/audio" to the URL')
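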
+
+ signature = self._extract_signature(playlist_id)
+
+ def fetch_page(page_idx):
+ query = {
+ 'keyword': '',
+ 'mid': playlist_id,
+ 'order': 'pubdate',
+ 'order_avoided': 'true',
+ 'platform': 'web',
+ 'pn': page_idx + 1,
+ 'ps': 30,
+ 'tid': 0,
+ 'web_location': 1550101,
+ 'wts': int(time.time()),
+ }
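+ # w_rid is the MD5 hex digest of the URL-encoded query string concatenated with the signature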
+ query['w_rid'] = hashlib.md5(f'{urllib.parse.urlencode(query)}{signature}'.encode()).hexdigest()
+
+ try:
+ response = self._download_json('https://api.bilibili.com/x/space/wbi/arc/search',
+ playlist_id, note=f'Downloading page {page_idx}', query=query)
+ except ExtractorError as e:
+ if isinstance(e.cause, HTTPError) and e.cause.status == 412:
+ raise ExtractorError(
+ 'Request was blocked by the server (412); please add cookies, wait, and try again later.', expected=True)
+ raise
+ if response['code'] == -401:
+ raise ExtractorError(
+                    'Request is blocked by server (401), please add cookies, wait and try again later.', expected=True)
+ return response['data']
+
+ def get_metadata(page_data):
+ page_size = page_data['page']['ps']
+ entry_count = page_data['page']['count']
+ return {
+ 'page_count': math.ceil(entry_count / page_size),
+ 'page_size': page_size,
+ }
+
+ def get_entries(page_data):
+ for entry in traverse_obj(page_data, ('list', 'vlist')) or []:
+ yield self.url_result(f'https://www.bilibili.com/video/{entry["bvid"]}', BiliBiliIE, entry['bvid'])
+
+ metadata, paged_list = self._extract_playlist(fetch_page, get_metadata, get_entries)
+ return self.playlist_result(paged_list, playlist_id)
+
+
+class BilibiliSpaceAudioIE(BilibiliSpaceBaseIE):
+ _VALID_URL = r'https?://space\.bilibili\.com/(?P<id>\d+)/audio'
+ _TESTS = [{
+ 'url': 'https://space.bilibili.com/313580179/audio',
+ 'info_dict': {
+ 'id': '313580179',
+ },
+ 'playlist_mincount': 1,
+ }]
+
+ def _real_extract(self, url):
+ playlist_id = self._match_id(url)
+
+ def fetch_page(page_idx):
+ return self._download_json(
+ 'https://api.bilibili.com/audio/music-service/web/song/upper', playlist_id,
+ note=f'Downloading page {page_idx}',
+ query={'uid': playlist_id, 'pn': page_idx + 1, 'ps': 30, 'order': 1, 'jsonp': 'jsonp'})['data']
+
+ def get_metadata(page_data):
+ return {
+ 'page_count': page_data['pageCount'],
+ 'page_size': page_data['pageSize'],
+ }
+
+ def get_entries(page_data):
+ for entry in page_data.get('data', []):
+ yield self.url_result(f'https://www.bilibili.com/audio/au{entry["id"]}', BilibiliAudioIE, entry['id'])
+
+ metadata, paged_list = self._extract_playlist(fetch_page, get_metadata, get_entries)
+ return self.playlist_result(paged_list, playlist_id)
+
+
+class BilibiliSpaceListBaseIE(BilibiliSpaceBaseIE):
+ def _get_entries(self, page_data, bvid_keys, ending_key='bvid'):
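+        # bvid_keys may be a single key or a path tuple; variadic() normalizes
+        # it so traverse_obj can walk down to each entry and collect
+        # `ending_key` as the video's bvid.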
+ for bvid in traverse_obj(page_data, (*variadic(bvid_keys, (str, bytes, dict, set)), ..., ending_key, {str})):
+ yield self.url_result(f'https://www.bilibili.com/video/{bvid}', BiliBiliIE, bvid)
+
+ def _get_uploader(self, uid, playlist_id):
+ webpage = self._download_webpage(f'https://space.bilibili.com/{uid}', playlist_id, fatal=False)
+ return self._search_regex(r'(?s)<title\b[^>]*>([^<]+)的个人空间-', webpage, 'uploader', fatal=False)
+
+ def _extract_playlist(self, fetch_page, get_metadata, get_entries):
+ metadata, page_list = super()._extract_playlist(fetch_page, get_metadata, get_entries)
+ metadata.pop('page_count', None)
+ metadata.pop('page_size', None)
+ return metadata, page_list
+
+
+class BilibiliCollectionListIE(BilibiliSpaceListBaseIE):
+ _VALID_URL = r'https?://space\.bilibili\.com/(?P<mid>\d+)/channel/collectiondetail/?\?sid=(?P<sid>\d+)'
+ _TESTS = [{
+ 'url': 'https://space.bilibili.com/2142762/channel/collectiondetail?sid=57445',
+ 'info_dict': {
+ 'id': '2142762_57445',
+ 'title': '【完结】《底特律 变人》全结局流程解说',
+ 'description': '',
+ 'uploader': '老戴在此',
+ 'uploader_id': '2142762',
+ 'timestamp': int,
+ 'upload_date': str,
+ 'thumbnail': 'https://archive.biliimg.com/bfs/archive/e0e543ae35ad3df863ea7dea526bc32e70f4c091.jpg',
+ },
+ 'playlist_mincount': 31,
+ }]
+
+ def _real_extract(self, url):
+ mid, sid = self._match_valid_url(url).group('mid', 'sid')
+ playlist_id = f'{mid}_{sid}'
+
+ def fetch_page(page_idx):
+ return self._download_json(
+ 'https://api.bilibili.com/x/polymer/space/seasons_archives_list',
+ playlist_id, note=f'Downloading page {page_idx}',
+ query={'mid': mid, 'season_id': sid, 'page_num': page_idx + 1, 'page_size': 30})['data']
+
+ def get_metadata(page_data):
+ page_size = page_data['page']['page_size']
+ entry_count = page_data['page']['total']
+ return {
+ 'page_count': math.ceil(entry_count / page_size),
+ 'page_size': page_size,
+ 'uploader': self._get_uploader(mid, playlist_id),
+ **traverse_obj(page_data, {
+ 'title': ('meta', 'name', {str}),
+ 'description': ('meta', 'description', {str}),
+ 'uploader_id': ('meta', 'mid', {str_or_none}),
+ 'timestamp': ('meta', 'ptime', {int_or_none}),
+ 'thumbnail': ('meta', 'cover', {url_or_none}),
+ })
+ }
+
+ def get_entries(page_data):
+ return self._get_entries(page_data, 'archives')
+
+ metadata, paged_list = self._extract_playlist(fetch_page, get_metadata, get_entries)
+ return self.playlist_result(paged_list, playlist_id, **metadata)
+
+
+class BilibiliSeriesListIE(BilibiliSpaceListBaseIE):
+ _VALID_URL = r'https?://space\.bilibili\.com/(?P<mid>\d+)/channel/seriesdetail/?\?\bsid=(?P<sid>\d+)'
+ _TESTS = [{
+ 'url': 'https://space.bilibili.com/1958703906/channel/seriesdetail?sid=547718&ctype=0',
+ 'info_dict': {
+ 'id': '1958703906_547718',
+ 'title': '直播回放',
+ 'description': '直播回放',
+ 'uploader': '靡烟miya',
+ 'uploader_id': '1958703906',
+ 'timestamp': 1637985853,
+ 'upload_date': '20211127',
+ 'modified_timestamp': int,
+ 'modified_date': str,
+ },
+ 'playlist_mincount': 513,
+ }]
+
+ def _real_extract(self, url):
+ mid, sid = self._match_valid_url(url).group('mid', 'sid')
+ playlist_id = f'{mid}_{sid}'
+ playlist_meta = traverse_obj(self._download_json(
+ f'https://api.bilibili.com/x/series/series?series_id={sid}', playlist_id, fatal=False
+ ), {
+ 'title': ('data', 'meta', 'name', {str}),
+ 'description': ('data', 'meta', 'description', {str}),
+ 'uploader_id': ('data', 'meta', 'mid', {str_or_none}),
+ 'timestamp': ('data', 'meta', 'ctime', {int_or_none}),
+ 'modified_timestamp': ('data', 'meta', 'mtime', {int_or_none}),
+ })
+
+ def fetch_page(page_idx):
+ return self._download_json(
+ 'https://api.bilibili.com/x/series/archives',
+ playlist_id, note=f'Downloading page {page_idx}',
+ query={'mid': mid, 'series_id': sid, 'pn': page_idx + 1, 'ps': 30})['data']
+
+ def get_metadata(page_data):
+ page_size = page_data['page']['size']
+ entry_count = page_data['page']['total']
+ return {
+ 'page_count': math.ceil(entry_count / page_size),
+ 'page_size': page_size,
+ 'uploader': self._get_uploader(mid, playlist_id),
+ **playlist_meta
+ }
+
+ def get_entries(page_data):
+ return self._get_entries(page_data, 'archives')
+
+ metadata, paged_list = self._extract_playlist(fetch_page, get_metadata, get_entries)
+ return self.playlist_result(paged_list, playlist_id, **metadata)
+
+
+class BilibiliFavoritesListIE(BilibiliSpaceListBaseIE):
+ _VALID_URL = r'https?://(?:space\.bilibili\.com/\d+/favlist/?\?fid=|(?:www\.)?bilibili\.com/medialist/detail/ml)(?P<id>\d+)'
+ _TESTS = [{
+ 'url': 'https://space.bilibili.com/84912/favlist?fid=1103407912&ftype=create',
+ 'info_dict': {
+ 'id': '1103407912',
+ 'title': '【V2】(旧)',
+ 'description': '',
+ 'uploader': '晓月春日',
+ 'uploader_id': '84912',
+ 'timestamp': 1604905176,
+ 'upload_date': '20201109',
+ 'modified_timestamp': int,
+ 'modified_date': str,
+ 'thumbnail': r"re:http://i\d\.hdslb\.com/bfs/archive/14b83c62aa8871b79083df1e9ab4fbc699ad16fe\.jpg",
+ 'view_count': int,
+ 'like_count': int,
+ },
+ 'playlist_mincount': 22,
+ }, {
+ 'url': 'https://www.bilibili.com/medialist/detail/ml1103407912',
+ 'only_matching': True,
+ }]
+
+ def _real_extract(self, url):
+ fid = self._match_id(url)
+
+ list_info = self._download_json(
+ f'https://api.bilibili.com/x/v3/fav/resource/list?media_id={fid}&pn=1&ps=20',
+ fid, note='Downloading favlist metadata')
+ if list_info['code'] == -403:
+ self.raise_login_required(msg='This is a private favorites list. You need to log in as its owner')
+
+ entries = self._get_entries(self._download_json(
+ f'https://api.bilibili.com/x/v3/fav/resource/ids?media_id={fid}',
+            fid, note='Downloading favlist entries'), 'data')
+
+ return self.playlist_result(entries, fid, **traverse_obj(list_info, ('data', 'info', {
+ 'title': ('title', {str}),
+ 'description': ('intro', {str}),
+ 'uploader': ('upper', 'name', {str}),
+ 'uploader_id': ('upper', 'mid', {str_or_none}),
+ 'timestamp': ('ctime', {int_or_none}),
+ 'modified_timestamp': ('mtime', {int_or_none}),
+ 'thumbnail': ('cover', {url_or_none}),
+ 'view_count': ('cnt_info', 'play', {int_or_none}),
+ 'like_count': ('cnt_info', 'thumb_up', {int_or_none}),
+ })))
+
+
+class BilibiliWatchlaterIE(BilibiliSpaceListBaseIE):
+ _VALID_URL = r'https?://(?:www\.)?bilibili\.com/watchlater/?(?:[?#]|$)'
+ _TESTS = [{
+ 'url': 'https://www.bilibili.com/watchlater/#/list',
+ 'info_dict': {'id': 'watchlater'},
+ 'playlist_mincount': 0,
+ 'skip': 'login required',
+ }]
+
+ def _real_extract(self, url):
+ list_id = getattr(self._get_cookies(url).get('DedeUserID'), 'value', 'watchlater')
+ watchlater_info = self._download_json(
+ 'https://api.bilibili.com/x/v2/history/toview/web?jsonp=jsonp', list_id)
+ if watchlater_info['code'] == -101:
+            self.raise_login_required(msg='You need to log in to access your watchlater list')
+ entries = self._get_entries(watchlater_info, ('data', 'list'))
+ return self.playlist_result(entries, id=list_id, title='稍后再看')
+
+
+class BilibiliPlaylistIE(BilibiliSpaceListBaseIE):
+ _VALID_URL = r'https?://(?:www\.)?bilibili\.com/(?:medialist/play|list)/(?P<id>\w+)'
+ _TESTS = [{
+ 'url': 'https://www.bilibili.com/list/1958703906?sid=547718',
+ 'info_dict': {
+ 'id': '5_547718',
+ 'title': '直播回放',
+ 'uploader': '靡烟miya',
+ 'uploader_id': '1958703906',
+ 'timestamp': 1637985853,
+ 'upload_date': '20211127',
+ },
+ 'playlist_mincount': 513,
+ }, {
+ 'url': 'https://www.bilibili.com/list/1958703906?sid=547718&oid=687146339&bvid=BV1DU4y1r7tz',
+ 'info_dict': {
+ 'id': 'BV1DU4y1r7tz',
+ 'ext': 'mp4',
+ 'title': '【直播回放】8.20晚9:30 3d发布喵 2022年8月20日21点场',
+ 'upload_date': '20220820',
+ 'description': '',
+ 'timestamp': 1661016330,
+ 'uploader_id': '1958703906',
+ 'uploader': '靡烟miya',
+ 'thumbnail': r're:^https?://.*\.(jpg|jpeg|png)$',
+ 'duration': 9552.903,
+ 'tags': list,
+ 'comment_count': int,
+ 'view_count': int,
+ 'like_count': int,
+ '_old_archive_ids': ['bilibili 687146339_part1'],
+ },
+ 'params': {'noplaylist': True},
+ }, {
+ 'url': 'https://www.bilibili.com/medialist/play/1958703906?business=space_series&business_id=547718&desc=1',
+ 'info_dict': {
+ 'id': '5_547718',
+ },
+ 'playlist_mincount': 513,
+ 'skip': 'redirect url',
+ }, {
+ 'url': 'https://www.bilibili.com/list/ml1103407912',
+ 'info_dict': {
+ 'id': '3_1103407912',
+ 'title': '【V2】(旧)',
+ 'uploader': '晓月春日',
+ 'uploader_id': '84912',
+ 'timestamp': 1604905176,
+ 'upload_date': '20201109',
+ 'thumbnail': r"re:http://i\d\.hdslb\.com/bfs/archive/14b83c62aa8871b79083df1e9ab4fbc699ad16fe\.jpg",
+ },
+ 'playlist_mincount': 22,
+ }, {
+ 'url': 'https://www.bilibili.com/medialist/play/ml1103407912',
+ 'info_dict': {
+ 'id': '3_1103407912',
+ },
+ 'playlist_mincount': 22,
+ 'skip': 'redirect url',
+ }, {
+ 'url': 'https://www.bilibili.com/list/watchlater',
+ 'info_dict': {'id': 'watchlater'},
+ 'playlist_mincount': 0,
+ 'skip': 'login required',
+ }, {
+ 'url': 'https://www.bilibili.com/medialist/play/watchlater',
+ 'info_dict': {'id': 'watchlater'},
+ 'playlist_mincount': 0,
+ 'skip': 'login required',
+ }]
+
+ def _extract_medialist(self, query, list_id):
+ for page_num in itertools.count(1):
+ page_data = self._download_json(
+ 'https://api.bilibili.com/x/v2/medialist/resource/list',
+                list_id, query=query, note=f'Downloading playlist {query["biz_id"]} page {page_num}'
+ )['data']
+ yield from self._get_entries(page_data, 'media_list', ending_key='bv_id')
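+            # Cursor-style pagination: the id of the last entry on this page is
+            # sent back as `oid` to anchor the next request.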
+ query['oid'] = traverse_obj(page_data, ('media_list', -1, 'id'))
+ if not page_data.get('has_more', False):
+ break
+
+ def _real_extract(self, url):
+ list_id = self._match_id(url)
+
+ bvid = traverse_obj(parse_qs(url), ('bvid', 0))
+ if not self._yes_playlist(list_id, bvid):
+ return self.url_result(f'https://www.bilibili.com/video/{bvid}', BiliBiliIE)
+
+ webpage = self._download_webpage(url, list_id)
+ initial_state = self._search_json(r'window\.__INITIAL_STATE__\s*=', webpage, 'initial state', list_id)
+ if traverse_obj(initial_state, ('error', 'code', {int_or_none})) != 200:
+ error_code = traverse_obj(initial_state, ('error', 'trueCode', {int_or_none}))
+ error_message = traverse_obj(initial_state, ('error', 'message', {str_or_none}))
+ if error_code == -400 and list_id == 'watchlater':
+                self.raise_login_required('You need to log in to access your watchlater playlist')
+            elif error_code == -403:
+                self.raise_login_required('This is a private playlist. You need to log in as its owner')
+ elif error_code == 11010:
+ raise ExtractorError('Playlist is no longer available', expected=True)
+ raise ExtractorError(f'Could not access playlist: {error_code} {error_message}')
+
+ query = {
+ 'ps': 20,
+ 'with_current': False,
+ **traverse_obj(initial_state, {
+ 'type': ('playlist', 'type', {int_or_none}),
+ 'biz_id': ('playlist', 'id', {int_or_none}),
+ 'tid': ('tid', {int_or_none}),
+ 'sort_field': ('sortFiled', {int_or_none}),
+ 'desc': ('desc', {bool_or_none}, {str_or_none}, {str.lower}),
+ })
+ }
+ metadata = {
+ 'id': f'{query["type"]}_{query["biz_id"]}',
+ **traverse_obj(initial_state, ('mediaListInfo', {
+ 'title': ('title', {str}),
+ 'uploader': ('upper', 'name', {str}),
+ 'uploader_id': ('upper', 'mid', {str_or_none}),
+ 'timestamp': ('ctime', {int_or_none}),
+ 'thumbnail': ('cover', {url_or_none}),
+ })),
+ }
+ return self.playlist_result(self._extract_medialist(query, list_id), **metadata)
+
+
+class BilibiliCategoryIE(InfoExtractor):
+ IE_NAME = 'Bilibili category extractor'
+ _MAX_RESULTS = 1000000
+    _VALID_URL = r'https?://(?:www\.)?bilibili\.com/v/[a-zA-Z]+/[a-zA-Z]+'
+ _TESTS = [{
+ 'url': 'https://www.bilibili.com/v/kichiku/mad',
+ 'info_dict': {
+ 'id': 'kichiku: mad',
+ 'title': 'kichiku: mad'
+ },
+ 'playlist_mincount': 45,
+ 'params': {
+ 'playlistend': 45
+ }
+ }]
+
+ def _fetch_page(self, api_url, num_pages, query, page_num):
+ parsed_json = self._download_json(
+ api_url, query, query={'Search_key': query, 'pn': page_num},
+            note=f'Extracting results from page {page_num} of {num_pages}')
+
+ video_list = traverse_obj(parsed_json, ('data', 'archives'), expected_type=list)
+ if not video_list:
+            raise ExtractorError(f'Failed to retrieve video list for page {page_num}')
+
+ for video in video_list:
+ yield self.url_result(
+                f'https://www.bilibili.com/video/{video["bvid"]}', 'BiliBili', video['bvid'])
+
+ def _entries(self, category, subcategory, query):
+ # map of categories : subcategories : RIDs
+ rid_map = {
+ 'kichiku': {
+ 'mad': 26,
+ 'manual_vocaloid': 126,
+ 'guide': 22,
+ 'theatre': 216,
+ 'course': 127
+ },
+ }
+
+ if category not in rid_map:
+ raise ExtractorError(
+ f'The category {category} isn\'t supported. Supported categories: {list(rid_map.keys())}')
+ if subcategory not in rid_map[category]:
+ raise ExtractorError(
+ f'The subcategory {subcategory} isn\'t supported for this category. Supported subcategories: {list(rid_map[category].keys())}')
+ rid_value = rid_map[category][subcategory]
+
+        api_url = f'https://api.bilibili.com/x/web-interface/newlist?rid={rid_value}&type=1&ps=20&jsonp=jsonp'
+ page_json = self._download_json(api_url, query, query={'Search_key': query, 'pn': '1'})
+ page_data = traverse_obj(page_json, ('data', 'page'), expected_type=dict)
+ count, size = int_or_none(page_data.get('count')), int_or_none(page_data.get('size'))
+ if count is None or not size:
+ raise ExtractorError('Failed to calculate either page count or size')
+
+ num_pages = math.ceil(count / size)
+
+ return OnDemandPagedList(functools.partial(
+ self._fetch_page, api_url, num_pages, query), size)
+
+ def _real_extract(self, url):
+ category, subcategory = urllib.parse.urlparse(url).path.split('/')[2:4]
+        query = f'{category}: {subcategory}'
+
+ return self.playlist_result(self._entries(category, subcategory, query), query, query)
+
+
+class BiliBiliSearchIE(SearchInfoExtractor):
+ IE_DESC = 'Bilibili video search'
+ _MAX_RESULTS = 100000
+ _SEARCH_KEY = 'bilisearch'
+ _TESTS = [{
+ 'url': 'bilisearch3:靡烟 出道一年,我怎么还在等你单推的女人睡觉后开播啊',
+ 'playlist_count': 3,
+ 'info_dict': {
+ 'id': '靡烟 出道一年,我怎么还在等你单推的女人睡觉后开播啊',
+ 'title': '靡烟 出道一年,我怎么还在等你单推的女人睡觉后开播啊',
+ },
+ 'playlist': [{
+ 'info_dict': {
+ 'id': 'BV1n44y1Q7sc',
+ 'ext': 'mp4',
+ 'title': '“出道一年,我怎么还在等你单推的女人睡觉后开播啊?”【一分钟了解靡烟miya】',
+ 'timestamp': 1669889987,
+ 'upload_date': '20221201',
+ 'description': 'md5:43343c0973defff527b5a4b403b4abf9',
+ 'tags': list,
+ 'uploader': '靡烟miya',
+ 'duration': 123.156,
+ 'uploader_id': '1958703906',
+ 'comment_count': int,
+ 'view_count': int,
+ 'like_count': int,
+ 'thumbnail': r're:^https?://.*\.(jpg|jpeg|png)$',
+ '_old_archive_ids': ['bilibili 988222410_part1'],
+ },
+ }],
+ }]
+
+ def _search_results(self, query):
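+        # The search API appears to reject requests without a buvid3 cookie, so
+        # synthesize one from a random UUID when none has been supplied.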
+ if not self._get_cookies('https://api.bilibili.com').get('buvid3'):
+ self._set_cookie('.bilibili.com', 'buvid3', f'{uuid.uuid4()}infoc')
+ for page_num in itertools.count(1):
+ videos = self._download_json(
+ 'https://api.bilibili.com/x/web-interface/search/type', query,
+ note=f'Extracting results from page {page_num}', query={
+ 'Search_key': query,
+ 'keyword': query,
+ 'page': page_num,
+ 'context': '',
+ 'duration': 0,
+ 'tids_2': '',
+ '__refresh__': 'true',
+ 'search_type': 'video',
+ 'tids': 0,
+ 'highlight': 1,
+ })['data'].get('result')
+ if not videos:
+ break
+ for video in videos:
+ yield self.url_result(video['arcurl'], 'BiliBili', str(video['aid']))
+
+
+class BilibiliAudioBaseIE(InfoExtractor):
+ def _call_api(self, path, sid, query=None):
+ if not query:
+ query = {'sid': sid}
+ return self._download_json(
+ 'https://www.bilibili.com/audio/music-service-c/web/' + path,
+ sid, query=query)['data']
+
+
+class BilibiliAudioIE(BilibiliAudioBaseIE):
+ _VALID_URL = r'https?://(?:www\.)?bilibili\.com/audio/au(?P<id>\d+)'
+ _TEST = {
+ 'url': 'https://www.bilibili.com/audio/au1003142',
+ 'md5': 'fec4987014ec94ef9e666d4d158ad03b',
+ 'info_dict': {
+ 'id': '1003142',
+ 'ext': 'm4a',
+ 'title': '【tsukimi】YELLOW / 神山羊',
+ 'artist': 'tsukimi',
+ 'comment_count': int,
+ 'description': 'YELLOW的mp3版!',
+ 'duration': 183,
+ 'subtitles': {
+ 'origin': [{
+ 'ext': 'lrc',
+ }],
+ },
+ 'thumbnail': r're:^https?://.+\.jpg',
+ 'timestamp': 1564836614,
+ 'upload_date': '20190803',
+ 'uploader': 'tsukimi-つきみぐー',
+ 'view_count': int,
+ },
+ }
+
+ def _real_extract(self, url):
+ au_id = self._match_id(url)
+
+ play_data = self._call_api('url', au_id)
+ formats = [{
+ 'url': play_data['cdns'][0],
+ 'filesize': int_or_none(play_data.get('size')),
+ 'vcodec': 'none'
+ }]
+
+ for a_format in formats:
+ a_format.setdefault('http_headers', {}).update({
+ 'Referer': url,
+ })
+
+ song = self._call_api('song/info', au_id)
+ title = song['title']
+ statistic = song.get('statistic') or {}
+
+ subtitles = None
+ lyric = song.get('lyric')
+ if lyric:
+ subtitles = {
+ 'origin': [{
+ 'url': lyric,
+ }]
+ }
+
+ return {
+ 'id': au_id,
+ 'title': title,
+ 'formats': formats,
+ 'artist': song.get('author'),
+ 'comment_count': int_or_none(statistic.get('comment')),
+ 'description': song.get('intro'),
+ 'duration': int_or_none(song.get('duration')),
+ 'subtitles': subtitles,
+ 'thumbnail': song.get('cover'),
+ 'timestamp': int_or_none(song.get('passtime')),
+ 'uploader': song.get('uname'),
+ 'view_count': int_or_none(statistic.get('play')),
+ }
+
+
+class BilibiliAudioAlbumIE(BilibiliAudioBaseIE):
+ _VALID_URL = r'https?://(?:www\.)?bilibili\.com/audio/am(?P<id>\d+)'
+ _TEST = {
+ 'url': 'https://www.bilibili.com/audio/am10624',
+ 'info_dict': {
+ 'id': '10624',
+ 'title': '每日新曲推荐(每日11:00更新)',
+ 'description': '每天11:00更新,为你推送最新音乐',
+ },
+ 'playlist_count': 19,
+ }
+
+ def _real_extract(self, url):
+ am_id = self._match_id(url)
+
+ songs = self._call_api(
+ 'song/of-menu', am_id, {'sid': am_id, 'pn': 1, 'ps': 100})['data']
+
+ entries = []
+ for song in songs:
+ sid = str_or_none(song.get('id'))
+ if not sid:
+ continue
+ entries.append(self.url_result(
+ 'https://www.bilibili.com/audio/au' + sid,
+ BilibiliAudioIE.ie_key(), sid))
+
+ if entries:
+ album_data = self._call_api('menu/info', am_id) or {}
+ album_title = album_data.get('title')
+ if album_title:
+ for entry in entries:
+ entry['album'] = album_title
+ return self.playlist_result(
+ entries, am_id, album_title, album_data.get('intro'))
+
+ return self.playlist_result(entries, am_id)
+
+
+class BiliBiliPlayerIE(InfoExtractor):
+ _VALID_URL = r'https?://player\.bilibili\.com/player\.html\?.*?\baid=(?P<id>\d+)'
+ _TEST = {
+ 'url': 'http://player.bilibili.com/player.html?aid=92494333&cid=157926707&page=1',
+ 'only_matching': True,
+ }
+
+ def _real_extract(self, url):
+ video_id = self._match_id(url)
+ return self.url_result(
+            f'http://www.bilibili.tv/video/av{video_id}/',
+ ie=BiliBiliIE.ie_key(), video_id=video_id)
+
+
+class BiliIntlBaseIE(InfoExtractor):
+ _API_URL = 'https://api.bilibili.tv/intl/gateway'
+ _NETRC_MACHINE = 'biliintl'
+ _HEADERS = {'Referer': 'https://www.bilibili.com/'}
+
+ def _call_api(self, endpoint, *args, **kwargs):
+ json = self._download_json(self._API_URL + endpoint, *args, **kwargs)
+ if json.get('code'):
+ if json['code'] in (10004004, 10004005, 10023006):
+ self.raise_login_required()
+ elif json['code'] == 10004001:
+ self.raise_geo_restricted()
+ else:
+ if json.get('message') and str(json['code']) != json['message']:
+ errmsg = f'{kwargs.get("errnote", "Unable to download JSON metadata")}: {self.IE_NAME} said: {json["message"]}'
+ else:
+ errmsg = kwargs.get('errnote', 'Unable to download JSON metadata')
+ if kwargs.get('fatal'):
+ raise ExtractorError(errmsg)
+ else:
+ self.report_warning(errmsg)
+ return json.get('data')
+
+ def json2srt(self, json):
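+        # Converts Bilibili's JSON subtitle body (a list of cues with `from`,
+        # `to` and `content` fields) into numbered SRT blocks, skipping cues
+        # with any missing field.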
+ data = '\n\n'.join(
+ f'{i + 1}\n{srt_subtitles_timecode(line["from"])} --> {srt_subtitles_timecode(line["to"])}\n{line["content"]}'
+ for i, line in enumerate(traverse_obj(json, (
+ 'body', lambda _, l: l['content'] and l['from'] and l['to']))))
+ return data
+
+ def _get_subtitles(self, *, ep_id=None, aid=None):
+ sub_json = self._call_api(
+ '/web/v2/subtitle', ep_id or aid, fatal=False,
+ note='Downloading subtitles list', errnote='Unable to download subtitles list',
+ query=filter_dict({
+ 'platform': 'web',
+ 's_locale': 'en_US',
+ 'episode_id': ep_id,
+ 'aid': aid,
+ })) or {}
+ subtitles = {}
+ fetched_urls = set()
+ for sub in traverse_obj(sub_json, (('subtitles', 'video_subtitle'), ..., {dict})):
+ for url in traverse_obj(sub, ((None, 'ass', 'srt'), 'url', {url_or_none})):
+ if url in fetched_urls:
+ continue
+ fetched_urls.add(url)
+ sub_ext = determine_ext(url)
+ sub_lang = sub.get('lang_key') or 'en'
+
+ if sub_ext == 'ass':
+ subtitles.setdefault(sub_lang, []).append({
+ 'ext': 'ass',
+ 'url': url,
+ })
+ elif sub_ext == 'json':
+ sub_data = self._download_json(
+ url, ep_id or aid, fatal=False,
+ note=f'Downloading subtitles{format_field(sub, "lang", " for %s")} ({sub_lang})',
+ errnote='Unable to download subtitles')
+
+ if sub_data:
+ subtitles.setdefault(sub_lang, []).append({
+ 'ext': 'srt',
+ 'data': self.json2srt(sub_data),
+ })
+ else:
+ self.report_warning('Unexpected subtitle extension', ep_id or aid)
+
+ return subtitles
+
+ def _get_formats(self, *, ep_id=None, aid=None):
+ video_json = self._call_api(
+ '/web/playurl', ep_id or aid, note='Downloading video formats',
+ errnote='Unable to download video formats', query=filter_dict({
+ 'platform': 'web',
+ 'ep_id': ep_id,
+ 'aid': aid,
+ }))
+ video_json = video_json['playurl']
+ formats = []
+ for vid in video_json.get('video') or []:
+ video_res = vid.get('video_resource') or {}
+ video_info = vid.get('stream_info') or {}
+ if not video_res.get('url'):
+ continue
+ formats.append({
+ 'url': video_res['url'],
+ 'ext': 'mp4',
+ 'format_note': video_info.get('desc_words'),
+ 'width': video_res.get('width'),
+ 'height': video_res.get('height'),
+ 'vbr': video_res.get('bandwidth'),
+ 'acodec': 'none',
+ 'vcodec': video_res.get('codecs'),
+ 'filesize': video_res.get('size'),
+ })
+ for aud in video_json.get('audio_resource') or []:
+ if not aud.get('url'):
+ continue
+ formats.append({
+ 'url': aud['url'],
+ 'ext': 'mp4',
+ 'abr': aud.get('bandwidth'),
+ 'acodec': aud.get('codecs'),
+ 'vcodec': 'none',
+ 'filesize': aud.get('size'),
+ })
+
+ return formats
+
+ def _parse_video_metadata(self, video_data):
+ return {
+ 'title': video_data.get('title_display') or video_data.get('title'),
+ 'description': video_data.get('desc'),
+ 'thumbnail': video_data.get('cover'),
+ 'timestamp': unified_timestamp(video_data.get('formatted_pub_date')),
+ 'episode_number': int_or_none(self._search_regex(
+ r'^E(\d+)(?:$| - )', video_data.get('title_display') or '', 'episode number', default=None)),
+ }
+
+ def _perform_login(self, username, password):
+ if not Cryptodome.RSA:
+            raise ExtractorError('pycryptodomex not found. Please install it', expected=True)
+
+ key_data = self._download_json(
+ 'https://passport.bilibili.tv/x/intl/passport-login/web/key?lang=en-US', None,
+ note='Downloading login key', errnote='Unable to download login key')['data']
+
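+        # The server-supplied `hash` is prepended to the password and the
+        # result is RSA-encrypted (PKCS#1 v1.5) with the downloaded public key,
+        # mirroring the site's own login flow.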
+ public_key = Cryptodome.RSA.importKey(key_data['key'])
+ password_hash = Cryptodome.PKCS1_v1_5.new(public_key).encrypt((key_data['hash'] + password).encode('utf-8'))
+ login_post = self._download_json(
+ 'https://passport.bilibili.tv/x/intl/passport-login/web/login/password?lang=en-US', None, data=urlencode_postdata({
+ 'username': username,
+ 'password': base64.b64encode(password_hash).decode('ascii'),
+ 'keep_me': 'true',
+ 's_locale': 'en_US',
+ 'isTrusted': 'true'
+ }), note='Logging in', errnote='Unable to log in')
+ if login_post.get('code'):
+ if login_post.get('message'):
+ raise ExtractorError(f'Unable to log in: {self.IE_NAME} said: {login_post["message"]}', expected=True)
+ else:
+ raise ExtractorError('Unable to log in')
+
+
+class BiliIntlIE(BiliIntlBaseIE):
+ _VALID_URL = r'https?://(?:www\.)?bili(?:bili\.tv|intl\.com)/(?:[a-zA-Z]{2}/)?(play/(?P<season_id>\d+)/(?P<ep_id>\d+)|video/(?P<aid>\d+))'
+ _TESTS = [{
+ # Bstation page
+ 'url': 'https://www.bilibili.tv/en/play/34613/341736',
+ 'info_dict': {
+ 'id': '341736',
+ 'ext': 'mp4',
+ 'title': 'E2 - The First Night',
+ 'thumbnail': r're:^https://pic\.bstarstatic\.com/ogv/.+\.png$',
+ 'episode_number': 2,
+ 'upload_date': '20201009',
+ 'episode': 'Episode 2',
+ 'timestamp': 1602259500,
+ 'description': 'md5:297b5a17155eb645e14a14b385ab547e',
+ 'chapters': [{
+ 'start_time': 0,
+ 'end_time': 76.242,
+ 'title': '<Untitled Chapter 1>'
+ }, {
+ 'start_time': 76.242,
+ 'end_time': 161.161,
+ 'title': 'Intro'
+ }, {
+ 'start_time': 1325.742,
+ 'end_time': 1403.903,
+ 'title': 'Outro'
+ }],
+ }
+ }, {
+ # Non-Bstation page
+ 'url': 'https://www.bilibili.tv/en/play/1033760/11005006',
+ 'info_dict': {
+ 'id': '11005006',
+ 'ext': 'mp4',
+ 'title': 'E3 - Who?',
+ 'thumbnail': r're:^https://pic\.bstarstatic\.com/ogv/.+\.png$',
+ 'episode_number': 3,
+ 'description': 'md5:e1a775e71a35c43f141484715470ad09',
+ 'episode': 'Episode 3',
+ 'upload_date': '20211219',
+ 'timestamp': 1639928700,
+ 'chapters': [{
+ 'start_time': 0,
+ 'end_time': 88.0,
+ 'title': '<Untitled Chapter 1>'
+ }, {
+ 'start_time': 88.0,
+ 'end_time': 156.0,
+ 'title': 'Intro'
+ }, {
+ 'start_time': 1173.0,
+ 'end_time': 1259.535,
+ 'title': 'Outro'
+ }],
+ }
+ }, {
+ # Subtitle with empty content
+ 'url': 'https://www.bilibili.tv/en/play/1005144/10131790',
+ 'info_dict': {
+ 'id': '10131790',
+ 'ext': 'mp4',
+ 'title': 'E140 - Two Heartbeats: Kabuto\'s Trap',
+ 'thumbnail': r're:^https://pic\.bstarstatic\.com/ogv/.+\.png$',
+ 'episode_number': 140,
+ },
+ 'skip': 'According to the copyright owner\'s request, you may only watch the video after you log in.'
+ }, {
+ # episode comment extraction
+ 'url': 'https://www.bilibili.tv/en/play/34580/340317',
+ 'info_dict': {
+ 'id': '340317',
+ 'ext': 'mp4',
+ 'timestamp': 1604057820,
+ 'upload_date': '20201030',
+ 'episode_number': 5,
+ 'title': 'E5 - My Own Steel',
+ 'description': 'md5:2b17ab10aebb33e3c2a54da9e8e487e2',
+ 'thumbnail': r're:https?://pic\.bstarstatic\.com/ogv/.+\.png$',
+ 'episode': 'Episode 5',
+ 'comment_count': int,
+ 'chapters': [{
+ 'start_time': 0,
+ 'end_time': 61.0,
+ 'title': '<Untitled Chapter 1>'
+ }, {
+ 'start_time': 61.0,
+ 'end_time': 134.0,
+ 'title': 'Intro'
+ }, {
+ 'start_time': 1290.0,
+ 'end_time': 1379.0,
+ 'title': 'Outro'
+ }],
+ },
+ 'params': {
+ 'getcomments': True
+ }
+ }, {
+ # user generated content comment extraction
+ 'url': 'https://www.bilibili.tv/en/video/2045730385',
+ 'info_dict': {
+ 'id': '2045730385',
+ 'ext': 'mp4',
+ 'description': 'md5:693b6f3967fb4e7e7764ea817857c33a',
+ 'timestamp': 1667891924,
+ 'upload_date': '20221108',
+ 'title': 'That Time I Got Reincarnated as a Slime: Scarlet Bond - Official Trailer 3| AnimeStan',
+ 'comment_count': int,
+ 'thumbnail': r're:https://pic\.bstarstatic\.(?:com|net)/ugc/f6c363659efd2eabe5683fbb906b1582\.jpg',
+ },
+ 'params': {
+ 'getcomments': True
+ }
+ }, {
+ # episode id without intro and outro
+ 'url': 'https://www.bilibili.tv/en/play/1048837/11246489',
+ 'info_dict': {
+ 'id': '11246489',
+ 'ext': 'mp4',
+ 'title': 'E1 - Operation \'Strix\' <Owl>',
+ 'description': 'md5:b4434eb1a9a97ad2bccb779514b89f17',
+ 'timestamp': 1649516400,
+ 'thumbnail': 'https://pic.bstarstatic.com/ogv/62cb1de23ada17fb70fbe7bdd6ff29c29da02a64.png',
+ 'episode': 'Episode 1',
+ 'episode_number': 1,
+ 'upload_date': '20220409',
+ },
+ }, {
+ 'url': 'https://www.biliintl.com/en/play/34613/341736',
+ 'only_matching': True,
+ }, {
+ # User-generated content (as opposed to a series licensed from a studio)
+ 'url': 'https://bilibili.tv/en/video/2019955076',
+ 'only_matching': True,
+ }, {
+ # No language in URL
+ 'url': 'https://www.bilibili.tv/video/2019955076',
+ 'only_matching': True,
+ }, {
+ # Uppercase language in URL
+ 'url': 'https://www.bilibili.tv/EN/video/2019955076',
+ 'only_matching': True,
+ }]
+
+ @staticmethod
+ def _make_url(video_id, series_id=None):
+ if series_id:
+ return f'https://www.bilibili.tv/en/play/{series_id}/{video_id}'
+ return f'https://www.bilibili.tv/en/video/{video_id}'
+
+ def _extract_video_metadata(self, url, video_id, season_id):
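+        # Series pages smuggle per-episode metadata into the URL (see
+        # BiliIntlSeriesIE._entries); when it is present, the episode webpage
+        # does not need to be downloaded at all.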
+ url, smuggled_data = unsmuggle_url(url, {})
+ if smuggled_data.get('title'):
+ return smuggled_data
+
+ webpage = self._download_webpage(url, video_id)
+ # Bstation layout
+ initial_data = (
+ self._search_json(r'window\.__INITIAL_(?:DATA|STATE)__\s*=', webpage, 'preload state', video_id, default={})
+ or self._search_nuxt_data(webpage, video_id, '__initialState', fatal=False, traverse=None))
+ video_data = traverse_obj(
+ initial_data, ('OgvVideo', 'epDetail'), ('UgcVideo', 'videoData'), ('ugc', 'archive'), expected_type=dict) or {}
+
+ if season_id and not video_data:
+ # Non-Bstation layout, read through episode list
+ season_json = self._call_api(f'/web/v2/ogv/play/episodes?season_id={season_id}&platform=web', video_id)
+ video_data = traverse_obj(season_json, (
+ 'sections', ..., 'episodes', lambda _, v: str(v['episode_id']) == video_id
+ ), expected_type=dict, get_all=False)
+
+        # XXX: the webpage metadata may not be accurate; it is only used as a
+        # fallback so extraction does not crash when video_data is not found
+ return merge_dicts(
+ self._parse_video_metadata(video_data), {
+ 'title': get_element_by_class(
+ 'bstar-meta__title', webpage) or self._html_search_meta('og:title', webpage),
+ 'description': get_element_by_class(
+ 'bstar-meta__desc', webpage) or self._html_search_meta('og:description', webpage),
+ }, self._search_json_ld(webpage, video_id, default={}))
+
+ def _get_comments_reply(self, root_id, next_id=0, display_id=None):
+ comment_api_raw_data = self._download_json(
+ 'https://api.bilibili.tv/reply/web/detail', display_id,
+ note=f'Downloading reply comment of {root_id} - {next_id}',
+ query={
+ 'platform': 'web',
+                'ps': 20, # replies per page (default: 3)
+ 'root': root_id,
+ 'next': next_id,
+ })
+
+ for replies in traverse_obj(comment_api_raw_data, ('data', 'replies', ...)):
+ yield {
+ 'author': traverse_obj(replies, ('member', 'name')),
+ 'author_id': traverse_obj(replies, ('member', 'mid')),
+ 'author_thumbnail': traverse_obj(replies, ('member', 'face')),
+ 'text': traverse_obj(replies, ('content', 'message')),
+ 'id': replies.get('rpid'),
+ 'like_count': int_or_none(replies.get('like_count')),
+ 'parent': replies.get('parent'),
+ 'timestamp': unified_timestamp(replies.get('ctime_text'))
+ }
+
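+        # Recurse while the cursor reports more pages of replies for this root
+        # comment.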
+ if not traverse_obj(comment_api_raw_data, ('data', 'cursor', 'is_end')):
+ yield from self._get_comments_reply(
+ root_id, comment_api_raw_data['data']['cursor']['next'], display_id)
+
+ def _get_comments(self, video_id, ep_id):
+ for i in itertools.count(0):
+ comment_api_raw_data = self._download_json(
+ 'https://api.bilibili.tv/reply/web/root', video_id,
+ note=f'Downloading comment page {i + 1}',
+ query={
+ 'platform': 'web',
+ 'pn': i, # page number
+                    'ps': 20, # comments per page (default: 20)
+ 'oid': video_id,
+ 'type': 3 if ep_id else 1, # 1: user generated content, 3: series content
+ 'sort_type': 1, # 1: best, 2: recent
+ })
+
+ for replies in traverse_obj(comment_api_raw_data, ('data', 'replies', ...)):
+ yield {
+ 'author': traverse_obj(replies, ('member', 'name')),
+ 'author_id': traverse_obj(replies, ('member', 'mid')),
+ 'author_thumbnail': traverse_obj(replies, ('member', 'face')),
+ 'text': traverse_obj(replies, ('content', 'message')),
+ 'id': replies.get('rpid'),
+ 'like_count': int_or_none(replies.get('like_count')),
+ 'timestamp': unified_timestamp(replies.get('ctime_text')),
+ 'author_is_uploader': bool(traverse_obj(replies, ('member', 'type'))),
+ }
+ if replies.get('count'):
+ yield from self._get_comments_reply(replies.get('rpid'), display_id=video_id)
+
+ if traverse_obj(comment_api_raw_data, ('data', 'cursor', 'is_end')):
+ break
+
+ def _real_extract(self, url):
+ season_id, ep_id, aid = self._match_valid_url(url).group('season_id', 'ep_id', 'aid')
+ video_id = ep_id or aid
+ chapters = None
+
+ if ep_id:
+ intro_ending_json = self._call_api(
+ f'/web/v2/ogv/play/episode?episode_id={ep_id}&platform=web',
+ video_id, fatal=False) or {}
+ if intro_ending_json.get('skip'):
+            # FIXME: the start and end times seem to be off by a few seconds,
+            # even though they match what ogv.*.js computes
+            # ref: https://p.bstarstatic.com/fe-static/bstar-web-new/assets/ogv.2b147442.js
+ chapters = [{
+ 'start_time': float_or_none(traverse_obj(intro_ending_json, ('skip', 'opening_start_time')), 1000),
+ 'end_time': float_or_none(traverse_obj(intro_ending_json, ('skip', 'opening_end_time')), 1000),
+ 'title': 'Intro'
+ }, {
+ 'start_time': float_or_none(traverse_obj(intro_ending_json, ('skip', 'ending_start_time')), 1000),
+ 'end_time': float_or_none(traverse_obj(intro_ending_json, ('skip', 'ending_end_time')), 1000),
+ 'title': 'Outro'
+ }]
+
+ return {
+ 'id': video_id,
+ **self._extract_video_metadata(url, video_id, season_id),
+ 'formats': self._get_formats(ep_id=ep_id, aid=aid),
+ 'subtitles': self.extract_subtitles(ep_id=ep_id, aid=aid),
+ 'chapters': chapters,
+ '__post_extractor': self.extract_comments(video_id, ep_id),
+ 'http_headers': self._HEADERS,
+ }
+
+
+class BiliIntlSeriesIE(BiliIntlBaseIE):
+ IE_NAME = 'biliIntl:series'
+ _VALID_URL = r'https?://(?:www\.)?bili(?:bili\.tv|intl\.com)/(?:[a-zA-Z]{2}/)?(?:play|media)/(?P<id>\d+)/?(?:[?#]|$)'
+ _TESTS = [{
+ 'url': 'https://www.bilibili.tv/en/play/34613',
+ 'playlist_mincount': 15,
+ 'info_dict': {
+ 'id': '34613',
+ 'title': 'TONIKAWA: Over the Moon For You',
+ 'description': 'md5:297b5a17155eb645e14a14b385ab547e',
+ 'categories': ['Slice of life', 'Comedy', 'Romance'],
+ 'thumbnail': r're:^https://pic\.bstarstatic\.com/ogv/.+\.png$',
+ 'view_count': int,
+ },
+ 'params': {
+ 'skip_download': True,
+ },
+ }, {
+ 'url': 'https://www.bilibili.tv/en/media/1048837',
+ 'info_dict': {
+ 'id': '1048837',
+ 'title': 'SPY×FAMILY',
+ 'description': 'md5:b4434eb1a9a97ad2bccb779514b89f17',
+ 'categories': ['Adventure', 'Action', 'Comedy'],
+ 'thumbnail': r're:^https://pic\.bstarstatic\.com/ogv/.+\.jpg$',
+ 'view_count': int,
+ },
+ 'playlist_mincount': 25,
+ }, {
+ 'url': 'https://www.biliintl.com/en/play/34613',
+ 'only_matching': True,
+ }, {
+ 'url': 'https://www.biliintl.com/EN/play/34613',
+ 'only_matching': True,
+ }]
+
+ def _entries(self, series_id):
+ series_json = self._call_api(f'/web/v2/ogv/play/episodes?season_id={series_id}&platform=web', series_id)
+ for episode in traverse_obj(series_json, ('sections', ..., 'episodes', ...), expected_type=dict):
+ episode_id = str(episode['episode_id'])
+ yield self.url_result(smuggle_url(
+ BiliIntlIE._make_url(episode_id, series_id),
+ self._parse_video_metadata(episode)
+ ), BiliIntlIE, episode_id)
+
+ def _real_extract(self, url):
+ series_id = self._match_id(url)
+ series_info = self._call_api(f'/web/v2/ogv/play/season_info?season_id={series_id}&platform=web', series_id).get('season') or {}
+ return self.playlist_result(
+ self._entries(series_id), series_id, series_info.get('title'), series_info.get('description'),
+ categories=traverse_obj(series_info, ('styles', ..., 'title'), expected_type=str_or_none),
+ thumbnail=url_or_none(series_info.get('horizontal_cover')), view_count=parse_count(series_info.get('view')))
+
+
+class BiliLiveIE(InfoExtractor):
+ _VALID_URL = r'https?://live\.bilibili\.com/(?:blanc/)?(?P<id>\d+)'
+
+ _TESTS = [{
+ 'url': 'https://live.bilibili.com/196',
+ 'info_dict': {
+ 'id': '33989',
+ 'description': "周六杂谈回,其他时候随机游戏。 | \n录播:@下播型泛式录播组。 | \n直播通知群(全员禁言):666906670,902092584,59971⑧481 (功能一样,别多加)",
+ 'ext': 'flv',
+ 'title': "太空狼人杀联动,不被爆杀就算赢",
+ 'thumbnail': "https://i0.hdslb.com/bfs/live/new_room_cover/e607bc1529057ef4b332e1026e62cf46984c314d.jpg",
+ 'timestamp': 1650802769,
+ },
+ 'skip': 'not live'
+ }, {
+ 'url': 'https://live.bilibili.com/196?broadcast_type=0&is_room_feed=1?spm_id_from=333.999.space_home.strengthen_live_card.click',
+ 'only_matching': True
+ }, {
+ 'url': 'https://live.bilibili.com/blanc/196',
+ 'only_matching': True
+ }]
+
+ _FORMATS = {
+ 80: {'format_id': 'low', 'format_note': '流畅'},
+ 150: {'format_id': 'high_res', 'format_note': '高清'},
+ 250: {'format_id': 'ultra_high_res', 'format_note': '超清'},
+ 400: {'format_id': 'blue_ray', 'format_note': '蓝光'},
+ 10000: {'format_id': 'source', 'format_note': '原画'},
+ 20000: {'format_id': '4K', 'format_note': '4K'},
+ 30000: {'format_id': 'dolby', 'format_note': '杜比'},
+ }
+
+ _quality = staticmethod(qualities(list(_FORMATS)))
+
+ def _call_api(self, path, room_id, query):
+ api_result = self._download_json(f'https://api.live.bilibili.com/{path}', room_id, query=query)
+ if api_result.get('code') != 0:
+ raise ExtractorError(api_result.get('message') or 'Unable to download JSON metadata')
+ return api_result.get('data') or {}
+
+ def _parse_formats(self, qn, fmt):
+ for codec in fmt.get('codec') or []:
+ if codec.get('current_qn') != qn:
+ continue
+ for url_info in codec['url_info']:
+ yield {
+ 'url': f'{url_info["host"]}{codec["base_url"]}{url_info["extra"]}',
+ 'ext': fmt.get('format_name'),
+ 'vcodec': codec.get('codec_name'),
+ 'quality': self._quality(qn),
+ **self._FORMATS[qn],
+ }
+
+ def _real_extract(self, url):
+ room_id = self._match_id(url)
+ room_data = self._call_api('room/v1/Room/get_info', room_id, {'id': room_id})
+ if room_data.get('live_status') == 0:
+ raise ExtractorError('Streamer is not live', expected=True)
+
+ formats = []
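+        # The playurl API is queried once per known quality number (qn);
+        # _parse_formats then keeps only the codec entries whose current_qn
+        # matches, so the results can simply be merged.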
+        for qn in self._FORMATS:
+ stream_data = self._call_api('xlive/web-room/v2/index/getRoomPlayInfo', room_id, {
+ 'room_id': room_id,
+ 'qn': qn,
+ 'codec': '0,1',
+ 'format': '0,2',
+ 'mask': '0',
+ 'no_playurl': '0',
+ 'platform': 'web',
+ 'protocol': '0,1',
+ })
+ for fmt in traverse_obj(stream_data, ('playurl_info', 'playurl', 'stream', ..., 'format', ...)) or []:
+ formats.extend(self._parse_formats(qn, fmt))
+
+ return {
+ 'id': room_id,
+ 'title': room_data.get('title'),
+ 'description': room_data.get('description'),
+ 'thumbnail': room_data.get('user_cover'),
+ 'timestamp': stream_data.get('live_time'),
+ 'formats': formats,
+ 'is_live': True,
+ 'http_headers': {
+ 'Referer': url,
+ },
+ }
diff --git a/yt_dlp/extractor/biobiochiletv.py b/yt_dlp/extractor/biobiochiletv.py
new file mode 100644
index 0000000..180c965
--- /dev/null
+++ b/yt_dlp/extractor/biobiochiletv.py
@@ -0,0 +1,83 @@
+from .common import InfoExtractor
+from ..utils import (
+ ExtractorError,
+ remove_end,
+)
+
+
+class BioBioChileTVIE(InfoExtractor):
+ _VALID_URL = r'https?://(?:tv|www)\.biobiochile\.cl/(?:notas|noticias)/(?:[^/]+/)+(?P<id>[^/]+)\.shtml'
+
+ _TESTS = [{
+ 'url': 'http://tv.biobiochile.cl/notas/2015/10/21/sobre-camaras-y-camarillas-parlamentarias.shtml',
+ 'md5': '26f51f03cf580265defefb4518faec09',
+ 'info_dict': {
+ 'id': 'sobre-camaras-y-camarillas-parlamentarias',
+ 'ext': 'mp4',
+ 'title': 'Sobre Cámaras y camarillas parlamentarias',
+ 'thumbnail': r're:^https?://.*\.jpg$',
+ 'uploader': 'Fernando Atria',
+ },
+ 'skip': 'URL expired and redirected to http://www.biobiochile.cl/portada/bbtv/index.html',
+ }, {
+ # different uploader layout
+ 'url': 'http://tv.biobiochile.cl/notas/2016/03/18/natalia-valdebenito-repasa-a-diputado-hasbun-paso-a-la-categoria-de-hablar-brutalidades.shtml',
+ 'md5': 'edc2e6b58974c46d5b047dea3c539ff3',
+ 'info_dict': {
+ 'id': 'natalia-valdebenito-repasa-a-diputado-hasbun-paso-a-la-categoria-de-hablar-brutalidades',
+ 'ext': 'mp4',
+ 'title': 'Natalia Valdebenito repasa a diputado Hasbún: Pasó a la categoría de hablar brutalidades',
+ 'thumbnail': r're:^https?://.*\.jpg$',
+ 'uploader': 'Piangella Obrador',
+ },
+ 'params': {
+ 'skip_download': True,
+ },
+ 'skip': 'URL expired and redirected to http://www.biobiochile.cl/portada/bbtv/index.html',
+ }, {
+ 'url': 'http://www.biobiochile.cl/noticias/bbtv/comentarios-bio-bio/2016/07/08/edecanes-del-congreso-figuras-decorativas-que-le-cuestan-muy-caro-a-los-chilenos.shtml',
+ 'info_dict': {
+ 'id': 'b4xd0LK3SK',
+ 'ext': 'mp4',
+ # TODO: fix url_transparent information overriding
+ # 'uploader': 'Juan Pablo Echenique',
+ 'title': 'Comentario Oscar Cáceres',
+ },
+ 'params': {
+ # empty m3u8 manifest
+ 'skip_download': True,
+ },
+ }, {
+ 'url': 'http://tv.biobiochile.cl/notas/2015/10/22/ninos-transexuales-de-quien-es-la-decision.shtml',
+ 'only_matching': True,
+ }, {
+ 'url': 'http://tv.biobiochile.cl/notas/2015/10/21/exclusivo-hector-pinto-formador-de-chupete-revela-version-del-ex-delantero-albo.shtml',
+ 'only_matching': True,
+ }]
+
+ def _real_extract(self, url):
+ video_id = self._match_id(url)
+
+ webpage = self._download_webpage(url, video_id)
+
+ rudo_url = self._search_regex(
+ r'<iframe[^>]+src=(?P<q1>[\'"])(?P<url>(?:https?:)?//rudo\.video/vod/[0-9a-zA-Z]+)(?P=q1)',
+ webpage, 'embed URL', None, group='url')
+ if not rudo_url:
+ raise ExtractorError('No videos found')
+
+ title = remove_end(self._og_search_title(webpage), ' - BioBioChile TV')
+
+ thumbnail = self._og_search_thumbnail(webpage)
+ uploader = self._html_search_regex(
+ r'<a[^>]+href=["\'](?:https?://(?:busca|www)\.biobiochile\.cl)?/(?:lista/)?(?:author|autor)[^>]+>(.+?)</a>',
+ webpage, 'uploader', fatal=False)
+
+ return {
+ '_type': 'url_transparent',
+ 'url': rudo_url,
+ 'id': video_id,
+ 'title': title,
+ 'thumbnail': thumbnail,
+ 'uploader': uploader,
+ }
diff --git a/yt_dlp/extractor/bitchute.py b/yt_dlp/extractor/bitchute.py
new file mode 100644
index 0000000..194bf1f
--- /dev/null
+++ b/yt_dlp/extractor/bitchute.py
@@ -0,0 +1,275 @@
+import functools
+import re
+
+from .common import InfoExtractor
+from ..networking import HEADRequest
+from ..utils import (
+ ExtractorError,
+ OnDemandPagedList,
+ clean_html,
+ extract_attributes,
+ get_element_by_class,
+ get_element_by_id,
+ get_element_html_by_class,
+ get_elements_html_by_class,
+ int_or_none,
+ orderedSet,
+ parse_count,
+ parse_duration,
+ traverse_obj,
+ unified_strdate,
+ urlencode_postdata,
+ urljoin,
+)
+
+
+class BitChuteIE(InfoExtractor):
+ _VALID_URL = r'https?://(?:www\.)?bitchute\.com/(?:video|embed|torrent/[^/]+)/(?P<id>[^/?#&]+)'
+ _EMBED_REGEX = [rf'<(?:script|iframe)[^>]+\bsrc=(["\'])(?P<url>{_VALID_URL})']
+ _TESTS = [{
+ 'url': 'https://www.bitchute.com/video/UGlrF9o9b-Q/',
+ 'md5': '7e427d7ed7af5a75b5855705ec750e2b',
+ 'info_dict': {
+ 'id': 'UGlrF9o9b-Q',
+ 'ext': 'mp4',
+ 'title': 'This is the first video on #BitChute !',
+ 'description': 'md5:a0337e7b1fe39e32336974af8173a034',
+ 'thumbnail': r're:^https?://.*\.jpg$',
+ 'uploader': 'BitChute',
+ 'upload_date': '20170103',
+ 'uploader_url': 'https://www.bitchute.com/profile/I5NgtHZn9vPj/',
+ 'channel': 'BitChute',
+ 'channel_url': 'https://www.bitchute.com/channel/bitchute/'
+ },
+ }, {
+ # test case: video with different channel and uploader
+ 'url': 'https://www.bitchute.com/video/Yti_j9A-UZ4/',
+ 'md5': 'f10e6a8e787766235946d0868703f1d0',
+ 'info_dict': {
+ 'id': 'Yti_j9A-UZ4',
+ 'ext': 'mp4',
+ 'title': 'Israel at War | Full Measure',
+ 'description': 'md5:38cf7bc6f42da1a877835539111c69ef',
+ 'thumbnail': r're:^https?://.*\.jpg$',
+ 'uploader': 'sharylattkisson',
+ 'upload_date': '20231106',
+ 'uploader_url': 'https://www.bitchute.com/profile/9K0kUWA9zmd9/',
+ 'channel': 'Full Measure with Sharyl Attkisson',
+ 'channel_url': 'https://www.bitchute.com/channel/sharylattkisson/'
+ },
+ }, {
+ # video not downloadable in browser, but we can recover it
+ 'url': 'https://www.bitchute.com/video/2s6B3nZjAk7R/',
+ 'md5': '05c12397d5354bf24494885b08d24ed1',
+ 'info_dict': {
+ 'id': '2s6B3nZjAk7R',
+ 'ext': 'mp4',
+ 'filesize': 71537926,
+ 'title': 'STYXHEXENHAMMER666 - Election Fraud, Clinton 2020, EU Armies, and Gun Control',
+ 'description': 'md5:228ee93bd840a24938f536aeac9cf749',
+ 'thumbnail': r're:^https?://.*\.jpg$',
+ 'uploader': 'BitChute',
+ 'upload_date': '20181113',
+ 'uploader_url': 'https://www.bitchute.com/profile/I5NgtHZn9vPj/',
+ 'channel': 'BitChute',
+ 'channel_url': 'https://www.bitchute.com/channel/bitchute/'
+ },
+ 'params': {'check_formats': None},
+ }, {
+ # restricted video
+ 'url': 'https://www.bitchute.com/video/WEnQU7XGcTdl/',
+ 'info_dict': {
+ 'id': 'WEnQU7XGcTdl',
+ 'ext': 'mp4',
+ 'title': 'Impartial Truth - Ein Letzter Appell an die Vernunft',
+ },
+ 'params': {'skip_download': True},
+ 'skip': 'Georestricted in DE',
+ }, {
+ 'url': 'https://www.bitchute.com/embed/lbb5G1hjPhw/',
+ 'only_matching': True,
+ }, {
+ 'url': 'https://www.bitchute.com/torrent/Zee5BE49045h/szoMrox2JEI.webtorrent',
+ 'only_matching': True,
+ }]
+ _GEO_BYPASS = False
+
+ _HEADERS = {
+ 'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_14_0) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/69.0.3497.57 Safari/537.36',
+ 'Referer': 'https://www.bitchute.com/',
+ }
+
+ def _check_format(self, video_url, video_id):
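+        # Videos sometimes 404 on the seed host embedded in the page but remain
+        # available on other seed hosts; try each candidate (the first keeps the
+        # original host) and use the first URL that answers a HEAD request.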
+ urls = orderedSet(
+ re.sub(r'(^https?://)(seed\d+)(?=\.bitchute\.com)', fr'\g<1>{host}', video_url)
+ for host in (r'\g<2>', 'seed122', 'seed125', 'seed126', 'seed128',
+ 'seed132', 'seed150', 'seed151', 'seed152', 'seed153',
+ 'seed167', 'seed171', 'seed177', 'seed305', 'seed307',
+ 'seedp29xb', 'zb10-7gsop1v78'))
+ for url in urls:
+ try:
+ response = self._request_webpage(
+ HEADRequest(url), video_id=video_id, note=f'Checking {url}', headers=self._HEADERS)
+ except ExtractorError as e:
+ self.to_screen(f'{video_id}: URL is invalid, skipping: {e.cause}')
+ continue
+ return {
+ 'url': url,
+ 'filesize': int_or_none(response.headers.get('Content-Length'))
+ }
+
+ def _raise_if_restricted(self, webpage):
+ page_title = clean_html(get_element_by_class('page-title', webpage)) or ''
+ if re.fullmatch(r'(?:Channel|Video) Restricted', page_title):
+ reason = clean_html(get_element_by_id('page-detail', webpage)) or page_title
+ self.raise_geo_restricted(reason)
+
+ @staticmethod
+ def _make_url(html):
+ path = extract_attributes(get_element_html_by_class('spa', html) or '').get('href')
+ return urljoin('https://www.bitchute.com', path)
+
+ def _real_extract(self, url):
+ video_id = self._match_id(url)
+ webpage = self._download_webpage(
+ f'https://www.bitchute.com/video/{video_id}', video_id, headers=self._HEADERS)
+
+ self._raise_if_restricted(webpage)
+ publish_date = clean_html(get_element_by_class('video-publish-date', webpage))
+ entries = self._parse_html5_media_entries(url, webpage, video_id)
+
+ formats = []
+ for format_ in traverse_obj(entries, (0, 'formats', ...)):
+ if self.get_param('check_formats') is not False:
+ format_.update(self._check_format(format_.pop('url'), video_id) or {})
+ if 'url' not in format_:
+ continue
+ formats.append(format_)
+
+ if not formats:
+ self.raise_no_formats(
+ 'Video is unavailable. Please make sure this video is playable in the browser '
+ 'before reporting this issue.', expected=True, video_id=video_id)
+
+ details = get_element_by_class('details', webpage) or ''
+ uploader_html = get_element_html_by_class('creator', details) or ''
+ channel_html = get_element_html_by_class('name', details) or ''
+
+ return {
+ 'id': video_id,
+ 'title': self._html_extract_title(webpage) or self._og_search_title(webpage),
+ 'description': self._og_search_description(webpage, default=None),
+ 'thumbnail': self._og_search_thumbnail(webpage),
+ 'uploader': clean_html(uploader_html),
+ 'uploader_url': self._make_url(uploader_html),
+ 'channel': clean_html(channel_html),
+ 'channel_url': self._make_url(channel_html),
+ 'upload_date': unified_strdate(self._search_regex(
+ r'at \d+:\d+ UTC on (.+?)\.', publish_date, 'upload date', fatal=False)),
+ 'formats': formats,
+ }
+
+
+class BitChuteChannelIE(InfoExtractor):
+ _VALID_URL = r'https?://(?:www\.)?bitchute\.com/(?P<type>channel|playlist)/(?P<id>[^/?#&]+)'
+ _TESTS = [{
+ 'url': 'https://www.bitchute.com/channel/bitchute/',
+ 'info_dict': {
+ 'id': 'bitchute',
+ 'title': 'BitChute',
+ 'description': 'md5:5329fb3866125afa9446835594a9b138',
+ },
+ 'playlist': [
+ {
+ 'md5': '7e427d7ed7af5a75b5855705ec750e2b',
+ 'info_dict': {
+ 'id': 'UGlrF9o9b-Q',
+ 'ext': 'mp4',
+ 'title': 'This is the first video on #BitChute !',
+ 'description': 'md5:a0337e7b1fe39e32336974af8173a034',
+ 'thumbnail': r're:^https?://.*\.jpg$',
+ 'uploader': 'BitChute',
+ 'upload_date': '20170103',
+ 'uploader_url': 'https://www.bitchute.com/profile/I5NgtHZn9vPj/',
+ 'channel': 'BitChute',
+ 'channel_url': 'https://www.bitchute.com/channel/bitchute/',
+ 'duration': 16,
+ 'view_count': int,
+ },
+ }
+ ],
+ 'params': {
+ 'skip_download': True,
+ 'playlist_items': '-1',
+ },
+ }, {
+ 'url': 'https://www.bitchute.com/playlist/wV9Imujxasw9/',
+ 'playlist_mincount': 20,
+ 'info_dict': {
+ 'id': 'wV9Imujxasw9',
+ 'title': 'Bruce MacDonald and "The Light of Darkness"',
+ 'description': 'md5:747724ef404eebdfc04277714f81863e',
+ }
+ }]
+
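+    # A static CSRF token: the endpoint only seems to require that the
+    # csrftoken cookie and the csrfmiddlewaretoken form field match.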
+ _TOKEN = 'zyG6tQcGPE5swyAEFLqKUwMuMMuF6IO2DZ6ZDQjGfsL0e4dcTLwqkTTul05Jdve7'
+ PAGE_SIZE = 25
+ HTML_CLASS_NAMES = {
+ 'channel': {
+ 'container': 'channel-videos-container',
+ 'title': 'channel-videos-title',
+ 'description': 'channel-videos-text',
+ },
+ 'playlist': {
+ 'container': 'playlist-video',
+ 'title': 'title',
+            'description': 'description',
+        },
+    }
+
+ @staticmethod
+ def _make_url(playlist_id, playlist_type):
+ return f'https://www.bitchute.com/{playlist_type}/{playlist_id}/'
+
+ def _fetch_page(self, playlist_id, playlist_type, page_num):
+ playlist_url = self._make_url(playlist_id, playlist_type)
+ data = self._download_json(
+ f'{playlist_url}extend/', playlist_id, f'Downloading page {page_num}',
+ data=urlencode_postdata({
+ 'csrfmiddlewaretoken': self._TOKEN,
+ 'name': '',
+ 'offset': page_num * self.PAGE_SIZE,
+ }), headers={
+ 'Content-Type': 'application/x-www-form-urlencoded; charset=UTF-8',
+ 'Referer': playlist_url,
+ 'X-Requested-With': 'XMLHttpRequest',
+ 'Cookie': f'csrftoken={self._TOKEN}',
+ })
+ if not data.get('success'):
+ return
+ classes = self.HTML_CLASS_NAMES[playlist_type]
+ for video_html in get_elements_html_by_class(classes['container'], data.get('html')):
+ video_id = self._search_regex(
+ r'<a\s[^>]*\bhref=["\']/video/([^"\'/]+)', video_html, 'video id', default=None)
+ if not video_id:
+ continue
+ yield self.url_result(
+ f'https://www.bitchute.com/video/{video_id}', BitChuteIE, video_id, url_transparent=True,
+ title=clean_html(get_element_by_class(classes['title'], video_html)),
+ description=clean_html(get_element_by_class(classes['description'], video_html)),
+ duration=parse_duration(get_element_by_class('video-duration', video_html)),
+ view_count=parse_count(clean_html(get_element_by_class('video-views', video_html))))
+
+ def _real_extract(self, url):
+ playlist_type, playlist_id = self._match_valid_url(url).group('type', 'id')
+ webpage = self._download_webpage(self._make_url(playlist_id, playlist_type), playlist_id)
+
+ page_func = functools.partial(self._fetch_page, playlist_id, playlist_type)
+ return self.playlist_result(
+ OnDemandPagedList(page_func, self.PAGE_SIZE), playlist_id,
+ title=self._html_extract_title(webpage, default=None),
+ description=self._html_search_meta(
+ ('description', 'og:description', 'twitter:description'), webpage, default=None),
+ playlist_count=int_or_none(self._html_search_regex(
+ r'<span>(\d+)\s+videos?</span>', webpage, 'playlist count', default=None)))
diff --git a/yt_dlp/extractor/blackboardcollaborate.py b/yt_dlp/extractor/blackboardcollaborate.py
new file mode 100644
index 0000000..8f41c89
--- /dev/null
+++ b/yt_dlp/extractor/blackboardcollaborate.py
@@ -0,0 +1,63 @@
+from .common import InfoExtractor
+from ..utils import parse_iso8601
+
+
+class BlackboardCollaborateIE(InfoExtractor):
+ _VALID_URL = r'''(?x)
+ https?://
+ (?P<region>[a-z-]+)\.bbcollab\.com/
+ (?:
+ collab/ui/session/playback/load|
+ recording
+ )/
+ (?P<id>[^/]+)'''
+ _TESTS = [
+ {
+ 'url': 'https://us-lti.bbcollab.com/collab/ui/session/playback/load/0a633b6a88824deb8c918f470b22b256',
+ 'md5': 'bb7a055682ee4f25fdb5838cdf014541',
+ 'info_dict': {
+ 'id': '0a633b6a88824deb8c918f470b22b256',
+ 'title': 'HESI A2 Information Session - Thursday, May 6, 2021 - recording_1',
+ 'ext': 'mp4',
+ 'duration': 1896000,
+ 'timestamp': 1620331399,
+ 'upload_date': '20210506',
+ },
+ },
+ {
+ 'url': 'https://us.bbcollab.com/collab/ui/session/playback/load/76761522adfe4345a0dee6794bbcabda',
+ 'only_matching': True,
+ },
+ {
+ 'url': 'https://ca.bbcollab.com/collab/ui/session/playback/load/b6399dcb44df4f21b29ebe581e22479d',
+ 'only_matching': True,
+ },
+ {
+ 'url': 'https://eu.bbcollab.com/recording/51ed7b50810c4444a106e48cefb3e6b5',
+ 'only_matching': True,
+ },
+ {
+ 'url': 'https://au.bbcollab.com/collab/ui/session/playback/load/2bccf7165d7c419ab87afc1ec3f3bb15',
+ 'only_matching': True,
+ },
+ ]
+
+ def _real_extract(self, url):
+ mobj = self._match_valid_url(url)
+ region = mobj.group('region')
+ video_id = mobj.group('id')
+ info = self._download_json(
+            f'https://{region}.bbcollab.com/collab/api/csa/recordings/{video_id}/data', video_id)
+ duration = info.get('duration')
+ title = info['name']
+ upload_date = info.get('created')
+ streams = info['streams']
+ formats = [{'format_id': k, 'url': url} for k, url in streams.items()]
+
+ return {
+ 'duration': duration,
+ 'formats': formats,
+ 'id': video_id,
+ 'timestamp': parse_iso8601(upload_date),
+ 'title': title,
+ }
diff --git a/yt_dlp/extractor/bleacherreport.py b/yt_dlp/extractor/bleacherreport.py
new file mode 100644
index 0000000..e875957
--- /dev/null
+++ b/yt_dlp/extractor/bleacherreport.py
@@ -0,0 +1,110 @@
+from .common import InfoExtractor
+from .amp import AMPIE
+from ..utils import (
+ ExtractorError,
+ int_or_none,
+ parse_iso8601,
+ str_or_none,
+)
+
+
+class BleacherReportIE(InfoExtractor):
+ _WORKING = False
+ _VALID_URL = r'https?://(?:www\.)?bleacherreport\.com/articles/(?P<id>\d+)'
+ _TESTS = [{
+ 'url': 'http://bleacherreport.com/articles/2496438-fsu-stat-projections-is-jalen-ramsey-best-defensive-player-in-college-football',
+ 'md5': 'a3ffc3dc73afdbc2010f02d98f990f20',
+ 'info_dict': {
+ 'id': '2496438',
+ 'ext': 'mp4',
+ 'title': 'FSU Stat Projections: Is Jalen Ramsey Best Defensive Player in College Football?',
+ 'uploader_id': '3992341',
+ 'description': 'CFB, ACC, Florida State',
+ 'timestamp': 1434380212,
+ 'upload_date': '20150615',
+ 'uploader': 'Team Stream Now ',
+ },
+ 'skip': 'Video removed',
+ }, {
+ 'url': 'http://bleacherreport.com/articles/2586817-aussie-golfers-get-fright-of-their-lives-after-being-chased-by-angry-kangaroo',
+ 'md5': '6a5cd403418c7b01719248ca97fb0692',
+ 'info_dict': {
+ 'id': '2586817',
+ 'ext': 'webm',
+ 'title': 'Aussie Golfers Get Fright of Their Lives After Being Chased by Angry Kangaroo',
+ 'timestamp': 1446839961,
+ 'uploader': 'Sean Fay',
+ 'description': 'md5:b1601e2314c4d8eec23b6eafe086a757',
+ 'uploader_id': '6466954',
+ 'upload_date': '20151011',
+ },
+ 'add_ie': ['Youtube'],
+ }]
+
+ def _real_extract(self, url):
+ article_id = self._match_id(url)
+
+        article_data = self._download_json(f'http://api.bleacherreport.com/api/v1/articles/{article_id}', article_id)['article']
+
+ thumbnails = []
+ primary_photo = article_data.get('primaryPhoto')
+ if primary_photo:
+ thumbnails = [{
+ 'url': primary_photo['url'],
+ 'width': primary_photo.get('width'),
+ 'height': primary_photo.get('height'),
+ }]
+
+ info = {
+ '_type': 'url_transparent',
+ 'id': article_id,
+ 'title': article_data['title'],
+ 'uploader': article_data.get('author', {}).get('name'),
+ 'uploader_id': str_or_none(article_data.get('authorId')),
+ 'timestamp': parse_iso8601(article_data.get('createdAt')),
+ 'thumbnails': thumbnails,
+ 'comment_count': int_or_none(article_data.get('commentsCount')),
+ 'view_count': int_or_none(article_data.get('hitCount')),
+ }
+
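+ # Resolve the article's video reference to a concrete embed URL per provider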
+ video = article_data.get('video')
+ if not video:
+ raise ExtractorError('No video in the article', expected=True)
+
+ video_type = video['type']
+ if video_type in ('cms.bleacherreport.com', 'vid.bleacherreport.com'):
+ info['url'] = 'http://bleacherreport.com/video_embed?id=%s' % video['id']
+ elif video_type == 'youtube.com':
+ info['url'] = video['id']
+ elif video_type == 'vine.co':
+ info['url'] = 'https://vine.co/v/%s' % video['id']
+ else:
+ info['url'] = video_type + video['id']
+ return info
+
+
+class BleacherReportCMSIE(AMPIE):
+ _WORKING = False
+ _VALID_URL = r'https?://(?:www\.)?bleacherreport\.com/video_embed\?id=(?P<id>[0-9a-f-]{36}|\d{5})'
+ _TESTS = [{
+ 'url': 'http://bleacherreport.com/video_embed?id=8fd44c2f-3dc5-4821-9118-2c825a98c0e1&library=video-cms',
+ 'md5': '670b2d73f48549da032861130488c681',
+ 'info_dict': {
+ 'id': '8fd44c2f-3dc5-4821-9118-2c825a98c0e1',
+ 'ext': 'mp4',
+ 'title': 'Cena vs. Rollins Would Expose the Heavyweight Division',
+ 'description': 'md5:984afb4ade2f9c0db35f3267ed88b36e',
+ 'upload_date': '20150723',
+ 'timestamp': 1437679032,
+
+ },
+ 'expected_warnings': [
+ 'Unable to download f4m manifest'
+ ]
+ }]
+
+ def _real_extract(self, url):
+ video_id = self._match_id(url)
+ info = self._extract_feed_info('http://vid.bleacherreport.com/videos/%s.akamai' % video_id)
+ info['id'] = video_id
+ return info
diff --git a/yt_dlp/extractor/blerp.py b/yt_dlp/extractor/blerp.py
new file mode 100644
index 0000000..4631ad2
--- /dev/null
+++ b/yt_dlp/extractor/blerp.py
@@ -0,0 +1,167 @@
+import json
+
+from .common import InfoExtractor
+from ..utils import strip_or_none, traverse_obj
+
+
+class BlerpIE(InfoExtractor):
+ IE_NAME = 'blerp'
+ _VALID_URL = r'https?://(?:www\.)?blerp\.com/soundbites/(?P<id>[0-9a-zA-Z]+)'
+ _TESTS = [{
+ 'url': 'https://blerp.com/soundbites/6320fe8745636cb4dd677a5a',
+ 'info_dict': {
+ 'id': '6320fe8745636cb4dd677a5a',
+ 'title': 'Samsung Galaxy S8 Over the Horizon Ringtone 2016',
+ 'uploader': 'luminousaj',
+ 'uploader_id': '5fb81e51aa66ae000c395478',
+ 'ext': 'mp3',
+ 'tags': ['samsung', 'galaxy', 's8', 'over the horizon', '2016', 'ringtone'],
+ }
+ }, {
+ 'url': 'https://blerp.com/soundbites/5bc94ef4796001000498429f',
+ 'info_dict': {
+ 'id': '5bc94ef4796001000498429f',
+ 'title': 'Yee',
+ 'uploader': '179617322678353920',
+ 'uploader_id': '5ba99cf71386730004552c42',
+ 'ext': 'mp3',
+ 'tags': ['YEE', 'YEET', 'wo ha haah catchy tune yee', 'yee']
+ }
+ }]
+
+ _GRAPHQL_OPERATIONNAME = 'webBitePageGetBite'
+ _GRAPHQL_QUERY = (
+ '''query webBitePageGetBite($_id: MongoID!) {
+ web {
+ biteById(_id: $_id) {
+ ...bitePageFrag
+ __typename
+ }
+ __typename
+ }
+ }
+
+ fragment bitePageFrag on Bite {
+ _id
+ title
+ userKeywords
+ keywords
+ color
+ visibility
+ isPremium
+ owned
+ price
+ extraReview
+ isAudioExists
+ image {
+ filename
+ original {
+ url
+ __typename
+ }
+ __typename
+ }
+ userReactions {
+ _id
+ reactions
+ createdAt
+ __typename
+ }
+ topReactions
+ totalSaveCount
+ saved
+ blerpLibraryType
+ license
+ licenseMetaData
+ playCount
+ totalShareCount
+ totalFavoriteCount
+ totalAddedToBoardCount
+ userCategory
+ userAudioQuality
+ audioCreationState
+ transcription
+ userTranscription
+ description
+ createdAt
+ updatedAt
+ author
+ listingType
+ ownerObject {
+ _id
+ username
+ profileImage {
+ filename
+ original {
+ url
+ __typename
+ }
+ __typename
+ }
+ __typename
+ }
+ transcription
+ favorited
+ visibility
+ isCurated
+ sourceUrl
+ audienceRating
+ strictAudienceRating
+ ownerId
+ reportObject {
+ reportedContentStatus
+ __typename
+ }
+ giphy {
+ mp4
+ gif
+ __typename
+ }
+ audio {
+ filename
+ original {
+ url
+ __typename
+ }
+ mp3 {
+ url
+ __typename
+ }
+ __typename
+ }
+ __typename
+ }
+
+ ''')
+
+ def _real_extract(self, url):
+ audio_id = self._match_id(url)
+
+ data = {
+ 'operationName': self._GRAPHQL_OPERATIONNAME,
+ 'query': self._GRAPHQL_QUERY,
+ 'variables': {
+ '_id': audio_id
+ }
+ }
+
+ headers = {
+ 'Content-Type': 'application/json'
+ }
+
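+ # POST the GraphQL query; the response embeds the soundbite ('bite') metadata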
+ json_result = self._download_json('https://api.blerp.com/graphql',
+ audio_id, data=json.dumps(data).encode('utf-8'), headers=headers)
+
+ bite_json = json_result['data']['web']['biteById']
+
+ info_dict = {
+ 'id': bite_json['_id'],
+ 'url': bite_json['audio']['mp3']['url'],
+ 'title': bite_json['title'],
+ 'uploader': traverse_obj(bite_json, ('ownerObject', 'username'), expected_type=strip_or_none),
+ 'uploader_id': traverse_obj(bite_json, ('ownerObject', '_id'), expected_type=strip_or_none),
+ 'ext': 'mp3',
+ 'tags': list(filter(None, map(strip_or_none, traverse_obj(bite_json, 'userKeywords', expected_type=list) or []))) or None,
+ }
+
+ return info_dict
diff --git a/yt_dlp/extractor/blogger.py b/yt_dlp/extractor/blogger.py
new file mode 100644
index 0000000..3d6e033
--- /dev/null
+++ b/yt_dlp/extractor/blogger.py
@@ -0,0 +1,45 @@
+from ..utils import (
+ mimetype2ext,
+ parse_duration,
+ parse_qs,
+ str_or_none,
+ traverse_obj,
+)
+from .common import InfoExtractor
+
+
+class BloggerIE(InfoExtractor):
+ IE_NAME = 'blogger.com'
+ _VALID_URL = r'https?://(?:www\.)?blogger\.com/video\.g\?token=(?P<id>.+)'
+ _EMBED_REGEX = [r'''<iframe[^>]+src=["'](?P<url>(?:https?:)?//(?:www\.)?blogger\.com/video\.g\?token=[^"']+)["']''']
+ _TESTS = [{
+ 'url': 'https://www.blogger.com/video.g?token=AD6v5dzEe9hfcARr5Hlq1WTkYy6t-fXH3BBahVhGvVHe5szdEUBEloSEDSTA8-b111089KbfWuBvTN7fnbxMtymsHhXAXwVvyzHH4Qch2cfLQdGxKQrrEuFpC1amSl_9GuLWODjPgw',
+ 'md5': 'f1bc19b6ea1b0fd1d81e84ca9ec467ac',
+ 'info_dict': {
+ 'id': 'BLOGGER-video-3c740e3a49197e16-796',
+ 'title': 'BLOGGER-video-3c740e3a49197e16-796',
+ 'ext': 'mp4',
+ 'thumbnail': r're:^https?://.*',
+ 'duration': 76.068,
+ }
+ }]
+
+ def _real_extract(self, url):
+ token_id = self._match_id(url)
+ webpage = self._download_webpage(url, token_id)
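+ # The player config is embedded as a VIDEO_CONFIG JS variable holding an escaped JSON literal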
+ data_json = self._search_regex(r'var\s+VIDEO_CONFIG\s*=\s*(\{.*)', webpage, 'JSON data')
+ data = self._parse_json(data_json.encode('utf-8').decode('unicode_escape'), token_id)
+ streams = data['streams']
+ formats = [{
+ 'ext': mimetype2ext(traverse_obj(parse_qs(stream['play_url']), ('mime', 0))),
+ 'url': stream['play_url'],
+ 'format_id': str_or_none(stream.get('format_id')),
+ } for stream in streams]
+
+ return {
+ 'id': data.get('iframe_id', token_id),
+ 'title': data.get('iframe_id', token_id),
+ 'formats': formats,
+ 'thumbnail': data.get('thumbnail'),
+ 'duration': parse_duration(traverse_obj(parse_qs(streams[0]['play_url']), ('dur', 0))),
+ }
diff --git a/yt_dlp/extractor/bloomberg.py b/yt_dlp/extractor/bloomberg.py
new file mode 100644
index 0000000..792155e
--- /dev/null
+++ b/yt_dlp/extractor/bloomberg.py
@@ -0,0 +1,77 @@
+import re
+
+from .common import InfoExtractor
+
+
+class BloombergIE(InfoExtractor):
+ _VALID_URL = r'https?://(?:www\.)?bloomberg\.com/(?:[^/]+/)*(?P<id>[^/?#]+)'
+
+ _TESTS = [{
+ 'url': 'https://www.bloomberg.com/news/videos/2021-09-14/apple-unveils-the-new-iphone-13-stock-doesn-t-move-much-video',
+ 'info_dict': {
+ 'id': 'V8cFcYMxTHaMcEiiYVr39A',
+ 'ext': 'flv',
+ 'title': 'Apple Unveils the New IPhone 13, Stock Doesn\'t Move Much',
+ },
+ 'params': {
+ 'format': 'best[format_id^=hds]',
+ },
+ }, {
+ # video ID in BPlayer(...)
+ 'url': 'http://www.bloomberg.com/features/2016-hello-world-new-zealand/',
+ 'info_dict': {
+ 'id': '938c7e72-3f25-4ddb-8b85-a9be731baa74',
+ 'ext': 'flv',
+ 'title': 'Meet the Real-Life Tech Wizards of Middle Earth',
+ 'description': 'Hello World, Episode 1: New Zealand’s freaky AI babies, robot exoskeletons, and a virtual you.',
+ },
+ 'params': {
+ 'format': 'best[format_id^=hds]',
+ },
+ }, {
+ # data-bmmrid=
+ 'url': 'https://www.bloomberg.com/politics/articles/2017-02-08/le-pen-aide-briefed-french-central-banker-on-plan-to-print-money',
+ 'only_matching': True,
+ }, {
+ 'url': 'http://www.bloomberg.com/news/articles/2015-11-12/five-strange-things-that-have-been-happening-in-financial-markets',
+ 'only_matching': True,
+ }, {
+ 'url': 'http://www.bloomberg.com/politics/videos/2015-11-25/karl-rove-on-jeb-bush-s-struggles-stopping-trump',
+ 'only_matching': True,
+ }]
+
+ def _real_extract(self, url):
+ name = self._match_id(url)
+ webpage = self._download_webpage(url, name)
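+ # The video ID may appear as bmmrId, videoId or a data-bmmrid attribute; older pages expose it via BPlayer(...)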
+ video_id = self._search_regex(
+ (r'["\']bmmrId["\']\s*:\s*(["\'])(?P<id>(?:(?!\1).)+)\1',
+ r'videoId\s*:\s*(["\'])(?P<id>(?:(?!\1).)+)\1',
+ r'data-bmmrid=(["\'])(?P<id>(?:(?!\1).)+)\1'),
+ webpage, 'id', group='id', default=None)
+ if not video_id:
+ bplayer_data = self._parse_json(self._search_regex(
+ r'BPlayer\(null,\s*({[^;]+})\);', webpage, 'id'), name)
+ video_id = bplayer_data['id']
+ title = re.sub(': Video$', '', self._og_search_title(webpage))
+
+ embed_info = self._download_json(
+ 'http://www.bloomberg.com/multimedia/api/embed?id=%s' % video_id, video_id)
+ formats = []
+ for stream in embed_info['streams']:
+ stream_url = stream.get('url')
+ if not stream_url:
+ continue
+ if stream['muxing_format'] == 'TS':
+ formats.extend(self._extract_m3u8_formats(
+ stream_url, video_id, 'mp4', m3u8_id='hls', fatal=False))
+ else:
+ formats.extend(self._extract_f4m_formats(
+ stream_url, video_id, f4m_id='hds', fatal=False))
+
+ return {
+ 'id': video_id,
+ 'title': title,
+ 'formats': formats,
+ 'description': self._og_search_description(webpage),
+ 'thumbnail': self._og_search_thumbnail(webpage),
+ }
diff --git a/yt_dlp/extractor/bokecc.py b/yt_dlp/extractor/bokecc.py
new file mode 100644
index 0000000..ca326f2
--- /dev/null
+++ b/yt_dlp/extractor/bokecc.py
@@ -0,0 +1,53 @@
+from .common import InfoExtractor
+from ..compat import compat_parse_qs
+from ..utils import ExtractorError
+
+
+class BokeCCBaseIE(InfoExtractor):
+ def _extract_bokecc_formats(self, webpage, video_id, format_id=None):
+ player_params_str = self._html_search_regex(
+ r'<(?:script|embed)[^>]+src=(?P<q>["\'])(?:https?:)?//p\.bokecc\.com/(?:player|flash/player\.swf)\?(?P<query>.+?)(?P=q)',
+ webpage, 'player params', group='query')
+
+ player_params = compat_parse_qs(player_params_str)
+
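+ # playinfo returns an XML document listing one direct URL per quality level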
+ info_xml = self._download_xml(
+ 'http://p.bokecc.com/servlet/playinfo?uid=%s&vid=%s&m=1' % (
+ player_params['siteid'][0], player_params['vid'][0]), video_id)
+
+ formats = [{
+ 'format_id': format_id,
+ 'url': quality.find('./copy').attrib['playurl'],
+ 'quality': int(quality.attrib['value']),
+ } for quality in info_xml.findall('./video/quality')]
+
+ return formats
+
+
+class BokeCCIE(BokeCCBaseIE):
+ IE_DESC = 'CC视频'
+ _VALID_URL = r'https?://union\.bokecc\.com/playvideo\.bo\?(?P<query>.*)'
+
+ _TESTS = [{
+ 'url': 'http://union.bokecc.com/playvideo.bo?vid=E0ABAE9D4F509B189C33DC5901307461&uid=FE644790DE9D154A',
+ 'info_dict': {
+ 'id': 'FE644790DE9D154A_E0ABAE9D4F509B189C33DC5901307461',
+ 'ext': 'flv',
+ 'title': 'BokeCC Video',
+ },
+ }]
+
+ def _real_extract(self, url):
+ qs = compat_parse_qs(self._match_valid_url(url).group('query'))
+ if not qs.get('vid') or not qs.get('uid'):
+ raise ExtractorError('Invalid URL', expected=True)
+
+ video_id = '%s_%s' % (qs['uid'][0], qs['vid'][0])
+
+ webpage = self._download_webpage(url, video_id)
+
+ return {
+ 'id': video_id,
+ 'title': 'BokeCC Video', # no title provided in the webpage
+ 'formats': self._extract_bokecc_formats(webpage, video_id),
+ }
diff --git a/yt_dlp/extractor/bongacams.py b/yt_dlp/extractor/bongacams.py
new file mode 100644
index 0000000..bf95566
--- /dev/null
+++ b/yt_dlp/extractor/bongacams.py
@@ -0,0 +1,70 @@
+from .common import InfoExtractor
+from ..compat import compat_str
+from ..utils import (
+ int_or_none,
+ try_get,
+ urlencode_postdata,
+)
+
+
+class BongaCamsIE(InfoExtractor):
+ _VALID_URL = r'https?://(?P<host>(?:[^/]+\.)?bongacams\d*\.(?:com|net))/(?P<id>[^/?&#]+)'
+ _TESTS = [{
+ 'url': 'https://de.bongacams.com/azumi-8',
+ 'only_matching': True,
+ }, {
+ 'url': 'https://cn.bongacams.com/azumi-8',
+ 'only_matching': True,
+ }, {
+ 'url': 'https://de.bongacams.net/claireashton',
+ 'info_dict': {
+ 'id': 'claireashton',
+ 'ext': 'mp4',
+ 'title': r're:ClaireAshton \d{4}-\d{2}-\d{2} \d{2}:\d{2}',
+ 'age_limit': 18,
+ 'uploader_id': 'ClaireAshton',
+ 'uploader': 'ClaireAshton',
+ 'like_count': int,
+ 'is_live': True,
+ },
+ 'params': {
+ 'skip_download': True,
+ },
+ }]
+
+ def _real_extract(self, url):
+ mobj = self._match_valid_url(url)
+ host = mobj.group('host')
+ channel_id = mobj.group('id')
+
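+ # Room data is fetched from the site's AMF endpoint, which responds with JSON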
+ amf = self._download_json(
+ 'https://%s/tools/amf.php' % host, channel_id,
+ data=urlencode_postdata((
+ ('method', 'getRoomData'),
+ ('args[]', channel_id),
+ ('args[]', 'false'),
+ )), headers={'X-Requested-With': 'XMLHttpRequest'})
+
+ server_url = amf['localData']['videoServerUrl']
+
+ uploader_id = try_get(
+ amf, lambda x: x['performerData']['username'], compat_str) or channel_id
+ uploader = try_get(
+ amf, lambda x: x['performerData']['displayName'], compat_str)
+ like_count = int_or_none(try_get(
+ amf, lambda x: x['performerData']['loversCount']))
+
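+ # Build the live HLS playlist URL from the room's video server and performer name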
+ formats = self._extract_m3u8_formats(
+ '%s/hls/stream_%s/playlist.m3u8' % (server_url, uploader_id),
+ channel_id, 'mp4', m3u8_id='hls', live=True)
+
+ return {
+ 'id': channel_id,
+ 'title': uploader or uploader_id,
+ 'uploader': uploader,
+ 'uploader_id': uploader_id,
+ 'like_count': like_count,
+ 'age_limit': 18,
+ 'is_live': True,
+ 'formats': formats,
+ }
diff --git a/yt_dlp/extractor/boosty.py b/yt_dlp/extractor/boosty.py
new file mode 100644
index 0000000..fb14ca1
--- /dev/null
+++ b/yt_dlp/extractor/boosty.py
@@ -0,0 +1,209 @@
+from .common import InfoExtractor
+from .youtube import YoutubeIE
+from ..utils import (
+ ExtractorError,
+ int_or_none,
+ qualities,
+ str_or_none,
+ url_or_none,
+)
+from ..utils.traversal import traverse_obj
+
+
+class BoostyIE(InfoExtractor):
+ _VALID_URL = r'https?://(?:www\.)?boosty\.to/(?P<user>[^/#?]+)/posts/(?P<post_id>[^/#?]+)'
+ _TESTS = [{
+ # single ok_video
+ 'url': 'https://boosty.to/kuplinov/posts/e55d050c-e3bb-4873-a7db-ac7a49b40c38',
+ 'info_dict': {
+ 'id': 'd7473824-352e-48e2-ae53-d4aa39459968',
+ 'title': 'phasma_3',
+ 'channel': 'Kuplinov',
+ 'channel_id': '7958701',
+ 'timestamp': 1655031975,
+ 'upload_date': '20220612',
+ 'release_timestamp': 1655049000,
+ 'release_date': '20220612',
+ 'modified_timestamp': 1668680993,
+ 'modified_date': '20221117',
+ 'tags': ['куплинов', 'phasmophobia'],
+ 'like_count': int,
+ 'ext': 'mp4',
+ 'duration': 105,
+ 'view_count': int,
+ 'thumbnail': r're:^https://i\.mycdn\.me/videoPreview\?',
+ },
+ }, {
+ # multiple ok_video
+ 'url': 'https://boosty.to/maddyson/posts/0c652798-3b35-471f-8b48-a76a0b28736f',
+ 'info_dict': {
+ 'id': '0c652798-3b35-471f-8b48-a76a0b28736f',
+ 'title': 'то что не пропустил юта6',
+ 'channel': 'Илья Давыдов',
+ 'channel_id': '6808257',
+ 'timestamp': 1694017040,
+ 'upload_date': '20230906',
+ 'release_timestamp': 1694017040,
+ 'release_date': '20230906',
+ 'modified_timestamp': 1694071178,
+ 'modified_date': '20230907',
+ 'like_count': int,
+ },
+ 'playlist_count': 3,
+ 'playlist': [{
+ 'info_dict': {
+ 'id': 'cc325a9f-a563-41c6-bf47-516c1b506c9a',
+ 'title': 'то что не пропустил юта6',
+ 'channel': 'Илья Давыдов',
+ 'channel_id': '6808257',
+ 'timestamp': 1694017040,
+ 'upload_date': '20230906',
+ 'release_timestamp': 1694017040,
+ 'release_date': '20230906',
+ 'modified_timestamp': 1694071178,
+ 'modified_date': '20230907',
+ 'like_count': int,
+ 'ext': 'mp4',
+ 'duration': 31204,
+ 'view_count': int,
+ 'thumbnail': r're:^https://i\.mycdn\.me/videoPreview\?',
+ },
+ }, {
+ 'info_dict': {
+ 'id': 'd07b0a72-9493-4512-b54e-55ce468fd4b7',
+ 'title': 'то что не пропустил юта6',
+ 'channel': 'Илья Давыдов',
+ 'channel_id': '6808257',
+ 'timestamp': 1694017040,
+ 'upload_date': '20230906',
+ 'release_timestamp': 1694017040,
+ 'release_date': '20230906',
+ 'modified_timestamp': 1694071178,
+ 'modified_date': '20230907',
+ 'like_count': int,
+ 'ext': 'mp4',
+ 'duration': 25704,
+ 'view_count': int,
+ 'thumbnail': r're:^https://i\.mycdn\.me/videoPreview\?',
+ },
+ }, {
+ 'info_dict': {
+ 'id': '4a3bba32-78c8-422a-9432-2791aff60b42',
+ 'title': 'то что не пропустил юта6',
+ 'channel': 'Илья Давыдов',
+ 'channel_id': '6808257',
+ 'timestamp': 1694017040,
+ 'upload_date': '20230906',
+ 'release_timestamp': 1694017040,
+ 'release_date': '20230906',
+ 'modified_timestamp': 1694071178,
+ 'modified_date': '20230907',
+ 'like_count': int,
+ 'ext': 'mp4',
+ 'duration': 31867,
+ 'view_count': int,
+ 'thumbnail': r're:^https://i\.mycdn\.me/videoPreview\?',
+ },
+ }],
+ }, {
+ # single external video (youtube)
+ 'url': 'https://boosty.to/denischuzhoy/posts/6094a487-bcec-4cf8-a453-43313b463c38',
+ 'info_dict': {
+ 'id': 'EXelTnve5lY',
+ 'title': 'Послание Президента Федеральному Собранию | Класс народа',
+ 'upload_date': '20210425',
+ 'channel': 'Денис Чужой',
+ 'tags': 'count:10',
+ 'like_count': int,
+ 'ext': 'mp4',
+ 'duration': 816,
+ 'view_count': int,
+ 'thumbnail': r're:^https://i\.ytimg\.com/',
+ 'age_limit': 0,
+ 'availability': 'public',
+ 'categories': list,
+ 'channel_follower_count': int,
+ 'channel_id': 'UCCzVNbWZfYpBfyofCCUD_0w',
+ 'channel_is_verified': bool,
+ 'channel_url': r're:^https://www\.youtube\.com/',
+ 'comment_count': int,
+ 'description': str,
+ 'heatmap': 'count:100',
+ 'live_status': str,
+ 'playable_in_embed': bool,
+ 'uploader': str,
+ 'uploader_id': str,
+ 'uploader_url': r're:^https://www\.youtube\.com/',
+ },
+ }]
+
+ _MP4_TYPES = ('tiny', 'lowest', 'low', 'medium', 'high', 'full_hd', 'quad_hd', 'ultra_hd')
+
+ def _extract_formats(self, player_urls, video_id):
+ formats = []
+ quality = qualities(self._MP4_TYPES)
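+ # playerUrls mixes HLS/DASH manifests with progressive MP4 renditions of various qualities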
+ for player_url in traverse_obj(player_urls, lambda _, v: url_or_none(v['url'])):
+ url = player_url['url']
+ format_type = player_url.get('type')
+ if format_type in ('hls', 'hls_live', 'live_ondemand_hls', 'live_playback_hls'):
+ formats.extend(self._extract_m3u8_formats(url, video_id, m3u8_id='hls', fatal=False))
+ elif format_type in ('dash', 'dash_live', 'live_playback_dash'):
+ formats.extend(self._extract_mpd_formats(url, video_id, mpd_id='dash', fatal=False))
+ elif format_type in self._MP4_TYPES:
+ formats.append({
+ 'url': url,
+ 'ext': 'mp4',
+ 'format_id': format_type,
+ 'quality': quality(format_type),
+ })
+ else:
+ self.report_warning(f'Unknown format type: {format_type!r}')
+ return formats
+
+ def _real_extract(self, url):
+ user, post_id = self._match_valid_url(url).group('user', 'post_id')
+ post = self._download_json(
+ f'https://api.boosty.to/v1/blog/{user}/post/{post_id}', post_id,
+ note='Downloading post data', errnote='Unable to download post data')
+
+ post_title = post.get('title')
+ if not post_title:
+ self.report_warning('Unable to extract post title. Falling back to parsing html page')
+ webpage = self._download_webpage(url, video_id=post_id)
+ post_title = self._og_search_title(webpage, default=None) or self._html_extract_title(webpage)
+
+ common_metadata = {
+ 'title': post_title,
+ **traverse_obj(post, {
+ 'channel': ('user', 'name', {str}),
+ 'channel_id': ('user', 'id', {str_or_none}),
+ 'timestamp': ('createdAt', {int_or_none}),
+ 'release_timestamp': ('publishTime', {int_or_none}),
+ 'modified_timestamp': ('updatedAt', {int_or_none}),
+ 'tags': ('tags', ..., 'title', {str}),
+ 'like_count': ('count', 'likes', {int_or_none}),
+ }),
+ }
+ entries = []
+ for item in traverse_obj(post, ('data', ..., {dict})):
+ item_type = item.get('type')
+ if item_type == 'video' and url_or_none(item.get('url')):
+ entries.append(self.url_result(item['url'], YoutubeIE))
+ elif item_type == 'ok_video':
+ video_id = item.get('id') or post_id
+ entries.append({
+ 'id': video_id,
+ 'formats': self._extract_formats(item.get('playerUrls'), video_id),
+ **common_metadata,
+ **traverse_obj(item, {
+ 'title': ('title', {str}),
+ 'duration': ('duration', {int_or_none}),
+ 'view_count': ('viewsCounter', {int_or_none}),
+ 'thumbnail': (('previewUrl', 'defaultPreview'), {url_or_none}),
+ }, get_all=False)})
+
+ if not entries:
+ raise ExtractorError('No videos found', expected=True)
+ if len(entries) == 1:
+ return entries[0]
+ return self.playlist_result(entries, post_id, post_title, **common_metadata)
diff --git a/yt_dlp/extractor/bostonglobe.py b/yt_dlp/extractor/bostonglobe.py
new file mode 100644
index 0000000..92f8ea2
--- /dev/null
+++ b/yt_dlp/extractor/bostonglobe.py
@@ -0,0 +1,69 @@
+import re
+
+from .common import InfoExtractor
+ from ..utils import extract_attributes
+
+
+class BostonGlobeIE(InfoExtractor):
+ _VALID_URL = r'(?i)https?://(?:www\.)?bostonglobe\.com/.*/(?P<id>[^/]+)/\w+(?:\.html)?'
+ _TESTS = [
+ {
+ 'url': 'http://www.bostonglobe.com/metro/2017/02/11/tree-finally-succumbs-disease-leaving-hole-neighborhood/h1b4lviqzMTIn9sVy8F3gP/story.html',
+ 'md5': '0a62181079c85c2d2b618c9a738aedaf',
+ 'info_dict': {
+ 'title': 'A tree finally succumbs to disease, leaving a hole in a neighborhood',
+ 'id': '5320421710001',
+ 'ext': 'mp4',
+ 'description': 'It arrived as a sapling when the Back Bay was in its infancy, a spindly American elm tamped down into a square of dirt cut into the brick sidewalk of 1880s Marlborough Street, no higher than the first bay window of the new brownstone behind it.',
+ 'timestamp': 1486877593,
+ 'upload_date': '20170212',
+ 'uploader_id': '245991542',
+ },
+ },
+ {
+ # Embedded youtube video; we hand it off to the Generic extractor.
+ 'url': 'https://www.bostonglobe.com/lifestyle/names/2017/02/17/does-ben-affleck-play-matt-damon-favorite-version-batman/ruqkc9VxKBYmh5txn1XhSI/story.html',
+ 'md5': '582b40327089d5c0c949b3c54b13c24b',
+ 'info_dict': {
+ 'title': "Who Is Matt Damon's Favorite Batman?",
+ 'id': 'ZW1QCnlA6Qc',
+ 'ext': 'mp4',
+ 'upload_date': '20170217',
+ 'description': 'md5:3b3dccb9375867e0b4d527ed87d307cb',
+ 'uploader': 'The Late Late Show with James Corden',
+ 'uploader_id': 'TheLateLateShow',
+ },
+ 'expected_warnings': ['404'],
+ },
+ ]
+
+ def _real_extract(self, url):
+ page_id = self._match_id(url)
+ webpage = self._download_webpage(url, page_id)
+
+ page_title = self._og_search_title(webpage, default=None)
+
+ # <video data-brightcove-video-id="5320421710001" data-account="245991542" data-player="SJWAiyYWg" data-embed="default" class="video-js" controls itemscope itemtype="http://schema.org/VideoObject">
+ entries = []
+ for video in re.findall(r'(?i)(<video[^>]+>)', webpage):
+ attrs = extract_attributes(video)
+
+ video_id = attrs.get('data-brightcove-video-id')
+ account_id = attrs.get('data-account')
+ player_id = attrs.get('data-player')
+ embed = attrs.get('data-embed')
+
+ if video_id and account_id and player_id and embed:
+ entries.append(
+ 'http://players.brightcove.net/%s/%s_%s/index.html?videoId=%s'
+ % (account_id, player_id, embed, video_id))
+
+ if not entries:
+ return self.url_result(url, 'Generic')
+ elif len(entries) == 1:
+ return self.url_result(entries[0], 'BrightcoveNew')
+ else:
+ return self.playlist_from_matches(entries, page_id, page_title, ie='BrightcoveNew')
diff --git a/yt_dlp/extractor/box.py b/yt_dlp/extractor/box.py
new file mode 100644
index 0000000..7281b3c
--- /dev/null
+++ b/yt_dlp/extractor/box.py
@@ -0,0 +1,83 @@
+import json
+import urllib.parse
+
+from .common import InfoExtractor
+from ..utils import (
+ parse_iso8601,
+ update_url_query,
+ url_or_none,
+)
+from ..utils.traversal import traverse_obj
+
+
+class BoxIE(InfoExtractor):
+ _VALID_URL = r'https?://(?:[^.]+\.)?app\.box\.com/s/(?P<shared_name>[^/?#]+)/file/(?P<id>\d+)'
+ _TEST = {
+ 'url': 'https://mlssoccer.app.box.com/s/0evd2o3e08l60lr4ygukepvnkord1o1x/file/510727257538',
+ 'md5': '1f81b2fd3960f38a40a3b8823e5fcd43',
+ 'info_dict': {
+ 'id': '510727257538',
+ 'ext': 'mp4',
+ 'title': 'Garber St. Louis will be 28th MLS team +scarving.mp4',
+ 'uploader': '',
+ 'timestamp': 1566320259,
+ 'upload_date': '20190820',
+ 'uploader_id': '235196876',
+ },
+ 'params': {'skip_download': 'dash fragment too small'},
+ }
+
+ def _real_extract(self, url):
+ shared_name, file_id = self._match_valid_url(url).groups()
+ webpage = self._download_webpage(url, file_id)
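+ # A request token from the page config is exchanged for a short-lived read token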
+ request_token = self._parse_json(self._search_regex(
+ r'Box\.config\s*=\s*({.+?});', webpage,
+ 'Box config'), file_id)['requestToken']
+ access_token = self._download_json(
+ 'https://app.box.com/app-api/enduserapp/elements/tokens', file_id,
+ 'Downloading token JSON metadata',
+ data=json.dumps({'fileIDs': [file_id]}).encode(), headers={
+ 'Content-Type': 'application/json',
+ 'X-Request-Token': request_token,
+ 'X-Box-EndUser-API': 'sharedName=' + shared_name,
+ })[file_id]['read']
+ shared_link = 'https://app.box.com/s/' + shared_name
+ f = self._download_json(
+ 'https://api.box.com/2.0/files/' + file_id, file_id,
+ 'Downloading file JSON metadata', headers={
+ 'Authorization': 'Bearer ' + access_token,
+ 'BoxApi': 'shared_link=' + shared_link,
+ 'X-Rep-Hints': '[dash]', # TODO: extract `hls` formats
+ }, query={
+ 'fields': 'authenticated_download_url,created_at,created_by,description,extension,is_download_available,name,representations,size'
+ })
+ title = f['name']
+
+ query = {
+ 'access_token': access_token,
+ 'shared_link': shared_link
+ }
+
+ formats = []
+
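+ # Expand each DASH representation's URL template into a manifest URL, keeping the auth query for segment requests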
+ for url_tmpl in traverse_obj(f, (
+ 'representations', 'entries', lambda _, v: v['representation'] == 'dash',
+ 'content', 'url_template', {url_or_none}
+ )):
+ manifest_url = update_url_query(url_tmpl.replace('{+asset_path}', 'manifest.mpd'), query)
+ fmts = self._extract_mpd_formats(manifest_url, file_id)
+ for fmt in fmts:
+ fmt['extra_param_to_segment_url'] = urllib.parse.urlparse(manifest_url).query
+ formats.extend(fmts)
+
+ creator = f.get('created_by') or {}
+
+ return {
+ 'id': file_id,
+ 'title': title,
+ 'formats': formats,
+ 'description': f.get('description') or None,
+ 'uploader': creator.get('name'),
+ 'timestamp': parse_iso8601(f.get('created_at')),
+ 'uploader_id': creator.get('id'),
+ }
diff --git a/yt_dlp/extractor/boxcast.py b/yt_dlp/extractor/boxcast.py
new file mode 100644
index 0000000..51f9eb7
--- /dev/null
+++ b/yt_dlp/extractor/boxcast.py
@@ -0,0 +1,102 @@
+from .common import InfoExtractor
+from ..utils import (
+ js_to_json,
+ traverse_obj,
+ unified_timestamp
+)
+
+
+class BoxCastVideoIE(InfoExtractor):
+ _VALID_URL = r'''(?x)
+ https?://boxcast\.tv/(?:
+ view-embed/|
+ channel/\w+\?(?:[^#]+&)?b=|
+ video-portal/(?:\w+/){2}
+ )(?P<id>[\w-]+)'''
+ _EMBED_REGEX = [r'<iframe[^>]+src=["\'](?P<url>https?://boxcast\.tv/view-embed/[\w-]+)']
+ _TESTS = [{
+ 'url': 'https://boxcast.tv/view-embed/in-the-midst-of-darkness-light-prevails-an-interdisciplinary-symposium-ozmq5eclj50ujl4bmpwx',
+ 'info_dict': {
+ 'id': 'da1eqqgkacngd5djlqld',
+ 'ext': 'mp4',
+ 'thumbnail': r're:https?://uploads\.boxcast\.com/(?:[\w+-]+/){3}.+\.png$',
+ 'title': 'In the Midst of Darkness Light Prevails: An Interdisciplinary Symposium',
+ 'release_timestamp': 1670686812,
+ 'release_date': '20221210',
+ 'uploader_id': 're8w0v8hohhvpqtbskpe',
+ 'uploader': 'Children\'s Health Defense',
+ }
+ }, {
+ 'url': 'https://boxcast.tv/video-portal/vctwevwntun3o0ikq7af/rvyblnn0fxbfjx5nwxhl/otbpltj2kzkveo2qz3ad',
+ 'info_dict': {
+ 'id': 'otbpltj2kzkveo2qz3ad',
+ 'ext': 'mp4',
+ 'uploader_id': 'vctwevwntun3o0ikq7af',
+ 'uploader': 'Legacy Christian Church',
+ 'title': 'The Quest | 1: Beginner\'s Bay | Jamie Schools',
+ 'thumbnail': r're:https?://uploads.boxcast.com/(?:[\w-]+/){3}.+\.jpg'
+ }
+ }, {
+ 'url': 'https://boxcast.tv/channel/z03fqwaeaby5lnaawox2?b=ssihlw5gvfij2by8tkev',
+ 'info_dict': {
+ 'id': 'ssihlw5gvfij2by8tkev',
+ 'ext': 'mp4',
+ 'thumbnail': r're:https?://uploads.boxcast.com/(?:[\w-]+/){3}.+\.jpg$',
+ 'release_date': '20230101',
+ 'uploader_id': 'ds25vaazhlu4ygcvffid',
+ 'release_timestamp': 1672543201,
+ 'uploader': 'Lighthouse Ministries International - Beltsville, Maryland',
+ 'description': 'md5:ac23e3d01b0b0be592e8f7fe0ec3a340',
+ 'title': 'New Year\'s Eve CROSSOVER Service at LHMI | December 31, 2022',
+ }
+ }]
+ _WEBPAGE_TESTS = [{
+ 'url': 'https://childrenshealthdefense.eu/live-stream/',
+ 'info_dict': {
+ 'id': 'da1eqqgkacngd5djlqld',
+ 'ext': 'mp4',
+ 'thumbnail': r're:https?://uploads\.boxcast\.com/(?:[\w+-]+/){3}.+\.png$',
+ 'title': 'In the Midst of Darkness Light Prevails: An Interdisciplinary Symposium',
+ 'release_timestamp': 1670686812,
+ 'release_date': '20221210',
+ 'uploader_id': 're8w0v8hohhvpqtbskpe',
+ 'uploader': 'Children\'s Health Defense',
+ }
+ }]
+
+ def _real_extract(self, url):
+ display_id = self._match_id(url)
+ webpage = self._download_webpage(url, display_id)
+ webpage_json_data = self._search_json(
+ r'var\s*BOXCAST_PRELOAD\s*=', webpage, 'broadcast data', display_id,
+ transform_source=js_to_json, default={})
+
+ # Ref: https://support.boxcast.com/en/articles/4235158-build-a-custom-viewer-experience-with-boxcast-api
+ broadcast_json_data = (
+ traverse_obj(webpage_json_data, ('broadcast', 'data'))
+ or self._download_json(f'https://api.boxcast.com/broadcasts/{display_id}', display_id))
+ view_json_data = (
+ traverse_obj(webpage_json_data, ('view', 'data'))
+ or self._download_json(f'https://api.boxcast.com/broadcasts/{display_id}/view',
+ display_id, fatal=False) or {})
+
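+ # Only recorded broadcasts expose an HLS playlist; live/upcoming events have no formats here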
+ formats, subtitles = [], {}
+ if view_json_data.get('status') == 'recorded':
+ formats, subtitles = self._extract_m3u8_formats_and_subtitles(
+ view_json_data['playlist'], display_id)
+
+ return {
+ 'id': str(broadcast_json_data['id']),
+ 'title': (broadcast_json_data.get('name')
+ or self._html_search_meta(['og:title', 'twitter:title'], webpage)),
+ 'description': (broadcast_json_data.get('description')
+ or self._html_search_meta(['og:description', 'twitter:description'], webpage)
+ or None),
+ 'thumbnail': (broadcast_json_data.get('preview')
+ or self._html_search_meta(['og:image', 'twitter:image'], webpage)),
+ 'formats': formats,
+ 'subtitles': subtitles,
+ 'release_timestamp': unified_timestamp(broadcast_json_data.get('streamed_at')),
+ 'uploader': broadcast_json_data.get('account_name'),
+ 'uploader_id': broadcast_json_data.get('account_id'),
+ }
diff --git a/yt_dlp/extractor/bpb.py b/yt_dlp/extractor/bpb.py
new file mode 100644
index 0000000..7fe0899
--- /dev/null
+++ b/yt_dlp/extractor/bpb.py
@@ -0,0 +1,170 @@
+import functools
+import re
+
+from .common import InfoExtractor
+from ..utils import (
+ clean_html,
+ extract_attributes,
+ get_element_text_and_html_by_tag,
+ get_elements_by_class,
+ join_nonempty,
+ js_to_json,
+ mimetype2ext,
+ unified_strdate,
+ url_or_none,
+ urljoin,
+ variadic,
+)
+from ..utils.traversal import traverse_obj
+
+
+def html_get_element(tag=None, cls=None):
+ assert tag or cls, 'One of tag or class is required'
+
+ if cls:
+ func = functools.partial(get_elements_by_class, cls, tag=tag)
+ else:
+ func = functools.partial(get_element_text_and_html_by_tag, tag)
+
+ def html_get_element_wrapper(html):
+ return variadic(func(html))[0]
+
+ return html_get_element_wrapper
+
+
+class BpbIE(InfoExtractor):
+ IE_DESC = 'Bundeszentrale für politische Bildung'
+ _VALID_URL = r'https?://(?:www\.|m\.)?bpb\.de/(?:[^/?#]+/)*(?P<id>\d+)(?:[/?#]|$)'
+
+ _TESTS = [{
+ 'url': 'http://www.bpb.de/mediathek/297/joachim-gauck-zu-1989-und-die-erinnerung-an-die-ddr',
+ 'info_dict': {
+ 'id': '297',
+ 'ext': 'mp4',
+ 'creator': 'Kooperative Berlin',
+ 'description': 'md5:f4f75885ba009d3e2b156247a8941ce6',
+ 'release_date': '20160115',
+ 'series': 'Interview auf dem Geschichtsforum 1989 | 2009',
+ 'tags': ['Friedliche Revolution', 'Erinnerungskultur', 'Vergangenheitspolitik', 'DDR 1949 - 1990', 'Freiheitsrecht', 'BStU', 'Deutschland'],
+ 'thumbnail': 'https://www.bpb.de/cache/images/7/297_teaser_16x9_1240.jpg?8839D',
+ 'title': 'Joachim Gauck zu 1989 und die Erinnerung an die DDR',
+ 'uploader': 'Bundeszentrale für politische Bildung',
+ },
+ }, {
+ 'url': 'https://www.bpb.de/mediathek/video/522184/krieg-flucht-und-falschmeldungen-wirstattdesinformation-2/',
+ 'info_dict': {
+ 'id': '522184',
+ 'ext': 'mp4',
+ 'creator': 'Institute for Strategic Dialogue Germany gGmbH (ISD)',
+ 'description': 'md5:f83c795ff8f825a69456a9e51fc15903',
+ 'release_date': '20230621',
+ 'tags': ['Desinformation', 'Ukraine', 'Russland', 'Geflüchtete'],
+ 'thumbnail': 'https://www.bpb.de/cache/images/4/522184_teaser_16x9_1240.png?EABFB',
+ 'title': 'md5:9b01ccdbf58dbf9e5c9f6e771a803b1c',
+ 'uploader': 'Bundeszentrale für politische Bildung',
+ },
+ }, {
+ 'url': 'https://www.bpb.de/lernen/bewegtbild-und-politische-bildung/webvideo/518789/krieg-flucht-und-falschmeldungen-wirstattdesinformation-1/',
+ 'info_dict': {
+ 'id': '518789',
+ 'ext': 'mp4',
+ 'creator': 'Institute for Strategic Dialogue Germany gGmbH (ISD)',
+ 'description': 'md5:85228aed433e84ff0ff9bc582abd4ea8',
+ 'release_date': '20230302',
+ 'tags': ['Desinformation', 'Ukraine', 'Russland', 'Geflüchtete'],
+ 'thumbnail': 'https://www.bpb.de/cache/images/9/518789_teaser_16x9_1240.jpeg?56D0D',
+ 'title': 'md5:3e956f264bb501f6383f10495a401da4',
+ 'uploader': 'Bundeszentrale für politische Bildung',
+ },
+ }, {
+ 'url': 'https://www.bpb.de/mediathek/podcasts/apuz-podcast/539727/apuz-20-china/',
+ 'only_matching': True,
+ }, {
+ 'url': 'https://www.bpb.de/mediathek/audio/315813/folge-1-eine-einfuehrung/',
+ 'info_dict': {
+ 'id': '315813',
+ 'ext': 'mp3',
+ 'creator': 'Axel Schröder',
+ 'description': 'md5:eda9d1af34e5912efef5baf54fba4427',
+ 'release_date': '20200921',
+ 'series': 'Auf Endlagersuche. Der deutsche Weg zu einem sicheren Atommülllager',
+ 'tags': ['Atomenergie', 'Endlager', 'hoch-radioaktiver Abfall', 'Endlagersuche', 'Atommüll', 'Atomendlager', 'Gorleben', 'Deutschland'],
+ 'thumbnail': 'https://www.bpb.de/cache/images/3/315813_teaser_16x9_1240.png?92A94',
+ 'title': 'Folge 1: Eine Einführung',
+ 'uploader': 'Bundeszentrale für politische Bildung',
+ },
+ }, {
+ 'url': 'https://www.bpb.de/517806/die-weltanschauung-der-neuen-rechten/',
+ 'info_dict': {
+ 'id': '517806',
+ 'ext': 'mp3',
+ 'creator': 'Bundeszentrale für politische Bildung',
+ 'description': 'md5:594689600e919912aade0b2871cc3fed',
+ 'release_date': '20230127',
+ 'series': 'Vorträge des Fachtags "Modernisierer. Grenzgänger. Anstifter. Sechs Jahrzehnte \'Neue Rechte\'"',
+ 'tags': ['Rechtsextremismus', 'Konservatismus', 'Konservativismus', 'neue Rechte', 'Rechtspopulismus', 'Schnellroda', 'Deutschland'],
+ 'thumbnail': 'https://www.bpb.de/cache/images/6/517806_teaser_16x9_1240.png?7A7A0',
+ 'title': 'Die Weltanschauung der "Neuen Rechten"',
+ 'uploader': 'Bundeszentrale für politische Bildung',
+ },
+ }, {
+ 'url': 'https://www.bpb.de/mediathek/reihen/zahlen-und-fakten-soziale-situation-filme/520153/zahlen-und-fakten-die-soziale-situation-in-deutschland-migration/',
+ 'only_matching': True,
+ }]
+
+ _TITLE_RE = re.compile(r'(?P<title>[^<]*)<[^>]+>(?P<series>[^<]*)')
+
+ def _parse_vue_attributes(self, name, string, video_id):
+ attributes = extract_attributes(self._search_regex(rf'(<{name}(?:"[^"]*?"|[^>])*>)', string, name))
+
+ for key, value in attributes.items():
+ if key.startswith(':'):
+ attributes[key] = self._parse_json(value, video_id, transform_source=js_to_json, fatal=False)
+
+ return attributes
+
+ @staticmethod
+ def _process_source(source):
+ url = url_or_none(source['src'])
+ if not url:
+ return None
+
+ source_type = source.get('type', '')
+ extension = mimetype2ext(source_type)
+ is_video = source_type.startswith('video')
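+ # For video sources, the quality note is the _<suffix> before the file extension (e.g. _high)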
+ note = url.rpartition('.')[0].rpartition('_')[2] if is_video else None
+
+ return {
+ 'url': url,
+ 'ext': extension,
+ 'vcodec': None if is_video else 'none',
+ 'quality': 10 if note == 'high' else 0,
+ 'format_note': note,
+ 'format_id': join_nonempty(extension, note),
+ }
+
+ def _real_extract(self, url):
+ video_id = self._match_id(url)
+ webpage = self._download_webpage(url, video_id)
+
+ title_result = traverse_obj(webpage, ({html_get_element(cls='opening-header__title')}, {self._TITLE_RE.match}))
+ json_lds = list(self._yield_json_ld(webpage, video_id, fatal=False))
+
+ return {
+ 'id': video_id,
+ 'title': traverse_obj(title_result, ('title', {str.strip})) or None,
+ # This metadata could be interpreted otherwise, but it fits "series" the most
+ 'series': traverse_obj(title_result, ('series', {str.strip})) or None,
+ 'description': join_nonempty(*traverse_obj(webpage, [(
+ {html_get_element(cls='opening-intro')},
+ [{html_get_element(tag='bpb-accordion-item')}, {html_get_element(cls='text-content')}],
+ ), {clean_html}]), delim='\n\n') or None,
+ 'creator': self._html_search_meta('author', webpage),
+ 'uploader': self._html_search_meta('publisher', webpage),
+ 'release_date': unified_strdate(self._html_search_meta('date', webpage)),
+ 'tags': traverse_obj(json_lds, (..., 'keywords', {lambda x: x.split(',')}, ...)),
+ **traverse_obj(self._parse_vue_attributes('bpb-player', webpage, video_id), {
+ 'formats': (':sources', ..., {self._process_source}),
+ 'thumbnail': ('poster', {lambda x: urljoin(url, x)}),
+ }),
+ }
diff --git a/yt_dlp/extractor/br.py b/yt_dlp/extractor/br.py
new file mode 100644
index 0000000..6e1c63e
--- /dev/null
+++ b/yt_dlp/extractor/br.py
@@ -0,0 +1,166 @@
+from .common import InfoExtractor
+from ..utils import (
+ ExtractorError,
+ int_or_none,
+ parse_duration,
+ xpath_element,
+ xpath_text,
+)
+
+
+class BRIE(InfoExtractor):
+ _WORKING = False
+ IE_DESC = 'Bayerischer Rundfunk'
+ _VALID_URL = r'(?P<base_url>https?://(?:www\.)?br(?:-klassik)?\.de)/(?:[a-z0-9\-_]+/)+(?P<id>[a-z0-9\-_]+)\.html'
+
+ _TESTS = [
+ {
+ 'url': 'http://www.br.de/mediathek/video/sendungen/abendschau/betriebliche-altersvorsorge-104.html',
+ 'md5': '83a0477cf0b8451027eb566d88b51106',
+ 'info_dict': {
+ 'id': '48f656ef-287e-486f-be86-459122db22cc',
+ 'ext': 'mp4',
+ 'title': 'Die böse Überraschung',
+ 'description': 'md5:ce9ac81b466ce775b8018f6801b48ac9',
+ 'duration': 180,
+ 'uploader': 'Reinhard Weber',
+ 'upload_date': '20150422',
+ },
+ 'skip': '404 not found',
+ },
+ {
+ 'url': 'http://www.br.de/nachrichten/oberbayern/inhalt/muenchner-polizeipraesident-schreiber-gestorben-100.html',
+ 'md5': 'af3a3a4aa43ff0ce6a89504c67f427ef',
+ 'info_dict': {
+ 'id': 'a4b83e34-123d-4b81-9f4e-c0d3121a4e05',
+ 'ext': 'flv',
+ 'title': 'Manfred Schreiber ist tot',
+ 'description': 'md5:b454d867f2a9fc524ebe88c3f5092d97',
+ 'duration': 26,
+ },
+ 'skip': '404 not found',
+ },
+ {
+ 'url': 'https://www.br-klassik.de/audio/peeping-tom-premierenkritik-dance-festival-muenchen-100.html',
+ 'md5': '8b5b27c0b090f3b35eac4ab3f7a73d3d',
+ 'info_dict': {
+ 'id': '74c603c9-26d3-48bb-b85b-079aeed66e0b',
+ 'ext': 'aac',
+ 'title': 'Kurzweilig und sehr bewegend',
+ 'description': 'md5:0351996e3283d64adeb38ede91fac54e',
+ 'duration': 296,
+ },
+ 'skip': '404 not found',
+ },
+ {
+ 'url': 'http://www.br.de/radio/bayern1/service/team/videos/team-video-erdelt100.html',
+ 'md5': 'dbab0aef2e047060ea7a21fc1ce1078a',
+ 'info_dict': {
+ 'id': '6ba73750-d405-45d3-861d-1ce8c524e059',
+ 'ext': 'mp4',
+ 'title': 'Umweltbewusster Häuslebauer',
+ 'description': 'md5:d52dae9792d00226348c1dbb13c9bae2',
+ 'duration': 116,
+ }
+ },
+ {
+ 'url': 'http://www.br.de/fernsehen/br-alpha/sendungen/kant-fuer-anfaenger/kritik-der-reinen-vernunft/kant-kritik-01-metaphysik100.html',
+ 'md5': '23bca295f1650d698f94fc570977dae3',
+ 'info_dict': {
+ 'id': 'd982c9ce-8648-4753-b358-98abb8aec43d',
+ 'ext': 'mp4',
+ 'title': 'Folge 1 - Metaphysik',
+ 'description': 'md5:bb659990e9e59905c3d41e369db1fbe3',
+ 'duration': 893,
+ 'uploader': 'Eva Maria Steimle',
+ 'upload_date': '20170208',
+ }
+ },
+ ]
+
+ def _real_extract(self, url):
+ base_url, display_id = self._match_valid_url(url).groups()
+ page = self._download_webpage(url, display_id)
+ xml_url = self._search_regex(
+ r"return BRavFramework\.register\(BRavFramework\('avPlayer_(?:[a-f0-9-]{36})'\)\.setup\({dataURL:'(/(?:[a-z0-9\-]+/)+[a-z0-9/~_.-]+)'}\)\);", page, 'XMLURL')
+ xml = self._download_xml(base_url + xml_url, display_id)
+
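+ # The XML manifest may list several <video>/<audio> entries; only the first is extracted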
+ medias = []
+
+ for xml_media in xml.findall('video') + xml.findall('audio'):
+ media_id = xml_media.get('externalId')
+ media = {
+ 'id': media_id,
+ 'title': xpath_text(xml_media, 'title', 'title', True),
+ 'duration': parse_duration(xpath_text(xml_media, 'duration')),
+ 'formats': self._extract_formats(xpath_element(
+ xml_media, 'assets'), media_id),
+ 'thumbnails': self._extract_thumbnails(xpath_element(
+ xml_media, 'teaserImage/variants'), base_url),
+ 'description': xpath_text(xml_media, 'desc'),
+ 'webpage_url': xpath_text(xml_media, 'permalink'),
+ 'uploader': xpath_text(xml_media, 'author'),
+ }
+ broadcast_date = xpath_text(xml_media, 'broadcastDate')
+ if broadcast_date:
+ media['upload_date'] = ''.join(reversed(broadcast_date.split('.')))
+ medias.append(media)
+
+ if len(medias) > 1:
+ self.report_warning(
+ 'found multiple medias; please '
+ 'report this with the video URL to http://yt-dl.org/bug')
+ if not medias:
+ raise ExtractorError('No media entries found')
+ return medias[0]
+
+ def _extract_formats(self, assets, media_id):
+ formats = []
+ for asset in assets.findall('asset'):
+ format_url = xpath_text(asset, ['downloadUrl', 'url'])
+ asset_type = asset.get('type')
+ if asset_type.startswith('HDS'):
+ formats.extend(self._extract_f4m_formats(
+ format_url + '?hdcore=3.2.0', media_id, f4m_id='hds', fatal=False))
+ elif asset_type.startswith('HLS'):
+ formats.extend(self._extract_m3u8_formats(
+ format_url, media_id, 'mp4', 'm3u8_native', m3u8_id='hls', fatal=False))
+ else:
+ format_info = {
+ 'ext': xpath_text(asset, 'mediaType'),
+ 'width': int_or_none(xpath_text(asset, 'frameWidth')),
+ 'height': int_or_none(xpath_text(asset, 'frameHeight')),
+ 'tbr': int_or_none(xpath_text(asset, 'bitrateVideo')),
+ 'abr': int_or_none(xpath_text(asset, 'bitrateAudio')),
+ 'vcodec': xpath_text(asset, 'codecVideo'),
+ 'acodec': xpath_text(asset, 'codecAudio'),
+ 'container': xpath_text(asset, 'mediaType'),
+ 'filesize': int_or_none(xpath_text(asset, 'size')),
+ }
+ format_url = self._proto_relative_url(format_url)
+ if format_url:
+ http_format_info = format_info.copy()
+ http_format_info.update({
+ 'url': format_url,
+ 'format_id': 'http-%s' % asset_type,
+ })
+ formats.append(http_format_info)
+ server_prefix = xpath_text(asset, 'serverPrefix')
+ if server_prefix:
+ rtmp_format_info = format_info.copy()
+ rtmp_format_info.update({
+ 'url': server_prefix,
+ 'play_path': xpath_text(asset, 'fileName'),
+ 'format_id': 'rtmp-%s' % asset_type,
+ })
+ formats.append(rtmp_format_info)
+ return formats
+
+ def _extract_thumbnails(self, variants, base_url):
+ thumbnails = [{
+ 'url': base_url + xpath_text(variant, 'url'),
+ 'width': int_or_none(xpath_text(variant, 'width')),
+ 'height': int_or_none(xpath_text(variant, 'height')),
+ } for variant in variants.findall('variant') if xpath_text(variant, 'url')]
+ thumbnails.sort(key=lambda x: (x['width'] or 0) * (x['height'] or 0), reverse=True)
+ return thumbnails
diff --git a/yt_dlp/extractor/brainpop.py b/yt_dlp/extractor/brainpop.py
new file mode 100644
index 0000000..1200437
--- /dev/null
+++ b/yt_dlp/extractor/brainpop.py
@@ -0,0 +1,318 @@
+import json
+import re
+
+from .common import InfoExtractor
+from ..utils import (
+ classproperty,
+ int_or_none,
+ traverse_obj,
+ urljoin
+)
+
+
+class BrainPOPBaseIE(InfoExtractor):
+ _NETRC_MACHINE = 'brainpop'
+ _ORIGIN = '' # So that _VALID_URL doesn't crash
+ _LOGIN_ERRORS = {
+ 1502: 'The username and password you entered did not match.', # LOGIN_FAILED
+ 1503: 'Payment method is expired.', # LOGIN_FAILED_ACCOUNT_NOT_ACTIVE
+ 1506: 'Your BrainPOP plan has expired.', # LOGIN_FAILED_ACCOUNT_EXPIRED
+ 1507: 'Terms not accepted.', # LOGIN_FAILED_TERMS_NOT_ACCEPTED
+ 1508: 'Account not activated.', # LOGIN_FAILED_SUBSCRIPTION_NOT_ACTIVE
+ 1512: 'The maximum number of devices permitted are logged in with your account right now.', # LOGIN_FAILED_LOGIN_LIMIT_REACHED
+ 1513: 'You are trying to access your account from outside of its allowed IP range.', # LOGIN_FAILED_INVALID_IP
+ 1514: 'Individual accounts are not included in your plan. Try again with your shared username and password.', # LOGIN_FAILED_MBP_DISABLED
+ 1515: 'Account not activated.', # LOGIN_FAILED_TEACHER_NOT_ACTIVE
+ 1523: 'That username and password won\'t work on this BrainPOP site.', # LOGIN_FAILED_NO_ACCESS
+ 1524: 'You\'ll need to join a class before you can login.', # LOGIN_FAILED_STUDENT_NO_PERIOD
+ 1526: 'Your account is locked. Reset your password, or ask a teacher or administrator for help.', # LOGIN_FAILED_ACCOUNT_LOCKED
+ }
+
+ @classproperty
+ def _VALID_URL(cls):
+ root = re.escape(cls._ORIGIN).replace(r'https:', r'https?:').replace(r'www\.', r'(?:www\.)?')
+ return rf'{root}/(?P<slug>[^/]+/[^/]+/(?P<id>[^/?#&]+))'
+
+ def _assemble_formats(self, slug, format_id, display_id, token='', extra_fields={}):
+ formats = self._extract_m3u8_formats(
+ f'{urljoin(self._HLS_URL, slug)}.m3u8?{token}',
+ display_id, 'mp4', m3u8_id=f'{format_id}-hls', fatal=False)
+ formats.append({
+ 'format_id': format_id,
+ 'url': f'{urljoin(self._VIDEO_URL, slug)}?{token}',
+ })
+ for f in formats:
+ f.update(extra_fields)
+ return formats
+
+ def _extract_adaptive_formats(self, data, token, display_id, key_format='%s', extra_fields={}):
+ formats = []
+ additional_key_formats = {
+ '%s': {},
+ 'ad_%s': {
+ 'format_note': 'Audio description',
+ 'source_preference': -2
+ }
+ }
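+ # Streams come in plain and audio-described ('ad_') variants, each with 'high' and 'low' quality keys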
+ for additional_key_format, additional_key_fields in additional_key_formats.items():
+ for key_quality, key_index in enumerate(('high', 'low')):
+ full_key_index = additional_key_format % (key_format % key_index)
+ if data.get(full_key_index):
+ formats.extend(self._assemble_formats(data[full_key_index], full_key_index, display_id, token, {
+ 'quality': -1 - key_quality,
+ **additional_key_fields,
+ **extra_fields
+ }))
+ return formats
+
+ def _perform_login(self, username, password):
+ login_res = self._download_json(
+ 'https://api.brainpop.com/api/login', None,
+ data=json.dumps({'username': username, 'password': password}).encode(),
+ headers={
+ 'Content-Type': 'application/json',
+ 'Referer': self._ORIGIN
+ }, note='Logging in', errnote='Unable to log in', expected_status=400)
+ status_code = int_or_none(login_res['status_code'])
+ if status_code != 1505:
+ error_message = (self._LOGIN_ERRORS.get(status_code) or login_res.get('message')
+ or f'Got status code {status_code}')
+ self.report_warning(f'Unable to login: {error_message}')
+
+
+class BrainPOPIE(BrainPOPBaseIE):
+ _ORIGIN = 'https://www.brainpop.com'
+ _VIDEO_URL = 'https://svideos.brainpop.com'
+ _HLS_URL = 'https://hls.brainpop.com'
+ _CDN_URL = 'https://cdn.brainpop.com'
+ _TESTS = [{
+ 'url': 'https://www.brainpop.com/health/conflictresolution/martinlutherkingjr/movie?ref=null',
+ 'md5': '3ead374233ae74c7f1b0029a01c972f0',
+ 'info_dict': {
+ 'id': '1f3259fa457292b4',
+ 'ext': 'mp4',
+ 'title': 'Martin Luther King, Jr.',
+ 'display_id': 'martinlutherkingjr',
+ 'description': 'md5:f403dbb2bf3ccc7cf4c59d9e43e3c349',
+ },
+ }, {
+ 'url': 'https://www.brainpop.com/science/space/bigbang/',
+ 'md5': '9a1ff0e77444dd9e437354eb669c87ec',
+ 'info_dict': {
+ 'id': 'acae52cd48c99acf',
+ 'ext': 'mp4',
+ 'title': 'Big Bang',
+ 'display_id': 'bigbang',
+ 'description': 'md5:3e53b766b0f116f631b13f4cae185d38',
+ },
+ 'skip': 'Requires login',
+ }]
+
+ def _real_extract(self, url):
+ slug, display_id = self._match_valid_url(url).group('slug', 'id')
+ movie_data = self._download_json(
+ f'https://api.brainpop.com/api/content/published/bp/en/{slug}/movie?full=1', display_id,
+ 'Downloading movie data JSON', 'Unable to download movie data')['data']
+ topic_data = traverse_obj(self._download_json(
+ f'https://api.brainpop.com/api/content/published/bp/en/{slug}?full=1', display_id,
+ 'Downloading topic data JSON', 'Unable to download topic data', fatal=False),
+ ('data', 'topic'), expected_type=dict) or movie_data['topic']
+
+ if not traverse_obj(movie_data, ('access', 'allow')):
+ reason = traverse_obj(movie_data, ('access', 'reason'))
+ if 'logged' in reason:
+ self.raise_login_required(reason, metadata_available=True)
+ else:
+ self.raise_no_formats(reason, video_id=display_id)
+ movie_feature = movie_data['feature']
+ movie_feature_data = movie_feature['data']
+
+ formats, subtitles = [], {}
+ formats.extend(self._extract_adaptive_formats(movie_feature_data, movie_feature_data.get('token', ''), display_id, '%s_v2', {
+ 'language': movie_feature.get('language') or 'en',
+ 'language_preference': 10
+ }))
+ for lang, localized_feature in traverse_obj(movie_feature, 'localization', default={}, expected_type=dict).items():
+ formats.extend(self._extract_adaptive_formats(localized_feature, localized_feature.get('token', ''), display_id, '%s_v2', {
+ 'language': lang,
+ 'language_preference': -10
+ }))
+
+ # TODO: Do localization fields also have subtitles?
+ for name, url in movie_feature_data.items():
+ lang = self._search_regex(
+ r'^subtitles_(?P<lang>\w+)$', name, 'subtitle metadata', default=None)
+ if lang and url:
+ subtitles.setdefault(lang, []).append({
+ 'url': urljoin(self._CDN_URL, url)
+ })
+
+ return {
+ 'id': topic_data['topic_id'],
+ 'display_id': display_id,
+ 'title': topic_data.get('name'),
+ 'description': topic_data.get('synopsis'),
+ 'formats': formats,
+ 'subtitles': subtitles,
+ }
+
+
+class BrainPOPLegacyBaseIE(BrainPOPBaseIE):
+ def _parse_js_topic_data(self, topic_data, display_id, token):
+ movie_data = topic_data['movies']
+ # TODO: Are there non-burned subtitles?
+ formats = self._extract_adaptive_formats(movie_data, token, display_id)
+
+ return {
+ 'id': topic_data['EntryID'],
+ 'display_id': display_id,
+ 'title': topic_data.get('name'),
+ 'alt_title': topic_data.get('title'),
+ 'description': topic_data.get('synopsis'),
+ 'formats': formats,
+ }
+
+ def _real_extract(self, url):
+ slug, display_id = self._match_valid_url(url).group('slug', 'id')
+ webpage = self._download_webpage(url, display_id)
+ topic_data = self._search_json(
+ r'var\s+content\s*=\s*', webpage, 'content data',
+ display_id, end_pattern=';')['category']['unit']['topic']
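+ # Media URLs must be signed with the ec_token embedded in the page JS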
+ token = self._search_regex(r'ec_token\s*:\s*[\'"]([^\'"]+)', webpage, 'video token')
+ return self._parse_js_topic_data(topic_data, display_id, token)
+
+
+class BrainPOPJrIE(BrainPOPLegacyBaseIE):
+ _ORIGIN = 'https://jr.brainpop.com'
+ _VIDEO_URL = 'https://svideos-jr.brainpop.com'
+ _HLS_URL = 'https://hls-jr.brainpop.com'
+ _CDN_URL = 'https://cdn-jr.brainpop.com'
+ _TESTS = [{
+ 'url': 'https://jr.brainpop.com/health/feelingsandsel/emotions/',
+ 'md5': '04e0561bb21770f305a0ce6cf0d869ab',
+ 'info_dict': {
+ 'id': '347',
+ 'ext': 'mp4',
+ 'title': 'Emotions',
+ 'display_id': 'emotions',
+ },
+ }, {
+ 'url': 'https://jr.brainpop.com/science/habitats/arctichabitats/',
+ 'md5': 'b0ed063bbd1910df00220ee29340f5d6',
+ 'info_dict': {
+ 'id': '29',
+ 'ext': 'mp4',
+ 'title': 'Arctic Habitats',
+ 'display_id': 'arctichabitats',
+ },
+ 'skip': 'Requires login',
+ }]
+
+
+class BrainPOPELLIE(BrainPOPLegacyBaseIE):
+ _ORIGIN = 'https://ell.brainpop.com'
+ _VIDEO_URL = 'https://svideos-esl.brainpop.com'
+ _HLS_URL = 'https://hls-esl.brainpop.com'
+ _CDN_URL = 'https://cdn-esl.brainpop.com'
+ _TESTS = [{
+ 'url': 'https://ell.brainpop.com/level1/unit1/lesson1/',
+ 'md5': 'a2012700cfb774acb7ad2e8834eed0d0',
+ 'info_dict': {
+ 'id': '1',
+ 'ext': 'mp4',
+ 'title': 'Lesson 1',
+ 'display_id': 'lesson1',
+ 'alt_title': 'Personal Pronouns',
+ },
+ }, {
+ 'url': 'https://ell.brainpop.com/level3/unit6/lesson5/',
+ 'md5': 'be19c8292c87b24aacfb5fda2f3f8363',
+ 'info_dict': {
+ 'id': '101',
+ 'ext': 'mp4',
+ 'title': 'Lesson 5',
+ 'display_id': 'lesson5',
+ 'alt_title': 'Review: Unit 6',
+ },
+ 'skip': 'Requires login',
+ }]
+
+
+class BrainPOPEspIE(BrainPOPLegacyBaseIE):
+ IE_DESC = 'BrainPOP Español'
+ _ORIGIN = 'https://esp.brainpop.com'
+ _VIDEO_URL = 'https://svideos.brainpop.com'
+ _HLS_URL = 'https://hls.brainpop.com'
+ _CDN_URL = 'https://cdn.brainpop.com/mx'
+ _TESTS = [{
+ 'url': 'https://esp.brainpop.com/ciencia/la_diversidad_de_la_vida/ecosistemas/',
+ 'md5': 'cb3f062db2b3c5240ddfcfde7108f8c9',
+ 'info_dict': {
+ 'id': '3893',
+ 'ext': 'mp4',
+ 'title': 'Ecosistemas',
+ 'display_id': 'ecosistemas',
+ 'description': 'md5:80fc55b07e241f8c8f2aa8d74deaf3c3',
+ },
+ }, {
+ 'url': 'https://esp.brainpop.com/espanol/la_escritura/emily_dickinson/',
+ 'md5': '98c1b9559e0e33777209c425cda7dac4',
+ 'info_dict': {
+ 'id': '7146',
+ 'ext': 'mp4',
+ 'title': 'Emily Dickinson',
+ 'display_id': 'emily_dickinson',
+ 'description': 'md5:2795ad87b1d239c9711c1e92ab5a978b',
+ },
+ 'skip': 'Requires login',
+ }]
+
+
+class BrainPOPFrIE(BrainPOPLegacyBaseIE):
+ IE_DESC = 'BrainPOP Français'
+ _ORIGIN = 'https://fr.brainpop.com'
+ _VIDEO_URL = 'https://svideos.brainpop.com'
+ _HLS_URL = 'https://hls.brainpop.com'
+ _CDN_URL = 'https://cdn.brainpop.com/fr'
+ _TESTS = [{
+ 'url': 'https://fr.brainpop.com/sciencesdelaterre/energie/sourcesdenergie/',
+ 'md5': '97e7f48af8af93f8a2be11709f239371',
+ 'info_dict': {
+ 'id': '1651',
+ 'ext': 'mp4',
+ 'title': 'Sources d\'énergie',
+ 'display_id': 'sourcesdenergie',
+ 'description': 'md5:7eece350f019a21ef9f64d4088b2d857',
+ },
+ }, {
+ 'url': 'https://fr.brainpop.com/francais/ecrire/plagiat/',
+ 'md5': '0cf2b4f89804d0dd4a360a51310d445a',
+ 'info_dict': {
+ 'id': '5803',
+ 'ext': 'mp4',
+ 'title': 'Plagiat',
+ 'display_id': 'plagiat',
+ 'description': 'md5:4496d87127ace28e8b1eda116e77cd2b',
+ },
+ 'skip': 'Requires login',
+ }]
+
+
+class BrainPOPIlIE(BrainPOPLegacyBaseIE):
+ IE_DESC = 'BrainPOP Hebrew'
+ _ORIGIN = 'https://il.brainpop.com'
+ _VIDEO_URL = 'https://svideos.brainpop.com'
+ _HLS_URL = 'https://hls.brainpop.com'
+ _CDN_URL = 'https://cdn.brainpop.com/he'
+ _TESTS = [{
+ 'url': 'https://il.brainpop.com/category_9/subcategory_150/subjects_3782/',
+ 'md5': '9e4ea9dc60ecd385a6e5ca12ccf31641',
+ 'info_dict': {
+ 'id': '3782',
+ 'ext': 'mp4',
+ 'title': 'md5:e993632fcda0545d9205602ec314ad67',
+ 'display_id': 'subjects_3782',
+ 'description': 'md5:4cc084a8012beb01f037724423a4d4ed',
+ },
+ }]
diff --git a/yt_dlp/extractor/bravotv.py b/yt_dlp/extractor/bravotv.py
new file mode 100644
index 0000000..419fe8c
--- /dev/null
+++ b/yt_dlp/extractor/bravotv.py
@@ -0,0 +1,189 @@
+from .adobepass import AdobePassIE
+from ..networking import HEADRequest
+from ..utils import (
+ extract_attributes,
+ float_or_none,
+ get_element_html_by_class,
+ int_or_none,
+ merge_dicts,
+ parse_age_limit,
+ remove_end,
+ str_or_none,
+ traverse_obj,
+ unescapeHTML,
+ unified_timestamp,
+ update_url_query,
+ url_or_none,
+)
+
+
+class BravoTVIE(AdobePassIE):
+ _VALID_URL = r'https?://(?:www\.)?(?P<site>bravotv|oxygen)\.com/(?:[^/]+/)+(?P<id>[^/?#]+)'
+ _TESTS = [{
+ 'url': 'https://www.bravotv.com/top-chef/season-16/episode-15/videos/the-top-chef-season-16-winner-is',
+ 'info_dict': {
+ 'id': '3923059',
+ 'ext': 'mp4',
+ 'title': 'The Top Chef Season 16 Winner Is...',
+ 'description': 'Find out who takes the title of Top Chef!',
+ 'upload_date': '20190314',
+ 'timestamp': 1552591860,
+ 'season_number': 16,
+ 'episode_number': 15,
+ 'series': 'Top Chef',
+ 'episode': 'The Top Chef Season 16 Winner Is...',
+ 'duration': 190.357,
+ 'season': 'Season 16',
+ 'thumbnail': r're:^https://.+\.jpg',
+ },
+ 'params': {'skip_download': 'm3u8'},
+ }, {
+ 'url': 'https://www.bravotv.com/top-chef/season-20/episode-1/london-calling',
+ 'info_dict': {
+ 'id': '9000234570',
+ 'ext': 'mp4',
+ 'title': 'London Calling',
+ 'description': 'md5:5af95a8cbac1856bd10e7562f86bb759',
+ 'upload_date': '20230310',
+ 'timestamp': 1678410000,
+ 'season_number': 20,
+ 'episode_number': 1,
+ 'series': 'Top Chef',
+ 'episode': 'London Calling',
+ 'duration': 3266.03,
+ 'season': 'Season 20',
+ 'chapters': 'count:7',
+ 'thumbnail': r're:^https://.+\.jpg',
+ 'age_limit': 14,
+ },
+ 'params': {'skip_download': 'm3u8'},
+ 'skip': 'This video requires AdobePass MSO credentials',
+ }, {
+ 'url': 'https://www.oxygen.com/in-ice-cold-blood/season-1/closing-night',
+ 'info_dict': {
+ 'id': '3692045',
+ 'ext': 'mp4',
+ 'title': 'Closing Night',
+ 'description': 'md5:3170065c5c2f19548d72a4cbc254af63',
+ 'upload_date': '20180401',
+ 'timestamp': 1522623600,
+ 'season_number': 1,
+ 'episode_number': 1,
+ 'series': 'In Ice Cold Blood',
+ 'episode': 'Closing Night',
+ 'duration': 2629.051,
+ 'season': 'Season 1',
+ 'chapters': 'count:6',
+ 'thumbnail': r're:^https://.+\.jpg',
+ 'age_limit': 14,
+ },
+ 'params': {'skip_download': 'm3u8'},
+ 'skip': 'This video requires AdobePass MSO credentials',
+ }, {
+ 'url': 'https://www.oxygen.com/in-ice-cold-blood/season-2/episode-16/videos/handling-the-horwitz-house-after-the-murder-season-2',
+ 'info_dict': {
+ 'id': '3974019',
+ 'ext': 'mp4',
+ 'title': '\'Handling The Horwitz House After The Murder (Season 2, Episode 16)',
+ 'description': 'md5:f9d638dd6946a1c1c0533a9c6100eae5',
+ 'upload_date': '20190617',
+ 'timestamp': 1560790800,
+ 'season_number': 2,
+ 'episode_number': 16,
+ 'series': 'In Ice Cold Blood',
+ 'episode': '\'Handling The Horwitz House After The Murder (Season 2, Episode 16)',
+ 'duration': 68.235,
+ 'season': 'Season 2',
+ 'thumbnail': r're:^https://.+\.jpg',
+ 'age_limit': 14,
+ },
+ 'params': {'skip_download': 'm3u8'},
+ }, {
+ 'url': 'https://www.bravotv.com/below-deck/season-3/ep-14-reunion-part-1',
+ 'only_matching': True,
+ }]
+
+ def _real_extract(self, url):
+ site, display_id = self._match_valid_url(url).group('site', 'id')
+ webpage = self._download_webpage(url, display_id)
+ settings = self._search_json(
+ r'<script[^>]+data-drupal-selector="drupal-settings-json"[^>]*>', webpage, 'settings', display_id)
+ tve = extract_attributes(get_element_html_by_class('tve-video-deck-app', webpage) or '')
+ query = {
+ 'manifest': 'm3u',
+ 'formats': 'm3u,mpeg4',
+ }
+
+ if tve:
+ account_pid = tve.get('data-mpx-media-account-pid') or 'HNK2IC'
+ account_id = tve['data-mpx-media-account-id']
+ metadata = self._parse_json(
+ tve.get('data-normalized-video', ''), display_id, fatal=False, transform_source=unescapeHTML)
+ video_id = tve.get('data-guid') or metadata['guid']
+ if tve.get('data-entitlement') == 'auth':
+ auth = traverse_obj(settings, ('tve_adobe_auth', {dict})) or {}
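+ # AdobePass requestor ids drop the 'tv' suffix: 'bravotv' becomes 'bravo', 'oxygen' is unchanged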
+ site = remove_end(site, 'tv')
+ release_pid = tve['data-release-pid']
+ resource = self._get_mvpd_resource(
+ tve.get('data-adobe-pass-resource-id') or auth.get('adobePassResourceId') or site,
+ tve['data-title'], release_pid, tve.get('data-rating'))
+ query.update({
+ 'switch': 'HLSServiceSecure',
+ 'auth': self._extract_mvpd_auth(
+ url, release_pid, auth.get('adobePassRequestorId') or site, resource),
+ })
+
+ else:
+ ls_playlist = traverse_obj(settings, ('ls_playlist', ..., {dict}), get_all=False) or {}
+ account_pid = ls_playlist.get('mpxMediaAccountPid') or 'PHSl-B'
+ account_id = ls_playlist['mpxMediaAccountId']
+ video_id = ls_playlist['defaultGuid']
+ metadata = traverse_obj(
+ ls_playlist, ('videos', lambda _, v: v['guid'] == video_id, {dict}), get_all=False)
+
+ tp_url = f'https://link.theplatform.com/s/{account_pid}/media/guid/{account_id}/{video_id}'
+ tp_metadata = self._download_json(
+ update_url_query(tp_url, {'format': 'preview'}), video_id, fatal=False)
+
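+ # thePlatform reports durations and pubDate timestamps in milliseconds; convert them to seconds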
+ seconds_or_none = lambda x: float_or_none(x, 1000)
+ chapters = traverse_obj(tp_metadata, ('chapters', ..., {
+ 'start_time': ('startTime', {seconds_or_none}),
+ 'end_time': ('endTime', {seconds_or_none}),
+ }))
+ # short videos sometimes carry a single pointless chapter spanning the entire duration; prune it
+ if len(chapters) == 1 and not traverse_obj(chapters, (0, 'end_time')):
+ chapters = None
+
+ m3u8_url = self._request_webpage(HEADRequest(
+ update_url_query(f'{tp_url}/stream.m3u8', query)), video_id, 'Checking m3u8 URL').url
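+ # a 'mpeg_cenc' marker in the final (redirected) manifest URL indicates a DRM-protected stream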
+ if 'mpeg_cenc' in m3u8_url:
+ self.report_drm(video_id)
+ formats, subtitles = self._extract_m3u8_formats_and_subtitles(m3u8_url, video_id, 'mp4', m3u8_id='hls')
+
+ return {
+ 'id': video_id,
+ 'formats': formats,
+ 'subtitles': subtitles,
+ 'chapters': chapters,
+ **merge_dicts(traverse_obj(tp_metadata, {
+ 'title': 'title',
+ 'description': 'description',
+ 'duration': ('duration', {seconds_or_none}),
+ 'timestamp': ('pubDate', {seconds_or_none}),
+ 'season_number': (('pl1$seasonNumber', 'nbcu$seasonNumber'), {int_or_none}),
+ 'episode_number': (('pl1$episodeNumber', 'nbcu$episodeNumber'), {int_or_none}),
+ 'series': (('pl1$show', 'nbcu$show'), (None, ...), {str}),
+ 'episode': (('title', 'pl1$episodeNumber', 'nbcu$episodeNumber'), {str_or_none}),
+ 'age_limit': ('ratings', ..., 'rating', {parse_age_limit}),
+ }, get_all=False), traverse_obj(metadata, {
+ 'title': 'title',
+ 'description': 'description',
+ 'duration': ('durationInSeconds', {int_or_none}),
+ 'timestamp': ('airDate', {unified_timestamp}),
+ 'thumbnail': ('thumbnailUrl', {url_or_none}),
+ 'season_number': ('seasonNumber', {int_or_none}),
+ 'episode_number': ('episodeNumber', {int_or_none}),
+ 'episode': 'episodeTitle',
+ 'series': 'show',
+ }))
+ }
diff --git a/yt_dlp/extractor/breitbart.py b/yt_dlp/extractor/breitbart.py
new file mode 100644
index 0000000..b5abb7f
--- /dev/null
+++ b/yt_dlp/extractor/breitbart.py
@@ -0,0 +1,34 @@
+from .common import InfoExtractor
+
+
+class BreitBartIE(InfoExtractor):
+ _VALID_URL = r'https?://(?:www\.)?breitbart\.com/videos/v/(?P<id>[^/?#]+)'
+ _TESTS = [{
+ 'url': 'https://www.breitbart.com/videos/v/5cOz1yup/?pl=Ij6NDOji',
+ 'md5': '0aa6d1d6e183ac5ca09207fe49f17ade',
+ 'info_dict': {
+ 'id': '5cOz1yup',
+ 'ext': 'mp4',
+ 'title': 'Watch \u2013 Clyburn: Statues in Congress Have to Go Because they Are Honoring Slavery',
+ 'description': 'md5:bac35eb0256d1cb17f517f54c79404d5',
+ 'thumbnail': 'https://cdn.jwplayer.com/thumbs/5cOz1yup-1920.jpg',
+ 'age_limit': 0,
+ }
+ }, {
+ 'url': 'https://www.breitbart.com/videos/v/eaiZjVOn/',
+ 'only_matching': True,
+ }]
+
+ def _real_extract(self, url):
+ video_id = self._match_id(url)
+ webpage = self._download_webpage(url, video_id)
+
+ formats = self._extract_m3u8_formats(f'https://cdn.jwplayer.com/manifests/{video_id}.m3u8', video_id, ext='mp4')
+ return {
+ 'id': video_id,
+ 'title': self._generic_title('', webpage),
+ 'description': self._og_search_description(webpage),
+ 'thumbnail': self._og_search_thumbnail(webpage),
+ 'age_limit': self._rta_search(webpage),
+ 'formats': formats
+ }
diff --git a/yt_dlp/extractor/brightcove.py b/yt_dlp/extractor/brightcove.py
new file mode 100644
index 0000000..61b1841
--- /dev/null
+++ b/yt_dlp/extractor/brightcove.py
@@ -0,0 +1,952 @@
+import base64
+import re
+import struct
+import xml.etree.ElementTree
+
+from .adobepass import AdobePassIE
+from .common import InfoExtractor
+from ..compat import (
+ compat_etree_fromstring,
+ compat_parse_qs,
+ compat_urlparse,
+)
+from ..networking.exceptions import HTTPError
+from ..utils import (
+ clean_html,
+ dict_get,
+ extract_attributes,
+ ExtractorError,
+ find_xpath_attr,
+ fix_xml_ampersands,
+ float_or_none,
+ int_or_none,
+ js_to_json,
+ mimetype2ext,
+ parse_iso8601,
+ parse_qs,
+ smuggle_url,
+ str_or_none,
+ try_get,
+ unescapeHTML,
+ unsmuggle_url,
+ UnsupportedError,
+ update_url_query,
+ url_or_none,
+)
+
+
+class BrightcoveLegacyIE(InfoExtractor):
+ IE_NAME = 'brightcove:legacy'
+ _VALID_URL = r'(?:https?://.*brightcove\.com/(services|viewer).*?\?|brightcove:)(?P<query>.*)'
+
+ _TESTS = [
+ {
+ # From http://www.8tv.cat/8aldia/videos/xavier-sala-i-martin-aquesta-tarda-a-8-al-dia/
+ 'url': 'http://c.brightcove.com/services/viewer/htmlFederated?playerID=1654948606001&flashID=myExperience&%40videoPlayer=2371591881001',
+ 'md5': '5423e113865d26e40624dce2e4b45d95',
+ 'note': 'Test Brightcove downloads and detection in GenericIE',
+ 'info_dict': {
+ 'id': '2371591881001',
+ 'ext': 'mp4',
+ 'title': 'Xavier Sala i Martín: “Un banc que no presta és un banc zombi que no serveix per a res”',
+ 'uploader': '8TV',
+ 'description': 'md5:a950cc4285c43e44d763d036710cd9cd',
+ 'timestamp': 1368213670,
+ 'upload_date': '20130510',
+ 'uploader_id': '1589608506001',
+ },
+ 'skip': 'The player has been deactivated by the content owner',
+ },
+ {
+ # From http://medianetwork.oracle.com/video/player/1785452137001
+ 'url': 'http://c.brightcove.com/services/viewer/htmlFederated?playerID=1217746023001&flashID=myPlayer&%40videoPlayer=1785452137001',
+ 'info_dict': {
+ 'id': '1785452137001',
+ 'ext': 'flv',
+ 'title': 'JVMLS 2012: Arrays 2.0 - Opportunities and Challenges',
+ 'description': 'John Rose speaks at the JVM Language Summit, August 1, 2012.',
+ 'uploader': 'Oracle',
+ 'timestamp': 1344975024,
+ 'upload_date': '20120814',
+ 'uploader_id': '1460825906',
+ },
+ 'skip': 'video not playable',
+ },
+ {
+ # From http://mashable.com/2013/10/26/thermoelectric-bracelet-lets-you-control-your-body-temperature/
+ 'url': 'http://c.brightcove.com/services/viewer/federated_f9?&playerID=1265504713001&publisherID=AQ%7E%7E%2CAAABBzUwv1E%7E%2CxP-xFHVUstiMFlNYfvF4G9yFnNaqCw_9&videoID=2750934548001',
+ 'info_dict': {
+ 'id': '2750934548001',
+ 'ext': 'mp4',
+ 'title': 'This Bracelet Acts as a Personal Thermostat',
+ 'description': 'md5:547b78c64f4112766ccf4e151c20b6a0',
+ # 'uploader': 'Mashable',
+ 'timestamp': 1382041798,
+ 'upload_date': '20131017',
+ 'uploader_id': '1130468786001',
+ },
+ },
+ {
+ # test that the default referer works
+ # from http://national.ballet.ca/interact/video/Lost_in_Motion_II/
+ 'url': 'http://link.brightcove.com/services/player/bcpid756015033001?bckey=AQ~~,AAAApYJi_Ck~,GxhXCegT1Dp39ilhXuxMJxasUhVNZiil&bctid=2878862109001',
+ 'info_dict': {
+ 'id': '2878862109001',
+ 'ext': 'mp4',
+ 'title': 'Lost in Motion II',
+ 'description': 'md5:363109c02998fee92ec02211bd8000df',
+ 'uploader': 'National Ballet of Canada',
+ },
+ 'skip': 'Video gone',
+ },
+ {
+ # test flv videos served by akamaihd.net
+ # From http://www.redbull.com/en/bike/stories/1331655643987/replay-uci-dh-world-cup-2014-from-fort-william
+ 'url': 'http://c.brightcove.com/services/viewer/htmlFederated?%40videoPlayer=ref%3Aevent-stream-356&linkBaseURL=http%3A%2F%2Fwww.redbull.com%2Fen%2Fbike%2Fvideos%2F1331655630249%2Freplay-uci-fort-william-2014-dh&playerKey=AQ%7E%7E%2CAAAApYJ7UqE%7E%2Cxqr_zXk0I-zzNndy8NlHogrCb5QdyZRf&playerID=1398061561001#__youtubedl_smuggle=%7B%22Referer%22%3A+%22http%3A%2F%2Fwww.redbull.com%2Fen%2Fbike%2Fstories%2F1331655643987%2Freplay-uci-dh-world-cup-2014-from-fort-william%22%7D',
+ # The md5 checksum changes on each download
+ 'info_dict': {
+ 'id': '3750436379001',
+ 'ext': 'flv',
+ 'title': 'UCI MTB World Cup 2014: Fort William, UK - Downhill Finals',
+ 'uploader': 'RBTV Old (do not use)',
+ 'description': 'UCI MTB World Cup 2014: Fort William, UK - Downhill Finals',
+ 'timestamp': 1409122195,
+ 'upload_date': '20140827',
+ 'uploader_id': '710858724001',
+ },
+ 'skip': 'Video gone',
+ },
+ {
+ # playlist with 'videoList'
+ # from http://support.brightcove.com/en/video-cloud/docs/playlist-support-single-video-players
+ 'url': 'http://c.brightcove.com/services/viewer/htmlFederated?playerID=3550052898001&playerKey=AQ%7E%7E%2CAAABmA9XpXk%7E%2C-Kp7jNgisre1fG5OdqpAFUTcs0lP_ZoL',
+ 'info_dict': {
+ 'title': 'Sealife',
+ 'id': '3550319591001',
+ },
+ 'playlist_mincount': 7,
+ 'skip': 'Unsupported URL',
+ },
+ {
+ # playlist with 'playlistTab' (https://github.com/ytdl-org/youtube-dl/issues/9965)
+ 'url': 'http://c.brightcove.com/services/json/experience/runtime/?command=get_programming_for_experience&playerKey=AQ%7E%7E,AAABXlLMdok%7E,NJ4EoMlZ4rZdx9eU1rkMVd8EaYPBBUlg',
+ 'info_dict': {
+ 'id': '1522758701001',
+ 'title': 'Lesson 08',
+ },
+ 'playlist_mincount': 10,
+ 'skip': 'Unsupported URL',
+ },
+ {
+ # playerID inferred from bcpid
+ # from http://www.un.org/chinese/News/story.asp?NewsID=27724
+ 'url': 'https://link.brightcove.com/services/player/bcpid1722935254001/?bctid=5360463607001&autoStart=false&secureConnections=true&width=650&height=350',
+ 'only_matching': True, # Tested in GenericIE
+ }
+ ]
+
+ _WEBPAGE_TESTS = [{
+ # embedded brightcove video
+ # it also tests brightcove videos that need to set the 'Referer'
+ # in the http requests
+ 'url': 'http://www.bfmtv.com/video/bfmbusiness/cours-bourse/cours-bourse-l-analyse-technique-154522/',
+ 'info_dict': {
+ 'id': '2765128793001',
+ 'ext': 'mp4',
+ 'title': 'Le cours de bourse : l’analyse technique',
+ 'description': 'md5:7e9ad046e968cb2d1114004aba466fd9',
+ 'uploader': 'BFM BUSINESS',
+ },
+ 'params': {
+ 'skip_download': True,
+ },
+ 'skip': '404 Not Found',
+ }, {
+ # embedded with itemprop embedURL and video id spelled as `idVideo`
+ 'url': 'http://bfmbusiness.bfmtv.com/mediaplayer/chroniques/olivier-delamarche/',
+ 'info_dict': {
+ 'id': '5255628253001',
+ 'ext': 'mp4',
+ 'title': 'md5:37c519b1128915607601e75a87995fc0',
+ 'description': 'md5:37f7f888b434bb8f8cc8dbd4f7a4cf26',
+ 'uploader': 'BFM BUSINESS',
+ 'uploader_id': '876450612001',
+ 'timestamp': 1482255315,
+ 'upload_date': '20161220',
+ },
+ 'params': {
+ 'skip_download': True,
+ },
+ 'skip': 'Redirects, page gone',
+ }, {
+ # https://github.com/ytdl-org/youtube-dl/issues/2253
+ 'url': 'http://bcove.me/i6nfkrc3',
+ 'md5': '0ba9446db037002366bab3b3eb30c88c',
+ 'info_dict': {
+ 'id': '3101154703001',
+ 'ext': 'mp4',
+ 'title': 'Still no power',
+ 'uploader': 'thestar.com',
+ 'description': 'Mississauga resident David Farmer is still out of power as a result of the ice storm a month ago. To keep the house warm, Farmer cuts wood from his property for a wood burning stove downstairs.',
+ },
+ 'skip': 'video gone',
+ }, {
+ # https://github.com/ytdl-org/youtube-dl/issues/3541
+ 'url': 'http://www.kijk.nl/sbs6/leermijvrouwenkennen/videos/jqMiXKAYan2S/aflevering-1',
+ 'info_dict': {
+ 'id': '3866516442001',
+ 'ext': 'mp4',
+ 'title': 'Leer mij vrouwen kennen: Aflevering 1',
+ 'description': 'Leer mij vrouwen kennen: Aflevering 1',
+ 'uploader': 'SBS Broadcasting',
+ },
+ 'skip': 'Restricted to Netherlands, 404 Not Found',
+ 'params': {
+ 'skip_download': True, # m3u8 download
+ },
+ }, {
+ # Brightcove video in <iframe>
+ 'url': 'http://www.un.org/chinese/News/story.asp?NewsID=27724',
+ 'md5': '36d74ef5e37c8b4a2ce92880d208b968',
+ 'info_dict': {
+ 'id': '5360463607001',
+ 'ext': 'mp4',
+ 'title': '叙利亚失明儿童在废墟上演唱《心跳》 呼吁获得正常童年生活',
+ 'description': '联合国儿童基金会中东和北非区域大使、作曲家扎德·迪拉尼(Zade Dirani)在3月15日叙利亚冲突爆发7周年纪念日之际发布了为叙利亚谱写的歌曲《心跳》(HEARTBEAT),为受到六年冲突影响的叙利亚儿童发出强烈呐喊,呼吁世界做出共同努力,使叙利亚儿童重新获得享有正常童年生活的权利。',
+ 'uploader': 'United Nations',
+ 'uploader_id': '1362235914001',
+ 'timestamp': 1489593889,
+ 'upload_date': '20170315',
+ },
+ 'skip': '404 Not Found',
+ }, {
+ # Brightcove with UUID in videoPlayer
+ 'url': 'http://www8.hp.com/cn/zh/home.html',
+ 'info_dict': {
+ 'id': '5255815316001',
+ 'ext': 'mp4',
+ 'title': 'Sprocket Video - China',
+ 'description': 'Sprocket Video - China',
+ 'uploader': 'HP-Video Gallery',
+ 'timestamp': 1482263210,
+ 'upload_date': '20161220',
+ 'uploader_id': '1107601872001',
+ },
+ 'params': {
+ 'skip_download': True, # m3u8 download
+ },
+ 'skip': 'video rotates...weekly?',
+ }, {
+ # Multiple brightcove videos
+ # https://github.com/ytdl-org/youtube-dl/issues/2283
+ 'url': 'http://www.newyorker.com/online/blogs/newsdesk/2014/01/always-never-nuclear-command-and-control.html',
+ 'info_dict': {
+ 'id': 'always-never',
+ 'title': 'Always / Never - The New Yorker',
+ },
+ 'playlist_count': 3,
+ 'params': {
+ 'extract_flat': False,
+ 'skip_download': True,
+ },
+ 'skip': 'Redirects, page gone',
+ }, {
+ # BrightcoveInPageEmbed embed
+ 'url': 'http://www.geekandsundry.com/tabletop-bonus-wils-final-thoughts-on-dread/',
+ 'info_dict': {
+ 'id': '4238694884001',
+ 'ext': 'flv',
+ 'title': 'Tabletop: Dread, Last Thoughts',
+ 'description': 'Tabletop: Dread, Last Thoughts',
+ 'duration': 51690,
+ },
+ 'skip': 'Redirects, page gone',
+ }, {
+ # Brightcove embed, with no valid 'renditions' but valid 'IOSRenditions'
+ # This video can't be played in browsers if Flash is disabled and the UA is set to iPhone, which is actually a false alarm
+ 'url': 'https://dl.dropboxusercontent.com/u/29092637/interview.html',
+ 'info_dict': {
+ 'id': '4785848093001',
+ 'ext': 'mp4',
+ 'title': 'The Cardinal Pell Interview',
+ 'description': 'Sky News Contributor Andrew Bolt interviews George Pell in Rome, following the Cardinal\'s evidence before the Royal Commission into Child Abuse. ',
+ 'uploader': 'GlobeCast Australia - GlobeStream',
+ 'uploader_id': '2733773828001',
+ 'upload_date': '20160304',
+ 'timestamp': 1457083087,
+ },
+ 'params': {
+ # m3u8 downloads
+ 'skip_download': True,
+ },
+ 'skip': '404 Not Found',
+ }, {
+ # Brightcove embed with whitespace around attribute names
+ 'url': 'http://www.stack.com/video/3167554373001/learn-to-hit-open-three-pointers-with-damian-lillard-s-baseline-drift-drill',
+ 'info_dict': {
+ 'id': '3167554373001',
+ 'ext': 'mp4',
+ 'title': "Learn to Hit Open Three-Pointers With Damian Lillard's Baseline Drift Drill",
+ 'description': 'md5:57bacb0e0f29349de4972bfda3191713',
+ 'uploader_id': '1079349493',
+ 'upload_date': '20140207',
+ 'timestamp': 1391810548,
+ },
+ 'params': {
+ 'skip_download': True,
+ },
+ 'skip': '410 Gone',
+ }]
+
+ @classmethod
+ def _build_brightcove_url(cls, object_str):
+ """
+ Build a Brightcove URL from an XML string containing
+ <object class="BrightcoveExperience">{params}</object>
+ """
+
+ # Fix up some stupid HTML, see https://github.com/ytdl-org/youtube-dl/issues/1553
+ object_str = re.sub(r'(<param(?:\s+[a-zA-Z0-9_]+="[^"]*")*)>',
+ lambda m: m.group(1) + '/>', object_str)
+ # Fix up some stupid XML, see https://github.com/ytdl-org/youtube-dl/issues/1608
+ object_str = object_str.replace('<--', '<!--')
+ # remove namespace to simplify extraction
+ object_str = re.sub(r'(<object[^>]*)(xmlns=".*?")', r'\1', object_str)
+ object_str = fix_xml_ampersands(object_str)
+
+ try:
+ object_doc = compat_etree_fromstring(object_str.encode('utf-8'))
+ except xml.etree.ElementTree.ParseError:
+ return
+
+ fv_el = find_xpath_attr(object_doc, './param', 'name', 'flashVars')
+ if fv_el is not None:
+ flashvars = dict(
+ (k, v[0])
+ for k, v in compat_parse_qs(fv_el.attrib['value']).items())
+ else:
+ flashvars = {}
+
+ data_url = object_doc.attrib.get('data', '')
+ data_url_params = parse_qs(data_url)
+
+ def find_param(name):
+ if name in flashvars:
+ return flashvars[name]
+ node = find_xpath_attr(object_doc, './param', 'name', name)
+ if node is not None:
+ return node.attrib['value']
+ return data_url_params.get(name)
+
+ params = {}
+
+ playerID = find_param('playerID') or find_param('playerId')
+ if playerID is None:
+ raise ExtractorError('Cannot find player ID')
+ params['playerID'] = playerID
+
+ playerKey = find_param('playerKey')
+ # Not all pages define this value
+ if playerKey is not None:
+ params['playerKey'] = playerKey
+ # These fields hold the id of the video
+ videoPlayer = find_param('@videoPlayer') or find_param('videoId') or find_param('videoID') or find_param('@videoList')
+ if videoPlayer is not None:
+ if isinstance(videoPlayer, list):
+ videoPlayer = videoPlayer[0]
+ videoPlayer = videoPlayer.strip()
+ # UUID is also possible for videoPlayer (e.g.
+ # http://www.popcornflix.com/hoodies-vs-hooligans/7f2d2b87-bbf2-4623-acfb-ea942b4f01dd
+ # or http://www8.hp.com/cn/zh/home.html)
+ if not (re.match(
+ r'^(?:\d+|[\da-fA-F]{8}-?[\da-fA-F]{4}-?[\da-fA-F]{4}-?[\da-fA-F]{4}-?[\da-fA-F]{12})$',
+ videoPlayer) or videoPlayer.startswith('ref:')):
+ return None
+ params['@videoPlayer'] = videoPlayer
+ linkBase = find_param('linkBaseURL')
+ if linkBase is not None:
+ params['linkBaseURL'] = linkBase
+ return cls._make_brightcove_url(params)
+
+ @classmethod
+ def _build_brightcove_url_from_js(cls, object_js):
+ # The layout of JS is as follows:
+ # customBC.createVideo = function (width, height, playerID, playerKey, videoPlayer, VideoRandomID) {
+ # // build Brightcove <object /> XML
+ # }
+ m = re.search(
+ r'''(?x)customBC\.createVideo\(
+ .*? # skipping width and height
+ ["\'](?P<playerID>\d+)["\']\s*,\s* # playerID
+ ["\'](?P<playerKey>AQ[^"\']{48})[^"\']*["\']\s*,\s* # playerKey begins with AQ and is 50 characters
+ # in length, however it's appended to itself
+ # in places, so truncate
+ ["\'](?P<videoID>\d+)["\'] # @videoPlayer
+ ''', object_js)
+ if m:
+ return cls._make_brightcove_url(m.groupdict())
+
+ @classmethod
+ def _make_brightcove_url(cls, params):
+ return update_url_query(
+ 'http://c.brightcove.com/services/viewer/htmlFederated', params)
+
+ @classmethod
+ def _extract_brightcove_url(cls, webpage):
+ """Try to extract the brightcove url from the webpage, returns None
+ if it can't be found
+ """
+ urls = cls._extract_brightcove_urls(webpage)
+ return urls[0] if urls else None
+
+ @classmethod
+ def _extract_brightcove_urls(cls, webpage):
+ """Return a list of all Brightcove URLs from the webpage """
+
+ url_m = re.search(
+ r'''(?x)
+ <meta\s+
+ (?:property|itemprop)=([\'"])(?:og:video|embedURL)\1[^>]+
+ content=([\'"])(?P<url>https?://(?:secure|c)\.brightcove.com/(?:(?!\2).)+)\2
+ ''', webpage)
+ if url_m:
+ url = unescapeHTML(url_m.group('url'))
+ # Some sites don't add these params, and the url can't be downloaded without them, for example:
+ # http://www.ktvu.com/videos/news/raw-video-caltrain-releases-video-of-man-almost/vCTZdY/
+ if 'playerKey' in url or 'videoId' in url or 'idVideo' in url:
+ return [url]
+
+ matches = re.findall(
+ r'''(?sx)<object
+ (?:
+ [^>]+?class=[\'"][^>]*?BrightcoveExperience.*?[\'"] |
+ [^>]*?>\s*<param\s+name="movie"\s+value="https?://[^/]*brightcove\.com/
+ ).+?>\s*</object>''',
+ webpage)
+ if matches:
+ return list(filter(None, [cls._build_brightcove_url(m) for m in matches]))
+
+ matches = re.findall(r'(customBC\.createVideo\(.+?\);)', webpage)
+ if matches:
+ return list(filter(None, [
+ cls._build_brightcove_url_from_js(custom_bc)
+ for custom_bc in matches]))
+ return [src for _, src in re.findall(
+ r'<iframe[^>]+src=([\'"])((?:https?:)?//link\.brightcove\.com/services/player/(?!\1).+)\1', webpage)]
+
+ def _extract_from_webpage(self, url, webpage):
+ bc_urls = self._extract_brightcove_urls(webpage)
+ for bc_url in bc_urls:
+ yield self.url_result(smuggle_url(bc_url, {'Referer': url}), BrightcoveLegacyIE)
+
+ def _real_extract(self, url):
+ url, smuggled_data = unsmuggle_url(url, {})
+
+ # Change 'videoId' and similar fields to '@videoPlayer'
+ url = re.sub(r'(?<=[?&])(videoI(d|D)|idVideo|bctid)', '%40videoPlayer', url)
+ # Change bckey (used by bcove.me urls) to playerKey
+ url = re.sub(r'(?<=[?&])bckey', 'playerKey', url)
+ mobj = self._match_valid_url(url)
+ query_str = mobj.group('query')
+ query = compat_urlparse.parse_qs(query_str)
+
+ videoPlayer = query.get('@videoPlayer')
+ if videoPlayer:
+ # We set the original url as the default 'Referer' header
+ referer = query.get('linkBaseURL', [None])[0] or smuggled_data.get('Referer', url)
+ video_id = videoPlayer[0]
+ if 'playerID' not in query:
+ mobj = re.search(r'/bcpid(\d+)', url)
+ if mobj is not None:
+ query['playerID'] = [mobj.group(1)]
+ publisher_id = query.get('publisherId')
+ if publisher_id and publisher_id[0].isdigit():
+ publisher_id = publisher_id[0]
+ if not publisher_id:
+ player_key = query.get('playerKey')
+ if player_key and ',' in player_key[0]:
+ player_key = player_key[0]
+ else:
+ player_id = query.get('playerID')
+ if player_id and player_id[0].isdigit():
+ headers = {}
+ if referer:
+ headers['Referer'] = referer
+ player_page = self._download_webpage(
+ 'http://link.brightcove.com/services/player/bcpid' + player_id[0],
+ video_id, headers=headers, fatal=False)
+ if player_page:
+ player_key = self._search_regex(
+ r'<param\s+name="playerKey"\s+value="([\w~,-]+)"',
+ player_page, 'player key', fatal=False)
+ if player_key:
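+ # the second comma-separated token of playerKey is a urlsafe-base64, big-endian uint64 publisher id ('~' stands in for '=' padding)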
+ enc_pub_id = player_key.split(',')[1].replace('~', '=')
+ publisher_id = struct.unpack('>Q', base64.urlsafe_b64decode(enc_pub_id))[0]
+ if publisher_id:
+ brightcove_new_url = 'http://players.brightcove.net/%s/default_default/index.html?videoId=%s' % (publisher_id, video_id)
+ if referer:
+ brightcove_new_url = smuggle_url(brightcove_new_url, {'referrer': referer})
+ return self.url_result(brightcove_new_url, BrightcoveNewIE.ie_key(), video_id)
+ # TODO: figure out if it's possible to extract playlistId from playerKey
+ # elif 'playerKey' in query:
+ # player_key = query['playerKey']
+ # return self._get_playlist_info(player_key[0])
+ raise UnsupportedError(url)
+
+
+class BrightcoveNewBaseIE(AdobePassIE):
+ def _parse_brightcove_metadata(self, json_data, video_id, headers={}):
+ title = json_data['name'].strip()
+
+ formats, subtitles = [], {}
+ sources = json_data.get('sources') or []
+ for source in sources:
+ container = source.get('container')
+ ext = mimetype2ext(source.get('type'))
+ src = source.get('src')
+ if ext == 'm3u8' or container == 'M2TS':
+ if not src:
+ continue
+ fmts, subs = self._extract_m3u8_formats_and_subtitles(
+ src, video_id, 'mp4', 'm3u8_native', m3u8_id='hls', fatal=False)
+ subtitles = self._merge_subtitles(subtitles, subs)
+ elif ext == 'mpd':
+ if not src:
+ continue
+ fmts, subs = self._extract_mpd_formats_and_subtitles(src, video_id, 'dash', fatal=False)
+ subtitles = self._merge_subtitles(subtitles, subs)
+ else:
+ streaming_src = source.get('streaming_src')
+ stream_name, app_name = source.get('stream_name'), source.get('app_name')
+ if not src and not streaming_src and (not stream_name or not app_name):
+ continue
+ tbr = float_or_none(source.get('avg_bitrate'), 1000)
+ height = int_or_none(source.get('height'))
+ width = int_or_none(source.get('width'))
+ f = {
+ 'tbr': tbr,
+ 'filesize': int_or_none(source.get('size')),
+ 'container': container,
+ 'ext': ext or container.lower(),
+ }
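+ # a 0x0 rendition carries no video track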
+ if width == 0 and height == 0:
+ f.update({
+ 'vcodec': 'none',
+ })
+ else:
+ f.update({
+ 'width': width,
+ 'height': height,
+ 'vcodec': source.get('codec'),
+ })
+
+ def build_format_id(kind):
+ format_id = kind
+ if tbr:
+ format_id += '-%dk' % int(tbr)
+ if height:
+ format_id += '-%dp' % height
+ return format_id
+
+ if src or streaming_src:
+ f.update({
+ 'url': src or streaming_src,
+ 'format_id': build_format_id('http' if src else 'http-streaming'),
+ 'source_preference': 0 if src else -1,
+ })
+ else:
+ f.update({
+ 'url': app_name,
+ 'play_path': stream_name,
+ 'format_id': build_format_id('rtmp'),
+ })
+ fmts = [f]
+
+ # https://support.brightcove.com/playback-api-video-fields-reference#key_systems_object
+ if container == 'WVM' or source.get('key_systems') or ext == 'ism':
+ for f in fmts:
+ f['has_drm'] = True
+ formats.extend(fmts)
+
+ if not formats:
+ errors = json_data.get('errors')
+ if errors:
+ error = errors[0]
+ self.raise_no_formats(
+ error.get('message') or error.get('error_subcode') or error['error_code'], expected=True)
+
+ headers.pop('Authorization', None) # or else http formats will give error 400
+ for f in formats:
+ f.setdefault('http_headers', {}).update(headers)
+
+ for text_track in json_data.get('text_tracks', []):
+ if text_track.get('kind') != 'captions':
+ continue
+ text_track_url = url_or_none(text_track.get('src'))
+ if not text_track_url:
+ continue
+ lang = (str_or_none(text_track.get('srclang'))
+ or str_or_none(text_track.get('label')) or 'en').lower()
+ subtitles.setdefault(lang, []).append({
+ 'url': text_track_url,
+ })
+
+ is_live = False
+ duration = float_or_none(json_data.get('duration'), 1000)
+ if duration is not None and duration <= 0:
+ is_live = True
+
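+ # poster/thumbnail URLs embed their size as <width>x<height>; substitute a set of common 16:9 sizes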
+ common_res = [(160, 90), (320, 180), (480, 270), (640, 360), (768, 432), (1024, 576), (1280, 720), (1366, 768), (1920, 1080)]
+ thumb_base_url = dict_get(json_data, ('poster', 'thumbnail'))
+ thumbnails = [{
+ 'url': re.sub(r'\d+x\d+', f'{w}x{h}', thumb_base_url),
+ 'width': w,
+ 'height': h,
+ } for w, h in common_res] if thumb_base_url else None
+
+ return {
+ 'id': video_id,
+ 'title': title,
+ 'description': clean_html(json_data.get('description')),
+ 'thumbnails': thumbnails,
+ 'duration': duration,
+ 'timestamp': parse_iso8601(json_data.get('published_at')),
+ 'uploader_id': json_data.get('account_id'),
+ 'formats': formats,
+ 'subtitles': subtitles,
+ 'tags': json_data.get('tags', []),
+ 'is_live': is_live,
+ }
+
+
+class BrightcoveNewIE(BrightcoveNewBaseIE):
+ IE_NAME = 'brightcove:new'
+ _VALID_URL = r'https?://players\.brightcove\.net/(?P<account_id>\d+)/(?P<player_id>[^/]+)_(?P<embed>[^/]+)/index\.html\?.*(?P<content_type>video|playlist)Id=(?P<video_id>\d+|ref:[^&]+)'
+ _TESTS = [{
+ 'url': 'http://players.brightcove.net/929656772001/e41d32dc-ec74-459e-a845-6c69f7b724ea_default/index.html?videoId=4463358922001',
+ 'md5': 'c8100925723840d4b0d243f7025703be',
+ 'info_dict': {
+ 'id': '4463358922001',
+ 'ext': 'mp4',
+ 'title': 'Meet the man behind Popcorn Time',
+ 'description': 'md5:eac376a4fe366edc70279bfb681aea16',
+ 'duration': 165.768,
+ 'timestamp': 1441391203,
+ 'upload_date': '20150904',
+ 'uploader_id': '929656772001',
+ 'formats': 'mincount:20',
+ },
+ 'skip': '404 Not Found',
+ }, {
+ # with rtmp streams
+ 'url': 'http://players.brightcove.net/4036320279001/5d112ed9-283f-485f-a7f9-33f42e8bc042_default/index.html?videoId=4279049078001',
+ 'info_dict': {
+ 'id': '4279049078001',
+ 'ext': 'mp4',
+ 'title': 'Titansgrave: Chapter 0',
+ 'description': 'Titansgrave: Chapter 0',
+ 'duration': 1242.058,
+ 'timestamp': 1433556729,
+ 'upload_date': '20150606',
+ 'uploader_id': '4036320279001',
+ 'formats': 'mincount:39',
+ },
+ 'params': {
+ # m3u8 download
+ 'skip_download': True,
+ }
+ }, {
+ # playlist stream
+ 'url': 'https://players.brightcove.net/1752604059001/S13cJdUBz_default/index.html?playlistId=5718313430001',
+ 'info_dict': {
+ 'id': '5718313430001',
+ 'title': 'No Audio Playlist',
+ },
+ 'playlist_count': 7,
+ 'params': {
+ # m3u8 download
+ 'skip_download': True,
+ }
+ }, {
+ 'url': 'http://players.brightcove.net/5690807595001/HyZNerRl7_default/index.html?playlistId=5743160747001',
+ 'only_matching': True,
+ }, {
+ # ref: prefixed video id
+ 'url': 'http://players.brightcove.net/3910869709001/21519b5c-4b3b-4363-accb-bdc8f358f823_default/index.html?videoId=ref:7069442',
+ 'only_matching': True,
+ }, {
+ # non numeric ref: prefixed video id
+ 'url': 'http://players.brightcove.net/710858724001/default_default/index.html?videoId=ref:event-stream-356',
+ 'only_matching': True,
+ }, {
+ # unavailable video without message but with error_code
+ 'url': 'http://players.brightcove.net/1305187701/c832abfb-641b-44eb-9da0-2fe76786505f_default/index.html?videoId=4377407326001',
+ 'only_matching': True,
+ }]
+
+ _WEBPAGE_TESTS = [{
+ # brightcove player url embed
+ 'url': 'https://nbc-2.com/weather/forecast/2022/11/16/forecast-warmest-day-of-the-week/',
+ 'md5': '2934d5372b354d27083ccf8575dbfee2',
+ 'info_dict': {
+ 'id': '6315650313112',
+ 'title': 'First Alert Forecast: November 15, 2022',
+ 'ext': 'mp4',
+ 'tags': ['nbc2', 'forecast'],
+ 'uploader_id': '6146886170001',
+ 'thumbnail': r're:^https?://.*\.jpg$',
+ 'timestamp': 1668574571,
+ 'duration': 233.375,
+ 'upload_date': '20221116',
+ },
+ }, {
+ # embedded with video tag only
+ 'url': 'https://www.gooddishtv.com/tiktok-rapping-chef-mr-pyrex',
+ 'info_dict': {
+ 'id': 'tiktok-rapping-chef-mr-pyrex',
+ 'title': 'TikTok\'s Rapping Chef Makes Jambalaya for the Hosts',
+ 'thumbnail': r're:^https?://.*\.jpg$',
+ 'age_limit': 0,
+ 'description': 'Just in time for Mardi Gras',
+ },
+ 'playlist': [{
+ 'info_dict': {
+ 'id': '6299189544001',
+ 'ext': 'mp4',
+ 'title': 'TGD_01-032_5',
+ 'thumbnail': r're:^https?://.*\.jpg$',
+ 'tags': [],
+ 'timestamp': 1646078943,
+ 'uploader_id': '1569565978001',
+ 'upload_date': '20220228',
+ 'duration': 217.195,
+ },
+ }, {
+ 'info_dict': {
+ 'id': '6305565995112',
+ 'ext': 'mp4',
+ 'title': 'TGD 01-087 (Airs 05.25.22)_Segment 5',
+ 'thumbnail': r're:^https?://.*\.jpg$',
+ 'tags': [],
+ 'timestamp': 1651604591,
+ 'uploader_id': '1569565978001',
+ 'upload_date': '20220503',
+ 'duration': 310.421,
+ },
+ }],
+ }, {
+ # Brightcove:new type [2].
+ 'url': 'http://www.delawaresportszone.com/video-st-thomas-more-earns-first-trip-to-basketball-semis',
+ 'md5': '2b35148fcf48da41c9fb4591650784f3',
+ 'info_dict': {
+ 'id': '5348741021001',
+ 'ext': 'mp4',
+ 'upload_date': '20170306',
+ 'uploader_id': '4191638492001',
+ 'timestamp': 1488769918,
+ 'title': 'VIDEO: St. Thomas More earns first trip to basketball semis',
+ },
+ 'skip': '404 Not Found',
+ }, {
+ # Alternative brightcove <video> attributes
+ 'url': 'http://www.programme-tv.net/videos/extraits/81095-guillaume-canet-evoque-les-rumeurs-d-infidelite-de-marion-cotillard-avec-brad-pitt-dans-vivement-dimanche/',
+ 'info_dict': {
+ 'id': '81095-guillaume-canet-evoque-les-rumeurs-d-infidelite-de-marion-cotillard-avec-brad-pitt-dans-vivement-dimanche',
+ 'title': "Guillaume Canet évoque les rumeurs d'infidélité de Marion Cotillard avec Brad Pitt dans Vivement Dimanche, Extraits : toutes les vidéos avec Télé-Loisirs",
+ },
+ 'playlist': [{
+ 'md5': '732d22ba3d33f2f3fc253c39f8f36523',
+ 'info_dict': {
+ 'id': '5311302538001',
+ 'ext': 'mp4',
+ 'title': "Guillaume Canet évoque les rumeurs d'infidélité de Marion Cotillard avec Brad Pitt dans Vivement Dimanche",
+ 'description': "Guillaume Canet évoque les rumeurs d'infidélité de Marion Cotillard avec Brad Pitt dans Vivement Dimanche (France 2, 5 février 2017)",
+ 'timestamp': 1486321708,
+ 'upload_date': '20170205',
+ 'uploader_id': '800000640001',
+ },
+ 'only_matching': True,
+ }],
+ 'skip': '404 Not Found',
+ }, {
+ # Brightcove URL in single quotes
+ 'url': 'http://www.sportsnet.ca/baseball/mlb/sn-presents-russell-martin-world-citizen/',
+ 'md5': '4ae374f1f8b91c889c4b9203c8c752af',
+ 'info_dict': {
+ 'id': '4255764656001',
+ 'ext': 'mp4',
+ 'title': 'SN Presents: Russell Martin, World Citizen',
+ 'description': 'To understand why he was the Toronto Blue Jays’ top off-season priority is to appreciate his background and upbringing in Montreal, where he first developed his baseball skills. Written and narrated by Stephen Brunt.',
+ 'uploader': 'Rogers Sportsnet',
+ 'uploader_id': '1704050871',
+ 'upload_date': '20150525',
+ 'timestamp': 1432570283,
+ },
+ 'skip': 'Page no longer has URL, now has javascript',
+ }]
+
+ @staticmethod
+ def _extract_url(ie, webpage):
+ urls = BrightcoveNewIE._extract_brightcove_urls(ie, webpage)
+ return urls[0] if urls else None
+
+ @staticmethod
+ def _extract_brightcove_urls(ie, webpage):
+ # Reference:
+ # 1. http://docs.brightcove.com/en/video-cloud/brightcove-player/guides/publish-video.html#setvideoiniframe
+ # 2. http://docs.brightcove.com/en/video-cloud/brightcove-player/guides/publish-video.html#tag
+ # 3. http://docs.brightcove.com/en/video-cloud/brightcove-player/guides/publish-video.html#setvideousingjavascript
+ # 4. http://docs.brightcove.com/en/video-cloud/brightcove-player/guides/in-page-embed-player-implementation.html
+ # 5. https://support.brightcove.com/en/video-cloud/docs/dynamically-assigning-videos-player
+
+ entries = []
+
+ # Look for iframe embeds [1]
+ for _, url in re.findall(
+ r'<iframe[^>]+src=(["\'])((?:https?:)?//players\.brightcove\.net/\d+/[^/]+/index\.html.+?)\1', webpage):
+ entries.append(url if url.startswith('http') else 'http:' + url)
+
+ # Look for <video> tags [2] and embed_in_page embeds [3]
+ # [2] looks like:
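+ # <video data-video-id="4463358922001" data-account="929656772001"
+ #        data-player="default" data-embed="default" class="video-js"></video>
+ # optionally followed by its player script, e.g.
+ # <script src="//players.brightcove.net/929656772001/default_default/index.min.js"></script>
+ # (illustrative sketch; ids borrowed from the tests above, real pages vary)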
+ for video, script_tag, account_id, player_id, embed in re.findall(
+ r'''(?isx)
+ (<video(?:-js)?\s+[^>]*\bdata-video-id\s*=\s*['"]?[^>]+>)
+ (?:.*?
+ (<script[^>]+
+ src=["\'](?:https?:)?//players\.brightcove\.net/
+ (\d+)/([^/]+)_([^/]+)/index(?:\.min)?\.js
+ )
+ )?
+ ''', webpage):
+ attrs = extract_attributes(video)
+
+ # According to the examples from [4] it's unclear whether the video id
+ # may be optional and what to do when it is missing
+ video_id = attrs.get('data-video-id')
+ if not video_id:
+ continue
+
+ account_id = account_id or attrs.get('data-account')
+ if not account_id:
+ continue
+
+ player_id = player_id or attrs.get('data-player') or 'default'
+ embed = embed or attrs.get('data-embed') or 'default'
+
+ bc_url = 'http://players.brightcove.net/%s/%s_%s/index.html?videoId=%s' % (
+ account_id, player_id, embed, video_id)
+
+ # Some brightcove videos may be embedded with video tag only and
+ # without script tag or any mentioning of brightcove at all. Such
+ # embeds are considered ambiguous since they are matched based only
+ # on data-video-id and data-account attributes and in the wild may
+ # not be brightcove embeds at all. Let's check reconstructed
+ # brightcove URLs in case of such embeds and only process valid
+ # ones. By this we ensure there is indeed a brightcove embed.
+ if not script_tag and not ie._is_valid_url(
+ bc_url, video_id, 'possible brightcove video'):
+ continue
+
+ entries.append(bc_url)
+
+ return entries
+
+ def _extract_from_webpage(self, url, webpage):
+ bc_urls = self._extract_brightcove_urls(self, webpage)
+ for bc_url in bc_urls:
+ yield self.url_result(smuggle_url(bc_url, {'referrer': url}), BrightcoveNewIE)
+
+ def _real_extract(self, url):
+ url, smuggled_data = unsmuggle_url(url, {})
+ self._initialize_geo_bypass({
+ 'countries': smuggled_data.get('geo_countries'),
+ 'ip_blocks': smuggled_data.get('geo_ip_blocks'),
+ })
+
+ account_id, player_id, embed, content_type, video_id = self._match_valid_url(url).groups()
+
+ policy_key_id = '%s_%s' % (account_id, player_id)
+ policy_key = self.cache.load('brightcove', policy_key_id)
+ policy_key_extracted = False
+ store_pk = lambda x: self.cache.store('brightcove', policy_key_id, x)
+
+ def extract_policy_key():
+ base_url = 'http://players.brightcove.net/%s/%s_%s/' % (account_id, player_id, embed)
+ config = self._download_json(
+ base_url + 'config.json', video_id, fatal=False) or {}
+ policy_key = try_get(
+ config, lambda x: x['video_cloud']['policy_key'])
+ if not policy_key:
+ webpage = self._download_webpage(
+ base_url + 'index.min.js', video_id)
+
+ catalog = self._search_regex(
+ r'catalog\(({.+?})\);', webpage, 'catalog', default=None)
+ if catalog:
+ catalog = self._parse_json(
+ js_to_json(catalog), video_id, fatal=False)
+ if catalog:
+ policy_key = catalog.get('policyKey')
+
+ if not policy_key:
+ policy_key = self._search_regex(
+ r'policyKey\s*:\s*(["\'])(?P<pk>.+?)\1',
+ webpage, 'policy key', group='pk')
+
+ store_pk(policy_key)
+ return policy_key
+
+ token = smuggled_data.get('token')
+ api_url = f'https://{"edge-auth" if token else "edge"}.api.brightcove.com/playback/v1/accounts/{account_id}/{content_type}s/{video_id}'
+ headers = {'Authorization': f'Bearer {token}'} if token else {}
+ referrer = smuggled_data.get('referrer') # XXX: notice the spelling/case of the key
+ if referrer:
+ headers.update({
+ 'Referer': referrer,
+ 'Origin': re.search(r'https?://[^/]+', referrer).group(0),
+ })
+
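+ # try at most twice: first with the cached policy key (if any), then once more after re-extracting it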
+ for _ in range(2):
+ if not policy_key:
+ policy_key = extract_policy_key()
+ policy_key_extracted = True
+ headers['Accept'] = 'application/json;pk=%s' % policy_key
+ try:
+ json_data = self._download_json(api_url, video_id, headers=headers)
+ break
+ except ExtractorError as e:
+ if isinstance(e.cause, HTTPError) and e.cause.status in (401, 403):
+ json_data = self._parse_json(e.cause.response.read().decode(), video_id)[0]
+ message = json_data.get('message') or json_data['error_code']
+ if json_data.get('error_subcode') == 'CLIENT_GEO':
+ self.raise_geo_restricted(msg=message)
+ elif json_data.get('error_code') == 'INVALID_POLICY_KEY' and not policy_key_extracted:
+ policy_key = None
+ store_pk(None)
+ continue
+ raise ExtractorError(message, expected=True)
+ raise
+
+ errors = json_data.get('errors')
+ if errors and errors[0].get('error_subcode') == 'TVE_AUTH':
+ custom_fields = json_data['custom_fields']
+ tve_token = self._extract_mvpd_auth(
+ smuggled_data['source_url'], video_id,
+ custom_fields['bcadobepassrequestorid'],
+ custom_fields['bcadobepassresourceid'])
+ json_data = self._download_json(
+ api_url, video_id, headers={
+ 'Accept': 'application/json;pk=%s' % policy_key
+ }, query={
+ 'tveToken': tve_token,
+ })
+
+ if content_type == 'playlist':
+ return self.playlist_result(
+ [self._parse_brightcove_metadata(vid, vid.get('id'), headers)
+ for vid in json_data.get('videos', []) if vid.get('id')],
+ json_data.get('id'), json_data.get('name'),
+ json_data.get('description'))
+
+ return self._parse_brightcove_metadata(
+ json_data, video_id, headers=headers)
diff --git a/yt_dlp/extractor/brilliantpala.py b/yt_dlp/extractor/brilliantpala.py
new file mode 100644
index 0000000..0bf8622
--- /dev/null
+++ b/yt_dlp/extractor/brilliantpala.py
@@ -0,0 +1,127 @@
+import hashlib
+
+from .common import InfoExtractor
+from ..utils import (
+ ExtractorError,
+ traverse_obj,
+ urlencode_postdata,
+)
+
+
+class BrilliantpalaBaseIE(InfoExtractor):
+ _NETRC_MACHINE = 'brilliantpala'
+ _DOMAIN = '{subdomain}.brilliantpala.org'
+
+ def _initialize_pre_login(self):
+ self._HOMEPAGE = f'https://{self._DOMAIN}'
+ self._LOGIN_API = f'{self._HOMEPAGE}/login/'
+ self._LOGOUT_DEVICES_API = f'{self._HOMEPAGE}/logout_devices/?next=/'
+ self._CONTENT_API = f'{self._HOMEPAGE}/api/v2.4/contents/{{content_id}}/'
+ self._HLS_AES_URI = f'{self._HOMEPAGE}/api/v2.5/video_contents/{{content_id}}/key/'
+
+ def _get_logged_in_username(self, url, video_id):
+ webpage, urlh = self._download_webpage_handle(url, video_id)
+ if urlh.url.startswith(self._LOGIN_API):
+ self.raise_login_required()
+ return self._html_search_regex(
+ r'"username"\s*:\s*"(?P<username>[^"]+)"', webpage, 'logged-in username')
+
+ def _perform_login(self, username, password):
+ login_form = self._hidden_inputs(self._download_webpage(
+ self._LOGIN_API, None, 'Downloading login page'))
+ login_form.update({
+ 'username': username,
+ 'password': password,
+ })
+ self._set_cookie(self._DOMAIN, 'csrftoken', login_form['csrfmiddlewaretoken'])
+
+ logged_page = self._download_webpage(
+ self._LOGIN_API, None, note='Logging in', headers={'Referer': self._LOGIN_API},
+ data=urlencode_postdata(login_form))
+
+ if self._html_search_regex(
+ r'(Your username / email and password)', logged_page, 'auth fail', default=None):
+ raise ExtractorError('wrong username or password', expected=True)
+
+ # at most one concurrent login is allowed, so log out other devices when prompted
+ if self._html_search_regex(
+ r'(Logout Other Devices)', logged_page, 'logout devices button', default=None):
+ logout_device_form = self._hidden_inputs(logged_page)
+ self._download_webpage(
+ self._LOGOUT_DEVICES_API, None, headers={'Referer': self._LOGIN_API},
+ note='Logging out other devices', data=urlencode_postdata(logout_device_form))
+
+ def _real_extract(self, url):
+ course_id, content_id = self._match_valid_url(url).group('course_id', 'content_id')
+ video_id = f'{course_id}-{content_id}'
+
+ username = self._get_logged_in_username(url, video_id)
+
+ content_json = self._download_json(
+ self._CONTENT_API.format(content_id=content_id), video_id,
+ note='Fetching content info', errnote='Unable to fetch content info')
+
+ entries = []
+ for stream in traverse_obj(content_json, ('video', 'streams', lambda _, v: v['id'] and v['url'])):
+ formats = self._extract_m3u8_formats(stream['url'], video_id, fatal=False)
+ if not formats:
+ continue
+ entries.append({
+ 'id': str(stream['id']),
+ 'title': content_json.get('title'),
+ 'formats': formats,
+ 'hls_aes': {'uri': self._HLS_AES_URI.format(content_id=content_id)},
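+ # the HLS key request is authenticated via the SHA-256 hex digest of the logged-in username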
+ 'http_headers': {'X-Key': hashlib.sha256(username.encode('ascii')).hexdigest()},
+ 'thumbnail': content_json.get('cover_image'),
+ })
+
+ return self.playlist_result(
+ entries, playlist_id=video_id, playlist_title=content_json.get('title'))
+
+
+class BrilliantpalaElearnIE(BrilliantpalaBaseIE):
+ IE_NAME = 'Brilliantpala:Elearn'
+ IE_DESC = 'VoD on elearn.brilliantpala.org'
+ _VALID_URL = r'https?://elearn\.brilliantpala\.org/courses/(?P<course_id>\d+)/contents/(?P<content_id>\d+)/?'
+ _TESTS = [{
+ 'url': 'https://elearn.brilliantpala.org/courses/42/contents/12345/',
+ 'only_matching': True,
+ }, {
+ 'url': 'https://elearn.brilliantpala.org/courses/98/contents/36683/',
+ 'info_dict': {
+ 'id': '23577',
+ 'ext': 'mp4',
+ 'title': 'Physical World, Units and Measurements - 1',
+ 'thumbnail': 'https://d1j3vi2u94ebt0.cloudfront.net/institute/brilliantpalalms/chapter_contents/26237/e657f81b90874be19795c7ea081f8d5c.png',
+ 'live_status': 'not_live',
+ },
+ 'params': {
+ 'skip_download': True,
+ },
+ }]
+
+ _DOMAIN = BrilliantpalaBaseIE._DOMAIN.format(subdomain='elearn')
+
+
+class BrilliantpalaClassesIE(BrilliantpalaBaseIE):
+ IE_NAME = 'Brilliantpala:Classes'
+ IE_DESC = 'VoD on classes.brilliantpala.org'
+ _VALID_URL = r'https?://classes\.brilliantpala\.org/courses/(?P<course_id>\d+)/contents/(?P<content_id>\d+)/?'
+ _TESTS = [{
+ 'url': 'https://classes.brilliantpala.org/courses/42/contents/12345/',
+ 'only_matching': True,
+ }, {
+ 'url': 'https://classes.brilliantpala.org/courses/416/contents/25445/',
+ 'info_dict': {
+ 'id': '9128',
+ 'ext': 'mp4',
+ 'title': 'Motion in a Straight Line - Class 1',
+ 'thumbnail': 'https://d3e4y8hquds3ek.cloudfront.net/institute/brilliantpalaelearn/chapter_contents/ff5ba838d0ec43419f67387fe1a01fa8.png',
+ 'live_status': 'not_live',
+ },
+ 'params': {
+ 'skip_download': True,
+ },
+ }]
+
+ _DOMAIN = BrilliantpalaBaseIE._DOMAIN.format(subdomain='classes')
diff --git a/yt_dlp/extractor/bundesliga.py b/yt_dlp/extractor/bundesliga.py
new file mode 100644
index 0000000..e76dd58
--- /dev/null
+++ b/yt_dlp/extractor/bundesliga.py
@@ -0,0 +1,34 @@
+from .common import InfoExtractor
+from .jwplatform import JWPlatformIE
+
+
+class BundesligaIE(InfoExtractor):
+ _VALID_URL = r'https?://(?:www\.)?bundesliga\.com/[a-z]{2}/bundesliga/videos(?:/[^?]+)?\?vid=(?P<id>[a-zA-Z0-9]{8})'
+ _TESTS = [
+ {
+ 'url': 'https://www.bundesliga.com/en/bundesliga/videos?vid=bhhHkKyN',
+ 'md5': '8fc3b25cd12440e3a8cdc51f1493849c',
+ 'info_dict': {
+ 'id': 'bhhHkKyN',
+ 'ext': 'mp4',
+ 'title': 'Watch: Alphonso Davies and Jeremie Frimpong head-to-head',
+ 'thumbnail': 'https://cdn.jwplayer.com/v2/media/bhhHkKyN/poster.jpg?width=720',
+ 'upload_date': '20220928',
+ 'duration': 146,
+ 'timestamp': 1664366511,
+ 'description': 'md5:803d4411bd134140c774021dd4b7598b'
+ }
+ },
+ {
+ 'url': 'https://www.bundesliga.com/en/bundesliga/videos/latest-features/T8IKc8TX?vid=ROHjs06G',
+ 'only_matching': True
+ },
+ {
+ 'url': 'https://www.bundesliga.com/en/bundesliga/videos/goals?vid=mOG56vWA',
+ 'only_matching': True
+ }
+ ]
+
+ def _real_extract(self, url):
+ video_id = self._match_id(url)
+ return self.url_result(f'jwplatform:{video_id}', JWPlatformIE, video_id)
diff --git a/yt_dlp/extractor/bundestag.py b/yt_dlp/extractor/bundestag.py
new file mode 100644
index 0000000..9fd7c7d
--- /dev/null
+++ b/yt_dlp/extractor/bundestag.py
@@ -0,0 +1,123 @@
+import re
+from functools import partial
+
+from .common import InfoExtractor
+from ..networking.exceptions import HTTPError
+from ..utils import (
+ ExtractorError,
+ bug_reports_message,
+ clean_html,
+ format_field,
+ get_element_text_and_html_by_tag,
+ int_or_none,
+ url_or_none,
+)
+from ..utils.traversal import traverse_obj
+
+
+class BundestagIE(InfoExtractor):
+ _VALID_URL = [
+ r'https?://dbtg\.tv/[cf]vid/(?P<id>\d+)',
+ r'https?://www\.bundestag\.de/mediathek/?\?(?:[^#]+&)?videoid=(?P<id>\d+)',
+ ]
+ _TESTS = [{
+ 'url': 'https://dbtg.tv/cvid/7605304',
+ 'info_dict': {
+ 'id': '7605304',
+ 'ext': 'mp4',
+ 'title': '145. Sitzung vom 15.12.2023, TOP 24 Barrierefreiheit',
+ 'description': 'md5:321a9dc6bdad201264c0045efc371561',
+ },
+ }, {
+ 'url': 'https://www.bundestag.de/mediathek?videoid=7602120&url=L21lZGlhdGhla292ZXJsYXk=&mod=mediathek',
+ 'info_dict': {
+ 'id': '7602120',
+ 'ext': 'mp4',
+ 'title': '130. Sitzung vom 18.10.2023, TOP 1 Befragung der Bundesregierung',
+ 'description': 'Befragung der Bundesregierung',
+ },
+ }, {
+ 'url': 'https://www.bundestag.de/mediathek?videoid=7604941#url=L21lZGlhdGhla292ZXJsYXk/dmlkZW9pZD03NjA0OTQx&mod=mediathek',
+ 'only_matching': True,
+ }, {
+ 'url': 'http://dbtg.tv/fvid/3594346',
+ 'only_matching': True,
+ }]
+
+ _OVERLAY_URL = 'https://www.bundestag.de/mediathekoverlay'
+ _INSTANCE_FORMAT = 'https://cldf-wzw-od.r53.cdn.tv1.eu/13014bundestagod/_definst_/13014bundestag/ondemand/3777parlamentsfernsehen/archiv/app144277506/145293313/{0}/{0}_playlist.smil/playlist.m3u8'
+
+ _SHARE_URL = 'https://webtv.bundestag.de/player/macros/_x_s-144277506/shareData.json?contentId='
+ _SHARE_AUDIO_REGEX = r'/\d+_(?P<codec>\w+)_(?P<bitrate>\d+)kb_(?P<channels>\w+)_\w+_\d+\.(?P<ext>\w+)'
+ _SHARE_VIDEO_REGEX = r'/\d+_(?P<codec>\w+)_(?P<width>\w+)_(?P<height>\w+)_(?P<bitrate>\d+)kb_\w+_\w+_\d+\.(?P<ext>\w+)'
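+ # illustrative URL shapes: audio '/123_mp3_128kb_stereo_de_1.mp3', video '/123_h264_1920_1080_5000kb_baseline_de_1.mp4'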
+
+ def _bt_extract_share_formats(self, video_id):
+ share_data = self._download_json(
+ f'{self._SHARE_URL}{video_id}', video_id, note='Downloading share format JSON')
+ if traverse_obj(share_data, ('status', 'code', {int})) != 1:
+ self.report_warning(format_field(
+ share_data, [('status', 'message', {str})],
+ 'Share API response: %s', default='Unknown Share API Error')
+ + bug_reports_message())
+ return
+
+ for name, url in share_data.items():
+ if not isinstance(name, str) or not url_or_none(url):
+ continue
+
+ elif name.startswith('audio'):
+ match = re.search(self._SHARE_AUDIO_REGEX, url)
+ yield {
+ 'format_id': name,
+ 'url': url,
+ 'vcodec': 'none',
+ **traverse_obj(match, {
+ 'acodec': 'codec',
+ 'audio_channels': ('channels', {{'mono': 1, 'stereo': 2}.get}),
+ 'abr': ('bitrate', {int_or_none}),
+ 'ext': 'ext',
+ }),
+ }
+
+ elif name.startswith('download'):
+ match = re.search(self._SHARE_VIDEO_REGEX, url)
+ yield {
+ 'format_id': name,
+ 'url': url,
+ **traverse_obj(match, {
+ 'vcodec': 'codec',
+ 'tbr': ('bitrate', {int_or_none}),
+ 'width': ('width', {int_or_none}),
+ 'height': ('height', {int_or_none}),
+ 'ext': 'ext',
+ }),
+ }
+
+ def _real_extract(self, url):
+ video_id = self._match_id(url)
+ formats = []
+ result = {'id': video_id, 'formats': formats}
+
+ try:
+ formats.extend(self._extract_m3u8_formats(
+ self._INSTANCE_FORMAT.format(video_id), video_id, m3u8_id='instance'))
+ except ExtractorError as error:
+ if isinstance(error.cause, HTTPError) and error.cause.status == 404:
+ raise ExtractorError('Could not find video id', expected=True)
+ self.report_warning(f'Error extracting hls formats: {error}', video_id)
+ formats.extend(self._bt_extract_share_formats(video_id))
+ if not formats:
+ self.raise_no_formats('Could not find suitable formats', video_id=video_id)
+
+ result.update(traverse_obj(self._download_webpage(
+ self._OVERLAY_URL, video_id,
+ query={'videoid': video_id, 'view': 'main'},
+ note='Downloading metadata overlay', fatal=False,
+ ), {
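+ # title: inner HTML of the first <h3> with <span> elements stripped, then cleaned; description: the first <p>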
+ 'title': (
+ {partial(get_element_text_and_html_by_tag, 'h3')}, 0,
+ {partial(re.sub, r'<span[^>]*>[^<]+</span>', '')}, {clean_html}),
+ 'description': ({partial(get_element_text_and_html_by_tag, 'p')}, 0, {clean_html}),
+ }))
+
+ return result
diff --git a/yt_dlp/extractor/businessinsider.py b/yt_dlp/extractor/businessinsider.py
new file mode 100644
index 0000000..4b3f5e6
--- /dev/null
+++ b/yt_dlp/extractor/businessinsider.py
@@ -0,0 +1,45 @@
+from .common import InfoExtractor
+from .jwplatform import JWPlatformIE
+
+
+class BusinessInsiderIE(InfoExtractor):
+ _VALID_URL = r'https?://(?:[^/]+\.)?businessinsider\.(?:com|nl)/(?:[^/]+/)*(?P<id>[^/?#&]+)'
+ _TESTS = [{
+ 'url': 'http://uk.businessinsider.com/how-much-radiation-youre-exposed-to-in-everyday-life-2016-6',
+ 'md5': 'ffed3e1e12a6f950aa2f7d83851b497a',
+ 'info_dict': {
+ 'id': 'cjGDb0X9',
+ 'ext': 'mp4',
+ 'title': "Bananas give you more radiation exposure than living next to a nuclear power plant",
+ 'description': 'md5:0175a3baf200dd8fa658f94cade841b3',
+ 'upload_date': '20160611',
+ 'timestamp': 1465675620,
+ },
+ }, {
+ 'url': 'https://www.businessinsider.nl/5-scientifically-proven-things-make-you-less-attractive-2017-7/',
+ 'md5': '43f438dbc6da0b89f5ac42f68529d84a',
+ 'info_dict': {
+ 'id': '5zJwd4FK',
+ 'ext': 'mp4',
+ 'title': 'Deze dingen zorgen ervoor dat je minder snel een date scoort',
+ 'description': 'md5:2af8975825d38a4fed24717bbe51db49',
+ 'upload_date': '20170705',
+ 'timestamp': 1499270528,
+ },
+ }, {
+ 'url': 'http://www.businessinsider.com/excel-index-match-vlookup-video-how-to-2015-2?IR=T',
+ 'only_matching': True,
+ }]
+
+ def _real_extract(self, url):
+ video_id = self._match_id(url)
+ webpage = self._download_webpage(url, video_id)
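+ # the JW Platform media id (8 alphanumeric chars) appears in several shapes, e.g. data-media-id="cjGDb0X9"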
+ jwplatform_id = self._search_regex(
+ (r'data-media-id=["\']([a-zA-Z0-9]{8})',
+ r'id=["\']jwplayer_([a-zA-Z0-9]{8})',
+ r'id["\']?\s*:\s*["\']?([a-zA-Z0-9]{8})',
+ r'(?:jwplatform\.com/players/|jwplayer_)([a-zA-Z0-9]{8})'),
+ webpage, 'jwplatform id')
+ return self.url_result(
+ 'jwplatform:%s' % jwplatform_id, ie=JWPlatformIE.ie_key(),
+ video_id=video_id)
diff --git a/yt_dlp/extractor/buzzfeed.py b/yt_dlp/extractor/buzzfeed.py
new file mode 100644
index 0000000..b30a3b7
--- /dev/null
+++ b/yt_dlp/extractor/buzzfeed.py
@@ -0,0 +1,95 @@
+import json
+import re
+
+from .common import InfoExtractor
+from .facebook import FacebookIE
+
+
+class BuzzFeedIE(InfoExtractor):
+ _VALID_URL = r'https?://(?:www\.)?buzzfeed\.com/[^?#]*?/(?P<id>[^?#]+)'
+ _TESTS = [{
+ 'url': 'http://www.buzzfeed.com/abagg/this-angry-ram-destroys-a-punching-bag-like-a-boss?utm_term=4ldqpia',
+ 'info_dict': {
+ 'id': 'this-angry-ram-destroys-a-punching-bag-like-a-boss',
+ 'title': 'This Angry Ram Destroys A Punching Bag Like A Boss',
+ 'description': 'Rambro!',
+ },
+ 'playlist': [{
+ 'info_dict': {
+ 'id': 'aVCR29aE_OQ',
+ 'ext': 'mp4',
+ 'title': 'Angry Ram destroys a punching bag..',
+ 'description': 'md5:c59533190ef23fd4458a5e8c8c872345',
+ 'upload_date': '20141024',
+ 'uploader_id': 'Buddhanz1',
+ 'uploader': 'Angry Ram',
+ }
+ }]
+ }, {
+ 'url': 'http://www.buzzfeed.com/sheridanwatson/look-at-this-cute-dog-omg?utm_term=4ldqpia',
+ 'params': {
+ 'skip_download': True, # Got enough YouTube download tests
+ },
+ 'info_dict': {
+ 'id': 'look-at-this-cute-dog-omg',
+ 'description': 're:Munchkin the Teddy Bear is back ?!',
+ 'title': 'You Need To Stop What You\'re Doing And Watching This Dog Walk On A Treadmill',
+ },
+ 'playlist': [{
+ 'info_dict': {
+ 'id': 'mVmBL8B-In0',
+ 'ext': 'mp4',
+ 'title': 're:Munchkin the Teddy Bear gets her exercise',
+ 'description': 'md5:28faab95cda6e361bcff06ec12fc21d8',
+ 'upload_date': '20141124',
+ 'uploader_id': 'CindysMunchkin',
+ 'uploader': 're:^Munchkin the',
+ },
+ }]
+ }, {
+ 'url': 'http://www.buzzfeed.com/craigsilverman/the-most-adorable-crash-landing-ever#.eq7pX0BAmK',
+ 'info_dict': {
+ 'id': 'the-most-adorable-crash-landing-ever',
+ 'title': 'Watch This Baby Goose Make The Most Adorable Crash Landing',
+ 'description': 'This gosling knows how to stick a landing.',
+ },
+ 'playlist': [{
+ 'md5': '763ca415512f91ca62e4621086900a23',
+ 'info_dict': {
+ 'id': '971793786185728',
+ 'ext': 'mp4',
+ 'title': 'We set up crash pads so that the goslings on our roof would have a safe landi...',
+ 'uploader': 'Calgary Outdoor Centre-University of Calgary',
+ },
+ }],
+ 'add_ie': ['Facebook'],
+ }]
+
+ def _real_extract(self, url):
+ playlist_id = self._match_id(url)
+ webpage = self._download_webpage(url, playlist_id)
+
+ all_buckets = re.findall(
+ r'(?s)<div class="video-embed[^"]*"..*?rel:bf_bucket_data=\'([^\']+)\'',
+ webpage)
+
+ entries = []
+ for bd_json in all_buckets:
+ bd = json.loads(bd_json)
+ video = bd.get('video') or bd.get('progload_video')
+ if not video:
+ continue
+ entries.append(self.url_result(video['url']))
+
+ facebook_urls = FacebookIE._extract_embed_urls(url, webpage)
+ entries.extend([
+ self.url_result(facebook_url)
+ for facebook_url in facebook_urls])
+
+ return {
+ '_type': 'playlist',
+ 'id': playlist_id,
+ 'title': self._og_search_title(webpage),
+ 'description': self._og_search_description(webpage),
+ 'entries': entries,
+ }
diff --git a/yt_dlp/extractor/byutv.py b/yt_dlp/extractor/byutv.py
new file mode 100644
index 0000000..ad35427
--- /dev/null
+++ b/yt_dlp/extractor/byutv.py
@@ -0,0 +1,104 @@
+from .common import InfoExtractor
+from ..utils import (
+ determine_ext,
+ merge_dicts,
+ parse_duration,
+ url_or_none,
+)
+
+
+class BYUtvIE(InfoExtractor):
+ _WORKING = False
+ _VALID_URL = r'https?://(?:www\.)?byutv\.org/(?:watch|player)/(?!event/)(?P<id>[0-9a-f-]+)(?:/(?P<display_id>[^/?#&]+))?'
+ _TESTS = [{
+ 'url': 'http://www.byutv.org/watch/6587b9a3-89d2-42a6-a7f7-fd2f81840a7d/studio-c-season-5-episode-5',
+ 'info_dict': {
+ 'id': 'ZvanRocTpW-G5_yZFeltTAMv6jxOU9KH',
+ 'display_id': 'studio-c-season-5-episode-5',
+ 'ext': 'mp4',
+ 'title': 'Season 5 Episode 5',
+ 'description': 'md5:1d31dc18ef4f075b28f6a65937d22c65',
+ 'thumbnail': r're:^https?://.*',
+ 'duration': 1486.486,
+ },
+ 'params': {
+ 'skip_download': True,
+ },
+ }, {
+ # dvr
+ 'url': 'https://www.byutv.org/player/8f1dab9b-b243-47c8-b525-3e2d021a3451/byu-softball-pacific-vs-byu-41219---game-2',
+ 'info_dict': {
+ 'id': '8f1dab9b-b243-47c8-b525-3e2d021a3451',
+ 'display_id': 'byu-softball-pacific-vs-byu-41219---game-2',
+ 'ext': 'mp4',
+ 'title': 'Pacific vs. BYU (4/12/19)',
+ 'description': 'md5:1ac7b57cb9a78015910a4834790ce1f3',
+ 'duration': 11645,
+ },
+ 'params': {
+ 'skip_download': True
+ },
+ }, {
+ 'url': 'http://www.byutv.org/watch/6587b9a3-89d2-42a6-a7f7-fd2f81840a7d',
+ 'only_matching': True,
+ }, {
+ 'url': 'https://www.byutv.org/player/27741493-dc83-40b0-8420-e7ae38a2ae98/byu-football-toledo-vs-byu-93016?listid=4fe0fee5-0d3c-4a29-b725-e4948627f472&listindex=0&q=toledo',
+ 'only_matching': True,
+ }]
+
+ def _real_extract(self, url):
+ mobj = self._match_valid_url(url)
+ video_id = mobj.group('id')
+ display_id = mobj.group('display_id') or video_id
+
+ video = self._download_json(
+ 'https://api.byutv.org/api3/catalog/getvideosforcontent',
+ display_id, query={
+ 'contentid': video_id,
+ 'channel': 'byutv',
+ 'x-byutv-context': 'web$US',
+ }, headers={
+ 'x-byutv-context': 'web$US',
+ 'x-byutv-platformkey': 'xsaaw9c7y5',
+ })
+
+ info = {}
+ formats = []
+ subtitles = {}
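+ # the catalog response maps format ids to per-endpoint dicts, each of which may carry an HLS or DASH manifest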
+ for format_id, ep in video.items():
+ if not isinstance(ep, dict):
+ continue
+ video_url = url_or_none(ep.get('videoUrl'))
+ if not video_url:
+ continue
+ ext = determine_ext(video_url)
+ if ext == 'm3u8':
+ m3u8_fmts, m3u8_subs = self._extract_m3u8_formats_and_subtitles(
+ video_url, video_id, 'mp4', entry_protocol='m3u8_native',
+ m3u8_id='hls', fatal=False)
+ formats.extend(m3u8_fmts)
+ subtitles = self._merge_subtitles(subtitles, m3u8_subs)
+ elif ext == 'mpd':
+ mpd_fmts, mpd_subs = self._extract_mpd_formats_and_subtitles(
+ video_url, video_id, mpd_id='dash', fatal=False)
+ formats.extend(mpd_fmts)
+ subtitles = self._merge_subtitles(subtitles, mpd_subs)
+ else:
+ formats.append({
+ 'url': video_url,
+ 'format_id': format_id,
+ })
+ merge_dicts(info, {
+ 'title': ep.get('title'),
+ 'description': ep.get('description'),
+ 'thumbnail': ep.get('imageThumbnail'),
+ 'duration': parse_duration(ep.get('length')),
+ })
+
+ return merge_dicts(info, {
+ 'id': video_id,
+ 'display_id': display_id,
+ 'title': display_id,
+ 'formats': formats,
+ 'subtitles': subtitles,
+ })
diff --git a/yt_dlp/extractor/c56.py b/yt_dlp/extractor/c56.py
new file mode 100644
index 0000000..e4b1c9a
--- /dev/null
+++ b/yt_dlp/extractor/c56.py
@@ -0,0 +1,59 @@
+from .common import InfoExtractor
+from ..utils import js_to_json
+
+
+class C56IE(InfoExtractor):
+ _VALID_URL = r'https?://(?:(?:www|player)\.)?56\.com/(?:.+?/)?(?:v_|(?:play_album.+-))(?P<textid>.+?)\.(?:html|swf)'
+ IE_NAME = '56.com'
+ _TESTS = [{
+ 'url': 'http://www.56.com/u39/v_OTM0NDA3MTY.html',
+ 'md5': 'e59995ac63d0457783ea05f93f12a866',
+ 'info_dict': {
+ 'id': '93440716',
+ 'ext': 'flv',
+ 'title': '网事知多少 第32期:车怒',
+ 'duration': 283.813,
+ },
+ }, {
+ 'url': 'http://www.56.com/u47/v_MTM5NjQ5ODc2.html',
+ 'md5': '',
+ 'info_dict': {
+ 'id': '82247482',
+ 'title': '爱的诅咒之杜鹃花开',
+ },
+ 'playlist_count': 7,
+ 'add_ie': ['Sohu'],
+ }]
+
+ def _real_extract(self, url):
+ mobj = self._match_valid_url(url)
+ text_id = mobj.group('textid')
+
+ webpage = self._download_webpage(url, text_id)
+ sohu_video_info_str = self._search_regex(
+ r'var\s+sohuVideoInfo\s*=\s*({[^}]+});', webpage, 'Sohu video info', default=None)
+ if sohu_video_info_str:
+ sohu_video_info = self._parse_json(
+ sohu_video_info_str, text_id, transform_source=js_to_json)
+ return self.url_result(sohu_video_info['url'], 'Sohu')
+
+ page = self._download_json(
+ 'http://vxml.56.com/json/%s/' % text_id, text_id, 'Downloading video info')
+
+ info = page['info']
+
+ formats = [
+ {
+ 'format_id': f['type'],
+ 'filesize': int(f['filesize']),
+ 'url': f['url']
+ } for f in info['rfiles']
+ ]
+
+ return {
+ 'id': info['vid'],
+ 'title': info['Subject'],
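+ # the 56.com API reports duration in milliseconds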
+ 'duration': int(info['duration']) / 1000.0,
+ 'formats': formats,
+ 'thumbnail': info.get('bimg') or info.get('img'),
+ }
diff --git a/yt_dlp/extractor/cableav.py b/yt_dlp/extractor/cableav.py
new file mode 100644
index 0000000..4a22141
--- /dev/null
+++ b/yt_dlp/extractor/cableav.py
@@ -0,0 +1,32 @@
+from .common import InfoExtractor
+
+
+class CableAVIE(InfoExtractor):
+ _VALID_URL = r'https?://cableav\.tv/(?P<id>[a-zA-Z0-9]+)'
+ _TESTS = [{
+ 'url': 'https://cableav.tv/lS4iR9lWjN8/',
+ 'md5': '7e3fe5e49d61c4233b7f5b0f69b15e18',
+ 'info_dict': {
+ 'id': 'lS4iR9lWjN8',
+ 'ext': 'mp4',
+ 'title': '國產麻豆AV 叮叮映畫 DDF001 情欲小說家 - CableAV',
+ 'description': '國產AV 480p, 720p 国产麻豆AV 叮叮映画 DDF001 情欲小说家',
+ 'thumbnail': r're:^https?://.*\.jpg$',
+ }
+ }]
+
+ def _real_extract(self, url):
+ video_id = self._match_id(url)
+ webpage = self._download_webpage(url, video_id)
+
+ video_url = self._og_search_video_url(webpage, secure=False)
+
+ formats = self._extract_m3u8_formats(video_url, video_id, 'mp4')
+
+ return {
+ 'id': video_id,
+ 'title': self._og_search_title(webpage),
+ 'description': self._og_search_description(webpage),
+ 'thumbnail': self._og_search_thumbnail(webpage),
+ 'formats': formats,
+ }
diff --git a/yt_dlp/extractor/callin.py b/yt_dlp/extractor/callin.py
new file mode 100644
index 0000000..c77179c
--- /dev/null
+++ b/yt_dlp/extractor/callin.py
@@ -0,0 +1,155 @@
+from .common import InfoExtractor
+from ..utils import float_or_none, int_or_none, make_archive_id, traverse_obj
+
+
+class CallinIE(InfoExtractor):
+ _VALID_URL = r'https?://(?:www\.)?callin\.com/(episode)/(?P<id>[-a-zA-Z]+)'
+ _TESTS = [{
+ 'url': 'https://www.callin.com/episode/the-title-ix-regime-and-the-long-march-through-EBfXYSrsjc',
+ 'info_dict': {
+ 'id': '218b979630a35ead12c6fd096f2996c56c37e4d0dc1f6dc0feada32dcf7b31cd',
+ 'title': 'The Title IX Regime and the Long March Through and Beyond the Institutions',
+ 'ext': 'ts',
+ 'display_id': 'the-title-ix-regime-and-the-long-march-through-EBfXYSrsjc',
+ 'thumbnail': 're:https://.+\\.png',
+ 'description': 'First episode',
+ 'uploader': 'Wesley Yang',
+ 'timestamp': 1639404128.65,
+ 'upload_date': '20211213',
+ 'uploader_id': 'wesyang',
+ 'uploader_url': 'http://wesleyyang.substack.com',
+ 'channel': 'Conversations in Year Zero',
+ 'channel_id': '436d1f82ddeb30cd2306ea9156044d8d2cfdc3f1f1552d245117a42173e78553',
+ 'channel_url': 'https://callin.com/show/conversations-in-year-zero-oJNllRFSfx',
+ 'duration': 9951.936,
+ 'view_count': int,
+ 'categories': ['News & Politics', 'History', 'Technology'],
+ 'cast': ['Wesley Yang', 'KC Johnson', 'Gabi Abramovich'],
+ 'series': 'Conversations in Year Zero',
+ 'series_id': '436d1f82ddeb30cd2306ea9156044d8d2cfdc3f1f1552d245117a42173e78553',
+ 'episode': 'The Title IX Regime and the Long March Through and Beyond the Institutions',
+ 'episode_number': 1,
+ 'episode_id': '218b979630a35ead12c6fd096f2996c56c37e4d0dc1f6dc0feada32dcf7b31cd'
+ }
+ }, {
+ 'url': 'https://www.callin.com/episode/fcc-commissioner-brendan-carr-on-elons-PrumRdSQJW',
+ 'md5': '14ede27ee2c957b7e4db93140fc0745c',
+ 'info_dict': {
+ 'id': 'c3dab47f237bf953d180d3f243477a84302798be0e0b29bc9ade6d60a69f04f5',
+ 'ext': 'ts',
+ 'title': 'FCC Commissioner Brendan Carr on Elon’s Starlink',
+ 'description': 'Or, why the government doesn’t like SpaceX',
+ 'channel': 'The Pull Request',
+ 'channel_url': 'https://callin.com/show/the-pull-request-ucnDJmEKAa',
+ 'duration': 3182.472,
+ 'series_id': '7e9c23156e4aecfdcaef46bfb2ed7ca268509622ec006c0f0f25d90e34496638',
+ 'uploader_url': 'http://thepullrequest.com',
+ 'upload_date': '20220902',
+ 'episode': 'FCC Commissioner Brendan Carr on Elon’s Starlink',
+ 'display_id': 'fcc-commissioner-brendan-carr-on-elons-PrumRdSQJW',
+ 'series': 'The Pull Request',
+ 'channel_id': '7e9c23156e4aecfdcaef46bfb2ed7ca268509622ec006c0f0f25d90e34496638',
+ 'view_count': int,
+ 'uploader': 'Antonio García Martínez',
+ 'thumbnail': 'https://d1z76fhpoqkd01.cloudfront.net/shows/legacy/1ade9142625344045dc17cf523469ced1d93610762f4c886d06aa190a2f979e8.png',
+ 'episode_id': 'c3dab47f237bf953d180d3f243477a84302798be0e0b29bc9ade6d60a69f04f5',
+ 'timestamp': 1662100688.005,
+ }
+ }, {
+ 'url': 'https://www.callin.com/episode/episode-81-elites-melt-down-over-student-debt-lzxMidUnjA',
+ 'md5': '16f704ddbf82a27e3930533b12062f07',
+ 'info_dict': {
+ 'id': '8d06f869798f93a7814e380bceabea72d501417e620180416ff6bd510596e83c',
+ 'ext': 'ts',
+ 'title': 'Episode 81- Elites MELT DOWN over Student Debt Victory? Rumble in NYC?',
+ 'description': 'Let’s talk todays episode about the primary election shake up in NYC and the elites melting down over student debt cancelation.',
+ 'channel': 'The DEBRIEF With Briahna Joy Gray',
+ 'channel_url': 'https://callin.com/show/the-debrief-with-briahna-joy-gray-siiFDzGegm',
+ 'duration': 10043.16,
+ 'series_id': '61cea58444465fd26674069703bd8322993bc9e5b4f1a6d0872690554a046ff7',
+ 'uploader_url': 'http://patreon.com/badfaithpodcast',
+ 'upload_date': '20220826',
+ 'episode': 'Episode 81- Elites MELT DOWN over Student Debt Victory? Rumble in NYC?',
+ 'display_id': 'episode-',
+ 'series': 'The DEBRIEF With Briahna Joy Gray',
+ 'channel_id': '61cea58444465fd26674069703bd8322993bc9e5b4f1a6d0872690554a046ff7',
+ 'view_count': int,
+ 'uploader': 'Briahna Gray',
+ 'thumbnail': 'https://d1z76fhpoqkd01.cloudfront.net/shows/legacy/461ea0d86172cb6aff7d6c80fd49259cf5e64bdf737a4650f8bc24cf392ca218.png',
+ 'episode_id': '8d06f869798f93a7814e380bceabea72d501417e620180416ff6bd510596e83c',
+ 'timestamp': 1661476708.282,
+ }
+ }]
+
+ def try_get_user_name(self, d):
+ names = [d.get(n) for n in ('first', 'last')]
+ if None in names:
+ return next((n for n in names if n), None)  # next() only accepts its default positionally; a keyword argument raises TypeError
+ return ' '.join(names)
+
+ def _real_extract(self, url):
+ display_id = self._match_id(url)
+ webpage = self._download_webpage(url, display_id)
+
+ next_data = self._search_nextjs_data(webpage, display_id)
+ episode = next_data['props']['pageProps']['episode']
+
+ id = episode['id']
+ title = episode.get('title') or self._generic_title('', webpage)
+ url = episode['m3u8']
+ formats = self._extract_m3u8_formats(url, display_id, ext='ts')
+
+ show = traverse_obj(episode, ('show', 'title'))
+ show_id = traverse_obj(episode, ('show', 'id'))
+
+ show_json = None
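+ # the Next.js build id (taken from a /_next/static script path, or buildId from the page data) is needed to hit the /_next/data JSON endpoint for the show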
+ app_slug = (self._html_search_regex(
+ '<script\\s+src=["\']/_next/static/([-_a-zA-Z0-9]+)/_',
+ webpage, 'app slug', fatal=False) or next_data.get('buildId'))
+ show_slug = traverse_obj(episode, ('show', 'linkObj', 'resourceUrl'))
+ if app_slug and show_slug and '/' in show_slug:
+ show_slug = show_slug.rsplit('/', 1)[1]
+ show_json_url = f'https://www.callin.com/_next/data/{app_slug}/show/{show_slug}.json'
+ show_json = self._download_json(show_json_url, display_id, fatal=False)
+
+ host = (traverse_obj(show_json, ('pageProps', 'show', 'hosts', 0))
+ or traverse_obj(episode, ('speakers', 0)))
+
+ host_nick = traverse_obj(host, ('linkObj', 'resourceUrl'))
+ host_nick = host_nick.rsplit('/', 1)[1] if (host_nick and '/' in host_nick) else None
+
+ cast = list(filter(None, [
+ self.try_get_user_name(u) for u in
+ traverse_obj(episode, (('speakers', 'callerTags'), ...)) or []
+ ]))
+
+ episode_list = traverse_obj(show_json, ('pageProps', 'show', 'episodes')) or []
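+ # the show feed appears to list episodes newest-first, so an episode's number is its distance from the end of the list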
+ episode_number = next(
+ (len(episode_list) - i for (i, e) in enumerate(episode_list) if e.get('id') == id),
+ None)
+
+ return {
+ 'id': id,
+ '_old_archive_ids': [make_archive_id(self, display_id.rsplit('-', 1)[-1])],
+ 'display_id': display_id,
+ 'title': title,
+ 'formats': formats,
+ 'thumbnail': traverse_obj(episode, ('show', 'photo')),
+ 'description': episode.get('description'),
+ 'uploader': self.try_get_user_name(host) if host else None,
+ 'timestamp': episode.get('publishedAt'),
+ 'uploader_id': host_nick,
+ 'uploader_url': traverse_obj(show_json, ('pageProps', 'show', 'url')),
+ 'channel': show,
+ 'channel_id': show_id,
+ 'channel_url': traverse_obj(episode, ('show', 'linkObj', 'resourceUrl')),
+ 'duration': float_or_none(episode.get('runtime')),
+ 'view_count': int_or_none(episode.get('plays')),
+ 'categories': traverse_obj(episode, ('show', 'categorizations', ..., 'name')),
+ 'cast': cast if cast else None,
+ 'series': show,
+ 'series_id': show_id,
+ 'episode': title,
+ 'episode_number': episode_number,
+ 'episode_id': id
+ }
diff --git a/yt_dlp/extractor/caltrans.py b/yt_dlp/extractor/caltrans.py
new file mode 100644
index 0000000..f4a4a83
--- /dev/null
+++ b/yt_dlp/extractor/caltrans.py
@@ -0,0 +1,37 @@
+from .common import InfoExtractor
+
+
+class CaltransIE(InfoExtractor):
+ _VALID_URL = r'https?://(?:[^/]+\.)?ca\.gov/vm/loc/[^/]+/(?P<id>[a-z0-9_]+)\.htm'
+ _TEST = {
+ 'url': 'https://cwwp2.dot.ca.gov/vm/loc/d3/hwy50at24th.htm',
+ 'info_dict': {
+ 'id': 'hwy50at24th',
+ 'ext': 'ts',
+ 'title': 'US-50 : Sacramento : Hwy 50 at 24th',
+ 'live_status': 'is_live',
+ 'thumbnail': 'https://cwwp2.dot.ca.gov/data/d3/cctv/image/hwy50at24th/hwy50at24th.jpg',
+ }
+ }
+
+ def _real_extract(self, url):
+ video_id = self._match_id(url)
+ webpage = self._download_webpage(url, video_id)
+
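+ # the page inlines its stream settings as plain JS globals; grab the <script> body containing the .m3u8 URL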
+ global_vars = self._search_regex(
+ r'<script[^<]+?([^<]+\.m3u8[^<]+)</script>',
+ webpage, 'Global Vars')
+ route_place = self._search_regex(r'routePlace\s*=\s*"([^"]+)"', global_vars, 'Route Place', fatal=False)
+ location_name = self._search_regex(r'locationName\s*=\s*"([^"]+)"', global_vars, 'Location Name', fatal=False)
+ poster_url = self._search_regex(r'posterURL\s*=\s*"([^"]+)"', global_vars, 'Poster Url', fatal=False)
+ video_stream = self._search_regex(r'videoStreamURL\s*=\s*"([^"]+)"', global_vars, 'Video Stream URL', fatal=False)
+
+ formats = self._extract_m3u8_formats(video_stream, video_id, 'ts', live=True)
+
+ return {
+ 'id': video_id,
+ 'title': f'{route_place} : {location_name}',
+ 'is_live': True,
+ 'formats': formats,
+ 'thumbnail': poster_url,
+ }
diff --git a/yt_dlp/extractor/cam4.py b/yt_dlp/extractor/cam4.py
new file mode 100644
index 0000000..2650cc1
--- /dev/null
+++ b/yt_dlp/extractor/cam4.py
@@ -0,0 +1,31 @@
+from .common import InfoExtractor
+
+
+class CAM4IE(InfoExtractor):
+ _VALID_URL = r'https?://(?:[^/]+\.)?cam4\.com/(?P<id>[a-z0-9_]+)'
+ _TEST = {
+ 'url': 'https://www.cam4.com/foxynesss',
+ 'info_dict': {
+ 'id': 'foxynesss',
+ 'ext': 'mp4',
+ 'title': 're:^foxynesss [0-9]{4}-[0-9]{2}-[0-9]{2} [0-9]{2}:[0-9]{2}$',
+ 'age_limit': 18,
+ 'live_status': 'is_live',
+ 'thumbnail': 'https://snapshots.xcdnpro.com/thumbnails/foxynesss',
+ }
+ }
+
+ def _real_extract(self, url):
+ channel_id = self._match_id(url)
+ m3u8_playlist = self._download_json('https://www.cam4.com/rest/v1.0/profile/{}/streamInfo'.format(channel_id), channel_id).get('cdnURL')
+
+ formats = self._extract_m3u8_formats(m3u8_playlist, channel_id, 'mp4', m3u8_id='hls', live=True)
+
+ return {
+ 'id': channel_id,
+ 'title': channel_id,
+ 'is_live': True,
+ 'age_limit': 18,
+ 'formats': formats,
+ 'thumbnail': f'https://snapshots.xcdnpro.com/thumbnails/{channel_id}',
+ }
diff --git a/yt_dlp/extractor/camdemy.py b/yt_dlp/extractor/camdemy.py
new file mode 100644
index 0000000..c7079e4
--- /dev/null
+++ b/yt_dlp/extractor/camdemy.py
@@ -0,0 +1,158 @@
+import re
+
+from .common import InfoExtractor
+from ..compat import (
+ compat_urllib_parse_urlencode,
+ compat_urlparse,
+)
+from ..utils import (
+ clean_html,
+ parse_duration,
+ str_to_int,
+ unified_strdate,
+)
+
+
+class CamdemyIE(InfoExtractor):
+ _VALID_URL = r'https?://(?:www\.)?camdemy\.com/media/(?P<id>\d+)'
+ _TESTS = [{
+ # single file
+ 'url': 'http://www.camdemy.com/media/5181/',
+ 'md5': '5a5562b6a98b37873119102e052e311b',
+ 'info_dict': {
+ 'id': '5181',
+ 'ext': 'mp4',
+ 'title': 'Ch1-1 Introduction, Signals (02-23-2012)',
+ 'thumbnail': r're:^https?://.*\.jpg$',
+ 'creator': 'ss11spring',
+ 'duration': 1591,
+ 'upload_date': '20130114',
+ 'view_count': int,
+ }
+ }, {
+ # With non-empty description
+ # webpage returns "No permission or not login"
+ 'url': 'http://www.camdemy.com/media/13885',
+ 'md5': '4576a3bb2581f86c61044822adbd1249',
+ 'info_dict': {
+ 'id': '13885',
+ 'ext': 'mp4',
+ 'title': 'EverCam + Camdemy QuickStart',
+ 'thumbnail': r're:^https?://.*\.jpg$',
+ 'description': 'md5:2a9f989c2b153a2342acee579c6e7db6',
+ 'creator': 'evercam',
+ 'duration': 318,
+ }
+ }, {
+ # External source (YouTube)
+ 'url': 'http://www.camdemy.com/media/14842',
+ 'info_dict': {
+ 'id': '2vsYQzNIsJo',
+ 'ext': 'mp4',
+ 'title': 'Excel 2013 Tutorial - How to add Password Protection',
+ 'description': 'Excel 2013 Tutorial for Beginners - How to add Password Protection',
+ 'upload_date': '20130211',
+ 'uploader': 'Hun Kim',
+ 'uploader_id': 'hunkimtutorials',
+ },
+ 'params': {
+ 'skip_download': True,
+ },
+ }]
+
+ def _real_extract(self, url):
+ video_id = self._match_id(url)
+
+ webpage = self._download_webpage(url, video_id)
+
+ src_from = self._html_search_regex(
+ r"class=['\"]srcFrom['\"][^>]*>Sources?(?:\s+from)?\s*:\s*<a[^>]+(?:href|title)=(['\"])(?P<url>(?:(?!\1).)+)\1",
+ webpage, 'external source', default=None, group='url')
+ if src_from:
+ return self.url_result(src_from)
+
+ oembed_obj = self._download_json(
+ 'http://www.camdemy.com/oembed/?format=json&url=' + url, video_id)
+
+ title = oembed_obj['title']
+ thumb_url = oembed_obj['thumbnail_url']
+ video_folder = compat_urlparse.urljoin(thumb_url, 'video/')
+ file_list_doc = self._download_xml(
+ compat_urlparse.urljoin(video_folder, 'fileList.xml'),
+ video_id, 'Downloading filelist XML')
+ file_name = file_list_doc.find('./video/item/fileName').text
+ video_url = compat_urlparse.urljoin(video_folder, file_name)
+
+ # Some URLs return "No permission or not login" in a webpage despite being
+ # freely available via oembed JSON URL (e.g. http://www.camdemy.com/media/13885)
+ upload_date = unified_strdate(self._search_regex(
+ r'>published on ([^<]+)<', webpage,
+ 'upload date', default=None))
+ view_count = str_to_int(self._search_regex(
+ r'role=["\']viewCnt["\'][^>]*>([\d,.]+) views',
+ webpage, 'view count', default=None))
+ description = self._html_search_meta(
+ 'description', webpage, default=None) or clean_html(
+ oembed_obj.get('description'))
+
+ return {
+ 'id': video_id,
+ 'url': video_url,
+ 'title': title,
+ 'thumbnail': thumb_url,
+ 'description': description,
+ 'creator': oembed_obj.get('author_name'),
+ 'duration': parse_duration(oembed_obj.get('duration')),
+ 'upload_date': upload_date,
+ 'view_count': view_count,
+ }
+
+
+class CamdemyFolderIE(InfoExtractor):
+ _VALID_URL = r'https?://(?:www\.)?camdemy\.com/folder/(?P<id>\d+)'
+ _TESTS = [{
+ # links with trailing slash
+ 'url': 'http://www.camdemy.com/folder/450',
+ 'info_dict': {
+ 'id': '450',
+ 'title': '信號與系統 2012 & 2011 (Signals and Systems)',
+ },
+ 'playlist_mincount': 145
+ }, {
+ # links without trailing slash
+ # and multi-page
+ 'url': 'http://www.camdemy.com/folder/853',
+ 'info_dict': {
+ 'id': '853',
+ 'title': '科學計算 - 使用 Matlab'
+ },
+ 'playlist_mincount': 20
+ }, {
+ # with displayMode parameter. For testing the codes to add parameters
+ 'url': 'http://www.camdemy.com/folder/853/?displayMode=defaultOrderByOrg',
+ 'info_dict': {
+ 'id': '853',
+ 'title': '科學計算 - 使用 Matlab'
+ },
+ 'playlist_mincount': 20
+ }]
+
+ def _real_extract(self, url):
+ folder_id = self._match_id(url)
+
+ # Add displayMode=list so that all links are displayed in a single page
+ parsed_url = list(compat_urlparse.urlparse(url))
+ query = dict(compat_urlparse.parse_qsl(parsed_url[4]))
+ query.update({'displayMode': 'list'})
+ parsed_url[4] = compat_urllib_parse_urlencode(query)
+ final_url = compat_urlparse.urlunparse(parsed_url)
+
+ page = self._download_webpage(final_url, folder_id)
+ matches = re.findall(r"href='(/media/\d+/?)'", page)
+
+ entries = [self.url_result('http://www.camdemy.com' + media_path)
+ for media_path in matches]
+
+ folder_title = self._html_search_meta('keywords', page)
+
+ return self.playlist_result(entries, folder_id, folder_title)
diff --git a/yt_dlp/extractor/camfm.py b/yt_dlp/extractor/camfm.py
new file mode 100644
index 0000000..11dafa4
--- /dev/null
+++ b/yt_dlp/extractor/camfm.py
@@ -0,0 +1,85 @@
+import re
+
+from .common import InfoExtractor
+from ..utils import (
+ clean_html,
+ get_element_by_class,
+ get_elements_by_class,
+ join_nonempty,
+ traverse_obj,
+ unified_timestamp,
+ urljoin,
+)
+
+
+class CamFMShowIE(InfoExtractor):
+ _VALID_URL = r'https?://(?:www\.)?camfm\.co\.uk/shows/(?P<id>[^/]+)'
+ _TESTS = [{
+ 'playlist_mincount': 5,
+ 'url': 'https://camfm.co.uk/shows/soul-mining/',
+ 'info_dict': {
+ 'id': 'soul-mining',
+ 'thumbnail': 'md5:6a873091f92c936f23bdcce80f75e66a',
+ 'title': 'Soul Mining',
+ 'description': 'Telling the stories of jazz, funk and soul from all corners of the world.',
+ },
+ }]
+
+ def _real_extract(self, url):
+ show_id = self._match_id(url)
+ page = self._download_webpage(url, show_id)
+
+ return {
+ '_type': 'playlist',
+ 'id': show_id,
+ 'entries': [self.url_result(urljoin('https://camfm.co.uk', i), CamFMEpisodeIE)
+ for i in re.findall(r"javascript:popup\('(/player/[^']+)', 'listen'", page)],
+ 'thumbnail': urljoin('https://camfm.co.uk', self._search_regex(
+ r'<img[^>]+class="thumb-expand"[^>]+src="([^"]+)"', page, 'thumbnail', fatal=False)),
+ 'title': self._html_search_regex('<h1>([^<]+)</h1>', page, 'title', fatal=False),
+ 'description': clean_html(get_element_by_class('small-12 medium-8 cell', page))
+ }
+
+
+class CamFMEpisodeIE(InfoExtractor):
+ _VALID_URL = r'https?://(?:www\.)?camfm\.co\.uk/player/(?P<id>[^/]+)'
+ _TESTS = [{
+ 'url': 'https://camfm.co.uk/player/43336',
+ 'skip': 'Episode will expire - don\'t actually know when, but it will go eventually',
+ 'info_dict': {
+ 'id': '43336',
+ 'title': 'AITAA: Am I the Agony Aunt? - 19:00 Tue 16/05/2023',
+ 'ext': 'mp3',
+ 'upload_date': '20230516',
+ 'description': 'md5:f165144f94927c0f1bfa2ee6e6ab7bbf',
+ 'timestamp': 1684263600,
+ 'series': 'AITAA: Am I the Agony Aunt?',
+ 'thumbnail': 'md5:5980a831360d0744c3764551be3d09c1',
+ 'categories': ['Entertainment'],
+ }
+ }]
+
+ def _real_extract(self, url):
+ episode_id = self._match_id(url)
+ page = self._download_webpage(url, episode_id)
+ audios = self._parse_html5_media_entries('https://audio.camfm.co.uk', page, episode_id)
+
+ caption = get_element_by_class('caption', page)
+ series = clean_html(re.sub(r'<span[^<]+<[^<]+>', '', caption))
+
+ card_section = get_element_by_class('card-section', page)
+ date = self._html_search_regex('>Aired at ([^<]+)<', card_section, 'air date', fatal=False)
+
+ return {
+ 'id': episode_id,
+ 'title': join_nonempty(series, date, delim=' - '),
+ 'formats': traverse_obj(audios, (..., 'formats', ...)),
+ 'timestamp': unified_timestamp(date), # XXX: Does not account for UK's daylight savings
+ 'series': series,
+ 'description': clean_html(re.sub(r'<b>[^<]+</b><br[^>]+/>', '', card_section)),
+ 'thumbnail': urljoin('https://camfm.co.uk', self._search_regex(
+ r'<div[^>]+class="cover-art"[^>]+style="[^"]+url\(\'([^\']+)',
+ page, 'thumbnail', fatal=False)),
+ 'categories': get_elements_by_class('label', caption),
+ 'was_live': True,
+ }
diff --git a/yt_dlp/extractor/cammodels.py b/yt_dlp/extractor/cammodels.py
new file mode 100644
index 0000000..135b315
--- /dev/null
+++ b/yt_dlp/extractor/cammodels.py
@@ -0,0 +1,77 @@
+from .common import InfoExtractor
+from ..utils import int_or_none, url_or_none
+
+
+class CamModelsIE(InfoExtractor):
+ _VALID_URL = r'https?://(?:www\.)?cammodels\.com/cam/(?P<id>[^/?#&]+)'
+ _TESTS = [{
+ 'url': 'https://www.cammodels.com/cam/AutumnKnight/',
+ 'only_matching': True,
+ 'age_limit': 18
+ }]
+
+ def _real_extract(self, url):
+ user_id = self._match_id(url)
+
+ manifest = self._download_json(
+ 'https://manifest-server.naiadsystems.com/live/s:%s.json' % user_id, user_id)
+
+ formats = []
+ thumbnails = []
+ for format_id, format_dict in manifest['formats'].items():
+ if not isinstance(format_dict, dict):
+ continue
+ encodings = format_dict.get('encodings')
+ if not isinstance(encodings, list):
+ continue
+ vcodec = format_dict.get('videoCodec')
+ acodec = format_dict.get('audioCodec')
+ for media in encodings:
+ if not isinstance(media, dict):
+ continue
+ media_url = url_or_none(media.get('location'))
+ if not media_url:
+ continue
+
+ format_id_list = [format_id]
+ height = int_or_none(media.get('videoHeight'))
+ if height is not None:
+ format_id_list.append('%dp' % height)
+ f = {
+ 'url': media_url,
+ 'format_id': '-'.join(format_id_list),
+ 'width': int_or_none(media.get('videoWidth')),
+ 'height': height,
+ 'vbr': int_or_none(media.get('videoKbps')),
+ 'abr': int_or_none(media.get('audioKbps')),
+ 'fps': int_or_none(media.get('fps')),
+ 'vcodec': vcodec,
+ 'acodec': acodec,
+ }
+ if 'rtmp' in format_id:
+ f['ext'] = 'flv'
+ elif 'hls' in format_id:
+ f.update({
+ 'ext': 'mp4',
+ # hls skips fragments, preferring rtmp
+ 'quality': -10,
+ })
+ else:
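+ # 'jpeg' encodings are periodic snapshots rather than video; surface them as thumbnails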
+ if format_id == 'jpeg':
+ thumbnails.append({
+ 'url': f['url'],
+ 'width': f['width'],
+ 'height': f['height'],
+ 'format_id': f['format_id'],
+ })
+ continue
+ formats.append(f)
+
+ return {
+ 'id': user_id,
+ 'title': user_id,
+ 'thumbnails': thumbnails,
+ 'is_live': True,
+ 'formats': formats,
+ 'age_limit': 18
+ }
diff --git a/yt_dlp/extractor/camsoda.py b/yt_dlp/extractor/camsoda.py
new file mode 100644
index 0000000..021cd91
--- /dev/null
+++ b/yt_dlp/extractor/camsoda.py
@@ -0,0 +1,57 @@
+import random
+
+from .common import InfoExtractor
+from ..utils import ExtractorError, traverse_obj
+
+
+class CamsodaIE(InfoExtractor):
+ _VALID_URL = r'https?://www\.camsoda\.com/(?P<id>[\w-]+)'
+ _TESTS = [{
+ 'url': 'https://www.camsoda.com/lizzhopf',
+ 'info_dict': {
+ 'id': 'lizzhopf',
+ 'ext': 'mp4',
+ 'title': 'lizzhopf (lizzhopf) Nude on Cam. Free Live Sex Chat Room - CamSoda',
+ 'description': str,
+ 'is_live': True,
+ 'age_limit': 18,
+ },
+ 'skip': 'Room is offline',
+ }]
+
+ def _real_extract(self, url):
+ video_id = self._match_id(url)
+ webpage = self._download_webpage(url, video_id, headers=self.geo_verification_headers())
+
+ data = self._download_json(
+ f'https://camsoda.com/api/v1/video/vtoken/{video_id}', video_id,
+ query={'username': f'guest_{random.randrange(10000, 99999)}'},
+ headers=self.geo_verification_headers())
+ if not data:
+ raise ExtractorError('Unable to find configuration for stream.')
+ elif data.get('private_servers'):
+ raise ExtractorError('Model is in private show.', expected=True)
+ elif not data.get('stream_name'):
+ raise ExtractorError('Model is offline.', expected=True)
+
+ stream_name = traverse_obj(data, 'stream_name', expected_type=str)
+ token = traverse_obj(data, 'token', expected_type=str)
+
+ formats = []
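+ # try each edge server in turn until one returns a usable HLS playlist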
+ for server in traverse_obj(data, ('edge_servers', ...)):
+ formats = self._extract_m3u8_formats(
+ f'https://{server}/{stream_name}_v1/index.m3u8?token={token}',
+ video_id, ext='mp4', m3u8_id='hls', fatal=False, live=True)
+ if formats:
+ break
+ if not formats:
+ self.raise_no_formats('No active streams found', expected=True)
+
+ return {
+ 'id': video_id,
+ 'title': self._html_extract_title(webpage),
+ 'description': self._html_search_meta('description', webpage, default=None),
+ 'is_live': True,
+ 'formats': formats,
+ 'age_limit': 18,
+ }
diff --git a/yt_dlp/extractor/camtasia.py b/yt_dlp/extractor/camtasia.py
new file mode 100644
index 0000000..70ab6c6
--- /dev/null
+++ b/yt_dlp/extractor/camtasia.py
@@ -0,0 +1,71 @@
+import os
+import urllib.parse
+
+from .common import InfoExtractor
+from ..utils import float_or_none
+
+
+class CamtasiaEmbedIE(InfoExtractor):
+ _VALID_URL = False
+ _WEBPAGE_TESTS = [
+ {
+ 'url': 'http://www.ll.mit.edu/workshops/education/videocourses/antennas/lecture1/video/',
+ 'playlist': [{
+ 'md5': '0c5e352edabf715d762b0ad4e6d9ee67',
+ 'info_dict': {
+ 'id': 'Fenn-AA_PA_Radar_Course_Lecture_1c_Final',
+ 'title': 'Fenn-AA_PA_Radar_Course_Lecture_1c_Final - video1',
+ 'ext': 'flv',
+ 'duration': 2235.90,
+ }
+ }, {
+ 'md5': '10e4bb3aaca9fd630e273ff92d9f3c63',
+ 'info_dict': {
+ 'id': 'Fenn-AA_PA_Radar_Course_Lecture_1c_Final_PIP',
+ 'title': 'Fenn-AA_PA_Radar_Course_Lecture_1c_Final - pip',
+ 'ext': 'flv',
+ 'duration': 2235.93,
+ }
+ }],
+ 'info_dict': {
+ 'title': 'Fenn-AA_PA_Radar_Course_Lecture_1c_Final',
+ },
+ 'skip': 'webpage dead',
+ }]
+
+ def _extract_from_webpage(self, url, webpage):
+ camtasia_cfg = self._search_regex(
+ r'fo\.addVariable\(\s*"csConfigFile",\s*"([^"]+)"\s*\);',
+ webpage, 'camtasia configuration file', default=None)
+ if camtasia_cfg is None:
+ return None
+
+ title = self._html_search_meta('DC.title', webpage, fatal=True)
+
+ camtasia_url = urllib.parse.urljoin(url, camtasia_cfg)
+ camtasia_cfg = self._download_xml(
+ camtasia_url, self._generic_id(url),
+ note='Downloading camtasia configuration',
+ errnote='Failed to download camtasia configuration')
+ fileset_node = camtasia_cfg.find('./playlist/array/fileset')
+
+ entries = []
+ # Element.getchildren() was removed in Python 3.9; iterate the element directly instead
+ for n in fileset_node:
+ url_n = n.find('./uri')
+ if url_n is None:
+ continue
+
+ entries.append({
+ 'id': os.path.splitext(url_n.text.rpartition('/')[2])[0],
+ 'title': f'{title} - {n.tag}',
+ 'url': urllib.parse.urljoin(url, url_n.text),
+ 'duration': float_or_none(n.find('./duration').text),
+ })
+
+ return {
+ '_type': 'playlist',
+ 'entries': entries,
+ 'title': title,
+ }
diff --git a/yt_dlp/extractor/canal1.py b/yt_dlp/extractor/canal1.py
new file mode 100644
index 0000000..587a11a
--- /dev/null
+++ b/yt_dlp/extractor/canal1.py
@@ -0,0 +1,39 @@
+from .common import InfoExtractor
+
+
+class Canal1IE(InfoExtractor):
+ _VALID_URL = r'https?://(?:www\.|noticias\.)?canal1\.com\.co/(?:[^?#&])+/(?P<id>[\w-]+)'
+
+ _TESTS = [{
+ 'url': 'https://canal1.com.co/noticias/napa-i-una-cadena-de-produccion-de-arroz-que-se-quedo-en-veremos-y-abandonada-en-el-departamento-del-choco/',
+ 'info_dict': {
+ 'id': '63b39f6b354977084b85ab54',
+ 'display_id': 'napa-i-una-cadena-de-produccion-de-arroz-que-se-quedo-en-veremos-y-abandonada-en-el-departamento-del-choco',
+ 'title': 'Ñapa I Una cadena de producción de arroz que se quedó en veremos y abandonada en el departamento del Chocó',
+ 'description': 'md5:bc49c6d64d20610ea1e7daf079a0d013',
+ 'thumbnail': r're:^https?://[^?#]+63b39f6b354977084b85ab54',
+ 'ext': 'mp4',
+ },
+ }, {
+ 'url': 'https://noticias.canal1.com.co/noticias/tres-i-el-triste-record-que-impuso-elon-musk-el-dueno-de-tesla-y-de-twitter/',
+ 'info_dict': {
+ 'id': '63b39e93f5fd223aa32250fb',
+ 'display_id': 'tres-i-el-triste-record-que-impuso-elon-musk-el-dueno-de-tesla-y-de-twitter',
+ 'title': 'Tres I El triste récord que impuso Elon Musk, el dueño de Tesla y de Twitter',
+ 'description': 'md5:d9f691f131a21ce6767ca6c05d17d791',
+ 'thumbnail': r're:^https?://[^?#]+63b39e93f5fd223aa32250fb',
+ 'ext': 'mp4',
+ },
+ }, {
+ # Geo-restricted to Colombia
+ 'url': 'https://canal1.com.co/programas/guerreros-canal-1/video-inedito-guerreros-despedida-kewin-zarate/',
+ 'only_matching': True,
+ }]
+
+ def _real_extract(self, url):
+ display_id = self._match_id(url)
+ webpage = self._download_webpage(url, display_id)
+
+ return self.url_result(
+ self._search_regex(r'"embedUrl"\s*:\s*"([^"]+)', webpage, 'embed url'),
+ display_id=display_id, url_transparent=True)
diff --git a/yt_dlp/extractor/canalalpha.py b/yt_dlp/extractor/canalalpha.py
new file mode 100644
index 0000000..df5ca58
--- /dev/null
+++ b/yt_dlp/extractor/canalalpha.py
@@ -0,0 +1,94 @@
+from .common import InfoExtractor
+from ..utils import (
+ clean_html,
+ dict_get,
+ try_get,
+ unified_strdate,
+)
+
+
+class CanalAlphaIE(InfoExtractor):
+ _VALID_URL = r'https?://(?:www\.)?canalalpha\.ch/play/[^/]+/[^/]+/(?P<id>\d+)/?.*'
+
+ _TESTS = [{
+ 'url': 'https://www.canalalpha.ch/play/le-journal/episode/24520/jeudi-28-octobre-2021',
+ 'info_dict': {
+ 'id': '24520',
+ 'ext': 'mp4',
+ 'title': 'Jeudi 28 octobre 2021',
+ 'description': 'md5:d30c6c3e53f8ad40d405379601973b30',
+ 'thumbnail': 'https://static.canalalpha.ch/poster/journal/journal_20211028.jpg',
+ 'upload_date': '20211028',
+ 'duration': 1125,
+ },
+ 'params': {'skip_download': True}
+ }, {
+ 'url': 'https://www.canalalpha.ch/play/le-journal/topic/24512/la-poste-fait-de-neuchatel-un-pole-cryptographique',
+ 'info_dict': {
+ 'id': '24512',
+ 'ext': 'mp4',
+ 'title': 'La Poste fait de Neuchâtel un pôle cryptographique',
+ 'description': 'md5:4ba63ae78a0974d1a53d6703b6e1dedf',
+ 'thumbnail': 'https://static.canalalpha.ch/poster/news/news_39712.jpg',
+ 'upload_date': '20211028',
+ 'duration': 138,
+ },
+ 'params': {'skip_download': True}
+ }, {
+ 'url': 'https://www.canalalpha.ch/play/eureka/episode/24484/ces-innovations-qui-veulent-rendre-lagriculture-plus-durable',
+ 'info_dict': {
+ 'id': '24484',
+ 'ext': 'mp4',
+ 'title': 'Ces innovations qui veulent rendre l’agriculture plus durable',
+ 'description': 'md5:3de3f151180684621e85be7c10e4e613',
+ 'thumbnail': 'https://static.canalalpha.ch/poster/magazine/magazine_10236.jpg',
+ 'upload_date': '20211026',
+ 'duration': 360,
+ },
+ 'params': {'skip_download': True}
+ }, {
+ 'url': 'https://www.canalalpha.ch/play/avec-le-temps/episode/23516/redonner-de-leclat-grace-au-polissage',
+ 'info_dict': {
+ 'id': '23516',
+ 'ext': 'mp4',
+ 'title': 'Redonner de l\'éclat grâce au polissage',
+ 'description': 'md5:0d8fbcda1a5a4d6f6daa3165402177e1',
+ 'thumbnail': 'https://static.canalalpha.ch/poster/magazine/magazine_9990.png',
+ 'upload_date': '20210726',
+ 'duration': 360,
+ },
+ 'params': {'skip_download': True}
+ }]
+
+ def _real_extract(self, url):
+ id = self._match_id(url)
+ webpage = self._download_webpage(url, id)
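+ # the player state is embedded as window.__SERVER_STATE__; the regex tolerates quoted strings (including escaped quotes) inside the JSON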
+ data_json = self._parse_json(self._search_regex(
+ r'window\.__SERVER_STATE__\s?=\s?({(?:(?!};)[^"]|"([^"]|\\")*")+})\s?;',
+ webpage, 'data_json'), id)['1']['data']['data']
+ manifests = try_get(data_json, lambda x: x['video']['manifests'], expected_type=dict) or {}
+ subtitles = {}
+ formats = [{
+ 'url': video['$url'],
+ 'ext': 'mp4',
+ 'width': try_get(video, lambda x: x['res']['width'], expected_type=int),
+ 'height': try_get(video, lambda x: x['res']['height'], expected_type=int),
+ } for video in try_get(data_json, lambda x: x['video']['mp4'], expected_type=list) or [] if video.get('$url')]
+ if manifests.get('hls'):
+ m3u8_frmts, m3u8_subs = self._parse_m3u8_formats_and_subtitles(manifests['hls'], video_id=id)
+ formats.extend(m3u8_frmts)
+ subtitles = self._merge_subtitles(subtitles, m3u8_subs)
+ if manifests.get('dash'):
+ dash_frmts, dash_subs = self._parse_mpd_formats_and_subtitles(manifests['dash'])
+ formats.extend(dash_frmts)
+ subtitles = self._merge_subtitles(subtitles, dash_subs)
+ return {
+ 'id': id,
+ 'title': try_get(data_json, lambda x: x['title'].strip()),
+ 'description': clean_html(dict_get(data_json, ('longDesc', 'shortDesc'))),
+ 'thumbnail': data_json.get('poster'),
+ 'upload_date': unified_strdate(dict_get(data_json, ('webPublishAt', 'featuredAt', 'diffusionDate'))),
+ 'duration': try_get(data_json, lambda x: x['video']['duration'], expected_type=int),
+ 'formats': formats,
+ 'subtitles': subtitles,
+ }
diff --git a/yt_dlp/extractor/canalc2.py b/yt_dlp/extractor/canalc2.py
new file mode 100644
index 0000000..597cb2a
--- /dev/null
+++ b/yt_dlp/extractor/canalc2.py
@@ -0,0 +1,68 @@
+import re
+
+from .common import InfoExtractor
+from ..utils import parse_duration
+
+
+class Canalc2IE(InfoExtractor):
+ IE_NAME = 'canalc2.tv'
+ _VALID_URL = r'https?://(?:(?:www\.)?canalc2\.tv/video/|archives-canalc2\.u-strasbg\.fr/video\.asp\?.*\bidVideo=)(?P<id>\d+)'
+
+ _TESTS = [{
+ 'url': 'http://www.canalc2.tv/video/12163',
+ 'md5': '060158428b650f896c542dfbb3d6487f',
+ 'info_dict': {
+ 'id': '12163',
+ 'ext': 'mp4',
+ 'title': 'Terrasses du Numérique',
+ 'duration': 122,
+ },
+ }, {
+ 'url': 'http://archives-canalc2.u-strasbg.fr/video.asp?idVideo=11427&voir=oui',
+ 'only_matching': True,
+ }]
+
+ def _real_extract(self, url):
+ video_id = self._match_id(url)
+
+ webpage = self._download_webpage(
+ 'http://www.canalc2.tv/video/%s' % video_id, video_id)
+
+ title = self._html_search_regex(
+ r'(?s)class="[^"]*col_description[^"]*">.*?<h3>(.+?)</h3>',
+ webpage, 'title')
+
+ formats = []
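+ # sources appear as file = "..." assignments in the page; RTMP URLs must be split into app and play_path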
+ for _, video_url in re.findall(r'file\s*=\s*(["\'])(.+?)\1', webpage):
+ if video_url.startswith('rtmp://'):
+ rtmp = re.search(
+ r'^(?P<url>rtmp://[^/]+/(?P<app>.+/))(?P<play_path>mp4:.+)$', video_url)
+ formats.append({
+ 'url': rtmp.group('url'),
+ 'format_id': 'rtmp',
+ 'ext': 'flv',
+ 'app': rtmp.group('app'),
+ 'play_path': rtmp.group('play_path'),
+ 'page_url': url,
+ })
+ else:
+ formats.append({
+ 'url': video_url,
+ 'format_id': 'http',
+ })
+
+ if formats:
+ info = {
+ 'formats': formats,
+ }
+ else:
+ info = self._parse_html5_media_entries(url, webpage, url)[0]
+
+ info.update({
+ 'id': video_id,
+ 'title': title,
+ 'duration': parse_duration(self._search_regex(
+ r'id=["\']video_duree["\'][^>]*>([^<]+)',
+ webpage, 'duration', fatal=False)),
+ })
+ return info
diff --git a/yt_dlp/extractor/canalplus.py b/yt_dlp/extractor/canalplus.py
new file mode 100644
index 0000000..3ff5c3f
--- /dev/null
+++ b/yt_dlp/extractor/canalplus.py
@@ -0,0 +1,110 @@
+from .common import InfoExtractor
+from ..utils import (
+ # ExtractorError,
+ # HEADRequest,
+ int_or_none,
+ qualities,
+ unified_strdate,
+)
+
+
+class CanalplusIE(InfoExtractor):
+ IE_DESC = 'mycanal.fr and piwiplus.fr'
+ _VALID_URL = r'https?://(?:www\.)?(?P<site>mycanal|piwiplus)\.fr/(?:[^/]+/)*(?P<display_id>[^?/]+)(?:\.html\?.*\bvid=|/p/)(?P<id>\d+)'
+ _VIDEO_INFO_TEMPLATE = 'http://service.canal-plus.com/video/rest/getVideosLiees/%s/%s?format=json'
+ _SITE_ID_MAP = {
+ 'mycanal': 'cplus',
+ 'piwiplus': 'teletoon',
+ }
+
+ # Only works for direct mp4 URLs
+ _GEO_COUNTRIES = ['FR']
+
+ _TESTS = [{
+ 'url': 'https://www.mycanal.fr/d17-emissions/lolywood/p/1397061',
+ 'info_dict': {
+ 'id': '1397061',
+ 'display_id': 'lolywood',
+ 'ext': 'mp4',
+ 'title': 'Euro 2016 : Je préfère te prévenir - Lolywood - Episode 34',
+ 'description': 'md5:7d97039d455cb29cdba0d652a0efaa5e',
+ 'upload_date': '20160602',
+ },
+ }, {
+ # geo restricted, bypassed
+ 'url': 'http://www.piwiplus.fr/videos-piwi/pid1405-le-labyrinthe-boing-super-ranger.html?vid=1108190',
+ 'info_dict': {
+ 'id': '1108190',
+ 'display_id': 'pid1405-le-labyrinthe-boing-super-ranger',
+ 'ext': 'mp4',
+ 'title': 'BOING SUPER RANGER - Ep : Le labyrinthe',
+ 'description': 'md5:4cea7a37153be42c1ba2c1d3064376ff',
+ 'upload_date': '20140724',
+ },
+ 'expected_warnings': ['HTTP Error 403: Forbidden'],
+ }]
+
+ def _real_extract(self, url):
+ site, display_id, video_id = self._match_valid_url(url).groups()
+
+ site_id = self._SITE_ID_MAP[site]
+
+ info_url = self._VIDEO_INFO_TEMPLATE % (site_id, video_id)
+ video_data = self._download_json(info_url, video_id, 'Downloading video JSON')
+
+ if isinstance(video_data, list):
+ video_data = [video for video in video_data if video.get('ID') == video_id][0]
+ media = video_data['MEDIA']
+ infos = video_data['INFOS']
+
+ preference = qualities(['MOBILE', 'BAS_DEBIT', 'HAUT_DEBIT', 'HD'])
+
+ # _, fmt_url = next(iter(media['VIDEOS'].items()))
+ # if '/geo' in fmt_url.lower():
+ # response = self._request_webpage(
+ # HEADRequest(fmt_url), video_id,
+ # 'Checking if the video is georestricted')
+ # if '/blocage' in response.url:
+ # raise ExtractorError(
+ # 'The video is not available in your country',
+ # expected=True)
+
+ formats = []
+ for format_id, format_url in media['VIDEOS'].items():
+ if not format_url:
+ continue
+ if format_id == 'HLS':
+ formats.extend(self._extract_m3u8_formats(
+ format_url, video_id, 'mp4', 'm3u8_native', m3u8_id=format_id, fatal=False))
+ elif format_id == 'HDS':
+ formats.extend(self._extract_f4m_formats(
+ format_url + '?hdcore=2.11.3', video_id, f4m_id=format_id, fatal=False))
+ else:
+ formats.append({
+ # the secret extracted from ya function in http://player.canalplus.fr/common/js/canalPlayer.js
+ 'url': format_url + '?secret=pqzerjlsmdkjfoiuerhsdlfknaes',
+ 'format_id': format_id,
+ 'quality': preference(format_id),
+ })
+
+ thumbnails = [{
+ 'id': image_id,
+ 'url': image_url,
+ } for image_id, image_url in media.get('images', {}).items()]
+
+ titrage = infos['TITRAGE']
+
+ return {
+ 'id': video_id,
+ 'display_id': display_id,
+ 'title': '%s - %s' % (titrage['TITRE'],
+ titrage['SOUS_TITRE']),
+ 'upload_date': unified_strdate(infos.get('PUBLICATION', {}).get('DATE')),
+ 'thumbnails': thumbnails,
+ 'description': infos.get('DESCRIPTION'),
+ 'duration': int_or_none(infos.get('DURATION')),
+ 'view_count': int_or_none(infos.get('NB_VUES')),
+ 'like_count': int_or_none(infos.get('NB_LIKES')),
+ 'comment_count': int_or_none(infos.get('NB_COMMENTS')),
+ 'formats': formats,
+ }
diff --git a/yt_dlp/extractor/caracoltv.py b/yt_dlp/extractor/caracoltv.py
new file mode 100644
index 0000000..79f7752
--- /dev/null
+++ b/yt_dlp/extractor/caracoltv.py
@@ -0,0 +1,136 @@
+import base64
+import json
+import uuid
+
+from .common import InfoExtractor
+from ..utils import (
+ int_or_none,
+ js_to_json,
+ traverse_obj,
+ urljoin,
+)
+
+
+class CaracolTvPlayIE(InfoExtractor):
+ _VALID_URL = r'https?://play\.caracoltv\.com/videoDetails/(?P<id>[^/?#]+)'
+ _NETRC_MACHINE = 'caracoltv-play'
+
+ _TESTS = [{
+ 'url': 'https://play.caracoltv.com/videoDetails/OTo4NGFmNjUwOWQ2ZmM0NTg2YWRiOWU0MGNhOWViOWJkYQ==',
+ 'info_dict': {
+ 'id': 'OTo4NGFmNjUwOWQ2ZmM0NTg2YWRiOWU0MGNhOWViOWJkYQ==',
+ 'title': 'La teoría del promedio',
+ 'description': 'md5:1cdd6d2c13f19ef0d9649ab81a023ac3',
+ },
+ 'playlist_count': 6,
+ }, {
+ 'url': 'https://play.caracoltv.com/videoDetails/OTo3OWM4ZTliYzQxMmM0MTMxYTk4Mjk2YjdjNGQ4NGRkOQ==/ella?season=0',
+ 'info_dict': {
+ 'id': 'OTo3OWM4ZTliYzQxMmM0MTMxYTk4Mjk2YjdjNGQ4NGRkOQ==',
+ 'title': 'Ella',
+ 'description': 'md5:a639b1feb5ddcc0cff92a489b4e544b8',
+ },
+ 'playlist_count': 10,
+ }, {
+ 'url': 'https://play.caracoltv.com/videoDetails/OTpiYTY1YTVmOTI5MzI0ZWJhOGZiY2Y3MmRlOWZlYmJkOA==/la-vuelta-al-mundo-en-80-risas-2022?season=0',
+ 'info_dict': {
+ 'id': 'OTpiYTY1YTVmOTI5MzI0ZWJhOGZiY2Y3MmRlOWZlYmJkOA==',
+ 'title': 'La vuelta al mundo en 80 risas 2022',
+ 'description': 'md5:e97aac36106e5c37ebf947b3350106a4',
+ },
+ 'playlist_count': 17,
+ }, {
+ 'url': 'https://play.caracoltv.com/videoDetails/MzoxX3BwbjRmNjB1',
+ 'only_matching': True,
+ }]
+
+ _USER_TOKEN = None
+
+ def _extract_app_token(self, webpage):
+ config_js_path = self._search_regex(
+ r'<script[^>]+src\s*=\s*"([^"]+coreConfig.js[^"]+)', webpage, 'config js url', fatal=False)
+
+ mediation_config = {} if not config_js_path else self._search_json(
+ r'mediation\s*:', self._download_webpage(
+ urljoin('https://play.caracoltv.com/', config_js_path), None, fatal=False, note='Extracting JS config'),
+ 'mediation_config', None, transform_source=js_to_json, fatal=False)
+
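+ # fall back to the static key/secret pair when the config JS cannot be fetched or parsed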
+ key = traverse_obj(
+ mediation_config, ('live', 'key')) or '795cd9c089a1fc48094524a5eba85a3fca1331817c802f601735907c8bbb4f50'
+ secret = traverse_obj(
+ mediation_config, ('live', 'secret')) or '64dec00a6989ba83d087621465b5e5d38bdac22033b0613b659c442c78976fa0'
+
+ return base64.b64encode(f'{key}:{secret}'.encode()).decode()
+
+ def _perform_login(self, email, password):
+ webpage = self._download_webpage('https://play.caracoltv.com/', None, fatal=False)
+ app_token = self._extract_app_token(webpage)
+
+ bearer_token = self._download_json(
+ 'https://eu-gateway.inmobly.com/applications/oauth', None, data=b'', note='Retrieving bearer token',
+ headers={'Authorization': f'Basic {app_token}'})['token']
+
+ self._USER_TOKEN = self._download_json(
+ 'https://eu-gateway.inmobly.com/user/login', None, note='Performing login', headers={
+ 'Content-Type': 'application/json',
+ 'Authorization': f'Bearer {bearer_token}',
+ }, data=json.dumps({
+ 'device_data': {
+ 'device_id': str(uuid.uuid4()),
+ 'device_token': '',
+ 'device_type': 'web'
+ },
+ 'login_data': {
+ 'enabled': True,
+ 'email': email,
+ 'password': password,
+ }
+ }).encode())['user_token']
+
+ def _extract_video(self, video_data, series_id=None, season_id=None, season_number=None):
+ formats, subtitles = self._extract_m3u8_formats_and_subtitles(video_data['stream_url'], series_id, 'mp4')
+
+ return {
+ 'id': video_data['id'],
+ 'title': video_data.get('name'),
+ 'description': video_data.get('description'),
+ 'formats': formats,
+ 'subtitles': subtitles,
+ 'thumbnails': traverse_obj(
+ video_data, ('extra_thumbs', ..., {'url': 'thumb_url', 'height': 'height', 'width': 'width'})),
+ 'series_id': series_id,
+ 'season_id': season_id,
+ 'season_number': int_or_none(season_number),
+ 'episode_number': int_or_none(video_data.get('item_order')),
+ 'is_live': video_data.get('entry_type') == 3,
+ }
+
+ def _extract_series_seasons(self, seasons, series_id):
+ for season in seasons:
+ api_response = self._download_json(
+ 'https://eu-gateway.inmobly.com/feed', series_id, query={'season_id': season['id']},
+ headers={'Authorization': f'Bearer {self._USER_TOKEN}'})
+
+ season_number = season.get('order')
+ for episode in api_response['items']:
+ yield self._extract_video(episode, series_id, season['id'], season_number)
+
+ def _real_extract(self, url):
+ series_id = self._match_id(url)
+
+ if self._USER_TOKEN is None:
+ self._perform_login('guest@inmobly.com', 'Test@gus1')
+
+ api_response = self._download_json(
+ 'https://eu-gateway.inmobly.com/feed', series_id, query={'include_ids': series_id},
+ headers={'Authorization': f'Bearer {self._USER_TOKEN}'})['items'][0]
+
+ if not api_response.get('seasons'):
+ return self._extract_video(api_response)
+
+ return self.playlist_result(
+ self._extract_series_seasons(api_response['seasons'], series_id),
+ series_id, **traverse_obj(api_response, {
+ 'title': 'name',
+ 'description': 'description',
+ }))
diff --git a/yt_dlp/extractor/cartoonnetwork.py b/yt_dlp/extractor/cartoonnetwork.py
new file mode 100644
index 0000000..4dd7ac4
--- /dev/null
+++ b/yt_dlp/extractor/cartoonnetwork.py
@@ -0,0 +1,59 @@
+from .turner import TurnerBaseIE
+from ..utils import int_or_none
+
+
+class CartoonNetworkIE(TurnerBaseIE):
+ _VALID_URL = r'https?://(?:www\.)?cartoonnetwork\.com/video/(?:[^/]+/)+(?P<id>[^/?#]+)-(?:clip|episode)\.html'
+ _TEST = {
+ 'url': 'https://www.cartoonnetwork.com/video/ben-10/how-to-draw-upgrade-episode.html',
+ 'info_dict': {
+ 'id': '6e3375097f63874ebccec7ef677c1c3845fa850e',
+ 'ext': 'mp4',
+ 'title': 'How to Draw Upgrade',
+ 'description': 'md5:2061d83776db7e8be4879684eefe8c0f',
+ },
+ 'params': {
+ # m3u8 download
+ 'skip_download': True,
+ },
+ }
+
+ def _real_extract(self, url):
+ display_id = self._match_id(url)
+ webpage = self._download_webpage(url, display_id)
+
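+ # page metadata is exposed via _cnglobal.currentVideo.* and video_metadata.content_* JS assignments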
+ def find_field(global_re, name, content_re=None, value_re='[^"]+', fatal=False):
+ metadata_re = ''
+ if content_re:
+ metadata_re = r'|video_metadata\.content_' + content_re
+ return self._search_regex(
+ r'(?:_cnglobal\.currentVideo\.%s%s)\s*=\s*"(%s)";' % (global_re, metadata_re, value_re),
+ webpage, name, fatal=fatal)
+
+ media_id = find_field('mediaId', 'media id', 'id', '[0-9a-f]{40}', True)
+ title = find_field('episodeTitle', 'title', '(?:episodeName|name)', fatal=True)
+
+ info = self._extract_ngtv_info(
+ media_id, {'networkId': 'cartoonnetwork'}, {
+ 'url': url,
+ 'site_name': 'CartoonNetwork',
+ 'auth_required': find_field('authType', 'auth type') != 'unauth',
+ })
+
+ series = find_field(
+ 'propertyName', 'series', 'showName') or self._html_search_meta('partOfSeries', webpage)
+ info.update({
+ 'id': media_id,
+ 'display_id': display_id,
+ 'title': title,
+ 'description': self._html_search_meta('description', webpage),
+ 'series': series,
+ 'episode': title,
+ })
+
+ for field in ('season', 'episode'):
+ field_name = field + 'Number'
+ info[field + '_number'] = int_or_none(find_field(
+ field_name, field + ' number', value_re=r'\d+') or self._html_search_meta(field_name, webpage))
+
+ return info
diff --git a/yt_dlp/extractor/cbc.py b/yt_dlp/extractor/cbc.py
new file mode 100644
index 0000000..b5beb1e
--- /dev/null
+++ b/yt_dlp/extractor/cbc.py
@@ -0,0 +1,653 @@
+import base64
+import json
+import re
+import time
+import urllib.parse
+import xml.etree.ElementTree
+
+from .common import InfoExtractor
+from ..compat import (
+ compat_str,
+)
+from ..utils import (
+ ExtractorError,
+ int_or_none,
+ join_nonempty,
+ js_to_json,
+ orderedSet,
+ parse_iso8601,
+ smuggle_url,
+ strip_or_none,
+ traverse_obj,
+ try_get,
+)
+
+
+class CBCIE(InfoExtractor):
+ IE_NAME = 'cbc.ca'
+ _VALID_URL = r'https?://(?:www\.)?cbc\.ca/(?!player/)(?:[^/]+/)+(?P<id>[^/?#]+)'
+ _TESTS = [{
+ # with mediaId
+ 'url': 'http://www.cbc.ca/22minutes/videos/clips-season-23/don-cherry-play-offs',
+ 'md5': '97e24d09672fc4cf56256d6faa6c25bc',
+ 'info_dict': {
+ 'id': '2682904050',
+ 'ext': 'mp4',
+ 'title': 'Don Cherry – All-Stars',
+ 'description': 'Don Cherry has a bee in his bonnet about AHL player John Scott because that guy’s got heart.',
+ 'timestamp': 1454463000,
+ 'upload_date': '20160203',
+ 'uploader': 'CBCC-NEW',
+ },
+ 'skip': 'Geo-restricted to Canada',
+ }, {
+ # with clipId, feed available via tpfeed.cbc.ca and feed.theplatform.com
+ 'url': 'http://www.cbc.ca/22minutes/videos/22-minutes-update/22-minutes-update-episode-4',
+ 'md5': '162adfa070274b144f4fdc3c3b8207db',
+ 'info_dict': {
+ 'id': '2414435309',
+ 'ext': 'mp4',
+ 'title': '22 Minutes Update: What Not To Wear Quebec',
+ 'description': "This week's latest Canadian top political story is What Not To Wear Quebec.",
+ 'upload_date': '20131025',
+ 'uploader': 'CBCC-NEW',
+ 'timestamp': 1382717907,
+ },
+ 'skip': 'No longer available',
+ }, {
+ # with clipId, feed only available via tpfeed.cbc.ca
+ 'url': 'http://www.cbc.ca/archives/entry/1978-robin-williams-freestyles-on-90-minutes-live',
+ 'md5': '0274a90b51a9b4971fe005c63f592f12',
+ 'info_dict': {
+ 'id': '2487345465',
+ 'ext': 'mp4',
+ 'title': 'Robin Williams freestyles on 90 Minutes Live',
+ 'description': 'Wacky American comedian Robin Williams shows off his infamous "freestyle" comedic talents while being interviewed on CBC\'s 90 Minutes Live.',
+ 'upload_date': '19780210',
+ 'uploader': 'CBCC-NEW',
+ 'timestamp': 255977160,
+ },
+ 'skip': '404 Not Found',
+ }, {
+ # multiple iframes
+ 'url': 'http://www.cbc.ca/natureofthings/blog/birds-eye-view-from-vancouvers-burrard-street-bridge-how-we-got-the-shot',
+ 'playlist': [{
+ 'md5': '377572d0b49c4ce0c9ad77470e0b96b4',
+ 'info_dict': {
+ 'id': '2680832926',
+ 'ext': 'mp4',
+ 'title': 'An Eagle\'s-Eye View Off Burrard Bridge',
+ 'description': 'Hercules the eagle flies from Vancouver\'s Burrard Bridge down to a nearby park with a mini-camera strapped to his back.',
+ 'upload_date': '20160201',
+ 'timestamp': 1454342820,
+ 'uploader': 'CBCC-NEW',
+ },
+ }, {
+ 'md5': '415a0e3f586113894174dfb31aa5bb1a',
+ 'info_dict': {
+ 'id': '2658915080',
+ 'ext': 'mp4',
+ 'title': 'Fly like an eagle!',
+ 'description': 'Eagle equipped with a mini camera flies from the world\'s tallest tower',
+ 'upload_date': '20150315',
+ 'timestamp': 1426443984,
+ 'uploader': 'CBCC-NEW',
+ },
+ }],
+ 'skip': 'Geo-restricted to Canada',
+ }, {
+ # multiple CBC.APP.Caffeine.initInstance(...)
+ 'url': 'http://www.cbc.ca/news/canada/calgary/dog-indoor-exercise-winter-1.3928238',
+ 'info_dict': {
+ 'title': 'Keep Rover active during the deep freeze with doggie pushups and other fun indoor tasks', # FIXME
+ 'id': 'dog-indoor-exercise-winter-1.3928238',
+ 'description': 'md5:c18552e41726ee95bd75210d1ca9194c',
+ },
+ 'playlist_mincount': 6,
+ }]
+
+ @classmethod
+ def suitable(cls, url):
+ return False if CBCPlayerIE.suitable(url) else super(CBCIE, cls).suitable(url)
+
+ def _extract_player_init(self, player_init, display_id):
+ player_info = self._parse_json(player_init, display_id, js_to_json)
+ media_id = player_info.get('mediaId')
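+ # older pages only expose a clipId; resolve it to a mediaId via tpfeed.cbc.ca, with feed.theplatform.com as a fallback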
+ if not media_id:
+ clip_id = player_info['clipId']
+ feed = self._download_json(
+ 'http://tpfeed.cbc.ca/f/ExhSPC/vms_5akSXx4Ng_Zn?byCustomValue={:mpsReleases}{%s}' % clip_id,
+ clip_id, fatal=False)
+ if feed:
+ media_id = try_get(feed, lambda x: x['entries'][0]['guid'], compat_str)
+ if not media_id:
+ media_id = self._download_json(
+ 'http://feed.theplatform.com/f/h9dtGB/punlNGjMlc1F?fields=id&byContent=byReleases%3DbyId%253D' + clip_id,
+ clip_id)['entries'][0]['id'].split('/')[-1]
+ return self.url_result('cbcplayer:%s' % media_id, 'CBCPlayer', media_id)
+
+ def _real_extract(self, url):
+ display_id = self._match_id(url)
+ webpage = self._download_webpage(url, display_id)
+ title = (self._og_search_title(webpage, default=None)
+ or self._html_search_meta('twitter:title', webpage, 'title', default=None)
+ or self._html_extract_title(webpage))
+ entries = [
+ self._extract_player_init(player_init, display_id)
+ for player_init in re.findall(r'CBC\.APP\.Caffeine\.initInstance\(({.+?})\);', webpage)]
+ media_ids = []
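+ # also collect bare media ids referenced via syndicate iframes, player divs, or guid fields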
+ for media_id_re in (
+ r'<iframe[^>]+src="[^"]+?mediaId=(\d+)"',
+ r'<div[^>]+\bid=["\']player-(\d+)',
+ r'guid["\']\s*:\s*["\'](\d+)'):
+ media_ids.extend(re.findall(media_id_re, webpage))
+ entries.extend([
+ self.url_result('cbcplayer:%s' % media_id, 'CBCPlayer', media_id)
+ for media_id in orderedSet(media_ids)])
+ return self.playlist_result(
+ entries, display_id, strip_or_none(title),
+ self._og_search_description(webpage))
+
+
+class CBCPlayerIE(InfoExtractor):
+ IE_NAME = 'cbc.ca:player'
+ _VALID_URL = r'(?:cbcplayer:|https?://(?:www\.)?cbc\.ca/(?:player/play/|i/caffeine/syndicate/\?mediaId=))(?P<id>\d+)'
+ _TESTS = [{
+ 'url': 'http://www.cbc.ca/player/play/2683190193',
+ 'md5': '64d25f841ddf4ddb28a235338af32e2c',
+ 'info_dict': {
+ 'id': '2683190193',
+ 'ext': 'mp4',
+ 'title': 'Gerry Runs a Sweat Shop',
+ 'description': 'md5:b457e1c01e8ff408d9d801c1c2cd29b0',
+ 'timestamp': 1455071400,
+ 'upload_date': '20160210',
+ 'uploader': 'CBCC-NEW',
+ },
+ 'skip': 'Geo-restricted to Canada and no longer available',
+ }, {
+ # Redirected from http://www.cbc.ca/player/AudioMobile/All%20in%20a%20Weekend%20Montreal/ID/2657632011/
+ 'url': 'http://www.cbc.ca/player/play/2657631896',
+ 'md5': 'e5e708c34ae6fca156aafe17c43e8b75',
+ 'info_dict': {
+ 'id': '2657631896',
+ 'ext': 'mp3',
+ 'title': 'CBC Montreal is organizing its first ever community hackathon!',
+ 'description': 'The modern technology we tend to depend on so heavily, is never without it\'s share of hiccups and headaches. Next weekend - CBC Montreal will be getting members of the public for its first Hackathon.',
+ 'timestamp': 1425704400,
+ 'upload_date': '20150307',
+ 'uploader': 'CBCC-NEW',
+ 'thumbnail': 'http://thumbnails.cbc.ca/maven_legacy/thumbnails/sonali-karnick-220.jpg',
+ 'chapters': [],
+ 'duration': 494.811,
+ 'categories': ['AudioMobile/All in a Weekend Montreal'],
+ 'tags': 'count:8',
+ 'location': 'Quebec',
+ 'series': 'All in a Weekend Montreal',
+ 'season': 'Season 2015',
+ 'season_number': 2015,
+ 'media_type': 'Excerpt',
+ },
+ }, {
+ 'url': 'http://www.cbc.ca/player/play/2164402062',
+ 'md5': '33fcd8f6719b9dd60a5e73adcb83b9f6',
+ 'info_dict': {
+ 'id': '2164402062',
+ 'ext': 'mp4',
+ 'title': 'Cancer survivor four times over',
+ 'description': 'Tim Mayer has beaten three different forms of cancer four times in five years.',
+ 'timestamp': 1320410746,
+ 'upload_date': '20111104',
+ 'uploader': 'CBCC-NEW',
+ 'thumbnail': 'https://thumbnails.cbc.ca/maven_legacy/thumbnails/277/67/cancer_852x480_2164412612.jpg',
+ 'chapters': [],
+ 'duration': 186.867,
+ 'series': 'CBC News: Windsor at 6:00',
+ 'categories': ['News/Canada/Windsor'],
+ 'location': 'Windsor',
+ 'tags': ['cancer'],
+ 'creator': 'Allison Johnson',
+ 'media_type': 'Excerpt',
+ },
+ }, {
+ # Has subtitles
+ # These broadcasts expire after ~1 month, can find new test URL here:
+ # https://www.cbc.ca/player/news/TV%20Shows/The%20National/Latest%20Broadcast
+ 'url': 'http://www.cbc.ca/player/play/2284799043667',
+ 'md5': '9b49f0839e88b6ec0b01d840cf3d42b5',
+ 'info_dict': {
+ 'id': '2284799043667',
+ 'ext': 'mp4',
+ 'title': 'The National | Hockey coach charged, Green grants, Safer drugs',
+ 'description': 'md5:84ef46321c94bcf7d0159bb565d26bfa',
+ 'timestamp': 1700272800,
+ 'duration': 2718.833,
+ 'subtitles': {'eng': [{'ext': 'vtt', 'protocol': 'm3u8_native'}]},
+ 'thumbnail': 'https://thumbnails.cbc.ca/maven_legacy/thumbnails/907/171/thumbnail.jpeg',
+ 'uploader': 'CBCC-NEW',
+ 'chapters': 'count:5',
+ 'upload_date': '20231118',
+ 'categories': 'count:4',
+ 'series': 'The National - Full Show',
+ 'tags': 'count:1',
+ 'creator': 'News',
+ 'location': 'Canada',
+ 'media_type': 'Full Program',
+ },
+ }]
+
+ def _real_extract(self, url):
+ video_id = self._match_id(url)
+ return {
+ '_type': 'url_transparent',
+ 'ie_key': 'ThePlatform',
+ 'url': smuggle_url(
+ 'http://link.theplatform.com/s/ExhSPC/media/guid/2655402169/%s?mbr=true&formats=MPEG4,FLV,MP3' % video_id, {
+ 'force_smil_url': True
+ }),
+ 'id': video_id,
+ '_format_sort_fields': ('res', 'proto') # Prioritize direct http formats over HLS
+ }
+
+
+class CBCPlayerPlaylistIE(InfoExtractor):
+ IE_NAME = 'cbc.ca:player:playlist'
+ _VALID_URL = r'https?://(?:www\.)?cbc\.ca/(?:player/)(?!play/)(?P<id>[^?#]+)'
+ _TESTS = [{
+ 'url': 'https://www.cbc.ca/player/news/TV%20Shows/The%20National/Latest%20Broadcast',
+ 'playlist_mincount': 25,
+ 'info_dict': {
+ 'id': 'news/tv shows/the national/latest broadcast',
+ }
+ }, {
+ 'url': 'https://www.cbc.ca/player/news/Canada/North',
+ 'playlist_mincount': 25,
+ 'info_dict': {
+ 'id': 'news/canada/north',
+ }
+ }]
+
+ def _real_extract(self, url):
+ playlist_id = urllib.parse.unquote(self._match_id(url)).lower()
+ webpage = self._download_webpage(url, playlist_id)
+ json_content = self._search_json(
+ r'window\.__INITIAL_STATE__\s*=', webpage, 'initial state', playlist_id)
+
+ def entries():
+ for video_id in traverse_obj(json_content, (
+ 'video', 'clipsByCategory', lambda k, _: k.lower() == playlist_id, 'items', ..., 'id'
+ )):
+ yield self.url_result(f'https://www.cbc.ca/player/play/{video_id}', CBCPlayerIE)
+
+ return self.playlist_result(entries(), playlist_id)
+
+
+class CBCGemIE(InfoExtractor):
+ IE_NAME = 'gem.cbc.ca'
+ _VALID_URL = r'https?://gem\.cbc\.ca/(?:media/)?(?P<id>[0-9a-z-]+/s[0-9]+[a-z][0-9]+)'
+ _TESTS = [{
+ # This is a normal, public, TV show video
+ 'url': 'https://gem.cbc.ca/media/schitts-creek/s06e01',
+ 'md5': '93dbb31c74a8e45b378cf13bd3f6f11e',
+ 'info_dict': {
+ 'id': 'schitts-creek/s06e01',
+ 'ext': 'mp4',
+ 'title': 'Smoke Signals',
+ 'description': 'md5:929868d20021c924020641769eb3e7f1',
+ 'thumbnail': 'https://images.radio-canada.ca/v1/synps-cbc/episode/perso/cbc_schitts_creek_season_06e01_thumbnail_v01.jpg?im=Resize=(Size)',
+ 'duration': 1314,
+ 'categories': ['comedy'],
+ 'series': 'Schitt\'s Creek',
+ 'season': 'Season 6',
+ 'season_number': 6,
+ 'episode': 'Smoke Signals',
+ 'episode_number': 1,
+ 'episode_id': 'schitts-creek/s06e01',
+ },
+ 'params': {'format': 'bv'},
+ 'skip': 'Geo-restricted to Canada',
+ }, {
+ # This video requires an account in the browser, but works fine in yt-dlp
+ 'url': 'https://gem.cbc.ca/media/schitts-creek/s01e01',
+ 'md5': '297a9600f554f2258aed01514226a697',
+ 'info_dict': {
+ 'id': 'schitts-creek/s01e01',
+ 'ext': 'mp4',
+ 'title': 'The Cup Runneth Over',
+ 'description': 'md5:9bca14ea49ab808097530eb05a29e797',
+ 'thumbnail': 'https://images.radio-canada.ca/v1/synps-cbc/episode/perso/cbc_schitts_creek_season_01e01_thumbnail_v01.jpg?im=Resize=(Size)',
+ 'series': 'Schitt\'s Creek',
+ 'season_number': 1,
+ 'season': 'Season 1',
+ 'episode_number': 1,
+ 'episode': 'The Cup Runneth Over',
+ 'episode_id': 'schitts-creek/s01e01',
+ 'duration': 1309,
+ 'categories': ['comedy'],
+ },
+ 'params': {'format': 'bv'},
+ 'skip': 'Geo-restricted to Canada',
+ }, {
+ 'url': 'https://gem.cbc.ca/nadiyas-family-favourites/s01e01',
+ 'only_matching': True,
+ }]
+
+ _GEO_COUNTRIES = ['CA']
+ _TOKEN_API_KEY = '3f4beddd-2061-49b0-ae80-6f1f2ed65b37'
+ _NETRC_MACHINE = 'cbcgem'
+ _claims_token = None
+
+ def _new_claims_token(self, email, password):
+ data = json.dumps({
+ 'email': email,
+ 'password': password,
+ }).encode()
+ headers = {'content-type': 'application/json'}
+ query = {'apikey': self._TOKEN_API_KEY}
+ resp = self._download_json('https://api.loginradius.com/identity/v2/auth/login',
+ None, data=data, headers=headers, query=query)
+ access_token = resp['access_token']
+
+ query = {
+ 'access_token': access_token,
+ 'apikey': self._TOKEN_API_KEY,
+ 'jwtapp': 'jwt',
+ }
+ resp = self._download_json('https://cloud-api.loginradius.com/sso/jwt/api/token',
+ None, headers=headers, query=query)
+ sig = resp['signature']
+
+ data = json.dumps({'jwt': sig}).encode()
+ headers = {'content-type': 'application/json', 'ott-device-type': 'web'}
+ resp = self._download_json('https://services.radio-canada.ca/ott/cbc-api/v2/token',
+ None, data=data, headers=headers, expected_status=426)
+ cbc_access_token = resp['accessToken']
+
+ headers = {'content-type': 'application/json', 'ott-device-type': 'web', 'ott-access-token': cbc_access_token}
+ resp = self._download_json('https://services.radio-canada.ca/ott/cbc-api/v2/profile',
+ None, headers=headers, expected_status=426)
+ return resp['claimsToken']
+
+ def _get_claims_token_expiry(self):
+ # The claims token is a JWT; decode its payload and extract the
+ # 'exp' field, a Unix timestamp for when the token expires
+ b64_data = self._claims_token.split('.')[1]
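+ # JWT payloads use unpadded base64url; appending '==' guarantees sufficient padding (the decoder tolerates any excess)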
+ data = base64.urlsafe_b64decode(b64_data + "==")
+ return json.loads(data)['exp']
+
+ def claims_token_expired(self):
+ exp = self._get_claims_token_expiry()
+ # Treat the token as expired if it has less than 10 seconds left
+ return exp - time.time() < 10
+
+ def claims_token_valid(self):
+ return self._claims_token is not None and not self.claims_token_expired()
+
+ def _get_claims_token(self, email, password):
+ if not self.claims_token_valid():
+ self._claims_token = self._new_claims_token(email, password)
+ self.cache.store(self._NETRC_MACHINE, 'claims_token', self._claims_token)
+ return self._claims_token
+
+ def _real_initialize(self):
+ if self.claims_token_valid():
+ return
+ self._claims_token = self.cache.load(self._NETRC_MACHINE, 'claims_token')
+
+ def _find_secret_formats(self, formats, video_id):
+ """ Find a valid video url and convert it to the secret variant """
+ base_format = next((f for f in formats if f.get('vcodec') != 'none'), None)
+ if not base_format:
+ return
+
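+ # Strip the ,filter= and ,format= parameters from the Manifest(...) URL so the server returns the full manifest listing every quality level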
+ base_url = re.sub(r'(Manifest\(.*?),filter=[\w-]+(.*?\))', r'\1\2', base_format['url'])
+ url = re.sub(r'(Manifest\(.*?),format=[\w-]+(.*?\))', r'\1\2', base_url)
+
+ secret_xml = self._download_xml(url, video_id, note='Downloading secret XML', fatal=False)
+ if not isinstance(secret_xml, xml.etree.ElementTree.Element):
+ return
+
+ for child in secret_xml:
+ if child.attrib.get('Type') != 'video':
+ continue
+ for video_quality in child:
+ bitrate = int_or_none(video_quality.attrib.get('Bitrate'))
+ if not bitrate or 'Index' not in video_quality.attrib:
+ continue
+ height = int_or_none(video_quality.attrib.get('MaxHeight'))
+
+ yield {
+ **base_format,
+ 'format_id': join_nonempty('sec', height),
+ # Note: \g<1> is necessary instead of \1 since bitrate is a number
+ 'url': re.sub(r'(QualityLevels\()\d+(\))', fr'\g<1>{bitrate}\2', base_url),
+ 'width': int_or_none(video_quality.attrib.get('MaxWidth')),
+ 'tbr': bitrate / 1000.0,
+ 'height': height,
+ }
+
+ def _real_extract(self, url):
+ video_id = self._match_id(url)
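+ # The v2 API answers with HTTP 426 (Upgrade Required) while still serving valid JSON, hence the expected_status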
+ video_info = self._download_json(
+ f'https://services.radio-canada.ca/ott/cbc-api/v2/assets/{video_id}',
+ video_id, expected_status=426)
+
+ email, password = self._get_login_info()
+ if email and password:
+ claims_token = self._get_claims_token(email, password)
+ headers = {'x-claims-token': claims_token}
+ else:
+ headers = {}
+ m3u8_info = self._download_json(video_info['playSession']['url'], video_id, headers=headers)
+ m3u8_url = m3u8_info.get('url')
+
+ if m3u8_info.get('errorCode') == 1:
+ self.raise_geo_restricted(countries=['CA'])
+ elif m3u8_info.get('errorCode') == 35:
+ self.raise_login_required(method='password')
+ elif m3u8_info.get('errorCode') != 0:
+ raise ExtractorError(f'{self.IE_NAME} said: {m3u8_info.get("errorCode")} - {m3u8_info.get("message")}')
+
+ formats = self._extract_m3u8_formats(m3u8_url, video_id, m3u8_id='hls')
+ self._remove_duplicate_formats(formats)
+ formats.extend(self._find_secret_formats(formats, video_id))
+
+ for fmt in formats: # avoid shadowing the built-in `format`
+ if fmt.get('vcodec') == 'none':
+ if fmt.get('ext') is None:
+ fmt['ext'] = 'm4a'
+ if fmt.get('acodec') is None:
+ fmt['acodec'] = 'mp4a.40.2'
+
+ # Deprioritize described audio so it isn't chosen by
+ # default, as most people won't want it
+ if 'descriptive' in fmt['format_id'].lower():
+ fmt['preference'] = -2
+
+ return {
+ 'id': video_id,
+ 'title': video_info['title'],
+ 'description': video_info.get('description'),
+ 'thumbnail': video_info.get('image'),
+ 'series': video_info.get('series'),
+ 'season_number': video_info.get('season'),
+ 'season': f'Season {video_info["season"]}' if video_info.get('season') else None,
+ 'episode_number': video_info.get('episode'),
+ 'episode': video_info.get('title'),
+ 'episode_id': video_id,
+ 'duration': video_info.get('duration'),
+ 'categories': [video_info.get('category')],
+ 'formats': formats,
+ 'release_timestamp': video_info.get('airDate'),
+ 'timestamp': video_info.get('availableDate'),
+ }
+
+
+class CBCGemPlaylistIE(InfoExtractor):
+ IE_NAME = 'gem.cbc.ca:playlist'
+ _VALID_URL = r'https?://gem\.cbc\.ca/(?:media/)?(?P<id>(?P<show>[0-9a-z-]+)/s(?P<season>[0-9]+))/?(?:[?#]|$)'
+ _TESTS = [{
+ # TV show playlist, all public videos
+ 'url': 'https://gem.cbc.ca/media/schitts-creek/s06',
+ 'playlist_count': 16,
+ 'info_dict': {
+ 'id': 'schitts-creek/s06',
+ 'title': 'Season 6',
+ 'description': 'md5:6a92104a56cbeb5818cc47884d4326a2',
+ 'series': 'Schitt\'s Creek',
+ 'season_number': 6,
+ 'season': 'Season 6',
+ 'thumbnail': 'https://images.radio-canada.ca/v1/synps-cbc/season/perso/cbc_schitts_creek_season_06_carousel_v03.jpg?impolicy=ott&im=Resize=(_Size_)&quality=75',
+ },
+ }, {
+ 'url': 'https://gem.cbc.ca/schitts-creek/s06',
+ 'only_matching': True,
+ }]
+ _API_BASE = 'https://services.radio-canada.ca/ott/cbc-api/v2/shows/'
+
+ def _real_extract(self, url):
+ match = self._match_valid_url(url)
+ season_id = match.group('id')
+ show = match.group('show')
+ show_info = self._download_json(self._API_BASE + show, season_id, expected_status=426)
+ season = int(match.group('season'))
+
+ season_info = next((s for s in show_info['seasons'] if s.get('season') == season), None)
+
+ if season_info is None:
+ raise ExtractorError(f'Couldn\'t find season {season} of {show}')
+
+ episodes = []
+ for episode in season_info['assets']:
+ episodes.append({
+ '_type': 'url_transparent',
+ 'ie_key': 'CBCGem',
+ 'url': 'https://gem.cbc.ca/media/' + episode['id'],
+ 'id': episode['id'],
+ 'title': episode.get('title'),
+ 'description': episode.get('description'),
+ 'thumbnail': episode.get('image'),
+ 'series': episode.get('series'),
+ 'season_number': episode.get('season'),
+ 'season': season_info['title'],
+ 'season_id': season_info.get('id'),
+ 'episode_number': episode.get('episode'),
+ 'episode': episode.get('title'),
+ 'episode_id': episode['id'],
+ 'duration': episode.get('duration'),
+ 'categories': [episode.get('category')],
+ })
+
+ thumbnail = None
+ tn_uri = season_info.get('image')
+ # the-national was observed to use a "data:image/png;base64"
+ # URI for its 'image' value; the image was 1x1 and is probably
+ # just a placeholder, so it is ignored
+ if tn_uri is not None and not tn_uri.startswith('data:'):
+ thumbnail = tn_uri
+
+ return {
+ '_type': 'playlist',
+ 'entries': episodes,
+ 'id': season_id,
+ 'title': season_info['title'],
+ 'description': season_info.get('description'),
+ 'thumbnail': thumbnail,
+ 'series': show_info.get('title'),
+ 'season_number': season_info.get('season'),
+ 'season': season_info['title'],
+ }
+
+
+class CBCGemLiveIE(InfoExtractor):
+ IE_NAME = 'gem.cbc.ca:live'
+ _VALID_URL = r'https?://gem\.cbc\.ca/live(?:-event)?/(?P<id>\d+)'
+ _TESTS = [
+ {
+ 'url': 'https://gem.cbc.ca/live/920604739687',
+ 'info_dict': {
+ 'title': 'Ottawa',
+ 'description': 'The live TV channel and local programming from Ottawa',
+ 'thumbnail': 'https://thumbnails.cbc.ca/maven_legacy/thumbnails/CBC_OTT_VMS/Live_Channel_Static_Images/Ottawa_2880x1620.jpg',
+ 'is_live': True,
+ 'id': 'AyqZwxRqh8EH',
+ 'ext': 'mp4',
+ 'timestamp': 1492106160,
+ 'upload_date': '20170413',
+ 'uploader': 'CBCC-NEW',
+ },
+ 'skip': 'Live might have ended',
+ },
+ {
+ 'url': 'https://gem.cbc.ca/live/44',
+ 'info_dict': {
+ 'id': '44',
+ 'ext': 'mp4',
+ 'is_live': True,
+ 'title': r're:^Ottawa [0-9\-: ]+',
+ 'description': 'The live TV channel and local programming from Ottawa',
+ 'live_status': 'is_live',
+ 'thumbnail': r're:https://images.gem.cbc.ca/v1/cbc-gem/live/.*'
+ },
+ 'params': {'skip_download': True},
+ 'skip': 'Live might have ended',
+ },
+ {
+ 'url': 'https://gem.cbc.ca/live-event/10835',
+ 'info_dict': {
+ 'id': '10835',
+ 'ext': 'mp4',
+ 'is_live': True,
+ 'title': r're:^The National \| Biden’s trip wraps up, Paltrow testifies, Bird flu [0-9\-: ]+',
+ 'description': 'March 24, 2023 | President Biden’s Ottawa visit ends with big pledges from both countries. Plus, Gwyneth Paltrow testifies in her ski collision trial.',
+ 'live_status': 'is_live',
+ 'thumbnail': r're:https://images.gem.cbc.ca/v1/cbc-gem/live/.*',
+ 'timestamp': 1679706000,
+ 'upload_date': '20230325',
+ },
+ 'params': {'skip_download': True},
+ 'skip': 'Live might have ended',
+ }
+ ]
+
+ def _real_extract(self, url):
+ video_id = self._match_id(url)
+ webpage = self._download_webpage(url, video_id)
+ video_info = self._search_nextjs_data(webpage, video_id)['props']['pageProps']['data']
+
+ # The Next.js payload comes in two shapes: the video metadata directly, or a freeTv/streams listing that must be searched by key
+ if not video_info.get('formattedIdMedia'):
+ video_info = traverse_obj(
+ video_info, (('freeTv', ('streams', ...)), 'items', lambda _, v: v['key'] == video_id, {dict}),
+ get_all=False, default={})
+
+ video_stream_id = video_info.get('formattedIdMedia')
+ if not video_stream_id:
+ raise ExtractorError('Couldn\'t find video metadata, maybe this livestream is now offline', expected=True)
+
+ stream_data = self._download_json(
+ 'https://services.radio-canada.ca/media/validation/v2/', video_id, query={
+ 'appCode': 'mpx',
+ 'connectionType': 'hd',
+ 'deviceType': 'ipad',
+ 'idMedia': video_stream_id,
+ 'multibitrate': 'true',
+ 'output': 'json',
+ 'tech': 'hls',
+ 'manifestType': 'desktop',
+ })
+
+ return {
+ 'id': video_id,
+ 'formats': self._extract_m3u8_formats(stream_data['url'], video_id, 'mp4', live=True),
+ 'is_live': True,
+ **traverse_obj(video_info, {
+ 'title': 'title',
+ 'description': 'description',
+ 'thumbnail': ('images', 'card', 'url'),
+ 'timestamp': ('airDate', {parse_iso8601}),
+ })
+ }
diff --git a/yt_dlp/extractor/cbs.py b/yt_dlp/extractor/cbs.py
new file mode 100644
index 0000000..cf83021
--- /dev/null
+++ b/yt_dlp/extractor/cbs.py
@@ -0,0 +1,280 @@
+from .brightcove import BrightcoveNewIE
+from .common import InfoExtractor
+from .theplatform import ThePlatformFeedIE
+from .youtube import YoutubeIE
+from ..utils import (
+ ExtractorError,
+ extract_attributes,
+ get_element_html_by_id,
+ int_or_none,
+ find_xpath_attr,
+ smuggle_url,
+ xpath_element,
+ xpath_text,
+ update_url_query,
+ url_or_none,
+)
+
+
+class CBSBaseIE(ThePlatformFeedIE): # XXX: Do not subclass from concrete IE
+ def _parse_smil_subtitles(self, smil, namespace=None, subtitles_lang='en'):
+ subtitles = {}
+ for k, ext in [('sMPTE-TTCCURL', 'tt'), ('ClosedCaptionURL', 'ttml'), ('webVTTCaptionURL', 'vtt')]:
+ cc_e = find_xpath_attr(smil, self._xpath_ns('.//param', namespace), 'name', k)
+ if cc_e is not None:
+ cc_url = cc_e.get('value')
+ if cc_url:
+ subtitles.setdefault(subtitles_lang, []).append({
+ 'ext': ext,
+ 'url': cc_url,
+ })
+ return subtitles
+
+ def _extract_common_video_info(self, content_id, asset_types, mpx_acc, extra_info):
+ tp_path = 'dJ5BDC/media/guid/%d/%s' % (mpx_acc, content_id)
+ tp_release_url = f'https://link.theplatform.com/s/{tp_path}'
+ info = self._extract_theplatform_metadata(tp_path, content_id)
+
+ formats, subtitles = [], {}
+ last_e = None
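+ # Request SMIL data once per asset type; errors are remembered so the last one can be surfaced if no asset type yields formats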
+ for asset_type, query in asset_types.items():
+ try:
+ tp_formats, tp_subtitles = self._extract_theplatform_smil(
+ update_url_query(tp_release_url, query), content_id,
+ 'Downloading %s SMIL data' % asset_type)
+ except ExtractorError as e:
+ last_e = e
+ if asset_type != 'fallback':
+ continue
+ query['formats'] = '' # a blank 'formats' query reveals whether the link has expired
+ try:
+ tp_formats, tp_subtitles = self._extract_theplatform_smil(
+ update_url_query(tp_release_url, query), content_id,
+ 'Downloading %s SMIL data, trying again with another format' % asset_type)
+ except ExtractorError as e:
+ last_e = e
+ continue
+ formats.extend(tp_formats)
+ subtitles = self._merge_subtitles(subtitles, tp_subtitles)
+ if last_e and not formats:
+ self.raise_no_formats(last_e, True, content_id)
+
+ extra_info.update({
+ 'id': content_id,
+ 'formats': formats,
+ 'subtitles': subtitles,
+ })
+ info.update({k: v for k, v in extra_info.items() if v is not None})
+ return info
+
+ def _extract_video_info(self, *args, **kwargs):
+ # Extract assets + metadata and call _extract_common_video_info
+ raise NotImplementedError('This method must be implemented by subclasses')
+
+ def _real_extract(self, url):
+ return self._extract_video_info(self._match_id(url))
+
+
+class CBSIE(CBSBaseIE):
+ _WORKING = False
+ _VALID_URL = r'''(?x)
+ (?:
+ cbs:|
+ https?://(?:www\.)?(?:
+ cbs\.com/(?:shows|movies)/(?:video|[^/]+/video|[^/]+)/|
+ colbertlateshow\.com/(?:video|podcasts)/)
+ )(?P<id>[\w-]+)'''
+
+ # All tests are blocked outside US
+ _TESTS = [{
+ 'url': 'https://www.cbs.com/shows/video/xrUyNLtl9wd8D_RWWAg9NU2F_V6QpB3R/',
+ 'info_dict': {
+ 'id': 'xrUyNLtl9wd8D_RWWAg9NU2F_V6QpB3R',
+ 'ext': 'mp4',
+ 'title': 'Tough As Nails - Dreams Never Die',
+ 'description': 'md5:a3535a62531cdd52b0364248a2c1ae33',
+ 'duration': 2588,
+ 'timestamp': 1639015200,
+ 'upload_date': '20211209',
+ 'uploader': 'CBSI-NEW',
+ },
+ 'params': {
+ # m3u8 download
+ 'skip_download': True,
+ },
+ 'skip': 'Subscription required',
+ }, {
+ 'url': 'https://www.cbs.com/shows/video/sZH1MGgomIosZgxGJ1l263MFq16oMtW1/',
+ 'info_dict': {
+ 'id': 'sZH1MGgomIosZgxGJ1l263MFq16oMtW1',
+ 'title': 'The Late Show - 3/16/22 (Michael Buble, Rose Matafeo)',
+ 'timestamp': 1647488100,
+ 'description': 'md5:d0e6ec23c544b7fa8e39a8e6844d2439',
+ 'uploader': 'CBSI-NEW',
+ 'upload_date': '20220317',
+ },
+ 'params': {
+ 'ignore_no_formats_error': True,
+ 'skip_download': True,
+ },
+ 'expected_warnings': [
+ 'This content expired on', 'No video formats found', 'Requested format is not available'],
+ 'skip': '404 Not Found',
+ }, {
+ 'url': 'http://colbertlateshow.com/video/8GmB0oY0McANFvp2aEffk9jZZZ2YyXxy/the-colbeard/',
+ 'only_matching': True,
+ }, {
+ 'url': 'http://www.colbertlateshow.com/podcasts/dYSwjqPs_X1tvbV_P2FcPWRa_qT6akTC/in-the-bad-room-with-stephen/',
+ 'only_matching': True,
+ }]
+
+ def _extract_video_info(self, content_id, site='cbs', mpx_acc=2198311517):
+ items_data = self._download_xml(
+ 'https://can.cbs.com/thunder/player/videoPlayerService.php',
+ content_id, query={'partner': site, 'contentId': content_id})
+ video_data = xpath_element(items_data, './/item')
+ title = xpath_text(video_data, 'videoTitle', 'title') or xpath_text(video_data, 'videotitle', 'title')
+
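+ # Build one SMIL query per usable asset type; HLS_FPS, DASH_CENC and OnceURL variants are skipped, and DRM is reported if nothing non-DRM remains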
+ asset_types = {}
+ has_drm = False
+ for item in items_data.findall('.//item'):
+ asset_type = xpath_text(item, 'assetType')
+ query = {
+ 'mbr': 'true',
+ 'assetTypes': asset_type,
+ }
+ if not asset_type:
+ # fallback for content IDs for which videoPlayerService returns nothing
+ asset_type = 'fallback'
+ query['formats'] = 'M3U+none,MPEG4,M3U+appleHlsEncryption,MP3'
+ del query['assetTypes']
+ if asset_type in asset_types:
+ continue
+ elif any(excluded in asset_type for excluded in ('HLS_FPS', 'DASH_CENC', 'OnceURL')):
+ if 'DASH_CENC' in asset_type:
+ has_drm = True
+ continue
+ if asset_type.startswith('HLS') or 'StreamPack' in asset_type:
+ query['formats'] = 'MPEG4,M3U'
+ elif asset_type in ('RTMP', 'WIFI', '3G'):
+ query['formats'] = 'MPEG4,FLV'
+ asset_types[asset_type] = query
+
+ if not asset_types and has_drm:
+ self.report_drm(content_id)
+
+ return self._extract_common_video_info(content_id, asset_types, mpx_acc, extra_info={
+ 'title': title,
+ 'series': xpath_text(video_data, 'seriesTitle'),
+ 'season_number': int_or_none(xpath_text(video_data, 'seasonNumber')),
+ 'episode_number': int_or_none(xpath_text(video_data, 'episodeNumber')),
+ 'duration': int_or_none(xpath_text(video_data, 'videoLength'), 1000),
+ 'thumbnail': url_or_none(xpath_text(video_data, 'previewImageURL')),
+ })
+
+
+class ParamountPressExpressIE(InfoExtractor):
+ _VALID_URL = r'https?://(?:www\.)?paramountpressexpress\.com(?:/[\w-]+)+/(?P<yt>yt-)?video/?\?watch=(?P<id>[\w-]+)'
+ _TESTS = [{
+ 'url': 'https://www.paramountpressexpress.com/cbs-entertainment/shows/survivor/video/?watch=pnzew7e2hx',
+ 'md5': '56631dbcadaab980d1fc47cb7b76cba4',
+ 'info_dict': {
+ 'id': '6322981580112',
+ 'ext': 'mp4',
+ 'title': 'I’m Felicia',
+ 'description': 'md5:88fad93f8eede1c9c8f390239e4c6290',
+ 'uploader_id': '6055873637001',
+ 'upload_date': '20230320',
+ 'timestamp': 1679334960,
+ 'duration': 49.557,
+ 'thumbnail': r're:^https://.+\.jpg',
+ 'tags': [],
+ },
+ }, {
+ 'url': 'https://www.paramountpressexpress.com/cbs-entertainment/video/?watch=2s5eh8kppc',
+ 'md5': 'edcb03e3210b88a3e56c05aa863e0e5b',
+ 'info_dict': {
+ 'id': '6323036027112',
+ 'ext': 'mp4',
+ 'title': '‘Y&R’ Set Visit: Jerry O’Connell Quizzes Cast on Pre-Love Scene Rituals and More',
+ 'description': 'md5:b929867a357aac5544b783d834c78383',
+ 'uploader_id': '6055873637001',
+ 'upload_date': '20230321',
+ 'timestamp': 1679430180,
+ 'duration': 132.032,
+ 'thumbnail': r're:^https://.+\.jpg',
+ 'tags': [],
+ },
+ }, {
+ 'url': 'https://www.paramountpressexpress.com/paramount-plus/yt-video/?watch=OX9wJWOcqck',
+ 'info_dict': {
+ 'id': 'OX9wJWOcqck',
+ 'ext': 'mp4',
+ 'title': 'Rugrats | Season 2 Official Trailer | Paramount+',
+ 'description': 'md5:1f7e26f5625a9f0d6564d9ad97a9f7de',
+ 'uploader': 'Paramount Plus',
+ 'uploader_id': '@paramountplus',
+ 'uploader_url': 'http://www.youtube.com/@paramountplus',
+ 'channel': 'Paramount Plus',
+ 'channel_id': 'UCrRttZIypNTA1Mrfwo745Sg',
+ 'channel_url': 'https://www.youtube.com/channel/UCrRttZIypNTA1Mrfwo745Sg',
+ 'upload_date': '20230316',
+ 'duration': 88,
+ 'age_limit': 0,
+ 'availability': 'public',
+ 'live_status': 'not_live',
+ 'playable_in_embed': True,
+ 'view_count': int,
+ 'like_count': int,
+ 'channel_follower_count': int,
+ 'thumbnail': 'https://i.ytimg.com/vi/OX9wJWOcqck/maxresdefault.jpg',
+ 'categories': ['Entertainment'],
+ 'tags': ['Rugrats'],
+ },
+ }, {
+ 'url': 'https://www.paramountpressexpress.com/showtime/yt-video/?watch=_ljssSoDLkw',
+ 'info_dict': {
+ 'id': '_ljssSoDLkw',
+ 'ext': 'mp4',
+ 'title': 'Lavell Crawford: THEE Lavell Crawford Comedy Special Official Trailer | SHOWTIME',
+ 'description': 'md5:39581bcc3fd810209b642609f448af70',
+ 'uploader': 'SHOWTIME',
+ 'uploader_id': '@Showtime',
+ 'uploader_url': 'http://www.youtube.com/@Showtime',
+ 'channel': 'SHOWTIME',
+ 'channel_id': 'UCtwMWJr2BFPkuJTnSvCESSQ',
+ 'channel_url': 'https://www.youtube.com/channel/UCtwMWJr2BFPkuJTnSvCESSQ',
+ 'upload_date': '20230209',
+ 'duration': 49,
+ 'age_limit': 0,
+ 'availability': 'public',
+ 'live_status': 'not_live',
+ 'playable_in_embed': True,
+ 'view_count': int,
+ 'like_count': int,
+ 'comment_count': int,
+ 'channel_follower_count': int,
+ 'thumbnail': 'https://i.ytimg.com/vi_webp/_ljssSoDLkw/maxresdefault.webp',
+ 'categories': ['People & Blogs'],
+ 'tags': 'count:27',
+ },
+ }]
+
+ def _real_extract(self, url):
+ display_id, is_youtube = self._match_valid_url(url).group('id', 'yt')
+ if is_youtube:
+ return self.url_result(display_id, YoutubeIE)
+
+ webpage = self._download_webpage(url, display_id)
+ video_id = self._search_regex(
+ r'\bvideo_id\s*=\s*["\'](\d+)["\']\s*,', webpage, 'Brightcove ID')
+ token = self._search_regex(r'\btoken\s*=\s*["\']([\w.-]+)["\']', webpage, 'token')
+
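+ # Fall back to account/player/embed values observed on paramountpressexpress.com pages when the player element lacks these attributes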
+ player = extract_attributes(get_element_html_by_id('vcbrightcoveplayer', webpage) or '')
+ account_id = player.get('data-account') or '6055873637001'
+ player_id = player.get('data-player') or 'OtLKgXlO9F'
+ embed = player.get('data-embed') or 'default'
+
+ return self.url_result(smuggle_url(
+ f'https://players.brightcove.net/{account_id}/{player_id}_{embed}/index.html?videoId={video_id}',
+ {'token': token}), BrightcoveNewIE)
diff --git a/yt_dlp/extractor/cbsnews.py b/yt_dlp/extractor/cbsnews.py
new file mode 100644
index 0000000..5a8ebb8
--- /dev/null
+++ b/yt_dlp/extractor/cbsnews.py
@@ -0,0 +1,443 @@
+import base64
+import re
+import urllib.error
+import urllib.parse
+import zlib
+
+from .anvato import AnvatoIE
+from .common import InfoExtractor
+from .paramountplus import ParamountPlusIE
+from ..networking import HEADRequest
+from ..utils import (
+ ExtractorError,
+ UserNotLive,
+ determine_ext,
+ float_or_none,
+ format_field,
+ int_or_none,
+ make_archive_id,
+ mimetype2ext,
+ parse_duration,
+ smuggle_url,
+ traverse_obj,
+ url_or_none,
+)
+
+
+class CBSNewsBaseIE(InfoExtractor):
+ _LOCALES = {
+ 'atlanta': None,
+ 'baltimore': 'BAL',
+ 'boston': 'BOS',
+ 'chicago': 'CHI',
+ 'colorado': 'DEN',
+ 'detroit': 'DET',
+ 'losangeles': 'LA',
+ 'miami': 'MIA',
+ 'minnesota': 'MIN',
+ 'newyork': 'NY',
+ 'philadelphia': 'PHI',
+ 'pittsburgh': 'PIT',
+ 'sacramento': 'SAC',
+ 'sanfrancisco': 'SF',
+ 'texas': 'DAL',
+ }
+ _LOCALE_RE = '|'.join(map(re.escape, _LOCALES))
+ _ANVACK = '5VD6Eyd6djewbCmNwBFnsJj17YAvGRwl'
+
+ def _get_item(self, webpage, display_id):
+ return traverse_obj(self._search_json(
+ r'CBSNEWS\.defaultPayload\s*=', webpage, 'payload', display_id,
+ default={}), ('items', 0, {dict})) or {}
+
+ def _get_video_url(self, item):
+ return traverse_obj(item, 'video', 'video2', expected_type=url_or_none)
+
+ def _extract_playlist(self, webpage, playlist_id):
+ entries = [self.url_result(embed_url, CBSNewsEmbedIE) for embed_url in re.findall(
+ r'<iframe[^>]+data-src="(https?://(?:www\.)?cbsnews\.com/embed/video/[^#]*#[^"]+)"', webpage)]
+ if entries:
+ return self.playlist_result(
+ entries, playlist_id, self._html_search_meta(['og:title', 'twitter:title'], webpage),
+ self._html_search_meta(['og:description', 'twitter:description', 'description'], webpage))
+
+ def _extract_video(self, item, video_url, video_id):
+ if mimetype2ext(item.get('format'), default=determine_ext(video_url)) == 'mp4':
+ formats = [{'url': video_url, 'ext': 'mp4'}]
+
+ else:
+ manifest = self._download_webpage(video_url, video_id, note='Downloading m3u8 information')
+
+ anvato_id = self._search_regex(r'anvato-(\d+)', manifest, 'Anvato ID', default=None)
+ # Prefer Anvato if available; cbsnews.com m3u8 formats are re-encoded from Anvato source
+ if anvato_id:
+ return self.url_result(
+ smuggle_url(f'anvato:{self._ANVACK}:{anvato_id}', {'token': 'default'}),
+ AnvatoIE, url_transparent=True, _old_archive_ids=[make_archive_id(self, anvato_id)])
+
+ formats, _ = self._parse_m3u8_formats_and_subtitles(
+ manifest, video_url, 'mp4', m3u8_id='hls', video_id=video_id)
+
+ def get_subtitles(subs_url):
+ return {
+ 'en': [{
+ 'url': subs_url,
+ 'ext': 'dfxp', # TTAF1
+ }],
+ } if url_or_none(subs_url) else None
+
+ episode_meta = traverse_obj(item, {
+ 'season_number': ('season', {int_or_none}),
+ 'episode_number': ('episode', {int_or_none}),
+ }) if item.get('isFullEpisode') else {}
+
+ return {
+ 'id': video_id,
+ 'formats': formats,
+ **traverse_obj(item, {
+ 'title': (None, ('fulltitle', 'title')),
+ 'description': 'dek',
+ 'timestamp': ('timestamp', {lambda x: float_or_none(x, 1000)}),
+ 'duration': ('duration', {float_or_none}),
+ 'subtitles': ('captions', {get_subtitles}),
+ 'thumbnail': ('images', ('hd', 'sd'), {url_or_none}),
+ 'is_live': ('type', {lambda x: x == 'live'}),
+ }, get_all=False),
+ **episode_meta,
+ }
+
+
+class CBSNewsEmbedIE(CBSNewsBaseIE):
+ IE_NAME = 'cbsnews:embed'
+ _VALID_URL = r'https?://(?:www\.)?cbsnews\.com/embed/video[^#]*#(?P<id>.+)'
+ _TESTS = [{
+ 'url': 'https://www.cbsnews.com/embed/video/?v=1.c9b5b61492913d6660db0b2f03579ef25e86307a#1Vb7b9s2EP5XBAHbT6Gt98PAMKTJ0se6LVjWYWtdGBR1stlIpEBSTtwi%2F%2FvuJNkNhmHdGxgM2NL57vjd6zt%2B8PngdN%2Fyg79qeGvhzN%2FLGrS%2F%2BuBLB531V28%2B%2BO7Qg7%2Fy97r2z3xZ42NW8yLhDbA0S0KWlHnIijwKWJBHZZnHBa8Cgbpdf%2F89NM9Hi9fXifhpr8sr%2FlP848tn%2BTdXycX25zh4cdX%2FvHl6PmmPqnWQv9w8Ed%2B9GjYRim07bFEqdG%2BZVHuwTm65A7bVRrYtR5lAyMox7pigF6W4k%2By91mjspGsJ%2BwVae4%2BsvdnaO1p73HkXs%2FVisUDTGm7R8IcdnOROeq%2B19qT1amhA1VJtPenoTUgrtfKc9m7Rq8dP7nnjwOB7wg7ADdNt7VX64DWAWlKhPtmDEq22g4GF99x6Dk9E8OSsankHXqPNKDxC%2FdK7MLKTircTDgsI3mmj4OBdSq64dy7fd1x577RU1rt4cvMtOaulFYOd%2FLewRWvDO9lIgXFpZSnkZmjbv5SxKTPoQXClFbpsf%2Fhbbpzs0IB3vb8KkyzJQ%2BywOAgCrMpgRrz%2BKk4fvb7kFbR4XJCu0gAdtNO7woCwZTu%2BBUs9bam%2Fds71drVerpeisgrubLjAB4nnOSkWQnfr5W6o1ku5Xpr1MgrCbL0M0vUyDtfLLK15WiYp47xKWSLyjFVpwVmVJSLIoCjSOFkv3W7oKsVliwZJcB9nwXpZ5GEQQwY8jNKqKCBrgjTLeFxgdCIpazojDgnRtn43J6kG7nZ6cAbxh0EeFFk4%2B1u867cY5u4344n%2FxXjCqAjucdTHgLKojNKmSfO8KRsOFY%2FzKEYCKEJBzv90QA9nfm9gL%2BHulaFqUkz9ULUYxl62B3U%2FRVNLA8IhggaPycOoBuwOCESciDQVSSUgiOMsROB%2FhKfwCKOzEk%2B4k6rWd4uuT%2FwTDz7K7t3d3WLO8ISD95jSPQbayBacthbz86XVgxHwhex5zawzgDOmtp%2F3GPcXn0VXHdSS029%2Fj99UC%2FwJUvyKQ%2FzKyixIEVlYJOn4RxxuaH43Ty9fbJ5OObykHH435XAzJTHeOF4hhEUXD8URe%2FQ%2FBT%2BMpf8d5GN02Ox%2FfiGsl7TA7POu1xZ5%2BbTzcAVKMe48mqcC21hkacVEVScM26liVVBnrKkC4CLKyzAvHu0lhEaTKMFwI3a4SN9MsrfYzdBLq2vkwRD1gVviLT8kY9h2CHH6Y%2Bix6609weFtey4ESp60WtyeWMy%2BsmBuhsoKIyuoT%2Bq2R%2FrW5qi3g%2FvzS2j40DoixDP8%2BKP0yUdpXJ4l6Vla%2Bg9vce%2BC4yM5YlUcbA%2F0jLKdpmTwvsdN5z88nAIe08%2F0HgxeG1iv%2B6Hlhjh7uiW0SDzYNI92L401uha3JKYk268UVRzdOzNQvAaJqoXzAc80dAV440NZ1WVVAAMRYQ2KrGJFmDUsq8saWSnjvIj8t78y%2FRa3JRnbHVfyFpfwoDiGpPgjzekyUiKNlU3OMlwuLMmzgvEojllYVE2Z1HhImvsnk%2BuhusTEoB21PAtSFodeFK3iYhXEH9WOG2%2FkOE833sfeG%2Ff5cfHtEFNXgYes0%2FXj7aGivUgJ9XpusCtoNcNYVVnJVrrDo0OmJAutHCpuZul4W9lLcfy7BnuLPT02%2ByXsCTk%2B9zhzswIN04YueNSK%2BPtM0jS88QdLqSLJDTLsuGZJNolm2yO0PXh3UPnz9Ix5bfIAqxPjvETQsDCEiPG4QbqNyhBZISxybLnZYCrW5H3Axp690%2F0BJdXtDZ5ITuM4xj3f4oUHGzc5JeJmZKpp%2FjwKh4wMV%2FV1yx3emLoR0MwbG4K%2F%2BZgVep3PnzXGDHZ6a3i%2Fk%2BJrONDN13%2Bnq6tBTYk4o7cLGhBtqCC4KwacGHpEVuoH5JNro%2FE6JfE6d5RydbiR76k%2BW5wioDHBIjw1euhHjUGRB0y5A97KoaPx6MlL%2BwgboUVtUFRI%2FLemgTpdtF59ii7pab08kuPcfWzs0l%2FRI5takWnFpka0zOgWRtYcuf9aIxZMxlwr6IiGpsb6j2DQUXPl%2FimXI599Ev7fWjoPD78A',
+ 'info_dict': {
+ 'id': '6ZP4cXvo9FaX3VLH7MF4CgY30JFpY_GA',
+ 'ext': 'mp4',
+ 'title': 'Cops investigate gorilla incident at Cincinnati Zoo',
+ 'description': 'md5:fee7441ab8aaeb3c693482394738102b',
+ 'duration': 350,
+ 'timestamp': 1464719713,
+ 'upload_date': '20160531',
+ 'thumbnail': r're:^https?://.*\.jpg$',
+ },
+ 'params': {'skip_download': 'm3u8'},
+ }]
+
+ def _real_extract(self, url):
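+ # The URL fragment is percent-encoded, base64-encoded, raw-DEFLATE-compressed JSON; the negative wbits value tells zlib to expect a headerless stream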
+ item = traverse_obj(self._parse_json(zlib.decompress(base64.b64decode(
+ urllib.parse.unquote(self._match_id(url))),
+ -zlib.MAX_WBITS).decode(), None), ('video', 'items', 0, {dict})) or {}
+
+ video_id = item['mpxRefId']
+ video_url = self._get_video_url(item)
+ if not video_url:
+ # Old embeds redirect the user to Paramount+, but most of those links 404
+ pplus_url = f'https://www.paramountplus.com/shows/video/{video_id}'
+ try:
+ self._request_webpage(HEADRequest(pplus_url), video_id)
+ return self.url_result(pplus_url, ParamountPlusIE)
+ except ExtractorError:
+ self.raise_no_formats('This video is no longer available', True, video_id)
+
+ return self._extract_video(item, video_url, video_id)
+
+
+class CBSNewsIE(CBSNewsBaseIE):
+ IE_NAME = 'cbsnews'
+ IE_DESC = 'CBS News'
+ _VALID_URL = r'https?://(?:www\.)?cbsnews\.com/(?:news|video)/(?P<id>[\w-]+)'
+
+ _TESTS = [
+ {
+ # 60 minutes
+ 'url': 'http://www.cbsnews.com/news/artificial-intelligence-positioned-to-be-a-game-changer/',
+ 'info_dict': {
+ 'id': 'Y_nf_aEg6WwO9OLAq0MpKaPgfnBUxfW4',
+ 'ext': 'flv',
+ 'title': 'Artificial Intelligence, real-life applications',
+ 'description': 'md5:a7aaf27f1b4777244de8b0b442289304',
+ 'thumbnail': r're:^https?://.*\.jpg$',
+ 'duration': 317,
+ 'uploader': 'CBSI-NEW',
+ 'timestamp': 1476046464,
+ 'upload_date': '20161009',
+ },
+ 'skip': 'This video is no longer available',
+ },
+ {
+ 'url': 'https://www.cbsnews.com/video/fort-hood-shooting-army-downplays-mental-illness-as-cause-of-attack/',
+ 'info_dict': {
+ 'id': 'SNJBOYzXiWBOvaLsdzwH8fmtP1SCd91Y',
+ 'ext': 'mp4',
+ 'title': 'Fort Hood shooting: Army downplays mental illness as cause of attack',
+ 'description': 'md5:4a6983e480542d8b333a947bfc64ddc7',
+ 'upload_date': '20140404',
+ 'timestamp': 1396650660,
+ 'thumbnail': r're:^https?://.*\.jpg$',
+ 'duration': 205,
+ 'subtitles': {
+ 'en': [{
+ 'ext': 'dfxp',
+ }],
+ },
+ },
+ 'params': {
+ 'skip_download': 'm3u8',
+ },
+ },
+ {
+ # 48 hours
+ 'url': 'http://www.cbsnews.com/news/maria-ridulph-murder-will-the-nations-oldest-cold-case-to-go-to-trial-ever-get-solved/',
+ 'info_dict': {
+ 'id': 'maria-ridulph-murder-will-the-nations-oldest-cold-case-to-go-to-trial-ever-get-solved',
+ 'title': 'Cold as Ice',
+ 'description': 'Can a childhood memory solve the 1957 murder of 7-year-old Maria Ridulph?',
+ },
+ 'playlist_mincount': 7,
+ },
+ {
+ 'url': 'https://www.cbsnews.com/video/032823-cbs-evening-news/',
+ 'info_dict': {
+ 'id': '_2wuO7hD9LwtyM_TwSnVwnKp6kxlcXgE',
+ 'ext': 'mp4',
+ 'title': 'CBS Evening News, March 28, 2023',
+ 'description': 'md5:db20615aae54adc1d55a1fd69dc75d13',
+ 'duration': 1189,
+ 'timestamp': 1680042600,
+ 'upload_date': '20230328',
+ 'season': 'Season 2023',
+ 'season_number': 2023,
+ 'episode': 'Episode 83',
+ 'episode_number': 83,
+ 'thumbnail': r're:^https?://.*\.jpg$',
+ },
+ 'params': {
+ 'skip_download': 'm3u8',
+ },
+ },
+ ]
+
+ def _real_extract(self, url):
+ display_id = self._match_id(url)
+ webpage = self._download_webpage(url, display_id)
+
+ playlist = self._extract_playlist(webpage, display_id)
+ if playlist:
+ return playlist
+
+ item = self._get_item(webpage, display_id)
+ video_id = item.get('mpxRefId') or display_id
+ video_url = self._get_video_url(item)
+ if not video_url:
+ self.raise_no_formats('No video content was found', expected=True, video_id=video_id)
+
+ return self._extract_video(item, video_url, video_id)
+
+
+class CBSLocalBaseIE(CBSNewsBaseIE):
+ def _real_extract(self, url):
+ display_id = self._match_id(url)
+ webpage = self._download_webpage(url, display_id)
+
+ item = self._get_item(webpage, display_id)
+ video_id = item.get('mpxRefId') or display_id
+ anvato_id = None
+ video_url = self._get_video_url(item)
+
+ if not video_url:
+ anv_params = self._search_regex(
+ r'<iframe[^>]+\bdata-src="https?://w3\.mp\.lura\.live/player/prod/v3/anvload\.html\?key=([^"]+)"',
+ webpage, 'Anvato URL', default=None)
+
+ if not anv_params:
+ playlist = self._extract_playlist(webpage, display_id)
+ if playlist:
+ return playlist
+ self.raise_no_formats('No video content was found', expected=True, video_id=video_id)
+
+ anv_data = self._parse_json(base64.urlsafe_b64decode(f'{anv_params}===').decode(), video_id)
+ anvato_id = anv_data['v']
+ return self.url_result(
+ smuggle_url(f'anvato:{anv_data.get("anvack") or self._ANVACK}:{anvato_id}', {
+ 'token': anv_data.get('token') or 'default',
+ }), AnvatoIE, url_transparent=True, _old_archive_ids=[make_archive_id(self, anvato_id)])
+
+ return self._extract_video(item, video_url, video_id)
+
+
+class CBSLocalIE(CBSLocalBaseIE):
+ _VALID_URL = rf'https?://(?:www\.)?cbsnews\.com/(?:{CBSNewsBaseIE._LOCALE_RE})/(?:live/)?video/(?P<id>[\w-]+)'
+ _TESTS = [{
+ # Anvato video via defaultPayload JSON
+ 'url': 'https://www.cbsnews.com/newyork/video/1st-cannabis-dispensary-opens-in-queens/',
+ 'info_dict': {
+ 'id': '6376747',
+ 'ext': 'mp4',
+ 'title': '1st cannabis dispensary opens in Queens',
+ 'description': 'The dispensary is women-owned and located in Jamaica.',
+ 'uploader': 'CBS',
+ 'duration': 20,
+ 'timestamp': 1680193657,
+ 'upload_date': '20230330',
+ 'categories': ['Stations\\Spoken Word\\WCBSTV', 'Content\\Google', 'Content\\News', 'Content\\News\\Local News'],
+ 'tags': 'count:11',
+ 'thumbnail': 're:^https?://.*',
+ '_old_archive_ids': ['cbslocal 6376747'],
+ },
+ 'params': {'skip_download': 'm3u8'},
+ }, {
+ # cbsnews.com video via defaultPayload JSON
+ 'url': 'https://www.cbsnews.com/newyork/live/video/20230330171655-the-city-is-sounding-the-alarm-on-dangerous-social-media-challenges/',
+ 'info_dict': {
+ 'id': 'sJqfw7YvgSC6ant2zVmzt3y1jYKoL5J3',
+ 'ext': 'mp4',
+ 'title': 'the city is sounding the alarm on dangerous social media challenges',
+ 'description': 'md5:8eccc9b1b73be5138a52e9c4350d2cd6',
+ 'thumbnail': 'https://images-cbsn.cbsnews.com/prod/2023/03/30/story_22509622_1680196925.jpg',
+ 'duration': 41.0,
+ 'timestamp': 1680196615,
+ 'upload_date': '20230330',
+ },
+ 'params': {'skip_download': 'm3u8'},
+ }]
+
+
+class CBSLocalArticleIE(CBSLocalBaseIE):
+ _VALID_URL = rf'https?://(?:www\.)?cbsnews\.com/(?:{CBSNewsBaseIE._LOCALE_RE})/news/(?P<id>[\w-]+)'
+ _TESTS = [{
+ # Anvato video via iframe embed
+ 'url': 'https://www.cbsnews.com/newyork/news/mta-station-agents-leaving-their-booths-to-provide-more-direct-customer-service/',
+ 'playlist_count': 2,
+ 'info_dict': {
+ 'id': 'mta-station-agents-leaving-their-booths-to-provide-more-direct-customer-service',
+ 'title': 'MTA station agents begin leaving their booths to provide more direct customer service',
+ 'description': 'The more than 2,200 agents will provide face-to-face customer service to passengers.',
+ },
+ }, {
+ 'url': 'https://www.cbsnews.com/losangeles/news/safety-advocates-say-fatal-car-seat-failures-are-public-health-crisis/',
+ 'md5': 'f0ee3081e3843f575fccef901199b212',
+ 'info_dict': {
+ 'id': '3401037',
+ 'ext': 'mp4',
+ 'title': 'Safety Advocates Say Fatal Car Seat Failures Are \'Public Health Crisis\'',
+ 'thumbnail': 're:^https?://.*',
+ 'timestamp': 1463440500,
+ 'upload_date': '20160516',
+ },
+ 'skip': 'Video has been removed',
+ }]
+
+
+class CBSNewsLiveBaseIE(CBSNewsBaseIE):
+ def _get_id(self, url):
+ raise NotImplementedError('This method must be implemented by subclasses')
+
+ def _real_extract(self, url):
+ video_id = self._get_id(url)
+ if not video_id:
+ raise ExtractorError('Livestream is not available', expected=True)
+
+ data = traverse_obj(self._download_json(
+ 'https://feeds-cbsn.cbsnews.com/2.0/rundown/', video_id, query={
+ 'partner': 'cbsnsite',
+ 'edition': video_id,
+ 'type': 'live',
+ }), ('navigation', 'data', 0, {dict}))
+
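+ # Prefer the DAI stream URL (likely server-side ad insertion) and fall back to the plain videoUrl/base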
+ video_url = traverse_obj(data, (('videoUrlDAI', ('videoUrl', 'base')), {url_or_none}), get_all=False)
+ if not video_url:
+ raise UserNotLive(video_id=video_id)
+
+ formats, subtitles = self._extract_m3u8_formats_and_subtitles(video_url, video_id, 'mp4', m3u8_id='hls')
+
+ return {
+ 'id': video_id,
+ 'formats': formats,
+ 'subtitles': subtitles,
+ 'is_live': True,
+ **traverse_obj(data, {
+ 'title': 'headline',
+ 'description': 'rundown_slug',
+ 'thumbnail': ('images', 'thumbnail_url_hd', {url_or_none}),
+ }),
+ }
+
+
+class CBSLocalLiveIE(CBSNewsLiveBaseIE):
+ _VALID_URL = rf'https?://(?:www\.)?cbsnews\.com/(?P<id>{CBSNewsBaseIE._LOCALE_RE})/live/?(?:[?#]|$)'
+ _TESTS = [{
+ 'url': 'https://www.cbsnews.com/losangeles/live/',
+ 'info_dict': {
+ 'id': 'CBSN-LA',
+ 'ext': 'mp4',
+ 'title': str,
+ 'description': r're:KCBS/CBSN_LA.CRISPIN.\w+.RUNDOWN \w+ \w+',
+ 'thumbnail': r're:^https?://.*\.jpg$',
+ 'live_status': 'is_live',
+ },
+ 'params': {'skip_download': 'm3u8'},
+ }]
+
+ def _get_id(self, url):
+ return format_field(self._LOCALES, self._match_id(url), 'CBSN-%s')
+
+
+class CBSNewsLiveIE(CBSNewsLiveBaseIE):
+ IE_NAME = 'cbsnews:live'
+ IE_DESC = 'CBS News Livestream'
+ _VALID_URL = r'https?://(?:www\.)?cbsnews\.com/live/?(?:[?#]|$)'
+ _TESTS = [{
+ 'url': 'https://www.cbsnews.com/live/',
+ 'info_dict': {
+ 'id': 'CBSN-US',
+ 'ext': 'mp4',
+ 'title': str,
+ 'description': r're:\w+ \w+ CRISPIN RUNDOWN',
+ 'thumbnail': r're:^https?://.*\.jpg$',
+ 'live_status': 'is_live',
+ },
+ 'params': {'skip_download': 'm3u8'},
+ }]
+
+ def _get_id(self, url):
+ return 'CBSN-US'
+
+
+class CBSNewsLiveVideoIE(InfoExtractor):
+ IE_NAME = 'cbsnews:livevideo'
+ IE_DESC = 'CBS News Live Videos'
+ _VALID_URL = r'https?://(?:www\.)?cbsnews\.com/live/video/(?P<id>[^/?#]+)'
+
+ # Live videos get deleted soon. See http://www.cbsnews.com/live/ for the latest examples
+ _TESTS = [{
+ 'url': 'http://www.cbsnews.com/live/video/clinton-sanders-prepare-to-face-off-in-nh/',
+ 'info_dict': {
+ 'id': 'clinton-sanders-prepare-to-face-off-in-nh',
+ 'ext': 'mp4',
+ 'title': 'Clinton, Sanders Prepare To Face Off In NH',
+ 'duration': 334,
+ },
+ 'skip': 'Video gone',
+ }]
+
+ def _real_extract(self, url):
+ display_id = self._match_id(url)
+
+ video_info = self._download_json(
+ 'http://feeds.cbsn.cbsnews.com/rundown/story', display_id, query={
+ 'device': 'desktop',
+ 'dvr_slug': display_id,
+ })
+
+ return {
+ 'id': display_id,
+ 'display_id': display_id,
+ 'formats': self._extract_akamai_formats(video_info['url'], display_id),
+ **traverse_obj(video_info, {
+ 'title': 'headline',
+ 'thumbnail': ('thumbnail_url_hd', {url_or_none}),
+ 'duration': ('segmentDur', {parse_duration}),
+ }),
+ }
diff --git a/yt_dlp/extractor/cbssports.py b/yt_dlp/extractor/cbssports.py
new file mode 100644
index 0000000..b9c82da
--- /dev/null
+++ b/yt_dlp/extractor/cbssports.py
@@ -0,0 +1,111 @@
+# from .cbs import CBSBaseIE
+from .common import InfoExtractor
+from ..utils import (
+ int_or_none,
+ try_get,
+)
+
+
+# class CBSSportsEmbedIE(CBSBaseIE):
+class CBSSportsEmbedIE(InfoExtractor):
+ _WORKING = False
+ IE_NAME = 'cbssports:embed'
+ _VALID_URL = r'''(?ix)https?://(?:(?:www\.)?cbs|embed\.247)sports\.com/player/embed.+?
+ (?:
+ ids%3D(?P<id>[\da-f]{8}-(?:[\da-f]{4}-){3}[\da-f]{12})|
+ pcid%3D(?P<pcid>\d+)
+ )'''
+ _TESTS = [{
+ 'url': 'https://www.cbssports.com/player/embed/?args=player_id%3Db56c03a6-231a-4bbe-9c55-af3c8a8e9636%26ids%3Db56c03a6-231a-4bbe-9c55-af3c8a8e9636%26resizable%3D1%26autoplay%3Dtrue%26domain%3Dcbssports.com%26comp_ads_enabled%3Dfalse%26watchAndRead%3D0%26startTime%3D0%26env%3Dprod',
+ 'only_matching': True,
+ }, {
+ 'url': 'https://embed.247sports.com/player/embed/?args=%3fplayer_id%3d1827823171591%26channel%3dcollege-football-recruiting%26pcid%3d1827823171591%26width%3d640%26height%3d360%26autoplay%3dTrue%26comp_ads_enabled%3dFalse%26uvpc%3dhttps%253a%252f%252fwww.cbssports.com%252fapi%252fcontent%252fvideo%252fconfig%252f%253fcfg%253duvp_247sports_v4%2526partner%253d247%26uvpc_m%3dhttps%253a%252f%252fwww.cbssports.com%252fapi%252fcontent%252fvideo%252fconfig%252f%253fcfg%253duvp_247sports_m_v4%2526partner_m%253d247_mobile%26utag%3d247sportssite%26resizable%3dTrue',
+ 'only_matching': True,
+ }]
+
+ # def _extract_video_info(self, filter_query, video_id):
+ # return self._extract_feed_info('dJ5BDC', 'VxxJg8Ymh8sE', filter_query, video_id)
+
+ def _real_extract(self, url):
+ uuid, pcid = self._match_valid_url(url).groups()
+ query = {'id': uuid} if uuid else {'pcid': pcid}
+ video = self._download_json(
+ 'https://www.cbssports.com/api/content/video/',
+ uuid or pcid, query=query)[0]
+ video_id = video['id']
+ title = video['title']
+ metadata = video.get('metaData') or {}
+ # return self._extract_video_info('byId=%d' % metadata['mpxOutletId'], video_id)
+ # return self._extract_video_info('byGuid=' + metadata['mpxRefId'], video_id)
+
+ formats = self._extract_m3u8_formats(
+ metadata['files'][0]['url'], video_id, 'mp4',
+ 'm3u8_native', m3u8_id='hls', fatal=False)
+
+ image = video.get('image')
+ thumbnails = None
+ if image:
+ image_path = image.get('path')
+ if image_path:
+ thumbnails = [{
+ 'url': image_path,
+ 'width': int_or_none(image.get('width')),
+ 'height': int_or_none(image.get('height')),
+ 'filesize': int_or_none(image.get('size')),
+ }]
+
+ return {
+ 'id': video_id,
+ 'title': title,
+ 'formats': formats,
+ 'thumbnails': thumbnails,
+ 'description': video.get('description'),
+ 'timestamp': int_or_none(try_get(video, lambda x: x['dateCreated']['epoch'])),
+ 'duration': int_or_none(metadata.get('duration')),
+ }
+
+
+class CBSSportsBaseIE(InfoExtractor):
+ def _real_extract(self, url):
+ display_id = self._match_id(url)
+ webpage = self._download_webpage(url, display_id)
+ iframe_url = self._search_regex(
+ r'<iframe[^>]+(?:data-)?src="(https?://[^/]+/player/embed[^"]+)"',
+ webpage, 'embed url')
+ return self.url_result(iframe_url, CBSSportsEmbedIE.ie_key())
+
+
+class CBSSportsIE(CBSSportsBaseIE):
+ _WORKING = False
+ IE_NAME = 'cbssports'
+ _VALID_URL = r'https?://(?:www\.)?cbssports\.com/[^/]+/video/(?P<id>[^/?#&]+)'
+ _TESTS = [{
+ 'url': 'https://www.cbssports.com/college-football/video/cover-3-stanford-spring-gleaning/',
+ 'info_dict': {
+ 'id': 'b56c03a6-231a-4bbe-9c55-af3c8a8e9636',
+ 'ext': 'mp4',
+ 'title': 'Cover 3: Stanford Spring Gleaning',
+ 'description': 'The Cover 3 crew break down everything you need to know about the Stanford Cardinal this spring.',
+ 'timestamp': 1617218398,
+ 'upload_date': '20210331',
+ 'duration': 502,
+ },
+ }]
+
+
+class TwentyFourSevenSportsIE(CBSSportsBaseIE):
+ _WORKING = False
+ IE_NAME = '247sports'
+ _VALID_URL = r'https?://(?:www\.)?247sports\.com/Video/(?:[^/?#&]+-)?(?P<id>\d+)'
+ _TESTS = [{
+ 'url': 'https://247sports.com/Video/2021-QB-Jake-Garcia-senior-highlights-through-five-games-10084854/',
+ 'info_dict': {
+ 'id': '4f1265cb-c3b5-44a8-bb1d-1914119a0ccc',
+ 'ext': 'mp4',
+ 'title': '2021 QB Jake Garcia senior highlights through five games',
+ 'description': 'md5:8cb67ebed48e2e6adac1701e0ff6e45b',
+ 'timestamp': 1607114223,
+ 'upload_date': '20201204',
+ 'duration': 208,
+ },
+ }]
diff --git a/yt_dlp/extractor/ccc.py b/yt_dlp/extractor/ccc.py
new file mode 100644
index 0000000..ca6b82c
--- /dev/null
+++ b/yt_dlp/extractor/ccc.py
@@ -0,0 +1,115 @@
+from .common import InfoExtractor
+from ..utils import (
+ int_or_none,
+ parse_iso8601,
+ try_get,
+ url_or_none,
+)
+
+
+class CCCIE(InfoExtractor):
+ IE_NAME = 'media.ccc.de'
+ _VALID_URL = r'https?://(?:www\.)?media\.ccc\.de/v/(?P<id>[^/?#&]+)'
+
+ _TESTS = [{
+ 'url': 'https://media.ccc.de/v/30C3_-_5443_-_en_-_saal_g_-_201312281830_-_introduction_to_processor_design_-_byterazor#video',
+ 'md5': '3a1eda8f3a29515d27f5adb967d7e740',
+ 'info_dict': {
+ 'id': '1839',
+ 'ext': 'mp4',
+ 'title': 'Introduction to Processor Design',
+ 'creator': 'byterazor',
+ 'description': 'md5:df55f6d073d4ceae55aae6f2fd98a0ac',
+ 'thumbnail': r're:^https?://.*\.jpg$',
+ 'upload_date': '20131228',
+ 'timestamp': 1388188800,
+ 'duration': 3710,
+ 'tags': list,
+ }
+ }, {
+ 'url': 'https://media.ccc.de/v/32c3-7368-shopshifting#download',
+ 'only_matching': True,
+ }]
+
+ def _real_extract(self, url):
+ display_id = self._match_id(url)
+ webpage = self._download_webpage(url, display_id)
+ event_id = self._search_regex(r"data-id='(\d+)'", webpage, 'event id')
+ event_data = self._download_json('https://media.ccc.de/public/events/%s' % event_id, event_id)
+
+ formats = []
+ for recording in event_data.get('recordings', []):
+ recording_url = recording.get('recording_url')
+ if not recording_url:
+ continue
+ language = recording.get('language')
+ folder = recording.get('folder')
+ format_id = None
+ if language:
+ format_id = language
+ if folder:
+ if language:
+ format_id += '-' + folder
+ else:
+ format_id = folder
+ # 'folder' may be absent, hence the fallback to an empty string
+ vcodec = 'h264' if 'h264' in (folder or '') else (
+ 'none' if folder in ('mp3', 'opus') else None
+ )
+ formats.append({
+ 'format_id': format_id,
+ 'url': recording_url,
+ 'width': int_or_none(recording.get('width')),
+ 'height': int_or_none(recording.get('height')),
+ 'filesize': int_or_none(recording.get('size'), invscale=1024 * 1024),
+ 'language': language,
+ 'vcodec': vcodec,
+ })
+
+ return {
+ 'id': event_id,
+ 'display_id': display_id,
+ 'title': event_data['title'],
+ 'creator': try_get(event_data, lambda x: ', '.join(x['persons'])),
+ 'description': event_data.get('description'),
+ 'thumbnail': event_data.get('thumb_url'),
+ 'timestamp': parse_iso8601(event_data.get('date')),
+ 'duration': int_or_none(event_data.get('length')),
+ 'view_count': int_or_none(event_data.get('view_count')),
+ 'tags': event_data.get('tags'),
+ 'formats': formats,
+ }
+
+
+class CCCPlaylistIE(InfoExtractor):
+ IE_NAME = 'media.ccc.de:lists'
+ _VALID_URL = r'https?://(?:www\.)?media\.ccc\.de/c/(?P<id>[^/?#&]+)'
+ _TESTS = [{
+ 'url': 'https://media.ccc.de/c/30c3',
+ 'info_dict': {
+ 'title': '30C3',
+ 'id': '30c3',
+ },
+ 'playlist_count': 135,
+ }, {
+ 'url': 'https://media.ccc.de/c/DS2023',
+ 'info_dict': {
+ 'title': 'Datenspuren 2023',
+ 'id': 'DS2023',
+ },
+ 'playlist_count': 37,
+ }]
+
+ def _real_extract(self, url):
+ playlist_id = self._match_id(url)
+
+ conf = self._download_json(
+ 'https://media.ccc.de/public/conferences/' + playlist_id,
+ playlist_id)
+
+ entries = []
+ for e in conf['events']:
+ event_url = url_or_none(e.get('frontend_link'))
+ if event_url:
+ entries.append(self.url_result(event_url, ie=CCCIE.ie_key()))
+
+ return self.playlist_result(entries, playlist_id, conf.get('title'))
diff --git a/yt_dlp/extractor/ccma.py b/yt_dlp/extractor/ccma.py
new file mode 100644
index 0000000..ab840f3
--- /dev/null
+++ b/yt_dlp/extractor/ccma.py
@@ -0,0 +1,147 @@
+from .common import InfoExtractor
+from ..utils import (
+ clean_html,
+ determine_ext,
+ int_or_none,
+ parse_duration,
+ parse_resolution,
+ try_get,
+ unified_timestamp,
+ url_or_none,
+)
+
+
+class CCMAIE(InfoExtractor):
+ _VALID_URL = r'https?://(?:www\.)?ccma\.cat/(?:[^/]+/)*?(?P<type>video|audio)/(?P<id>\d+)'
+ _TESTS = [{
+ 'url': 'http://www.ccma.cat/tv3/alacarta/lespot-de-la-marato-de-tv3/lespot-de-la-marato-de-tv3/video/5630208/',
+ 'md5': '7296ca43977c8ea4469e719c609b0871',
+ 'info_dict': {
+ 'id': '5630208',
+ 'ext': 'mp4',
+ 'title': 'L\'espot de La Marató de TV3',
+ 'description': 'md5:f12987f320e2f6e988e9908e4fe97765',
+ 'timestamp': 1478608140,
+ 'upload_date': '20161108',
+ 'age_limit': 0,
+ }
+ }, {
+ 'url': 'http://www.ccma.cat/catradio/alacarta/programa/el-consell-de-savis-analitza-el-derbi/audio/943685/',
+ 'md5': 'fa3e38f269329a278271276330261425',
+ 'info_dict': {
+ 'id': '943685',
+ 'ext': 'mp3',
+ 'title': 'El Consell de Savis analitza el derbi',
+ 'description': 'md5:e2a3648145f3241cb9c6b4b624033e53',
+ 'upload_date': '20170512',
+ 'timestamp': 1494622500,
+ 'vcodec': 'none',
+ 'categories': ['Esports'],
+ }
+ }, {
+ 'url': 'http://www.ccma.cat/tv3/alacarta/crims/crims-josep-tallada-lespereu-me-capitol-1/video/6031387/',
+ 'md5': 'b43c3d3486f430f3032b5b160d80cbc3',
+ 'info_dict': {
+ 'id': '6031387',
+ 'ext': 'mp4',
+ 'title': 'Crims - Josep Talleda, l\'"Espereu-me" (capítol 1)',
+ 'description': 'md5:7cbdafb640da9d0d2c0f62bad1e74e60',
+ 'timestamp': 1582577700,
+ 'upload_date': '20200224',
+ 'subtitles': 'mincount:4',
+ 'age_limit': 16,
+ 'series': 'Crims',
+ }
+ }]
+
+ def _real_extract(self, url):
+ media_type, media_id = self._match_valid_url(url).groups()
+
+ media = self._download_json(
+ 'http://dinamics.ccma.cat/pvideo/media.jsp', media_id, query={
+ 'media': media_type,
+ 'idint': media_id,
+ 'format': 'dm',
+ })
+
+ formats = []
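+ # media['media']['url'] may be either a single URL or a list of labelled quality variants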
+ media_url = media['media']['url']
+ if isinstance(media_url, list):
+ for format_ in media_url:
+ format_url = url_or_none(format_.get('file'))
+ if not format_url:
+ continue
+ if determine_ext(format_url) == 'mpd':
+ formats.extend(self._extract_mpd_formats(
+ format_url, media_id, mpd_id='dash', fatal=False))
+ continue
+ label = format_.get('label')
+ f = parse_resolution(label)
+ f.update({
+ 'url': format_url,
+ 'format_id': label,
+ })
+ formats.append(f)
+ else:
+ formats.append({
+ 'url': media_url,
+ 'vcodec': 'none' if media_type == 'audio' else None,
+ })
+
+ informacio = media['informacio']
+ title = informacio['titol']
+ durada = informacio.get('durada') or {}
+ duration = int_or_none(durada.get('milisegons'), 1000) or parse_duration(durada.get('text'))
+ tematica = try_get(informacio, lambda x: x['tematica']['text'])
+
+ data_utc = try_get(informacio, lambda x: x['data_emissio']['utc'])
+ timestamp = unified_timestamp(data_utc)
+
+ subtitles = {}
+ subtitols = media.get('subtitols') or []
+ if isinstance(subtitols, dict):
+ subtitols = [subtitols]
+ for st in subtitols:
+ sub_url = st.get('url')
+ if sub_url:
+ subtitles.setdefault(
+ st.get('iso') or st.get('text') or 'ca', []).append({
+ 'url': sub_url,
+ })
+
+ thumbnails = []
+ imatges = media.get('imatges', {})
+ if imatges:
+ thumbnail_url = imatges.get('url')
+ if thumbnail_url:
+ thumbnails = [{
+ 'url': thumbnail_url,
+ 'width': int_or_none(imatges.get('amplada')),
+ 'height': int_or_none(imatges.get('alcada')),
+ }]
+
+ age_limit = None
+ codi_etic = try_get(informacio, lambda x: x['codi_etic']['id'])
+ if codi_etic:
+ codi_etic_s = codi_etic.split('_')
+ if len(codi_etic_s) == 2:
+ if codi_etic_s[1] == 'TP':
+ age_limit = 0
+ else:
+ age_limit = int_or_none(codi_etic_s[1])
+
+ return {
+ 'id': media_id,
+ 'title': title,
+ 'description': clean_html(informacio.get('descripcio')),
+ 'duration': duration,
+ 'timestamp': timestamp,
+ 'thumbnails': thumbnails,
+ 'subtitles': subtitles,
+ 'formats': formats,
+ 'age_limit': age_limit,
+ 'alt_title': informacio.get('titol_complet'),
+ 'episode_number': int_or_none(informacio.get('capitol')),
+ 'categories': [tematica] if tematica else None,
+ 'series': informacio.get('programa'),
+ }
diff --git a/yt_dlp/extractor/cctv.py b/yt_dlp/extractor/cctv.py
new file mode 100644
index 0000000..8552ee5
--- /dev/null
+++ b/yt_dlp/extractor/cctv.py
@@ -0,0 +1,201 @@
+import re
+
+from .common import InfoExtractor
+from ..compat import compat_str
+from ..utils import (
+ float_or_none,
+ try_get,
+ unified_timestamp,
+)
+
+
+class CCTVIE(InfoExtractor):
+ IE_DESC = '央视网'
+ _VALID_URL = r'https?://(?:(?:[^/]+)\.(?:cntv|cctv)\.(?:com|cn)|(?:www\.)?ncpa-classic\.com)/(?:[^/]+/)*?(?P<id>[^/?#&]+?)(?:/index)?(?:\.s?html|[?#&]|$)'
+ _TESTS = [{
+ # fo.addVariable("videoCenterId","id")
+ 'url': 'http://sports.cntv.cn/2016/02/12/ARTIaBRxv4rTT1yWf1frW2wi160212.shtml',
+ 'md5': 'd61ec00a493e09da810bf406a078f691',
+ 'info_dict': {
+ 'id': '5ecdbeab623f4973b40ff25f18b174e8',
+ 'ext': 'mp4',
+ 'title': '[NBA]二少联手砍下46分 雷霆主场击败鹈鹕(快讯)',
+ 'description': 'md5:7e14a5328dc5eb3d1cd6afbbe0574e95',
+ 'duration': 98,
+ 'uploader': 'songjunjie',
+ 'timestamp': 1455279956,
+ 'upload_date': '20160212',
+ },
+ }, {
+ # var guid = "id"
+ 'url': 'http://tv.cctv.com/2016/02/05/VIDEUS7apq3lKrHG9Dncm03B160205.shtml',
+ 'info_dict': {
+ 'id': 'efc5d49e5b3b4ab2b34f3a502b73d3ae',
+ 'ext': 'mp4',
+ 'title': '[赛车]“车王”舒马赫恢复情况成谜(快讯)',
+ 'description': '2月4日,蒙特泽莫罗透露了关于“车王”舒马赫恢复情况,但情况是否属实遭到了质疑。',
+ 'duration': 37,
+ 'uploader': 'shujun',
+ 'timestamp': 1454677291,
+ 'upload_date': '20160205',
+ },
+ 'params': {
+ 'skip_download': True,
+ },
+ }, {
+ # changePlayer('id')
+ 'url': 'http://english.cntv.cn/special/four_comprehensives/index.shtml',
+ 'info_dict': {
+ 'id': '4bb9bb4db7a6471ba85fdeda5af0381e',
+ 'ext': 'mp4',
+ 'title': 'NHnews008 ANNUAL POLITICAL SEASON',
+ 'description': 'Four Comprehensives',
+ 'duration': 60,
+ 'uploader': 'zhangyunlei',
+ 'timestamp': 1425385521,
+ 'upload_date': '20150303',
+ },
+ 'params': {
+ 'skip_download': True,
+ },
+ }, {
+ # loadvideo('id')
+ 'url': 'http://cctv.cntv.cn/lm/tvseries_russian/yilugesanghua/index.shtml',
+ 'info_dict': {
+ 'id': 'b15f009ff45c43968b9af583fc2e04b2',
+ 'ext': 'mp4',
+ 'title': 'Путь,усыпанный космеями Серия 1',
+ 'description': 'Путь, усыпанный космеями',
+ 'duration': 2645,
+ 'uploader': 'renxue',
+ 'timestamp': 1477479241,
+ 'upload_date': '20161026',
+ },
+ 'params': {
+ 'skip_download': True,
+ },
+ }, {
+ # var initMyAray = 'id'
+ 'url': 'http://www.ncpa-classic.com/2013/05/22/VIDE1369219508996867.shtml',
+ 'info_dict': {
+ 'id': 'a194cfa7f18c426b823d876668325946',
+ 'ext': 'mp4',
+ 'title': '小泽征尔音乐塾 音乐梦想无国界',
+ 'duration': 2173,
+ 'timestamp': 1369248264,
+ 'upload_date': '20130522',
+ },
+ 'params': {
+ 'skip_download': True,
+ },
+ }, {
+ # videoCenterId: "id"
+ 'url': 'http://news.cctv.com/2024/02/21/ARTIcU5tKIOIF2myEGCATkLo240221.shtml',
+ 'info_dict': {
+ 'id': '5c846c0518444308ba32c4159df3b3e0',
+ 'ext': 'mp4',
+ 'title': '《平“语”近人——习近平喜欢的典故》第三季 第5集:风物长宜放眼量',
+ 'uploader': 'yangjuan',
+ 'timestamp': 1708554940,
+ 'upload_date': '20240221',
+ },
+ 'params': {
+ 'skip_download': True,
+ },
+ }, {
+ # var ids = ["id"]
+ 'url': 'http://www.ncpa-classic.com/clt/more/416/index.shtml',
+ 'info_dict': {
+ 'id': 'a8606119a4884588a79d81c02abecc16',
+ 'ext': 'mp3',
+ 'title': '来自维也纳的新年贺礼',
+ 'description': 'md5:f13764ae8dd484e84dd4b39d5bcba2a7',
+ 'duration': 1578,
+ 'uploader': 'djy',
+ 'timestamp': 1482942419,
+ 'upload_date': '20161228',
+ },
+ 'params': {
+ 'skip_download': True,
+ },
+ 'expected_warnings': ['Failed to download m3u8 information'],
+ }, {
+ 'url': 'http://ent.cntv.cn/2016/01/18/ARTIjprSSJH8DryTVr5Bx8Wb160118.shtml',
+ 'only_matching': True,
+ }, {
+ 'url': 'http://tv.cntv.cn/video/C39296/e0210d949f113ddfb38d31f00a4e5c44',
+ 'only_matching': True,
+ }, {
+ 'url': 'http://english.cntv.cn/2016/09/03/VIDEhnkB5y9AgHyIEVphCEz1160903.shtml',
+ 'only_matching': True,
+ }, {
+ 'url': 'http://tv.cctv.com/2016/09/07/VIDE5C1FnlX5bUywlrjhxXOV160907.shtml',
+ 'only_matching': True,
+ }, {
+ 'url': 'http://tv.cntv.cn/video/C39296/95cfac44cabd3ddc4a9438780a4e5c44',
+ 'only_matching': True,
+ }]
+
+ def _real_extract(self, url):
+ video_id = self._match_id(url)
+ webpage = self._download_webpage(url, video_id)
+
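+ # The id is embedded in several markup variants (see the test cases above); try each known pattern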
+ video_id = self._search_regex(
+ [r'var\s+guid\s*=\s*["\']([\da-fA-F]+)',
+ r'videoCenterId(?:["\']\s*,|:)\s*["\']([\da-fA-F]+)',
+ r'changePlayer\s*\(\s*["\']([\da-fA-F]+)',
+ r'load[Vv]ideo\s*\(\s*["\']([\da-fA-F]+)',
+ r'var\s+initMyAray\s*=\s*["\']([\da-fA-F]+)',
+ r'var\s+ids\s*=\s*\[["\']([\da-fA-F]+)'],
+ webpage, 'video id')
+
+ data = self._download_json(
+ 'http://vdn.apps.cntv.cn/api/getHttpVideoInfo.do', video_id,
+ query={
+ 'pid': video_id,
+ 'url': url,
+ 'idl': 32,
+ 'idlr': 32,
+ 'modifyed': 'false',
+ })
+
+ title = data['title']
+
+ formats = []
+
+ video = data.get('video')
+ if isinstance(video, dict):
+ for quality, chapters_key in enumerate(('lowChapters', 'chapters')):
+ video_url = try_get(
+ video, lambda x: x[chapters_key][0]['url'], compat_str)
+ if video_url:
+ formats.append({
+ 'url': video_url,
+ 'format_id': 'http',
+ 'quality': quality,
+ # Sample clip
+ 'preference': -10
+ })
+
+ hls_url = try_get(data, lambda x: x['hls_url'], compat_str)
+ if hls_url:
+ hls_url = re.sub(r'maxbr=\d+&?', '', hls_url)
+ formats.extend(self._extract_m3u8_formats(
+ hls_url, video_id, 'mp4', entry_protocol='m3u8_native',
+ m3u8_id='hls', fatal=False))
+
+ uploader = data.get('editer_name')
+ description = self._html_search_meta(
+ 'description', webpage, default=None)
+ timestamp = unified_timestamp(data.get('f_pgmtime'))
+ duration = float_or_none(try_get(video, lambda x: x['totalLength']))
+
+ return {
+ 'id': video_id,
+ 'title': title,
+ 'description': description,
+ 'uploader': uploader,
+ 'timestamp': timestamp,
+ 'duration': duration,
+ 'formats': formats,
+ }
diff --git a/yt_dlp/extractor/cda.py b/yt_dlp/extractor/cda.py
new file mode 100644
index 0000000..1157114
--- /dev/null
+++ b/yt_dlp/extractor/cda.py
@@ -0,0 +1,338 @@
+import base64
+import codecs
+import datetime
+import hashlib
+import hmac
+import json
+import random
+import re
+
+from .common import InfoExtractor
+from ..compat import compat_ord, compat_urllib_parse_unquote
+from ..utils import (
+ ExtractorError,
+ float_or_none,
+ int_or_none,
+ merge_dicts,
+ multipart_encode,
+ parse_duration,
+ random_birthday,
+ traverse_obj,
+ try_call,
+ try_get,
+ urljoin,
+)
+
+
+class CDAIE(InfoExtractor):
+ _VALID_URL = r'https?://(?:(?:www\.)?cda\.pl/video|ebd\.cda\.pl/[0-9]+x[0-9]+)/(?P<id>[0-9a-z]+)'
+ _NETRC_MACHINE = 'cdapl'
+
+ _BASE_URL = 'https://www.cda.pl'
+ _BASE_API_URL = 'https://api.cda.pl'
+ _API_HEADERS = {
+ 'Accept': 'application/vnd.cda.public+json',
+ }
+ # hardcoded in the app
+ _LOGIN_REQUEST_AUTH = 'Basic YzU3YzBlZDUtYTIzOC00MWQwLWI2NjQtNmZmMWMxY2Y2YzVlOklBTm95QlhRRVR6U09MV1hnV3MwMW0xT2VyNWJNZzV4clRNTXhpNGZJUGVGZ0lWUlo5UGVYTDhtUGZaR1U1U3Q'
+ _BEARER_CACHE = 'cda-bearer'
+
+ _TESTS = [{
+ 'url': 'http://www.cda.pl/video/5749950c',
+ 'md5': '6f844bf51b15f31fae165365707ae970',
+ 'info_dict': {
+ 'id': '5749950c',
+ 'ext': 'mp4',
+ 'height': 720,
+ 'title': 'Oto dlaczego przed zakrętem należy zwolnić.',
+ 'description': 'md5:269ccd135d550da90d1662651fcb9772',
+ 'thumbnail': r're:^https?://.*\.jpg$',
+ 'average_rating': float,
+ 'duration': 39,
+ 'age_limit': 0,
+ 'upload_date': '20160221',
+ 'timestamp': 1456078244,
+ }
+ }, {
+ 'url': 'http://www.cda.pl/video/57413289',
+ 'md5': 'a88828770a8310fc00be6c95faf7f4d5',
+ 'info_dict': {
+ 'id': '57413289',
+ 'ext': 'mp4',
+ 'title': 'Lądowanie na lotnisku na Maderze',
+ 'description': 'md5:60d76b71186dcce4e0ba6d4bbdb13e1a',
+ 'thumbnail': r're:^https?://.*\.jpg$',
+ 'uploader': 'crash404',
+ 'view_count': int,
+ 'average_rating': float,
+ 'duration': 137,
+ 'age_limit': 0,
+ }
+ }, {
+ # Age-restricted
+ 'url': 'http://www.cda.pl/video/1273454c4',
+ 'info_dict': {
+ 'id': '1273454c4',
+ 'ext': 'mp4',
+ 'title': 'Bronson (2008) napisy HD 1080p',
+ 'description': 'md5:1b6cb18508daf2dc4e0fa4db77fec24c',
+ 'height': 1080,
+ 'uploader': 'boniek61',
+ 'thumbnail': r're:^https?://.*\.jpg$',
+ 'duration': 5554,
+ 'age_limit': 18,
+ 'view_count': int,
+ 'average_rating': float,
+ },
+ }, {
+ 'url': 'http://ebd.cda.pl/0x0/5749950c',
+ 'only_matching': True,
+ }]
+
+ def _download_age_confirm_page(self, url, video_id, *args, **kwargs):
+ form_data = random_birthday('rok', 'miesiac', 'dzien')
+ form_data.update({'return': url, 'module': 'video', 'module_id': video_id})
+ data, content_type = multipart_encode(form_data)
+ return self._download_webpage(
+ urljoin(url, '/a/validatebirth'), video_id, *args,
+ data=data, headers={
+ 'Referer': url,
+ 'Content-Type': content_type,
+ }, **kwargs)
+
+ def _perform_login(self, username, password):
+ app_version = random.choice((
+ '1.2.88 build 15306',
+ '1.2.174 build 18469',
+ ))
+ android_version = random.randrange(8, 14)
+ phone_model = random.choice((
+ # x-kom.pl top selling Android smartphones, as of 2022-12-26
+ # https://www.x-kom.pl/g-4/c/1590-smartfony-i-telefony.html?f201-system-operacyjny=61322-android
+ 'ASUS ZenFone 8',
+ 'Motorola edge 20 5G',
+ 'Motorola edge 30 neo 5G',
+ 'Motorola moto g22',
+ 'OnePlus Nord 2T 5G',
+ 'Samsung Galaxy A32 SM‑A325F',
+ 'Samsung Galaxy M13',
+ 'Samsung Galaxy S20 FE 5G',
+ 'Xiaomi 11T',
+ 'Xiaomi POCO M4 Pro',
+ 'Xiaomi Redmi 10',
+ 'Xiaomi Redmi 10C',
+ 'Xiaomi Redmi 9C NFC',
+ 'Xiaomi Redmi Note 10 Pro',
+ 'Xiaomi Redmi Note 11 Pro',
+ 'Xiaomi Redmi Note 11',
+ 'Xiaomi Redmi Note 11S 5G',
+ 'Xiaomi Redmi Note 11S',
+ 'realme 10',
+ 'realme 9 Pro+',
+ 'vivo Y33s',
+ ))
+ self._API_HEADERS['User-Agent'] = f'pl.cda 1.0 (version {app_version}; Android {android_version}; {phone_model})'
+
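+ # Reuse a cached OAuth bearer token while it is still valid (5 s safety margin)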
+ cached_bearer = self.cache.load(self._BEARER_CACHE, username) or {}
+ if cached_bearer.get('valid_until', 0) > datetime.datetime.now().timestamp() + 5:
+ self._API_HEADERS['Authorization'] = f'Bearer {cached_bearer["token"]}'
+ return
+
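+ # Derive the password hash the same way the app does:
+ # base64url(HMAC-SHA256(static key, hex(md5(password)))) with '=' padding stripped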
+ password_hash = base64.urlsafe_b64encode(hmac.new(
+ b's01m1Oer5IANoyBXQETzSOLWXgWs01m1Oer5bMg5xrTMMxRZ9Pi4fIPeFgIVRZ9PeXL8mPfXQETZGUAN5StRZ9P',
+ ''.join(f'{bytes((bt & 255, )).hex():0>2}'
+ for bt in hashlib.md5(password.encode()).digest()).encode(),
+ hashlib.sha256).digest()).decode().replace('=', '')
+
+ token_res = self._download_json(
+ f'{self._BASE_API_URL}/oauth/token', None, 'Logging in', data=b'',
+ headers={**self._API_HEADERS, 'Authorization': self._LOGIN_REQUEST_AUTH},
+ query={
+ 'grant_type': 'password',
+ 'login': username,
+ 'password': password_hash,
+ })
+ self.cache.store(self._BEARER_CACHE, username, {
+ 'token': token_res['access_token'],
+ 'valid_until': token_res['expires_in'] + datetime.datetime.now().timestamp(),
+ })
+ self._API_HEADERS['Authorization'] = f'Bearer {token_res["access_token"]}'
+
+ def _real_extract(self, url):
+ video_id = self._match_id(url)
+
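+ # When logged in, the mobile API exposes direct file URLs; otherwise
+ # fall back to scraping the website player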
+ if 'Authorization' in self._API_HEADERS:
+ return self._api_extract(video_id)
+ else:
+ return self._web_extract(video_id, url)
+
+ def _api_extract(self, video_id):
+ meta = self._download_json(
+ f'{self._BASE_API_URL}/video/{video_id}', video_id, headers=self._API_HEADERS)['video']
+
+ uploader = traverse_obj(meta, 'author', 'login')
+
+ formats = [{
+ 'url': quality['file'],
+ 'format': quality.get('title'),
+ 'resolution': quality.get('name'),
+ 'height': try_call(lambda: int(quality['name'][:-1])),
+ 'filesize': quality.get('length'),
+ } for quality in meta['qualities'] if quality.get('file')]
+
+ if meta.get('premium') and not meta.get('premium_free') and not formats:
+ raise ExtractorError(
+ 'Video requires CDA Premium - subscription needed', expected=True)
+
+ return {
+ 'id': video_id,
+ 'title': meta.get('title'),
+ 'description': meta.get('description'),
+ 'uploader': None if uploader == 'anonim' else uploader,
+ 'average_rating': float_or_none(meta.get('rating')),
+ 'thumbnail': meta.get('thumb'),
+ 'formats': formats,
+ 'duration': meta.get('duration'),
+ 'age_limit': 18 if meta.get('for_adults') else 0,
+ 'view_count': meta.get('views'),
+ }
+
+ def _web_extract(self, video_id, url):
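+ # Request the HTML5 player variant; its page embeds the player_data JSON parsed below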
+ self._set_cookie('cda.pl', 'cda.player', 'html5')
+ webpage = self._download_webpage(
+ f'{self._BASE_URL}/video/{video_id}/vfilm', video_id)
+
+ if 'Ten film jest dostępny dla użytkowników premium' in webpage:
+ self.raise_login_required('This video is only available for premium users')
+
+ if re.search(r'niedostępn[ey] w(?:&nbsp;|\s+)Twoim kraju\s*<', webpage):
+ self.raise_geo_restricted()
+
+ need_confirm_age = False
+ if self._html_search_regex(r'(<form[^>]+action="[^"]*/a/validatebirth[^"]*")',
+ webpage, 'birthday validate form', default=None):
+ webpage = self._download_age_confirm_page(
+ url, video_id, note='Confirming age')
+ need_confirm_age = True
+
+ formats = []
+
+ uploader = self._search_regex(r'''(?x)
+ <(span|meta)[^>]+itemprop=(["\'])author\2[^>]*>
+ (?:<\1[^>]*>[^<]*</\1>|(?!</\1>)(?:.|\n))*?
+ <(span|meta)[^>]+itemprop=(["\'])name\4[^>]*>(?P<uploader>[^<]+)</\3>
+ ''', webpage, 'uploader', default=None, group='uploader')
+ view_count = self._search_regex(
+ r'Odsłony:(?:\s|&nbsp;)*([0-9]+)', webpage,
+ 'view_count', default=None)
+ average_rating = self._search_regex(
+ (r'<(?:span|meta)[^>]+itemprop=(["\'])ratingValue\1[^>]*>(?P<rating_value>[0-9.]+)',
+ r'<span[^>]+\bclass=["\']rating["\'][^>]*>(?P<rating_value>[0-9.]+)'), webpage, 'rating', fatal=False,
+ group='rating_value')
+
+ info_dict = {
+ 'id': video_id,
+ 'title': self._og_search_title(webpage),
+ 'description': self._og_search_description(webpage),
+ 'uploader': uploader,
+ 'view_count': int_or_none(view_count),
+ 'average_rating': float_or_none(average_rating),
+ 'thumbnail': self._og_search_thumbnail(webpage),
+ 'formats': formats,
+ 'duration': None,
+ 'age_limit': 18 if need_confirm_age else 0,
+ }
+
+ info = self._search_json_ld(webpage, video_id, default={})
+
+ # Source: https://www.cda.pl/js/player.js?t=1606154898
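+ # The obfuscation strips decoy tokens, percent-decodes the string, then
+ # applies a ROT47-style shift over printable ASCII: chr(33 + (f + 14) % 94)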
+ def decrypt_file(a):
+ for p in ('_XDDD', '_CDA', '_ADC', '_CXD', '_QWE', '_Q5', '_IKSDE'):
+ a = a.replace(p, '')
+ a = compat_urllib_parse_unquote(a)
+ b = []
+ for c in a:
+ f = compat_ord(c)
+ b.append(chr(33 + (f + 14) % 94) if 33 <= f <= 126 else chr(f))
+ a = ''.join(b)
+ a = a.replace('.cda.mp4', '')
+ for p in ('.2cda.pl', '.3cda.pl'):
+ a = a.replace(p, '.cda.pl')
+ if '/upstream' in a:
+ a = a.replace('/upstream', '.mp4/upstream')
+ return 'https://' + a
+ return 'https://' + a + '.mp4'
+
+ def extract_format(page, version):
+ json_str = self._html_search_regex(
+ r'player_data=(\\?["\'])(?P<player_data>.+?)\1', page,
+ '%s player_json' % version, fatal=False, group='player_data')
+ if not json_str:
+ return
+ player_data = self._parse_json(
+ json_str, '%s player_data' % version, fatal=False)
+ if not player_data:
+ return
+ video = player_data.get('video')
+ if not video or 'file' not in video:
+ self.report_warning('Unable to extract %s version information' % version)
+ return
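+ # 'uggc' is ROT13 for 'http': some players serve the URL ROT13-encoded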
+ if video['file'].startswith('uggc'):
+ video['file'] = codecs.decode(video['file'], 'rot_13')
+ if video['file'].endswith('adc.mp4'):
+ video['file'] = video['file'].replace('adc.mp4', '.mp4')
+ elif not video['file'].startswith('http'):
+ video['file'] = decrypt_file(video['file'])
+ video_quality = video.get('quality')
+ qualities = video.get('qualities', {})
+ video_quality = next((k for k, v in qualities.items() if v == video_quality), video_quality)
+ info_dict['formats'].append({
+ 'url': video['file'],
+ 'format_id': video_quality,
+ 'height': int_or_none(video_quality[:-1]),
+ })
+ for quality, cda_quality in qualities.items():
+ if quality == video_quality:
+ continue
+ data = {'jsonrpc': '2.0', 'method': 'videoGetLink', 'id': 2,
+ 'params': [video_id, cda_quality, video.get('ts'), video.get('hash2'), {}]}
+ data = json.dumps(data).encode('utf-8')
+ video_url = self._download_json(
+ f'https://www.cda.pl/video/{video_id}', video_id, headers={
+ 'Content-Type': 'application/json',
+ 'X-Requested-With': 'XMLHttpRequest'
+ }, data=data, note=f'Fetching {quality} url',
+ errnote=f'Failed to fetch {quality} url', fatal=False)
+ if try_get(video_url, lambda x: x['result']['status']) == 'ok':
+ video_url = try_get(video_url, lambda x: x['result']['resp'])
+ info_dict['formats'].append({
+ 'url': video_url,
+ 'format_id': quality,
+ 'height': int_or_none(quality[:-1])
+ })
+
+ if not info_dict['duration']:
+ info_dict['duration'] = parse_duration(video.get('duration'))
+
+ extract_format(webpage, 'default')
+
+ for href, resolution in re.findall(
+ r'<a[^>]+data-quality="[^"]+"[^>]+href="([^"]+)"[^>]+class="quality-btn"[^>]*>([0-9]+p)',
+ webpage):
+ if need_confirm_age:
+ handler = self._download_age_confirm_page
+ else:
+ handler = self._download_webpage
+
+ webpage = handler(
+ urljoin(self._BASE_URL, href), video_id,
+ 'Downloading %s version information' % resolution, fatal=False)
+ if not webpage:
+ # Manually report warning because empty page is returned when
+ # invalid version is requested.
+ self.report_warning('Unable to download %s version information' % resolution)
+ continue
+
+ extract_format(webpage, resolution)
+
+ return merge_dicts(info_dict, info)
diff --git a/yt_dlp/extractor/cellebrite.py b/yt_dlp/extractor/cellebrite.py
new file mode 100644
index 0000000..9896a31
--- /dev/null
+++ b/yt_dlp/extractor/cellebrite.py
@@ -0,0 +1,63 @@
+from .common import InfoExtractor
+from ..utils import traverse_obj
+
+
+class CellebriteIE(InfoExtractor):
+ _VALID_URL = r'https?://cellebrite\.com/(?:\w+)?/(?P<id>[\w-]+)'
+ _TESTS = [{
+ 'url': 'https://cellebrite.com/en/collect-data-from-android-devices-with-cellebrite-ufed/',
+ 'info_dict': {
+ 'id': '16025876',
+ 'ext': 'mp4',
+ 'description': 'md5:174571cb97083fd1d457d75c684f4e2b',
+ 'thumbnail': 'https://cellebrite.com/wp-content/uploads/2021/05/Chat-Capture-1024x559.png',
+ 'title': 'Ask the Expert: Chat Capture - Collect Data from Android Devices in Cellebrite UFED',
+ 'duration': 455,
+ 'tags': [],
+ }
+ }, {
+ 'url': 'https://cellebrite.com/en/how-to-lawfully-collect-the-maximum-amount-of-data-from-android-devices/',
+ 'info_dict': {
+ 'id': '29018255',
+ 'ext': 'mp4',
+ 'duration': 134,
+ 'tags': [],
+ 'description': 'md5:e9a3d124c7287b0b07bad2547061cacf',
+ 'thumbnail': 'https://cellebrite.com/wp-content/uploads/2022/07/How-to-Lawfully-Collect-the-Maximum-Amount-of-Data-From-Android-Devices.png',
+ 'title': 'Android Extractions Explained',
+ }
+ }]
+
+ def _get_formats_and_subtitles(self, json_data, display_id):
+ formats = [{'url': url} for url in traverse_obj(json_data, ('mp4', ..., 'url')) or []]
+ subtitles = {}
+
+ for url in traverse_obj(json_data, ('hls', ..., 'url')) or []:
+ fmt, sub = self._extract_m3u8_formats_and_subtitles(
+ url, display_id, ext='mp4', headers={'Referer': 'https://play.vidyard.com/'})
+ formats.extend(fmt)
+ self._merge_subtitles(sub, target=subtitles)
+
+ return formats, subtitles
+
+ def _real_extract(self, url):
+ display_id = self._match_id(url)
+ webpage = self._download_webpage(url, display_id)
+
+ player_uuid = self._search_regex(
+ r'<img\s[^>]*\bdata-uuid\s*=\s*"([^"\?]+)', webpage, 'player UUID')
+ json_data = self._download_json(
+ f'https://play.vidyard.com/player/{player_uuid}.json', display_id)['payload']['chapters'][0]
+
+ formats, subtitles = self._get_formats_and_subtitles(json_data['sources'], display_id)
+ return {
+ 'id': str(json_data['videoId']),
+ 'title': json_data.get('name') or self._og_search_title(webpage),
+ 'formats': formats,
+ 'subtitles': subtitles,
+ 'description': json_data.get('description') or self._og_search_description(webpage),
+ 'duration': json_data.get('seconds'),
+ 'tags': json_data.get('tags'),
+ 'thumbnail': self._og_search_thumbnail(webpage),
+ 'http_headers': {'Referer': 'https://play.vidyard.com/'},
+ }
diff --git a/yt_dlp/extractor/ceskatelevize.py b/yt_dlp/extractor/ceskatelevize.py
new file mode 100644
index 0000000..156b6a3
--- /dev/null
+++ b/yt_dlp/extractor/ceskatelevize.py
@@ -0,0 +1,289 @@
+import re
+
+from .common import InfoExtractor
+from ..compat import compat_urllib_parse_unquote, compat_urllib_parse_urlparse
+from ..networking import Request
+from ..utils import (
+ ExtractorError,
+ float_or_none,
+ str_or_none,
+ traverse_obj,
+ urlencode_postdata,
+)
+
+USER_AGENTS = {
+ 'Safari': 'Mozilla/5.0 (X11; Linux x86_64; rv:10.0) AppleWebKit/533.20.25 (KHTML, like Gecko) Version/5.0.4 Safari/533.20.27',
+}
+
+
+class CeskaTelevizeIE(InfoExtractor):
+ _VALID_URL = r'https?://(?:www\.)?ceskatelevize\.cz/(?:ivysilani|porady|zive)/(?:[^/?#&]+/)*(?P<id>[^/#?]+)'
+ _TESTS = [{
+ 'url': 'http://www.ceskatelevize.cz/ivysilani/10441294653-hyde-park-civilizace/215411058090502/bonus/20641-bonus-01-en',
+ 'info_dict': {
+ 'id': '61924494877028507',
+ 'ext': 'mp4',
+ 'title': 'Bonus 01 - En - Hyde Park Civilizace',
+ 'description': 'English Subtittles',
+ 'thumbnail': r're:^https?://.*\.jpg',
+ 'duration': 81.3,
+ },
+ 'params': {
+ # m3u8 download
+ 'skip_download': True,
+ },
+ }, {
+ # live stream
+ 'url': 'http://www.ceskatelevize.cz/zive/ct1/',
+ 'info_dict': {
+ 'id': '102',
+ 'ext': 'mp4',
+ 'title': r'ČT1 - živé vysílání online',
+ 'description': 'Sledujte živé vysílání kanálu ČT1 online. Vybírat si můžete i z dalších kanálů České televize na kterémkoli z vašich zařízení.',
+ 'is_live': True,
+ },
+ 'params': {
+ # m3u8 download
+ 'skip_download': True,
+ },
+ }, {
+ # another live stream
+ 'url': 'http://www.ceskatelevize.cz/ivysilani/zive/ct4/',
+ 'only_matching': True,
+ 'info_dict': {
+ 'id': '402',
+ 'ext': 'mp4',
+ 'title': r're:^ČT Sport \d{4}-\d{2}-\d{2} \d{2}:\d{2}$',
+ 'is_live': True,
+ },
+ # 'skip': 'Georestricted to Czech Republic',
+ }, {
+ 'url': 'http://www.ceskatelevize.cz/ivysilani/embed/iFramePlayer.php?hash=d6a3e1370d2e4fa76296b90bad4dfc19673b641e&IDEC=217 562 22150/0004&channelID=1&width=100%25',
+ 'only_matching': True,
+ }, {
+ # video with 18+ caution trailer
+ 'url': 'http://www.ceskatelevize.cz/porady/10520528904-queer/215562210900007-bogotart/',
+ 'info_dict': {
+ 'id': '215562210900007-bogotart',
+ 'title': 'Bogotart - Queer',
+ 'description': 'Hlavní město Kolumbie v doprovodu queer umělců. Vroucí svět plný vášně, sebevědomí, ale i násilí a bolesti',
+ },
+ 'playlist': [{
+ 'info_dict': {
+ 'id': '61924494877311053',
+ 'ext': 'mp4',
+ 'title': 'Bogotart - Queer (Varování 18+)',
+ 'duration': 11.9,
+ },
+ }, {
+ 'info_dict': {
+ 'id': '61924494877068022',
+ 'ext': 'mp4',
+ 'title': 'Bogotart - Queer (Queer)',
+ 'thumbnail': r're:^https?://.*\.jpg',
+ 'duration': 1558.3,
+ },
+ }],
+ 'params': {
+ # m3u8 download
+ 'skip_download': True,
+ },
+ }, {
+ # iframe embed
+ 'url': 'http://www.ceskatelevize.cz/porady/10614999031-neviditelni/21251212048/',
+ 'only_matching': True,
+ }]
+
+ def _real_extract(self, url):
+ playlist_id = self._match_id(url)
+ webpage, urlh = self._download_webpage_handle(url, playlist_id)
+ parsed_url = compat_urllib_parse_urlparse(urlh.url)
+ site_name = self._og_search_property('site_name', webpage, fatal=False, default='Česká televize')
+ playlist_title = self._og_search_title(webpage, default=None)
+ if site_name and playlist_title:
+ playlist_title = re.split(r'\s*[—|]\s*%s' % re.escape(site_name), playlist_title, maxsplit=1)[0]
+ playlist_description = self._og_search_description(webpage, default=None)
+ if playlist_description:
+ playlist_description = playlist_description.replace('\xa0', ' ')
+
+ type_ = 'IDEC'
+ if re.search(r'(^/porady|/zive)/', parsed_url.path):
+ next_data = self._search_nextjs_data(webpage, playlist_id)
+ if '/zive/' in parsed_url.path:
+ idec = traverse_obj(next_data, ('props', 'pageProps', 'data', 'liveBroadcast', 'current', 'idec'), get_all=False)
+ else:
+ idec = traverse_obj(next_data, ('props', 'pageProps', 'data', ('show', 'mediaMeta'), 'idec'), get_all=False)
+ if not idec:
+ idec = traverse_obj(next_data, ('props', 'pageProps', 'data', 'videobonusDetail', 'bonusId'), get_all=False)
+ if idec:
+ type_ = 'bonus'
+ if not idec:
+ raise ExtractorError('Failed to find IDEC id')
+ iframe_hash = self._download_webpage(
+ 'https://www.ceskatelevize.cz/v-api/iframe-hash/',
+ playlist_id, note='Getting IFRAME hash')
+ query = {'hash': iframe_hash, 'origin': 'iVysilani', 'autoStart': 'true', type_: idec, }
+ webpage = self._download_webpage(
+ 'https://www.ceskatelevize.cz/ivysilani/embed/iFramePlayer.php',
+ playlist_id, note='Downloading player', query=query)
+
+ NOT_AVAILABLE_STRING = 'This content is not available at your territory due to limited copyright.'
+ if '%s</p>' % NOT_AVAILABLE_STRING in webpage:
+ self.raise_geo_restricted(NOT_AVAILABLE_STRING)
+ if any(not_found in webpage for not_found in ('Neplatný parametr pro videopřehrávač', 'IDEC nebyl nalezen', )):
+ raise ExtractorError('no video with IDEC available', video_id=idec, expected=True)
+
+ type_ = None
+ episode_id = None
+
+ playlist = self._parse_json(
+ self._search_regex(
+ r'getPlaylistUrl\(\[({.+?})\]', webpage, 'playlist',
+ default='{}'), playlist_id)
+ if playlist:
+ type_ = playlist.get('type')
+ episode_id = playlist.get('id')
+
+ if not type_:
+ type_ = self._html_search_regex(
+ r'getPlaylistUrl\(\[\{"type":"(.+?)","id":".+?"\}\],',
+ webpage, 'type')
+ if not episode_id:
+ episode_id = self._html_search_regex(
+ r'getPlaylistUrl\(\[\{"type":".+?","id":"(.+?)"\}\],',
+ webpage, 'episode_id')
+
+ data = {
+ 'playlist[0][type]': type_,
+ 'playlist[0][id]': episode_id,
+ 'requestUrl': parsed_url.path,
+ 'requestSource': 'iVysilani',
+ }
+
+ entries = []
+
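+ # Fetch the playlist twice (default UA, then Safari), since some formats
+ # are apparently only served to certain user agents; the second pass only
+ # merges its extra formats into the entries collected in the first.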
+ for user_agent in (None, USER_AGENTS['Safari']):
+ req = Request(
+ 'https://www.ceskatelevize.cz/ivysilani/ajax/get-client-playlist/',
+ data=urlencode_postdata(data))
+
+ req.headers['Content-type'] = 'application/x-www-form-urlencoded'
+ req.headers['x-addr'] = '127.0.0.1'
+ req.headers['X-Requested-With'] = 'XMLHttpRequest'
+ if user_agent:
+ req.headers['User-Agent'] = user_agent
+ req.headers['Referer'] = url
+
+ playlistpage = self._download_json(req, playlist_id, fatal=False)
+
+ if not playlistpage:
+ continue
+
+ playlist_url = playlistpage['url']
+ if playlist_url == 'error_region':
+ raise ExtractorError(NOT_AVAILABLE_STRING, expected=True)
+
+ req = Request(compat_urllib_parse_unquote(playlist_url))
+ req.headers['Referer'] = url
+
+ playlist = self._download_json(req, playlist_id, fatal=False)
+ if not playlist:
+ continue
+
+ playlist = playlist.get('playlist')
+ if not isinstance(playlist, list):
+ continue
+
+ playlist_len = len(playlist)
+
+ for num, item in enumerate(playlist):
+ is_live = item.get('type') == 'LIVE'
+ formats = []
+ for format_id, stream_url in item.get('streamUrls', {}).items():
+ if 'playerType=flash' in stream_url:
+ stream_formats = self._extract_m3u8_formats(
+ stream_url, playlist_id, 'mp4', 'm3u8_native',
+ m3u8_id='hls-%s' % format_id, fatal=False)
+ else:
+ stream_formats = self._extract_mpd_formats(
+ stream_url, playlist_id,
+ mpd_id='dash-%s' % format_id, fatal=False)
+ if 'drmOnly=true' in stream_url:
+ for f in stream_formats:
+ f['has_drm'] = True
+ # See https://github.com/ytdl-org/youtube-dl/issues/12119#issuecomment-280037031
+ if format_id == 'audioDescription':
+ for f in stream_formats:
+ f['source_preference'] = -10
+ formats.extend(stream_formats)
+
+ if user_agent and len(entries) == playlist_len:
+ entries[num]['formats'].extend(formats)
+ continue
+
+ item_id = str_or_none(item.get('id') or item['assetId'])
+ title = item['title']
+
+ duration = float_or_none(item.get('duration'))
+ thumbnail = item.get('previewImageUrl')
+
+ subtitles = {}
+ if item.get('type') == 'VOD':
+ subs = item.get('subtitles')
+ if subs:
+ subtitles = self.extract_subtitles(episode_id, subs)
+
+ if playlist_len == 1:
+ final_title = playlist_title or title
+ else:
+ final_title = '%s (%s)' % (playlist_title, title)
+
+ entries.append({
+ 'id': item_id,
+ 'title': final_title,
+ 'description': playlist_description if playlist_len == 1 else None,
+ 'thumbnail': thumbnail,
+ 'duration': duration,
+ 'formats': formats,
+ 'subtitles': subtitles,
+ 'is_live': is_live,
+ })
+
+ if len(entries) == 1:
+ return entries[0]
+ return self.playlist_result(entries, playlist_id, playlist_title, playlist_description)
+
+ def _get_subtitles(self, episode_id, subs):
+ original_subtitles = self._download_webpage(
+ subs[0]['url'], episode_id, 'Downloading subtitles')
+ srt_subs = self._fix_subtitles(original_subtitles)
+ return {
+ 'cs': [{
+ 'ext': 'srt',
+ 'data': srt_subs,
+ }]
+ }
+
+ @staticmethod
+ def _fix_subtitles(subtitles):
+ """ Convert millisecond-based subtitles to SRT """
+
+ def _msectotimecode(msec):
+ """ Helper utility to convert milliseconds to timecode """
+ components = []
+ for divider in [1000, 60, 60, 100]:
+ components.append(msec % divider)
+ msec //= divider
+ return '{3:02}:{2:02}:{1:02},{0:03}'.format(*components)
+
+ def _fix_subtitle(subtitle):
+ for line in subtitle.splitlines():
+ m = re.match(r'^\s*([0-9]+);\s*([0-9]+)\s+([0-9]+)\s*$', line)
+ if m:
+ yield m.group(1)
+ start, stop = (_msectotimecode(int(t)) for t in m.groups()[1:])
+ yield '{0} --> {1}'.format(start, stop)
+ else:
+ yield line
+
+ return '\r\n'.join(_fix_subtitle(subtitles))
diff --git a/yt_dlp/extractor/cgtn.py b/yt_dlp/extractor/cgtn.py
new file mode 100644
index 0000000..5d9d9bc
--- /dev/null
+++ b/yt_dlp/extractor/cgtn.py
@@ -0,0 +1,65 @@
+from .common import InfoExtractor
+from ..utils import (
+ try_get,
+ unified_timestamp,
+)
+
+
+class CGTNIE(InfoExtractor):
+ _VALID_URL = r'https?://news\.cgtn\.com/news/[0-9]{4}-[0-9]{2}-[0-9]{2}/[a-zA-Z0-9-]+-(?P<id>[a-zA-Z0-9-]+)/index\.html'
+ _TESTS = [
+ {
+ 'url': 'https://news.cgtn.com/news/2021-03-09/Up-and-Out-of-Poverty-Ep-1-A-solemn-promise-YuOUaOzGQU/index.html',
+ 'info_dict': {
+ 'id': 'YuOUaOzGQU',
+ 'ext': 'mp4',
+ 'title': 'Up and Out of Poverty Ep. 1: A solemn promise',
+ 'thumbnail': r're:^https?://.*\.jpg$',
+ 'timestamp': 1615295940,
+ 'upload_date': '20210309',
+ 'categories': ['Video'],
+ },
+ 'params': {
+ 'skip_download': True
+ }
+ }, {
+ 'url': 'https://news.cgtn.com/news/2021-06-06/China-Indonesia-vow-to-further-deepen-maritime-cooperation-10REvJCewCY/index.html',
+ 'info_dict': {
+ 'id': '10REvJCewCY',
+ 'ext': 'mp4',
+ 'title': 'China, Indonesia vow to further deepen maritime cooperation',
+ 'thumbnail': r're:^https?://.*\.png$',
+ 'description': 'China and Indonesia vowed to upgrade their cooperation into the maritime sector and also for political security, economy, and cultural and people-to-people exchanges.',
+ 'creators': ['CGTN'],
+ 'categories': ['China'],
+ 'timestamp': 1622950200,
+ 'upload_date': '20210606',
+ },
+ 'params': {
+ 'skip_download': False
+ }
+ }
+ ]
+
+ def _real_extract(self, url):
+ video_id = self._match_id(url)
+ webpage = self._download_webpage(url, video_id)
+
+ download_url = self._html_search_regex(r'data-video ="(?P<url>.+m3u8)"', webpage, 'download_url')
+ datetime_str = self._html_search_regex(
+ r'<span class="date">\s*(.+?)\s*</span>', webpage, 'datetime_str', fatal=False)
+ category = self._html_search_regex(
+ r'<span class="section">\s*(.+?)\s*</span>', webpage, 'category', fatal=False)
+ author = self._search_regex(
+ r'<div class="news-author-name">\s*(.+?)\s*</div>', webpage, 'author', default=None)
+
+ return {
+ 'id': video_id,
+ 'title': self._og_search_title(webpage),
+ 'description': self._og_search_description(webpage, default=None),
+ 'thumbnail': self._og_search_thumbnail(webpage),
+ 'formats': self._extract_m3u8_formats(download_url, video_id, 'mp4', 'm3u8_native', m3u8_id='hls'),
+ 'categories': [category] if category else None,
+ 'creators': [author] if author else None,
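+ # the page datetime is presumably China Standard Time (UTC+8); shift to UTC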
+ 'timestamp': try_get(unified_timestamp(datetime_str), lambda x: x - 8 * 3600),
+ }
diff --git a/yt_dlp/extractor/charlierose.py b/yt_dlp/extractor/charlierose.py
new file mode 100644
index 0000000..8fe6797
--- /dev/null
+++ b/yt_dlp/extractor/charlierose.py
@@ -0,0 +1,50 @@
+from .common import InfoExtractor
+from ..utils import remove_end
+
+
+class CharlieRoseIE(InfoExtractor):
+ _VALID_URL = r'https?://(?:www\.)?charlierose\.com/(?:video|episode)(?:s|/player)/(?P<id>\d+)'
+ _TESTS = [{
+ 'url': 'https://charlierose.com/videos/27996',
+ 'md5': 'fda41d49e67d4ce7c2411fd2c4702e09',
+ 'info_dict': {
+ 'id': '27996',
+ 'ext': 'mp4',
+ 'title': 'Remembering Zaha Hadid',
+ 'thumbnail': r're:^https?://.*\.jpg\?\d+',
+ 'description': 'We revisit past conversations with Zaha Hadid, in memory of the world renowned Iraqi architect.',
+ 'subtitles': {
+ 'en': [{
+ 'ext': 'vtt',
+ }],
+ },
+ },
+ }, {
+ 'url': 'https://charlierose.com/videos/27996',
+ 'only_matching': True,
+ }, {
+ 'url': 'https://charlierose.com/episodes/30887?autoplay=true',
+ 'only_matching': True,
+ }]
+
+ _PLAYER_BASE = 'https://charlierose.com/video/player/%s'
+
+ def _real_extract(self, url):
+ video_id = self._match_id(url)
+ webpage = self._download_webpage(self._PLAYER_BASE % video_id, video_id)
+
+ title = remove_end(self._og_search_title(webpage), ' - Charlie Rose')
+
+ info_dict = self._parse_html5_media_entries(
+ self._PLAYER_BASE % video_id, webpage, video_id,
+ m3u8_entry_protocol='m3u8_native')[0]
+ self._remove_duplicate_formats(info_dict['formats'])
+
+ info_dict.update({
+ 'id': video_id,
+ 'title': title,
+ 'thumbnail': self._og_search_thumbnail(webpage),
+ 'description': self._og_search_description(webpage),
+ })
+
+ return info_dict
diff --git a/yt_dlp/extractor/chaturbate.py b/yt_dlp/extractor/chaturbate.py
new file mode 100644
index 0000000..99dfcfd
--- /dev/null
+++ b/yt_dlp/extractor/chaturbate.py
@@ -0,0 +1,106 @@
+import re
+
+from .common import InfoExtractor
+from ..utils import (
+ ExtractorError,
+ lowercase_escape,
+ url_or_none,
+)
+
+
+class ChaturbateIE(InfoExtractor):
+ _VALID_URL = r'https?://(?:[^/]+\.)?chaturbate\.com/(?:fullvideo/?\?.*?\bb=)?(?P<id>[^/?&#]+)'
+ _TESTS = [{
+ 'url': 'https://www.chaturbate.com/siswet19/',
+ 'info_dict': {
+ 'id': 'siswet19',
+ 'ext': 'mp4',
+ 'title': 're:^siswet19 [0-9]{4}-[0-9]{2}-[0-9]{2} [0-9]{2}:[0-9]{2}$',
+ 'age_limit': 18,
+ 'is_live': True,
+ },
+ 'params': {
+ 'skip_download': True,
+ },
+ 'skip': 'Room is offline',
+ }, {
+ 'url': 'https://chaturbate.com/fullvideo/?b=caylin',
+ 'only_matching': True,
+ }, {
+ 'url': 'https://en.chaturbate.com/siswet19/',
+ 'only_matching': True,
+ }]
+
+ _ROOM_OFFLINE = 'Room is currently offline'
+
+ def _real_extract(self, url):
+ video_id = self._match_id(url)
+
+ webpage = self._download_webpage(
+ 'https://chaturbate.com/%s/' % video_id, video_id,
+ headers=self.geo_verification_headers())
+
+ found_m3u8_urls = []
+
+ data = self._parse_json(
+ self._search_regex(
+ r'initialRoomDossier\s*=\s*(["\'])(?P<value>(?:(?!\1).)+)\1',
+ webpage, 'data', default='{}', group='value'),
+ video_id, transform_source=lowercase_escape, fatal=False)
+ if data:
+ m3u8_url = url_or_none(data.get('hls_source'))
+ if m3u8_url:
+ found_m3u8_urls.append(m3u8_url)
+
+ if not found_m3u8_urls:
+ for m in re.finditer(
+ r'(\\u002[27])(?P<url>http.+?\.m3u8.*?)\1', webpage):
+ found_m3u8_urls.append(lowercase_escape(m.group('url')))
+
+ if not found_m3u8_urls:
+ for m in re.finditer(
+ r'(["\'])(?P<url>http.+?\.m3u8.*?)\1', webpage):
+ found_m3u8_urls.append(m.group('url'))
+
+ m3u8_urls = []
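+ # Each playlist may exist both as a '_fast' (low-latency) variant and a
+ # regular one; queue both, deduplicated, so every stream is tried.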
+ for found_m3u8_url in found_m3u8_urls:
+ m3u8_fast_url, m3u8_no_fast_url = found_m3u8_url, found_m3u8_url.replace('_fast', '')
+ for m3u8_url in (m3u8_fast_url, m3u8_no_fast_url):
+ if m3u8_url not in m3u8_urls:
+ m3u8_urls.append(m3u8_url)
+
+ if not m3u8_urls:
+ error = self._search_regex(
+ [r'<span[^>]+class=(["\'])desc_span\1[^>]*>(?P<error>[^<]+)</span>',
+ r'<div[^>]+id=(["\'])defchat\1[^>]*>\s*<p><strong>(?P<error>[^<]+)<'],
+ webpage, 'error', group='error', default=None)
+ if not error:
+ if any(p in webpage for p in (
+ self._ROOM_OFFLINE, 'offline_tipping', 'tip_offline')):
+ error = self._ROOM_OFFLINE
+ if error:
+ raise ExtractorError(error, expected=True)
+ raise ExtractorError('Unable to find stream URL')
+
+ formats = []
+ for m3u8_url in m3u8_urls:
+ for known_id in ('fast', 'slow'):
+ if '_%s' % known_id in m3u8_url:
+ m3u8_id = known_id
+ break
+ else:
+ m3u8_id = None
+ formats.extend(self._extract_m3u8_formats(
+ m3u8_url, video_id, ext='mp4',
+ # ffmpeg skips segments for fast m3u8
+ preference=-10 if m3u8_id == 'fast' else None,
+ m3u8_id=m3u8_id, fatal=False, live=True))
+
+ return {
+ 'id': video_id,
+ 'title': video_id,
+ 'thumbnail': 'https://roomimg.stream.highwebmedia.com/ri/%s.jpg' % video_id,
+ 'age_limit': self._rta_search(webpage),
+ 'is_live': True,
+ 'formats': formats,
+ }
diff --git a/yt_dlp/extractor/chilloutzone.py b/yt_dlp/extractor/chilloutzone.py
new file mode 100644
index 0000000..ac4252f
--- /dev/null
+++ b/yt_dlp/extractor/chilloutzone.py
@@ -0,0 +1,123 @@
+import base64
+
+from .common import InfoExtractor
+from ..utils import (
+ clean_html,
+ int_or_none,
+ traverse_obj,
+)
+
+
+class ChilloutzoneIE(InfoExtractor):
+ _VALID_URL = r'https?://(?:www\.)?chilloutzone\.net/video/(?P<id>[\w-]+)\.html'
+ _TESTS = [{
+ 'url': 'https://www.chilloutzone.net/video/enemene-meck-alle-katzen-weg.html',
+ 'md5': 'a76f3457e813ea0037e5244f509e66d1',
+ 'info_dict': {
+ 'id': 'enemene-meck-alle-katzen-weg',
+ 'ext': 'mp4',
+ 'title': 'Enemene Meck - Alle Katzen weg',
+ 'description': 'Ist das der Umkehrschluss des Niesenden Panda-Babys?',
+ 'duration': 24,
+ },
+ }, {
+ 'note': 'Video hosted at YouTube',
+ 'url': 'https://www.chilloutzone.net/video/eine-sekunde-bevor.html',
+ 'info_dict': {
+ 'id': '1YVQaAgHyRU',
+ 'ext': 'mp4',
+ 'title': '16 Photos Taken 1 Second Before Disaster',
+ 'description': 'md5:58a8fcf6a459fe0a08f54140f0ad1814',
+ 'uploader': 'BuzzFeedVideo',
+ 'uploader_id': '@BuzzFeedVideo',
+ 'upload_date': '20131105',
+ 'availability': 'public',
+ 'thumbnail': 'https://i.ytimg.com/vi/1YVQaAgHyRU/maxresdefault.jpg',
+ 'tags': 'count:41',
+ 'like_count': int,
+ 'playable_in_embed': True,
+ 'channel_url': 'https://www.youtube.com/channel/UCpko_-a4wgz2u_DgDgd9fqA',
+ 'chapters': 'count:6',
+ 'live_status': 'not_live',
+ 'view_count': int,
+ 'categories': ['Entertainment'],
+ 'age_limit': 0,
+ 'channel_id': 'UCpko_-a4wgz2u_DgDgd9fqA',
+ 'duration': 100,
+ 'uploader_url': 'http://www.youtube.com/@BuzzFeedVideo',
+ 'channel_follower_count': int,
+ 'channel': 'BuzzFeedVideo',
+ },
+ }, {
+ 'url': 'https://www.chilloutzone.net/video/icon-blending.html',
+ 'md5': '2f9d6850ec567b24f0f4fa143b9aa2f9',
+ 'info_dict': {
+ 'id': 'LLNkHpSjBfc',
+ 'ext': 'mp4',
+ 'title': 'The Sunday Times Making of Icons',
+ 'description': 'md5:b9259fcf63a1669e42001e5db677f02a',
+ 'uploader': 'MadFoxUA',
+ 'uploader_id': '@MadFoxUA',
+ 'upload_date': '20140204',
+ 'channel_id': 'UCSZa9Y6-Vl7c11kWMcbAfCw',
+ 'channel_url': 'https://www.youtube.com/channel/UCSZa9Y6-Vl7c11kWMcbAfCw',
+ 'comment_count': int,
+ 'uploader_url': 'http://www.youtube.com/@MadFoxUA',
+ 'duration': 66,
+ 'live_status': 'not_live',
+ 'channel_follower_count': int,
+ 'playable_in_embed': True,
+ 'view_count': int,
+ 'like_count': int,
+ 'thumbnail': 'https://i.ytimg.com/vi/LLNkHpSjBfc/maxresdefault.jpg',
+ 'categories': ['Comedy'],
+ 'availability': 'public',
+ 'tags': [],
+ 'channel': 'MadFoxUA',
+ 'age_limit': 0,
+ },
+ }, {
+ 'url': 'https://www.chilloutzone.net/video/ordentlich-abgeschuettelt.html',
+ 'info_dict': {
+ 'id': 'ordentlich-abgeschuettelt',
+ 'ext': 'mp4',
+ 'title': 'Ordentlich abgeschüttelt',
+ 'description': 'md5:d41541966b75d3d1e8ea77a94ea0d329',
+ 'duration': 18,
+ },
+ }]
+
+ def _real_extract(self, url):
+ video_id = self._match_id(url)
+ webpage = self._download_webpage(url, video_id)
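+ # video metadata is embedded in the page as a base64-encoded JSON blob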
+ b64_data = self._html_search_regex(
+ r'var cozVidData\s*=\s*"([^"]+)"', webpage, 'video data')
+ info = self._parse_json(base64.b64decode(b64_data).decode(), video_id)
+
+ video_url = info.get('mediaUrl')
+ native_platform = info.get('nativePlatform')
+
+ if native_platform and info.get('sourcePriority') == 'native':
+ native_video_id = info['nativeVideoId']
+ if native_platform == 'youtube':
+ return self.url_result(native_video_id, 'Youtube')
+ elif native_platform == 'vimeo':
+ return self.url_result(f'https://vimeo.com/{native_video_id}', 'Vimeo')
+
+ elif not video_url:
+ # Possibly a standard youtube embed?
+ # TODO: Investigate if site still does this (there are no tests for it)
+ return self.url_result(url, 'Generic')
+
+ return {
+ 'id': video_id,
+ 'url': video_url,
+ 'ext': 'mp4',
+ **traverse_obj(info, {
+ 'title': 'title',
+ 'description': ('description', {clean_html}),
+ 'duration': ('videoLength', {int_or_none}),
+ 'width': ('videoWidth', {int_or_none}),
+ 'height': ('videoHeight', {int_or_none}),
+ }),
+ }
diff --git a/yt_dlp/extractor/chzzk.py b/yt_dlp/extractor/chzzk.py
new file mode 100644
index 0000000..420fe05
--- /dev/null
+++ b/yt_dlp/extractor/chzzk.py
@@ -0,0 +1,139 @@
+import functools
+
+from .common import InfoExtractor
+from ..utils import (
+ UserNotLive,
+ float_or_none,
+ int_or_none,
+ parse_iso8601,
+ url_or_none,
+)
+from ..utils.traversal import traverse_obj
+
+
+class CHZZKLiveIE(InfoExtractor):
+ IE_NAME = 'chzzk:live'
+ _VALID_URL = r'https?://chzzk\.naver\.com/live/(?P<id>[\da-f]+)'
+ _TESTS = [{
+ 'url': 'https://chzzk.naver.com/live/c68b8ef525fb3d2fa146344d84991753',
+ 'info_dict': {
+ 'id': 'c68b8ef525fb3d2fa146344d84991753',
+ 'ext': 'mp4',
+ 'title': str,
+ 'channel': '진짜도현',
+ 'channel_id': 'c68b8ef525fb3d2fa146344d84991753',
+ 'channel_is_verified': False,
+ 'thumbnail': r're:^https?://.*\.jpg$',
+ 'timestamp': 1705510344,
+ 'upload_date': '20240117',
+ 'live_status': 'is_live',
+ 'view_count': int,
+ 'concurrent_view_count': int,
+ },
+ 'skip': 'The channel is not currently live',
+ }]
+
+ def _real_extract(self, url):
+ channel_id = self._match_id(url)
+ live_detail = self._download_json(
+ f'https://api.chzzk.naver.com/service/v2/channels/{channel_id}/live-detail', channel_id,
+ note='Downloading channel info', errnote='Unable to download channel info')['content']
+
+ if live_detail.get('status') == 'CLOSE':
+ raise UserNotLive(video_id=channel_id)
+
+ live_playback = self._parse_json(live_detail['livePlaybackJson'], channel_id)
+
+ thumbnails = []
+ thumbnail_template = traverse_obj(
+ live_playback, ('thumbnail', 'snapshotThumbnailTemplate', {url_or_none}))
+ if thumbnail_template and '{type}' in thumbnail_template:
+ for width in traverse_obj(live_playback, ('thumbnail', 'types', ..., {str})):
+ thumbnails.append({
+ 'id': width,
+ 'url': thumbnail_template.replace('{type}', width),
+ 'width': int_or_none(width),
+ })
+
+ formats, subtitles = [], {}
+ for media in traverse_obj(live_playback, ('media', lambda _, v: url_or_none(v['path']))):
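+ # 'LLHLS' is the low-latency HLS variant; deprioritize it, since regular
+ # HLS is more reliable for downloading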
+ is_low_latency = media.get('mediaId') == 'LLHLS'
+ fmts, subs = self._extract_m3u8_formats_and_subtitles(
+ media['path'], channel_id, 'mp4', fatal=False, live=True,
+ m3u8_id='hls-ll' if is_low_latency else 'hls')
+ for f in fmts:
+ if is_low_latency:
+ f['source_preference'] = -2
+ if '-afragalow.stream-audio.stream' in f['format_id']:
+ f['quality'] = -2
+ formats.extend(fmts)
+ self._merge_subtitles(subs, target=subtitles)
+
+ return {
+ 'id': channel_id,
+ 'is_live': True,
+ 'formats': formats,
+ 'subtitles': subtitles,
+ 'thumbnails': thumbnails,
+ **traverse_obj(live_detail, {
+ 'title': ('liveTitle', {str}),
+ 'timestamp': ('openDate', {functools.partial(parse_iso8601, delimiter=' ')}),
+ 'concurrent_view_count': ('concurrentUserCount', {int_or_none}),
+ 'view_count': ('accumulateCount', {int_or_none}),
+ 'channel': ('channel', 'channelName', {str}),
+ 'channel_id': ('channel', 'channelId', {str}),
+ 'channel_is_verified': ('channel', 'verifiedMark', {bool}),
+ }),
+ }
+
+
+class CHZZKVideoIE(InfoExtractor):
+ IE_NAME = 'chzzk:video'
+ _VALID_URL = r'https?://chzzk\.naver\.com/video/(?P<id>\d+)'
+ _TESTS = [{
+ 'url': 'https://chzzk.naver.com/video/1754',
+ 'md5': 'b0c0c1bb888d913b93d702b1512c7f06',
+ 'info_dict': {
+ 'id': '1754',
+ 'ext': 'mp4',
+ 'title': '치지직 테스트 방송',
+ 'channel': '침착맨',
+ 'channel_id': 'bb382c2c0cc9fa7c86ab3b037fb5799c',
+ 'channel_is_verified': False,
+ 'thumbnail': r're:^https?://.*\.jpg$',
+ 'duration': 15577,
+ 'timestamp': 1702970505.417,
+ 'upload_date': '20231219',
+ 'view_count': int,
+ },
+ }]
+
+ def _real_extract(self, url):
+ video_id = self._match_id(url)
+ video_meta = self._download_json(
+ f'https://api.chzzk.naver.com/service/v2/videos/{video_id}', video_id,
+ note='Downloading video info', errnote='Unable to download video info')['content']
+ formats, subtitles = self._extract_mpd_formats_and_subtitles(
+ f'https://apis.naver.com/neonplayer/vodplay/v1/playback/{video_meta["videoId"]}', video_id,
+ query={
+ 'key': video_meta['inKey'],
+ 'env': 'real',
+ 'lc': 'en_US',
+ 'cpl': 'en_US',
+ }, note='Downloading video playback', errnote='Unable to download video playback')
+
+ return {
+ 'id': video_id,
+ 'formats': formats,
+ 'subtitles': subtitles,
+ **traverse_obj(video_meta, {
+ 'title': ('videoTitle', {str}),
+ 'thumbnail': ('thumbnailImageUrl', {url_or_none}),
+ 'timestamp': ('publishDateAt', {functools.partial(float_or_none, scale=1000)}),
+ 'view_count': ('readCount', {int_or_none}),
+ 'duration': ('duration', {int_or_none}),
+ 'channel': ('channel', 'channelName', {str}),
+ 'channel_id': ('channel', 'channelId', {str}),
+ 'channel_is_verified': ('channel', 'verifiedMark', {bool}),
+ }),
+ }
diff --git a/yt_dlp/extractor/cinemax.py b/yt_dlp/extractor/cinemax.py
new file mode 100644
index 0000000..706ec85
--- /dev/null
+++ b/yt_dlp/extractor/cinemax.py
@@ -0,0 +1,25 @@
+from .hbo import HBOBaseIE
+
+
+class CinemaxIE(HBOBaseIE):
+ _WORKING = False
+ _VALID_URL = r'https?://(?:www\.)?cinemax\.com/(?P<path>[^/]+/video/[0-9a-z-]+-(?P<id>\d+))'
+ _TESTS = [{
+ 'url': 'https://www.cinemax.com/warrior/video/s1-ep-1-recap-20126903',
+ 'md5': '82e0734bba8aa7ef526c9dd00cf35a05',
+ 'info_dict': {
+ 'id': '20126903',
+ 'ext': 'mp4',
+ 'title': 'S1 Ep 1: Recap',
+ },
+ 'expected_warnings': ['Unknown MIME type application/mp4 in DASH manifest'],
+ }, {
+ 'url': 'https://www.cinemax.com/warrior/video/s1-ep-1-recap-20126903.embed',
+ 'only_matching': True,
+ }]
+
+ def _real_extract(self, url):
+ path, video_id = self._match_valid_url(url).groups()
+ info = self._extract_info('https://www.cinemax.com/%s.xml' % path, video_id)
+ info['id'] = video_id
+ return info
diff --git a/yt_dlp/extractor/cinetecamilano.py b/yt_dlp/extractor/cinetecamilano.py
new file mode 100644
index 0000000..9cffa11
--- /dev/null
+++ b/yt_dlp/extractor/cinetecamilano.py
@@ -0,0 +1,61 @@
+import json
+from .common import InfoExtractor
+from ..networking.exceptions import HTTPError
+from ..utils import (
+ ExtractorError,
+ float_or_none,
+ parse_iso8601,
+ strip_or_none,
+ traverse_obj,
+ try_get,
+ urljoin,
+)
+
+
+class CinetecaMilanoIE(InfoExtractor):
+ _VALID_URL = r'https?://(?:www\.)?cinetecamilano\.it/film/(?P<id>\d+)'
+ _TESTS = [{
+ 'url': 'https://www.cinetecamilano.it/film/1942',
+ 'info_dict': {
+ 'id': '1942',
+ 'ext': 'mp4',
+ 'title': 'Il draghetto Gris\u00f9 (4 episodi)',
+ 'release_date': '20220129',
+ 'thumbnail': r're:.+\.png',
+ 'description': 'md5:5328cbe080b93224712b6f17fcaf2c01',
+ 'modified_date': '20200520',
+ 'duration': 3139,
+ 'release_timestamp': 1643446208,
+ 'modified_timestamp': int
+ }
+ }]
+
+ def _real_extract(self, url):
+ video_id = self._match_id(url)
+ try:
+ film_json = self._download_json(
+ f'https://www.cinetecamilano.it/api/catalogo/{video_id}/?',
+ video_id, headers={
+ 'Referer': url,
+ 'Authorization': try_get(self._get_cookies('https://www.cinetecamilano.it'), lambda x: f'Bearer {x["cnt-token"].value}') or ''
+ })
+ except ExtractorError as e:
+ if ((isinstance(e.cause, HTTPError) and e.cause.status == 500)
+ or isinstance(e.cause, json.JSONDecodeError)):
+ self.raise_login_required(method='cookies')
+ raise
+ if not film_json.get('success') or not film_json.get('archive'):
+ raise ExtractorError('Video information not found')
+ archive = film_json['archive']
+
+ return {
+ 'id': video_id,
+ 'title': archive.get('title'),
+ 'description': strip_or_none(archive.get('description')),
+ 'duration': float_or_none(archive.get('duration'), invscale=60),
+ 'release_timestamp': parse_iso8601(archive.get('updated_at'), delimiter=' '),
+ 'modified_timestamp': parse_iso8601(archive.get('created_at'), delimiter=' '),
+ 'thumbnail': urljoin(url, try_get(archive, lambda x: x['thumb']['src'].replace('/public/', '/storage/'))),
+ 'formats': self._extract_m3u8_formats(
+ urljoin(url, traverse_obj(archive, ('drm', 'hls'))), video_id, 'mp4')
+ }
diff --git a/yt_dlp/extractor/cineverse.py b/yt_dlp/extractor/cineverse.py
new file mode 100644
index 0000000..4405297
--- /dev/null
+++ b/yt_dlp/extractor/cineverse.py
@@ -0,0 +1,139 @@
+import re
+
+from .common import InfoExtractor
+from ..utils import (
+ filter_dict,
+ int_or_none,
+ parse_age_limit,
+ smuggle_url,
+ traverse_obj,
+ unsmuggle_url,
+ url_or_none,
+)
+
+
+class CineverseBaseIE(InfoExtractor):
+ _VALID_URL_BASE = r'https?://www\.(?P<host>%s)' % '|'.join(map(re.escape, (
+ 'cineverse.com',
+ 'asiancrush.com',
+ 'dovechannel.com',
+ 'screambox.com',
+ 'midnightpulp.com',
+ 'fandor.com',
+ 'retrocrush.tv',
+ )))
+
+
+class CineverseIE(CineverseBaseIE):
+ _VALID_URL = rf'{CineverseBaseIE._VALID_URL_BASE}/watch/(?P<id>[A-Z0-9]+)'
+ _TESTS = [{
+ 'url': 'https://www.asiancrush.com/watch/DMR00018919/Women-Who-Flirt',
+ 'skip': 'geo-blocked',
+ 'info_dict': {
+ 'title': 'Women Who Flirt',
+ 'ext': 'mp4',
+ 'id': 'DMR00018919',
+ 'modified_timestamp': 1678744575289,
+ 'cast': ['Xun Zhou', 'Xiaoming Huang', 'Yi-Lin Sie', 'Sonia Sui', 'Quniciren'],
+ 'duration': 5811.597,
+ 'description': 'md5:892fd62a05611d394141e8394ace0bc6',
+ 'age_limit': 13,
+ }
+ }, {
+ 'url': 'https://www.retrocrush.tv/watch/1000000023016/Archenemy! Crystal Bowie',
+ 'skip': 'geo-blocked',
+ 'info_dict': {
+ 'title': 'Archenemy! Crystal Bowie',
+ 'ext': 'mp4',
+ 'id': '1000000023016',
+ 'episode_number': 3,
+ 'season_number': 1,
+ 'cast': ['Nachi Nozawa', 'Yoshiko Sakakibara', 'Toshiko Fujita'],
+ 'age_limit': 0,
+ 'episode': 'Episode 3',
+ 'season': 'Season 1',
+ 'duration': 1485.067,
+ 'description': 'Cobra meets a beautiful bounty hunter by the name of Jane Royal.',
+ 'series': 'Space Adventure COBRA (Original Japanese)',
+ }
+ }]
+
+ def _real_extract(self, url):
+ url, smuggled_data = unsmuggle_url(url, default={})
+ self._initialize_geo_bypass({
+ 'countries': smuggled_data.get('geo_countries'),
+ })
+ video_id = self._match_id(url)
+ html = self._download_webpage(url, video_id)
+ idetails = self._search_nextjs_data(html, video_id)['props']['pageProps']['idetails']
+
+ err_code = idetails.get('err_code')
+ if err_code == 1002:
+ self.raise_login_required()
+ elif err_code == 1200:
+ self.raise_geo_restricted(
+ 'This video is not available from your location due to geo restriction. '
+ 'You may be able to bypass it by using the /details/ page instead of the /watch/ page',
+ countries=smuggled_data.get('geo_countries'))
+
+ return {
+ 'subtitles': filter_dict({
+ 'en': traverse_obj(idetails, (('cc_url_vtt', 'subtitle_url'), {'url': {url_or_none}})) or None,
+ }),
+ 'formats': self._extract_m3u8_formats(idetails['url'], video_id),
+ **traverse_obj(idetails, {
+ 'title': 'title',
+ 'id': ('details', 'item_id'),
+ 'description': ('details', 'description'),
+ 'duration': ('duration', {lambda x: x / 1000}),
+ 'cast': ('details', 'cast', {lambda x: x.split(', ')}),
+ 'modified_timestamp': ('details', 'updated_by', 0, 'update_time', 'time', {int_or_none}),
+ 'season_number': ('details', 'season', {int_or_none}),
+ 'episode_number': ('details', 'episode', {int_or_none}),
+ 'age_limit': ('details', 'rating_code', {parse_age_limit}),
+ 'series': ('details', 'series_details', 'title'),
+ }),
+ }
+
+
+class CineverseDetailsIE(CineverseBaseIE):
+ _VALID_URL = rf'{CineverseBaseIE._VALID_URL_BASE}/details/(?P<id>[A-Z0-9]+)'
+ _TESTS = [{
+ 'url': 'https://www.retrocrush.tv/details/1000000023012/Space-Adventure-COBRA-(Original-Japanese)',
+ 'playlist_mincount': 30,
+ 'info_dict': {
+ 'title': 'Space Adventure COBRA (Original Japanese)',
+ 'id': '1000000023012',
+ }
+ }, {
+ 'url': 'https://www.asiancrush.com/details/NNVG4938/Hansel-and-Gretel',
+ 'info_dict': {
+ 'id': 'NNVG4938',
+ 'ext': 'mp4',
+ 'title': 'Hansel and Gretel',
+ 'description': 'md5:e3e4c35309c2e82aee044f972c2fb05d',
+ 'cast': ['Jeong-myeong Cheon', 'Eun Won-jae', 'Shim Eun-gyeong', 'Ji-hee Jin', 'Hee-soon Park', 'Lydia Park', 'Kyeong-ik Kim'],
+ 'duration': 7030.732,
+ },
+ }]
+
+ def _real_extract(self, url):
+ host, series_id = self._match_valid_url(url).group('host', 'id')
+ html = self._download_webpage(url, series_id)
+ pageprops = self._search_nextjs_data(html, series_id)['props']['pageProps']
+
+ geo_countries = traverse_obj(pageprops, ('itemDetailsData', 'geo_country', {lambda x: x.split(', ')}))
+ geoblocked = traverse_obj(pageprops, (
+ 'itemDetailsData', 'playback_err_msg')) == 'This title is not available in your location.'
+
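+ # Smuggle the geo restriction info through the URL so CineverseIE can
+ # initialize geo bypass for the individual episodes.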
+ def item_result(item):
+ item_url = f'https://www.{host}/watch/{item["item_id"]}/{item["title"]}'
+ if geoblocked:
+ item_url = smuggle_url(item_url, {'geo_countries': geo_countries})
+ return self.url_result(item_url, CineverseIE)
+
+ season = traverse_obj(pageprops, ('seasonEpisodes', ..., 'episodes', lambda _, v: v['item_id'] and v['title']))
+ if season:
+ return self.playlist_result([item_result(ep) for ep in season], playlist_id=series_id,
+ playlist_title=traverse_obj(pageprops, ('itemDetailsData', 'title')))
+ return item_result(pageprops['itemDetailsData'])
diff --git a/yt_dlp/extractor/ciscolive.py b/yt_dlp/extractor/ciscolive.py
new file mode 100644
index 0000000..0668578
--- /dev/null
+++ b/yt_dlp/extractor/ciscolive.py
@@ -0,0 +1,145 @@
+import itertools
+
+from .common import InfoExtractor
+from ..utils import (
+ clean_html,
+ float_or_none,
+ int_or_none,
+ parse_qs,
+ try_get,
+ urlencode_postdata,
+)
+
+
+class CiscoLiveBaseIE(InfoExtractor):
+ # These appear to be constant across all Cisco Live presentations
+ # and are not tied to any user session or event
+ RAINFOCUS_API_URL = 'https://events.rainfocus.com/api/%s'
+ RAINFOCUS_API_PROFILE_ID = 'Na3vqYdAlJFSxhYTYQGuMbpafMqftalz'
+ RAINFOCUS_WIDGET_ID = 'n6l4Lo05R8fiy3RpUBm447dZN8uNWoye'
+ BRIGHTCOVE_URL_TEMPLATE = 'http://players.brightcove.net/5647924234001/SyK2FdqjM_default/index.html?videoId=%s'
+
+ HEADERS = {
+ 'Origin': 'https://ciscolive.cisco.com',
+ 'rfApiProfileId': RAINFOCUS_API_PROFILE_ID,
+ 'rfWidgetId': RAINFOCUS_WIDGET_ID,
+ }
+
+ def _call_api(self, ep, rf_id, query, referrer, note=None):
+ headers = self.HEADERS.copy()
+ headers['Referer'] = referrer
+ return self._download_json(
+ self.RAINFOCUS_API_URL % ep, rf_id, note=note,
+ data=urlencode_postdata(query), headers=headers)
+
+ def _parse_rf_item(self, rf_item):
+ event_name = rf_item.get('eventName')
+ title = rf_item['title']
+ description = clean_html(rf_item.get('abstract'))
+ presenter_name = try_get(rf_item, lambda x: x['participants'][0]['fullName'])
+ bc_id = rf_item['videos'][0]['url']
+ bc_url = self.BRIGHTCOVE_URL_TEMPLATE % bc_id
+ duration = float_or_none(try_get(rf_item, lambda x: x['times'][0]['length']))
+ location = try_get(rf_item, lambda x: x['times'][0]['room'])
+
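+ # the API apparently reports session length in minutes; convert to seconds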
+ if duration:
+ duration = duration * 60
+
+ return {
+ '_type': 'url_transparent',
+ 'url': bc_url,
+ 'ie_key': 'BrightcoveNew',
+ 'title': title,
+ 'description': description,
+ 'duration': duration,
+ 'creator': presenter_name,
+ 'location': location,
+ 'series': event_name,
+ }
+
+
+class CiscoLiveSessionIE(CiscoLiveBaseIE):
+ _VALID_URL = r'https?://(?:www\.)?ciscolive(?:\.cisco)?\.com/[^#]*#/session/(?P<id>[^/?&]+)'
+ _TESTS = [{
+ 'url': 'https://ciscolive.cisco.com/on-demand-library/?#/session/1423353499155001FoSs',
+ 'md5': 'c98acf395ed9c9f766941c70f5352e22',
+ 'info_dict': {
+ 'id': '5803694304001',
+ 'ext': 'mp4',
+ 'title': '13 Smart Automations to Monitor Your Cisco IOS Network',
+ 'description': 'md5:ec4a436019e09a918dec17714803f7cc',
+ 'timestamp': 1530305395,
+ 'upload_date': '20180629',
+ 'uploader_id': '5647924234001',
+ 'location': '16B Mezz.',
+ },
+ }, {
+ 'url': 'https://www.ciscolive.com/global/on-demand-library.html?search.event=ciscoliveemea2019#/session/15361595531500013WOU',
+ 'only_matching': True,
+ }, {
+ 'url': 'https://www.ciscolive.com/global/on-demand-library.html?#/session/1490051371645001kNaS',
+ 'only_matching': True,
+ }]
+
+ def _real_extract(self, url):
+ rf_id = self._match_id(url)
+ rf_result = self._call_api('session', rf_id, {'id': rf_id}, url)
+ return self._parse_rf_item(rf_result['items'][0])
+
+
+class CiscoLiveSearchIE(CiscoLiveBaseIE):
+ _VALID_URL = r'https?://(?:www\.)?ciscolive(?:\.cisco)?\.com/(?:global/)?on-demand-library(?:\.html|/)'
+ _TESTS = [{
+ 'url': 'https://ciscolive.cisco.com/on-demand-library/?search.event=ciscoliveus2018&search.technicallevel=scpsSkillLevel_aintroductory&search.focus=scpsSessionFocus_designAndDeployment#/',
+ 'info_dict': {
+ 'title': 'Search query',
+ },
+ 'playlist_count': 5,
+ }, {
+ 'url': 'https://ciscolive.cisco.com/on-demand-library/?search.technology=scpsTechnology_applicationDevelopment&search.technology=scpsTechnology_ipv6&search.focus=scpsSessionFocus_troubleshootingTroubleshooting#/',
+ 'only_matching': True,
+ }, {
+ 'url': 'https://www.ciscolive.com/global/on-demand-library.html?search.technicallevel=scpsSkillLevel_aintroductory&search.event=ciscoliveemea2019&search.technology=scpsTechnology_dataCenter&search.focus=scpsSessionFocus_bestPractices#/',
+ 'only_matching': True,
+ }]
+
+ @classmethod
+ def suitable(cls, url):
+ return False if CiscoLiveSessionIE.suitable(url) else super().suitable(url)
+
+ @staticmethod
+ def _check_bc_id_exists(rf_item):
+ return int_or_none(try_get(rf_item, lambda x: x['videos'][0]['url'])) is not None
+
+ def _entries(self, query, url):
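+ # page through the Rainfocus search API, 50 results at a time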
+ query['size'] = 50
+ query['from'] = 0
+ for page_num in itertools.count(1):
+ results = self._call_api(
+ 'search', None, query, url,
+ 'Downloading search JSON page %d' % page_num)
+ sl = try_get(results, lambda x: x['sectionList'][0], dict)
+ if sl:
+ results = sl
+ items = results.get('items')
+ if not items or not isinstance(items, list):
+ break
+ for item in items:
+ if not isinstance(item, dict):
+ continue
+ if not self._check_bc_id_exists(item):
+ continue
+ yield self._parse_rf_item(item)
+ size = int_or_none(results.get('size'))
+ if size is not None:
+ query['size'] = size
+ total = int_or_none(results.get('total'))
+ if total is not None and query['from'] + query['size'] > total:
+ break
+ query['from'] += query['size']
+
+ def _real_extract(self, url):
+ query = parse_qs(url)
+ query['type'] = 'session'
+ return self.playlist_result(
+ self._entries(query, url), playlist_title='Search query')
diff --git a/yt_dlp/extractor/ciscowebex.py b/yt_dlp/extractor/ciscowebex.py
new file mode 100644
index 0000000..85585df
--- /dev/null
+++ b/yt_dlp/extractor/ciscowebex.py
@@ -0,0 +1,106 @@
+from .common import InfoExtractor
+from ..utils import (
+ ExtractorError,
+ int_or_none,
+ try_get,
+ unified_timestamp,
+)
+
+
+class CiscoWebexIE(InfoExtractor):
+ IE_NAME = 'ciscowebex'
+ IE_DESC = 'Cisco Webex'
+ _VALID_URL = r'''(?x)
+ (?P<url>https?://(?P<subdomain>[^/#?]*)\.webex\.com/(?:
+ (?P<siteurl_1>[^/#?]*)/(?:ldr|lsr)\.php\?(?:[^#]*&)*RCID=(?P<rcid>[0-9a-f]{32})|
+ (?:recordingservice|webappng)/sites/(?P<siteurl_2>[^/#?]*)/recording/(?:playback/|play/)?(?P<id>[0-9a-f]{32})
+ ))'''
+
+ _TESTS = [{
+ 'url': 'https://demosubdomain.webex.com/demositeurl/ldr.php?RCID=e58e803bc0f766bb5f6376d2e86adb5b',
+ 'only_matching': True,
+ }, {
+ 'url': 'http://demosubdomain.webex.com/demositeurl/lsr.php?RCID=bc04b4a7b5ea2cc3a493d5ae6aaff5d7',
+ 'only_matching': True,
+ }, {
+ 'url': 'https://demosubdomain.webex.com/recordingservice/sites/demositeurl/recording/88e7a42f7b19f5b423c54754aecc2ce9/playback',
+ 'only_matching': True,
+ }]
+
+ def _real_extract(self, url):
+ mobj = self._match_valid_url(url)
+ rcid = mobj.group('rcid')
+ if rcid:
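+ # ldr/lsr share pages embed the canonical recording URL; extract it and
+ # follow redirects to obtain the site URL and recording ID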
+ webpage = self._download_webpage(url, None, note='Getting video ID')
+ url = self._search_regex(self._VALID_URL, webpage, 'redirection url', group='url')
+ url = self._request_webpage(url, None, note='Resolving final URL').url
+ mobj = self._match_valid_url(url)
+ subdomain = mobj.group('subdomain')
+ siteurl = mobj.group('siteurl_1') or mobj.group('siteurl_2')
+ video_id = mobj.group('id')
+
+ password = self.get_param('videopassword')
+
+ headers = {'Accept': 'application/json'}
+ if password:
+ headers['accessPwd'] = password
+
+ stream, urlh = self._download_json_handle(
+ 'https://%s.webex.com/webappng/api/v1/recordings/%s/stream' % (subdomain, video_id),
+ video_id, headers=headers, query={'siteurl': siteurl}, expected_status=(403, 429))
+
+ if urlh.status == 403:
+ if stream['code'] == 53004:
+ self.raise_login_required()
+ if stream['code'] == 53005:
+ if password:
+ raise ExtractorError('Wrong password', expected=True)
+ raise ExtractorError(
+ 'This video is protected by a password, use the --video-password option', expected=True)
+ raise ExtractorError(f'{self.IE_NAME} said: {stream["code"]} - {stream["message"]}', expected=True)
+
+ if urlh.status == 429:
+ self.raise_login_required(
+ f'{self.IE_NAME} asks you to solve a CAPTCHA. Solve CAPTCHA in browser and',
+ method='cookies')
+
+ video_id = stream.get('recordUUID') or video_id
+
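+ # a streaming fallback URL is always returned; direct MP4/MP3 download
+ # URLs are only exposed when 'preventDownload' is false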
+ formats = [{
+ 'format_id': 'video',
+ 'url': stream['fallbackPlaySrc'],
+ 'ext': 'mp4',
+ 'vcodec': 'avc1.640028',
+ 'acodec': 'mp4a.40.2',
+ }]
+ if stream.get('preventDownload') is False:
+ mp4url = try_get(stream, lambda x: x['downloadRecordingInfo']['downloadInfo']['mp4URL'])
+ if mp4url:
+ formats.append({
+ 'format_id': 'video',
+ 'url': mp4url,
+ 'ext': 'mp4',
+ 'vcodec': 'avc1.640028',
+ 'acodec': 'mp4a.40.2',
+ })
+ audiourl = try_get(stream, lambda x: x['downloadRecordingInfo']['downloadInfo']['audioURL'])
+ if audiourl:
+ formats.append({
+ 'format_id': 'audio',
+ 'url': audiourl,
+ 'ext': 'mp3',
+ 'vcodec': 'none',
+ 'acodec': 'mp3',
+ })
+
+ return {
+ 'id': video_id,
+ 'title': stream['recordName'],
+ 'description': stream.get('description'),
+ 'uploader': stream.get('ownerDisplayName'),
+ 'uploader_id': stream.get('ownerUserName') or stream.get('ownerId'),
+ 'timestamp': unified_timestamp(stream.get('createTime')),
+ 'duration': int_or_none(stream.get('duration'), 1000),
+ 'webpage_url': 'https://%s.webex.com/recordingservice/sites/%s/recording/playback/%s' % (subdomain, siteurl, video_id),
+ 'formats': formats,
+ }
diff --git a/yt_dlp/extractor/cjsw.py b/yt_dlp/extractor/cjsw.py
new file mode 100644
index 0000000..c37a3b8
--- /dev/null
+++ b/yt_dlp/extractor/cjsw.py
@@ -0,0 +1,67 @@
+from .common import InfoExtractor
+from ..utils import (
+ determine_ext,
+ unescapeHTML,
+)
+
+
+class CJSWIE(InfoExtractor):
+ _VALID_URL = r'https?://(?:www\.)?cjsw\.com/program/(?P<program>[^/]+)/episode/(?P<id>\d+)'
+ _TESTS = [{
+ 'url': 'http://cjsw.com/program/freshly-squeezed/episode/20170620',
+ 'md5': 'cee14d40f1e9433632c56e3d14977120',
+ 'info_dict': {
+ 'id': '91d9f016-a2e7-46c5-8dcb-7cbcd7437c41',
+ 'ext': 'mp3',
+ 'title': 'Freshly Squeezed – Episode June 20, 2017',
+ 'description': 'md5:c967d63366c3898a80d0c7b0ff337202',
+ 'series': 'Freshly Squeezed',
+ 'episode_id': '20170620',
+ },
+ }, {
+ # no description
+ 'url': 'http://cjsw.com/program/road-pops/episode/20170707/',
+ 'only_matching': True,
+ }]
+
+ def _real_extract(self, url):
+ mobj = self._match_valid_url(url)
+ program, episode_id = mobj.group('program', 'id')
+ audio_id = '%s/%s' % (program, episode_id)
+
+ webpage = self._download_webpage(url, episode_id)
+
+ title = unescapeHTML(self._search_regex(
+ (r'<h1[^>]+class=["\']episode-header__title["\'][^>]*>(?P<title>[^<]+)',
+ r'data-audio-title=(["\'])(?P<title>(?:(?!\1).)+)\1'),
+ webpage, 'title', group='title'))
+
+ audio_url = self._search_regex(
+ r'<button[^>]+data-audio-src=(["\'])(?P<url>(?:(?!\1).)+)\1',
+ webpage, 'audio url', group='url')
+
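+ # prefer the UUID embedded in the media URL as a stable ID, keeping
+ # the '<program>/<episode>' fallback in case the URL layout changes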
+ audio_id = self._search_regex(
+ r'/([\da-f]{8}-[\da-f]{4}-[\da-f]{4}-[\da-f]{4}-[\da-f]{12})\.mp3',
+ audio_url, 'audio id', default=audio_id)
+
+ formats = [{
+ 'url': audio_url,
+ 'ext': determine_ext(audio_url, 'mp3'),
+ 'vcodec': 'none',
+ }]
+
+ description = self._html_search_regex(
+ r'<p>(?P<description>.+?)</p>', webpage, 'description',
+ default=None)
+ series = self._search_regex(
+ r'data-showname=(["\'])(?P<name>(?:(?!\1).)+)\1', webpage,
+ 'series', default=program, group='name')
+
+ return {
+ 'id': audio_id,
+ 'title': title,
+ 'description': description,
+ 'formats': formats,
+ 'series': series,
+ 'episode_id': episode_id,
+ }
diff --git a/yt_dlp/extractor/clipchamp.py b/yt_dlp/extractor/clipchamp.py
new file mode 100644
index 0000000..a8bdf7e
--- /dev/null
+++ b/yt_dlp/extractor/clipchamp.py
@@ -0,0 +1,61 @@
+from .common import InfoExtractor
+from ..utils import (
+ ExtractorError,
+ traverse_obj,
+ unified_timestamp,
+ url_or_none,
+)
+
+
+class ClipchampIE(InfoExtractor):
+ _VALID_URL = r'https?://(?:www\.)?clipchamp\.com/watch/(?P<id>[\w-]+)'
+ _TESTS = [{
+ 'url': 'https://clipchamp.com/watch/gRXZ4ZhdDaU',
+ 'info_dict': {
+ 'id': 'gRXZ4ZhdDaU',
+ 'ext': 'mp4',
+ 'title': 'Untitled video',
+ 'uploader': 'Alexander Schwartz',
+ 'timestamp': 1680805580,
+ 'upload_date': '20230406',
+ 'thumbnail': r're:^https?://.+\.jpg',
+ },
+ 'params': {'skip_download': 'm3u8'},
+ }]
+
+ _STREAM_URL_TMPL = 'https://%s.cloudflarestream.com/%s/manifest/video.%s'
+ _STREAM_URL_QUERY = {'parentOrigin': 'https://clipchamp.com'}
+
+ def _real_extract(self, url):
+ video_id = self._match_id(url)
+ webpage = self._download_webpage(url, video_id)
+ data = self._search_nextjs_data(webpage, video_id)['props']['pageProps']['video']
+
+ storage_location = data.get('storage_location')
+ if storage_location != 'cf_stream':
+ raise ExtractorError(f'Unsupported clip storage location "{storage_location}"')
+
+ path = data['download_url']
+ iframe = self._download_webpage(
+ f'https://iframe.cloudflarestream.com/{path}', video_id, 'Downloading player iframe')
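+ # the customer-* prefix for the stream subdomain is embedded in the
+ # player iframe; fall back to a value previously observed in the wild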
+ subdomain = self._search_regex(
+ r'\bcustomer-domain-prefix=["\']([\w-]+)["\']', iframe,
+ 'subdomain', fatal=False) or 'customer-2ut9yn3y6fta1yxe'
+
+ formats = self._extract_mpd_formats(
+ self._STREAM_URL_TMPL % (subdomain, path, 'mpd'), video_id,
+ query=self._STREAM_URL_QUERY, fatal=False, mpd_id='dash')
+ formats.extend(self._extract_m3u8_formats(
+ self._STREAM_URL_TMPL % (subdomain, path, 'm3u8'), video_id, 'mp4',
+ query=self._STREAM_URL_QUERY, fatal=False, m3u8_id='hls'))
+
+ return {
+ 'id': video_id,
+ 'formats': formats,
+ 'uploader': ' '.join(traverse_obj(data, ('creator', ('first_name', 'last_name'), {str}))) or None,
+ **traverse_obj(data, {
+ 'title': ('project', 'project_name', {str}),
+ 'timestamp': ('created_at', {unified_timestamp}),
+ 'thumbnail': ('thumbnail_url', {url_or_none}),
+ }),
+ }
diff --git a/yt_dlp/extractor/clippit.py b/yt_dlp/extractor/clippit.py
new file mode 100644
index 0000000..006a713
--- /dev/null
+++ b/yt_dlp/extractor/clippit.py
@@ -0,0 +1,70 @@
+import re
+
+from .common import InfoExtractor
+from ..utils import (
+ parse_iso8601,
+ qualities,
+)
+
+
+class ClippitIE(InfoExtractor):
+ _VALID_URL = r'https?://(?:www\.)?clippituser\.tv/c/(?P<id>[a-z]+)'
+ _TEST = {
+ 'url': 'https://www.clippituser.tv/c/evmgm',
+ 'md5': '963ae7a59a2ec4572ab8bf2f2d2c5f09',
+ 'info_dict': {
+ 'id': 'evmgm',
+ 'ext': 'mp4',
+ 'title': 'Bye bye Brutus. #BattleBots - Clippit',
+ 'uploader': 'lizllove',
+ 'uploader_url': 'https://www.clippituser.tv/p/lizllove',
+ 'timestamp': 1472183818,
+ 'upload_date': '20160826',
+ 'description': 'BattleBots | ABC',
+ 'thumbnail': r're:^https?://.*\.jpg$',
+ }
+ }
+
+ def _real_extract(self, url):
+ video_id = self._match_id(url)
+ webpage = self._download_webpage(url, video_id)
+
+ title = self._html_search_regex(r'<title.*>(.+?)</title>', webpage, 'title')
+
+ FORMATS = ('sd', 'hd')
+ quality = qualities(FORMATS)
+ formats = []
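+ # the page exposes one data-{sd,hd}-file attribute per quality; the
+ # file name appears to encode the height, e.g. .../360.mp4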
+ for format_id in FORMATS:
+ format_url = self._html_search_regex(r'data-%s-file="(.+?)"' % format_id,
+ webpage, 'url', fatal=False)
+ if not format_url:
+ continue
+ match = re.search(r'/(?P<height>\d+)\.mp4', format_url)
+ formats.append({
+ 'url': format_url,
+ 'format_id': format_id,
+ 'quality': quality(format_id),
+ 'height': int(match.group('height')) if match else None,
+ })
+
+ uploader = self._html_search_regex(r'class="username".*>\s+(.+?)\n',
+ webpage, 'uploader', fatal=False)
+ uploader_url = ('https://www.clippituser.tv/p/' + uploader
+ if uploader else None)
+
+ timestamp = self._html_search_regex(r'datetime="(.+?)"',
+ webpage, 'date', fatal=False)
+ thumbnail = self._html_search_regex(r'data-image="(.+?)"',
+ webpage, 'thumbnail', fatal=False)
+
+ return {
+ 'id': video_id,
+ 'title': title,
+ 'formats': formats,
+ 'uploader': uploader,
+ 'uploader_url': uploader_url,
+ 'timestamp': parse_iso8601(timestamp),
+ 'description': self._og_search_description(webpage),
+ 'thumbnail': thumbnail,
+ }
diff --git a/yt_dlp/extractor/cliprs.py b/yt_dlp/extractor/cliprs.py
new file mode 100644
index 0000000..c2add02
--- /dev/null
+++ b/yt_dlp/extractor/cliprs.py
@@ -0,0 +1,31 @@
+from .onet import OnetBaseIE
+
+
+class ClipRsIE(OnetBaseIE):
+ _WORKING = False
+ _VALID_URL = r'https?://(?:www\.)?clip\.rs/(?P<id>[^/]+)/\d+'
+ _TEST = {
+ 'url': 'http://www.clip.rs/premijera-frajle-predstavljaju-novi-spot-za-pesmu-moli-me-moli/3732',
+ 'md5': 'c412d57815ba07b56f9edc7b5d6a14e5',
+ 'info_dict': {
+ 'id': '1488842.1399140381',
+ 'ext': 'mp4',
+ 'title': 'PREMIJERA Frajle predstavljaju novi spot za pesmu Moli me, moli',
+ 'description': 'md5:56ce2c3b4ab31c5a2e0b17cb9a453026',
+ 'duration': 229,
+ 'timestamp': 1459850243,
+ 'upload_date': '20160405',
+ }
+ }
+
+ def _real_extract(self, url):
+ display_id = self._match_id(url)
+
+ webpage = self._download_webpage(url, display_id)
+
+ mvp_id = self._search_mvp_id(webpage)
+
+ info_dict = self._extract_from_id(mvp_id, webpage)
+ info_dict['display_id'] = display_id
+
+ return info_dict
diff --git a/yt_dlp/extractor/closertotruth.py b/yt_dlp/extractor/closertotruth.py
new file mode 100644
index 0000000..1f9a5f6
--- /dev/null
+++ b/yt_dlp/extractor/closertotruth.py
@@ -0,0 +1,89 @@
+import re
+
+from .common import InfoExtractor
+
+
+class CloserToTruthIE(InfoExtractor):
+ _WORKING = False
+ _VALID_URL = r'https?://(?:www\.)?closertotruth\.com/(?:[^/]+/)*(?P<id>[^/?#&]+)'
+ _TESTS = [{
+ 'url': 'http://closertotruth.com/series/solutions-the-mind-body-problem#video-3688',
+ 'info_dict': {
+ 'id': '0_zof1ktre',
+ 'display_id': 'solutions-the-mind-body-problem',
+ 'ext': 'mov',
+ 'title': 'Solutions to the Mind-Body Problem?',
+ 'upload_date': '20140221',
+ 'timestamp': 1392956007,
+ 'uploader_id': 'CTTXML'
+ },
+ 'params': {
+ 'skip_download': True,
+ },
+ }, {
+ 'url': 'http://closertotruth.com/episodes/how-do-brains-work',
+ 'info_dict': {
+ 'id': '0_iuxai6g6',
+ 'display_id': 'how-do-brains-work',
+ 'ext': 'mov',
+ 'title': 'How do Brains Work?',
+ 'upload_date': '20140221',
+ 'timestamp': 1392956024,
+ 'uploader_id': 'CTTXML'
+ },
+ 'params': {
+ 'skip_download': True,
+ },
+ }, {
+ 'url': 'http://closertotruth.com/interviews/1725',
+ 'info_dict': {
+ 'id': '1725',
+ 'title': 'AyaFr-002',
+ },
+ 'playlist_mincount': 2,
+ }]
+
+ def _real_extract(self, url):
+ display_id = self._match_id(url)
+
+ webpage = self._download_webpage(url, display_id)
+
+ partner_id = self._search_regex(
+ r'<script[^>]+src=["\'].*?\b(?:partner_id|p)/(\d+)',
+ webpage, 'kaltura partner_id')
+
+ title = self._html_extract_title(webpage, 'video title')
+
+ select = self._search_regex(
+ r'(?s)<select[^>]+id="select-version"[^>]*>(.+?)</select>',
+ webpage, 'select version', default=None)
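+ # pages with a version selector list several Kaltura entries; return
+ # them all as a playlist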
+ if select:
+ entry_ids = set()
+ entries = []
+ for mobj in re.finditer(
+ r'<option[^>]+value=(["\'])(?P<id>[0-9a-z_]+)(?:#.+?)?\1[^>]*>(?P<title>[^<]+)',
+ webpage):
+ entry_id = mobj.group('id')
+ if entry_id in entry_ids:
+ continue
+ entry_ids.add(entry_id)
+ entries.append({
+ '_type': 'url_transparent',
+ 'url': 'kaltura:%s:%s' % (partner_id, entry_id),
+ 'ie_key': 'Kaltura',
+ 'title': mobj.group('title'),
+ })
+ if entries:
+ return self.playlist_result(entries, display_id, title)
+
+ entry_id = self._search_regex(
+ r'<a[^>]+id=(["\'])embed-kaltura\1[^>]+data-kaltura=(["\'])(?P<id>[0-9a-z_]+)\2',
+ webpage, 'kaltura entry_id', group='id')
+
+ return {
+ '_type': 'url_transparent',
+ 'display_id': display_id,
+ 'url': 'kaltura:%s:%s' % (partner_id, entry_id),
+ 'ie_key': 'Kaltura',
+ 'title': title
+ }
diff --git a/yt_dlp/extractor/cloudflarestream.py b/yt_dlp/extractor/cloudflarestream.py
new file mode 100644
index 0000000..a812c24
--- /dev/null
+++ b/yt_dlp/extractor/cloudflarestream.py
@@ -0,0 +1,76 @@
+import base64
+
+from .common import InfoExtractor
+
+
+class CloudflareStreamIE(InfoExtractor):
+ _SUBDOMAIN_RE = r'(?:(?:watch|iframe|customer-\w+)\.)?'
+ _DOMAIN_RE = r'(?:cloudflarestream\.com|(?:videodelivery|bytehighway)\.net)'
+ _EMBED_RE = rf'embed\.{_DOMAIN_RE}/embed/[^/]+\.js\?.*?\bvideo='
+ _ID_RE = r'[\da-f]{32}|[\w-]+\.[\w-]+\.[\w-]+'
+ _VALID_URL = rf'https?://(?:{_SUBDOMAIN_RE}{_DOMAIN_RE}/|{_EMBED_RE})(?P<id>{_ID_RE})'
+ _EMBED_REGEX = [
+ rf'<script[^>]+\bsrc=(["\'])(?P<url>(?:https?:)?//{_EMBED_RE}(?:{_ID_RE}).*?)\1',
+ rf'<iframe[^>]+\bsrc=["\'](?P<url>https?://{_SUBDOMAIN_RE}{_DOMAIN_RE}/[\da-f]{{32}})',
+ ]
+ _TESTS = [{
+ 'url': 'https://embed.cloudflarestream.com/embed/we4g.fla9.latest.js?video=31c9291ab41fac05471db4e73aa11717',
+ 'info_dict': {
+ 'id': '31c9291ab41fac05471db4e73aa11717',
+ 'ext': 'mp4',
+ 'title': '31c9291ab41fac05471db4e73aa11717',
+ 'thumbnail': 'https://videodelivery.net/31c9291ab41fac05471db4e73aa11717/thumbnails/thumbnail.jpg',
+ },
+ 'params': {
+ 'skip_download': 'm3u8',
+ },
+ }, {
+ 'url': 'https://watch.cloudflarestream.com/9df17203414fd1db3e3ed74abbe936c1',
+ 'only_matching': True,
+ }, {
+ 'url': 'https://cloudflarestream.com/31c9291ab41fac05471db4e73aa11717/manifest/video.mpd',
+ 'only_matching': True,
+ }, {
+ 'url': 'https://embed.videodelivery.net/embed/r4xu.fla9.latest.js?video=81d80727f3022488598f68d323c1ad5e',
+ 'only_matching': True,
+ }, {
+ 'url': 'https://customer-aw5py76sw8wyqzmh.cloudflarestream.com/2463f6d3e06fa29710a337f5f5389fd8/iframe',
+ 'only_matching': True,
+ }]
+ _WEBPAGE_TESTS = [{
+ 'url': 'https://upride.cc/incident/shoulder-pass-at-light/',
+ 'info_dict': {
+ 'id': 'eaef9dea5159cf968be84241b5cedfe7',
+ 'ext': 'mp4',
+ 'title': 'eaef9dea5159cf968be84241b5cedfe7',
+ 'thumbnail': 'https://videodelivery.net/eaef9dea5159cf968be84241b5cedfe7/thumbnails/thumbnail.jpg',
+ },
+ 'params': {
+ 'skip_download': 'm3u8',
+ },
+ }]
+
+ def _real_extract(self, url):
+ video_id = self._match_id(url)
+ domain = 'bytehighway.net' if 'bytehighway.net/' in url else 'videodelivery.net'
+ base_url = 'https://%s/%s/' % (domain, video_id)
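+ # signed embeds substitute a JWT-like token for the 32-char hex ID;
+ # the real video ID is carried in the token payload's 'sub' claim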
+ if '.' in video_id:
+ video_id = self._parse_json(base64.urlsafe_b64decode(
+ video_id.split('.')[1] + '==='), video_id)['sub']
+ manifest_base_url = base_url + 'manifest/video.'
+
+ formats, subtitles = self._extract_m3u8_formats_and_subtitles(
+ manifest_base_url + 'm3u8', video_id, 'mp4',
+ 'm3u8_native', m3u8_id='hls', fatal=False)
+ fmts, subs = self._extract_mpd_formats_and_subtitles(
+ manifest_base_url + 'mpd', video_id, mpd_id='dash', fatal=False)
+ formats.extend(fmts)
+ self._merge_subtitles(subs, target=subtitles)
+
+ return {
+ 'id': video_id,
+ 'title': video_id,
+ 'thumbnail': base_url + 'thumbnails/thumbnail.jpg',
+ 'formats': formats,
+ 'subtitles': subtitles,
+ }
diff --git a/yt_dlp/extractor/cloudycdn.py b/yt_dlp/extractor/cloudycdn.py
new file mode 100644
index 0000000..e6e470e
--- /dev/null
+++ b/yt_dlp/extractor/cloudycdn.py
@@ -0,0 +1,79 @@
+from .common import InfoExtractor
+from ..utils import (
+ int_or_none,
+ parse_iso8601,
+ url_or_none,
+ urlencode_postdata,
+)
+from ..utils.traversal import traverse_obj
+
+
+class CloudyCDNIE(InfoExtractor):
+ _VALID_URL = r'(?:https?:)?//embed\.cloudycdn\.services/(?P<site_id>[^/?#]+)/media/(?P<id>[\w-]+)'
+ _EMBED_REGEX = [rf'<iframe[^>]+\bsrc=[\'"](?P<url>{_VALID_URL})']
+ _TESTS = [{
+ 'url': 'https://embed.cloudycdn.services/ltv/media/46k_d23-6000-105?',
+ 'md5': '64f72a360ca530d5ed89c77646c9eee5',
+ 'info_dict': {
+ 'id': '46k_d23-6000-105',
+ 'ext': 'mp4',
+ 'timestamp': 1700589151,
+ 'duration': 1442,
+ 'upload_date': '20231121',
+ 'title': 'D23-6000-105_cetstud',
+ 'thumbnail': 'https://store.cloudycdn.services/tmsp00060/assets/media/660858/placeholder1700589200.jpg',
+ }
+ }, {
+ 'url': 'https://embed.cloudycdn.services/izm/media/26e_lv-8-5-1',
+ 'md5': '798828a479151e2444d8dcfbec76e482',
+ 'info_dict': {
+ 'id': '26e_lv-8-5-1',
+ 'ext': 'mp4',
+ 'title': 'LV-8-5-1',
+ 'timestamp': 1669767167,
+ 'thumbnail': 'https://store.cloudycdn.services/tmsp00120/assets/media/488306/placeholder1679423604.jpg',
+ 'duration': 1205,
+ 'upload_date': '20221130',
+ }
+ }]
+ _WEBPAGE_TESTS = [{
+ 'url': 'https://www.tavaklase.lv/video/es-esmu-mina-um-2/',
+ 'md5': '63074e8e6c84ac2a01f2fb8bf03b8f43',
+ 'info_dict': {
+ 'id': 'cqd_lib-2',
+ 'ext': 'mp4',
+ 'upload_date': '20230223',
+ 'duration': 629,
+ 'thumbnail': 'https://store.cloudycdn.services/tmsp00120/assets/media/518407/placeholder1678748124.jpg',
+ 'timestamp': 1677181513,
+ 'title': 'LIB-2',
+ }
+ }]
+
+ def _real_extract(self, url):
+ site_id, video_id = self._match_valid_url(url).group('site_id', 'id')
+
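+ # the player endpoint expects a form-encoded POST; 'version' presumably
+ # mirrors the embedded web player's version string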
+ data = self._download_json(
+ f'https://player.cloudycdn.services/player/{site_id}/media/{video_id}/',
+ video_id, data=urlencode_postdata({
+ 'version': '6.4.0',
+ 'referer': url,
+ }))
+
+ formats, subtitles = [], {}
+ for m3u8_url in traverse_obj(data, ('source', 'sources', ..., 'src', {url_or_none})):
+ fmts, subs = self._extract_m3u8_formats_and_subtitles(m3u8_url, video_id, fatal=False)
+ formats.extend(fmts)
+ self._merge_subtitles(subs, target=subtitles)
+
+ return {
+ 'id': video_id,
+ 'formats': formats,
+ 'subtitles': subtitles,
+ **traverse_obj(data, {
+ 'title': ('name', {str}),
+ 'duration': ('duration', {int_or_none}),
+ 'timestamp': ('upload_date', {parse_iso8601}),
+ 'thumbnail': ('source', 'poster', {url_or_none}),
+ }),
+ }
diff --git a/yt_dlp/extractor/clubic.py b/yt_dlp/extractor/clubic.py
new file mode 100644
index 0000000..716f259
--- /dev/null
+++ b/yt_dlp/extractor/clubic.py
@@ -0,0 +1,53 @@
+from .common import InfoExtractor
+from ..utils import (
+ clean_html,
+ qualities,
+)
+
+
+class ClubicIE(InfoExtractor):
+ _WORKING = False
+ _VALID_URL = r'https?://(?:www\.)?clubic\.com/video/(?:[^/]+/)*video.*-(?P<id>[0-9]+)\.html'
+
+ _TESTS = [{
+ 'url': 'http://www.clubic.com/video/clubic-week/video-clubic-week-2-0-le-fbi-se-lance-dans-la-photo-d-identite-448474.html',
+ 'md5': '1592b694ba586036efac1776b0b43cd3',
+ 'info_dict': {
+ 'id': '448474',
+ 'ext': 'mp4',
+ 'title': 'Clubic Week 2.0 : le FBI se lance dans la photo d\u0092identité',
+ 'description': 're:Gueule de bois chez Nokia. Le constructeur a indiqué cette.*',
+ 'thumbnail': r're:^http://img\.clubic\.com/.*\.jpg$',
+ }
+ }, {
+ 'url': 'http://www.clubic.com/video/video-clubic-week-2-0-apple-iphone-6s-et-plus-mais-surtout-le-pencil-469792.html',
+ 'only_matching': True,
+ }]
+
+ def _real_extract(self, url):
+ video_id = self._match_id(url)
+
+ player_url = 'http://player.m6web.fr/v1/player/clubic/%s.html' % video_id
+ player_page = self._download_webpage(player_url, video_id)
+
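+ # the m6web player page inlines its configuration as a JS object literal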
+ config = self._parse_json(self._search_regex(
+ r'(?m)M6\.Player\.config\s*=\s*(\{.+?\});$', player_page,
+ 'configuration'), video_id)
+
+ video_info = config['videoInfo']
+ sources = config['sources']
+ quality_order = qualities(['sd', 'hq'])
+
+ formats = [{
+ 'format_id': src['streamQuality'],
+ 'url': src['src'],
+ 'quality': quality_order(src['streamQuality']),
+ } for src in sources]
+
+ return {
+ 'id': video_id,
+ 'title': video_info['title'],
+ 'formats': formats,
+ 'description': clean_html(video_info.get('description')),
+ 'thumbnail': config.get('poster'),
+ }
diff --git a/yt_dlp/extractor/clyp.py b/yt_dlp/extractor/clyp.py
new file mode 100644
index 0000000..273d002
--- /dev/null
+++ b/yt_dlp/extractor/clyp.py
@@ -0,0 +1,99 @@
+from .common import InfoExtractor
+from ..utils import (
+ float_or_none,
+ parse_qs,
+ unified_timestamp,
+)
+
+
+class ClypIE(InfoExtractor):
+ _VALID_URL = r'https?://(?:www\.)?clyp\.it/(?P<id>[a-z0-9]+)'
+ _TESTS = [{
+ 'url': 'https://clyp.it/iynkjk4b',
+ 'md5': '4bc6371c65210e7b372097fce4d92441',
+ 'info_dict': {
+ 'id': 'iynkjk4b',
+ 'ext': 'ogg',
+ 'title': 'research',
+ 'description': '#Research',
+ 'duration': 51.278,
+ 'timestamp': 1435524981,
+ 'upload_date': '20150628',
+ },
+ }, {
+ 'url': 'https://clyp.it/b04p1odi?token=b0078e077e15835845c528a44417719d',
+ 'info_dict': {
+ 'id': 'b04p1odi',
+ 'ext': 'ogg',
+ 'title': 'GJ! (Reward Edit)',
+ 'description': 'Metal Resistance (THE ONE edition)',
+ 'duration': 177.789,
+ 'timestamp': 1528241278,
+ 'upload_date': '20180605',
+ },
+ 'params': {
+ 'skip_download': True,
+ },
+ }, {
+ 'url': 'https://clyp.it/v42214lc',
+ 'md5': '4aca4dfc3236fb6d6ddc4ea08314f33f',
+ 'info_dict': {
+ 'id': 'v42214lc',
+ 'ext': 'wav',
+ 'title': 'i dont wanna go (old version)',
+ 'duration': 113.528,
+ 'timestamp': 1607348505,
+ 'upload_date': '20201207',
+ },
+ }]
+
+ def _real_extract(self, url):
+ audio_id = self._match_id(url)
+
+ qs = parse_qs(url)
+ token = qs.get('token', [None])[0]
+
+ query = {}
+ if token:
+ query['token'] = token
+
+ metadata = self._download_json(
+ 'https://api.clyp.it/%s' % audio_id, audio_id, query=query)
+
+ formats = []
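+ # the API may expose up to four direct URLs:
+ # OggUrl, Mp3Url, SecureOggUrl and SecureMp3Url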
+ for secure in ('', 'Secure'):
+ for ext in ('Ogg', 'Mp3'):
+ format_id = '%s%s' % (secure, ext)
+ format_url = metadata.get('%sUrl' % format_id)
+ if format_url:
+ formats.append({
+ 'url': format_url,
+ 'format_id': format_id,
+ 'vcodec': 'none',
+ 'acodec': ext.lower(),
+ })
+
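+ # a lossless WAV stream, when available, seems to be advertised only
+ # in the page JavaScript (wavStreamUrl), not in the API response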
+ page = self._download_webpage(url, video_id=audio_id)
+ wav_url = self._html_search_regex(
+ r'var\s*wavStreamUrl\s*=\s*["\'](?P<url>https?://[^\'"]+)', page, 'url', default=None)
+ if wav_url:
+ formats.append({
+ 'url': wav_url,
+ 'format_id': 'wavStreamUrl',
+ 'vcodec': 'none',
+ 'acodec': 'wav',
+ })
+
+ title = metadata['Title']
+ description = metadata.get('Description')
+ duration = float_or_none(metadata.get('Duration'))
+ timestamp = unified_timestamp(metadata.get('DateCreated'))
+
+ return {
+ 'id': audio_id,
+ 'title': title,
+ 'description': description,
+ 'duration': duration,
+ 'timestamp': timestamp,
+ 'formats': formats,
+ }
diff --git a/yt_dlp/extractor/cmt.py b/yt_dlp/extractor/cmt.py
new file mode 100644
index 0000000..6359102
--- /dev/null
+++ b/yt_dlp/extractor/cmt.py
@@ -0,0 +1,55 @@
+from .mtv import MTVIE
+
+# TODO Remove - Reason: Outdated Site
+
+
+class CMTIE(MTVIE): # XXX: Do not subclass from concrete IE
+ _WORKING = False
+ IE_NAME = 'cmt.com'
+ _VALID_URL = r'https?://(?:www\.)?cmt\.com/(?:videos|shows|(?:full-)?episodes|video-clips)/(?P<id>[^/]+)'
+
+ _TESTS = [{
+ 'url': 'http://www.cmt.com/videos/garth-brooks/989124/the-call-featuring-trisha-yearwood.jhtml#artist=30061',
+ 'md5': 'e6b7ef3c4c45bbfae88061799bbba6c2',
+ 'info_dict': {
+ 'id': '989124',
+ 'ext': 'mp4',
+ 'title': 'Garth Brooks - "The Call (featuring Trisha Yearwood)"',
+ 'description': 'Blame It All On My Roots',
+ },
+ 'skip': 'Video not available',
+ }, {
+ 'url': 'http://www.cmt.com/videos/misc/1504699/still-the-king-ep-109-in-3-minutes.jhtml#id=1739908',
+ 'md5': 'e61a801ca4a183a466c08bd98dccbb1c',
+ 'info_dict': {
+ 'id': '1504699',
+ 'ext': 'mp4',
+ 'title': 'Still The King Ep. 109 in 3 Minutes',
+ 'description': 'Relive or catch up with Still The King by watching this recap of season 1, episode 9.',
+ 'timestamp': 1469421000.0,
+ 'upload_date': '20160725',
+ },
+ }, {
+ 'url': 'http://www.cmt.com/shows/party-down-south/party-down-south-ep-407-gone-girl/1738172/playlist/#id=1738172',
+ 'only_matching': True,
+ }, {
+ 'url': 'http://www.cmt.com/full-episodes/537qb3/nashville-the-wayfaring-stranger-season-5-ep-501',
+ 'only_matching': True,
+ }, {
+ 'url': 'http://www.cmt.com/video-clips/t9e4ci/nashville-juliette-in-2-minutes',
+ 'only_matching': True,
+ }]
+
+ def _extract_mgid(self, webpage, url):
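+ # try the inline MTVN player config first, then fall back to the
+ # triforce feed data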
+ mgid = self._search_regex(
+ r'MTVN\.VIDEO\.contentUri\s*=\s*([\'"])(?P<mgid>.+?)\1',
+ webpage, 'mgid', group='mgid', default=None)
+ if not mgid:
+ mgid = self._extract_triforce_mgid(webpage)
+ return mgid
+
+ def _real_extract(self, url):
+ video_id = self._match_id(url)
+ webpage = self._download_webpage(url, video_id)
+ mgid = self._extract_mgid(webpage, url)
+ return self.url_result('http://media.mtvnservices.com/embed/%s' % mgid)
diff --git a/yt_dlp/extractor/cnbc.py b/yt_dlp/extractor/cnbc.py
new file mode 100644
index 0000000..cedfd3e
--- /dev/null
+++ b/yt_dlp/extractor/cnbc.py
@@ -0,0 +1,97 @@
+from .common import InfoExtractor
+from ..utils import int_or_none, parse_iso8601, str_or_none, url_or_none
+from ..utils.traversal import traverse_obj
+
+
+class CNBCVideoIE(InfoExtractor):
+ _VALID_URL = r'https?://(?:www\.)?cnbc\.com/video/(?:[^/?#]+/)+(?P<id>[^./?#&]+)\.html'
+
+ _TESTS = [{
+ 'url': 'https://www.cnbc.com/video/2023/12/07/mcdonalds-just-unveiled-cosmcsits-new-spinoff-brand.html',
+ 'info_dict': {
+ 'ext': 'mp4',
+ 'id': '107344774',
+ 'display_id': 'mcdonalds-just-unveiled-cosmcsits-new-spinoff-brand',
+ 'modified_timestamp': 1702053483,
+ 'timestamp': 1701977810,
+ 'channel': 'News Videos',
+ 'upload_date': '20231207',
+ 'description': 'md5:882c001d85cb43d7579b514307b3e78b',
+ 'release_timestamp': 1701977375,
+ 'modified_date': '20231208',
+ 'release_date': '20231207',
+ 'duration': 65,
+ 'creators': ['Sean Conlon'],
+ 'title': 'Here\'s a first look at McDonald\'s new spinoff brand, CosMc\'s',
+ 'thumbnail': 'https://image.cnbcfm.com/api/v1/image/107344192-1701894812493-CosMcsskyHero_2336x1040_hero-desktop.jpg?v=1701894855',
+ },
+ 'expected_warnings': ['Unable to download f4m manifest'],
+ }, {
+ 'url': 'https://www.cnbc.com/video/2023/12/08/jim-cramer-shares-his-take-on-seattles-tech-scene.html',
+ 'info_dict': {
+ 'creators': ['Jim Cramer'],
+ 'channel': 'Mad Money with Jim Cramer',
+ 'description': 'md5:72925be21b952e95eba51178dddf4e3e',
+ 'duration': 299.0,
+ 'ext': 'mp4',
+ 'id': '107345451',
+ 'display_id': 'jim-cramer-shares-his-take-on-seattles-tech-scene',
+ 'thumbnail': 'https://image.cnbcfm.com/api/v1/image/107345481-1702079431MM-B-120823.jpg?v=1702079430',
+ 'timestamp': 1702080139,
+ 'title': 'Jim Cramer shares his take on Seattle\'s tech scene',
+ 'release_date': '20231208',
+ 'upload_date': '20231209',
+ 'modified_timestamp': 1702080139,
+ 'modified_date': '20231209',
+ 'release_timestamp': 1702073551,
+ },
+ 'expected_warnings': ['Unable to download f4m manifest'],
+ }, {
+ 'url': 'https://www.cnbc.com/video/2023/12/08/the-epicenter-of-ai-is-in-seattle-says-jim-cramer.html',
+ 'info_dict': {
+ 'creators': ['Jim Cramer'],
+ 'channel': 'Mad Money with Jim Cramer',
+ 'description': 'md5:72925be21b952e95eba51178dddf4e3e',
+ 'duration': 113.0,
+ 'ext': 'mp4',
+ 'id': '107345474',
+ 'display_id': 'the-epicenter-of-ai-is-in-seattle-says-jim-cramer',
+ 'thumbnail': 'https://image.cnbcfm.com/api/v1/image/107345486-Screenshot_2023-12-08_at_70339_PM.png?v=1702080248',
+ 'timestamp': 1702080535,
+ 'title': 'The epicenter of AI is in Seattle, says Jim Cramer',
+ 'release_timestamp': 1702077347,
+ 'modified_timestamp': 1702080535,
+ 'release_date': '20231208',
+ 'upload_date': '20231209',
+ 'modified_date': '20231209',
+ },
+ 'expected_warnings': ['Unable to download f4m manifest'],
+ }]
+
+ def _real_extract(self, url):
+ display_id = self._match_id(url)
+ webpage = self._download_webpage(url, display_id)
+ data = self._search_json(r'window\.__s_data=', webpage, 'video data', display_id)
+
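+ # the 'clipPlayer' module nested inside the page layout tree holds the
+ # playback URL and metadata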
+ player_data = traverse_obj(data, (
+ 'page', 'page', 'layout', ..., 'columns', ..., 'modules',
+ lambda _, v: v['name'] == 'clipPlayer', 'data', {dict}), get_all=False)
+
+ return {
+ 'id': display_id,
+ 'display_id': display_id,
+ 'formats': self._extract_akamai_formats(player_data['playbackURL'], display_id),
+ **self._search_json_ld(webpage, display_id, fatal=False),
+ **traverse_obj(player_data, {
+ 'id': ('id', {str_or_none}),
+ 'title': ('title', {str}),
+ 'description': ('description', {str}),
+ 'creators': ('author', ..., 'name', {str}),
+ 'timestamp': ('datePublished', {parse_iso8601}),
+ 'release_timestamp': ('uploadDate', {parse_iso8601}),
+ 'modified_timestamp': ('dateLastPublished', {parse_iso8601}),
+ 'thumbnail': ('thumbnail', {url_or_none}),
+ 'duration': ('duration', {int_or_none}),
+ 'channel': ('section', 'title', {str}),
+ }),
+ }
diff --git a/yt_dlp/extractor/cnn.py b/yt_dlp/extractor/cnn.py
new file mode 100644
index 0000000..61b62fa
--- /dev/null
+++ b/yt_dlp/extractor/cnn.py
@@ -0,0 +1,198 @@
+from .common import InfoExtractor
+from .turner import TurnerBaseIE
+from ..utils import merge_dicts, try_call, url_basename
+
+
+class CNNIE(TurnerBaseIE):
+ _VALID_URL = r'''(?x)https?://(?:(?P<sub_domain>edition|www|money)\.)?cnn\.com/(?:video/(?:data/.+?|\?)/)?videos?/
+ (?P<path>.+?/(?P<title>[^/]+?)(?:\.(?:[a-z\-]+)|(?=&)))'''
+
+ _TESTS = [{
+ 'url': 'http://edition.cnn.com/video/?/video/sports/2013/06/09/nadal-1-on-1.cnn',
+ 'md5': '3e6121ea48df7e2259fe73a0628605c4',
+ 'info_dict': {
+ 'id': 'sports/2013/06/09/nadal-1-on-1.cnn',
+ 'ext': 'mp4',
+ 'title': 'Nadal wins 8th French Open title',
+ 'description': 'World Sport\'s Amanda Davies chats with 2013 French Open champion Rafael Nadal.',
+ 'duration': 135,
+ 'upload_date': '20130609',
+ },
+ 'expected_warnings': ['Failed to download m3u8 information'],
+ }, {
+ 'url': 'http://edition.cnn.com/video/?/video/us/2013/08/21/sot-student-gives-epic-speech.georgia-institute-of-technology&utm_source=feedburner&utm_medium=feed&utm_campaign=Feed%3A+rss%2Fcnn_topstories+%28RSS%3A+Top+Stories%29',
+ 'md5': 'b5cc60c60a3477d185af8f19a2a26f4e',
+ 'info_dict': {
+ 'id': 'us/2013/08/21/sot-student-gives-epic-speech.georgia-institute-of-technology',
+ 'ext': 'mp4',
+ 'title': "Student's epic speech stuns new freshmen",
+ 'description': "A Georgia Tech student welcomes the incoming freshmen with an epic speech backed by music from \"2001: A Space Odyssey.\"",
+ 'upload_date': '20130821',
+ },
+ 'expected_warnings': ['Failed to download m3u8 information'],
+ }, {
+ 'url': 'http://www.cnn.com/video/data/2.0/video/living/2014/12/22/growing-america-nashville-salemtown-board-episode-1.hln.html',
+ 'md5': 'f14d02ebd264df951feb2400e2c25a1b',
+ 'info_dict': {
+ 'id': 'living/2014/12/22/growing-america-nashville-salemtown-board-episode-1.hln',
+ 'ext': 'mp4',
+ 'title': 'Nashville Ep. 1: Hand crafted skateboards',
+ 'description': 'md5:e7223a503315c9f150acac52e76de086',
+ 'upload_date': '20141222',
+ },
+ 'expected_warnings': ['Failed to download m3u8 information'],
+ }, {
+ 'url': 'http://money.cnn.com/video/news/2016/08/19/netflix-stunning-stats.cnnmoney/index.html',
+ 'md5': '52a515dc1b0f001cd82e4ceda32be9d1',
+ 'info_dict': {
+ 'id': '/video/news/2016/08/19/netflix-stunning-stats.cnnmoney',
+ 'ext': 'mp4',
+ 'title': '5 stunning stats about Netflix',
+ 'description': 'Did you know that Netflix has more than 80 million members? Here are five facts about the online video distributor that you probably didn\'t know.',
+ 'upload_date': '20160819',
+ },
+ 'params': {
+ # m3u8 download
+ 'skip_download': True,
+ },
+ }, {
+ 'url': 'http://cnn.com/video/?/video/politics/2015/03/27/pkg-arizona-senator-church-attendance-mandatory.ktvk',
+ 'only_matching': True,
+ }, {
+ 'url': 'http://cnn.com/video/?/video/us/2015/04/06/dnt-baker-refuses-anti-gay-order.wkmg',
+ 'only_matching': True,
+ }, {
+ 'url': 'http://edition.cnn.com/videos/arts/2016/04/21/olympic-games-cultural-a-z-brazil.cnn',
+ 'only_matching': True,
+ }]
+
+ _CONFIG = {
+ # http://edition.cnn.com/.element/apps/cvp/3.0/cfg/spider/cnn/expansion/config.xml
+ 'edition': {
+ 'data_src': 'http://edition.cnn.com/video/data/3.0/video/%s/index.xml',
+ 'media_src': 'http://pmd.cdn.turner.com/cnn/big',
+ },
+ # http://money.cnn.com/.element/apps/cvp2/cfg/config.xml
+ 'money': {
+ 'data_src': 'http://money.cnn.com/video/data/4.0/video/%s.xml',
+ 'media_src': 'http://ht3.cdn.turner.com/money/big',
+ },
+ }
+
+ def _extract_timestamp(self, video_data):
+ # TODO: fix timestamp extraction
+ return None
+
+ def _real_extract(self, url):
+ sub_domain, path, page_title = self._match_valid_url(url).groups()
+ if sub_domain not in ('money', 'edition'):
+ sub_domain = 'edition'
+ config = self._CONFIG[sub_domain]
+ return self._extract_cvp_info(
+ config['data_src'] % path, page_title, {
+ 'default': {
+ 'media_src': config['media_src'],
+ },
+ 'f4m': {
+ 'host': 'cnn-vh.akamaihd.net',
+ },
+ })
+
+
+class CNNBlogsIE(InfoExtractor):
+ _VALID_URL = r'https?://[^\.]+\.blogs\.cnn\.com/.+'
+ _TEST = {
+ 'url': 'http://reliablesources.blogs.cnn.com/2014/02/09/criminalizing-journalism/',
+ 'md5': '3e56f97b0b6ffb4b79f4ea0749551084',
+ 'info_dict': {
+ 'id': 'bestoftv/2014/02/09/criminalizing-journalism.cnn',
+ 'ext': 'mp4',
+ 'title': 'Criminalizing journalism?',
+ 'description': 'Glenn Greenwald responds to comments made this week on Capitol Hill that journalists could be criminal accessories.',
+ 'upload_date': '20140209',
+ },
+ 'expected_warnings': ['Failed to download m3u8 information'],
+ 'add_ie': ['CNN'],
+ }
+
+ def _real_extract(self, url):
+ webpage = self._download_webpage(url, url_basename(url))
+ cnn_url = self._html_search_regex(r'data-url="(.+?)"', webpage, 'cnn url')
+ return self.url_result(cnn_url, CNNIE.ie_key())
+
+
+class CNNArticleIE(InfoExtractor):
+ _VALID_URL = r'https?://(?:(?:edition|www)\.)?cnn\.com/(?!videos?/)'
+ _TEST = {
+ 'url': 'http://www.cnn.com/2014/12/21/politics/obama-north-koreas-hack-not-war-but-cyber-vandalism/',
+ 'md5': '689034c2a3d9c6dc4aa72d65a81efd01',
+ 'info_dict': {
+ 'id': 'bestoftv/2014/12/21/ip-north-korea-obama.cnn',
+ 'ext': 'mp4',
+ 'title': 'Obama: Cyberattack not an act of war',
+ 'description': 'md5:0a802a40d2376f60e6b04c8d5bcebc4b',
+ 'upload_date': '20141221',
+ },
+ 'expected_warnings': ['Failed to download m3u8 information'],
+ 'add_ie': ['CNN'],
+ }
+
+ def _real_extract(self, url):
+ webpage = self._download_webpage(url, url_basename(url))
+ cnn_url = self._html_search_regex(r"video:\s*'([^']+)'", webpage, 'cnn url')
+ return self.url_result('http://cnn.com/video/?/video/' + cnn_url, CNNIE.ie_key())
+
+
+class CNNIndonesiaIE(InfoExtractor):
+ _VALID_URL = r'https?://www\.cnnindonesia\.com/[\w-]+/(?P<upload_date>\d{8})\d+-\d+-(?P<id>\d+)/(?P<display_id>[\w-]+)'
+ _TESTS = [{
+ 'url': 'https://www.cnnindonesia.com/ekonomi/20220909212635-89-845885/alasan-harga-bbm-di-indonesia-masih-disubsidi',
+ 'info_dict': {
+ 'id': '845885',
+ 'ext': 'mp4',
+ 'description': 'md5:e7954bfa6f1749bc9ef0c079a719c347',
+ 'upload_date': '20220909',
+ 'title': 'Alasan Harga BBM di Indonesia Masih Disubsidi',
+ 'timestamp': 1662859088,
+ 'duration': 120.0,
+ 'thumbnail': r're:https://akcdn\.detik\.net\.id/visual/2022/09/09/thumbnail-ekopedia-alasan-harga-bbm-disubsidi_169\.jpeg',
+ 'tags': ['ekopedia', 'subsidi bbm', 'subsidi', 'bbm', 'bbm subsidi', 'harga pertalite naik'],
+ 'age_limit': 0,
+ 'release_timestamp': 1662859088,
+ 'release_date': '20220911',
+ 'uploader': 'Asfahan Yahsyi',
+ }
+ }, {
+ 'url': 'https://www.cnnindonesia.com/internasional/20220911104341-139-846189/video-momen-charles-disambut-meriah-usai-dilantik-jadi-raja-inggris',
+ 'info_dict': {
+ 'id': '846189',
+ 'ext': 'mp4',
+ 'upload_date': '20220911',
+ 'duration': 76.0,
+ 'timestamp': 1662869995,
+ 'description': 'md5:ece7b003b3ee7d81c6a5cfede7d5397d',
+ 'thumbnail': r're:https://akcdn\.detik\.net\.id/visual/2022/09/11/thumbnail-video-1_169\.jpeg',
+ 'title': 'VIDEO: Momen Charles Disambut Meriah usai Dilantik jadi Raja Inggris',
+ 'tags': ['raja charles', 'raja charles iii', 'ratu elizabeth', 'ratu elizabeth meninggal dunia', 'raja inggris', 'inggris'],
+ 'age_limit': 0,
+ 'release_date': '20220911',
+ 'uploader': 'REUTERS',
+ 'release_timestamp': 1662869995,
+ }
+ }]
+
+ def _real_extract(self, url):
+ upload_date, video_id, display_id = self._match_valid_url(url).group('upload_date', 'id', 'display_id')
+ webpage = self._download_webpage(url, display_id)
+
+ json_ld_list = list(self._yield_json_ld(webpage, display_id))
+ json_ld_data = self._json_ld(json_ld_list, display_id)
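+ # the first VideoObject JSON-LD entry carries the embedded player URL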
+ embed_url = next(
+ json_ld.get('embedUrl') for json_ld in json_ld_list if json_ld.get('@type') == 'VideoObject')
+
+ return merge_dicts(json_ld_data, {
+ '_type': 'url_transparent',
+ 'url': embed_url,
+ 'upload_date': upload_date,
+ 'tags': try_call(lambda: self._html_search_meta('keywords', webpage).split(', '))
+ })
diff --git a/yt_dlp/extractor/comedycentral.py b/yt_dlp/extractor/comedycentral.py
new file mode 100644
index 0000000..27d295b
--- /dev/null
+++ b/yt_dlp/extractor/comedycentral.py
@@ -0,0 +1,55 @@
+from .mtv import MTVServicesInfoExtractor
+
+
+class ComedyCentralIE(MTVServicesInfoExtractor):
+ _VALID_URL = r'https?://(?:www\.)?cc\.com/(?:episodes|video(?:-clips)?|collection-playlist|movies)/(?P<id>[0-9a-z]{6})'
+ _FEED_URL = 'http://comedycentral.com/feeds/mrss/'
+
+ _TESTS = [{
+ 'url': 'http://www.cc.com/video-clips/5ke9v2/the-daily-show-with-trevor-noah-doc-rivers-and-steve-ballmer---the-nba-player-strike',
+ 'md5': 'b8acb347177c680ff18a292aa2166f80',
+ 'info_dict': {
+ 'id': '89ccc86e-1b02-4f83-b0c9-1d9592ecd025',
+ 'ext': 'mp4',
+ 'title': 'The Daily Show with Trevor Noah|August 28, 2020|25|25149|Doc Rivers and Steve Ballmer - The NBA Player Strike',
+ 'description': 'md5:5334307c433892b85f4f5e5ac9ef7498',
+ 'timestamp': 1598670000,
+ 'upload_date': '20200829',
+ },
+ }, {
+ 'url': 'http://www.cc.com/episodes/pnzzci/drawn-together--american-idol--parody-clip-show-season-3-ep-314',
+ 'only_matching': True,
+ }, {
+ 'url': 'https://www.cc.com/video/k3sdvm/the-daily-show-with-jon-stewart-exclusive-the-fourth-estate',
+ 'only_matching': True,
+ }, {
+ 'url': 'https://www.cc.com/collection-playlist/cosnej/stand-up-specials/t6vtjb',
+ 'only_matching': True,
+ }, {
+ 'url': 'https://www.cc.com/movies/tkp406/a-cluesterfuenke-christmas',
+ 'only_matching': True,
+ }]
+
+
+class ComedyCentralTVIE(MTVServicesInfoExtractor):
+ _VALID_URL = r'https?://(?:www\.)?comedycentral\.tv/folgen/(?P<id>[0-9a-z]{6})'
+ _TESTS = [{
+ 'url': 'https://www.comedycentral.tv/folgen/pxdpec/josh-investigates-klimawandel-staffel-1-ep-1',
+ 'info_dict': {
+ 'id': '15907dc3-ec3c-11e8-a442-0e40cf2fc285',
+ 'ext': 'mp4',
+ 'title': 'Josh Investigates',
+ 'description': 'Steht uns das Ende der Welt bevor?',
+ },
+ }]
+ _FEED_URL = 'http://feeds.mtvnservices.com/od/feed/intl-mrss-player-feed'
+ _GEO_COUNTRIES = ['DE']
+
+ def _get_feed_query(self, uri):
+ return {
+ 'accountOverride': 'intl.mtvi.com',
+ 'arcEp': 'web.cc.tv',
+ 'ep': 'b9032c3a',
+ 'imageEp': 'web.cc.tv',
+ 'mgid': uri,
+ }
diff --git a/yt_dlp/extractor/common.py b/yt_dlp/extractor/common.py
new file mode 100644
index 0000000..e776cca
--- /dev/null
+++ b/yt_dlp/extractor/common.py
@@ -0,0 +1,3943 @@
+import base64
+import collections
+import getpass
+import hashlib
+import http.client
+import http.cookiejar
+import http.cookies
+import inspect
+import itertools
+import json
+import math
+import netrc
+import os
+import random
+import re
+import subprocess
+import sys
+import time
+import types
+import urllib.parse
+import urllib.request
+import xml.etree.ElementTree
+
+from ..compat import functools # isort: split
+from ..compat import (
+ compat_etree_fromstring,
+ compat_expanduser,
+ compat_os_name,
+ urllib_req_to_req,
+)
+from ..cookies import LenientSimpleCookie
+from ..downloader.f4m import get_base_url, remove_encrypted_media
+from ..downloader.hls import HlsFD
+from ..networking import HEADRequest, Request
+from ..networking.exceptions import (
+ HTTPError,
+ IncompleteRead,
+ network_exceptions,
+)
+from ..utils import (
+ IDENTITY,
+ JSON_LD_RE,
+ NO_DEFAULT,
+ ExtractorError,
+ FormatSorter,
+ GeoRestrictedError,
+ GeoUtils,
+ LenientJSONDecoder,
+ Popen,
+ RegexNotFoundError,
+ RetryManager,
+ UnsupportedError,
+ age_restricted,
+ base_url,
+ bug_reports_message,
+ classproperty,
+ clean_html,
+ deprecation_warning,
+ determine_ext,
+ dict_get,
+ encode_data_uri,
+ error_to_compat_str,
+ extract_attributes,
+ filter_dict,
+ fix_xml_ampersands,
+ float_or_none,
+ format_field,
+ int_or_none,
+ join_nonempty,
+ js_to_json,
+ mimetype2ext,
+ netrc_from_content,
+ orderedSet,
+ parse_bitrate,
+ parse_codecs,
+ parse_duration,
+ parse_iso8601,
+ parse_m3u8_attributes,
+ parse_resolution,
+ sanitize_filename,
+ sanitize_url,
+ smuggle_url,
+ str_or_none,
+ str_to_int,
+ strip_or_none,
+ traverse_obj,
+ truncate_string,
+ try_call,
+ try_get,
+ unescapeHTML,
+ unified_strdate,
+ unified_timestamp,
+ url_basename,
+ url_or_none,
+ urlhandle_detect_ext,
+ urljoin,
+ variadic,
+ xpath_element,
+ xpath_text,
+ xpath_with_ns,
+)
+
+
+class InfoExtractor:
+ """Information Extractor class.
+
+ Information extractors are the classes that, given a URL, extract
+ information about the video (or videos) the URL refers to. This
+ information includes the real video URL, the video title, author and
+ others. The information is stored in a dictionary which is then
+ passed to the YoutubeDL. The YoutubeDL processes this
+ information possibly downloading the video to the file system, among
+ other possible outcomes.
+
+ The type field determines the type of the result.
+ By far the most common value (and the default if _type is missing) is
+ "video", which indicates a single video.
+
+ For a video, the dictionaries must include the following fields:
+
+ id: Video identifier.
+ title: Video title, unescaped. Set to an empty string if video has
+ no title as opposed to "None" which signifies that the
+ extractor failed to obtain a title
+
+ Additionally, it must contain either a formats entry or a url one:
+
+ formats: A list of dictionaries for each format available, ordered
+ from worst to best quality.
+
+ Potential fields:
+ * url The mandatory URL representing the media:
+ for plain file media - HTTP URL of this file,
+ for RTMP - RTMP URL,
+ for HLS - URL of the M3U8 media playlist,
+ for HDS - URL of the F4M manifest,
+ for DASH
+ - HTTP URL to plain file media (in case of
+ unfragmented media)
+ - URL of the MPD manifest or base URL
+ representing the media if MPD manifest
+ is parsed from a string (in case of
+ fragmented media)
+ for MSS - URL of the ISM manifest.
+ * request_data Data to send in POST request to the URL
+ * manifest_url
+ The URL of the manifest file in case of
+ fragmented media:
+ for HLS - URL of the M3U8 master playlist,
+ for HDS - URL of the F4M manifest,
+ for DASH - URL of the MPD manifest,
+ for MSS - URL of the ISM manifest.
+ * manifest_stream_number (For internal use only)
+ The index of the stream in the manifest file
+ * ext Will be calculated from URL if missing
+ * format A human-readable description of the format
+ ("mp4 container with h264/opus").
+ Calculated from the format_id, width, height,
+ and format_note fields if missing.
+ * format_id A short description of the format
+ ("mp4_h264_opus" or "19").
+ Technically optional, but strongly recommended.
+ * format_note Additional info about the format
+ ("3D" or "DASH video")
+ * width Width of the video, if known
+ * height Height of the video, if known
+ * aspect_ratio Aspect ratio of the video, if known
+ Automatically calculated from width and height
+ * resolution Textual description of width and height
+ Automatically calculated from width and height
+ * dynamic_range The dynamic range of the video. One of:
+ "SDR" (None), "HDR10", "HDR10+, "HDR12", "HLG, "DV"
+ * tbr Average bitrate of audio and video in KBit/s
+ * abr Average audio bitrate in KBit/s
+ * acodec Name of the audio codec in use
+ * asr Audio sampling rate in Hertz
+ * audio_channels Number of audio channels
+ * vbr Average video bitrate in KBit/s
+ * fps Frame rate
+ * vcodec Name of the video codec in use
+ * container Name of the container format
+ * filesize The number of bytes, if known in advance
+ * filesize_approx An estimate for the number of bytes
+ * player_url SWF Player URL (used for rtmpdump).
+ * protocol The protocol that will be used for the actual
+ download, lower-case. One of "http", "https" or
+ one of the protocols defined in downloader.PROTOCOL_MAP
+ * fragment_base_url
+ Base URL for fragments. Each fragment's path
+ value (if present) will be relative to
+ this URL.
+ * fragments A list of fragments of a fragmented media.
+ Each fragment entry must contain either a URL
+ or a path. If a URL is present it should be
+ used by the client. Otherwise both path and
+ fragment_base_url must be present. Here is
+ the list of all potential fields:
+ * "url" - fragment's URL
+ * "path" - fragment's path relative to
+ fragment_base_url
+ * "duration" (optional, int or float)
+ * "filesize" (optional, int)
+ * is_from_start Is a live format that can be downloaded
+ from the start. Boolean
+ * preference Order number of this format. If this field is
+ present and not None, the formats get sorted
+ by this field, regardless of all other values.
+ -1 for default (order by other properties),
+ -2 or smaller for less than default.
+ < -1000 to hide the format (if there is
+ another one which is strictly better)
+ * language Language code, e.g. "de" or "en-US".
+ * language_preference Is this in the language mentioned in
+ the URL?
+ 10 if it's what the URL is about,
+ -1 for default (don't know),
+ -10 otherwise, other values reserved for now.
+ * quality Order number of the video quality of this
+ format, irrespective of the file format.
+ -1 for default (order by other properties),
+ -2 or smaller for less than default.
+ * source_preference Order number for this video source
+ (quality takes higher priority)
+ -1 for default (order by other properties),
+ -2 or smaller for less than default.
+ * http_headers A dictionary of additional HTTP headers
+ to add to the request.
+ * stretched_ratio If given and not 1, indicates that the
+ video's pixels are not square.
+ width : height ratio as float.
+ * no_resume The server does not support resuming the
+ (HTTP or RTMP) download. Boolean.
+ * has_drm True if the format has DRM and cannot be downloaded.
+ 'maybe' if the format may have DRM and has to be tested before download.
+ * extra_param_to_segment_url A query string to append to each
+ fragment's URL, or to update each existing query string
+ with. Only applied by the native HLS/DASH downloaders.
+ * hls_aes A dictionary of HLS AES-128 decryption information
+ used by the native HLS downloader to override the
+ values in the media playlist when an '#EXT-X-KEY' tag
+ is present in the playlist:
+ * uri The URI from which the key will be downloaded
+ * key The key (as hex) used to decrypt fragments.
+ If `key` is given, any key URI will be ignored
+ * iv The IV (as hex) used to decrypt fragments
+ * downloader_options A dictionary of downloader options
+ (For internal use only)
+ * http_chunk_size Chunk size for HTTP downloads
+ * ffmpeg_args Extra arguments for ffmpeg downloader
+ * is_dash_periods Whether the format is a result of merging
+ multiple DASH periods.
+ RTMP formats can also have the additional fields: page_url,
+ app, play_path, tc_url, flash_version, rtmp_live, rtmp_conn,
+ rtmp_protocol, rtmp_real_time
+
+ url: Final video URL.
+ ext: Video filename extension.
+ format: The video format, defaults to ext (used for --get-format)
+ player_url: SWF Player URL (used for rtmpdump).
+
+ The following fields are optional:
+
+ direct: True if a direct video file was given (must only be set by GenericIE)
+ alt_title: A secondary title of the video.
+ display_id: An alternative identifier for the video, not necessarily
+ unique, but available before title. Typically, id is
+ something like "4234987", title "Dancing naked mole rats",
+ and display_id "dancing-naked-mole-rats"
+ thumbnails: A list of dictionaries, with the following entries:
+ * "id" (optional, string) - Thumbnail format ID
+ * "url"
+ * "preference" (optional, int) - quality of the image
+ * "width" (optional, int)
+ * "height" (optional, int)
+ * "resolution" (optional, string "{width}x{height}",
+ deprecated)
+ * "filesize" (optional, int)
+ * "http_headers" (dict) - HTTP headers for the request
+ thumbnail: Full URL to a video thumbnail image.
+ description: Full video description.
+ uploader: Full name of the video uploader.
+ license: License name the video is licensed under.
+ creators: List of creators of the video.
+ timestamp: UNIX timestamp of the moment the video was uploaded
+ upload_date: Video upload date in UTC (YYYYMMDD).
+ If not explicitly set, calculated from timestamp
+ release_timestamp: UNIX timestamp of the moment the video was released.
+ If it is not clear whether to use timestamp or this, use the former
+ release_date: The date (YYYYMMDD) when the video was released in UTC.
+ If not explicitly set, calculated from release_timestamp
+ release_year: Year (YYYY) as integer when the video or album was released.
+ To be used if no exact release date is known.
+ If not explicitly set, calculated from release_date.
+ modified_timestamp: UNIX timestamp of the moment the video was last modified.
+ modified_date: The date (YYYYMMDD) when the video was last modified in UTC.
+ If not explicitly set, calculated from modified_timestamp
+ uploader_id: Nickname or id of the video uploader.
+ uploader_url: Full URL to a personal webpage of the video uploader.
+ channel: Full name of the channel the video is uploaded on.
+ Note that channel fields may or may not repeat uploader
+ fields. This depends on a particular extractor.
+ channel_id: Id of the channel.
+ channel_url: Full URL to a channel webpage.
+ channel_follower_count: Number of followers of the channel.
+ channel_is_verified: Whether the channel is verified on the platform.
+ location: Physical location where the video was filmed.
+ subtitles: The available subtitles as a dictionary in the format
+ {tag: subformats}. "tag" is usually a language code, and
+ "subformats" is a list sorted from lower to higher
+ preference, each element is a dictionary with the "ext"
+ entry and one of:
+ * "data": The subtitles file contents
+ * "url": A URL pointing to the subtitles file
+ It can optionally also have:
+ * "name": Name or description of the subtitles
+ * "http_headers": A dictionary of additional HTTP headers
+ to add to the request.
+ "ext" will be calculated from URL if missing
+ automatic_captions: Like 'subtitles'; contains automatically generated
+ captions instead of normal subtitles
+ duration: Length of the video in seconds, as an integer or float.
+ view_count: How many users have watched the video on the platform.
+ concurrent_view_count: How many users are currently watching the video on the platform.
+ like_count: Number of positive ratings of the video
+ dislike_count: Number of negative ratings of the video
+ repost_count: Number of reposts of the video
+ average_rating: Average rating given by users; the scale used depends on the webpage
+ comment_count: Number of comments on the video
+ comments: A list of comments, each with one or more of the following
+ properties (all but one of text or html optional):
+ * "author" - human-readable name of the comment author
+ * "author_id" - user ID of the comment author
+ * "author_thumbnail" - The thumbnail of the comment author
+ * "author_url" - The url to the comment author's page
+ * "author_is_verified" - Whether the author is verified
+ on the platform
+ * "author_is_uploader" - Whether the comment is made by
+ the video uploader
+ * "id" - Comment ID
+ * "html" - Comment as HTML
+ * "text" - Plain text of the comment
+ * "timestamp" - UNIX timestamp of comment
+ * "parent" - ID of the comment this one is replying to.
+ Set to "root" to indicate that this is a
+ comment to the original video.
+ * "like_count" - Number of positive ratings of the comment
+ * "dislike_count" - Number of negative ratings of the comment
+ * "is_favorited" - Whether the comment is marked as
+ favorite by the video uploader
+ * "is_pinned" - Whether the comment is pinned to
+ the top of the comments
+ age_limit: Age restriction for the video, as an integer (years)
+ webpage_url: The URL to the video webpage; if given to yt-dlp it
+ should allow getting the same result again. (It will be set
+ by YoutubeDL if it's missing)
+ categories: A list of categories that the video falls in, for example
+ ["Sports", "Berlin"]
+ tags: A list of tags assigned to the video, e.g. ["sweden", "pop music"]
+ cast: A list of the video cast
+ is_live: True, False, or None (=unknown). Whether this video is a
+ live stream that goes on instead of a fixed-length video.
+ was_live: True, False, or None (=unknown). Whether this video was
+ originally a live stream.
+ live_status: None (=unknown), 'is_live', 'is_upcoming', 'was_live', 'not_live',
+ or 'post_live' (was live, but VOD is not yet processed)
+ If absent, automatically set from is_live, was_live
+ start_time: Time in seconds where the reproduction should start, as
+ specified in the URL.
+ end_time: Time in seconds where the reproduction should end, as
+ specified in the URL.
+ chapters: A list of dictionaries, with the following entries:
+ * "start_time" - The start time of the chapter in seconds
+ * "end_time" - The end time of the chapter in seconds
+ * "title" (optional, string)
+ heatmap: A list of dictionaries, with the following entries:
+ * "start_time" - The start time of the data point in seconds
+ * "end_time" - The end time of the data point in seconds
+ * "value" - The normalized value of the data point (float between 0 and 1)
+ playable_in_embed: Whether this video is allowed to play in embedded
+ players on other sites. Can be True (=always allowed),
+ False (=never allowed), None (=unknown), or a string
+ specifying the criteria for embedability; e.g. 'whitelist'
+ availability: Under what condition the video is available. One of
+ 'private', 'premium_only', 'subscriber_only', 'needs_auth',
+ 'unlisted' or 'public'. Use 'InfoExtractor._availability'
+ to set it
+ media_type: The type of media as classified by the site, e.g. "episode", "clip", "trailer"
+ _old_archive_ids: A list of old archive ids needed for backward compatibility
+ _format_sort_fields: A list of fields to use for sorting formats
+ __post_extractor: A function to be called just before the metadata is
+ written to either disk, logger or console. The function
+ must return a dict which will be added to the info_dict.
+ This is useful for additional information that is
+ time-consuming to extract. Note that the fields thus
+ extracted will not be available to the output template and
+ match_filter. So, only "comments" and "comment_count" are
+ currently allowed to be extracted via this method.
+
+ The following fields should only be used when the video belongs to some logical
+ chapter or section:
+
+ chapter: Name or title of the chapter the video belongs to.
+ chapter_number: Number of the chapter the video belongs to, as an integer.
+ chapter_id: Id of the chapter the video belongs to, as a unicode string.
+
+ The following fields should only be used when the video is an episode of some
+ series, programme or podcast:
+
+ series: Title of the series or programme the video episode belongs to.
+ series_id: Id of the series or programme the video episode belongs to, as a unicode string.
+ season: Title of the season the video episode belongs to.
+ season_number: Number of the season the video episode belongs to, as an integer.
+ season_id: Id of the season the video episode belongs to, as a unicode string.
+ episode: Title of the video episode. Unlike mandatory video title field,
+ this field should denote the exact title of the video episode
+ without any kind of decoration.
+ episode_number: Number of the video episode within a season, as an integer.
+ episode_id: Id of the video episode, as a unicode string.
+
+ The following fields should only be used when the media is a track or a part of
+ a music album:
+
+ track: Title of the track.
+ track_number: Number of the track within an album or a disc, as an integer.
+ track_id: Id of the track (useful in case of custom indexing, e.g. 6.iii),
+ as a unicode string.
+ artists: List of artists of the track.
+ composers: List of composers of the piece.
+ genres: List of genres of the track.
+ album: Title of the album the track belongs to.
+ album_type: Type of the album (e.g. "Demo", "Full-length", "Split", "Compilation", etc).
+ album_artists: List of all artists appeared on the album.
+ E.g. ["Ash Borer", "Fell Voices"] or ["Various Artists"].
+ Useful for splits and compilations.
+ disc_number: Number of the disc or other physical medium the track belongs to,
+ as an integer.
+
+ The following fields should only be set for clips that should be cut from the original video:
+
+ section_start: Start time of the section in seconds
+ section_end: End time of the section in seconds
+
+ The following fields should only be set for storyboards:
+ rows: Number of rows in each storyboard fragment, as an integer
+ columns: Number of columns in each storyboard fragment, as an integer
+
+ The following fields are deprecated and should not be set by new code:
+ composer: Use "composers" instead.
+ Composer(s) of the piece, comma-separated.
+ artist: Use "artists" instead.
+ Artist(s) of the track, comma-separated.
+ genre: Use "genres" instead.
+ Genre(s) of the track, comma-separated.
+ album_artist: Use "album_artists" instead.
+ All artists appeared on the album, comma-separated.
+ creator: Use "creators" instead.
+ The creator of the video.
+
+ Unless mentioned otherwise, the fields should be Unicode strings.
+
+ Unless mentioned otherwise, None is equivalent to absence of information.
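+
+ A minimal single-video result might look like this (values are purely
+ illustrative):
+
+ {
+ 'id': '12345',
+ 'title': 'Example video',
+ 'url': 'https://example.com/video.mp4',
+ 'ext': 'mp4',
+ }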
+
+
+ _type "playlist" indicates multiple videos.
+ There must be a key "entries", which is a list, an iterable, or a PagedList
+ object, each element of which is a valid dictionary by this specification.
+
+ Additionally, playlists can have "id", "title", and any other relevant
+ attributes with the same semantics as videos (see above).
+
+ It can also have the following optional fields:
+
+ playlist_count: The total number of videos in a playlist. If not given,
+ YoutubeDL tries to calculate it from "entries"
+
+
+ _type "multi_video" indicates that there are multiple videos that
+ form a single show, for example multiple acts of an opera or TV episode.
+ It must have an entries key like a playlist and contain all the keys
+ required for a video at the same time.
+
+
+ _type "url" indicates that the video must be extracted from another
+ location, possibly by a different extractor. Its only required key is:
+ "url" - the next URL to extract.
+ The key "ie_key" can be set to the class name (minus the trailing "IE",
+ e.g. "Youtube") if the extractor class is known in advance.
+ Additionally, the dictionary may have any properties of the resolved entity
+ known in advance, for example "title" if the title of the referred video is
+ known ahead of time.
+
+
+ _type "url_transparent" entities have the same specification as "url", but
+ indicate that the given additional information is more precise than the one
+ associated with the resolved URL.
+ This is useful when a site employs a video service that hosts the video and
+ its technical metadata, but that video service does not embed a useful
+ title, description, etc.
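+
+ E.g. (hypothetical values):
+
+     {'_type': 'url_transparent', 'url': embed_url, 'ie_key': 'SomeHost',
+      'title': 'Exact title taken from the embedding page'}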
+
+
+ Subclasses of this should also be added to the list of extractors and
+ should define _VALID_URL as a regexp or a Sequence of regexps, and
+ re-define the _real_extract() and (optionally) _real_initialize() methods.
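+
+ A minimal subclass might look like this (a hypothetical sketch; the site
+ name and URL scheme are invented):
+
+     class SomeSiteIE(InfoExtractor):
+         _VALID_URL = r'https?://somesite\.example/video/(?P<id>\d+)'
+
+         def _real_extract(self, url):
+             video_id = self._match_id(url)
+             webpage = self._download_webpage(url, video_id)
+             return {
+                 'id': video_id,
+                 'title': self._html_extract_title(webpage),
+                 'url': self._og_search_video_url(webpage),
+             }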
+
+ Subclasses may also override suitable() if necessary, but ensure the function
+ signature is preserved and that this function imports everything it needs
+ (except other extractors), so that lazy_extractors works correctly.
+
+ Subclasses can define a list of _EMBED_REGEX, which will be searched for in
+ the HTML of Generic webpages. It may also override _extract_embed_urls
+ or _extract_from_webpage as necessary. While these are normally classmethods,
+ _extract_from_webpage is allowed to be an instance method.
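+
+ For example (hypothetical pattern; note that the regex must contain a
+ named group "url" for the default _extract_embed_urls to use):
+
+     _EMBED_REGEX = [r'<iframe[^>]+src=["\'](?P<url>https?://player\.somesite\.example/embed/[^"\']+)']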
+
+ _extract_from_webpage may raise self.StopExtraction() to stop further
+ processing of the webpage and obtain exclusive rights to it. This is useful
+ when the extractor cannot reliably be matched using just the URL,
+ e.g. invidious/peertube instances.
+
+ Embed-only extractors can be defined by setting _VALID_URL = False.
+
+ To support username + password (or netrc) login, the extractor must define a
+ _NETRC_MACHINE and re-define _perform_login(username, password) and
+ (optionally) _initialize_pre_login() methods. The _perform_login method will
+ be called between _initialize_pre_login and _real_initialize if credentials
+ are passed by the user. In cases where it is necessary to have the login
+ process as part of the extraction rather than initialization, _perform_login
+ can be left undefined.
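+
+ A minimal sketch of these hooks (the machine name, endpoint, and form
+ fields below are hypothetical; urlencode_postdata is the form-encoding
+ helper from yt_dlp.utils):
+
+     _NETRC_MACHINE = 'somesite'
+
+     def _perform_login(self, username, password):
+         self._download_webpage(
+             'https://somesite.example/login', None, 'Logging in',
+             data=urlencode_postdata({'user': username, 'pass': password}))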
+
+ _GEO_BYPASS attribute may be set to False in order to disable
+ geo restriction bypass mechanisms for a particular extractor.
+ Note that this won't disable explicit geo restriction bypass based on
+ the country code provided with geo_bypass_country.
+
+ _GEO_COUNTRIES attribute may contain a list of presumably geo unrestricted
+ countries for this extractor. One of these countries will be used by
+ geo restriction bypass mechanism right away in order to bypass
+ geo restriction, provided the mechanism is not disabled.
+
+ _GEO_IP_BLOCKS attribute may contain a list of presumably geo unrestricted
+ IP blocks in CIDR notation for this extractor. One of these IP blocks
+ will be used by geo restriction bypass mechanism similarly
+ to _GEO_COUNTRIES.
+
+ The _ENABLED attribute should be set to False for IEs that
+ are disabled by default and must be explicitly enabled.
+
+ The _WORKING attribute should be set to False for broken IEs
+ in order to warn the users and skip the tests.
+ """
+
+ _ready = False
+ _downloader = None
+ _x_forwarded_for_ip = None
+ _GEO_BYPASS = True
+ _GEO_COUNTRIES = None
+ _GEO_IP_BLOCKS = None
+ _WORKING = True
+ _ENABLED = True
+ _NETRC_MACHINE = None
+ IE_DESC = None
+ SEARCH_KEY = None
+ _VALID_URL = None
+ _EMBED_REGEX = []
+
+ def _login_hint(self, method=NO_DEFAULT, netrc=None):
+ password_hint = f'--username and --password, --netrc-cmd, or --netrc ({netrc or self._NETRC_MACHINE}) to provide account credentials'
+ return {
+ None: '',
+ 'any': f'Use --cookies, --cookies-from-browser, {password_hint}',
+ 'password': f'Use {password_hint}',
+ 'cookies': (
+ 'Use --cookies-from-browser or --cookies for the authentication. '
+ 'See https://github.com/yt-dlp/yt-dlp/wiki/FAQ#how-do-i-pass-cookies-to-yt-dlp for how to manually pass cookies'),
+ }[method if method is not NO_DEFAULT else 'any' if self.supports_login() else 'cookies']
+
+ def __init__(self, downloader=None):
+ """Constructor. Receives an optional downloader (a YoutubeDL instance).
+ If a downloader is not passed during initialization,
+ it must be set using "set_downloader()" before "extract()" is called"""
+ self._ready = False
+ self._x_forwarded_for_ip = None
+ self._printed_messages = set()
+ self.set_downloader(downloader)
+
+ @classmethod
+ def _match_valid_url(cls, url):
+ if cls._VALID_URL is False:
+ return None
+ # This does not use has/getattr intentionally - we want to know whether
+ # we have cached the regexp for *this* class, whereas getattr would also
+ # match the superclass
+ if '_VALID_URL_RE' not in cls.__dict__:
+ cls._VALID_URL_RE = tuple(map(re.compile, variadic(cls._VALID_URL)))
+ return next(filter(None, (regex.match(url) for regex in cls._VALID_URL_RE)), None)
+
+ @classmethod
+ def suitable(cls, url):
+ """Receives a URL and returns True if suitable for this IE."""
+ # This function must import everything it needs (except other extractors),
+ # so that lazy_extractors works correctly
+ return cls._match_valid_url(url) is not None
+
+ @classmethod
+ def _match_id(cls, url):
+ return cls._match_valid_url(url).group('id')
+
+ @classmethod
+ def get_temp_id(cls, url):
+ try:
+ return cls._match_id(url)
+ except (IndexError, AttributeError):
+ return None
+
+ @classmethod
+ def working(cls):
+ """Getter method for _WORKING."""
+ return cls._WORKING
+
+ @classmethod
+ def supports_login(cls):
+ return bool(cls._NETRC_MACHINE)
+
+ def initialize(self):
+ """Initializes an instance (authentication, etc)."""
+ self._printed_messages = set()
+ self._initialize_geo_bypass({
+ 'countries': self._GEO_COUNTRIES,
+ 'ip_blocks': self._GEO_IP_BLOCKS,
+ })
+ if not self._ready:
+ self._initialize_pre_login()
+ if self.supports_login():
+ username, password = self._get_login_info()
+ if username:
+ self._perform_login(username, password)
+ elif self.get_param('username') and False not in (self.IE_DESC, self._NETRC_MACHINE):
+ self.report_warning(f'Login with password is not supported for this website. {self._login_hint("cookies")}')
+ self._real_initialize()
+ self._ready = True
+
+ def _initialize_geo_bypass(self, geo_bypass_context):
+ """
+ Initialize geo restriction bypass mechanism.
+
+ This method is used to initialize geo bypass mechanism based on faking
+ X-Forwarded-For HTTP header. A random country from provided country list
+ is selected and a random IP belonging to this country is generated. This
+ IP will be passed as X-Forwarded-For HTTP header in all subsequent
+ HTTP requests.
+
+ This method will be used for initial geo bypass mechanism initialization
+ during the instance initialization with _GEO_COUNTRIES and
+ _GEO_IP_BLOCKS.
+
+ You may also manually call it from extractor's code if geo bypass
+ information is not available beforehand (e.g. obtained during
+ extraction) or due to some other reason. In this case you should pass
+ this information in geo bypass context passed as first argument. It may
+ contain following fields:
+
+ countries: List of geo unrestricted countries (similar
+ to _GEO_COUNTRIES)
+ ip_blocks: List of geo unrestricted IP blocks in CIDR notation
+ (similar to _GEO_IP_BLOCKS)
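+
+ Example (hypothetical values; 203.0.113.0/24 is a reserved
+ documentation range):
+
+     self._initialize_geo_bypass({
+         'countries': ['DE', 'FR'],
+         'ip_blocks': ['203.0.113.0/24'],
+     })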
+
+ """
+ if not self._x_forwarded_for_ip:
+
+ # Geo bypass mechanism is explicitly disabled by user
+ if not self.get_param('geo_bypass', True):
+ return
+
+ if not geo_bypass_context:
+ geo_bypass_context = {}
+
+ # Backward compatibility: previously _initialize_geo_bypass
+ # expected a list of countries; some 3rd-party code may still use
+ # it this way
+ if isinstance(geo_bypass_context, (list, tuple)):
+ geo_bypass_context = {
+ 'countries': geo_bypass_context,
+ }
+
+ # The whole point of the geo bypass mechanism is to fake the IP
+ # sent in the X-Forwarded-For HTTP header, based on some IP block or
+ # country code.
+
+ # Path 1: bypassing based on IP block in CIDR notation
+
+ # Explicit IP block specified by user, use it right away
+ # regardless of whether extractor is geo bypassable or not
+ ip_block = self.get_param('geo_bypass_ip_block', None)
+
+ # Otherwise use random IP block from geo bypass context but only
+ # if extractor is known as geo bypassable
+ if not ip_block:
+ ip_blocks = geo_bypass_context.get('ip_blocks')
+ if self._GEO_BYPASS and ip_blocks:
+ ip_block = random.choice(ip_blocks)
+
+ if ip_block:
+ self._x_forwarded_for_ip = GeoUtils.random_ipv4(ip_block)
+ self.write_debug(f'Using fake IP {self._x_forwarded_for_ip} as X-Forwarded-For')
+ return
+
+ # Path 2: bypassing based on country code
+
+ # Explicit country code specified by user, use it right away
+ # regardless of whether extractor is geo bypassable or not
+ country = self.get_param('geo_bypass_country', None)
+
+ # Otherwise use random country code from geo bypass context but
+ # only if extractor is known as geo bypassable
+ if not country:
+ countries = geo_bypass_context.get('countries')
+ if self._GEO_BYPASS and countries:
+ country = random.choice(countries)
+
+ if country:
+ self._x_forwarded_for_ip = GeoUtils.random_ipv4(country)
+ self._downloader.write_debug(
+ f'Using fake IP {self._x_forwarded_for_ip} ({country.upper()}) as X-Forwarded-For')
+
+ def extract(self, url):
+ """Extracts URL information and returns it in list of dicts."""
+ try:
+ for _ in range(2):
+ try:
+ self.initialize()
+ self.to_screen('Extracting URL: %s' % (
+ url if self.get_param('verbose') else truncate_string(url, 100, 20)))
+ ie_result = self._real_extract(url)
+ if ie_result is None:
+ return None
+ if self._x_forwarded_for_ip:
+ ie_result['__x_forwarded_for_ip'] = self._x_forwarded_for_ip
+ subtitles = ie_result.get('subtitles') or {}
+ if 'no-live-chat' in self.get_param('compat_opts'):
+ for lang in ('live_chat', 'comments', 'danmaku'):
+ subtitles.pop(lang, None)
+ return ie_result
+ except GeoRestrictedError as e:
+ if self.__maybe_fake_ip_and_retry(e.countries):
+ continue
+ raise
+ except UnsupportedError:
+ raise
+ except ExtractorError as e:
+ e.video_id = e.video_id or self.get_temp_id(url)
+ e.ie = e.ie or self.IE_NAME
+ e.traceback = e.traceback or sys.exc_info()[2]
+ raise
+ except IncompleteRead as e:
+ raise ExtractorError('A network error has occurred.', cause=e, expected=True, video_id=self.get_temp_id(url))
+ except (KeyError, StopIteration) as e:
+ raise ExtractorError('An extractor error has occurred.', cause=e, video_id=self.get_temp_id(url))
+
+ def __maybe_fake_ip_and_retry(self, countries):
+ if (not self.get_param('geo_bypass_country', None)
+ and self._GEO_BYPASS
+ and self.get_param('geo_bypass', True)
+ and not self._x_forwarded_for_ip
+ and countries):
+ country_code = random.choice(countries)
+ self._x_forwarded_for_ip = GeoUtils.random_ipv4(country_code)
+ if self._x_forwarded_for_ip:
+ self.report_warning(
+ 'Video is geo restricted. Retrying extraction with fake IP %s (%s) as X-Forwarded-For.'
+ % (self._x_forwarded_for_ip, country_code.upper()))
+ return True
+ return False
+
+ def set_downloader(self, downloader):
+ """Sets a YoutubeDL instance as the downloader for this IE."""
+ self._downloader = downloader
+
+ @property
+ def cache(self):
+ return self._downloader.cache
+
+ @property
+ def cookiejar(self):
+ return self._downloader.cookiejar
+
+ def _initialize_pre_login(self):
+ """ Initialization before login. Redefine in subclasses."""
+ pass
+
+ def _perform_login(self, username, password):
+ """ Login with username and password. Redefine in subclasses."""
+ pass
+
+ def _real_initialize(self):
+ """Real initialization process. Redefine in subclasses."""
+ pass
+
+ def _real_extract(self, url):
+ """Real extraction process. Redefine in subclasses."""
+ raise NotImplementedError('This method must be implemented by subclasses')
+
+ @classmethod
+ def ie_key(cls):
+ """A string for getting the InfoExtractor with get_info_extractor"""
+ return cls.__name__[:-2]
+
+ @classproperty
+ def IE_NAME(cls):
+ return cls.__name__[:-2]
+
+ @staticmethod
+ def __can_accept_status_code(err, expected_status):
+ assert isinstance(err, HTTPError)
+ if expected_status is None:
+ return False
+ elif callable(expected_status):
+ return expected_status(err.status) is True
+ else:
+ return err.status in variadic(expected_status)
+
+ def _create_request(self, url_or_request, data=None, headers=None, query=None):
+ if isinstance(url_or_request, urllib.request.Request):
+ self._downloader.deprecation_warning(
+ 'Passing a urllib.request.Request to _create_request() is deprecated. '
+ 'Use yt_dlp.networking.common.Request instead.')
+ url_or_request = urllib_req_to_req(url_or_request)
+ elif not isinstance(url_or_request, Request):
+ url_or_request = Request(url_or_request)
+
+ url_or_request.update(data=data, headers=headers, query=query)
+ return url_or_request
+
+ def _request_webpage(self, url_or_request, video_id, note=None, errnote=None, fatal=True, data=None, headers=None, query=None, expected_status=None):
+ """
+ Return the response handle.
+
+ See _download_webpage docstring for arguments specification.
+ """
+ if not self._downloader._first_webpage_request:
+ sleep_interval = self.get_param('sleep_interval_requests') or 0
+ if sleep_interval > 0:
+ self.to_screen('Sleeping %s seconds ...' % sleep_interval)
+ time.sleep(sleep_interval)
+ else:
+ self._downloader._first_webpage_request = False
+
+ if note is None:
+ self.report_download_webpage(video_id)
+ elif note is not False:
+ if video_id is None:
+ self.to_screen(str(note))
+ else:
+ self.to_screen(f'{video_id}: {note}')
+
+ # Some sites check X-Forwarded-For HTTP header in order to figure out
+ # the origin of the client behind proxy. This allows bypassing geo
+ # restriction by faking this header's value to an IP that belongs to some
+ # geo unrestricted country. We will do so once we encounter any
+ # geo restriction error.
+ if self._x_forwarded_for_ip:
+ headers = (headers or {}).copy()
+ headers.setdefault('X-Forwarded-For', self._x_forwarded_for_ip)
+
+ try:
+ return self._downloader.urlopen(self._create_request(url_or_request, data, headers, query))
+ except network_exceptions as err:
+ if isinstance(err, HTTPError):
+ if self.__can_accept_status_code(err, expected_status):
+ return err.response
+
+ if errnote is False:
+ return False
+ if errnote is None:
+ errnote = 'Unable to download webpage'
+
+ errmsg = f'{errnote}: {error_to_compat_str(err)}'
+ if fatal:
+ raise ExtractorError(errmsg, cause=err)
+ else:
+ self.report_warning(errmsg)
+ return False
+
+ def _download_webpage_handle(self, url_or_request, video_id, note=None, errnote=None, fatal=True,
+ encoding=None, data=None, headers={}, query={}, expected_status=None):
+ """
+ Return a tuple (page content as string, URL handle).
+
+ Arguments:
+ url_or_request -- plain text URL as a string or
+ a urllib.request.Request object
+ video_id -- Video/playlist/item identifier (string)
+
+ Keyword arguments:
+ note -- note printed before downloading (string)
+ errnote -- note printed in case of an error (string)
+ fatal -- flag denoting whether an error should be considered fatal,
+ i.e. whether it should cause ExtractorError to be raised;
+ otherwise a warning will be reported and extraction continued
+ encoding -- encoding used to decode the page content, guessed automatically
+ when not explicitly specified
+ data -- POST data (bytes)
+ headers -- HTTP headers (dict)
+ query -- URL query (dict)
+ expected_status -- allows accepting failed HTTP requests (non-2xx
+ status codes) by explicitly specifying a set of accepted status
+ codes. Can be any of the following entities:
+ - an integer type specifying an exact failed status code to
+ accept
+ - a list or a tuple of integer types specifying a list of
+ failed status codes to accept
+ - a callable accepting an actual failed status code and
+ returning True if it should be accepted
+ Note that this argument does not affect success status codes (2xx)
+ which are always accepted.
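+ E.g. expected_status=404 accepts a 404 response, and
+ expected_status=lambda x: 400 <= x < 500 accepts any 4xx response.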
+ """
+
+ # Strip hashes from the URL (#1038)
+ if isinstance(url_or_request, str):
+ url_or_request = url_or_request.partition('#')[0]
+
+ urlh = self._request_webpage(url_or_request, video_id, note, errnote, fatal, data=data, headers=headers, query=query, expected_status=expected_status)
+ if urlh is False:
+ assert not fatal
+ return False
+ content = self._webpage_read_content(urlh, url_or_request, video_id, note, errnote, fatal, encoding=encoding)
+ return (content, urlh)
+
+ @staticmethod
+ def _guess_encoding_from_content(content_type, webpage_bytes):
+ m = re.match(r'[a-zA-Z0-9_.-]+/[a-zA-Z0-9_.-]+\s*;\s*charset=(.+)', content_type)
+ if m:
+ encoding = m.group(1)
+ else:
+ m = re.search(br'<meta[^>]+charset=[\'"]?([^\'")]+)[ /\'">]',
+ webpage_bytes[:1024])
+ if m:
+ encoding = m.group(1).decode('ascii')
+ elif webpage_bytes.startswith(b'\xff\xfe'):
+ encoding = 'utf-16'
+ else:
+ encoding = 'utf-8'
+
+ return encoding
+
+ def __check_blocked(self, content):
+ first_block = content[:512]
+ if ('<title>Access to this site is blocked</title>' in content
+ and 'Websense' in first_block):
+ msg = 'Access to this webpage has been blocked by Websense filtering software in your network.'
+ blocked_iframe = self._html_search_regex(
+ r'<iframe src="([^"]+)"', content,
+ 'Websense information URL', default=None)
+ if blocked_iframe:
+ msg += ' Visit %s for more details' % blocked_iframe
+ raise ExtractorError(msg, expected=True)
+ if '<title>The URL you requested has been blocked</title>' in first_block:
+ msg = (
+ 'Access to this webpage has been blocked by Indian censorship. '
+ 'Use a VPN or proxy server (with --proxy) to route around it.')
+ block_msg = self._html_search_regex(
+ r'</h1><p>(.*?)</p>',
+ content, 'block message', default=None)
+ if block_msg:
+ msg += ' (Message: "%s")' % block_msg.replace('\n', ' ')
+ raise ExtractorError(msg, expected=True)
+ if ('<title>TTK :: Доступ к ресурсу ограничен</title>' in content
+ and 'blocklist.rkn.gov.ru' in content):
+ raise ExtractorError(
+ 'Access to this webpage has been blocked by decision of the Russian government. '
+ 'Visit http://blocklist.rkn.gov.ru/ for a block reason.',
+ expected=True)
+
+ def _request_dump_filename(self, url, video_id):
+ basen = f'{video_id}_{url}'
+ trim_length = self.get_param('trim_file_name') or 240
+ if len(basen) > trim_length:
+ h = '___' + hashlib.md5(basen.encode('utf-8')).hexdigest()
+ basen = basen[:trim_length - len(h)] + h
+ filename = sanitize_filename(f'{basen}.dump', restricted=True)
+ # Working around MAX_PATH limitation on Windows (see
+ # http://msdn.microsoft.com/en-us/library/windows/desktop/aa365247(v=vs.85).aspx)
+ if compat_os_name == 'nt':
+ absfilepath = os.path.abspath(filename)
+ if len(absfilepath) > 259:
+ filename = fR'\\?\{absfilepath}'
+ return filename
+
+ def __decode_webpage(self, webpage_bytes, encoding, headers):
+ if not encoding:
+ encoding = self._guess_encoding_from_content(headers.get('Content-Type', ''), webpage_bytes)
+ try:
+ return webpage_bytes.decode(encoding, 'replace')
+ except LookupError:
+ return webpage_bytes.decode('utf-8', 'replace')
+
+ def _webpage_read_content(self, urlh, url_or_request, video_id, note=None, errnote=None, fatal=True, prefix=None, encoding=None):
+ webpage_bytes = urlh.read()
+ if prefix is not None:
+ webpage_bytes = prefix + webpage_bytes
+ if self.get_param('dump_intermediate_pages', False):
+ self.to_screen('Dumping request to ' + urlh.url)
+ dump = base64.b64encode(webpage_bytes).decode('ascii')
+ self._downloader.to_screen(dump)
+ if self.get_param('write_pages'):
+ filename = self._request_dump_filename(urlh.url, video_id)
+ self.to_screen(f'Saving request to {filename}')
+ with open(filename, 'wb') as outf:
+ outf.write(webpage_bytes)
+
+ content = self.__decode_webpage(webpage_bytes, encoding, urlh.headers)
+ self.__check_blocked(content)
+
+ return content
+
+ def __print_error(self, errnote, fatal, video_id, err):
+ if fatal:
+ raise ExtractorError(f'{video_id}: {errnote}', cause=err)
+ elif errnote:
+ self.report_warning(f'{video_id}: {errnote}: {err}')
+
+ def _parse_xml(self, xml_string, video_id, transform_source=None, fatal=True, errnote=None):
+ if transform_source:
+ xml_string = transform_source(xml_string)
+ try:
+ return compat_etree_fromstring(xml_string.encode('utf-8'))
+ except xml.etree.ElementTree.ParseError as ve:
+ self.__print_error('Failed to parse XML' if errnote is None else errnote, fatal, video_id, ve)
+
+ def _parse_json(self, json_string, video_id, transform_source=None, fatal=True, errnote=None, **parser_kwargs):
+ try:
+ return json.loads(
+ json_string, cls=LenientJSONDecoder, strict=False, transform_source=transform_source, **parser_kwargs)
+ except ValueError as ve:
+ self.__print_error('Failed to parse JSON' if errnote is None else errnote, fatal, video_id, ve)
+
+ def _parse_socket_response_as_json(self, data, *args, **kwargs):
+ return self._parse_json(data[data.find('{'):data.rfind('}') + 1], *args, **kwargs)
+
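+ # Factory producing the paired _download_<name>_handle/_download_<name>
+ # methods defined below; `parser` is a method name (a string) so that
+ # subclasses can override the parsing step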
+ def __create_download_methods(name, parser, note, errnote, return_value):
+
+ def parse(ie, content, *args, errnote=errnote, **kwargs):
+ if parser is None:
+ return content
+ if errnote is False:
+ kwargs['errnote'] = errnote
+ # parser is fetched by name so subclasses can override it
+ return getattr(ie, parser)(content, *args, **kwargs)
+
+ def download_handle(self, url_or_request, video_id, note=note, errnote=errnote, transform_source=None,
+ fatal=True, encoding=None, data=None, headers={}, query={}, expected_status=None):
+ res = self._download_webpage_handle(
+ url_or_request, video_id, note=note, errnote=errnote, fatal=fatal, encoding=encoding,
+ data=data, headers=headers, query=query, expected_status=expected_status)
+ if res is False:
+ return res
+ content, urlh = res
+ return parse(self, content, video_id, transform_source=transform_source, fatal=fatal, errnote=errnote), urlh
+
+ def download_content(self, url_or_request, video_id, note=note, errnote=errnote, transform_source=None,
+ fatal=True, encoding=None, data=None, headers={}, query={}, expected_status=None):
+ if self.get_param('load_pages'):
+ url_or_request = self._create_request(url_or_request, data, headers, query)
+ filename = self._request_dump_filename(url_or_request.url, video_id)
+ self.to_screen(f'Loading request from {filename}')
+ try:
+ with open(filename, 'rb') as dumpf:
+ webpage_bytes = dumpf.read()
+ except OSError as e:
+ self.report_warning(f'Unable to load request from disk: {e}')
+ else:
+ content = self.__decode_webpage(webpage_bytes, encoding, url_or_request.headers)
+ return parse(self, content, video_id, transform_source=transform_source, fatal=fatal, errnote=errnote)
+ kwargs = {
+ 'note': note,
+ 'errnote': errnote,
+ 'transform_source': transform_source,
+ 'fatal': fatal,
+ 'encoding': encoding,
+ 'data': data,
+ 'headers': headers,
+ 'query': query,
+ 'expected_status': expected_status,
+ }
+ if parser is None:
+ kwargs.pop('transform_source')
+ # The method is fetched by name so subclasses can override _download_..._handle
+ res = getattr(self, download_handle.__name__)(url_or_request, video_id, **kwargs)
+ return res if res is False else res[0]
+
+ def impersonate(func, name, return_value):
+ func.__name__, func.__qualname__ = name, f'InfoExtractor.{name}'
+ func.__doc__ = f'''
+ @param transform_source Apply this transformation before parsing
+ @returns {return_value}
+
+ See _download_webpage_handle docstring for other arguments specification
+ '''
+
+ impersonate(download_handle, f'_download_{name}_handle', f'({return_value}, URL handle)')
+ impersonate(download_content, f'_download_{name}', f'{return_value}')
+ return download_handle, download_content
+
+ _download_xml_handle, _download_xml = __create_download_methods(
+ 'xml', '_parse_xml', 'Downloading XML', 'Unable to download XML', 'xml as an xml.etree.ElementTree.Element')
+ _download_json_handle, _download_json = __create_download_methods(
+ 'json', '_parse_json', 'Downloading JSON metadata', 'Unable to download JSON metadata', 'JSON object as a dict')
+ _download_socket_json_handle, _download_socket_json = __create_download_methods(
+ 'socket_json', '_parse_socket_response_as_json', 'Polling socket', 'Unable to poll socket', 'JSON object as a dict')
+ __download_webpage = __create_download_methods('webpage', None, None, None, 'data of the page as a string')[1]
+
+ def _download_webpage(
+ self, url_or_request, video_id, note=None, errnote=None,
+ fatal=True, tries=1, timeout=NO_DEFAULT, *args, **kwargs):
+ """
+ Return the data of the page as a string.
+
+ Keyword arguments:
+ tries -- number of tries
+ timeout -- sleep interval between tries
+
+ See _download_webpage_handle docstring for other arguments specification.
+ """
+
+ R''' # NB: These are unused; should they be deprecated?
+ if tries != 1:
+ self._downloader.deprecation_warning('tries argument is deprecated in InfoExtractor._download_webpage')
+ if timeout is NO_DEFAULT:
+ timeout = 5
+ else:
+ self._downloader.deprecation_warning('timeout argument is deprecated in InfoExtractor._download_webpage')
+ '''
+
+ try_count = 0
+ while True:
+ try:
+ return self.__download_webpage(url_or_request, video_id, note, errnote, None, fatal, *args, **kwargs)
+ except IncompleteRead as e:
+ try_count += 1
+ if try_count >= tries:
+ raise e
+ self._sleep(timeout, video_id)
+
+ def report_warning(self, msg, video_id=None, *args, only_once=False, **kwargs):
+ idstr = format_field(video_id, None, '%s: ')
+ msg = f'[{self.IE_NAME}] {idstr}{msg}'
+ if only_once:
+ if f'WARNING: {msg}' in self._printed_messages:
+ return
+ self._printed_messages.add(f'WARNING: {msg}')
+ self._downloader.report_warning(msg, *args, **kwargs)
+
+ def to_screen(self, msg, *args, **kwargs):
+ """Print msg to screen, prefixing it with '[ie_name]'"""
+ self._downloader.to_screen(f'[{self.IE_NAME}] {msg}', *args, **kwargs)
+
+ def write_debug(self, msg, *args, **kwargs):
+ self._downloader.write_debug(f'[{self.IE_NAME}] {msg}', *args, **kwargs)
+
+ def get_param(self, name, default=None, *args, **kwargs):
+ if self._downloader:
+ return self._downloader.params.get(name, default, *args, **kwargs)
+ return default
+
+ def report_drm(self, video_id, partial=NO_DEFAULT):
+ if partial is not NO_DEFAULT:
+ self._downloader.deprecation_warning('InfoExtractor.report_drm no longer accepts the argument partial')
+ self.raise_no_formats('This video is DRM protected', expected=True, video_id=video_id)
+
+ def report_extraction(self, id_or_name):
+ """Report information extraction."""
+ self.to_screen('%s: Extracting information' % id_or_name)
+
+ def report_download_webpage(self, video_id):
+ """Report webpage download."""
+ self.to_screen('%s: Downloading webpage' % video_id)
+
+ def report_age_confirmation(self):
+ """Report attempt to confirm age."""
+ self.to_screen('Confirming age')
+
+ def report_login(self):
+ """Report attempt to log in."""
+ self.to_screen('Logging in')
+
+ def raise_login_required(
+ self, msg='This video is only available for registered users',
+ metadata_available=False, method=NO_DEFAULT):
+ if metadata_available and (
+ self.get_param('ignore_no_formats_error') or self.get_param('wait_for_video')):
+ self.report_warning(msg)
+ return
+ msg += format_field(self._login_hint(method), None, '. %s')
+ raise ExtractorError(msg, expected=True)
+
+ def raise_geo_restricted(
+ self, msg='This video is not available from your location due to geo restriction',
+ countries=None, metadata_available=False):
+ if metadata_available and (
+ self.get_param('ignore_no_formats_error') or self.get_param('wait_for_video')):
+ self.report_warning(msg)
+ else:
+ raise GeoRestrictedError(msg, countries=countries)
+
+ def raise_no_formats(self, msg, expected=False, video_id=None):
+ if expected and (
+ self.get_param('ignore_no_formats_error') or self.get_param('wait_for_video')):
+ self.report_warning(msg, video_id)
+ elif isinstance(msg, ExtractorError):
+ raise msg
+ else:
+ raise ExtractorError(msg, expected=expected, video_id=video_id)
+
+ # Methods for following #608
+ @staticmethod
+ def url_result(url, ie=None, video_id=None, video_title=None, *, url_transparent=False, **kwargs):
+ """Returns a URL that points to a page that should be processed"""
+ if ie is not None:
+ kwargs['ie_key'] = ie if isinstance(ie, str) else ie.ie_key()
+ if video_id is not None:
+ kwargs['id'] = video_id
+ if video_title is not None:
+ kwargs['title'] = video_title
+ return {
+ **kwargs,
+ '_type': 'url_transparent' if url_transparent else 'url',
+ 'url': url,
+ }
+
+ @classmethod
+ def playlist_from_matches(cls, matches, playlist_id=None, playlist_title=None,
+ getter=IDENTITY, ie=None, video_kwargs=None, **kwargs):
+ return cls.playlist_result(
+ (cls.url_result(m, ie, **(video_kwargs or {})) for m in orderedSet(map(getter, matches), lazy=True)),
+ playlist_id, playlist_title, **kwargs)
+
+ @staticmethod
+ def playlist_result(entries, playlist_id=None, playlist_title=None, playlist_description=None, *, multi_video=False, **kwargs):
+ """Returns a playlist"""
+ if playlist_id:
+ kwargs['id'] = playlist_id
+ if playlist_title:
+ kwargs['title'] = playlist_title
+ if playlist_description is not None:
+ kwargs['description'] = playlist_description
+ return {
+ **kwargs,
+ '_type': 'multi_video' if multi_video else 'playlist',
+ 'entries': entries,
+ }
+
+ def _search_regex(self, pattern, string, name, default=NO_DEFAULT, fatal=True, flags=0, group=None):
+ """
+ Perform a regex search on the given string, using a single pattern or a
+ list of patterns, returning the first matching group.
+ In case of failure, return a default value, report a warning, or raise a
+ RegexNotFoundError, depending on default and fatal, specifying the field name.
+ """
+ if string is None:
+ mobj = None
+ elif isinstance(pattern, (str, re.Pattern)):
+ mobj = re.search(pattern, string, flags)
+ else:
+ for p in pattern:
+ mobj = re.search(p, string, flags)
+ if mobj:
+ break
+
+ _name = self._downloader._format_err(name, self._downloader.Styles.EMPHASIS)
+
+ if mobj:
+ if group is None:
+ # return the first matching group
+ return next(g for g in mobj.groups() if g is not None)
+ elif isinstance(group, (list, tuple)):
+ return tuple(mobj.group(g) for g in group)
+ else:
+ return mobj.group(group)
+ elif default is not NO_DEFAULT:
+ return default
+ elif fatal:
+ raise RegexNotFoundError('Unable to extract %s' % _name)
+ else:
+ self.report_warning('unable to extract %s' % _name + bug_reports_message())
+ return None
+
+ def _search_json(self, start_pattern, string, name, video_id, *, end_pattern='',
+ contains_pattern=r'{(?s:.+)}', fatal=True, default=NO_DEFAULT, **kwargs):
+ """Searches string for the JSON object specified by start_pattern"""
+ # NB: end_pattern is only used to reduce the size of the initial match
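+ # Typical use (hypothetical pattern, matching `window.config = {...}` in a page):
+ #   self._search_json(r'window\.config\s*=', webpage, 'config', video_id)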
+ if default is NO_DEFAULT:
+ default, has_default = {}, False
+ else:
+ fatal, has_default = False, True
+
+ json_string = self._search_regex(
+ rf'(?:{start_pattern})\s*(?P<json>{contains_pattern})\s*(?:{end_pattern})',
+ string, name, group='json', fatal=fatal, default=None if has_default else NO_DEFAULT)
+ if not json_string:
+ return default
+
+ _name = self._downloader._format_err(name, self._downloader.Styles.EMPHASIS)
+ try:
+ return self._parse_json(json_string, video_id, ignore_extra=True, **kwargs)
+ except ExtractorError as e:
+ if fatal:
+ raise ExtractorError(
+ f'Unable to extract {_name} - Failed to parse JSON', cause=e.cause, video_id=video_id)
+ elif not has_default:
+ self.report_warning(
+ f'Unable to extract {_name} - Failed to parse JSON: {e}', video_id=video_id)
+ return default
+
+ def _html_search_regex(self, pattern, string, name, default=NO_DEFAULT, fatal=True, flags=0, group=None):
+ """
+ Like _search_regex, but strips HTML tags and unescapes entities.
+ """
+ res = self._search_regex(pattern, string, name, default, fatal, flags, group)
+ if isinstance(res, tuple):
+ return tuple(map(clean_html, res))
+ return clean_html(res)
+
+ def _get_netrc_login_info(self, netrc_machine=None):
+ netrc_machine = netrc_machine or self._NETRC_MACHINE
+
+ cmd = self.get_param('netrc_cmd')
+ if cmd:
+ cmd = cmd.replace('{}', netrc_machine)
+ self.to_screen(f'Executing command: {cmd}')
+ stdout, _, ret = Popen.run(cmd, text=True, shell=True, stdout=subprocess.PIPE)
+ if ret != 0:
+ raise OSError(f'Command returned error code {ret}')
+ info = netrc_from_content(stdout).authenticators(netrc_machine)
+
+ elif self.get_param('usenetrc', False):
+ netrc_file = compat_expanduser(self.get_param('netrc_location') or '~')
+ if os.path.isdir(netrc_file):
+ netrc_file = os.path.join(netrc_file, '.netrc')
+ info = netrc.netrc(netrc_file).authenticators(netrc_machine)
+
+ else:
+ return None, None
+ if not info:
+ self.to_screen(f'No authenticators for {netrc_machine}')
+ return None, None
+
+ self.write_debug(f'Using netrc for {netrc_machine} authentication')
+ return info[0], info[2]
+
+ def _get_login_info(self, username_option='username', password_option='password', netrc_machine=None):
+ """
+ Get the login info as (username, password)
+ First look for the manually specified credentials using username_option
+ and password_option as keys in the params dictionary. If no such credentials
+ are available, try the netrc_cmd if it is defined, or look in the
+ netrc file using the netrc_machine or _NETRC_MACHINE value.
+ If there's no info available, return (None, None)
+ """
+
+ username = self.get_param(username_option)
+ if username is not None:
+ password = self.get_param(password_option)
+ else:
+ try:
+ username, password = self._get_netrc_login_info(netrc_machine)
+ except (OSError, netrc.NetrcParseError) as err:
+ self.report_warning(f'Failed to parse .netrc: {err}')
+ return None, None
+ return username, password
+
+ def _get_tfa_info(self, note='two-factor verification code'):
+ """
+ Get the two-factor authentication info
+ TODO: asking the user will be required for sms/phone verification;
+ currently this just uses the command line option
+ If there's no info available, return None
+ """
+
+ tfa = self.get_param('twofactor')
+ if tfa is not None:
+ return tfa
+
+ return getpass.getpass('Type %s and press [Return]: ' % note)
+
+ # Helper functions for extracting OpenGraph info
+ @staticmethod
+ def _og_regexes(prop):
+ content_re = r'content=(?:"([^"]+?)"|\'([^\']+?)\'|\s*([^\s"\'=<>`]+?)(?=\s|/?>))'
+ property_re = (r'(?:name|property)=(?:\'og%(sep)s%(prop)s\'|"og%(sep)s%(prop)s"|\s*og%(sep)s%(prop)s\b)'
+ % {'prop': re.escape(prop), 'sep': '(?:&#x3A;|[:-])'})
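+ # The templates below match tags such as
+ # <meta property="og:title" content="..."> with the two attributes
+ # in either order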
+ template = r'<meta[^>]+?%s[^>]+?%s'
+ return [
+ template % (property_re, content_re),
+ template % (content_re, property_re),
+ ]
+
+ @staticmethod
+ def _meta_regex(prop):
+ return r'''(?isx)<meta
+ (?=[^>]+(?:itemprop|name|property|id|http-equiv)=(["\']?)%s\1)
+ [^>]+?content=(["\'])(?P<content>.*?)\2''' % re.escape(prop)
+
+ def _og_search_property(self, prop, html, name=None, **kargs):
+ prop = variadic(prop)
+ if name is None:
+ name = 'OpenGraph %s' % prop[0]
+ og_regexes = []
+ for p in prop:
+ og_regexes.extend(self._og_regexes(p))
+ escaped = self._search_regex(og_regexes, html, name, flags=re.DOTALL, **kargs)
+ if escaped is None:
+ return None
+ return unescapeHTML(escaped)
+
+ def _og_search_thumbnail(self, html, **kargs):
+ return self._og_search_property('image', html, 'thumbnail URL', fatal=False, **kargs)
+
+ def _og_search_description(self, html, **kargs):
+ return self._og_search_property('description', html, fatal=False, **kargs)
+
+ def _og_search_title(self, html, *, fatal=False, **kargs):
+ return self._og_search_property('title', html, fatal=fatal, **kargs)
+
+ def _og_search_video_url(self, html, name='video url', secure=True, **kargs):
+ regexes = self._og_regexes('video') + self._og_regexes('video:url')
+ if secure:
+ regexes = self._og_regexes('video:secure_url') + regexes
+ return self._html_search_regex(regexes, html, name, **kargs)
+
+ def _og_search_url(self, html, **kargs):
+ return self._og_search_property('url', html, **kargs)
+
+ def _html_extract_title(self, html, name='title', *, fatal=False, **kwargs):
+ return self._html_search_regex(r'(?s)<title\b[^>]*>([^<]+)</title>', html, name, fatal=fatal, **kwargs)
+
+ def _html_search_meta(self, name, html, display_name=None, fatal=False, **kwargs):
+ name = variadic(name)
+ if display_name is None:
+ display_name = name[0]
+ return self._html_search_regex(
+ [self._meta_regex(n) for n in name],
+ html, display_name, fatal=fatal, group='content', **kwargs)
+
+ def _dc_search_uploader(self, html):
+ return self._html_search_meta('dc.creator', html, 'uploader')
+
+ @staticmethod
+ def _rta_search(html):
+ # See http://www.rtalabel.org/index.php?content=howtofaq#single
+ if re.search(r'(?ix)<meta\s+name="rating"\s+'
+ r' content="RTA-5042-1996-1400-1577-RTA"',
+ html):
+ return 18
+
+ # And then there are the jokers who advertise that they use RTA, but actually don't.
+ AGE_LIMIT_MARKERS = [
+ r'Proudly Labeled <a href="http://www\.rtalabel\.org/" title="Restricted to Adults">RTA</a>',
+ r'>[^<]*you acknowledge you are at least (\d+) years old',
+ r'>\s*(?:18\s+U(?:\.S\.C\.|SC)\s+)?(?:§+\s*)?2257\b',
+ ]
+
+ age_limit = 0
+ for marker in AGE_LIMIT_MARKERS:
+ mobj = re.search(marker, html)
+ if mobj:
+ age_limit = max(age_limit, int(traverse_obj(mobj, 1, default=18)))
+ return age_limit
+
+ def _media_rating_search(self, html):
+ # See http://www.tjg-designs.com/WP/metadata-code-examples-adding-metadata-to-your-web-pages/
+ rating = self._html_search_meta('rating', html)
+
+ if not rating:
+ return None
+
+ RATING_TABLE = {
+ 'safe for kids': 0,
+ 'general': 8,
+ '14 years': 14,
+ 'mature': 17,
+ 'restricted': 19,
+ }
+ return RATING_TABLE.get(rating.lower())
+
+ def _family_friendly_search(self, html):
+ # See http://schema.org/VideoObject
+ family_friendly = self._html_search_meta(
+ 'isFamilyFriendly', html, default=None)
+
+ if not family_friendly:
+ return None
+
+ RATING_TABLE = {
+ '1': 0,
+ 'true': 0,
+ '0': 18,
+ 'false': 18,
+ }
+ return RATING_TABLE.get(family_friendly.lower())
+
+ def _twitter_search_player(self, html):
+ return self._html_search_meta('twitter:player', html,
+ 'twitter card player')
+
+ def _yield_json_ld(self, html, video_id, *, fatal=True, default=NO_DEFAULT):
+ """Yield all json ld objects in the html"""
+ if default is not NO_DEFAULT:
+ fatal = False
+ for mobj in re.finditer(JSON_LD_RE, html):
+ json_ld_item = self._parse_json(mobj.group('json_ld'), video_id, fatal=fatal)
+ for json_ld in variadic(json_ld_item):
+ if isinstance(json_ld, dict):
+ yield json_ld
+
+ def _search_json_ld(self, html, video_id, expected_type=None, *, fatal=True, default=NO_DEFAULT):
+ """Search for a video in any json ld in the html"""
+ if default is not NO_DEFAULT:
+ fatal = False
+ info = self._json_ld(
+ list(self._yield_json_ld(html, video_id, fatal=fatal, default=default)),
+ video_id, fatal=fatal, expected_type=expected_type)
+ if info:
+ return info
+ if default is not NO_DEFAULT:
+ return default
+ elif fatal:
+ raise RegexNotFoundError('Unable to extract JSON-LD')
+ else:
+ self.report_warning('unable to extract JSON-LD %s' % bug_reports_message())
+ return {}
+
+ def _json_ld(self, json_ld, video_id, fatal=True, expected_type=None):
+ if isinstance(json_ld, str):
+ json_ld = self._parse_json(json_ld, video_id, fatal=fatal)
+ if not json_ld:
+ return {}
+ info = {}
+
+ INTERACTION_TYPE_MAP = {
+ 'CommentAction': 'comment',
+ 'AgreeAction': 'like',
+ 'DisagreeAction': 'dislike',
+ 'LikeAction': 'like',
+ 'DislikeAction': 'dislike',
+ 'ListenAction': 'view',
+ 'WatchAction': 'view',
+ 'ViewAction': 'view',
+ }
+
+ def is_type(e, *expected_types):
+ type = variadic(traverse_obj(e, '@type'))
+ return any(x in type for x in expected_types)
+
+ def extract_interaction_type(e):
+ interaction_type = e.get('interactionType')
+ if isinstance(interaction_type, dict):
+ interaction_type = interaction_type.get('@type')
+ return str_or_none(interaction_type)
+
+ def extract_interaction_statistic(e):
+ interaction_statistic = e.get('interactionStatistic')
+ if isinstance(interaction_statistic, dict):
+ interaction_statistic = [interaction_statistic]
+ if not isinstance(interaction_statistic, list):
+ return
+ for is_e in interaction_statistic:
+ if not is_type(is_e, 'InteractionCounter'):
+ continue
+ interaction_type = extract_interaction_type(is_e)
+ if not interaction_type:
+ continue
+ # For the interaction count, some sites provide a string instead of
+ # an integer (as per spec) with non-digit characters (e.g. ","),
+ # so extract the count with the more relaxed str_to_int
+ interaction_count = str_to_int(is_e.get('userInteractionCount'))
+ if interaction_count is None:
+ continue
+ count_kind = INTERACTION_TYPE_MAP.get(interaction_type.split('/')[-1])
+ if not count_kind:
+ continue
+ count_key = '%s_count' % count_kind
+ if info.get(count_key) is not None:
+ continue
+ info[count_key] = interaction_count
+
+ def extract_chapter_information(e):
+ chapters = [{
+ 'title': part.get('name'),
+ 'start_time': part.get('startOffset'),
+ 'end_time': part.get('endOffset'),
+ } for part in variadic(e.get('hasPart') or []) if part.get('@type') == 'Clip']
+ for idx, (last_c, current_c, next_c) in enumerate(zip(
+ [{'end_time': 0}] + chapters, chapters, chapters[1:])):
+ current_c['end_time'] = current_c['end_time'] or next_c['start_time']
+ current_c['start_time'] = current_c['start_time'] or last_c['end_time']
+ if None in current_c.values():
+ self.report_warning(f'Chapter {idx} contains broken data. Not extracting chapters')
+ return
+ if chapters:
+ chapters[-1]['end_time'] = chapters[-1]['end_time'] or info['duration']
+ info['chapters'] = chapters
+
+ def extract_video_object(e):
+ author = e.get('author')
+ info.update({
+ 'url': url_or_none(e.get('contentUrl')),
+ 'ext': mimetype2ext(e.get('encodingFormat')),
+ 'title': unescapeHTML(e.get('name')),
+ 'description': unescapeHTML(e.get('description')),
+ 'thumbnails': [{'url': unescapeHTML(url)}
+ for url in variadic(traverse_obj(e, 'thumbnailUrl', 'thumbnailURL'))
+ if url_or_none(url)],
+ 'duration': parse_duration(e.get('duration')),
+ 'timestamp': unified_timestamp(e.get('uploadDate')),
+ # author can be an instance of 'Organization' or 'Person' types.
+ # both types can have a 'name' property (inherited from 'Thing' type). [1]
+ # however, some websites use the 'Text' type instead.
+ # 1. https://schema.org/VideoObject
+ 'uploader': author.get('name') if isinstance(author, dict) else author if isinstance(author, str) else None,
+ 'artist': traverse_obj(e, ('byArtist', 'name'), expected_type=str),
+ 'filesize': int_or_none(float_or_none(e.get('contentSize'))),
+ 'tbr': int_or_none(e.get('bitrate')),
+ 'width': int_or_none(e.get('width')),
+ 'height': int_or_none(e.get('height')),
+ 'view_count': int_or_none(e.get('interactionCount')),
+ 'tags': try_call(lambda: e.get('keywords').split(',')),
+ })
+ if is_type(e, 'AudioObject'):
+ info.update({
+ 'vcodec': 'none',
+ 'abr': int_or_none(e.get('bitrate')),
+ })
+ extract_interaction_statistic(e)
+ extract_chapter_information(e)
+
+ def traverse_json_ld(json_ld, at_top_level=True):
+ for e in variadic(json_ld):
+ if not isinstance(e, dict):
+ continue
+ if at_top_level and '@context' not in e:
+ continue
+ if at_top_level and set(e.keys()) == {'@context', '@graph'}:
+ traverse_json_ld(e['@graph'], at_top_level=False)
+ continue
+ if expected_type is not None and not is_type(e, expected_type):
+ continue
+ rating = traverse_obj(e, ('aggregateRating', 'ratingValue'), expected_type=float_or_none)
+ if rating is not None:
+ info['average_rating'] = rating
+ if is_type(e, 'TVEpisode', 'Episode'):
+ episode_name = unescapeHTML(e.get('name'))
+ info.update({
+ 'episode': episode_name,
+ 'episode_number': int_or_none(e.get('episodeNumber')),
+ 'description': unescapeHTML(e.get('description')),
+ })
+ if not info.get('title') and episode_name:
+ info['title'] = episode_name
+ part_of_season = e.get('partOfSeason')
+ if is_type(part_of_season, 'TVSeason', 'Season', 'CreativeWorkSeason'):
+ info.update({
+ 'season': unescapeHTML(part_of_season.get('name')),
+ 'season_number': int_or_none(part_of_season.get('seasonNumber')),
+ })
+ part_of_series = e.get('partOfSeries') or e.get('partOfTVSeries')
+ if is_type(part_of_series, 'TVSeries', 'Series', 'CreativeWorkSeries'):
+ info['series'] = unescapeHTML(part_of_series.get('name'))
+ elif is_type(e, 'Movie'):
+ info.update({
+ 'title': unescapeHTML(e.get('name')),
+ 'description': unescapeHTML(e.get('description')),
+ 'duration': parse_duration(e.get('duration')),
+ 'timestamp': unified_timestamp(e.get('dateCreated')),
+ })
+ elif is_type(e, 'Article', 'NewsArticle'):
+ info.update({
+ 'timestamp': parse_iso8601(e.get('datePublished')),
+ 'title': unescapeHTML(e.get('headline')),
+ 'description': unescapeHTML(e.get('articleBody') or e.get('description')),
+ })
+ if is_type(traverse_obj(e, ('video', 0)), 'VideoObject'):
+ extract_video_object(e['video'][0])
+ elif is_type(traverse_obj(e, ('subjectOf', 0)), 'VideoObject'):
+ extract_video_object(e['subjectOf'][0])
+ elif is_type(e, 'VideoObject', 'AudioObject'):
+ extract_video_object(e)
+ if expected_type is None:
+ continue
+ else:
+ break
+ video = e.get('video')
+ if is_type(video, 'VideoObject'):
+ extract_video_object(video)
+ if expected_type is None:
+ continue
+ else:
+ break
+
+ traverse_json_ld(json_ld)
+ return filter_dict(info)
+
+ def _search_nextjs_data(self, webpage, video_id, *, transform_source=None, fatal=True, **kw):
+ return self._parse_json(
+ self._search_regex(
+ r'(?s)<script[^>]+id=[\'"]__NEXT_DATA__[\'"][^>]*>([^<]+)</script>',
+ webpage, 'next.js data', fatal=fatal, **kw),
+ video_id, transform_source=transform_source, fatal=fatal)
+
+ def _search_nuxt_data(self, webpage, video_id, context_name='__NUXT__', *, fatal=True, traverse=('data', 0)):
+ """Parses Nuxt.js metadata. This works as long as the function __NUXT__ invokes is a pure function"""
+ rectx = re.escape(context_name)
+ FUNCTION_RE = r'\(function\((?P<arg_keys>.*?)\){.*?\breturn\s+(?P<js>{.*?})\s*;?\s*}\((?P<arg_vals>.*?)\)'
+ js, arg_keys, arg_vals = self._search_regex(
+ (rf'<script>\s*window\.{rectx}={FUNCTION_RE}\s*\)\s*;?\s*</script>', rf'{rectx}\(.*?{FUNCTION_RE}'),
+ webpage, context_name, group=('js', 'arg_keys', 'arg_vals'),
+ default=NO_DEFAULT if fatal else (None, None, None))
+ if js is None:
+ return {}
+
+ args = dict(zip(arg_keys.split(','), map(json.dumps, self._parse_json(
+ f'[{arg_vals}]', video_id, transform_source=js_to_json, fatal=fatal) or ())))
+
+ ret = self._parse_json(js, video_id, transform_source=functools.partial(js_to_json, vars=args), fatal=fatal)
+ return traverse_obj(ret, traverse) or {}
+
+ @staticmethod
+ def _hidden_inputs(html):
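+ """Return a dict of the values of all hidden (and submit) <input> elements in html, keyed by name or id"""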
+ html = re.sub(r'<!--(?:(?!<!--).)*-->', '', html)
+ hidden_inputs = {}
+ for input in re.findall(r'(?i)(<input[^>]+>)', html):
+ if not input:
+ continue
+ attrs = extract_attributes(input)
+ if attrs.get('type') not in ('hidden', 'submit'):
+ continue
+ name = attrs.get('name') or attrs.get('id')
+ value = attrs.get('value')
+ if name and value is not None:
+ hidden_inputs[name] = value
+ return hidden_inputs
+
+ def _form_hidden_inputs(self, form_id, html):
+ form = self._search_regex(
+ r'(?is)<form[^>]+?id=(["\'])%s\1[^>]*>(?P<form>.+?)</form>' % form_id,
+ html, '%s form' % form_id, group='form')
+ return self._hidden_inputs(form)
+
+ @classproperty(cache=True)
+ def FormatSort(cls):
+ class FormatSort(FormatSorter):
+ def __init__(ie, *args, **kwargs):
+ super().__init__(ie._downloader, *args, **kwargs)
+
+ deprecation_warning(
+ 'yt_dlp.InfoExtractor.FormatSort is deprecated and may be removed in the future. '
+ 'Use yt_dlp.utils.FormatSorter instead')
+ return FormatSort
+
+ def _sort_formats(self, formats, field_preference=[]):
+ if not field_preference:
+ self._downloader.deprecation_warning(
+ 'yt_dlp.InfoExtractor._sort_formats is deprecated and is no longer required')
+ return
+ self._downloader.deprecation_warning(
+ 'yt_dlp.InfoExtractor._sort_formats is deprecated and no longer works as expected. '
+ 'Return _format_sort_fields in the info_dict instead')
+ if formats:
+ formats[0]['__sort_fields'] = field_preference
+
+ def _check_formats(self, formats, video_id):
+ if formats:
+ formats[:] = filter(
+ lambda f: self._is_valid_url(
+ f['url'], video_id,
+ item='%s video format' % f.get('format_id') if f.get('format_id') else 'video'),
+ formats)
+
+ @staticmethod
+ def _remove_duplicate_formats(formats):
+ format_urls = set()
+ unique_formats = []
+ for f in formats:
+ if f['url'] not in format_urls:
+ format_urls.add(f['url'])
+ unique_formats.append(f)
+ formats[:] = unique_formats
+
+ def _is_valid_url(self, url, video_id, item='video', headers={}):
+ url = self._proto_relative_url(url, scheme='http:')
+ # For now assume non HTTP(S) URLs always valid
+ if not (url.startswith('http://') or url.startswith('https://')):
+ return True
+ try:
+ self._request_webpage(url, video_id, 'Checking %s URL' % item, headers=headers)
+ return True
+ except ExtractorError as e:
+ self.to_screen(
+ '%s: %s URL is invalid, skipping: %s'
+ % (video_id, item, error_to_compat_str(e.cause)))
+ return False
+
+ def http_scheme(self):
+ """ Either "http:" or "https:", depending on the user's preferences """
+ return (
+ 'http:'
+ if self.get_param('prefer_insecure', False)
+ else 'https:')
+
+ def _proto_relative_url(self, url, scheme=None):
+ scheme = scheme or self.http_scheme()
+ assert scheme.endswith(':')
+ return sanitize_url(url, scheme=scheme[:-1])
+
+ def _sleep(self, timeout, video_id, msg_template=None):
+ if msg_template is None:
+ msg_template = '%(video_id)s: Waiting for %(timeout)s seconds'
+ msg = msg_template % {'video_id': video_id, 'timeout': timeout}
+ self.to_screen(msg)
+ time.sleep(timeout)
+
+ def _extract_f4m_formats(self, manifest_url, video_id, preference=None, quality=None, f4m_id=None,
+ transform_source=lambda s: fix_xml_ampersands(s).strip(),
+ fatal=True, m3u8_id=None, data=None, headers={}, query={}):
+ if self.get_param('ignore_no_formats_error'):
+ fatal = False
+
+ res = self._download_xml_handle(
+ manifest_url, video_id, 'Downloading f4m manifest',
+ 'Unable to download f4m manifest',
+ # Some manifests may be malformed, e.g. prosiebensat1 generated manifests
+ # (see https://github.com/ytdl-org/youtube-dl/issues/6215#issuecomment-121704244)
+ transform_source=transform_source,
+ fatal=fatal, data=data, headers=headers, query=query)
+ if res is False:
+ return []
+
+ manifest, urlh = res
+ manifest_url = urlh.url
+
+ return self._parse_f4m_formats(
+ manifest, manifest_url, video_id, preference=preference, quality=quality, f4m_id=f4m_id,
+ transform_source=transform_source, fatal=fatal, m3u8_id=m3u8_id)
+
+ def _parse_f4m_formats(self, manifest, manifest_url, video_id, preference=None, quality=None, f4m_id=None,
+ transform_source=lambda s: fix_xml_ampersands(s).strip(),
+ fatal=True, m3u8_id=None):
+ if not isinstance(manifest, xml.etree.ElementTree.Element) and not fatal:
+ return []
+
+ # currently yt-dlp cannot decode the playerVerificationChallenge as Akamai uses Adobe Alchemy
+ akamai_pv = manifest.find('{http://ns.adobe.com/f4m/1.0}pv-2.0')
+ if akamai_pv is not None and ';' in akamai_pv.text:
+ playerVerificationChallenge = akamai_pv.text.split(';')[0]
+ if playerVerificationChallenge.strip() != '':
+ return []
+
+ formats = []
+ manifest_version = '1.0'
+ media_nodes = manifest.findall('{http://ns.adobe.com/f4m/1.0}media')
+ if not media_nodes:
+ manifest_version = '2.0'
+ media_nodes = manifest.findall('{http://ns.adobe.com/f4m/2.0}media')
+ # Remove unsupported DRM protected media from final formats
+ # rendition (see https://github.com/ytdl-org/youtube-dl/issues/8573).
+ media_nodes = remove_encrypted_media(media_nodes)
+ if not media_nodes:
+ return formats
+
+ manifest_base_url = get_base_url(manifest)
+
+ bootstrap_info = xpath_element(
+ manifest, ['{http://ns.adobe.com/f4m/1.0}bootstrapInfo', '{http://ns.adobe.com/f4m/2.0}bootstrapInfo'],
+ 'bootstrap info', default=None)
+
+ vcodec = None
+ mime_type = xpath_text(
+ manifest, ['{http://ns.adobe.com/f4m/1.0}mimeType', '{http://ns.adobe.com/f4m/2.0}mimeType'],
+ 'base URL', default=None)
+ if mime_type and mime_type.startswith('audio/'):
+ vcodec = 'none'
+
+ for i, media_el in enumerate(media_nodes):
+ tbr = int_or_none(media_el.attrib.get('bitrate'))
+ width = int_or_none(media_el.attrib.get('width'))
+ height = int_or_none(media_el.attrib.get('height'))
+ format_id = join_nonempty(f4m_id, tbr or i)
+ # If <bootstrapInfo> is present, the specified f4m is a
+ # stream-level manifest, and only set-level manifests may refer to
+ # external resources. See section 11.4 and section 4 of F4M spec
+ if bootstrap_info is None:
+ media_url = None
+ # @href is introduced in 2.0, see section 11.6 of F4M spec
+ if manifest_version == '2.0':
+ media_url = media_el.attrib.get('href')
+ if media_url is None:
+ media_url = media_el.attrib.get('url')
+ if not media_url:
+ continue
+ manifest_url = (
+ media_url if media_url.startswith('http://') or media_url.startswith('https://')
+ else ((manifest_base_url or '/'.join(manifest_url.split('/')[:-1])) + '/' + media_url))
+ # If media_url is itself a f4m manifest do the recursive extraction
+ # since bitrates in parent manifest (this one) and media_url manifest
+ # may differ leading to inability to resolve the format by requested
+ # bitrate in f4m downloader
+ ext = determine_ext(manifest_url)
+ if ext == 'f4m':
+ f4m_formats = self._extract_f4m_formats(
+ manifest_url, video_id, preference=preference, quality=quality, f4m_id=f4m_id,
+ transform_source=transform_source, fatal=fatal)
+ # Sometimes stream-level manifest contains single media entry that
+ # does not contain any quality metadata (e.g. http://matchtv.ru/#live-player).
+ # At the same time parent's media entry in set-level manifest may
+ # contain it. We will copy it from parent in such cases.
+ if len(f4m_formats) == 1:
+ f = f4m_formats[0]
+ f.update({
+ 'tbr': f.get('tbr') or tbr,
+ 'width': f.get('width') or width,
+ 'height': f.get('height') or height,
+ 'format_id': f.get('format_id') if not tbr else format_id,
+ 'vcodec': vcodec,
+ })
+ formats.extend(f4m_formats)
+ continue
+ elif ext == 'm3u8':
+ formats.extend(self._extract_m3u8_formats(
+ manifest_url, video_id, 'mp4', preference=preference,
+ quality=quality, m3u8_id=m3u8_id, fatal=fatal))
+ continue
+ formats.append({
+ 'format_id': format_id,
+ 'url': manifest_url,
+ 'manifest_url': manifest_url,
+ 'ext': 'flv' if bootstrap_info is not None else None,
+ 'protocol': 'f4m',
+ 'tbr': tbr,
+ 'width': width,
+ 'height': height,
+ 'vcodec': vcodec,
+ 'preference': preference,
+ 'quality': quality,
+ })
+ return formats
+
+ def _m3u8_meta_format(self, m3u8_url, ext=None, preference=None, quality=None, m3u8_id=None):
+ return {
+ 'format_id': join_nonempty(m3u8_id, 'meta'),
+ 'url': m3u8_url,
+ 'ext': ext,
+ 'protocol': 'm3u8',
+ 'preference': preference - 100 if preference else -100,
+ 'quality': quality,
+ 'resolution': 'multiple',
+ 'format_note': 'Quality selection URL',
+ }
+
+ def _report_ignoring_subs(self, name):
+ self.report_warning(bug_reports_message(
+ f'Ignoring subtitle tracks found in the {name} manifest; '
+ 'if any subtitle tracks are missing,'
+ ), only_once=True)
+
+ def _extract_m3u8_formats(self, *args, **kwargs):
+ fmts, subs = self._extract_m3u8_formats_and_subtitles(*args, **kwargs)
+ if subs:
+ self._report_ignoring_subs('HLS')
+ return fmts
+
+ def _extract_m3u8_formats_and_subtitles(
+ self, m3u8_url, video_id, ext=None, entry_protocol='m3u8_native',
+ preference=None, quality=None, m3u8_id=None, note=None,
+ errnote=None, fatal=True, live=False, data=None, headers={},
+ query={}):
+
+ if self.get_param('ignore_no_formats_error'):
+ fatal = False
+
+ if not m3u8_url:
+ if errnote is not False:
+ errnote = errnote or 'Failed to obtain m3u8 URL'
+ if fatal:
+ raise ExtractorError(errnote, video_id=video_id)
+ self.report_warning(f'{errnote}{bug_reports_message()}')
+ return [], {}
+
+ res = self._download_webpage_handle(
+ m3u8_url, video_id,
+ note='Downloading m3u8 information' if note is None else note,
+ errnote='Failed to download m3u8 information' if errnote is None else errnote,
+ fatal=fatal, data=data, headers=headers, query=query)
+
+ if res is False:
+ return [], {}
+
+ m3u8_doc, urlh = res
+ m3u8_url = urlh.url
+
+ return self._parse_m3u8_formats_and_subtitles(
+ m3u8_doc, m3u8_url, ext=ext, entry_protocol=entry_protocol,
+ preference=preference, quality=quality, m3u8_id=m3u8_id,
+ note=note, errnote=errnote, fatal=fatal, live=live, data=data,
+ headers=headers, query=query, video_id=video_id)
+
+ def _parse_m3u8_formats_and_subtitles(
+ self, m3u8_doc, m3u8_url=None, ext=None, entry_protocol='m3u8_native',
+ preference=None, quality=None, m3u8_id=None, live=False, note=None,
+ errnote=None, fatal=True, data=None, headers={}, query={},
+ video_id=None):
+ formats, subtitles = [], {}
+ has_drm = HlsFD._has_drm(m3u8_doc)
+
+ def format_url(url):
+ return url if re.match(r'^https?://', url) else urllib.parse.urljoin(m3u8_url, url)
+
+ if self.get_param('hls_split_discontinuity', False):
+ def _extract_m3u8_playlist_indices(manifest_url=None, m3u8_doc=None):
+ if not m3u8_doc:
+ if not manifest_url:
+ return []
+ m3u8_doc = self._download_webpage(
+ manifest_url, video_id, fatal=fatal, data=data, headers=headers,
+ note=False, errnote='Failed to download m3u8 playlist information')
+ if m3u8_doc is False:
+ return []
+ return range(1 + sum(line.startswith('#EXT-X-DISCONTINUITY') for line in m3u8_doc.splitlines()))
+
+ else:
+ def _extract_m3u8_playlist_indices(*args, **kwargs):
+ return [None]
+
+ # References:
+ # 1. https://tools.ietf.org/html/draft-pantos-http-live-streaming-21
+ # 2. https://github.com/ytdl-org/youtube-dl/issues/12211
+ # 3. https://github.com/ytdl-org/youtube-dl/issues/18923
+
+        # We should try extracting formats only from master playlists [1, 4.3.4],
+        # i.e. playlists that describe the available qualities. Media playlists
+        # [1, 4.3.3], on the other hand, should be returned as is, since they
+        # contain just the media without quality renditions.
+        # Fortunately, a master playlist can easily be distinguished from a media
+        # playlist by the availability of particular tags. As per [1, 4.3.3, 4.3.4],
+        # master playlist tags MUST NOT appear in a media playlist and vice versa.
+        # As per [1, 4.3.3.1], the #EXT-X-TARGETDURATION tag is REQUIRED for every
+        # media playlist and MUST NOT appear in a master playlist, so we can
+        # reliably detect a media playlist with this criterion.
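+        # For illustration, a minimal media playlist looks like:
+        #     #EXTM3U
+        #     #EXT-X-TARGETDURATION:10
+        #     #EXTINF:9.009,
+        #     segment0.ts
+        # while a master playlist lists variant streams instead:
+        #     #EXTM3U
+        #     #EXT-X-STREAM-INF:BANDWIDTH=1280000,RESOLUTION=1280x720
+        #     hi/index.m3u8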
+
+ if '#EXT-X-TARGETDURATION' in m3u8_doc: # media playlist, return as is
+ formats = [{
+ 'format_id': join_nonempty(m3u8_id, idx),
+ 'format_index': idx,
+ 'url': m3u8_url or encode_data_uri(m3u8_doc.encode('utf-8'), 'application/x-mpegurl'),
+ 'ext': ext,
+ 'protocol': entry_protocol,
+ 'preference': preference,
+ 'quality': quality,
+ 'has_drm': has_drm,
+ } for idx in _extract_m3u8_playlist_indices(m3u8_doc=m3u8_doc)]
+
+ return formats, subtitles
+
+ groups = {}
+ last_stream_inf = {}
+
+ def extract_media(x_media_line):
+ media = parse_m3u8_attributes(x_media_line)
+ # As per [1, 4.3.4.1] TYPE, GROUP-ID and NAME are REQUIRED
+ media_type, group_id, name = media.get('TYPE'), media.get('GROUP-ID'), media.get('NAME')
+ if not (media_type and group_id and name):
+ return
+ groups.setdefault(group_id, []).append(media)
+ # <https://tools.ietf.org/html/rfc8216#section-4.3.4.1>
+ if media_type == 'SUBTITLES':
+ # According to RFC 8216 §4.3.4.2.1, URI is REQUIRED in the
+ # EXT-X-MEDIA tag if the media type is SUBTITLES.
+ # However, lack of URI has been spotted in the wild.
+ # e.g. NebulaIE; see https://github.com/yt-dlp/yt-dlp/issues/339
+ if not media.get('URI'):
+ return
+ url = format_url(media['URI'])
+ sub_info = {
+ 'url': url,
+ 'ext': determine_ext(url),
+ }
+ if sub_info['ext'] == 'm3u8':
+ # Per RFC 8216 §3.1, the only possible subtitle format m3u8
+ # files may contain is WebVTT:
+ # <https://tools.ietf.org/html/rfc8216#section-3.1>
+ sub_info['ext'] = 'vtt'
+ sub_info['protocol'] = 'm3u8_native'
+ lang = media.get('LANGUAGE') or 'und'
+ subtitles.setdefault(lang, []).append(sub_info)
+ if media_type not in ('VIDEO', 'AUDIO'):
+ return
+ media_url = media.get('URI')
+ if media_url:
+ manifest_url = format_url(media_url)
+ formats.extend({
+ 'format_id': join_nonempty(m3u8_id, group_id, name, idx),
+ 'format_note': name,
+ 'format_index': idx,
+ 'url': manifest_url,
+ 'manifest_url': m3u8_url,
+ 'language': media.get('LANGUAGE'),
+ 'ext': ext,
+ 'protocol': entry_protocol,
+ 'preference': preference,
+ 'quality': quality,
+ 'has_drm': has_drm,
+ 'vcodec': 'none' if media_type == 'AUDIO' else None,
+ } for idx in _extract_m3u8_playlist_indices(manifest_url))
+
+ def build_stream_name():
+            # Although the specification does not mention a NAME attribute for
+            # the EXT-X-STREAM-INF tag, it may still sometimes be present (see [1]
+            # or the vidio test in TestInfoExtractor.test_parse_m3u8_formats)
+            # 1. http://www.vidio.com/watch/165683-dj_ambred-booyah-live-2015
+ stream_name = last_stream_inf.get('NAME')
+ if stream_name:
+ return stream_name
+            # If there is no NAME in EXT-X-STREAM-INF it will be obtained
+            # from the corresponding rendition group
+ stream_group_id = last_stream_inf.get('VIDEO')
+ if not stream_group_id:
+ return
+ stream_group = groups.get(stream_group_id)
+ if not stream_group:
+ return stream_group_id
+ rendition = stream_group[0]
+ return rendition.get('NAME') or stream_group_id
+
+        # Parse EXT-X-MEDIA tags before EXT-X-STREAM-INF in order to have a
+        # chance to detect video-only formats when EXT-X-STREAM-INF tags
+        # precede EXT-X-MEDIA tags in the HLS manifest, as in [3].
+ for line in m3u8_doc.splitlines():
+ if line.startswith('#EXT-X-MEDIA:'):
+ extract_media(line)
+
+ for line in m3u8_doc.splitlines():
+ if line.startswith('#EXT-X-STREAM-INF:'):
+ last_stream_inf = parse_m3u8_attributes(line)
+ elif line.startswith('#') or not line.strip():
+ continue
+ else:
+ tbr = float_or_none(
+ last_stream_inf.get('AVERAGE-BANDWIDTH')
+ or last_stream_inf.get('BANDWIDTH'), scale=1000)
+ manifest_url = format_url(line.strip())
+
+ for idx in _extract_m3u8_playlist_indices(manifest_url):
+ format_id = [m3u8_id, None, idx]
+                    # The bandwidth of live streams may vary over time, making
+                    # the format_id unpredictable, so it is better to keep the
+                    # provided format_id intact.
+ if not live:
+ stream_name = build_stream_name()
+ format_id[1] = stream_name or '%d' % (tbr or len(formats))
+ f = {
+ 'format_id': join_nonempty(*format_id),
+ 'format_index': idx,
+ 'url': manifest_url,
+ 'manifest_url': m3u8_url,
+ 'tbr': tbr,
+ 'ext': ext,
+ 'fps': float_or_none(last_stream_inf.get('FRAME-RATE')),
+ 'protocol': entry_protocol,
+ 'preference': preference,
+ 'quality': quality,
+ 'has_drm': has_drm,
+ }
+ resolution = last_stream_inf.get('RESOLUTION')
+ if resolution:
+ mobj = re.search(r'(?P<width>\d+)[xX](?P<height>\d+)', resolution)
+ if mobj:
+ f['width'] = int(mobj.group('width'))
+ f['height'] = int(mobj.group('height'))
+ # Unified Streaming Platform
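+                    # e.g. a (hypothetical) USP URL containing
+                    # '..._audio_eng=128000-video=2400000...' would yield
+                    # abr=128 and vbr=2400 (kbit/s)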
+ mobj = re.search(
+ r'audio.*?(?:%3D|=)(\d+)(?:-video.*?(?:%3D|=)(\d+))?', f['url'])
+ if mobj:
+ abr, vbr = mobj.groups()
+ abr, vbr = float_or_none(abr, 1000), float_or_none(vbr, 1000)
+ f.update({
+ 'vbr': vbr,
+ 'abr': abr,
+ })
+ codecs = parse_codecs(last_stream_inf.get('CODECS'))
+ f.update(codecs)
+ audio_group_id = last_stream_inf.get('AUDIO')
+                    # As per [1, 4.3.4.1.1] any EXT-X-STREAM-INF tag which
+                    # references a rendition group MUST have a CODECS attribute.
+                    # However, this is not always respected: e.g. [2] contains
+                    # an EXT-X-STREAM-INF tag which references an AUDIO
+                    # rendition group but has no CODECS attribute and, despite
+                    # referencing an audio group, represents a complete
+                    # (audio and video) format. For such cases we therefore
+                    # ignore references to rendition groups and treat them
+                    # as complete formats.
+ if audio_group_id and codecs and f.get('vcodec') != 'none':
+ audio_group = groups.get(audio_group_id)
+ if audio_group and audio_group[0].get('URI'):
+                        # TODO: update acodec for audio-only formats with
+                        # the same GROUP-ID
+ f['acodec'] = 'none'
+ if not f.get('ext'):
+ f['ext'] = 'm4a' if f.get('vcodec') == 'none' else 'mp4'
+ formats.append(f)
+
+ # for DailyMotion
+ progressive_uri = last_stream_inf.get('PROGRESSIVE-URI')
+ if progressive_uri:
+ http_f = f.copy()
+ del http_f['manifest_url']
+ http_f.update({
+ 'format_id': f['format_id'].replace('hls-', 'http-'),
+ 'protocol': 'http',
+ 'url': progressive_uri,
+ })
+ formats.append(http_f)
+
+ last_stream_inf = {}
+ return formats, subtitles
+
+ def _extract_m3u8_vod_duration(
+ self, m3u8_vod_url, video_id, note=None, errnote=None, data=None, headers={}, query={}):
+
+ m3u8_vod = self._download_webpage(
+ m3u8_vod_url, video_id,
+ note='Downloading m3u8 VOD manifest' if note is None else note,
+ errnote='Failed to download VOD manifest' if errnote is None else errnote,
+ fatal=False, data=data, headers=headers, query=query)
+
+ return self._parse_m3u8_vod_duration(m3u8_vod or '', video_id)
+
+ def _parse_m3u8_vod_duration(self, m3u8_vod, video_id):
+ if '#EXT-X-ENDLIST' not in m3u8_vod:
+ return None
+
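+        # The duration of a VOD playlist is the sum of its #EXTINF segment
+        # durations, e.g. three '#EXTINF:10.0,' lines add up to 30 seconds
+        # (illustrative)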
+ return int(sum(
+ float(line[len('#EXTINF:'):].split(',')[0])
+ for line in m3u8_vod.splitlines() if line.startswith('#EXTINF:'))) or None
+
+ def _extract_mpd_vod_duration(
+ self, mpd_url, video_id, note=None, errnote=None, data=None, headers={}, query={}):
+
+ mpd_doc = self._download_xml(
+ mpd_url, video_id,
+ note='Downloading MPD VOD manifest' if note is None else note,
+ errnote='Failed to download VOD manifest' if errnote is None else errnote,
+ fatal=False, data=data, headers=headers, query=query)
+ if not isinstance(mpd_doc, xml.etree.ElementTree.Element):
+ return None
+ return int_or_none(parse_duration(mpd_doc.get('mediaPresentationDuration')))
+
+ @staticmethod
+ def _xpath_ns(path, namespace=None):
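+        # e.g. _xpath_ns('./head/meta', 'urn:example') (hypothetical namespace)
+        # returns './{urn:example}head/{urn:example}meta'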
+ if not namespace:
+ return path
+ out = []
+ for c in path.split('/'):
+ if not c or c == '.':
+ out.append(c)
+ else:
+ out.append('{%s}%s' % (namespace, c))
+ return '/'.join(out)
+
+ def _extract_smil_formats_and_subtitles(self, smil_url, video_id, fatal=True, f4m_params=None, transform_source=None):
+ if self.get_param('ignore_no_formats_error'):
+ fatal = False
+
+ res = self._download_smil(smil_url, video_id, fatal=fatal, transform_source=transform_source)
+ if res is False:
+ assert not fatal
+ return [], {}
+ smil, urlh = res
+
+ return self._parse_smil_formats_and_subtitles(smil, urlh.url, video_id, f4m_params=f4m_params,
+ namespace=self._parse_smil_namespace(smil))
+
+ def _extract_smil_formats(self, *args, **kwargs):
+ fmts, subs = self._extract_smil_formats_and_subtitles(*args, **kwargs)
+ if subs:
+ self._report_ignoring_subs('SMIL')
+ return fmts
+
+ def _extract_smil_info(self, smil_url, video_id, fatal=True, f4m_params=None):
+ res = self._download_smil(smil_url, video_id, fatal=fatal)
+ if res is False:
+ return {}
+
+ smil, urlh = res
+ smil_url = urlh.url
+
+ return self._parse_smil(smil, smil_url, video_id, f4m_params=f4m_params)
+
+ def _download_smil(self, smil_url, video_id, fatal=True, transform_source=None):
+ return self._download_xml_handle(
+ smil_url, video_id, 'Downloading SMIL file',
+ 'Unable to download SMIL file', fatal=fatal, transform_source=transform_source)
+
+ def _parse_smil(self, smil, smil_url, video_id, f4m_params=None):
+ namespace = self._parse_smil_namespace(smil)
+
+ formats, subtitles = self._parse_smil_formats_and_subtitles(
+ smil, smil_url, video_id, namespace=namespace, f4m_params=f4m_params)
+
+ video_id = os.path.splitext(url_basename(smil_url))[0]
+ title = None
+ description = None
+ upload_date = None
+ for meta in smil.findall(self._xpath_ns('./head/meta', namespace)):
+ name = meta.attrib.get('name')
+ content = meta.attrib.get('content')
+ if not name or not content:
+ continue
+ if not title and name == 'title':
+ title = content
+ elif not description and name in ('description', 'abstract'):
+ description = content
+ elif not upload_date and name == 'date':
+ upload_date = unified_strdate(content)
+
+ thumbnails = [{
+ 'id': image.get('type'),
+ 'url': image.get('src'),
+ 'width': int_or_none(image.get('width')),
+ 'height': int_or_none(image.get('height')),
+ } for image in smil.findall(self._xpath_ns('.//image', namespace)) if image.get('src')]
+
+ return {
+ 'id': video_id,
+ 'title': title or video_id,
+ 'description': description,
+ 'upload_date': upload_date,
+ 'thumbnails': thumbnails,
+ 'formats': formats,
+ 'subtitles': subtitles,
+ }
+
+ def _parse_smil_namespace(self, smil):
+ return self._search_regex(
+ r'(?i)^{([^}]+)?}smil$', smil.tag, 'namespace', default=None)
+
+ def _parse_smil_formats(self, *args, **kwargs):
+ fmts, subs = self._parse_smil_formats_and_subtitles(*args, **kwargs)
+ if subs:
+ self._report_ignoring_subs('SMIL')
+ return fmts
+
+ def _parse_smil_formats_and_subtitles(
+ self, smil, smil_url, video_id, namespace=None, f4m_params=None, transform_rtmp_url=None):
+ base = smil_url
+ for meta in smil.findall(self._xpath_ns('./head/meta', namespace)):
+ b = meta.get('base') or meta.get('httpBase')
+ if b:
+ base = b
+ break
+
+ formats, subtitles = [], {}
+ rtmp_count = 0
+ http_count = 0
+ m3u8_count = 0
+ imgs_count = 0
+
+ srcs = set()
+ media = itertools.chain.from_iterable(
+ smil.findall(self._xpath_ns(arg, namespace))
+ for arg in ['.//video', './/audio', './/media'])
+ for medium in media:
+ src = medium.get('src')
+ if not src or src in srcs:
+ continue
+ srcs.add(src)
+
+ bitrate = float_or_none(medium.get('system-bitrate') or medium.get('systemBitrate'), 1000)
+ filesize = int_or_none(medium.get('size') or medium.get('fileSize'))
+ width = int_or_none(medium.get('width'))
+ height = int_or_none(medium.get('height'))
+ proto = medium.get('proto')
+ ext = medium.get('ext')
+ src_ext = determine_ext(src, default_ext=None) or ext or urlhandle_detect_ext(
+ self._request_webpage(HEADRequest(src), video_id, note='Requesting extension info', fatal=False))
+ streamer = medium.get('streamer') or base
+
+ if proto == 'rtmp' or streamer.startswith('rtmp'):
+ rtmp_count += 1
+ formats.append({
+ 'url': streamer,
+ 'play_path': src,
+ 'ext': 'flv',
+ 'format_id': 'rtmp-%d' % (rtmp_count if bitrate is None else bitrate),
+ 'tbr': bitrate,
+ 'filesize': filesize,
+ 'width': width,
+ 'height': height,
+ })
+ if transform_rtmp_url:
+ streamer, src = transform_rtmp_url(streamer, src)
+ formats[-1].update({
+ 'url': streamer,
+ 'play_path': src,
+ })
+ continue
+
+ src_url = src if src.startswith('http') else urllib.parse.urljoin(base, src)
+ src_url = src_url.strip()
+
+ if proto == 'm3u8' or src_ext == 'm3u8':
+ m3u8_formats, m3u8_subs = self._extract_m3u8_formats_and_subtitles(
+ src_url, video_id, ext or 'mp4', m3u8_id='hls', fatal=False)
+ self._merge_subtitles(m3u8_subs, target=subtitles)
+ if len(m3u8_formats) == 1:
+ m3u8_count += 1
+ m3u8_formats[0].update({
+ 'format_id': 'hls-%d' % (m3u8_count if bitrate is None else bitrate),
+ 'tbr': bitrate,
+ 'width': width,
+ 'height': height,
+ })
+ formats.extend(m3u8_formats)
+ elif src_ext == 'f4m':
+ f4m_url = src_url
+ if not f4m_params:
+ f4m_params = {
+ 'hdcore': '3.2.0',
+ 'plugin': 'flowplayer-3.2.0.1',
+ }
+ f4m_url += '&' if '?' in f4m_url else '?'
+ f4m_url += urllib.parse.urlencode(f4m_params)
+ formats.extend(self._extract_f4m_formats(f4m_url, video_id, f4m_id='hds', fatal=False))
+ elif src_ext == 'mpd':
+ mpd_formats, mpd_subs = self._extract_mpd_formats_and_subtitles(
+ src_url, video_id, mpd_id='dash', fatal=False)
+ formats.extend(mpd_formats)
+ self._merge_subtitles(mpd_subs, target=subtitles)
+ elif re.search(r'\.ism/[Mm]anifest', src_url):
+ ism_formats, ism_subs = self._extract_ism_formats_and_subtitles(
+ src_url, video_id, ism_id='mss', fatal=False)
+ formats.extend(ism_formats)
+ self._merge_subtitles(ism_subs, target=subtitles)
+ elif src_url.startswith('http') and self._is_valid_url(src, video_id):
+ http_count += 1
+ formats.append({
+ 'url': src_url,
+ 'ext': ext or src_ext or 'flv',
+ 'format_id': 'http-%d' % (bitrate or http_count),
+ 'tbr': bitrate,
+ 'filesize': filesize,
+ 'width': width,
+ 'height': height,
+ })
+
+ for medium in smil.findall(self._xpath_ns('.//imagestream', namespace)):
+ src = medium.get('src')
+ if not src or src in srcs:
+ continue
+ srcs.add(src)
+
+ imgs_count += 1
+ formats.append({
+                'format_id': 'imagestream-%d' % imgs_count,
+ 'url': src,
+ 'ext': mimetype2ext(medium.get('type')),
+ 'acodec': 'none',
+ 'vcodec': 'none',
+ 'width': int_or_none(medium.get('width')),
+ 'height': int_or_none(medium.get('height')),
+ 'format_note': 'SMIL storyboards',
+ })
+
+ smil_subs = self._parse_smil_subtitles(smil, namespace=namespace)
+ self._merge_subtitles(smil_subs, target=subtitles)
+
+ return formats, subtitles
+
+ def _parse_smil_subtitles(self, smil, namespace=None, subtitles_lang='en'):
+ urls = []
+ subtitles = {}
+ for num, textstream in enumerate(smil.findall(self._xpath_ns('.//textstream', namespace))):
+ src = textstream.get('src')
+ if not src or src in urls:
+ continue
+ urls.append(src)
+ ext = textstream.get('ext') or mimetype2ext(textstream.get('type')) or determine_ext(src)
+ lang = textstream.get('systemLanguage') or textstream.get('systemLanguageName') or textstream.get('lang') or subtitles_lang
+ subtitles.setdefault(lang, []).append({
+ 'url': src,
+ 'ext': ext,
+ })
+ return subtitles
+
+ def _extract_xspf_playlist(self, xspf_url, playlist_id, fatal=True):
+ res = self._download_xml_handle(
+            xspf_url, playlist_id, 'Downloading xspf playlist',
+ 'Unable to download xspf manifest', fatal=fatal)
+ if res is False:
+ return []
+
+ xspf, urlh = res
+ xspf_url = urlh.url
+
+ return self._parse_xspf(
+ xspf, playlist_id, xspf_url=xspf_url,
+ xspf_base_url=base_url(xspf_url))
+
+ def _parse_xspf(self, xspf_doc, playlist_id, xspf_url=None, xspf_base_url=None):
+ NS_MAP = {
+ 'xspf': 'http://xspf.org/ns/0/',
+ 's1': 'http://static.streamone.nl/player/ns/0',
+ }
+
+ entries = []
+ for track in xspf_doc.findall(xpath_with_ns('./xspf:trackList/xspf:track', NS_MAP)):
+ title = xpath_text(
+ track, xpath_with_ns('./xspf:title', NS_MAP), 'title', default=playlist_id)
+ description = xpath_text(
+ track, xpath_with_ns('./xspf:annotation', NS_MAP), 'description')
+ thumbnail = xpath_text(
+ track, xpath_with_ns('./xspf:image', NS_MAP), 'thumbnail')
+ duration = float_or_none(
+ xpath_text(track, xpath_with_ns('./xspf:duration', NS_MAP), 'duration'), 1000)
+
+ formats = []
+ for location in track.findall(xpath_with_ns('./xspf:location', NS_MAP)):
+ format_url = urljoin(xspf_base_url, location.text)
+ if not format_url:
+ continue
+ formats.append({
+ 'url': format_url,
+ 'manifest_url': xspf_url,
+ 'format_id': location.get(xpath_with_ns('s1:label', NS_MAP)),
+ 'width': int_or_none(location.get(xpath_with_ns('s1:width', NS_MAP))),
+ 'height': int_or_none(location.get(xpath_with_ns('s1:height', NS_MAP))),
+ })
+
+ entries.append({
+ 'id': playlist_id,
+ 'title': title,
+ 'description': description,
+ 'thumbnail': thumbnail,
+ 'duration': duration,
+ 'formats': formats,
+ })
+ return entries
+
+ def _extract_mpd_formats(self, *args, **kwargs):
+ fmts, subs = self._extract_mpd_formats_and_subtitles(*args, **kwargs)
+ if subs:
+ self._report_ignoring_subs('DASH')
+ return fmts
+
+ def _extract_mpd_formats_and_subtitles(self, *args, **kwargs):
+ periods = self._extract_mpd_periods(*args, **kwargs)
+ return self._merge_mpd_periods(periods)
+
+ def _extract_mpd_periods(
+ self, mpd_url, video_id, mpd_id=None, note=None, errnote=None,
+ fatal=True, data=None, headers={}, query={}):
+
+ if self.get_param('ignore_no_formats_error'):
+ fatal = False
+
+ res = self._download_xml_handle(
+ mpd_url, video_id,
+ note='Downloading MPD manifest' if note is None else note,
+ errnote='Failed to download MPD manifest' if errnote is None else errnote,
+ fatal=fatal, data=data, headers=headers, query=query)
+ if res is False:
+ return []
+ mpd_doc, urlh = res
+ if mpd_doc is None:
+ return []
+
+        # We could have been redirected to a new URL when we retrieved our MPD file.
+ mpd_url = urlh.url
+ mpd_base_url = base_url(mpd_url)
+
+ return self._parse_mpd_periods(mpd_doc, mpd_id, mpd_base_url, mpd_url)
+
+ def _parse_mpd_formats(self, *args, **kwargs):
+ fmts, subs = self._parse_mpd_formats_and_subtitles(*args, **kwargs)
+ if subs:
+ self._report_ignoring_subs('DASH')
+ return fmts
+
+ def _parse_mpd_formats_and_subtitles(self, *args, **kwargs):
+ periods = self._parse_mpd_periods(*args, **kwargs)
+ return self._merge_mpd_periods(periods)
+
+ def _merge_mpd_periods(self, periods):
+ """
+ Combine all formats and subtitles from an MPD manifest into a single list,
+        by concatenating streams with similar formats.
+ """
+ formats, subtitles = {}, {}
+ for period in periods:
+ for f in period['formats']:
+ assert 'is_dash_periods' not in f, 'format already processed'
+ f['is_dash_periods'] = True
+                format_key = tuple(v for k, v in f.items() if k not in (
+                    'format_id', 'fragments', 'manifest_stream_number'))
+ if format_key not in formats:
+ formats[format_key] = f
+ elif 'fragments' in f:
+ formats[format_key].setdefault('fragments', []).extend(f['fragments'])
+
+ if subtitles and period['subtitles']:
+ self.report_warning(bug_reports_message(
+ 'Found subtitles in multiple periods in the DASH manifest; '
+ 'if part of the subtitles are missing,'
+ ), only_once=True)
+
+ for sub_lang, sub_info in period['subtitles'].items():
+ subtitles.setdefault(sub_lang, []).extend(sub_info)
+
+ return list(formats.values()), subtitles
+
+ def _parse_mpd_periods(self, mpd_doc, mpd_id=None, mpd_base_url='', mpd_url=None):
+ """
+ Parse formats from MPD manifest.
+ References:
+ 1. MPEG-DASH Standard, ISO/IEC 23009-1:2014(E),
+ http://standards.iso.org/ittf/PubliclyAvailableStandards/c065274_ISO_IEC_23009-1_2014.zip
+ 2. https://en.wikipedia.org/wiki/Dynamic_Adaptive_Streaming_over_HTTP
+ """
+ if not self.get_param('dynamic_mpd', True):
+ if mpd_doc.get('type') == 'dynamic':
+ return [], {}
+
+ namespace = self._search_regex(r'(?i)^{([^}]+)?}MPD$', mpd_doc.tag, 'namespace', default=None)
+
+ def _add_ns(path):
+ return self._xpath_ns(path, namespace)
+
+ def is_drm_protected(element):
+ return element.find(_add_ns('ContentProtection')) is not None
+
+ def extract_multisegment_info(element, ms_parent_info):
+ ms_info = ms_parent_info.copy()
+
+            # As per [1, 5.3.9.2.2], SegmentList and SegmentTemplate share some
+            # common attributes and elements. We only extract what is relevant
+            # for us.
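+            # For example, <S t="0" d="9000" r="2"/> in a SegmentTimeline
+            # describes 1 + r = 3 segments of 9000 timescale units each,
+            # starting at t=0 (illustrative)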
+ def extract_common(source):
+ segment_timeline = source.find(_add_ns('SegmentTimeline'))
+ if segment_timeline is not None:
+ s_e = segment_timeline.findall(_add_ns('S'))
+ if s_e:
+ ms_info['total_number'] = 0
+ ms_info['s'] = []
+ for s in s_e:
+ r = int(s.get('r', 0))
+ ms_info['total_number'] += 1 + r
+ ms_info['s'].append({
+ 't': int(s.get('t', 0)),
+ # @d is mandatory (see [1, 5.3.9.6.2, Table 17, page 60])
+ 'd': int(s.attrib['d']),
+ 'r': r,
+ })
+ start_number = source.get('startNumber')
+ if start_number:
+ ms_info['start_number'] = int(start_number)
+ timescale = source.get('timescale')
+ if timescale:
+ ms_info['timescale'] = int(timescale)
+ segment_duration = source.get('duration')
+ if segment_duration:
+ ms_info['segment_duration'] = float(segment_duration)
+
+ def extract_Initialization(source):
+ initialization = source.find(_add_ns('Initialization'))
+ if initialization is not None:
+ ms_info['initialization_url'] = initialization.attrib['sourceURL']
+
+ segment_list = element.find(_add_ns('SegmentList'))
+ if segment_list is not None:
+ extract_common(segment_list)
+ extract_Initialization(segment_list)
+ segment_urls_e = segment_list.findall(_add_ns('SegmentURL'))
+ if segment_urls_e:
+ ms_info['segment_urls'] = [segment.attrib['media'] for segment in segment_urls_e]
+ else:
+ segment_template = element.find(_add_ns('SegmentTemplate'))
+ if segment_template is not None:
+ extract_common(segment_template)
+ media = segment_template.get('media')
+ if media:
+ ms_info['media'] = media
+ initialization = segment_template.get('initialization')
+ if initialization:
+ ms_info['initialization'] = initialization
+ else:
+ extract_Initialization(segment_template)
+ return ms_info
+
+ mpd_duration = parse_duration(mpd_doc.get('mediaPresentationDuration'))
+ stream_numbers = collections.defaultdict(int)
+ for period_idx, period in enumerate(mpd_doc.findall(_add_ns('Period'))):
+ period_entry = {
+ 'id': period.get('id', f'period-{period_idx}'),
+ 'formats': [],
+ 'subtitles': collections.defaultdict(list),
+ }
+ period_duration = parse_duration(period.get('duration')) or mpd_duration
+ period_ms_info = extract_multisegment_info(period, {
+ 'start_number': 1,
+ 'timescale': 1,
+ })
+ for adaptation_set in period.findall(_add_ns('AdaptationSet')):
+ adaption_set_ms_info = extract_multisegment_info(adaptation_set, period_ms_info)
+ for representation in adaptation_set.findall(_add_ns('Representation')):
+ representation_attrib = adaptation_set.attrib.copy()
+ representation_attrib.update(representation.attrib)
+ # According to [1, 5.3.7.2, Table 9, page 41], @mimeType is mandatory
+ mime_type = representation_attrib['mimeType']
+ content_type = representation_attrib.get('contentType', mime_type.split('/')[0])
+
+ codec_str = representation_attrib.get('codecs', '')
+                    # Some kind of binary subtitle found in some YouTube livestreams
+ if mime_type == 'application/x-rawcc':
+ codecs = {'scodec': codec_str}
+ else:
+ codecs = parse_codecs(codec_str)
+ if content_type not in ('video', 'audio', 'text'):
+ if mime_type == 'image/jpeg':
+ content_type = mime_type
+ elif codecs.get('vcodec', 'none') != 'none':
+ content_type = 'video'
+ elif codecs.get('acodec', 'none') != 'none':
+ content_type = 'audio'
+ elif codecs.get('scodec', 'none') != 'none':
+ content_type = 'text'
+ elif mimetype2ext(mime_type) in ('tt', 'dfxp', 'ttml', 'xml', 'json'):
+ content_type = 'text'
+ else:
+ self.report_warning('Unknown MIME type %s in DASH manifest' % mime_type)
+ continue
+
+ base_url = ''
+ for element in (representation, adaptation_set, period, mpd_doc):
+ base_url_e = element.find(_add_ns('BaseURL'))
+ if try_call(lambda: base_url_e.text) is not None:
+ base_url = base_url_e.text + base_url
+ if re.match(r'^https?://', base_url):
+ break
+ if mpd_base_url and base_url.startswith('/'):
+ base_url = urllib.parse.urljoin(mpd_base_url, base_url)
+ elif mpd_base_url and not re.match(r'^https?://', base_url):
+ if not mpd_base_url.endswith('/'):
+ mpd_base_url += '/'
+ base_url = mpd_base_url + base_url
+ representation_id = representation_attrib.get('id')
+ lang = representation_attrib.get('lang')
+ url_el = representation.find(_add_ns('BaseURL'))
+ filesize = int_or_none(url_el.attrib.get('{http://youtube.com/yt/2012/10/10}contentLength') if url_el is not None else None)
+ bandwidth = int_or_none(representation_attrib.get('bandwidth'))
+ if representation_id is not None:
+ format_id = representation_id
+ else:
+ format_id = content_type
+ if mpd_id:
+ format_id = mpd_id + '-' + format_id
+ if content_type in ('video', 'audio'):
+ f = {
+ 'format_id': format_id,
+ 'manifest_url': mpd_url,
+ 'ext': mimetype2ext(mime_type),
+ 'width': int_or_none(representation_attrib.get('width')),
+ 'height': int_or_none(representation_attrib.get('height')),
+ 'tbr': float_or_none(bandwidth, 1000),
+ 'asr': int_or_none(representation_attrib.get('audioSamplingRate')),
+ 'fps': int_or_none(representation_attrib.get('frameRate')),
+ 'language': lang if lang not in ('mul', 'und', 'zxx', 'mis') else None,
+ 'format_note': 'DASH %s' % content_type,
+ 'filesize': filesize,
+ 'container': mimetype2ext(mime_type) + '_dash',
+ **codecs
+ }
+ elif content_type == 'text':
+ f = {
+ 'ext': mimetype2ext(mime_type),
+ 'manifest_url': mpd_url,
+ 'filesize': filesize,
+ }
+ elif content_type == 'image/jpeg':
+ # See test case in VikiIE
+ # https://www.viki.com/videos/1175236v-choosing-spouse-by-lottery-episode-1
+ f = {
+ 'format_id': format_id,
+ 'ext': 'mhtml',
+ 'manifest_url': mpd_url,
+ 'format_note': 'DASH storyboards (jpeg)',
+ 'acodec': 'none',
+ 'vcodec': 'none',
+ }
+ if is_drm_protected(adaptation_set) or is_drm_protected(representation):
+ f['has_drm'] = True
+ representation_ms_info = extract_multisegment_info(representation, adaption_set_ms_info)
+
+ def prepare_template(template_name, identifiers):
+ tmpl = representation_ms_info[template_name]
+ if representation_id is not None:
+ tmpl = tmpl.replace('$RepresentationID$', representation_id)
+                            # First off, % characters outside $...$ templates
+                            # must be escaped by doubling for proper processing
+                            # by the % string-formatting operator used below (see
+                            # https://github.com/ytdl-org/youtube-dl/issues/16867).
+ t = ''
+ in_template = False
+ for c in tmpl:
+ t += c
+ if c == '$':
+ in_template = not in_template
+ elif c == '%' and not in_template:
+ t += c
+                            # Next, $...$ templates are translated to their
+                            # %(...) counterparts, to be used with the % operator
+ t = re.sub(r'\$(%s)\$' % '|'.join(identifiers), r'%(\1)d', t)
+ t = re.sub(r'\$(%s)%%([^$]+)\$' % '|'.join(identifiers), r'%(\1)\2', t)
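+                            # e.g. a (hypothetical) template
+                            # 'seg_$RepresentationID$_$Number%05d$.m4s' has by
+                            # now become 'seg_<id>_%(Number)05d.m4s'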
+                            # Finally, $$ is unescaped to a literal $
+                            return t.replace('$$', '$')
+
+ # @initialization is a regular template like @media one
+ # so it should be handled just the same way (see
+ # https://github.com/ytdl-org/youtube-dl/issues/11605)
+ if 'initialization' in representation_ms_info:
+ initialization_template = prepare_template(
+ 'initialization',
+ # As per [1, 5.3.9.4.2, Table 15, page 54] $Number$ and
+ # $Time$ shall not be included for @initialization thus
+ # only $Bandwidth$ remains
+ ('Bandwidth', ))
+ representation_ms_info['initialization_url'] = initialization_template % {
+ 'Bandwidth': bandwidth,
+ }
+
+ def location_key(location):
+ return 'url' if re.match(r'^https?://', location) else 'path'
+
+ if 'segment_urls' not in representation_ms_info and 'media' in representation_ms_info:
+
+ media_template = prepare_template('media', ('Number', 'Bandwidth', 'Time'))
+ media_location_key = location_key(media_template)
+
+ # As per [1, 5.3.9.4.4, Table 16, page 55] $Number$ and $Time$
+ # can't be used at the same time
+ if '%(Number' in media_template and 's' not in representation_ms_info:
+ segment_duration = None
+ if 'total_number' not in representation_ms_info and 'segment_duration' in representation_ms_info:
+ segment_duration = float_or_none(representation_ms_info['segment_duration'], representation_ms_info['timescale'])
+ representation_ms_info['total_number'] = int(math.ceil(
+ float_or_none(period_duration, segment_duration, default=0)))
+ representation_ms_info['fragments'] = [{
+ media_location_key: media_template % {
+ 'Number': segment_number,
+ 'Bandwidth': bandwidth,
+ },
+ 'duration': segment_duration,
+ } for segment_number in range(
+ representation_ms_info['start_number'],
+ representation_ms_info['total_number'] + representation_ms_info['start_number'])]
+ else:
+ # $Number*$ or $Time$ in media template with S list available
+ # Example $Number*$: http://www.svtplay.se/klipp/9023742/stopptid-om-bjorn-borg
+ # Example $Time$: https://play.arkena.com/embed/avp/v2/player/media/b41dda37-d8e7-4d3f-b1b5-9a9db578bdfe/1/129411
+ representation_ms_info['fragments'] = []
+ segment_time = 0
+ segment_d = None
+ segment_number = representation_ms_info['start_number']
+
+ def add_segment_url():
+ segment_url = media_template % {
+ 'Time': segment_time,
+ 'Bandwidth': bandwidth,
+ 'Number': segment_number,
+ }
+ representation_ms_info['fragments'].append({
+ media_location_key: segment_url,
+ 'duration': float_or_none(segment_d, representation_ms_info['timescale']),
+ })
+
+ for num, s in enumerate(representation_ms_info['s']):
+ segment_time = s.get('t') or segment_time
+ segment_d = s['d']
+ add_segment_url()
+ segment_number += 1
+ for r in range(s.get('r', 0)):
+ segment_time += segment_d
+ add_segment_url()
+ segment_number += 1
+ segment_time += segment_d
+ elif 'segment_urls' in representation_ms_info and 's' in representation_ms_info:
+ # No media template,
+ # e.g. https://www.youtube.com/watch?v=iXZV5uAYMJI
+ # or any YouTube dashsegments video
+ fragments = []
+ segment_index = 0
+ timescale = representation_ms_info['timescale']
+ for s in representation_ms_info['s']:
+ duration = float_or_none(s['d'], timescale)
+ for r in range(s.get('r', 0) + 1):
+ segment_uri = representation_ms_info['segment_urls'][segment_index]
+ fragments.append({
+ location_key(segment_uri): segment_uri,
+ 'duration': duration,
+ })
+ segment_index += 1
+ representation_ms_info['fragments'] = fragments
+ elif 'segment_urls' in representation_ms_info:
+ # Segment URLs with no SegmentTimeline
+ # E.g. https://www.seznam.cz/zpravy/clanek/cesko-zasahne-vitr-o-sile-vichrice-muze-byt-i-zivotu-nebezpecny-39091
+ # https://github.com/ytdl-org/youtube-dl/pull/14844
+ fragments = []
+ segment_duration = float_or_none(
+ representation_ms_info['segment_duration'],
+ representation_ms_info['timescale']) if 'segment_duration' in representation_ms_info else None
+ for segment_url in representation_ms_info['segment_urls']:
+ fragment = {
+ location_key(segment_url): segment_url,
+ }
+ if segment_duration:
+ fragment['duration'] = segment_duration
+ fragments.append(fragment)
+ representation_ms_info['fragments'] = fragments
+                        # If a 'fragments' key is available, then we correctly recognized
+                        # fragmented media. Otherwise we assume unfragmented media with
+                        # direct access. Technically, this assumption is not necessarily
+                        # correct, since we may simply not support some forms of fragmented
+                        # media renditions yet, but for now we use this fallback.
+ if 'fragments' in representation_ms_info:
+ f.update({
+ # NB: mpd_url may be empty when MPD manifest is parsed from a string
+ 'url': mpd_url or base_url,
+ 'fragment_base_url': base_url,
+ 'fragments': [],
+ 'protocol': 'http_dash_segments' if mime_type != 'image/jpeg' else 'mhtml',
+ })
+ if 'initialization_url' in representation_ms_info:
+ initialization_url = representation_ms_info['initialization_url']
+ if not f.get('url'):
+ f['url'] = initialization_url
+ f['fragments'].append({location_key(initialization_url): initialization_url})
+ f['fragments'].extend(representation_ms_info['fragments'])
+ if not period_duration:
+ period_duration = try_get(
+ representation_ms_info,
+ lambda r: sum(frag['duration'] for frag in r['fragments']), float)
+ else:
+ # Assuming direct URL to unfragmented media.
+ f['url'] = base_url
+ if content_type in ('video', 'audio', 'image/jpeg'):
+ f['manifest_stream_number'] = stream_numbers[f['url']]
+ stream_numbers[f['url']] += 1
+ period_entry['formats'].append(f)
+ elif content_type == 'text':
+ period_entry['subtitles'][lang or 'und'].append(f)
+ yield period_entry
+
+ def _extract_ism_formats(self, *args, **kwargs):
+ fmts, subs = self._extract_ism_formats_and_subtitles(*args, **kwargs)
+ if subs:
+ self._report_ignoring_subs('ISM')
+ return fmts
+
+ def _extract_ism_formats_and_subtitles(self, ism_url, video_id, ism_id=None, note=None, errnote=None, fatal=True, data=None, headers={}, query={}):
+ if self.get_param('ignore_no_formats_error'):
+ fatal = False
+
+ res = self._download_xml_handle(
+ ism_url, video_id,
+ note='Downloading ISM manifest' if note is None else note,
+ errnote='Failed to download ISM manifest' if errnote is None else errnote,
+ fatal=fatal, data=data, headers=headers, query=query)
+ if res is False:
+ return [], {}
+ ism_doc, urlh = res
+ if ism_doc is None:
+ return [], {}
+
+ return self._parse_ism_formats_and_subtitles(ism_doc, urlh.url, ism_id)
+
+ def _parse_ism_formats_and_subtitles(self, ism_doc, ism_url, ism_id=None):
+ """
+ Parse formats from ISM manifest.
+ References:
+ 1. [MS-SSTR]: Smooth Streaming Protocol,
+ https://msdn.microsoft.com/en-us/library/ff469518.aspx
+ """
+ if ism_doc.get('IsLive') == 'TRUE':
+ return [], {}
+
+ duration = int(ism_doc.attrib['Duration'])
+ timescale = int_or_none(ism_doc.get('TimeScale')) or 10000000
+
+ formats = []
+ subtitles = {}
+ for stream in ism_doc.findall('StreamIndex'):
+ stream_type = stream.get('Type')
+ if stream_type not in ('video', 'audio', 'text'):
+ continue
+ url_pattern = stream.attrib['Url']
+ stream_timescale = int_or_none(stream.get('TimeScale')) or timescale
+ stream_name = stream.get('Name')
+ stream_language = stream.get('Language', 'und')
+ for track in stream.findall('QualityLevel'):
+ KNOWN_TAGS = {'255': 'AACL', '65534': 'EC-3'}
+ fourcc = track.get('FourCC') or KNOWN_TAGS.get(track.get('AudioTag'))
+ # TODO: add support for WVC1 and WMAP
+ if fourcc not in ('H264', 'AVC1', 'AACL', 'TTML', 'EC-3'):
+ self.report_warning('%s is not a supported codec' % fourcc)
+ continue
+ tbr = int(track.attrib['Bitrate']) // 1000
+                # [1] does not mention Width and Height attributes. However,
+                # they're often present while MaxWidth and MaxHeight are
+                # missing, so they should be used as fallbacks
+ width = int_or_none(track.get('MaxWidth') or track.get('Width'))
+ height = int_or_none(track.get('MaxHeight') or track.get('Height'))
+ sampling_rate = int_or_none(track.get('SamplingRate'))
+
+ track_url_pattern = re.sub(r'{[Bb]itrate}', track.attrib['Bitrate'], url_pattern)
+ track_url_pattern = urllib.parse.urljoin(ism_url, track_url_pattern)
+
+ fragments = []
+ fragment_ctx = {
+ 'time': 0,
+ }
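+                # Each <c> element describes one or more fragments; e.g.
+                # <c t="0" d="20000000" r="3"/> with a timescale of 10000000
+                # expands to 3 fragments of 2 seconds each (illustrative)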
+ stream_fragments = stream.findall('c')
+ for stream_fragment_index, stream_fragment in enumerate(stream_fragments):
+ fragment_ctx['time'] = int_or_none(stream_fragment.get('t')) or fragment_ctx['time']
+ fragment_repeat = int_or_none(stream_fragment.get('r')) or 1
+ fragment_ctx['duration'] = int_or_none(stream_fragment.get('d'))
+ if not fragment_ctx['duration']:
+ try:
+                                next_fragment_time = int(stream_fragments[stream_fragment_index + 1].attrib['t'])
+                            except (IndexError, KeyError):
+ next_fragment_time = duration
+ fragment_ctx['duration'] = (next_fragment_time - fragment_ctx['time']) / fragment_repeat
+ for _ in range(fragment_repeat):
+ fragments.append({
+ 'url': re.sub(r'{start[ _]time}', str(fragment_ctx['time']), track_url_pattern),
+ 'duration': fragment_ctx['duration'] / stream_timescale,
+ })
+ fragment_ctx['time'] += fragment_ctx['duration']
+
+ if stream_type == 'text':
+ subtitles.setdefault(stream_language, []).append({
+ 'ext': 'ismt',
+ 'protocol': 'ism',
+ 'url': ism_url,
+ 'manifest_url': ism_url,
+ 'fragments': fragments,
+ '_download_params': {
+ 'stream_type': stream_type,
+ 'duration': duration,
+ 'timescale': stream_timescale,
+ 'fourcc': fourcc,
+ 'language': stream_language,
+ 'codec_private_data': track.get('CodecPrivateData'),
+ }
+ })
+ elif stream_type in ('video', 'audio'):
+ formats.append({
+ 'format_id': join_nonempty(ism_id, stream_name, tbr),
+ 'url': ism_url,
+ 'manifest_url': ism_url,
+ 'ext': 'ismv' if stream_type == 'video' else 'isma',
+ 'width': width,
+ 'height': height,
+ 'tbr': tbr,
+ 'asr': sampling_rate,
+ 'vcodec': 'none' if stream_type == 'audio' else fourcc,
+ 'acodec': 'none' if stream_type == 'video' else fourcc,
+ 'protocol': 'ism',
+ 'fragments': fragments,
+ 'has_drm': ism_doc.find('Protection') is not None,
+ 'language': stream_language,
+ 'audio_channels': int_or_none(track.get('Channels')),
+ '_download_params': {
+ 'stream_type': stream_type,
+ 'duration': duration,
+ 'timescale': stream_timescale,
+ 'width': width or 0,
+ 'height': height or 0,
+ 'fourcc': fourcc,
+ 'language': stream_language,
+ 'codec_private_data': track.get('CodecPrivateData'),
+ 'sampling_rate': sampling_rate,
+ 'channels': int_or_none(track.get('Channels', 2)),
+ 'bits_per_sample': int_or_none(track.get('BitsPerSample', 16)),
+ 'nal_unit_length_field': int_or_none(track.get('NALUnitLengthField', 4)),
+ },
+ })
+ return formats, subtitles
+
+ def _parse_html5_media_entries(self, base_url, webpage, video_id, m3u8_id=None, m3u8_entry_protocol='m3u8_native', mpd_id=None, preference=None, quality=None):
+ def absolute_url(item_url):
+ return urljoin(base_url, item_url)
+
+ def parse_content_type(content_type):
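+            # e.g. 'video/mp4; codecs="avc1.64001f, mp4a.40.2"' (illustrative)
+            # yields {'ext': 'mp4'} plus the parsed vcodec/acodec entries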
+ if not content_type:
+ return {}
+ ctr = re.search(r'(?P<mimetype>[^/]+/[^;]+)(?:;\s*codecs="?(?P<codecs>[^"]+))?', content_type)
+ if ctr:
+ mimetype, codecs = ctr.groups()
+ f = parse_codecs(codecs)
+ f['ext'] = mimetype2ext(mimetype)
+ return f
+ return {}
+
+ def _media_formats(src, cur_media_type, type_info=None):
+ type_info = type_info or {}
+ full_url = absolute_url(src)
+ ext = type_info.get('ext') or determine_ext(full_url)
+ if ext == 'm3u8':
+ is_plain_url = False
+ formats = self._extract_m3u8_formats(
+ full_url, video_id, ext='mp4',
+ entry_protocol=m3u8_entry_protocol, m3u8_id=m3u8_id,
+ preference=preference, quality=quality, fatal=False)
+ elif ext == 'mpd':
+ is_plain_url = False
+ formats = self._extract_mpd_formats(
+ full_url, video_id, mpd_id=mpd_id, fatal=False)
+ else:
+ is_plain_url = True
+ formats = [{
+ 'url': full_url,
+ 'vcodec': 'none' if cur_media_type == 'audio' else None,
+ 'ext': ext,
+ }]
+ return is_plain_url, formats
+
+ entries = []
+ # amp-video and amp-audio are very similar to their HTML5 counterparts
+ # so we will include them right here (see
+ # https://www.ampproject.org/docs/reference/components/amp-video)
+ # For dl8-* tags see https://delight-vr.com/documentation/dl8-video/
+ _MEDIA_TAG_NAME_RE = r'(?:(?:amp|dl8(?:-live)?)-)?(video|audio)'
+ media_tags = [(media_tag, media_tag_name, media_type, '')
+ for media_tag, media_tag_name, media_type
+ in re.findall(r'(?s)(<(%s)[^>]*/>)' % _MEDIA_TAG_NAME_RE, webpage)]
+ media_tags.extend(re.findall(
+ # We only allow video|audio followed by a whitespace or '>'.
+            # Allowing more characters may result in a significant slowdown (see
+ # https://github.com/ytdl-org/youtube-dl/issues/11979,
+ # e.g. http://www.porntrex.com/maps/videositemap.xml).
+ r'(?s)(<(?P<tag>%s)(?:\s+[^>]*)?>)(.*?)</(?P=tag)>' % _MEDIA_TAG_NAME_RE, webpage))
+ for media_tag, _, media_type, media_content in media_tags:
+ media_info = {
+ 'formats': [],
+ 'subtitles': {},
+ }
+ media_attributes = extract_attributes(media_tag)
+ src = strip_or_none(dict_get(media_attributes, ('src', 'data-video-src', 'data-src', 'data-source')))
+ if src:
+ f = parse_content_type(media_attributes.get('type'))
+ _, formats = _media_formats(src, media_type, f)
+ media_info['formats'].extend(formats)
+ media_info['thumbnail'] = absolute_url(media_attributes.get('poster'))
+ if media_content:
+ for source_tag in re.findall(r'<source[^>]+>', media_content):
+ s_attr = extract_attributes(source_tag)
+                    # data-video-src and data-src are non-standard, but seen
+                    # several times in the wild
+ src = strip_or_none(dict_get(s_attr, ('src', 'data-video-src', 'data-src', 'data-source')))
+ if not src:
+ continue
+ f = parse_content_type(s_attr.get('type'))
+ is_plain_url, formats = _media_formats(src, media_type, f)
+ if is_plain_url:
+                        # The width, height, res, label and title attributes are
+                        # all non-standard, but seen several times in the wild
+ labels = [
+ s_attr.get(lbl)
+ for lbl in ('label', 'title')
+ if str_or_none(s_attr.get(lbl))
+ ]
+ width = int_or_none(s_attr.get('width'))
+ height = (int_or_none(s_attr.get('height'))
+ or int_or_none(s_attr.get('res')))
+ if not width or not height:
+ for lbl in labels:
+ resolution = parse_resolution(lbl)
+ if not resolution:
+ continue
+ width = width or resolution.get('width')
+ height = height or resolution.get('height')
+ for lbl in labels:
+ tbr = parse_bitrate(lbl)
+ if tbr:
+ break
+ else:
+ tbr = None
+ f.update({
+ 'width': width,
+ 'height': height,
+ 'tbr': tbr,
+ 'format_id': s_attr.get('label') or s_attr.get('title'),
+ })
+ f.update(formats[0])
+ media_info['formats'].append(f)
+ else:
+ media_info['formats'].extend(formats)
+ for track_tag in re.findall(r'<track[^>]+>', media_content):
+ track_attributes = extract_attributes(track_tag)
+ kind = track_attributes.get('kind')
+ if not kind or kind in ('subtitles', 'captions'):
+ src = strip_or_none(track_attributes.get('src'))
+ if not src:
+ continue
+ lang = track_attributes.get('srclang') or track_attributes.get('lang') or track_attributes.get('label')
+ media_info['subtitles'].setdefault(lang, []).append({
+ 'url': absolute_url(src),
+ })
+ for f in media_info['formats']:
+ f.setdefault('http_headers', {})['Referer'] = base_url
+ if media_info['formats'] or media_info['subtitles']:
+ entries.append(media_info)
+ return entries
+
+ def _extract_akamai_formats(self, *args, **kwargs):
+ fmts, subs = self._extract_akamai_formats_and_subtitles(*args, **kwargs)
+ if subs:
+ self._report_ignoring_subs('akamai')
+ return fmts
+
+ def _extract_akamai_formats_and_subtitles(self, manifest_url, video_id, hosts={}):
+ signed = 'hdnea=' in manifest_url
+ if not signed:
+ # https://learn.akamai.com/en-us/webhelp/media-services-on-demand/stream-packaging-user-guide/GUID-BE6C0F73-1E06-483B-B0EA-57984B91B7F9.html
+ manifest_url = re.sub(
+ r'(?:b=[\d,-]+|(?:__a__|attributes)=off|__b__=\d+)&?',
+ '', manifest_url).strip('?')
+
+ formats = []
+ subtitles = {}
+
+ hdcore_sign = 'hdcore=3.7.0'
+ f4m_url = re.sub(r'(https?://[^/]+)/i/', r'\1/z/', manifest_url).replace('/master.m3u8', '/manifest.f4m')
+ hds_host = hosts.get('hds')
+ if hds_host:
+ f4m_url = re.sub(r'(https?://)[^/]+', r'\1' + hds_host, f4m_url)
+ if 'hdcore=' not in f4m_url:
+ f4m_url += ('&' if '?' in f4m_url else '?') + hdcore_sign
+ f4m_formats = self._extract_f4m_formats(
+ f4m_url, video_id, f4m_id='hds', fatal=False)
+ for entry in f4m_formats:
+ entry.update({'extra_param_to_segment_url': hdcore_sign})
+ formats.extend(f4m_formats)
+
+ m3u8_url = re.sub(r'(https?://[^/]+)/z/', r'\1/i/', manifest_url).replace('/manifest.f4m', '/master.m3u8')
+ hls_host = hosts.get('hls')
+ if hls_host:
+ m3u8_url = re.sub(r'(https?://)[^/]+', r'\1' + hls_host, m3u8_url)
+ m3u8_formats, m3u8_subtitles = self._extract_m3u8_formats_and_subtitles(
+ m3u8_url, video_id, 'mp4', 'm3u8_native',
+ m3u8_id='hls', fatal=False)
+ formats.extend(m3u8_formats)
+ subtitles = self._merge_subtitles(subtitles, m3u8_subtitles)
+
+ http_host = hosts.get('http')
+ if http_host and m3u8_formats and not signed:
+ REPL_REGEX = r'https?://[^/]+/i/([^,]+),([^/]+),([^/]+)\.csmil/.+'
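+            # e.g. for a (hypothetical) URL like
+            # 'https://host/i/video_,300,600,900,.mp4.csmil/master.m3u8'
+            # the qualities extracted below are ['300', '600', '900']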
+ qualities = re.match(REPL_REGEX, m3u8_url).group(2).split(',')
+ qualities_length = len(qualities)
+ if len(m3u8_formats) in (qualities_length, qualities_length + 1):
+ i = 0
+ for f in m3u8_formats:
+ if f['vcodec'] != 'none':
+ for protocol in ('http', 'https'):
+ http_f = f.copy()
+ del http_f['manifest_url']
+ http_url = re.sub(
+ REPL_REGEX, protocol + fr'://{http_host}/\g<1>{qualities[i]}\3', f['url'])
+ http_f.update({
+ 'format_id': http_f['format_id'].replace('hls-', protocol + '-'),
+ 'url': http_url,
+ 'protocol': protocol,
+ })
+ formats.append(http_f)
+ i += 1
+
+ return formats, subtitles
+
+ def _extract_wowza_formats(self, url, video_id, m3u8_entry_protocol='m3u8_native', skip_protocols=[]):
+ query = urllib.parse.urlparse(url).query
+ url = re.sub(r'/(?:manifest|playlist|jwplayer)\.(?:m3u8|f4m|mpd|smil)', '', url)
+ mobj = re.search(
+ r'(?:(?:http|rtmp|rtsp)(?P<s>s)?:)?(?P<url>//[^?]+)', url)
+ url_base = mobj.group('url')
+ http_base_url = '%s%s:%s' % ('http', mobj.group('s') or '', url_base)
+ formats = []
+
+ def manifest_url(manifest):
+ m_url = f'{http_base_url}/{manifest}'
+ if query:
+ m_url += '?%s' % query
+ return m_url
+
+ if 'm3u8' not in skip_protocols:
+ formats.extend(self._extract_m3u8_formats(
+ manifest_url('playlist.m3u8'), video_id, 'mp4',
+ m3u8_entry_protocol, m3u8_id='hls', fatal=False))
+ if 'f4m' not in skip_protocols:
+ formats.extend(self._extract_f4m_formats(
+ manifest_url('manifest.f4m'),
+ video_id, f4m_id='hds', fatal=False))
+ if 'dash' not in skip_protocols:
+ formats.extend(self._extract_mpd_formats(
+ manifest_url('manifest.mpd'),
+ video_id, mpd_id='dash', fatal=False))
+ if re.search(r'(?:/smil:|\.smil)', url_base):
+ if 'smil' not in skip_protocols:
+ rtmp_formats = self._extract_smil_formats(
+ manifest_url('jwplayer.smil'),
+ video_id, fatal=False)
+ for rtmp_format in rtmp_formats:
+ rtsp_format = rtmp_format.copy()
+ rtsp_format['url'] = '%s/%s' % (rtmp_format['url'], rtmp_format['play_path'])
+ del rtsp_format['play_path']
+ del rtsp_format['ext']
+ rtsp_format.update({
+ 'url': rtsp_format['url'].replace('rtmp://', 'rtsp://'),
+ 'format_id': rtmp_format['format_id'].replace('rtmp', 'rtsp'),
+ 'protocol': 'rtsp',
+ })
+ formats.extend([rtmp_format, rtsp_format])
+ else:
+ for protocol in ('rtmp', 'rtsp'):
+ if protocol not in skip_protocols:
+ formats.append({
+ 'url': f'{protocol}:{url_base}',
+ 'format_id': protocol,
+ 'protocol': protocol,
+ })
+ return formats
+
+ def _find_jwplayer_data(self, webpage, video_id=None, transform_source=js_to_json):
+ mobj = re.search(
+ r'''(?s)jwplayer\s*\(\s*(?P<q>'|")(?!(?P=q)).+(?P=q)\s*\)(?!</script>).*?\.\s*setup\s*\(\s*(?P<options>(?:\([^)]*\)|[^)])+)\s*\)''',
+ webpage)
+ if mobj:
+ try:
+ jwplayer_data = self._parse_json(mobj.group('options'),
+ video_id=video_id,
+ transform_source=transform_source)
+ except ExtractorError:
+ pass
+ else:
+ if isinstance(jwplayer_data, dict):
+ return jwplayer_data
+
+ def _extract_jwplayer_data(self, webpage, video_id, *args, **kwargs):
+ jwplayer_data = self._find_jwplayer_data(
+ webpage, video_id, transform_source=js_to_json)
+ return self._parse_jwplayer_data(
+ jwplayer_data, video_id, *args, **kwargs)
+
+ def _parse_jwplayer_data(self, jwplayer_data, video_id=None, require_title=True,
+ m3u8_id=None, mpd_id=None, rtmp_params=None, base_url=None):
+ entries = []
+ if not isinstance(jwplayer_data, dict):
+ return entries
+
+ playlist_items = jwplayer_data.get('playlist')
+ # JWPlayer backward compatibility: single playlist item/flattened playlists
+ # https://github.com/jwplayer/jwplayer/blob/v7.7.0/src/js/playlist/playlist.js#L10
+ # https://github.com/jwplayer/jwplayer/blob/v7.4.3/src/js/api/config.js#L81-L96
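+        # e.g. both {'file': 'x.mp4'} and {'playlist': {'file': 'x.mp4'}}
+        # (illustrative) are normalized to a single-item playlist below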
+ if not isinstance(playlist_items, list):
+ playlist_items = (playlist_items or jwplayer_data, )
+
+ for video_data in playlist_items:
+ if not isinstance(video_data, dict):
+ continue
+ # JWPlayer backward compatibility: flattened sources
+ # https://github.com/jwplayer/jwplayer/blob/v7.4.3/src/js/playlist/item.js#L29-L35
+ if 'sources' not in video_data:
+ video_data['sources'] = [video_data]
+
+ this_video_id = video_id or video_data['mediaid']
+
+ formats = self._parse_jwplayer_formats(
+ video_data['sources'], video_id=this_video_id, m3u8_id=m3u8_id,
+ mpd_id=mpd_id, rtmp_params=rtmp_params, base_url=base_url)
+
+ subtitles = {}
+ tracks = video_data.get('tracks')
+ if tracks and isinstance(tracks, list):
+ for track in tracks:
+ if not isinstance(track, dict):
+ continue
+ track_kind = track.get('kind')
+ if not track_kind or not isinstance(track_kind, str):
+ continue
+ if track_kind.lower() not in ('captions', 'subtitles'):
+ continue
+ track_url = urljoin(base_url, track.get('file'))
+ if not track_url:
+ continue
+ subtitles.setdefault(track.get('label') or 'en', []).append({
+ 'url': self._proto_relative_url(track_url)
+ })
+
+ entry = {
+ 'id': this_video_id,
+ 'title': unescapeHTML(video_data['title'] if require_title else video_data.get('title')),
+ 'description': clean_html(video_data.get('description')),
+ 'thumbnail': urljoin(base_url, self._proto_relative_url(video_data.get('image'))),
+ 'timestamp': int_or_none(video_data.get('pubdate')),
+ 'duration': float_or_none(jwplayer_data.get('duration') or video_data.get('duration')),
+ 'subtitles': subtitles,
+ 'alt_title': clean_html(video_data.get('subtitle')), # attributes used e.g. by Tele5 ...
+ 'genre': clean_html(video_data.get('genre')),
+ 'channel': clean_html(dict_get(video_data, ('category', 'channel'))),
+ 'season_number': int_or_none(video_data.get('season')),
+ 'episode_number': int_or_none(video_data.get('episode')),
+ 'release_year': int_or_none(video_data.get('releasedate')),
+ 'age_limit': int_or_none(video_data.get('age_restriction')),
+ }
+ # https://github.com/jwplayer/jwplayer/blob/master/src/js/utils/validator.js#L32
+ if len(formats) == 1 and re.search(r'^(?:http|//).*(?:youtube\.com|youtu\.be)/.+', formats[0]['url']):
+ entry.update({
+ '_type': 'url_transparent',
+ 'url': formats[0]['url'],
+ })
+ else:
+ entry['formats'] = formats
+ entries.append(entry)
+ if len(entries) == 1:
+ return entries[0]
+ else:
+ return self.playlist_result(entries)
+
+ def _parse_jwplayer_formats(self, jwplayer_sources_data, video_id=None,
+ m3u8_id=None, mpd_id=None, rtmp_params=None, base_url=None):
+ urls = set()
+ formats = []
+ for source in jwplayer_sources_data:
+ if not isinstance(source, dict):
+ continue
+ source_url = urljoin(
+ base_url, self._proto_relative_url(source.get('file')))
+ if not source_url or source_url in urls:
+ continue
+ urls.add(source_url)
+ source_type = source.get('type') or ''
+ ext = mimetype2ext(source_type) or determine_ext(source_url)
+ if source_type == 'hls' or ext == 'm3u8' or 'format=m3u8-aapl' in source_url:
+ formats.extend(self._extract_m3u8_formats(
+ source_url, video_id, 'mp4', entry_protocol='m3u8_native',
+ m3u8_id=m3u8_id, fatal=False))
+ elif source_type == 'dash' or ext == 'mpd' or 'format=mpd-time-csf' in source_url:
+ formats.extend(self._extract_mpd_formats(
+ source_url, video_id, mpd_id=mpd_id, fatal=False))
+ elif ext == 'smil':
+ formats.extend(self._extract_smil_formats(
+ source_url, video_id, fatal=False))
+ # https://github.com/jwplayer/jwplayer/blob/master/src/js/providers/default.js#L67
+ elif source_type.startswith('audio') or ext in (
+ 'oga', 'aac', 'mp3', 'mpeg', 'vorbis'):
+ formats.append({
+ 'url': source_url,
+ 'vcodec': 'none',
+ 'ext': ext,
+ })
+ else:
+ format_id = str_or_none(source.get('label'))
+ height = int_or_none(source.get('height'))
+ if height is None and format_id:
+                    # Often no height is provided, but there is a label in
+                    # a format like "1080p", "720p SD", or 1080.
+ height = parse_resolution(format_id).get('height')
+ a_format = {
+ 'url': source_url,
+ 'width': int_or_none(source.get('width')),
+ 'height': height,
+ 'tbr': int_or_none(source.get('bitrate'), scale=1000),
+ 'filesize': int_or_none(source.get('filesize')),
+ 'ext': ext,
+ 'format_id': format_id
+ }
+ if source_url.startswith('rtmp'):
+ a_format['ext'] = 'flv'
+ # See com/longtailvideo/jwplayer/media/RTMPMediaProvider.as
+ # of jwplayer.flash.swf
+                    rtmp_url_parts = re.split(
+                        r'((?:mp4|mp3|flv):)', source_url, maxsplit=1)
+ if len(rtmp_url_parts) == 3:
+ rtmp_url, prefix, play_path = rtmp_url_parts
+ a_format.update({
+ 'url': rtmp_url,
+ 'play_path': prefix + play_path,
+ })
+ if rtmp_params:
+ a_format.update(rtmp_params)
+ formats.append(a_format)
+ return formats
+
+ def _live_title(self, name):
+ self._downloader.deprecation_warning('yt_dlp.InfoExtractor._live_title is deprecated and does not work as expected')
+ return name
+
+ def _int(self, v, name, fatal=False, **kwargs):
+ res = int_or_none(v, **kwargs)
+ if res is None:
+ msg = f'Failed to extract {name}: Could not parse value {v!r}'
+ if fatal:
+ raise ExtractorError(msg)
+ else:
+ self.report_warning(msg)
+ return res
+
+ def _float(self, v, name, fatal=False, **kwargs):
+ res = float_or_none(v, **kwargs)
+ if res is None:
+ msg = f'Failed to extract {name}: Could not parse value {v!r}'
+ if fatal:
+ raise ExtractorError(msg)
+ else:
+ self.report_warning(msg)
+ return res
+
+ def _set_cookie(self, domain, name, value, expire_time=None, port=None,
+ path='/', secure=False, discard=False, rest={}, **kwargs):
+ cookie = http.cookiejar.Cookie(
+ 0, name, value, port, port is not None, domain, True,
+ domain.startswith('.'), path, True, secure, expire_time,
+ discard, None, None, rest)
+ self.cookiejar.set_cookie(cookie)
+
+ def _get_cookies(self, url):
+ """ Return a http.cookies.SimpleCookie with the cookies for the url """
+ return LenientSimpleCookie(self._downloader.cookiejar.get_cookie_header(url))
+
+ def _apply_first_set_cookie_header(self, url_handle, cookie):
+ """
+ Apply first Set-Cookie header instead of the last. Experimental.
+
+        Some sites (e.g. [1-3]) may serve two cookies under the same name
+        in the Set-Cookie header and expect the first (old) one to be set
+        rather than the second (new) one. However, as per RFC 6265 the newer
+        cookie should be set into the cookie store, which is what actually
+        happens. We work around this issue by manually resetting the cookie
+        to the first one.
+ 1. https://new.vk.com/
+ 2. https://github.com/ytdl-org/youtube-dl/issues/9841#issuecomment-227871201
+ 3. https://learning.oreilly.com/
+ """
+ for header, cookies in url_handle.headers.items():
+ if header.lower() != 'set-cookie':
+ continue
+ cookies = cookies.encode('iso-8859-1').decode('utf-8')
+ cookie_value = re.search(
+ r'%s=(.+?);.*?\b[Dd]omain=(.+?)(?:[,;]|$)' % cookie, cookies)
+ if cookie_value:
+ value, domain = cookie_value.groups()
+ self._set_cookie(domain, cookie, value)
+ break
+
+ @classmethod
+ def get_testcases(cls, include_onlymatching=False):
+ # Do not look in super classes
+ t = vars(cls).get('_TEST')
+ if t:
+ assert not hasattr(cls, '_TESTS'), f'{cls.ie_key()}IE has _TEST and _TESTS'
+ tests = [t]
+ else:
+ tests = vars(cls).get('_TESTS', [])
+ for t in tests:
+ if not include_onlymatching and t.get('only_matching', False):
+ continue
+ t['name'] = cls.ie_key()
+ yield t
+ if getattr(cls, '__wrapped__', None):
+ yield from cls.__wrapped__.get_testcases(include_onlymatching)
+
+ @classmethod
+ def get_webpage_testcases(cls):
+ tests = vars(cls).get('_WEBPAGE_TESTS', [])
+ for t in tests:
+ t['name'] = cls.ie_key()
+ yield t
+ if getattr(cls, '__wrapped__', None):
+ yield from cls.__wrapped__.get_webpage_testcases()
+
+ @classproperty(cache=True)
+ def age_limit(cls):
+ """Get age limit from the testcases"""
+ return max(traverse_obj(
+ (*cls.get_testcases(include_onlymatching=False), *cls.get_webpage_testcases()),
+ (..., (('playlist', 0), None), 'info_dict', 'age_limit')) or [0])
+
+ @classproperty(cache=True)
+ def _RETURN_TYPE(cls):
+ """What the extractor returns: "video", "playlist", "any", or None (Unknown)"""
+ tests = tuple(cls.get_testcases(include_onlymatching=False))
+ if not tests:
+ return None
+ elif not any(k.startswith('playlist') for test in tests for k in test):
+ return 'video'
+ elif all(any(k.startswith('playlist') for k in test) for test in tests):
+ return 'playlist'
+ return 'any'
+
+ @classmethod
+ def is_single_video(cls, url):
+ """Returns whether the URL is of a single video, None if unknown"""
+ if cls.suitable(url):
+ return {'video': True, 'playlist': False}.get(cls._RETURN_TYPE)
+
+ @classmethod
+ def is_suitable(cls, age_limit):
+ """Test whether the extractor is generally suitable for the given age limit"""
+ return not age_restricted(cls.age_limit, age_limit)
+
+ @classmethod
+ def description(cls, *, markdown=True, search_examples=None):
+ """Description of the extractor"""
+ desc = ''
+ if cls._NETRC_MACHINE:
+ if markdown:
+ desc += f' [*{cls._NETRC_MACHINE}*](## "netrc machine")'
+ else:
+ desc += f' [{cls._NETRC_MACHINE}]'
+ if cls.IE_DESC is False:
+ desc += ' [HIDDEN]'
+ elif cls.IE_DESC:
+ desc += f' {cls.IE_DESC}'
+ if cls.SEARCH_KEY:
+ desc += f'{";" if cls.IE_DESC else ""} "{cls.SEARCH_KEY}:" prefix'
+ if search_examples:
+ _COUNTS = ('', '5', '10', 'all')
+ desc += f' (e.g. "{cls.SEARCH_KEY}{random.choice(_COUNTS)}:{random.choice(search_examples)}")'
+ if not cls.working():
+ desc += ' (**Currently broken**)' if markdown else ' (Currently broken)'
+
+ # Escape emojis. Ref: https://github.com/github/markup/issues/1153
+ name = (' - **%s**' % re.sub(r':(\w+:)', ':\u200B\\g<1>', cls.IE_NAME)) if markdown else cls.IE_NAME
+ return f'{name}:{desc}' if desc else name
+
+ def extract_subtitles(self, *args, **kwargs):
+ if (self.get_param('writesubtitles', False)
+ or self.get_param('listsubtitles')):
+ return self._get_subtitles(*args, **kwargs)
+ return {}
+
+ def _get_subtitles(self, *args, **kwargs):
+ raise NotImplementedError('This method must be implemented by subclasses')
+
+ class CommentsDisabled(Exception):
+ """Raise in _get_comments if comments are disabled for the video"""
+
+ def extract_comments(self, *args, **kwargs):
+ if not self.get_param('getcomments'):
+ return None
+ generator = self._get_comments(*args, **kwargs)
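+        # Return a callable rather than the comments themselves, so the
+        # generator is only drained when the comments are actually needed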
+
+ def extractor():
+ comments = []
+ interrupted = True
+ try:
+ while True:
+ comments.append(next(generator))
+ except StopIteration:
+ interrupted = False
+ except KeyboardInterrupt:
+ self.to_screen('Interrupted by user')
+ except self.CommentsDisabled:
+ return {'comments': None, 'comment_count': None}
+ except Exception as e:
+ if self.get_param('ignoreerrors') is not True:
+ raise
+ self._downloader.report_error(e)
+ comment_count = len(comments)
+ self.to_screen(f'Extracted {comment_count} comments')
+ return {
+ 'comments': comments,
+ 'comment_count': None if interrupted else comment_count
+ }
+ return extractor
+
+ def _get_comments(self, *args, **kwargs):
+ raise NotImplementedError('This method must be implemented by subclasses')
+
+ @staticmethod
+ def _merge_subtitle_items(subtitle_list1, subtitle_list2):
+ """ Merge subtitle items for one language. Items with duplicated URLs/data
+ will be dropped. """
+ list1_data = {(item.get('url'), item.get('data')) for item in subtitle_list1}
+ ret = list(subtitle_list1)
+ ret.extend(item for item in subtitle_list2 if (item.get('url'), item.get('data')) not in list1_data)
+ return ret
+
+ @classmethod
+ def _merge_subtitles(cls, *dicts, target=None):
+ """ Merge subtitle dictionaries, language by language. """
+ if target is None:
+ target = {}
+ for d in dicts:
+ for lang, subs in d.items():
+ target[lang] = cls._merge_subtitle_items(target.get(lang, []), subs)
+ return target
+
+ def extract_automatic_captions(self, *args, **kwargs):
+ if (self.get_param('writeautomaticsub', False)
+ or self.get_param('listsubtitles')):
+ return self._get_automatic_captions(*args, **kwargs)
+ return {}
+
+ def _get_automatic_captions(self, *args, **kwargs):
+ raise NotImplementedError('This method must be implemented by subclasses')
+
+ @functools.cached_property
+ def _cookies_passed(self):
+ """Whether cookies have been passed to YoutubeDL"""
+ return self.get_param('cookiefile') is not None or self.get_param('cookiesfrombrowser') is not None
+
+ def mark_watched(self, *args, **kwargs):
+ if not self.get_param('mark_watched', False):
+ return
+ if self.supports_login() and self._get_login_info()[0] is not None or self._cookies_passed:
+ self._mark_watched(*args, **kwargs)
+
+ def _mark_watched(self, *args, **kwargs):
+ raise NotImplementedError('This method must be implemented by subclasses')
+
+ def geo_verification_headers(self):
+ headers = {}
+ geo_verification_proxy = self.get_param('geo_verification_proxy')
+ if geo_verification_proxy:
+ headers['Ytdl-request-proxy'] = geo_verification_proxy
+ return headers
+
+ @staticmethod
+ def _generic_id(url):
+ return urllib.parse.unquote(os.path.splitext(url.rstrip('/').split('/')[-1])[0])
+
+ def _generic_title(self, url='', webpage='', *, default=None):
+ return (self._og_search_title(webpage, default=None)
+ or self._html_extract_title(webpage, default=None)
+ or urllib.parse.unquote(os.path.splitext(url_basename(url))[0])
+ or default)
+
+ def _extract_chapters_helper(self, chapter_list, start_function, title_function, duration, strict=True):
+ if not duration:
+ return
+ chapter_list = [{
+ 'start_time': start_function(chapter),
+ 'title': title_function(chapter),
+ } for chapter in chapter_list or []]
+ if strict:
+ warn = self.report_warning
+ else:
+ warn = self.write_debug
+ chapter_list.sort(key=lambda c: c['start_time'] or 0)
+
+ chapters = [{'start_time': 0}]
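+        # chapters[0] is a synthetic chapter at t=0 used as the comparison
+        # anchor in the loop below; it is dropped via chapters[1:] on return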
+ for idx, chapter in enumerate(chapter_list):
+ if chapter['start_time'] is None:
+ warn(f'Incomplete chapter {idx}')
+ elif chapters[-1]['start_time'] <= chapter['start_time'] <= duration:
+ chapters.append(chapter)
+ elif chapter not in chapters:
+ issue = (f'{chapter["start_time"]} > {duration}' if chapter['start_time'] > duration
+ else f'{chapter["start_time"]} < {chapters[-1]["start_time"]}')
+ warn(f'Invalid start time ({issue}) for chapter "{chapter["title"]}"')
+ return chapters[1:]
+
+ def _extract_chapters_from_description(self, description, duration):
+ duration_re = r'(?:\d+:)?\d{1,2}:\d{2}'
+ sep_re = r'(?m)^\s*(%s)\b\W*\s(%s)\s*$'
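+        # Two passes over the description: "MM:SS Title" lines first,
+        # then "Title MM:SS" as a fallback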
+ return self._extract_chapters_helper(
+ re.findall(sep_re % (duration_re, r'.+?'), description or ''),
+ start_function=lambda x: parse_duration(x[0]), title_function=lambda x: x[1],
+ duration=duration, strict=False) or self._extract_chapters_helper(
+ re.findall(sep_re % (r'.+?', duration_re), description or ''),
+ start_function=lambda x: parse_duration(x[1]), title_function=lambda x: x[0],
+ duration=duration, strict=False)
+
+ @staticmethod
+ def _availability(is_private=None, needs_premium=None, needs_subscription=None, needs_auth=None, is_unlisted=None):
+ all_known = all(map(
+ lambda x: x is not None,
+ (is_private, needs_premium, needs_subscription, needs_auth, is_unlisted)))
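+        # Flags are checked in order of restrictiveness; 'public' is only
+        # returned when every flag was explicitly provided (all_known)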
+ return (
+ 'private' if is_private
+ else 'premium_only' if needs_premium
+ else 'subscriber_only' if needs_subscription
+ else 'needs_auth' if needs_auth
+ else 'unlisted' if is_unlisted
+ else 'public' if all_known
+ else None)
+
+ def _configuration_arg(self, key, default=NO_DEFAULT, *, ie_key=None, casesense=False):
+ '''
+ @returns A list of values for the extractor argument given by "key"
+ or "default" if no such key is present
+ @param default The default value to return when the key is not present (default: [])
+ @param casesense When false, the values are converted to lower case
+ '''
+ ie_key = ie_key if isinstance(ie_key, str) else (ie_key or self).ie_key()
+ val = traverse_obj(self._downloader.params, ('extractor_args', ie_key.lower(), key))
+ if val is None:
+ return [] if default is NO_DEFAULT else default
+ return list(val) if casesense else [x.lower() for x in val]
+
+ def _yes_playlist(self, playlist_id, video_id, smuggled_data=None, *, playlist_label='playlist', video_label='video'):
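+        # True = download the whole playlist, False = just the video.
+        # Either ID may be the literal True, meaning "present but unnamed"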
+ if not playlist_id or not video_id:
+ return not video_id
+
+ no_playlist = (smuggled_data or {}).get('force_noplaylist')
+ if no_playlist is not None:
+ return not no_playlist
+
+ video_id = '' if video_id is True else f' {video_id}'
+ playlist_id = '' if playlist_id is True else f' {playlist_id}'
+ if self.get_param('noplaylist'):
+ self.to_screen(f'Downloading just the {video_label}{video_id} because of --no-playlist')
+ return False
+ self.to_screen(f'Downloading {playlist_label}{playlist_id} - add --no-playlist to download just the {video_label}{video_id}')
+ return True
+
+ def _error_or_warning(self, err, _count=None, _retries=0, *, fatal=True):
+ RetryManager.report_retry(
+ err, _count or int(fatal), _retries,
+ info=self.to_screen, warn=self.report_warning, error=None if fatal else self.report_warning,
+ sleep_func=self.get_param('retry_sleep_functions', {}).get('extractor'))
+
+ def RetryManager(self, **kwargs):
+ return RetryManager(self.get_param('extractor_retries', 3), self._error_or_warning, **kwargs)
+
+ def _extract_generic_embeds(self, url, *args, info_dict={}, note='Extracting generic embeds', **kwargs):
+ display_id = traverse_obj(info_dict, 'display_id', 'id')
+ self.to_screen(f'{format_field(display_id, None, "%s: ")}{note}')
+ return self._downloader.get_info_extractor('Generic')._extract_embeds(
+ smuggle_url(url, {'block_ies': [self.ie_key()]}), *args, **kwargs)
+
+ @classmethod
+ def extract_from_webpage(cls, ydl, url, webpage):
+ ie = (cls if isinstance(cls._extract_from_webpage, types.MethodType)
+ else ydl.get_info_extractor(cls.ie_key()))
+ for info in ie._extract_from_webpage(url, webpage) or []:
+ # url = None since we do not want to set (webpage/original)_url
+ ydl.add_default_extra_info(info, ie, None)
+ yield info
+
+ @classmethod
+ def _extract_from_webpage(cls, url, webpage):
+ for embed_url in orderedSet(
+ cls._extract_embed_urls(url, webpage) or [], lazy=True):
+ yield cls.url_result(embed_url, None if cls._VALID_URL is False else cls)
+
+ @classmethod
+ def _extract_embed_urls(cls, url, webpage):
+ """@returns all the embed urls on the webpage"""
+ if '_EMBED_URL_RE' not in cls.__dict__:
+ assert isinstance(cls._EMBED_REGEX, (list, tuple))
+ for idx, regex in enumerate(cls._EMBED_REGEX):
+ assert regex.count('(?P<url>') == 1, \
+ f'{cls.__name__}._EMBED_REGEX[{idx}] must have exactly 1 url group\n\t{regex}'
+ cls._EMBED_URL_RE = tuple(map(re.compile, cls._EMBED_REGEX))
+
+ for regex in cls._EMBED_URL_RE:
+ for mobj in regex.finditer(webpage):
+ embed_url = urllib.parse.urljoin(url, unescapeHTML(mobj.group('url')))
+ if cls._VALID_URL is False or cls.suitable(embed_url):
+ yield embed_url
+
+ class StopExtraction(Exception):
+ pass
+
+ @classmethod
+ def _extract_url(cls, webpage): # TODO: Remove
+ """Only for compatibility with some older extractors"""
+ return next(iter(cls._extract_embed_urls(None, webpage) or []), None)
+
+ @classmethod
+ def __init_subclass__(cls, *, plugin_name=None, **kwargs):
+ if plugin_name:
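+            # Plugin IEs subclass the IE they override; remember the original
+            # class in __wrapped__, then rebind the module attribute so
+            # existing references resolve to the plugin class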
+ mro = inspect.getmro(cls)
+ super_class = cls.__wrapped__ = mro[mro.index(cls) + 1]
+ cls.PLUGIN_NAME, cls.ie_key = plugin_name, super_class.ie_key
+ cls.IE_NAME = f'{super_class.IE_NAME}+{plugin_name}'
+ while getattr(super_class, '__wrapped__', None):
+ super_class = super_class.__wrapped__
+ setattr(sys.modules[super_class.__module__], super_class.__name__, cls)
+ _PLUGIN_OVERRIDES[super_class].append(cls)
+
+ return super().__init_subclass__(**kwargs)
+
+
+class SearchInfoExtractor(InfoExtractor):
+ """
+    Base class for paged search query extractors.
+    They accept URLs in the format _SEARCH_KEY(|all|[1-9][0-9]*):{query}
+ Instances should define _SEARCH_KEY and optionally _MAX_RESULTS
+ """
+
+ _MAX_RESULTS = float('inf')
+ _RETURN_TYPE = 'playlist'
+
+ @classproperty
+ def _VALID_URL(cls):
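+        # e.g. for _SEARCH_KEY 'ytsearch': 'ytsearch:cats' (1 result),
+        # 'ytsearch5:cats' (5 results), 'ytsearchall:cats' (all results)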
+ return r'%s(?P<prefix>|[1-9][0-9]*|all):(?P<query>[\s\S]+)' % cls._SEARCH_KEY
+
+ def _real_extract(self, query):
+ prefix, query = self._match_valid_url(query).group('prefix', 'query')
+ if prefix == '':
+ return self._get_n_results(query, 1)
+ elif prefix == 'all':
+ return self._get_n_results(query, self._MAX_RESULTS)
+ else:
+ n = int(prefix)
+ if n <= 0:
+ raise ExtractorError(f'invalid download number {n} for query "{query}"')
+ elif n > self._MAX_RESULTS:
+ self.report_warning('%s returns max %i results (you requested %i)' % (self._SEARCH_KEY, self._MAX_RESULTS, n))
+ n = self._MAX_RESULTS
+ return self._get_n_results(query, n)
+
+ def _get_n_results(self, query, n):
+ """Get a specified number of results for a query.
+ Either this function or _search_results must be overridden by subclasses """
+ return self.playlist_result(
+ itertools.islice(self._search_results(query), 0, None if n == float('inf') else n),
+ query, query)
+
+ def _search_results(self, query):
+ """Returns an iterator of search results"""
+ raise NotImplementedError('This method must be implemented by subclasses')
+
+ @classproperty
+ def SEARCH_KEY(cls):
+ return cls._SEARCH_KEY
+
+
+class UnsupportedURLIE(InfoExtractor):
+ _VALID_URL = '.*'
+ _ENABLED = False
+ IE_DESC = False
+
+ def _real_extract(self, url):
+ raise UnsupportedError(url)
+
+
+_PLUGIN_OVERRIDES = collections.defaultdict(list)
diff --git a/yt_dlp/extractor/commonmistakes.py b/yt_dlp/extractor/commonmistakes.py
new file mode 100644
index 0000000..1d3b61c
--- /dev/null
+++ b/yt_dlp/extractor/commonmistakes.py
@@ -0,0 +1,42 @@
+from .common import InfoExtractor
+from ..utils import ExtractorError
+
+
+class CommonMistakesIE(InfoExtractor):
+ IE_DESC = False # Do not list
+ _VALID_URL = r'(?:url|URL|yt-dlp)$'
+
+ _TESTS = [{
+ 'url': 'url',
+ 'only_matching': True,
+ }, {
+ 'url': 'URL',
+ 'only_matching': True,
+ }]
+
+ def _real_extract(self, url):
+ msg = (
+ 'You\'ve asked yt-dlp to download the URL "%s". '
+ 'That doesn\'t make any sense. '
+ 'Simply remove the parameter in your command or configuration.'
+ ) % url
+ if not self.get_param('verbose'):
+ msg += ' Add -v to the command line to see what arguments and configuration yt-dlp has'
+ raise ExtractorError(msg, expected=True)
+
+
+class UnicodeBOMIE(InfoExtractor):
+ IE_DESC = False
+ _VALID_URL = r'(?P<bom>\ufeff)(?P<id>.*)$'
+
+ _TESTS = [{
+ 'url': '\ufeffhttp://www.youtube.com/watch?v=BaW_jenozKc',
+ 'only_matching': True,
+ }]
+
+ def _real_extract(self, url):
+ real_url = self._match_id(url)
+ self.report_warning(
+ 'Your URL starts with a Byte Order Mark (BOM). '
+ 'Removing the BOM and looking for "%s" ...' % real_url)
+ return self.url_result(real_url)
diff --git a/yt_dlp/extractor/commonprotocols.py b/yt_dlp/extractor/commonprotocols.py
new file mode 100644
index 0000000..2f93e8e
--- /dev/null
+++ b/yt_dlp/extractor/commonprotocols.py
@@ -0,0 +1,70 @@
+import urllib.parse
+
+from .common import InfoExtractor
+
+
+class RtmpIE(InfoExtractor):
+ IE_DESC = False # Do not list
+ _VALID_URL = r'(?i)rtmp[est]?://.+'
+
+ _TESTS = [{
+ 'url': 'rtmp://cp44293.edgefcs.net/ondemand?auth=daEcTdydfdqcsb8cZcDbAaCbhamacbbawaS-bw7dBb-bWG-GqpGFqCpNCnGoyL&aifp=v001&slist=public/unsecure/audio/2c97899446428e4301471a8cb72b4b97--audio--pmg-20110908-0900a_flv_aac_med_int.mp4',
+ 'only_matching': True,
+ }, {
+ 'url': 'rtmp://edge.live.hitbox.tv/live/dimak',
+ 'only_matching': True,
+ }]
+
+ def _real_extract(self, url):
+ video_id = self._generic_id(url)
+ title = self._generic_title(url)
+ return {
+ 'id': video_id,
+ 'title': title,
+ 'formats': [{
+ 'url': url,
+ 'ext': 'flv',
+ 'format_id': urllib.parse.urlparse(url).scheme,
+ }],
+ }
+
+
+class MmsIE(InfoExtractor):
+ IE_DESC = False # Do not list
+ _VALID_URL = r'(?i)mms://.+'
+
+ _TEST = {
+ # Direct MMS link
+ 'url': 'mms://kentro.kaist.ac.kr/200907/MilesReid(0709).wmv',
+ 'info_dict': {
+ 'id': 'MilesReid(0709)',
+ 'ext': 'wmv',
+ 'title': 'MilesReid(0709)',
+ },
+ 'params': {
+ 'skip_download': True, # rtsp downloads, requiring mplayer or mpv
+ },
+ }
+
+ def _real_extract(self, url):
+ video_id = self._generic_id(url)
+ title = self._generic_title(url)
+
+ return {
+ 'id': video_id,
+ 'title': title,
+ 'url': url,
+ }
+
+
+class ViewSourceIE(InfoExtractor):
+ IE_DESC = False
+ _VALID_URL = r'view-source:(?P<url>.+)'
+
+ _TEST = {
+ 'url': 'view-source:https://www.youtube.com/watch?v=BaW_jenozKc',
+ 'only_matching': True
+ }
+
+ def _real_extract(self, url):
+ return self.url_result(self._match_valid_url(url).group('url'))
diff --git a/yt_dlp/extractor/condenast.py b/yt_dlp/extractor/condenast.py
new file mode 100644
index 0000000..3170c29
--- /dev/null
+++ b/yt_dlp/extractor/condenast.py
@@ -0,0 +1,250 @@
+import re
+
+from .common import InfoExtractor
+from ..compat import (
+ compat_urllib_parse_urlparse,
+ compat_urlparse,
+)
+from ..utils import (
+ determine_ext,
+ extract_attributes,
+ int_or_none,
+ js_to_json,
+ mimetype2ext,
+ orderedSet,
+ parse_iso8601,
+ strip_or_none,
+ try_get,
+)
+
+
+class CondeNastIE(InfoExtractor):
+ """
+    Condé Nast is a media group; some of its sites use a custom HTML5 player
+    that works the same way on all of them.
+ """
+
+ # The keys are the supported sites and the values are the name to be shown
+ # to the user and in the extractor description.
+ _SITES = {
+ 'allure': 'Allure',
+ 'architecturaldigest': 'Architectural Digest',
+ 'arstechnica': 'Ars Technica',
+ 'bonappetit': 'Bon Appétit',
+ 'brides': 'Brides',
+ 'cnevids': 'Condé Nast',
+ 'cntraveler': 'Condé Nast Traveler',
+ 'details': 'Details',
+ 'epicurious': 'Epicurious',
+ 'glamour': 'Glamour',
+ 'golfdigest': 'Golf Digest',
+ 'gq': 'GQ',
+ 'newyorker': 'The New Yorker',
+ 'self': 'SELF',
+ 'teenvogue': 'Teen Vogue',
+ 'vanityfair': 'Vanity Fair',
+ 'vogue': 'Vogue',
+ 'wired': 'WIRED',
+ 'wmagazine': 'W Magazine',
+ }
+
+ _VALID_URL = r'''(?x)https?://(?:video|www|player(?:-backend)?)\.(?:%s)\.com/
+ (?:
+ (?:
+ embed(?:js)?|
+ (?:script|inline)/video
+ )/(?P<id>[0-9a-f]{24})(?:/(?P<player_id>[0-9a-f]{24}))?(?:.+?\btarget=(?P<target>[^&]+))?|
+ (?P<type>watch|series|video)/(?P<display_id>[^/?#]+)
+ )''' % '|'.join(_SITES.keys())
+ IE_DESC = 'Condé Nast media group: %s' % ', '.join(sorted(_SITES.values()))
+
+ _EMBED_REGEX = [r'''(?x)
+ <(?:iframe|script)[^>]+?src=(["\'])(?P<url>
+ (?:https?:)?//player(?:-backend)?\.(?:%s)\.com/(?:embed(?:js)?|(?:script|inline)/video)/.+?
+ )\1''' % '|'.join(_SITES.keys())]
+
+ _TESTS = [{
+ 'url': 'http://video.wired.com/watch/3d-printed-speakers-lit-with-led',
+ 'md5': '1921f713ed48aabd715691f774c451f7',
+ 'info_dict': {
+ 'id': '5171b343c2b4c00dd0c1ccb3',
+ 'ext': 'mp4',
+ 'title': '3D Printed Speakers Lit With LED',
+ 'description': 'Check out these beautiful 3D printed LED speakers. You can\'t actually buy them, but LumiGeek is working on a board that will let you make you\'re own.',
+ 'uploader': 'wired',
+ 'upload_date': '20130314',
+ 'timestamp': 1363219200,
+ }
+ }, {
+ 'url': 'http://video.gq.com/watch/the-closer-with-keith-olbermann-the-only-true-surprise-trump-s-an-idiot?c=series',
+ 'info_dict': {
+ 'id': '58d1865bfd2e6126e2000015',
+ 'ext': 'mp4',
+ 'title': 'The Only True Surprise? Trump’s an Idiot',
+ 'uploader': 'gq',
+ 'upload_date': '20170321',
+ 'timestamp': 1490126427,
+ 'description': 'How much grimmer would things be if these people were competent?',
+ },
+ }, {
+ # JS embed
+ 'url': 'http://player.cnevids.com/embedjs/55f9cf8b61646d1acf00000c/5511d76261646d5566020000.js',
+ 'md5': 'f1a6f9cafb7083bab74a710f65d08999',
+ 'info_dict': {
+ 'id': '55f9cf8b61646d1acf00000c',
+ 'ext': 'mp4',
+ 'title': '3D printed TSA Travel Sentry keys really do open TSA locks',
+ 'uploader': 'arstechnica',
+ 'upload_date': '20150916',
+ 'timestamp': 1442434920,
+ }
+ }, {
+ 'url': 'https://player.cnevids.com/inline/video/59138decb57ac36b83000005.js?target=js-cne-player',
+ 'only_matching': True,
+ }, {
+ 'url': 'http://player-backend.cnevids.com/script/video/59138decb57ac36b83000005.js',
+ 'only_matching': True,
+ }]
+
+ def _extract_series(self, url, webpage):
+ title = self._html_search_regex(
+ r'(?s)<div class="cne-series-info">.*?<h1>(.+?)</h1>',
+ webpage, 'series title')
+ url_object = compat_urllib_parse_urlparse(url)
+ base_url = '%s://%s' % (url_object.scheme, url_object.netloc)
+ m_paths = re.finditer(
+ r'(?s)<p class="cne-thumb-title">.*?<a href="(/watch/.+?)["\?]', webpage)
+ paths = orderedSet(m.group(1) for m in m_paths)
+ build_url = lambda path: compat_urlparse.urljoin(base_url, path)
+ entries = [self.url_result(build_url(path), 'CondeNast') for path in paths]
+ return self.playlist_result(entries, playlist_title=title)
+
+ def _extract_video_params(self, webpage, display_id):
+ query = self._parse_json(
+ self._search_regex(
+ r'(?s)var\s+params\s*=\s*({.+?})[;,]', webpage, 'player params',
+ default='{}'),
+ display_id, transform_source=js_to_json, fatal=False)
+ if query:
+ query['videoId'] = self._search_regex(
+ r'(?:data-video-id=|currentVideoId\s*=\s*)["\']([\da-f]+)',
+ webpage, 'video id', default=None)
+ else:
+ params = extract_attributes(self._search_regex(
+ r'(<[^>]+data-js="video-player"[^>]+>)',
+ webpage, 'player params element'))
+ query.update({
+ 'videoId': params['data-video'],
+ 'playerId': params['data-player'],
+ 'target': params['id'],
+ })
+ return query
+
+ def _extract_video(self, params):
+ video_id = params['videoId']
+
+ video_info = None
+
+ # New API path
+ query = params.copy()
+ query['embedType'] = 'inline'
+ info_page = self._download_json(
+ 'http://player.cnevids.com/embed-api.json', video_id,
+ 'Downloading embed info', fatal=False, query=query)
+
+ # Old fallbacks
+ if not info_page:
+ if params.get('playerId'):
+ info_page = self._download_json(
+ 'http://player.cnevids.com/player/video.js', video_id,
+ 'Downloading video info', fatal=False, query=params)
+ if info_page:
+ video_info = info_page.get('video')
+ if not video_info:
+ info_page = self._download_webpage(
+ 'http://player.cnevids.com/player/loader.js',
+ video_id, 'Downloading loader info', query=params)
+ if not video_info:
+ info_page = self._download_webpage(
+ 'https://player.cnevids.com/inline/video/%s.js' % video_id,
+ video_id, 'Downloading inline info', query={
+ 'target': params.get('target', 'embedplayer')
+ })
+
+ if not video_info:
+ video_info = self._parse_json(
+ self._search_regex(
+ r'(?s)var\s+config\s*=\s*({.+?});', info_page, 'config'),
+ video_id, transform_source=js_to_json)['video']
+
+ title = video_info['title']
+
+ formats = []
+ for fdata in video_info['sources']:
+ src = fdata.get('src')
+ if not src:
+ continue
+ ext = mimetype2ext(fdata.get('type')) or determine_ext(src)
+ if ext == 'm3u8':
+ formats.extend(self._extract_m3u8_formats(
+ src, video_id, 'mp4', entry_protocol='m3u8_native',
+ m3u8_id='hls', fatal=False))
+ continue
+ quality = fdata.get('quality')
+ formats.append({
+ 'format_id': ext + ('-%s' % quality if quality else ''),
+ 'url': src,
+ 'ext': ext,
+ 'quality': 1 if quality == 'high' else 0,
+ })
+
+ subtitles = {}
+ for t, caption in video_info.get('captions', {}).items():
+ caption_url = caption.get('src')
+ if not (t in ('vtt', 'srt', 'tml') and caption_url):
+ continue
+ subtitles.setdefault('en', []).append({'url': caption_url})
+
+ return {
+ 'id': video_id,
+ 'formats': formats,
+ 'title': title,
+ 'thumbnail': video_info.get('poster_frame'),
+ 'uploader': video_info.get('brand'),
+ 'duration': int_or_none(video_info.get('duration')),
+ 'tags': video_info.get('tags'),
+ 'series': video_info.get('series_title'),
+ 'season': video_info.get('season_title'),
+ 'timestamp': parse_iso8601(video_info.get('premiere_date')),
+ 'categories': video_info.get('categories'),
+ 'subtitles': subtitles,
+ }
+
+ def _real_extract(self, url):
+ video_id, player_id, target, url_type, display_id = self._match_valid_url(url).groups()
+
+ if video_id:
+ return self._extract_video({
+ 'videoId': video_id,
+ 'playerId': player_id,
+ 'target': target,
+ })
+
+ webpage = self._download_webpage(url, display_id)
+
+ if url_type == 'series':
+ return self._extract_series(url, webpage)
+ else:
+ video = try_get(self._parse_json(self._search_regex(
+ r'__PRELOADED_STATE__\s*=\s*({.+?});', webpage,
+ 'preload state', '{}'), display_id),
+ lambda x: x['transformed']['video'])
+ if video:
+ params = {'videoId': video['id']}
+ info = {'description': strip_or_none(video.get('description'))}
+ else:
+ params = self._extract_video_params(webpage, display_id)
+ info = self._search_json_ld(
+ webpage, display_id, fatal=False)
+ info.update(self._extract_video(params))
+ return info
diff --git a/yt_dlp/extractor/contv.py b/yt_dlp/extractor/contv.py
new file mode 100644
index 0000000..d69e816
--- /dev/null
+++ b/yt_dlp/extractor/contv.py
@@ -0,0 +1,113 @@
+from .common import InfoExtractor
+from ..utils import (
+ float_or_none,
+ int_or_none,
+)
+
+
+class CONtvIE(InfoExtractor):
+ _VALID_URL = r'https?://(?:www\.)?contv\.com/details-movie/(?P<id>[^/]+)'
+ _TESTS = [{
+ 'url': 'https://www.contv.com/details-movie/CEG10022949/days-of-thrills-&-laughter',
+ 'info_dict': {
+ 'id': 'CEG10022949',
+ 'ext': 'mp4',
+ 'title': 'Days Of Thrills & Laughter',
+ 'description': 'md5:5d6b3d0b1829bb93eb72898c734802eb',
+ 'upload_date': '20180703',
+ 'timestamp': 1530634789.61,
+ },
+ 'params': {
+ # m3u8 download
+ 'skip_download': True,
+ },
+ }, {
+ 'url': 'https://www.contv.com/details-movie/CLIP-show_fotld_bts/fight-of-the-living-dead:-behind-the-scenes-bites',
+ 'info_dict': {
+ 'id': 'CLIP-show_fotld_bts',
+ 'title': 'Fight of the Living Dead: Behind the Scenes Bites',
+ },
+ 'playlist_mincount': 7,
+ }]
+
+ def _real_extract(self, url):
+ video_id = self._match_id(url)
+ details = self._download_json(
+ 'http://metax.contv.live.junctiontv.net/metax/2.5/details/' + video_id,
+ video_id, query={'device': 'web'})
+
+ if details.get('type') == 'episodic':
+ seasons = self._download_json(
+ 'http://metax.contv.live.junctiontv.net/metax/2.5/seriesfeed/json/' + video_id,
+ video_id)
+ entries = []
+ for season in seasons:
+ for episode in season.get('episodes', []):
+ episode_id = episode.get('id')
+ if not episode_id:
+ continue
+ entries.append(self.url_result(
+ 'https://www.contv.com/details-movie/' + episode_id,
+ CONtvIE.ie_key(), episode_id))
+ return self.playlist_result(entries, video_id, details.get('title'))
+
+ m_details = details['details']
+ title = details['title']
+
+ formats = []
+
+ media_hls_url = m_details.get('media_hls_url')
+ if media_hls_url:
+ formats.extend(self._extract_m3u8_formats(
+ media_hls_url, video_id, 'mp4',
+ m3u8_id='hls', fatal=False))
+
+ media_mp4_url = m_details.get('media_mp4_url')
+ if media_mp4_url:
+ formats.append({
+ 'format_id': 'http',
+ 'url': media_mp4_url,
+ })
+
+ subtitles = {}
+ captions = m_details.get('captions') or {}
+ for caption_url in captions.values():
+ subtitles.setdefault('en', []).append({
+ 'url': caption_url
+ })
+
+ thumbnails = []
+ for image in m_details.get('images', []):
+ image_url = image.get('url')
+ if not image_url:
+ continue
+ thumbnails.append({
+ 'url': image_url,
+ 'width': int_or_none(image.get('width')),
+ 'height': int_or_none(image.get('height')),
+ })
+
+ description = None
+ for p in ('large_', 'medium_', 'small_', ''):
+ d = m_details.get(p + 'description')
+ if d:
+ description = d
+ break
+
+ return {
+ 'id': video_id,
+ 'title': title,
+ 'formats': formats,
+ 'thumbnails': thumbnails,
+ 'description': description,
+ 'timestamp': float_or_none(details.get('metax_added_on'), 1000),
+ 'subtitles': subtitles,
+ 'duration': float_or_none(m_details.get('duration'), 1000),
+ 'view_count': int_or_none(details.get('num_watched')),
+ 'like_count': int_or_none(details.get('num_fav')),
+ 'categories': details.get('category'),
+ 'tags': details.get('tags'),
+ 'season_number': int_or_none(details.get('season')),
+ 'episode_number': int_or_none(details.get('episode')),
+ 'release_year': int_or_none(details.get('pub_year')),
+ }
diff --git a/yt_dlp/extractor/corus.py b/yt_dlp/extractor/corus.py
new file mode 100644
index 0000000..bcc34dd
--- /dev/null
+++ b/yt_dlp/extractor/corus.py
@@ -0,0 +1,154 @@
+from .theplatform import ThePlatformFeedIE
+from ..utils import (
+ dict_get,
+ ExtractorError,
+ float_or_none,
+ int_or_none,
+)
+
+
+class CorusIE(ThePlatformFeedIE): # XXX: Do not subclass from concrete IE
+ _VALID_URL = r'''(?x)
+ https?://
+ (?:www\.)?
+ (?P<domain>
+ (?:
+ globaltv|
+ etcanada|
+ seriesplus|
+ wnetwork|
+ ytv
+ )\.com|
+ (?:
+ hgtv|
+ foodnetwork|
+ slice|
+ history|
+ showcase|
+ bigbrothercanada|
+ abcspark|
+ disney(?:channel|lachaine)
+ )\.ca
+ )
+ /(?:[^/]+/)*
+ (?:
+ video\.html\?.*?\bv=|
+ videos?/(?:[^/]+/)*(?:[a-z0-9-]+-)?
+ )
+ (?P<id>
+ [\da-f]{8}-[\da-f]{4}-[\da-f]{4}-[\da-f]{4}-[\da-f]{12}|
+ (?:[A-Z]{4})?\d{12,20}
+ )
+ '''
+ _TESTS = [{
+ 'url': 'https://www.hgtv.ca/video/bryan-inc/movie-night-popcorn-with-bryan/870923331648/',
+ 'info_dict': {
+ 'id': '870923331648',
+ 'ext': 'mp4',
+ 'title': 'Movie Night Popcorn with Bryan',
+ 'description': 'Bryan whips up homemade popcorn, the old fashion way for Jojo and Lincoln.',
+ 'upload_date': '20170206',
+ 'timestamp': 1486392197,
+ },
+ 'params': {
+ 'skip_download': True,
+ },
+ 'expected_warnings': ['Failed to parse JSON'],
+ # FIXME: yt-dlp wrongly raises for geo restriction
+ }, {
+ 'url': 'http://www.foodnetwork.ca/shows/chopped/video/episode/chocolate-obsession/video.html?v=872683587753',
+ 'only_matching': True,
+ }, {
+ 'url': 'http://etcanada.com/video/873675331955/meet-the-survivor-game-changers-castaways-part-2/',
+ 'only_matching': True,
+ }, {
+ 'url': 'http://www.history.ca/the-world-without-canada/video/full-episodes/natural-resources/video.html?v=955054659646#video',
+ 'only_matching': True,
+ }, {
+ 'url': 'http://www.showcase.ca/eyewitness/video/eyewitness++106/video.html?v=955070531919&p=1&s=da#video',
+ 'only_matching': True,
+ }, {
+ 'url': 'http://www.bigbrothercanada.ca/video/1457812035894/',
+ 'only_matching': True
+ }, {
+ 'url': 'https://www.bigbrothercanada.ca/video/big-brother-canada-704/1457812035894/',
+ 'only_matching': True
+ }, {
+ 'url': 'https://www.seriesplus.com/emissions/dre-mary-mort-sur-ordonnance/videos/deux-coeurs-battant/SERP0055626330000200/',
+ 'only_matching': True
+ }, {
+ 'url': 'https://www.disneychannel.ca/shows/gabby-duran-the-unsittables/video/crybaby-duran-clip/2f557eec-0588-11ea-ae2b-e2c6776b770e/',
+ 'only_matching': True
+ }]
+ _GEO_BYPASS = False
+ _SITE_MAP = {
+ 'globaltv': 'series',
+ 'etcanada': 'series',
+ 'foodnetwork': 'food',
+ 'bigbrothercanada': 'series',
+ 'disneychannel': 'disneyen',
+ 'disneylachaine': 'disneyfr',
+ }
+
+ def _real_extract(self, url):
+ domain, video_id = self._match_valid_url(url).groups()
+ site = domain.split('.')[0]
+ path = self._SITE_MAP.get(site, site)
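+        # All template paths except 'series' are prefixed with 'migration/'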
+ if path != 'series':
+ path = 'migration/' + path
+ video = self._download_json(
+ 'https://globalcontent.corusappservices.com/templates/%s/playlist/' % path,
+ video_id, query={'byId': video_id},
+ headers={'Accept': 'application/json'})[0]
+ title = video['title']
+
+ formats = []
+ for source in video.get('sources', []):
+ smil_url = source.get('file')
+ if not smil_url:
+ continue
+ source_type = source.get('type')
+ note = 'Downloading%s smil file' % (' ' + source_type if source_type else '')
+ resp = self._download_webpage(
+ smil_url, video_id, note, fatal=False,
+ headers=self.geo_verification_headers())
+ if not resp:
+ continue
+ error = self._parse_json(resp, video_id, fatal=False)
+ if error:
+ if error.get('exception') == 'GeoLocationBlocked':
+ self.raise_geo_restricted(countries=['CA'])
+ raise ExtractorError(error['description'])
+ smil = self._parse_xml(resp, video_id, fatal=False)
+ if smil is None:
+ continue
+ namespace = self._parse_smil_namespace(smil)
+ formats.extend(self._parse_smil_formats(
+ smil, smil_url, video_id, namespace))
+ if not formats and video.get('drm'):
+ self.report_drm(video_id)
+
+ subtitles = {}
+ for track in video.get('tracks', []):
+ track_url = track.get('file')
+ if not track_url:
+ continue
+ lang = 'fr' if site in ('disneylachaine', 'seriesplus') else 'en'
+ subtitles.setdefault(lang, []).append({'url': track_url})
+
+ metadata = video.get('metadata') or {}
+ get_number = lambda x: int_or_none(video.get('pl1$' + x) or metadata.get(x + 'Number'))
+
+ return {
+ 'id': video_id,
+ 'title': title,
+ 'formats': formats,
+ 'thumbnail': dict_get(video, ('defaultThumbnailUrl', 'thumbnail', 'image')),
+ 'description': video.get('description'),
+ 'timestamp': int_or_none(video.get('availableDate'), 1000),
+ 'subtitles': subtitles,
+ 'duration': float_or_none(metadata.get('duration')),
+ 'series': dict_get(video, ('show', 'pl1$show')),
+ 'season_number': get_number('season'),
+ 'episode_number': get_number('episode'),
+ }
diff --git a/yt_dlp/extractor/coub.py b/yt_dlp/extractor/coub.py
new file mode 100644
index 0000000..9bab698
--- /dev/null
+++ b/yt_dlp/extractor/coub.py
@@ -0,0 +1,136 @@
+from .common import InfoExtractor
+from ..utils import (
+ ExtractorError,
+ float_or_none,
+ int_or_none,
+ parse_iso8601,
+ qualities,
+)
+
+
+class CoubIE(InfoExtractor):
+ _VALID_URL = r'(?:coub:|https?://(?:coub\.com/(?:view|embed|coubs)/|c-cdn\.coub\.com/fb-player\.swf\?.*\bcoub(?:ID|id)=))(?P<id>[\da-z]+)'
+
+ _TESTS = [{
+ 'url': 'http://coub.com/view/5u5n1',
+ 'info_dict': {
+ 'id': '5u5n1',
+ 'ext': 'mp4',
+ 'title': 'The Matrix Moonwalk',
+ 'thumbnail': r're:^https?://.*\.jpg$',
+ 'duration': 4.6,
+ 'timestamp': 1428527772,
+ 'upload_date': '20150408',
+ 'uploader': 'Artyom Loskutnikov',
+ 'uploader_id': 'artyom.loskutnikov',
+ 'view_count': int,
+ 'like_count': int,
+ 'repost_count': int,
+ 'age_limit': 0,
+ },
+ }, {
+ 'url': 'http://c-cdn.coub.com/fb-player.swf?bot_type=vk&coubID=7w5a4',
+ 'only_matching': True,
+ }, {
+ 'url': 'coub:5u5n1',
+ 'only_matching': True,
+ }, {
+ # longer video id
+ 'url': 'http://coub.com/view/237d5l5h',
+ 'only_matching': True,
+ }]
+
+ def _real_extract(self, url):
+ video_id = self._match_id(url)
+
+ coub = self._download_json(
+ 'http://coub.com/api/v2/coubs/%s.json' % video_id, video_id)
+
+ if coub.get('error'):
+ raise ExtractorError(
+ '%s said: %s' % (self.IE_NAME, coub['error']), expected=True)
+
+ title = coub['title']
+
+ file_versions = coub['file_versions']
+
+ QUALITIES = ('low', 'med', 'high', 'higher')
+
+ MOBILE = 'mobile'
+ IPHONE = 'iphone'
+ HTML5 = 'html5'
+
+ SOURCE_PREFERENCE = (MOBILE, IPHONE, HTML5)
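+        # qualities() ranks by position, so later sources win: html5 > iphone > mobile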
+
+ quality_key = qualities(QUALITIES)
+ preference_key = qualities(SOURCE_PREFERENCE)
+
+ formats = []
+
+ for kind, items in file_versions.get(HTML5, {}).items():
+ if kind not in ('video', 'audio'):
+ continue
+ if not isinstance(items, dict):
+ continue
+ for quality, item in items.items():
+ if not isinstance(item, dict):
+ continue
+ item_url = item.get('url')
+ if not item_url:
+ continue
+ formats.append({
+ 'url': item_url,
+ 'format_id': '%s-%s-%s' % (HTML5, kind, quality),
+ 'filesize': int_or_none(item.get('size')),
+ 'vcodec': 'none' if kind == 'audio' else None,
+ 'acodec': 'none' if kind == 'video' else None,
+ 'quality': quality_key(quality),
+ 'source_preference': preference_key(HTML5),
+ })
+
+ iphone_url = file_versions.get(IPHONE, {}).get('url')
+ if iphone_url:
+ formats.append({
+ 'url': iphone_url,
+ 'format_id': IPHONE,
+ 'source_preference': preference_key(IPHONE),
+ })
+
+ mobile_url = file_versions.get(MOBILE, {}).get('audio_url')
+ if mobile_url:
+ formats.append({
+ 'url': mobile_url,
+ 'format_id': '%s-audio' % MOBILE,
+ 'source_preference': preference_key(MOBILE),
+ })
+
+ thumbnail = coub.get('picture')
+ duration = float_or_none(coub.get('duration'))
+ timestamp = parse_iso8601(coub.get('published_at') or coub.get('created_at'))
+ uploader = coub.get('channel', {}).get('title')
+ uploader_id = coub.get('channel', {}).get('permalink')
+
+ view_count = int_or_none(coub.get('views_count') or coub.get('views_increase_count'))
+ like_count = int_or_none(coub.get('likes_count'))
+ repost_count = int_or_none(coub.get('recoubs_count'))
+
+ age_restricted = coub.get('age_restricted', coub.get('age_restricted_by_admin'))
+ if age_restricted is not None:
+ age_limit = 18 if age_restricted is True else 0
+ else:
+ age_limit = None
+
+ return {
+ 'id': video_id,
+ 'title': title,
+ 'thumbnail': thumbnail,
+ 'duration': duration,
+ 'timestamp': timestamp,
+ 'uploader': uploader,
+ 'uploader_id': uploader_id,
+ 'view_count': view_count,
+ 'like_count': like_count,
+ 'repost_count': repost_count,
+ 'age_limit': age_limit,
+ 'formats': formats,
+ }
diff --git a/yt_dlp/extractor/cozytv.py b/yt_dlp/extractor/cozytv.py
new file mode 100644
index 0000000..5ef5afc
--- /dev/null
+++ b/yt_dlp/extractor/cozytv.py
@@ -0,0 +1,37 @@
+from .common import InfoExtractor
+from ..utils import unified_strdate
+
+
+class CozyTVIE(InfoExtractor):
+ _VALID_URL = r'https?://(?:www\.)?cozy\.tv/(?P<uploader>[^/]+)/replays/(?P<id>[^/$#&?]+)'
+
+ _TESTS = [{
+ 'url': 'https://cozy.tv/beardson/replays/2021-11-19_1',
+ 'info_dict': {
+ 'id': 'beardson-2021-11-19_1',
+ 'ext': 'mp4',
+ 'title': 'pokemon pt2',
+ 'uploader': 'beardson',
+ 'upload_date': '20211119',
+ 'was_live': True,
+ 'duration': 7981,
+ },
+ 'params': {'skip_download': True}
+ }]
+
+ def _real_extract(self, url):
+ uploader, date = self._match_valid_url(url).groups()
+ id = f'{uploader}-{date}'
+ data_json = self._download_json(f'https://api.cozy.tv/cache/{uploader}/replay/{date}', id)
+ formats, subtitles = self._extract_m3u8_formats_and_subtitles(
+ f'https://cozycdn.foxtrotstream.xyz/replays/{uploader}/{date}/index.m3u8', id, ext='mp4')
+ return {
+ 'id': id,
+ 'title': data_json.get('title'),
+ 'uploader': data_json.get('user') or uploader,
+ 'upload_date': unified_strdate(data_json.get('date')),
+ 'was_live': True,
+ 'duration': data_json.get('duration'),
+ 'formats': formats,
+ 'subtitles': subtitles,
+ }
diff --git a/yt_dlp/extractor/cpac.py b/yt_dlp/extractor/cpac.py
new file mode 100644
index 0000000..32bba1e
--- /dev/null
+++ b/yt_dlp/extractor/cpac.py
@@ -0,0 +1,136 @@
+from .common import InfoExtractor
+from ..compat import compat_str
+from ..utils import (
+ int_or_none,
+ str_or_none,
+ try_get,
+ unified_timestamp,
+ update_url_query,
+ urljoin,
+)
+
+
+class CPACIE(InfoExtractor):
+ IE_NAME = 'cpac'
+ _VALID_URL = r'https?://(?:www\.)?cpac\.ca/(?P<fr>l-)?episode\?id=(?P<id>[\da-f]{8}(?:-[\da-f]{4}){3}-[\da-f]{12})'
+ _TEST = {
+ # 'url': 'http://www.cpac.ca/en/programs/primetime-politics/episodes/65490909',
+ 'url': 'https://www.cpac.ca/episode?id=fc7edcae-4660-47e1-ba61-5b7f29a9db0f',
+ 'md5': 'e46ad699caafd7aa6024279f2614e8fa',
+ 'info_dict': {
+ 'id': 'fc7edcae-4660-47e1-ba61-5b7f29a9db0f',
+ 'ext': 'mp4',
+ 'upload_date': '20220215',
+ 'title': 'News Conference to Celebrate National Kindness Week – February 15, 2022',
+ 'description': 'md5:466a206abd21f3a6f776cdef290c23fb',
+ 'timestamp': 1644901200,
+ },
+ 'params': {
+ 'format': 'bestvideo',
+ 'hls_prefer_native': True,
+ },
+ }
+
+ def _real_extract(self, url):
+ video_id = self._match_id(url)
+ url_lang = 'fr' if '/l-episode?' in url else 'en'
+
+ content = self._download_json(
+ 'https://www.cpac.ca/api/1/services/contentModel.json?url=/site/website/episode/index.xml&crafterSite=cpacca&id=' + video_id,
+ video_id)
+ video_url = try_get(content, lambda x: x['page']['details']['videoUrl'], compat_str)
+ formats = []
+ if video_url:
+ content = content['page']
+ title = str_or_none(content['details']['title_%s_t' % (url_lang, )])
+ formats = self._extract_m3u8_formats(video_url, video_id, m3u8_id='hls', ext='mp4')
+ for fmt in formats:
+ # prefer language to match URL
+ fmt_lang = fmt.get('language')
+ if fmt_lang == url_lang:
+ fmt['language_preference'] = 10
+ elif not fmt_lang:
+ fmt['language_preference'] = -1
+ else:
+ fmt['language_preference'] = -10
+
+ category = str_or_none(content['details']['category_%s_t' % (url_lang, )])
+
+ def is_live(v_type):
+ return (v_type == 'live') if v_type is not None else None
+
+ return {
+ 'id': video_id,
+ 'formats': formats,
+ 'title': title,
+ 'description': str_or_none(content['details'].get('description_%s_t' % (url_lang, ))),
+ 'timestamp': unified_timestamp(content['details'].get('liveDateTime')),
+ 'categories': [category] if category else None,
+ 'thumbnail': urljoin(url, str_or_none(content['details'].get('image_%s_s' % (url_lang, )))),
+ 'is_live': is_live(content['details'].get('type')),
+ }
+
+
+class CPACPlaylistIE(InfoExtractor):
+ IE_NAME = 'cpac:playlist'
+ _VALID_URL = r'(?i)https?://(?:www\.)?cpac\.ca/(?:program|search|(?P<fr>emission|rechercher))\?(?:[^&]+&)*?(?P<id>(?:id=\d+|programId=\d+|key=[^&]+))'
+
+ _TESTS = [{
+ 'url': 'https://www.cpac.ca/program?id=6',
+ 'info_dict': {
+ 'id': 'id=6',
+ 'title': 'Headline Politics',
+ 'description': 'Watch CPAC’s signature long-form coverage of the day’s pressing political events as they unfold.',
+ },
+ 'playlist_count': 10,
+ }, {
+ 'url': 'https://www.cpac.ca/search?key=hudson&type=all&order=desc',
+ 'info_dict': {
+ 'id': 'key=hudson',
+ 'title': 'hudson',
+ },
+ 'playlist_count': 22,
+ }, {
+ 'url': 'https://www.cpac.ca/search?programId=50',
+ 'info_dict': {
+ 'id': 'programId=50',
+ 'title': '50',
+ },
+ 'playlist_count': 9,
+ }, {
+ 'url': 'https://www.cpac.ca/emission?id=6',
+ 'only_matching': True,
+ }, {
+ 'url': 'https://www.cpac.ca/rechercher?key=hudson&type=all&order=desc',
+ 'only_matching': True,
+ }]
+
+ def _real_extract(self, url):
+ video_id = self._match_id(url)
+ url_lang = 'fr' if any(x in url for x in ('/emission?', '/rechercher?')) else 'en'
+ pl_type, list_type = ('program', 'itemList') if any(x in url for x in ('/program?', '/emission?')) else ('search', 'searchResult')
+ api_url = (
+ 'https://www.cpac.ca/api/1/services/contentModel.json?url=/site/website/%s/index.xml&crafterSite=cpacca&%s'
+ % (pl_type, video_id, ))
+ content = self._download_json(api_url, video_id)
+ entries = []
+ total_pages = int_or_none(try_get(content, lambda x: x['page'][list_type]['totalPages']), default=1)
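+        # Page 1 is already in `content`; only later pages are re-fetched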
+ for page in range(1, total_pages + 1):
+ if page > 1:
+ api_url = update_url_query(api_url, {'page': '%d' % (page, ), })
+ content = self._download_json(
+ api_url, video_id,
+ note='Downloading continuation - %d' % (page, ),
+ fatal=False)
+
+ for item in try_get(content, lambda x: x['page'][list_type]['item'], list) or []:
+ episode_url = urljoin(url, try_get(item, lambda x: x['url_%s_s' % (url_lang, )]))
+ if episode_url:
+ entries.append(episode_url)
+
+ return self.playlist_result(
+ (self.url_result(entry) for entry in entries),
+ playlist_id=video_id,
+ playlist_title=try_get(content, lambda x: x['page']['program']['title_%s_t' % (url_lang, )]) or video_id.split('=')[-1],
+ playlist_description=try_get(content, lambda x: x['page']['program']['description_%s_t' % (url_lang, )]),
+ )
diff --git a/yt_dlp/extractor/cracked.py b/yt_dlp/extractor/cracked.py
new file mode 100644
index 0000000..c6aabcc
--- /dev/null
+++ b/yt_dlp/extractor/cracked.py
@@ -0,0 +1,88 @@
+import re
+
+from .common import InfoExtractor
+from .youtube import YoutubeIE
+from ..utils import (
+ parse_iso8601,
+ str_to_int,
+)
+
+
+class CrackedIE(InfoExtractor):
+ _VALID_URL = r'https?://(?:www\.)?cracked\.com/video_(?P<id>\d+)_[\da-z-]+\.html'
+ _TESTS = [{
+ 'url': 'http://www.cracked.com/video_19070_if-animal-actors-got-e21-true-hollywood-stories.html',
+ 'md5': '89b90b9824e3806ca95072c4d78f13f7',
+ 'info_dict': {
+ 'id': '19070',
+ 'ext': 'mp4',
+ 'title': 'If Animal Actors Got E! True Hollywood Stories',
+ 'timestamp': 1404954000,
+ 'upload_date': '20140710',
+ }
+ }, {
+ # youtube embed
+ 'url': 'http://www.cracked.com/video_19006_4-plot-holes-you-didnt-notice-in-your-favorite-movies.html',
+ 'md5': 'ccd52866b50bde63a6ef3b35016ba8c7',
+ 'info_dict': {
+ 'id': 'EjI00A3rZD0',
+ 'ext': 'mp4',
+ 'title': "4 Plot Holes You Didn't Notice in Your Favorite Movies - The Spit Take",
+ 'description': 'md5:c603708c718b796fe6079e2b3351ffc7',
+ 'upload_date': '20140725',
+ 'uploader_id': 'Cracked',
+ 'uploader': 'Cracked',
+ }
+ }]
+
+ def _real_extract(self, url):
+ video_id = self._match_id(url)
+
+ webpage = self._download_webpage(url, video_id)
+
+ youtube_url = YoutubeIE._extract_url(webpage)
+ if youtube_url:
+ return self.url_result(youtube_url, ie=YoutubeIE.ie_key())
+
+ video_url = self._html_search_regex(
+ [r'var\s+CK_vidSrc\s*=\s*"([^"]+)"', r'<video\s+src="([^"]+)"'],
+ webpage, 'video URL')
+
+ title = self._search_regex(
+ [r'property="?og:title"?\s+content="([^"]+)"', r'class="?title"?>([^<]+)'],
+ webpage, 'title')
+
+ description = self._search_regex(
+ r'name="?(?:og:)?description"?\s+content="([^"]+)"',
+ webpage, 'description', default=None)
+
+ timestamp = self._html_search_regex(
+ r'"date"\s*:\s*"([^"]+)"', webpage, 'upload date', fatal=False)
+ if timestamp:
+ timestamp = parse_iso8601(timestamp[:-6])
+
+ view_count = str_to_int(self._html_search_regex(
+ r'<span\s+class="?views"? id="?viewCounts"?>([\d,\.]+) Views</span>',
+ webpage, 'view count', fatal=False))
+ comment_count = str_to_int(self._html_search_regex(
+ r'<span\s+id="?commentCounts"?>([\d,\.]+)</span>',
+ webpage, 'comment count', fatal=False))
+
+ m = re.search(r'_(?P<width>\d+)X(?P<height>\d+)\.mp4$', video_url)
+ if m:
+ width = int(m.group('width'))
+ height = int(m.group('height'))
+ else:
+ width = height = None
+
+ return {
+ 'id': video_id,
+ 'url': video_url,
+ 'title': title,
+ 'description': description,
+ 'timestamp': timestamp,
+ 'view_count': view_count,
+ 'comment_count': comment_count,
+ 'height': height,
+ 'width': width,
+ }
diff --git a/yt_dlp/extractor/crackle.py b/yt_dlp/extractor/crackle.py
new file mode 100644
index 0000000..1ef90b5
--- /dev/null
+++ b/yt_dlp/extractor/crackle.py
@@ -0,0 +1,243 @@
+import hashlib
+import hmac
+import re
+import time
+
+from .common import InfoExtractor
+from ..networking.exceptions import HTTPError
+from ..utils import (
+ determine_ext,
+ float_or_none,
+ int_or_none,
+ orderedSet,
+ parse_age_limit,
+ parse_duration,
+ url_or_none,
+ ExtractorError
+)
+
+
+class CrackleIE(InfoExtractor):
+ _VALID_URL = r'(?:crackle:|https?://(?:(?:www|m)\.)?(?:sony)?crackle\.com/(?:playlist/\d+/|(?:[^/]+/)+))(?P<id>\d+)'
+ _TESTS = [{
+ # Crackle is available in the United States and territories
+ 'url': 'https://www.crackle.com/thanksgiving/2510064',
+ 'info_dict': {
+ 'id': '2510064',
+ 'ext': 'mp4',
+ 'title': 'Touch Football',
+ 'description': 'md5:cfbb513cf5de41e8b56d7ab756cff4df',
+ 'duration': 1398,
+ 'view_count': int,
+ 'average_rating': 0,
+ 'age_limit': 17,
+ 'genre': 'Comedy',
+ 'creator': 'Daniel Powell',
+ 'artist': 'Chris Elliott, Amy Sedaris',
+ 'release_year': 2016,
+ 'series': 'Thanksgiving',
+ 'episode': 'Touch Football',
+ 'season_number': 1,
+ 'episode_number': 1,
+ },
+ 'params': {
+ # m3u8 download
+ 'skip_download': True,
+ },
+ 'expected_warnings': [
+ 'Trying with a list of known countries'
+ ],
+ }, {
+ 'url': 'https://www.sonycrackle.com/thanksgiving/2510064',
+ 'only_matching': True,
+ }]
+
+ _MEDIA_FILE_SLOTS = {
+ '360p.mp4': {
+ 'width': 640,
+ 'height': 360,
+ },
+ '480p.mp4': {
+ 'width': 768,
+ 'height': 432,
+ },
+ '480p_1mbps.mp4': {
+ 'width': 852,
+ 'height': 480,
+ },
+ }
+
+ def _download_json(self, url, *args, **kwargs):
+ # Authorization generation algorithm is reverse engineered from:
+ # https://www.sonycrackle.com/static/js/main.ea93451f.chunk.js
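+        # The token is HMAC-SHA1("<url>|<timestamp>") with a static key,
+        # sent alongside the timestamp and two constant fields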
+ timestamp = time.strftime('%Y%m%d%H%M', time.gmtime())
+ h = hmac.new(b'IGSLUQCBDFHEOIFM', '|'.join([url, timestamp]).encode(), hashlib.sha1).hexdigest().upper()
+ headers = {
+ 'Accept': 'application/json',
+ 'Authorization': '|'.join([h, timestamp, '117', '1']),
+ }
+ return InfoExtractor._download_json(self, url, *args, headers=headers, **kwargs)
+
+ def _real_extract(self, url):
+ video_id = self._match_id(url)
+
+ geo_bypass_country = self.get_param('geo_bypass_country', None)
+ countries = orderedSet((geo_bypass_country, 'US', 'AU', 'CA', 'AS', 'FM', 'GU', 'MP', 'PR', 'PW', 'MH', 'VI', ''))
+ num_countries, num = len(countries) - 1, 0
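+        # Try the user's country first, then a hard-coded list of regions
+        # where Crackle is available, and finally geo-detect via the API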
+
+ media = {}
+ for num, country in enumerate(countries):
+ if num == 1: # start hard-coded list
+ self.report_warning('%s. Trying with a list of known countries' % (
+ 'Unable to obtain video formats from %s API' % geo_bypass_country if geo_bypass_country
+ else 'No country code was given using --geo-bypass-country'))
+ elif num == num_countries: # end of list
+ geo_info = self._download_json(
+ 'https://web-api-us.crackle.com/Service.svc/geo/country',
+ video_id, fatal=False, note='Downloading geo-location information from crackle API',
+ errnote='Unable to fetch geo-location information from crackle') or {}
+ country = geo_info.get('CountryCode')
+ if country is None:
+ continue
+ self.to_screen('%s identified country as %s' % (self.IE_NAME, country))
+ if country in countries:
+ self.to_screen('Downloading from %s API was already attempted. Skipping...' % country)
+ continue
+
+ if country is None:
+ continue
+ try:
+ media = self._download_json(
+ 'https://web-api-us.crackle.com/Service.svc/details/media/%s/%s?disableProtocols=true' % (video_id, country),
+ video_id, note='Downloading media JSON from %s API' % country,
+ errnote='Unable to download media JSON')
+ except ExtractorError as e:
+ # 401 means geo restriction, trying next country
+ if isinstance(e.cause, HTTPError) and e.cause.status == 401:
+ continue
+ raise
+
+ status = media.get('status')
+ if status.get('messageCode') != '0':
+ raise ExtractorError(
+ '%s said: %s %s - %s' % (
+ self.IE_NAME, status.get('messageCodeDescription'), status.get('messageCode'), status.get('message')),
+ expected=True)
+
+ # Found video formats
+ if isinstance(media.get('MediaURLs'), list):
+ break
+
+ ignore_no_formats = self.get_param('ignore_no_formats_error')
+
+ if not media or (not media.get('MediaURLs') and not ignore_no_formats):
+ raise ExtractorError(
+ 'Unable to access the crackle API. Try passing your country code '
+                'to --geo-bypass-country. If it still does not work even though the '
+                'video is available in your country, please report this issue')
+ title = media['Title']
+
+ formats, subtitles = [], {}
+ has_drm = False
+ for e in media.get('MediaURLs') or []:
+ if e.get('UseDRM'):
+ has_drm = True
+ format_url = url_or_none(e.get('DRMPath'))
+ else:
+ format_url = url_or_none(e.get('Path'))
+ if not format_url:
+ continue
+ ext = determine_ext(format_url)
+ if ext == 'm3u8':
+ fmts, subs = self._extract_m3u8_formats_and_subtitles(
+ format_url, video_id, 'mp4', entry_protocol='m3u8_native',
+ m3u8_id='hls', fatal=False)
+ formats.extend(fmts)
+ subtitles = self._merge_subtitles(subtitles, subs)
+ elif ext == 'mpd':
+ fmts, subs = self._extract_mpd_formats_and_subtitles(
+ format_url, video_id, mpd_id='dash', fatal=False)
+ formats.extend(fmts)
+ subtitles = self._merge_subtitles(subtitles, subs)
+ elif format_url.endswith('.ism/Manifest'):
+ fmts, subs = self._extract_ism_formats_and_subtitles(
+ format_url, video_id, ism_id='mss', fatal=False)
+ formats.extend(fmts)
+ subtitles = self._merge_subtitles(subtitles, subs)
+ else:
+ mfs_path = e.get('Type')
+ mfs_info = self._MEDIA_FILE_SLOTS.get(mfs_path)
+ if not mfs_info:
+ continue
+ formats.append({
+ 'url': format_url,
+ 'format_id': 'http-' + mfs_path.split('.')[0],
+ 'width': mfs_info['width'],
+ 'height': mfs_info['height'],
+ })
+ if not formats and has_drm:
+ self.report_drm(video_id)
+
+ description = media.get('Description')
+ duration = int_or_none(media.get(
+ 'DurationInSeconds')) or parse_duration(media.get('Duration'))
+ view_count = int_or_none(media.get('CountViews'))
+ average_rating = float_or_none(media.get('UserRating'))
+ age_limit = parse_age_limit(media.get('Rating'))
+ genre = media.get('Genre')
+ release_year = int_or_none(media.get('ReleaseYear'))
+ creator = media.get('Directors')
+ artist = media.get('Cast')
+
+ if media.get('MediaTypeDisplayValue') == 'Full Episode':
+ series = media.get('ShowName')
+ episode = title
+ season_number = int_or_none(media.get('Season'))
+ episode_number = int_or_none(media.get('Episode'))
+ else:
+ series = episode = season_number = episode_number = None
+
+ cc_files = media.get('ClosedCaptionFiles')
+ if isinstance(cc_files, list):
+ for cc_file in cc_files:
+ if not isinstance(cc_file, dict):
+ continue
+ cc_url = url_or_none(cc_file.get('Path'))
+ if not cc_url:
+ continue
+ lang = cc_file.get('Locale') or 'en'
+ subtitles.setdefault(lang, []).append({'url': cc_url})
+
+ thumbnails = []
+ images = media.get('Images')
+        if isinstance(images, dict):  # .items() below requires a mapping, not a list
+ for image_key, image_url in images.items():
+ mobj = re.search(r'Img_(\d+)[xX](\d+)', image_key)
+ if not mobj:
+ continue
+ thumbnails.append({
+ 'url': image_url,
+ 'width': int(mobj.group(1)),
+ 'height': int(mobj.group(2)),
+ })
+
+ return {
+ 'id': video_id,
+ 'title': title,
+ 'description': description,
+ 'duration': duration,
+ 'view_count': view_count,
+ 'average_rating': average_rating,
+ 'age_limit': age_limit,
+ 'genre': genre,
+ 'creator': creator,
+ 'artist': artist,
+ 'release_year': release_year,
+ 'series': series,
+ 'episode': episode,
+ 'season_number': season_number,
+ 'episode_number': episode_number,
+ 'thumbnails': thumbnails,
+ 'subtitles': subtitles,
+ 'formats': formats,
+ }
diff --git a/yt_dlp/extractor/craftsy.py b/yt_dlp/extractor/craftsy.py
new file mode 100644
index 0000000..3a05ed4
--- /dev/null
+++ b/yt_dlp/extractor/craftsy.py
@@ -0,0 +1,75 @@
+import json
+
+from .brightcove import BrightcoveNewIE
+from .common import InfoExtractor
+from ..utils import (
+ extract_attributes,
+ get_element_html_by_class,
+ get_element_text_and_html_by_tag,
+)
+from ..utils.traversal import traverse_obj
+
+
+class CraftsyIE(InfoExtractor):
+ _VALID_URL = r'https?://www\.craftsy\.com/class/(?P<id>[\w-]+)'
+ _TESTS = [{
+ 'url': 'https://www.craftsy.com/class/the-midnight-quilt-show-season-5/',
+ 'info_dict': {
+ 'id': 'the-midnight-quilt-show-season-5',
+ 'title': 'The Midnight Quilt Show Season 5',
+ 'description': 'md5:113eda818e985d1a566625fb2f833b7a',
+ },
+ 'playlist_count': 10,
+ }, {
+ 'url': 'https://www.craftsy.com/class/sew-your-own-designer-handbag/',
+ 'info_dict': {
+ 'id': 'sew-your-own-designer-handbag',
+ 'title': 'Sew Your Own Designer Handbag',
+ 'description': 'md5:8270d0ef5427d3c895a27351aeaac276',
+ },
+ 'playlist_mincount': 1,
+ }, {
+ 'url': 'https://www.craftsy.com/class/all-access-estes-park-wool-market/',
+ 'info_dict': {
+ 'id': 'all-access-estes-park-wool-market',
+ 'title': 'All Access: Estes Park Wool Market',
+ 'description': 'md5:aded1bd8d38ae2fae4dae936c0ae01e7',
+ },
+ 'playlist_count': 6,
+ }]
+
+ def _real_extract(self, url):
+ video_id = self._match_id(url)
+ webpage = self._download_webpage(url, video_id)
+
+ video_player = get_element_html_by_class('class-video-player', webpage)
+ video_data = traverse_obj(video_player, (
+ {extract_attributes}, 'wire:snapshot', {json.loads}, 'data', {dict})) or {}
+ video_js = traverse_obj(video_player, (
+ {lambda x: get_element_text_and_html_by_tag('video-js', x)}, 1, {extract_attributes})) or {}
+
+ has_access = video_data.get('userHasAccess')
+ lessons = traverse_obj(video_data, ('lessons', ..., ..., lambda _, v: v['video_id']))
+
+ preview_id = video_js.get('data-video-id')
+ if preview_id and preview_id not in traverse_obj(lessons, (..., 'video_id')):
+ if not lessons and not has_access:
+ self.report_warning(
+ 'Only extracting preview. For the full class, pass cookies '
+ + f'from an account that has access. {self._login_hint()}')
+ lessons.append({'video_id': preview_id})
+
+ if not lessons and not has_access:
+ self.raise_login_required('You do not have access to this class')
+
+ account_id = video_data.get('accountId') or video_js['data-account']
+
+ def entries(lessons):
+ for lesson in lessons:
+ yield self.url_result(
+ f'http://players.brightcove.net/{account_id}/default_default/index.html?videoId={lesson["video_id"]}',
+ BrightcoveNewIE, lesson['video_id'], lesson.get('title'))
+
+ return self.playlist_result(
+ entries(lessons), video_id, self._html_search_meta(('og:title', 'twitter:title'), webpage),
+ self._html_search_meta(('og:description', 'description'), webpage, default=None))
diff --git a/yt_dlp/extractor/crooksandliars.py b/yt_dlp/extractor/crooksandliars.py
new file mode 100644
index 0000000..2ee0730
--- /dev/null
+++ b/yt_dlp/extractor/crooksandliars.py
@@ -0,0 +1,56 @@
+from .common import InfoExtractor
+from ..utils import (
+ int_or_none,
+ qualities,
+)
+
+
+class CrooksAndLiarsIE(InfoExtractor):
+ _VALID_URL = r'https?://embed\.crooksandliars\.com/(?:embed|v)/(?P<id>[A-Za-z0-9]+)'
+ _EMBED_REGEX = [r'<(?:iframe[^>]+src|param[^>]+value)=(["\'])(?P<url>(?:https?:)?//embed\.crooksandliars\.com/(?:embed|v)/.+?)\1']
+
+ _TESTS = [{
+ 'url': 'https://embed.crooksandliars.com/embed/8RUoRhRi',
+ 'info_dict': {
+ 'id': '8RUoRhRi',
+ 'ext': 'mp4',
+ 'title': 'Fox & Friends Says Protecting Atheists From Discrimination Is Anti-Christian!',
+ 'description': 'md5:e1a46ad1650e3a5ec7196d432799127f',
+ 'thumbnail': r're:^https?://.*\.jpg',
+ 'timestamp': 1428207000,
+ 'upload_date': '20150405',
+ 'uploader': 'Heather',
+ 'duration': 236,
+ }
+ }, {
+ 'url': 'http://embed.crooksandliars.com/v/MTE3MjUtMzQ2MzA',
+ 'only_matching': True,
+ }]
+
+ def _real_extract(self, url):
+ video_id = self._match_id(url)
+
+ webpage = self._download_webpage(
+ 'http://embed.crooksandliars.com/embed/%s' % video_id, video_id)
+
+ manifest = self._search_json(r'var\s+manifest\s*=', webpage, 'manifest JSON', video_id)
+
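+ # qualities() ranks format_ids by their position in the tuple, so later entries are preferred,
+ # e.g. quality('mp4_high') == 3 while quality('webm_low') == 0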
+ quality = qualities(('webm_low', 'mp4_low', 'webm_high', 'mp4_high'))
+
+ formats = [{
+ 'url': item['url'],
+ 'format_id': item['type'],
+ 'quality': quality(item['type']),
+ } for item in manifest['flavors'] if item['mime'].startswith('video/')]
+
+ return {
+ 'url': url,
+ 'id': video_id,
+ 'title': manifest['title'],
+ 'description': manifest.get('description'),
+ 'thumbnail': self._proto_relative_url(manifest.get('poster')),
+ 'timestamp': int_or_none(manifest.get('created')),
+ 'uploader': manifest.get('author'),
+ 'duration': int_or_none(manifest.get('duration')),
+ 'formats': formats,
+ }
diff --git a/yt_dlp/extractor/crowdbunker.py b/yt_dlp/extractor/crowdbunker.py
new file mode 100644
index 0000000..d83c015
--- /dev/null
+++ b/yt_dlp/extractor/crowdbunker.py
@@ -0,0 +1,110 @@
+import itertools
+
+from .common import InfoExtractor
+from ..utils import (
+ int_or_none,
+ try_get,
+ unified_strdate,
+)
+
+
+class CrowdBunkerIE(InfoExtractor):
+ _VALID_URL = r'https?://(?:www\.)?crowdbunker\.com/v/(?P<id>[^/?#$&]+)'
+
+ _TESTS = [{
+ 'url': 'https://crowdbunker.com/v/0z4Kms8pi8I',
+ 'info_dict': {
+ 'id': '0z4Kms8pi8I',
+ 'ext': 'mp4',
+ 'title': '117) Pass vax et solutions',
+ 'description': 'md5:86bcb422c29475dbd2b5dcfa6ec3749c',
+ 'view_count': int,
+ 'duration': 5386,
+ 'uploader': 'Jérémie Mercier',
+ 'uploader_id': 'UCeN_qQV829NYf0pvPJhW5dQ',
+ 'like_count': int,
+ 'upload_date': '20211218',
+ 'thumbnail': 'https://scw.divulg.org/cb-medias4/images/0z4Kms8pi8I/maxres.jpg'
+ },
+ 'params': {'skip_download': True}
+ }]
+
+ def _real_extract(self, url):
+ id = self._match_id(url)
+ data_json = self._download_json(f'https://api.divulg.org/post/{id}/details',
+ id, headers={'accept': 'application/json, text/plain, */*'})
+ video_json = data_json['video']
+ formats, subtitles = [], {}
+ for sub in video_json.get('captions') or []:
+ sub_url = try_get(sub, lambda x: x['file']['url'])
+ if not sub_url:
+ continue
+ subtitles.setdefault(sub.get('languageCode', 'fr'), []).append({
+ 'url': sub_url,
+ })
+
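+ # The API may expose both DASH and HLS manifests; collect formats and subtitles from each one present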
+ mpd_url = try_get(video_json, lambda x: x['dashManifest']['url'])
+ if mpd_url:
+ fmts, subs = self._extract_mpd_formats_and_subtitles(mpd_url, id)
+ formats.extend(fmts)
+ subtitles = self._merge_subtitles(subtitles, subs)
+ m3u8_url = try_get(video_json, lambda x: x['hlsManifest']['url'])
+ if m3u8_url:
+ fmts, subs = self._extract_m3u8_formats_and_subtitles(m3u8_url, id)
+ formats.extend(fmts)
+ subtitles = self._merge_subtitles(subtitles, subs)
+
+ thumbnails = [{
+ 'url': image['url'],
+ 'height': int_or_none(image.get('height')),
+ 'width': int_or_none(image.get('width')),
+ } for image in video_json.get('thumbnails') or [] if image.get('url')]
+
+ return {
+ 'id': id,
+ 'title': video_json.get('title'),
+ 'description': video_json.get('description'),
+ 'view_count': video_json.get('viewCount'),
+ 'duration': video_json.get('duration'),
+ 'uploader': try_get(data_json, lambda x: x['channel']['name']),
+ 'uploader_id': try_get(data_json, lambda x: x['channel']['id']),
+ 'like_count': data_json.get('likesCount'),
+ 'upload_date': unified_strdate(video_json.get('publishedAt') or video_json.get('createdAt')),
+ 'thumbnails': thumbnails,
+ 'formats': formats,
+ 'subtitles': subtitles,
+ }
+
+
+class CrowdBunkerChannelIE(InfoExtractor):
+ _VALID_URL = r'https?://(?:www\.)?crowdbunker\.com/@(?P<id>[^/?#$&]+)'
+
+ _TESTS = [{
+ 'url': 'https://crowdbunker.com/@Milan_UHRIN',
+ 'playlist_mincount': 14,
+ 'info_dict': {
+ 'id': 'Milan_UHRIN',
+ },
+ }]
+
+ def _entries(self, id):
+ last = None
+
+ for page in itertools.count():
+ channel_json = self._download_json(
+ f'https://api.divulg.org/organization/{id}/posts', id, headers={'accept': 'application/json, text/plain, */*'},
+ query={'after': last} if last else {}, note=f'Downloading Page {page}')
+ for item in channel_json.get('items') or []:
+ v_id = item.get('uid')
+ if not v_id:
+ continue
+ yield self.url_result(
+ 'https://crowdbunker.com/v/%s' % v_id, ie=CrowdBunkerIE.ie_key(), video_id=v_id)
+ last = channel_json.get('last')
+ if not last:
+ break
+
+ def _real_extract(self, url):
+ id = self._match_id(url)
+ return self.playlist_result(self._entries(id), playlist_id=id)
diff --git a/yt_dlp/extractor/crtvg.py b/yt_dlp/extractor/crtvg.py
new file mode 100644
index 0000000..21325e3
--- /dev/null
+++ b/yt_dlp/extractor/crtvg.py
@@ -0,0 +1,54 @@
+import re
+
+from .common import InfoExtractor
+from ..utils import make_archive_id, remove_end
+
+
+class CrtvgIE(InfoExtractor):
+ _VALID_URL = r'https?://(?:www\.)?crtvg\.es/tvg/a-carta/(?P<id>[^/#?]+)'
+ _TESTS = [{
+ 'url': 'https://www.crtvg.es/tvg/a-carta/os-caimans-do-tea-5839623',
+ 'md5': 'c0958d9ff90e4503a75544358758921d',
+ 'info_dict': {
+ 'id': 'os-caimans-do-tea-5839623',
+ 'title': 'Os caimáns do Tea',
+ 'ext': 'mp4',
+ 'description': 'md5:f71cfba21ae564f0a6f415b31de1f842',
+ 'thumbnail': r're:^https?://.*\.(?:jpg|png)',
+ '_old_archive_ids': ['crtvg 5839623'],
+ },
+ 'params': {'skip_download': 'm3u8'}
+ }, {
+ 'url': 'https://www.crtvg.es/tvg/a-carta/a-parabolica-love-story',
+ 'md5': '9a47b95a1749db7b7eb3214904624584',
+ 'info_dict': {
+ 'id': 'a-parabolica-love-story',
+ 'title': 'A parabólica / Trabuco, o can mordedor / Love Story',
+ 'ext': 'mp4',
+ 'description': 'md5:f71cfba21ae564f0a6f415b31de1f842',
+ 'thumbnail': r're:^https?://.*\.(?:jpg|png)',
+ },
+ 'params': {'skip_download': 'm3u8'}
+ }]
+
+ def _real_extract(self, url):
+ video_id = self._match_id(url)
+ webpage = self._download_webpage(url, video_id)
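+ # The page embeds a base stream URL; the HLS and DASH manifests are served at fixed paths beneath it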
+ video_url = self._search_regex(r'var\s+url\s*=\s*["\']([^"\']+)', webpage, 'video url')
+ formats = self._extract_m3u8_formats(video_url + '/playlist.m3u8', video_id, fatal=False)
+ formats.extend(self._extract_mpd_formats(video_url + '/manifest.mpd', video_id, fatal=False))
+
+ old_video_id = None
+ if mobj := re.fullmatch(r'[^/#?]+-(?P<old_id>\d{7})', video_id):
+ old_video_id = [make_archive_id(self, mobj.group('old_id'))]
+
+ return {
+ 'id': video_id,
+ '_old_archive_ids': old_video_id,
+ 'formats': formats,
+ 'title': remove_end(self._html_search_meta(
+ ['og:title', 'twitter:title'], webpage, 'title', default=None), ' | CRTVG'),
+ 'description': self._html_search_meta('description', webpage, 'description', default=None),
+ 'thumbnail': self._html_search_meta(['og:image', 'twitter:image'], webpage, 'thumbnail', default=None),
+ }
diff --git a/yt_dlp/extractor/crunchyroll.py b/yt_dlp/extractor/crunchyroll.py
new file mode 100644
index 0000000..8d997de
--- /dev/null
+++ b/yt_dlp/extractor/crunchyroll.py
@@ -0,0 +1,654 @@
+import base64
+
+from .common import InfoExtractor
+from ..networking.exceptions import HTTPError
+from ..utils import (
+ ExtractorError,
+ float_or_none,
+ format_field,
+ int_or_none,
+ join_nonempty,
+ parse_age_limit,
+ parse_count,
+ parse_iso8601,
+ qualities,
+ remove_start,
+ time_seconds,
+ traverse_obj,
+ url_or_none,
+ urlencode_postdata,
+)
+
+
+class CrunchyrollBaseIE(InfoExtractor):
+ _BASE_URL = 'https://www.crunchyroll.com'
+ _API_BASE = 'https://api.crunchyroll.com'
+ _NETRC_MACHINE = 'crunchyroll'
+ _AUTH_HEADERS = None
+ _API_ENDPOINT = None
+ _BASIC_AUTH = None
+ _CLIENT_ID = ('cr_web', 'noaihdevm_6iyg0a8l0q')
+ _LOCALE_LOOKUP = {
+ 'ar': 'ar-SA',
+ 'de': 'de-DE',
+ '': 'en-US',
+ 'es': 'es-419',
+ 'es-es': 'es-ES',
+ 'fr': 'fr-FR',
+ 'it': 'it-IT',
+ 'pt-br': 'pt-BR',
+ 'pt-pt': 'pt-PT',
+ 'ru': 'ru-RU',
+ 'hi': 'hi-IN',
+ }
+
+ @property
+ def is_logged_in(self):
+ return bool(self._get_cookies(self._BASE_URL).get('etp_rt'))
+
+ def _perform_login(self, username, password):
+ if self.is_logged_in:
+ return
+
+ upsell_response = self._download_json(
+ f'{self._API_BASE}/get_upsell_data.0.json', None, 'Getting session id',
+ query={
+ 'sess_id': 1,
+ 'device_id': 'whatvalueshouldbeforweb',
+ 'device_type': 'com.crunchyroll.static',
+ 'access_token': 'giKq5eY27ny3cqz',
+ 'referer': f'{self._BASE_URL}/welcome/login'
+ })
+ if upsell_response['code'] != 'ok':
+ raise ExtractorError('Could not get session id')
+ session_id = upsell_response['data']['session_id']
+
+ login_response = self._download_json(
+ f'{self._API_BASE}/login.1.json', None, 'Logging in',
+ data=urlencode_postdata({
+ 'account': username,
+ 'password': password,
+ 'session_id': session_id
+ }))
+ if login_response['code'] != 'ok':
+ raise ExtractorError('Login failed. Server message: %s' % login_response['message'], expected=True)
+ if not self.is_logged_in:
+ raise ExtractorError('Login succeeded but did not set etp_rt cookie')
+
+ def _update_auth(self):
+ if CrunchyrollBaseIE._AUTH_HEADERS and CrunchyrollBaseIE._AUTH_REFRESH > time_seconds():
+ return
+
+ if not CrunchyrollBaseIE._BASIC_AUTH:
+ cx_api_param = self._CLIENT_ID[self.is_logged_in]
+ self.write_debug(f'Using cxApiParam={cx_api_param}')
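+ # HTTP Basic auth uses the public client ID as username with an empty secret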
+ CrunchyrollBaseIE._BASIC_AUTH = 'Basic ' + base64.b64encode(f'{cx_api_param}:'.encode()).decode()
+
+ grant_type = 'etp_rt_cookie' if self.is_logged_in else 'client_id'
+ try:
+ auth_response = self._download_json(
+ f'{self._BASE_URL}/auth/v1/token', None, note=f'Authenticating with grant_type={grant_type}',
+ headers={'Authorization': CrunchyrollBaseIE._BASIC_AUTH}, data=f'grant_type={grant_type}'.encode())
+ except ExtractorError as error:
+ if isinstance(error.cause, HTTPError) and error.cause.status == 403:
+ raise ExtractorError(
+ 'Request blocked by Cloudflare; navigate to Crunchyroll in your browser, '
+ 'then pass the fresh cookies (with --cookies-from-browser or --cookies) '
+ 'and your browser\'s User-Agent (with --user-agent)', expected=True)
+ raise
+
+ CrunchyrollBaseIE._AUTH_HEADERS = {'Authorization': auth_response['token_type'] + ' ' + auth_response['access_token']}
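+ # Schedule a refresh 10 seconds before the token's reported expiry (default 300s)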
+ CrunchyrollBaseIE._AUTH_REFRESH = time_seconds(seconds=traverse_obj(auth_response, ('expires_in', {float_or_none}), default=300) - 10)
+
+ def _locale_from_language(self, language):
+ config_locale = self._configuration_arg('metadata', ie_key=CrunchyrollBetaIE, casesense=True)
+ return config_locale[0] if config_locale else self._LOCALE_LOOKUP.get(language)
+
+ def _call_base_api(self, endpoint, internal_id, lang, note=None, query={}):
+ self._update_auth()
+
+ if not endpoint.startswith('/'):
+ endpoint = f'/{endpoint}'
+
+ query = query.copy()
+ locale = self._locale_from_language(lang)
+ if locale:
+ query['locale'] = locale
+
+ return self._download_json(
+ f'{self._BASE_URL}{endpoint}', internal_id, note or f'Calling API: {endpoint}',
+ headers=CrunchyrollBaseIE._AUTH_HEADERS, query=query)
+
+ def _call_api(self, path, internal_id, lang, note='api', query={}):
+ if not path.startswith(f'/content/v2/{self._API_ENDPOINT}/'):
+ path = f'/content/v2/{self._API_ENDPOINT}/{path}'
+
+ try:
+ result = self._call_base_api(
+ path, internal_id, lang, f'Downloading {note} JSON ({self._API_ENDPOINT})', query=query)
+ except ExtractorError as error:
+ if isinstance(error.cause, HTTPError) and error.cause.status == 404:
+ return None
+ raise
+
+ if not result:
+ raise ExtractorError(f'Unexpected response when downloading {note} JSON')
+ return result
+
+ def _extract_formats(self, stream_response, display_id=None):
+ requested_formats = self._configuration_arg('format') or ['adaptive_hls']
+ available_formats = {}
+ for stream_type, streams in traverse_obj(
+ stream_response, (('streams', ('data', 0)), {dict.items}, ...)):
+ if stream_type not in requested_formats:
+ continue
+ for stream in traverse_obj(streams, lambda _, v: v['url']):
+ hardsub_lang = stream.get('hardsub_locale') or ''
+ format_id = join_nonempty(stream_type, format_field(stream, 'hardsub_locale', 'hardsub-%s'))
+ available_formats[hardsub_lang] = (stream_type, format_id, hardsub_lang, stream['url'])
+
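+ # The API marks streams without hardsubs with an empty hardsub_locale, so 'none' is normalized to ''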
+ requested_hardsubs = [('' if val == 'none' else val) for val in (self._configuration_arg('hardsub') or ['none'])]
+ if '' in available_formats and 'all' not in requested_hardsubs:
+ full_format_langs = set(requested_hardsubs)
+ self.to_screen(
+ 'To get all formats of a hardsub language, use '
+ '"--extractor-args crunchyrollbeta:hardsub=<language_code or all>". '
+ 'See https://github.com/yt-dlp/yt-dlp#crunchyrollbeta-crunchyroll for more info',
+ only_once=True)
+ else:
+ full_format_langs = set(map(str.lower, available_formats))
+
+ audio_locale = traverse_obj(stream_response, ((None, 'meta'), 'audio_locale'), get_all=False)
+ hardsub_preference = qualities(requested_hardsubs[::-1])
+ formats = []
+ for stream_type, format_id, hardsub_lang, stream_url in available_formats.values():
+ if stream_type.endswith('hls'):
+ if hardsub_lang.lower() in full_format_langs:
+ adaptive_formats = self._extract_m3u8_formats(
+ stream_url, display_id, 'mp4', m3u8_id=format_id,
+ fatal=False, note=f'Downloading {format_id} HLS manifest')
+ else:
+ adaptive_formats = (self._m3u8_meta_format(stream_url, ext='mp4', m3u8_id=format_id),)
+ elif stream_type.endswith('dash'):
+ adaptive_formats = self._extract_mpd_formats(
+ stream_url, display_id, mpd_id=format_id,
+ fatal=False, note=f'Downloading {format_id} MPD manifest')
+ else:
+ self.report_warning(f'Encountered unknown stream_type: {stream_type!r}', display_id, only_once=True)
+ continue
+ for f in adaptive_formats:
+ if f.get('acodec') != 'none':
+ f['language'] = audio_locale
+ f['quality'] = hardsub_preference(hardsub_lang.lower())
+ formats.extend(adaptive_formats)
+
+ return formats
+
+ def _extract_subtitles(self, data):
+ subtitles = {}
+
+ for locale, subtitle in traverse_obj(data, ((None, 'meta'), 'subtitles', {dict.items}, ...)):
+ subtitles[locale] = [traverse_obj(subtitle, {'url': 'url', 'ext': 'format'})]
+
+ return subtitles
+
+
+class CrunchyrollCmsBaseIE(CrunchyrollBaseIE):
+ _API_ENDPOINT = 'cms'
+ _CMS_EXPIRY = None
+
+ def _call_cms_api_signed(self, path, internal_id, lang, note='api'):
+ if not CrunchyrollCmsBaseIE._CMS_EXPIRY or CrunchyrollCmsBaseIE._CMS_EXPIRY <= time_seconds():
+ response = self._call_base_api('index/v2', None, lang, 'Retrieving signed policy')['cms_web']
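+ # These are CloudFront-style signed-URL parameters; they must accompany every signed CMS request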
+ CrunchyrollCmsBaseIE._CMS_QUERY = {
+ 'Policy': response['policy'],
+ 'Signature': response['signature'],
+ 'Key-Pair-Id': response['key_pair_id'],
+ }
+ CrunchyrollCmsBaseIE._CMS_BUCKET = response['bucket']
+ CrunchyrollCmsBaseIE._CMS_EXPIRY = parse_iso8601(response['expires']) - 10
+
+ if not path.startswith('/cms/v2'):
+ path = f'/cms/v2{CrunchyrollCmsBaseIE._CMS_BUCKET}/{path}'
+
+ return self._call_base_api(
+ path, internal_id, lang, f'Downloading {note} JSON (signed cms)', query=CrunchyrollCmsBaseIE._CMS_QUERY)
+
+
+class CrunchyrollBetaIE(CrunchyrollCmsBaseIE):
+ IE_NAME = 'crunchyroll'
+ _VALID_URL = r'''(?x)
+ https?://(?:beta\.|www\.)?crunchyroll\.com/
+ (?:(?P<lang>\w{2}(?:-\w{2})?)/)?
+ watch/(?!concert|musicvideo)(?P<id>\w+)'''
+ _TESTS = [{
+ # Premium only
+ 'url': 'https://www.crunchyroll.com/watch/GY2P1Q98Y/to-the-future',
+ 'info_dict': {
+ 'id': 'GY2P1Q98Y',
+ 'ext': 'mp4',
+ 'duration': 1380.241,
+ 'timestamp': 1459632600,
+ 'description': 'md5:a022fbec4fbb023d43631032c91ed64b',
+ 'title': 'World Trigger Episode 73 – To the Future',
+ 'upload_date': '20160402',
+ 'series': 'World Trigger',
+ 'series_id': 'GR757DMKY',
+ 'season': 'World Trigger',
+ 'season_id': 'GR9P39NJ6',
+ 'season_number': 1,
+ 'episode': 'To the Future',
+ 'episode_number': 73,
+ 'thumbnail': r're:^https://www.crunchyroll.com/imgsrv/.*\.jpeg?$',
+ 'chapters': 'count:2',
+ 'age_limit': 14,
+ 'like_count': int,
+ 'dislike_count': int,
+ },
+ 'params': {'skip_download': 'm3u8', 'format': 'all[format_id~=hardsub]'},
+ }, {
+ # Premium only
+ 'url': 'https://www.crunchyroll.com/watch/GYE5WKQGR',
+ 'info_dict': {
+ 'id': 'GYE5WKQGR',
+ 'ext': 'mp4',
+ 'duration': 366.459,
+ 'timestamp': 1476788400,
+ 'description': 'md5:74b67283ffddd75f6e224ca7dc031e76',
+ 'title': 'SHELTER – Porter Robinson presents Shelter the Animation',
+ 'upload_date': '20161018',
+ 'series': 'SHELTER',
+ 'series_id': 'GYGG09WWY',
+ 'season': 'SHELTER',
+ 'season_id': 'GR09MGK4R',
+ 'season_number': 1,
+ 'episode': 'Porter Robinson presents Shelter the Animation',
+ 'episode_number': 0,
+ 'thumbnail': r're:^https://www.crunchyroll.com/imgsrv/.*\.jpeg?$',
+ 'age_limit': 14,
+ 'like_count': int,
+ 'dislike_count': int,
+ },
+ 'params': {'skip_download': True},
+ }, {
+ 'url': 'https://www.crunchyroll.com/watch/GJWU2VKK3/cherry-blossom-meeting-and-a-coming-blizzard',
+ 'info_dict': {
+ 'id': 'GJWU2VKK3',
+ 'ext': 'mp4',
+ 'duration': 1420.054,
+ 'description': 'md5:2d1c67c0ec6ae514d9c30b0b99a625cd',
+ 'title': 'The Ice Guy and His Cool Female Colleague Episode 1 – Cherry Blossom Meeting and a Coming Blizzard',
+ 'series': 'The Ice Guy and His Cool Female Colleague',
+ 'series_id': 'GW4HM75NP',
+ 'season': 'The Ice Guy and His Cool Female Colleague',
+ 'season_id': 'GY9PC21VE',
+ 'season_number': 1,
+ 'episode': 'Cherry Blossom Meeting and a Coming Blizzard',
+ 'episode_number': 1,
+ 'chapters': 'count:2',
+ 'thumbnail': r're:^https://www.crunchyroll.com/imgsrv/.*\.jpeg?$',
+ 'timestamp': 1672839000,
+ 'upload_date': '20230104',
+ 'age_limit': 14,
+ 'like_count': int,
+ 'dislike_count': int,
+ },
+ 'params': {'skip_download': 'm3u8'},
+ }, {
+ 'url': 'https://www.crunchyroll.com/watch/GM8F313NQ',
+ 'info_dict': {
+ 'id': 'GM8F313NQ',
+ 'ext': 'mp4',
+ 'title': 'Garakowa -Restore the World-',
+ 'description': 'md5:8d2f8b6b9dd77d87810882e7d2ee5608',
+ 'duration': 3996.104,
+ 'age_limit': 13,
+ 'thumbnail': r're:^https://www.crunchyroll.com/imgsrv/.*\.jpeg?$',
+ },
+ 'params': {'skip_download': 'm3u8'},
+ }, {
+ 'url': 'https://www.crunchyroll.com/watch/G62PEZ2E6',
+ 'info_dict': {
+ 'id': 'G62PEZ2E6',
+ 'description': 'md5:8d2f8b6b9dd77d87810882e7d2ee5608',
+ 'age_limit': 13,
+ 'duration': 65.138,
+ 'title': 'Garakowa -Restore the World-',
+ },
+ 'playlist_mincount': 5,
+ }, {
+ 'url': 'https://www.crunchyroll.com/de/watch/GY2P1Q98Y',
+ 'only_matching': True,
+ }, {
+ 'url': 'https://beta.crunchyroll.com/pt-br/watch/G8WUN8VKP/the-ruler-of-conspiracy',
+ 'only_matching': True,
+ }]
+ # We want to support lazy playlist filtering, and movie listings cannot be inside a playlist
+ _RETURN_TYPE = 'video'
+
+ def _real_extract(self, url):
+ lang, internal_id = self._match_valid_url(url).group('lang', 'id')
+
+ # We need to use an unsigned API call to allow the ratings query string
+ response = traverse_obj(self._call_api(
+ f'objects/{internal_id}', internal_id, lang, 'object info', {'ratings': 'true'}), ('data', 0, {dict}))
+ if not response:
+ raise ExtractorError(f'No video with id {internal_id} could be found (possibly region locked?)', expected=True)
+
+ object_type = response.get('type')
+ if object_type == 'episode':
+ result = self._transform_episode_response(response)
+
+ elif object_type == 'movie':
+ result = self._transform_movie_response(response)
+
+ elif object_type == 'movie_listing':
+ first_movie_id = traverse_obj(response, ('movie_listing_metadata', 'first_movie_id'))
+ if not self._yes_playlist(internal_id, first_movie_id):
+ return self.url_result(f'{self._BASE_URL}/{lang}watch/{first_movie_id}', CrunchyrollBetaIE, first_movie_id)
+
+ def entries():
+ movies = self._call_api(f'movie_listings/{internal_id}/movies', internal_id, lang, 'movie list')
+ for movie_response in traverse_obj(movies, ('data', ...)):
+ yield self.url_result(
+ f'{self._BASE_URL}/{lang}watch/{movie_response["id"]}',
+ CrunchyrollBetaIE, **self._transform_movie_response(movie_response))
+
+ return self.playlist_result(entries(), **self._transform_movie_response(response))
+
+ else:
+ raise ExtractorError(f'Unknown object type {object_type}')
+
+ # There might be multiple audio languages for one object (`<object>_metadata.versions`),
+ # so we need to get the id from `streams_link` instead, or we don't know which language to choose
+ streams_link = response.get('streams_link')
+ if not streams_link and traverse_obj(response, (f'{object_type}_metadata', 'is_premium_only')):
+ message = f'This {object_type} is for premium members only'
+ if self.is_logged_in:
+ raise ExtractorError(message, expected=True)
+ self.raise_login_required(message)
+
+ # We need to go from the unsigned to the signed API to avoid getting soft-banned
+ stream_response = self._call_cms_api_signed(remove_start(
+ streams_link, '/content/v2/cms/'), internal_id, lang, 'stream info')
+ result['formats'] = self._extract_formats(stream_response, internal_id)
+ result['subtitles'] = self._extract_subtitles(stream_response)
+
+ # If no intro chapter is available, a 403 without usable data is returned
+ intro_chapter = self._download_json(
+ f'https://static.crunchyroll.com/datalab-intro-v2/{internal_id}.json',
+ internal_id, note='Downloading chapter info', fatal=False, errnote=False)
+ if isinstance(intro_chapter, dict):
+ result['chapters'] = [{
+ 'title': 'Intro',
+ 'start_time': float_or_none(intro_chapter.get('startTime')),
+ 'end_time': float_or_none(intro_chapter.get('endTime')),
+ }]
+
+ def calculate_count(item):
+ return parse_count(''.join((item['displayed'], item.get('unit') or '')))
+
+ result.update(traverse_obj(response, ('rating', {
+ 'like_count': ('up', {calculate_count}),
+ 'dislike_count': ('down', {calculate_count}),
+ })))
+
+ return result
+
+ @staticmethod
+ def _transform_episode_response(data):
+ metadata = traverse_obj(data, (('episode_metadata', None), {dict}), get_all=False) or {}
+ return {
+ 'id': data['id'],
+ 'title': ' \u2013 '.join((
+ ('%s%s' % (
+ format_field(metadata, 'season_title'),
+ format_field(metadata, 'episode', ' Episode %s'))),
+ format_field(data, 'title'))),
+ **traverse_obj(data, {
+ 'episode': ('title', {str}),
+ 'description': ('description', {str}, {lambda x: x.replace(r'\r\n', '\n')}),
+ 'thumbnails': ('images', 'thumbnail', ..., ..., {
+ 'url': ('source', {url_or_none}),
+ 'width': ('width', {int_or_none}),
+ 'height': ('height', {int_or_none}),
+ }),
+ }),
+ **traverse_obj(metadata, {
+ 'duration': ('duration_ms', {lambda x: float_or_none(x, 1000)}),
+ 'timestamp': ('upload_date', {parse_iso8601}),
+ 'series': ('series_title', {str}),
+ 'series_id': ('series_id', {str}),
+ 'season': ('season_title', {str}),
+ 'season_id': ('season_id', {str}),
+ 'season_number': ('season_number', ({int}, {float_or_none})),
+ 'episode_number': ('sequence_number', ({int}, {float_or_none})),
+ 'age_limit': ('maturity_ratings', -1, {parse_age_limit}),
+ 'language': ('audio_locale', {str}),
+ }, get_all=False),
+ }
+
+ @staticmethod
+ def _transform_movie_response(data):
+ metadata = traverse_obj(data, (('movie_metadata', 'movie_listing_metadata', None), {dict}), get_all=False) or {}
+ return {
+ 'id': data['id'],
+ **traverse_obj(data, {
+ 'title': ('title', {str}),
+ 'description': ('description', {str}, {lambda x: x.replace(r'\r\n', '\n')}),
+ 'thumbnails': ('images', 'thumbnail', ..., ..., {
+ 'url': ('source', {url_or_none}),
+ 'width': ('width', {int_or_none}),
+ 'height': ('height', {int_or_none}),
+ }),
+ }),
+ **traverse_obj(metadata, {
+ 'duration': ('duration_ms', {lambda x: float_or_none(x, 1000)}),
+ 'age_limit': ('maturity_ratings', -1, {parse_age_limit}),
+ }),
+ }
+
+
+class CrunchyrollBetaShowIE(CrunchyrollCmsBaseIE):
+ IE_NAME = 'crunchyroll:playlist'
+ _VALID_URL = r'''(?x)
+ https?://(?:beta\.|www\.)?crunchyroll\.com/
+ (?P<lang>(?:\w{2}(?:-\w{2})?/)?)
+ series/(?P<id>\w+)'''
+ _TESTS = [{
+ 'url': 'https://www.crunchyroll.com/series/GY19NQ2QR/Girl-Friend-BETA',
+ 'info_dict': {
+ 'id': 'GY19NQ2QR',
+ 'title': 'Girl Friend BETA',
+ 'description': 'md5:99c1b22ee30a74b536a8277ced8eb750',
+ # XXX: `thumbnail` does not get set from `thumbnails` in playlist
+ # 'thumbnail': r're:^https://www.crunchyroll.com/imgsrv/.*\.jpeg?$',
+ 'age_limit': 14,
+ },
+ 'playlist_mincount': 10,
+ }, {
+ 'url': 'https://beta.crunchyroll.com/it/series/GY19NQ2QR',
+ 'only_matching': True,
+ }]
+
+ def _real_extract(self, url):
+ lang, internal_id = self._match_valid_url(url).group('lang', 'id')
+
+ def entries():
+ seasons_response = self._call_cms_api_signed(f'seasons?series_id={internal_id}', internal_id, lang, 'seasons')
+ for season in traverse_obj(seasons_response, ('items', ..., {dict})):
+ episodes_response = self._call_cms_api_signed(
+ f'episodes?season_id={season["id"]}', season["id"], lang, 'episode list')
+ for episode_response in traverse_obj(episodes_response, ('items', ..., {dict})):
+ yield self.url_result(
+ f'{self._BASE_URL}/{lang}watch/{episode_response["id"]}',
+ CrunchyrollBetaIE, **CrunchyrollBetaIE._transform_episode_response(episode_response))
+
+ return self.playlist_result(
+ entries(), internal_id,
+ **traverse_obj(self._call_api(f'series/{internal_id}', internal_id, lang, 'series'), ('data', 0, {
+ 'title': ('title', {str}),
+ 'description': ('description', {lambda x: x.replace(r'\r\n', '\n')}),
+ 'age_limit': ('maturity_ratings', -1, {parse_age_limit}),
+ 'thumbnails': ('images', ..., ..., ..., {
+ 'url': ('source', {url_or_none}),
+ 'width': ('width', {int_or_none}),
+ 'height': ('height', {int_or_none}),
+ })
+ })))
+
+
+class CrunchyrollMusicIE(CrunchyrollBaseIE):
+ IE_NAME = 'crunchyroll:music'
+ _VALID_URL = r'''(?x)
+ https?://(?:www\.)?crunchyroll\.com/
+ (?P<lang>(?:\w{2}(?:-\w{2})?/)?)
+ watch/(?P<type>concert|musicvideo)/(?P<id>\w+)'''
+ _TESTS = [{
+ 'url': 'https://www.crunchyroll.com/de/watch/musicvideo/MV5B02C79',
+ 'info_dict': {
+ 'ext': 'mp4',
+ 'id': 'MV5B02C79',
+ 'display_id': 'egaono-hana',
+ 'title': 'Egaono Hana',
+ 'track': 'Egaono Hana',
+ 'artist': 'Goose house',
+ 'thumbnail': r're:(?i)^https://www.crunchyroll.com/imgsrv/.*\.jpeg?$',
+ 'genres': ['J-Pop'],
+ },
+ 'params': {'skip_download': 'm3u8'},
+ }, {
+ 'url': 'https://www.crunchyroll.com/watch/musicvideo/MV88BB7F2C',
+ 'info_dict': {
+ 'ext': 'mp4',
+ 'id': 'MV88BB7F2C',
+ 'display_id': 'crossing-field',
+ 'title': 'Crossing Field',
+ 'track': 'Crossing Field',
+ 'artist': 'LiSA',
+ 'thumbnail': r're:(?i)^https://www.crunchyroll.com/imgsrv/.*\.jpeg?$',
+ 'genres': ['Anime'],
+ },
+ 'params': {'skip_download': 'm3u8'},
+ }, {
+ 'url': 'https://www.crunchyroll.com/watch/concert/MC2E2AC135',
+ 'info_dict': {
+ 'ext': 'mp4',
+ 'id': 'MC2E2AC135',
+ 'display_id': 'live-is-smile-always-364joker-at-yokohama-arena',
+ 'title': 'LiVE is Smile Always-364+JOKER- at YOKOHAMA ARENA',
+ 'track': 'LiVE is Smile Always-364+JOKER- at YOKOHAMA ARENA',
+ 'artist': 'LiSA',
+ 'thumbnail': r're:(?i)^https://www.crunchyroll.com/imgsrv/.*\.jpeg?$',
+ 'description': 'md5:747444e7e6300907b7a43f0a0503072e',
+ 'genres': ['J-Pop'],
+ },
+ 'params': {'skip_download': 'm3u8'},
+ }, {
+ 'url': 'https://www.crunchyroll.com/de/watch/musicvideo/MV5B02C79/egaono-hana',
+ 'only_matching': True,
+ }, {
+ 'url': 'https://www.crunchyroll.com/watch/concert/MC2E2AC135/live-is-smile-always-364joker-at-yokohama-arena',
+ 'only_matching': True,
+ }, {
+ 'url': 'https://www.crunchyroll.com/watch/musicvideo/MV88BB7F2C/crossing-field',
+ 'only_matching': True,
+ }]
+ _API_ENDPOINT = 'music'
+
+ def _real_extract(self, url):
+ lang, internal_id, object_type = self._match_valid_url(url).group('lang', 'id', 'type')
+ path, name = {
+ 'concert': ('concerts', 'concert info'),
+ 'musicvideo': ('music_videos', 'music video info'),
+ }[object_type]
+ response = traverse_obj(self._call_api(f'{path}/{internal_id}', internal_id, lang, name), ('data', 0, {dict}))
+ if not response:
+ raise ExtractorError(f'No video with id {internal_id} could be found (possibly region locked?)', expected=True)
+
+ streams_link = response.get('streams_link')
+ if not streams_link and response.get('isPremiumOnly'):
+ message = f'This {response.get("type") or "media"} is for premium members only'
+ if self.is_logged_in:
+ raise ExtractorError(message, expected=True)
+ self.raise_login_required(message)
+
+ result = self._transform_music_response(response)
+ stream_response = self._call_api(streams_link, internal_id, lang, 'stream info')
+ result['formats'] = self._extract_formats(stream_response, internal_id)
+
+ return result
+
+ @staticmethod
+ def _transform_music_response(data):
+ return {
+ 'id': data['id'],
+ **traverse_obj(data, {
+ 'display_id': 'slug',
+ 'title': 'title',
+ 'track': 'title',
+ 'artist': ('artist', 'name'),
+ 'description': ('description', {str}, {lambda x: x.replace(r'\r\n', '\n') or None}),
+ 'thumbnails': ('images', ..., ..., {
+ 'url': ('source', {url_or_none}),
+ 'width': ('width', {int_or_none}),
+ 'height': ('height', {int_or_none}),
+ }),
+ 'genres': ('genres', ..., 'displayValue'),
+ 'age_limit': ('maturity_ratings', -1, {parse_age_limit}),
+ }),
+ }
+
+
+class CrunchyrollArtistIE(CrunchyrollBaseIE):
+ IE_NAME = 'crunchyroll:artist'
+ _VALID_URL = r'''(?x)
+ https?://(?:www\.)?crunchyroll\.com/
+ (?P<lang>(?:\w{2}(?:-\w{2})?/)?)
+ artist/(?P<id>\w{10})'''
+ _TESTS = [{
+ 'url': 'https://www.crunchyroll.com/artist/MA179CB50D',
+ 'info_dict': {
+ 'id': 'MA179CB50D',
+ 'title': 'LiSA',
+ 'genres': ['J-Pop', 'Anime', 'Rock'],
+ 'description': 'md5:16d87de61a55c3f7d6c454b73285938e',
+ },
+ 'playlist_mincount': 83,
+ }, {
+ 'url': 'https://www.crunchyroll.com/artist/MA179CB50D/lisa',
+ 'only_matching': True,
+ }]
+ _API_ENDPOINT = 'music'
+
+ def _real_extract(self, url):
+ lang, internal_id = self._match_valid_url(url).group('lang', 'id')
+ response = traverse_obj(self._call_api(
+ f'artists/{internal_id}', internal_id, lang, 'artist info'), ('data', 0))
+
+ def entries():
+ for attribute, path in [('concerts', 'concert'), ('videos', 'musicvideo')]:
+ for internal_id in traverse_obj(response, (attribute, ...)):
+ yield self.url_result(f'{self._BASE_URL}/watch/{path}/{internal_id}', CrunchyrollMusicIE, internal_id)
+
+ return self.playlist_result(entries(), **self._transform_artist_response(response))
+
+ @staticmethod
+ def _transform_artist_response(data):
+ return {
+ 'id': data['id'],
+ **traverse_obj(data, {
+ 'title': 'name',
+ 'description': ('description', {str}, {lambda x: x.replace(r'\r\n', '\n')}),
+ 'thumbnails': ('images', ..., ..., {
+ 'url': ('source', {url_or_none}),
+ 'width': ('width', {int_or_none}),
+ 'height': ('height', {int_or_none}),
+ }),
+ 'genres': ('genres', ..., 'displayValue'),
+ }),
+ }
diff --git a/yt_dlp/extractor/cspan.py b/yt_dlp/extractor/cspan.py
new file mode 100644
index 0000000..0075680
--- /dev/null
+++ b/yt_dlp/extractor/cspan.py
@@ -0,0 +1,287 @@
+import re
+
+from .common import InfoExtractor
+from ..compat import compat_HTMLParseError
+from ..utils import (
+ determine_ext,
+ ExtractorError,
+ extract_attributes,
+ find_xpath_attr,
+ get_element_by_attribute,
+ get_element_by_class,
+ int_or_none,
+ join_nonempty,
+ js_to_json,
+ merge_dicts,
+ parse_iso8601,
+ parse_qs,
+ smuggle_url,
+ str_to_int,
+ unescapeHTML,
+)
+from .senategov import SenateISVPIE
+from .ustream import UstreamIE
+
+
+class CSpanIE(InfoExtractor):
+ _VALID_URL = r'https?://(?:www\.)?c-span\.org/video/\?(?P<id>[0-9a-f]+)'
+ IE_DESC = 'C-SPAN'
+ _TESTS = [{
+ 'url': 'http://www.c-span.org/video/?313572-1/HolderonV',
+ 'md5': '94b29a4f131ff03d23471dd6f60b6a1d',
+ 'info_dict': {
+ 'id': '315139',
+ 'title': 'Attorney General Eric Holder on Voting Rights Act Decision',
+ },
+ 'playlist_mincount': 2,
+ 'skip': 'Regularly fails on travis, for unknown reasons',
+ }, {
+ 'url': 'http://www.c-span.org/video/?c4486943/cspan-international-health-care-models',
+ # md5 is unstable
+ 'info_dict': {
+ 'id': 'c4486943',
+ 'ext': 'mp4',
+ 'title': 'CSPAN - International Health Care Models',
+ 'description': 'md5:7a985a2d595dba00af3d9c9f0783c967',
+ }
+ }, {
+ 'url': 'http://www.c-span.org/video/?318608-1/gm-ignition-switch-recall',
+ 'info_dict': {
+ 'id': '342759',
+ 'title': 'General Motors Ignition Switch Recall',
+ },
+ 'playlist_mincount': 6,
+ }, {
+ # Video from senate.gov
+ 'url': 'http://www.c-span.org/video/?104517-1/immigration-reforms-needed-protect-skilled-american-workers',
+ 'info_dict': {
+ 'id': 'judiciary031715',
+ 'ext': 'mp4',
+ 'title': 'Immigration Reforms Needed to Protect Skilled American Workers',
+ },
+ 'params': {
+ 'skip_download': True, # m3u8 downloads
+ }
+ }, {
+ # Ustream embedded video
+ 'url': 'https://www.c-span.org/video/?114917-1/armed-services',
+ 'info_dict': {
+ 'id': '58428542',
+ 'ext': 'flv',
+ 'title': 'USHR07 Armed Services Committee',
+ 'description': 'hsas00-2118-20150204-1000et-07\n\n\nUSHR07 Armed Services Committee',
+ 'timestamp': 1423060374,
+ 'upload_date': '20150204',
+ 'uploader': 'HouseCommittee',
+ 'uploader_id': '12987475',
+ },
+ }, {
+ # Audio Only
+ 'url': 'https://www.c-span.org/video/?437336-1/judiciary-antitrust-competition-policy-consumer-rights',
+ 'only_matching': True,
+ }]
+ BRIGHTCOVE_URL_TEMPLATE = 'http://players.brightcove.net/%s/%s_%s/index.html?videoId=%s'
+
+ def _real_extract(self, url):
+ video_id = self._match_id(url)
+ video_type = None
+ webpage = self._download_webpage(url, video_id)
+
+ ustream_url = UstreamIE._extract_url(webpage)
+ if ustream_url:
+ return self.url_result(ustream_url, UstreamIE.ie_key())
+
+ if '&vod' not in url:
+ bc = self._search_regex(
+ r"(<[^>]+id='brightcove-player-embed'[^>]+>)",
+ webpage, 'brightcove embed', default=None)
+ if bc:
+ bc_attr = extract_attributes(bc)
+ bc_url = self.BRIGHTCOVE_URL_TEMPLATE % (
+ bc_attr.get('data-bcaccountid', '3162030207001'),
+ bc_attr.get('data-noprebcplayerid', 'SyGGpuJy3g'),
+ bc_attr.get('data-newbcplayerid', 'default'),
+ bc_attr['data-bcid'])
+ return self.url_result(smuggle_url(bc_url, {'source_url': url}))
+
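+ # The media CDN appears to check the Referer header, so attach it to every extracted format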
+ def add_referer(formats):
+ for f in formats:
+ f.setdefault('http_headers', {})['Referer'] = url
+
+ # As of 2020-12-01 this path appears to cover all cases, making the rest
+ # of the code unnecessary
+ jwsetup = self._parse_json(
+ self._search_regex(
+ r'(?s)jwsetup\s*=\s*({.+?})\s*;', webpage, 'jwsetup',
+ default='{}'),
+ video_id, transform_source=js_to_json, fatal=False)
+ if jwsetup:
+ info = self._parse_jwplayer_data(
+ jwsetup, video_id, require_title=False, m3u8_id='hls',
+ base_url=url)
+ add_referer(info['formats'])
+ for subtitles in info['subtitles'].values():
+ for subtitle in subtitles:
+ ext = determine_ext(subtitle['url'])
+ if ext == 'php':
+ ext = 'vtt'
+ subtitle['ext'] = ext
+ ld_info = self._search_json_ld(webpage, video_id, default={})
+ try:
+ title = get_element_by_class('video-page-title', webpage)
+ except compat_HTMLParseError:
+ title = None
+ if title is None:
+ title = self._og_search_title(webpage)
+ description = get_element_by_attribute('itemprop', 'description', webpage) or \
+ self._html_search_meta(['og:description', 'description'], webpage)
+ return merge_dicts(info, ld_info, {
+ 'title': title,
+ 'thumbnail': get_element_by_attribute('itemprop', 'thumbnailUrl', webpage),
+ 'description': description,
+ 'timestamp': parse_iso8601(get_element_by_attribute('itemprop', 'uploadDate', webpage)),
+ 'location': get_element_by_attribute('itemprop', 'contentLocation', webpage),
+ 'duration': int_or_none(self._search_regex(
+ r'jwsetup\.seclength\s*=\s*(\d+);',
+ webpage, 'duration', fatal=False)),
+ 'view_count': str_to_int(self._search_regex(
+ r"<span[^>]+class='views'[^>]*>([\d,]+)\s+Views</span>",
+ webpage, 'views', fatal=False)),
+ })
+
+ # Obsolete
+ # We look for clipid first, because clipprog always appears before it
+ patterns = [r'id=\'clip(%s)\'\s*value=\'([0-9]+)\'' % t for t in ('id', 'prog')]
+ results = list(filter(None, (re.search(p, webpage) for p in patterns)))
+ if results:
+ matches = results[0]
+ video_type, video_id = matches.groups()
+ video_type = 'clip' if video_type == 'id' else 'program'
+ else:
+ m = re.search(r'data-(?P<type>clip|prog)id=["\'](?P<id>\d+)', webpage)
+ if m:
+ video_id = m.group('id')
+ video_type = 'program' if m.group('type') == 'prog' else 'clip'
+ else:
+ senate_isvp_url = SenateISVPIE._extract_url(webpage)
+ if senate_isvp_url:
+ title = self._og_search_title(webpage)
+ surl = smuggle_url(senate_isvp_url, {'force_title': title})
+ return self.url_result(surl, 'SenateISVP', video_id, title)
+ video_id = self._search_regex(
+ r'jwsetup\.clipprog\s*=\s*(\d+);',
+ webpage, 'jwsetup program id', default=None)
+ if video_id:
+ video_type = 'program'
+ if video_type is None or video_id is None:
+ error_message = get_element_by_class('VLplayer-error-message', webpage)
+ if error_message:
+ raise ExtractorError(error_message)
+ raise ExtractorError('unable to find video id and type')
+
+ def get_text_attr(d, attr):
+ return d.get(attr, {}).get('#text')
+
+ data = self._download_json(
+ 'http://www.c-span.org/assets/player/ajax-player.php?os=android&html5=%s&id=%s' % (video_type, video_id),
+ video_id)['video']
+ if data['@status'] != 'Success':
+ raise ExtractorError('%s said: %s' % (self.IE_NAME, get_text_attr(data, 'error')), expected=True)
+
+ doc = self._download_xml(
+ 'http://www.c-span.org/common/services/flashXml.php?%sid=%s' % (video_type, video_id),
+ video_id)
+
+ description = self._html_search_meta('description', webpage)
+
+ title = find_xpath_attr(doc, './/string', 'name', 'title').text
+ thumbnail = find_xpath_attr(doc, './/string', 'name', 'poster').text
+
+ files = data['files']
+ capfile = get_text_attr(data, 'capfile')
+
+ entries = []
+ for partnum, f in enumerate(files):
+ formats = []
+ for quality in f.get('qualities', []):
+ formats.append({
+ 'format_id': '%s-%sp' % (get_text_attr(quality, 'bitrate'), get_text_attr(quality, 'height')),
+ 'url': unescapeHTML(get_text_attr(quality, 'file')),
+ 'height': int_or_none(get_text_attr(quality, 'height')),
+ 'tbr': int_or_none(get_text_attr(quality, 'bitrate')),
+ })
+ if not formats:
+ path = unescapeHTML(get_text_attr(f, 'path'))
+ if not path:
+ continue
+ formats = self._extract_m3u8_formats(
+ path, video_id, 'mp4', entry_protocol='m3u8_native',
+ m3u8_id='hls') if determine_ext(path) == 'm3u8' else [{'url': path, }]
+ add_referer(formats)
+ entries.append({
+ 'id': '%s_%d' % (video_id, partnum + 1),
+ 'title': (
+ title if len(files) == 1 else
+ '%s part %d' % (title, partnum + 1)),
+ 'formats': formats,
+ 'description': description,
+ 'thumbnail': thumbnail,
+ 'duration': int_or_none(get_text_attr(f, 'length')),
+ 'subtitles': {
+ 'en': [{
+ 'url': capfile,
+ 'ext': determine_ext(capfile, 'dfxp')
+ }],
+ } if capfile else None,
+ })
+
+ if len(entries) == 1:
+ entry = dict(entries[0])
+ entry['id'] = 'c' + video_id if video_type == 'clip' else video_id
+ return entry
+ else:
+ return {
+ '_type': 'playlist',
+ 'entries': entries,
+ 'title': title,
+ 'id': 'c' + video_id if video_type == 'clip' else video_id,
+ }
+
+
+class CSpanCongressIE(InfoExtractor):
+ _VALID_URL = r'https?://(?:www\.)?c-span\.org/congress/'
+ _TESTS = [{
+ 'url': 'https://www.c-span.org/congress/?chamber=house&date=2017-12-13&t=1513208380',
+ 'info_dict': {
+ 'id': 'house_2017-12-13',
+ 'title': 'Congressional Chronicle - Members of Congress, Hearings and More',
+ 'description': 'md5:54c264b7a8f219937987610243305a84',
+ 'thumbnail': r're:https://ximage.c-spanvideo.org/.+',
+ 'ext': 'mp4'
+ }
+ }]
+
+ def _real_extract(self, url):
+ query = parse_qs(url)
+ video_date = query.get('date', [None])[0]
+ video_id = join_nonempty(query.get('chamber', ['senate'])[0], video_date, delim='_')
+ webpage = self._download_webpage(url, video_id)
+ if not video_date:
+ jwp_date = re.search(r'jwsetup.clipprogdate = \'(?P<date>\d{4}-\d{2}-\d{2})\';', webpage)
+ if jwp_date:
+ video_id = f'{video_id}_{jwp_date.group("date")}'
+ jwplayer_data = self._parse_json(
+ self._search_regex(r'jwsetup\s*=\s*({(?:.|\n)[^;]+});', webpage, 'player config'),
+ video_id, transform_source=js_to_json)
+
+ title = self._generic_title('', webpage)
+ description = (self._og_search_description(webpage, default=None)
+ or self._html_search_meta('description', webpage, 'description', default=None))
+
+ return {
+ **self._parse_jwplayer_data(jwplayer_data, video_id, False),
+ 'title': re.sub(r'\s+', ' ', title.split('|')[0]).strip(),
+ 'description': description,
+ 'http_headers': {'Referer': 'https://www.c-span.org/'},
+ }
diff --git a/yt_dlp/extractor/ctsnews.py b/yt_dlp/extractor/ctsnews.py
new file mode 100644
index 0000000..cec178f
--- /dev/null
+++ b/yt_dlp/extractor/ctsnews.py
@@ -0,0 +1,85 @@
+from .common import InfoExtractor
+from ..utils import unified_timestamp
+from .youtube import YoutubeIE
+
+
+class CtsNewsIE(InfoExtractor):
+ IE_DESC = '華視新聞'
+ _VALID_URL = r'https?://news\.cts\.com\.tw/[a-z]+/[a-z]+/\d+/(?P<id>\d+)\.html'
+ _TESTS = [{
+ 'url': 'http://news.cts.com.tw/cts/international/201501/201501291578109.html',
+ 'md5': 'a9875cb790252b08431186d741beaabe',
+ 'info_dict': {
+ 'id': '201501291578109',
+ 'ext': 'mp4',
+ 'title': '以色列.真主黨交火 3人死亡 - 華視新聞網',
+ 'description': '以色列和黎巴嫩真主黨,爆發五年最嚴重衝突,雙方砲轟交火,兩名以軍死亡,還有一名西班牙籍的聯合國維和人員也不幸罹難。大陸陝西、河南、安徽、江蘇和湖北五個省份出現大暴雪,嚴重影響陸空交通,不過九華山卻出現...',
+ 'timestamp': 1422528540,
+ 'upload_date': '20150129',
+ }
+ }, {
+ # News clip does not appear on the page but is still available in the database
+ 'url': 'http://news.cts.com.tw/cts/international/201309/201309031304098.html',
+ 'md5': '3aee7e0df7cdff94e43581f54c22619e',
+ 'info_dict': {
+ 'id': '201309031304098',
+ 'ext': 'mp4',
+ 'title': '韓國31歲童顏男 貌如十多歲小孩 - 華視新聞網',
+ 'description': '越有年紀的人,越希望看起來年輕一點,而南韓卻有一位31歲的男子,看起來像是11、12歲的小孩,身...',
+ 'thumbnail': r're:^https?://.*\.jpg$',
+ 'timestamp': 1378205880,
+ 'upload_date': '20130903',
+ }
+ }, {
+ # With Youtube embedded video
+ 'url': 'http://news.cts.com.tw/cts/money/201501/201501291578003.html',
+ 'md5': 'e4726b2ccd70ba2c319865e28f0a91d1',
+ 'info_dict': {
+ 'id': 'OVbfO7d0_hQ',
+ 'ext': 'mp4',
+ 'title': 'iPhone6熱銷 蘋果財報亮眼',
+ 'description': 'md5:f395d4f485487bb0f992ed2c4b07aa7d',
+ 'thumbnail': r're:^https?://.*\.jpg$',
+ 'upload_date': '20150128',
+ 'uploader_id': 'TBSCTS',
+ 'uploader': '中華電視公司',
+ },
+ 'add_ie': ['Youtube'],
+ }]
+
+ def _real_extract(self, url):
+ news_id = self._match_id(url)
+ page = self._download_webpage(url, news_id)
+
+ news_id = self._hidden_inputs(page).get('get_id')
+
+ if news_id:
+ mp4_feed = self._download_json(
+ 'http://news.cts.com.tw/action/test_mp4feed.php',
+ news_id, note='Fetching feed', query={'news_id': news_id})
+ video_url = mp4_feed['source_url']
+ else:
+ self.to_screen('Not a CTSPlayer video, trying YouTube...')
+ youtube_url = YoutubeIE._extract_url(page)
+
+ return self.url_result(youtube_url, ie='Youtube')
+
+ description = self._html_search_meta('description', page)
+ title = self._html_search_meta('title', page, fatal=True)
+ thumbnail = self._html_search_meta('image', page)
+
+ datetime_str = self._html_search_regex(
+ r'(\d{4}/\d{2}/\d{2} \d{2}:\d{2})', page, 'date and time', fatal=False)
+ timestamp = None
+ if datetime_str:
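+ # The page shows Taiwan local time (UTC+8); subtract 8 hours to get a UTC timestamp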
+ timestamp = unified_timestamp(datetime_str) - 8 * 3600
+
+ return {
+ 'id': news_id,
+ 'url': video_url,
+ 'title': title,
+ 'description': description,
+ 'thumbnail': thumbnail,
+ 'timestamp': timestamp,
+ }
diff --git a/yt_dlp/extractor/ctv.py b/yt_dlp/extractor/ctv.py
new file mode 100644
index 0000000..f125c1c
--- /dev/null
+++ b/yt_dlp/extractor/ctv.py
@@ -0,0 +1,50 @@
+from .common import InfoExtractor
+
+
+class CTVIE(InfoExtractor):
+ _VALID_URL = r'https?://(?:www\.)?ctv\.ca/(?P<id>(?:show|movie)s/[^/]+/[^/?#&]+)'
+ _TESTS = [{
+ 'url': 'https://www.ctv.ca/shows/your-morning/wednesday-december-23-2020-s5e88',
+ 'info_dict': {
+ 'id': '2102249',
+ 'ext': 'flv',
+ 'title': 'Wednesday, December 23, 2020',
+ 'thumbnail': r're:^https?://.*\.jpg$',
+ 'description': 'Your Morning delivers original perspectives and unique insights into the headlines of the day.',
+ 'timestamp': 1608732000,
+ 'upload_date': '20201223',
+ 'series': 'Your Morning',
+ 'season': '2020-2021',
+ 'season_number': 5,
+ 'episode_number': 88,
+ 'tags': ['Your Morning'],
+ 'categories': ['Talk Show'],
+ 'duration': 7467.126,
+ },
+ }, {
+ 'url': 'https://www.ctv.ca/movies/adam-sandlers-eight-crazy-nights/adam-sandlers-eight-crazy-nights',
+ 'only_matching': True,
+ }]
+
+ def _real_extract(self, url):
+ display_id = self._match_id(url)
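+ # Resolve the page path via CTV's GraphQL API to get the 9c9media axis ID and player destination code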
+ content = self._download_json(
+ 'https://www.ctv.ca/space-graphql/graphql', display_id, query={
+ 'query': '''{
+ resolvedPath(path: "/%s") {
+ lastSegment {
+ content {
+ ... on AxisContent {
+ axisId
+ videoPlayerDestCode
+ }
+ }
+ }
+ }
+}''' % display_id,
+ })['data']['resolvedPath']['lastSegment']['content']
+ video_id = content['axisId']
+ return self.url_result(
+ '9c9media:%s:%s' % (content['videoPlayerDestCode'], video_id),
+ 'NineCNineMedia', video_id)
diff --git a/yt_dlp/extractor/ctvnews.py b/yt_dlp/extractor/ctvnews.py
new file mode 100644
index 0000000..ad3f0d8
--- /dev/null
+++ b/yt_dlp/extractor/ctvnews.py
@@ -0,0 +1,69 @@
+import re
+
+from .common import InfoExtractor
+from ..utils import orderedSet
+
+
+class CTVNewsIE(InfoExtractor):
+ _VALID_URL = r'https?://(?:.+?\.)?ctvnews\.ca/(?:video\?(?:clip|playlist|bin)Id=|.*?)(?P<id>[0-9.]+)'
+ _TESTS = [{
+ 'url': 'http://www.ctvnews.ca/video?clipId=901995',
+ 'md5': '9b8624ba66351a23e0b6e1391971f9af',
+ 'info_dict': {
+ 'id': '901995',
+ 'ext': 'flv',
+ 'title': 'Extended: \'That person cannot be me\' Johnson says',
+ 'description': 'md5:958dd3b4f5bbbf0ed4d045c790d89285',
+ 'timestamp': 1467286284,
+ 'upload_date': '20160630',
+ }
+ }, {
+ 'url': 'http://www.ctvnews.ca/video?playlistId=1.2966224',
+ 'info_dict': {
+ 'id': '1.2966224',
+ },
+ 'playlist_mincount': 19,
+ }, {
+ 'url': 'http://www.ctvnews.ca/video?binId=1.2876780',
+ 'info_dict': {
+ 'id': '1.2876780',
+ },
+ 'playlist_mincount': 100,
+ }, {
+ 'url': 'http://www.ctvnews.ca/1.810401',
+ 'only_matching': True,
+ }, {
+ 'url': 'http://www.ctvnews.ca/canadiens-send-p-k-subban-to-nashville-in-blockbuster-trade-1.2967231',
+ 'only_matching': True,
+ }, {
+ 'url': 'http://vancouverisland.ctvnews.ca/video?clipId=761241',
+ 'only_matching': True,
+ }]
+
+ def _real_extract(self, url):
+ page_id = self._match_id(url)
+
+ def ninecninemedia_url_result(clip_id):
+ return {
+ '_type': 'url_transparent',
+ 'id': clip_id,
+ 'url': '9c9media:ctvnews_web:%s' % clip_id,
+ 'ie_key': 'NineCNineMedia',
+ }
+
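+ # Numeric IDs are 9c9media clip IDs; otherwise scrape the article page for embedded clip IDs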
+ if page_id.isdigit():
+ return ninecninemedia_url_result(page_id)
+ else:
+ webpage = self._download_webpage('http://www.ctvnews.ca/%s' % page_id, page_id, query={
+ 'ot': 'example.AjaxPageLayout.ot',
+ 'maxItemsPerPage': 1000000,
+ })
+ entries = [ninecninemedia_url_result(clip_id) for clip_id in orderedSet(
+ re.findall(r'clip\.id\s*=\s*(\d+);', webpage))]
+ if not entries:
+ webpage = self._download_webpage(url, page_id)
+ if 'getAuthStates("' in webpage:
+ entries = [ninecninemedia_url_result(clip_id) for clip_id in
+ self._search_regex(r'getAuthStates\("([\d+,]+)"', webpage, 'clip ids').split(',')]
+ return self.playlist_result(entries, page_id)
diff --git a/yt_dlp/extractor/cultureunplugged.py b/yt_dlp/extractor/cultureunplugged.py
new file mode 100644
index 0000000..9c8509f
--- /dev/null
+++ b/yt_dlp/extractor/cultureunplugged.py
@@ -0,0 +1,66 @@
+import time
+
+from .common import InfoExtractor
+from ..networking import HEADRequest
+from ..utils import int_or_none
+
+
+class CultureUnpluggedIE(InfoExtractor):
+ _VALID_URL = r'https?://(?:www\.)?cultureunplugged\.com/documentary/watch-online/play/(?P<id>\d+)(?:/(?P<display_id>[^/]+))?'
+ _TESTS = [{
+ 'url': 'http://www.cultureunplugged.com/documentary/watch-online/play/53662/The-Next--Best-West',
+ 'md5': 'ac6c093b089f7d05e79934dcb3d228fc',
+ 'info_dict': {
+ 'id': '53662',
+ 'display_id': 'The-Next--Best-West',
+ 'ext': 'mp4',
+ 'title': 'The Next, Best West',
+ 'description': 'md5:0423cd00833dea1519cf014e9d0903b1',
+ 'thumbnail': r're:^https?://.*\.jpg$',
+ 'creator': 'Coldstream Creative',
+ 'duration': 2203,
+ 'view_count': int,
+ }
+ }, {
+ 'url': 'http://www.cultureunplugged.com/documentary/watch-online/play/53662',
+ 'only_matching': True,
+ }]
+
+ def _real_extract(self, url):
+ mobj = self._match_valid_url(url)
+ video_id = mobj.group('id')
+ display_id = mobj.group('display_id') or video_id
+
+ # Request setClientTimezone.php to get the PHPSESSID cookie, which is needed to get valid JSON data in the next request
+ self._request_webpage(HEADRequest(
+ 'http://www.cultureunplugged.com/setClientTimezone.php?timeOffset=%d' % -(time.timezone / 3600)), display_id)
+ movie_data = self._download_json(
+ 'http://www.cultureunplugged.com/movie-data/cu-%s.json' % video_id, display_id)
+
+ video_url = movie_data['url']
+ title = movie_data['title']
+
+ description = movie_data.get('synopsis')
+ creator = movie_data.get('producer')
+ duration = int_or_none(movie_data.get('duration'))
+ view_count = int_or_none(movie_data.get('views'))
+
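+ # Thumbnail preference follows the tuple order, so 'large' outranks 'small'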
+ thumbnails = [{
+ 'url': movie_data['%s_thumb' % size],
+ 'id': size,
+ 'preference': preference,
+ } for preference, size in enumerate((
+ 'small', 'large')) if movie_data.get('%s_thumb' % size)]
+
+ return {
+ 'id': video_id,
+ 'display_id': display_id,
+ 'url': video_url,
+ 'title': title,
+ 'description': description,
+ 'creator': creator,
+ 'duration': duration,
+ 'view_count': view_count,
+ 'thumbnails': thumbnails,
+ }
diff --git a/yt_dlp/extractor/curiositystream.py b/yt_dlp/extractor/curiositystream.py
new file mode 100644
index 0000000..941cf4e
--- /dev/null
+++ b/yt_dlp/extractor/curiositystream.py
@@ -0,0 +1,205 @@
+import re
+import urllib.parse
+
+from .common import InfoExtractor
+from ..compat import compat_str
+from ..utils import ExtractorError, int_or_none, urlencode_postdata
+
+
+class CuriosityStreamBaseIE(InfoExtractor):
+ _NETRC_MACHINE = 'curiositystream'
+ _auth_token = None
+
+ def _handle_errors(self, result):
+ error = result.get('error', {}).get('message')
+ if error:
+ if isinstance(error, dict):
+ error = ', '.join(error.values())
+ raise ExtractorError(
+ '%s said: %s' % (self.IE_NAME, error), expected=True)
+
+ def _call_api(self, path, video_id, query=None):
+ headers = {}
+ if not self._auth_token:
+ auth_cookie = self._get_cookies('https://curiositystream.com').get('auth_token')
+ if auth_cookie:
+ self.write_debug('Obtained auth_token cookie')
+ self._auth_token = urllib.parse.unquote(auth_cookie.value)
+ if self._auth_token:
+ headers['X-Auth-Token'] = self._auth_token
+ result = self._download_json(
+ self._API_BASE_URL + path, video_id, headers=headers, query=query)
+ self._handle_errors(result)
+ return result['data']
+
+ def _perform_login(self, username, password):
+ result = self._download_json(
+ 'https://api.curiositystream.com/v1/login', None,
+ note='Logging in', data=urlencode_postdata({
+ 'email': username,
+ 'password': password,
+ }))
+ self._handle_errors(result)
+ CuriosityStreamBaseIE._auth_token = result['message']['auth_token']
+
+
+class CuriosityStreamIE(CuriosityStreamBaseIE):
+ IE_NAME = 'curiositystream'
+ _VALID_URL = r'https?://(?:app\.)?curiositystream\.com/video/(?P<id>\d+)'
+ _TESTS = [{
+ 'url': 'http://app.curiositystream.com/video/2',
+ 'info_dict': {
+ 'id': '2',
+ 'ext': 'mp4',
+ 'title': 'How Did You Develop The Internet?',
+ 'description': 'Vint Cerf, Google\'s Chief Internet Evangelist, describes how he and Bob Kahn created the internet.',
+ 'channel': 'Curiosity Stream',
+ 'categories': ['Technology', 'Interview'],
+ 'average_rating': float,
+ 'series_id': '2',
+ 'thumbnail': r're:https://img.curiositystream.com/.+\.jpg',
+ 'tags': [],
+ 'duration': 158
+ },
+ 'params': {
+ # m3u8 download
+ 'skip_download': True,
+ },
+ }]
+
+ _API_BASE_URL = 'https://api.curiositystream.com/v1/media/'
+
+ def _real_extract(self, url):
+ video_id = self._match_id(url)
+
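+ # Query the API once per manifest format; each response lists the available encodings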
+ formats = []
+ for encoding_format in ('m3u8', 'mpd'):
+ media = self._call_api(video_id, video_id, query={
+ 'encodingsNew': 'true',
+ 'encodingsFormat': encoding_format,
+ })
+ for encoding in media.get('encodings', []):
+ playlist_url = encoding.get('master_playlist_url')
+ if encoding_format == 'm3u8':
+ # use `m3u8` entry_protocol until EXT-X-MAP is properly supported by `m3u8_native` entry_protocol
+ formats.extend(self._extract_m3u8_formats(
+ playlist_url, video_id, 'mp4',
+ m3u8_id='hls', fatal=False))
+ elif encoding_format == 'mpd':
+ formats.extend(self._extract_mpd_formats(
+ playlist_url, video_id, mpd_id='dash', fatal=False))
+ encoding_url = encoding.get('url')
+ file_url = encoding.get('file_url')
+ if not encoding_url and not file_url:
+ continue
+ f = {
+ 'width': int_or_none(encoding.get('width')),
+ 'height': int_or_none(encoding.get('height')),
+ 'vbr': int_or_none(encoding.get('video_bitrate')),
+ 'abr': int_or_none(encoding.get('audio_bitrate')),
+ 'filesize': int_or_none(encoding.get('size_in_bytes')),
+ 'vcodec': encoding.get('video_codec'),
+ 'acodec': encoding.get('audio_codec'),
+ 'container': encoding.get('container_type'),
+ }
+ for f_url in (encoding_url, file_url):
+ if not f_url:
+ continue
+ fmt = f.copy()
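+ # Legacy RTMP URLs bundle base URL, app and play path in one string; split them for the RTMP downloader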
+ rtmp = re.search(r'^(?P<url>rtmpe?://(?P<host>[^/]+)/(?P<app>.+))/(?P<playpath>mp[34]:.+)$', f_url)
+ if rtmp:
+ fmt.update({
+ 'url': rtmp.group('url'),
+ 'play_path': rtmp.group('playpath'),
+ 'app': rtmp.group('app'),
+ 'ext': 'flv',
+ 'format_id': 'rtmp',
+ })
+ else:
+ fmt.update({
+ 'url': f_url,
+ 'format_id': 'http',
+ })
+ formats.append(fmt)
+
+ title = media['title']
+
+ subtitles = {}
+ for closed_caption in media.get('closed_captions', []):
+ sub_url = closed_caption.get('file')
+ if not sub_url:
+ continue
+ lang = closed_caption.get('code') or closed_caption.get('language') or 'en'
+ subtitles.setdefault(lang, []).append({
+ 'url': sub_url,
+ })
+
+ return {
+ 'id': video_id,
+ 'formats': formats,
+ 'title': title,
+ 'description': media.get('description'),
+ 'thumbnail': media.get('image_large') or media.get('image_medium') or media.get('image_small'),
+ 'duration': int_or_none(media.get('duration')),
+ 'tags': media.get('tags'),
+ 'subtitles': subtitles,
+ 'channel': media.get('producer'),
+ 'categories': [media.get('primary_category'), media.get('type')],
+ 'average_rating': media.get('rating_percentage'),
+ 'series_id': str(media.get('collection_id') or '') or None,
+ }
+
+
+class CuriosityStreamCollectionBaseIE(CuriosityStreamBaseIE):
+
+ def _real_extract(self, url):
+ collection_id = self._match_id(url)
+ collection = self._call_api(collection_id, collection_id)
+ entries = []
+ for media in collection.get('media', []):
+ media_id = compat_str(media.get('id'))
+ media_type, ie = ('series', CuriosityStreamSeriesIE) if media.get('is_collection') else ('video', CuriosityStreamIE)
+ entries.append(self.url_result(
+ 'https://curiositystream.com/%s/%s' % (media_type, media_id),
+ ie=ie.ie_key(), video_id=media_id))
+ return self.playlist_result(
+ entries, collection_id,
+ collection.get('title'), collection.get('description'))
+
+
+class CuriosityStreamCollectionsIE(CuriosityStreamCollectionBaseIE):
+ IE_NAME = 'curiositystream:collections'
+ _VALID_URL = r'https?://(?:app\.)?curiositystream\.com/collections/(?P<id>\d+)'
+ _API_BASE_URL = 'https://api.curiositystream.com/v2/collections/'
+ _TESTS = [{
+ 'url': 'https://curiositystream.com/collections/86',
+ 'info_dict': {
+ 'id': '86',
+ 'title': 'Staff Picks',
+ 'description': 'Wondering where to start? Here are a few of our favorite series and films... from our couch to yours.',
+ },
+ 'playlist_mincount': 7,
+ }, {
+ 'url': 'https://curiositystream.com/collections/36',
+ 'only_matching': True,
+ }]
+
+
+class CuriosityStreamSeriesIE(CuriosityStreamCollectionBaseIE):
+ IE_NAME = 'curiositystream:series'
+ _VALID_URL = r'https?://(?:app\.)?curiositystream\.com/(?:series|collection)/(?P<id>\d+)'
+ _API_BASE_URL = 'https://api.curiositystream.com/v2/series/'
+ _TESTS = [{
+ 'url': 'https://curiositystream.com/series/2',
+ 'info_dict': {
+ 'id': '2',
+ 'title': 'Curious Minds: The Internet',
+ 'description': 'How is the internet shaping our lives in the 21st Century?',
+ },
+ 'playlist_mincount': 16,
+ }, {
+ 'url': 'https://curiositystream.com/collection/2',
+ 'only_matching': True,
+ }]
diff --git a/yt_dlp/extractor/cwtv.py b/yt_dlp/extractor/cwtv.py
new file mode 100644
index 0000000..69d50da
--- /dev/null
+++ b/yt_dlp/extractor/cwtv.py
@@ -0,0 +1,100 @@
+from .common import InfoExtractor
+from ..utils import (
+ ExtractorError,
+ int_or_none,
+ parse_age_limit,
+ parse_iso8601,
+ smuggle_url,
+ str_or_none,
+)
+
+
+class CWTVIE(InfoExtractor):
+ _VALID_URL = r'https?://(?:www\.)?cw(?:tv(?:pr)?|seed)\.com/(?:shows/)?(?:[^/]+/)+[^?]*\?.*\b(?:play|watch)=(?P<id>[a-z0-9]{8}-[a-z0-9]{4}-[a-z0-9]{4}-[a-z0-9]{4}-[a-z0-9]{12})'
+ _TESTS = [{
+ 'url': 'http://cwtv.com/shows/arrow/legends-of-yesterday/?play=6b15e985-9345-4f60-baf8-56e96be57c63',
+ 'info_dict': {
+ 'id': '6b15e985-9345-4f60-baf8-56e96be57c63',
+ 'ext': 'mp4',
+ 'title': 'Legends of Yesterday',
+ 'description': 'Oliver and Barry Allen take Kendra Saunders and Carter Hall to a remote location to keep them hidden from Vandal Savage while they figure out how to defeat him.',
+ 'duration': 2665,
+ 'series': 'Arrow',
+ 'season_number': 4,
+ 'season': '4',
+ 'episode_number': 8,
+ 'upload_date': '20151203',
+ 'timestamp': 1449122100,
+ },
+ 'params': {
+ # m3u8 download
+ 'skip_download': True,
+ },
+ 'skip': 'redirect to http://cwtv.com/shows/arrow/',
+ }, {
+ 'url': 'http://www.cwseed.com/shows/whose-line-is-it-anyway/jeff-davis-4/?play=24282b12-ead2-42f2-95ad-26770c2c6088',
+ 'info_dict': {
+ 'id': '24282b12-ead2-42f2-95ad-26770c2c6088',
+ 'ext': 'mp4',
+ 'title': 'Jeff Davis 4',
+ 'description': 'Jeff Davis is back to make you laugh.',
+ 'duration': 1263,
+ 'series': 'Whose Line Is It Anyway?',
+ 'season_number': 11,
+ 'episode_number': 20,
+ 'upload_date': '20151006',
+ 'timestamp': 1444107300,
+ 'age_limit': 14,
+ 'uploader': 'CWTV',
+ 'thumbnail': r're:^https?://.*\.jpe?g$',
+ 'chapters': 'count:4',
+ 'episode': 'Episode 20',
+ 'season': 'Season 11',
+ },
+ 'params': {
+ # m3u8 download
+ 'skip_download': True,
+ },
+ }, {
+ 'url': 'http://cwtv.com/thecw/chroniclesofcisco/?play=8adebe35-f447-465f-ab52-e863506ff6d6',
+ 'only_matching': True,
+ }, {
+ 'url': 'http://cwtvpr.com/the-cw/video?watch=9eee3f60-ef4e-440b-b3b2-49428ac9c54e',
+ 'only_matching': True,
+ }, {
+ 'url': 'http://cwtv.com/shows/arrow/legends-of-yesterday/?watch=6b15e985-9345-4f60-baf8-56e96be57c63',
+ 'only_matching': True,
+ }]
+
+ def _real_extract(self, url):
+ video_id = self._match_id(url)
+ data = self._download_json(
+ 'http://images.cwtv.com/feed/mobileapp/video-meta/apiversion_8/guid_' + video_id,
+ video_id)
+ if data.get('result') != 'ok':
+ raise ExtractorError(data['msg'], expected=True)
+ video_data = data['video']
+ title = video_data['title']
+ mpx_url = video_data.get('mpx_url') or 'http://link.theplatform.com/s/cwtv/media/guid/2703454149/%s?formats=M3U' % video_id
+
+ season = str_or_none(video_data.get('season'))
+ episode = str_or_none(video_data.get('episode'))
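+        # the API appears to return season and episode fused together
+        # (e.g. '408' for S4E08), so strip the leading season digits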
+ if episode and season:
+ episode = episode[len(season):]
+
+ return {
+ '_type': 'url_transparent',
+ 'id': video_id,
+ 'title': title,
+ 'url': smuggle_url(mpx_url, {'force_smil_url': True}),
+ 'description': video_data.get('description_long'),
+ 'duration': int_or_none(video_data.get('duration_secs')),
+ 'series': video_data.get('series_name'),
+ 'season_number': int_or_none(season),
+ 'episode_number': int_or_none(episode),
+ 'timestamp': parse_iso8601(video_data.get('start_time')),
+ 'age_limit': parse_age_limit(video_data.get('rating')),
+ 'ie_key': 'ThePlatform',
+            'thumbnail': video_data.get('large_thumbnail'),
+ }
diff --git a/yt_dlp/extractor/cybrary.py b/yt_dlp/extractor/cybrary.py
new file mode 100644
index 0000000..c6995b2
--- /dev/null
+++ b/yt_dlp/extractor/cybrary.py
@@ -0,0 +1,148 @@
+from .common import InfoExtractor
+from ..utils import (
+ ExtractorError,
+ smuggle_url,
+ str_or_none,
+ traverse_obj,
+ urlencode_postdata,
+)
+
+
+class CybraryBaseIE(InfoExtractor):
+ _API_KEY = 'AIzaSyCX9ru6j70PX2My1Eq6Q1zoMAhuTdXlzSw'
+ _ENDPOINTS = {
+ 'course': 'https://app.cybrary.it/courses/api/catalog/browse/course/{}',
+ 'course_enrollment': 'https://app.cybrary.it/courses/api/catalog/{}/enrollment',
+ 'enrollment': 'https://app.cybrary.it/courses/api/enrollment/{}',
+ 'launch': 'https://app.cybrary.it/courses/api/catalog/{}/launch',
+ 'vimeo_oembed': 'https://vimeo.com/api/oembed.json?url=https://vimeo.com/{}',
+ }
+ _NETRC_MACHINE = 'cybrary'
+ _TOKEN = None
+
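+    # login goes through Google Identity Toolkit (apparently Firebase Auth);
+    # _API_KEY appears to be the site's public Firebase web API key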
+ def _perform_login(self, username, password):
+ CybraryBaseIE._TOKEN = self._download_json(
+ f'https://identitytoolkit.googleapis.com/v1/accounts:signInWithPassword?key={self._API_KEY}',
+ None, data=urlencode_postdata({'email': username, 'password': password, 'returnSecureToken': True}),
+ note='Logging in')['idToken']
+
+ def _real_initialize(self):
+ if not self._TOKEN:
+ self.raise_login_required(method='password')
+
+ def _call_api(self, endpoint, item_id):
+ return self._download_json(
+ self._ENDPOINTS[endpoint].format(item_id), item_id,
+ note=f'Downloading {endpoint} JSON metadata',
+ headers={'Authorization': f'Bearer {self._TOKEN}'})
+
+ def _get_vimeo_id(self, activity_id):
+ launch_api = self._call_api('launch', activity_id)
+
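+        # the launch URL may embed a Vimeo player link directly; otherwise the
+        # Vimeo id is nested inside the vendor_data payload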
+ if launch_api.get('url'):
+ return self._search_regex(r'https?://player\.vimeo\.com/video/(?P<vimeo_id>[0-9]+)', launch_api['url'], 'vimeo_id')
+ return traverse_obj(launch_api, ('vendor_data', 'content', ..., 'videoId'), get_all=False)
+
+
+class CybraryIE(CybraryBaseIE):
+ _VALID_URL = r'https?://app\.cybrary\.it/immersive/(?P<enrollment>[0-9]+)/activity/(?P<id>[0-9]+)'
+ _TESTS = [{
+ 'url': 'https://app.cybrary.it/immersive/12487950/activity/63102',
+ 'md5': '9ae12d37e555cb2ed554223a71a701d0',
+ 'info_dict': {
+ 'id': '646609770',
+ 'ext': 'mp4',
+ 'title': 'Getting Started',
+ 'thumbnail': 'https://i.vimeocdn.com/video/1301817996-76a268f0c56cff18a5cecbbdc44131eb9dda0c80eb0b3a036_1280',
+ 'series_id': '63111',
+ 'uploader_url': 'https://vimeo.com/user30867300',
+ 'duration': 88,
+ 'uploader_id': 'user30867300',
+ 'series': 'Cybrary Orientation',
+ 'uploader': 'Cybrary',
+ 'chapter': 'Cybrary Orientation Series',
+ 'chapter_id': '63110'
+ },
+ 'expected_warnings': ['No authenticators for vimeo']
+ }, {
+ 'url': 'https://app.cybrary.it/immersive/12747143/activity/52686',
+ 'md5': '62f26547dccc59c44363e2a13d4ad08d',
+ 'info_dict': {
+ 'id': '445638073',
+ 'ext': 'mp4',
+ 'title': 'Azure Virtual Network IP Addressing',
+ 'thumbnail': 'https://i.vimeocdn.com/video/936667051-1647ace66c627d4a2382185e0dae8deb830309bfddd53f8b2367b2f91e92ed0e-d_1280',
+ 'series_id': '52733',
+ 'uploader_url': 'https://vimeo.com/user30867300',
+ 'duration': 426,
+ 'uploader_id': 'user30867300',
+ 'series': 'AZ-500: Microsoft Azure Security Technologies',
+ 'uploader': 'Cybrary',
+ 'chapter': 'Implement Network Security',
+ 'chapter_id': '52693'
+ },
+ 'expected_warnings': ['No authenticators for vimeo']
+ }]
+
+ def _real_extract(self, url):
+ activity_id, enrollment_id = self._match_valid_url(url).group('id', 'enrollment')
+ course = self._call_api('enrollment', enrollment_id)['content']
+ activity = traverse_obj(course, ('learning_modules', ..., 'activities', lambda _, v: int(activity_id) == v['id']), get_all=False)
+
+        if not activity or activity.get('type') not in ['Video Activity', 'Lesson Activity']:
+ raise ExtractorError('The activity is not a video', expected=True)
+
+        module = next((m for m in course.get('learning_modules') or []
+                       if int(activity_id) in (traverse_obj(m, ('activities', ..., 'id')) or [])), None)
+
+ vimeo_id = self._get_vimeo_id(activity_id)
+
+ return {
+ '_type': 'url_transparent',
+ 'series': traverse_obj(course, ('content_description', 'title')),
+ 'series_id': str_or_none(traverse_obj(course, ('content_description', 'id'))),
+ 'id': vimeo_id,
+ 'chapter': module.get('title'),
+ 'chapter_id': str_or_none(module.get('id')),
+ 'title': activity.get('title'),
+ 'url': smuggle_url(f'https://player.vimeo.com/video/{vimeo_id}', {'referer': 'https://api.cybrary.it'})
+ }
+
+
+class CybraryCourseIE(CybraryBaseIE):
+ _VALID_URL = r'https?://app\.cybrary\.it/browse/course/(?P<id>[\w-]+)/?(?:$|[#?])'
+ _TESTS = [{
+ 'url': 'https://app.cybrary.it/browse/course/az-500-microsoft-azure-security-technologies',
+ 'info_dict': {
+ 'id': '898',
+ 'title': 'AZ-500: Microsoft Azure Security Technologies',
+ 'description': 'md5:69549d379c0fc1dec92926d4e8b6fbd4'
+ },
+ 'playlist_count': 59
+ }, {
+ 'url': 'https://app.cybrary.it/browse/course/cybrary-orientation',
+ 'info_dict': {
+ 'id': '1245',
+ 'title': 'Cybrary Orientation',
+ 'description': 'md5:9e69ff66b32fe78744e0ad4babe2e88e'
+ },
+ 'playlist_count': 4
+ }]
+
+ def _real_extract(self, url):
+ course_id = self._match_id(url)
+ course = self._call_api('course', course_id)
+ enrollment_info = self._call_api('course_enrollment', course['id'])
+
+ entries = [self.url_result(
+ f'https://app.cybrary.it/immersive/{enrollment_info["id"]}/activity/{activity["id"]}')
+ for activity in traverse_obj(course, ('content_item', 'learning_modules', ..., 'activities', ...))]
+
+ return self.playlist_result(
+ entries,
+ traverse_obj(course, ('content_item', 'id'), expected_type=str_or_none),
+ course.get('title'), course.get('short_description'))
diff --git a/yt_dlp/extractor/dacast.py b/yt_dlp/extractor/dacast.py
new file mode 100644
index 0000000..4e81aa4
--- /dev/null
+++ b/yt_dlp/extractor/dacast.py
@@ -0,0 +1,161 @@
+import hashlib
+import re
+import time
+
+from .common import InfoExtractor
+from ..networking.exceptions import HTTPError
+from ..utils import (
+ ExtractorError,
+ classproperty,
+ float_or_none,
+ traverse_obj,
+ url_or_none,
+)
+
+
+class DacastBaseIE(InfoExtractor):
+ _URL_TYPE = None
+
+ @classproperty
+ def _VALID_URL(cls):
+ return fr'https?://iframe\.dacast\.com/{cls._URL_TYPE}/(?P<user_id>[\w-]+)/(?P<id>[\w-]+)'
+
+ @classproperty
+ def _EMBED_REGEX(cls):
+ return [rf'<iframe[^>]+\bsrc=["\'](?P<url>{cls._VALID_URL})']
+
+ _API_INFO_URL = 'https://playback.dacast.com/content/info'
+
+ @classmethod
+ def _get_url_from_id(cls, content_id):
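+        # content ids have the form '<user_id>-<url_type>-<media_id>'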
+ user_id, media_id = content_id.split(f'-{cls._URL_TYPE}-')
+ return f'https://iframe.dacast.com/{cls._URL_TYPE}/{user_id}/{media_id}'
+
+ @classmethod
+ def _extract_embed_urls(cls, url, webpage):
+ yield from super()._extract_embed_urls(url, webpage)
+ for content_id in re.findall(
+ rf'<script[^>]+\bsrc=["\']https://player\.dacast\.com/js/player\.js\?contentId=([\w-]+-{cls._URL_TYPE}-[\w-]+)["\']', webpage):
+ yield cls._get_url_from_id(content_id)
+
+
+class DacastVODIE(DacastBaseIE):
+ _URL_TYPE = 'vod'
+ _TESTS = [{
+ 'url': 'https://iframe.dacast.com/vod/acae82153ef4d7a7344ae4eaa86af534/1c6143e3-5a06-371d-8695-19b96ea49090',
+ 'info_dict': {
+ 'id': '1c6143e3-5a06-371d-8695-19b96ea49090',
+ 'ext': 'mp4',
+ 'uploader_id': 'acae82153ef4d7a7344ae4eaa86af534',
+ 'title': '2_4||Adnexal mass characterisation: O-RADS US and MRI||N. Bharwani, London/UK',
+ 'thumbnail': 'https://universe-files.dacast.com/26137208-5858-65c1-5e9a-9d6b6bd2b6c2',
+ },
+ 'params': {'skip_download': 'm3u8'},
+ }]
+ _WEBPAGE_TESTS = [{
+ 'url': 'https://www.dacast.com/support/knowledgebase/how-can-i-embed-a-video-on-my-website/',
+ 'info_dict': {
+ 'id': 'b6674869-f08a-23c5-1d7b-81f5309e1a90',
+ 'ext': 'mp4',
+ 'title': '4-HowToEmbedVideo.mp4',
+ 'uploader_id': '3b67c4a9-3886-4eb1-d0eb-39b23b14bef3',
+ 'thumbnail': 'https://universe-files.dacast.com/d26ab48f-a52a-8783-c42e-a90290ba06b6.png',
+ },
+ 'params': {'skip_download': 'm3u8'},
+ }, {
+ 'url': 'https://gist.githubusercontent.com/bashonly/4ad249ef2910346fbdf3809b220f11ee/raw/87349778d4af1a80b1fcc3beb9c88108de5858f5/dacast_embeds.html',
+ 'info_dict': {
+ 'id': 'e7df418e-a83b-7a7f-7b5e-1a667981e8fa',
+ 'ext': 'mp4',
+ 'title': 'Evening Service 2-5-23',
+ 'uploader_id': '943bb1ab3c03695ba85330d92d6d226e',
+ 'thumbnail': 'https://universe-files.dacast.com/337472b3-e92c-2ea4-7eb7-5700da477f67',
+ },
+ 'params': {'skip_download': 'm3u8'},
+ }]
+
+ def _real_extract(self, url):
+ user_id, video_id = self._match_valid_url(url).group('user_id', 'id')
+ query = {'contentId': f'{user_id}-vod-{video_id}', 'provider': 'universe'}
+ info = self._download_json(self._API_INFO_URL, video_id, query=query, fatal=False)
+ access = self._download_json(
+ 'https://playback.dacast.com/content/access', video_id,
+ note='Downloading access JSON', query=query, expected_status=403)
+
+ error = access.get('error')
+ if error in ('Broadcaster has been blocked', 'Content is offline'):
+ raise ExtractorError(error, expected=True)
+ elif error:
+ raise ExtractorError(f'Dacast API says "{error}"')
+
+ hls_url = access['hls']
+ hls_aes = {}
+
+ if 'DRM_EXT' in hls_url:
+ self.report_drm(video_id)
+ elif '/uspaes/' in hls_url:
+ # From https://player.dacast.com/js/player.js
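+            # the key URL is signed as sha1('<10413792000 - ts><ts><secret>');
+            # 10413792000 appears to be a far-future expiry epoch (~year 2300)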
+ ts = int(time.time())
+ signature = hashlib.sha1(
+ f'{10413792000 - ts}{ts}YfaKtquEEpDeusCKbvYszIEZnWmBcSvw').digest().hex()
+ hls_aes['uri'] = f'https://keys.dacast.com/uspaes/{video_id}.key?s={signature}&ts={ts}'
+
+ for retry in self.RetryManager():
+ try:
+ formats = self._extract_m3u8_formats(hls_url, video_id, 'mp4', m3u8_id='hls')
+ except ExtractorError as e:
+ # CDN will randomly respond with 403
+ if isinstance(e.cause, HTTPError) and e.cause.status == 403:
+ retry.error = e
+ continue
+ raise
+
+ return {
+ 'id': video_id,
+ 'uploader_id': user_id,
+ 'formats': formats,
+ 'hls_aes': hls_aes or None,
+ **traverse_obj(info, ('contentInfo', {
+ 'title': 'title',
+ 'duration': ('duration', {float_or_none}),
+ 'thumbnail': ('thumbnailUrl', {url_or_none}),
+ })),
+ }
+
+
+class DacastPlaylistIE(DacastBaseIE):
+ _URL_TYPE = 'playlist'
+ _TESTS = [{
+ 'url': 'https://iframe.dacast.com/playlist/943bb1ab3c03695ba85330d92d6d226e/b632eb053cac17a9c9a02bcfc827f2d8',
+ 'playlist_mincount': 28,
+ 'info_dict': {
+ 'id': 'b632eb053cac17a9c9a02bcfc827f2d8',
+ 'title': 'Archive Sermons',
+ },
+ }]
+ _WEBPAGE_TESTS = [{
+ 'url': 'https://gist.githubusercontent.com/bashonly/7efb606f49f3c6e07ea0327de5a661d1/raw/05a16eac830245ea301fb0a585023bec71e6093c/dacast_playlist_embed.html',
+ 'playlist_mincount': 28,
+ 'info_dict': {
+ 'id': 'b632eb053cac17a9c9a02bcfc827f2d8',
+ 'title': 'Archive Sermons',
+ },
+ }]
+
+ def _real_extract(self, url):
+ user_id, playlist_id = self._match_valid_url(url).group('user_id', 'id')
+ info = self._download_json(
+ self._API_INFO_URL, playlist_id, note='Downloading playlist JSON', query={
+ 'contentId': f'{user_id}-playlist-{playlist_id}',
+ 'provider': 'universe',
+ })['contentInfo']
+
+ def entries(info):
+ for video in traverse_obj(info, ('features', 'playlist', 'contents', lambda _, v: v['id'])):
+ yield self.url_result(
+ DacastVODIE._get_url_from_id(video['id']), DacastVODIE, video['id'], video.get('title'))
+
+ return self.playlist_result(entries(info), playlist_id, info.get('title'))
diff --git a/yt_dlp/extractor/dailymail.py b/yt_dlp/extractor/dailymail.py
new file mode 100644
index 0000000..43401e1
--- /dev/null
+++ b/yt_dlp/extractor/dailymail.py
@@ -0,0 +1,74 @@
+from .common import InfoExtractor
+from ..compat import compat_str
+from ..utils import (
+    determine_protocol,
+    int_or_none,
+    try_get,
+    unescapeHTML,
+)
+
+
+class DailyMailIE(InfoExtractor):
+ _VALID_URL = r'https?://(?:www\.)?dailymail\.co\.uk/(?:video/[^/]+/video-|embed/video/)(?P<id>[0-9]+)'
+ _EMBED_REGEX = [r'<iframe\b[^>]+\bsrc=["\'](?P<url>(?:https?:)?//(?:www\.)?dailymail\.co\.uk/embed/video/\d+\.html)']
+ _TESTS = [{
+ 'url': 'http://www.dailymail.co.uk/video/tvshowbiz/video-1295863/The-Mountain-appears-sparkling-water-ad-Heavy-Bubbles.html',
+ 'md5': 'f6129624562251f628296c3a9ffde124',
+ 'info_dict': {
+ 'id': '1295863',
+ 'ext': 'mp4',
+ 'title': 'The Mountain appears in sparkling water ad for \'Heavy Bubbles\'',
+ 'description': 'md5:a93d74b6da172dd5dc4d973e0b766a84',
+ }
+ }, {
+ 'url': 'http://www.dailymail.co.uk/embed/video/1295863.html',
+ 'only_matching': True,
+ }]
+
+ def _real_extract(self, url):
+ video_id = self._match_id(url)
+ webpage = self._download_webpage(url, video_id)
+ video_data = self._parse_json(self._search_regex(
+ r"data-opts='({.+?})'", webpage, 'video data'), video_id)
+ title = unescapeHTML(video_data['title'])
+
+ sources_url = (try_get(
+ video_data,
+ (lambda x: x['plugins']['sources']['url'],
+ lambda x: x['sources']['url']), compat_str)
+ or 'http://www.dailymail.co.uk/api/player/%s/video-sources.json' % video_id)
+
+ video_sources = self._download_json(sources_url, video_id)
+ body = video_sources.get('body')
+ if body:
+ video_sources = body
+
+ formats = []
+ for rendition in video_sources['renditions']:
+ rendition_url = rendition.get('url')
+ if not rendition_url:
+ continue
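+            # encodingRate appears to be in bits/s; scale it down to kbit/s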
+ tbr = int_or_none(rendition.get('encodingRate'), 1000)
+ container = rendition.get('videoContainer')
+ is_hls = container == 'M2TS'
+ protocol = 'm3u8_native' if is_hls else determine_protocol({'url': rendition_url})
+ formats.append({
+ 'format_id': ('hls' if is_hls else protocol) + ('-%d' % tbr if tbr else ''),
+ 'url': rendition_url,
+ 'width': int_or_none(rendition.get('frameWidth')),
+ 'height': int_or_none(rendition.get('frameHeight')),
+ 'tbr': tbr,
+ 'vcodec': rendition.get('videoCodec'),
+ 'container': container,
+ 'protocol': protocol,
+ 'ext': 'mp4' if is_hls else None,
+ })
+
+ return {
+ 'id': video_id,
+ 'title': title,
+ 'description': unescapeHTML(video_data.get('descr')),
+ 'thumbnail': video_data.get('poster') or video_data.get('thumbnail'),
+ 'formats': formats,
+ }
diff --git a/yt_dlp/extractor/dailymotion.py b/yt_dlp/extractor/dailymotion.py
new file mode 100644
index 0000000..c570a4f
--- /dev/null
+++ b/yt_dlp/extractor/dailymotion.py
@@ -0,0 +1,480 @@
+import functools
+import json
+import re
+import urllib.parse
+
+from .common import InfoExtractor
+from ..networking.exceptions import HTTPError
+from ..utils import (
+ ExtractorError,
+ OnDemandPagedList,
+ age_restricted,
+ clean_html,
+ int_or_none,
+ traverse_obj,
+ try_get,
+ unescapeHTML,
+ unsmuggle_url,
+ urlencode_postdata,
+)
+
+
+class DailymotionBaseInfoExtractor(InfoExtractor):
+ _FAMILY_FILTER = None
+ _HEADERS = {
+ 'Content-Type': 'application/json',
+ 'Origin': 'https://www.dailymotion.com',
+ }
+ _NETRC_MACHINE = 'dailymotion'
+
+ def _get_dailymotion_cookies(self):
+ return self._get_cookies('https://www.dailymotion.com/')
+
+ @staticmethod
+ def _get_cookie_value(cookies, name):
+ cookie = cookies.get(name)
+ if cookie:
+ return cookie.value
+
+ def _set_dailymotion_cookie(self, name, value):
+ self._set_cookie('www.dailymotion.com', name, value)
+
+ def _real_initialize(self):
+ cookies = self._get_dailymotion_cookies()
+ ff = self._get_cookie_value(cookies, 'ff')
+ self._FAMILY_FILTER = ff == 'on' if ff else age_restricted(18, self.get_param('age_limit'))
+ self._set_dailymotion_cookie('ff', 'on' if self._FAMILY_FILTER else 'off')
+
+ def _get_token(self, xid):
+ cookies = self._get_dailymotion_cookies()
+ token = self._get_cookie_value(cookies, 'access_token') or self._get_cookie_value(cookies, 'client_token')
+ if token:
+ return token
+
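+        # no cached token: request one via OAuth, using the password grant when
+        # credentials are available and falling back to client_credentials with
+        # the hardcoded client id/secret below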
+ data = {
+ 'client_id': 'f1a362d288c1b98099c7',
+ 'client_secret': 'eea605b96e01c796ff369935357eca920c5da4c5',
+ }
+ username, password = self._get_login_info()
+ if username:
+ data.update({
+ 'grant_type': 'password',
+ 'password': password,
+ 'username': username,
+ })
+ else:
+ data['grant_type'] = 'client_credentials'
+ try:
+ token = self._download_json(
+ 'https://graphql.api.dailymotion.com/oauth/token',
+ None, 'Downloading Access Token',
+ data=urlencode_postdata(data))['access_token']
+ except ExtractorError as e:
+ if isinstance(e.cause, HTTPError) and e.cause.status == 400:
+ raise ExtractorError(self._parse_json(
+ e.cause.response.read().decode(), xid)['error_description'], expected=True)
+ raise
+ self._set_dailymotion_cookie('access_token' if username else 'client_token', token)
+ return token
+
+ def _call_api(self, object_type, xid, object_fields, note, filter_extra=None):
+ if not self._HEADERS.get('Authorization'):
+ self._HEADERS['Authorization'] = f'Bearer {self._get_token(xid)}'
+
+ resp = self._download_json(
+ 'https://graphql.api.dailymotion.com/', xid, note, data=json.dumps({
+ 'query': '''{
+ %s(xid: "%s"%s) {
+ %s
+ }
+}''' % (object_type, xid, ', ' + filter_extra if filter_extra else '', object_fields),
+ }).encode(), headers=self._HEADERS)
+ obj = resp['data'][object_type]
+ if not obj:
+ raise ExtractorError(resp['errors'][0]['message'], expected=True)
+ return obj
+
+
+class DailymotionIE(DailymotionBaseInfoExtractor):
+ _VALID_URL = r'''(?ix)
+ https?://
+ (?:
+ (?:(?:www|touch|geo)\.)?dailymotion\.[a-z]{2,3}/(?:(?:(?:(?:embed|swf|\#)/)|player(?:/\w+)?\.html\?)?video|swf)|
+ (?:www\.)?lequipe\.fr/video
+ )
+ [/=](?P<id>[^/?_&]+)(?:.+?\bplaylist=(?P<playlist_id>x[0-9a-z]+))?
+ '''
+ IE_NAME = 'dailymotion'
+ _EMBED_REGEX = [r'<(?:(?:embed|iframe)[^>]+?src=|input[^>]+id=[\'"]dmcloudUrlEmissionSelect[\'"][^>]+value=)(["\'])(?P<url>(?:https?:)?//(?:www\.)?dailymotion\.com/(?:embed|swf)/video/.+?)\1']
+ _TESTS = [{
+ 'url': 'http://www.dailymotion.com/video/x5kesuj_office-christmas-party-review-jason-bateman-olivia-munn-t-j-miller_news',
+ 'md5': '074b95bdee76b9e3654137aee9c79dfe',
+ 'info_dict': {
+ 'id': 'x5kesuj',
+ 'ext': 'mp4',
+ 'title': 'Office Christmas Party Review – Jason Bateman, Olivia Munn, T.J. Miller',
+ 'description': 'Office Christmas Party Review - Jason Bateman, Olivia Munn, T.J. Miller',
+ 'duration': 187,
+ 'timestamp': 1493651285,
+ 'upload_date': '20170501',
+ 'uploader': 'Deadline',
+ 'uploader_id': 'x1xm8ri',
+ 'age_limit': 0,
+ 'view_count': int,
+ 'like_count': int,
+ 'tags': ['hollywood', 'celeb', 'celebrity', 'movies', 'red carpet'],
+ 'thumbnail': r're:https://(?:s[12]\.)dmcdn\.net/v/K456B1aXqIx58LKWQ/x1080',
+ },
+ }, {
+ 'url': 'https://geo.dailymotion.com/player.html?video=x89eyek&mute=true',
+ 'md5': 'e2f9717c6604773f963f069ca53a07f8',
+ 'info_dict': {
+ 'id': 'x89eyek',
+ 'ext': 'mp4',
+ 'title': "En quête d'esprit du 27/03/2022",
+ 'description': 'md5:66542b9f4df2eb23f314fc097488e553',
+ 'duration': 2756,
+ 'timestamp': 1648383669,
+ 'upload_date': '20220327',
+ 'uploader': 'CNEWS',
+ 'uploader_id': 'x24vth',
+ 'age_limit': 0,
+ 'view_count': int,
+ 'like_count': int,
+ 'tags': ['en_quete_d_esprit'],
+ 'thumbnail': r're:https://(?:s[12]\.)dmcdn\.net/v/Tncwi1YNg_RUl7ueu/x1080',
+ }
+ }, {
+ 'url': 'https://www.dailymotion.com/video/x2iuewm_steam-machine-models-pricing-listed-on-steam-store-ign-news_videogames',
+ 'md5': '2137c41a8e78554bb09225b8eb322406',
+ 'info_dict': {
+ 'id': 'x2iuewm',
+ 'ext': 'mp4',
+ 'title': 'Steam Machine Models, Pricing Listed on Steam Store - IGN News',
+ 'description': 'Several come bundled with the Steam Controller.',
+ 'thumbnail': r're:^https?:.*\.(?:jpg|png)$',
+ 'duration': 74,
+ 'timestamp': 1425657362,
+ 'upload_date': '20150306',
+ 'uploader': 'IGN',
+ 'uploader_id': 'xijv66',
+ 'age_limit': 0,
+ 'view_count': int,
+ },
+ 'skip': 'video gone',
+ }, {
+ # Vevo video
+ 'url': 'http://www.dailymotion.com/video/x149uew_katy-perry-roar-official_musi',
+ 'info_dict': {
+ 'title': 'Roar (Official)',
+ 'id': 'USUV71301934',
+ 'ext': 'mp4',
+ 'uploader': 'Katy Perry',
+ 'upload_date': '20130905',
+ },
+ 'params': {
+ 'skip_download': True,
+ },
+ 'skip': 'VEVO is only available in some countries',
+ }, {
+ # age-restricted video
+ 'url': 'http://www.dailymotion.com/video/xyh2zz_leanna-decker-cyber-girl-of-the-year-desires-nude-playboy-plus_redband',
+ 'md5': '0d667a7b9cebecc3c89ee93099c4159d',
+ 'info_dict': {
+ 'id': 'xyh2zz',
+ 'ext': 'mp4',
+ 'title': 'Leanna Decker - Cyber Girl Of The Year Desires Nude [Playboy Plus]',
+ 'uploader': 'HotWaves1012',
+ 'age_limit': 18,
+ },
+ 'skip': 'video gone',
+ }, {
+ # geo-restricted, player v5
+ 'url': 'http://www.dailymotion.com/video/xhza0o',
+ 'only_matching': True,
+ }, {
+ # with subtitles
+ 'url': 'http://www.dailymotion.com/video/x20su5f_the-power-of-nightmares-1-the-rise-of-the-politics-of-fear-bbc-2004_news',
+ 'only_matching': True,
+ }, {
+ 'url': 'http://www.dailymotion.com/swf/video/x3n92nf',
+ 'only_matching': True,
+ }, {
+ 'url': 'http://www.dailymotion.com/swf/x3ss1m_funny-magic-trick-barry-and-stuart_fun',
+ 'only_matching': True,
+ }, {
+ 'url': 'https://www.lequipe.fr/video/x791mem',
+ 'only_matching': True,
+ }, {
+ 'url': 'https://www.lequipe.fr/video/k7MtHciueyTcrFtFKA2',
+ 'only_matching': True,
+ }, {
+ 'url': 'https://www.dailymotion.com/video/x3z49k?playlist=xv4bw',
+ 'only_matching': True,
+ }, {
+ 'url': 'https://geo.dailymotion.com/player/x86gw.html?video=k46oCapRs4iikoz9DWy',
+ 'only_matching': True,
+ }, {
+ 'url': 'https://geo.dailymotion.com/player/xakln.html?video=x8mjju4&customConfig%5BcustomParams%5D=%2Ffr-fr%2Ftennis%2Fwimbledon-mens-singles%2Farticles-video',
+ 'only_matching': True,
+ }]
+ _GEO_BYPASS = False
+ _COMMON_MEDIA_FIELDS = '''description
+ geoblockedCountries {
+ allowed
+ }
+ xid'''
+
+ @classmethod
+ def _extract_embed_urls(cls, url, webpage):
+ # https://developer.dailymotion.com/player#player-parameters
+ yield from super()._extract_embed_urls(url, webpage)
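+        # matches inline player bootstraps like DM.player(el, {video: "x8abcde", ...})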
+ for mobj in re.finditer(
+ r'(?s)DM\.player\([^,]+,\s*{.*?video[\'"]?\s*:\s*["\']?(?P<id>[0-9a-zA-Z]+).+?}\s*\);', webpage):
+            yield 'https://www.dailymotion.com/embed/video/' + mobj.group('id')
+
+ def _real_extract(self, url):
+ url, smuggled_data = unsmuggle_url(url)
+ video_id, playlist_id = self._match_valid_url(url).groups()
+
+ if playlist_id:
+ if self._yes_playlist(playlist_id, video_id):
+ return self.url_result(
+ 'http://www.dailymotion.com/playlist/' + playlist_id,
+ 'DailymotionPlaylist', playlist_id)
+
+ password = self.get_param('videopassword')
+ media = self._call_api(
+ 'media', video_id, '''... on Video {
+ %s
+ stats {
+ likes {
+ total
+ }
+ views {
+ total
+ }
+ }
+ }
+ ... on Live {
+ %s
+ audienceCount
+ isOnAir
+ }''' % (self._COMMON_MEDIA_FIELDS, self._COMMON_MEDIA_FIELDS), 'Downloading media JSON metadata',
+            'password: "%s"' % password if password else None)
+ xid = media['xid']
+
+ metadata = self._download_json(
+ 'https://www.dailymotion.com/player/metadata/video/' + xid,
+ xid, 'Downloading metadata JSON',
+ query=traverse_obj(smuggled_data, 'query') or {'app': 'com.dailymotion.neon'})
+
+ error = metadata.get('error')
+ if error:
+ title = error.get('title') or error['raw_message']
+ # See https://developer.dailymotion.com/api#access-error
+ if error.get('code') == 'DM007':
+ allowed_countries = try_get(media, lambda x: x['geoblockedCountries']['allowed'], list)
+ self.raise_geo_restricted(msg=title, countries=allowed_countries)
+ raise ExtractorError(
+ '%s said: %s' % (self.IE_NAME, title), expected=True)
+
+ title = metadata['title']
+ is_live = media.get('isOnAir')
+ formats = []
+ for quality, media_list in metadata['qualities'].items():
+ for m in media_list:
+ media_url = m.get('url')
+ media_type = m.get('type')
+ if not media_url or media_type == 'application/vnd.lumberjack.manifest':
+ continue
+ if media_type == 'application/x-mpegURL':
+ formats.extend(self._extract_m3u8_formats(
+ media_url, video_id, 'mp4', live=is_live, m3u8_id='hls', fatal=False))
+ else:
+ f = {
+ 'url': media_url,
+ 'format_id': 'http-' + quality,
+ }
+ m = re.search(r'/H264-(\d+)x(\d+)(?:-(60)/)?', media_url)
+ if m:
+ width, height, fps = map(int_or_none, m.groups())
+ f.update({
+ 'fps': fps,
+ 'height': height,
+ 'width': width,
+ })
+ formats.append(f)
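+        # strip URL fragments and flag 60fps variants, whose quality keys end
+        # in '@60' (e.g. format_id 'http-720@60')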
+ for f in formats:
+ f['url'] = f['url'].split('#')[0]
+ if not f.get('fps') and f['format_id'].endswith('@60'):
+ f['fps'] = 60
+
+ subtitles = {}
+ subtitles_data = try_get(metadata, lambda x: x['subtitles']['data'], dict) or {}
+ for subtitle_lang, subtitle in subtitles_data.items():
+ subtitles[subtitle_lang] = [{
+ 'url': subtitle_url,
+ } for subtitle_url in subtitle.get('urls', [])]
+
+ thumbnails = []
+ for height, poster_url in metadata.get('posters', {}).items():
+ thumbnails.append({
+ 'height': int_or_none(height),
+ 'id': height,
+ 'url': poster_url,
+ })
+
+ owner = metadata.get('owner') or {}
+ stats = media.get('stats') or {}
+ get_count = lambda x: int_or_none(try_get(stats, lambda y: y[x + 's']['total']))
+
+ return {
+ 'id': video_id,
+ 'title': title,
+ 'description': clean_html(media.get('description')),
+ 'thumbnails': thumbnails,
+ 'duration': int_or_none(metadata.get('duration')) or None,
+ 'timestamp': int_or_none(metadata.get('created_time')),
+ 'uploader': owner.get('screenname'),
+ 'uploader_id': owner.get('id') or metadata.get('screenname'),
+ 'age_limit': 18 if metadata.get('explicit') else 0,
+ 'tags': metadata.get('tags'),
+ 'view_count': get_count('view') or int_or_none(media.get('audienceCount')),
+ 'like_count': get_count('like'),
+ 'formats': formats,
+ 'subtitles': subtitles,
+ 'is_live': is_live,
+ }
+
+
+class DailymotionPlaylistBaseIE(DailymotionBaseInfoExtractor):
+ _PAGE_SIZE = 100
+
+ def _fetch_page(self, playlist_id, page):
+ page += 1
+ videos = self._call_api(
+ self._OBJECT_TYPE, playlist_id,
+ '''videos(allowExplicit: %s, first: %d, page: %d) {
+ edges {
+ node {
+ xid
+ url
+ }
+ }
+ }''' % ('false' if self._FAMILY_FILTER else 'true', self._PAGE_SIZE, page),
+ 'Downloading page %d' % page)['videos']
+ for edge in videos['edges']:
+ node = edge['node']
+ yield self.url_result(
+ node['url'], DailymotionIE.ie_key(), node['xid'])
+
+ def _real_extract(self, url):
+ playlist_id = self._match_id(url)
+ entries = OnDemandPagedList(functools.partial(
+ self._fetch_page, playlist_id), self._PAGE_SIZE)
+ return self.playlist_result(
+ entries, playlist_id)
+
+
+class DailymotionPlaylistIE(DailymotionPlaylistBaseIE):
+ IE_NAME = 'dailymotion:playlist'
+ _VALID_URL = r'(?:https?://)?(?:www\.)?dailymotion\.[a-z]{2,3}/playlist/(?P<id>x[0-9a-z]+)'
+ _TESTS = [{
+ 'url': 'http://www.dailymotion.com/playlist/xv4bw_nqtv_sport/1#video=xl8v3q',
+ 'info_dict': {
+ 'id': 'xv4bw',
+ },
+ 'playlist_mincount': 20,
+ }]
+ _OBJECT_TYPE = 'collection'
+
+ @classmethod
+ def _extract_embed_urls(cls, url, webpage):
+ # Look for embedded Dailymotion playlist player (#3822)
+ for mobj in re.finditer(
+ r'<iframe[^>]+?src=(["\'])(?P<url>(?:https?:)?//(?:www\.)?dailymotion\.[a-z]{2,3}/widget/jukebox\?.+?)\1',
+ webpage):
+ for p in re.findall(r'list\[\]=/playlist/([^/]+)/', unescapeHTML(mobj.group('url'))):
+ yield '//dailymotion.com/playlist/%s' % p
+
+
+class DailymotionSearchIE(DailymotionPlaylistBaseIE):
+ IE_NAME = 'dailymotion:search'
+ _VALID_URL = r'https?://(?:www\.)?dailymotion\.[a-z]{2,3}/search/(?P<id>[^/?#]+)/videos'
+ _PAGE_SIZE = 20
+ _TESTS = [{
+ 'url': 'http://www.dailymotion.com/search/king of turtles/videos',
+ 'info_dict': {
+ 'id': 'king of turtles',
+ 'title': 'king of turtles',
+ },
+ 'playlist_mincount': 90,
+ }]
+ _SEARCH_QUERY = 'query SEARCH_QUERY( $query: String! $page: Int $limit: Int ) { search { videos( query: $query first: $limit page: $page ) { edges { node { xid } } } } } '
+
+ def _call_search_api(self, term, page, note):
+ if not self._HEADERS.get('Authorization'):
+ self._HEADERS['Authorization'] = f'Bearer {self._get_token(term)}'
+ resp = self._download_json(
+ 'https://graphql.api.dailymotion.com/', None, note, data=json.dumps({
+ 'operationName': 'SEARCH_QUERY',
+ 'query': self._SEARCH_QUERY,
+ 'variables': {
+ 'limit': 20,
+ 'page': page,
+ 'query': term,
+ }
+ }).encode(), headers=self._HEADERS)
+ obj = traverse_obj(resp, ('data', 'search', {dict}))
+ if not obj:
+ raise ExtractorError(
+ traverse_obj(resp, ('errors', 0, 'message', {str})) or 'Could not fetch search data')
+
+ return obj
+
+ def _fetch_page(self, term, page):
+ page += 1
+ response = self._call_search_api(term, page, f'Searching "{term}" page {page}')
+ for xid in traverse_obj(response, ('videos', 'edges', ..., 'node', 'xid')):
+ yield self.url_result(f'https://www.dailymotion.com/video/{xid}', DailymotionIE, xid)
+
+ def _real_extract(self, url):
+ term = urllib.parse.unquote_plus(self._match_id(url))
+ return self.playlist_result(
+ OnDemandPagedList(functools.partial(self._fetch_page, term), self._PAGE_SIZE), term, term)
+
+
+class DailymotionUserIE(DailymotionPlaylistBaseIE):
+ IE_NAME = 'dailymotion:user'
+ _VALID_URL = r'https?://(?:www\.)?dailymotion\.[a-z]{2,3}/(?!(?:embed|swf|#|video|playlist|search)/)(?:(?:old/)?user/)?(?P<id>[^/?#]+)'
+ _TESTS = [{
+ 'url': 'https://www.dailymotion.com/user/nqtv',
+ 'info_dict': {
+ 'id': 'nqtv',
+ },
+ 'playlist_mincount': 152,
+ }, {
+ 'url': 'http://www.dailymotion.com/user/UnderProject',
+ 'info_dict': {
+ 'id': 'UnderProject',
+ },
+ 'playlist_mincount': 1000,
+        'skip': 'Takes too long',
+ }, {
+ 'url': 'https://www.dailymotion.com/user/nqtv',
+ 'info_dict': {
+ 'id': 'nqtv',
+ },
+ 'playlist_mincount': 148,
+ 'params': {
+ 'age_limit': 0,
+ },
+ }]
+ _OBJECT_TYPE = 'channel'
diff --git a/yt_dlp/extractor/dailywire.py b/yt_dlp/extractor/dailywire.py
new file mode 100644
index 0000000..f177c9d
--- /dev/null
+++ b/yt_dlp/extractor/dailywire.py
@@ -0,0 +1,114 @@
+from .common import InfoExtractor
+from ..utils import (
+ determine_ext,
+ float_or_none,
+ join_nonempty,
+ traverse_obj,
+ url_or_none,
+)
+
+
+class DailyWireBaseIE(InfoExtractor):
+ _JSON_PATH = {
+ 'episode': ('props', 'pageProps', 'episodeData', 'episode'),
+ 'videos': ('props', 'pageProps', 'videoData', 'video'),
+ 'podcasts': ('props', 'pageProps', 'episode'),
+ }
+
+ def _get_json(self, url):
+ sites_type, slug = self._match_valid_url(url).group('sites_type', 'id')
+ json_data = self._search_nextjs_data(self._download_webpage(url, slug), slug)
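+        # the media object sits at a URL-type-specific path within the Next.js data blob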
+ return slug, traverse_obj(json_data, self._JSON_PATH[sites_type])
+
+
+class DailyWireIE(DailyWireBaseIE):
+    _VALID_URL = r'https?://(?:www\.)?dailywire\.com/(?P<sites_type>episode|videos)/(?P<id>[\w-]+)'
+ _TESTS = [{
+ 'url': 'https://www.dailywire.com/episode/1-fauci',
+ 'info_dict': {
+ 'id': 'ckzsl50xnqpy30850in3v4bu7',
+ 'ext': 'mp4',
+ 'display_id': '1-fauci',
+ 'title': '1. Fauci',
+ 'description': 'md5:9df630347ef85081b7e97dd30bc22853',
+ 'thumbnail': 'https://daily-wire-production.imgix.net/episodes/ckzsl50xnqpy30850in3v4bu7/ckzsl50xnqpy30850in3v4bu7-1648237399554.jpg',
+ 'creator': 'Caroline Roberts',
+ 'series_id': 'ckzplm0a097fn0826r2vc3j7h',
+ 'series': 'China: The Enemy Within',
+ }
+ }, {
+ 'url': 'https://www.dailywire.com/episode/ep-124-bill-maher',
+ 'info_dict': {
+ 'id': 'cl0ngbaalplc80894sfdo9edf',
+ 'ext': 'mp3',
+ 'display_id': 'ep-124-bill-maher',
+ 'title': 'Ep. 124 - Bill Maher',
+ 'thumbnail': 'https://daily-wire-production.imgix.net/episodes/cl0ngbaalplc80894sfdo9edf/cl0ngbaalplc80894sfdo9edf-1647065568518.jpg',
+ 'creator': 'Caroline Roberts',
+ 'description': 'md5:adb0de584bcfa9c41374999d9e324e98',
+ 'series_id': 'cjzvep7270hp00786l9hwccob',
+ 'series': 'The Sunday Special',
+ }
+ }, {
+ 'url': 'https://www.dailywire.com/videos/the-hyperions',
+ 'only_matching': True,
+ }]
+
+ def _real_extract(self, url):
+ slug, episode_info = self._get_json(url)
+ urls = traverse_obj(
+ episode_info, (('segments', 'videoUrl'), ..., ('video', 'audio')), expected_type=url_or_none)
+
+ formats, subtitles = [], {}
+        for media_url in urls:
+            if determine_ext(media_url) != 'm3u8':
+                formats.append({'url': media_url})
+                continue
+            format_, subs_ = self._extract_m3u8_formats_and_subtitles(media_url, slug)
+ formats.extend(format_)
+ self._merge_subtitles(subs_, target=subtitles)
+ return {
+ 'id': episode_info['id'],
+ 'display_id': slug,
+ 'title': traverse_obj(episode_info, 'title', 'name'),
+ 'description': episode_info.get('description'),
+ 'creator': join_nonempty(('createdBy', 'firstName'), ('createdBy', 'lastName'), from_dict=episode_info, delim=' '),
+ 'duration': float_or_none(episode_info.get('duration')),
+ 'is_live': episode_info.get('isLive'),
+ 'thumbnail': traverse_obj(episode_info, 'thumbnail', 'image', expected_type=url_or_none),
+ 'formats': formats,
+ 'subtitles': subtitles,
+ 'series_id': traverse_obj(episode_info, ('show', 'id')),
+ 'series': traverse_obj(episode_info, ('show', 'name')),
+ }
+
+
+class DailyWirePodcastIE(DailyWireBaseIE):
+    _VALID_URL = r'https?://(?:www\.)?dailywire\.com/(?P<sites_type>podcasts)/(?P<podcaster>[\w-]+/(?P<id>[\w-]+))'
+ _TESTS = [{
+ 'url': 'https://www.dailywire.com/podcasts/morning-wire/get-ready-for-recession-6-15-22',
+ 'info_dict': {
+ 'id': 'cl4f01d0w8pbe0a98ydd0cfn1',
+ 'ext': 'm4a',
+ 'display_id': 'get-ready-for-recession-6-15-22',
+ 'title': 'Get Ready for Recession | 6.15.22',
+ 'description': 'md5:c4afbadda4e1c38a4496f6d62be55634',
+ 'thumbnail': 'https://daily-wire-production.imgix.net/podcasts/ckx4otgd71jm508699tzb6hf4-1639506575562.jpg',
+ 'duration': 900.117667,
+ }
+ }]
+
+ def _real_extract(self, url):
+ slug, episode_info = self._get_json(url)
+ audio_id = traverse_obj(episode_info, 'audioMuxPlaybackId', 'VUsAipTrBVSgzw73SpC2DAJD401TYYwEp')
+
+ return {
+ 'id': episode_info['id'],
+ 'url': f'https://stream.media.dailywire.com/{audio_id}/audio.m4a',
+ 'display_id': slug,
+ 'title': episode_info.get('title'),
+ 'duration': float_or_none(episode_info.get('duration')),
+ 'thumbnail': episode_info.get('thumbnail'),
+ 'description': episode_info.get('description'),
+ }
diff --git a/yt_dlp/extractor/damtomo.py b/yt_dlp/extractor/damtomo.py
new file mode 100644
index 0000000..5e14d6a
--- /dev/null
+++ b/yt_dlp/extractor/damtomo.py
@@ -0,0 +1,109 @@
+import re
+
+from .common import InfoExtractor
+from ..utils import ExtractorError, clean_html, int_or_none, try_get, unified_strdate
+from ..compat import compat_str
+
+
+class DamtomoBaseIE(InfoExtractor):
+ def _real_extract(self, url):
+ video_id = self._match_id(url)
+ webpage, handle = self._download_webpage_handle(self._WEBPAGE_URL_TMPL % video_id, video_id, encoding='sjis')
+
+ if handle.url == 'https://www.clubdam.com/sorry/':
+ raise ExtractorError('You are rate-limited. Try again later.', expected=True)
+ if '<h2>予期せぬエラーが発生しました。</h2>' in webpage:
+            raise ExtractorError('There is a server-side error. Try again later.', expected=True)
+
+ description = self._search_regex(r'(?m)<div id="public_comment">\s*<p>\s*([^<]*?)\s*</p>', webpage, 'description', default=None)
+ uploader_id = self._search_regex(r'<a href="https://www\.clubdam\.com/app/damtomo/member/info/Profile\.do\?damtomoId=([^"]+)"', webpage, 'uploader_id', default=None)
+
+ data_dict = {
+ mobj.group('class'): re.sub(r'\s+', ' ', clean_html(mobj.group('value')))
+ for mobj in re.finditer(r'(?s)<(p|div)\s+class="(?P<class>[^" ]+?)">(?P<value>.+?)</\1>', webpage)}
+
+        # strip the trailing honorific 'さん' from the uploader name
+        data_dict['user_name'] = re.sub(r'\s*さん\s*$', '', data_dict['user_name'])
+        # videos have no title of their own, so use the song name instead
+        title = data_dict.get('song_title')
+
+ stream_tree = self._download_xml(
+ self._DKML_XML_URL % video_id, video_id, note='Requesting stream information', encoding='sjis',
+            # stripping the encoding declaration is safe here: the document
+            # contains only ASCII characters, and that is unlikely to change
+ transform_source=lambda x: re.sub(r'\s*encoding="[^"]+?"', '', x))
+ m3u8_url = try_get(stream_tree, lambda x: x.find(
+ './/d:streamingUrl', {'d': self._DKML_XML_NS}).text.strip(), compat_str)
+ if not m3u8_url:
+ raise ExtractorError('Failed to obtain m3u8 URL')
+ formats = self._extract_m3u8_formats(m3u8_url, video_id, ext='mp4')
+
+ return {
+ 'id': video_id,
+ 'title': title,
+ 'uploader_id': uploader_id,
+ 'description': description,
+ 'uploader': data_dict.get('user_name'),
+ 'upload_date': unified_strdate(self._search_regex(r'(\d{4}/\d{2}/\d{2})', data_dict.get('date'), 'upload_date', default=None)),
+ 'view_count': int_or_none(self._search_regex(r'(\d+)', data_dict['audience'], 'view_count', default=None)),
+ 'like_count': int_or_none(self._search_regex(r'(\d+)', data_dict['nice'], 'like_count', default=None)),
+ 'track': title,
+ 'artist': data_dict.get('song_artist'),
+ 'formats': formats,
+ }
+
+
+class DamtomoVideoIE(DamtomoBaseIE):
+ IE_NAME = 'damtomo:video'
+ _VALID_URL = r'https?://(?:www\.)?clubdam\.com/app/damtomo/(?:SP/)?karaokeMovie/StreamingDkm\.do\?karaokeMovieId=(?P<id>\d+)'
+ _WEBPAGE_URL_TMPL = 'https://www.clubdam.com/app/damtomo/karaokeMovie/StreamingDkm.do?karaokeMovieId=%s'
+ _DKML_XML_URL = 'https://www.clubdam.com/app/damtomo/karaokeMovie/GetStreamingDkmUrlXML.do?movieSelectFlg=2&karaokeMovieId=%s'
+ _DKML_XML_NS = 'https://www.clubdam.com/app/damtomo/karaokeMovie/GetStreamingDkmUrlXML'
+ _TESTS = [{
+ 'url': 'https://www.clubdam.com/app/damtomo/karaokeMovie/StreamingDkm.do?karaokeMovieId=2414316',
+ 'info_dict': {
+ 'id': '2414316',
+ 'title': 'Get Wild',
+ 'uploader': 'Kドロン',
+ 'uploader_id': 'ODk5NTQwMzQ',
+ 'track': 'Get Wild',
+ 'artist': 'TM NETWORK(TMN)',
+ 'upload_date': '20201226',
+ }
+ }]
+
+
+class DamtomoRecordIE(DamtomoBaseIE):
+ IE_NAME = 'damtomo:record'
+ _VALID_URL = r'https?://(?:www\.)?clubdam\.com/app/damtomo/(?:SP/)?karaokePost/StreamingKrk\.do\?karaokeContributeId=(?P<id>\d+)'
+ _WEBPAGE_URL_TMPL = 'https://www.clubdam.com/app/damtomo/karaokePost/StreamingKrk.do?karaokeContributeId=%s'
+ _DKML_XML_URL = 'https://www.clubdam.com/app/damtomo/karaokePost/GetStreamingKrkUrlXML.do?karaokeContributeId=%s'
+ _DKML_XML_NS = 'https://www.clubdam.com/app/damtomo/karaokePost/GetStreamingKrkUrlXML'
+ _TESTS = [{
+ 'url': 'https://www.clubdam.com/app/damtomo/karaokePost/StreamingKrk.do?karaokeContributeId=27376862',
+ 'info_dict': {
+ 'id': '27376862',
+ 'title': 'イカSUMMER [良音]',
+ 'uploader': 'NANA',
+ 'uploader_id': 'MzAyMDExNTY',
+ 'upload_date': '20210721',
+ 'view_count': 4,
+ 'like_count': 1,
+ 'track': 'イカSUMMER [良音]',
+ 'artist': 'ORANGE RANGE',
+ }
+ }, {
+ 'url': 'https://www.clubdam.com/app/damtomo/karaokePost/StreamingKrk.do?karaokeContributeId=27489418',
+ 'info_dict': {
+ 'id': '27489418',
+ 'title': '心みだれて〜say it with flowers〜(生音)',
+ 'uploader_id': 'NjI1MjI2MjU',
+ 'description': 'やっぱりキーを下げて正解だった感じ。リベンジ成功ということで。',
+ 'uploader': '箱の「中の人」',
+ 'upload_date': '20210815',
+ 'view_count': 5,
+ 'like_count': 3,
+ 'track': '心みだれて〜say it with flowers〜(生音)',
+ 'artist': '小林明子',
+ }
+ }]
diff --git a/yt_dlp/extractor/daum.py b/yt_dlp/extractor/daum.py
new file mode 100644
index 0000000..24c5208
--- /dev/null
+++ b/yt_dlp/extractor/daum.py
@@ -0,0 +1,259 @@
+import itertools
+
+from .common import InfoExtractor
+from ..compat import (
+ compat_urllib_parse_unquote,
+)
+from ..utils import parse_qs
+
+
+class DaumBaseIE(InfoExtractor):
+ _KAKAO_EMBED_BASE = 'http://tv.kakao.com/embed/player/cliplink/'
+
+
+class DaumIE(DaumBaseIE):
+ _VALID_URL = r'https?://(?:(?:m\.)?tvpot\.daum\.net/v/|videofarm\.daum\.net/controller/player/VodPlayer\.swf\?vid=)(?P<id>[^?#&]+)'
+ IE_NAME = 'daum.net'
+
+ _TESTS = [{
+ 'url': 'http://tvpot.daum.net/v/vab4dyeDBysyBssyukBUjBz',
+ 'info_dict': {
+ 'id': 'vab4dyeDBysyBssyukBUjBz',
+ 'ext': 'mp4',
+ 'title': '마크 헌트 vs 안토니오 실바',
+ 'description': 'Mark Hunt vs Antonio Silva',
+ 'upload_date': '20131217',
+ 'thumbnail': r're:^https?://.*\.(?:jpg|png)',
+ 'duration': 2117,
+ 'view_count': int,
+ 'comment_count': int,
+ 'uploader_id': '186139',
+ 'uploader': '콘간지',
+ 'timestamp': 1387310323,
+ },
+ }, {
+ 'url': 'http://m.tvpot.daum.net/v/65139429',
+ 'info_dict': {
+ 'id': '65139429',
+ 'ext': 'mp4',
+ 'title': '1297회, \'아빠 아들로 태어나길 잘 했어\' 민수, 감동의 눈물[아빠 어디가] 20150118',
+ 'description': 'md5:79794514261164ff27e36a21ad229fc5',
+ 'upload_date': '20150118',
+ 'thumbnail': r're:^https?://.*\.(?:jpg|png)',
+ 'duration': 154,
+ 'view_count': int,
+ 'comment_count': int,
+ 'uploader': 'MBC 예능',
+ 'uploader_id': '132251',
+ 'timestamp': 1421604228,
+ },
+ }, {
+ 'url': 'http://tvpot.daum.net/v/07dXWRka62Y%24',
+ 'only_matching': True,
+ }, {
+ 'url': 'http://videofarm.daum.net/controller/player/VodPlayer.swf?vid=vwIpVpCQsT8%24&ref=',
+ 'info_dict': {
+ 'id': 'vwIpVpCQsT8$',
+ 'ext': 'flv',
+ 'title': '01-Korean War ( Trouble on the horizon )',
+ 'description': 'Korean War 01\r\nTrouble on the horizon\r\n전쟁의 먹구름',
+ 'upload_date': '20080223',
+ 'thumbnail': r're:^https?://.*\.(?:jpg|png)',
+ 'duration': 249,
+ 'view_count': int,
+ 'comment_count': int,
+ 'uploader': '까칠한 墮落始祖 황비홍님의',
+ 'uploader_id': '560824',
+ 'timestamp': 1203770745,
+ },
+ }, {
+ # Requires dte_type=WEB (#9972)
+ 'url': 'http://tvpot.daum.net/v/s3794Uf1NZeZ1qMpGpeqeRU',
+ 'md5': 'a8917742069a4dd442516b86e7d66529',
+ 'info_dict': {
+ 'id': 's3794Uf1NZeZ1qMpGpeqeRU',
+ 'ext': 'mp4',
+ 'title': '러블리즈 - Destiny (나의 지구) (Lovelyz - Destiny)',
+ 'description': '러블리즈 - Destiny (나의 지구) (Lovelyz - Destiny)\r\n\r\n[쇼! 음악중심] 20160611, 507회',
+ 'upload_date': '20170129',
+ 'uploader': '쇼! 음악중심',
+ 'uploader_id': '2653210',
+ 'timestamp': 1485684628,
+ },
+ }]
+
+ def _real_extract(self, url):
+ video_id = compat_urllib_parse_unquote(self._match_id(url))
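+        # non-numeric ids appear to need the '@my' suffix to resolve on the Kakao embed player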
+ if not video_id.isdigit():
+ video_id += '@my'
+ return self.url_result(
+ self._KAKAO_EMBED_BASE + video_id, 'Kakao', video_id)
+
+
+class DaumClipIE(DaumBaseIE):
+ _VALID_URL = r'https?://(?:m\.)?tvpot\.daum\.net/(?:clip/ClipView.(?:do|tv)|mypot/View.do)\?.*?clipid=(?P<id>\d+)'
+ IE_NAME = 'daum.net:clip'
+ _URL_TEMPLATE = 'http://tvpot.daum.net/clip/ClipView.do?clipid=%s'
+
+ _TESTS = [{
+ 'url': 'http://tvpot.daum.net/clip/ClipView.do?clipid=52554690',
+ 'info_dict': {
+ 'id': '52554690',
+ 'ext': 'mp4',
+ 'title': 'DOTA 2GETHER 시즌2 6회 - 2부',
+ 'description': 'DOTA 2GETHER 시즌2 6회 - 2부',
+ 'upload_date': '20130831',
+ 'thumbnail': r're:^https?://.*\.(?:jpg|png)',
+ 'duration': 3868,
+ 'view_count': int,
+ 'uploader': 'GOMeXP',
+ 'uploader_id': '6667',
+ 'timestamp': 1377911092,
+ },
+ }, {
+ 'url': 'http://m.tvpot.daum.net/clip/ClipView.tv?clipid=54999425',
+ 'only_matching': True,
+ }]
+
+ @classmethod
+ def suitable(cls, url):
+        return False if DaumPlaylistIE.suitable(url) or DaumUserIE.suitable(url) else super().suitable(url)
+
+ def _real_extract(self, url):
+ video_id = self._match_id(url)
+ return self.url_result(
+ self._KAKAO_EMBED_BASE + video_id, 'Kakao', video_id)
+
+
+class DaumListIE(InfoExtractor): # XXX: Conventionally, base classes should end with BaseIE/InfoExtractor
+ def _get_entries(self, list_id, list_id_type):
+ name = None
+ entries = []
+ for pagenum in itertools.count(1):
+ list_info = self._download_json(
+ 'http://tvpot.daum.net/mypot/json/GetClipInfo.do?size=48&init=true&order=date&page=%d&%s=%s' % (
+ pagenum, list_id_type, list_id), list_id, 'Downloading list info - %s' % pagenum)
+
+ entries.extend([
+ self.url_result(
+ 'http://tvpot.daum.net/v/%s' % clip['vid'])
+ for clip in list_info['clip_list']
+ ])
+
+ if not name:
+                name = (list_info.get('playlist_bean', {}).get('name')
+                        or list_info.get('potInfo', {}).get('name'))
+
+ if not list_info.get('has_more'):
+ break
+
+ return name, entries
+
+ def _check_clip(self, url, list_id):
+ query_dict = parse_qs(url)
+ if 'clipid' in query_dict:
+ clip_id = query_dict['clipid'][0]
+ if not self._yes_playlist(list_id, clip_id):
+ return self.url_result(DaumClipIE._URL_TEMPLATE % clip_id, 'DaumClip')
+
+
+class DaumPlaylistIE(DaumListIE):
+ _VALID_URL = r'https?://(?:m\.)?tvpot\.daum\.net/mypot/(?:View\.do|Top\.tv)\?.*?playlistid=(?P<id>[0-9]+)'
+ IE_NAME = 'daum.net:playlist'
+ _URL_TEMPLATE = 'http://tvpot.daum.net/mypot/View.do?playlistid=%s'
+
+ _TESTS = [{
+ 'note': 'Playlist url with clipid',
+ 'url': 'http://tvpot.daum.net/mypot/View.do?playlistid=6213966&clipid=73806844',
+ 'info_dict': {
+ 'id': '6213966',
+ 'title': 'Woorissica Official',
+ },
+ 'playlist_mincount': 181
+ }, {
+ 'note': 'Playlist url with clipid - noplaylist',
+ 'url': 'http://tvpot.daum.net/mypot/View.do?playlistid=6213966&clipid=73806844',
+ 'info_dict': {
+ 'id': '73806844',
+ 'ext': 'mp4',
+ 'title': '151017 Airport',
+ 'upload_date': '20160117',
+ },
+ 'params': {
+ 'noplaylist': True,
+ 'skip_download': True,
+ }
+ }]
+
+ @classmethod
+ def suitable(cls, url):
+        return False if DaumUserIE.suitable(url) else super().suitable(url)
+
+ def _real_extract(self, url):
+ list_id = self._match_id(url)
+
+ clip_result = self._check_clip(url, list_id)
+ if clip_result:
+ return clip_result
+
+ name, entries = self._get_entries(list_id, 'playlistid')
+
+ return self.playlist_result(entries, list_id, name)
+
+
+class DaumUserIE(DaumListIE):
+ _VALID_URL = r'https?://(?:m\.)?tvpot\.daum\.net/mypot/(?:View|Top)\.(?:do|tv)\?.*?ownerid=(?P<id>[0-9a-zA-Z]+)'
+ IE_NAME = 'daum.net:user'
+
+ _TESTS = [{
+ 'url': 'http://tvpot.daum.net/mypot/View.do?ownerid=o2scDLIVbHc0',
+ 'info_dict': {
+ 'id': 'o2scDLIVbHc0',
+ 'title': '마이 리틀 텔레비전',
+ },
+ 'playlist_mincount': 213
+ }, {
+ 'url': 'http://tvpot.daum.net/mypot/View.do?ownerid=o2scDLIVbHc0&clipid=73801156',
+ 'info_dict': {
+ 'id': '73801156',
+ 'ext': 'mp4',
+ 'title': '[미공개] 김구라, 오만석이 부릅니다 \'오케피\' - 마이 리틀 텔레비전 20160116',
+ 'upload_date': '20160117',
+ 'description': 'md5:5e91d2d6747f53575badd24bd62b9f36'
+ },
+ 'params': {
+ 'noplaylist': True,
+ 'skip_download': True,
+ }
+ }, {
+ 'note': 'Playlist url has ownerid and playlistid, playlistid takes precedence',
+ 'url': 'http://tvpot.daum.net/mypot/View.do?ownerid=o2scDLIVbHc0&playlistid=6196631',
+ 'info_dict': {
+ 'id': '6196631',
+ 'title': '마이 리틀 텔레비전 - 20160109',
+ },
+ 'playlist_count': 11
+ }, {
+ 'url': 'http://tvpot.daum.net/mypot/Top.do?ownerid=o2scDLIVbHc0',
+ 'only_matching': True,
+ }, {
+ 'url': 'http://m.tvpot.daum.net/mypot/Top.tv?ownerid=45x1okb1If50&playlistid=3569733',
+ 'only_matching': True,
+ }]
+
+ def _real_extract(self, url):
+ list_id = self._match_id(url)
+
+ clip_result = self._check_clip(url, list_id)
+ if clip_result:
+ return clip_result
+
+ query_dict = parse_qs(url)
+ if 'playlistid' in query_dict:
+ playlist_id = query_dict['playlistid'][0]
+ return self.url_result(DaumPlaylistIE._URL_TEMPLATE % playlist_id, 'DaumPlaylist')
+
+ name, entries = self._get_entries(list_id, 'ownerid')
+
+ return self.playlist_result(entries, list_id, name)
diff --git a/yt_dlp/extractor/daystar.py b/yt_dlp/extractor/daystar.py
new file mode 100644
index 0000000..ef3520a
--- /dev/null
+++ b/yt_dlp/extractor/daystar.py
@@ -0,0 +1,48 @@
+from .common import InfoExtractor
+from ..utils import js_to_json, urljoin
+
+
+class DaystarClipIE(InfoExtractor):
+ IE_NAME = 'daystar:clip'
+ _VALID_URL = r'https?://player\.daystar\.tv/(?P<id>\w+)'
+ _TESTS = [{
+ 'url': 'https://player.daystar.tv/0MTO2ITM',
+ 'info_dict': {
+ 'id': '0MTO2ITM',
+ 'ext': 'mp4',
+ 'title': 'The Dark World of COVID Pt. 1 | Aaron Siri',
+            'description': 'md5:a420d320dda734e5f29458df3606c5f4',
+ 'thumbnail': r're:^https?://.+\.jpg',
+ },
+ }]
+
+ def _real_extract(self, url):
+ video_id = self._match_id(url)
+ webpage = self._download_webpage(url, video_id)
+
+        src_iframe = self._search_regex(r'<iframe[^>]+src="([^"]+)"', webpage, 'src iframe')
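+        # swapping player.php for config2.php appears to return the player's JS configuration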
+ webpage_iframe = self._download_webpage(
+ src_iframe.replace('player.php', 'config2.php'), video_id, headers={'Referer': src_iframe})
+
+ sources = self._parse_json(self._search_regex(
+            r'sources:\s*(\[.*?\])', webpage_iframe, 'm3u8 source'), video_id, transform_source=js_to_json)
+
+ formats, subtitles = [], {}
+ for source in sources:
+ file = source.get('file')
+ if file and source.get('type') == 'm3u8':
+ fmts, subs = self._extract_m3u8_formats_and_subtitles(
+ urljoin('https://www.lightcast.com/embed/', file),
+ video_id, 'mp4', fatal=False, headers={'Referer': src_iframe})
+ formats.extend(fmts)
+ subtitles = self._merge_subtitles(subtitles, subs)
+
+ return {
+ 'id': video_id,
+ 'title': self._html_search_meta(['og:title', 'twitter:title'], webpage),
+ 'description': self._html_search_meta(['og:description', 'twitter:description'], webpage),
+ 'thumbnail': self._search_regex(r'image:\s*"([^"]+)', webpage_iframe, 'thumbnail'),
+ 'formats': formats,
+ 'subtitles': subtitles,
+ }
diff --git a/yt_dlp/extractor/dbtv.py b/yt_dlp/extractor/dbtv.py
new file mode 100644
index 0000000..18be46f
--- /dev/null
+++ b/yt_dlp/extractor/dbtv.py
@@ -0,0 +1,48 @@
+from .common import InfoExtractor
+
+
+class DBTVIE(InfoExtractor):
+ _VALID_URL = r'https?://(?:www\.)?dagbladet\.no/video/(?:(?:embed|(?P<display_id>[^/]+))/)?(?P<id>[0-9A-Za-z_-]{11}|[a-zA-Z0-9]{8})'
+ _EMBED_REGEX = [r'<iframe[^>]+src=(["\'])(?P<url>(?:https?:)?//(?:www\.)?dagbladet\.no/video/embed/(?:[0-9A-Za-z_-]{11}|[a-zA-Z0-9]{8}).*?)\1']
+ _TESTS = [{
+ 'url': 'https://www.dagbladet.no/video/PynxJnNWChE/',
+ 'md5': 'b8f850ba1860adbda668d367f9b77699',
+ 'info_dict': {
+ 'id': 'PynxJnNWChE',
+ 'ext': 'mp4',
+ 'title': 'Skulle teste ut fornøyelsespark, men kollegaen var bare opptatt av bikinikroppen',
+ 'description': 'md5:49cc8370e7d66e8a2ef15c3b4631fd3f',
+ 'thumbnail': r're:https?://.*\.jpg',
+ 'upload_date': '20160916',
+ 'duration': 69,
+ 'uploader_id': 'UCk5pvsyZJoYJBd7_oFPTlRQ',
+ 'uploader': 'Dagbladet',
+ },
+ 'add_ie': ['Youtube']
+ }, {
+ 'url': 'https://www.dagbladet.no/video/embed/xlGmyIeN9Jo/?autoplay=false',
+ 'only_matching': True,
+ }, {
+ 'url': 'https://www.dagbladet.no/video/truer-iran-bor-passe-dere/PalfB2Cw',
+ 'only_matching': True,
+ }]
+
+ def _real_extract(self, url):
+ display_id, video_id = self._match_valid_url(url).groups()
+ info = {
+ '_type': 'url_transparent',
+ 'id': video_id,
+ 'display_id': display_id,
+ }
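+        # 11-character ids are YouTube video ids; 8-character ids belong to JWPlatform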
+ if len(video_id) == 11:
+ info.update({
+ 'url': video_id,
+ 'ie_key': 'Youtube',
+ })
+ else:
+ info.update({
+ 'url': 'jwplatform:' + video_id,
+ 'ie_key': 'JWPlatform',
+ })
+ return info
diff --git a/yt_dlp/extractor/dctp.py b/yt_dlp/extractor/dctp.py
new file mode 100644
index 0000000..24bb6ac
--- /dev/null
+++ b/yt_dlp/extractor/dctp.py
@@ -0,0 +1,103 @@
+from .common import InfoExtractor
+from ..compat import compat_str
+from ..utils import (
+ float_or_none,
+ int_or_none,
+ unified_timestamp,
+ url_or_none,
+)
+
+
+class DctpTvIE(InfoExtractor):
+ _VALID_URL = r'https?://(?:www\.)?dctp\.tv/(?:#/)?filme/(?P<id>[^/?#&]+)'
+ _TESTS = [{
+ # 4x3
+ 'url': 'http://www.dctp.tv/filme/videoinstallation-fuer-eine-kaufhausfassade/',
+ 'md5': '3ffbd1556c3fe210724d7088fad723e3',
+ 'info_dict': {
+ 'id': '95eaa4f33dad413aa17b4ee613cccc6c',
+ 'display_id': 'videoinstallation-fuer-eine-kaufhausfassade',
+ 'ext': 'm4v',
+ 'title': 'Videoinstallation für eine Kaufhausfassade',
+ 'description': 'Kurzfilm',
+ 'thumbnail': r're:^https?://.*\.jpg$',
+ 'duration': 71.24,
+ 'timestamp': 1302172322,
+ 'upload_date': '20110407',
+ },
+ }, {
+ # 16x9
+ 'url': 'http://www.dctp.tv/filme/sind-youtuber-die-besseren-lehrer/',
+ 'only_matching': True,
+ }]
+
+ _BASE_URL = 'http://dctp-ivms2-restapi.s3.amazonaws.com'
+
+ def _real_extract(self, url):
+ display_id = self._match_id(url)
+
+ version = self._download_json(
+ '%s/version.json' % self._BASE_URL, display_id,
+ 'Downloading version JSON')
+
+ restapi_base = '%s/%s/restapi' % (
+ self._BASE_URL, version['version_name'])
+
+ info = self._download_json(
+ '%s/slugs/%s.json' % (restapi_base, display_id), display_id,
+ 'Downloading video info JSON')
+
+ media = self._download_json(
+ '%s/media/%s.json' % (restapi_base, compat_str(info['object_id'])),
+ display_id, 'Downloading media JSON')
+
+ uuid = media['uuid']
+ title = media['title']
+ is_wide = media.get('is_wide')
+ formats = []
+
+ def add_formats(suffix):
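+            # each rendition is mirrored on three hosts: an HLS segments CDN, S3 and a plain HTTP CDN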
+ templ = 'https://%%s/%s_dctp_%s.m4v' % (uuid, suffix)
+ formats.extend([{
+ 'format_id': 'hls-' + suffix,
+ 'url': templ % 'cdn-segments.dctp.tv' + '/playlist.m3u8',
+ 'protocol': 'm3u8_native',
+ }, {
+ 'format_id': 's3-' + suffix,
+ 'url': templ % 'completed-media.s3.amazonaws.com',
+ }, {
+ 'format_id': 'http-' + suffix,
+ 'url': templ % 'cdn-media.dctp.tv',
+ }])
+
+ add_formats('0500_' + ('16x9' if is_wide else '4x3'))
+ if is_wide:
+ add_formats('720p')
+
+ thumbnails = []
+ images = media.get('images')
+ if isinstance(images, list):
+ for image in images:
+ if not isinstance(image, dict):
+ continue
+ image_url = url_or_none(image.get('url'))
+ if not image_url:
+ continue
+ thumbnails.append({
+ 'url': image_url,
+ 'width': int_or_none(image.get('width')),
+ 'height': int_or_none(image.get('height')),
+ })
+
+ return {
+ 'id': uuid,
+ 'display_id': display_id,
+ 'title': title,
+ 'alt_title': media.get('subtitle'),
+ 'description': media.get('description') or media.get('teaser'),
+ 'timestamp': unified_timestamp(media.get('created')),
+ 'duration': float_or_none(media.get('duration_in_ms'), scale=1000),
+ 'thumbnails': thumbnails,
+ 'formats': formats,
+ }
diff --git a/yt_dlp/extractor/deezer.py b/yt_dlp/extractor/deezer.py
new file mode 100644
index 0000000..f61f12a
--- /dev/null
+++ b/yt_dlp/extractor/deezer.py
@@ -0,0 +1,143 @@
+import json
+
+from .common import InfoExtractor
+from ..utils import (
+ ExtractorError,
+ int_or_none,
+ orderedSet,
+)
+
+
+class DeezerBaseInfoExtractor(InfoExtractor):
+ def get_data(self, url):
+ if not self.get_param('test'):
+            self.report_warning('For now, this extractor only supports the 30-second previews. Patches welcome!')
+
+ mobj = self._match_valid_url(url)
+ data_id = mobj.group('id')
+
+ webpage = self._download_webpage(url, data_id)
+ geoblocking_msg = self._html_search_regex(
+ r'<p class="soon-txt">(.*?)</p>', webpage, 'geoblocking message',
+ default=None)
+ if geoblocking_msg is not None:
+ raise ExtractorError(
+ 'Deezer said: %s' % geoblocking_msg, expected=True)
+
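+        # the page state is exposed as __DZR_APP_STATE__ or via a naboo.display() call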
+ data_json = self._search_regex(
+ (r'__DZR_APP_STATE__\s*=\s*({.+?})\s*</script>',
+ r'naboo\.display\(\'[^\']+\',\s*(.*?)\);\n'),
+ webpage, 'data JSON')
+ data = json.loads(data_json)
+ return data_id, webpage, data
+
+
+class DeezerPlaylistIE(DeezerBaseInfoExtractor):
+ _VALID_URL = r'https?://(?:www\.)?deezer\.com/(../)?playlist/(?P<id>[0-9]+)'
+ _TEST = {
+ 'url': 'http://www.deezer.com/playlist/176747451',
+ 'info_dict': {
+ 'id': '176747451',
+ 'title': 'Best!',
+ 'uploader': 'anonymous',
+ 'thumbnail': r're:^https?://(e-)?cdns-images\.dzcdn\.net/images/cover/.*\.jpg$',
+ },
+ 'playlist_count': 29,
+ }
+
+ def _real_extract(self, url):
+ playlist_id, webpage, data = self.get_data(url)
+
+ playlist_title = data.get('DATA', {}).get('TITLE')
+ playlist_uploader = data.get('DATA', {}).get('PARENT_USERNAME')
+ playlist_thumbnail = self._search_regex(
+ r'<img id="naboo_playlist_image".*?src="([^"]+)"', webpage,
+ 'playlist thumbnail')
+
+ entries = []
+        for s in data.get('SONGS', {}).get('data') or []:
+ formats = [{
+ 'format_id': 'preview',
+ 'url': s.get('MEDIA', [{}])[0].get('HREF'),
+ 'preference': -100, # Only the first 30 seconds
+ 'ext': 'mp3',
+ }]
+            artists = ', '.join(
+                orderedSet(a.get('ART_NAME') for a in s.get('ARTISTS') or []))
+ entries.append({
+ 'id': s.get('SNG_ID'),
+ 'duration': int_or_none(s.get('DURATION')),
+ 'title': '%s - %s' % (artists, s.get('SNG_TITLE')),
+ 'uploader': s.get('ART_NAME'),
+ 'uploader_id': s.get('ART_ID'),
+ 'age_limit': 16 if s.get('EXPLICIT_LYRICS') == '1' else 0,
+ 'formats': formats,
+ })
+
+ return {
+ '_type': 'playlist',
+ 'id': playlist_id,
+ 'title': playlist_title,
+ 'uploader': playlist_uploader,
+ 'thumbnail': playlist_thumbnail,
+ 'entries': entries,
+ }
+
+
+class DeezerAlbumIE(DeezerBaseInfoExtractor):
+ _VALID_URL = r'https?://(?:www\.)?deezer\.com/(../)?album/(?P<id>[0-9]+)'
+ _TEST = {
+ 'url': 'https://www.deezer.com/fr/album/67505622',
+ 'info_dict': {
+ 'id': '67505622',
+ 'title': 'Last Week',
+ 'uploader': 'Home Brew',
+ 'thumbnail': r're:^https?://(e-)?cdns-images\.dzcdn\.net/images/cover/.*\.jpg$',
+ },
+ 'playlist_count': 7,
+ }
+
+ def _real_extract(self, url):
+ album_id, webpage, data = self.get_data(url)
+
+ album_title = data.get('DATA', {}).get('ALB_TITLE')
+ album_uploader = data.get('DATA', {}).get('ART_NAME')
+ album_thumbnail = self._search_regex(
+ r'<img id="naboo_album_image".*?src="([^"]+)"', webpage,
+ 'album thumbnail')
+
+ entries = []
+        for s in data.get('SONGS', {}).get('data') or []:
+ formats = [{
+ 'format_id': 'preview',
+ 'url': s.get('MEDIA', [{}])[0].get('HREF'),
+ 'preference': -100, # Only the first 30 seconds
+ 'ext': 'mp3',
+ }]
+            artists = ', '.join(
+                orderedSet(a.get('ART_NAME') for a in s.get('ARTISTS') or []))
+ entries.append({
+ 'id': s.get('SNG_ID'),
+ 'duration': int_or_none(s.get('DURATION')),
+ 'title': '%s - %s' % (artists, s.get('SNG_TITLE')),
+ 'uploader': s.get('ART_NAME'),
+ 'uploader_id': s.get('ART_ID'),
+ 'age_limit': 16 if s.get('EXPLICIT_LYRICS') == '1' else 0,
+ 'formats': formats,
+ 'track': s.get('SNG_TITLE'),
+ 'track_number': int_or_none(s.get('TRACK_NUMBER')),
+ 'track_id': s.get('SNG_ID'),
+ 'artist': album_uploader,
+ 'album': album_title,
+ 'album_artist': album_uploader,
+ })
+
+ return {
+ '_type': 'playlist',
+ 'id': album_id,
+ 'title': album_title,
+ 'uploader': album_uploader,
+ 'thumbnail': album_thumbnail,
+ 'entries': entries,
+ }
diff --git a/yt_dlp/extractor/democracynow.py b/yt_dlp/extractor/democracynow.py
new file mode 100644
index 0000000..1624d08
--- /dev/null
+++ b/yt_dlp/extractor/democracynow.py
@@ -0,0 +1,91 @@
+import os.path
+import re
+
+from .common import InfoExtractor
+from ..compat import compat_urlparse
+from ..utils import (
+ url_basename,
+ remove_start,
+)
+
+
+class DemocracynowIE(InfoExtractor):
+ _VALID_URL = r'https?://(?:www\.)?democracynow\.org/(?P<id>[^\?]*)'
+ IE_NAME = 'democracynow'
+ _TESTS = [{
+ 'url': 'http://www.democracynow.org/shows/2015/7/3',
+ 'md5': '3757c182d3d84da68f5c8f506c18c196',
+ 'info_dict': {
+ 'id': '2015-0703-001',
+ 'ext': 'mp4',
+ 'title': 'Daily Show for July 03, 2015',
+ 'description': 'md5:80eb927244d6749900de6072c7cc2c86',
+ },
+ }, {
+ 'url': 'http://www.democracynow.org/2015/7/3/this_flag_comes_down_today_bree',
+ 'info_dict': {
+ 'id': '2015-0703-001',
+ 'ext': 'mp4',
+ 'title': '"This Flag Comes Down Today": Bree Newsome Scales SC Capitol Flagpole, Takes Down Confederate Flag',
+ 'description': 'md5:4d2bc4f0d29f5553c2210a4bc7761a21',
+ },
+ 'params': {
+ 'skip_download': True,
+ },
+ }]
+
+ def _real_extract(self, url):
+ display_id = self._match_id(url)
+
+ webpage = self._download_webpage(url, display_id)
+
+ json_data = self._parse_json(self._search_regex(
+ r'<script[^>]+type="text/json"[^>]*>\s*({[^>]+})', webpage, 'json'),
+ display_id)
+
+ title = json_data['title']
+ formats = []
+
+ video_id = None
+
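+        # Collect the media variants; the canonical video ID is derived from
+        # the media file name (query string, extension and a leading "dn" stripped).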
+ for key in ('file', 'audio', 'video', 'high_res_video'):
+ media_url = json_data.get(key, '')
+ if not media_url:
+ continue
+ media_url = re.sub(r'\?.*', '', compat_urlparse.urljoin(url, media_url))
+ video_id = video_id or remove_start(os.path.splitext(url_basename(media_url))[0], 'dn')
+ formats.append({
+ 'url': media_url,
+ 'vcodec': 'none' if key == 'audio' else None,
+ })
+
+ default_lang = 'en'
+ subtitles = {}
+
+ def add_subtitle_item(lang, info_dict):
+ if lang not in subtitles:
+ subtitles[lang] = []
+ subtitles[lang].append(info_dict)
+
+        # chapter_file is not a subtitle file, so only caption_file is used
+ if 'caption_file' in json_data:
+ add_subtitle_item(default_lang, {
+ 'url': compat_urlparse.urljoin(url, json_data['caption_file']),
+ })
+
+ for subtitle_item in json_data.get('captions', []):
+ lang = subtitle_item.get('language', '').lower() or default_lang
+ add_subtitle_item(lang, {
+ 'url': compat_urlparse.urljoin(url, subtitle_item['url']),
+ })
+
+ description = self._og_search_description(webpage, default=None)
+
+ return {
+ 'id': video_id or display_id,
+ 'title': title,
+ 'description': description,
+ 'thumbnail': json_data.get('image'),
+ 'subtitles': subtitles,
+ 'formats': formats,
+ }
diff --git a/yt_dlp/extractor/detik.py b/yt_dlp/extractor/detik.py
new file mode 100644
index 0000000..f148054
--- /dev/null
+++ b/yt_dlp/extractor/detik.py
@@ -0,0 +1,159 @@
+from .common import InfoExtractor
+from ..utils import int_or_none, merge_dicts, try_call, url_basename
+
+
+class DetikEmbedIE(InfoExtractor):
+ _VALID_URL = False
+ _WEBPAGE_TESTS = [{
+ # cnn embed
+ 'url': 'https://www.cnnindonesia.com/embed/video/846189',
+ 'info_dict': {
+ 'id': '846189',
+ 'ext': 'mp4',
+ 'description': 'md5:ece7b003b3ee7d81c6a5cfede7d5397d',
+ 'thumbnail': r're:https?://akcdn\.detik\.net\.id/visual/2022/09/11/thumbnail-video-1_169.jpeg',
+ 'title': 'Video CNN Indonesia - VIDEO: Momen Charles Disambut Meriah usai Dilantik jadi Raja Inggris',
+ 'age_limit': 0,
+ 'tags': ['raja charles', ' raja charles iii', ' ratu elizabeth', ' ratu elizabeth meninggal dunia', ' raja inggris', ' inggris'],
+ 'release_timestamp': 1662869995,
+ 'release_date': '20220911',
+ 'uploader': 'REUTERS'
+ }
+ }, {
+ # 20.detik
+ 'url': 'https://20.detik.com/otobuzz/20220704-220704093/mulai-rp-10-jutaan-ini-skema-kredit-mitsubishi-pajero-sport',
+ 'info_dict': {
+ 'display_id': 'mulai-rp-10-jutaan-ini-skema-kredit-mitsubishi-pajero-sport',
+ 'id': '220704093',
+ 'ext': 'mp4',
+ 'description': 'md5:9b2257341b6f375cdcf90106146d5ffb',
+ 'thumbnail': r're:https?://cdnv\.detik\.com/videoservice/AdminTV/2022/07/04/5d6187e402ec4a91877755a5886ff5b6-20220704161859-0s.jpg',
+ 'title': 'Mulai Rp 10 Jutaan! Ini Skema Kredit Mitsubishi Pajero Sport',
+ 'timestamp': 1656951521,
+ 'upload_date': '20220704',
+ 'duration': 83.0,
+ 'tags': ['cicilan mobil', 'mitsubishi pajero sport', 'mitsubishi', 'pajero sport'],
+ 'release_timestamp': 1656926321,
+ 'release_date': '20220704',
+ 'age_limit': 0,
+            'uploader': 'Ridwan Arifin '  # TODO: strip trailing whitespace from uploader
+ }
+ }, {
+ # pasangmata.detik
+ 'url': 'https://pasangmata.detik.com/contribution/366649',
+ 'info_dict': {
+ 'id': '366649',
+ 'ext': 'mp4',
+ 'title': 'Saling Dorong Aparat dan Pendemo di Aksi Tolak Kenaikan BBM',
+ 'description': 'md5:7a6580876c8381c454679e028620bea7',
+ 'age_limit': 0,
+ 'tags': 'count:17',
+ 'thumbnail': 'https://akcdn.detik.net.id/community/data/media/thumbs-pasangmata/2022/09/08/366649-16626229351533009620.mp4-03.jpg',
+ }
+ }, {
+ # insertlive embed
+ 'url': 'https://www.insertlive.com/embed/video/290482',
+ 'info_dict': {
+ 'id': '290482',
+ 'ext': 'mp4',
+ 'release_timestamp': 1663063704,
+ 'thumbnail': 'https://akcdn.detik.net.id/visual/2022/09/13/leonardo-dicaprio_169.png?w=600&q=90',
+ 'age_limit': 0,
+ 'description': 'Aktor Leonardo DiCaprio memang baru saja putus dari kekasihnya yang bernama Camilla Morrone.',
+ 'release_date': '20220913',
+ 'title': 'Diincar Leonardo DiCaprio, Gigi Hadid Ngaku Tertarik Tapi Belum Cinta',
+ 'tags': ['leonardo dicaprio', ' gigi hadid', ' hollywood'],
+ 'uploader': '!nsertlive',
+ }
+ }, {
+ # beautynesia embed
+ 'url': 'https://www.beautynesia.id/embed/video/261636',
+ 'info_dict': {
+ 'id': '261636',
+ 'ext': 'mp4',
+ 'age_limit': 0,
+ 'release_timestamp': 1662375600,
+ 'description': 'Menurut ramalan astrologi, tiga zodiak ini bakal hoki sepanjang September 2022.',
+ 'title': '3 Zodiak Paling Beruntung Selama September 2022',
+ 'release_date': '20220905',
+ 'tags': ['zodiac update', ' zodiak', ' ramalan bintang', ' zodiak beruntung 2022', ' zodiak hoki september 2022', ' zodiak beruntung september 2022'],
+ 'thumbnail': 'https://akcdn.detik.net.id/visual/2022/09/05/3-zodiak-paling-beruntung-selama-september-2022_169.jpeg?w=600&q=90',
+ 'uploader': 'amh',
+ }
+ }, {
+ # cnbcindonesia embed
+ 'url': 'https://www.cnbcindonesia.com/embed/video/371839',
+ 'info_dict': {
+ 'id': '371839',
+ 'ext': 'mp4',
+ 'title': 'Puluhan Pejabat Rusia Tuntut Putin Mundur',
+ 'tags': ['putin'],
+ 'age_limit': 0,
+ 'thumbnail': 'https://awsimages.detik.net.id/visual/2022/09/13/cnbc-indonesia-tv-3_169.png?w=600&q=80',
+ 'description': 'md5:8b9111e37555fcd95fe549a9b4ae6fdc',
+ }
+ }, {
+        # detik shortlink (can be generated from https://dtk.id/?<url>)
+ 'url': 'https://dtk.id/NkISKr',
+ 'info_dict': {
+ 'id': '220914049',
+ 'ext': 'mp4',
+ 'release_timestamp': 1663114488,
+ 'uploader': 'Tim 20Detik',
+ 'title': 'Pakar Bicara soal Tim Khusus Jokowi dan Mereka yang Pro ke Bjorka',
+ 'age_limit': 0,
+ 'thumbnail': 'https://cdnv.detik.com/videoservice/AdminTV/2022/09/14/f15cae71d7b640c58e75b254ecbb1ce1-20220914071613-0s.jpg?w=400&q=80',
+ 'display_id': 'pakar-bicara-soal-tim-khusus-jokowi-dan-mereka-yang-pro-ke-bjorka',
+ 'upload_date': '20220914',
+ 'release_date': '20220914',
+ 'description': 'md5:5eb03225f7ee40207dd3a1e18a73f1ff',
+ 'timestamp': 1663139688,
+ 'duration': 213.0,
+ 'tags': ['hacker bjorka', 'bjorka', 'hacker bjorka bocorkan data rahasia presiden jokowi', 'jokowi'],
+ }
+ }]
+
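+    # Embedded players come in two variants: a flowplayer JSON config or a
+    # "detikVideo" config block; both carry the video URL (an HLS manifest).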
+ def _extract_from_webpage(self, url, webpage):
+ player_type, video_data = self._search_regex(
+ r'<script\s*[^>]+src="https?://(aws)?cdn\.detik\.net\.id/(?P<type>flowplayer|detikVideo)[^>]+>\s*(?P<video_data>{[^}]+})',
+ webpage, 'playerjs', group=('type', 'video_data'), default=(None, ''))
+ if not player_type:
+ return
+
+ display_id, extra_info_dict = url_basename(url), {}
+
+ if player_type == 'flowplayer':
+ video_json_data = self._parse_json(video_data.replace('\'', '"'), display_id)
+ video_url = video_json_data['videoUrl']
+
+ extra_info_dict = {
+ 'id': self._search_regex(r'identifier\s*:\s*\'([^\']+)', webpage, 'identifier'),
+ 'thumbnail': video_json_data.get('imageUrl'),
+ }
+
+ elif player_type == 'detikVideo':
+ video_url = self._search_regex(
+ r'videoUrl\s*:\s*[\'"]?([^"\']+)', video_data, 'videoUrl')
+ extra_info_dict = {
+ 'id': self._html_search_meta(['video_id', 'dtk:video_id'], webpage),
+                'thumbnail': self._search_regex(r'imageUrl\s*:\s*[\'"]?([^"\']+)', video_data, 'imageUrl'),
+ 'duration': int_or_none(self._html_search_meta('duration', webpage, fatal=False, default=None)),
+ 'release_timestamp': int_or_none(self._html_search_meta('dtk:publishdateunix', webpage, fatal=False, default=None), 1000),
+ 'timestamp': int_or_none(self._html_search_meta('dtk:createdateunix', webpage, fatal=False, default=None), 1000),
+ 'uploader': self._search_regex(
+ r'([^-]+)', self._html_search_meta('dtk:author', webpage, default='').strip(), 'uploader',
+ default=None)
+ }
+
+ formats, subtitles = self._extract_m3u8_formats_and_subtitles(video_url, display_id)
+
+ json_ld_data = self._search_json_ld(webpage, display_id, default={})
+ yield merge_dicts(json_ld_data, extra_info_dict, {
+ 'display_id': display_id,
+ 'title': self._html_search_meta(['og:title', 'originalTitle'], webpage) or self._html_extract_title(webpage),
+ 'description': self._html_search_meta(['og:description', 'twitter:description', 'description'], webpage),
+ 'formats': formats,
+ 'subtitles': subtitles,
+ 'tags': try_call(lambda: self._html_search_meta(
+ ['keywords', 'keyword', 'dtk:keywords'], webpage).split(',')),
+ })
diff --git a/yt_dlp/extractor/deuxm.py b/yt_dlp/extractor/deuxm.py
new file mode 100644
index 0000000..74a6da6
--- /dev/null
+++ b/yt_dlp/extractor/deuxm.py
@@ -0,0 +1,76 @@
+from .common import InfoExtractor
+from ..utils import url_or_none
+
+
+class DeuxMIE(InfoExtractor):
+ _VALID_URL = r'https?://(?:www\.)?2m\.ma/[^/]+/replay/single/(?P<id>([\w.]{1,24})+)'
+
+ _TESTS = [{
+ 'url': 'https://2m.ma/fr/replay/single/6351d439b15e1a613b3debe8',
+ 'md5': '5f761f04c9d686e553b685134dca5d32',
+ 'info_dict': {
+ 'id': '6351d439b15e1a613b3debe8',
+ 'ext': 'mp4',
+ 'title': 'Grand Angle : Jeudi 20 Octobre 2022',
+ 'thumbnail': r're:^https?://2msoread-ww.amagi.tv/mediasfiles/videos/images/.*\.png$'
+ }
+ }, {
+ 'url': 'https://2m.ma/fr/replay/single/635c0aeab4eec832622356da',
+ 'md5': 'ad6af2f5e4d5b2ad2194a84b6e890b4c',
+ 'info_dict': {
+ 'id': '635c0aeab4eec832622356da',
+ 'ext': 'mp4',
+ 'title': 'Journal Amazigh : Vendredi 28 Octobre 2022',
+ 'thumbnail': r're:^https?://2msoread-ww.amagi.tv/mediasfiles/videos/images/.*\.png$'
+ }
+ }]
+
+ def _real_extract(self, url):
+ video_id = self._match_id(url)
+ video = self._download_json(
+ f'https://2m.ma/api/watchDetail/{video_id}', video_id)['response']['News']
+ return {
+ 'id': video_id,
+ 'title': video.get('titre'),
+ 'url': video['url'],
+ 'description': video.get('description'),
+ 'thumbnail': url_or_none(video.get('image')),
+ }
+
+
+class DeuxMNewsIE(InfoExtractor):
+ _VALID_URL = r'https?://(?:www\.)?2m\.ma/(?P<lang>\w+)/news/(?P<id>[^/#?]+)'
+
+ _TESTS = [{
+ 'url': 'https://2m.ma/fr/news/Kan-Ya-Mkan-d%C3%A9poussi%C3%A8re-l-histoire-du-phare-du-Cap-Beddouza-20221028',
+ 'md5': '43d5e693a53fa0b71e8a5204c7d4542a',
+ 'info_dict': {
+ 'id': '635c5d1233b83834e35b282e',
+ 'ext': 'mp4',
+ 'title': 'Kan Ya Mkan d\u00e9poussi\u00e8re l\u2019histoire du phare du Cap Beddouza',
+ 'description': 'md5:99dcf29b82f1d7f2a4acafed1d487527',
+ 'thumbnail': r're:^https?://2msoread-ww.amagi.tv/mediasfiles/videos/images/.*\.png$'
+ }
+ }, {
+ 'url': 'https://2m.ma/fr/news/Interview-Casablanca-hors-des-sentiers-battus-avec-Abderrahim-KASSOU-Replay--20221017',
+ 'md5': '7aca29f02230945ef635eb8290283c0c',
+ 'info_dict': {
+ 'id': '634d9e108b70d40bc51a844b',
+ 'ext': 'mp4',
+ 'title': 'Interview: Casablanca hors des sentiers battus avec Abderrahim KASSOU (Replay) ',
+ 'description': 'md5:3b8e78111de9fcc6ef7f7dd6cff2430c',
+ 'thumbnail': r're:^https?://2msoread-ww.amagi.tv/mediasfiles/videos/images/.*\.png$'
+ }
+ }]
+
+ def _real_extract(self, url):
+ article_name, lang = self._match_valid_url(url).group('id', 'lang')
+ video = self._download_json(
+ f'https://2m.ma/api/articlesByUrl?lang={lang}&url=/news/{article_name}', article_name)['response']['article'][0]
+ return {
+ 'id': video['id'],
+ 'title': video.get('title'),
+ 'url': video['image'][0],
+ 'description': video.get('content'),
+ 'thumbnail': url_or_none(video.get('cover')),
+ }
diff --git a/yt_dlp/extractor/dfb.py b/yt_dlp/extractor/dfb.py
new file mode 100644
index 0000000..c4fb5c2
--- /dev/null
+++ b/yt_dlp/extractor/dfb.py
@@ -0,0 +1,52 @@
+from .common import InfoExtractor
+from ..utils import unified_strdate
+
+
+class DFBIE(InfoExtractor):
+ IE_NAME = 'tv.dfb.de'
+ _VALID_URL = r'https?://tv\.dfb\.de/video/(?P<display_id>[^/]+)/(?P<id>\d+)'
+
+ _TEST = {
+ 'url': 'http://tv.dfb.de/video/u-19-em-stimmen-zum-spiel-gegen-russland/11633/',
+ 'md5': 'ac0f98a52a330f700b4b3034ad240649',
+ 'info_dict': {
+ 'id': '11633',
+ 'display_id': 'u-19-em-stimmen-zum-spiel-gegen-russland',
+ 'ext': 'mp4',
+ 'title': 'U 19-EM: Stimmen zum Spiel gegen Russland',
+ 'upload_date': '20150714',
+ },
+ }
+
+ def _real_extract(self, url):
+ display_id, video_id = self._match_valid_url(url).groups()
+
+ player_info = self._download_xml(
+ 'http://tv.dfb.de/server/hd_video.php?play=%s' % video_id,
+ display_id)
+ video_info = player_info.find('video')
+ stream_access_url = self._proto_relative_url(video_info.find('url').text.strip())
+
+ formats = []
+ # see http://tv.dfb.de/player/js/ajax.js for the method to extract m3u8 formats
+ for sa_url in (stream_access_url, stream_access_url + '&area=&format=iphone'):
+ stream_access_info = self._download_xml(sa_url, display_id)
+ token_el = stream_access_info.find('token')
+ manifest_url = token_el.attrib['url'] + '?' + 'hdnea=' + token_el.attrib['auth']
+ if '.f4m' in manifest_url:
+ formats.extend(self._extract_f4m_formats(
+ manifest_url + '&hdcore=3.2.0',
+ display_id, f4m_id='hds', fatal=False))
+ else:
+ formats.extend(self._extract_m3u8_formats(
+ manifest_url, display_id, 'mp4',
+ 'm3u8_native', m3u8_id='hls', fatal=False))
+
+ return {
+ 'id': video_id,
+ 'display_id': display_id,
+ 'title': video_info.find('title').text,
+ 'thumbnail': 'http://tv.dfb.de/images/%s_640x360.jpg' % video_id,
+ 'upload_date': unified_strdate(video_info.find('time_date').text),
+ 'formats': formats,
+ }
diff --git a/yt_dlp/extractor/dhm.py b/yt_dlp/extractor/dhm.py
new file mode 100644
index 0000000..a5f5f79
--- /dev/null
+++ b/yt_dlp/extractor/dhm.py
@@ -0,0 +1,58 @@
+from .common import InfoExtractor
+from ..utils import parse_duration
+
+
+class DHMIE(InfoExtractor):
+ _WORKING = False
+ IE_DESC = 'Filmarchiv - Deutsches Historisches Museum'
+ _VALID_URL = r'https?://(?:www\.)?dhm\.de/filmarchiv/(?:[^/]+/)+(?P<id>[^/]+)'
+
+ _TESTS = [{
+ 'url': 'http://www.dhm.de/filmarchiv/die-filme/the-marshallplan-at-work-in-west-germany/',
+ 'md5': '11c475f670209bf6acca0b2b7ef51827',
+ 'info_dict': {
+ 'id': 'the-marshallplan-at-work-in-west-germany',
+ 'ext': 'flv',
+ 'title': 'MARSHALL PLAN AT WORK IN WESTERN GERMANY, THE',
+ 'description': 'md5:1fabd480c153f97b07add61c44407c82',
+ 'duration': 660,
+ 'thumbnail': r're:^https?://.*\.jpg$',
+ },
+ }, {
+ 'url': 'http://www.dhm.de/filmarchiv/02-mapping-the-wall/peter-g/rolle-1/',
+ 'md5': '09890226332476a3e3f6f2cb74734aa5',
+ 'info_dict': {
+ 'id': 'rolle-1',
+ 'ext': 'flv',
+ 'title': 'ROLLE 1',
+ 'thumbnail': r're:^https?://.*\.jpg$',
+ },
+ }]
+
+ def _real_extract(self, url):
+ playlist_id = self._match_id(url)
+
+ webpage = self._download_webpage(url, playlist_id)
+
+ playlist_url = self._search_regex(
+ r"file\s*:\s*'([^']+)'", webpage, 'playlist url')
+
+ entries = self._extract_xspf_playlist(playlist_url, playlist_id)
+
+ title = self._search_regex(
+ [r'dc:title="([^"]+)"', r'<title> &raquo;([^<]+)</title>'],
+ webpage, 'title').strip()
+ description = self._html_search_regex(
+ r'<p><strong>Description:</strong>(.+?)</p>',
+ webpage, 'description', default=None)
+ duration = parse_duration(self._search_regex(
+ r'<em>Length\s*</em>\s*:\s*</strong>([^<]+)',
+ webpage, 'duration', default=None))
+
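+        # The XSPF playlist carries no page-level metadata, so patch the
+        # first entry with the values scraped from the webpage above.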
+ entries[0].update({
+ 'title': title,
+ 'description': description,
+ 'duration': duration,
+ })
+
+ return self.playlist_result(entries, playlist_id)
diff --git a/yt_dlp/extractor/digitalconcerthall.py b/yt_dlp/extractor/digitalconcerthall.py
new file mode 100644
index 0000000..c11cd79
--- /dev/null
+++ b/yt_dlp/extractor/digitalconcerthall.py
@@ -0,0 +1,150 @@
+from .common import InfoExtractor
+from ..utils import (
+ ExtractorError,
+ parse_resolution,
+ traverse_obj,
+ try_get,
+ urlencode_postdata,
+)
+
+
+class DigitalConcertHallIE(InfoExtractor):
+ IE_DESC = 'DigitalConcertHall extractor'
+ _VALID_URL = r'https?://(?:www\.)?digitalconcerthall\.com/(?P<language>[a-z]+)/(?P<type>film|concert)/(?P<id>[0-9]+)'
+ _OAUTH_URL = 'https://api.digitalconcerthall.com/v2/oauth2/token'
+ _ACCESS_TOKEN = None
+ _NETRC_MACHINE = 'digitalconcerthall'
+ _TESTS = [{
+ 'note': 'Playlist with only one video',
+ 'url': 'https://www.digitalconcerthall.com/en/concert/53201',
+ 'info_dict': {
+ 'id': '53201-1',
+ 'ext': 'mp4',
+ 'composer': 'Kurt Weill',
+ 'title': '[Magic Night]',
+ 'thumbnail': r're:^https?://images.digitalconcerthall.com/cms/thumbnails.*\.jpg$',
+ 'upload_date': '20210624',
+ 'timestamp': 1624548600,
+ 'duration': 2798,
+ 'album_artist': 'Members of the Berliner Philharmoniker / Simon Rössler',
+ },
+ 'params': {'skip_download': 'm3u8'},
+ }, {
+ 'note': 'Concert with several works and an interview',
+ 'url': 'https://www.digitalconcerthall.com/en/concert/53785',
+ 'info_dict': {
+ 'id': '53785',
+ 'album_artist': 'Berliner Philharmoniker / Kirill Petrenko',
+ 'title': 'Kirill Petrenko conducts Mendelssohn and Shostakovich',
+ },
+ 'params': {'skip_download': 'm3u8'},
+ 'playlist_count': 3,
+ }, {
+ 'url': 'https://www.digitalconcerthall.com/en/film/388',
+ 'info_dict': {
+ 'id': '388',
+ 'ext': 'mp4',
+ 'title': 'The Berliner Philharmoniker and Frank Peter Zimmermann',
+ 'description': 'md5:cfe25a7044fa4be13743e5089b5b5eb2',
+ 'thumbnail': r're:^https?://images.digitalconcerthall.com/cms/thumbnails.*\.jpg$',
+ 'upload_date': '20220714',
+ 'timestamp': 1657785600,
+ 'album_artist': 'Frank Peter Zimmermann / Benedikt von Bernstorff / Jakob von Bernstorff',
+ },
+ 'params': {'skip_download': 'm3u8'},
+ }]
+
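+    # Login is a two-step OAuth flow: first obtain an anonymous device token,
+    # then validate the user credentials under that bearer token.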
+ def _perform_login(self, username, password):
+ token_response = self._download_json(
+ self._OAUTH_URL,
+ None, 'Obtaining token', errnote='Unable to obtain token', data=urlencode_postdata({
+ 'affiliate': 'none',
+ 'grant_type': 'device',
+ 'device_vendor': 'unknown',
+ 'app_id': 'dch.webapp',
+ 'app_version': '1.0.0',
+ 'client_secret': '2ySLN+2Fwb',
+ }), headers={
+ 'Content-Type': 'application/x-www-form-urlencoded',
+ })
+ self._ACCESS_TOKEN = token_response['access_token']
+ try:
+ self._download_json(
+ self._OAUTH_URL,
+ None, note='Logging in', errnote='Unable to login', data=urlencode_postdata({
+ 'grant_type': 'password',
+ 'username': username,
+ 'password': password,
+ }), headers={
+ 'Content-Type': 'application/x-www-form-urlencoded',
+ 'Referer': 'https://www.digitalconcerthall.com',
+ 'Authorization': f'Bearer {self._ACCESS_TOKEN}'
+ })
+ except ExtractorError:
+ self.raise_login_required(msg='Login info incorrect')
+
+ def _real_initialize(self):
+ if not self._ACCESS_TOKEN:
+ self.raise_login_required(method='password')
+
+ def _entries(self, items, language, type_, **kwargs):
+ for item in items:
+ video_id = item['id']
+ stream_info = self._download_json(
+ self._proto_relative_url(item['_links']['streams']['href']), video_id, headers={
+ 'Accept': 'application/json',
+ 'Authorization': f'Bearer {self._ACCESS_TOKEN}',
+ 'Accept-Language': language
+ })
+
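+            # Pick the first channel whose key starts with "vod_mixed"; the
+            # URL of its first stream is the master m3u8.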
+ m3u8_url = traverse_obj(
+ stream_info, ('channel', lambda k, _: k.startswith('vod_mixed'), 'stream', 0, 'url'), get_all=False)
+ formats = self._extract_m3u8_formats(m3u8_url, video_id, 'mp4', 'm3u8_native', fatal=False)
+
+ yield {
+ 'id': video_id,
+ 'title': item.get('title'),
+ 'composer': item.get('name_composer'),
+ 'url': m3u8_url,
+ 'formats': formats,
+ 'duration': item.get('duration_total'),
+ 'timestamp': traverse_obj(item, ('date', 'published')),
+ 'description': item.get('short_description') or stream_info.get('short_description'),
+ **kwargs,
+ 'chapters': [{
+ 'start_time': chapter.get('time'),
+ 'end_time': try_get(chapter, lambda x: x['time'] + x['duration']),
+ 'title': chapter.get('text'),
+ } for chapter in item['cuepoints']] if item.get('cuepoints') and type_ == 'concert' else None,
+ }
+
+ def _real_extract(self, url):
+ language, type_, video_id = self._match_valid_url(url).group('language', 'type', 'id')
+ if not language:
+ language = 'en'
+
+ thumbnail_url = self._html_search_regex(
+ r'(https?://images\.digitalconcerthall\.com/cms/thumbnails/.*\.jpg)',
+ self._download_webpage(url, video_id), 'thumbnail')
+ thumbnails = [{
+ 'url': thumbnail_url,
+ **parse_resolution(thumbnail_url)
+ }]
+
+ vid_info = self._download_json(
+ f'https://api.digitalconcerthall.com/v2/{type_}/{video_id}', video_id, headers={
+ 'Accept': 'application/json',
+ 'Accept-Language': language
+ })
+ album_artist = ' / '.join(traverse_obj(vid_info, ('_links', 'artist', ..., 'name')) or '')
+ videos = [vid_info] if type_ == 'film' else traverse_obj(vid_info, ('_embedded', ..., ...))
+
+ return {
+ '_type': 'playlist',
+ 'id': video_id,
+ 'title': vid_info.get('title'),
+ 'entries': self._entries(videos, language, thumbnails=thumbnails, album_artist=album_artist, type_=type_),
+ 'thumbnails': thumbnails,
+ 'album_artist': album_artist,
+ }
diff --git a/yt_dlp/extractor/digiteka.py b/yt_dlp/extractor/digiteka.py
new file mode 100644
index 0000000..912e33b
--- /dev/null
+++ b/yt_dlp/extractor/digiteka.py
@@ -0,0 +1,98 @@
+from .common import InfoExtractor
+from ..utils import int_or_none
+
+
+class DigitekaIE(InfoExtractor):
+ _VALID_URL = r'''(?x)
+ https?://(?:www\.)?(?:digiteka\.net|ultimedia\.com)/
+ (?:
+ deliver/
+ (?P<embed_type>
+ generic|
+ musique
+ )
+ (?:/[^/]+)*/
+ (?:
+ src|
+ article
+ )|
+ default/index/video
+ (?P<site_type>
+ generic|
+ music
+ )
+ /id
+ )/(?P<id>[\d+a-z]+)'''
+ _EMBED_REGEX = [r'<(?:iframe|script)[^>]+src=["\'](?P<url>(?:https?:)?//(?:www\.)?ultimedia\.com/deliver/(?:generic|musique)(?:/[^/]+)*/(?:src|article)/[\d+a-z]+)']
+ _TESTS = [{
+ # news
+ 'url': 'https://www.ultimedia.com/default/index/videogeneric/id/s8uk0r',
+ 'md5': '276a0e49de58c7e85d32b057837952a2',
+ 'info_dict': {
+ 'id': 's8uk0r',
+ 'ext': 'mp4',
+ 'title': 'Loi sur la fin de vie: le texte prévoit un renforcement des directives anticipées',
+ 'thumbnail': r're:^https?://.*\.jpg',
+ 'duration': 74,
+ 'upload_date': '20150317',
+ 'timestamp': 1426604939,
+ 'uploader_id': '3fszv',
+ },
+ }, {
+ # music
+ 'url': 'https://www.ultimedia.com/default/index/videomusic/id/xvpfp8',
+ 'md5': '2ea3513813cf230605c7e2ffe7eca61c',
+ 'info_dict': {
+ 'id': 'xvpfp8',
+ 'ext': 'mp4',
+ 'title': 'Two - C\'est La Vie (clip)',
+ 'thumbnail': r're:^https?://.*\.jpg',
+ 'duration': 233,
+ 'upload_date': '20150224',
+ 'timestamp': 1424760500,
+ 'uploader_id': '3rfzk',
+ },
+ }, {
+ 'url': 'https://www.digiteka.net/deliver/generic/iframe/mdtk/01637594/src/lqm3kl/zone/1/showtitle/1/autoplay/yes',
+ 'only_matching': True,
+ }]
+
+ def _real_extract(self, url):
+ mobj = self._match_valid_url(url)
+ video_id = mobj.group('id')
+ video_type = mobj.group('embed_type') or mobj.group('site_type')
+ if video_type == 'music':
+ video_type = 'musique'
+
+ deliver_info = self._download_json(
+ 'http://www.ultimedia.com/deliver/video?video=%s&topic=%s' % (video_id, video_type),
+ video_id)
+
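+        # Some entries are plain YouTube embeds; delegate those to the
+        # YouTube extractor.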
+ yt_id = deliver_info.get('yt_id')
+ if yt_id:
+ return self.url_result(yt_id, 'Youtube')
+
+ jwconf = deliver_info['jwconf']
+
+ formats = []
+ for source in jwconf['playlist'][0]['sources']:
+ formats.append({
+ 'url': source['file'],
+ 'format_id': source.get('label'),
+ })
+
+ title = deliver_info['title']
+ thumbnail = jwconf.get('image')
+ duration = int_or_none(deliver_info.get('duration'))
+ timestamp = int_or_none(deliver_info.get('release_time'))
+ uploader_id = deliver_info.get('owner_id')
+
+ return {
+ 'id': video_id,
+ 'title': title,
+ 'thumbnail': thumbnail,
+ 'duration': duration,
+ 'timestamp': timestamp,
+ 'uploader_id': uploader_id,
+ 'formats': formats,
+ }
diff --git a/yt_dlp/extractor/discogs.py b/yt_dlp/extractor/discogs.py
new file mode 100644
index 0000000..048c622
--- /dev/null
+++ b/yt_dlp/extractor/discogs.py
@@ -0,0 +1,35 @@
+from .common import InfoExtractor
+from .youtube import YoutubeIE
+from ..utils import traverse_obj
+
+
+class DiscogsReleasePlaylistIE(InfoExtractor):
+ _VALID_URL = r'https?://(?:www\.)?discogs\.com/(?P<type>release|master)/(?P<id>\d+)'
+ _TESTS = [{
+ 'url': 'https://www.discogs.com/release/1-The-Persuader-Stockholm',
+ 'info_dict': {
+ 'id': 'release1',
+ 'title': 'Stockholm',
+ },
+ 'playlist_mincount': 7,
+ }, {
+ 'url': 'https://www.discogs.com/master/113-Vince-Watson-Moments-In-Time',
+ 'info_dict': {
+ 'id': 'master113',
+ 'title': 'Moments In Time',
+ },
+ 'playlist_mincount': 53,
+ }]
+
+ def _real_extract(self, url):
+ playlist_id, playlist_type = self._match_valid_url(url).group('id', 'type')
+
+ display_id = f'{playlist_type}{playlist_id}'
+ response = self._download_json(
+ f'https://api.discogs.com/{playlist_type}s/{playlist_id}', display_id)
+
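+        # The API response lists related videos; keep only those hosted on
+        # YouTube and delegate extraction to YoutubeIE.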
+ entries = [
+ self.url_result(video['uri'], YoutubeIE, video_title=video.get('title'))
+ for video in traverse_obj(response, ('videos', lambda _, v: YoutubeIE.suitable(v['uri'])))]
+
+ return self.playlist_result(entries, display_id, response.get('title'))
diff --git a/yt_dlp/extractor/discovery.py b/yt_dlp/extractor/discovery.py
new file mode 100644
index 0000000..75b4643
--- /dev/null
+++ b/yt_dlp/extractor/discovery.py
@@ -0,0 +1,115 @@
+import random
+import string
+
+from .discoverygo import DiscoveryGoBaseIE
+from ..compat import compat_urllib_parse_unquote
+from ..networking.exceptions import HTTPError
+from ..utils import ExtractorError
+
+
+class DiscoveryIE(DiscoveryGoBaseIE):
+ _VALID_URL = r'''(?x)https?://
+ (?P<site>
+ go\.discovery|
+ www\.
+ (?:
+ investigationdiscovery|
+ discoverylife|
+ animalplanet|
+ ahctv|
+ destinationamerica|
+ sciencechannel|
+ tlc
+ )|
+ watch\.
+ (?:
+ hgtv|
+ foodnetwork|
+ travelchannel|
+ diynetwork|
+ cookingchanneltv|
+ motortrend
+ )
+ )\.com/tv-shows/(?P<show_slug>[^/]+)/(?:video|full-episode)s/(?P<id>[^./?#]+)'''
+ _TESTS = [{
+ 'url': 'https://go.discovery.com/tv-shows/cash-cab/videos/riding-with-matthew-perry',
+ 'info_dict': {
+ 'id': '5a2f35ce6b66d17a5026e29e',
+ 'ext': 'mp4',
+ 'title': 'Riding with Matthew Perry',
+ 'description': 'md5:a34333153e79bc4526019a5129e7f878',
+ 'duration': 84,
+ },
+ 'params': {
+ 'skip_download': True, # requires ffmpeg
+ }
+ }, {
+ 'url': 'https://www.investigationdiscovery.com/tv-shows/final-vision/full-episodes/final-vision',
+ 'only_matching': True,
+ }, {
+ 'url': 'https://go.discovery.com/tv-shows/alaskan-bush-people/videos/follow-your-own-road',
+ 'only_matching': True,
+ }, {
+ # using `show_slug` is important to get the correct video data
+ 'url': 'https://www.sciencechannel.com/tv-shows/mythbusters-on-science/full-episodes/christmas-special',
+ 'only_matching': True,
+ }]
+ _GEO_COUNTRIES = ['US']
+ _GEO_BYPASS = False
+ _API_BASE_URL = 'https://api.discovery.com/v1/'
+
+ def _real_extract(self, url):
+ site, show_slug, display_id = self._match_valid_url(url).groups()
+
+ access_token = None
+ cookies = self._get_cookies(url)
+
+ # prefer Affiliate Auth Token over Anonymous Auth Token
+ auth_storage_cookie = cookies.get('eosAf') or cookies.get('eosAn')
+ if auth_storage_cookie and auth_storage_cookie.value:
+ auth_storage = self._parse_json(compat_urllib_parse_unquote(
+ compat_urllib_parse_unquote(auth_storage_cookie.value)),
+ display_id, fatal=False) or {}
+ access_token = auth_storage.get('a') or auth_storage.get('access_token')
+
+ if not access_token:
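+        # No auth cookie found: request an anonymous access token instead.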
+ access_token = self._download_json(
+ 'https://%s.com/anonymous' % site, display_id,
+ 'Downloading token JSON metadata', query={
+ 'authRel': 'authorization',
+ 'client_id': '3020a40c2356a645b4b4',
+ 'nonce': ''.join(random.choices(string.ascii_letters, k=32)),
+ 'redirectUri': 'https://www.discovery.com/',
+ })['access_token']
+
+ headers = self.geo_verification_headers()
+ headers['Authorization'] = 'Bearer ' + access_token
+
+ try:
+ video = self._download_json(
+ self._API_BASE_URL + 'content/videos',
+ display_id, 'Downloading content JSON metadata',
+ headers=headers, query={
+ 'embed': 'show.name',
+ 'fields': 'authenticated,description.detailed,duration,episodeNumber,id,name,parental.rating,season.number,show,tags',
+ 'slug': display_id,
+ 'show_slug': show_slug,
+ })[0]
+ video_id = video['id']
+ stream = self._download_json(
+ self._API_BASE_URL + 'streaming/video/' + video_id,
+ display_id, 'Downloading streaming JSON metadata', headers=headers)
+ except ExtractorError as e:
+ if isinstance(e.cause, HTTPError) and e.cause.status in (401, 403):
+ e_description = self._parse_json(
+ e.cause.response.read().decode(), display_id)['description']
+ if 'resource not available for country' in e_description:
+ self.raise_geo_restricted(countries=self._GEO_COUNTRIES)
+ if 'Authorized Networks' in e_description:
+ raise ExtractorError(
+                        'This video is only available via a cable service provider subscription,'
+                        ' which is not currently supported. You may want to use --cookies.', expected=True)
+ raise ExtractorError(e_description)
+ raise
+
+ return self._extract_video_info(video, stream, display_id)
diff --git a/yt_dlp/extractor/discoverygo.py b/yt_dlp/extractor/discoverygo.py
new file mode 100644
index 0000000..1f3d8e3
--- /dev/null
+++ b/yt_dlp/extractor/discoverygo.py
@@ -0,0 +1,172 @@
+import re
+
+from .common import InfoExtractor
+from ..utils import (
+ determine_ext,
+ extract_attributes,
+ ExtractorError,
+ int_or_none,
+ parse_age_limit,
+ remove_end,
+ unescapeHTML,
+ url_or_none,
+)
+
+
+class DiscoveryGoBaseIE(InfoExtractor):
+ _VALID_URL_TEMPLATE = r'''(?x)https?://(?:www\.)?(?:
+ discovery|
+ investigationdiscovery|
+ discoverylife|
+ animalplanet|
+ ahctv|
+ destinationamerica|
+ sciencechannel|
+ tlc|
+ velocitychannel
+ )go\.com/%s(?P<id>[^/?#&]+)'''
+
+ def _extract_video_info(self, video, stream, display_id):
+ title = video['name']
+
+ if not stream:
+ if video.get('authenticated') is True:
+ raise ExtractorError(
+                    'This video is only available via a cable service provider subscription,'
+                    ' which is not currently supported. You may want to use --cookies.', expected=True)
+ else:
+ raise ExtractorError('Unable to find stream')
+ STREAM_URL_SUFFIX = 'streamUrl'
+ formats = []
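+        # The stream object may expose an HLS URL ("streamUrl") and apparently
+        # an HDS variant ("hdsStreamUrl"); try each in turn.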
+ for stream_kind in ('', 'hds'):
+            # NB: str.capitalize() would lowercase the rest of the string
+            # ("Streamurl"), so uppercase only the first letter
+            suffix = STREAM_URL_SUFFIX[0].upper() + STREAM_URL_SUFFIX[1:] if stream_kind else STREAM_URL_SUFFIX
+ stream_url = stream.get('%s%s' % (stream_kind, suffix))
+ if not stream_url:
+ continue
+ if stream_kind == '':
+ formats.extend(self._extract_m3u8_formats(
+ stream_url, display_id, 'mp4', entry_protocol='m3u8_native',
+ m3u8_id='hls', fatal=False))
+ elif stream_kind == 'hds':
+ formats.extend(self._extract_f4m_formats(
+ stream_url, display_id, f4m_id=stream_kind, fatal=False))
+
+ video_id = video.get('id') or display_id
+ description = video.get('description', {}).get('detailed')
+ duration = int_or_none(video.get('duration'))
+
+ series = video.get('show', {}).get('name')
+ season_number = int_or_none(video.get('season', {}).get('number'))
+ episode_number = int_or_none(video.get('episodeNumber'))
+
+ tags = video.get('tags')
+ age_limit = parse_age_limit(video.get('parental', {}).get('rating'))
+
+ subtitles = {}
+ captions = stream.get('captions')
+ if isinstance(captions, list):
+ for caption in captions:
+ subtitle_url = url_or_none(caption.get('fileUrl'))
+ if not subtitle_url or not subtitle_url.startswith('http'):
+ continue
+ lang = caption.get('fileLang', 'en')
+ ext = determine_ext(subtitle_url)
+ subtitles.setdefault(lang, []).append({
+ 'url': subtitle_url,
+ 'ext': 'ttml' if ext == 'xml' else ext,
+ })
+
+ return {
+ 'id': video_id,
+ 'display_id': display_id,
+ 'title': title,
+ 'description': description,
+ 'duration': duration,
+ 'series': series,
+ 'season_number': season_number,
+ 'episode_number': episode_number,
+ 'tags': tags,
+ 'age_limit': age_limit,
+ 'formats': formats,
+ 'subtitles': subtitles,
+ }
+
+
+class DiscoveryGoIE(DiscoveryGoBaseIE):
+ _VALID_URL = DiscoveryGoBaseIE._VALID_URL_TEMPLATE % r'(?:[^/]+/)+'
+ _GEO_COUNTRIES = ['US']
+ _TEST = {
+ 'url': 'https://www.discoverygo.com/bering-sea-gold/reaper-madness/',
+ 'info_dict': {
+ 'id': '58c167d86b66d12f2addeb01',
+ 'ext': 'mp4',
+ 'title': 'Reaper Madness',
+ 'description': 'md5:09f2c625c99afb8946ed4fb7865f6e78',
+ 'duration': 2519,
+ 'series': 'Bering Sea Gold',
+ 'season_number': 8,
+ 'episode_number': 6,
+ 'age_limit': 14,
+ },
+ }
+
+ def _real_extract(self, url):
+ display_id = self._match_id(url)
+
+ webpage = self._download_webpage(url, display_id)
+
+ container = extract_attributes(
+ self._search_regex(
+ r'(<div[^>]+class=["\']video-player-container[^>]+>)',
+ webpage, 'video container'))
+
+ video = self._parse_json(
+ container.get('data-video') or container.get('data-json'),
+ display_id)
+
+ stream = video.get('stream')
+
+ return self._extract_video_info(video, stream, display_id)
+
+
+class DiscoveryGoPlaylistIE(DiscoveryGoBaseIE):
+ _VALID_URL = DiscoveryGoBaseIE._VALID_URL_TEMPLATE % ''
+ _TEST = {
+ 'url': 'https://www.discoverygo.com/bering-sea-gold/',
+ 'info_dict': {
+ 'id': 'bering-sea-gold',
+ 'title': 'Bering Sea Gold',
+ 'description': 'md5:cc5c6489835949043c0cc3ad66c2fa0e',
+ },
+ 'playlist_mincount': 6,
+ }
+
+ @classmethod
+ def suitable(cls, url):
+        return False if DiscoveryGoIE.suitable(url) else super().suitable(url)
+
+ def _real_extract(self, url):
+ display_id = self._match_id(url)
+
+ webpage = self._download_webpage(url, display_id)
+
+ entries = []
+ for mobj in re.finditer(r'data-json=(["\'])(?P<json>{.+?})\1', webpage):
+ data = self._parse_json(
+ mobj.group('json'), display_id,
+ transform_source=unescapeHTML, fatal=False)
+ if not isinstance(data, dict) or data.get('type') != 'episode':
+ continue
+ episode_url = data.get('socialUrl')
+ if not episode_url:
+ continue
+ entries.append(self.url_result(
+ episode_url, ie=DiscoveryGoIE.ie_key(),
+ video_id=data.get('id')))
+
+ return self.playlist_result(
+ entries, display_id,
+ remove_end(self._og_search_title(
+ webpage, fatal=False), ' | Discovery GO'),
+ self._og_search_description(webpage))
diff --git a/yt_dlp/extractor/disney.py b/yt_dlp/extractor/disney.py
new file mode 100644
index 0000000..430de32
--- /dev/null
+++ b/yt_dlp/extractor/disney.py
@@ -0,0 +1,160 @@
+import re
+
+from .common import InfoExtractor
+from ..utils import (
+ int_or_none,
+ unified_strdate,
+ determine_ext,
+ join_nonempty,
+ update_url_query,
+)
+
+
+class DisneyIE(InfoExtractor):
+ _VALID_URL = r'''(?x)
+ https?://(?P<domain>(?:[^/]+\.)?(?:disney\.[a-z]{2,3}(?:\.[a-z]{2})?|disney(?:(?:me|latino)\.com|turkiye\.com\.tr|channel\.de)|(?:starwars|marvelkids)\.com))/(?:(?:embed/|(?:[^/]+/)+[\w-]+-)(?P<id>[a-z0-9]{24})|(?:[^/]+/)?(?P<display_id>[^/?#]+))'''
+ _TESTS = [{
+ # Disney.EmbedVideo
+ 'url': 'http://video.disney.com/watch/moana-trailer-545ed1857afee5a0ec239977',
+ 'info_dict': {
+ 'id': '545ed1857afee5a0ec239977',
+ 'ext': 'mp4',
+ 'title': 'Moana - Trailer',
+ 'description': 'A fun adventure for the entire Family! Bring home Moana on Digital HD Feb 21 & Blu-ray March 7',
+ 'upload_date': '20170112',
+ },
+ 'params': {
+ # m3u8 download
+ 'skip_download': True,
+ }
+ }, {
+ # Grill.burger
+ 'url': 'http://www.starwars.com/video/rogue-one-a-star-wars-story-intro-featurette',
+ 'info_dict': {
+ 'id': '5454e9f4e9804a552e3524c8',
+ 'ext': 'mp4',
+ 'title': '"Intro" Featurette: Rogue One: A Star Wars Story',
+ 'upload_date': '20170104',
+ 'description': 'Go behind-the-scenes of Rogue One: A Star Wars Story in this featurette with Director Gareth Edwards and the cast of the film.',
+ },
+ 'params': {
+ # m3u8 download
+ 'skip_download': True,
+ }
+ }, {
+ 'url': 'http://videos.disneylatino.com/ver/spider-man-de-regreso-a-casa-primer-adelanto-543a33a1850bdcfcca13bae2',
+ 'only_matching': True,
+ }, {
+ 'url': 'http://video.en.disneyme.com/watch/future-worm/robo-carp-2001-544b66002aa7353cdd3f5114',
+ 'only_matching': True,
+ }, {
+ 'url': 'http://video.disneyturkiye.com.tr/izle/7c-7-cuceler/kimin-sesi-zaten-5456f3d015f6b36c8afdd0e2',
+ 'only_matching': True,
+ }, {
+ 'url': 'http://disneyjunior.disney.com/embed/546a4798ddba3d1612e4005d',
+ 'only_matching': True,
+ }, {
+ 'url': 'http://www.starwars.com/embed/54690d1e6c42e5f09a0fb097',
+ 'only_matching': True,
+ }, {
+ 'url': 'http://spiderman.marvelkids.com/embed/522900d2ced3c565e4cc0677',
+ 'only_matching': True,
+ }, {
+ 'url': 'http://spiderman.marvelkids.com/videos/contest-of-champions-part-four-clip-1',
+ 'only_matching': True,
+ }, {
+ 'url': 'http://disneyjunior.en.disneyme.com/dj/watch-my-friends-tigger-and-pooh-promo',
+ 'only_matching': True,
+ }, {
+ 'url': 'http://disneychannel.de/sehen/soy-luna-folge-118-5518518987ba27f3cc729268',
+ 'only_matching': True,
+ }, {
+ 'url': 'http://disneyjunior.disney.com/galactech-the-galactech-grab-galactech-an-admiral-rescue',
+ 'only_matching': True,
+ }]
+
+ def _real_extract(self, url):
+ domain, video_id, display_id = self._match_valid_url(url).groups()
+ if not video_id:
+ webpage = self._download_webpage(url, display_id)
+ grill = re.sub(r'"\s*\+\s*"', '', self._search_regex(
+ r'Grill\.burger\s*=\s*({.+})\s*:',
+ webpage, 'grill data'))
+ page_data = next(s for s in self._parse_json(grill, display_id)['stack'] if s.get('type') == 'video')
+ video_data = page_data['data'][0]
+ else:
+ webpage = self._download_webpage(
+ 'http://%s/embed/%s' % (domain, video_id), video_id)
+ page_data = self._parse_json(self._search_regex(
+ r'Disney\.EmbedVideo\s*=\s*({.+});',
+ webpage, 'embed data'), video_id)
+ video_data = page_data['video']
+
+ for external in video_data.get('externals', []):
+ if external.get('source') == 'vevo':
+ return self.url_result('vevo:' + external['data_id'], 'Vevo')
+
+ video_id = video_data['id']
+ title = video_data['title']
+
+ formats = []
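+        # A "flavor" is one encoding of the video; the sentinel bitrate 99999
+        # marks an HLS master playlist, which is expanded into its variants.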
+ for flavor in video_data.get('flavors', []):
+ flavor_format = flavor.get('format')
+ flavor_url = flavor.get('url')
+ if not flavor_url or not re.match(r'https?://', flavor_url) or flavor_format == 'mp4_access':
+ continue
+ tbr = int_or_none(flavor.get('bitrate'))
+ if tbr == 99999:
+                # a wrong ks (Kaltura signature) causes a 404 error
+ flavor_url = update_url_query(flavor_url, {'ks': ''})
+ m3u8_formats = self._extract_m3u8_formats(
+ flavor_url, video_id, 'mp4',
+ m3u8_id=flavor_format, fatal=False)
+ for f in m3u8_formats:
+ # Apple FairPlay
+ if '/fpshls/' in f['url']:
+ continue
+ formats.append(f)
+ continue
+ ext = determine_ext(flavor_url)
+ if flavor_format == 'applehttp' or ext == 'm3u8':
+ ext = 'mp4'
+ width = int_or_none(flavor.get('width'))
+ height = int_or_none(flavor.get('height'))
+ formats.append({
+ 'format_id': join_nonempty(flavor_format, tbr),
+ 'url': flavor_url,
+ 'width': width,
+ 'height': height,
+ 'tbr': tbr,
+ 'ext': ext,
+ 'vcodec': 'none' if (width == 0 and height == 0) else None,
+ })
+ if not formats and video_data.get('expired'):
+ self.raise_no_formats(
+ '%s said: %s' % (self.IE_NAME, page_data['translations']['video_expired']),
+ expected=True)
+
+ subtitles = {}
+ for caption in video_data.get('captions', []):
+ caption_url = caption.get('url')
+ caption_format = caption.get('format')
+ if not caption_url or caption_format.startswith('unknown'):
+ continue
+ subtitles.setdefault(caption.get('language', 'en'), []).append({
+ 'url': caption_url,
+ 'ext': {
+ 'webvtt': 'vtt',
+ }.get(caption_format, caption_format),
+ })
+
+ return {
+ 'id': video_id,
+ 'title': title,
+ 'description': video_data.get('description') or video_data.get('short_desc'),
+ 'thumbnail': video_data.get('thumb') or video_data.get('thumb_secure'),
+ 'duration': int_or_none(video_data.get('duration_sec')),
+ 'upload_date': unified_strdate(video_data.get('publish_date')),
+ 'formats': formats,
+ 'subtitles': subtitles,
+ }
diff --git a/yt_dlp/extractor/dispeak.py b/yt_dlp/extractor/dispeak.py
new file mode 100644
index 0000000..37f89b9
--- /dev/null
+++ b/yt_dlp/extractor/dispeak.py
@@ -0,0 +1,127 @@
+import re
+
+from .common import InfoExtractor
+from ..utils import (
+ int_or_none,
+ parse_duration,
+ remove_end,
+ xpath_element,
+ xpath_text,
+)
+
+
+class DigitallySpeakingIE(InfoExtractor):
+ _VALID_URL = r'https?://(?:s?evt\.dispeak|events\.digitallyspeaking)\.com/(?:[^/]+/)+xml/(?P<id>[^.]+)\.xml'
+
+ _TESTS = [{
+ # From http://gdcvault.com/play/1023460/Tenacious-Design-and-The-Interface
+ 'url': 'http://evt.dispeak.com/ubm/gdc/sf16/xml/840376_BQRC.xml',
+ 'md5': 'a8efb6c31ed06ca8739294960b2dbabd',
+ 'info_dict': {
+ 'id': '840376_BQRC',
+ 'ext': 'mp4',
+ 'title': 'Tenacious Design and The Interface of \'Destiny\'',
+ },
+ }, {
+ # From http://www.gdcvault.com/play/1014631/Classic-Game-Postmortem-PAC
+ 'url': 'http://events.digitallyspeaking.com/gdc/sf11/xml/12396_1299111843500GMPX.xml',
+ 'only_matching': True,
+ }, {
+ # From http://www.gdcvault.com/play/1013700/Advanced-Material
+ 'url': 'http://sevt.dispeak.com/ubm/gdc/eur10/xml/11256_1282118587281VNIT.xml',
+ 'only_matching': True,
+ }, {
+ # From https://gdcvault.com/play/1016624, empty speakerVideo
+ 'url': 'https://sevt.dispeak.com/ubm/gdc/online12/xml/201210-822101_1349794556671DDDD.xml',
+ 'info_dict': {
+ 'id': '201210-822101_1349794556671DDDD',
+ 'ext': 'flv',
+ 'title': 'Pre-launch - Preparing to Take the Plunge',
+ },
+ }, {
+ # From http://www.gdcvault.com/play/1014846/Conference-Keynote-Shigeru, empty slideVideo
+ 'url': 'http://events.digitallyspeaking.com/gdc/project25/xml/p25-miyamoto1999_1282467389849HSVB.xml',
+ 'only_matching': True,
+ }]
+
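+    # The event XML describes either MP4 renditions (MBRVideos) or legacy
+    # RTMP slide/speaker/audio streams; _real_extract() tries MP4 first.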
+ def _parse_mp4(self, metadata):
+ video_formats = []
+ video_root = None
+
+ mp4_video = xpath_text(metadata, './mp4video', default=None)
+ if mp4_video is not None:
+ mobj = re.match(r'(?P<root>https?://.*?/).*', mp4_video)
+ video_root = mobj.group('root')
+ if video_root is None:
+ http_host = xpath_text(metadata, 'httpHost', default=None)
+ if http_host:
+ video_root = 'http://%s/' % http_host
+ if video_root is None:
+ # Hard-coded in http://evt.dispeak.com/ubm/gdc/sf16/custom/player2.js
+ # Works for GPUTechConf, too
+ video_root = 'http://s3-2u.digitallyspeaking.com/'
+
+ formats = metadata.findall('./MBRVideos/MBRVideo')
+ if not formats:
+ return None
+ for a_format in formats:
+ stream_name = xpath_text(a_format, 'streamName', fatal=True)
+ video_path = re.match(r'mp4\:(?P<path>.*)', stream_name).group('path')
+ url = video_root + video_path
+ bitrate = xpath_text(a_format, 'bitrate')
+ tbr = int_or_none(bitrate)
+ vbr = int_or_none(self._search_regex(
+ r'-(\d+)\.mp4', video_path, 'vbr', default=None))
+ video_formats.append({
+ 'format_id': bitrate,
+ 'url': url,
+ 'tbr': tbr,
+ 'vbr': vbr,
+ })
+ return video_formats
+
+ def _parse_flv(self, metadata):
+ formats = []
+ akamai_url = xpath_text(metadata, './akamaiHost', fatal=True)
+ audios = metadata.findall('./audios/audio')
+ for audio in audios:
+ formats.append({
+ 'url': 'rtmp://%s/ondemand?ovpfv=1.1' % akamai_url,
+ 'play_path': remove_end(audio.get('url'), '.flv'),
+ 'ext': 'flv',
+ 'vcodec': 'none',
+ 'quality': 1,
+ 'format_id': audio.get('code'),
+ })
+ for video_key, format_id, preference in (
+ ('slide', 'slides', -2), ('speaker', 'speaker', -1)):
+ video_path = xpath_text(metadata, './%sVideo' % video_key)
+ if not video_path:
+ continue
+ formats.append({
+ 'url': 'rtmp://%s/ondemand?ovpfv=1.1' % akamai_url,
+ 'play_path': remove_end(video_path, '.flv'),
+ 'ext': 'flv',
+ 'format_note': '%s video' % video_key,
+ 'quality': preference,
+ 'format_id': format_id,
+ })
+ return formats
+
+ def _real_extract(self, url):
+ video_id = self._match_id(url)
+
+ xml_description = self._download_xml(url, video_id)
+ metadata = xpath_element(xml_description, 'metadata')
+
+ video_formats = self._parse_mp4(metadata)
+ if video_formats is None:
+ video_formats = self._parse_flv(metadata)
+
+ return {
+ 'id': video_id,
+ 'formats': video_formats,
+ 'title': xpath_text(metadata, 'title', fatal=True),
+ 'duration': parse_duration(xpath_text(metadata, 'endTime')),
+ 'creator': xpath_text(metadata, 'speaker'),
+ }
diff --git a/yt_dlp/extractor/dlf.py b/yt_dlp/extractor/dlf.py
new file mode 100644
index 0000000..88a4149
--- /dev/null
+++ b/yt_dlp/extractor/dlf.py
@@ -0,0 +1,192 @@
+import re
+
+from .common import InfoExtractor
+from ..utils import (
+ determine_ext,
+ extract_attributes,
+ int_or_none,
+ traverse_obj,
+ url_or_none,
+)
+
+
+class DLFBaseIE(InfoExtractor):
+ _VALID_URL_BASE = r'https?://(?:www\.)?deutschlandfunk\.de/'
+ _BUTTON_REGEX = r'(<button[^>]+alt="Anhören"[^>]+data-audio-diraid[^>]*>)'
+
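+    # Player buttons carry the audio URL and metadata in data-* attributes
+    # whose names vary across page types, hence the fallback chains below.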
+ def _parse_button_attrs(self, button, audio_id=None):
+ attrs = extract_attributes(button)
+ audio_id = audio_id or attrs['data-audio-diraid']
+
+ url = traverse_obj(
+ attrs, 'data-audio-download-src', 'data-audio', 'data-audioreference',
+ 'data-audio-src', expected_type=url_or_none)
+ ext = determine_ext(url)
+
+ return {
+ 'id': audio_id,
+ 'extractor_key': DLFIE.ie_key(),
+ 'extractor': DLFIE.IE_NAME,
+ **traverse_obj(attrs, {
+ 'title': (('data-audiotitle', 'data-audio-title', 'data-audio-download-tracking-title'), {str}),
+ 'duration': (('data-audioduration', 'data-audio-duration'), {int_or_none}),
+ 'thumbnail': ('data-audioimage', {url_or_none}),
+ 'uploader': 'data-audio-producer',
+ 'series': 'data-audio-series',
+ 'channel': 'data-audio-origin-site-name',
+ 'webpage_url': ('data-audio-download-tracking-path', {url_or_none}),
+ }, get_all=False),
+ 'formats': (self._extract_m3u8_formats(url, audio_id, fatal=False)
+ if ext == 'm3u8' else [{'url': url, 'ext': ext, 'vcodec': 'none'}])
+ }
+
+
+class DLFIE(DLFBaseIE):
+ IE_NAME = 'dlf'
+ _VALID_URL = DLFBaseIE._VALID_URL_BASE + r'[\w-]+-dlf-(?P<id>[\da-f]{8})-100\.html'
+ _TESTS = [
+ # Audio as an HLS stream
+ {
+ 'url': 'https://www.deutschlandfunk.de/tanz-der-saiteninstrumente-das-wild-strings-trio-aus-slowenien-dlf-03a3eb19-100.html',
+ 'info_dict': {
+ 'id': '03a3eb19',
+ 'title': r're:Tanz der Saiteninstrumente [-/] Das Wild Strings Trio aus Slowenien',
+ 'ext': 'm4a',
+ 'duration': 3298,
+ 'thumbnail': 'https://assets.deutschlandfunk.de/FALLBACK-IMAGE-AUDIO/512x512.png?t=1603714364673',
+ 'uploader': 'Deutschlandfunk',
+ 'series': 'On Stage',
+ 'channel': 'deutschlandfunk'
+ },
+ 'params': {
+ 'skip_download': 'm3u8'
+ },
+ 'skip': 'This webpage no longer exists'
+ }, {
+ 'url': 'https://www.deutschlandfunk.de/russische-athleten-kehren-zurueck-auf-die-sportbuehne-ein-gefaehrlicher-tueroeffner-dlf-d9cc1856-100.html',
+ 'info_dict': {
+ 'id': 'd9cc1856',
+ 'title': 'Russische Athleten kehren zurück auf die Sportbühne: Ein gefährlicher Türöffner',
+ 'ext': 'mp3',
+ 'duration': 291,
+ 'thumbnail': 'https://assets.deutschlandfunk.de/FALLBACK-IMAGE-AUDIO/512x512.png?t=1603714364673',
+ 'uploader': 'Deutschlandfunk',
+ 'series': 'Kommentare und Themen der Woche',
+ 'channel': 'deutschlandfunk'
+ }
+ },
+ ]
+
+ def _real_extract(self, url):
+ audio_id = self._match_id(url)
+ webpage = self._download_webpage(url, audio_id)
+
+ return self._parse_button_attrs(
+ self._search_regex(self._BUTTON_REGEX, webpage, 'button'), audio_id)
+
+
+class DLFCorpusIE(DLFBaseIE):
+ IE_NAME = 'dlf:corpus'
+ IE_DESC = 'DLF Multi-feed Archives'
+ _VALID_URL = DLFBaseIE._VALID_URL_BASE + r'(?P<id>(?![\w-]+-dlf-[\da-f]{8})[\w-]+-\d+)\.html'
+ _TESTS = [
+ # Recorded news broadcast with referrals to related broadcasts
+ {
+ 'url': 'https://www.deutschlandfunk.de/fechten-russland-belarus-ukraine-protest-100.html',
+ 'info_dict': {
+ 'id': 'fechten-russland-belarus-ukraine-protest-100',
+ 'title': r're:Wiederzulassung als neutrale Athleten [-/] Was die Rückkehr russischer und belarussischer Sportler beim Fechten bedeutet',
+ 'description': 'md5:91340aab29c71aa7518ad5be13d1e8ad'
+ },
+ 'playlist_mincount': 5,
+ 'playlist': [{
+ 'info_dict': {
+ 'id': '1fc5d64a',
+ 'title': r're:Wiederzulassung als neutrale Athleten [-/] Was die Rückkehr russischer und belarussischer Sportler beim Fechten bedeutet',
+ 'ext': 'mp3',
+ 'duration': 252,
+ 'thumbnail': 'https://assets.deutschlandfunk.de/aad16241-6b76-4a09-958b-96d0ee1d6f57/512x512.jpg?t=1679480020313',
+ 'uploader': 'Deutschlandfunk',
+ 'series': 'Sport',
+ 'channel': 'deutschlandfunk'
+ }
+ }, {
+ 'info_dict': {
+ 'id': '2ada145f',
+ 'title': r're:(?:Sportpolitik / )?Fechtverband votiert für Rückkehr russischer Athleten',
+ 'ext': 'mp3',
+ 'duration': 336,
+ 'thumbnail': 'https://assets.deutschlandfunk.de/FILE_93982766f7317df30409b8a184ac044a/512x512.jpg?t=1678547581005',
+ 'uploader': 'Deutschlandfunk',
+ 'series': 'Deutschlandfunk Nova',
+ 'channel': 'deutschlandfunk-nova'
+ }
+ }, {
+ 'info_dict': {
+ 'id': '5e55e8c9',
+ 'title': r're:Wiederzulassung von Russland und Belarus [-/] "Herumlavieren" des Fechter-Bundes sorgt für Unverständnis',
+ 'ext': 'mp3',
+ 'duration': 187,
+ 'thumbnail': 'https://assets.deutschlandfunk.de/a595989d-1ed1-4a2e-8370-b64d7f11d757/512x512.jpg?t=1679173825412',
+ 'uploader': 'Deutschlandfunk',
+ 'series': 'Sport am Samstag',
+ 'channel': 'deutschlandfunk'
+ }
+ }, {
+ 'info_dict': {
+ 'id': '47e1a096',
+ 'title': r're:Rückkehr Russlands im Fechten [-/] "Fassungslos, dass es einfach so passiert ist"',
+ 'ext': 'mp3',
+ 'duration': 602,
+ 'thumbnail': 'https://assets.deutschlandfunk.de/da4c494a-21cc-48b4-9cc7-40e09fd442c2/512x512.jpg?t=1678562155770',
+ 'uploader': 'Deutschlandfunk',
+ 'series': 'Sport am Samstag',
+ 'channel': 'deutschlandfunk'
+ }
+ }, {
+ 'info_dict': {
+ 'id': '5e55e8c9',
+ 'title': r're:Wiederzulassung von Russland und Belarus [-/] "Herumlavieren" des Fechter-Bundes sorgt für Unverständnis',
+ 'ext': 'mp3',
+ 'duration': 187,
+ 'thumbnail': 'https://assets.deutschlandfunk.de/a595989d-1ed1-4a2e-8370-b64d7f11d757/512x512.jpg?t=1679173825412',
+ 'uploader': 'Deutschlandfunk',
+ 'series': 'Sport am Samstag',
+ 'channel': 'deutschlandfunk'
+ }
+ }]
+ },
+ # Podcast feed with tag buttons, playlist count fluctuates
+ {
+ 'url': 'https://www.deutschlandfunk.de/kommentare-und-themen-der-woche-100.html',
+ 'info_dict': {
+ 'id': 'kommentare-und-themen-der-woche-100',
+ 'title': 'Meinung - Kommentare und Themen der Woche',
+ 'description': 'md5:2901bbd65cd2d45e116d399a099ce5d5',
+ },
+ 'playlist_mincount': 10,
+ },
+ # Podcast feed with no description
+ {
+ 'url': 'https://www.deutschlandfunk.de/podcast-tolle-idee-100.html',
+ 'info_dict': {
+ 'id': 'podcast-tolle-idee-100',
+ 'title': 'Wissenschaftspodcast - Tolle Idee! - Was wurde daraus?',
+ },
+ 'playlist_mincount': 11,
+ },
+ ]
+
+ def _real_extract(self, url):
+ playlist_id = self._match_id(url)
+ webpage = self._download_webpage(url, playlist_id)
+
+ return {
+ '_type': 'playlist',
+ 'id': playlist_id,
+ 'description': self._html_search_meta(
+ ['description', 'og:description', 'twitter:description'], webpage, default=None),
+ 'title': self._html_search_meta(
+ ['og:title', 'twitter:title'], webpage, default=None),
+ 'entries': map(self._parse_button_attrs, re.findall(self._BUTTON_REGEX, webpage)),
+ }
diff --git a/yt_dlp/extractor/dlive.py b/yt_dlp/extractor/dlive.py
new file mode 100644
index 0000000..30fcf9f
--- /dev/null
+++ b/yt_dlp/extractor/dlive.py
@@ -0,0 +1,92 @@
+import json
+
+from .common import InfoExtractor
+from ..utils import int_or_none
+
+
+class DLiveVODIE(InfoExtractor):
+ IE_NAME = 'dlive:vod'
+ _VALID_URL = r'https?://(?:www\.)?dlive\.tv/p/(?P<uploader_id>.+?)\+(?P<id>[^/?#&]+)'
+ _TESTS = [{
+ 'url': 'https://dlive.tv/p/pdp+3mTzOl4WR',
+ 'info_dict': {
+ 'id': '3mTzOl4WR',
+ 'ext': 'mp4',
+ 'title': 'Minecraft with james charles epic',
+ 'upload_date': '20190701',
+ 'timestamp': 1562011015,
+ 'uploader_id': 'pdp',
+ }
+ }, {
+ 'url': 'https://dlive.tv/p/pdpreplay+D-RD-xSZg',
+ 'only_matching': True,
+ }]
+
+ def _real_extract(self, url):
+ uploader_id, vod_id = self._match_valid_url(url).groups()
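+        # DLive is queried through its GraphQL endpoint; past broadcasts are
+        # keyed by the "<uploader>+<vod_id>" permlink.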
+ broadcast = self._download_json(
+ 'https://graphigo.prd.dlive.tv/', vod_id,
+ data=json.dumps({'query': '''query {
+ pastBroadcast(permlink:"%s+%s") {
+ content
+ createdAt
+ length
+ playbackUrl
+ title
+ thumbnailUrl
+ viewCount
+ }
+}''' % (uploader_id, vod_id)}).encode())['data']['pastBroadcast']
+ title = broadcast['title']
+ formats = self._extract_m3u8_formats(
+ broadcast['playbackUrl'], vod_id, 'mp4', 'm3u8_native')
+ return {
+ 'id': vod_id,
+ 'title': title,
+ 'uploader_id': uploader_id,
+ 'formats': formats,
+ 'description': broadcast.get('content'),
+ 'thumbnail': broadcast.get('thumbnailUrl'),
+ 'timestamp': int_or_none(broadcast.get('createdAt'), 1000),
+ 'view_count': int_or_none(broadcast.get('viewCount')),
+ }
+
+
+class DLiveStreamIE(InfoExtractor):
+ IE_NAME = 'dlive:stream'
+ _VALID_URL = r'https?://(?:www\.)?dlive\.tv/(?!p/)(?P<id>[\w.-]+)'
+
+ def _real_extract(self, url):
+ display_name = self._match_id(url)
+ user = self._download_json(
+ 'https://graphigo.prd.dlive.tv/', display_name,
+ data=json.dumps({'query': '''query {
+ userByDisplayName(displayname:"%s") {
+ livestream {
+ content
+ createdAt
+ title
+ thumbnailUrl
+ watchingCount
+ }
+ username
+ }
+}''' % display_name}).encode())['data']['userByDisplayName']
+ livestream = user['livestream']
+ title = livestream['title']
+ username = user['username']
+ formats = self._extract_m3u8_formats(
+ 'https://live.prd.dlive.tv/hls/live/%s.m3u8' % username,
+ display_name, 'mp4')
+ return {
+ 'id': display_name,
+ 'title': title,
+ 'uploader': display_name,
+ 'uploader_id': username,
+ 'formats': formats,
+ 'description': livestream.get('content'),
+ 'thumbnail': livestream.get('thumbnailUrl'),
+ 'is_live': True,
+ 'timestamp': int_or_none(livestream.get('createdAt'), 1000),
+ 'view_count': int_or_none(livestream.get('watchingCount')),
+ }
diff --git a/yt_dlp/extractor/douyutv.py b/yt_dlp/extractor/douyutv.py
new file mode 100644
index 0000000..ee8893d
--- /dev/null
+++ b/yt_dlp/extractor/douyutv.py
@@ -0,0 +1,311 @@
+import hashlib
+import time
+import urllib.parse
+import uuid
+
+from .common import InfoExtractor
+from .openload import PhantomJSwrapper
+from ..utils import (
+ ExtractorError,
+ UserNotLive,
+ determine_ext,
+ int_or_none,
+ js_to_json,
+ parse_resolution,
+ str_or_none,
+ traverse_obj,
+ unescapeHTML,
+ url_or_none,
+ urlencode_postdata,
+ urljoin,
+)
+
+
+class DouyuBaseIE(InfoExtractor):
+ def _download_cryptojs_md5(self, video_id):
+ for url in [
+ 'https://cdnjs.cloudflare.com/ajax/libs/crypto-js/3.1.2/rollups/md5.js',
+ 'https://cdn.bootcdn.net/ajax/libs/crypto-js/3.1.2/rollups/md5.js',
+ ]:
+ js_code = self._download_webpage(
+ url, video_id, note='Downloading signing dependency', fatal=False)
+ if js_code:
+ self.cache.store('douyu', 'crypto-js-md5', js_code)
+ return js_code
+ raise ExtractorError('Unable to download JS dependency (crypto-js/md5)')
+
+ def _get_cryptojs_md5(self, video_id):
+ return self.cache.load('douyu', 'crypto-js-md5') or self._download_cryptojs_md5(video_id)
+
+ def _calc_sign(self, sign_func, video_id, a):
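+ # ub98484234() is the site's obfuscated signing routine; executed under
+ # PhantomJS with (id, device_id, timestamp) it prints a query string, which
+ # is parsed back into a dict of signed POST parameters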
+ b = uuid.uuid4().hex
+ c = round(time.time())
+ js_script = f'{self._get_cryptojs_md5(video_id)};{sign_func};console.log(ub98484234("{a}","{b}","{c}"))'
+ phantom = PhantomJSwrapper(self)
+ result = phantom.execute(js_script, video_id,
+ note='Executing JS signing script').strip()
+ return {i: v[0] for i, v in urllib.parse.parse_qs(result).items()}
+
+ def _search_js_sign_func(self, webpage, fatal=True):
+ # The greedy optional prefix ensures the last possible script tag is matched
+ return self._search_regex(
+ r'(?:<script.*)?<script[^>]*>(.*?ub98484234.*?)</script>', webpage, 'JS sign func', fatal=fatal)
+
+
+class DouyuTVIE(DouyuBaseIE):
+ IE_DESC = '斗鱼直播'
+ _VALID_URL = r'https?://(?:www\.)?douyu(?:tv)?\.com/(topic/\w+\?rid=|(?:[^/]+/))*(?P<id>[A-Za-z0-9]+)'
+ _TESTS = [{
+ 'url': 'https://www.douyu.com/pigff',
+ 'info_dict': {
+ 'id': '24422',
+ 'display_id': 'pigff',
+ 'ext': 'mp4',
+ 'title': 're:^【PIGFF】.* [0-9]{4}-[0-9]{2}-[0-9]{2} [0-9]{2}:[0-9]{2}$',
+ 'description': r'≥15级牌子看鱼吧置顶帖进粉丝vx群',
+ 'thumbnail': str,
+ 'uploader': 'pigff',
+ 'is_live': True,
+ 'live_status': 'is_live',
+ },
+ 'params': {
+ 'skip_download': True,
+ },
+ }, {
+ 'url': 'http://www.douyutv.com/85982',
+ 'info_dict': {
+ 'id': '85982',
+ 'display_id': '85982',
+ 'ext': 'flv',
+ 'title': 're:^小漠从零单排记!——CSOL2躲猫猫 [0-9]{4}-[0-9]{2}-[0-9]{2} [0-9]{2}:[0-9]{2}$',
+ 'description': 'md5:746a2f7a253966a06755a912f0acc0d2',
+ 'thumbnail': r're:^https?://.*\.png',
+ 'uploader': 'douyu小漠',
+ 'is_live': True,
+ },
+ 'params': {
+ 'skip_download': True,
+ },
+ 'skip': 'Room not found',
+ }, {
+ 'url': 'http://www.douyutv.com/17732',
+ 'info_dict': {
+ 'id': '17732',
+ 'display_id': '17732',
+ 'ext': 'flv',
+ 'title': 're:^清晨醒脑!根本停不下来! [0-9]{4}-[0-9]{2}-[0-9]{2} [0-9]{2}:[0-9]{2}$',
+ 'description': r're:.*m7show@163\.com.*',
+ 'thumbnail': r're:^https?://.*\.png',
+ 'uploader': '7师傅',
+ 'is_live': True,
+ },
+ 'params': {
+ 'skip_download': True,
+ },
+ }, {
+ 'url': 'https://www.douyu.com/topic/ydxc?rid=6560603',
+ 'info_dict': {
+ 'id': '6560603',
+ 'display_id': '6560603',
+ 'ext': 'flv',
+ 'title': 're:^阿余:新年快乐恭喜发财! [0-9]{4}-[0-9]{2}-[0-9]{2} [0-9]{2}:[0-9]{2}$',
+ 'description': 're:.*直播时间.*',
+ 'thumbnail': r're:^https?://.*\.png',
+ 'uploader': '阿涛皎月Carry',
+ 'live_status': 'is_live',
+ },
+ 'params': {
+ 'skip_download': True,
+ },
+ }, {
+ 'url': 'http://www.douyu.com/xiaocang',
+ 'only_matching': True,
+ }, {
+ # \"room_id\"
+ 'url': 'http://www.douyu.com/t/lpl',
+ 'only_matching': True,
+ }]
+
+ def _get_sign_func(self, room_id, video_id):
+ return self._download_json(
+ f'https://www.douyu.com/swf_api/homeH5Enc?rids={room_id}', video_id,
+ note='Getting signing script')['data'][f'room{room_id}']
+
+ def _extract_stream_formats(self, stream_formats):
+ formats = []
+ for stream_info in traverse_obj(stream_formats, (..., 'data')):
+ stream_url = urljoin(
+ traverse_obj(stream_info, 'rtmp_url'), traverse_obj(stream_info, 'rtmp_live'))
+ if stream_url:
+ rate_id = traverse_obj(stream_info, ('rate', {int_or_none}))
+ rate_info = traverse_obj(stream_info, ('multirates', lambda _, v: v['rate'] == rate_id), get_all=False)
+ ext = determine_ext(stream_url)
+ formats.append({
+ 'url': stream_url,
+ 'format_id': str_or_none(rate_id),
+ 'ext': 'mp4' if ext == 'm3u8' else ext,
+ 'protocol': 'm3u8_native' if ext == 'm3u8' else 'https',
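+ # rate 0 is the original stream; negative modulo keeps it ranked highest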
+ 'quality': rate_id % -10000 if rate_id is not None else None,
+ **traverse_obj(rate_info, {
+ 'format': ('name', {str_or_none}),
+ 'tbr': ('bit', {int_or_none}),
+ }),
+ })
+ return formats
+
+ def _real_extract(self, url):
+ video_id = self._match_id(url)
+
+ webpage = self._download_webpage(url, video_id)
+ room_id = self._search_regex(r'\$ROOM\.room_id\s*=\s*(\d+)', webpage, 'room id')
+
+ if self._search_regex(r'"videoLoop"\s*:\s*(\d+)', webpage, 'loop', default='') == '1':
+ raise UserNotLive('The channel is auto-playing VODs', video_id=video_id)
+ if self._search_regex(r'\$ROOM\.show_status\s*=\s*(\d+)', webpage, 'status', default='') == '2':
+ raise UserNotLive(video_id=video_id)
+
+ # Grab metadata from API
+ params = {
+ 'aid': 'wp',
+ 'client_sys': 'wp',
+ 'time': int(time.time()),
+ }
+ params['auth'] = hashlib.md5(
+ f'room/{room_id}?{urllib.parse.urlencode(params)}zNzMV1y4EMxOHS6I5WKm'.encode()).hexdigest()
+ room = traverse_obj(self._download_json(
+ f'http://www.douyutv.com/api/v1/room/{room_id}', video_id,
+ note='Downloading room info', query=params, fatal=False), 'data')
+
+ # 1 = live, 2 = offline
+ if traverse_obj(room, 'show_status') == '2':
+ raise UserNotLive(video_id=video_id)
+
+ js_sign_func = self._search_js_sign_func(webpage, fatal=False) or self._get_sign_func(room_id, video_id)
+ form_data = {
+ 'rate': 0,
+ **self._calc_sign(js_sign_func, video_id, room_id),
+ }
+ stream_formats = [self._download_json(
+ f'https://www.douyu.com/lapi/live/getH5Play/{room_id}',
+ video_id, note="Downloading livestream format",
+ data=urlencode_postdata(form_data))]
+
+ for rate_id in traverse_obj(stream_formats[0], ('data', 'multirates', ..., 'rate')):
+ if rate_id != traverse_obj(stream_formats[0], ('data', 'rate')):
+ form_data['rate'] = rate_id
+ stream_formats.append(self._download_json(
+ f'https://www.douyu.com/lapi/live/getH5Play/{room_id}',
+ video_id, note=f'Downloading livestream format {rate_id}',
+ data=urlencode_postdata(form_data)))
+
+ return {
+ 'id': room_id,
+ 'formats': self._extract_stream_formats(stream_formats),
+ 'is_live': True,
+ **traverse_obj(room, {
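+ # room 'url' is a path like '/<name>'; drop the leading slash for display_id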
+ 'display_id': ('url', {str}, {lambda i: i[1:]}),
+ 'title': ('room_name', {unescapeHTML}),
+ 'description': ('show_details', {str}),
+ 'uploader': ('nickname', {str}),
+ 'thumbnail': ('room_src', {url_or_none}),
+ })
+ }
+
+
+class DouyuShowIE(DouyuBaseIE):
+ _VALID_URL = r'https?://v(?:mobile)?\.douyu\.com/show/(?P<id>[0-9a-zA-Z]+)'
+
+ _TESTS = [{
+ 'url': 'https://v.douyu.com/show/mPyq7oVNe5Yv1gLY',
+ 'info_dict': {
+ 'id': 'mPyq7oVNe5Yv1gLY',
+ 'ext': 'mp4',
+ 'title': '四川人小时候的味道“蒜苗回锅肉”,传统菜不能丢,要常做来吃',
+ 'duration': 633,
+ 'thumbnail': str,
+ 'uploader': '美食作家王刚V',
+ 'uploader_id': 'OVAO4NVx1m7Q',
+ 'timestamp': 1661850002,
+ 'upload_date': '20220830',
+ 'view_count': int,
+ 'tags': ['美食', '美食综合'],
+ },
+ }, {
+ 'url': 'https://vmobile.douyu.com/show/rjNBdvnVXNzvE2yw',
+ 'only_matching': True,
+ }]
+
+ _FORMATS = {
+ 'super': '原画',
+ 'high': '超清',
+ 'normal': '高清',
+ }
+
+ _QUALITIES = {
+ 'super': -1,
+ 'high': -2,
+ 'normal': -3,
+ }
+
+ _RESOLUTIONS = {
+ 'super': '1920x1080',
+ 'high': '1280x720',
+ 'normal': '852x480',
+ }
+
+ def _real_extract(self, url):
+ url = url.replace('vmobile.', 'v.')
+ video_id = self._match_id(url)
+
+ webpage = self._download_webpage(url, video_id)
+
+ video_info = self._search_json(
+ r'<script>\s*window\.\$DATA\s*=', webpage,
+ 'video info', video_id, transform_source=js_to_json)
+
+ js_sign_func = self._search_js_sign_func(webpage)
+ form_data = {
+ 'vid': video_id,
+ **self._calc_sign(js_sign_func, video_id, video_info['ROOM']['point_id']),
+ }
+ url_info = self._download_json(
+ 'https://v.douyu.com/api/stream/getStreamUrl', video_id,
+ data=urlencode_postdata(form_data), note='Downloading video formats')
+
+ formats = []
+ for name, url in traverse_obj(url_info, ('data', 'thumb_video', {dict.items}, ...)):
+ video_url = traverse_obj(url, ('url', {url_or_none}))
+ if video_url:
+ ext = determine_ext(video_url)
+ formats.append({
+ 'format': self._FORMATS.get(name),
+ 'format_id': name,
+ 'url': video_url,
+ 'quality': self._QUALITIES.get(name),
+ 'ext': 'mp4' if ext == 'm3u8' else ext,
+ 'protocol': 'm3u8_native' if ext == 'm3u8' else 'https',
+ **parse_resolution(self._RESOLUTIONS.get(name))
+ })
+ else:
+ self.to_screen(
+ f'"{self._FORMATS.get(name, name)}" format may require logging in. {self._login_hint()}')
+
+ return {
+ 'id': video_id,
+ 'formats': formats,
+ **traverse_obj(video_info, ('DATA', {
+ 'title': ('content', 'title', {str}),
+ 'uploader': ('content', 'author', {str}),
+ 'uploader_id': ('content', 'up_id', {str_or_none}),
+ 'duration': ('content', 'video_duration', {int_or_none}),
+ 'thumbnail': ('content', 'video_pic', {url_or_none}),
+ 'timestamp': ('content', 'create_time', {int_or_none}),
+ 'view_count': ('content', 'view_num', {int_or_none}),
+ 'tags': ('videoTag', ..., 'tagName', {str}),
+ }))
+ }
diff --git a/yt_dlp/extractor/dplay.py b/yt_dlp/extractor/dplay.py
new file mode 100644
index 0000000..363b4be
--- /dev/null
+++ b/yt_dlp/extractor/dplay.py
@@ -0,0 +1,1062 @@
+import json
+import uuid
+
+from .common import InfoExtractor
+from ..networking.exceptions import HTTPError
+from ..utils import (
+ determine_ext,
+ ExtractorError,
+ float_or_none,
+ int_or_none,
+ remove_start,
+ strip_or_none,
+ try_get,
+ unified_timestamp,
+)
+
+
+class DPlayBaseIE(InfoExtractor):
+ _PATH_REGEX = r'/(?P<id>[^/]+/[^/?#]+)'
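+ # anonymous auth tokens are cached per (API host, realm) and shared across extractors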
+ _auth_token_cache = {}
+
+ def _get_auth(self, disco_base, display_id, realm, needs_device_id=True):
+ key = (disco_base, realm)
+ st = self._get_cookies(disco_base).get('st')
+ token = (st and st.value) or self._auth_token_cache.get(key)
+
+ if not token:
+ query = {'realm': realm}
+ if needs_device_id:
+ query['deviceId'] = uuid.uuid4().hex
+ token = self._download_json(
+ disco_base + 'token', display_id, 'Downloading token',
+ query=query)['data']['attributes']['token']
+
+ # Save cache only if cookies are not being set
+ if not self._get_cookies(disco_base).get('st'):
+ self._auth_token_cache[key] = token
+
+ return f'Bearer {token}'
+
+ def _process_errors(self, e, geo_countries):
+ info = self._parse_json(e.cause.response.read().decode('utf-8'), None)
+ error = info['errors'][0]
+ error_code = error.get('code')
+ if error_code == 'access.denied.geoblocked':
+ self.raise_geo_restricted(countries=geo_countries)
+ elif error_code in ('access.denied.missingpackage', 'invalid.token'):
+ raise ExtractorError(
+ 'This video is only available for registered users. You may want to use --cookies.', expected=True)
+ raise ExtractorError(info['errors'][0]['detail'], expected=True)
+
+ def _update_disco_api_headers(self, headers, disco_base, display_id, realm):
+ headers['Authorization'] = self._get_auth(disco_base, display_id, realm, False)
+
+ def _download_video_playback_info(self, disco_base, video_id, headers):
+ streaming = self._download_json(
+ disco_base + 'playback/videoPlaybackInfo/' + video_id,
+ video_id, headers=headers)['data']['attributes']['streaming']
+ streaming_list = []
+ for format_id, format_dict in streaming.items():
+ streaming_list.append({
+ 'type': format_id,
+ 'url': format_dict.get('url'),
+ })
+ return streaming_list
+
+ def _get_disco_api_info(self, url, display_id, disco_host, realm, country, domain=''):
+ country = self.get_param('geo_bypass_country') or country
+ geo_countries = [country.upper()]
+ self._initialize_geo_bypass({
+ 'countries': geo_countries,
+ })
+ disco_base = 'https://%s/' % disco_host
+ headers = {
+ 'Referer': url,
+ }
+ self._update_disco_api_headers(headers, disco_base, display_id, realm)
+ try:
+ video = self._download_json(
+ disco_base + 'content/videos/' + display_id, display_id,
+ headers=headers, query={
+ 'fields[channel]': 'name',
+ 'fields[image]': 'height,src,width',
+ 'fields[show]': 'name',
+ 'fields[tag]': 'name',
+ 'fields[video]': 'description,episodeNumber,name,publishStart,seasonNumber,videoDuration',
+ 'include': 'images,primaryChannel,show,tags'
+ })
+ except ExtractorError as e:
+ if isinstance(e.cause, HTTPError) and e.cause.status == 400:
+ self._process_errors(e, geo_countries)
+ raise
+ video_id = video['data']['id']
+ info = video['data']['attributes']
+ title = info['name'].strip()
+ formats = []
+ subtitles = {}
+ try:
+ streaming = self._download_video_playback_info(
+ disco_base, video_id, headers)
+ except ExtractorError as e:
+ if isinstance(e.cause, HTTPError) and e.cause.status == 403:
+ self._process_errors(e, geo_countries)
+ raise
+ for format_dict in streaming:
+ if not isinstance(format_dict, dict):
+ continue
+ format_url = format_dict.get('url')
+ if not format_url:
+ continue
+ format_id = format_dict.get('type')
+ ext = determine_ext(format_url)
+ if format_id == 'dash' or ext == 'mpd':
+ dash_fmts, dash_subs = self._extract_mpd_formats_and_subtitles(
+ format_url, display_id, mpd_id='dash', fatal=False)
+ formats.extend(dash_fmts)
+ subtitles = self._merge_subtitles(subtitles, dash_subs)
+ elif format_id == 'hls' or ext == 'm3u8':
+ m3u8_fmts, m3u8_subs = self._extract_m3u8_formats_and_subtitles(
+ format_url, display_id, 'mp4',
+ entry_protocol='m3u8_native', m3u8_id='hls',
+ fatal=False)
+ formats.extend(m3u8_fmts)
+ subtitles = self._merge_subtitles(subtitles, m3u8_subs)
+ else:
+ formats.append({
+ 'url': format_url,
+ 'format_id': format_id,
+ })
+
+ creator = series = None
+ tags = []
+ thumbnails = []
+ included = video.get('included') or []
+ if isinstance(included, list):
+ for e in included:
+ attributes = e.get('attributes')
+ if not attributes:
+ continue
+ e_type = e.get('type')
+ if e_type == 'channel':
+ creator = attributes.get('name')
+ elif e_type == 'image':
+ src = attributes.get('src')
+ if src:
+ thumbnails.append({
+ 'url': src,
+ 'width': int_or_none(attributes.get('width')),
+ 'height': int_or_none(attributes.get('height')),
+ })
+ elif e_type == 'show':
+ series = attributes.get('name')
+ elif e_type == 'tag':
+ name = attributes.get('name')
+ if name:
+ tags.append(name)
+ return {
+ 'id': video_id,
+ 'display_id': display_id,
+ 'title': title,
+ 'description': strip_or_none(info.get('description')),
+ 'duration': float_or_none(info.get('videoDuration'), 1000),
+ 'timestamp': unified_timestamp(info.get('publishStart')),
+ 'series': series,
+ 'season_number': int_or_none(info.get('seasonNumber')),
+ 'episode_number': int_or_none(info.get('episodeNumber')),
+ 'creator': creator,
+ 'tags': tags,
+ 'thumbnails': thumbnails,
+ 'formats': formats,
+ 'subtitles': subtitles,
+ 'http_headers': {
+ 'referer': domain,
+ },
+ }
+
+
+class DPlayIE(DPlayBaseIE):
+ _VALID_URL = r'''(?x)https?://
+ (?P<domain>
+ (?:www\.)?(?P<host>d
+ (?:
+ play\.(?P<country>dk|fi|jp|se|no)|
+ iscoveryplus\.(?P<plus_country>dk|es|fi|it|se|no)
+ )
+ )|
+ (?P<subdomain_country>es|it)\.dplay\.com
+ )/[^/]+''' + DPlayBaseIE._PATH_REGEX
+
+ _TESTS = [{
+ # non geo restricted, via secure api, unsigned download hls URL
+ 'url': 'https://www.dplay.se/videos/nugammalt-77-handelser-som-format-sverige/nugammalt-77-handelser-som-format-sverige-101',
+ 'info_dict': {
+ 'id': '13628',
+ 'display_id': 'nugammalt-77-handelser-som-format-sverige/nugammalt-77-handelser-som-format-sverige-101',
+ 'ext': 'mp4',
+ 'title': 'Svensken lär sig njuta av livet',
+ 'description': 'md5:d3819c9bccffd0fe458ca42451dd50d8',
+ 'duration': 2649.856,
+ 'timestamp': 1365453720,
+ 'upload_date': '20130408',
+ 'creator': 'Kanal 5',
+ 'series': 'Nugammalt - 77 händelser som format Sverige',
+ 'season_number': 1,
+ 'episode_number': 1,
+ },
+ 'params': {
+ 'skip_download': True,
+ },
+ }, {
+ # geo restricted, via secure api, unsigned download hls URL
+ 'url': 'http://www.dplay.dk/videoer/ted-bundy-mind-of-a-monster/ted-bundy-mind-of-a-monster',
+ 'info_dict': {
+ 'id': '104465',
+ 'display_id': 'ted-bundy-mind-of-a-monster/ted-bundy-mind-of-a-monster',
+ 'ext': 'mp4',
+ 'title': 'Ted Bundy: Mind Of A Monster',
+ 'description': 'md5:8b780f6f18de4dae631668b8a9637995',
+ 'duration': 5290.027,
+ 'timestamp': 1570694400,
+ 'upload_date': '20191010',
+ 'creator': 'ID - Investigation Discovery',
+ 'series': 'Ted Bundy: Mind Of A Monster',
+ 'season_number': 1,
+ 'episode_number': 1,
+ },
+ 'params': {
+ 'skip_download': True,
+ },
+ }, {
+ # disco-api
+ 'url': 'https://www.dplay.no/videoer/i-kongens-klr/sesong-1-episode-7',
+ 'info_dict': {
+ 'id': '40206',
+ 'display_id': 'i-kongens-klr/sesong-1-episode-7',
+ 'ext': 'mp4',
+ 'title': 'Episode 7',
+ 'description': 'md5:e3e1411b2b9aebeea36a6ec5d50c60cf',
+ 'duration': 2611.16,
+ 'timestamp': 1516726800,
+ 'upload_date': '20180123',
+ 'series': 'I kongens klær',
+ 'season_number': 1,
+ 'episode_number': 7,
+ },
+ 'params': {
+ 'skip_download': True,
+ },
+ 'skip': 'Available for Premium users',
+ }, {
+ 'url': 'http://it.dplay.com/nove/biografie-imbarazzanti/luigi-di-maio-la-psicosi-di-stanislawskij/',
+ 'md5': '2b808ffb00fc47b884a172ca5d13053c',
+ 'info_dict': {
+ 'id': '6918',
+ 'display_id': 'biografie-imbarazzanti/luigi-di-maio-la-psicosi-di-stanislawskij',
+ 'ext': 'mp4',
+ 'title': 'Luigi Di Maio: la psicosi di Stanislawskij',
+ 'description': 'md5:3c7a4303aef85868f867a26f5cc14813',
+ 'thumbnail': r're:^https?://.*\.jpe?g',
+ 'upload_date': '20160524',
+ 'timestamp': 1464076800,
+ 'series': 'Biografie imbarazzanti',
+ 'season_number': 1,
+ 'episode': 'Episode 1',
+ 'episode_number': 1,
+ },
+ }, {
+ 'url': 'https://es.dplay.com/dmax/la-fiebre-del-oro/temporada-8-episodio-1/',
+ 'info_dict': {
+ 'id': '21652',
+ 'display_id': 'la-fiebre-del-oro/temporada-8-episodio-1',
+ 'ext': 'mp4',
+ 'title': 'Episodio 1',
+ 'description': 'md5:b9dcff2071086e003737485210675f69',
+ 'thumbnail': r're:^https?://.*\.png',
+ 'upload_date': '20180709',
+ 'timestamp': 1531173540,
+ 'series': 'La fiebre del oro',
+ 'season_number': 8,
+ 'episode': 'Episode 1',
+ 'episode_number': 1,
+ },
+ 'params': {
+ 'skip_download': True,
+ },
+ }, {
+ 'url': 'https://www.dplay.fi/videot/shifting-gears-with-aaron-kaufman/episode-16',
+ 'only_matching': True,
+ }, {
+ 'url': 'https://www.dplay.jp/video/gold-rush/24086',
+ 'only_matching': True,
+ }, {
+ 'url': 'https://www.discoveryplus.se/videos/nugammalt-77-handelser-som-format-sverige/nugammalt-77-handelser-som-format-sverige-101',
+ 'only_matching': True,
+ }, {
+ 'url': 'https://www.discoveryplus.dk/videoer/ted-bundy-mind-of-a-monster/ted-bundy-mind-of-a-monster',
+ 'only_matching': True,
+ }, {
+ 'url': 'https://www.discoveryplus.no/videoer/i-kongens-klr/sesong-1-episode-7',
+ 'only_matching': True,
+ }, {
+ 'url': 'https://www.discoveryplus.it/videos/biografie-imbarazzanti/luigi-di-maio-la-psicosi-di-stanislawskij',
+ 'only_matching': True,
+ }, {
+ 'url': 'https://www.discoveryplus.es/videos/la-fiebre-del-oro/temporada-8-episodio-1',
+ 'only_matching': True,
+ }, {
+ 'url': 'https://www.discoveryplus.fi/videot/shifting-gears-with-aaron-kaufman/episode-16',
+ 'only_matching': True,
+ }]
+
+ def _real_extract(self, url):
+ mobj = self._match_valid_url(url)
+ display_id = mobj.group('id')
+ domain = remove_start(mobj.group('domain'), 'www.')
+ country = mobj.group('country') or mobj.group('subdomain_country') or mobj.group('plus_country')
+ host = 'disco-api.' + domain if domain[0] == 'd' else 'eu2-prod.disco-api.com'
+ return self._get_disco_api_info(
+ url, display_id, host, 'dplay' + country, country, domain)
+
+
+class HGTVDeIE(DPlayBaseIE):
+ _VALID_URL = r'https?://de\.hgtv\.com/sendungen' + DPlayBaseIE._PATH_REGEX
+ _TESTS = [{
+ 'url': 'https://de.hgtv.com/sendungen/tiny-house-klein-aber-oho/wer-braucht-schon-eine-toilette/',
+ 'info_dict': {
+ 'id': '151205',
+ 'display_id': 'tiny-house-klein-aber-oho/wer-braucht-schon-eine-toilette',
+ 'ext': 'mp4',
+ 'title': 'Wer braucht schon eine Toilette',
+ 'description': 'md5:05b40a27e7aed2c9172de34d459134e2',
+ 'duration': 1177.024,
+ 'timestamp': 1595705400,
+ 'upload_date': '20200725',
+ 'creator': 'HGTV',
+ 'series': 'Tiny House - klein, aber oho',
+ 'season_number': 3,
+ 'episode_number': 3,
+ },
+ }]
+
+ def _real_extract(self, url):
+ display_id = self._match_id(url)
+ return self._get_disco_api_info(
+ url, display_id, 'eu1-prod.disco-api.com', 'hgtv', 'de')
+
+
+class DiscoveryPlusBaseIE(DPlayBaseIE):
+ def _update_disco_api_headers(self, headers, disco_base, display_id, realm):
+ headers['x-disco-client'] = f'WEB:UNKNOWN:{self._PRODUCT}:25.2.6'
+
+ def _download_video_playback_info(self, disco_base, video_id, headers):
+ return self._download_json(
+ disco_base + 'playback/v3/videoPlaybackInfo',
+ video_id, headers=headers, data=json.dumps({
+ 'deviceInfo': {
+ 'adBlocker': False,
+ },
+ 'videoId': video_id,
+ 'wisteriaProperties': {
+ 'platform': 'desktop',
+ 'product': self._PRODUCT,
+ },
+ }).encode('utf-8'))['data']['attributes']['streaming']
+
+ def _real_extract(self, url):
+ return self._get_disco_api_info(url, self._match_id(url), **self._DISCO_API_PARAMS)
+
+
+class GoDiscoveryIE(DiscoveryPlusBaseIE):
+ _VALID_URL = r'https?://(?:go\.)?discovery\.com/video' + DPlayBaseIE._PATH_REGEX
+ _TESTS = [{
+ 'url': 'https://go.discovery.com/video/dirty-jobs-discovery-atve-us/rodbuster-galvanizer',
+ 'info_dict': {
+ 'id': '4164906',
+ 'display_id': 'dirty-jobs-discovery-atve-us/rodbuster-galvanizer',
+ 'ext': 'mp4',
+ 'title': 'Rodbuster / Galvanizer',
+ 'description': 'Mike installs rebar with a team of rodbusters, then he galvanizes steel.',
+ 'season_number': 9,
+ 'episode_number': 1,
+ },
+ 'skip': 'Available for Premium users',
+ }, {
+ 'url': 'https://discovery.com/video/dirty-jobs-discovery-atve-us/rodbuster-galvanizer',
+ 'only_matching': True,
+ }]
+
+ _PRODUCT = 'dsc'
+ _DISCO_API_PARAMS = {
+ 'disco_host': 'us1-prod-direct.go.discovery.com',
+ 'realm': 'go',
+ 'country': 'us',
+ }
+
+
+class TravelChannelIE(DiscoveryPlusBaseIE):
+ _VALID_URL = r'https?://(?:watch\.)?travelchannel\.com/video' + DPlayBaseIE._PATH_REGEX
+ _TESTS = [{
+ 'url': 'https://watch.travelchannel.com/video/ghost-adventures-travel-channel/ghost-train-of-ely',
+ 'info_dict': {
+ 'id': '2220256',
+ 'display_id': 'ghost-adventures-travel-channel/ghost-train-of-ely',
+ 'ext': 'mp4',
+ 'title': 'Ghost Train of Ely',
+ 'description': 'The crew investigates the dark history of the Nevada Northern Railway.',
+ 'season_number': 24,
+ 'episode_number': 1,
+ },
+ 'skip': 'Available for Premium users',
+ }, {
+ 'url': 'https://watch.travelchannel.com/video/ghost-adventures-travel-channel/ghost-train-of-ely',
+ 'only_matching': True,
+ }]
+
+ _PRODUCT = 'trav'
+ _DISCO_API_PARAMS = {
+ 'disco_host': 'us1-prod-direct.watch.travelchannel.com',
+ 'realm': 'go',
+ 'country': 'us',
+ }
+
+
+class CookingChannelIE(DiscoveryPlusBaseIE):
+ _VALID_URL = r'https?://(?:watch\.)?cookingchanneltv\.com/video' + DPlayBaseIE._PATH_REGEX
+ _TESTS = [{
+ 'url': 'https://watch.cookingchanneltv.com/video/carnival-eats-cooking-channel/the-postman-always-brings-rice-2348634',
+ 'info_dict': {
+ 'id': '2348634',
+ 'display_id': 'carnival-eats-cooking-channel/the-postman-always-brings-rice-2348634',
+ 'ext': 'mp4',
+ 'title': 'The Postman Always Brings Rice',
+ 'description': 'Noah visits the Maui Fair and the Aurora Winter Festival in Vancouver.',
+ 'season_number': 9,
+ 'episode_number': 1,
+ },
+ 'skip': 'Available for Premium users',
+ }, {
+ 'url': 'https://watch.cookingchanneltv.com/video/carnival-eats-cooking-channel/the-postman-always-brings-rice-2348634',
+ 'only_matching': True,
+ }]
+
+ _PRODUCT = 'cook'
+ _DISCO_API_PARAMS = {
+ 'disco_host': 'us1-prod-direct.watch.cookingchanneltv.com',
+ 'realm': 'go',
+ 'country': 'us',
+ }
+
+
+class HGTVUsaIE(DiscoveryPlusBaseIE):
+ _VALID_URL = r'https?://(?:watch\.)?hgtv\.com/video' + DPlayBaseIE._PATH_REGEX
+ _TESTS = [{
+ 'url': 'https://watch.hgtv.com/video/home-inspector-joe-hgtv-atve-us/this-mold-house',
+ 'info_dict': {
+ 'id': '4289736',
+ 'display_id': 'home-inspector-joe-hgtv-atve-us/this-mold-house',
+ 'ext': 'mp4',
+ 'title': 'This Mold House',
+ 'description': 'Joe and Noel help take a familys dream home from hazardous to fabulous.',
+ 'season_number': 1,
+ 'episode_number': 1,
+ },
+ 'skip': 'Available for Premium users',
+ }, {
+ 'url': 'https://watch.hgtv.com/video/home-inspector-joe-hgtv-atve-us/this-mold-house',
+ 'only_matching': True,
+ }]
+
+ _PRODUCT = 'hgtv'
+ _DISCO_API_PARAMS = {
+ 'disco_host': 'us1-prod-direct.watch.hgtv.com',
+ 'realm': 'go',
+ 'country': 'us',
+ }
+
+
+class FoodNetworkIE(DiscoveryPlusBaseIE):
+ _VALID_URL = r'https?://(?:watch\.)?foodnetwork\.com/video' + DPlayBaseIE._PATH_REGEX
+ _TESTS = [{
+ 'url': 'https://watch.foodnetwork.com/video/kids-baking-championship-food-network/float-like-a-butterfly',
+ 'info_dict': {
+ 'id': '4116449',
+ 'display_id': 'kids-baking-championship-food-network/float-like-a-butterfly',
+ 'ext': 'mp4',
+ 'title': 'Float Like a Butterfly',
+ 'description': 'The 12 kid bakers create colorful carved butterfly cakes.',
+ 'season_number': 10,
+ 'episode_number': 1,
+ },
+ 'skip': 'Available for Premium users',
+ }, {
+ 'url': 'https://watch.foodnetwork.com/video/kids-baking-championship-food-network/float-like-a-butterfly',
+ 'only_matching': True,
+ }]
+
+ _PRODUCT = 'food'
+ _DISCO_API_PARAMS = {
+ 'disco_host': 'us1-prod-direct.watch.foodnetwork.com',
+ 'realm': 'go',
+ 'country': 'us',
+ }
+
+
+class DestinationAmericaIE(DiscoveryPlusBaseIE):
+ _VALID_URL = r'https?://(?:www\.)?destinationamerica\.com/video' + DPlayBaseIE._PATH_REGEX
+ _TESTS = [{
+ 'url': 'https://www.destinationamerica.com/video/alaska-monsters-destination-america-atve-us/central-alaskas-bigfoot',
+ 'info_dict': {
+ 'id': '4210904',
+ 'display_id': 'alaska-monsters-destination-america-atve-us/central-alaskas-bigfoot',
+ 'ext': 'mp4',
+ 'title': 'Central Alaskas Bigfoot',
+ 'description': 'A team heads to central Alaska to investigate an aggressive Bigfoot.',
+ 'season_number': 1,
+ 'episode_number': 1,
+ },
+ 'skip': 'Available for Premium users',
+ }, {
+ 'url': 'https://www.destinationamerica.com/video/alaska-monsters-destination-america-atve-us/central-alaskas-bigfoot',
+ 'only_matching': True,
+ }]
+
+ _PRODUCT = 'dam'
+ _DISCO_API_PARAMS = {
+ 'disco_host': 'us1-prod-direct.destinationamerica.com',
+ 'realm': 'go',
+ 'country': 'us',
+ }
+
+
+class InvestigationDiscoveryIE(DiscoveryPlusBaseIE):
+ _VALID_URL = r'https?://(?:www\.)?investigationdiscovery\.com/video' + DPlayBaseIE._PATH_REGEX
+ _TESTS = [{
+ 'url': 'https://www.investigationdiscovery.com/video/unmasked-investigation-discovery/the-killer-clown',
+ 'info_dict': {
+ 'id': '2139409',
+ 'display_id': 'unmasked-investigation-discovery/the-killer-clown',
+ 'ext': 'mp4',
+ 'title': 'The Killer Clown',
+ 'description': 'A wealthy Florida woman is fatally shot in the face by a clown at her door.',
+ 'season_number': 1,
+ 'episode_number': 1,
+ },
+ 'skip': 'Available for Premium users',
+ }, {
+ 'url': 'https://www.investigationdiscovery.com/video/unmasked-investigation-discovery/the-killer-clown',
+ 'only_matching': True,
+ }]
+
+ _PRODUCT = 'ids'
+ _DISCO_API_PARAMS = {
+ 'disco_host': 'us1-prod-direct.investigationdiscovery.com',
+ 'realm': 'go',
+ 'country': 'us',
+ }
+
+
+class AmHistoryChannelIE(DiscoveryPlusBaseIE):
+ _VALID_URL = r'https?://(?:www\.)?ahctv\.com/video' + DPlayBaseIE._PATH_REGEX
+ _TESTS = [{
+ 'url': 'https://www.ahctv.com/video/modern-sniper-ahc/army',
+ 'info_dict': {
+ 'id': '2309730',
+ 'display_id': 'modern-sniper-ahc/army',
+ 'ext': 'mp4',
+ 'title': 'Army',
+ 'description': 'Snipers today face challenges their predecessors couldve only dreamed of.',
+ 'season_number': 1,
+ 'episode_number': 1,
+ },
+ 'skip': 'Available for Premium users',
+ }, {
+ 'url': 'https://www.ahctv.com/video/modern-sniper-ahc/army',
+ 'only_matching': True,
+ }]
+
+ _PRODUCT = 'ahc'
+ _DISCO_API_PARAMS = {
+ 'disco_host': 'us1-prod-direct.ahctv.com',
+ 'realm': 'go',
+ 'country': 'us',
+ }
+
+
+class ScienceChannelIE(DiscoveryPlusBaseIE):
+ _VALID_URL = r'https?://(?:www\.)?sciencechannel\.com/video' + DPlayBaseIE._PATH_REGEX
+ _TESTS = [{
+ 'url': 'https://www.sciencechannel.com/video/strangest-things-science-atve-us/nazi-mystery-machine',
+ 'info_dict': {
+ 'id': '2842849',
+ 'display_id': 'strangest-things-science-atve-us/nazi-mystery-machine',
+ 'ext': 'mp4',
+ 'title': 'Nazi Mystery Machine',
+ 'description': 'Experts investigate the secrets of a revolutionary encryption machine.',
+ 'season_number': 1,
+ 'episode_number': 1,
+ },
+ 'skip': 'Available for Premium users',
+ }, {
+ 'url': 'https://www.sciencechannel.com/video/strangest-things-science-atve-us/nazi-mystery-machine',
+ 'only_matching': True,
+ }]
+
+ _PRODUCT = 'sci'
+ _DISCO_API_PARAMS = {
+ 'disco_host': 'us1-prod-direct.sciencechannel.com',
+ 'realm': 'go',
+ 'country': 'us',
+ }
+
+
+class DIYNetworkIE(DiscoveryPlusBaseIE):
+ _VALID_URL = r'https?://(?:watch\.)?diynetwork\.com/video' + DPlayBaseIE._PATH_REGEX
+ _TESTS = [{
+ 'url': 'https://watch.diynetwork.com/video/pool-kings-diy-network/bringing-beach-life-to-texas',
+ 'info_dict': {
+ 'id': '2309730',
+ 'display_id': 'pool-kings-diy-network/bringing-beach-life-to-texas',
+ 'ext': 'mp4',
+ 'title': 'Bringing Beach Life to Texas',
+ 'description': 'The Pool Kings give a family a day at the beach in their own backyard.',
+ 'season_number': 10,
+ 'episode_number': 2,
+ },
+ 'skip': 'Available for Premium users',
+ }, {
+ 'url': 'https://watch.diynetwork.com/video/pool-kings-diy-network/bringing-beach-life-to-texas',
+ 'only_matching': True,
+ }]
+
+ _PRODUCT = 'diy'
+ _DISCO_API_PARAMS = {
+ 'disco_host': 'us1-prod-direct.watch.diynetwork.com',
+ 'realm': 'go',
+ 'country': 'us',
+ }
+
+
+class DiscoveryLifeIE(DiscoveryPlusBaseIE):
+ _VALID_URL = r'https?://(?:www\.)?discoverylife\.com/video' + DPlayBaseIE._PATH_REGEX
+ _TESTS = [{
+ 'url': 'https://www.discoverylife.com/video/surviving-death-discovery-life-atve-us/bodily-trauma',
+ 'info_dict': {
+ 'id': '2218238',
+ 'display_id': 'surviving-death-discovery-life-atve-us/bodily-trauma',
+ 'ext': 'mp4',
+ 'title': 'Bodily Trauma',
+ 'description': 'Meet three people who tested the limits of the human body.',
+ 'season_number': 1,
+ 'episode_number': 2,
+ },
+ 'skip': 'Available for Premium users',
+ }, {
+ 'url': 'https://www.discoverylife.com/video/surviving-death-discovery-life-atve-us/bodily-trauma',
+ 'only_matching': True,
+ }]
+
+ _PRODUCT = 'dlf'
+ _DISCO_API_PARAMS = {
+ 'disco_host': 'us1-prod-direct.discoverylife.com',
+ 'realm': 'go',
+ 'country': 'us',
+ }
+
+
+class AnimalPlanetIE(DiscoveryPlusBaseIE):
+ _VALID_URL = r'https?://(?:www\.)?animalplanet\.com/video' + DPlayBaseIE._PATH_REGEX
+ _TESTS = [{
+ 'url': 'https://www.animalplanet.com/video/north-woods-law-animal-planet/squirrel-showdown',
+ 'info_dict': {
+ 'id': '3338923',
+ 'display_id': 'north-woods-law-animal-planet/squirrel-showdown',
+ 'ext': 'mp4',
+ 'title': 'Squirrel Showdown',
+ 'description': 'A woman is suspected of being in possession of flying squirrel kits.',
+ 'season_number': 16,
+ 'episode_number': 11,
+ },
+ 'skip': 'Available for Premium users',
+ }, {
+ 'url': 'https://www.animalplanet.com/video/north-woods-law-animal-planet/squirrel-showdown',
+ 'only_matching': True,
+ }]
+
+ _PRODUCT = 'apl'
+ _DISCO_API_PARAMS = {
+ 'disco_host': 'us1-prod-direct.animalplanet.com',
+ 'realm': 'go',
+ 'country': 'us',
+ }
+
+
+class TLCIE(DiscoveryPlusBaseIE):
+ _VALID_URL = r'https?://(?:go\.)?tlc\.com/video' + DPlayBaseIE._PATH_REGEX
+ _TESTS = [{
+ 'url': 'https://go.tlc.com/video/my-600-lb-life-tlc/melissas-story-part-1',
+ 'info_dict': {
+ 'id': '2206540',
+ 'display_id': 'my-600-lb-life-tlc/melissas-story-part-1',
+ 'ext': 'mp4',
+ 'title': 'Melissas Story (Part 1)',
+ 'description': 'At 650 lbs, Melissa is ready to begin her seven-year weight loss journey.',
+ 'season_number': 1,
+ 'episode_number': 1,
+ },
+ 'skip': 'Available for Premium users',
+ }, {
+ 'url': 'https://go.tlc.com/video/my-600-lb-life-tlc/melissas-story-part-1',
+ 'only_matching': True,
+ }]
+
+ _PRODUCT = 'tlc'
+ _DISCO_API_PARAMS = {
+ 'disco_host': 'us1-prod-direct.tlc.com',
+ 'realm': 'go',
+ 'country': 'us',
+ }
+
+
+class MotorTrendIE(DiscoveryPlusBaseIE):
+ _VALID_URL = r'https?://(?:watch\.)?motortrend\.com/video' + DPlayBaseIE._PATH_REGEX
+ _TESTS = [{
+ 'url': 'https://watch.motortrend.com/video/car-issues-motortrend-atve-us/double-dakotas',
+ 'info_dict': {
+ 'id': '"4859182"',
+ 'display_id': 'double-dakotas',
+ 'ext': 'mp4',
+ 'title': 'Double Dakotas',
+ 'description': 'Tylers buy-one-get-one Dakota deal has the Wizard pulling double duty.',
+ 'season_number': 2,
+ 'episode_number': 3,
+ },
+ 'skip': 'Available for Premium users',
+ }, {
+ 'url': 'https://watch.motortrend.com/video/car-issues-motortrend-atve-us/double-dakotas',
+ 'only_matching': True,
+ }]
+
+ _PRODUCT = 'vel'
+ _DISCO_API_PARAMS = {
+ 'disco_host': 'us1-prod-direct.watch.motortrend.com',
+ 'realm': 'go',
+ 'country': 'us',
+ }
+
+
+class MotorTrendOnDemandIE(DiscoveryPlusBaseIE):
+ _VALID_URL = r'https?://(?:www\.)?motortrend(?:ondemand\.com|\.com/plus)/detail' + DPlayBaseIE._PATH_REGEX
+ _TESTS = [{
+ 'url': 'https://www.motortrendondemand.com/detail/wheelstanding-dump-truck-stubby-bobs-comeback/37699/784',
+ 'info_dict': {
+ 'id': '37699',
+ 'display_id': 'wheelstanding-dump-truck-stubby-bobs-comeback/37699',
+ 'ext': 'mp4',
+ 'title': 'Wheelstanding Dump Truck! Stubby Bob’s Comeback',
+ 'description': 'md5:996915abe52a1c3dfc83aecea3cce8e7',
+ 'season_number': 5,
+ 'episode_number': 52,
+ 'episode': 'Episode 52',
+ 'season': 'Season 5',
+ 'thumbnail': r're:^https?://.+\.jpe?g$',
+ 'timestamp': 1388534401,
+ 'duration': 1887.345,
+ 'creator': 'Originals',
+ 'series': 'Roadkill',
+ 'upload_date': '20140101',
+ 'tags': [],
+ },
+ }, {
+ 'url': 'https://www.motortrend.com/plus/detail/roadworthy-rescues-teaser-trailer/4922860/',
+ 'info_dict': {
+ 'id': '4922860',
+ 'ext': 'mp4',
+ 'title': 'Roadworthy Rescues | Teaser Trailer',
+ 'description': 'Derek Bieri helps Freiburger and Finnegan with their \'68 big-block Dart.',
+ 'display_id': 'roadworthy-rescues-teaser-trailer/4922860',
+ 'creator': 'Originals',
+ 'series': 'Roadworthy Rescues',
+ 'thumbnail': r're:^https?://.+\.jpe?g$',
+ 'upload_date': '20220907',
+ 'timestamp': 1662523200,
+ 'duration': 1066.356,
+ 'tags': [],
+ },
+ }, {
+ 'url': 'https://www.motortrend.com/plus/detail/ugly-duckling/2450033/12439',
+ 'only_matching': True,
+ }]
+
+ _PRODUCT = 'MTOD'
+ _DISCO_API_PARAMS = {
+ 'disco_host': 'us1-prod-direct.motortrendondemand.com',
+ 'realm': 'motortrend',
+ 'country': 'us',
+ }
+
+ def _update_disco_api_headers(self, headers, disco_base, display_id, realm):
+ headers.update({
+ 'x-disco-params': f'realm={realm}',
+ 'x-disco-client': f'WEB:UNKNOWN:{self._PRODUCT}:4.39.1-gi1',
+ 'Authorization': self._get_auth(disco_base, display_id, realm),
+ })
+
+
+class DiscoveryPlusIE(DiscoveryPlusBaseIE):
+ _VALID_URL = r'https?://(?:www\.)?discoveryplus\.com/(?!it/)(?:\w{2}/)?video' + DPlayBaseIE._PATH_REGEX
+ _TESTS = [{
+ 'url': 'https://www.discoveryplus.com/video/property-brothers-forever-home/food-and-family',
+ 'info_dict': {
+ 'id': '1140794',
+ 'display_id': 'property-brothers-forever-home/food-and-family',
+ 'ext': 'mp4',
+ 'title': 'Food and Family',
+ 'description': 'The brothers help a Richmond family expand their single-level home.',
+ 'duration': 2583.113,
+ 'timestamp': 1609304400,
+ 'upload_date': '20201230',
+ 'creator': 'HGTV',
+ 'series': 'Property Brothers: Forever Home',
+ 'season_number': 1,
+ 'episode_number': 1,
+ },
+ 'skip': 'Available for Premium users',
+ }, {
+ 'url': 'https://discoveryplus.com/ca/video/bering-sea-gold-discovery-ca/goldslingers',
+ 'only_matching': True,
+ }]
+
+ _PRODUCT = 'dplus_us'
+ _DISCO_API_PARAMS = {
+ 'disco_host': 'us1-prod-direct.discoveryplus.com',
+ 'realm': 'go',
+ 'country': 'us',
+ }
+
+
+class DiscoveryPlusIndiaIE(DiscoveryPlusBaseIE):
+ _VALID_URL = r'https?://(?:www\.)?discoveryplus\.in/videos?' + DPlayBaseIE._PATH_REGEX
+ _TESTS = [{
+ 'url': 'https://www.discoveryplus.in/videos/how-do-they-do-it/fugu-and-more?seasonId=8&type=EPISODE',
+ 'info_dict': {
+ 'id': '27104',
+ 'ext': 'mp4',
+ 'display_id': 'how-do-they-do-it/fugu-and-more',
+ 'title': 'Fugu and More',
+ 'description': 'The Japanese catch, prepare and eat the deadliest fish on the planet.',
+ 'duration': 1319.32,
+ 'timestamp': 1582309800,
+ 'upload_date': '20200221',
+ 'series': 'How Do They Do It?',
+ 'season_number': 8,
+ 'episode_number': 2,
+ 'creator': 'Discovery Channel',
+ 'thumbnail': r're:https://.+\.jpeg',
+ 'episode': 'Episode 2',
+ 'season': 'Season 8',
+ 'tags': [],
+ },
+ 'params': {
+ 'skip_download': True,
+ }
+ }]
+
+ _PRODUCT = 'dplus-india'
+ _DISCO_API_PARAMS = {
+ 'disco_host': 'ap2-prod-direct.discoveryplus.in',
+ 'realm': 'dplusindia',
+ 'country': 'in',
+ 'domain': 'https://www.discoveryplus.in/',
+ }
+
+ def _update_disco_api_headers(self, headers, disco_base, display_id, realm):
+ headers.update({
+ 'x-disco-params': f'realm={realm}',
+ 'x-disco-client': f'WEB:UNKNOWN:{self._PRODUCT}:17.0.0',
+ 'Authorization': self._get_auth(disco_base, display_id, realm),
+ })
+
+
+class DiscoveryNetworksDeIE(DPlayBaseIE):
+ _VALID_URL = r'https?://(?:www\.)?(?P<domain>(?:tlc|dmax)\.de|dplay\.co\.uk)/(?:programme|show|sendungen)/(?P<programme>[^/]+)/(?:video/)?(?P<alternate_id>[^/]+)'
+
+ _TESTS = [{
+ 'url': 'https://www.tlc.de/programme/breaking-amish/video/die-welt-da-drauen/DCB331270001100',
+ 'info_dict': {
+ 'id': '78867',
+ 'ext': 'mp4',
+ 'title': 'Die Welt da draußen',
+ 'description': 'md5:61033c12b73286e409d99a41742ef608',
+ 'timestamp': 1554069600,
+ 'upload_date': '20190331',
+ 'creator': 'TLC',
+ 'season': 'Season 1',
+ 'series': 'Breaking Amish',
+ 'episode_number': 1,
+ 'tags': ['new york', 'großstadt', 'amische', 'landleben', 'modern', 'infos', 'tradition', 'herausforderung'],
+ 'display_id': 'breaking-amish/die-welt-da-drauen',
+ 'episode': 'Episode 1',
+ 'duration': 2625.024,
+ 'season_number': 1,
+ 'thumbnail': r're:https://.+\.jpg',
+ },
+ 'params': {
+ 'skip_download': True,
+ },
+ }, {
+ 'url': 'https://www.dmax.de/programme/dmax-highlights/video/tuning-star-sidney-hoffmann-exklusiv-bei-dmax/191023082312316',
+ 'only_matching': True,
+ }, {
+ 'url': 'https://www.dplay.co.uk/show/ghost-adventures/video/hotel-leger-103620/EHD_280313B',
+ 'only_matching': True,
+ }, {
+ 'url': 'https://tlc.de/sendungen/breaking-amish/die-welt-da-drauen/',
+ 'only_matching': True,
+ }]
+
+ def _real_extract(self, url):
+ domain, programme, alternate_id = self._match_valid_url(url).groups()
+ country = 'GB' if domain == 'dplay.co.uk' else 'DE'
+ realm = 'questuk' if country == 'GB' else domain.replace('.', '')
+ return self._get_disco_api_info(
+ url, '%s/%s' % (programme, alternate_id),
+ 'sonic-eu1-prod.disco-api.com', realm, country)
+
+
+class DiscoveryPlusShowBaseIE(DPlayBaseIE):
+
+ def _entries(self, show_name):
+ headers = {
+ 'x-disco-client': self._X_CLIENT,
+ 'x-disco-params': f'realm={self._REALM}',
+ 'referer': self._DOMAIN,
+ 'Authentication': self._get_auth(self._BASE_API, None, self._REALM),
+ }
+ show_json = self._download_json(
+ f'{self._BASE_API}cms/routes/{self._SHOW_STR}/{show_name}?include=default',
+ video_id=show_name, headers=headers)['included'][self._INDEX]['attributes']['component']
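+ # mandatoryParams is a query-string fragment; the value after '=' is the show id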
+ show_id = show_json['mandatoryParams'].split('=')[-1]
+ season_url = self._BASE_API + 'content/videos?sort=episodeNumber&filter[seasonNumber]={}&filter[show.id]={}&page[size]=100&page[number]={}'
+ for season in show_json['filters'][0]['options']:
+ season_id = season['id']
+ total_pages, page_num = 1, 0
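+ # page numbers are 1-based; the real total is only known after the first response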
+ while page_num < total_pages:
+ season_json = self._download_json(
+ season_url.format(season_id, show_id, str(page_num + 1)), show_name, headers=headers,
+ note='Downloading season %s JSON metadata%s' % (season_id, ' page %d' % page_num if page_num else ''))
+ if page_num == 0:
+ total_pages = try_get(season_json, lambda x: x['meta']['totalPages'], int) or 1
+ episodes_json = season_json['data']
+ for episode in episodes_json:
+ video_path = episode['attributes']['path']
+ yield self.url_result(
+ '%svideos/%s' % (self._DOMAIN, video_path),
+ ie=self._VIDEO_IE.ie_key(), video_id=episode.get('id') or video_path)
+ page_num += 1
+
+ def _real_extract(self, url):
+ show_name = self._match_valid_url(url).group('show_name')
+ return self.playlist_result(self._entries(show_name), playlist_id=show_name)
+
+
+class DiscoveryPlusItalyIE(DiscoveryPlusBaseIE):
+ _VALID_URL = r'https?://(?:www\.)?discoveryplus\.com/it/video' + DPlayBaseIE._PATH_REGEX
+ _TESTS = [{
+ 'url': 'https://www.discoveryplus.com/it/video/i-signori-della-neve/stagione-2-episodio-1-i-preparativi',
+ 'only_matching': True,
+ }, {
+ 'url': 'https://www.discoveryplus.com/it/video/super-benny/trailer',
+ 'only_matching': True,
+ }]
+
+ _PRODUCT = 'dplus_us'
+ _DISCO_API_PARAMS = {
+ 'disco_host': 'eu1-prod-direct.discoveryplus.com',
+ 'realm': 'dplay',
+ 'country': 'it',
+ }
+
+ def _update_disco_api_headers(self, headers, disco_base, display_id, realm):
+ headers.update({
+ 'x-disco-params': f'realm={realm}',
+ 'x-disco-client': f'WEB:UNKNOWN:{self._PRODUCT}:25.2.6',
+ 'Authorization': self._get_auth(disco_base, display_id, realm),
+ })
+
+
+class DiscoveryPlusItalyShowIE(DiscoveryPlusShowBaseIE):
+ _VALID_URL = r'https?://(?:www\.)?discoveryplus\.it/programmi/(?P<show_name>[^/]+)/?(?:[?#]|$)'
+ _TESTS = [{
+ 'url': 'https://www.discoveryplus.it/programmi/deal-with-it-stai-al-gioco',
+ 'playlist_mincount': 168,
+ 'info_dict': {
+ 'id': 'deal-with-it-stai-al-gioco',
+ },
+ }]
+
+ _BASE_API = 'https://disco-api.discoveryplus.it/'
+ _DOMAIN = 'https://www.discoveryplus.it/'
+ _X_CLIENT = 'WEB:UNKNOWN:dplay-client:2.6.0'
+ _REALM = 'dplayit'
+ _SHOW_STR = 'programmi'
+ _INDEX = 1
+ _VIDEO_IE = DPlayIE
+
+
+class DiscoveryPlusIndiaShowIE(DiscoveryPlusShowBaseIE):
+ _VALID_URL = r'https?://(?:www\.)?discoveryplus\.in/show/(?P<show_name>[^/]+)/?(?:[?#]|$)'
+ _TESTS = [{
+ 'url': 'https://www.discoveryplus.in/show/how-do-they-do-it',
+ 'playlist_mincount': 140,
+ 'info_dict': {
+ 'id': 'how-do-they-do-it',
+ },
+ }]
+
+ _BASE_API = 'https://ap2-prod-direct.discoveryplus.in/'
+ _DOMAIN = 'https://www.discoveryplus.in/'
+ _X_CLIENT = 'WEB:UNKNOWN:dplus-india:prod'
+ _REALM = 'dplusindia'
+ _SHOW_STR = 'show'
+ _INDEX = 4
+ _VIDEO_IE = DiscoveryPlusIndiaIE
+
+
+class GlobalCyclingNetworkPlusIE(DiscoveryPlusBaseIE):
+ _VALID_URL = r'https?://plus\.globalcyclingnetwork\.com/watch/(?P<id>\d+)'
+ _TESTS = [{
+ 'url': 'https://plus.globalcyclingnetwork.com/watch/1397691',
+ 'info_dict': {
+ 'id': '1397691',
+ 'ext': 'mp4',
+ 'title': 'The Athertons: Mountain Biking\'s Fastest Family',
+ 'description': 'md5:75a81937fcd8b989eec6083a709cd837',
+ 'thumbnail': 'https://us1-prod-images.disco-api.com/2021/03/04/eb9e3026-4849-3001-8281-9356466f0557.png',
+ 'series': 'gcn',
+ 'creator': 'Gcn',
+ 'upload_date': '20210309',
+ 'timestamp': 1615248000,
+ 'duration': 2531.0,
+ 'tags': [],
+ },
+ 'skip': 'Subscription required',
+ 'params': {'skip_download': 'm3u8'},
+ }]
+
+ _PRODUCT = 'web'
+ _DISCO_API_PARAMS = {
+ 'disco_host': 'disco-api-prod.globalcyclingnetwork.com',
+ 'realm': 'gcn',
+ 'country': 'us',
+ }
+
+ def _update_disco_api_headers(self, headers, disco_base, display_id, realm):
+ headers.update({
+ 'x-disco-params': f'realm={realm}',
+ 'x-disco-client': f'WEB:UNKNOWN:{self._PRODUCT}:27.3.2',
+ 'Authorization': self._get_auth(disco_base, display_id, realm),
+ })
diff --git a/yt_dlp/extractor/drbonanza.py b/yt_dlp/extractor/drbonanza.py
new file mode 100644
index 0000000..824d70d
--- /dev/null
+++ b/yt_dlp/extractor/drbonanza.py
@@ -0,0 +1,54 @@
+from .common import InfoExtractor
+from ..utils import (
+ js_to_json,
+ parse_duration,
+ unescapeHTML,
+)
+
+
+class DRBonanzaIE(InfoExtractor):
+ _VALID_URL = r'https?://(?:www\.)?dr\.dk/bonanza/[^/]+/\d+/[^/]+/(?P<id>\d+)/(?P<display_id>[^/?#&]+)'
+ _TEST = {
+ 'url': 'http://www.dr.dk/bonanza/serie/154/matador/40312/matador---0824-komme-fremmede-',
+ 'info_dict': {
+ 'id': '40312',
+ 'display_id': 'matador---0824-komme-fremmede-',
+ 'ext': 'mp4',
+ 'title': 'MATADOR - 08:24. "Komme fremmede".',
+ 'description': 'md5:77b4c1ac4d4c1b9d610ab4395212ff84',
+ 'thumbnail': r're:^https?://.*\.(?:gif|jpg)$',
+ 'duration': 4613,
+ },
+ }
+
+ def _real_extract(self, url):
+ mobj = self._match_valid_url(url)
+ video_id, display_id = mobj.group('id', 'display_id')
+
+ webpage = self._download_webpage(url, display_id)
+
+ info = self._parse_html5_media_entries(
+ url, webpage, display_id, m3u8_id='hls',
+ m3u8_entry_protocol='m3u8_native')[0]
+
+ asset = self._parse_json(
+ self._search_regex(
+ r'(?s)currentAsset\s*=\s*({.+?})\s*</script', webpage, 'asset'),
+ display_id, transform_source=js_to_json)
+
+ title = unescapeHTML(asset['AssetTitle']).strip()
+
+ def extract(field):
+ return self._search_regex(
+ r'<div[^>]+>\s*<p>%s:<p>\s*</div>\s*<div[^>]+>\s*<p>([^<]+)</p>' % field,
+ webpage, field, default=None)
+
+ info.update({
+ 'id': asset.get('AssetId') or video_id,
+ 'display_id': display_id,
+ 'title': title,
+ 'description': extract('Programinfo'),
+ 'duration': parse_duration(extract('Tid')),
+ 'thumbnail': asset.get('AssetImageUrl'),
+ })
+ return info
diff --git a/yt_dlp/extractor/dreisat.py b/yt_dlp/extractor/dreisat.py
new file mode 100644
index 0000000..8a59c23
--- /dev/null
+++ b/yt_dlp/extractor/dreisat.py
@@ -0,0 +1,41 @@
+from .zdf import ZDFIE
+
+
+class DreiSatIE(ZDFIE): # XXX: Do not subclass from concrete IE
+ IE_NAME = '3sat'
+ _VALID_URL = r'https?://(?:www\.)?3sat\.de/(?:[^/]+/)*(?P<id>[^/?#&]+)\.html'
+ _TESTS = [{
+ # Same as https://www.zdf.de/dokumentation/ab-18/10-wochen-sommer-102.html
+ 'url': 'https://www.3sat.de/film/ab-18/10-wochen-sommer-108.html',
+ 'md5': '0aff3e7bc72c8813f5e0fae333316a1d',
+ 'info_dict': {
+ 'id': '141007_ab18_10wochensommer_film',
+ 'ext': 'mp4',
+ 'title': 'Ab 18! - 10 Wochen Sommer',
+ 'description': 'md5:8253f41dc99ce2c3ff892dac2d65fe26',
+ 'duration': 2660,
+ 'timestamp': 1608604200,
+ 'upload_date': '20201222',
+ },
+ }, {
+ 'url': 'https://www.3sat.de/gesellschaft/schweizweit/waidmannsheil-100.html',
+ 'info_dict': {
+ 'id': '140913_sendung_schweizweit',
+ 'ext': 'mp4',
+ 'title': 'Waidmannsheil',
+ 'description': 'md5:cce00ca1d70e21425e72c86a98a56817',
+ 'timestamp': 1410623100,
+ 'upload_date': '20140913'
+ },
+ 'params': {
+ 'skip_download': True,
+ }
+ }, {
+ # Same as https://www.zdf.de/filme/filme-sonstige/der-hauptmann-112.html
+ 'url': 'https://www.3sat.de/film/spielfilm/der-hauptmann-100.html',
+ 'only_matching': True,
+ }, {
+ # Same as https://www.zdf.de/wissen/nano/nano-21-mai-2019-102.html, equal media ids
+ 'url': 'https://www.3sat.de/wissen/nano/nano-21-mai-2019-102.html',
+ 'only_matching': True,
+ }]
diff --git a/yt_dlp/extractor/drooble.py b/yt_dlp/extractor/drooble.py
new file mode 100644
index 0000000..106e5c4
--- /dev/null
+++ b/yt_dlp/extractor/drooble.py
@@ -0,0 +1,116 @@
+import json
+
+from .common import InfoExtractor
+from ..utils import (
+ ExtractorError,
+ int_or_none,
+ try_get,
+)
+
+
+class DroobleIE(InfoExtractor):
+ _VALID_URL = r'''(?x)https?://drooble\.com/(?:
+ (?:(?P<user>[^/]+)/)?(?P<kind>song|videos|music/albums)/(?P<id>\d+)|
+ (?P<user_2>[^/]+)/(?P<kind_2>videos|music))
+ '''
+ _TESTS = [{
+ 'url': 'https://drooble.com/song/2858030',
+ 'md5': '5ffda90f61c7c318dc0c3df4179eb064',
+ 'info_dict': {
+ 'id': '2858030',
+ 'ext': 'mp3',
+ 'title': 'Skankocillin',
+ 'upload_date': '20200801',
+ 'timestamp': 1596241390,
+ 'uploader_id': '95894',
+ 'uploader': 'Bluebeat Shelter',
+ }
+ }, {
+ 'url': 'https://drooble.com/karl340758/videos/2859183',
+ 'info_dict': {
+ 'id': 'J6QCQY_I5Tk',
+ 'ext': 'mp4',
+ 'title': 'Skankocillin',
+ 'uploader_id': 'UCrSRoI5vVyeYihtWEYua7rg',
+ 'description': 'md5:ffc0bd8ba383db5341a86a6cd7d9bcca',
+ 'upload_date': '20200731',
+ 'uploader': 'Bluebeat Shelter',
+ }
+ }, {
+ 'url': 'https://drooble.com/karl340758/music/albums/2858031',
+ 'info_dict': {
+ 'id': '2858031',
+ },
+ 'playlist_mincount': 8,
+ }, {
+ 'url': 'https://drooble.com/karl340758/music',
+ 'info_dict': {
+ 'id': 'karl340758',
+ },
+ 'playlist_mincount': 8,
+ }, {
+ 'url': 'https://drooble.com/karl340758/videos',
+ 'info_dict': {
+ 'id': 'karl340758',
+ },
+ 'playlist_mincount': 8,
+ }]
+
+ def _call_api(self, method, video_id, data=None):
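+ # API responses are JSON arrays of the form [success_flag, payload]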
+ response = self._download_json(
+ f'https://drooble.com/api/dt/{method}', video_id, data=json.dumps(data).encode())
+ if not response[0]:
+ raise ExtractorError('Unable to download JSON metadata')
+ return response[1]
+
+ def _real_extract(self, url):
+ mobj = self._match_valid_url(url)
+ user = mobj.group('user') or mobj.group('user_2')
+ kind = mobj.group('kind') or mobj.group('kind_2')
+ display_id = mobj.group('id') or user
+
+ if mobj.group('kind_2') == 'videos':
+ data = {'from_user': display_id, 'album': -1, 'limit': 18, 'offset': 0, 'order': 'new2old', 'type': 'video'}
+ elif kind in ('music/albums', 'music'):
+ data = {'user': user, 'public_only': True, 'individual_limit': {'singles': 1, 'albums': 1, 'playlists': 1}}
+ else:
+ data = {'url_slug': display_id, 'children': 10, 'order': 'old2new'}
+
+ method = 'getMusicOverview' if kind in ('music/albums', 'music') else 'getElements'
+ json_data = self._call_api(method, display_id, data=data)
+ if kind in ('music/albums', 'music'):
+ json_data = json_data['singles']['list']
+
+ entities = []
+ for media in json_data:
+ url = media.get('external_media_url') or media.get('link')
+ if not url:
+ continue
+ if url.startswith('https://www.youtube.com'):
+ entities.append({
+ '_type': 'url',
+ 'url': url,
+ 'ie_key': 'Youtube'
+ })
+ continue
+ is_audio = (media.get('type') or '').lower() == 'audio'
+ entities.append({
+ 'url': url,
+ 'id': media['id'],
+ 'title': media['title'],
+ 'duration': int_or_none(media.get('duration')),
+ 'timestamp': int_or_none(media.get('timestamp')),
+ 'album': try_get(media, lambda x: x['album']['title']),
+ 'uploader': try_get(media, lambda x: x['creator']['display_name']),
+ 'uploader_id': try_get(media, lambda x: x['creator']['id']),
+ 'thumbnail': media.get('image_comment'),
+ 'like_count': int_or_none(media.get('likes')),
+ 'vcodec': 'none' if is_audio else None,
+ 'ext': 'mp3' if is_audio else None,
+ })
+
+ if len(entities) > 1:
+ return self.playlist_result(entities, display_id)
+
+ return entities[0]
diff --git a/yt_dlp/extractor/dropbox.py b/yt_dlp/extractor/dropbox.py
new file mode 100644
index 0000000..bc2efce
--- /dev/null
+++ b/yt_dlp/extractor/dropbox.py
@@ -0,0 +1,92 @@
+import base64
+import os.path
+import re
+
+from .common import InfoExtractor
+from ..compat import compat_urllib_parse_unquote
+from ..utils import (
+ ExtractorError,
+ update_url_query,
+ url_basename,
+)
+
+
+class DropboxIE(InfoExtractor):
+ _VALID_URL = r'https?://(?:www\.)?dropbox\.com/(?:(?:e/)?scl/fi|sh?)/(?P<id>\w+)'
+ _TESTS = [
+ {
+ 'url': 'https://www.dropbox.com/s/nelirfsxnmcfbfh/youtube-dl%20test%20video%20%27%C3%A4%22BaW_jenozKc.mp4?dl=0',
+ 'info_dict': {
+ 'id': 'nelirfsxnmcfbfh',
+ 'ext': 'mp4',
+ 'title': 'youtube-dl test video \'ä"BaW_jenozKc'
+ }
+ }, {
+ 'url': 'https://www.dropbox.com/s/nelirfsxnmcfbfh',
+ 'only_matching': True,
+ }, {
+ 'url': 'https://www.dropbox.com/sh/2mgpiuq7kv8nqdf/AABy-fW4dkydT4GmWi2mdOUDa?dl=0&preview=Drone+Shot.mp4',
+ 'only_matching': True,
+ }, {
+ 'url': 'https://www.dropbox.com/scl/fi/r2kd2skcy5ylbbta5y1pz/DJI_0003.MP4?dl=0&rlkey=wcdgqangn7t3lnmmv6li9mu9h',
+ 'only_matching': True,
+ }, {
+ 'url': 'https://www.dropbox.com/e/scl/fi/r2kd2skcy5ylbbta5y1pz/DJI_0003.MP4?dl=0&rlkey=wcdgqangn7t3lnmmv6li9mu9h',
+ 'only_matching': True,
+ },
+ ]
+
+ def _real_extract(self, url):
+ mobj = self._match_valid_url(url)
+ video_id = mobj.group('id')
+ webpage = self._download_webpage(url, video_id)
+ fn = compat_urllib_parse_unquote(url_basename(url))
+ title = os.path.splitext(fn)[0]
+
+ password = self.get_param('videopassword')
+ if (self._og_search_title(webpage) == 'Dropbox - Password Required'
+ or 'Enter the password for this link' in webpage):
+
+ if password:
+ content_id = self._search_regex(r'content_id=(.*?)["\']', webpage, 'content_id')
+ payload = f'is_xhr=true&t={self._get_cookies("https://www.dropbox.com").get("t").value}&content_id={content_id}&password={password}&url={url}'
+ response = self._download_json(
+ 'https://www.dropbox.com/sm/auth', video_id, 'POSTing video password', data=payload.encode('UTF-8'),
+ headers={'content-type': 'application/x-www-form-urlencoded; charset=UTF-8'})
+
+ if response.get('status') != 'authed':
+ raise ExtractorError('Authentication failed!', expected=True)
+ webpage = self._download_webpage(url, video_id)
+ elif self._get_cookies('https://dropbox.com').get('sm_auth'):
+ webpage = self._download_webpage(url, video_id)
+ else:
+ raise ExtractorError('Password protected video, use --video-password <password>', expected=True)
+
+ formats, subtitles, has_anonymous_download = [], {}, False
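+ # stream URLs are embedded in base64 blobs passed to registerStreamedPrefetch;
+ # scan them in reverse order for an HLS manifest URL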
+ for encoded in reversed(re.findall(r'registerStreamedPrefetch\s*\(\s*"[\w/+=]+"\s*,\s*"([\w/+=]+)"', webpage)):
+ decoded = base64.b64decode(encoded).decode('utf-8', 'ignore')
+ transcode_url = self._search_regex(
+ r'\n.(https://[^\x03\x08\x12\n]+\.m3u8)', decoded, 'transcode url', default=None)
+ if not transcode_url:
+ continue
+ formats, subtitles = self._extract_m3u8_formats_and_subtitles(transcode_url, video_id, 'mp4')
+ has_anonymous_download = self._search_regex(r'(anonymous:\tanonymous)', decoded, 'anonymous', default=False)
+ break
+
+ # if anonymous downloads are enabled, the original file can be fetched directly
+ if has_anonymous_download:
+ formats.append({
+ 'url': update_url_query(url, {'dl': '1'}),
+ 'format_id': 'original',
+ 'format_note': 'Original',
+ 'quality': 1
+ })
+
+ return {
+ 'id': video_id,
+ 'title': title,
+ 'formats': formats,
+ 'subtitles': subtitles
+ }
diff --git a/yt_dlp/extractor/dropout.py b/yt_dlp/extractor/dropout.py
new file mode 100644
index 0000000..80ae6c1
--- /dev/null
+++ b/yt_dlp/extractor/dropout.py
@@ -0,0 +1,226 @@
+import functools
+
+from .common import InfoExtractor
+from .vimeo import VHXEmbedIE
+from ..utils import (
+ ExtractorError,
+ OnDemandPagedList,
+ clean_html,
+ extract_attributes,
+ get_element_by_class,
+ get_element_by_id,
+ get_elements_html_by_class,
+ int_or_none,
+ traverse_obj,
+ unified_strdate,
+ urlencode_postdata,
+)
+
+
+class DropoutIE(InfoExtractor):
+ _LOGIN_URL = 'https://www.dropout.tv/login'
+ _NETRC_MACHINE = 'dropout'
+
+ _VALID_URL = r'https?://(?:www\.)?dropout\.tv/(?:[^/]+/)*videos/(?P<id>[^/]+)/?$'
+ _TESTS = [
+ {
+ 'url': 'https://www.dropout.tv/game-changer/season:2/videos/yes-or-no',
+ 'note': 'Episode in a series',
+ 'md5': '5e000fdfd8d8fa46ff40456f1c2af04a',
+ 'info_dict': {
+ 'id': '738153',
+ 'display_id': 'yes-or-no',
+ 'ext': 'mp4',
+ 'title': 'Yes or No',
+ 'description': 'Ally, Brennan, and Zac are asked a simple question, but is there a correct answer?',
+ 'release_date': '20200508',
+ 'thumbnail': 'https://vhx.imgix.net/chuncensoredstaging/assets/351e3f24-c4a3-459a-8b79-dc80f1e5b7fd.jpg',
+ 'series': 'Game Changer',
+ 'season_number': 2,
+ 'season': 'Season 2',
+ 'episode_number': 6,
+ 'episode': 'Yes or No',
+ 'duration': 1180,
+ 'uploader_id': 'user80538407',
+ 'uploader_url': 'https://vimeo.com/user80538407',
+ 'uploader': 'OTT Videos'
+ },
+ 'expected_warnings': ['Ignoring subtitle tracks found in the HLS manifest']
+ },
+ {
+ 'url': 'https://www.dropout.tv/dimension-20-fantasy-high/season:1/videos/episode-1',
+ 'note': 'Episode in a series (missing release_date)',
+ 'md5': '712caf7c191f1c47c8f1879520c2fa5c',
+ 'info_dict': {
+ 'id': '320562',
+ 'display_id': 'episode-1',
+ 'ext': 'mp4',
+ 'title': 'The Beginning Begins',
+ 'description': 'The cast introduces their PCs, including a neurotic elf, a goblin PI, and a corn-worshipping cleric.',
+ 'thumbnail': 'https://vhx.imgix.net/chuncensoredstaging/assets/4421ed0d-f630-4c88-9004-5251b2b8adfa.jpg',
+ 'series': 'Dimension 20: Fantasy High',
+ 'season_number': 1,
+ 'season': 'Season 1',
+ 'episode_number': 1,
+ 'episode': 'The Beginning Begins',
+ 'duration': 6838,
+ 'uploader_id': 'user80538407',
+ 'uploader_url': 'https://vimeo.com/user80538407',
+ 'uploader': 'OTT Videos'
+ },
+ 'expected_warnings': ['Ignoring subtitle tracks found in the HLS manifest']
+ },
+ {
+ 'url': 'https://www.dropout.tv/videos/misfits-magic-holiday-special',
+ 'note': 'Episode not in a series',
+ 'md5': 'c30fa18999c5880d156339f13c953a26',
+ 'info_dict': {
+ 'id': '1915774',
+ 'display_id': 'misfits-magic-holiday-special',
+ 'ext': 'mp4',
+ 'title': 'Misfits & Magic Holiday Special',
+ 'description': 'The magical misfits spend Christmas break at Gowpenny, with an unwelcome visitor.',
+ 'release_date': '20211215',
+ 'thumbnail': 'https://vhx.imgix.net/chuncensoredstaging/assets/d91ea8a6-b250-42ed-907e-b30fb1c65176-8e24b8e5.jpg',
+ 'duration': 11698,
+ 'uploader_id': 'user80538407',
+ 'uploader_url': 'https://vimeo.com/user80538407',
+ 'uploader': 'OTT Videos'
+ },
+ 'expected_warnings': ['Ignoring subtitle tracks found in the HLS manifest']
+ }
+ ]
+
+ def _get_authenticity_token(self, display_id):
+ signin_page = self._download_webpage(
+ self._LOGIN_URL, display_id, note='Getting authenticity token')
+ return self._html_search_regex(
+ r'name=["\']authenticity_token["\'] value=["\'](.+?)["\']',
+ signin_page, 'authenticity_token')
+
+ def _login(self, display_id):
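+ # Returns True when no credentials are configured, an error string on failure, or None on success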
+ username, password = self._get_login_info()
+ if not username:
+ return True
+
+ response = self._download_webpage(
+ self._LOGIN_URL, display_id, note='Logging in', fatal=False,
+ data=urlencode_postdata({
+ 'email': username,
+ 'password': password,
+ 'authenticity_token': self._get_authenticity_token(display_id),
+ 'utf8': True
+ }))
+
+ user_has_subscription = self._search_regex(
+ r'user_has_subscription:\s*["\'](.+?)["\']', response, 'subscription status', default='none')
+ if user_has_subscription.lower() == 'true':
+ return
+ elif user_has_subscription.lower() == 'false':
+ return 'Account is not subscribed'
+ else:
+ return 'Incorrect username/password'
+
+ def _real_extract(self, url):
+ display_id = self._match_id(url)
+
+ webpage = None
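+ # Reuse an existing session cookie before attempting a fresh login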
+ if self._get_cookies('https://www.dropout.tv').get('_session'):
+ webpage = self._download_webpage(url, display_id)
+ if not webpage or '<div id="watch-unauthorized"' in webpage:
+ login_err = self._login(display_id)
+ webpage = self._download_webpage(url, display_id)
+ if login_err and '<div id="watch-unauthorized"' in webpage:
+ if login_err is True:
+ self.raise_login_required(method='any')
+ raise ExtractorError(login_err, expected=True)
+
+ embed_url = self._search_regex(r'embed_url:\s*["\'](.+?)["\']', webpage, 'embed url')
+ thumbnail = self._og_search_thumbnail(webpage)
+ watch_info = get_element_by_id('watch-info', webpage) or ''
+
+ title = clean_html(get_element_by_class('video-title', watch_info))
+ season_episode = get_element_by_class(
+ 'site-font-secondary-color', get_element_by_class('text', watch_info))
+ episode_number = int_or_none(self._search_regex(
+ r'Episode (\d+)', season_episode or '', 'episode', default=None))
+
+ return {
+ '_type': 'url_transparent',
+ 'ie_key': VHXEmbedIE.ie_key(),
+ 'url': VHXEmbedIE._smuggle_referrer(embed_url, 'https://www.dropout.tv'),
+ 'id': self._search_regex(r'embed\.vhx\.tv/videos/(.+?)\?', embed_url, 'id'),
+ 'display_id': display_id,
+ 'title': title,
+ 'description': self._html_search_meta('description', webpage, fatal=False),
+ 'thumbnail': thumbnail.split('?')[0] if thumbnail else None, # Ignore crop/downscale
+ 'series': clean_html(get_element_by_class('series-title', watch_info)),
+ 'episode_number': episode_number,
+ 'episode': title if episode_number else None,
+ 'season_number': int_or_none(self._search_regex(
+ r'Season (\d+),', season_episode or '', 'season', default=None)),
+ 'release_date': unified_strdate(self._search_regex(
+ r'data-meta-field-name=["\']release_dates["\'] data-meta-field-value=["\'](.+?)["\']',
+ watch_info, 'release date', default=None)),
+ }
+
+
+class DropoutSeasonIE(InfoExtractor):
+ _PAGE_SIZE = 24
+ _VALID_URL = r'https?://(?:www\.)?dropout\.tv/(?P<id>[^\/$&?#]+)(?:/?$|/season:(?P<season>[0-9]+)/?$)'
+ _TESTS = [
+ {
+ 'url': 'https://www.dropout.tv/dimension-20-fantasy-high/season:1',
+ 'note': 'Multi-season series with the season in the url',
+ 'playlist_count': 24,
+ 'info_dict': {
+ 'id': 'dimension-20-fantasy-high-season-1',
+ 'title': 'Dimension 20 Fantasy High - Season 1'
+ }
+ },
+ {
+ 'url': 'https://www.dropout.tv/dimension-20-fantasy-high',
+ 'note': 'Multi-season series with the season not in the url',
+ 'playlist_count': 24,
+ 'info_dict': {
+ 'id': 'dimension-20-fantasy-high-season-1',
+ 'title': 'Dimension 20 Fantasy High - Season 1'
+ }
+ },
+ {
+ 'url': 'https://www.dropout.tv/dimension-20-shriek-week',
+ 'note': 'Single-season series',
+ 'playlist_count': 4,
+ 'info_dict': {
+ 'id': 'dimension-20-shriek-week-season-1',
+ 'title': 'Dimension 20 Shriek Week - Season 1'
+ }
+ },
+ {
+ 'url': 'https://www.dropout.tv/breaking-news-no-laugh-newsroom/season:3',
+ 'note': 'Multi-season series with season in the url that requires pagination',
+ 'playlist_count': 25,
+ 'info_dict': {
+ 'id': 'breaking-news-no-laugh-newsroom-season-3',
+ 'title': 'Breaking News No Laugh Newsroom - Season 3'
+ }
+ }
+ ]
+
+ def _fetch_page(self, url, season_id, page):
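+ # OnDemandPagedList supplies a zero-based page index, but the site paginates from 1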
+ page += 1
+ webpage = self._download_webpage(
+ f'{url}?page={page}', season_id, note=f'Downloading page {page}', expected_status={400})
+ yield from [self.url_result(item_url, DropoutIE) for item_url in traverse_obj(
+ get_elements_html_by_class('browse-item-link', webpage), (..., {extract_attributes}, 'href'))]
+
+ def _real_extract(self, url):
+ season_id = self._match_id(url)
+ season_num = self._match_valid_url(url).group('season') or 1
+ season_title = season_id.replace('-', ' ').title()
+
+ return self.playlist_result(
+ OnDemandPagedList(functools.partial(self._fetch_page, url, season_id), self._PAGE_SIZE),
+ f'{season_id}-season-{season_num}', f'{season_title} - Season {season_num}')
diff --git a/yt_dlp/extractor/drtuber.py b/yt_dlp/extractor/drtuber.py
new file mode 100644
index 0000000..e5dab6a
--- /dev/null
+++ b/yt_dlp/extractor/drtuber.py
@@ -0,0 +1,104 @@
+import re
+
+from .common import InfoExtractor
+from ..utils import (
+ int_or_none,
+ NO_DEFAULT,
+ parse_duration,
+ str_to_int,
+)
+
+
+class DrTuberIE(InfoExtractor):
+ _VALID_URL = r'https?://(?:(?:www|m)\.)?drtuber\.com/(?:video|embed)/(?P<id>\d+)(?:/(?P<display_id>[\w-]+))?'
+ _EMBED_REGEX = [r'<iframe[^>]+?src=["\'](?P<url>(?:https?:)?//(?:www\.)?drtuber\.com/embed/\d+)']
+ _TESTS = [{
+ 'url': 'http://www.drtuber.com/video/1740434/hot-perky-blonde-naked-golf',
+ 'md5': '93e680cf2536ad0dfb7e74d94a89facd',
+ 'info_dict': {
+ 'id': '1740434',
+ 'display_id': 'hot-perky-blonde-naked-golf',
+ 'ext': 'mp4',
+ 'title': 'hot perky blonde naked golf',
+ 'like_count': int,
+ 'comment_count': int,
+ 'categories': ['Babe', 'Blonde', 'Erotic', 'Outdoor', 'Softcore', 'Solo'],
+ 'thumbnail': r're:https?://.*\.jpg$',
+ 'age_limit': 18,
+ }
+ }, {
+ 'url': 'http://www.drtuber.com/embed/489939',
+ 'only_matching': True,
+ }, {
+ 'url': 'http://m.drtuber.com/video/3893529/lingerie-blowjob-from-beautiful-teen',
+ 'only_matching': True,
+ }]
+
+ def _real_extract(self, url):
+ mobj = self._match_valid_url(url)
+ video_id = mobj.group('id')
+ display_id = mobj.group('display_id') or video_id
+
+ webpage = self._download_webpage(
+ 'http://www.drtuber.com/video/%s' % video_id, display_id)
+
+ video_data = self._download_json(
+ 'http://www.drtuber.com/player_config_json/', video_id, query={
+ 'vid': video_id,
+ 'embed': 0,
+ 'aid': 0,
+ 'domain_id': 0,
+ })
+
+ formats = []
+ for format_id, video_url in video_data['files'].items():
+ if video_url:
+ formats.append({
+ 'format_id': format_id,
+ 'quality': 2 if format_id == 'hq' else 1,
+ 'url': video_url
+ })
+
+ duration = int_or_none(video_data.get('duration')) or parse_duration(
+ video_data.get('duration_format'))
+
+ title = self._html_search_regex(
+ (r'<h1[^>]+class=["\']title[^>]+>([^<]+)',
+ r'<title>([^<]+)\s*@\s+DrTuber',
+ r'class="title_watch"[^>]*><(?:p|h\d+)[^>]*>([^<]+)<',
+ r'<p[^>]+class="title_substrate">([^<]+)</p>',
+ r'<title>([^<]+) - \d+'),
+ webpage, 'title')
+
+ thumbnail = self._html_search_regex(
+ r'poster="([^"]+)"',
+ webpage, 'thumbnail', fatal=False)
+
+ def extract_count(id_, name, default=NO_DEFAULT):
+ return str_to_int(self._html_search_regex(
+ r'<span[^>]+(?:class|id)="%s"[^>]*>([\d,\.]+)</span>' % id_,
+ webpage, '%s count' % name, default=default, fatal=False))
+
+ like_count = extract_count('rate_likes', 'like')
+ dislike_count = extract_count('rate_dislikes', 'dislike', default=None)
+ comment_count = extract_count('comments_count', 'comment')
+
+ cats_str = self._search_regex(
+ r'<div[^>]+class="categories_list">(.+?)</div>',
+ webpage, 'categories', fatal=False)
+ categories = [] if not cats_str else re.findall(
+ r'<a title="([^"]+)"', cats_str)
+
+ return {
+ 'id': video_id,
+ 'display_id': display_id,
+ 'formats': formats,
+ 'title': title,
+ 'thumbnail': thumbnail,
+ 'like_count': like_count,
+ 'dislike_count': dislike_count,
+ 'comment_count': comment_count,
+ 'categories': categories,
+ 'age_limit': self._rta_search(webpage),
+ 'duration': duration,
+ }
diff --git a/yt_dlp/extractor/drtv.py b/yt_dlp/extractor/drtv.py
new file mode 100644
index 0000000..2a6e337
--- /dev/null
+++ b/yt_dlp/extractor/drtv.py
@@ -0,0 +1,401 @@
+import json
+import uuid
+
+from .common import InfoExtractor
+from ..utils import (
+ ExtractorError,
+ int_or_none,
+ mimetype2ext,
+ parse_iso8601,
+ try_call,
+ update_url_query,
+ url_or_none,
+)
+from ..utils.traversal import traverse_obj
+
+SERIES_API = 'https://production-cdn.dr-massive.com/api/page?device=web_browser&item_detail_expand=all&lang=da&max_list_prefetch=3&path=%s'
+
+
+class DRTVIE(InfoExtractor):
+ _VALID_URL = r'''(?x)
+ https?://
+ (?:
+ (?:www\.)?dr\.dk/tv/se(?:/ondemand)?/(?:[^/?#]+/)*|
+ (?:www\.)?(?:dr\.dk|dr-massive\.com)/drtv/(?:se|episode|program)/
+ )
+ (?P<id>[\da-z_-]+)
+ '''
+ _GEO_BYPASS = False
+ _GEO_COUNTRIES = ['DK']
+ IE_NAME = 'drtv'
+ _TESTS = [{
+ 'url': 'https://www.dr.dk/tv/se/boern/ultra/klassen-ultra/klassen-darlig-taber-10',
+ 'md5': '25e659cccc9a2ed956110a299fdf5983',
+ 'info_dict': {
+ 'id': 'klassen-darlig-taber-10',
+ 'ext': 'mp4',
+ 'title': 'Klassen - Dårlig taber (10)',
+ 'description': 'md5:815fe1b7fa656ed80580f31e8b3c79aa',
+ 'timestamp': 1539085800,
+ 'upload_date': '20181009',
+ 'duration': 606.84,
+ 'series': 'Klassen',
+ 'season': 'Klassen I',
+ 'season_number': 1,
+ 'season_id': 'urn:dr:mu:bundle:57d7e8216187a4031cfd6f6b',
+ 'episode': 'Episode 10',
+ 'episode_number': 10,
+ 'release_year': 2016,
+ },
+ 'expected_warnings': ['Unable to download f4m manifest'],
+ 'skip': 'this video has been removed',
+ }, {
+ # with SignLanguage formats
+ 'url': 'https://www.dr.dk/tv/se/historien-om-danmark/-/historien-om-danmark-stenalder',
+ 'info_dict': {
+ 'id': '00831690010',
+ 'ext': 'mp4',
+ 'title': 'Historien om Danmark: Stenalder',
+ 'description': 'md5:8c66dcbc1669bbc6f873879880f37f2a',
+ 'timestamp': 1546628400,
+ 'upload_date': '20190104',
+ 'duration': 3504.619,
+ 'formats': 'mincount:20',
+ 'release_year': 2017,
+ 'season_id': 'urn:dr:mu:bundle:5afc03ad6187a4065ca5fd35',
+ 'season_number': 1,
+ 'season': 'Historien om Danmark',
+ 'series': 'Historien om Danmark',
+ },
+ 'skip': 'this video has been removed',
+ }, {
+ 'url': 'https://www.dr.dk/drtv/se/frank-and-kastaniegaarden_71769',
+ 'info_dict': {
+ 'id': '00951930010',
+ 'ext': 'mp4',
+ 'title': 'Frank & Kastaniegaarden',
+ 'description': 'md5:974e1780934cf3275ef10280204bccb0',
+ 'release_timestamp': 1546545600,
+ 'release_date': '20190103',
+ 'duration': 2576,
+ 'season': 'Frank & Kastaniegaarden',
+ 'season_id': '67125',
+ 'release_year': 2019,
+ 'season_number': 2019,
+ 'series': 'Frank & Kastaniegaarden',
+ 'episode_number': 1,
+ 'episode': 'Frank & Kastaniegaarden',
+ 'thumbnail': r're:https?://.+',
+ },
+ 'params': {
+ 'skip_download': True,
+ },
+ }, {
+ # Foreign and Regular subtitle track
+ 'url': 'https://www.dr.dk/drtv/se/spise-med-price_-pasta-selv_397445',
+ 'info_dict': {
+ 'id': '00212301010',
+ 'ext': 'mp4',
+ 'episode_number': 1,
+ 'title': 'Spise med Price: Pasta Selv',
+ 'alt_title': '1. Pasta Selv',
+ 'release_date': '20230807',
+ 'description': 'md5:2da9060524fed707810d71080b3d0cd8',
+ 'duration': 1750,
+ 'season': 'Spise med Price',
+ 'release_timestamp': 1691438400,
+ 'season_id': '397440',
+ 'episode': 'Spise med Price: Pasta Selv',
+ 'thumbnail': r're:https?://.+',
+ 'season_number': 15,
+ 'series': 'Spise med Price',
+ 'release_year': 2022,
+ 'subtitles': 'mincount:2',
+ },
+ 'params': {
+ 'skip_download': 'm3u8',
+ },
+ }, {
+ 'url': 'https://www.dr.dk/drtv/episode/bonderoeven_71769',
+ 'only_matching': True,
+ }, {
+ 'url': 'https://dr-massive.com/drtv/se/bonderoeven_71769',
+ 'only_matching': True,
+ }, {
+ 'url': 'https://www.dr.dk/drtv/program/jagten_220924',
+ 'only_matching': True,
+ }]
+
+ SUBTITLE_LANGS = {
+ 'DanishLanguageSubtitles': 'da',
+ 'ForeignLanguageSubtitles': 'da_foreign',
+ 'CombinedLanguageSubtitles': 'da_combined',
+ }
+
+ _TOKEN = None
+
+ def _real_initialize(self):
+ if self._TOKEN:
+ return
+
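+ # The stream API requires an anonymous SSO token; cache it on the class to avoid refetching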
+ token_response = self._download_json(
+ 'https://production.dr-massive.com/api/authorization/anonymous-sso', None,
+ note='Downloading anonymous token', headers={
+ 'content-type': 'application/json',
+ }, query={
+ 'device': 'web_browser',
+ 'ff': 'idp,ldp,rpt',
+ 'lang': 'da',
+ 'supportFallbackToken': 'true',
+ }, data=json.dumps({
+ 'deviceId': str(uuid.uuid4()),
+ 'scopes': ['Catalog'],
+ 'optout': True,
+ }).encode())
+
+ self._TOKEN = traverse_obj(
+ token_response, (lambda _, x: x['type'] == 'UserAccount', 'value', {str}), get_all=False)
+ if not self._TOKEN:
+ raise ExtractorError('Unable to get anonymous token')
+
+ def _real_extract(self, url):
+ url_slug = self._match_id(url)
+ webpage = self._download_webpage(url, url_slug)
+
+ json_data = self._search_json(
+ r'window\.__data\s*=', webpage, 'data', url_slug, fatal=False) or {}
+ item = traverse_obj(
+ json_data, ('cache', 'page', ..., (None, ('entries', 0)), 'item', {dict}), get_all=False)
+ if item:
+ item_id = item.get('id')
+ else:
+ item_id = url_slug.rsplit('_', 1)[-1]
+ item = self._download_json(
+ f'https://production-cdn.dr-massive.com/api/items/{item_id}', item_id,
+ note='Attempting to download backup item data', query={
+ 'device': 'web_browser',
+ 'expand': 'all',
+ 'ff': 'idp,ldp,rpt',
+ 'geoLocation': 'dk',
+ 'isDeviceAbroad': 'false',
+ 'lang': 'da',
+ 'segments': 'drtv,optedout',
+ 'sub': 'Anonymous',
+ })
+
+ video_id = try_call(lambda: item['customId'].rsplit(':', 1)[-1]) or item_id
+ stream_data = self._download_json(
+ f'https://production.dr-massive.com/api/account/items/{item_id}/videos', video_id,
+ note='Downloading stream data', query={
+ 'delivery': 'stream',
+ 'device': 'web_browser',
+ 'ff': 'idp,ldp,rpt',
+ 'lang': 'da',
+ 'resolution': 'HD-1080',
+ 'sub': 'Anonymous',
+ }, headers={'authorization': f'Bearer {self._TOKEN}'})
+
+ formats = []
+ subtitles = {}
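+ # Accessibility variants (spoken subtitles, sign language, visually interpreted) are deprioritized below the standard video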
+ for stream in traverse_obj(stream_data, (lambda _, x: x['url'])):
+ format_id = stream.get('format', 'na')
+ access_service = stream.get('accessService')
+ preference = None
+ subtitle_suffix = ''
+ if access_service in ('SpokenSubtitles', 'SignLanguage', 'VisuallyInterpreted'):
+ preference = -1
+ format_id += f'-{access_service}'
+ subtitle_suffix = f'-{access_service}'
+ elif access_service == 'StandardVideo':
+ preference = 1
+ fmts, subs = self._extract_m3u8_formats_and_subtitles(
+ stream.get('url'), video_id, ext='mp4', preference=preference, m3u8_id=format_id, fatal=False)
+ formats.extend(fmts)
+
+ api_subtitles = traverse_obj(stream, ('subtitles', lambda _, v: url_or_none(v['link']), {dict}))
+ if not api_subtitles:
+ self._merge_subtitles(subs, target=subtitles)
+
+ for sub_track in api_subtitles:
+ lang = sub_track.get('language') or 'da'
+ subtitles.setdefault(self.SUBTITLE_LANGS.get(lang, lang) + subtitle_suffix, []).append({
+ 'url': sub_track['link'],
+ 'ext': mimetype2ext(sub_track.get('format')) or 'vtt'
+ })
+
+ if not formats and traverse_obj(item, ('season', 'customFields', 'IsGeoRestricted')):
+ self.raise_geo_restricted(countries=self._GEO_COUNTRIES)
+
+ return {
+ 'id': video_id,
+ 'formats': formats,
+ 'subtitles': subtitles,
+ **traverse_obj(item, {
+ 'title': 'title',
+ 'alt_title': 'contextualTitle',
+ 'description': 'description',
+ 'thumbnail': ('images', 'wallpaper'),
+ 'release_timestamp': ('customFields', 'BroadcastTimeDK', {parse_iso8601}),
+ 'duration': ('duration', {int_or_none}),
+ 'series': ('season', 'show', 'title'),
+ 'season': ('season', 'title'),
+ 'season_number': ('season', 'seasonNumber', {int_or_none}),
+ 'season_id': 'seasonId',
+ 'episode': 'episodeName',
+ 'episode_number': ('episodeNumber', {int_or_none}),
+ 'release_year': ('releaseYear', {int_or_none}),
+ }),
+ }
+
+
+class DRTVLiveIE(InfoExtractor):
+ IE_NAME = 'drtv:live'
+ _VALID_URL = r'https?://(?:www\.)?dr\.dk/(?:tv|TV)/live/(?P<id>[\da-z-]+)'
+ _GEO_COUNTRIES = ['DK']
+ _TEST = {
+ 'url': 'https://www.dr.dk/tv/live/dr1',
+ 'info_dict': {
+ 'id': 'dr1',
+ 'ext': 'mp4',
+ 'title': 're:^DR1 [0-9]{4}-[0-9]{2}-[0-9]{2} [0-9]{2}:[0-9]{2}$',
+ },
+ 'params': {
+ # m3u8 download
+ 'skip_download': True,
+ },
+ }
+
+ def _real_extract(self, url):
+ channel_id = self._match_id(url)
+ channel_data = self._download_json(
+ 'https://www.dr.dk/mu-online/api/1.0/channel/' + channel_id,
+ channel_id)
+ title = channel_data['Title']
+
+ formats = []
+ for streaming_server in channel_data.get('StreamingServers', []):
+ server = streaming_server.get('Server')
+ if not server:
+ continue
+ link_type = streaming_server.get('LinkType')
+ for quality in streaming_server.get('Qualities', []):
+ for stream in quality.get('Streams', []):
+ stream_path = stream.get('Stream')
+ if not stream_path:
+ continue
+ stream_url = update_url_query(
+ '%s/%s' % (server, stream_path), {'b': ''})
+ if link_type == 'HLS':
+ formats.extend(self._extract_m3u8_formats(
+ stream_url, channel_id, 'mp4',
+ m3u8_id=link_type, fatal=False, live=True))
+ elif link_type == 'HDS':
+ formats.extend(self._extract_f4m_formats(update_url_query(
+ '%s/%s' % (server, stream_path), {'hdcore': '3.7.0'}),
+ channel_id, f4m_id=link_type, fatal=False))
+
+ return {
+ 'id': channel_id,
+ 'title': title,
+ 'thumbnail': channel_data.get('PrimaryImageUri'),
+ 'formats': formats,
+ 'is_live': True,
+ }
+
+
+class DRTVSeasonIE(InfoExtractor):
+ IE_NAME = 'drtv:season'
+ _VALID_URL = r'https?://(?:www\.)?(?:dr\.dk|dr-massive\.com)/drtv/saeson/(?P<display_id>[\w-]+)_(?P<id>\d+)'
+ _GEO_COUNTRIES = ['DK']
+ _TESTS = [{
+ 'url': 'https://www.dr.dk/drtv/saeson/frank-and-kastaniegaarden_9008',
+ 'info_dict': {
+ 'id': '9008',
+ 'display_id': 'frank-and-kastaniegaarden',
+ 'title': 'Frank & Kastaniegaarden',
+ 'series': 'Frank & Kastaniegaarden',
+ 'season_number': 2008,
+ 'alt_title': 'Season 2008',
+ },
+ 'playlist_mincount': 8
+ }, {
+ 'url': 'https://www.dr.dk/drtv/saeson/frank-and-kastaniegaarden_8761',
+ 'info_dict': {
+ 'id': '8761',
+ 'display_id': 'frank-and-kastaniegaarden',
+ 'title': 'Frank & Kastaniegaarden',
+ 'series': 'Frank & Kastaniegaarden',
+ 'season_number': 2009,
+ 'alt_title': 'Season 2009',
+ },
+ 'playlist_mincount': 19
+ }]
+
+ def _real_extract(self, url):
+ display_id, season_id = self._match_valid_url(url).group('display_id', 'id')
+ data = self._download_json(SERIES_API % f'/saeson/{display_id}_{season_id}', display_id)
+
+ entries = [{
+ '_type': 'url',
+ 'url': f'https://www.dr.dk/drtv{episode["path"]}',
+ 'ie_key': DRTVIE.ie_key(),
+ 'title': episode.get('title'),
+ 'alt_title': episode.get('contextualTitle'),
+ 'episode': episode.get('episodeName'),
+ 'description': episode.get('shortDescription'),
+ 'series': traverse_obj(data, ('entries', 0, 'item', 'title')),
+ 'season_number': traverse_obj(data, ('entries', 0, 'item', 'seasonNumber')),
+ 'episode_number': episode.get('episodeNumber'),
+ } for episode in traverse_obj(data, ('entries', 0, 'item', 'episodes', 'items'))]
+
+ return {
+ '_type': 'playlist',
+ 'id': season_id,
+ 'display_id': display_id,
+ 'title': traverse_obj(data, ('entries', 0, 'item', 'title')),
+ 'alt_title': traverse_obj(data, ('entries', 0, 'item', 'contextualTitle')),
+ 'series': traverse_obj(data, ('entries', 0, 'item', 'title')),
+ 'entries': entries,
+ 'season_number': traverse_obj(data, ('entries', 0, 'item', 'seasonNumber'))
+ }
+
+
+class DRTVSeriesIE(InfoExtractor):
+ IE_NAME = 'drtv:series'
+ _VALID_URL = r'https?://(?:www\.)?(?:dr\.dk|dr-massive\.com)/drtv/serie/(?P<display_id>[\w-]+)_(?P<id>\d+)'
+ _GEO_COUNTRIES = ['DK']
+ _TESTS = [{
+ 'url': 'https://www.dr.dk/drtv/serie/frank-and-kastaniegaarden_6954',
+ 'info_dict': {
+ 'id': '6954',
+ 'display_id': 'frank-and-kastaniegaarden',
+ 'title': 'Frank & Kastaniegaarden',
+ 'series': 'Frank & Kastaniegaarden',
+ 'alt_title': '',
+ },
+ 'playlist_mincount': 15
+ }]
+
+ def _real_extract(self, url):
+ display_id, series_id = self._match_valid_url(url).group('display_id', 'id')
+ data = self._download_json(SERIES_API % f'/serie/{display_id}_{series_id}', display_id)
+
+ entries = [{
+ '_type': 'url',
+ 'url': f'https://www.dr.dk/drtv{season.get("path")}',
+ 'ie_key': DRTVSeasonIE.ie_key(),
+ 'title': season.get('title'),
+ 'alt_title': season.get('contextualTitle'),
+ 'series': traverse_obj(data, ('entries', 0, 'item', 'title')),
+ 'season_number': traverse_obj(data, ('entries', 0, 'item', 'seasonNumber'))
+ } for season in traverse_obj(data, ('entries', 0, 'item', 'show', 'seasons', 'items'))]
+
+ return {
+ '_type': 'playlist',
+ 'id': series_id,
+ 'display_id': display_id,
+ 'title': traverse_obj(data, ('entries', 0, 'item', 'title')),
+ 'alt_title': traverse_obj(data, ('entries', 0, 'item', 'contextualTitle')),
+ 'series': traverse_obj(data, ('entries', 0, 'item', 'title')),
+ 'entries': entries
+ }
diff --git a/yt_dlp/extractor/dtube.py b/yt_dlp/extractor/dtube.py
new file mode 100644
index 0000000..bb06c42
--- /dev/null
+++ b/yt_dlp/extractor/dtube.py
@@ -0,0 +1,80 @@
+import json
+from socket import timeout
+
+from .common import InfoExtractor
+from ..utils import (
+ int_or_none,
+ parse_iso8601,
+)
+
+
+class DTubeIE(InfoExtractor):
+ _WORKING = False
+ _VALID_URL = r'https?://(?:www\.)?d\.tube/(?:#!/)?v/(?P<uploader_id>[0-9a-z.-]+)/(?P<id>[0-9a-z]{8})'
+ _TEST = {
+ 'url': 'https://d.tube/#!/v/broncnutz/x380jtr1',
+ 'md5': '9f29088fa08d699a7565ee983f56a06e',
+ 'info_dict': {
+ 'id': 'x380jtr1',
+ 'ext': 'mp4',
+ 'title': 'Lefty 3-Rings is Back Baby!! NCAA Picks',
+ 'description': 'md5:60be222088183be3a42f196f34235776',
+ 'uploader_id': 'broncnutz',
+ 'upload_date': '20190107',
+ 'timestamp': 1546854054,
+ },
+ 'params': {
+ 'format': '480p',
+ },
+ }
+
+ def _real_extract(self, url):
+ uploader_id, video_id = self._match_valid_url(url).groups()
+ result = self._download_json('https://api.steemit.com/', video_id, data=json.dumps({
+ 'jsonrpc': '2.0',
+ 'method': 'get_content',
+ 'params': [uploader_id, video_id],
+ }).encode())['result']
+
+ metadata = json.loads(result['json_metadata'])
+ video = metadata['video']
+ content = video['content']
+ info = video.get('info', {})
+ title = info.get('title') or result['title']
+
+ def canonical_url(h):
+ if not h:
+ return None
+ return 'https://video.dtube.top/ipfs/' + h
+
+ formats = []
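+ # Quality-specific IPFS hashes live under 'video240hash' etc.; the bare 'videohash' key is the source file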
+ for q in ('240', '480', '720', '1080', ''):
+ video_url = canonical_url(content.get('video%shash' % q))
+ if not video_url:
+ continue
+ format_id = (q + 'p') if q else 'Source'
+ try:
+ self.to_screen('%s: Checking %s video format URL' % (video_id, format_id))
+ self._downloader._opener.open(video_url, timeout=5).close()
+ except timeout:
+ self.to_screen(
+ '%s: %s URL is invalid, skipping' % (video_id, format_id))
+ continue
+ formats.append({
+ 'format_id': format_id,
+ 'url': video_url,
+ 'height': int_or_none(q),
+ 'ext': 'mp4',
+ })
+
+ return {
+ 'id': video_id,
+ 'title': title,
+ 'description': content.get('description'),
+ 'thumbnail': canonical_url(info.get('snaphash')),
+ 'tags': content.get('tags') or metadata.get('tags'),
+ 'duration': info.get('duration'),
+ 'formats': formats,
+ 'timestamp': parse_iso8601(result.get('created')),
+ 'uploader_id': uploader_id,
+ }
diff --git a/yt_dlp/extractor/duboku.py b/yt_dlp/extractor/duboku.py
new file mode 100644
index 0000000..626e577
--- /dev/null
+++ b/yt_dlp/extractor/duboku.py
@@ -0,0 +1,247 @@
+import base64
+import re
+import urllib.parse
+
+from .common import InfoExtractor
+from ..compat import compat_urlparse
+from ..utils import (
+ clean_html,
+ extract_attributes,
+ ExtractorError,
+ get_elements_by_class,
+ int_or_none,
+ js_to_json,
+ smuggle_url,
+ unescapeHTML,
+)
+
+
+def _get_elements_by_tag_and_attrib(html, tag=None, attribute=None, value=None, escape_value=True):
+ """Return the content of the tag with the specified attribute in the passed HTML document"""
+
+ if tag is None:
+ tag = '[a-zA-Z0-9:._-]+'
+ if attribute is None:
+ attribute = ''
+ else:
+ attribute = r'\s+(?P<attribute>%s)' % re.escape(attribute)
+ if value is None:
+ value = ''
+ else:
+ value = re.escape(value) if escape_value else value
+ value = '=[\'"]?(?P<value>%s)[\'"]?' % value
+
+ retlist = []
+ for m in re.finditer(r'''(?xs)
+ <(?P<tag>%s)
+ (?:\s+[a-zA-Z0-9:._-]+(?:=[a-zA-Z0-9:._-]*|="[^"]*"|='[^']*'|))*?
+ %s%s
+ (?:\s+[a-zA-Z0-9:._-]+(?:=[a-zA-Z0-9:._-]*|="[^"]*"|='[^']*'|))*?
+ \s*>
+ (?P<content>.*?)
+ </\1>
+ ''' % (tag, attribute, value), html):
+ retlist.append(m)
+
+ return retlist
+
+
+def _get_element_by_tag_and_attrib(html, tag=None, attribute=None, value=None, escape_value=True):
+ retval = _get_elements_by_tag_and_attrib(html, tag, attribute, value, escape_value)
+ return retval[0] if retval else None
+
+
+class DubokuIE(InfoExtractor):
+ IE_NAME = 'duboku'
+ IE_DESC = 'www.duboku.io'
+
+ _VALID_URL = r'(?:https?://[^/]+\.duboku\.io/vodplay/)(?P<id>[0-9]+-[0-9-]+)\.html.*'
+ _TESTS = [{
+ 'url': 'https://w.duboku.io/vodplay/1575-1-1.html',
+ 'info_dict': {
+ 'id': '1575-1-1',
+ 'ext': 'mp4',
+ 'series': '白色月光',
+ 'title': 'contains:白色月光',
+ 'season_number': 1,
+ 'episode_number': 1,
+ 'season': 'Season 1',
+ 'episode_id': '1',
+ 'season_id': '1',
+ 'episode': 'Episode 1',
+ },
+ 'params': {
+ 'skip_download': 'm3u8 download',
+ },
+ }, {
+ 'url': 'https://w.duboku.io/vodplay/1588-1-1.html',
+ 'info_dict': {
+ 'id': '1588-1-1',
+ 'ext': 'mp4',
+ 'series': '亲爱的自己',
+ 'title': 'contains:第1集',
+ 'season_number': 1,
+ 'episode_number': 1,
+ 'episode': 'Episode 1',
+ 'season': 'Season 1',
+ 'episode_id': '1',
+ 'season_id': '1',
+ },
+ 'params': {
+ 'skip_download': 'm3u8 download',
+ },
+ }]
+
+ _PLAYER_DATA_PATTERN = r'player_data\s*=\s*(\{\s*(.*)})\s*;?\s*</script'
+
+ def _real_extract(self, url):
+ video_id = self._match_id(url)
+ series_id, season_id, episode_id = video_id.split('-')[:3]
+
+ webpage_url = 'https://w.duboku.io/vodplay/%s.html' % video_id
+ webpage_html = self._download_webpage(webpage_url, video_id)
+
+ # extract video url
+
+ player_data = self._search_regex(
+ self._PLAYER_DATA_PATTERN, webpage_html, 'player_data')
+ player_data = self._parse_json(player_data, video_id, js_to_json)
+
+ # extract title
+
+ series_title = None
+ title = None
+ for html in get_elements_by_class('title', webpage_html):
+ mobj = re.search(r'<a\s+.*>(.*)</a>', html)
+ if mobj:
+ href = extract_attributes(mobj.group(0)).get('href')
+ if href:
+ mobj1 = re.search(r'/(\d+)\.html', href)
+ if mobj1 and mobj1.group(1) == series_id:
+ series_title = clean_html(mobj.group(0))
+ series_title = re.sub(r'[\s\r\n\t]+', ' ', series_title)
+ title = clean_html(html)
+ title = re.sub(r'[\s\r\n\t]+', ' ', title)
+ break
+
+ data_url = player_data.get('url')
+ if not data_url:
+ raise ExtractorError('Cannot find url in player_data')
+ player_encrypt = player_data.get('encrypt')
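+ # encrypt == 1: the URL is percent-encoded; encrypt == 2: it is percent-encoded and then base64-encoded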
+ if player_encrypt == 1:
+ data_url = urllib.parse.unquote(data_url)
+ elif player_encrypt == 2:
+ data_url = urllib.parse.unquote(base64.b64decode(data_url).decode('ascii'))
+
+ # If the player is an embedded iframe, the source may be external
+ headers = {'Referer': webpage_url}
+ if player_data.get('from') == 'iframe':
+ # use _type url_transparent to retain the meaningful details
+ # of the video.
+ return {
+ '_type': 'url_transparent',
+ 'url': smuggle_url(data_url, {'referer': webpage_url}),
+ 'id': video_id,
+ 'title': title,
+ 'series': series_title,
+ 'season_number': int_or_none(season_id),
+ 'season_id': season_id,
+ 'episode_number': int_or_none(episode_id),
+ 'episode_id': episode_id,
+ }
+
+ formats = self._extract_m3u8_formats(data_url, video_id, 'mp4', headers=headers)
+
+ return {
+ 'id': video_id,
+ 'title': title,
+ 'series': series_title,
+ 'season_number': int_or_none(season_id),
+ 'season_id': season_id,
+ 'episode_number': int_or_none(episode_id),
+ 'episode_id': episode_id,
+ 'formats': formats,
+ 'http_headers': headers
+ }
+
+
+class DubokuPlaylistIE(InfoExtractor):
+ IE_NAME = 'duboku:list'
+ IE_DESC = 'www.duboku.io entire series'
+
+ _VALID_URL = r'(?:https?://[^/]+\.duboku\.io/voddetail/)(?P<id>[0-9]+)\.html.*'
+ _TESTS = [{
+ 'url': 'https://w.duboku.io/voddetail/1575.html',
+ 'info_dict': {
+ 'id': 'startswith:1575',
+ 'title': '白色月光',
+ },
+ 'playlist_count': 12,
+ }, {
+ 'url': 'https://w.duboku.io/voddetail/1554.html',
+ 'info_dict': {
+ 'id': 'startswith:1554',
+ 'title': '以家人之名',
+ },
+ 'playlist_mincount': 30,
+ }]
+
+ def _real_extract(self, url):
+ mobj = self._match_valid_url(url)
+ if mobj is None:
+ raise ExtractorError('Invalid URL: %s' % url)
+ series_id = mobj.group('id')
+ fragment = compat_urlparse.urlparse(url).fragment
+
+ webpage_url = 'https://w.duboku.io/voddetail/%s.html' % series_id
+ webpage_html = self._download_webpage(webpage_url, series_id)
+
+ # extract title
+
+ title = _get_element_by_tag_and_attrib(webpage_html, 'h1', 'class', 'title')
+ title = unescapeHTML(title.group('content')) if title else None
+ if not title:
+ title = self._html_search_meta('keywords', webpage_html)
+ if not title:
+ title = _get_element_by_tag_and_attrib(webpage_html, 'title')
+ title = unescapeHTML(title.group('content')) if title else None
+
+ # extract playlists
+
+ playlists = {}
+ for div in _get_elements_by_tag_and_attrib(
+ webpage_html, attribute='id', value='playlist\\d+', escape_value=False):
+ playlist_id = div.group('value')
+ playlist = []
+ for a in _get_elements_by_tag_and_attrib(
+ div.group('content'), 'a', 'href', value='[^\'"]+?', escape_value=False):
+ playlist.append({
+ 'href': unescapeHTML(a.group('value')),
+ 'title': unescapeHTML(a.group('content'))
+ })
+ playlists[playlist_id] = playlist
+
+ # select the specified playlist if url fragment exists
+ playlist = None
+ playlist_id = None
+ if fragment:
+ playlist = playlists.get(fragment)
+ playlist_id = fragment
+ else:
+ first = next(iter(playlists.items()), None)
+ if first:
+ (playlist_id, playlist) = first
+ if not playlist:
+ raise ExtractorError(
+ ('Cannot find %s' % fragment) if fragment else 'Cannot extract playlist')
+
+ # return url results
+ return self.playlist_result([
+ self.url_result(
+ compat_urlparse.urljoin('https://w.duboku.io', x['href']),
+ ie=DubokuIE.ie_key(), video_title=x.get('title'))
+ for x in playlist], series_id + '#' + playlist_id, title)
diff --git a/yt_dlp/extractor/dumpert.py b/yt_dlp/extractor/dumpert.py
new file mode 100644
index 0000000..5e7aef0
--- /dev/null
+++ b/yt_dlp/extractor/dumpert.py
@@ -0,0 +1,114 @@
+from .common import InfoExtractor
+from ..utils import (
+ determine_ext,
+ int_or_none,
+ qualities,
+)
+
+
+class DumpertIE(InfoExtractor):
+ _VALID_URL = r'''(?x)
+ (?P<protocol>https?)://(?:(?:www|legacy)\.)?dumpert\.nl/(?:
+ (?:mediabase|embed|item)/|
+ [^#]*[?&]selectedId=
+ )(?P<id>[0-9]+[/_][0-9a-zA-Z]+)'''
+ _TESTS = [{
+ 'url': 'https://www.dumpert.nl/item/6646981_951bc60f',
+ 'md5': '1b9318d7d5054e7dcb9dc7654f21d643',
+ 'info_dict': {
+ 'id': '6646981/951bc60f',
+ 'ext': 'mp4',
+ 'title': 'Ik heb nieuws voor je',
+ 'description': 'Niet schrikken hoor',
+ 'thumbnail': r're:^https?://.*\.jpg$',
+ 'duration': 9,
+ 'view_count': int,
+ 'like_count': int,
+ }
+ }, {
+ 'url': 'https://www.dumpert.nl/embed/6675421_dc440fe7',
+ 'only_matching': True,
+ }, {
+ 'url': 'http://legacy.dumpert.nl/mediabase/6646981/951bc60f',
+ 'only_matching': True,
+ }, {
+ 'url': 'http://legacy.dumpert.nl/embed/6675421/dc440fe7',
+ 'only_matching': True,
+ }, {
+ 'url': 'https://www.dumpert.nl/item/100031688_b317a185',
+ 'info_dict': {
+ 'id': '100031688/b317a185',
+ 'ext': 'mp4',
+ 'title': 'Epic schijnbeweging',
+ 'description': '<p>Die zag je niet eh</p>',
+ 'thumbnail': r're:^https?://.*\.(?:jpg|png)$',
+ 'duration': 12,
+ 'view_count': int,
+ 'like_count': int,
+ },
+ 'params': {'skip_download': 'm3u8'}
+ }, {
+ 'url': 'https://www.dumpert.nl/toppers?selectedId=100031688_b317a185',
+ 'only_matching': True,
+ }, {
+ 'url': 'https://www.dumpert.nl/latest?selectedId=100031688_b317a185',
+ 'only_matching': True,
+ }, {
+ 'url': 'https://www.dumpert.nl/?selectedId=100031688_b317a185',
+ 'only_matching': True,
+ }, {
+ 'url': 'https://www.dumpert.nl/toppers/dag?selectedId=100086074_f5cef3ac',
+ 'only_matching': True,
+ }]
+
+ def _real_extract(self, url):
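+ # Canonical video IDs use '/' as separator, while the mobile API expects the '_' form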
+ video_id = self._match_id(url).replace('_', '/')
+ item = self._download_json(
+ 'http://api-live.dumpert.nl/mobile_api/json/info/' + video_id.replace('/', '_'),
+ video_id)['items'][0]
+ title = item['title']
+ media = next(m for m in item['media'] if m.get('mediatype') == 'VIDEO')
+
+ quality = qualities(['flv', 'mobile', 'tablet', '720p', '1080p'])
+ formats = []
+ for variant in media.get('variants', []):
+ uri = variant.get('uri')
+ if not uri:
+ continue
+ version = variant.get('version')
+ preference = quality(version)
+ if determine_ext(uri) == 'm3u8':
+ formats.extend(self._extract_m3u8_formats(
+ uri, video_id, 'mp4', m3u8_id=version, quality=preference))
+ else:
+ formats.append({
+ 'url': uri,
+ 'format_id': version,
+ 'quality': preference,
+ })
+
+ thumbnails = []
+ stills = item.get('stills') or {}
+ for t in ('thumb', 'still'):
+ for s in ('', '-medium', '-large'):
+ still_id = t + s
+ still_url = stills.get(still_id)
+ if not still_url:
+ continue
+ thumbnails.append({
+ 'id': still_id,
+ 'url': still_url,
+ })
+
+ stats = item.get('stats') or {}
+
+ return {
+ 'id': video_id,
+ 'title': title,
+ 'description': item.get('description'),
+ 'thumbnails': thumbnails,
+ 'formats': formats,
+ 'duration': int_or_none(media.get('duration')),
+ 'like_count': int_or_none(stats.get('kudos_total')),
+ 'view_count': int_or_none(stats.get('views_total')),
+ }
diff --git a/yt_dlp/extractor/duoplay.py b/yt_dlp/extractor/duoplay.py
new file mode 100644
index 0000000..18642fe
--- /dev/null
+++ b/yt_dlp/extractor/duoplay.py
@@ -0,0 +1,104 @@
+from .common import InfoExtractor
+from ..utils import (
+ ExtractorError,
+ extract_attributes,
+ get_element_text_and_html_by_tag,
+ int_or_none,
+ join_nonempty,
+ str_or_none,
+ try_call,
+ unified_timestamp,
+)
+from ..utils.traversal import traverse_obj
+
+
+class DuoplayIE(InfoExtractor):
+ _VALID_URL = r'https?://duoplay\.ee/(?P<id>\d+)/[\w-]+/?(?:\?(?:[^#]+&)?ep=(?P<ep>\d+))?'
+ _TESTS = [{
+ 'note': 'Siberi võmm S02E12',
+ 'url': 'https://duoplay.ee/4312/siberi-vomm?ep=24',
+ 'md5': '1ff59d535310ac9c5cf5f287d8f91b2d',
+ 'info_dict': {
+ 'id': '4312_24',
+ 'ext': 'mp4',
+ 'title': 'Operatsioon "Öö"',
+ 'thumbnail': r're:https://.+\.jpg(?:\?c=\d+)?$',
+ 'description': 'md5:8ef98f38569d6b8b78f3d350ccc6ade8',
+ 'upload_date': '20170523',
+ 'timestamp': 1495567800,
+ 'series': 'Siberi võmm',
+ 'series_id': '4312',
+ 'season': 'Season 2',
+ 'season_number': 2,
+ 'episode': 'Operatsioon "Öö"',
+ 'episode_number': 12,
+ 'episode_id': '24',
+ },
+ }, {
+ 'note': 'Empty title',
+ 'url': 'https://duoplay.ee/17/uhikarotid?ep=14',
+ 'md5': '6aca68be71112314738dd17cced7f8bf',
+ 'info_dict': {
+ 'id': '17_14',
+ 'ext': 'mp4',
+ 'title': 'Ühikarotid',
+ 'thumbnail': r're:https://.+\.jpg(?:\?c=\d+)?$',
+ 'description': 'md5:4719b418e058c209def41d48b601276e',
+ 'upload_date': '20100916',
+ 'timestamp': 1284661800,
+ 'series': 'Ühikarotid',
+ 'series_id': '17',
+ 'season': 'Season 2',
+ 'season_number': 2,
+ 'episode_id': '14',
+ 'release_year': 2010,
+ },
+ }, {
+ 'note': 'Movie without expiry',
+ 'url': 'https://duoplay.ee/5501/pilvede-all.-neljas-ode',
+ 'md5': '7abf63d773a49ef7c39f2c127842b8fd',
+ 'info_dict': {
+ 'id': '5501',
+ 'ext': 'mp4',
+ 'title': 'Pilvede all. Neljas õde',
+ 'thumbnail': r're:https://.+\.jpg(?:\?c=\d+)?$',
+ 'description': 'md5:d86a70f8f31e82c369d4d4f4c79b1279',
+ 'cast': 'count:9',
+ 'upload_date': '20221214',
+ 'timestamp': 1671054000,
+ 'release_year': 2018,
+ },
+ }]
+
+ def _real_extract(self, url):
+ telecast_id, episode = self._match_valid_url(url).group('id', 'ep')
+ video_id = join_nonempty(telecast_id, episode, delim='_')
+ webpage = self._download_webpage(url, video_id)
+ video_player = try_call(lambda: extract_attributes(
+ get_element_text_and_html_by_tag('video-player', webpage)[1]))
+ if not video_player or not video_player.get('manifest-url'):
+ raise ExtractorError('No video found', expected=True)
+
+ episode_attr = self._parse_json(video_player.get(':episode') or '', video_id, fatal=False) or {}
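+ # The ':episode' attribute carries the JSON metadata; movies skip the episode-specific fields below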
+
+ return {
+ 'id': video_id,
+ 'formats': self._extract_m3u8_formats(video_player['manifest-url'], video_id, 'mp4'),
+ **traverse_obj(episode_attr, {
+ 'title': 'title',
+ 'description': 'synopsis',
+ 'thumbnail': ('images', 'original'),
+ 'timestamp': ('airtime', {lambda x: unified_timestamp(x + ' +0200')}),
+ 'cast': ('cast', {lambda x: x.split(', ')}),
+ 'release_year': ('year', {int_or_none}),
+ }),
+ **(traverse_obj(episode_attr, {
+ 'title': (None, ('subtitle', ('episode_nr', {lambda x: f'Episode {x}' if x else None}))),
+ 'series': 'title',
+ 'series_id': ('telecast_id', {str_or_none}),
+ 'season_number': ('season_id', {int_or_none}),
+ 'episode': 'subtitle',
+ 'episode_number': ('episode_nr', {int_or_none}),
+ 'episode_id': ('episode_id', {str_or_none}),
+ }, get_all=False) if episode_attr.get('category') != 'movies' else {}),
+ }
diff --git a/yt_dlp/extractor/dvtv.py b/yt_dlp/extractor/dvtv.py
new file mode 100644
index 0000000..e671433
--- /dev/null
+++ b/yt_dlp/extractor/dvtv.py
@@ -0,0 +1,177 @@
+import re
+
+from .common import InfoExtractor
+from ..utils import (
+ determine_ext,
+ ExtractorError,
+ int_or_none,
+ join_nonempty,
+ js_to_json,
+ mimetype2ext,
+ parse_iso8601,
+ try_get,
+ unescapeHTML,
+)
+
+
+class DVTVIE(InfoExtractor):
+ IE_NAME = 'dvtv'
+ IE_DESC = 'http://video.aktualne.cz/'
+ _VALID_URL = r'https?://video\.aktualne\.cz/(?:[^/]+/)+r~(?P<id>[0-9a-f]{32})'
+ _TESTS = [{
+ 'url': 'http://video.aktualne.cz/dvtv/vondra-o-ceskem-stoleti-pri-pohledu-na-havla-mi-bylo-trapne/r~e5efe9ca855511e4833a0025900fea04/',
+ 'md5': '67cb83e4a955d36e1b5d31993134a0c2',
+ 'info_dict': {
+ 'id': 'dc0768de855511e49e4b0025900fea04',
+ 'ext': 'mp4',
+ 'title': 'Vondra o Českém století: Při pohledu na Havla mi bylo trapně',
+ 'duration': 1484,
+ 'upload_date': '20141217',
+ 'timestamp': 1418792400,
+ }
+ }, {
+ 'url': 'http://video.aktualne.cz/dvtv/dvtv-16-12-2014-utok-talibanu-boj-o-kliniku-uprchlici/r~973eb3bc854e11e498be002590604f2e/',
+ 'info_dict': {
+ 'title': r'DVTV 16. 12. 2014: útok Talibanu, boj o kliniku, uprchlíci',
+ 'id': '973eb3bc854e11e498be002590604f2e',
+ },
+ 'playlist': [{
+ 'md5': 'da7ca6be4935532241fa9520b3ad91e4',
+ 'info_dict': {
+ 'id': 'b0b40906854d11e4bdad0025900fea04',
+ 'ext': 'mp4',
+ 'title': 'Drtinová Veselovský TV 16. 12. 2014: Témata dne',
+ 'description': 'md5:0916925dea8e30fe84222582280b47a0',
+ 'timestamp': 1418760010,
+ 'upload_date': '20141216',
+ }
+ }, {
+ 'md5': '5f7652a08b05009c1292317b449ffea2',
+ 'info_dict': {
+ 'id': '420ad9ec854a11e4bdad0025900fea04',
+ 'ext': 'mp4',
+ 'title': 'Školní masakr možná změní boj s Talibanem, říká novinářka',
+ 'description': 'md5:ff2f9f6de73c73d7cef4f756c1c1af42',
+ 'timestamp': 1418760010,
+ 'upload_date': '20141216',
+ }
+ }, {
+ 'md5': '498eb9dfa97169f409126c617e2a3d64',
+ 'info_dict': {
+ 'id': '95d35580846a11e4b6d20025900fea04',
+ 'ext': 'mp4',
+ 'title': 'Boj o kliniku: Veřejný zájem, nebo právo na majetek?',
+ 'description': 'md5:889fe610a70fee5511dc3326a089188e',
+ 'timestamp': 1418760010,
+ 'upload_date': '20141216',
+ }
+ }, {
+ 'md5': 'b8dc6b744844032dab6ba3781a7274b9',
+ 'info_dict': {
+ 'id': '6fe14d66853511e4833a0025900fea04',
+ 'ext': 'mp4',
+ 'title': 'Pánek: Odmítání syrských uprchlíků je ostudou české vlády',
+ 'description': 'md5:544f86de6d20c4815bea11bf2ac3004f',
+ 'timestamp': 1418760010,
+ 'upload_date': '20141216',
+ }
+ }],
+ }, {
+ 'url': 'https://video.aktualne.cz/dvtv/zeman-si-jen-leci-mindraky-sobotku-nenavidi-a-babis-se-mu-te/r~960cdb3a365a11e7a83b0025900fea04/',
+ 'md5': 'f8efe9656017da948369aa099788c8ea',
+ 'info_dict': {
+ 'id': '3c496fec365911e7a6500025900fea04',
+ 'ext': 'mp4',
+ 'title': 'Zeman si jen léčí mindráky, Sobotku nenávidí a Babiš se mu teď hodí, tvrdí Kmenta',
+ 'duration': 1103,
+ 'upload_date': '20170511',
+ 'timestamp': 1494514200,
+ },
+ 'params': {
+ 'skip_download': True,
+ },
+ }, {
+ 'url': 'http://video.aktualne.cz/v-cechach-poprve-zazni-zelenkova-zrestaurovana-mse/r~45b4b00483ec11e4883b002590604f2e/',
+ 'only_matching': True,
+ }, {
+ # Test live stream video (liveStarter) parsing
+ 'url': 'https://video.aktualne.cz/dvtv/zive-mistryne-sveta-eva-samkova-po-navratu-ze-sampionatu/r~182654c2288811e990fd0cc47ab5f122/',
+ 'md5': '2e552e483f2414851ca50467054f9d5d',
+ 'info_dict': {
+ 'id': '8d116360288011e98c840cc47ab5f122',
+ 'ext': 'mp4',
+ 'title': 'Živě: Mistryně světa Eva Samková po návratu ze šampionátu',
+ 'upload_date': '20190204',
+ 'timestamp': 1549289591,
+ },
+ 'params': {
+ # Video content is no longer available
+ 'skip_download': True,
+ },
+ }]
+
+ def _parse_video_metadata(self, js, video_id, timestamp):
+ data = self._parse_json(js, video_id, transform_source=js_to_json)
+ title = unescapeHTML(data['title'])
+
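+ # Live streams nest the real media info under plugins.liveStarter; merge it over the defaults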
+ live_starter = try_get(data, lambda x: x['plugins']['liveStarter'], dict)
+ if live_starter:
+ data.update(live_starter)
+
+ formats = []
+ for tracks in data.get('tracks', {}).values():
+ for video in tracks:
+ video_url = video.get('src')
+ if not video_url:
+ continue
+ video_type = video.get('type')
+ ext = determine_ext(video_url, mimetype2ext(video_type))
+ if video_type == 'application/vnd.apple.mpegurl' or ext == 'm3u8':
+ formats.extend(self._extract_m3u8_formats(
+ video_url, video_id, 'mp4', entry_protocol='m3u8_native',
+ m3u8_id='hls', fatal=False))
+ elif video_type == 'application/dash+xml' or ext == 'mpd':
+ formats.extend(self._extract_mpd_formats(
+ video_url, video_id, mpd_id='dash', fatal=False))
+ else:
+ label = video.get('label')
+ height = self._search_regex(
+ r'^(\d+)[pP]', label or '', 'height', default=None)
+ formats.append({
+ 'url': video_url,
+ 'format_id': join_nonempty('http', ext, label),
+ 'height': int_or_none(height),
+ })
+
+ return {
+ 'id': data.get('mediaid') or video_id,
+ 'title': title,
+ 'description': data.get('description'),
+ 'thumbnail': data.get('image'),
+ 'duration': int_or_none(data.get('duration')),
+ 'timestamp': int_or_none(timestamp),
+ 'formats': formats
+ }
+
+ def _real_extract(self, url):
+ video_id = self._match_id(url)
+ webpage = self._download_webpage(url, video_id)
+ timestamp = parse_iso8601(self._html_search_meta(
+ 'article:published_time', webpage, 'published time', default=None))
+
+ items = re.findall(r'(?s)playlist\.push\(({.+?})\);', webpage)
+ if items:
+ return self.playlist_result(
+ [self._parse_video_metadata(i, video_id, timestamp) for i in items],
+ video_id, self._html_search_meta('twitter:title', webpage))
+
+ item = self._search_regex(
+ r'(?s)BBXPlayer\.setup\((.+?)\);',
+ webpage, 'video', default=None)
+ if item:
+ # remove wrapping function calls (e.g. htmldeentitize)
+ # TODO: this should be fixed in a general way in js_to_json
+ item = re.sub(r'\w+?\((.+)\)', r'\1', item)
+ return self._parse_video_metadata(item, video_id, timestamp)
+
+ raise ExtractorError('Could not find either video or playlist')
diff --git a/yt_dlp/extractor/dw.py b/yt_dlp/extractor/dw.py
new file mode 100644
index 0000000..f7b8520
--- /dev/null
+++ b/yt_dlp/extractor/dw.py
@@ -0,0 +1,110 @@
+from .common import InfoExtractor
+from ..compat import compat_urlparse
+from ..utils import (
+ int_or_none,
+ unified_strdate,
+ url_or_none,
+)
+
+
+class DWIE(InfoExtractor):
+ _WORKING = False
+ _ENABLED = None # XXX: pass through to GenericIE
+ IE_NAME = 'dw'
+ _VALID_URL = r'https?://(?:www\.)?dw\.com/(?:[^/]+/)+(?:av|e)-(?P<id>\d+)'
+ _TESTS = [{
+ # video
+ 'url': 'http://www.dw.com/en/intelligent-light/av-19112290',
+ 'md5': 'fb9dfd9520811d3ece80f04befd73428',
+ 'info_dict': {
+ 'id': '19112290',
+ 'ext': 'mp4',
+ 'title': 'Intelligent light',
+ 'description': 'md5:90e00d5881719f2a6a5827cb74985af1',
+ 'upload_date': '20160605',
+ }
+ }, {
+ # audio
+ 'url': 'http://www.dw.com/en/worldlink-my-business/av-19111941',
+ 'md5': '2814c9a1321c3a51f8a7aeb067a360dd',
+ 'info_dict': {
+ 'id': '19111941',
+ 'ext': 'mp3',
+ 'title': 'WorldLink: My business',
+ 'description': 'md5:bc9ca6e4e063361e21c920c53af12405',
+ 'upload_date': '20160311',
+ }
+ }, {
+ # DW documentaries are only available for one or two weeks
+ 'url': 'http://www.dw.com/en/documentaries-welcome-to-the-90s-2016-05-21/e-19220158-9798',
+ 'md5': '56b6214ef463bfb9a3b71aeb886f3cf1',
+ 'info_dict': {
+ 'id': '19274438',
+ 'ext': 'mp4',
+ 'title': 'Welcome to the 90s – Hip Hop',
+ 'description': 'Welcome to the 90s - The Golden Decade of Hip Hop',
+ 'upload_date': '20160521',
+ },
+ 'skip': 'Video removed',
+ }]
+
+ def _real_extract(self, url):
+ media_id = self._match_id(url)
+ webpage = self._download_webpage(url, media_id)
+ hidden_inputs = self._hidden_inputs(webpage)
+ title = hidden_inputs['media_title']
+ media_id = hidden_inputs.get('media_id') or media_id
+
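+ # Prefer a direct file URL when present; otherwise fall back to the SMIL manifest, rewriting RTMP paths to HTTP downloads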
+ direct_url = url_or_none(hidden_inputs.get('file_name'))
+ if direct_url:
+ formats = [{'url': direct_url}]
+ else:
+ formats = self._extract_smil_formats(
+ 'http://www.dw.com/smil/v-%s' % media_id, media_id,
+ transform_source=lambda s: s.replace(
+ 'rtmp://tv-od.dw.de/flash/',
+ 'http://tv-download.dw.de/dwtv_video/flv/'))
+
+ upload_date = hidden_inputs.get('display_date')
+ if not upload_date:
+ upload_date = self._html_search_regex(
+ r'<span[^>]+class="date">([0-9.]+)\s*\|', webpage,
+ 'upload date', default=None)
+ upload_date = unified_strdate(upload_date)
+
+ return {
+ 'id': media_id,
+ 'title': title,
+ 'description': self._og_search_description(webpage),
+ 'thumbnail': hidden_inputs.get('preview_image'),
+ 'duration': int_or_none(hidden_inputs.get('file_duration')),
+ 'upload_date': upload_date,
+ 'formats': formats,
+ }
+
+
+class DWArticleIE(InfoExtractor):
+ _WORKING = False
+ _ENABLED = None # XXX: pass through to GenericIE
+ IE_NAME = 'dw:article'
+ _VALID_URL = r'https?://(?:www\.)?dw\.com/(?:[^/]+/)+a-(?P<id>\d+)'
+ _TEST = {
+ 'url': 'http://www.dw.com/en/no-hope-limited-options-for-refugees-in-idomeni/a-19111009',
+ 'md5': '8ca657f9d068bbef74d6fc38b97fc869',
+ 'info_dict': {
+ 'id': '19105868',
+ 'ext': 'mp4',
+ 'title': 'The harsh life of refugees in Idomeni',
+ 'description': 'md5:196015cc7e48ebf474db9399420043c7',
+ 'upload_date': '20160310',
+ }
+ }
+
+ def _real_extract(self, url):
+ article_id = self._match_id(url)
+ webpage = self._download_webpage(url, article_id)
+ hidden_inputs = self._hidden_inputs(webpage)
+ media_id = hidden_inputs['media_id']
+ media_path = self._search_regex(r'href="([^"]+av-%s)"\s+class="overlayLink"' % media_id, webpage, 'media url')
+ media_url = compat_urlparse.urljoin(url, media_path)
+ return self.url_result(media_url, 'DW', media_id)
diff --git a/yt_dlp/extractor/eagleplatform.py b/yt_dlp/extractor/eagleplatform.py
new file mode 100644
index 0000000..739d179
--- /dev/null
+++ b/yt_dlp/extractor/eagleplatform.py
@@ -0,0 +1,215 @@
+import functools
+import re
+
+from .common import InfoExtractor
+from ..networking.exceptions import HTTPError
+from ..utils import (
+ ExtractorError,
+ int_or_none,
+ smuggle_url,
+ unsmuggle_url,
+ url_or_none,
+)
+
+
+class EaglePlatformIE(InfoExtractor):
+ _VALID_URL = r'''(?x)
+ (?:
+ eagleplatform:(?P<custom_host>[^/]+):|
+ https?://(?P<host>.+?\.media\.eagleplatform\.com)/index/player\?.*\brecord_id=
+ )
+ (?P<id>\d+)
+ '''
+ _EMBED_REGEX = [r'<iframe[^>]+src=(["\'])(?P<url>(?:https?:)?//.+?\.media\.eagleplatform\.com/index/player\?.+?)\1']
+ _TESTS = [{
+ # http://lenta.ru/news/2015/03/06/navalny/
+ 'url': 'http://lentaru.media.eagleplatform.com/index/player?player=new&record_id=227304&player_template_id=5201',
+ # Not checking MD5 as sometimes the direct HTTP link results in 404 and HLS is used
+ 'info_dict': {
+ 'id': '227304',
+ 'ext': 'mp4',
+ 'title': 'Навальный вышел на свободу',
+ 'description': 'md5:d97861ac9ae77377f3f20eaf9d04b4f5',
+ 'thumbnail': r're:^https?://.*\.jpg$',
+ 'duration': 87,
+ 'view_count': int,
+ 'age_limit': 0,
+ },
+ }, {
+ # http://muz-tv.ru/play/7129/
+ # http://media.clipyou.ru/index/player?record_id=12820&width=730&height=415&autoplay=true
+ 'url': 'eagleplatform:media.clipyou.ru:12820',
+ 'md5': '358597369cf8ba56675c1df15e7af624',
+ 'info_dict': {
+ 'id': '12820',
+ 'ext': 'mp4',
+ 'title': "'O Sole Mio",
+ 'thumbnail': r're:^https?://.*\.jpg$',
+ 'duration': 216,
+ 'view_count': int,
+ },
+ 'skip': 'Georestricted',
+ }, {
+ # referrer protected video (https://tvrain.ru/lite/teleshow/kak_vse_nachinalos/namin-418921/)
+ 'url': 'eagleplatform:tvrainru.media.eagleplatform.com:582306',
+ 'only_matching': True,
+ }]
+
+ @classmethod
+ def _extract_embed_urls(cls, url, webpage):
+ add_referer = functools.partial(smuggle_url, data={'referrer': url})
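+ # Some embeds are referrer-protected, so smuggle the embedding page URL for _real_extract to send along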
+
+ res = tuple(super()._extract_embed_urls(url, webpage))
+ if res:
+ return map(add_referer, res)
+
+ PLAYER_JS_RE = r'''
+ <script[^>]+
+ src=(?P<qjs>["\'])(?:https?:)?//(?P<host>(?:(?!(?P=qjs)).)+\.media\.eagleplatform\.com)/player/player\.js(?P=qjs)
+ .+?
+ '''
+ # "Basic usage" embedding (see http://dultonmedia.github.io/eplayer/)
+ mobj = re.search(
+ r'''(?xs)
+ %s
+ <div[^>]+
+ class=(?P<qclass>["\'])eagleplayer(?P=qclass)[^>]+
+ data-id=["\'](?P<id>\d+)
+ ''' % PLAYER_JS_RE, webpage)
+ if mobj is not None:
+ return [add_referer('eagleplatform:%(host)s:%(id)s' % mobj.groupdict())]
+ # Generalization of "Javascript code usage", "Combined usage" and
+ # "Usage without attaching to DOM" embeddings (see
+ # http://dultonmedia.github.io/eplayer/)
+ mobj = re.search(
+ r'''(?xs)
+ %s
+ <script>
+ .+?
+ new\s+EaglePlayer\(
+ (?:[^,]+\s*,\s*)?
+ {
+ .+?
+ \bid\s*:\s*["\']?(?P<id>\d+)
+ .+?
+ }
+ \s*\)
+ .+?
+ </script>
+ ''' % PLAYER_JS_RE, webpage)
+ if mobj is not None:
+ return [add_referer('eagleplatform:%(host)s:%(id)s' % mobj.groupdict())]
+
+ @staticmethod
+ def _handle_error(response):
+ status = int_or_none(response.get('status', 200))
+ if status != 200:
+ raise ExtractorError(' '.join(response['errors']), expected=True)
+
+ def _download_json(self, url_or_request, video_id, *args, **kwargs):
+ try:
+ response = super()._download_json(
+ url_or_request, video_id, *args, **kwargs)
+ except ExtractorError as ee:
+ if isinstance(ee.cause, HTTPError):
+ response = self._parse_json(ee.cause.response.read().decode('utf-8'), video_id)
+ self._handle_error(response)
+ raise
+ return response
+
+ def _get_video_url(self, url_or_request, video_id, note='Downloading JSON metadata'):
+ return self._download_json(url_or_request, video_id, note)['data'][0]
+
+ def _real_extract(self, url):
+ url, smuggled_data = unsmuggle_url(url, {})
+
+ mobj = self._match_valid_url(url)
+ host, video_id = mobj.group('custom_host') or mobj.group('host'), mobj.group('id')
+
+ headers = {}
+ query = {
+ 'id': video_id,
+ }
+
+ referrer = smuggled_data.get('referrer')
+ if referrer:
+ headers['Referer'] = referrer
+ query['referrer'] = referrer
+
+ player_data = self._download_json(
+ 'http://%s/api/player_data' % host, video_id,
+ headers=headers, query=query)
+
+ media = player_data['data']['playlist']['viewports'][0]['medialist'][0]
+
+ title = media['title']
+ description = media.get('description')
+ thumbnail = self._proto_relative_url(media.get('snapshot'), 'http:')
+ duration = int_or_none(media.get('duration'))
+ view_count = int_or_none(media.get('views'))
+
+ age_restriction = media.get('age_restriction')
+ age_limit = None
+ if age_restriction:
+ age_limit = 0 if age_restriction == 'allow_all' else 18
+
+ secure_m3u8 = self._proto_relative_url(media['sources']['secure_m3u8']['auto'], 'http:')
+
+ formats = []
+
+ m3u8_url = self._get_video_url(secure_m3u8, video_id, 'Downloading m3u8 JSON')
+ m3u8_formats = self._extract_m3u8_formats(
+ m3u8_url, video_id, 'mp4', entry_protocol='m3u8_native',
+ m3u8_id='hls', fatal=False)
+ formats.extend(m3u8_formats)
+
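+ # Index the HLS formats by height so matching progressive MP4 formats can inherit their metadata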
+ m3u8_formats_dict = {}
+ for f in m3u8_formats:
+ if f.get('height') is not None:
+ m3u8_formats_dict[f['height']] = f
+
+ mp4_data = self._download_json(
+ # Secure mp4 URL is constructed according to Player.prototype.mp4 from
+ # http://lentaru.media.eagleplatform.com/player/player.js
+ re.sub(r'm3u8|hlsvod|hls|f4m', 'mp4s', secure_m3u8),
+ video_id, 'Downloading mp4 JSON', fatal=False)
+ if mp4_data:
+ for format_id, format_url in mp4_data.get('data', {}).items():
+ if not url_or_none(format_url):
+ continue
+ height = int_or_none(format_id)
+ if height is not None and m3u8_formats_dict.get(height):
+ f = m3u8_formats_dict[height].copy()
+ f.update({
+ 'format_id': f['format_id'].replace('hls', 'http'),
+ 'protocol': 'http',
+ })
+ else:
+ f = {
+ 'format_id': 'http-%s' % format_id,
+ 'height': int_or_none(format_id),
+ }
+ f['url'] = format_url
+ formats.append(f)
+
+ return {
+ 'id': video_id,
+ 'title': title,
+ 'description': description,
+ 'thumbnail': thumbnail,
+ 'duration': duration,
+ 'view_count': view_count,
+ 'age_limit': age_limit,
+ 'formats': formats,
+ }
+
+
+class ClipYouEmbedIE(InfoExtractor):
+ _VALID_URL = False
+
+ @classmethod
+ def _extract_embed_urls(cls, url, webpage):
+ mobj = re.search(
+ r'<iframe[^>]+src="https?://(?P<host>media\.clipyou\.ru)/index/player\?.*\brecord_id=(?P<id>\d+).*"', webpage)
+ if mobj is not None:
+ yield smuggle_url('eagleplatform:%(host)s:%(id)s' % mobj.groupdict(), {'referrer': url})
diff --git a/yt_dlp/extractor/ebaumsworld.py b/yt_dlp/extractor/ebaumsworld.py
new file mode 100644
index 0000000..0854d03
--- /dev/null
+++ b/yt_dlp/extractor/ebaumsworld.py
@@ -0,0 +1,31 @@
+from .common import InfoExtractor
+
+
+class EbaumsWorldIE(InfoExtractor):
+ _VALID_URL = r'https?://(?:www\.)?ebaumsworld\.com/videos/[^/]+/(?P<id>\d+)'
+
+ _TEST = {
+ 'url': 'http://www.ebaumsworld.com/videos/a-giant-python-opens-the-door/83367677/',
+ 'info_dict': {
+ 'id': '83367677',
+ 'ext': 'mp4',
+ 'title': 'A Giant Python Opens The Door',
+ 'description': 'This is how nightmares start...',
+ 'uploader': 'jihadpizza',
+ },
+ }
+
+ def _real_extract(self, url):
+ video_id = self._match_id(url)
+ config = self._download_xml(
+ 'http://www.ebaumsworld.com/video/player/%s' % video_id, video_id)
+ video_url = config.find('file').text
+
+ return {
+ 'id': video_id,
+ 'title': config.find('title').text,
+ 'url': video_url,
+ 'description': config.find('description').text,
+ 'thumbnail': config.find('image').text,
+ 'uploader': config.find('username').text,
+ }
diff --git a/yt_dlp/extractor/ebay.py b/yt_dlp/extractor/ebay.py
new file mode 100644
index 0000000..d0eb9fc
--- /dev/null
+++ b/yt_dlp/extractor/ebay.py
@@ -0,0 +1,36 @@
+from .common import InfoExtractor
+from ..utils import remove_end
+
+
+class EbayIE(InfoExtractor):
+ _VALID_URL = r'https?://(?:www\.)?ebay\.com/itm/(?P<id>\d+)'
+ _TESTS = [{
+ 'url': 'https://www.ebay.com/itm/194509326719',
+ 'info_dict': {
+ 'id': '194509326719',
+ 'ext': 'mp4',
+ 'title': 'WiFi internal antenna adhesive for wifi 2.4GHz wifi 5 wifi 6 wifi 6E full bands',
+ },
+ 'params': {'skip_download': 'm3u8'}
+ }]
+
+ def _real_extract(self, url):
+ video_id = self._match_id(url)
+ webpage = self._download_webpage(url, video_id)
+
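+ # The item page embeds a JSON "video" object whose playlistMap maps protocol names to manifest URLs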
+ video_json = self._search_json(r'"video":', webpage, 'video json', video_id)
+
+ formats = []
+ for key, format_url in video_json['playlistMap'].items():
+ if key == 'HLS':
+ formats.extend(self._extract_m3u8_formats(format_url, video_id, fatal=False))
+ elif key == 'DASH':
+ formats.extend(self._extract_mpd_formats(format_url, video_id, fatal=False))
+ else:
+ self.report_warning(f'Unsupported format {key}', video_id)
+
+ return {
+ 'id': video_id,
+ 'title': remove_end(self._html_extract_title(webpage), ' | eBay'),
+ 'formats': formats
+ }
diff --git a/yt_dlp/extractor/egghead.py b/yt_dlp/extractor/egghead.py
new file mode 100644
index 0000000..c94f3f8
--- /dev/null
+++ b/yt_dlp/extractor/egghead.py
@@ -0,0 +1,134 @@
+from .common import InfoExtractor
+from ..compat import compat_str
+from ..utils import (
+ determine_ext,
+ int_or_none,
+ try_get,
+ unified_timestamp,
+ url_or_none,
+)
+
+
+class EggheadBaseIE(InfoExtractor):
+ def _call_api(self, path, video_id, resource, fatal=True):
+ return self._download_json(
+ 'https://app.egghead.io/api/v1/' + path,
+ video_id, 'Downloading %s JSON' % resource, fatal=fatal)
+
+
+class EggheadCourseIE(EggheadBaseIE):
+ IE_DESC = 'egghead.io course'
+ IE_NAME = 'egghead:course'
+ _VALID_URL = r'https?://(?:app\.)?egghead\.io/(?:course|playlist)s/(?P<id>[^/?#&]+)'
+ _TESTS = [{
+ 'url': 'https://egghead.io/courses/professor-frisby-introduces-composable-functional-javascript',
+ 'playlist_count': 29,
+ 'info_dict': {
+ 'id': '432655',
+ 'title': 'Professor Frisby Introduces Composable Functional JavaScript',
+ 'description': 're:(?s)^This course teaches the ubiquitous.*You\'ll start composing functionality before you know it.$',
+ },
+ }, {
+ 'url': 'https://app.egghead.io/playlists/professor-frisby-introduces-composable-functional-javascript',
+ 'only_matching': True,
+ }]
+
+ def _real_extract(self, url):
+ playlist_id = self._match_id(url)
+ series_path = 'series/' + playlist_id
+ lessons = self._call_api(
+ series_path + '/lessons', playlist_id, 'course lessons')
+
+ entries = []
+ for lesson in lessons:
+ lesson_url = url_or_none(lesson.get('http_url'))
+ if not lesson_url:
+ continue
+ lesson_id = lesson.get('id')
+ if lesson_id:
+ lesson_id = compat_str(lesson_id)
+ entries.append(self.url_result(
+ lesson_url, ie=EggheadLessonIE.ie_key(), video_id=lesson_id))
+
+ course = self._call_api(
+ series_path, playlist_id, 'course', False) or {}
+
+ playlist_id = course.get('id')
+ if playlist_id:
+ playlist_id = compat_str(playlist_id)
+
+ return self.playlist_result(
+ entries, playlist_id, course.get('title'),
+ course.get('description'))
+
+
+class EggheadLessonIE(EggheadBaseIE):
+ IE_DESC = 'egghead.io lesson'
+ IE_NAME = 'egghead:lesson'
+ _VALID_URL = r'https?://(?:app\.)?egghead\.io/(?:api/v1/)?lessons/(?P<id>[^/?#&]+)'
+ _TESTS = [{
+ 'url': 'https://egghead.io/lessons/javascript-linear-data-flow-with-container-style-types-box',
+ 'info_dict': {
+ 'id': '1196',
+ 'display_id': 'javascript-linear-data-flow-with-container-style-types-box',
+ 'ext': 'mp4',
+ 'title': 'Create linear data flow with container style types (Box)',
+ 'description': 'md5:9aa2cdb6f9878ed4c39ec09e85a8150e',
+ 'thumbnail': r're:^https?:.*\.jpg$',
+ 'timestamp': 1481296768,
+ 'upload_date': '20161209',
+ 'duration': 304,
+ 'view_count': 0,
+ 'tags': 'count:2',
+ },
+ 'params': {
+ 'skip_download': True,
+ },
+ }, {
+ 'url': 'https://egghead.io/api/v1/lessons/react-add-redux-to-a-react-application',
+ 'only_matching': True,
+ }, {
+ 'url': 'https://app.egghead.io/lessons/javascript-linear-data-flow-with-container-style-types-box',
+ 'only_matching': True,
+ }]
+
+ def _real_extract(self, url):
+ display_id = self._match_id(url)
+
+ lesson = self._call_api(
+ 'lessons/' + display_id, display_id, 'lesson')
+
+ lesson_id = compat_str(lesson['id'])
+ title = lesson['title']
+
+ formats = []
+ for format_url in lesson['media_urls'].values():
+ format_url = url_or_none(format_url)
+ if not format_url:
+ continue
+ ext = determine_ext(format_url)
+ if ext == 'm3u8':
+ formats.extend(self._extract_m3u8_formats(
+ format_url, lesson_id, 'mp4', m3u8_id='hls', fatal=False))
+ elif ext == 'mpd':
+ formats.extend(self._extract_mpd_formats(
+ format_url, lesson_id, mpd_id='dash', fatal=False))
+ else:
+ formats.append({
+ 'url': format_url,
+ })
+
+ return {
+ 'id': lesson_id,
+ 'display_id': display_id,
+ 'title': title,
+ 'description': lesson.get('summary'),
+ 'thumbnail': lesson.get('thumb_nail'),
+ 'timestamp': unified_timestamp(lesson.get('published_at')),
+ 'duration': int_or_none(lesson.get('duration')),
+ 'view_count': int_or_none(lesson.get('plays_count')),
+ 'tags': try_get(lesson, lambda x: x['tag_list'], list),
+ 'series': try_get(
+ lesson, lambda x: x['series']['title'], compat_str),
+ 'formats': formats,
+ }
diff --git a/yt_dlp/extractor/eighttracks.py b/yt_dlp/extractor/eighttracks.py
new file mode 100644
index 0000000..3dd9ab1
--- /dev/null
+++ b/yt_dlp/extractor/eighttracks.py
@@ -0,0 +1,162 @@
+import json
+import random
+
+from .common import InfoExtractor
+from ..compat import (
+ compat_str,
+)
+from ..utils import (
+ ExtractorError,
+)
+
+
+class EightTracksIE(InfoExtractor):
+ IE_NAME = '8tracks'
+ _VALID_URL = r'https?://8tracks\.com/(?P<user>[^/]+)/(?P<id>[^/#]+)(?:#.*)?$'
+ _TEST = {
+ 'name': 'EightTracks',
+ 'url': 'http://8tracks.com/ytdl/youtube-dl-test-tracks-a',
+ 'info_dict': {
+ 'id': '1336550',
+ 'display_id': 'youtube-dl-test-tracks-a',
+ 'description': "test chars: \"'/\\ä↭",
+ 'title': "youtube-dl test tracks \"'/\\ä↭<>",
+ },
+ 'playlist': [
+ {
+ 'md5': '96ce57f24389fc8734ce47f4c1abcc55',
+ 'info_dict': {
+ 'id': '11885610',
+ 'ext': 'm4a',
+ 'title': "youtue-dl project<>\"' - youtube-dl test track 1 \"'/\\\u00e4\u21ad",
+ 'uploader_id': 'ytdl'
+ }
+ },
+ {
+ 'md5': '4ab26f05c1f7291ea460a3920be8021f',
+ 'info_dict': {
+ 'id': '11885608',
+ 'ext': 'm4a',
+ 'title': "youtube-dl project - youtube-dl test track 2 \"'/\\\u00e4\u21ad",
+ 'uploader_id': 'ytdl'
+ }
+ },
+ {
+ 'md5': 'd30b5b5f74217410f4689605c35d1fd7',
+ 'info_dict': {
+ 'id': '11885679',
+ 'ext': 'm4a',
+ 'title': "youtube-dl project as well - youtube-dl test track 3 \"'/\\\u00e4\u21ad",
+ 'uploader_id': 'ytdl'
+ }
+ },
+ {
+ 'md5': '4eb0a669317cd725f6bbd336a29f923a',
+ 'info_dict': {
+ 'id': '11885680',
+ 'ext': 'm4a',
+ 'title': "youtube-dl project as well - youtube-dl test track 4 \"'/\\\u00e4\u21ad",
+ 'uploader_id': 'ytdl'
+ }
+ },
+ {
+ 'md5': '1893e872e263a2705558d1d319ad19e8',
+ 'info_dict': {
+ 'id': '11885682',
+ 'ext': 'm4a',
+ 'title': "PH - youtube-dl test track 5 \"'/\\\u00e4\u21ad",
+ 'uploader_id': 'ytdl'
+ }
+ },
+ {
+ 'md5': 'b673c46f47a216ab1741ae8836af5899',
+ 'info_dict': {
+ 'id': '11885683',
+ 'ext': 'm4a',
+ 'title': "PH - youtube-dl test track 6 \"'/\\\u00e4\u21ad",
+ 'uploader_id': 'ytdl'
+ }
+ },
+ {
+ 'md5': '1d74534e95df54986da7f5abf7d842b7',
+ 'info_dict': {
+ 'id': '11885684',
+ 'ext': 'm4a',
+ 'title': "phihag - youtube-dl test track 7 \"'/\\\u00e4\u21ad",
+ 'uploader_id': 'ytdl'
+ }
+ },
+ {
+ 'md5': 'f081f47af8f6ae782ed131d38b9cd1c0',
+ 'info_dict': {
+ 'id': '11885685',
+ 'ext': 'm4a',
+ 'title': "phihag - youtube-dl test track 8 \"'/\\\u00e4\u21ad",
+ 'uploader_id': 'ytdl'
+ }
+ }
+ ]
+ }
+
+ def _real_extract(self, url):
+ playlist_id = self._match_id(url)
+
+ webpage = self._download_webpage(url, playlist_id)
+
+ data = self._parse_json(
+ self._search_regex(
+ r"(?s)PAGE\.mix\s*=\s*({.+?});\n", webpage, 'trax information'),
+ playlist_id)
+
+ session = str(random.randint(0, 1000000000))
+ mix_id = data['id']
+ track_count = data['tracks_count']
+ duration = data['duration']
+ avg_song_duration = float(duration) / track_count
+ # duration is sometimes negative; fall back to a predefined average
+ if avg_song_duration <= 0:
+ avg_song_duration = 300
+ first_url = 'http://8tracks.com/sets/%s/play?player=sm&mix_id=%s&format=jsonh' % (session, mix_id)
+ next_url = first_url
+ entries = []
+
+ for i in range(track_count):
+ api_json = None
+ download_tries = 0
+
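+ # The play/next API serves one track per request and can error out early; on failure, sleep roughly one average song length before retrying (at most 4 retries)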
+ while api_json is None:
+ try:
+ api_json = self._download_webpage(
+ next_url, playlist_id,
+ note='Downloading song information %d/%d' % (i + 1, track_count),
+ errnote='Failed to download song information')
+ except ExtractorError:
+ if download_tries > 3:
+ raise
+ else:
+ download_tries += 1
+ self._sleep(avg_song_duration, playlist_id)
+
+ api_data = json.loads(api_json)
+ track_data = api_data['set']['track']
+ info = {
+ 'id': compat_str(track_data['id']),
+ 'url': track_data['track_file_stream_url'],
+ 'title': track_data['performer'] + ' - ' + track_data['name'],
+ 'raw_title': track_data['name'],
+ 'uploader_id': data['user']['login'],
+ 'ext': 'm4a',
+ }
+ entries.append(info)
+
+ next_url = 'http://8tracks.com/sets/%s/next?player=sm&mix_id=%s&format=jsonh&track_id=%s' % (
+ session, mix_id, track_data['id'])
+ return {
+ '_type': 'playlist',
+ 'entries': entries,
+ 'id': compat_str(mix_id),
+ 'display_id': playlist_id,
+ 'title': data.get('name'),
+ 'description': data.get('description'),
+ }
diff --git a/yt_dlp/extractor/einthusan.py b/yt_dlp/extractor/einthusan.py
new file mode 100644
index 0000000..53bc253
--- /dev/null
+++ b/yt_dlp/extractor/einthusan.py
@@ -0,0 +1,106 @@
+import json
+
+from .common import InfoExtractor
+from ..compat import (
+ compat_b64decode,
+ compat_str,
+ compat_urlparse,
+)
+from ..utils import (
+ extract_attributes,
+ ExtractorError,
+ get_elements_by_class,
+ urlencode_postdata,
+)
+
+
+class EinthusanIE(InfoExtractor):
+ _VALID_URL = r'https?://(?P<host>einthusan\.(?:tv|com|ca))/movie/watch/(?P<id>[^/?#&]+)'
+ _TESTS = [{
+ 'url': 'https://einthusan.tv/movie/watch/9097/',
+ 'md5': 'ff0f7f2065031b8a2cf13a933731c035',
+ 'info_dict': {
+ 'id': '9097',
+ 'ext': 'mp4',
+ 'title': 'Ae Dil Hai Mushkil',
+ 'description': 'md5:33ef934c82a671a94652a9b4e54d931b',
+ 'thumbnail': r're:^https?://.*\.jpg$',
+ }
+ }, {
+ 'url': 'https://einthusan.tv/movie/watch/51MZ/?lang=hindi',
+ 'only_matching': True,
+ }, {
+ 'url': 'https://einthusan.com/movie/watch/9097/',
+ 'only_matching': True,
+ }, {
+ 'url': 'https://einthusan.ca/movie/watch/4E9n/?lang=hindi',
+ 'only_matching': True,
+ }]
+
+ # reversed from jsoncrypto.prototype.decrypt() in einthusan-PGMovieWatcher.js
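+ # i.e. plain base64 with a shuffle: characters 10-11 of the string are junk, and its final character is moved into their place before decoding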
+ def _decrypt(self, encrypted_data, video_id):
+ return self._parse_json(compat_b64decode((
+ encrypted_data[:10] + encrypted_data[-1] + encrypted_data[12:-1]
+ )).decode('utf-8'), video_id)
+
+ def _real_extract(self, url):
+ mobj = self._match_valid_url(url)
+ host = mobj.group('host')
+ video_id = mobj.group('id')
+
+ webpage = self._download_webpage(url, video_id)
+
+ title = self._html_search_regex(r'<h3>([^<]+)</h3>', webpage, 'title')
+
+ player_params = extract_attributes(self._search_regex(
+ r'(<section[^>]+id="UIVideoPlayer"[^>]+>)', webpage, 'player parameters'))
+
+ page_id = self._html_search_regex(
+ '<html[^>]+data-pageid="([^"]+)"', webpage, 'page ID')
+ video_data = self._download_json(
+ 'https://%s/ajax/movie/watch/%s/' % (host, video_id), video_id,
+ data=urlencode_postdata({
+ 'xEvent': 'UIVideoPlayer.PingOutcome',
+ 'xJson': json.dumps({
+ 'EJOutcomes': player_params['data-ejpingables'],
+ 'NativeHLS': False
+ }),
+ 'arcVersion': 3,
+ 'appVersion': 59,
+ 'gorilla.csrf.Token': page_id,
+ }))['Data']
+
+ if isinstance(video_data, compat_str) and video_data.startswith('/ratelimited/'):
+ raise ExtractorError(
+ 'Download rate reached. Please try again later.', expected=True)
+
+ ej_links = self._decrypt(video_data['EJLinks'], video_id)
+
+ formats = []
+
+ m3u8_url = ej_links.get('HLSLink')
+ if m3u8_url:
+ formats.extend(self._extract_m3u8_formats(
+ m3u8_url, video_id, ext='mp4', entry_protocol='m3u8_native'))
+
+ mp4_url = ej_links.get('MP4Link')
+ if mp4_url:
+ formats.append({
+ 'url': mp4_url,
+ })
+
+ description = get_elements_by_class('synopsis', webpage)[0]
+ thumbnail = self._html_search_regex(
+ r'''<img[^>]+src=(["'])(?P<url>(?!\1).+?/moviecovers/(?!\1).+?)\1''',
+ webpage, 'thumbnail url', fatal=False, group='url')
+ if thumbnail is not None:
+ thumbnail = compat_urlparse.urljoin(url, thumbnail)
+
+ return {
+ 'id': video_id,
+ 'title': title,
+ 'formats': formats,
+ 'thumbnail': thumbnail,
+ 'description': description,
+ }
diff --git a/yt_dlp/extractor/eitb.py b/yt_dlp/extractor/eitb.py
new file mode 100644
index 0000000..66afbb6
--- /dev/null
+++ b/yt_dlp/extractor/eitb.py
@@ -0,0 +1,80 @@
+from .common import InfoExtractor
+from ..networking import Request
+from ..utils import float_or_none, int_or_none, parse_iso8601
+
+
+class EitbIE(InfoExtractor):
+ IE_NAME = 'eitb.tv'
+ _VALID_URL = r'https?://(?:www\.)?eitb\.tv/(?:eu/bideoa|es/video)/[^/]+/\d+/(?P<id>\d+)'
+
+ _TEST = {
+ 'url': 'http://www.eitb.tv/es/video/60-minutos-60-minutos-2013-2014/4104995148001/4090227752001/lasa-y-zabala-30-anos/',
+ 'md5': 'edf4436247185adee3ea18ce64c47998',
+ 'info_dict': {
+ 'id': '4090227752001',
+ 'ext': 'mp4',
+ 'title': '60 minutos (Lasa y Zabala, 30 años)',
+ 'description': 'Programa de reportajes de actualidad.',
+ 'duration': 3996.76,
+ 'timestamp': 1381789200,
+ 'upload_date': '20131014',
+ 'tags': list,
+ },
+ }
+
+ def _real_extract(self, url):
+ video_id = self._match_id(url)
+
+ video = self._download_json(
+ 'http://mam.eitb.eus/mam/REST/ServiceMultiweb/Video/MULTIWEBTV/%s/' % video_id,
+ video_id, 'Downloading video JSON')
+
+ media = video['web_media'][0]
+
+ formats = []
+ for rendition in media['RENDITIONS']:
+ video_url = rendition.get('PMD_URL')
+ if not video_url:
+ continue
+ tbr = float_or_none(rendition.get('ENCODING_RATE'), 1000)
+ format_id = 'http'
+ if tbr:
+ format_id += '-%d' % int(tbr)
+ formats.append({
+ 'url': video_url,
+ 'format_id': format_id,
+ 'width': int_or_none(rendition.get('FRAME_WIDTH')),
+ 'height': int_or_none(rendition.get('FRAME_HEIGHT')),
+ 'tbr': tbr,
+ })
+
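+ # Secure HLS playback needs an Akamai-style auth token, appended via the hdnts query parameter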
+ hls_url = media.get('HLS_SURL')
+ if hls_url:
+ request = Request(
+ 'http://mam.eitb.eus/mam/REST/ServiceMultiweb/DomainRestrictedSecurity/TokenAuth/',
+ headers={'Referer': url})
+ token_data = self._download_json(
+ request, video_id, 'Downloading auth token', fatal=False)
+ if token_data:
+ token = token_data.get('token')
+ if token:
+ formats.extend(self._extract_m3u8_formats(
+ '%s?hdnts=%s' % (hls_url, token), video_id, m3u8_id='hls', fatal=False))
+
+ hds_url = media.get('HDS_SURL')
+ if hds_url:
+ formats.extend(self._extract_f4m_formats(
+ '%s?hdcore=3.7.0' % hds_url.replace('euskalsvod', 'euskalvod'),
+ video_id, f4m_id='hds', fatal=False))
+
+ return {
+ 'id': video_id,
+ 'title': media.get('NAME_ES') or media.get('name') or media['NAME_EU'],
+ 'description': media.get('SHORT_DESC_ES') or video.get('desc_group') or media.get('SHORT_DESC_EU'),
+ 'thumbnail': media.get('STILL_URL') or media.get('THUMBNAIL_URL'),
+ 'duration': float_or_none(media.get('LENGTH'), 1000),
+ 'timestamp': parse_iso8601(media.get('BROADCST_DATE'), ' '),
+ 'tags': media.get('TAGS'),
+ 'formats': formats,
+ }
diff --git a/yt_dlp/extractor/elementorembed.py b/yt_dlp/extractor/elementorembed.py
new file mode 100644
index 0000000..638893f
--- /dev/null
+++ b/yt_dlp/extractor/elementorembed.py
@@ -0,0 +1,72 @@
+import re
+
+from .common import InfoExtractor
+from .vimeo import VimeoIE
+from .youtube import YoutubeIE
+from ..utils import unescapeHTML, url_or_none
+from ..utils.traversal import traverse_obj
+
+
+class ElementorEmbedIE(InfoExtractor):
+ _VALID_URL = False
+ _WEBPAGE_TESTS = [{
+ 'url': 'https://capitaltv.cy/2023/12/14/υγεια-και-ζωη-14-12-2023-δρ-ξενια-κωσταντινιδο/',
+ 'info_dict': {
+ 'id': 'KgzuxwuQwM4',
+ 'ext': 'mp4',
+ 'title': 'ΥΓΕΙΑ ΚΑΙ ΖΩΗ 14 12 2023 ΔΡ ΞΕΝΙΑ ΚΩΣΤΑΝΤΙΝΙΔΟΥ',
+ 'thumbnail': 'https://i.ytimg.com/vi/KgzuxwuQwM4/maxresdefault.jpg',
+ 'playable_in_embed': True,
+ 'tags': 'count:16',
+ 'like_count': int,
+ 'channel': 'Capital TV Cyprus',
+ 'channel_id': 'UCR8LwVKTLGEXt4ZAErpCMrg',
+ 'availability': 'public',
+ 'description': 'md5:7a3308a22881aea4612358c4ba121f77',
+ 'duration': 2891,
+ 'upload_date': '20231214',
+ 'uploader_id': '@capitaltvcyprus6389',
+ 'live_status': 'not_live',
+ 'channel_url': 'https://www.youtube.com/channel/UCR8LwVKTLGEXt4ZAErpCMrg',
+ 'uploader_url': 'https://www.youtube.com/@capitaltvcyprus6389',
+ 'uploader': 'Capital TV Cyprus',
+ 'age_limit': 0,
+ 'categories': ['News & Politics'],
+ 'view_count': int,
+ 'channel_follower_count': int,
+ },
+ }, {
+ 'url': 'https://elementor.com/academy/theme-builder-collection/?playlist=76011151&video=9e59909',
+ 'info_dict': {
+ 'id': '?playlist=76011151&video=9e59909',
+ 'title': 'Theme Builder Collection - Academy',
+ 'age_limit': 0,
+ 'timestamp': 1702196984.0,
+ 'upload_date': '20231210',
+ 'description': 'md5:7f52c52715ee9e54fd7f82210511673d',
+ 'thumbnail': 'https://elementor.com/academy/wp-content/uploads/2021/07/Theme-Builder-1.png',
+ },
+ 'playlist_count': 11,
+ 'params': {
+ 'skip_download': True,
+ },
+ }]
+ _WIDGET_REGEX = r'<div[^>]+class="[^"]*elementor-widget-video(?:-playlist)?[^"]*"[^>]*data-settings="([^"]*)"'
+
+ def _extract_from_webpage(self, url, webpage):
+ for data_settings in re.findall(self._WIDGET_REGEX, webpage):
+ data = self._parse_json(data_settings, None, fatal=False, transform_source=unescapeHTML)
+ if youtube_url := traverse_obj(data, ('youtube_url', {url_or_none})):
+ yield self.url_result(youtube_url, ie=YoutubeIE)
+
+ for video in traverse_obj(data, ('tabs', lambda _, v: v['_id'], {dict})):
+ if youtube_url := traverse_obj(video, ('youtube_url', {url_or_none})):
+ yield self.url_result(youtube_url, ie=YoutubeIE)
+ if vimeo_url := traverse_obj(video, ('vimeo_url', {url_or_none})):
+ yield self.url_result(vimeo_url, ie=VimeoIE)
+ for direct_url in traverse_obj(video, (('hosted_url', 'external_url'), 'url', {url_or_none})):
+ yield {
+ 'id': video['_id'],
+ 'url': direct_url,
+ 'title': video.get('title'),
+ }
diff --git a/yt_dlp/extractor/elonet.py b/yt_dlp/extractor/elonet.py
new file mode 100644
index 0000000..c5558ff
--- /dev/null
+++ b/yt_dlp/extractor/elonet.py
@@ -0,0 +1,65 @@
+from .common import InfoExtractor
+from ..utils import determine_ext
+
+
+class ElonetIE(InfoExtractor):
+ _VALID_URL = r'https?://elonet\.finna\.fi/Record/kavi\.elonet_elokuva_(?P<id>[0-9]+)'
+ _TESTS = [{
+ 'url': 'https://elonet.finna.fi/Record/kavi.elonet_elokuva_107867',
+ 'info_dict': {
+ 'id': '107867',
+ 'ext': 'mp4',
+ 'title': 'Valkoinen peura',
+ 'thumbnail': r're:^https?://elonet\.finna\.fi/Cover/Show\?id=kavi\.elonet_elokuva_107867.+',
+ 'description': 'md5:bded4201c9677fab10854884fe8f7312',
+ },
+ 'params': {'skip_download': 'dash'},
+ }, {
+ # DASH with subtitles
+ 'url': 'https://elonet.finna.fi/Record/kavi.elonet_elokuva_116539',
+ 'info_dict': {
+ 'id': '116539',
+ 'ext': 'mp4',
+ 'title': 'Minulla on tiikeri',
+ 'thumbnail': r're:^https?://elonet\.finna\.fi/Cover/Show\?id=kavi\.elonet_elokuva_116539.+',
+ 'description': 'md5:5ab72b3fe76d3414e46cc8f277104419',
+ },
+ 'params': {'skip_download': 'dash'},
+ }, {
+ # Page with multiple videos, download the main one
+ 'url': 'https://elonet.finna.fi/Record/kavi.elonet_elokuva_117396',
+ 'info_dict': {
+ 'id': '117396',
+ 'ext': 'mp4',
+ 'title': 'Sampo',
+ 'thumbnail': r're:^https?://elonet\.finna\.fi/Cover/Show\?id=kavi\.elonet_elokuva_117396.+',
+ 'description': 'md5:ec69572a5b054d0ecafe8086b1fa96f7',
+ },
+ 'params': {'skip_download': 'dash'},
+ }]
+
+ def _real_extract(self, url):
+ video_id = self._match_id(url)
+ webpage = self._download_webpage(url, video_id)
+
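+ # The data-video-sources attribute holds a JSON array of sources; the first entry is the page's main video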
+ src = self._parse_json(self._html_search_regex(
+ r'id=\'video-data\'[^>]+data-video-sources="([^"]+)"', webpage, 'json'), video_id)[0]['src']
+ ext = determine_ext(src)
+
+ if ext == 'm3u8':
+ formats, subtitles = self._extract_m3u8_formats_and_subtitles(src, video_id, fatal=False)
+ elif ext == 'mpd':
+ formats, subtitles = self._extract_mpd_formats_and_subtitles(src, video_id, fatal=False)
+ else:
+ formats, subtitles = [], {}
+ self.raise_no_formats(f'Unknown streaming format {ext}')
+
+ return {
+ 'id': video_id,
+ 'title': self._og_search_title(webpage),
+ 'description': self._og_search_description(webpage),
+ 'thumbnail': self._og_search_thumbnail(webpage),
+ 'formats': formats,
+ 'subtitles': subtitles,
+ }
diff --git a/yt_dlp/extractor/elpais.py b/yt_dlp/extractor/elpais.py
new file mode 100644
index 0000000..7c6c880
--- /dev/null
+++ b/yt_dlp/extractor/elpais.py
@@ -0,0 +1,93 @@
+from .common import InfoExtractor
+from ..utils import strip_jsonp, unified_strdate
+
+
+class ElPaisIE(InfoExtractor):
+ _VALID_URL = r'https?://(?:[^.]+\.)?elpais\.com/.*/(?P<id>[^/#?]+)\.html(?:$|[?#])'
+ IE_DESC = 'El País'
+
+ _TESTS = [{
+ 'url': 'http://blogs.elpais.com/la-voz-de-inaki/2014/02/tiempo-nuevo-recetas-viejas.html',
+ 'md5': '98406f301f19562170ec071b83433d55',
+ 'info_dict': {
+ 'id': 'tiempo-nuevo-recetas-viejas',
+ 'ext': 'mp4',
+ 'title': 'Tiempo nuevo, recetas viejas',
+ 'description': 'De lunes a viernes, a partir de las ocho de la mañana, Iñaki Gabilondo nos cuenta su visión de la actualidad nacional e internacional.',
+ 'upload_date': '20140206',
+ }
+ }, {
+ 'url': 'http://elcomidista.elpais.com/elcomidista/2016/02/24/articulo/1456340311_668921.html#?id_externo_nwl=newsletter_diaria20160303t',
+ 'md5': '3bd5b09509f3519d7d9e763179b013de',
+ 'info_dict': {
+ 'id': '1456340311_668921',
+ 'ext': 'mp4',
+ 'title': 'Cómo hacer el mejor café con cafetera italiana',
+ 'description': 'Que sí, que las cápsulas son cómodas. Pero si le pides algo más a la vida, quizá deberías aprender a usar bien la cafetera italiana. No tienes más que ver este vídeo y seguir sus siete normas básicas.',
+ 'upload_date': '20160303',
+ }
+ }, {
+ 'url': 'http://elpais.com/elpais/2017/01/26/ciencia/1485456786_417876.html',
+ 'md5': '9c79923a118a067e1a45789e1e0b0f9c',
+ 'info_dict': {
+ 'id': '1485456786_417876',
+ 'ext': 'mp4',
+ 'title': 'Hallado un barco de la antigua Roma que naufragó en Baleares hace 1.800 años',
+ 'description': 'La nave portaba cientos de ánforas y se hundió cerca de la isla de Cabrera por razones desconocidas',
+ 'upload_date': '20170127',
+ },
+ }, {
+ 'url': 'http://epv.elpais.com/epv/2017/02/14/programa_la_voz_de_inaki/1487062137_075943.html',
+ 'info_dict': {
+ 'id': '1487062137_075943',
+ 'ext': 'mp4',
+ 'title': 'Disyuntivas',
+ 'description': 'md5:a0fb1485c4a6a8a917e6f93878e66218',
+ 'upload_date': '20170214',
+ },
+ 'params': {
+ 'skip_download': True,
+ },
+ }]
+
+ def _real_extract(self, url):
+ video_id = self._match_id(url)
+ webpage = self._download_webpage(url, video_id)
+
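+ # Media URLs on the page are split into a shared prefix (url_cache) and per-item suffixes for the video and thumbnail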
+ prefix = self._html_search_regex(
+ r'var\s+url_cache\s*=\s*"([^"]+)";', webpage, 'URL prefix')
+ id_multimedia = self._search_regex(
+ r"id_multimedia\s*=\s*'([^']+)'", webpage, 'ID multimedia', default=None)
+ if id_multimedia:
+ url_info = self._download_json(
+ 'http://elpais.com/vdpep/1/?pepid=' + id_multimedia, video_id, transform_source=strip_jsonp)
+ video_suffix = url_info['mp4']
+ else:
+ video_suffix = self._search_regex(
+ r"(?:URLMediaFile|urlVideo_\d+)\s*=\s*url_cache\s*\+\s*'([^']+)'", webpage, 'video URL')
+ video_url = prefix + video_suffix
+ thumbnail_suffix = self._search_regex(
+ r"(?:URLMediaStill|urlFotogramaFijo_\d+)\s*=\s*url_cache\s*\+\s*'([^']+)'",
+ webpage, 'thumbnail URL', default=None)
+ thumbnail = (
+ None if thumbnail_suffix is None
+ else prefix + thumbnail_suffix) or self._og_search_thumbnail(webpage)
+ title = self._html_search_regex(
+ (r"tituloVideo\s*=\s*'([^']+)'",
+ r'<h2 class="entry-header entry-title.*?>(.*?)</h2>',
+ r'<h1[^>]+class="titulo"[^>]*>([^<]+)'),
+ webpage, 'title', default=None) or self._og_search_title(webpage)
+ upload_date = unified_strdate(self._search_regex(
+ r'<p class="date-header date-int updated"\s+title="([^"]+)">',
+ webpage, 'upload date', default=None) or self._html_search_meta(
+ 'datePublished', webpage, 'timestamp'))
+
+ return {
+ 'id': video_id,
+ 'url': video_url,
+ 'title': title,
+ 'description': self._og_search_description(webpage),
+ 'thumbnail': thumbnail,
+ 'upload_date': upload_date,
+ }
diff --git a/yt_dlp/extractor/eltrecetv.py b/yt_dlp/extractor/eltrecetv.py
new file mode 100644
index 0000000..f64023a
--- /dev/null
+++ b/yt_dlp/extractor/eltrecetv.py
@@ -0,0 +1,63 @@
+from .common import InfoExtractor
+
+
+class ElTreceTVIE(InfoExtractor):
+ IE_DESC = 'El Trece TV (Argentina)'
+ _VALID_URL = r'https?://(?:www\.)?eltrecetv\.com\.ar/[\w-]+/capitulos/temporada-\d+/(?P<id>[\w-]+)'
+ _TESTS = [
+ {
+ 'url': 'https://www.eltrecetv.com.ar/ahora-caigo/capitulos/temporada-2023/programa-del-061023/',
+ 'md5': '71a66673dc63f9a5939d97bfe4b311ba',
+ 'info_dict': {
+ 'id': 'AHCA05102023145553329621094',
+ 'ext': 'mp4',
+ 'title': 'AHORA CAIGO - Programa 06/10/23',
+ 'thumbnail': 'https://thumbs.vodgc.net/AHCA05102023145553329621094.JPG?649339',
+ }
+ },
+ {
+ 'url': 'https://www.eltrecetv.com.ar/poco-correctos/capitulos/temporada-2023/programa-del-250923-invitada-dalia-gutmann/',
+ 'only_matching': True,
+ },
+ {
+ 'url': 'https://www.eltrecetv.com.ar/argentina-tierra-de-amor-y-venganza/capitulos/temporada-2023/atav-2-capitulo-121-del-250923/',
+ 'only_matching': True,
+ },
+ {
+ 'url': 'https://www.eltrecetv.com.ar/ahora-caigo/capitulos/temporada-2023/programa-del-250923/',
+ 'only_matching': True,
+ },
+ {
+ 'url': 'https://www.eltrecetv.com.ar/pasaplatos/capitulos/temporada-2023/pasaplatos-el-restaurante-del-250923/',
+ 'only_matching': True,
+ },
+ {
+ 'url': 'https://www.eltrecetv.com.ar/el-galpon/capitulos/temporada-2023/programa-del-160923-invitado-raul-lavie/',
+ 'only_matching': True,
+ }
+ ]
+
+ def _real_extract(self, url):
+ slug = self._match_id(url)
+ webpage = self._download_webpage(url, slug)
+ config = self._search_json(
+ r'Fusion.globalContent\s*=', webpage, 'content', slug)['promo_items']['basic']['embed']['config']
+ video_url = config['m3u8']
+ video_id = self._search_regex(r'/(\w+)\.m3u8', video_url, 'video id', default=slug)
+
+ formats, subtitles = self._extract_m3u8_formats_and_subtitles(video_url, video_id, 'mp4', m3u8_id='hls')
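+ # Progressive HTTP formats are derived by stripping the 23-character '/tracks-v1a1/index.m3u8' suffix from matching HLS variant URLs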
+ formats.extend([{
+ 'url': f['url'][:-23],
+ 'format_id': f['format_id'].replace('hls', 'http'),
+ 'width': f.get('width'),
+ 'height': f.get('height'),
+ } for f in formats if f['url'].endswith('/tracks-v1a1/index.m3u8') and f.get('height') != 1080])
+
+ return {
+ 'id': video_id,
+ 'title': config.get('title'),
+ 'thumbnail': config.get('thumbnail'),
+ 'formats': formats,
+ 'subtitles': subtitles,
+ }
diff --git a/yt_dlp/extractor/embedly.py b/yt_dlp/extractor/embedly.py
new file mode 100644
index 0000000..a424b49
--- /dev/null
+++ b/yt_dlp/extractor/embedly.py
@@ -0,0 +1,110 @@
+import re
+import urllib.parse
+
+from .common import InfoExtractor
+from .youtube import YoutubeTabIE
+from ..utils import parse_qs, smuggle_url, traverse_obj
+
+
+class EmbedlyIE(InfoExtractor):
+ _VALID_URL = r'https?://(?:www\.|cdn\.)?embedly\.com/widgets/media\.html\?(?:[^#]*?&)?(?:src|url)=(?:[^#&]+)'
+ _TESTS = [{
+ 'url': 'https://cdn.embedly.com/widgets/media.html?src=http%3A%2F%2Fwww.youtube.com%2Fembed%2Fvideoseries%3Flist%3DUUGLim4T2loE5rwCMdpCIPVg&url=https%3A%2F%2Fwww.youtube.com%2Fwatch%3Fv%3DSU4fj_aEMVw%26list%3DUUGLim4T2loE5rwCMdpCIPVg&image=http%3A%2F%2Fi.ytimg.com%2Fvi%2FSU4fj_aEMVw%2Fhqdefault.jpg&key=8ee8a2e6a8cc47aab1a5ee67f9a178e0&type=text%2Fhtml&schema=youtube&autoplay=1',
+ 'info_dict': {
+ 'id': 'UUGLim4T2loE5rwCMdpCIPVg',
+ 'modified_date': '20221225',
+ 'view_count': int,
+ 'uploader_url': 'https://www.youtube.com/@TraciHinesMusic',
+ 'channel_id': 'UCGLim4T2loE5rwCMdpCIPVg',
+ 'uploader': 'TraciJHines',
+ 'channel_url': 'https://www.youtube.com/@TraciHinesMusic',
+ 'channel': 'TraciJHines',
+ 'availability': 'public',
+ 'uploader_id': 'UCGLim4T2loE5rwCMdpCIPVg',
+ 'description': '',
+ 'tags': [],
+ 'title': 'Uploads from TraciJHines',
+ },
+ 'playlist_mincount': 10,
+ }, {
+ 'url': 'https://cdn.embedly.com/widgets/media.html?src=http%3A%2F%2Fwww.youtube.com%2Fembed%2Fvideoseries%3Flist%3DUUGLim4T2loE5rwCMdpCIPVg&url=https%3A%2F%2Fwww.youtube.com%2Fwatch%3Fv%3DSU4fj_aEMVw%26list%3DUUGLim4T2loE5rwCMdpCIPVg&image=http%3A%2F%2Fi.ytimg.com%2Fvi%2FSU4fj_aEMVw%2Fhqdefault.jpg&key=8ee8a2e6a8cc47aab1a5ee67f9a178e0&type=text%2Fhtml&schema=youtube&autoplay=1',
+ 'params': {'noplaylist': True},
+ 'info_dict': {
+ 'id': 'SU4fj_aEMVw',
+ 'ext': 'mp4',
+ 'title': 'I\'m on Patreon!',
+ 'age_limit': 0,
+ 'categories': ['Entertainment'],
+ 'thumbnail': 'https://i.ytimg.com/vi_webp/SU4fj_aEMVw/maxresdefault.webp',
+ 'live_status': 'not_live',
+ 'playable_in_embed': True,
+ 'channel': 'TraciJHines',
+ 'uploader_id': 'TraciJHines',
+ 'channel_url': 'https://www.youtube.com/channel/UCGLim4T2loE5rwCMdpCIPVg',
+ 'uploader_url': 'http://www.youtube.com/user/TraciJHines',
+ 'upload_date': '20150211',
+ 'duration': 282,
+ 'availability': 'public',
+ 'channel_follower_count': int,
+ 'tags': 'count:39',
+ 'view_count': int,
+ 'comment_count': int,
+ 'channel_id': 'UCGLim4T2loE5rwCMdpCIPVg',
+ 'like_count': int,
+ 'uploader': 'TraciJHines',
+ 'description': 'md5:8af6425f50bd46fbf29f3db0fc3a8364',
+ 'chapters': list,
+
+ },
+ }, {
+ 'url': 'https://cdn.embedly.com/widgets/media.html?src=https://player.vimeo.com/video/1234567?h=abcdefgh',
+ 'only_matching': True,
+ }]
+
+ _WEBPAGE_TESTS = [{
+ 'url': 'http://www.permacultureetc.com/2022/12/comment-greffer-facilement-les-arbres-fruitiers.html',
+ 'info_dict': {
+ 'id': 'pfUK_ADTvgY',
+ 'ext': 'mp4',
+ 'title': 'Comment greffer facilement les arbres fruitiers ? (mois par mois)',
+ 'description': 'md5:d3a876995e522f138aabb48e040bfb4c',
+ 'view_count': int,
+ 'upload_date': '20221210',
+ 'comment_count': int,
+ 'live_status': 'not_live',
+ 'channel_id': 'UCsM4_jihNFYe4CtSkXvDR-Q',
+ 'channel_follower_count': int,
+ 'tags': ['permaculture', 'jardinage', 'dekarz', 'autonomie', 'greffe', 'fruitiers', 'arbres', 'jardin forêt', 'forêt comestible', 'damien'],
+ 'playable_in_embed': True,
+ 'uploader': 'permaculture agroécologie etc...',
+ 'channel': 'permaculture agroécologie etc...',
+ 'thumbnail': 'https://i.ytimg.com/vi/pfUK_ADTvgY/sddefault.jpg',
+ 'duration': 1526,
+ 'channel_url': 'https://www.youtube.com/channel/UCsM4_jihNFYe4CtSkXvDR-Q',
+ 'age_limit': 0,
+ 'uploader_id': 'permacultureetc',
+ 'like_count': int,
+ 'uploader_url': 'http://www.youtube.com/user/permacultureetc',
+ 'categories': ['Education'],
+ 'availability': 'public',
+ },
+ }]
+
+ @classmethod
+ def _extract_from_webpage(cls, url, webpage):
+ # Deliberately yield results without ie=cls, so the extracted URLs aren't forced through this extractor's suitable() check
+ for mobj in re.finditer(r'class=["\']embedly-card["\'][^>]+href=["\'](?P<url>[^"\']+)', webpage):
+ yield cls.url_result(mobj.group('url'))
+
+ for mobj in re.finditer(r'class=["\']embedly-embed["\'][^>]+src=["\'][^"\']*url=(?P<url>[^&]+)', webpage):
+ yield cls.url_result(urllib.parse.unquote(mobj.group('url')))
+
+ def _real_extract(self, url):
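+ # Hand YouTube playlist/tab links in the 'url' param straight to that extractor; otherwise resolve the 'src' (or 'url') embed target with the referer smuggled in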
+ qs = parse_qs(url)
+ src = urllib.parse.unquote(traverse_obj(qs, ('url', 0)) or '')
+ if src and YoutubeTabIE.suitable(src):
+ return self.url_result(src, YoutubeTabIE)
+ return self.url_result(smuggle_url(
+ urllib.parse.unquote(traverse_obj(qs, ('src', 0), ('url', 0))),
+ {'referer': url}))
diff --git a/yt_dlp/extractor/epicon.py b/yt_dlp/extractor/epicon.py
new file mode 100644
index 0000000..3bfcc54
--- /dev/null
+++ b/yt_dlp/extractor/epicon.py
@@ -0,0 +1,115 @@
+import re
+
+from .common import InfoExtractor
+from ..utils import ExtractorError
+
+
+class EpiconIE(InfoExtractor):
+ _VALID_URL = r'https?://(?:www\.)?epicon\.in/(?:documentaries|movies|tv-shows/[^/?#]+/[^/?#]+)/(?P<id>[^/?#]+)'
+ _TESTS = [{
+ 'url': 'https://www.epicon.in/documentaries/air-battle-of-srinagar',
+ 'info_dict': {
+ 'id': 'air-battle-of-srinagar',
+ 'ext': 'mp4',
+ 'title': 'Air Battle of Srinagar',
+ 'description': 'md5:c4de2013af9bc05ae4392e4115d518d7',
+ 'thumbnail': r're:^https?://.*\.jpg$',
+ }
+ }, {
+ 'url': 'https://www.epicon.in/movies/krit',
+ 'info_dict': {
+ 'id': 'krit',
+ 'ext': 'mp4',
+ 'title': 'Krit',
+ 'description': 'md5:c12b35dad915d48ccff7f013c79bab4a',
+ 'thumbnail': r're:^https?://.*\.jpg$',
+ }
+ }, {
+ 'url': 'https://www.epicon.in/tv-shows/paapnaashini-ganga/season-1/vardaan',
+ 'info_dict': {
+ 'id': 'vardaan',
+ 'ext': 'mp4',
+ 'title': 'Paapnaashini Ganga - Season 1 - Ep 1 - VARDAAN',
+ 'description': 'md5:f517058c3d0402398eefa6242f4dd6ae',
+ 'thumbnail': r're:^https?://.*\.jpg$',
+ }
+ }, {
+ 'url': 'https://www.epicon.in/movies/jayadev',
+ 'info_dict': {
+ 'id': 'jayadev',
+ 'ext': 'mp4',
+ 'title': 'Jayadev',
+ 'description': 'md5:09e349eecd8e585a3b6466904f19df6c',
+ 'thumbnail': r're:^https?://.*\.jpg$',
+ }
+ }]
+
+ def _real_extract(self, url):
+ video_id = self._match_id(url)
+ webpage = self._download_webpage(url, video_id)
+ cid = self._search_regex(r'class="mylist-icon iconclick" id="(\d+)', webpage, 'cid')
+ headers = {'content-type': 'application/x-www-form-urlencoded; charset=UTF-8'}
+ data = f'cid={cid}&action=st&type=video'.encode()
+ data_json = self._parse_json(self._download_json('https://www.epicon.in/ajaxplayer/', video_id, headers=headers, data=data), video_id)
+
+ if not data_json['success']:
+ raise ExtractorError(data_json['message'], expected=True)
+
+ title = self._search_regex(r'setplaytitle=\"([^\"]+)', webpage, 'title')
+ description = self._og_search_description(webpage) or None
+ thumbnail = self._og_search_thumbnail(webpage) or None
+ formats = self._extract_m3u8_formats(data_json['url']['video_url'], video_id)
+
+ subtitles = {}
+ for subtitle in data_json.get('subtitles', []):
+ sub_url = subtitle.get('file')
+ if not sub_url:
+ continue
+ subtitles.setdefault(subtitle.get('lang', 'English'), []).append({
+ 'url': self._proto_relative_url(sub_url),
+ })
+
+ return {
+ 'id': video_id,
+ 'formats': formats,
+ 'title': title,
+ 'description': description,
+ 'thumbnail': thumbnail,
+ 'subtitles': subtitles,
+ }
+
+
+class EpiconSeriesIE(InfoExtractor):
+ _VALID_URL = r'(?!.*season)https?://(?:www\.)?epicon\.in/tv-shows/(?P<id>[^/?#]+)'
+ _TESTS = [{
+ 'url': 'https://www.epicon.in/tv-shows/1-of-something',
+ 'playlist_mincount': 5,
+ 'info_dict': {
+ 'id': '1-of-something',
+ },
+ }, {
+ 'url': 'https://www.epicon.in/tv-shows/eco-india-english',
+ 'playlist_mincount': 76,
+ 'info_dict': {
+ 'id': 'eco-india-english',
+ },
+ }, {
+ 'url': 'https://www.epicon.in/tv-shows/s/',
+ 'playlist_mincount': 25,
+ 'info_dict': {
+ 'id': 's',
+ },
+ }, {
+ 'url': 'https://www.epicon.in/tv-shows/ekaant',
+ 'playlist_mincount': 38,
+ 'info_dict': {
+ 'id': 'ekaant',
+ },
+ }]
+
+ def _real_extract(self, url):
+ playlist_id = self._match_id(url)
+ webpage = self._download_webpage(url, playlist_id)
+ episodes = re.findall(r'ct-tray-url="(tv-shows/%s/[^"]+)' % playlist_id, webpage)
+ entries = [self.url_result('https://www.epicon.in/%s' % episode, ie=EpiconIE.ie_key()) for episode in episodes]
+ return self.playlist_result(entries, playlist_id=playlist_id)
diff --git a/yt_dlp/extractor/epidemicsound.py b/yt_dlp/extractor/epidemicsound.py
new file mode 100644
index 0000000..0d81b11
--- /dev/null
+++ b/yt_dlp/extractor/epidemicsound.py
@@ -0,0 +1,108 @@
+from .common import InfoExtractor
+from ..utils import (
+ float_or_none,
+ int_or_none,
+ orderedSet,
+ parse_iso8601,
+ parse_qs,
+ parse_resolution,
+ str_or_none,
+ traverse_obj,
+ url_or_none,
+)
+
+
+class EpidemicSoundIE(InfoExtractor):
+ _VALID_URL = r'https?://(?:www\.)?epidemicsound\.com/track/(?P<id>[0-9a-zA-Z]+)'
+ _TESTS = [{
+ 'url': 'https://www.epidemicsound.com/track/yFfQVRpSPz/',
+ 'md5': 'd98ff2ddb49e8acab9716541cbc9dfac',
+ 'info_dict': {
+ 'id': '45014',
+ 'display_id': 'yFfQVRpSPz',
+ 'ext': 'mp3',
+ 'title': 'Door Knock Door 1',
+ 'alt_title': 'Door Knock Door 1',
+ 'tags': ['foley', 'door', 'knock', 'glass', 'window', 'glass door knock'],
+ 'categories': ['Misc. Door'],
+ 'duration': 1,
+ 'thumbnail': 'https://cdn.epidemicsound.com/curation-assets/commercial-release-cover-images/default-sfx/3000x3000.jpg',
+ 'timestamp': 1415320353,
+ 'upload_date': '20141107',
+ },
+ }, {
+ 'url': 'https://www.epidemicsound.com/track/mj8GTTwsZd/',
+ 'md5': 'c82b745890f9baf18dc2f8d568ee3830',
+ 'info_dict': {
+ 'id': '148700',
+ 'display_id': 'mj8GTTwsZd',
+ 'ext': 'mp3',
+ 'title': 'Noplace',
+ 'tags': ['liquid drum n bass', 'energetic'],
+ 'categories': ['drum and bass'],
+ 'duration': 237,
+ 'timestamp': 1694426482,
+ 'thumbnail': 'https://cdn.epidemicsound.com/curation-assets/commercial-release-cover-images/11138/3000x3000.jpg',
+ 'upload_date': '20230911',
+ 'release_timestamp': 1700535606,
+ 'release_date': '20231121',
+ },
+ }]
+
+ @staticmethod
+ def _epidemic_parse_thumbnail(url: str):
+ if not url_or_none(url):
+ return None
+
+ return {
+ 'url': url,
+ **(traverse_obj(url, ({parse_qs}, {
+ 'width': ('width', 0, {int_or_none}),
+ 'height': ('height', 0, {int_or_none}),
+ })) or parse_resolution(url)),
+ }
+
+ @staticmethod
+ def _epidemic_fmt_or_none(f):
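+ # Mirror 'format' and 'format_id' into each other, drop entries lacking either, and deprioritize individual stems (anything but the 'full' mix)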
+ if not f.get('format'):
+ f['format'] = f.get('format_id')
+ elif not f.get('format_id'):
+ f['format_id'] = f['format']
+ if not f['url'] or not f['format']:
+ return None
+ if f.get('format_note'):
+ f['format_note'] = f'track ID {f["format_note"]}'
+ if f['format'] != 'full':
+ f['preference'] = -2
+ return f
+
+ def _real_extract(self, url):
+ video_id = self._match_id(url)
+ json_data = self._download_json(f'https://www.epidemicsound.com/json/track/{video_id}', video_id)
+
+ thumbnails = traverse_obj(json_data, [('imageUrl', 'cover')])
+ thumb_base_url = traverse_obj(json_data, ('coverArt', 'baseUrl', {url_or_none}))
+ if thumb_base_url:
+ thumbnails.extend(traverse_obj(json_data, (
+ 'coverArt', 'sizes', ..., {thumb_base_url.__add__})))
+
+ return traverse_obj(json_data, {
+ 'id': ('id', {str_or_none}),
+ 'display_id': ('publicSlug', {str}),
+ 'title': ('title', {str}),
+ 'alt_title': ('oldTitle', {str}),
+ 'duration': ('length', {float_or_none}),
+ 'timestamp': ('added', {parse_iso8601}),
+ 'release_timestamp': ('releaseDate', {parse_iso8601}),
+ 'categories': ('genres', ..., 'tag', {str}),
+ 'tags': ('metadataTags', ..., {str}),
+ 'age_limit': ('isExplicit', {lambda b: 18 if b else None}),
+ 'thumbnails': ({lambda _: thumbnails}, {orderedSet}, ..., {self._epidemic_parse_thumbnail}),
+ 'formats': ('stems', {dict.items}, ..., {
+ 'format': (0, {str_or_none}),
+ 'format_note': (1, 's3TrackId', {str_or_none}),
+ 'format_id': (1, 'stemType', {str}),
+ 'url': (1, 'lqMp3Url', {url_or_none}),
+ }, {self._epidemic_fmt_or_none}),
+ })
diff --git a/yt_dlp/extractor/eplus.py b/yt_dlp/extractor/eplus.py
new file mode 100644
index 0000000..88a8d5a
--- /dev/null
+++ b/yt_dlp/extractor/eplus.py
@@ -0,0 +1,184 @@
+import json
+
+from .common import InfoExtractor
+from ..utils import (
+ ExtractorError,
+ try_call,
+ unified_timestamp,
+ urlencode_postdata,
+)
+
+
+class EplusIbIE(InfoExtractor):
+ _NETRC_MACHINE = 'eplus'
+ IE_NAME = 'eplus'
+ IE_DESC = 'e+ (イープラス)'
+ _VALID_URL = [r'https?://live\.eplus\.jp/ex/player\?ib=(?P<id>(?:\w|%2B|%2F){86}%3D%3D)',
+ r'https?://live\.eplus\.jp/(?P<id>sample|\d+)']
+ _TESTS = [{
+ 'url': 'https://live.eplus.jp/ex/player?ib=YEFxb3Vyc2Dombnjg7blkrLlrablnJLjgrnjgq%2Fjg7zjg6vjgqLjgqTjg4njg6vlkIzlpb3kvJpgTGllbGxhIQ%3D%3D',
+ 'info_dict': {
+ 'id': '354502-0001-002',
+ 'title': 'LoveLive!Series Presents COUNTDOWN LoveLive! 2021→2022~LIVE with a smile!~【Streaming+(配信)】',
+ 'live_status': 'was_live',
+ 'release_date': '20211231',
+ 'release_timestamp': 1640952000,
+ 'description': str,
+ },
+ 'params': {
+ 'skip_download': True,
+ 'ignore_no_formats_error': True,
+ },
+ 'expected_warnings': [
+ 'Could not find the playlist URL. This event may not be accessible',
+ 'No video formats found!',
+ 'Requested format is not available',
+ ],
+ }, {
+ 'url': 'https://live.eplus.jp/sample',
+ 'info_dict': {
+ 'id': 'stream1ng20210719-test-005',
+ 'title': 'Online streaming test for DRM',
+ 'live_status': 'was_live',
+ 'release_date': '20210719',
+ 'release_timestamp': 1626703200,
+ },
+ 'params': {
+ 'skip_download': True,
+ 'ignore_no_formats_error': True,
+ },
+ 'expected_warnings': [
+ 'Could not find the playlist URL. This event may not be accessible',
+ 'No video formats found!',
+ 'Requested format is not available',
+ 'This video is DRM protected',
+ ],
+ }, {
+ 'url': 'https://live.eplus.jp/2053935',
+ 'info_dict': {
+ 'id': '331320-0001-001',
+ 'title': '丘みどり2020配信LIVE Vol.2 ~秋麗~ 【Streaming+(配信チケット)】',
+ 'live_status': 'was_live',
+ 'release_date': '20200920',
+ 'release_timestamp': 1600596000,
+ },
+ 'params': {
+ 'skip_download': True,
+ 'ignore_no_formats_error': True,
+ },
+ 'expected_warnings': [
+ 'Could not find the playlist URL. This event may not be accessible',
+ 'No video formats found!',
+ 'Requested format is not available',
+ ],
+ }]
+
+ _USER_AGENT = 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/119.0.0.0 Safari/537.36 Edg/119.0.0.0'
+
+ def _login(self, username, password, urlh):
+ if not self._get_cookies('https://live.eplus.jp/').get('ci_session'):
+ raise ExtractorError('Unable to get ci_session cookie')
+
+ cltft_token = urlh.headers.get('X-CLTFT-Token')
+ if not cltft_token:
+ raise ExtractorError('Unable to get X-CLTFT-Token')
+ self._set_cookie('live.eplus.jp', 'X-CLTFT-Token', cltft_token)
+
+ login_json = self._download_json(
+ 'https://live.eplus.jp/member/api/v1/FTAuth/idpw', None,
+ note='Sending pre-login info', errnote='Unable to send pre-login info', headers={
+ 'Content-Type': 'application/json; charset=UTF-8',
+ 'Referer': urlh.url,
+ 'X-Cltft-Token': cltft_token,
+ 'Accept': '*/*',
+ }, data=json.dumps({
+ 'loginId': username,
+ 'loginPassword': password,
+ }).encode())
+ if not login_json.get('isSuccess'):
+ raise ExtractorError('Login failed: Invalid id or password', expected=True)
+
+ self._request_webpage(
+ urlh.url, None, note='Logging in', errnote='Unable to log in',
+ data=urlencode_postdata({
+ 'loginId': username,
+ 'loginPassword': password,
+ 'Token.Default': cltft_token,
+ 'op': 'nextPage',
+ }), headers={'Referer': urlh.url})
+
+ def _real_extract(self, url):
+ video_id = self._match_id(url)
+ webpage, urlh = self._download_webpage_handle(
+ url, video_id, headers={'User-Agent': self._USER_AGENT})
+ if urlh.url.startswith('https://live.eplus.jp/member/auth'):
+ username, password = self._get_login_info()
+ if not username:
+ self.raise_login_required()
+ self._login(username, password, urlh)
+ webpage = self._download_webpage(
+ url, video_id, headers={'User-Agent': self._USER_AGENT})
+
+ data_json = self._search_json(r'<script>\s*var app\s*=', webpage, 'data json', video_id)
+
+ if data_json.get('drm_mode') == 'ON':
+ self.report_drm(video_id)
+
+ delivery_status = data_json.get('delivery_status')
+ archive_mode = data_json.get('archive_mode')
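+ # event_datetime is local Japan time (JST, UTC+9); subtract 9 hours (32400 s) to get a UTC timestamp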
+ release_timestamp = try_call(lambda: unified_timestamp(data_json['event_datetime']) - 32400)
+ release_timestamp_str = data_json.get('event_datetime_text') # JST
+
+ self.write_debug(f'delivery_status = {delivery_status}, archive_mode = {archive_mode}')
+
+ if delivery_status == 'PREPARING':
+ live_status = 'is_upcoming'
+ elif delivery_status == 'STARTED':
+ live_status = 'is_live'
+ elif delivery_status == 'STOPPED':
+ if archive_mode != 'ON':
+ raise ExtractorError(
+ 'This event has ended and there is no archive for this event', expected=True)
+ live_status = 'post_live'
+ elif delivery_status == 'WAIT_CONFIRM_ARCHIVED':
+ live_status = 'post_live'
+ elif delivery_status == 'CONFIRMED_ARCHIVE':
+ live_status = 'was_live'
+ else:
+ self.report_warning(f'Unknown delivery_status {delivery_status}, treating it as live')
+ live_status = 'is_live'
+
+ formats = []
+
+ m3u8_playlist_urls = self._search_json(
+ r'var\s+listChannels\s*=', webpage, 'hls URLs', video_id, contains_pattern=r'\[.+\]', default=[])
+ if not m3u8_playlist_urls:
+ if live_status == 'is_upcoming':
+ self.raise_no_formats(
+ f'Could not find the playlist URL. This live event will begin at {release_timestamp_str} JST', expected=True)
+ else:
+ self.raise_no_formats(
+ 'Could not find the playlist URL. This event may not be accessible', expected=True)
+ elif live_status == 'is_upcoming':
+ self.raise_no_formats(f'This live event will begin at {release_timestamp_str} JST', expected=True)
+ elif live_status == 'post_live':
+ self.raise_no_formats('This event has ended, and the archive will be available shortly', expected=True)
+ else:
+ for m3u8_playlist_url in m3u8_playlist_urls:
+ formats.extend(self._extract_m3u8_formats(m3u8_playlist_url, video_id))
+ # FIXME: HTTP request headers need to be updated to continue download
+ warning = 'Due to technical limitations, the download will be interrupted after one hour'
+ if live_status == 'is_live':
+ self.report_warning(warning)
+ elif live_status == 'was_live':
+ self.report_warning(f'{warning}. You can restart to continue the download')
+
+ return {
+ 'id': data_json['app_id'],
+ 'title': data_json.get('app_name'),
+ 'formats': formats,
+ 'live_status': live_status,
+ 'description': data_json.get('content'),
+ 'release_timestamp': release_timestamp,
+ }
diff --git a/yt_dlp/extractor/epoch.py b/yt_dlp/extractor/epoch.py
new file mode 100644
index 0000000..110e78c
--- /dev/null
+++ b/yt_dlp/extractor/epoch.py
@@ -0,0 +1,56 @@
+from .common import InfoExtractor
+from ..utils import extract_attributes, get_element_html_by_id
+
+
+class EpochIE(InfoExtractor):
+ _VALID_URL = r'https?://www\.theepochtimes\.com/[\w-]+_(?P<id>\d+)\.html'
+ _TESTS = [
+ {
+ 'url': 'https://www.theepochtimes.com/they-can-do-audio-video-physical-surveillance-on-you-24h-365d-a-year-rex-lee-on-intrusive-apps_4661688.html',
+ 'info_dict': {
+ 'id': 'a3dd732c-4750-4bc8-8156-69180668bda1',
+ 'ext': 'mp4',
+ 'title': '‘They Can Do Audio, Video, Physical Surveillance on You 24H/365D a Year’: Rex Lee on Intrusive Apps',
+ }
+ },
+ {
+ 'url': 'https://www.theepochtimes.com/the-communist-partys-cyberattacks-on-america-explained-rex-lee-talks-tech-hybrid-warfare_4342413.html',
+ 'info_dict': {
+ 'id': '276c7f46-3bbf-475d-9934-b9bbe827cf0a',
+ 'ext': 'mp4',
+ 'title': 'The Communist Party’s Cyberattacks on America Explained; Rex Lee Talks Tech Hybrid Warfare',
+ }
+ },
+ {
+ 'url': 'https://www.theepochtimes.com/kash-patel-a-6-year-saga-of-government-corruption-from-russiagate-to-mar-a-lago_4690250.html',
+ 'info_dict': {
+ 'id': 'aa9ceecd-a127-453d-a2de-7153d6fd69b6',
+ 'ext': 'mp4',
+ 'title': 'Kash Patel: A ‘6-Year-Saga’ of Government Corruption, From Russiagate to Mar-a-Lago',
+ }
+ },
+ {
+ 'url': 'https://www.theepochtimes.com/dick-morris-discusses-his-book-the-return-trumps-big-2024-comeback_4819205.html',
+ 'info_dict': {
+ 'id': '9489f994-2a20-4812-b233-ac0e5c345632',
+ 'ext': 'mp4',
+ 'title': 'Dick Morris Discusses His Book ‘The Return: Trump’s Big 2024 Comeback’',
+ }
+ },
+ ]
+
+ def _real_extract(self, url):
+ video_id = self._match_id(url)
+ webpage = self._download_webpage(url, video_id)
+
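+ # The #videobox element carries a Youmaker video ID, which maps to a predictable HLS playlist URL on the Youmaker CDN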
+ youmaker_video_id = extract_attributes(get_element_html_by_id('videobox', webpage))['data-id']
+ formats, subtitles = self._extract_m3u8_formats_and_subtitles(
+ f'http://vs1.youmaker.com/assets/{youmaker_video_id}/playlist.m3u8', video_id, 'mp4', m3u8_id='hls')
+
+ return {
+ 'id': youmaker_video_id,
+ 'formats': formats,
+ 'subtitles': subtitles,
+ 'title': self._html_extract_title(webpage)
+ }
diff --git a/yt_dlp/extractor/eporner.py b/yt_dlp/extractor/eporner.py
new file mode 100644
index 0000000..b18a76c
--- /dev/null
+++ b/yt_dlp/extractor/eporner.py
@@ -0,0 +1,139 @@
+from .common import InfoExtractor
+from ..utils import (
+ ExtractorError,
+ encode_base_n,
+ get_elements_by_class,
+ int_or_none,
+ join_nonempty,
+ merge_dicts,
+ parse_duration,
+ str_to_int,
+ url_or_none,
+)
+
+
+class EpornerIE(InfoExtractor):
+ _VALID_URL = r'https?://(?:www\.)?eporner\.com/(?:(?:hd-porn|embed)/|video-)(?P<id>\w+)(?:/(?P<display_id>[\w-]+))?'
+ _TESTS = [{
+ 'url': 'http://www.eporner.com/hd-porn/95008/Infamous-Tiffany-Teen-Strip-Tease-Video/',
+ 'md5': '39d486f046212d8e1b911c52ab4691f8',
+ 'info_dict': {
+ 'id': 'qlDUmNsj6VS',
+ 'display_id': 'Infamous-Tiffany-Teen-Strip-Tease-Video',
+ 'ext': 'mp4',
+ 'title': 'Infamous Tiffany Teen Strip Tease Video',
+ 'description': 'md5:764f39abf932daafa37485eb46efa152',
+ 'timestamp': 1232520922,
+ 'upload_date': '20090121',
+ 'duration': 1838,
+ 'view_count': int,
+ 'age_limit': 18,
+ },
+ 'params': {
+ 'proxy': '127.0.0.1:8118'
+ }
+ }, {
+ # New (May 2016) URL layout
+ 'url': 'http://www.eporner.com/hd-porn/3YRUtzMcWn0/Star-Wars-XXX-Parody/',
+ 'only_matching': True,
+ }, {
+ 'url': 'http://www.eporner.com/hd-porn/3YRUtzMcWn0',
+ 'only_matching': True,
+ }, {
+ 'url': 'http://www.eporner.com/embed/3YRUtzMcWn0',
+ 'only_matching': True,
+ }, {
+ 'url': 'https://www.eporner.com/video-FJsA19J3Y3H/one-of-the-greats/',
+ 'only_matching': True,
+ }]
+
+ def _real_extract(self, url):
+ mobj = self._match_valid_url(url)
+ video_id = mobj.group('id')
+ display_id = mobj.group('display_id') or video_id
+
+ webpage, urlh = self._download_webpage_handle(url, display_id)
+
+ video_id = self._match_id(urlh.url)
+
+ video_hash = self._search_regex(
+ r'hash\s*[:=]\s*["\']([\da-f]{32})', webpage, 'hash')
+
+ title = self._og_search_title(webpage, default=None) or self._html_search_regex(
+ r'<title>(.+?) - EPORNER', webpage, 'title')
+
+ # Reverse engineered from vjs.js
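+ # Splits the 32-digit hex hash into four 8-digit chunks and re-encodes each in base 36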
+ def calc_hash(s):
+ return ''.join(encode_base_n(int(s[lb:lb + 8], 16), 36) for lb in range(0, 32, 8))
+
+ video = self._download_json(
+ 'http://www.eporner.com/xhr/video/%s' % video_id,
+ display_id, note='Downloading video JSON',
+ query={
+ 'hash': calc_hash(video_hash),
+ 'device': 'generic',
+ 'domain': 'www.eporner.com',
+ 'fallback': 'false',
+ })
+
+ if video.get('available') is False:
+ raise ExtractorError(
+ '%s said: %s' % (self.IE_NAME, video['message']), expected=True)
+
+ sources = video['sources']
+
+ formats = []
+ has_av1 = bool(get_elements_by_class('download-av1', webpage))
+ for kind, formats_dict in sources.items():
+ if not isinstance(formats_dict, dict):
+ continue
+ for format_id, format_dict in formats_dict.items():
+ if not isinstance(format_dict, dict):
+ continue
+ src = url_or_none(format_dict.get('src'))
+ if not src or not src.startswith('http'):
+ continue
+ if kind == 'hls':
+ formats.extend(self._extract_m3u8_formats(
+ src, display_id, 'mp4', entry_protocol='m3u8_native',
+ m3u8_id=kind, fatal=False))
+ else:
+ height = int_or_none(self._search_regex(
+ r'(\d+)[pP]', format_id, 'height', default=None))
+ fps = int_or_none(self._search_regex(
+ r'(\d+)fps', format_id, 'fps', default=None))
+
+ formats.append({
+ 'url': src,
+ 'format_id': format_id,
+ 'height': height,
+ 'fps': fps,
+ })
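+ # When the page advertises AV1 (the download-av1 class), each MP4 also has an AV1 variant at the same URL with an '-av1' suffix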
+ if has_av1:
+ formats.append({
+ 'url': src.replace('.mp4', '-av1.mp4'),
+ 'format_id': join_nonempty('av1', format_id),
+ 'height': height,
+ 'fps': fps,
+ 'vcodec': 'av1',
+ })
+
+ json_ld = self._search_json_ld(webpage, display_id, default={})
+
+ duration = parse_duration(self._html_search_meta(
+ 'duration', webpage, default=None))
+ view_count = str_to_int(self._search_regex(
+ r'id=["\']cinemaviews1["\'][^>]*>\s*([0-9,]+)',
+ webpage, 'view count', default=None))
+
+ return merge_dicts(json_ld, {
+ 'id': video_id,
+ 'display_id': display_id,
+ 'title': title,
+ 'duration': duration,
+ 'view_count': view_count,
+ 'formats': formats,
+ 'age_limit': 18,
+ })
diff --git a/yt_dlp/extractor/erocast.py b/yt_dlp/extractor/erocast.py
new file mode 100644
index 0000000..92a5753
--- /dev/null
+++ b/yt_dlp/extractor/erocast.py
@@ -0,0 +1,63 @@
+from .common import InfoExtractor
+from ..utils import (
+ int_or_none,
+ parse_iso8601,
+ str_or_none,
+ traverse_obj,
+ url_or_none,
+)
+
+
+class ErocastIE(InfoExtractor):
+ _VALID_URL = r'https?://(?:www\.)?erocast\.me/track/(?P<id>[0-9]+)'
+ _TESTS = [{
+ 'url': 'https://erocast.me/track/9787/f',
+ 'md5': 'af63b91f5f231096aba54dd682abea3b',
+ 'info_dict': {
+ 'id': '9787',
+ 'title': '[F4M] Your roommate, who is definitely not possessed by an alien, suddenly wants to fuck you',
+ 'url': 'https://erocast.s3.us-east-2.wasabisys.com/1220419/track.m3u8',
+ 'ext': 'm4a',
+ 'age_limit': 18,
+ 'release_timestamp': 1696178652,
+ 'release_date': '20231001',
+ 'modified_timestamp': int,
+ 'modified_date': str,
+ 'description': 'ExtraTerrestrial Tuesday!',
+ 'uploader': 'clarissaisshy',
+ 'uploader_id': '8113',
+ 'uploader_url': 'https://erocast.me/clarissaisshy',
+ 'thumbnail': 'https://erocast.s3.us-east-2.wasabisys.com/1220418/conversions/1696179247-lg.jpg',
+ 'duration': 2307,
+ 'view_count': int,
+ 'comment_count': int,
+ 'webpage_url': 'https://erocast.me/track/9787/f4m-your-roommate-who-is-definitely-not-possessed-by-an-alien-suddenly-wants-to-fuck-you',
+ }
+ }]
+
+ def _real_extract(self, url):
+ video_id = self._match_id(url)
+ webpage = self._download_webpage(url, video_id)
+ data = self._search_json(
+ rf'<script>\s*var song_data_{video_id}\s*=', webpage, 'data', video_id, end_pattern=r'</script>')
+
+ return {
+ 'id': video_id,
+ 'formats': self._extract_m3u8_formats(
+ data.get('file_url') or data['stream_url'], video_id, 'm4a', m3u8_id='hls'),
+ 'age_limit': 18,
+ **traverse_obj(data, {
+ 'title': ('title', {str}),
+ 'description': ('description', {str}),
+ 'release_timestamp': ('created_at', {parse_iso8601}),
+ 'modified_timestamp': ('updated_at', {parse_iso8601}),
+ 'uploader': ('user', 'name', {str}),
+ 'uploader_id': ('user', 'id', {str_or_none}),
+ 'uploader_url': ('user', 'permalink_url', {url_or_none}),
+ 'thumbnail': ('artwork_url', {url_or_none}),
+ 'duration': ('duration', {int_or_none}),
+ 'view_count': ('plays', {int_or_none}),
+ 'comment_count': ('comment_count', {int_or_none}),
+ 'webpage_url': ('permalink_url', {url_or_none}),
+ }),
+ }
diff --git a/yt_dlp/extractor/eroprofile.py b/yt_dlp/extractor/eroprofile.py
new file mode 100644
index 0000000..2b61f3b
--- /dev/null
+++ b/yt_dlp/extractor/eroprofile.py
@@ -0,0 +1,123 @@
+import re
+
+from .common import InfoExtractor
+from ..compat import compat_urllib_parse_urlencode
+from ..utils import (
+ ExtractorError,
+ merge_dicts,
+)
+
+
+class EroProfileIE(InfoExtractor):
+ _VALID_URL = r'https?://(?:www\.)?eroprofile\.com/m/videos/view/(?P<id>[^/]+)'
+ _LOGIN_URL = 'http://www.eroprofile.com/auth/auth.php?'
+ _NETRC_MACHINE = 'eroprofile'
+ _TESTS = [{
+ 'url': 'http://www.eroprofile.com/m/videos/view/sexy-babe-softcore',
+ 'md5': 'c26f351332edf23e1ea28ce9ec9de32f',
+ 'info_dict': {
+ 'id': '3733775',
+ 'display_id': 'sexy-babe-softcore',
+ 'ext': 'm4v',
+ 'title': 'sexy babe softcore',
+ 'thumbnail': r're:https?://.*\.jpg',
+ 'age_limit': 18,
+ },
+ 'skip': 'Video not found',
+ }, {
+ 'url': 'http://www.eroprofile.com/m/videos/view/Try-It-On-Pee_cut_2-wmv-4shared-com-file-sharing-download-movie-file',
+ 'md5': '1baa9602ede46ce904c431f5418d8916',
+ 'info_dict': {
+ 'id': '1133519',
+ 'ext': 'm4v',
+ 'title': 'Try It On Pee_cut_2.wmv - 4shared.com - file sharing - download movie file',
+ 'thumbnail': r're:https?://.*\.jpg',
+ 'age_limit': 18,
+ },
+ 'skip': 'Requires login',
+ }]
+
+ def _perform_login(self, username, password):
+ query = compat_urllib_parse_urlencode({
+ 'username': username,
+ 'password': password,
+ 'url': 'http://www.eroprofile.com/',
+ })
+ login_url = self._LOGIN_URL + query
+ login_page = self._download_webpage(login_url, None, False)
+
+ m = re.search(r'Your username or password was incorrect\.', login_page)
+ if m:
+ raise ExtractorError(
+ 'Wrong username and/or password.', expected=True)
+
+ self.report_login()
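+ # Login apparently completes via a <script>-based redirect, which must be fetched for the session to take effect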
+ redirect_url = self._search_regex(
+ r'<script[^>]+?src="([^"]+)"', login_page, 'login redirect url')
+ self._download_webpage(redirect_url, None, False)
+
+ def _real_extract(self, url):
+ display_id = self._match_id(url)
+
+ webpage = self._download_webpage(url, display_id)
+
+ m = re.search(r'You must be logged in to view this video\.', webpage)
+ if m:
+ self.raise_login_required('This video requires login')
+
+ video_id = self._search_regex(
+ [r"glbUpdViews\s*\('\d*','(\d+)'", r'p/report/video/(\d+)'],
+ webpage, 'video id', default=None)
+
+ title = self._html_search_regex(
+ (r'Title:</th><td>([^<]+)</td>', r'<h1[^>]*>(.+?)</h1>'),
+ webpage, 'title')
+
+ info = self._parse_html5_media_entries(url, webpage, video_id)[0]
+
+ return merge_dicts(info, {
+ 'id': video_id,
+ 'display_id': display_id,
+ 'title': title,
+ 'age_limit': 18,
+ })
+
+
+class EroProfileAlbumIE(InfoExtractor):
+ _VALID_URL = r'https?://(?:www\.)?eroprofile\.com/m/videos/album/(?P<id>[^/]+)'
+ IE_NAME = 'EroProfile:album'
+
+ _TESTS = [{
+ 'url': 'https://www.eroprofile.com/m/videos/album/BBW-2-893',
+ 'info_dict': {
+ 'id': 'BBW-2-893',
+ 'title': 'BBW 2'
+ },
+ 'playlist_mincount': 486,
+ },
+ ]
+
+ def _extract_from_page(self, page):
+ for url in re.findall(r'href=".*?(/m/videos/view/[^"]+)"', page):
+ yield self.url_result(f'https://www.eroprofile.com{url}', EroProfileIE.ie_key())
+
+ def _entries(self, playlist_id, first_page):
+ yield from self._extract_from_page(first_page)
+
+ page_urls = re.findall(rf'href=".*?(/m/videos/album/{playlist_id}\?pnum=(\d+))"', first_page)
+ max_page = max((int(n) for _, n in page_urls), default=1)
+
+ for n in range(2, max_page + 1):
+ url = f'https://www.eroprofile.com/m/videos/album/{playlist_id}?pnum={n}'
+ yield from self._extract_from_page(
+ self._download_webpage(url, playlist_id,
+ note=f'Downloading playlist page {n - 1}'))
+
+ def _real_extract(self, url):
+ playlist_id = self._match_id(url)
+ first_page = self._download_webpage(url, playlist_id, note='Downloading playlist')
+ playlist_title = self._search_regex(
+ r'<title>Album: (.*) - EroProfile</title>', first_page, 'playlist_title')
+
+ return self.playlist_result(self._entries(playlist_id, first_page), playlist_id, playlist_title)
diff --git a/yt_dlp/extractor/err.py b/yt_dlp/extractor/err.py
new file mode 100644
index 0000000..abd00f2
--- /dev/null
+++ b/yt_dlp/extractor/err.py
@@ -0,0 +1,224 @@
+from .common import InfoExtractor
+from ..utils import (
+ clean_html,
+ int_or_none,
+ str_or_none,
+ url_or_none,
+)
+from ..utils.traversal import traverse_obj
+
+
+class ERRJupiterIE(InfoExtractor):
+ _VALID_URL = r'https?://(?:jupiter(?:pluss)?|lasteekraan)\.err\.ee/(?P<id>\d+)'
+ _TESTS = [{
+ 'note': 'Jupiter: Movie: siin-me-oleme',
+ 'url': 'https://jupiter.err.ee/1211107/siin-me-oleme',
+ 'md5': '9b45d1682a98853acaa1e1b0c791f425',
+ 'info_dict': {
+ 'id': '1211107',
+ 'ext': 'mp4',
+ 'title': 'Siin me oleme!',
+ 'alt_title': '',
+ 'description': 'md5:1825b795f5f7584241aeb59e5bbb4f70',
+ 'release_date': '20231226',
+ 'upload_date': '20201217',
+ 'modified_date': '20201217',
+ 'release_timestamp': 1703577600,
+ 'timestamp': 1608210000,
+ 'modified_timestamp': 1608220800,
+ 'release_year': 1978,
+ },
+ }, {
+ 'note': 'Jupiter: Series: Impulss',
+ 'url': 'https://jupiter.err.ee/1609145945/impulss',
+ 'md5': 'a378486df07ed1ba74e46cc861886243',
+ 'info_dict': {
+ 'id': '1609145945',
+ 'ext': 'mp4',
+ 'title': 'Impulss',
+ 'alt_title': 'Loteriipilet hooldekodusse',
+ 'description': 'md5:fa8a2ed0cdccb130211513443ee4d571',
+ 'release_date': '20231107',
+ 'upload_date': '20231026',
+ 'modified_date': '20231118',
+ 'release_timestamp': 1699380000,
+ 'timestamp': 1698327601,
+ 'modified_timestamp': 1700311802,
+ 'series': 'Impulss',
+ 'season': 'Season 1',
+ 'season_number': 1,
+ 'episode': 'Loteriipilet hooldekodusse',
+ 'episode_number': 6,
+ 'series_id': '1609108187',
+ 'release_year': 2023,
+ 'episode_id': '1609145945',
+ },
+ }, {
+ 'note': 'Jupiter: Radio Show: mnemoturniir episode',
+ 'url': 'https://jupiter.err.ee/1037919/mnemoturniir',
+ 'md5': 'f1eb95fe66f9620ff84e81bbac37076a',
+ 'info_dict': {
+ 'id': '1037919',
+ 'ext': 'm4a',
+ 'title': 'Mnemoturniir',
+ 'alt_title': '',
+ 'description': 'md5:626db52394e7583c26ab74d6a34d9982',
+ 'release_date': '20240121',
+ 'upload_date': '20240108',
+ 'modified_date': '20240121',
+ 'release_timestamp': 1705827900,
+ 'timestamp': 1704675602,
+ 'modified_timestamp': 1705827601,
+ 'series': 'Mnemoturniir',
+ 'season': 'Season 0',
+ 'season_number': 0,
+ 'episode': 'Episode 0',
+ 'episode_number': 0,
+ 'series_id': '1037919',
+ 'release_year': 2024,
+ 'episode_id': '1609215101',
+ },
+ }, {
+ 'note': 'Jupiter+: Clip: bolee-zelenyj-tallinn',
+ 'url': 'https://jupiterpluss.err.ee/1609180445/bolee-zelenyj-tallinn',
+ 'md5': '1b812270c4daf6ce51c06bfeaf33ed95',
+ 'info_dict': {
+ 'id': '1609180445',
+ 'ext': 'mp4',
+ 'title': 'Более зеленый Таллинн',
+ 'alt_title': '',
+ 'description': 'md5:fd34d9bf939c28c4a725b19a7f0d6320',
+ 'release_date': '20231224',
+ 'upload_date': '20231130',
+ 'modified_date': '20231207',
+ 'release_timestamp': 1703423400,
+ 'timestamp': 1701338400,
+ 'modified_timestamp': 1701967200,
+ 'release_year': 2023,
+ },
+ }, {
+ 'note': 'Jupiter+: Series: The Sniffer',
+ 'url': 'https://jupiterpluss.err.ee/1608311387/njuhach',
+ 'md5': '2abdeb7131ce551bce49e8d0cea08536',
+ 'info_dict': {
+ 'id': '1608311387',
+ 'ext': 'mp4',
+ 'title': 'Нюхач',
+ 'alt_title': '',
+ 'description': 'md5:8c5c7d8f32ec6e54cd498c9e59ca83bc',
+ 'release_date': '20230601',
+ 'upload_date': '20210818',
+ 'modified_date': '20210903',
+ 'release_timestamp': 1685633400,
+ 'timestamp': 1629318000,
+ 'modified_timestamp': 1630686000,
+ 'release_year': 2013,
+ 'episode': 'Episode 1',
+ 'episode_id': '1608311390',
+ 'episode_number': 1,
+ 'season': 'Season 1',
+ 'season_number': 1,
+ 'series': 'Нюхач',
+ 'series_id': '1608311387',
+ },
+ }, {
+ 'note': 'Jupiter+: Podcast: lesnye-istorii-aisty',
+ 'url': 'https://jupiterpluss.err.ee/1608990335/lesnye-istorii-aisty',
+ 'md5': '8b46d7e4510b254a14b7a52211b5bf96',
+ 'info_dict': {
+ 'id': '1608990335',
+ 'ext': 'm4a',
+ 'title': 'Лесные истории | Аисты',
+ 'alt_title': '',
+ 'description': 'md5:065e721623e271e7a63e6540d409ca6b',
+ 'release_date': '20230609',
+ 'upload_date': '20230527',
+ 'modified_date': '20230608',
+ 'release_timestamp': 1686308700,
+ 'timestamp': 1685145600,
+ 'modified_timestamp': 1686252600,
+ 'release_year': 2023,
+ 'episode': 'Episode 0',
+ 'episode_id': '1608990335',
+ 'episode_number': 0,
+ 'season': 'Season 0',
+ 'season_number': 0,
+ 'series': 'Лесные истории | Аисты',
+ 'series_id': '1037497',
+ }
+ }, {
+ 'note': 'Lasteekraan: Pätu',
+ 'url': 'https://lasteekraan.err.ee/1092243/patu',
+ 'md5': 'a67eb9b9bcb3d201718c15d1638edf77',
+ 'info_dict': {
+ 'id': '1092243',
+ 'ext': 'mp4',
+ 'title': 'Pätu',
+ 'alt_title': '',
+ 'description': 'md5:64a7b5a80afd7042d3f8ec48c77befd9',
+ 'release_date': '20230614',
+ 'upload_date': '20200520',
+ 'modified_date': '20200520',
+ 'release_timestamp': 1686745800,
+ 'timestamp': 1589975640,
+ 'modified_timestamp': 1589975640,
+ 'release_year': 1990,
+ 'episode': 'Episode 1',
+ 'episode_id': '1092243',
+ 'episode_number': 1,
+ 'season': 'Season 1',
+ 'season_number': 1,
+ 'series': 'Pätu',
+ 'series_id': '1092236',
+ },
+ }]
+
+ def _real_extract(self, url):
+ video_id = self._match_id(url)
+ data = self._download_json(
+ 'https://services.err.ee/api/v2/vodContent/getContentPageData', video_id,
+ query={'contentId': video_id})['data']['mainContent']
+
+ media_data = traverse_obj(data, ('medias', ..., {dict}), get_all=False)
+ if traverse_obj(media_data, ('restrictions', 'drm', {bool})):
+ self.report_drm(video_id)
+
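+ # streams may be listed under several keys (hls/hls2/hlsNew, dash/dashNew) plus an optional plain file URL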
+ formats, subtitles = [], {}
+ for format_url in set(traverse_obj(media_data, ('src', ('hls', 'hls2', 'hlsNew'), {url_or_none}))):
+ fmts, subs = self._extract_m3u8_formats_and_subtitles(
+ format_url, video_id, 'mp4', m3u8_id='hls', fatal=False)
+ formats.extend(fmts)
+ self._merge_subtitles(subs, target=subtitles)
+ for format_url in set(traverse_obj(media_data, ('src', ('dash', 'dashNew'), {url_or_none}))):
+ fmts, subs = self._extract_mpd_formats_and_subtitles(
+ format_url, video_id, mpd_id='dash', fatal=False)
+ formats.extend(fmts)
+ self._merge_subtitles(subs, target=subtitles)
+ if format_url := traverse_obj(media_data, ('src', 'file', {url_or_none})):
+ formats.append({
+ 'url': format_url,
+ 'format_id': 'http',
+ })
+
+ return {
+ 'id': video_id,
+ 'formats': formats,
+ 'subtitles': subtitles,
+ **traverse_obj(data, {
+ 'title': ('heading', {str}),
+ 'alt_title': ('subHeading', {str}),
+ 'description': (('lead', 'body'), {clean_html}, {lambda x: x or None}),
+ 'timestamp': ('created', {int_or_none}),
+ 'modified_timestamp': ('updated', {int_or_none}),
+ 'release_timestamp': (('scheduleStart', 'publicStart'), {int_or_none}),
+ 'release_year': ('year', {int_or_none}),
+ }, get_all=False),
+ **(traverse_obj(data, {
+ 'series': ('heading', {str}),
+ 'series_id': ('rootContentId', {str_or_none}),
+ 'episode': ('subHeading', {str}),
+ 'season_number': ('season', {int_or_none}),
+ 'episode_number': ('episode', {int_or_none}),
+ 'episode_id': ('id', {str_or_none}),
+ }) if data.get('type') == 'episode' else {}),
+ }
diff --git a/yt_dlp/extractor/ertgr.py b/yt_dlp/extractor/ertgr.py
new file mode 100644
index 0000000..9ecdf5d
--- /dev/null
+++ b/yt_dlp/extractor/ertgr.py
@@ -0,0 +1,302 @@
+import json
+import re
+
+from .common import InfoExtractor
+from ..compat import compat_str
+from ..utils import (
+ clean_html,
+ determine_ext,
+ ExtractorError,
+ dict_get,
+ int_or_none,
+ merge_dicts,
+ parse_qs,
+ parse_age_limit,
+ parse_iso8601,
+ str_or_none,
+ try_get,
+ url_or_none,
+ variadic,
+)
+
+
+class ERTFlixBaseIE(InfoExtractor):
+ def _call_api(
+ self, video_id, method='Player/AcquireContent', api_version=1,
+ param_headers=None, data=None, headers=None, **params):
+ platform_codename = {'platformCodename': 'www'}
+ headers_as_param = {'X-Api-Date-Format': 'iso', 'X-Api-Camel-Case': False}
+ headers_as_param.update(param_headers or {})
+ headers = headers or {}
+ if data:
+ headers['Content-Type'] = headers_as_param['Content-Type'] = 'application/json;charset=utf-8'
+ data = json.dumps(merge_dicts(platform_codename, data)).encode('utf-8')
+ query = merge_dicts(
+ {} if data else platform_codename,
+ {'$headers': json.dumps(headers_as_param)},
+ params)
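+ # the API expects the extra headers JSON-encoded in a "$headers" query parameter, producing e.g.
+ # https://api.app.ertflix.gr/v1/Player/AcquireContent?platformCodename=www&$headers={...}&codename=<id>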
+ response = self._download_json(
+ 'https://api.app.ertflix.gr/v%s/%s' % (str(api_version), method),
+ video_id, fatal=False, query=query, data=data, headers=headers)
+ if try_get(response, lambda x: x['Result']['Success']) is True:
+ return response
+
+ def _call_api_get_tiles(self, video_id, *tile_ids):
+ requested_tile_ids = [video_id] + list(tile_ids)
+ requested_tiles = [{'Id': tile_id} for tile_id in requested_tile_ids]
+ tiles_response = self._call_api(
+ video_id, method='Tile/GetTiles', api_version=2,
+ data={'RequestedTiles': requested_tiles})
+ tiles = try_get(tiles_response, lambda x: x['Tiles'], list) or []
+ if tile_ids:
+ if sorted([tile['Id'] for tile in tiles]) != sorted(requested_tile_ids):
+ raise ExtractorError('Requested tiles not found', video_id=video_id)
+ return tiles
+ try:
+ return next(tile for tile in tiles if tile['Id'] == video_id)
+ except StopIteration:
+ raise ExtractorError('No matching tile found', video_id=video_id)
+
+
+class ERTFlixCodenameIE(ERTFlixBaseIE):
+ IE_NAME = 'ertflix:codename'
+ IE_DESC = 'ERTFLIX videos by codename'
+ _VALID_URL = r'ertflix:(?P<id>[\w-]+)'
+ _TESTS = [{
+ 'url': 'ertflix:monogramma-praxitelis-tzanoylinos',
+ 'md5': '5b9c2cd171f09126167e4082fc1dd0ef',
+ 'info_dict': {
+ 'id': 'monogramma-praxitelis-tzanoylinos',
+ 'ext': 'mp4',
+ 'title': 'md5:ef0b439902963d56c43ac83c3f41dd0e',
+ },
+ },
+ ]
+
+ def _extract_formats_and_subs(self, video_id):
+ media_info = self._call_api(video_id, codename=video_id)
+ formats, subs = [], {}
+ for media_file in try_get(media_info, lambda x: x['MediaFiles'], list) or []:
+ for media in try_get(media_file, lambda x: x['Formats'], list) or []:
+ fmt_url = url_or_none(try_get(media, lambda x: x['Url']))
+ if not fmt_url:
+ continue
+ ext = determine_ext(fmt_url)
+ if ext == 'm3u8':
+ formats_, subs_ = self._extract_m3u8_formats_and_subtitles(
+ fmt_url, video_id, m3u8_id='hls', ext='mp4', fatal=False)
+ elif ext == 'mpd':
+ formats_, subs_ = self._extract_mpd_formats_and_subtitles(
+ fmt_url, video_id, mpd_id='dash', fatal=False)
+ else:
+ formats.append({
+ 'url': fmt_url,
+ 'format_id': str_or_none(media.get('Id')),
+ })
+ continue
+ formats.extend(formats_)
+ self._merge_subtitles(subs_, target=subs)
+
+ return formats, subs
+
+ def _real_extract(self, url):
+ video_id = self._match_id(url)
+
+ formats, subs = self._extract_formats_and_subs(video_id)
+
+ if formats:
+ return {
+ 'id': video_id,
+ 'formats': formats,
+ 'subtitles': subs,
+ 'title': self._generic_title(url),
+ }
+
+
+class ERTFlixIE(ERTFlixBaseIE):
+ IE_NAME = 'ertflix'
+ IE_DESC = 'ERTFLIX videos'
+ _VALID_URL = r'https?://www\.ertflix\.gr/(?:[^/]+/)?(?:series|vod)/(?P<id>[a-z]{3}\.\d+)'
+ _TESTS = [{
+ 'url': 'https://www.ertflix.gr/vod/vod.173258-aoratoi-ergates',
+ 'md5': '6479d5e60fd7e520b07ba5411dcdd6e7',
+ 'info_dict': {
+ 'id': 'aoratoi-ergates',
+ 'ext': 'mp4',
+ 'title': 'md5:c1433d598fbba0211b0069021517f8b4',
+ 'description': 'md5:01a64d113c31957eb7eb07719ab18ff4',
+ 'thumbnail': r're:https?://.+\.jpg',
+ 'episode_id': 'vod.173258',
+ 'timestamp': 1639648800,
+ 'upload_date': '20211216',
+ 'duration': 3166,
+ 'age_limit': 8,
+ },
+ }, {
+ 'url': 'https://www.ertflix.gr/series/ser.3448-monogramma',
+ 'info_dict': {
+ 'id': 'ser.3448',
+ 'age_limit': 8,
+ 'description': 'Η εκπομπή σαράντα ετών που σημάδεψε τον πολιτισμό μας.',
+ 'title': 'Μονόγραμμα',
+ },
+ 'playlist_mincount': 64,
+ }, {
+ 'url': 'https://www.ertflix.gr/series/ser.3448-monogramma?season=1',
+ 'info_dict': {
+ 'id': 'ser.3448',
+ 'age_limit': 8,
+ 'description': 'Η εκπομπή σαράντα ετών που σημάδεψε τον πολιτισμό μας.',
+ 'title': 'Μονόγραμμα',
+ },
+ 'playlist_count': 22,
+ }, {
+ 'url': 'https://www.ertflix.gr/series/ser.3448-monogramma?season=1&season=2021%20-%202022',
+ 'info_dict': {
+ 'id': 'ser.3448',
+ 'age_limit': 8,
+ 'description': 'Η εκπομπή σαράντα ετών που σημάδεψε τον πολιτισμό μας.',
+ 'title': 'Μονόγραμμα',
+ },
+ 'playlist_mincount': 36,
+ }, {
+ 'url': 'https://www.ertflix.gr/series/ser.164991-to-diktuo-1?season=1-9',
+ 'info_dict': {
+ 'id': 'ser.164991',
+ 'age_limit': 8,
+ 'description': 'Η πρώτη ελληνική εκπομπή με θεματολογία αποκλειστικά γύρω από το ίντερνετ.',
+ 'title': 'Το δίκτυο',
+ },
+ 'playlist_mincount': 9,
+ }, {
+ 'url': 'https://www.ertflix.gr/en/vod/vod.127652-ta-kalytera-mas-chronia-ep1-mia-volta-sto-feggari',
+ 'only_matching': True,
+ }]
+
+ def _extract_episode(self, episode):
+ codename = try_get(episode, lambda x: x['Codename'], compat_str)
+ title = episode.get('Title')
+ description = clean_html(dict_get(episode, ('ShortDescription', 'TinyDescription', )))
+ if not codename or not title or not episode.get('HasPlayableStream', True):
+ return
+ thumbnail = next((
+ url_or_none(thumb.get('Url'))
+ for thumb in variadic(dict_get(episode, ('Images', 'Image')) or {})
+ if thumb.get('IsMain')),
+ None)
+ return {
+ '_type': 'url_transparent',
+ 'thumbnail': thumbnail,
+ 'id': codename,
+ 'episode_id': episode.get('Id'),
+ 'title': title,
+ 'alt_title': episode.get('Subtitle'),
+ 'description': description,
+ 'timestamp': parse_iso8601(episode.get('PublishDate')),
+ 'duration': episode.get('DurationSeconds'),
+ 'age_limit': self._parse_age_rating(episode),
+ 'url': 'ertflix:%s' % (codename, ),
+ }
+
+ @staticmethod
+ def _parse_age_rating(info_dict):
+ return parse_age_limit(
+ info_dict.get('AgeRating')
+ or (info_dict.get('IsAdultContent') and 18)
+ or (info_dict.get('IsKidsContent') and 0))
+
+ def _extract_series(self, video_id, season_titles=None, season_numbers=None):
+ media_info = self._call_api(video_id, method='Tile/GetSeriesDetails', id=video_id)
+
+ series = try_get(media_info, lambda x: x['Series'], dict) or {}
+ series_info = {
+ 'age_limit': self._parse_age_rating(series),
+ 'title': series.get('Title'),
+ 'description': dict_get(series, ('ShortDescription', 'TinyDescription', )),
+ }
+ if season_numbers:
+ season_titles = season_titles or []
+ for season in try_get(series, lambda x: x['Seasons'], list) or []:
+ if season.get('SeasonNumber') in season_numbers and season.get('Title'):
+ season_titles.append(season['Title'])
+
+ def gen_episode(m_info, season_titles):
+ for episode_group in try_get(m_info, lambda x: x['EpisodeGroups'], list) or []:
+ if season_titles and episode_group.get('Title') not in season_titles:
+ continue
+ episodes = try_get(episode_group, lambda x: x['Episodes'], list)
+ if not episodes:
+ continue
+ season_info = {
+ 'season': episode_group.get('Title'),
+ 'season_number': int_or_none(episode_group.get('SeasonNumber')),
+ }
+ try:
+ episodes = [(int(ep['EpisodeNumber']), ep) for ep in episodes]
+ episodes.sort()
+ except (KeyError, ValueError):
+ episodes = enumerate(episodes, 1)
+ for n, episode in episodes:
+ info = self._extract_episode(episode)
+ if info is None:
+ continue
+ info['episode_number'] = n
+ info.update(season_info)
+ yield info
+
+ return self.playlist_result(
+ gen_episode(media_info, season_titles), playlist_id=video_id, **series_info)
+
+ def _real_extract(self, url):
+ video_id = self._match_id(url)
+ if video_id.startswith('ser.'):
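+ # ?season= values may be numeric season numbers or textual season titles; split them into the two kwargs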
+ param_season = parse_qs(url).get('season', [None])
+ param_season = [
+ (have_number, int_or_none(v) if have_number else str_or_none(v))
+ for have_number, v in
+ [(int_or_none(ps) is not None, ps) for ps in param_season]
+ if v is not None
+ ]
+ season_kwargs = {
+ k: [v for is_num, v in param_season if is_num is c] or None
+ for k, c in
+ [('season_titles', False), ('season_numbers', True)]
+ }
+ return self._extract_series(video_id, **season_kwargs)
+
+ return self._extract_episode(self._call_api_get_tiles(video_id))
+
+
+class ERTWebtvEmbedIE(InfoExtractor):
+ IE_NAME = 'ertwebtv:embed'
+ IE_DESC = 'ert.gr webtv embedded videos'
+ _BASE_PLAYER_URL_RE = re.escape('//www.ert.gr/webtv/live-uni/vod/dt-uni-vod.php')
+ _VALID_URL = rf'https?:{_BASE_PLAYER_URL_RE}\?([^#]+&)?f=(?P<id>[^#&]+)'
+ _EMBED_REGEX = [rf'<iframe[^>]+?src=(?P<_q1>["\'])(?P<url>(?:https?:)?{_BASE_PLAYER_URL_RE}\?(?:(?!(?P=_q1)).)+)(?P=_q1)']
+
+ _TESTS = [{
+ 'url': 'https://www.ert.gr/webtv/live-uni/vod/dt-uni-vod.php?f=trailers/E2251_TO_DIKTYO_E09_16-01_1900.mp4&bgimg=/photos/2022/1/to_diktio_ep09_i_istoria_tou_diadiktiou_stin_Ellada_1021x576.jpg',
+ 'md5': 'f9e9900c25c26f4ecfbddbb4b6305854',
+ 'info_dict': {
+ 'id': 'trailers/E2251_TO_DIKTYO_E09_16-01_1900.mp4',
+ 'title': 'md5:914f06a73cd8b62fbcd6fb90c636e497',
+ 'ext': 'mp4',
+ 'thumbnail': 'https://program.ert.gr/photos/2022/1/to_diktio_ep09_i_istoria_tou_diadiktiou_stin_Ellada_1021x576.jpg'
+ },
+ }]
+
+ def _real_extract(self, url):
+ video_id = self._match_id(url)
+ formats, subs = self._extract_m3u8_formats_and_subtitles(
+ f'https://mediastream.ert.gr/vodedge/_definst_/mp4:dvrorigin/{video_id}/playlist.m3u8',
+ video_id, 'mp4')
+ thumbnail_id = parse_qs(url).get('bgimg', [None])[0]
+ if thumbnail_id and not thumbnail_id.startswith('http'):
+ thumbnail_id = f'https://program.ert.gr{thumbnail_id}'
+ return {
+ 'id': video_id,
+ 'title': f'VOD - {video_id}',
+ 'thumbnail': thumbnail_id,
+ 'formats': formats,
+ 'subtitles': subs,
+ }
diff --git a/yt_dlp/extractor/espn.py b/yt_dlp/extractor/espn.py
new file mode 100644
index 0000000..7ed824c
--- /dev/null
+++ b/yt_dlp/extractor/espn.py
@@ -0,0 +1,421 @@
+import base64
+import json
+import re
+import urllib.parse
+
+from .adobepass import AdobePassIE
+from .common import InfoExtractor
+from .once import OnceIE
+from ..utils import (
+ determine_ext,
+ dict_get,
+ int_or_none,
+ traverse_obj,
+ unified_strdate,
+ unified_timestamp,
+)
+
+
+class ESPNIE(OnceIE):
+ _VALID_URL = r'''(?x)
+ https?://
+ (?:
+ (?:
+ (?:
+ (?:(?:\w+\.)+)?espn\.go|
+ (?:www\.)?espn
+ )\.com/
+ (?:
+ (?:
+ video/(?:clip|iframe/twitter)|
+ )
+ (?:
+ .*?\?.*?\bid=|
+ /_/id/
+ )|
+ [^/]+/video/
+ )
+ )|
+ (?:www\.)espnfc\.(?:com|us)/(?:video/)?[^/]+/\d+/video/
+ )
+ (?P<id>\d+)
+ '''
+
+ _TESTS = [{
+ 'url': 'http://espn.go.com/video/clip?id=10365079',
+ 'info_dict': {
+ 'id': '10365079',
+ 'ext': 'mp4',
+ 'title': '30 for 30 Shorts: Judging Jewell',
+ 'description': 'md5:39370c2e016cb4ecf498ffe75bef7f0f',
+ 'timestamp': 1390936111,
+ 'upload_date': '20140128',
+ 'duration': 1302,
+ 'thumbnail': r're:https://.+\.jpg',
+ },
+ 'params': {
+ 'skip_download': True,
+ },
+ }, {
+ 'url': 'https://broadband.espn.go.com/video/clip?id=18910086',
+ 'info_dict': {
+ 'id': '18910086',
+ 'ext': 'mp4',
+ 'title': 'Kyrie spins around defender for two',
+ 'description': 'md5:2b0f5bae9616d26fba8808350f0d2b9b',
+ 'timestamp': 1489539155,
+ 'upload_date': '20170315',
+ },
+ 'params': {
+ 'skip_download': True,
+ },
+ 'expected_warnings': ['Unable to download f4m manifest'],
+ }, {
+ 'url': 'http://nonredline.sports.espn.go.com/video/clip?id=19744672',
+ 'only_matching': True,
+ }, {
+ 'url': 'https://cdn.espn.go.com/video/clip/_/id/19771774',
+ 'only_matching': True,
+ }, {
+ 'url': 'http://www.espn.com/video/clip?id=10365079',
+ 'only_matching': True,
+ }, {
+ 'url': 'http://www.espn.com/video/clip/_/id/17989860',
+ 'only_matching': True,
+ }, {
+ 'url': 'https://espn.go.com/video/iframe/twitter/?cms=espn&id=10365079',
+ 'only_matching': True,
+ }, {
+ 'url': 'http://www.espnfc.us/video/espn-fc-tv/86/video/3319154/nashville-unveiled-as-the-newest-club-in-mls',
+ 'only_matching': True,
+ }, {
+ 'url': 'http://www.espnfc.com/english-premier-league/23/video/3324163/premier-league-in-90-seconds-golden-tweets',
+ 'only_matching': True,
+ }, {
+ 'url': 'http://www.espn.com/espnw/video/26066627/arkansas-gibson-completes-hr-cycle-four-innings',
+ 'only_matching': True,
+ }, {
+ 'url': 'http://www.espn.com/watch/player?id=19141491',
+ 'only_matching': True,
+ }, {
+ 'url': 'http://www.espn.com/watch/player?bucketId=257&id=19505875',
+ 'only_matching': True,
+ }, ]
+
+ def _real_extract(self, url):
+ video_id = self._match_id(url)
+
+ clip = self._download_json(
+ 'http://api-app.espn.com/v1/video/clips/%s' % video_id,
+ video_id)['videos'][0]
+
+ title = clip['headline']
+
+ format_urls = set()
+ formats = []
+
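+ # "links" nests stream URLs at arbitrary depth; walk it recursively, joining nested keys into compound format ids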
+ def traverse_source(source, base_source_id=None):
+ for source_id, source in source.items():
+ if source_id == 'alert':
+ continue
+ elif isinstance(source, str):
+ extract_source(source, base_source_id)
+ elif isinstance(source, dict):
+ traverse_source(
+ source,
+ '%s-%s' % (base_source_id, source_id)
+ if base_source_id else source_id)
+
+ def extract_source(source_url, source_id=None):
+ if source_url in format_urls:
+ return
+ format_urls.add(source_url)
+ ext = determine_ext(source_url)
+ if OnceIE.suitable(source_url):
+ formats.extend(self._extract_once_formats(source_url))
+ elif ext == 'smil':
+ formats.extend(self._extract_smil_formats(
+ source_url, video_id, fatal=False))
+ elif ext == 'f4m':
+ formats.extend(self._extract_f4m_formats(
+ source_url, video_id, f4m_id=source_id, fatal=False))
+ elif ext == 'm3u8':
+ formats.extend(self._extract_m3u8_formats(
+ source_url, video_id, 'mp4', entry_protocol='m3u8_native',
+ m3u8_id=source_id, fatal=False))
+ else:
+ f = {
+ 'url': source_url,
+ 'format_id': source_id,
+ }
+ mobj = re.search(r'(\d+)p(\d+)_(\d+)k\.', source_url)
+ if mobj:
+ f.update({
+ 'height': int(mobj.group(1)),
+ 'fps': int(mobj.group(2)),
+ 'tbr': int(mobj.group(3)),
+ })
+ if source_id == 'mezzanine':
+ f['quality'] = 1
+ formats.append(f)
+
+ links = clip.get('links', {})
+ traverse_source(links.get('source', {}))
+ traverse_source(links.get('mobile', {}))
+
+ description = clip.get('caption') or clip.get('description')
+ thumbnail = clip.get('thumbnail')
+ duration = int_or_none(clip.get('duration'))
+ timestamp = unified_timestamp(clip.get('originalPublishDate'))
+
+ return {
+ 'id': video_id,
+ 'title': title,
+ 'description': description,
+ 'thumbnail': thumbnail,
+ 'timestamp': timestamp,
+ 'duration': duration,
+ 'formats': formats,
+ }
+
+
+class ESPNArticleIE(InfoExtractor):
+ _VALID_URL = r'https?://(?:espn\.go|(?:www\.)?espn)\.com/(?:[^/]+/)*(?P<id>[^/]+)'
+ _TESTS = [{
+ 'url': 'http://espn.go.com/nba/recap?gameId=400793786',
+ 'only_matching': True,
+ }, {
+ 'url': 'http://espn.go.com/blog/golden-state-warriors/post/_/id/593/how-warriors-rapidly-regained-a-winning-edge',
+ 'only_matching': True,
+ }, {
+ 'url': 'http://espn.go.com/sports/endurance/story/_/id/12893522/dzhokhar-tsarnaev-sentenced-role-boston-marathon-bombings',
+ 'only_matching': True,
+ }, {
+ 'url': 'http://espn.go.com/nba/playoffs/2015/story/_/id/12887571/john-wall-washington-wizards-no-swelling-left-hand-wrist-game-5-return',
+ 'only_matching': True,
+ }]
+
+ @classmethod
+ def suitable(cls, url):
+ return False if (ESPNIE.suitable(url) or WatchESPNIE.suitable(url)) else super().suitable(url)
+
+ def _real_extract(self, url):
+ video_id = self._match_id(url)
+
+ webpage = self._download_webpage(url, video_id)
+
+ video_id = self._search_regex(
+ r'class=(["\']).*?video-play-button.*?\1[^>]+data-id=["\'](?P<id>\d+)',
+ webpage, 'video id', group='id')
+
+ return self.url_result(
+ 'http://espn.go.com/video/clip?id=%s' % video_id, ESPNIE.ie_key())
+
+
+class FiveThirtyEightIE(InfoExtractor):
+ _VALID_URL = r'https?://(?:www\.)?fivethirtyeight\.com/features/(?P<id>[^/?#]+)'
+ _TEST = {
+ 'url': 'http://fivethirtyeight.com/features/how-the-6-8-raiders-can-still-make-the-playoffs/',
+ 'info_dict': {
+ 'id': '56032156',
+ 'ext': 'flv',
+ 'title': 'FiveThirtyEight: The Raiders can still make the playoffs',
+ 'description': 'Neil Paine breaks down the simplest scenario that will put the Raiders into the playoffs at 8-8.',
+ },
+ 'params': {
+ 'skip_download': True,
+ },
+ }
+
+ def _real_extract(self, url):
+ video_id = self._match_id(url)
+
+ webpage = self._download_webpage(url, video_id)
+
+ embed_url = self._search_regex(
+ r'<iframe[^>]+src=["\'](https?://fivethirtyeight\.abcnews\.go\.com/video/embed/\d+/\d+)',
+ webpage, 'embed url')
+
+ return self.url_result(embed_url, 'AbcNewsVideo')
+
+
+class ESPNCricInfoIE(InfoExtractor):
+ _VALID_URL = r'https?://(?:www\.)?espncricinfo\.com/(?:cricket-)?videos?/[^#$&?/]+-(?P<id>\d+)'
+ _TESTS = [{
+ 'url': 'https://www.espncricinfo.com/video/finch-chasing-comes-with-risks-despite-world-cup-trend-1289135',
+ 'info_dict': {
+ 'id': '1289135',
+ 'ext': 'mp4',
+ 'title': 'Finch: Chasing comes with \'risks\' despite World Cup trend',
+ 'description': 'md5:ea32373303e25efbb146efdfc8a37829',
+ 'upload_date': '20211113',
+ 'duration': 96,
+ },
+ 'params': {'skip_download': True}
+ }, {
+ 'url': 'https://www.espncricinfo.com/cricket-videos/daryl-mitchell-mitchell-santner-is-one-of-the-best-white-ball-spinners-india-vs-new-zealand-1356225',
+ 'info_dict': {
+ 'id': '1356225',
+ 'ext': 'mp4',
+ 'description': '"Santner has done it for a long time for New Zealand - we\'re lucky to have him"',
+ 'upload_date': '20230128',
+ 'title': 'Mitchell: \'Santner is one of the best white-ball spinners at the moment\'',
+ 'duration': 87,
+ },
+ 'params': {'skip_download': 'm3u8'},
+ }]
+
+ def _real_extract(self, url):
+ id = self._match_id(url)
+ data_json = self._download_json(f'https://hs-consumer-api.espncricinfo.com/v1/pages/video/video-details?videoId={id}', id)['video']
+ formats, subtitles = [], {}
+ for item in data_json.get('playbacks') or []:
+ if item.get('type') == 'HLS' and item.get('url'):
+ m3u8_frmts, m3u8_subs = self._extract_m3u8_formats_and_subtitles(item['url'], id)
+ formats.extend(m3u8_frmts)
+ subtitles = self._merge_subtitles(subtitles, m3u8_subs)
+ elif item.get('type') == 'AUDIO' and item.get('url'):
+ formats.append({
+ 'url': item['url'],
+ 'vcodec': 'none',
+ })
+ return {
+ 'id': id,
+ 'title': data_json.get('title'),
+ 'description': data_json.get('summary'),
+ 'upload_date': unified_strdate(dict_get(data_json, ('publishedAt', 'recordedAt'))),
+ 'duration': data_json.get('duration'),
+ 'formats': formats,
+ 'subtitles': subtitles,
+ }
+
+
+class WatchESPNIE(AdobePassIE):
+ _VALID_URL = r'https?://(?:www\.)?espn\.com/(?:watch|espnplus)/player/_/id/(?P<id>[0-9a-f]{8}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{12})'
+ _TESTS = [{
+ 'url': 'https://www.espn.com/watch/player/_/id/dbbc6b1d-c084-4b47-9878-5f13c56ce309',
+ 'info_dict': {
+ 'id': 'dbbc6b1d-c084-4b47-9878-5f13c56ce309',
+ 'ext': 'mp4',
+ 'title': 'Huddersfield vs. Burnley',
+ 'duration': 7500,
+ 'thumbnail': 'https://artwork.api.espn.com/artwork/collections/media/dbbc6b1d-c084-4b47-9878-5f13c56ce309/default?width=640&apikey=1ngjw23osgcis1i1vbj96lmfqs',
+ },
+ 'params': {
+ 'skip_download': True,
+ },
+ }, {
+ 'url': 'https://www.espn.com/watch/player/_/id/a049a56e-a7ce-477e-aef3-c7e48ef8221c',
+ 'info_dict': {
+ 'id': 'a049a56e-a7ce-477e-aef3-c7e48ef8221c',
+ 'ext': 'mp4',
+ 'title': 'Dynamo Dresden vs. VfB Stuttgart (Round #1) (German Cup)',
+ 'duration': 8335,
+ 'thumbnail': 'https://s.secure.espncdn.com/stitcher/artwork/collections/media/bd1f3d12-0654-47d9-852e-71b85ea695c7/16x9.jpg?timestamp=202201112217&showBadge=true&cb=12&package=ESPN_PLUS',
+ },
+ 'params': {
+ 'skip_download': True,
+ },
+ }, {
+ 'url': 'https://www.espn.com/espnplus/player/_/id/317f5fd1-c78a-4ebe-824a-129e0d348421',
+ 'info_dict': {
+ 'id': '317f5fd1-c78a-4ebe-824a-129e0d348421',
+ 'ext': 'mp4',
+ 'title': 'The Wheel - Episode 10',
+ 'duration': 3352,
+ 'thumbnail': 'https://s.secure.espncdn.com/stitcher/artwork/collections/media/317f5fd1-c78a-4ebe-824a-129e0d348421/16x9.jpg?timestamp=202205031523&showBadge=true&cb=12&package=ESPN_PLUS',
+ },
+ 'params': {
+ 'skip_download': True,
+ },
+ }]
+
+ _API_KEY = 'ZXNwbiZicm93c2VyJjEuMC4w.ptUt7QxsteaRruuPmGZFaJByOoqKvDP2a5YkInHrc7c'
+
+ def _call_bamgrid_api(self, path, video_id, payload=None, headers={}):
+ if 'Authorization' not in headers:
+ headers['Authorization'] = f'Bearer {self._API_KEY}'
+ parse = urllib.parse.urlencode if path == 'token' else json.dumps
+ return self._download_json(
+ f'https://espn.api.edge.bamgrid.com/{path}', video_id, headers=headers, data=parse(payload).encode())
+
+ def _real_extract(self, url):
+ video_id = self._match_id(url)
+ cdn_data = self._download_json(
+ f'https://watch-cdn.product.api.espn.com/api/product/v3/watchespn/web/playback/event?id={video_id}',
+ video_id)
+ video_data = cdn_data['playbackState']
+
+ # ESPN+ subscription required, supplied through cookies
+ if 'DTC' in (video_data.get('sourceId') or ''):
+ cookie = self._get_cookies(url).get('ESPN-ONESITE.WEB-PROD.token')
+ if not cookie:
+ self.raise_login_required(method='cookies')
+
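+ # BAMTech flow: register a pseudo Android device, exchange its assertion for a device token,
+ # then grant an account token using the identity from the ESPN-ONESITE cookie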
+ assertion = self._call_bamgrid_api(
+ 'devices', video_id,
+ headers={'Content-Type': 'application/json; charset=UTF-8'},
+ payload={
+ 'deviceFamily': 'android',
+ 'applicationRuntime': 'android',
+ 'deviceProfile': 'tv',
+ 'attributes': {},
+ })['assertion']
+ token = self._call_bamgrid_api(
+ 'token', video_id, payload={
+ 'subject_token': assertion,
+ 'subject_token_type': 'urn:bamtech:params:oauth:token-type:device',
+ 'platform': 'android',
+ 'grant_type': 'urn:ietf:params:oauth:grant-type:token-exchange'
+ })['access_token']
+
+ assertion = self._call_bamgrid_api(
+ 'accounts/grant', video_id, payload={'id_token': cookie.value.split('|')[1]},
+ headers={
+ 'Authorization': token,
+ 'Content-Type': 'application/json; charset=UTF-8'
+ })['assertion']
+ token = self._call_bamgrid_api(
+ 'token', video_id, payload={
+ 'subject_token': assertion,
+ 'subject_token_type': 'urn:bamtech:params:oauth:token-type:account',
+ 'platform': 'android',
+ 'grant_type': 'urn:ietf:params:oauth:grant-type:token-exchange'
+ })['access_token']
+
+ playback = self._download_json(
+ video_data['videoHref'].format(scenario='browser~ssai'), video_id,
+ headers={
+ 'Accept': 'application/vnd.media-service+json; version=5',
+ 'Authorization': token
+ })
+ m3u8_url, headers = playback['stream']['complete'][0]['url'], {'authorization': token}
+
+ # No login required
+ elif video_data.get('sourceId') == 'ESPN_FREE':
+ asset = self._download_json(
+ f'https://watch.auth.api.espn.com/video/auth/media/{video_id}/asset?apikey=uiqlbgzdwuru14v627vdusswb',
+ video_id)
+ m3u8_url, headers = asset['stream'], {}
+
+ # TV Provider required
+ else:
+ resource = self._get_mvpd_resource('ESPN', video_data['name'], video_id, None)
+ auth = self._extract_mvpd_auth(url, video_id, 'ESPN', resource).encode()
+
+ asset = self._download_json(
+ f'https://watch.auth.api.espn.com/video/auth/media/{video_id}/asset?apikey=uiqlbgzdwuru14v627vdusswb',
+ video_id, data=f'adobeToken={urllib.parse.quote_plus(base64.b64encode(auth))}&drmSupport=HLS'.encode())
+ m3u8_url, headers = asset['stream'], {}
+
+ formats, subtitles = self._extract_m3u8_formats_and_subtitles(m3u8_url, video_id, 'mp4', m3u8_id='hls')
+
+ return {
+ 'id': video_id,
+ 'duration': traverse_obj(cdn_data, ('tracking', 'duration')),
+ 'title': video_data.get('name'),
+ 'formats': formats,
+ 'subtitles': subtitles,
+ 'thumbnail': video_data.get('posterHref'),
+ 'http_headers': headers,
+ }
diff --git a/yt_dlp/extractor/ettutv.py b/yt_dlp/extractor/ettutv.py
new file mode 100644
index 0000000..133b525
--- /dev/null
+++ b/yt_dlp/extractor/ettutv.py
@@ -0,0 +1,60 @@
+from .common import InfoExtractor
+from ..utils import bool_or_none, traverse_obj, unified_timestamp, url_or_none
+
+
+class EttuTvIE(InfoExtractor):
+ _VALID_URL = r'https?://(?:www\.)?ettu\.tv/[^?#]+/playerpage/(?P<id>[0-9]+)'
+
+ _TESTS = [{
+ 'url': 'https://www.ettu.tv/en-int/playerpage/1573849',
+ 'md5': '5874b7639a2aa866d1f6c3a4037c7c09',
+ 'info_dict': {
+ 'id': '1573849',
+ 'title': 'Ni Xia Lian - Shao Jieni',
+ 'description': 'ITTF Europe Top 16 Cup',
+ 'timestamp': 1677348600,
+ 'upload_date': '20230225',
+ 'thumbnail': r're:^https?://.*\.(?:jpg|png)',
+ 'ext': 'mp4',
+ },
+ }, {
+ 'url': 'https://www.ettu.tv/en-int/playerpage/1573753',
+ 'md5': '1fc094bf96cf2d5ec0f434d3a6dec9aa',
+ 'info_dict': {
+ 'id': '1573753',
+ 'title': 'Qiu Dang - Jorgic Darko',
+ 'description': 'ITTF Europe Top 16 Cup',
+ 'timestamp': 1677423600,
+ 'upload_date': '20230226',
+ 'thumbnail': r're:^https?://.*\.(?:jpg|png)',
+ 'ext': 'mp4',
+ },
+ }]
+
+ def _real_extract(self, url):
+ video_id = self._match_id(url)
+
+ player_settings = self._download_json(
+ f'https://www.ettu.tv/api/v3/contents/{video_id}/player-settings', video_id, query={
+ 'language': 'en',
+ 'showTitle': 'true',
+ 'device': 'desktop',
+ })
+
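+ # empty data forces a POST to the streamAccess URL from the player settings, which the endpoint seems to require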
+ stream_response = self._download_json(player_settings['streamAccess'], video_id, data=b'')
+
+ formats, subtitles = self._extract_m3u8_formats_and_subtitles(
+ stream_response['data']['stream'], video_id, 'mp4')
+
+ return {
+ 'id': video_id,
+ 'formats': formats,
+ 'subtitles': subtitles,
+ **traverse_obj(player_settings, {
+ 'title': 'title',
+ 'description': ('metaInformation', 'competition'),
+ 'thumbnail': ('image', {url_or_none}),
+ 'timestamp': ('date', {unified_timestamp}),
+ 'is_live': ('isLivestream', {bool_or_none}),
+ })
+ }
diff --git a/yt_dlp/extractor/europa.py b/yt_dlp/extractor/europa.py
new file mode 100644
index 0000000..191a436
--- /dev/null
+++ b/yt_dlp/extractor/europa.py
@@ -0,0 +1,174 @@
+from .common import InfoExtractor
+from ..utils import (
+ int_or_none,
+ orderedSet,
+ parse_duration,
+ parse_iso8601,
+ parse_qs,
+ qualities,
+ traverse_obj,
+ unified_strdate,
+ xpath_text
+)
+
+
+class EuropaIE(InfoExtractor):
+ _WORKING = False
+ _VALID_URL = r'https?://ec\.europa\.eu/avservices/(?:video/player|audio/audioDetails)\.cfm\?.*?\bref=(?P<id>[A-Za-z0-9-]+)'
+ _TESTS = [{
+ 'url': 'http://ec.europa.eu/avservices/video/player.cfm?ref=I107758',
+ 'md5': '574f080699ddd1e19a675b0ddf010371',
+ 'info_dict': {
+ 'id': 'I107758',
+ 'ext': 'mp4',
+ 'title': 'TRADE - Wikileaks on TTIP',
+ 'description': 'NEW LIVE EC Midday press briefing of 11/08/2015',
+ 'thumbnail': r're:^https?://.*\.jpg$',
+ 'upload_date': '20150811',
+ 'duration': 34,
+ 'view_count': int,
+ 'formats': 'mincount:3',
+ }
+ }, {
+ 'url': 'http://ec.europa.eu/avservices/video/player.cfm?sitelang=en&ref=I107786',
+ 'only_matching': True,
+ }, {
+ 'url': 'http://ec.europa.eu/avservices/audio/audioDetails.cfm?ref=I-109295&sitelang=en',
+ 'only_matching': True,
+ }]
+
+ def _real_extract(self, url):
+ video_id = self._match_id(url)
+
+ playlist = self._download_xml(
+ 'http://ec.europa.eu/avservices/video/player/playlist.cfm?ID=%s' % video_id, video_id)
+
+ def get_item(type_, preference):
+ items = {}
+ for item in playlist.findall('./info/%s/item' % type_):
+ lang, label = xpath_text(item, 'lg', default=None), xpath_text(item, 'label', default=None)
+ if lang and label:
+ items[lang] = label.strip()
+ for p in preference:
+ if items.get(p):
+ return items[p]
+
+ query = parse_qs(url)
+ preferred_lang = query.get('sitelang', ('en', ))[0]
+
+ preferred_langs = orderedSet((preferred_lang, 'en', 'int'))
+
+ title = get_item('title', preferred_langs) or video_id
+ description = get_item('description', preferred_langs)
+ thumbnail = xpath_text(playlist, './info/thumburl', 'thumbnail')
+ upload_date = unified_strdate(xpath_text(playlist, './info/date', 'upload date'))
+ duration = parse_duration(xpath_text(playlist, './info/duration', 'duration'))
+ view_count = int_or_none(xpath_text(playlist, './info/views', 'views'))
+
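+ # qualities() ranks later entries higher, so reverse the preference list to favour the site language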
+ language_preference = qualities(preferred_langs[::-1])
+
+ formats = []
+ for file_ in playlist.findall('./files/file'):
+ video_url = xpath_text(file_, './url')
+ if not video_url:
+ continue
+ lang = xpath_text(file_, './lg')
+ formats.append({
+ 'url': video_url,
+ 'format_id': lang,
+ 'format_note': xpath_text(file_, './lglabel'),
+ 'language_preference': language_preference(lang)
+ })
+
+ return {
+ 'id': video_id,
+ 'title': title,
+ 'description': description,
+ 'thumbnail': thumbnail,
+ 'upload_date': upload_date,
+ 'duration': duration,
+ 'view_count': view_count,
+ 'formats': formats
+ }
+
+
+class EuroParlWebstreamIE(InfoExtractor):
+ _VALID_URL = r'''(?x)
+ https?://multimedia\.europarl\.europa\.eu/[^/#?]+/
+ (?:(?!video)[^/#?]+/[\w-]+_)(?P<id>[\w-]+)
+ '''
+ _TESTS = [{
+ 'url': 'https://multimedia.europarl.europa.eu/pl/webstreaming/plenary-session_20220914-0900-PLENARY',
+ 'info_dict': {
+ 'id': '62388b15-d85b-4add-99aa-ba12ccf64f0d',
+ 'ext': 'mp4',
+ 'title': 'Plenary session',
+ 'release_timestamp': 1663139069,
+ 'release_date': '20220914',
+ },
+ 'params': {
+ 'skip_download': True,
+ }
+ }, {
+ # live webstream
+ 'url': 'https://multimedia.europarl.europa.eu/en/webstreaming/euroscola_20221115-1000-SPECIAL-EUROSCOLA',
+ 'info_dict': {
+ 'ext': 'mp4',
+ 'id': '510eda7f-ba72-161b-7ee7-0e836cd2e715',
+ 'release_timestamp': 1668502800,
+ 'title': 'Euroscola 2022-11-15 19:21',
+ 'release_date': '20221115',
+ 'live_status': 'is_live',
+ },
+ 'skip': 'not live anymore'
+ }, {
+ 'url': 'https://multimedia.europarl.europa.eu/en/webstreaming/committee-on-culture-and-education_20230301-1130-COMMITTEE-CULT',
+ 'info_dict': {
+ 'id': '7355662c-8eac-445e-4bb9-08db14b0ddd7',
+ 'ext': 'mp4',
+ 'release_date': '20230301',
+ 'title': 'Committee on Culture and Education',
+ 'release_timestamp': 1677666641,
+ }
+ }, {
+ # live stream
+ 'url': 'https://multimedia.europarl.europa.eu/en/webstreaming/committee-on-environment-public-health-and-food-safety_20230524-0900-COMMITTEE-ENVI',
+ 'info_dict': {
+ 'id': 'e4255f56-10aa-4b3c-6530-08db56d5b0d9',
+ 'ext': 'mp4',
+ 'release_date': '20230524',
+ 'title': r're:Committee on Environment, Public Health and Food Safety \d{4}-\d{2}-\d{2}\s\d{2}:\d{2}',
+ 'release_timestamp': 1684911541,
+ 'live_status': 'is_live',
+ },
+ 'skip': 'Not live anymore'
+ }]
+
+ def _real_extract(self, url):
+ display_id = self._match_id(url)
+ webpage = self._download_webpage(url, display_id)
+
+ webpage_nextjs = self._search_nextjs_data(webpage, display_id)['props']['pageProps']
+
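+ # Connected Views meeting API; the tenantId below is hard-coded, presumably matching the public web player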
+ json_info = self._download_json(
+ 'https://acs-api.europarl.connectedviews.eu/api/FullMeeting', display_id,
+ query={
+ 'api-version': 1.0,
+ 'tenantId': 'bae646ca-1fc8-4363-80ba-2c04f06b4968',
+ 'externalReference': display_id
+ })
+
+ formats, subtitles = [], {}
+ for hls_url in traverse_obj(json_info, ((('meetingVideo'), ('meetingVideos', ...)), 'hlsUrl')):
+ fmt, subs = self._extract_m3u8_formats_and_subtitles(hls_url, display_id)
+ formats.extend(fmt)
+ self._merge_subtitles(subs, target=subtitles)
+
+ return {
+ 'id': json_info['id'],
+ 'title': traverse_obj(webpage_nextjs, (('mediaItem', 'title'), ('title', )), get_all=False),
+ 'formats': formats,
+ 'subtitles': subtitles,
+ 'release_timestamp': parse_iso8601(json_info.get('startDateTime')),
+ 'is_live': traverse_obj(webpage_nextjs, ('mediaItem', 'mediaSubType')) == 'Live'
+ }
diff --git a/yt_dlp/extractor/europeantour.py b/yt_dlp/extractor/europeantour.py
new file mode 100644
index 0000000..1995a74
--- /dev/null
+++ b/yt_dlp/extractor/europeantour.py
@@ -0,0 +1,34 @@
+import re
+
+from .common import InfoExtractor
+
+
+class EuropeanTourIE(InfoExtractor):
+ _VALID_URL = r'https?://(?:www\.)?europeantour\.com/dpworld-tour/news/video/(?P<id>[^/&?#$]+)'
+
+ _TESTS = [{
+ 'url': 'https://www.europeantour.com/dpworld-tour/news/video/the-best-shots-of-the-2021-seasons/',
+ 'info_dict': {
+ 'id': '6287788195001',
+ 'ext': 'mp4',
+ 'title': 'The best shots of the 2021 seasons',
+ 'duration': 2416.512,
+ 'timestamp': 1640010141,
+ 'uploader_id': '5136026580001',
+ 'tags': ['prod-imported'],
+ 'thumbnail': 'md5:fdac52bc826548860edf8145ee74e71a',
+ 'upload_date': '20211220'
+ },
+ 'params': {'skip_download': True}
+ }]
+
+ BRIGHTCOVE_URL_TEMPLATE = 'http://players.brightcove.net/%s/default_default/index.html?videoId=%s'
+
+ def _real_extract(self, url):
+ id = self._match_id(url)
+ webpage = self._download_webpage(url, id)
+ vid, aid = re.search(r'(?s)brightcove-player\s?video-id="([^"]+)".*"ACCOUNT_ID":"([^"]+)"', webpage).groups()
+ if not aid:
+ aid = '5136026580001'
+ return self.url_result(
+ self.BRIGHTCOVE_URL_TEMPLATE % (aid, vid), 'BrightcoveNew')
diff --git a/yt_dlp/extractor/eurosport.py b/yt_dlp/extractor/eurosport.py
new file mode 100644
index 0000000..6c426bb
--- /dev/null
+++ b/yt_dlp/extractor/eurosport.py
@@ -0,0 +1,123 @@
+from .common import InfoExtractor
+from ..utils import traverse_obj
+
+
+class EurosportIE(InfoExtractor):
+ _VALID_URL = r'https?://www\.eurosport\.com/\w+/(?:[\w-]+/[\d-]+/)?[\w-]+_(?P<id>vid\d+)'
+ _TESTS = [{
+ 'url': 'https://www.eurosport.com/tennis/roland-garros/2022/highlights-rafael-nadal-brushes-aside-caper-ruud-to-win-record-extending-14th-french-open-title_vid1694147/video.shtml',
+ 'info_dict': {
+ 'id': '2480939',
+ 'ext': 'mp4',
+ 'title': 'Highlights: Rafael Nadal brushes aside Caper Ruud to win record-extending 14th French Open title',
+ 'description': 'md5:b564db73ecfe4b14ebbd8e62a3692c76',
+ 'thumbnail': 'https://imgresizer.eurosport.com/unsafe/1280x960/smart/filters:format(jpeg)/origin-imgresizer.eurosport.com/2022/06/05/3388285-69245968-2560-1440.png',
+ 'duration': 195.0,
+ 'display_id': 'vid1694147',
+ 'timestamp': 1654446698,
+ 'upload_date': '20220605',
+ }
+ }, {
+ 'url': 'https://www.eurosport.com/tennis/roland-garros/2022/watch-the-top-five-shots-from-men-s-final-as-rafael-nadal-beats-casper-ruud-to-seal-14th-french-open_vid1694283/video.shtml',
+ 'info_dict': {
+ 'id': '2481254',
+ 'ext': 'mp4',
+ 'title': 'md5:149dcc5dfb38ab7352acc008cc9fb071',
+ 'duration': 130.0,
+ 'thumbnail': 'https://imgresizer.eurosport.com/unsafe/1280x960/smart/filters:format(jpeg)/origin-imgresizer.eurosport.com/2022/06/05/3388422-69248708-2560-1440.png',
+ 'description': 'md5:a0c8a7f6b285e48ae8ddbe7aa85cfee6',
+ 'display_id': 'vid1694283',
+ 'timestamp': 1654456090,
+ 'upload_date': '20220605',
+ }
+ }, {
+ # geo-fenced, but can be bypassed via xff (X-Forwarded-For header)
+ 'url': 'https://www.eurosport.com/cycling/tour-de-france-femmes/2022/incredible-ride-marlen-reusser-storms-to-stage-4-win-at-tour-de-france-femmes_vid1722221/video.shtml',
+ 'info_dict': {
+ 'id': '2582552',
+ 'ext': 'mp4',
+ 'title': '‘Incredible ride!’ - Marlen Reusser storms to Stage 4 win at Tour de France Femmes',
+ 'duration': 188.0,
+ 'display_id': 'vid1722221',
+ 'timestamp': 1658936167,
+ 'thumbnail': 'https://imgresizer.eurosport.com/unsafe/1280x960/smart/filters:format(jpeg)/origin-imgresizer.eurosport.com/2022/07/27/3423347-69852108-2560-1440.jpg',
+ 'description': 'md5:32bbe3a773ac132c57fb1e8cca4b7c71',
+ 'upload_date': '20220727',
+ }
+ }, {
+ 'url': 'https://www.eurosport.com/football/champions-league/2022-2023/pep-guardiola-emotionally-destroyed-after-manchester-city-win-over-bayern-munich-in-champions-league_vid1896254/video.shtml',
+ 'info_dict': {
+ 'id': '3096477',
+ 'ext': 'mp4',
+ 'title': 'md5:82edc17370124c7a19b3cf518517583b',
+ 'duration': 84.0,
+ 'description': 'md5:b3f44ef7f5b5b95b24a273b163083feb',
+ 'thumbnail': 'https://imgresizer.eurosport.com/unsafe/1280x960/smart/filters:format(jpeg)/origin-imgresizer.eurosport.com/2023/04/12/3682873-74947393-2560-1440.jpg',
+ 'timestamp': 1681292028,
+ 'upload_date': '20230412',
+ 'display_id': 'vid1896254',
+ }
+ }, {
+ 'url': 'https://www.eurosport.com/football/last-year-s-semi-final-pain-was-still-there-pep-guardiola-after-man-city-reach-cl-final_vid1914115/video.shtml',
+ 'info_dict': {
+ 'id': '3149108',
+ 'ext': 'mp4',
+ 'title': '\'Last year\'s semi-final pain was still there\' - Pep Guardiola after Man City reach CL final',
+ 'description': 'md5:89ef142fe0170a66abab77fac2955d8e',
+ 'display_id': 'vid1914115',
+ 'timestamp': 1684403618,
+ 'thumbnail': 'https://imgresizer.eurosport.com/unsafe/1280x960/smart/filters:format(jpeg)/origin-imgresizer.eurosport.com/2023/05/18/3707254-75435008-2560-1440.jpg',
+ 'duration': 105.0,
+ 'upload_date': '20230518',
+ }
+ }]
+
+ _TOKEN = None
+
+ # the country list is actually defined in https://netsport.eurosport.io/?variables={"databaseId":<databaseId>,"playoutType":"VDP"}&extensions={"persistedQuery":{"version":1 ..
+ # but that method requires obtaining a sha256 hash
+ _GEO_COUNTRIES = ['DE', 'NL', 'EU', 'IT', 'FR']  # Not a complete list, but it should work
+
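+ # the playback token is fetched once and cached on the class for all extractions in this session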
+ def _real_initialize(self):
+ if EurosportIE._TOKEN is None:
+ EurosportIE._TOKEN = self._download_json(
+ 'https://eu3-prod-direct.eurosport.com/token?realm=eurosport', None,
+ 'Trying to get token')['data']['attributes']['token']
+
+ def _real_extract(self, url):
+ display_id = self._match_id(url)
+ webpage = self._download_webpage(url, display_id)
+
+ json_data = self._download_json(
+ f'https://eu3-prod-direct.eurosport.com/playback/v2/videoPlaybackInfo/sourceSystemId/eurosport-{display_id}',
+ display_id, query={'usePreAuth': True}, headers={'Authorization': f'Bearer {EurosportIE._TOKEN}'})['data']
+
+ json_ld_data = self._search_json_ld(webpage, display_id)
+
+ formats, subtitles = [], {}
+ for stream_type in json_data['attributes']['streaming']:
+ if stream_type == 'hls':
+ fmts, subs = self._extract_m3u8_formats_and_subtitles(
+ traverse_obj(json_data, ('attributes', 'streaming', stream_type, 'url')), display_id, ext='mp4')
+ elif stream_type == 'dash':
+ fmts, subs = self._extract_mpd_formats_and_subtitles(
+ traverse_obj(json_data, ('attributes', 'streaming', stream_type, 'url')), display_id)
+ elif stream_type == 'mss':
+ fmts, subs = self._extract_ism_formats_and_subtitles(
+ traverse_obj(json_data, ('attributes', 'streaming', stream_type, 'url')), display_id)
+ else:
+ continue  # unknown stream type: skip it so stale fmts/subs are not re-used
+
+ formats.extend(fmts)
+ self._merge_subtitles(subs, target=subtitles)
+
+ return {
+ 'id': json_data['id'],
+ 'title': json_ld_data.get('title') or self._og_search_title(webpage),
+ 'display_id': display_id,
+ 'formats': formats,
+ 'subtitles': subtitles,
+ 'thumbnails': json_ld_data.get('thumbnails'),
+ 'description': (json_ld_data.get('description')
+ or self._html_search_meta(['og:description', 'description'], webpage)),
+ 'duration': json_ld_data.get('duration'),
+ 'timestamp': json_ld_data.get('timestamp'),
+ }
diff --git a/yt_dlp/extractor/euscreen.py b/yt_dlp/extractor/euscreen.py
new file mode 100644
index 0000000..65a1dc7
--- /dev/null
+++ b/yt_dlp/extractor/euscreen.py
@@ -0,0 +1,60 @@
+from .common import InfoExtractor
+
+from ..utils import (
+ parse_duration,
+ js_to_json,
+)
+
+
+class EUScreenIE(InfoExtractor):
+ _VALID_URL = r'https?://(?:www\.)?euscreen\.eu/item.html\?id=(?P<id>[^&?$/]+)'
+
+ _TESTS = [{
+ 'url': 'https://euscreen.eu/item.html?id=EUS_0EBCBF356BFC4E12A014023BA41BD98C',
+ 'info_dict': {
+ 'id': 'EUS_0EBCBF356BFC4E12A014023BA41BD98C',
+ 'ext': 'mp4',
+ 'title': "L'effondrement du stade du Heysel",
+ 'alt_title': 'Collapse of the Heysel Stadium',
+ 'duration': 318.0,
+ 'description': 'md5:f0ffffdfce6821139357a1b8359d6152',
+ 'series': 'JA2 DERNIERE',
+ 'episode': '-',
+ 'uploader': 'INA / France',
+ 'thumbnail': 'http://images3.noterik.com/domain/euscreenxl/user/eu_ina/video/EUS_0EBCBF356BFC4E12A014023BA41BD98C/image.jpg'
+ },
+ 'params': {'skip_download': True}
+ }]
+
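+ # static fsxml payload mimicking the web player's screen-registration handshake; the response supplies the arguments for the follow-up request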
+ _payload = b'<fsxml><screen><properties><screenId>-1</screenId></properties><capabilities id="1"><properties><platform>Win32</platform><appcodename>Mozilla</appcodename><appname>Netscape</appname><appversion>5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/94.0.4606.71 Safari/537.36</appversion><useragent>Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/94.0.4606.71 Safari/537.36</useragent><cookiesenabled>true</cookiesenabled><screenwidth>784</screenwidth><screenheight>758</screenheight><orientation>undefined</orientation><smt_browserid>Sat, 07 Oct 2021 08:56:50 GMT</smt_browserid><smt_sessionid>1633769810758</smt_sessionid></properties></capabilities></screen></fsxml>'
+
+ def _real_extract(self, url):
+ id = self._match_id(url)
+ args_for_js_request = self._download_webpage(
+ 'https://euscreen.eu/lou/LouServlet/domain/euscreenxl/html5application/euscreenxlitem',
+ id, data=self._payload, query={'actionlist': 'itempage', 'id': id})
+ info_js = self._download_webpage(
+ 'https://euscreen.eu/lou/LouServlet/domain/euscreenxl/html5application/euscreenxlitem',
+ id, data=args_for_js_request.replace('screenid', 'screenId').encode())
+ video_json = self._parse_json(
+ self._search_regex(r'setVideo\(({.+})\)\(\$end\$\)put', info_js, 'Video JSON'),
+ id, transform_source=js_to_json)
+ meta_json = self._parse_json(
+ self._search_regex(r'setData\(({.+})\)\(\$end\$\)', info_js, 'Metadata JSON'),
+ id, transform_source=js_to_json)
+ formats = [{
+ 'url': source['src'],
+ } for source in video_json.get('sources', [])]
+
+ return {
+ 'id': id,
+ 'title': meta_json.get('originalTitle'),
+ 'alt_title': meta_json.get('title'),
+ 'duration': parse_duration(meta_json.get('duration')),
+ 'description': '%s\n%s' % (meta_json.get('summaryOriginal', ''), meta_json.get('summaryEnglish', '')),
+ 'series': meta_json.get('series') or meta_json.get('seriesEnglish'),
+ 'episode': meta_json.get('episodeNumber'),
+ 'uploader': meta_json.get('provider'),
+ 'thumbnail': meta_json.get('screenshot') or video_json.get('screenshot'),
+ 'formats': formats,
+ }
diff --git a/yt_dlp/extractor/expressen.py b/yt_dlp/extractor/expressen.py
new file mode 100644
index 0000000..b96f2e4
--- /dev/null
+++ b/yt_dlp/extractor/expressen.py
@@ -0,0 +1,96 @@
+from .common import InfoExtractor
+from ..utils import (
+ determine_ext,
+ int_or_none,
+ unescapeHTML,
+ unified_timestamp,
+)
+
+
+class ExpressenIE(InfoExtractor):
+ _VALID_URL = r'''(?x)
+ https?://
+ (?:www\.)?(?:expressen|di)\.se/
+ (?:(?:tvspelare/video|video-?player/embed)/)?
+ (?:tv|nyheter)/(?:[^/?#]+/)*
+ (?P<id>[^/?#&]+)
+ '''
+ _EMBED_REGEX = [r'<iframe[^>]+\bsrc=(["\'])(?P<url>(?:https?:)?//(?:www\.)?(?:expressen|di)\.se/(?:tvspelare/video|videoplayer/embed)/tv/.+?)\1']
+ _TESTS = [{
+ 'url': 'https://www.expressen.se/tv/ledare/ledarsnack/ledarsnack-om-arbetslosheten-bland-kvinnor-i-speciellt-utsatta-omraden/',
+ 'md5': 'deb2ca62e7b1dcd19fa18ba37523f66e',
+ 'info_dict': {
+ 'id': 'ba90f5a9-78d1-4511-aa02-c177b9c99136',
+ 'display_id': 'ledarsnack-om-arbetslosheten-bland-kvinnor-i-speciellt-utsatta-omraden',
+ 'ext': 'mp4',
+ 'title': 'Ledarsnack: Om arbetslösheten bland kvinnor i speciellt utsatta områden',
+ 'description': 'md5:f38c81ff69f3de4d269bbda012fcbbba',
+ 'thumbnail': r're:^https?://.*\.jpg$',
+ 'duration': 788,
+ 'timestamp': 1526639109,
+ 'upload_date': '20180518',
+ },
+ }, {
+ 'url': 'https://www.expressen.se/tv/kultur/kulturdebatt-med-expressens-karin-olsson/',
+ 'only_matching': True,
+ }, {
+ 'url': 'https://www.expressen.se/tvspelare/video/tv/ditv/ekonomistudion/experterna-har-ar-fragorna-som-avgor-valet/?embed=true&external=true&autoplay=true&startVolume=0&partnerId=di',
+ 'only_matching': True,
+ }, {
+ 'url': 'https://www.expressen.se/videoplayer/embed/tv/ditv/ekonomistudion/experterna-har-ar-fragorna-som-avgor-valet/?embed=true&external=true&autoplay=true&startVolume=0&partnerId=di',
+ 'only_matching': True,
+ }, {
+ 'url': 'https://www.di.se/videoplayer/embed/tv/ditv/borsmorgon/implantica-rusar-70--under-borspremiaren-hor-styrelsemedlemmen/?embed=true&external=true&autoplay=true&startVolume=0&partnerId=di',
+ 'only_matching': True,
+ }, {
+ 'url': 'https://www.expressen.se/video-player/embed/tv/nyheter/ekero-fodda-olof-gustafsson-forvaltar-knarkbaronen-pablo-escobars-namn',
+ 'only_matching': True,
+ }, {
+ 'url': 'https://www.expressen.se/nyheter/efter-egna-telefonbluffen-escobar-stammer-klarna/',
+ 'only_matching': True,
+ }]
+
+ def _real_extract(self, url):
+ display_id = self._match_id(url)
+
+ webpage = self._download_webpage(url, display_id)
+
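+ # page metadata is embedded as HTML-escaped JSON in data-* attributes (data-video-tracking-info, data-article-data)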
+ def extract_data(name):
+ return self._parse_json(
+ self._search_regex(
+ r'data-%s=(["\'])(?P<value>(?:(?!\1).)+)\1' % name,
+ webpage, 'info', group='value'),
+ display_id, transform_source=unescapeHTML)
+
+ info = extract_data('video-tracking-info')
+ video_id = info['contentId']
+
+ data = extract_data('article-data')
+ stream = data['stream']
+
+ if determine_ext(stream) == 'm3u8':
+ formats = self._extract_m3u8_formats(
+ stream, display_id, 'mp4', entry_protocol='m3u8_native',
+ m3u8_id='hls')
+ else:
+ formats = [{
+ 'url': stream,
+ }]
+
+ title = info.get('titleRaw') or data['title']
+ description = info.get('descriptionRaw')
+ thumbnail = info.get('socialMediaImage') or data.get('image')
+ duration = int_or_none(info.get('videoTotalSecondsDuration')
+ or data.get('totalSecondsDuration'))
+ timestamp = unified_timestamp(info.get('publishDate'))
+
+ return {
+ 'id': video_id,
+ 'display_id': display_id,
+ 'title': title,
+ 'description': description,
+ 'thumbnail': thumbnail,
+ 'duration': duration,
+ 'timestamp': timestamp,
+ 'formats': formats,
+ }
diff --git a/yt_dlp/extractor/extractors.py b/yt_dlp/extractor/extractors.py
new file mode 100644
index 0000000..baa69d2
--- /dev/null
+++ b/yt_dlp/extractor/extractors.py
@@ -0,0 +1,28 @@
+import contextlib
+import os
+
+from ..plugins import load_plugins
+
+# NB: Must be before other imports so that plugins can be correctly injected
+_PLUGIN_CLASSES = load_plugins('extractor', 'IE')
+
+_LAZY_LOADER = False
+if not os.environ.get('YTDLP_NO_LAZY_EXTRACTORS'):
+ with contextlib.suppress(ImportError):
+ from .lazy_extractors import * # noqa: F403
+ from .lazy_extractors import _ALL_CLASSES
+ _LAZY_LOADER = True
+
+if not _LAZY_LOADER:
+ from ._extractors import * # noqa: F403
+ _ALL_CLASSES = [ # noqa: F811
+ klass
+ for name, klass in globals().items()
+ if name.endswith('IE') and name != 'GenericIE'
+ ]
+ _ALL_CLASSES.append(GenericIE) # noqa: F405
+
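+# plugin extractors are exposed as module attributes and prepended so they take priority over the built-ins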
+globals().update(_PLUGIN_CLASSES)
+_ALL_CLASSES[:0] = _PLUGIN_CLASSES.values()
+
+from .common import _PLUGIN_OVERRIDES # noqa: F401
diff --git a/yt_dlp/extractor/eyedotv.py b/yt_dlp/extractor/eyedotv.py
new file mode 100644
index 0000000..d8b068e
--- /dev/null
+++ b/yt_dlp/extractor/eyedotv.py
@@ -0,0 +1,61 @@
+from .common import InfoExtractor
+from ..utils import (
+ xpath_text,
+ parse_duration,
+ ExtractorError,
+)
+
+
+class EyedoTVIE(InfoExtractor):
+ _VALID_URL = r'https?://(?:www\.)?eyedo\.tv/[^/]+/(?:#!/)?Live/Detail/(?P<id>[0-9]+)'
+ _TEST = {
+ 'url': 'https://www.eyedo.tv/en-US/#!/Live/Detail/16301',
+ 'md5': 'ba14f17995cdfc20c36ba40e21bf73f7',
+ 'info_dict': {
+ 'id': '16301',
+ 'ext': 'mp4',
+ 'title': 'Journée du conseil scientifique de l\'Afnic 2015',
+ 'description': 'md5:4abe07293b2f73efc6e1c37028d58c98',
+ 'uploader': 'Afnic Live',
+ 'uploader_id': '8023',
+ }
+ }
+ _ROOT_URL = 'http://live.eyedo.net:1935/'
+
+ def _real_extract(self, url):
+ video_id = self._match_id(url)
+ video_data = self._download_xml('http://eyedo.tv/api/live/GetLive/%s' % video_id, video_id)
+
+ def _add_ns(path):
+ return self._xpath_ns(path, 'http://schemas.datacontract.org/2004/07/EyeDo.Core.Implementation.Web.ViewModels.Api')
+
+ title = xpath_text(video_data, _add_ns('Titre'), 'title', True)
+ state_live_code = xpath_text(video_data, _add_ns('StateLiveCode'), 'state live code', True)
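+ # "avenir" (upcoming) streams are not yet available; "live" picks the live manifest, anything else the replay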
+ if state_live_code == 'avenir':
+ raise ExtractorError(
+ '%s said: We\'re sorry, but this video is not yet available.' % self.IE_NAME,
+ expected=True)
+
+ is_live = state_live_code == 'live'
+ m3u8_url = None
+ # http://eyedo.tv/Content/Html5/Scripts/html5view.js
+ if is_live:
+ if xpath_text(video_data, 'Cdn') == 'true':
+ m3u8_url = 'http://rrr.sz.xlcdn.com/?account=eyedo&file=A%s&type=live&service=wowza&protocol=http&output=playlist.m3u8' % video_id
+ else:
+ m3u8_url = self._ROOT_URL + 'w/%s/eyedo_720p/playlist.m3u8' % video_id
+ else:
+ m3u8_url = self._ROOT_URL + 'replay-w/%s/mp4:%s.mp4/playlist.m3u8' % (video_id, video_id)
+
+ return {
+ 'id': video_id,
+ 'title': title,
+ 'formats': self._extract_m3u8_formats(
+ m3u8_url, video_id, 'mp4', 'm3u8_native'),
+ 'description': xpath_text(video_data, _add_ns('Description')),
+ 'duration': parse_duration(xpath_text(video_data, _add_ns('Duration'))),
+ 'uploader': xpath_text(video_data, _add_ns('Createur')),
+ 'uploader_id': xpath_text(video_data, _add_ns('CreateurId')),
+ 'chapter': xpath_text(video_data, _add_ns('ChapitreTitre')),
+ 'chapter_id': xpath_text(video_data, _add_ns('ChapitreId')),
+ }
diff --git a/yt_dlp/extractor/facebook.py b/yt_dlp/extractor/facebook.py
new file mode 100644
index 0000000..834b1df
--- /dev/null
+++ b/yt_dlp/extractor/facebook.py
@@ -0,0 +1,1060 @@
+import json
+import re
+import urllib.parse
+
+from .common import InfoExtractor
+from ..compat import (
+ compat_etree_fromstring,
+ compat_str,
+ compat_urllib_parse_unquote,
+)
+from ..networking import Request
+from ..networking.exceptions import network_exceptions
+from ..utils import (
+ ExtractorError,
+ clean_html,
+ determine_ext,
+ error_to_compat_str,
+ float_or_none,
+ format_field,
+ get_element_by_id,
+ get_first,
+ int_or_none,
+ join_nonempty,
+ js_to_json,
+ merge_dicts,
+ parse_count,
+ parse_qs,
+ qualities,
+ str_or_none,
+ traverse_obj,
+ try_get,
+ url_or_none,
+ urlencode_postdata,
+ urljoin,
+ variadic,
+)
+
+
+class FacebookIE(InfoExtractor):
+ _VALID_URL = r'''(?x)
+ (?:
+ https?://
+ (?:[\w-]+\.)?(?:facebook\.com|facebookwkhpilnemxj7asaniu7vnjjbiltxjqhye3mhbshg7kx5tfyd\.onion)/
+ (?:[^#]*?\#!/)?
+ (?:
+ (?:
+ permalink\.php|
+ video/video\.php|
+ photo\.php|
+ video\.php|
+ video/embed|
+ story\.php|
+ watch(?:/live)?/?
+ )\?(?:.*?)(?:v|video_id|story_fbid)=|
+ [^/]+/videos/(?:[^/]+/)?|
+ [^/]+/posts/|
+ events/(?:[^/]+/)?|
+ groups/[^/]+/(?:permalink|posts)/|
+ watchparty/
+ )|
+ facebook:
+ )
+ (?P<id>pfbid[A-Za-z0-9]+|\d+)
+ '''
+ _EMBED_REGEX = [
+ r'<iframe[^>]+?src=(["\'])(?P<url>https?://www\.facebook\.com/(?:video/embed|plugins/video\.php).+?)\1',
+ # Facebook API embed https://developers.facebook.com/docs/plugins/embedded-video-player
+ r'''(?x)<div[^>]+
+ class=(?P<q1>[\'"])[^\'"]*\bfb-(?:video|post)\b[^\'"]*(?P=q1)[^>]+
+ data-href=(?P<q2>[\'"])(?P<url>(?:https?:)?//(?:www\.)?facebook.com/.+?)(?P=q2)''',
+ ]
+ _LOGIN_URL = 'https://www.facebook.com/login.php?next=http%3A%2F%2Ffacebook.com%2Fhome.php&login_attempt=1'
+ _CHECKPOINT_URL = 'https://www.facebook.com/checkpoint/?next=http%3A%2F%2Ffacebook.com%2Fhome.php&_fb_noscript=1'
+ _NETRC_MACHINE = 'facebook'
+ IE_NAME = 'facebook'
+
+ _VIDEO_PAGE_TEMPLATE = 'https://www.facebook.com/video/video.php?v=%s'
+ _VIDEO_PAGE_TAHOE_TEMPLATE = 'https://www.facebook.com/video/tahoe/async/%s/?chain=true&isvideo=true&payloadtype=primary'
+
+ _TESTS = [{
+ 'url': 'https://www.facebook.com/radiokicksfm/videos/3676516585958356/',
+ 'info_dict': {
+ 'id': '3676516585958356',
+ 'ext': 'mp4',
+ 'title': 'dr Adam Przygoda',
+ 'description': 'md5:34675bda53336b1d16400265c2bb9b3b',
+ 'uploader': 'RADIO KICKS FM',
+ 'upload_date': '20230818',
+ 'timestamp': 1692346159,
+ 'thumbnail': r're:^https?://.*',
+ 'uploader_id': '100063551323670',
+ 'duration': 3132.184,
+ 'view_count': int,
+ 'concurrent_view_count': 0,
+ },
+ }, {
+ 'url': 'https://www.facebook.com/video.php?v=637842556329505&fref=nf',
+ 'md5': '6a40d33c0eccbb1af76cf0485a052659',
+ 'info_dict': {
+ 'id': '637842556329505',
+ 'ext': 'mp4',
+ 'title': 're:Did you know Kei Nishikori is the first Asian man to ever reach a Grand Slam',
+ 'uploader': 'Tennis on Facebook',
+ 'upload_date': '20140908',
+ 'timestamp': 1410199200,
+ },
+ 'skip': 'Requires logging in',
+ }, {
+ # data.video
+ 'url': 'https://www.facebook.com/video.php?v=274175099429670',
+ 'info_dict': {
+ 'id': '274175099429670',
+ 'ext': 'mp4',
+ 'title': 'Asif',
+ 'description': '',
+ 'uploader': 'Asif Nawab Butt',
+ 'upload_date': '20140506',
+ 'timestamp': 1399398998,
+ 'thumbnail': r're:^https?://.*',
+ 'uploader_id': 'pfbid028wxorhX2ErLFJ578N6P3crHD3PHmXTCqCvfBpsnbSLmbokwSY75p5hWBjHGkG4zxl',
+ 'duration': 131.03,
+ 'concurrent_view_count': int,
+ },
+ }, {
+ 'note': 'Video with DASH manifest',
+ 'url': 'https://www.facebook.com/video.php?v=957955867617029',
+ 'md5': 'b2c28d528273b323abe5c6ab59f0f030',
+ 'info_dict': {
+ 'id': '957955867617029',
+ 'ext': 'mp4',
+ 'title': 'When you post epic content on instagram.com/433 8 million followers, this is ...',
+ 'uploader': 'Demy de Zeeuw',
+ 'upload_date': '20160110',
+ 'timestamp': 1452431627,
+ },
+ 'skip': 'Requires logging in',
+ }, {
+ 'url': 'https://www.facebook.com/maxlayn/posts/10153807558977570',
+ 'md5': '037b1fa7f3c2d02b7a0d7bc16031ecc6',
+ 'info_dict': {
+ 'id': '544765982287235',
+ 'ext': 'mp4',
+ 'title': '"What are you doing running in the snow?"',
+ 'uploader': 'FailArmy',
+ },
+ 'skip': 'Video gone',
+ }, {
+ 'url': 'https://m.facebook.com/story.php?story_fbid=1035862816472149&id=116132035111903',
+ 'md5': '1deb90b6ac27f7efcf6d747c8a27f5e3',
+ 'info_dict': {
+ 'id': '1035862816472149',
+ 'ext': 'mp4',
+ 'title': 'What the Flock Is Going On In New Zealand Credit: ViralHog',
+ 'uploader': 'S. Saint',
+ },
+ 'skip': 'Video gone',
+ }, {
+ 'note': 'swf params escaped',
+ 'url': 'https://www.facebook.com/barackobama/posts/10153664894881749',
+ 'md5': '97ba073838964d12c70566e0085c2b91',
+ 'info_dict': {
+ 'id': '10153664894881749',
+ 'ext': 'mp4',
+ 'title': 'Average time to confirm recent Supreme Court nominees: 67 days Longest it\'s t...',
+ 'thumbnail': r're:^https?://.*',
+ 'timestamp': 1456259628,
+ 'upload_date': '20160223',
+ 'uploader': 'Barack Obama',
+ },
+ 'skip': 'Gif on giphy.com gone',
+ }, {
+ # have 1080P, but only up to 720p in swf params
+ # data.video.story.attachments[].media
+ 'url': 'https://www.facebook.com/cnn/videos/10155529876156509/',
+ 'md5': 'ca63897a90c9452efee5f8c40d080e25',
+ 'info_dict': {
+ 'id': '10155529876156509',
+ 'ext': 'mp4',
+ 'title': 'Holocaust survivor becomes US citizen',
+ 'description': 'She survived the holocaust — and years later, she’s getting her citizenship so she can vote for Hillary Clinton http://cnn.it/2eERh5f',
+ 'timestamp': 1477818095,
+ 'upload_date': '20161030',
+ 'uploader': 'CNN',
+ 'thumbnail': r're:^https?://.*',
+ 'view_count': int,
+ 'uploader_id': '100059479812265',
+ 'concurrent_view_count': int,
+ 'duration': 44.478,
+ },
+ }, {
+ # bigPipe.onPageletArrive ... onPageletArrive pagelet_group_mall
+ # data.node.comet_sections.content.story.attachments[].style_type_renderer.attachment.media
+ 'url': 'https://www.facebook.com/yaroslav.korpan/videos/1417995061575415/',
+ 'info_dict': {
+ 'id': '1417995061575415',
+ 'ext': 'mp4',
+ 'title': 'Довгоочікуване відео | By Yaroslav - Facebook',
+ 'description': 'Довгоочікуване відео',
+ 'timestamp': 1486648217,
+ 'upload_date': '20170209',
+ 'uploader': 'Yaroslav Korpan',
+ 'uploader_id': 'pfbid06AScABAWcW91qpiuGrLt99Ef9tvwHoXP6t8KeFYEqkSfreMtfa9nTveh8b2ZEVSWl',
+ 'concurrent_view_count': int,
+ 'thumbnail': r're:^https?://.*',
+ 'view_count': int,
+ 'duration': 11736.446,
+ },
+ 'params': {
+ 'skip_download': True,
+ },
+ }, {
+ # FIXME
+ 'url': 'https://www.facebook.com/LaGuiaDelVaron/posts/1072691702860471',
+ 'info_dict': {
+ 'id': '1072691702860471',
+ 'ext': 'mp4',
+ 'title': 'md5:ae2d22a93fbb12dad20dc393a869739d',
+ 'timestamp': 1477305000,
+ 'upload_date': '20161024',
+ 'uploader': 'La Guía Del Varón',
+ 'thumbnail': r're:^https?://.*',
+ },
+ 'skip': 'Requires logging in',
+ }, {
+ # data.node.comet_sections.content.story.attachments[].style_type_renderer.attachment.media
+ 'url': 'https://www.facebook.com/groups/1024490957622648/permalink/1396382447100162/',
+ 'info_dict': {
+ 'id': '202882990186699',
+ 'ext': 'mp4',
+ 'title': 'birb (O v O") | Hello? Yes your uber ride is here',
+ 'description': 'Hello? Yes your uber ride is here * Jukin Media Verified * Find this video and others like it by visiting...',
+ 'timestamp': 1486035513,
+ 'upload_date': '20170202',
+ 'uploader': 'Elisabeth Ahtn',
+ 'uploader_id': '100013949973717',
+ },
+ 'skip': 'Requires logging in',
+ }, {
+ # data.node.comet_sections.content.story.attachments[].throwbackStyles.attachment_target_renderer.attachment.target.attachments[].styles.attachment.media
+ 'url': 'https://www.facebook.com/groups/1645456212344334/posts/3737828833107051/',
+ 'info_dict': {
+ 'id': '1569199726448814',
+ 'ext': 'mp4',
+ 'title': 'Pence MUST GO!',
+ 'description': 'Vickie Gentry shared a memory.',
+ 'timestamp': 1511548260,
+ 'upload_date': '20171124',
+ 'uploader': 'Vickie Gentry',
+ 'uploader_id': 'pfbid0FuZhHCeWDAxWxEbr3yKPFaRstXvRxgsp9uCPG6GjD4J2AitB35NUAuJ4Q75KcjiDl',
+ 'thumbnail': r're:^https?://.*',
+ 'duration': 148.435,
+ },
+ }, {
+ # data.node.comet_sections.content.story.attachments[].styles.attachment.media
+ 'url': 'https://www.facebook.com/attn/posts/pfbid0j1Czf2gGDVqeQ8KiMLFm3pWN8GxsQmeRrVhimWDzMuKQoR8r4b1knNsejELmUgyhl',
+ 'info_dict': {
+ 'id': '6968553779868435',
+ 'ext': 'mp4',
+ 'description': 'md5:2f2fcf93e97ac00244fe64521bbdb0cb',
+ 'uploader': 'ATTN:',
+ 'upload_date': '20231207',
+ 'title': 'ATTN:',
+ 'duration': 132.675,
+ 'uploader_id': '100064451419378',
+ 'view_count': int,
+ 'thumbnail': r're:^https?://.*',
+ 'timestamp': 1701975646,
+ },
+ }, {
+ # data.node.comet_sections.content.story.attachments[].styles.attachment.media
+ 'url': 'https://www.facebook.com/permalink.php?story_fbid=pfbid0fqQuVEQyXRa9Dp4RcaTR14KHU3uULHV1EK7eckNXSH63JMuoALsAvVCJ97zAGitil&id=100068861234290',
+ 'info_dict': {
+ 'id': '270103405756416',
+ 'ext': 'mp4',
+ 'title': 'Lela Evans',
+ 'description': 'Today Makkovik\'s own Pilot Mandy Smith made her inaugural landing on the airstrip in her hometown. What a proud moment as we all cheered and...',
+ 'thumbnail': r're:^https?://.*',
+ 'uploader': 'Lela Evans',
+ 'uploader_id': 'pfbid0shZJipuigyy5mqrUJn9ub5LJFWNHvan5prtyi3LrDuuuJ4NwrURgnQHYR9fywBepl',
+ 'upload_date': '20231228',
+ 'timestamp': 1703804085,
+ 'duration': 394.347,
+ 'view_count': int,
+ },
+ }, {
+ 'url': 'https://www.facebook.com/story.php?story_fbid=pfbid0Fnzhm8UuzjBYpPMNFzaSpFE9UmLdU4fJN8qTANi1Dmtj5q7DNrL5NERXfsAzDEV7l&id=100073071055552',
+ 'only_matching': True,
+ }, {
+ 'url': 'https://www.facebook.com/video.php?v=10204634152394104',
+ 'only_matching': True,
+ }, {
+ 'url': 'https://www.facebook.com/amogood/videos/1618742068337349/?fref=nf',
+ 'only_matching': True,
+ }, {
+ # data.mediaset.currMedia.edges
+ 'url': 'https://www.facebook.com/ChristyClarkForBC/videos/vb.22819070941/10153870694020942/?type=2&theater',
+ 'only_matching': True,
+ }, {
+ # data.video.story.attachments[].media
+ 'url': 'facebook:544765982287235',
+ 'only_matching': True,
+ }, {
+ # data.node.comet_sections.content.story.attachments[].style_type_renderer.attachment.media
+ 'url': 'https://www.facebook.com/groups/164828000315060/permalink/764967300301124/',
+ 'only_matching': True,
+ }, {
+ # data.video.creation_story.attachments[].media
+ 'url': 'https://zh-hk.facebook.com/peoplespower/videos/1135894589806027/',
+ 'only_matching': True,
+ }, {
+ # data.video
+ 'url': 'https://www.facebookwkhpilnemxj7asaniu7vnjjbiltxjqhye3mhbshg7kx5tfyd.onion/video.php?v=274175099429670',
+ 'only_matching': True,
+ }, {
+ # no title
+ 'url': 'https://www.facebook.com/onlycleverentertainment/videos/1947995502095005/',
+ 'only_matching': True,
+ }, {
+ # data.video
+ 'url': 'https://www.facebook.com/WatchESLOne/videos/359649331226507/',
+ 'info_dict': {
+ 'id': '359649331226507',
+ 'ext': 'mp4',
+ 'title': 'Fnatic vs. EG - Group A - Opening Match - ESL One Birmingham Day 1',
+ 'description': '#ESLOne VoD - Birmingham Finals Day#1 Fnatic vs. @Evil Geniuses',
+ 'timestamp': 1527084179,
+ 'upload_date': '20180523',
+ 'uploader': 'ESL One Dota 2',
+ 'uploader_id': '100066514874195',
+ 'duration': 4524.212,
+ 'view_count': int,
+ 'thumbnail': r're:^https?://.*',
+ 'concurrent_view_count': int,
+ },
+ 'params': {
+ 'skip_download': True,
+ },
+ }, {
+ # data.node.comet_sections.content.story.attachments[].style_type_renderer.attachment.all_subattachments.nodes[].media
+ 'url': 'https://www.facebook.com/100033620354545/videos/106560053808006/',
+ 'info_dict': {
+ 'id': '106560053808006',
+ 'ext': 'mp4',
+ 'title': 'Josef',
+ 'thumbnail': r're:^https?://.*',
+ 'concurrent_view_count': int,
+ 'uploader_id': 'pfbid0cibUN6tV7DYgdbJdsUFN46wc4jKpVSPAvJQhFofGqBGmVn3V3JtAs2tfUwziw2hUl',
+ 'timestamp': 1549275572,
+ 'duration': 3.413,
+ 'uploader': 'Josef Novak',
+ 'description': '',
+ 'upload_date': '20190204',
+ },
+ }, {
+ # data.video.story.attachments[].media
+ 'url': 'https://www.facebook.com/watch/?v=647537299265662',
+ 'only_matching': True,
+ }, {
+ # FIXME: https://github.com/yt-dlp/yt-dlp/issues/542
+ # data.node.comet_sections.content.story.attachments[].style_type_renderer.attachment.all_subattachments.nodes[].media
+ 'url': 'https://www.facebook.com/PankajShahLondon/posts/10157667649866271',
+ 'info_dict': {
+ 'id': '10157667649866271',
+ },
+ 'playlist_count': 3,
+ 'skip': 'Requires logging in',
+ }, {
+ # data.nodes[].comet_sections.content.story.attachments[].style_type_renderer.attachment.media
+ 'url': 'https://m.facebook.com/Alliance.Police.Department/posts/4048563708499330',
+ 'info_dict': {
+ 'id': '117576630041613',
+ 'ext': 'mp4',
+ # TODO: title can be extracted from video page
+ 'title': 'Facebook video #117576630041613',
+ 'uploader_id': '189393014416438',
+ 'upload_date': '20201123',
+ 'timestamp': 1606162592,
+ },
+ 'skip': 'Requires logging in',
+ }, {
+ # node.comet_sections.content.story.attached_story.attachments.style_type_renderer.attachment.media
+ 'url': 'https://www.facebook.com/groups/ateistiskselskab/permalink/10154930137678856/',
+ 'info_dict': {
+ 'id': '211567722618337',
+ 'ext': 'mp4',
+ 'title': 'Facebook video #211567722618337',
+ 'uploader_id': '127875227654254',
+ 'upload_date': '20161122',
+ 'timestamp': 1479793574,
+ },
+ 'skip': 'No video',
+ }, {
+ # data.video.creation_story.attachments[].media
+ 'url': 'https://www.facebook.com/watch/live/?v=1823658634322275',
+ 'only_matching': True,
+ }, {
+ 'url': 'https://www.facebook.com/watchparty/211641140192478',
+ 'info_dict': {
+ 'id': '211641140192478',
+ },
+ 'playlist_count': 1,
+ 'skip': 'Requires logging in',
+ }, {
+ # data.event.cover_media_renderer.cover_video
+ 'url': 'https://m.facebook.com/events/1509582499515440',
+ 'info_dict': {
+ 'id': '637246984455045',
+ 'ext': 'mp4',
+ 'title': 'ANALISI IN CAMPO OSCURO " Coaguli nel sangue dei vaccinati"',
+ 'description': 'Other event by Comitato Liberi Pensatori on Tuesday, October 18 2022',
+ 'thumbnail': r're:^https?://.*',
+ 'uploader': 'Comitato Liberi Pensatori',
+ 'uploader_id': '100065709540881',
+ },
+ }]
+ _SUPPORTED_PAGLETS_REGEX = r'(?:pagelet_group_mall|permalink_video_pagelet|hyperfeed_story_id_[0-9a-f]+)'
+ _api_config = {
+ 'graphURI': '/api/graphql/'
+ }
+
+ def _perform_login(self, username, password):
+ login_page_req = Request(self._LOGIN_URL)
+ self._set_cookie('facebook.com', 'locale', 'en_US')
+ login_page = self._download_webpage(login_page_req, None,
+ note='Downloading login page',
+ errnote='Unable to download login page')
+ lsd = self._search_regex(
+ r'<input type="hidden" name="lsd" value="([^"]*)"',
+ login_page, 'lsd')
+ lgnrnd = self._search_regex(r'name="lgnrnd" value="([^"]*?)"', login_page, 'lgnrnd')
+
+ login_form = {
+ 'email': username,
+ 'pass': password,
+ 'lsd': lsd,
+ 'lgnrnd': lgnrnd,
+ 'next': 'http://facebook.com/home.php',
+ 'default_persistent': '0',
+ 'legacy_return': '1',
+ 'timezone': '-60',
+ 'trynum': '1',
+ }
+ request = Request(self._LOGIN_URL, urlencode_postdata(login_form))
+ request.headers['Content-Type'] = 'application/x-www-form-urlencoded'
+ try:
+            login_results = self._download_webpage(request, None,
+                note='Logging in', errnote='Unable to log in')
+ if re.search(r'<form(.*)name="login"(.*)</form>', login_results) is not None:
+ error = self._html_search_regex(
+ r'(?s)<div[^>]+class=(["\']).*?login_error_box.*?\1[^>]*><div[^>]*>.*?</div><div[^>]*>(?P<error>.+?)</div>',
+ login_results, 'login error', default=None, group='error')
+ if error:
+ raise ExtractorError('Unable to login: %s' % error, expected=True)
+ self.report_warning('unable to log in: bad username/password, or exceeded login rate limit (~3/min). Check credentials or wait.')
+ return
+
+ fb_dtsg = self._search_regex(
+ r'name="fb_dtsg" value="(.+?)"', login_results, 'fb_dtsg', default=None)
+ h = self._search_regex(
+ r'name="h"\s+(?:\w+="[^"]+"\s+)*?value="([^"]+)"', login_results, 'h', default=None)
+
+ if not fb_dtsg or not h:
+ return
+
+ check_form = {
+ 'fb_dtsg': fb_dtsg,
+ 'h': h,
+ 'name_action_selected': 'dont_save',
+ }
+ check_req = Request(self._CHECKPOINT_URL, urlencode_postdata(check_form))
+ check_req.headers['Content-Type'] = 'application/x-www-form-urlencoded'
+ check_response = self._download_webpage(check_req, None,
+ note='Confirming login')
+ if re.search(r'id="checkpointSubmitButton"', check_response) is not None:
+                self.report_warning('Unable to confirm login: you have to log in with your browser and authorize the login.')
+ except network_exceptions as err:
+ self.report_warning('unable to log in: %s' % error_to_compat_str(err))
+ return
+
+ def _extract_from_url(self, url, video_id):
+ webpage = self._download_webpage(
+ url.replace('://m.facebook.com/', '://www.facebook.com/'), video_id)
+
+ def extract_metadata(webpage):
+ post_data = [self._parse_json(j, video_id, fatal=False) for j in re.findall(
+ r'data-sjs>({.*?ScheduledServerJS.*?})</script>', webpage)]
+ post = traverse_obj(post_data, (
+ ..., 'require', ..., ..., ..., '__bbox', 'require', ..., ..., ..., '__bbox', 'result', 'data'), expected_type=dict) or []
+ media = traverse_obj(post, (..., 'attachments', ..., lambda k, v: (
+ k == 'media' and str(v['id']) == video_id and v['__typename'] == 'Video')), expected_type=dict)
+ title = get_first(media, ('title', 'text'))
+ description = get_first(media, ('creation_story', 'comet_sections', 'message', 'story', 'message', 'text'))
+ page_title = title or self._html_search_regex((
+ r'<h2\s+[^>]*class="uiHeaderTitle"[^>]*>(?P<content>[^<]*)</h2>',
+ r'(?s)<span class="fbPhotosPhotoCaption".*?id="fbPhotoPageCaption"><span class="hasCaption">(?P<content>.*?)</span>',
+ self._meta_regex('og:title'), self._meta_regex('twitter:title'), r'<title>(?P<content>.+?)</title>'
+ ), webpage, 'title', default=None, group='content')
+ description = description or self._html_search_meta(
+ ['description', 'og:description', 'twitter:description'],
+ webpage, 'description', default=None)
+ uploader_data = (
+ get_first(media, ('owner', {dict}))
+ or get_first(post, ('video', 'creation_story', 'attachments', ..., 'media', lambda k, v: k == 'owner' and v['name']))
+ or get_first(post, (..., 'video', lambda k, v: k == 'owner' and v['name']))
+ or get_first(post, ('node', 'actors', ..., {dict}))
+ or get_first(post, ('event', 'event_creator', {dict})) or {})
+ uploader = uploader_data.get('name') or (
+ clean_html(get_element_by_id('fbPhotoPageAuthorName', webpage))
+ or self._search_regex(
+ (r'ownerName\s*:\s*"([^"]+)"', *self._og_regexes('title')), webpage, 'uploader', fatal=False))
+ timestamp = int_or_none(self._search_regex(
+ r'<abbr[^>]+data-utime=["\'](\d+)', webpage,
+ 'timestamp', default=None))
+ thumbnail = self._html_search_meta(
+ ['og:image', 'twitter:image'], webpage, 'thumbnail', default=None)
+ # some webpages contain unretrievable thumbnail urls
+ # like https://lookaside.fbsbx.com/lookaside/crawler/media/?media_id=10155168902769113&get_thumbnail=1
+ # in https://www.facebook.com/yaroslav.korpan/videos/1417995061575415/
+ if thumbnail and not re.search(r'\.(?:jpg|png)', thumbnail):
+ thumbnail = None
+ info_dict = {
+ 'description': description,
+ 'uploader': uploader,
+ 'uploader_id': uploader_data.get('id'),
+ 'timestamp': timestamp,
+ 'thumbnail': thumbnail,
+ 'view_count': parse_count(self._search_regex(
+ (r'\bviewCount\s*:\s*["\']([\d,.]+)', r'video_view_count["\']\s*:\s*(\d+)',),
+ webpage, 'view count', default=None)),
+ 'concurrent_view_count': get_first(post, (
+ ('video', (..., ..., 'attachments', ..., 'media')), 'liveViewerCount', {int_or_none})),
+ }
+
+ info_json_ld = self._search_json_ld(webpage, video_id, default={})
+ info_json_ld['title'] = (re.sub(r'\s*\|\s*Facebook$', '', title or info_json_ld.get('title') or page_title or '')
+ or (description or '').replace('\n', ' ') or f'Facebook video #{video_id}')
+ return merge_dicts(info_json_ld, info_dict)
+
+ video_data = None
+
+ def extract_video_data(instances):
+ video_data = []
+ for item in instances:
+ if try_get(item, lambda x: x[1][0]) == 'VideoConfig':
+ video_item = item[2][0]
+ if video_item.get('video_id'):
+ video_data.append(video_item['videoData'])
+ return video_data
+
+ server_js_data = self._parse_json(self._search_regex(
+ [r'handleServerJS\(({.+})(?:\);|,")', r'\bs\.handle\(({.+?})\);'],
+ webpage, 'server js data', default='{}'), video_id, fatal=False)
+
+ if server_js_data:
+ video_data = extract_video_data(server_js_data.get('instances', []))
+
+ def extract_from_jsmods_instances(js_data):
+ if js_data:
+ return extract_video_data(try_get(
+ js_data, lambda x: x['jsmods']['instances'], list) or [])
+
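+        # Some payloads carry the DASH MPD inline as a urlencoded XML string
+        # ('dash_manifest') in addition to its URL ('dash_manifest_url')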
+ def extract_dash_manifest(video, formats):
+ dash_manifest = video.get('dash_manifest')
+ if dash_manifest:
+ formats.extend(self._parse_mpd_formats(
+ compat_etree_fromstring(urllib.parse.unquote_plus(dash_manifest)),
+ mpd_url=video.get('dash_manifest_url')))
+
+ def process_formats(info):
+            for f in info['formats']:
+                # Downloads with browser's User-Agent are rate limited. Working around
+                # with non-browser User-Agent.
+ f.setdefault('http_headers', {})['User-Agent'] = 'facebookexternalhit/1.1'
+ # Formats larger than ~500MB will return error 403 unless chunk size is regulated
+ f.setdefault('downloader_options', {})['http_chunk_size'] = 250 << 20
+
+ def extract_relay_data(_filter):
+ return self._parse_json(self._search_regex(
+ r'data-sjs>({.*?%s.*?})</script>' % _filter,
+                webpage, 'relay data', default='{}'), video_id, fatal=False) or {}
+
+ def extract_relay_prefetched_data(_filter):
+ return traverse_obj(extract_relay_data(_filter), (
+ 'require', (None, (..., ..., ..., '__bbox', 'require')),
+ lambda _, v: any(key.startswith('RelayPrefetchedStreamCache') for key in v),
+ ..., ..., '__bbox', 'result', 'data', {dict}), get_all=False) or {}
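+            # Rough shape of the payload being unwrapped (observed, not a
+            # schema): {'require': [..., {'__bbox': {'require': [
+            #   ['RelayPrefetchedStreamCache...', ..., ...,
+            #    [..., {'__bbox': {'result': {'data': {...}}}}]]]}}]}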
+
+ if not video_data:
+ server_js_data = self._parse_json(self._search_regex([
+ r'bigPipe\.onPageletArrive\(({.+?})\)\s*;\s*}\s*\)\s*,\s*["\']onPageletArrive\s+' + self._SUPPORTED_PAGLETS_REGEX,
+ r'bigPipe\.onPageletArrive\(({.*?id\s*:\s*"%s".*?})\);' % self._SUPPORTED_PAGLETS_REGEX
+ ], webpage, 'js data', default='{}'), video_id, js_to_json, False)
+ video_data = extract_from_jsmods_instances(server_js_data)
+
+ if not video_data:
+ data = extract_relay_prefetched_data(
+ r'"(?:dash_manifest|playable_url(?:_quality_hd)?)')
+ if data:
+ entries = []
+
+ def parse_graphql_video(video):
+ v_id = video.get('videoId') or video.get('id') or video_id
+ reel_info = traverse_obj(
+ video, ('creation_story', 'short_form_video_context', 'playback_video', {dict}))
+ if reel_info:
+ video = video['creation_story']
+ video['owner'] = traverse_obj(video, ('short_form_video_context', 'video_owner'))
+ video.update(reel_info)
+ formats = []
+ q = qualities(['sd', 'hd'])
+ for key, format_id in (('playable_url', 'sd'), ('playable_url_quality_hd', 'hd'),
+ ('playable_url_dash', ''), ('browser_native_hd_url', 'hd'),
+ ('browser_native_sd_url', 'sd')):
+ playable_url = video.get(key)
+ if not playable_url:
+ continue
+ if determine_ext(playable_url) == 'mpd':
+ formats.extend(self._extract_mpd_formats(playable_url, video_id))
+ else:
+ formats.append({
+ 'format_id': format_id,
+ # sd, hd formats w/o resolution info should be deprioritized below DASH
+ 'quality': q(format_id) - 3,
+ 'url': playable_url,
+ })
+ extract_dash_manifest(video, formats)
+
+ automatic_captions, subtitles = {}, {}
+ is_broadcast = traverse_obj(video, ('is_video_broadcast', {bool}))
+ for caption in traverse_obj(video, (
+ 'video_available_captions_locales',
+ {lambda x: sorted(x, key=lambda c: c['locale'])},
+ lambda _, v: url_or_none(v['captions_url'])
+ )):
+ lang = caption.get('localized_language') or 'und'
+ subs = {
+ 'url': caption['captions_url'],
+ 'name': format_field(caption, 'localized_country', f'{lang} (%s)', default=lang),
+ }
+ if caption.get('localized_creation_method') or is_broadcast:
+ automatic_captions.setdefault(caption['locale'], []).append(subs)
+ else:
+ subtitles.setdefault(caption['locale'], []).append(subs)
+ captions_url = traverse_obj(video, ('captions_url', {url_or_none}))
+ if captions_url and not automatic_captions and not subtitles:
+ locale = self._html_search_meta(
+ ['og:locale', 'twitter:locale'], webpage, 'locale', default='en_US')
+ (automatic_captions if is_broadcast else subtitles)[locale] = [{'url': captions_url}]
+
+ info = {
+ 'id': v_id,
+ 'formats': formats,
+ 'thumbnail': traverse_obj(
+ video, ('thumbnailImage', 'uri'), ('preferred_thumbnail', 'image', 'uri')),
+ 'uploader_id': traverse_obj(video, ('owner', 'id', {str_or_none})),
+ 'timestamp': traverse_obj(video, 'publish_time', 'creation_time', expected_type=int_or_none),
+ 'duration': (float_or_none(video.get('playable_duration_in_ms'), 1000)
+ or float_or_none(video.get('length_in_second'))),
+ 'automatic_captions': automatic_captions,
+ 'subtitles': subtitles,
+ }
+ process_formats(info)
+ description = try_get(video, lambda x: x['savable_description']['text'])
+ title = video.get('name')
+ if title:
+ info.update({
+ 'title': title,
+ 'description': description,
+ })
+ else:
+ info['title'] = description or 'Facebook video #%s' % v_id
+ entries.append(info)
+
+ def parse_attachment(attachment, key='media'):
+ media = attachment.get(key) or {}
+ if media.get('__typename') == 'Video':
+ return parse_graphql_video(media)
+
+ nodes = variadic(traverse_obj(data, 'nodes', 'node') or [])
+ attachments = traverse_obj(nodes, (
+ ..., 'comet_sections', 'content', 'story', (None, 'attached_story'), 'attachments',
+ ..., ('styles', 'style_type_renderer', ('throwbackStyles', 'attachment_target_renderer')),
+ 'attachment', {dict}))
+ for attachment in attachments:
+ ns = traverse_obj(attachment, ('all_subattachments', 'nodes', ..., {dict}),
+ ('target', 'attachments', ..., 'styles', 'attachment', {dict}))
+ for n in ns:
+ parse_attachment(n)
+ parse_attachment(attachment)
+
+ edges = try_get(data, lambda x: x['mediaset']['currMedia']['edges'], list) or []
+ for edge in edges:
+ parse_attachment(edge, key='node')
+
+ video = traverse_obj(data, (
+ 'event', 'cover_media_renderer', 'cover_video'), 'video', expected_type=dict) or {}
+ if video:
+ attachments = try_get(video, [
+ lambda x: x['story']['attachments'],
+ lambda x: x['creation_story']['attachments']
+ ], list) or []
+ for attachment in attachments:
+ parse_attachment(attachment)
+ if not entries:
+ parse_graphql_video(video)
+
+ if len(entries) > 1:
+ return self.playlist_result(entries, video_id)
+
+ video_info = entries[0] if entries else {'id': video_id}
+ webpage_info = extract_metadata(webpage)
+ # honor precise duration in video info
+ if video_info.get('duration'):
+ webpage_info['duration'] = video_info['duration']
+ # preserve preferred_thumbnail in video info
+ if video_info.get('thumbnail'):
+ webpage_info['thumbnail'] = video_info['thumbnail']
+ return merge_dicts(webpage_info, video_info)
+
+ if not video_data:
+ m_msg = re.search(r'class="[^"]*uiInterstitialContent[^"]*"><div>(.*?)</div>', webpage)
+ if m_msg is not None:
+ raise ExtractorError(
+ 'The video is not available, Facebook said: "%s"' % m_msg.group(1),
+ expected=True)
+ elif any(p in webpage for p in (
+ '>You must log in to continue',
+ 'id="login_form"',
+ 'id="loginbutton"')):
+ self.raise_login_required()
+
+ if not video_data and '/watchparty/' in url:
+ post_data = {
+ 'doc_id': 3731964053542869,
+ 'variables': json.dumps({
+ 'livingRoomID': video_id,
+ }),
+ }
+
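+            # the doc_id appears to select a persisted server-side GraphQL
+            # query; the page's LSD token, when present, must be posted too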
+ prefetched_data = extract_relay_prefetched_data(r'"login_data"\s*:\s*{')
+ if prefetched_data:
+ lsd = try_get(prefetched_data, lambda x: x['login_data']['lsd'], dict)
+ if lsd:
+ post_data[lsd['name']] = lsd['value']
+
+ relay_data = extract_relay_data(r'\[\s*"RelayAPIConfigDefaults"\s*,')
+ for define in (relay_data.get('define') or []):
+ if define[0] == 'RelayAPIConfigDefaults':
+ self._api_config = define[2]
+
+ living_room = self._download_json(
+ urljoin(url, self._api_config['graphURI']), video_id,
+ data=urlencode_postdata(post_data))['data']['living_room']
+
+ entries = []
+ for edge in (try_get(living_room, lambda x: x['recap']['watched_content']['edges']) or []):
+ video = try_get(edge, lambda x: x['node']['video']) or {}
+ v_id = video.get('id')
+ if not v_id:
+ continue
+ v_id = compat_str(v_id)
+ entries.append(self.url_result(
+ self._VIDEO_PAGE_TEMPLATE % v_id,
+ self.ie_key(), v_id, video.get('name')))
+
+ return self.playlist_result(entries, video_id)
+
+ if not video_data:
+            # Video info not in first request; do a secondary request using the
+            # tahoe-player-specific URL
+ tahoe_data = self._download_webpage(
+ self._VIDEO_PAGE_TAHOE_TEMPLATE % video_id, video_id,
+ data=urlencode_postdata({
+ '__a': 1,
+ '__pc': self._search_regex(
+ r'pkg_cohort["\']\s*:\s*["\'](.+?)["\']', webpage,
+ 'pkg cohort', default='PHASED:DEFAULT'),
+ '__rev': self._search_regex(
+ r'client_revision["\']\s*:\s*(\d+),', webpage,
+ 'client revision', default='3944515'),
+ 'fb_dtsg': self._search_regex(
+ r'"DTSGInitialData"\s*,\s*\[\]\s*,\s*{\s*"token"\s*:\s*"([^"]+)"',
+ webpage, 'dtsg token', default=''),
+ }),
+ headers={
+ 'Content-Type': 'application/x-www-form-urlencoded',
+ })
+ tahoe_js_data = self._parse_json(
+ self._search_regex(
+ r'for\s+\(\s*;\s*;\s*\)\s*;(.+)', tahoe_data,
+ 'tahoe js data', default='{}'),
+ video_id, fatal=False)
+ video_data = extract_from_jsmods_instances(tahoe_js_data)
+
+ if not video_data:
+ raise ExtractorError('Cannot parse data')
+
+ if len(video_data) > 1:
+ entries = []
+ for v in video_data:
+ video_url = v[0].get('video_url')
+ if not video_url:
+ continue
+ entries.append(self.url_result(urljoin(
+ url, video_url), self.ie_key(), v[0].get('video_id')))
+ return self.playlist_result(entries, video_id)
+ video_data = video_data[0]
+
+ formats = []
+ subtitles = {}
+ for f in video_data:
+            # normalize each entry to a list before any key access
+            if f and isinstance(f, dict):
+                f = [f]
+            if not f or not isinstance(f, list):
+                continue
+            format_id = f[0]['stream_type']
+ for quality in ('sd', 'hd'):
+ for src_type in ('src', 'src_no_ratelimit'):
+ src = f[0].get('%s_%s' % (quality, src_type))
+ if src:
+ # sd, hd formats w/o resolution info should be deprioritized below DASH
+ # TODO: investigate if progressive or src formats still exist
+ preference = -10 if format_id == 'progressive' else -3
+ if quality == 'hd':
+ preference += 1
+ formats.append({
+ 'format_id': '%s_%s_%s' % (format_id, quality, src_type),
+ 'url': src,
+ 'quality': preference,
+ 'height': 720 if quality == 'hd' else None
+ })
+ extract_dash_manifest(f[0], formats)
+ subtitles_src = f[0].get('subtitles_src')
+ if subtitles_src:
+ subtitles.setdefault('en', []).append({'url': subtitles_src})
+
+ info_dict = {
+ 'id': video_id,
+ 'formats': formats,
+ 'subtitles': subtitles,
+ }
+ process_formats(info_dict)
+ info_dict.update(extract_metadata(webpage))
+
+ return info_dict
+
+ def _real_extract(self, url):
+ video_id = self._match_id(url)
+
+ real_url = self._VIDEO_PAGE_TEMPLATE % video_id if url.startswith('facebook:') else url
+ return self._extract_from_url(real_url, video_id)
+
+
+class FacebookPluginsVideoIE(InfoExtractor):
+ _VALID_URL = r'https?://(?:[\w-]+\.)?facebook\.com/plugins/video\.php\?.*?\bhref=(?P<id>https.+)'
+
+ _TESTS = [{
+ 'url': 'https://www.facebook.com/plugins/video.php?href=https%3A%2F%2Fwww.facebook.com%2Fgov.sg%2Fvideos%2F10154383743583686%2F&show_text=0&width=560',
+ 'md5': '5954e92cdfe51fe5782ae9bda7058a07',
+ 'info_dict': {
+ 'id': '10154383743583686',
+ 'ext': 'mp4',
+ # TODO: Fix title, uploader
+ 'title': 'What to do during the haze?',
+ 'uploader': 'Gov.sg',
+ 'upload_date': '20160826',
+ 'timestamp': 1472184808,
+ },
+ 'add_ie': [FacebookIE.ie_key()],
+ }, {
+ 'url': 'https://www.facebook.com/plugins/video.php?href=https%3A%2F%2Fwww.facebook.com%2Fvideo.php%3Fv%3D10204634152394104',
+ 'only_matching': True,
+ }, {
+ 'url': 'https://www.facebook.com/plugins/video.php?href=https://www.facebook.com/gov.sg/videos/10154383743583686/&show_text=0&width=560',
+ 'only_matching': True,
+ }]
+
+ def _real_extract(self, url):
+ return self.url_result(
+ compat_urllib_parse_unquote(self._match_id(url)),
+ FacebookIE.ie_key())
+
+
+class FacebookRedirectURLIE(InfoExtractor):
+ IE_DESC = False # Do not list
+ _VALID_URL = r'https?://(?:[\w-]+\.)?facebook\.com/flx/warn[/?]'
+ _TESTS = [{
+ 'url': 'https://www.facebook.com/flx/warn/?h=TAQHsoToz&u=https%3A%2F%2Fwww.youtube.com%2Fwatch%3Fv%3DpO8h3EaFRdo&s=1',
+ 'info_dict': {
+ 'id': 'pO8h3EaFRdo',
+ 'ext': 'mp4',
+ 'title': 'Tripeo Boiler Room x Dekmantel Festival DJ Set',
+ 'description': 'md5:2d713ccbb45b686a1888397b2c77ca6b',
+ 'channel_id': 'UCGBpxWJr9FNOcFYA5GkKrMg',
+ 'playable_in_embed': True,
+ 'categories': ['Music'],
+ 'channel': 'Boiler Room',
+ 'uploader_id': 'brtvofficial',
+ 'uploader': 'Boiler Room',
+ 'tags': 'count:11',
+ 'duration': 3332,
+ 'live_status': 'not_live',
+ 'thumbnail': 'https://i.ytimg.com/vi/pO8h3EaFRdo/maxresdefault.jpg',
+ 'channel_url': 'https://www.youtube.com/channel/UCGBpxWJr9FNOcFYA5GkKrMg',
+ 'availability': 'public',
+ 'uploader_url': 'http://www.youtube.com/user/brtvofficial',
+ 'upload_date': '20150917',
+ 'age_limit': 0,
+ 'view_count': int,
+ 'like_count': int,
+ },
+ 'add_ie': ['Youtube'],
+ 'params': {'skip_download': 'Youtube'},
+ }]
+
+ def _real_extract(self, url):
+ redirect_url = url_or_none(parse_qs(url).get('u', [None])[-1])
+ if not redirect_url:
+ raise ExtractorError('Invalid facebook redirect URL', expected=True)
+ return self.url_result(redirect_url)
+
+
+class FacebookReelIE(InfoExtractor):
+ _VALID_URL = r'https?://(?:[\w-]+\.)?facebook\.com/reel/(?P<id>\d+)'
+ IE_NAME = 'facebook:reel'
+
+ _TESTS = [{
+ 'url': 'https://www.facebook.com/reel/1195289147628387',
+ 'md5': 'f13dd37f2633595982db5ed8765474d3',
+ 'info_dict': {
+ 'id': '1195289147628387',
+ 'ext': 'mp4',
+ 'title': 'md5:b05800b5b1ad56c0ca78bd3807b6a61e',
+ 'description': 'md5:22f03309b216ac84720183961441d8db',
+ 'uploader': 'md5:723e6cb3091241160f20b3c5dc282af1',
+ 'uploader_id': '100040874179269',
+ 'duration': 9.579,
+ 'timestamp': 1637502609,
+ 'upload_date': '20211121',
+ 'thumbnail': r're:^https?://.*',
+        },
+ }]
+
+ def _real_extract(self, url):
+ video_id = self._match_id(url)
+ return self.url_result(
+ f'https://m.facebook.com/watch/?v={video_id}&_rdr', FacebookIE, video_id)
+
+
+class FacebookAdsIE(InfoExtractor):
+ _VALID_URL = r'https?://(?:[\w-]+\.)?facebook\.com/ads/library/?\?(?:[^#]+&)?id=(?P<id>\d+)'
+ IE_NAME = 'facebook:ads'
+
+ _TESTS = [{
+ 'url': 'https://www.facebook.com/ads/library/?id=899206155126718',
+ 'info_dict': {
+ 'id': '899206155126718',
+ 'ext': 'mp4',
+ 'title': 'video by Kandao',
+ 'uploader': 'Kandao',
+ 'uploader_id': '774114102743284',
+ 'uploader_url': r're:^https?://.*',
+ 'timestamp': 1702548330,
+ 'thumbnail': r're:^https?://.*',
+ 'upload_date': '20231214',
+ 'like_count': int,
+        },
+ }, {
+ 'url': 'https://www.facebook.com/ads/library/?id=893637265423481',
+ 'info_dict': {
+ 'id': '893637265423481',
+ 'title': 'Jusqu\u2019\u00e0 -25% sur une s\u00e9lection de vins p\u00e9tillants italiens ',
+ 'uploader': 'Eataly Paris Marais',
+ 'uploader_id': '2086668958314152',
+ 'uploader_url': r're:^https?://.*',
+ 'timestamp': 1703571529,
+ 'upload_date': '20231226',
+ 'like_count': int,
+ },
+ 'playlist_count': 3,
+ }, {
+ 'url': 'https://es-la.facebook.com/ads/library/?id=901230958115569',
+ 'only_matching': True,
+ }, {
+ 'url': 'https://m.facebook.com/ads/library/?id=901230958115569',
+ 'only_matching': True,
+ }]
+
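+    # listed in ascending preference; qualities() derives the format ranking
+    # from this key order, so non-watermarked HD sorts highest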
+ _FORMATS_MAP = {
+ 'watermarked_video_sd_url': ('sd-wmk', 'SD, watermarked'),
+ 'video_sd_url': ('sd', None),
+ 'watermarked_video_hd_url': ('hd-wmk', 'HD, watermarked'),
+ 'video_hd_url': ('hd', None),
+ }
+
+ def _extract_formats(self, video_dict):
+ formats = []
+ for format_key, format_url in traverse_obj(video_dict, (
+ {dict.items}, lambda _, v: v[0] in self._FORMATS_MAP and url_or_none(v[1])
+ )):
+ formats.append({
+ 'format_id': self._FORMATS_MAP[format_key][0],
+ 'format_note': self._FORMATS_MAP[format_key][1],
+ 'url': format_url,
+ 'ext': 'mp4',
+ 'quality': qualities(tuple(self._FORMATS_MAP))(format_key),
+ })
+ return formats
+
+ def _real_extract(self, url):
+ video_id = self._match_id(url)
+ webpage = self._download_webpage(url, video_id)
+
+ post_data = [self._parse_json(j, video_id, fatal=False)
+ for j in re.findall(r's\.handle\(({.*})\);requireLazy\(', webpage)]
+ data = traverse_obj(post_data, (
+ ..., 'require', ..., ..., ..., 'props', 'deeplinkAdCard', 'snapshot', {dict}), get_all=False)
+ if not data:
+ raise ExtractorError('Unable to extract ad data')
+
+ title = data.get('title')
+ if not title or title == '{{product.name}}':
+ title = join_nonempty('display_format', 'page_name', delim=' by ', from_dict=data)
+
+ info_dict = traverse_obj(data, {
+ 'description': ('link_description', {str}, {lambda x: x if x != '{{product.description}}' else None}),
+ 'uploader': ('page_name', {str}),
+ 'uploader_id': ('page_id', {str_or_none}),
+ 'uploader_url': ('page_profile_uri', {url_or_none}),
+ 'timestamp': ('creation_time', {int_or_none}),
+ 'like_count': ('page_like_count', {int_or_none}),
+ })
+
+ entries = []
+ for idx, entry in enumerate(traverse_obj(
+ data, (('videos', 'cards'), lambda _, v: any([url_or_none(v[f]) for f in self._FORMATS_MAP]))), 1
+ ):
+ entries.append({
+ 'id': f'{video_id}_{idx}',
+ 'title': entry.get('title') or title,
+ 'description': entry.get('link_description') or info_dict.get('description'),
+ 'thumbnail': url_or_none(entry.get('video_preview_image_url')),
+ 'formats': self._extract_formats(entry),
+ })
+
+ if len(entries) == 1:
+ info_dict.update(entries[0])
+
+ elif len(entries) > 1:
+ info_dict.update({
+ 'title': entries[0]['title'],
+ 'entries': entries,
+ '_type': 'playlist',
+ })
+
+ info_dict['id'] = video_id
+
+ return info_dict
diff --git a/yt_dlp/extractor/fancode.py b/yt_dlp/extractor/fancode.py
new file mode 100644
index 0000000..cddf254
--- /dev/null
+++ b/yt_dlp/extractor/fancode.py
@@ -0,0 +1,181 @@
+from .common import InfoExtractor
+from ..compat import compat_str
+from ..utils import (
+    ExtractorError,
+    mimetype2ext,
+    parse_iso8601,
+    try_get,
+)
+
+
+class FancodeVodIE(InfoExtractor):
+ _WORKING = False
+ IE_NAME = 'fancode:vod'
+
+ _VALID_URL = r'https?://(?:www\.)?fancode\.com/video/(?P<id>[0-9]+)\b'
+
+ _TESTS = [{
+ 'url': 'https://fancode.com/video/15043/match-preview-pbks-vs-mi',
+ 'params': {
+ 'skip_download': True,
+ },
+ 'info_dict': {
+ 'id': '6249806281001',
+ 'ext': 'mp4',
+ 'title': 'Match Preview: PBKS vs MI',
+ 'thumbnail': r're:^https?://.*\.jpg$',
+ "timestamp": 1619081590,
+ 'view_count': int,
+ 'like_count': int,
+ 'upload_date': '20210422',
+            'uploader_id': '6008340455001',
+        },
+ }, {
+ 'url': 'https://fancode.com/video/15043',
+ 'only_matching': True,
+ }]
+
+ _ACCESS_TOKEN = None
+ _NETRC_MACHINE = 'fancode'
+
+ _LOGIN_HINT = 'Use "--username refresh --password <refresh_token>" to login using a refresh token'
+
+ headers = {
+ 'content-type': 'application/json',
+ 'origin': 'https://fancode.com',
+ 'referer': 'https://fancode.com',
+ }
+
+ def _perform_login(self, username, password):
+ # Access tokens are shortlived, so get them using the refresh token.
+ if username != 'refresh':
+ self.report_warning(f'Login using username and password is not currently supported. {self._LOGIN_HINT}')
+
+ self.report_login()
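+        # GraphQL mutation that trades the long-lived refresh token (passed
+        # via --password) for a short-lived access token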
+ data = '''{
+ "query":"mutation RefreshToken($refreshToken: String\\u0021) { refreshToken(refreshToken: $refreshToken) { accessToken }}",
+ "variables":{
+ "refreshToken":"%s"
+ },
+ "operationName":"RefreshToken"
+ }''' % password
+
+ token_json = self.download_gql('refresh token', data, "Getting the Access token")
+ self._ACCESS_TOKEN = try_get(token_json, lambda x: x['data']['refreshToken']['accessToken'])
+ if self._ACCESS_TOKEN is None:
+ self.report_warning('Failed to get Access token')
+ else:
+ self.headers.update({'Authorization': 'Bearer %s' % self._ACCESS_TOKEN})
+
+ def _check_login_required(self, is_available, is_premium):
+ msg = None
+ if is_premium and self._ACCESS_TOKEN is None:
+ msg = f'This video is only available for registered users. {self._LOGIN_HINT}'
+ elif not is_available and self._ACCESS_TOKEN is not None:
+ msg = 'This video isn\'t available to the current logged in account'
+ if msg:
+ self.raise_login_required(msg, metadata_available=True, method=None)
+
+ def download_gql(self, variable, data, note, fatal=False, headers=headers):
+ return self._download_json(
+ 'https://www.fancode.com/graphql', variable,
+ data=data.encode(), note=note,
+ headers=headers, fatal=fatal)
+
+    def _real_extract(self, url):
+        BRIGHTCOVE_URL_TEMPLATE = 'https://players.brightcove.net/%s/default_default/index.html?videoId=%s'
+ video_id = self._match_id(url)
+
+ brightcove_user_id = '6008340455001'
+ data = '''{
+ "query":"query Video($id: Int\\u0021, $filter: SegmentFilter) { media(id: $id, filter: $filter) { id contentId title contentId publishedTime totalViews totalUpvotes provider thumbnail { src } mediaSource {brightcove } duration isPremium isUserEntitled tags duration }}",
+ "variables":{
+ "id":%s,
+ "filter":{
+ "contentDataType":"DEFAULT"
+ }
+ },
+ "operationName":"Video"
+ }''' % video_id
+
+ metadata_json = self.download_gql(video_id, data, note='Downloading metadata')
+
+ media = try_get(metadata_json, lambda x: x['data']['media'], dict) or {}
+ brightcove_video_id = try_get(media, lambda x: x['mediaSource']['brightcove'], compat_str)
+
+ if brightcove_video_id is None:
+ raise ExtractorError('Unable to extract brightcove Video ID')
+
+ is_premium = media.get('isPremium')
+
+ self._check_login_required(media.get('isUserEntitled'), is_premium)
+
+ return {
+ '_type': 'url_transparent',
+ 'url': BRIGHTCOVE_URL_TEMPLATE % (brightcove_user_id, brightcove_video_id),
+ 'ie_key': 'BrightcoveNew',
+ 'id': video_id,
+ 'title': media['title'],
+ 'like_count': media.get('totalUpvotes'),
+ 'view_count': media.get('totalViews'),
+ 'tags': media.get('tags'),
+ 'release_timestamp': parse_iso8601(media.get('publishedTime')),
+ 'availability': self._availability(needs_premium=is_premium),
+ }
+
+
+class FancodeLiveIE(FancodeVodIE): # XXX: Do not subclass from concrete IE
+ _WORKING = False
+ IE_NAME = 'fancode:live'
+
+    _VALID_URL = r'https?://(?:www\.)?fancode\.com/match/(?P<id>[0-9]+).+'
+
+ _TESTS = [{
+ 'url': 'https://fancode.com/match/35328/cricket-fancode-ecs-hungary-2021-bub-vs-blb?slug=commentary',
+ 'info_dict': {
+ 'id': '35328',
+ 'ext': 'mp4',
+ 'title': 'BUB vs BLB',
+ "timestamp": 1624863600,
+ 'is_live': True,
+ 'upload_date': '20210628',
+ },
+ 'skip': 'Ended'
+ }, {
+ 'url': 'https://fancode.com/match/35328/',
+ 'only_matching': True,
+ }, {
+ 'url': 'https://fancode.com/match/35567?slug=scorecard',
+ 'only_matching': True,
+ }]
+
+    def _real_extract(self, url):
+        video_id = self._match_id(url)
+ data = '''{
+ "query":"query MatchResponse($id: Int\\u0021, $isLoggedIn: Boolean\\u0021) { match: matchWithScores(id: $id) { id matchDesc mediaId videoStreamId videoStreamUrl { ...VideoSource } liveStreams { videoStreamId videoStreamUrl { ...VideoSource } contentId } name startTime streamingStatus isPremium isUserEntitled @include(if: $isLoggedIn) status metaTags bgImage { src } sport { name slug } tour { id name } squads { name shortName } liveStreams { contentId } mediaId }}fragment VideoSource on VideoSource { title description posterUrl url deliveryType playerType}",
+ "variables":{
+ "id":%s,
+ "isLoggedIn":true
+ },
+ "operationName":"MatchResponse"
+        }''' % video_id
+
+        info_json = self.download_gql(video_id, data, 'Downloading match info')
+
+        match_info = try_get(info_json, lambda x: x['data']['match']) or {}
+
+        if match_info.get('streamingStatus') != 'STARTED':
+ raise ExtractorError('The stream can\'t be accessed', expected=True)
+ self._check_login_required(match_info.get('isUserEntitled'), True) # all live streams are premium only
+
+ return {
+            'id': video_id,
+ 'title': match_info.get('name'),
+            'formats': self._extract_akamai_formats(try_get(match_info, lambda x: x['videoStreamUrl']['url']), video_id),
+ 'ext': mimetype2ext(try_get(match_info, lambda x: x['videoStreamUrl']['deliveryType'])),
+ 'is_live': True,
+            'release_timestamp': parse_iso8601(match_info.get('startTime')),
+ }
diff --git a/yt_dlp/extractor/faz.py b/yt_dlp/extractor/faz.py
new file mode 100644
index 0000000..bca62ad
--- /dev/null
+++ b/yt_dlp/extractor/faz.py
@@ -0,0 +1,89 @@
+import re
+
+from .common import InfoExtractor
+from ..compat import compat_etree_fromstring
+from ..utils import (
+    int_or_none,
+    xpath_element,
+    xpath_text,
+)
+
+
+class FazIE(InfoExtractor):
+ IE_NAME = 'faz.net'
+ _VALID_URL = r'https?://(?:www\.)?faz\.net/(?:[^/]+/)*.*?-(?P<id>\d+)\.html'
+
+ _TESTS = [{
+ 'url': 'http://www.faz.net/multimedia/videos/stockholm-chemie-nobelpreis-fuer-drei-amerikanische-forscher-12610585.html',
+ 'info_dict': {
+ 'id': '12610585',
+ 'ext': 'mp4',
+ 'title': 'Stockholm: Chemie-Nobelpreis für drei amerikanische Forscher',
+ 'description': 'md5:1453fbf9a0d041d985a47306192ea253',
+ },
+ }, {
+ 'url': 'http://www.faz.net/aktuell/politik/berlin-gabriel-besteht-zerreissprobe-ueber-datenspeicherung-13659345.html',
+ 'only_matching': True,
+ }, {
+ 'url': 'http://www.faz.net/berlin-gabriel-besteht-zerreissprobe-ueber-datenspeicherung-13659345.html',
+ 'only_matching': True,
+ }, {
+ 'url': 'http://www.faz.net/-13659345.html',
+ 'only_matching': True,
+ }, {
+ 'url': 'http://www.faz.net/aktuell/politik/-13659345.html',
+ 'only_matching': True,
+ }, {
+ 'url': 'http://www.faz.net/foobarblafasel-13659345.html',
+ 'only_matching': True,
+ }]
+
+ def _real_extract(self, url):
+ video_id = self._match_id(url)
+
+ webpage = self._download_webpage(url, video_id)
+ description = self._og_search_description(webpage)
+ media = self._html_search_regex(
+ r"data-videojs-media='([^']+)",
+ webpage, 'media')
+ if media == 'extern':
+ perform_url = self._search_regex(
+ r"<iframe[^>]+?src='((?:http:)?//player\.performgroup\.com/eplayer/eplayer\.html#/?[0-9a-f]{26}\.[0-9a-z]{26})",
+ webpage, 'perform url')
+ return self.url_result(perform_url)
+ config = compat_etree_fromstring(media)
+
+ encodings = xpath_element(config, 'ENCODINGS', 'encodings', True)
+ formats = []
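+        # LOW/HIGH/HQ are in ascending quality; the enumeration index doubles
+        # as the format preference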
+ for pref, code in enumerate(['LOW', 'HIGH', 'HQ']):
+ encoding = xpath_element(encodings, code)
+ if encoding is not None:
+ encoding_url = xpath_text(encoding, 'FILENAME')
+ if encoding_url:
+                    tbr = xpath_text(encoding, 'AVERAGEBITRATE', 'tbr')
+ if tbr:
+ tbr = int_or_none(tbr.replace(',', '.'))
+ f = {
+ 'url': encoding_url,
+ 'format_id': code.lower(),
+ 'quality': pref,
+ 'tbr': tbr,
+ 'vcodec': xpath_text(encoding, 'CODEC'),
+ }
+ mobj = re.search(r'(\d+)x(\d+)_(\d+)\.mp4', encoding_url)
+ if mobj:
+ f.update({
+ 'width': int(mobj.group(1)),
+ 'height': int(mobj.group(2)),
+ 'tbr': tbr or int(mobj.group(3)),
+ })
+ formats.append(f)
+
+ return {
+ 'id': video_id,
+ 'title': self._og_search_title(webpage),
+ 'formats': formats,
+ 'description': description.strip() if description else None,
+ 'thumbnail': xpath_text(config, 'STILL/STILL_BIG'),
+ 'duration': int_or_none(xpath_text(config, 'DURATION')),
+ }
diff --git a/yt_dlp/extractor/fc2.py b/yt_dlp/extractor/fc2.py
new file mode 100644
index 0000000..bbc4b56
--- /dev/null
+++ b/yt_dlp/extractor/fc2.py
@@ -0,0 +1,280 @@
+import re
+
+from .common import InfoExtractor
+from ..compat import compat_parse_qs
+from ..networking import Request
+from ..utils import (
+ ExtractorError,
+ js_to_json,
+ traverse_obj,
+ update_url_query,
+ urlencode_postdata,
+ urljoin,
+)
+
+
+class FC2IE(InfoExtractor):
+ _VALID_URL = r'^(?:https?://video\.fc2\.com/(?:[^/]+/)*content/|fc2:)(?P<id>[^/]+)'
+ IE_NAME = 'fc2'
+ _NETRC_MACHINE = 'fc2'
+ _TESTS = [{
+ 'url': 'http://video.fc2.com/en/content/20121103kUan1KHs',
+ 'md5': 'a6ebe8ebe0396518689d963774a54eb7',
+ 'info_dict': {
+ 'id': '20121103kUan1KHs',
+ 'ext': 'flv',
+ 'title': 'Boxing again with Puff',
+ },
+ }, {
+ 'url': 'http://video.fc2.com/en/content/20150125cEva0hDn/',
+ 'info_dict': {
+ 'id': '20150125cEva0hDn',
+ 'ext': 'mp4',
+ },
+ 'params': {
+ 'username': 'ytdl@yt-dl.org',
+ 'password': '(snip)',
+ },
+ 'skip': 'requires actual password',
+ }, {
+ 'url': 'http://video.fc2.com/en/a/content/20130926eZpARwsF',
+ 'only_matching': True,
+ }]
+
+ def _login(self):
+ username, password = self._get_login_info()
+ if username is None or password is None:
+ return False
+
+ # Log in
+ login_form_strs = {
+ 'email': username,
+ 'password': password,
+ 'done': 'video',
+ 'Submit': ' Login ',
+ }
+
+ login_data = urlencode_postdata(login_form_strs)
+ request = Request(
+ 'https://secure.id.fc2.com/index.php?mode=login&switch_language=en', login_data)
+
+ login_results = self._download_webpage(request, None, note='Logging in', errnote='Unable to log in')
+ if 'mode=redirect&login=done' not in login_results:
+ self.report_warning('unable to log in: bad username or password')
+ return False
+
+ # this is also needed
+ login_redir = Request('http://id.fc2.com/?mode=redirect&login=done')
+ self._download_webpage(
+ login_redir, None, note='Login redirect', errnote='Login redirect failed')
+
+ return True
+
+ def _real_extract(self, url):
+ video_id = self._match_id(url)
+ self._login()
+ webpage = None
+ if not url.startswith('fc2:'):
+ webpage = self._download_webpage(url, video_id)
+        self.cookiejar.clear_session_cookies()  # must clear session cookies before logging in again
+ self._login()
+
+ title, thumbnail, description = None, None, None
+ if webpage is not None:
+ title = self._html_search_regex(
+ (r'<h2\s+class="videoCnt_title">([^<]+?)</h2>',
+ r'\s+href="[^"]+"\s*title="([^"]+?)"\s*rel="nofollow">\s*<img',
+             # there are two matches in the webpage
+ r'\s+href="[^"]+"\s*title="([^"]+?)"\s*rel="nofollow">\s*\1'),
+ webpage,
+ 'title', fatal=False)
+ thumbnail = self._og_search_thumbnail(webpage)
+ description = self._og_search_description(webpage, default=None)
+
+ vidplaylist = self._download_json(
+ 'https://video.fc2.com/api/v3/videoplaylist/%s?sh=1&fs=0' % video_id, video_id,
+ note='Downloading info page')
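+        # 'nq' appears to hold a relative playlist path (presumably the
+        # normal-quality stream), hence the urljoin below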
+ vid_url = traverse_obj(vidplaylist, ('playlist', 'nq'))
+ if not vid_url:
+ raise ExtractorError('Unable to extract video URL')
+ vid_url = urljoin('https://video.fc2.com/', vid_url)
+
+ return {
+ 'id': video_id,
+ 'title': title,
+ 'url': vid_url,
+ 'ext': 'mp4',
+ 'protocol': 'm3u8_native',
+ 'description': description,
+ 'thumbnail': thumbnail,
+ }
+
+
+class FC2EmbedIE(InfoExtractor):
+ _VALID_URL = r'https?://video\.fc2\.com/flv2\.swf\?(?P<query>.+)'
+ IE_NAME = 'fc2:embed'
+
+ _TEST = {
+ 'url': 'http://video.fc2.com/flv2.swf?t=201404182936758512407645&i=20130316kwishtfitaknmcgd76kjd864hso93htfjcnaogz629mcgfs6rbfk0hsycma7shkf85937cbchfygd74&i=201403223kCqB3Ez&d=2625&sj=11&lang=ja&rel=1&from=11&cmt=1&tk=TlRBM09EQTNNekU9&tl=プリズン・ブレイク%20S1-01%20マイケル%20【吹替】',
+ 'md5': 'b8aae5334cb691bdb1193a88a6ab5d5a',
+ 'info_dict': {
+ 'id': '201403223kCqB3Ez',
+ 'ext': 'flv',
+ 'title': 'プリズン・ブレイク S1-01 マイケル 【吹替】',
+ 'thumbnail': r're:^https?://.*\.jpg$',
+ },
+ }
+
+ def _real_extract(self, url):
+ mobj = self._match_valid_url(url)
+ query = compat_parse_qs(mobj.group('query'))
+
+ video_id = query['i'][-1]
+ title = query.get('tl', ['FC2 video %s' % video_id])[0]
+
+ sj = query.get('sj', [None])[0]
+ thumbnail = None
+ if sj:
+ # See thumbnailImagePath() in ServerConst.as of flv2.swf
+ thumbnail = 'http://video%s-thumbnail.fc2.com/up/pic/%s.jpg' % (
+ sj, '/'.join((video_id[:6], video_id[6:8], video_id[-2], video_id[-1], video_id)))
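+            # e.g. sj='11', video_id='201403223kCqB3Ez' gives
+            # http://video11-thumbnail.fc2.com/up/pic/201403/22/E/z/201403223kCqB3Ez.jpg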
+
+ return {
+ '_type': 'url_transparent',
+ 'ie_key': FC2IE.ie_key(),
+ 'url': 'fc2:%s' % video_id,
+ 'title': title,
+ 'thumbnail': thumbnail,
+ }
+
+
+class FC2LiveIE(InfoExtractor):
+ _VALID_URL = r'https?://live\.fc2\.com/(?P<id>\d+)'
+ IE_NAME = 'fc2:live'
+
+ _TESTS = [{
+ 'url': 'https://live.fc2.com/57892267/',
+ 'info_dict': {
+ 'id': '57892267',
+ 'title': 'どこまで・・・',
+ 'uploader': 'あつあげ',
+ 'uploader_id': '57892267',
+ 'thumbnail': r're:https?://.+fc2.+',
+ },
+ 'skip': 'livestream',
+ }]
+
+ def _real_extract(self, url):
+ video_id = self._match_id(url)
+ webpage = self._download_webpage('https://live.fc2.com/%s/' % video_id, video_id)
+
+ self._set_cookie('live.fc2.com', 'js-player_size', '1')
+
+ member_api = self._download_json(
+ 'https://live.fc2.com/api/memberApi.php', video_id, data=urlencode_postdata({
+ 'channel': '1',
+ 'profile': '1',
+ 'user': '1',
+ 'streamid': video_id
+ }), note='Requesting member info')
+
+ control_server = self._download_json(
+ 'https://live.fc2.com/api/getControlServer.php', video_id, note='Downloading ControlServer data',
+ data=urlencode_postdata({
+ 'channel_id': video_id,
+ 'mode': 'play',
+ 'orz': '',
+ 'channel_version': member_api['data']['channel_data']['version'],
+ 'client_version': '2.1.0\n [1]',
+ 'client_type': 'pc',
+ 'client_app': 'browser_hls',
+ 'ipv6': '',
+ }), headers={'X-Requested-With': 'XMLHttpRequest'})
+ self._set_cookie('live.fc2.com', 'l_ortkn', control_server['orz_raw'])
+
+ ws_url = update_url_query(control_server['url'], {'control_token': control_server['control_token']})
+ playlist_data = None
+
+ ws = self._request_webpage(Request(ws_url, headers={
+ 'Origin': 'https://live.fc2.com',
+ }), video_id, note='Fetching HLS playlist info via WebSocket')
+
+ self.write_debug('Sending HLS server request')
+
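+        # handshake, as observed: wait for the server's 'connect_complete'
+        # event, request the HLS info, then wait for the matching
+        # '_response_' with id 1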
+ while True:
+ recv = ws.recv()
+ if not recv:
+ continue
+ data = self._parse_json(recv, video_id, fatal=False)
+ if not data or not isinstance(data, dict):
+ continue
+
+ if data.get('name') == 'connect_complete':
+ break
+ ws.send(r'{"name":"get_hls_information","arguments":{},"id":1}')
+
+ while True:
+ recv = ws.recv()
+ if not recv:
+ continue
+ data = self._parse_json(recv, video_id, fatal=False)
+ if not data or not isinstance(data, dict):
+ continue
+ if data.get('name') == '_response_' and data.get('id') == 1:
+ self.write_debug('Goodbye')
+ playlist_data = data
+ break
+ self.write_debug('Server said: %s%s' % (recv[:100], '...' if len(recv) > 100 else ''))
+
+ if not playlist_data:
+ raise ExtractorError('Unable to fetch HLS playlist info via WebSocket')
+
+ formats = []
+ for name, playlists in playlist_data['arguments'].items():
+ if not isinstance(playlists, list):
+ continue
+ for pl in playlists:
+                if pl.get('status') == 0 and 'master_playlist' in (pl.get('url') or ''):
+ formats.extend(self._extract_m3u8_formats(
+ pl['url'], video_id, ext='mp4', m3u8_id=name, live=True,
+ headers={
+ 'Origin': 'https://live.fc2.com',
+ 'Referer': url,
+ }))
+
+ for fmt in formats:
+ fmt.update({
+ 'protocol': 'fc2_live',
+ 'ws': ws,
+ })
+
+ title = self._html_search_meta(('og:title', 'twitter:title'), webpage, 'live title', fatal=False)
+ if not title:
+ title = self._html_extract_title(webpage, 'html title', fatal=False)
+ if title:
+ # remove service name in <title>
+ title = re.sub(r'\s+-\s+.+$', '', title)
+ uploader = None
+ if title:
+ match = self._search_regex(r'^(.+?)\s*\[(.+?)\]$', title, 'title and uploader', default=None, group=(1, 2))
+ if match and all(match):
+ title, uploader = match
+
+ live_info_view = self._search_regex(r'(?s)liveInfoView\s*:\s*({.+?}),\s*premiumStateView', webpage, 'user info', fatal=False) or None
+ if live_info_view:
+ # remove jQuery code from object literal
+ live_info_view = re.sub(r'\$\(.+?\)[^,]+,', '"",', live_info_view)
+ live_info_view = self._parse_json(js_to_json(live_info_view), video_id)
+
+ return {
+ 'id': video_id,
+ 'title': title or traverse_obj(live_info_view, 'title'),
+ 'description': self._html_search_meta(
+ ('og:description', 'twitter:description'),
+ webpage, 'live description', fatal=False) or traverse_obj(live_info_view, 'info'),
+ 'formats': formats,
+ 'uploader': uploader or traverse_obj(live_info_view, 'name'),
+ 'uploader_id': video_id,
+ 'thumbnail': traverse_obj(live_info_view, 'thumb'),
+ 'is_live': True,
+ }
diff --git a/yt_dlp/extractor/fczenit.py b/yt_dlp/extractor/fczenit.py
new file mode 100644
index 0000000..8175b6b
--- /dev/null
+++ b/yt_dlp/extractor/fczenit.py
@@ -0,0 +1,51 @@
+from .common import InfoExtractor
+from ..utils import (
+    float_or_none,
+    int_or_none,
+)
+
+
+class FczenitIE(InfoExtractor):
+ _VALID_URL = r'https?://(?:www\.)?fc-zenit\.ru/video/(?P<id>[0-9]+)'
+ _TEST = {
+ 'url': 'http://fc-zenit.ru/video/41044/',
+ 'md5': '0e3fab421b455e970fa1aa3891e57df0',
+ 'info_dict': {
+ 'id': '41044',
+ 'ext': 'mp4',
+ 'title': 'Так пишется история: казанский разгром ЦСКА на «Зенит-ТВ»',
+ 'timestamp': 1462283735,
+ 'upload_date': '20160503',
+ },
+ }
+
+ def _real_extract(self, url):
+ video_id = self._match_id(url)
+ webpage = self._download_webpage(url, video_id)
+
+ msi_id = self._search_regex(
+ r"(?s)config\s*=\s*{.+?video_id\s*:\s*'([^']+)'", webpage, 'msi id')
+
+ msi_data = self._download_json(
+ 'http://player.fc-zenit.ru/msi/video', msi_id, query={
+ 'video': msi_id,
+ })['data']
+ title = msi_data['name']
+
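+        # quality labels appear to be plain heights ('720' etc.), so the label
+        # doubles as both format_id and height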
+ formats = [{
+ 'format_id': q.get('label'),
+ 'url': q['url'],
+ 'height': int_or_none(q.get('label')),
+ } for q in msi_data['qualities'] if q.get('url')]
+
+ tags = [tag['label'] for tag in msi_data.get('tags', []) if tag.get('label')]
+
+ return {
+ 'id': video_id,
+ 'title': title,
+ 'thumbnail': msi_data.get('preview'),
+ 'formats': formats,
+ 'duration': float_or_none(msi_data.get('duration')),
+ 'timestamp': int_or_none(msi_data.get('date')),
+ 'tags': tags,
+ }
diff --git a/yt_dlp/extractor/fifa.py b/yt_dlp/extractor/fifa.py
new file mode 100644
index 0000000..f604cbd
--- /dev/null
+++ b/yt_dlp/extractor/fifa.py
@@ -0,0 +1,84 @@
+from .common import InfoExtractor
+
+from ..utils import (
+ int_or_none,
+ traverse_obj,
+ unified_timestamp,
+)
+
+
+class FifaIE(InfoExtractor):
+ _VALID_URL = r'https?://www\.fifa\.com/fifaplus/(?P<locale>\w{2})/watch/([^#?]+/)?(?P<id>\w+)'
+ _TESTS = [{
+ 'url': 'https://www.fifa.com/fifaplus/en/watch/7on10qPcnyLajDDU3ntg6y',
+ 'info_dict': {
+ 'id': '7on10qPcnyLajDDU3ntg6y',
+ 'title': 'Italy v France | Final | 2006 FIFA World Cup Germany™ | Full Match Replay',
+ 'description': 'md5:f4520d0ee80529c8ba4134a7d692ff8b',
+ 'ext': 'mp4',
+ 'categories': ['FIFA Tournaments'],
+ 'thumbnail': 'https://digitalhub.fifa.com/transform/135e2656-3a51-407b-8810-6c34bec5b59b/FMR_2006_Italy_France_Final_Hero',
+ 'duration': 8165,
+ 'release_timestamp': 1152403200,
+ 'release_date': '20060709',
+ },
+ 'params': {'skip_download': 'm3u8'},
+ }, {
+ 'url': 'https://www.fifa.com/fifaplus/pt/watch/1cg5r5Qt6Qt12ilkDgb1sV',
+ 'info_dict': {
+ 'id': '1cg5r5Qt6Qt12ilkDgb1sV',
+ 'title': 'Brazil v Germany | Semi-finals | 2014 FIFA World Cup Brazil™ | Extended Highlights',
+ 'description': 'md5:d908c74ee66322b804ae2e521b02a855',
+ 'ext': 'mp4',
+ 'categories': ['FIFA Tournaments', 'Highlights'],
+ 'thumbnail': 'https://digitalhub.fifa.com/transform/d8fe6f61-276d-4a73-a7fe-6878a35fd082/FIFAPLS_100EXTHL_2014BRAvGER_TMB',
+ 'duration': 902,
+ 'release_timestamp': 1404777600,
+ 'release_date': '20140708',
+ },
+ 'params': {'skip_download': 'm3u8'},
+ }, {
+ 'url': 'https://www.fifa.com/fifaplus/fr/watch/3C6gQH9C2DLwzNx7BMRQdp',
+ 'info_dict': {
+ 'id': '3C6gQH9C2DLwzNx7BMRQdp',
+ 'title': 'Josimar goal against Northern Ireland | Classic Goals',
+ 'description': 'md5:cbe7e7bb52f603c9f1fe9a4780fe983b',
+ 'ext': 'mp4',
+ 'categories': ['FIFA Tournaments', 'Goal'],
+ 'duration': 28,
+ 'thumbnail': 'https://digitalhub.fifa.com/transform/f9301391-f8d9-48b5-823e-c093ac5e3e11/CG_MEN_1986_JOSIMAR',
+ },
+ 'params': {'skip_download': 'm3u8'},
+ }]
+
+ def _real_extract(self, url):
+ video_id, locale = self._match_valid_url(url).group('id', 'locale')
+ webpage = self._download_webpage(url, video_id)
+
+ preconnect_link = self._search_regex(
+ r'<link\b[^>]+\brel\s*=\s*"preconnect"[^>]+href\s*=\s*"([^"]+)"', webpage, 'Preconnect Link')
+
+ video_details = self._download_json(
+            f'{preconnect_link}/sections/videoDetails/{video_id}', video_id, 'Downloading Video Details', fatal=False) or {}
+
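+        # preplayParameters carry the Uplynk contentId, query string and signature interpolated below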
+ preplay_parameters = self._download_json(
+ f'{preconnect_link}/videoPlayerData/{video_id}', video_id, 'Downloading Preplay Parameters')['preplayParameters']
+
+ content_data = self._download_json(
+ 'https://content.uplynk.com/preplay/{contentId}/multiple.json?{queryStr}&sig={signature}'.format(**preplay_parameters),
+ video_id, 'Downloading Content Data')
+
+ formats, subtitles = self._extract_m3u8_formats_and_subtitles(content_data['playURL'], video_id)
+
+ return {
+ 'id': video_id,
+ 'title': video_details.get('title'),
+ 'description': video_details.get('description'),
+ 'duration': int_or_none(video_details.get('duration')),
+ 'release_timestamp': unified_timestamp(video_details.get('dateOfRelease')),
+ 'categories': traverse_obj(video_details, (('videoCategory', 'videoSubcategory'),)),
+ 'thumbnail': traverse_obj(video_details, ('backgroundImage', 'src')),
+ 'formats': formats,
+ 'subtitles': subtitles,
+ }
diff --git a/yt_dlp/extractor/filmon.py b/yt_dlp/extractor/filmon.py
new file mode 100644
index 0000000..0cd18f4
--- /dev/null
+++ b/yt_dlp/extractor/filmon.py
@@ -0,0 +1,173 @@
+from .common import InfoExtractor
+from ..compat import compat_str
+from ..networking.exceptions import HTTPError
+from ..utils import (
+    ExtractorError,
+    int_or_none,
+    qualities,
+    strip_or_none,
+)
+
+
+class FilmOnIE(InfoExtractor):
+ IE_NAME = 'filmon'
+ _VALID_URL = r'(?:https?://(?:www\.)?filmon\.com/vod/view/|filmon:)(?P<id>\d+)'
+ _TESTS = [{
+ 'url': 'https://www.filmon.com/vod/view/24869-0-plan-9-from-outer-space',
+ 'info_dict': {
+ 'id': '24869',
+ 'ext': 'mp4',
+ 'title': 'Plan 9 From Outer Space',
+ 'description': 'Dead human, zombies and vampires',
+ },
+ }, {
+ 'url': 'https://www.filmon.com/vod/view/2825-1-popeye-series-1',
+ 'info_dict': {
+ 'id': '2825',
+ 'title': 'Popeye Series 1',
+ 'description': 'The original series of Popeye.',
+ },
+ 'playlist_mincount': 8,
+ }]
+
+ def _real_extract(self, url):
+ video_id = self._match_id(url)
+
+ try:
+ response = self._download_json(
+ 'https://www.filmon.com/api/vod/movie?id=%s' % video_id,
+ video_id)['response']
+ except ExtractorError as e:
+ if isinstance(e.cause, HTTPError):
+ errmsg = self._parse_json(e.cause.response.read().decode(), video_id)['reason']
+ raise ExtractorError('%s said: %s' % (self.IE_NAME, errmsg), expected=True)
+ raise
+
+ title = response['title']
+ description = strip_or_none(response.get('description'))
+
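+        # a type_id of 1 appears to denote a series; expand its episodes into a playlist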
+ if response.get('type_id') == 1:
+            entries = [self.url_result('filmon:%s' % episode_id) for episode_id in response.get('episodes', [])]
+ return self.playlist_result(entries, video_id, title, description)
+
+ QUALITY = qualities(('low', 'high'))
+ formats = []
+ for format_id, stream in response.get('streams', {}).items():
+ stream_url = stream.get('url')
+ if not stream_url:
+ continue
+ formats.append({
+ 'format_id': format_id,
+ 'url': stream_url,
+ 'ext': 'mp4',
+ 'quality': QUALITY(stream.get('quality')),
+ 'protocol': 'm3u8_native',
+ })
+
+ thumbnails = []
+ poster = response.get('poster', {})
+ thumbs = poster.get('thumbs', {})
+ thumbs['poster'] = poster
+ for thumb_id, thumb in thumbs.items():
+ thumb_url = thumb.get('url')
+ if not thumb_url:
+ continue
+ thumbnails.append({
+ 'id': thumb_id,
+ 'url': thumb_url,
+ 'width': int_or_none(thumb.get('width')),
+ 'height': int_or_none(thumb.get('height')),
+ })
+
+ return {
+ 'id': video_id,
+ 'title': title,
+ 'formats': formats,
+ 'description': description,
+ 'thumbnails': thumbnails,
+ }
+
+
+class FilmOnChannelIE(InfoExtractor):
+ IE_NAME = 'filmon:channel'
+ _VALID_URL = r'https?://(?:www\.)?filmon\.com/(?:tv|channel)/(?P<id>[a-z0-9-]+)'
+ _TESTS = [{
+ # VOD
+ 'url': 'http://www.filmon.com/tv/sports-haters',
+ 'info_dict': {
+ 'id': '4190',
+ 'ext': 'mp4',
+ 'title': 'Sports Haters',
+ 'description': 'md5:dabcb4c1d9cfc77085612f1a85f8275d',
+ },
+ }, {
+ # LIVE
+ 'url': 'https://www.filmon.com/channel/filmon-sports',
+ 'only_matching': True,
+ }, {
+ 'url': 'https://www.filmon.com/tv/2894',
+ 'only_matching': True,
+ }]
+
+ _THUMBNAIL_RES = [
+ ('logo', 56, 28),
+ ('big_logo', 106, 106),
+ ('extra_big_logo', 300, 300),
+ ]
+
+ def _real_extract(self, url):
+ channel_id = self._match_id(url)
+
+ try:
+ channel_data = self._download_json(
+ 'http://www.filmon.com/api-v2/channel/' + channel_id, channel_id)['data']
+ except ExtractorError as e:
+ if isinstance(e.cause, HTTPError):
+ errmsg = self._parse_json(e.cause.response.read().decode(), channel_id)['message']
+ raise ExtractorError('%s said: %s' % (self.IE_NAME, errmsg), expected=True)
+ raise
+
+ channel_id = compat_str(channel_data['id'])
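+        # channels that are neither VOD nor VOX archives are treated as live streams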
+ is_live = not channel_data.get('is_vod') and not channel_data.get('is_vox')
+ title = channel_data['title']
+
+ QUALITY = qualities(('low', 'high'))
+ formats = []
+ for stream in channel_data.get('streams', []):
+ stream_url = stream.get('url')
+ if not stream_url:
+ continue
+ if not is_live:
+ formats.extend(self._extract_wowza_formats(
+ stream_url, channel_id, skip_protocols=['dash', 'rtmp', 'rtsp']))
+ continue
+ quality = stream.get('quality')
+ formats.append({
+ 'format_id': quality,
+                # this is an m3u8 stream, but _extract_m3u8_formats is deliberately skipped
+                # because the playlist has no bitrate variants anyway
+ 'url': stream_url,
+ 'ext': 'mp4',
+ 'quality': QUALITY(quality),
+ })
+
+ thumbnails = []
+ for name, width, height in self._THUMBNAIL_RES:
+ thumbnails.append({
+ 'id': name,
+ 'url': 'http://static.filmon.com/assets/channels/%s/%s.png' % (channel_id, name),
+ 'width': width,
+ 'height': height,
+ })
+
+ return {
+ 'id': channel_id,
+ 'display_id': channel_data.get('alias'),
+ 'title': title,
+ 'description': channel_data.get('description'),
+ 'thumbnails': thumbnails,
+ 'formats': formats,
+ 'is_live': is_live,
+ }
diff --git a/yt_dlp/extractor/filmweb.py b/yt_dlp/extractor/filmweb.py
new file mode 100644
index 0000000..cfea1f2
--- /dev/null
+++ b/yt_dlp/extractor/filmweb.py
@@ -0,0 +1,38 @@
+from .common import InfoExtractor
+
+
+class FilmwebIE(InfoExtractor):
+ _VALID_URL = r'https?://(?:www\.)?filmweb\.no/(?P<type>trailere|filmnytt)/article(?P<id>\d+)\.ece'
+ _TEST = {
+ 'url': 'http://www.filmweb.no/trailere/article1264921.ece',
+ 'md5': 'e353f47df98e557d67edaceda9dece89',
+ 'info_dict': {
+ 'id': '13033574',
+ 'ext': 'mp4',
+ 'title': 'Det som en gang var',
+ 'upload_date': '20160316',
+ 'timestamp': 1458140101,
+ 'uploader_id': '12639966',
+ 'uploader': 'Live Roaldset',
+ }
+ }
+
+ def _real_extract(self, url):
+ article_type, article_id = self._match_valid_url(url).groups()
+ if article_type == 'filmnytt':
+ webpage = self._download_webpage(url, article_id)
+ article_id = self._search_regex(r'data-videoid="(\d+)"', webpage, 'article id')
+ embed_code = self._download_json(
+ 'https://www.filmweb.no/template_v2/ajax/json_trailerEmbed.jsp',
+ article_id, query={
+ 'articleId': article_id,
+ })['embedCode']
+ iframe_url = self._proto_relative_url(self._search_regex(
+ r'<iframe[^>]+src="([^"]+)', embed_code, 'iframe url'))
+
+ return {
+ '_type': 'url_transparent',
+ 'id': article_id,
+ 'url': iframe_url,
+ 'ie_key': 'TwentyThreeVideo',
+ }
diff --git a/yt_dlp/extractor/firsttv.py b/yt_dlp/extractor/firsttv.py
new file mode 100644
index 0000000..f74bd13
--- /dev/null
+++ b/yt_dlp/extractor/firsttv.py
@@ -0,0 +1,154 @@
+from .common import InfoExtractor
+from ..compat import (
+ compat_str,
+ compat_urlparse,
+)
+from ..utils import (
+ int_or_none,
+ qualities,
+ unified_strdate,
+ url_or_none,
+)
+
+
+class FirstTVIE(InfoExtractor):
+ IE_NAME = '1tv'
+ IE_DESC = 'Первый канал'
+ _VALID_URL = r'https?://(?:www\.)?1tv\.ru/(?:[^/]+/)+(?P<id>[^/?#]+)'
+
+ _TESTS = [{
+ # single format
+ 'url': 'http://www.1tv.ru/shows/naedine-so-vsemi/vypuski/gost-lyudmila-senchina-naedine-so-vsemi-vypusk-ot-12-02-2015',
+ 'md5': 'a1b6b60d530ebcf8daacf4565762bbaf',
+ 'info_dict': {
+ 'id': '40049',
+ 'ext': 'mp4',
+ 'title': 'Гость Людмила Сенчина. Наедине со всеми. Выпуск от 12.02.2015',
+ 'thumbnail': r're:^https?://.*\.(?:jpg|JPG)$',
+ 'upload_date': '20150212',
+ 'duration': 2694,
+ },
+ }, {
+ # multiple formats
+ 'url': 'http://www.1tv.ru/shows/dobroe-utro/pro-zdorove/vesennyaya-allergiya-dobroe-utro-fragment-vypuska-ot-07042016',
+ 'info_dict': {
+ 'id': '364746',
+ 'ext': 'mp4',
+ 'title': 'Весенняя аллергия. Доброе утро. Фрагмент выпуска от 07.04.2016',
+ 'thumbnail': r're:^https?://.*\.(?:jpg|JPG)$',
+ 'upload_date': '20160407',
+ 'duration': 179,
+ 'formats': 'mincount:3',
+ },
+ 'params': {
+ 'skip_download': True,
+ },
+ }, {
+ 'url': 'http://www.1tv.ru/news/issue/2016-12-01/14:00',
+ 'info_dict': {
+ 'id': '14:00',
+ 'title': 'Выпуск новостей в 14:00 1 декабря 2016 года. Новости. Первый канал',
+ 'description': 'md5:2e921b948f8c1ff93901da78ebdb1dfd',
+ },
+ 'playlist_count': 13,
+ }, {
+ 'url': 'http://www.1tv.ru/shows/tochvtoch-supersezon/vystupleniya/evgeniy-dyatlov-vladimir-vysockiy-koni-priveredlivye-toch-v-toch-supersezon-fragment-vypuska-ot-06-11-2016',
+ 'only_matching': True,
+ }]
+
+ def _real_extract(self, url):
+ display_id = self._match_id(url)
+
+ webpage = self._download_webpage(url, display_id)
+ playlist_url = compat_urlparse.urljoin(url, self._search_regex(
+ r'data-playlist-url=(["\'])(?P<url>(?:(?!\1).)+)\1',
+ webpage, 'playlist url', group='url'))
+
+ parsed_url = compat_urlparse.urlparse(playlist_url)
+ qs = compat_urlparse.parse_qs(parsed_url.query)
+ item_ids = qs.get('videos_ids[]') or qs.get('news_ids[]')
+
+ items = self._download_json(playlist_url, display_id)
+
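+        # when the playlist URL names explicit ids, keep only those items; otherwise take the first one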
+ if item_ids:
+ items = [
+ item for item in items
+ if item.get('uid') and compat_str(item['uid']) in item_ids]
+ else:
+ items = [items[0]]
+
+ entries = []
+ QUALITIES = ('ld', 'sd', 'hd', )
+
+ for item in items:
+ title = item['title']
+ quality = qualities(QUALITIES)
+ formats = []
+ path = None
+ for f in item.get('mbr', []):
+ src = url_or_none(f.get('src'))
+ if not src:
+ continue
+ tbr = int_or_none(self._search_regex(
+ r'_(\d{3,})\.mp4', src, 'tbr', default=None))
+ if not path:
+ path = self._search_regex(
+ r'//[^/]+/(.+?)_\d+\.mp4', src,
+ 'm3u8 path', default=None)
+ formats.append({
+ 'url': src,
+ 'format_id': f.get('name'),
+ 'tbr': tbr,
+ 'source_preference': quality(f.get('name')),
+ # quality metadata of http formats may be incorrect
+ 'preference': -10,
+ })
+ # m3u8 URL format is reverse engineered from [1] (search for
+ # master.m3u8). dashEdges (that is currently balancer-vod.1tv.ru)
+ # is taken from [2].
+ # 1. http://static.1tv.ru/player/eump1tv-current/eump-1tv.all.min.js?rnd=9097422834:formatted
+ # 2. http://static.1tv.ru/player/eump1tv-config/config-main.js?rnd=9097422834
+ if not path and len(formats) == 1:
+ path = self._search_regex(
+ r'//[^/]+/(.+?$)', formats[0]['url'],
+ 'm3u8 path', default=None)
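+            # multiple variants are requested as an Edgecast-style '_,tbr1,tbr2,.mp4' urlset; a single format keeps its full path plus a bare ','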
+ if path:
+ if len(formats) == 1:
+ m3u8_path = ','
+ else:
+ tbrs = [compat_str(t) for t in sorted(f['tbr'] for f in formats)]
+ m3u8_path = '_,%s,%s' % (','.join(tbrs), '.mp4')
+ formats.extend(self._extract_m3u8_formats(
+ 'http://balancer-vod.1tv.ru/%s%s.urlset/master.m3u8'
+ % (path, m3u8_path),
+ display_id, 'mp4',
+ entry_protocol='m3u8_native', m3u8_id='hls', fatal=False))
+
+ thumbnail = item.get('poster') or self._og_search_thumbnail(webpage)
+ duration = int_or_none(item.get('duration') or self._html_search_meta(
+ 'video:duration', webpage, 'video duration', fatal=False))
+ upload_date = unified_strdate(self._html_search_meta(
+ 'ya:ovs:upload_date', webpage, 'upload date', default=None))
+
+ entries.append({
+ 'id': compat_str(item.get('id') or item['uid']),
+ 'thumbnail': thumbnail,
+ 'title': title,
+ 'upload_date': upload_date,
+                'duration': duration,
+                'formats': formats,
+ })
+
+ title = self._html_search_regex(
+ (r'<div class="tv_translation">\s*<h1><a href="[^"]+">([^<]*)</a>',
+ r"'title'\s*:\s*'([^']+)'"),
+ webpage, 'title', default=None) or self._og_search_title(
+ webpage, default=None)
+ description = self._html_search_regex(
+ r'<div class="descr">\s*<div>&nbsp;</div>\s*<p>([^<]*)</p></div>',
+ webpage, 'description', default=None) or self._html_search_meta(
+ 'description', webpage, 'description', default=None)
+
+ return self.playlist_result(entries, display_id, title, description)
diff --git a/yt_dlp/extractor/fivetv.py b/yt_dlp/extractor/fivetv.py
new file mode 100644
index 0000000..1f48cfd
--- /dev/null
+++ b/yt_dlp/extractor/fivetv.py
@@ -0,0 +1,85 @@
+from .common import InfoExtractor
+from ..utils import int_or_none
+
+
+class FiveTVIE(InfoExtractor):
+ _VALID_URL = r'''(?x)
+ https?://
+ (?:www\.)?5-tv\.ru/
+ (?:
+ (?:[^/]+/)+(?P<id>\d+)|
+ (?P<path>[^/?#]+)(?:[/?#])?
+ )
+ '''
+
+ _TESTS = [{
+ 'url': 'http://5-tv.ru/news/96814/',
+ 'md5': 'bbff554ad415ecf5416a2f48c22d9283',
+ 'info_dict': {
+ 'id': '96814',
+ 'ext': 'mp4',
+ 'title': 'Россияне выбрали имя для общенациональной платежной системы',
+ 'description': 'md5:a8aa13e2b7ad36789e9f77a74b6de660',
+ 'thumbnail': r're:^https?://.*\.jpg$',
+ 'duration': 180,
+ },
+ }, {
+ 'url': 'http://5-tv.ru/video/1021729/',
+ 'info_dict': {
+ 'id': '1021729',
+ 'ext': 'mp4',
+ 'title': '3D принтер',
+ 'description': 'md5:d76c736d29ef7ec5c0cf7d7c65ffcb41',
+ 'thumbnail': r're:^https?://.*\.jpg$',
+ 'duration': 180,
+ },
+ }, {
+ # redirect to https://www.5-tv.ru/projects/1000095/izvestia-glavnoe/
+ 'url': 'http://www.5-tv.ru/glavnoe/#itemDetails',
+ 'info_dict': {
+ 'id': 'glavnoe',
+ 'ext': 'mp4',
+ 'title': r're:^Итоги недели с \d+ по \d+ \w+ \d{4} года$',
+ 'thumbnail': r're:^https?://.*\.jpg$',
+ },
+ 'skip': 'redirect to «Известия. Главное» project page',
+ }, {
+ 'url': 'http://www.5-tv.ru/glavnoe/broadcasts/508645/',
+ 'only_matching': True,
+ }, {
+ 'url': 'http://5-tv.ru/films/1507502/',
+ 'only_matching': True,
+ }, {
+ 'url': 'http://5-tv.ru/programs/broadcast/508713/',
+ 'only_matching': True,
+ }, {
+ 'url': 'http://5-tv.ru/angel/',
+ 'only_matching': True,
+ }, {
+ 'url': 'http://www.5-tv.ru/schedule/?iframe=true&width=900&height=450',
+ 'only_matching': True,
+ }]
+
+ def _real_extract(self, url):
+ mobj = self._match_valid_url(url)
+ video_id = mobj.group('id') or mobj.group('path')
+
+ webpage = self._download_webpage(url, video_id)
+
+ video_url = self._search_regex(
+ [r'<div[^>]+?class="(?:flow)?player[^>]+?data-href="([^"]+)"',
+ r'<a[^>]+?href="([^"]+)"[^>]+?class="videoplayer"'],
+ webpage, 'video url')
+
+ title = self._generic_title('', webpage)
+ duration = int_or_none(self._og_search_property(
+ 'video:duration', webpage, 'duration', default=None))
+
+ return {
+ 'id': video_id,
+ 'url': video_url,
+ 'title': title,
+ 'description': self._og_search_description(webpage, default=None),
+ 'thumbnail': self._og_search_thumbnail(webpage, default=None),
+ 'duration': duration,
+ }
diff --git a/yt_dlp/extractor/flextv.py b/yt_dlp/extractor/flextv.py
new file mode 100644
index 0000000..f3d3eff
--- /dev/null
+++ b/yt_dlp/extractor/flextv.py
@@ -0,0 +1,63 @@
+from .common import InfoExtractor
+from ..networking.exceptions import HTTPError
+from ..utils import (
+ ExtractorError,
+ UserNotLive,
+ parse_iso8601,
+ str_or_none,
+ traverse_obj,
+ url_or_none,
+)
+
+
+class FlexTVIE(InfoExtractor):
+ _VALID_URL = r'https?://(?:www\.)?flextv\.co\.kr/channels/(?P<id>\d+)/live'
+ _TESTS = [{
+ 'url': 'https://www.flextv.co.kr/channels/231638/live',
+ 'info_dict': {
+ 'id': '231638',
+ 'ext': 'mp4',
+ 'title': r're:^214하나만\.\.\. ',
+ 'thumbnail': r're:^https?://.+\.jpg',
+ 'upload_date': r're:\d{8}',
+ 'timestamp': int,
+ 'live_status': 'is_live',
+ 'channel': 'Hi별',
+ 'channel_id': '244396',
+ },
+ 'skip': 'The channel is offline',
+ }, {
+ 'url': 'https://www.flextv.co.kr/channels/746/live',
+ 'only_matching': True,
+ }]
+
+ def _real_extract(self, url):
+ channel_id = self._match_id(url)
+
+ try:
+ stream_data = self._download_json(
+ f'https://api.flextv.co.kr/api/channels/{channel_id}/stream',
+ channel_id, query={'option': 'all'})
+ except ExtractorError as e:
+ if isinstance(e.cause, HTTPError) and e.cause.status == 400:
+ raise UserNotLive(video_id=channel_id)
+ raise
+
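+        # the first source entry appears to be the HLS master playlist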
+ playlist_url = stream_data['sources'][0]['url']
+ formats, subtitles = self._extract_m3u8_formats_and_subtitles(
+ playlist_url, channel_id, 'mp4')
+
+ return {
+ 'id': channel_id,
+ 'formats': formats,
+ 'subtitles': subtitles,
+ 'is_live': True,
+ **traverse_obj(stream_data, {
+ 'title': ('stream', 'title', {str}),
+ 'timestamp': ('stream', 'createdAt', {parse_iso8601}),
+ 'thumbnail': ('thumbUrl', {url_or_none}),
+ 'channel': ('owner', 'name', {str}),
+ 'channel_id': ('owner', 'id', {str_or_none}),
+ }),
+ }
diff --git a/yt_dlp/extractor/flickr.py b/yt_dlp/extractor/flickr.py
new file mode 100644
index 0000000..89a40d7
--- /dev/null
+++ b/yt_dlp/extractor/flickr.py
@@ -0,0 +1,115 @@
+from .common import InfoExtractor
+from ..compat import (
+ compat_str,
+ compat_urllib_parse_urlencode,
+)
+from ..utils import (
+ ExtractorError,
+ format_field,
+ int_or_none,
+ qualities,
+)
+
+
+class FlickrIE(InfoExtractor):
+ _VALID_URL = r'https?://(?:www\.|secure\.)?flickr\.com/photos/[\w\-_@]+/(?P<id>\d+)'
+ _TEST = {
+ 'url': 'http://www.flickr.com/photos/forestwander-nature-pictures/5645318632/in/photostream/',
+ 'md5': '164fe3fa6c22e18d448d4d5af2330f31',
+ 'info_dict': {
+ 'id': '5645318632',
+ 'ext': 'mpg',
+ 'description': 'Waterfalls in the Springtime at Dark Hollow Waterfalls. These are located just off of Skyline Drive in Virginia. They are only about 6/10 of a mile hike but it is a pretty steep hill and a good climb back up.',
+ 'title': 'Dark Hollow Waterfalls',
+ 'duration': 19,
+ 'timestamp': 1303528740,
+ 'upload_date': '20110423',
+ 'uploader_id': '10922353@N03',
+ 'uploader': 'Forest Wander',
+ 'uploader_url': 'https://www.flickr.com/photos/forestwander-nature-pictures/',
+ 'comment_count': int,
+ 'view_count': int,
+ 'tags': list,
+ 'license': 'Attribution-ShareAlike',
+ }
+ }
+ _API_BASE_URL = 'https://api.flickr.com/services/rest?'
+ # https://help.yahoo.com/kb/flickr/SLN25525.html
+ _LICENSES = {
+ '0': 'All Rights Reserved',
+ '1': 'Attribution-NonCommercial-ShareAlike',
+ '2': 'Attribution-NonCommercial',
+ '3': 'Attribution-NonCommercial-NoDerivs',
+ '4': 'Attribution',
+ '5': 'Attribution-ShareAlike',
+ '6': 'Attribution-NoDerivs',
+ '7': 'No known copyright restrictions',
+ '8': 'United States government work',
+ '9': 'Public Domain Dedication (CC0)',
+ '10': 'Public Domain Work',
+ }
+
+ def _call_api(self, method, video_id, api_key, note, secret=None):
+ query = {
+ 'photo_id': video_id,
+ 'method': 'flickr.%s' % method,
+ 'api_key': api_key,
+ 'format': 'json',
+ 'nojsoncallback': 1,
+ }
+ if secret:
+ query['secret'] = secret
+ data = self._download_json(self._API_BASE_URL + compat_urllib_parse_urlencode(query), video_id, note)
+ if data['stat'] != 'ok':
+ raise ExtractorError(data['message'])
+ return data
+
+ def _real_extract(self, url):
+ video_id = self._match_id(url)
+
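+        # the error-beacon endpoint exposes a public site key that the REST API accepts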
+ api_key = self._download_json(
+ 'https://www.flickr.com/hermes_error_beacon.gne', video_id,
+ 'Downloading api key')['site_key']
+
+ video_info = self._call_api(
+ 'photos.getInfo', video_id, api_key, 'Downloading video info')['photo']
+ if video_info['media'] == 'video':
+ streams = self._call_api(
+ 'video.getStreamInfo', video_id, api_key,
+ 'Downloading streams info', video_info['secret'])['streams']
+
+ preference = qualities(
+ ['288p', 'iphone_wifi', '100', '300', '700', '360p', 'appletv', '720p', '1080p', 'orig'])
+
+ formats = []
+ for stream in streams['stream']:
+ stream_type = compat_str(stream.get('type'))
+ formats.append({
+ 'format_id': stream_type,
+ 'url': stream['_content'],
+ 'quality': preference(stream_type),
+ })
+
+ owner = video_info.get('owner', {})
+ uploader_id = owner.get('nsid')
+ uploader_path = owner.get('path_alias') or uploader_id
+ uploader_url = format_field(uploader_path, None, 'https://www.flickr.com/photos/%s/')
+
+ return {
+ 'id': video_id,
+ 'title': video_info['title']['_content'],
+ 'description': video_info.get('description', {}).get('_content'),
+ 'formats': formats,
+ 'timestamp': int_or_none(video_info.get('dateuploaded')),
+ 'duration': int_or_none(video_info.get('video', {}).get('duration')),
+ 'uploader_id': uploader_id,
+ 'uploader': owner.get('realname'),
+ 'uploader_url': uploader_url,
+ 'comment_count': int_or_none(video_info.get('comments', {}).get('_content')),
+ 'view_count': int_or_none(video_info.get('views')),
+ 'tags': [tag.get('_content') for tag in video_info.get('tags', {}).get('tag', [])],
+ 'license': self._LICENSES.get(video_info.get('license')),
+ }
+ else:
+ raise ExtractorError('not a video', expected=True)
diff --git a/yt_dlp/extractor/floatplane.py b/yt_dlp/extractor/floatplane.py
new file mode 100644
index 0000000..8676d73
--- /dev/null
+++ b/yt_dlp/extractor/floatplane.py
@@ -0,0 +1,335 @@
+import functools
+
+from .common import InfoExtractor
+from ..utils import (
+ ExtractorError,
+ OnDemandPagedList,
+ clean_html,
+ determine_ext,
+ format_field,
+ int_or_none,
+ join_nonempty,
+ parse_codecs,
+ parse_iso8601,
+ url_or_none,
+ urljoin,
+)
+from ..utils.traversal import traverse_obj
+
+
+class FloatplaneIE(InfoExtractor):
+ _VALID_URL = r'https?://(?:(?:www|beta)\.)?floatplane\.com/post/(?P<id>\w+)'
+ _TESTS = [{
+ 'url': 'https://www.floatplane.com/post/2Yf3UedF7C',
+ 'info_dict': {
+ 'id': 'yuleLogLTT',
+ 'ext': 'mp4',
+ 'display_id': '2Yf3UedF7C',
+ 'title': '8K Yule Log Fireplace with Crackling Fire Sounds - 10 Hours',
+ 'description': 'md5:adf2970e0de1c5e3df447818bb0309f6',
+ 'thumbnail': r're:^https?://.*\.jpe?g$',
+ 'duration': 36035,
+ 'comment_count': int,
+ 'like_count': int,
+ 'dislike_count': int,
+ 'release_date': '20191206',
+ 'release_timestamp': 1575657000,
+ 'uploader': 'LinusTechTips',
+ 'uploader_id': '59f94c0bdd241b70349eb72b',
+ 'uploader_url': 'https://www.floatplane.com/channel/linustechtips/home',
+ 'channel': 'Linus Tech Tips',
+ 'channel_id': '63fe42c309e691e4e36de93d',
+ 'channel_url': 'https://www.floatplane.com/channel/linustechtips/home/main',
+ 'availability': 'subscriber_only',
+ },
+ 'params': {'skip_download': 'm3u8'},
+ }, {
+ 'url': 'https://www.floatplane.com/post/j2jqG3JmgJ',
+ 'info_dict': {
+ 'id': 'j2jqG3JmgJ',
+ 'title': 'TJM: Does Anyone Care About Avatar: The Way of Water?',
+ 'description': 'md5:00bf17dc5733e4031e99b7fd6489f274',
+ 'thumbnail': r're:^https?://.*\.jpe?g$',
+ 'comment_count': int,
+ 'like_count': int,
+ 'dislike_count': int,
+ 'release_timestamp': 1671915900,
+ 'release_date': '20221224',
+ 'uploader': 'LinusTechTips',
+ 'uploader_id': '59f94c0bdd241b70349eb72b',
+ 'uploader_url': 'https://www.floatplane.com/channel/linustechtips/home',
+ 'channel': "They're Just Movies",
+ 'channel_id': '64135f82fc76ab7f9fbdc876',
+ 'channel_url': 'https://www.floatplane.com/channel/linustechtips/home/tajm',
+ 'availability': 'subscriber_only',
+ },
+ 'playlist_count': 2,
+ }, {
+ 'url': 'https://www.floatplane.com/post/3tK2tInhoN',
+ 'info_dict': {
+ 'id': '3tK2tInhoN',
+ 'title': 'Extras - How Linus Communicates with Editors (Compensator 4)',
+ 'description': 'md5:83cd40aae1ce124df33769600c80ca5b',
+ 'thumbnail': r're:^https?://.*\.jpe?g$',
+ 'comment_count': int,
+ 'like_count': int,
+ 'dislike_count': int,
+ 'release_timestamp': 1700529120,
+ 'release_date': '20231121',
+ 'uploader': 'LinusTechTips',
+ 'uploader_id': '59f94c0bdd241b70349eb72b',
+ 'uploader_url': 'https://www.floatplane.com/channel/linustechtips/home',
+ 'channel': 'FP Exclusives',
+ 'channel_id': '6413623f5b12cca228a28e78',
+ 'channel_url': 'https://www.floatplane.com/channel/linustechtips/home/fpexclusive',
+ 'availability': 'subscriber_only',
+ },
+ 'playlist_count': 2,
+ }, {
+ 'url': 'https://beta.floatplane.com/post/d870PEFXS1',
+ 'info_dict': {
+ 'id': 'bg9SuYKEww',
+ 'ext': 'mp4',
+ 'display_id': 'd870PEFXS1',
+ 'title': 'LCS Drama, TLOU 2 Remaster, Destiny 2 Player Count Drops, + More!',
+ 'description': 'md5:80d612dcabf41b17487afcbe303ec57d',
+ 'thumbnail': r're:^https?://.*\.jpe?g$',
+ 'release_timestamp': 1700622000,
+ 'release_date': '20231122',
+ 'duration': 513,
+ 'like_count': int,
+ 'dislike_count': int,
+ 'comment_count': int,
+ 'uploader': 'LinusTechTips',
+ 'uploader_id': '59f94c0bdd241b70349eb72b',
+ 'uploader_url': 'https://www.floatplane.com/channel/linustechtips/home',
+ 'channel': 'GameLinked',
+ 'channel_id': '649dbade3540dbc3945eeda7',
+ 'channel_url': 'https://www.floatplane.com/channel/linustechtips/home/gamelinked',
+ 'availability': 'subscriber_only',
+ },
+ 'params': {'skip_download': 'm3u8'},
+ }, {
+ 'url': 'https://www.floatplane.com/post/65B5PNoBtf',
+ 'info_dict': {
+ 'id': '65B5PNoBtf',
+ 'description': 'I recorded the inbuilt demo mode for your 90\'s enjoyment, thanks for being Floaties!',
+ 'display_id': '65B5PNoBtf',
+ 'like_count': int,
+ 'release_timestamp': 1701249480,
+ 'uploader': 'The Trash Network',
+ 'availability': 'subscriber_only',
+ 'uploader_id': '61bc20c9a131fb692bf2a513',
+ 'uploader_url': 'https://www.floatplane.com/channel/TheTrashNetwork/home',
+ 'channel_url': 'https://www.floatplane.com/channel/TheTrashNetwork/home/thedrumthing',
+ 'comment_count': int,
+ 'title': 'The $50 electronic drum kit.',
+ 'channel_id': '64424fe73cd58cbcf8d8e131',
+ 'thumbnail': 'https://pbs.floatplane.com/blogPost_thumbnails/65B5PNoBtf/725555379422705_1701247052743.jpeg',
+ 'dislike_count': int,
+ 'channel': 'The Drum Thing',
+ 'release_date': '20231129',
+ },
+ 'playlist_count': 2,
+ 'playlist': [{
+ 'info_dict': {
+ 'id': 'ISPJjexylS',
+ 'ext': 'mp4',
+ 'release_date': '20231129',
+ 'release_timestamp': 1701249480,
+ 'title': 'The $50 electronic drum kit. .mov',
+ 'channel_id': '64424fe73cd58cbcf8d8e131',
+ 'thumbnail': 'https://pbs.floatplane.com/video_thumbnails/ISPJjexylS/335202812134041_1701249383392.jpeg',
+ 'availability': 'subscriber_only',
+ 'uploader': 'The Trash Network',
+ 'duration': 622,
+ 'channel': 'The Drum Thing',
+ 'uploader_id': '61bc20c9a131fb692bf2a513',
+ 'channel_url': 'https://www.floatplane.com/channel/TheTrashNetwork/home/thedrumthing',
+ 'uploader_url': 'https://www.floatplane.com/channel/TheTrashNetwork/home',
+ },
+ }, {
+ 'info_dict': {
+ 'id': 'qKfxu6fEpu',
+ 'ext': 'aac',
+ 'release_date': '20231129',
+ 'release_timestamp': 1701249480,
+ 'title': 'Roland TD-7 Demo.m4a',
+ 'channel_id': '64424fe73cd58cbcf8d8e131',
+ 'availability': 'subscriber_only',
+ 'uploader': 'The Trash Network',
+ 'duration': 114,
+ 'channel': 'The Drum Thing',
+ 'uploader_id': '61bc20c9a131fb692bf2a513',
+ 'channel_url': 'https://www.floatplane.com/channel/TheTrashNetwork/home/thedrumthing',
+ 'uploader_url': 'https://www.floatplane.com/channel/TheTrashNetwork/home',
+ },
+ }],
+ 'skip': 'requires subscription: "The Trash Network"',
+ 'params': {'skip_download': 'm3u8'},
+ }]
+
+ def _real_initialize(self):
+ if not self._get_cookies('https://www.floatplane.com').get('sails.sid'):
+ self.raise_login_required()
+
+ def _real_extract(self, url):
+ post_id = self._match_id(url)
+
+ post_data = self._download_json(
+ 'https://www.floatplane.com/api/v3/content/post', post_id, query={'id': post_id},
+ note='Downloading post data', errnote='Unable to download post data')
+
+ if not any(traverse_obj(post_data, ('metadata', ('hasVideo', 'hasAudio')))):
+ raise ExtractorError('Post does not contain a video or audio track', expected=True)
+
+ uploader_url = format_field(
+ post_data, [('creator', 'urlname')], 'https://www.floatplane.com/channel/%s/home') or None
+
+ common_info = {
+ 'uploader_url': uploader_url,
+ 'channel_url': urljoin(f'{uploader_url}/', traverse_obj(post_data, ('channel', 'urlname'))),
+ 'availability': self._availability(needs_subscription=True),
+ **traverse_obj(post_data, {
+ 'uploader': ('creator', 'title', {str}),
+ 'uploader_id': ('creator', 'id', {str}),
+ 'channel': ('channel', 'title', {str}),
+ 'channel_id': ('channel', 'id', {str}),
+ 'release_timestamp': ('releaseDate', {parse_iso8601}),
+ }),
+ }
+
+ items = []
+ for media in traverse_obj(post_data, (('videoAttachments', 'audioAttachments'), ...)):
+ media_id = media['id']
+ media_typ = media.get('type') or 'video'
+
+ metadata = self._download_json(
+ f'https://www.floatplane.com/api/v3/content/{media_typ}', media_id, query={'id': media_id},
+ note=f'Downloading {media_typ} metadata')
+
+ stream = self._download_json(
+ 'https://www.floatplane.com/api/v2/cdn/delivery', media_id, query={
+ 'type': 'vod' if media_typ == 'video' else 'aod',
+ 'guid': metadata['guid']
+ }, note=f'Downloading {media_typ} stream data')
+
+ path_template = traverse_obj(stream, ('resource', 'uri', {str}))
+
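+            # the delivery API returns a templated path; substitute each {qualityLevelParams.<name>} token per quality level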
+ def format_path(params):
+ path = path_template
+ for i, val in (params or {}).items():
+ path = path.replace(f'{{qualityLevelParams.{i}}}', val)
+ return path
+
+ formats = []
+ for quality in traverse_obj(stream, ('resource', 'data', 'qualityLevels', ...)):
+                format_url = urljoin(stream['cdn'], format_path(traverse_obj(
+                    stream, ('resource', 'data', 'qualityLevelParams', quality['name'], {dict}))))
+ formats.append({
+ **traverse_obj(quality, {
+ 'format_id': ('name', {str}),
+ 'format_note': ('label', {str}),
+ 'width': ('width', {int}),
+ 'height': ('height', {int}),
+ }),
+ **parse_codecs(quality.get('codecs')),
+                    'url': format_url,
+                    'ext': determine_ext(format_url.partition('/chunk.m3u8')[0], 'mp4'),
+ })
+
+ items.append({
+ **common_info,
+ 'id': media_id,
+ **traverse_obj(metadata, {
+ 'title': ('title', {str}),
+ 'duration': ('duration', {int_or_none}),
+ 'thumbnail': ('thumbnail', 'path', {url_or_none}),
+ }),
+ 'formats': formats,
+ })
+
+ post_info = {
+ **common_info,
+ 'id': post_id,
+ 'display_id': post_id,
+ **traverse_obj(post_data, {
+ 'title': ('title', {str}),
+ 'description': ('text', {clean_html}),
+ 'like_count': ('likes', {int_or_none}),
+ 'dislike_count': ('dislikes', {int_or_none}),
+ 'comment_count': ('comments', {int_or_none}),
+ 'thumbnail': ('thumbnail', 'path', {url_or_none}),
+ }),
+ }
+
+ if len(items) > 1:
+ return self.playlist_result(items, **post_info)
+
+ post_info.update(items[0])
+ return post_info
+
+
+class FloatplaneChannelIE(InfoExtractor):
+ _VALID_URL = r'https?://(?:(?:www|beta)\.)?floatplane\.com/channel/(?P<id>[\w-]+)/home(?:/(?P<channel>[\w-]+))?'
+ _PAGE_SIZE = 20
+ _TESTS = [{
+ 'url': 'https://www.floatplane.com/channel/linustechtips/home/ltxexpo',
+ 'info_dict': {
+ 'id': 'linustechtips/ltxexpo',
+ 'title': 'LTX Expo',
+ 'description': 'md5:9819002f9ebe7fd7c75a3a1d38a59149',
+ },
+ 'playlist_mincount': 51,
+ }, {
+ 'url': 'https://www.floatplane.com/channel/ShankMods/home',
+ 'info_dict': {
+ 'id': 'ShankMods',
+ 'title': 'Shank Mods',
+ 'description': 'md5:6dff1bb07cad8e5448e04daad9be1b30',
+ },
+ 'playlist_mincount': 14,
+ }, {
+ 'url': 'https://beta.floatplane.com/channel/bitwit_ultra/home',
+ 'info_dict': {
+ 'id': 'bitwit_ultra',
+ 'title': 'Bitwit Ultra',
+ 'description': 'md5:1452f280bb45962976d4789200f676dd',
+ },
+ 'playlist_mincount': 200,
+ }]
+
+ def _fetch_page(self, display_id, creator_id, channel_id, page):
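+        # the creator content API pages by offset via fetchAfter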
+ query = {
+ 'id': creator_id,
+ 'limit': self._PAGE_SIZE,
+ 'fetchAfter': page * self._PAGE_SIZE,
+ }
+ if channel_id:
+ query['channel'] = channel_id
+ page_data = self._download_json(
+ 'https://www.floatplane.com/api/v3/content/creator', display_id,
+ query=query, note=f'Downloading page {page + 1}')
+ for post in page_data or []:
+ yield self.url_result(
+ f'https://www.floatplane.com/post/{post["id"]}',
+ FloatplaneIE, id=post['id'], title=post.get('title'),
+ release_timestamp=parse_iso8601(post.get('releaseDate')))
+
+ def _real_extract(self, url):
+ creator, channel = self._match_valid_url(url).group('id', 'channel')
+ display_id = join_nonempty(creator, channel, delim='/')
+
+ creator_data = self._download_json(
+ 'https://www.floatplane.com/api/v3/creator/named',
+ display_id, query={'creatorURL[0]': creator})[0]
+
+ channel_data = traverse_obj(
+ creator_data, ('channels', lambda _, v: v['urlname'] == channel), get_all=False) or {}
+
+ return self.playlist_result(OnDemandPagedList(functools.partial(
+ self._fetch_page, display_id, creator_data['id'], channel_data.get('id')), self._PAGE_SIZE),
+ display_id, title=channel_data.get('title') or creator_data.get('title'),
+ description=channel_data.get('about') or creator_data.get('about'))
diff --git a/yt_dlp/extractor/folketinget.py b/yt_dlp/extractor/folketinget.py
new file mode 100644
index 0000000..55a11e5
--- /dev/null
+++ b/yt_dlp/extractor/folketinget.py
@@ -0,0 +1,74 @@
+from .common import InfoExtractor
+from ..compat import compat_parse_qs
+from ..utils import (
+ int_or_none,
+ parse_duration,
+ parse_iso8601,
+ xpath_text,
+)
+
+
+class FolketingetIE(InfoExtractor):
+ IE_DESC = 'Folketinget (ft.dk; Danish parliament)'
+ _VALID_URL = r'https?://(?:www\.)?ft\.dk/webtv/video/[^?#]*?\.(?P<id>[0-9]+)\.aspx'
+ _TEST = {
+ 'url': 'http://www.ft.dk/webtv/video/20141/eru/td.1165642.aspx?as=1#player',
+ 'md5': '6269e8626fa1a891bf5369b386ae996a',
+ 'info_dict': {
+ 'id': '1165642',
+ 'ext': 'mp4',
+ 'title': 'Åbent samråd i Erhvervsudvalget',
+ 'description': 'Åbent samråd med erhvervs- og vækstministeren om regeringens politik på teleområdet',
+ 'view_count': int,
+ 'width': 768,
+ 'height': 432,
+ 'tbr': 928000,
+ 'timestamp': 1416493800,
+ 'upload_date': '20141120',
+ 'duration': 3960,
+ },
+ 'params': {
+ # rtmp download
+ 'skip_download': True,
+ },
+ }
+
+ def _real_extract(self, url):
+ video_id = self._match_id(url)
+ webpage = self._download_webpage(url, video_id)
+
+ title = self._og_search_title(webpage)
+ description = self._html_search_regex(
+ r'(?s)<div class="video-item-agenda"[^>]*>(.*?)<',
+ webpage, 'description', fatal=False)
+
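+        # the query string of the legacy Flash embed carries the location of the XML stream manifest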
+ player_params = compat_parse_qs(self._search_regex(
+ r'<embed src="http://ft\.arkena\.tv/flash/ftplayer\.swf\?([^"]+)"',
+ webpage, 'player params'))
+ xml_url = player_params['xml'][0]
+ doc = self._download_xml(xml_url, video_id)
+
+ timestamp = parse_iso8601(xpath_text(doc, './/date'))
+ duration = parse_duration(xpath_text(doc, './/duration'))
+ width = int_or_none(xpath_text(doc, './/width'))
+ height = int_or_none(xpath_text(doc, './/height'))
+ view_count = int_or_none(xpath_text(doc, './/views'))
+
+ formats = [{
+ 'format_id': n.attrib['bitrate'],
+ 'url': xpath_text(n, './url', fatal=True),
+ 'tbr': int_or_none(n.attrib['bitrate']),
+ } for n in doc.findall('.//streams/stream')]
+
+ return {
+ 'id': video_id,
+ 'title': title,
+ 'formats': formats,
+ 'description': description,
+ 'timestamp': timestamp,
+ 'width': width,
+ 'height': height,
+ 'duration': duration,
+ 'view_count': view_count,
+ }
diff --git a/yt_dlp/extractor/footyroom.py b/yt_dlp/extractor/footyroom.py
new file mode 100644
index 0000000..4a1316b
--- /dev/null
+++ b/yt_dlp/extractor/footyroom.py
@@ -0,0 +1,54 @@
+from .common import InfoExtractor
+from .streamable import StreamableIE
+
+
+class FootyRoomIE(InfoExtractor):
+ _VALID_URL = r'https?://footyroom\.com/matches/(?P<id>\d+)'
+ _TESTS = [{
+ 'url': 'http://footyroom.com/matches/79922154/hull-city-vs-chelsea/review',
+ 'info_dict': {
+ 'id': '79922154',
+ 'title': 'VIDEO Hull City 0 - 2 Chelsea',
+ },
+ 'playlist_count': 2,
+ 'add_ie': [StreamableIE.ie_key()],
+ }, {
+ 'url': 'http://footyroom.com/matches/75817984/georgia-vs-germany/review',
+ 'info_dict': {
+ 'id': '75817984',
+ 'title': 'VIDEO Georgia 0 - 2 Germany',
+ },
+ 'playlist_count': 1,
+ 'add_ie': ['Playwire']
+ }]
+
+ def _real_extract(self, url):
+ playlist_id = self._match_id(url)
+
+ webpage = self._download_webpage(url, playlist_id)
+
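+        # match highlight clips are embedded client-side in a DataStore.media JS array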
+ playlist = self._parse_json(self._search_regex(
+ r'DataStore\.media\s*=\s*([^;]+)', webpage, 'media data'),
+ playlist_id)
+
+ playlist_title = self._og_search_title(webpage)
+
+ entries = []
+ for video in playlist:
+ payload = video.get('payload')
+ if not payload:
+ continue
+ playwire_url = self._html_search_regex(
+ r'data-config="([^"]+)"', payload,
+ 'playwire url', default=None)
+ if playwire_url:
+ entries.append(self.url_result(self._proto_relative_url(
+ playwire_url, 'http:'), 'Playwire'))
+
+ streamable_url = StreamableIE._extract_url(payload)
+ if streamable_url:
+ entries.append(self.url_result(
+ streamable_url, StreamableIE.ie_key()))
+
+ return self.playlist_result(entries, playlist_id, playlist_title)
diff --git a/yt_dlp/extractor/formula1.py b/yt_dlp/extractor/formula1.py
new file mode 100644
index 0000000..0a8ef85
--- /dev/null
+++ b/yt_dlp/extractor/formula1.py
@@ -0,0 +1,24 @@
+from .common import InfoExtractor
+
+
+class Formula1IE(InfoExtractor):
+ _VALID_URL = r'https?://(?:www\.)?formula1\.com/en/latest/video\.[^.]+\.(?P<id>\d+)\.html'
+ _TEST = {
+ 'url': 'https://www.formula1.com/en/latest/video.race-highlights-spain-2016.6060988138001.html',
+ 'md5': 'be7d3a8c2f804eb2ab2aa5d941c359f8',
+ 'info_dict': {
+ 'id': '6060988138001',
+ 'ext': 'mp4',
+ 'title': 'Race highlights - Spain 2016',
+ 'timestamp': 1463332814,
+ 'upload_date': '20160515',
+ 'uploader_id': '6057949432001',
+ },
+ 'add_ie': ['BrightcoveNew'],
+ }
+ BRIGHTCOVE_URL_TEMPLATE = 'http://players.brightcove.net/6057949432001/S1WMrhjlh_default/index.html?videoId=%s'
+
+ def _real_extract(self, url):
+ bc_id = self._match_id(url)
+ return self.url_result(
+ self.BRIGHTCOVE_URL_TEMPLATE % bc_id, 'BrightcoveNew', bc_id)
diff --git a/yt_dlp/extractor/fourtube.py b/yt_dlp/extractor/fourtube.py
new file mode 100644
index 0000000..b6368b8
--- /dev/null
+++ b/yt_dlp/extractor/fourtube.py
@@ -0,0 +1,309 @@
+import re
+
+from .common import InfoExtractor
+from ..compat import (
+ compat_b64decode,
+ compat_str,
+ compat_urllib_parse_unquote,
+ compat_urlparse,
+)
+from ..utils import (
+ int_or_none,
+ parse_duration,
+ parse_iso8601,
+ str_or_none,
+ str_to_int,
+ try_get,
+ unified_timestamp,
+ url_or_none,
+)
+
+
+class FourTubeBaseIE(InfoExtractor):
+ def _extract_formats(self, url, video_id, media_id, sources):
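+        # the token service returns one signed URL per requested height in sources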
+ token_url = 'https://%s/%s/desktop/%s' % (
+ self._TKN_HOST, media_id, '+'.join(sources))
+
+ parsed_url = compat_urlparse.urlparse(url)
+ tokens = self._download_json(token_url, video_id, data=b'', headers={
+ 'Origin': '%s://%s' % (parsed_url.scheme, parsed_url.hostname),
+ 'Referer': url,
+ })
+ formats = [{
+ 'url': tokens[format]['token'],
+ 'format_id': format + 'p',
+ 'resolution': format + 'p',
+ 'quality': int(format),
+ } for format in sources]
+ return formats
+
+ def _real_extract(self, url):
+ mobj = self._match_valid_url(url)
+ kind, video_id, display_id = mobj.group('kind', 'id', 'display_id')
+
+ if kind == 'm' or not display_id:
+ url = self._URL_TEMPLATE % video_id
+
+ webpage = self._download_webpage(url, video_id)
+
+ title = self._html_search_meta('name', webpage)
+ timestamp = parse_iso8601(self._html_search_meta(
+ 'uploadDate', webpage))
+ thumbnail = self._html_search_meta('thumbnailUrl', webpage)
+ uploader_id = self._html_search_regex(
+ r'<a class="item-to-subscribe" href="[^"]+/(?:channel|user)s?/([^/"]+)" title="Go to [^"]+ page">',
+ webpage, 'uploader id', fatal=False)
+ uploader = self._html_search_regex(
+ r'<a class="item-to-subscribe" href="[^"]+/(?:channel|user)s?/[^/"]+" title="Go to ([^"]+) page">',
+ webpage, 'uploader', fatal=False)
+
+ categories_html = self._search_regex(
+ r'(?s)><i class="icon icon-tag"></i>\s*Categories / Tags\s*.*?<ul class="[^"]*?list[^"]*?">(.*?)</ul>',
+ webpage, 'categories', fatal=False)
+ categories = None
+ if categories_html:
+ categories = [
+ c.strip() for c in re.findall(
+ r'(?s)<li><a.*?>(.*?)</a>', categories_html)]
+
+ view_count = str_to_int(self._search_regex(
+ r'<meta[^>]+itemprop="interactionCount"[^>]+content="UserPlays:([0-9,]+)">',
+ webpage, 'view count', default=None))
+ like_count = str_to_int(self._search_regex(
+ r'<meta[^>]+itemprop="interactionCount"[^>]+content="UserLikes:([0-9,]+)">',
+ webpage, 'like count', default=None))
+ duration = parse_duration(self._html_search_meta('duration', webpage))
+
+ media_id = self._search_regex(
+ r'<button[^>]+data-id=(["\'])(?P<id>\d+)\1[^>]+data-quality=', webpage,
+ 'media id', default=None, group='id')
+ sources = [
+ quality
+ for _, quality in re.findall(r'<button[^>]+data-quality=(["\'])(.+?)\1', webpage)]
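+        # fall back to the player JS bootstrap parameters when the quality buttons are absent from the page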
+ if not (media_id and sources):
+ player_js = self._download_webpage(
+ self._search_regex(
+ r'<script[^>]id=(["\'])playerembed\1[^>]+src=(["\'])(?P<url>.+?)\2',
+ webpage, 'player JS', group='url'),
+ video_id, 'Downloading player JS')
+ params_js = self._search_regex(
+ r'\$\.ajax\(url,\ opts\);\s*\}\s*\}\)\(([0-9,\[\] ]+)\)',
+ player_js, 'initialization parameters')
+ params = self._parse_json('[%s]' % params_js, video_id)
+ media_id = params[0]
+ sources = ['%s' % p for p in params[2]]
+
+ formats = self._extract_formats(url, video_id, media_id, sources)
+
+ return {
+ 'id': video_id,
+ 'title': title,
+ 'formats': formats,
+ 'categories': categories,
+ 'thumbnail': thumbnail,
+ 'uploader': uploader,
+ 'uploader_id': uploader_id,
+ 'timestamp': timestamp,
+ 'like_count': like_count,
+ 'view_count': view_count,
+ 'duration': duration,
+ 'age_limit': 18,
+ }
+
+
+class FourTubeIE(FourTubeBaseIE):
+ IE_NAME = '4tube'
+ _VALID_URL = r'https?://(?:(?P<kind>www|m)\.)?4tube\.com/(?:videos|embed)/(?P<id>\d+)(?:/(?P<display_id>[^/?#&]+))?'
+ _URL_TEMPLATE = 'https://www.4tube.com/videos/%s/video'
+ _TKN_HOST = 'token.4tube.com'
+ _TESTS = [{
+ 'url': 'http://www.4tube.com/videos/209733/hot-babe-holly-michaels-gets-her-ass-stuffed-by-black',
+ 'md5': '6516c8ac63b03de06bc8eac14362db4f',
+ 'info_dict': {
+ 'id': '209733',
+ 'ext': 'mp4',
+ 'title': 'Hot Babe Holly Michaels gets her ass stuffed by black',
+ 'uploader': 'WCP Club',
+ 'uploader_id': 'wcp-club',
+ 'upload_date': '20131031',
+ 'timestamp': 1383263892,
+ 'duration': 583,
+ 'view_count': int,
+ 'like_count': int,
+ 'categories': list,
+ 'age_limit': 18,
+ },
+ }, {
+ 'url': 'http://www.4tube.com/embed/209733',
+ 'only_matching': True,
+ }, {
+ 'url': 'http://m.4tube.com/videos/209733/hot-babe-holly-michaels-gets-her-ass-stuffed-by-black',
+ 'only_matching': True,
+ }]
+
+
+class FuxIE(FourTubeBaseIE):
+ _VALID_URL = r'https?://(?:(?P<kind>www|m)\.)?fux\.com/(?:video|embed)/(?P<id>\d+)(?:/(?P<display_id>[^/?#&]+))?'
+ _URL_TEMPLATE = 'https://www.fux.com/video/%s/video'
+ _TKN_HOST = 'token.fux.com'
+ _TESTS = [{
+ 'url': 'https://www.fux.com/video/195359/awesome-fucking-kitchen-ends-cum-swallow',
+ 'info_dict': {
+ 'id': '195359',
+ 'ext': 'mp4',
+ 'title': 'Awesome fucking in the kitchen ends with cum swallow',
+ 'uploader': 'alenci2342',
+ 'uploader_id': 'alenci2342',
+ 'upload_date': '20131230',
+ 'timestamp': 1388361660,
+ 'duration': 289,
+ 'view_count': int,
+ 'like_count': int,
+ 'categories': list,
+ 'age_limit': 18,
+ },
+ 'params': {
+ 'skip_download': True,
+ },
+ }, {
+ 'url': 'https://www.fux.com/embed/195359',
+ 'only_matching': True,
+ }, {
+ 'url': 'https://www.fux.com/video/195359/awesome-fucking-kitchen-ends-cum-swallow',
+ 'only_matching': True,
+ }]
+
+
+class PornTubeIE(FourTubeBaseIE):
+ _VALID_URL = r'https?://(?:(?P<kind>www|m)\.)?porntube\.com/(?:videos/(?P<display_id>[^/]+)_|embed/)(?P<id>\d+)'
+ _URL_TEMPLATE = 'https://www.porntube.com/videos/video_%s'
+ _TKN_HOST = 'tkn.porntube.com'
+ _TESTS = [{
+ 'url': 'https://www.porntube.com/videos/teen-couple-doing-anal_7089759',
+ 'info_dict': {
+ 'id': '7089759',
+ 'ext': 'mp4',
+ 'title': 'Teen couple doing anal',
+ 'uploader': 'Alexy',
+ 'uploader_id': '91488',
+ 'upload_date': '20150606',
+ 'timestamp': 1433595647,
+ 'duration': 5052,
+ 'view_count': int,
+ 'like_count': int,
+ 'age_limit': 18,
+ },
+ 'params': {
+ 'skip_download': True,
+ },
+ }, {
+ 'url': 'https://www.porntube.com/videos/squirting-teen-ballerina-ecg_1331406',
+ 'info_dict': {
+ 'id': '1331406',
+ 'ext': 'mp4',
+ 'title': 'Squirting Teen Ballerina on ECG',
+ 'uploader': 'Exploited College Girls',
+ 'uploader_id': '665',
+ 'channel': 'Exploited College Girls',
+ 'channel_id': '665',
+ 'upload_date': '20130920',
+ 'timestamp': 1379685485,
+ 'duration': 851,
+ 'view_count': int,
+ 'like_count': int,
+ 'age_limit': 18,
+ },
+ 'params': {
+ 'skip_download': True,
+ },
+ }, {
+ 'url': 'https://www.porntube.com/embed/7089759',
+ 'only_matching': True,
+ }, {
+ 'url': 'https://m.porntube.com/videos/teen-couple-doing-anal_7089759',
+ 'only_matching': True,
+ }]
+
+ def _real_extract(self, url):
+ mobj = self._match_valid_url(url)
+ video_id, display_id = mobj.group('id', 'display_id')
+
+ webpage = self._download_webpage(url, display_id)
+
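+        # the page state is base64-encoded, percent-encoded JSON exposed as INITIALSTATE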
+ video = self._parse_json(
+ self._search_regex(
+ r'INITIALSTATE\s*=\s*(["\'])(?P<value>(?:(?!\1).)+)\1',
+ webpage, 'data', group='value'), video_id,
+ transform_source=lambda x: compat_urllib_parse_unquote(
+ compat_b64decode(x).decode('utf-8')))['page']['video']
+
+ title = video['title']
+ media_id = video['mediaId']
+ sources = [compat_str(e['height'])
+ for e in video['encodings'] if e.get('height')]
+ formats = self._extract_formats(url, video_id, media_id, sources)
+
+ thumbnail = url_or_none(video.get('masterThumb'))
+ uploader = try_get(video, lambda x: x['user']['username'], compat_str)
+ uploader_id = str_or_none(try_get(
+ video, lambda x: x['user']['id'], int))
+ channel = try_get(video, lambda x: x['channel']['name'], compat_str)
+ channel_id = str_or_none(try_get(
+ video, lambda x: x['channel']['id'], int))
+ like_count = int_or_none(video.get('likes'))
+ dislike_count = int_or_none(video.get('dislikes'))
+ view_count = int_or_none(video.get('playsQty'))
+ duration = int_or_none(video.get('durationInSeconds'))
+ timestamp = unified_timestamp(video.get('publishedAt'))
+
+ return {
+ 'id': video_id,
+ 'title': title,
+ 'formats': formats,
+ 'thumbnail': thumbnail,
+ 'uploader': uploader or channel,
+ 'uploader_id': uploader_id or channel_id,
+ 'channel': channel,
+ 'channel_id': channel_id,
+ 'timestamp': timestamp,
+ 'like_count': like_count,
+ 'dislike_count': dislike_count,
+ 'view_count': view_count,
+ 'duration': duration,
+ 'age_limit': 18,
+ }
+
+
+class PornerBrosIE(FourTubeBaseIE):
+ _VALID_URL = r'https?://(?:(?P<kind>www|m)\.)?pornerbros\.com/(?:videos/(?P<display_id>[^/]+)_|embed/)(?P<id>\d+)'
+ _URL_TEMPLATE = 'https://www.pornerbros.com/videos/video_%s'
+ _TKN_HOST = 'token.pornerbros.com'
+ _TESTS = [{
+ 'url': 'https://www.pornerbros.com/videos/skinny-brunette-takes-big-cock-down-her-anal-hole_181369',
+ 'md5': '6516c8ac63b03de06bc8eac14362db4f',
+ 'info_dict': {
+ 'id': '181369',
+ 'ext': 'mp4',
+ 'title': 'Skinny brunette takes big cock down her anal hole',
+ 'uploader': 'PornerBros HD',
+ 'uploader_id': 'pornerbros-hd',
+ 'upload_date': '20130130',
+ 'timestamp': 1359527401,
+ 'duration': 1224,
+ 'view_count': int,
+ 'categories': list,
+ 'age_limit': 18,
+ },
+ 'params': {
+ 'skip_download': True,
+ },
+ }, {
+ 'url': 'https://www.pornerbros.com/embed/181369',
+ 'only_matching': True,
+ }, {
+ 'url': 'https://m.pornerbros.com/videos/skinny-brunette-takes-big-cock-down-her-anal-hole_181369',
+ 'only_matching': True,
+ }]
diff --git a/yt_dlp/extractor/fox.py b/yt_dlp/extractor/fox.py
new file mode 100644
index 0000000..e00e977
--- /dev/null
+++ b/yt_dlp/extractor/fox.py
@@ -0,0 +1,178 @@
+import json
+import uuid
+
+from .common import InfoExtractor
+from ..compat import (
+ compat_str,
+ compat_urllib_parse_unquote,
+)
+from ..networking.exceptions import HTTPError
+from ..utils import (
+ ExtractorError,
+ int_or_none,
+ parse_age_limit,
+ parse_duration,
+ traverse_obj,
+ try_get,
+ unified_timestamp,
+ url_or_none,
+)
+
+
+class FOXIE(InfoExtractor):
+ _VALID_URL = r'https?://(?:www\.)?fox(?:sports)?\.com/(?:watch|replay)/(?P<id>[\da-fA-F]+)'
+ _TESTS = [{
+ # clip
+ 'url': 'https://www.fox.com/watch/4b765a60490325103ea69888fb2bd4e8/',
+ 'md5': 'ebd296fcc41dd4b19f8115d8461a3165',
+ 'info_dict': {
+ 'id': '4b765a60490325103ea69888fb2bd4e8',
+ 'ext': 'mp4',
+ 'title': 'Aftermath: Bruce Wayne Develops Into The Dark Knight',
+ 'description': 'md5:549cd9c70d413adb32ce2a779b53b486',
+ 'duration': 102,
+ 'timestamp': 1504291893,
+ 'upload_date': '20170901',
+ 'creator': 'FOX',
+ 'series': 'Gotham',
+ 'age_limit': 14,
+ 'episode': 'Aftermath: Bruce Wayne Develops Into The Dark Knight',
+ 'thumbnail': r're:^https?://.*\.jpg$',
+ },
+ 'params': {
+ 'skip_download': True,
+ },
+ }, {
+ # episode, geo-restricted
+ 'url': 'https://www.fox.com/watch/087036ca7f33c8eb79b08152b4dd75c1/',
+ 'only_matching': True,
+ }, {
+ # sports event, geo-restricted
+ 'url': 'https://www.fox.com/watch/b057484dade738d1f373b3e46216fa2c/',
+ 'only_matching': True,
+ }, {
+ # fox sports replay, geo-restricted
+ 'url': 'https://www.foxsports.com/replay/561f3e071347a24e5e877abc56b22e89',
+ 'only_matching': True,
+ }]
+ _GEO_BYPASS = False
+ _HOME_PAGE_URL = 'https://www.fox.com/'
+ _API_KEY = '6E9S4bmcoNnZwVLOHywOv8PJEdu76cM9'
+ _access_token = None
+ _device_id = compat_str(uuid.uuid4())
+
+ def _call_api(self, path, video_id, data=None):
+ headers = {
+ 'X-Api-Key': self._API_KEY,
+ }
+ if self._access_token:
+ headers['Authorization'] = 'Bearer ' + self._access_token
+ try:
+ return self._download_json(
+ 'https://api3.fox.com/v2.0/' + path,
+ video_id, data=data, headers=headers)
+ except ExtractorError as e:
+ if isinstance(e.cause, HTTPError) and e.cause.status == 403:
+ entitlement_issues = self._parse_json(
+ e.cause.response.read().decode(), video_id)['entitlementIssues']
+                for issue in entitlement_issues:
+                    if issue.get('errorCode') == 1005:
+                        raise ExtractorError(
+                            'This video is only available via cable service provider '
+                            'subscription. You may want to use --cookies.', expected=True)
+                messages = ', '.join([issue['message'] for issue in entitlement_issues])
+ raise ExtractorError(messages, expected=True)
+ raise
+
+ def _real_initialize(self):
+ if not self._access_token:
+ mvpd_auth = self._get_cookies(self._HOME_PAGE_URL).get('mvpd-auth')
+ if mvpd_auth:
+ self._access_token = (self._parse_json(compat_urllib_parse_unquote(
+ mvpd_auth.value), None, fatal=False) or {}).get('accessToken')
+ if not self._access_token:
+ self._access_token = self._call_api(
+ 'login', None, json.dumps({
+ 'deviceId': self._device_id,
+ }).encode())['accessToken']
+
+ def _real_extract(self, url):
+ video_id = self._match_id(url)
+
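+        # obtain a temporary 'TempPass' preview token (60 minutes) tied to this device id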
+ self._access_token = self._call_api(
+ 'previewpassmvpd?device_id=%s&mvpd_id=TempPass_fbcfox_60min' % self._device_id,
+ video_id)['accessToken']
+
+ video = self._call_api('watch', video_id, data=json.dumps({
+ 'capabilities': ['drm/widevine', 'fsdk/yo'],
+ 'deviceWidth': 1280,
+ 'deviceHeight': 720,
+ 'maxRes': '720p',
+ 'os': 'macos',
+ 'osv': '',
+ 'provider': {
+ 'freewheel': {'did': self._device_id},
+ 'vdms': {'rays': ''},
+ 'dmp': {'kuid': '', 'seg': ''}
+ },
+ 'playlist': '',
+ 'privacy': {'us': '1---'},
+ 'siteSection': '',
+ 'streamType': 'vod',
+ 'streamId': video_id}).encode('utf-8'))
+
+ title = video['name']
+ release_url = video['url']
+
+ try:
+ m3u8_url = self._download_json(release_url, video_id)['playURL']
+ except ExtractorError as e:
+ if isinstance(e.cause, HTTPError) and e.cause.status == 403:
+ error = self._parse_json(e.cause.response.read().decode(), video_id)
+ if error.get('exception') == 'GeoLocationBlocked':
+ self.raise_geo_restricted(countries=['US'])
+ raise ExtractorError(error['description'], expected=True)
+ raise
+ formats = self._extract_m3u8_formats(
+ m3u8_url, video_id, 'mp4',
+ entry_protocol='m3u8_native', m3u8_id='hls')
+
+ data = try_get(
+ video, lambda x: x['trackingData']['properties'], dict) or {}
+
+ duration = int_or_none(video.get('durationInSeconds')) or int_or_none(
+ video.get('duration')) or parse_duration(video.get('duration'))
+ timestamp = unified_timestamp(video.get('datePublished'))
+ creator = data.get('brand') or data.get('network') or video.get('network')
+ series = video.get('seriesName') or data.get(
+ 'seriesName') or data.get('show')
+
+ subtitles = {}
+ for doc_rel in video.get('documentReleases', []):
+ rel_url = doc_rel.get('url')
+            if not rel_url or doc_rel.get('format') != 'SCC':
+ continue
+ subtitles['en'] = [{
+ 'url': rel_url,
+ 'ext': 'scc',
+ }]
+ break
+
+ return {
+ 'id': video_id,
+ 'title': title,
+ 'formats': formats,
+ 'description': video.get('description'),
+ 'duration': duration,
+ 'timestamp': timestamp,
+ 'age_limit': parse_age_limit(video.get('contentRating')),
+ 'creator': creator,
+ 'series': series,
+ 'season_number': int_or_none(video.get('seasonNumber')),
+ 'episode': video.get('name'),
+ 'episode_number': int_or_none(video.get('episodeNumber')),
+ 'thumbnail': traverse_obj(video, ('images', 'still', 'raw'), expected_type=url_or_none),
+ 'release_year': int_or_none(video.get('releaseYear')),
+ 'subtitles': subtitles,
+ }
diff --git a/yt_dlp/extractor/fox9.py b/yt_dlp/extractor/fox9.py
new file mode 100644
index 0000000..dfbafa7
--- /dev/null
+++ b/yt_dlp/extractor/fox9.py
@@ -0,0 +1,38 @@
+from .common import InfoExtractor
+
+
+class FOX9IE(InfoExtractor):
+ _VALID_URL = r'https?://(?:www\.)?fox9\.com/video/(?P<id>\d+)'
+
+ def _real_extract(self, url):
+ video_id = self._match_id(url)
+ return self.url_result(
+ 'anvato:anvato_epfox_app_web_prod_b3373168e12f423f41504f207000188daf88251b:' + video_id,
+ 'Anvato', video_id)
+
+
+class FOX9NewsIE(InfoExtractor):
+ _VALID_URL = r'https?://(?:www\.)?fox9\.com/news/(?P<id>[^/?&#]+)'
+ _TEST = {
+ 'url': 'https://www.fox9.com/news/black-bear-in-tree-draws-crowd-in-downtown-duluth-minnesota',
+ 'md5': 'd6e1b2572c3bab8a849c9103615dd243',
+ 'info_dict': {
+ 'id': '314473',
+ 'ext': 'mp4',
+ 'title': 'Bear climbs tree in downtown Duluth',
+ 'description': 'md5:6a36bfb5073a411758a752455408ac90',
+ 'duration': 51,
+ 'timestamp': 1478123580,
+ 'upload_date': '20161102',
+ 'uploader': 'EPFOX',
+ 'categories': ['News', 'Sports'],
+ 'tags': ['news', 'video'],
+ },
+ }
+
+ def _real_extract(self, url):
+ display_id = self._match_id(url)
+ webpage = self._download_webpage(url, display_id)
+ anvato_id = self._search_regex(
+ r'anvatoId\s*:\s*[\'"](\d+)', webpage, 'anvato id')
+ return self.url_result('https://www.fox9.com/video/' + anvato_id, 'FOX9')
diff --git a/yt_dlp/extractor/foxnews.py b/yt_dlp/extractor/foxnews.py
new file mode 100644
index 0000000..6aa6361
--- /dev/null
+++ b/yt_dlp/extractor/foxnews.py
@@ -0,0 +1,185 @@
+import re
+
+from .amp import AMPIE
+from .common import InfoExtractor
+
+
+class FoxNewsIE(AMPIE):
+ IE_NAME = 'foxnews'
+ IE_DESC = 'Fox News and Fox Business Video'
+ _VALID_URL = r'https?://video\.(?:insider\.)?fox(?:news|business)\.com/v/(?:video-embed\.html\?video_id=)?(?P<id>\d+)'
+ _TESTS = [
+ {
+ 'url': 'https://video.foxnews.com/v/6320653836112',
+ 'info_dict': {
+ 'id': '6320653836112',
+ 'ext': 'mp4',
+ 'title': 'Tucker Carlson joins \'Gutfeld!\' to discuss his new documentary',
+ 'thumbnail': r're:^https?://.*\.jpg$',
+ 'duration': 404,
+ 'upload_date': '20230217',
+ 'description': 'md5:858a8a36f59e9ca897d758855bcdfa02',
+ 'timestamp': 1676611344.0,
+ },
+ 'params': {'skip_download': 'm3u8'},
+ },
+ {
+ # From http://insider.foxnews.com/2016/08/25/univ-wisconsin-student-group-pushing-silence-certain-words
+ 'url': 'http://video.insider.foxnews.com/v/video-embed.html?video_id=5099377331001&autoplay=true&share_url=http://insider.foxnews.com/2016/08/25/univ-wisconsin-student-group-pushing-silence-certain-words&share_title=Student%20Group:%20Saying%20%27Politically%20Correct,%27%20%27Trash%27%20and%20%27Lame%27%20Is%20Offensive&share=true',
+ 'info_dict': {
+ 'id': '5099377331001',
+ 'ext': 'mp4',
+ 'title': '82416_censoring',
+ 'description': '82416_censoring',
+ 'upload_date': '20160826',
+ 'timestamp': 1472169708.0,
+ 'thumbnail': r're:^https?://.*\.jpg$',
+ 'duration': 521,
+ },
+ 'params': {'skip_download': 'm3u8'},
+ },
+ {
+ 'url': 'http://video.foxnews.com/v/3937480/frozen-in-time/#sp=show-clips',
+ 'md5': '32aaded6ba3ef0d1c04e238d01031e5e',
+ 'info_dict': {
+ 'id': '3937480',
+ 'ext': 'flv',
+ 'title': 'Frozen in Time',
+ 'description': '16-year-old girl is size of toddler',
+ 'duration': 265,
+ 'timestamp': 1304411491,
+ 'upload_date': '20110503',
+ 'thumbnail': r're:^https?://.*\.jpg$',
+ },
+ 'skip': '404 page',
+ },
+ {
+ 'url': 'http://video.foxnews.com/v/3922535568001/rep-luis-gutierrez-on-if-obamas-immigration-plan-is-legal/#sp=show-clips',
+ 'md5': '5846c64a1ea05ec78175421b8323e2df',
+ 'info_dict': {
+ 'id': '3922535568001',
+ 'ext': 'mp4',
+ 'title': "Rep. Luis Gutierrez on if Obama's immigration plan is legal",
+ 'description': "Congressman discusses president's plan",
+ 'duration': 292,
+ 'timestamp': 1417662047,
+ 'upload_date': '20141204',
+ 'thumbnail': r're:^https?://.*\.jpg$',
+ },
+ 'skip': 'm3u8 HTTP error 400 in web browser',
+ },
+ {
+ 'url': 'http://video.foxnews.com/v/video-embed.html?video_id=3937480&d=video.foxnews.com',
+ 'only_matching': True,
+ },
+ {
+ 'url': 'http://video.foxbusiness.com/v/4442309889001',
+ 'only_matching': True,
+ },
+ ]
+
+ @classmethod
+ def _extract_embed_urls(cls, url, webpage):
+ for mobj in re.finditer(
+ r'''(?x)
+ <(?:script|(?:amp-)?iframe)[^>]+\bsrc=["\']
+ (?:https?:)?//video\.foxnews\.com/v/(?:video-embed\.html|embed\.js)\?
+ (?:[^>"\']+&)?(?:video_)?id=(?P<video_id>\d+)
+ ''', webpage):
+ yield f'https://video.foxnews.com/v/video-embed.html?video_id={mobj.group("video_id")}'
+
+ def _real_extract(self, url):
+ video_id = self._match_id(url)
+
+ info = self._extract_feed_info(
+ f'https://api.foxnews.com/v3/video-player/{video_id}?callback=uid_{video_id}')
+ info['id'] = video_id
+ return info
+
+
+class FoxNewsVideoIE(InfoExtractor):
+ _VALID_URL = r'https?://(?:www\.)?foxnews\.com/video/(?P<id>\d+)'
+ _TESTS = [{
+ 'url': 'https://www.foxnews.com/video/6328632286112',
+ 'info_dict': {
+ 'id': '6328632286112',
+ 'ext': 'mp4',
+ 'title': 'Review: 2023 Toyota Prius Prime',
+ 'duration': 155,
+ 'thumbnail': r're:^https://.+\.jpg$',
+ 'timestamp': 1685720177.0,
+ 'upload_date': '20230602',
+ 'description': 'md5:b69aafb125b41c1402e9744f53d6edc4',
+ },
+ 'params': {'skip_download': 'm3u8'},
+ }, {
+ 'url': 'https://www.foxnews.com/video/6313058664112',
+ 'info_dict': {
+ 'id': '6313058664112',
+ 'ext': 'mp4',
+ 'thumbnail': r're:https://.+/1280x720/match/image\.jpg',
+ 'upload_date': '20220930',
+ 'description': 'New York City, Kids Therapy, Biden',
+ 'duration': 2415,
+ 'title': 'Gutfeld! - Thursday, September 29',
+ 'timestamp': 1664527538,
+ },
+ 'skip': '404 page',
+ }]
+
+ def _real_extract(self, url):
+ video_id = self._match_id(url)
+ return self.url_result(f'https://video.foxnews.com/v/{video_id}', FoxNewsIE, video_id)
+
+
+class FoxNewsArticleIE(InfoExtractor):
+ _VALID_URL = r'https?://(?:www\.)?(?:insider\.)?foxnews\.com/(?!v)([^/]+/)+(?P<id>[a-z-]+)'
+ IE_NAME = 'foxnews:article'
+
+ _TESTS = [{
+ # data-video-id
+ 'url': 'https://www.foxnews.com/politics/2016/09/08/buzz-about-bud-clinton-camp-denies-claims-wore-earpiece-at-forum.html',
+ 'md5': 'd2dd6ce809cedeefa96460e964821437',
+ 'info_dict': {
+ 'id': '5116295019001',
+ 'ext': 'mp4',
+ 'title': 'Trump and Clinton asked to defend positions on Iraq War',
+ 'description': 'Veterans and Fox News host Dana Perino react on \'The Kelly File\' to NBC\'s presidential forum',
+ 'timestamp': 1473301045,
+ 'upload_date': '20160908',
+ 'thumbnail': r're:^https?://.*\.jpg$',
+ 'duration': 426,
+ },
+ 'params': {'skip_download': 'm3u8'},
+ }, {
+ # iframe embed
+ 'url': 'https://www.foxnews.com/us/2018/03/09/parkland-survivor-kyle-kashuv-on-meeting-trump-his-app-to-prevent-another-school-shooting.amp.html?__twitter_impression=true',
+ 'info_dict': {
+ 'id': '5748266721001',
+ 'ext': 'flv',
+ 'title': 'Kyle Kashuv has a positive message for the Trump White House',
+ 'description': 'Marjory Stoneman Douglas student disagrees with classmates.',
+ 'thumbnail': r're:^https?://.*\.jpg$',
+ 'duration': 229,
+ 'timestamp': 1520594670,
+ 'upload_date': '20180309',
+ },
+ 'skip': '404 page',
+ }, {
+ 'url': 'http://insider.foxnews.com/2016/08/25/univ-wisconsin-student-group-pushing-silence-certain-words',
+ 'only_matching': True,
+ }]
+
+ def _real_extract(self, url):
+ display_id = self._match_id(url)
+ webpage = self._download_webpage(url, display_id)
+
+ video_id = self._html_search_regex(
+ r'data-video-id=([\'"])(?P<id>[^\'"]+)\1',
+ webpage, 'video ID', group='id', default=None)
+ if video_id:
+ return self.url_result(
+ 'http://video.foxnews.com/v/' + video_id, FoxNewsIE.ie_key())
+
+ return self.url_result(
+ next(FoxNewsIE._extract_embed_urls(url, webpage)), FoxNewsIE.ie_key())
diff --git a/yt_dlp/extractor/foxsports.py b/yt_dlp/extractor/foxsports.py
new file mode 100644
index 0000000..8e89ccf
--- /dev/null
+++ b/yt_dlp/extractor/foxsports.py
@@ -0,0 +1,52 @@
+from .common import InfoExtractor
+from .uplynk import UplynkPreplayIE
+from ..networking import HEADRequest
+from ..utils import float_or_none, make_archive_id, smuggle_url
+
+
+class FoxSportsIE(InfoExtractor):
+ _VALID_URL = r'https?://(?:www\.)?foxsports\.com/watch/(?P<id>[\w-]+)'
+ _TESTS = [{
+ 'url': 'https://www.foxsports.com/watch/play-612168c6700004b',
+ 'info_dict': {
+ 'id': 'b72f5bd8658140baa5791bb676433733',
+ 'ext': 'mp4',
+ 'display_id': 'play-612168c6700004b',
+ 'title': 'md5:e0c4ecac3a1f25295b4fae22fb5c126a',
+ 'description': 'md5:371bc43609708ae2b9e1a939229762af',
+ 'uploader_id': '06b4a36349624051a9ba52ac3a91d268',
+ 'upload_date': '20221205',
+ 'timestamp': 1670262586,
+ 'duration': 31.7317,
+ 'thumbnail': r're:^https?://.*\.jpg$',
+ 'extra_param_to_segment_url': str,
+ },
+ 'params': {
+ 'skip_download': 'm3u8',
+ },
+ }]
+
+ def _real_extract(self, url):
+ video_id = self._match_id(url)
+ webpage = self._download_webpage(url, video_id)
+ json_ld = self._search_json_ld(webpage, video_id, expected_type='VideoObject', default={})
+ data = self._download_json(
+ f'https://api3.fox.com/v2.0/vodplayer/sportsclip/{video_id}',
+ video_id, note='Downloading API JSON', headers={
+ 'x-api-key': 'cf289e299efdfa39fb6316f259d1de93',
+ })
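+        # a HEAD request suffices here: only the redirected final URL is needed,
+        # and it is handed to the Uplynk extractor as the preplay URL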
+ preplay_url = self._request_webpage(
+ HEADRequest(data['url']), video_id, 'Fetching preplay URL').url
+
+ return {
+ '_type': 'url_transparent',
+ 'ie_key': UplynkPreplayIE.ie_key(),
+ 'url': smuggle_url(preplay_url, {'Origin': 'https://www.foxsports.com'}),
+ 'display_id': video_id,
+ 'title': data.get('name') or json_ld.get('title'),
+ 'description': data.get('description') or json_ld.get('description'),
+ 'duration': float_or_none(data.get('durationInSeconds')),
+ 'timestamp': json_ld.get('timestamp'),
+ 'thumbnails': json_ld.get('thumbnails'),
+ '_old_archive_ids': [make_archive_id(self, video_id)],
+ }
diff --git a/yt_dlp/extractor/fptplay.py b/yt_dlp/extractor/fptplay.py
new file mode 100644
index 0000000..85613ba
--- /dev/null
+++ b/yt_dlp/extractor/fptplay.py
@@ -0,0 +1,117 @@
+import hashlib
+import time
+import urllib.parse
+
+from .common import InfoExtractor
+from ..utils import (
+ clean_html,
+ join_nonempty,
+ strip_or_none,
+)
+
+
+class FptplayIE(InfoExtractor):
+ _VALID_URL = r'https?://fptplay\.vn/xem-video/[^/]+\-(?P<id>\w+)(?:/tap-(?P<episode>\d+)?/?(?:[?#]|$)|)'
+ _GEO_COUNTRIES = ['VN']
+ IE_NAME = 'fptplay'
+ IE_DESC = 'fptplay.vn'
+ _TESTS = [{
+ 'url': 'https://fptplay.vn/xem-video/nhan-duyen-dai-nhan-xin-dung-buoc-621a123016f369ebbde55945',
+ 'md5': 'ca0ee9bc63446c0c3e9a90186f7d6b33',
+ 'info_dict': {
+ 'id': '621a123016f369ebbde55945',
+ 'ext': 'mp4',
+ 'title': 'Nhân Duyên Đại Nhân Xin Dừng Bước - Tập 1A',
+ 'description': 'md5:23cf7d1ce0ade8e21e76ae482e6a8c6c',
+ },
+ }, {
+ 'url': 'https://fptplay.vn/xem-video/ma-toi-la-dai-gia-61f3aa8a6b3b1d2e73c60eb5/tap-3',
+ 'md5': 'b35be968c909b3e4e1e20ca45dd261b1',
+ 'info_dict': {
+ 'id': '61f3aa8a6b3b1d2e73c60eb5',
+ 'ext': 'mp4',
+ 'title': 'Má Tôi Là Đại Gia - Tập 3',
+ 'description': 'md5:ff8ba62fb6e98ef8875c42edff641d1c',
+ },
+ }, {
+ 'url': 'https://fptplay.vn/xem-video/lap-toi-do-giam-under-the-skin-6222d9684ec7230fa6e627a2/tap-4',
+ 'md5': 'bcb06c55ec14786d7d4eda07fa1ccbb9',
+ 'info_dict': {
+ 'id': '6222d9684ec7230fa6e627a2',
+ 'ext': 'mp4',
+ 'title': 'Lạp Tội Đồ Giám - Tập 2B',
+ 'description': 'md5:e5a47e9d35fbf7e9479ca8a77204908b',
+ },
+ }, {
+ 'url': 'https://fptplay.vn/xem-video/nha-co-chuyen-hi-alls-well-ends-well-1997-6218995f6af792ee370459f0',
+ 'only_matching': True,
+ }]
+
+ def _real_extract(self, url):
+ video_id, slug_episode = self._match_valid_url(url).group('id', 'episode')
+ webpage = self._download_webpage(url, video_id=video_id, fatal=False) or ''
+ title = self._search_regex(
+ r'(?s)<h4\s+class="mb-1 text-2xl text-white"[^>]*>(.+)</h4>', webpage, 'title', fatal=False)
+ real_episode = slug_episode if not title else self._search_regex(
+ r'<p.+title="(?P<episode>[^">]+)"\s+class="epi-title active"', webpage, 'episode', fatal=False)
+ title = strip_or_none(title) or self._html_search_meta(('og:title', 'twitter:title'), webpage)
+
+ info = self._download_json(
+ self.get_api_with_st_token(video_id, int(slug_episode) - 1 if slug_episode else 0), video_id)
+ formats, subtitles = self._extract_m3u8_formats_and_subtitles(info['data']['url'], video_id, 'mp4')
+ return {
+ 'id': video_id,
+ 'title': join_nonempty(title, real_episode, delim=' - '),
+ 'description': (
+ clean_html(self._search_regex(r'<p\s+class="overflow-hidden"[^>]*>(.+)</p>', webpage, 'description'))
+ or self._html_search_meta(('og:description', 'twitter:description'), webpage)),
+ 'formats': formats,
+ 'subtitles': subtitles,
+ }
+
+ def get_api_with_st_token(self, video_id, episode):
+ path = f'/api/v6.2_w/stream/vod/{video_id}/{episode}/auto_vip'
+ timestamp = int(time.time()) + 10800
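+        # 'e' in the final query appears to be an expiry timestamp ~3 hours (10800 s) ahead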
+
+ t = hashlib.md5(f'WEBv6Dkdsad90dasdjlALDDDS{timestamp}{path}'.encode()).hexdigest().upper()
+ r = 'ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/'
+ n = [int(f'0x{t[2 * o: 2 * o + 2]}', 16) for o in range(len(t) // 2)]
+
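+        # convert() below appears to be a line-by-line translation of the site's
+        # JS base64 encoder: it packs the MD5 digest bytes in n into 6-bit groups
+        # and maps them through the alphabet r; the result is made URL-safe before
+        # being sent as the 'st' query parameter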
+ def convert(e):
+ t = ''
+ n = 0
+ i = [0, 0, 0]
+ a = [0, 0, 0, 0]
+ s = len(e)
+ c = 0
+ for z in range(s, 0, -1):
+ if n <= 3:
+ i[n] = e[c]
+ n += 1
+ c += 1
+ if 3 == n:
+ a[0] = (252 & i[0]) >> 2
+ a[1] = ((3 & i[0]) << 4) + ((240 & i[1]) >> 4)
+ a[2] = ((15 & i[1]) << 2) + ((192 & i[2]) >> 6)
+ a[3] = (63 & i[2])
+ for v in range(4):
+ t += r[a[v]]
+ n = 0
+ if n:
+ for o in range(n, 3):
+ i[o] = 0
+
+ for o in range(n + 1):
+ a[0] = (252 & i[0]) >> 2
+ a[1] = ((3 & i[0]) << 4) + ((240 & i[1]) >> 4)
+ a[2] = ((15 & i[1]) << 2) + ((192 & i[2]) >> 6)
+ a[3] = (63 & i[2])
+ t += r[a[o]]
+ n += 1
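+            # the JS source presumably appends '=' padding here; since '=' is
+            # stripped from the token below anyway, appending '' is a harmless no-op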
+ while n < 3:
+ t += ''
+ n += 1
+ return t
+
+ st_token = convert(n).replace('+', '-').replace('/', '_').replace('=', '')
+ return f'https://api.fptplay.net{path}?{urllib.parse.urlencode({"st": st_token, "e": timestamp})}'
diff --git a/yt_dlp/extractor/franceinter.py b/yt_dlp/extractor/franceinter.py
new file mode 100644
index 0000000..779249b
--- /dev/null
+++ b/yt_dlp/extractor/franceinter.py
@@ -0,0 +1,56 @@
+from .common import InfoExtractor
+from ..utils import month_by_name
+
+
+class FranceInterIE(InfoExtractor):
+ _VALID_URL = r'https?://(?:www\.)?franceinter\.fr/emissions/(?P<id>[^?#]+)'
+
+ _TEST = {
+ 'url': 'https://www.franceinter.fr/emissions/affaires-sensibles/affaires-sensibles-07-septembre-2016',
+ 'md5': '9e54d7bdb6fdc02a841007f8a975c094',
+ 'info_dict': {
+ 'id': 'affaires-sensibles/affaires-sensibles-07-septembre-2016',
+ 'ext': 'mp3',
+ 'title': 'Affaire Cahuzac : le contentieux du compte en Suisse',
+ 'description': 'md5:401969c5d318c061f86bda1fa359292b',
+ 'thumbnail': r're:^https?://.*\.jpg',
+ 'upload_date': '20160907',
+ },
+ }
+
+ def _real_extract(self, url):
+ video_id = self._match_id(url)
+
+ webpage = self._download_webpage(url, video_id)
+
+ video_url = self._search_regex(
+ r'(?s)<div[^>]+class=["\']page-diffusion["\'][^>]*>.*?<button[^>]+data-url=(["\'])(?P<url>(?:(?!\1).)+)\1',
+ webpage, 'video url', group='url')
+
+ title = self._og_search_title(webpage)
+ description = self._og_search_description(webpage)
+ thumbnail = self._html_search_meta(['og:image', 'twitter:image'], webpage)
+
+ upload_date_str = self._search_regex(
+ r'class=["\']\s*cover-emission-period\s*["\'][^>]*>[^<]+\s+(\d{1,2}\s+[^\s]+\s+\d{4})<',
+ webpage, 'upload date', fatal=False)
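+        # e.g. '7 septembre 2016' -> ['2016', 'septembre', '7'] after the reverse,
+        # then '20160907' once month and day are zero-padded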
+ if upload_date_str:
+ upload_date_list = upload_date_str.split()
+ upload_date_list.reverse()
+ upload_date_list[1] = '%02d' % (month_by_name(upload_date_list[1], lang='fr') or 0)
+ upload_date_list[2] = '%02d' % int(upload_date_list[2])
+ upload_date = ''.join(upload_date_list)
+ else:
+ upload_date = None
+
+ return {
+ 'id': video_id,
+ 'title': title,
+ 'description': description,
+ 'thumbnail': thumbnail,
+ 'upload_date': upload_date,
+ 'formats': [{
+ 'url': video_url,
+ 'vcodec': 'none',
+ }],
+ }
diff --git a/yt_dlp/extractor/francetv.py b/yt_dlp/extractor/francetv.py
new file mode 100644
index 0000000..7b8f7dd
--- /dev/null
+++ b/yt_dlp/extractor/francetv.py
@@ -0,0 +1,423 @@
+import re
+import urllib.parse
+
+from .common import InfoExtractor
+from .dailymotion import DailymotionIE
+from ..networking import HEADRequest
+from ..utils import (
+ determine_ext,
+ filter_dict,
+ format_field,
+ int_or_none,
+ join_nonempty,
+ parse_iso8601,
+ smuggle_url,
+ unsmuggle_url,
+ url_or_none,
+)
+from ..utils.traversal import traverse_obj
+
+
+class FranceTVBaseInfoExtractor(InfoExtractor):
+ def _make_url_result(self, video_id, url=None):
+ video_id = video_id.split('@')[0] # for compat with old @catalog IDs
+ full_id = f'francetv:{video_id}'
+ if url:
+ full_id = smuggle_url(full_id, {'hostname': urllib.parse.urlparse(url).hostname})
+ return self.url_result(full_id, FranceTVIE, video_id)
+
+
+class FranceTVIE(InfoExtractor):
+ _VALID_URL = r'francetv:(?P<id>[^@#]+)'
+ _GEO_COUNTRIES = ['FR']
+ _GEO_BYPASS = False
+
+ _TESTS = [{
+ 'url': 'francetv:ec217ecc-0733-48cf-ac06-af1347b849d1',
+ 'info_dict': {
+ 'id': 'ec217ecc-0733-48cf-ac06-af1347b849d1',
+ 'ext': 'mp4',
+ 'title': '13h15, le dimanche... - Les mystères de Jésus',
+ 'timestamp': 1502623500,
+ 'duration': 2580,
+ 'thumbnail': r're:^https?://.*\.jpg$',
+ 'upload_date': '20170813',
+ },
+ 'params': {'skip_download': 'm3u8'},
+ }, {
+ 'url': 'francetv:162311093',
+ 'only_matching': True,
+ }, {
+ 'url': 'francetv:NI_1004933@Zouzous',
+ 'only_matching': True,
+ }, {
+ 'url': 'francetv:NI_983319@Info-web',
+ 'only_matching': True,
+ }, {
+ 'url': 'francetv:NI_983319',
+ 'only_matching': True,
+ }, {
+ 'url': 'francetv:NI_657393@Regions',
+ 'only_matching': True,
+ }, {
+ # france-3 live
+ 'url': 'francetv:SIM_France3',
+ 'only_matching': True,
+ }]
+
+ def _extract_video(self, video_id, hostname=None):
+ is_live = None
+ videos = []
+ title = None
+ subtitle = None
+ episode_number = None
+ season_number = None
+ image = None
+ duration = None
+ timestamp = None
+ spritesheets = None
+
+ # desktop+chrome returns dash; mobile+safari returns hls
+ for device_type, browser in [('desktop', 'chrome'), ('mobile', 'safari')]:
+ dinfo = self._download_json(
+ f'https://k7.ftven.fr/videos/{video_id}', video_id,
+ f'Downloading {device_type} {browser} video JSON', query=filter_dict({
+ 'device_type': device_type,
+ 'browser': browser,
+ 'domain': hostname,
+ }), fatal=False)
+
+ if not dinfo:
+ continue
+
+ video = traverse_obj(dinfo, ('video', {dict}))
+ if video:
+ videos.append(video)
+ if duration is None:
+ duration = video.get('duration')
+ if is_live is None:
+ is_live = video.get('is_live')
+ if spritesheets is None:
+ spritesheets = video.get('spritesheets')
+
+ meta = traverse_obj(dinfo, ('meta', {dict}))
+ if meta:
+ if title is None:
+ title = meta.get('title')
+                # meta['pre_title'] contains the season and episode numbers for series, in the form "S<season> E<episode>"
+ season_number, episode_number = self._search_regex(
+ r'S(\d+)\s*E(\d+)', meta.get('pre_title'), 'episode info', group=(1, 2), default=(None, None))
+ if subtitle is None:
+ subtitle = meta.get('additional_title')
+ if image is None:
+ image = meta.get('image_url')
+ if timestamp is None:
+ timestamp = parse_iso8601(meta.get('broadcasted_at'))
+
+ formats, subtitles, video_url = [], {}, None
+ for video in traverse_obj(videos, lambda _, v: url_or_none(v['url'])):
+ video_url = video['url']
+ format_id = video.get('format')
+
+ if token_url := url_or_none(video.get('token')):
+ tokenized_url = traverse_obj(self._download_json(
+ token_url, video_id, f'Downloading signed {format_id} manifest URL',
+ fatal=False, query={
+ 'format': 'json',
+ 'url': video_url,
+ }), ('url', {url_or_none}))
+ if tokenized_url:
+ video_url = tokenized_url
+
+ ext = determine_ext(video_url)
+ if ext == 'f4m':
+ formats.extend(self._extract_f4m_formats(
+ video_url, video_id, f4m_id=format_id or ext, fatal=False))
+ elif ext == 'm3u8':
+ format_id = format_id or 'hls'
+ fmts, subs = self._extract_m3u8_formats_and_subtitles(
+ video_url, video_id, 'mp4', m3u8_id=format_id, fatal=False)
+ for f in traverse_obj(fmts, lambda _, v: v['vcodec'] == 'none' and v.get('tbr') is None):
+ if mobj := re.match(rf'{format_id}-[Aa]udio-\w+-(?P<bitrate>\d+)', f['format_id']):
+ f.update({
+ 'tbr': int_or_none(mobj.group('bitrate')),
+ 'acodec': 'mp4a',
+ })
+ formats.extend(fmts)
+ self._merge_subtitles(subs, target=subtitles)
+ elif ext == 'mpd':
+ fmts, subs = self._extract_mpd_formats_and_subtitles(
+ video_url, video_id, mpd_id=format_id or 'dash', fatal=False)
+ formats.extend(fmts)
+ self._merge_subtitles(subs, target=subtitles)
+ elif video_url.startswith('rtmp'):
+ formats.append({
+ 'url': video_url,
+ 'format_id': join_nonempty('rtmp', format_id),
+ 'ext': 'flv',
+ })
+ else:
+ if self._is_valid_url(video_url, video_id, format_id):
+ formats.append({
+ 'url': video_url,
+ 'format_id': format_id,
+ })
+
+ # XXX: what is video['captions']?
+
+ if not formats and video_url:
+ urlh = self._request_webpage(
+ HEADRequest(video_url), video_id, 'Checking for geo-restriction',
+ fatal=False, expected_status=403)
+ if urlh and urlh.headers.get('x-errortype') == 'geo':
+ self.raise_geo_restricted(countries=self._GEO_COUNTRIES, metadata_available=True)
+
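+        # 'qaa'..'qtz' codes are reserved for local use in ISO 639-2; here
+        # 'qtz'/'qad' mark audio-description tracks, which are deprioritized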
+ for f in formats:
+ if f.get('acodec') != 'none' and f.get('language') in ('qtz', 'qad'):
+ f['language_preference'] = -10
+ f['format_note'] = 'audio description%s' % format_field(f, 'format_note', ', %s')
+
+ if spritesheets:
+ formats.append({
+ 'format_id': 'spritesheets',
+ 'format_note': 'storyboard',
+ 'acodec': 'none',
+ 'vcodec': 'none',
+ 'ext': 'mhtml',
+ 'protocol': 'mhtml',
+ 'url': 'about:invalid',
+ 'fragments': [{
+ 'url': sheet,
+ # XXX: not entirely accurate; each spritesheet seems to be
+ # a 10×10 grid of thumbnails corresponding to approximately
+ # 2 seconds of the video; the last spritesheet may be shorter
+ 'duration': 200,
+ } for sheet in traverse_obj(spritesheets, (..., {url_or_none}))]
+ })
+
+ return {
+ 'id': video_id,
+ 'title': join_nonempty(title, subtitle, delim=' - ').strip(),
+ 'thumbnail': image,
+ 'duration': duration,
+ 'timestamp': timestamp,
+ 'is_live': is_live,
+ 'formats': formats,
+ 'subtitles': subtitles,
+ 'episode': subtitle if episode_number else None,
+ 'series': title if episode_number else None,
+ 'episode_number': int_or_none(episode_number),
+ 'season_number': int_or_none(season_number),
+ '_format_sort_fields': ('res', 'tbr', 'proto'), # prioritize m3u8 over dash
+ }
+
+ def _real_extract(self, url):
+ url, smuggled_data = unsmuggle_url(url, {})
+ video_id = self._match_id(url)
+ hostname = smuggled_data.get('hostname') or 'www.france.tv'
+
+ return self._extract_video(video_id, hostname=hostname)
+
+
+class FranceTVSiteIE(FranceTVBaseInfoExtractor):
+ _VALID_URL = r'https?://(?:(?:www\.)?france\.tv|mobile\.france\.tv)/(?:[^/]+/)*(?P<id>[^/]+)\.html'
+
+ _TESTS = [{
+ 'url': 'https://www.france.tv/france-2/13h15-le-dimanche/140921-les-mysteres-de-jesus.html',
+ 'info_dict': {
+ 'id': 'ec217ecc-0733-48cf-ac06-af1347b849d1',
+ 'ext': 'mp4',
+ 'title': '13h15, le dimanche... - Les mystères de Jésus',
+ 'timestamp': 1502623500,
+ 'duration': 2580,
+ 'thumbnail': r're:^https?://.*\.jpg$',
+ 'upload_date': '20170813',
+ },
+ 'params': {
+ 'skip_download': True,
+ },
+ 'add_ie': [FranceTVIE.ie_key()],
+ }, {
+ # geo-restricted
+ 'url': 'https://www.france.tv/enfants/six-huit-ans/foot2rue/saison-1/3066387-duel-au-vieux-port.html',
+ 'info_dict': {
+ 'id': 'a9050959-eedd-4b4a-9b0d-de6eeaa73e44',
+ 'ext': 'mp4',
+ 'title': 'Foot2Rue - Duel au vieux port',
+ 'episode': 'Duel au vieux port',
+ 'series': 'Foot2Rue',
+ 'episode_number': 1,
+ 'season_number': 1,
+ 'timestamp': 1642761360,
+ 'upload_date': '20220121',
+ 'season': 'Season 1',
+ 'thumbnail': r're:^https?://.*\.jpg$',
+ 'duration': 1441,
+ },
+ }, {
+ # geo-restricted livestream (workflow == 'token-akamai')
+ 'url': 'https://www.france.tv/france-4/direct.html',
+ 'info_dict': {
+ 'id': '9a6a7670-dde9-4264-adbc-55b89558594b',
+ 'ext': 'mp4',
+ 'title': r're:France 4 en direct .+',
+ 'live_status': 'is_live',
+ },
+ 'skip': 'geo-restricted livestream',
+ }, {
+ # livestream (workflow == 'dai')
+ 'url': 'https://www.france.tv/france-2/direct.html',
+ 'info_dict': {
+ 'id': '006194ea-117d-4bcf-94a9-153d999c59ae',
+ 'ext': 'mp4',
+ 'title': r're:France 2 en direct .+',
+ 'live_status': 'is_live',
+ },
+ 'params': {'skip_download': 'livestream'},
+ }, {
+ # france3
+ 'url': 'https://www.france.tv/france-3/des-chiffres-et-des-lettres/139063-emission-du-mardi-9-mai-2017.html',
+ 'only_matching': True,
+ }, {
+ # france4
+ 'url': 'https://www.france.tv/france-4/hero-corp/saison-1/134151-apres-le-calme.html',
+ 'only_matching': True,
+ }, {
+ # france5
+ 'url': 'https://www.france.tv/france-5/c-a-dire/saison-10/137013-c-a-dire.html',
+ 'only_matching': True,
+ }, {
+ # franceo
+ 'url': 'https://www.france.tv/france-o/archipels/132249-mon-ancetre-l-esclave.html',
+ 'only_matching': True,
+ }, {
+ 'url': 'https://www.france.tv/documentaires/histoire/136517-argentine-les-500-bebes-voles-de-la-dictature.html',
+ 'only_matching': True,
+ }, {
+ 'url': 'https://www.france.tv/jeux-et-divertissements/divertissements/133965-le-web-contre-attaque.html',
+ 'only_matching': True,
+ }, {
+ 'url': 'https://mobile.france.tv/france-5/c-dans-l-air/137347-emission-du-vendredi-12-mai-2017.html',
+ 'only_matching': True,
+ }, {
+ 'url': 'https://www.france.tv/142749-rouge-sang.html',
+ 'only_matching': True,
+ }, {
+ # france-3 live
+ 'url': 'https://www.france.tv/france-3/direct.html',
+ 'only_matching': True,
+ }]
+
+ def _real_extract(self, url):
+ display_id = self._match_id(url)
+
+ webpage = self._download_webpage(url, display_id)
+
+ video_id = self._search_regex(
+ r'(?:data-main-video\s*=|videoId["\']?\s*[:=])\s*(["\'])(?P<id>(?:(?!\1).)+)\1',
+ webpage, 'video id', default=None, group='id')
+
+ if not video_id:
+ video_id = self._html_search_regex(
+ r'(?:href=|player\.setVideo\(\s*)"http://videos?\.francetv\.fr/video/([^@"]+@[^"]+)"',
+ webpage, 'video ID')
+
+ return self._make_url_result(video_id, url=url)
+
+
+class FranceTVInfoIE(FranceTVBaseInfoExtractor):
+ IE_NAME = 'francetvinfo.fr'
+ _VALID_URL = r'https?://(?:www|mobile|france3-regions)\.francetvinfo\.fr/(?:[^/]+/)*(?P<id>[^/?#&.]+)'
+
+ _TESTS = [{
+ 'url': 'https://www.francetvinfo.fr/replay-jt/france-3/soir-3/jt-grand-soir-3-jeudi-22-aout-2019_3561461.html',
+ 'info_dict': {
+ 'id': 'd12458ee-5062-48fe-bfdd-a30d6a01b793',
+ 'ext': 'mp4',
+ 'title': 'Soir 3',
+ 'upload_date': '20190822',
+ 'timestamp': 1566510730,
+ 'thumbnail': r're:^https?://.*\.jpe?g$',
+ 'duration': 1637,
+ 'subtitles': {
+ 'fr': 'mincount:2',
+ },
+ },
+ 'params': {
+ 'skip_download': True,
+ },
+ 'add_ie': [FranceTVIE.ie_key()],
+ }, {
+        'note': 'Only an image exists in the initial webpage instead of the video',
+ 'url': 'https://www.francetvinfo.fr/sante/maladie/coronavirus/covid-19-en-inde-une-situation-catastrophique-a-new-dehli_4381095.html',
+ 'info_dict': {
+ 'id': '7d204c9e-a2d3-11eb-9e4c-000d3a23d482',
+ 'ext': 'mp4',
+ 'title': 'Covid-19 : une situation catastrophique à New Dehli - Édition du mercredi 21 avril 2021',
+ 'thumbnail': r're:^https?://.*\.jpe?g$',
+ 'duration': 76,
+ 'timestamp': 1619028518,
+ 'upload_date': '20210421',
+ },
+ 'params': {
+ 'skip_download': True,
+ },
+ 'add_ie': [FranceTVIE.ie_key()],
+ }, {
+ 'url': 'http://www.francetvinfo.fr/elections/europeennes/direct-europeennes-regardez-le-debat-entre-les-candidats-a-la-presidence-de-la-commission_600639.html',
+ 'only_matching': True,
+ }, {
+ 'url': 'http://www.francetvinfo.fr/economie/entreprises/les-entreprises-familiales-le-secret-de-la-reussite_933271.html',
+ 'only_matching': True,
+ }, {
+ 'url': 'http://france3-regions.francetvinfo.fr/bretagne/cotes-d-armor/thalassa-echappee-breizh-ce-venredi-dans-les-cotes-d-armor-954961.html',
+ 'only_matching': True,
+ }, {
+ # Dailymotion embed
+ 'url': 'http://www.francetvinfo.fr/politique/notre-dame-des-landes/video-sur-france-inter-cecile-duflot-denonce-le-regard-meprisant-de-patrick-cohen_1520091.html',
+ 'md5': 'ee7f1828f25a648addc90cb2687b1f12',
+ 'info_dict': {
+ 'id': 'x4iiko0',
+ 'ext': 'mp4',
+ 'title': 'NDDL, référendum, Brexit : Cécile Duflot répond à Patrick Cohen',
+ 'description': 'md5:fdcb582c370756293a65cdfbc6ecd90e',
+ 'timestamp': 1467011958,
+ 'uploader': 'France Inter',
+ 'uploader_id': 'x2q2ez',
+ 'upload_date': '20160627',
+ 'view_count': int,
+ 'tags': ['Politique', 'France Inter', '27 juin 2016', 'Linvité de 8h20', 'Cécile Duflot', 'Patrick Cohen'],
+ 'age_limit': 0,
+ 'duration': 640,
+ 'like_count': int,
+ 'thumbnail': r're:https://[^/?#]+/v/[^/?#]+/x1080',
+ },
+ 'add_ie': ['Dailymotion'],
+ }, {
+ 'url': 'http://france3-regions.francetvinfo.fr/limousin/emissions/jt-1213-limousin',
+ 'only_matching': True,
+ }, {
+ # "<figure id=" pattern (#28792)
+ 'url': 'https://www.francetvinfo.fr/culture/patrimoine/incendie-de-notre-dame-de-paris/notre-dame-de-paris-de-l-incendie-de-la-cathedrale-a-sa-reconstruction_4372291.html',
+ 'only_matching': True,
+ }]
+
+ def _real_extract(self, url):
+ display_id = self._match_id(url)
+
+ webpage = self._download_webpage(url, display_id)
+
+ dailymotion_urls = tuple(DailymotionIE._extract_embed_urls(url, webpage))
+ if dailymotion_urls:
+ return self.playlist_result([
+ self.url_result(dailymotion_url, DailymotionIE.ie_key())
+ for dailymotion_url in dailymotion_urls])
+
+ video_id = self._search_regex(
+ (r'player\.load[^;]+src:\s*["\']([^"\']+)',
+ r'id-video=([^@]+@[^"]+)',
+ r'<a[^>]+href="(?:https?:)?//videos\.francetv\.fr/video/([^@]+@[^"]+)"',
+ r'(?:data-id|<figure[^<]+\bid)=["\']([\da-f]{8}-[\da-f]{4}-[\da-f]{4}-[\da-f]{4}-[\da-f]{12})'),
+ webpage, 'video id')
+
+ return self._make_url_result(video_id, url=url)
diff --git a/yt_dlp/extractor/freesound.py b/yt_dlp/extractor/freesound.py
new file mode 100644
index 0000000..fcde044
--- /dev/null
+++ b/yt_dlp/extractor/freesound.py
@@ -0,0 +1,77 @@
+import re
+
+from .common import InfoExtractor
+from ..utils import (
+ float_or_none,
+ get_element_by_class,
+ get_element_by_id,
+ unified_strdate,
+)
+
+
+class FreesoundIE(InfoExtractor):
+ _VALID_URL = r'https?://(?:www\.)?freesound\.org/people/[^/]+/sounds/(?P<id>[^/]+)'
+ _TEST = {
+ 'url': 'http://www.freesound.org/people/miklovan/sounds/194503/',
+ 'md5': '12280ceb42c81f19a515c745eae07650',
+ 'info_dict': {
+ 'id': '194503',
+ 'ext': 'mp3',
+ 'title': 'gulls in the city.wav',
+ 'description': 'the sounds of seagulls in the city',
+ 'duration': 130.233,
+ 'uploader': 'miklovan',
+ 'upload_date': '20130715',
+ 'tags': list,
+ }
+ }
+
+ def _real_extract(self, url):
+ audio_id = self._match_id(url)
+
+ webpage = self._download_webpage(url, audio_id)
+
+ audio_url = self._og_search_property('audio', webpage, 'song url')
+ title = self._og_search_property('audio:title', webpage, 'song title')
+
+ description = self._html_search_regex(
+ r'(?s)id=["\']sound_description["\'][^>]*>(.+?)</div>',
+ webpage, 'description', fatal=False)
+
+ duration = float_or_none(
+ get_element_by_class('duration', webpage), scale=1000)
+
+ upload_date = unified_strdate(get_element_by_id('sound_date', webpage))
+ uploader = self._og_search_property(
+ 'audio:artist', webpage, 'uploader', fatal=False)
+
+ channels = self._html_search_regex(
+ r'Channels</dt><dd>(.+?)</dd>', webpage,
+ 'channels info', fatal=False)
+
+ tags_str = get_element_by_class('tags', webpage)
+ tags = re.findall(r'<a[^>]+>([^<]+)', tags_str) if tags_str else None
+
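+        # og:audio is sometimes prefixed with a spurious 'https://freesound.org';
+        # strip it and keep only the inner, fully qualified URL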
+ audio_url = re.sub(r'^https?://freesound\.org(https?://)', r'\1', audio_url)
+ audio_urls = [audio_url]
+
+ LQ_FORMAT = '-lq.mp3'
+ if LQ_FORMAT in audio_url:
+ audio_urls.append(audio_url.replace(LQ_FORMAT, '-hq.mp3'))
+
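+        # list order doubles as a quality ranking: the assumed -hq.mp3 variant,
+        # appended last, receives the higher 'quality' value from enumerate()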
+ formats = [{
+ 'url': format_url,
+ 'format_note': channels,
+ 'quality': quality,
+ } for quality, format_url in enumerate(audio_urls)]
+
+ return {
+ 'id': audio_id,
+ 'title': title,
+ 'description': description,
+ 'duration': duration,
+ 'uploader': uploader,
+ 'upload_date': upload_date,
+ 'tags': tags,
+ 'formats': formats,
+ }
diff --git a/yt_dlp/extractor/freespeech.py b/yt_dlp/extractor/freespeech.py
new file mode 100644
index 0000000..aea5513
--- /dev/null
+++ b/yt_dlp/extractor/freespeech.py
@@ -0,0 +1,29 @@
+from .common import InfoExtractor
+from .youtube import YoutubeIE
+
+
+class FreespeechIE(InfoExtractor):
+ IE_NAME = 'freespeech.org'
+ _VALID_URL = r'https?://(?:www\.)?freespeech\.org/stories/(?P<id>.+)'
+ _TEST = {
+ 'add_ie': ['Youtube'],
+ 'url': 'http://www.freespeech.org/stories/fcc-announces-net-neutrality-rollback-whats-stake/',
+ 'info_dict': {
+ 'id': 'waRk6IPqyWM',
+ 'ext': 'mp4',
+ 'title': 'What\'s At Stake - Net Neutrality Special',
+ 'description': 'Presented by MNN and FSTV',
+ 'upload_date': '20170728',
+ 'uploader_id': 'freespeechtv',
+ 'uploader': 'freespeechtv',
+ },
+ }
+
+ def _real_extract(self, url):
+ display_id = self._match_id(url)
+ webpage = self._download_webpage(url, display_id)
+ youtube_url = self._search_regex(
+ r'data-video-url="([^"]+)"',
+ webpage, 'youtube url')
+
+ return self.url_result(youtube_url, YoutubeIE.ie_key())
diff --git a/yt_dlp/extractor/freetv.py b/yt_dlp/extractor/freetv.py
new file mode 100644
index 0000000..757a10d
--- /dev/null
+++ b/yt_dlp/extractor/freetv.py
@@ -0,0 +1,139 @@
+import itertools
+import re
+
+from .common import InfoExtractor
+from ..utils import int_or_none, traverse_obj, urlencode_postdata
+
+
+class FreeTvBaseIE(InfoExtractor):
+ def _get_api_response(self, content_id, resource_type, postdata):
+ return self._download_json(
+ 'https://www.freetv.com/wordpress/wp-admin/admin-ajax.php',
+ content_id, data=urlencode_postdata(postdata),
+ note=f'Downloading {content_id} {resource_type} JSON')['data']
+
+
+class FreeTvMoviesIE(FreeTvBaseIE):
+ _VALID_URL = r'https?://(?:www\.)?freetv\.com/peliculas/(?P<id>[^/]+)'
+ _TESTS = [{
+ 'url': 'https://www.freetv.com/peliculas/atrapame-si-puedes/',
+ 'md5': 'dc62d5abf0514726640077cd1591aa92',
+ 'info_dict': {
+ 'id': '428021',
+ 'title': 'Atrápame Si Puedes',
+ 'description': 'md5:ca63bc00898aeb2f64ec87c6d3a5b982',
+ 'ext': 'mp4',
+ }
+ }, {
+ 'url': 'https://www.freetv.com/peliculas/monstruoso/',
+ 'md5': '509c15c68de41cb708d1f92d071f20aa',
+ 'info_dict': {
+ 'id': '377652',
+ 'title': 'Monstruoso',
+ 'description': 'md5:333fc19ee327b457b980e54a911ea4a3',
+ 'ext': 'mp4',
+ }
+ }]
+
+ def _extract_video(self, content_id, action='olyott_video_play'):
+ api_response = self._get_api_response(content_id, 'video', {
+ 'action': action,
+ 'contentID': content_id,
+ })
+
+ video_id, video_url = api_response['displayMeta']['contentID'], api_response['displayMeta']['streamURLVideo']
+ formats, subtitles = self._extract_m3u8_formats_and_subtitles(video_url, video_id, 'mp4')
+
+ return {
+ 'id': video_id,
+ 'title': traverse_obj(api_response, ('displayMeta', 'title')),
+ 'description': traverse_obj(api_response, ('displayMeta', 'desc')),
+ 'formats': formats,
+ 'subtitles': subtitles,
+ }
+
+ def _real_extract(self, url):
+ display_id = self._match_id(url)
+ webpage = self._download_webpage(url, display_id)
+
+ return self._extract_video(
+ self._search_regex((
+ r'class=["\'][^>]+postid-(?P<video_id>\d+)',
+                r'<link[^>]+freetv\.com/\?p=(?P<video_id>\d+)',
+ r'<div[^>]+data-params=["\'][^>]+post_id=(?P<video_id>\d+)',
+ ), webpage, 'video id', group='video_id'))
+
+
+class FreeTvIE(FreeTvBaseIE):
+ IE_NAME = 'freetv:series'
+ _VALID_URL = r'https?://(?:www\.)?freetv\.com/series/(?P<id>[^/]+)'
+ _TESTS = [{
+ 'url': 'https://www.freetv.com/series/el-detective-l/',
+ 'info_dict': {
+ 'id': 'el-detective-l',
+ 'title': 'El Detective L',
+ 'description': 'md5:f9f1143bc33e9856ecbfcbfb97a759be'
+ },
+ 'playlist_count': 24,
+ }, {
+ 'url': 'https://www.freetv.com/series/esmeraldas/',
+ 'info_dict': {
+ 'id': 'esmeraldas',
+ 'title': 'Esmeraldas',
+ 'description': 'md5:43d7ec45bd931d8268a4f5afaf4c77bf'
+ },
+ 'playlist_count': 62,
+ }, {
+ 'url': 'https://www.freetv.com/series/las-aventuras-de-leonardo/',
+ 'info_dict': {
+ 'id': 'las-aventuras-de-leonardo',
+ 'title': 'Las Aventuras de Leonardo',
+ 'description': 'md5:0c47130846c141120a382aca059288f6'
+ },
+ 'playlist_count': 13,
+ },
+ ]
+
+ def _extract_series_season(self, season_id, series_title):
+ episodes = self._get_api_response(season_id, 'series', {
+ 'contentID': season_id,
+ 'action': 'olyott_get_dynamic_series_content',
+ 'type': 'list',
+ 'perPage': '1000',
+ })['1']
+
+ for episode in episodes:
+ video_id = str(episode['contentID'])
+ formats, subtitles = self._extract_m3u8_formats_and_subtitles(episode['streamURL'], video_id, 'mp4')
+
+ yield {
+ 'id': video_id,
+ 'title': episode.get('fullTitle'),
+ 'description': episode.get('description'),
+ 'formats': formats,
+ 'subtitles': subtitles,
+ 'thumbnail': episode.get('thumbnail'),
+ 'series': series_title,
+ 'series_id': traverse_obj(episode, ('contentMeta', 'displayMeta', 'seriesID')),
+ 'season_id': traverse_obj(episode, ('contentMeta', 'displayMeta', 'seasonID')),
+ 'season_number': traverse_obj(
+ episode, ('contentMeta', 'displayMeta', 'seasonNum'), expected_type=int_or_none),
+ 'episode_number': traverse_obj(
+ episode, ('contentMeta', 'displayMeta', 'episodeNum'), expected_type=int_or_none),
+ }
+
+ def _real_extract(self, url):
+ display_id = self._match_id(url)
+ webpage = self._download_webpage(url, display_id)
+
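+        # 'synopis' (sic) appears to be the class name as spelled in the site's markup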
+ title = self._html_search_regex(
+ r'<h1[^>]+class=["\']synopis[^>]>(?P<title>[^<]+)', webpage, 'title', group='title', fatal=False)
+ description = self._html_search_regex(
+ r'<div[^>]+class=["\']+synopis content[^>]><p>(?P<description>[^<]+)',
+ webpage, 'description', group='description', fatal=False)
+
+ return self.playlist_result(
+ itertools.chain.from_iterable(
+ self._extract_series_season(season_id, title)
+ for season_id in re.findall(r'<option[^>]+value=["\'](\d+)["\']', webpage)),
+ display_id, title, description)
diff --git a/yt_dlp/extractor/frontendmasters.py b/yt_dlp/extractor/frontendmasters.py
new file mode 100644
index 0000000..3bae8ad
--- /dev/null
+++ b/yt_dlp/extractor/frontendmasters.py
@@ -0,0 +1,252 @@
+import re
+
+from .common import InfoExtractor
+from ..compat import (
+ compat_str,
+ compat_urlparse,
+)
+from ..utils import (
+ ExtractorError,
+ parse_duration,
+ url_or_none,
+ urlencode_postdata,
+)
+
+
+class FrontendMastersBaseIE(InfoExtractor):
+ _API_BASE = 'https://api.frontendmasters.com/v1/kabuki'
+ _LOGIN_URL = 'https://frontendmasters.com/login/'
+
+ _NETRC_MACHINE = 'frontendmasters'
+
+ _QUALITIES = {
+ 'low': {'width': 480, 'height': 360},
+ 'mid': {'width': 1280, 'height': 720},
+ 'high': {'width': 1920, 'height': 1080}
+ }
+
+ def _perform_login(self, username, password):
+ login_page = self._download_webpage(
+ self._LOGIN_URL, None, 'Downloading login page')
+
+ login_form = self._hidden_inputs(login_page)
+
+ login_form.update({
+ 'username': username,
+ 'password': password
+ })
+
+ post_url = self._search_regex(
+ r'<form[^>]+action=(["\'])(?P<url>.+?)\1', login_page,
+ 'post_url', default=self._LOGIN_URL, group='url')
+
+ if not post_url.startswith('http'):
+ post_url = compat_urlparse.urljoin(self._LOGIN_URL, post_url)
+
+ response = self._download_webpage(
+ post_url, None, 'Logging in', data=urlencode_postdata(login_form),
+ headers={'Content-Type': 'application/x-www-form-urlencoded'})
+
+ # Successful login
+ if any(p in response for p in (
+ 'wp-login.php?action=logout', '>Logout')):
+ return
+
+ error = self._html_search_regex(
+ r'class=(["\'])(?:(?!\1).)*\bMessageAlert\b(?:(?!\1).)*\1[^>]*>(?P<error>[^<]+)<',
+ response, 'error message', default=None, group='error')
+ if error:
+            raise ExtractorError('Unable to log in: %s' % error, expected=True)
+ raise ExtractorError('Unable to log in')
+
+
+class FrontendMastersPageBaseIE(FrontendMastersBaseIE):
+ def _download_course(self, course_name, url):
+ return self._download_json(
+ '%s/courses/%s' % (self._API_BASE, course_name), course_name,
+ 'Downloading course JSON', headers={'Referer': url})
+
+ @staticmethod
+ def _extract_chapters(course):
+ chapters = []
+ lesson_elements = course.get('lessonElements')
+ if isinstance(lesson_elements, list):
+ chapters = [url_or_none(e) for e in lesson_elements if url_or_none(e)]
+ return chapters
+
+ @staticmethod
+ def _extract_lesson(chapters, lesson_id, lesson):
+ title = lesson.get('title') or lesson_id
+ display_id = lesson.get('slug')
+ description = lesson.get('description')
+ thumbnail = lesson.get('thumbnail')
+
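+        # 'index' appears to count lessons only while 'elementIndex' also counts
+        # chapter markers, so their difference recovers the 1-based chapter number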
+ chapter_number = None
+ index = lesson.get('index')
+ element_index = lesson.get('elementIndex')
+ if (isinstance(index, int) and isinstance(element_index, int)
+ and index < element_index):
+ chapter_number = element_index - index
+ chapter = (chapters[chapter_number - 1]
+ if chapter_number - 1 < len(chapters) else None)
+
+ duration = None
+ timestamp = lesson.get('timestamp')
+ if isinstance(timestamp, compat_str):
+ mobj = re.search(
+ r'(?P<start>\d{1,2}:\d{1,2}:\d{1,2})\s*-(?P<end>\s*\d{1,2}:\d{1,2}:\d{1,2})',
+ timestamp)
+ if mobj:
+ duration = parse_duration(mobj.group('end')) - parse_duration(
+ mobj.group('start'))
+
+ return {
+ '_type': 'url_transparent',
+ 'url': 'frontendmasters:%s' % lesson_id,
+ 'ie_key': FrontendMastersIE.ie_key(),
+ 'id': lesson_id,
+ 'display_id': display_id,
+ 'title': title,
+ 'description': description,
+ 'thumbnail': thumbnail,
+ 'duration': duration,
+ 'chapter': chapter,
+ 'chapter_number': chapter_number,
+ }
+
+
+class FrontendMastersIE(FrontendMastersBaseIE):
+ _VALID_URL = r'(?:frontendmasters:|https?://api\.frontendmasters\.com/v\d+/kabuki/video/)(?P<id>[^/]+)'
+ _TESTS = [{
+ 'url': 'https://api.frontendmasters.com/v1/kabuki/video/a2qogef6ba',
+ 'md5': '7f161159710d6b7016a4f4af6fcb05e2',
+ 'info_dict': {
+ 'id': 'a2qogef6ba',
+ 'ext': 'mp4',
+ 'title': 'a2qogef6ba',
+ },
+ 'skip': 'Requires FrontendMasters account credentials',
+ }, {
+ 'url': 'frontendmasters:a2qogef6ba',
+ 'only_matching': True,
+ }]
+
+ def _real_extract(self, url):
+ lesson_id = self._match_id(url)
+
+ source_url = '%s/video/%s/source' % (self._API_BASE, lesson_id)
+
+ formats = []
+ for ext in ('webm', 'mp4'):
+ for quality in ('low', 'mid', 'high'):
+ resolution = self._QUALITIES[quality].copy()
+ format_id = '%s-%s' % (ext, quality)
+                source = self._download_json(
+                    source_url, lesson_id,
+                    'Downloading %s source JSON' % format_id, query={
+                        'f': ext,
+                        'r': resolution['height'],
+                    }, headers={
+                        'Referer': url,
+                    }, fatal=False) or {}
+                format_url = source.get('url')
+
+ if not format_url:
+ continue
+
+ f = resolution.copy()
+ f.update({
+ 'url': format_url,
+ 'ext': ext,
+ 'format_id': format_id,
+ })
+ formats.append(f)
+
+ subtitles = {
+ 'en': [{
+ 'url': '%s/transcripts/%s.vtt' % (self._API_BASE, lesson_id),
+ }]
+ }
+
+ return {
+ 'id': lesson_id,
+ 'title': lesson_id,
+ 'formats': formats,
+ 'subtitles': subtitles
+ }
+
+
+class FrontendMastersLessonIE(FrontendMastersPageBaseIE):
+ _VALID_URL = r'https?://(?:www\.)?frontendmasters\.com/courses/(?P<course_name>[^/]+)/(?P<lesson_name>[^/]+)'
+ _TEST = {
+ 'url': 'https://frontendmasters.com/courses/web-development/tools',
+ 'info_dict': {
+ 'id': 'a2qogef6ba',
+ 'display_id': 'tools',
+ 'ext': 'mp4',
+ 'title': 'Tools',
+ 'description': 'md5:82c1ea6472e88ed5acd1829fe992e4f7',
+ 'thumbnail': r're:^https?://.*\.jpg$',
+ 'chapter': 'Introduction',
+ 'chapter_number': 1,
+ },
+ 'params': {
+ 'skip_download': True,
+ },
+ 'skip': 'Requires FrontendMasters account credentials',
+ }
+
+ def _real_extract(self, url):
+ mobj = self._match_valid_url(url)
+ course_name, lesson_name = mobj.group('course_name', 'lesson_name')
+
+ course = self._download_course(course_name, url)
+
+ lesson_id, lesson = next(
+ (video_id, data)
+ for video_id, data in course['lessonData'].items()
+ if data.get('slug') == lesson_name)
+
+ chapters = self._extract_chapters(course)
+ return self._extract_lesson(chapters, lesson_id, lesson)
+
+
+class FrontendMastersCourseIE(FrontendMastersPageBaseIE):
+ _VALID_URL = r'https?://(?:www\.)?frontendmasters\.com/courses/(?P<id>[^/]+)'
+ _TEST = {
+ 'url': 'https://frontendmasters.com/courses/web-development/',
+ 'info_dict': {
+ 'id': 'web-development',
+ 'title': 'Introduction to Web Development',
+ 'description': 'md5:9317e6e842098bf725d62360e52d49a6',
+ },
+ 'playlist_count': 81,
+ 'skip': 'Requires FrontendMasters account credentials',
+ }
+
+ @classmethod
+ def suitable(cls, url):
+ return False if FrontendMastersLessonIE.suitable(url) else super(
+ FrontendMastersBaseIE, cls).suitable(url)
+
+ def _real_extract(self, url):
+ course_name = self._match_id(url)
+
+ course = self._download_course(course_name, url)
+
+ chapters = self._extract_chapters(course)
+
+ lessons = sorted(
+ course['lessonData'].values(), key=lambda data: data['index'])
+
+ entries = []
+ for lesson in lessons:
+ lesson_name = lesson.get('slug')
+ lesson_id = lesson.get('hash') or lesson.get('statsId')
+ if not lesson_id or not lesson_name:
+ continue
+ entries.append(self._extract_lesson(chapters, lesson_id, lesson))
+
+ title = course.get('title')
+ description = course.get('description')
+
+ return self.playlist_result(entries, course_name, title, description)
diff --git a/yt_dlp/extractor/fujitv.py b/yt_dlp/extractor/fujitv.py
new file mode 100644
index 0000000..77e826e
--- /dev/null
+++ b/yt_dlp/extractor/fujitv.py
@@ -0,0 +1,71 @@
+from .common import InfoExtractor
+from ..networking import HEADRequest
+
+
+class FujiTVFODPlus7IE(InfoExtractor):
+ _VALID_URL = r'https?://fod\.fujitv\.co\.jp/title/(?P<sid>[0-9a-z]{4})/(?P<id>[0-9a-z]+)'
+ _BASE_URL = 'https://i.fod.fujitv.co.jp/'
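+    # tbr (kbps) -> (width, height)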
+ _BITRATE_MAP = {
+ 300: (320, 180),
+ 800: (640, 360),
+ 1200: (1280, 720),
+ 2000: (1280, 720),
+ 4000: (1920, 1080),
+ }
+
+ _TESTS = [{
+ 'url': 'https://fod.fujitv.co.jp/title/5d40/5d40110076',
+ 'info_dict': {
+ 'id': '5d40110076',
+ 'ext': 'ts',
+ 'title': '#1318 『まる子、まぼろしの洋館を見る』の巻',
+ 'series': 'ちびまる子ちゃん',
+ 'series_id': '5d40',
+ 'description': 'md5:b3f51dbfdda162ac4f789e0ff4d65750',
+ 'thumbnail': 'https://i.fod.fujitv.co.jp/img/program/5d40/episode/5d40110076_a.jpg',
+ },
+ }, {
+ 'url': 'https://fod.fujitv.co.jp/title/5d40/5d40810083',
+ 'info_dict': {
+ 'id': '5d40810083',
+ 'ext': 'ts',
+ 'title': '#1324 『まる子とオニの子』の巻/『結成!2月をムダにしない会』の巻',
+ 'description': 'md5:3972d900b896adc8ab1849e310507efa',
+ 'series': 'ちびまる子ちゃん',
+ 'series_id': '5d40',
+            'thumbnail': 'https://i.fod.fujitv.co.jp/img/program/5d40/episode/5d40810083_a.jpg',
+        },
+        'skip': 'Video is only available for one week',
+ }]
+
+ def _real_extract(self, url):
+ series_id, video_id = self._match_valid_url(url).groups()
+ self._request_webpage(HEADRequest(url), video_id)
+ json_info = {}
+ token = self._get_cookies(url).get('CT')
+ if token:
+ json_info = self._download_json('https://fod-sp.fujitv.co.jp/apps/api/episode/detail/?ep_id=%s&is_premium=false' % video_id, video_id, headers={'x-authorization': f'Bearer {token.value}'}, fatal=False)
+ else:
+ self.report_warning(f'The token cookie is needed to extract video metadata. {self._login_hint("cookies")}')
+ formats, subtitles = [], {}
+ src_json = self._download_json(f'{self._BASE_URL}abrjson_v2/tv_android/{video_id}', video_id)
+ for src in src_json['video_selector']:
+ if not src.get('url'):
+ continue
+ fmt, subs = self._extract_m3u8_formats_and_subtitles(src['url'], video_id, 'ts')
+ for f in fmt:
+                f.update(dict(zip(('width', 'height'),
+ self._BITRATE_MAP.get(f.get('tbr'), ()))))
+ formats.extend(fmt)
+ subtitles = self._merge_subtitles(subtitles, subs)
+
+ return {
+ 'id': video_id,
+ 'title': json_info.get('ep_title'),
+ 'series': json_info.get('lu_title'),
+ 'series_id': series_id,
+ 'description': json_info.get('ep_description'),
+ 'formats': formats,
+ 'subtitles': subtitles,
+ 'thumbnail': f'{self._BASE_URL}img/program/{series_id}/episode/{video_id}_a.jpg',
+            '_format_sort_fields': ('tbr',),
+ }
diff --git a/yt_dlp/extractor/funimation.py b/yt_dlp/extractor/funimation.py
new file mode 100644
index 0000000..c32f005
--- /dev/null
+++ b/yt_dlp/extractor/funimation.py
@@ -0,0 +1,349 @@
+import random
+import re
+import string
+
+from .common import InfoExtractor
+from ..networking.exceptions import HTTPError
+from ..utils import (
+ ExtractorError,
+ determine_ext,
+ int_or_none,
+ join_nonempty,
+ js_to_json,
+ make_archive_id,
+ orderedSet,
+ qualities,
+ str_or_none,
+ traverse_obj,
+ try_get,
+ urlencode_postdata,
+)
+
+
+class FunimationBaseIE(InfoExtractor):
+ _NETRC_MACHINE = 'funimation'
+ _REGION = None
+ _TOKEN = None
+
+ def _get_region(self):
+ region_cookie = self._get_cookies('https://www.funimation.com').get('region')
+ region = region_cookie.value if region_cookie else self.get_param('geo_bypass_country')
+ return region or traverse_obj(
+ self._download_json(
+ 'https://geo-service.prd.funimationsvc.com/geo/v1/region/check', None, fatal=False,
+ note='Checking geo-location', errnote='Unable to fetch geo-location information'),
+ 'region') or 'US'
+
+ def _perform_login(self, username, password):
+ if self._TOKEN:
+ return
+ try:
+ data = self._download_json(
+ 'https://prod-api-funimationnow.dadcdigital.com/api/auth/login/',
+ None, 'Logging in', data=urlencode_postdata({
+ 'username': username,
+ 'password': password,
+ }))
+ FunimationBaseIE._TOKEN = data['token']
+ except ExtractorError as e:
+ if isinstance(e.cause, HTTPError) and e.cause.status == 401:
+ error = self._parse_json(e.cause.response.read().decode(), None)['error']
+ raise ExtractorError(error, expected=True)
+ raise
+
+
+class FunimationPageIE(FunimationBaseIE):
+ IE_NAME = 'funimation:page'
+ _VALID_URL = r'https?://(?:www\.)?funimation(?:\.com|now\.uk)/(?:(?P<lang>[^/]+)/)?(?:shows|v)/(?P<show>[^/]+)/(?P<episode>[^/?#&]+)'
+
+ _TESTS = [{
+ 'url': 'https://www.funimation.com/shows/attack-on-titan-junior-high/broadcast-dub-preview/',
+ 'info_dict': {
+ 'id': '210050',
+ 'ext': 'mp4',
+ 'title': 'Broadcast Dub Preview',
+ # Other metadata is tested in FunimationIE
+ },
+ 'params': {
+ 'skip_download': 'm3u8',
+ },
+ 'add_ie': ['Funimation'],
+ }, {
+ # Not available in US
+ 'url': 'https://www.funimation.com/shows/hacksign/role-play/',
+ 'only_matching': True,
+ }, {
+ # with lang code
+ 'url': 'https://www.funimation.com/en/shows/hacksign/role-play/',
+ 'only_matching': True,
+ }, {
+ 'url': 'https://www.funimationnow.uk/shows/puzzle-dragons-x/drop-impact/simulcast/',
+ 'only_matching': True,
+ }, {
+ 'url': 'https://www.funimation.com/v/a-certain-scientific-railgun/super-powered-level-5',
+ 'only_matching': True,
+ }]
+
+ def _real_initialize(self):
+ if not self._REGION:
+ FunimationBaseIE._REGION = self._get_region()
+
+ def _real_extract(self, url):
+ locale, show, episode = self._match_valid_url(url).group('lang', 'show', 'episode')
+
+ video_id = traverse_obj(self._download_json(
+ f'https://title-api.prd.funimationsvc.com/v1/shows/{show}/episodes/{episode}',
+ f'{show}_{episode}', query={
+ 'deviceType': 'web',
+ 'region': self._REGION,
+ 'locale': locale or 'en'
+ }), ('videoList', ..., 'id'), get_all=False)
+
+ return self.url_result(f'https://www.funimation.com/player/{video_id}', FunimationIE.ie_key(), video_id)
+
+
+class FunimationIE(FunimationBaseIE):
+ _VALID_URL = r'https?://(?:www\.)?funimation\.com/player/(?P<id>\d+)'
+
+ _TESTS = [{
+ 'url': 'https://www.funimation.com/player/210051',
+ 'info_dict': {
+ 'id': '210050',
+ 'display_id': 'broadcast-dub-preview',
+ 'ext': 'mp4',
+ 'title': 'Broadcast Dub Preview',
+ 'thumbnail': r're:https?://.*\.(?:jpg|png)',
+ 'episode': 'Broadcast Dub Preview',
+ 'episode_id': '210050',
+ 'season': 'Extras',
+ 'season_id': '166038',
+ 'season_number': 99,
+ 'series': 'Attack on Titan: Junior High',
+ 'description': '',
+ 'duration': 155,
+ },
+ 'params': {
+ 'skip_download': 'm3u8',
+ },
+ }, {
+        'note': 'player_id should be extracted with the relevant compat-opt',
+ 'url': 'https://www.funimation.com/player/210051',
+ 'info_dict': {
+ 'id': '210051',
+ 'display_id': 'broadcast-dub-preview',
+ 'ext': 'mp4',
+ 'title': 'Broadcast Dub Preview',
+ 'thumbnail': r're:https?://.*\.(?:jpg|png)',
+ 'episode': 'Broadcast Dub Preview',
+ 'episode_id': '210050',
+ 'season': 'Extras',
+ 'season_id': '166038',
+ 'season_number': 99,
+ 'series': 'Attack on Titan: Junior High',
+ 'description': '',
+ 'duration': 155,
+ },
+ 'params': {
+ 'skip_download': 'm3u8',
+ 'compat_opts': ['seperate-video-versions'],
+ },
+ }]
+
+ @staticmethod
+ def _get_experiences(episode):
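+        # walks episode['languages'][language][...][version] and yields
+        # (language, title-cased version, format dict) triples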
+ for lang, lang_data in episode.get('languages', {}).items():
+ for video_data in lang_data.values():
+ for version, f in video_data.items():
+ yield lang, version.title(), f
+
+ def _get_episode(self, webpage, experience_id=None, episode_id=None, fatal=True):
+        ''' Extract the episode, season and show objects, given either an episode or experience id '''
+ show = self._parse_json(
+ self._search_regex(
+ r'show\s*=\s*({.+?})\s*;', webpage, 'show data', fatal=fatal),
+ experience_id, transform_source=js_to_json, fatal=fatal) or []
+ for season in show.get('seasons', []):
+ for episode in season.get('episodes', []):
+ if episode_id is not None:
+ if str(episode.get('episodePk')) == episode_id:
+ return episode, season, show
+ continue
+ for _, _, f in self._get_experiences(episode):
+ if f.get('experienceId') == experience_id:
+ return episode, season, show
+ if fatal:
+ raise ExtractorError('Unable to find episode information')
+ else:
+ self.report_warning('Unable to find episode information')
+ return {}, {}, {}
+
+ def _real_extract(self, url):
+ initial_experience_id = self._match_id(url)
+ webpage = self._download_webpage(
+ url, initial_experience_id, note=f'Downloading player webpage for {initial_experience_id}')
+ episode, season, show = self._get_episode(webpage, experience_id=int(initial_experience_id))
+ episode_id = str(episode['episodePk'])
+ display_id = episode.get('slug') or episode_id
+
+ formats, subtitles, thumbnails, duration = [], {}, [], 0
+ requested_languages, requested_versions = self._configuration_arg('language'), self._configuration_arg('version')
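+        # qualities() ranks later list entries higher, so the requested lists are
+        # reversed to give the first requested language/version the top preference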
+ language_preference = qualities((requested_languages or [''])[::-1])
+ source_preference = qualities((requested_versions or ['uncut', 'simulcast'])[::-1])
+ only_initial_experience = 'seperate-video-versions' in self.get_param('compat_opts', [])
+
+ for lang, version, fmt in self._get_experiences(episode):
+ experience_id = str(fmt['experienceId'])
+ if (only_initial_experience and experience_id != initial_experience_id
+ or requested_languages and lang.lower() not in requested_languages
+ or requested_versions and version.lower() not in requested_versions):
+ continue
+ thumbnails.append({'url': fmt.get('poster')})
+ duration = max(duration, fmt.get('duration', 0))
+ format_name = '%s %s (%s)' % (version, lang, experience_id)
+ self.extract_subtitles(
+ subtitles, experience_id, display_id=display_id, format_name=format_name,
+ episode=episode if experience_id == initial_experience_id else episode_id)
+
+ headers = {}
+ if self._TOKEN:
+ headers['Authorization'] = 'Token %s' % self._TOKEN
+ page = self._download_json(
+ 'https://www.funimation.com/api/showexperience/%s/' % experience_id,
+ display_id, headers=headers, expected_status=403, query={
+ 'pinst_id': ''.join(random.choices(string.digits + string.ascii_letters, k=8)),
+ }, note=f'Downloading {format_name} JSON')
+ sources = page.get('items') or []
+ if not sources:
+ error = try_get(page, lambda x: x['errors'][0], dict)
+ if error:
+ self.report_warning('%s said: Error %s - %s' % (
+ self.IE_NAME, error.get('code'), error.get('detail') or error.get('title')))
+ else:
+ self.report_warning('No sources found for format')
+
+ current_formats = []
+ for source in sources:
+ source_url = source.get('src')
+ source_type = source.get('videoType') or determine_ext(source_url)
+ if source_type == 'm3u8':
+ current_formats.extend(self._extract_m3u8_formats(
+ source_url, display_id, 'mp4', m3u8_id='%s-%s' % (experience_id, 'hls'), fatal=False,
+ note=f'Downloading {format_name} m3u8 information'))
+ else:
+ current_formats.append({
+ 'format_id': '%s-%s' % (experience_id, source_type),
+ 'url': source_url,
+ })
+ for f in current_formats:
+ # TODO: Convert language to code
+ f.update({
+ 'language': lang,
+ 'format_note': version,
+ 'source_preference': source_preference(version.lower()),
+ 'language_preference': language_preference(lang.lower()),
+ })
+ formats.extend(current_formats)
+ if not formats and (requested_languages or requested_versions):
+ self.raise_no_formats(
+ 'There are no video formats matching the requested languages/versions', expected=True, video_id=display_id)
+ self._remove_duplicate_formats(formats)
+
+ return {
+ 'id': episode_id,
+ '_old_archive_ids': [make_archive_id(self, initial_experience_id)],
+ 'display_id': display_id,
+ 'duration': duration,
+ 'title': episode['episodeTitle'],
+ 'description': episode.get('episodeSummary'),
+ 'episode': episode.get('episodeTitle'),
+ 'episode_number': int_or_none(episode.get('episodeId')),
+ 'episode_id': episode_id,
+ 'season': season.get('seasonTitle'),
+ 'season_number': int_or_none(season.get('seasonId')),
+ 'season_id': str_or_none(season.get('seasonPk')),
+ 'series': show.get('showTitle'),
+ 'formats': formats,
+ 'thumbnails': thumbnails,
+ 'subtitles': subtitles,
+ '_format_sort_fields': ('lang', 'source'),
+ }
+
+ def _get_subtitles(self, subtitles, experience_id, episode, display_id, format_name):
+ if isinstance(episode, str):
+ webpage = self._download_webpage(
+ f'https://www.funimation.com/player/{experience_id}/', display_id,
+ fatal=False, note=f'Downloading player webpage for {format_name}')
+ episode, _, _ = self._get_episode(webpage, episode_id=episode, fatal=False)
+
+ for _, version, f in self._get_experiences(episode):
+            for source in f.get('sources') or []:
+                for text_track in source.get('textTracks') or []:
+                    if not text_track.get('src'):
+                        continue
+                    sub_type = (text_track.get('type') or '').upper()
+                    sub_type = sub_type if sub_type != 'FULL' else None
+ current_sub = {
+ 'url': text_track['src'],
+ 'name': join_nonempty(version, text_track.get('label'), sub_type, delim=' ')
+ }
+ lang = join_nonempty(text_track.get('language', 'und'),
+ version if version != 'Simulcast' else None,
+ sub_type, delim='_')
+ if current_sub not in subtitles.get(lang, []):
+ subtitles.setdefault(lang, []).append(current_sub)
+ return subtitles
+
+
+class FunimationShowIE(FunimationBaseIE):
+ IE_NAME = 'funimation:show'
+ _VALID_URL = r'(?P<url>https?://(?:www\.)?funimation(?:\.com|now\.uk)/(?P<locale>[^/]+)?/?shows/(?P<id>[^/?#&]+))/?(?:[?#]|$)'
+
+ _TESTS = [{
+ 'url': 'https://www.funimation.com/en/shows/sk8-the-infinity',
+ 'info_dict': {
+ 'id': '1315000',
+ 'title': 'SK8 the Infinity'
+ },
+ 'playlist_count': 13,
+ 'params': {
+ 'skip_download': True,
+ },
+ }, {
+ # without lang code
+ 'url': 'https://www.funimation.com/shows/ouran-high-school-host-club/',
+ 'info_dict': {
+ 'id': '39643',
+ 'title': 'Ouran High School Host Club'
+ },
+ 'playlist_count': 26,
+ 'params': {
+ 'skip_download': True,
+ },
+ }]
+
+ def _real_initialize(self):
+ if not self._REGION:
+ FunimationBaseIE._REGION = self._get_region()
+
+ def _real_extract(self, url):
+ base_url, locale, display_id = self._match_valid_url(url).groups()
+
+ show_info = self._download_json(
+ 'https://title-api.prd.funimationsvc.com/v2/shows/%s?region=%s&deviceType=web&locale=%s'
+ % (display_id, self._REGION, locale or 'en'), display_id)
+ items_info = self._download_json(
+ 'https://prod-api-funimationnow.dadcdigital.com/api/funimation/episodes/?limit=99999&title_id=%s'
+ % show_info.get('id'), display_id)
+
+ vod_items = traverse_obj(items_info, ('items', ..., lambda k, _: re.match(r'(?i)mostRecent[AS]vod', k), 'item'))
+
+ return {
+ '_type': 'playlist',
+ 'id': str_or_none(show_info['id']),
+ 'title': show_info['name'],
+ 'entries': orderedSet(
+ self.url_result(
+ '%s/%s' % (base_url, vod_item.get('episodeSlug')), FunimationPageIE.ie_key(),
+ vod_item.get('episodeId'), vod_item.get('episodeName'))
+ for vod_item in sorted(vod_items, key=lambda x: x.get('episodeOrder', -1))),
+ }
diff --git a/yt_dlp/extractor/funk.py b/yt_dlp/extractor/funk.py
new file mode 100644
index 0000000..8bdea3f
--- /dev/null
+++ b/yt_dlp/extractor/funk.py
@@ -0,0 +1,40 @@
+from .common import InfoExtractor
+from .nexx import NexxIE
+
+
+class FunkIE(InfoExtractor):
+ _VALID_URL = r'https?://(?:www\.|origin\.)?funk\.net/(?:channel|playlist)/[^/]+/(?P<display_id>[0-9a-z-]+)-(?P<id>\d+)'
+ _TESTS = [{
+ 'url': 'https://www.funk.net/channel/ba-793/die-lustigsten-instrumente-aus-dem-internet-teil-2-1155821',
+ 'md5': '8610449476156f338761a75391b0017d',
+ 'info_dict': {
+ 'id': '1155821',
+ 'ext': 'mp4',
+ 'title': 'Die LUSTIGSTEN INSTRUMENTE aus dem Internet - Teil 2',
+ 'description': 'md5:2a03b67596eda0d1b5125c299f45e953',
+ 'timestamp': 1514507395,
+ 'upload_date': '20171229',
+ 'duration': 426.0,
+ 'cast': ['United Creators PMB GmbH'],
+ 'thumbnail': 'https://assets.nexx.cloud/media/75/56/79/3YKUSJN1LACN0CRxL.jpg',
+ 'display_id': 'die-lustigsten-instrumente-aus-dem-internet-teil-2',
+ 'alt_title': 'Die LUSTIGSTEN INSTRUMENTE aus dem Internet Teil 2',
+ 'season_number': 0,
+ 'season': 'Season 0',
+ 'episode_number': 0,
+ 'episode': 'Episode 0',
+ },
+ }, {
+ 'url': 'https://www.funk.net/playlist/neuesteVideos/kameras-auf-dem-fusion-festival-1618699',
+ 'only_matching': True,
+ }]
+
+ def _real_extract(self, url):
+ display_id, nexx_id = self._match_valid_url(url).groups()
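+ # funk.net hosts its media on the Nexx platform; 741 appears to be
+ # funk's Nexx domain ID, so extraction is delegated to NexxIE while this
+ # page's IDs are kept via url_transparent.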
+ return {
+ '_type': 'url_transparent',
+ 'url': f'nexx:741:{nexx_id}',
+ 'ie_key': NexxIE.ie_key(),
+ 'id': nexx_id,
+ 'display_id': display_id,
+ }
diff --git a/yt_dlp/extractor/funker530.py b/yt_dlp/extractor/funker530.py
new file mode 100644
index 0000000..62fd7f6
--- /dev/null
+++ b/yt_dlp/extractor/funker530.py
@@ -0,0 +1,80 @@
+from .common import InfoExtractor
+from .rumble import RumbleEmbedIE
+from .youtube import YoutubeIE
+from ..utils import ExtractorError, clean_html, get_element_by_class, strip_or_none
+
+
+class Funker530IE(InfoExtractor):
+ _VALID_URL = r'https?://(?:www\.)?funker530\.com/video/(?P<id>[^/?#]+)'
+ _TESTS = [{
+ 'url': 'https://funker530.com/video/azov-patrol-caught-in-open-under-automatic-grenade-launcher-fire/',
+ 'md5': '085f50fea27523a388bbc22e123e09c8',
+ 'info_dict': {
+ 'id': 'v2qbmu4',
+ 'ext': 'mp4',
+ 'title': 'Azov Patrol Caught In Open Under Automatic Grenade Launcher Fire',
+ 'thumbnail': r're:^https?://.*\.jpg$',
+ 'uploader': 'Funker530',
+ 'channel': 'Funker530',
+ 'channel_url': 'https://rumble.com/c/c-1199543',
+ 'width': 1280,
+ 'height': 720,
+ 'fps': 25,
+ 'duration': 27,
+ 'upload_date': '20230608',
+ 'timestamp': 1686241321,
+ 'live_status': 'not_live',
+ 'description': 'md5:bea2e1f458095414e04b5ac189c2f980',
+ }
+ }, {
+ 'url': 'https://funker530.com/video/my-friends-joined-the-russians-civdiv/',
+ 'md5': 'a42c2933391210662e93e867d7124b70',
+ 'info_dict': {
+ 'id': 'k-pk4bOvoac',
+ 'ext': 'mp4',
+ 'view_count': int,
+ 'channel': 'Civ Div',
+ 'comment_count': int,
+ 'channel_follower_count': int,
+ 'thumbnail': 'https://i.ytimg.com/vi/k-pk4bOvoac/maxresdefault.jpg',
+ 'uploader_id': '@CivDiv',
+ 'duration': 357,
+ 'channel_url': 'https://www.youtube.com/channel/UCgsCiwJ88up-YyMHo7hL5-A',
+ 'tags': [],
+ 'uploader_url': 'https://www.youtube.com/@CivDiv',
+ 'channel_id': 'UCgsCiwJ88up-YyMHo7hL5-A',
+ 'like_count': int,
+ 'description': 'md5:aef75ec3f59c07a0e39400f609b24429',
+ 'live_status': 'not_live',
+ 'age_limit': 0,
+ 'uploader': 'Civ Div',
+ 'categories': ['People & Blogs'],
+ 'title': 'My “Friends” joined the Russians.',
+ 'availability': 'public',
+ 'upload_date': '20230608',
+ 'playable_in_embed': True,
+ 'heatmap': 'count:100',
+ }
+ }]
+
+ def _real_extract(self, url):
+ display_id = self._match_id(url)
+ webpage = self._download_webpage(url, display_id)
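+ # Pages embed either a Rumble or a YouTube player; probe for both and
+ # hand off to the matching extractor, layering this page's own
+ # description on top via url_transparent.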
+ info = {}
+ rumble_url = list(RumbleEmbedIE._extract_embed_urls(url, webpage))
+ if rumble_url:
+ info = {'url': rumble_url[0], 'ie_key': RumbleEmbedIE.ie_key()}
+ else:
+ youtube_url = list(YoutubeIE._extract_embed_urls(url, webpage))
+ if youtube_url:
+ info = {'url': youtube_url[0], 'ie_key': YoutubeIE.ie_key()}
+ if not info:
+ raise ExtractorError('No videos found on webpage', expected=True)
+
+ return {
+ **info,
+ '_type': 'url_transparent',
+ 'description': strip_or_none(self._search_regex(
+ r'(?s)(.+)About the Author', clean_html(get_element_by_class('video-desc-paragraph', webpage)),
+ 'description', default=None))
+ }
diff --git a/yt_dlp/extractor/fuyintv.py b/yt_dlp/extractor/fuyintv.py
new file mode 100644
index 0000000..197901d
--- /dev/null
+++ b/yt_dlp/extractor/fuyintv.py
@@ -0,0 +1,30 @@
+from .common import InfoExtractor
+from ..utils import traverse_obj
+
+
+class FuyinTVIE(InfoExtractor):
+ _VALID_URL = r'https?://(?:www\.)?fuyin\.tv/html/(?:\d+)/(?P<id>\d+)\.html'
+ _TESTS = [{
+ 'url': 'https://www.fuyin.tv/html/2733/44129.html',
+ 'info_dict': {
+ 'id': '44129',
+ 'ext': 'mp4',
+ 'title': '第1集',
+ 'description': 'md5:21a3d238dc8d49608e1308e85044b9c3',
+ }
+ }]
+
+ def _real_extract(self, url):
+ video_id = self._match_id(url)
+ json_data = self._download_json(
+ 'https://www.fuyin.tv/api/api/tv.movie/url',
+ video_id, query={'urlid': video_id})
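+ # The watch page is only needed for the <meta> description, so a failed
+ # fetch should not abort extraction.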
+ webpage = self._download_webpage(url, video_id, fatal=False)
+
+ return {
+ 'id': video_id,
+ 'title': traverse_obj(json_data, ('data', 'title')),
+ 'url': json_data['data']['url'],
+ 'ext': 'mp4',
+ 'description': self._html_search_meta('description', webpage) if webpage else None,
+ }
diff --git a/yt_dlp/extractor/gab.py b/yt_dlp/extractor/gab.py
new file mode 100644
index 0000000..f9d22fd
--- /dev/null
+++ b/yt_dlp/extractor/gab.py
@@ -0,0 +1,140 @@
+import re
+
+from .common import InfoExtractor
+from ..utils import (
+ clean_html,
+ int_or_none,
+ parse_codecs,
+ parse_duration,
+ str_to_int,
+ unified_timestamp
+)
+
+
+class GabTVIE(InfoExtractor):
+ _VALID_URL = r'https?://tv\.gab\.com/channel/[^/]+/view/(?P<id>[a-z0-9-]+)'
+ _TESTS = [{
+ 'url': 'https://tv.gab.com/channel/wurzelroot/view/why-was-america-in-afghanistan-61217eacea5665de450d0488',
+ 'info_dict': {
+ 'id': '61217eacea5665de450d0488',
+ 'ext': 'mp4',
+ 'title': 'WHY WAS AMERICA IN AFGHANISTAN - AMERICA FIRST AGAINST AMERICAN OLIGARCHY',
+ 'uploader': 'Wurzelroot',
+ 'uploader_id': '608fb0a85738fd1974984f7d',
+ 'thumbnail': 'https://tv.gab.com/image/61217eacea5665de450d0488',
+ }
+ }]
+
+ def _real_extract(self, url):
+ video_id = self._match_id(url).split('-')[-1]
+ webpage = self._download_webpage(url, video_id)
+ channel_id = self._search_regex(r'data-channel-id="(?P<channel_id>[^"]+)', webpage, 'channel_id')
+ channel_name = self._search_regex(r'data-channel-name="(?P<channel_name>[^"]+)', webpage, 'channel_name')
+ title = self._search_regex(r'data-episode-title="(?P<title>[^"]+)', webpage, 'title')
+ view_key = self._search_regex(r'data-view-key="(?P<view_key>[^"]+)', webpage, 'view_key')
+ description = clean_html(
+ self._html_search_regex(self._meta_regex('description'), webpage, 'description', group='content')) or None
+ available_resolutions = re.findall(
+ r'<a data-episode-id="%s" data-resolution="(?P<resolution>[^"]+)' % video_id, webpage)
+
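+ # Formats are addressed by a resolution string in the page markup;
+ # entries like 'audio-128' appear to be audio-only renditions (inferred
+ # from the player HTML, not a documented API).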
+ formats = []
+ for resolution in available_resolutions:
+ frmt = {
+ 'url': f'https://tv.gab.com/media/{video_id}?viewKey={view_key}&r={resolution}',
+ 'format_id': resolution,
+ 'vcodec': 'h264',
+ 'acodec': 'aac',
+ 'ext': 'mp4'
+ }
+ if 'audio-' in resolution:
+ frmt['abr'] = str_to_int(resolution.replace('audio-', ''))
+ frmt['height'] = 144
+ frmt['quality'] = -10
+ else:
+ frmt['height'] = str_to_int(resolution.replace('p', ''))
+ formats.append(frmt)
+
+ return {
+ 'id': video_id,
+ 'title': title,
+ 'formats': formats,
+ 'description': description,
+ 'uploader': channel_name,
+ 'uploader_id': channel_id,
+ 'thumbnail': f'https://tv.gab.com/image/{video_id}',
+ }
+
+
+class GabIE(InfoExtractor):
+ _VALID_URL = r'https?://(?:www\.)?gab\.com/[^/]+/posts/(?P<id>\d+)'
+ _TESTS = [{
+ 'url': 'https://gab.com/SomeBitchIKnow/posts/107163961867310434',
+ 'md5': '8ca34fb00f1e1033b5c5988d79ec531d',
+ 'info_dict': {
+ 'id': '107163961867310434-0',
+ 'ext': 'mp4',
+ 'title': 'L on Gab',
+ 'uploader_id': '946600',
+ 'uploader': 'SomeBitchIKnow',
+ 'description': 'md5:204055fafd5e1a519f5d6db953567ca3',
+ 'timestamp': 1635192289,
+ 'upload_date': '20211025',
+ }
+ }, {
+ 'url': 'https://gab.com/TheLonelyProud/posts/107045884469287653',
+ 'md5': 'f9cefcfdff6418e392611a828d47839d',
+ 'info_dict': {
+ 'id': '107045884469287653-0',
+ 'ext': 'mp4',
+ 'title': 'Jody Sadowski on Gab',
+ 'uploader_id': '1390705',
+ 'timestamp': 1633390571,
+ 'upload_date': '20211004',
+ 'uploader': 'TheLonelyProud',
+ }
+ }]
+
+ def _real_extract(self, url):
+ post_id = self._match_id(url)
+ json_data = self._download_json(f'https://gab.com/api/v1/statuses/{post_id}', post_id)
+
+ entries = []
+ for idx, media in enumerate(json_data['media_attachments']):
+ if media.get('type') not in ('video', 'gifv'):
+ continue
+ metadata = media['meta']
+ format_metadata = {
+ 'acodec': parse_codecs(metadata.get('audio_encode')).get('acodec'),
+ 'asr': int_or_none((metadata.get('audio_bitrate') or '').split(' ')[0]),
+ 'fps': metadata.get('fps'),
+ }
+
+ formats = [{
+ 'url': url,
+ 'width': f.get('width'),
+ 'height': f.get('height'),
+ 'tbr': int_or_none(f.get('bitrate'), scale=1000),
+ **format_metadata,
+ } for url, f in ((media.get('url'), metadata.get('original') or {}),
+ (media.get('source_mp4'), metadata.get('playable') or {})) if url]
+
+ author = json_data.get('account') or {}
+ entries.append({
+ 'id': f'{post_id}-{idx}',
+ 'title': f'{json_data["account"]["display_name"]} on Gab',
+ 'timestamp': unified_timestamp(json_data.get('created_at')),
+ 'formats': formats,
+ 'description': clean_html(json_data.get('content')),
+ 'duration': metadata.get('duration') or parse_duration(metadata.get('length')),
+ 'like_count': json_data.get('favourites_count'),
+ 'comment_count': json_data.get('replies_count'),
+ 'repost_count': json_data.get('reblogs_count'),
+ 'uploader': author.get('username'),
+ 'uploader_id': author.get('id'),
+ 'uploader_url': author.get('url'),
+ })
+
+ if len(entries) > 1:
+ return self.playlist_result(entries, post_id)
+
+ return entries[0]
diff --git a/yt_dlp/extractor/gaia.py b/yt_dlp/extractor/gaia.py
new file mode 100644
index 0000000..c84386f
--- /dev/null
+++ b/yt_dlp/extractor/gaia.py
@@ -0,0 +1,122 @@
+from .common import InfoExtractor
+from ..compat import (
+ compat_str,
+ compat_urllib_parse_unquote,
+)
+from ..utils import (
+ ExtractorError,
+ int_or_none,
+ str_or_none,
+ strip_or_none,
+ try_get,
+ urlencode_postdata,
+)
+
+
+class GaiaIE(InfoExtractor):
+ _VALID_URL = r'https?://(?:www\.)?gaia\.com/video/(?P<id>[^/?]+).*?\bfullplayer=(?P<type>feature|preview)'
+ _TESTS = [{
+ 'url': 'https://www.gaia.com/video/connecting-universal-consciousness?fullplayer=feature',
+ 'info_dict': {
+ 'id': '89356',
+ 'ext': 'mp4',
+ 'title': 'Connecting with Universal Consciousness',
+ 'description': 'md5:844e209ad31b7d31345f5ed689e3df6f',
+ 'upload_date': '20151116',
+ 'timestamp': 1447707266,
+ 'duration': 936,
+ },
+ 'params': {
+ # m3u8 download
+ 'skip_download': True,
+ },
+ }, {
+ 'url': 'https://www.gaia.com/video/connecting-universal-consciousness?fullplayer=preview',
+ 'info_dict': {
+ 'id': '89351',
+ 'ext': 'mp4',
+ 'title': 'Connecting with Universal Consciousness',
+ 'description': 'md5:844e209ad31b7d31345f5ed689e3df6f',
+ 'upload_date': '20151116',
+ 'timestamp': 1447707266,
+ 'duration': 53,
+ },
+ 'params': {
+ # m3u8 download
+ 'skip_download': True,
+ },
+ }]
+ _NETRC_MACHINE = 'gaia'
+ _jwt = None
+
+ def _real_initialize(self):
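+ # Reuse the JWT from an existing browser-session cookie (e.g. when the
+ # user passes --cookies), so no fresh login is needed.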
+ auth = self._get_cookies('https://www.gaia.com/').get('auth')
+ if auth:
+ auth = self._parse_json(compat_urllib_parse_unquote(auth.value), None, fatal=False)
+ self._jwt = (auth or {}).get('jwt')
+
+ def _perform_login(self, username, password):
+ if self._jwt:
+ return
+ auth = self._download_json(
+ 'https://auth.gaia.com/v1/login',
+ None, data=urlencode_postdata({
+ 'username': username,
+ 'password': password
+ }))
+ if auth.get('success') is False:
+ raise ExtractorError(', '.join(auth['messages']), expected=True)
+ self._jwt = auth.get('jwt')
+
+ def _real_extract(self, url):
+ display_id, vtype = self._match_valid_url(url).groups()
+ node_id = self._download_json(
+ 'https://brooklyn.gaia.com/pathinfo', display_id, query={
+ 'path': 'video/' + display_id,
+ })['id']
+ node = self._download_json(
+ 'https://brooklyn.gaia.com/node/%d' % node_id, node_id)
+ vdata = node[vtype]
+ media_id = compat_str(vdata['nid'])
+ title = node['title']
+
+ headers = None
+ if self._jwt:
+ headers = {'Authorization': 'Bearer ' + self._jwt}
+ media = self._download_json(
+ 'https://brooklyn.gaia.com/media/' + media_id,
+ media_id, headers=headers)
+ formats = self._extract_m3u8_formats(
+ media['mediaUrls']['bcHLS'], media_id, 'mp4')
+
+ subtitles = {}
+ text_tracks = media.get('textTracks', {})
+ for key in ('captions', 'subtitles'):
+ for lang, sub_url in text_tracks.get(key, {}).items():
+ subtitles.setdefault(lang, []).append({
+ 'url': sub_url,
+ })
+
+ fivestar = node.get('fivestar', {})
+ fields = node.get('fields', {})
+
+ def get_field_value(key, value_key='value'):
+ return try_get(fields, lambda x: x[key][0][value_key])
+
+ return {
+ 'id': media_id,
+ 'display_id': display_id,
+ 'title': title,
+ 'formats': formats,
+ 'description': strip_or_none(get_field_value('body') or get_field_value('teaser')),
+ 'timestamp': int_or_none(node.get('created')),
+ 'subtitles': subtitles,
+ 'duration': int_or_none(vdata.get('duration')),
+ 'like_count': int_or_none(try_get(fivestar, lambda x: x['up_count']['value'])),
+ 'dislike_count': int_or_none(try_get(fivestar, lambda x: x['down_count']['value'])),
+ 'comment_count': int_or_none(node.get('comment_count')),
+ 'series': try_get(node, lambda x: x['series']['title'], compat_str),
+ 'season_number': int_or_none(get_field_value('season')),
+ 'season_id': str_or_none(get_field_value('series_nid', 'nid')),
+ 'episode_number': int_or_none(get_field_value('episode')),
+ }
diff --git a/yt_dlp/extractor/gamejolt.py b/yt_dlp/extractor/gamejolt.py
new file mode 100644
index 0000000..1d3c0b1
--- /dev/null
+++ b/yt_dlp/extractor/gamejolt.py
@@ -0,0 +1,537 @@
+import itertools
+import json
+import math
+
+from .common import InfoExtractor
+from ..compat import compat_urllib_parse_unquote
+from ..utils import (
+ determine_ext,
+ format_field,
+ int_or_none,
+ str_or_none,
+ traverse_obj,
+ try_get
+)
+
+
+class GameJoltBaseIE(InfoExtractor):
+ _API_BASE = 'https://gamejolt.com/site-api/'
+
+ def _call_api(self, endpoint, *args, **kwargs):
+ kwargs.setdefault('headers', {}).update({'Accept': 'image/webp,*/*'})
+ return self._download_json(self._API_BASE + endpoint, *args, **kwargs)['payload']
+
+ def _parse_content_as_text(self, content):
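+ # Game Jolt stores rich text as a nested JSON node tree (ProseMirror-
+ # style); flatten paragraphs and hard breaks into newline-separated text.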
+ outer_contents, joined_contents = content.get('content') or [], []
+ for outer_content in outer_contents:
+ if outer_content.get('type') != 'paragraph':
+ joined_contents.append(self._parse_content_as_text(outer_content))
+ continue
+ inner_contents, inner_content_text = outer_content.get('content') or [], ''
+ for inner_content in inner_contents:
+ if inner_content.get('text'):
+ inner_content_text += inner_content['text']
+ elif inner_content.get('type') == 'hardBreak':
+ inner_content_text += '\n'
+ joined_contents.append(inner_content_text)
+
+ return '\n'.join(joined_contents)
+
+ def _get_comments(self, post_num_id, post_hash_id):
+ sort_by, scroll_id = self._configuration_arg('comment_sort', ['hot'], ie_key=GameJoltIE.ie_key())[0], -1
+ is_scrolled = sort_by in ('new', 'you')
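+ # Most sort orders are page-numbered, but 'new' and 'you' paginate with
+ # a scroll cursor: the posted_on timestamp of the last comment on the
+ # previous page.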
+ for page in itertools.count(1):
+ comments_data = self._call_api(
+ 'comments/Fireside_Post/%s/%s?%s=%d' % (
+ post_num_id, sort_by,
+ 'scroll_id' if is_scrolled else 'page', scroll_id if is_scrolled else page),
+ post_hash_id, note='Downloading comments list page %d' % page)
+ if not comments_data.get('comments'):
+ break
+ for comment in traverse_obj(comments_data, (('comments', 'childComments'), ...), expected_type=dict):
+ yield {
+ 'id': comment['id'],
+ 'text': self._parse_content_as_text(
+ self._parse_json(comment['comment_content'], post_hash_id)),
+ 'timestamp': int_or_none(comment.get('posted_on'), scale=1000),
+ 'like_count': comment.get('votes'),
+ 'author': traverse_obj(comment, ('user', ('display_name', 'name')), expected_type=str_or_none, get_all=False),
+ 'author_id': traverse_obj(comment, ('user', 'username'), expected_type=str_or_none),
+ 'author_thumbnail': traverse_obj(comment, ('user', 'image_avatar'), expected_type=str_or_none),
+ 'parent': comment.get('parent_id') or None,
+ }
+ scroll_id = int_or_none(comments_data['comments'][-1].get('posted_on'))
+
+ def _parse_post(self, post_data):
+ post_id = post_data['hash']
+ lead_content = self._parse_json(post_data.get('lead_content') or '{}', post_id, fatal=False) or {}
+ description, full_description = post_data.get('leadStr') or self._parse_content_as_text(lead_content), None
+ if post_data.get('has_article'):
+ article_content = self._parse_json(
+ post_data.get('article_content')
+ or self._call_api(f'web/posts/article/{post_data.get("id", post_id)}', post_id,
+ note='Downloading article metadata', errnote='Unable to download article metadata', fatal=False).get('article'),
+ post_id, fatal=False)
+ full_description = self._parse_content_as_text(article_content)
+
+ user_data = post_data.get('user') or {}
+ info_dict = {
+ 'extractor_key': GameJoltIE.ie_key(),
+ 'extractor': 'GameJolt',
+ 'webpage_url': str_or_none(post_data.get('url')) or f'https://gamejolt.com/p/{post_id}',
+ 'id': post_id,
+ 'title': description,
+ 'description': full_description or description,
+ 'display_id': post_data.get('slug'),
+ 'uploader': user_data.get('display_name') or user_data.get('name'),
+ 'uploader_id': user_data.get('username'),
+ 'uploader_url': format_field(user_data, 'url', 'https://gamejolt.com%s'),
+ 'categories': [try_get(category, lambda x: '%s - %s' % (x['community']['name'], x['channel'].get('display_title') or x['channel']['title']))
+ for category in post_data.get('communities') or []],
+ 'tags': traverse_obj(
+ lead_content, ('content', ..., 'content', ..., 'marks', ..., 'attrs', 'tag'), expected_type=str_or_none),
+ 'like_count': int_or_none(post_data.get('like_count')),
+ 'comment_count': int_or_none(post_data.get('comment_count'), default=0),
+ 'timestamp': int_or_none(post_data.get('added_on'), scale=1000),
+ 'release_timestamp': int_or_none(post_data.get('published_on'), scale=1000),
+ '__post_extractor': self.extract_comments(post_data.get('id'), post_id)
+ }
+
+ # TODO: Handle multiple videos/embeds?
+ video_data = traverse_obj(post_data, ('videos', ...), expected_type=dict, get_all=False) or {}
+ formats, subtitles, thumbnails = [], {}, []
+ for media in video_data.get('media') or []:
+ media_url, mimetype, ext, media_id = media['img_url'], media.get('filetype', ''), determine_ext(media['img_url']), media.get('type')
+ if mimetype == 'application/vnd.apple.mpegurl' or ext == 'm3u8':
+ hls_formats, hls_subs = self._extract_m3u8_formats_and_subtitles(media_url, post_id, 'mp4', m3u8_id=media_id)
+ formats.extend(hls_formats)
+ subtitles.update(hls_subs)
+ elif mimetype == 'application/dash+xml' or ext == 'mpd':
+ dash_formats, dash_subs = self._extract_mpd_formats_and_subtitles(media_url, post_id, mpd_id=media_id)
+ formats.extend(dash_formats)
+ subtitles.update(dash_subs)
+ elif 'image' in mimetype:
+ thumbnails.append({
+ 'id': media_id,
+ 'url': media_url,
+ 'width': media.get('width'),
+ 'height': media.get('height'),
+ 'filesize': media.get('filesize'),
+ })
+ else:
+ formats.append({
+ 'format_id': media_id,
+ 'url': media_url,
+ 'width': media.get('width'),
+ 'height': media.get('height'),
+ 'filesize': media.get('filesize'),
+ 'acodec': 'none' if 'video-card' in media_url else None,
+ })
+
+ if formats:
+ return {
+ **info_dict,
+ 'formats': formats,
+ 'subtitles': subtitles,
+ 'thumbnails': thumbnails,
+ 'view_count': int_or_none(video_data.get('view_count')),
+ }
+
+ gif_entries = []
+ for media in post_data.get('media', []):
+ if determine_ext(media['img_url']) != 'gif' or 'gif' not in media.get('filetype', ''):
+ continue
+ gif_entries.append({
+ 'id': media['hash'],
+ 'title': media['filename'].split('.')[0],
+ 'formats': [{
+ 'format_id': url_key,
+ 'url': media[url_key],
+ 'width': media.get('width') if url_key == 'img_url' else None,
+ 'height': media.get('height') if url_key == 'img_url' else None,
+ 'filesize': media.get('filesize') if url_key == 'img_url' else None,
+ 'acodec': 'none',
+ } for url_key in ('img_url', 'mediaserver_url', 'mediaserver_url_mp4', 'mediaserver_url_webm') if media.get(url_key)]
+ })
+ if gif_entries:
+ return {
+ '_type': 'playlist',
+ **info_dict,
+ 'entries': gif_entries,
+ }
+
+ embed_url = traverse_obj(post_data, ('embeds', ..., 'url'), expected_type=str_or_none, get_all=False)
+ if embed_url:
+ return self.url_result(embed_url)
+ return info_dict
+
+
+class GameJoltIE(GameJoltBaseIE):
+ _VALID_URL = r'https?://(?:www\.)?gamejolt\.com/p/(?:[\w-]*-)?(?P<id>\w{8})'
+ _TESTS = [{
+ # No audio
+ 'url': 'https://gamejolt.com/p/introducing-ramses-jackson-some-fnf-himbo-i-ve-been-animating-fo-c6achnzu',
+ 'md5': 'cd5f733258f6678b0ce500dd88166d86',
+ 'info_dict': {
+ 'id': 'c6achnzu',
+ 'ext': 'mp4',
+ 'display_id': 'introducing-ramses-jackson-some-fnf-himbo-i-ve-been-animating-fo-c6achnzu',
+ 'title': 'Introducing Ramses Jackson, some FNF himbo I’ve been animating for the past few days, hehe.\n#fnfmod #fridaynightfunkin',
+ 'description': 'Introducing Ramses Jackson, some FNF himbo I’ve been animating for the past few days, hehe.\n#fnfmod #fridaynightfunkin',
+ 'uploader': 'Jakeneutron',
+ 'uploader_id': 'Jakeneutron',
+ 'uploader_url': 'https://gamejolt.com/@Jakeneutron',
+ 'categories': ['Friday Night Funkin\' - Videos'],
+ 'tags': ['fnfmod', 'fridaynightfunkin'],
+ 'timestamp': 1633499590,
+ 'upload_date': '20211006',
+ 'release_timestamp': 1633499655,
+ 'release_date': '20211006',
+ 'thumbnail': 're:^https?://.+wgch9mhq.png$',
+ 'like_count': int,
+ 'comment_count': int,
+ 'view_count': int,
+ }
+ }, {
+ # YouTube embed
+ 'url': 'https://gamejolt.com/p/hey-hey-if-there-s-anyone-who-s-looking-to-get-into-learning-a-n6g4jzpq',
+ 'md5': '79a931ff500a5c783ef6c3bda3272e32',
+ 'info_dict': {
+ 'id': 'XsNA_mzC0q4',
+ 'title': 'Adobe Animate CC 2021 Tutorial || Part 1 - The Basics',
+ 'description': 'md5:9d1ab9e2625b3fe1f42b2a44c67fdd13',
+ 'uploader': 'Jakeneutron',
+ 'uploader_id': 'Jakeneutron',
+ 'uploader_url': 'http://www.youtube.com/user/Jakeneutron',
+ 'ext': 'mp4',
+ 'duration': 1749,
+ 'tags': ['Adobe Animate CC', 'Tutorial', 'Animation', 'The Basics', 'For Beginners'],
+ 'like_count': int,
+ 'playable_in_embed': True,
+ 'categories': ['Education'],
+ 'availability': 'public',
+ 'thumbnail': 'https://i.ytimg.com/vi_webp/XsNA_mzC0q4/maxresdefault.webp',
+ 'age_limit': 0,
+ 'live_status': 'not_live',
+ 'channel_url': 'https://www.youtube.com/channel/UC6_L7fnczNalFZyBthUE9oA',
+ 'channel': 'Jakeneutron',
+ 'channel_id': 'UC6_L7fnczNalFZyBthUE9oA',
+ 'upload_date': '20211015',
+ 'view_count': int,
+ 'chapters': 'count:18',
+ }
+ }, {
+ # Article
+ 'url': 'https://gamejolt.com/p/i-fuckin-broke-chaos-d56h3eue',
+ 'md5': '786c1ccf98fde02c03a2768acb4258d0',
+ 'info_dict': {
+ 'id': 'd56h3eue',
+ 'ext': 'mp4',
+ 'display_id': 'i-fuckin-broke-chaos-d56h3eue',
+ 'title': 'I fuckin broke Chaos.',
+ 'description': 'I moved my tab durning the cutscene so now it\'s stuck like this.',
+ 'uploader': 'Jeff____________',
+ 'uploader_id': 'The_Nyesh_Man',
+ 'uploader_url': 'https://gamejolt.com/@The_Nyesh_Man',
+ 'categories': ['Friday Night Funkin\' - Videos'],
+ 'timestamp': 1639800264,
+ 'upload_date': '20211218',
+ 'release_timestamp': 1639800330,
+ 'release_date': '20211218',
+ 'thumbnail': 're:^https?://.+euksy8bd.png$',
+ 'like_count': int,
+ 'comment_count': int,
+ 'view_count': int,
+ }
+ }, {
+ # Single GIF
+ 'url': 'https://gamejolt.com/p/hello-everyone-i-m-developing-a-pixel-art-style-mod-for-fnf-and-i-vs4gdrd8',
+ 'info_dict': {
+ 'id': 'vs4gdrd8',
+ 'display_id': 'hello-everyone-i-m-developing-a-pixel-art-style-mod-for-fnf-and-i-vs4gdrd8',
+ 'title': 'md5:cc3d8b031d9bc7ec2ec5a9ffc707e1f9',
+ 'description': 'md5:cc3d8b031d9bc7ec2ec5a9ffc707e1f9',
+ 'uploader': 'Quesoguy',
+ 'uploader_id': 'CheeseguyDev',
+ 'uploader_url': 'https://gamejolt.com/@CheeseguyDev',
+ 'categories': ['Game Dev - General', 'Arts n\' Crafts - Creations', 'Pixel Art - showcase',
+ 'Friday Night Funkin\' - Mods', 'Newgrounds - Friday Night Funkin (13+)'],
+ 'timestamp': 1639517122,
+ 'release_timestamp': 1639519966,
+ 'like_count': int,
+ 'comment_count': int,
+ },
+ 'playlist': [{
+ 'info_dict': {
+ 'id': 'dszyjnwi',
+ 'ext': 'webm',
+ 'title': 'gif-presentacion-mejorado-dszyjnwi',
+ }
+ }],
+ 'playlist_count': 1,
+ }, {
+ # Multiple GIFs
+ 'url': 'https://gamejolt.com/p/gif-yhsqkumq',
+ 'playlist_count': 35,
+ 'info_dict': {
+ 'id': 'yhsqkumq',
+ 'display_id': 'gif-yhsqkumq',
+ 'title': 'GIF',
+ 'description': 'GIF',
+ 'uploader': 'DaniilTvman',
+ 'uploader_id': 'DaniilTvman',
+ 'uploader_url': 'https://gamejolt.com/@DaniilTvman',
+ 'categories': ['Five Nights At The AGK Studio Comunity - NEWS game'],
+ 'timestamp': 1638721559,
+ 'release_timestamp': 1638722276,
+ 'like_count': int,
+ 'comment_count': int,
+ },
+ }]
+
+ def _real_extract(self, url):
+ post_id = self._match_id(url)
+ post_data = self._call_api(
+ f'web/posts/view/{post_id}', post_id)['post']
+ return self._parse_post(post_data)
+
+
+class GameJoltPostListBaseIE(GameJoltBaseIE):
+ def _entries(self, endpoint, list_id, note='Downloading post list', errnote='Unable to download post list', initial_items=None):
+ page_num, scroll_id = 1, None
+ items = initial_items or self._call_api(endpoint, list_id, note=note, errnote=errnote)['items']
+ while items:
+ for item in items:
+ yield self._parse_post(item['action_resource_model'])
+ scroll_id = items[-1]['scroll_id']
+ page_num += 1
+ items = self._call_api(
+ endpoint, list_id, note=f'{note} page {page_num}', errnote=errnote, data=json.dumps({
+ 'scrollDirection': 'from',
+ 'scrollId': scroll_id,
+ }).encode('utf-8')).get('items')
+
+
+class GameJoltUserIE(GameJoltPostListBaseIE):
+ _VALID_URL = r'https?://(?:www\.)?gamejolt\.com/@(?P<id>[\w-]+)'
+ _TESTS = [{
+ 'url': 'https://gamejolt.com/@BlazikenSuperStar',
+ 'playlist_mincount': 1,
+ 'info_dict': {
+ 'id': '6116784',
+ 'title': 'S. Blaze',
+ 'description': 'md5:5ba7fbbb549e8ea2545aafbfe22eb03a',
+ },
+ 'params': {
+ 'ignore_no_formats_error': True,
+ },
+ 'expected_warnings': ['skipping format', 'No video formats found', 'Requested format is not available'],
+ }]
+
+ def _real_extract(self, url):
+ user_id = self._match_id(url)
+ user_data = self._call_api(
+ f'web/profile/@{user_id}', user_id, note='Downloading user info', errnote='Unable to download user info')['user']
+ bio = self._parse_content_as_text(
+ self._parse_json(user_data.get('bio_content', '{}'), user_id, fatal=False) or {})
+ return self.playlist_result(
+ self._entries(f'web/posts/fetch/user/@{user_id}?tab=active', user_id, 'Downloading user posts', 'Unable to download user posts'),
+ str_or_none(user_data.get('id')), user_data.get('display_name') or user_data.get('name'), bio)
+
+
+class GameJoltGameIE(GameJoltPostListBaseIE):
+ _VALID_URL = r'https?://(?:www\.)?gamejolt\.com/games/[\w-]+/(?P<id>\d+)'
+ _TESTS = [{
+ 'url': 'https://gamejolt.com/games/Friday4Fun/655124',
+ 'playlist_mincount': 2,
+ 'info_dict': {
+ 'id': '655124',
+ 'title': 'Friday Night Funkin\': Friday 4 Fun',
+ 'description': 'md5:576a7dd87912a2dcf33c50d2bd3966d3'
+ },
+ 'params': {
+ 'ignore_no_formats_error': True,
+ },
+ 'expected_warnings': ['skipping format', 'No video formats found', 'Requested format is not available'],
+ }]
+
+ def _real_extract(self, url):
+ game_id = self._match_id(url)
+ game_data = self._call_api(
+ f'web/discover/games/{game_id}', game_id, note='Downloading game info', errnote='Unable to download game info')['game']
+ description = self._parse_content_as_text(
+ self._parse_json(game_data.get('description_content', '{}'), game_id, fatal=False) or {})
+ return self.playlist_result(
+ self._entries(f'web/posts/fetch/game/{game_id}', game_id, 'Downloading game posts', 'Unable to download game posts'),
+ game_id, game_data.get('title'), description)
+
+
+class GameJoltGameSoundtrackIE(GameJoltBaseIE):
+ _VALID_URL = r'https?://(?:www\.)?gamejolt\.com/get/soundtrack(?:\?|\#!?)(?:.*?[&;])??game=(?P<id>\d+)'
+ _TESTS = [{
+ 'url': 'https://gamejolt.com/get/soundtrack?foo=bar&game=657899',
+ 'info_dict': {
+ 'id': '657899',
+ 'title': 'Friday Night Funkin\': Vs Oswald',
+ },
+ 'playlist': [{
+ 'info_dict': {
+ 'id': '184434',
+ 'ext': 'mp3',
+ 'title': 'Gettin\' Lucky (Menu Music)',
+ 'url': r're:^https://.+vs-oswald-menu-music\.mp3$',
+ 'release_timestamp': 1635190816,
+ 'release_date': '20211025',
+ }
+ }, {
+ 'info_dict': {
+ 'id': '184435',
+ 'ext': 'mp3',
+ 'title': 'Rabbit\'s Luck (Extended Version)',
+ 'url': r're:^https://.+rabbit-s-luck--full-version-\.mp3$',
+ 'release_timestamp': 1635190841,
+ 'release_date': '20211025',
+ }
+ }, {
+ 'info_dict': {
+ 'id': '185228',
+ 'ext': 'mp3',
+ 'title': 'Last Straw',
+ 'url': r're:^https://.+last-straw\.mp3$',
+ 'release_timestamp': 1635881104,
+ 'release_date': '20211102',
+ }
+ }],
+ 'playlist_count': 3,
+ }]
+
+ def _real_extract(self, url):
+ game_id = self._match_id(url)
+ game_overview = self._call_api(
+ f'web/discover/games/overview/{game_id}', game_id, note='Downloading soundtrack info', errnote='Unable to download soundtrack info')
+ return self.playlist_result([{
+ 'id': str_or_none(song.get('id')),
+ 'title': str_or_none(song.get('title')),
+ 'url': str_or_none(song.get('url')),
+ 'release_timestamp': int_or_none(song.get('posted_on'), scale=1000),
+ } for song in game_overview.get('songs') or []], game_id, traverse_obj(
+ game_overview, ('microdata', 'name'), (('twitter', 'fb'), 'title'), expected_type=str_or_none, get_all=False))
+
+
+class GameJoltCommunityIE(GameJoltPostListBaseIE):
+ _VALID_URL = r'https?://(?:www\.)?gamejolt\.com/c/(?P<id>(?P<community>[\w-]+)(?:/(?P<channel>[\w-]+))?)(?:(?:\?|\#!?)(?:.*?[&;])??sort=(?P<sort>\w+))?'
+ _TESTS = [{
+ 'url': 'https://gamejolt.com/c/fnf/videos',
+ 'playlist_mincount': 50,
+ 'info_dict': {
+ 'id': 'fnf/videos',
+ 'title': 'Friday Night Funkin\' - Videos',
+ 'description': 'md5:6d8c06f27460f7d35c1554757ffe53c8'
+ },
+ 'params': {
+ 'playlistend': 50,
+ 'ignore_no_formats_error': True,
+ },
+ 'expected_warnings': ['skipping format', 'No video formats found', 'Requested format is not available'],
+ }, {
+ 'url': 'https://gamejolt.com/c/youtubers',
+ 'playlist_mincount': 50,
+ 'info_dict': {
+ 'id': 'youtubers/featured',
+ 'title': 'Youtubers - featured',
+ 'description': 'md5:53e5582c93dcc467ab597bfca4db17d4'
+ },
+ 'params': {
+ 'playlistend': 50,
+ 'ignore_no_formats_error': True,
+ },
+ 'expected_warnings': ['skipping format', 'No video formats found', 'Requested format is not available'],
+ }]
+
+ def _real_extract(self, url):
+ display_id, community_id, channel_id, sort_by = self._match_valid_url(url).group('id', 'community', 'channel', 'sort')
+ channel_id, sort_by = channel_id or 'featured', sort_by or 'new'
+
+ community_data = self._call_api(
+ f'web/communities/view/{community_id}', display_id,
+ note='Downloading community info', errnote='Unable to download community info')['community']
+ channel_data = traverse_obj(self._call_api(
+ f'web/communities/view-channel/{community_id}/{channel_id}', display_id,
+ note='Downloading channel info', errnote='Unable to download channel info', fatal=False), 'channel') or {}
+
+ title = f'{community_data.get("name") or community_id} - {channel_data.get("display_title") or channel_id}'
+ description = self._parse_content_as_text(
+ self._parse_json(community_data.get('description_content') or '{}', display_id, fatal=False) or {})
+ return self.playlist_result(
+ self._entries(
+ f'web/posts/fetch/community/{community_id}?channels[]={sort_by}&channels[]={channel_id}',
+ display_id, 'Downloading community posts', 'Unable to download community posts'),
+ f'{community_id}/{channel_id}', title, description)
+
+
+class GameJoltSearchIE(GameJoltPostListBaseIE):
+ _VALID_URL = r'https?://(?:www\.)?gamejolt\.com/search(?:/(?P<filter>communities|users|games))?(?:\?|\#!?)(?:.*?[&;])??q=(?P<id>[^&#]+)'
+ _URL_FORMATS = {
+ 'users': 'https://gamejolt.com/@{username}',
+ 'communities': 'https://gamejolt.com/c/{path}',
+ 'games': 'https://gamejolt.com/games/{slug}/{id}',
+ }
+ _TESTS = [{
+ 'url': 'https://gamejolt.com/search?foo=bar&q=%23fnf',
+ 'playlist_mincount': 50,
+ 'info_dict': {
+ 'id': '#fnf',
+ 'title': '#fnf',
+ },
+ 'params': {
+ 'playlistend': 50,
+ 'ignore_no_formats_error': True,
+ },
+ 'expected_warnings': ['skipping format', 'No video formats found', 'Requested format is not available'],
+ }, {
+ 'url': 'https://gamejolt.com/search/communities?q=cookie%20run',
+ 'playlist_mincount': 10,
+ 'info_dict': {
+ 'id': 'cookie run',
+ 'title': 'cookie run',
+ },
+ }, {
+ 'url': 'https://gamejolt.com/search/users?q=mlp',
+ 'playlist_mincount': 278,
+ 'info_dict': {
+ 'id': 'mlp',
+ 'title': 'mlp',
+ },
+ }, {
+ 'url': 'https://gamejolt.com/search/games?q=roblox',
+ 'playlist_mincount': 688,
+ 'info_dict': {
+ 'id': 'roblox',
+ 'title': 'roblox',
+ },
+ }]
+
+ def _search_entries(self, query, filter_mode, display_query):
+ initial_search_data = self._call_api(
+ f'web/search/{filter_mode}?q={query}', display_query,
+ note=f'Downloading {filter_mode} list', errnote=f'Unable to download {filter_mode} list')
+ entries_num = traverse_obj(initial_search_data, 'count', f'{filter_mode}Count')
+ if not entries_num:
+ return
+ for page in range(1, math.ceil(entries_num / initial_search_data['perPage']) + 1):
+ search_results = self._call_api(
+ f'web/search/{filter_mode}?q={query}&page={page}', display_query,
+ note=f'Downloading {filter_mode} list page {page}', errnote=f'Unable to download {filter_mode} list')
+ for result in search_results[filter_mode]:
+ yield self.url_result(self._URL_FORMATS[filter_mode].format(**result))
+
+ def _real_extract(self, url):
+ filter_mode, query = self._match_valid_url(url).group('filter', 'id')
+ display_query = compat_urllib_parse_unquote(query)
+ return self.playlist_result(
+ self._search_entries(query, filter_mode, display_query) if filter_mode else self._entries(
+ f'web/posts/fetch/search/{query}', display_query, initial_items=self._call_api(
+ f'web/search?q={query}', display_query,
+ note='Downloading initial post list', errnote='Unable to download initial post list')['posts']),
+ display_query, display_query)
diff --git a/yt_dlp/extractor/gamespot.py b/yt_dlp/extractor/gamespot.py
new file mode 100644
index 0000000..8dec252
--- /dev/null
+++ b/yt_dlp/extractor/gamespot.py
@@ -0,0 +1,75 @@
+from .once import OnceIE
+from ..compat import compat_urllib_parse_unquote
+
+
+class GameSpotIE(OnceIE):
+ _VALID_URL = r'https?://(?:www\.)?gamespot\.com/(?:video|article|review)s/(?:[^/]+/\d+-|embed/)(?P<id>\d+)'
+ _TESTS = [{
+ 'url': 'http://www.gamespot.com/videos/arma-3-community-guide-sitrep-i/2300-6410818/',
+ 'md5': 'b2a30deaa8654fcccd43713a6b6a4825',
+ 'info_dict': {
+ 'id': 'gs-2300-6410818',
+ 'ext': 'mp4',
+ 'title': 'Arma 3 - Community Guide: SITREP I',
+ 'description': 'Check out this video where some of the basics of Arma 3 is explained.',
+ },
+ 'skip': 'manifest URL gives HTTP Error 404: Not Found',
+ }, {
+ 'url': 'http://www.gamespot.com/videos/the-witcher-3-wild-hunt-xbox-one-now-playing/2300-6424837/',
+ 'md5': '173ea87ad762cf5d3bf6163dceb255a6',
+ 'info_dict': {
+ 'id': 'gs-2300-6424837',
+ 'ext': 'mp4',
+ 'title': 'Now Playing - The Witcher 3: Wild Hunt',
+ 'description': 'Join us as we take a look at the early hours of The Witcher 3: Wild Hunt and more.',
+ },
+ }, {
+ 'url': 'https://www.gamespot.com/videos/embed/6439218/',
+ 'only_matching': True,
+ }, {
+ 'url': 'https://www.gamespot.com/articles/the-last-of-us-2-receives-new-ps4-trailer/1100-6454469/',
+ 'only_matching': True,
+ }, {
+ 'url': 'https://www.gamespot.com/reviews/gears-of-war-review/1900-6161188/',
+ 'only_matching': True,
+ }]
+
+ def _real_extract(self, url):
+ page_id = self._match_id(url)
+ webpage = self._download_webpage(url, page_id)
+ data_video = self._parse_json(self._html_search_regex(
+ r'data-video=(["\'])({.*?})\1', webpage,
+ 'video data', group=2), page_id)
+ title = compat_urllib_parse_unquote(data_video['title'])
+ streams = data_video['videoStreams']
+ formats = []
+
+ m3u8_url = streams.get('adaptive_stream')
+ if m3u8_url:
+ m3u8_formats = self._extract_m3u8_formats(
+ m3u8_url, page_id, 'mp4', 'm3u8_native',
+ m3u8_id='hls', fatal=False)
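+ # Each HLS variant is also available as a progressive MP4 at the same
+ # path, so synthesize an HTTP format per variant (based on observed CDN
+ # behavior, not a documented guarantee).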
+ for f in m3u8_formats:
+ formats.append(f)
+ http_f = f.copy()
+ del http_f['manifest_url']
+ http_f.update({
+ 'format_id': f['format_id'].replace('hls-', 'http-'),
+ 'protocol': 'http',
+ 'url': f['url'].replace('.m3u8', '.mp4'),
+ })
+ formats.append(http_f)
+
+ mpd_url = streams.get('adaptive_dash')
+ if mpd_url:
+ formats.extend(self._extract_mpd_formats(
+ mpd_url, page_id, mpd_id='dash', fatal=False))
+
+ return {
+ 'id': data_video.get('guid') or page_id,
+ 'display_id': page_id,
+ 'title': title,
+ 'formats': formats,
+ 'description': self._html_search_meta('description', webpage),
+ 'thumbnail': self._og_search_thumbnail(webpage),
+ }
diff --git a/yt_dlp/extractor/gamestar.py b/yt_dlp/extractor/gamestar.py
new file mode 100644
index 0000000..e9966f5
--- /dev/null
+++ b/yt_dlp/extractor/gamestar.py
@@ -0,0 +1,60 @@
+from .common import InfoExtractor
+from ..utils import (
+ int_or_none,
+ remove_end,
+)
+
+
+class GameStarIE(InfoExtractor):
+ _VALID_URL = r'https?://(?:www\.)?game(?P<site>pro|star)\.de/videos/.*,(?P<id>[0-9]+)\.html'
+ _TESTS = [{
+ 'url': 'http://www.gamestar.de/videos/trailer,3/hobbit-3-die-schlacht-der-fuenf-heere,76110.html',
+ 'md5': 'ee782f1f8050448c95c5cacd63bc851c',
+ 'info_dict': {
+ 'id': '76110',
+ 'ext': 'mp4',
+ 'title': 'Hobbit 3: Die Schlacht der Fünf Heere - Teaser-Trailer zum dritten Teil',
+ 'description': 'Der Teaser-Trailer zu Hobbit 3: Die Schlacht der Fünf Heere zeigt einige Szenen aus dem dritten Teil der Saga und kündigt den...',
+ 'thumbnail': r're:^https?://.*\.jpg$',
+ 'timestamp': 1406542380,
+ 'upload_date': '20140728',
+ 'duration': 17,
+ }
+ }, {
+ 'url': 'http://www.gamepro.de/videos/top-10-indie-spiele-fuer-nintendo-switch-video-tolle-nindies-games-zum-download,95316.html',
+ 'only_matching': True,
+ }, {
+ 'url': 'http://www.gamestar.de/videos/top-10-indie-spiele-fuer-nintendo-switch-video-tolle-nindies-games-zum-download,95316.html',
+ 'only_matching': True,
+ }]
+
+ def _real_extract(self, url):
+ mobj = self._match_valid_url(url)
+ site = mobj.group('site')
+ video_id = mobj.group('id')
+
+ webpage = self._download_webpage(url, video_id)
+
+ # TODO: there are multiple ld+json objects in the webpage,
+ # while _search_json_ld finds only the first one
+ json_ld = self._parse_json(self._search_regex(
+ r'(?s)<script[^>]+type=(["\'])application/ld\+json\1[^>]*>(?P<json_ld>[^<]+VideoObject[^<]+)</script>',
+ webpage, 'JSON-LD', group='json_ld'), video_id)
+ info_dict = self._json_ld(json_ld, video_id)
+ info_dict['title'] = remove_end(
+ info_dict['title'], ' - Game%s' % site.title())
+
+ view_count = int_or_none(json_ld.get('interactionCount'))
+ comment_count = int_or_none(self._html_search_regex(
+ r'<span>Kommentare</span>\s*<span[^>]+class=["\']count[^>]+>\s*\(\s*([0-9]+)',
+ webpage, 'comment count', fatal=False))
+
+ info_dict.update({
+ 'id': video_id,
+ 'url': 'http://gamestar.de/_misc/videos/portal/getVideoUrl.cfm?premium=0&videoId=' + video_id,
+ 'ext': 'mp4',
+ 'view_count': view_count,
+ 'comment_count': comment_count
+ })
+
+ return info_dict
diff --git a/yt_dlp/extractor/gaskrank.py b/yt_dlp/extractor/gaskrank.py
new file mode 100644
index 0000000..bc56b03
--- /dev/null
+++ b/yt_dlp/extractor/gaskrank.py
@@ -0,0 +1,96 @@
+import re
+from .common import InfoExtractor
+from ..utils import (
+ float_or_none,
+ int_or_none,
+ unified_strdate,
+)
+
+
+class GaskrankIE(InfoExtractor):
+ _VALID_URL = r'https?://(?:www\.)?gaskrank\.tv/tv/(?P<categories>[^/]+)/(?P<id>[^/]+)\.htm'
+ _TESTS = [{
+ 'url': 'http://www.gaskrank.tv/tv/motorrad-fun/strike-einparken-durch-anfaenger-crash-mit-groesserem-flurschaden.htm',
+ 'md5': '1ae88dbac97887d85ebd1157a95fc4f9',
+ 'info_dict': {
+ 'id': '201601/26955',
+ 'ext': 'mp4',
+ 'title': 'Strike! Einparken können nur Männer - Flurschaden hält sich in Grenzen *lol*',
+ 'thumbnail': r're:^https?://.*\.jpg$',
+ 'categories': ['motorrad-fun'],
+ 'display_id': 'strike-einparken-durch-anfaenger-crash-mit-groesserem-flurschaden',
+ 'uploader_id': 'Bikefun',
+ 'upload_date': '20170110',
+ }
+ }, {
+ 'url': 'http://www.gaskrank.tv/tv/racing/isle-of-man-tt-2011-michael-du-15920.htm',
+ 'md5': 'c33ee32c711bc6c8224bfcbe62b23095',
+ 'info_dict': {
+ 'id': '201106/15920',
+ 'ext': 'mp4',
+ 'title': 'Isle of Man - Michael Dunlop vs Guy Martin - schwindelig kucken',
+ 'thumbnail': r're:^https?://.*\.jpg$',
+ 'categories': ['racing'],
+ 'display_id': 'isle-of-man-tt-2011-michael-du-15920',
+ 'uploader_id': 'IOM',
+ 'upload_date': '20170523',
+ 'uploader_url': 'www.iomtt.com',
+ }
+ }]
+
+ def _real_extract(self, url):
+ display_id = self._match_id(url)
+
+ webpage = self._download_webpage(url, display_id)
+
+ title = self._og_search_title(
+ webpage, default=None) or self._html_search_meta(
+ 'title', webpage, fatal=True)
+
+ categories = [self._match_valid_url(url).group('categories')]
+
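+ # Uploader and date come from the "Video von: ... | vom: DD.MM.YYYY"
+ # byline; both remain None when the byline is absent.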
+ mobj = re.search(
+ r'Video von:\s*(?P<uploader_id>[^|]*?)\s*\|\s*vom:\s*(?P<upload_date>[0-9][0-9]\.[0-9][0-9]\.[0-9][0-9][0-9][0-9])',
+ webpage)
+ uploader_id = upload_date = None
+ if mobj is not None:
+ uploader_id = mobj.groupdict().get('uploader_id')
+ upload_date = unified_strdate(mobj.groupdict().get('upload_date'))
+
+ uploader_url = self._search_regex(
+ r'Homepage:\s*<[^>]*>(?P<uploader_url>[^<]*)',
+ webpage, 'uploader_url', default=None)
+ tags = re.findall(
+ r'/tv/tags/[^/]+/"\s*>(?P<tag>[^<]*?)<',
+ webpage)
+
+ view_count = self._search_regex(
+ r'class\s*=\s*"gkRight"(?:[^>]*>\s*<[^>]*)*icon-eye-open(?:[^>]*>\s*<[^>]*)*>\s*(?P<view_count>[0-9\.]*)',
+ webpage, 'view_count', default=None)
+ if view_count:
+ view_count = int_or_none(view_count.replace('.', ''))
+
+ average_rating = self._search_regex(
+ r'itemprop\s*=\s*"ratingValue"[^>]*>\s*(?P<average_rating>[0-9,]+)',
+ webpage, 'average_rating')
+ if average_rating:
+ average_rating = float_or_none(average_rating.replace(',', '.'))
+
+ video_id = self._search_regex(
+ r'https?://movies\.gaskrank\.tv/([^-]*?)(-[^\.]*)?\.mp4',
+ webpage, 'video id', default=display_id)
+
+ entry = self._parse_html5_media_entries(url, webpage, video_id)[0]
+ entry.update({
+ 'id': video_id,
+ 'title': title,
+ 'categories': categories,
+ 'display_id': display_id,
+ 'uploader_id': uploader_id,
+ 'upload_date': upload_date,
+ 'uploader_url': uploader_url,
+ 'tags': tags,
+ 'view_count': view_count,
+ 'average_rating': average_rating,
+ })
+
+ return entry
diff --git a/yt_dlp/extractor/gazeta.py b/yt_dlp/extractor/gazeta.py
new file mode 100644
index 0000000..8925b69
--- /dev/null
+++ b/yt_dlp/extractor/gazeta.py
@@ -0,0 +1,44 @@
+from .common import InfoExtractor
+
+
+class GazetaIE(InfoExtractor):
+ _WORKING = False
+ _VALID_URL = r'(?P<url>https?://(?:www\.)?gazeta\.ru/(?:[^/]+/)?video/(?:main/)*(?:\d{4}/\d{2}/\d{2}/)?(?P<id>[A-Za-z0-9-_.]+)\.s?html)'
+ _TESTS = [{
+ 'url': 'http://www.gazeta.ru/video/main/zadaite_vopros_vladislavu_yurevichu.shtml',
+ 'md5': 'd49c9bdc6e5a7888f27475dc215ee789',
+ 'info_dict': {
+ 'id': '205566',
+ 'ext': 'mp4',
+ 'title': '«70–80 процентов гражданских в Донецке на грани голода»',
+ 'description': 'md5:38617526050bd17b234728e7f9620a71',
+ 'thumbnail': r're:^https?://.*\.jpg',
+ },
+ 'skip': 'video not found',
+ }, {
+ 'url': 'http://www.gazeta.ru/lifestyle/video/2015/03/08/master-klass_krasivoi_byt._delaem_vesennii_makiyazh.shtml',
+ 'only_matching': True,
+ }, {
+ 'url': 'http://www.gazeta.ru/video/main/main/2015/06/22/platit_ili_ne_platit_po_isku_yukosa.shtml',
+ 'md5': '37f19f78355eb2f4256ee1688359f24c',
+ 'info_dict': {
+ 'id': '252048',
+ 'ext': 'mp4',
+ 'title': '"Если по иску ЮКОСа придется платить, это будет большой удар по бюджету"',
+ },
+ 'add_ie': ['EaglePlatform'],
+ }]
+
+ def _real_extract(self, url):
+ mobj = self._match_valid_url(url)
+
+ display_id = mobj.group('id')
+ embed_url = '%s?p=embed' % mobj.group('url')
+ embed_page = self._download_webpage(
+ embed_url, display_id, 'Downloading embed page')
+
+ video_id = self._search_regex(
+ r'<div[^>]*?class="eagleplayer"[^>]*?data-id="([^"]+)"', embed_page, 'video id')
+
+ return self.url_result(
+ 'eagleplatform:gazeta.media.eagleplatform.com:%s' % video_id, 'EaglePlatform')
diff --git a/yt_dlp/extractor/gdcvault.py b/yt_dlp/extractor/gdcvault.py
new file mode 100644
index 0000000..b4d81b2
--- /dev/null
+++ b/yt_dlp/extractor/gdcvault.py
@@ -0,0 +1,214 @@
+import re
+
+from .common import InfoExtractor
+from .kaltura import KalturaIE
+from ..networking import HEADRequest, Request
+from ..utils import remove_start, smuggle_url, urlencode_postdata
+
+
+class GDCVaultIE(InfoExtractor):
+ _WORKING = False
+ _VALID_URL = r'https?://(?:www\.)?gdcvault\.com/play/(?P<id>\d+)(?:/(?P<name>[\w-]+))?'
+ _NETRC_MACHINE = 'gdcvault'
+ _TESTS = [
+ {
+ 'url': 'http://www.gdcvault.com/play/1019721/Doki-Doki-Universe-Sweet-Simple',
+ 'md5': '7ce8388f544c88b7ac11c7ab1b593704',
+ 'info_dict': {
+ 'id': '201311826596_AWNY',
+ 'display_id': 'Doki-Doki-Universe-Sweet-Simple',
+ 'ext': 'mp4',
+ 'title': 'Doki-Doki Universe: Sweet, Simple and Genuine (GDC Next 10)'
+ }
+ },
+ {
+ 'url': 'http://www.gdcvault.com/play/1015683/Embracing-the-Dark-Art-of',
+ 'info_dict': {
+ 'id': '201203272_1330951438328RSXR',
+ 'display_id': 'Embracing-the-Dark-Art-of',
+ 'ext': 'flv',
+ 'title': 'Embracing the Dark Art of Mathematical Modeling in AI'
+ },
+ 'params': {
+ 'skip_download': True, # Requires rtmpdump
+ }
+ },
+ {
+ 'url': 'http://www.gdcvault.com/play/1015301/Thexder-Meets-Windows-95-or',
+ 'md5': 'a5eb77996ef82118afbbe8e48731b98e',
+ 'info_dict': {
+ 'id': '1015301',
+ 'display_id': 'Thexder-Meets-Windows-95-or',
+ 'ext': 'flv',
+ 'title': 'Thexder Meets Windows 95, or Writing Great Games in the Windows 95 Environment',
+ },
+ 'skip': 'Requires login',
+ },
+ {
+ 'url': 'http://gdcvault.com/play/1020791/',
+ 'only_matching': True,
+ },
+ {
+ # Hard-coded hostname
+ 'url': 'http://gdcvault.com/play/1023460/Tenacious-Design-and-The-Interface',
+ 'md5': 'a8efb6c31ed06ca8739294960b2dbabd',
+ 'info_dict': {
+ 'id': '840376_BQRC',
+ 'ext': 'mp4',
+ 'display_id': 'Tenacious-Design-and-The-Interface',
+ 'title': 'Tenacious Design and The Interface of \'Destiny\'',
+ },
+ },
+ {
+ # Multiple audios
+ 'url': 'http://www.gdcvault.com/play/1014631/Classic-Game-Postmortem-PAC',
+ 'info_dict': {
+ 'id': '12396_1299111843500GMPX',
+ 'ext': 'mp4',
+ 'title': 'How to Create a Good Game - From My Experience of Designing Pac-Man',
+ },
+ # 'params': {
+ # 'skip_download': True, # Requires rtmpdump
+ # 'format': 'jp', # The japanese audio
+ # }
+ },
+ {
+ # gdc-player.html
+ 'url': 'http://www.gdcvault.com/play/1435/An-American-engine-in-Tokyo',
+ 'info_dict': {
+ 'id': '9350_1238021887562UHXB',
+ 'display_id': 'An-American-engine-in-Tokyo',
+ 'ext': 'mp4',
+ 'title': 'An American Engine in Tokyo:/nThe collaboration of Epic Games and Square Enix/nFor THE LAST REMINANT',
+ },
+ },
+ {
+ # Kaltura Embed
+ 'url': 'https://www.gdcvault.com/play/1026180/Mastering-the-Apex-of-Scaling',
+ 'info_dict': {
+ 'id': '0_h1fg8j3p',
+ 'ext': 'mp4',
+ 'title': 'Mastering the Apex of Scaling Game Servers (Presented by Multiplay)',
+ 'timestamp': 1554401811,
+ 'upload_date': '20190404',
+ 'uploader_id': 'joe@blazestreaming.com',
+ },
+ 'params': {
+ 'format': 'mp4-408',
+ },
+ },
+ {
+ # Kaltura embed, whitespace between quote and embedded URL in iframe's src
+ 'url': 'https://www.gdcvault.com/play/1025699',
+ 'info_dict': {
+ 'id': '0_zagynv0a',
+ 'ext': 'mp4',
+ 'title': 'Tech Toolbox',
+ 'upload_date': '20190408',
+ 'uploader_id': 'joe@blazestreaming.com',
+ 'timestamp': 1554764629,
+ },
+ 'params': {
+ 'skip_download': True,
+ },
+ },
+ {
+ # HTML5 video
+ 'url': 'http://www.gdcvault.com/play/1014846/Conference-Keynote-Shigeru',
+ 'only_matching': True,
+ },
+ ]
+
+ def _login(self, webpage_url, display_id):
+ username, password = self._get_login_info()
+ if username is None or password is None:
+ self.report_warning('It looks like ' + webpage_url + ' requires a login. Try specifying a username and password and try again.')
+ return None
+
+ mobj = re.match(r'(?P<root_url>https?://.*?/).*', webpage_url)
+ login_url = mobj.group('root_url') + 'api/login.php'
+ logout_url = mobj.group('root_url') + 'logout'
+
+ login_form = {
+ 'email': username,
+ 'password': password,
+ }
+
+ request = Request(login_url, urlencode_postdata(login_form))
+ request.headers['Content-Type'] = 'application/x-www-form-urlencoded'
+ self._download_webpage(request, display_id, 'Logging in')
+ start_page = self._download_webpage(webpage_url, display_id, 'Getting authenticated video page')
+ self._download_webpage(logout_url, display_id, 'Logging out')
+
+ return start_page
+
+ def _real_extract(self, url):
+ video_id, name = self._match_valid_url(url).groups()
+ display_id = name or video_id
+
+ webpage_url = 'http://www.gdcvault.com/play/' + video_id
+ start_page = self._download_webpage(webpage_url, display_id)
+
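+ # Vault pages come in several flavors: a legacy Flash player with a
+ # direct file URL, a Kaltura embed, an iframe'd XML-config player
+ # (DigitallySpeaking) or a plain HTML5 <video>; try each in turn.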
+ direct_url = self._search_regex(
+ r's1\.addVariable\("file",\s*encodeURIComponent\("(/[^"]+)"\)\);',
+ start_page, 'url', default=None)
+ if direct_url:
+ title = self._html_search_regex(
+ r'<td><strong>Session Name:?</strong></td>\s*<td>(.*?)</td>',
+ start_page, 'title')
+ video_url = 'http://www.gdcvault.com' + direct_url
+ # resolve the url so that we can detect the correct extension
+ video_url = self._request_webpage(
+ HEADRequest(video_url), video_id).url
+
+ return {
+ 'id': video_id,
+ 'display_id': display_id,
+ 'url': video_url,
+ 'title': title,
+ }
+
+ embed_url = KalturaIE._extract_url(start_page)
+ if embed_url:
+ embed_url = smuggle_url(embed_url, {'source_url': url})
+ ie_key = 'Kaltura'
+ else:
+ PLAYER_REGEX = r'<iframe src="(?P<xml_root>.+?)/(?:gdc-)?player.*?\.html.*?".*?</iframe>'
+
+ xml_root = self._html_search_regex(
+ PLAYER_REGEX, start_page, 'xml root', default=None)
+ if xml_root is None:
+ # Probably need to authenticate
+ login_res = self._login(webpage_url, display_id)
+ if login_res is None:
+ self.report_warning('Could not log in.')
+ else:
+ start_page = login_res
+ # Grab the url from the authenticated page
+ xml_root = self._html_search_regex(
+ PLAYER_REGEX, start_page, 'xml root')
+
+ xml_name = self._html_search_regex(
+ r'<iframe src=".*?\?xml(?:=|URL=xml/)(.+?\.xml).*?".*?</iframe>',
+ start_page, 'xml filename', default=None)
+ if not xml_name:
+ info = self._parse_html5_media_entries(url, start_page, video_id)[0]
+ info.update({
+ 'title': remove_start(self._search_regex(
+ r'>Session Name:\s*<.*?>\s*<td>(.+?)</td>', start_page,
+ 'title', default=None) or self._og_search_title(
+ start_page, default=None), 'GDC Vault - '),
+ 'id': video_id,
+ 'display_id': display_id,
+ })
+ return info
+ embed_url = '%s/xml/%s' % (xml_root, xml_name)
+ ie_key = 'DigitallySpeaking'
+
+ return {
+ '_type': 'url_transparent',
+ 'id': video_id,
+ 'display_id': display_id,
+ 'url': embed_url,
+ 'ie_key': ie_key,
+ }
diff --git a/yt_dlp/extractor/gedidigital.py b/yt_dlp/extractor/gedidigital.py
new file mode 100644
index 0000000..1878d63
--- /dev/null
+++ b/yt_dlp/extractor/gedidigital.py
@@ -0,0 +1,198 @@
+import re
+
+from .common import InfoExtractor
+from ..utils import (
+ base_url,
+ determine_ext,
+ int_or_none,
+ url_basename,
+ urljoin,
+)
+
+
+class GediDigitalIE(InfoExtractor):
+ _VALID_URL = r'''(?x:(?P<base_url>(?:https?:)//video\.
+ (?:
+ (?:
+ (?:espresso\.)?repubblica
+ |lastampa
+ |ilsecoloxix
+ |huffingtonpost
+ )|
+ (?:
+ iltirreno
+ |messaggeroveneto
+ |ilpiccolo
+ |gazzettadimantova
+ |mattinopadova
+ |laprovinciapavese
+ |tribunatreviso
+ |nuovavenezia
+ |gazzettadimodena
+ |lanuovaferrara
+ |corrierealpi
+ |lasentinella
+ )\.gelocal
+ )\.it(?:/[^/]+){2,4}/(?P<id>\d+))(?:$|[?&].*))'''
+ _EMBED_REGEX = [rf'''(?x)
+ (?:
+ data-frame-src=|
+ <iframe[^\n]+src=
+ )
+ (["'])(?P<url>{_VALID_URL})\1''']
+ _TESTS = [{
+ 'url': 'https://video.lastampa.it/politica/il-paradosso-delle-regionali-la-lega-vince-ma-sembra-aver-perso/121559/121683',
+ 'md5': '84658d7fb9e55a6e57ecc77b73137494',
+ 'info_dict': {
+ 'id': '121683',
+ 'ext': 'mp4',
+ 'title': 'Il paradosso delle Regionali: ecco perché la Lega vince ma sembra aver perso',
+ 'description': 'md5:de7f4d6eaaaf36c153b599b10f8ce7ca',
+ 'thumbnail': r're:^https://www\.repstatic\.it/video/photo/.+?-thumb-full-.+?\.jpg$',
+ 'duration': 125,
+ },
+ }, {
+ 'url': 'https://video.huffingtonpost.it/embed/politica/cotticelli-non-so-cosa-mi-sia-successo-sto-cercando-di-capire-se-ho-avuto-un-malore/29312/29276?responsive=true&el=video971040871621586700',
+ 'only_matching': True,
+ }, {
+ 'url': 'https://video.espresso.repubblica.it/embed/tutti-i-video/01-ted-villa/14772/14870&width=640&height=360',
+ 'only_matching': True,
+ }, {
+ 'url': 'https://video.repubblica.it/motori/record-della-pista-a-spa-francorchamps-la-pagani-huayra-roadster-bc-stupisce/367415/367963',
+ 'only_matching': True,
+ }, {
+ 'url': 'https://video.ilsecoloxix.it/sport/cassani-e-i-brividi-azzurri-ai-mondiali-di-imola-qui-mi-sono-innamorato-del-ciclismo-da-ragazzino-incredibile-tornarci-da-ct/66184/66267',
+ 'only_matching': True,
+ }, {
+ 'url': 'https://video.iltirreno.gelocal.it/sport/dentro-la-notizia-ferrari-cosa-succede-a-maranello/141059/142723',
+ 'only_matching': True,
+ }, {
+ 'url': 'https://video.messaggeroveneto.gelocal.it/locale/maria-giovanna-elmi-covid-vaccino/138155/139268',
+ 'only_matching': True,
+ }, {
+ 'url': 'https://video.ilpiccolo.gelocal.it/dossier/big-john/dinosauro-big-john-al-via-le-visite-guidate-a-trieste/135226/135751',
+ 'only_matching': True,
+ }, {
+ 'url': 'https://video.gazzettadimantova.gelocal.it/locale/dal-ponte-visconteo-di-valeggio-l-and-8217sos-dei-ristoratori-aprire-anche-a-cena/137310/137818',
+ 'only_matching': True,
+ }, {
+ 'url': 'https://video.mattinopadova.gelocal.it/dossier/coronavirus-in-veneto/covid-a-vo-un-anno-dopo-un-cuore-tricolore-per-non-dimenticare/138402/138964',
+ 'only_matching': True,
+ }, {
+ 'url': 'https://video.laprovinciapavese.gelocal.it/locale/mede-zona-rossa-via-alle-vaccinazioni-per-gli-over-80/137545/138120',
+ 'only_matching': True,
+ }, {
+ 'url': 'https://video.tribunatreviso.gelocal.it/dossier/coronavirus-in-veneto/ecco-le-prima-vaccinazioni-di-massa-nella-marca/134485/135024',
+ 'only_matching': True,
+ }, {
+ 'url': 'https://video.nuovavenezia.gelocal.it/locale/camion-troppo-alto-per-il-ponte-ferroviario-perde-il-carico/135734/136266',
+ 'only_matching': True,
+ }, {
+ 'url': 'https://video.gazzettadimodena.gelocal.it/locale/modena-scoperta-la-proteina-che-predice-il-livello-di-gravita-del-covid/139109/139796',
+ 'only_matching': True,
+ }, {
+ 'url': 'https://video.lanuovaferrara.gelocal.it/locale/due-bombole-di-gpl-aperte-e-abbandonate-i-vigili-bruciano-il-gas/134391/134957',
+ 'only_matching': True,
+ }, {
+ 'url': 'https://video.corrierealpi.gelocal.it/dossier/cortina-2021-i-mondiali-di-sci-alpino/mondiali-di-sci-il-timelapse-sulla-splendida-olympia/133760/134331',
+ 'only_matching': True,
+ }, {
+ 'url': 'https://video.lasentinella.gelocal.it/locale/vestigne-centra-un-auto-e-si-ribalta/138931/139466',
+ 'only_matching': True,
+ }, {
+ 'url': 'https://video.espresso.repubblica.it/tutti-i-video/01-ted-villa/14772',
+ 'only_matching': True,
+ }]
+
+ @staticmethod
+ def _sanitize_urls(urls):
+ # add protocol if missing
+ for i, e in enumerate(urls):
+ if e.startswith('//'):
+ urls[i] = 'https:%s' % e
+ # clean iframes urls
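+        # e.g. 'https://video.repubblica.it/a/b/123?autoplay=true' ->
+        # 'https://video.repubblica.it/a/b/123' (query string dropped)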
+ for i, e in enumerate(urls):
+ urls[i] = urljoin(base_url(e), url_basename(e))
+ return urls
+
+ @classmethod
+ def _extract_embed_urls(cls, url, webpage):
+        return cls._sanitize_urls(list(super()._extract_embed_urls(url, webpage)))
+
+ @staticmethod
+ def _clean_formats(formats):
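+        # drop duplicate URLs, plus formats that should carry audio but
+        # have no known audio codec; `formats` is rewritten in place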
+ format_urls = set()
+ clean_formats = []
+ for f in formats:
+ if f['url'] not in format_urls:
+ if f.get('audio_ext') != 'none' and not f.get('acodec'):
+ continue
+ format_urls.add(f['url'])
+ clean_formats.append(f)
+ formats[:] = clean_formats
+
+ def _real_extract(self, url):
+ video_id, url = self._match_valid_url(url).group('id', 'base_url')
+ webpage = self._download_webpage(url, video_id)
+ title = self._html_search_meta(
+ ['twitter:title', 'og:title'], webpage, fatal=True)
+ player_data = re.findall(
+ r"PlayerFactory\.setParam\('(?P<type>format|param)',\s*'(?P<name>[^']+)',\s*'(?P<val>[^']+)'\);",
+ webpage)
+
+ formats = []
+ duration = thumb = None
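+        # each PlayerFactory.setParam(type, name, value) call becomes a
+        # (t, n, v) triple: 'format' entries carry media URLs keyed by
+        # format name, 'param' entries carry metadata (thumbnail, duration)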
+ for t, n, v in player_data:
+ if t == 'format':
+ if n in ('video-hds-vod-ec', 'video-hls-vod-ec', 'video-viralize', 'video-youtube-pfp'):
+ continue
+ elif n.endswith('-vod-ak'):
+ formats.extend(self._extract_akamai_formats(
+ v, video_id, {'http': 'media.gedidigital.it'}))
+ else:
+ ext = determine_ext(v)
+ if ext == 'm3u8':
+ formats.extend(self._extract_m3u8_formats(
+ v, video_id, 'mp4', 'm3u8_native', m3u8_id=n, fatal=False))
+ continue
+ f = {
+ 'format_id': n,
+ 'url': v,
+ }
+ if ext == 'mp3':
+ abr = int_or_none(self._search_regex(
+ r'-mp3-audio-(\d+)', v, 'abr', default=None))
+ f.update({
+ 'abr': abr,
+ 'tbr': abr,
+ 'acodec': ext,
+ 'vcodec': 'none'
+ })
+ else:
+ mobj = re.match(r'^video-rrtv-(\d+)(?:-(\d+))?$', n)
+ if mobj:
+ f.update({
+ 'height': int(mobj.group(1)),
+ 'vbr': int_or_none(mobj.group(2)),
+ })
+ if not f.get('vbr'):
+ f['vbr'] = int_or_none(self._search_regex(
+                            r'-video-rrtv-(\d+)', v, 'vbr', default=None))
+ formats.append(f)
+ elif t == 'param':
+ if n in ['image_full', 'image']:
+ thumb = v
+ elif n == 'videoDuration':
+ duration = int_or_none(v)
+
+ self._clean_formats(formats)
+
+ return {
+ 'id': video_id,
+ 'title': title,
+ 'description': self._html_search_meta(
+ ['twitter:description', 'og:description', 'description'], webpage),
+ 'thumbnail': thumb or self._og_search_thumbnail(webpage),
+ 'formats': formats,
+ 'duration': duration,
+ }
diff --git a/yt_dlp/extractor/generic.py b/yt_dlp/extractor/generic.py
new file mode 100644
index 0000000..9d82515
--- /dev/null
+++ b/yt_dlp/extractor/generic.py
@@ -0,0 +1,2849 @@
+import os
+import re
+import types
+import urllib.parse
+import xml.etree.ElementTree
+
+from .common import InfoExtractor # isort: split
+from .commonprotocols import RtmpIE
+from .youtube import YoutubeIE
+from ..compat import compat_etree_fromstring
+from ..utils import (
+ KNOWN_EXTENSIONS,
+ MEDIA_EXTENSIONS,
+ ExtractorError,
+ UnsupportedError,
+ determine_ext,
+ determine_protocol,
+ dict_get,
+ extract_basic_auth,
+ filter_dict,
+ format_field,
+ int_or_none,
+ is_html,
+ js_to_json,
+ merge_dicts,
+ mimetype2ext,
+ orderedSet,
+ parse_duration,
+ parse_resolution,
+ smuggle_url,
+ str_or_none,
+ traverse_obj,
+ try_call,
+ unescapeHTML,
+ unified_timestamp,
+ unsmuggle_url,
+ update_url_query,
+ url_or_none,
+ urlhandle_detect_ext,
+ urljoin,
+ variadic,
+ xpath_attr,
+ xpath_text,
+ xpath_with_ns,
+)
+
+
+class GenericIE(InfoExtractor):
+ IE_DESC = 'Generic downloader that works on some sites'
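+    # catch-all pattern: this extractor is tried only after every
+    # site-specific extractor has declined the URL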
+ _VALID_URL = r'.*'
+ IE_NAME = 'generic'
+ _NETRC_MACHINE = False # Suppress username warning
+ _TESTS = [
+ # Direct link to a video
+ {
+ 'url': 'http://media.w3.org/2010/05/sintel/trailer.mp4',
+ 'md5': '67d406c2bcb6af27fa886f31aa934bbe',
+ 'info_dict': {
+ 'id': 'trailer',
+ 'ext': 'mp4',
+ 'title': 'trailer',
+ 'upload_date': '20100513',
+ 'direct': True,
+ 'timestamp': 1273772943.0,
+ }
+ },
+        # Direct link to media delivered compressed (until the Accept-Encoding header is '*')
+ {
+ 'url': 'http://calimero.tk/muzik/FictionJunction-Parallel_Hearts.flac',
+ 'md5': '128c42e68b13950268b648275386fc74',
+ 'info_dict': {
+ 'id': 'FictionJunction-Parallel_Hearts',
+ 'ext': 'flac',
+ 'title': 'FictionJunction-Parallel_Hearts',
+ 'upload_date': '20140522',
+ },
+ 'expected_warnings': [
+ 'URL could be a direct video link, returning it as such.'
+ ],
+ 'skip': 'URL invalid',
+ },
+ # Direct download with broken HEAD
+ {
+ 'url': 'http://ai-radio.org:8000/radio.opus',
+ 'info_dict': {
+ 'id': 'radio',
+ 'ext': 'opus',
+ 'title': 'radio',
+ },
+ 'params': {
+ 'skip_download': True, # infinite live stream
+ },
+ 'expected_warnings': [
+ r'501.*Not Implemented',
+ r'400.*Bad Request',
+ ],
+ },
+ # Direct link with incorrect MIME type
+ {
+ 'url': 'http://ftp.nluug.nl/video/nluug/2014-11-20_nj14/zaal-2/5_Lennart_Poettering_-_Systemd.webm',
+ 'md5': '4ccbebe5f36706d85221f204d7eb5913',
+ 'info_dict': {
+ 'url': 'http://ftp.nluug.nl/video/nluug/2014-11-20_nj14/zaal-2/5_Lennart_Poettering_-_Systemd.webm',
+ 'id': '5_Lennart_Poettering_-_Systemd',
+ 'ext': 'webm',
+ 'title': '5_Lennart_Poettering_-_Systemd',
+ 'upload_date': '20141120',
+ 'direct': True,
+ 'timestamp': 1416498816.0,
+ },
+ 'expected_warnings': [
+ 'URL could be a direct video link, returning it as such.'
+ ]
+ },
+ # RSS feed
+ {
+ 'url': 'http://phihag.de/2014/youtube-dl/rss2.xml',
+ 'info_dict': {
+ 'id': 'https://phihag.de/2014/youtube-dl/rss2.xml',
+ 'title': 'Zero Punctuation',
+ 'description': 're:.*groundbreaking video review series.*'
+ },
+ 'playlist_mincount': 11,
+ },
+ # RSS feed with enclosure
+ {
+ 'url': 'http://podcastfeeds.nbcnews.com/audio/podcast/MSNBC-MADDOW-NETCAST-M4V.xml',
+ 'info_dict': {
+ 'id': 'http://podcastfeeds.nbcnews.com/nbcnews/video/podcast/MSNBC-MADDOW-NETCAST-M4V.xml',
+ 'title': 'MSNBC Rachel Maddow (video)',
+ 'description': 're:.*her unique approach to storytelling.*',
+ },
+ 'playlist': [{
+ 'info_dict': {
+ 'ext': 'mov',
+ 'id': 'pdv_maddow_netcast_mov-12-03-2020-223726',
+ 'title': 'MSNBC Rachel Maddow (video) - 12-03-2020-223726',
+ 'description': 're:.*her unique approach to storytelling.*',
+ 'upload_date': '20201204',
+ },
+ }],
+ 'skip': 'Dead link',
+ },
+ # RSS feed with item with description and thumbnails
+ {
+ 'url': 'https://anchor.fm/s/dd00e14/podcast/rss',
+ 'info_dict': {
+ 'id': 'https://anchor.fm/s/dd00e14/podcast/rss',
+ 'title': 're:.*100% Hydrogen.*',
+ 'description': 're:.*In this episode.*',
+ },
+ 'playlist': [{
+ 'info_dict': {
+ 'ext': 'm4a',
+ 'id': '818a5d38-01cd-152f-2231-ee479677fa82',
+ 'title': 're:Hydrogen!',
+ 'description': 're:.*In this episode we are going.*',
+ 'timestamp': 1567977776,
+ 'upload_date': '20190908',
+ 'duration': 423,
+ 'thumbnail': r're:^https?://.*\.jpg$',
+ 'episode_number': 1,
+ 'season_number': 1,
+ 'age_limit': 0,
+ 'season': 'Season 1',
+ 'direct': True,
+ 'episode': 'Episode 1',
+ },
+ }],
+ 'params': {
+ 'skip_download': True,
+ },
+ },
+ # RSS feed with enclosures and unsupported link URLs
+ {
+ 'url': 'http://www.hellointernet.fm/podcast?format=rss',
+ 'info_dict': {
+ 'id': 'http://www.hellointernet.fm/podcast?format=rss',
+ 'description': 'CGP Grey and Brady Haran talk about YouTube, life, work, whatever.',
+ 'title': 'Hello Internet',
+ },
+ 'playlist_mincount': 100,
+ },
+ # RSS feed with guid
+ {
+ 'url': 'https://www.omnycontent.com/d/playlist/a7b4f8fe-59d9-4afc-a79a-a90101378abf/bf2c1d80-3656-4449-9d00-a903004e8f84/efbff746-e7c1-463a-9d80-a903004e8f8f/podcast.rss',
+ 'info_dict': {
+ 'id': 'https://www.omnycontent.com/d/playlist/a7b4f8fe-59d9-4afc-a79a-a90101378abf/bf2c1d80-3656-4449-9d00-a903004e8f84/efbff746-e7c1-463a-9d80-a903004e8f8f/podcast.rss',
+ 'description': 'md5:be809a44b63b0c56fb485caf68685520',
+ 'title': 'The Little Red Podcast',
+ },
+ 'playlist_mincount': 76,
+ },
+ # SMIL from http://videolectures.net/promogram_igor_mekjavic_eng
+ {
+ 'url': 'http://videolectures.net/promogram_igor_mekjavic_eng/video/1/smil.xml',
+ 'info_dict': {
+ 'id': 'smil',
+ 'ext': 'mp4',
+ 'title': 'Automatics, robotics and biocybernetics',
+ 'description': 'md5:815fc1deb6b3a2bff99de2d5325be482',
+ 'upload_date': '20130627',
+ 'formats': 'mincount:16',
+ 'subtitles': 'mincount:1',
+ },
+ 'params': {
+ 'force_generic_extractor': True,
+ 'skip_download': True,
+ },
+ },
+ # SMIL from http://www1.wdr.de/mediathek/video/livestream/index.html
+ {
+ 'url': 'http://metafilegenerator.de/WDR/WDR_FS/hds/hds.smil',
+ 'info_dict': {
+ 'id': 'hds',
+ 'ext': 'flv',
+ 'title': 'hds',
+ 'formats': 'mincount:1',
+ },
+ 'params': {
+ 'skip_download': True,
+ },
+ },
+ # SMIL from https://www.restudy.dk/video/play/id/1637
+ {
+ 'url': 'https://www.restudy.dk/awsmedia/SmilDirectory/video_1637.xml',
+ 'info_dict': {
+ 'id': 'video_1637',
+ 'ext': 'flv',
+ 'title': 'video_1637',
+ 'formats': 'mincount:3',
+ },
+ 'params': {
+ 'skip_download': True,
+ },
+ },
+ # SMIL from http://adventure.howstuffworks.com/5266-cool-jobs-iditarod-musher-video.htm
+ {
+ 'url': 'http://services.media.howstuffworks.com/videos/450221/smil-service.smil',
+ 'info_dict': {
+ 'id': 'smil-service',
+ 'ext': 'flv',
+ 'title': 'smil-service',
+ 'formats': 'mincount:1',
+ },
+ 'params': {
+ 'skip_download': True,
+ },
+ },
+ # SMIL from http://new.livestream.com/CoheedandCambria/WebsterHall/videos/4719370
+ {
+ 'url': 'http://api.new.livestream.com/accounts/1570303/events/1585861/videos/4719370.smil',
+ 'info_dict': {
+ 'id': '4719370',
+ 'ext': 'mp4',
+ 'title': '571de1fd-47bc-48db-abf9-238872a58d1f',
+ 'formats': 'mincount:3',
+ },
+ 'params': {
+ 'skip_download': True,
+ },
+ },
+ # XSPF playlist from http://www.telegraaf.nl/tv/nieuws/binnenland/24353229/__Tikibad_ontruimd_wegens_brand__.html
+ {
+ 'url': 'http://www.telegraaf.nl/xml/playlist/2015/8/7/mZlp2ctYIUEB.xspf',
+ 'info_dict': {
+ 'id': 'mZlp2ctYIUEB',
+ 'ext': 'mp4',
+ 'title': 'Tikibad ontruimd wegens brand',
+ 'description': 'md5:05ca046ff47b931f9b04855015e163a4',
+ 'thumbnail': r're:^https?://.*\.jpg$',
+ 'duration': 33,
+ },
+ 'params': {
+ 'skip_download': True,
+ },
+ 'skip': '404 Not Found',
+ },
+ # MPD from http://dash-mse-test.appspot.com/media.html
+ {
+ 'url': 'http://yt-dash-mse-test.commondatastorage.googleapis.com/media/car-20120827-manifest.mpd',
+ 'md5': '4b57baab2e30d6eb3a6a09f0ba57ef53',
+ 'info_dict': {
+ 'id': 'car-20120827-manifest',
+ 'ext': 'mp4',
+ 'title': 'car-20120827-manifest',
+ 'formats': 'mincount:9',
+ 'upload_date': '20130904',
+ 'timestamp': 1378272859.0,
+ },
+ },
+ # m3u8 served with Content-Type: audio/x-mpegURL; charset=utf-8
+ {
+ 'url': 'http://once.unicornmedia.com/now/master/playlist/bb0b18ba-64f5-4b1b-a29f-0ac252f06b68/77a785f3-5188-4806-b788-0893a61634ed/93677179-2d99-4ef4-9e17-fe70d49abfbf/content.m3u8',
+ 'info_dict': {
+ 'id': 'content',
+ 'ext': 'mp4',
+ 'title': 'content',
+ 'formats': 'mincount:8',
+ },
+ 'params': {
+ # m3u8 downloads
+ 'skip_download': True,
+ },
+ 'skip': 'video gone',
+ },
+ # m3u8 served with Content-Type: text/plain
+ {
+ 'url': 'http://www.nacentapps.com/m3u8/index.m3u8',
+ 'info_dict': {
+ 'id': 'index',
+ 'ext': 'mp4',
+ 'title': 'index',
+ 'upload_date': '20140720',
+ 'formats': 'mincount:11',
+ },
+ 'params': {
+ # m3u8 downloads
+ 'skip_download': True,
+ },
+ 'skip': 'video gone',
+ },
+ # google redirect
+ {
+ 'url': 'http://www.google.com/url?sa=t&rct=j&q=&esrc=s&source=web&cd=1&cad=rja&ved=0CCUQtwIwAA&url=http%3A%2F%2Fwww.youtube.com%2Fwatch%3Fv%3DcmQHVoWB5FY&ei=F-sNU-LLCaXk4QT52ICQBQ&usg=AFQjCNEw4hL29zgOohLXvpJ-Bdh2bils1Q&bvm=bv.61965928,d.bGE',
+ 'info_dict': {
+ 'id': 'cmQHVoWB5FY',
+ 'ext': 'mp4',
+ 'upload_date': '20130224',
+ 'uploader_id': '@TheVerge',
+                'description': r're:^Chris Ziegler takes a look at the.*',
+ 'uploader': 'The Verge',
+ 'title': 'First Firefox OS phones side-by-side',
+ },
+ 'params': {
+ 'skip_download': False,
+ }
+ },
+ {
+ # redirect in Refresh HTTP header
+ 'url': 'https://www.facebook.com/l.php?u=https%3A%2F%2Fwww.youtube.com%2Fwatch%3Fv%3DpO8h3EaFRdo&h=TAQHsoToz&enc=AZN16h-b6o4Zq9pZkCCdOLNKMN96BbGMNtcFwHSaazus4JHT_MFYkAA-WARTX2kvsCIdlAIyHZjl6d33ILIJU7Jzwk_K3mcenAXoAzBNoZDI_Q7EXGDJnIhrGkLXo_LJ_pAa2Jzbx17UHMd3jAs--6j2zaeto5w9RTn8T_1kKg3fdC5WPX9Dbb18vzH7YFX0eSJmoa6SP114rvlkw6pkS1-T&s=1',
+ 'info_dict': {
+ 'id': 'pO8h3EaFRdo',
+ 'ext': 'mp4',
+ 'title': 'Tripeo Boiler Room x Dekmantel Festival DJ Set',
+ 'description': 'md5:6294cc1af09c4049e0652b51a2df10d5',
+ 'upload_date': '20150917',
+ 'uploader_id': 'brtvofficial',
+ 'uploader': 'Boiler Room',
+ },
+ 'params': {
+ 'skip_download': False,
+ },
+ },
+ {
+ 'url': 'http://www.hodiho.fr/2013/02/regis-plante-sa-jeep.html',
+ 'md5': '85b90ccc9d73b4acd9138d3af4c27f89',
+ 'info_dict': {
+ 'id': '13601338388002',
+ 'ext': 'mp4',
+ 'uploader': 'www.hodiho.fr',
+ 'title': 'R\u00e9gis plante sa Jeep',
+ }
+ },
+ # bandcamp page with custom domain
+ {
+ 'add_ie': ['Bandcamp'],
+ 'url': 'http://bronyrock.com/track/the-pony-mash',
+ 'info_dict': {
+ 'id': '3235767654',
+ 'ext': 'mp3',
+ 'title': 'The Pony Mash',
+ 'uploader': 'M_Pallante',
+ },
+ 'skip': 'There is a limit of 200 free downloads / month for the test song',
+ },
+ # embed.ly video
+ {
+ 'url': 'http://www.tested.com/science/weird/460206-tested-grinding-coffee-2000-frames-second/',
+ 'info_dict': {
+ 'id': '9ODmcdjQcHQ',
+ 'ext': 'mp4',
+ 'title': 'Tested: Grinding Coffee at 2000 Frames Per Second',
+ 'upload_date': '20140225',
+ 'description': 'md5:06a40fbf30b220468f1e0957c0f558ff',
+ 'uploader': 'Tested',
+ 'uploader_id': 'testedcom',
+ },
+ # No need to test YoutubeIE here
+ 'params': {
+ 'skip_download': True,
+ },
+ },
+ # funnyordie embed
+ {
+ 'url': 'http://www.theguardian.com/world/2014/mar/11/obama-zach-galifianakis-between-two-ferns',
+ 'info_dict': {
+ 'id': '18e820ec3f',
+ 'ext': 'mp4',
+ 'title': 'Between Two Ferns with Zach Galifianakis: President Barack Obama',
+ 'description': 'Episode 18: President Barack Obama sits down with Zach Galifianakis for his most memorable interview yet.',
+ },
+ # HEAD requests lead to endless 301, while GET is OK
+ 'expected_warnings': ['301'],
+ },
+ # RUTV embed
+ {
+ 'url': 'http://www.rg.ru/2014/03/15/reg-dfo/anklav-anons.html',
+ 'info_dict': {
+ 'id': '776940',
+ 'ext': 'mp4',
+ 'title': 'Охотское море стало целиком российским',
+ 'description': 'md5:5ed62483b14663e2a95ebbe115eb8f43',
+ },
+ 'params': {
+ # m3u8 download
+ 'skip_download': True,
+ },
+ },
+ # TVC embed
+ {
+ 'url': 'http://sch1298sz.mskobr.ru/dou_edu/karamel_ki/filial_galleries/video/iframe_src_http_tvc_ru_video_iframe_id_55304_isplay_false_acc_video_id_channel_brand_id_11_show_episodes_episode_id_32307_frameb/',
+ 'info_dict': {
+ 'id': '55304',
+ 'ext': 'mp4',
+ 'title': 'Дошкольное воспитание',
+ },
+ },
+ # SportBox embed
+ {
+ 'url': 'http://www.vestifinance.ru/articles/25753',
+ 'info_dict': {
+ 'id': '25753',
+ 'title': 'Прямые трансляции с Форума-выставки "Госзаказ-2013"',
+ },
+ 'playlist': [{
+ 'info_dict': {
+ 'id': '370908',
+ 'title': 'Госзаказ. День 3',
+ 'ext': 'mp4',
+ }
+ }, {
+ 'info_dict': {
+ 'id': '370905',
+ 'title': 'Госзаказ. День 2',
+ 'ext': 'mp4',
+ }
+ }, {
+ 'info_dict': {
+ 'id': '370902',
+ 'title': 'Госзаказ. День 1',
+ 'ext': 'mp4',
+ }
+ }],
+ 'params': {
+ # m3u8 download
+ 'skip_download': True,
+ },
+ },
+ # Myvi.ru embed
+ {
+ 'url': 'http://www.kinomyvi.tv/news/detail/Pervij-dublirovannij-trejler--Uzhastikov-_nOw1',
+ 'info_dict': {
+ 'id': 'f4dafcad-ff21-423d-89b5-146cfd89fa1e',
+ 'ext': 'mp4',
+ 'title': 'Ужастики, русский трейлер (2015)',
+ 'thumbnail': r're:^https?://.*\.jpg$',
+ 'duration': 153,
+ },
+ 'skip': 'Site dead',
+ },
+ # XHamster embed
+ {
+ 'url': 'http://www.numisc.com/forum/showthread.php?11696-FM15-which-pumiscer-was-this-%28-vid-%29-%28-alfa-as-fuck-srx-%29&s=711f5db534502e22260dec8c5e2d66d8',
+ 'info_dict': {
+ 'id': 'showthread',
+ 'title': '[NSFL] [FM15] which pumiscer was this ( vid ) ( alfa as fuck srx )',
+ },
+ 'playlist_mincount': 7,
+            # This forum no longer allows <iframe> syntax;
+            # HTML tags are now displayed as-is
+ 'skip': 'No videos on this page',
+ },
+ # Embedded TED video
+ {
+ 'url': 'http://en.support.wordpress.com/videos/ted-talks/',
+ 'md5': '65fdff94098e4a607385a60c5177c638',
+ 'info_dict': {
+ 'id': '1969',
+ 'ext': 'mp4',
+ 'title': 'Hidden miracles of the natural world',
+ 'uploader': 'Louie Schwartzberg',
+ 'description': 'md5:8145d19d320ff3e52f28401f4c4283b9',
+ }
+ },
+ # nowvideo embed hidden behind percent encoding
+ {
+ 'url': 'http://www.waoanime.tv/the-super-dimension-fortress-macross-episode-1/',
+ 'md5': '2baf4ddd70f697d94b1c18cf796d5107',
+ 'info_dict': {
+ 'id': '06e53103ca9aa',
+ 'ext': 'flv',
+ 'title': 'Macross Episode 001 Watch Macross Episode 001 onl',
+ 'description': 'No description',
+ },
+ },
+ # arte embed
+ {
+ 'url': 'http://www.tv-replay.fr/redirection/20-03-14/x-enius-arte-10753389.html',
+ 'md5': '7653032cbb25bf6c80d80f217055fa43',
+ 'info_dict': {
+ 'id': '048195-004_PLUS7-F',
+ 'ext': 'flv',
+ 'title': 'X:enius',
+ 'description': 'md5:d5fdf32ef6613cdbfd516ae658abf168',
+ 'upload_date': '20140320',
+ },
+ 'params': {
+ 'skip_download': 'Requires rtmpdump'
+ },
+ 'skip': 'video gone',
+ },
+ # francetv embed
+ {
+ 'url': 'http://www.tsprod.com/replay-du-concert-alcaline-de-calogero',
+ 'info_dict': {
+ 'id': 'EV_30231',
+ 'ext': 'mp4',
+ 'title': 'Alcaline, le concert avec Calogero',
+ 'description': 'md5:61f08036dcc8f47e9cfc33aed08ffaff',
+ 'upload_date': '20150226',
+ 'timestamp': 1424989860,
+ 'duration': 5400,
+ },
+ 'params': {
+ # m3u8 downloads
+ 'skip_download': True,
+ },
+ 'expected_warnings': [
+ 'Forbidden'
+ ]
+ },
+ # Condé Nast embed
+ {
+ 'url': 'http://www.wired.com/2014/04/honda-asimo/',
+ 'md5': 'ba0dfe966fa007657bd1443ee672db0f',
+ 'info_dict': {
+ 'id': '53501be369702d3275860000',
+ 'ext': 'mp4',
+ 'title': 'Honda’s New Asimo Robot Is More Human Than Ever',
+ }
+ },
+ # Dailymotion embed
+ {
+ 'url': 'http://www.spi0n.com/zap-spi0n-com-n216/',
+ 'md5': '441aeeb82eb72c422c7f14ec533999cd',
+ 'info_dict': {
+ 'id': 'k2mm4bCdJ6CQ2i7c8o2',
+ 'ext': 'mp4',
+ 'title': 'Le Zap de Spi0n n°216 - Zapping du Web',
+ 'description': 'md5:faf028e48a461b8b7fad38f1e104b119',
+ 'uploader': 'Spi0n',
+ 'uploader_id': 'xgditw',
+ 'upload_date': '20140425',
+ 'timestamp': 1398441542,
+ },
+ 'add_ie': ['Dailymotion'],
+ },
+ # DailyMail embed
+ {
+ 'url': 'http://www.bumm.sk/krimi/2017/07/05/biztonsagi-kamera-buktatta-le-az-agg-ferfit-utlegelo-apolot',
+ 'info_dict': {
+ 'id': '1495629',
+ 'ext': 'mp4',
+ 'title': 'Care worker punches elderly dementia patient in head 11 times',
+ 'description': 'md5:3a743dee84e57e48ec68bf67113199a5',
+ },
+ 'add_ie': ['DailyMail'],
+ 'params': {
+ 'skip_download': True,
+ },
+ },
+ # YouTube embed
+ {
+ 'url': 'http://www.badzine.de/ansicht/datum/2014/06/09/so-funktioniert-die-neue-englische-badminton-liga.html',
+ 'info_dict': {
+ 'id': 'FXRb4ykk4S0',
+ 'ext': 'mp4',
+ 'title': 'The NBL Auction 2014',
+ 'uploader': 'BADMINTON England',
+ 'uploader_id': 'BADMINTONEvents',
+ 'upload_date': '20140603',
+ 'description': 'md5:9ef128a69f1e262a700ed83edb163a73',
+ },
+ 'add_ie': ['Youtube'],
+ 'params': {
+ 'skip_download': True,
+ }
+ },
+ # MTVServices embed
+ {
+ 'url': 'http://www.vulture.com/2016/06/new-key-peele-sketches-released.html',
+ 'md5': 'ca1aef97695ef2c1d6973256a57e5252',
+ 'info_dict': {
+ 'id': '769f7ec0-0692-4d62-9b45-0d88074bffc1',
+ 'ext': 'mp4',
+ 'title': 'Key and Peele|October 10, 2012|2|203|Liam Neesons - Uncensored',
+ 'description': 'Two valets share their love for movie star Liam Neesons.',
+ 'timestamp': 1349922600,
+ 'upload_date': '20121011',
+ },
+ },
+ # YouTube embed via <data-embed-url="">
+ {
+ 'url': 'https://play.google.com/store/apps/details?id=com.gameloft.android.ANMP.GloftA8HM',
+ 'info_dict': {
+ 'id': '4vAffPZIT44',
+ 'ext': 'mp4',
+ 'title': 'Asphalt 8: Airborne - Update - Welcome to Dubai!',
+ 'uploader': 'Gameloft',
+ 'uploader_id': 'gameloft',
+ 'upload_date': '20140828',
+ 'description': 'md5:c80da9ed3d83ae6d1876c834de03e1c4',
+ },
+ 'params': {
+ 'skip_download': True,
+ }
+ },
+ # Flowplayer
+ {
+ 'url': 'http://www.handjobhub.com/video/busty-blonde-siri-tit-fuck-while-wank-6313.html',
+ 'md5': '9d65602bf31c6e20014319c7d07fba27',
+ 'info_dict': {
+ 'id': '5123ea6d5e5a7',
+ 'ext': 'mp4',
+ 'age_limit': 18,
+ 'uploader': 'www.handjobhub.com',
+ 'title': 'Busty Blonde Siri Tit Fuck While Wank at HandjobHub.com',
+ }
+ },
+ # MLB embed
+ {
+ 'url': 'http://umpire-empire.com/index.php/topic/58125-laz-decides-no-thats-low/',
+ 'md5': '96f09a37e44da40dd083e12d9a683327',
+ 'info_dict': {
+ 'id': '33322633',
+ 'ext': 'mp4',
+ 'title': 'Ump changes call to ball',
+ 'description': 'md5:71c11215384298a172a6dcb4c2e20685',
+ 'duration': 48,
+ 'timestamp': 1401537900,
+ 'upload_date': '20140531',
+ 'thumbnail': r're:^https?://.*\.jpg$',
+ },
+ },
+ # Wistia standard embed (async)
+ {
+ 'url': 'https://www.getdrip.com/university/brennan-dunn-drip-workshop/',
+ 'info_dict': {
+ 'id': '807fafadvk',
+ 'ext': 'mp4',
+ 'title': 'Drip Brennan Dunn Workshop',
+ 'description': 'a JV Webinars video from getdrip-1',
+ 'duration': 4986.95,
+ 'timestamp': 1463607249,
+ 'upload_date': '20160518',
+ },
+ 'params': {
+ 'skip_download': True,
+ },
+ 'skip': 'webpage 404 not found',
+ },
+ # Soundcloud embed
+ {
+ 'url': 'http://nakedsecurity.sophos.com/2014/10/29/sscc-171-are-you-sure-that-1234-is-a-bad-password-podcast/',
+ 'info_dict': {
+ 'id': '174391317',
+ 'ext': 'mp3',
+ 'description': 'md5:ff867d6b555488ad3c52572bb33d432c',
+ 'uploader': 'Sophos Security',
+ 'title': 'Chet Chat 171 - Oct 29, 2014',
+ 'upload_date': '20141029',
+ }
+ },
+ # Soundcloud multiple embeds
+ {
+ 'url': 'http://www.guitarplayer.com/lessons/1014/legato-workout-one-hour-to-more-fluid-performance---tab/52809',
+ 'info_dict': {
+ 'id': '52809',
+ 'title': 'Guitar Essentials: Legato Workout—One-Hour to Fluid Performance | TAB + AUDIO',
+ },
+ 'playlist_mincount': 7,
+ },
+ # TuneIn station embed
+ {
+ 'url': 'http://radiocnrv.com/promouvoir-radio-cnrv/',
+ 'info_dict': {
+ 'id': '204146',
+ 'ext': 'mp3',
+ 'title': 'CNRV',
+ 'location': 'Paris, France',
+ 'is_live': True,
+ },
+ 'params': {
+ # Live stream
+ 'skip_download': True,
+ },
+ },
+ # Livestream embed
+ {
+ 'url': 'http://www.esa.int/Our_Activities/Space_Science/Rosetta/Philae_comet_touch-down_webcast',
+ 'info_dict': {
+ 'id': '67864563',
+ 'ext': 'flv',
+ 'upload_date': '20141112',
+ 'title': 'Rosetta #CometLanding webcast HL 10',
+ }
+ },
+ # Another Livestream embed, without 'new.' in URL
+ {
+ 'url': 'https://www.freespeech.org/',
+ 'info_dict': {
+ 'id': '123537347',
+ 'ext': 'mp4',
+ 'title': 're:^FSTV [0-9]{4}-[0-9]{2}-[0-9]{2} [0-9]{2}:[0-9]{2}$',
+ },
+ 'params': {
+ # Live stream
+ 'skip_download': True,
+ },
+ },
+ # LazyYT
+ {
+ 'url': 'https://skiplagged.com/',
+ 'info_dict': {
+ 'id': 'skiplagged',
+ 'title': 'Skiplagged: The smart way to find cheap flights',
+ },
+ 'playlist_mincount': 1,
+ 'add_ie': ['Youtube'],
+ },
+ # Libsyn embed
+ {
+ 'url': 'http://undergroundwellness.com/podcasts/306-5-steps-to-permanent-gut-healing/',
+ 'info_dict': {
+ 'id': '3793998',
+ 'ext': 'mp3',
+ 'upload_date': '20141126',
+ 'title': 'Underground Wellness Radio - Jack Tips: 5 Steps to Permanent Gut Healing',
+ 'thumbnail': 'https://assets.libsyn.com/secure/item/3793998/?height=90&width=90',
+ 'duration': 3989.0,
+ }
+ },
+ # Cinerama player
+ {
+ 'url': 'http://www.abc.net.au/7.30/content/2015/s4164797.htm',
+ 'info_dict': {
+ 'id': '730m_DandD_1901_512k',
+ 'ext': 'mp4',
+ 'uploader': 'www.abc.net.au',
+ 'title': 'Game of Thrones with dice - Dungeons and Dragons fantasy role-playing game gets new life - 19/01/2015',
+ }
+ },
+ # embedded viddler video
+ {
+ 'url': 'http://deadspin.com/i-cant-stop-watching-john-wall-chop-the-nuggets-with-th-1681801597',
+ 'info_dict': {
+ 'id': '4d03aad9',
+ 'ext': 'mp4',
+ 'uploader': 'deadspin',
+ 'title': 'WALL-TO-GORTAT',
+ 'timestamp': 1422285291,
+ 'upload_date': '20150126',
+ },
+ 'add_ie': ['Viddler'],
+ },
+ # Libsyn embed
+ {
+ 'url': 'http://thedailyshow.cc.com/podcast/episodetwelve',
+ 'info_dict': {
+ 'id': '3377616',
+ 'ext': 'mp3',
+ 'title': "The Daily Show Podcast without Jon Stewart - Episode 12: Bassem Youssef: Egypt's Jon Stewart",
+ 'description': 'md5:601cb790edd05908957dae8aaa866465',
+ 'upload_date': '20150220',
+ },
+ 'skip': 'All The Daily Show URLs now redirect to http://www.cc.com/shows/',
+ },
+ # jwplayer YouTube
+ {
+ 'url': 'http://media.nationalarchives.gov.uk/index.php/webinar-using-discovery-national-archives-online-catalogue/',
+ 'info_dict': {
+ 'id': 'Mrj4DVp2zeA',
+ 'ext': 'mp4',
+ 'upload_date': '20150212',
+ 'uploader': 'The National Archives UK',
+ 'description': 'md5:8078af856dca76edc42910b61273dbbf',
+ 'uploader_id': 'NationalArchives08',
+ 'title': 'Webinar: Using Discovery, The National Archives’ online catalogue',
+ },
+ },
+ # jwplayer rtmp
+ {
+ 'url': 'http://www.suffolk.edu/sjc/live.php',
+ 'info_dict': {
+ 'id': 'live',
+ 'ext': 'flv',
+ 'title': 'Massachusetts Supreme Judicial Court Oral Arguments',
+ 'uploader': 'www.suffolk.edu',
+ },
+ 'params': {
+ 'skip_download': True,
+ },
+ 'skip': 'Only has video a few mornings per month, see http://www.suffolk.edu/sjc/',
+ },
+ # jwplayer with only the json URL
+ {
+ 'url': 'https://www.hollywoodreporter.com/news/general-news/dunkirk-team-reveals-what-christopher-nolan-said-oscar-win-meet-your-oscar-winner-1092454',
+ 'info_dict': {
+ 'id': 'TljWkvWH',
+ 'ext': 'mp4',
+ 'upload_date': '20180306',
+ 'title': 'md5:91eb1862f6526415214f62c00b453936',
+ 'description': 'md5:73048ae50ae953da10549d1d2fe9b3aa',
+ 'timestamp': 1520367225,
+ },
+ 'params': {
+ 'skip_download': True,
+ },
+ },
+ # Complex jwplayer
+ {
+ 'url': 'http://www.indiedb.com/games/king-machine/videos',
+ 'info_dict': {
+ 'id': 'videos',
+ 'ext': 'mp4',
+ 'title': 'king machine trailer 1',
+ 'description': 'Browse King Machine videos & audio for sweet media. Your eyes will thank you.',
+ 'thumbnail': r're:^https?://.*\.jpg$',
+ },
+ },
+ {
+ # Youtube embed, formerly: Video.js embed, multiple formats
+ 'url': 'http://ortcam.com/solidworks-урок-6-настройка-чертежа_33f9b7351.html',
+ 'info_dict': {
+ 'id': 'yygqldloqIk',
+ 'ext': 'mp4',
+ 'title': 'SolidWorks. Урок 6 Настройка чертежа',
+ 'description': 'md5:baf95267792646afdbf030e4d06b2ab3',
+ 'upload_date': '20130314',
+ 'uploader': 'PROстое3D',
+ 'uploader_id': 'PROstoe3D',
+ },
+ 'params': {
+ 'skip_download': True,
+ },
+ },
+ {
+ # Video.js embed, single format
+ 'url': 'https://www.vooplayer.com/v3/watch/watch.php?v=NzgwNTg=',
+ 'info_dict': {
+ 'id': 'watch',
+ 'ext': 'mp4',
+ 'title': 'Step 1 - Good Foundation',
+ 'description': 'md5:d1e7ff33a29fc3eb1673d6c270d344f4',
+ },
+ 'params': {
+ 'skip_download': True,
+ },
+ 'skip': '404 Not Found',
+ },
+ # rtl.nl embed
+ {
+ 'url': 'http://www.rtlnieuws.nl/nieuws/buitenland/aanslagen-kopenhagen',
+ 'playlist_mincount': 5,
+ 'info_dict': {
+ 'id': 'aanslagen-kopenhagen',
+ 'title': 'Aanslagen Kopenhagen',
+ }
+ },
+ # Zapiks embed
+ {
+ 'url': 'http://www.skipass.com/news/116090-bon-appetit-s5ep3-baqueira-mi-cor.html',
+ 'info_dict': {
+ 'id': '118046',
+ 'ext': 'mp4',
+ 'title': 'EP3S5 - Bon Appétit - Baqueira Mi Corazon !',
+ }
+ },
+ # Kaltura embed (different embed code)
+ {
+ 'url': 'http://www.premierchristianradio.com/Shows/Saturday/Unbelievable/Conference-Videos/Os-Guinness-Is-It-Fools-Talk-Unbelievable-Conference-2014',
+ 'info_dict': {
+ 'id': '1_a52wc67y',
+ 'ext': 'flv',
+ 'upload_date': '20150127',
+ 'uploader_id': 'PremierMedia',
+ 'timestamp': int,
+ 'title': 'Os Guinness // Is It Fools Talk? // Unbelievable? Conference 2014',
+ },
+ },
+ # Kaltura embed with single quotes
+ {
+ 'url': 'http://fod.infobase.com/p_ViewPlaylist.aspx?AssignmentID=NUN8ZY',
+ 'info_dict': {
+ 'id': '0_izeg5utt',
+ 'ext': 'mp4',
+ 'title': '35871',
+ 'timestamp': 1355743100,
+ 'upload_date': '20121217',
+ 'uploader_id': 'cplapp@learn360.com',
+ },
+ 'add_ie': ['Kaltura'],
+ },
+ {
+ # Kaltura embedded via quoted entry_id
+ 'url': 'https://www.oreilly.com/ideas/my-cloud-makes-pretty-pictures',
+ 'info_dict': {
+ 'id': '0_utuok90b',
+ 'ext': 'mp4',
+ 'title': '06_matthew_brender_raj_dutt',
+ 'timestamp': 1466638791,
+ 'upload_date': '20160622',
+ },
+ 'add_ie': ['Kaltura'],
+ 'expected_warnings': [
+ 'Could not send HEAD request'
+ ],
+ 'params': {
+ 'skip_download': True,
+ }
+ },
+ {
+ # Kaltura embedded, some fileExt broken (#11480)
+ 'url': 'http://www.cornell.edu/video/nima-arkani-hamed-standard-models-of-particle-physics',
+ 'info_dict': {
+ 'id': '1_sgtvehim',
+ 'ext': 'mp4',
+ 'title': 'Our "Standard Models" of particle physics and cosmology',
+ 'description': 'md5:67ea74807b8c4fea92a6f38d6d323861',
+ 'timestamp': 1321158993,
+ 'upload_date': '20111113',
+ 'uploader_id': 'kps1',
+ },
+ 'add_ie': ['Kaltura'],
+ },
+ {
+ # Kaltura iframe embed
+ 'url': 'http://www.gsd.harvard.edu/event/i-m-pei-a-centennial-celebration/',
+ 'md5': 'ae5ace8eb09dc1a35d03b579a9c2cc44',
+ 'info_dict': {
+ 'id': '0_f2cfbpwy',
+ 'ext': 'mp4',
+ 'title': 'I. M. Pei: A Centennial Celebration',
+ 'description': 'md5:1db8f40c69edc46ca180ba30c567f37c',
+ 'upload_date': '20170403',
+ 'uploader_id': 'batchUser',
+ 'timestamp': 1491232186,
+ },
+ 'add_ie': ['Kaltura'],
+ },
+ {
+ # Kaltura iframe embed, more sophisticated
+ 'url': 'http://www.cns.nyu.edu/~eero/math-tools/Videos/lecture-05sep2017.html',
+ 'info_dict': {
+ 'id': '1_9gzouybz',
+ 'ext': 'mp4',
+ 'title': 'lecture-05sep2017',
+ 'description': 'md5:40f347d91fd4ba047e511c5321064b49',
+ 'upload_date': '20170913',
+ 'uploader_id': 'eps2',
+ 'timestamp': 1505340777,
+ },
+ 'params': {
+ 'skip_download': True,
+ },
+ 'add_ie': ['Kaltura'],
+ },
+ {
+ # meta twitter:player
+ 'url': 'http://thechive.com/2017/12/08/all-i-want-for-christmas-is-more-twerk/',
+ 'info_dict': {
+ 'id': '0_01b42zps',
+ 'ext': 'mp4',
+ 'title': 'Main Twerk (Video)',
+ 'upload_date': '20171208',
+ 'uploader_id': 'sebastian.salinas@thechive.com',
+ 'timestamp': 1512713057,
+ },
+ 'params': {
+ 'skip_download': True,
+ },
+ 'add_ie': ['Kaltura'],
+ },
+ # referrer protected EaglePlatform embed
+ {
+ 'url': 'https://tvrain.ru/lite/teleshow/kak_vse_nachinalos/namin-418921/',
+ 'info_dict': {
+ 'id': '582306',
+ 'ext': 'mp4',
+ 'title': 'Стас Намин: «Мы нарушили девственность Кремля»',
+ 'thumbnail': r're:^https?://.*\.jpg$',
+ 'duration': 3382,
+ 'view_count': int,
+ },
+ 'params': {
+ 'skip_download': True,
+ },
+ },
+ # ClipYou (EaglePlatform) embed (custom URL)
+ {
+ 'url': 'http://muz-tv.ru/play/7129/',
+ # Not checking MD5 as sometimes the direct HTTP link results in 404 and HLS is used
+ 'info_dict': {
+ 'id': '12820',
+ 'ext': 'mp4',
+ 'title': "'O Sole Mio",
+ 'thumbnail': r're:^https?://.*\.jpg$',
+ 'duration': 216,
+ 'view_count': int,
+ },
+ 'params': {
+ 'skip_download': True,
+ },
+ 'skip': 'This video is unavailable.',
+ },
+ # Pladform embed
+ {
+ 'url': 'http://muz-tv.ru/kinozal/view/7400/',
+ 'info_dict': {
+ 'id': '100183293',
+ 'ext': 'mp4',
+ 'title': 'Тайны перевала Дятлова • 1 серия 2 часть',
+ 'description': 'Документальный сериал-расследование одной из самых жутких тайн ХХ века',
+ 'thumbnail': r're:^https?://.*\.jpg$',
+ 'duration': 694,
+ 'age_limit': 0,
+ },
+ 'skip': 'HTTP Error 404: Not Found',
+ },
+ # Playwire embed
+ {
+ 'url': 'http://www.cinemablend.com/new/First-Joe-Dirt-2-Trailer-Teaser-Stupid-Greatness-70874.html',
+ 'info_dict': {
+ 'id': '3519514',
+ 'ext': 'mp4',
+ 'title': 'Joe Dirt 2 Beautiful Loser Teaser Trailer',
+ 'thumbnail': r're:^https?://.*\.png$',
+ 'duration': 45.115,
+ },
+ },
+ # Crooks and Liars embed
+ {
+ 'url': 'http://crooksandliars.com/2015/04/fox-friends-says-protecting-atheists',
+ 'info_dict': {
+ 'id': '8RUoRhRi',
+ 'ext': 'mp4',
+ 'title': "Fox & Friends Says Protecting Atheists From Discrimination Is Anti-Christian!",
+ 'description': 'md5:e1a46ad1650e3a5ec7196d432799127f',
+ 'timestamp': 1428207000,
+ 'upload_date': '20150405',
+ 'uploader': 'Heather',
+ },
+ },
+ # Crooks and Liars external embed
+ {
+ 'url': 'http://theothermccain.com/2010/02/02/video-proves-that-bill-kristol-has-been-watching-glenn-beck/comment-page-1/',
+ 'info_dict': {
+ 'id': 'MTE3MjUtMzQ2MzA',
+ 'ext': 'mp4',
+ 'title': 'md5:5e3662a81a4014d24c250d76d41a08d5',
+ 'description': 'md5:9b8e9542d6c3c5de42d6451b7d780cec',
+ 'timestamp': 1265032391,
+ 'upload_date': '20100201',
+ 'uploader': 'Heather',
+ },
+ },
+ # NBC Sports vplayer embed
+ {
+ 'url': 'http://www.riderfans.com/forum/showthread.php?121827-Freeman&s=e98fa1ea6dc08e886b1678d35212494a',
+ 'info_dict': {
+ 'id': 'ln7x1qSThw4k',
+ 'ext': 'flv',
+ 'title': "PFT Live: New leader in the 'new-look' defense",
+ 'description': 'md5:65a19b4bbfb3b0c0c5768bed1dfad74e',
+ 'uploader': 'NBCU-SPORTS',
+ 'upload_date': '20140107',
+ 'timestamp': 1389118457,
+ },
+ 'skip': 'Invalid Page URL',
+ },
+ # NBC News embed
+ {
+ 'url': 'http://www.vulture.com/2016/06/letterman-couldnt-care-less-about-late-night.html',
+ 'md5': '1aa589c675898ae6d37a17913cf68d66',
+ 'info_dict': {
+ 'id': 'x_dtl_oa_LettermanliftPR_160608',
+ 'ext': 'mp4',
+ 'title': 'David Letterman: A Preview',
+ 'description': 'A preview of Tom Brokaw\'s interview with David Letterman as part of the On Assignment series powered by Dateline. Airs Sunday June 12 at 7/6c.',
+ 'upload_date': '20160609',
+ 'timestamp': 1465431544,
+ 'uploader': 'NBCU-NEWS',
+ },
+ },
+ # UDN embed
+ {
+ 'url': 'https://video.udn.com/news/300346',
+ 'md5': 'fd2060e988c326991037b9aff9df21a6',
+ 'info_dict': {
+ 'id': '300346',
+ 'ext': 'mp4',
+ 'title': '中一中男師變性 全校師生力挺',
+ 'thumbnail': r're:^https?://.*\.jpg$',
+ },
+ 'params': {
+ # m3u8 download
+ 'skip_download': True,
+ },
+ 'expected_warnings': ['Failed to parse JSON Expecting value'],
+ },
+ # Kinja embed
+ {
+ 'url': 'http://www.clickhole.com/video/dont-understand-bitcoin-man-will-mumble-explanatio-2537',
+ 'info_dict': {
+ 'id': '106351',
+ 'ext': 'mp4',
+ 'title': 'Don’t Understand Bitcoin? This Man Will Mumble An Explanation At You',
+ 'description': 'Migrated from OnionStudios',
+ 'thumbnail': r're:^https?://.*\.jpe?g$',
+ 'uploader': 'clickhole',
+ 'upload_date': '20150527',
+ 'timestamp': 1432744860,
+ }
+ },
+ # SnagFilms embed
+ {
+ 'url': 'http://whilewewatch.blogspot.ru/2012/06/whilewewatch-whilewewatch-gripping.html',
+ 'info_dict': {
+ 'id': '74849a00-85a9-11e1-9660-123139220831',
+ 'ext': 'mp4',
+ 'title': '#whilewewatch',
+ }
+ },
+ # AdobeTVVideo embed
+ {
+ 'url': 'https://helpx.adobe.com/acrobat/how-to/new-experience-acrobat-dc.html?set=acrobat--get-started--essential-beginners',
+ 'md5': '43662b577c018ad707a63766462b1e87',
+ 'info_dict': {
+ 'id': '2456',
+ 'ext': 'mp4',
+ 'title': 'New experience with Acrobat DC',
+ 'description': 'New experience with Acrobat DC',
+ 'duration': 248.667,
+ },
+ },
+ # Another form of arte.tv embed
+ {
+ 'url': 'http://www.tv-replay.fr/redirection/09-04-16/arte-reportage-arte-11508975.html',
+ 'md5': '850bfe45417ddf221288c88a0cffe2e2',
+ 'info_dict': {
+ 'id': '030273-562_PLUS7-F',
+ 'ext': 'mp4',
+ 'title': 'ARTE Reportage - Nulle part, en France',
+ 'description': 'md5:e3a0e8868ed7303ed509b9e3af2b870d',
+ 'upload_date': '20160409',
+ },
+ },
+ # Duplicated embedded video URLs
+ {
+ 'url': 'http://www.hudl.com/athlete/2538180/highlights/149298443',
+ 'info_dict': {
+ 'id': '149298443_480_16c25b74_2',
+ 'ext': 'mp4',
+ 'title': 'vs. Blue Orange Spring Game',
+ 'uploader': 'www.hudl.com',
+ },
+ },
+ # twitter:player:stream embed
+ {
+ 'url': 'http://www.rtl.be/info/video/589263.aspx?CategoryID=288',
+ 'info_dict': {
+ 'id': 'master',
+ 'ext': 'mp4',
+ 'title': 'Une nouvelle espèce de dinosaure découverte en Argentine',
+ 'uploader': 'www.rtl.be',
+ },
+ 'params': {
+ # m3u8 downloads
+ 'skip_download': True,
+ },
+ },
+ # twitter:player embed
+ {
+ 'url': 'http://www.theatlantic.com/video/index/484130/what-do-black-holes-sound-like/',
+ 'md5': 'a3e0df96369831de324f0778e126653c',
+ 'info_dict': {
+ 'id': '4909620399001',
+ 'ext': 'mp4',
+ 'title': 'What Do Black Holes Sound Like?',
+ 'description': 'what do black holes sound like',
+ 'upload_date': '20160524',
+ 'uploader_id': '29913724001',
+ 'timestamp': 1464107587,
+ 'uploader': 'TheAtlantic',
+ },
+ 'skip': 'Private Youtube video',
+ },
+ # Facebook <iframe> embed
+ {
+ 'url': 'https://www.hostblogger.de/blog/archives/6181-Auto-jagt-Betonmischer.html',
+ 'md5': 'fbcde74f534176ecb015849146dd3aee',
+ 'info_dict': {
+ 'id': '599637780109885',
+ 'ext': 'mp4',
+ 'title': 'Facebook video #599637780109885',
+ },
+ },
+ # Facebook <iframe> embed, plugin video
+ {
+ 'url': 'http://5pillarsuk.com/2017/06/07/tariq-ramadan-disagrees-with-pr-exercise-by-imams-refusing-funeral-prayers-for-london-attackers/',
+ 'info_dict': {
+ 'id': '1754168231264132',
+ 'ext': 'mp4',
+ 'title': 'About the Imams and Religious leaders refusing to perform funeral prayers for...',
+ 'uploader': 'Tariq Ramadan (official)',
+ 'timestamp': 1496758379,
+ 'upload_date': '20170606',
+ },
+ 'params': {
+ 'skip_download': True,
+ },
+ },
+ # Facebook API embed
+ {
+ 'url': 'http://www.lothype.com/blue-stars-2016-preview-standstill-full-show/',
+ 'md5': 'a47372ee61b39a7b90287094d447d94e',
+ 'info_dict': {
+ 'id': '10153467542406923',
+ 'ext': 'mp4',
+ 'title': 'Facebook video #10153467542406923',
+ },
+ },
+ # Wordpress "YouTube Video Importer" plugin
+ {
+ 'url': 'http://www.lothype.com/blue-devils-drumline-stanford-lot-2016/',
+ 'md5': 'd16797741b560b485194eddda8121b48',
+ 'info_dict': {
+ 'id': 'HNTXWDXV9Is',
+ 'ext': 'mp4',
+ 'title': 'Blue Devils Drumline Stanford lot 2016',
+ 'upload_date': '20160627',
+ 'uploader_id': 'GENOCIDE8GENERAL10',
+ 'uploader': 'cylus cyrus',
+ },
+ },
+ {
+ # video stored on custom kaltura server
+ 'url': 'http://www.expansion.com/multimedia/videos.html?media=EQcM30NHIPv',
+ 'md5': '537617d06e64dfed891fa1593c4b30cc',
+ 'info_dict': {
+ 'id': '0_1iotm5bh',
+ 'ext': 'mp4',
+ 'title': 'Elecciones británicas: 5 lecciones para Rajoy',
+ 'description': 'md5:435a89d68b9760b92ce67ed227055f16',
+ 'uploader_id': 'videos.expansion@el-mundo.net',
+ 'upload_date': '20150429',
+ 'timestamp': 1430303472,
+ },
+ 'add_ie': ['Kaltura'],
+ },
+ {
+ # multiple kaltura embeds, nsfw
+ 'url': 'https://www.quartier-rouge.be/prive/femmes/kamila-avec-video-jaime-sadomie.html',
+ 'info_dict': {
+ 'id': 'kamila-avec-video-jaime-sadomie',
+ 'title': "Kamila avec vídeo “J'aime sadomie”",
+ },
+ 'playlist_count': 8,
+ },
+ {
+ # Non-standard Vimeo embed
+ 'url': 'https://openclassrooms.com/courses/understanding-the-web',
+ 'md5': '64d86f1c7d369afd9a78b38cbb88d80a',
+ 'info_dict': {
+ 'id': '148867247',
+ 'ext': 'mp4',
+ 'title': 'Understanding the web - Teaser',
+ 'description': 'This is "Understanding the web - Teaser" by openclassrooms on Vimeo, the home for high quality videos and the people who love them.',
+ 'upload_date': '20151214',
+ 'uploader': 'OpenClassrooms',
+ 'uploader_id': 'openclassrooms',
+ },
+ 'add_ie': ['Vimeo'],
+ },
+ {
+ # generic vimeo embed that requires original URL passed as Referer
+ 'url': 'http://racing4everyone.eu/2016/07/30/formula-1-2016-round12-germany/',
+ 'only_matching': True,
+ },
+ {
+ 'url': 'https://support.arkena.com/display/PLAY/Ways+to+embed+your+video',
+ 'md5': 'b96f2f71b359a8ecd05ce4e1daa72365',
+ 'info_dict': {
+ 'id': 'b41dda37-d8e7-4d3f-b1b5-9a9db578bdfe',
+ 'ext': 'mp4',
+ 'title': 'Big Buck Bunny',
+ 'description': 'Royalty free test video',
+ 'timestamp': 1432816365,
+ 'upload_date': '20150528',
+ 'is_live': False,
+ },
+ 'params': {
+ 'skip_download': True,
+ },
+ 'add_ie': ['Arkena'],
+ },
+ {
+ 'url': 'http://nova.bg/news/view/2016/08/16/156543/%D0%BD%D0%B0-%D0%BA%D0%BE%D1%81%D1%8A%D0%BC-%D0%BE%D1%82-%D0%B2%D0%B7%D1%80%D0%B8%D0%B2-%D0%BE%D1%82%D1%86%D0%B5%D0%BF%D0%B8%D1%85%D0%B0-%D1%86%D1%8F%D0%BB-%D0%BA%D0%B2%D0%B0%D1%80%D1%82%D0%B0%D0%BB-%D0%B7%D0%B0%D1%80%D0%B0%D0%B4%D0%B8-%D0%B8%D0%B7%D1%82%D0%B8%D1%87%D0%B0%D0%BD%D0%B5-%D0%BD%D0%B0-%D0%B3%D0%B0%D0%B7-%D0%B2-%D0%BF%D0%BB%D0%BE%D0%B2%D0%B4%D0%B8%D0%B2/',
+ 'info_dict': {
+ 'id': '1c7141f46c',
+ 'ext': 'mp4',
+ 'title': 'НА КОСЪМ ОТ ВЗРИВ: Изтичане на газ на бензиностанция в Пловдив',
+ },
+ 'params': {
+ 'skip_download': True,
+ },
+ 'add_ie': ['Vbox7'],
+ },
+ {
+ # DBTV embeds
+ 'url': 'http://www.dagbladet.no/2016/02/23/nyheter/nordlys/ski/troms/ver/43254897/',
+ 'info_dict': {
+ 'id': '43254897',
+ 'title': 'Etter ett års planlegging, klaffet endelig alt: - Jeg måtte ta en liten dans',
+ },
+ 'playlist_mincount': 3,
+ },
+ {
+ # Videa embeds
+ 'url': 'http://forum.dvdtalk.com/movie-talk/623756-deleted-magic-star-wars-ot-deleted-alt-scenes-docu-style.html',
+ 'info_dict': {
+ 'id': '623756-deleted-magic-star-wars-ot-deleted-alt-scenes-docu-style',
+ 'title': 'Deleted Magic - Star Wars: OT Deleted / Alt. Scenes Docu. Style - DVD Talk Forum',
+ },
+ 'playlist_mincount': 2,
+ },
+ {
+ # 20 minuten embed
+ 'url': 'http://www.20min.ch/schweiz/news/story/So-kommen-Sie-bei-Eis-und-Schnee-sicher-an-27032552',
+ 'info_dict': {
+ 'id': '523629',
+ 'ext': 'mp4',
+ 'title': 'So kommen Sie bei Eis und Schnee sicher an',
+ 'description': 'md5:117c212f64b25e3d95747e5276863f7d',
+ },
+ 'params': {
+ 'skip_download': True,
+ },
+ 'add_ie': ['TwentyMinuten'],
+ },
+ {
+ # VideoPress embed
+ 'url': 'https://en.support.wordpress.com/videopress/',
+ 'info_dict': {
+ 'id': 'OcobLTqC',
+ 'ext': 'm4v',
+ 'title': 'IMG_5786',
+ 'timestamp': 1435711927,
+ 'upload_date': '20150701',
+ },
+ 'params': {
+ 'skip_download': True,
+ },
+ 'add_ie': ['VideoPress'],
+ },
+ {
+ # Rutube embed
+ 'url': 'http://magazzino.friday.ru/videos/vipuski/kazan-2',
+ 'info_dict': {
+ 'id': '9b3d5bee0a8740bf70dfd29d3ea43541',
+ 'ext': 'flv',
+ 'title': 'Магаззино: Казань 2',
+ 'description': 'md5:99bccdfac2269f0e8fdbc4bbc9db184a',
+ 'uploader': 'Магаззино',
+ 'upload_date': '20170228',
+ 'uploader_id': '996642',
+ },
+ 'params': {
+ 'skip_download': True,
+ },
+ 'add_ie': ['Rutube'],
+ },
+ {
+ # glomex:embed
+ 'url': 'https://www.skai.gr/news/world/iatrikos-syllogos-tourkias-to-turkovac-aplo-dialyma-erntogan-eiste-apateones-kai-pseytes',
+ 'info_dict': {
+ 'id': 'v-ch2nkhcirwc9-sf',
+ 'ext': 'mp4',
+ 'title': 'md5:786e1e24e06c55993cee965ef853a0c1',
+ 'description': 'md5:8b517a61d577efe7e36fde72fd535995',
+ 'timestamp': 1641885019,
+ 'upload_date': '20220111',
+ 'duration': 460000,
+ 'thumbnail': 'https://i3thumbs.glomex.com/dC1idjJwdndiMjRzeGwvMjAyMi8wMS8xMS8wNy8xMF8zNV82MWRkMmQ2YmU5ZTgyLmpwZw==/profile:player-960x540',
+ },
+ },
+ {
+ # megatvcom:embed
+ 'url': 'https://www.in.gr/2021/12/18/greece/apokalypsi-mega-poios-parelave-tin-ereyna-tsiodra-ek-merous-tis-kyvernisis-o-prothypourgos-telika-gnorize/',
+ 'info_dict': {
+ 'id': 'apokalypsi-mega-poios-parelave-tin-ereyna-tsiodra-ek-merous-tis-kyvernisis-o-prothypourgos-telika-gnorize',
+ 'title': 'md5:5e569cf996ec111057c2764ec272848f',
+ },
+ 'playlist': [{
+ 'md5': '1afa26064ff00ccb91617957dbc73dc1',
+ 'info_dict': {
+ 'ext': 'mp4',
+ 'id': '564916',
+ 'display_id': 'md5:6cdf22d3a2e7bacb274b7295089a1770',
+ 'title': 'md5:33b9dd39584685b62873043670eb52a6',
+ 'description': 'md5:c1db7310f390518ac36dd69d947ef1a1',
+ 'timestamp': 1639753145,
+ 'upload_date': '20211217',
+ 'thumbnail': 'https://www.megatv.com/wp-content/uploads/2021/12/prezerakos-1024x597.jpg',
+ },
+ }, {
+ 'md5': '4a1c220695f1ef865a8b7966a53e2474',
+ 'info_dict': {
+ 'ext': 'mp4',
+ 'id': '564905',
+ 'display_id': 'md5:ead15695e485e649aed2b81ebd699b88',
+ 'title': 'md5:2b71fd54249a3ca34609fe39ae31c47b',
+ 'description': 'md5:c42e12f638d0a97d6de4508e2c4df982',
+ 'timestamp': 1639753047,
+ 'upload_date': '20211217',
+ 'thumbnail': 'https://www.megatv.com/wp-content/uploads/2021/12/tsiodras-mitsotakis-1024x545.jpg',
+ },
+ }]
+ },
+ {
+ 'url': 'https://www.ertnews.gr/video/manolis-goyalles-o-anthropos-piso-apo-ti-diadiktyaki-vasilopita/',
+ 'info_dict': {
+ 'id': '2022/tv/news-themata-ianouarios/20220114-apotis6-gouales-pita.mp4',
+ 'ext': 'mp4',
+ 'title': 'md5:df64f5b61c06d0e9556c0cdd5cf14464',
+ 'thumbnail': 'https://www.ert.gr/themata/photos/2021/20220114-apotis6-gouales-pita.jpg',
+ },
+ },
+ {
+            # ThePlatform embed with whitespace in URLs
+ 'url': 'http://www.golfchannel.com/topics/shows/golftalkcentral.htm',
+ 'only_matching': True,
+ },
+ {
+ # Senate ISVP iframe https
+ 'url': 'https://www.hsgac.senate.gov/hearings/canadas-fast-track-refugee-plan-unanswered-questions-and-implications-for-us-national-security',
+ 'md5': 'fb8c70b0b515e5037981a2492099aab8',
+ 'info_dict': {
+ 'id': 'govtaff020316',
+ 'ext': 'mp4',
+ 'title': 'Integrated Senate Video Player',
+ },
+ 'add_ie': ['SenateISVP'],
+ },
+ {
+ # Limelight embeds (1 channel embed + 4 media embeds)
+ 'url': 'http://www.sedona.com/FacilitatorTraining2017',
+ 'info_dict': {
+ 'id': 'FacilitatorTraining2017',
+ 'title': 'Facilitator Training 2017',
+ },
+ 'playlist_mincount': 5,
+ },
+ {
+ # Limelight embed (LimelightPlayerUtil.embed)
+ 'url': 'https://tv5.ca/videos?v=xuu8qowr291ri',
+ 'info_dict': {
+ 'id': '95d035dc5c8a401588e9c0e6bd1e9c92',
+ 'ext': 'mp4',
+ 'title': '07448641',
+ 'timestamp': 1499890639,
+ 'upload_date': '20170712',
+ },
+ 'params': {
+ 'skip_download': True,
+ },
+ 'add_ie': ['LimelightMedia'],
+ },
+ {
+ 'url': 'http://kron4.com/2017/04/28/standoff-with-walnut-creek-murder-suspect-ends-with-arrest/',
+ 'info_dict': {
+ 'id': 'standoff-with-walnut-creek-murder-suspect-ends-with-arrest',
+ 'title': 'Standoff with Walnut Creek murder suspect ends',
+ 'description': 'md5:3ccc48a60fc9441eeccfc9c469ebf788',
+ },
+ 'playlist_mincount': 4,
+ },
+ {
+ # WashingtonPost embed
+ 'url': 'http://www.vanityfair.com/hollywood/2017/04/donald-trump-tv-pitches',
+ 'info_dict': {
+ 'id': '8caf6e88-d0ec-11e5-90d3-34c2c42653ac',
+ 'ext': 'mp4',
+ 'title': "No one has seen the drama series based on Trump's life \u2014 until now",
+ 'description': 'Donald Trump wanted a weekly TV drama based on his life. It never aired. But The Washington Post recently obtained a scene from the pilot script — and enlisted actors.',
+ 'timestamp': 1455216756,
+ 'uploader': 'The Washington Post',
+ 'upload_date': '20160211',
+ },
+ 'add_ie': ['WashingtonPost'],
+ },
+ {
+ # JOJ.sk embeds
+ 'url': 'https://www.noviny.sk/slovensko/238543-slovenskom-sa-prehnala-vlna-silnych-burok',
+ 'info_dict': {
+ 'id': '238543-slovenskom-sa-prehnala-vlna-silnych-burok',
+ 'title': 'Slovenskom sa prehnala vlna silných búrok',
+ },
+ 'playlist_mincount': 5,
+ 'add_ie': ['Joj'],
+ },
+ {
+ # AMP embed (see https://www.ampproject.org/docs/reference/components/amp-video)
+ 'url': 'https://tvrain.ru/amp/418921/',
+ 'md5': 'cc00413936695987e8de148b67d14f1d',
+ 'info_dict': {
+ 'id': '418921',
+ 'ext': 'mp4',
+ 'title': 'Стас Намин: «Мы нарушили девственность Кремля»',
+ },
+ },
+ {
+ # multiple HTML5 videos on one page
+ 'url': 'https://www.paragon-software.com/home/rk-free/keyscenarios.html',
+ 'info_dict': {
+ 'id': 'keyscenarios',
+ 'title': 'Rescue Kit 14 Free Edition - Getting started',
+ },
+ 'playlist_count': 4,
+ },
+ {
+ # vshare embed
+ 'url': 'https://youtube-dl-demo.neocities.org/vshare.html',
+ 'md5': '17b39f55b5497ae8b59f5fbce8e35886',
+ 'info_dict': {
+ 'id': '0f64ce6',
+ 'title': 'vl14062007715967',
+ 'ext': 'mp4',
+ }
+ },
+ {
+ 'url': 'http://www.heidelberg-laureate-forum.org/blog/video/lecture-friday-september-23-2016-sir-c-antony-r-hoare/',
+ 'md5': 'aecd089f55b1cb5a59032cb049d3a356',
+ 'info_dict': {
+ 'id': '90227f51a80c4d8f86c345a7fa62bd9a1d',
+ 'ext': 'mp4',
+ 'title': 'Lecture: Friday, September 23, 2016 - Sir Tony Hoare',
+ 'description': 'md5:5a51db84a62def7b7054df2ade403c6c',
+ 'timestamp': 1474354800,
+ 'upload_date': '20160920',
+ }
+ },
+ {
+ 'url': 'http://www.kidzworld.com/article/30935-trolls-the-beat-goes-on-interview-skylar-astin-and-amanda-leighton',
+ 'info_dict': {
+ 'id': '1731611',
+ 'ext': 'mp4',
+ 'title': 'Official Trailer | TROLLS: THE BEAT GOES ON!',
+ 'description': 'md5:eb5f23826a027ba95277d105f248b825',
+ 'timestamp': 1516100691,
+ 'upload_date': '20180116',
+ },
+ 'params': {
+ 'skip_download': True,
+ },
+ 'add_ie': ['SpringboardPlatform'],
+ },
+ {
+ 'url': 'https://www.yapfiles.ru/show/1872528/690b05d3054d2dbe1e69523aa21bb3b1.mp4.html',
+ 'info_dict': {
+ 'id': 'vMDE4NzI1Mjgt690b',
+ 'ext': 'mp4',
+ 'title': 'Котята',
+ },
+ 'add_ie': ['YapFiles'],
+ 'params': {
+ 'skip_download': True,
+ },
+ },
+ {
+ # CloudflareStream embed
+ 'url': 'https://www.cloudflare.com/products/cloudflare-stream/',
+ 'info_dict': {
+ 'id': '31c9291ab41fac05471db4e73aa11717',
+ 'ext': 'mp4',
+ 'title': '31c9291ab41fac05471db4e73aa11717',
+ },
+ 'add_ie': ['CloudflareStream'],
+ 'params': {
+ 'skip_download': True,
+ },
+ },
+ {
+ # PeerTube embed
+ 'url': 'https://joinpeertube.org/fr/home/',
+ 'info_dict': {
+ 'id': 'home',
+ 'title': 'Reprenez le contrôle de vos vidéos ! #JoinPeertube',
+ },
+ 'playlist_count': 2,
+ },
+ {
+ # Indavideo embed
+ 'url': 'https://streetkitchen.hu/receptek/igy_kell_otthon_hamburgert_sutni/',
+ 'info_dict': {
+ 'id': '1693903',
+ 'ext': 'mp4',
+ 'title': 'Így kell otthon hamburgert sütni',
+ 'description': 'md5:f5a730ecf900a5c852e1e00540bbb0f7',
+ 'timestamp': 1426330212,
+ 'upload_date': '20150314',
+ 'uploader': 'StreetKitchen',
+ 'uploader_id': '546363',
+ },
+ 'add_ie': ['IndavideoEmbed'],
+ 'params': {
+ 'skip_download': True,
+ },
+ },
+ {
+ # APA embed via JWPlatform embed
+ 'url': 'http://www.vol.at/blue-man-group/5593454',
+ 'info_dict': {
+ 'id': 'jjv85FdZ',
+ 'ext': 'mp4',
+ 'title': '"Blau ist mysteriös": Die Blue Man Group im Interview',
+ 'description': 'md5:d41d8cd98f00b204e9800998ecf8427e',
+ 'thumbnail': r're:^https?://.*\.jpg$',
+ 'duration': 254,
+ 'timestamp': 1519211149,
+ 'upload_date': '20180221',
+ },
+ 'params': {
+ 'skip_download': True,
+ },
+ },
+ {
+ 'url': 'http://share-videos.se/auto/video/83645793?uid=13',
+ 'md5': 'b68d276de422ab07ee1d49388103f457',
+ 'info_dict': {
+ 'id': '83645793',
+ 'title': 'Lock up and get excited',
+ 'ext': 'mp4'
+ },
+ 'skip': 'TODO: fix nested playlists processing in tests',
+ },
+ {
+ # Viqeo embeds
+ 'url': 'https://viqeo.tv/',
+ 'info_dict': {
+ 'id': 'viqeo',
+ 'title': 'All-new video platform',
+ },
+ 'playlist_count': 6,
+ },
+ # {
+ # # Zype embed
+ # 'url': 'https://www.cookscountry.com/episode/554-smoky-barbecue-favorites',
+ # 'info_dict': {
+ # 'id': '5b400b834b32992a310622b9',
+ # 'ext': 'mp4',
+ # 'title': 'Smoky Barbecue Favorites',
+ # 'thumbnail': r're:^https?://.*\.jpe?g',
+ # 'description': 'md5:5ff01e76316bd8d46508af26dc86023b',
+ # 'upload_date': '20170909',
+ # 'timestamp': 1504915200,
+ # },
+ # 'add_ie': [ZypeIE.ie_key()],
+ # 'params': {
+ # 'skip_download': True,
+ # },
+ # },
+ {
+ # videojs embed
+ 'url': 'https://video.sibnet.ru/shell.php?videoid=3422904',
+ 'info_dict': {
+ 'id': 'shell',
+ 'ext': 'mp4',
+ 'title': 'Доставщик пиццы спросил разрешения сыграть на фортепиано',
+ 'description': 'md5:89209cdc587dab1e4a090453dbaa2cb1',
+ 'thumbnail': r're:^https?://.*\.jpg$',
+ },
+ 'params': {
+ 'skip_download': True,
+ },
+ 'expected_warnings': ['Failed to download MPD manifest'],
+ },
+ {
+ # DailyMotion embed with DM.player
+ 'url': 'https://www.beinsports.com/us/copa-del-rey/video/the-locker-room-valencia-beat-barca-in-copa/1203804',
+ 'info_dict': {
+ 'id': 'k6aKkGHd9FJs4mtJN39',
+ 'ext': 'mp4',
+ 'title': 'The Locker Room: Valencia Beat Barca In Copa del Rey Final',
+ 'description': 'This video is private.',
+ 'uploader_id': 'x1jf30l',
+ 'uploader': 'beIN SPORTS USA',
+ 'upload_date': '20190528',
+ 'timestamp': 1559062971,
+ },
+ 'params': {
+ 'skip_download': True,
+ },
+ },
+ {
+ # tvopengr:embed
+ 'url': 'https://www.ethnos.gr/World/article/190604/hparosiaxekinoynoisynomiliessthgeneyhmethskiatoypolemoypanoapothnoykrania',
+ 'md5': 'eb0c3995d0a6f18f6538c8e057865d7d',
+ 'info_dict': {
+ 'id': '101119',
+ 'ext': 'mp4',
+ 'display_id': 'oikarpoitondiapragmateyseonhparosias',
+ 'title': 'md5:b979f4d640c568617d6547035528a149',
+ 'description': 'md5:e54fc1977c7159b01cc11cd7d9d85550',
+ 'timestamp': 1641772800,
+ 'upload_date': '20220110',
+ 'thumbnail': 'https://opentv-static.siliconweb.com/imgHandler/1920/70bc39fa-895b-4918-a364-c39d2135fc6d.jpg',
+ }
+ },
+ {
+ # blogger embed
+ 'url': 'https://blog.tomeuvizoso.net/2019/01/a-panfrost-milestone.html',
+ 'md5': 'f1bc19b6ea1b0fd1d81e84ca9ec467ac',
+ 'info_dict': {
+ 'id': 'BLOGGER-video-3c740e3a49197e16-796',
+ 'ext': 'mp4',
+ 'title': 'Blogger',
+ 'thumbnail': r're:^https?://.*',
+ },
+ },
+ # {
+ # # TODO: find another test
+ # # http://schema.org/VideoObject
+ # 'url': 'https://flipagram.com/f/nyvTSJMKId',
+ # 'md5': '888dcf08b7ea671381f00fab74692755',
+ # 'info_dict': {
+ # 'id': 'nyvTSJMKId',
+ # 'ext': 'mp4',
+ # 'title': 'Flipagram by sjuria101 featuring Midnight Memories by One Direction',
+ # 'description': '#love for cats.',
+ # 'timestamp': 1461244995,
+ # 'upload_date': '20160421',
+ # },
+ # 'params': {
+ # 'force_generic_extractor': True,
+ # },
+ # },
+ {
+ # VHX Embed
+ 'url': 'https://demo.vhx.tv/category-c/videos/file-example-mp4-480-1-5mg-copy',
+ 'info_dict': {
+ 'id': '858208',
+ 'ext': 'mp4',
+ 'title': 'Untitled',
+ 'uploader_id': 'user80538407',
+ 'uploader': 'OTT Videos',
+ },
+ },
+ {
+ # ArcPublishing PoWa video player
+ 'url': 'https://www.adn.com/politics/2020/11/02/video-senate-candidates-campaign-in-anchorage-on-eve-of-election-day/',
+ 'md5': 'b03b2fac8680e1e5a7cc81a5c27e71b3',
+ 'info_dict': {
+ 'id': '8c99cb6e-b29c-4bc9-9173-7bf9979225ab',
+ 'ext': 'mp4',
+ 'title': 'Senate candidates wave to voters on Anchorage streets',
+ 'description': 'md5:91f51a6511f090617353dc720318b20e',
+ 'timestamp': 1604378735,
+ 'upload_date': '20201103',
+ 'duration': 1581,
+ },
+ },
+ {
+ # MyChannels SDK embed
+ # https://www.24kitchen.nl/populair/deskundige-dit-waarom-sommigen-gevoelig-zijn-voor-voedselallergieen
+ 'url': 'https://www.demorgen.be/nieuws/burgemeester-rotterdam-richt-zich-in-videoboodschap-tot-relschoppers-voelt-het-goed~b0bcfd741/',
+ 'md5': '90c0699c37006ef18e198c032d81739c',
+ 'info_dict': {
+ 'id': '194165',
+ 'ext': 'mp4',
+ 'title': 'Burgemeester Aboutaleb spreekt relschoppers toe',
+ 'timestamp': 1611740340,
+ 'upload_date': '20210127',
+ 'duration': 159,
+ },
+ },
+ {
+ # Simplecast player embed
+ 'url': 'https://www.bio.org/podcast',
+ 'info_dict': {
+ 'id': 'podcast',
+ 'title': 'I AM BIO Podcast | BIO',
+ },
+ 'playlist_mincount': 52,
+ }, {
+ # WimTv embed player
+ 'url': 'http://www.msmotor.tv/wearefmi-pt-2-2021/',
+ 'info_dict': {
+ 'id': 'wearefmi-pt-2-2021',
+ 'title': '#WEAREFMI – PT.2 – 2021 – MsMotorTV',
+ },
+ 'playlist_count': 1,
+ }, {
+ # KVS Player
+ 'url': 'https://www.kvs-demo.com/videos/105/kelis-4th-of-july/',
+ 'info_dict': {
+ 'id': '105',
+ 'display_id': 'kelis-4th-of-july',
+ 'ext': 'mp4',
+ 'title': 'Kelis - 4th Of July',
+ 'description': 'Kelis - 4th Of July',
+ 'thumbnail': r're:https://(?:www\.)?kvs-demo.com/contents/videos_screenshots/0/105/preview.jpg',
+ },
+ 'params': {
+ 'skip_download': True,
+ },
+ 'expected_warnings': ['Untested major version'],
+ }, {
+ # KVS Player
+ 'url': 'https://www.kvs-demo.com/embed/105/',
+ 'info_dict': {
+ 'id': '105',
+ 'display_id': 'kelis-4th-of-july',
+ 'ext': 'mp4',
+ 'title': 'Kelis - 4th Of July / Embed Player',
+ 'thumbnail': r're:https://(?:www\.)?kvs-demo.com/contents/videos_screenshots/0/105/preview.jpg',
+ },
+ 'params': {
+ 'skip_download': True,
+ },
+ }, {
+ 'url': 'https://youix.com/video/leningrad-zoj/',
+ 'md5': '94f96ba95706dc3880812b27b7d8a2b8',
+ 'info_dict': {
+ 'id': '18485',
+ 'display_id': 'leningrad-zoj',
+ 'ext': 'mp4',
+ 'title': 'Клип: Ленинград - ЗОЖ скачать, смотреть онлайн | Youix.com',
+ 'thumbnail': r're:https://youix.com/contents/videos_screenshots/18000/18485/preview(?:_480x320_youix_com.mp4)?\.jpg',
+ },
+ }, {
+ # KVS Player
+ 'url': 'https://youix.com/embed/18485',
+ 'md5': '94f96ba95706dc3880812b27b7d8a2b8',
+ 'info_dict': {
+ 'id': '18485',
+ 'display_id': 'leningrad-zoj',
+ 'ext': 'mp4',
+ 'title': 'Ленинград - ЗОЖ',
+ 'thumbnail': r're:https://youix.com/contents/videos_screenshots/18000/18485/preview(?:_480x320_youix_com.mp4)?\.jpg',
+ },
+ }, {
+ # KVS Player
+ 'url': 'https://bogmedia.org/videos/21217/40-nochey-40-nights-2016/',
+ 'md5': '94166bdb26b4cb1fb9214319a629fc51',
+ 'info_dict': {
+ 'id': '21217',
+ 'display_id': '40-nochey-2016',
+ 'ext': 'mp4',
+ 'title': '40 ночей (2016) - BogMedia.org',
+ 'description': 'md5:4e6d7d622636eb7948275432eb256dc3',
+ 'thumbnail': 'https://bogmedia.org/contents/videos_screenshots/21000/21217/preview_480p.mp4.jpg',
+ },
+ },
+ {
+ # KVS Player (for sites that serve kt_player.js via non-https urls)
+ 'url': 'http://www.camhub.world/embed/389508',
+ 'md5': 'fbe89af4cfb59c8fd9f34a202bb03e32',
+ 'info_dict': {
+ 'id': '389508',
+ 'display_id': 'syren-de-mer-onlyfans-05-07-2020have-a-happy-safe-holiday5f014e68a220979bdb8cd-source',
+ 'ext': 'mp4',
+ 'title': 'Syren De Mer onlyfans_05-07-2020Have_a_happy_safe_holiday5f014e68a220979bdb8cd_source / Embed плеер',
+ 'thumbnail': r're:https?://www\.camhub\.world/contents/videos_screenshots/389000/389508/preview\.mp4\.jpg',
+ },
+ },
+ {
+ # Reddit-hosted video that will redirect and be processed by RedditIE
+ # Redirects to https://www.reddit.com/r/videos/comments/6rrwyj/that_small_heart_attack/
+ 'url': 'https://v.redd.it/zv89llsvexdz',
+ 'md5': '87f5f02f6c1582654146f830f21f8662',
+ 'info_dict': {
+ 'id': 'zv89llsvexdz',
+ 'ext': 'mp4',
+ 'timestamp': 1501941939.0,
+ 'title': 'That small heart attack.',
+ 'upload_date': '20170805',
+ 'uploader': 'Antw87'
+ }
+ },
+ {
+ # 1080p Reddit-hosted video that will redirect and be processed by RedditIE
+ 'url': 'https://v.redd.it/33hgok7dfbz71/',
+ 'md5': '7a1d587940242c9bb3bd6eb320b39258',
+ 'info_dict': {
+ 'id': '33hgok7dfbz71',
+ 'ext': 'mp4',
+ 'title': "The game Didn't want me to Knife that Guy I guess",
+ 'uploader': 'paraf1ve',
+ 'timestamp': 1636788683.0,
+ 'upload_date': '20211113'
+ }
+ },
+ {
+ # MainStreaming player
+ 'url': 'https://www.lactv.it/2021/10/03/lac-news24-la-settimana-03-10-2021/',
+ 'info_dict': {
+ 'id': 'EUlZfGWkGpOd',
+ 'title': 'La Settimana ',
+ 'description': '03 Ottobre ore 02:00',
+ 'ext': 'mp4',
+ 'live_status': 'not_live',
+ 'thumbnail': r're:https?://[A-Za-z0-9-]*\.msvdn.net/image/\w+/poster',
+ 'duration': 1512
+ }
+ },
+ {
+ # Multiple gfycat iframe embeds
+ 'url': 'https://www.gezip.net/bbs/board.php?bo_table=entertaine&wr_id=613422',
+ 'info_dict': {
+ 'title': '재이, 윤, 세은 황금 드레스를 입고 빛난다',
+ 'id': 'board'
+ },
+ 'playlist_count': 8,
+ },
+ {
+ # Multiple gfycat gifs (direct links)
+ 'url': 'https://www.gezip.net/bbs/board.php?bo_table=entertaine&wr_id=612199',
+ 'info_dict': {
+ 'title': '옳게 된 크롭 니트 스테이씨 아이사',
+ 'id': 'board'
+ },
+ 'playlist_count': 6
+ },
+ {
+ # Multiple gfycat embeds, with uppercase "IFR" in urls
+ 'url': 'https://kkzz.kr/?vid=2295',
+ 'info_dict': {
+ 'title': '지방시 앰버서더 에스파 카리나 움짤',
+ 'id': '?vid=2295'
+ },
+ 'playlist_count': 9
+ },
+ {
+ # Panopto embeds
+ 'url': 'https://www.monash.edu/learning-teaching/teachhq/learning-technologies/panopto/how-to/insert-a-quiz-into-a-panopto-video',
+ 'info_dict': {
+ 'ext': 'mp4',
+ 'id': '0bd3f16c-824a-436a-8486-ac5900693aef',
+ 'title': 'Quizzes in Panopto',
+ },
+ },
+ {
+ # Ruutu embed
+ 'url': 'https://www.nelonen.fi/ohjelmat/madventures-suomi/2160731-riku-ja-tunna-lahtevat-peurajahtiin-tv-sta-tutun-biologin-kanssa---metsastysreissu-huipentuu-kasvissyojan-painajaiseen',
+ 'md5': 'a2513a98d3496099e6eced40f7e6a14b',
+ 'info_dict': {
+ 'id': '4044426',
+ 'ext': 'mp4',
+ 'title': 'Riku ja Tunna lähtevät peurajahtiin tv:stä tutun biologin kanssa – metsästysreissu huipentuu kasvissyöjän painajaiseen!',
+ 'thumbnail': r're:^https?://.+\.jpg$',
+ 'duration': 108,
+ 'series': 'Madventures Suomi',
+ 'description': 'md5:aa55b44bd06a1e337a6f1d0b46507381',
+ 'categories': ['Matkailu', 'Elämäntyyli'],
+ 'age_limit': 0,
+ 'upload_date': '20220308',
+ },
+ },
+ {
+ # Multiple Ruutu embeds
+ 'url': 'https://www.hs.fi/kotimaa/art-2000008762560.html',
+ 'info_dict': {
+ 'title': 'Koronavirus | Epidemiahuippu voi olla Suomessa ohi, mutta koronaviruksen poistamista yleisvaarallisten tautien joukosta harkitaan vasta syksyllä',
+ 'id': 'art-2000008762560'
+ },
+ 'playlist_count': 3
+ },
+ {
+ # Ruutu embed in hs.fi with a single video
+ 'url': 'https://www.hs.fi/kotimaa/art-2000008793421.html',
+ 'md5': 'f8964e65d8fada6e8a562389bf366bb4',
+ 'info_dict': {
+ 'id': '4081841',
+ 'ext': 'mp4',
+ 'title': 'Puolustusvoimat siirsi panssariajoneuvoja harjoituksiin Niinisaloon 2.5.2022',
+ 'thumbnail': r're:^https?://.+\.jpg$',
+ 'duration': 138,
+ 'age_limit': 0,
+ 'upload_date': '20220504',
+ },
+ },
+ {
+ # Webpage contains double BOM
+ 'url': 'https://www.filmarkivet.se/movies/paris-d-moll/',
+ 'md5': 'df02cadc719dcc63d43288366f037754',
+ 'info_dict': {
+ 'id': 'paris-d-moll',
+ 'ext': 'mp4',
+ 'upload_date': '20220518',
+ 'title': 'Paris d-moll',
+ 'description': 'md5:319e37ea5542293db37e1e13072fe330',
+ 'thumbnail': 'https://www.filmarkivet.se/wp-content/uploads/parisdmoll2.jpg',
+ 'timestamp': 1652833414,
+ 'age_limit': 0,
+ }
+ },
+ {
+ 'url': 'https://www.mollymovieclub.com/p/interstellar?s=r#details',
+ 'md5': '198bde8bed23d0b23c70725c83c9b6d9',
+ 'info_dict': {
+ 'id': '53602801',
+ 'ext': 'mpga',
+ 'title': 'Interstellar',
+ 'description': 'Listen now | Episode One',
+ 'thumbnail': 'md5:c30d9c83f738e16d8551d7219d321538',
+ 'uploader': 'Molly Movie Club',
+ 'uploader_id': '839621',
+ },
+ },
+ {
+ 'url': 'https://www.blockedandreported.org/p/episode-117-lets-talk-about-depp?s=r',
+ 'md5': 'c0cc44ee7415daeed13c26e5b56d6aa0',
+ 'info_dict': {
+ 'id': '57962052',
+ 'ext': 'mpga',
+ 'title': 'md5:855b2756f0ee10f6723fa00b16266f8d',
+ 'description': 'md5:fe512a5e94136ad260c80bde00ea4eef',
+ 'thumbnail': 'md5:2218f27dfe517bb5ac16c47d0aebac59',
+ 'uploader': 'Blocked and Reported',
+ 'uploader_id': '500230',
+ },
+ },
+ {
+ 'url': 'https://www.skimag.com/video/ski-people-1980/',
+ 'md5': '022a7e31c70620ebec18deeab376ee03',
+ 'info_dict': {
+ 'id': 'YTmgRiNU',
+ 'ext': 'mp4',
+ 'title': '1980 Ski People',
+ 'timestamp': 1610407738,
+ 'description': 'md5:cf9c3d101452c91e141f292b19fe4843',
+ 'thumbnail': 'https://cdn.jwplayer.com/v2/media/YTmgRiNU/poster.jpg?width=720',
+ 'duration': 5688.0,
+ 'upload_date': '20210111',
+ }
+ },
+ {
+ 'note': 'JSON LD with multiple @type',
+ 'url': 'https://www.nu.nl/280161/video/hoe-een-bladvlo-dit-verwoestende-japanse-onkruid-moet-vernietigen.html',
+ 'md5': 'c7949f34f57273013fb7ccb1156393db',
+ 'info_dict': {
+ 'id': 'ipy2AcGL',
+ 'ext': 'mp4',
+ 'description': 'md5:6a9d644bab0dc2dc06849c2505d8383d',
+ 'thumbnail': r're:https://media\.nu\.nl/m/.+\.jpg',
+ 'title': 'Hoe een bladvlo dit verwoestende Japanse onkruid moet vernietigen',
+ 'timestamp': 1586577474,
+ 'upload_date': '20200411',
+ 'age_limit': 0,
+ 'duration': 111.0,
+ }
+ },
+ {
+ 'note': 'JSON LD with unexpected data type',
+ 'url': 'https://www.autoweek.nl/autotests/artikel/porsche-911-gt3-rs-rij-impressie-2/',
+ 'info_dict': {
+ 'id': 'porsche-911-gt3-rs-rij-impressie-2',
+ 'ext': 'mp4',
+ 'title': 'Test: Porsche 911 GT3 RS',
+ 'description': 'Je ziet het niet, maar het is er wel. Downforce, hebben we het dan over. En in de nieuwe Porsche 911 GT3 RS is er zelfs heel veel downforce.',
+ 'timestamp': 1664920902,
+ 'upload_date': '20221004',
+ 'thumbnail': r're:^https://media.autoweek.nl/m/.+\.jpg$',
+ 'age_limit': 0,
+ 'direct': True,
+ }
+ },
+ {
+ 'note': 'The server returns data in brotli compression by default if `accept-encoding: *` is specified.',
+ 'url': 'https://www.extra.cz/cauky-lidi-70-dil-babis-predstavil-pohadky-prymulanek-nebo-andrejovy-nove-saty-ac867',
+ 'info_dict': {
+ 'id': 'cauky-lidi-70-dil-babis-predstavil-pohadky-prymulanek-nebo-andrejovy-nove-saty-ac867',
+ 'ext': 'mp4',
+ 'title': 'čauky lidi 70 finall',
+ 'description': 'čauky lidi 70 finall',
+ 'thumbnail': 'h',
+ 'upload_date': '20220606',
+ 'timestamp': 1654513791,
+ 'duration': 318.0,
+ 'direct': True,
+ 'age_limit': 0,
+ },
+ },
+ {
+ 'note': 'JW Player embed with unicode-escape sequences in URL',
+ 'url': 'https://www.medici.tv/en/concerts/lahav-shani-mozart-mahler-israel-philharmonic-abu-dhabi-classics',
+ 'info_dict': {
+ 'id': 'm',
+ 'ext': 'mp4',
+ 'title': 'Lahav Shani conducts the Israel Philharmonic\'s first-ever concert in Abu Dhabi',
+ 'description': 'Mahler\'s ',
+ 'uploader': 'www.medici.tv',
+ 'age_limit': 0,
+ 'thumbnail': r're:^https?://.+\.jpg',
+ },
+ 'params': {
+ 'skip_download': True,
+ },
+ },
+ {
+ 'url': 'https://shooshtime.com/videos/284002/just-out-of-the-shower-joi/',
+ 'md5': 'e2f0a4c329f7986280b7328e24036d60',
+ 'info_dict': {
+ 'id': '284002',
+ 'display_id': 'just-out-of-the-shower-joi',
+ 'ext': 'mp4',
+ 'title': 'Just Out Of The Shower JOI - Shooshtime',
+ 'thumbnail': 'https://i.shoosh.co/contents/videos_screenshots/284000/284002/preview.mp4.jpg',
+ 'height': 720,
+ 'age_limit': 18,
+ },
+ },
+ {
+ 'note': 'Live HLS direct link',
+ 'url': 'https://d18j67ugtrocuq.cloudfront.net/out/v1/2767aec339144787926bd0322f72c6e9/index.m3u8',
+ 'info_dict': {
+ 'id': 'index',
+ 'title': r're:index',
+ 'ext': 'mp4',
+ 'live_status': 'is_live',
+ },
+ 'params': {
+ 'skip_download': 'm3u8',
+ },
+ },
+ {
+ 'note': 'Video.js VOD HLS',
+ 'url': 'https://gist.githubusercontent.com/bashonly/2aae0862c50f4a4b84f220c315767208/raw/e3380d413749dabbe804c9c2d8fd9a45142475c7/videojs_hls_test.html',
+ 'info_dict': {
+ 'id': 'videojs_hls_test',
+ 'title': 'video',
+ 'ext': 'mp4',
+ 'age_limit': 0,
+ 'duration': 1800,
+ },
+ 'params': {
+ 'skip_download': 'm3u8',
+ },
+ },
+ ]
+
+ def report_following_redirect(self, new_url):
+ """Report information extraction."""
+ self._downloader.to_screen('[redirect] Following redirect to %s' % new_url)
+
+ def report_detected(self, name, num=1, note=None):
+ if num > 1:
+ name += 's'
+ elif not num:
+ return
+ else:
+ num = 'a'
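+ # singular case: the debug message below reads e.g. 'Identified a Twitter card'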
+
+ self._downloader.write_debug(f'Identified {num} {name}{format_field(note, None, "; %s")}')
+
+ def _extra_manifest_info(self, info, manifest_url):
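+ # These options are supplied via --extractor-args, e.g.:
+ # yt-dlp --extractor-args "generic:fragment_query=token=abc" URL
+ # Passing fragment_query/variant_query without a value reuses the manifest
+ # URL's own query string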
+ fragment_query = self._configuration_arg('fragment_query', [None], casesense=True)[0]
+ if fragment_query is not None:
+ info['extra_param_to_segment_url'] = (
+ urllib.parse.urlparse(fragment_query).query or fragment_query
+ or urllib.parse.urlparse(manifest_url).query or None)
+
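+ # hls_key takes '(URI|KEY)[,IV]': the first value is either the decryption key
+ # URI or the key itself as hex; the optional second value is the IV as hex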
+ hex_or_none = lambda x: x if re.fullmatch(r'(0x)?[\da-f]+', x, re.IGNORECASE) else None
+ info['hls_aes'] = traverse_obj(self._configuration_arg('hls_key', casesense=True), {
+ 'uri': (0, {url_or_none}), 'key': (0, {hex_or_none}), 'iv': (1, {hex_or_none}),
+ }) or None
+
+ variant_query = self._configuration_arg('variant_query', [None], casesense=True)[0]
+ if variant_query is not None:
+ query = urllib.parse.parse_qs(
+ urllib.parse.urlparse(variant_query).query or variant_query
+ or urllib.parse.urlparse(manifest_url).query)
+ for fmt in self._downloader._get_formats(info):
+ fmt['url'] = update_url_query(fmt['url'], query)
+
+ # Attempt to detect live HLS or set VOD duration
+ m3u8_format = next((f for f in self._downloader._get_formats(info)
+ if determine_protocol(f) == 'm3u8_native'), None)
+ if m3u8_format:
+ is_live = self._configuration_arg('is_live', [None])[0]
+ if is_live is not None:
+ info['live_status'] = 'not_live' if is_live == 'false' else 'is_live'
+ return
+ headers = m3u8_format.get('http_headers') or info.get('http_headers')
+ duration = self._extract_m3u8_vod_duration(
+ m3u8_format['url'], info.get('id'), note='Checking m3u8 live status',
+ errnote='Failed to download m3u8 media playlist', headers=headers)
+ if not duration:
+ info['live_status'] = 'is_live'
+ info['duration'] = info.get('duration') or duration
+
+ def _extract_rss(self, url, video_id, doc):
+ NS_MAP = {
+ 'itunes': 'http://www.itunes.com/dtds/podcast-1.0.dtd',
+ }
+
+ entries = []
+ for it in doc.findall('./channel/item'):
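+ # prefer the <enclosure> URL (the actual media file) and fall back to <link>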
+ next_url = next(
+ (e.attrib.get('url') for e in it.findall('./enclosure')),
+ xpath_text(it, 'link', fatal=False))
+ if not next_url:
+ continue
+
+ guid = try_call(lambda: it.find('guid').text)
+ if guid:
+ next_url = smuggle_url(next_url, {'force_videoid': guid})
+
+ def itunes(key):
+ return xpath_text(it, xpath_with_ns(f'./itunes:{key}', NS_MAP), default=None)
+
+ entries.append({
+ '_type': 'url_transparent',
+ 'url': next_url,
+ 'title': try_call(lambda: it.find('title').text),
+ 'description': xpath_text(it, 'description', default=None),
+ 'timestamp': unified_timestamp(xpath_text(it, 'pubDate', default=None)),
+ 'duration': parse_duration(itunes('duration')),
+ 'thumbnail': url_or_none(xpath_attr(it, xpath_with_ns('./itunes:image', NS_MAP), 'href')),
+ 'episode': itunes('title'),
+ 'episode_number': int_or_none(itunes('episode')),
+ 'season_number': int_or_none(itunes('season')),
+ 'age_limit': {'true': 18, 'yes': 18, 'false': 0, 'no': 0}.get((itunes('explicit') or '').lower()),
+ })
+
+ return {
+ '_type': 'playlist',
+ 'id': url,
+ 'title': try_call(lambda: doc.find('./channel/title').text),
+ 'description': try_call(lambda: doc.find('./channel/description').text),
+ 'entries': entries,
+ }
+
+ @classmethod
+ def _kvs_get_real_url(cls, video_url, license_code):
+ if not video_url.startswith('function/0/'):
+ return video_url # not obfuscated
+
+ parsed = urllib.parse.urlparse(video_url[len('function/0/'):])
+ license = cls._kvs_get_license_token(license_code)
+ urlparts = parsed.path.split('/')
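+ # parsed.path.split('/') yields ['', seg1, seg2, seg3, ...]; urlparts[3]
+ # (the third path segment) begins with the 32-character scrambled hash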
+
+ HASH_LENGTH = 32
+ hash = urlparts[3][:HASH_LENGTH]
+ indices = list(range(HASH_LENGTH))
+
+ # Swap indices of hash according to the destination calculated from the license token
+ accum = 0
+ for src in reversed(range(HASH_LENGTH)):
+ accum += license[src]
+ dest = (src + accum) % HASH_LENGTH
+ indices[src], indices[dest] = indices[dest], indices[src]
+
+ urlparts[3] = ''.join(hash[index] for index in indices) + urlparts[3][HASH_LENGTH:]
+ return urllib.parse.urlunparse(parsed._replace(path='/'.join(urlparts)))
+
+ @staticmethod
+ def _kvs_get_license_token(license):
+ license = license.replace('$', '')
+ license_values = [int(char) for char in license]
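+ # license codes are typically '$' followed by 15 digits; modlicense below then
+ # has 15 // 2 + 1 = 8 digits, so the token gets 8 * 4 = 32 entries -- one per
+ # position of the 32-character hash permuted in _kvs_get_real_url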
+
+ modlicense = license.replace('0', '1')
+ center = len(modlicense) // 2
+ fronthalf = int(modlicense[:center + 1])
+ backhalf = int(modlicense[center:])
+ modlicense = str(4 * abs(fronthalf - backhalf))[:center + 1]
+
+ return [
+ (license_values[index + offset] + current) % 10
+ for index, current in enumerate(map(int, modlicense))
+ for offset in range(4)
+ ]
+
+ def _extract_kvs(self, url, webpage, video_id):
+ flashvars = self._search_json(
+ r'(?s:<script\b[^>]*>.*?var\s+flashvars\s*=)',
+ webpage, 'flashvars', video_id, transform_source=js_to_json)
+
+ # extract the part after the last / as the display_id from the
+ # canonical URL.
+ display_id = self._search_regex(
+ r'(?:<link href="https?://[^"]+/(.+?)/?" rel="canonical"\s*/?>'
+ r'|<link rel="canonical" href="https?://[^"]+/(.+?)/?"\s*/?>)',
+ webpage, 'display_id', fatal=False)
+ title = self._html_search_regex(r'<(?:h1|title)>(?:Video: )?(.+?)</(?:h1|title)>', webpage, 'title')
+
+ thumbnail = flashvars['preview_url']
+ if thumbnail.startswith('//'):
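+ # url.partition('/') yields (scheme + ':', '/', rest), so prefixing it turns
+ # the protocol-relative '//host/...' thumbnail into an absolute URL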
+ protocol, _, _ = url.partition('/')
+ thumbnail = protocol + thumbnail
+
+ url_keys = list(filter(re.compile(r'^video_(?:url|alt_url\d*)$').match, flashvars.keys()))
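+ # KVS flashvars typically expose 'video_url' plus 'video_alt_url', 'video_alt_url2', ...
+ # for other qualities, each with a matching '<key>_text' label such as '480p'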
+ formats = []
+ for key in url_keys:
+ if '/get_file/' not in flashvars[key]:
+ continue
+ format_id = flashvars.get(f'{key}_text', key)
+ formats.append({
+ 'url': urljoin(url, self._kvs_get_real_url(flashvars[key], flashvars['license_code'])),
+ 'format_id': format_id,
+ 'ext': 'mp4',
+ **(parse_resolution(format_id) or parse_resolution(flashvars[key])),
+ 'http_headers': {'Referer': url},
+ })
+ if not formats[-1].get('height'):
+ formats[-1]['quality'] = 1
+
+ return {
+ 'id': flashvars['video_id'],
+ 'display_id': display_id,
+ 'title': title,
+ 'thumbnail': urljoin(url, thumbnail),
+ 'formats': formats,
+ }
+
+ def _real_extract(self, url):
+ if url.startswith('//'):
+ return self.url_result(self.http_scheme() + url)
+
+ parsed_url = urllib.parse.urlparse(url)
+ if not parsed_url.scheme:
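+ # --default-search may be 'auto', 'auto_warning', 'error', 'fixup_error'
+ # (the default here) or an extractor search prefix such as 'ytsearch'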
+ default_search = self.get_param('default_search')
+ if default_search is None:
+ default_search = 'fixup_error'
+
+ if default_search in ('auto', 'auto_warning', 'fixup_error'):
+ if re.match(r'^[^\s/]+\.[^\s/]+/', url):
+ self.report_warning('The url doesn\'t specify the protocol, trying with http')
+ return self.url_result('http://' + url)
+ elif default_search != 'fixup_error':
+ if default_search == 'auto_warning':
+ if re.match(r'^(?:url|URL)$', url):
+ raise ExtractorError(
+ 'Invalid URL: %r . Call yt-dlp like this: yt-dlp -v "https://www.youtube.com/watch?v=BaW_jenozKc" ' % url,
+ expected=True)
+ else:
+ self.report_warning(
+ 'Falling back to youtube search for %s . Set --default-search "auto" to suppress this warning.' % url)
+ return self.url_result('ytsearch:' + url)
+
+ if default_search in ('error', 'fixup_error'):
+ raise ExtractorError(
+ '%r is not a valid URL. '
+ 'Set --default-search "ytsearch" (or run yt-dlp "ytsearch:%s" ) to search YouTube'
+ % (url, url), expected=True)
+ else:
+ if ':' not in default_search:
+ default_search += ':'
+ return self.url_result(default_search + url)
+
+ original_url = url
+ url, smuggled_data = unsmuggle_url(url, {})
+ force_videoid = None
+ is_intentional = smuggled_data.get('to_generic')
+ if 'force_videoid' in smuggled_data:
+ force_videoid = smuggled_data['force_videoid']
+ video_id = force_videoid
+ else:
+ video_id = self._generic_id(url)
+
+ # Some webservers may serve compressed content of rather big size (e.g. gzipped flac),
+ # making it impossible to download only a chunk of the file (yet we need only 512kB to
+ # test whether it's HTML or not). With yt-dlp's default Accept-Encoding header,
+ # that would always result in downloading the whole file, which is not desirable.
+ # Therefore, for the extraction pass, we have to override Accept-Encoding to 'identity'
+ # in order to receive raw bytes and be able to download only a chunk.
+ # It would probably be better to solve this by checking Content-Type for application/octet-stream
+ # after a HEAD request, but it is not clear whether we can rely on that.
+ full_response = self._request_webpage(url, video_id, headers=filter_dict({
+ 'Accept-Encoding': 'identity',
+ 'Referer': smuggled_data.get('referer'),
+ }))
+ new_url = full_response.url
+ if new_url != extract_basic_auth(url)[0]:
+ self.report_following_redirect(new_url)
+ if force_videoid:
+ new_url = smuggle_url(new_url, {'force_videoid': force_videoid})
+ return self.url_result(new_url)
+
+ info_dict = {
+ 'id': video_id,
+ 'title': self._generic_title(url),
+ 'timestamp': unified_timestamp(full_response.headers.get('Last-Modified'))
+ }
+
+ # Check for direct link to a video
+ content_type = full_response.headers.get('Content-Type', '').lower()
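+ # matches audio/*, video/*, and the directly playable application/* types
+ # (application/ogg and the (vnd.apple.|x-)mpegurl HLS manifests)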
+ m = re.match(r'^(?P<type>audio|video|application(?=/(?:ogg$|(?:vnd\.apple\.|x-)?mpegurl)))/(?P<format_id>[^;\s]+)', content_type)
+ if m:
+ self.report_detected('direct video link')
+ headers = filter_dict({'Referer': smuggled_data.get('referer')})
+ format_id = str(m.group('format_id'))
+ ext = determine_ext(url, default_ext=None) or urlhandle_detect_ext(full_response)
+ subtitles = {}
+ if format_id.endswith('mpegurl') or ext == 'm3u8':
+ formats, subtitles = self._extract_m3u8_formats_and_subtitles(url, video_id, 'mp4', headers=headers)
+ elif format_id.endswith('mpd') or format_id.endswith('dash+xml') or ext == 'mpd':
+ formats, subtitles = self._extract_mpd_formats_and_subtitles(url, video_id, headers=headers)
+ elif format_id == 'f4m' or ext == 'f4m':
+ formats = self._extract_f4m_formats(url, video_id, headers=headers)
+ else:
+ formats = [{
+ 'format_id': format_id,
+ 'url': url,
+ 'ext': ext,
+ 'vcodec': 'none' if m.group('type') == 'audio' else None
+ }]
+ info_dict['direct'] = True
+ info_dict.update({
+ 'formats': formats,
+ 'subtitles': subtitles,
+ 'http_headers': headers or None,
+ })
+ self._extra_manifest_info(info_dict, url)
+ return info_dict
+
+ if not self.get_param('test', False) and not is_intentional:
+ force = self.get_param('force_generic_extractor', False)
+ self.report_warning('%s generic information extractor' % ('Forcing' if force else 'Falling back on'))
+
+ first_bytes = full_response.read(512)
+
+ # Is it an M3U playlist?
+ if first_bytes.startswith(b'#EXTM3U'):
+ self.report_detected('M3U playlist')
+ info_dict['formats'], info_dict['subtitles'] = self._extract_m3u8_formats_and_subtitles(url, video_id, 'mp4')
+ self._extra_manifest_info(info_dict, url)
+ return info_dict
+
+ # Maybe it's a direct link to a video?
+ # Be careful not to download the whole thing!
+ if not is_html(first_bytes):
+ self.report_warning(
+ 'URL could be a direct video link, returning it as such.')
+ info_dict.update({
+ 'direct': True,
+ 'url': url,
+ })
+ return info_dict
+
+ webpage = self._webpage_read_content(
+ full_response, url, video_id, prefix=first_bytes)
+
+ if '<title>DPG Media Privacy Gate</title>' in webpage:
+ webpage = self._download_webpage(url, video_id)
+
+ self.report_extraction(video_id)
+
+ # Is it an RSS feed, a SMIL file, an XSPF playlist or a MPD manifest?
+ try:
+ try:
+ doc = compat_etree_fromstring(webpage)
+ except xml.etree.ElementTree.ParseError:
+ doc = compat_etree_fromstring(webpage.encode('utf-8'))
+ if doc.tag == 'rss':
+ self.report_detected('RSS feed')
+ return self._extract_rss(url, video_id, doc)
+ elif doc.tag == 'SmoothStreamingMedia':
+ info_dict['formats'], info_dict['subtitles'] = self._parse_ism_formats_and_subtitles(doc, url)
+ self.report_detected('ISM manifest')
+ return info_dict
+ elif re.match(r'^(?:{[^}]+})?smil$', doc.tag):
+ smil = self._parse_smil(doc, url, video_id)
+ self.report_detected('SMIL file')
+ return smil
+ elif doc.tag == '{http://xspf.org/ns/0/}playlist':
+ self.report_detected('XSPF playlist')
+ return self.playlist_result(
+ self._parse_xspf(
+ doc, video_id, xspf_url=url,
+ xspf_base_url=full_response.url),
+ video_id)
+ elif re.match(r'(?i)^(?:{[^}]+})?MPD$', doc.tag):
+ info_dict['formats'], info_dict['subtitles'] = self._parse_mpd_formats_and_subtitles(
+ doc,
+ mpd_base_url=full_response.url.rpartition('/')[0],
+ mpd_url=url)
+ self._extra_manifest_info(info_dict, url)
+ self.report_detected('DASH manifest')
+ return info_dict
+ elif re.match(r'^{http://ns\.adobe\.com/f4m/[12]\.0}manifest$', doc.tag):
+ info_dict['formats'] = self._parse_f4m_formats(doc, url, video_id)
+ self.report_detected('F4M manifest')
+ return info_dict
+ except xml.etree.ElementTree.ParseError:
+ pass
+
+ info_dict.update({
+ # it's tempting to parse this further, but you would
+ # have to take into account all the variations like
+ # Video Title - Site Name
+ # Site Name | Video Title
+ # Video Title - Tagline | Site Name
+ # and so on and so forth; it's just not practical
+ 'title': self._generic_title('', webpage, default='video'),
+ 'description': self._og_search_description(webpage, default=None),
+ 'thumbnail': self._og_search_thumbnail(webpage, default=None),
+ 'age_limit': self._rta_search(webpage),
+ })
+
+ self._downloader.write_debug('Looking for embeds')
+ embeds = list(self._extract_embeds(original_url, webpage, urlh=full_response, info_dict=info_dict))
+ if len(embeds) == 1:
+ return merge_dicts(embeds[0], info_dict)
+ elif embeds:
+ return self.playlist_result(embeds, **info_dict)
+ raise UnsupportedError(url)
+
+ def _extract_embeds(self, url, webpage, *, urlh=None, info_dict={}):
+ """Returns an iterator of video entries"""
+ info_dict = types.MappingProxyType(info_dict) # Prevents accidental mutation
+ video_id = traverse_obj(info_dict, 'display_id', 'id') or self._generic_id(url)
+ url, smuggled_data = unsmuggle_url(url, {})
+ actual_url = urlh.url if urlh else url
+
+ # Sometimes an embedded video player is hidden behind percent encoding
+ # (e.g. https://github.com/ytdl-org/youtube-dl/issues/2448)
+ # Unescaping the whole page allows those cases to be handled in a generic way.
+ # FIXME: unescaping the whole page may break URLs, so it is commented out for now.
+ # There should probably be a second run of the generic extractor on the unescaped webpage.
+ # webpage = urllib.parse.unquote(webpage)
+
+ embeds = []
+ for ie in self._downloader._ies.values():
+ if ie.ie_key() in smuggled_data.get('block_ies', []):
+ continue
+ gen = ie.extract_from_webpage(self._downloader, url, webpage)
+ current_embeds = []
+ try:
+ while True:
+ current_embeds.append(next(gen))
+ except self.StopExtraction:
+ self.report_detected(f'{ie.IE_NAME} exclusive embed', len(current_embeds),
+ embeds and 'discarding other embeds')
+ return current_embeds
+ except StopIteration:
+ self.report_detected(f'{ie.IE_NAME} embed', len(current_embeds))
+ embeds.extend(current_embeds)
+
+ if embeds:
+ return embeds
+
+ jwplayer_data = self._find_jwplayer_data(
+ webpage, video_id, transform_source=js_to_json)
+ if jwplayer_data:
+ if isinstance(jwplayer_data.get('playlist'), str):
+ self.report_detected('JW Player playlist')
+ return [self.url_result(jwplayer_data['playlist'], 'JWPlatform')]
+ try:
+ info = self._parse_jwplayer_data(
+ jwplayer_data, video_id, require_title=False, base_url=url)
+ if traverse_obj(info, 'formats', ('entries', ..., 'formats')):
+ self.report_detected('JW Player data')
+ return [info]
+ except ExtractorError:
+ # See https://github.com/ytdl-org/youtube-dl/pull/16735
+ pass
+
+ # Video.js embed
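+ # matches setups like:
+ # var player = videojs('vid1');
+ # player.src({src: 'https://example.com/master.m3u8', type: 'application/x-mpegurl'});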
+ mobj = re.search(
+ r'(?s)\bvideojs\s*\(.+?([a-zA-Z0-9_$]+)\.src\s*\(\s*((?:\[.+?\]|{.+?}))\s*\)\s*;',
+ webpage)
+ if mobj is not None:
+ varname = mobj.group(1)
+ sources = variadic(self._parse_json(
+ mobj.group(2), video_id, transform_source=js_to_json, fatal=False) or [])
+ formats, subtitles, src = [], {}, None
+ for source in sources:
+ src = source.get('src')
+ if not src or not isinstance(src, str):
+ continue
+ src = urllib.parse.urljoin(url, src)
+ src_type = source.get('type')
+ if isinstance(src_type, str):
+ src_type = src_type.lower()
+ ext = determine_ext(src).lower()
+ if src_type == 'video/youtube':
+ return [self.url_result(src, YoutubeIE.ie_key())]
+ if src_type == 'application/dash+xml' or ext == 'mpd':
+ fmts, subs = self._extract_mpd_formats_and_subtitles(
+ src, video_id, mpd_id='dash', fatal=False)
+ formats.extend(fmts)
+ self._merge_subtitles(subs, target=subtitles)
+ elif src_type == 'application/x-mpegurl' or ext == 'm3u8':
+ fmts, subs = self._extract_m3u8_formats_and_subtitles(
+ src, video_id, 'mp4', entry_protocol='m3u8_native',
+ m3u8_id='hls', fatal=False)
+ formats.extend(fmts)
+ self._merge_subtitles(subs, target=subtitles)
+
+ if not formats:
+ formats.append({
+ 'url': src,
+ 'ext': ((mimetype2ext(src_type) or ext)
+ if ext in KNOWN_EXTENSIONS else 'mp4'),
+ 'http_headers': {
+ 'Referer': actual_url,
+ },
+ })
+ # https://docs.videojs.com/player#addRemoteTextTrack
+ # https://html.spec.whatwg.org/multipage/media.html#htmltrackelement
+ for sub_match in re.finditer(rf'(?s){re.escape(varname)}' r'\.addRemoteTextTrack\(({.+?})\s*,\s*(?:true|false)\)', webpage):
+ sub = self._parse_json(
+ sub_match.group(1), video_id, transform_source=js_to_json, fatal=False) or {}
+ sub_src = str_or_none(sub.get('src'))
+ if not sub_src:
+ continue
+ subtitles.setdefault(dict_get(sub, ('language', 'srclang')) or 'und', []).append({
+ 'url': urllib.parse.urljoin(url, sub_src),
+ 'name': sub.get('label'),
+ 'http_headers': {
+ 'Referer': actual_url,
+ },
+ })
+ if formats or subtitles:
+ self.report_detected('video.js embed')
+ info_dict = {'formats': formats, 'subtitles': subtitles}
+ if formats:
+ self._extra_manifest_info(info_dict, src)
+ return [info_dict]
+
+ # Look for generic KVS player (before json-ld bc of some urls that break otherwise)
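+ # e.g. <script src="https://example.com/player/kt_player.js?v=5.5.1"></script>
+ # or kt_player('kt_player', 'https://example.com/player/kt_player.swf?v=5.5.1', ...)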
+ found = self._search_regex((
+ r'<script\b[^>]+?\bsrc\s*=\s*(["\'])https?://(?:(?!\1)[^?#])+/kt_player\.js\?v=(?P<ver>\d+(?:\.\d+)+)\1[^>]*>',
+ r'kt_player\s*\(\s*(["\'])(?:(?!\1)[\w\W])+\1\s*,\s*(["\'])https?://(?:(?!\2)[^?#])+/kt_player\.swf\?v=(?P<ver>\d+(?:\.\d+)+)\2\s*,',
+ ), webpage, 'KVS player', group='ver', default=False)
+ if found:
+ self.report_detected('KVS Player')
+ if found.split('.')[0] not in ('4', '5', '6'):
+ self.report_warning(f'Untested major version ({found}) in player engine - download may fail.')
+ return [self._extract_kvs(url, webpage, video_id)]
+
+ # Looking for http://schema.org/VideoObject
+ json_ld = self._search_json_ld(webpage, video_id, default={})
+ if json_ld.get('url') not in (url, None):
+ self.report_detected('JSON LD')
+ is_direct = json_ld.get('ext') not in (None, *MEDIA_EXTENSIONS.manifests)
+ return [merge_dicts({
+ '_type': 'video' if is_direct else 'url_transparent',
+ 'url': smuggle_url(json_ld['url'], {
+ 'force_videoid': video_id,
+ 'to_generic': True,
+ 'referer': url,
+ }),
+ }, json_ld)]
+
+ def check_video(vurl):
+ if YoutubeIE.suitable(vurl):
+ return True
+ if RtmpIE.suitable(vurl):
+ return True
+ vpath = urllib.parse.urlparse(vurl).path
+ vext = determine_ext(vpath, None)
+ return vext not in (None, 'swf', 'png', 'jpg', 'srt', 'sbv', 'sub', 'vtt', 'ttml', 'js', 'xml')
+
+ def filter_video(urls):
+ return list(filter(check_video, urls))
+
+ # Start with something easy: JW Player in SWFObject
+ found = filter_video(re.findall(r'flashvars: [\'"](?:.*&)?file=(http[^\'"&]*)', webpage))
+ if found:
+ self.report_detected('JW Player in SWFObject')
+ else:
+ # Look for gorilla-vid style embedding
+ found = filter_video(re.findall(r'''(?sx)
+ (?:
+ jw_plugins|
+ JWPlayerOptions|
+ jwplayer\s*\(\s*["'][^'"]+["']\s*\)\s*\.setup
+ )
+ .*?
+ ['"]?file['"]?\s*:\s*["\'](.*?)["\']''', webpage))
+ if found:
+ self.report_detected('JW Player embed')
+ if not found:
+ # Broaden the search a little bit
+ found = filter_video(re.findall(r'[^A-Za-z0-9]?(?:file|source)=(http[^\'"&]*)', webpage))
+ if found:
+ self.report_detected('video file')
+ if not found:
+ # Broaden the findall a little bit: JWPlayer JS loader
+ found = filter_video(re.findall(
+ r'[^A-Za-z0-9]?(?:file|video_url)["\']?:\s*["\'](http(?![^\'"]+\.[0-9]+[\'"])[^\'"]+)["\']', webpage))
+ if found:
+ self.report_detected('JW Player JS loader')
+ if not found:
+ # Flow player
+ found = filter_video(re.findall(r'''(?xs)
+ flowplayer\("[^"]+",\s*
+ \{[^}]+?\}\s*,
+ \s*\{[^}]+? ["']?clip["']?\s*:\s*\{\s*
+ ["']?url["']?\s*:\s*["']([^"']+)["']
+ ''', webpage))
+ if found:
+ self.report_detected('Flow Player')
+ if not found:
+ # Cinerama player
+ found = re.findall(
+ r"cinerama\.embedPlayer\(\s*\'[^']+\',\s*'([^']+)'", webpage)
+ if found:
+ self.report_detected('Cinerama player')
+ if not found:
+ # Try to find twitter cards info
+ # twitter:player:stream should be checked before twitter:player since
+ # it is expected to contain a raw stream (see
+ # https://dev.twitter.com/cards/types/player#On_twitter.com_via_desktop_browser)
+ found = filter_video(re.findall(
+ r'<meta (?:property|name)="twitter:player:stream" (?:content|value)="(.+?)"', webpage))
+ if found:
+ self.report_detected('Twitter card')
+ if not found:
+ # We look for Open Graph info:
+ # We have to match any number of spaces between elements; some sites try to align them (e.g. statigr.am)
+ m_video_type = re.findall(r'<meta.*?property="og:video:type".*?content="video/(.*?)"', webpage)
+ # We only look in og:video if the MIME type is a video; don't try if it's a Flash player.
+ # re.findall returns a list, so test its truthiness rather than comparing to None:
+ if m_video_type:
+ if found:
+ self.report_detected('Open Graph video info')
+ if not found:
+ REDIRECT_REGEX = r'[0-9]{,2};\s*(?:URL|url)=\'?([^\'"]+)'
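+ # matches e.g. <meta http-equiv="refresh" content="0; url='https://example.com/'">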
+ found = re.search(
+ r'(?i)<meta\s+(?=(?:[a-z-]+="[^"]+"\s+)*http-equiv="refresh")'
+ r'(?:[a-z-]+="[^"]+"\s+)*?content="%s' % REDIRECT_REGEX,
+ webpage)
+ if not found:
+ # Look also in Refresh HTTP header
+ refresh_header = urlh and urlh.headers.get('Refresh')
+ if refresh_header:
+ found = re.search(REDIRECT_REGEX, refresh_header)
+ if found:
+ new_url = urllib.parse.urljoin(url, unescapeHTML(found.group(1)))
+ if new_url != url:
+ self.report_following_redirect(new_url)
+ return [self.url_result(new_url)]
+ else:
+ found = None
+
+ if not found:
+ # twitter:player is an https URL to an iframe player that may or may not
+ # be supported by yt-dlp, thus it is checked last (see
+ # https://dev.twitter.com/cards/types/player#On_twitter.com_via_desktop_browser)
+ embed_url = self._html_search_meta('twitter:player', webpage, default=None)
+ if embed_url and embed_url != url:
+ self.report_detected('twitter:player iframe')
+ return [self.url_result(embed_url)]
+
+ if not found:
+ return []
+
+ domain_name = self._search_regex(r'^(?:https?://)?([^/]*)/.*', url, 'video uploader', default=None)
+
+ entries = []
+ for video_url in orderedSet(found):
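+ # undo JavaScript-style \uXXXX escapes, HTML entities and backslash-escaped
+ # slashes that inline player configs often contain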
+ video_url = video_url.encode().decode('unicode-escape')
+ video_url = unescapeHTML(video_url)
+ video_url = video_url.replace('\\/', '/')
+ video_url = urllib.parse.urljoin(url, video_url)
+ video_id = urllib.parse.unquote(os.path.basename(video_url))
+
+ # Sometimes, jwplayer extraction will result in a YouTube URL
+ if YoutubeIE.suitable(video_url):
+ entries.append(self.url_result(video_url, 'Youtube'))
+ continue
+
+ video_id = os.path.splitext(video_id)[0]
+ headers = {
+ 'referer': actual_url
+ }
+
+ entry_info_dict = {
+ 'id': video_id,
+ 'uploader': domain_name,
+ 'title': info_dict['title'],
+ 'age_limit': info_dict['age_limit'],
+ 'http_headers': headers,
+ }
+
+ if RtmpIE.suitable(video_url):
+ entry_info_dict.update({
+ '_type': 'url_transparent',
+ 'ie_key': RtmpIE.ie_key(),
+ 'url': video_url,
+ })
+ entries.append(entry_info_dict)
+ continue
+
+ ext = determine_ext(video_url)
+ if ext == 'smil':
+ entry_info_dict = {**self._extract_smil_info(video_url, video_id), **entry_info_dict}
+ elif ext == 'xspf':
+ return [self._extract_xspf_playlist(video_url, video_id)]
+ elif ext == 'm3u8':
+ entry_info_dict['formats'], entry_info_dict['subtitles'] = self._extract_m3u8_formats_and_subtitles(video_url, video_id, ext='mp4', headers=headers)
+ self._extra_manifest_info(entry_info_dict, video_url)
+ elif ext == 'mpd':
+ entry_info_dict['formats'], entry_info_dict['subtitles'] = self._extract_mpd_formats_and_subtitles(video_url, video_id, headers=headers)
+ self._extra_manifest_info(entry_info_dict, video_url)
+ elif ext == 'f4m':
+ entry_info_dict['formats'] = self._extract_f4m_formats(video_url, video_id, headers=headers)
+ elif re.search(r'(?i)\.(?:ism|smil)/manifest', video_url) and video_url != url:
+ # Just matching .ism/manifest is not enough to be reliably sure
+ # whether it's actually an ISM manifest or some other streaming
+ # manifest since there are various streaming URL formats
+ # possible (see [1]) as well as some other shenanigans like
+ # .smil/manifest URLs that actually serve an ISM (see [2]) and
+ # so on.
+ # Thus the most reasonable way to solve this is to delegate
+ # to the generic extractor in order to look into the contents of
+ # the manifest itself.
+ # 1. https://azure.microsoft.com/en-us/documentation/articles/media-services-deliver-content-overview/#streaming-url-formats
+ # 2. https://svs.itworkscdn.net/lbcivod/smil:itwfcdn/lbci/170976.smil/Manifest
+ entry_info_dict = self.url_result(
+ smuggle_url(video_url, {'to_generic': True}),
+ GenericIE.ie_key())
+ else:
+ entry_info_dict['url'] = video_url
+
+ entries.append(entry_info_dict)
+
+ if len(entries) > 1:
+ for num, e in enumerate(entries, start=1):
+ # 'url' results don't have a title
+ if e.get('title') is not None:
+ e['title'] = '%s (%d)' % (e['title'], num)
+ return entries
diff --git a/yt_dlp/extractor/genericembeds.py b/yt_dlp/extractor/genericembeds.py
new file mode 100644
index 0000000..9b4f14d
--- /dev/null
+++ b/yt_dlp/extractor/genericembeds.py
@@ -0,0 +1,114 @@
+import re
+import urllib.parse
+
+from .common import InfoExtractor
+from ..utils import make_archive_id, unescapeHTML
+
+
+class HTML5MediaEmbedIE(InfoExtractor):
+ _VALID_URL = False
+ IE_NAME = 'html5'
+ _WEBPAGE_TESTS = [
+ {
+ 'url': 'https://html.com/media/',
+ 'info_dict': {
+ 'title': 'HTML5 Media',
+ 'description': 'md5:933b2d02ceffe7a7a0f3c8326d91cc2a',
+ },
+ 'playlist_count': 2
+ }
+ ]
+
+ def _extract_from_webpage(self, url, webpage):
+ video_id, title = self._generic_id(url), self._generic_title(url, webpage)
+ entries = self._parse_html5_media_entries(url, webpage, video_id, m3u8_id='hls') or []
+ for num, entry in enumerate(entries, start=1):
+ entry.update({
+ 'id': f'{video_id}-{num}',
+ 'title': f'{title} ({num})',
+ '_old_archive_ids': [
+ make_archive_id('generic', f'{video_id}-{num}' if len(entries) > 1 else video_id),
+ ],
+ })
+ yield entry
+
+
+class QuotedHTMLIE(InfoExtractor):
+ """For common cases of quoted/escaped html parts in the webpage"""
+ _VALID_URL = False
+ IE_NAME = 'generic:quoted-html'
+ IE_DESC = False # Do not list
+ _WEBPAGE_TESTS = [{
+ # 2 YouTube embeds in data-html
+ 'url': 'https://24tv.ua/bronetransporteri-ozbroyenni-zsu-shho-vidomo-pro-bronovik-wolfhound_n2167966',
+ 'info_dict': {
+ 'id': 'bronetransporteri-ozbroyenni-zsu-shho-vidomo-pro-bronovik-wolfhound_n2167966',
+ 'title': 'Броньовик Wolfhound: гігант, який допомагає ЗСУ знищувати окупантів на фронті',
+ 'thumbnail': r're:^https?://.*\.jpe?g',
+ 'timestamp': float,
+ 'upload_date': str,
+ 'description': 'md5:6816e1e5a65304bd7898e4c7eb1b26f7',
+ 'age_limit': 0,
+ },
+ 'playlist_count': 2
+ }, {
+ # Generic iframe embed of TV24UAPlayerIE within data-html
+ 'url': 'https://24tv.ua/harkivyani-zgaduyut-misto-do-viyni-shhemlive-video_n1887584',
+ 'info_dict': {
+ 'id': '1887584',
+ 'ext': 'mp4',
+ 'title': 'Харків\'яни згадують місто до війни: щемливе відео',
+ 'thumbnail': r're:^https?://.*\.jpe?g',
+ },
+ 'params': {'skip_download': True}
+ }, {
+ # YouTube embeds on Squarespace (data-html): https://github.com/ytdl-org/youtube-dl/issues/21294
+ 'url': 'https://www.harvardballetcompany.org/past-productions',
+ 'info_dict': {
+ 'id': 'past-productions',
+ 'title': 'Productions — Harvard Ballet Company',
+ 'age_limit': 0,
+ 'description': 'Past Productions',
+ },
+ 'playlist_mincount': 26
+ }, {
+ # Squarespace video embed, 2019-08-28, data-html
+ 'url': 'http://ootboxford.com',
+ 'info_dict': {
+ 'id': 'Tc7b_JGdZfw',
+ 'title': 'Out of the Blue, at Childish Things 10',
+ 'ext': 'mp4',
+ 'description': 'md5:a83d0026666cf5ee970f8bd1cfd69c7f',
+ 'uploader_id': 'helendouglashouse',
+ 'uploader': 'Helen & Douglas House',
+ 'upload_date': '20140328',
+ 'availability': 'public',
+ 'view_count': int,
+ 'channel': 'Helen & Douglas House',
+ 'comment_count': int,
+ 'uploader_url': 'http://www.youtube.com/user/helendouglashouse',
+ 'duration': 253,
+ 'channel_url': 'https://www.youtube.com/channel/UCTChGezrZVmlYlpMlkmulPA',
+ 'playable_in_embed': True,
+ 'age_limit': 0,
+ 'channel_follower_count': int,
+ 'channel_id': 'UCTChGezrZVmlYlpMlkmulPA',
+ 'tags': 'count:6',
+ 'categories': ['Nonprofits & Activism'],
+ 'like_count': int,
+ 'thumbnail': 'https://i.ytimg.com/vi/Tc7b_JGdZfw/hqdefault.jpg',
+ },
+ 'params': {
+ 'skip_download': True,
+ },
+ }]
+
+ def _extract_from_webpage(self, url, webpage):
+ combined = ''
+ for _, html in re.findall(r'(?s)\bdata-html=(["\'])((?:(?!\1).)+)\1', webpage):
+ # unescapeHTML can handle &quot; etc., unquote can handle percent encoding
+ unquoted_html = unescapeHTML(urllib.parse.unquote(html))
+ if unquoted_html != html:
+ combined += unquoted_html
+ if combined:
+ yield from self._extract_generic_embeds(url, combined)
diff --git a/yt_dlp/extractor/genius.py b/yt_dlp/extractor/genius.py
new file mode 100644
index 0000000..57c25e7
--- /dev/null
+++ b/yt_dlp/extractor/genius.py
@@ -0,0 +1,145 @@
+from .common import InfoExtractor
+from ..utils import (
+ ExtractorError,
+ js_to_json,
+ smuggle_url,
+ str_or_none,
+ traverse_obj,
+ unescapeHTML,
+)
+
+
+class GeniusIE(InfoExtractor):
+ _VALID_URL = r'https?://(?:www\.)?genius\.com/(?:videos|(?P<article>a))/(?P<id>[^?/#]+)'
+ _TESTS = [{
+ 'url': 'https://genius.com/videos/Vince-staples-breaks-down-the-meaning-of-when-sparks-fly',
+ 'md5': '64c2ad98cfafcfda23bfa0ad0c512f4c',
+ 'info_dict': {
+ 'id': '6313303597112',
+ 'ext': 'mp4',
+ 'title': 'Vince Staples Breaks Down The Meaning Of “When Sparks Fly”',
+ 'description': 'md5:bc15e00342c537c0039d414423ae5752',
+ 'tags': 'count:1',
+ 'uploader_id': '4863540648001',
+ 'duration': 388.416,
+ 'upload_date': '20221005',
+ 'timestamp': 1664982341,
+ 'thumbnail': r're:^https?://.*\.jpg$',
+ },
+ }, {
+ 'url': 'https://genius.com/videos/Breaking-down-drakes-certified-lover-boy-kanye-beef-way-2-sexy-cudi',
+ 'md5': 'b8ed87a5efd1473bd027c20a969d4060',
+ 'info_dict': {
+ 'id': '6271792014001',
+ 'ext': 'mp4',
+ 'title': 'md5:c6355f7fa8a70bc86492a3963919fc15',
+ 'description': 'md5:1774638c31548b31b037c09e9b821393',
+ 'tags': 'count:3',
+ 'uploader_id': '4863540648001',
+ 'duration': 2685.099,
+ 'upload_date': '20210909',
+ 'timestamp': 1631209167,
+ 'thumbnail': r're:^https?://.*\.jpg$',
+ },
+ }, {
+ 'url': 'https://genius.com/a/cordae-anderson-paak-break-down-the-meaning-of-two-tens',
+ 'md5': 'f98a4e03b16b0a2821bd6e52fb3cc9d7',
+ 'info_dict': {
+ 'id': '6321509903112',
+ 'ext': 'mp4',
+ 'title': 'Cordae & Anderson .Paak Breaks Down The Meaning Of “Two Tens”',
+ 'description': 'md5:1255f0e1161d07342ce56a8464ac339d',
+ 'tags': ['song id: 5457554'],
+ 'uploader_id': '4863540648001',
+ 'duration': 361.813,
+ 'upload_date': '20230301',
+ 'timestamp': 1677703908,
+ 'thumbnail': r're:^https?://.*\.jpg$',
+ },
+ }]
+
+ def _real_extract(self, url):
+ display_id, is_article = self._match_valid_url(url).group('id', 'article')
+ webpage = self._download_webpage(url, display_id)
+
+ metadata = self._search_json(
+ r'<meta content="', webpage, 'metadata', display_id,
+ end_pattern=r'"\s+itemprop="page_data"', transform_source=unescapeHTML)
+ video_id = traverse_obj(metadata, (
+ (('article', 'media', ...), ('video', None)),
+ ('provider_id', ('dfp_kv', lambda _, v: v['name'] == 'brightcove_video_id', 'values', ...))),
+ get_all=False)
+ if not video_id:
+ # Not all article pages have videos, expect the error
+ raise ExtractorError('Brightcove video ID not found in webpage', expected=bool(is_article))
+
+ config = self._search_json(r'var\s*APP_CONFIG\s*=', webpage, 'config', video_id, default={})
+ account_id = config.get('brightcove_account_id', '4863540648001')
+ player_id = traverse_obj(
+ config, 'brightcove_standard_web_player_id', 'brightcove_standard_no_autoplay_web_player_id',
+ 'brightcove_modal_web_player_id', 'brightcove_song_story_web_player_id', default='S1ZcmcOC1x')
+
+ return self.url_result(
+ smuggle_url(
+ f'https://players.brightcove.net/{account_id}/{player_id}_default/index.html?videoId={video_id}',
+ {'referrer': url}), 'BrightcoveNew', video_id)
+
+
+class GeniusLyricsIE(InfoExtractor):
+ _VALID_URL = r'https?://(?:www\.)?genius\.com/(?P<id>[^?/#]+)-lyrics(?:[?/#]|$)'
+ _TESTS = [{
+ 'url': 'https://genius.com/Lil-baby-heyy-lyrics',
+ 'playlist_mincount': 2,
+ 'info_dict': {
+ 'id': '8454545',
+ 'title': 'Heyy',
+ 'description': 'Heyy by Lil Baby',
+ },
+ }, {
+ 'url': 'https://genius.com/Outkast-two-dope-boyz-in-a-cadillac-lyrics',
+ 'playlist_mincount': 1,
+ 'info_dict': {
+ 'id': '36239',
+ 'title': 'Two Dope Boyz (In a Cadillac)',
+ 'description': 'Two Dope Boyz (In a Cadillac) by OutKast',
+ },
+ }, {
+ 'url': 'https://genius.com/Playboi-carti-rip-lyrics',
+ 'playlist_mincount': 1,
+ 'info_dict': {
+ 'id': '3710582',
+ 'title': 'R.I.P.',
+ 'description': 'R.I.P. by Playboi Carti',
+ },
+ }]
+
+ def _real_extract(self, url):
+ display_id = self._match_id(url)
+ webpage = self._download_webpage(url, display_id)
+
+ json_string = self._search_json(
+ r'window\.__PRELOADED_STATE__\s*=\s*JSON\.parse\(', webpage, 'json string',
+ display_id, transform_source=js_to_json, contains_pattern=r'\'{(?s:.+)}\'')
+ song_info = self._parse_json(json_string, display_id)
+ song_id = str_or_none(traverse_obj(song_info, ('songPage', 'song')))
+ if not song_id:
+ raise ExtractorError('Song id not found in webpage')
+
+ title = traverse_obj(
+ song_info, ('songPage', 'trackingData', lambda _, x: x['key'] == 'Title', 'value'),
+ get_all=False, default='untitled')
+ artist = traverse_obj(
+ song_info, ('songPage', 'trackingData', lambda _, x: x['key'] == 'Primary Artist', 'value'),
+ get_all=False, default='unknown artist')
+ media = traverse_obj(
+ song_info, ('entities', 'songs', song_id, 'media'), expected_type=list, default=[])
+
+ entries = []
+ for m in media:
+ if m.get('type') in ('video', 'audio') and m.get('url'):
+ if m.get('provider') == 'spotify':
+ self.to_screen(f'{song_id}: Skipping Spotify audio embed')
+ else:
+ entries.append(self.url_result(m['url']))
+
+ return self.playlist_result(entries, song_id, title, f'{title} by {artist}')
diff --git a/yt_dlp/extractor/getcourseru.py b/yt_dlp/extractor/getcourseru.py
new file mode 100644
index 0000000..144321a
--- /dev/null
+++ b/yt_dlp/extractor/getcourseru.py
@@ -0,0 +1,178 @@
+import re
+import time
+import urllib.parse
+
+from .common import InfoExtractor
+from ..utils import ExtractorError, int_or_none, url_or_none, urlencode_postdata
+from ..utils.traversal import traverse_obj
+
+
+class GetCourseRuPlayerIE(InfoExtractor):
+ _VALID_URL = r'https?://player02\.getcourse\.ru/sign-player/?\?(?:[^#]+&)?json=[^#&]+'
+ _EMBED_REGEX = [rf'<iframe[^>]+\bsrc=[\'"](?P<url>{_VALID_URL}[^\'"]*)']
+ _TESTS = [{
+ 'url': 'http://player02.getcourse.ru/sign-player/?json=eyJ2aWRlb19oYXNoIjoiMTkwYmRmOTNmMWIyOTczNTMwOTg1M2E3YTE5ZTI0YjMiLCJ1c2VyX2lkIjozNTk1MjUxODMsInN1Yl9sb2dpbl91c2VyX2lkIjpudWxsLCJsZXNzb25faWQiOm51bGwsImlwIjoiNDYuMTQyLjE4Mi4yNDciLCJnY19ob3N0IjoiYWNhZGVteW1lbC5vbmxpbmUiLCJ0aW1lIjoxNzA1NDQ5NjQyLCJwYXlsb2FkIjoidV8zNTk1MjUxODMiLCJ1aV9sYW5ndWFnZSI6InJ1IiwiaXNfaGF2ZV9jdXN0b21fc3R5bGUiOnRydWV9&s=354ad2c993d95d5ac629e3133d6cefea&vh-static-feature=zigzag',
+ 'info_dict': {
+ 'id': '513573381',
+ 'title': '190bdf93f1b29735309853a7a19e24b3',
+ 'ext': 'mp4',
+ 'thumbnail': 'https://preview-htz.kinescopecdn.net/preview/190bdf93f1b29735309853a7a19e24b3/preview.jpg?version=1702370546&host=vh-80',
+ 'duration': 1693
+ },
+ 'skip': 'JWT expired',
+ }]
+
+ def _real_extract(self, url):
+ webpage = self._download_webpage(url, None, 'Downloading player page')
+ window_configs = self._search_json(
+ r'window\.configs\s*=', webpage, 'config', None)
+ video_id = str(window_configs['gcFileId'])
+ formats, subtitles = self._extract_m3u8_formats_and_subtitles(
+ window_configs['masterPlaylistUrl'], video_id)
+
+ return {
+ **traverse_obj(window_configs, {
+ 'title': ('videoHash', {str}),
+ 'thumbnail': ('previewUrl', {url_or_none}),
+ 'duration': ('videoDuration', {int_or_none}),
+ }),
+ 'id': video_id,
+ 'formats': formats,
+ 'subtitles': subtitles
+ }
+
+
+class GetCourseRuIE(InfoExtractor):
+ _NETRC_MACHINE = 'getcourseru'
+ _DOMAINS = [
+ 'academymel.online',
+ 'marafon.mani-beauty.com',
+ 'on.psbook.ru'
+ ]
+ _BASE_URL_RE = rf'https?://(?:(?!player02\.)[^.]+\.getcourse\.(?:ru|io)|{"|".join(map(re.escape, _DOMAINS))})'
+ _VALID_URL = [
+ rf'{_BASE_URL_RE}/(?!pl/|teach/)(?P<id>[^?#]+)',
+ rf'{_BASE_URL_RE}/(?:pl/)?teach/control/lesson/view\?(?:[^#]+&)?id=(?P<id>\d+)',
+ ]
+ _TESTS = [{
+ 'url': 'http://academymel.online/3video_1',
+ 'info_dict': {
+ 'id': '3059742',
+ 'display_id': '3video_1',
+ 'title': 'Промоуроки Академии МЕЛ',
+ },
+ 'playlist_count': 1,
+ 'playlist': [{
+ 'info_dict': {
+ 'id': '513573381',
+ 'ext': 'mp4',
+ 'title': 'Промоуроки Академии МЕЛ',
+ 'thumbnail': 'https://preview-htz.kinescopecdn.net/preview/190bdf93f1b29735309853a7a19e24b3/preview.jpg?version=1702370546&host=vh-80',
+ 'duration': 1693
+ },
+ }]
+ }, {
+ 'url': 'https://academymel.getcourse.ru/3video_1',
+ 'info_dict': {
+ 'id': '3059742',
+ 'display_id': '3video_1',
+ 'title': 'Промоуроки Академии МЕЛ',
+ },
+ 'playlist_count': 1,
+ 'playlist': [{
+ 'info_dict': {
+ 'id': '513573381',
+ 'ext': 'mp4',
+ 'title': 'Промоуроки Академии МЕЛ',
+ 'thumbnail': 'https://preview-htz.kinescopecdn.net/preview/190bdf93f1b29735309853a7a19e24b3/preview.jpg?version=1702370546&host=vh-80',
+ 'duration': 1693
+ },
+ }]
+ }, {
+ 'url': 'https://academymel.getcourse.ru/pl/teach/control/lesson/view?id=319141781&editMode=0',
+ 'info_dict': {
+ 'id': '319141781',
+ 'title': '1. Разминка у стены',
+ },
+ 'playlist_count': 1,
+ 'playlist': [{
+ 'info_dict': {
+ 'id': '4919601',
+ 'ext': 'mp4',
+ 'title': '1. Разминка у стены',
+ 'thumbnail': 'https://preview-htz.vhcdn.com/preview/5a521788e7dc25b4f70c3dff6512d90e/preview.jpg?version=1703223532&host=vh-81',
+ 'duration': 704
+ },
+ }],
+ 'skip': 'paid lesson'
+ }, {
+ 'url': 'https://manibeauty.getcourse.ru/pl/teach/control/lesson/view?id=272499894',
+ 'info_dict': {
+ 'id': '272499894',
+ 'title': 'Мотивация к тренировкам',
+ },
+ 'playlist_count': 1,
+ 'playlist': [{
+ 'info_dict': {
+ 'id': '447479687',
+ 'ext': 'mp4',
+ 'title': 'Мотивация к тренировкам',
+ 'thumbnail': 'https://preview-htz.vhcdn.com/preview/70ed5b9f489dd03b4aff55bfdff71a26/preview.jpg?version=1685115787&host=vh-71',
+ 'duration': 30
+ },
+ }],
+ 'skip': 'paid lesson'
+ }, {
+ 'url': 'https://gaismasmandalas.getcourse.io/ATLAUTSEVBUT',
+ 'only_matching': True,
+ }]
+
+ _LOGIN_URL_PATH = '/cms/system/login'
+
+ def _login(self, hostname, username, password):
+ if self._get_cookies(f'https://{hostname}').get('PHPSESSID5'):
+ return
+ login_url = f'https://{hostname}{self._LOGIN_URL_PATH}'
+ webpage = self._download_webpage(login_url, None)
+
+ self._request_webpage(
+ login_url, None, 'Logging in', 'Failed to log in',
+ data=urlencode_postdata({
+ 'action': 'processXdget',
+ 'xdgetId': self._html_search_regex(
+ r'<form[^>]+\bclass="[^"]*\bstate-login[^"]*"[^>]+\bdata-xdget-id="([^"]+)"',
+ webpage, 'xdgetId'),
+ 'params[action]': 'login',
+ 'params[url]': login_url,
+ 'params[object_type]': 'cms_page',
+ 'params[object_id]': -1,
+ 'params[email]': username,
+ 'params[password]': password,
+ 'requestTime': int(time.time()),
+ 'requestSimpleSign': self._html_search_regex(
+ r'window\.requestSimpleSign\s*=\s*"([\da-f]+)"', webpage, 'simple sign'),
+ }))
+
+ def _real_extract(self, url):
+ hostname = urllib.parse.urlparse(url).hostname
+ username, password = self._get_login_info(netrc_machine=hostname)
+ if username:
+ self._login(hostname, username, password)
+
+ display_id = self._match_id(url)
+ webpage, urlh = self._download_webpage_handle(url, display_id)
+ if self._LOGIN_URL_PATH in urlh.url:
+ raise ExtractorError(
+ f'This video is only available for registered users. {self._login_hint("any", netrc=hostname)}',
+ expected=True)
+
+ playlist_id = self._search_regex(
+ r'window\.(?:lessonId|gcsObjectId)\s*=\s*(\d+)', webpage, 'playlist id', default=display_id)
+ title = self._og_search_title(webpage) or self._html_extract_title(webpage)
+
+ return self.playlist_from_matches(
+ re.findall(GetCourseRuPlayerIE._EMBED_REGEX[0], webpage),
+ playlist_id, title, display_id=display_id, ie=GetCourseRuPlayerIE, video_kwargs={
+ 'url_transparent': True,
+ 'title': title,
+ })
diff --git a/yt_dlp/extractor/gettr.py b/yt_dlp/extractor/gettr.py
new file mode 100644
index 0000000..7795dc5
--- /dev/null
+++ b/yt_dlp/extractor/gettr.py
@@ -0,0 +1,206 @@
+from .common import InfoExtractor
+from ..utils import (
+ bool_or_none,
+ ExtractorError,
+ dict_get,
+ float_or_none,
+ int_or_none,
+ str_or_none,
+ traverse_obj,
+ try_get,
+ url_or_none,
+ urljoin,
+)
+
+
+class GettrBaseIE(InfoExtractor):
+ _BASE_REGEX = r'https?://(www\.)?gettr\.com/'
+ _MEDIA_BASE_URL = 'https://media.gettr.com/'
+
+ def _call_api(self, path, video_id, *args, **kwargs):
+ return self._download_json(urljoin('https://api.gettr.com/u/', path), video_id, *args, **kwargs)['result']
+
+
+class GettrIE(GettrBaseIE):
+ _VALID_URL = GettrBaseIE._BASE_REGEX + r'post/(?P<id>[a-z0-9]+)'
+
+ _TESTS = [{
+ 'url': 'https://www.gettr.com/post/pcf6uv838f',
+ 'info_dict': {
+ 'id': 'pcf6uv838f',
+ 'title': 'md5:9086a646bbd06c41c4fe8e52b3c93454',
+ 'description': 'md5:be0577f1e4caadc06de4a002da2bf287',
+ 'ext': 'mp4',
+ 'uploader': 'EpochTV',
+ 'uploader_id': 'epochtv',
+ 'upload_date': '20210927',
+ 'thumbnail': r're:^https?://.+/out\.jpg',
+ 'timestamp': 1632782451.058,
+ 'duration': 58.5585,
+ 'tags': ['hornofafrica', 'explorations'],
+ }
+ }, {
+ 'url': 'https://gettr.com/post/p4iahp',
+ 'info_dict': {
+ 'id': 'p4iahp',
+ 'title': 'md5:b03c07883db6fbc1aab88877a6c3b149',
+ 'description': 'md5:741b7419d991c403196ed2ea7749a39d',
+ 'ext': 'mp4',
+ 'uploader': 'Neues Forum Freiheit',
+ 'uploader_id': 'nf_freiheit',
+ 'upload_date': '20210718',
+ 'thumbnail': r're:^https?://.+/out\.jpg',
+ 'timestamp': 1626594455.017,
+ 'duration': 23,
+ 'tags': 'count:12',
+ }
+ }, {
+ # quote post
+ 'url': 'https://gettr.com/post/pxn5b743a9',
+ 'only_matching': True,
+ }, {
+ # quote with video
+ 'url': 'https://gettr.com/post/pxtiiz5ca2',
+ 'only_matching': True,
+ }, {
+ # streaming embed
+ 'url': 'https://gettr.com/post/pxlu8p3b13',
+ 'only_matching': True,
+ }, {
+ # youtube embed
+ 'url': 'https://gettr.com/post/pv6wp9e24c',
+ 'only_matching': True,
+ 'add_ie': ['Youtube'],
+ }]
+
+ def _real_extract(self, url):
+ post_id = self._match_id(url)
+ webpage = self._download_webpage(url, post_id)
+ api_data = self._call_api('post/%s?incl="poststats|userinfo"' % post_id, post_id)
+
+ post_data = api_data.get('data')
+ user_data = try_get(api_data, lambda x: x['aux']['uinf'][post_data['uid']], dict) or {}
+
+ vid = post_data.get('vid')
+ ovid = post_data.get('ovid')
+
+ if post_data.get('p_type') == 'stream':
+ return self.url_result(f'https://gettr.com/streaming/{post_id}', ie='GettrStreaming', video_id=post_id)
+
+ if not (ovid or vid):
+ embed_url = url_or_none(post_data.get('prevsrc'))
+ shared_post_id = traverse_obj(api_data, ('aux', 'shrdpst', '_id'), ('data', 'rpstIds', 0), expected_type=str)
+
+ if embed_url:
+ return self.url_result(embed_url)
+ elif shared_post_id:
+ return self.url_result(f'https://gettr.com/post/{shared_post_id}', ie='Gettr', video_id=shared_post_id)
+ else:
+ raise ExtractorError('There\'s no video in this post.')
+
+ title = description = str_or_none(
+ post_data.get('txt') or self._og_search_description(webpage))
+
+ uploader = str_or_none(
+ user_data.get('nickname')
+ or self._search_regex(r'^(.+?) on GETTR', self._og_search_title(webpage, default=''), 'uploader', fatal=False))
+
+ if uploader:
+ title = '%s - %s' % (uploader, title)
+
+ formats, subtitles = self._extract_m3u8_formats_and_subtitles(
+ urljoin(self._MEDIA_BASE_URL, vid), post_id, 'mp4',
+ entry_protocol='m3u8_native', m3u8_id='hls', fatal=False) if vid else ([], {})
+
+ if ovid:
+ formats.append({
+ 'url': urljoin(self._MEDIA_BASE_URL, ovid),
+ 'format_id': 'ovid',
+ 'ext': 'mp4',
+ 'width': int_or_none(post_data.get('vid_wid')),
+ 'height': int_or_none(post_data.get('vid_hgt')),
+ })
+
+ return {
+ 'id': post_id,
+ 'title': title,
+ 'description': description,
+ 'formats': formats,
+ 'subtitles': subtitles,
+ 'uploader': uploader,
+ 'uploader_id': str_or_none(
+ dict_get(user_data, ['_id', 'username'])
+ or post_data.get('uid')),
+ 'thumbnail': url_or_none(
+ urljoin(self._MEDIA_BASE_URL, post_data.get('main'))
+ or self._html_search_meta(['og:image', 'image'], webpage, 'thumbnail', fatal=False)),
+ 'timestamp': float_or_none(dict_get(post_data, ['cdate', 'udate']), scale=1000),
+ 'duration': float_or_none(post_data.get('vid_dur')),
+ 'tags': post_data.get('htgs'),
+ }
+
+
+class GettrStreamingIE(GettrBaseIE):
+ _VALID_URL = GettrBaseIE._BASE_REGEX + r'streaming/(?P<id>[a-z0-9]+)'
+
+ _TESTS = [{
+ 'url': 'https://gettr.com/streaming/psoiulc122',
+ 'info_dict': {
+ 'id': 'psoiulc122',
+ 'ext': 'mp4',
+ 'description': 'md5:56bca4b8f48f1743d9fd03d49c723017',
+ 'view_count': int,
+ 'uploader': 'Corona Investigative Committee',
+ 'uploader_id': 'coronacommittee',
+ 'duration': 5180.184,
+ 'thumbnail': r're:^https?://.+',
+ 'title': 'Day 1: Opening Session of the Grand Jury Proceeding',
+ 'timestamp': 1644080997.164,
+ 'upload_date': '20220205',
+ }
+ }, {
+ 'url': 'https://gettr.com/streaming/psfmeefcc1',
+ 'info_dict': {
+ 'id': 'psfmeefcc1',
+ 'ext': 'mp4',
+ 'title': 'Session 90: "The Virus Of Power"',
+ 'view_count': int,
+ 'uploader_id': 'coronacommittee',
+ 'description': 'md5:98986acdf656aa836bf36f9c9704c65b',
+ 'uploader': 'Corona Investigative Committee',
+ 'thumbnail': r're:^https?://.+',
+ 'duration': 21872.507,
+ 'timestamp': 1643976662.858,
+ 'upload_date': '20220204',
+ }
+ }]
+
+ def _real_extract(self, url):
+ video_id = self._match_id(url)
+ video_info = self._call_api('live/join/%s' % video_id, video_id, data={})
+
+ live_info = video_info['broadcast']
+ live_url = url_or_none(live_info.get('url'))
+
+ formats, subtitles = self._extract_m3u8_formats_and_subtitles(
+ live_url, video_id, ext='mp4',
+ entry_protocol='m3u8_native', m3u8_id='hls', fatal=False) if live_url else ([], {})
+
+ thumbnails = [{
+ 'url': urljoin(self._MEDIA_BASE_URL, thumbnail),
+ } for thumbnail in try_get(video_info, lambda x: x['postData']['imgs'], list) or []]
+
+ return {
+ 'id': video_id,
+ 'title': try_get(video_info, lambda x: x['postData']['ttl'], str),
+ 'description': try_get(video_info, lambda x: x['postData']['dsc'], str),
+ 'formats': formats,
+ 'subtitles': subtitles,
+ 'thumbnails': thumbnails,
+ 'uploader': try_get(video_info, lambda x: x['liveHostInfo']['nickname'], str),
+ 'uploader_id': try_get(video_info, lambda x: x['liveHostInfo']['_id'], str),
+ 'view_count': int_or_none(live_info.get('viewsCount')),
+ 'timestamp': float_or_none(live_info.get('startAt'), scale=1000),
+ 'duration': float_or_none(live_info.get('duration'), scale=1000),
+ 'is_live': bool_or_none(live_info.get('isLive')),
+ }
diff --git a/yt_dlp/extractor/giantbomb.py b/yt_dlp/extractor/giantbomb.py
new file mode 100644
index 0000000..1125723
--- /dev/null
+++ b/yt_dlp/extractor/giantbomb.py
@@ -0,0 +1,85 @@
+import json
+
+from .common import InfoExtractor
+from ..utils import (
+ determine_ext,
+ int_or_none,
+ qualities,
+ unescapeHTML,
+)
+
+
+class GiantBombIE(InfoExtractor):
+ _VALID_URL = r'https?://(?:www\.)?giantbomb\.com/(?:videos|shows)/(?P<display_id>[^/]+)/(?P<id>\d+-\d+)'
+ _TESTS = [{
+ 'url': 'http://www.giantbomb.com/videos/quick-look-destiny-the-dark-below/2300-9782/',
+ 'md5': '132f5a803e7e0ab0e274d84bda1e77ae',
+ 'info_dict': {
+ 'id': '2300-9782',
+ 'display_id': 'quick-look-destiny-the-dark-below',
+ 'ext': 'mp4',
+ 'title': 'Quick Look: Destiny: The Dark Below',
+ 'description': 'md5:0aa3aaf2772a41b91d44c63f30dfad24',
+ 'duration': 2399,
+ 'thumbnail': r're:^https?://.*\.jpg$',
+ }
+ }, {
+ 'url': 'https://www.giantbomb.com/shows/ben-stranding/2970-20212',
+ 'only_matching': True,
+ }]
+
+ def _real_extract(self, url):
+ mobj = self._match_valid_url(url)
+ video_id = mobj.group('id')
+ display_id = mobj.group('display_id')
+
+ webpage = self._download_webpage(url, display_id)
+
+ title = self._og_search_title(webpage)
+ description = self._og_search_description(webpage)
+ thumbnail = self._og_search_thumbnail(webpage)
+
+ video = json.loads(unescapeHTML(self._search_regex(
+ r'data-video="([^"]+)"', webpage, 'data-video')))
+
+ duration = int_or_none(video.get('lengthSeconds'))
+
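+        # qualities() ranks each format_id by its position in the list below
+        # (later entries are preferred), so progressive_hd sorts highest.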
+ quality = qualities([
+ 'f4m_low', 'progressive_low', 'f4m_high',
+ 'progressive_high', 'f4m_hd', 'progressive_hd'])
+
+ formats = []
+ for format_id, video_url in video['videoStreams'].items():
+ if format_id == 'f4m_stream':
+ continue
+ ext = determine_ext(video_url)
+ if ext == 'f4m':
+ f4m_formats = self._extract_f4m_formats(video_url + '?hdcore=3.3.1', display_id)
+ if f4m_formats:
+ f4m_formats[0]['quality'] = quality(format_id)
+ formats.extend(f4m_formats)
+ elif ext == 'm3u8':
+ formats.extend(self._extract_m3u8_formats(
+ video_url, display_id, ext='mp4', entry_protocol='m3u8_native',
+ m3u8_id='hls', fatal=False))
+ else:
+ formats.append({
+ 'url': video_url,
+ 'format_id': format_id,
+ 'quality': quality(format_id),
+ })
+
+ if not formats:
+ youtube_id = video.get('youtubeID')
+ if youtube_id:
+ return self.url_result(youtube_id, 'Youtube')
+
+ return {
+ 'id': video_id,
+ 'display_id': display_id,
+ 'title': title,
+ 'description': description,
+ 'thumbnail': thumbnail,
+ 'duration': duration,
+ 'formats': formats,
+ }
diff --git a/yt_dlp/extractor/gigya.py b/yt_dlp/extractor/gigya.py
new file mode 100644
index 0000000..c5bc86b
--- /dev/null
+++ b/yt_dlp/extractor/gigya.py
@@ -0,0 +1,20 @@
+from .common import InfoExtractor
+from ..utils import (
+ ExtractorError,
+ urlencode_postdata,
+)
+
+
+class GigyaBaseIE(InfoExtractor):
+ def _gigya_login(self, auth_data):
+ auth_info = self._download_json(
+ 'https://accounts.eu1.gigya.com/accounts.login', None,
+ note='Logging in', errnote='Unable to log in',
+ data=urlencode_postdata(auth_data))
+
+ error_message = auth_info.get('errorDetails') or auth_info.get('errorMessage')
+ if error_message:
+ raise ExtractorError(
+                'Unable to log in: %s' % error_message, expected=True)
+ return auth_info
diff --git a/yt_dlp/extractor/glide.py b/yt_dlp/extractor/glide.py
new file mode 100644
index 0000000..d114f34
--- /dev/null
+++ b/yt_dlp/extractor/glide.py
@@ -0,0 +1,38 @@
+from .common import InfoExtractor
+
+
+class GlideIE(InfoExtractor):
+ IE_DESC = 'Glide mobile video messages (glide.me)'
+ _VALID_URL = r'https?://share\.glide\.me/(?P<id>[A-Za-z0-9\-=_+]+)'
+ _TEST = {
+ 'url': 'http://share.glide.me/UZF8zlmuQbe4mr+7dCiQ0w==',
+ 'md5': '4466372687352851af2d131cfaa8a4c7',
+ 'info_dict': {
+ 'id': 'UZF8zlmuQbe4mr+7dCiQ0w==',
+ 'ext': 'mp4',
+ 'title': "Damon's Glide message",
+ 'thumbnail': r're:^https?://.*?\.cloudfront\.net/.*\.jpg$',
+ }
+ }
+
+ def _real_extract(self, url):
+ video_id = self._match_id(url)
+
+ webpage = self._download_webpage(url, video_id)
+
+ title = self._generic_title('', webpage)
+ video_url = self._proto_relative_url(self._search_regex(
+ r'<source[^>]+src=(["\'])(?P<url>.+?)\1',
+ webpage, 'video URL', default=None,
+ group='url')) or self._og_search_video_url(webpage)
+ thumbnail = self._proto_relative_url(self._search_regex(
+ r'<img[^>]+id=["\']video-thumbnail["\'][^>]+src=(["\'])(?P<url>.+?)\1',
+ webpage, 'thumbnail url', default=None,
+ group='url')) or self._og_search_thumbnail(webpage)
+
+ return {
+ 'id': video_id,
+ 'title': title,
+ 'url': video_url,
+ 'thumbnail': thumbnail,
+ }
diff --git a/yt_dlp/extractor/globalplayer.py b/yt_dlp/extractor/globalplayer.py
new file mode 100644
index 0000000..e0c0d58
--- /dev/null
+++ b/yt_dlp/extractor/globalplayer.py
@@ -0,0 +1,254 @@
+from .common import InfoExtractor
+from ..utils import (
+ clean_html,
+ join_nonempty,
+ parse_duration,
+ str_or_none,
+ traverse_obj,
+ unified_strdate,
+ unified_timestamp,
+ urlhandle_detect_ext,
+)
+
+
+class GlobalPlayerBaseIE(InfoExtractor):
+ def _get_page_props(self, url, video_id):
+ webpage = self._download_webpage(url, video_id)
+ return self._search_nextjs_data(webpage, video_id)['props']['pageProps']
+
+ def _request_ext(self, url, video_id):
+ return urlhandle_detect_ext(self._request_webpage( # Server rejects HEAD requests
+ url, video_id, note='Determining source extension'))
+
+ def _extract_audio(self, episode, series):
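+        # Merge series- and episode-level metadata; with get_all=False, keys
+        # mapped to several source fields keep only the first match (e.g.
+        # pubDate for podcasts, startDate for radio catchup).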
+ return {
+ 'vcodec': 'none',
+ **traverse_obj(series, {
+ 'series': 'title',
+ 'series_id': 'id',
+ 'thumbnail': 'imageUrl',
+ 'uploader': 'itunesAuthor', # podcasts only
+ }),
+ **traverse_obj(episode, {
+ 'id': 'id',
+ 'description': ('description', {clean_html}),
+ 'duration': ('duration', {parse_duration}),
+ 'thumbnail': 'imageUrl',
+ 'url': 'streamUrl',
+ 'timestamp': (('pubDate', 'startDate'), {unified_timestamp}),
+ 'title': 'title',
+ }, get_all=False)
+ }
+
+
+class GlobalPlayerLiveIE(GlobalPlayerBaseIE):
+ _VALID_URL = r'https?://www\.globalplayer\.com/live/(?P<id>\w+)/\w+'
+ _TESTS = [{
+ 'url': 'https://www.globalplayer.com/live/smoothchill/uk/',
+ 'info_dict': {
+ 'id': '2mx1E',
+ 'ext': 'aac',
+ 'display_id': 'smoothchill-uk',
+ 'title': 're:^Smooth Chill.+$',
+ 'thumbnail': 'https://herald.musicradio.com/media/f296ade8-50c9-4f60-911f-924e96873620.png',
+ 'description': 'Music To Chill To',
+ 'live_status': 'is_live',
+ },
+ }, {
+ # national station
+ 'url': 'https://www.globalplayer.com/live/heart/uk/',
+ 'info_dict': {
+ 'id': '2mwx4',
+ 'ext': 'aac',
+ 'description': 'turn up the feel good!',
+ 'thumbnail': 'https://herald.musicradio.com/media/49b9e8cb-15bf-4bf2-8c28-a4850cc6b0f3.png',
+ 'live_status': 'is_live',
+ 'title': 're:^Heart UK.+$',
+ 'display_id': 'heart-uk',
+ },
+ }, {
+ # regional variation
+ 'url': 'https://www.globalplayer.com/live/heart/london/',
+ 'info_dict': {
+ 'id': 'AMqg',
+ 'ext': 'aac',
+ 'thumbnail': 'https://herald.musicradio.com/media/49b9e8cb-15bf-4bf2-8c28-a4850cc6b0f3.png',
+ 'title': 're:^Heart London.+$',
+ 'live_status': 'is_live',
+ 'display_id': 'heart-london',
+ 'description': 'turn up the feel good!',
+ },
+ }]
+
+ def _real_extract(self, url):
+ video_id = self._match_id(url)
+ station = self._get_page_props(url, video_id)['station']
+ stream_url = station['streamUrl']
+
+ return {
+ 'id': station['id'],
+ 'display_id': join_nonempty('brandSlug', 'slug', from_dict=station) or station.get('legacyStationPrefix'),
+ 'url': stream_url,
+ 'ext': self._request_ext(stream_url, video_id),
+ 'vcodec': 'none',
+ 'is_live': True,
+ **traverse_obj(station, {
+ 'title': (('name', 'brandName'), {str_or_none}),
+ 'description': 'tagline',
+ 'thumbnail': 'brandLogo',
+ }, get_all=False),
+ }
+
+
+class GlobalPlayerLivePlaylistIE(GlobalPlayerBaseIE):
+ _VALID_URL = r'https?://www\.globalplayer\.com/playlists/(?P<id>\w+)'
+ _TESTS = [{
+ # "live playlist"
+ 'url': 'https://www.globalplayer.com/playlists/8bLk/',
+ 'info_dict': {
+ 'id': '8bLk',
+ 'ext': 'aac',
+ 'live_status': 'is_live',
+ 'description': 'md5:e10f5e10b01a7f2c14ba815509fbb38d',
+ 'thumbnail': 'https://images.globalplayer.com/images/551379?width=450&signature=oMLPZIoi5_dBSHnTMREW0Xg76mA=',
+ 'title': 're:^Classic FM Hall of Fame.+$'
+ },
+ }]
+
+ def _real_extract(self, url):
+ video_id = self._match_id(url)
+ station = self._get_page_props(url, video_id)['playlistData']
+ stream_url = station['streamUrl']
+
+ return {
+ 'id': video_id,
+ 'url': stream_url,
+ 'ext': self._request_ext(stream_url, video_id),
+ 'vcodec': 'none',
+ 'is_live': True,
+ **traverse_obj(station, {
+ 'title': 'title',
+ 'description': 'description',
+ 'thumbnail': 'image',
+ }),
+ }
+
+
+class GlobalPlayerAudioIE(GlobalPlayerBaseIE):
+ _VALID_URL = r'https?://www\.globalplayer\.com/(?:(?P<podcast>podcasts)/|catchup/\w+/\w+/)(?P<id>\w+)/?(?:$|[?#])'
+ _TESTS = [{
+ # podcast
+ 'url': 'https://www.globalplayer.com/podcasts/42KuaM/',
+ 'playlist_mincount': 5,
+ 'info_dict': {
+ 'id': '42KuaM',
+ 'title': 'Filthy Ritual',
+ 'thumbnail': 'md5:60286e7d12d795bd1bbc9efc6cee643e',
+ 'categories': ['Society & Culture', 'True Crime'],
+ 'uploader': 'Global',
+ 'description': 'md5:da5b918eac9ae319454a10a563afacf9',
+ },
+ }, {
+ # radio catchup
+ 'url': 'https://www.globalplayer.com/catchup/lbc/uk/46vyD7z/',
+ 'playlist_mincount': 3,
+ 'info_dict': {
+ 'id': '46vyD7z',
+ 'description': 'Nick Ferrari At Breakfast is Leading Britain\'s Conversation.',
+ 'title': 'Nick Ferrari',
+ 'thumbnail': 'md5:4df24d8a226f5b2508efbcc6ae874ebf',
+ },
+ }]
+
+ def _real_extract(self, url):
+ video_id, podcast = self._match_valid_url(url).group('id', 'podcast')
+ props = self._get_page_props(url, video_id)
+ series = props['podcastInfo'] if podcast else props['catchupInfo']
+
+ return {
+ '_type': 'playlist',
+ 'id': video_id,
+ 'entries': [self._extract_audio(ep, series) for ep in traverse_obj(
+ series, ('episodes', lambda _, v: v['id'] and v['streamUrl']))],
+ 'categories': traverse_obj(series, ('categories', ..., 'name')) or None,
+ **traverse_obj(series, {
+ 'description': 'description',
+ 'thumbnail': 'imageUrl',
+ 'title': 'title',
+ 'uploader': 'itunesAuthor', # podcasts only
+ }),
+ }
+
+
+class GlobalPlayerAudioEpisodeIE(GlobalPlayerBaseIE):
+ _VALID_URL = r'https?://www\.globalplayer\.com/(?:(?P<podcast>podcasts)|catchup/\w+/\w+)/episodes/(?P<id>\w+)/?(?:$|[?#])'
+ _TESTS = [{
+ # podcast
+ 'url': 'https://www.globalplayer.com/podcasts/episodes/7DrfNnE/',
+ 'info_dict': {
+ 'id': '7DrfNnE',
+ 'ext': 'mp3',
+ 'title': 'Filthy Ritual - Trailer',
+ 'description': 'md5:1f1562fd0f01b4773b590984f94223e0',
+ 'thumbnail': 'md5:60286e7d12d795bd1bbc9efc6cee643e',
+ 'duration': 225.0,
+ 'timestamp': 1681254900,
+ 'series': 'Filthy Ritual',
+ 'series_id': '42KuaM',
+ 'upload_date': '20230411',
+ 'uploader': 'Global',
+ },
+ }, {
+ # radio catchup
+ 'url': 'https://www.globalplayer.com/catchup/lbc/uk/episodes/2zGq26Vcv1fCWhddC4JAwETXWe/',
+ 'info_dict': {
+ 'id': '2zGq26Vcv1fCWhddC4JAwETXWe',
+ 'ext': 'm4a',
+ 'timestamp': 1682056800,
+ 'series': 'Nick Ferrari',
+ 'thumbnail': 'md5:4df24d8a226f5b2508efbcc6ae874ebf',
+ 'upload_date': '20230421',
+ 'series_id': '46vyD7z',
+ 'description': 'Nick Ferrari At Breakfast is Leading Britain\'s Conversation.',
+ 'title': 'Nick Ferrari',
+ 'duration': 10800.0,
+ },
+ }]
+
+ def _real_extract(self, url):
+ video_id, podcast = self._match_valid_url(url).group('id', 'podcast')
+ props = self._get_page_props(url, video_id)
+ episode = props['podcastEpisode'] if podcast else props['catchupEpisode']
+
+ return self._extract_audio(
+ episode, traverse_obj(episode, 'podcast', 'show', expected_type=dict) or {})
+
+
+class GlobalPlayerVideoIE(GlobalPlayerBaseIE):
+ _VALID_URL = r'https?://www\.globalplayer\.com/videos/(?P<id>\w+)'
+ _TESTS = [{
+ 'url': 'https://www.globalplayer.com/videos/2JsSZ7Gm2uP/',
+ 'info_dict': {
+ 'id': '2JsSZ7Gm2uP',
+ 'ext': 'mp4',
+ 'description': 'md5:6a9f063c67c42f218e42eee7d0298bfd',
+ 'thumbnail': 'md5:d4498af48e15aae4839ce77b97d39550',
+ 'upload_date': '20230420',
+ 'title': 'Treble Malakai Bayoh sings a sublime Handel aria at Classic FM Live',
+ },
+ }]
+
+ def _real_extract(self, url):
+ video_id = self._match_id(url)
+ meta = self._get_page_props(url, video_id)['videoData']
+
+ return {
+ 'id': video_id,
+ **traverse_obj(meta, {
+ 'url': 'url',
+ 'thumbnail': ('image', 'url'),
+ 'title': 'title',
+ 'upload_date': ('publish_date', {unified_strdate}),
+ 'description': 'description',
+ }),
+ }
diff --git a/yt_dlp/extractor/globo.py b/yt_dlp/extractor/globo.py
new file mode 100644
index 0000000..df98f09
--- /dev/null
+++ b/yt_dlp/extractor/globo.py
@@ -0,0 +1,246 @@
+import base64
+import hashlib
+import json
+import random
+import re
+
+from .common import InfoExtractor
+from ..compat import (
+ compat_str,
+)
+from ..networking import HEADRequest
+from ..utils import (
+ ExtractorError,
+ float_or_none,
+ orderedSet,
+ str_or_none,
+ try_get,
+)
+
+
+class GloboIE(InfoExtractor):
+ _VALID_URL = r'(?:globo:|https?://.+?\.globo\.com/(?:[^/]+/)*(?:v/(?:[^/]+/)?|videos/))(?P<id>\d{7,})'
+ _NETRC_MACHINE = 'globo'
+ _TESTS = [{
+ 'url': 'http://g1.globo.com/carros/autoesporte/videos/t/exclusivos-do-g1/v/mercedes-benz-gla-passa-por-teste-de-colisao-na-europa/3607726/',
+ 'info_dict': {
+ 'id': '3607726',
+ 'ext': 'mp4',
+ 'title': 'Mercedes-Benz GLA passa por teste de colisão na Europa',
+ 'duration': 103.204,
+ 'uploader': 'G1',
+ 'uploader_id': '2015',
+ },
+ 'params': {
+ 'skip_download': True,
+ },
+ }, {
+ 'url': 'http://globoplay.globo.com/v/4581987/',
+ 'info_dict': {
+ 'id': '4581987',
+ 'ext': 'mp4',
+ 'title': 'Acidentes de trânsito estão entre as maiores causas de queda de energia em SP',
+ 'duration': 137.973,
+ 'uploader': 'Rede Globo',
+ 'uploader_id': '196',
+ },
+ 'params': {
+ 'skip_download': True,
+ },
+ }, {
+ 'url': 'http://canalbrasil.globo.com/programas/sangue-latino/videos/3928201.html',
+ 'only_matching': True,
+ }, {
+ 'url': 'http://globosatplay.globo.com/globonews/v/4472924/',
+ 'only_matching': True,
+ }, {
+ 'url': 'http://globotv.globo.com/t/programa/v/clipe-sexo-e-as-negas-adeus/3836166/',
+ 'only_matching': True,
+ }, {
+ 'url': 'http://globotv.globo.com/canal-brasil/sangue-latino/t/todos-os-videos/v/ator-e-diretor-argentino-ricado-darin-fala-sobre-utopias-e-suas-perdas/3928201/',
+ 'only_matching': True,
+ }, {
+ 'url': 'http://canaloff.globo.com/programas/desejar-profundo/videos/4518560.html',
+ 'only_matching': True,
+ }, {
+ 'url': 'globo:3607726',
+ 'only_matching': True,
+ }, {
+ 'url': 'https://globoplay.globo.com/v/10248083/',
+ 'info_dict': {
+ 'id': '10248083',
+ 'ext': 'mp4',
+ 'title': 'Melhores momentos: Equador 1 x 1 Brasil pelas Eliminatórias da Copa do Mundo 2022',
+ 'duration': 530.964,
+ 'uploader': 'SporTV',
+ 'uploader_id': '698',
+ },
+ 'params': {
+ 'skip_download': True,
+ },
+ }]
+
+ def _real_extract(self, url):
+ video_id = self._match_id(url)
+
+ self._request_webpage(
+ HEADRequest('https://globo-ab.globo.com/v2/selected-alternatives?experiments=player-isolated-experiment-02&skipImpressions=true'),
+ video_id, 'Getting cookies')
+
+ video = self._download_json(
+ 'http://api.globovideos.com/videos/%s/playlist' % video_id,
+ video_id)['videos'][0]
+ if not self.get_param('allow_unplayable_formats') and video.get('encrypted') is True:
+ self.report_drm(video_id)
+
+ title = video['title']
+
+ formats = []
+ security = self._download_json(
+ 'https://playback.video.globo.com/v2/video-session', video_id, 'Downloading security hash for %s' % video_id,
+ headers={'content-type': 'application/json'}, data=json.dumps({
+ "player_type": "desktop",
+ "video_id": video_id,
+ "quality": "max",
+ "content_protection": "widevine",
+ "vsid": "581b986b-4c40-71f0-5a58-803e579d5fa2",
+ "tz": "-3.0:00"
+ }).encode())
+
+ self._request_webpage(HEADRequest(security['sources'][0]['url_template']), video_id, 'Getting locksession cookie')
+
+ security_hash = security['sources'][0]['token']
+ if not security_hash:
+ message = security.get('message')
+ if message:
+ raise ExtractorError(
+ '%s returned error: %s' % (self.IE_NAME, message), expected=True)
+
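+        # The signed hash is derived locally: the timestamp carried in the
+        # token is shifted forward one day and padded with random digits,
+        # then signed with an MD5 over the received MD5, the padded time
+        # and the fixed salt '0xAC10FD'.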
+ hash_code = security_hash[:2]
+ padding = '%010d' % random.randint(1, 10000000000)
+ if hash_code in ('04', '14'):
+ received_time = security_hash[3:13]
+ received_md5 = security_hash[24:]
+ hash_prefix = security_hash[:23]
+        elif hash_code in ('02', '12', '03', '13'):
+            received_time = security_hash[2:12]
+            received_md5 = security_hash[22:]
+            padding += '1'
+            hash_prefix = '05' + security_hash[:22]
+        else:
+            # avoid a NameError below on unrecognized (or empty) hash codes
+            raise ExtractorError('Unsupported security hash code: %s' % hash_code)
+
+ padded_sign_time = compat_str(int(received_time) + 86400) + padding
+ md5_data = (received_md5 + padded_sign_time + '0xAC10FD').encode()
+ signed_md5 = base64.urlsafe_b64encode(hashlib.md5(md5_data).digest()).decode().strip('=')
+ signed_hash = hash_prefix + padded_sign_time + signed_md5
+ source = security['sources'][0]['url_parts']
+ resource_url = source['scheme'] + '://' + source['domain'] + source['path']
+ signed_url = '%s?h=%s&k=html5&a=%s' % (resource_url, signed_hash, 'F' if video.get('subscriber_only') else 'A')
+
+ fmts, subtitles = self._extract_m3u8_formats_and_subtitles(
+ signed_url, video_id, 'mp4', entry_protocol='m3u8_native', m3u8_id='hls', fatal=False)
+ formats.extend(fmts)
+
+ for resource in video['resources']:
+ if resource.get('type') == 'subtitle':
+ subtitles.setdefault(resource.get('language') or 'por', []).append({
+ 'url': resource.get('url'),
+ })
+        for sub_key in ('subtitles', 'subtitles_webvtt'):
+            subs = try_get(security, lambda x: x['source'][sub_key], expected_type=dict) or {}
+            for sub_lang, sub_url in subs.items():
+                if sub_url:
+                    subtitles.setdefault(sub_lang or 'por', []).append({
+                        'url': sub_url,
+                    })
+
+ duration = float_or_none(video.get('duration'), 1000)
+ uploader = video.get('channel')
+ uploader_id = str_or_none(video.get('channel_id'))
+
+ return {
+ 'id': video_id,
+ 'title': title,
+ 'duration': duration,
+ 'uploader': uploader,
+ 'uploader_id': uploader_id,
+ 'formats': formats,
+ 'subtitles': subtitles,
+ }
+
+
+class GloboArticleIE(InfoExtractor):
+ _VALID_URL = r'https?://.+?\.globo\.com/(?:[^/]+/)*(?P<id>[^/.]+)(?:\.html)?'
+
+ _VIDEOID_REGEXES = [
+ r'\bdata-video-id=["\'](\d{7,})["\']',
+ r'\bdata-player-videosids=["\'](\d{7,})["\']',
+ r'\bvideosIDs\s*:\s*["\']?(\d{7,})',
+ r'\bdata-id=["\'](\d{7,})["\']',
+ r'<div[^>]+\bid=["\'](\d{7,})["\']',
+ r'<bs-player[^>]+\bvideoid=["\'](\d{8,})["\']',
+ ]
+
+ _TESTS = [{
+ 'url': 'http://g1.globo.com/jornal-nacional/noticia/2014/09/novidade-na-fiscalizacao-de-bagagem-pela-receita-provoca-discussoes.html',
+ 'info_dict': {
+ 'id': 'novidade-na-fiscalizacao-de-bagagem-pela-receita-provoca-discussoes',
+ 'title': 'Novidade na fiscalização de bagagem pela Receita provoca discussões',
+ 'description': 'md5:c3c4b4d4c30c32fce460040b1ac46b12',
+ },
+ 'playlist_count': 1,
+ }, {
+ 'url': 'http://g1.globo.com/pr/parana/noticia/2016/09/mpf-denuncia-lula-marisa-e-mais-seis-na-operacao-lava-jato.html',
+ 'info_dict': {
+ 'id': 'mpf-denuncia-lula-marisa-e-mais-seis-na-operacao-lava-jato',
+ 'title': "Lula era o 'comandante máximo' do esquema da Lava Jato, diz MPF",
+ 'description': 'md5:8aa7cc8beda4dc71cc8553e00b77c54c',
+ },
+ 'playlist_count': 6,
+ }, {
+ 'url': 'http://gq.globo.com/Prazeres/Poder/noticia/2015/10/all-o-desafio-assista-ao-segundo-capitulo-da-serie.html',
+ 'only_matching': True,
+ }, {
+ 'url': 'http://gshow.globo.com/programas/tv-xuxa/O-Programa/noticia/2014/01/xuxa-e-junno-namoram-muuuito-em-luau-de-zeze-di-camargo-e-luciano.html',
+ 'only_matching': True,
+ }, {
+ 'url': 'http://oglobo.globo.com/rio/a-amizade-entre-um-entregador-de-farmacia-um-piano-19946271',
+ 'only_matching': True,
+ }, {
+ 'url': 'https://ge.globo.com/video/ta-na-area-como-foi-assistir-ao-jogo-do-palmeiras-que-a-globo-nao-passou-10287094.ghtml',
+ 'info_dict': {
+ 'id': 'ta-na-area-como-foi-assistir-ao-jogo-do-palmeiras-que-a-globo-nao-passou-10287094',
+ 'title': 'Tá na Área: como foi assistir ao jogo do Palmeiras que a Globo não passou',
+ 'description': 'md5:2d089d036c4c9675117d3a56f8c61739',
+ },
+ 'playlist_count': 1,
+ }, {
+ 'url': 'https://redeglobo.globo.com/rpc/meuparana/noticia/a-producao-de-chocolates-no-parana.ghtml',
+ 'info_dict': {
+ 'id': 'a-producao-de-chocolates-no-parana',
+ 'title': 'A produção de chocolates no Paraná',
+ 'description': 'md5:f2e3daf00ffd1dc0e9a8a6c7cfb0a89e',
+ },
+ 'playlist_count': 2,
+ }]
+
+ @classmethod
+ def suitable(cls, url):
+ return False if GloboIE.suitable(url) else super(GloboArticleIE, cls).suitable(url)
+
+ def _real_extract(self, url):
+ display_id = self._match_id(url)
+ webpage = self._download_webpage(url, display_id)
+ video_ids = []
+ for video_regex in self._VIDEOID_REGEXES:
+ video_ids.extend(re.findall(video_regex, webpage))
+ entries = [
+ self.url_result('globo:%s' % video_id, GloboIE.ie_key())
+ for video_id in orderedSet(video_ids)]
+ title = self._og_search_title(webpage).strip()
+ description = self._html_search_meta('description', webpage)
+ return self.playlist_result(entries, display_id, title, description)
diff --git a/yt_dlp/extractor/glomex.py b/yt_dlp/extractor/glomex.py
new file mode 100644
index 0000000..22aac0d
--- /dev/null
+++ b/yt_dlp/extractor/glomex.py
@@ -0,0 +1,216 @@
+import re
+import urllib.parse
+
+from .common import InfoExtractor
+from ..utils import (
+ determine_ext,
+ extract_attributes,
+ ExtractorError,
+ int_or_none,
+ parse_qs,
+ smuggle_url,
+ unescapeHTML,
+ unsmuggle_url,
+)
+
+
+class GlomexBaseIE(InfoExtractor):
+ _DEFAULT_ORIGIN_URL = 'https://player.glomex.com/'
+ _API_URL = 'https://integration-cloudfront-eu-west-1.mes.glomex.cloud/'
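+    # The API is passed the URL of the page embedding the player as
+    # current_url, so the embedding page's URL is smuggled into player URLs
+    # and recovered before the API call (defaulting to the player origin).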
+
+ @staticmethod
+ def _smuggle_origin_url(url, origin_url):
+ if origin_url is None:
+ return url
+ return smuggle_url(url, {'origin': origin_url})
+
+ @classmethod
+ def _unsmuggle_origin_url(cls, url, fallback_origin_url=None):
+ defaults = {'origin': fallback_origin_url or cls._DEFAULT_ORIGIN_URL}
+ unsmuggled_url, data = unsmuggle_url(url, default=defaults)
+ return unsmuggled_url, data['origin']
+
+ def _get_videoid_type(self, video_id):
+ _VIDEOID_TYPES = {
+ 'v': 'video',
+ 'pl': 'playlist',
+ 'rl': 'related videos playlist',
+ 'cl': 'curated playlist',
+ }
+ prefix = video_id.split('-')[0]
+ return _VIDEOID_TYPES.get(prefix, 'unknown type')
+
+ def _download_api_data(self, video_id, integration, current_url=None):
+ query = {
+ 'integration_id': integration,
+ 'playlist_id': video_id,
+ 'current_url': current_url or self._DEFAULT_ORIGIN_URL,
+ }
+ video_id_type = self._get_videoid_type(video_id)
+ return self._download_json(
+ self._API_URL,
+ video_id, 'Downloading %s JSON' % video_id_type,
+ 'Unable to download %s JSON' % video_id_type,
+ query=query)
+
+ def _download_and_extract_api_data(self, video_id, integration, current_url):
+ api_data = self._download_api_data(video_id, integration, current_url)
+ videos = api_data['videos']
+ if not videos:
+            raise ExtractorError('No videos found for %s' % video_id)
+ videos = [self._extract_api_data(video, video_id) for video in videos]
+ return videos[0] if len(videos) == 1 else self.playlist_result(videos, video_id)
+
+ def _extract_api_data(self, video, video_id):
+ if video.get('error_code') == 'contentGeoblocked':
+ self.raise_geo_restricted(countries=video['geo_locations'])
+
+ formats, subs = [], {}
+ for format_id, format_url in video['source'].items():
+ ext = determine_ext(format_url)
+ if ext == 'm3u8':
+ formats_, subs_ = self._extract_m3u8_formats_and_subtitles(
+ format_url, video_id, 'mp4', m3u8_id=format_id,
+ fatal=False)
+ formats.extend(formats_)
+ self._merge_subtitles(subs_, target=subs)
+ else:
+ formats.append({
+ 'url': format_url,
+ 'format_id': format_id,
+ })
+ if video.get('language'):
+ for fmt in formats:
+ fmt['language'] = video['language']
+
+ images = (video.get('images') or []) + [video.get('image') or {}]
+ thumbnails = [{
+ 'id': image.get('id'),
+ 'url': f'{image["url"]}/profile:player-960x540',
+ 'width': 960,
+ 'height': 540,
+ } for image in images if image.get('url')]
+ self._remove_duplicate_formats(thumbnails)
+
+ return {
+ 'id': video.get('clip_id') or video_id,
+ 'title': video.get('title'),
+ 'description': video.get('description'),
+ 'thumbnails': thumbnails,
+ 'duration': int_or_none(video.get('clip_duration')),
+ 'timestamp': video.get('created_at'),
+ 'formats': formats,
+ 'subtitles': subs,
+ }
+
+
+class GlomexIE(GlomexBaseIE):
+ IE_NAME = 'glomex'
+ IE_DESC = 'Glomex videos'
+ _VALID_URL = r'https?://video\.glomex\.com/[^/]+/(?P<id>v-[^-]+)'
+ _INTEGRATION_ID = '19syy24xjn1oqlpc'
+
+ _TESTS = [{
+ 'url': 'https://video.glomex.com/sport/v-cb24uwg77hgh-nach-2-0-sieg-guardiola-mit-mancity-vor-naechstem-titel',
+ 'md5': 'cec33a943c4240c9cb33abea8c26242e',
+ 'info_dict': {
+ 'id': 'v-cb24uwg77hgh',
+ 'ext': 'mp4',
+ 'title': 'md5:38a90cedcfadd72982c81acf13556e0c',
+ 'description': 'md5:1ea6b6caff1443fcbbba159e432eedb8',
+ 'duration': 29600,
+ 'timestamp': 1619895017,
+ 'upload_date': '20210501',
+ },
+ }]
+
+ def _real_extract(self, url):
+ video_id = self._match_id(url)
+ return self.url_result(
+ GlomexEmbedIE.build_player_url(video_id, self._INTEGRATION_ID, url),
+ GlomexEmbedIE.ie_key(), video_id)
+
+
+class GlomexEmbedIE(GlomexBaseIE):
+ IE_NAME = 'glomex:embed'
+ IE_DESC = 'Glomex embedded videos'
+ _BASE_PLAYER_URL = '//player.glomex.com/integration/1/iframe-player.html'
+ _BASE_PLAYER_URL_RE = re.escape(_BASE_PLAYER_URL).replace('/1/', r'/[^/]/')
+ _VALID_URL = rf'https?:{_BASE_PLAYER_URL_RE}\?([^#]+&)?playlistId=(?P<id>[^#&]+)'
+
+ _TESTS = [{
+ 'url': 'https://player.glomex.com/integration/1/iframe-player.html?integrationId=4059a013k56vb2yd&playlistId=v-cfa6lye0dkdd-sf',
+ 'md5': '68f259b98cc01918ac34180142fce287',
+ 'info_dict': {
+ 'id': 'v-cfa6lye0dkdd-sf',
+ 'ext': 'mp4',
+ 'timestamp': 1635337199,
+ 'duration': 133080,
+ 'upload_date': '20211027',
+ 'description': 'md5:e741185fc309310ff5d0c789b437be66',
+ 'title': 'md5:35647293513a6c92363817a0fb0a7961',
+ },
+ }, {
+ 'url': 'https://player.glomex.com/integration/1/iframe-player.html?origin=fullpage&integrationId=19syy24xjn1oqlpc&playlistId=rl-vcb49w1fb592p&playlistIndex=0',
+ 'info_dict': {
+ 'id': 'rl-vcb49w1fb592p',
+ },
+ 'playlist_count': 100,
+ }, {
+ 'url': 'https://player.glomex.com/integration/1/iframe-player.html?playlistId=cl-bgqaata6aw8x&integrationId=19syy24xjn1oqlpc',
+ 'info_dict': {
+ 'id': 'cl-bgqaata6aw8x',
+ },
+ 'playlist_mincount': 2,
+ }]
+
+ @classmethod
+ def build_player_url(cls, video_id, integration, origin_url=None):
+ query_string = urllib.parse.urlencode({
+ 'playlistId': video_id,
+ 'integrationId': integration,
+ })
+ return cls._smuggle_origin_url(f'https:{cls._BASE_PLAYER_URL}?{query_string}', origin_url)
+
+ @classmethod
+ def _extract_embed_urls(cls, url, webpage):
+ # https://docs.glomex.com/publisher/video-player-integration/javascript-api/
+ quot_re = r'["\']'
+
+ regex = fr'''(?x)
+ <iframe[^>]+?src=(?P<q>{quot_re})(?P<url>
+ (?:https?:)?{cls._BASE_PLAYER_URL_RE}\?(?:(?!(?P=q)).)+
+ )(?P=q)'''
+ for mobj in re.finditer(regex, webpage):
+ embed_url = unescapeHTML(mobj.group('url'))
+ if cls.suitable(embed_url):
+ yield cls._smuggle_origin_url(embed_url, url)
+
+ regex = fr'''(?x)
+ <glomex-player [^>]+?>|
+ <div[^>]* data-glomex-player=(?P<q>{quot_re})true(?P=q)[^>]*>'''
+ for mobj in re.finditer(regex, webpage):
+ attrs = extract_attributes(mobj.group(0))
+ if attrs.get('data-integration-id') and attrs.get('data-playlist-id'):
+ yield cls.build_player_url(attrs['data-playlist-id'], attrs['data-integration-id'], url)
+
+ # naive parsing of inline scripts for hard-coded integration parameters
+ regex = fr'''(?x)
+ (?P<is_js>dataset\.)?%s\s*(?(is_js)=|:)\s*
+ (?P<q>{quot_re})(?P<id>(?:(?!(?P=q)).)+)(?P=q)\s'''
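+        # Matches both JS property assignments (dataset.playlistId = '...')
+        # and object-literal keys (playlistId: '...'); the (?(is_js)=|:)
+        # conditional picks the expected separator.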
+ for mobj in re.finditer(r'(?x)<script[^<]*>.+?</script>', webpage):
+ script = mobj.group(0)
+            integration_id = re.search(regex % 'integrationId', script)
+            if not integration_id:
+                continue
+            playlist_id = re.search(regex % 'playlistId', script)
+            if playlist_id:
+                # pass the captured ids, not the re.Match objects themselves
+                yield cls.build_player_url(
+                    playlist_id.group('id'), integration_id.group('id'), url)
+
+ def _real_extract(self, url):
+ url, origin_url = self._unsmuggle_origin_url(url)
+ playlist_id = self._match_id(url)
+ integration = parse_qs(url).get('integrationId', [None])[0]
+ if not integration:
+ raise ExtractorError('No integrationId in URL', expected=True)
+ return self._download_and_extract_api_data(playlist_id, integration, origin_url)
diff --git a/yt_dlp/extractor/gmanetwork.py b/yt_dlp/extractor/gmanetwork.py
new file mode 100644
index 0000000..62fff4e
--- /dev/null
+++ b/yt_dlp/extractor/gmanetwork.py
@@ -0,0 +1,83 @@
+from .common import InfoExtractor
+from .dailymotion import DailymotionIE
+from .youtube import YoutubeIE
+
+
+class GMANetworkVideoIE(InfoExtractor):
+ _VALID_URL = r'https?://(?:www)\.gmanetwork\.com/(?:\w+/){3}(?P<id>\d+)/(?P<display_id>[\w-]+)/video'
+ _TESTS = [{
+ 'url': 'https://www.gmanetwork.com/fullepisodes/home/running_man_philippines/168677/running-man-philippines-catch-the-thief-full-chapter-2/video?section=home',
+ 'info_dict': {
+ 'id': '28BqW0AXPe0',
+ 'ext': 'mp4',
+ 'upload_date': '20220919',
+ 'uploader_url': 'http://www.youtube.com/channel/UChsoPNR5x-wdSO2GrOSIWqQ',
+ 'like_count': int,
+ 'view_count': int,
+ 'uploader': 'YoüLOL',
+ 'channel_id': 'UChsoPNR5x-wdSO2GrOSIWqQ',
+ 'duration': 5313,
+ 'comment_count': int,
+ 'tags': 'count:22',
+ 'uploader_id': 'UChsoPNR5x-wdSO2GrOSIWqQ',
+ 'title': 'Running Man Philippines: Catch the Thief (FULL CHAPTER 2)',
+ 'channel_url': 'https://www.youtube.com/channel/UChsoPNR5x-wdSO2GrOSIWqQ',
+ 'thumbnail': 'https://i.ytimg.com/vi/28BqW0AXPe0/maxresdefault.jpg',
+ 'release_timestamp': 1663594212,
+ 'age_limit': 0,
+ 'channel_follower_count': int,
+ 'categories': ['Entertainment'],
+ 'description': 'md5:811bdcea74f9c48051824e494756e926',
+ 'live_status': 'not_live',
+ 'playable_in_embed': True,
+ 'channel': 'YoüLOL',
+ 'availability': 'public',
+ 'release_date': '20220919',
+ }
+ }, {
+ 'url': 'https://www.gmanetwork.com/fullepisodes/home/more_than_words/87059/more-than-words-full-episode-80/video?section=home',
+ 'info_dict': {
+ 'id': 'yiDOExw2aSA',
+ 'ext': 'mp4',
+ 'live_status': 'not_live',
+ 'channel': 'GMANetwork',
+ 'like_count': int,
+ 'channel_follower_count': int,
+ 'description': 'md5:6d00cd658394fa1a5071200d3ed4be05',
+ 'duration': 1419,
+ 'age_limit': 0,
+ 'comment_count': int,
+ 'upload_date': '20181003',
+ 'thumbnail': 'https://i.ytimg.com/vi_webp/yiDOExw2aSA/maxresdefault.webp',
+ 'availability': 'public',
+ 'playable_in_embed': True,
+ 'channel_id': 'UCKL5hAuzgFQsyrsQKgU0Qng',
+ 'title': 'More Than Words: Full Episode 80 (Finale)',
+ 'uploader_id': 'GMANETWORK',
+ 'categories': ['Entertainment'],
+ 'uploader': 'GMANetwork',
+ 'channel_url': 'https://www.youtube.com/channel/UCKL5hAuzgFQsyrsQKgU0Qng',
+ 'tags': 'count:29',
+ 'view_count': int,
+ 'uploader_url': 'http://www.youtube.com/user/GMANETWORK',
+ }
+ }]
+
+ def _real_extract(self, url):
+ content_id, display_id = self._match_valid_url(url).group('id', 'display_id')
+ webpage = self._download_webpage(url, display_id)
+ # webpage route
+ youtube_id = self._search_regex(
+ r'var\s*YOUTUBE_VIDEO\s*=\s*[\'"]+(?P<yt_id>[\w-]+)', webpage, 'youtube_id', fatal=False)
+ if youtube_id:
+ return self.url_result(youtube_id, YoutubeIE, youtube_id)
+
+ # api call route
+ # more info at https://aphrodite.gmanetwork.com/fullepisodes/assets/fullepisodes/js/dist/fullepisodes_video.js?v=1.1.11
+ network_url = self._search_regex(
+ r'NETWORK_URL\s*=\s*[\'"](?P<url>[^\'"]+)', webpage, 'network_url')
+ json_data = self._download_json(f'{network_url}api/data/content/video/{content_id}', display_id)
+ if json_data.get('video_file'):
+ return self.url_result(json_data['video_file'], YoutubeIE, json_data['video_file'])
+ else:
+ return self.url_result(json_data['dailymotion_file'], DailymotionIE, json_data['dailymotion_file'])
diff --git a/yt_dlp/extractor/go.py b/yt_dlp/extractor/go.py
new file mode 100644
index 0000000..b075a02
--- /dev/null
+++ b/yt_dlp/extractor/go.py
@@ -0,0 +1,333 @@
+import re
+
+from .adobepass import AdobePassIE
+from ..compat import compat_str
+from ..utils import (
+    ExtractorError,
+    determine_ext,
+    int_or_none,
+    parse_age_limit,
+    remove_end,
+    remove_start,
+    traverse_obj,
+    try_get,
+    unified_timestamp,
+    urlencode_postdata,
+)
+
+
+class GoIE(AdobePassIE):
+ _SITE_INFO = {
+ 'abc': {
+ 'brand': '001',
+ 'requestor_id': 'ABC',
+ },
+ 'freeform': {
+ 'brand': '002',
+ 'requestor_id': 'ABCFamily',
+ },
+ 'watchdisneychannel': {
+ 'brand': '004',
+ 'resource_id': 'Disney',
+ },
+ 'watchdisneyjunior': {
+ 'brand': '008',
+ 'resource_id': 'DisneyJunior',
+ },
+ 'watchdisneyxd': {
+ 'brand': '009',
+ 'resource_id': 'DisneyXD',
+ },
+ 'disneynow': {
+ 'brand': '011',
+ 'resource_id': 'Disney',
+ },
+ 'fxnow.fxnetworks': {
+ 'brand': '025',
+ 'requestor_id': 'dtci',
+ },
+ }
+ _VALID_URL = r'''(?x)
+ https?://
+ (?P<sub_domain>
+ (?:%s\.)?go|fxnow\.fxnetworks|
+ (?:www\.)?(?:abc|freeform|disneynow)
+ )\.com/
+ (?:
+ (?:[^/]+/)*(?P<id>[Vv][Dd][Kk][Aa]\w+)|
+ (?:[^/]+/)*(?P<display_id>[^/?\#]+)
+ )
+ ''' % r'\.|'.join(list(_SITE_INFO.keys()))
+ _TESTS = [{
+ 'url': 'http://abc.go.com/shows/designated-survivor/video/most-recent/VDKA3807643',
+ 'info_dict': {
+ 'id': 'VDKA3807643',
+ 'ext': 'mp4',
+ 'title': 'The Traitor in the White House',
+ 'description': 'md5:05b009d2d145a1e85d25111bd37222e8',
+ },
+ 'params': {
+ # m3u8 download
+ 'skip_download': True,
+ },
+ 'skip': 'This content is no longer available.',
+ }, {
+ 'url': 'https://disneynow.com/shows/big-hero-6-the-series',
+ 'info_dict': {
+ 'title': 'Doraemon',
+ 'id': 'SH55574025',
+ },
+ 'playlist_mincount': 51,
+ }, {
+ 'url': 'http://freeform.go.com/shows/shadowhunters/episodes/season-2/1-this-guilty-blood',
+ 'info_dict': {
+ 'id': 'VDKA3609139',
+ 'title': 'This Guilty Blood',
+ 'description': 'md5:f18e79ad1c613798d95fdabfe96cd292',
+ 'age_limit': 14,
+ 'episode': 'Episode 1',
+ 'upload_date': '20170102',
+ 'season': 'Season 2',
+ 'thumbnail': 'http://cdn1.edgedatg.com/aws/v2/abcf/Shadowhunters/video/201/ae5f75608d86bf88aa4f9f4aa76ab1b7/579x325-Q100_ae5f75608d86bf88aa4f9f4aa76ab1b7.jpg',
+ 'duration': 2544,
+ 'season_number': 2,
+ 'series': 'Shadowhunters',
+ 'episode_number': 1,
+ 'timestamp': 1483387200,
+ 'ext': 'mp4'
+ },
+ 'params': {
+ 'geo_bypass_ip_block': '3.244.239.0/24',
+ # m3u8 download
+ 'skip_download': True,
+ },
+ }, {
+ 'url': 'https://abc.com/shows/the-rookie/episode-guide/season-04/12-the-knock',
+ 'info_dict': {
+ 'id': 'VDKA26050359',
+ 'title': 'The Knock',
+ 'description': 'md5:0c2947e3ada4c31f28296db7db14aa64',
+ 'age_limit': 14,
+ 'ext': 'mp4',
+ 'thumbnail': 'http://cdn1.edgedatg.com/aws/v2/abc/TheRookie/video/412/daf830d06e83b11eaf5c0a299d993ae3/1556x876-Q75_daf830d06e83b11eaf5c0a299d993ae3.jpg',
+ 'episode': 'Episode 12',
+ 'season_number': 4,
+ 'season': 'Season 4',
+ 'timestamp': 1642975200,
+ 'episode_number': 12,
+ 'upload_date': '20220123',
+ 'series': 'The Rookie',
+ 'duration': 2572,
+ },
+ 'params': {
+ 'geo_bypass_ip_block': '3.244.239.0/24',
+ # m3u8 download
+ 'skip_download': True,
+ },
+ }, {
+ 'url': 'https://fxnow.fxnetworks.com/shows/better-things/video/vdka12782841',
+ 'info_dict': {
+ 'id': 'VDKA12782841',
+ 'title': 'First Look: Better Things - Season 2',
+ 'description': 'md5:fa73584a95761c605d9d54904e35b407',
+ 'ext': 'mp4',
+ 'age_limit': 14,
+ 'upload_date': '20170825',
+ 'duration': 161,
+ 'series': 'Better Things',
+ 'thumbnail': 'http://cdn1.edgedatg.com/aws/v2/fx/BetterThings/video/12782841/b6b05e58264121cc2c98811318e6d507/1556x876-Q75_b6b05e58264121cc2c98811318e6d507.jpg',
+ 'timestamp': 1503661074,
+ },
+ 'params': {
+ 'geo_bypass_ip_block': '3.244.239.0/24',
+ # m3u8 download
+ 'skip_download': True,
+ },
+ }, {
+ 'url': 'http://abc.go.com/shows/the-catch/episode-guide/season-01/10-the-wedding',
+ 'only_matching': True,
+ }, {
+ 'url': 'http://abc.go.com/shows/world-news-tonight/episode-guide/2017-02/17-021717-intense-stand-off-between-man-with-rifle-and-police-in-oakland',
+ 'only_matching': True,
+ }, {
+ # brand 004
+ 'url': 'http://disneynow.go.com/shows/big-hero-6-the-series/season-01/episode-10-mr-sparkles-loses-his-sparkle/vdka4637915',
+ 'only_matching': True,
+ }, {
+ # brand 008
+ 'url': 'http://disneynow.go.com/shows/minnies-bow-toons/video/happy-campers/vdka4872013',
+ 'only_matching': True,
+ }, {
+ 'url': 'https://disneynow.com/shows/minnies-bow-toons/video/happy-campers/vdka4872013',
+ 'only_matching': True,
+ }, {
+ 'url': 'https://www.freeform.com/shows/cruel-summer/episode-guide/season-01/01-happy-birthday-jeanette-turner',
+ 'only_matching': True,
+ }]
+
+ def _extract_videos(self, brand, video_id='-1', show_id='-1'):
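+        # '-1' appears to act as the API's wildcard: querying with only a
+        # show id lists every video of that show, while a concrete video id
+        # returns just that video.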
+ display_id = video_id if video_id != '-1' else show_id
+ return self._download_json(
+ 'http://api.contents.watchabc.go.com/vp2/ws/contents/3000/videos/%s/001/-1/%s/-1/%s/-1/-1.json' % (brand, show_id, video_id),
+ display_id)['video']
+
+ def _real_extract(self, url):
+ mobj = self._match_valid_url(url)
+ sub_domain = remove_start(remove_end(mobj.group('sub_domain') or '', '.go'), 'www.')
+ video_id, display_id = mobj.group('id', 'display_id')
+ site_info = self._SITE_INFO.get(sub_domain, {})
+ brand = site_info.get('brand')
+ if not video_id or not site_info:
+ webpage = self._download_webpage(url, display_id or video_id)
+ data = self._parse_json(
+ self._search_regex(
+ r'["\']__abc_com__["\']\s*\]\s*=\s*({.+?})\s*;', webpage,
+ 'data', default='{}'),
+ display_id or video_id, fatal=False)
+ # https://abc.com/shows/modern-family/episode-guide/season-01/101-pilot
+ layout = try_get(data, lambda x: x['page']['content']['video']['layout'], dict)
+ video_id = None
+ if layout:
+ video_id = try_get(
+ layout,
+ (lambda x: x['videoid'], lambda x: x['video']['id']),
+ compat_str)
+ if not video_id:
+ video_id = self._search_regex(
+ (
+ # There may be inner quotes, e.g. data-video-id="'VDKA3609139'"
+ # from http://freeform.go.com/shows/shadowhunters/episodes/season-2/1-this-guilty-blood
+ r'data-video-id=["\']*(VDKA\w+)',
+ # page.analytics.videoIdCode
+ r'\bvideoIdCode["\']\s*:\s*["\']((?:vdka|VDKA)\w+)',
+ # https://abc.com/shows/the-rookie/episode-guide/season-02/03-the-bet
+ r'\b(?:video)?id["\']\s*:\s*["\'](VDKA\w+)'
+ ), webpage, 'video id', default=video_id)
+ if not site_info:
+ brand = self._search_regex(
+ (r'data-brand=\s*["\']\s*(\d+)',
+ r'data-page-brand=\s*["\']\s*(\d+)'), webpage, 'brand',
+ default='004')
+ site_info = next(
+ si for _, si in self._SITE_INFO.items()
+ if si.get('brand') == brand)
+ if not video_id:
+                # show extraction works for Disney, DisneyJunior and DisneyXD;
+                # ABC and Freeform have a different layout
+ show_id = self._search_regex(r'data-show-id=["\']*(SH\d+)', webpage, 'show id')
+ videos = self._extract_videos(brand, show_id=show_id)
+ show_title = self._search_regex(r'data-show-title="([^"]+)"', webpage, 'show title', fatal=False)
+ entries = []
+ for video in videos:
+ entries.append(self.url_result(
+ video['url'], 'Go', video.get('id'), video.get('title')))
+ entries.reverse()
+ return self.playlist_result(entries, show_id, show_title)
+ video_data = self._extract_videos(brand, video_id)[0]
+ video_id = video_data['id']
+ title = video_data['title']
+
+ formats = []
+ subtitles = {}
+ for asset in video_data.get('assets', {}).get('asset', []):
+ asset_url = asset.get('value')
+ if not asset_url:
+ continue
+ format_id = asset.get('format')
+ ext = determine_ext(asset_url)
+ if ext == 'm3u8':
+ video_type = video_data.get('type')
+ data = {
+ 'video_id': video_data['id'],
+ 'video_type': video_type,
+ 'brand': brand,
+ 'device': '001',
+ }
+ if video_data.get('accesslevel') == '1':
+ requestor_id = site_info.get('requestor_id', 'DisneyChannels')
+ resource = site_info.get('resource_id') or self._get_mvpd_resource(
+ requestor_id, title, video_id, None)
+ auth = self._extract_mvpd_auth(
+ url, video_id, requestor_id, resource)
+ data.update({
+ 'token': auth,
+ 'token_type': 'ap',
+ 'adobe_requestor_id': requestor_id,
+ })
+ else:
+ self._initialize_geo_bypass({'countries': ['US']})
+ entitlement = self._download_json(
+ 'https://api.entitlement.watchabc.go.com/vp2/ws-secure/entitlement/2020/authorize.json',
+ video_id, data=urlencode_postdata(data))
+ errors = entitlement.get('errors', {}).get('errors', [])
+ if errors:
+ for error in errors:
+ if error.get('code') == 1002:
+ self.raise_geo_restricted(
+ error['message'], countries=['US'])
+ error_message = ', '.join([error['message'] for error in errors])
+ raise ExtractorError('%s said: %s' % (self.IE_NAME, error_message), expected=True)
+ asset_url += '?' + entitlement['uplynkData']['sessionKey']
+ fmts, subs = self._extract_m3u8_formats_and_subtitles(
+ asset_url, video_id, 'mp4', m3u8_id=format_id or 'hls', fatal=False)
+ formats.extend(fmts)
+ self._merge_subtitles(subs, target=subtitles)
+ else:
+ f = {
+ 'format_id': format_id,
+ 'url': asset_url,
+ 'ext': ext,
+ }
+ if re.search(r'(?:/mp4/source/|_source\.mp4)', asset_url):
+ f.update({
+ 'format_id': ('%s-' % format_id if format_id else '') + 'SOURCE',
+ 'quality': 1,
+ })
+ else:
+ mobj = re.search(r'/(\d+)x(\d+)/', asset_url)
+ if mobj:
+ height = int(mobj.group(2))
+ f.update({
+ 'format_id': ('%s-' % format_id if format_id else '') + '%dP' % height,
+ 'width': int(mobj.group(1)),
+ 'height': height,
+ })
+ formats.append(f)
+
+ for cc in video_data.get('closedcaption', {}).get('src', []):
+ cc_url = cc.get('value')
+ if not cc_url:
+ continue
+ ext = determine_ext(cc_url)
+ if ext == 'xml':
+ ext = 'ttml'
+ subtitles.setdefault(cc.get('lang'), []).append({
+ 'url': cc_url,
+ 'ext': ext,
+ })
+
+ thumbnails = []
+ for thumbnail in video_data.get('thumbnails', {}).get('thumbnail', []):
+ thumbnail_url = thumbnail.get('value')
+ if not thumbnail_url:
+ continue
+ thumbnails.append({
+ 'url': thumbnail_url,
+ 'width': int_or_none(thumbnail.get('width')),
+ 'height': int_or_none(thumbnail.get('height')),
+ })
+
+ return {
+ 'id': video_id,
+ 'title': title,
+ 'description': video_data.get('longdescription') or video_data.get('description'),
+ 'duration': int_or_none(video_data.get('duration', {}).get('value'), 1000),
+ 'age_limit': parse_age_limit(video_data.get('tvrating', {}).get('rating')),
+ 'episode_number': int_or_none(video_data.get('episodenumber')),
+ 'series': video_data.get('show', {}).get('title'),
+ 'season_number': int_or_none(video_data.get('season', {}).get('num')),
+ 'thumbnails': thumbnails,
+ 'formats': formats,
+ 'subtitles': subtitles,
+ 'timestamp': unified_timestamp(traverse_obj(video_data, ('airdates', 'airdate', 0))),
+ }
diff --git a/yt_dlp/extractor/godtube.py b/yt_dlp/extractor/godtube.py
new file mode 100644
index 0000000..35fb7a9
--- /dev/null
+++ b/yt_dlp/extractor/godtube.py
@@ -0,0 +1,55 @@
+from .common import InfoExtractor
+from ..utils import (
+ parse_duration,
+ parse_iso8601,
+)
+
+
+class GodTubeIE(InfoExtractor):
+ _WORKING = False
+ _VALID_URL = r'https?://(?:www\.)?godtube\.com/watch/\?v=(?P<id>[\da-zA-Z]+)'
+ _TESTS = [
+ {
+ 'url': 'https://www.godtube.com/watch/?v=0C0CNNNU',
+ 'md5': '77108c1e4ab58f48031101a1a2119789',
+ 'info_dict': {
+ 'id': '0C0CNNNU',
+ 'ext': 'mp4',
+ 'title': 'Woman at the well.',
+ 'duration': 159,
+ 'timestamp': 1205712000,
+ 'uploader': 'beverlybmusic',
+ 'upload_date': '20080317',
+ 'thumbnail': r're:^https?://.*\.jpg$',
+ },
+ },
+ ]
+
+ def _real_extract(self, url):
+ mobj = self._match_valid_url(url)
+ video_id = mobj.group('id')
+
+ config = self._download_xml(
+ 'http://www.godtube.com/resource/mediaplayer/%s.xml' % video_id.lower(),
+ video_id, 'Downloading player config XML')
+
+ video_url = config.find('file').text
+ uploader = config.find('author').text
+ timestamp = parse_iso8601(config.find('date').text)
+ duration = parse_duration(config.find('duration').text)
+ thumbnail = config.find('image').text
+
+ media = self._download_xml(
+ 'http://www.godtube.com/media/xml/?v=%s' % video_id, video_id, 'Downloading media XML')
+
+ title = media.find('title').text
+
+ return {
+ 'id': video_id,
+ 'url': video_url,
+ 'title': title,
+ 'thumbnail': thumbnail,
+ 'timestamp': timestamp,
+ 'uploader': uploader,
+ 'duration': duration,
+ }
diff --git a/yt_dlp/extractor/gofile.py b/yt_dlp/extractor/gofile.py
new file mode 100644
index 0000000..eb1dcf8
--- /dev/null
+++ b/yt_dlp/extractor/gofile.py
@@ -0,0 +1,106 @@
+import hashlib
+
+from .common import InfoExtractor
+from ..utils import (
+ ExtractorError,
+ try_get
+)
+
+
+class GofileIE(InfoExtractor):
+ _VALID_URL = r'https?://(?:www\.)?gofile\.io/d/(?P<id>[^/]+)'
+ _TESTS = [{
+ 'url': 'https://gofile.io/d/AMZyDw',
+ 'info_dict': {
+ 'id': 'AMZyDw',
+ },
+ 'playlist_mincount': 2,
+ 'playlist': [{
+ 'info_dict': {
+ 'id': 'de571ac1-5edc-42e2-8ec2-bdac83ad4a31',
+ 'filesize': 928116,
+ 'ext': 'mp4',
+ 'title': 'nuuh',
+ 'release_timestamp': 1638338704,
+ 'release_date': '20211201',
+ }
+ }]
+ }, {
+ 'url': 'https://gofile.io/d/is8lKr',
+ 'info_dict': {
+ 'id': 'TMjXd9',
+ 'ext': 'mp4',
+ },
+ 'playlist_count': 0,
+ 'skip': 'No video/audio found at provided URL.',
+ }, {
+ 'url': 'https://gofile.io/d/TMjXd9',
+ 'info_dict': {
+ 'id': 'TMjXd9',
+ },
+ 'playlist_count': 1,
+ }, {
+ 'url': 'https://gofile.io/d/gqOtRf',
+ 'info_dict': {
+ 'id': 'gqOtRf',
+ },
+ 'playlist_mincount': 1,
+ 'params': {
+ 'videopassword': 'password',
+ },
+ }]
+ _TOKEN = None
+
+ def _real_initialize(self):
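+        # Gofile requires an account token even for anonymous access; reuse
+        # an existing cookie when present, otherwise create a guest account.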
+ token = self._get_cookies('https://gofile.io/').get('accountToken')
+ if token:
+ self._TOKEN = token.value
+ return
+
+ account_data = self._download_json(
+ 'https://api.gofile.io/createAccount', None, note='Getting a new guest account')
+ self._TOKEN = account_data['data']['token']
+ self._set_cookie('.gofile.io', 'accountToken', self._TOKEN)
+
+ def _entries(self, file_id):
+ query_params = {
+ 'contentId': file_id,
+ 'token': self._TOKEN,
+ 'wt': '4fd6sg89d7s6', # From https://gofile.io/dist/js/alljs.js
+ }
+ password = self.get_param('videopassword')
+ if password:
+ query_params['password'] = hashlib.sha256(password.encode('utf-8')).hexdigest()
+ files = self._download_json(
+ 'https://api.gofile.io/getContent', file_id, note='Getting filelist', query=query_params)
+
+ status = files['status']
+ if status == 'error-passwordRequired':
+ raise ExtractorError(
+ 'This video is protected by a password, use the --video-password option', expected=True)
+ elif status != 'ok':
+ raise ExtractorError(f'{self.IE_NAME} said: status {status}', expected=True)
+
+ found_files = False
+ for file in (try_get(files, lambda x: x['data']['contents'], dict) or {}).values():
+            # mimetype may be missing; the fallback keeps the unpacking safe
+            file_type, file_format = (file.get('mimetype') or '/').split('/', 1)
+ if file_type not in ('video', 'audio') and file_format != 'vnd.mts':
+ continue
+
+ found_files = True
+ file_url = file.get('link')
+ if file_url:
+ yield {
+ 'id': file['id'],
+ 'title': file['name'].rsplit('.', 1)[0],
+ 'url': file_url,
+ 'filesize': file.get('size'),
+ 'release_timestamp': file.get('createTime')
+ }
+
+ if not found_files:
+ raise ExtractorError('No video/audio found at provided URL.', expected=True)
+
+ def _real_extract(self, url):
+ file_id = self._match_id(url)
+ return self.playlist_result(self._entries(file_id), playlist_id=file_id)
diff --git a/yt_dlp/extractor/golem.py b/yt_dlp/extractor/golem.py
new file mode 100644
index 0000000..c33d950
--- /dev/null
+++ b/yt_dlp/extractor/golem.py
@@ -0,0 +1,68 @@
+from .common import InfoExtractor
+from ..compat import (
+ compat_str,
+ compat_urlparse,
+)
+from ..utils import (
+ determine_ext,
+)
+
+
+class GolemIE(InfoExtractor):
+ _VALID_URL = r'^https?://video\.golem\.de/.+?/(?P<id>.+?)/'
+ _TEST = {
+ 'url': 'http://video.golem.de/handy/14095/iphone-6-und-6-plus-test.html',
+ 'md5': 'c1a2c0a3c863319651c7c992c5ee29bf',
+ 'info_dict': {
+ 'id': '14095',
+ 'format_id': 'high',
+ 'ext': 'mp4',
+ 'title': 'iPhone 6 und 6 Plus - Test',
+ 'duration': 300.44,
+ 'filesize': 65309548,
+ }
+ }
+
+ _PREFIX = 'http://video.golem.de'
+
+ def _real_extract(self, url):
+ video_id = self._match_id(url)
+
+ config = self._download_xml(
+ 'https://video.golem.de/xml/{0}.xml'.format(video_id), video_id)
+
+ info = {
+ 'id': video_id,
+ 'title': config.findtext('./title', 'golem'),
+ 'duration': self._float(config.findtext('./playtime'), 'duration'),
+ }
+
+ formats = []
+ for e in config:
+ url = e.findtext('./url')
+ if not url:
+ continue
+
+ formats.append({
+ 'format_id': compat_str(e.tag),
+ 'url': compat_urlparse.urljoin(self._PREFIX, url),
+ 'height': self._int(e.get('height'), 'height'),
+ 'width': self._int(e.get('width'), 'width'),
+ 'filesize': self._int(e.findtext('filesize'), 'filesize'),
+ 'ext': determine_ext(e.findtext('./filename')),
+ })
+ info['formats'] = formats
+
+ thumbnails = []
+ for e in config.findall('.//teaser'):
+ url = e.findtext('./url')
+ if not url:
+ continue
+ thumbnails.append({
+ 'url': compat_urlparse.urljoin(self._PREFIX, url),
+ 'width': self._int(e.get('width'), 'thumbnail width'),
+ 'height': self._int(e.get('height'), 'thumbnail height'),
+ })
+ info['thumbnails'] = thumbnails
+
+ return info
diff --git a/yt_dlp/extractor/goodgame.py b/yt_dlp/extractor/goodgame.py
new file mode 100644
index 0000000..c17ad56
--- /dev/null
+++ b/yt_dlp/extractor/goodgame.py
@@ -0,0 +1,57 @@
+from .common import InfoExtractor
+from ..utils import (
+ clean_html,
+ int_or_none,
+ str_or_none,
+ traverse_obj,
+)
+
+
+class GoodGameIE(InfoExtractor):
+ IE_NAME = 'goodgame:stream'
+ _VALID_URL = r'https?://goodgame\.ru/channel/(?P<id>\w+)'
+ _TESTS = [{
+ 'url': 'https://goodgame.ru/channel/Pomi/#autoplay',
+ 'info_dict': {
+ 'id': 'pomi',
+ 'ext': 'mp4',
+ 'title': r're:Reynor vs Special \(1/2,bo3\) Wardi Spring EU \- playoff \(финальный день\) \d{4}-\d{2}-\d{2} \d{2}:\d{2}$',
+ 'channel_id': '1644',
+ 'channel': 'Pomi',
+ 'channel_url': 'https://goodgame.ru/channel/Pomi/',
+ 'description': 'md5:4a87b775ee7b2b57bdccebe285bbe171',
+ 'thumbnail': r're:^https?://.*\.jpg$',
+ 'live_status': 'is_live',
+ 'view_count': int,
+ },
+ 'params': {'skip_download': 'm3u8'},
+ 'skip': 'May not be online',
+ }]
+
+ def _real_extract(self, url):
+ channel_name = self._match_id(url)
+ response = self._download_json(f'https://api2.goodgame.ru/v2/streams/{channel_name}', channel_name)
+ player_id = response['channel']['gg_player_src']
+
+ formats, subtitles = [], {}
+ if response.get('status') == 'Live':
+ formats, subtitles = self._extract_m3u8_formats_and_subtitles(
+ f'https://hls.goodgame.ru/manifest/{player_id}_master.m3u8',
+ channel_name, 'mp4', live=True)
+ else:
+ self.raise_no_formats('User is offline', expected=True, video_id=channel_name)
+
+ return {
+ 'id': player_id,
+ 'formats': formats,
+ 'subtitles': subtitles,
+ 'title': traverse_obj(response, ('channel', 'title')),
+ 'channel': channel_name,
+ 'channel_id': str_or_none(traverse_obj(response, ('channel', 'id'))),
+ 'channel_url': response.get('url'),
+ 'description': clean_html(traverse_obj(response, ('channel', 'description'))),
+ 'thumbnail': traverse_obj(response, ('channel', 'thumb')),
+ 'is_live': bool(formats),
+ 'view_count': int_or_none(response.get('viewers')),
+ 'age_limit': 18 if traverse_obj(response, ('channel', 'adult')) else None,
+ }
diff --git a/yt_dlp/extractor/googledrive.py b/yt_dlp/extractor/googledrive.py
new file mode 100644
index 0000000..06658dd
--- /dev/null
+++ b/yt_dlp/extractor/googledrive.py
@@ -0,0 +1,341 @@
+import re
+
+from .common import InfoExtractor
+from ..compat import compat_parse_qs
+from ..utils import (
+ ExtractorError,
+ determine_ext,
+ extract_attributes,
+ get_element_by_class,
+ get_element_html_by_id,
+ int_or_none,
+ lowercase_escape,
+ try_get,
+ update_url_query,
+)
+
+
+class GoogleDriveIE(InfoExtractor):
+ _VALID_URL = r'''(?x)
+ https?://
+ (?:
+ (?:docs|drive|drive\.usercontent)\.google\.com/
+ (?:
+ (?:uc|open|download)\?.*?id=|
+ file/d/
+ )|
+ video\.google\.com/get_player\?.*?docid=
+ )
+ (?P<id>[a-zA-Z0-9_-]{28,})
+ '''
+ _TESTS = [{
+ 'url': 'https://drive.google.com/file/d/0ByeS4oOUV-49Zzh4R1J6R09zazQ/edit?pli=1',
+ 'md5': '5c602afbbf2c1db91831f5d82f678554',
+ 'info_dict': {
+ 'id': '0ByeS4oOUV-49Zzh4R1J6R09zazQ',
+ 'ext': 'mp4',
+ 'title': 'Big Buck Bunny.mp4',
+ 'duration': 45,
+ 'thumbnail': 'https://drive.google.com/thumbnail?id=0ByeS4oOUV-49Zzh4R1J6R09zazQ',
+ }
+ }, {
+        # video can't be watched anonymously due to the view count limit being
+        # reached, but can be downloaded (see https://github.com/ytdl-org/youtube-dl/issues/14046)
+ 'url': 'https://drive.google.com/file/d/0B-vUyvmDLdWDcEt4WjBqcmI2XzQ/view',
+ 'only_matching': True,
+ }, {
+ # video id is longer than 28 characters
+ 'url': 'https://drive.google.com/file/d/1ENcQ_jeCuj7y19s66_Ou9dRP4GKGsodiDQ/edit',
+ 'only_matching': True,
+ }, {
+ 'url': 'https://drive.google.com/open?id=0B2fjwgkl1A_CX083Tkowdmt6d28',
+ 'only_matching': True,
+ }, {
+ 'url': 'https://drive.google.com/uc?id=0B2fjwgkl1A_CX083Tkowdmt6d28',
+ 'only_matching': True,
+ }, {
+ 'url': 'https://drive.usercontent.google.com/download?id=0ByeS4oOUV-49Zzh4R1J6R09zazQ',
+ 'only_matching': True,
+ }]
+ _FORMATS_EXT = {
+ '5': 'flv',
+ '6': 'flv',
+ '13': '3gp',
+ '17': '3gp',
+ '18': 'mp4',
+ '22': 'mp4',
+ '34': 'flv',
+ '35': 'flv',
+ '36': '3gp',
+ '37': 'mp4',
+ '38': 'mp4',
+ '43': 'webm',
+ '44': 'webm',
+ '45': 'webm',
+ '46': 'webm',
+ '59': 'mp4',
+ }
+ _BASE_URL_CAPTIONS = 'https://drive.google.com/timedtext'
+ _CAPTIONS_ENTRY_TAG = {
+ 'subtitles': 'track',
+ 'automatic_captions': 'target',
+ }
+ _caption_formats_ext = []
+ _captions_xml = None
+
+ @classmethod
+ def _extract_embed_urls(cls, url, webpage):
+ mobj = re.search(
+ r'<iframe[^>]+src="https?://(?:video\.google\.com/get_player\?.*?docid=|(?:docs|drive)\.google\.com/file/d/)(?P<id>[a-zA-Z0-9_-]{28,})',
+ webpage)
+ if mobj:
+ yield 'https://drive.google.com/file/d/%s' % mobj.group('id')
+
+ def _download_subtitles_xml(self, video_id, subtitles_id, hl):
+ if self._captions_xml:
+ return
+ self._captions_xml = self._download_xml(
+ self._BASE_URL_CAPTIONS, video_id, query={
+ 'id': video_id,
+ 'vid': subtitles_id,
+ 'hl': hl,
+ 'v': video_id,
+ 'type': 'list',
+ 'tlangs': '1',
+ 'fmts': '1',
+ 'vssids': '1',
+ }, note='Downloading subtitles XML',
+ errnote='Unable to download subtitles XML', fatal=False)
+ if self._captions_xml:
+ for f in self._captions_xml.findall('format'):
+ if f.attrib.get('fmt_code') and not f.attrib.get('default'):
+ self._caption_formats_ext.append(f.attrib['fmt_code'])
+
+ def _get_captions_by_type(self, video_id, subtitles_id, caption_type,
+ origin_lang_code=None):
+ if not subtitles_id or not caption_type:
+ return
+ captions = {}
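+        # Regular subtitle tracks are fetched in their own language; for
+        # automatic captions every request uses the origin track's language
+        # and asks for a translation via the tlang parameter.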
+ for caption_entry in self._captions_xml.findall(
+ self._CAPTIONS_ENTRY_TAG[caption_type]):
+ caption_lang_code = caption_entry.attrib.get('lang_code')
+ if not caption_lang_code:
+ continue
+ caption_format_data = []
+ for caption_format in self._caption_formats_ext:
+ query = {
+ 'vid': subtitles_id,
+ 'v': video_id,
+ 'fmt': caption_format,
+ 'lang': (caption_lang_code if origin_lang_code is None
+ else origin_lang_code),
+ 'type': 'track',
+ 'name': '',
+ 'kind': '',
+ }
+ if origin_lang_code is not None:
+ query.update({'tlang': caption_lang_code})
+ caption_format_data.append({
+ 'url': update_url_query(self._BASE_URL_CAPTIONS, query),
+ 'ext': caption_format,
+ })
+ captions[caption_lang_code] = caption_format_data
+ return captions
+
+ def _get_subtitles(self, video_id, subtitles_id, hl):
+ if not subtitles_id or not hl:
+ return
+ self._download_subtitles_xml(video_id, subtitles_id, hl)
+ if not self._captions_xml:
+ return
+ return self._get_captions_by_type(video_id, subtitles_id, 'subtitles')
+
+ def _get_automatic_captions(self, video_id, subtitles_id, hl):
+ if not subtitles_id or not hl:
+ return
+ self._download_subtitles_xml(video_id, subtitles_id, hl)
+ if not self._captions_xml:
+ return
+ track = self._captions_xml.find('track')
+ if track is None:
+ return
+ origin_lang_code = track.attrib.get('lang_code')
+ if not origin_lang_code:
+ return
+ return self._get_captions_by_type(
+ video_id, subtitles_id, 'automatic_captions', origin_lang_code)
+
+ def _real_extract(self, url):
+ video_id = self._match_id(url)
+ video_info = compat_parse_qs(self._download_webpage(
+ 'https://drive.google.com/get_video_info',
+ video_id, 'Downloading video webpage', query={'docid': video_id}))
+
+ def get_value(key):
+ return try_get(video_info, lambda x: x[key][0])
+
+ reason = get_value('reason')
+ title = get_value('title')
+
+ formats = []
+ fmt_stream_map = (get_value('fmt_stream_map') or '').split(',')
+ fmt_list = (get_value('fmt_list') or '').split(',')
+ if fmt_stream_map and fmt_list:
+ resolutions = {}
+ for fmt in fmt_list:
+ mobj = re.search(
+ r'^(?P<format_id>\d+)/(?P<width>\d+)[xX](?P<height>\d+)', fmt)
+ if mobj:
+ resolutions[mobj.group('format_id')] = (
+ int(mobj.group('width')), int(mobj.group('height')))
+
+ for fmt_stream in fmt_stream_map:
+ fmt_stream_split = fmt_stream.split('|')
+ if len(fmt_stream_split) < 2:
+ continue
+ format_id, format_url = fmt_stream_split[:2]
+ f = {
+ 'url': lowercase_escape(format_url),
+ 'format_id': format_id,
+ 'ext': self._FORMATS_EXT[format_id],
+ }
+ resolution = resolutions.get(format_id)
+ if resolution:
+ f.update({
+ 'width': resolution[0],
+ 'height': resolution[1],
+ })
+ formats.append(f)
+
+ source_url = update_url_query(
+ 'https://drive.usercontent.google.com/download', {
+ 'id': video_id,
+ 'export': 'download',
+ 'confirm': 't',
+ })
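+ # The resulting direct-download URL looks like (illustrative):
+ # https://drive.usercontent.google.com/download?id=<video_id>&export=download&confirm=t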
+
+ def request_source_file(source_url, kind, data=None):
+ return self._request_webpage(
+ source_url, video_id, note='Requesting %s file' % kind,
+ errnote='Unable to request %s file' % kind, fatal=False, data=data)
+ urlh = request_source_file(source_url, 'source')
+ if urlh:
+ def add_source_format(urlh):
+ nonlocal title
+ if not title:
+ title = self._search_regex(
+ r'\bfilename="([^"]+)"', urlh.headers.get('Content-Disposition'),
+ 'title', default=None)
+ formats.append({
+ # Use redirect URLs as download URLs in order to calculate
+ # correct cookies in _calc_cookies.
+ # Using original URLs may result in redirect loop due to
+ # google.com's cookies mistakenly used for googleusercontent.com
+ # redirect URLs (see #23919).
+ 'url': urlh.url,
+ 'ext': determine_ext(title, 'mp4').lower(),
+ 'format_id': 'source',
+ 'quality': 1,
+ })
+ if urlh.headers.get('Content-Disposition'):
+ add_source_format(urlh)
+ else:
+ confirmation_webpage = self._webpage_read_content(
+ urlh, url, video_id, note='Downloading confirmation page',
+ errnote='Unable to confirm download', fatal=False)
+ if confirmation_webpage:
+ confirmed_source_url = extract_attributes(
+ get_element_html_by_id('download-form', confirmation_webpage) or '').get('action')
+ if confirmed_source_url:
+ urlh = request_source_file(confirmed_source_url, 'confirmed source', data=b'')
+ if urlh and urlh.headers.get('Content-Disposition'):
+ add_source_format(urlh)
+ else:
+ self.report_warning(
+ get_element_by_class('uc-error-subcaption', confirmation_webpage)
+ or get_element_by_class('uc-error-caption', confirmation_webpage)
+ or 'unable to extract confirmation code')
+
+ if not formats and reason:
+ if title:
+ self.raise_no_formats(reason, expected=True)
+ else:
+ raise ExtractorError(reason, expected=True)
+
+ hl = get_value('hl')
+ subtitles_id = None
+ ttsurl = get_value('ttsurl')
+ if ttsurl:
+ # the video ID for subtitles will be the last value in the ttsurl
+ # query string
+ subtitles_id = ttsurl.encode('utf-8').decode(
+ 'unicode_escape').split('=')[-1]
+
+ self.cookiejar.clear(domain='.google.com', path='/', name='NID')
+
+ return {
+ 'id': video_id,
+ 'title': title,
+ 'thumbnail': 'https://drive.google.com/thumbnail?id=' + video_id,
+ 'duration': int_or_none(get_value('length_seconds')),
+ 'formats': formats,
+ 'subtitles': self.extract_subtitles(video_id, subtitles_id, hl),
+ 'automatic_captions': self.extract_automatic_captions(
+ video_id, subtitles_id, hl),
+ }
+
+
+class GoogleDriveFolderIE(InfoExtractor):
+ IE_NAME = 'GoogleDrive:Folder'
+ _VALID_URL = r'https?://(?:docs|drive)\.google\.com/drive/folders/(?P<id>[\w-]{28,})'
+ _TESTS = [{
+ 'url': 'https://drive.google.com/drive/folders/1dQ4sx0-__Nvg65rxTSgQrl7VyW_FZ9QI',
+ 'info_dict': {
+ 'id': '1dQ4sx0-__Nvg65rxTSgQrl7VyW_FZ9QI',
+ 'title': 'Forrest'
+ },
+ 'playlist_count': 3,
+ }]
+ _BOUNDARY = '=====vc17a3rwnndj====='
+ _REQUEST = "/drive/v2beta/files?openDrive=true&reason=102&syncType=0&errorRecovery=false&q=trashed%20%3D%20false%20and%20'{folder_id}'%20in%20parents&fields=kind%2CnextPageToken%2Citems(kind%2CmodifiedDate%2CmodifiedByMeDate%2ClastViewedByMeDate%2CfileSize%2Cowners(kind%2CpermissionId%2Cid)%2ClastModifyingUser(kind%2CpermissionId%2Cid)%2ChasThumbnail%2CthumbnailVersion%2Ctitle%2Cid%2CresourceKey%2Cshared%2CsharedWithMeDate%2CuserPermission(role)%2CexplicitlyTrashed%2CmimeType%2CquotaBytesUsed%2Ccopyable%2CfileExtension%2CsharingUser(kind%2CpermissionId%2Cid)%2Cspaces%2Cversion%2CteamDriveId%2ChasAugmentedPermissions%2CcreatedDate%2CtrashingUser(kind%2CpermissionId%2Cid)%2CtrashedDate%2Cparents(id)%2CshortcutDetails(targetId%2CtargetMimeType%2CtargetLookupStatus)%2Ccapabilities(canCopy%2CcanDownload%2CcanEdit%2CcanAddChildren%2CcanDelete%2CcanRemoveChildren%2CcanShare%2CcanTrash%2CcanRename%2CcanReadTeamDrive%2CcanMoveTeamDriveItem)%2Clabels(starred%2Ctrashed%2Crestricted%2Cviewed))%2CincompleteSearch&appDataFilter=NO_APP_DATA&spaces=drive&pageToken={page_token}&maxResults=50&supportsTeamDrives=true&includeItemsFromAllDrives=true&corpora=default&orderBy=folder%2Ctitle_natural%20asc&retryCount=0&key={key} HTTP/1.1"
+ _DATA = f'''--{_BOUNDARY}
+content-type: application/http
+content-transfer-encoding: binary
+
+GET %s
+
+--{_BOUNDARY}
+'''
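+ # _DATA wraps a single GET request line in a multipart/mixed body,
+ # mimicking the batch requests issued by the Drive web UI (an assumption
+ # based on the clients6.google.com batch endpoint used in _call_api).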
+
+ def _call_api(self, folder_id, key, data, **kwargs):
+ response = self._download_webpage(
+ 'https://clients6.google.com/batch/drive/v2beta',
+ folder_id, data=data.encode('utf-8'),
+ headers={
+ 'Content-Type': 'text/plain;charset=UTF-8;',
+ 'Origin': 'https://drive.google.com',
+ }, query={
+ '$ct': f'multipart/mixed; boundary="{self._BOUNDARY}"',
+ 'key': key
+ }, **kwargs)
+ return self._search_json('', response, 'api response', folder_id, **kwargs) or {}
+
+ def _get_folder_items(self, folder_id, key):
+ page_token = ''
+ while page_token is not None:
+ request = self._REQUEST.format(folder_id=folder_id, page_token=page_token, key=key)
+ page = self._call_api(folder_id, key, self._DATA % request)
+ yield from page['items']
+ page_token = page.get('nextPageToken')
+
+ def _real_extract(self, url):
+ folder_id = self._match_id(url)
+
+ webpage = self._download_webpage(url, folder_id)
+ key = self._search_regex(r'"(\w{39})"', webpage, 'key')
+
+ folder_info = self._call_api(folder_id, key, self._DATA % f'/drive/v2beta/files/{folder_id} HTTP/1.1', fatal=False)
+
+ return self.playlist_from_matches(
+ self._get_folder_items(folder_id, key), folder_id, folder_info.get('title'),
+ ie=GoogleDriveIE, getter=lambda item: f'https://drive.google.com/file/d/{item["id"]}')
diff --git a/yt_dlp/extractor/googlepodcasts.py b/yt_dlp/extractor/googlepodcasts.py
new file mode 100644
index 0000000..8b2351b
--- /dev/null
+++ b/yt_dlp/extractor/googlepodcasts.py
@@ -0,0 +1,84 @@
+import json
+
+from .common import InfoExtractor
+from ..utils import (
+ clean_podcast_url,
+ int_or_none,
+ try_get,
+ urlencode_postdata,
+)
+
+
+class GooglePodcastsBaseIE(InfoExtractor):
+ _VALID_URL_BASE = r'https?://podcasts\.google\.com/feed/'
+
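+ # batchexecute responses prepend an anti-XSSI guard before the JSON
+ # payload; the transform_source regex below strips it, and the JSON
+ # string at [0][2] holds the actual RPC result (undocumented protocol;
+ # layout inferred from the response shape).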
+ def _batch_execute(self, func_id, video_id, params):
+ return json.loads(self._download_json(
+ 'https://podcasts.google.com/_/PodcastsUi/data/batchexecute',
+ video_id, data=urlencode_postdata({
+ 'f.req': json.dumps([[[func_id, json.dumps(params), None, '1']]]),
+ }), transform_source=lambda x: self._search_regex(r'(?s)(\[.+\])', x, 'data'))[0][2])
+
+ def _extract_episode(self, episode):
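+ # Field positions in the episode array are undocumented and were mapped
+ # empirically (e.g. episode[8] is the title, episode[13] the audio URL).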
+ return {
+ 'id': episode[4][3],
+ 'title': episode[8],
+ 'url': clean_podcast_url(episode[13]),
+ 'thumbnail': episode[2],
+ 'description': episode[9],
+ 'creator': try_get(episode, lambda x: x[14]),
+ 'timestamp': int_or_none(episode[11]),
+ 'duration': int_or_none(episode[12]),
+ 'series': episode[1],
+ }
+
+
+class GooglePodcastsIE(GooglePodcastsBaseIE):
+ IE_NAME = 'google:podcasts'
+ _VALID_URL = GooglePodcastsBaseIE._VALID_URL_BASE + r'(?P<feed_url>[^/]+)/episode/(?P<id>[^/?&#]+)'
+ _TEST = {
+ 'url': 'https://podcasts.google.com/feed/aHR0cHM6Ly9mZWVkcy5ucHIub3JnLzM0NDA5ODUzOS9wb2RjYXN0LnhtbA/episode/MzBlNWRlN2UtOWE4Yy00ODcwLTk2M2MtM2JlMmUyNmViOTRh',
+ 'md5': 'fa56b2ee8bd0703e27e42d4b104c4766',
+ 'info_dict': {
+ 'id': '30e5de7e-9a8c-4870-963c-3be2e26eb94a',
+ 'ext': 'mp3',
+ 'title': 'WWDTM New Year 2021',
+ 'description': 'We say goodbye to 2020 with Christine Baranksi, Doug Jones, Jonna Mendez, and Kellee Edwards.',
+ 'upload_date': '20210102',
+ 'timestamp': 1609606800,
+ 'duration': 2901,
+ 'series': "Wait Wait... Don't Tell Me!",
+ }
+ }
+
+ def _real_extract(self, url):
+ b64_feed_url, b64_guid = self._match_valid_url(url).groups()
+ episode = self._batch_execute(
+ 'oNjqVe', b64_guid, [b64_feed_url, b64_guid])[1]
+ return self._extract_episode(episode)
+
+
+class GooglePodcastsFeedIE(GooglePodcastsBaseIE):
+ IE_NAME = 'google:podcasts:feed'
+ _VALID_URL = GooglePodcastsBaseIE._VALID_URL_BASE + r'(?P<id>[^/?&#]+)/?(?:[?#&]|$)'
+ _TEST = {
+ 'url': 'https://podcasts.google.com/feed/aHR0cHM6Ly9mZWVkcy5ucHIub3JnLzM0NDA5ODUzOS9wb2RjYXN0LnhtbA',
+ 'info_dict': {
+ 'title': "Wait Wait... Don't Tell Me!",
+ 'description': "NPR's weekly current events quiz. Have a laugh and test your news knowledge while figuring out what's real and what we've made up.",
+ },
+ 'playlist_mincount': 20,
+ }
+
+ def _real_extract(self, url):
+ b64_feed_url = self._match_id(url)
+ data = self._batch_execute('ncqJEe', b64_feed_url, [b64_feed_url])
+
+ entries = []
+ for episode in (try_get(data, lambda x: x[1][0]) or []):
+ entries.append(self._extract_episode(episode))
+
+ feed = try_get(data, lambda x: x[3]) or []
+ return self.playlist_result(
+ entries, playlist_title=try_get(feed, lambda x: x[0]),
+ playlist_description=try_get(feed, lambda x: x[2]))
diff --git a/yt_dlp/extractor/googlesearch.py b/yt_dlp/extractor/googlesearch.py
new file mode 100644
index 0000000..67ca0e5
--- /dev/null
+++ b/yt_dlp/extractor/googlesearch.py
@@ -0,0 +1,38 @@
+import itertools
+import re
+
+from .common import SearchInfoExtractor
+
+
+class GoogleSearchIE(SearchInfoExtractor):
+ IE_DESC = 'Google Video search'
+ IE_NAME = 'video.google:search'
+ _SEARCH_KEY = 'gvsearch'
+ _TESTS = [{
+ 'url': 'gvsearch15:python language',
+ 'info_dict': {
+ 'id': 'python language',
+ 'title': 'python language',
+ },
+ 'playlist_count': 15,
+ }]
+ _PAGE_SIZE = 100
+
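+ # Results are scraped from the HTML of the video search tab; 'dXiKIc' is
+ # an obfuscated, Google-generated class name that may change at any time.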
+ def _search_results(self, query):
+ for pagenum in itertools.count():
+ webpage = self._download_webpage(
+ 'http://www.google.com/search', f'gvsearch:{query}',
+ note=f'Downloading result page {pagenum + 1}',
+ query={
+ 'tbm': 'vid',
+ 'q': query,
+ 'start': pagenum * self._PAGE_SIZE,
+ 'num': self._PAGE_SIZE,
+ 'hl': 'en',
+ })
+
+ for url in re.findall(r'<div[^>]* class="dXiKIc"[^>]*><a href="([^"]+)"', webpage):
+ yield self.url_result(url)
+
+ if not re.search(r'id="pnnext"', webpage):
+ return
diff --git a/yt_dlp/extractor/goplay.py b/yt_dlp/extractor/goplay.py
new file mode 100644
index 0000000..74aad11
--- /dev/null
+++ b/yt_dlp/extractor/goplay.py
@@ -0,0 +1,433 @@
+import base64
+import binascii
+import datetime
+import hashlib
+import hmac
+import json
+import os
+
+from .common import InfoExtractor
+from ..utils import (
+ ExtractorError,
+ traverse_obj,
+ unescapeHTML,
+)
+
+
+class GoPlayIE(InfoExtractor):
+ _VALID_URL = r'https?://(?:www\.)?goplay\.be/video/(?:[^/]+/[^/]+/|)(?P<display_id>[^/#]+)'
+
+ _NETRC_MACHINE = 'goplay'
+
+ _TESTS = [{
+ 'url': 'https://www.goplay.be/video/de-container-cup/de-container-cup-s3/de-container-cup-s3-aflevering-2#autoplay',
+ 'info_dict': {
+ 'id': '9c4214b8-e55d-4e4b-a446-f015f6c6f811',
+ 'ext': 'mp4',
+ 'title': 'S3 - Aflevering 2',
+ 'series': 'De Container Cup',
+ 'season': 'Season 3',
+ 'season_number': 3,
+ 'episode': 'Episode 2',
+ 'episode_number': 2,
+ },
+ 'skip': 'This video is only available for registered users'
+ }, {
+ 'url': 'https://www.goplay.be/video/a-family-for-thr-holidays-s1-aflevering-1#autoplay',
+ 'info_dict': {
+ 'id': '74e3ed07-748c-49e4-85a0-393a93337dbf',
+ 'ext': 'mp4',
+ 'title': 'A Family for the Holidays',
+ },
+ 'skip': 'This video is only available for registered users'
+ }, {
+ 'url': 'https://www.goplay.be/video/de-mol/de-mol-s11/de-mol-s11-aflevering-1#autoplay',
+ 'info_dict': {
+ 'id': '03eb8f2f-153e-41cb-9805-0d3a29dab656',
+ 'ext': 'mp4',
+ 'title': 'S11 - Aflevering 1',
+ 'episode': 'Episode 1',
+ 'series': 'De Mol',
+ 'season_number': 11,
+ 'episode_number': 1,
+ 'season': 'Season 11'
+ },
+ 'params': {
+ 'skip_download': True
+ },
+ 'skip': 'This video is only available for registered users'
+ }]
+
+ _id_token = None
+
+ def _perform_login(self, username, password):
+ self.report_login()
+ aws = AwsIdp(ie=self, pool_id='eu-west-1_dViSsKM5Y', client_id='6s1h851s8uplco5h6mqh1jac8m')
+ self._id_token, _ = aws.authenticate(username=username, password=password)
+
+ def _real_initialize(self):
+ if not self._id_token:
+ self.raise_login_required(method='password')
+
+ def _real_extract(self, url):
+ url, display_id = self._match_valid_url(url).group(0, 'display_id')
+ webpage = self._download_webpage(url, display_id)
+ video_data_json = self._html_search_regex(r'<div\s+data-hero="([^"]+)"', webpage, 'video_data')
+ video_data = self._parse_json(unescapeHTML(video_data_json), display_id).get('data')
+
+ movie = video_data.get('movie')
+ if movie:
+ video_id = movie['videoUuid']
+ info_dict = {
+ 'title': movie.get('title')
+ }
+ else:
+ episode = traverse_obj(video_data, ('playlists', ..., 'episodes', lambda _, v: v['pageInfo']['url'] == url), get_all=False)
+ video_id = episode['videoUuid']
+ info_dict = {
+ 'title': episode.get('episodeTitle'),
+ 'series': traverse_obj(episode, ('program', 'title')),
+ 'season_number': episode.get('seasonNumber'),
+ 'episode_number': episode.get('episodeNumber'),
+ }
+
+ api = self._download_json(
+ f'https://api.goplay.be/web/v1/videos/long-form/{video_id}',
+ video_id, headers={
+ 'Authorization': 'Bearer %s' % self._id_token,
+ **self.geo_verification_headers(),
+ })
+
+ if 'manifestUrls' in api:
+ formats, subtitles = self._extract_m3u8_formats_and_subtitles(
+ api['manifestUrls']['hls'], video_id, ext='mp4', m3u8_id='HLS')
+
+ else:
+ if 'ssai' not in api:
+ raise ExtractorError('expecting Google SSAI stream')
+
+ ssai_content_source_id = api['ssai']['contentSourceID']
+ ssai_video_id = api['ssai']['videoID']
+
+ dai = self._download_json(
+ f'https://dai.google.com/ondemand/dash/content/{ssai_content_source_id}/vid/{ssai_video_id}/streams',
+ video_id, data=b'{"api-key":"null"}',
+ headers={'content-type': 'application/json'})
+
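+ # Google DAI stitches ads into the DASH manifest as separate periods;
+ # extract all periods, drop the ad ones and merge the rest back into
+ # continuous formats.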
+ periods = self._extract_mpd_periods(dai['stream_manifest'], video_id)
+
+ # skip pre-roll and mid-roll ads
+ periods = [p for p in periods if '-ad-' not in p['id']]
+
+ formats, subtitles = self._merge_mpd_periods(periods)
+
+ info_dict.update({
+ 'id': video_id,
+ 'formats': formats,
+ 'subtitles': subtitles,
+ })
+ return info_dict
+
+
+# Taken from https://github.com/add-ons/plugin.video.viervijfzes/blob/master/resources/lib/viervijfzes/auth_awsidp.py
+# Released into Public domain by https://github.com/michaelarnauts
+
+class InvalidLoginException(ExtractorError):
+ """ The login credentials are invalid """
+
+
+class AuthenticationException(ExtractorError):
+ """ Something went wrong while logging in """
+
+
+class AwsIdp:
+ """ AWS Identity Provider """
+
+ def __init__(self, ie, pool_id, client_id):
+ """
+ :param InfoExtractor ie: The extractor that instantiated this class.
+ :param str pool_id: The AWS user pool to connect to (format: <region>_<poolid>).
+ E.g.: eu-west-1_aLkOfYN3T
+ :param str client_id: The client application ID (the ID of the application connecting)
+ """
+
+ self.ie = ie
+
+ self.pool_id = pool_id
+ if "_" not in self.pool_id:
+ raise ValueError("Invalid pool_id format. Should be <region>_<poolid>.")
+
+ self.client_id = client_id
+ self.region = self.pool_id.split("_")[0]
+ self.url = "https://cognito-idp.%s.amazonaws.com/" % (self.region,)
+
+ # Initialize the values
+ # https://github.com/aws/amazon-cognito-identity-js/blob/master/src/AuthenticationHelper.js#L22
+ self.n_hex = 'FFFFFFFFFFFFFFFFC90FDAA22168C234C4C6628B80DC1CD1' + \
+ '29024E088A67CC74020BBEA63B139B22514A08798E3404DD' + \
+ 'EF9519B3CD3A431B302B0A6DF25F14374FE1356D6D51C245' + \
+ 'E485B576625E7EC6F44C42E9A637ED6B0BFF5CB6F406B7ED' + \
+ 'EE386BFB5A899FA5AE9F24117C4B1FE649286651ECE45B3D' + \
+ 'C2007CB8A163BF0598DA48361C55D39A69163FA8FD24CF5F' + \
+ '83655D23DCA3AD961C62F356208552BB9ED529077096966D' + \
+ '670C354E4ABC9804F1746C08CA18217C32905E462E36CE3B' + \
+ 'E39E772C180E86039B2783A2EC07A28FB5C55DF06F4C52C9' + \
+ 'DE2BCBF6955817183995497CEA956AE515D2261898FA0510' + \
+ '15728E5A8AAAC42DAD33170D04507A33A85521ABDF1CBA64' + \
+ 'ECFB850458DBEF0A8AEA71575D060C7DB3970F85A6E1E4C7' + \
+ 'ABF5AE8CDB0933D71E8C94E04A25619DCEE3D2261AD2EE6B' + \
+ 'F12FFA06D98A0864D87602733EC86A64521F2B18177B200C' + \
+ 'BBE117577A615D6C770988C0BAD946E208E24FA074E5AB31' + \
+ '43DB5BFCE0FD108E4B82D120A93AD2CAFFFFFFFFFFFFFFFF'
+
+ # https://github.com/aws/amazon-cognito-identity-js/blob/master/src/AuthenticationHelper.js#L49
+ self.g_hex = '2'
+ self.info_bits = bytearray('Caldera Derived Key', 'utf-8')
+
+ self.big_n = self.__hex_to_long(self.n_hex)
+ self.g = self.__hex_to_long(self.g_hex)
+ self.k = self.__hex_to_long(self.__hex_hash('00' + self.n_hex + '0' + self.g_hex))
+ self.small_a_value = self.__generate_random_small_a()
+ self.large_a_value = self.__calculate_a()
+
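+ # SRP flow implemented below: InitiateAuth with SRP_A, then answer the
+ # PASSWORD_VERIFIER challenge via RespondToAuthChallenge with an HMAC
+ # signature derived from the password (see the helper methods).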
+ def authenticate(self, username, password):
+ """ Authenticate with a username and password. """
+ # Step 1: First initiate an authentication request
+ auth_data_dict = self.__get_authentication_request(username)
+ auth_data = json.dumps(auth_data_dict).encode("utf-8")
+ auth_headers = {
+ "X-Amz-Target": "AWSCognitoIdentityProviderService.InitiateAuth",
+ "Accept-Encoding": "identity",
+ "Content-Type": "application/x-amz-json-1.1"
+ }
+ auth_response_json = self.ie._download_json(
+ self.url, None, data=auth_data, headers=auth_headers,
+ note='Authenticating username', errnote='Invalid username')
+ challenge_parameters = auth_response_json.get("ChallengeParameters")
+
+ if auth_response_json.get("ChallengeName") != "PASSWORD_VERIFIER":
+ raise AuthenticationException(auth_response_json["message"])
+
+ # Step 2: Respond to the Challenge with a valid ChallengeResponse
+ challenge_request = self.__get_challenge_response_request(challenge_parameters, password)
+ challenge_data = json.dumps(challenge_request).encode("utf-8")
+ challenge_headers = {
+ "X-Amz-Target": "AWSCognitoIdentityProviderService.RespondToAuthChallenge",
+ "Content-Type": "application/x-amz-json-1.1"
+ }
+ auth_response_json = self.ie._download_json(
+ self.url, None, data=challenge_data, headers=challenge_headers,
+ note='Authenticating password', errnote='Invalid password')
+
+ if 'message' in auth_response_json:
+ raise InvalidLoginException(auth_response_json['message'])
+ return (
+ auth_response_json['AuthenticationResult']['IdToken'],
+ auth_response_json['AuthenticationResult']['RefreshToken']
+ )
+
+ def __get_authentication_request(self, username):
+ """
+
+ :param str username: The username to use
+
+ :return: A full Authorization request.
+ :rtype: dict
+ """
+ auth_request = {
+ "AuthParameters": {
+ "USERNAME": username,
+ "SRP_A": self.__long_to_hex(self.large_a_value)
+ },
+ "AuthFlow": "USER_SRP_AUTH",
+ "ClientId": self.client_id
+ }
+ return auth_request
+
+ def __get_challenge_response_request(self, challenge_parameters, password):
+ """ Create a Challenge Response Request object.
+
+ :param dict[str,str|int] challenge_parameters: The parameters for the challenge.
+ :param str password: The password.
+
+ :return: A valid and full request data object to use as a response for a challenge.
+ :rtype: dict
+ """
+ user_id = challenge_parameters["USERNAME"]
+ user_id_for_srp = challenge_parameters["USER_ID_FOR_SRP"]
+ srp_b = challenge_parameters["SRP_B"]
+ salt = challenge_parameters["SALT"]
+ secret_block = challenge_parameters["SECRET_BLOCK"]
+
+ timestamp = self.__get_current_timestamp()
+
+ # Get a HKDF key for the password, SrpB and the Salt
+ hkdf = self.__get_hkdf_key_for_password(
+ user_id_for_srp,
+ password,
+ self.__hex_to_long(srp_b),
+ salt
+ )
+ secret_block_bytes = base64.standard_b64decode(secret_block)
+
+ # the message is a combo of the pool_id, provided SRP userId, the Secret and Timestamp
+ msg = \
+ bytearray(self.pool_id.split('_')[1], 'utf-8') + \
+ bytearray(user_id_for_srp, 'utf-8') + \
+ bytearray(secret_block_bytes) + \
+ bytearray(timestamp, 'utf-8')
+ hmac_obj = hmac.new(hkdf, msg, digestmod=hashlib.sha256)
+ signature_string = base64.standard_b64encode(hmac_obj.digest()).decode('utf-8')
+ challenge_request = {
+ "ChallengeResponses": {
+ "USERNAME": user_id,
+ "TIMESTAMP": timestamp,
+ "PASSWORD_CLAIM_SECRET_BLOCK": secret_block,
+ "PASSWORD_CLAIM_SIGNATURE": signature_string
+ },
+ "ChallengeName": "PASSWORD_VERIFIER",
+ "ClientId": self.client_id
+ }
+ return challenge_request
+
+ def __get_hkdf_key_for_password(self, username, password, server_b_value, salt):
+ """ Calculates the final hkdf based on computed S value, and computed U value and the key.
+
+ :param str username: Username.
+ :param str password: Password.
+ :param int server_b_value: Server B value.
+ :param int salt: Generated salt.
+
+ :return: Computed HKDF value.
+ :rtype: bytes
+ """
+
+ u_value = self.__calculate_u(self.large_a_value, server_b_value)
+ if u_value == 0:
+ raise ValueError('U cannot be zero.')
+ username_password = '%s%s:%s' % (self.pool_id.split('_')[1], username, password)
+ username_password_hash = self.__hash_sha256(username_password.encode('utf-8'))
+
+ x_value = self.__hex_to_long(self.__hex_hash(self.__pad_hex(salt) + username_password_hash))
+ g_mod_pow_xn = pow(self.g, x_value, self.big_n)
+ int_value2 = server_b_value - self.k * g_mod_pow_xn
+ s_value = pow(int_value2, self.small_a_value + u_value * x_value, self.big_n)
+ hkdf = self.__compute_hkdf(
+ bytearray.fromhex(self.__pad_hex(s_value)),
+ bytearray.fromhex(self.__pad_hex(self.__long_to_hex(u_value)))
+ )
+ return hkdf
+
+ def __compute_hkdf(self, ikm, salt):
+ """ Standard hkdf algorithm
+
+ :param {Buffer} ikm Input key material.
+ :param {Buffer} salt Salt value.
+ :return {Buffer} Strong key material.
+ """
+
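+ # RFC 5869 HKDF: extract step (prk = HMAC(salt, ikm)), then a single
+ # expand block HMAC(prk, info || 0x01), truncated to a 16-byte key.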
+ prk = hmac.new(salt, ikm, hashlib.sha256).digest()
+ info_bits_update = self.info_bits + bytearray(chr(1), 'utf-8')
+ hmac_hash = hmac.new(prk, info_bits_update, hashlib.sha256).digest()
+ return hmac_hash[:16]
+
+ def __calculate_u(self, big_a, big_b):
+ """ Calculate the client's value U which is the hash of A and B
+
+ :param int big_a: Large A value.
+ :param int big_b: Server B value.
+
+ :return: Computed U value.
+ :rtype: int
+ """
+
+ u_hex_hash = self.__hex_hash(self.__pad_hex(big_a) + self.__pad_hex(big_b))
+ return self.__hex_to_long(u_hex_hash)
+
+ def __generate_random_small_a(self):
+ """ Helper function to generate a random big integer
+
+ :return: A random value.
+ :rtype: int
+ """
+ random_long_int = self.__get_random(128)
+ return random_long_int % self.big_n
+
+ def __calculate_a(self):
+ """ Calculate the client's public value A = g^a%N with the generated random number a
+
+ :return: Computed large A.
+ :rtype: int
+ """
+
+ big_a = pow(self.g, self.small_a_value, self.big_n)
+ # safety check
+ if (big_a % self.big_n) == 0:
+ raise ValueError('Safety check for A failed')
+ return big_a
+
+ @staticmethod
+ def __long_to_hex(long_num):
+ return '%x' % long_num
+
+ @staticmethod
+ def __hex_to_long(hex_string):
+ return int(hex_string, 16)
+
+ @staticmethod
+ def __hex_hash(hex_string):
+ return AwsIdp.__hash_sha256(bytearray.fromhex(hex_string))
+
+ @staticmethod
+ def __hash_sha256(buf):
+ """AuthenticationHelper.hash"""
+ digest = hashlib.sha256(buf).hexdigest()
+ return (64 - len(digest)) * '0' + digest
+
+ @staticmethod
+ def __pad_hex(long_int):
+ """ Converts a Long integer (or hex string) to hex format padded with zeroes for hashing
+
+ :param int|str long_int: Number or string to pad.
+
+ :return: Padded hex string.
+ :rtype: str
+ """
+
+ if not isinstance(long_int, str):
+ hash_str = AwsIdp.__long_to_hex(long_int)
+ else:
+ hash_str = long_int
+ if len(hash_str) % 2 == 1:
+ hash_str = '0%s' % hash_str
+ elif hash_str[0] in '89ABCDEFabcdef':
+ hash_str = '00%s' % hash_str
+ return hash_str
+
+ @staticmethod
+ def __get_random(nbytes):
+ random_hex = binascii.hexlify(os.urandom(nbytes))
+ return AwsIdp.__hex_to_long(random_hex)
+
+ @staticmethod
+ def __get_current_timestamp():
+ """ Creates a timestamp with the correct English format.
+
+ :return: timestamp in format 'Sun Jan 27 19:00:04 UTC 2019'
+ :rtype: str
+ """
+
+ # We need US only data, so we cannot just do a strftime:
+ # Sun Jan 27 19:00:04 UTC 2019
+ months = [None, 'Jan', 'Feb', 'Mar', 'Apr', 'May', 'Jun', 'Jul', 'Aug', 'Sep', 'Oct', 'Nov', 'Dec']
+ days = ['Mon', 'Tue', 'Wed', 'Thu', 'Fri', 'Sat', 'Sun']
+
+ time_now = datetime.datetime.now(datetime.timezone.utc)
+ format_string = "{} {} {} %H:%M:%S UTC %Y".format(days[time_now.weekday()], months[time_now.month], time_now.day)
+ time_string = time_now.strftime(format_string)
+ return time_string
+
+ def __str__(self):
+ return "AWS IDP Client for:\nRegion: %s\nPoolId: %s\nAppId: %s" % (
+ self.region, self.pool_id.split("_")[1], self.client_id
+ )
diff --git a/yt_dlp/extractor/gopro.py b/yt_dlp/extractor/gopro.py
new file mode 100644
index 0000000..ec1595b
--- /dev/null
+++ b/yt_dlp/extractor/gopro.py
@@ -0,0 +1,105 @@
+from .common import InfoExtractor
+from ..utils import (
+ int_or_none,
+ remove_end,
+ str_or_none,
+ try_get,
+ unified_timestamp,
+ url_or_none,
+)
+
+
+class GoProIE(InfoExtractor):
+ _VALID_URL = r'https?://(?:www\.)?gopro\.com/v/(?P<id>[A-Za-z0-9]+)'
+
+ _TESTS = [{
+ 'url': 'https://gopro.com/v/ZNVvED8QDzR5V',
+ 'info_dict': {
+ 'id': 'ZNVvED8QDzR5V',
+ 'title': 'My GoPro Adventure - 9/19/21',
+ 'thumbnail': r're:https?://.+',
+ 'ext': 'mp4',
+ 'timestamp': 1632072947,
+ 'upload_date': '20210919',
+ 'uploader_id': 'fireydive30018',
+ 'duration': 396062,
+ }
+ }, {
+ 'url': 'https://gopro.com/v/KRm6Vgp2peg4e',
+ 'info_dict': {
+ 'id': 'KRm6Vgp2peg4e',
+ 'title': 'じゃがいも カリカリ オーブン焼き',
+ 'thumbnail': r're:https?://.+',
+ 'ext': 'mp4',
+ 'timestamp': 1607231125,
+ 'upload_date': '20201206',
+ 'uploader_id': 'dc9bcb8b-47d2-47c6-afbc-4c48f9a3769e',
+ 'duration': 45187,
+ 'track': 'The Sky Machine',
+ }
+ }, {
+ 'url': 'https://gopro.com/v/kVrK9wlJvBMwn',
+ 'info_dict': {
+ 'id': 'kVrK9wlJvBMwn',
+ 'title': 'DARKNESS',
+ 'thumbnail': r're:https?://.+',
+ 'ext': 'mp4',
+ 'timestamp': 1594183735,
+ 'upload_date': '20200708',
+ 'uploader_id': '闇夜乃皇帝',
+ 'duration': 313075,
+ 'track': 'Battery (Live)',
+ 'artist': 'Metallica',
+ }
+ }]
+
+ def _real_extract(self, url):
+ video_id = self._match_id(url)
+ webpage = self._download_webpage(url, video_id)
+
+ metadata = self._search_json(
+ r'window\.__reflectData\s*=', webpage, 'metadata', video_id)
+
+ video_info = metadata['collectionMedia'][0]
+ media_data = self._download_json(
+ 'https://api.gopro.com/media/%s/download' % video_info['id'], video_id)
+
+ formats = []
+ for fmt in try_get(media_data, lambda x: x['_embedded']['variations']) or []:
+ format_url = url_or_none(fmt.get('url'))
+ if not format_url:
+ continue
+ formats.append({
+ 'url': format_url,
+ 'format_id': str_or_none(fmt.get('quality')),
+ 'format_note': str_or_none(fmt.get('label')),
+ 'ext': str_or_none(fmt.get('type')),
+ 'width': int_or_none(fmt.get('width')),
+ 'height': int_or_none(fmt.get('height')),
+ })
+
+ title = str_or_none(
+ try_get(metadata, lambda x: x['collection']['title'])
+ or self._html_search_meta(['og:title', 'twitter:title'], webpage)
+ or remove_end(self._html_search_regex(
+ r'<title[^>]*>([^<]+)</title>', webpage, 'title', fatal=False), ' | GoPro'))
+ if title:
+ title = title.replace('\n', ' ')
+
+ return {
+ 'id': video_id,
+ 'title': title,
+ 'formats': formats,
+ 'thumbnail': url_or_none(
+ self._html_search_meta(['og:image', 'twitter:image'], webpage)),
+ 'timestamp': unified_timestamp(
+ try_get(metadata, lambda x: x['collection']['created_at'])),
+ 'uploader_id': str_or_none(
+ try_get(metadata, lambda x: x['account']['nickname'])),
+ 'duration': int_or_none(
+ video_info.get('source_duration')),
+ 'artist': str_or_none(
+ video_info.get('music_track_artist')) or None,
+ 'track': str_or_none(
+ video_info.get('music_track_name')) or None,
+ }
diff --git a/yt_dlp/extractor/goshgay.py b/yt_dlp/extractor/goshgay.py
new file mode 100644
index 0000000..9a1f32b
--- /dev/null
+++ b/yt_dlp/extractor/goshgay.py
@@ -0,0 +1,48 @@
+from .common import InfoExtractor
+from ..compat import (
+ compat_parse_qs,
+)
+from ..utils import (
+ parse_duration,
+)
+
+
+class GoshgayIE(InfoExtractor):
+ _VALID_URL = r'https?://(?:www\.)?goshgay\.com/video(?P<id>\d+?)(?:$|/)'
+ _TEST = {
+ 'url': 'http://www.goshgay.com/video299069/diesel_sfw_xxx_video',
+ 'md5': '4b6db9a0a333142eb9f15913142b0ed1',
+ 'info_dict': {
+ 'id': '299069',
+ 'ext': 'flv',
+ 'title': 'DIESEL SFW XXX Video',
+ 'thumbnail': r're:^http://.*\.jpg$',
+ 'duration': 80,
+ 'age_limit': 18,
+ }
+ }
+
+ def _real_extract(self, url):
+ video_id = self._match_id(url)
+ webpage = self._download_webpage(url, video_id)
+
+ title = self._html_search_regex(
+ r'<h2>(.*?)<', webpage, 'title')
+ duration = parse_duration(self._html_search_regex(
+ r'<span class="duration">\s*-?\s*(.*?)</span>',
+ webpage, 'duration', fatal=False))
+
+ flashvars = compat_parse_qs(self._html_search_regex(
+ r'<embed.+?id="flash-player-embed".+?flashvars="([^"]+)"',
+ webpage, 'flashvars'))
+ thumbnail = flashvars.get('url_bigthumb', [None])[0]
+ video_url = flashvars['flv_url'][0]
+
+ return {
+ 'id': video_id,
+ 'url': video_url,
+ 'title': title,
+ 'thumbnail': thumbnail,
+ 'duration': duration,
+ 'age_limit': 18,
+ }
diff --git a/yt_dlp/extractor/gotostage.py b/yt_dlp/extractor/gotostage.py
new file mode 100644
index 0000000..112293b
--- /dev/null
+++ b/yt_dlp/extractor/gotostage.py
@@ -0,0 +1,70 @@
+import json
+
+from .common import InfoExtractor
+from ..compat import compat_str
+from ..utils import (
+ try_get,
+ url_or_none,
+)
+
+
+class GoToStageIE(InfoExtractor):
+ _VALID_URL = r'https?://(?:www\.)?gotostage\.com/channel/[a-z0-9]+/recording/(?P<id>[a-z0-9]+)/watch'
+ _TESTS = [{
+ 'url': 'https://www.gotostage.com/channel/8901680603948959494/recording/60bb55548d434f21b9ce4f0e225c4895/watch',
+ 'md5': 'ca72ce990cdcd7a2bd152f7217e319a2',
+ 'info_dict': {
+ 'id': '60bb55548d434f21b9ce4f0e225c4895',
+ 'ext': 'mp4',
+ 'title': 'What is GoToStage?',
+ 'thumbnail': r're:^https?://.*\.jpg$',
+ 'duration': 93.924711
+ }
+ }, {
+ 'url': 'https://www.gotostage.com/channel/bacc3d3535b34bafacc3f4ef8d4df78a/recording/831e74cd3e0042be96defba627b6f676/watch?source=HOMEPAGE',
+ 'only_matching': True,
+ }]
+
+ def _real_extract(self, url):
+ video_id = self._match_id(url)
+ metadata = self._download_json(
+ 'https://api.gotostage.com/contents?ids=%s' % video_id,
+ video_id,
+ note='Downloading video metadata',
+ errnote='Unable to download video metadata')[0]
+
+ registration_data = {
+ 'product': metadata['product'],
+ 'resourceType': metadata['contentType'],
+ 'productReferenceKey': metadata['productRefKey'],
+ 'firstName': 'foo',
+ 'lastName': 'bar',
+ 'email': 'foobar@example.com'
+ }
+
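+ # Register a throwaway viewer with dummy details; HTTP 409 (presumably
+ # "already registered") is tolerated via expected_status since the
+ # response still carries the registrationKey needed to fetch the asset.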
+ registration_response = self._download_json(
+ 'https://api-registrations.logmeininc.com/registrations',
+ video_id,
+ data=json.dumps(registration_data).encode(),
+ expected_status=409,
+ headers={'Content-Type': 'application/json'},
+ note='Register user',
+ errnote='Unable to register user')
+
+ content_response = self._download_json(
+ 'https://api.gotostage.com/contents/%s/asset' % video_id,
+ video_id,
+ headers={'x-registrantkey': registration_response['registrationKey']},
+ note='Get download url',
+ errnote='Unable to get download url')
+
+ return {
+ 'id': video_id,
+ 'title': try_get(metadata, lambda x: x['title'], compat_str),
+ 'url': try_get(content_response, lambda x: x['cdnLocation'], compat_str),
+ 'ext': 'mp4',
+ 'thumbnail': url_or_none(try_get(metadata, lambda x: x['thumbnail']['location'])),
+ 'duration': try_get(metadata, lambda x: x['duration'], float),
+ 'categories': [try_get(metadata, lambda x: x['category'], compat_str)],
+ 'is_live': False
+ }
diff --git a/yt_dlp/extractor/gputechconf.py b/yt_dlp/extractor/gputechconf.py
new file mode 100644
index 0000000..2d13bf4
--- /dev/null
+++ b/yt_dlp/extractor/gputechconf.py
@@ -0,0 +1,32 @@
+from .common import InfoExtractor
+
+
+class GPUTechConfIE(InfoExtractor):
+ _VALID_URL = r'https?://on-demand\.gputechconf\.com/gtc/2015/video/S(?P<id>\d+)\.html'
+ _TEST = {
+ 'url': 'http://on-demand.gputechconf.com/gtc/2015/video/S5156.html',
+ 'md5': 'a8862a00a0fd65b8b43acc5b8e33f798',
+ 'info_dict': {
+ 'id': '5156',
+ 'ext': 'mp4',
+ 'title': 'Coordinating More Than 3 Million CUDA Threads for Social Network Analysis',
+ 'duration': 1219,
+ }
+ }
+
+ def _real_extract(self, url):
+ video_id = self._match_id(url)
+ webpage = self._download_webpage(url, video_id)
+
+ root_path = self._search_regex(
+ r'var\s+rootPath\s*=\s*"([^"]+)', webpage, 'root path',
+ default='http://evt.dispeak.com/nvidia/events/gtc15/')
+ xml_file_id = self._search_regex(
+ r'var\s+xmlFileId\s*=\s*"([^"]+)', webpage, 'xml file id')
+
+ return {
+ '_type': 'url_transparent',
+ 'id': video_id,
+ 'url': '%sxml/%s.xml' % (root_path, xml_file_id),
+ 'ie_key': 'DigitallySpeaking',
+ }
diff --git a/yt_dlp/extractor/gronkh.py b/yt_dlp/extractor/gronkh.py
new file mode 100644
index 0000000..1ae0a68
--- /dev/null
+++ b/yt_dlp/extractor/gronkh.py
@@ -0,0 +1,120 @@
+import functools
+
+from .common import InfoExtractor
+from ..utils import (
+ OnDemandPagedList,
+ float_or_none,
+ traverse_obj,
+ unified_strdate,
+)
+
+
+class GronkhIE(InfoExtractor):
+ _VALID_URL = r'https?://(?:www\.)?gronkh\.tv/(?:watch/)?streams?/(?P<id>\d+)'
+
+ _TESTS = [{
+ 'url': 'https://gronkh.tv/streams/657',
+ 'info_dict': {
+ 'id': '657',
+ 'ext': 'mp4',
+ 'title': 'H.O.R.D.E. - DAS ZWEiTE ZEiTALTER 🎲 Session 1',
+ 'view_count': int,
+ 'thumbnail': 'https://01.cdn.vod.farm/preview/9e2555d3a23bf4e5c5b7c6b3b70a9d84.jpg',
+ 'upload_date': '20221111',
+ 'chapters': 'count:3',
+ 'duration': 31463,
+ },
+ 'params': {'skip_download': True}
+ }, {
+ 'url': 'https://gronkh.tv/stream/536',
+ 'info_dict': {
+ 'id': '536',
+ 'ext': 'mp4',
+ 'title': 'GTV0536, 2021-10-01 - MARTHA IS DEAD #FREiAB1830 !FF7 !horde !archiv',
+ 'view_count': int,
+ 'thumbnail': 'https://01.cdn.vod.farm/preview/6436746cce14e25f751260a692872b9b.jpg',
+ 'upload_date': '20211001',
+ 'duration': 32058,
+ },
+ 'params': {'skip_download': True}
+ }, {
+ 'url': 'https://gronkh.tv/watch/stream/546',
+ 'only_matching': True,
+ }]
+
+ def _real_extract(self, url):
+ video_id = self._match_id(url)
+ data_json = self._download_json(f'https://api.gronkh.tv/v1/video/info?episode={video_id}', video_id)
+ m3u8_url = self._download_json(f'https://api.gronkh.tv/v1/video/playlist?episode={video_id}', video_id)['playlist_url']
+ formats, subtitles = self._extract_m3u8_formats_and_subtitles(m3u8_url, video_id)
+ if data_json.get('vtt_url'):
+ subtitles.setdefault('en', []).append({
+ 'url': data_json['vtt_url'],
+ 'ext': 'vtt',
+ })
+ return {
+ 'id': video_id,
+ 'title': data_json.get('title'),
+ 'view_count': data_json.get('views'),
+ 'thumbnail': data_json.get('preview_url'),
+ 'upload_date': unified_strdate(data_json.get('created_at')),
+ 'formats': formats,
+ 'subtitles': subtitles,
+ 'duration': float_or_none(data_json.get('source_length')),
+ 'chapters': traverse_obj(data_json, (
+ 'chapters', lambda _, v: float_or_none(v['offset']) is not None, {
+ 'title': 'title',
+ 'start_time': ('offset', {float_or_none}),
+ })) or None,
+ }
+
+
+class GronkhFeedIE(InfoExtractor):
+ _VALID_URL = r'https?://(?:www\.)?gronkh\.tv(?:/feed)?/?(?:#|$)'
+ IE_NAME = 'gronkh:feed'
+
+ _TESTS = [{
+ 'url': 'https://gronkh.tv/feed',
+ 'info_dict': {
+ 'id': 'feed',
+ },
+ 'playlist_count': 16,
+ }, {
+ 'url': 'https://gronkh.tv',
+ 'only_matching': True,
+ }]
+
+ def _entries(self):
+ for type_ in ('recent', 'views'):
+ info = self._download_json(
+ f'https://api.gronkh.tv/v1/video/discovery/{type_}', 'feed', note=f'Downloading {type_} API JSON')
+ for item in traverse_obj(info, ('discovery', ...)) or []:
+ yield self.url_result(f'https://gronkh.tv/watch/stream/{item["episode"]}', GronkhIE, item.get('title'))
+
+ def _real_extract(self, url):
+ return self.playlist_result(self._entries(), 'feed')
+
+
+class GronkhVodsIE(InfoExtractor):
+ _VALID_URL = r'https?://(?:www\.)?gronkh\.tv/vods/streams/?(?:#|$)'
+ IE_NAME = 'gronkh:vods'
+
+ _TESTS = [{
+ 'url': 'https://gronkh.tv/vods/streams',
+ 'info_dict': {
+ 'id': 'vods',
+ },
+ 'playlist_mincount': 150,
+ }]
+ _PER_PAGE = 25
+
+ def _fetch_page(self, page):
+ items = traverse_obj(self._download_json(
+ 'https://api.gronkh.tv/v1/search', 'vods', query={'offset': self._PER_PAGE * page, 'first': self._PER_PAGE},
+ note=f'Downloading stream video page {page + 1}'), ('results', 'videos', ...))
+ for item in items or []:
+ yield self.url_result(f'https://gronkh.tv/watch/stream/{item["episode"]}', GronkhIE, item['episode'], item.get('title'))
+
+ def _real_extract(self, url):
+ entries = OnDemandPagedList(functools.partial(self._fetch_page), self._PER_PAGE)
+ return self.playlist_result(entries, 'vods')
diff --git a/yt_dlp/extractor/groupon.py b/yt_dlp/extractor/groupon.py
new file mode 100644
index 0000000..c1cbda3
--- /dev/null
+++ b/yt_dlp/extractor/groupon.py
@@ -0,0 +1,64 @@
+from .common import InfoExtractor
+
+
+class GrouponIE(InfoExtractor):
+ _VALID_URL = r'https?://(?:www\.)?groupon\.com/deals/(?P<id>[^/?#&]+)'
+
+ _TEST = {
+ 'url': 'https://www.groupon.com/deals/bikram-yoga-huntington-beach-2#ooid=tubGNycTo_9Uxg82uESj4i61EYX8nyuf',
+ 'info_dict': {
+ 'id': 'bikram-yoga-huntington-beach-2',
+ 'title': '$49 for 10 Yoga Classes or One Month of Unlimited Classes at Bikram Yoga Huntington Beach ($180 Value)',
+ 'description': 'Studio kept at 105 degrees and 40% humidity with anti-microbial and anti-slip Flotex flooring; certified instructors',
+ },
+ 'playlist': [{
+ 'md5': '42428ce8a00585f9bc36e49226eae7a1',
+ 'info_dict': {
+ 'id': 'fk6OhWpXgIQ',
+ 'ext': 'mp4',
+ 'title': 'Bikram Yoga Huntington Beach | Orange County !tubGNycTo@9Uxg82uESj4i61EYX8nyuf',
+ 'description': 'md5:d41d8cd98f00b204e9800998ecf8427e',
+ 'duration': 45,
+ 'upload_date': '20160405',
+ 'uploader_id': 'groupon',
+ 'uploader': 'Groupon',
+ },
+ 'add_ie': ['Youtube'],
+ }],
+ 'params': {
+ 'skip_download': True,
+ },
+ }
+
+ _PROVIDERS = {
+ 'youtube': ('%s', 'Youtube'),
+ }
+
+ def _real_extract(self, url):
+ playlist_id = self._match_id(url)
+ webpage = self._download_webpage(url, playlist_id)
+
+ payload = self._parse_json(self._search_regex(
+ r'(?:var\s+|window\.)payload\s*=\s*(.*?);\n', webpage, 'payload'), playlist_id)
+ videos = payload['carousel'].get('dealVideos', [])
+ entries = []
+ for v in videos:
+ provider = v.get('provider')
+ video_id = v.get('media') or v.get('id') or v.get('baseURL')
+ if not provider or not video_id:
+ continue
+ url_pattern, ie_key = self._PROVIDERS.get(provider.lower(), (None, None))
+ if not url_pattern:
+ self.report_warning(
+ '%s: Unsupported video provider %s, skipping video' %
+ (playlist_id, provider))
+ continue
+ entries.append(self.url_result(url_pattern % video_id, ie_key))
+
+ return {
+ '_type': 'playlist',
+ 'id': playlist_id,
+ 'entries': entries,
+ 'title': self._og_search_title(webpage),
+ 'description': self._og_search_description(webpage),
+ }
diff --git a/yt_dlp/extractor/harpodeon.py b/yt_dlp/extractor/harpodeon.py
new file mode 100644
index 0000000..46eaddb
--- /dev/null
+++ b/yt_dlp/extractor/harpodeon.py
@@ -0,0 +1,70 @@
+from .common import InfoExtractor
+from ..utils import int_or_none
+
+
+class HarpodeonIE(InfoExtractor):
+ _VALID_URL = r'https?://(?:www\.)?harpodeon\.com/(?:video|preview)/\w+/(?P<id>\d+)'
+ _TESTS = [{
+ 'url': 'https://www.harpodeon.com/video/The_Smoking_Out_of_Bella_Butts/268068288',
+ 'md5': '727371564a6a9ebccef2073535b5b6bd',
+ 'skip': 'Free video could become unavailable',
+ 'info_dict': {
+ 'id': '268068288',
+ 'ext': 'mp4',
+ 'title': 'The Smoking Out of Bella Butts',
+ 'description': 'md5:47e16bdb41fc8a79c83ab83af11c8b77',
+ 'creator': 'Vitagraph Company of America',
+ 'release_year': 1915,
+ }
+ }, {
+ 'url': 'https://www.harpodeon.com/preview/The_Smoking_Out_of_Bella_Butts/268068288',
+ 'md5': '6dfea5412845f690c7331be703f884db',
+ 'info_dict': {
+ 'id': '268068288',
+ 'ext': 'mp4',
+ 'title': 'The Smoking Out of Bella Butts',
+ 'description': 'md5:47e16bdb41fc8a79c83ab83af11c8b77',
+ 'creator': 'Vitagraph Company of America',
+ 'release_year': 1915,
+ }
+ }, {
+ 'url': 'https://www.harpodeon.com/preview/Behind_the_Screen/421838710',
+ 'md5': '7979df9ca04637282cb7d172ab3a9c3b',
+ 'info_dict': {
+ 'id': '421838710',
+ 'ext': 'mp4',
+ 'title': 'Behind the Screen',
+ 'description': 'md5:008972a3dc51fba3965ee517d2ba9155',
+ 'creator': 'Lone Star Corporation',
+ 'release_year': 1916,
+ }
+ }]
+
+ def _real_extract(self, url):
+ video_id = self._match_id(url)
+ webpage = self._download_webpage(url, video_id)
+
+ title, creator, release_year = self._search_regex(
+ r'''(?x)
+ <div[^>]+videoInfo[^<]*<h2[^>]*>(?P<title>[^>]+)</h2>
+ (?:\s*<p[^>]*>\((?P<creator>.+),\s*)?(?P<release_year>\d{4})?''',
+ webpage, 'title', group=('title', 'creator', 'release_year'),
+ fatal=False) or (None, None, None)
+
+ hp_base = self._html_search_regex(r'hpBase\(\s*["\']([^"\']+)', webpage, 'hp_base')
+
+ hp_inject_video, hp_resolution = self._search_regex(
+ r'''(?x)
+ hpInjectVideo\([\'\"](?P<hp_inject_video>\w+)[\'\"],
+ [\'\"](?P<hp_resolution>\d+)[\'\"]''',
+ webpage, 'hp_inject_video', group=['hp_inject_video', 'hp_resolution'])
+
+ return {
+ 'id': video_id,
+ 'title': title,
+ 'url': f'{hp_base}{hp_inject_video}_{hp_resolution}.mp4',
+ 'http_headers': {'Referer': url},
+ 'description': self._html_search_meta('description', webpage, fatal=False),
+ 'creator': creator,
+ 'release_year': int_or_none(release_year),
+ }
diff --git a/yt_dlp/extractor/hbo.py b/yt_dlp/extractor/hbo.py
new file mode 100644
index 0000000..530bdb7
--- /dev/null
+++ b/yt_dlp/extractor/hbo.py
@@ -0,0 +1,171 @@
+import re
+
+from .common import InfoExtractor
+from ..utils import (
+ int_or_none,
+ parse_duration,
+ urljoin,
+ xpath_element,
+ xpath_text,
+)
+
+
+class HBOBaseIE(InfoExtractor):
+ _FORMATS_INFO = {
+ 'pro7': {
+ 'width': 1280,
+ 'height': 720,
+ },
+ '1920': {
+ 'width': 1280,
+ 'height': 720,
+ },
+ 'pro6': {
+ 'width': 768,
+ 'height': 432,
+ },
+ '640': {
+ 'width': 768,
+ 'height': 432,
+ },
+ 'pro5': {
+ 'width': 640,
+ 'height': 360,
+ },
+ 'highwifi': {
+ 'width': 640,
+ 'height': 360,
+ },
+ 'high3g': {
+ 'width': 640,
+ 'height': 360,
+ },
+ 'medwifi': {
+ 'width': 400,
+ 'height': 224,
+ },
+ 'med3g': {
+ 'width': 400,
+ 'height': 224,
+ },
+ }
+
+ def _extract_info(self, url, display_id):
+ video_data = self._download_xml(url, display_id)
+ video_id = xpath_text(video_data, 'id', fatal=True)
+ episode_title = title = xpath_text(video_data, 'title', fatal=True)
+ series = xpath_text(video_data, 'program')
+ if series:
+ title = '%s - %s' % (series, title)
+
+ formats = []
+ for source in xpath_element(video_data, 'videos', 'sources', True):
+ if source.tag == 'size':
+ path = xpath_text(source, './/path')
+ if not path:
+ continue
+ width = source.attrib.get('width')
+ format_info = self._FORMATS_INFO.get(width, {})
+ height = format_info.get('height')
+ fmt = {
+ 'url': path,
+ 'format_id': 'http%s' % ('-%dp' % height if height else ''),
+ 'width': format_info.get('width'),
+ 'height': height,
+ }
+ rtmp = re.search(r'^(?P<url>rtmpe?://[^/]+/(?P<app>.+))/(?P<playpath>mp4:.+)$', path)
+ if rtmp:
+ fmt.update({
+ 'url': rtmp.group('url'),
+ 'play_path': rtmp.group('playpath'),
+ 'app': rtmp.group('app'),
+ 'ext': 'flv',
+ 'format_id': fmt['format_id'].replace('http', 'rtmp'),
+ })
+ formats.append(fmt)
+ else:
+ video_url = source.text
+ if not video_url:
+ continue
+ if source.tag == 'tarball':
+ formats.extend(self._extract_m3u8_formats(
+ video_url.replace('.tar', '/base_index_w8.m3u8'),
+ video_id, 'mp4', 'm3u8_native', m3u8_id='hls', fatal=False))
+ elif source.tag == 'hls':
+ m3u8_formats = self._extract_m3u8_formats(
+ video_url.replace('.tar', '/base_index.m3u8'),
+ video_id, 'mp4', 'm3u8_native', m3u8_id='hls', fatal=False)
+ for f in m3u8_formats:
+ if f.get('vcodec') == 'none' and not f.get('tbr'):
+ f['tbr'] = int_or_none(self._search_regex(
+ r'-(\d+)k/', f['url'], 'tbr', default=None))
+ formats.extend(m3u8_formats)
+ elif source.tag == 'dash':
+ formats.extend(self._extract_mpd_formats(
+ video_url.replace('.tar', '/manifest.mpd'),
+ video_id, mpd_id='dash', fatal=False))
+ else:
+ format_info = self._FORMATS_INFO.get(source.tag, {})
+ formats.append({
+ 'format_id': 'http-%s' % source.tag,
+ 'url': video_url,
+ 'width': format_info.get('width'),
+ 'height': format_info.get('height'),
+ })
+
+ thumbnails = []
+ card_sizes = xpath_element(video_data, 'titleCardSizes')
+ if card_sizes is not None:
+ for size in card_sizes:
+ path = xpath_text(size, 'path')
+ if not path:
+ continue
+ width = int_or_none(size.get('width'))
+ thumbnails.append({
+ 'id': width,
+ 'url': path,
+ 'width': width,
+ })
+
+ subtitles = None
+ caption_url = xpath_text(video_data, 'captionUrl')
+ if caption_url:
+ subtitles = {
+ 'en': [{
+ 'url': caption_url,
+ 'ext': 'ttml'
+ }],
+ }
+
+ return {
+ 'id': video_id,
+ 'title': title,
+ 'duration': parse_duration(xpath_text(video_data, 'duration/tv14')),
+ 'series': series,
+ 'episode': episode_title,
+ 'formats': formats,
+ 'thumbnails': thumbnails,
+ 'subtitles': subtitles,
+ }
+
+
+class HBOIE(HBOBaseIE):
+ IE_NAME = 'hbo'
+ _VALID_URL = r'https?://(?:www\.)?hbo\.com/(?:video|embed)(?:/[^/]+)*/(?P<id>[^/?#]+)'
+ _TEST = {
+ 'url': 'https://www.hbo.com/video/game-of-thrones/seasons/season-8/videos/trailer',
+ 'md5': '8126210656f433c452a21367f9ad85b3',
+ 'info_dict': {
+ 'id': '22113301',
+ 'ext': 'mp4',
+ 'title': 'Game of Thrones - Trailer',
+ },
+ 'expected_warnings': ['Unknown MIME type application/mp4 in DASH manifest'],
+ }
+
+ def _real_extract(self, url):
+ display_id = self._match_id(url)
+ webpage = self._download_webpage(url, display_id)
+ location_path = self._parse_json(self._html_search_regex(
+ r'data-state="({.+?})"', webpage, 'state'), display_id)['video']['locationUrl']
+ return self._extract_info(urljoin(url, location_path), display_id)
diff --git a/yt_dlp/extractor/hearthisat.py b/yt_dlp/extractor/hearthisat.py
new file mode 100644
index 0000000..d1a400d
--- /dev/null
+++ b/yt_dlp/extractor/hearthisat.py
@@ -0,0 +1,96 @@
+from .common import InfoExtractor
+from ..utils import (
+ determine_ext,
+ KNOWN_EXTENSIONS,
+ str_to_int,
+)
+
+
+class HearThisAtIE(InfoExtractor):
+ _VALID_URL = r'https?://(?:www\.)?hearthis\.at/(?P<artist>[^/]+)/(?P<title>[A-Za-z0-9\-]+)/?$'
+ _PLAYLIST_URL = 'https://hearthis.at/playlist.php'
+ _TESTS = [{
+ 'url': 'https://hearthis.at/moofi/dr-kreep',
+ 'md5': 'ab6ec33c8fed6556029337c7885eb4e0',
+ 'info_dict': {
+ 'id': '150939',
+ 'ext': 'wav',
+ 'title': 'Moofi - Dr. Kreep',
+ 'thumbnail': r're:^https?://.*\.jpg$',
+ 'timestamp': 1421564134,
+ 'description': 'md5:1adb0667b01499f9d27e97ddfd53852a',
+ 'upload_date': '20150118',
+ 'view_count': int,
+ 'duration': 71,
+ 'genre': 'Experimental',
+ }
+ }, {
+ # 'download' link redirects to the original webpage
+ 'url': 'https://hearthis.at/twitchsf/dj-jim-hopkins-totally-bitchin-80s-dance-mix/',
+ 'md5': '5980ceb7c461605d30f1f039df160c6e',
+ 'info_dict': {
+ 'id': '811296',
+ 'ext': 'mp3',
+ 'title': 'TwitchSF - DJ Jim Hopkins - Totally Bitchin\' 80\'s Dance Mix!',
+ 'description': 'md5:ef26815ca8f483272a87b137ff175be2',
+ 'upload_date': '20160328',
+ 'timestamp': 1459186146,
+ 'thumbnail': r're:^https?://.*\.jpg$',
+ 'view_count': int,
+ 'duration': 4360,
+ 'genre': 'Dance',
+ },
+ }]
+
+ def _real_extract(self, url):
+ m = self._match_valid_url(url)
+ display_id = '{artist:s} - {title:s}'.format(**m.groupdict())
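+ # api-v2.hearthis.at serves JSON metadata for the same artist/title path
+ # as the public site, so rewrite the page URL into an API URL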
+ api_url = url.replace('www.', '').replace('hearthis.at', 'api-v2.hearthis.at')
+ data_json = self._download_json(api_url, display_id)
+ track_id = data_json.get('id')
+ artist_json = data_json.get('user')
+ title = '{} - {}'.format(artist_json.get('username'), data_json.get('title'))
+ genre = data_json.get('genre')
+ description = data_json.get('description')
+ thumbnail = data_json.get('artwork_url') or data_json.get('thumb')
+ view_count = str_to_int(data_json.get('playback_count'))
+ duration = str_to_int(data_json.get('duration'))
+ timestamp = data_json.get('release_timestamp')
+
+ formats = []
+ mp3_url = data_json.get('stream_url')
+
+ if mp3_url:
+ formats.append({
+ 'format_id': 'mp3',
+ 'vcodec': 'none',
+ 'acodec': 'mp3',
+ 'url': mp3_url,
+ 'ext': 'mp3',
+ })
+
+ if data_json.get('download_url'):
+ download_url = data_json['download_url']
+ ext = determine_ext(data_json['download_filename'])
+ if ext in KNOWN_EXTENSIONS:
+ formats.append({
+ 'format_id': ext,
+ 'vcodec': 'none',
+ 'ext': ext,
+ 'url': download_url,
+ 'acodec': ext,
+ 'quality': 2, # Usually better quality
+ })
+
+ return {
+ 'id': track_id,
+ 'display_id': display_id,
+ 'title': title,
+ 'formats': formats,
+ 'thumbnail': thumbnail,
+ 'description': description,
+ 'duration': duration,
+ 'timestamp': timestamp,
+ 'view_count': view_count,
+ 'genre': genre,
+ }
diff --git a/yt_dlp/extractor/heise.py b/yt_dlp/extractor/heise.py
new file mode 100644
index 0000000..27d737c
--- /dev/null
+++ b/yt_dlp/extractor/heise.py
@@ -0,0 +1,207 @@
+import urllib.parse
+
+from .common import InfoExtractor
+from .kaltura import KalturaIE
+from .youtube import YoutubeIE
+from ..utils import (
+ NO_DEFAULT,
+ determine_ext,
+ int_or_none,
+ parse_iso8601,
+ smuggle_url,
+ xpath_text,
+)
+
+
+class HeiseIE(InfoExtractor):
+ _VALID_URL = r'https?://(?:www\.)?heise\.de/(?:[^/]+/)+[^/]+-(?P<id>[0-9]+)\.html'
+ _TESTS = [{
+ # kaltura embed
+ 'url': 'http://www.heise.de/video/artikel/Podcast-c-t-uplink-3-3-Owncloud-Tastaturen-Peilsender-Smartphone-2404147.html',
+ 'info_dict': {
+ 'id': '1_kkrq94sm',
+ 'ext': 'mp4',
+ 'title': "Podcast: c't uplink 3.3 – Owncloud / Tastaturen / Peilsender Smartphone",
+ 'timestamp': 1512734959,
+ 'upload_date': '20171208',
+ 'description': 'md5:c934cbfb326c669c2bcabcbe3d3fcd20',
+ 'thumbnail': 're:^https?://.*/thumbnail/.*',
+ 'duration': 2845,
+ 'view_count': int,
+ },
+ 'params': {
+ 'skip_download': True,
+ },
+ }, {
+ # YouTube embed
+ 'url': 'http://www.heise.de/newsticker/meldung/Netflix-In-20-Jahren-vom-Videoverleih-zum-TV-Revolutionaer-3814130.html',
+ 'md5': 'e403d2b43fea8e405e88e3f8623909f1',
+ 'info_dict': {
+ 'id': '6kmWbXleKW4',
+ 'ext': 'mp4',
+ 'title': 'Neu im September 2017 | Netflix',
+ 'description': 'md5:d6852d1f96bb80760608eed3b907437c',
+ 'upload_date': '20170830',
+ 'uploader': 'Netflix Deutschland, Österreich und Schweiz',
+ 'uploader_id': 'netflixdach',
+ 'categories': ['Entertainment'],
+ 'tags': 'count:27',
+ 'age_limit': 0,
+ 'availability': 'public',
+ 'comment_count': int,
+ 'channel_id': 'UCZqgRlLcvO3Fnx_npQJygcQ',
+ 'thumbnail': 'https://i.ytimg.com/vi_webp/6kmWbXleKW4/maxresdefault.webp',
+ 'uploader_url': 'http://www.youtube.com/user/netflixdach',
+ 'playable_in_embed': True,
+ 'live_status': 'not_live',
+ 'channel_url': 'https://www.youtube.com/channel/UCZqgRlLcvO3Fnx_npQJygcQ',
+ 'view_count': int,
+ 'channel': 'Netflix Deutschland, Österreich und Schweiz',
+ 'channel_follower_count': int,
+ 'like_count': int,
+ 'duration': 67,
+ },
+ 'params': {
+ 'skip_download': True,
+ },
+ }, {
+ 'url': 'https://www.heise.de/video/artikel/nachgehakt-Wie-sichert-das-c-t-Tool-Restric-tor-Windows-10-ab-3700244.html',
+ 'info_dict': {
+ 'id': '1_ntrmio2s',
+ 'ext': 'mp4',
+ 'title': "nachgehakt: Wie sichert das c't-Tool Restric'tor Windows 10 ab?",
+ 'description': 'md5:47e8ffb6c46d85c92c310a512d6db271',
+ 'timestamp': 1512470717,
+ 'upload_date': '20171205',
+ 'duration': 786,
+ 'view_count': int,
+ 'thumbnail': 're:^https?://.*/thumbnail/.*',
+ },
+ 'params': {
+ 'skip_download': True,
+ },
+ }, {
+ # FIXME: Video m3u8 fails to download; issue with Kaltura extractor
+ 'url': 'https://www.heise.de/ct/artikel/c-t-uplink-20-8-Staubsaugerroboter-Xiaomi-Vacuum-2-AR-Brille-Meta-2-und-Android-rooten-3959893.html',
+ 'info_dict': {
+ 'id': '1_59mk80sf',
+ 'ext': 'mp4',
+ 'title': "c't uplink 20.8: Staubsaugerroboter Xiaomi Vacuum 2, AR-Brille Meta 2 und Android rooten",
+ 'description': 'md5:f50fe044d3371ec73a8f79fcebd74afc',
+ 'timestamp': 1517567237,
+ 'upload_date': '20180202',
+ },
+ 'params': {
+ 'skip_download': True,
+ },
+ }, {
+ # videout
+ 'url': 'https://www.heise.de/ct/artikel/c-t-uplink-3-8-Anonyme-SIM-Karten-G-Sync-Monitore-Citizenfour-2440327.html',
+ 'info_dict': {
+ 'id': '2440327',
+ 'ext': 'mp4',
+ 'title': 'c\'t uplink 3.8: Anonyme SIM-Karten, G-Sync-Monitore, Citizenfour',
+ 'thumbnail': 'http://www.heise.de/imagine/yxM2qmol0xV3iFB7qFb70dGvXjc/gallery/',
+ 'description': 'md5:fa164d8c8707dff124a9626d39205f5d',
+ 'timestamp': 1414825200,
+ 'upload_date': '20141101',
+ }
+ }, {
+ 'url': 'http://www.heise.de/ct/artikel/c-t-uplink-3-3-Owncloud-Tastaturen-Peilsender-Smartphone-2403911.html',
+ 'only_matching': True,
+ }, {
+ 'url': 'http://www.heise.de/newsticker/meldung/c-t-uplink-Owncloud-Tastaturen-Peilsender-Smartphone-2404251.html?wt_mc=rss.ho.beitrag.atom',
+ 'only_matching': True,
+ }, {
+ 'url': 'http://www.heise.de/ct/ausgabe/2016-12-Spiele-3214137.html',
+ 'only_matching': True,
+ }]
+
+ def _real_extract(self, url):
+ video_id = self._match_id(url)
+ webpage = self._download_webpage(url, video_id)
+
+ def extract_title(default=NO_DEFAULT):
+ title = self._html_search_meta(
+ ('fulltitle', 'title'), webpage, default=None)
+ if not title or title == "c't":
+ title = self._search_regex(
+ r'<div[^>]+class="videoplayerjw"[^>]+data-title="([^"]+)"',
+ webpage, 'title', default=None)
+ if not title:
+ title = self._html_search_regex(
+ r'<h1[^>]+\bclass=["\']article_page_title[^>]+>(.+?)<',
+ webpage, 'title', default=default)
+ return title
+
+ title = extract_title(default=None)
+ description = self._og_search_description(
+ webpage, default=None) or self._html_search_meta(
+ 'description', webpage)
+
+ def _make_kaltura_result(kaltura_url):
+ return {
+ '_type': 'url_transparent',
+ 'url': smuggle_url(kaltura_url, {'source_url': url}),
+ 'ie_key': KalturaIE.ie_key(),
+ 'title': title,
+ 'description': description,
+ }
+
+ kaltura_url = KalturaIE._extract_url(webpage)
+ if kaltura_url:
+ return _make_kaltura_result(kaltura_url)
+
+ kaltura_id = self._search_regex(
+ r'entry-id=(["\'])(?P<id>(?:(?!\1).)+)\1', webpage, 'kaltura id',
+ default=None, group='id')
+ if kaltura_id:
+ return _make_kaltura_result('kaltura:2238431:%s' % kaltura_id)
+
+ yt_urls = tuple(YoutubeIE._extract_embed_urls(url, webpage))
+ if yt_urls:
+ return self.playlist_from_matches(
+ yt_urls, video_id, title, ie=YoutubeIE.ie_key())
+
+ title = extract_title()
+ api_params = urllib.parse.parse_qs(
+ self._search_regex(r'/videout/feed\.json\?([^\']+)', webpage, 'feed params', default=None) or '')
+ if not api_params or 'container' not in api_params or 'sequenz' not in api_params:
+ container_id = self._search_regex(
+ r'<div class="videoplayerjw"[^>]+data-container="([0-9]+)"',
+ webpage, 'container ID')
+
+ sequenz_id = self._search_regex(
+ r'<div class="videoplayerjw"[^>]+data-sequenz="([0-9]+)"',
+ webpage, 'sequenz ID')
+ api_params = {
+ 'container': container_id,
+ 'sequenz': sequenz_id,
+ }
+ doc = self._download_xml(
+ 'http://www.heise.de/videout/feed', video_id, query=api_params)
+
+ formats = []
+ for source_node in doc.findall('.//{http://rss.jwpcdn.com/}source'):
+ label = source_node.attrib['label']
+ height = int_or_none(self._search_regex(
+ r'^(.*?_)?([0-9]+)p$', label, 'height', default=None))
+ video_url = source_node.attrib['file']
+ ext = determine_ext(video_url, '')
+ formats.append({
+ 'url': video_url,
+ 'format_note': label,
+ 'format_id': '%s_%s' % (ext, label),
+ 'height': height,
+ })
+
+ return {
+ 'id': video_id,
+ 'title': title,
+ 'description': description,
+ 'thumbnail': (xpath_text(doc, './/{http://rss.jwpcdn.com/}image')
+ or self._og_search_thumbnail(webpage)),
+ 'timestamp': parse_iso8601(
+ self._html_search_meta('date', webpage)),
+ 'formats': formats,
+ }
diff --git a/yt_dlp/extractor/hellporno.py b/yt_dlp/extractor/hellporno.py
new file mode 100644
index 0000000..fa32b27
--- /dev/null
+++ b/yt_dlp/extractor/hellporno.py
@@ -0,0 +1,72 @@
+from .common import InfoExtractor
+from ..utils import (
+ int_or_none,
+ merge_dicts,
+ remove_end,
+ unified_timestamp,
+)
+
+
+class HellPornoIE(InfoExtractor):
+ _VALID_URL = r'https?://(?:www\.)?hellporno\.(?:com/videos|net/v)/(?P<id>[^/]+)'
+ _TESTS = [{
+ 'url': 'http://hellporno.com/videos/dixie-is-posing-with-naked-ass-very-erotic/',
+ 'md5': 'f0a46ebc0bed0c72ae8fe4629f7de5f3',
+ 'info_dict': {
+ 'id': '149116',
+ 'display_id': 'dixie-is-posing-with-naked-ass-very-erotic',
+ 'ext': 'mp4',
+ 'title': 'Dixie is posing with naked ass very erotic',
+ 'description': 'md5:9a72922749354edb1c4b6e540ad3d215',
+ 'categories': list,
+ 'thumbnail': r're:https?://.*\.jpg$',
+ 'duration': 240,
+ 'timestamp': 1398762720,
+ 'upload_date': '20140429',
+ 'view_count': int,
+ 'age_limit': 18,
+ },
+ }, {
+ 'url': 'http://hellporno.net/v/186271/',
+ 'only_matching': True,
+ }]
+
+ def _real_extract(self, url):
+ display_id = self._match_id(url)
+
+ webpage = self._download_webpage(url, display_id)
+
+ title = remove_end(self._html_extract_title(webpage), ' - Hell Porno')
+
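+        # _parse_html5_media_entries() yields formats and a thumbnail from
+        # the <video> tag; merge_dicts() at the end overlays the remaining
+        # scraped metadata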
+ info = self._parse_html5_media_entries(url, webpage, display_id)[0]
+
+ video_id = self._search_regex(
+ (r'chs_object\s*=\s*["\'](\d+)',
+ r'params\[["\']video_id["\']\]\s*=\s*(\d+)'), webpage, 'video id',
+ default=display_id)
+ description = self._search_regex(
+ r'class=["\']desc_video_view_v2[^>]+>([^<]+)', webpage,
+ 'description', fatal=False)
+ categories = [
+ c.strip()
+ for c in self._html_search_meta(
+ 'keywords', webpage, 'categories', default='').split(',')
+ if c.strip()]
+ duration = int_or_none(self._og_search_property(
+ 'video:duration', webpage, fatal=False))
+ timestamp = unified_timestamp(self._og_search_property(
+ 'video:release_date', webpage, fatal=False))
+ view_count = int_or_none(self._search_regex(
+ r'>Views\s+(\d+)', webpage, 'view count', fatal=False))
+
+ return merge_dicts(info, {
+ 'id': video_id,
+ 'display_id': display_id,
+ 'title': title,
+ 'description': description,
+ 'categories': categories,
+ 'duration': duration,
+ 'timestamp': timestamp,
+ 'view_count': view_count,
+ 'age_limit': 18,
+ })
diff --git a/yt_dlp/extractor/hgtv.py b/yt_dlp/extractor/hgtv.py
new file mode 100644
index 0000000..c40017d
--- /dev/null
+++ b/yt_dlp/extractor/hgtv.py
@@ -0,0 +1,37 @@
+from .common import InfoExtractor
+
+
+class HGTVComShowIE(InfoExtractor):
+ IE_NAME = 'hgtv.com:show'
+ _VALID_URL = r'https?://(?:www\.)?hgtv\.com/shows/[^/]+/(?P<id>[^/?#&]+)'
+ _TESTS = [{
+ # data-module="video"
+ 'url': 'http://www.hgtv.com/shows/flip-or-flop/flip-or-flop-full-episodes-season-4-videos',
+ 'info_dict': {
+ 'id': 'flip-or-flop-full-episodes-season-4-videos',
+ 'title': 'Flip or Flop Full Episodes',
+ },
+ 'playlist_mincount': 15,
+ }, {
+ # data-deferred-module="video"
+ 'url': 'http://www.hgtv.com/shows/good-bones/episodes/an-old-victorian-house-gets-a-new-facelift',
+ 'only_matching': True,
+ }]
+
+ def _real_extract(self, url):
+ display_id = self._match_id(url)
+
+ webpage = self._download_webpage(url, display_id)
+
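+        # The player config is embedded as a 'text/x-config' JSON <script>
+        # right after the data-(deferred-)module="video" element; the first
+        # entry in 'channels' holds the playlist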
+ config = self._parse_json(
+ self._search_regex(
+ r'(?s)data-(?:deferred-)?module=["\']video["\'][^>]*>.*?<script[^>]+type=["\']text/x-config["\'][^>]*>(.+?)</script',
+ webpage, 'video config'),
+ display_id)['channels'][0]
+
+ entries = [
+ self.url_result(video['releaseUrl'])
+ for video in config['videos'] if video.get('releaseUrl')]
+
+ return self.playlist_result(
+ entries, display_id, config.get('title'), config.get('description'))
diff --git a/yt_dlp/extractor/hidive.py b/yt_dlp/extractor/hidive.py
new file mode 100644
index 0000000..df6868d
--- /dev/null
+++ b/yt_dlp/extractor/hidive.py
@@ -0,0 +1,119 @@
+from .common import InfoExtractor
+from ..utils import (
+ ExtractorError,
+ int_or_none,
+ try_get,
+ url_or_none,
+ urlencode_postdata,
+)
+
+
+class HiDiveIE(InfoExtractor):
+ _VALID_URL = r'https?://(?:www\.)?hidive\.com/stream/(?P<id>(?P<title>[^/]+)/(?P<key>[^/?#&]+))'
+ # Using X-Forwarded-For results in 403 HTTP error for HLS fragments,
+ # so disabling geo bypass completely
+ _GEO_BYPASS = False
+ _NETRC_MACHINE = 'hidive'
+ _LOGIN_URL = 'https://www.hidive.com/account/login'
+
+ _TESTS = [{
+ 'url': 'https://www.hidive.com/stream/the-comic-artist-and-his-assistants/s01e001',
+ 'info_dict': {
+ 'id': 'the-comic-artist-and-his-assistants/s01e001',
+ 'ext': 'mp4',
+ 'title': 'the-comic-artist-and-his-assistants/s01e001',
+ 'series': 'the-comic-artist-and-his-assistants',
+ 'season_number': 1,
+ 'episode_number': 1,
+ },
+ 'params': {
+ 'skip_download': True,
+ },
+ 'skip': 'Requires Authentication',
+ }]
+
+ def _perform_login(self, username, password):
+ webpage = self._download_webpage(self._LOGIN_URL, None)
+ form = self._search_regex(
+ r'(?s)<form[^>]+action="/account/login"[^>]*>(.+?)</form>',
+ webpage, 'login form', default=None)
+ if not form:
+ return
+ data = self._hidden_inputs(form)
+ data.update({
+ 'Email': username,
+ 'Password': password,
+ })
+ login_webpage = self._download_webpage(
+ self._LOGIN_URL, None, 'Logging in', data=urlencode_postdata(data))
+        # If the account has multiple profiles, one must be selected;
+        # for now, pick the first
+ profile_id = self._search_regex(
+ r'<button [^>]+?data-profile-id="(\w+)"', login_webpage, 'profile id', default=None)
+ if profile_id is None:
+ return # If only one profile, Hidive auto-selects it
+ self._request_webpage(
+ 'https://www.hidive.com/ajax/chooseprofile', None,
+ data=urlencode_postdata({
+ 'profileId': profile_id,
+ 'hash': self._search_regex(
+                    r'<button [^>]+?data-hash="(\w+)"', login_webpage, 'profile id hash'),
+ 'returnUrl': '/dashboard'
+ }))
+
+    def _call_api(self, video_id, title, key, data=None, **kwargs):
+        data = {
+            **(data or {}),
+ 'Title': title,
+ 'Key': key,
+ 'PlayerId': 'f4f895ce1ca713ba263b91caeb1daa2d08904783',
+ }
+ return self._download_json(
+ 'https://www.hidive.com/play/settings', video_id,
+ data=urlencode_postdata(data), **kwargs) or {}
+
+ def _real_extract(self, url):
+ video_id, title, key = self._match_valid_url(url).group('id', 'title', 'key')
+ settings = self._call_api(video_id, title, key)
+
+ restriction = settings.get('restrictionReason')
+ if restriction == 'RegionRestricted':
+ self.raise_geo_restricted()
+ if restriction and restriction != 'None':
+ raise ExtractorError(
+ '%s said: %s' % (self.IE_NAME, restriction), expected=True)
+
+ formats, parsed_urls = [], {None}
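+        # Rendition ids appear to follow '<audio>_<version>_<extra>'
+        # (e.g. a hypothetical 'ja_hd_uncut'); each rendition exposes its
+        # own HLS master playlist under bitrates.hls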
+ for rendition_id, rendition in settings['renditions'].items():
+ audio, version, extra = rendition_id.split('_')
+ m3u8_url = url_or_none(try_get(rendition, lambda x: x['bitrates']['hls']))
+ if m3u8_url not in parsed_urls:
+ parsed_urls.add(m3u8_url)
+ frmt = self._extract_m3u8_formats(
+ m3u8_url, video_id, 'mp4', entry_protocol='m3u8_native', m3u8_id=rendition_id, fatal=False)
+ for f in frmt:
+ f['language'] = audio
+ f['format_note'] = f'{version}, {extra}'
+ formats.extend(frmt)
+
+ subtitles = {}
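+        # Each ccFiles entry appears to be a [code, label, url] triple;
+        # prefer the normalised label as subtitle key, falling back to code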
+ for rendition_id, rendition in settings['renditions'].items():
+ audio, version, extra = rendition_id.split('_')
+ for cc_file in rendition.get('ccFiles') or []:
+ cc_url = url_or_none(try_get(cc_file, lambda x: x[2]))
+ cc_lang = try_get(cc_file, (lambda x: x[1].replace(' ', '-').lower(), lambda x: x[0]), str)
+ if cc_url not in parsed_urls and cc_lang:
+ parsed_urls.add(cc_url)
+ subtitles.setdefault(cc_lang, []).append({'url': cc_url})
+
+ return {
+ 'id': video_id,
+ 'title': video_id,
+ 'subtitles': subtitles,
+ 'formats': formats,
+ 'series': title,
+ 'season_number': int_or_none(
+ self._search_regex(r's(\d+)', key, 'season number', default=None)),
+ 'episode_number': int_or_none(
+ self._search_regex(r'e(\d+)', key, 'episode number', default=None)),
+ 'http_headers': {'Referer': url}
+ }
diff --git a/yt_dlp/extractor/historicfilms.py b/yt_dlp/extractor/historicfilms.py
new file mode 100644
index 0000000..c428fee
--- /dev/null
+++ b/yt_dlp/extractor/historicfilms.py
@@ -0,0 +1,45 @@
+from .common import InfoExtractor
+from ..utils import parse_duration
+
+
+class HistoricFilmsIE(InfoExtractor):
+ _VALID_URL = r'https?://(?:www\.)?historicfilms\.com/(?:tapes/|play)(?P<id>\d+)'
+ _TEST = {
+ 'url': 'http://www.historicfilms.com/tapes/4728',
+ 'md5': 'd4a437aec45d8d796a38a215db064e9a',
+ 'info_dict': {
+ 'id': '4728',
+ 'ext': 'mov',
+ 'title': 'Historic Films: GP-7',
+ 'description': 'md5:1a86a0f3ac54024e419aba97210d959a',
+ 'thumbnail': r're:^https?://.*\.jpg$',
+ 'duration': 2096,
+ },
+ }
+
+ def _real_extract(self, url):
+ video_id = self._match_id(url)
+
+ webpage = self._download_webpage(url, video_id)
+
+ tape_id = self._search_regex(
+ [r'class="tapeId"[^>]*>([^<]+)<', r'tapeId\s*:\s*"([^"]+)"'],
+ webpage, 'tape id')
+
+ title = self._og_search_title(webpage)
+ description = self._og_search_description(webpage)
+ thumbnail = self._html_search_meta(
+ 'thumbnailUrl', webpage, 'thumbnails') or self._og_search_thumbnail(webpage)
+ duration = parse_duration(self._html_search_meta(
+ 'duration', webpage, 'duration'))
+
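+        # Progressive downloads follow a predictable
+        # '<tape_id>_<video_id>_web.mov' naming scheme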
+ video_url = 'http://www.historicfilms.com/video/%s_%s_web.mov' % (tape_id, video_id)
+
+ return {
+ 'id': video_id,
+ 'url': video_url,
+ 'title': title,
+ 'description': description,
+ 'thumbnail': thumbnail,
+ 'duration': duration,
+ }
diff --git a/yt_dlp/extractor/hitrecord.py b/yt_dlp/extractor/hitrecord.py
new file mode 100644
index 0000000..902af44
--- /dev/null
+++ b/yt_dlp/extractor/hitrecord.py
@@ -0,0 +1,66 @@
+from .common import InfoExtractor
+from ..compat import compat_str
+from ..utils import (
+ clean_html,
+ float_or_none,
+ int_or_none,
+ try_get,
+)
+
+
+class HitRecordIE(InfoExtractor):
+ _VALID_URL = r'https?://(?:www\.)?hitrecord\.org/records/(?P<id>\d+)'
+ _TEST = {
+ 'url': 'https://hitrecord.org/records/2954362',
+ 'md5': 'fe1cdc2023bce0bbb95c39c57426aa71',
+ 'info_dict': {
+ 'id': '2954362',
+ 'ext': 'mp4',
+ 'title': 'A Very Different World (HITRECORD x ACLU)',
+ 'description': 'md5:e62defaffab5075a5277736bead95a3d',
+ 'duration': 139.327,
+ 'timestamp': 1471557582,
+ 'upload_date': '20160818',
+ 'uploader': 'Zuzi.C12',
+ 'uploader_id': '362811',
+ 'view_count': int,
+ 'like_count': int,
+ 'comment_count': int,
+ 'tags': list,
+ }
+ }
+
+ def _real_extract(self, url):
+ video_id = self._match_id(url)
+
+ video = self._download_json(
+ 'https://hitrecord.org/api/web/records/%s' % video_id, video_id)
+
+ title = video['title']
+ video_url = video['source_url']['mp4_url']
+
+ tags = None
+ tags_list = try_get(video, lambda x: x['tags'], list)
+ if tags_list:
+ tags = [
+ t['text']
+ for t in tags_list
+ if isinstance(t, dict) and t.get('text')
+ and isinstance(t['text'], compat_str)]
+
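+        # 'duration' is reported in milliseconds, hence the 1000 scale
+        # divisor passed to float_or_none below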
+ return {
+ 'id': video_id,
+ 'url': video_url,
+ 'title': title,
+ 'description': clean_html(video.get('body')),
+ 'duration': float_or_none(video.get('duration'), 1000),
+ 'timestamp': int_or_none(video.get('created_at_i')),
+ 'uploader': try_get(
+ video, lambda x: x['user']['username'], compat_str),
+ 'uploader_id': try_get(
+ video, lambda x: compat_str(x['user']['id'])),
+ 'view_count': int_or_none(video.get('total_views_count')),
+ 'like_count': int_or_none(video.get('hearts_count')),
+ 'comment_count': int_or_none(video.get('comments_count')),
+ 'tags': tags,
+ }
diff --git a/yt_dlp/extractor/hketv.py b/yt_dlp/extractor/hketv.py
new file mode 100644
index 0000000..e026996
--- /dev/null
+++ b/yt_dlp/extractor/hketv.py
@@ -0,0 +1,187 @@
+from .common import InfoExtractor
+from ..compat import compat_str
+from ..utils import (
+ clean_html,
+ ExtractorError,
+ int_or_none,
+ merge_dicts,
+ parse_count,
+ str_or_none,
+ try_get,
+ unified_strdate,
+ urlencode_postdata,
+ urljoin,
+)
+
+
+class HKETVIE(InfoExtractor):
+ IE_NAME = 'hketv'
+ IE_DESC = '香港教育局教育電視 (HKETV) Educational Television, Hong Kong Educational Bureau'
+ _GEO_BYPASS = False
+ _GEO_COUNTRIES = ['HK']
+ _VALID_URL = r'https?://(?:www\.)?hkedcity\.net/etv/resource/(?P<id>[0-9]+)'
+ _TESTS = [{
+ 'url': 'https://www.hkedcity.net/etv/resource/2932360618',
+ 'md5': 'f193712f5f7abb208ddef3c5ea6ed0b7',
+ 'info_dict': {
+ 'id': '2932360618',
+ 'ext': 'mp4',
+ 'title': '喜閱一生(共享閱讀樂) (中、英文字幕可供選擇)',
+ 'description': 'md5:d5286d05219ef50e0613311cbe96e560',
+ 'upload_date': '20181024',
+ 'duration': 900,
+ 'subtitles': 'count:2',
+ },
+ 'skip': 'Geo restricted to HK',
+ }, {
+ 'url': 'https://www.hkedcity.net/etv/resource/972641418',
+ 'md5': '1ed494c1c6cf7866a8290edad9b07dc9',
+ 'info_dict': {
+ 'id': '972641418',
+ 'ext': 'mp4',
+ 'title': '衣冠楚楚 (天使系列之一)',
+ 'description': 'md5:10bb3d659421e74f58e5db5691627b0f',
+ 'upload_date': '20070109',
+ 'duration': 907,
+ 'subtitles': {},
+ },
+ 'params': {
+ 'geo_verification_proxy': '<HK proxy here>',
+ },
+ 'skip': 'Geo restricted to HK',
+ }]
+
+ _CC_LANGS = {
+ '中文(繁體中文)': 'zh-Hant',
+ '中文(简体中文)': 'zh-Hans',
+ 'English': 'en',
+ 'Bahasa Indonesia': 'id',
+        'हिन्दी': 'hi',
+        'नेपाली': 'ne',
+        'Tagalog': 'tl',
+        'ไทย': 'th',
+        'اردو': 'ur',
+ }
+ _FORMAT_HEIGHTS = {
+ 'SD': 360,
+ 'HD': 720,
+ }
+ _APPS_BASE_URL = 'https://apps.hkedcity.net'
+
+ def _real_extract(self, url):
+ video_id = self._match_id(url)
+ webpage = self._download_webpage(url, video_id)
+
+ title = (
+ self._html_search_meta(
+ ('ed_title', 'search.ed_title'), webpage, default=None)
+ or self._search_regex(
+ r'data-favorite_title_(?:eng|chi)=(["\'])(?P<id>(?:(?!\1).)+)\1',
+                webpage, 'title', default=None, group='id')
+ or self._html_search_regex(
+ r'<h1>([^<]+)</h1>', webpage, 'title', default=None)
+ or self._og_search_title(webpage)
+ )
+
+ file_id = self._search_regex(
+ r'post_var\[["\']file_id["\']\s*\]\s*=\s*(.+?);',
+ webpage, 'file ID')
+ curr_url = self._search_regex(
+ r'post_var\[["\']curr_url["\']\s*\]\s*=\s*"(.+?)";',
+ webpage, 'curr URL')
+ data = {
+ 'action': 'get_info',
+ 'curr_url': curr_url,
+ 'file_id': file_id,
+ 'video_url': file_id,
+ }
+
+ response = self._download_json(
+ self._APPS_BASE_URL + '/media/play/handler.php', video_id,
+ data=urlencode_postdata(data),
+ headers=merge_dicts({
+ 'Content-Type': 'application/x-www-form-urlencoded'},
+ self.geo_verification_headers()))
+
+ result = response['result']
+
+ if not response.get('success') or not response.get('access'):
+ error = clean_html(response.get('access_err_msg'))
+ if 'Video streaming is not available in your country' in error:
+ self.raise_geo_restricted(
+ msg=error, countries=self._GEO_COUNTRIES)
+ else:
+ raise ExtractorError(error, expected=True)
+
+ formats = []
+
+ width = int_or_none(result.get('width'))
+ height = int_or_none(result.get('height'))
+
+ playlist0 = result['playlist'][0]
+ for fmt in playlist0['sources']:
+ file_url = urljoin(self._APPS_BASE_URL, fmt.get('file'))
+ if not file_url:
+ continue
+ # If we ever wanted to provide the final resolved URL that
+ # does not require cookies, albeit with a shorter lifespan:
+ # urlh = self._downloader.urlopen(file_url)
+ # resolved_url = urlh.url
+ label = fmt.get('label')
+ h = self._FORMAT_HEIGHTS.get(label)
+ w = h * width // height if h and width and height else None
+ formats.append({
+ 'format_id': label,
+ 'ext': fmt.get('type'),
+ 'url': file_url,
+ 'width': w,
+ 'height': h,
+ })
+
+ subtitles = {}
+ tracks = try_get(playlist0, lambda x: x['tracks'], list) or []
+ for track in tracks:
+ if not isinstance(track, dict):
+ continue
+ track_kind = str_or_none(track.get('kind'))
+ if not track_kind or not isinstance(track_kind, compat_str):
+ continue
+ if track_kind.lower() not in ('captions', 'subtitles'):
+ continue
+ track_url = urljoin(self._APPS_BASE_URL, track.get('file'))
+ if not track_url:
+ continue
+ track_label = track.get('label')
+ subtitles.setdefault(self._CC_LANGS.get(
+ track_label, track_label), []).append({
+ 'url': self._proto_relative_url(track_url),
+ 'ext': 'srt',
+ })
+
+        # Like count is served by a separate "emotion counter" service
+ emotion = self._download_json(
+ 'https://emocounter.hkedcity.net/handler.php', video_id,
+ data=urlencode_postdata({
+ 'action': 'get_emotion',
+ 'data[bucket_id]': 'etv',
+ 'data[identifier]': video_id,
+ }),
+ headers={'Content-Type': 'application/x-www-form-urlencoded'},
+ fatal=False) or {}
+ like_count = int_or_none(try_get(
+ emotion, lambda x: x['data']['emotion_data'][0]['count']))
+
+ return {
+ 'id': video_id,
+ 'title': title,
+ 'description': self._html_search_meta(
+ 'description', webpage, fatal=False),
+ 'upload_date': unified_strdate(self._html_search_meta(
+ 'ed_date', webpage, fatal=False), day_first=False),
+ 'duration': int_or_none(result.get('length')),
+ 'formats': formats,
+ 'subtitles': subtitles,
+ 'thumbnail': urljoin(self._APPS_BASE_URL, result.get('image')),
+ 'view_count': parse_count(result.get('view_count')),
+ 'like_count': like_count,
+ }
diff --git a/yt_dlp/extractor/hollywoodreporter.py b/yt_dlp/extractor/hollywoodreporter.py
new file mode 100644
index 0000000..1f7eb89
--- /dev/null
+++ b/yt_dlp/extractor/hollywoodreporter.py
@@ -0,0 +1,72 @@
+import functools
+import re
+
+from .common import InfoExtractor
+from .jwplatform import JWPlatformIE
+from ..utils import (
+ ExtractorError,
+ OnDemandPagedList,
+ extract_attributes,
+ get_element_by_class,
+ get_element_html_by_class,
+)
+
+
+class HollywoodReporterIE(InfoExtractor):
+ _VALID_URL = r'https?://(?:www\.)?hollywoodreporter\.com/video/(?P<id>[\w-]+)'
+ _TESTS = [{
+ 'url': 'https://www.hollywoodreporter.com/video/chris-pine-michelle-rodriguez-dungeons-dragons-cast-directors-on-what-it-took-to-make-film-sxsw-2023/',
+ 'info_dict': {
+ 'id': 'zH4jZaR5',
+ 'ext': 'mp4',
+ 'title': 'md5:a9a1c073770a32f178955997712c4bd9',
+ 'description': 'The cast and directors of \'Dungeons & Dragons: Honor Among Thieves\' talk about their new film.',
+ 'thumbnail': 'https://cdn.jwplayer.com/v2/media/zH4jZaR5/poster.jpg?width=720',
+ 'upload_date': '20230312',
+ 'timestamp': 1678586423,
+ 'duration': 242.0,
+ },
+ 'params': {'skip_download': 'm3u8'},
+ }]
+
+ def _real_extract(self, url):
+ display_id = self._match_id(url)
+ webpage = self._download_webpage(url, display_id)
+
+ data = extract_attributes(get_element_html_by_class('vlanding-video-card__link', webpage) or '')
+ video_id = data['data-video-showcase-trigger']
+ showcase_type = data['data-video-showcase-type']
+
+ if showcase_type == 'jwplayer':
+ return self.url_result(f'jwplatform:{video_id}', JWPlatformIE)
+ elif showcase_type == 'youtube':
+ return self.url_result(video_id, 'Youtube')
+ else:
+ raise ExtractorError(f'Unsupported showcase type "{showcase_type}"')
+
+
+class HollywoodReporterPlaylistIE(InfoExtractor):
+ _VALID_URL = r'https?://(?:www\.)?hollywoodreporter\.com/vcategory/(?P<slug>[\w-]+)-(?P<id>\d+)'
+ _TESTS = [{
+ 'url': 'https://www.hollywoodreporter.com/vcategory/heat-vision-breakdown-57822/',
+ 'playlist_mincount': 109,
+ 'info_dict': {
+ 'id': '57822',
+ 'title': 'heat-vision-breakdown',
+ }
+ }]
+
+ def _fetch_page(self, slug, pl_id, page):
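+        # OnDemandPagedList supplies 0-based page indices, while the site
+        # paginates from 1; each page lists 15 videos (the page size below)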
+ page += 1
+ webpage = self._download_webpage(
+ f'https://www.hollywoodreporter.com/vcategory/{slug}-{pl_id}/page/{page}/',
+ pl_id, note=f'Downloading playlist page {page}')
+ section = get_element_by_class('video-playlist-river', webpage) or ''
+
+ for url in re.findall(r'<a[^>]+href="([^"]+)"[^>]+class="c-title__link', section):
+ yield self.url_result(url, HollywoodReporterIE)
+
+ def _real_extract(self, url):
+ slug, pl_id = self._match_valid_url(url).group('slug', 'id')
+ return self.playlist_result(
+ OnDemandPagedList(functools.partial(self._fetch_page, slug, pl_id), 15), pl_id, slug)
diff --git a/yt_dlp/extractor/holodex.py b/yt_dlp/extractor/holodex.py
new file mode 100644
index 0000000..a2b73ec
--- /dev/null
+++ b/yt_dlp/extractor/holodex.py
@@ -0,0 +1,100 @@
+from .common import InfoExtractor
+from .youtube import YoutubeIE
+from ..utils import traverse_obj
+
+
+class HolodexIE(InfoExtractor):
+ _VALID_URL = r'''(?x)https?://(?:www\.|staging\.)?holodex\.net/(?:
+ api/v2/playlist/(?P<playlist>\d+)|
+ watch/(?P<id>[\w-]{11})(?:\?(?:[^#]+&)?playlist=(?P<playlist2>\d+))?
+ )'''
+ _TESTS = [{
+ 'url': 'https://holodex.net/watch/9kQ2GtvDV3s',
+ 'md5': 'be5ffce2f0feae8ba4c01553abc0f175',
+ 'info_dict': {
+ 'ext': 'mp4',
+ 'id': '9kQ2GtvDV3s',
+ 'title': '【おちゃめ機能】ホロライブが吹っ切れた【24人で歌ってみた】',
+ 'channel_id': 'UCJFZiqLMntJufDCHc6bQixg',
+ 'playable_in_embed': True,
+ 'tags': 'count:43',
+ 'age_limit': 0,
+ 'live_status': 'not_live',
+ 'description': 'md5:040e866c09dc4ab899b36479f4b7c7a2',
+ 'channel_url': 'https://www.youtube.com/channel/UCJFZiqLMntJufDCHc6bQixg',
+ 'upload_date': '20200406',
+ 'uploader_url': 'http://www.youtube.com/channel/UCJFZiqLMntJufDCHc6bQixg',
+ 'view_count': int,
+ 'channel': 'hololive ホロライブ - VTuber Group',
+ 'categories': ['Music'],
+ 'uploader': 'hololive ホロライブ - VTuber Group',
+ 'channel_follower_count': int,
+ 'uploader_id': 'UCJFZiqLMntJufDCHc6bQixg',
+ 'availability': 'public',
+ 'thumbnail': 'https://i.ytimg.com/vi_webp/9kQ2GtvDV3s/maxresdefault.webp',
+ 'duration': 263,
+ 'like_count': int,
+ },
+ }, {
+ 'url': 'https://holodex.net/api/v2/playlist/239',
+ 'info_dict': {
+ 'id': '239',
+ 'title': 'Songs/Videos that made fall into the rabbit hole (from my google activity history)',
+ },
+ 'playlist_count': 14,
+ }, {
+ 'url': 'https://holodex.net/watch/_m2mQyaofjI?foo=bar&playlist=69',
+ 'info_dict': {
+ 'id': '69',
+ 'title': '拿著金斧頭的藍髮大姊姊'
+ },
+ 'playlist_count': 3,
+ }, {
+ 'url': 'https://holodex.net/watch/_m2mQyaofjI?playlist=69',
+ 'info_dict': {
+ 'id': '_m2mQyaofjI',
+ 'ext': 'mp4',
+ 'playable_in_embed': True,
+ 'like_count': int,
+ 'uploader': 'Ernst / エンスト',
+ 'duration': 11,
+ 'uploader_url': 'http://www.youtube.com/channel/UCqSX4PPZY0cyetqKVY_wRVA',
+ 'categories': ['Entertainment'],
+ 'title': '【星街すいせい】星街向你獻上晚安',
+ 'upload_date': '20210705',
+ 'description': 'md5:8b8ffb157bae77f2d109021a0b577d4a',
+ 'channel': 'Ernst / エンスト',
+ 'channel_id': 'UCqSX4PPZY0cyetqKVY_wRVA',
+ 'channel_follower_count': int,
+ 'view_count': int,
+ 'tags': [],
+ 'live_status': 'not_live',
+ 'channel_url': 'https://www.youtube.com/channel/UCqSX4PPZY0cyetqKVY_wRVA',
+ 'availability': 'public',
+ 'thumbnail': 'https://i.ytimg.com/vi_webp/_m2mQyaofjI/maxresdefault.webp',
+ 'age_limit': 0,
+ 'uploader_id': 'UCqSX4PPZY0cyetqKVY_wRVA',
+ 'comment_count': int,
+ },
+ 'params': {'noplaylist': True},
+ }, {
+ 'url': 'https://staging.holodex.net/api/v2/playlist/125',
+ 'only_matching': True,
+ }, {
+ 'url': 'https://staging.holodex.net/watch/rJJTJA_T_b0?playlist=25',
+ 'only_matching': True,
+ }, {
+ 'url': 'https://staging.holodex.net/watch/s1ifBeukThg',
+ 'only_matching': True,
+ }]
+
+ def _real_extract(self, url):
+ video_id, playlist_id, pl_id2 = self._match_valid_url(url).group('id', 'playlist', 'playlist2')
+ playlist_id = playlist_id or pl_id2
+
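+        # Respect --no-playlist for watch URLs that carry ?playlist=: fall
+        # back to the bare YouTube video unless playlist mode is selected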
+ if not self._yes_playlist(playlist_id, video_id):
+ return self.url_result(f'https://www.youtube.com/watch?v={video_id}', YoutubeIE)
+
+ data = self._download_json(f'https://holodex.net/api/v2/playlist/{playlist_id}', playlist_id)
+ return self.playlist_from_matches(
+ traverse_obj(data, ('videos', ..., 'id')), playlist_id, data.get('name'), ie=YoutubeIE)
diff --git a/yt_dlp/extractor/hotnewhiphop.py b/yt_dlp/extractor/hotnewhiphop.py
new file mode 100644
index 0000000..4f506cd
--- /dev/null
+++ b/yt_dlp/extractor/hotnewhiphop.py
@@ -0,0 +1,61 @@
+from .common import InfoExtractor
+from ..compat import compat_b64decode
+from ..networking import HEADRequest, Request
+from ..utils import ExtractorError, urlencode_postdata
+
+
+class HotNewHipHopIE(InfoExtractor):
+ _WORKING = False
+ _VALID_URL = r'https?://(?:www\.)?hotnewhiphop\.com/.*\.(?P<id>.*)\.html'
+ _TEST = {
+ 'url': 'http://www.hotnewhiphop.com/freddie-gibbs-lay-it-down-song.1435540.html',
+ 'md5': '2c2cd2f76ef11a9b3b581e8b232f3d96',
+ 'info_dict': {
+ 'id': '1435540',
+ 'ext': 'mp3',
+ 'title': 'Freddie Gibbs - Lay It Down'
+ }
+ }
+
+ def _real_extract(self, url):
+ video_id = self._match_id(url)
+ webpage = self._download_webpage(url, video_id)
+
+ video_url_base64 = self._search_regex(
+ r'data-path="(.*?)"', webpage, 'video URL', default=None)
+
+ if video_url_base64 is None:
+ video_url = self._search_regex(
+ r'"contentUrl" content="(.*?)"', webpage, 'content URL')
+ return self.url_result(video_url, ie='Youtube')
+
+ reqdata = urlencode_postdata([
+ ('mediaType', 's'),
+ ('mediaId', video_id),
+ ])
+ r = Request(
+ 'http://www.hotnewhiphop.com/ajax/media/getActions/', data=reqdata)
+ r.headers['Content-Type'] = 'application/x-www-form-urlencoded'
+ mkd = self._download_json(
+ r, video_id, note='Requesting media key',
+ errnote='Could not download media key')
+ if 'mediaKey' not in mkd:
+ raise ExtractorError('Did not get a media key')
+
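+        # data-path holds a base64-encoded URL that redirects to the actual
+        # media file; resolve the redirect with a HEAD request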
+ redirect_url = compat_b64decode(video_url_base64).decode('utf-8')
+ redirect_req = HEADRequest(redirect_url)
+ req = self._request_webpage(
+ redirect_req, video_id,
+ note='Resolving final URL', errnote='Could not resolve final URL')
+ video_url = req.url
+ if video_url.endswith('.html'):
+ raise ExtractorError('Redirect failed')
+
+ video_title = self._og_search_title(webpage).strip()
+
+ return {
+ 'id': video_id,
+ 'url': video_url,
+ 'title': video_title,
+ 'thumbnail': self._og_search_thumbnail(webpage),
+ }
diff --git a/yt_dlp/extractor/hotstar.py b/yt_dlp/extractor/hotstar.py
new file mode 100644
index 0000000..a3a3c20
--- /dev/null
+++ b/yt_dlp/extractor/hotstar.py
@@ -0,0 +1,468 @@
+import hashlib
+import hmac
+import json
+import re
+import time
+import uuid
+
+from .common import InfoExtractor
+from ..compat import compat_str
+from ..networking.exceptions import HTTPError
+from ..utils import (
+ ExtractorError,
+ determine_ext,
+ int_or_none,
+ join_nonempty,
+ str_or_none,
+ traverse_obj,
+ url_or_none,
+)
+
+
+class HotStarBaseIE(InfoExtractor):
+ _BASE_URL = 'https://www.hotstar.com'
+ _API_URL = 'https://api.hotstar.com'
+ _AKAMAI_ENCRYPTION_KEY = b'\x05\xfc\x1a\x01\xca\xc9\x4b\xc4\x12\xfc\x53\x12\x07\x75\xf9\xee'
+
+ def _call_api_v1(self, path, *args, **kwargs):
+ return self._download_json(
+ f'{self._API_URL}/o/v1/{path}', *args, **kwargs,
+ headers={'x-country-code': 'IN', 'x-platform-code': 'PCTV'})
+
+ def _call_api_impl(self, path, video_id, query, st=None, cookies=None):
+ st = int_or_none(st) or int(time.time())
+ exp = st + 6000
+ auth = 'st=%d~exp=%d~acl=/*' % (st, exp)
+ auth += '~hmac=' + hmac.new(self._AKAMAI_ENCRYPTION_KEY, auth.encode(), hashlib.sha256).hexdigest()
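+        # The resulting Akamai-style token has the form
+        # 'st=<start>~exp=<start+6000>~acl=/*~hmac=<hex-encoded SHA-256>'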
+
+ if cookies and cookies.get('userUP'):
+ token = cookies.get('userUP').value
+ else:
+ token = self._download_json(
+ f'{self._API_URL}/um/v3/users',
+ video_id, note='Downloading token',
+ data=json.dumps({"device_ids": [{"id": compat_str(uuid.uuid4()), "type": "device_id"}]}).encode('utf-8'),
+ headers={
+ 'hotstarauth': auth,
+ 'x-hs-platform': 'PCTV', # or 'web'
+ 'Content-Type': 'application/json',
+ })['user_identity']
+
+ response = self._download_json(
+ f'{self._API_URL}/{path}', video_id, query=query,
+ headers={
+ 'hotstarauth': auth,
+ 'x-hs-appversion': '6.72.2',
+ 'x-hs-platform': 'web',
+ 'x-hs-usertoken': token,
+ })
+
+ if response['message'] != "Playback URL's fetched successfully":
+ raise ExtractorError(
+ response['message'], expected=True)
+ return response['data']
+
+ def _call_api_v2(self, path, video_id, st=None, cookies=None):
+ return self._call_api_impl(
+ f'{path}/content/{video_id}', video_id, st=st, cookies=cookies, query={
+ 'desired-config': 'audio_channel:stereo|container:fmp4|dynamic_range:hdr|encryption:plain|ladder:tv|package:dash|resolution:fhd|subs-tag:HotstarVIP|video_codec:h265',
+ 'device-id': cookies.get('device_id').value if cookies.get('device_id') else compat_str(uuid.uuid4()),
+ 'os-name': 'Windows',
+ 'os-version': '10',
+ })
+
+ def _playlist_entries(self, path, item_id, root=None, **kwargs):
+ results = self._call_api_v1(path, item_id, **kwargs)['body']['results']
+ for video in traverse_obj(results, (('assets', None), 'items', ...)):
+ if video.get('contentId'):
+ yield self.url_result(
+ HotStarIE._video_url(video['contentId'], root=root), HotStarIE, video['contentId'])
+
+
+class HotStarIE(HotStarBaseIE):
+ IE_NAME = 'hotstar'
+ _VALID_URL = r'''(?x)
+ https?://(?:www\.)?hotstar\.com(?:/in)?/(?!in/)
+ (?:
+ (?P<type>movies|sports|clips|episode|(?P<tv>tv|shows))/
+ (?(tv)(?:[^/?#]+/){2}|[^?#]*)
+ )?
+ [^/?#]+/
+ (?P<id>\d{10})
+ '''
+
+ _TESTS = [{
+ 'url': 'https://www.hotstar.com/can-you-not-spread-rumours/1000076273',
+ 'info_dict': {
+ 'id': '1000076273',
+ 'ext': 'mp4',
+ 'title': 'Can You Not Spread Rumours?',
+ 'description': 'md5:c957d8868e9bc793ccb813691cc4c434',
+ 'timestamp': 1447248600,
+ 'upload_date': '20151111',
+ 'duration': 381,
+ 'episode': 'Can You Not Spread Rumours?',
+ },
+ 'params': {'skip_download': 'm3u8'},
+ }, {
+ 'url': 'https://www.hotstar.com/tv/ek-bhram-sarvagun-sampanna/s-2116/janhvi-targets-suman/1000234847',
+ 'info_dict': {
+ 'id': '1000234847',
+ 'ext': 'mp4',
+ 'title': 'Janhvi Targets Suman',
+ 'description': 'md5:78a85509348910bd1ca31be898c5796b',
+ 'timestamp': 1556670600,
+ 'upload_date': '20190501',
+ 'duration': 1219,
+ 'channel': 'StarPlus',
+ 'channel_id': '3',
+ 'series': 'Ek Bhram - Sarvagun Sampanna',
+ 'season': 'Chapter 1',
+ 'season_number': 1,
+ 'season_id': '6771',
+ 'episode': 'Janhvi Targets Suman',
+ 'episode_number': 8,
+ }
+ }, {
+ 'url': 'https://www.hotstar.com/in/shows/anupama/1260022017/anupama-anuj-share-a-moment/1000282843',
+ 'info_dict': {
+ 'id': '1000282843',
+ 'ext': 'mp4',
+ 'title': 'Anupama, Anuj Share a Moment',
+ 'season': 'Chapter 1',
+ 'description': 'md5:8d74ed2248423b8b06d5c8add4d7a0c0',
+ 'timestamp': 1678149000,
+ 'channel': 'StarPlus',
+ 'series': 'Anupama',
+ 'season_number': 1,
+ 'season_id': '7399',
+ 'upload_date': '20230307',
+ 'episode': 'Anupama, Anuj Share a Moment',
+ 'episode_number': 853,
+ 'duration': 1272,
+ 'channel_id': '3',
+ },
+ 'skip': 'HTTP Error 504: Gateway Time-out', # XXX: Investigate 504 errors on some episodes
+ }, {
+ 'url': 'https://www.hotstar.com/in/shows/kana-kaanum-kaalangal/1260097087/back-to-school/1260097320',
+ 'info_dict': {
+ 'id': '1260097320',
+ 'ext': 'mp4',
+ 'title': 'Back To School',
+ 'season': 'Chapter 1',
+ 'description': 'md5:b0d6a4c8a650681491e7405496fc7e13',
+ 'timestamp': 1650564000,
+ 'channel': 'Hotstar Specials',
+ 'series': 'Kana Kaanum Kaalangal',
+ 'season_number': 1,
+ 'season_id': '9441',
+ 'upload_date': '20220421',
+ 'episode': 'Back To School',
+ 'episode_number': 1,
+ 'duration': 1810,
+ 'channel_id': '54',
+ },
+ }, {
+ 'url': 'https://www.hotstar.com/in/clips/e3-sairat-kahani-pyaar-ki/1000262286',
+ 'info_dict': {
+ 'id': '1000262286',
+ 'ext': 'mp4',
+ 'title': 'E3 - SaiRat, Kahani Pyaar Ki',
+ 'description': 'md5:e3b4b3203bc0c5396fe7d0e4948a6385',
+ 'episode': 'E3 - SaiRat, Kahani Pyaar Ki',
+ 'upload_date': '20210606',
+ 'timestamp': 1622943900,
+ 'duration': 5395,
+ },
+ }, {
+ 'url': 'https://www.hotstar.com/in/movies/premam/1000091195',
+ 'info_dict': {
+ 'id': '1000091195',
+ 'ext': 'mp4',
+ 'title': 'Premam',
+ 'release_year': 2015,
+ 'description': 'md5:d833c654e4187b5e34757eafb5b72d7f',
+ 'timestamp': 1462149000,
+ 'upload_date': '20160502',
+ 'episode': 'Premam',
+ 'duration': 8994,
+ },
+ }, {
+ 'url': 'https://www.hotstar.com/movies/radha-gopalam/1000057157',
+ 'only_matching': True,
+ }, {
+ 'url': 'https://www.hotstar.com/in/sports/cricket/follow-the-blues-2021/recap-eng-fight-back-on-day-2/1260066104',
+ 'only_matching': True,
+ }, {
+ 'url': 'https://www.hotstar.com/in/sports/football/most-costly-pl-transfers-ft-grealish/1260065956',
+ 'only_matching': True,
+ }]
+ _GEO_BYPASS = False
+
+ _TYPE = {
+ 'movies': 'movie',
+ 'sports': 'match',
+ 'episode': 'episode',
+ 'tv': 'episode',
+ 'shows': 'episode',
+ 'clips': 'content',
+ None: 'content',
+ }
+
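+    # Maps --extractor-args keys to playback-set tag prefixes: e.g.
+    # "hotstar:res=sd" skips playback sets tagged 'resolution:sd'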
+ _IGNORE_MAP = {
+ 'res': 'resolution',
+ 'vcodec': 'video_codec',
+ 'dr': 'dynamic_range',
+ }
+
+ _TAG_FIELDS = {
+ 'language': 'language',
+ 'acodec': 'audio_codec',
+ 'vcodec': 'video_codec',
+ }
+
+ @classmethod
+ def _video_url(cls, video_id, video_type=None, *, slug='ignore_me', root=None):
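+        # The slug segment is never used for matching, so any placeholder
+        # works; callers pass either video_type or a prebuilt root, not both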
+ assert None in (video_type, root)
+ if not root:
+ root = join_nonempty(cls._BASE_URL, video_type, delim='/')
+ return f'{root}/{slug}/{video_id}'
+
+ def _real_extract(self, url):
+ video_id, video_type = self._match_valid_url(url).group('id', 'type')
+ video_type = self._TYPE.get(video_type, video_type)
+ cookies = self._get_cookies(url) # Cookies before any request
+
+ video_data = traverse_obj(
+ self._call_api_v1(
+ f'{video_type}/detail', video_id, fatal=False, query={'tas': 10000, 'contentId': video_id}),
+ ('body', 'results', 'item', {dict})) or {}
+ if not self.get_param('allow_unplayable_formats') and video_data.get('drmProtected'):
+ self.report_drm(video_id)
+
+ # See https://github.com/yt-dlp/yt-dlp/issues/396
+ st = self._download_webpage_handle(f'{self._BASE_URL}/in', video_id)[1].headers.get('x-origin-date')
+
+ geo_restricted = False
+ formats, subs = [], {}
+ headers = {'Referer': f'{self._BASE_URL}/in'}
+
+ # change to v2 in the future
+ playback_sets = self._call_api_v2('play/v1/playback', video_id, st=st, cookies=cookies)['playBackSets']
+ for playback_set in playback_sets:
+ if not isinstance(playback_set, dict):
+ continue
+ tags = str_or_none(playback_set.get('tagsCombination')) or ''
+ if any(f'{prefix}:{ignore}' in tags
+ for key, prefix in self._IGNORE_MAP.items()
+ for ignore in self._configuration_arg(key)):
+ continue
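+            # tagsCombination is ';'-separated 'key:value' pairs, e.g.
+            # 'encryption:plain;ladder:tv;package:dash'; tolerate bare keys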
+ tag_dict = dict((t.split(':', 1) + [None])[:2] for t in tags.split(';'))
+
+ format_url = url_or_none(playback_set.get('playbackUrl'))
+ if not format_url:
+ continue
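+            # 'staragvod<N>' hosts appear to serve app clients; rewrite them
+            # to their 'staragvodweb<N>' counterparts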
+ format_url = re.sub(r'(?<=//staragvod)(\d)', r'web\1', format_url)
+ ext = determine_ext(format_url)
+
+ current_formats, current_subs = [], {}
+ try:
+ if 'package:hls' in tags or ext == 'm3u8':
+ current_formats, current_subs = self._extract_m3u8_formats_and_subtitles(
+ format_url, video_id, ext='mp4', headers=headers)
+ elif 'package:dash' in tags or ext == 'mpd':
+ current_formats, current_subs = self._extract_mpd_formats_and_subtitles(
+ format_url, video_id, headers=headers)
+ elif ext == 'f4m':
+                    pass  # XXX: f4m formats produce broken files
+ else:
+ current_formats = [{
+ 'url': format_url,
+ 'width': int_or_none(playback_set.get('width')),
+ 'height': int_or_none(playback_set.get('height')),
+ }]
+ except ExtractorError as e:
+ if isinstance(e.cause, HTTPError) and e.cause.status == 403:
+ geo_restricted = True
+ continue
+
+ if tag_dict.get('encryption') not in ('plain', None):
+ for f in current_formats:
+ f['has_drm'] = True
+ for f in current_formats:
+ for k, v in self._TAG_FIELDS.items():
+ if not f.get(k):
+ f[k] = tag_dict.get(v)
+ if f.get('vcodec') != 'none' and not f.get('dynamic_range'):
+ f['dynamic_range'] = tag_dict.get('dynamic_range')
+ if f.get('acodec') != 'none' and not f.get('audio_channels'):
+ f['audio_channels'] = {
+ 'stereo': 2,
+ 'dolby51': 6,
+ }.get(tag_dict.get('audio_channel'))
+ f['format_note'] = join_nonempty(
+ tag_dict.get('ladder'),
+ tag_dict.get('audio_channel') if f.get('acodec') != 'none' else None,
+ f.get('format_note'),
+ delim=', ')
+
+ formats.extend(current_formats)
+ subs = self._merge_subtitles(subs, current_subs)
+
+ if not formats and geo_restricted:
+ self.raise_geo_restricted(countries=['IN'], metadata_available=True)
+ self._remove_duplicate_formats(formats)
+ for f in formats:
+ f.setdefault('http_headers', {}).update(headers)
+
+ return {
+ 'id': video_id,
+ 'title': video_data.get('title'),
+ 'description': video_data.get('description'),
+ 'duration': int_or_none(video_data.get('duration')),
+ 'timestamp': int_or_none(traverse_obj(video_data, 'broadcastDate', 'startDate')),
+ 'release_year': int_or_none(video_data.get('year')),
+ 'formats': formats,
+ 'subtitles': subs,
+ 'channel': video_data.get('channelName'),
+ 'channel_id': str_or_none(video_data.get('channelId')),
+ 'series': video_data.get('showName'),
+ 'season': video_data.get('seasonName'),
+ 'season_number': int_or_none(video_data.get('seasonNo')),
+ 'season_id': str_or_none(video_data.get('seasonId')),
+ 'episode': video_data.get('title'),
+ 'episode_number': int_or_none(video_data.get('episodeNo')),
+ }
+
+
+class HotStarPrefixIE(InfoExtractor):
+ """ The "hotstar:" prefix is no longer in use, but this is kept for backward compatibility """
+ IE_DESC = False
+ _VALID_URL = r'hotstar:(?:(?P<type>\w+):)?(?P<id>\d+)$'
+ _TESTS = [{
+ 'url': 'hotstar:1000076273',
+ 'only_matching': True,
+ }, {
+ 'url': 'hotstar:movies:1260009879',
+ 'info_dict': {
+ 'id': '1260009879',
+ 'ext': 'mp4',
+ 'title': 'Nuvvu Naaku Nachav',
+ 'description': 'md5:d43701b1314e6f8233ce33523c043b7d',
+ 'timestamp': 1567525674,
+ 'upload_date': '20190903',
+ 'duration': 10787,
+ 'episode': 'Nuvvu Naaku Nachav',
+ },
+ }, {
+ 'url': 'hotstar:episode:1000234847',
+ 'only_matching': True,
+ }, {
+ # contentData
+ 'url': 'hotstar:sports:1260065956',
+ 'only_matching': True,
+ }, {
+ # contentData
+ 'url': 'hotstar:sports:1260066104',
+ 'only_matching': True,
+ }]
+
+ def _real_extract(self, url):
+ video_id, video_type = self._match_valid_url(url).group('id', 'type')
+ return self.url_result(HotStarIE._video_url(video_id, video_type), HotStarIE, video_id)
+
+
+class HotStarPlaylistIE(HotStarBaseIE):
+ IE_NAME = 'hotstar:playlist'
+ _VALID_URL = r'https?://(?:www\.)?hotstar\.com(?:/in)?/(?:tv|shows)(?:/[^/]+){2}/list/[^/]+/t-(?P<id>\w+)'
+ _TESTS = [{
+ 'url': 'https://www.hotstar.com/tv/savdhaan-india/s-26/list/popular-clips/t-3_2_26',
+ 'info_dict': {
+ 'id': '3_2_26',
+ },
+ 'playlist_mincount': 20,
+ }, {
+ 'url': 'https://www.hotstar.com/shows/savdhaan-india/s-26/list/popular-clips/t-3_2_26',
+ 'only_matching': True,
+ }, {
+ 'url': 'https://www.hotstar.com/tv/savdhaan-india/s-26/list/extras/t-2480',
+ 'only_matching': True,
+ }, {
+ 'url': 'https://www.hotstar.com/in/tv/karthika-deepam/15457/list/popular-clips/t-3_2_1272',
+ 'only_matching': True,
+ }]
+
+ def _real_extract(self, url):
+ id_ = self._match_id(url)
+ return self.playlist_result(
+ self._playlist_entries('tray/find', id_, query={'tas': 10000, 'uqId': id_}), id_)
+
+
+class HotStarSeasonIE(HotStarBaseIE):
+ IE_NAME = 'hotstar:season'
+ _VALID_URL = r'(?P<url>https?://(?:www\.)?hotstar\.com(?:/in)?/(?:tv|shows)/[^/]+/\w+)/seasons/[^/]+/ss-(?P<id>\w+)'
+ _TESTS = [{
+ 'url': 'https://www.hotstar.com/tv/radhakrishn/1260000646/seasons/season-2/ss-8028',
+ 'info_dict': {
+ 'id': '8028',
+ },
+ 'playlist_mincount': 35,
+ }, {
+ 'url': 'https://www.hotstar.com/in/tv/ishqbaaz/9567/seasons/season-2/ss-4357',
+ 'info_dict': {
+ 'id': '4357',
+ },
+ 'playlist_mincount': 30,
+ }, {
+ 'url': 'https://www.hotstar.com/in/tv/bigg-boss/14714/seasons/season-4/ss-8208/',
+ 'info_dict': {
+ 'id': '8208',
+ },
+ 'playlist_mincount': 19,
+ }, {
+ 'url': 'https://www.hotstar.com/in/shows/bigg-boss/14714/seasons/season-4/ss-8208/',
+ 'only_matching': True,
+ }]
+
+ def _real_extract(self, url):
+ url, season_id = self._match_valid_url(url).groups()
+ return self.playlist_result(self._playlist_entries(
+ 'season/asset', season_id, url, query={'tao': 0, 'tas': 0, 'size': 10000, 'id': season_id}), season_id)
+
+
+class HotStarSeriesIE(HotStarBaseIE):
+ IE_NAME = 'hotstar:series'
+ _VALID_URL = r'(?P<url>https?://(?:www\.)?hotstar\.com(?:/in)?/(?:tv|shows)/[^/]+/(?P<id>\d+))/?(?:[#?]|$)'
+ _TESTS = [{
+ 'url': 'https://www.hotstar.com/in/tv/radhakrishn/1260000646',
+ 'info_dict': {
+ 'id': '1260000646',
+ },
+ 'playlist_mincount': 690,
+ }, {
+ 'url': 'https://www.hotstar.com/tv/dancee-/1260050431',
+ 'info_dict': {
+ 'id': '1260050431',
+ },
+ 'playlist_mincount': 43,
+ }, {
+ 'url': 'https://www.hotstar.com/in/tv/mahabharat/435/',
+ 'info_dict': {
+ 'id': '435',
+ },
+ 'playlist_mincount': 267,
+ }, {
+ 'url': 'https://www.hotstar.com/in/shows/anupama/1260022017/',
+ 'info_dict': {
+ 'id': '1260022017',
+ },
+ 'playlist_mincount': 940,
+ }]
+
+ def _real_extract(self, url):
+ url, series_id = self._match_valid_url(url).groups()
+ id_ = self._call_api_v1(
+ 'show/detail', series_id, query={'contentId': series_id})['body']['results']['item']['id']
+
+ return self.playlist_result(self._playlist_entries(
+ 'tray/g/1/items', series_id, url, query={'tao': 0, 'tas': 10000, 'etid': 0, 'eid': id_}), series_id)
diff --git a/yt_dlp/extractor/hrefli.py b/yt_dlp/extractor/hrefli.py
new file mode 100644
index 0000000..77db2ea
--- /dev/null
+++ b/yt_dlp/extractor/hrefli.py
@@ -0,0 +1,15 @@
+from .common import InfoExtractor
+
+
+class HrefLiRedirectIE(InfoExtractor):
+ IE_NAME = 'href.li'
+ IE_DESC = False # Do not list
+ _VALID_URL = r'https?://href\.li/\?(?P<url>.+)'
+
+ _TESTS = [{
+ 'url': 'https://href.li/?https://www.reddit.com/r/cats/comments/12bluel/my_cat_helps_me_with_water/?utm_source=share&utm_medium=android_app&utm_name=androidcss&utm_term=1&utm_content=share_button',
+ 'only_matching': True,
+ }]
+
+ def _real_extract(self, url):
+ return self.url_result(self._match_valid_url(url).group('url'))
diff --git a/yt_dlp/extractor/hrfensehen.py b/yt_dlp/extractor/hrfensehen.py
new file mode 100644
index 0000000..35e9f67
--- /dev/null
+++ b/yt_dlp/extractor/hrfensehen.py
@@ -0,0 +1,90 @@
+import json
+import re
+
+from .common import InfoExtractor
+from ..utils import (
+ int_or_none,
+ traverse_obj,
+ try_call,
+ unescapeHTML,
+ unified_timestamp,
+)
+
+
+class HRFernsehenIE(InfoExtractor):
+ IE_NAME = 'hrfernsehen'
+ _VALID_URL = r'^https?://www\.(?:hr-fernsehen|hessenschau)\.de/.*,video-(?P<id>[0-9]{6})\.html'
+ _TESTS = [{
+ 'url': 'https://www.hessenschau.de/tv-sendung/hessenschau-vom-26082020,video-130546.html',
+ 'md5': '5c4e0ba94677c516a2f65a84110fc536',
+ 'info_dict': {
+ 'id': '130546',
+ 'ext': 'mp4',
+ 'description': 'Sturmtief Kirsten fegt über Hessen / Die Corona-Pandemie – eine Chronologie / '
+ 'Sterbehilfe: Die Lage in Hessen / Miss Hessen leitet zwei eigene Unternehmen / '
+ 'Pop-Up Museum zeigt Schwarze Unterhaltung und Black Music',
+ 'subtitles': {'de': [{
+ 'url': 'https://hr-a.akamaihd.net/video/as/hessenschau/2020_08/hrLogo_200826200407_L385592_512x288-25p-500kbit.vtt'
+ }]},
+ 'timestamp': 1598400000,
+ 'upload_date': '20200826',
+ 'thumbnail': 'https://www.hessenschau.de/tv-sendung/hs_ganz-1554~_t-1598465545029_v-16to9.jpg',
+ 'title': 'hessenschau vom 26.08.2020',
+ 'duration': 1654
+ }
+ }, {
+ 'url': 'https://www.hr-fernsehen.de/sendungen-a-z/mex/sendungen/fair-und-gut---was-hinter-aldis-eigenem-guetesiegel-steckt,video-130544.html',
+ 'only_matching': True
+ }]
+
+ _GEO_COUNTRIES = ['DE']
+
+ def extract_formats(self, loader_data):
+ stream_formats = []
+ data = loader_data['mediaCollection']['streams'][0]['media']
+ for inner in data[1:]:
+ stream_format = {
+ 'format_id': try_call(lambda: f'{inner["maxHResolutionPx"]}p'),
+ 'height': inner.get('maxHResolutionPx'),
+ 'url': inner['url'],
+ }
+
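+            # Stream URLs embed '<width>x<height>-<fps>p-<tbr>kbit'
+            # (e.g. ..._512x288-25p-500kbit.mp4); parse it when present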
+ quality_information = re.search(r'([0-9]{3,4})x([0-9]{3,4})-([0-9]{2})p-([0-9]{3,4})kbit',
+ inner['url'])
+ if quality_information:
+ stream_format['width'] = int_or_none(quality_information.group(1))
+ stream_format['height'] = int_or_none(quality_information.group(2))
+ stream_format['fps'] = int_or_none(quality_information.group(3))
+ stream_format['tbr'] = int_or_none(quality_information.group(4))
+
+ stream_formats.append(stream_format)
+ return stream_formats
+
+ def _real_extract(self, url):
+ video_id = self._match_id(url)
+ webpage = self._download_webpage(url, video_id)
+
+ title = self._html_search_meta(
+ ['og:title', 'twitter:title', 'name'], webpage)
+ description = self._html_search_meta(
+ ['description'], webpage)
+
+ loader_str = unescapeHTML(self._search_regex(r"data-(?:new-)?hr-mediaplayer-loader='([^']*)'", webpage, 'ardloader'))
+ loader_data = json.loads(loader_str)
+
+ subtitle = traverse_obj(loader_data, ('mediaCollection', 'subTitles', 0, 'sources', 0, 'url'))
+
+ info = {
+ 'id': video_id,
+ 'title': title,
+ 'description': description,
+ 'formats': self.extract_formats(loader_data),
+ 'subtitles': {'de': [{'url': subtitle}]},
+ 'timestamp': unified_timestamp(self._search_regex(
+ r'<time\sdatetime="(\d{4}\W\d{1,2}\W\d{1,2})', webpage, 'datetime', fatal=False)),
+ 'duration': int_or_none(traverse_obj(
+ loader_data, ('playerConfig', 'pluginData', 'trackingAti@all', 'richMedia', 'duration'))),
+ 'thumbnail': self._search_regex(r'thumbnailUrl\W*([^"]+)', webpage, 'thumbnail', default=None),
+ }
+
+ return info
diff --git a/yt_dlp/extractor/hrti.py b/yt_dlp/extractor/hrti.py
new file mode 100644
index 0000000..57b76e4
--- /dev/null
+++ b/yt_dlp/extractor/hrti.py
@@ -0,0 +1,200 @@
+import json
+
+from .common import InfoExtractor
+from ..networking import Request
+from ..networking.exceptions import HTTPError
+from ..utils import (
+ clean_html,
+ ExtractorError,
+ int_or_none,
+ parse_age_limit,
+ try_get,
+)
+
+
+class HRTiBaseIE(InfoExtractor):
+ """
+    Base information extractor for the Croatian Radiotelevision
+    video-on-demand site https://hrti.hrt.hr.
+    Reverse-engineered from the JavaScript app in app.min.js.
+ """
+ _NETRC_MACHINE = 'hrti'
+
+ _APP_LANGUAGE = 'hr'
+ _APP_VERSION = '1.1'
+ _APP_PUBLICATION_ID = 'all_in_one'
+ _API_URL = 'http://clientapi.hrt.hr/client_api.php/config/identify/format/json'
+ _token = None
+
+ def _initialize_pre_login(self):
+ init_data = {
+ 'application_publication_id': self._APP_PUBLICATION_ID
+ }
+
+ uuid = self._download_json(
+ self._API_URL, None, note='Downloading uuid',
+ errnote='Unable to download uuid',
+ data=json.dumps(init_data).encode('utf-8'))['uuid']
+
+ app_data = {
+ 'uuid': uuid,
+ 'application_publication_id': self._APP_PUBLICATION_ID,
+ 'application_version': self._APP_VERSION
+ }
+
+        # The session endpoint expects a PUT request
+        req = Request(
+            self._API_URL, data=json.dumps(app_data).encode('utf-8'),
+            method='PUT')
+
+ resources = self._download_json(
+ req, None, note='Downloading session information',
+ errnote='Unable to download session information')
+
+ self._session_id = resources['session_id']
+
+ modules = resources['modules']
+
+ self._search_url = modules['vod_catalog']['resources']['search']['uri'].format(
+ language=self._APP_LANGUAGE,
+ application_id=self._APP_PUBLICATION_ID)
+
+ self._login_url = (modules['user']['resources']['login']['uri']
+ + '/format/json').format(session_id=self._session_id)
+
+ self._logout_url = modules['user']['resources']['logout']['uri']
+
+ def _perform_login(self, username, password):
+ auth_data = {
+ 'username': username,
+ 'password': password,
+ }
+
+ try:
+ auth_info = self._download_json(
+ self._login_url, None, note='Logging in', errnote='Unable to log in',
+ data=json.dumps(auth_data).encode('utf-8'))
+ except ExtractorError as e:
+ if isinstance(e.cause, HTTPError) and e.cause.status == 406:
+                auth_info = self._parse_json(e.cause.response.read().decode('utf-8'), None)
+ else:
+ raise
+
+ error_message = auth_info.get('error', {}).get('message')
+ if error_message:
+ raise ExtractorError(
+ '%s said: %s' % (self.IE_NAME, error_message),
+ expected=True)
+
+ self._token = auth_info['secure_streaming_token']
+
+ def _real_initialize(self):
+ if not self._token:
+ # TODO: figure out authentication with cookies
+ self.raise_login_required(method='password')
+
+
+class HRTiIE(HRTiBaseIE):
+ _VALID_URL = r'''(?x)
+ (?:
+ hrti:(?P<short_id>[0-9]+)|
+ https?://
+ hrti\.hrt\.hr/(?:\#/)?video/show/(?P<id>[0-9]+)/(?P<display_id>[^/]+)?
+ )
+ '''
+ _TESTS = [{
+ 'url': 'https://hrti.hrt.hr/#/video/show/2181385/republika-dokumentarna-serija-16-hd',
+ 'info_dict': {
+ 'id': '2181385',
+ 'display_id': 'republika-dokumentarna-serija-16-hd',
+ 'ext': 'mp4',
+ 'title': 'REPUBLIKA, dokumentarna serija (1/6) (HD)',
+ 'description': 'md5:48af85f620e8e0e1df4096270568544f',
+ 'duration': 2922,
+ 'view_count': int,
+ 'average_rating': int,
+ 'episode_number': int,
+ 'season_number': int,
+ 'age_limit': 12,
+ },
+ 'skip': 'Requires account credentials',
+ }, {
+ 'url': 'https://hrti.hrt.hr/#/video/show/2181385/',
+ 'only_matching': True,
+ }, {
+ 'url': 'hrti:2181385',
+ 'only_matching': True,
+ }, {
+ 'url': 'https://hrti.hrt.hr/video/show/3873068/cuvar-dvorca-dramska-serija-14',
+ 'only_matching': True,
+ }]
+
+ def _real_extract(self, url):
+ mobj = self._match_valid_url(url)
+ video_id = mobj.group('short_id') or mobj.group('id')
+ display_id = mobj.group('display_id') or video_id
+
+ video = self._download_json(
+ '%s/video_id/%s/format/json' % (self._search_url, video_id),
+ display_id, 'Downloading video metadata JSON')['video'][0]
+
+ title_info = video['title']
+ title = title_info['title_long']
+
+ movie = video['video_assets']['movie'][0]
+ m3u8_url = movie['url'].format(TOKEN=self._token)
+ formats = self._extract_m3u8_formats(
+ m3u8_url, display_id, 'mp4', entry_protocol='m3u8_native',
+ m3u8_id='hls')
+
+ description = clean_html(title_info.get('summary_long'))
+ age_limit = parse_age_limit(video.get('parental_control', {}).get('rating'))
+ view_count = int_or_none(video.get('views'))
+ average_rating = int_or_none(video.get('user_rating'))
+ duration = int_or_none(movie.get('duration'))
+
+ return {
+ 'id': video_id,
+ 'display_id': display_id,
+ 'title': title,
+ 'description': description,
+ 'duration': duration,
+ 'view_count': view_count,
+ 'average_rating': average_rating,
+ 'age_limit': age_limit,
+ 'formats': formats,
+ }
+
+
+class HRTiPlaylistIE(HRTiBaseIE):
+ _VALID_URL = r'https?://hrti\.hrt\.hr/(?:#/)?video/list/category/(?P<id>[0-9]+)/(?P<display_id>[^/]+)?'
+ _TESTS = [{
+ 'url': 'https://hrti.hrt.hr/#/video/list/category/212/ekumena',
+ 'info_dict': {
+ 'id': '212',
+ 'title': 'ekumena',
+ },
+ 'playlist_mincount': 8,
+ 'skip': 'Requires account credentials',
+ }, {
+ 'url': 'https://hrti.hrt.hr/#/video/list/category/212/',
+ 'only_matching': True,
+ }, {
+ 'url': 'https://hrti.hrt.hr/video/list/category/212/ekumena',
+ 'only_matching': True,
+ }]
+
+ def _real_extract(self, url):
+ mobj = self._match_valid_url(url)
+ category_id = mobj.group('id')
+ display_id = mobj.group('display_id') or category_id
+
+ response = self._download_json(
+ '%s/category_id/%s/format/json' % (self._search_url, category_id),
+ display_id, 'Downloading video metadata JSON')
+
+ video_ids = try_get(
+ response, lambda x: x['video_listings'][0]['alternatives'][0]['list'],
+ list) or [video['id'] for video in response.get('videos', []) if video.get('id')]
+
+ entries = [self.url_result('hrti:%s' % video_id) for video_id in video_ids]
+
+ return self.playlist_result(entries, category_id, display_id)
diff --git a/yt_dlp/extractor/hse.py b/yt_dlp/extractor/hse.py
new file mode 100644
index 0000000..3cb21d2
--- /dev/null
+++ b/yt_dlp/extractor/hse.py
@@ -0,0 +1,93 @@
+from .common import InfoExtractor
+from ..utils import (
+ ExtractorError,
+ traverse_obj,
+ unified_timestamp,
+)
+
+
+class HSEShowBaseInfoExtractor(InfoExtractor):
+ _GEO_COUNTRIES = ['DE']
+
+ def _extract_redux_data(self, url, video_id):
+ webpage = self._download_webpage(url, video_id)
+ redux = self._html_search_regex(
+ r'window\.__REDUX_DATA__\s*=\s*({.*});?', webpage, 'redux data')
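+        # The inlined JSON may contain literal newlines inside string
+        # values, which json.loads() rejects; strip them before parsing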
+ return self._parse_json(redux.replace('\n', ''), video_id)
+
+ def _extract_formats_and_subtitles(self, sources, video_id):
+ if not sources:
+ raise ExtractorError('No video found', expected=True, video_id=video_id)
+ formats, subtitles = [], {}
+ for src in sources:
+ if src['mimetype'] != 'application/x-mpegURL':
+ continue
+ fmts, subs = self._extract_m3u8_formats_and_subtitles(src['url'], video_id, ext='mp4')
+ formats.extend(fmts)
+ subtitles = self._merge_subtitles(subtitles, subs)
+ return formats, subtitles
+
+
+class HSEShowIE(HSEShowBaseInfoExtractor):
+ _VALID_URL = r'https?://(?:www\.)?hse\.de/dpl/c/tv-shows/(?P<id>[0-9]+)'
+ _TESTS = [{
+ 'url': 'https://www.hse.de/dpl/c/tv-shows/505350',
+ 'info_dict': {
+ 'id': '505350',
+ 'ext': 'mp4',
+ 'title': 'Pfeffinger Mode & Accessoires',
+ 'timestamp': 1638810000,
+ 'upload_date': '20211206',
+ 'channel': 'HSE24',
+ 'uploader': 'Arina Pirayesh'
+ },
+ 'params': {'skip_download': 'm3u8'},
+ }]
+
+ def _real_extract(self, url):
+ video_id = self._match_id(url)
+ json_data = self._extract_redux_data(url, video_id)
+ formats, subtitles = self._extract_formats_and_subtitles(
+ traverse_obj(json_data, ('tvShowPage', 'tvShowVideo', 'sources')), video_id)
+
+ show = traverse_obj(json_data, ('tvShowPage', 'tvShow')) or {}
+ return {
+ 'id': video_id,
+ 'title': show.get('title') or video_id,
+ 'formats': formats,
+ 'timestamp': unified_timestamp(f'{show.get("date")} {show.get("hour")}:00'),
+ 'thumbnail': traverse_obj(json_data, ('tvShowVideo', 'poster')),
+ 'channel': self._search_regex(
+                r'tvShow \| ([A-Z0-9]+)_', show.get('actionFieldText') or '', 'channel', fatal=False),
+ 'uploader': show.get('presenter'),
+ 'subtitles': subtitles,
+ }
+
+
+class HSEProductIE(HSEShowBaseInfoExtractor):
+ _VALID_URL = r'https?://(?:www\.)?hse\.de/dpl/p/product/(?P<id>[0-9]+)'
+ _TESTS = [{
+ 'url': 'https://www.hse.de/dpl/p/product/408630',
+ 'info_dict': {
+ 'id': '408630',
+ 'ext': 'mp4',
+ 'title': 'Hose im Ponte-Mix',
+ 'uploader': 'Judith Williams'
+ },
+ 'params': {'skip_download': 'm3u8'},
+ }]
+
+ def _real_extract(self, url):
+ video_id = self._match_id(url)
+ json_data = self._extract_redux_data(url, video_id)
+ video = traverse_obj(json_data, ('productContent', 'productContent', 'videos', 0)) or {}
+ formats, subtitles = self._extract_formats_and_subtitles(video.get('sources'), video_id)
+
+ return {
+ 'id': video_id,
+ 'title': traverse_obj(json_data, ('productDetail', 'product', 'name', 'short')) or video_id,
+ 'formats': formats,
+ 'subtitles': subtitles,
+ 'thumbnail': video.get('poster'),
+ 'uploader': traverse_obj(json_data, ('productDetail', 'product', 'brand', 'brandName')),
+ }
diff --git a/yt_dlp/extractor/huajiao.py b/yt_dlp/extractor/huajiao.py
new file mode 100644
index 0000000..c498fa3
--- /dev/null
+++ b/yt_dlp/extractor/huajiao.py
@@ -0,0 +1,53 @@
+from .common import InfoExtractor
+from ..utils import (
+ parse_duration,
+ parse_iso8601,
+)
+
+
+class HuajiaoIE(InfoExtractor):
+ IE_DESC = '花椒直播'
+ _VALID_URL = r'https?://(?:www\.)?huajiao\.com/l/(?P<id>[0-9]+)'
+ _TEST = {
+ 'url': 'http://www.huajiao.com/l/38941232',
+ 'md5': 'd08bf9ac98787d24d1e4c0283f2d372d',
+ 'info_dict': {
+ 'id': '38941232',
+ 'ext': 'mp4',
+ 'title': '#新人求关注#',
+ 'description': 're:.*',
+ 'duration': 2424.0,
+ 'thumbnail': r're:^https?://.*\.jpg$',
+ 'timestamp': 1475866459,
+ 'upload_date': '20161007',
+ 'uploader': 'Penny_余姿昀',
+ 'uploader_id': '75206005',
+ }
+ }
+
+ def _real_extract(self, url):
+ video_id = self._match_id(url)
+ webpage = self._download_webpage(url, video_id)
+
+ feed_json = self._search_regex(
+ r'var\s+feed\s*=\s*({.+})', webpage, 'feed json')
+ feed = self._parse_json(feed_json, video_id)
+
+ description = self._html_search_meta(
+ 'description', webpage, 'description', fatal=False)
+
+ def get(section, field):
+ return feed.get(section, {}).get(field)
+
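+        # 'creatime' is a space-delimited datetime (e.g.
+        # '2016-10-07 20:14:19'), hence the ' ' delimiter for parse_iso8601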
+ return {
+ 'id': video_id,
+ 'title': feed['feed']['formated_title'],
+ 'description': description,
+ 'duration': parse_duration(get('feed', 'duration')),
+ 'thumbnail': get('feed', 'image'),
+ 'timestamp': parse_iso8601(feed.get('creatime'), ' '),
+ 'uploader': get('author', 'nickname'),
+ 'uploader_id': get('author', 'uid'),
+ 'formats': self._extract_m3u8_formats(
+ feed['feed']['m3u8'], video_id, 'mp4', 'm3u8_native'),
+ }
diff --git a/yt_dlp/extractor/huffpost.py b/yt_dlp/extractor/huffpost.py
new file mode 100644
index 0000000..69fdc34
--- /dev/null
+++ b/yt_dlp/extractor/huffpost.py
@@ -0,0 +1,90 @@
+import re
+
+from .common import InfoExtractor
+from ..utils import (
+ determine_ext,
+ parse_duration,
+ unified_strdate,
+)
+
+
+class HuffPostIE(InfoExtractor):
+ IE_DESC = 'Huffington Post'
+ _VALID_URL = r'''(?x)
+ https?://(embed\.)?live\.huffingtonpost\.com/
+ (?:
+ r/segment/[^/]+/|
+ HPLEmbedPlayer/\?segmentId=
+ )
+ (?P<id>[0-9a-f]+)'''
+ _EMBED_REGEX = [r'<iframe[^>]+?src=(["\'])(?P<url>https?://embed\.live\.huffingtonpost\.com/.+?)\1']
+
+ _TEST = {
+ 'url': 'http://live.huffingtonpost.com/r/segment/legalese-it/52dd3e4b02a7602131000677',
+ 'md5': '55f5e8981c1c80a64706a44b74833de8',
+ 'info_dict': {
+ 'id': '52dd3e4b02a7602131000677',
+ 'ext': 'mp4',
+ 'title': 'Legalese It! with @MikeSacksHP',
+ 'description': 'This week on Legalese It, Mike talks to David Bosco about his new book on the ICC, "Rough Justice," he also discusses the Virginia AG\'s historic stance on gay marriage, the execution of Edgar Tamayo, the ICC\'s delay of Kenya\'s President and more. ',
+ 'duration': 1549,
+ 'upload_date': '20140124',
+ },
+ 'params': {
+ # m3u8 download
+ 'skip_download': True,
+ },
+ 'expected_warnings': ['HTTP Error 404: Not Found'],
+ }
+
+ def _real_extract(self, url):
+ video_id = self._match_id(url)
+
+ api_url = 'http://embed.live.huffingtonpost.com/api/segments/%s.json' % video_id
+ data = self._download_json(api_url, video_id)['data']
+
+ video_title = data['title']
+ duration = parse_duration(data.get('running_time'))
+ upload_date = unified_strdate(
+ data.get('schedule', {}).get('starts_at') or data.get('segment_start_date_time'))
+ description = data.get('description')
+
+ thumbnails = []
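+ # thumbnail variants encode their resolution in the filename suffix, e.g. "...-640x360.jpg"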
+ for url in filter(None, data['images'].values()):
+ m = re.match(r'.*-([0-9]+x[0-9]+)\.', url)
+ if not m:
+ continue
+ thumbnails.append({
+ 'url': url,
+ 'resolution': m.group(1),
+ })
+
+ formats = []
+ sources = data.get('sources', {})
+ live_sources = list(sources.get('live', {}).items()) + list(sources.get('live_again', {}).items())
+ for key, url in live_sources:
+ ext = determine_ext(url)
+ if ext == 'm3u8':
+ formats.extend(self._extract_m3u8_formats(
+ url, video_id, ext='mp4', m3u8_id='hls', fatal=False))
+ elif ext == 'f4m':
+ formats.extend(self._extract_f4m_formats(
+ url + '?hdcore=2.9.5', video_id, f4m_id='hds', fatal=False))
+ else:
+ formats.append({
+ 'format': key,
+ 'format_id': key.replace('/', '.'),
+ 'ext': 'mp4',
+ 'url': url,
+ 'vcodec': 'none' if key.startswith('audio/') else None,
+ })
+
+ return {
+ 'id': video_id,
+ 'title': video_title,
+ 'description': description,
+ 'formats': formats,
+ 'duration': duration,
+ 'upload_date': upload_date,
+ 'thumbnails': thumbnails,
+ }
diff --git a/yt_dlp/extractor/hungama.py b/yt_dlp/extractor/hungama.py
new file mode 100644
index 0000000..7da8aad
--- /dev/null
+++ b/yt_dlp/extractor/hungama.py
@@ -0,0 +1,203 @@
+from .common import InfoExtractor
+from ..utils import (
+ int_or_none,
+ remove_end,
+ traverse_obj,
+ try_get,
+ unified_timestamp,
+ url_or_none,
+ urlencode_postdata,
+)
+
+
+class HungamaBaseIE(InfoExtractor):
+ def _call_api(self, path, content_id, fatal=False):
+ return traverse_obj(self._download_json(
+ f'https://cpage.api.hungama.com/v2/page/content/{content_id}/{path}/detail',
+ content_id, fatal=fatal, query={
+ 'device': 'web',
+ 'platform': 'a',
+ 'storeId': '1',
+ }), ('data', {dict})) or {}
+
+
+class HungamaIE(HungamaBaseIE):
+ _VALID_URL = r'''(?x)
+ https?://
+ (?:www\.|un\.)?hungama\.com/
+ (?:
+ (?:video|movie|short-film)/[^/]+/|
+ tv-show/(?:[^/]+/){2}\d+/episode/[^/]+/
+ )
+ (?P<id>\d+)
+ '''
+ _TESTS = [{
+ 'url': 'http://www.hungama.com/video/krishna-chants/39349649/',
+ 'md5': '687c5f1e9f832f3b59f44ed0eb1f120a',
+ 'info_dict': {
+ 'id': '39349649',
+ 'ext': 'mp4',
+ 'title': 'Krishna Chants',
+ 'description': ' ',
+ 'upload_date': '20180829',
+ 'duration': 264,
+ 'timestamp': 1535500800,
+ 'view_count': int,
+ 'thumbnail': 'https://images1.hungama.com/tr:n-a_169_m/c/1/0dc/2ca/39349649/39349649_350x197.jpg?v=8',
+ 'tags': 'count:6',
+ },
+ }, {
+ 'url': 'https://un.hungama.com/short-film/adira/102524179/',
+ 'md5': '2278463f5dc9db9054d0c02602d44666',
+ 'info_dict': {
+ 'id': '102524179',
+ 'ext': 'mp4',
+ 'title': 'Adira',
+ 'description': 'md5:df20cd4d41eabb33634f06de1025a4b4',
+ 'upload_date': '20230417',
+ 'timestamp': 1681689600,
+ 'view_count': int,
+ 'thumbnail': 'https://images1.hungama.com/tr:n-a_23_m/c/1/197/ac9/102524179/102524179_350x525.jpg?v=1',
+ 'tags': 'count:7',
+ },
+ }, {
+ 'url': 'https://www.hungama.com/movie/kahaani-2/44129919/',
+ 'only_matching': True,
+ }, {
+ 'url': 'https://www.hungama.com/tv-show/padded-ki-pushup/season-1/44139461/episode/ep-02-training-sasu-pathlaag-karing/44139503/',
+ 'only_matching': True,
+ }]
+
+ def _real_extract(self, url):
+ video_id = self._match_id(url)
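+ # the site's own player resolves the CDN stream via this AJAX endpoint (m=get_video_mdn_url)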
+ video_json = self._download_json(
+ 'https://www.hungama.com/index.php', video_id,
+ data=urlencode_postdata({'content_id': video_id}), headers={
+ 'Content-Type': 'application/x-www-form-urlencoded; charset=UTF-8',
+ 'X-Requested-With': 'XMLHttpRequest',
+ }, query={
+ 'c': 'common',
+ 'm': 'get_video_mdn_url',
+ })
+ formats = self._extract_m3u8_formats(video_json['stream_url'], video_id, ext='mp4', m3u8_id='hls')
+ metadata = self._call_api('movie', video_id)
+
+ return {
+ **traverse_obj(metadata, ('head', 'data', {
+ 'title': ('title', {str}),
+ 'description': ('misc', 'description', {str}),
+ 'duration': ('duration', {int}), # duration in JSON is incorrect if string
+ 'timestamp': ('releasedate', {unified_timestamp}),
+ 'view_count': ('misc', 'playcount', {int_or_none}),
+ 'thumbnail': ('image', {url_or_none}),
+ 'tags': ('misc', 'keywords', ..., {str}),
+ })),
+ 'id': video_id,
+ 'formats': formats,
+ 'subtitles': {
+ 'en': [{
+ 'url': video_json['sub_title'],
+ 'ext': 'vtt',
+ }]
+ } if video_json.get('sub_title') else None,
+ }
+
+
+class HungamaSongIE(InfoExtractor):
+ _VALID_URL = r'https?://(?:www\.|un\.)?hungama\.com/song/[^/]+/(?P<id>\d+)'
+ _TESTS = [{
+ 'url': 'https://www.hungama.com/song/kitni-haseen-zindagi/2931166/',
+ 'md5': '964f46828e8b250aa35e5fdcfdcac367',
+ 'info_dict': {
+ 'id': '2931166',
+ 'ext': 'mp3',
+ 'title': 'Lucky Ali - Kitni Haseen Zindagi',
+ 'track': 'Kitni Haseen Zindagi',
+ 'artist': 'Lucky Ali',
+ 'release_year': 2000,
+ 'thumbnail': 'https://stat2.hungama.ind.in/assets/images/default_images/da-200x200.png',
+ },
+ }, {
+ 'url': 'https://un.hungama.com/song/tum-kya-mile-from-rocky-aur-rani-kii-prem-kahaani/103553672',
+ 'md5': '964f46828e8b250aa35e5fdcfdcac367',
+ 'info_dict': {
+ 'id': '103553672',
+ 'ext': 'mp3',
+ 'title': 'md5:5ebeb1e10771b634ce5f700ce68ae5f4',
+ 'track': 'Tum Kya Mile (From "Rocky Aur Rani Kii Prem Kahaani")',
+ 'artist': 'Pritam Chakraborty, Arijit Singh, Shreya Ghoshal, Amitabh Bhattacharya',
+ 'album': 'Tum Kya Mile (From "Rocky Aur Rani Kii Prem Kahaani")',
+ 'release_year': 2023,
+ 'thumbnail': 'https://images.hungama.com/c/1/7c2/c7b/103553671/103553671_200x200.jpg',
+ },
+ }]
+
+ def _real_extract(self, url):
+ audio_id = self._match_id(url)
+
+ data = self._download_json(
+ 'https://www.hungama.com/audio-player-data/track/%s' % audio_id,
+ audio_id, query={'_country': 'IN'})[0]
+ track = data['song_name']
+ artist = data.get('singer_name')
+ formats = []
+ media_json = self._download_json(data.get('file') or data['preview_link'], audio_id)
+ media_url = try_get(media_json, lambda x: x['response']['media_url'], str)
+ media_type = try_get(media_json, lambda x: x['response']['type'], str)
+
+ if media_url:
+ formats.append({
+ 'url': media_url,
+ 'ext': media_type,
+ 'vcodec': 'none',
+ 'acodec': media_type,
+ })
+
+ title = '%s - %s' % (artist, track) if artist else track
+ thumbnail = data.get('img_src') or data.get('album_image')
+
+ return {
+ 'id': audio_id,
+ 'title': title,
+ 'thumbnail': thumbnail,
+ 'track': track,
+ 'artist': artist,
+ 'album': data.get('album_name') or None,
+ 'release_year': int_or_none(data.get('date')),
+ 'formats': formats,
+ }
+
+
+class HungamaAlbumPlaylistIE(HungamaBaseIE):
+ _VALID_URL = r'https?://(?:www\.|un\.)?hungama\.com/(?P<path>playlists|album)/[^/]+/(?P<id>\d+)'
+ _TESTS = [{
+ 'url': 'https://www.hungama.com/album/bhuj-the-pride-of-india/69481490/',
+ 'playlist_mincount': 7,
+ 'info_dict': {
+ 'id': '69481490',
+ },
+ }, {
+ 'url': 'https://www.hungama.com/playlists/hindi-jan-to-june-2021/123063/',
+ 'playlist_mincount': 33,
+ 'info_dict': {
+ 'id': '123063',
+ },
+ }, {
+ 'url': 'https://un.hungama.com/album/what-jhumka-%3F-from-rocky-aur-rani-kii-prem-kahaani/103891805/',
+ 'playlist_mincount': 1,
+ 'info_dict': {
+ 'id': '103891805',
+ },
+ }]
+
+ def _real_extract(self, url):
+ playlist_id, path = self._match_valid_url(url).group('id', 'path')
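+ # the API path segment is singular, e.g. "playlists" -> "playlist"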
+ data = self._call_api(remove_end(path, 's'), playlist_id, fatal=True)
+
+ def entries():
+ for song_url in traverse_obj(data, ('body', 'rows', ..., 'data', 'misc', 'share', {url_or_none})):
+ yield self.url_result(song_url, HungamaSongIE)
+
+ return self.playlist_result(entries(), playlist_id)
diff --git a/yt_dlp/extractor/huya.py b/yt_dlp/extractor/huya.py
new file mode 100644
index 0000000..c4965f9
--- /dev/null
+++ b/yt_dlp/extractor/huya.py
@@ -0,0 +1,137 @@
+import hashlib
+import random
+import re
+
+from ..compat import compat_urlparse, compat_b64decode
+
+from ..utils import (
+ ExtractorError,
+ int_or_none,
+ str_or_none,
+ try_get,
+ unescapeHTML,
+ update_url_query,
+)
+
+from .common import InfoExtractor
+
+
+class HuyaLiveIE(InfoExtractor):
+ _VALID_URL = r'https?://(?:www\.|m\.)?huya\.com/(?P<id>[^/#?&]+)(?:\D|$)'
+ IE_NAME = 'huya:live'
+ IE_DESC = 'huya.com'
+ _TESTS = [{
+ 'url': 'https://www.huya.com/572329',
+ 'info_dict': {
+ 'id': '572329',
+ 'title': str,
+ 'description': str,
+ 'is_live': True,
+ 'view_count': int,
+ },
+ 'params': {
+ 'skip_download': True,
+ },
+ }, {
+ 'url': 'https://www.huya.com/xiaoyugame',
+ 'only_matching': True
+ }]
+
+ _RESOLUTION = {
+ '蓝光': {
+ 'width': 1920,
+ 'height': 1080,
+ },
+ '超清': {
+ 'width': 1280,
+ 'height': 720,
+ },
+ '流畅': {
+ 'width': 800,
+ 'height': 480
+ }
+ }
+
+ def _real_extract(self, url):
+ video_id = self._match_id(url)
+ webpage = self._download_webpage(url, video_id=video_id)
+ stream_data = self._search_json(r'stream:\s', webpage, 'stream', video_id=video_id, default=None)
+ room_info = try_get(stream_data, lambda x: x['data'][0]['gameLiveInfo'])
+ if not room_info:
+ raise ExtractorError('Cannot extract room info', expected=True)
+ title = room_info.get('roomName') or room_info.get('introduction') or self._html_extract_title(webpage)
+ screen_type = room_info.get('screenType')
+ live_source_type = room_info.get('liveSourceType')
+ stream_info_list = stream_data['data'][0]['gameStreamInfoList']
+ if not stream_info_list:
+ raise ExtractorError('Video is offline', expected=True)
+ formats = []
+ for stream_info in stream_info_list:
+ stream_url = stream_info.get('sFlvUrl')
+ if not stream_url:
+ continue
+ stream_name = stream_info.get('sStreamName')
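+ # these source types need their "anti-code" query re-signed (see encrypt())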
+ re_secret = not screen_type and live_source_type in (0, 8, 13)
+ params = dict(compat_urlparse.parse_qsl(unescapeHTML(stream_info['sFlvAntiCode'])))
+ fm, ss = '', ''
+ if re_secret:
+ fm, ss = self.encrypt(params, stream_info, stream_name)
+ for si in stream_data.get('vMultiStreamInfo') or []:
+ display_name, bitrate = re.fullmatch(
+ r'(.+?)(?:(\d+)M)?', si.get('sDisplayName')).groups()
+ rate = si.get('iBitRate')
+ if rate:
+ params['ratio'] = rate
+ else:
+ params.pop('ratio', None)
+ if bitrate:
+ rate = int(bitrate) * 1000
+ if re_secret:
+ params['wsSecret'] = hashlib.md5('_'.join(
+ [fm, params['u'], stream_name, ss, params['wsTime']]).encode()).hexdigest()
+ formats.append({
+ 'ext': stream_info.get('sFlvUrlSuffix'),
+ 'format_id': str_or_none(stream_info.get('iLineIndex')),
+ 'tbr': rate,
+ 'url': update_url_query(f'{stream_url}/{stream_name}.{stream_info.get("sFlvUrlSuffix")}',
+ query=params),
+ **self._RESOLUTION.get(display_name, {}),
+ })
+
+ return {
+ 'id': video_id,
+ 'title': title,
+ 'formats': formats,
+ 'view_count': room_info.get('totalCount'),
+ 'thumbnail': room_info.get('screenshot'),
+ 'description': room_info.get('contentIntro'),
+ 'http_headers': {
+ 'Origin': 'https://www.huya.com',
+ 'Referer': 'https://www.huya.com/',
+ },
+ }
+
+ def encrypt(self, params, stream_info, stream_name):
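+ # re-implements the web player's signing: derives a pseudo user id from
+ # wsTime, then hashes seqid|ctype|t into the token used for wsSecret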
+ ct = int_or_none(params.get('wsTime'), 16) + random.random()
+ presenter_uid = stream_info['lPresenterUid']
+ if not stream_name.startswith(str(presenter_uid)):
+ uid = presenter_uid
+ else:
+ uid = int_or_none(ct % 1e7 * 1e6 % 0xffffffff)
+ u1 = uid & 0xffffffff00000000
+ u2 = uid & 0xffffffff
+ u3 = uid & 0xffffff
+ u = u1 | u2 >> 24 | u3 << 8
+ params.update({
+ 'u': str_or_none(u),
+ 'seqid': str_or_none(int_or_none(ct * 1000) + uid),
+ 'ver': '1',
+ 'uuid': int_or_none(ct % 1e7 * 1e6 % 0xffffffff),
+ 't': '100',
+ })
+ fm = compat_b64decode(params['fm']).decode().split('_', 1)[0]
+ ss = hashlib.md5('|'.join([params['seqid'], params['ctype'], params['t']]).encode()).hexdigest()
+ return fm, ss
diff --git a/yt_dlp/extractor/hypem.py b/yt_dlp/extractor/hypem.py
new file mode 100644
index 0000000..54db7b3
--- /dev/null
+++ b/yt_dlp/extractor/hypem.py
@@ -0,0 +1,48 @@
+from .common import InfoExtractor
+from ..utils import int_or_none
+
+
+class HypemIE(InfoExtractor):
+ _VALID_URL = r'https?://(?:www\.)?hypem\.com/track/(?P<id>[0-9a-z]{5})'
+ _TEST = {
+ 'url': 'http://hypem.com/track/1v6ga/BODYWORK+-+TAME',
+ 'md5': 'b9cc91b5af8995e9f0c1cee04c575828',
+ 'info_dict': {
+ 'id': '1v6ga',
+ 'ext': 'mp3',
+ 'title': 'Tame',
+ 'uploader': 'BODYWORK',
+ 'timestamp': 1371810457,
+ 'upload_date': '20130621',
+ }
+ }
+
+ def _real_extract(self, url):
+ track_id = self._match_id(url)
+
+ response = self._download_webpage(url, track_id)
+
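+ # the player's track list is embedded as JSON in a <script id="displayList-data"> tag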
+ track = self._parse_json(self._html_search_regex(
+ r'(?s)<script\s+type="application/json"\s+id="displayList-data">(.+?)</script>',
+ response, 'tracks'), track_id)['tracks'][0]
+
+ track_id = track['id']
+ title = track['song']
+
+ final_url = self._download_json(
+ 'http://hypem.com/serve/source/%s/%s' % (track_id, track['key']),
+ track_id, 'Downloading metadata', headers={
+ 'Content-Type': 'application/json'
+ })['url']
+
+ return {
+ 'id': track_id,
+ 'url': final_url,
+ 'ext': 'mp3',
+ 'title': title,
+ 'uploader': track.get('artist'),
+ 'duration': int_or_none(track.get('time')),
+ 'timestamp': int_or_none(track.get('ts')),
+ 'track': title,
+ }
diff --git a/yt_dlp/extractor/hypergryph.py b/yt_dlp/extractor/hypergryph.py
new file mode 100644
index 0000000..96e452a
--- /dev/null
+++ b/yt_dlp/extractor/hypergryph.py
@@ -0,0 +1,32 @@
+from .common import InfoExtractor
+from ..utils import js_to_json, traverse_obj
+
+
+class MonsterSirenHypergryphMusicIE(InfoExtractor):
+ _VALID_URL = r'https?://monster-siren\.hypergryph\.com/music/(?P<id>\d+)'
+ _TESTS = [{
+ 'url': 'https://monster-siren.hypergryph.com/music/514562',
+ 'info_dict': {
+ 'id': '514562',
+ 'ext': 'wav',
+ 'artists': ['塞壬唱片-MSR'],
+ 'album': 'Flame Shadow',
+ 'title': 'Flame Shadow',
+ }
+ }]
+
+ def _real_extract(self, url):
+ audio_id = self._match_id(url)
+ webpage = self._download_webpage(url, audio_id)
+ json_data = self._search_json(
+ r'window\.g_initialProps\s*=', webpage, 'data', audio_id, transform_source=js_to_json)
+
+ return {
+ 'id': audio_id,
+ 'title': traverse_obj(json_data, ('player', 'songDetail', 'name')),
+ 'url': traverse_obj(json_data, ('player', 'songDetail', 'sourceUrl')),
+ 'ext': 'wav',
+ 'vcodec': 'none',
+ 'artists': traverse_obj(json_data, ('player', 'songDetail', 'artists', ...)),
+ 'album': traverse_obj(json_data, ('musicPlay', 'albumDetail', 'name'))
+ }
diff --git a/yt_dlp/extractor/hytale.py b/yt_dlp/extractor/hytale.py
new file mode 100644
index 0000000..0f4dcc3
--- /dev/null
+++ b/yt_dlp/extractor/hytale.py
@@ -0,0 +1,60 @@
+import re
+
+from .common import InfoExtractor
+from ..utils import traverse_obj
+
+
+class HytaleIE(InfoExtractor):
+ _VALID_URL = r'https?://(?:www\.)?hytale\.com/news/\d+/\d+/(?P<id>[a-z0-9-]+)'
+ _TESTS = [{
+ 'url': 'https://hytale.com/news/2021/07/summer-2021-development-update',
+ 'info_dict': {
+ 'id': 'summer-2021-development-update',
+ 'title': 'Summer 2021 Development Update',
+ },
+ 'playlist_count': 4,
+ 'playlist': [{
+ 'md5': '0854ebe347d233ee19b86ab7b2ead610',
+ 'info_dict': {
+ 'id': 'ed51a2609d21bad6e14145c37c334999',
+ 'ext': 'mp4',
+ 'title': 'Avatar Personalization',
+ 'thumbnail': r're:https://videodelivery\.net/\w+/thumbnails/thumbnail\.jpg',
+ }
+ }]
+ }, {
+ 'url': 'https://www.hytale.com/news/2019/11/hytale-graphics-update',
+ 'info_dict': {
+ 'id': 'hytale-graphics-update',
+ 'title': 'Hytale graphics update',
+ },
+ 'playlist_count': 2,
+ }]
+
+ def _real_initialize(self):
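+ # the /media page maps each Cloudflare Stream hash to its clip caption;
+ # cache the mapping so videos found in articles can be titled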
+ media_webpage = self._download_webpage(
+ 'https://hytale.com/media', None, note='Downloading list of media', fatal=False) or ''
+
+ clips_json = traverse_obj(
+ self._search_json(
+ r'window\.__INITIAL_COMPONENTS_STATE__\s*=\s*\[',
+ media_webpage, 'clips json', None),
+ ('media', 'clips')) or []
+
+ self._titles = {clip.get('src'): clip.get('caption') for clip in clips_json}
+
+ def _real_extract(self, url):
+ playlist_id = self._match_id(url)
+ webpage = self._download_webpage(url, playlist_id)
+ entries = [
+ self.url_result(
+ f'https://cloudflarestream.com/{video_hash}/manifest/video.mpd?parentOrigin=https%3A%2F%2Fhytale.com',
+ title=self._titles.get(video_hash), url_transparent=True)
+ for video_hash in re.findall(
+ r'<stream\s+class\s*=\s*"ql-video\s+cf-stream"\s+src\s*=\s*"([a-f0-9]{32})"',
+ webpage)
+ ]
+
+ return self.playlist_result(entries, playlist_id, self._og_search_title(webpage))
diff --git a/yt_dlp/extractor/icareus.py b/yt_dlp/extractor/icareus.py
new file mode 100644
index 0000000..d081cf4
--- /dev/null
+++ b/yt_dlp/extractor/icareus.py
@@ -0,0 +1,181 @@
+import re
+
+from .common import InfoExtractor
+from ..utils import (
+ clean_html,
+ determine_ext,
+ get_element_by_class,
+ int_or_none,
+ merge_dicts,
+ parse_bitrate,
+ parse_resolution,
+ remove_end,
+ str_or_none,
+ url_or_none,
+ urlencode_postdata,
+)
+
+
+class IcareusIE(InfoExtractor):
+ _DOMAINS = '|'.join(map(re.escape, (
+ 'asahitv.fi',
+ 'helsinkikanava.fi',
+ 'hyvinvointitv.fi',
+ 'inez.fi',
+ 'permanto.fi',
+ 'suite.icareus.com',
+ 'videos.minifiddlers.org',
+ )))
+ _VALID_URL = rf'(?P<base_url>https?://(?:www\.)?(?:{_DOMAINS}))/[^?#]+/player/[^?#]+\?(?:[^#]+&)?(?:assetId|eventId)=(?P<id>\d+)'
+ _TESTS = [{
+ 'url': 'https://www.helsinkikanava.fi/fi_FI/web/helsinkikanava/player/vod?assetId=68021894',
+ 'md5': 'ca0b62ffc814a5411dfa6349cf5adb8a',
+ 'info_dict': {
+ 'id': '68021894',
+ 'ext': 'mp4',
+ 'title': 'Perheiden parhaaksi',
+ 'description': 'md5:295785ea408e5ac00708766465cc1325',
+ 'thumbnail': 'https://www.helsinkikanava.fi/image/image_gallery?img_id=68022501',
+ 'upload_date': '20200924',
+ 'timestamp': 1600938300,
+ },
+ }, { # Recorded livestream
+ 'url': 'https://www.helsinkikanava.fi/fi/web/helsinkikanava/player/event/view?eventId=76241489',
+ 'md5': '014327e69dfa7b949fcc861f6d162d6d',
+ 'info_dict': {
+ 'id': '76258304',
+ 'ext': 'mp4',
+ 'title': 'Helsingin kaupungin ja HUSin tiedotustilaisuus koronaepidemiatilanteesta 24.11.2020',
+ 'description': 'md5:3129d041c6fbbcdc7fe68d9a938fef1c',
+ 'thumbnail': 'https://icareus-suite.secure2.footprint.net/image/image_gallery?img_id=76288630',
+ 'upload_date': '20201124',
+ 'timestamp': 1606206600,
+ },
+ }, { # Non-m3u8 stream
+ 'url': 'https://suite.icareus.com/fi/web/westend-indians/player/vod?assetId=47567389',
+ 'md5': '72fc04ee971bbedc44405cdf16c990b6',
+ 'info_dict': {
+ 'id': '47567389',
+ 'ext': 'mp4',
+ 'title': 'Omatoiminen harjoittelu - Laukominen',
+ 'description': '',
+ 'thumbnail': 'https://suite.icareus.com/image/image_gallery?img_id=47568162',
+ 'upload_date': '20200319',
+ 'timestamp': 1584658080,
+ },
+ }, {
+ 'url': 'https://asahitv.fi/fi/web/asahi/player/vod?assetId=89415818',
+ 'only_matching': True
+ }, {
+ 'url': 'https://hyvinvointitv.fi/fi/web/hyvinvointitv/player/vod?assetId=89149730',
+ 'only_matching': True
+ }, {
+ 'url': 'https://inez.fi/fi/web/inez-media/player/vod?assetId=71328822',
+ 'only_matching': True
+ }, {
+ 'url': 'https://www.permanto.fi/fi/web/alfatv/player/vod?assetId=135497515',
+ 'only_matching': True
+ }, {
+ 'url': 'https://videos.minifiddlers.org/web/international-minifiddlers/player/vod?assetId=1982759',
+ 'only_matching': True
+ }]
+
+ def _real_extract(self, url):
+ base_url, temp_id = self._match_valid_url(url).groups()
+ webpage = self._download_webpage(url, temp_id)
+
+ video_id = self._search_regex(r"_icareus\['itemId'\]\s*=\s*'(\d+)'", webpage, 'video_id')
+ organization_id = self._search_regex(r"_icareus\['organizationId'\]\s*=\s*'(\d+)'", webpage, 'organization_id')
+
+ assets = self._download_json(
+ self._search_regex(r'var\s+publishingServiceURL\s*=\s*"(http[^"]+)";', webpage, 'api_base'),
+ video_id, data=urlencode_postdata({
+ 'version': '03',
+ 'action': 'getAssetPlaybackUrls',
+ 'organizationId': organization_id,
+ 'assetId': video_id,
+ 'token': self._search_regex(r"_icareus\['token'\]\s*=\s*'([a-f0-9]+)'", webpage, 'icareus_token'),
+ }))
+
+ subtitles = {
+ remove_end(sdesc.split(' ')[0], ':'): [{'url': url_or_none(surl)}]
+ for _, sdesc, surl in assets.get('subtitles') or []
+ }
+
+ formats = [{
+ 'format': item.get('name'),
+ 'format_id': 'audio',
+ 'vcodec': 'none',
+ 'url': url_or_none(item['url']),
+ 'tbr': int_or_none(self._search_regex(
+ r'\((\d+)\s*k\)', item.get('name') or '', 'audio bitrate', default=None)),
+ } for item in assets.get('audio_urls') or [] if url_or_none(item.get('url'))]
+
+ for item in assets.get('urls') or []:
+ video_url = url_or_none(item.get('url'))
+ if video_url is None:
+ continue
+ ext = determine_ext(video_url)
+ if ext == 'm3u8':
+ fmts, subs = self._extract_m3u8_formats_and_subtitles(
+ video_url, video_id, 'mp4', m3u8_id='hls', fatal=False)
+ formats.extend(fmts)
+ self._merge_subtitles(subs, target=subtitles)
+ else:
+ fmt = item.get('name')
+ formats.append({
+ 'url': video_url,
+ 'format': fmt,
+ 'tbr': parse_bitrate(fmt),
+ 'format_id': str_or_none(item.get('id')),
+ **parse_resolution(fmt),
+ })
+
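+ # metadata comes from JSON-LD when present, otherwise from the getAsset
+ # API (requires a page token), otherwise from unpublished-event markup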
+ info, token, live_title = self._search_json_ld(webpage, video_id, default={}), None, None
+ if not info:
+ token = self._search_regex(
+ r'data\s*:\s*{action:"getAsset".*?token:\'([a-f0-9]+)\'}', webpage, 'token', default=None)
+ if not token:
+ live_title = get_element_by_class('unpublished-info-item future-event-title', webpage)
+
+ if token:
+ metadata = self._download_json(
+ f'{base_url}/icareus-suite-api-portlet/publishing',
+ video_id, fatal=False, data=urlencode_postdata({
+ 'version': '03',
+ 'action': 'getAsset',
+ 'organizationId': organization_id,
+ 'assetId': video_id,
+ 'languageId': 'en_US',
+ 'userId': '0',
+ 'token': token,
+ })) or {}
+ info = {
+ 'title': metadata.get('name'),
+ 'description': metadata.get('description'),
+ 'timestamp': int_or_none(metadata.get('date'), scale=1000),
+ 'duration': int_or_none(metadata.get('duration')),
+ 'thumbnail': url_or_none(metadata.get('thumbnailMedium')),
+ }
+ elif live_title: # Recorded livestream
+ info = {
+ 'title': live_title,
+ 'description': get_element_by_class('unpublished-info-item future-event-description', webpage),
+ 'timestamp': int_or_none(self._search_regex(
+ r'var startEvent\s*=\s*(\d+);', webpage, 'uploadDate', fatal=False), scale=1000),
+ }
+
+ thumbnails = info.get('thumbnails') or [{
+ 'url': url_or_none(info.get('thumbnail') or assets.get('thumbnail'))
+ }]
+
+ return merge_dicts({
+ 'id': video_id,
+ 'title': None,
+ 'formats': formats,
+ 'subtitles': subtitles,
+ 'description': clean_html(info.get('description')),
+ 'thumbnails': thumbnails if thumbnails[0]['url'] else None,
+ }, info)
diff --git a/yt_dlp/extractor/ichinanalive.py b/yt_dlp/extractor/ichinanalive.py
new file mode 100644
index 0000000..9d55ddc
--- /dev/null
+++ b/yt_dlp/extractor/ichinanalive.py
@@ -0,0 +1,162 @@
+from .common import InfoExtractor
+from ..utils import ExtractorError, str_or_none, traverse_obj, unified_strdate
+from ..compat import compat_str
+
+
+class IchinanaLiveIE(InfoExtractor):
+ IE_NAME = '17live'
+ _VALID_URL = r'https?://(?:www\.)?17\.live/(?:[^/]+/)*(?:live|profile/r)/(?P<id>\d+)'
+ _TESTS = [{
+ 'url': 'https://17.live/live/3773096',
+ 'info_dict': {
+ 'id': '3773096',
+ 'title': '萠珈☕🤡🍫moka',
+ 'is_live': True,
+ 'uploader': '萠珈☕🤡🍫moka',
+ 'uploader_id': '3773096',
+ 'like_count': 366,
+ 'view_count': 18121,
+ 'timestamp': 1630569012,
+ },
+ 'skip': 'Livestream was running as of writing, but may have ended by testing time',
+ }, {
+ 'note': 'nothing except language differs',
+ 'url': 'https://17.live/ja/live/3773096',
+ 'only_matching': True,
+ }]
+
+ @classmethod
+ def suitable(cls, url):
+ return not IchinanaLiveClipIE.suitable(url) and super().suitable(url)
+
+ def _real_extract(self, url):
+ video_id = self._match_id(url)
+ url = 'https://17.live/live/%s' % video_id
+
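+ # probe the room first; the API answers 420 with message "ended" for finished streams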
+ enter = self._download_json(
+ 'https://api-dsa.17app.co/api/v1/lives/%s/enter' % video_id, video_id,
+ headers={'Referer': url}, fatal=False, expected_status=420,
+ data=b'\0')
+ if enter and enter.get('message') == 'ended':
+ raise ExtractorError('This live has ended.', expected=True)
+
+ view_data = self._download_json(
+ 'https://api-dsa.17app.co/api/v1/lives/%s' % video_id, video_id,
+ headers={'Referer': url})
+
+ uploader = traverse_obj(
+ view_data, ('userInfo', 'displayName'), ('userInfo', 'openID'))
+
+ video_urls = view_data.get('rtmpUrls')
+ if not video_urls:
+ raise ExtractorError('unable to extract live URL information')
+ formats = []
+ for (name, value) in video_urls[0].items():
+ if not isinstance(value, compat_str):
+ continue
+ if not value.startswith('http'):
+ continue
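+ # rank formats by naming convention: "High" > default > "web" > "Low"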
+ quality = -1
+ if 'web' in name:
+ quality -= 1
+ if 'High' in name:
+ quality += 4
+ if 'Low' in name:
+ quality -= 2
+ formats.append({
+ 'format_id': name,
+ 'url': value,
+ 'quality': quality,
+ 'http_headers': {'Referer': url},
+ 'ext': 'flv',
+ 'vcodec': 'h264',
+ 'acodec': 'aac',
+ })
+
+ return {
+ 'id': video_id,
+ 'title': uploader or video_id,
+ 'formats': formats,
+ 'is_live': True,
+ 'uploader': uploader,
+ 'uploader_id': video_id,
+ 'like_count': view_data.get('receivedLikeCount'),
+ 'view_count': view_data.get('viewerCount'),
+ 'thumbnail': view_data.get('coverPhoto'),
+ 'description': view_data.get('caption'),
+ 'timestamp': view_data.get('beginTime'),
+ }
+
+
+class IchinanaLiveClipIE(InfoExtractor):
+ IE_NAME = '17live:clip'
+ _VALID_URL = r'https?://(?:www\.)?17\.live/(?:[^/]+/)*profile/r/(?P<uploader_id>\d+)/clip/(?P<id>[^/]+)'
+ _TESTS = [{
+ 'url': 'https://17.live/profile/r/1789280/clip/1bHQSK8KUieruFXaCH4A4upCzlN',
+ 'info_dict': {
+ 'id': '1bHQSK8KUieruFXaCH4A4upCzlN',
+ 'title': 'マチコ先生🦋Class💋',
+ 'description': 'マチ戦隊 第一次 バスターコール\n総額200万coin!\n動画制作@うぉーかー🌱Walker🎫',
+ 'uploader_id': '1789280',
+ },
+ }, {
+ 'url': 'https://17.live/ja/profile/r/1789280/clip/1bHQSK8KUieruFXaCH4A4upCzlN',
+ 'only_matching': True,
+ }]
+
+ def _real_extract(self, url):
+ uploader_id, video_id = self._match_valid_url(url).groups()
+ url = 'https://17.live/profile/r/%s/clip/%s' % (uploader_id, video_id)
+
+ view_data = self._download_json(
+ 'https://api-dsa.17app.co/api/v1/clips/%s' % video_id, video_id,
+ headers={'Referer': url})
+
+ uploader = traverse_obj(
+ view_data, ('userInfo', 'displayName'), ('userInfo', 'name'))
+
+ formats = []
+ if view_data.get('videoURL'):
+ formats.append({
+ 'format_id': 'video',
+ 'url': view_data['videoURL'],
+ 'quality': -1,
+ })
+ if view_data.get('transcodeURL'):
+ formats.append({
+ 'format_id': 'transcode',
+ 'url': view_data['transcodeURL'],
+ 'quality': -1,
+ })
+ if view_data.get('srcVideoURL'):
+ # highest quality
+ formats.append({
+ 'format_id': 'srcVideo',
+ 'url': view_data['srcVideoURL'],
+ 'quality': 1,
+ })
+
+ for fmt in formats:
+ fmt.update({
+ 'ext': 'mp4',
+ 'protocol': 'https',
+ 'vcodec': 'h264',
+ 'acodec': 'aac',
+ 'http_headers': {'Referer': url},
+ })
+
+ return {
+ 'id': video_id,
+ 'title': uploader or video_id,
+ 'formats': formats,
+ 'uploader': uploader,
+ 'uploader_id': uploader_id,
+ 'like_count': view_data.get('likeCount'),
+ 'view_count': view_data.get('viewCount'),
+ 'thumbnail': view_data.get('imageURL'),
+ 'duration': view_data.get('duration'),
+ 'description': view_data.get('caption'),
+ 'upload_date': unified_strdate(str_or_none(view_data.get('createdAt'))),
+ }
diff --git a/yt_dlp/extractor/idolplus.py b/yt_dlp/extractor/idolplus.py
new file mode 100644
index 0000000..3c905b0
--- /dev/null
+++ b/yt_dlp/extractor/idolplus.py
@@ -0,0 +1,117 @@
+from .common import InfoExtractor
+from ..utils import traverse_obj, try_call, url_or_none
+
+
+class IdolPlusIE(InfoExtractor):
+ _VALID_URL = r'https?://(?:www\.)?idolplus\.com/z[us]/(?:concert/|contents/?\?(?:[^#]+&)?albumId=)(?P<id>\w+)'
+ _TESTS = [{
+ 'url': 'https://idolplus.com/zs/contents?albumId=M012077298PPV00',
+ 'md5': '2ace3f4661c943a2f7e79f0b88cea1e7',
+ 'info_dict': {
+ 'id': 'M012077298PPV00',
+ 'ext': 'mp4',
+ 'title': '[MultiCam] Aegyo on Top of Aegyo (IZ*ONE EATING TRIP)',
+ 'release_date': '20200707',
+ 'formats': 'count:65',
+ },
+ 'params': {'format': '532-KIM_MINJU'},
+ }, {
+ 'url': 'https://idolplus.com/zs/contents?albumId=M01232H058PPV00&catId=E9TX5',
+ 'info_dict': {
+ 'id': 'M01232H058PPV00',
+ 'ext': 'mp4',
+ 'title': 'YENA (CIRCLE CHART MUSIC AWARDS 2022 RED CARPET)',
+ 'release_date': '20230218',
+ 'formats': 'count:5',
+ },
+ 'params': {'skip_download': 'm3u8'},
+ }, {
+ # live stream
+ 'url': 'https://idolplus.com/zu/contents?albumId=M012323174PPV00',
+ 'info_dict': {
+ 'id': 'M012323174PPV00',
+ 'ext': 'mp4',
+ 'title': 'Hanteo Music Awards 2022 DAY2',
+ 'release_date': '20230211',
+ 'formats': 'count:5',
+ },
+ 'params': {'skip_download': 'm3u8'},
+ }, {
+ 'url': 'https://idolplus.com/zs/concert/M012323039PPV00',
+ 'info_dict': {
+ 'id': 'M012323039PPV00',
+ 'ext': 'mp4',
+ 'title': 'CIRCLE CHART MUSIC AWARDS 2022',
+ 'release_date': '20230218',
+ 'formats': 'count:5',
+ },
+ 'params': {'skip_download': 'm3u8'},
+ }]
+
+ def _real_extract(self, url):
+ video_id = self._match_id(url)
+ data_list = traverse_obj(self._download_json(
+ 'https://idolplus.com/api/zs/viewdata/ruleset/build', video_id,
+ headers={'App_type': 'web', 'Country_Code': 'KR'}, query={
+ 'rulesetId': 'contents',
+ 'albumId': video_id,
+ 'distribute': 'PRD',
+ 'loggedIn': 'false',
+ 'region': 'zs',
+ 'countryGroup': '00010',
+ 'lang': 'en',
+ 'saId': '999999999998',
+ }), ('data', 'viewData', ...))
+
+ player_data = {}
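+ # walk the nested viewData component tree until the "player" component turns up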
+ while data_list:
+ player_data = data_list.pop()
+ if traverse_obj(player_data, 'type') == 'player':
+ break
+ elif traverse_obj(player_data, ('dataList', ...)):
+ data_list += player_data['dataList']
+
+ formats = self._extract_m3u8_formats(traverse_obj(player_data, (
+ 'vodPlayerList', 'vodProfile', 0, 'vodServer', 0, 'video_url', {url_or_none})), video_id)
+
+ subtitles = {}
+ for caption in traverse_obj(player_data, ('vodPlayerList', 'caption')) or []:
+ subtitles.setdefault(caption.get('lang') or 'und', []).append({
+ 'url': caption.get('smi_url'),
+ 'ext': 'vtt',
+ })
+
+ # Add member multicams as alternative formats
+ if (traverse_obj(player_data, ('detail', 'has_cuesheet')) == 'Y'
+ and traverse_obj(player_data, ('detail', 'is_omni_member')) == 'Y'):
+ cuesheet = traverse_obj(self._download_json(
+ 'https://idolplus.com/gapi/contents/v1.0/content/cuesheet', video_id,
+ 'Downloading JSON metadata for member multicams',
+ headers={'App_type': 'web', 'Country_Code': 'KR'}, query={
+ 'ALBUM_ID': video_id,
+ 'COUNTRY_GRP': '00010',
+ 'LANG': 'en',
+ 'SA_ID': '999999999998',
+ 'COUNTRY_CODE': 'KR',
+ }), ('data', 'cuesheet_item', 0))
+
+ for member in traverse_obj(cuesheet, ('members', ...)):
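+ # omni_view_index is 1-based; map it onto the omni_view camera list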
+ index = try_call(lambda: int(member['omni_view_index']) - 1)
+ member_video_url = traverse_obj(cuesheet, ('omni_view', index, 'cdn_url', 0, 'url', {url_or_none}))
+ if not member_video_url:
+ continue
+ member_formats = self._extract_m3u8_formats(
+ member_video_url, video_id, note=f'Downloading m3u8 for multicam {member["name"]}')
+ for mf in member_formats:
+ mf['format_id'] = f'{mf["format_id"]}-{member["name"].replace(" ", "_")}'
+ formats.extend(member_formats)
+
+ return {
+ 'id': video_id,
+ 'title': traverse_obj(player_data, ('detail', 'albumName')),
+ 'formats': formats,
+ 'subtitles': subtitles,
+ 'release_date': traverse_obj(player_data, ('detail', 'broadcastDate')),
+ }
diff --git a/yt_dlp/extractor/ign.py b/yt_dlp/extractor/ign.py
new file mode 100644
index 0000000..1c4f105
--- /dev/null
+++ b/yt_dlp/extractor/ign.py
@@ -0,0 +1,403 @@
+import re
+import urllib.parse
+
+from .common import InfoExtractor
+from ..compat import compat_parse_qs
+from ..networking.exceptions import HTTPError
+from ..utils import (
+ ExtractorError,
+ determine_ext,
+ error_to_compat_str,
+ extract_attributes,
+ int_or_none,
+ merge_dicts,
+ parse_iso8601,
+ strip_or_none,
+ traverse_obj,
+ url_or_none,
+ urljoin,
+)
+
+
+class IGNBaseIE(InfoExtractor):
+ def _call_api(self, slug):
+ return self._download_json(
+ 'http://apis.ign.com/{0}/v3/{0}s/slug/{1}'.format(self._PAGE_TYPE, slug), slug)
+
+ def _checked_call_api(self, slug):
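+ # surface 404s as "expired content" rather than a bare HTTP error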
+ try:
+ return self._call_api(slug)
+ except ExtractorError as e:
+ if isinstance(e.cause, HTTPError) and e.cause.status == 404:
+ e.cause.args = e.cause.args or [
+ e.cause.response.url, e.cause.status, e.cause.reason]
+ raise ExtractorError(
+ 'Content not found: expired?', cause=e.cause,
+ expected=True)
+ raise
+
+ def _extract_video_info(self, video, fatal=True):
+ video_id = video['videoId']
+
+ formats = []
+ refs = traverse_obj(video, 'refs', expected_type=dict) or {}
+
+ m3u8_url = url_or_none(refs.get('m3uUrl'))
+ if m3u8_url:
+ formats.extend(self._extract_m3u8_formats(
+ m3u8_url, video_id, 'mp4', 'm3u8_native',
+ m3u8_id='hls', fatal=False))
+
+ f4m_url = url_or_none(refs.get('f4mUrl'))
+ if f4m_url:
+ formats.extend(self._extract_f4m_formats(
+ f4m_url, video_id, f4m_id='hds', fatal=False))
+
+ for asset in (video.get('assets') or []):
+ asset_url = url_or_none(asset.get('url'))
+ if not asset_url:
+ continue
+ formats.append({
+ 'url': asset_url,
+ 'tbr': int_or_none(asset.get('bitrate'), 1000),
+ 'fps': int_or_none(asset.get('frame_rate')),
+ 'height': int_or_none(asset.get('height')),
+ 'width': int_or_none(asset.get('width')),
+ })
+
+ mezzanine_url = traverse_obj(
+ video, ('system', 'mezzanineUrl'), expected_type=url_or_none)
+ if mezzanine_url:
+ formats.append({
+ 'ext': determine_ext(mezzanine_url, 'mp4'),
+ 'format_id': 'mezzanine',
+ 'quality': 1,
+ 'url': mezzanine_url,
+ })
+
+ thumbnails = traverse_obj(
+ video, ('thumbnails', ..., {'url': 'url'}), expected_type=url_or_none)
+ tags = traverse_obj(
+ video, ('tags', ..., 'displayName'),
+ expected_type=lambda x: x.strip() or None)
+
+ metadata = traverse_obj(video, 'metadata', expected_type=dict) or {}
+ title = traverse_obj(
+ metadata, 'longTitle', 'title', 'name',
+ expected_type=lambda x: x.strip() or None)
+
+ return {
+ 'id': video_id,
+ 'title': title,
+ 'description': strip_or_none(metadata.get('description')),
+ 'timestamp': parse_iso8601(metadata.get('publishDate')),
+ 'duration': int_or_none(metadata.get('duration')),
+ 'thumbnails': thumbnails,
+ 'formats': formats,
+ 'tags': tags,
+ }
+
+
+class IGNIE(IGNBaseIE):
+ """
+ Extractor for some of the IGN sites, like www.ign.com, es.ign.com, and de.ign.com.
+ Some videos from it.ign.com are also supported.
+ """
+ _VIDEO_PATH_RE = r'/(?:\d{4}/\d{2}/\d{2}/)?(?P<id>.+?)'
+ _PLAYLIST_PATH_RE = r'(?:/?\?(?P<filt>[^&#]+))?'
+ _VALID_URL = (
+ r'https?://(?:.+?\.ign|www\.pcmag)\.com/videos(?:%s)'
+ % '|'.join((_VIDEO_PATH_RE + r'(?:[/?&#]|$)', _PLAYLIST_PATH_RE)))
+ IE_NAME = 'ign.com'
+ _PAGE_TYPE = 'video'
+
+ _TESTS = [{
+ 'url': 'http://www.ign.com/videos/2013/06/05/the-last-of-us-review',
+ 'md5': 'd2e1586d9987d40fad7867bf96a018ea',
+ 'info_dict': {
+ 'id': '8f862beef863986b2785559b9e1aa599',
+ 'ext': 'mp4',
+ 'title': 'The Last of Us Review',
+ 'description': 'md5:c8946d4260a4d43a00d5ae8ed998870c',
+ 'timestamp': 1370440800,
+ 'upload_date': '20130605',
+ 'tags': 'count:9',
+ 'display_id': 'the-last-of-us-review',
+ 'thumbnail': 'https://assets1.ignimgs.com/vid/thumbnails/user/2014/03/26/lastofusreviewmimig2.jpg',
+ 'duration': 440,
+ },
+ 'params': {
+ 'nocheckcertificate': True,
+ },
+ }, {
+ 'url': 'http://www.pcmag.com/videos/2015/01/06/010615-whats-new-now-is-gogo-snooping-on-your-data',
+ 'md5': 'f1581a6fe8c5121be5b807684aeac3f6',
+ 'info_dict': {
+ 'id': 'ee10d774b508c9b8ec07e763b9125b91',
+ 'ext': 'mp4',
+ 'title': 'What\'s New Now: Is GoGo Snooping on Your Data?',
+ 'description': 'md5:817a20299de610bd56f13175386da6fa',
+ 'timestamp': 1420571160,
+ 'upload_date': '20150106',
+ 'tags': 'count:4',
+ },
+ 'skip': '404 Not Found',
+ }, {
+ 'url': 'https://www.ign.com/videos/is-a-resident-evil-4-remake-on-the-way-ign-daily-fix',
+ 'only_matching': True,
+ }]
+
+ @classmethod
+ def _extract_embed_urls(cls, url, webpage):
+ grids = re.findall(
+ r'''(?s)<section\b[^>]+\bclass\s*=\s*['"](?:[\w-]+\s+)*?content-feed-grid(?!\B|-)[^>]+>(.+?)</section[^>]*>''',
+ webpage)
+ return filter(None,
+ (urljoin(url, m.group('path')) for m in re.finditer(
+ r'''<a\b[^>]+\bhref\s*=\s*('|")(?P<path>/videos%s)\1'''
+ % cls._VIDEO_PATH_RE, grids[0] if grids else '')))
+
+ def _real_extract(self, url):
+ display_id, filt = self._match_valid_url(url).group('id', 'filt')
+ if display_id:
+ return self._extract_video(url, display_id)
+ return self._extract_playlist(url, filt or 'all')
+
+ def _extract_playlist(self, url, display_id):
+ webpage = self._download_webpage(url, display_id)
+
+ return self.playlist_result(
+ (self.url_result(u, self.ie_key())
+ for u in self._extract_embed_urls(url, webpage)),
+ playlist_id=display_id)
+
+ def _extract_video(self, url, display_id):
+ video = self._checked_call_api(display_id)
+
+ info = self._extract_video_info(video)
+
+ return merge_dicts({
+ 'display_id': display_id,
+ }, info)
+
+
+class IGNVideoIE(IGNBaseIE):
+ _VALID_URL = r'https?://.+?\.ign\.com/(?:[a-z]{2}/)?[^/]+/(?P<id>\d+)/(?:video|trailer)/'
+ _TESTS = [{
+ 'url': 'http://me.ign.com/en/videos/112203/video/how-hitman-aims-to-be-different-than-every-other-s',
+ 'md5': 'dd9aca7ed2657c4e118d8b261e5e9de1',
+ 'info_dict': {
+ 'id': 'e9be7ea899a9bbfc0674accc22a36cc8',
+ 'ext': 'mp4',
+ 'title': 'How Hitman Aims to Be Different Than Every Other Stealth Game - NYCC 2015',
+ 'description': 'Taking out assassination targets in Hitman has never been more stylish.',
+ 'timestamp': 1444665600,
+ 'upload_date': '20151012',
+ 'display_id': '112203',
+ 'thumbnail': 'https://sm.ign.com/ign_me/video/h/how-hitman/how-hitman-aims-to-be-different-than-every-other-s_8z14.jpg',
+ 'duration': 298,
+ 'tags': 'count:13',
+ },
+ 'expected_warnings': ['HTTP Error 400: Bad Request'],
+ }, {
+ 'url': 'http://me.ign.com/ar/angry-birds-2/106533/video/lrd-ldyy-lwl-lfylm-angry-birds',
+ 'only_matching': True,
+ }, {
+ # Youtube embed
+ 'url': 'https://me.ign.com/ar/ratchet-clank-rift-apart/144327/trailer/embed',
+ 'only_matching': True,
+ }, {
+ # Twitter embed
+ 'url': 'http://adria.ign.com/sherlock-season-4/9687/trailer/embed',
+ 'only_matching': True,
+ }, {
+ # Vimeo embed
+ 'url': 'https://kr.ign.com/bic-2018/3307/trailer/embed',
+ 'only_matching': True,
+ }]
+
+ def _real_extract(self, url):
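+ # rewrite the URL to its /embed player page, which either redirects or embeds the real player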
+ video_id = self._match_id(url)
+ parsed_url = urllib.parse.urlparse(url)
+ embed_url = urllib.parse.urlunparse(
+ parsed_url._replace(path=parsed_url.path.rsplit('/', 1)[0] + '/embed'))
+
+ webpage, urlh = self._download_webpage_handle(embed_url, video_id)
+ new_url = urlh.url
+ ign_url = compat_parse_qs(
+ urllib.parse.urlparse(new_url).query).get('url', [None])[-1]
+ if ign_url:
+ return self.url_result(ign_url, IGNIE.ie_key())
+ video = self._search_regex(r'(<div\b[^>]+\bdata-video-id\s*=\s*[^>]+>)', webpage, 'video element', fatal=False)
+ if not video:
+ if new_url == url:
+ raise ExtractorError('Redirect loop: ' + url)
+ return self.url_result(new_url)
+ video = extract_attributes(video)
+ video_data = video.get('data-settings') or '{}'
+ video_data = self._parse_json(video_data, video_id)['video']
+ info = self._extract_video_info(video_data)
+
+ return merge_dicts({
+ 'display_id': video_id,
+ }, info)
+
+
+class IGNArticleIE(IGNBaseIE):
+ _VALID_URL = r'https?://.+?\.ign\.com/(?:articles(?:/\d{4}/\d{2}/\d{2})?|(?:[a-z]{2}/)?(?:[\w-]+/)*?feature/\d+)/(?P<id>[^/?&#]+)'
+ _PAGE_TYPE = 'article'
+ _TESTS = [{
+ 'url': 'http://me.ign.com/en/feature/15775/100-little-things-in-gta-5-that-will-blow-your-mind',
+ 'info_dict': {
+ 'id': '72113',
+ 'title': '100 Little Things in GTA 5 That Will Blow Your Mind',
+ },
+ 'playlist': [
+ {
+ 'info_dict': {
+ 'id': '5ebbd138523268b93c9141af17bec937',
+ 'ext': 'mp4',
+ 'title': 'Grand Theft Auto V Video Review',
+ 'description': 'Rockstar drops the mic on this generation of games. Watch our review of the masterly Grand Theft Auto V.',
+ 'timestamp': 1379339880,
+ 'upload_date': '20130916',
+ 'tags': 'count:12',
+ 'thumbnail': 'https://assets1.ignimgs.com/thumbs/userUploaded/2021/8/16/gta-v-heistsjpg-e94705-1629138553533.jpeg',
+ 'display_id': 'grand-theft-auto-v-video-review',
+ 'duration': 501,
+ },
+ },
+ {
+ 'info_dict': {
+ 'id': '638672ee848ae4ff108df2a296418ee2',
+ 'ext': 'mp4',
+ 'title': 'GTA 5 In Slow Motion',
+ 'description': 'The twisted beauty of GTA 5 in stunning slow motion.',
+ 'timestamp': 1386878820,
+ 'upload_date': '20131212',
+ 'duration': 202,
+ 'tags': 'count:25',
+ 'display_id': 'gta-5-in-slow-motion',
+ 'thumbnail': 'https://assets1.ignimgs.com/vid/thumbnails/user/2013/11/03/GTA-SLO-MO-1.jpg',
+ },
+ },
+ ],
+ 'params': {
+ 'skip_download': True,
+ },
+ 'expected_warnings': ['Backend fetch failed'],
+ }, {
+ 'url': 'http://www.ign.com/articles/2014/08/15/rewind-theater-wild-trailer-gamescom-2014?watch',
+ 'info_dict': {
+ 'id': '53ee806780a81ec46e0790f8',
+ 'title': 'Rewind Theater - Wild Trailer Gamescom 2014',
+ },
+ 'playlist_count': 1,
+ 'expected_warnings': ['Backend fetch failed'],
+ }, {
+ # videoId pattern
+ 'url': 'http://www.ign.com/articles/2017/06/08/new-ducktales-short-donalds-birthday-doesnt-go-as-planned',
+ 'only_matching': True,
+ }, {
+ # Youtube embed
+ 'url': 'https://www.ign.com/articles/2021-mvp-named-in-puppy-bowl-xvii',
+ 'only_matching': True,
+ }, {
+ # IMDB embed
+ 'url': 'https://www.ign.com/articles/2014/08/07/sons-of-anarchy-final-season-trailer',
+ 'only_matching': True,
+ }, {
+ # Facebook embed
+ 'url': 'https://www.ign.com/articles/2017/09/20/marvels-the-punisher-watch-the-new-trailer-for-the-netflix-series',
+ 'only_matching': True,
+ }, {
+ # Brightcove embed
+ 'url': 'https://www.ign.com/articles/2016/01/16/supergirl-goes-flying-with-martian-manhunter-in-new-clip',
+ 'only_matching': True,
+ }]
+
+ def _checked_call_api(self, slug):
+ try:
+ return self._call_api(slug)
+ except ExtractorError as e:
+ if isinstance(e.cause, HTTPError):
+ e.cause.args = e.cause.args or [
+ e.cause.response.url, e.cause.status, e.cause.reason]
+ if e.cause.status == 404:
+ raise ExtractorError(
+ 'Content not found: expired?', cause=e.cause,
+ expected=True)
+ elif e.cause.status == 503:
+ self.report_warning(error_to_compat_str(e.cause))
+ return
+ raise
+
+ def _real_extract(self, url):
+ display_id = self._match_id(url)
+ article = self._checked_call_api(display_id)
+
+ if article:
+ # obsolete?
+ def entries():
+ media_url = traverse_obj(
+ article, ('mediaRelations', 0, 'media', 'metadata', 'url'),
+ expected_type=url_or_none)
+ if media_url:
+ yield self.url_result(media_url, IGNIE.ie_key())
+ for content in (article.get('content') or []):
+ for video_url in re.findall(r'(?:\[(?:ignvideo\s+url|youtube\s+clip_id)|<iframe[^>]+src)="([^"]+)"', content):
+ if url_or_none(video_url):
+ yield self.url_result(video_url)
+
+ return self.playlist_result(
+ entries(), article.get('articleId'),
+ traverse_obj(
+ article, ('metadata', 'headline'),
+ expected_type=lambda x: x.strip() or None))
+
+ webpage = self._download_webpage(url, display_id)
+
+ playlist_id = self._html_search_meta('dable:item_id', webpage, default=None)
+ if playlist_id:
+
+ def entries():
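+ # legacy article pages embed players as <object class="ign-videoplayer"> with the URL in flashvars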
+ for m in re.finditer(
+ r'''(?s)<object\b[^>]+\bclass\s*=\s*("|')ign-videoplayer\1[^>]*>(?P<params>.+?)</object''',
+ webpage):
+ flashvars = self._search_regex(
+ r'''(<param\b[^>]+\bname\s*=\s*("|')flashvars\2[^>]*>)''',
+ m.group('params'), 'flashvars', default='')
+ flashvars = compat_parse_qs(extract_attributes(flashvars).get('value') or '')
+ v_url = url_or_none((flashvars.get('url') or [None])[-1])
+ if v_url:
+ yield self.url_result(v_url)
+ else:
+ playlist_id = self._search_regex(
+ r'''\bdata-post-id\s*=\s*("|')(?P<id>[\da-f]+)\1''',
+ webpage, 'id', group='id', default=None)
+
+ nextjs_data = self._search_nextjs_data(webpage, display_id)
+
+ def entries():
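+ # modern article pages keep player props in the Next.js Apollo state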
+ for player in traverse_obj(
+ nextjs_data,
+ ('props', 'apolloState', 'ROOT_QUERY', lambda k, _: k.startswith('videoPlayerProps('), '__ref')):
+ # skip promo links (which may not always be served, e.g. on GH CI servers)
+ if traverse_obj(nextjs_data,
+ ('props', 'apolloState', player.replace('PlayerProps', 'ModernContent')),
+ expected_type=dict):
+ continue
+ video = traverse_obj(nextjs_data, ('props', 'apolloState', player), expected_type=dict) or {}
+ info = self._extract_video_info(video, fatal=False)
+ if info:
+ yield merge_dicts({
+ 'display_id': display_id,
+ }, info)
+
+ return self.playlist_result(
+ entries(), playlist_id or display_id,
+ re.sub(r'\s+-\s+IGN\s*$', '', self._og_search_title(webpage, default='')) or None)
diff --git a/yt_dlp/extractor/iheart.py b/yt_dlp/extractor/iheart.py
new file mode 100644
index 0000000..fb6f51e
--- /dev/null
+++ b/yt_dlp/extractor/iheart.py
@@ -0,0 +1,95 @@
+from .common import InfoExtractor
+from ..utils import (
+ clean_html,
+ clean_podcast_url,
+ int_or_none,
+ str_or_none,
+)
+
+
+class IHeartRadioBaseIE(InfoExtractor):
+ def _call_api(self, path, video_id, fatal=True, query=None):
+ return self._download_json(
+ 'https://api.iheart.com/api/v3/podcast/' + path,
+ video_id, fatal=fatal, query=query)
+
+ def _extract_episode(self, episode):
+ return {
+ 'thumbnail': episode.get('imageUrl'),
+ 'description': clean_html(episode.get('description')),
+ 'timestamp': int_or_none(episode.get('startDate'), 1000),
+ 'duration': int_or_none(episode.get('duration')),
+ }
+
+
+class IHeartRadioIE(IHeartRadioBaseIE):
+ IE_NAME = 'iheartradio'
+ _VALID_URL = r'(?:https?://(?:www\.)?iheart\.com/podcast/[^/]+/episode/(?P<display_id>[^/?&#]+)-|iheartradio:)(?P<id>\d+)'
+ _TEST = {
+ 'url': 'https://www.iheart.com/podcast/105-behind-the-bastards-29236323/episode/part-one-alexander-lukashenko-the-dictator-70346499/?embed=true',
+ 'md5': 'c8609c92c8688dcb69d8541042b8abca',
+ 'info_dict': {
+ 'id': '70346499',
+ 'ext': 'mp3',
+ 'title': 'Part One: Alexander Lukashenko: The Dictator of Belarus',
+ 'description': 'md5:96cc7297b3a5a9ebae28643801c96fae',
+ 'timestamp': 1597741200,
+ 'upload_date': '20200818',
+ }
+ }
+
+ def _real_extract(self, url):
+ episode_id = self._match_id(url)
+ episode = self._call_api(
+ 'episodes/' + episode_id, episode_id)['episode']
+ info = self._extract_episode(episode)
+ info.update({
+ 'id': episode_id,
+ 'title': episode['title'],
+ 'url': clean_podcast_url(episode['mediaUrl']),
+ })
+ return info
+
+
+class IHeartRadioPodcastIE(IHeartRadioBaseIE):
+ IE_NAME = 'iheartradio:podcast'
+ _VALID_URL = r'https?://(?:www\.)?iheart(?:podcastnetwork)?\.com/podcast/[^/?&#]+-(?P<id>\d+)/?(?:[?#&]|$)'
+ _TESTS = [{
+ 'url': 'https://www.iheart.com/podcast/1119-it-could-happen-here-30717896/',
+ 'info_dict': {
+ 'id': '30717896',
+ 'title': 'It Could Happen Here',
+ 'description': 'md5:5842117412a967eb0b01f8088eb663e2',
+ },
+ 'playlist_mincount': 11,
+ }, {
+ 'url': 'https://www.iheartpodcastnetwork.com/podcast/105-stuff-you-should-know-26940277',
+ 'only_matching': True,
+ }]
+
+ def _real_extract(self, url):
+ podcast_id = self._match_id(url)
+ path = 'podcasts/' + podcast_id
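+ # request an effectively unlimited page size so every episode arrives in one call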
+ episodes = self._call_api(
+ path + '/episodes', podcast_id, query={'limit': 1000000000})['data']
+
+ entries = []
+ for episode in episodes:
+ episode_id = str_or_none(episode.get('id'))
+ if not episode_id:
+ continue
+ info = self._extract_episode(episode)
+ info.update({
+ '_type': 'url',
+ 'id': episode_id,
+ 'title': episode.get('title'),
+ 'url': 'iheartradio:' + episode_id,
+ 'ie_key': IHeartRadioIE.ie_key(),
+ })
+ entries.append(info)
+
+ podcast = self._call_api(path, podcast_id, False) or {}
+
+ return self.playlist_result(
+ entries, podcast_id, podcast.get('title'), podcast.get('description'))
diff --git a/yt_dlp/extractor/ilpost.py b/yt_dlp/extractor/ilpost.py
new file mode 100644
index 0000000..ae98399
--- /dev/null
+++ b/yt_dlp/extractor/ilpost.py
@@ -0,0 +1,70 @@
+import functools
+
+from .common import InfoExtractor
+from ..utils import (
+ ExtractorError,
+ float_or_none,
+ int_or_none,
+ url_or_none,
+ urlencode_postdata,
+)
+from ..utils.traversal import traverse_obj
+
+
+class IlPostIE(InfoExtractor):
+ _VALID_URL = r'https?://(?:www\.)?ilpost\.it/episodes/(?P<id>[^/?#]+)'
+ _TESTS = [{
+ 'url': 'https://www.ilpost.it/episodes/1-avis-akvasas-ka/',
+ 'md5': '43649f002d85e1c2f319bb478d479c40',
+ 'info_dict': {
+ 'id': '2972047',
+ 'ext': 'mp3',
+ 'display_id': '1-avis-akvasas-ka',
+ 'title': '1. Avis akvasas ka',
+ 'url': 'https://www.ilpost.it/wp-content/uploads/2023/12/28/1703781217-l-invasione-pt1-v6.mp3',
+ 'timestamp': 1703835014,
+ 'upload_date': '20231229',
+ 'duration': 2495.0,
+ 'availability': 'public',
+ 'series_id': '235598',
+ 'description': '',
+ }
+ }]
+
+ def _real_extract(self, url):
+ display_id = self._match_id(url)
+ webpage = self._download_webpage(url, display_id)
+
+ endpoint_metadata = self._search_json(
+ r'var\s+ilpostpodcast\s*=', webpage, 'metadata', display_id)
+ episode_id = endpoint_metadata['post_id']
+ podcast_id = endpoint_metadata['podcast_id']
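+ # the WordPress AJAX endpoint (action=checkpodcast) returns the podcast's episode list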
+ podcast_metadata = self._download_json(
+ endpoint_metadata['ajax_url'], display_id, data=urlencode_postdata({
+ 'action': 'checkpodcast',
+ 'cookie': endpoint_metadata['cookie'],
+ 'post_id': episode_id,
+ 'podcast_id': podcast_id,
+ }))
+
+ episode = traverse_obj(podcast_metadata, (
+ 'data', 'postcastList', lambda _, v: str(v['id']) == episode_id, {dict}), get_all=False)
+ if not episode:
+ raise ExtractorError('Episode could not be extracted')
+
+ return {
+ 'id': episode_id,
+ 'display_id': display_id,
+ 'series_id': podcast_id,
+ 'vcodec': 'none',
+ **traverse_obj(episode, {
+ 'title': ('title', {str}),
+ 'description': ('description', {str}),
+ 'url': ('podcast_raw_url', {url_or_none}),
+ 'thumbnail': ('image', {url_or_none}),
+ 'timestamp': ('timestamp', {int_or_none}),
+ 'duration': ('milliseconds', {functools.partial(float_or_none, scale=1000)}),
+ 'availability': ('free', {lambda v: 'public' if v else 'subscriber_only'}),
+ }),
+ }
diff --git a/yt_dlp/extractor/iltalehti.py b/yt_dlp/extractor/iltalehti.py
new file mode 100644
index 0000000..0e7e82c
--- /dev/null
+++ b/yt_dlp/extractor/iltalehti.py
@@ -0,0 +1,52 @@
+from .common import InfoExtractor
+from ..utils import js_to_json, traverse_obj
+
+
+class IltalehtiIE(InfoExtractor):
+ _VALID_URL = r'https?://(?:www\.)?iltalehti\.fi/[^/?#]+/a/(?P<id>[^/?#]+)'
+ _TESTS = [
+ # jwplatform embed main_media
+ {
+ 'url': 'https://www.iltalehti.fi/ulkomaat/a/9fbd067f-94e4-46cd-8748-9d958eb4dae2',
+ 'md5': 'af12d42c539f1f49f0b62d231fe72dcd',
+ 'info_dict': {
+ 'id': 'gYjjaf1L',
+ 'ext': 'mp4',
+ 'title': 'Sensuroimaton Päivärinta, jakso 227: Vieraana Suomen Venäjän ex-suurlähettiläs René Nyberg ja Kenraalimajuri evp Pekka Toveri',
+ 'description': '',
+ 'upload_date': '20220928',
+ 'timestamp': 1664360878,
+ 'duration': 2089,
+ 'thumbnail': r're:^https?://.*\.jpg',
+ },
+ },
+ # jwplatform embed body
+ {
+ 'url': 'https://www.iltalehti.fi/politiikka/a/1ce49d85-1670-428b-8db8-d2479b9950a4',
+ 'md5': '9e50334b8f8330ce8828b567a82a3c65',
+ 'info_dict': {
+ 'id': '18R6zkLi',
+ 'ext': 'mp4',
+ 'title': 'Pekka Toverin arvio: Näin Nord Stream -kaasuputken räjäyttäminen on saatettu toteuttaa',
+ 'description': 'md5:3d1302c9e17e7ffd564143ff58f8de35',
+ 'upload_date': '20220929',
+ 'timestamp': 1664435867,
+ 'duration': 165.0,
+ 'thumbnail': r're:^https?://.*\.jpg',
+ },
+ },
+ ]
+
+ def _real_extract(self, url):
+ article_id = self._match_id(url)
+ webpage = self._download_webpage(url, article_id)
+ info = self._search_json(
+ r'<script>\s*window\.App\s*=', webpage, 'json', article_id,
+ transform_source=js_to_json)
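+ # collect jwplayer embeds from both the article's main media and inline body items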
+ props = traverse_obj(info, (
+ 'state', 'articles', ..., 'items', (('main_media', 'properties'), ('body', ..., 'properties'))))
+ video_ids = traverse_obj(props, (lambda _, v: v['provider'] == 'jwplayer', 'id'))
+ return self.playlist_from_matches(
+ video_ids, article_id, ie='JWPlatform', getter=lambda id: f'jwplatform:{id}',
+ title=traverse_obj(info, ('state', 'articles', ..., 'items', 'canonical_title'), get_all=False))
diff --git a/yt_dlp/extractor/imdb.py b/yt_dlp/extractor/imdb.py
new file mode 100644
index 0000000..557a3b7
--- /dev/null
+++ b/yt_dlp/extractor/imdb.py
@@ -0,0 +1,145 @@
+import base64
+import json
+import re
+
+from .common import InfoExtractor
+from ..utils import (
+ determine_ext,
+ int_or_none,
+ mimetype2ext,
+ qualities,
+ traverse_obj,
+ try_get,
+ url_or_none,
+)
+
+
+class ImdbIE(InfoExtractor):
+ IE_NAME = 'imdb'
+ IE_DESC = 'Internet Movie Database trailers'
+ _VALID_URL = r'https?://(?:www|m)\.imdb\.com/(?:video|title|list).*?[/-]vi(?P<id>\d+)'
+
+ _TESTS = [{
+ 'url': 'http://www.imdb.com/video/imdb/vi2524815897',
+ 'info_dict': {
+ 'id': '2524815897',
+ 'ext': 'mp4',
+ 'title': 'No. 2',
+ 'description': 'md5:87bd0bdc61e351f21f20d2d7441cb4e7',
+ 'duration': 152,
+ 'thumbnail': r're:^https?://.+\.jpg',
+ }
+ }, {
+ 'url': 'https://www.imdb.com/video/vi3516832537',
+ 'info_dict': {
+ 'id': '3516832537',
+ 'ext': 'mp4',
+ 'title': 'Paul: U.S. Trailer #1',
+ 'description': 'md5:17fcc4fe11ec29b4399be9d4c5ef126c',
+ 'duration': 153,
+ 'thumbnail': r're:^https?://.+\.jpg',
+ }
+ }, {
+ 'url': 'http://www.imdb.com/video/_/vi2524815897',
+ 'only_matching': True,
+ }, {
+ 'url': 'http://www.imdb.com/title/tt1667889/?ref_=ext_shr_eml_vi#lb-vi2524815897',
+ 'only_matching': True,
+ }, {
+ 'url': 'http://www.imdb.com/title/tt1667889/#lb-vi2524815897',
+ 'only_matching': True,
+ }, {
+ 'url': 'http://www.imdb.com/videoplayer/vi1562949145',
+ 'only_matching': True,
+ }, {
+ 'url': 'http://www.imdb.com/title/tt4218696/videoplayer/vi2608641561',
+ 'only_matching': True,
+ }, {
+ 'url': 'https://www.imdb.com/list/ls009921623/videoplayer/vi260482329',
+ 'only_matching': True,
+ }]
+
+ def _real_extract(self, url):
+ video_id = self._match_id(url)
+ webpage = self._download_webpage(f'https://www.imdb.com/video/vi{video_id}', video_id)
+ info = self._search_nextjs_data(webpage, video_id)
+ video_info = traverse_obj(info, ('props', 'pageProps', 'videoPlaybackData', 'video'), default={})
+ title = (traverse_obj(video_info, ('name', 'value'), ('primaryTitle', 'titleText', 'text'))
+ or self._html_search_meta(('og:title', 'twitter:title'), webpage, default=None)
+ or self._html_extract_title(webpage))
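+ # fall back to the legacy playback endpoint, keyed by a base64-encoded JSON descriptor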
+ data = video_info.get('playbackURLs') or try_get(self._download_json(
+ 'https://www.imdb.com/ve/data/VIDEO_PLAYBACK_DATA', video_id,
+ query={
+ 'key': base64.b64encode(json.dumps({
+ 'type': 'VIDEO_PLAYER',
+ 'subType': 'FORCE_LEGACY',
+ 'id': 'vi%s' % video_id,
+ }).encode()).decode(),
+ }), lambda x: x[0]['videoLegacyEncodings'])
+ quality = qualities(('SD', '480p', '720p', '1080p'))
+ formats, subtitles = [], {}
+ for encoding in data:
+ if not encoding or not isinstance(encoding, dict):
+ continue
+ video_url = url_or_none(encoding.get('url'))
+ if not video_url:
+ continue
+ ext = mimetype2ext(encoding.get(
+ 'mimeType')) or determine_ext(video_url)
+ if ext == 'm3u8':
+ fmts, subs = self._extract_m3u8_formats_and_subtitles(
+ video_url, video_id, 'mp4', entry_protocol='m3u8_native',
+ preference=1, m3u8_id='hls', fatal=False)
+ subtitles = self._merge_subtitles(subtitles, subs)
+ formats.extend(fmts)
+ continue
+ format_id = traverse_obj(encoding, ('displayName', 'value'), 'definition')
+ formats.append({
+ 'format_id': format_id,
+ 'url': video_url,
+ 'ext': ext,
+ 'quality': quality(format_id),
+ })
+
+ return {
+ 'id': video_id,
+ 'title': title,
+ 'alt_title': info.get('videoSubTitle'),
+ 'formats': formats,
+ 'description': try_get(video_info, lambda x: x['description']['value']),
+ 'thumbnail': url_or_none(try_get(video_info, lambda x: x['thumbnail']['url'])),
+ 'duration': int_or_none(try_get(video_info, lambda x: x['runtime']['value'])),
+ 'subtitles': subtitles,
+ }
+
+
+class ImdbListIE(InfoExtractor):
+ IE_NAME = 'imdb:list'
+ IE_DESC = 'Internet Movie Database lists'
+ _VALID_URL = r'https?://(?:www\.)?imdb\.com/list/ls(?P<id>\d{9})(?!/videoplayer/vi\d+)'
+ _TEST = {
+ 'url': 'https://www.imdb.com/list/ls009921623/',
+ 'info_dict': {
+ 'id': '009921623',
+ 'title': 'The Bourne Legacy',
+ 'description': 'A list of trailers, clips, and more from The Bourne Legacy, starring Jeremy Renner and Rachel Weisz.',
+ },
+ 'playlist_count': 8,
+ }
+
+ def _real_extract(self, url):
+ list_id = self._match_id(url)
+ webpage = self._download_webpage(url, list_id)
+ entries = [
+ self.url_result('http://www.imdb.com' + m, 'Imdb')
+ for m in re.findall(r'href="(/list/ls%s/videoplayer/vi[^"]+)"' % list_id, webpage)]
+
+ list_title = self._html_search_regex(
+ r'<h1[^>]+class="[^"]*header[^"]*"[^>]*>(.*?)</h1>',
+ webpage, 'list title')
+ list_description = self._html_search_regex(
+ r'<div[^>]+class="[^"]*list-description[^"]*"[^>]*><p>(.*?)</p>',
+ webpage, 'list description')
+
+ return self.playlist_result(entries, list_id, list_title, list_description)
diff --git a/yt_dlp/extractor/imggaming.py b/yt_dlp/extractor/imggaming.py
new file mode 100644
index 0000000..a40aa21
--- /dev/null
+++ b/yt_dlp/extractor/imggaming.py
@@ -0,0 +1,126 @@
+import json
+
+from .common import InfoExtractor
+from ..networking.exceptions import HTTPError
+from ..utils import (
+ ExtractorError,
+ int_or_none,
+ str_or_none,
+ try_get,
+)
+
+
+class ImgGamingBaseIE(InfoExtractor):
+ _API_BASE = 'https://dce-frontoffice.imggaming.com/api/v2/'
+ _API_KEY = '857a1e5d-e35e-4fdf-805b-a87b6f8364bf'
+ _HEADERS = None
+ _MANIFEST_HEADERS = {'Accept-Encoding': 'identity'}
+ _REALM = None
+ _VALID_URL_TEMPL = r'https?://(?P<domain>%s)/(?P<type>live|playlist|video)/(?P<id>\d+)(?:\?.*?\bplaylistId=(?P<playlist_id>\d+))?'
+
+ def _initialize_pre_login(self):
+ self._HEADERS = {
+ 'Realm': 'dce.' + self._REALM,
+ 'x-api-key': self._API_KEY,
+ }
+
+ def _perform_login(self, username, password):
+ p_headers = self._HEADERS.copy()
+ p_headers['Content-Type'] = 'application/json'
+ self._HEADERS['Authorization'] = 'Bearer ' + self._download_json(
+ self._API_BASE + 'login',
+ None, 'Logging in', data=json.dumps({
+ 'id': username,
+ 'secret': password,
+ }).encode(), headers=p_headers)['authorisationToken']
+
+ def _real_initialize(self):
+ if not self._HEADERS.get('Authorization'):
+ self.raise_login_required(method='password')
+
+ def _call_api(self, path, media_id):
+ return self._download_json(
+ self._API_BASE + path + media_id, media_id, headers=self._HEADERS)
+
+ def _extract_dve_api_url(self, media_id, media_type):
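+        # VOD media is fetched from stream/vod/<id>; live events are looked
+        # up via stream?eventId=<id> on the same endpoint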
+ stream_path = 'stream'
+ if media_type == 'video':
+ stream_path += '/vod/'
+ else:
+ stream_path += '?eventId='
+ try:
+ return self._call_api(
+ stream_path, media_id)['playerUrlCallback']
+ except ExtractorError as e:
+ if isinstance(e.cause, HTTPError) and e.cause.status == 403:
+ raise ExtractorError(
+ self._parse_json(e.cause.response.read().decode(), media_id)['messages'][0],
+ expected=True)
+ raise
+
+ def _real_extract(self, url):
+ domain, media_type, media_id, playlist_id = self._match_valid_url(url).groups()
+
+ if playlist_id:
+ if self._yes_playlist(playlist_id, media_id):
+ media_type, media_id = 'playlist', playlist_id
+
+ if media_type == 'playlist':
+ playlist = self._call_api('vod/playlist/', media_id)
+ entries = []
+ for video in try_get(playlist, lambda x: x['videos']['vods']) or []:
+ video_id = str_or_none(video.get('id'))
+ if not video_id:
+ continue
+ entries.append(self.url_result(
+ 'https://%s/video/%s' % (domain, video_id),
+ self.ie_key(), video_id))
+ return self.playlist_result(
+ entries, media_id, playlist.get('title'),
+ playlist.get('description'))
+
+ dve_api_url = self._extract_dve_api_url(media_id, media_type)
+ video_data = self._download_json(dve_api_url, media_id)
+ is_live = media_type == 'live'
+ if is_live:
+ title = self._call_api('event/', media_id)['title']
+ else:
+ title = video_data['name']
+
+ formats = []
+ for proto in ('hls', 'dash'):
+ media_url = video_data.get(proto + 'Url') or try_get(video_data, lambda x: x[proto]['url'])
+ if not media_url:
+ continue
+ if proto == 'hls':
+ m3u8_formats = self._extract_m3u8_formats(
+ media_url, media_id, 'mp4', live=is_live,
+ m3u8_id='hls', fatal=False, headers=self._MANIFEST_HEADERS)
+ for f in m3u8_formats:
+ f.setdefault('http_headers', {}).update(self._MANIFEST_HEADERS)
+ formats.append(f)
+ else:
+ formats.extend(self._extract_mpd_formats(
+ media_url, media_id, mpd_id='dash', fatal=False,
+ headers=self._MANIFEST_HEADERS))
+
+ subtitles = {}
+ for subtitle in video_data.get('subtitles', []):
+ subtitle_url = subtitle.get('url')
+ if not subtitle_url:
+ continue
+ subtitles.setdefault(subtitle.get('lang', 'en_US'), []).append({
+ 'url': subtitle_url,
+ })
+
+ return {
+ 'id': media_id,
+ 'title': title,
+ 'formats': formats,
+ 'thumbnail': video_data.get('thumbnailUrl'),
+ 'description': video_data.get('description'),
+ 'duration': int_or_none(video_data.get('duration')),
+ 'tags': video_data.get('tags'),
+ 'is_live': is_live,
+ 'subtitles': subtitles,
+ }
diff --git a/yt_dlp/extractor/imgur.py b/yt_dlp/extractor/imgur.py
new file mode 100644
index 0000000..1fa0a2a
--- /dev/null
+++ b/yt_dlp/extractor/imgur.py
@@ -0,0 +1,366 @@
+import functools
+import re
+
+from .common import InfoExtractor
+from ..utils import (
+ ExtractorError,
+ determine_ext,
+ float_or_none,
+ int_or_none,
+ js_to_json,
+ mimetype2ext,
+ parse_iso8601,
+ str_or_none,
+ strip_or_none,
+ traverse_obj,
+ url_or_none,
+)
+
+
+class ImgurBaseIE(InfoExtractor):
+ _CLIENT_ID = '546c25a59c58ad7'
+
+ @classmethod
+ def _imgur_result(cls, item_id):
+ return cls.url_result(f'https://imgur.com/{item_id}', ImgurIE, item_id)
+
+ def _call_api(self, endpoint, video_id, **kwargs):
+ return self._download_json(
+ f'https://api.imgur.com/post/v1/{endpoint}/{video_id}?client_id={self._CLIENT_ID}&include=media,account',
+ video_id, **kwargs)
+
+ @staticmethod
+ def get_description(s):
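+        # Imgur serves a generic "Discover the magic ..." blurb as the
+        # description of media that have none; treat it as no description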
+ if 'Discover the magic of the internet at Imgur' in s:
+ return None
+ return s or None
+
+
+class ImgurIE(ImgurBaseIE):
+ _VALID_URL = r'https?://(?:i\.)?imgur\.com/(?!(?:a|gallery|t|topic|r)/)(?P<id>[a-zA-Z0-9]+)'
+
+ _TESTS = [{
+ 'url': 'https://imgur.com/A61SaA1',
+ 'info_dict': {
+ 'id': 'A61SaA1',
+ 'ext': 'mp4',
+ 'title': 'MRW gifv is up and running without any bugs',
+ 'timestamp': 1416446068,
+ 'upload_date': '20141120',
+ 'dislike_count': int,
+ 'comment_count': int,
+ 'release_timestamp': 1416446068,
+ 'release_date': '20141120',
+ 'like_count': int,
+ 'thumbnail': 'https://i.imgur.com/A61SaA1h.jpg',
+ },
+ }, {
+ 'url': 'https://i.imgur.com/A61SaA1.gifv',
+ 'only_matching': True,
+ }, {
+ 'url': 'https://i.imgur.com/crGpqCV.mp4',
+ 'only_matching': True,
+ }, {
+ 'url': 'https://i.imgur.com/jxBXAMC.gifv',
+ 'info_dict': {
+ 'id': 'jxBXAMC',
+ 'ext': 'mp4',
+ 'title': 'Fahaka puffer feeding',
+ 'timestamp': 1533835503,
+ 'upload_date': '20180809',
+ 'release_date': '20180809',
+ 'like_count': int,
+ 'duration': 30.0,
+ 'comment_count': int,
+ 'release_timestamp': 1533835503,
+ 'thumbnail': 'https://i.imgur.com/jxBXAMCh.jpg',
+ 'dislike_count': int,
+ },
+ }]
+
+ def _real_extract(self, url):
+ video_id = self._match_id(url)
+ data = self._call_api('media', video_id)
+ if not traverse_obj(data, ('media', 0, (
+ ('type', {lambda t: t == 'video' or None}),
+ ('metadata', 'is_animated'))), get_all=False):
+ raise ExtractorError(f'{video_id} is not a video or animated image', expected=True)
+ webpage = self._download_webpage(
+ f'https://i.imgur.com/{video_id}.gifv', video_id, fatal=False) or ''
+ formats = []
+
+ media_fmt = traverse_obj(data, ('media', 0, {
+ 'url': ('url', {url_or_none}),
+ 'ext': ('ext', {str}),
+ 'width': ('width', {int_or_none}),
+ 'height': ('height', {int_or_none}),
+ 'filesize': ('size', {int_or_none}),
+ 'acodec': ('metadata', 'has_sound', {lambda b: None if b else 'none'}),
+ }))
+ media_url = media_fmt.get('url')
+ if media_url:
+ if not media_fmt.get('ext'):
+ media_fmt['ext'] = mimetype2ext(traverse_obj(
+ data, ('media', 0, 'mime_type'))) or determine_ext(media_url)
+ if traverse_obj(data, ('media', 0, 'type')) == 'image':
+ media_fmt['acodec'] = 'none'
+ media_fmt.setdefault('preference', -10)
+ formats.append(media_fmt)
+
+ video_elements = self._search_regex(
+ r'(?s)<div class="video-elements">(.*?)</div>',
+ webpage, 'video elements', default=None)
+
+ if video_elements:
+ def og_get_size(media_type):
+ return {
+ p: int_or_none(self._og_search_property(f'{media_type}:{p}', webpage, default=None))
+ for p in ('width', 'height')
+ }
+
+ size = og_get_size('video')
+ if not any(size.values()):
+ size = og_get_size('image')
+
+ formats = traverse_obj(
+ re.finditer(r'<source\s+src="(?P<src>[^"]+)"\s+type="(?P<type>[^"]+)"', video_elements),
+ (..., {
+ 'format_id': ('type', {lambda s: s.partition('/')[2]}),
+ 'url': ('src', {self._proto_relative_url}),
+ 'ext': ('type', {mimetype2ext}),
+ }))
+ for f in formats:
+ f.update(size)
+
+ # We can get the original gif format from the webpage as well
+ gif_json = traverse_obj(self._search_json(
+ r'var\s+videoItem\s*=', webpage, 'GIF info', video_id,
+ transform_source=js_to_json, fatal=False), {
+ 'url': ('gifUrl', {self._proto_relative_url}),
+ 'filesize': ('size', {int_or_none}),
+ })
+ if gif_json:
+ gif_json.update(size)
+ gif_json.update({
+ 'format_id': 'gif',
+ 'preference': -10, # gifs < videos
+ 'ext': 'gif',
+ 'acodec': 'none',
+ 'vcodec': 'gif',
+ 'container': 'gif',
+ })
+ formats.append(gif_json)
+
+ search = functools.partial(self._html_search_meta, html=webpage, default=None)
+
+ twitter_fmt = {
+ 'format_id': 'twitter',
+ 'url': url_or_none(search('twitter:player:stream')),
+ 'ext': mimetype2ext(search('twitter:player:stream:content_type')),
+ 'width': int_or_none(search('twitter:width')),
+ 'height': int_or_none(search('twitter:height')),
+ }
+ if twitter_fmt['url']:
+ formats.append(twitter_fmt)
+
+ if not formats:
+ self.raise_no_formats(
+ f'No sources found for video {video_id}. Maybe a plain image?', expected=True)
+ self._remove_duplicate_formats(formats)
+
+ return {
+ 'title': self._og_search_title(webpage, default=None),
+ 'description': self.get_description(self._og_search_description(webpage, default='')),
+ **traverse_obj(data, {
+ 'uploader_id': ('account_id', {lambda a: str(a) if int_or_none(a) else None}),
+ 'uploader': ('account', 'username', {lambda x: strip_or_none(x) or None}),
+ 'uploader_url': ('account', 'avatar_url', {url_or_none}),
+ 'like_count': ('upvote_count', {int_or_none}),
+ 'dislike_count': ('downvote_count', {int_or_none}),
+ 'comment_count': ('comment_count', {int_or_none}),
+ 'age_limit': ('is_mature', {lambda x: 18 if x else None}),
+ 'timestamp': (('updated_at', 'created_at'), {parse_iso8601}),
+ 'release_timestamp': ('created_at', {parse_iso8601}),
+ }, get_all=False),
+ **traverse_obj(data, ('media', 0, 'metadata', {
+ 'title': ('title', {lambda x: strip_or_none(x) or None}),
+ 'description': ('description', {self.get_description}),
+ 'duration': ('duration', {float_or_none}),
+ 'timestamp': (('updated_at', 'created_at'), {parse_iso8601}),
+ 'release_timestamp': ('created_at', {parse_iso8601}),
+ }), get_all=False),
+ 'id': video_id,
+ 'formats': formats,
+ 'thumbnail': url_or_none(search('thumbnailUrl')),
+ }
+
+
+class ImgurGalleryBaseIE(ImgurBaseIE):
+ _GALLERY = True
+
+ def _real_extract(self, url):
+ gallery_id = self._match_id(url)
+
+ data = self._call_api('albums', gallery_id, fatal=False, expected_status=404)
+
+ info = traverse_obj(data, {
+ 'title': ('title', {lambda x: strip_or_none(x) or None}),
+ 'description': ('description', {self.get_description}),
+ })
+
+ if traverse_obj(data, 'is_album'):
+
+ def yield_media_ids():
+ for m_id in traverse_obj(data, (
+ 'media', lambda _, v: v.get('type') == 'video' or v['metadata']['is_animated'],
+ 'id', {lambda x: str_or_none(x) or None})):
+ yield m_id
+
+            # if the gallery contains exactly one video, apply the album
+            # metadata to that video
+ media_id = (
+ self._GALLERY
+ and traverse_obj(data, ('image_count', {lambda c: c == 1}))
+ and next(yield_media_ids(), None))
+
+ if not media_id:
+ result = self.playlist_result(
+ map(self._imgur_result, yield_media_ids()), gallery_id)
+ result.update(info)
+ return result
+ gallery_id = media_id
+
+ result = self._imgur_result(gallery_id)
+ info['_type'] = 'url_transparent'
+ result.update(info)
+ return result
+
+
+class ImgurGalleryIE(ImgurGalleryBaseIE):
+ IE_NAME = 'imgur:gallery'
+ _VALID_URL = r'https?://(?:i\.)?imgur\.com/(?:gallery|(?:t(?:opic)?|r)/[^/?#]+)/(?P<id>[a-zA-Z0-9]+)'
+
+ _TESTS = [{
+ 'url': 'http://imgur.com/gallery/Q95ko',
+ 'info_dict': {
+ 'id': 'Q95ko',
+ 'title': 'Adding faces make every GIF better',
+ },
+ 'playlist_count': 25,
+ 'skip': 'Zoinks! You\'ve taken a wrong turn.',
+ }, {
+ # TODO: static images - replace with animated/video gallery
+ 'url': 'http://imgur.com/topic/Aww/ll5Vk',
+ 'only_matching': True,
+ }, {
+ 'url': 'https://imgur.com/gallery/YcAQlkx',
+ 'add_ies': ['Imgur'],
+ 'info_dict': {
+ 'id': 'YcAQlkx',
+ 'ext': 'mp4',
+ 'title': 'Classic Steve Carell gif...cracks me up everytime....damn the repost downvotes....',
+ 'timestamp': 1358554297,
+ 'upload_date': '20130119',
+ 'uploader_id': '1648642',
+ 'uploader': 'wittyusernamehere',
+ 'release_timestamp': 1358554297,
+ 'thumbnail': 'https://i.imgur.com/YcAQlkxh.jpg',
+ 'release_date': '20130119',
+ 'uploader_url': 'https://i.imgur.com/u3R4I2S_d.png?maxwidth=290&fidelity=grand',
+ 'comment_count': int,
+ 'dislike_count': int,
+ 'like_count': int,
+ },
+ }, {
+ # TODO: static image - replace with animated/video gallery
+ 'url': 'http://imgur.com/topic/Funny/N8rOudd',
+ 'only_matching': True,
+ }, {
+ 'url': 'http://imgur.com/r/aww/VQcQPhM',
+ 'add_ies': ['Imgur'],
+ 'info_dict': {
+ 'id': 'VQcQPhM',
+ 'ext': 'mp4',
+ 'title': 'The boss is here',
+ 'timestamp': 1476494751,
+ 'upload_date': '20161015',
+ 'uploader_id': '19138530',
+ 'uploader': 'thematrixcam',
+ 'comment_count': int,
+ 'dislike_count': int,
+ 'uploader_url': 'https://i.imgur.com/qCjr5Pi_d.png?maxwidth=290&fidelity=grand',
+ 'release_timestamp': 1476494751,
+ 'like_count': int,
+ 'release_date': '20161015',
+ 'thumbnail': 'https://i.imgur.com/VQcQPhMh.jpg',
+ },
+ },
+ # from https://github.com/ytdl-org/youtube-dl/pull/16674
+ {
+ 'url': 'https://imgur.com/t/unmuted/6lAn9VQ',
+ 'info_dict': {
+ 'id': '6lAn9VQ',
+ 'title': 'Penguins !',
+ },
+ 'playlist_count': 3,
+ }, {
+ 'url': 'https://imgur.com/t/unmuted/kx2uD3C',
+ 'add_ies': ['Imgur'],
+ 'info_dict': {
+ 'id': 'ZVMv45i',
+ 'ext': 'mp4',
+ 'title': 'Intruder',
+ 'timestamp': 1528129683,
+ 'upload_date': '20180604',
+ 'release_timestamp': 1528129683,
+ 'release_date': '20180604',
+ 'like_count': int,
+ 'dislike_count': int,
+ 'comment_count': int,
+ 'duration': 30.03,
+ 'thumbnail': 'https://i.imgur.com/ZVMv45ih.jpg',
+ },
+ }, {
+ 'url': 'https://imgur.com/t/unmuted/wXSK0YH',
+ 'add_ies': ['Imgur'],
+ 'info_dict': {
+ 'id': 'JCAP4io',
+ 'ext': 'mp4',
+ 'title': 're:I got the blues$',
+ 'description': 'Luka’s vocal stylings.\n\nFP edit: don’t encourage me. I’ll never stop posting Luka and friends.',
+ 'timestamp': 1527809525,
+ 'upload_date': '20180531',
+ 'like_count': int,
+ 'dislike_count': int,
+ 'duration': 30.03,
+ 'comment_count': int,
+ 'release_timestamp': 1527809525,
+ 'thumbnail': 'https://i.imgur.com/JCAP4ioh.jpg',
+ 'release_date': '20180531',
+ },
+ }]
+
+
+class ImgurAlbumIE(ImgurGalleryBaseIE):
+ IE_NAME = 'imgur:album'
+ _VALID_URL = r'https?://(?:i\.)?imgur\.com/a/(?P<id>[a-zA-Z0-9]+)'
+ _GALLERY = False
+ _TESTS = [{
+ # TODO: only static images - replace with animated/video gallery
+ 'url': 'http://imgur.com/a/j6Orj',
+ 'only_matching': True,
+ },
+ # from https://github.com/ytdl-org/youtube-dl/pull/21693
+ {
+ 'url': 'https://imgur.com/a/iX265HX',
+ 'info_dict': {
+ 'id': 'iX265HX',
+ 'title': 'enen-no-shouboutai'
+ },
+ 'playlist_count': 2,
+ }, {
+ 'url': 'https://imgur.com/a/8pih2Ed',
+ 'info_dict': {
+ 'id': '8pih2Ed'
+ },
+ 'playlist_mincount': 1,
+ }]
diff --git a/yt_dlp/extractor/ina.py b/yt_dlp/extractor/ina.py
new file mode 100644
index 0000000..857013d
--- /dev/null
+++ b/yt_dlp/extractor/ina.py
@@ -0,0 +1,84 @@
+from .common import InfoExtractor
+from ..utils import unified_strdate
+
+
+class InaIE(InfoExtractor):
+ _VALID_URL = r'https?://(?:(?:www|m)\.)?ina\.fr/(?:[^?#]+/)(?P<id>[\w-]+)'
+ _TESTS = [{
+ 'url': 'https://www.ina.fr/video/I12055569/francois-hollande-je-crois-que-c-est-clair-video.html',
+ 'md5': 'c5a09e5cb5604ed10709f06e7a377dda',
+ 'info_dict': {
+ 'id': 'I12055569',
+ 'ext': 'mp4',
+ 'title': 'François Hollande "Je crois que c\'est clair"',
+ 'description': 'md5:19f61e2b4844ed4bb2e3df9ab9f527ff',
+ 'upload_date': '20070712',
+ 'thumbnail': 'https://cdn-hub.ina.fr/notice/690x517/3c4/I12055569.jpeg',
+ }
+ }, {
+ 'url': 'https://www.ina.fr/video/S806544_001/don-d-organes-des-avancees-mais-d-importants-besoins-video.html',
+ 'only_matching': True,
+ }, {
+ 'url': 'https://www.ina.fr/audio/P16173408',
+ 'only_matching': True,
+ }, {
+ 'url': 'https://www.ina.fr/video/P16173408-video.html',
+ 'only_matching': True,
+ }, {
+ 'url': 'http://m.ina.fr/video/I12055569',
+ 'only_matching': True,
+ }, {
+ 'url': 'https://www.ina.fr/ina-eclaire-actu/video/cpb8205116303/les-jeux-electroniques',
+ 'md5': '4b8284a9a3a184fdc7e744225b8251e7',
+ 'info_dict': {
+ 'id': 'CPB8205116303',
+ 'ext': 'mp4',
+ 'title': 'Les jeux électroniques',
+ 'description': 'md5:e09f7683dad1cc60b74950490127d233',
+ 'upload_date': '19821204',
+ 'duration': 657,
+ 'thumbnail': 'https://cdn-hub.ina.fr/notice/690x517/203/CPB8205116303.jpeg',
+ },
+ }, {
+ 'url': 'https://www.ina.fr/ina-eclaire-actu/arletty-carriere-conseils-actrice-marcel-carne',
+ 'md5': '743d6f069a00e19dda0da166a54eeccb',
+ 'info_dict': {
+ 'id': 'I22203233',
+ 'ext': 'mp4',
+ 'title': 'Arletty sur le métier d\'actrice',
+ 'description': 'md5:3d89b5e419d8514c934f146045ccdbad',
+ 'upload_date': '19581128',
+ 'thumbnail': 'https://cdn-hub.ina.fr/notice/690x517/082/I22203233.jpeg',
+ },
+ }, {
+ 'url': 'https://www.ina.fr/ina-eclaire-actu/chasse-croise-sncf-gare-d-austerlitz-vacances-d-ete',
+ 'md5': 'a96fb85e9ba3b5c5b2eeb0c5daa55f2f',
+ 'info_dict': {
+ 'id': 'CAF91038285',
+ 'ext': 'mp4',
+ 'title': 'Les grands départs : les trains',
+ 'description': 'md5:1630ee819d8d4da97df53459e99f72bb',
+ 'upload_date': '19740801',
+ 'thumbnail': 'https://cdn-hub.ina.fr/notice/690x517/2cf/CAF91038285.jpeg',
+ },
+ }]
+
+ def _real_extract(self, url):
+ display_id = self._match_id(url)
+ webpage = self._download_webpage(url, display_id)
+
+ api_url = self._html_search_regex(r'asset-details-url\s*=\s*["\'](?P<api_url>[^"\']+)', webpage, 'api_url')
+ asset_id = self._search_regex(r'assets/([^?/]+)', api_url, 'asset_id')
+
+ api_response = self._download_json(api_url.replace(asset_id, f'{asset_id}.json'), asset_id)
+
+ return {
+ 'id': asset_id,
+ 'url': api_response['resourceUrl'],
+ 'ext': {'video': 'mp4', 'audio': 'mp3'}.get(api_response.get('type')),
+ 'title': api_response.get('title'),
+ 'description': api_response.get('description'),
+ 'upload_date': unified_strdate(api_response.get('dateOfBroadcast')),
+ 'duration': api_response.get('duration'),
+ 'thumbnail': api_response.get('resourceThumbnail'),
+ }
diff --git a/yt_dlp/extractor/inc.py b/yt_dlp/extractor/inc.py
new file mode 100644
index 0000000..9b3fe9a
--- /dev/null
+++ b/yt_dlp/extractor/inc.py
@@ -0,0 +1,57 @@
+from .common import InfoExtractor
+from .kaltura import KalturaIE
+
+
+class IncIE(InfoExtractor):
+    _VALID_URL = r'https?://(?:www\.)?inc\.com/(?:[^/]+/)+(?P<id>[^.]+)\.html'
+ _TESTS = [{
+ 'url': 'http://www.inc.com/tip-sheet/bill-gates-says-these-5-books-will-make-you-smarter.html',
+ 'md5': '7416739c9c16438c09fa35619d6ba5cb',
+ 'info_dict': {
+ 'id': '1_wqig47aq',
+ 'ext': 'mov',
+ 'title': 'Bill Gates Says These 5 Books Will Make You Smarter',
+ 'description': 'md5:bea7ff6cce100886fc1995acb743237e',
+ 'timestamp': 1474414430,
+ 'upload_date': '20160920',
+ 'uploader_id': 'video@inc.com',
+ },
+ 'params': {
+ 'skip_download': True,
+ },
+ }, {
+ # div with id=kaltura_player_1_kqs38cgm
+ 'url': 'https://www.inc.com/oscar-raymundo/richard-branson-young-entrepeneurs.html',
+ 'info_dict': {
+ 'id': '1_kqs38cgm',
+ 'ext': 'mp4',
+ 'title': 'Branson: "In the end, you have to say, Screw it. Just do it."',
+ 'description': 'md5:21b832d034f9af5191ca5959da5e9cb6',
+ 'timestamp': 1364403232,
+ 'upload_date': '20130327',
+ 'uploader_id': 'incdigital@inc.com',
+ },
+ 'params': {
+ 'skip_download': True,
+ },
+ }, {
+ 'url': 'http://www.inc.com/video/david-whitford/founders-forum-tripadvisor-steve-kaufer-most-enjoyable-moment-for-entrepreneur.html',
+ 'only_matching': True,
+ }]
+
+ def _real_extract(self, url):
+ display_id = self._match_id(url)
+ webpage = self._download_webpage(url, display_id)
+
+ partner_id = self._search_regex(
+ r'var\s+_?bizo_data_partner_id\s*=\s*["\'](\d+)', webpage,
+ 'partner id', default='1034971')
+
+ kaltura_id = self._search_regex(
+ r'id=(["\'])kaltura_player_(?P<id>.+?)\1', webpage, 'kaltura id',
+ default=None, group='id') or self._parse_json(self._search_regex(
+ r'pageInfo\.videos\s*=\s*\[(.+)\];', webpage, 'kaltura id'),
+ display_id)['vid_kaltura_id']
+
+ return self.url_result(
+ 'kaltura:%s:%s' % (partner_id, kaltura_id), KalturaIE.ie_key())
diff --git a/yt_dlp/extractor/indavideo.py b/yt_dlp/extractor/indavideo.py
new file mode 100644
index 0000000..564bf8a
--- /dev/null
+++ b/yt_dlp/extractor/indavideo.py
@@ -0,0 +1,115 @@
+from .common import InfoExtractor
+from ..utils import (
+ int_or_none,
+ parse_age_limit,
+ parse_iso8601,
+ time_seconds,
+ update_url_query,
+)
+
+
+class IndavideoEmbedIE(InfoExtractor):
+ _VALID_URL = r'https?://(?:(?:embed\.)?indavideo\.hu/player/video/|assets\.indavideo\.hu/swf/player\.swf\?.*\b(?:v(?:ID|id))=)(?P<id>[\da-f]+)'
+ # Some example URLs covered by generic extractor:
+ # https://indavideo.hu/video/Vicces_cica_1
+ # https://index.indavideo.hu/video/Hod_Nemetorszagban
+ # https://auto.indavideo.hu/video/Sajat_utanfutoban_a_kis_tacsko
+ # https://film.indavideo.hu/video/f_farkaslesen
+ # https://palyazat.indavideo.hu/video/Embertelen_dal_Dodgem_egyuttes
+ _EMBED_REGEX = [r'<iframe[^>]+\bsrc=["\'](?P<url>(?:https?:)//embed\.indavideo\.hu/player/video/[\da-f]+)']
+ _TESTS = [{
+ 'url': 'https://indavideo.hu/player/video/1bdc3c6d80/',
+ 'md5': 'c8a507a1c7410685f83a06eaeeaafeab',
+ 'info_dict': {
+ 'id': '1837039',
+ 'ext': 'mp4',
+ 'title': 'Cicatánc',
+ 'description': '',
+ 'thumbnail': r're:^https?://.*\.jpg$',
+ 'uploader': 'cukiajanlo',
+ 'uploader_id': '83729',
+ 'timestamp': 1439193826,
+ 'upload_date': '20150810',
+ 'duration': 72,
+ 'age_limit': 0,
+ 'tags': ['tánc', 'cica', 'cuki', 'cukiajanlo', 'newsroom'],
+ },
+ }, {
+ 'url': 'https://embed.indavideo.hu/player/video/1bdc3c6d80?autostart=1&hide=1',
+ 'only_matching': True,
+ }]
+ _WEBPAGE_TESTS = [{
+ 'url': 'https://indavideo.hu/video/Vicces_cica_1',
+ 'info_dict': {
+ 'id': '1335611',
+ 'ext': 'mp4',
+ 'title': 'Vicces cica',
+ 'description': 'Játszik a tablettel. :D',
+ 'thumbnail': r're:^https?://.*\.jpg$',
+ 'uploader': 'Jet_Pack',
+ 'uploader_id': '491217',
+ 'timestamp': 1390821212,
+ 'upload_date': '20140127',
+ 'duration': 7,
+ 'age_limit': 0,
+ 'tags': ['cica', 'Jet_Pack'],
+ },
+ }]
+
+ def _real_extract(self, url):
+ video_id = self._match_id(url)
+
+ video = self._download_json(
+ f'https://amfphp.indavideo.hu/SYm0json.php/player.playerHandler.getVideoData/{video_id}/',
+ video_id, query={'_': time_seconds()})['data']
+
+ video_urls = []
+
+ video_files = video.get('video_files')
+ if isinstance(video_files, list):
+ video_urls.extend(video_files)
+ elif isinstance(video_files, dict):
+ video_urls.extend(video_files.values())
+
+ video_urls = list(set(video_urls))
+
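+        # 'filesh' maps each available height (as a string key) to the token
+        # that must be appended to that format's URL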
+ filesh = video.get('filesh') or {}
+
+ formats = []
+ for video_url in video_urls:
+ height = int_or_none(self._search_regex(
+ r'\.(\d{3,4})\.mp4(?:\?|$)', video_url, 'height', default=None))
+ if not height and len(filesh) == 1:
+ height = int_or_none(list(filesh.keys())[0])
+ token = filesh.get(str(height))
+ if token is None:
+ continue
+ formats.append({
+ 'url': update_url_query(video_url, {'token': token}),
+ 'height': height,
+ })
+
+ timestamp = video.get('date')
+ if timestamp:
+ # upload date is in CEST
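+            # e.g. an illustrative raw value '2015-08-10 10:03:46' is parsed
+            # as '2015-08-10 10:03:46 +0200' with a space delimiter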
+ timestamp = parse_iso8601(timestamp + ' +0200', ' ')
+
+ thumbnails = [{
+ 'url': self._proto_relative_url(thumbnail)
+ } for thumbnail in video.get('thumbnails', [])]
+
+ tags = [tag['title'] for tag in video.get('tags') or []]
+
+ return {
+ 'id': video.get('id') or video_id,
+ 'title': video.get('title'),
+ 'description': video.get('description'),
+ 'thumbnails': thumbnails,
+ 'uploader': video.get('user_name'),
+ 'uploader_id': video.get('user_id'),
+ 'timestamp': timestamp,
+ 'duration': int_or_none(video.get('length')),
+ 'age_limit': parse_age_limit(video.get('age_limit')),
+ 'tags': tags,
+ 'formats': formats,
+ }
diff --git a/yt_dlp/extractor/infoq.py b/yt_dlp/extractor/infoq.py
new file mode 100644
index 0000000..192bcfe
--- /dev/null
+++ b/yt_dlp/extractor/infoq.py
@@ -0,0 +1,136 @@
+from ..compat import (
+ compat_b64decode,
+ compat_urllib_parse_unquote,
+ compat_urlparse,
+)
+from ..utils import (
+    ExtractorError,
+    determine_ext,
+    traverse_obj,
+    update_url_query,
+)
+from .bokecc import BokeCCBaseIE
+
+
+class InfoQIE(BokeCCBaseIE):
+ _VALID_URL = r'https?://(?:www\.)?infoq\.com/(?:[^/]+/)+(?P<id>[^/]+)'
+
+ _TESTS = [{
+ 'url': 'http://www.infoq.com/presentations/A-Few-of-My-Favorite-Python-Things',
+ 'md5': 'b5ca0e0a8c1fed93b0e65e48e462f9a2',
+ 'info_dict': {
+ 'id': 'A-Few-of-My-Favorite-Python-Things',
+ 'ext': 'mp4',
+ 'description': 'Mike Pirnat presents some tips and tricks, standard libraries and third party packages that make programming in Python a richer experience.',
+ 'title': 'A Few of My Favorite [Python] Things',
+ },
+ }, {
+ 'url': 'http://www.infoq.com/fr/presentations/changez-avis-sur-javascript',
+ 'only_matching': True,
+ }, {
+ 'url': 'http://www.infoq.com/cn/presentations/openstack-continued-delivery',
+ 'md5': '4918d0cca1497f2244572caf626687ef',
+ 'info_dict': {
+ 'id': 'openstack-continued-delivery',
+ 'title': 'OpenStack持续交付之路',
+ 'ext': 'flv',
+ 'description': 'md5:308d981fb28fa42f49f9568322c683ff',
+ },
+ 'skip': 'Sorry, the page you visited does not exist',
+ }, {
+ 'url': 'https://www.infoq.com/presentations/Simple-Made-Easy',
+ 'md5': '0e34642d4d9ef44bf86f66f6399672db',
+ 'info_dict': {
+ 'id': 'Simple-Made-Easy',
+ 'title': 'Simple Made Easy',
+ 'ext': 'mp3',
+ 'description': 'md5:3e0e213a8bbd074796ef89ea35ada25b',
+ },
+ 'params': {
+ 'format': 'bestaudio',
+ },
+ }]
+
+ def _extract_rtmp_video(self, webpage):
+ # The server URL is hardcoded
+ video_url = 'rtmpe://videof.infoq.com/cfx/st/'
+
+ # Extract video URL
+ encoded_id = self._search_regex(
+ r"jsclassref\s*=\s*'([^']*)'", webpage, 'encoded id', default=None)
+        if not encoded_id:
+            return []
+
+        real_id = compat_urllib_parse_unquote(compat_b64decode(encoded_id).decode('utf-8'))
+ playpath = 'mp4:' + real_id
+
+ return [{
+ 'format_id': 'rtmp_video',
+ 'url': video_url,
+ 'ext': determine_ext(playpath),
+ 'play_path': playpath,
+ }]
+
+ def _extract_cf_auth(self, webpage):
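+        # InfoQConstants.scp/.scs/.sck hold the CloudFront Policy, Signature
+        # and Key-Pair-Id that are appended to the media URL as query
+        # parameters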
+ policy = self._search_regex(r'InfoQConstants\.scp\s*=\s*\'([^\']+)\'', webpage, 'policy')
+ signature = self._search_regex(r'InfoQConstants\.scs\s*=\s*\'([^\']+)\'', webpage, 'signature')
+ key_pair_id = self._search_regex(r'InfoQConstants\.sck\s*=\s*\'([^\']+)\'', webpage, 'key-pair-id')
+ return {
+ 'Policy': policy,
+ 'Signature': signature,
+ 'Key-Pair-Id': key_pair_id,
+ }
+
+ def _extract_http_video(self, webpage):
+ http_video_url = self._search_regex(r'P\.s\s*=\s*\'([^\']+)\'', webpage, 'video URL')
+ http_video_url = update_url_query(http_video_url, self._extract_cf_auth(webpage))
+ return [{
+ 'format_id': 'http_video',
+ 'url': http_video_url,
+ 'http_headers': {'Referer': 'https://www.infoq.com/'},
+ }]
+
+ def _extract_http_audio(self, webpage, video_id):
+ try:
+ http_audio_url = traverse_obj(self._form_hidden_inputs('mp3Form', webpage), 'filename')
+ except ExtractorError:
+ http_audio_url = None
+ if not http_audio_url:
+ return []
+
+        # The base URL is taken from the Location header of the response to
+        # GET https://www.infoq.com/mp3download.action?filename=... when logged in.
+ http_audio_url = compat_urlparse.urljoin('http://ress.infoq.com/downloads/mp3downloads/', http_audio_url)
+ http_audio_url = update_url_query(http_audio_url, self._extract_cf_auth(webpage))
+
+        # the audio file sometimes seems to be missing even when a download
+        # link is present, so probe the URL to make sure
+ if not self._is_valid_url(http_audio_url, video_id):
+ return []
+
+ return [{
+ 'format_id': 'http_audio',
+ 'url': http_audio_url,
+ 'vcodec': 'none',
+ }]
+
+ def _real_extract(self, url):
+ video_id = self._match_id(url)
+ webpage = self._download_webpage(url, video_id)
+
+ video_title = self._html_extract_title(webpage)
+ video_description = self._html_search_meta('description', webpage, 'description')
+
+ if '/cn/' in url:
+ # for China videos, HTTP video URL exists but always fails with 403
+ formats = self._extract_bokecc_formats(webpage, video_id)
+ else:
+ formats = (
+ self._extract_rtmp_video(webpage)
+ + self._extract_http_video(webpage)
+ + self._extract_http_audio(webpage, video_id))
+
+ return {
+ 'id': video_id,
+ 'title': video_title,
+ 'description': video_description,
+ 'formats': formats,
+ }
diff --git a/yt_dlp/extractor/instagram.py b/yt_dlp/extractor/instagram.py
new file mode 100644
index 0000000..f7f2150
--- /dev/null
+++ b/yt_dlp/extractor/instagram.py
@@ -0,0 +1,735 @@
+import hashlib
+import itertools
+import json
+import re
+import time
+
+from .common import InfoExtractor
+from ..networking.exceptions import HTTPError
+from ..utils import (
+ ExtractorError,
+ decode_base_n,
+ encode_base_n,
+ filter_dict,
+ float_or_none,
+ format_field,
+ get_element_by_attribute,
+ int_or_none,
+ lowercase_escape,
+ str_or_none,
+ str_to_int,
+ traverse_obj,
+ url_or_none,
+ urlencode_postdata,
+)
+
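+# Instagram shortcodes use the URL-safe base64 alphabet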
+_ENCODING_CHARS = 'ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-_'
+
+
+def _pk_to_id(media_id):
+    """Source: https://stackoverflow.com/questions/24437823/getting-instagram-post-url-from-media-id"""
+    return encode_base_n(int(media_id.split('_')[0]), table=_ENCODING_CHARS)
+
+
+def _id_to_pk(shortcode):
+    """Convert a shortcode to a numeric value"""
+ return decode_base_n(shortcode[:11], table=_ENCODING_CHARS)
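+# For example, matching the InstagramIOSIE test below:
+#   _pk_to_id('482584233761418119') == 'aye83DjauH'
+#   _id_to_pk('aye83DjauH') == 482584233761418119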
+
+
+class InstagramBaseIE(InfoExtractor):
+ _NETRC_MACHINE = 'instagram'
+ _IS_LOGGED_IN = False
+
+ _API_BASE_URL = 'https://i.instagram.com/api/v1'
+ _LOGIN_URL = 'https://www.instagram.com/accounts/login'
+ _API_HEADERS = {
+ 'X-IG-App-ID': '936619743392459',
+ 'X-ASBD-ID': '198387',
+ 'X-IG-WWW-Claim': '0',
+ 'Origin': 'https://www.instagram.com',
+ 'Accept': '*/*',
+ 'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/104.0.0.0 Safari/537.36',
+ }
+
+ def _perform_login(self, username, password):
+ if self._IS_LOGGED_IN:
+ return
+
+ login_webpage = self._download_webpage(
+ self._LOGIN_URL, None, note='Downloading login webpage', errnote='Failed to download login webpage')
+
+ shared_data = self._parse_json(self._search_regex(
+ r'window\._sharedData\s*=\s*({.+?});', login_webpage, 'shared data', default='{}'), None)
+
+ login = self._download_json(
+ f'{self._LOGIN_URL}/ajax/', None, note='Logging in', headers={
+ **self._API_HEADERS,
+ 'X-Requested-With': 'XMLHttpRequest',
+ 'X-CSRFToken': shared_data['config']['csrf_token'],
+ 'X-Instagram-AJAX': shared_data['rollout_hash'],
+ 'Referer': 'https://www.instagram.com/',
+ }, data=urlencode_postdata({
+ 'enc_password': f'#PWD_INSTAGRAM_BROWSER:0:{int(time.time())}:{password}',
+ 'username': username,
+ 'queryParams': '{}',
+ 'optIntoOneTap': 'false',
+ 'stopDeletionNonce': '',
+ 'trustedDeviceRecords': '{}',
+ }))
+
+ if not login.get('authenticated'):
+ if login.get('message'):
+ raise ExtractorError(f'Unable to login: {login["message"]}')
+ elif login.get('user'):
+ raise ExtractorError('Unable to login: Sorry, your password was incorrect. Please double-check your password.', expected=True)
+ elif login.get('user') is False:
+ raise ExtractorError('Unable to login: The username you entered doesn\'t belong to an account. Please check your username and try again.', expected=True)
+ raise ExtractorError('Unable to login')
+ InstagramBaseIE._IS_LOGGED_IN = True
+
+ def _get_count(self, media, kind, *keys):
+ return traverse_obj(
+ media, (kind, 'count'), *((f'edge_media_{key}', 'count') for key in keys),
+ expected_type=int_or_none)
+
+ def _get_dimension(self, name, media, webpage=None):
+ return (
+ traverse_obj(media, ('dimensions', name), expected_type=int_or_none)
+ or int_or_none(self._html_search_meta(
+ (f'og:video:{name}', f'video:{name}'), webpage or '', default=None)))
+
+ def _extract_nodes(self, nodes, is_direct=False):
+ for idx, node in enumerate(nodes, start=1):
+ if node.get('__typename') != 'GraphVideo' and node.get('is_video') is not True:
+ continue
+
+ video_id = node.get('shortcode')
+
+ if is_direct:
+ info = {
+ 'id': video_id or node['id'],
+ 'url': node.get('video_url'),
+ 'width': self._get_dimension('width', node),
+ 'height': self._get_dimension('height', node),
+ 'http_headers': {
+ 'Referer': 'https://www.instagram.com/',
+ }
+ }
+ elif not video_id:
+ continue
+ else:
+ info = {
+ '_type': 'url',
+ 'ie_key': 'Instagram',
+ 'id': video_id,
+ 'url': f'https://instagram.com/p/{video_id}',
+ }
+
+ yield {
+ **info,
+ 'title': node.get('title') or (f'Video {idx}' if is_direct else None),
+ 'description': traverse_obj(
+ node, ('edge_media_to_caption', 'edges', 0, 'node', 'text'), expected_type=str),
+ 'thumbnail': traverse_obj(
+ node, 'display_url', 'thumbnail_src', 'display_src', expected_type=url_or_none),
+ 'duration': float_or_none(node.get('video_duration')),
+ 'timestamp': int_or_none(node.get('taken_at_timestamp')),
+ 'view_count': int_or_none(node.get('video_view_count')),
+ 'comment_count': self._get_count(node, 'comments', 'preview_comment', 'to_comment', 'to_parent_comment'),
+ 'like_count': self._get_count(node, 'likes', 'preview_like'),
+ }
+
+ def _extract_product_media(self, product_media):
+ media_id = product_media.get('code') or _pk_to_id(product_media.get('pk'))
+ vcodec = product_media.get('video_codec')
+ dash_manifest_raw = product_media.get('video_dash_manifest')
+ videos_list = product_media.get('video_versions')
+ if not (dash_manifest_raw or videos_list):
+ return {}
+
+ formats = [{
+ 'format_id': format.get('id'),
+ 'url': format.get('url'),
+ 'width': format.get('width'),
+ 'height': format.get('height'),
+ 'vcodec': vcodec,
+ } for format in videos_list or []]
+ if dash_manifest_raw:
+ formats.extend(self._parse_mpd_formats(self._parse_xml(dash_manifest_raw, media_id), mpd_id='dash'))
+
+ thumbnails = [{
+ 'url': thumbnail.get('url'),
+ 'width': thumbnail.get('width'),
+ 'height': thumbnail.get('height')
+ } for thumbnail in traverse_obj(product_media, ('image_versions2', 'candidates')) or []]
+ return {
+ 'id': media_id,
+ 'duration': float_or_none(product_media.get('video_duration')),
+ 'formats': formats,
+ 'thumbnails': thumbnails
+ }
+
+ def _extract_product(self, product_info):
+ if isinstance(product_info, list):
+ product_info = product_info[0]
+
+ user_info = product_info.get('user') or {}
+ info_dict = {
+ 'id': _pk_to_id(traverse_obj(product_info, 'pk', 'id', expected_type=str_or_none)[:19]),
+ 'title': product_info.get('title') or f'Video by {user_info.get("username")}',
+ 'description': traverse_obj(product_info, ('caption', 'text'), expected_type=str_or_none),
+ 'timestamp': int_or_none(product_info.get('taken_at')),
+ 'channel': user_info.get('username'),
+ 'uploader': user_info.get('full_name'),
+ 'uploader_id': str_or_none(user_info.get('pk')),
+ 'view_count': int_or_none(product_info.get('view_count')),
+ 'like_count': int_or_none(product_info.get('like_count')),
+ 'comment_count': int_or_none(product_info.get('comment_count')),
+ '__post_extractor': self.extract_comments(_pk_to_id(product_info.get('pk'))),
+ 'http_headers': {
+ 'Referer': 'https://www.instagram.com/',
+ }
+ }
+ carousel_media = product_info.get('carousel_media')
+ if carousel_media:
+ return {
+ '_type': 'playlist',
+ **info_dict,
+ 'title': f'Post by {user_info.get("username")}',
+ 'entries': [{
+ **info_dict,
+ **self._extract_product_media(product_media),
+ } for product_media in carousel_media],
+ }
+
+ return {
+ **info_dict,
+ **self._extract_product_media(product_info)
+ }
+
+ def _get_comments(self, video_id):
+ comments_info = self._download_json(
+ f'{self._API_BASE_URL}/media/{_id_to_pk(video_id)}/comments/?can_support_threading=true&permalink_enabled=false', video_id,
+ fatal=False, errnote='Comments extraction failed', note='Downloading comments info', headers=self._API_HEADERS) or {}
+
+ comment_data = traverse_obj(comments_info, ('edge_media_to_parent_comment', 'edges'), 'comments')
+ for comment_dict in comment_data or []:
+ yield {
+ 'author': traverse_obj(comment_dict, ('node', 'owner', 'username'), ('user', 'username')),
+ 'author_id': traverse_obj(comment_dict, ('node', 'owner', 'id'), ('user', 'pk')),
+ 'author_thumbnail': traverse_obj(comment_dict, ('node', 'owner', 'profile_pic_url'), ('user', 'profile_pic_url'), expected_type=url_or_none),
+ 'id': traverse_obj(comment_dict, ('node', 'id'), 'pk'),
+ 'text': traverse_obj(comment_dict, ('node', 'text'), 'text'),
+ 'like_count': traverse_obj(comment_dict, ('node', 'edge_liked_by', 'count'), 'comment_like_count', expected_type=int_or_none),
+ 'timestamp': traverse_obj(comment_dict, ('node', 'created_at'), 'created_at', expected_type=int_or_none),
+ }
+
+
+class InstagramIOSIE(InfoExtractor):
+ IE_DESC = 'IOS instagram:// URL'
+ _VALID_URL = r'instagram://media\?id=(?P<id>[\d_]+)'
+ _TESTS = [{
+ 'url': 'instagram://media?id=482584233761418119',
+ 'md5': '0d2da106a9d2631273e192b372806516',
+ 'info_dict': {
+ 'id': 'aye83DjauH',
+ 'ext': 'mp4',
+ 'title': 'Video by naomipq',
+ 'description': 'md5:1f17f0ab29bd6fe2bfad705f58de3cb8',
+ 'thumbnail': r're:^https?://.*\.jpg',
+ 'duration': 0,
+ 'timestamp': 1371748545,
+ 'upload_date': '20130620',
+ 'uploader_id': 'naomipq',
+ 'uploader': 'B E A U T Y F O R A S H E S',
+ 'like_count': int,
+ 'comment_count': int,
+ 'comments': list,
+ },
+ 'add_ie': ['Instagram']
+ }]
+
+ def _real_extract(self, url):
+ video_id = _pk_to_id(self._match_id(url))
+ return self.url_result(f'http://instagram.com/tv/{video_id}', InstagramIE, video_id)
+
+
+class InstagramIE(InstagramBaseIE):
+ _VALID_URL = r'(?P<url>https?://(?:www\.)?instagram\.com(?:/[^/]+)?/(?:p|tv|reel)/(?P<id>[^/?#&]+))'
+ _EMBED_REGEX = [r'<iframe[^>]+src=(["\'])(?P<url>(?:https?:)?//(?:www\.)?instagram\.com/p/[^/]+/embed.*?)\1']
+ _TESTS = [{
+ 'url': 'https://instagram.com/p/aye83DjauH/?foo=bar#abc',
+ 'md5': '0d2da106a9d2631273e192b372806516',
+ 'info_dict': {
+ 'id': 'aye83DjauH',
+ 'ext': 'mp4',
+ 'title': 'Video by naomipq',
+ 'description': 'md5:1f17f0ab29bd6fe2bfad705f58de3cb8',
+ 'thumbnail': r're:^https?://.*\.jpg',
+ 'duration': 8.747,
+ 'timestamp': 1371748545,
+ 'upload_date': '20130620',
+ 'uploader_id': '2815873',
+ 'uploader': 'B E A U T Y F O R A S H E S',
+ 'channel': 'naomipq',
+ 'like_count': int,
+ 'comment_count': int,
+ 'comments': list,
+ },
+ 'expected_warnings': [
+ 'General metadata extraction failed',
+ 'Main webpage is locked behind the login page',
+ ],
+ }, {
+ # reel
+ 'url': 'https://www.instagram.com/reel/Chunk8-jurw/',
+ 'md5': 'f6d8277f74515fa3ff9f5791426e42b1',
+ 'info_dict': {
+ 'id': 'Chunk8-jurw',
+ 'ext': 'mp4',
+ 'title': 'Video by instagram',
+ 'description': 'md5:c9cde483606ed6f80fbe9283a6a2b290',
+ 'thumbnail': r're:^https?://.*\.jpg',
+ 'duration': 5.016,
+ 'timestamp': 1661529231,
+ 'upload_date': '20220826',
+ 'uploader_id': '25025320',
+ 'uploader': 'Instagram',
+ 'channel': 'instagram',
+ 'like_count': int,
+ 'comment_count': int,
+ 'comments': list,
+ },
+ 'expected_warnings': [
+ 'General metadata extraction failed',
+ 'Main webpage is locked behind the login page',
+ ],
+ }, {
+ # multi video post
+ 'url': 'https://www.instagram.com/p/BQ0eAlwhDrw/',
+ 'playlist': [{
+ 'info_dict': {
+ 'id': 'BQ0dSaohpPW',
+ 'ext': 'mp4',
+ 'title': 'Video 1',
+ 'thumbnail': r're:^https?://.*\.jpg',
+ 'view_count': int,
+ },
+ }, {
+ 'info_dict': {
+ 'id': 'BQ0dTpOhuHT',
+ 'ext': 'mp4',
+ 'title': 'Video 2',
+ 'thumbnail': r're:^https?://.*\.jpg',
+ 'view_count': int,
+ },
+ }, {
+ 'info_dict': {
+ 'id': 'BQ0dT7RBFeF',
+ 'ext': 'mp4',
+ 'title': 'Video 3',
+ 'thumbnail': r're:^https?://.*\.jpg',
+ 'view_count': int,
+ },
+ }],
+ 'info_dict': {
+ 'id': 'BQ0eAlwhDrw',
+ 'title': 'Post by instagram',
+ 'description': 'md5:0f9203fc6a2ce4d228da5754bcf54957',
+ },
+ 'expected_warnings': [
+ 'General metadata extraction failed',
+ 'Main webpage is locked behind the login page',
+ ],
+ }, {
+ # IGTV
+ 'url': 'https://www.instagram.com/tv/BkfuX9UB-eK/',
+ 'info_dict': {
+ 'id': 'BkfuX9UB-eK',
+ 'ext': 'mp4',
+ 'title': 'Fingerboarding Tricks with @cass.fb',
+ 'thumbnail': r're:^https?://.*\.jpg',
+ 'duration': 53.83,
+ 'timestamp': 1530032919,
+ 'upload_date': '20180626',
+ 'uploader_id': '25025320',
+ 'uploader': 'Instagram',
+ 'channel': 'instagram',
+ 'like_count': int,
+ 'comment_count': int,
+ 'comments': list,
+ 'description': 'Meet Cass Hirst (@cass.fb), a fingerboarding pro who can perform tiny ollies and kickflips while blindfolded.',
+ },
+ 'expected_warnings': [
+ 'General metadata extraction failed',
+ 'Main webpage is locked behind the login page',
+ ],
+ }, {
+ 'url': 'https://instagram.com/p/-Cmh1cukG2/',
+ 'only_matching': True,
+ }, {
+ 'url': 'http://instagram.com/p/9o6LshA7zy/embed/',
+ 'only_matching': True,
+ }, {
+ 'url': 'https://www.instagram.com/tv/aye83DjauH/',
+ 'only_matching': True,
+ }, {
+ 'url': 'https://www.instagram.com/reel/CDUMkliABpa/',
+ 'only_matching': True,
+ }, {
+ 'url': 'https://www.instagram.com/marvelskies.fc/reel/CWqAgUZgCku/',
+ 'only_matching': True,
+ }]
+
+ @classmethod
+ def _extract_embed_urls(cls, url, webpage):
+ res = tuple(super()._extract_embed_urls(url, webpage))
+ if res:
+ return res
+
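+        # fall back to the permalink anchor inside Instagram's
+        # <blockquote class="instagram-media"> embed markup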
+ mobj = re.search(r'<a[^>]+href=([\'"])(?P<link>[^\'"]+)\1',
+ get_element_by_attribute('class', 'instagram-media', webpage) or '')
+ if mobj:
+ return [mobj.group('link')]
+
+ def _real_extract(self, url):
+ video_id, url = self._match_valid_url(url).group('id', 'url')
+ media, webpage = {}, ''
+
+ if self._get_cookies(url).get('sessionid'):
+ info = traverse_obj(self._download_json(
+ f'{self._API_BASE_URL}/media/{_id_to_pk(video_id)}/info/', video_id,
+ fatal=False, errnote='Video info extraction failed',
+ note='Downloading video info', headers=self._API_HEADERS), ('items', 0))
+ if info:
+ media.update(info)
+ return self._extract_product(media)
+
+ api_check = self._download_json(
+ f'{self._API_BASE_URL}/web/get_ruling_for_content/?content_type=MEDIA&target_id={_id_to_pk(video_id)}',
+ video_id, headers=self._API_HEADERS, fatal=False, note='Setting up session', errnote=False) or {}
+ csrf_token = self._get_cookies('https://www.instagram.com').get('csrftoken')
+
+ if not csrf_token:
+ self.report_warning('No csrf token set by Instagram API', video_id)
+ else:
+ csrf_token = csrf_token.value if api_check.get('status') == 'ok' else None
+ if not csrf_token:
+ self.report_warning('Instagram API is not granting access', video_id)
+
+ variables = {
+ 'shortcode': video_id,
+ 'child_comment_count': 3,
+ 'fetch_comment_count': 40,
+ 'parent_comment_count': 24,
+ 'has_threaded_comments': True,
+ }
+ general_info = self._download_json(
+ 'https://www.instagram.com/graphql/query/', video_id, fatal=False, errnote=False,
+ headers={
+ **self._API_HEADERS,
+ 'X-CSRFToken': csrf_token or '',
+ 'X-Requested-With': 'XMLHttpRequest',
+ 'Referer': url,
+ }, query={
+ 'query_hash': '9f8827793ef34641b2fb195d4d41151c',
+ 'variables': json.dumps(variables, separators=(',', ':')),
+ })
+ media.update(traverse_obj(general_info, ('data', 'shortcode_media')) or {})
+
+ if not general_info:
+ self.report_warning('General metadata extraction failed (some metadata might be missing).', video_id)
+ webpage, urlh = self._download_webpage_handle(url, video_id)
+ shared_data = self._search_json(
+ r'window\._sharedData\s*=', webpage, 'shared data', video_id, fatal=False) or {}
+
+ if shared_data and self._LOGIN_URL not in urlh.url:
+ media.update(traverse_obj(
+ shared_data, ('entry_data', 'PostPage', 0, 'graphql', 'shortcode_media'),
+ ('entry_data', 'PostPage', 0, 'media'), expected_type=dict) or {})
+ else:
+ self.report_warning('Main webpage is locked behind the login page. Retrying with embed webpage (some metadata might be missing).')
+ webpage = self._download_webpage(
+ f'{url}/embed/', video_id, note='Downloading embed webpage', fatal=False)
+ additional_data = self._search_json(
+ r'window\.__additionalDataLoaded\s*\(\s*[^,]+,', webpage, 'additional data', video_id, fatal=False)
+ if not additional_data and not media:
+ self.raise_login_required('Requested content is not available, rate-limit reached or login required')
+
+ product_item = traverse_obj(additional_data, ('items', 0), expected_type=dict)
+ if product_item:
+ media.update(product_item)
+ return self._extract_product(media)
+
+ media.update(traverse_obj(
+ additional_data, ('graphql', 'shortcode_media'), 'shortcode_media', expected_type=dict) or {})
+
+ username = traverse_obj(media, ('owner', 'username')) or self._search_regex(
+ r'"owner"\s*:\s*{\s*"username"\s*:\s*"(.+?)"', webpage, 'username', fatal=False)
+
+ description = (
+ traverse_obj(media, ('edge_media_to_caption', 'edges', 0, 'node', 'text'), expected_type=str)
+ or media.get('caption'))
+ if not description:
+ description = self._search_regex(
+ r'"caption"\s*:\s*"(.+?)"', webpage, 'description', default=None)
+ if description is not None:
+ description = lowercase_escape(description)
+
+ video_url = media.get('video_url')
+ if not video_url:
+ nodes = traverse_obj(media, ('edge_sidecar_to_children', 'edges', ..., 'node'), expected_type=dict) or []
+ if nodes:
+ return self.playlist_result(
+ self._extract_nodes(nodes, True), video_id,
+ format_field(username, None, 'Post by %s'), description)
+
+ video_url = self._og_search_video_url(webpage, secure=False)
+
+ formats = [{
+ 'url': video_url,
+ 'width': self._get_dimension('width', media, webpage),
+ 'height': self._get_dimension('height', media, webpage),
+ }]
+ dash = traverse_obj(media, ('dash_info', 'video_dash_manifest'))
+ if dash:
+ formats.extend(self._parse_mpd_formats(self._parse_xml(dash, video_id), mpd_id='dash'))
+
+ comment_data = traverse_obj(media, ('edge_media_to_parent_comment', 'edges'))
+ comments = [{
+ 'author': traverse_obj(comment_dict, ('node', 'owner', 'username')),
+ 'author_id': traverse_obj(comment_dict, ('node', 'owner', 'id')),
+ 'id': traverse_obj(comment_dict, ('node', 'id')),
+ 'text': traverse_obj(comment_dict, ('node', 'text')),
+ 'timestamp': traverse_obj(comment_dict, ('node', 'created_at'), expected_type=int_or_none),
+ } for comment_dict in comment_data] if comment_data else None
+
+ display_resources = (
+ media.get('display_resources')
+ or [{'src': media.get(key)} for key in ('display_src', 'display_url')]
+ or [{'src': self._og_search_thumbnail(webpage)}])
+ thumbnails = [{
+ 'url': thumbnail['src'],
+ 'width': thumbnail.get('config_width'),
+ 'height': thumbnail.get('config_height'),
+ } for thumbnail in display_resources if thumbnail.get('src')]
+
+ return {
+ 'id': video_id,
+ 'formats': formats,
+ 'title': media.get('title') or 'Video by %s' % username,
+ 'description': description,
+ 'duration': float_or_none(media.get('video_duration')),
+ 'timestamp': traverse_obj(media, 'taken_at_timestamp', 'date', expected_type=int_or_none),
+ 'uploader_id': traverse_obj(media, ('owner', 'id')),
+ 'uploader': traverse_obj(media, ('owner', 'full_name')),
+ 'channel': username,
+ 'like_count': self._get_count(media, 'likes', 'preview_like') or str_to_int(self._search_regex(
+ r'data-log-event="likeCountClick"[^>]*>[^\d]*([\d,\.]+)', webpage, 'like count', fatal=False)),
+ 'comment_count': self._get_count(media, 'comments', 'preview_comment', 'to_comment', 'to_parent_comment'),
+ 'comments': comments,
+ 'thumbnails': thumbnails,
+ 'http_headers': {
+ 'Referer': 'https://www.instagram.com/',
+ }
+ }
+
+
+class InstagramPlaylistBaseIE(InstagramBaseIE):
+ _gis_tmpl = None # used to cache GIS request type
+
+ def _parse_graphql(self, webpage, item_id):
+ # Reads a webpage and returns its GraphQL data.
+ return self._parse_json(
+ self._search_regex(
+ r'sharedData\s*=\s*({.+?})\s*;\s*[<\n]', webpage, 'data'),
+ item_id)
+
+ def _extract_graphql(self, data, url):
+ # Parses GraphQL queries containing videos and generates a playlist.
+ uploader_id = self._match_id(url)
+ csrf_token = data['config']['csrf_token']
+ rhx_gis = data.get('rhx_gis') or '3c7ca9dcefcf966d11dacf1f151335e8'
+
+ cursor = ''
+ for page_num in itertools.count(1):
+ variables = {
+ 'first': 12,
+ 'after': cursor,
+ }
+ variables.update(self._query_vars_for(data))
+ variables = json.dumps(variables)
+
+ if self._gis_tmpl:
+ gis_tmpls = [self._gis_tmpl]
+ else:
+ gis_tmpls = [
+                    rhx_gis,
+ '',
+ '%s:%s' % (rhx_gis, csrf_token),
+ '%s:%s:%s' % (rhx_gis, csrf_token, self.get_param('http_headers')['User-Agent']),
+ ]
+
+            # try each known way of building the GIS query; use the first one
+            # that works and cache it for future requests
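+            # each candidate is combined with the request variables as
+            # X-Instagram-GIS = md5('<gis_tmpl>:<variables>').hexdigest()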
+ for gis_tmpl in gis_tmpls:
+ try:
+ json_data = self._download_json(
+ 'https://www.instagram.com/graphql/query/', uploader_id,
+ 'Downloading JSON page %d' % page_num, headers={
+ 'X-Requested-With': 'XMLHttpRequest',
+ 'X-Instagram-GIS': hashlib.md5(
+ ('%s:%s' % (gis_tmpl, variables)).encode('utf-8')).hexdigest(),
+ }, query={
+ 'query_hash': self._QUERY_HASH,
+ 'variables': variables,
+ })
+ media = self._parse_timeline_from(json_data)
+ self._gis_tmpl = gis_tmpl
+ break
+ except ExtractorError as e:
+ # if it's an error caused by a bad query, and there are
+ # more GIS templates to try, ignore it and keep trying
+ if isinstance(e.cause, HTTPError) and e.cause.status == 403:
+ if gis_tmpl != gis_tmpls[-1]:
+ continue
+ raise
+
+ nodes = traverse_obj(media, ('edges', ..., 'node'), expected_type=dict) or []
+ if not nodes:
+ break
+ yield from self._extract_nodes(nodes)
+
+ has_next_page = traverse_obj(media, ('page_info', 'has_next_page'))
+ cursor = traverse_obj(media, ('page_info', 'end_cursor'), expected_type=str)
+ if not has_next_page or not cursor:
+ break
+
+ def _real_extract(self, url):
+ user_or_tag = self._match_id(url)
+ webpage = self._download_webpage(url, user_or_tag)
+ data = self._parse_graphql(webpage, user_or_tag)
+
+ self._set_cookie('instagram.com', 'ig_pr', '1')
+
+ return self.playlist_result(
+ self._extract_graphql(data, url), user_or_tag, user_or_tag)
+
+
+class InstagramUserIE(InstagramPlaylistBaseIE):
+ _WORKING = False
+ _VALID_URL = r'https?://(?:www\.)?instagram\.com/(?P<id>[^/]{2,})/?(?:$|[?#])'
+ IE_DESC = 'Instagram user profile'
+ IE_NAME = 'instagram:user'
+ _TESTS = [{
+ 'url': 'https://instagram.com/porsche',
+ 'info_dict': {
+ 'id': 'porsche',
+ 'title': 'porsche',
+ },
+ 'playlist_count': 5,
+ 'params': {
+ 'extract_flat': True,
+ 'skip_download': True,
+ 'playlistend': 5,
+ }
+ }]
+
+    _QUERY_HASH = '42323d64886122307be10013ad2dcc44'
+
+ @staticmethod
+ def _parse_timeline_from(data):
+ # extracts the media timeline data from a GraphQL result
+ return data['data']['user']['edge_owner_to_timeline_media']
+
+ @staticmethod
+ def _query_vars_for(data):
+ # returns a dictionary of variables to add to the timeline query based
+ # on the GraphQL of the original page
+ return {
+ 'id': data['entry_data']['ProfilePage'][0]['graphql']['user']['id']
+ }
+
+
+class InstagramTagIE(InstagramPlaylistBaseIE):
+ _VALID_URL = r'https?://(?:www\.)?instagram\.com/explore/tags/(?P<id>[^/]+)'
+ IE_DESC = 'Instagram hashtag search URLs'
+ IE_NAME = 'instagram:tag'
+ _TESTS = [{
+ 'url': 'https://instagram.com/explore/tags/lolcats',
+ 'info_dict': {
+ 'id': 'lolcats',
+ 'title': 'lolcats',
+ },
+ 'playlist_count': 50,
+ 'params': {
+ 'extract_flat': True,
+ 'skip_download': True,
+ 'playlistend': 50,
+ }
+ }]
+
+    _QUERY_HASH = 'f92f56d47dc7a55b606908374b43a314'
+
+ @staticmethod
+ def _parse_timeline_from(data):
+ # extracts the media timeline data from a GraphQL result
+ return data['data']['hashtag']['edge_hashtag_to_media']
+
+ @staticmethod
+ def _query_vars_for(data):
+ # returns a dictionary of variables to add to the timeline query based
+ # on the GraphQL of the original page
+        return {
+            'tag_name': data['entry_data']['TagPage'][0]['graphql']['hashtag']['name'],
+        }
+
+
+class InstagramStoryIE(InstagramBaseIE):
+ _VALID_URL = r'https?://(?:www\.)?instagram\.com/stories/(?P<user>[^/]+)/(?P<id>\d+)'
+ IE_NAME = 'instagram:story'
+
+ _TESTS = [{
+ 'url': 'https://www.instagram.com/stories/highlights/18090946048123978/',
+ 'info_dict': {
+ 'id': '18090946048123978',
+ 'title': 'Rare',
+ },
+ 'playlist_mincount': 50
+ }]
+
+ def _real_extract(self, url):
+ username, story_id = self._match_valid_url(url).groups()
+ story_info = self._download_webpage(url, story_id)
+ user_info = self._search_json(r'"user":', story_info, 'user info', story_id, fatal=False)
+ if not user_info:
+ self.raise_login_required('This content is unreachable')
+
+ user_id = traverse_obj(user_info, 'pk', 'id', expected_type=str)
+ story_info_url = user_id if username != 'highlights' else f'highlight:{story_id}'
+ if not story_info_url: # user id is only mandatory for non-highlights
+ raise ExtractorError('Unable to extract user id')
+
+ videos = traverse_obj(self._download_json(
+ f'{self._API_BASE_URL}/feed/reels_media/?reel_ids={story_info_url}',
+ story_id, errnote=False, fatal=False, headers=self._API_HEADERS), 'reels')
+ if not videos:
+ self.raise_login_required('You need to log in to access this content')
+
+ full_name = traverse_obj(videos, (f'highlight:{story_id}', 'user', 'full_name'), (user_id, 'user', 'full_name'))
+ story_title = traverse_obj(videos, (f'highlight:{story_id}', 'title'))
+ if not story_title:
+ story_title = f'Story by {username}'
+
+ highlights = traverse_obj(videos, (f'highlight:{story_id}', 'items'), (user_id, 'items'))
+ info_data = []
+        for highlight in highlights or []:
+ highlight_data = self._extract_product(highlight)
+ if highlight_data.get('formats'):
+ info_data.append({
+ 'uploader': full_name,
+ 'uploader_id': user_id,
+ **filter_dict(highlight_data),
+ })
+ return self.playlist_result(info_data, playlist_id=story_id, playlist_title=story_title)
diff --git a/yt_dlp/extractor/internazionale.py b/yt_dlp/extractor/internazionale.py
new file mode 100644
index 0000000..1b1cb57
--- /dev/null
+++ b/yt_dlp/extractor/internazionale.py
@@ -0,0 +1,75 @@
+from .common import InfoExtractor
+from ..utils import unified_timestamp
+
+
+class InternazionaleIE(InfoExtractor):
+ _VALID_URL = r'https?://(?:www\.)?internazionale\.it/video/(?:[^/]+/)*(?P<id>[^/?#&]+)'
+ _TESTS = [{
+ 'url': 'https://www.internazionale.it/video/2015/02/19/richard-linklater-racconta-una-scena-di-boyhood',
+ 'md5': '3e39d32b66882c1218e305acbf8348ca',
+ 'info_dict': {
+ 'id': '265968',
+ 'display_id': 'richard-linklater-racconta-una-scena-di-boyhood',
+ 'ext': 'mp4',
+ 'title': 'Richard Linklater racconta una scena di Boyhood',
+ 'description': 'md5:efb7e5bbfb1a54ae2ed5a4a015f0e665',
+ 'timestamp': 1424354635,
+ 'upload_date': '20150219',
+ 'thumbnail': r're:^https?://.*\.jpg$',
+ },
+ }, {
+ 'url': 'https://www.internazionale.it/video/2018/08/29/telefono-stare-con-noi-stessi',
+ 'md5': '9db8663704cab73eb972d1cee0082c79',
+ 'info_dict': {
+ 'id': '761344',
+ 'display_id': 'telefono-stare-con-noi-stessi',
+ 'ext': 'mp4',
+ 'title': 'Usiamo il telefono per evitare di stare con noi stessi',
+ 'description': 'md5:75ccfb0d6bcefc6e7428c68b4aa1fe44',
+ 'timestamp': 1535528954,
+ 'upload_date': '20180829',
+ 'thumbnail': r're:^https?://.*\.jpg$',
+ },
+ }]
+
+ def _real_extract(self, url):
+ display_id = self._match_id(url)
+
+ webpage = self._download_webpage(url, display_id)
+
+ DATA_RE = r'data-%s=(["\'])(?P<value>(?:(?!\1).)+)\1'
+
+ title = self._search_regex(
+ DATA_RE % 'video-title', webpage, 'title', default=None,
+ group='value') or self._og_search_title(webpage)
+
+ video_id = self._search_regex(
+ DATA_RE % 'job-id', webpage, 'video id', group='value')
+ video_path = self._search_regex(
+ DATA_RE % 'video-path', webpage, 'video path', group='value')
+ video_available_abroad = self._search_regex(
+ DATA_RE % 'video-available_abroad', webpage,
+            'video available abroad', default='1', group='value')
+ video_available_abroad = video_available_abroad == '1'
+
+        video_base = 'https://video%s.internazionale.it/%s/%s.' % (
+            '' if video_available_abroad else '-ita', video_path, video_id)
+
+ formats = self._extract_m3u8_formats(
+ video_base + 'm3u8', display_id, 'mp4',
+ entry_protocol='m3u8_native', m3u8_id='hls', fatal=False)
+ formats.extend(self._extract_mpd_formats(
+ video_base + 'mpd', display_id, mpd_id='dash', fatal=False))
+
+ timestamp = unified_timestamp(self._html_search_meta(
+ 'article:published_time', webpage, 'timestamp'))
+
+ return {
+ 'id': video_id,
+ 'display_id': display_id,
+ 'title': title,
+ 'thumbnail': self._og_search_thumbnail(webpage),
+ 'description': self._og_search_description(webpage),
+ 'timestamp': timestamp,
+ 'formats': formats,
+ }
diff --git a/yt_dlp/extractor/internetvideoarchive.py b/yt_dlp/extractor/internetvideoarchive.py
new file mode 100644
index 0000000..9d2574c
--- /dev/null
+++ b/yt_dlp/extractor/internetvideoarchive.py
@@ -0,0 +1,58 @@
+import json
+import re
+
+from .common import InfoExtractor
+from ..utils import parse_qs
+
+
+class InternetVideoArchiveIE(InfoExtractor):
+    _VALID_URL = r'https?://video\.internetvideoarchive\.net/(?:player|flash/players)/.*?\?.*?publishedid'
+
+ _TEST = {
+ 'url': 'http://video.internetvideoarchive.net/player/6/configuration.ashx?customerid=69249&publishedid=194487&reporttag=vdbetatitle&playerid=641&autolist=0&domain=www.videodetective.com&maxrate=high&minrate=low&socialplayer=false',
+ 'info_dict': {
+ 'id': '194487',
+ 'ext': 'mp4',
+ 'title': 'Kick-Ass 2',
+ 'description': 'md5:c189d5b7280400630a1d3dd17eaa8d8a',
+ },
+ 'params': {
+ # m3u8 download
+ 'skip_download': True,
+ },
+ }
+
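+    # Not called in this file; other extractors (e.g. VideoDetectiveIE) build their configuration URLs through this helper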
+ @staticmethod
+ def _build_json_url(query):
+ return 'http://video.internetvideoarchive.net/player/6/configuration.ashx?' + query
+
+ def _real_extract(self, url):
+ query = parse_qs(url)
+ video_id = query['publishedid'][0]
+ data = self._download_json(
+ 'https://video.internetvideoarchive.net/videojs7/videojs7.ivasettings.ashx',
+ video_id, data=json.dumps({
+ 'customerid': query['customerid'][0],
+ 'publishedid': video_id,
+ }).encode())
+ title = data['Title']
+ formats = self._extract_m3u8_formats(
+ data['VideoUrl'], video_id, 'mp4',
+ 'm3u8_native', m3u8_id='hls', fatal=False)
+        file_url = formats[0]['url'] if formats else ''  # the HLS request above is non-fatal and may yield no formats
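+        # A Smooth Streaming (.ism) URL can be rewritten to obtain HDS, DASH and MSS manifests of the same asset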
+ if '.ism/' in file_url:
+ replace_url = lambda x: re.sub(r'\.ism/[^?]+', '.ism/' + x, file_url)
+ formats.extend(self._extract_f4m_formats(
+ replace_url('.f4m'), video_id, f4m_id='hds', fatal=False))
+ formats.extend(self._extract_mpd_formats(
+ replace_url('.mpd'), video_id, mpd_id='dash', fatal=False))
+ formats.extend(self._extract_ism_formats(
+ replace_url('Manifest'), video_id, ism_id='mss', fatal=False))
+
+ return {
+ 'id': video_id,
+ 'title': title,
+ 'formats': formats,
+ 'thumbnail': data.get('PosterUrl'),
+ 'description': data.get('Description'),
+ }
diff --git a/yt_dlp/extractor/iprima.py b/yt_dlp/extractor/iprima.py
new file mode 100644
index 0000000..f7aa579
--- /dev/null
+++ b/yt_dlp/extractor/iprima.py
@@ -0,0 +1,280 @@
+import re
+import time
+
+from .common import InfoExtractor
+from ..utils import (
+ determine_ext,
+ js_to_json,
+ urlencode_postdata,
+ ExtractorError,
+ parse_qs,
+ traverse_obj
+)
+
+
+class IPrimaIE(InfoExtractor):
+ _VALID_URL = r'https?://(?!cnn)(?:[^/]+)\.iprima\.cz/(?:[^/]+/)*(?P<id>[^/?#&]+)'
+ _GEO_BYPASS = False
+ _NETRC_MACHINE = 'iprima'
+ _AUTH_ROOT = 'https://auth.iprima.cz'
+ access_token = None
+
+ _TESTS = [{
+ 'url': 'https://prima.iprima.cz/particka/92-epizoda',
+ 'info_dict': {
+ 'id': 'p51388',
+ 'ext': 'mp4',
+ 'title': 'Partička (92)',
+ 'description': 'md5:859d53beae4609e6dd7796413f1b6cac',
+ 'upload_date': '20201103',
+ 'timestamp': 1604437480,
+ },
+ 'params': {
+ 'skip_download': True, # m3u8 download
+ },
+ }, {
+ 'url': 'http://play.iprima.cz/particka/particka-92',
+ 'only_matching': True,
+ }, {
+ # geo restricted
+ 'url': 'http://play.iprima.cz/closer-nove-pripady/closer-nove-pripady-iv-1',
+ 'only_matching': True,
+ }, {
+ 'url': 'https://prima.iprima.cz/my-little-pony/mapa-znameni-2-2',
+ 'only_matching': True,
+ }, {
+ 'url': 'https://prima.iprima.cz/porady/jak-se-stavi-sen/rodina-rathousova-praha',
+ 'only_matching': True,
+ }, {
+ 'url': 'http://www.iprima.cz/filmy/desne-rande',
+ 'only_matching': True,
+ }, {
+ 'url': 'https://zoom.iprima.cz/10-nejvetsich-tajemstvi-zahad/posvatna-mista-a-stavby',
+ 'only_matching': True,
+ }, {
+ 'url': 'https://krimi.iprima.cz/mraz-0/sebevrazdy',
+ 'only_matching': True,
+ }, {
+ 'url': 'https://cool.iprima.cz/derava-silnice-nevadi',
+ 'only_matching': True,
+ }, {
+ 'url': 'https://love.iprima.cz/laska-az-za-hrob/slib-dany-bratrovi',
+ 'only_matching': True,
+ }]
+
+ def _perform_login(self, username, password):
+ if self.access_token:
+ return
+
+ login_page = self._download_webpage(
+ f'{self._AUTH_ROOT}/oauth2/login', None, note='Downloading login page',
+ errnote='Downloading login page failed')
+
+ login_form = self._hidden_inputs(login_page)
+
+ login_form.update({
+ '_email': username,
+ '_password': password})
+
+ profile_select_html, login_handle = self._download_webpage_handle(
+ f'{self._AUTH_ROOT}/oauth2/login', None, data=urlencode_postdata(login_form),
+ note='Logging in')
+
+ # a profile may need to be selected first, even when there is only a single one
+ if '/profile-select' in login_handle.url:
+ profile_id = self._search_regex(
+ r'data-identifier\s*=\s*["\']?(\w+)', profile_select_html, 'profile id')
+
+ login_handle = self._request_webpage(
+ f'{self._AUTH_ROOT}/user/profile-select-perform/{profile_id}', None,
+ query={'continueUrl': '/user/login?redirect_uri=/user/'}, note='Selecting profile')
+
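+        # OAuth2 authorization-code flow: the code is returned as a query parameter of the final redirect URL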
+ code = traverse_obj(login_handle.url, ({parse_qs}, 'code', 0))
+ if not code:
+ raise ExtractorError('Login failed', expected=True)
+
+ token_request_data = {
+ 'scope': 'openid+email+profile+phone+address+offline_access',
+ 'client_id': 'prima_sso',
+ 'grant_type': 'authorization_code',
+ 'code': code,
+ 'redirect_uri': f'{self._AUTH_ROOT}/sso/auth-check'}
+
+ token_data = self._download_json(
+ f'{self._AUTH_ROOT}/oauth2/token', None,
+ note='Downloading token', errnote='Downloading token failed',
+ data=urlencode_postdata(token_request_data))
+
+ self.access_token = token_data.get('access_token')
+ if self.access_token is None:
+ raise ExtractorError('Getting token failed', expected=True)
+
+ def _real_initialize(self):
+ if not self.access_token:
+ self.raise_login_required('Login is required to access any iPrima content', method='password')
+
+ def _raise_access_error(self, error_code):
+ if error_code == 'PLAY_GEOIP_DENIED':
+ self.raise_geo_restricted(countries=['CZ'], metadata_available=True)
+ elif error_code is not None:
+            self.raise_no_formats('Access to stream info is forbidden', expected=True)
+
+ def _real_extract(self, url):
+ video_id = self._match_id(url)
+
+ webpage = self._download_webpage(url, video_id)
+
+ title = self._html_extract_title(webpage) or self._html_search_meta(
+ ['og:title', 'twitter:title'],
+ webpage, 'title', default=None)
+
+ video_id = self._search_regex((
+ r'productId\s*=\s*([\'"])(?P<id>p\d+)\1',
+ r'pproduct_id\s*=\s*([\'"])(?P<id>p\d+)\1',
+ ), webpage, 'real id', group='id', default=None)
+
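+        # Fall back to the Nuxt.js payload when the product ID is not inlined in the page scripts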
+ if not video_id:
+ nuxt_data = self._search_nuxt_data(webpage, video_id, traverse='data', fatal=False)
+ video_id = traverse_obj(
+ nuxt_data, (..., 'content', 'additionals', 'videoPlayId', {str}), get_all=False)
+
+ if not video_id:
+ nuxt_data = self._search_json(
+ r'<script[^>]+\bid=["\']__NUXT_DATA__["\'][^>]*>',
+ webpage, 'nuxt data', None, end_pattern=r'</script>', contains_pattern=r'\[(?s:.+)\]')
+
+ video_id = traverse_obj(nuxt_data, lambda _, v: re.fullmatch(r'p\d+', v), get_all=False)
+
+ if not video_id:
+ self.raise_no_formats('Unable to extract video ID from webpage')
+
+ metadata = self._download_json(
+ f'https://api.play-backend.iprima.cz/api/v1//products/id-{video_id}/play',
+ video_id, note='Getting manifest URLs', errnote='Failed to get manifest URLs',
+ headers={'X-OTT-Access-Token': self.access_token},
+ expected_status=403)
+
+ self._raise_access_error(metadata.get('errorCode'))
+
+ stream_infos = metadata.get('streamInfos')
+ formats = []
+ if stream_infos is None:
+            self.raise_no_formats('Failed to read stream info', expected=True)
+ else:
+ for manifest in stream_infos:
+ manifest_type = manifest.get('type')
+ manifest_url = manifest.get('url')
+ ext = determine_ext(manifest_url)
+ if manifest_type == 'HLS' or ext == 'm3u8':
+ formats += self._extract_m3u8_formats(
+ manifest_url, video_id, 'mp4', entry_protocol='m3u8_native',
+ m3u8_id='hls', fatal=False)
+ elif manifest_type == 'DASH' or ext == 'mpd':
+ formats += self._extract_mpd_formats(
+ manifest_url, video_id, mpd_id='dash', fatal=False)
+
+ final_result = self._search_json_ld(webpage, video_id, default={})
+ final_result.update({
+ 'id': video_id,
+ 'title': title,
+ 'thumbnail': self._html_search_meta(
+ ['thumbnail', 'og:image', 'twitter:image'],
+ webpage, 'thumbnail', default=None),
+ 'formats': formats,
+ 'description': self._html_search_meta(
+ ['description', 'og:description', 'twitter:description'],
+ webpage, 'description', default=None)})
+
+ return final_result
+
+
+class IPrimaCNNIE(InfoExtractor):
+ _VALID_URL = r'https?://cnn\.iprima\.cz/(?:[^/]+/)*(?P<id>[^/?#&]+)'
+ _GEO_BYPASS = False
+
+ _TESTS = [{
+ 'url': 'https://cnn.iprima.cz/porady/strunc/24072020-koronaviru-mam-plne-zuby-strasit-druhou-vlnou-je-absurdni-rika-senatorka-dernerova',
+ 'info_dict': {
+ 'id': 'p716177',
+ 'ext': 'mp4',
+ 'title': 'md5:277c6b1ed0577e51b40ddd35602ff43e',
+ },
+ 'params': {
+ 'skip_download': 'm3u8'
+ }
+ }]
+
+ def _real_extract(self, url):
+ video_id = self._match_id(url)
+
+ self._set_cookie('play.iprima.cz', 'ott_adult_confirmed', '1')
+
+ webpage = self._download_webpage(url, video_id)
+
+ title = self._og_search_title(
+ webpage, default=None) or self._search_regex(
+ r'<h1>([^<]+)', webpage, 'title')
+
+ video_id = self._search_regex(
+ (r'<iframe[^>]+\bsrc=["\'](?:https?:)?//(?:api\.play-backend\.iprima\.cz/prehravac/embedded|prima\.iprima\.cz/[^/]+/[^/]+)\?.*?\bid=(p\d+)',
+ r'data-product="([^"]+)">',
+ r'id=["\']player-(p\d+)"',
+ r'playerId\s*:\s*["\']player-(p\d+)',
+ r'\bvideos\s*=\s*["\'](p\d+)'),
+ webpage, 'real id')
+
+ playerpage = self._download_webpage(
+ 'http://play.iprima.cz/prehravac/init',
+ video_id, note='Downloading player', query={
+ '_infuse': 1,
+ '_ts': round(time.time()),
+ 'productId': video_id,
+ }, headers={'Referer': url})
+
+ formats = []
+
+ def extract_formats(format_url, format_key=None, lang=None):
+ ext = determine_ext(format_url)
+ new_formats = []
+ if format_key == 'hls' or ext == 'm3u8':
+ new_formats = self._extract_m3u8_formats(
+ format_url, video_id, 'mp4', entry_protocol='m3u8_native',
+ m3u8_id='hls', fatal=False)
+ elif format_key == 'dash' or ext == 'mpd':
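+                # Note: this early return disables DASH extraction, leaving the MPD code below unreachable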
+ return
+ new_formats = self._extract_mpd_formats(
+ format_url, video_id, mpd_id='dash', fatal=False)
+ if lang:
+ for f in new_formats:
+ if not f.get('language'):
+ f['language'] = lang
+ formats.extend(new_formats)
+
+ options = self._parse_json(
+ self._search_regex(
+ r'(?s)(?:TDIPlayerOptions|playerOptions)\s*=\s*({.+?});\s*\]\]',
+ playerpage, 'player options', default='{}'),
+ video_id, transform_source=js_to_json, fatal=False)
+ if options:
+ for key, tracks in options.get('tracks', {}).items():
+ if not isinstance(tracks, list):
+ continue
+ for track in tracks:
+ src = track.get('src')
+ if src:
+ extract_formats(src, key.lower(), track.get('lang'))
+
+ if not formats:
+ for _, src in re.findall(r'src["\']\s*:\s*(["\'])(.+?)\1', playerpage):
+ extract_formats(src)
+
+ if not formats and '>GEO_IP_NOT_ALLOWED<' in playerpage:
+ self.raise_geo_restricted(countries=['CZ'], metadata_available=True)
+
+ return {
+ 'id': video_id,
+ 'title': title,
+ 'thumbnail': self._og_search_thumbnail(webpage, default=None),
+ 'formats': formats,
+ 'description': self._og_search_description(webpage, default=None),
+ }
diff --git a/yt_dlp/extractor/iqiyi.py b/yt_dlp/extractor/iqiyi.py
new file mode 100644
index 0000000..3368ab1
--- /dev/null
+++ b/yt_dlp/extractor/iqiyi.py
@@ -0,0 +1,766 @@
+import hashlib
+import itertools
+import re
+import time
+
+from .common import InfoExtractor
+from ..compat import (
+ compat_str,
+ compat_urllib_parse_urlencode,
+ compat_urllib_parse_unquote
+)
+from .openload import PhantomJSwrapper
+from ..utils import (
+ clean_html,
+ decode_packed_codes,
+ ExtractorError,
+ float_or_none,
+ format_field,
+ get_element_by_id,
+ get_element_by_attribute,
+ int_or_none,
+ js_to_json,
+ ohdave_rsa_encrypt,
+ parse_age_limit,
+ parse_duration,
+ parse_iso8601,
+ parse_resolution,
+ qualities,
+ remove_start,
+ str_or_none,
+ traverse_obj,
+ urljoin,
+)
+
+
+def md5_text(text):
+ return hashlib.md5(text.encode('utf-8')).hexdigest()
+
+
+class IqiyiSDK:
+ def __init__(self, target, ip, timestamp):
+ self.target = target
+ self.ip = ip
+ self.timestamp = timestamp
+
+ @staticmethod
+ def split_sum(data):
+ return compat_str(sum(map(lambda p: int(p, 16), list(data))))
+
+ @staticmethod
+ def digit_sum(num):
+ if isinstance(num, int):
+ num = compat_str(num)
+ return compat_str(sum(map(int, num)))
+
+ def even_odd(self):
+ even = self.digit_sum(compat_str(self.timestamp)[::2])
+ odd = self.digit_sum(compat_str(self.timestamp)[1::2])
+ return even, odd
+
+ def preprocess(self, chunksize):
+ self.target = md5_text(self.target)
+ chunks = []
+ for i in range(32 // chunksize):
+ chunks.append(self.target[chunksize * i:chunksize * (i + 1)])
+ if 32 % chunksize:
+ chunks.append(self.target[32 - 32 % chunksize:])
+ return chunks, list(map(int, self.ip.split('.')))
+
+ def mod(self, modulus):
+ chunks, ip = self.preprocess(32)
+ self.target = chunks[0] + ''.join(map(lambda p: compat_str(p % modulus), ip))
+
+ def split(self, chunksize):
+ modulus_map = {
+ 4: 256,
+ 5: 10,
+ 8: 100,
+ }
+
+ chunks, ip = self.preprocess(chunksize)
+ ret = ''
+ for i in range(len(chunks)):
+ ip_part = compat_str(ip[i] % modulus_map[chunksize]) if i < 4 else ''
+ if chunksize == 8:
+ ret += ip_part + chunks[i]
+ else:
+ ret += chunks[i] + ip_part
+ self.target = ret
+
+ def handle_input16(self):
+ self.target = md5_text(self.target)
+ self.target = self.split_sum(self.target[:16]) + self.target + self.split_sum(self.target[16:])
+
+ def handle_input8(self):
+ self.target = md5_text(self.target)
+ ret = ''
+ for i in range(4):
+ part = self.target[8 * i:8 * (i + 1)]
+ ret += self.split_sum(part) + part
+ self.target = ret
+
+    def handle_sum(self):
+ self.target = md5_text(self.target)
+ self.target = self.split_sum(self.target) + self.target
+
+ def date(self, scheme):
+ self.target = md5_text(self.target)
+ d = time.localtime(self.timestamp)
+ strings = {
+ 'y': compat_str(d.tm_year),
+ 'm': '%02d' % d.tm_mon,
+ 'd': '%02d' % d.tm_mday,
+ }
+ self.target += ''.join(map(lambda c: strings[c], list(scheme)))
+
+ def split_time_even_odd(self):
+ even, odd = self.even_odd()
+ self.target = odd + md5_text(self.target) + even
+
+ def split_time_odd_even(self):
+ even, odd = self.even_odd()
+ self.target = even + md5_text(self.target) + odd
+
+ def split_ip_time_sum(self):
+ chunks, ip = self.preprocess(32)
+ self.target = compat_str(sum(ip)) + chunks[0] + self.digit_sum(self.timestamp)
+
+ def split_time_ip_sum(self):
+ chunks, ip = self.preprocess(32)
+ self.target = self.digit_sum(self.timestamp) + chunks[0] + compat_str(sum(ip))
+
+
+class IqiyiSDKInterpreter:
+ def __init__(self, sdk_code):
+ self.sdk_code = sdk_code
+
+ def run(self, target, ip, timestamp):
+ self.sdk_code = decode_packed_codes(self.sdk_code)
+
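+        # The unpacked SDK applies a pipeline of named transforms to 'input'; each step is replayed natively below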
+ functions = re.findall(r'input=([a-zA-Z0-9]+)\(input', self.sdk_code)
+
+ sdk = IqiyiSDK(target, ip, timestamp)
+
+ other_functions = {
+            'handleSum': sdk.handle_sum,
+ 'handleInput8': sdk.handle_input8,
+ 'handleInput16': sdk.handle_input16,
+ 'splitTimeEvenOdd': sdk.split_time_even_odd,
+ 'splitTimeOddEven': sdk.split_time_odd_even,
+ 'splitIpTimeSum': sdk.split_ip_time_sum,
+ 'splitTimeIpSum': sdk.split_time_ip_sum,
+ }
+ for function in functions:
+ if re.match(r'mod\d+', function):
+ sdk.mod(int(function[3:]))
+ elif re.match(r'date[ymd]{3}', function):
+ sdk.date(function[4:])
+ elif re.match(r'split\d+', function):
+ sdk.split(int(function[5:]))
+ elif function in other_functions:
+ other_functions[function]()
+ else:
+ raise ExtractorError('Unknown function %s' % function)
+
+ return sdk.target
+
+
+class IqiyiIE(InfoExtractor):
+ IE_NAME = 'iqiyi'
+ IE_DESC = '爱奇艺'
+
+ _VALID_URL = r'https?://(?:(?:[^.]+\.)?iqiyi\.com|www\.pps\.tv)/.+\.html'
+
+ _NETRC_MACHINE = 'iqiyi'
+
+ _TESTS = [{
+ 'url': 'http://www.iqiyi.com/v_19rrojlavg.html',
+ # MD5 checksum differs on my machine and Travis CI
+ 'info_dict': {
+ 'id': '9c1fb1b99d192b21c559e5a1a2cb3c73',
+ 'ext': 'mp4',
+ 'title': '美国德州空中惊现奇异云团 酷似UFO',
+ }
+ }, {
+ 'url': 'http://www.iqiyi.com/v_19rrhnnclk.html',
+ 'md5': 'b7dc800a4004b1b57749d9abae0472da',
+ 'info_dict': {
+ 'id': 'e3f585b550a280af23c98b6cb2be19fb',
+ 'ext': 'mp4',
+ # This can be either Simplified Chinese or Traditional Chinese
+ 'title': r're:^(?:名侦探柯南 国语版:第752集 迫近灰原秘密的黑影 下篇|名偵探柯南 國語版:第752集 迫近灰原秘密的黑影 下篇)$',
+ },
+ 'skip': 'Geo-restricted to China',
+ }, {
+ 'url': 'http://www.iqiyi.com/w_19rt6o8t9p.html',
+ 'only_matching': True,
+ }, {
+ 'url': 'http://www.iqiyi.com/a_19rrhbc6kt.html',
+ 'only_matching': True,
+ }, {
+ 'url': 'http://yule.iqiyi.com/pcb.html',
+ 'info_dict': {
+ 'id': '4a0af228fddb55ec96398a364248ed7f',
+ 'ext': 'mp4',
+ 'title': '第2017-04-21期 女艺人频遭极端粉丝骚扰',
+ },
+ }, {
+ # VIP-only video. The first 2 parts (6 minutes) are available without login
+ # MD5 sums omitted as values are different on Travis CI and my machine
+ 'url': 'http://www.iqiyi.com/v_19rrny4w8w.html',
+ 'info_dict': {
+ 'id': 'f3cf468b39dddb30d676f89a91200dc1',
+ 'ext': 'mp4',
+ 'title': '泰坦尼克号',
+ },
+ 'skip': 'Geo-restricted to China',
+ }, {
+ 'url': 'http://www.iqiyi.com/a_19rrhb8ce1.html',
+ 'info_dict': {
+ 'id': '202918101',
+ 'title': '灌篮高手 国语版',
+ },
+ 'playlist_count': 101,
+ }, {
+ 'url': 'http://www.pps.tv/w_19rrbav0ph.html',
+ 'only_matching': True,
+ }]
+
+ _FORMATS_MAP = {
+ '96': 1, # 216p, 240p
+ '1': 2, # 336p, 360p
+ '2': 3, # 480p, 504p
+ '21': 4, # 504p
+ '4': 5, # 720p
+ '17': 5, # 720p
+ '5': 6, # 1072p, 1080p
+ '18': 7, # 1080p
+ }
+
+ @staticmethod
+ def _rsa_fun(data):
+ # public key extracted from http://static.iqiyi.com/js/qiyiV2/20160129180840/jobs/i18n/i18nIndex.js
+ N = 0xab86b6371b5318aaa1d3c9e612a9f1264f372323c8c0f19875b5fc3b3fd3afcc1e5bec527aa94bfa85bffc157e4245aebda05389a5357b75115ac94f074aefcd
+ e = 65537
+
+ return ohdave_rsa_encrypt(data, e, N)
+
+ def _perform_login(self, username, password):
+
+ data = self._download_json(
+ 'http://kylin.iqiyi.com/get_token', None,
+            note='Getting token for login', errnote='Unable to get token for login')
+ sdk = data['sdk']
+ timestamp = int(time.time())
+ target = '/apis/reglogin/login.action?lang=zh_TW&area_code=null&email=%s&passwd=%s&agenttype=1&from=undefined&keeplogin=0&piccode=&fromurl=&_pos=1' % (
+ username, self._rsa_fun(password.encode('utf-8')))
+
+ interp = IqiyiSDKInterpreter(sdk)
+ sign = interp.run(target, data['ip'], timestamp)
+
+ validation_params = {
+ 'target': target,
+ 'server': 'BEA3AA1908656AABCCFF76582C4C6660',
+ 'token': data['token'],
+ 'bird_src': 'f8d91d57af224da7893dd397d52d811a',
+ 'sign': sign,
+ 'bird_t': timestamp,
+ }
+ validation_result = self._download_json(
+ 'http://kylin.iqiyi.com/validate?' + compat_urllib_parse_urlencode(validation_params), None,
+            note='Validating credentials', errnote='Unable to validate credentials')
+
+ MSG_MAP = {
+            'P00107': 'please log in via the web interface and enter the CAPTCHA code',
+ 'P00117': 'bad username or password',
+ }
+
+ code = validation_result['code']
+ if code != 'A00000':
+ msg = MSG_MAP.get(code)
+ if not msg:
+ msg = 'error %s' % code
+ if validation_result.get('msg'):
+ msg += ': ' + validation_result['msg']
+ self.report_warning('unable to log in: ' + msg)
+ return False
+
+ return True
+
+ def get_raw_data(self, tvid, video_id):
+ tm = int(time.time() * 1000)
+
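+        # 'sc' is an MD5 request signature over timestamp + static key + tvid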
+ key = 'd5fb4bd9d50c4be6948c97edd7254b0e'
+ sc = md5_text(compat_str(tm) + key + tvid)
+ params = {
+ 'tvid': tvid,
+ 'vid': video_id,
+ 'src': '76f90cbd92f94a2e925d83e8ccd22cb7',
+ 'sc': sc,
+ 't': tm,
+ }
+
+ return self._download_json(
+ 'http://cache.m.iqiyi.com/jp/tmts/%s/%s/' % (tvid, video_id),
+ video_id, transform_source=lambda s: remove_start(s, 'var tvInfoJs='),
+ query=params, headers=self.geo_verification_headers())
+
+ def _extract_playlist(self, webpage):
+ PAGE_SIZE = 50
+
+ links = re.findall(
+ r'<a[^>]+class="site-piclist_pic_link"[^>]+href="(http://www\.iqiyi\.com/.+\.html)"',
+ webpage)
+ if not links:
+ return
+
+ album_id = self._search_regex(
+ r'albumId\s*:\s*(\d+),', webpage, 'album ID')
+ album_title = self._search_regex(
+ r'data-share-title="([^"]+)"', webpage, 'album title', fatal=False)
+
+ entries = list(map(self.url_result, links))
+
+        # Start from page 2 because the first page's links are already in the webpage
+ for page_num in itertools.count(2):
+ pagelist_page = self._download_webpage(
+ 'http://cache.video.qiyi.com/jp/avlist/%s/%d/%d/' % (album_id, page_num, PAGE_SIZE),
+ album_id,
+                note='Downloading playlist page %d' % page_num,
+ errnote='Failed to download playlist page %d' % page_num)
+ pagelist = self._parse_json(
+ remove_start(pagelist_page, 'var tvInfoJs='), album_id)
+ vlist = pagelist['data']['vlist']
+ for item in vlist:
+ entries.append(self.url_result(item['vurl']))
+ if len(vlist) < PAGE_SIZE:
+ break
+
+ return self.playlist_result(entries, album_id, album_title)
+
+ def _real_extract(self, url):
+ webpage = self._download_webpage(
+            url, 'temp_id', note='Downloading video page')
+
+        # There's no simple way to determine whether a URL refers to a playlist or a
+        # single video. Individual video pages sometimes contain playlist links, so
+        # treat the URL as a single video first
+ tvid = self._search_regex(
+ r'data-(?:player|shareplattrigger)-tvid\s*=\s*[\'"](\d+)', webpage, 'tvid', default=None)
+ if tvid is None:
+ playlist_result = self._extract_playlist(webpage)
+ if playlist_result:
+ return playlist_result
+ raise ExtractorError('Can\'t find any video')
+
+ video_id = self._search_regex(
+ r'data-(?:player|shareplattrigger)-videoid\s*=\s*[\'"]([a-f\d]+)', webpage, 'video_id')
+
+ formats = []
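+        # The API may return an empty format list at first; retry up to 5 times, sleeping 5 seconds between attempts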
+ for _ in range(5):
+ raw_data = self.get_raw_data(tvid, video_id)
+
+ if raw_data['code'] != 'A00000':
+ if raw_data['code'] == 'A00111':
+ self.raise_geo_restricted()
+ raise ExtractorError('Unable to load data. Error code: ' + raw_data['code'])
+
+ data = raw_data['data']
+
+ for stream in data['vidl']:
+ if 'm3utx' not in stream:
+ continue
+ vd = compat_str(stream['vd'])
+ formats.append({
+ 'url': stream['m3utx'],
+ 'format_id': vd,
+ 'ext': 'mp4',
+ 'quality': self._FORMATS_MAP.get(vd, -1),
+ 'protocol': 'm3u8_native',
+ })
+
+ if formats:
+ break
+
+ self._sleep(5, video_id)
+
+ title = (get_element_by_id('widget-videotitle', webpage)
+ or clean_html(get_element_by_attribute('class', 'mod-play-tit', webpage))
+ or self._html_search_regex(r'<span[^>]+data-videochanged-title="word"[^>]*>([^<]+)</span>', webpage, 'title'))
+
+ return {
+ 'id': video_id,
+ 'title': title,
+ 'formats': formats,
+ }
+
+
+class IqIE(InfoExtractor):
+ IE_NAME = 'iq.com'
+ IE_DESC = 'International version of iQiyi'
+ _VALID_URL = r'https?://(?:www\.)?iq\.com/play/(?:[\w%-]*-)?(?P<id>\w+)'
+ _TESTS = [{
+ 'url': 'https://www.iq.com/play/one-piece-episode-1000-1ma1i6ferf4',
+ 'md5': '2d7caf6eeca8a32b407094b33b757d39',
+ 'info_dict': {
+ 'ext': 'mp4',
+ 'id': '1ma1i6ferf4',
+ 'title': '航海王 第1000集',
+ 'description': 'Subtitle available on Sunday 4PM(GMT+8).',
+ 'duration': 1430,
+ 'timestamp': 1637488203,
+ 'upload_date': '20211121',
+ 'episode_number': 1000,
+ 'episode': 'Episode 1000',
+ 'series': 'One Piece',
+ 'age_limit': 13,
+ 'average_rating': float,
+ },
+ 'params': {
+ 'format': '500',
+ },
+ 'expected_warnings': ['format is restricted']
+ }, {
+ # VIP-restricted video
+ 'url': 'https://www.iq.com/play/mermaid-in-the-fog-2021-gbdpx13bs4',
+ 'only_matching': True
+ }]
+ _BID_TAGS = {
+ '100': '240P',
+ '200': '360P',
+ '300': '480P',
+ '500': '720P',
+ '600': '1080P',
+ '610': '1080P50',
+ '700': '2K',
+ '800': '4K',
+ }
+ _LID_TAGS = {
+ '1': 'zh_CN',
+ '2': 'zh_TW',
+ '3': 'en',
+ '4': 'ko',
+ '5': 'ja',
+ '18': 'th',
+ '21': 'my',
+ '23': 'vi',
+ '24': 'id',
+ '26': 'es',
+ '27': 'pt',
+ '28': 'ar',
+ }
+
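+    # PhantomJS template: computes a signed /dash query path per bid, deriving 'authKey' and 'vf' with the site's own cmd5x() hash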
+ _DASH_JS = '''
+ console.log(page.evaluate(function() {
+ var tvid = "%(tvid)s"; var vid = "%(vid)s"; var src = "%(src)s";
+ var uid = "%(uid)s"; var dfp = "%(dfp)s"; var mode = "%(mode)s"; var lang = "%(lang)s";
+ var bid_list = %(bid_list)s; var ut_list = %(ut_list)s; var tm = new Date().getTime();
+ var cmd5x_func = %(cmd5x_func)s; var cmd5x_exporter = {}; cmd5x_func({}, cmd5x_exporter, {}); var cmd5x = cmd5x_exporter.cmd5x;
+ var authKey = cmd5x(cmd5x('') + tm + '' + tvid);
+ var k_uid = Array.apply(null, Array(32)).map(function() {return Math.floor(Math.random() * 15).toString(16)}).join('');
+ var dash_paths = {};
+ bid_list.forEach(function(bid) {
+ var query = {
+ 'tvid': tvid,
+ 'bid': bid,
+ 'ds': 1,
+ 'vid': vid,
+ 'src': src,
+ 'vt': 0,
+ 'rs': 1,
+ 'uid': uid,
+ 'ori': 'pcw',
+ 'ps': 1,
+ 'k_uid': k_uid,
+ 'pt': 0,
+ 'd': 0,
+ 's': '',
+ 'lid': '',
+ 'slid': 0,
+ 'cf': '',
+ 'ct': '',
+ 'authKey': authKey,
+ 'k_tag': 1,
+ 'ost': 0,
+ 'ppt': 0,
+ 'dfp': dfp,
+ 'prio': JSON.stringify({
+ 'ff': 'f4v',
+ 'code': 2
+ }),
+ 'k_err_retries': 0,
+ 'up': '',
+ 'su': 2,
+ 'applang': lang,
+ 'sver': 2,
+ 'X-USER-MODE': mode,
+ 'qd_v': 2,
+ 'tm': tm,
+ 'qdy': 'a',
+ 'qds': 0,
+ 'k_ft1': '143486267424900',
+ 'k_ft4': '1572868',
+ 'k_ft7': '4',
+ 'k_ft5': '1',
+ 'bop': JSON.stringify({
+ 'version': '10.0',
+ 'dfp': dfp
+ }),
+ };
+ var enc_params = [];
+ for (var prop in query) {
+ enc_params.push(encodeURIComponent(prop) + '=' + encodeURIComponent(query[prop]));
+ }
+ ut_list.forEach(function(ut) {
+ enc_params.push('ut=' + ut);
+ })
+ var dash_path = '/dash?' + enc_params.join('&'); dash_path += '&vf=' + cmd5x(dash_path);
+ dash_paths[bid] = dash_path;
+ });
+ return JSON.stringify(dash_paths);
+ }));
+ saveAndExit();
+ '''
+
+ def _extract_vms_player_js(self, webpage, video_id):
+ player_js_cache = self.cache.load('iq', 'player_js')
+ if player_js_cache:
+ return player_js_cache
+ webpack_js_url = self._proto_relative_url(self._search_regex(
+ r'<script src="((?:https?:)?//stc\.iqiyipic\.com/_next/static/chunks/webpack-\w+\.js)"', webpage, 'webpack URL'))
+ webpack_js = self._download_webpage(webpack_js_url, video_id, note='Downloading webpack JS', errnote='Unable to download webpack JS')
+
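+        # The webpack runtime maps chunk indices to content-hash suffixes; fetch candidate chunks until the one containing the VMS request code is found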
+ webpack_map = self._search_json(
+ r'["\']\s*\+\s*', webpack_js, 'JS locations', video_id,
+ contains_pattern=r'{\s*(?:\d+\s*:\s*["\'][\da-f]+["\']\s*,?\s*)+}',
+ end_pattern=r'\[\w+\]\+["\']\.js', transform_source=js_to_json)
+
+ replacement_map = self._search_json(
+ r'["\']\s*\+\(\s*', webpack_js, 'replacement map', video_id,
+ contains_pattern=r'{\s*(?:\d+\s*:\s*["\'][\w.-]+["\']\s*,?\s*)+}',
+ end_pattern=r'\[\w+\]\|\|\w+\)\+["\']\.', transform_source=js_to_json,
+ fatal=False) or {}
+
+ for module_index in reversed(webpack_map):
+ real_module = replacement_map.get(module_index) or module_index
+ module_js = self._download_webpage(
+ f'https://stc.iqiyipic.com/_next/static/chunks/{real_module}.{webpack_map[module_index]}.js',
+ video_id, note=f'Downloading #{module_index} module JS', errnote='Unable to download module JS', fatal=False) or ''
+ if 'vms request' in module_js:
+ self.cache.store('iq', 'player_js', module_js)
+ return module_js
+ raise ExtractorError('Unable to extract player JS')
+
+ def _extract_cmd5x_function(self, webpage, video_id):
+ return self._search_regex(r',\s*(function\s*\([^\)]*\)\s*{\s*var _qda.+_qdc\(\)\s*})\s*,',
+ self._extract_vms_player_js(webpage, video_id), 'signature function')
+
+ def _update_bid_tags(self, webpage, video_id):
+ extracted_bid_tags = self._search_json(
+ r'function\s*\([^)]*\)\s*\{\s*"use strict";?\s*var \w\s*=\s*',
+ self._extract_vms_player_js(webpage, video_id), 'video tags', video_id,
+ contains_pattern=r'{\s*\d+\s*:\s*\{\s*nbid\s*:.+}\s*}',
+ end_pattern=r'\s*,\s*\w\s*=\s*\{\s*getNewVd', fatal=False, transform_source=js_to_json)
+ if not extracted_bid_tags:
+ return
+ self._BID_TAGS = {
+ bid: traverse_obj(extracted_bid_tags, (bid, 'value'), expected_type=str, default=self._BID_TAGS.get(bid))
+ for bid in extracted_bid_tags.keys()
+ }
+
+ def _get_cookie(self, name, default=None):
+ cookie = self._get_cookies('https://iq.com/').get(name)
+ return cookie.value if cookie else default
+
+ def _real_extract(self, url):
+ video_id = self._match_id(url)
+ webpage = self._download_webpage(url, video_id)
+ self._update_bid_tags(webpage, video_id)
+
+ next_props = self._search_nextjs_data(webpage, video_id)['props']
+ page_data = next_props['initialState']['play']
+ video_info = page_data['curVideoInfo']
+
+ uid = traverse_obj(
+ self._parse_json(
+ self._get_cookie('I00002', '{}'), video_id, transform_source=compat_urllib_parse_unquote, fatal=False),
+ ('data', 'uid'), default=0)
+
+ if uid:
+ vip_data = self._download_json(
+ 'https://pcw-api.iq.com/api/vtype', video_id, note='Downloading VIP data', errnote='Unable to download VIP data', query={
+ 'batch': 1,
+ 'platformId': 3,
+ 'modeCode': self._get_cookie('mod', 'intl'),
+ 'langCode': self._get_cookie('lang', 'en_us'),
+ 'deviceId': self._get_cookie('QC005', '')
+ }, fatal=False)
+ ut_list = traverse_obj(vip_data, ('data', 'all_vip', ..., 'vipType'), expected_type=str_or_none)
+ else:
+ ut_list = ['0']
+
+        # bid 0 is requested first as an initial format/availability check
+ dash_paths = self._parse_json(PhantomJSwrapper(self, timeout=120_000).get(
+            url, note2='Executing signature code (this may take a couple of minutes)',
+ html='<!DOCTYPE html>', video_id=video_id, jscode=self._DASH_JS % {
+ 'tvid': video_info['tvId'],
+ 'vid': video_info['vid'],
+ 'src': traverse_obj(next_props, ('initialProps', 'pageProps', 'ptid'),
+ expected_type=str, default='04022001010011000000'),
+ 'uid': uid,
+ 'dfp': self._get_cookie('dfp', ''),
+ 'mode': self._get_cookie('mod', 'intl'),
+ 'lang': self._get_cookie('lang', 'en_us'),
+ 'bid_list': '[' + ','.join(['0', *self._BID_TAGS.keys()]) + ']',
+ 'ut_list': '[' + ','.join(ut_list) + ']',
+ 'cmd5x_func': self._extract_cmd5x_function(webpage, video_id),
+ })[1].strip(), video_id)
+
+ formats, subtitles = [], {}
+ initial_format_data = self._download_json(
+ urljoin('https://cache-video.iq.com', dash_paths['0']), video_id,
+ note='Downloading initial video format info', errnote='Unable to download initial video format info')['data']
+
+ preview_time = traverse_obj(
+ initial_format_data, ('boss_ts', (None, 'data'), ('previewTime', 'rtime')), expected_type=float_or_none, get_all=False)
+ if traverse_obj(initial_format_data, ('boss_ts', 'data', 'prv'), expected_type=int_or_none):
+ self.report_warning('This preview video is limited%s' % format_field(preview_time, None, ' to %s seconds'))
+
+ # TODO: Extract audio-only formats
+ for bid in set(traverse_obj(initial_format_data, ('program', 'video', ..., 'bid'), expected_type=str_or_none)):
+ dash_path = dash_paths.get(bid)
+ if not dash_path:
+ self.report_warning(f'Unknown format id: {bid}. It is currently not being extracted')
+ continue
+ format_data = traverse_obj(self._download_json(
+ urljoin('https://cache-video.iq.com', dash_path), video_id,
+ note=f'Downloading format data for {self._BID_TAGS[bid]}', errnote='Unable to download format data',
+ fatal=False), 'data', expected_type=dict)
+
+ video_format = traverse_obj(format_data, ('program', 'video', lambda _, v: str(v['bid']) == bid),
+ expected_type=dict, get_all=False) or {}
+ extracted_formats = []
+ if video_format.get('m3u8Url'):
+ extracted_formats.extend(self._extract_m3u8_formats(
+ urljoin(format_data.get('dm3u8', 'https://cache-m.iq.com/dc/dt/'), video_format['m3u8Url']),
+                    video_id, 'mp4', m3u8_id=bid, fatal=False))
+ if video_format.get('mpdUrl'):
+ # TODO: Properly extract mpd hostname
+ extracted_formats.extend(self._extract_mpd_formats(
+ urljoin(format_data.get('dm3u8', 'https://cache-m.iq.com/dc/dt/'), video_format['mpdUrl']),
+                    video_id, mpd_id=bid, fatal=False))
+ if video_format.get('m3u8'):
+ ff = video_format.get('ff', 'ts')
+ if ff == 'ts':
+ m3u8_formats, _ = self._parse_m3u8_formats_and_subtitles(
+ video_format['m3u8'], ext='mp4', m3u8_id=bid, fatal=False)
+ extracted_formats.extend(m3u8_formats)
+ elif ff == 'm4s':
+ mpd_data = traverse_obj(
+ self._parse_json(video_format['m3u8'], video_id, fatal=False), ('payload', ..., 'data'), expected_type=str)
+ if not mpd_data:
+ continue
+ mpd_formats, _ = self._parse_mpd_formats_and_subtitles(
+ mpd_data, bid, format_data.get('dm3u8', 'https://cache-m.iq.com/dc/dt/'))
+ extracted_formats.extend(mpd_formats)
+ else:
+ self.report_warning(f'{ff} formats are currently not supported')
+
+ if not extracted_formats:
+ if video_format.get('s'):
+ self.report_warning(f'{self._BID_TAGS[bid]} format is restricted')
+ else:
+ self.report_warning(f'Unable to extract {self._BID_TAGS[bid]} format')
+ for f in extracted_formats:
+ f.update({
+ 'quality': qualities(list(self._BID_TAGS.keys()))(bid),
+ 'format_note': self._BID_TAGS[bid],
+ **parse_resolution(video_format.get('scrsz'))
+ })
+ formats.extend(extracted_formats)
+
+ for sub_format in traverse_obj(initial_format_data, ('program', 'stl', ...), expected_type=dict):
+ lang = self._LID_TAGS.get(str_or_none(sub_format.get('lid')), sub_format.get('_name'))
+ subtitles.setdefault(lang, []).extend([{
+ 'ext': format_ext,
+ 'url': urljoin(initial_format_data.get('dstl', 'http://meta.video.iqiyi.com'), sub_format[format_key])
+ } for format_key, format_ext in [('srt', 'srt'), ('webvtt', 'vtt')] if sub_format.get(format_key)])
+
+ extra_metadata = page_data.get('albumInfo') if video_info.get('albumId') and page_data.get('albumInfo') else video_info
+ return {
+ 'id': video_id,
+ 'title': video_info['name'],
+ 'formats': formats,
+ 'subtitles': subtitles,
+ 'description': video_info.get('mergeDesc'),
+ 'duration': parse_duration(video_info.get('len')),
+ 'age_limit': parse_age_limit(video_info.get('rating')),
+ 'average_rating': traverse_obj(page_data, ('playScoreInfo', 'score'), expected_type=float_or_none),
+ 'timestamp': parse_iso8601(video_info.get('isoUploadDate')),
+ 'categories': traverse_obj(extra_metadata, ('videoTagMap', ..., ..., 'name'), expected_type=str),
+ 'cast': traverse_obj(extra_metadata, ('actorArr', ..., 'name'), expected_type=str),
+ 'episode_number': int_or_none(video_info.get('order')) or None,
+ 'series': video_info.get('albumName'),
+ }
+
+
+class IqAlbumIE(InfoExtractor):
+ IE_NAME = 'iq.com:album'
+ _VALID_URL = r'https?://(?:www\.)?iq\.com/album/(?:[\w%-]*-)?(?P<id>\w+)'
+ _TESTS = [{
+ 'url': 'https://www.iq.com/album/one-piece-1999-1bk9icvr331',
+ 'info_dict': {
+ 'id': '1bk9icvr331',
+ 'title': 'One Piece',
+ 'description': 'Subtitle available on Sunday 4PM(GMT+8).'
+ },
+ 'playlist_mincount': 238
+ }, {
+ # Movie/single video
+ 'url': 'https://www.iq.com/album/九龙城寨-2021-22yjnij099k',
+ 'info_dict': {
+ 'ext': 'mp4',
+ 'id': '22yjnij099k',
+ 'title': '九龙城寨',
+ 'description': 'md5:8a09f50b8ba0db4dc69bc7c844228044',
+ 'duration': 5000,
+ 'timestamp': 1641911371,
+ 'upload_date': '20220111',
+ 'series': '九龙城寨',
+ 'cast': ['Shi Yan Neng', 'Yu Lang', 'Peter lv', 'Sun Zi Jun', 'Yang Xiao Bo'],
+ 'age_limit': 13,
+ 'average_rating': float,
+ },
+ 'expected_warnings': ['format is restricted']
+ }]
+
+ def _entries(self, album_id_num, page_ranges, album_id=None, mode_code='intl', lang_code='en_us'):
+ for page_range in page_ranges:
+ page = self._download_json(
+ f'https://pcw-api.iq.com/api/episodeListSource/{album_id_num}', album_id,
+ note=f'Downloading video list episodes {page_range.get("msg", "")}',
+ errnote='Unable to download video list', query={
+ 'platformId': 3,
+ 'modeCode': mode_code,
+ 'langCode': lang_code,
+ 'endOrder': page_range['to'],
+ 'startOrder': page_range['from']
+ })
+ for video in page['data']['epg']:
+ yield self.url_result('https://www.iq.com/play/%s' % (video.get('playLocSuffix') or video['qipuIdStr']),
+ IqIE.ie_key(), video.get('qipuIdStr'), video.get('name'))
+
+ def _real_extract(self, url):
+ album_id = self._match_id(url)
+ webpage = self._download_webpage(url, album_id)
+ next_data = self._search_nextjs_data(webpage, album_id)
+ album_data = next_data['props']['initialState']['album']['videoAlbumInfo']
+
+ if album_data.get('videoType') == 'singleVideo':
+ return self.url_result('https://www.iq.com/play/%s' % album_id, IqIE.ie_key())
+ return self.playlist_result(
+ self._entries(album_data['albumId'], album_data['totalPageRange'], album_id,
+ traverse_obj(next_data, ('props', 'initialProps', 'pageProps', 'modeCode')),
+ traverse_obj(next_data, ('props', 'initialProps', 'pageProps', 'langCode'))),
+ album_id, album_data.get('name'), album_data.get('desc'))
diff --git a/yt_dlp/extractor/islamchannel.py b/yt_dlp/extractor/islamchannel.py
new file mode 100644
index 0000000..253a846
--- /dev/null
+++ b/yt_dlp/extractor/islamchannel.py
@@ -0,0 +1,81 @@
+import re
+
+from .common import InfoExtractor
+from ..utils import traverse_obj, urljoin
+
+
+class IslamChannelIE(InfoExtractor):
+ _VALID_URL = r'https?://watch\.islamchannel\.tv/watch/(?P<id>\d+)'
+ _TESTS = [{
+ 'url': 'https://watch.islamchannel.tv/watch/38604310',
+ 'info_dict': {
+ 'id': '38604310',
+ 'title': 'Omar - Young Omar',
+ 'description': 'md5:5cc7ddecef064ea7afe52eb5e0e33b55',
+ 'thumbnail': r're:https?://.+',
+ 'ext': 'mp4',
+ }
+ }]
+
+ def _real_extract(self, url):
+ video_id = self._match_id(url)
+ webpage = self._download_webpage(url, video_id)
+
+ thumbnail = self._search_regex(
+ r'data-poster="([^"]+)"', webpage, 'data poster', fatal=False) or \
+ self._html_search_meta(('og:image', 'twitter:image'), webpage)
+
+ headers = {
+ 'Token': self._search_regex(r'data-token="([^"]+)"', webpage, 'data token'),
+ 'Token-Expiry': self._search_regex(r'data-expiry="([^"]+)"', webpage, 'data expiry'),
+ 'Uvid': video_id,
+ }
+ show_stream = self._download_json(
+ f'https://v2-streams-elb.simplestreamcdn.com/api/show/stream/{video_id}', video_id,
+ query={
+ 'key': self._search_regex(r'data-key="([^"]+)"', webpage, 'data key'),
+ 'platform': 'chrome',
+ }, headers=headers)
+ # TODO: show_stream['stream'] and show_stream['drm'] may contain something interesting
+ streams = self._download_json(
+ traverse_obj(show_stream, ('response', 'tokenization', 'url')), video_id,
+ headers=headers)
+ formats, subs = self._extract_m3u8_formats_and_subtitles(traverse_obj(streams, ('Streams', 'Adaptive')), video_id, 'mp4')
+
+ return {
+ 'id': video_id,
+ 'title': self._html_search_meta(('og:title', 'twitter:title'), webpage),
+ 'description': self._html_search_meta(('og:description', 'twitter:description', 'description'), webpage),
+ 'formats': formats,
+ 'subtitles': subs,
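+            # The thumbnail query string appears to carry only scaling parameters, so stripping it should yield the unscaled image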
+ 'thumbnails': [{
+ 'id': 'unscaled',
+ 'url': thumbnail.split('?')[0],
+ 'ext': 'jpg',
+ 'preference': 2,
+ }, {
+ 'id': 'orig',
+ 'url': thumbnail,
+ 'ext': 'jpg',
+ 'preference': 1,
+ }] if thumbnail else None,
+ }
+
+
+class IslamChannelSeriesIE(InfoExtractor):
+ _VALID_URL = r'https?://watch\.islamchannel\.tv/series/(?P<id>[a-f\d-]+)'
+ _TESTS = [{
+ 'url': 'https://watch.islamchannel.tv/series/a6cccef3-3ef1-11eb-bc19-06b69c2357cd',
+ 'info_dict': {
+ 'id': 'a6cccef3-3ef1-11eb-bc19-06b69c2357cd',
+ },
+ 'playlist_mincount': 31,
+ }]
+
+ def _real_extract(self, url):
+ pl_id = self._match_id(url)
+ webpage = self._download_webpage(url, pl_id)
+
+ return self.playlist_from_matches(
+ re.finditer(r'<a\s+href="(/watch/\d+)"[^>]+?data-video-type="show">', webpage),
+ pl_id, getter=lambda x: urljoin(url, x.group(1)), ie=IslamChannelIE)
diff --git a/yt_dlp/extractor/israelnationalnews.py b/yt_dlp/extractor/israelnationalnews.py
new file mode 100644
index 0000000..35040f5
--- /dev/null
+++ b/yt_dlp/extractor/israelnationalnews.py
@@ -0,0 +1,50 @@
+from .common import InfoExtractor
+from ..utils import ExtractorError, traverse_obj
+
+
+class IsraelNationalNewsIE(InfoExtractor):
+ _VALID_URL = r'https?://(?:www\.)?israelnationalnews\.com/news/(?P<id>\d+)'
+ _TESTS = [{
+ 'url': 'https://www.israelnationalnews.com/news/354520',
+ 'info_dict': {
+ 'id': '354520'
+ },
+ 'playlist': [{
+ 'info_dict': {
+ 'id': 'jA84wQhVvg8',
+ 'title': 'Even CNN Host Is Shocked by How Bad Biden\'s Approval Ratings Have Gotten | DM CLIPS | Rubin Report',
+ 'ext': 'mp4',
+ 'description': 'md5:b7325a3d00c7596337dc3ae37e32d35c',
+ 'channel': 'The Rubin Report',
+ 'channel_follower_count': int,
+ 'comment_count': int,
+ 'categories': ['News & Politics'],
+ 'like_count': int,
+ 'uploader_url': 'http://www.youtube.com/user/RubinReport',
+ 'uploader_id': 'RubinReport',
+ 'availability': 'public',
+ 'view_count': int,
+ 'duration': 240,
+ 'thumbnail': 'https://i.ytimg.com/vi_webp/jA84wQhVvg8/maxresdefault.webp',
+ 'live_status': 'not_live',
+ 'playable_in_embed': True,
+ 'age_limit': 0,
+ 'tags': 'count:29',
+ 'channel_id': 'UCJdKr0Bgd_5saZYqLCa9mng',
+ 'channel_url': 'https://www.youtube.com/channel/UCJdKr0Bgd_5saZYqLCa9mng',
+ 'upload_date': '20220606',
+ 'uploader': 'The Rubin Report',
+ }
+ }]
+ }]
+
+ def _real_extract(self, url):
+ news_article_id = self._match_id(url)
+ article_json = self._download_json(
+ f'https://www.israelnationalnews.com/Generic/NewAPI/Item?type=0&Item={news_article_id}', news_article_id)
+
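+        # Articles embed their videos as external (YouTube) players; collect the src of each embed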
+ urls = traverse_obj(article_json, ('Content2', ..., 'content', ..., 'attrs', 'src'))
+ if not urls:
+ raise ExtractorError('This article does not have any videos', expected=True)
+
+ return self.playlist_from_matches(urls, news_article_id, ie='Youtube')
diff --git a/yt_dlp/extractor/itprotv.py b/yt_dlp/extractor/itprotv.py
new file mode 100644
index 0000000..713fd4e
--- /dev/null
+++ b/yt_dlp/extractor/itprotv.py
@@ -0,0 +1,139 @@
+import re
+
+from .common import InfoExtractor
+
+from ..utils import (
+ int_or_none,
+ str_or_none,
+ traverse_obj,
+ urljoin
+)
+
+
+class ITProTVBaseIE(InfoExtractor):
+ _ENDPOINTS = {
+ 'course': 'course?url={}&brand=00002560-0000-3fa9-0000-1d61000035f3',
+ 'episode': 'brand/00002560-0000-3fa9-0000-1d61000035f3/episode?url={}'
+ }
+
+ def _call_api(self, ep, item_id, webpage):
+ return self._download_json(
+ f'https://api.itpro.tv/api/urza/v3/consumer-web/{self._ENDPOINTS[ep].format(item_id)}',
+ item_id, note=f'Fetching {ep} data API',
+ headers={'Authorization': f'Bearer {self._fetch_jwt(webpage)}'})[ep]
+
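+    # The API bearer token is embedded in the page's serialized state as "passedToken"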
+ def _fetch_jwt(self, webpage):
+ return self._search_regex(r'{"passedToken":"([\w-]+\.[\w-]+\.[\w-]+)",', webpage, 'jwt')
+
+ def _check_if_logged_in(self, webpage):
+        if re.search(r'{\s*member\s*:\s*null', webpage):  # the logged-out marker appears mid-page
+ self.raise_login_required()
+
+
+class ITProTVIE(ITProTVBaseIE):
+ _VALID_URL = r'https?://app\.itpro\.tv/course/(?P<course>[\w-]+)/(?P<id>[\w-]+)'
+ _TESTS = [{
+ 'url': 'https://app.itpro.tv/course/guided-tour/introductionitprotv',
+ 'md5': 'bca4a28c2667fd1a63052e71a94bb88c',
+ 'info_dict': {
+ 'id': 'introductionitprotv',
+ 'ext': 'mp4',
+ 'title': 'An Introduction to ITProTV 101',
+ 'thumbnail': 'https://itprotv-image-bucket.s3.amazonaws.com/getting-started/itprotv-101-introduction-PGM.11_39_56_02.Still001.png',
+ 'description': 'md5:b175c2c3061ce35a4dd33865b2c1da4e',
+ 'duration': 269,
+ 'series': 'ITProTV 101',
+ 'series_id': 'guided-tour',
+ 'availability': 'needs_auth',
+ 'chapter': 'ITProTV 101',
+ 'chapter_number': 1,
+ 'chapter_id': '5dbb3de426b46c0010b5d1b6'
+ },
+ },
+ {
+ 'url': 'https://app.itpro.tv/course/beyond-tech/job-interview-tips',
+ 'md5': '101a299b98c47ccf4c67f9f0951defa8',
+ 'info_dict': {
+ 'id': 'job-interview-tips',
+ 'ext': 'mp4',
+ 'title': 'Job Interview Tips',
+ 'thumbnail': 'https://s3.amazonaws.com:443/production-itprotv-thumbnails/2f370bf5-294d-4bbe-ab80-c0b5781630ea.png',
+ 'description': 'md5:30d8ba483febdf89ec85623aad3c3cb6',
+ 'duration': 267,
+ 'series': 'Beyond Tech',
+ 'series_id': 'beyond-tech',
+ 'availability': 'needs_auth',
+ 'chapter': 'Job Development',
+ 'chapter_number': 2,
+ 'chapter_id': '5f7c78d424330c000edf04d9'
+ },
+ }]
+
+ def _real_extract(self, url):
+ episode_id, course_name = self._match_valid_url(url).group('id', 'course')
+ webpage = self._download_webpage(url, episode_id)
+ self._check_if_logged_in(webpage)
+ course = self._call_api('course', course_name, webpage)
+ episode = self._call_api('episode', episode_id, webpage)
+
+ chapter_number, chapter = next((
+ (i, topic) for i, topic in enumerate(course.get('topics') or [], 1)
+            if traverse_obj(topic, 'id') == episode.get('topic')), (None, {}))  # default must unpack into (chapter_number, chapter)
+
+ return {
+ 'id': episode_id,
+ 'title': episode.get('title'),
+ 'description': episode.get('description'),
+ 'thumbnail': episode.get('thumbnail'),
+ 'formats': [
+ {'url': episode[f'jwVideo{h}Embed'], 'height': h}
+ for h in (320, 480, 720, 1080) if episode.get(f'jwVideo{h}Embed')
+ ],
+ 'duration': int_or_none(episode.get('length')),
+ 'series': course.get('name'),
+ 'series_id': course.get('url'),
+ 'chapter': str_or_none(chapter.get('title')),
+ 'chapter_number': chapter_number,
+ 'chapter_id': str_or_none(chapter.get('id')),
+ 'subtitles': {
+ 'en': [{'ext': 'vtt', 'data': episode['enCaptionData']}]
+ } if episode.get('enCaptionData') else None,
+ }
+
+
+class ITProTVCourseIE(ITProTVBaseIE):
+ _VALID_URL = r'https?://app\.itpro\.tv/course/(?P<id>[\w-]+)/?(?:$|[#?])'
+ _TESTS = [
+ {
+ 'url': 'https://app.itpro.tv/course/guided-tour',
+ 'info_dict': {
+ 'id': 'guided-tour',
+ 'description': 'md5:b175c2c3061ce35a4dd33865b2c1da4e',
+ 'title': 'ITProTV 101',
+ },
+ 'playlist_count': 6
+ },
+ {
+ 'url': 'https://app.itpro.tv/course/beyond-tech',
+ 'info_dict': {
+ 'id': 'beyond-tech',
+ 'description': 'md5:44cd99855e7f81a15ce1269bd0621fed',
+ 'title': 'Beyond Tech'
+ },
+ 'playlist_count': 15
+ },
+ ]
+
+ def _real_extract(self, url):
+ course_id = self._match_id(url)
+ webpage = self._download_webpage(url, course_id)
+ self._check_if_logged_in(webpage)
+ course = self._call_api('course', course_id, webpage)
+
+ entries = [self.url_result(
+ urljoin(url, f'{course_id}/{episode["url"]}'), ITProTVIE,
+ episode['url'], episode.get('title'), url_transparent=True)
+ for episode in course['episodes']]
+
+ return self.playlist_result(
+ entries, course_id, course.get('name'), course.get('description'))
diff --git a/yt_dlp/extractor/itv.py b/yt_dlp/extractor/itv.py
new file mode 100644
index 0000000..9ac7be3
--- /dev/null
+++ b/yt_dlp/extractor/itv.py
@@ -0,0 +1,266 @@
+import json
+
+from .common import InfoExtractor
+from .brightcove import BrightcoveNewIE
+
+from ..compat import compat_str
+from ..utils import (
+ base_url,
+ clean_html,
+ determine_ext,
+ extract_attributes,
+ ExtractorError,
+ get_element_by_class,
+ JSON_LD_RE,
+ merge_dicts,
+ parse_duration,
+ smuggle_url,
+ try_get,
+ url_or_none,
+ url_basename,
+ urljoin,
+)
+
+
+class ITVIE(InfoExtractor):
+ _VALID_URL = r'https?://(?:www\.)?itv\.com/hub/[^/]+/(?P<id>[0-9a-zA-Z]+)'
+ _GEO_COUNTRIES = ['GB']
+ _TESTS = [{
+ 'url': 'https://www.itv.com/hub/plebs/2a1873a0002',
+ 'info_dict': {
+ 'id': '2a1873a0002',
+ 'ext': 'mp4',
+ 'title': 'Plebs - The Orgy',
+ 'description': 'md5:4d7159af53ebd5b36e8b3ec82a41fdb4',
+ 'series': 'Plebs',
+ 'season_number': 1,
+ 'episode_number': 1,
+ 'thumbnail': r're:https?://hubimages\.itv\.com/episode/2_1873_0002'
+ },
+ 'params': {
+ # m3u8 download
+ 'skip_download': True,
+ },
+ }, {
+ 'url': 'https://www.itv.com/hub/the-jonathan-ross-show/2a1166a0209',
+ 'info_dict': {
+ 'id': '2a1166a0209',
+ 'ext': 'mp4',
+ 'title': 'The Jonathan Ross Show - Series 17 - Episode 8',
+ 'description': 'md5:3023dcdd375db1bc9967186cdb3f1399',
+ 'series': 'The Jonathan Ross Show',
+ 'episode_number': 8,
+ 'season_number': 17,
+ 'thumbnail': r're:https?://hubimages\.itv\.com/episode/2_1873_0002'
+ },
+ 'params': {
+ # m3u8 download
+ 'skip_download': True,
+ },
+ }, {
+ # unavailable via data-playlist-url
+ 'url': 'https://www.itv.com/hub/through-the-keyhole/2a2271a0033',
+ 'only_matching': True,
+ }, {
+ # InvalidVodcrid
+ 'url': 'https://www.itv.com/hub/james-martins-saturday-morning/2a5159a0034',
+ 'only_matching': True,
+ }, {
+ # ContentUnavailable
+ 'url': 'https://www.itv.com/hub/whos-doing-the-dishes/2a2898a0024',
+ 'only_matching': True,
+ }]
+
+ def _generate_api_headers(self, hmac):
+ return merge_dicts({
+ 'Accept': 'application/vnd.itv.vod.playlist.v2+json',
+ 'Content-Type': 'application/json',
+ 'hmac': hmac.upper(),
+ }, self.geo_verification_headers())
+
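+    # The playlist API expects a full device/client description; a generic desktop browser profile is sent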
+ def _call_api(self, video_id, playlist_url, headers, platform_tag, featureset, fatal=True):
+ return self._download_json(
+ playlist_url, video_id, data=json.dumps({
+ 'user': {
+ 'itvUserId': '',
+ 'entitlements': [],
+ 'token': ''
+ },
+ 'device': {
+ 'manufacturer': 'Safari',
+ 'model': '5',
+ 'os': {
+ 'name': 'Windows NT',
+ 'version': '6.1',
+ 'type': 'desktop'
+ }
+ },
+ 'client': {
+ 'version': '4.1',
+ 'id': 'browser'
+ },
+ 'variantAvailability': {
+ 'featureset': {
+ 'min': featureset,
+ 'max': featureset
+ },
+ 'platformTag': platform_tag
+ }
+ }).encode(), headers=headers, fatal=fatal)
+
+ def _get_subtitles(self, video_id, variants, ios_playlist_url, headers, *args, **kwargs):
+ subtitles = {}
+ # Prefer last matching featureset
+ # See: https://github.com/yt-dlp/yt-dlp/issues/986
+ platform_tag_subs, featureset_subs = next(
+ ((platform_tag, featureset)
+ for platform_tag, featuresets in reversed(list(variants.items())) for featureset in featuresets
+ if try_get(featureset, lambda x: x[2]) == 'outband-webvtt'),
+ (None, None))
+
+ if platform_tag_subs and featureset_subs:
+ subs_playlist = self._call_api(
+ video_id, ios_playlist_url, headers, platform_tag_subs, featureset_subs, fatal=False)
+ subs = try_get(subs_playlist, lambda x: x['Playlist']['Video']['Subtitles'], list) or []
+ for sub in subs:
+ if not isinstance(sub, dict):
+ continue
+ href = url_or_none(sub.get('Href'))
+ if not href:
+ continue
+ subtitles.setdefault('en', []).append({'url': href})
+ return subtitles
+
+ def _real_extract(self, url):
+ video_id = self._match_id(url)
+ webpage = self._download_webpage(url, video_id)
+ params = extract_attributes(self._search_regex(
+ r'(?s)(<[^>]+id="video"[^>]*>)', webpage, 'params'))
+ variants = self._parse_json(
+ try_get(params, lambda x: x['data-video-variants'], compat_str) or '{}',
+ video_id, fatal=False)
+ # Prefer last matching featureset
+ # See: https://github.com/yt-dlp/yt-dlp/issues/986
+ platform_tag_video, featureset_video = next(
+ ((platform_tag, featureset)
+ for platform_tag, featuresets in reversed(list(variants.items())) for featureset in featuresets
+ if set(try_get(featureset, lambda x: x[:2]) or []) == {'aes', 'hls'}),
+ (None, None))
+ if not platform_tag_video or not featureset_video:
+ raise ExtractorError('No downloads available', expected=True, video_id=video_id)
+
+ ios_playlist_url = params.get('data-video-playlist') or params['data-video-id']
+ headers = self._generate_api_headers(params['data-video-hmac'])
+ ios_playlist = self._call_api(
+ video_id, ios_playlist_url, headers, platform_tag_video, featureset_video)
+
+ video_data = try_get(ios_playlist, lambda x: x['Playlist']['Video'], dict) or {}
+ ios_base_url = video_data.get('Base')
+ formats = []
+ for media_file in (video_data.get('MediaFiles') or []):
+ href = media_file.get('Href')
+ if not href:
+ continue
+ if ios_base_url:
+ href = ios_base_url + href
+ ext = determine_ext(href)
+ if ext == 'm3u8':
+ formats.extend(self._extract_m3u8_formats(
+ href, video_id, 'mp4', entry_protocol='m3u8_native',
+ m3u8_id='hls', fatal=False))
+ else:
+ formats.append({
+ 'url': href,
+ })
+ info = self._search_json_ld(webpage, video_id, default={})
+ if not info:
+ json_ld = self._parse_json(self._search_regex(
+ JSON_LD_RE, webpage, 'JSON-LD', '{}',
+ group='json_ld'), video_id, fatal=False)
+ if json_ld and json_ld.get('@type') == 'BreadcrumbList':
+ for ile in (json_ld.get('itemListElement:') or []):
+ item = ile.get('item:') or {}
+ if item.get('@type') == 'TVEpisode':
+ item['@context'] = 'http://schema.org'
+ info = self._json_ld(item, video_id, fatal=False) or {}
+ break
+
+ thumbnails = []
+ thumbnail_url = try_get(params, lambda x: x['data-video-posterframe'], compat_str)
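+        # The posterframe URL is a template with {width}, {height}, {quality}, {blur} and {bg} placeholders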
+ if thumbnail_url:
+ thumbnails.extend([{
+ 'url': thumbnail_url.format(width=1920, height=1080, quality=100, blur=0, bg='false'),
+ 'width': 1920,
+ 'height': 1080,
+ }, {
+ 'url': urljoin(base_url(thumbnail_url), url_basename(thumbnail_url)),
+ 'preference': -2
+ }])
+
+ thumbnail_url = self._html_search_meta(['og:image', 'twitter:image'], webpage, default=None)
+ if thumbnail_url:
+ thumbnails.append({
+ 'url': thumbnail_url,
+ })
+ self._remove_duplicate_formats(thumbnails)
+
+ return merge_dicts({
+ 'id': video_id,
+ 'title': self._html_search_meta(['og:title', 'twitter:title'], webpage),
+ 'formats': formats,
+ 'subtitles': self.extract_subtitles(video_id, variants, ios_playlist_url, headers),
+ 'duration': parse_duration(video_data.get('Duration')),
+ 'description': clean_html(get_element_by_class('episode-info__synopsis', webpage)),
+ 'thumbnails': thumbnails
+ }, info)
+
+
+class ITVBTCCIE(InfoExtractor):
+ _VALID_URL = r'https?://(?:www\.)?itv\.com/(?:news|btcc)/(?:[^/]+/)*(?P<id>[^/?#&]+)'
+ _TESTS = [{
+ 'url': 'https://www.itv.com/btcc/articles/btcc-2019-brands-hatch-gp-race-action',
+ 'info_dict': {
+ 'id': 'btcc-2019-brands-hatch-gp-race-action',
+ 'title': 'BTCC 2019: Brands Hatch GP race action',
+ },
+ 'playlist_count': 12,
+ }, {
+ 'url': 'https://www.itv.com/news/2021-10-27/i-have-to-protect-the-country-says-rishi-sunak-as-uk-faces-interest-rate-hike',
+ 'info_dict': {
+ 'id': 'i-have-to-protect-the-country-says-rishi-sunak-as-uk-faces-interest-rate-hike',
+ 'title': 'md5:6ef054dd9f069330db3dcc66cb772d32'
+ },
+ 'playlist_count': 4
+ }]
+ BRIGHTCOVE_URL_TEMPLATE = 'http://players.brightcove.net/%s/%s_default/index.html?videoId=%s'
+
+ def _real_extract(self, url):
+ playlist_id = self._match_id(url)
+
+ webpage = self._download_webpage(url, playlist_id)
+
+ json_map = try_get(
+ self._search_nextjs_data(webpage, playlist_id),
+ lambda x: x['props']['pageProps']['article']['body']['content']) or []
+
+ entries = []
+ for video in json_map:
+ if not any(video['data'].get(attr) == 'Brightcove' for attr in ('name', 'type')):
+ continue
+ video_id = video['data']['id']
+ account_id = video['data']['accountId']
+ player_id = video['data']['playerId']
+ entries.append(self.url_result(
+ smuggle_url(self.BRIGHTCOVE_URL_TEMPLATE % (account_id, player_id, video_id), {
+ # ITV does not like some GB IP ranges, so here are some
+ # IP blocks it accepts
+ 'geo_ip_blocks': [
+ '193.113.0.0/16', '54.36.162.0/23', '159.65.16.0/21'
+ ],
+ 'referrer': url,
+ }),
+ ie=BrightcoveNewIE.ie_key(), video_id=video_id))
+
+ title = self._og_search_title(webpage, fatal=False)
+
+ return self.playlist_result(entries, playlist_id, title)
diff --git a/yt_dlp/extractor/ivi.py b/yt_dlp/extractor/ivi.py
new file mode 100644
index 0000000..fa5ceec
--- /dev/null
+++ b/yt_dlp/extractor/ivi.py
@@ -0,0 +1,253 @@
+import json
+import re
+
+from .common import InfoExtractor
+from ..dependencies import Cryptodome
+from ..utils import ExtractorError, int_or_none, qualities
+
+
+class IviIE(InfoExtractor):
+ IE_DESC = 'ivi.ru'
+ IE_NAME = 'ivi'
+ _VALID_URL = r'https?://(?:www\.)?ivi\.(?:ru|tv)/(?:watch/(?:[^/]+/)?|video/player\?.*?videoId=)(?P<id>\d+)'
+ _EMBED_REGEX = [r'<embed[^>]+?src=(["\'])(?P<url>https?://(?:www\.)?ivi\.ru/video/player.+?)\1']
+ _GEO_BYPASS = False
+ _GEO_COUNTRIES = ['RU']
+ _LIGHT_KEY = b'\xf1\x02\x32\xb7\xbc\x5c\x7a\xe8\xf7\x96\xc1\x33\x2b\x27\xa1\x8c'
+ _LIGHT_URL = 'https://api.ivi.ru/light/'
+
+ _TESTS = [
+ # Single movie
+ {
+ 'url': 'http://www.ivi.ru/watch/53141',
+ 'md5': '6ff5be2254e796ed346251d117196cf4',
+ 'info_dict': {
+ 'id': '53141',
+ 'ext': 'mp4',
+ 'title': 'Иван Васильевич меняет профессию',
+ 'description': 'md5:b924063ea1677c8fe343d8a72ac2195f',
+ 'duration': 5498,
+ 'thumbnail': r're:^https?://.*\.jpg$',
+ },
+ 'skip': 'Only works from Russia',
+ },
+ # Serial's series
+ {
+ 'url': 'http://www.ivi.ru/watch/dvoe_iz_lartsa/9549',
+ 'md5': '221f56b35e3ed815fde2df71032f4b3e',
+ 'info_dict': {
+ 'id': '9549',
+ 'ext': 'mp4',
+ 'title': 'Двое из ларца - Дело Гольдберга (1 часть)',
+ 'series': 'Двое из ларца',
+ 'season': 'Сезон 1',
+ 'season_number': 1,
+ 'episode': 'Дело Гольдберга (1 часть)',
+ 'episode_number': 1,
+ 'duration': 2655,
+ 'thumbnail': r're:^https?://.*\.jpg$',
+ },
+ 'skip': 'Only works from Russia',
+ },
+ {
+ # with MP4-HD720 format
+ 'url': 'http://www.ivi.ru/watch/146500',
+ 'md5': 'd63d35cdbfa1ea61a5eafec7cc523e1e',
+ 'info_dict': {
+ 'id': '146500',
+ 'ext': 'mp4',
+ 'title': 'Кукла',
+ 'description': 'md5:ffca9372399976a2d260a407cc74cce6',
+ 'duration': 5599,
+ 'thumbnail': r're:^https?://.*\.jpg$',
+ },
+ 'skip': 'Only works from Russia',
+ },
+ {
+ 'url': 'https://www.ivi.tv/watch/33560/',
+ 'only_matching': True,
+ },
+ ]
+
+ # Sorted by quality
+ _KNOWN_FORMATS = (
+ 'MP4-low-mobile', 'MP4-mobile', 'FLV-lo', 'MP4-lo', 'FLV-hi', 'MP4-hi',
+ 'MP4-SHQ', 'MP4-HD720', 'MP4-HD1080')
+
+ def _real_extract(self, url):
+ video_id = self._match_id(url)
+
+ data = json.dumps({
+ 'method': 'da.content.get',
+ 'params': [
+ video_id, {
+ 'site': 's%d',
+ 'referrer': 'http://www.ivi.ru/watch/%s' % video_id,
+ 'contentid': video_id
+ }
+ ]
+ })
+
+ for site in (353, 183):
+ content_data = (data % site).encode()
+ if site == 353:
+ if not Cryptodome.CMAC:
+ continue
+
+ timestamp = (self._download_json(
+ self._LIGHT_URL, video_id,
+ 'Downloading timestamp JSON', data=json.dumps({
+ 'method': 'da.timestamp.get',
+ 'params': []
+ }).encode(), fatal=False) or {}).get('result')
+ if not timestamp:
+ continue
+
+ query = {
+ 'ts': timestamp,
+ 'sign': Cryptodome.CMAC.new(self._LIGHT_KEY, timestamp.encode() + content_data,
+ Cryptodome.Blowfish).hexdigest(),
+ }
+ else:
+ query = {}
+
+ video_json = self._download_json(
+ self._LIGHT_URL, video_id,
+ 'Downloading video JSON', data=content_data, query=query)
+
+ error = video_json.get('error')
+ if error:
+ origin = error.get('origin')
+ message = error.get('message') or error.get('user_message')
+ extractor_msg = 'Unable to download video %s'
+ if origin == 'NotAllowedForLocation':
+ self.raise_geo_restricted(message, self._GEO_COUNTRIES)
+ elif origin == 'NoRedisValidData':
+ extractor_msg = 'Video %s does not exist'
+ elif site == 353:
+ continue
+ elif not Cryptodome.CMAC:
+ raise ExtractorError('pycryptodomex not found. Please install it', expected=True)
+ elif message:
+ extractor_msg += ': ' + message
+ raise ExtractorError(extractor_msg % video_id, expected=True)
+ else:
+ break
+
+ result = video_json['result']
+ title = result['title']
+
+ quality = qualities(self._KNOWN_FORMATS)
+
+ formats = []
+ for f in result.get('files', []):
+ f_url = f.get('url')
+ content_format = f.get('content_format')
+ if not f_url:
+ continue
+ if (not self.get_param('allow_unplayable_formats')
+ and ('-MDRM-' in content_format or '-FPS-' in content_format)):
+ continue
+ formats.append({
+ 'url': f_url,
+ 'format_id': content_format,
+ 'quality': quality(content_format),
+ 'filesize': int_or_none(f.get('size_in_bytes')),
+ })
+
+ compilation = result.get('compilation')
+ episode = title if compilation else None
+
+ title = '%s - %s' % (compilation, title) if compilation is not None else title
+
+ thumbnails = [{
+ 'url': preview['url'],
+ 'id': preview.get('content_format'),
+ } for preview in result.get('preview', []) if preview.get('url')]
+
+ webpage = self._download_webpage(url, video_id)
+
+ season = self._search_regex(
+ r'<li[^>]+class="season active"[^>]*><a[^>]+>([^<]+)',
+ webpage, 'season', default=None)
+ season_number = int_or_none(self._search_regex(
+ r'<li[^>]+class="season active"[^>]*><a[^>]+data-season(?:-index)?="(\d+)"',
+ webpage, 'season number', default=None))
+
+ episode_number = int_or_none(self._search_regex(
+ r'[^>]+itemprop="episode"[^>]*>\s*<meta[^>]+itemprop="episodeNumber"[^>]+content="(\d+)',
+ webpage, 'episode number', default=None))
+
+ description = self._og_search_description(webpage, default=None) or self._html_search_meta(
+ 'description', webpage, 'description', default=None)
+
+ return {
+ 'id': video_id,
+ 'title': title,
+ 'series': compilation,
+ 'season': season,
+ 'season_number': season_number,
+ 'episode': episode,
+ 'episode_number': episode_number,
+ 'thumbnails': thumbnails,
+ 'description': description,
+ 'duration': int_or_none(result.get('duration')),
+ 'formats': formats,
+ }
+
+
+class IviCompilationIE(InfoExtractor):
+ IE_DESC = 'ivi.ru compilations'
+ IE_NAME = 'ivi:compilation'
+ _VALID_URL = r'https?://(?:www\.)?ivi\.ru/watch/(?!\d+)(?P<compilationid>[a-z\d_-]+)(?:/season(?P<seasonid>\d+))?$'
+ _TESTS = [{
+ 'url': 'http://www.ivi.ru/watch/dvoe_iz_lartsa',
+ 'info_dict': {
+ 'id': 'dvoe_iz_lartsa',
+ 'title': 'Двое из ларца (2006 - 2008)',
+ },
+ 'playlist_mincount': 24,
+ }, {
+ 'url': 'http://www.ivi.ru/watch/dvoe_iz_lartsa/season1',
+ 'info_dict': {
+ 'id': 'dvoe_iz_lartsa/season1',
+ 'title': 'Двое из ларца (2006 - 2008) 1 сезон',
+ },
+ 'playlist_mincount': 12,
+ }]
+
+ def _extract_entries(self, html, compilation_id):
+ return [
+ self.url_result(
+ 'http://www.ivi.ru/watch/%s/%s' % (compilation_id, serie), IviIE.ie_key())
+ for serie in re.findall(
+ r'<a\b[^>]+\bhref=["\']/watch/%s/(\d+)["\']' % compilation_id, html)]
+
+ def _real_extract(self, url):
+ mobj = self._match_valid_url(url)
+ compilation_id = mobj.group('compilationid')
+ season_id = mobj.group('seasonid')
+
+ if season_id is not None: # Season link
+ season_page = self._download_webpage(
+ url, compilation_id, 'Downloading season %s web page' % season_id)
+ playlist_id = '%s/season%s' % (compilation_id, season_id)
+ playlist_title = self._html_search_meta('title', season_page, 'title')
+ entries = self._extract_entries(season_page, compilation_id)
+ else: # Compilation link
+ compilation_page = self._download_webpage(url, compilation_id, 'Downloading compilation web page')
+ playlist_id = compilation_id
+ playlist_title = self._html_search_meta('title', compilation_page, 'title')
+ seasons = re.findall(
+ r'<a href="/watch/%s/season(\d+)' % compilation_id, compilation_page)
+ if not seasons: # No seasons in this compilation
+ entries = self._extract_entries(compilation_page, compilation_id)
+ else:
+ entries = []
+ for season_id in seasons:
+ season_page = self._download_webpage(
+ 'http://www.ivi.ru/watch/%s/season%s' % (compilation_id, season_id),
+ compilation_id, 'Downloading season %s web page' % season_id)
+ entries.extend(self._extract_entries(season_page, compilation_id))
+
+ return self.playlist_result(entries, playlist_id, playlist_title)
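
For site 353 the request body is authenticated with a Blowfish CMAC over the timestamp plus payload, keyed with the static _LIGHT_KEY. A standalone sketch of that derivation, assuming pycryptodomex is installed:

    # sketch of the ivi 'sign' parameter, assuming pycryptodomex
    from Crypto.Cipher import Blowfish
    from Crypto.Hash import CMAC

    LIGHT_KEY = bytes.fromhex('f10232b7bc5c7ae8f796c1332b27a18c')  # same bytes as _LIGHT_KEY

    def sign(timestamp: str, content_data: bytes) -> str:
        # CMAC keyed with the static Blowfish key, over timestamp + request body
        return CMAC.new(LIGHT_KEY, timestamp.encode() + content_data,
                        ciphermod=Blowfish).hexdigest()
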
diff --git a/yt_dlp/extractor/ivideon.py b/yt_dlp/extractor/ivideon.py
new file mode 100644
index 0000000..7d1e554
--- /dev/null
+++ b/yt_dlp/extractor/ivideon.py
@@ -0,0 +1,77 @@
+from .common import InfoExtractor
+from ..compat import (
+ compat_urllib_parse_urlencode,
+ compat_urlparse,
+)
+from ..utils import qualities
+
+
+class IvideonIE(InfoExtractor):
+ IE_NAME = 'ivideon'
+ IE_DESC = 'Ivideon TV'
+ _VALID_URL = r'https?://(?:www\.)?ivideon\.com/tv/(?:[^/]+/)*camera/(?P<id>\d+-[\da-f]+)/(?P<camera_id>\d+)'
+ _TESTS = [{
+ 'url': 'https://www.ivideon.com/tv/camera/100-916ca13b5c4ad9f564266424a026386d/0/',
+ 'info_dict': {
+ 'id': '100-916ca13b5c4ad9f564266424a026386d',
+ 'ext': 'flv',
+ 'title': 're:^Касса [0-9]{4}-[0-9]{2}-[0-9]{2} [0-9]{2}:[0-9]{2}$',
+ 'description': 'Основное предназначение - запись действий кассиров. Плюс общий вид.',
+ 'is_live': True,
+ },
+ 'params': {
+ 'skip_download': True,
+ }
+ }, {
+ 'url': 'https://www.ivideon.com/tv/camera/100-c4ee4cb9ede885cf62dfbe93d7b53783/589824/?lang=ru',
+ 'only_matching': True,
+ }, {
+ 'url': 'https://www.ivideon.com/tv/map/22.917923/-31.816406/16/camera/100-e7bc16c7d4b5bbd633fd5350b66dfa9a/0',
+ 'only_matching': True,
+ }]
+
+ _QUALITIES = ('low', 'mid', 'hi')
+
+ def _real_extract(self, url):
+ mobj = self._match_valid_url(url)
+ server_id, camera_id = mobj.group('id'), mobj.group('camera_id')
+ camera_name, description = None, None
+ camera_url = compat_urlparse.urljoin(
+ url, '/tv/camera/%s/%s/' % (server_id, camera_id))
+
+ webpage = self._download_webpage(camera_url, server_id, fatal=False)
+ if webpage:
+ config_string = self._search_regex(
+ r'var\s+config\s*=\s*({.+?});', webpage, 'config', default=None)
+ if config_string:
+ config = self._parse_json(config_string, server_id, fatal=False)
+ camera_info = (config or {}).get('ivTvAppOptions', {}).get('currentCameraInfo')
+ if camera_info:
+ camera_name = camera_info.get('camera_name')
+ description = camera_info.get('misc', {}).get('description')
+ if not camera_name:
+ camera_name = self._html_search_meta(
+ 'name', webpage, 'camera name', default=None) or self._search_regex(
+ r'<h1[^>]+class="b-video-title"[^>]*>([^<]+)', webpage, 'camera name', default=None)
+
+ quality = qualities(self._QUALITIES)
+
+ formats = [{
+ 'url': 'https://streaming.ivideon.com/flv/live?%s' % compat_urllib_parse_urlencode({
+ 'server': server_id,
+ 'camera': camera_id,
+ 'sessionId': 'demo',
+ 'q': quality(format_id),
+ }),
+ 'format_id': format_id,
+ 'ext': 'flv',
+ 'quality': quality(format_id),
+ } for format_id in self._QUALITIES]
+
+ return {
+ 'id': server_id,
+ 'title': camera_name or server_id,
+ 'description': description,
+ 'is_live': True,
+ 'formats': formats,
+ }
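
The qualities() helper turns the ordered _QUALITIES tuple into a preference function, so later entries in the tuple sort higher. A rough equivalent of what the utility does (the real one lives in yt_dlp/utils.py):

    # rough equivalent of yt_dlp.utils.qualities
    def qualities(order):
        def q(qid):
            try:
                return order.index(qid)  # higher index means better quality
            except ValueError:
                return -1
        return q

    q = qualities(('low', 'mid', 'hi'))
    assert q('hi') == 2 and q('unknown') == -1
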
diff --git a/yt_dlp/extractor/iwara.py b/yt_dlp/extractor/iwara.py
new file mode 100644
index 0000000..e23fdfd
--- /dev/null
+++ b/yt_dlp/extractor/iwara.py
@@ -0,0 +1,298 @@
+import functools
+import hashlib
+import json
+import time
+import urllib.parse
+
+from .common import InfoExtractor
+from ..utils import (
+ ExtractorError,
+ OnDemandPagedList,
+ int_or_none,
+ jwt_decode_hs256,
+ mimetype2ext,
+ qualities,
+ traverse_obj,
+ try_call,
+ unified_timestamp,
+)
+
+
+class IwaraBaseIE(InfoExtractor):
+ _NETRC_MACHINE = 'iwara'
+ _USERTOKEN = None
+ _MEDIATOKEN = None
+
+ def _is_token_expired(self, token, token_type):
+ # User token TTL == ~3 weeks, Media token TTL == ~1 hour
+ if (try_call(lambda: jwt_decode_hs256(token)['exp']) or 0) <= int(time.time() - 120):
+ self.to_screen(f'{token_type} token has expired')
+ return True
+
+ def _get_user_token(self):
+ username, password = self._get_login_info()
+ if not username or not password:
+ return
+
+ user_token = IwaraBaseIE._USERTOKEN or self.cache.load(self._NETRC_MACHINE, username)
+ if not user_token or self._is_token_expired(user_token, 'User'):
+ response = self._download_json(
+ 'https://api.iwara.tv/user/login', None, note='Logging in',
+ headers={'Content-Type': 'application/json'}, data=json.dumps({
+ 'email': username,
+ 'password': password
+ }).encode(), expected_status=lambda x: True)
+ user_token = traverse_obj(response, ('token', {str}))
+ if not user_token:
+ error = traverse_obj(response, ('message', {str}))
+ if 'invalidLogin' in (error or ''):
+ raise ExtractorError('Invalid login credentials', expected=True)
+ else:
+ raise ExtractorError(f'Iwara API said: {error or "nothing"}')
+
+ self.cache.store(self._NETRC_MACHINE, username, user_token)
+
+ IwaraBaseIE._USERTOKEN = user_token
+
+ def _get_media_token(self):
+ self._get_user_token()
+ if not IwaraBaseIE._USERTOKEN:
+ return # user has not passed credentials
+
+ if not IwaraBaseIE._MEDIATOKEN or self._is_token_expired(IwaraBaseIE._MEDIATOKEN, 'Media'):
+ IwaraBaseIE._MEDIATOKEN = self._download_json(
+ 'https://api.iwara.tv/user/token', None, note='Fetching media token',
+ data=b'', headers={
+ 'Authorization': f'Bearer {IwaraBaseIE._USERTOKEN}',
+ 'Content-Type': 'application/json'
+ })['accessToken']
+
+ return {'Authorization': f'Bearer {IwaraBaseIE._MEDIATOKEN}'}
+
+ def _perform_login(self, username, password):
+ self._get_media_token()
+
+
+class IwaraIE(IwaraBaseIE):
+ IE_NAME = 'iwara'
+ _VALID_URL = r'https?://(?:www\.|ecchi\.)?iwara\.tv/videos?/(?P<id>[a-zA-Z0-9]+)'
+ _TESTS = [{
+ 'url': 'https://www.iwara.tv/video/k2ayoueezfkx6gvq',
+ 'info_dict': {
+ 'id': 'k2ayoueezfkx6gvq',
+ 'ext': 'mp4',
+ 'age_limit': 18,
+ 'title': 'Defeat of Irybelda - アイリベルダの敗北',
+ 'description': 'md5:70278abebe706647a8b4cb04cf23e0d3',
+ 'uploader': 'Inwerwm',
+ 'uploader_id': 'inwerwm',
+ 'tags': 'count:1',
+ 'like_count': 6133,
+ 'view_count': 1050343,
+ 'comment_count': 1,
+ 'timestamp': 1677843869,
+ 'modified_timestamp': 1679056362,
+ },
+ 'skip': 'this video cannot be played because of migration',
+ }, {
+ 'url': 'https://iwara.tv/video/1ywe1sbkqwumpdxz5/',
+ 'md5': '7645f966f069b8ec9210efd9130c9aad',
+ 'info_dict': {
+ 'id': '1ywe1sbkqwumpdxz5',
+ 'ext': 'mp4',
+ 'age_limit': 18,
+ 'title': 'Aponia アポニア SEX Party Tonight 手の脱衣 巨乳 ',
+ 'description': 'md5:3f60016fff22060eef1ef26d430b1f67',
+ 'uploader': 'Lyu ya',
+ 'uploader_id': 'user792540',
+ 'tags': [
+ 'uncategorized'
+ ],
+ 'like_count': int,
+ 'view_count': int,
+ 'comment_count': int,
+ 'timestamp': 1678732213,
+ 'modified_timestamp': int,
+ 'thumbnail': 'https://files.iwara.tv/image/thumbnail/581d12b5-46f4-4f15-beb2-cfe2cde5d13d/thumbnail-00.jpg',
+ 'modified_date': '20230614',
+ 'upload_date': '20230313',
+ },
+ }, {
+ 'url': 'https://iwara.tv/video/blggmfno8ghl725bg',
+ 'info_dict': {
+ 'id': 'blggmfno8ghl725bg',
+ 'ext': 'mp4',
+ 'age_limit': 18,
+ 'title': 'お外でおしっこしちゃう猫耳ロリメイド',
+ 'description': 'md5:0342ba9bf6db09edbbb28729657c3611',
+ 'uploader': 'Fe_Kurosabi',
+ 'uploader_id': 'fekurosabi',
+ 'tags': [
+ 'pee'
+ ],
+ 'like_count': int,
+ 'view_count': int,
+ 'comment_count': int,
+ 'timestamp': 1598880567,
+ 'modified_timestamp': int,
+ 'upload_date': '20200831',
+ 'modified_date': '20230605',
+ 'thumbnail': 'https://files.iwara.tv/image/thumbnail/7693e881-d302-42a4-a780-f16d66b5dadd/thumbnail-00.jpg',
+ # 'availability': 'needs_auth',
+ },
+ }]
+
+ def _extract_formats(self, video_id, fileurl):
+ up = urllib.parse.urlparse(fileurl)
+ q = urllib.parse.parse_qs(up.query)
+ paths = up.path.rstrip('/').split('/')
+ # https://github.com/yt-dlp/yt-dlp/issues/6549#issuecomment-1473771047
+ x_version = hashlib.sha1('_'.join((paths[-1], q['expires'][0], '5nFp9kmbNnHdAFhaqMvt')).encode()).hexdigest()
+
+ preference = qualities(['preview', '360', '540', 'Source'])
+
+ files = self._download_json(fileurl, video_id, headers={'X-Version': x_version})
+ for fmt in files:
+ yield traverse_obj(fmt, {
+ 'format_id': 'name',
+ 'url': ('src', ('view', 'download'), {self._proto_relative_url}),
+ 'ext': ('type', {mimetype2ext}),
+ 'quality': ('name', {preference}),
+ 'height': ('name', {int_or_none}),
+ }, get_all=False)
+
+ def _real_extract(self, url):
+ video_id = self._match_id(url)
+ username, _ = self._get_login_info()
+ video_data = self._download_json(
+ f'https://api.iwara.tv/video/{video_id}', video_id,
+ expected_status=lambda x: True, headers=self._get_media_token())
+ errmsg = video_data.get('message')
+ # the response also carries the uploader's info at this point, but we don't need it
+ if errmsg == 'errors.privateVideo':
+ self.raise_login_required('Private video. Log in if you have permission to watch', method='password')
+ elif errmsg == 'errors.notFound' and not username:
+ self.raise_login_required('Video may need login to view', method='password')
+ elif errmsg: # None if success
+ raise ExtractorError(f'Iwara says: {errmsg}')
+
+ if not video_data.get('fileUrl'):
+ if video_data.get('embedUrl'):
+ return self.url_result(video_data.get('embedUrl'))
+ raise ExtractorError('This video is unplayable', expected=True)
+
+ return {
+ 'id': video_id,
+ 'age_limit': 18 if video_data.get('rating') == 'ecchi' else 0, # ecchi is 'sexy' in Japanese
+ **traverse_obj(video_data, {
+ 'title': 'title',
+ 'description': 'body',
+ 'uploader': ('user', 'name'),
+ 'uploader_id': ('user', 'username'),
+ 'tags': ('tags', ..., 'id'),
+ 'like_count': 'numLikes',
+ 'view_count': 'numViews',
+ 'comment_count': 'numComments',
+ 'timestamp': ('createdAt', {unified_timestamp}),
+ 'modified_timestamp': ('updatedAt', {unified_timestamp}),
+ 'thumbnail': ('file', 'id', {str}, {
+ lambda x: f'https://files.iwara.tv/image/thumbnail/{x}/thumbnail-00.jpg'}),
+ }),
+ 'formats': list(self._extract_formats(video_id, video_data.get('fileUrl'))),
+ }
+
+
+class IwaraUserIE(IwaraBaseIE):
+ _VALID_URL = r'https?://(?:www\.)?iwara\.tv/profile/(?P<id>[^/?#&]+)'
+ IE_NAME = 'iwara:user'
+ _PER_PAGE = 32
+
+ _TESTS = [{
+ 'url': 'https://iwara.tv/profile/user792540/videos',
+ 'info_dict': {
+ 'id': 'user792540',
+ 'title': 'Lyu ya',
+ },
+ 'playlist_mincount': 70,
+ }, {
+ 'url': 'https://iwara.tv/profile/theblackbirdcalls/videos',
+ 'info_dict': {
+ 'id': 'theblackbirdcalls',
+ 'title': 'TheBlackbirdCalls',
+ },
+ 'playlist_mincount': 723,
+ }, {
+ 'url': 'https://iwara.tv/profile/user792540',
+ 'only_matching': True,
+ }, {
+ 'url': 'https://iwara.tv/profile/theblackbirdcalls',
+ 'only_matching': True,
+ }, {
+ 'url': 'https://www.iwara.tv/profile/lumymmd',
+ 'info_dict': {
+ 'id': 'lumymmd',
+ 'title': 'Lumy MMD',
+ },
+ 'playlist_mincount': 1,
+ }]
+
+ def _entries(self, playlist_id, user_id, page):
+ videos = self._download_json(
+ 'https://api.iwara.tv/videos', playlist_id,
+ note=f'Downloading page {page}',
+ query={
+ 'page': page,
+ 'sort': 'date',
+ 'user': user_id,
+ 'limit': self._PER_PAGE,
+ }, headers=self._get_media_token())
+ for x in traverse_obj(videos, ('results', ..., 'id')):
+ yield self.url_result(f'https://iwara.tv/video/{x}')
+
+ def _real_extract(self, url):
+ playlist_id = self._match_id(url)
+ user_info = self._download_json(
+ f'https://api.iwara.tv/profile/{playlist_id}', playlist_id,
+ note='Requesting user info')
+ user_id = traverse_obj(user_info, ('user', 'id'))
+
+ return self.playlist_result(
+ OnDemandPagedList(
+ functools.partial(self._entries, playlist_id, user_id),
+ self._PER_PAGE),
+ playlist_id, traverse_obj(user_info, ('user', 'name')))
+
+
+class IwaraPlaylistIE(IwaraBaseIE):
+ _VALID_URL = r'https?://(?:www\.)?iwara\.tv/playlist/(?P<id>[0-9a-f-]+)'
+ IE_NAME = 'iwara:playlist'
+ _PER_PAGE = 32
+
+ _TESTS = [{
+ 'url': 'https://iwara.tv/playlist/458e5486-36a4-4ac0-b233-7e9eef01025f',
+ 'info_dict': {
+ 'id': '458e5486-36a4-4ac0-b233-7e9eef01025f',
+ },
+ 'playlist_mincount': 3,
+ }]
+
+ def _entries(self, playlist_id, first_page, page):
+ videos = self._download_json(
+ 'https://api.iwara.tv/videos', playlist_id, f'Downloading page {page}',
+ query={'page': page, 'limit': self._PER_PAGE},
+ headers=self._get_media_token()) if page else first_page
+ for x in traverse_obj(videos, ('results', ..., 'id')):
+ yield self.url_result(f'https://iwara.tv/video/{x}')
+
+ def _real_extract(self, url):
+ playlist_id = self._match_id(url)
+ page_0 = self._download_json(
+ f'https://api.iwara.tv/playlist/{playlist_id}?page=0&limit={self._PER_PAGE}', playlist_id,
+ note='Requesting playlist info', headers=self._get_media_token())
+
+ return self.playlist_result(
+ OnDemandPagedList(
+ functools.partial(self._entries, playlist_id, page_0),
+ self._PER_PAGE),
+ playlist_id, traverse_obj(page_0, ('title', 'name')))
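
The X-Version header computed in _extract_formats is a SHA-1 over the last path component of the file URL, its expires parameter, and a fixed salt (see the issue comment linked in the code). As a standalone sketch:

    # standalone sketch of the X-Version derivation from _extract_formats above
    import hashlib
    import urllib.parse

    def x_version(fileurl: str) -> str:
        parts = urllib.parse.urlparse(fileurl)
        expires = urllib.parse.parse_qs(parts.query)['expires'][0]
        file_id = parts.path.rstrip('/').split('/')[-1]
        return hashlib.sha1(
            '_'.join((file_id, expires, '5nFp9kmbNnHdAFhaqMvt')).encode()).hexdigest()
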
diff --git a/yt_dlp/extractor/ixigua.py b/yt_dlp/extractor/ixigua.py
new file mode 100644
index 0000000..1f086d2
--- /dev/null
+++ b/yt_dlp/extractor/ixigua.py
@@ -0,0 +1,83 @@
+import base64
+
+from .common import InfoExtractor
+from ..utils import (
+ ExtractorError,
+ get_element_by_id,
+ int_or_none,
+ js_to_json,
+ str_or_none,
+ traverse_obj,
+)
+
+
+class IxiguaIE(InfoExtractor):
+ _VALID_URL = r'https?://(?:\w+\.)?ixigua\.com/(?:video/)?(?P<id>\d+).+'
+ _TESTS = [{
+ 'url': 'https://www.ixigua.com/6996881461559165471',
+ 'info_dict': {
+ 'id': '6996881461559165471',
+ 'ext': 'mp4',
+ 'title': '盲目涉水风险大,亲身示范高水位行车注意事项',
+ 'description': 'md5:8c82f46186299add4a1c455430740229',
+ 'tags': ['video_car'],
+ 'like_count': int,
+ 'dislike_count': int,
+ 'view_count': int,
+ 'uploader': '懂车帝原创',
+ 'uploader_id': '6480145787',
+ 'thumbnail': r're:^https?://.+\.(avif|webp)',
+ 'timestamp': 1629088414,
+ 'duration': 1030,
+ }
+ }]
+
+ def _get_json_data(self, webpage, video_id):
+ js_data = get_element_by_id('SSR_HYDRATED_DATA', webpage)
+ if not js_data:
+ if self._cookies_passed:
+ raise ExtractorError('Failed to get SSR_HYDRATED_DATA')
+ raise ExtractorError('Cookies (not necessarily logged in) are needed', expected=True)
+
+ return self._parse_json(
+ js_data.replace('window._SSR_HYDRATED_DATA=', ''), video_id, transform_source=js_to_json)
+
+ def _media_selector(self, json_data):
+ for path, override in (
+ (('video_list', ), {}),
+ (('dynamic_video', 'dynamic_video_list'), {'acodec': 'none'}),
+ (('dynamic_video', 'dynamic_audio_list'), {'vcodec': 'none', 'ext': 'm4a'}),
+ ):
+ for media in traverse_obj(json_data, (..., *path, lambda _, v: v['main_url'])):
+ yield {
+ 'url': base64.b64decode(media['main_url']).decode(),
+ 'width': int_or_none(media.get('vwidth')),
+ 'height': int_or_none(media.get('vheight')),
+ 'fps': int_or_none(media.get('fps')),
+ 'vcodec': media.get('codec_type'),
+ 'format_id': str_or_none(media.get('quality_type')),
+ 'filesize': int_or_none(media.get('size')),
+ 'ext': 'mp4',
+ **override,
+ }
+
+ def _real_extract(self, url):
+ video_id = self._match_id(url)
+ webpage = self._download_webpage(url, video_id)
+ json_data = self._get_json_data(webpage, video_id)['anyVideo']['gidInformation']['packerData']['video']
+
+ formats = list(self._media_selector(json_data.get('videoResource')))
+ return {
+ 'id': video_id,
+ 'title': json_data.get('title'),
+ 'description': json_data.get('video_abstract'),
+ 'formats': formats,
+ 'like_count': json_data.get('video_like_count'),
+ 'duration': int_or_none(json_data.get('duration')),
+ 'tags': [json_data.get('tag')],
+ 'uploader_id': traverse_obj(json_data, ('user_info', 'user_id')),
+ 'uploader': traverse_obj(json_data, ('user_info', 'name')),
+ 'view_count': json_data.get('video_watch_count'),
+ 'dislike_count': json_data.get('video_unlike_count'),
+ 'timestamp': int_or_none(json_data.get('video_publish_time')),
+ }
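
ixigua delivers stream URLs base64-encoded in each entry's main_url, which _media_selector simply decodes. For illustration (the URL below is a made-up placeholder):

    # illustration: main_url fields are plain base64-encoded URLs
    import base64

    media = {'main_url': base64.b64encode(b'https://v3.example.invalid/video.mp4').decode()}
    assert base64.b64decode(media['main_url']).decode() == 'https://v3.example.invalid/video.mp4'
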
diff --git a/yt_dlp/extractor/izlesene.py b/yt_dlp/extractor/izlesene.py
new file mode 100644
index 0000000..5cdf870
--- /dev/null
+++ b/yt_dlp/extractor/izlesene.py
@@ -0,0 +1,113 @@
+from .common import InfoExtractor
+from ..compat import (
+ compat_str,
+ compat_urllib_parse_unquote,
+)
+from ..utils import (
+ determine_ext,
+ float_or_none,
+ get_element_by_id,
+ int_or_none,
+ parse_iso8601,
+ str_to_int,
+)
+
+
+class IzleseneIE(InfoExtractor):
+ _VALID_URL = r'''(?x)
+ https?://(?:(?:www|m)\.)?izlesene\.com/
+ (?:video|embedplayer)/(?:[^/]+/)?(?P<id>[0-9]+)
+ '''
+ _TESTS = [
+ {
+ 'url': 'http://www.izlesene.com/video/sevincten-cildirtan-dogum-gunu-hediyesi/7599694',
+ 'md5': '4384f9f0ea65086734b881085ee05ac2',
+ 'info_dict': {
+ 'id': '7599694',
+ 'ext': 'mp4',
+ 'title': 'Sevinçten Çıldırtan Doğum Günü Hediyesi',
+ 'description': 'md5:253753e2655dde93f59f74b572454f6d',
+ 'thumbnail': r're:^https?://.*\.jpg',
+ 'uploader_id': 'pelikzzle',
+ 'timestamp': int,
+ 'upload_date': '20140702',
+ 'duration': 95.395,
+ 'age_limit': 0,
+ }
+ },
+ {
+ 'url': 'http://www.izlesene.com/video/tarkan-dortmund-2006-konseri/17997',
+ 'md5': '97f09b6872bffa284cb7fa4f6910cb72',
+ 'info_dict': {
+ 'id': '17997',
+ 'ext': 'mp4',
+ 'title': 'Tarkan Dortmund 2006 Konseri',
+ 'thumbnail': r're:^https://.*\.jpg',
+ 'uploader_id': 'parlayankiz',
+ 'timestamp': int,
+ 'upload_date': '20061112',
+ 'duration': 253.666,
+ 'age_limit': 0,
+ }
+ },
+ ]
+
+ def _real_extract(self, url):
+ video_id = self._match_id(url)
+
+ webpage = self._download_webpage('http://www.izlesene.com/video/%s' % video_id, video_id)
+
+ video = self._parse_json(
+ self._search_regex(
+ r'videoObj\s*=\s*({.+?})\s*;\s*\n', webpage, 'streams'),
+ video_id)
+
+ title = video.get('videoTitle') or self._og_search_title(webpage)
+
+ formats = []
+ for stream in video['media']['level']:
+ source_url = stream.get('source')
+ if not source_url or not isinstance(source_url, compat_str):
+ continue
+ ext = determine_ext(source_url, 'mp4')
+ quality = stream.get('value')
+ height = int_or_none(quality)
+ formats.append({
+ 'format_id': '%sp' % quality if quality else 'sd',
+ 'url': compat_urllib_parse_unquote(source_url),
+ 'ext': ext,
+ 'height': height,
+ })
+
+ description = self._og_search_description(webpage, default=None)
+ thumbnail = video.get('posterURL') or self._proto_relative_url(
+ self._og_search_thumbnail(webpage), scheme='http:')
+
+ uploader = self._html_search_regex(
+ r"adduserUsername\s*=\s*'([^']+)';",
+ webpage, 'uploader', fatal=False)
+ timestamp = parse_iso8601(self._html_search_meta(
+ 'uploadDate', webpage, 'upload date'))
+
+ duration = float_or_none(video.get('duration') or self._html_search_regex(
+ r'videoduration["\']?\s*=\s*(["\'])(?P<value>(?:(?!\1).)+)\1',
+ webpage, 'duration', fatal=False, group='value'), scale=1000)
+
+ view_count = str_to_int(get_element_by_id('videoViewCount', webpage))
+ comment_count = self._html_search_regex(
+ r'comment_count\s*=\s*\'([^\']+)\';',
+ webpage, 'comment_count', fatal=False)
+
+ return {
+ 'id': video_id,
+ 'title': title,
+ 'description': description,
+ 'thumbnail': thumbnail,
+ 'uploader_id': uploader,
+ 'timestamp': timestamp,
+ 'duration': duration,
+ 'view_count': int_or_none(view_count),
+ 'comment_count': int_or_none(comment_count),
+ 'age_limit': self._family_friendly_search(webpage),
+ 'formats': formats,
+ }
diff --git a/yt_dlp/extractor/jable.py b/yt_dlp/extractor/jable.py
new file mode 100644
index 0000000..71fed49
--- /dev/null
+++ b/yt_dlp/extractor/jable.py
@@ -0,0 +1,103 @@
+import re
+
+from .common import InfoExtractor
+from ..utils import (
+ InAdvancePagedList,
+ int_or_none,
+ orderedSet,
+ unified_strdate,
+)
+
+
+class JableIE(InfoExtractor):
+ _VALID_URL = r'https?://(?:www\.)?jable\.tv/videos/(?P<id>[\w-]+)'
+ _TESTS = [{
+ 'url': 'https://jable.tv/videos/pppd-812/',
+ 'md5': 'f1537283a9bc073c31ff86ca35d9b2a6',
+ 'info_dict': {
+ 'id': 'pppd-812',
+ 'ext': 'mp4',
+ 'title': 'PPPD-812 只要表現好巨乳女教師吉根柚莉愛就獎勵學生們在白虎穴內射出精液',
+ 'description': 'md5:5b6d4199a854f62c5e56e26ccad19967',
+ 'thumbnail': r're:^https?://.*\.jpg$',
+ 'age_limit': 18,
+ 'like_count': int,
+ 'view_count': int,
+ },
+ }, {
+ 'url': 'https://jable.tv/videos/apak-220/',
+ 'md5': '71f9239d69ced58ab74a816908847cc1',
+ 'info_dict': {
+ 'id': 'apak-220',
+ 'ext': 'mp4',
+ 'title': 'md5:5c3861b7cf80112a6e2b70bccf170824',
+ 'description': '',
+ 'thumbnail': r're:^https?://.*\.jpg$',
+ 'age_limit': 18,
+ 'like_count': int,
+ 'view_count': int,
+ 'upload_date': '20220319',
+ },
+ }]
+
+ def _real_extract(self, url):
+ video_id = self._match_id(url)
+ webpage = self._download_webpage(url, video_id)
+ formats = self._extract_m3u8_formats(
+ self._search_regex(r'var\s+hlsUrl\s*=\s*\'([^\']+)', webpage, 'hls_url'), video_id, 'mp4', m3u8_id='hls')
+
+ return {
+ 'id': video_id,
+ 'title': self._og_search_title(webpage),
+ 'description': self._og_search_description(webpage, default=''),
+ 'thumbnail': self._og_search_thumbnail(webpage, default=None),
+ 'formats': formats,
+ 'age_limit': 18,
+ 'upload_date': unified_strdate(self._search_regex(
+ r'class="inactive-color">\D+\s+(\d{4}-\d+-\d+)', webpage, 'upload_date', default=None)),
+ 'view_count': int_or_none(self._search_regex(
+ r'#icon-eye"></use></svg>\n*<span class="mr-3">([\d ]+)',
+ webpage, 'view_count', default='').replace(' ', '')),
+ 'like_count': int_or_none(self._search_regex(
+ r'#icon-heart"></use></svg><span class="count">(\d+)', webpage, 'like_count', default=None)),
+ }
+
+
+class JablePlaylistIE(InfoExtractor):
+ _VALID_URL = r'https?://(?:www\.)?jable\.tv/(?:categories|models|tags)/(?P<id>[\w-]+)'
+ _TESTS = [{
+ 'url': 'https://jable.tv/models/kaede-karen/',
+ 'info_dict': {
+ 'id': 'kaede-karen',
+ 'title': '楓カレン',
+ },
+ 'playlist_count': 34,
+ }, {
+ 'url': 'https://jable.tv/categories/roleplay/',
+ 'only_matching': True,
+ }, {
+ 'url': 'https://jable.tv/tags/girl/',
+ 'only_matching': True,
+ }]
+
+ def _real_extract(self, url):
+ playlist_id = self._match_id(url)
+ webpage = self._download_webpage(url, playlist_id)
+
+ def page_func(page_num):
+ return [
+ self.url_result(player_url, JableIE)
+ for player_url in orderedSet(re.findall(
+ r'href="(https://jable.tv/videos/[\w-]+/?)"',
+ self._download_webpage(url, playlist_id, query={
+ 'mode': 'async',
+ 'from': page_num + 1,
+ 'function': 'get_block',
+ 'block_id': 'list_videos_common_videos_list',
+ }, note=f'Downloading page {page_num + 1}')))]
+
+ return self.playlist_result(
+ InAdvancePagedList(page_func, int_or_none(self._search_regex(
+ r'from:(\d+)">[^<]+\s*&raquo;', webpage, 'last page number', default=1)), 24),
+ playlist_id, self._search_regex(
+ r'<h2 class="h3-md mb-1">([^<]+)', webpage, 'playlist title', default=None))
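
InAdvancePagedList calls page_func with 0-based page numbers up to the page count parsed from the pagination links, fetching each page on demand. A simplified model of the contract (the real class in yt_dlp/utils.py also supports slicing without fetching every page):

    # simplified model of the InAdvancePagedList contract used above
    def fetch_all(page_func, page_count):
        entries = []
        for page_num in range(page_count):
            entries.extend(page_func(page_num))  # 0-based, as in JablePlaylistIE
        return entries
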
diff --git a/yt_dlp/extractor/jamendo.py b/yt_dlp/extractor/jamendo.py
new file mode 100644
index 0000000..a2bbba3
--- /dev/null
+++ b/yt_dlp/extractor/jamendo.py
@@ -0,0 +1,210 @@
+import hashlib
+import random
+
+from ..compat import compat_str
+from .common import InfoExtractor
+from ..utils import (
+ clean_html,
+ int_or_none,
+ try_get,
+)
+
+
+class JamendoIE(InfoExtractor):
+ _VALID_URL = r'''(?x)
+ https?://
+ (?:
+ licensing\.jamendo\.com/[^/]+|
+ (?:www\.)?jamendo\.com
+ )
+ /track/(?P<id>[0-9]+)(?:/(?P<display_id>[^/?#&]+))?
+ '''
+ _TESTS = [{
+ 'url': 'https://www.jamendo.com/track/196219/stories-from-emona-i',
+ 'md5': '6e9e82ed6db98678f171c25a8ed09ffd',
+ 'info_dict': {
+ 'id': '196219',
+ 'display_id': 'stories-from-emona-i',
+ 'ext': 'flac',
+ # 'title': 'Maya Filipič - Stories from Emona I',
+ 'title': 'Stories from Emona I',
+ 'artist': 'Maya Filipič',
+ 'album': 'Between two worlds',
+ 'track': 'Stories from Emona I',
+ 'duration': 210,
+ 'thumbnail': 'https://usercontent.jamendo.com?type=album&id=29279&width=300&trackid=196219',
+ 'timestamp': 1217438117,
+ 'upload_date': '20080730',
+ 'license': 'by-nc-nd',
+ 'view_count': int,
+ 'like_count': int,
+ 'average_rating': int,
+ 'tags': ['piano', 'peaceful', 'newage', 'strings', 'upbeat'],
+ }
+ }, {
+ 'url': 'https://licensing.jamendo.com/en/track/1496667/energetic-rock',
+ 'only_matching': True,
+ }]
+
+ def _call_api(self, resource, resource_id, fatal=True):
+ path = '/api/%ss' % resource
+ rand = compat_str(random.random())
+ return self._download_json(
+ 'https://www.jamendo.com' + path, resource_id, fatal=fatal, query={
+ 'id[]': resource_id,
+ }, headers={
+ 'X-Jam-Call': '$%s*%s~' % (hashlib.sha1((path + rand).encode()).hexdigest(), rand)
+ })[0]
+
+ def _real_extract(self, url):
+ track_id, display_id = self._match_valid_url(url).groups()
+ # webpage = self._download_webpage(
+ # 'https://www.jamendo.com/track/' + track_id, track_id)
+ # models = self._parse_json(self._html_search_regex(
+ # r"data-bundled-models='([^']+)",
+ # webpage, 'bundled models'), track_id)
+ # track = models['track']['models'][0]
+ track = self._call_api('track', track_id)
+ title = track_name = track['name']
+ # get_model = lambda x: try_get(models, lambda y: y[x]['models'][0], dict) or {}
+ # artist = get_model('artist')
+ # artist_name = artist.get('name')
+ # if artist_name:
+ # title = '%s - %s' % (artist_name, title)
+ # album = get_model('album')
+ artist = self._call_api('artist', track.get('artistId'), fatal=False)
+ album = self._call_api('album', track.get('albumId'), fatal=False)
+
+ formats = [{
+ 'url': 'https://%s.jamendo.com/?trackid=%s&format=%s&from=app-97dab294'
+ % (sub_domain, track_id, format_id),
+ 'format_id': format_id,
+ 'ext': ext,
+ 'quality': quality,
+ } for quality, (format_id, sub_domain, ext) in enumerate((
+ ('mp31', 'mp3l', 'mp3'),
+ ('mp32', 'mp3d', 'mp3'),
+ ('ogg1', 'ogg', 'ogg'),
+ ('flac', 'flac', 'flac'),
+ ))]
+
+ urls = []
+ thumbnails = []
+ for covers in (track.get('cover') or {}).values():
+ for cover_id, cover_url in covers.items():
+ if not cover_url or cover_url in urls:
+ continue
+ urls.append(cover_url)
+ size = int_or_none(cover_id.lstrip('size'))
+ thumbnails.append({
+ 'id': cover_id,
+ 'url': cover_url,
+ 'width': size,
+ 'height': size,
+ })
+
+ tags = []
+ for tag in (track.get('tags') or []):
+ tag_name = tag.get('name')
+ if not tag_name:
+ continue
+ tags.append(tag_name)
+
+ stats = track.get('stats') or {}
+ license = track.get('licenseCC') or []
+
+ return {
+ 'id': track_id,
+ 'display_id': display_id,
+ 'thumbnails': thumbnails,
+ 'title': title,
+ 'description': track.get('description'),
+ 'duration': int_or_none(track.get('duration')),
+ 'artist': artist.get('name'),
+ 'track': track_name,
+ 'album': album.get('name'),
+ 'formats': formats,
+ 'license': '-'.join(license) if license else None,
+ 'timestamp': int_or_none(track.get('dateCreated')),
+ 'view_count': int_or_none(stats.get('listenedAll')),
+ 'like_count': int_or_none(stats.get('favorited')),
+ 'average_rating': int_or_none(stats.get('averageNote')),
+ 'tags': tags,
+ }
+
+
+class JamendoAlbumIE(JamendoIE): # XXX: Do not subclass from concrete IE
+ _VALID_URL = r'https?://(?:www\.)?jamendo\.com/album/(?P<id>[0-9]+)'
+ _TESTS = [{
+ 'url': 'https://www.jamendo.com/album/121486/duck-on-cover',
+ 'info_dict': {
+ 'id': '121486',
+ 'title': 'Duck On Cover',
+ 'description': 'md5:c2920eaeef07d7af5b96d7c64daf1239',
+ },
+ 'playlist': [{
+ 'md5': 'e1a2fcb42bda30dfac990212924149a8',
+ 'info_dict': {
+ 'id': '1032333',
+ 'ext': 'flac',
+ 'title': 'Warmachine',
+ 'artist': 'Shearer',
+ 'track': 'Warmachine',
+ 'timestamp': 1368089771,
+ 'upload_date': '20130509',
+ 'view_count': int,
+ 'thumbnail': 'https://usercontent.jamendo.com?type=album&id=121486&width=300&trackid=1032333',
+ 'duration': 190,
+ 'license': 'by',
+ 'album': 'Duck On Cover',
+ 'average_rating': 4,
+ 'tags': ['rock', 'drums', 'bass', 'world', 'punk', 'neutral'],
+ 'like_count': int,
+ }
+ }, {
+ 'md5': '1f358d7b2f98edfe90fd55dac0799d50',
+ 'info_dict': {
+ 'id': '1032330',
+ 'ext': 'flac',
+ 'title': 'Without Your Ghost',
+ 'artist': 'Shearer',
+ 'track': 'Without Your Ghost',
+ 'timestamp': 1368089771,
+ 'upload_date': '20130509',
+ 'duration': 192,
+ 'tags': ['rock', 'drums', 'bass', 'world', 'punk'],
+ 'album': 'Duck On Cover',
+ 'thumbnail': 'https://usercontent.jamendo.com?type=album&id=121486&width=300&trackid=1032330',
+ 'view_count': int,
+ 'average_rating': 4,
+ 'license': 'by',
+ 'like_count': int,
+ }
+ }],
+ 'params': {
+ 'playlistend': 2
+ }
+ }]
+
+ def _real_extract(self, url):
+ album_id = self._match_id(url)
+ album = self._call_api('album', album_id)
+ album_name = album.get('name')
+
+ entries = []
+ for track in (album.get('tracks') or []):
+ track_id = track.get('id')
+ if not track_id:
+ continue
+ track_id = compat_str(track_id)
+ entries.append({
+ '_type': 'url_transparent',
+ 'url': 'https://www.jamendo.com/track/' + track_id,
+ 'ie_key': JamendoIE.ie_key(),
+ 'id': track_id,
+ 'album': album_name,
+ })
+
+ return self.playlist_result(
+ entries, album_id, album_name,
+ clean_html(try_get(album, lambda x: x['description']['en'], compat_str)))
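
The X-Jam-Call header built in _call_api is a SHA-1 of the API path concatenated with a random nonce, wrapped as '$<sha1>*<nonce>~'. Extracted into a standalone sketch:

    # standalone sketch of the X-Jam-Call header from _call_api above
    import hashlib
    import random

    def jam_call(path: str) -> str:
        rand = str(random.random())
        return '$%s*%s~' % (hashlib.sha1((path + rand).encode()).hexdigest(), rand)

    print(jam_call('/api/tracks'))  # e.g. $3f5c...*0.8231...~
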
diff --git a/yt_dlp/extractor/japandiet.py b/yt_dlp/extractor/japandiet.py
new file mode 100644
index 0000000..6c65056
--- /dev/null
+++ b/yt_dlp/extractor/japandiet.py
@@ -0,0 +1,274 @@
+import re
+
+from ..utils import (
+ ExtractorError,
+ clean_html,
+ int_or_none,
+ join_nonempty,
+ parse_qs,
+ smuggle_url,
+ traverse_obj,
+ try_call,
+ unsmuggle_url
+)
+from .common import InfoExtractor
+
+
+def _parse_japanese_date(text):
+ if not text:
+ return None
+ ERA_TABLE = {
+ '明治': 1868,
+ '大正': 1912,
+ '昭和': 1926,
+ '平成': 1989,
+ '令和': 2019,
+ }
+ ERA_RE = '|'.join(map(re.escape, ERA_TABLE.keys()))
+ mobj = re.search(rf'({ERA_RE})?(\d+)年(\d+)月(\d+)日', re.sub(r'[\s\u3000]+', '', text))
+ if not mobj:
+ return None
+ era, year, month, day = mobj.groups()
+ year, month, day = map(int, (year, month, day))
+ if era:
+ # example input: 令和5年3月34日
+ # even though each era has an end, we don't check for that here
+ # ERA_TABLE holds each era's first Gregorian year, hence the -1
+ # (e.g. 令和4年 -> 4 + 2019 - 1 = 2022)
+ year += ERA_TABLE[era] - 1
+ return '%04d%02d%02d' % (year, month, day)
+
+
+def _parse_japanese_duration(text):
+ mobj = re.search(r'(?:(\d+)日間?)?(?:(\d+)時間?)?(?:(\d+)分)?(?:(\d+)秒)?', re.sub(r'[\s\u3000]+', '', text or ''))
+ if not mobj:
+ return
+ days, hours, mins, secs = [int_or_none(x, default=0) for x in mobj.groups()]
+ return secs + mins * 60 + hours * 60 * 60 + days * 24 * 60 * 60
+
+
+class ShugiinItvBaseIE(InfoExtractor):
+ _INDEX_ROOMS = None
+
+ @classmethod
+ def _find_rooms(cls, webpage):
+ return [{
+ '_type': 'url',
+ 'id': x.group(1),
+ 'title': clean_html(x.group(2)).strip(),
+ 'url': smuggle_url(f'https://www.shugiintv.go.jp/jp/index.php?room_id={x.group(1)}', {'g': x.groups()}),
+ 'ie_key': ShugiinItvLiveIE.ie_key(),
+ } for x in re.finditer(r'(?s)<a\s+href="[^"]+\?room_id=(room\d+)"\s*class="play_live".+?class="s12_14">(.+?)</td>', webpage)]
+
+ def _fetch_rooms(self):
+ if not self._INDEX_ROOMS:
+ webpage = self._download_webpage(
+ 'https://www.shugiintv.go.jp/jp/index.php', None,
+ encoding='euc-jp', note='Downloading proceedings info')
+ ShugiinItvBaseIE._INDEX_ROOMS = self._find_rooms(webpage)
+ return self._INDEX_ROOMS
+
+
+class ShugiinItvLiveIE(ShugiinItvBaseIE):
+ _VALID_URL = r'https?://(?:www\.)?shugiintv\.go\.jp/(?:jp|en)(?:/index\.php)?$'
+ IE_DESC = '衆議院インターネット審議中継'
+
+ _TESTS = [{
+ 'url': 'https://www.shugiintv.go.jp/jp/index.php',
+ 'info_dict': {
+ '_type': 'playlist',
+ 'title': 'All proceedings for today',
+ },
+ # expects at least one proceeding to be running
+ 'playlist_mincount': 1,
+ }]
+
+ @classmethod
+ def suitable(cls, url):
+ return super().suitable(url) and not any(x.suitable(url) for x in (ShugiinItvLiveRoomIE, ShugiinItvVodIE))
+
+ def _real_extract(self, url):
+ self.to_screen(
+ 'Downloading all running proceedings. To specify one proceeding, use a direct link from the website')
+ return self.playlist_result(self._fetch_rooms(), playlist_title='All proceedings for today')
+
+
+class ShugiinItvLiveRoomIE(ShugiinItvBaseIE):
+ _VALID_URL = r'https?://(?:www\.)?shugiintv\.go\.jp/(?:jp|en)/index\.php\?room_id=(?P<id>room\d+)'
+ IE_DESC = '衆議院インターネット審議中継 (中継)'
+
+ _TESTS = [{
+ 'url': 'https://www.shugiintv.go.jp/jp/index.php?room_id=room01',
+ 'info_dict': {
+ 'id': 'room01',
+ 'title': '内閣委員会',
+ },
+ 'skip': 'live only while the proceeding is in session, not every day',
+ }, {
+ 'url': 'https://www.shugiintv.go.jp/jp/index.php?room_id=room11',
+ 'info_dict': {
+ 'id': 'room11',
+ 'title': '外務委員会',
+ },
+ 'skip': 'live only while the proceeding is in session, not every day',
+ }]
+
+ def _real_extract(self, url):
+ url, smug = unsmuggle_url(url, default={})
+ if smug.get('g'):
+ room_id, title = smug['g']
+ else:
+ room_id = self._match_id(url)
+ title = traverse_obj(self._fetch_rooms(), (lambda k, v: v['id'] == room_id, 'title'), get_all=False)
+
+ formats, subtitles = self._extract_m3u8_formats_and_subtitles(
+ f'https://hlslive.shugiintv.go.jp/{room_id}/amlst:{room_id}/playlist.m3u8',
+ room_id, ext='mp4')
+
+ return {
+ 'id': room_id,
+ 'title': title,
+ 'formats': formats,
+ 'subtitles': subtitles,
+ 'is_live': True,
+ }
+
+
+class ShugiinItvVodIE(ShugiinItvBaseIE):
+ _VALID_URL = r'https?://(?:www\.)?shugiintv\.go\.jp/(?:jp|en)/index\.php\?ex=VL(?:\&[^=]+=[^&]*)*\&deli_id=(?P<id>\d+)'
+ IE_DESC = '衆議院インターネット審議中継 (ビデオライブラリ)'
+ _TESTS = [{
+ 'url': 'https://www.shugiintv.go.jp/jp/index.php?ex=VL&media_type=&deli_id=53846',
+ 'info_dict': {
+ 'id': '53846',
+ 'title': 'ウクライナ大統領国会演説(オンライン)',
+ 'release_date': '20220323',
+ 'chapters': 'count:4',
+ }
+ }, {
+ 'url': 'https://www.shugiintv.go.jp/en/index.php?ex=VL&media_type=&deli_id=53846',
+ 'only_matching': True
+ }]
+
+ def _real_extract(self, url):
+ video_id = self._match_id(url)
+ webpage = self._download_webpage(
+ f'https://www.shugiintv.go.jp/jp/index.php?ex=VL&media_type=&deli_id={video_id}', video_id,
+ encoding='euc-jp')
+
+ m3u8_url = self._search_regex(
+ r'id="vtag_src_base_vod"\s*value="(http.+?\.m3u8)"', webpage, 'm3u8 url')
+ m3u8_url = re.sub(r'^http://', 'https://', m3u8_url)
+ formats, subtitles = self._extract_m3u8_formats_and_subtitles(
+ m3u8_url, video_id, ext='mp4')
+
+ title = self._html_search_regex(
+ (r'<td\s+align="left">(.+)\s*\(\d+分\)',
+ r'<TD.+?<IMG\s*src=".+?/spacer\.gif".+?height="15">(.+?)<IMG'), webpage, 'title', fatal=False)
+
+ release_date = _parse_japanese_date(self._html_search_regex(
+ r'開会日</td>\s*<td.+?/td>\s*<TD>(.+?)</TD>',
+ webpage, 'release date', fatal=False))
+
+ chapters = []
+ for chp in re.finditer(r'(?i)<A\s+HREF="([^"]+?)"\s*class="play_vod">(?!<img)(.+)</[Aa]>', webpage):
+ chapters.append({
+ 'title': clean_html(chp.group(2)).strip(),
+ 'start_time': try_call(lambda: float(parse_qs(chp.group(1))['time'][0].strip())),
+ })
+ # NOTE: there are blank sections at the start and the end of each video,
+ # so a reliable overall duration cannot be provided
+ # also, the exact end_time of the last chapter is unknown (granularity is minutes at best)
+ last_tr = re.findall(r'(?s)<TR\s*class="s14_24">(.+?)</TR>', webpage)[-1]
+ if last_tr and chapters:
+ last_td = re.findall(r'<TD.+?</TD>', last_tr)[-1]
+ if last_td:
+ chapters[-1]['end_time'] = chapters[-1]['start_time'] + _parse_japanese_duration(clean_html(last_td))
+
+ return {
+ 'id': video_id,
+ 'title': title,
+ 'release_date': release_date,
+ 'chapters': chapters,
+ 'formats': formats,
+ 'subtitles': subtitles,
+ }
+
+
+class SangiinInstructionIE(InfoExtractor):
+ _VALID_URL = r'^https?://www\.webtv\.sangiin\.go\.jp/webtv/index\.php'
+ IE_DESC = False # this shouldn't be listed as a supported site
+
+ def _real_extract(self, url):
+ raise ExtractorError('Copy the link from the button below the video description or player, and use that link to download. If there is no button in the frame, get the URL of the frame showing the video.', expected=True)
+
+
+class SangiinIE(InfoExtractor):
+ _VALID_URL = r'https?://www\.webtv\.sangiin\.go\.jp/webtv/detail\.php\?sid=(?P<id>\d+)'
+ IE_DESC = '参議院インターネット審議中継 (archive)'
+
+ _TESTS = [{
+ 'url': 'https://www.webtv.sangiin.go.jp/webtv/detail.php?sid=7052',
+ 'info_dict': {
+ 'id': '7052',
+ 'title': '2022年10月7日 本会議',
+ 'description': 'md5:0a5fed523f95c88105a0b0bf1dd71489',
+ 'upload_date': '20221007',
+ 'ext': 'mp4',
+ },
+ }, {
+ 'url': 'https://www.webtv.sangiin.go.jp/webtv/detail.php?sid=7037',
+ 'info_dict': {
+ 'id': '7037',
+ 'title': '2022年10月3日 開会式',
+ 'upload_date': '20221003',
+ 'ext': 'mp4',
+ },
+ }, {
+ 'url': 'https://www.webtv.sangiin.go.jp/webtv/detail.php?sid=7076',
+ 'info_dict': {
+ 'id': '7076',
+ 'title': '2022年10月27日 法務委員会',
+ 'upload_date': '20221027',
+ 'ext': 'mp4',
+ 'is_live': True,
+ },
+ 'skip': 'this live stream is turned into an archive after it ends',
+ }, ]
+
+ def _real_extract(self, url):
+ video_id = self._match_id(url)
+ webpage = self._download_webpage(url, video_id)
+
+ date = self._html_search_regex(
+ r'<dt[^>]*>\s*開会日\s*</dt>\s*<dd[^>]*>\s*(.+?)\s*</dd>', webpage,
+ 'date', fatal=False)
+ upload_date = _parse_japanese_date(date)
+
+ title = self._html_search_regex(
+ r'<dt[^>]*>\s*会議名\s*</dt>\s*<dd[^>]*>\s*(.+?)\s*</dd>', webpage,
+ 'title', fatal=False)
+
+ # some videos don't have the elements, so assume it's missing
+ description = self._html_search_regex(
+ r'会議の経過\s*</h3>\s*<span[^>]*>(.+?)</span>', webpage,
+ 'description', default=None)
+
+ # this row appears only when it's livestream
+ is_live = bool(self._html_search_regex(
+ r'<dt[^>]*>\s*公報掲載時刻\s*</dt>\s*<dd[^>]*>\s*(.+?)\s*</dd>', webpage,
+ 'is_live', default=None))
+
+ m3u8_url = self._search_regex(
+ r'var\s+videopath\s*=\s*(["\'])([^"\']+)\1', webpage,
+ 'm3u8 url', group=2)
+
+ formats, subs = self._extract_m3u8_formats_and_subtitles(m3u8_url, video_id, 'mp4')
+
+ return {
+ 'id': video_id,
+ 'title': join_nonempty(date, title, delim=' '),
+ 'description': description,
+ 'upload_date': upload_date,
+ 'formats': formats,
+ 'subtitles': subs,
+ 'is_live': is_live,
+ }
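
Quick sanity checks for the two helpers defined at the top of this file; the era arithmetic maps 令和4年 to 2022, matching the ShugiinItvVodIE test above:

    # sanity checks for _parse_japanese_date / _parse_japanese_duration
    assert _parse_japanese_date('令和4年3月23日') == '20220323'
    assert _parse_japanese_date('2022年10月7日') == '20221007'
    assert _parse_japanese_duration('1時間30分') == 5400
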
diff --git a/yt_dlp/extractor/jeuxvideo.py b/yt_dlp/extractor/jeuxvideo.py
new file mode 100644
index 0000000..7938206
--- /dev/null
+++ b/yt_dlp/extractor/jeuxvideo.py
@@ -0,0 +1,52 @@
+from .common import InfoExtractor
+
+
+class JeuxVideoIE(InfoExtractor):
+ _WORKING = False
+ _ENABLED = None # XXX: pass through to GenericIE
+ _VALID_URL = r'https?://.*?\.jeuxvideo\.com/.*/(.*?)\.htm'
+
+ _TESTS = [{
+ 'url': 'http://www.jeuxvideo.com/reportages-videos-jeux/0004/00046170/tearaway-playstation-vita-gc-2013-tearaway-nous-presente-ses-papiers-d-identite-00115182.htm',
+ 'md5': '046e491afb32a8aaac1f44dd4ddd54ee',
+ 'info_dict': {
+ 'id': '114765',
+ 'ext': 'mp4',
+ 'title': 'Tearaway : GC 2013 : Tearaway nous présente ses papiers d\'identité',
+ 'description': 'Lorsque les développeurs de LittleBigPlanet proposent un nouveau titre, on ne peut que s\'attendre à un résultat original et fort attrayant.',
+ },
+ }, {
+ 'url': 'http://www.jeuxvideo.com/videos/chroniques/434220/l-histoire-du-jeu-video-la-saturn.htm',
+ 'only_matching': True,
+ }]
+
+ def _real_extract(self, url):
+ mobj = self._match_valid_url(url)
+ title = mobj.group(1)
+ webpage = self._download_webpage(url, title)
+ title = self._html_search_meta('name', webpage) or self._og_search_title(webpage)
+ config_url = self._html_search_regex(
+ r'data-src(?:set-video)?="(/contenu/medias/video\.php.*?)"',
+ webpage, 'config URL')
+ config_url = 'http://www.jeuxvideo.com' + config_url
+
+ video_id = self._search_regex(
+ r'id=(\d+)',
+ config_url, 'video ID')
+
+ config = self._download_json(
+ config_url, title, 'Downloading JSON config')
+
+ formats = [{
+ 'url': source['file'],
+ 'format_id': source['label'],
+ 'resolution': source['label'],
+ } for source in reversed(config['sources'])]
+
+ return {
+ 'id': video_id,
+ 'title': title,
+ 'formats': formats,
+ 'description': self._og_search_description(webpage),
+ 'thumbnail': config.get('image'),
+ }
diff --git a/yt_dlp/extractor/jiosaavn.py b/yt_dlp/extractor/jiosaavn.py
new file mode 100644
index 0000000..a592098
--- /dev/null
+++ b/yt_dlp/extractor/jiosaavn.py
@@ -0,0 +1,105 @@
+from .common import InfoExtractor
+from ..utils import (
+ int_or_none,
+ js_to_json,
+ url_or_none,
+ urlencode_postdata,
+ urljoin,
+)
+from ..utils.traversal import traverse_obj
+
+
+class JioSaavnBaseIE(InfoExtractor):
+ def _extract_initial_data(self, url, audio_id):
+ webpage = self._download_webpage(url, audio_id)
+ return self._search_json(
+ r'window\.__INITIAL_DATA__\s*=', webpage,
+ 'init json', audio_id, transform_source=js_to_json)
+
+
+class JioSaavnSongIE(JioSaavnBaseIE):
+ _VALID_URL = r'https?://(?:www\.)?(?:jiosaavn\.com/song/[^/?#]+/|saavn\.com/s/song/(?:[^/?#]+/){3})(?P<id>[^/?#]+)'
+ _TESTS = [{
+ 'url': 'https://www.jiosaavn.com/song/leja-re/OQsEfQFVUXk',
+ 'md5': '3b84396d15ed9e083c3106f1fa589c04',
+ 'info_dict': {
+ 'id': 'OQsEfQFVUXk',
+ 'ext': 'mp4',
+ 'title': 'Leja Re',
+ 'album': 'Leja Re',
+ 'thumbnail': 'https://c.saavncdn.com/258/Leja-Re-Hindi-2018-20181124024539-500x500.jpg',
+ 'duration': 205,
+ 'view_count': int,
+ 'release_year': 2018,
+ },
+ }, {
+ 'url': 'https://www.saavn.com/s/song/hindi/Saathiya/O-Humdum-Suniyo-Re/KAMiazoCblU',
+ 'only_matching': True,
+ }]
+
+ _VALID_BITRATES = ('16', '32', '64', '128', '320')
+
+ def _real_extract(self, url):
+ audio_id = self._match_id(url)
+ extract_bitrates = self._configuration_arg('bitrate', ['128', '320'], ie_key='JioSaavn')
+ if invalid_bitrates := [br for br in extract_bitrates if br not in self._VALID_BITRATES]:
+ raise ValueError(
+ f'Invalid bitrate(s): {", ".join(invalid_bitrates)}. '
+ + f'Valid bitrates are: {", ".join(self._VALID_BITRATES)}')
+
+ song_data = self._extract_initial_data(url, audio_id)['song']['song']
+ formats = []
+ for bitrate in extract_bitrates:
+ media_data = self._download_json(
+ 'https://www.jiosaavn.com/api.php', audio_id, f'Downloading format info for {bitrate}',
+ fatal=False, data=urlencode_postdata({
+ '__call': 'song.generateAuthToken',
+ '_format': 'json',
+ 'bitrate': bitrate,
+ 'url': song_data['encrypted_media_url'],
+ }))
+ if not media_data or not media_data.get('auth_url'):
+ self.report_warning(f'Unable to extract format info for {bitrate}')
+ continue
+ formats.append({
+ 'url': media_data['auth_url'],
+ 'ext': media_data.get('type'),
+ 'format_id': bitrate,
+ 'abr': int(bitrate),
+ 'vcodec': 'none',
+ })
+
+ return {
+ 'id': audio_id,
+ 'formats': formats,
+ **traverse_obj(song_data, {
+ 'title': ('title', 'text'),
+ 'album': ('album', 'text'),
+ 'thumbnail': ('image', 0, {url_or_none}),
+ 'duration': ('duration', {int_or_none}),
+ 'view_count': ('play_count', {int_or_none}),
+ 'release_year': ('year', {int_or_none}),
+ }),
+ }
+
+
+class JioSaavnAlbumIE(JioSaavnBaseIE):
+ _VALID_URL = r'https?://(?:www\.)?(?:jio)?saavn\.com/album/[^/?#]+/(?P<id>[^/?#]+)'
+ _TESTS = [{
+ 'url': 'https://www.jiosaavn.com/album/96/buIOjYZDrNA_',
+ 'info_dict': {
+ 'id': 'buIOjYZDrNA_',
+ 'title': '96',
+ },
+ 'playlist_count': 10,
+ }]
+
+ def _real_extract(self, url):
+ album_id = self._match_id(url)
+ album_view = self._extract_initial_data(url, album_id)['albumView']
+
+ return self.playlist_from_matches(
+ traverse_obj(album_view, (
+ 'modules', lambda _, x: x['key'] == 'list', 'data', ..., 'title', 'action', {str})),
+ album_id, traverse_obj(album_view, ('album', 'title', 'text', {str})), ie=JioSaavnSongIE,
+ getter=lambda x: urljoin('https://www.jiosaavn.com/', x))
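
The bitrate list above is user-configurable through extractor-args; the defaults are 128 and 320. A usage sketch through the Python API (the option shape follows yt-dlp's standard extractor_args plumbing, keyed by the lowercased IE name):

    # usage sketch, assuming yt-dlp's standard extractor_args plumbing
    from yt_dlp import YoutubeDL

    opts = {'extractor_args': {'jiosaavn': {'bitrate': ['16', '320']}}}
    with YoutubeDL(opts) as ydl:
        info = ydl.extract_info(
            'https://www.jiosaavn.com/song/leja-re/OQsEfQFVUXk', download=False)
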
diff --git a/yt_dlp/extractor/jixie.py b/yt_dlp/extractor/jixie.py
new file mode 100644
index 0000000..4830e61
--- /dev/null
+++ b/yt_dlp/extractor/jixie.py
@@ -0,0 +1,47 @@
+from .common import InfoExtractor
+from ..utils import clean_html, float_or_none, traverse_obj, try_call
+
+
+class JixieBaseIE(InfoExtractor):
+ """
+ API Reference:
+ https://jixie.atlassian.net/servicedesk/customer/portal/2/article/1339654214?src=-1456335525,
+ https://scripts.jixie.media/jxvideo.3.1.min.js
+ """
+
+ def _extract_data_from_jixie_id(self, display_id, video_id, webpage):
+ json_data = self._download_json(
+ 'https://apidam.jixie.io/api/public/stream', display_id,
+ query={'metadata': 'full', 'video_id': video_id})['data']
+
+ formats, subtitles = [], {}
+ for stream in json_data['streams']:
+ if stream.get('type') == 'HLS':
+ fmt, sub = self._extract_m3u8_formats_and_subtitles(stream.get('url'), display_id, ext='mp4')
+ if json_data.get('drm'):
+ for f in fmt:
+ f['has_drm'] = True
+ formats.extend(fmt)
+ self._merge_subtitles(sub, target=subtitles)
+ else:
+ formats.append({
+ 'url': stream.get('url'),
+ 'width': stream.get('width'),
+ 'height': stream.get('height'),
+ 'ext': 'mp4',
+ })
+
+ return {
+ 'id': video_id,
+ 'display_id': display_id,
+ 'formats': formats,
+ 'subtitles': subtitles,
+ 'title': json_data.get('title') or self._html_search_meta(['og:title', 'twitter:title'], webpage),
+ 'description': (clean_html(traverse_obj(json_data, ('metadata', 'description')))
+ or self._html_search_meta(['description', 'og:description', 'twitter:description'], webpage)),
+ 'thumbnails': traverse_obj(json_data, ('metadata', 'thumbnails')),
+ 'duration': float_or_none(traverse_obj(json_data, ('metadata', 'duration'))),
+ 'tags': try_call(lambda: (json_data['metadata']['keywords'] or None).split(',')),
+ 'categories': try_call(lambda: (json_data['metadata']['categories'] or None).split(',')),
+ 'uploader_id': json_data.get('owner_id'),
+ }
diff --git a/yt_dlp/extractor/joj.py b/yt_dlp/extractor/joj.py
new file mode 100644
index 0000000..ea46042
--- /dev/null
+++ b/yt_dlp/extractor/joj.py
@@ -0,0 +1,108 @@
+from .common import InfoExtractor
+from ..compat import compat_str
+from ..utils import (
+ format_field,
+ int_or_none,
+ js_to_json,
+ try_get,
+)
+
+
+class JojIE(InfoExtractor):
+ _VALID_URL = r'''(?x)
+ (?:
+ joj:|
+ https?://media\.joj\.sk/embed/
+ )
+ (?P<id>[^/?#^]+)
+ '''
+ _EMBED_REGEX = [r'<iframe\b[^>]+\bsrc=(["\'])(?P<url>(?:https?:)?//media\.joj\.sk/embed/(?:(?!\1).)+)\1']
+ _TESTS = [{
+ 'url': 'https://media.joj.sk/embed/a388ec4c-6019-4a4a-9312-b1bee194e932',
+ 'info_dict': {
+ 'id': 'a388ec4c-6019-4a4a-9312-b1bee194e932',
+ 'ext': 'mp4',
+ 'title': 'NOVÉ BÝVANIE',
+ 'thumbnail': r're:^https?://.*?$',
+ 'duration': 3118,
+ }
+ }, {
+ 'url': 'https://media.joj.sk/embed/CSM0Na0l0p1',
+ 'info_dict': {
+ 'id': 'CSM0Na0l0p1',
+ 'ext': 'mp4',
+ 'height': 576,
+ 'title': 'Extrémne rodiny 2 - POKRAČOVANIE (2012/04/09 21:30:00)',
+ 'duration': 3937,
+ 'thumbnail': r're:^https?://.*?$',
+ }
+ }, {
+ 'url': 'https://media.joj.sk/embed/9i1cxv',
+ 'only_matching': True,
+ }, {
+ 'url': 'joj:a388ec4c-6019-4a4a-9312-b1bee194e932',
+ 'only_matching': True,
+ }, {
+ 'url': 'joj:9i1cxv',
+ 'only_matching': True,
+ }]
+
+ def _real_extract(self, url):
+ video_id = self._match_id(url)
+
+ webpage = self._download_webpage(
+ 'https://media.joj.sk/embed/%s' % video_id, video_id)
+
+ title = (self._search_json(r'videoTitle\s*:', webpage, 'title', video_id,
+ contains_pattern=r'["\'].+["\']', default=None)
+ or self._html_extract_title(webpage, default=None)
+ or self._og_search_title(webpage))
+
+ bitrates = self._parse_json(
+ self._search_regex(
+ r'(?s)(?:src|bitrates)\s*=\s*({.+?});', webpage, 'bitrates',
+ default='{}'),
+ video_id, transform_source=js_to_json, fatal=False)
+
+ formats = []
+ for format_url in try_get(bitrates, lambda x: x['mp4'], list) or []:
+ if isinstance(format_url, compat_str):
+ height = self._search_regex(
+ r'(\d+)[pP]|(pal)\.', format_url, 'height', default=None)
+ if height == 'pal':
+ height = 576
+ formats.append({
+ 'url': format_url,
+ 'format_id': format_field(height, None, '%sp'),
+ 'height': int_or_none(height),
+ })
+ if not formats:
+ playlist = self._download_xml(
+ 'https://media.joj.sk/services/Video.php?clip=%s' % video_id,
+ video_id)
+ for file_el in playlist.findall('./files/file'):
+ path = file_el.get('path')
+ if not path:
+ continue
+ format_id = file_el.get('id') or file_el.get('label')
+ formats.append({
+ 'url': 'http://n16.joj.sk/storage/%s' % path.replace(
+ 'dat/', '', 1),
+ 'format_id': format_id,
+ 'height': int_or_none(self._search_regex(
+ r'(\d+)[pP]', format_id or path, 'height',
+ default=None)),
+ })
+
+ thumbnail = self._og_search_thumbnail(webpage)
+
+ duration = int_or_none(self._search_regex(
+ r'videoDuration\s*:\s*(\d+)', webpage, 'duration', fatal=False))
+
+ return {
+ 'id': video_id,
+ 'title': title,
+ 'thumbnail': thumbnail,
+ 'duration': duration,
+ 'formats': formats,
+ }
diff --git a/yt_dlp/extractor/joqrag.py b/yt_dlp/extractor/joqrag.py
new file mode 100644
index 0000000..3bb28af
--- /dev/null
+++ b/yt_dlp/extractor/joqrag.py
@@ -0,0 +1,112 @@
+import datetime
+import urllib.parse
+
+from .common import InfoExtractor
+from ..utils import (
+ clean_html,
+ datetime_from_str,
+ unified_timestamp,
+ urljoin,
+)
+
+
+class JoqrAgIE(InfoExtractor):
+ IE_DESC = '超!A&G+ 文化放送 (f.k.a. AGQR) Nippon Cultural Broadcasting, Inc. (JOQR)'
+ _VALID_URL = [r'https?://www\.uniqueradio\.jp/agplayer5/(?:player|inc-player-hls)\.php',
+ r'https?://(?:www\.)?joqr\.co\.jp/ag/',
+ r'https?://(?:www\.)?joqr\.co\.jp/qr/ag(?:daily|regular)program/?(?:$|[#?])']
+ _TESTS = [{
+ 'url': 'https://www.uniqueradio.jp/agplayer5/player.php',
+ 'info_dict': {
+ 'id': 'live',
+ 'title': str,
+ 'channel': '超!A&G+',
+ 'description': str,
+ 'live_status': 'is_live',
+ 'release_timestamp': int,
+ },
+ 'params': {
+ 'skip_download': True,
+ 'ignore_no_formats_error': True,
+ },
+ }, {
+ 'url': 'https://www.uniqueradio.jp/agplayer5/inc-player-hls.php',
+ 'only_matching': True,
+ }, {
+ 'url': 'https://www.joqr.co.jp/ag/article/103760/',
+ 'only_matching': True,
+ }, {
+ 'url': 'http://www.joqr.co.jp/qr/agdailyprogram/',
+ 'only_matching': True,
+ }, {
+ 'url': 'http://www.joqr.co.jp/qr/agregularprogram/',
+ 'only_matching': True,
+ }]
+
+ def _extract_metadata(self, variable, html):
+ return clean_html(urllib.parse.unquote_plus(self._search_regex(
+ rf'var\s+{variable}\s*=\s*(["\'])(?P<value>(?:(?!\1).)+)\1',
+ html, 'metadata', group='value', default=''))) or None
+
+ def _extract_start_timestamp(self, video_id, is_live):
+ def extract_start_time_from(date_str):
+ dt = datetime_from_str(date_str) + datetime.timedelta(hours=9)
+ date = dt.strftime('%Y%m%d')
+ start_time = self._search_regex(
+ r'<h3[^>]+\bclass="dailyProgram-itemHeaderTime"[^>]*>[\s\d:]+–\s*(\d{1,2}:\d{1,2})',
+ self._download_webpage(
+ f'https://www.joqr.co.jp/qr/agdailyprogram/?date={date}', video_id,
+ note=f'Downloading program list of {date}', fatal=False,
+ errnote=f'Failed to download program list of {date}') or '',
+ 'start time', default=None)
+ if start_time:
+ return unified_timestamp(f'{dt.strftime("%Y/%m/%d")} {start_time} +09:00')
+ return None
+
+ start_timestamp = extract_start_time_from('today')
+ if not start_timestamp:
+ return None
+
+ if not is_live or start_timestamp < datetime_from_str('now').timestamp():
+ return start_timestamp
+ else:
+ return extract_start_time_from('yesterday')
+
+ def _real_extract(self, url):
+ video_id = 'live'
+
+ metadata = self._download_webpage(
+ 'https://www.uniqueradio.jp/aandg', video_id,
+ note='Downloading metadata', errnote='Failed to download metadata')
+ title = self._extract_metadata('Program_name', metadata)
+
+ if title == '放送休止':
+ formats = []
+ live_status = 'is_upcoming'
+ release_timestamp = self._extract_start_timestamp(video_id, False)
+ msg = 'This stream is not currently live'
+ if release_timestamp:
+ msg += (' and will start at '
+ + datetime.datetime.fromtimestamp(release_timestamp).strftime('%Y-%m-%d %H:%M:%S'))
+ self.raise_no_formats(msg, expected=True)
+ else:
+ m3u8_path = self._search_regex(
+ r'<source\s[^>]*\bsrc="([^"]+)"',
+ self._download_webpage(
+ 'https://www.uniqueradio.jp/agplayer5/inc-player-hls.php', video_id,
+ note='Downloading player data', errnote='Failed to download player data'),
+ 'm3u8 url')
+ formats = self._extract_m3u8_formats(
+ urljoin('https://www.uniqueradio.jp/', m3u8_path), video_id)
+ live_status = 'is_live'
+ release_timestamp = self._extract_start_timestamp(video_id, True)
+
+ return {
+ 'id': video_id,
+ 'title': title,
+ 'channel': '超!A&G+',
+ 'description': self._extract_metadata('Program_text', metadata),
+ 'formats': formats,
+ 'live_status': live_status,
+ 'release_timestamp': release_timestamp,
+ }
diff --git a/yt_dlp/extractor/jove.py b/yt_dlp/extractor/jove.py
new file mode 100644
index 0000000..245fe73
--- /dev/null
+++ b/yt_dlp/extractor/jove.py
@@ -0,0 +1,76 @@
+from .common import InfoExtractor
+from ..utils import (
+    ExtractorError,
+    int_or_none,
+    unified_strdate,
+)
+
+
+class JoveIE(InfoExtractor):
+ _VALID_URL = r'https?://(?:www\.)?jove\.com/video/(?P<id>[0-9]+)'
+    _CHAPTERS_URL = 'http://www.jove.com/video-chapters?videoid={video_id}'
+ _TESTS = [
+ {
+ 'url': 'http://www.jove.com/video/2744/electrode-positioning-montage-transcranial-direct-current',
+ 'md5': '93723888d82dbd6ba8b3d7d0cd65dd2b',
+ 'info_dict': {
+ 'id': '2744',
+ 'ext': 'mp4',
+ 'title': 'Electrode Positioning and Montage in Transcranial Direct Current Stimulation',
+ 'description': 'md5:015dd4509649c0908bc27f049e0262c6',
+ 'thumbnail': r're:^https?://.*\.png$',
+ 'upload_date': '20110523',
+ }
+ },
+ {
+ 'url': 'http://www.jove.com/video/51796/culturing-caenorhabditis-elegans-axenic-liquid-media-creation',
+ 'md5': '914aeb356f416811d911996434811beb',
+ 'info_dict': {
+ 'id': '51796',
+ 'ext': 'mp4',
+ 'title': 'Culturing Caenorhabditis elegans in Axenic Liquid Media and Creation of Transgenic Worms by Microparticle Bombardment',
+ 'description': 'md5:35ff029261900583970c4023b70f1dc9',
+ 'thumbnail': r're:^https?://.*\.png$',
+ 'upload_date': '20140802',
+ }
+ },
+ ]
+
+    def _real_extract(self, url):
+        video_id = self._match_id(url)
+
+ webpage = self._download_webpage(url, video_id)
+
+ chapters_id = self._html_search_regex(
+ r'/video-chapters\?videoid=([0-9]+)', webpage, 'chapters id')
+
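+        # the chapters endpoint returns an XML document whose root element
+        # carries the direct video URL in its 'video' attribute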
+ chapters_xml = self._download_xml(
+ self._CHAPTERS_URL.format(video_id=chapters_id),
+ video_id, note='Downloading chapters XML',
+ errnote='Failed to download chapters XML')
+
+ video_url = chapters_xml.attrib.get('video')
+ if not video_url:
+ raise ExtractorError('Failed to get the video URL')
+
+ title = self._html_search_meta('citation_title', webpage, 'title')
+ thumbnail = self._og_search_thumbnail(webpage)
+ description = self._html_search_regex(
+ r'<div id="section_body_summary"><p class="jove_content">(.+?)</p>',
+ webpage, 'description', fatal=False)
+ publish_date = unified_strdate(self._html_search_meta(
+ 'citation_publication_date', webpage, 'publish date', fatal=False))
+        comment_count = int_or_none(self._html_search_regex(
+            r'<meta name="num_comments" content="(\d+) Comments?"',
+            webpage, 'comment count', fatal=False))
+
+ return {
+ 'id': video_id,
+ 'title': title,
+ 'url': video_url,
+ 'thumbnail': thumbnail,
+ 'description': description,
+ 'upload_date': publish_date,
+ 'comment_count': comment_count,
+ }
diff --git a/yt_dlp/extractor/jstream.py b/yt_dlp/extractor/jstream.py
new file mode 100644
index 0000000..3e2e627
--- /dev/null
+++ b/yt_dlp/extractor/jstream.py
@@ -0,0 +1,73 @@
+import base64
+import json
+import re
+
+from .common import InfoExtractor
+from ..utils import (
+ float_or_none,
+ js_to_json,
+ remove_start,
+)
+
+
+class JStreamIE(InfoExtractor):
+ # group "id" only exists for compliance, not directly used in requests
+ # also all components are mandatory
+ _VALID_URL = r'jstream:(?P<host>www\d+):(?P<id>(?P<publisher>[a-z0-9]+):(?P<mid>\d+))'
+
+ _TESTS = [{
+ 'url': 'jstream:www50:eqd638pvwx:752',
+ 'info_dict': {
+ 'id': 'eqd638pvwx:752',
+ 'ext': 'mp4',
+ 'title': '阪神淡路大震災 激震の記録2020年版 解説動画',
+ 'duration': 672,
+ 'thumbnail': r're:https?://eqd638pvwx\.eq\.webcdn\.stream\.ne\.jp/.+\.jpg',
+ },
+ }]
+
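+    # The metadata endpoint replies with JSONP, i.e. a JS callback invocation
+    # wrapping the payload, roughly: metaDataResult({"movie": {...}});
+    # _parse_jsonp() locates the callback and parses the embedded JSON object.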
+ def _parse_jsonp(self, callback, string, video_id):
+ return self._search_json(rf'\s*{re.escape(callback)}\s*\(', string, callback, video_id)
+
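+    # movie_list_hls entries appear to be labelled 'auto', 'auto_1080' and so
+    # on; only the 'auto*' variant playlists are used, and whatever follows
+    # 'auto_' (if anything) becomes the m3u8 format id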
+ def _find_formats(self, video_id, movie_list_hls, host, publisher, subtitles):
+ for value in movie_list_hls:
+ text = value.get('text') or ''
+ if not text.startswith('auto'):
+ continue
+ m3u8_id = remove_start(remove_start(text, 'auto'), '_') or None
+ fmts, subs = self._extract_m3u8_formats_and_subtitles(
+ f'https://{publisher}.eq.webcdn.stream.ne.jp/{host}/{publisher}/jmc_pub/{value.get("url")}', video_id, 'mp4', m3u8_id=m3u8_id)
+ self._merge_subtitles(subs, target=subtitles)
+ yield from fmts
+
+ def _real_extract(self, url):
+ host, publisher, mid, video_id = self._match_valid_url(url).group('host', 'publisher', 'mid', 'id')
+ video_info_jsonp = self._download_webpage(
+ f'https://{publisher}.eq.webcdn.stream.ne.jp/{host}/{publisher}/jmc_pub/eq_meta/v1/{mid}.jsonp',
+ video_id, 'Requesting video info')
+ video_info = self._parse_jsonp('metaDataResult', video_info_jsonp, video_id)['movie']
+ subtitles = {}
+ formats = list(self._find_formats(video_id, video_info.get('movie_list_hls'), host, publisher, subtitles))
+ self._remove_duplicate_formats(formats)
+ return {
+ 'id': video_id,
+ 'title': video_info.get('title'),
+ 'duration': float_or_none(video_info.get('duration')),
+ 'thumbnail': video_info.get('thumbnail_url'),
+ 'formats': formats,
+ 'subtitles': subtitles,
+ }
+
+ @classmethod
+ def _extract_embed_urls(cls, url, webpage):
+        # check whether the webpage is eligible
+ # https://support.eq.stream.co.jp/hc/ja/articles/115008388147-%E3%83%97%E3%83%AC%E3%82%A4%E3%83%A4%E3%83%BCAPI%E3%81%AE%E3%82%B5%E3%83%B3%E3%83%97%E3%83%AB%E3%82%B3%E3%83%BC%E3%83%89
+ script_tag = re.search(r'<script\s*[^>]+?src="https://ssl-cache\.stream\.ne\.jp/(?P<host>www\d+)/(?P<publisher>[a-z0-9]+)/[^"]+?/if\.js"', webpage)
+ if not script_tag:
+ return
+ host, publisher = script_tag.groups()
+ for m in re.finditer(r'(?s)PlayerFactoryIF\.create\(\s*({[^\}]+?})\s*\)\s*;', webpage):
+            # TODO: json.loads is used here because InfoExtractor._parse_json is not a classmethod
+ info = json.loads(js_to_json(m.group(1)))
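+            # the media id is carried base64-encoded in the config's "m"
+            # member, e.g. (hypothetical) {"m": "NzUy"} -> "752"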
+ mid = base64.b64decode(info.get('m')).decode()
+ yield f'jstream:{host}:{publisher}:{mid}'
diff --git a/yt_dlp/extractor/jtbc.py b/yt_dlp/extractor/jtbc.py
new file mode 100644
index 0000000..573f749
--- /dev/null
+++ b/yt_dlp/extractor/jtbc.py
@@ -0,0 +1,156 @@
+import re
+
+from .common import InfoExtractor
+from ..utils import (
+ int_or_none,
+ parse_duration,
+ url_or_none,
+)
+from ..utils.traversal import traverse_obj
+
+
+class JTBCIE(InfoExtractor):
+ IE_DESC = 'jtbc.co.kr'
+ _VALID_URL = r'''(?x)
+ https?://(?:
+ vod\.jtbc\.co\.kr/player/(?:program|clip)
+ |tv\.jtbc\.co\.kr/(?:replay|trailer|clip)/pr\d+/pm\d+
+ )/(?P<id>(?:ep|vo)\d+)'''
+ _GEO_COUNTRIES = ['KR']
+
+ _TESTS = [{
+ 'url': 'https://tv.jtbc.co.kr/replay/pr10011629/pm10067930/ep20216321/view',
+ 'md5': 'e6ade71d8c8685bbfd6e6ce4167c6a6c',
+ 'info_dict': {
+ 'id': 'VO10721192',
+ 'display_id': 'ep20216321',
+ 'ext': 'mp4',
+ 'title': '힘쎈여자 강남순 2회 다시보기',
+ 'description': 'md5:043c1d9019100ce271dba09995dbd1e2',
+ 'duration': 3770.0,
+ 'release_date': '20231008',
+ 'age_limit': 15,
+ 'thumbnail': 'https://fs.jtbc.co.kr//joydata/CP00000001/prog/drama/stronggirlnamsoon/img/20231008_163541_522_1.jpg',
+ 'series': '힘쎈여자 강남순',
+ },
+ }, {
+ 'url': 'https://vod.jtbc.co.kr/player/program/ep20216733',
+ 'md5': '217a6d190f115a75e4bda0ceaa4cd7f4',
+ 'info_dict': {
+ 'id': 'VO10721429',
+ 'display_id': 'ep20216733',
+ 'ext': 'mp4',
+ 'title': '헬로 마이 닥터 친절한 진료실 149회 다시보기',
+ 'description': 'md5:1d70788a982dd5de26874a92fcffddb8',
+ 'duration': 2720.0,
+ 'release_date': '20231009',
+ 'age_limit': 15,
+ 'thumbnail': 'https://fs.jtbc.co.kr//joydata/CP00000001/prog/culture/hellomydoctor/img/20231009_095002_528_1.jpg',
+ 'series': '헬로 마이 닥터 친절한 진료실',
+ },
+ }, {
+ 'url': 'https://vod.jtbc.co.kr/player/clip/vo10721270',
+ 'md5': '05782e2dc22a9c548aebefe62ae4328a',
+ 'info_dict': {
+ 'id': 'VO10721270',
+ 'display_id': 'vo10721270',
+ 'ext': 'mp4',
+ 'title': '뭉쳐야 찬다3 2회 예고편 - A매치로 향하는 마지막 관문💥',
+ 'description': 'md5:d48b51a8655c84843b4ed8d0c39aae68',
+ 'duration': 46.0,
+ 'release_date': '20231015',
+ 'age_limit': 15,
+ 'thumbnail': 'https://fs.jtbc.co.kr//joydata/CP00000001/prog/enter/soccer3/img/20231008_210957_775_1.jpg',
+ 'series': '뭉쳐야 찬다3',
+ },
+ }, {
+ 'url': 'https://tv.jtbc.co.kr/trailer/pr10010392/pm10032526/vo10720912/view',
+ 'md5': '367d480eb3ef54a9cd7a4b4d69c4b32d',
+ 'info_dict': {
+ 'id': 'VO10720912',
+ 'display_id': 'vo10720912',
+ 'ext': 'mp4',
+ 'title': '아는 형님 404회 예고편 | 10월 14일(토) 저녁 8시 50분 방송!',
+ 'description': 'md5:2743bb1079ceb85bb00060f2ad8f0280',
+ 'duration': 148.0,
+ 'release_date': '20231014',
+ 'age_limit': 15,
+ 'thumbnail': 'https://fs.jtbc.co.kr//joydata/CP00000001/prog/enter/jtbcbros/img/20231006_230023_802_1.jpg',
+ 'series': '아는 형님',
+ },
+ }]
+
+ def _real_extract(self, url):
+ display_id = self._match_id(url)
+
+ if display_id.startswith('vo'):
+ video_id = display_id.upper()
+ else:
+ webpage = self._download_webpage(url, display_id)
+ video_id = self._search_regex(r'data-vod="(VO\d+)"', webpage, 'vod id')
+
+ playback_data = self._download_json(
+ f'https://api.jtbc.co.kr/vod/{video_id}', video_id, note='Downloading VOD playback data')
+
+ subtitles = {}
+ for sub in traverse_obj(playback_data, ('tracks', lambda _, v: v['file'])):
+ subtitles.setdefault(sub.get('label', 'und'), []).append({'url': sub['file']})
+
+ formats = []
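+        # Each 'sources' entry appears to point at a single-quality variant
+        # playlist (e.g. .../playlist_pd2000.m3u8); rewriting it to index.m3u8
+        # requests the master playlist so that every quality is extracted.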
+ for stream_url in traverse_obj(playback_data, ('sources', 'HLS', ..., 'file', {url_or_none})):
+ stream_url = re.sub(r'/playlist(?:_pd\d+)?\.m3u8', '/index.m3u8', stream_url)
+ formats.extend(self._extract_m3u8_formats(stream_url, video_id, fatal=False))
+
+ metadata = self._download_json(
+ 'https://now-api.jtbc.co.kr/v1/vod/detail', video_id,
+ note='Downloading mobile details', fatal=False, query={'vodFileId': video_id})
+ return {
+ 'id': video_id,
+ 'display_id': display_id,
+ **traverse_obj(metadata, ('vodDetail', {
+ 'title': 'vodTitleView',
+ 'series': 'programTitle',
+ 'age_limit': ('watchAge', {int_or_none}),
+ 'release_date': ('broadcastDate', {lambda x: re.match(r'\d{8}', x.replace('.', ''))}, 0),
+ 'description': 'episodeContents',
+ 'thumbnail': ('imgFileUrl', {url_or_none}),
+ })),
+ 'duration': parse_duration(playback_data.get('playTime')),
+ 'formats': formats,
+ 'subtitles': subtitles,
+ }
+
+
+class JTBCProgramIE(InfoExtractor):
+ IE_NAME = 'JTBC:program'
+ _VALID_URL = r'https?://(?:vod\.jtbc\.co\.kr/program|tv\.jtbc\.co\.kr/replay)/(?P<id>pr\d+)/(?:replay|pm\d+)/?(?:$|[?#])'
+
+ _TESTS = [{
+ 'url': 'https://tv.jtbc.co.kr/replay/pr10010392/pm10032710',
+ 'info_dict': {
+ '_type': 'playlist',
+ 'id': 'pr10010392',
+ },
+ 'playlist_count': 398,
+ }, {
+ 'url': 'https://vod.jtbc.co.kr/program/pr10011491/replay',
+ 'info_dict': {
+ '_type': 'playlist',
+ 'id': 'pr10011491',
+ },
+ 'playlist_count': 59,
+ }]
+
+ def _real_extract(self, url):
+ program_id = self._match_id(url)
+
+ vod_list = self._download_json(
+ 'https://now-api.jtbc.co.kr/v1/vodClip/programHome/programReplayVodList', program_id,
+ note='Downloading program replay list', query={
+ 'programId': program_id,
+ 'rowCount': '10000',
+ })
+
+ entries = [self.url_result(f'https://vod.jtbc.co.kr/player/program/{video_id}', JTBCIE, video_id)
+ for video_id in traverse_obj(vod_list, ('programReplayVodList', ..., 'episodeId'))]
+ return self.playlist_result(entries, program_id)
diff --git a/yt_dlp/extractor/jwplatform.py b/yt_dlp/extractor/jwplatform.py
new file mode 100644
index 0000000..bc47aa6
--- /dev/null
+++ b/yt_dlp/extractor/jwplatform.py
@@ -0,0 +1,90 @@
+import re
+
+from .common import InfoExtractor
+from ..utils import unsmuggle_url
+
+
+class JWPlatformIE(InfoExtractor):
+ _VALID_URL = r'(?:https?://(?:content\.jwplatform|cdn\.jwplayer)\.com/(?:(?:feed|player|thumb|preview|manifest)s|jw6|v2/media)/|jwplatform:)(?P<id>[a-zA-Z0-9]{8})'
+ _TESTS = [{
+ 'url': 'http://content.jwplatform.com/players/nPripu9l-ALJ3XQCI.js',
+ 'md5': '3aa16e4f6860e6e78b7df5829519aed3',
+ 'info_dict': {
+ 'id': 'nPripu9l',
+ 'ext': 'mp4',
+ 'title': 'Big Buck Bunny Trailer',
+ 'description': 'Big Buck Bunny is a short animated film by the Blender Institute. It is made using free and open source software.',
+ 'upload_date': '20081127',
+ 'timestamp': 1227796140,
+ 'duration': 32.0,
+ 'thumbnail': 'https://cdn.jwplayer.com/v2/media/nPripu9l/poster.jpg?width=720',
+ }
+ }, {
+ 'url': 'https://cdn.jwplayer.com/players/nPripu9l-ALJ3XQCI.js',
+ 'only_matching': True,
+ }]
+
+ _WEBPAGE_TESTS = [{
+ # JWPlatform iframe
+ 'url': 'https://www.covermagazine.co.uk/feature/2465255/business-protection-involved',
+ 'info_dict': {
+ 'id': 'AG26UQXM',
+ 'ext': 'mp4',
+ 'upload_date': '20160719',
+ 'timestamp': 1468923808,
+ 'title': '2016_05_18 Cover L&G Business Protection V1 FINAL.mp4',
+ 'thumbnail': 'https://cdn.jwplayer.com/v2/media/AG26UQXM/poster.jpg?width=720',
+ 'description': '',
+ 'duration': 294.0,
+ },
+ }, {
+ # Player url not surrounded by quotes
+ 'url': 'https://www.deutsche-kinemathek.de/en/online/streaming/school-trip',
+ 'info_dict': {
+ 'id': 'jUxh5uin',
+ 'title': 'Klassenfahrt',
+ 'ext': 'mp4',
+ 'upload_date': '20230109',
+ 'thumbnail': 'https://cdn.jwplayer.com/v2/media/jUxh5uin/poster.jpg?width=720',
+ 'timestamp': 1673270298,
+ 'description': '',
+ 'duration': 5193.0,
+ },
+ 'params': {'allowed_extractors': ['generic', 'jwplatform']},
+ }, {
+ # iframe src attribute includes backslash before URL string
+ 'url': 'https://www.elespectador.com/colombia/video-asi-se-evito-la-fuga-de-john-poulos-presunto-feminicida-de-valentina-trespalacios-explicacion',
+ 'info_dict': {
+ 'id': 'QD3gsexj',
+ 'title': 'Así se evitó la fuga de John Poulos, presunto feminicida de Valentina Trespalacios',
+ 'ext': 'mp4',
+ 'upload_date': '20230127',
+ 'thumbnail': 'https://cdn.jwplayer.com/v2/media/QD3gsexj/poster.jpg?width=720',
+ 'timestamp': 1674862986,
+ 'description': 'md5:128fd74591c4e1fc2da598c5cb6f5ce4',
+ 'duration': 263.0,
+ },
+ }]
+
+ @classmethod
+ def _extract_embed_urls(cls, url, webpage):
+ for tag, key in ((r'(?:script|iframe)', 'src'), ('input', 'value')):
+ # <input value=URL> is used by hyland.com
+ # if we find <iframe>, dont look for <input>
+ ret = re.findall(
+ r'<%s[^>]+?%s=\\?["\']?((?:https?:)?//(?:content\.jwplatform|cdn\.jwplayer)\.com/players/[a-zA-Z0-9]{8})' % (tag, key),
+ webpage)
+ if ret:
+ return ret
+ mobj = re.search(r'<div\b[^>]* data-video-jw-id="([a-zA-Z0-9]{8})"', webpage)
+ if mobj:
+ return [f'jwplatform:{mobj.group(1)}']
+
+ def _real_extract(self, url):
+ url, smuggled_data = unsmuggle_url(url, {})
+ self._initialize_geo_bypass({
+ 'countries': smuggled_data.get('geo_countries'),
+ })
+ video_id = self._match_id(url)
+ json_data = self._download_json('https://cdn.jwplayer.com/v2/media/' + video_id, video_id)
+ return self._parse_jwplayer_data(json_data, video_id)
diff --git a/yt_dlp/extractor/kakao.py b/yt_dlp/extractor/kakao.py
new file mode 100644
index 0000000..43055e8
--- /dev/null
+++ b/yt_dlp/extractor/kakao.py
@@ -0,0 +1,152 @@
+from .common import InfoExtractor
+from ..networking.exceptions import HTTPError
+from ..utils import (
+ ExtractorError,
+ int_or_none,
+    str_or_none,
+    strip_or_none,
+ traverse_obj,
+ unified_timestamp,
+)
+
+
+class KakaoIE(InfoExtractor):
+ _VALID_URL = r'https?://(?:play-)?tv\.kakao\.com/(?:channel/\d+|embed/player)/cliplink/(?P<id>\d+|[^?#&]+@my)'
+ _API_BASE_TMPL = 'http://tv.kakao.com/api/v1/ft/playmeta/cliplink/%s/'
+ _CDN_API = 'https://tv.kakao.com/katz/v1/ft/cliplink/%s/readyNplay?'
+
+ _TESTS = [{
+ 'url': 'http://tv.kakao.com/channel/2671005/cliplink/301965083',
+ 'md5': '702b2fbdeb51ad82f5c904e8c0766340',
+ 'info_dict': {
+ 'id': '301965083',
+ 'ext': 'mp4',
+ 'title': '乃木坂46 バナナマン 「3期生紹介コーナーが始動!顔高低差GPも!」 『乃木坂工事中』',
+ 'description': '',
+ 'uploader_id': '2671005',
+ 'uploader': '그랑그랑이',
+ 'timestamp': 1488160199,
+ 'upload_date': '20170227',
+ 'like_count': int,
+ 'thumbnail': r're:http://.+/thumb\.png',
+ 'tags': ['乃木坂'],
+ 'view_count': int,
+ 'duration': 1503,
+ 'comment_count': int,
+ }
+ }, {
+ 'url': 'http://tv.kakao.com/channel/2653210/cliplink/300103180',
+ 'md5': 'a8917742069a4dd442516b86e7d66529',
+ 'info_dict': {
+ 'id': '300103180',
+ 'ext': 'mp4',
+ 'description': '러블리즈 - Destiny (나의 지구) (Lovelyz - Destiny)\r\n\r\n[쇼! 음악중심] 20160611, 507회',
+ 'title': '러블리즈 - Destiny (나의 지구) (Lovelyz - Destiny)',
+ 'uploader_id': '2653210',
+ 'uploader': '쇼! 음악중심',
+ 'timestamp': 1485684628,
+ 'upload_date': '20170129',
+ 'like_count': int,
+ 'thumbnail': r're:http://.+/thumb\.png',
+ 'tags': 'count:28',
+ 'view_count': int,
+ 'duration': 184,
+ 'comment_count': int,
+ }
+ }, {
+ # geo restricted
+ 'url': 'https://tv.kakao.com/channel/3643855/cliplink/412069491',
+ 'only_matching': True,
+ }]
+
+ def _real_extract(self, url):
+ video_id = self._match_id(url)
+ api_base = self._API_BASE_TMPL % video_id
+ cdn_api_base = self._CDN_API % video_id
+
+ query = {
+ 'player': 'monet_html5',
+ 'referer': url,
+ 'uuid': '',
+ 'service': 'kakao_tv',
+ 'section': '',
+ 'dteType': 'PC',
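+            # '-*' appears to clear the default response field set; the names
+            # that follow opt the listed fields back in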
+ 'fields': ','.join([
+ '-*', 'tid', 'clipLink', 'displayTitle', 'clip', 'title',
+ 'description', 'channelId', 'createTime', 'duration', 'playCount',
+ 'likeCount', 'commentCount', 'tagList', 'channel', 'name',
+ 'clipChapterThumbnailList', 'thumbnailUrl', 'timeInSec', 'isDefault',
+ 'videoOutputList', 'width', 'height', 'kbps', 'profile', 'label'])
+ }
+
+ api_json = self._download_json(
+ api_base, video_id, 'Downloading video info')
+
+ clip_link = api_json['clipLink']
+ clip = clip_link['clip']
+
+ title = clip.get('title') or clip_link.get('displayTitle')
+
+ formats = []
+ for fmt in clip.get('videoOutputList') or []:
+ profile_name = fmt.get('profile')
+ if not profile_name or profile_name == 'AUDIO':
+ continue
+ query.update({
+ 'profile': profile_name,
+ 'fields': '-*,code,message,url',
+ })
+ try:
+ fmt_url_json = self._download_json(
+ cdn_api_base, video_id, query=query,
+ note='Downloading video URL for profile %s' % profile_name)
+ except ExtractorError as e:
+ if isinstance(e.cause, HTTPError) and e.cause.status == 403:
+ resp = self._parse_json(e.cause.response.read().decode(), video_id)
+ if resp.get('code') == 'GeoBlocked':
+ self.raise_geo_restricted()
+ raise
+
+ fmt_url = traverse_obj(fmt_url_json, ('videoLocation', 'url'))
+ if not fmt_url:
+ continue
+
+ formats.append({
+ 'url': fmt_url,
+ 'format_id': profile_name,
+ 'width': int_or_none(fmt.get('width')),
+ 'height': int_or_none(fmt.get('height')),
+ 'format_note': fmt.get('label'),
+ 'filesize': int_or_none(fmt.get('filesize')),
+ 'tbr': int_or_none(fmt.get('kbps')),
+ })
+
+ thumbs = []
+ for thumb in clip.get('clipChapterThumbnailList') or []:
+ thumbs.append({
+ 'url': thumb.get('thumbnailUrl'),
+ 'id': str(thumb.get('timeInSec')),
+ 'preference': -1 if thumb.get('isDefault') else 0
+ })
+ top_thumbnail = clip.get('thumbnailUrl')
+ if top_thumbnail:
+ thumbs.append({
+ 'url': top_thumbnail,
+ 'preference': 10,
+ })
+
+ return {
+ 'id': video_id,
+ 'title': title,
+ 'description': strip_or_none(clip.get('description')),
+ 'uploader': traverse_obj(clip_link, ('channel', 'name')),
+ 'uploader_id': str_or_none(clip_link.get('channelId')),
+ 'thumbnails': thumbs,
+ 'timestamp': unified_timestamp(clip_link.get('createTime')),
+ 'duration': int_or_none(clip.get('duration')),
+ 'view_count': int_or_none(clip.get('playCount')),
+ 'like_count': int_or_none(clip.get('likeCount')),
+ 'comment_count': int_or_none(clip.get('commentCount')),
+ 'formats': formats,
+ 'tags': clip.get('tagList'),
+ }
diff --git a/yt_dlp/extractor/kaltura.py b/yt_dlp/extractor/kaltura.py
new file mode 100644
index 0000000..95e2dee
--- /dev/null
+++ b/yt_dlp/extractor/kaltura.py
@@ -0,0 +1,545 @@
+import base64
+import json
+import re
+
+from .common import InfoExtractor
+from ..compat import (
+    compat_parse_qs,
+    compat_urlparse,
+)
+from ..utils import (
+    ExtractorError,
+    clean_html,
+    format_field,
+    int_or_none,
+    remove_start,
+    smuggle_url,
+    traverse_obj,
+    unsmuggle_url,
+)
+
+
+class KalturaIE(InfoExtractor):
+ _VALID_URL = r'''(?x)
+ (?:
+ kaltura:(?P<partner_id>\w+):(?P<id>\w+)(?::(?P<player_type>\w+))?|
+ https?://
+                        (?:(?:www|cdnapi(?:sec)?)\.)?kaltura\.com(?::\d+)?/
+ (?:
+ (?:
+ # flash player
+ index\.php/(?:kwidget|extwidget/preview)|
+ # html5 player
+ html5/html5lib/[^/]+/mwEmbedFrame\.php
+ )
+ )(?:/(?P<path>[^?]+))?(?:\?(?P<query>.*))?
+ )
+ '''
+ _SERVICE_URL = 'http://cdnapi.kaltura.com'
+ _SERVICE_BASE = '/api_v3/service/multirequest'
+ # See https://github.com/kaltura/server/blob/master/plugins/content/caption/base/lib/model/enums/CaptionType.php
+ _CAPTION_TYPES = {
+ 1: 'srt',
+ 2: 'ttml',
+ 3: 'vtt',
+ }
+ _TESTS = [
+ {
+ 'url': 'kaltura:269692:1_1jc2y3e4',
+ 'md5': '3adcbdb3dcc02d647539e53f284ba171',
+ 'info_dict': {
+ 'id': '1_1jc2y3e4',
+ 'ext': 'mp4',
+ 'title': 'Straight from the Heart',
+ 'upload_date': '20131219',
+ 'uploader_id': 'mlundberg@wolfgangsvault.com',
+ 'description': 'The Allman Brothers Band, 12/16/1981',
+ 'thumbnail': 're:^https?://.*/thumbnail/.*',
+ 'timestamp': int,
+ },
+ 'skip': 'The access to this service is forbidden since the specified partner is blocked'
+ },
+ {
+ 'url': 'http://www.kaltura.com/index.php/kwidget/cache_st/1300318621/wid/_269692/uiconf_id/3873291/entry_id/1_1jc2y3e4',
+ 'only_matching': True,
+ },
+ {
+ 'url': 'https://cdnapisec.kaltura.com/index.php/kwidget/wid/_557781/uiconf_id/22845202/entry_id/1_plr1syf3',
+ 'only_matching': True,
+ },
+ {
+ 'url': 'https://cdnapisec.kaltura.com/html5/html5lib/v2.30.2/mwEmbedFrame.php/p/1337/uiconf_id/20540612/entry_id/1_sf5ovm7u?wid=_243342',
+ 'only_matching': True,
+ },
+ {
+ # video with subtitles
+ 'url': 'kaltura:111032:1_cw786r8q',
+ 'only_matching': True,
+ },
+ {
+ # video with ttml subtitles (no fileExt)
+ 'url': 'kaltura:1926081:0_l5ye1133',
+ 'info_dict': {
+ 'id': '0_l5ye1133',
+ 'ext': 'mp4',
+ 'title': 'What Can You Do With Python?',
+ 'upload_date': '20160221',
+ 'uploader_id': 'stork',
+ 'thumbnail': 're:^https?://.*/thumbnail/.*',
+ 'timestamp': int,
+ 'subtitles': {
+ 'en': [{
+ 'ext': 'ttml',
+ }],
+ },
+ },
+ 'skip': 'Gone. Maybe https://www.safaribooksonline.com/library/tutorials/introduction-to-python-anon/3469/',
+ 'params': {
+ 'skip_download': True,
+ },
+ },
+ {
+ 'url': 'https://www.kaltura.com/index.php/extwidget/preview/partner_id/1770401/uiconf_id/37307382/entry_id/0_58u8kme7/embed/iframe?&flashvars[streamerType]=auto',
+ 'only_matching': True,
+ },
+ {
+ 'url': 'https://www.kaltura.com:443/index.php/extwidget/preview/partner_id/1770401/uiconf_id/37307382/entry_id/0_58u8kme7/embed/iframe?&flashvars[streamerType]=auto',
+ 'only_matching': True,
+ },
+ {
+ # unavailable source format
+ 'url': 'kaltura:513551:1_66x4rg7o',
+ 'only_matching': True,
+ },
+ {
+ # html5lib URL using kwidget player
+ 'url': 'https://cdnapisec.kaltura.com/html5/html5lib/v2.46/mwEmbedFrame.php/p/691292/uiconf_id/20499062/entry_id/0_c076mna6?wid=_691292&iframeembed=true&playerId=kaltura_player_1420508608&entry_id=0_c076mna6&flashvars%5BakamaiHD.loadingPolicy%5D=preInitialize&flashvars%5BakamaiHD.asyncInit%5D=true&flashvars%5BstreamerType%5D=hdnetwork',
+ 'info_dict': {
+ 'id': '0_c076mna6',
+ 'ext': 'mp4',
+ 'title': 'md5:4883e7acbcbf42583a2dddc97dee4855',
+ 'duration': 3608,
+ 'uploader_id': 'commons@swinburne.edu.au',
+ 'timestamp': 1408086874,
+ 'view_count': int,
+ 'upload_date': '20140815',
+ 'thumbnail': 'http://cfvod.kaltura.com/p/691292/sp/69129200/thumbnail/entry_id/0_c076mna6/version/100022',
+ }
+ },
+ {
+ # html5lib playlist URL using kwidget player
+ 'url': 'https://cdnapisec.kaltura.com/html5/html5lib/v2.89/mwEmbedFrame.php/p/2019031/uiconf_id/40436601?wid=1_4j3m32cv&iframeembed=true&playerId=kaltura_player_&flashvars[playlistAPI.kpl0Id]=1_jovey5nu&flashvars[ks]=&&flashvars[imageDefaultDuration]=30&flashvars[localizationCode]=en&flashvars[leadWithHTML5]=true&flashvars[forceMobileHTML5]=true&flashvars[nextPrevBtn.plugin]=true&flashvars[hotspots.plugin]=true&flashvars[sideBarContainer.plugin]=true&flashvars[sideBarContainer.position]=left&flashvars[sideBarContainer.clickToClose]=true&flashvars[chapters.plugin]=true&flashvars[chapters.layout]=vertical&flashvars[chapters.thumbnailRotator]=false&flashvars[streamSelector.plugin]=true&flashvars[EmbedPlayer.SpinnerTarget]=videoHolder&flashvars[dualScreen.plugin]=true&flashvars[playlistAPI.playlistUrl]=https://canvasgatechtest.kaf.kaltura.com/playlist/details/{playlistAPI.kpl0Id}/categoryid/126428551',
+ 'info_dict': {
+ 'id': '1_jovey5nu',
+ 'title': '00-00 Introduction'
+ },
+ 'playlist': [
+ {
+ 'info_dict': {
+ 'id': '1_b1y5hlvx',
+ 'ext': 'mp4',
+ 'title': 'CS7646_00-00 Introductio_Introduction',
+ 'duration': 91,
+ 'thumbnail': 'http://cfvod.kaltura.com/p/2019031/sp/201903100/thumbnail/entry_id/1_b1y5hlvx/version/100001',
+ 'view_count': int,
+ 'timestamp': 1533154447,
+ 'upload_date': '20180801',
+ 'uploader_id': 'djoyner3',
+ }
+ }, {
+ 'info_dict': {
+ 'id': '1_jfb7mdpn',
+ 'ext': 'mp4',
+ 'title': 'CS7646_00-00 Introductio_Three parts to the course',
+ 'duration': 63,
+ 'thumbnail': 'http://cfvod.kaltura.com/p/2019031/sp/201903100/thumbnail/entry_id/1_jfb7mdpn/version/100001',
+ 'view_count': int,
+ 'timestamp': 1533154489,
+ 'upload_date': '20180801',
+ 'uploader_id': 'djoyner3',
+ }
+ }, {
+ 'info_dict': {
+ 'id': '1_8xflxdp7',
+ 'ext': 'mp4',
+ 'title': 'CS7646_00-00 Introductio_Textbooks',
+ 'duration': 37,
+ 'thumbnail': 'http://cfvod.kaltura.com/p/2019031/sp/201903100/thumbnail/entry_id/1_8xflxdp7/version/100001',
+ 'view_count': int,
+ 'timestamp': 1533154512,
+ 'upload_date': '20180801',
+ 'uploader_id': 'djoyner3',
+ }
+ }, {
+ 'info_dict': {
+ 'id': '1_3hqew8kn',
+ 'ext': 'mp4',
+ 'title': 'CS7646_00-00 Introductio_Prerequisites',
+ 'duration': 49,
+ 'thumbnail': 'http://cfvod.kaltura.com/p/2019031/sp/201903100/thumbnail/entry_id/1_3hqew8kn/version/100001',
+ 'view_count': int,
+ 'timestamp': 1533154536,
+ 'upload_date': '20180801',
+ 'uploader_id': 'djoyner3',
+ }
+ }
+ ]
+ }
+ ]
+
+ @classmethod
+ def _extract_embed_urls(cls, url, webpage):
+ # Embed codes: https://knowledge.kaltura.com/embedding-kaltura-media-players-your-site
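+        # Three embed flavors are recognized, tried in order:
+        #   1. kWidget.embed() / kWidget.thumbEmbed() JS calls
+        #   2. player URLs inside JS strings, with the entry id given separately
+        #   3. <iframe src>/<meta content> player URLs carrying entry_id in the query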
+ finditer = (
+ list(re.finditer(
+ r"""(?xs)
+ kWidget\.(?:thumb)?[Ee]mbed\(
+ \{.*?
+ (?P<q1>['"])wid(?P=q1)\s*:\s*
+ (?P<q2>['"])_?(?P<partner_id>(?:(?!(?P=q2)).)+)(?P=q2),.*?
+ (?P<q3>['"])entry_?[Ii]d(?P=q3)\s*:\s*
+ (?P<q4>['"])(?P<id>(?:(?!(?P=q4)).)+)(?P=q4)(?:,|\s*\})
+ """, webpage))
+ or list(re.finditer(
+ r'''(?xs)
+ (?P<q1>["'])
+ (?:https?:)?//cdnapi(?:sec)?\.kaltura\.com(?::\d+)?/(?:(?!(?P=q1)).)*\b(?:p|partner_id)/(?P<partner_id>\d+)(?:(?!(?P=q1)).)*
+ (?P=q1).*?
+ (?:
+ (?:
+ entry_?[Ii]d|
+ (?P<q2>["'])entry_?[Ii]d(?P=q2)
+ )\s*:\s*|
+ \[\s*(?P<q2_1>["'])entry_?[Ii]d(?P=q2_1)\s*\]\s*=\s*
+ )
+ (?P<q3>["'])(?P<id>(?:(?!(?P=q3)).)+)(?P=q3)
+ ''', webpage))
+ or list(re.finditer(
+ r'''(?xs)
+ <(?:iframe[^>]+src|meta[^>]+\bcontent)=(?P<q1>["'])\s*
+ (?:https?:)?//(?:(?:www|cdnapi(?:sec)?)\.)?kaltura\.com/(?:(?!(?P=q1)).)*\b(?:p|partner_id)/(?P<partner_id>\d+)
+ (?:(?!(?P=q1)).)*
+ [?&;]entry_id=(?P<id>(?:(?!(?P=q1))[^&])+)
+ (?:(?!(?P=q1)).)*
+ (?P=q1)
+ ''', webpage))
+ )
+ urls = []
+ for mobj in finditer:
+ embed_info = mobj.groupdict()
+ for k, v in embed_info.items():
+ if v:
+ embed_info[k] = v.strip()
+ embed_url = 'kaltura:%(partner_id)s:%(id)s' % embed_info
+ escaped_pid = re.escape(embed_info['partner_id'])
+ service_mobj = re.search(
+ r'<script[^>]+src=(["\'])(?P<id>(?:https?:)?//(?:(?!\1).)+)/p/%s/sp/%s00/embedIframeJs' % (escaped_pid, escaped_pid),
+ webpage)
+ if service_mobj:
+ embed_url = smuggle_url(embed_url, {'service_url': service_mobj.group('id')})
+ urls.append(embed_url)
+ return urls
+
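+    # Kaltura's multirequest API bundles several service calls into one POST:
+    # actions[0] holds the common parameters and every following action is
+    # keyed by its 1-based index. A later action may reference an earlier
+    # result via tokens such as '{1:result:ks}' (the session token issued by
+    # action 1). Abridged sketch of the request body:
+    #     {"apiVersion": ..., "partnerId": ...,
+    #      "1": {"service": "session", "action": "startWidgetSession", ...},
+    #      "2": {"service": "baseentry", "ks": "{1:result:ks}", ...}}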
+ def _kaltura_api_call(self, video_id, actions, service_url=None, *args, **kwargs):
+ params = actions[0]
+ params.update({i: a for i, a in enumerate(actions[1:], start=1)})
+
+ data = self._download_json(
+ (service_url or self._SERVICE_URL) + self._SERVICE_BASE,
+ video_id, data=json.dumps(params).encode('utf-8'),
+ headers={
+ 'Content-Type': 'application/json',
+ 'Accept-Encoding': 'gzip, deflate, br',
+ }, *args, **kwargs)
+
+ for idx, status in enumerate(data):
+ if not isinstance(status, dict):
+ continue
+ if status.get('objectType') == 'KalturaAPIException':
+ raise ExtractorError(
+ '%s said: %s (%d)' % (self.IE_NAME, status['message'], idx))
+
+ data[1] = traverse_obj(data, (1, 'objects', 0))
+
+ return data
+
+ def _get_video_info(self, video_id, partner_id, service_url=None, player_type='html5'):
+ assert player_type in ('html5', 'kwidget')
+ if player_type == 'kwidget':
+ return self._get_video_info_kwidget(video_id, partner_id, service_url)
+
+ return self._get_video_info_html5(video_id, partner_id, service_url)
+
+ def _get_video_info_html5(self, video_id, partner_id, service_url=None):
+ actions = [
+ {
+ 'apiVersion': '3.3.0',
+ 'clientTag': 'html5:v3.1.0',
+ 'format': 1, # JSON, 2 = XML, 3 = PHP
+ 'ks': '',
+ 'partnerId': partner_id,
+ },
+ {
+ 'expiry': 86400,
+ 'service': 'session',
+ 'action': 'startWidgetSession',
+ 'widgetId': self._build_widget_id(partner_id),
+ },
+ # info
+ {
+ 'action': 'list',
+ 'filter': {'redirectFromEntryId': video_id},
+ 'service': 'baseentry',
+ 'ks': '{1:result:ks}',
+ 'responseProfile': {
+ 'type': 1,
+ 'fields': 'createdAt,dataUrl,duration,name,plays,thumbnailUrl,userId',
+ },
+ },
+ # flavor_assets
+ {
+ 'action': 'getbyentryid',
+ 'entryId': video_id,
+ 'service': 'flavorAsset',
+ 'ks': '{1:result:ks}',
+ },
+ # captions
+ {
+ 'action': 'list',
+ 'filter:entryIdEqual': video_id,
+ 'service': 'caption_captionasset',
+ 'ks': '{1:result:ks}',
+ },
+ ]
+ return self._kaltura_api_call(
+ video_id, actions, service_url, note='Downloading video info JSON (Kaltura html5 player)')
+
+ def _get_video_info_kwidget(self, video_id, partner_id, service_url=None):
+ actions = [
+ {
+ 'service': 'multirequest',
+ 'apiVersion': '3.1',
+ 'expiry': 86400,
+ 'clientTag': 'kwidget:v2.89',
+ 'format': 1, # JSON, 2 = XML, 3 = PHP
+ 'ignoreNull': 1,
+ 'action': 'null',
+ },
+ # header
+ {
+ 'expiry': 86400,
+ 'service': 'session',
+ 'action': 'startWidgetSession',
+ 'widgetId': self._build_widget_id(partner_id),
+ },
+ # (empty)
+ {
+ 'expiry': 86400,
+ 'service': 'session',
+ 'action': 'startwidgetsession',
+ 'widgetId': self._build_widget_id(partner_id),
+ 'format': 9,
+ 'apiVersion': '3.1',
+ 'clientTag': 'kwidget:v2.89',
+ 'ignoreNull': 1,
+ 'ks': '{1:result:ks}'
+ },
+ # info
+ {
+ 'action': 'list',
+ 'filter': {'redirectFromEntryId': video_id},
+ 'service': 'baseentry',
+ 'ks': '{1:result:ks}',
+ 'responseProfile': {
+ 'type': 1,
+ 'fields': 'createdAt,dataUrl,duration,name,plays,thumbnailUrl,userId',
+ },
+ },
+ # flavor_assets
+ {
+ 'action': 'getbyentryid',
+ 'entryId': video_id,
+ 'service': 'flavorAsset',
+ 'ks': '{1:result:ks}',
+ },
+ # captions
+ {
+ 'action': 'list',
+ 'filter:entryIdEqual': video_id,
+ 'service': 'caption_captionasset',
+ 'ks': '{1:result:ks}',
+ },
+ ]
+ # second object (representing the second start widget session) is None
+ header, _, _info, flavor_assets, captions = self._kaltura_api_call(
+ video_id, actions, service_url, note='Downloading video info JSON (Kaltura kwidget player)')
+ info = _info['objects'][0]
+ return header, info, flavor_assets, captions
+
+ def _build_widget_id(self, partner_id):
+ return partner_id if '_' in partner_id else f'_{partner_id}'
+
+ IFRAME_PACKAGE_DATA_REGEX = r'window\.kalturaIframePackageData\s*='
+
+ def _real_extract(self, url):
+ url, smuggled_data = unsmuggle_url(url, {})
+
+ mobj = self._match_valid_url(url)
+ partner_id, entry_id, player_type = mobj.group('partner_id', 'id', 'player_type')
+ ks, captions = None, None
+ if not player_type:
+ player_type = 'kwidget' if 'html5lib/v2' in url else 'html5'
+ if partner_id and entry_id:
+ _, info, flavor_assets, captions = self._get_video_info(entry_id, partner_id, smuggled_data.get('service_url'), player_type=player_type)
+ else:
+ path, query = mobj.group('path', 'query')
+ if not path and not query:
+ raise ExtractorError('Invalid URL', expected=True)
+ params = {}
+ if query:
+ params = compat_parse_qs(query)
+ if path:
+                split_path = path.split('/')
+                params.update(dict(zip(split_path[::2], [[v] for v in split_path[1::2]])))
+ if 'wid' in params:
+ partner_id = remove_start(params['wid'][0], '_')
+ elif 'p' in params:
+ partner_id = params['p'][0]
+ elif 'partner_id' in params:
+ partner_id = params['partner_id'][0]
+ else:
+ raise ExtractorError('Invalid URL', expected=True)
+ if 'entry_id' in params:
+ entry_id = params['entry_id'][0]
+ _, info, flavor_assets, captions = self._get_video_info(entry_id, partner_id, player_type=player_type)
+ elif 'uiconf_id' in params and 'flashvars[referenceId]' in params:
+ reference_id = params['flashvars[referenceId]'][0]
+ webpage = self._download_webpage(url, reference_id)
+ entry_data = self._search_json(
+ self.IFRAME_PACKAGE_DATA_REGEX, webpage,
+ 'kalturaIframePackageData', reference_id)['entryResult']
+ info, flavor_assets = entry_data['meta'], entry_data['contextData']['flavorAssets']
+ entry_id = info['id']
+                # Unfortunately, the data returned in kalturaIframePackageData
+                # lacks captions, so try requesting the complete data via the
+                # regular approach now that the entry_id is known
+ try:
+ _, info, flavor_assets, captions = self._get_video_info(
+ entry_id, partner_id, player_type=player_type)
+ except ExtractorError:
+                    # The regular approach failed, but everything except the
+                    # captions has already been extracted, so proceed with that
+ pass
+ elif 'uiconf_id' in params and 'flashvars[playlistAPI.kpl0Id]' in params:
+ playlist_id = params['flashvars[playlistAPI.kpl0Id]'][0]
+ webpage = self._download_webpage(url, playlist_id)
+ playlist_data = self._search_json(
+ self.IFRAME_PACKAGE_DATA_REGEX, webpage,
+ 'kalturaIframePackageData', playlist_id)['playlistResult']
+ return self.playlist_from_matches(
+ traverse_obj(playlist_data, (playlist_id, 'items', ..., 'id')),
+ playlist_id, traverse_obj(playlist_data, (playlist_id, 'name')),
+ ie=KalturaIE, getter=lambda x: f'kaltura:{partner_id}:{x}:{player_type}')
+ else:
+ raise ExtractorError('Invalid URL', expected=True)
+ ks = params.get('flashvars[ks]', [None])[0]
+
+ return self._per_video_extract(smuggled_data, entry_id, info, ks, flavor_assets, captions)
+
+ def _per_video_extract(self, smuggled_data, entry_id, info, ks, flavor_assets, captions):
+ source_url = smuggled_data.get('source_url')
+ if source_url:
+ referrer = base64.b64encode(
+ '://'.join(compat_urlparse.urlparse(source_url)[:2])
+ .encode('utf-8')).decode('utf-8')
+ else:
+ referrer = None
+
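+        # The CDN accepts the session token (ks) as an extra path segment and
+        # the embedding page's origin, base64-encoded, as a 'referrer' query
+        # parameter; both are optional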
+ def sign_url(unsigned_url):
+ if ks:
+ unsigned_url += '/ks/%s' % ks
+ if referrer:
+ unsigned_url += '?referrer=%s' % referrer
+ return unsigned_url
+
+ data_url = info['dataUrl']
+ if '/flvclipper/' in data_url:
+ data_url = re.sub(r'/flvclipper/.*', '/serveFlavor', data_url)
+
+ formats = []
+ subtitles = {}
+ for f in flavor_assets:
+ # Continue if asset is not ready
+ if f.get('status') != 2:
+ continue
+            # The original format may be unavailable (e.g. kaltura:1926081:0_c03e1b5g);
+            # skip it for now
+ if f.get('fileExt') == 'chun':
+ continue
+ # DRM-protected video, cannot be decrypted
+ if not self.get_param('allow_unplayable_formats') and f.get('fileExt') == 'wvm':
+ continue
+ if not f.get('fileExt'):
+ # QT indicates QuickTime; some videos have broken fileExt
+ if f.get('containerFormat') == 'qt':
+ f['fileExt'] = 'mov'
+ else:
+ f['fileExt'] = 'mp4'
+ video_url = sign_url(
+ '%s/flavorId/%s' % (data_url, f['id']))
+ format_id = '%(fileExt)s-%(bitrate)s' % f
+ # Source format may not be available (e.g. kaltura:513551:1_66x4rg7o)
+ if f.get('isOriginal') is True and not self._is_valid_url(
+ video_url, entry_id, format_id):
+ continue
+            # audio-only flavors have no videoCodecId (e.g. kaltura:1926081:0_c03e1b5g -f mp4-56)
+            vcodec = 'none' if 'videoCodecId' not in f and f.get('frameRate') == 0 else f.get('videoCodecId')
+ formats.append({
+ 'format_id': format_id,
+ 'ext': f.get('fileExt'),
+ 'tbr': int_or_none(f['bitrate']),
+ 'fps': int_or_none(f.get('frameRate')),
+ 'filesize_approx': int_or_none(f.get('size'), invscale=1024),
+ 'container': f.get('containerFormat'),
+ 'vcodec': vcodec,
+ 'height': int_or_none(f.get('height')),
+ 'width': int_or_none(f.get('width')),
+ 'url': video_url,
+ })
+ if '/playManifest/' in data_url:
+ m3u8_url = sign_url(data_url.replace(
+ 'format/url', 'format/applehttp'))
+ fmts, subs = self._extract_m3u8_formats_and_subtitles(
+ m3u8_url, entry_id, 'mp4', 'm3u8_native',
+ m3u8_id='hls', fatal=False)
+ formats.extend(fmts)
+ self._merge_subtitles(subs, target=subtitles)
+
+ if captions:
+ for caption in captions.get('objects', []):
+ # Continue if caption is not ready
+ if caption.get('status') != 2:
+ continue
+ if not caption.get('id'):
+ continue
+ caption_format = int_or_none(caption.get('format'))
+ subtitles.setdefault(caption.get('languageCode') or caption.get('language'), []).append({
+ 'url': '%s/api_v3/service/caption_captionasset/action/serve/captionAssetId/%s' % (self._SERVICE_URL, caption['id']),
+ 'ext': caption.get('fileExt') or self._CAPTION_TYPES.get(caption_format) or 'ttml',
+ })
+
+ return {
+ 'id': entry_id,
+ 'title': info['name'],
+ 'formats': formats,
+ 'subtitles': subtitles,
+ 'description': clean_html(info.get('description')),
+ 'thumbnail': info.get('thumbnailUrl'),
+ 'duration': info.get('duration'),
+ 'timestamp': info.get('createdAt'),
+ 'uploader_id': format_field(info, 'userId', ignore=('None', None)),
+ 'view_count': int_or_none(info.get('plays')),
+ }
diff --git a/yt_dlp/extractor/kankanews.py b/yt_dlp/extractor/kankanews.py
new file mode 100644
index 0000000..8f247b3
--- /dev/null
+++ b/yt_dlp/extractor/kankanews.py
@@ -0,0 +1,49 @@
+import hashlib
+import random
+import string
+import time
+import urllib.parse
+
+from .common import InfoExtractor
+
+
+class KankaNewsIE(InfoExtractor):
+ _WORKING = False
+    _VALID_URL = r'https?://(?:www\.)?kankanews\.com/a/\d+-\d+-\d+/(?P<id>\d+)\.shtml'
+ _TESTS = [{
+ 'url': 'https://www.kankanews.com/a/2022-11-08/00310276054.shtml?appid=1088227',
+ 'md5': '05e126513c74b1258d657452a6f4eef9',
+ 'info_dict': {
+ 'id': '4485057',
+ 'url': 'http://mediaplay.kksmg.com/2022/11/08/h264_450k_mp4_1a388ad771e0e4cc28b0da44d245054e_ncm.mp4',
+ 'ext': 'mp4',
+ 'title': '视频|第23个中国记者节,我们在进博切蛋糕',
+            'thumbnail': r're:^https?://.*\.jpg',
+ }
+ }]
+
+ def _real_extract(self, url):
+ display_id = self._match_id(url)
+ webpage = self._download_webpage(url, display_id)
+ video_id = self._search_regex(r'omsid\s*=\s*"(\d+)"', webpage, 'video id')
+
+ params = {
+ 'nonce': ''.join(random.choices(string.ascii_lowercase + string.digits, k=8)),
+ 'omsid': video_id,
+ 'platform': 'pc',
+ 'timestamp': int(time.time()),
+ 'version': '1.0',
+ }
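+        # The API appears to expect sign = md5(md5(urlencode(params) + '&KEY'))
+        # as a hex digest, where the static KEY
+        # ('28c8edde3d61a0411511d3b1866f0636') is presumably baked into the
+        # site's player JS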
+ params['sign'] = hashlib.md5((hashlib.md5((
+ urllib.parse.urlencode(params) + '&28c8edde3d61a0411511d3b1866f0636'
+ ).encode()).hexdigest()).encode()).hexdigest()
+
+ meta = self._download_json('https://api-app.kankanews.com/kankan/pc/getvideo',
+ video_id, query=params)['result']['video']
+
+ return {
+ 'id': video_id,
+ 'url': meta['videourl'],
+ 'title': self._search_regex(r'g\.title\s*=\s*"([^"]+)"', webpage, 'title'),
+ 'thumbnail': meta.get('titlepic'),
+ }
diff --git a/yt_dlp/extractor/karaoketv.py b/yt_dlp/extractor/karaoketv.py
new file mode 100644
index 0000000..381dc00
--- /dev/null
+++ b/yt_dlp/extractor/karaoketv.py
@@ -0,0 +1,61 @@
+from .common import InfoExtractor
+
+
+class KaraoketvIE(InfoExtractor):
+ _VALID_URL = r'https?://(?:www\.)?karaoketv\.co\.il/[^/]+/(?P<id>\d+)'
+ _TEST = {
+ 'url': 'http://www.karaoketv.co.il/%D7%A9%D7%99%D7%A8%D7%99_%D7%A7%D7%A8%D7%99%D7%95%D7%A7%D7%99/58356/%D7%90%D7%99%D7%96%D7%95%D7%9F',
+ 'info_dict': {
+ 'id': '58356',
+ 'ext': 'flv',
+ 'title': 'קריוקי של איזון',
+ },
+ 'params': {
+ # rtmp download
+ 'skip_download': True,
+ }
+ }
+
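+    # Extraction is a three-hop chain: the page embeds an api_play.php iframe,
+    # which in turn embeds a video-cdn.com player whose inline 'options' and
+    # 'settings' JS objects carry the RTMP play path and the server list.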
+ def _real_extract(self, url):
+ video_id = self._match_id(url)
+
+ webpage = self._download_webpage(url, video_id)
+ api_page_url = self._search_regex(
+ r'<iframe[^>]+src=(["\'])(?P<url>https?://www\.karaoke\.co\.il/api_play\.php\?.+?)\1',
+ webpage, 'API play URL', group='url')
+
+ api_page = self._download_webpage(api_page_url, video_id)
+ video_cdn_url = self._search_regex(
+ r'<iframe[^>]+src=(["\'])(?P<url>https?://www\.video-cdn\.com/embed/iframe/.+?)\1',
+ api_page, 'video cdn URL', group='url')
+
+ video_cdn = self._download_webpage(video_cdn_url, video_id)
+ play_path = self._parse_json(
+ self._search_regex(
+ r'var\s+options\s*=\s*({.+?});', video_cdn, 'options'),
+ video_id)['clip']['url']
+
+ settings = self._parse_json(
+ self._search_regex(
+ r'var\s+settings\s*=\s*({.+?});', video_cdn, 'servers', default='{}'),
+ video_id, fatal=False) or {}
+
+ servers = settings.get('servers')
+ if not servers or not isinstance(servers, list):
+ servers = ('wowzail.video-cdn.com:80/vodcdn', )
+
+ formats = [{
+ 'url': 'rtmp://%s' % server if not server.startswith('rtmp') else server,
+ 'play_path': play_path,
+ 'app': 'vodcdn',
+ 'page_url': video_cdn_url,
+ 'player_url': 'http://www.video-cdn.com/assets/flowplayer/flowplayer.commercial-3.2.18.swf',
+ 'rtmp_real_time': True,
+ 'ext': 'flv',
+ } for server in servers]
+
+ return {
+ 'id': video_id,
+ 'title': self._og_search_title(webpage),
+ 'formats': formats,
+ }
diff --git a/yt_dlp/extractor/kelbyone.py b/yt_dlp/extractor/kelbyone.py
new file mode 100644
index 0000000..bba527e
--- /dev/null
+++ b/yt_dlp/extractor/kelbyone.py
@@ -0,0 +1,81 @@
+from .common import InfoExtractor
+from ..utils import int_or_none
+
+
+class KelbyOneIE(InfoExtractor):
+ _WORKING = False
+ _VALID_URL = r'https?://members\.kelbyone\.com/course/(?P<id>[^$&?#/]+)'
+
+ _TESTS = [{
+ 'url': 'https://members.kelbyone.com/course/glyn-dewis-mastering-selections/',
+ 'playlist_mincount': 1,
+ 'info_dict': {
+ 'id': 'glyn-dewis-mastering-selections',
+ 'title': 'Trailer - Mastering Selections in Photoshop',
+ },
+ 'playlist': [{
+ 'info_dict': {
+ 'id': 'MkiOnLqK',
+ 'ext': 'mp4',
+ 'title': 'Trailer - Mastering Selections in Photoshop',
+ 'description': 'md5:d41d8cd98f00b204e9800998ecf8427e',
+ 'thumbnail': 'https://content.jwplatform.com/v2/media/MkiOnLqK/poster.jpg?width=720',
+ 'timestamp': 1601568639,
+ 'duration': 90,
+ 'upload_date': '20201001',
+ },
+ }]
+ }]
+
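+    # The course page links to a JW Platform playlist feed; each playlist item
+    # carries 'sources' (an HLS manifest plus progressive files) and 'tracks'
+    # (caption files), which are mapped to formats/subtitles below.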
+ def _entries(self, playlist):
+ for item in playlist:
+ video_id = item['mediaid']
+ thumbnails = [{
+ 'url': image.get('src'),
+ 'width': int_or_none(image.get('width')),
+ } for image in item.get('images') or []]
+ formats, subtitles = [], {}
+ for source in item.get('sources') or []:
+ if not source.get('file'):
+ continue
+ if source.get('type') == 'application/vnd.apple.mpegurl':
+ fmts, subs = self._extract_m3u8_formats_and_subtitles(source['file'], video_id)
+ formats.extend(fmts)
+ subtitles = self._merge_subtitles(subs, subtitles)
+ elif source.get('type') == 'audio/mp4':
+ formats.append({
+ 'format_id': source.get('label'),
+ 'url': source['file'],
+ 'vcodec': 'none',
+ })
+ else:
+ formats.append({
+ 'format_id': source.get('label'),
+ 'height': source.get('height'),
+ 'width': source.get('width'),
+ 'url': source['file'],
+ })
+        for track in item.get('tracks') or []:
+ if track.get('kind') == 'captions' and track.get('file'):
+ subtitles.setdefault('en', []).append({
+ 'url': track['file'],
+ })
+ yield {
+ 'id': video_id,
+ 'title': item['title'],
+ 'description': item.get('description'),
+ 'thumbnails': thumbnails,
+ 'thumbnail': item.get('image'),
+ 'timestamp': item.get('pubdate'),
+ 'duration': item.get('duration'),
+ 'formats': formats,
+ 'subtitles': subtitles,
+ }
+
+ def _real_extract(self, url):
+ item_id = self._match_id(url)
+ webpage = self._download_webpage(url, item_id)
+ playlist_url = self._html_search_regex(r'playlist"\:"(https.*content\.jwplatform\.com.*json)"', webpage, 'playlist url').replace('\\', '')
+ course_data = self._download_json(playlist_url, item_id)
+ return self.playlist_result(self._entries(course_data['playlist']), item_id,
+ course_data.get('title'), course_data.get('description'))
diff --git a/yt_dlp/extractor/khanacademy.py b/yt_dlp/extractor/khanacademy.py
new file mode 100644
index 0000000..5333036
--- /dev/null
+++ b/yt_dlp/extractor/khanacademy.py
@@ -0,0 +1,110 @@
+import json
+
+from .common import InfoExtractor
+from ..utils import (
+ int_or_none,
+ parse_iso8601,
+ try_get,
+)
+
+
+class KhanAcademyBaseIE(InfoExtractor):
+ _VALID_URL_TEMPL = r'https?://(?:www\.)?khanacademy\.org/(?P<id>(?:[^/]+/){%s}%s[^?#/&]+)'
+
+ def _parse_video(self, video):
+ return {
+ '_type': 'url_transparent',
+ 'url': video['youtubeId'],
+ 'id': video.get('slug'),
+ 'title': video.get('title'),
+ 'thumbnail': video.get('imageUrl') or video.get('thumbnailUrl'),
+ 'duration': int_or_none(video.get('duration')),
+ 'description': video.get('description'),
+ 'ie_key': 'Youtube',
+ }
+
+ def _real_extract(self, url):
+ display_id = self._match_id(url)
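+        # FetchContentData looks like a persisted GraphQL query: 'hash' pins a
+        # specific query version server-side, and the response embeds the page
+        # payload as a JSON string ('contentJson'), hence the second
+        # _parse_json below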
+ content = self._download_json(
+ 'https://www.khanacademy.org/api/internal/graphql/FetchContentData',
+ display_id, query={
+ 'fastly_cacheable': 'persist_until_publish',
+ 'hash': '4134764944',
+ 'lang': 'en',
+ 'variables': json.dumps({
+ 'path': display_id,
+ 'queryParams': 'lang=en',
+ 'isModal': False,
+ 'followRedirects': True,
+ 'countryCode': 'US',
+ }),
+ })['data']['contentJson']
+ return self._parse_component_props(self._parse_json(content, display_id)['componentProps'])
+
+
+class KhanAcademyIE(KhanAcademyBaseIE):
+ IE_NAME = 'khanacademy'
+ _VALID_URL = KhanAcademyBaseIE._VALID_URL_TEMPL % ('4', 'v/')
+ _TEST = {
+ 'url': 'https://www.khanacademy.org/computing/computer-science/cryptography/crypt/v/one-time-pad',
+ 'md5': '9c84b7b06f9ebb80d22a5c8dedefb9a0',
+ 'info_dict': {
+ 'id': 'FlIG3TvQCBQ',
+ 'ext': 'mp4',
+ 'title': 'The one-time pad',
+ 'description': 'The perfect cipher',
+ 'duration': 176,
+ 'uploader': 'Brit Cruise',
+ 'uploader_id': 'khanacademy',
+ 'upload_date': '20120411',
+ 'timestamp': 1334170113,
+ 'license': 'cc-by-nc-sa',
+ },
+ 'add_ie': ['Youtube'],
+ }
+
+ def _parse_component_props(self, component_props):
+ video = component_props['tutorialPageData']['contentModel']
+ info = self._parse_video(video)
+ author_names = video.get('authorNames')
+ info.update({
+ 'uploader': ', '.join(author_names) if author_names else None,
+ 'timestamp': parse_iso8601(video.get('dateAdded')),
+ 'license': video.get('kaUserLicense'),
+ })
+ return info
+
+
+class KhanAcademyUnitIE(KhanAcademyBaseIE):
+ IE_NAME = 'khanacademy:unit'
+ _VALID_URL = (KhanAcademyBaseIE._VALID_URL_TEMPL % ('2', '')) + '/?(?:[?#&]|$)'
+ _TEST = {
+ 'url': 'https://www.khanacademy.org/computing/computer-science/cryptography',
+ 'info_dict': {
+ 'id': 'cryptography',
+ 'title': 'Cryptography',
+ 'description': 'How have humans protected their secret messages through history? What has changed today?',
+ },
+ 'playlist_mincount': 31,
+ }
+
+ def _parse_component_props(self, component_props):
+ curation = component_props['curation']
+
+ entries = []
+ tutorials = try_get(curation, lambda x: x['tabs'][0]['modules'][0]['tutorials'], list) or []
+ for tutorial_number, tutorial in enumerate(tutorials, 1):
+ chapter_info = {
+ 'chapter': tutorial.get('title'),
+ 'chapter_number': tutorial_number,
+ 'chapter_id': tutorial.get('id'),
+ }
+ for content_item in (tutorial.get('contentItems') or []):
+ if content_item.get('kind') == 'Video':
+ info = self._parse_video(content_item)
+ info.update(chapter_info)
+ entries.append(info)
+
+ return self.playlist_result(
+ entries, curation.get('unit'), curation.get('title'),
+ curation.get('description'))
diff --git a/yt_dlp/extractor/kick.py b/yt_dlp/extractor/kick.py
new file mode 100644
index 0000000..d124372
--- /dev/null
+++ b/yt_dlp/extractor/kick.py
@@ -0,0 +1,126 @@
+from .common import InfoExtractor
+from ..networking import HEADRequest
+from ..utils import (
+ UserNotLive,
+ float_or_none,
+ merge_dicts,
+ str_or_none,
+ traverse_obj,
+ unified_timestamp,
+ url_or_none,
+)
+
+
+class KickBaseIE(InfoExtractor):
+ def _real_initialize(self):
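+        # kick.com sets an XSRF-TOKEN cookie on any response; that same value
+        # is replayed both as a Bearer token and as the X-XSRF-TOKEN header,
+        # so a cheap HEAD request is enough to bootstrap an anonymous session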
+ self._request_webpage(HEADRequest('https://kick.com/'), None, 'Setting up session', fatal=False)
+ xsrf_token = self._get_cookies('https://kick.com/').get('XSRF-TOKEN')
+ if not xsrf_token:
+ self.write_debug('kick.com did not set XSRF-TOKEN cookie')
+ KickBaseIE._API_HEADERS = {
+ 'Authorization': f'Bearer {xsrf_token.value}',
+ 'X-XSRF-TOKEN': xsrf_token.value,
+ } if xsrf_token else {}
+
+    def _call_api(self, path, display_id, note='Downloading API JSON', headers=None, **kwargs):
+        return self._download_json(
+            f'https://kick.com/api/v1/{path}', display_id, note=note,
+            headers=merge_dicts(headers or {}, self._API_HEADERS), **kwargs)
+
+
+class KickIE(KickBaseIE):
+ _VALID_URL = r'https?://(?:www\.)?kick\.com/(?!(?:video|categories|search|auth)(?:[/?#]|$))(?P<id>[\w-]+)'
+ _TESTS = [{
+ 'url': 'https://kick.com/yuppy',
+ 'info_dict': {
+ 'id': '6cde1-kickrp-joe-flemmingskick-info-heremust-knowmust-see21',
+ 'ext': 'mp4',
+ 'title': str,
+ 'description': str,
+ 'channel': 'yuppy',
+ 'channel_id': '33538',
+ 'uploader': 'Yuppy',
+ 'uploader_id': '33793',
+ 'upload_date': str,
+ 'live_status': 'is_live',
+ 'timestamp': int,
+ 'thumbnail': r're:^https?://.*\.jpg',
+ 'categories': list,
+ },
+ 'skip': 'livestream',
+ }, {
+ 'url': 'https://kick.com/kmack710',
+ 'only_matching': True,
+ }]
+
+ def _real_extract(self, url):
+ channel = self._match_id(url)
+ response = self._call_api(f'channels/{channel}', channel)
+ if not traverse_obj(response, 'livestream', expected_type=dict):
+ raise UserNotLive(video_id=channel)
+
+ return {
+ 'id': str(traverse_obj(
+ response, ('livestream', ('slug', 'id')), get_all=False, default=channel)),
+ 'formats': self._extract_m3u8_formats(
+ response['playback_url'], channel, 'mp4', live=True),
+ 'title': traverse_obj(
+ response, ('livestream', ('session_title', 'slug')), get_all=False, default=''),
+ 'description': traverse_obj(response, ('user', 'bio')),
+ 'channel': channel,
+ 'channel_id': str_or_none(traverse_obj(response, 'id', ('livestream', 'channel_id'))),
+ 'uploader': traverse_obj(response, 'name', ('user', 'username')),
+ 'uploader_id': str_or_none(traverse_obj(response, 'user_id', ('user', 'id'))),
+ 'is_live': True,
+ 'timestamp': unified_timestamp(traverse_obj(response, ('livestream', 'created_at'))),
+ 'thumbnail': traverse_obj(
+ response, ('livestream', 'thumbnail', 'url'), expected_type=url_or_none),
+ 'categories': traverse_obj(response, ('recent_categories', ..., 'name')),
+ }
+
+
+class KickVODIE(KickBaseIE):
+ _VALID_URL = r'https?://(?:www\.)?kick\.com/video/(?P<id>[\da-f]{8}-(?:[\da-f]{4}-){3}[\da-f]{12})'
+ _TESTS = [{
+ 'url': 'https://kick.com/video/54244b5e-050a-4df4-a013-b2433dafbe35',
+ 'md5': '73691206a6a49db25c5aa1588e6538fc',
+ 'info_dict': {
+ 'id': '54244b5e-050a-4df4-a013-b2433dafbe35',
+ 'ext': 'mp4',
+ 'title': 'Making 710-carBoosting. Kinda No Pixel inspired. !guilded - !links',
+ 'description': 'md5:a0d3546bf7955d0a8252ffe0fd6f518f',
+ 'channel': 'kmack710',
+ 'channel_id': '16278',
+ 'uploader': 'Kmack710',
+ 'uploader_id': '16412',
+ 'upload_date': '20221206',
+ 'timestamp': 1670318289,
+ 'duration': 40104.0,
+ 'thumbnail': r're:^https?://.*\.jpg',
+ 'categories': ['Grand Theft Auto V'],
+ },
+ 'params': {
+ 'skip_download': 'm3u8',
+ },
+ }]
+
+ def _real_extract(self, url):
+ video_id = self._match_id(url)
+ response = self._call_api(f'video/{video_id}', video_id)
+
+ return {
+ 'id': video_id,
+ 'formats': self._extract_m3u8_formats(response['source'], video_id, 'mp4'),
+ 'title': traverse_obj(
+ response, ('livestream', ('session_title', 'slug')), get_all=False, default=''),
+ 'description': traverse_obj(response, ('livestream', 'channel', 'user', 'bio')),
+ 'channel': traverse_obj(response, ('livestream', 'channel', 'slug')),
+ 'channel_id': str_or_none(traverse_obj(response, ('livestream', 'channel', 'id'))),
+ 'uploader': traverse_obj(response, ('livestream', 'channel', 'user', 'username')),
+ 'uploader_id': str_or_none(traverse_obj(response, ('livestream', 'channel', 'user_id'))),
+ 'timestamp': unified_timestamp(response.get('created_at')),
+ 'duration': float_or_none(traverse_obj(response, ('livestream', 'duration')), scale=1000),
+ 'thumbnail': traverse_obj(
+ response, ('livestream', 'thumbnail'), expected_type=url_or_none),
+ 'categories': traverse_obj(response, ('livestream', 'categories', ..., 'name')),
+ }
diff --git a/yt_dlp/extractor/kicker.py b/yt_dlp/extractor/kicker.py
new file mode 100644
index 0000000..a2c7dd4
--- /dev/null
+++ b/yt_dlp/extractor/kicker.py
@@ -0,0 +1,55 @@
+from .common import InfoExtractor
+from .dailymotion import DailymotionIE
+
+
+class KickerIE(InfoExtractor):
+    _VALID_URL = r'https?://(?:www\.)?kicker\.de/(?P<id>[\w-]+)/video'
+ _TESTS = [{
+ 'url': 'https://www.kicker.de/pogba-dembel-co-die-top-11-der-abloesefreien-spieler-905049/video',
+ 'info_dict': {
+ 'id': 'km04mrK0DrRAVxy2GcA',
+ 'title': 'md5:b91d145bac5745ac58d5479d8347a875',
+ 'ext': 'mp4',
+ 'duration': 350,
+ 'description': 'md5:a5a3dd77dbb6550dbfb997be100b9998',
+ 'uploader_id': 'x2dfupo',
+ 'timestamp': 1654677626,
+ 'like_count': int,
+ 'uploader': 'kicker.de',
+ 'view_count': int,
+ 'age_limit': 0,
+ 'thumbnail': r're:https://s\d+\.dmcdn\.net/v/T-x741YeYAx8aSZ0Z/x1080',
+ 'tags': ['published', 'category.InternationalSoccer'],
+ 'upload_date': '20220608'
+ }
+ }, {
+ 'url': 'https://www.kicker.de/ex-unioner-in-der-bezirksliga-felix-kroos-vereinschallenge-in-pankow-902825/video',
+ 'info_dict': {
+ 'id': 'k2omNsJKdZ3TxwxYSFJ',
+ 'title': 'md5:72ec24d7f84b8436fe1e89d198152adf',
+ 'ext': 'mp4',
+ 'uploader_id': 'x2dfupo',
+ 'duration': 331,
+ 'timestamp': 1652966015,
+ 'thumbnail': r're:https?://s\d+\.dmcdn\.net/v/TxU4Z1YYCmtisTbMq/x1080',
+ 'tags': ['FELIX KROOS', 'EINFACH MAL LUPPEN', 'KROOS', 'FSV FORTUNA PANKOW', 'published', 'category.Amateurs', 'marketingpreset.Spreekick'],
+ 'age_limit': 0,
+ 'view_count': int,
+ 'upload_date': '20220519',
+ 'uploader': 'kicker.de',
+ 'description': 'md5:0c2060c899a91c8bf40f578f78c5846f',
+ 'like_count': int,
+ }
+ }]
+
+ def _real_extract(self, url):
+ video_slug = self._match_id(url)
+
+ webpage = self._download_webpage(url, video_slug)
+ dailymotion_video_id = self._search_regex(
+ r'data-dmprivateid\s*=\s*[\'"](?P<video_id>\w+)', webpage,
+ 'video id', group='video_id')
+
+ return self.url_result(
+ f'https://www.dailymotion.com/video/{dailymotion_video_id}',
+ ie=DailymotionIE, video_title=self._html_extract_title(webpage))
diff --git a/yt_dlp/extractor/kickstarter.py b/yt_dlp/extractor/kickstarter.py
new file mode 100644
index 0000000..c0d851d
--- /dev/null
+++ b/yt_dlp/extractor/kickstarter.py
@@ -0,0 +1,68 @@
+from .common import InfoExtractor
+from ..utils import smuggle_url
+
+
+class KickStarterIE(InfoExtractor):
+ _VALID_URL = r'https?://(?:www\.)?kickstarter\.com/projects/(?P<id>[^/]*)/.*'
+ _TESTS = [{
+ 'url': 'https://www.kickstarter.com/projects/1404461844/intersection-the-story-of-josh-grant/description',
+ 'md5': 'c81addca81327ffa66c642b5d8b08cab',
+ 'info_dict': {
+ 'id': '1404461844',
+ 'ext': 'mp4',
+ 'title': 'Intersection: The Story of Josh Grant by Kyle Cowling',
+ 'description': (
+ 'A unique motocross documentary that examines the '
+ 'life and mind of one of sports most elite athletes: Josh Grant.'
+ ),
+ },
+ }, {
+ 'note': 'Embedded video (not using the native kickstarter video service)',
+ 'url': 'https://www.kickstarter.com/projects/597507018/pebble-e-paper-watch-for-iphone-and-android/posts/659178',
+ 'info_dict': {
+ 'id': '78704821',
+ 'ext': 'mp4',
+ 'uploader_id': 'pebble',
+ 'uploader': 'Pebble Technology',
+ 'title': 'Pebble iOS Notifications',
+ },
+ 'add_ie': ['Vimeo'],
+ }, {
+ 'url': 'https://www.kickstarter.com/projects/1420158244/power-drive-2000/widget/video.html',
+ 'info_dict': {
+ 'id': '1420158244',
+ 'ext': 'mp4',
+ 'title': 'Power Drive 2000',
+ },
+ }]
+
+ def _real_extract(self, url):
+ video_id = self._match_id(url)
+ webpage = self._download_webpage(url, video_id)
+
+ title = self._html_search_regex(
+ r'<title>\s*(.*?)(?:\s*&mdash;\s*Kickstarter)?\s*</title>',
+ webpage, 'title')
+ video_url = self._search_regex(
+ r'data-video-url="(.*?)"',
+ webpage, 'video URL', default=None)
+ if video_url is None: # No native kickstarter, look for embedded videos
+ return {
+ '_type': 'url_transparent',
+ 'ie_key': 'Generic',
+ 'url': smuggle_url(url, {'to_generic': True}),
+ 'title': title,
+ }
+
+ thumbnail = self._og_search_thumbnail(webpage, default=None)
+ if thumbnail is None:
+ thumbnail = self._html_search_regex(
+ r'<img[^>]+class="[^"]+\s*poster\s*[^"]+"[^>]+src="([^"]+)"',
+ webpage, 'thumbnail image', fatal=False)
+ return {
+ 'id': video_id,
+ 'url': video_url,
+ 'title': title,
+ 'description': self._og_search_description(webpage, default=None),
+ 'thumbnail': thumbnail,
+ }
diff --git a/yt_dlp/extractor/kinja.py b/yt_dlp/extractor/kinja.py
new file mode 100644
index 0000000..f4e5c4c
--- /dev/null
+++ b/yt_dlp/extractor/kinja.py
@@ -0,0 +1,199 @@
+from .common import InfoExtractor
+from ..compat import (
+ compat_str,
+ compat_urllib_parse_unquote,
+)
+from ..utils import (
+ int_or_none,
+ parse_iso8601,
+ strip_or_none,
+ try_get,
+)
+
+
+class KinjaEmbedIE(InfoExtractor):
+ IE_NAME = 'kinja:embed'
+ _DOMAIN_REGEX = r'''(?:[^.]+\.)?
+ (?:
+ avclub|
+ clickhole|
+ deadspin|
+ gizmodo|
+ jalopnik|
+ jezebel|
+ kinja|
+ kotaku|
+ lifehacker|
+ splinternews|
+ the(?:inventory|onion|root|takeout)
+ )\.com'''
+ _COMMON_REGEX = r'''/
+ (?:
+ ajax/inset|
+ embed/video
+ )/iframe\?.*?\bid='''
+ _VALID_URL = r'''(?x)https?://%s%s
+ (?P<type>
+ fb|
+ imgur|
+ instagram|
+ jwp(?:layer)?-video|
+ kinjavideo|
+ mcp|
+ megaphone|
+ soundcloud(?:-playlist)?|
+ tumblr-post|
+ twitch-stream|
+ twitter|
+ ustream-channel|
+ vimeo|
+ vine|
+ youtube-(?:list|video)
+ )-(?P<id>[^&]+)''' % (_DOMAIN_REGEX, _COMMON_REGEX)
+ _EMBED_REGEX = [rf'(?x)<iframe[^>]+?src=(?P<q>["\'])(?P<url>(?:(?:https?:)?//{_DOMAIN_REGEX})?{_COMMON_REGEX}(?:(?!\1).)+)\1']
+ _TESTS = [{
+ 'url': 'https://kinja.com/ajax/inset/iframe?id=fb-10103303356633621',
+ 'only_matching': True,
+ }, {
+ 'url': 'https://kinja.com/ajax/inset/iframe?id=kinjavideo-100313',
+ 'only_matching': True,
+ }, {
+ 'url': 'https://kinja.com/ajax/inset/iframe?id=megaphone-PPY1300931075',
+ 'only_matching': True,
+ }, {
+ 'url': 'https://kinja.com/ajax/inset/iframe?id=soundcloud-128574047',
+ 'only_matching': True,
+ }, {
+ 'url': 'https://kinja.com/ajax/inset/iframe?id=soundcloud-playlist-317413750',
+ 'only_matching': True,
+ }, {
+ 'url': 'https://kinja.com/ajax/inset/iframe?id=tumblr-post-160130699814-daydreams-at-midnight',
+ 'only_matching': True,
+ }, {
+ 'url': 'https://kinja.com/ajax/inset/iframe?id=twitch-stream-libratus_extra',
+ 'only_matching': True,
+ }, {
+ 'url': 'https://kinja.com/ajax/inset/iframe?id=twitter-1068875942473404422',
+ 'only_matching': True,
+ }, {
+ 'url': 'https://kinja.com/ajax/inset/iframe?id=ustream-channel-10414700',
+ 'only_matching': True,
+ }, {
+ 'url': 'https://kinja.com/ajax/inset/iframe?id=vimeo-120153502',
+ 'only_matching': True,
+ }, {
+ 'url': 'https://kinja.com/ajax/inset/iframe?id=vine-5BlvV5qqPrD',
+ 'only_matching': True,
+ }, {
+ 'url': 'https://kinja.com/ajax/inset/iframe?id=youtube-list-BCQ3KyrPjgA/PLE6509247C270A72E',
+ 'only_matching': True,
+ }, {
+ 'url': 'https://kinja.com/ajax/inset/iframe?id=youtube-video-00QyL0AgPAE',
+ 'only_matching': True,
+ }]
+ _JWPLATFORM_PROVIDER = ('cdn.jwplayer.com/v2/media/', 'JWPlatform')
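+ # maps the embed type prefix to (URL prefix or template, extractor key);
+ # the tumblr-post and youtube-list entries are %-templates filled in below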
+ _PROVIDER_MAP = {
+ 'fb': ('facebook.com/video.php?v=', 'Facebook'),
+ 'imgur': ('imgur.com/', 'Imgur'),
+ 'instagram': ('instagram.com/p/', 'Instagram'),
+ 'jwplayer-video': _JWPLATFORM_PROVIDER,
+ 'jwp-video': _JWPLATFORM_PROVIDER,
+ 'megaphone': ('player.megaphone.fm/', 'Generic'),
+ 'soundcloud': ('api.soundcloud.com/tracks/', 'Soundcloud'),
+ 'soundcloud-playlist': ('api.soundcloud.com/playlists/', 'SoundcloudPlaylist'),
+ 'tumblr-post': ('%s.tumblr.com/post/%s', 'Tumblr'),
+ 'twitch-stream': ('twitch.tv/', 'TwitchStream'),
+ 'twitter': ('twitter.com/i/cards/tfw/v1/', 'TwitterCard'),
+ 'ustream-channel': ('ustream.tv/embed/', 'Ustream'),
+ 'vimeo': ('vimeo.com/', 'Vimeo'),
+ 'vine': ('vine.co/v/', 'Vine'),
+ 'youtube-list': ('youtube.com/embed/%s?list=%s', 'YoutubePlaylist'),
+ 'youtube-video': ('youtube.com/embed/', 'Youtube'),
+ }
+
+ def _real_extract(self, url):
+ video_type, video_id = self._match_valid_url(url).groups()
+
+ provider = self._PROVIDER_MAP.get(video_type)
+ if provider:
+ video_id = compat_urllib_parse_unquote(video_id)
+ if video_type == 'tumblr-post':
+ video_id, blog = video_id.split('-', 1)
+ result_url = provider[0] % (blog, video_id)
+ elif video_type == 'youtube-list':
+ video_id, playlist_id = video_id.split('/')
+ result_url = provider[0] % (video_id, playlist_id)
+ else:
+ result_url = provider[0] + video_id
+ return self.url_result('http://' + result_url, provider[1])
+
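+ # kinjavideo ids are hosted on Kinja itself; the only other type not in
+ # _PROVIDER_MAP is 'mcp', which is served through Univision's metadata API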
+ if video_type == 'kinjavideo':
+ data = self._download_json(
+ 'https://kinja.com/api/core/video/views/videoById',
+ video_id, query={'videoId': video_id})['data']
+ title = data['title']
+
+ formats = []
+ for k in ('signedPlaylist', 'streaming'):
+ m3u8_url = data.get(k + 'Url')
+ if m3u8_url:
+ formats.extend(self._extract_m3u8_formats(
+ m3u8_url, video_id, 'mp4', 'm3u8_native',
+ m3u8_id='hls', fatal=False))
+
+ thumbnail = None
+ poster = data.get('poster') or {}
+ poster_id = poster.get('id')
+ if poster_id:
+ thumbnail = 'https://i.kinja-img.com/gawker-media/image/upload/%s.%s' % (poster_id, poster.get('format') or 'jpg')
+
+ return {
+ 'id': video_id,
+ 'title': title,
+ 'description': strip_or_none(data.get('description')),
+ 'formats': formats,
+ 'tags': data.get('tags'),
+ 'timestamp': int_or_none(try_get(
+ data, lambda x: x['postInfo']['publishTimeMillis']), 1000),
+ 'thumbnail': thumbnail,
+ 'uploader': data.get('network'),
+ }
+ else:
+ video_data = self._download_json(
+ 'https://api.vmh.univision.com/metadata/v1/content/' + video_id,
+ video_id)['videoMetadata']
+ iptc = video_data['photoVideoMetadataIPTC']
+ title = iptc['title']['en']
+ fmg = video_data.get('photoVideoMetadata_fmg') or {}
+ tvss_domain = fmg.get('tvssDomain') or 'https://auth.univision.com'
+ data = self._download_json(
+ tvss_domain + '/api/v3/video-auth/url-signature-tokens',
+ video_id, query={'mcpids': video_id})['data'][0]
+ formats = []
+
+ rendition_url = data.get('renditionUrl')
+ if rendition_url:
+ formats = self._extract_m3u8_formats(
+ rendition_url, video_id, 'mp4',
+ 'm3u8_native', m3u8_id='hls', fatal=False)
+
+ fallback_rendition_url = data.get('fallbackRenditionUrl')
+ if fallback_rendition_url:
+ formats.append({
+ 'format_id': 'fallback',
+ 'tbr': int_or_none(self._search_regex(
+ r'_(\d+)\.mp4', fallback_rendition_url,
+ 'bitrate', default=None)),
+ 'url': fallback_rendition_url,
+ })
+
+ return {
+ 'id': video_id,
+ 'title': title,
+ 'thumbnail': try_get(iptc, lambda x: x['cloudinaryLink']['link'], compat_str),
+ 'uploader': fmg.get('network'),
+ 'duration': int_or_none(iptc.get('fileDuration')),
+ 'formats': formats,
+ 'description': try_get(iptc, lambda x: x['description']['en'], compat_str),
+ 'timestamp': parse_iso8601(iptc.get('dateReleased')),
+ }
diff --git a/yt_dlp/extractor/kinopoisk.py b/yt_dlp/extractor/kinopoisk.py
new file mode 100644
index 0000000..5db9083
--- /dev/null
+++ b/yt_dlp/extractor/kinopoisk.py
@@ -0,0 +1,63 @@
+from .common import InfoExtractor
+from ..utils import (
+ dict_get,
+ int_or_none,
+)
+
+
+class KinoPoiskIE(InfoExtractor):
+ _GEO_COUNTRIES = ['RU']
+ _VALID_URL = r'https?://(?:www\.)?kinopoisk\.ru/film/(?P<id>\d+)'
+ _TESTS = [{
+ 'url': 'https://www.kinopoisk.ru/film/81041/watch/',
+ 'md5': '4f71c80baea10dfa54a837a46111d326',
+ 'info_dict': {
+ 'id': '81041',
+ 'ext': 'mp4',
+ 'title': 'Алеша попович и тугарин змей',
+ 'description': 'md5:43787e673d68b805d0aa1df5a5aea701',
+ 'thumbnail': r're:^https?://.*',
+ 'duration': 4533,
+ 'age_limit': 12,
+ },
+ }, {
+ 'url': 'https://www.kinopoisk.ru/film/81041',
+ 'only_matching': True,
+ }]
+
+ def _real_extract(self, url):
+ video_id = self._match_id(url)
+
+ webpage = self._download_webpage(
+ 'https://ott-widget.kinopoisk.ru/v1/kp/', video_id,
+ query={'kpId': video_id})
+
+ data = self._parse_json(
+ self._search_regex(
+ r'(?s)<script[^>]+\btype=["\']application/json[^>]+>(.+?)<',
+ webpage, 'data'),
+ video_id)['models']
+
+ film = data['filmStatus']
+ title = film.get('title') or film['originalTitle']
+
+ formats = self._extract_m3u8_formats(
+ data['playlistEntity']['uri'], video_id, 'mp4',
+ entry_protocol='m3u8_native', m3u8_id='hls')
+
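+ # also check the misspelled key variants; dict_get returns the first one present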
+ description = dict_get(
+ film, ('descriptscription', 'description',
+ 'shortDescriptscription', 'shortDescription'))
+ thumbnail = film.get('coverUrl') or film.get('posterUrl')
+ duration = int_or_none(film.get('duration'))
+ age_limit = int_or_none(film.get('restrictionAge'))
+
+ return {
+ 'id': video_id,
+ 'title': title,
+ 'description': description,
+ 'thumbnail': thumbnail,
+ 'duration': duration,
+ 'age_limit': age_limit,
+ 'formats': formats,
+ }
diff --git a/yt_dlp/extractor/kommunetv.py b/yt_dlp/extractor/kommunetv.py
new file mode 100644
index 0000000..432816c
--- /dev/null
+++ b/yt_dlp/extractor/kommunetv.py
@@ -0,0 +1,31 @@
+from .common import InfoExtractor
+from ..utils import update_url
+
+
+class KommunetvIE(InfoExtractor):
+ _VALID_URL = r'https?://(?P<host>\w+)\.kommunetv\.no/archive/(?P<id>\w+)'
+ _TEST = {
+ 'url': 'https://oslo.kommunetv.no/archive/921',
+ 'md5': '5f102be308ee759be1e12b63d5da4bbc',
+ 'info_dict': {
+ 'id': '921',
+ 'title': 'Bystyremøte',
+ 'ext': 'mp4'
+ }
+ }
+
+ def _real_extract(self, url):
+ host, video_id = self._match_valid_url(url).group('host', 'id')
+ headers = {
+ 'Accept': 'application/json',
+ }
+ data = self._download_json(
+ f'https://{host}.kommunetv.no/api/streams?streamType=1&id={video_id}',
+ video_id, headers=headers)
+ title = data['stream']['title']
+ file_url = data['playlist'][0]['playlist'][0]['file']
+ m3u8_url = update_url(file_url, query=None, fragment=None)
+ formats = self._extract_m3u8_formats(m3u8_url, video_id, ext='mp4', entry_protocol='m3u8_native', m3u8_id='hls', fatal=False)
+ return {
+ 'id': video_id,
+ 'formats': formats,
+ 'title': title
+ }
diff --git a/yt_dlp/extractor/kompas.py b/yt_dlp/extractor/kompas.py
new file mode 100644
index 0000000..8bad961
--- /dev/null
+++ b/yt_dlp/extractor/kompas.py
@@ -0,0 +1,26 @@
+from .jixie import JixieBaseIE
+
+
+class KompasVideoIE(JixieBaseIE):
+ _VALID_URL = r'https?://video\.kompas\.com/\w+/(?P<id>\d+)/(?P<slug>[\w-]+)'
+ _TESTS = [{
+ 'url': 'https://video.kompas.com/watch/164474/kim-jong-un-siap-kirim-nuklir-lawan-as-dan-korsel',
+ 'info_dict': {
+ 'id': '164474',
+ 'ext': 'mp4',
+ 'title': 'Kim Jong Un Siap Kirim Nuklir Lawan AS dan Korsel',
+ 'description': 'md5:262530c4fb7462398235f9a5dba92456',
+ 'uploader_id': '9262bf2590d558736cac4fff7978fcb1',
+ 'display_id': 'kim-jong-un-siap-kirim-nuklir-lawan-as-dan-korsel',
+ 'duration': 85.066667,
+ 'categories': ['news'],
+ 'thumbnail': 'https://video.jixie.media/1001/164474/164474_1280x720.jpg',
+ 'tags': 'count:9',
+ }
+ }]
+
+ def _real_extract(self, url):
+ video_id, display_id = self._match_valid_url(url).group('id', 'slug')
+ webpage = self._download_webpage(url, display_id)
+
+ return self._extract_data_from_jixie_id(display_id, video_id, webpage)
diff --git a/yt_dlp/extractor/koo.py b/yt_dlp/extractor/koo.py
new file mode 100644
index 0000000..c78a7b9
--- /dev/null
+++ b/yt_dlp/extractor/koo.py
@@ -0,0 +1,114 @@
+from .common import InfoExtractor
+from ..utils import (
+ clean_html,
+ try_get,
+)
+
+
+class KooIE(InfoExtractor):
+ _WORKING = False
+ _VALID_URL = r'https?://(?:www\.)?kooapp\.com/koo/[^/]+/(?P<id>[^/&#$?]+)'
+ _TESTS = [{ # Test for video in the comments
+ 'url': 'https://www.kooapp.com/koo/ytdlpTestAccount/946c4189-bc2d-4524-b95b-43f641e2adde',
+ 'info_dict': {
+ 'id': '946c4189-bc2d-4524-b95b-43f641e2adde',
+ 'ext': 'mp4',
+ 'title': 'test for video in comment',
+ 'description': 'md5:daa77dc214add4da8b6ea7d2226776e7',
+ 'timestamp': 1632215195,
+ 'uploader_id': 'ytdlpTestAccount',
+ 'uploader': 'yt-dlpTestAccount',
+ 'duration': 7000,
+ 'upload_date': '20210921'
+ },
+ 'params': {'skip_download': True}
+ }, { # Test for koo with long title
+ 'url': 'https://www.kooapp.com/koo/laxman_kumarDBFEC/33decbf7-5e1e-4bb8-bfd7-04744a064361',
+ 'info_dict': {
+ 'id': '33decbf7-5e1e-4bb8-bfd7-04744a064361',
+ 'ext': 'mp4',
+ 'title': 'md5:47a71c2337295330c5a19a8af1bbf450',
+ 'description': 'md5:06a6a84e9321499486dab541693d8425',
+ 'timestamp': 1632106884,
+ 'uploader_id': 'laxman_kumarDBFEC',
+ 'uploader': 'Laxman Kumar 🇮🇳',
+ 'duration': 46000,
+ 'upload_date': '20210920'
+ },
+ 'params': {'skip_download': True}
+ }, { # Test for audio
+ 'url': 'https://www.kooapp.com/koo/ytdlpTestAccount/a2a9c88e-ce4b-4d2d-952f-d06361c5b602',
+ 'info_dict': {
+ 'id': 'a2a9c88e-ce4b-4d2d-952f-d06361c5b602',
+ 'ext': 'mp4',
+ 'title': 'Test for audio',
+ 'description': 'md5:ecb9a2b6a5d34b736cecb53788cb11e8',
+ 'timestamp': 1632211634,
+ 'uploader_id': 'ytdlpTestAccount',
+ 'uploader': 'yt-dlpTestAccount',
+ 'duration': 214000,
+ 'upload_date': '20210921'
+ },
+ 'params': {'skip_download': True}
+ }, { # Test for video
+ 'url': 'https://www.kooapp.com/koo/ytdlpTestAccount/a3e56c53-c1ed-4ac9-ac02-ed1630e6b1d1',
+ 'info_dict': {
+ 'id': 'a3e56c53-c1ed-4ac9-ac02-ed1630e6b1d1',
+ 'ext': 'mp4',
+ 'title': 'Test for video',
+ 'description': 'md5:7afc4eb839074ddeb2beea5dd6fe9500',
+ 'timestamp': 1632211468,
+ 'uploader_id': 'ytdlpTestAccount',
+ 'uploader': 'yt-dlpTestAccount',
+ 'duration': 14000,
+ 'upload_date': '20210921'
+ },
+ 'params': {'skip_download': True}
+ }, { # Test for link
+ 'url': 'https://www.kooapp.com/koo/ytdlpTestAccount/01bf5b94-81a5-4d8e-a387-5f732022e15a',
+ 'skip': 'No video/audio found at the provided url.',
+ 'info_dict': {
+ 'id': '01bf5b94-81a5-4d8e-a387-5f732022e15a',
+ 'title': 'Test for link',
+ 'ext': 'none',
+ },
+ }, { # Test for images
+ 'url': 'https://www.kooapp.com/koo/ytdlpTestAccount/dc05d9cd-a61d-45fd-bb07-e8019d8ca8cb',
+ 'skip': 'No video/audio found at the provided url.',
+ 'info_dict': {
+ 'id': 'dc05d9cd-a61d-45fd-bb07-e8019d8ca8cb',
+ 'title': 'Test for images',
+ 'ext': 'none',
+ },
+ }]
+
+ def _real_extract(self, url):
+ video_id = self._match_id(url)
+ data_json = self._download_json(f'https://www.kooapp.com/apiV1/ku/{video_id}?limit=20&offset=0&showSimilarKoos=true', video_id)['parentContent']
+ item_json = next(content['items'][0] for content in data_json
+ if try_get(content, lambda x: x['items'][0]['id']) == video_id)
+ media_json = item_json['mediaMap']
+ formats = []
+
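+ # the koo payload exposes a progressive MP4 and/or an HLS manifest;
+ # collect whichever is present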
+ mp4_url = media_json.get('videoMp4')
+ video_m3u8_url = media_json.get('videoHls')
+ if mp4_url:
+ formats.append({
+ 'url': mp4_url,
+ 'ext': 'mp4',
+ })
+ if video_m3u8_url:
+ formats.extend(self._extract_m3u8_formats(video_m3u8_url, video_id, fatal=False, ext='mp4'))
+ if not formats:
+ self.raise_no_formats('No video/audio found at the provided url.', expected=True)
+
+ return {
+ 'id': video_id,
+ 'title': clean_html(item_json.get('title')),
+ 'description': f'{clean_html(item_json.get("title"))}\n\n{clean_html(item_json.get("enTransliteration"))}',
+ 'timestamp': item_json.get('createdAt'),
+ 'uploader_id': item_json.get('handle'),
+ 'uploader': item_json.get('name'),
+ 'duration': media_json.get('duration'),
+ 'formats': formats,
+ }
diff --git a/yt_dlp/extractor/krasview.py b/yt_dlp/extractor/krasview.py
new file mode 100644
index 0000000..0febf75
--- /dev/null
+++ b/yt_dlp/extractor/krasview.py
@@ -0,0 +1,58 @@
+import json
+
+from .common import InfoExtractor
+from ..utils import (
+ int_or_none,
+ js_to_json,
+)
+
+
+class KrasViewIE(InfoExtractor):
+ _WORKING = False
+ IE_DESC = 'Красвью'
+ _VALID_URL = r'https?://krasview\.ru/(?:video|embed)/(?P<id>\d+)'
+
+ _TEST = {
+ 'url': 'http://krasview.ru/video/512228',
+ 'md5': '3b91003cf85fc5db277870c8ebd98eae',
+ 'info_dict': {
+ 'id': '512228',
+ 'ext': 'mp4',
+ 'title': 'Снег, лёд, заносы',
+ 'description': 'Снято в городе Нягань, в Ханты-Мансийском автономном округе.',
+ 'duration': 27,
+ 'thumbnail': r're:^https?://.*\.jpg',
+ },
+ 'params': {
+ 'skip_download': 'Not accessible from Travis CI server',
+ },
+ }
+
+ def _real_extract(self, url):
+ video_id = self._match_id(url)
+
+ webpage = self._download_webpage(url, video_id)
+
+ flashvars = json.loads(js_to_json(self._search_regex(
+ r'video_Init\(({.+?})', webpage, 'flashvars')))
+
+ video_url = flashvars['url']
+ title = self._og_search_title(webpage)
+ description = self._og_search_description(webpage, default=None)
+ thumbnail = flashvars.get('image') or self._og_search_thumbnail(webpage)
+ duration = int_or_none(flashvars.get('duration'))
+ width = int_or_none(self._og_search_property(
+ 'video:width', webpage, 'video width', default=None))
+ height = int_or_none(self._og_search_property(
+ 'video:height', webpage, 'video height', default=None))
+
+ return {
+ 'id': video_id,
+ 'url': video_url,
+ 'title': title,
+ 'description': description,
+ 'thumbnail': thumbnail,
+ 'duration': duration,
+ 'width': width,
+ 'height': height,
+ }
diff --git a/yt_dlp/extractor/kth.py b/yt_dlp/extractor/kth.py
new file mode 100644
index 0000000..e17c6db
--- /dev/null
+++ b/yt_dlp/extractor/kth.py
@@ -0,0 +1,28 @@
+from .common import InfoExtractor
+from ..utils import smuggle_url
+
+
+class KTHIE(InfoExtractor):
+ _VALID_URL = r'https?://play\.kth\.se/(?:[^/]+/)+(?P<id>[a-z0-9_]+)'
+ _TEST = {
+ 'url': 'https://play.kth.se/media/Lunch+breakA+De+nya+aff%C3%A4rerna+inom+Fordonsdalen/0_uoop6oz9',
+ 'md5': 'd83ada6d00ca98b73243a88efe19e8a6',
+ 'info_dict': {
+ 'id': '0_uoop6oz9',
+ 'ext': 'mp4',
+ 'title': 'md5:bd1d6931facb6828762a33e6ce865f37',
+ 'thumbnail': 're:https?://.+/thumbnail/.+',
+ 'duration': 3516,
+ 'timestamp': 1647345358,
+ 'upload_date': '20220315',
+ 'uploader_id': 'md5:0ec23e33a89e795a4512930c8102509f',
+ }
+ }
+
+ def _real_extract(self, url):
+ video_id = self._match_id(url)
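+ # play.kth.se is a hosted Kaltura instance (partner id 308 on
+ # api.kaltura.nordu.net); hand the entry over to the Kaltura extractor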
+ return self.url_result(
+ smuggle_url('kaltura:308:%s' % video_id, {
+ 'service_url': 'https://api.kaltura.nordu.net'}),
+ 'Kaltura')
diff --git a/yt_dlp/extractor/ku6.py b/yt_dlp/extractor/ku6.py
new file mode 100644
index 0000000..31b4ea0
--- /dev/null
+++ b/yt_dlp/extractor/ku6.py
@@ -0,0 +1,30 @@
+from .common import InfoExtractor
+
+
+class Ku6IE(InfoExtractor):
+ _VALID_URL = r'https?://v\.ku6\.com/show/(?P<id>[\w-]+)\.*html'
+ _TEST = {
+ 'url': 'http://v.ku6.com/show/JG-8yS14xzBr4bCn1pu0xw...html',
+ 'md5': '01203549b9efbb45f4b87d55bdea1ed1',
+ 'info_dict': {
+ 'id': 'JG-8yS14xzBr4bCn1pu0xw',
+ 'ext': 'f4v',
+ 'title': 'techniques test',
+ }
+ }
+
+ def _real_extract(self, url):
+ video_id = self._match_id(url)
+ webpage = self._download_webpage(url, video_id)
+
+ title = self._html_search_regex(
+ r'<h1 title=.*>(.*?)</h1>', webpage, 'title')
+ data_url = 'http://v.ku6.com/fetchVideo4Player/%s.html' % video_id
+ json_data = self._download_json(data_url, video_id)
+ download_url = json_data['data']['f']
+
+ return {
+ 'id': video_id,
+ 'title': title,
+ 'url': download_url,
+ }
diff --git a/yt_dlp/extractor/kukululive.py b/yt_dlp/extractor/kukululive.py
new file mode 100644
index 0000000..86ab5d4
--- /dev/null
+++ b/yt_dlp/extractor/kukululive.py
@@ -0,0 +1,140 @@
+import urllib.parse
+
+from .common import InfoExtractor
+from ..utils import (
+ ExtractorError,
+ clean_html,
+ filter_dict,
+ get_element_by_id,
+ int_or_none,
+ join_nonempty,
+ js_to_json,
+ qualities,
+ url_or_none,
+ urljoin,
+)
+from ..utils.traversal import traverse_obj
+
+
+class KukuluLiveIE(InfoExtractor):
+ _VALID_URL = r'https?://live\.erinn\.biz/live\.php\?h(?P<id>\d+)'
+ _TESTS = [{
+ 'url': 'https://live.erinn.biz/live.php?h675134569',
+ 'md5': 'e380fa6a47fc703d91cea913ab44ec2e',
+ 'info_dict': {
+ 'id': '675134569',
+ 'ext': 'mp4',
+ 'title': 'プロセカ',
+ 'description': 'テストも兼ねたプロセカ配信。',
+ 'timestamp': 1702689148,
+ 'upload_date': '20231216',
+ 'thumbnail': r're:^https?://.*',
+ },
+ }, {
+ 'url': 'https://live.erinn.biz/live.php?h102338092',
+ 'md5': 'dcf5167a934b1c60333461e13a81a6e2',
+ 'info_dict': {
+ 'id': '102338092',
+ 'ext': 'mp4',
+ 'title': 'Among Usで遊びます!!',
+ 'description': 'VTuberになりましたねんねこ㌨ですよろしくお願いします',
+ 'timestamp': 1704603118,
+ 'upload_date': '20240107',
+ 'thumbnail': r're:^https?://.*',
+ },
+ }, {
+ 'url': 'https://live.erinn.biz/live.php?h878049531',
+ 'only_matching': True,
+ }]
+
+ def _get_quality_meta(self, video_id, desc, code, force_h264=None):
+ desc += ' (force_h264)' if force_h264 else ''
+ qs = self._download_webpage(
+ 'https://live.erinn.biz/live.player.fplayer.php', video_id,
+ f'Downloading {desc} quality metadata', f'Unable to download {desc} quality metadata',
+ query=filter_dict({
+ 'hash': video_id,
+ 'action': f'get{code}liveByAjax',
+ 'force_h264': force_h264,
+ }))
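+ # the endpoint responds with a urlencoded query string, not JSON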
+ return urllib.parse.parse_qs(qs)
+
+ def _add_quality_formats(self, formats, quality_meta):
+ vcodec = traverse_obj(quality_meta, ('vcodec', 0, {str}))
+ quality = traverse_obj(quality_meta, ('now_quality', 0, {str}))
+ quality_priority = qualities(('low', 'h264', 'high'))(quality)
+ if traverse_obj(quality_meta, ('hlsaddr', 0, {url_or_none})):
+ formats.append({
+ 'format_id': quality,
+ 'url': quality_meta['hlsaddr'][0],
+ 'ext': 'mp4',
+ 'vcodec': vcodec,
+ 'quality': quality_priority,
+ })
+ if traverse_obj(quality_meta, ('hlsaddr_audioonly', 0, {url_or_none})):
+ formats.append({
+ 'format_id': join_nonempty(quality, 'audioonly'),
+ 'url': quality_meta['hlsaddr_audioonly'][0],
+ 'ext': 'm4a',
+ 'vcodec': 'none',
+ 'quality': quality_priority,
+ })
+
+ def _real_extract(self, url):
+ video_id = self._match_id(url)
+ html = self._download_webpage(url, video_id)
+
+ if '>タイムシフトが見つかりませんでした。<' in html:
+ raise ExtractorError('This stream has expired', expected=True)
+
+ title = clean_html(
+ get_element_by_id('livetitle', html.replace('<SPAN', '<span').replace('SPAN>', 'span>')))
+ description = self._html_search_meta('Description', html)
+ thumbnail = self._html_search_meta(['og:image', 'twitter:image'], html)
+
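+ # the player script sets `var timeshift = false` while the broadcast is live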
+ if self._search_regex(r'(var\s+timeshift\s*=\s*false)', html, 'is livestream', default=False):
+ formats = []
+ for (desc, code) in [('high', 'Z'), ('low', 'ForceLow')]:
+ quality_meta = self._get_quality_meta(video_id, desc, code)
+ self._add_quality_formats(formats, quality_meta)
+ if desc == 'high' and traverse_obj(quality_meta, ('vcodec', 0)) == 'HEVC':
+ self._add_quality_formats(
+ formats, self._get_quality_meta(video_id, desc, code, force_h264='1'))
+
+ return {
+ 'id': video_id,
+ 'title': title,
+ 'description': description,
+ 'thumbnail': thumbnail,
+ 'is_live': True,
+ 'formats': formats,
+ }
+
+ # VOD extraction
+ player_html = self._download_webpage(
+ 'https://live.erinn.biz/live.timeshift.fplayer.php', video_id,
+ 'Downloading player html', 'Unable to download player html', query={'hash': video_id})
+
+ sources = traverse_obj(self._search_json(
+ r'var\s+fplayer_source\s*=', player_html, 'stream data', video_id,
+ contains_pattern=r'\[(?s:.+)\]', transform_source=js_to_json), lambda _, v: v['file'])
+
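+ # a timeshift may be split across several files; expose multi-segment
+ # recordings as a multi_video playlist and single ones as a plain video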
+ def entries(segments, playlist=True):
+ for i, segment in enumerate(segments, 1):
+ yield {
+ 'id': f'{video_id}_{i}' if playlist else video_id,
+ 'title': f'{title} (Part {i})' if playlist else title,
+ 'description': description,
+ 'timestamp': traverse_obj(segment, ('time_start', {int_or_none})),
+ 'thumbnail': thumbnail,
+ 'formats': [{
+ 'url': urljoin('https://live.erinn.biz', segment['file']),
+ 'ext': 'mp4',
+ 'protocol': 'm3u8_native',
+ }],
+ }
+
+ if len(sources) == 1:
+ return next(entries(sources, playlist=False))
+
+ return self.playlist_result(entries(sources), video_id, title, description, multi_video=True)
diff --git a/yt_dlp/extractor/kuwo.py b/yt_dlp/extractor/kuwo.py
new file mode 100644
index 0000000..3c93ded
--- /dev/null
+++ b/yt_dlp/extractor/kuwo.py
@@ -0,0 +1,352 @@
+import re
+
+from .common import InfoExtractor
+from ..compat import compat_urlparse
+from ..utils import (
+ ExtractorError,
+ InAdvancePagedList,
+ clean_html,
+ get_element_by_id,
+ remove_start,
+)
+
+
+class KuwoBaseIE(InfoExtractor):
+ _FORMATS = [
+ {'format': 'ape', 'ext': 'ape', 'preference': 100},
+ {'format': 'mp3-320', 'ext': 'mp3', 'br': '320kmp3', 'abr': 320, 'preference': 80},
+ {'format': 'mp3-192', 'ext': 'mp3', 'br': '192kmp3', 'abr': 192, 'preference': 70},
+ {'format': 'mp3-128', 'ext': 'mp3', 'br': '128kmp3', 'abr': 128, 'preference': 60},
+ {'format': 'wma', 'ext': 'wma', 'preference': 20},
+ {'format': 'aac', 'ext': 'aac', 'abr': 48, 'preference': 10}
+ ]
+
+ def _get_formats(self, song_id, tolerate_ip_deny=False):
+ formats = []
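+ # probe every known quality against the anti-leech endpoint, which replies
+ # with either a media URL or the literal string 'IPDeny'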
+ for file_format in self._FORMATS:
+ query = {
+ 'format': file_format['ext'],
+ 'br': file_format.get('br', ''),
+ 'rid': 'MUSIC_%s' % song_id,
+ 'type': 'convert_url',
+ 'response': 'url'
+ }
+
+ song_url = self._download_webpage(
+ 'http://antiserver.kuwo.cn/anti.s',
+ song_id, note='Download %s url info' % file_format['format'],
+ query=query, headers=self.geo_verification_headers(),
+ )
+
+ if song_url == 'IPDeny' and not tolerate_ip_deny:
+ raise ExtractorError('This song is blocked in this region', expected=True)
+
+ if song_url.startswith(('http://', 'https://')):
+ formats.append({
+ 'url': song_url,
+ 'format_id': file_format['format'],
+ 'format': file_format['format'],
+ 'quality': file_format['preference'],
+ 'abr': file_format.get('abr'),
+ })
+
+ return formats
+
+
+class KuwoIE(KuwoBaseIE):
+ _WORKING = False
+ IE_NAME = 'kuwo:song'
+ IE_DESC = '酷我音乐'
+ _VALID_URL = r'https?://(?:www\.)?kuwo\.cn/yinyue/(?P<id>\d+)'
+ _TESTS = [{
+ 'url': 'http://www.kuwo.cn/yinyue/635632/',
+ 'info_dict': {
+ 'id': '635632',
+ 'ext': 'ape',
+ 'title': '爱我别走',
+ 'creator': '张震岳',
+ 'upload_date': '20080122',
+ 'description': 'md5:ed13f58e3c3bf3f7fd9fbc4e5a7aa75c'
+ },
+ 'skip': 'this song has been offline because of copyright issues',
+ }, {
+ 'url': 'http://www.kuwo.cn/yinyue/6446136/',
+ 'info_dict': {
+ 'id': '6446136',
+ 'ext': 'mp3',
+ 'title': '心',
+ 'description': 'md5:5d0e947b242c35dc0eb1d2fce9fbf02c',
+ 'creator': 'IU',
+ 'upload_date': '20150518',
+ },
+ 'params': {
+ 'format': 'mp3-320',
+ },
+ }, {
+ 'url': 'http://www.kuwo.cn/yinyue/3197154?catalog=yueku2016',
+ 'only_matching': True,
+ }]
+
+ def _real_extract(self, url):
+ song_id = self._match_id(url)
+ webpage, urlh = self._download_webpage_handle(
+ url, song_id, note='Download song detail info',
+ errnote='Unable to get song detail info')
+ if song_id not in urlh.url or '对不起,该歌曲由于版权问题已被下线,将返回网站首页' in webpage:
+ raise ExtractorError('this song has been offline because of copyright issues', expected=True)
+
+ song_name = self._html_search_regex(
+ r'<p[^>]+id="lrcName">([^<]+)</p>', webpage, 'song name')
+ singer_name = remove_start(self._html_search_regex(
+ r'<a[^>]+href="http://www\.kuwo\.cn/artist/content\?name=([^"]+)">',
+ webpage, 'singer name', fatal=False), '歌手')
+ lrc_content = clean_html(get_element_by_id('lrcContent', webpage))
+ if lrc_content == '暂无': # indicates no lyrics
+ lrc_content = None
+
+ formats = self._get_formats(song_id)
+
+ album_id = self._html_search_regex(
+ r'<a[^>]+href="http://www\.kuwo\.cn/album/(\d+)/"',
+ webpage, 'album id', fatal=False)
+
+ publish_time = None
+ if album_id is not None:
+ album_info_page = self._download_webpage(
+ 'http://www.kuwo.cn/album/%s/' % album_id, song_id,
+ note='Download album detail info',
+ errnote='Unable to get album detail info')
+
+ publish_time = self._html_search_regex(
+ r'发行时间:(\d{4}-\d{2}-\d{2})', album_info_page,
+ 'publish time', fatal=False)
+ if publish_time:
+ publish_time = publish_time.replace('-', '')
+
+ return {
+ 'id': song_id,
+ 'title': song_name,
+ 'creator': singer_name,
+ 'upload_date': publish_time,
+ 'description': lrc_content,
+ 'formats': formats,
+ }
+
+
+class KuwoAlbumIE(InfoExtractor):
+ _WORKING = False
+ IE_NAME = 'kuwo:album'
+ IE_DESC = '酷我音乐 - 专辑'
+ _VALID_URL = r'https?://(?:www\.)?kuwo\.cn/album/(?P<id>\d+?)/'
+ _TEST = {
+ 'url': 'http://www.kuwo.cn/album/502294/',
+ 'info_dict': {
+ 'id': '502294',
+ 'title': 'Made\xa0Series\xa0《M》',
+ 'description': 'md5:d463f0d8a0ff3c3ea3d6ed7452a9483f',
+ },
+ 'playlist_count': 2,
+ }
+
+ def _real_extract(self, url):
+ album_id = self._match_id(url)
+
+ webpage = self._download_webpage(
+ url, album_id, note='Download album info',
+ errnote='Unable to get album info')
+
+ album_name = self._html_search_regex(
+ r'<div[^>]+class="comm"[^<]+<h1[^>]+title="([^"]+)"', webpage,
+ 'album name')
+ album_intro = remove_start(
+ clean_html(get_element_by_id('intro', webpage)),
+ '%s简介:' % album_name)
+
+ entries = [
+ self.url_result(song_url, 'Kuwo') for song_url in re.findall(
+ r'<p[^>]+class="listen"><a[^>]+href="(http://www\.kuwo\.cn/yinyue/\d+/)"',
+ webpage)
+ ]
+ return self.playlist_result(entries, album_id, album_name, album_intro)
+
+
+class KuwoChartIE(InfoExtractor):
+ _WORKING = False
+ IE_NAME = 'kuwo:chart'
+ IE_DESC = '酷我音乐 - 排行榜'
+ _VALID_URL = r'https?://yinyue\.kuwo\.cn/billboard_(?P<id>[^.]+)\.htm'
+ _TEST = {
+ 'url': 'http://yinyue.kuwo.cn/billboard_香港中文龙虎榜.htm',
+ 'info_dict': {
+ 'id': '香港中文龙虎榜',
+ },
+ 'playlist_mincount': 7,
+ }
+
+ def _real_extract(self, url):
+ chart_id = self._match_id(url)
+ webpage = self._download_webpage(
+ url, chart_id, note='Download chart info',
+ errnote='Unable to get chart info')
+
+ entries = [
+ self.url_result(song_url, 'Kuwo') for song_url in re.findall(
+ r'<a[^>]+href="(http://www\.kuwo\.cn/yinyue/\d+)', webpage)
+ ]
+ return self.playlist_result(entries, chart_id)
+
+
+class KuwoSingerIE(InfoExtractor):
+ _WORKING = False
+ IE_NAME = 'kuwo:singer'
+ IE_DESC = '酷我音乐 - 歌手'
+ _VALID_URL = r'https?://(?:www\.)?kuwo\.cn/mingxing/(?P<id>[^/]+)'
+ _TESTS = [{
+ 'url': 'http://www.kuwo.cn/mingxing/bruno+mars/',
+ 'info_dict': {
+ 'id': 'bruno+mars',
+ 'title': 'Bruno\xa0Mars',
+ },
+ 'playlist_mincount': 329,
+ }, {
+ 'url': 'http://www.kuwo.cn/mingxing/Ali/music.htm',
+ 'info_dict': {
+ 'id': 'Ali',
+ 'title': 'Ali',
+ },
+ 'playlist_mincount': 95,
+ 'skip': 'Regularly stalls travis build', # See https://travis-ci.org/ytdl-org/youtube-dl/jobs/78878540
+ }]
+
+ PAGE_SIZE = 15
+
+ def _real_extract(self, url):
+ singer_id = self._match_id(url)
+ webpage = self._download_webpage(
+ url, singer_id, note='Download singer info',
+ errnote='Unable to get singer info')
+
+ singer_name = self._html_search_regex(
+ r'<h1>([^<]+)</h1>', webpage, 'singer name')
+
+ artist_id = self._html_search_regex(
+ r'data-artistid="(\d+)"', webpage, 'artist id')
+
+ page_count = int(self._html_search_regex(
+ r'data-page="(\d+)"', webpage, 'page count'))
+
+ def page_func(page_num):
+ webpage = self._download_webpage(
+ 'http://www.kuwo.cn/artist/contentMusicsAjax',
+ singer_id, note='Download song list page #%d' % (page_num + 1),
+ errnote='Unable to get song list page #%d' % (page_num + 1),
+ query={'artistId': artist_id, 'pn': page_num, 'rn': self.PAGE_SIZE})
+
+ return [
+ self.url_result(compat_urlparse.urljoin(url, song_url), 'Kuwo')
+ for song_url in re.findall(
+ r'<div[^>]+class="name"><a[^>]+href="(/yinyue/\d+)',
+ webpage)
+ ]
+
+ entries = InAdvancePagedList(page_func, page_count, self.PAGE_SIZE)
+
+ return self.playlist_result(entries, singer_id, singer_name)
+
+
+class KuwoCategoryIE(InfoExtractor):
+ _WORKING = False
+ IE_NAME = 'kuwo:category'
+ IE_DESC = '酷我音乐 - 分类'
+ _VALID_URL = r'https?://yinyue\.kuwo\.cn/yy/cinfo_(?P<id>\d+?)\.htm'
+ _TEST = {
+ 'url': 'http://yinyue.kuwo.cn/yy/cinfo_86375.htm',
+ 'info_dict': {
+ 'id': '86375',
+ 'title': '八十年代精选',
+ 'description': '这些都是属于八十年代的回忆!',
+ },
+ 'playlist_mincount': 24,
+ }
+
+ def _real_extract(self, url):
+ category_id = self._match_id(url)
+ webpage = self._download_webpage(
+ url, category_id, note='Download category info',
+ errnote='Unable to get category info')
+
+ category_name = self._html_search_regex(
+ r'<h1[^>]+title="([^<>]+?)">[^<>]+?</h1>', webpage, 'category name')
+
+ category_desc = remove_start(
+ get_element_by_id('intro', webpage).strip(),
+ '%s简介:' % category_name)
+ if category_desc == '暂无':
+ category_desc = None
+
+ jsonm = self._parse_json(self._html_search_regex(
+ r'var\s+jsonm\s*=\s*([^;]+);', webpage, 'category songs'), category_id)
+
+ entries = [
+ self.url_result('http://www.kuwo.cn/yinyue/%s/' % song['musicrid'], 'Kuwo')
+ for song in jsonm['musiclist']
+ ]
+ return self.playlist_result(entries, category_id, category_name, category_desc)
+
+
+class KuwoMvIE(KuwoBaseIE):
+ _WORKING = False
+ IE_NAME = 'kuwo:mv'
+ IE_DESC = '酷我音乐 - MV'
+ _VALID_URL = r'https?://(?:www\.)?kuwo\.cn/mv/(?P<id>\d+?)/'
+ _TEST = {
+ 'url': 'http://www.kuwo.cn/mv/6480076/',
+ 'info_dict': {
+ 'id': '6480076',
+ 'ext': 'mp4',
+ 'title': 'My HouseMV',
+ 'creator': '2PM',
+ },
+ # In this video, music URLs (anti.s) are blocked outside China and
+ # USA, while the MV URL (mvurl) is available globally, so force the MV
+ # URL for consistent results in different countries
+ 'params': {
+ 'format': 'mv',
+ },
+ }
+ _FORMATS = KuwoBaseIE._FORMATS + [
+ {'format': 'mkv', 'ext': 'mkv', 'preference': 250},
+ {'format': 'mp4', 'ext': 'mp4', 'preference': 200},
+ ]
+
+ def _real_extract(self, url):
+ song_id = self._match_id(url)
+ webpage = self._download_webpage(
+ url, song_id, note='Download mv detail info: %s' % song_id,
+ errnote='Unable to get mv detail info: %s' % song_id)
+
+ mobj = re.search(
+ r'<h1[^>]+title="(?P<song>[^"]+)">[^<]+<span[^>]+title="(?P<singer>[^"]+)"',
+ webpage)
+ if mobj:
+ song_name = mobj.group('song')
+ singer_name = mobj.group('singer')
+ else:
+ raise ExtractorError('Unable to find song or singer names')
+
+ formats = self._get_formats(song_id, tolerate_ip_deny=True)
+
+ mv_url = self._download_webpage(
+ 'http://www.kuwo.cn/yy/st/mvurl?rid=MUSIC_%s' % song_id,
+ song_id, note='Download %s MV URL' % song_id)
+ formats.append({
+ 'url': mv_url,
+ 'format_id': 'mv',
+ })
+
+ return {
+ 'id': song_id,
+ 'title': song_name,
+ 'creator': singer_name,
+ 'formats': formats,
+ }
diff --git a/yt_dlp/extractor/la7.py b/yt_dlp/extractor/la7.py
new file mode 100644
index 0000000..f5fd241
--- /dev/null
+++ b/yt_dlp/extractor/la7.py
@@ -0,0 +1,234 @@
+import re
+
+from .common import InfoExtractor
+from ..networking import HEADRequest
+from ..utils import float_or_none, int_or_none, parse_duration, unified_strdate
+
+
+class LA7IE(InfoExtractor):
+ IE_NAME = 'la7.it'
+ _VALID_URL = r'''(?x)https?://(?:
+ (?:www\.)?la7\.it/([^/]+)/(?:rivedila7|video|news)/|
+ tg\.la7\.it/repliche-tgla7\?id=
+ )(?P<id>.+)'''
+
+ _TESTS = [{
+ # single quality video
+ 'url': 'http://www.la7.it/crozza/video/inccool8-02-10-2015-163722',
+ 'md5': '8b613ffc0c4bf9b9e377169fc19c214c',
+ 'info_dict': {
+ 'id': 'inccool8-02-10-2015-163722',
+ 'ext': 'mp4',
+ 'title': 'Inc.Cool8',
+ 'description': 'Benvenuti nell\'incredibile mondo della INC. COOL. 8. dove “INC.” sta per “Incorporated” “COOL” sta per “fashion” ed Eight sta per il gesto atletico',
+ 'thumbnail': 're:^https?://.*',
+ 'upload_date': '20151002',
+ 'formats': 'count:4',
+ },
+ }, {
+ # multiple quality video
+ 'url': 'https://www.la7.it/calcio-femminile/news/il-gol-di-lindsey-thomas-fiorentina-vs-milan-serie-a-calcio-femminile-26-11-2022-461736',
+ 'md5': 'd2370e78f75e8d1238cb3a0db9a2eda3',
+ 'info_dict': {
+ 'id': 'il-gol-di-lindsey-thomas-fiorentina-vs-milan-serie-a-calcio-femminile-26-11-2022-461736',
+ 'ext': 'mp4',
+ 'title': 'Il gol di Lindsey Thomas | Fiorentina vs Milan | Serie A Calcio Femminile',
+ 'description': 'Il gol di Lindsey Thomas | Fiorentina vs Milan | Serie A Calcio Femminile',
+ 'thumbnail': 're:^https?://.*',
+ 'upload_date': '20221126',
+ 'formats': 'count:8',
+ },
+ }, {
+ 'url': 'http://www.la7.it/omnibus/rivedila7/omnibus-news-02-07-2016-189077',
+ 'only_matching': True,
+ }]
+ _HOST = 'https://awsvodpkg.iltrovatore.it'
+
+ def _generate_mp4_url(self, quality, m3u8_formats):
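+ # reuse the metadata of the matching HLS variant for the progressive MP4,
+ # verifying with a HEAD request that the file actually exists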
+ for f in m3u8_formats:
+ if f['vcodec'] != 'none' and quality in f['url']:
+ http_url = f'{self._HOST}{quality}.mp4'
+
+ urlh = self._request_webpage(
+ HEADRequest(http_url), quality,
+ note='Check filesize', fatal=False)
+ if urlh:
+ http_f = f.copy()
+ del http_f['manifest_url']
+ http_f.update({
+ 'format_id': http_f['format_id'].replace('hls-', 'https-'),
+ 'url': http_url,
+ 'protocol': 'https',
+ 'filesize_approx': int_or_none(urlh.headers.get('Content-Length')),
+ })
+ return http_f
+ return None
+
+ def _real_extract(self, url):
+ video_id = self._match_id(url)
+ webpage = self._download_webpage(url, video_id)
+
+ if re.search(r'(?i)(drmsupport\s*:\s*true)\s*', webpage):
+ self.report_drm(video_id)
+
+ video_path = self._search_regex(
+ r'(/content/[\w/,]+?)\.mp4(?:\.csmil)?/master\.m3u8', webpage, 'video_path')
+
+ formats = self._extract_mpd_formats(
+ f'{self._HOST}/local/dash/,{video_path}.mp4.urlset/manifest.mpd',
+ video_id, mpd_id='dash', fatal=False)
+ m3u8_formats = self._extract_m3u8_formats(
+ f'{self._HOST}/local/hls/,{video_path}.mp4.urlset/master.m3u8',
+ video_id, 'mp4', m3u8_id='hls', fatal=False)
+ formats.extend(m3u8_formats)
+
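+ # the .urlset path packs the available qualities as a comma-separated list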
+ for q in filter(None, video_path.split(',')):
+ http_f = self._generate_mp4_url(q, m3u8_formats)
+ if http_f:
+ formats.append(http_f)
+
+ return {
+ 'id': video_id,
+ 'title': self._og_search_title(webpage, default=None),
+ 'description': self._og_search_description(webpage, default=None),
+ 'thumbnail': self._og_search_thumbnail(webpage, default=None),
+ 'formats': formats,
+ 'upload_date': unified_strdate(self._search_regex(r'datetime="(.+?)"', webpage, 'upload_date', fatal=False))
+ }
+
+
+class LA7PodcastEpisodeIE(InfoExtractor):
+ IE_NAME = 'la7.it:pod:episode'
+ _VALID_URL = r'https?://(?:www\.)?la7\.it/[^/]+/podcast/([^/]+-)?(?P<id>\d+)'
+
+ _TESTS = [{
+ 'url': 'https://www.la7.it/voicetown/podcast/la-carezza-delle-memoria-di-carlo-verdone-23-03-2021-371497',
+ 'md5': '7737d4d79b3c1a34b3de3e16297119ed',
+ 'info_dict': {
+ 'id': '371497',
+ 'ext': 'mp3',
+ 'title': '"La carezza delle memoria" di Carlo Verdone',
+ 'description': 'md5:5abf07c3c551a687db80af3f9ceb7d52',
+ 'thumbnail': 'https://www.la7.it/sites/default/files/podcast/371497.jpg',
+ 'upload_date': '20210323',
+ },
+ }, {
+ # embed url
+ 'url': 'https://www.la7.it/embed/podcast/371497',
+ 'only_matching': True,
+ }, {
+ # date already in the title
+ 'url': 'https://www.la7.it/propagandalive/podcast/lintervista-di-diego-bianchi-ad-annalisa-cuzzocrea-puntata-del-1932021-20-03-2021-371130',
+ 'only_matching': True,
+ }, {
+ # title same as show_title
+ 'url': 'https://www.la7.it/otto-e-mezzo/podcast/otto-e-mezzo-26-03-2021-372340',
+ 'only_matching': True,
+ }]
+
+ def _extract_info(self, webpage, video_id=None, ppn=None):
+ if not video_id:
+ video_id = self._search_regex(
+ r'data-nid=([\'"])(?P<vid>\d+)\1',
+ webpage, 'video_id', group='vid')
+
+ media_url = self._search_regex(
+ (r'src\s*:\s*([\'"])(?P<url>\S+?mp3.+?)\1',
+ r'data-podcast\s*=\s*([\'"])(?P<url>\S+?mp3.+?)\1'),
+ webpage, 'media_url', group='url')
+ formats = [{
+ 'url': media_url,
+ 'format_id': 'http-mp3',
+ 'ext': 'mp3',
+ 'acodec': 'mp3',
+ 'vcodec': 'none',
+ }]
+
+ title = self._html_search_regex(
+ (r'<div class="title">(?P<title>.+?)</',
+ r'<title>(?P<title>[^<]+)</title>',
+ r'title:\s*([\'"])(?P<title>.+?)\1'),
+ webpage, 'title', group='title')
+
+ description = (
+ self._html_search_regex(
+ (r'<div class="description">(.+?)</div>',
+ r'<div class="description-mobile">(.+?)</div>',
+ r'<div class="box-txt">([^<]+?)</div>',
+ r'<div class="field-content"><p>(.+?)</p></div>'),
+ webpage, 'description', default=None)
+ or self._html_search_meta('description', webpage))
+
+ thumb = self._html_search_regex(
+ (r'<div class="podcast-image"><img src="(.+?)"></div>',
+ r'<div class="container-embed"[^<]+url\((.+?)\);">',
+ r'<div class="field-content"><img src="(.+?)"'),
+ webpage, 'thumbnail', fatal=False, default=None)
+
+ duration = parse_duration(self._html_search_regex(
+ r'<span class="(?:durata|duration)">([\d:]+)</span>',
+ webpage, 'duration', fatal=False, default=None))
+
+ date = self._html_search_regex(
+ r'class="data">\s*(?:<span>)?([\d\.]+)\s*</',
+ webpage, 'date', default=None)
+
+ date_alt = self._search_regex(
+ r'(\d+[\./]\d+[\./]\d+)', title, 'date_alt', default=None)
+ ppn = ppn or self._search_regex(
+ r'ppN:\s*([\'"])(?P<ppn>.+?)\1',
+ webpage, 'ppn', group='ppn', default=None)
+ # if the date is not in the title
+ # and title is the same as the show_title
+ # add the date to the title
+ if date and not date_alt and ppn and ppn.lower() == title.lower():
+ title = f'{title} del {date}'
+ return {
+ 'id': video_id,
+ 'title': title,
+ 'description': description,
+ 'duration': float_or_none(duration),
+ 'formats': formats,
+ 'thumbnail': thumb,
+ 'upload_date': unified_strdate(date),
+ }
+
+ def _real_extract(self, url):
+ video_id = self._match_id(url)
+ webpage = self._download_webpage(url, video_id)
+
+ return self._extract_info(webpage, video_id)
+
+
+class LA7PodcastIE(LA7PodcastEpisodeIE): # XXX: Do not subclass from concrete IE
+ IE_NAME = 'la7.it:podcast'
+ _VALID_URL = r'https?://(?:www\.)?la7\.it/(?P<id>[^/]+)/podcast/?(?:$|[#?])'
+
+ _TESTS = [{
+ 'url': 'https://www.la7.it/propagandalive/podcast',
+ 'info_dict': {
+ 'id': 'propagandalive',
+ 'title': 'Propaganda Live',
+ },
+ 'playlist_mincount': 10,
+ }]
+
+ def _real_extract(self, url):
+ playlist_id = self._match_id(url)
+ webpage = self._download_webpage(url, playlist_id)
+
+ title = (
+ self._html_search_regex(
+ r'<h1.*?>(.+?)</h1>', webpage, 'title', fatal=False, default=None)
+ or self._og_search_title(webpage))
+ ppn = self._search_regex(
+ r'window\.ppN\s*=\s*([\'"])(?P<ppn>.+?)\1',
+ webpage, 'ppn', group='ppn', default=None)
+
+ entries = []
+ for episode in re.finditer(
+ r'<div class="container-podcast-property">([\s\S]+?)(?:</div>\s*){3}',
+ webpage):
+ entries.append(self._extract_info(episode.group(1), ppn=ppn))
+
+ return self.playlist_result(entries, playlist_id, title)
diff --git a/yt_dlp/extractor/lastfm.py b/yt_dlp/extractor/lastfm.py
new file mode 100644
index 0000000..6710335
--- /dev/null
+++ b/yt_dlp/extractor/lastfm.py
@@ -0,0 +1,129 @@
+import itertools
+import re
+
+from .common import InfoExtractor
+from ..utils import int_or_none, parse_qs, traverse_obj
+
+
+class LastFMPlaylistBaseIE(InfoExtractor):
+ def _entries(self, url, playlist_id):
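+ # an explicit ?page= in the URL pins extraction to that page only;
+ # otherwise keep paging until a page yields no videos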
+ single_page = traverse_obj(parse_qs(url), ('page', -1, {int_or_none}))
+ for page in itertools.count(single_page or 1):
+ webpage = self._download_webpage(
+ url, playlist_id, f'Downloading page {page}', query={'page': page})
+ videos = re.findall(r'data-youtube-url="([^"]+)"', webpage)
+ yield from videos
+ if single_page or not videos:
+ return
+
+ def _real_extract(self, url):
+ playlist_id = self._match_id(url)
+ return self.playlist_from_matches(self._entries(url, playlist_id), playlist_id, ie='Youtube')
+
+
+class LastFMPlaylistIE(LastFMPlaylistBaseIE):
+ _VALID_URL = r'https?://(?:www\.)?last\.fm/(music|tag)/(?P<id>[^/]+)(?:/[^/]+)?/?(?:[?#]|$)'
+ _TESTS = [{
+ 'url': 'https://www.last.fm/music/Oasis/(What%27s+the+Story)+Morning+Glory%3F',
+ 'info_dict': {
+ 'id': 'Oasis',
+ },
+ 'playlist_mincount': 11,
+ }, {
+ 'url': 'https://www.last.fm/music/Oasis',
+ 'only_matching': True,
+ }, {
+ 'url': 'https://www.last.fm/music/Oasis/',
+ 'only_matching': True,
+ }, {
+ 'url': 'https://www.last.fm/music/Oasis?top_tracks_date_preset=ALL#top-tracks',
+ 'only_matching': True,
+ }, {
+ 'url': 'https://www.last.fm/music/Oasis/+tracks',
+ 'only_matching': True,
+ }, {
+ 'url': 'https://www.last.fm/music/Oasis/+tracks?page=2',
+ 'only_matching': True,
+ }, {
+ 'url': 'https://www.last.fm/music/Oasis/+tracks?date_preset=LAST_90_DAYS#top-tracks',
+ 'only_matching': True,
+ }, {
+ 'url': 'https://www.last.fm/tag/rock',
+ 'only_matching': True,
+ }, {
+ 'url': 'https://www.last.fm/tag/rock/tracks',
+ 'only_matching': True,
+ }]
+
+
+class LastFMUserIE(LastFMPlaylistBaseIE):
+ _VALID_URL = r'https?://(?:www\.)?last\.fm/user/[^/]+/playlists/(?P<id>[^/#?]+)'
+ _TESTS = [{
+ 'url': 'https://www.last.fm/user/mehq/playlists/12319471',
+ 'info_dict': {
+ 'id': '12319471',
+ },
+ 'playlist_count': 30,
+ }, {
+ 'url': 'https://www.last.fm/user/naamloos1/playlists/12543760',
+ 'info_dict': {
+ 'id': '12543760',
+ },
+ 'playlist_mincount': 80,
+ }, {
+ 'url': 'https://www.last.fm/user/naamloos1/playlists/12543760?page=3',
+ 'info_dict': {
+ 'id': '12543760',
+ },
+ 'playlist_count': 32,
+ }]
+
+
+class LastFMIE(InfoExtractor):
+ _VALID_URL = r'https?://(?:www\.)?last\.fm/music(?:/[^/]+){2}/(?P<id>[^/#?]+)'
+ _TESTS = [{
+ 'url': 'https://www.last.fm/music/Oasis/_/Wonderwall',
+ 'md5': '9c4a70c2e84c03d54fe24229b9e13b7b',
+ 'info_dict': {
+ 'id': '6hzrDeceEKc',
+ 'ext': 'mp4',
+ 'title': 'Oasis - Wonderwall (Official Video)',
+ 'thumbnail': r're:^https?://i.ytimg.com/.*\.jpg$',
+ 'description': 'md5:0848669853c10687cc28e88b5756738f',
+ 'uploader': 'Oasis',
+ 'uploader_id': 'oasisinetofficial',
+ 'upload_date': '20080207',
+ 'album': '(What\'s The Story) Morning Glory? (Remastered)',
+ 'track': 'Wonderwall (Remastered)',
+ 'channel_id': 'UCUDVBtnOQi4c7E8jebpjc9Q',
+ 'view_count': int,
+ 'live_status': 'not_live',
+ 'channel_url': 'https://www.youtube.com/channel/UCUDVBtnOQi4c7E8jebpjc9Q',
+ 'tags': 'count:39',
+ 'creator': 'Oasis',
+ 'uploader_url': 're:^https?://www.youtube.com/user/oasisinetofficial',
+ 'duration': 279,
+ 'alt_title': 'Wonderwall (Remastered)',
+ 'age_limit': 0,
+ 'channel': 'Oasis',
+ 'channel_follower_count': int,
+ 'categories': ['Music'],
+ 'availability': 'public',
+ 'like_count': int,
+ 'playable_in_embed': True,
+ 'artist': 'Oasis',
+ },
+ 'add_ie': ['Youtube'],
+ }, {
+ 'url': 'https://www.last.fm/music/Oasis/_/Don%27t+Look+Back+In+Anger+-+Remastered/',
+ 'only_matching': True,
+ }, {
+ 'url': 'https://www.last.fm/music/Guns+N%27+Roses/_/Sweet+Child+o%27+Mine',
+ 'only_matching': True,
+ }]
+
+ def _real_extract(self, url):
+ video_id = self._match_id(url)
+ webpage = self._download_webpage(url, video_id)
+ player_url = self._search_regex(r'(?s)class="header-new-playlink"\s+href="([^"]+)"', webpage, 'player_url')
+ return self.url_result(player_url, 'Youtube')
diff --git a/yt_dlp/extractor/laxarxames.py b/yt_dlp/extractor/laxarxames.py
new file mode 100644
index 0000000..e157f7c
--- /dev/null
+++ b/yt_dlp/extractor/laxarxames.py
@@ -0,0 +1,73 @@
+import json
+
+from .brightcove import BrightcoveNewIE
+from .common import InfoExtractor
+from ..utils import ExtractorError
+from ..utils.traversal import traverse_obj
+
+
+class LaXarxaMesIE(InfoExtractor):
+ _VALID_URL = r'https?://(?:www\.)?laxarxames\.cat/(?:[^/?#]+/)*?(?:player|movie-details)/(?P<id>\d+)'
+ _NETRC_MACHINE = 'laxarxames'
+ _TOKEN = None
+ _TESTS = [{
+ 'url': 'https://www.laxarxames.cat/player/3459421',
+ 'md5': '0966f46c34275934c19af78f3df6e2bc',
+ 'info_dict': {
+ 'id': '6339612436112',
+ 'ext': 'mp4',
+ 'title': 'Resum | UA Horta — UD Viladecans',
+ 'timestamp': 1697905186,
+ 'thumbnail': r're:https?://.*\.jpg',
+ 'description': '',
+ 'upload_date': '20231021',
+ 'duration': 129.44,
+ 'tags': ['ott', 'esports', '23-24', ' futbol', ' futbol-partits', 'elit', 'resum'],
+ 'uploader_id': '5779379807001',
+ },
+ 'skip': 'Requires login',
+ }]
+
+ def _perform_login(self, username, password):
+ if self._TOKEN:
+ return
+
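+ # bad credentials answer 401; accept it so we can raise a clearer error below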
+ login = self._download_json(
+ 'https://api.laxarxames.cat/Authorization/SignIn', None, note='Logging in', headers={
+ 'X-Tenantorigin': 'https://laxarxames.cat',
+ 'Content-Type': 'application/json',
+ }, data=json.dumps({
+ 'Username': username,
+ 'Password': password,
+ 'Device': {
+ 'PlatformCode': 'WEB',
+ 'Name': 'Mac OS ()',
+ },
+ }).encode(), expected_status=401)
+
+ self._TOKEN = traverse_obj(login, ('AuthorizationToken', 'Token', {str}))
+ if not self._TOKEN:
+ raise ExtractorError('Login failed', expected=True)
+
+ def _real_extract(self, url):
+ video_id = self._match_id(url)
+ if not self._TOKEN:
+ self.raise_login_required()
+
+ media_play_info = self._download_json(
+ 'https://api.laxarxames.cat/Media/GetMediaPlayInfo', video_id,
+ data=json.dumps({
+ 'MediaId': int(video_id),
+ 'StreamType': 'MAIN'
+ }).encode(), headers={
+ 'Authorization': f'Bearer {self._TOKEN}',
+ 'X-Tenantorigin': 'https://laxarxames.cat',
+ 'Content-Type': 'application/json',
+ })
+
+ if not traverse_obj(media_play_info, ('ContentUrl', {str})):
+ self.raise_no_formats('No video found', expected=True)
+
+ return self.url_result(
+ f'https://players.brightcove.net/5779379807001/default_default/index.html?videoId={media_play_info["ContentUrl"]}',
+ BrightcoveNewIE, video_id, media_play_info.get('Title'))
diff --git a/yt_dlp/extractor/lbry.py b/yt_dlp/extractor/lbry.py
new file mode 100644
index 0000000..dcb44d0
--- /dev/null
+++ b/yt_dlp/extractor/lbry.py
@@ -0,0 +1,429 @@
+import functools
+import json
+import re
+import urllib.parse
+
+from .common import InfoExtractor
+from ..networking import HEADRequest
+from ..utils import (
+ ExtractorError,
+ OnDemandPagedList,
+ UnsupportedError,
+ determine_ext,
+ int_or_none,
+ mimetype2ext,
+ parse_qs,
+ traverse_obj,
+ try_get,
+ url_or_none,
+ urlhandle_detect_ext,
+ urljoin,
+)
+
+
+class LBRYBaseIE(InfoExtractor):
+ _BASE_URL_REGEX = r'(?x)(?:https?://(?:www\.)?(?:lbry\.tv|odysee\.com)/|lbry://)'
+ _CLAIM_ID_REGEX = r'[0-9a-f]{1,40}'
+ _OPT_CLAIM_ID = '[^$@:/?#&]+(?:[:#]%s)?' % _CLAIM_ID_REGEX
+ _SUPPORTED_STREAM_TYPES = ['video', 'audio']
+ _PAGE_SIZE = 50
+
+ def _call_api_proxy(self, method, display_id, params, resource):
+ headers = {'Content-Type': 'application/json-rpc'}
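+ # forward odysee.com's auth_token cookie (if any) with API calls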
+ token = try_get(self._get_cookies('https://odysee.com'), lambda x: x['auth_token'].value)
+ if token:
+ headers['x-lbry-auth-token'] = token
+ response = self._download_json(
+ 'https://api.lbry.tv/api/v1/proxy',
+ display_id, 'Downloading %s JSON metadata' % resource,
+ headers=headers,
+ data=json.dumps({
+ 'method': method,
+ 'params': params,
+ }).encode())
+ err = response.get('error')
+ if err:
+ raise ExtractorError(
+ f'{self.IE_NAME} said: {err.get("code")} - {err.get("message")}', expected=True)
+ return response['result']
+
+ def _resolve_url(self, url, display_id, resource):
+ return self._call_api_proxy(
+ 'resolve', display_id, {'urls': url}, resource)[url]
+
+ def _permanent_url(self, url, claim_name, claim_id):
+ return urljoin(
+ url.replace('lbry://', 'https://lbry.tv/'),
+ '/%s:%s' % (claim_name, claim_id))
+
+ def _parse_stream(self, stream, url):
+ stream_type = traverse_obj(stream, ('value', 'stream_type', {str}))
+
+ info = traverse_obj(stream, {
+ 'title': ('value', 'title', {str}),
+ 'thumbnail': ('value', 'thumbnail', 'url', {url_or_none}),
+ 'description': ('value', 'description', {str}),
+ 'license': ('value', 'license', {str}),
+ 'timestamp': ('timestamp', {int_or_none}),
+ 'release_timestamp': ('value', 'release_time', {int_or_none}),
+ 'tags': ('value', 'tags', ..., {lambda x: x or None}),
+ 'duration': ('value', stream_type, 'duration', {int_or_none}),
+ 'channel': ('signing_channel', 'value', 'title', {str}),
+ 'channel_id': ('signing_channel', 'claim_id', {str}),
+ 'uploader_id': ('signing_channel', 'name', {str}),
+ })
+
+ if info.get('uploader_id') and info.get('channel_id'):
+ info['channel_url'] = self._permanent_url(url, info['uploader_id'], info['channel_id'])
+
+ return info
+
+ def _fetch_page(self, display_id, url, params, page):
+ page += 1
+ page_params = {
+ 'no_totals': True,
+ 'page': page,
+ 'page_size': self._PAGE_SIZE,
+ **params,
+ }
+ result = self._call_api_proxy(
+ 'claim_search', display_id, page_params, f'page {page}')
+ for item in traverse_obj(result, ('items', lambda _, v: v['name'] and v['claim_id'])):
+ yield {
+ **self._parse_stream(item, url),
+ '_type': 'url',
+ 'id': item['claim_id'],
+ 'url': self._permanent_url(url, item['name'], item['claim_id']),
+ }
+
+ def _playlist_entries(self, url, display_id, claim_param, metadata):
+ qs = parse_qs(url)
+ content = qs.get('content', [None])[0]
+ params = {
+ 'fee_amount': qs.get('fee_amount', ['>=0'])[0],
+ 'order_by': {
+ 'new': ['release_time'],
+ 'top': ['effective_amount'],
+ 'trending': ['trending_group', 'trending_mixed'],
+ }[qs.get('order', ['new'])[0]],
+ 'claim_type': 'stream',
+ 'stream_types': [content] if content in ['audio', 'video'] else self._SUPPORTED_STREAM_TYPES,
+ **claim_param,
+ }
+ duration = qs.get('duration', [None])[0]
+ if duration:
+ params['duration'] = {
+ 'long': '>=1200',
+ 'short': '<=240',
+ }[duration]
+ language = qs.get('language', ['all'])[0]
+ if language != 'all':
+ languages = [language]
+ if language == 'en':
+ languages.append('none')
+ params['any_languages'] = languages
+
+ entries = OnDemandPagedList(
+ functools.partial(self._fetch_page, display_id, url, params),
+ self._PAGE_SIZE)
+
+ return self.playlist_result(
+ entries, display_id, **traverse_obj(metadata, ('value', {
+ 'title': 'title',
+ 'description': 'description',
+ })))
+
+
+class LBRYIE(LBRYBaseIE):
+ IE_NAME = 'lbry'
+ _VALID_URL = LBRYBaseIE._BASE_URL_REGEX + rf'''
+ (?:\$/(?:download|embed)/)?
+ (?P<id>
+ [^$@:/?#]+/{LBRYBaseIE._CLAIM_ID_REGEX}
+ |(?:@{LBRYBaseIE._OPT_CLAIM_ID}/)?{LBRYBaseIE._OPT_CLAIM_ID}
+ )'''
+ _TESTS = [{
+ # Video
+ 'url': 'https://lbry.tv/@Mantega:1/First-day-LBRY:1',
+ 'md5': '65bd7ec1f6744ada55da8e4c48a2edf9',
+ 'info_dict': {
+ 'id': '17f983b61f53091fb8ea58a9c56804e4ff8cff4d',
+ 'ext': 'mp4',
+ 'title': 'First day in LBRY? Start HERE!',
+ 'description': 'md5:f6cb5c704b332d37f5119313c2c98f51',
+ 'timestamp': 1595694354,
+ 'upload_date': '20200725',
+ 'release_timestamp': 1595340697,
+ 'release_date': '20200721',
+ 'width': 1280,
+ 'height': 720,
+ 'thumbnail': 'https://spee.ch/7/67f2d809c263288c.png',
+ 'license': 'None',
+ 'uploader_id': '@Mantega',
+ 'duration': 346,
+ 'channel': 'LBRY/Odysee rats united!!!',
+ 'channel_id': '1c8ad6a2ab4e889a71146ae4deeb23bb92dab627',
+ 'channel_url': 'https://lbry.tv/@Mantega:1c8ad6a2ab4e889a71146ae4deeb23bb92dab627',
+ 'tags': [
+ 'first day in lbry',
+ 'lbc',
+ 'lbry',
+ 'start',
+ 'tutorial'
+ ],
+ }
+ }, {
+ # Audio
+ 'url': 'https://lbry.tv/@LBRYFoundation:0/Episode-1:e',
+ 'md5': 'c94017d3eba9b49ce085a8fad6b98d00',
+ 'info_dict': {
+ 'id': 'e7d93d772bd87e2b62d5ab993c1c3ced86ebb396',
+ 'ext': 'mp3',
+ 'title': 'The LBRY Foundation Community Podcast Episode 1 - Introduction, Streaming on LBRY, Transcoding',
+ 'description': 'md5:661ac4f1db09f31728931d7b88807a61',
+ 'timestamp': 1591312601,
+ 'upload_date': '20200604',
+ 'release_timestamp': 1591312421,
+ 'release_date': '20200604',
+ 'tags': list,
+ 'duration': 2570,
+ 'channel': 'The LBRY Foundation',
+ 'channel_id': '0ed629d2b9c601300cacf7eabe9da0be79010212',
+ 'channel_url': 'https://lbry.tv/@LBRYFoundation:0ed629d2b9c601300cacf7eabe9da0be79010212',
+ 'vcodec': 'none',
+ 'thumbnail': 'https://spee.ch/d/0bc63b0e6bf1492d.png',
+ 'license': 'None',
+ 'uploader_id': '@LBRYFoundation',
+ }
+ }, {
+ 'url': 'https://odysee.com/@gardeningincanada:b/plants-i-will-never-grow-again.-the:e',
+ 'md5': 'c35fac796f62a14274b4dc2addb5d0ba',
+ 'info_dict': {
+ 'id': 'e51671357333fe22ae88aad320bde2f6f96b1410',
+ 'ext': 'mp4',
+ 'title': 'PLANTS I WILL NEVER GROW AGAIN. THE BLACK LIST PLANTS FOR A CANADIAN GARDEN | Gardening in Canada 🍁',
+ 'description': 'md5:9c539c6a03fb843956de61a4d5288d5e',
+ 'timestamp': 1618254123,
+ 'upload_date': '20210412',
+ 'release_timestamp': 1618254002,
+ 'release_date': '20210412',
+ 'tags': list,
+ 'duration': 554,
+ 'channel': 'Gardening In Canada',
+ 'channel_id': 'b8be0e93b423dad221abe29545fbe8ec36e806bc',
+ 'channel_url': 'https://odysee.com/@gardeningincanada:b8be0e93b423dad221abe29545fbe8ec36e806bc',
+ 'uploader_id': '@gardeningincanada',
+ 'formats': 'mincount:3',
+ 'thumbnail': 'https://thumbnails.lbry.com/AgHSc_HzrrE',
+ 'license': 'Copyrighted (contact publisher)',
+ }
+ }, {
+ # HLS live stream (might expire)
+ 'url': 'https://odysee.com/@RT:fd/livestream_RT:d',
+ 'info_dict': {
+ 'id': 'fdd11cb3ab75f95efb7b3bc2d726aa13ac915b66',
+ 'ext': 'mp4',
+ 'live_status': 'is_live',
+ 'title': 'startswith:RT News | Livestream 24/7',
+ 'description': 'md5:fe68d0056dfe79c1a6b8ce8c34d5f6fa',
+ 'timestamp': int,
+ 'upload_date': str,
+ 'release_timestamp': int,
+ 'release_date': str,
+ 'tags': list,
+ 'channel': 'RT',
+ 'channel_id': 'fdd11cb3ab75f95efb7b3bc2d726aa13ac915b66',
+ 'channel_url': 'https://odysee.com/@RT:fdd11cb3ab75f95efb7b3bc2d726aa13ac915b66',
+ 'formats': 'mincount:1',
+ 'thumbnail': 'startswith:https://thumb',
+ 'license': 'None',
+ 'uploader_id': '@RT',
+ },
+ 'params': {'skip_download': True}
+ }, {
+ # original quality format w/higher resolution than HLS formats
+ 'url': 'https://odysee.com/@wickedtruths:2/Biotechnological-Invasion-of-Skin-(April-2023):4',
+ 'md5': '305b0b3b369bde1b984961f005b67193',
+ 'info_dict': {
+ 'id': '41fbfe805eb73c8d3012c0c49faa0f563274f634',
+ 'ext': 'mp4',
+ 'title': 'Biotechnological Invasion of Skin (April 2023)',
+ 'description': 'md5:fe28689db2cb7ba3436d819ac3ffc378',
+ 'channel': 'Wicked Truths',
+ 'channel_id': '23d2bbf856b0ceed5b1d7c5960bcc72da5a20cb0',
+ 'channel_url': 'https://odysee.com/@wickedtruths:23d2bbf856b0ceed5b1d7c5960bcc72da5a20cb0',
+ 'uploader_id': '@wickedtruths',
+ 'timestamp': 1695114347,
+ 'upload_date': '20230919',
+ 'release_timestamp': 1685617473,
+ 'release_date': '20230601',
+ 'duration': 1063,
+ 'thumbnail': 'https://thumbs.odycdn.com/4e6d39da4df0cfdad45f64e253a15959.webp',
+ 'tags': ['smart skin surveillance', 'biotechnology invasion of skin', 'morgellons'],
+ 'license': 'None',
+ 'protocol': 'https', # test for direct mp4 download
+ },
+ }, {
+ 'url': 'https://odysee.com/@BrodieRobertson:5/apple-is-tracking-everything-you-do-on:e',
+ 'only_matching': True,
+ }, {
+ 'url': 'https://odysee.com/@ScammerRevolts:b0/I-SYSKEY\'D-THE-SAME-SCAMMERS-3-TIMES!:b',
+ 'only_matching': True,
+ }, {
+ 'url': 'https://lbry.tv/Episode-1:e7d93d772bd87e2b62d5ab993c1c3ced86ebb396',
+ 'only_matching': True,
+ }, {
+ 'url': 'https://lbry.tv/$/embed/Episode-1/e7d93d772bd87e2b62d5ab993c1c3ced86ebb396',
+ 'only_matching': True,
+ }, {
+ 'url': 'https://lbry.tv/Episode-1:e7',
+ 'only_matching': True,
+ }, {
+ 'url': 'https://lbry.tv/@LBRYFoundation/Episode-1',
+ 'only_matching': True,
+ }, {
+ 'url': 'https://lbry.tv/$/download/Episode-1/e7d93d772bd87e2b62d5ab993c1c3ced86ebb396',
+ 'only_matching': True,
+ }, {
+ 'url': 'https://lbry.tv/@lacajadepandora:a/TRUMP-EST%C3%81-BIEN-PUESTO-con-Pilar-Baselga,-Carlos-Senra,-Luis-Palacios-(720p_30fps_H264-192kbit_AAC):1',
+ 'only_matching': True,
+ }, {
+ 'url': 'lbry://@lbry#3f/odysee#7',
+ 'only_matching': True,
+ }]
+
+ def _real_extract(self, url):
+ display_id = self._match_id(url)
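+        # Canonicalize the web path into an lbry:// URI: channel-prefixed
+        # paths use '#' instead of ':' before claim IDs, bare paths use ':'
+        # instead of '/', e.g. '@LBRYFoundation:0/Episode-1:e' becomes
+        # 'lbry://@LBRYFoundation#0/Episode-1#e'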
+ if display_id.startswith('@'):
+ display_id = display_id.replace(':', '#')
+ else:
+ display_id = display_id.replace('/', ':')
+ display_id = urllib.parse.unquote(display_id)
+ uri = 'lbry://' + display_id
+ result = self._resolve_url(uri, display_id, 'stream')
+ headers = {'Referer': 'https://odysee.com/'}
+
+ formats = []
+ stream_type = traverse_obj(result, ('value', 'stream_type', {str}))
+
+ if stream_type in self._SUPPORTED_STREAM_TYPES:
+ claim_id, is_live = result['claim_id'], False
+ streaming_url = self._call_api_proxy(
+ 'get', claim_id, {'uri': uri}, 'streaming url')['streaming_url']
+
+ # GET request to v3 API returns original video/audio file if available
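+            # (illustrative) https://host/api/v4/<path> -> https://host/api/v3/<path>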
+ direct_url = re.sub(r'/api/v\d+/', '/api/v3/', streaming_url)
+ urlh = self._request_webpage(
+ direct_url, display_id, 'Checking for original quality', headers=headers, fatal=False)
+ if urlh and urlhandle_detect_ext(urlh) != 'm3u8':
+ formats.append({
+ 'url': direct_url,
+ 'format_id': 'original',
+ 'quality': 1,
+ **traverse_obj(result, ('value', {
+ 'ext': ('source', (('name', {determine_ext}), ('media_type', {mimetype2ext}))),
+ 'filesize': ('source', 'size', {int_or_none}),
+ 'width': ('video', 'width', {int_or_none}),
+ 'height': ('video', 'height', {int_or_none}),
+ }), get_all=False),
+ 'vcodec': 'none' if stream_type == 'audio' else None,
+ })
+
+ # HEAD request returns redirect response to m3u8 URL if available
+ final_url = self._request_webpage(
+ HEADRequest(streaming_url), display_id, headers=headers,
+ note='Downloading streaming redirect url info').url
+
+ elif result.get('value_type') == 'stream':
+ claim_id, is_live = result['signing_channel']['claim_id'], True
+ live_data = self._download_json(
+ 'https://api.odysee.live/livestream/is_live', claim_id,
+ query={'channel_claim_id': claim_id},
+ note='Downloading livestream JSON metadata')['data']
+ final_url = live_data.get('VideoURL')
+ # Upcoming videos may still give VideoURL
+ if not live_data.get('Live'):
+ final_url = None
+ self.raise_no_formats('This stream is not live', True, claim_id)
+
+ else:
+ raise UnsupportedError(url)
+
+ if determine_ext(final_url) == 'm3u8':
+ formats.extend(self._extract_m3u8_formats(
+ final_url, display_id, 'mp4', m3u8_id='hls', live=is_live, headers=headers))
+
+ return {
+ **self._parse_stream(result, url),
+ 'id': claim_id,
+ 'formats': formats,
+ 'is_live': is_live,
+ 'http_headers': headers,
+ }
+
+
+class LBRYChannelIE(LBRYBaseIE):
+ IE_NAME = 'lbry:channel'
+ _VALID_URL = LBRYBaseIE._BASE_URL_REGEX + rf'(?P<id>@{LBRYBaseIE._OPT_CLAIM_ID})/?(?:[?&]|$)'
+ _TESTS = [{
+ 'url': 'https://lbry.tv/@LBRYFoundation:0',
+ 'info_dict': {
+ 'id': '0ed629d2b9c601300cacf7eabe9da0be79010212',
+ 'title': 'The LBRY Foundation',
+ 'description': 'Channel for the LBRY Foundation. Follow for updates and news.',
+ },
+ 'playlist_mincount': 29,
+ }, {
+ 'url': 'https://lbry.tv/@LBRYFoundation',
+ 'only_matching': True,
+ }, {
+ 'url': 'lbry://@lbry#3f',
+ 'only_matching': True,
+ }]
+
+ def _real_extract(self, url):
+ display_id = self._match_id(url).replace(':', '#')
+ result = self._resolve_url(f'lbry://{display_id}', display_id, 'channel')
+ claim_id = result['claim_id']
+
+ return self._playlist_entries(url, claim_id, {'channel_ids': [claim_id]}, result)
+
+
+class LBRYPlaylistIE(LBRYBaseIE):
+ IE_NAME = 'lbry:playlist'
+ _VALID_URL = LBRYBaseIE._BASE_URL_REGEX + r'\$/(?:play)?list/(?P<id>[0-9a-f-]+)'
+ _TESTS = [{
+ 'url': 'https://odysee.com/$/playlist/ffef782f27486f0ac138bde8777f72ebdd0548c2',
+ 'info_dict': {
+ 'id': 'ffef782f27486f0ac138bde8777f72ebdd0548c2',
+ 'title': 'Théâtre Classique',
+ 'description': 'Théâtre Classique',
+ },
+ 'playlist_mincount': 4,
+ }, {
+ 'url': 'https://odysee.com/$/list/9c6658b3dd21e4f2a0602d523a13150e2b48b770',
+ 'info_dict': {
+ 'id': '9c6658b3dd21e4f2a0602d523a13150e2b48b770',
+ 'title': 'Social Media Exposed',
+ 'description': 'md5:98af97317aacd5b85d595775ea37d80e',
+ },
+ 'playlist_mincount': 34,
+ }, {
+ 'url': 'https://odysee.com/$/playlist/938fb11d-215f-4d1c-ad64-723954df2184',
+ 'info_dict': {
+ 'id': '938fb11d-215f-4d1c-ad64-723954df2184',
+ },
+ 'playlist_mincount': 1000,
+ }]
+
+ def _real_extract(self, url):
+ display_id = self._match_id(url)
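+        # A playlist is itself a claim whose value lists the member claim
+        # IDs; those IDs are then handed to _playlist_entries below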
+ result = traverse_obj(self._call_api_proxy('claim_search', display_id, {
+ 'claim_ids': [display_id],
+ 'no_totals': True,
+ 'page': 1,
+ 'page_size': self._PAGE_SIZE,
+ }, 'playlist'), ('items', 0))
+ claim_param = {'claim_ids': traverse_obj(result, ('value', 'claims', ..., {str}))}
+
+ return self._playlist_entries(url, display_id, claim_param, result)
diff --git a/yt_dlp/extractor/lci.py b/yt_dlp/extractor/lci.py
new file mode 100644
index 0000000..e7d2f8a
--- /dev/null
+++ b/yt_dlp/extractor/lci.py
@@ -0,0 +1,28 @@
+from .common import InfoExtractor
+
+
+class LCIIE(InfoExtractor):
+ _VALID_URL = r'https?://(?:www\.)?(?:lci|tf1info)\.fr/[^/]+/[\w-]+-(?P<id>\d+)\.html'
+ _TESTS = [{
+ 'url': 'https://www.tf1info.fr/politique/election-presidentielle-2022-second-tour-j-2-marine-le-pen-et-emmanuel-macron-en-interview-de-lci-vendredi-soir-2217486.html',
+ 'info_dict': {
+ 'id': '13875948',
+ 'ext': 'mp4',
+ 'title': 'md5:660df5481fd418bc3bbb0d070e6fdb5a',
+ 'thumbnail': 'https://photos.tf1.fr/1280/720/presidentielle-2022-marine-le-pen-et-emmanuel-macron-invites-de-lci-ce-vendredi-9c0e73-e1a036-0@1x.jpg',
+ 'upload_date': '20220422',
+ 'duration': 33,
+ },
+ 'params': {
+ 'skip_download': True,
+ },
+ }, {
+ 'url': 'https://www.lci.fr/politique/election-presidentielle-2022-second-tour-j-2-marine-le-pen-et-emmanuel-macron-en-interview-de-lci-vendredi-soir-2217486.html',
+ 'only_matching': True,
+ }]
+
+ def _real_extract(self, url):
+ video_id = self._match_id(url)
+ webpage = self._download_webpage(url, video_id)
+ wat_id = self._search_regex(r'watId["\']?\s*:\s*["\']?(\d+)', webpage, 'wat id')
+ return self.url_result('wat:' + wat_id, 'Wat', wat_id)
diff --git a/yt_dlp/extractor/lcp.py b/yt_dlp/extractor/lcp.py
new file mode 100644
index 0000000..9846319
--- /dev/null
+++ b/yt_dlp/extractor/lcp.py
@@ -0,0 +1,87 @@
+from .common import InfoExtractor
+from .arkena import ArkenaIE
+
+
+class LcpPlayIE(ArkenaIE): # XXX: Do not subclass from concrete IE
+ _VALID_URL = r'https?://play\.lcp\.fr/embed/(?P<id>[^/]+)/(?P<account_id>[^/]+)/[^/]+/[^/]+'
+ _TESTS = [{
+ 'url': 'http://play.lcp.fr/embed/327336/131064/darkmatter/0',
+ 'md5': 'b8bd9298542929c06c1c15788b1f277a',
+ 'info_dict': {
+ 'id': '327336',
+ 'ext': 'mp4',
+ 'title': '327336',
+ 'timestamp': 1456391602,
+ 'upload_date': '20160225',
+ },
+ 'params': {
+ 'skip_download': True,
+ },
+ }]
+
+
+class LcpIE(InfoExtractor):
+ _VALID_URL = r'https?://(?:www\.)?lcp\.fr/(?:[^/]+/)*(?P<id>[^/]+)'
+
+ _TESTS = [{
+ # arkena embed
+ 'url': 'http://www.lcp.fr/la-politique-en-video/schwartzenberg-prg-preconise-francois-hollande-de-participer-une-primaire',
+ 'md5': 'b8bd9298542929c06c1c15788b1f277a',
+ 'info_dict': {
+ 'id': 'd56d03e9',
+ 'ext': 'mp4',
+ 'title': 'Schwartzenberg (PRG) préconise à François Hollande de participer à une primaire à gauche',
+ 'description': 'md5:96ad55009548da9dea19f4120c6c16a8',
+ 'timestamp': 1456488895,
+ 'upload_date': '20160226',
+ },
+ 'params': {
+ 'skip_download': True,
+ },
+ }, {
+ # dailymotion live stream
+ 'url': 'http://www.lcp.fr/le-direct',
+ 'info_dict': {
+ 'id': 'xji3qy',
+ 'ext': 'mp4',
+ 'title': 'La Chaine Parlementaire (LCP), Live TNT',
+ 'description': 'md5:5c69593f2de0f38bd9a949f2c95e870b',
+ 'uploader': 'LCP',
+ 'uploader_id': 'xbz33d',
+ 'timestamp': 1308923058,
+ 'upload_date': '20110624',
+ },
+ 'params': {
+ # m3u8 live stream
+ 'skip_download': True,
+ },
+ }, {
+ 'url': 'http://www.lcp.fr/emissions/277792-les-volontaires',
+ 'only_matching': True,
+ }]
+
+ def _real_extract(self, url):
+ display_id = self._match_id(url)
+
+ webpage = self._download_webpage(url, display_id)
+
+ play_url = self._search_regex(
+ r'<iframe[^>]+src=(["\'])(?P<url>%s?(?:(?!\1).)*)\1' % LcpPlayIE._VALID_URL,
+ webpage, 'play iframe', default=None, group='url')
+
+ if not play_url:
+ return self.url_result(url, 'Generic')
+
+ title = self._og_search_title(webpage, default=None) or self._html_search_meta(
+ 'twitter:title', webpage, fatal=True)
+ description = self._html_search_meta(
+ ('description', 'twitter:description'), webpage)
+
+ return {
+ '_type': 'url_transparent',
+ 'ie_key': LcpPlayIE.ie_key(),
+ 'url': play_url,
+ 'display_id': display_id,
+ 'title': title,
+ 'description': description,
+ }
diff --git a/yt_dlp/extractor/lecture2go.py b/yt_dlp/extractor/lecture2go.py
new file mode 100644
index 0000000..10fb5d4
--- /dev/null
+++ b/yt_dlp/extractor/lecture2go.py
@@ -0,0 +1,67 @@
+import re
+
+from .common import InfoExtractor
+from ..utils import (
+ determine_ext,
+ determine_protocol,
+    int_or_none,
+    parse_duration,
+)
+
+
+class Lecture2GoIE(InfoExtractor):
+ _WORKING = False
+ _VALID_URL = r'https?://lecture2go\.uni-hamburg\.de/veranstaltungen/-/v/(?P<id>\d+)'
+ _TEST = {
+ 'url': 'https://lecture2go.uni-hamburg.de/veranstaltungen/-/v/17473',
+ 'md5': 'ac02b570883020d208d405d5a3fd2f7f',
+ 'info_dict': {
+ 'id': '17473',
+ 'ext': 'mp4',
+ 'title': '2 - Endliche Automaten und reguläre Sprachen',
+ 'creator': 'Frank Heitmann',
+ 'duration': 5220,
+ },
+ 'params': {
+ # m3u8 download
+ 'skip_download': True,
+ }
+ }
+
+ def _real_extract(self, url):
+ video_id = self._match_id(url)
+ webpage = self._download_webpage(url, video_id)
+
+ title = self._html_search_regex(r'<em[^>]+class="title">(.+)</em>', webpage, 'title')
+
+ formats = []
+        for fmt_url in set(re.findall(r'var\s+playerUri\d+\s*=\s*"([^"]+)"', webpage)):
+            ext = determine_ext(fmt_url)
+            protocol = determine_protocol({'url': fmt_url})
+            if ext == 'f4m':
+                formats.extend(self._extract_f4m_formats(fmt_url, video_id, f4m_id='hds'))
+            elif ext == 'm3u8':
+                formats.extend(self._extract_m3u8_formats(fmt_url, video_id, ext='mp4', m3u8_id='hls'))
+            else:
+                if protocol == 'rtmp':
+                    continue  # XXX: currently broken
+                formats.append({
+                    'format_id': protocol,
+                    'url': fmt_url,
+                })
+
+ creator = self._html_search_regex(
+ r'<div[^>]+id="description">([^<]+)</div>', webpage, 'creator', fatal=False)
+ duration = parse_duration(self._html_search_regex(
+ r'Duration:\s*</em>\s*<em[^>]*>([^<]+)</em>', webpage, 'duration', fatal=False))
+ view_count = int_or_none(self._html_search_regex(
+ r'Views:\s*</em>\s*<em[^>]+>(\d+)</em>', webpage, 'view count', fatal=False))
+
+ return {
+ 'id': video_id,
+ 'title': title,
+ 'formats': formats,
+ 'creator': creator,
+ 'duration': duration,
+ 'view_count': view_count,
+ }
diff --git a/yt_dlp/extractor/lecturio.py b/yt_dlp/extractor/lecturio.py
new file mode 100644
index 0000000..629d208
--- /dev/null
+++ b/yt_dlp/extractor/lecturio.py
@@ -0,0 +1,235 @@
+import re
+
+from .common import InfoExtractor
+from ..utils import (
+ clean_html,
+ determine_ext,
+ ExtractorError,
+ float_or_none,
+ int_or_none,
+ str_or_none,
+ url_or_none,
+ urlencode_postdata,
+ urljoin,
+)
+
+
+class LecturioBaseIE(InfoExtractor):
+ _API_BASE_URL = 'https://app.lecturio.com/api/en/latest/html5/'
+ _LOGIN_URL = 'https://app.lecturio.com/en/login'
+ _NETRC_MACHINE = 'lecturio'
+
+ def _perform_login(self, username, password):
+ # Sets some cookies
+ _, urlh = self._download_webpage_handle(
+ self._LOGIN_URL, None, 'Downloading login popup')
+
+ def is_logged(url_handle):
+ return self._LOGIN_URL not in url_handle.url
+
+ # Already logged in
+ if is_logged(urlh):
+ return
+
+ login_form = {
+ 'signin[email]': username,
+ 'signin[password]': password,
+ 'signin[remember]': 'on',
+ }
+
+ response, urlh = self._download_webpage_handle(
+ self._LOGIN_URL, None, 'Logging in',
+ data=urlencode_postdata(login_form))
+
+ # Logged in successfully
+ if is_logged(urlh):
+ return
+
+ errors = self._html_search_regex(
+ r'(?s)<ul[^>]+class=["\']error_list[^>]+>(.+?)</ul>', response,
+ 'errors', default=None)
+ if errors:
+            raise ExtractorError('Unable to log in: %s' % errors, expected=True)
+ raise ExtractorError('Unable to log in')
+
+
+class LecturioIE(LecturioBaseIE):
+ _VALID_URL = r'''(?x)
+ https://
+ (?:
+ app\.lecturio\.com/([^/?#]+/(?P<nt>[^/?#&]+)\.lecture|(?:\#/)?lecture/c/\d+/(?P<id>\d+))|
+ (?:www\.)?lecturio\.de/(?:[^/?#]+/)+(?P<nt_de>[^/?#&]+)\.vortrag
+ )
+ '''
+ _TESTS = [{
+ 'url': 'https://app.lecturio.com/medical-courses/important-concepts-and-terms-introduction-to-microbiology.lecture#tab/videos',
+ 'md5': '9a42cf1d8282a6311bf7211bbde26fde',
+ 'info_dict': {
+ 'id': '39634',
+ 'ext': 'mp4',
+ 'title': 'Important Concepts and Terms — Introduction to Microbiology',
+ },
+ 'skip': 'Requires lecturio account credentials',
+ }, {
+ 'url': 'https://www.lecturio.de/jura/oeffentliches-recht-staatsexamen.vortrag',
+ 'only_matching': True,
+ }, {
+ 'url': 'https://www.lecturio.de/jura/oeffentliches-recht-at-1-staatsexamen/oeffentliches-recht-staatsexamen.vortrag',
+ 'only_matching': True,
+ }, {
+ 'url': 'https://app.lecturio.com/#/lecture/c/6434/39634',
+ 'only_matching': True,
+ }]
+
+ _CC_LANGS = {
+ 'Arabic': 'ar',
+ 'Bulgarian': 'bg',
+ 'German': 'de',
+ 'English': 'en',
+ 'Spanish': 'es',
+ 'Persian': 'fa',
+ 'French': 'fr',
+ 'Japanese': 'ja',
+ 'Polish': 'pl',
+ 'Pashto': 'ps',
+ 'Russian': 'ru',
+ }
+
+ def _real_extract(self, url):
+ mobj = self._match_valid_url(url)
+ nt = mobj.group('nt') or mobj.group('nt_de')
+ lecture_id = mobj.group('id')
+ display_id = nt or lecture_id
+ api_path = 'lectures/' + lecture_id if lecture_id else 'lecture/' + nt + '.json'
+ video = self._download_json(
+ self._API_BASE_URL + api_path, display_id)
+ title = video['title'].strip()
+ if not lecture_id:
+ pid = video.get('productId') or video.get('uid')
+ if pid:
+ spid = pid.split('_')
+ if spid and len(spid) == 2:
+ lecture_id = spid[1]
+
+ formats = []
+ for format_ in video['content']['media']:
+ if not isinstance(format_, dict):
+ continue
+ file_ = format_.get('file')
+ if not file_:
+ continue
+ ext = determine_ext(file_)
+ if ext == 'smil':
+ # smil contains only broken RTMP formats anyway
+ continue
+ file_url = url_or_none(file_)
+ if not file_url:
+ continue
+ label = str_or_none(format_.get('label'))
+ filesize = int_or_none(format_.get('fileSize'))
+ f = {
+ 'url': file_url,
+ 'format_id': label,
+ 'filesize': float_or_none(filesize, invscale=1000)
+ }
+ if label:
+ mobj = re.match(r'(\d+)p\s*\(([^)]+)\)', label)
+ if mobj:
+ f.update({
+ 'format_id': mobj.group(2),
+ 'height': int(mobj.group(1)),
+ })
+ formats.append(f)
+
+ subtitles = {}
+ automatic_captions = {}
+ captions = video.get('captions') or []
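+        # Caption URLs appear to encode the language as /<lang>_/ or, for
+        # auto-translated tracks, /<lang>_<original-lang>_/ (assumption based
+        # on the regexes below); tracks with an original language are filed
+        # under automatic captions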
+ for cc in captions:
+ cc_url = cc.get('url')
+ if not cc_url:
+ continue
+ cc_label = cc.get('translatedCode')
+ lang = cc.get('languageCode') or self._search_regex(
+ r'/([a-z]{2})_', cc_url, 'lang',
+ default=cc_label.split()[0] if cc_label else 'en')
+ original_lang = self._search_regex(
+ r'/[a-z]{2}_([a-z]{2})_', cc_url, 'original lang',
+ default=None)
+            sub_dict = (automatic_captions
+                        if (cc_label and 'auto-translated' in cc_label) or original_lang
+                        else subtitles)
+ sub_dict.setdefault(self._CC_LANGS.get(lang, lang), []).append({
+ 'url': cc_url,
+ })
+
+ return {
+ 'id': lecture_id or nt,
+ 'title': title,
+ 'formats': formats,
+ 'subtitles': subtitles,
+ 'automatic_captions': automatic_captions,
+ }
+
+
+class LecturioCourseIE(LecturioBaseIE):
+ _VALID_URL = r'https?://app\.lecturio\.com/(?:[^/]+/(?P<nt>[^/?#&]+)\.course|(?:#/)?course/c/(?P<id>\d+))'
+ _TESTS = [{
+ 'url': 'https://app.lecturio.com/medical-courses/microbiology-introduction.course#/',
+ 'info_dict': {
+ 'id': 'microbiology-introduction',
+ 'title': 'Microbiology: Introduction',
+ 'description': 'md5:13da8500c25880c6016ae1e6d78c386a',
+ },
+ 'playlist_count': 45,
+ 'skip': 'Requires lecturio account credentials',
+ }, {
+ 'url': 'https://app.lecturio.com/#/course/c/6434',
+ 'only_matching': True,
+ }]
+
+ def _real_extract(self, url):
+ nt, course_id = self._match_valid_url(url).groups()
+ display_id = nt or course_id
+ api_path = 'courses/' + course_id if course_id else 'course/content/' + nt + '.json'
+ course = self._download_json(
+ self._API_BASE_URL + api_path, display_id)
+ entries = []
+ for lecture in course.get('lectures', []):
+ lecture_id = str_or_none(lecture.get('id'))
+ lecture_url = lecture.get('url')
+ if lecture_url:
+ lecture_url = urljoin(url, lecture_url)
+ else:
+ lecture_url = 'https://app.lecturio.com/#/lecture/c/%s/%s' % (course_id, lecture_id)
+ entries.append(self.url_result(
+ lecture_url, ie=LecturioIE.ie_key(), video_id=lecture_id))
+ return self.playlist_result(
+ entries, display_id, course.get('title'),
+ clean_html(course.get('description')))
+
+
+class LecturioDeCourseIE(LecturioBaseIE):
+ _VALID_URL = r'https?://(?:www\.)?lecturio\.de/[^/]+/(?P<id>[^/?#&]+)\.kurs'
+ _TEST = {
+ 'url': 'https://www.lecturio.de/jura/grundrechte.kurs',
+ 'only_matching': True,
+ }
+
+ def _real_extract(self, url):
+ display_id = self._match_id(url)
+
+ webpage = self._download_webpage(url, display_id)
+
+ entries = []
+ for mobj in re.finditer(
+ r'(?s)<td[^>]+\bdata-lecture-id=["\'](?P<id>\d+).+?\bhref=(["\'])(?P<url>(?:(?!\2).)+\.vortrag)\b[^>]+>',
+ webpage):
+ lecture_url = urljoin(url, mobj.group('url'))
+ lecture_id = mobj.group('id')
+ entries.append(self.url_result(
+ lecture_url, ie=LecturioIE.ie_key(), video_id=lecture_id))
+
+ title = self._search_regex(
+ r'<h1[^>]*>([^<]+)', webpage, 'title', default=None)
+
+ return self.playlist_result(entries, display_id, title)
diff --git a/yt_dlp/extractor/leeco.py b/yt_dlp/extractor/leeco.py
new file mode 100644
index 0000000..85033b8
--- /dev/null
+++ b/yt_dlp/extractor/leeco.py
@@ -0,0 +1,364 @@
+import datetime
+import hashlib
+import re
+import time
+
+from .common import InfoExtractor
+from ..compat import (
+ compat_b64decode,
+ compat_ord,
+ compat_str,
+ compat_urllib_parse_urlencode,
+)
+from ..utils import (
+ determine_ext,
+ encode_data_uri,
+ ExtractorError,
+ int_or_none,
+ orderedSet,
+ parse_iso8601,
+ str_or_none,
+ url_basename,
+ urshift,
+)
+
+
+class LeIE(InfoExtractor):
+ IE_DESC = '乐视网'
+ _VALID_URL = r'https?://(?:www\.le\.com/ptv/vplay|(?:sports\.le|(?:www\.)?lesports)\.com/(?:match|video))/(?P<id>\d+)\.html'
+ _GEO_COUNTRIES = ['CN']
+ _URL_TEMPLATE = 'http://www.le.com/ptv/vplay/%s.html'
+
+ _TESTS = [{
+ 'url': 'http://www.le.com/ptv/vplay/22005890.html',
+ 'md5': 'edadcfe5406976f42f9f266057ee5e40',
+ 'info_dict': {
+ 'id': '22005890',
+ 'ext': 'mp4',
+ 'title': '第87届奥斯卡颁奖礼完美落幕 《鸟人》成最大赢家',
+ 'description': 'md5:a9cb175fd753e2962176b7beca21a47c',
+ },
+ 'params': {
+ 'hls_prefer_native': True,
+ },
+ }, {
+ 'url': 'http://www.le.com/ptv/vplay/1415246.html',
+ 'info_dict': {
+ 'id': '1415246',
+ 'ext': 'mp4',
+ 'title': '美人天下01',
+ 'description': 'md5:28942e650e82ed4fcc8e4de919ee854d',
+ },
+ 'params': {
+ 'hls_prefer_native': True,
+ },
+ }, {
+ 'note': 'This video is available only in Mainland China, thus a proxy is needed',
+ 'url': 'http://www.le.com/ptv/vplay/1118082.html',
+ 'md5': '2424c74948a62e5f31988438979c5ad1',
+ 'info_dict': {
+ 'id': '1118082',
+ 'ext': 'mp4',
+ 'title': '与龙共舞 完整版',
+ 'description': 'md5:7506a5eeb1722bb9d4068f85024e3986',
+ },
+ 'params': {
+ 'hls_prefer_native': True,
+ },
+ }, {
+ 'url': 'http://sports.le.com/video/25737697.html',
+ 'only_matching': True,
+ }, {
+ 'url': 'http://www.lesports.com/match/1023203003.html',
+ 'only_matching': True,
+ }, {
+ 'url': 'http://sports.le.com/match/1023203003.html',
+ 'only_matching': True,
+ }]
+
+    # ror() and calc_time_key() were reverse-engineered from an embedded swf file in LetvPlayer.swf
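+    # Each ror() pass is a one-bit rotate-right of a 32-bit value, so
+    # calc_time_key() rotates the timestamp right by 8 bits
+    # (185025305 % 17 == 8) and XORs it with the same constant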
+ def ror(self, param1, param2):
+ _loc3_ = 0
+ while _loc3_ < param2:
+ param1 = urshift(param1, 1) + ((param1 & 1) << 31)
+ _loc3_ += 1
+ return param1
+
+ def calc_time_key(self, param1):
+ _loc2_ = 185025305
+ return self.ror(param1, _loc2_ % 17) ^ _loc2_
+
+ # see M3U8Encryption class in KLetvPlayer.swf
+ @staticmethod
+ def decrypt_m3u8(encrypted_data):
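+        # Payloads without the 'vc_01' magic are plain m3u8 and returned
+        # as-is; otherwise each byte is split into its two 4-bit nibbles,
+        # the last 11 nibbles are rotated to the front, and the nibbles
+        # are recombined into bytes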
+ if encrypted_data[:5].decode('utf-8').lower() != 'vc_01':
+ return encrypted_data
+ encrypted_data = encrypted_data[5:]
+
+ _loc4_ = bytearray(2 * len(encrypted_data))
+ for idx, val in enumerate(encrypted_data):
+ b = compat_ord(val)
+ _loc4_[2 * idx] = b // 16
+ _loc4_[2 * idx + 1] = b % 16
+ idx = len(_loc4_) - 11
+ _loc4_ = _loc4_[idx:] + _loc4_[:idx]
+ _loc7_ = bytearray(len(encrypted_data))
+ for i in range(len(encrypted_data)):
+ _loc7_[i] = _loc4_[2 * i] * 16 + _loc4_[2 * i + 1]
+
+ return bytes(_loc7_)
+
+ def _check_errors(self, play_json):
+ # Check for errors
+ playstatus = play_json['msgs']['playstatus']
+ if playstatus['status'] == 0:
+ flag = playstatus['flag']
+ if flag == 1:
+ self.raise_geo_restricted()
+ else:
+ raise ExtractorError('Generic error. flag = %d' % flag, expected=True)
+
+ def _real_extract(self, url):
+ media_id = self._match_id(url)
+ page = self._download_webpage(url, media_id)
+
+ play_json_flash = self._download_json(
+ 'http://player-pc.le.com/mms/out/video/playJson',
+ media_id, 'Downloading flash playJson data', query={
+ 'id': media_id,
+ 'platid': 1,
+ 'splatid': 105,
+ 'format': 1,
+ 'source': 1000,
+ 'tkey': self.calc_time_key(int(time.time())),
+ 'domain': 'www.le.com',
+ 'region': 'cn',
+ },
+ headers=self.geo_verification_headers())
+ self._check_errors(play_json_flash)
+
+ def get_flash_urls(media_url, format_id):
+ nodes_data = self._download_json(
+ media_url, media_id,
+                'Downloading JSON metadata for format %s' % format_id,
+ query={
+ 'm3v': 1,
+ 'format': 1,
+ 'expect': 3,
+ 'tss': 'ios',
+ })
+
+ req = self._request_webpage(
+ nodes_data['nodelist'][0]['location'], media_id,
+ note='Downloading m3u8 information for format %s' % format_id)
+
+ m3u8_data = self.decrypt_m3u8(req.read())
+
+ return {
+ 'hls': encode_data_uri(m3u8_data, 'application/vnd.apple.mpegurl'),
+ }
+
+ extracted_formats = []
+ formats = []
+ playurl = play_json_flash['msgs']['playurl']
+ play_domain = playurl['domain'][0]
+
+        for format_id, format_data in playurl.get('dispatch', {}).items():
+ if format_id in extracted_formats:
+ continue
+ extracted_formats.append(format_id)
+
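+            # format_data appears to be a (path, filename, ...) pair: index 0
+            # is the location on the play domain, index 1 carries the
+            # file extension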
+ media_url = play_domain + format_data[0]
+ for protocol, format_url in get_flash_urls(media_url, format_id).items():
+ f = {
+ 'url': format_url,
+ 'ext': determine_ext(format_data[1]),
+ 'format_id': '%s-%s' % (protocol, format_id),
+ 'protocol': 'm3u8_native' if protocol == 'hls' else 'http',
+ 'quality': int_or_none(format_id),
+ }
+
+ if format_id[-1:] == 'p':
+ f['height'] = int_or_none(format_id[:-1])
+
+ formats.append(f)
+
+ publish_time = parse_iso8601(self._html_search_regex(
+ r'发布时间&nbsp;([^<>]+) ', page, 'publish time', default=None),
+ delimiter=' ', timezone=datetime.timedelta(hours=8))
+ description = self._html_search_meta('description', page, fatal=False)
+
+ return {
+ 'id': media_id,
+ 'formats': formats,
+ 'title': playurl['title'],
+ 'thumbnail': playurl['pic'],
+ 'description': description,
+ 'timestamp': publish_time,
+ '_format_sort_fields': ('res', 'quality'),
+ }
+
+
+class LePlaylistIE(InfoExtractor):
+ _VALID_URL = r'https?://[a-z]+\.le\.com/(?!video)[a-z]+/(?P<id>[a-z0-9_]+)'
+
+ _TESTS = [{
+ 'url': 'http://www.le.com/tv/46177.html',
+ 'info_dict': {
+ 'id': '46177',
+ 'title': '美人天下',
+ 'description': 'md5:395666ff41b44080396e59570dbac01c'
+ },
+ 'playlist_count': 35
+ }, {
+ 'url': 'http://tv.le.com/izt/wuzetian/index.html',
+ 'info_dict': {
+ 'id': 'wuzetian',
+ 'title': '武媚娘传奇',
+ 'description': 'md5:e12499475ab3d50219e5bba00b3cb248'
+ },
+ # This playlist contains some extra videos other than the drama itself
+ 'playlist_mincount': 96
+ }, {
+ 'url': 'http://tv.le.com/pzt/lswjzzjc/index.shtml',
+ # This series is moved to http://www.le.com/tv/10005297.html
+ 'only_matching': True,
+ }, {
+ 'url': 'http://www.le.com/comic/92063.html',
+ 'only_matching': True,
+ }, {
+ 'url': 'http://list.le.com/listn/c1009_sc532002_d2_p1_o1.html',
+ 'only_matching': True,
+ }]
+
+ @classmethod
+ def suitable(cls, url):
+        return False if LeIE.suitable(url) else super().suitable(url)
+
+ def _real_extract(self, url):
+ playlist_id = self._match_id(url)
+ page = self._download_webpage(url, playlist_id)
+
+ # Currently old domain names are still used in playlists
+ media_ids = orderedSet(re.findall(
+ r'<a[^>]+href="http://www\.letv\.com/ptv/vplay/(\d+)\.html', page))
+ entries = [self.url_result(LeIE._URL_TEMPLATE % media_id, ie='Le')
+ for media_id in media_ids]
+
+        keywords = self._html_search_meta('keywords', page, fatal=False)
+        title = keywords.split(',')[0] if keywords else None
+ description = self._html_search_meta('description', page, fatal=False)
+
+ return self.playlist_result(entries, playlist_id, playlist_title=title,
+ playlist_description=description)
+
+
+class LetvCloudIE(InfoExtractor):
+    # Most of *.letv.com was changed to *.le.com on 2016/01/02,
+    # but yuntv.letv.com was kept, so the extractor name is kept as well
+ IE_DESC = '乐视云'
+ _VALID_URL = r'https?://yuntv\.letv\.com/bcloud.html\?.+'
+
+ _TESTS = [{
+ 'url': 'http://yuntv.letv.com/bcloud.html?uu=p7jnfw5hw9&vu=467623dedf',
+ 'md5': '26450599afd64c513bc77030ad15db44',
+ 'info_dict': {
+ 'id': 'p7jnfw5hw9_467623dedf',
+ 'ext': 'mp4',
+ 'title': 'Video p7jnfw5hw9_467623dedf',
+ },
+ }, {
+ 'url': 'http://yuntv.letv.com/bcloud.html?uu=p7jnfw5hw9&vu=ec93197892&pu=2c7cd40209&auto_play=1&gpcflag=1&width=640&height=360',
+ 'md5': 'e03d9cc8d9c13191e1caf277e42dbd31',
+ 'info_dict': {
+ 'id': 'p7jnfw5hw9_ec93197892',
+ 'ext': 'mp4',
+ 'title': 'Video p7jnfw5hw9_ec93197892',
+ },
+ }, {
+ 'url': 'http://yuntv.letv.com/bcloud.html?uu=p7jnfw5hw9&vu=187060b6fd',
+ 'md5': 'cb988699a776b22d4a41b9d43acfb3ac',
+ 'info_dict': {
+ 'id': 'p7jnfw5hw9_187060b6fd',
+ 'ext': 'mp4',
+ 'title': 'Video p7jnfw5hw9_187060b6fd',
+ },
+ }]
+
+ @staticmethod
+ def sign_data(obj):
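+        # The signature is the md5 hex digest of the key/value pairs
+        # concatenated in the fixed order below, with a per-player salt
+        # appended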
+ if obj['cf'] == 'flash':
+ salt = '2f9d6924b33a165a6d8b5d3d42f4f987'
+ items = ['cf', 'format', 'ran', 'uu', 'ver', 'vu']
+ elif obj['cf'] == 'html5':
+ salt = 'fbeh5player12c43eccf2bec3300344'
+ items = ['cf', 'ran', 'uu', 'bver', 'vu']
+ input_data = ''.join([item + obj[item] for item in items]) + salt
+ obj['sign'] = hashlib.md5(input_data.encode('utf-8')).hexdigest()
+
+ def _get_formats(self, cf, uu, vu, media_id):
+ def get_play_json(cf, timestamp):
+ data = {
+ 'cf': cf,
+ 'ver': '2.2',
+ 'bver': 'firefox44.0',
+ 'format': 'json',
+ 'uu': uu,
+ 'vu': vu,
+ 'ran': compat_str(timestamp),
+ }
+ self.sign_data(data)
+ return self._download_json(
+ 'http://api.letvcloud.com/gpc.php?' + compat_urllib_parse_urlencode(data),
+ media_id, 'Downloading playJson data for type %s' % cf)
+
+ play_json = get_play_json(cf, time.time())
+ # The server time may be different from local time
+ if play_json.get('code') == 10071:
+ play_json = get_play_json(cf, play_json['timestamp'])
+
+ if not play_json.get('data'):
+ if play_json.get('message'):
+ raise ExtractorError('Letv cloud said: %s' % play_json['message'], expected=True)
+ elif play_json.get('code'):
+ raise ExtractorError('Letv cloud returned error %d' % play_json['code'], expected=True)
+ else:
+ raise ExtractorError('Letv cloud returned an unknown error')
+
+ def b64decode(s):
+ return compat_b64decode(s).decode('utf-8')
+
+ formats = []
+ for media in play_json['data']['video_info']['media'].values():
+ play_url = media['play_url']
+ url = b64decode(play_url['main_url'])
+ decoded_url = b64decode(url_basename(url))
+ formats.append({
+ 'url': url,
+ 'ext': determine_ext(decoded_url),
+ 'format_id': str_or_none(play_url.get('vtype')),
+ 'format_note': str_or_none(play_url.get('definition')),
+ 'width': int_or_none(play_url.get('vwidth')),
+ 'height': int_or_none(play_url.get('vheight')),
+ })
+
+ return formats
+
+ def _real_extract(self, url):
+        uu_mobj = re.search(r'uu=(\w+)', url)
+        vu_mobj = re.search(r'vu=(\w+)', url)
+
+ if not uu_mobj or not vu_mobj:
+ raise ExtractorError('Invalid URL: %s' % url, expected=True)
+
+ uu = uu_mobj.group(1)
+ vu = vu_mobj.group(1)
+ media_id = uu + '_' + vu
+
+ formats = self._get_formats('flash', uu, vu, media_id) + self._get_formats('html5', uu, vu, media_id)
+
+ return {
+ 'id': media_id,
+ 'title': 'Video %s' % media_id,
+ 'formats': formats,
+ }
diff --git a/yt_dlp/extractor/lefigaro.py b/yt_dlp/extractor/lefigaro.py
new file mode 100644
index 0000000..a452d87
--- /dev/null
+++ b/yt_dlp/extractor/lefigaro.py
@@ -0,0 +1,136 @@
+import json
+import math
+
+from .common import InfoExtractor
+from ..utils import (
+ InAdvancePagedList,
+ traverse_obj,
+)
+
+
+class LeFigaroVideoEmbedIE(InfoExtractor):
+ _VALID_URL = r'https?://video\.lefigaro\.fr/embed/[^?#]+/(?P<id>[\w-]+)'
+
+ _TESTS = [{
+ 'url': 'https://video.lefigaro.fr/embed/figaro/video/les-francais-ne-veulent-ils-plus-travailler-suivez-en-direct-le-club-le-figaro-idees/',
+ 'md5': 'a0c3069b7e4c4526abf0053a7713f56f',
+ 'info_dict': {
+ 'id': 'g9j7Eovo',
+ 'title': 'Les Français ne veulent-ils plus travailler ? Retrouvez Le Club Le Figaro Idées',
+ 'description': 'md5:862b8813148ba4bf10763a65a69dfe41',
+ 'upload_date': '20230216',
+ 'timestamp': 1676581615,
+ 'duration': 3076,
+ 'thumbnail': r're:^https?://[^?#]+\.(?:jpeg|jpg)',
+ 'ext': 'mp4',
+ },
+ }, {
+ 'url': 'https://video.lefigaro.fr/embed/figaro/video/intelligence-artificielle-faut-il-sen-mefier/',
+ 'md5': '319c662943dd777bab835cae1e2d73a5',
+ 'info_dict': {
+ 'id': 'LeAgybyc',
+ 'title': 'Intelligence artificielle : faut-il s’en méfier ?',
+ 'description': 'md5:249d136e3e5934a67c8cb704f8abf4d2',
+ 'upload_date': '20230124',
+ 'timestamp': 1674584477,
+ 'duration': 860,
+ 'thumbnail': r're:^https?://[^?#]+\.(?:jpeg|jpg)',
+ 'ext': 'mp4',
+ },
+ }]
+
+ _WEBPAGE_TESTS = [{
+ 'url': 'https://video.lefigaro.fr/figaro/video/suivez-en-direct-le-club-le-figaro-international-avec-philippe-gelie-9/',
+ 'md5': '6289f9489efb969e38245f31721596fe',
+ 'info_dict': {
+ 'id': 'QChnbPYA',
+ 'title': 'Où en est le couple franco-allemand ? Retrouvez Le Club Le Figaro International',
+ 'description': 'md5:6f47235b7e7c93b366fd8ebfa10572ac',
+ 'upload_date': '20230123',
+ 'timestamp': 1674503575,
+ 'duration': 3153,
+ 'thumbnail': r're:^https?://[^?#]+\.(?:jpeg|jpg)',
+ 'age_limit': 0,
+ 'ext': 'mp4',
+ },
+ }, {
+ 'url': 'https://video.lefigaro.fr/figaro/video/la-philosophe-nathalie-sarthou-lajus-est-linvitee-du-figaro-live/',
+ 'md5': 'f6df814cae53e85937621599d2967520',
+ 'info_dict': {
+ 'id': 'QJzqoNbf',
+ 'title': 'La philosophe Nathalie Sarthou-Lajus est l’invitée du Figaro Live',
+ 'description': 'md5:c586793bb72e726c83aa257f99a8c8c4',
+ 'upload_date': '20230217',
+ 'timestamp': 1676661986,
+ 'duration': 1558,
+ 'thumbnail': r're:^https?://[^?#]+\.(?:jpeg|jpg)',
+ 'age_limit': 0,
+ 'ext': 'mp4',
+ },
+ }]
+
+ def _real_extract(self, url):
+ display_id = self._match_id(url)
+ webpage = self._download_webpage(url, display_id)
+
+ player_data = self._search_nextjs_data(
+ webpage, display_id)['props']['pageProps']['initialProps']['pageData']['playerData']
+
+ return self.url_result(
+ f'jwplatform:{player_data["videoId"]}', title=player_data.get('title'),
+ description=player_data.get('description'), thumbnail=player_data.get('poster'))
+
+
+class LeFigaroVideoSectionIE(InfoExtractor):
+ _VALID_URL = r'https?://video\.lefigaro\.fr/figaro/(?P<id>[\w-]+)/?(?:[#?]|$)'
+
+ _TESTS = [{
+ 'url': 'https://video.lefigaro.fr/figaro/le-club-le-figaro-idees/',
+ 'info_dict': {
+ 'id': 'le-club-le-figaro-idees',
+ 'title': 'Le Club Le Figaro Idées',
+ },
+ 'playlist_mincount': 14,
+ }, {
+ 'url': 'https://video.lefigaro.fr/figaro/factu/',
+ 'info_dict': {
+ 'id': 'factu',
+ 'title': 'Factu',
+ },
+ 'playlist_mincount': 519,
+ }]
+
+ _PAGE_SIZE = 20
+
+ def _get_api_response(self, display_id, page_num, note=None):
+ return self._download_json(
+ 'https://api-graphql.lefigaro.fr/graphql', display_id, note=note,
+ query={
+ 'id': 'flive-website_UpdateListPage_1fb260f996bca2d78960805ac382544186b3225f5bedb43ad08b9b8abef79af6',
+ 'variables': json.dumps({
+ 'slug': display_id,
+ 'videosLimit': self._PAGE_SIZE,
+ 'sort': 'DESC',
+ 'order': 'PUBLISHED_AT',
+ 'page': page_num,
+ }).encode(),
+ })
+
+ def _real_extract(self, url):
+ display_id = self._match_id(url)
+ initial_response = self._get_api_response(display_id, page_num=1)['data']['playlist']
+
+ def page_func(page_num):
+ api_response = self._get_api_response(display_id, page_num + 1, note=f'Downloading page {page_num + 1}')
+
+ return [self.url_result(
+ video['embedUrl'], LeFigaroVideoEmbedIE, **traverse_obj(video, {
+ 'title': 'name',
+ 'description': 'description',
+ 'thumbnail': 'thumbnailUrl',
+ })) for video in api_response['data']['playlist']['jsonLd'][0]['itemListElement']]
+
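+        # videoCount is the total number of videos in the section, so the
+        # number of pages is ceil(videoCount / page size)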
+ entries = InAdvancePagedList(
+ page_func, math.ceil(initial_response['videoCount'] / self._PAGE_SIZE), self._PAGE_SIZE)
+
+ return self.playlist_result(entries, playlist_id=display_id, playlist_title=initial_response.get('title'))
diff --git a/yt_dlp/extractor/lego.py b/yt_dlp/extractor/lego.py
new file mode 100644
index 0000000..46fc7a9
--- /dev/null
+++ b/yt_dlp/extractor/lego.py
@@ -0,0 +1,141 @@
+import uuid
+
+from .common import InfoExtractor
+from ..networking.exceptions import HTTPError
+from ..utils import (
+ ExtractorError,
+ int_or_none,
+ join_nonempty,
+ qualities,
+)
+
+
+class LEGOIE(InfoExtractor):
+ _VALID_URL = r'https?://(?:www\.)?lego\.com/(?P<locale>[a-z]{2}-[a-z]{2})/(?:[^/]+/)*videos/(?:[^/]+/)*[^/?#]+-(?P<id>[0-9a-f]{32})'
+ _TESTS = [{
+ 'url': 'http://www.lego.com/en-us/videos/themes/club/blocumentary-kawaguchi-55492d823b1b4d5e985787fa8c2973b1',
+ 'md5': 'f34468f176cfd76488767fc162c405fa',
+ 'info_dict': {
+ 'id': '55492d82-3b1b-4d5e-9857-87fa8c2973b1_en-US',
+ 'ext': 'mp4',
+ 'title': 'Blocumentary Great Creations: Akiyuki Kawaguchi',
+ 'description': 'Blocumentary Great Creations: Akiyuki Kawaguchi',
+ },
+ }, {
+ # geo-restricted but the contentUrl contain a valid url
+ 'url': 'http://www.lego.com/nl-nl/videos/themes/nexoknights/episode-20-kingdom-of-heroes-13bdc2299ab24d9685701a915b3d71e7##sp=399',
+ 'md5': 'c7420221f7ffd03ff056f9db7f8d807c',
+ 'info_dict': {
+ 'id': '13bdc229-9ab2-4d96-8570-1a915b3d71e7_nl-NL',
+ 'ext': 'mp4',
+ 'title': 'Aflevering 20: Helden van het koninkrijk',
+ 'description': 'md5:8ee499aac26d7fa8bcb0cedb7f9c3941',
+ 'age_limit': 5,
+ },
+ }, {
+ # with subtitle
+ 'url': 'https://www.lego.com/nl-nl/kids/videos/classic/creative-storytelling-the-little-puppy-aa24f27c7d5242bc86102ebdc0f24cba',
+ 'info_dict': {
+ 'id': 'aa24f27c-7d52-42bc-8610-2ebdc0f24cba_nl-NL',
+ 'ext': 'mp4',
+ 'title': 'De kleine puppy',
+ 'description': 'md5:5b725471f849348ac73f2e12cfb4be06',
+ 'age_limit': 1,
+ 'subtitles': {
+ 'nl': [{
+ 'ext': 'srt',
+ 'url': r're:^https://.+\.srt$',
+ }],
+ },
+ },
+ 'params': {
+ 'skip_download': True,
+ },
+ }]
+ _QUALITIES = {
+ 'Lowest': (64, 180, 320),
+ 'Low': (64, 270, 480),
+ 'Medium': (96, 360, 640),
+ 'High': (128, 540, 960),
+ 'Highest': (128, 720, 1280),
+ }
+
+ def _real_extract(self, url):
+ locale, video_id = self._match_valid_url(url).groups()
+ countries = [locale.split('-')[1].upper()]
+ self._initialize_geo_bypass({
+ 'countries': countries,
+ })
+
+ try:
+ item = self._download_json(
+ # https://contentfeed.services.lego.com/api/v2/item/[VIDEO_ID]?culture=[LOCALE]&contentType=Video
+ 'https://services.slingshot.lego.com/mediaplayer/v2',
+ video_id, query={
+ 'videoId': '%s_%s' % (uuid.UUID(video_id), locale),
+ }, headers=self.geo_verification_headers())
+ except ExtractorError as e:
+ if isinstance(e.cause, HTTPError) and e.cause.status == 451:
+ self.raise_geo_restricted(countries=countries)
+ raise
+
+ video = item['Video']
+ video_id = video['Id']
+ title = video['Title']
+
+ q = qualities(['Lowest', 'Low', 'Medium', 'High', 'Highest'])
+ formats = []
+ for video_source in item.get('VideoFormats', []):
+ video_source_url = video_source.get('Url')
+ if not video_source_url:
+ continue
+ video_source_format = video_source.get('Format')
+ if video_source_format == 'F4M':
+ formats.extend(self._extract_f4m_formats(
+ video_source_url, video_id,
+ f4m_id=video_source_format, fatal=False))
+ elif video_source_format == 'M3U8':
+ formats.extend(self._extract_m3u8_formats(
+ video_source_url, video_id, 'mp4', 'm3u8_native',
+ m3u8_id=video_source_format, fatal=False))
+ else:
+ video_source_quality = video_source.get('Quality')
+ f = {
+ 'format_id': join_nonempty(video_source_format, video_source_quality),
+ 'quality': q(video_source_quality),
+ 'url': video_source_url,
+ }
+ quality = self._QUALITIES.get(video_source_quality)
+ if quality:
+ f.update({
+ 'abr': quality[0],
+ 'height': quality[1],
+ 'width': quality[2],
+                    })
+ formats.append(f)
+
+ subtitles = {}
+ sub_file_id = video.get('SubFileId')
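+        # an all-zero GUID appears to act as a 'no subtitles' sentinel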
+ if sub_file_id and sub_file_id != '00000000-0000-0000-0000-000000000000':
+ net_storage_path = video.get('NetstoragePath')
+ invariant_id = video.get('InvariantId')
+ video_file_id = video.get('VideoFileId')
+ video_version = video.get('VideoVersion')
+ if net_storage_path and invariant_id and video_file_id and video_version:
+ subtitles.setdefault(locale[:2], []).append({
+ 'url': 'https://lc-mediaplayerns-live-s.legocdn.com/public/%s/%s_%s_%s_%s_sub.srt' % (net_storage_path, invariant_id, video_file_id, locale, video_version),
+ })
+
+ return {
+ 'id': video_id,
+ 'title': title,
+ 'description': video.get('Description'),
+ 'thumbnail': video.get('GeneratedCoverImage') or video.get('GeneratedThumbnail'),
+ 'duration': int_or_none(video.get('Length')),
+ 'formats': formats,
+ 'subtitles': subtitles,
+ 'age_limit': int_or_none(video.get('AgeFrom')),
+ 'season': video.get('SeasonTitle'),
+ 'season_number': int_or_none(video.get('Season')) or None,
+ 'episode_number': int_or_none(video.get('Episode')) or None,
+ }
diff --git a/yt_dlp/extractor/lemonde.py b/yt_dlp/extractor/lemonde.py
new file mode 100644
index 0000000..c916791
--- /dev/null
+++ b/yt_dlp/extractor/lemonde.py
@@ -0,0 +1,56 @@
+from .common import InfoExtractor
+
+
+class LemondeIE(InfoExtractor):
+ _VALID_URL = r'https?://(?:.+?\.)?lemonde\.fr/(?:[^/]+/)*(?P<id>[^/]+)\.html'
+ _TESTS = [{
+ 'url': 'http://www.lemonde.fr/police-justice/video/2016/01/19/comprendre-l-affaire-bygmalion-en-cinq-minutes_4849702_1653578.html',
+ 'md5': 'da120c8722d8632eec6ced937536cc98',
+ 'info_dict': {
+ 'id': 'lqm3kl',
+ 'ext': 'mp4',
+ 'title': "Comprendre l'affaire Bygmalion en 5 minutes",
+ 'thumbnail': r're:^https?://.*\.jpg',
+ 'duration': 309,
+ 'upload_date': '20160119',
+ 'timestamp': 1453194778,
+ 'uploader_id': '3pmkp',
+ },
+ }, {
+ # standard iframe embed
+ 'url': 'http://www.lemonde.fr/les-decodeurs/article/2016/10/18/tout-comprendre-du-ceta-le-petit-cousin-du-traite-transatlantique_5015920_4355770.html',
+ 'info_dict': {
+ 'id': 'uzsxms',
+ 'ext': 'mp4',
+ 'title': "CETA : quelles suites pour l'accord commercial entre l'Europe et le Canada ?",
+ 'thumbnail': r're:^https?://.*\.jpg',
+ 'duration': 325,
+ 'upload_date': '20161021',
+ 'timestamp': 1477044540,
+ 'uploader_id': '3pmkp',
+ },
+ 'params': {
+ 'skip_download': True,
+ },
+ }, {
+ 'url': 'http://redaction.actu.lemonde.fr/societe/video/2016/01/18/calais-debut-des-travaux-de-defrichement-dans-la-jungle_4849233_3224.html',
+ 'only_matching': True,
+ }, {
+ # YouTube embeds
+ 'url': 'http://www.lemonde.fr/pixels/article/2016/12/09/pourquoi-pewdiepie-superstar-de-youtube-a-menace-de-fermer-sa-chaine_5046649_4408996.html',
+ 'only_matching': True,
+ }]
+
+ def _real_extract(self, url):
+ display_id = self._match_id(url)
+
+ webpage = self._download_webpage(url, display_id)
+
+ digiteka_url = self._proto_relative_url(self._search_regex(
+ r'url\s*:\s*(["\'])(?P<url>(?:https?://)?//(?:www\.)?(?:digiteka\.net|ultimedia\.com)/deliver/.+?)\1',
+ webpage, 'digiteka url', group='url', default=None))
+
+ if digiteka_url:
+ return self.url_result(digiteka_url, 'Digiteka')
+
+ return self.url_result(url, 'Generic')
diff --git a/yt_dlp/extractor/lenta.py b/yt_dlp/extractor/lenta.py
new file mode 100644
index 0000000..fe01bda
--- /dev/null
+++ b/yt_dlp/extractor/lenta.py
@@ -0,0 +1,51 @@
+from .common import InfoExtractor
+
+
+class LentaIE(InfoExtractor):
+ _WORKING = False
+ _VALID_URL = r'https?://(?:www\.)?lenta\.ru/[^/]+/\d+/\d+/\d+/(?P<id>[^/?#&]+)'
+ _TESTS = [{
+ 'url': 'https://lenta.ru/news/2018/03/22/savshenko_go/',
+ 'info_dict': {
+ 'id': '964400',
+ 'ext': 'mp4',
+ 'title': 'Надежду Савченко задержали',
+ 'thumbnail': r're:^https?://.*\.jpg$',
+ 'duration': 61,
+ 'view_count': int,
+ },
+ 'params': {
+ 'skip_download': True,
+ },
+ }, {
+ # EaglePlatform iframe embed
+ 'url': 'http://lenta.ru/news/2015/03/06/navalny/',
+ 'info_dict': {
+ 'id': '227304',
+ 'ext': 'mp4',
+ 'title': 'Навальный вышел на свободу',
+ 'description': 'md5:d97861ac9ae77377f3f20eaf9d04b4f5',
+ 'thumbnail': r're:^https?://.*\.jpg$',
+ 'duration': 87,
+ 'view_count': int,
+ 'age_limit': 0,
+ },
+ 'params': {
+ 'skip_download': True,
+ },
+ }]
+
+ def _real_extract(self, url):
+ display_id = self._match_id(url)
+
+ webpage = self._download_webpage(url, display_id)
+
+ video_id = self._search_regex(
+ r'vid\s*:\s*["\']?(\d+)', webpage, 'eagleplatform id',
+ default=None)
+ if video_id:
+ return self.url_result(
+ 'eagleplatform:lentaru.media.eagleplatform.com:%s' % video_id,
+ ie='EaglePlatform', video_id=video_id)
+
+ return self.url_result(url, ie='Generic')
diff --git a/yt_dlp/extractor/libraryofcongress.py b/yt_dlp/extractor/libraryofcongress.py
new file mode 100644
index 0000000..b76ca09
--- /dev/null
+++ b/yt_dlp/extractor/libraryofcongress.py
@@ -0,0 +1,148 @@
+import re
+
+from .common import InfoExtractor
+from ..utils import (
+ determine_ext,
+ float_or_none,
+ int_or_none,
+ parse_filesize,
+)
+
+
+class LibraryOfCongressIE(InfoExtractor):
+ IE_NAME = 'loc'
+ IE_DESC = 'Library of Congress'
+ _VALID_URL = r'https?://(?:www\.)?loc\.gov/(?:item/|today/cyberlc/feature_wdesc\.php\?.*\brec=)(?P<id>[0-9a-z_.]+)'
+ _TESTS = [{
+ # embedded via <div class="media-player"
+ 'url': 'http://loc.gov/item/90716351/',
+ 'md5': '6ec0ae8f07f86731b1b2ff70f046210a',
+ 'info_dict': {
+ 'id': '90716351',
+ 'ext': 'mp4',
+ 'title': "Pa's trip to Mars",
+ 'duration': 0,
+ 'view_count': int,
+ },
+ }, {
+ # webcast embedded via mediaObjectId
+ 'url': 'https://www.loc.gov/today/cyberlc/feature_wdesc.php?rec=5578',
+ 'info_dict': {
+ 'id': '5578',
+ 'ext': 'mp4',
+ 'title': 'Help! Preservation Training Needs Here, There & Everywhere',
+ 'duration': 3765,
+ 'view_count': int,
+ 'subtitles': 'mincount:1',
+ },
+ 'params': {
+ 'skip_download': True,
+ },
+ }, {
+ # with direct download links
+ 'url': 'https://www.loc.gov/item/78710669/',
+ 'info_dict': {
+ 'id': '78710669',
+ 'ext': 'mp4',
+ 'title': 'La vie et la passion de Jesus-Christ',
+ 'duration': 0,
+ 'view_count': int,
+ 'formats': 'mincount:4',
+ },
+ 'params': {
+ 'skip_download': True,
+ },
+ }, {
+ 'url': 'https://www.loc.gov/item/ihas.200197114/',
+ 'only_matching': True,
+ }, {
+ 'url': 'https://www.loc.gov/item/afc1981005_afs20503/',
+ 'only_matching': True,
+ }]
+
+ def _real_extract(self, url):
+ video_id = self._match_id(url)
+ webpage = self._download_webpage(url, video_id)
+
+ media_id = self._search_regex(
+ (r'id=(["\'])media-player-(?P<id>.+?)\1',
+ r'<video[^>]+id=(["\'])uuid-(?P<id>.+?)\1',
+ r'<video[^>]+data-uuid=(["\'])(?P<id>.+?)\1',
+ r'mediaObjectId\s*:\s*(["\'])(?P<id>.+?)\1',
+ r'data-tab="share-media-(?P<id>[0-9A-F]{32})"'),
+ webpage, 'media id', group='id')
+
+ data = self._download_json(
+ 'https://media.loc.gov/services/v1/media?id=%s&context=json' % media_id,
+ media_id)['mediaObject']
+
+ derivative = data['derivatives'][0]
+ media_url = derivative['derivativeUrl']
+
+ title = derivative.get('shortName') or data.get('shortName') or self._og_search_title(
+ webpage)
+
+        # The following algorithm was extracted from the setAVSource js
+        # function found in the webpage
+ media_url = media_url.replace('rtmp', 'https')
+
+ is_video = data.get('mediaType', 'v').lower() == 'v'
+ ext = determine_ext(media_url)
+ if ext not in ('mp4', 'mp3'):
+ media_url += '.mp4' if is_video else '.mp3'
+
+ formats = []
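+        # Derivatives under /vod/mp4: also have an HLS rendition at
+        # /hls-vod/media/<path>.m3u8 (assumed from the URL rewrite below)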
+ if '/vod/mp4:' in media_url:
+ formats.append({
+ 'url': media_url.replace('/vod/mp4:', '/hls-vod/media/') + '.m3u8',
+ 'format_id': 'hls',
+ 'ext': 'mp4',
+ 'protocol': 'm3u8_native',
+ 'quality': 1,
+ })
+ http_format = {
+ 'url': re.sub(r'(://[^/]+/)(?:[^/]+/)*(?:mp4|mp3):', r'\1', media_url),
+ 'format_id': 'http',
+ 'quality': 1,
+ }
+ if not is_video:
+ http_format['vcodec'] = 'none'
+ formats.append(http_format)
+
+ download_urls = set()
+ for m in re.finditer(
+ r'<option[^>]+value=(["\'])(?P<url>.+?)\1[^>]+data-file-download=[^>]+>\s*(?P<id>.+?)(?:(?:&nbsp;|\s+)\((?P<size>.+?)\))?\s*<', webpage):
+ format_id = m.group('id').lower()
+ if format_id in ('gif', 'jpeg'):
+ continue
+ download_url = m.group('url')
+ if download_url in download_urls:
+ continue
+ download_urls.add(download_url)
+ formats.append({
+ 'url': download_url,
+ 'format_id': format_id,
+ 'filesize_approx': parse_filesize(m.group('size')),
+ })
+
+ duration = float_or_none(data.get('duration'))
+ view_count = int_or_none(data.get('viewCount'))
+
+ subtitles = {}
+ cc_url = data.get('ccUrl')
+ if cc_url:
+ subtitles.setdefault('en', []).append({
+ 'url': cc_url,
+ 'ext': 'ttml',
+ })
+
+ return {
+ 'id': video_id,
+ 'title': title,
+ 'thumbnail': self._og_search_thumbnail(webpage, default=None),
+ 'duration': duration,
+ 'view_count': view_count,
+ 'formats': formats,
+ 'subtitles': subtitles,
+ }
diff --git a/yt_dlp/extractor/libsyn.py b/yt_dlp/extractor/libsyn.py
new file mode 100644
index 0000000..29bbb03
--- /dev/null
+++ b/yt_dlp/extractor/libsyn.py
@@ -0,0 +1,89 @@
+from .common import InfoExtractor
+from ..utils import (
+ clean_html,
+ get_element_by_class,
+ parse_duration,
+ strip_or_none,
+ unified_strdate,
+)
+
+
+class LibsynIE(InfoExtractor):
+ _VALID_URL = r'(?P<mainurl>https?://html5-player\.libsyn\.com/embed/episode/id/(?P<id>[0-9]+))'
+ _EMBED_REGEX = [r'<iframe[^>]+src=(["\'])(?P<url>(?:https?:)?//html5-player\.libsyn\.com/embed/.+?)\1']
+
+ _TESTS = [{
+ 'url': 'http://html5-player.libsyn.com/embed/episode/id/6385796/',
+ 'md5': '2a55e75496c790cdeb058e7e6c087746',
+ 'info_dict': {
+ 'id': '6385796',
+ 'ext': 'mp3',
+ 'title': "Champion Minded - Developing a Growth Mindset",
+ # description fetched using another request:
+ # http://html5-player.libsyn.com/embed/getitemdetails?item_id=6385796
+ # 'description': 'In this episode, Allistair talks about the importance of developing a growth mindset, not only in sports, but in life too.',
+ 'upload_date': '20180320',
+ 'thumbnail': 're:^https?://.*',
+ },
+ }, {
+ 'url': 'https://html5-player.libsyn.com/embed/episode/id/3727166/height/75/width/200/theme/standard/direction/no/autoplay/no/autonext/no/thumbnail/no/preload/no/no_addthis/no/',
+ 'md5': '6c5cb21acd622d754d3b1a92b582ce42',
+ 'info_dict': {
+ 'id': '3727166',
+ 'ext': 'mp3',
+ 'title': 'Clients From Hell Podcast - How a Sex Toy Company Kickstarted my Freelance Career',
+ 'upload_date': '20150818',
+ 'thumbnail': 're:^https?://.*',
+ }
+ }]
+
+ def _real_extract(self, url):
+ url, video_id = self._match_valid_url(url).groups()
+ webpage = self._download_webpage(url, video_id)
+
+ data = self._parse_json(self._search_regex(
+ r'var\s+playlistItem\s*=\s*({.+?});',
+ webpage, 'JSON data block'), video_id)
+
+ episode_title = data.get('item_title') or get_element_by_class('episode-title', webpage)
+        if not episode_title:
+            episode_title = self._search_regex(
+                [r'data-title="([^"]+)"', r'<title>(.+?)</title>'],
+                webpage, 'episode title')
+ episode_title = episode_title.strip()
+
+ podcast_title = strip_or_none(clean_html(self._search_regex(
+ r'<h3>([^<]+)</h3>', webpage, 'podcast title',
+ default=None) or get_element_by_class('podcast-title', webpage)))
+
+ title = '%s - %s' % (podcast_title, episode_title) if podcast_title else episode_title
+
+ formats = []
+ for k, format_id in (('media_url_libsyn', 'libsyn'), ('media_url', 'main'), ('download_link', 'download')):
+ f_url = data.get(k)
+ if not f_url:
+ continue
+ formats.append({
+ 'url': f_url,
+ 'format_id': format_id,
+ })
+
+ description = self._html_search_regex(
+ r'<p\s+id="info_text_body">(.+?)</p>', webpage,
+ 'description', default=None)
+ if description:
+            # Replace non-breaking spaces with normal ones and strip surrounding whitespace
+ description = description.replace('\u00A0', ' ').strip()
+ release_date = unified_strdate(self._search_regex(
+ r'<div class="release_date">Released: ([^<]+)<',
+ webpage, 'release date', default=None) or data.get('release_date'))
+
+ return {
+ 'id': video_id,
+ 'title': title,
+ 'description': description,
+ 'thumbnail': data.get('thumbnail_url'),
+ 'upload_date': release_date,
+ 'duration': parse_duration(data.get('duration')),
+ 'formats': formats,
+ }
diff --git a/yt_dlp/extractor/lifenews.py b/yt_dlp/extractor/lifenews.py
new file mode 100644
index 0000000..919cfcb
--- /dev/null
+++ b/yt_dlp/extractor/lifenews.py
@@ -0,0 +1,234 @@
+import re
+
+from .common import InfoExtractor
+from ..compat import (
+ compat_str,
+ compat_urlparse,
+)
+from ..utils import (
+ determine_ext,
+ ExtractorError,
+ int_or_none,
+ parse_iso8601,
+ remove_end,
+)
+
+
+class LifeNewsIE(InfoExtractor):
+ IE_NAME = 'life'
+ IE_DESC = 'Life.ru'
+ _VALID_URL = r'https?://life\.ru/t/[^/]+/(?P<id>\d+)'
+
+ _TESTS = [{
+ # single video embedded via video/source
+ 'url': 'https://life.ru/t/новости/98736',
+ 'md5': '77c95eaefaca216e32a76a343ad89d23',
+ 'info_dict': {
+ 'id': '98736',
+ 'ext': 'mp4',
+ 'title': 'Мужчина нашел дома архив оборонного завода',
+ 'description': 'md5:3b06b1b39b5e2bea548e403d99b8bf26',
+ 'timestamp': 1344154740,
+ 'upload_date': '20120805',
+ 'view_count': int,
+ }
+ }, {
+ # single video embedded via iframe
+ 'url': 'https://life.ru/t/новости/152125',
+ 'md5': '77d19a6f0886cd76bdbf44b4d971a273',
+ 'info_dict': {
+ 'id': '152125',
+ 'ext': 'mp4',
+ 'title': 'В Сети появилось видео захвата «Правым сектором» колхозных полей ',
+ 'description': 'Жители двух поселков Днепропетровской области не простили радикалам угрозу лишения плодородных земель и пошли в лобовую. ',
+ 'timestamp': 1427961840,
+ 'upload_date': '20150402',
+ 'view_count': int,
+ }
+ }, {
+ # two videos embedded via iframe
+ 'url': 'https://life.ru/t/новости/153461',
+ 'info_dict': {
+ 'id': '153461',
+ 'title': 'В Москве спасли потерявшегося медвежонка, который спрятался на дереве',
+ 'description': 'Маленький хищник не смог найти дорогу домой и обрел временное убежище на тополе недалеко от жилого массива, пока его не нашла соседская собака.',
+ 'timestamp': 1430825520,
+ 'view_count': int,
+ },
+ 'playlist': [{
+ 'md5': '9b6ef8bc0ffa25aebc8bdb40d89ab795',
+ 'info_dict': {
+ 'id': '153461-video1',
+ 'ext': 'mp4',
+ 'title': 'В Москве спасли потерявшегося медвежонка, который спрятался на дереве (Видео 1)',
+ 'description': 'Маленький хищник не смог найти дорогу домой и обрел временное убежище на тополе недалеко от жилого массива, пока его не нашла соседская собака.',
+ 'timestamp': 1430825520,
+ 'upload_date': '20150505',
+ },
+ }, {
+ 'md5': 'ebb3bf3b1ce40e878d0d628e93eb0322',
+ 'info_dict': {
+ 'id': '153461-video2',
+ 'ext': 'mp4',
+ 'title': 'В Москве спасли потерявшегося медвежонка, который спрятался на дереве (Видео 2)',
+ 'description': 'Маленький хищник не смог найти дорогу домой и обрел временное убежище на тополе недалеко от жилого массива, пока его не нашла соседская собака.',
+ 'timestamp': 1430825520,
+ 'upload_date': '20150505',
+ },
+ }],
+ }, {
+ 'url': 'https://life.ru/t/новости/213035',
+ 'only_matching': True,
+ }, {
+ 'url': 'https://life.ru/t/%D0%BD%D0%BE%D0%B2%D0%BE%D1%81%D1%82%D0%B8/153461',
+ 'only_matching': True,
+ }, {
+ 'url': 'https://life.ru/t/новости/411489/manuel_vals_nazval_frantsiiu_tsieliu_nomier_odin_dlia_ighil',
+ 'only_matching': True,
+ }]
+
+ def _real_extract(self, url):
+ video_id = self._match_id(url)
+
+ webpage = self._download_webpage(url, video_id)
+
+ video_urls = re.findall(
+ r'<video[^>]+><source[^>]+src=["\'](.+?)["\']', webpage)
+
+ iframe_links = re.findall(
+ r'<iframe[^>]+src=["\']((?:https?:)?//embed\.life\.ru/(?:embed|video)/.+?)["\']',
+ webpage)
+
+ if not video_urls and not iframe_links:
+ raise ExtractorError('No media links available for %s' % video_id)
+
+ title = remove_end(
+ self._og_search_title(webpage),
+ ' - Life.ru')
+
+ description = self._og_search_description(webpage)
+
+ view_count = self._html_search_regex(
+ r'<div[^>]+class=(["\']).*?\bhits-count\b.*?\1[^>]*>\s*(?P<value>\d+)\s*</div>',
+ webpage, 'view count', fatal=False, group='value')
+
+ timestamp = parse_iso8601(self._search_regex(
+ r'<time[^>]+datetime=(["\'])(?P<value>.+?)\1',
+ webpage, 'upload date', fatal=False, group='value'))
+
+ common_info = {
+ 'description': description,
+ 'view_count': int_or_none(view_count),
+ 'timestamp': timestamp,
+ }
+
+ def make_entry(video_id, video_url, index=None):
+ cur_info = dict(common_info)
+ cur_info.update({
+ 'id': video_id if not index else '%s-video%s' % (video_id, index),
+ 'url': video_url,
+ 'title': title if not index else '%s (Видео %s)' % (title, index),
+ })
+ return cur_info
+
+ def make_video_entry(video_id, video_url, index=None):
+ video_url = compat_urlparse.urljoin(url, video_url)
+ return make_entry(video_id, video_url, index)
+
+ def make_iframe_entry(video_id, video_url, index=None):
+ video_url = self._proto_relative_url(video_url, 'http:')
+ cur_info = make_entry(video_id, video_url, index)
+ cur_info['_type'] = 'url_transparent'
+ return cur_info
+
+ if len(video_urls) == 1 and not iframe_links:
+ return make_video_entry(video_id, video_urls[0])
+
+ if len(iframe_links) == 1 and not video_urls:
+ return make_iframe_entry(video_id, iframe_links[0])
+
+ entries = []
+
+ if video_urls:
+ for num, video_url in enumerate(video_urls, 1):
+ entries.append(make_video_entry(video_id, video_url, num))
+
+ if iframe_links:
+ for num, iframe_link in enumerate(iframe_links, len(video_urls) + 1):
+ entries.append(make_iframe_entry(video_id, iframe_link, num))
+
+ playlist = common_info.copy()
+ playlist.update(self.playlist_result(entries, video_id, title, description))
+ return playlist
+
+
+class LifeEmbedIE(InfoExtractor):
+ IE_NAME = 'life:embed'
+ _VALID_URL = r'https?://embed\.life\.ru/(?:embed|video)/(?P<id>[\da-f]{32})'
+
+ _TESTS = [{
+ 'url': 'http://embed.life.ru/embed/e50c2dec2867350528e2574c899b8291',
+ 'md5': 'b889715c9e49cb1981281d0e5458fbbe',
+ 'info_dict': {
+ 'id': 'e50c2dec2867350528e2574c899b8291',
+ 'ext': 'mp4',
+ 'title': 'e50c2dec2867350528e2574c899b8291',
+ 'thumbnail': r're:http://.*\.jpg',
+ }
+ }, {
+ # with 1080p
+ 'url': 'https://embed.life.ru/video/e50c2dec2867350528e2574c899b8291',
+ 'only_matching': True,
+ }]
+
+ def _real_extract(self, url):
+ video_id = self._match_id(url)
+
+ webpage = self._download_webpage(url, video_id)
+
+ thumbnail = None
+ formats = []
+
+ def extract_m3u8(manifest_url):
+ formats.extend(self._extract_m3u8_formats(
+ manifest_url, video_id, 'mp4',
+ entry_protocol='m3u8_native', m3u8_id='m3u8'))
+
+ def extract_original(original_url):
+ formats.append({
+ 'url': original_url,
+ 'format_id': determine_ext(original_url, None),
+ 'quality': 1,
+ })
+
+ playlist = self._parse_json(
+ self._search_regex(
+ r'options\s*=\s*({.+?});', webpage, 'options', default='{}'),
+ video_id).get('playlist', {})
+ if playlist:
+ master = playlist.get('master')
+ if isinstance(master, compat_str) and determine_ext(master) == 'm3u8':
+ extract_m3u8(compat_urlparse.urljoin(url, master))
+ original = playlist.get('original')
+ if isinstance(original, compat_str):
+ extract_original(original)
+ thumbnail = playlist.get('image')
+
+ # Old rendition fallback
+ if not formats:
+ for video_url in re.findall(r'"file"\s*:\s*"([^"]+)', webpage):
+ video_url = compat_urlparse.urljoin(url, video_url)
+ if determine_ext(video_url) == 'm3u8':
+ extract_m3u8(video_url)
+ else:
+ extract_original(video_url)
+
+ thumbnail = thumbnail or self._search_regex(
+ r'"image"\s*:\s*"([^"]+)', webpage, 'thumbnail', default=None)
+
+ return {
+ 'id': video_id,
+ 'title': video_id,
+ 'thumbnail': thumbnail,
+ 'formats': formats,
+ }
diff --git a/yt_dlp/extractor/likee.py b/yt_dlp/extractor/likee.py
new file mode 100644
index 0000000..3244631
--- /dev/null
+++ b/yt_dlp/extractor/likee.py
@@ -0,0 +1,182 @@
+import json
+
+from .common import InfoExtractor
+from ..utils import (
+ int_or_none,
+ js_to_json,
+ parse_iso8601,
+ str_or_none,
+ traverse_obj,
+)
+
+
+class LikeeIE(InfoExtractor):
+ IE_NAME = 'likee'
+    _VALID_URL = r'(?x)https?://(?:www\.)?likee\.video/(?:(?P<channel_name>[^/]+)/video/|v/)(?P<id>\w+)'
+ _TESTS = [{
+ 'url': 'https://likee.video/@huynh_hong_quan_/video/7093444807096327263',
+ 'info_dict': {
+ 'id': '7093444807096327263',
+ 'ext': 'mp4',
+ 'title': '🤴🤴🤴',
+ 'description': 'md5:9a7ebe816f0e78722ee5ed76f75983b4',
+ 'thumbnail': r're:^https?://.+\.jpg',
+ 'uploader': 'Huỳnh Hồng Qu&acirc;n ',
+ 'artist': 'Huỳnh Hồng Qu&acirc;n ',
+ 'timestamp': 1651571320,
+ 'upload_date': '20220503',
+ 'view_count': int,
+ 'uploader_id': 'huynh_hong_quan_',
+ 'duration': 12374,
+ 'comment_count': int,
+ 'like_count': int,
+ },
+ }, {
+ 'url': 'https://likee.video/@649222262/video/7093167848050058862',
+ 'info_dict': {
+ 'id': '7093167848050058862',
+ 'ext': 'mp4',
+ 'title': 'likee video #7093167848050058862',
+ 'description': 'md5:3f971c8c6ee8a216f2b1a9094c5de99f',
+ 'thumbnail': r're:^https?://.+\.jpg',
+ 'comment_count': int,
+ 'like_count': int,
+ 'uploader': 'Vương Phước Nhi',
+ 'timestamp': 1651506835,
+ 'upload_date': '20220502',
+ 'duration': 60024,
+ 'artist': 'Vương Phước Nhi',
+ 'uploader_id': '649222262',
+ 'view_count': int,
+ },
+ }, {
+ 'url': 'https://likee.video/@fernanda_rivasg/video/6932224568407629502',
+ 'info_dict': {
+ 'id': '6932224568407629502',
+ 'ext': 'mp4',
+ 'title': 'Un trend viejito🔥 #LIKEE #Ferlovers #trend ',
+ 'description': 'md5:c42b903a72a99d6d8b73e3d1126fbcef',
+ 'thumbnail': r're:^https?://.+\.jpg',
+ 'comment_count': int,
+ 'duration': 9684,
+ 'uploader_id': 'fernanda_rivasg',
+ 'view_count': int,
+ 'artist': 'La Cami La✨',
+ 'like_count': int,
+ 'uploader': 'Fernanda Rivas🎶',
+ 'timestamp': 1614034308,
+ 'upload_date': '20210222',
+ },
+ }, {
+ 'url': 'https://likee.video/v/k6QcOp',
+ 'info_dict': {
+ 'id': 'k6QcOp',
+ 'ext': 'mp4',
+ 'title': '#AguaChallenge t&uacute; ya lo intentaste?😱🤩',
+ 'description': 'md5:b0cc462689d4ff2b624daa4dba7640d9',
+ 'thumbnail': r're:^https?://.+\.jpg',
+ 'comment_count': int,
+ 'duration': 18014,
+ 'view_count': int,
+ 'timestamp': 1611694774,
+ 'like_count': int,
+ 'uploader': 'Fernanda Rivas🎶',
+ 'uploader_id': 'fernanda_rivasg',
+ 'artist': 'ʟᴇʀɪᴋ_ᴜɴɪᴄᴏʀɴ♡︎',
+ 'upload_date': '20210126',
+ },
+ }, {
+ 'url': 'https://www.likee.video/@649222262/video/7093167848050058862',
+ 'only_matching': True,
+ }, {
+ 'url': 'https://www.likee.video/v/k6QcOp',
+ 'only_matching': True,
+ }]
+
+ def _real_extract(self, url):
+ video_id = self._match_id(url)
+ webpage = self._download_webpage(url, video_id)
+ info = self._parse_json(
+            self._search_regex(r'window\.data\s*=\s*({.+?});', webpage, 'video info'),
+ video_id, transform_source=js_to_json)
+ video_url = traverse_obj(info, 'video_url', ('originVideoInfo', 'video_url'))
+ if not video_url:
+ self.raise_no_formats('Video was deleted', expected=True)
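+        # The '_4' marker in the URL appears to denote the watermarked file;
+        # stripping it is assumed to yield the watermark-free variant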
+ formats = [{
+ 'format_id': 'mp4-with-watermark',
+ 'url': video_url,
+ 'height': info.get('video_height'),
+ 'width': info.get('video_width'),
+ }, {
+ 'format_id': 'mp4-without-watermark',
+ 'url': video_url.replace('_4', ''),
+ 'height': info.get('video_height'),
+ 'width': info.get('video_width'),
+ 'quality': 1,
+ }]
+ return {
+ 'id': video_id,
+ 'title': info.get('msgText'),
+ 'description': info.get('share_desc'),
+ 'view_count': int_or_none(info.get('video_count')),
+ 'like_count': int_or_none(info.get('likeCount')),
+ 'comment_count': int_or_none(info.get('comment_count')),
+ 'uploader': str_or_none(info.get('nick_name')),
+ 'uploader_id': str_or_none(info.get('likeeId')),
+ 'artist': str_or_none(traverse_obj(info, ('sound', 'owner_name'))),
+ 'timestamp': parse_iso8601(info.get('uploadDate')),
+ 'thumbnail': info.get('coverUrl'),
+ 'duration': int_or_none(traverse_obj(info, ('option_data', 'dur'))),
+ 'formats': formats,
+ }
+
+
+class LikeeUserIE(InfoExtractor):
+ IE_NAME = 'likee:user'
+    _VALID_URL = r'https?://(?:www\.)?likee\.video/(?P<id>[^/]+)/?$'
+ _TESTS = [{
+ 'url': 'https://likee.video/@fernanda_rivasg',
+ 'info_dict': {
+ 'id': '925638334',
+ 'title': 'fernanda_rivasg',
+ },
+ 'playlist_mincount': 500,
+ }, {
+ 'url': 'https://likee.video/@may_hmoob',
+ 'info_dict': {
+ 'id': '2943949041',
+ 'title': 'may_hmoob',
+ },
+ 'playlist_mincount': 80,
+ }]
+ _PAGE_SIZE = 50
+ _API_GET_USER_VIDEO = 'https://api.like-video.com/likee-activity-flow-micro/videoApi/getUserVideo'
+
+ def _entries(self, user_name, user_id):
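+        # The API paginates by cursor: each request passes the last postId seen,
+        # and an empty cursor fetches the first page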
+ last_post_id = ''
+ while True:
+ user_videos = self._download_json(
+ self._API_GET_USER_VIDEO, user_name,
+ data=json.dumps({
+ 'uid': user_id,
+ 'count': self._PAGE_SIZE,
+ 'lastPostId': last_post_id,
+ 'tabType': 0,
+ }).encode('utf-8'),
+ headers={'content-type': 'application/json'},
+                note=f'Downloading user videos with lastPostId #{last_post_id}')
+ items = traverse_obj(user_videos, ('data', 'videoList'))
+ if not items:
+ break
+ for item in items:
+ last_post_id = item['postId']
+ yield self.url_result(f'https://likee.video/{user_name}/video/{last_post_id}')
+
+ def _real_extract(self, url):
+ user_name = self._match_id(url)
+ webpage = self._download_webpage(url, user_name)
+ info = self._parse_json(
+ self._search_regex(r'window\.data\s*=\s*({.+?});', webpage, 'user info'),
+ user_name, transform_source=js_to_json)
+ user_id = traverse_obj(info, ('userinfo', 'uid'))
+ return self.playlist_result(self._entries(user_name, user_id), user_id, traverse_obj(info, ('userinfo', 'user_name')))
diff --git a/yt_dlp/extractor/limelight.py b/yt_dlp/extractor/limelight.py
new file mode 100644
index 0000000..4e50f10
--- /dev/null
+++ b/yt_dlp/extractor/limelight.py
@@ -0,0 +1,358 @@
+import re
+
+from .common import InfoExtractor
+from ..networking.exceptions import HTTPError
+from ..utils import (
+    ExtractorError,
+    determine_ext,
+    float_or_none,
+    int_or_none,
+    smuggle_url,
+    try_get,
+    unsmuggle_url,
+)
+
+
+class LimelightBaseIE(InfoExtractor):
+ _PLAYLIST_SERVICE_URL = 'http://production-ps.lvp.llnw.net/r/PlaylistService/%s/%s/%s'
+
+ @classmethod
+ def _extract_embed_urls(cls, url, webpage):
+ lm = {
+ 'Media': 'media',
+ 'Channel': 'channel',
+ 'ChannelList': 'channel_list',
+ }
+
+ def smuggle(url):
+ return smuggle_url(url, {'source_url': url})
+
+ entries = []
+ for kind, video_id in re.findall(
+ r'LimelightPlayer\.doLoad(Media|Channel|ChannelList)\(["\'](?P<id>[a-z0-9]{32})',
+ webpage):
+ entries.append(cls.url_result(
+ smuggle('limelight:%s:%s' % (lm[kind], video_id)),
+ 'Limelight%s' % kind, video_id))
+ for mobj in re.finditer(
+            # As per [1], the class attribute should be exactly equal to
+            # 'LimelightEmbeddedPlayerFlash', but numerous examples have been
+            # seen that don't match it exactly (e.g. [2]).
+ # 1. http://support.3playmedia.com/hc/en-us/articles/227732408-Limelight-Embedding-the-Captions-Plugin-with-the-Limelight-Player-on-Your-Webpage
+ # 2. http://www.sedona.com/FacilitatorTraining2017
+ r'''(?sx)
+ <object[^>]+class=(["\'])(?:(?!\1).)*\bLimelightEmbeddedPlayerFlash\b(?:(?!\1).)*\1[^>]*>.*?
+ <param[^>]+
+ name=(["\'])flashVars\2[^>]+
+ value=(["\'])(?:(?!\3).)*(?P<kind>media|channel(?:List)?)Id=(?P<id>[a-z0-9]{32})
+ ''', webpage):
+ kind, video_id = mobj.group('kind'), mobj.group('id')
+ entries.append(cls.url_result(
+ smuggle('limelight:%s:%s' % (kind, video_id)),
+ 'Limelight%s' % kind.capitalize(), video_id))
+        # http://support.3playmedia.com/hc/en-us/articles/115009517327-Limelight-Embedding-the-Audio-Description-Plugin-with-the-Limelight-Player-on-Your-Web-Page
+ for video_id in re.findall(
+ r'(?s)LimelightPlayerUtil\.embed\s*\(\s*{.*?\bmediaId["\']\s*:\s*["\'](?P<id>[a-z0-9]{32})',
+ webpage):
+ entries.append(cls.url_result(
+ smuggle('limelight:media:%s' % video_id),
+ LimelightMediaIE.ie_key(), video_id))
+ return entries
+
+ def _call_playlist_service(self, item_id, method, fatal=True, referer=None):
+ headers = {}
+ if referer:
+ headers['Referer'] = referer
+ try:
+ return self._download_json(
+ self._PLAYLIST_SERVICE_URL % (self._PLAYLIST_SERVICE_PATH, item_id, method),
+ item_id, 'Downloading PlaylistService %s JSON' % method,
+ fatal=fatal, headers=headers)
+ except ExtractorError as e:
+ if isinstance(e.cause, HTTPError) and e.cause.status == 403:
+ error = self._parse_json(e.cause.response.read().decode(), item_id)['detail']['contentAccessPermission']
+ if error == 'CountryDisabled':
+ self.raise_geo_restricted()
+ raise ExtractorError(error, expected=True)
+ raise
+
+ def _extract(self, item_id, pc_method, mobile_method, referer=None):
+ pc = self._call_playlist_service(item_id, pc_method, referer=referer)
+ mobile = self._call_playlist_service(
+ item_id, mobile_method, fatal=False, referer=referer)
+ return pc, mobile
+
+ def _extract_info(self, pc, mobile, i, referer):
+ get_item = lambda x, y: try_get(x, lambda x: x[y][i], dict) or {}
+ pc_item = get_item(pc, 'playlistItems')
+ mobile_item = get_item(mobile, 'mediaList')
+ video_id = pc_item.get('mediaId') or mobile_item['mediaId']
+ title = pc_item.get('title') or mobile_item['title']
+
+ formats = []
+ urls = []
+ for stream in pc_item.get('streams', []):
+ stream_url = stream.get('url')
+ if not stream_url or stream_url in urls:
+ continue
+ if not self.get_param('allow_unplayable_formats') and stream.get('drmProtected'):
+ continue
+ urls.append(stream_url)
+ ext = determine_ext(stream_url)
+ if ext == 'f4m':
+ formats.extend(self._extract_f4m_formats(
+ stream_url, video_id, f4m_id='hds', fatal=False))
+ else:
+ fmt = {
+ 'url': stream_url,
+ 'abr': float_or_none(stream.get('audioBitRate')),
+ 'fps': float_or_none(stream.get('videoFrameRate')),
+ 'ext': ext,
+ }
+ width = int_or_none(stream.get('videoWidthInPixels'))
+ height = int_or_none(stream.get('videoHeightInPixels'))
+ vbr = float_or_none(stream.get('videoBitRate'))
+ if width or height or vbr:
+ fmt.update({
+ 'width': width,
+ 'height': height,
+ 'vbr': vbr,
+ })
+ else:
+ fmt['vcodec'] = 'none'
+ rtmp = re.search(r'^(?P<url>rtmpe?://(?P<host>[^/]+)/(?P<app>.+))/(?P<playpath>mp[34]:.+)$', stream_url)
+ if rtmp:
+ format_id = 'rtmp'
+ if stream.get('videoBitRate'):
+ format_id += '-%d' % int_or_none(stream['videoBitRate'])
+ http_format_id = format_id.replace('rtmp', 'http')
+
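+                    # Some CDN hosts are known to also serve the RTMP play path
+                    # over plain HTTP; probe the URL and add it only if valid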
+ CDN_HOSTS = (
+ ('delvenetworks.com', 'cpl.delvenetworks.com'),
+ ('video.llnw.net', 's2.content.video.llnw.net'),
+ )
+ for cdn_host, http_host in CDN_HOSTS:
+ if cdn_host not in rtmp.group('host').lower():
+ continue
+ http_url = 'http://%s/%s' % (http_host, rtmp.group('playpath')[4:])
+ urls.append(http_url)
+ if self._is_valid_url(http_url, video_id, http_format_id):
+ http_fmt = fmt.copy()
+ http_fmt.update({
+ 'url': http_url,
+ 'format_id': http_format_id,
+ })
+ formats.append(http_fmt)
+ break
+
+ fmt.update({
+ 'url': rtmp.group('url'),
+ 'play_path': rtmp.group('playpath'),
+ 'app': rtmp.group('app'),
+ 'ext': 'flv',
+ 'format_id': format_id,
+ })
+ formats.append(fmt)
+
+ for mobile_url in mobile_item.get('mobileUrls', []):
+ media_url = mobile_url.get('mobileUrl')
+ format_id = mobile_url.get('targetMediaPlatform')
+ if not media_url or media_url in urls:
+ continue
+ if (format_id in ('Widevine', 'SmoothStreaming')
+ and not self.get_param('allow_unplayable_formats', False)):
+ continue
+ urls.append(media_url)
+ ext = determine_ext(media_url)
+ if ext == 'm3u8':
+ formats.extend(self._extract_m3u8_formats(
+ media_url, video_id, 'mp4', 'm3u8_native',
+ m3u8_id=format_id, fatal=False))
+ elif ext == 'f4m':
+ formats.extend(self._extract_f4m_formats(
+                    media_url, video_id, f4m_id=format_id, fatal=False))
+ else:
+ formats.append({
+ 'url': media_url,
+ 'format_id': format_id,
+ 'quality': -10,
+ 'ext': ext,
+ })
+
+ subtitles = {}
+        for flag in mobile_item.get('flags') or []:
+ if flag == 'ClosedCaptions':
+ closed_captions = self._call_playlist_service(
+ video_id, 'getClosedCaptionsDetailsByMediaId',
+ False, referer) or []
+ for cc in closed_captions:
+ cc_url = cc.get('webvttFileUrl')
+ if not cc_url:
+ continue
+ lang = cc.get('languageCode') or self._search_regex(r'/([a-z]{2})\.vtt', cc_url, 'lang', default='en')
+ subtitles.setdefault(lang, []).append({
+ 'url': cc_url,
+ })
+ break
+
+ get_meta = lambda x: pc_item.get(x) or mobile_item.get(x)
+
+ return {
+ 'id': video_id,
+ 'title': title,
+ 'description': get_meta('description'),
+ 'formats': formats,
+ 'duration': float_or_none(get_meta('durationInMilliseconds'), 1000),
+ 'thumbnail': get_meta('previewImageUrl') or get_meta('thumbnailImageUrl'),
+ 'subtitles': subtitles,
+ }
+
+
+class LimelightMediaIE(LimelightBaseIE):
+ IE_NAME = 'limelight'
+ _VALID_URL = r'''(?x)
+ (?:
+ limelight:media:|
+ https?://
+ (?:
+ link\.videoplatform\.limelight\.com/media/|
+ assets\.delvenetworks\.com/player/loader\.swf
+ )
+ \?.*?\bmediaId=
+ )
+ (?P<id>[a-z0-9]{32})
+ '''
+ _TESTS = [{
+ 'url': 'http://link.videoplatform.limelight.com/media/?mediaId=3ffd040b522b4485b6d84effc750cd86',
+ 'info_dict': {
+ 'id': '3ffd040b522b4485b6d84effc750cd86',
+ 'ext': 'mp4',
+ 'title': 'HaP and the HB Prince Trailer',
+ 'description': 'md5:8005b944181778e313d95c1237ddb640',
+ 'thumbnail': r're:^https?://.*\.jpeg$',
+ 'duration': 144.23,
+ },
+ 'params': {
+ # m3u8 download
+ 'skip_download': True,
+ },
+ }, {
+ # video with subtitles
+ 'url': 'limelight:media:a3e00274d4564ec4a9b29b9466432335',
+ 'md5': '2fa3bad9ac321e23860ca23bc2c69e3d',
+ 'info_dict': {
+ 'id': 'a3e00274d4564ec4a9b29b9466432335',
+ 'ext': 'mp4',
+ 'title': '3Play Media Overview Video',
+ 'thumbnail': r're:^https?://.*\.jpeg$',
+ 'duration': 78.101,
+ # TODO: extract all languages that were accessible via API
+ # 'subtitles': 'mincount:9',
+ 'subtitles': 'mincount:1',
+ },
+ }, {
+ 'url': 'https://assets.delvenetworks.com/player/loader.swf?mediaId=8018a574f08d416e95ceaccae4ba0452',
+ 'only_matching': True,
+ }]
+ _PLAYLIST_SERVICE_PATH = 'media'
+
+ def _real_extract(self, url):
+ url, smuggled_data = unsmuggle_url(url, {})
+ video_id = self._match_id(url)
+ source_url = smuggled_data.get('source_url')
+ self._initialize_geo_bypass({
+ 'countries': smuggled_data.get('geo_countries'),
+ })
+
+ pc, mobile = self._extract(
+ video_id, 'getPlaylistByMediaId',
+ 'getMobilePlaylistByMediaId', source_url)
+
+ return self._extract_info(pc, mobile, 0, source_url)
+
+
+class LimelightChannelIE(LimelightBaseIE):
+ IE_NAME = 'limelight:channel'
+ _VALID_URL = r'''(?x)
+ (?:
+ limelight:channel:|
+ https?://
+ (?:
+ link\.videoplatform\.limelight\.com/media/|
+ assets\.delvenetworks\.com/player/loader\.swf
+ )
+ \?.*?\bchannelId=
+ )
+ (?P<id>[a-z0-9]{32})
+ '''
+ _TESTS = [{
+ 'url': 'http://link.videoplatform.limelight.com/media/?channelId=ab6a524c379342f9b23642917020c082',
+ 'info_dict': {
+ 'id': 'ab6a524c379342f9b23642917020c082',
+ 'title': 'Javascript Sample Code',
+ 'description': 'Javascript Sample Code - http://www.delvenetworks.com/sample-code/playerCode-demo.html',
+ },
+ 'playlist_mincount': 3,
+ }, {
+ 'url': 'http://assets.delvenetworks.com/player/loader.swf?channelId=ab6a524c379342f9b23642917020c082',
+ 'only_matching': True,
+ }]
+ _PLAYLIST_SERVICE_PATH = 'channel'
+
+ def _real_extract(self, url):
+ url, smuggled_data = unsmuggle_url(url, {})
+ channel_id = self._match_id(url)
+ source_url = smuggled_data.get('source_url')
+
+ pc, mobile = self._extract(
+ channel_id, 'getPlaylistByChannelId',
+ 'getMobilePlaylistWithNItemsByChannelId?begin=0&count=-1',
+ source_url)
+
+ entries = [
+ self._extract_info(pc, mobile, i, source_url)
+ for i in range(len(pc['playlistItems']))]
+
+ return self.playlist_result(
+ entries, channel_id, pc.get('title'), mobile.get('description'))
+
+
+class LimelightChannelListIE(LimelightBaseIE):
+ IE_NAME = 'limelight:channel_list'
+ _VALID_URL = r'''(?x)
+ (?:
+ limelight:channel_list:|
+ https?://
+ (?:
+ link\.videoplatform\.limelight\.com/media/|
+ assets\.delvenetworks\.com/player/loader\.swf
+ )
+ \?.*?\bchannelListId=
+ )
+ (?P<id>[a-z0-9]{32})
+ '''
+ _TESTS = [{
+ 'url': 'http://link.videoplatform.limelight.com/media/?channelListId=301b117890c4465c8179ede21fd92e2b',
+ 'info_dict': {
+ 'id': '301b117890c4465c8179ede21fd92e2b',
+ 'title': 'Website - Hero Player',
+ },
+ 'playlist_mincount': 2,
+ }, {
+ 'url': 'https://assets.delvenetworks.com/player/loader.swf?channelListId=301b117890c4465c8179ede21fd92e2b',
+ 'only_matching': True,
+ }]
+ _PLAYLIST_SERVICE_PATH = 'channel_list'
+
+ def _real_extract(self, url):
+ channel_list_id = self._match_id(url)
+
+ channel_list = self._call_playlist_service(
+ channel_list_id, 'getMobileChannelListById')
+
+ entries = [
+ self.url_result('limelight:channel:%s' % channel['id'], 'LimelightChannel')
+ for channel in channel_list['channelList']]
+
+ return self.playlist_result(
+ entries, channel_list_id, channel_list['title'])
diff --git a/yt_dlp/extractor/linkedin.py b/yt_dlp/extractor/linkedin.py
new file mode 100644
index 0000000..ad41c0e
--- /dev/null
+++ b/yt_dlp/extractor/linkedin.py
@@ -0,0 +1,272 @@
+import re
+from itertools import zip_longest
+
+from .common import InfoExtractor
+from ..utils import (
+ ExtractorError,
+ extract_attributes,
+ float_or_none,
+ int_or_none,
+    mimetype2ext,
+    srt_subtitles_timecode,
+ traverse_obj,
+ try_get,
+ url_or_none,
+ urlencode_postdata,
+ urljoin,
+)
+
+
+class LinkedInBaseIE(InfoExtractor):
+ _NETRC_MACHINE = 'linkedin'
+ _logged_in = False
+
+ def _perform_login(self, username, password):
+ if self._logged_in:
+ return
+
+ login_page = self._download_webpage(
+ self._LOGIN_URL, None, 'Downloading login page')
+ action_url = urljoin(self._LOGIN_URL, self._search_regex(
+ r'<form[^>]+action=(["\'])(?P<url>.+?)\1', login_page, 'post url',
+ default='https://www.linkedin.com/uas/login-submit', group='url'))
+ data = self._hidden_inputs(login_page)
+ data.update({
+ 'session_key': username,
+ 'session_password': password,
+ })
+ login_submit_page = self._download_webpage(
+ action_url, None, 'Logging in',
+ data=urlencode_postdata(data))
+ error = self._search_regex(
+ r'<span[^>]+class="error"[^>]*>\s*(.+?)\s*</span>',
+ login_submit_page, 'error', default=None)
+ if error:
+ raise ExtractorError(error, expected=True)
+ LinkedInBaseIE._logged_in = True
+
+
+class LinkedInLearningBaseIE(LinkedInBaseIE):
+ _LOGIN_URL = 'https://www.linkedin.com/uas/login?trk=learning'
+
+ def _call_api(self, course_slug, fields, video_slug=None, resolution=None):
+ query = {
+ 'courseSlug': course_slug,
+ 'fields': fields,
+ 'q': 'slugs',
+ }
+ sub = ''
+ if video_slug:
+ query.update({
+ 'videoSlug': video_slug,
+ 'resolution': '_%s' % resolution,
+ })
+ sub = ' %dp' % resolution
+ api_url = 'https://www.linkedin.com/learning-api/detailedCourses'
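+        # The JSESSIONID cookie value doubles as the CSRF token expected by the learning API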
+ if not self._get_cookies(api_url).get('JSESSIONID'):
+ self.raise_login_required()
+ return self._download_json(
+ api_url, video_slug, 'Downloading%s JSON metadata' % sub, headers={
+ 'Csrf-Token': self._get_cookies(api_url)['JSESSIONID'].value,
+ }, query=query)['elements'][0]
+
+ def _get_urn_id(self, video_data):
+ urn = video_data.get('urn')
+ if urn:
+ mobj = re.search(r'urn:li:lyndaCourse:\d+,(\d+)', urn)
+ if mobj:
+ return mobj.group(1)
+
+ def _get_video_id(self, video_data, course_slug, video_slug):
+ return self._get_urn_id(video_data) or '%s/%s' % (course_slug, video_slug)
+
+
+class LinkedInIE(LinkedInBaseIE):
+ _VALID_URL = r'https?://(?:www\.)?linkedin\.com/posts/[^/?#]+-(?P<id>\d+)-\w{4}/?(?:[?#]|$)'
+ _TESTS = [{
+ 'url': 'https://www.linkedin.com/posts/mishalkhawaja_sendinblueviews-toronto-digitalmarketing-ugcPost-6850898786781339649-mM20',
+ 'info_dict': {
+ 'id': '6850898786781339649',
+ 'ext': 'mp4',
+ 'title': 'Mishal K. on LinkedIn: #sendinblueviews #toronto #digitalmarketing #nowhiring #sendinblue…',
+ 'description': 'md5:2998a31f6f479376dd62831f53a80f71',
+ 'uploader': 'Mishal K.',
+ 'thumbnail': 're:^https?://media.licdn.com/dms/image/.*$',
+ 'like_count': int
+ },
+ }, {
+ 'url': 'https://www.linkedin.com/posts/the-mathworks_2_what-is-mathworks-cloud-center-activity-7151241570371948544-4Gu7',
+ 'info_dict': {
+ 'id': '7151241570371948544',
+ 'ext': 'mp4',
+ 'title': 'MathWorks on LinkedIn: What Is MathWorks Cloud Center?',
+ 'description': 'md5:95f9d4eeb6337882fb47eefe13d7a40c',
+ 'uploader': 'MathWorks',
+ 'thumbnail': 're:^https?://media.licdn.com/dms/image/.*$',
+ 'like_count': int,
+ 'subtitles': 'mincount:1'
+ },
+ }]
+
+ def _real_extract(self, url):
+ video_id = self._match_id(url)
+ webpage = self._download_webpage(url, video_id)
+
+ video_attrs = extract_attributes(self._search_regex(r'(<video[^>]+>)', webpage, 'video'))
+ sources = self._parse_json(video_attrs['data-sources'], video_id)
+ formats = [{
+ 'url': source['src'],
+ 'ext': mimetype2ext(source.get('type')),
+ 'tbr': float_or_none(source.get('data-bitrate'), scale=1000),
+ } for source in sources]
+ subtitles = {'en': [{
+ 'url': video_attrs['data-captions-url'],
+ 'ext': 'vtt',
+ }]} if url_or_none(video_attrs.get('data-captions-url')) else {}
+
+ return {
+ 'id': video_id,
+ 'formats': formats,
+ 'title': self._og_search_title(webpage, default=None) or self._html_extract_title(webpage),
+ 'like_count': int_or_none(self._search_regex(
+ r'\bdata-num-reactions="(\d+)"', webpage, 'reactions', default=None)),
+ 'uploader': traverse_obj(
+ self._yield_json_ld(webpage, video_id),
+ (lambda _, v: v['@type'] == 'SocialMediaPosting', 'author', 'name', {str}), get_all=False),
+ 'thumbnail': self._og_search_thumbnail(webpage),
+ 'description': self._og_search_description(webpage, default=None),
+ 'subtitles': subtitles,
+ }
+
+
+class LinkedInLearningIE(LinkedInLearningBaseIE):
+ IE_NAME = 'linkedin:learning'
+ _VALID_URL = r'https?://(?:www\.)?linkedin\.com/learning/(?P<course_slug>[^/]+)/(?P<id>[^/?#]+)'
+ _TEST = {
+ 'url': 'https://www.linkedin.com/learning/programming-foundations-fundamentals/welcome?autoplay=true',
+ 'md5': 'a1d74422ff0d5e66a792deb996693167',
+ 'info_dict': {
+ 'id': '90426',
+ 'ext': 'mp4',
+ 'title': 'Welcome',
+ 'timestamp': 1430396150.82,
+ 'upload_date': '20150430',
+ },
+ }
+
+ def json2srt(self, transcript_lines, duration=None):
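+        # Build SRT from the transcript cues: each caption runs until the next
+        # cue starts; the last falls back to the video duration (or start + 1s)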
+ srt_data = ''
+ for line, (line_dict, next_dict) in enumerate(zip_longest(transcript_lines, transcript_lines[1:])):
+ start_time, caption = line_dict['transcriptStartAt'] / 1000, line_dict['caption']
+ end_time = next_dict['transcriptStartAt'] / 1000 if next_dict else duration or start_time + 1
+ srt_data += '%d\n%s --> %s\n%s\n\n' % (line + 1, srt_subtitles_timecode(start_time),
+ srt_subtitles_timecode(end_time),
+ caption)
+ return srt_data
+
+ def _real_extract(self, url):
+ course_slug, video_slug = self._match_valid_url(url).groups()
+
+ formats = []
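+        # The API returns one progressive rendition per call, so each target
+        # resolution has to be requested separately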
+ for width, height in ((640, 360), (960, 540), (1280, 720)):
+ video_data = self._call_api(
+ course_slug, 'selectedVideo', video_slug, height)['selectedVideo']
+
+ video_url_data = video_data.get('url') or {}
+ progressive_url = video_url_data.get('progressiveUrl')
+ if progressive_url:
+ formats.append({
+ 'format_id': 'progressive-%dp' % height,
+ 'url': progressive_url,
+ 'ext': 'mp4',
+ 'height': height,
+ 'width': width,
+ 'source_preference': 1,
+ })
+
+ title = video_data['title']
+
+ audio_url = video_data.get('audio', {}).get('progressiveUrl')
+ if audio_url:
+ formats.append({
+ 'abr': 64,
+ 'ext': 'm4a',
+ 'format_id': 'audio',
+ 'url': audio_url,
+ 'vcodec': 'none',
+ })
+
+ streaming_url = video_url_data.get('streamingUrl')
+ if streaming_url:
+ formats.extend(self._extract_m3u8_formats(
+ streaming_url, video_slug, 'mp4',
+ 'm3u8_native', m3u8_id='hls', fatal=False))
+
+ subtitles = {}
+ duration = int_or_none(video_data.get('durationInSeconds'))
+ transcript_lines = try_get(video_data, lambda x: x['transcript']['lines'], expected_type=list)
+ if transcript_lines:
+ subtitles['en'] = [{
+ 'ext': 'srt',
+ 'data': self.json2srt(transcript_lines, duration)
+ }]
+
+ return {
+ 'id': self._get_video_id(video_data, course_slug, video_slug),
+ 'title': title,
+ 'formats': formats,
+ 'thumbnail': video_data.get('defaultThumbnail'),
+ 'timestamp': float_or_none(video_data.get('publishedOn'), 1000),
+ 'duration': duration,
+ 'subtitles': subtitles,
+            # It seems like this would be handled correctly by default,
+            # but until someone can confirm it, the old behaviour is kept as-is
+ '_format_sort_fields': ('res', 'source_preference')
+ }
+
+
+class LinkedInLearningCourseIE(LinkedInLearningBaseIE):
+ IE_NAME = 'linkedin:learning:course'
+ _VALID_URL = r'https?://(?:www\.)?linkedin\.com/learning/(?P<id>[^/?#]+)'
+ _TEST = {
+ 'url': 'https://www.linkedin.com/learning/programming-foundations-fundamentals',
+ 'info_dict': {
+ 'id': 'programming-foundations-fundamentals',
+ 'title': 'Programming Foundations: Fundamentals',
+ 'description': 'md5:76e580b017694eb89dc8e8923fff5c86',
+ },
+ 'playlist_mincount': 61,
+ }
+
+ @classmethod
+ def suitable(cls, url):
+        return False if LinkedInLearningIE.suitable(url) else super().suitable(url)
+
+ def _real_extract(self, url):
+ course_slug = self._match_id(url)
+ course_data = self._call_api(course_slug, 'chapters,description,title')
+
+ entries = []
+ for chapter_number, chapter in enumerate(course_data.get('chapters', []), 1):
+ chapter_title = chapter.get('title')
+ chapter_id = self._get_urn_id(chapter)
+ for video in chapter.get('videos', []):
+ video_slug = video.get('slug')
+ if not video_slug:
+ continue
+ entries.append({
+ '_type': 'url_transparent',
+ 'id': self._get_video_id(video, course_slug, video_slug),
+ 'title': video.get('title'),
+ 'url': 'https://www.linkedin.com/learning/%s/%s' % (course_slug, video_slug),
+ 'chapter': chapter_title,
+ 'chapter_number': chapter_number,
+ 'chapter_id': chapter_id,
+ 'ie_key': LinkedInLearningIE.ie_key(),
+ })
+
+ return self.playlist_result(
+ entries, course_slug,
+ course_data.get('title'),
+ course_data.get('description'))
diff --git a/yt_dlp/extractor/liputan6.py b/yt_dlp/extractor/liputan6.py
new file mode 100644
index 0000000..c4477b9
--- /dev/null
+++ b/yt_dlp/extractor/liputan6.py
@@ -0,0 +1,64 @@
+from .common import InfoExtractor
+from .vidio import VidioIE
+
+
+class Liputan6IE(InfoExtractor):
+ _VALID_URL = r'https?://www\.liputan6\.com/\w+/read/\d+/(?P<id>[\w-]+)'
+ _TESTS = [{
+ 'url': 'https://www.liputan6.com/news/read/5007510/video-duh-perawat-rs-di-medan-diduga-salah-berikan-obat-ke-pasien',
+ 'info_dict': {
+ 'id': '7082548',
+ 'ext': 'mp4',
+ 'title': 'Duh, Perawat RS di Medan Diduga Salah Berikan Obat Ke Pasien',
+ 'thumbnail': 'https://thumbor.prod.vidiocdn.com/lOz5pStm9X-jjlTa_VQQUelOPtw=/640x360/filters:quality(70)/vidio-web-prod-video/uploads/video/image/7082548/duh-perawat-rs-di-medan-diduga-salah-berikan-obat-ke-pasien-ca1125.jpg',
+ 'channel_id': '185693',
+ 'uploader': 'Liputan6.com',
+ 'duration': 104,
+ 'uploader_url': 'https://www.vidio.com/@liputan6',
+ 'description': 'md5:3b58ecff10ec3a41d4304cf98228435a',
+ 'timestamp': 1657159427,
+ 'uploader_id': 'liputan6',
+ 'display_id': 'video-duh-perawat-rs-di-medan-diduga-salah-berikan-obat-ke-pasien',
+ 'like_count': int,
+ 'view_count': int,
+ 'comment_count': int,
+ 'tags': ['perawat indonesia', 'rumah sakit', 'Medan', 'viral hari ini', 'viral', 'enamplus'],
+ 'channel': 'Default Channel',
+ 'dislike_count': int,
+ 'upload_date': '20220707'
+ }
+ }, {
+ 'url': 'https://www.liputan6.com/tv/read/5007719/video-program-minyakita-minyak-goreng-kemasan-sederhana-seharga-rp-14-ribu',
+ 'info_dict': {
+ 'id': '7082543',
+ 'ext': 'mp4',
+ 'title': 'md5:ecb7b3c598b97798bfd0eb50c6233b8c',
+ 'channel_id': '604054',
+ 'dislike_count': int,
+ 'comment_count': int,
+ 'timestamp': 1657159211,
+ 'upload_date': '20220707',
+ 'tags': ['minyakita', 'minyak goreng', 'liputan 6', 'sctv'],
+ 'uploader_url': 'https://www.vidio.com/@sctv',
+ 'display_id': 'video-program-minyakita-minyak-goreng-kemasan-sederhana-seharga-rp-14-ribu',
+ 'like_count': int,
+ 'uploader': 'SCTV',
+ 'description': 'md5:6c374d82589b71fb98b3d550edb6873f',
+ 'duration': 99,
+ 'uploader_id': 'sctv',
+ 'thumbnail': 'https://thumbor.prod.vidiocdn.com/AAIOjz-64hKojjdw5hr0oNNEeJg=/640x360/filters:quality(70)/vidio-web-prod-video/uploads/video/image/7082543/program-minyakita-minyak-goreng-kemasan-sederhana-seharga-rp14-ribu-_-liputan-6-7d9fbb.jpg',
+ 'channel': 'Liputan 6 Pagi',
+ 'view_count': int,
+ }
+ }]
+
+ def _real_extract(self, url):
+ display_id = self._match_id(url)
+ webpage = self._download_webpage(url, display_id)
+
+ json_data = self._search_json(
+            r'window\.kmklabs\.gtm\s*=', webpage, 'json_data', display_id)
+ video_id = json_data['videos']['video_1']['video_id']
+
+ return self.url_result(
+ f'https://www.vidio.com/watch/{video_id}-{display_id}', ie=VidioIE, video_id=display_id)
diff --git a/yt_dlp/extractor/listennotes.py b/yt_dlp/extractor/listennotes.py
new file mode 100644
index 0000000..4ebc9be
--- /dev/null
+++ b/yt_dlp/extractor/listennotes.py
@@ -0,0 +1,86 @@
+import re
+
+from .common import InfoExtractor
+from ..utils import (
+ clean_html,
+ extract_attributes,
+ get_element_by_class,
+ get_element_html_by_id,
+ get_element_text_and_html_by_tag,
+ parse_duration,
+ strip_or_none,
+ traverse_obj,
+ try_call,
+)
+
+
+class ListenNotesIE(InfoExtractor):
+ _VALID_URL = r'https?://(?:www\.)?listennotes\.com/podcasts/[^/]+/[^/]+-(?P<id>.+)/'
+ _TESTS = [{
+ 'url': 'https://www.listennotes.com/podcasts/thriving-on-overload/tim-oreilly-on-noticing-KrDgvNb_u1n/',
+ 'md5': '5b91a32f841e5788fb82b72a1a8af7f7',
+ 'info_dict': {
+ 'id': 'KrDgvNb_u1n',
+ 'ext': 'mp3',
+ 'title': 'md5:32236591a921adf17bbdbf0441b6c0e9',
+ 'description': 'md5:c581ed197eeddcee55a67cdb547c8cbd',
+ 'duration': 2148.0,
+ 'channel': 'Thriving on Overload',
+ 'channel_id': 'ed84wITivxF',
+ 'episode_id': 'e1312583fa7b4e24acfbb5131050be00',
+ 'thumbnail': 'https://production.listennotes.com/podcasts/thriving-on-overload-ross-dawson-1wb_KospA3P-ed84wITivxF.300x300.jpg',
+ 'channel_url': 'https://www.listennotes.com/podcasts/thriving-on-overload-ross-dawson-ed84wITivxF/',
+ 'cast': ['Tim O’Reilly', 'Cookie Monster', 'Lao Tzu', 'Wallace Steven', 'Eric Raymond', 'Christine Peterson', 'John Maynard Keyne', 'Ross Dawson'],
+ }
+ }, {
+ 'url': 'https://www.listennotes.com/podcasts/ask-noah-show/episode-177-wireguard-with-lwEA3154JzG/',
+ 'md5': '62fb4ffe7fc525632a1138bf72a5ce53',
+ 'info_dict': {
+ 'id': 'lwEA3154JzG',
+ 'ext': 'mp3',
+ 'title': 'Episode 177: WireGuard with Jason Donenfeld',
+ 'description': 'md5:24744f36456a3e95f83c1193a3458594',
+ 'duration': 3861.0,
+ 'channel': 'Ask Noah Show',
+ 'channel_id': '4DQTzdS5-j7',
+ 'episode_id': '8c8954b95e0b4859ad1eecec8bf6d3a4',
+ 'channel_url': 'https://www.listennotes.com/podcasts/ask-noah-show-noah-j-chelliah-4DQTzdS5-j7/',
+ 'thumbnail': 'https://production.listennotes.com/podcasts/ask-noah-show-noah-j-chelliah-cfbRUw9Gs3F-4DQTzdS5-j7.300x300.jpg',
+ 'cast': ['noah showlink', 'noah show', 'noah dashboard', 'jason donenfeld'],
+ }
+ }]
+
+ def _clean_description(self, description):
+ return clean_html(re.sub(r'(</?(div|p)>\s*)+', '<br/><br/>', description or ''))
+
+ def _real_extract(self, url):
+ audio_id = self._match_id(url)
+ webpage = self._download_webpage(url, audio_id)
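+        # Episode metadata is split between an embedded JSON blob and data-*
+        # attributes on the play-button toolbar; merge the two sources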
+ data = self._search_json(
+ r'<script id="original-content"[^>]+\btype="application/json">', webpage, 'content', audio_id)
+ data.update(extract_attributes(get_element_html_by_id(
+ r'episode-play-button-toolbar|episode-no-play-button-toolbar', webpage, escape_value=False)))
+
+ duration, description = self._search_regex(
+ r'(?P<duration>[\d:]+)\s*-\s*(?P<description>.+)',
+ self._html_search_meta(['og:description', 'description', 'twitter:description'], webpage),
+ 'description', fatal=False, group=('duration', 'description')) or (None, None)
+
+ return {
+ 'id': audio_id,
+ 'url': data['audio'],
+ 'title': (data.get('data-title')
+ or try_call(lambda: get_element_text_and_html_by_tag('h1', webpage)[0])
+ or self._html_search_meta(('og:title', 'title', 'twitter:title'), webpage, 'title')),
+ 'description': (self._clean_description(get_element_by_class('ln-text-p', webpage))
+ or strip_or_none(description)),
+ 'duration': parse_duration(traverse_obj(data, 'audio_length', 'data-duration') or duration),
+ 'episode_id': traverse_obj(data, 'uuid', 'data-episode-uuid'),
+ **traverse_obj(data, {
+ 'thumbnail': 'data-image',
+ 'channel': 'data-channel-title',
+ 'cast': ('nlp_entities', ..., 'name'),
+ 'channel_url': 'channel_url',
+ 'channel_id': 'channel_short_uuid',
+ })
+ }
diff --git a/yt_dlp/extractor/litv.py b/yt_dlp/extractor/litv.py
new file mode 100644
index 0000000..1003fb2
--- /dev/null
+++ b/yt_dlp/extractor/litv.py
@@ -0,0 +1,148 @@
+import json
+
+from .common import InfoExtractor
+from ..utils import (
+ ExtractorError,
+ int_or_none,
+ smuggle_url,
+ traverse_obj,
+ try_call,
+ unsmuggle_url,
+)
+
+
+class LiTVIE(InfoExtractor):
+ _VALID_URL = r'https?://(?:www\.)?litv\.tv/(?:vod|promo)/[^/]+/(?:content\.do)?\?.*?\b(?:content_)?id=(?P<id>[^&]+)'
+
+ _URL_TEMPLATE = 'https://www.litv.tv/vod/%s/content.do?content_id=%s'
+
+ _TESTS = [{
+ 'url': 'https://www.litv.tv/vod/drama/content.do?brc_id=root&id=VOD00041610&isUHEnabled=true&autoPlay=1',
+ 'info_dict': {
+ 'id': 'VOD00041606',
+ 'title': '花千骨',
+ },
+ 'playlist_count': 51, # 50 episodes + 1 trailer
+ }, {
+ 'url': 'https://www.litv.tv/vod/drama/content.do?brc_id=root&id=VOD00041610&isUHEnabled=true&autoPlay=1',
+ 'md5': 'b90ff1e9f1d8f5cfcd0a44c3e2b34c7a',
+ 'info_dict': {
+ 'id': 'VOD00041610',
+ 'ext': 'mp4',
+ 'title': '花千骨第1集',
+ 'thumbnail': r're:https?://.*\.jpg$',
+ 'description': '《花千骨》陸劇線上看。十六年前,平靜的村莊內,一名女嬰隨異相出生,途徑此地的蜀山掌門清虛道長算出此女命運非同一般,她體內散發的異香易招惹妖魔。一念慈悲下,他在村莊周邊設下結界阻擋妖魔入侵,讓其年滿十六後去蜀山,並賜名花千骨。',
+ 'categories': ['奇幻', '愛情', '中國', '仙俠'],
+ 'episode': 'Episode 1',
+ 'episode_number': 1,
+ },
+ 'params': {
+ 'noplaylist': True,
+ },
+ 'skip': 'Georestricted to Taiwan',
+ }, {
+ 'url': 'https://www.litv.tv/promo/miyuezhuan/?content_id=VOD00044841&',
+ 'md5': '88322ea132f848d6e3e18b32a832b918',
+ 'info_dict': {
+ 'id': 'VOD00044841',
+ 'ext': 'mp4',
+ 'title': '芈月傳第1集 霸星芈月降世楚國',
+ 'description': '楚威王二年,太史令唐昧夜觀星象,發現霸星即將現世。王后得知霸星的預言後,想盡辦法不讓孩子順利出生,幸得莒姬相護化解危機。沒想到眾人期待下出生的霸星卻是位公主,楚威王對此失望至極。楚王后命人將女嬰丟棄河中,居然奇蹟似的被少司命像攔下,楚威王認為此女非同凡響,為她取名芈月。',
+ },
+ 'skip': 'No longer exists',
+ }]
+
+ def _extract_playlist(self, playlist_data, content_type):
+ all_episodes = [
+ self.url_result(smuggle_url(
+ self._URL_TEMPLATE % (content_type, episode['contentId']),
+ {'force_noplaylist': True})) # To prevent infinite recursion
+ for episode in traverse_obj(playlist_data, ('seasons', ..., 'episode', lambda _, v: v['contentId']))]
+
+ return self.playlist_result(all_episodes, playlist_data['contentId'], playlist_data.get('title'))
+
+ def _real_extract(self, url):
+ url, smuggled_data = unsmuggle_url(url, {})
+
+ video_id = self._match_id(url)
+
+ webpage = self._download_webpage(url, video_id)
+
+ if self._search_regex(
+ r'(?i)<meta\s[^>]*http-equiv="refresh"\s[^>]*content="[0-9]+;\s*url=https://www\.litv\.tv/"',
+ webpage, 'meta refresh redirect', default=False, group=0):
+ raise ExtractorError('No such content found', expected=True)
+
+ program_info = self._parse_json(self._search_regex(
+ r'var\s+programInfo\s*=\s*([^;]+)', webpage, 'VOD data', default='{}'),
+ video_id)
+
+        # In browsers, the `getProgramInfo` request is always issued. It usually
+        # returns the same data that is embedded in the webpage. If the embedded
+        # data is missing for some reason, issue the request ourselves.
+ if 'assetId' not in program_info:
+ program_info = self._download_json(
+ 'https://www.litv.tv/vod/ajax/getProgramInfo', video_id,
+ query={'contentId': video_id},
+ headers={'Accept': 'application/json'})
+
+ series_id = program_info['seriesId']
+ if self._yes_playlist(series_id, video_id, smuggled_data):
+ playlist_data = self._download_json(
+ 'https://www.litv.tv/vod/ajax/getSeriesTree', video_id,
+ query={'seriesId': series_id}, headers={'Accept': 'application/json'})
+ return self._extract_playlist(playlist_data, program_info['contentType'])
+
+ video_data = self._parse_json(self._search_regex(
+ r'uiHlsUrl\s*=\s*testBackendData\(([^;]+)\);',
+ webpage, 'video data', default='{}'), video_id)
+ if not video_data:
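+            # No stream data embedded in the page; request it from the AJAX
+            # endpoint, using the authenticated variant when a PUID cookie exists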
+ payload = {'assetId': program_info['assetId']}
+ puid = try_call(lambda: self._get_cookies('https://www.litv.tv/')['PUID'].value)
+ if puid:
+ payload.update({
+ 'type': 'auth',
+ 'puid': puid,
+ })
+ endpoint = 'getUrl'
+ else:
+ payload.update({
+ 'watchDevices': program_info['watchDevices'],
+ 'contentType': program_info['contentType'],
+ })
+ endpoint = 'getMainUrlNoAuth'
+ video_data = self._download_json(
+ f'https://www.litv.tv/vod/ajax/{endpoint}', video_id,
+ data=json.dumps(payload).encode('utf-8'),
+ headers={'Content-Type': 'application/json'})
+
+ if not video_data.get('fullpath'):
+ error_msg = video_data.get('errorMessage')
+ if error_msg == 'vod.error.outsideregionerror':
+ self.raise_geo_restricted('This video is available in Taiwan only')
+ if error_msg:
+ raise ExtractorError('%s said: %s' % (self.IE_NAME, error_msg), expected=True)
+ raise ExtractorError('Unexpected result from %s' % self.IE_NAME)
+
+ formats = self._extract_m3u8_formats(
+ video_data['fullpath'], video_id, ext='mp4',
+ entry_protocol='m3u8_native', m3u8_id='hls')
+ for a_format in formats:
+        # LiTV HLS segments don't like compression
+ a_format.setdefault('http_headers', {})['Accept-Encoding'] = 'identity'
+
+ title = program_info['title'] + program_info.get('secondaryMark', '')
+ description = program_info.get('description')
+ thumbnail = program_info.get('imageFile')
+ categories = [item['name'] for item in program_info.get('category', [])]
+ episode = int_or_none(program_info.get('episode'))
+
+ return {
+ 'id': video_id,
+ 'formats': formats,
+ 'title': title,
+ 'description': description,
+ 'thumbnail': thumbnail,
+ 'categories': categories,
+ 'episode_number': episode,
+ }
diff --git a/yt_dlp/extractor/livejournal.py b/yt_dlp/extractor/livejournal.py
new file mode 100644
index 0000000..96bd8b2
--- /dev/null
+++ b/yt_dlp/extractor/livejournal.py
@@ -0,0 +1,39 @@
+from .common import InfoExtractor
+from ..compat import compat_str
+from ..utils import int_or_none
+
+
+class LiveJournalIE(InfoExtractor):
+ _VALID_URL = r'https?://(?:[^.]+\.)?livejournal\.com/video/album/\d+.+?\bid=(?P<id>\d+)'
+ _TEST = {
+ 'url': 'https://andrei-bt.livejournal.com/video/album/407/?mode=view&id=51272',
+ 'md5': 'adaf018388572ced8a6f301ace49d4b2',
+ 'info_dict': {
+ 'id': '1263729',
+ 'ext': 'mp4',
+ 'title': 'Истребители против БПЛА',
+ 'upload_date': '20190624',
+ 'timestamp': 1561406715,
+ }
+ }
+
+ def _real_extract(self, url):
+ video_id = self._match_id(url)
+ webpage = self._download_webpage(url, video_id)
+ record = self._parse_json(self._search_regex(
+ r'Site\.page\s*=\s*({.+?});', webpage,
+ 'page data'), video_id)['video']['record']
+ storage_id = compat_str(record['storageid'])
+ title = record.get('name')
+ if title:
+            # remove the filename extension (.mp4, .mov, etc.)
+ title = title.rsplit('.', 1)[0]
+ return {
+ '_type': 'url_transparent',
+ 'id': video_id,
+ 'title': title,
+ 'thumbnail': record.get('thumbnail'),
+ 'timestamp': int_or_none(record.get('timecreate')),
+ 'url': 'eagleplatform:vc.videos.livejournal.com:' + storage_id,
+ 'ie_key': 'EaglePlatform',
+ }
diff --git a/yt_dlp/extractor/livestream.py b/yt_dlp/extractor/livestream.py
new file mode 100644
index 0000000..a05a0fa
--- /dev/null
+++ b/yt_dlp/extractor/livestream.py
@@ -0,0 +1,388 @@
+import itertools
+import re
+
+from .common import InfoExtractor
+from ..compat import compat_str, compat_urlparse
+from ..utils import (
+ determine_ext,
+ find_xpath_attr,
+ float_or_none,
+ int_or_none,
+ orderedSet,
+ parse_iso8601,
+ traverse_obj,
+ update_url_query,
+ xpath_attr,
+ xpath_text,
+ xpath_with_ns,
+)
+
+
+class LivestreamIE(InfoExtractor):
+ IE_NAME = 'livestream'
+ _VALID_URL = r'''(?x)
+ https?://(?:new\.)?livestream\.com/
+ (?:accounts/(?P<account_id>\d+)|(?P<account_name>[^/]+))
+ (?:/events/(?P<event_id>\d+)|/(?P<event_name>[^/]+))?
+ (?:/videos/(?P<id>\d+))?
+ '''
+ _EMBED_REGEX = [r'<iframe[^>]+src="(?P<url>https?://(?:new\.)?livestream\.com/[^"]+/player[^"]+)"']
+
+ _TESTS = [{
+ 'url': 'http://new.livestream.com/CoheedandCambria/WebsterHall/videos/4719370',
+ 'md5': '7876c5f5dc3e711b6b73acce4aac1527',
+ 'info_dict': {
+ 'id': '4719370',
+ 'ext': 'mp4',
+ 'title': 'Live from Webster Hall NYC',
+ 'timestamp': 1350008072,
+ 'upload_date': '20121012',
+ 'duration': 5968.0,
+ 'like_count': int,
+ 'view_count': int,
+ 'comment_count': int,
+ 'thumbnail': r're:^http://.*\.jpg$'
+ }
+ }, {
+ 'url': 'https://livestream.com/coheedandcambria/websterhall',
+ 'info_dict': {
+ 'id': '1585861',
+ 'title': 'Live From Webster Hall'
+ },
+ 'playlist_mincount': 1,
+ }, {
+ 'url': 'https://livestream.com/dayananda/events/7954027',
+ 'info_dict': {
+ 'title': 'Live from Mevo',
+ 'id': '7954027',
+ },
+ 'playlist_mincount': 4,
+ }, {
+ 'url': 'https://livestream.com/accounts/82',
+ 'info_dict': {
+ 'id': '253978',
+ 'view_count': int,
+ 'title': 'trsr',
+ 'comment_count': int,
+ 'like_count': int,
+ 'upload_date': '20120306',
+ 'timestamp': 1331042383,
+ 'thumbnail': 'http://img.new.livestream.com/videos/0000000000000372/cacbeed6-fb68-4b5e-ad9c-e148124e68a9_640x427.jpg',
+ 'duration': 15.332,
+ 'ext': 'mp4'
+ }
+ }, {
+ 'url': 'https://new.livestream.com/accounts/362/events/3557232/videos/67864563/player?autoPlay=false&height=360&mute=false&width=640',
+ 'only_matching': True,
+ }, {
+ 'url': 'http://livestream.com/bsww/concacafbeachsoccercampeonato2015',
+ 'only_matching': True,
+ }]
+ _API_URL_TEMPLATE = 'http://livestream.com/api/accounts/%s/events/%s'
+
+ def _parse_smil_formats_and_subtitles(
+ self, smil, smil_url, video_id, namespace=None, f4m_params=None, transform_rtmp_url=None):
+ base_ele = find_xpath_attr(
+ smil, self._xpath_ns('.//meta', namespace), 'name', 'httpBase')
+ base = base_ele.get('content') if base_ele is not None else 'http://livestreamvod-f.akamaihd.net/'
+
+ formats = []
+ video_nodes = smil.findall(self._xpath_ns('.//video', namespace))
+
+ for vn in video_nodes:
+ tbr = int_or_none(vn.attrib.get('system-bitrate'), 1000)
+ furl = (
+ update_url_query(compat_urlparse.urljoin(base, vn.attrib['src']), {
+ 'v': '3.0.3',
+ 'fp': 'WIN% 14,0,0,145',
+ }))
+ if 'clipBegin' in vn.attrib:
+ furl += '&ssek=' + vn.attrib['clipBegin']
+ formats.append({
+ 'url': furl,
+ 'format_id': 'smil_%d' % tbr,
+ 'ext': 'flv',
+ 'tbr': tbr,
+                'preference': -1000,  # Strictly inferior to all other formats?
+ })
+ return formats, {}
+
+ def _extract_video_info(self, video_data):
+ video_id = compat_str(video_data['id'])
+
+ FORMAT_KEYS = (
+ ('sd', 'progressive_url'),
+ ('hd', 'progressive_url_hd'),
+ )
+
+ formats = []
+ for format_id, key in FORMAT_KEYS:
+ video_url = video_data.get(key)
+ if video_url:
+ ext = determine_ext(video_url)
+ if ext == 'm3u8':
+ continue
+ bitrate = int_or_none(self._search_regex(
+ r'(\d+)\.%s' % ext, video_url, 'bitrate', default=None))
+ formats.append({
+ 'url': video_url,
+ 'format_id': format_id,
+ 'tbr': bitrate,
+ 'ext': ext,
+ })
+
+ smil_url = video_data.get('smil_url')
+ if smil_url:
+ formats.extend(self._extract_smil_formats(smil_url, video_id, fatal=False))
+
+ m3u8_url = video_data.get('m3u8_url')
+ if m3u8_url:
+ formats.extend(self._extract_m3u8_formats(
+ m3u8_url, video_id, 'mp4', 'm3u8_native',
+ m3u8_id='hls', fatal=False))
+
+ f4m_url = video_data.get('f4m_url')
+ if f4m_url:
+ formats.extend(self._extract_f4m_formats(
+ f4m_url, video_id, f4m_id='hds', fatal=False))
+
+ comments = [{
+ 'author_id': comment.get('author_id'),
+ 'author': comment.get('author', {}).get('full_name'),
+ 'id': comment.get('id'),
+ 'text': comment['text'],
+ 'timestamp': parse_iso8601(comment.get('created_at')),
+ } for comment in video_data.get('comments', {}).get('data', [])]
+
+ return {
+ 'id': video_id,
+ 'formats': formats,
+ 'title': video_data['caption'],
+ 'description': video_data.get('description'),
+ 'thumbnail': video_data.get('thumbnail_url'),
+ 'duration': float_or_none(video_data.get('duration'), 1000),
+ 'timestamp': parse_iso8601(video_data.get('publish_at')),
+ 'like_count': video_data.get('likes', {}).get('total'),
+ 'comment_count': video_data.get('comments', {}).get('total'),
+ 'view_count': video_data.get('views'),
+ 'comments': comments,
+ }
+
+ def _extract_stream_info(self, stream_info):
+ broadcast_id = compat_str(stream_info['broadcast_id'])
+ is_live = stream_info.get('is_live')
+
+ formats = []
+ smil_url = stream_info.get('play_url')
+ if smil_url:
+ formats.extend(self._extract_smil_formats(smil_url, broadcast_id))
+
+ m3u8_url = stream_info.get('m3u8_url')
+ if m3u8_url:
+ formats.extend(self._extract_m3u8_formats(
+ m3u8_url, broadcast_id, 'mp4', 'm3u8_native',
+ m3u8_id='hls', fatal=False))
+
+ rtsp_url = stream_info.get('rtsp_url')
+ if rtsp_url:
+ formats.append({
+ 'url': rtsp_url,
+ 'format_id': 'rtsp',
+ })
+
+ return {
+ 'id': broadcast_id,
+ 'formats': formats,
+ 'title': stream_info['stream_title'],
+ 'thumbnail': stream_info.get('thumbnail_url'),
+ 'is_live': is_live,
+ }
+
+ def _generate_event_playlist(self, event_data):
+ event_id = compat_str(event_data['id'])
+ account_id = compat_str(event_data['owner_account_id'])
+ feed_root_url = self._API_URL_TEMPLATE % (account_id, event_id) + '/feed.json'
+
+ stream_info = event_data.get('stream_info')
+        if stream_info:
+            # This function is a generator, so a bare `return value` would be
+            # silently discarded by the caller; yield the live entry instead
+            yield self._extract_stream_info(stream_info)
+            return
+
+ last_video = None
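+        # Page through the event feed, each time resuming from the last video ID
+        # seen on the previous page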
+ for i in itertools.count(1):
+ if last_video is None:
+ info_url = feed_root_url
+ else:
+ info_url = '{root}?&id={id}&newer=-1&type=video'.format(
+ root=feed_root_url, id=last_video)
+ videos_info = self._download_json(
+ info_url, event_id, f'Downloading page {i}')['data']
+ videos_info = [v['data'] for v in videos_info if v['type'] == 'video']
+ if not videos_info:
+ break
+ for v in videos_info:
+ v_id = compat_str(v['id'])
+ yield self.url_result(
+ f'http://livestream.com/accounts/{account_id}/events/{event_id}/videos/{v_id}',
+ LivestreamIE, v_id, v.get('caption'))
+ last_video = videos_info[-1]['id']
+
+ def _real_extract(self, url):
+ mobj = self._match_valid_url(url)
+ video_id = mobj.group('id')
+ event = mobj.group('event_id') or mobj.group('event_name')
+ account = mobj.group('account_id') or mobj.group('account_name')
+ api_url = f'http://livestream.com/api/accounts/{account}'
+
+ if video_id:
+ video_data = self._download_json(
+ f'{api_url}/events/{event}/videos/{video_id}', video_id)
+ return self._extract_video_info(video_data)
+ elif event:
+ event_data = self._download_json(f'{api_url}/events/{event}', None)
+ return self.playlist_result(
+ self._generate_event_playlist(event_data), str(event_data['id']), event_data['full_name'])
+
+ account_data = self._download_json(api_url, None)
+ items = traverse_obj(account_data, (('upcoming_events', 'past_events'), 'data', ...))
+ return self.playlist_result(
+ itertools.chain.from_iterable(map(self._generate_event_playlist, items)),
+ account_data.get('id'), account_data.get('full_name'))
+
+
+# The original version of Livestream uses a different system
+class LivestreamOriginalIE(InfoExtractor):
+ IE_NAME = 'livestream:original'
+ _VALID_URL = r'''(?x)https?://original\.livestream\.com/
+ (?P<user>[^/\?#]+)(?:/(?P<type>video|folder)
+ (?:(?:\?.*?Id=|/)(?P<id>.*?)(&|$))?)?
+ '''
+ _TESTS = [{
+ 'url': 'http://original.livestream.com/dealbook/video?clipId=pla_8aa4a3f1-ba15-46a4-893b-902210e138fb',
+ 'info_dict': {
+ 'id': 'pla_8aa4a3f1-ba15-46a4-893b-902210e138fb',
+ 'ext': 'mp4',
+ 'title': 'Spark 1 (BitCoin) with Cameron Winklevoss & Tyler Winklevoss of Winklevoss Capital',
+ 'duration': 771.301,
+ 'view_count': int,
+ },
+ }, {
+ 'url': 'https://original.livestream.com/newplay/folder?dirId=a07bf706-d0e4-4e75-a747-b021d84f2fd3',
+ 'info_dict': {
+ 'id': 'a07bf706-d0e4-4e75-a747-b021d84f2fd3',
+ },
+ 'playlist_mincount': 4,
+ }, {
+ # live stream
+ 'url': 'http://original.livestream.com/znsbahamas',
+ 'only_matching': True,
+ }]
+
+ def _extract_video_info(self, user, video_id):
+ api_url = 'http://x%sx.api.channel.livestream.com/2.0/clipdetails?extendedInfo=true&id=%s' % (user, video_id)
+ info = self._download_xml(api_url, video_id)
+
+ item = info.find('channel').find('item')
+ title = xpath_text(item, 'title')
+ media_ns = {'media': 'http://search.yahoo.com/mrss'}
+ thumbnail_url = xpath_attr(
+ item, xpath_with_ns('media:thumbnail', media_ns), 'url')
+ duration = float_or_none(xpath_attr(
+ item, xpath_with_ns('media:content', media_ns), 'duration'))
+ ls_ns = {'ls': 'http://api.channel.livestream.com/2.0'}
+ view_count = int_or_none(xpath_text(
+ item, xpath_with_ns('ls:viewsCount', ls_ns)))
+
+ return {
+ 'id': video_id,
+ 'title': title,
+ 'thumbnail': thumbnail_url,
+ 'duration': duration,
+ 'view_count': view_count,
+ }
+
+ def _extract_video_formats(self, video_data, video_id):
+ formats = []
+
+ progressive_url = video_data.get('progressiveUrl')
+ if progressive_url:
+ formats.append({
+ 'url': progressive_url,
+ 'format_id': 'http',
+ })
+
+ m3u8_url = video_data.get('httpUrl')
+ if m3u8_url:
+ formats.extend(self._extract_m3u8_formats(
+ m3u8_url, video_id, 'mp4', 'm3u8_native',
+ m3u8_id='hls', fatal=False))
+
+ rtsp_url = video_data.get('rtspUrl')
+ if rtsp_url:
+ formats.append({
+ 'url': rtsp_url,
+ 'format_id': 'rtsp',
+ })
+
+ return formats
+
+ def _extract_folder(self, url, folder_id):
+ webpage = self._download_webpage(url, folder_id)
+ paths = orderedSet(re.findall(
+ r'''(?x)(?:
+ <li\s+class="folder">\s*<a\s+href="|
+ <a\s+href="(?=https?://livestre\.am/)
+ )([^"]+)"''', webpage))
+
+ entries = [{
+ '_type': 'url',
+ 'url': compat_urlparse.urljoin(url, p),
+ } for p in paths]
+
+ return self.playlist_result(entries, folder_id)
+
+ def _real_extract(self, url):
+ mobj = self._match_valid_url(url)
+ user = mobj.group('user')
+ url_type = mobj.group('type')
+ content_id = mobj.group('id')
+ if url_type == 'folder':
+ return self._extract_folder(url, content_id)
+ else:
+ # this url is used on mobile devices
+ stream_url = 'http://x%sx.api.channel.livestream.com/3.0/getstream.json' % user
+ info = {}
+ if content_id:
+ stream_url += '?id=%s' % content_id
+ info = self._extract_video_info(user, content_id)
+ else:
+ content_id = user
+ webpage = self._download_webpage(url, content_id)
+ info = {
+ 'title': self._og_search_title(webpage),
+ 'description': self._og_search_description(webpage),
+ 'thumbnail': self._search_regex(r'channelLogo\.src\s*=\s*"([^"]+)"', webpage, 'thumbnail', None),
+ }
+ video_data = self._download_json(stream_url, content_id)
+ is_live = video_data.get('isLive')
+ info.update({
+ 'id': content_id,
+ 'title': info['title'],
+ 'formats': self._extract_video_formats(video_data, content_id),
+ 'is_live': is_live,
+ })
+ return info
+
+
+# The server doesn't support HEAD requests, so the generic extractor can't
+# detect the redirection
+class LivestreamShortenerIE(InfoExtractor):
+ IE_NAME = 'livestream:shortener'
+ IE_DESC = False # Do not list
+ _VALID_URL = r'https?://livestre\.am/(?P<id>.+)'
+
+ def _real_extract(self, url):
+        video_id = self._match_id(url)
+        webpage = self._download_webpage(url, video_id)
+
+ return self.url_result(self._og_search_url(webpage))
diff --git a/yt_dlp/extractor/livestreamfails.py b/yt_dlp/extractor/livestreamfails.py
new file mode 100644
index 0000000..0df6384
--- /dev/null
+++ b/yt_dlp/extractor/livestreamfails.py
@@ -0,0 +1,37 @@
+from .common import InfoExtractor
+from ..utils import format_field, traverse_obj, unified_timestamp
+
+
+class LivestreamfailsIE(InfoExtractor):
+ _VALID_URL = r'https?://(?:www\.)?livestreamfails\.com/(?:clip|post)/(?P<id>[0-9]+)'
+ _TESTS = [{
+ 'url': 'https://livestreamfails.com/clip/139200',
+ 'md5': '8a03aea1a46e94a05af6410337463102',
+ 'info_dict': {
+ 'id': '139200',
+ 'ext': 'mp4',
+ 'display_id': 'ConcernedLitigiousSalmonPeteZaroll-O8yo9W2L8OZEKhV2',
+ 'title': 'Streamer jumps off a trampoline at full speed',
+ 'creator': 'paradeev1ch',
+ 'thumbnail': r're:^https?://.+',
+ 'timestamp': 1656271785,
+ 'upload_date': '20220626',
+ }
+ }, {
+ 'url': 'https://livestreamfails.com/post/139200',
+ 'only_matching': True,
+ }]
+
+ def _real_extract(self, url):
+ video_id = self._match_id(url)
+ api_response = self._download_json(f'https://api.livestreamfails.com/clip/{video_id}', video_id)
+
+ return {
+ 'id': video_id,
+ 'display_id': api_response.get('sourceId'),
+ 'timestamp': unified_timestamp(api_response.get('createdAt')),
+ 'url': f'https://livestreamfails-video-prod.b-cdn.net/video/{api_response["videoId"]}',
+ 'title': api_response.get('label'),
+ 'creator': traverse_obj(api_response, ('streamer', 'label')),
+ 'thumbnail': format_field(api_response, 'imageId', 'https://livestreamfails-image-prod.b-cdn.net/image/%s')
+ }
diff --git a/yt_dlp/extractor/lnkgo.py b/yt_dlp/extractor/lnkgo.py
new file mode 100644
index 0000000..6282d2e
--- /dev/null
+++ b/yt_dlp/extractor/lnkgo.py
@@ -0,0 +1,163 @@
+from .common import InfoExtractor
+from ..compat import compat_str
+from ..utils import (
+ clean_html,
+ format_field,
+ int_or_none,
+ parse_iso8601,
+ unified_strdate,
+)
+
+
+class LnkGoIE(InfoExtractor):
+ _VALID_URL = r'https?://(?:www\.)?lnk(?:go)?\.(?:alfa\.)?lt/(?:visi-video/[^/]+|video)/(?P<id>[A-Za-z0-9-]+)(?:/(?P<episode_id>\d+))?'
+ _TESTS = [{
+ 'url': 'http://www.lnkgo.lt/visi-video/aktualai-pratesimas/ziurek-putka-trys-klausimai',
+ 'info_dict': {
+ 'id': '10809',
+ 'ext': 'mp4',
+ 'title': "Put'ka: Trys Klausimai",
+ 'upload_date': '20161216',
+ 'description': 'Seniai matytas Put’ka užduoda tris klausimėlius. Pabandykime surasti atsakymus.',
+ 'age_limit': 18,
+ 'duration': 117,
+ 'thumbnail': r're:^https?://.*\.jpg$',
+ 'timestamp': 1481904000,
+ },
+ 'params': {
+ 'skip_download': True, # HLS download
+ },
+ }, {
+ 'url': 'http://lnkgo.alfa.lt/visi-video/aktualai-pratesimas/ziurek-nerdas-taiso-kompiuteri-2',
+ 'info_dict': {
+ 'id': '10467',
+ 'ext': 'mp4',
+ 'title': 'Nėrdas: Kompiuterio Valymas',
+ 'upload_date': '20150113',
+ 'description': 'md5:7352d113a242a808676ff17e69db6a69',
+ 'age_limit': 18,
+ 'duration': 346,
+ 'thumbnail': r're:^https?://.*\.jpg$',
+ 'timestamp': 1421164800,
+ },
+ 'params': {
+ 'skip_download': True, # HLS download
+ },
+ }, {
+ 'url': 'https://lnk.lt/video/neigalieji-tv-bokste/37413',
+ 'only_matching': True,
+ }]
+ _AGE_LIMITS = {
+ 'N-7': 7,
+ 'N-14': 14,
+ 'S': 18,
+ }
+ _M3U8_TEMPL = 'https://vod.lnk.lt/lnk_vod/lnk/lnk/%s:%s/playlist.m3u8%s'
+
+ def _real_extract(self, url):
+ display_id, video_id = self._match_valid_url(url).groups()
+
+ video_info = self._download_json(
+ 'https://lnk.lt/api/main/video-page/%s/%s/false' % (display_id, video_id or '0'),
+ display_id)['videoConfig']['videoInfo']
+
+ video_id = compat_str(video_info['id'])
+ title = video_info['title']
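+        # Quality-switchable videos are addressed as a 'smil:' manifest;
+        # single-quality ones as a plain 'mp4:' asset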
+ prefix = 'smil' if video_info.get('isQualityChangeAvailable') else 'mp4'
+ formats = self._extract_m3u8_formats(
+ self._M3U8_TEMPL % (prefix, video_info['videoUrl'], video_info.get('secureTokenParams') or ''),
+ video_id, 'mp4', 'm3u8_native')
+
+ return {
+ 'id': video_id,
+ 'display_id': display_id,
+ 'title': title,
+ 'formats': formats,
+ 'thumbnail': format_field(video_info, 'posterImage', 'https://lnk.lt/all-images/%s'),
+ 'duration': int_or_none(video_info.get('duration')),
+ 'description': clean_html(video_info.get('htmlDescription')),
+ 'age_limit': self._AGE_LIMITS.get(video_info.get('pgRating'), 0),
+ 'timestamp': parse_iso8601(video_info.get('airDate')),
+ 'view_count': int_or_none(video_info.get('viewsCount')),
+ }
+
+
+class LnkIE(InfoExtractor):
+ _VALID_URL = r'https?://(?:www\.)?lnk\.lt/[^/]+/(?P<id>\d+)'
+
+ _TESTS = [{
+ 'url': 'https://lnk.lt/zinios/79791',
+ 'info_dict': {
+ 'id': '79791',
+ 'ext': 'mp4',
+ 'title': 'LNK.lt: Viešintų gyventojai sukilo prieš radijo bangų siųstuvą',
+ 'description': 'Svarbiausios naujienos trumpai, LNK žinios ir Info dienos pokalbiai.',
+ 'view_count': int,
+ 'duration': 233,
+ 'upload_date': '20191123',
+ 'thumbnail': r're:^https?://.*\.jpg$',
+ 'episode_number': 13431,
+ 'series': 'Naujausi žinių reportažai',
+ 'episode': 'Episode 13431'
+ },
+ 'params': {'skip_download': True}
+ }, {
+ 'url': 'https://lnk.lt/istorijos-trumpai/152546',
+ 'info_dict': {
+ 'id': '152546',
+ 'ext': 'mp4',
+ 'title': 'Radžio koncertas gaisre ',
+ 'description': 'md5:0666b5b85cb9fc7c1238dec96f71faba',
+ 'view_count': int,
+ 'duration': 54,
+ 'upload_date': '20220105',
+ 'thumbnail': r're:^https?://.*\.jpg$',
+ 'episode_number': 1036,
+ 'series': 'Istorijos trumpai',
+ 'episode': 'Episode 1036'
+ },
+ 'params': {'skip_download': True}
+ }, {
+ 'url': 'https://lnk.lt/gyvunu-pasaulis/151549',
+ 'info_dict': {
+ 'id': '151549',
+ 'ext': 'mp4',
+ 'title': 'Gyvūnų pasaulis',
+ 'description': '',
+ 'view_count': int,
+ 'duration': 1264,
+ 'upload_date': '20220108',
+ 'thumbnail': r're:^https?://.*\.jpg$',
+ 'episode_number': 16,
+ 'series': 'Gyvūnų pasaulis',
+ 'episode': 'Episode 16'
+ },
+ 'params': {'skip_download': True}
+ }]
+
+ def _real_extract(self, url):
+ video_id = self._match_id(url)
+ video_json = self._download_json(f'https://lnk.lt/api/video/video-config/{video_id}', video_id)['videoInfo']
+ formats, subtitles = [], {}
+ if video_json.get('videoUrl'):
+ fmts, subs = self._extract_m3u8_formats_and_subtitles(video_json['videoUrl'], video_id)
+ formats.extend(fmts)
+ subtitles = self._merge_subtitles(subtitles, subs)
+ if video_json.get('videoFairplayUrl') and not video_json.get('drm'):
+ fmts, subs = self._extract_m3u8_formats_and_subtitles(video_json['videoFairplayUrl'], video_id)
+ formats.extend(fmts)
+ subtitles = self._merge_subtitles(subtitles, subs)
+
+ return {
+ 'id': video_id,
+ 'title': video_json.get('title'),
+ 'description': video_json.get('description'),
+ 'view_count': video_json.get('viewsCount'),
+ 'duration': video_json.get('duration'),
+ 'upload_date': unified_strdate(video_json.get('airDate')),
+ 'thumbnail': format_field(video_json, 'posterImage', 'https://lnk.lt/all-images/%s'),
+ 'episode_number': int_or_none(video_json.get('episodeNumber')),
+ 'series': video_json.get('programTitle'),
+ 'formats': formats,
+ 'subtitles': subtitles,
+ }
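
How LnkGoIE assembles its HLS manifest URL from the video-page API response, shown with hypothetical field values: the smil prefix is used when quality switching is available, plain mp4 otherwise.

    _M3U8_TEMPL = 'https://vod.lnk.lt/lnk_vod/lnk/lnk/%s:%s/playlist.m3u8%s'

    video_info = {  # hypothetical videoConfig/videoInfo payload
        'isQualityChangeAvailable': True,
        'videoUrl': 'clip123.mp4',
        'secureTokenParams': '?token=xyz',
    }
    prefix = 'smil' if video_info.get('isQualityChangeAvailable') else 'mp4'
    manifest_url = _M3U8_TEMPL % (
        prefix, video_info['videoUrl'], video_info.get('secureTokenParams') or '')
    # -> https://vod.lnk.lt/lnk_vod/lnk/lnk/smil:clip123.mp4/playlist.m3u8?token=xyz
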
diff --git a/yt_dlp/extractor/lovehomeporn.py b/yt_dlp/extractor/lovehomeporn.py
new file mode 100644
index 0000000..ba5a13a
--- /dev/null
+++ b/yt_dlp/extractor/lovehomeporn.py
@@ -0,0 +1,33 @@
+from .nuevo import NuevoBaseIE
+
+
+class LoveHomePornIE(NuevoBaseIE):
+ _VALID_URL = r'https?://(?:www\.)?lovehomeporn\.com/video/(?P<id>\d+)(?:/(?P<display_id>[^/?#&]+))?'
+ _TEST = {
+ 'url': 'http://lovehomeporn.com/video/48483/stunning-busty-brunette-girlfriend-sucking-and-riding-a-big-dick#menu',
+ 'info_dict': {
+ 'id': '48483',
+ 'display_id': 'stunning-busty-brunette-girlfriend-sucking-and-riding-a-big-dick',
+ 'ext': 'mp4',
+ 'title': 'Stunning busty brunette girlfriend sucking and riding a big dick',
+ 'age_limit': 18,
+ 'duration': 238.47,
+ },
+ 'params': {
+ 'skip_download': True,
+ }
+ }
+
+ def _real_extract(self, url):
+ mobj = self._match_valid_url(url)
+ video_id = mobj.group('id')
+ display_id = mobj.group('display_id')
+
+ info = self._extract_nuevo(
+ 'http://lovehomeporn.com/media/nuevo/config.php?key=%s' % video_id,
+ video_id)
+ info.update({
+ 'display_id': display_id,
+ 'age_limit': 18
+ })
+ return info
diff --git a/yt_dlp/extractor/lrt.py b/yt_dlp/extractor/lrt.py
new file mode 100644
index 0000000..80d4d1c
--- /dev/null
+++ b/yt_dlp/extractor/lrt.py
@@ -0,0 +1,108 @@
+from .common import InfoExtractor
+from ..utils import (
+ clean_html,
+ merge_dicts,
+ traverse_obj,
+ url_or_none,
+)
+
+
+class LRTBaseIE(InfoExtractor):
+ def _extract_js_var(self, webpage, var_name, default=None):
+ return self._search_regex(
+ fr'{var_name}\s*=\s*(["\'])((?:(?!\1).)+)\1',
+ webpage, var_name.replace('_', ' '), default, group=2)
+
+
+class LRTStreamIE(LRTBaseIE):
+ _VALID_URL = r'https?://(?:www\.)?lrt\.lt/mediateka/tiesiogiai/(?P<id>[\w-]+)'
+ _TESTS = [{
+ 'url': 'https://www.lrt.lt/mediateka/tiesiogiai/lrt-opus',
+ 'info_dict': {
+ 'id': 'lrt-opus',
+ 'live_status': 'is_live',
+ 'title': 're:^LRT Opus.+$',
+ 'ext': 'mp4'
+ }
+ }]
+
+ def _real_extract(self, url):
+ video_id = self._match_id(url)
+ webpage = self._download_webpage(url, video_id)
+ streams_data = self._download_json(self._extract_js_var(webpage, 'tokenURL'), video_id)
+
+ formats, subtitles = [], {}
+ for stream_url in traverse_obj(streams_data, (
+ 'response', 'data', lambda k, _: k.startswith('content')), expected_type=url_or_none):
+ fmts, subs = self._extract_m3u8_formats_and_subtitles(stream_url, video_id, 'mp4', m3u8_id='hls', live=True)
+ formats.extend(fmts)
+ subtitles = self._merge_subtitles(subtitles, subs)
+
+ stream_title = self._extract_js_var(webpage, 'video_title', 'LRT')
+ return {
+ 'id': video_id,
+ 'formats': formats,
+ 'subtitles': subtitles,
+ 'is_live': True,
+ 'title': f'{self._og_search_title(webpage)} - {stream_title}'
+ }
+
+
+class LRTVODIE(LRTBaseIE):
+ _VALID_URL = r'https?://(?:www\.)?lrt\.lt(?P<path>/mediateka/irasas/(?P<id>[0-9]+))'
+ _TESTS = [{
+ # m3u8 download
+ 'url': 'https://www.lrt.lt/mediateka/irasas/2000127261/greita-ir-gardu-sicilijos-ikvepta-klasikiniu-makaronu-su-baklazanais-vakariene',
+ 'info_dict': {
+ 'id': '2000127261',
+ 'ext': 'mp4',
+ 'title': 'Greita ir gardu: Sicilijos įkvėpta klasikinių makaronų su baklažanais vakarienė',
+ 'description': 'md5:ad7d985f51b0dc1489ba2d76d7ed47fa',
+ 'duration': 3035,
+ 'timestamp': 1604079000,
+ 'upload_date': '20201030',
+ 'tags': ['LRT TELEVIZIJA', 'Beatos virtuvė', 'Beata Nicholson', 'Makaronai', 'Baklažanai', 'Vakarienė', 'Receptas'],
+ 'thumbnail': 'https://www.lrt.lt/img/2020/10/30/764041-126478-1287x836.jpg'
+ },
+ }, {
+ # direct mp3 download
+ 'url': 'http://www.lrt.lt/mediateka/irasas/1013074524/',
+ 'md5': '389da8ca3cad0f51d12bed0c844f6a0a',
+ 'info_dict': {
+ 'id': '1013074524',
+ 'ext': 'mp3',
+ 'title': 'Kita tema 2016-09-05 15:05',
+ 'description': 'md5:1b295a8fc7219ed0d543fc228c931fb5',
+ 'duration': 3008,
+ 'view_count': int,
+ 'like_count': int,
+ },
+ }]
+
+ def _real_extract(self, url):
+ path, video_id = self._match_valid_url(url).groups()
+ webpage = self._download_webpage(url, video_id)
+
+ media_url = self._extract_js_var(webpage, 'main_url', path)
+ media = self._download_json(self._extract_js_var(
+ webpage, 'media_info_url',
+ 'https://www.lrt.lt/servisai/stream_url/vod/media_info/'),
+ video_id, query={'url': media_url})
+ jw_data = self._parse_jwplayer_data(
+ media['playlist_item'], video_id, base_url=url)
+
+ json_ld_data = self._search_json_ld(webpage, video_id)
+
+ tags = []
+ for tag in (media.get('tags') or []):
+ tag_name = tag.get('name')
+ if not tag_name:
+ continue
+ tags.append(tag_name)
+
+ clean_info = {
+ 'description': clean_html(media.get('content')),
+ 'tags': tags,
+ }
+
+ return merge_dicts(clean_info, jw_data, json_ld_data)
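
LRTBaseIE._extract_js_var is the key primitive of both extractors above: it pulls the quoted value of a JavaScript assignment out of the page source. A self-contained sketch, using a made-up page snippet:

    import re

    def extract_js_var(webpage, var_name, default=None):
        # Match var_name = "..." or var_name = '...', capturing the quoted value
        m = re.search(rf'{var_name}\s*=\s*(["\'])((?:(?!\1).)+)\1', webpage)
        return m.group(2) if m else default

    page = 'var tokenURL = "https://www.lrt.lt/servisai/example";'  # hypothetical
    extract_js_var(page, 'tokenURL')  # -> 'https://www.lrt.lt/servisai/example'
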
diff --git a/yt_dlp/extractor/lsm.py b/yt_dlp/extractor/lsm.py
new file mode 100644
index 0000000..35a831f
--- /dev/null
+++ b/yt_dlp/extractor/lsm.py
@@ -0,0 +1,282 @@
+import re
+import urllib.parse
+
+from .common import InfoExtractor
+from ..utils import (
+ ExtractorError,
+ determine_ext,
+ int_or_none,
+ js_to_json,
+ parse_iso8601,
+ parse_qs,
+ str_or_none,
+ url_or_none,
+ urljoin,
+)
+from ..utils.traversal import traverse_obj
+
+
+class LSMLREmbedIE(InfoExtractor):
+ _VALID_URL = r'''(?x)
+ https?://(?:
+ (?:latvijasradio|lr1|lr2|klasika|lr4|naba|radioteatris)\.lsm|
+ pieci
+ )\.lv/[^/?#]+/(?:
+ pleijeris|embed
+ )/?\?(?:[^#]+&)?(?:show|id)=(?P<id>\d+)'''
+ _TESTS = [{
+ 'url': 'https://latvijasradio.lsm.lv/lv/embed/?theme=black&size=16x9&showCaptions=0&id=183522',
+ 'md5': '719b33875cd1429846eeeaeec6df2830',
+ 'info_dict': {
+ 'id': 'a342781',
+ 'ext': 'mp3',
+ 'duration': 1823,
+ 'title': '#138 Nepilnīgā kompensējamo zāļu sistēma pat mēnešiem dzenā pacientus pa aptiekām',
+ 'thumbnail': 'https://pic.latvijasradio.lv/public/assets/media/9/d/gallery_fd4675ac.jpg',
+ }
+ }, {
+ 'url': 'https://radioteatris.lsm.lv/lv/embed/?id=&show=1270&theme=white&size=16x9',
+ 'info_dict': {
+ 'id': '1270',
+ },
+ 'playlist_count': 3,
+ 'playlist': [{
+ 'md5': '2e61b6eceff00d14d57fdbbe6ab24cac',
+ 'info_dict': {
+ 'id': 'a297397',
+ 'ext': 'mp3',
+ 'title': 'Eriks Emanuels Šmits "Pilāta evaņģēlijs". 1. daļa',
+ 'thumbnail': 'https://radioteatris.lsm.lv/public/assets/shows/62f131ae81e3c.jpg',
+ 'duration': 3300,
+ },
+ }],
+ }, {
+ 'url': 'https://radioteatris.lsm.lv/lv/embed/?id=&show=1269&theme=white&size=16x9',
+ 'md5': '24810d4a961da2295d9860afdcaf4f5a',
+ 'info_dict': {
+ 'id': 'a230690',
+ 'ext': 'mp3',
+ 'title': 'Jens Ahlboms "Spārni". Radioizrāde ar Mārtiņa Freimaņa mūziku',
+ 'thumbnail': 'https://radioteatris.lsm.lv/public/assets/shows/62f13023a457c.jpg',
+ 'duration': 1788,
+ }
+ }, {
+ 'url': 'https://lr1.lsm.lv/lv/embed/?id=166557&show=0&theme=white&size=16x9',
+ 'info_dict': {
+ 'id': '166557',
+ },
+ 'playlist_count': 2,
+ 'playlist': [{
+ 'md5': '6a8b0927572f443f09c6e50a3ad65f2d',
+ 'info_dict': {
+ 'id': 'a303104',
+ 'ext': 'mp3',
+ 'thumbnail': 'https://pic.latvijasradio.lv/public/assets/media/c/5/gallery_a83ad2c2.jpg',
+ 'title': 'Krustpunktā Lielā intervija: Valsts prezidents Egils Levits',
+ 'duration': 3222,
+ },
+ }, {
+ 'md5': '5d5e191e718b7644e5118b7b4e093a6d',
+ 'info_dict': {
+ 'id': 'v303104',
+ 'ext': 'mp4',
+ 'thumbnail': 'https://pic.latvijasradio.lv/public/assets/media/c/5/gallery_a83ad2c2.jpg',
+ 'title': 'Krustpunktā Lielā intervija: Valsts prezidents Egils Levits - Video Version',
+ 'duration': 3222,
+ },
+ }],
+ }, {
+ 'url': 'https://lr1.lsm.lv/lv/embed/?id=183522&show=0&theme=white&size=16x9',
+ 'only_matching': True,
+ }, {
+ 'url': 'https://lr2.lsm.lv/lv/embed/?id=182126&show=0&theme=white&size=16x9',
+ 'only_matching': True,
+ }, {
+ 'url': 'https://klasika.lsm.lv/lv/embed/?id=110806&show=0&theme=white&size=16x9',
+ 'only_matching': True,
+ }, {
+ 'url': 'https://lr4.lsm.lv/lv/embed/?id=184282&show=0&theme=white&size=16x9',
+ 'only_matching': True,
+ }, {
+ 'url': 'https://pieci.lv/lv/embed/?id=168896&show=0&theme=white&size=16x9',
+ 'only_matching': True,
+ }, {
+ 'url': 'https://naba.lsm.lv/lv/embed/?id=182901&show=0&theme=white&size=16x9',
+ 'only_matching': True,
+ }, {
+ 'url': 'https://radioteatris.lsm.lv/lv/embed/?id=176439&show=0&theme=white&size=16x9',
+ 'only_matching': True,
+ }, {
+ 'url': 'https://lr1.lsm.lv/lv/pleijeris/?embed=0&id=48205&time=00%3A00&idx=0',
+ 'only_matching': True,
+ }]
+
+ def _real_extract(self, url):
+ query = parse_qs(url)
+ video_id = traverse_obj(query, (
+ ('show', 'id'), 0, {int_or_none}, {lambda x: x or None}, {str_or_none}), get_all=False)
+ webpage = self._download_webpage(url, video_id)
+
+ player_data, media_data = self._search_regex(
+ r'LR\.audio\.Player\s*\([^{]*(?P<player>\{.*?\}),(?P<media>\{.*\})\);',
+ webpage, 'player json', group=('player', 'media'))
+
+ player_json = self._parse_json(
+ player_data, video_id, transform_source=js_to_json, fatal=False) or {}
+ media_json = self._parse_json(media_data, video_id, transform_source=js_to_json)
+
+ entries = []
+ for item in traverse_obj(media_json, (('audio', 'video'), lambda _, v: v['id'])):
+ formats = []
+ for source_url in traverse_obj(item, ('sources', ..., 'file', {url_or_none})):
+ if determine_ext(source_url) == 'm3u8':
+ formats.extend(self._extract_m3u8_formats(source_url, video_id, fatal=False))
+ else:
+ formats.append({'url': source_url})
+
+ id_ = item['id']
+ title = item.get('title')
+ if id_.startswith('v') and not title:
+ title = traverse_obj(
+ media_json, ('audio', lambda _, v: v['id'][1:] == id_[1:], 'title',
+ {lambda x: x and f'{x} - Video Version'}), get_all=False)
+
+ entries.append({
+ 'formats': formats,
+ 'thumbnail': urljoin(url, player_json.get('poster')),
+ 'id': id_,
+ 'title': title,
+ 'duration': traverse_obj(item, ('duration', {int_or_none})),
+ })
+
+ if len(entries) == 1:
+ return entries[0]
+
+ return self.playlist_result(entries, video_id)
+
+
+class LSMLTVEmbedIE(InfoExtractor):
+ _VALID_URL = r'https?://ltv\.lsm\.lv/embed\?(?:[^#]+&)?c=(?P<id>[^#&]+)'
+ _TESTS = [{
+ 'url': 'https://ltv.lsm.lv/embed?c=eyJpdiI6IjQzbHVUeHAyaDJiamFjcjdSUUFKdnc9PSIsInZhbHVlIjoiMHl3SnJNRmd2TmFIdnZwOGtGUUpzODFzUEZ4SVVsN2xoRjliSW9vckUyMWZIWG8vbWVzaFFkY0lhNmRjbjRpaCIsIm1hYyI6ImMzNjdhMzFhNTFhZmY1ZmE0NWI5YmFjZGI1YmJiNGEyNjgzNDM4MjUzMWEwM2FmMDMyZDMwYWM1MDFjZmM5MGIiLCJ0YWciOiIifQ==',
+ 'md5': '64f72a360ca530d5ed89c77646c9eee5',
+ 'info_dict': {
+ 'id': '46k_d23-6000-105',
+ 'ext': 'mp4',
+ 'timestamp': 1700589151,
+ 'duration': 1442,
+ 'upload_date': '20231121',
+ 'title': 'D23-6000-105_cetstud',
+ 'thumbnail': 'https://store.cloudycdn.services/tmsp00060/assets/media/660858/placeholder1700589200.jpg',
+ }
+ }, {
+ 'url': 'https://ltv.lsm.lv/embed?enablesdkjs=1&c=eyJpdiI6IncwVzZmUFk2MU12enVWK1I3SUcwQ1E9PSIsInZhbHVlIjoid3FhV29vamc3T2sxL1RaRmJ5Rm1GTXozU0o2dVczdUtLK0cwZEZJMDQ2a3ZIRG5DK2pneGlnbktBQy9uazVleHN6VXhxdWIweWNvcHRDSnlISlNYOHlVZ1lpcTUrcWZSTUZPQW14TVdkMW9aOUtRWVNDcFF4eWpHNGcrT0VZbUNFQStKQk91cGpndW9FVjJIa0lpbkh3PT0iLCJtYWMiOiIyZGI1NDJlMWRlM2QyMGNhOGEwYTM2MmNlN2JlOGRhY2QyYjdkMmEzN2RlOTEzYTVkNzI1ODlhZDlhZjU4MjQ2IiwidGFnIjoiIn0=',
+ 'md5': 'a1711e190fe680fdb68fd8413b378e87',
+ 'info_dict': {
+ 'id': 'wUnFArIPDSY',
+ 'ext': 'mp4',
+ 'uploader': 'LTV_16plus',
+ 'release_date': '20220514',
+ 'channel_url': 'https://www.youtube.com/channel/UCNMrnafwXD2XKeeQOyfkFCw',
+ 'view_count': int,
+ 'availability': 'public',
+ 'thumbnail': 'https://i.ytimg.com/vi/wUnFArIPDSY/maxresdefault.jpg',
+ 'release_timestamp': 1652544074,
+ 'title': 'EIROVĪZIJA SALĀTOS',
+ 'live_status': 'was_live',
+ 'uploader_id': '@LTV16plus',
+ 'comment_count': int,
+ 'channel_id': 'UCNMrnafwXD2XKeeQOyfkFCw',
+ 'channel_follower_count': int,
+ 'categories': ['Entertainment'],
+ 'duration': 5269,
+ 'upload_date': '20220514',
+ 'age_limit': 0,
+ 'channel': 'LTV_16plus',
+ 'playable_in_embed': True,
+ 'tags': [],
+ 'uploader_url': 'https://www.youtube.com/@LTV16plus',
+ 'like_count': int,
+ 'description': 'md5:7ff0c42ba971e3c13e4b8a2ff03b70b5',
+ }
+ }]
+
+ def _real_extract(self, url):
+ video_id = urllib.parse.unquote(self._match_id(url))
+ webpage = self._download_webpage(url, video_id)
+ data = self._search_json(
+ r'window\.ltvEmbedPayload\s*=', webpage, 'embed json', video_id)
+ embed_type = traverse_obj(data, ('source', 'name', {str}))
+
+ if embed_type == 'telia':
+ ie_key = 'CloudyCDN'
+ embed_url = traverse_obj(data, ('source', 'embed_url', {url_or_none}))
+ elif embed_type == 'youtube':
+ ie_key = 'Youtube'
+ embed_url = traverse_obj(data, ('source', 'id', {str}))
+ else:
+ raise ExtractorError(f'Unsupported embed type {embed_type!r}')
+
+ return self.url_result(
+ embed_url, ie_key, video_id, **traverse_obj(data, {
+ 'title': ('parentInfo', 'title'),
+ 'duration': ('parentInfo', 'duration', {int_or_none}),
+ 'thumbnail': ('source', 'poster', {url_or_none}),
+ }))
+
+
+class LSMReplayIE(InfoExtractor):
+ _VALID_URL = r'https?://replay\.lsm\.lv/[^/?#]+/(?:ieraksts|statja)/[^/?#]+/(?P<id>\d+)'
+ _TESTS = [{
+ 'url': 'https://replay.lsm.lv/lv/ieraksts/ltv/311130/4-studija-zolitudes-tragedija-un-incupes-stacija',
+ 'md5': '64f72a360ca530d5ed89c77646c9eee5',
+ 'info_dict': {
+ 'id': '46k_d23-6000-105',
+ 'ext': 'mp4',
+ 'timestamp': 1700586300,
+ 'description': 'md5:0f1b14798cc39e1ae578bd0eb268f759',
+ 'duration': 1442,
+ 'upload_date': '20231121',
+ 'title': '4. studija. Zolitūdes traģēdija un Inčupes stacija',
+ 'thumbnail': 'https://ltv.lsm.lv/storage/media/8/7/large/5/1f9604e1.jpg',
+ }
+ }, {
+ 'url': 'https://replay.lsm.lv/lv/ieraksts/lr/183522/138-nepilniga-kompensejamo-zalu-sistema-pat-menesiem-dzena-pacientus-pa-aptiekam',
+ 'md5': '719b33875cd1429846eeeaeec6df2830',
+ 'info_dict': {
+ 'id': 'a342781',
+ 'ext': 'mp3',
+ 'duration': 1823,
+ 'title': '#138 Nepilnīgā kompensējamo zāļu sistēma pat mēnešiem dzenā pacientus pa aptiekām',
+ 'thumbnail': 'https://pic.latvijasradio.lv/public/assets/media/9/d/large_fd4675ac.jpg',
+ 'upload_date': '20231102',
+ 'timestamp': 1698921060,
+ 'description': 'md5:7bac3b2dd41e44325032943251c357b1',
+ }
+ }, {
+ 'url': 'https://replay.lsm.lv/ru/statja/ltv/311130/4-studija-zolitudes-tragedija-un-incupes-stacija',
+ 'only_matching': True,
+ }]
+
+ def _fix_nuxt_data(self, webpage):
+ return re.sub(r'Object\.create\(null(?:,(\{.+\}))?\)', lambda m: m.group(1) or 'null', webpage)
+
+ def _real_extract(self, url):
+ video_id = self._match_id(url)
+ webpage = self._download_webpage(url, video_id)
+
+ data = self._search_nuxt_data(
+ self._fix_nuxt_data(webpage), video_id, context_name='__REPLAY__')
+
+ return {
+ '_type': 'url_transparent',
+ 'id': video_id,
+ **traverse_obj(data, {
+ 'url': ('playback', 'service', 'url', {url_or_none}),
+ 'title': ('mediaItem', 'title'),
+ 'description': ('mediaItem', ('lead', 'body')),
+ 'duration': ('mediaItem', 'duration', {int_or_none}),
+ 'timestamp': ('mediaItem', 'aired_at', {parse_iso8601}),
+ 'thumbnail': ('mediaItem', 'largeThumbnail', {url_or_none}),
+ }, get_all=False),
+ }
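
_fix_nuxt_data exists because Nuxt serializes some objects as Object.create(null, {...}), which js_to_json cannot parse; the regex unwraps them to the bare property dict, or to null when there are no properties. A standalone illustration:

    import re

    def fix_nuxt_data(webpage):
        return re.sub(r'Object\.create\(null(?:,(\{.+\}))?\)',
                      lambda m: m.group(1) or 'null', webpage)

    fix_nuxt_data('window.__REPLAY__ = Object.create(null,{"id":311130});')
    # -> 'window.__REPLAY__ = {"id":311130};'
    fix_nuxt_data('x = Object.create(null);')
    # -> 'x = null;'
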
diff --git a/yt_dlp/extractor/lumni.py b/yt_dlp/extractor/lumni.py
new file mode 100644
index 0000000..5a95383
--- /dev/null
+++ b/yt_dlp/extractor/lumni.py
@@ -0,0 +1,23 @@
+from .francetv import FranceTVBaseInfoExtractor
+
+
+class LumniIE(FranceTVBaseInfoExtractor):
+ _VALID_URL = r'https?://(?:www\.)?lumni\.fr/video/(?P<id>[\w-]+)'
+ _TESTS = [{
+ 'url': 'https://www.lumni.fr/video/l-homme-et-son-environnement-dans-la-revolution-industrielle',
+ 'md5': '960e8240c4f2c7a20854503a71e52f5e',
+ 'info_dict': {
+ 'id': 'd2b9a4e5-a526-495b-866c-ab72737e3645',
+ 'ext': 'mp4',
+ 'title': "L'homme et son environnement dans la révolution industrielle - L'ère de l'homme",
+ 'thumbnail': 'https://assets.webservices.francetelevisions.fr/v1/assets/images/a7/17/9f/a7179f5f-63a5-4e11-8d4d-012ab942d905.jpg',
+ 'duration': 230,
+ }
+ }]
+
+ def _real_extract(self, url):
+ display_id = self._match_id(url)
+ webpage = self._download_webpage(url, display_id)
+ video_id = self._html_search_regex(
+ r'<div[^>]+data-factoryid\s*=\s*["\']([^"\']+)', webpage, 'video id')
+ return self._make_url_result(video_id, url=url)
diff --git a/yt_dlp/extractor/lynda.py b/yt_dlp/extractor/lynda.py
new file mode 100644
index 0000000..768ce91
--- /dev/null
+++ b/yt_dlp/extractor/lynda.py
@@ -0,0 +1,330 @@
+import re
+
+from .common import InfoExtractor
+from ..compat import (
+ compat_str,
+ compat_urlparse,
+)
+from ..utils import (
+ ExtractorError,
+ int_or_none,
+ urlencode_postdata,
+)
+
+
+class LyndaBaseIE(InfoExtractor):
+ _SIGNIN_URL = 'https://www.lynda.com/signin/lynda'
+ _PASSWORD_URL = 'https://www.lynda.com/signin/password'
+ _USER_URL = 'https://www.lynda.com/signin/user'
+ _ACCOUNT_CREDENTIALS_HINT = 'Use --username and --password options to provide lynda.com account credentials.'
+ _NETRC_MACHINE = 'lynda'
+
+ @staticmethod
+ def _check_error(json_string, key_or_keys):
+ keys = [key_or_keys] if isinstance(key_or_keys, compat_str) else key_or_keys
+ for key in keys:
+ error = json_string.get(key)
+ if error:
+ raise ExtractorError('Unable to login: %s' % error, expected=True)
+
+ def _login_step(self, form_html, fallback_action_url, extra_form_data, note, referrer_url):
+ action_url = self._search_regex(
+ r'<form[^>]+action=(["\'])(?P<url>.+?)\1', form_html,
+ 'post url', default=fallback_action_url, group='url')
+
+ if not action_url.startswith('http'):
+ action_url = compat_urlparse.urljoin(self._SIGNIN_URL, action_url)
+
+ form_data = self._hidden_inputs(form_html)
+ form_data.update(extra_form_data)
+
+ response = self._download_json(
+ action_url, None, note,
+ data=urlencode_postdata(form_data),
+ headers={
+ 'Referer': referrer_url,
+ 'X-Requested-With': 'XMLHttpRequest',
+ }, expected_status=(418, 500, ))
+
+ self._check_error(response, ('email', 'password', 'ErrorMessage'))
+
+ return response, action_url
+
+ def _perform_login(self, username, password):
+ # Step 1: download signin page
+ signin_page = self._download_webpage(
+ self._SIGNIN_URL, None, 'Downloading signin page')
+
+ # Already logged in
+ if any(re.search(p, signin_page) for p in (
+ r'isLoggedIn\s*:\s*true', r'logout\.aspx', r'>Log out<')):
+ return
+
+ # Step 2: submit email
+ signin_form = self._search_regex(
+ r'(?s)(<form[^>]+data-form-name=["\']signin["\'][^>]*>.+?</form>)',
+ signin_page, 'signin form')
+ signin_page, signin_url = self._login_step(
+ signin_form, self._PASSWORD_URL, {'email': username},
+ 'Submitting email', self._SIGNIN_URL)
+
+ # Step 3: submit password
+ password_form = signin_page['body']
+ self._login_step(
+ password_form, self._USER_URL, {'email': username, 'password': password},
+ 'Submitting password', signin_url)
+
+
+class LyndaIE(LyndaBaseIE):
+ IE_NAME = 'lynda'
+ IE_DESC = 'lynda.com videos'
+ _VALID_URL = r'''(?x)
+ https?://
+ (?:www\.)?(?:lynda\.com|educourse\.ga)/
+ (?:
+ (?:[^/]+/){2,3}(?P<course_id>\d+)|
+ player/embed
+ )/
+ (?P<id>\d+)
+ '''
+
+ _TIMECODE_REGEX = r'\[(?P<timecode>\d+:\d+:\d+[\.,]\d+)\]'
+
+ _TESTS = [{
+ 'url': 'https://www.lynda.com/Bootstrap-tutorials/Using-exercise-files/110885/114408-4.html',
+ # md5 is unstable
+ 'info_dict': {
+ 'id': '114408',
+ 'ext': 'mp4',
+ 'title': 'Using the exercise files',
+ 'duration': 68
+ }
+ }, {
+ 'url': 'https://www.lynda.com/player/embed/133770?tr=foo=1;bar=g;fizz=rt&fs=0',
+ 'only_matching': True,
+ }, {
+ 'url': 'https://educourse.ga/Bootstrap-tutorials/Using-exercise-files/110885/114408-4.html',
+ 'only_matching': True,
+ }, {
+ 'url': 'https://www.lynda.com/de/Graphic-Design-tutorials/Willkommen-Grundlagen-guten-Gestaltung/393570/393572-4.html',
+ 'only_matching': True,
+ }, {
+ # Status="NotFound", Message="Transcript not found"
+ 'url': 'https://www.lynda.com/ASP-NET-tutorials/What-you-should-know/5034180/2811512-4.html',
+ 'only_matching': True,
+ }]
+
+ def _raise_unavailable(self, video_id):
+ self.raise_login_required(
+ 'Video %s is only available for members' % video_id)
+
+ def _real_extract(self, url):
+ mobj = self._match_valid_url(url)
+ video_id = mobj.group('id')
+ course_id = mobj.group('course_id')
+
+ query = {
+ 'videoId': video_id,
+ 'type': 'video',
+ }
+
+ video = self._download_json(
+ 'https://www.lynda.com/ajax/player', video_id,
+ 'Downloading video JSON', fatal=False, query=query)
+
+ # Fallback scenario
+ if not video:
+ query['courseId'] = course_id
+
+ play = self._download_json(
+ 'https://www.lynda.com/ajax/course/%s/%s/play'
+ % (course_id, video_id), video_id, 'Downloading play JSON')
+
+ if not play:
+ self._raise_unavailable(video_id)
+
+ formats = []
+ for formats_dict in play:
+ urls = formats_dict.get('urls')
+ if not isinstance(urls, dict):
+ continue
+ cdn = formats_dict.get('name')
+ for format_id, format_url in urls.items():
+ if not format_url:
+ continue
+ formats.append({
+ 'url': format_url,
+ 'format_id': '%s-%s' % (cdn, format_id) if cdn else format_id,
+ 'height': int_or_none(format_id),
+ })
+
+ conviva = self._download_json(
+ 'https://www.lynda.com/ajax/player/conviva', video_id,
+ 'Downloading conviva JSON', query=query)
+
+ return {
+ 'id': video_id,
+ 'title': conviva['VideoTitle'],
+ 'description': conviva.get('VideoDescription'),
+ 'release_year': int_or_none(conviva.get('ReleaseYear')),
+ 'duration': int_or_none(conviva.get('Duration')),
+ 'creator': conviva.get('Author'),
+ 'formats': formats,
+ }
+
+ if 'Status' in video:
+ raise ExtractorError(
+ 'lynda returned error: %s' % video['Message'], expected=True)
+
+ if video.get('HasAccess') is False:
+ self._raise_unavailable(video_id)
+
+ video_id = compat_str(video.get('ID') or video_id)
+ duration = int_or_none(video.get('DurationInSeconds'))
+ title = video['Title']
+
+ formats = []
+
+ fmts = video.get('Formats')
+ if fmts:
+ formats.extend([{
+ 'url': f['Url'],
+ 'ext': f.get('Extension'),
+ 'width': int_or_none(f.get('Width')),
+ 'height': int_or_none(f.get('Height')),
+ 'filesize': int_or_none(f.get('FileSize')),
+ 'format_id': compat_str(f.get('Resolution')) if f.get('Resolution') else None,
+ } for f in fmts if f.get('Url')])
+
+ prioritized_streams = video.get('PrioritizedStreams')
+ if prioritized_streams:
+ for prioritized_stream_id, prioritized_stream in prioritized_streams.items():
+ formats.extend([{
+ 'url': video_url,
+ 'height': int_or_none(format_id),
+ 'format_id': '%s-%s' % (prioritized_stream_id, format_id),
+ } for format_id, video_url in prioritized_stream.items()])
+
+ self._check_formats(formats, video_id)
+
+ subtitles = self.extract_subtitles(video_id)
+
+ return {
+ 'id': video_id,
+ 'title': title,
+ 'duration': duration,
+ 'subtitles': subtitles,
+ 'formats': formats
+ }
+
+ def _fix_subtitles(self, subs):
+ srt = ''
+ seq_counter = 0
+ for pos in range(0, len(subs) - 1):
+ seq_current = subs[pos]
+ m_current = re.match(self._TIMECODE_REGEX, seq_current['Timecode'])
+ if m_current is None:
+ continue
+ seq_next = subs[pos + 1]
+ m_next = re.match(self._TIMECODE_REGEX, seq_next['Timecode'])
+ if m_next is None:
+ continue
+ appear_time = m_current.group('timecode')
+ disappear_time = m_next.group('timecode')
+ text = seq_current['Caption'].strip()
+ if text:
+ seq_counter += 1
+ srt += '%s\r\n%s --> %s\r\n%s\r\n\r\n' % (seq_counter, appear_time, disappear_time, text)
+ if srt:
+ return srt
+
+ def _get_subtitles(self, video_id):
+ url = 'https://www.lynda.com/ajax/player?videoId=%s&type=transcript' % video_id
+ subs = self._download_webpage(
+ url, video_id, 'Downloading subtitles JSON', fatal=False)
+ if not subs or 'Status="NotFound"' in subs:
+ return {}
+ subs = self._parse_json(subs, video_id, fatal=False)
+ if not subs:
+ return {}
+ fixed_subs = self._fix_subtitles(subs)
+ if fixed_subs:
+ return {'en': [{'ext': 'srt', 'data': fixed_subs}]}
+ return {}
+
+
+class LyndaCourseIE(LyndaBaseIE):
+ IE_NAME = 'lynda:course'
+ IE_DESC = 'lynda.com online courses'
+
+ # A course link is identical to the welcome/introduction video link of the same course,
+ # so we recognize it as a course link
+ _VALID_URL = r'https?://(?:www|m)\.(?:lynda\.com|educourse\.ga)/(?P<coursepath>(?:[^/]+/){2,3}(?P<courseid>\d+))-2\.html'
+
+ _TESTS = [{
+ 'url': 'https://www.lynda.com/Graphic-Design-tutorials/Grundlagen-guten-Gestaltung/393570-2.html',
+ 'only_matching': True,
+ }, {
+ 'url': 'https://www.lynda.com/de/Graphic-Design-tutorials/Grundlagen-guten-Gestaltung/393570-2.html',
+ 'only_matching': True,
+ }]
+
+ def _real_extract(self, url):
+ mobj = self._match_valid_url(url)
+ course_path = mobj.group('coursepath')
+ course_id = mobj.group('courseid')
+
+ item_template = 'https://www.lynda.com/%s/%%s-4.html' % course_path
+
+ course = self._download_json(
+ 'https://www.lynda.com/ajax/player?courseId=%s&type=course' % course_id,
+ course_id, 'Downloading course JSON', fatal=False)
+
+ if not course:
+ webpage = self._download_webpage(url, course_id)
+ entries = [
+ self.url_result(
+ item_template % video_id, ie=LyndaIE.ie_key(),
+ video_id=video_id)
+ for video_id in re.findall(
+ r'data-video-id=["\'](\d+)', webpage)]
+ return self.playlist_result(
+ entries, course_id,
+ self._og_search_title(webpage, fatal=False),
+ self._og_search_description(webpage))
+
+ if course.get('Status') == 'NotFound':
+ raise ExtractorError(
+ 'Course %s does not exist' % course_id, expected=True)
+
+ unaccessible_videos = 0
+ entries = []
+
+ # Might want to extract videos right here from video['Formats'], as 'Formats' no longer
+ # seems to be provided by the single-video API
+
+ for chapter in course['Chapters']:
+ for video in chapter.get('Videos', []):
+ if video.get('HasAccess') is False:
+ unaccessible_videos += 1
+ continue
+ video_id = video.get('ID')
+ if video_id:
+ entries.append({
+ '_type': 'url_transparent',
+ 'url': item_template % video_id,
+ 'ie_key': LyndaIE.ie_key(),
+ 'chapter': chapter.get('Title'),
+ 'chapter_number': int_or_none(chapter.get('ChapterIndex')),
+ 'chapter_id': compat_str(chapter.get('ID')),
+ })
+
+ if unaccessible_videos > 0:
+ self.report_warning(
+ '%s videos are only available for members (or paid members) and will not be downloaded. '
+ % unaccessible_videos + self._ACCOUNT_CREDENTIALS_HINT)
+
+ course_title = course.get('Title')
+ course_description = course.get('Description')
+
+ return self.playlist_result(entries, course_id, course_title, course_description)
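
The SRT construction in LyndaIE._fix_subtitles derives each cue's end time from the start time of the caption that follows it, which is why the loop stops one element short of the end. A simplified sketch that skips the timecode validation the real method performs (the sample captions are invented):

    subs = [  # hypothetical transcript entries
        {'Timecode': '[0:00:01.500]', 'Caption': 'Hello.'},
        {'Timecode': '[0:00:04.200]', 'Caption': 'Welcome to the course.'},
        {'Timecode': '[0:00:07.000]', 'Caption': ''},
    ]
    srt, counter = '', 0
    for cur, nxt in zip(subs, subs[1:]):
        text = cur['Caption'].strip()
        if not text:
            continue  # empty captions are dropped, as in the extractor
        counter += 1
        srt += '%d\r\n%s --> %s\r\n%s\r\n\r\n' % (
            counter, cur['Timecode'].strip('[]'), nxt['Timecode'].strip('[]'), text)
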
diff --git a/yt_dlp/extractor/maariv.py b/yt_dlp/extractor/maariv.py
new file mode 100644
index 0000000..425a8b3
--- /dev/null
+++ b/yt_dlp/extractor/maariv.py
@@ -0,0 +1,62 @@
+from .common import InfoExtractor
+from ..utils import (
+ int_or_none,
+ parse_resolution,
+ unified_timestamp,
+ url_or_none,
+)
+from ..utils.traversal import traverse_obj
+
+
+class MaarivIE(InfoExtractor):
+ IE_NAME = 'maariv.co.il'
+ _VALID_URL = r'https?://player\.maariv\.co\.il/public/player\.html\?(?:[^#]+&)?media=(?P<id>\d+)'
+ _EMBED_REGEX = [rf'<iframe[^>]+\bsrc=[\'"](?P<url>{_VALID_URL})']
+ _TESTS = [{
+ 'url': 'https://player.maariv.co.il/public/player.html?player=maariv-desktop&media=3611585',
+ 'info_dict': {
+ 'id': '3611585',
+ 'duration': 75,
+ 'ext': 'mp4',
+ 'upload_date': '20231009',
+ 'title': 'מבצע חרבות ברזל',
+ 'timestamp': 1696851301,
+ },
+ }]
+ _WEBPAGE_TESTS = [{
+ 'url': 'https://www.maariv.co.il/news/law/Article-1044008',
+ 'info_dict': {
+ 'id': '3611585',
+ 'duration': 75,
+ 'ext': 'mp4',
+ 'upload_date': '20231009',
+ 'title': 'מבצע חרבות ברזל',
+ 'timestamp': 1696851301,
+ },
+ }]
+
+ def _real_extract(self, url):
+ video_id = self._match_id(url)
+ data = self._download_json(
+ f'https://dal.walla.co.il/media/{video_id}?origin=player.maariv.co.il', video_id)['data']
+
+ formats = []
+ if hls_url := traverse_obj(data, ('video', 'url', {url_or_none})):
+ formats.extend(self._extract_m3u8_formats(hls_url, video_id, m3u8_id='hls', fatal=False))
+
+ for http_format in traverse_obj(data, ('video', 'stream_urls', ..., 'stream_url', {url_or_none})):
+ formats.append({
+ 'url': http_format,
+ 'format_id': 'http',
+ **parse_resolution(http_format),
+ })
+
+ return {
+ 'id': video_id,
+ **traverse_obj(data, {
+ 'title': 'title',
+ 'duration': ('video', 'duration', {int_or_none}),
+ 'timestamp': ('upload_date', {unified_timestamp}),
+ }),
+ 'formats': formats,
+ }
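
The traverse_obj calls above do the heavy lifting: path elements in {braces} act as transform/validation functions, and values they map to None are dropped. With a hypothetical payload shaped like the Walla API response (assuming yt-dlp is importable):

    from yt_dlp.utils import url_or_none
    from yt_dlp.utils.traversal import traverse_obj

    data = {'video': {
        'url': 'https://example.com/master.m3u8',
        'stream_urls': [{'stream_url': 'https://example.com/720p.mp4'},
                        {'stream_url': None}],
    }}
    traverse_obj(data, ('video', 'url', {url_or_none}))
    # -> 'https://example.com/master.m3u8'
    traverse_obj(data, ('video', 'stream_urls', ..., 'stream_url', {url_or_none}))
    # -> ['https://example.com/720p.mp4']  (the None entry is filtered out)
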
diff --git a/yt_dlp/extractor/magellantv.py b/yt_dlp/extractor/magellantv.py
new file mode 100644
index 0000000..6f2524b
--- /dev/null
+++ b/yt_dlp/extractor/magellantv.py
@@ -0,0 +1,62 @@
+from .common import InfoExtractor
+from ..utils import parse_age_limit, parse_duration, traverse_obj
+
+
+class MagellanTVIE(InfoExtractor):
+ _VALID_URL = r'https?://(?:www\.)?magellantv\.com/(?:watch|video)/(?P<id>[\w-]+)'
+ _TESTS = [{
+ 'url': 'https://www.magellantv.com/watch/my-dads-on-death-row?type=v',
+ 'info_dict': {
+ 'id': 'my-dads-on-death-row',
+ 'ext': 'mp4',
+ 'title': 'My Dad\'s On Death Row',
+ 'description': 'md5:33ba23b9f0651fc4537ed19b1d5b0d7a',
+ 'duration': 3780.0,
+ 'age_limit': 14,
+ 'tags': ['Justice', 'Reality', 'United States', 'True Crime'],
+ },
+ 'params': {'skip_download': 'm3u8'},
+ }, {
+ 'url': 'https://www.magellantv.com/video/james-bulger-the-new-revelations',
+ 'info_dict': {
+ 'id': 'james-bulger-the-new-revelations',
+ 'ext': 'mp4',
+ 'title': 'James Bulger: The New Revelations',
+ 'description': 'md5:7b97922038bad1d0fe8d0470d8a189f2',
+ 'duration': 2640.0,
+ 'age_limit': 0,
+ 'tags': ['Investigation', 'True Crime', 'Justice', 'Europe'],
+ },
+ 'params': {'skip_download': 'm3u8'},
+ }, {
+ 'url': 'https://www.magellantv.com/watch/celebration-nation',
+ 'info_dict': {
+ 'id': 'celebration-nation',
+ 'ext': 'mp4',
+ 'tags': ['Art & Culture', 'Human Interest', 'Anthropology', 'China', 'History'],
+ 'duration': 2640.0,
+ 'title': 'Ancestors',
+ },
+ 'params': {'skip_download': 'm3u8'},
+ }]
+
+ def _real_extract(self, url):
+ video_id = self._match_id(url)
+ webpage = self._download_webpage(url, video_id)
+ data = traverse_obj(self._search_nextjs_data(webpage, video_id), (
+ 'props', 'pageProps', 'reactContext',
+ (('video', 'detail'), ('series', 'currentEpisode')), {dict}), get_all=False)
+ formats, subtitles = self._extract_m3u8_formats_and_subtitles(data['jwpVideoUrl'], video_id)
+
+ return {
+ 'id': video_id,
+ 'formats': formats,
+ 'subtitles': subtitles,
+ **traverse_obj(data, {
+ 'title': ('title', {str}),
+ 'description': ('metadata', 'description', {str}),
+ 'duration': ('duration', {parse_duration}),
+ 'age_limit': ('ratingCategory', {parse_age_limit}),
+ 'tags': ('tags', ..., {str}),
+ }),
+ }
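
The branched path (('video', 'detail'), ('series', 'currentEpisode')) means "take whichever exists": standalone films and series episodes store their metadata under different keys in the Next.js context. The same behaviour, spelled out as two ordered paths against a toy context:

    from yt_dlp.utils import traverse_obj

    ctx = {'series': {'currentEpisode': {'title': 'Ancestors'}}}  # hypothetical
    data = traverse_obj(
        ctx, ('video', 'detail', {dict}), ('series', 'currentEpisode', {dict}))
    # -> {'title': 'Ancestors'}  (the first path misses, the second matches)
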
diff --git a/yt_dlp/extractor/magentamusik.py b/yt_dlp/extractor/magentamusik.py
new file mode 100644
index 0000000..9d86a1b
--- /dev/null
+++ b/yt_dlp/extractor/magentamusik.py
@@ -0,0 +1,62 @@
+from .common import InfoExtractor
+from ..utils import ExtractorError, int_or_none, join_nonempty, url_or_none
+from ..utils.traversal import traverse_obj
+
+
+class MagentaMusikIE(InfoExtractor):
+ _VALID_URL = r'https?://(?:www\.)?magentamusik\.de/(?P<id>[^/?#]+)'
+
+ _TESTS = [{
+ 'url': 'https://www.magentamusik.de/marty-friedman-woa-2023-9208205928595409235',
+ 'md5': 'd82dd4748f55fc91957094546aaf8584',
+ 'info_dict': {
+ 'id': '9208205928595409235',
+ 'display_id': 'marty-friedman-woa-2023-9208205928595409235',
+ 'ext': 'mp4',
+ 'title': 'Marty Friedman: W:O:A 2023',
+ 'alt_title': 'Konzert vom: 05.08.2023 13:00',
+ 'duration': 2760,
+ 'categories': ['Musikkonzert'],
+ 'release_year': 2023,
+ 'location': 'Deutschland',
+ }
+ }]
+
+ def _real_extract(self, url):
+ display_id = self._match_id(url)
+ webpage = self._download_webpage(url, display_id)
+ player_config = self._search_json(
+ r'data-js-element="o-video-player__config">', webpage, 'player config', display_id, fatal=False)
+ if not player_config:
+ raise ExtractorError('No video found', expected=True)
+
+ asset_id = player_config['assetId']
+ asset_details = self._download_json(
+ f'https://wcps.t-online.de/cvss/magentamusic/vodclient/v2/assetdetails/58938/{asset_id}',
+ display_id, note='Downloading asset details')
+
+ video_id = traverse_obj(
+ asset_details, ('content', 'partnerInformation', ..., 'reference', {str}), get_all=False)
+ if not video_id:
+ raise ExtractorError('Unable to extract video id')
+
+ vod_data = self._download_json(
+ f'https://wcps.t-online.de/cvss/magentamusic/vodclient/v2/player/58935/{video_id}/Main%20Movie', video_id)
+ smil_url = traverse_obj(
+ vod_data, ('content', 'feature', 'representations', ...,
+ 'contentPackages', ..., 'media', 'href', {url_or_none}), get_all=False)
+
+ return {
+ 'id': video_id,
+ 'display_id': display_id,
+ 'formats': self._extract_smil_formats(smil_url, video_id),
+ **traverse_obj(vod_data, ('content', 'feature', 'metadata', {
+ 'title': 'title',
+ 'alt_title': 'originalTitle',
+ 'description': 'longDescription',
+ 'duration': ('runtimeInSeconds', {int_or_none}),
+ 'location': ('countriesOfProduction', {list}, {lambda x: join_nonempty(*x, delim=', ')}),
+ 'release_year': ('yearOfProduction', {int_or_none}),
+ 'categories': ('mainGenre', {str}, {lambda x: x and [x]}),
+ })),
+ }
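
The two lambdas in the metadata traversal normalize list-valued fields: production countries are joined into a single location string, and the single genre is wrapped into a categories list (illustrative values):

    from yt_dlp.utils import join_nonempty

    countries = ['Deutschland', None, 'Österreich']
    join_nonempty(*countries, delim=', ')  # -> 'Deutschland, Österreich'

    genre = 'Musikkonzert'
    genre and [genre]                      # -> ['Musikkonzert']
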
diff --git a/yt_dlp/extractor/mailru.py b/yt_dlp/extractor/mailru.py
new file mode 100644
index 0000000..0f0550c
--- /dev/null
+++ b/yt_dlp/extractor/mailru.py
@@ -0,0 +1,338 @@
+import itertools
+import json
+import re
+import urllib.parse
+
+from .common import InfoExtractor
+from ..compat import compat_urllib_parse_unquote
+from ..utils import (
+ int_or_none,
+ parse_duration,
+ remove_end,
+ try_get,
+ urljoin,
+)
+
+
+class MailRuIE(InfoExtractor):
+ IE_NAME = 'mailru'
+ IE_DESC = 'Видео@Mail.Ru'
+ _VALID_URL = r'''(?x)
+ https?://
+ (?:(?:www|m|videoapi)\.)?my\.mail\.ru/+
+ (?:
+ video/.*\#video=/?(?P<idv1>(?:[^/]+/){3}\d+)|
+ (?:videos/embed/)?(?:(?P<idv2prefix>(?:[^/]+/+){2})(?:video/(?:embed/)?)?(?P<idv2suffix>[^/]+/\d+))(?:\.html)?|
+ (?:video/embed|\+/video/meta)/(?P<metaid>\d+)
+ )
+ '''
+ _TESTS = [
+ {
+ 'url': 'http://my.mail.ru/video/top#video=/mail/sonypicturesrus/75/76',
+ 'md5': 'dea205f03120046894db4ebb6159879a',
+ 'info_dict': {
+ 'id': '46301138_76',
+ 'ext': 'mp4',
+ 'title': 'Новый Человек-Паук. Высокое напряжение. Восстание Электро',
+ 'timestamp': 1393235077,
+ 'upload_date': '20140224',
+ 'uploader': 'sonypicturesrus',
+ 'uploader_id': 'sonypicturesrus@mail.ru',
+ 'duration': 184,
+ },
+ 'skip': 'Not accessible from Travis CI server',
+ },
+ {
+ 'url': 'http://my.mail.ru/corp/hitech/video/news_hi-tech_mail_ru/1263.html',
+ 'md5': '00a91a58c3402204dcced523777b475f',
+ 'info_dict': {
+ 'id': '46843144_1263',
+ 'ext': 'mp4',
+ 'title': 'Samsung Galaxy S5 Hammer Smash Fail Battery Explosion',
+ 'timestamp': 1397039888,
+ 'upload_date': '20140409',
+ 'uploader': 'hitech',
+ 'uploader_id': 'hitech@corp.mail.ru',
+ 'duration': 245,
+ },
+ 'skip': 'Not accessible from Travis CI server',
+ },
+ {
+ # only available via metaUrl API
+ 'url': 'http://my.mail.ru/mail/720pizle/video/_myvideo/502.html',
+ 'md5': '3b26d2491c6949d031a32b96bd97c096',
+ 'info_dict': {
+ 'id': '56664382_502',
+ 'ext': 'mp4',
+ 'title': ':8336',
+ 'timestamp': 1449094163,
+ 'upload_date': '20151202',
+ 'uploader': '720pizle@mail.ru',
+ 'uploader_id': '720pizle@mail.ru',
+ 'duration': 6001,
+ },
+ 'skip': 'Not accessible from Travis CI server',
+ },
+ {
+ 'url': 'http://m.my.mail.ru/mail/3sktvtr/video/_myvideo/138.html',
+ 'only_matching': True,
+ },
+ {
+ 'url': 'https://my.mail.ru/video/embed/7949340477499637815',
+ 'only_matching': True,
+ },
+ {
+ 'url': 'http://my.mail.ru/+/video/meta/7949340477499637815',
+ 'only_matching': True,
+ },
+ {
+ 'url': 'https://my.mail.ru//list/sinyutin10/video/_myvideo/4.html',
+ 'only_matching': True,
+ },
+ {
+ 'url': 'https://my.mail.ru//list//sinyutin10/video/_myvideo/4.html',
+ 'only_matching': True,
+ },
+ {
+ 'url': 'https://my.mail.ru/mail/cloud-strife/video/embed/Games/2009',
+ 'only_matching': True,
+ },
+ {
+ 'url': 'https://videoapi.my.mail.ru/videos/embed/mail/cloud-strife/Games/2009.html',
+ 'only_matching': True,
+ }
+ ]
+
+ def _real_extract(self, url):
+ mobj = self._match_valid_url(url)
+ meta_id = mobj.group('metaid')
+
+ video_id = None
+ if meta_id:
+ meta_url = 'https://my.mail.ru/+/video/meta/%s' % meta_id
+ else:
+ video_id = mobj.group('idv1')
+ if not video_id:
+ video_id = mobj.group('idv2prefix') + mobj.group('idv2suffix')
+ webpage = self._download_webpage(url, video_id)
+ page_config = self._parse_json(self._search_regex([
+ r'(?s)<script[^>]+class="sp-video__page-config"[^>]*>(.+?)</script>',
+ r'(?s)"video":\s*({.+?}),'],
+ webpage, 'page config', default='{}'), video_id, fatal=False)
+ if page_config:
+ meta_url = page_config.get('metaUrl') or page_config.get('video', {}).get('metaUrl') or page_config.get('metadataUrl')
+ else:
+ meta_url = None
+
+ video_data = None
+
+ # fix meta_url if it is missing the host address; it may also be None at this point
+ if meta_url and re.match(r'^/\+/', meta_url):
+ meta_url = urljoin('https://my.mail.ru', meta_url)
+
+ if meta_url:
+ video_data = self._download_json(
+ meta_url, video_id or meta_id, 'Downloading video meta JSON',
+ fatal=not video_id)
+
+ # Fallback old approach
+ if not video_data:
+ video_data = self._download_json(
+ 'http://api.video.mail.ru/videos/%s.json?new=1' % video_id,
+ video_id, 'Downloading video JSON')
+
+ video_key = self._get_cookies('https://my.mail.ru').get('video_key')
+
+ formats = []
+ for f in video_data['videos']:
+ video_url = f.get('url')
+ if not video_url:
+ continue
+ if video_key:
+ self._set_cookie(urllib.parse.urlparse(video_url).hostname, 'video_key', video_key.value)
+ format_id = f.get('key')
+ height = int_or_none(self._search_regex(
+ r'^(\d+)[pP]$', format_id, 'height', default=None)) if format_id else None
+ formats.append({
+ 'url': video_url,
+ 'format_id': format_id,
+ 'height': height,
+ })
+
+ meta_data = video_data['meta']
+ title = remove_end(meta_data['title'], '.mp4')
+
+ author = video_data.get('author') or {}
+ uploader = author.get('name')
+ uploader_id = author.get('id') or author.get('email')
+ view_count = int_or_none(video_data.get('viewsCount') or video_data.get('views_count'))
+
+ acc_id = meta_data.get('accId')
+ item_id = meta_data.get('itemId')
+ content_id = '%s_%s' % (acc_id, item_id) if acc_id and item_id else video_id
+
+ thumbnail = meta_data.get('poster')
+ duration = int_or_none(meta_data.get('duration'))
+ timestamp = int_or_none(meta_data.get('timestamp'))
+
+ return {
+ 'id': content_id,
+ 'title': title,
+ 'thumbnail': thumbnail,
+ 'timestamp': timestamp,
+ 'uploader': uploader,
+ 'uploader_id': uploader_id,
+ 'duration': duration,
+ 'view_count': view_count,
+ 'formats': formats,
+ }
+
+
+class MailRuMusicSearchBaseIE(InfoExtractor):
+ def _search(self, query, url, audio_id, limit=100, offset=0):
+ search = self._download_json(
+ 'https://my.mail.ru/cgi-bin/my/ajax', audio_id,
+ 'Downloading songs JSON page %d' % (offset // limit + 1),
+ headers={
+ 'Referer': url,
+ 'X-Requested-With': 'XMLHttpRequest',
+ }, query={
+ 'xemail': '',
+ 'ajax_call': '1',
+ 'func_name': 'music.search',
+ 'mna': '',
+ 'mnb': '',
+ 'arg_query': query,
+ 'arg_extended': '1',
+ 'arg_search_params': json.dumps({
+ 'music': {
+ 'limit': limit,
+ 'offset': offset,
+ },
+ }),
+ 'arg_limit': limit,
+ 'arg_offset': offset,
+ })
+ return next(e for e in search if isinstance(e, dict))
+
+ @staticmethod
+ def _extract_track(t, fatal=True):
+ audio_url = t['URL'] if fatal else t.get('URL')
+ if not audio_url:
+ return
+
+ audio_id = t['File'] if fatal else t.get('File')
+ if not audio_id:
+ return
+
+ thumbnail = t.get('AlbumCoverURL') or t.get('FiledAlbumCover')
+ uploader = t.get('OwnerName') or t.get('OwnerName_Text_HTML')
+ uploader_id = t.get('UploaderID')
+ duration = int_or_none(t.get('DurationInSeconds')) or parse_duration(
+ t.get('Duration') or t.get('DurationStr'))
+ view_count = int_or_none(t.get('PlayCount') or t.get('PlayCount_hr'))
+
+ track = t.get('Name') or t.get('Name_Text_HTML')
+ artist = t.get('Author') or t.get('Author_Text_HTML')
+
+ if track:
+ title = '%s - %s' % (artist, track) if artist else track
+ else:
+ title = audio_id
+
+ return {
+ 'extractor_key': MailRuMusicIE.ie_key(),
+ 'id': audio_id,
+ 'title': title,
+ 'thumbnail': thumbnail,
+ 'uploader': uploader,
+ 'uploader_id': uploader_id,
+ 'duration': duration,
+ 'view_count': view_count,
+ 'vcodec': 'none',
+ 'abr': int_or_none(t.get('BitRate')),
+ 'track': track,
+ 'artist': artist,
+ 'album': t.get('Album'),
+ 'url': audio_url,
+ }
+
+
+class MailRuMusicIE(MailRuMusicSearchBaseIE):
+ IE_NAME = 'mailru:music'
+ IE_DESC = 'Музыка@Mail.Ru'
+ _VALID_URL = r'https?://my\.mail\.ru/+music/+songs/+[^/?#&]+-(?P<id>[\da-f]+)'
+ _TESTS = [{
+ 'url': 'https://my.mail.ru/music/songs/%D0%BC8%D0%BB8%D1%82%D1%85-l-a-h-luciferian-aesthetics-of-herrschaft-single-2017-4e31f7125d0dfaef505d947642366893',
+ 'md5': '0f8c22ef8c5d665b13ac709e63025610',
+ 'info_dict': {
+ 'id': '4e31f7125d0dfaef505d947642366893',
+ 'ext': 'mp3',
+ 'title': 'L.A.H. (Luciferian Aesthetics of Herrschaft) single, 2017 - М8Л8ТХ',
+ 'uploader': 'Игорь Мудрый',
+ 'uploader_id': '1459196328',
+ 'duration': 280,
+ 'view_count': int,
+ 'vcodec': 'none',
+ 'abr': 320,
+ 'track': 'L.A.H. (Luciferian Aesthetics of Herrschaft) single, 2017',
+ 'artist': 'М8Л8ТХ',
+ },
+ }]
+
+ def _real_extract(self, url):
+ audio_id = self._match_id(url)
+
+ webpage = self._download_webpage(url, audio_id)
+
+ title = self._og_search_title(webpage)
+ music_data = self._search(title, url, audio_id)['MusicData']
+ t = next(t for t in music_data if t.get('File') == audio_id)
+
+ info = self._extract_track(t)
+ info['title'] = title
+ return info
+
+
+class MailRuMusicSearchIE(MailRuMusicSearchBaseIE):
+ IE_NAME = 'mailru:music:search'
+ IE_DESC = 'Музыка@Mail.Ru'
+ _VALID_URL = r'https?://my\.mail\.ru/+music/+search/+(?P<id>[^/?#&]+)'
+ _TESTS = [{
+ 'url': 'https://my.mail.ru/music/search/black%20shadow',
+ 'info_dict': {
+ 'id': 'black shadow',
+ },
+ 'playlist_mincount': 532,
+ }]
+
+ def _real_extract(self, url):
+ query = compat_urllib_parse_unquote(self._match_id(url))
+
+ entries = []
+
+ LIMIT = 100
+ offset = 0
+
+ for _ in itertools.count(1):
+ search = self._search(query, url, query, LIMIT, offset)
+
+ music_data = search.get('MusicData')
+ if not music_data or not isinstance(music_data, list):
+ break
+
+ for t in music_data:
+ track = self._extract_track(t, fatal=False)
+ if track:
+ entries.append(track)
+
+ total = try_get(
+ search, lambda x: x['Results']['music']['Total'], int)
+
+ if total is not None:
+ if offset > total:
+ break
+
+ offset += LIMIT
+
+ return self.playlist_result(entries, query)
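
The search loop above is plain offset pagination; this stub reproduces its control flow with the network call replaced by a local function (the data and total are invented):

    import itertools

    LIMIT = 100
    catalogue = [{'File': f'track{i}'} for i in range(230)]

    def fake_search(offset):  # stand-in for self._search(...)
        return {'MusicData': catalogue[offset:offset + LIMIT],
                'Results': {'music': {'Total': len(catalogue)}}}

    entries, offset = [], 0
    for _ in itertools.count(1):
        page = fake_search(offset)
        music_data = page.get('MusicData')
        if not music_data:
            break  # empty page: no more results
        entries.extend(music_data)
        total = page['Results']['music']['Total']
        if total is not None and offset > total:
            break  # reported total exhausted
        offset += LIMIT
    assert len(entries) == 230
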
diff --git a/yt_dlp/extractor/mainstreaming.py b/yt_dlp/extractor/mainstreaming.py
new file mode 100644
index 0000000..fd9bba8
--- /dev/null
+++ b/yt_dlp/extractor/mainstreaming.py
@@ -0,0 +1,210 @@
+import re
+
+from .common import InfoExtractor
+
+from ..utils import (
+ int_or_none,
+ js_to_json,
+ parse_duration,
+ traverse_obj,
+ try_get,
+ urljoin
+)
+
+
+class MainStreamingIE(InfoExtractor):
+ _VALID_URL = r'https?://(?:webtools-?)?(?P<host>[A-Za-z0-9-]*\.msvdn\.net)/(?:embed|amp_embed|content)/(?P<id>\w+)'
+ _EMBED_REGEX = [rf'<iframe[^>]+?src=["\']?(?P<url>{_VALID_URL})["\']?']
+ IE_DESC = 'MainStreaming Player'
+
+ _TESTS = [
+ {
+ # Live stream offline, has alternative content id
+ 'url': 'https://webtools-e18da6642b684f8aa9ae449862783a56.msvdn.net/embed/53EN6GxbWaJC',
+ 'info_dict': {
+ 'id': '53EN6GxbWaJC',
+ 'title': 'Diretta homepage 2021-12-31 12:00',
+ 'description': '',
+ 'live_status': 'was_live',
+ 'ext': 'mp4',
+ 'thumbnail': r're:https?://[A-Za-z0-9-]*\.msvdn.net/image/\w+/poster',
+ },
+ 'expected_warnings': [
+ 'Ignoring alternative content ID: WDAF1KOWUpH3',
+ 'MainStreaming said: Live event is OFFLINE'
+ ],
+ 'skip': 'live stream offline'
+ }, {
+ # playlist
+ 'url': 'https://webtools-e18da6642b684f8aa9ae449862783a56.msvdn.net/embed/WDAF1KOWUpH3',
+ 'info_dict': {
+ 'id': 'WDAF1KOWUpH3',
+ 'title': 'Playlist homepage',
+ },
+ 'playlist_mincount': 2
+ }, {
+ # livestream
+ 'url': 'https://webtools-859c1818ed614cc5b0047439470927b0.msvdn.net/embed/tDoFkZD3T1Lw',
+ 'info_dict': {
+ 'id': 'tDoFkZD3T1Lw',
+ 'title': r're:Class CNBC Live \d{4}-\d{2}-\d{2} \d{2}:\d{2}$',
+ 'live_status': 'is_live',
+ 'ext': 'mp4',
+ 'thumbnail': r're:https?://[A-Za-z0-9-]*\.msvdn.net/image/\w+/poster',
+ },
+ 'skip': 'live stream'
+ }, {
+ 'url': 'https://webtools-f5842579ff984c1c98d63b8d789673eb.msvdn.net/embed/EUlZfGWkGpOd?autoPlay=false',
+ 'info_dict': {
+ 'id': 'EUlZfGWkGpOd',
+ 'title': 'La Settimana ',
+ 'description': '03 Ottobre ore 02:00',
+ 'ext': 'mp4',
+ 'live_status': 'not_live',
+ 'thumbnail': r're:https?://[A-Za-z0-9-]*\.msvdn.net/image/\w+/poster',
+ 'duration': 1512
+ }
+ }, {
+ # video without webtools- prefix
+ 'url': 'https://f5842579ff984c1c98d63b8d789673eb.msvdn.net/embed/MfuWmzL2lGkA?autoplay=false&T=1635860445',
+ 'info_dict': {
+ 'id': 'MfuWmzL2lGkA',
+ 'title': 'TG Mattina',
+ 'description': '06 Ottobre ore 08:00',
+ 'ext': 'mp4',
+ 'live_status': 'not_live',
+ 'thumbnail': r're:https?://[A-Za-z0-9-]*\.msvdn.net/image/\w+/poster',
+ 'duration': 789.04
+ }
+ }, {
+ # always-on livestream with DVR
+ 'url': 'https://webtools-f5842579ff984c1c98d63b8d789673eb.msvdn.net/embed/HVvPMzy',
+ 'info_dict': {
+ 'id': 'HVvPMzy',
+ 'title': r're:^Diretta LaC News24 \d{4}-\d{2}-\d{2} \d{2}:\d{2}$',
+ 'description': 'canale all news',
+ 'live_status': 'is_live',
+ 'ext': 'mp4',
+ 'thumbnail': r're:https?://[A-Za-z0-9-]*\.msvdn.net/image/\w+/poster',
+ },
+ 'params': {
+ 'skip_download': True,
+ },
+ }, {
+ # no host
+ 'url': 'https://webtools.msvdn.net/embed/MfuWmzL2lGkA',
+ 'only_matching': True
+ }, {
+ 'url': 'https://859c1818ed614cc5b0047439470927b0.msvdn.net/amp_embed/tDoFkZD3T1Lw',
+ 'only_matching': True
+ }, {
+ 'url': 'https://859c1818ed614cc5b0047439470927b0.msvdn.net/content/tDoFkZD3T1Lw#',
+ 'only_matching': True
+ }
+ ]
+
+ def _playlist_entries(self, host, playlist_content):
+ for entry in playlist_content:
+ content_id = entry.get('contentID')
+ yield {
+ '_type': 'url',
+ 'ie_key': MainStreamingIE.ie_key(),
+ 'id': content_id,
+ 'duration': int_or_none(traverse_obj(entry, ('duration', 'totalSeconds'))),
+ 'title': entry.get('title'),
+ 'url': f'https://{host}/embed/{content_id}'
+ }
+
+ @staticmethod
+ def _get_webtools_host(host):
+ if not host.startswith('webtools'):
+ host = 'webtools' + ('-' if not host.startswith('.') else '') + host
+ return host
+
+ def _get_webtools_base_url(self, host):
+ return f'{self.http_scheme()}//{self._get_webtools_host(host)}'
+
+ def _call_api(self, host: str, path: str, item_id: str, query=None, note='Downloading API JSON', fatal=False):
+ # JSON API, does not appear to be documented
+ return self._call_webtools_api(host, '/api/v2/' + path, item_id, query, note, fatal)
+
+ def _call_webtools_api(self, host: str, path: str, item_id: str, query=None, note='Downloading webtools API JSON', fatal=False):
+ # webtools docs: https://webtools.msvdn.net/
+ return self._download_json(
+ urljoin(self._get_webtools_base_url(host), path), item_id, query=query, note=note, fatal=fatal)
+
+ def _real_extract(self, url):
+ host, video_id = self._match_valid_url(url).groups()
+ content_info = try_get(
+ self._call_api(
+ host, f'content/{video_id}', video_id, note='Downloading content info API JSON'), lambda x: x['playerContentInfo'])
+ # Fallback
+ if not content_info:
+ webpage = self._download_webpage(url, video_id)
+ player_config = self._parse_json(
+ self._search_regex(
+ r'config\s*=\s*({.+?})\s*;', webpage, 'mainstreaming player config',
+ default='{}', flags=re.DOTALL),
+ video_id, transform_source=js_to_json, fatal=False) or {}
+ content_info = player_config['contentInfo']
+
+ host = content_info.get('host') or host
+ video_id = content_info.get('contentID') or video_id
+ title = content_info.get('title')
+ description = traverse_obj(content_info, 'longDescription', 'shortDescription', expected_type=str)
+ live_status = 'not_live'
+ if content_info.get('drmEnabled'):
+ self.report_drm(video_id)
+
+ alternative_content_id = content_info.get('alternativeContentID')
+ if alternative_content_id:
+ self.report_warning(f'Ignoring alternative content ID: {alternative_content_id}')
+
+ content_type = int_or_none(content_info.get('contentType'))
+ format_base_url = None
+ formats = []
+ subtitles = {}
+ # Live content
+ if content_type == 20:
+ dvr_enabled = traverse_obj(content_info, ('playerSettings', 'dvrEnabled'), expected_type=bool)
+ format_base_url = f"https://{host}/live/{content_info['liveSourceID']}/{video_id}/%s{'?DVR' if dvr_enabled else ''}"
+ live_status = 'is_live'
+ heartbeat = self._call_api(host, f'heartbeat/{video_id}', video_id, note='Checking stream status') or {}
+ if heartbeat.get('heartBeatUp') is False:
+ self.raise_no_formats(f'MainStreaming said: {heartbeat.get("responseMessage")}', expected=True)
+ live_status = 'was_live'
+
+ # Playlist
+ elif content_type == 31:
+ return self.playlist_result(
+ self._playlist_entries(host, content_info.get('playlistContents')), video_id, title, description)
+ # Normal video content?
+ elif content_type == 10:
+ format_base_url = f'https://{host}/vod/{video_id}/%s'
+ # Progressive format
+ # Note: in https://webtools.msvdn.net/loader/playerV2.js there is mention of original.mp3 format,
+ # however it seems to be the same as original.mp4?
+ formats.append({'url': format_base_url % 'original.mp4', 'format_note': 'original', 'quality': 1})
+ else:
+ self.raise_no_formats(f'Unknown content type {content_type}')
+
+ if format_base_url:
+ m3u8_formats, m3u8_subs = self._extract_m3u8_formats_and_subtitles(
+ format_base_url % 'playlist.m3u8', video_id=video_id, fatal=False)
+ mpd_formats, mpd_subs = self._extract_mpd_formats_and_subtitles(
+ format_base_url % 'manifest.mpd', video_id=video_id, fatal=False)
+
+ subtitles = self._merge_subtitles(m3u8_subs, mpd_subs)
+ formats.extend(m3u8_formats + mpd_formats)
+
+ return {
+ 'id': video_id,
+ 'title': title,
+ 'description': description,
+ 'formats': formats,
+ 'live_status': live_status,
+ 'duration': parse_duration(content_info.get('duration')),
+ 'tags': content_info.get('tags'),
+ 'subtitles': subtitles,
+ 'thumbnail': urljoin(self._get_webtools_base_url(host), f'image/{video_id}/poster')
+ }
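
The contentType dispatch boils down to filling one URL template per delivery mode and then requesting HLS and DASH manifests from it. A sketch with hypothetical host, IDs and live source ID:

    host, video_id = 'webtools-example.msvdn.net', 'MfuWmzL2lGkA'
    content_type = 10  # 10 = VOD, 20 = live, 31 = playlist

    if content_type == 20:  # live; '?DVR' is appended when DVR is enabled
        format_base_url = f'https://{host}/live/liveSource123/{video_id}/%s'
    elif content_type == 10:  # normal video content
        format_base_url = f'https://{host}/vod/{video_id}/%s'

    format_base_url % 'playlist.m3u8'  # HLS manifest
    format_base_url % 'manifest.mpd'   # DASH manifest
    format_base_url % 'original.mp4'   # progressive original (VOD only)
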
diff --git a/yt_dlp/extractor/mangomolo.py b/yt_dlp/extractor/mangomolo.py
new file mode 100644
index 0000000..efaf66f
--- /dev/null
+++ b/yt_dlp/extractor/mangomolo.py
@@ -0,0 +1,73 @@
+from .common import InfoExtractor
+from ..compat import (
+ compat_b64decode,
+ compat_urllib_parse_unquote,
+)
+from ..utils import classproperty, int_or_none
+
+
+class MangomoloBaseIE(InfoExtractor):
+ _BASE_REGEX = r'(?:https?:)?//(?:admin\.mangomolo\.com/analytics/index\.php/customers/embed/|player\.mangomolo\.com/v1/)'
+ _SLUG = None
+
+ @classproperty
+ def _VALID_URL(cls):
+ return f'{cls._BASE_REGEX}{cls._SLUG}'
+
+ @classproperty
+ def _EMBED_REGEX(cls):
+ return [rf'<iframe[^>]+src=(["\'])(?P<url>{cls._VALID_URL}.+?)\1']
+
+ def _extract_from_webpage(self, url, webpage):
+ for res in super()._extract_from_webpage(url, webpage):
+ yield {
+ **res,
+ '_type': 'url_transparent',
+ 'id': self._search_regex(self._SLUG, res['url'], 'id', group='id'),
+ 'uploader': self._search_regex(r'^(?:https?://)?([^/]*)/.*', url, 'video uploader'),
+ }
+
+ def _get_real_id(self, page_id):
+ return page_id
+
+ def _real_extract(self, url):
+ page_id = self._get_real_id(self._match_id(url))
+ webpage = self._download_webpage(
+ 'https://player.mangomolo.com/v1/%s?%s' % (self._TYPE, url.split('?')[1]), page_id)
+ hidden_inputs = self._hidden_inputs(webpage)
+ m3u8_entry_protocol = 'm3u8' if self._IS_LIVE else 'm3u8_native'
+
+ format_url = self._html_search_regex(
+ [
+ r'(?:file|src)\s*:\s*"(https?://[^"]+?/playlist\.m3u8)',
+ r'<a[^>]+href="(rtsp://[^"]+)"'
+ ], webpage, 'format url')
+ formats = self._extract_wowza_formats(
+ format_url, page_id, m3u8_entry_protocol, ['smil'])
+
+ return {
+ 'id': page_id,
+ 'title': page_id,
+ 'uploader_id': hidden_inputs.get('userid'),
+ 'duration': int_or_none(hidden_inputs.get('duration')),
+ 'is_live': self._IS_LIVE,
+ 'formats': formats,
+ }
+
+
+class MangomoloVideoIE(MangomoloBaseIE):
+ _TYPE = 'video'
+ IE_NAME = 'mangomolo:' + _TYPE
+ _SLUG = r'video\?.*?\bid=(?P<id>\d+)'
+
+ _IS_LIVE = False
+
+
+class MangomoloLiveIE(MangomoloBaseIE):
+ _TYPE = 'live'
+ IE_NAME = 'mangomolo:' + _TYPE
+ _SLUG = r'(?:live|index)\?.*?\bchannelid=(?P<id>(?:[A-Za-z0-9+/=]|%2B|%2F|%3D)+)'
+ _IS_LIVE = True
+
+ def _get_real_id(self, page_id):
+ return compat_b64decode(compat_urllib_parse_unquote(page_id)).decode()
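
MangomoloLiveIE._get_real_id undoes the encoding of live channel IDs, which arrive as percent-encoded base64 in the embed URL (the example value is made up):

    import base64
    import urllib.parse

    page_id = 'MTIzNDU%3D'  # '12345' base64-encoded, then percent-encoded
    base64.b64decode(urllib.parse.unquote(page_id)).decode()
    # -> '12345'
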
diff --git a/yt_dlp/extractor/manoto.py b/yt_dlp/extractor/manoto.py
new file mode 100644
index 0000000..2792e6e
--- /dev/null
+++ b/yt_dlp/extractor/manoto.py
@@ -0,0 +1,133 @@
+from .common import InfoExtractor
+from ..utils import (
+ clean_html,
+ int_or_none,
+ traverse_obj
+)
+
+
+_API_URL = 'https://dak1vd5vmi7x6.cloudfront.net/api/v1/publicrole/{}/{}?id={}'
+
+
+class ManotoTVIE(InfoExtractor):
+ IE_DESC = 'Manoto TV (Episode)'
+ _VALID_URL = r'https?://(?:www\.)?manototv\.com/episode/(?P<id>[0-9]+)'
+ _TESTS = [{
+ 'url': 'https://www.manototv.com/episode/8475',
+ 'info_dict': {
+ 'id': '8475',
+ 'series': 'خانه های رویایی با برادران اسکات',
+ 'season_number': 7,
+ 'episode_number': 25,
+ 'episode_id': 'My Dream Home S7: Carol & John',
+ 'duration': 3600,
+ 'categories': ['سرگرمی'],
+ 'title': 'کارول و جان',
+ 'description': 'md5:d0fff1f8ba5c6775d312a00165d1a97e',
+ 'thumbnail': r're:^https?://.*\.(jpeg|png|jpg)$',
+ 'ext': 'mp4'
+ },
+ 'params': {
+ 'skip_download': 'm3u8',
+ }
+ }, {
+ 'url': 'https://www.manototv.com/episode/12576',
+ 'info_dict': {
+ 'id': '12576',
+ 'series': 'فیلم های ایرانی',
+ 'episode_id': 'Seh Mah Taatili',
+ 'duration': 5400,
+ 'view_count': int,
+ 'categories': ['سرگرمی'],
+ 'title': 'سه ماه تعطیلی',
+ 'description': 'سه ماه تعطیلی فیلمی به کارگردانی و نویسندگی شاپور قریب ساختهٔ سال ۱۳۵۶ است.',
+ 'thumbnail': r're:^https?://.*\.(jpeg|png|jpg)$',
+ 'ext': 'mp4'
+ },
+ 'params': {
+ 'skip_download': 'm3u8',
+ }
+ }]
+
+ def _real_extract(self, url):
+ video_id = self._match_id(url)
+ episode_json = self._download_json(_API_URL.format('showmodule', 'episodedetails', video_id), video_id)
+ details = episode_json.get('details', {})
+ formats = self._extract_m3u8_formats(details.get('videoM3u8Url'), video_id, 'mp4')
+ return {
+ 'id': video_id,
+ 'series': details.get('showTitle'),
+ 'season_number': int_or_none(details.get('analyticsSeasonNumber')),
+ 'episode_number': int_or_none(details.get('episodeNumber')),
+ 'episode_id': details.get('analyticsEpisodeTitle'),
+ 'duration': int_or_none(details.get('durationInMinutes'), invscale=60),
+ 'view_count': details.get('viewCount'),
+ 'categories': [details.get('videoCategory')],
+ 'title': details.get('episodeTitle'),
+ 'description': clean_html(details.get('episodeDescription')),
+ 'thumbnail': details.get('episodelandscapeImgIxUrl'),
+ 'formats': formats,
+ }
+
+
+class ManotoTVShowIE(InfoExtractor):
+ IE_DESC = 'Manoto TV (Show)'
+ _VALID_URL = r'https?://(?:www\.)?manototv\.com/show/(?P<id>[0-9]+)'
+ _TESTS = [{
+ 'url': 'https://www.manototv.com/show/2526',
+ 'playlist_mincount': 68,
+ 'info_dict': {
+ 'id': '2526',
+ 'title': 'فیلم های ایرانی',
+ 'description': 'مجموعه ای از فیلم های سینمای کلاسیک ایران',
+ },
+ }]
+
+ def _real_extract(self, url):
+ show_id = self._match_id(url)
+ show_json = self._download_json(_API_URL.format('showmodule', 'details', show_id), show_id)
+ show_details = show_json.get('details', {})
+ title = show_details.get('showTitle')
+ description = show_details.get('showSynopsis')
+
+ series_json = self._download_json(_API_URL.format('showmodule', 'serieslist', show_id), show_id)
+ playlist_id = str(traverse_obj(series_json, ('details', 'list', 0, 'id')))
+
+ playlist_json = self._download_json(_API_URL.format('showmodule', 'episodelist', playlist_id), playlist_id)
+ playlist = traverse_obj(playlist_json, ('details', 'list')) or []
+
+ entries = [
+ self.url_result(
+ 'https://www.manototv.com/episode/%s' % item['slideID'], ie=ManotoTVIE.ie_key(), video_id=item['slideID'])
+ for item in playlist]
+ return self.playlist_result(entries, show_id, title, description)
+
+
+class ManotoTVLiveIE(InfoExtractor):
+ IE_DESC = 'Manoto TV (Live)'
+ _VALID_URL = r'https?://(?:www\.)?manototv\.com/live/'
+ _TEST = {
+ 'url': 'https://www.manototv.com/live/',
+ 'info_dict': {
+ 'id': 'live',
+ 'title': 'Manoto TV Live',
+ 'ext': 'mp4',
+ 'is_live': True,
+ },
+ 'params': {
+ 'skip_download': 'm3u8',
+ }
+ }
+
+ def _real_extract(self, url):
+ video_id = 'live'
+ live_json = self._download_json(_API_URL.format('livemodule', 'details', ''), video_id)
+ details = live_json.get('details', {})
+ video_url = details.get('liveUrl')
+ formats = self._extract_m3u8_formats(video_url, video_id, 'mp4', live=True)
+ return {
+ 'id': video_id,
+ 'title': 'Manoto TV Live',
+ 'is_live': True,
+ 'formats': formats,
+ }
diff --git a/yt_dlp/extractor/manyvids.py b/yt_dlp/extractor/manyvids.py
new file mode 100644
index 0000000..2aa3a3c
--- /dev/null
+++ b/yt_dlp/extractor/manyvids.py
@@ -0,0 +1,164 @@
+import re
+
+from .common import InfoExtractor
+from ..utils import (
+ determine_ext,
+ extract_attributes,
+ int_or_none,
+ str_to_int,
+ url_or_none,
+ urlencode_postdata,
+)
+
+
+class ManyVidsIE(InfoExtractor):
+ _WORKING = False
+ _VALID_URL = r'(?i)https?://(?:www\.)?manyvids\.com/video/(?P<id>\d+)'
+ _TESTS = [{
+ # preview video
+ 'url': 'https://www.manyvids.com/Video/133957/everthing-about-me/',
+ 'md5': '03f11bb21c52dd12a05be21a5c7dcc97',
+ 'info_dict': {
+ 'id': '133957',
+ 'ext': 'mp4',
+ 'title': 'everthing about me (Preview)',
+ 'uploader': 'ellyxxix',
+ 'view_count': int,
+ 'like_count': int,
+ },
+ }, {
+ # full video
+ 'url': 'https://www.manyvids.com/Video/935718/MY-FACE-REVEAL/',
+ 'md5': 'bb47bab0e0802c2a60c24ef079dfe60f',
+ 'info_dict': {
+ 'id': '935718',
+ 'ext': 'mp4',
+ 'title': 'MY FACE REVEAL',
+ 'description': 'md5:ec5901d41808b3746fed90face161612',
+ 'uploader': 'Sarah Calanthe',
+ 'view_count': int,
+ 'like_count': int,
+ },
+ }]
+
+ def _real_extract(self, url):
+ video_id = self._match_id(url)
+
+ real_url = 'https://www.manyvids.com/video/%s/gtm.js' % (video_id, )
+ try:
+ webpage = self._download_webpage(real_url, video_id)
+ except Exception:
+ # probably useless fallback
+ webpage = self._download_webpage(url, video_id)
+
+ info = self._search_regex(
+ r'''(<div\b[^>]*\bid\s*=\s*(['"])pageMetaDetails\2[^>]*>)''',
+ webpage, 'meta details', default='')
+ info = extract_attributes(info)
+
+ player = self._search_regex(
+ r'''(<div\b[^>]*\bid\s*=\s*(['"])rmpPlayerStream\2[^>]*>)''',
+ webpage, 'player details', default='')
+ player = extract_attributes(player)
+
+ video_urls_and_ids = (
+ (info.get('data-meta-video'), 'video'),
+ (player.get('data-video-transcoded'), 'transcoded'),
+ (player.get('data-video-filepath'), 'filepath'),
+ (self._og_search_video_url(webpage, secure=False, default=None), 'og_video'),
+ )
+
+ def txt_or_none(s, default=None):
+ return (s.strip() or default) if isinstance(s, str) else default
+
+ uploader = txt_or_none(info.get('data-meta-author'))
+
+ def mung_title(s):
+ if uploader:
+ s = re.sub(r'^\s*%s\s+[|-]' % (re.escape(uploader), ), '', s)
+ return txt_or_none(s)
+
+ title = (
+ mung_title(info.get('data-meta-title'))
+ or self._html_search_regex(
+ (r'<span[^>]+class=["\']item-title[^>]+>([^<]+)',
+ r'<h2[^>]+class=["\']h2 m-0["\'][^>]*>([^<]+)'),
+ webpage, 'title', default=None)
+ or self._html_search_meta(
+ 'twitter:title', webpage, 'title', fatal=True))
+
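+ # strip the trailing "| ManyVids" site suffix; fall back to the untrimmed title if nothing remains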
+ title = re.sub(r'\s*[|-]\s+ManyVids\s*$', '', title) or title
+
+ if any(p in webpage for p in ('preview_videos', '_preview.mp4')):
+ title += ' (Preview)'
+
+ mv_token = self._search_regex(
+ r'data-mvtoken=(["\'])(?P<value>(?:(?!\1).)+)\1', webpage,
+ 'mv token', default=None, group='value')
+
+ if mv_token:
+ # Sets some cookies
+ self._download_webpage(
+ 'https://www.manyvids.com/includes/ajax_repository/you_had_me_at_hello.php',
+ video_id, note='Setting format cookies', fatal=False,
+ data=urlencode_postdata({
+ 'mvtoken': mv_token,
+ 'vid': video_id,
+ }), headers={
+ 'Referer': url,
+ 'X-Requested-With': 'XMLHttpRequest'
+ })
+
+ formats = []
+ for v_url, fmt in video_urls_and_ids:
+ v_url = url_or_none(v_url)
+ if not v_url:
+ continue
+ if determine_ext(v_url) == 'm3u8':
+ formats.extend(self._extract_m3u8_formats(
+ v_url, video_id, 'mp4', entry_protocol='m3u8_native',
+ m3u8_id='hls'))
+ else:
+ formats.append({
+ 'url': v_url,
+ 'format_id': fmt,
+ })
+
+ self._remove_duplicate_formats(formats)
+
+ for f in formats:
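+ # heights are embedded in the format URLs as even numbers, e.g. ..._720_...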
+ if f.get('height') is None:
+ f['height'] = int_or_none(
+ self._search_regex(r'_(\d{2,3}[02468])_', f['url'], 'video height', default=None))
+ if '/preview/' in f['url']:
+ f['format_id'] = '_'.join(filter(None, (f.get('format_id'), 'preview')))
+ f['preference'] = -10
+ if 'transcoded' in f['format_id']:
+ f['preference'] = f.get('preference', -1) - 1
+
+ def get_likes():
+ likes = self._search_regex(
+ r'''(<a\b[^>]*\bdata-id\s*=\s*(['"])%s\2[^>]*>)''' % (video_id, ),
+ webpage, 'likes', default='')
+ likes = extract_attributes(likes)
+ return int_or_none(likes.get('data-likes'))
+
+ def get_views():
+ return str_to_int(self._html_search_regex(
+ r'''(?s)<span\b[^>]*\bclass\s*=["']views-wrapper\b[^>]+>.+?<span\b[^>]+>\s*(\d[\d,.]*)\s*</span>''',
+ webpage, 'view count', default=None))
+
+ return {
+ 'id': video_id,
+ 'title': title,
+ 'formats': formats,
+ 'description': txt_or_none(info.get('data-meta-description')),
+ 'uploader': txt_or_none(info.get('data-meta-author')),
+ 'thumbnail': (
+ url_or_none(info.get('data-meta-image'))
+ or url_or_none(player.get('data-video-screenshot'))),
+ 'view_count': get_views(),
+ 'like_count': get_likes(),
+ }
diff --git a/yt_dlp/extractor/maoritv.py b/yt_dlp/extractor/maoritv.py
new file mode 100644
index 0000000..67780ea
--- /dev/null
+++ b/yt_dlp/extractor/maoritv.py
@@ -0,0 +1,28 @@
+from .common import InfoExtractor
+
+
+class MaoriTVIE(InfoExtractor):
+ _VALID_URL = r'https?://(?:www\.)?maoritelevision\.com/shows/(?:[^/]+/)+(?P<id>[^/?&#]+)'
+ _TEST = {
+ 'url': 'https://www.maoritelevision.com/shows/korero-mai/S01E054/korero-mai-series-1-episode-54',
+ 'md5': '5ade8ef53851b6a132c051b1cd858899',
+ 'info_dict': {
+ 'id': '4774724855001',
+ 'ext': 'mp4',
+ 'title': 'Kōrero Mai, Series 1 Episode 54',
+ 'upload_date': '20160226',
+ 'timestamp': 1456455018,
+ 'description': 'md5:59bde32fd066d637a1a55794c56d8dcb',
+ 'uploader_id': '1614493167001',
+ },
+ }
+ BRIGHTCOVE_URL_TEMPLATE = 'http://players.brightcove.net/1614493167001/HJlhIQhQf_default/index.html?videoId=%s'
+
+ def _real_extract(self, url):
+ display_id = self._match_id(url)
+ webpage = self._download_webpage(url, display_id)
+ brightcove_id = self._search_regex(
+ r'data-main-video-id=["\'](\d+)', webpage, 'brightcove id')
+ return self.url_result(
+ self.BRIGHTCOVE_URL_TEMPLATE % brightcove_id,
+ 'BrightcoveNew', brightcove_id)
diff --git a/yt_dlp/extractor/markiza.py b/yt_dlp/extractor/markiza.py
new file mode 100644
index 0000000..ca465ea
--- /dev/null
+++ b/yt_dlp/extractor/markiza.py
@@ -0,0 +1,124 @@
+import re
+
+from .common import InfoExtractor
+from ..compat import compat_str
+from ..utils import (
+ orderedSet,
+ parse_duration,
+ try_get,
+)
+
+
+class MarkizaIE(InfoExtractor):
+ _WORKING = False
+ _VALID_URL = r'https?://(?:www\.)?videoarchiv\.markiza\.sk/(?:video/(?:[^/]+/)*|embed/)(?P<id>\d+)(?:[_/]|$)'
+ _TESTS = [{
+ 'url': 'http://videoarchiv.markiza.sk/video/oteckovia/84723_oteckovia-109',
+ 'md5': 'ada4e9fad038abeed971843aa028c7b0',
+ 'info_dict': {
+ 'id': '139078',
+ 'ext': 'mp4',
+ 'title': 'Oteckovia 109',
+ 'description': 'md5:d41d8cd98f00b204e9800998ecf8427e',
+ 'thumbnail': r're:^https?://.*\.jpg$',
+ 'duration': 2760,
+ },
+ }, {
+ 'url': 'http://videoarchiv.markiza.sk/video/televizne-noviny/televizne-noviny/85430_televizne-noviny',
+ 'info_dict': {
+ 'id': '85430',
+ 'title': 'Televízne noviny',
+ },
+ 'playlist_count': 23,
+ }, {
+ 'url': 'http://videoarchiv.markiza.sk/video/oteckovia/84723',
+ 'only_matching': True,
+ }, {
+ 'url': 'http://videoarchiv.markiza.sk/video/84723',
+ 'only_matching': True,
+ }, {
+ 'url': 'http://videoarchiv.markiza.sk/video/filmy/85190_kamenak',
+ 'only_matching': True,
+ }, {
+ 'url': 'http://videoarchiv.markiza.sk/video/reflex/zo-zakulisia/84651_pribeh-alzbetky',
+ 'only_matching': True,
+ }, {
+ 'url': 'http://videoarchiv.markiza.sk/embed/85295',
+ 'only_matching': True,
+ }]
+
+ def _real_extract(self, url):
+ video_id = self._match_id(url)
+
+ data = self._download_json(
+ 'http://videoarchiv.markiza.sk/json/video_jwplayer7.json',
+ video_id, query={'id': video_id})
+
+ info = self._parse_jwplayer_data(data, m3u8_id='hls', mpd_id='dash')
+
+ if info.get('_type') == 'playlist':
+ info.update({
+ 'id': video_id,
+ 'title': try_get(
+ data, lambda x: x['details']['name'], compat_str),
+ })
+ else:
+ info['duration'] = parse_duration(
+ try_get(data, lambda x: x['details']['duration'], compat_str))
+ return info
+
+
+class MarkizaPageIE(InfoExtractor):
+ _WORKING = False
+ _VALID_URL = r'https?://(?:www\.)?(?:(?:[^/]+\.)?markiza|tvnoviny)\.sk/(?:[^/]+/)*(?P<id>\d+)_'
+ _TESTS = [{
+ 'url': 'http://www.markiza.sk/soubiz/zahranicny/1923705_oteckovia-maju-svoj-den-ti-slavni-nie-su-o-nic-menej-rozkosni',
+ 'md5': 'ada4e9fad038abeed971843aa028c7b0',
+ 'info_dict': {
+ 'id': '139355',
+ 'ext': 'mp4',
+ 'title': 'Oteckovia 110',
+ 'description': 'md5:d41d8cd98f00b204e9800998ecf8427e',
+ 'thumbnail': r're:^https?://.*\.jpg$',
+ 'duration': 2604,
+ },
+ 'params': {
+ 'skip_download': True,
+ },
+ }, {
+ 'url': 'http://dajto.markiza.sk/filmy-a-serialy/1774695_frajeri-vo-vegas',
+ 'only_matching': True,
+ }, {
+ 'url': 'http://superstar.markiza.sk/aktualne/1923870_to-je-ale-telo-spevacka-ukazala-sexy-postavicku-v-bikinach',
+ 'only_matching': True,
+ }, {
+ 'url': 'http://hybsa.markiza.sk/aktualne/1923790_uzasna-atmosfera-na-hybsa-v-poprade-superstaristi-si-prve-koncerty-pred-davom-ludi-poriadne-uzili',
+ 'only_matching': True,
+ }, {
+ 'url': 'http://doma.markiza.sk/filmy/1885250_moja-vysnivana-svadba',
+ 'only_matching': True,
+ }, {
+ 'url': 'http://www.tvnoviny.sk/domace/1923887_po-smrti-manzela-ju-cakalo-poriadne-prekvapenie',
+ 'only_matching': True,
+ }]
+
+ @classmethod
+ def suitable(cls, url):
+ return False if MarkizaIE.suitable(url) else super().suitable(url)
+
+ def _real_extract(self, url):
+ playlist_id = self._match_id(url)
+
+ webpage = self._download_webpage(
+ # Downloading for some hosts (e.g. dajto, doma) fails with 500
+ # although everything seems to be OK, so considering 500
+ # status code to be expected.
+ url, playlist_id, expected_status=500)
+
+ entries = [
+ self.url_result('http://videoarchiv.markiza.sk/video/%s' % video_id)
+ for video_id in orderedSet(re.findall(
+ r'(?:initPlayer_|data-entity=["\']|id=["\']player_)(\d+)',
+ webpage))]
+
+ return self.playlist_result(entries, playlist_id)
diff --git a/yt_dlp/extractor/massengeschmacktv.py b/yt_dlp/extractor/massengeschmacktv.py
new file mode 100644
index 0000000..1490e9b
--- /dev/null
+++ b/yt_dlp/extractor/massengeschmacktv.py
@@ -0,0 +1,73 @@
+import re
+
+from .common import InfoExtractor
+from ..utils import (
+ clean_html,
+ determine_ext,
+ int_or_none,
+ js_to_json,
+ mimetype2ext,
+ parse_filesize,
+)
+
+
+class MassengeschmackTVIE(InfoExtractor):
+ IE_NAME = 'massengeschmack.tv'
+ _VALID_URL = r'https?://(?:www\.)?massengeschmack\.tv/play/(?P<id>[^?&#]+)'
+
+ _TEST = {
+ 'url': 'https://massengeschmack.tv/play/fktv202',
+ 'md5': '9996f314994a49fefe5f39aa1b07ae21',
+ 'info_dict': {
+ 'id': 'fktv202',
+ 'ext': 'mp4',
+ 'title': 'Fernsehkritik-TV #202',
+ 'thumbnail': 'https://cache.massengeschmack.tv/img/mag/fktv202.jpg'
+ },
+ }
+
+ def _real_extract(self, url):
+ episode = self._match_id(url)
+
+ webpage = self._download_webpage(url, episode)
+ sources = self._parse_json(self._search_regex(r'(?s)MEDIA\s*=\s*(\[.+?\]);', webpage, 'media'), episode, js_to_json)
+
+ formats = []
+ for source in sources:
+ furl = source.get('src')
+ if not furl:
+ continue
+ furl = self._proto_relative_url(furl)
+ ext = determine_ext(furl) or mimetype2ext(source.get('type'))
+ if ext == 'm3u8':
+ formats.extend(self._extract_m3u8_formats(
+ furl, episode, 'mp4', 'm3u8_native',
+ m3u8_id='hls', fatal=False))
+ else:
+ formats.append({
+ 'url': furl,
+ 'format_id': determine_ext(furl),
+ })
+
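+ # also scrape the HTML download table for direct-download formats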
+ for (durl, format_id, width, height, filesize) in re.findall(r'''(?x)
+ <a[^>]+?href="(?P<url>(?:https:)?//[^"]+)".*?
+ <strong>(?P<format_id>.+?)</strong>.*?
+ <small>(?:(?P<width>\d+)x(?P<height>\d+))?\s+?\((?P<filesize>[\d,]+\s*[GM]iB)\)</small>
+ ''', webpage):
+ formats.append({
+ 'url': durl,
+ 'format_id': format_id,
+ 'width': int_or_none(width),
+ 'height': int_or_none(height),
+ 'filesize': parse_filesize(filesize),
+ 'vcodec': 'none' if format_id.startswith('Audio') else None,
+ })
+
+ return {
+ 'id': episode,
+ 'title': clean_html(self._html_search_regex(
+ r'<span[^>]+\bid=["\']clip-title["\'][^>]*>([^<]+)', webpage, 'title', fatal=False)),
+ 'formats': formats,
+ 'thumbnail': self._search_regex(r'POSTER\s*=\s*"([^"]+)', webpage, 'thumbnail', fatal=False),
+ }
diff --git a/yt_dlp/extractor/masters.py b/yt_dlp/extractor/masters.py
new file mode 100644
index 0000000..716f1c9
--- /dev/null
+++ b/yt_dlp/extractor/masters.py
@@ -0,0 +1,37 @@
+from .common import InfoExtractor
+from ..utils import (
+ traverse_obj,
+ unified_strdate,
+)
+
+
+class MastersIE(InfoExtractor):
+ _VALID_URL = r'https?://(?:www\.)?masters\.com/en_US/watch/(?P<date>\d{4}-\d{2}-\d{2})/(?P<id>\d+)'
+ _TESTS = [{
+ 'url': 'https://www.masters.com/en_US/watch/2022-04-07/16493755593805191/sungjae_im_thursday_interview_2022.html',
+ 'info_dict': {
+ 'id': '16493755593805191',
+ 'ext': 'mp4',
+ 'title': 'Sungjae Im: Thursday Interview 2022',
+ 'upload_date': '20220407',
+ 'thumbnail': r're:^https?://.*\.jpg$',
+ }
+ }]
+
+ def _real_extract(self, url):
+ video_id, upload_date = self._match_valid_url(url).group('id', 'date')
+ content_resp = self._download_json(
+ f'https://www.masters.com/relatedcontent/rest/v2/masters_v1/en/content/masters_v1_{video_id}_en',
+ video_id)
+ formats, subtitles = self._extract_m3u8_formats_and_subtitles(traverse_obj(content_resp, ('media', 'm3u8')), video_id, 'mp4')
+
+ thumbnails = [{'id': name, 'url': url} for name, url in traverse_obj(content_resp, ('images', 0), default={}).items()]
+
+ return {
+ 'id': video_id,
+ 'title': content_resp.get('title'),
+ 'formats': formats,
+ 'subtitles': subtitles,
+ 'upload_date': unified_strdate(upload_date),
+ 'thumbnails': thumbnails,
+ }
diff --git a/yt_dlp/extractor/matchtv.py b/yt_dlp/extractor/matchtv.py
new file mode 100644
index 0000000..a67fa9f
--- /dev/null
+++ b/yt_dlp/extractor/matchtv.py
@@ -0,0 +1,52 @@
+import random
+
+from .common import InfoExtractor
+from ..utils import xpath_text
+
+
+class MatchTVIE(InfoExtractor):
+ _VALID_URL = r'https?://matchtv\.ru(?:/on-air|/?#live-player)'
+ _TESTS = [{
+ 'url': 'http://matchtv.ru/#live-player',
+ 'info_dict': {
+ 'id': 'matchtv-live',
+ 'ext': 'flv',
+ 'title': r're:^Матч ТВ - Прямой эфир \d{4}-\d{2}-\d{2} \d{2}:\d{2}$',
+ 'is_live': True,
+ },
+ 'params': {
+ 'skip_download': True,
+ },
+ }, {
+ 'url': 'http://matchtv.ru/on-air/',
+ 'only_matching': True,
+ }]
+
+ def _real_extract(self, url):
+ video_id = 'matchtv-live'
+ video_url = self._download_json(
+ 'http://player.matchtv.ntvplus.tv/player/smil', video_id,
+ query={
+ 'ts': '',
+ 'quality': 'SD',
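+ # seemingly the fixed content id of the Match TV live channel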
+ 'contentId': '561d2c0df7159b37178b4567',
+ 'sign': '',
+ 'includeHighlights': '0',
+ 'userId': '',
+ 'sessionId': random.randint(1, 1000000000),
+ 'contentType': 'channel',
+ 'timeShift': '0',
+ 'platform': 'portal',
+ },
+ headers={
+ 'Referer': 'http://player.matchtv.ntvplus.tv/embed-player/NTVEmbedPlayer.swf',
+ })['data']['videoUrl']
+ f4m_url = xpath_text(self._download_xml(video_url, video_id), './to')
+ formats = self._extract_f4m_formats(f4m_url, video_id)
+ return {
+ 'id': video_id,
+ 'title': 'Матч ТВ - Прямой эфир',
+ 'is_live': True,
+ 'formats': formats,
+ }
diff --git a/yt_dlp/extractor/mbn.py b/yt_dlp/extractor/mbn.py
new file mode 100644
index 0000000..4917c46
--- /dev/null
+++ b/yt_dlp/extractor/mbn.py
@@ -0,0 +1,90 @@
+import re
+
+from .common import InfoExtractor
+from ..utils import (
+ int_or_none,
+ unified_strdate,
+ url_or_none,
+)
+from ..utils.traversal import traverse_obj
+
+
+class MBNIE(InfoExtractor):
+ IE_DESC = 'mbn.co.kr (매일방송)'
+ _VALID_URL = r'https?://(?:www\.)?mbn\.co\.kr/vod/programContents/preview(?:list)?/\d+/\d+/(?P<id>\d+)'
+ _TESTS = [{
+ 'url': 'https://mbn.co.kr/vod/programContents/previewlist/861/5433/1276155',
+ 'md5': '85e1694e5b247c04d1386b7e3c90fd76',
+ 'info_dict': {
+ 'id': '1276155',
+ 'ext': 'mp4',
+ 'title': '결국 사로잡힌 권유리, 그녀를 목숨 걸고 구하려는 정일우!',
+ 'duration': 3891,
+ 'release_date': '20210703',
+ 'thumbnail': 'http://img.vod.mbn.co.kr/mbnvod2img/861/2021/07/03/20210703230811_20_861_1276155_360_7_0.jpg',
+ 'series': '보쌈 - 운명을 훔치다',
+ 'episode': 'Episode 19',
+ 'episode_number': 19,
+ },
+ }, {
+ 'url': 'https://www.mbn.co.kr/vod/programContents/previewlist/835/5294/1084744',
+ 'md5': 'fc65d3aac85e85e0b5056f4ef99cde4a',
+ 'info_dict': {
+ 'id': '1084744',
+ 'ext': 'mp4',
+ 'title': '김정은♥최원영, 제자리를 찾은 위험한 부부! "결혼은 투쟁이면서, 어려운 방식이야.."',
+ 'duration': 93,
+ 'release_date': '20201124',
+ 'thumbnail': 'http://img.vod.mbn.co.kr/mbnvod2img/835/2020/11/25/20201125000221_21_835_1084744_360_7_0.jpg',
+ 'series': '나의 위험한 아내',
+ },
+ }, {
+ 'url': 'https://www.mbn.co.kr/vod/programContents/preview/952/6088/1054797?next=1',
+ 'md5': 'c711103c72aeac8323a5cf1751f10097',
+ 'info_dict': {
+ 'id': '1054797',
+ 'ext': 'mp4',
+ 'title': '[2차 티저] MBN 주말 미니시리즈 <완벽한 결혼의 정석> l 그녀에게 주어진 두 번째 인생',
+ 'duration': 65,
+ 'release_date': '20231028',
+ 'thumbnail': 'http://img.vod.mbn.co.kr/vod2/952/2023/09/11/20230911130223_22_952_1054797_1080_7.jpg',
+ 'series': '완벽한 결혼의 정석',
+ },
+ }]
+
+ def _real_extract(self, url):
+ content_id = self._match_id(url)
+ webpage = self._download_webpage(url, content_id)
+
+ content_cls_cd = self._search_regex(
+ r'"\?content_cls_cd=(\d+)&', webpage, 'content cls cd', fatal=False) or '20'
+ media_info = self._download_json(
+ 'https://www.mbn.co.kr/player/mbnVodPlayer_2020.mbn', content_id,
+ note='Fetching playback data', query={
+ 'content_cls_cd': content_cls_cd,
+ 'content_id': content_id,
+ 'relay_type': '1',
+ })
+
+ formats = []
+ for stream_url in traverse_obj(media_info, ('movie_list', ..., 'url', {url_or_none})):
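+ # rewrite single-quality playlist URLs to the full multi-bitrate manifest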
+ stream_url = re.sub(r'/(?:chunk|play)list(?:_pd\d+)?\.m3u8', '/manifest.m3u8', stream_url)
+ final_url = url_or_none(self._download_webpage(
+ f'https://www.mbn.co.kr/player/mbnStreamAuth_new_vod.mbn?vod_url={stream_url}',
+ content_id, note='Fetching authenticated m3u8 url'))
+
+ formats.extend(self._extract_m3u8_formats(final_url, content_id, fatal=False))
+
+ return {
+ 'id': content_id,
+ **traverse_obj(media_info, {
+ 'title': ('movie_title', {str}),
+ 'duration': ('play_sec', {int_or_none}),
+ 'release_date': ('bcast_date', {lambda x: x.replace('.', '')}, {unified_strdate}),
+ 'thumbnail': ('movie_start_Img', {url_or_none}),
+ 'series': ('prog_nm', {str}),
+ 'episode_number': ('ad_contentnumber', {int_or_none}),
+ }),
+ 'formats': formats,
+ }
diff --git a/yt_dlp/extractor/mdr.py b/yt_dlp/extractor/mdr.py
new file mode 100644
index 0000000..49f5b49
--- /dev/null
+++ b/yt_dlp/extractor/mdr.py
@@ -0,0 +1,185 @@
+from .common import InfoExtractor
+from ..compat import compat_urlparse
+from ..utils import (
+ determine_ext,
+ int_or_none,
+ join_nonempty,
+ parse_duration,
+ parse_iso8601,
+ url_or_none,
+ xpath_text,
+)
+
+
+class MDRIE(InfoExtractor):
+ IE_DESC = 'MDR.DE and KiKA'
+ _VALID_URL = r'https?://(?:www\.)?(?:mdr|kika)\.de/(?:.*)/[a-z-]+-?(?P<id>\d+)(?:_.+?)?\.html'
+
+ _GEO_COUNTRIES = ['DE']
+
+ _TESTS = [{
+ # MDR regularly deletes its videos
+ 'url': 'http://www.mdr.de/fakt/video189002.html',
+ 'only_matching': True,
+ }, {
+ # audio
+ 'url': 'http://www.mdr.de/kultur/audio1312272_zc-15948bad_zs-86171fdd.html',
+ 'md5': '64c4ee50f0a791deb9479cd7bbe9d2fa',
+ 'info_dict': {
+ 'id': '1312272',
+ 'ext': 'mp3',
+ 'title': 'Feuilleton vom 30. Oktober 2015',
+ 'duration': 250,
+ 'uploader': 'MITTELDEUTSCHER RUNDFUNK',
+ },
+ 'skip': '404 not found',
+ }, {
+ 'url': 'http://www.kika.de/baumhaus/videos/video19636.html',
+ 'md5': '4930515e36b06c111213e80d1e4aad0e',
+ 'info_dict': {
+ 'id': '19636',
+ 'ext': 'mp4',
+ 'title': 'Baumhaus vom 30. Oktober 2015',
+ 'duration': 134,
+ 'uploader': 'KIKA',
+ },
+ 'skip': '404 not found',
+ }, {
+ 'url': 'http://www.kika.de/sendungen/einzelsendungen/weihnachtsprogramm/videos/video8182.html',
+ 'md5': '5fe9c4dd7d71e3b238f04b8fdd588357',
+ 'info_dict': {
+ 'id': '8182',
+ 'ext': 'mp4',
+ 'title': 'Beutolomäus und der geheime Weihnachtswunsch',
+ 'description': 'md5:b69d32d7b2c55cbe86945ab309d39bbd',
+ 'timestamp': 1482541200,
+ 'upload_date': '20161224',
+ 'duration': 4628,
+ 'uploader': 'KIKA',
+ },
+ }, {
+ # audio with alternative playerURL pattern
+ 'url': 'http://www.mdr.de/kultur/videos-und-audios/audio-radio/operation-mindfuck-robert-wilson100.html',
+ 'info_dict': {
+ 'id': '100',
+ 'ext': 'mp4',
+ 'title': 'Feature: Operation Mindfuck - Robert Anton Wilson',
+ 'duration': 3239,
+ 'uploader': 'MITTELDEUTSCHER RUNDFUNK',
+ },
+ }, {
+ # empty bitrateVideo and bitrateAudio
+ 'url': 'https://www.kika.de/filme/sendung128372_zc-572e3f45_zs-1d9fb70e.html',
+ 'info_dict': {
+ 'id': '128372',
+ 'ext': 'mp4',
+ 'title': 'Der kleine Wichtel kehrt zurück',
+ 'description': 'md5:f77fafdff90f7aa1e9dca14f662c052a',
+ 'duration': 4876,
+ 'timestamp': 1607823300,
+ 'upload_date': '20201213',
+ 'uploader': 'ZDF',
+ },
+ 'params': {
+ 'skip_download': True,
+ },
+ }, {
+ 'url': 'http://www.kika.de/baumhaus/sendungen/video19636_zc-fea7f8a0_zs-4bf89c60.html',
+ 'only_matching': True,
+ }, {
+ 'url': 'http://www.kika.de/sendungen/einzelsendungen/weihnachtsprogramm/einzelsendung2534.html',
+ 'only_matching': True,
+ }, {
+ 'url': 'http://www.mdr.de/mediathek/mdr-videos/a/video-1334.html',
+ 'only_matching': True,
+ }]
+
+ def _real_extract(self, url):
+ video_id = self._match_id(url)
+
+ webpage = self._download_webpage(url, video_id)
+
+ data_url = self._search_regex(
+ r'(?:dataURL|playerXml(?:["\'])?)\s*:\s*(["\'])(?P<url>.+?-avCustom\.xml)\1',
+ webpage, 'data url', group='url').replace(r'\/', '/')
+
+ doc = self._download_xml(
+ compat_urlparse.urljoin(url, data_url), video_id)
+
+ title = xpath_text(doc, ['./title', './broadcast/broadcastName'], 'title', fatal=True)
+
+ type_ = xpath_text(doc, './type', default=None)
+
+ formats = []
+ processed_urls = []
+ for asset in doc.findall('./assets/asset'):
+ for source in (
+ 'download',
+ 'progressiveDownload',
+ 'dynamicHttpStreamingRedirector',
+ 'adaptiveHttpStreamingRedirector'):
+ url_el = asset.find('./%sUrl' % source)
+ if url_el is None:
+ continue
+
+ video_url = url_or_none(url_el.text)
+ if not video_url or video_url in processed_urls:
+ continue
+
+ processed_urls.append(video_url)
+
+ ext = determine_ext(video_url)
+ if ext == 'm3u8':
+ formats.extend(self._extract_m3u8_formats(
+ video_url, video_id, 'mp4', entry_protocol='m3u8_native',
+ quality=1, m3u8_id='HLS', fatal=False))
+ elif ext == 'f4m':
+ formats.extend(self._extract_f4m_formats(
+ video_url + '?hdcore=3.7.0&plugin=aasp-3.7.0.39.44', video_id,
+ quality=1, f4m_id='HDS', fatal=False))
+ else:
+ media_type = xpath_text(asset, './mediaType', 'media type', default='MP4')
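+ # bitrates are given in bit/s; scale down to kbit/s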
+ vbr = int_or_none(xpath_text(asset, './bitrateVideo', 'vbr'), 1000)
+ abr = int_or_none(xpath_text(asset, './bitrateAudio', 'abr'), 1000)
+ filesize = int_or_none(xpath_text(asset, './fileSize', 'file size'))
+
+ f = {
+ 'url': video_url,
+ 'format_id': join_nonempty(media_type, vbr or abr),
+ 'filesize': filesize,
+ 'abr': abr,
+ 'vbr': vbr,
+ }
+
+ if vbr:
+ f.update({
+ 'width': int_or_none(xpath_text(asset, './frameWidth', 'width')),
+ 'height': int_or_none(xpath_text(asset, './frameHeight', 'height')),
+ })
+
+ if type_ == 'audio':
+ f['vcodec'] = 'none'
+
+ formats.append(f)
+
+ description = xpath_text(doc, './broadcast/broadcastDescription', 'description')
+ timestamp = parse_iso8601(
+ xpath_text(
+ doc, [
+ './broadcast/broadcastDate',
+ './broadcast/broadcastStartDate',
+ './broadcast/broadcastEndDate'],
+ 'timestamp', default=None))
+ duration = parse_duration(xpath_text(doc, './duration', 'duration'))
+ uploader = xpath_text(doc, './rights', 'uploader')
+
+ return {
+ 'id': video_id,
+ 'title': title,
+ 'description': description,
+ 'timestamp': timestamp,
+ 'duration': duration,
+ 'uploader': uploader,
+ 'formats': formats,
+ }
diff --git a/yt_dlp/extractor/medaltv.py b/yt_dlp/extractor/medaltv.py
new file mode 100644
index 0000000..675ad8c
--- /dev/null
+++ b/yt_dlp/extractor/medaltv.py
@@ -0,0 +1,163 @@
+import re
+
+from .common import InfoExtractor
+from ..compat import compat_str
+from ..utils import (
+ ExtractorError,
+ format_field,
+ float_or_none,
+ int_or_none,
+ str_or_none,
+ traverse_obj,
+)
+
+
+class MedalTVIE(InfoExtractor):
+ _VALID_URL = r'https?://(?:www\.)?medal\.tv/games/[^/?#&]+/clips/(?P<id>[^/?#&]+)'
+ _TESTS = [{
+ 'url': 'https://medal.tv/games/valorant/clips/jTBFnLKdLy15K',
+ 'md5': '03e4911fdcf7fce563090705c2e79267',
+ 'info_dict': {
+ 'id': 'jTBFnLKdLy15K',
+ 'ext': 'mp4',
+ 'title': "Mornu's clutch",
+ 'description': '',
+ 'uploader': 'Aciel',
+ 'timestamp': 1651628243,
+ 'upload_date': '20220504',
+ 'uploader_id': '19335460',
+ 'uploader_url': 'https://medal.tv/users/19335460',
+ 'comment_count': int,
+ 'view_count': int,
+ 'like_count': int,
+ 'duration': 13,
+ }
+ }, {
+ 'url': 'https://medal.tv/games/cod-cold-war/clips/2mA60jWAGQCBH',
+ 'md5': 'fc7a3e4552ae8993c1c4006db46be447',
+ 'info_dict': {
+ 'id': '2mA60jWAGQCBH',
+ 'ext': 'mp4',
+ 'title': 'Quad Cold',
+ 'description': 'Medal,https://medal.tv/desktop/',
+ 'uploader': 'MowgliSB',
+ 'timestamp': 1603165266,
+ 'upload_date': '20201020',
+ 'uploader_id': '10619174',
+ 'thumbnail': 'https://cdn.medal.tv/10619174/thumbnail-34934644-720p.jpg?t=1080p&c=202042&missing',
+ 'uploader_url': 'https://medal.tv/users/10619174',
+ 'comment_count': int,
+ 'view_count': int,
+ 'like_count': int,
+ 'duration': 23,
+ }
+ }, {
+ 'url': 'https://medal.tv/games/cod-cold-war/clips/2um24TWdty0NA',
+ 'md5': 'b6dc76b78195fff0b4f8bf4a33ec2148',
+ 'info_dict': {
+ 'id': '2um24TWdty0NA',
+ 'ext': 'mp4',
+ 'title': 'u tk me i tk u bigger',
+ 'description': 'Medal,https://medal.tv/desktop/',
+ 'uploader': 'Mimicc',
+ 'timestamp': 1605580939,
+ 'upload_date': '20201117',
+ 'uploader_id': '5156321',
+ 'thumbnail': 'https://cdn.medal.tv/5156321/thumbnail-36787208-360p.jpg?t=1080p&c=202046&missing',
+ 'uploader_url': 'https://medal.tv/users/5156321',
+ 'comment_count': int,
+ 'view_count': int,
+ 'like_count': int,
+ 'duration': 9,
+ }
+ }, {
+ 'url': 'https://medal.tv/games/valorant/clips/37rMeFpryCC-9',
+ 'only_matching': True,
+ }, {
+ 'url': 'https://medal.tv/games/valorant/clips/2WRj40tpY_EU9',
+ 'only_matching': True,
+ }]
+
+ def _real_extract(self, url):
+ video_id = self._match_id(url)
+
+ webpage = self._download_webpage(url, video_id, query={'mobilebypass': 'true'})
+
+ hydration_data = self._search_json(
+ r'<script[^>]*>[^<]*\bhydrationData\s*=', webpage,
+ 'next data', video_id, end_pattern='</script>', fatal=False)
+
+ clip = traverse_obj(hydration_data, ('clips', ...), get_all=False)
+ if not clip:
+ raise ExtractorError(
+ 'Could not find video information.', video_id=video_id)
+
+ title = clip['contentTitle']
+
+ source_width = int_or_none(clip.get('sourceWidth'))
+ source_height = int_or_none(clip.get('sourceHeight'))
+
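+ # only heights are present in the clip data; widths are derived from the source aspect ratio (16:9 fallback)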
+ aspect_ratio = source_width / source_height if source_width and source_height else 16 / 9
+
+ def add_item(container, item_url, height, id_key='format_id', item_id=None):
+ item_id = item_id or '%dp' % height
+ if item_id not in item_url:
+ return
+ width = int(round(aspect_ratio * height))
+ container.append({
+ 'url': item_url,
+ id_key: item_id,
+ 'width': width,
+ 'height': height
+ })
+
+ formats = []
+ thumbnails = []
+ for k, v in clip.items():
+ if not (v and isinstance(v, compat_str)):
+ continue
+ mobj = re.match(r'(contentUrl|thumbnail)(?:(\d+)p)?$', k)
+ if not mobj:
+ continue
+ prefix = mobj.group(1)
+ height = int_or_none(mobj.group(2))
+ if prefix == 'contentUrl':
+ add_item(
+ formats, v, height or source_height,
+ item_id=None if height else 'source')
+ elif prefix == 'thumbnail':
+ add_item(thumbnails, v, height, 'id')
+
+ error = clip.get('error')
+ if not formats and error:
+ if error == 404:
+ self.raise_no_formats(
+ 'That clip does not exist.',
+ expected=True, video_id=video_id)
+ else:
+ self.raise_no_formats(
+ 'An unknown error occurred ({0}).'.format(error),
+ video_id=video_id)
+
+ # Necessary because the id of the author is not known in advance.
+ # No error is raised if no profile is found, since this metadata is optional.
+ author = traverse_obj(hydration_data, ('profiles', ...), get_all=False) or {}
+ author_id = str_or_none(author.get('userId'))
+ author_url = format_field(author_id, None, 'https://medal.tv/users/%s')
+
+ return {
+ 'id': video_id,
+ 'title': title,
+ 'formats': formats,
+ 'thumbnails': thumbnails,
+ 'description': clip.get('contentDescription'),
+ 'uploader': author.get('displayName'),
+ 'timestamp': float_or_none(clip.get('created'), 1000),
+ 'uploader_id': author_id,
+ 'uploader_url': author_url,
+ 'duration': int_or_none(clip.get('videoLengthSeconds')),
+ 'view_count': int_or_none(clip.get('views')),
+ 'like_count': int_or_none(clip.get('likes')),
+ 'comment_count': int_or_none(clip.get('comments')),
+ }
diff --git a/yt_dlp/extractor/mediaite.py b/yt_dlp/extractor/mediaite.py
new file mode 100644
index 0000000..32887cb
--- /dev/null
+++ b/yt_dlp/extractor/mediaite.py
@@ -0,0 +1,104 @@
+from .common import InfoExtractor
+
+
+class MediaiteIE(InfoExtractor):
+ _VALID_URL = r'https?://(?:www\.)?mediaite\.com(?!/category)(?:/[\w-]+){2}'
+ _TESTS = [{
+ 'url': 'https://www.mediaite.com/sports/bill-burr-roasts-nfl-for-promoting-black-lives-matter-while-scheduling-more-games-after-all-the-sht-they-know-about-cte/',
+ 'info_dict': {
+ 'id': 'vPHKITzy',
+ 'ext': 'm4a',
+ 'title': 'Bill Burr On NFL And Black Lives Matter',
+ 'description': 'md5:d41d8cd98f00b204e9800998ecf8427e',
+ 'thumbnail': 'https://cdn.jwplayer.com/v2/media/vPHKITzy/poster.jpg?width=720',
+ 'duration': 55,
+ 'timestamp': 1631630185,
+ 'upload_date': '20210914',
+ },
+ 'params': {'skip_download': True}
+ }, {
+ 'url': 'https://www.mediaite.com/tv/joe-scarborough-goes-off-on-tax-breaks-for-super-wealthy-largest-income-redistribution-scam-in-american-history/',
+ 'info_dict': {
+ 'id': 'eeFcK4Xm',
+ 'ext': 'mp4',
+ 'title': 'Morning Joe-6_16_52 am - 6_21_10 am-2021-09-14.mp4',
+ 'description': 'md5:d41d8cd98f00b204e9800998ecf8427e',
+ 'thumbnail': 'https://cdn.jwplayer.com/v2/media/eeFcK4Xm/poster.jpg?width=720',
+ 'duration': 258,
+ 'timestamp': 1631618057,
+ 'upload_date': '20210914',
+ },
+ 'params': {'skip_download': True}
+ }, {
+ 'url': 'https://www.mediaite.com/politics/watch-rudy-giuliani-impersonates-queen-elizabeth-calls-mark-milley-an-asshle-in-bizarre-9-11-speech/',
+ 'info_dict': {
+ 'id': 'EiyiXKcr',
+ 'ext': 'mp4',
+ 'title': 'Giuliani 1',
+ 'description': 'md5:d41d8cd98f00b204e9800998ecf8427e',
+ 'thumbnail': 'https://cdn.jwplayer.com/v2/media/EiyiXKcr/poster.jpg?width=720',
+ 'duration': 39,
+ 'timestamp': 1631536476,
+ 'upload_date': '20210913',
+ },
+ 'params': {'skip_download': True}
+ }, {
+ 'url': 'https://www.mediaite.com/podcasts/clarissa-ward-says-she-decided-to-become-a-journalist-on-9-11/',
+ 'info_dict': {
+ 'id': 'TxavoRTx',
+ 'ext': 'mp4',
+ 'title': 'clarissa-ward-3.mp4',
+ 'description': 'md5:d41d8cd98f00b204e9800998ecf8427e',
+ 'thumbnail': 'https://cdn.jwplayer.com/v2/media/TxavoRTx/poster.jpg?width=720',
+ 'duration': 83,
+ 'timestamp': 1631311188,
+ 'upload_date': '20210910',
+ },
+ 'params': {'skip_download': True}
+ }, {
+ 'url': 'https://www.mediaite.com/opinion/mainstream-media-ignores-rose-mcgowans-bombshell-allegation-that-newsoms-wife-tried-to-silence-her-on-weinstein/',
+ 'info_dict': {
+ 'id': 'sEIWvKR7',
+ 'ext': 'mp4',
+ 'title': 'KTTV_09-13-2021_05.34.21',
+ 'description': 'md5:d41d8cd98f00b204e9800998ecf8427e',
+ 'thumbnail': 'https://cdn.jwplayer.com/v2/media/sEIWvKR7/poster.jpg?width=720',
+ 'duration': 52,
+ 'timestamp': 1631553328,
+ 'upload_date': '20210913',
+ },
+ 'params': {'skip_download': True}
+ }, {
+ 'url': 'https://www.mediaite.com/news/watch-cnbcs-jim-cramer-says-nobody-wants-to-die-getting-infected-by-unvaccinated-coworker-even-for-22-an-hour/',
+ 'info_dict': {
+ 'id': 'nwpt1elX',
+ 'ext': 'mp4',
+ 'title': "CNBC's Jim Cramer Says Nobody Wants to Die Getting Infected by Unvaccinated Coworker 'Even for $22 an Hour'.mp4",
+ 'description': 'md5:d41d8cd98f00b204e9800998ecf8427e',
+ 'thumbnail': 'https://cdn.jwplayer.com/v2/media/nwpt1elX/poster.jpg?width=720',
+ 'duration': 60,
+ 'timestamp': 1633014214,
+ 'upload_date': '20210930',
+ },
+ 'params': {'skip_download': True}
+ }, {
+ 'url': 'https://www.mediaite.com/politics/i-cant-read-it-fast-enough-while-defending-trump-larry-kudlow-overwhelmed-by-volume-of-ex-presidents-legal-troubles/',
+ 'info_dict': {
+ 'id': 'E6EhDX5z',
+ 'ext': 'mp4',
+ 'title': 'Fox Business Network - 4:00 PM - 5:00 PM - 1:39:42 pm - 1:42:20 pm',
+ 'description': '',
+ 'thumbnail': 'https://cdn.jwplayer.com/v2/media/E6EhDX5z/poster.jpg?width=720',
+ 'duration': 157,
+ 'timestamp': 1691015535,
+ 'upload_date': '20230802',
+ },
+ 'params': {'skip_download': True}
+ }]
+
+ def _real_extract(self, url):
+ webpage = self._download_webpage(url, None)
+ video_id = self._search_regex(
+ [r'"https://cdn\.jwplayer\.com/players/(\w+)', r'data-video-id\s*=\s*\"([^\"]+)\"'], webpage, 'id')
+ data_json = self._download_json(f'https://cdn.jwplayer.com/v2/media/{video_id}', video_id)
+ return self._parse_jwplayer_data(data_json)
diff --git a/yt_dlp/extractor/mediaklikk.py b/yt_dlp/extractor/mediaklikk.py
new file mode 100644
index 0000000..fcc4827
--- /dev/null
+++ b/yt_dlp/extractor/mediaklikk.py
@@ -0,0 +1,157 @@
+from ..utils import (
+ ExtractorError,
+ traverse_obj,
+ unified_strdate,
+ url_or_none,
+)
+from .common import InfoExtractor
+from ..compat import (
+ compat_urllib_parse_unquote,
+ compat_str
+)
+
+
+class MediaKlikkIE(InfoExtractor):
+ _VALID_URL = r'''(?x)https?://(?:www\.)?
+ (?:mediaklikk|m4sport|hirado|petofilive)\.hu/.*?(?:videok?|cikk)/
+ (?:(?P<year>[0-9]{4})/(?P<month>[0-9]{1,2})/(?P<day>[0-9]{1,2})/)?
+ (?P<id>[^/#?_]+)'''
+
+ _TESTS = [{
+ # (old) mediaklikk. date in html.
+ 'url': 'https://mediaklikk.hu/video/hazajaro-delnyugat-bacska-a-duna-menten-palankatol-doroszloig/',
+ 'info_dict': {
+ 'id': '4754129',
+ 'title': 'Hazajáró, DÉLNYUGAT-BÁCSKA – A Duna mentén Palánkától Doroszlóig',
+ 'ext': 'mp4',
+ 'upload_date': '20210901',
+ 'thumbnail': 'http://mediaklikk.hu/wp-content/uploads/sites/4/2014/02/hazajarouj_JO.jpg'
+ },
+ 'skip': 'Webpage redirects to 404 page',
+ }, {
+ # mediaklikk. date in html.
+ 'url': 'https://mediaklikk.hu/video/hazajaro-fabova-hegyseg-kishont-koronaja/',
+ 'info_dict': {
+ 'id': '6696133',
+ 'title': 'Hazajáró, Fabova-hegység - Kishont koronája',
+ 'display_id': 'hazajaro-fabova-hegyseg-kishont-koronaja',
+ 'ext': 'mp4',
+ 'upload_date': '20230903',
+ 'thumbnail': 'https://mediaklikk.hu/wp-content/uploads/sites/4/2014/02/hazajarouj_JO.jpg'
+ }
+ }, {
+ # (old) m4sport
+ 'url': 'https://m4sport.hu/video/2021/08/30/gyemant-liga-parizs/',
+ 'info_dict': {
+ 'id': '4754999',
+ 'title': 'Gyémánt Liga, Párizs',
+ 'ext': 'mp4',
+ 'upload_date': '20210830',
+ 'thumbnail': 'http://m4sport.hu/wp-content/uploads/sites/4/2021/08/vlcsnap-2021-08-30-18h21m20s10-1024x576.jpg'
+ },
+ 'skip': 'Webpage redirects to 404 page',
+ }, {
+ # m4sport
+ 'url': 'https://m4sport.hu/sportkozvetitesek/video/2023/09/08/atletika-gyemant-liga-brusszel/',
+ 'info_dict': {
+ 'id': '6711136',
+ 'title': 'Atlétika – Gyémánt Liga, Brüsszel',
+ 'display_id': 'atletika-gyemant-liga-brusszel',
+ 'ext': 'mp4',
+ 'upload_date': '20230908',
+ 'thumbnail': 'https://m4sport.hu/wp-content/uploads/sites/4/2023/09/vlcsnap-2023-09-08-22h43m18s691.jpg'
+ }
+ }, {
+ # m4sport with *video/ url and no date
+ 'url': 'https://m4sport.hu/bl-video/real-madrid-chelsea-1-1/',
+ 'info_dict': {
+ 'id': '4492099',
+ 'title': 'Real Madrid - Chelsea 1-1',
+ 'display_id': 'real-madrid-chelsea-1-1',
+ 'ext': 'mp4',
+ 'thumbnail': 'https://m4sport.hu/wp-content/uploads/sites/4/2021/04/Sequence-01.Still001-1024x576.png'
+ }
+ }, {
+ # (old) hirado
+ 'url': 'https://hirado.hu/videok/felteteleket-szabott-a-fovaros/',
+ 'info_dict': {
+ 'id': '4760120',
+ 'title': 'Feltételeket szabott a főváros',
+ 'ext': 'mp4',
+ 'thumbnail': 'http://hirado.hu/wp-content/uploads/sites/4/2021/09/vlcsnap-2021-09-01-20h20m37s165.jpg'
+ },
+ 'skip': 'Webpage redirects to video list page',
+ }, {
+ # hirado
+ 'url': 'https://hirado.hu/belfold/video/2023/09/11/marad-az-eves-elszamolas-a-napelemekre-beruhazo-csaladoknal',
+ 'info_dict': {
+ 'id': '6716068',
+ 'title': 'Marad az éves elszámolás a napelemekre beruházó családoknál',
+ 'display_id': 'marad-az-eves-elszamolas-a-napelemekre-beruhazo-csaladoknal',
+ 'ext': 'mp4',
+ 'upload_date': '20230911',
+ 'thumbnail': 'https://hirado.hu/wp-content/uploads/sites/4/2023/09/vlcsnap-2023-09-11-09h16m09s882.jpg'
+ }
+ }, {
+ # (old) petofilive
+ 'url': 'https://petofilive.hu/video/2021/06/07/tha-shudras-az-akusztikban/',
+ 'info_dict': {
+ 'id': '4571948',
+ 'title': 'Tha Shudras az Akusztikban',
+ 'ext': 'mp4',
+ 'upload_date': '20210607',
+ 'thumbnail': 'http://petofilive.hu/wp-content/uploads/sites/4/2021/06/vlcsnap-2021-06-07-22h14m23s915-1024x576.jpg'
+ },
+ 'skip': 'Webpage redirects to empty page',
+ }, {
+ # petofilive
+ 'url': 'https://petofilive.hu/video/2023/09/09/futball-fesztival-a-margitszigeten/',
+ 'info_dict': {
+ 'id': '6713233',
+ 'title': 'Futball Fesztivál a Margitszigeten',
+ 'display_id': 'futball-fesztival-a-margitszigeten',
+ 'ext': 'mp4',
+ 'upload_date': '20230909',
+ 'thumbnail': 'https://petofilive.hu/wp-content/uploads/sites/4/2023/09/Clipboard11-2.jpg'
+ }
+ }]
+
+ def _real_extract(self, url):
+ mobj = self._match_valid_url(url)
+ display_id = mobj.group('id')
+ webpage = self._download_webpage(url, display_id)
+
+ player_data_str = self._html_search_regex(
+ r'mtva_player_manager\.player\(document\.getElementById\(.*\),\s?(\{.*\}).*\);', webpage, 'player data')
+ player_data = self._parse_json(player_data_str, display_id, compat_urllib_parse_unquote)
+ video_id = compat_str(player_data['contentId'])
+ title = (player_data.get('title') or self._og_search_title(webpage, fatal=False)
+ or self._html_search_regex(r'<h\d+\b[^>]+\bclass="article_title">([^<]+)<', webpage, 'title'))
+
+ upload_date = unified_strdate(
+ '%s-%s-%s' % (mobj.group('year'), mobj.group('month'), mobj.group('day')))
+ if not upload_date:
+ upload_date = unified_strdate(self._html_search_regex(
+ r'<p\b[^>]+\bclass="article_date">([^<]+)<', webpage, 'upload date', default=None))
+
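+ # the player endpoint expects the stream token in the 'video' query parameter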
+ player_data['video'] = player_data.pop('token')
+ player_page = self._download_webpage('https://player.mediaklikk.hu/playernew/player.php', video_id, query=player_data)
+ player_json = self._search_json(
+ r'\bpl\.setup\s*\(', player_page, 'player json', video_id, end_pattern=r'\);')
+ playlist_url = traverse_obj(
+ player_json, ('playlist', lambda _, v: v['type'] == 'hls', 'file', {url_or_none}), get_all=False)
+ if not playlist_url:
+ raise ExtractorError('Unable to extract playlist url')
+
+ formats = self._extract_wowza_formats(
+ playlist_url, video_id, skip_protocols=['f4m', 'smil', 'dash'])
+
+ return {
+ 'id': video_id,
+ 'title': title,
+ 'display_id': display_id,
+ 'formats': formats,
+ 'upload_date': upload_date,
+ 'thumbnail': player_data.get('bgImage') or self._og_search_thumbnail(webpage)
+ }
diff --git a/yt_dlp/extractor/medialaan.py b/yt_dlp/extractor/medialaan.py
new file mode 100644
index 0000000..bce20dc
--- /dev/null
+++ b/yt_dlp/extractor/medialaan.py
@@ -0,0 +1,111 @@
+import re
+
+from .common import InfoExtractor
+from ..utils import (
+ extract_attributes,
+ int_or_none,
+ mimetype2ext,
+ parse_iso8601,
+)
+
+
+class MedialaanIE(InfoExtractor):
+ _VALID_URL = r'''(?x)
+ https?://
+ (?:
+ (?:embed\.)?mychannels\.video/embed/|
+ embed\.mychannels\.video/(?:s(?:dk|cript)/)?production/|
+ (?:www\.)?(?:
+ (?:
+ 7sur7|
+ demorgen|
+ hln|
+ joe|
+ qmusic
+ )\.be|
+ (?:
+ [abe]d|
+ bndestem|
+ destentor|
+ gelderlander|
+ pzc|
+ tubantia|
+ volkskrant
+ )\.nl
+ )/video/(?:[^/]+/)*[^/?&#]+~p
+ )
+ (?P<id>\d+)
+ '''
+ _TESTS = [{
+ 'url': 'https://www.bndestem.nl/video/de-terugkeer-van-ally-de-aap-en-wie-vertrekt-er-nog-bij-nac~p193993',
+ 'info_dict': {
+ 'id': '193993',
+ 'ext': 'mp4',
+ 'title': 'De terugkeer van Ally de Aap en wie vertrekt er nog bij NAC?',
+ 'timestamp': 1611663540,
+ 'upload_date': '20210126',
+ 'duration': 238,
+ },
+ 'params': {
+ 'skip_download': True,
+ },
+ }, {
+ 'url': 'https://www.gelderlander.nl/video/kanalen/degelderlander~c320/series/snel-nieuws~s984/noodbevel-in-doetinchem-politie-stuurt-mensen-centrum-uit~p194093',
+ 'only_matching': True,
+ }, {
+ 'url': 'https://embed.mychannels.video/sdk/production/193993?options=TFTFF_default',
+ 'only_matching': True,
+ }, {
+ 'url': 'https://embed.mychannels.video/script/production/193993',
+ 'only_matching': True,
+ }, {
+ 'url': 'https://embed.mychannels.video/production/193993',
+ 'only_matching': True,
+ }, {
+ 'url': 'https://mychannels.video/embed/193993',
+ 'only_matching': True,
+ }, {
+ 'url': 'https://embed.mychannels.video/embed/193993',
+ 'only_matching': True,
+ }]
+
+ @classmethod
+ def _extract_embed_urls(cls, url, webpage):
+ entries = []
+ for element in re.findall(r'(<div[^>]+data-mychannels-type="video"[^>]*>)', webpage):
+ mychannels_id = extract_attributes(element).get('data-mychannels-id')
+ if mychannels_id:
+ entries.append('https://mychannels.video/embed/' + mychannels_id)
+ return entries
+
+ def _real_extract(self, url):
+ production_id = self._match_id(url)
+ production = self._download_json(
+ 'https://embed.mychannels.video/sdk/production/' + production_id,
+ production_id, query={'options': 'UUUU_default'})['productions'][0]
+ title = production['title']
+
+ formats = []
+ for source in (production.get('sources') or []):
+ src = source.get('src')
+ if not src:
+ continue
+ ext = mimetype2ext(source.get('type'))
+ if ext == 'm3u8':
+ formats.extend(self._extract_m3u8_formats(
+ src, production_id, 'mp4', 'm3u8_native',
+ m3u8_id='hls', fatal=False))
+ else:
+ formats.append({
+ 'ext': ext,
+ 'url': src,
+ })
+
+ return {
+ 'id': production_id,
+ 'title': title,
+ 'formats': formats,
+ 'thumbnail': production.get('posterUrl'),
+ 'timestamp': parse_iso8601(production.get('publicationDate'), ' '),
+ 'duration': int_or_none(production.get('duration')) or None,
+ }
diff --git a/yt_dlp/extractor/mediaset.py b/yt_dlp/extractor/mediaset.py
new file mode 100644
index 0000000..e04a1ce
--- /dev/null
+++ b/yt_dlp/extractor/mediaset.py
@@ -0,0 +1,321 @@
+import functools
+import re
+
+from .theplatform import ThePlatformBaseIE
+from ..utils import (
+ ExtractorError,
+ GeoRestrictedError,
+ int_or_none,
+ OnDemandPagedList,
+ try_get,
+ urljoin,
+ update_url_query,
+)
+
+
+class MediasetIE(ThePlatformBaseIE):
+ _TP_TLD = 'eu'
+ _GUID_RE = r'F[0-9A-Z]{15}'
+ _VALID_URL = rf'''(?x)
+ (?:
+ mediaset:|
+ https?://
+ (?:\w+\.)+mediaset\.it/
+ (?:
+ (?:video|on-demand|movie)/(?:[^/]+/)+[^/]+_|
+ player/(?:v\d+/)?index\.html\?\S*?\bprogramGuid=
+ )
+ )(?P<id>{_GUID_RE})
+ '''
+
+ _EMBED_REGEX = [
+ rf'<iframe[^>]+src=[\'"](?P<url>(?:https?:)?//(?:\w+\.)+mediaset\.it/player/(?:v\d+/)?index\.html\?\S*?programGuid={_GUID_RE})[\'"&]'
+ ]
+ _TESTS = [{
+ # full episode
+ 'url': 'https://mediasetinfinity.mediaset.it/video/mrwronglezionidamore/episodio-1_F310575103000102',
+ 'md5': 'a7e75c6384871f322adb781d3bd72c26',
+ 'info_dict': {
+ 'id': 'F310575103000102',
+ 'ext': 'mp4',
+ 'title': 'Episodio 1',
+ 'description': 'md5:e8017b7d7194e9bfb75299c2b8d81e02',
+ 'thumbnail': r're:^https?://.*\.jpg$',
+ 'duration': 2682.0,
+ 'upload_date': '20210530',
+ 'series': 'Mr Wrong - Lezioni d\'amore',
+ 'timestamp': 1622413946,
+ 'uploader': 'Canale 5',
+ 'uploader_id': 'C5',
+ 'season': 'Season 1',
+ 'episode': 'Episode 1',
+ 'season_number': 1,
+ 'episode_number': 1,
+ 'chapters': [{'start_time': 0.0, 'end_time': 439.88}, {'start_time': 439.88, 'end_time': 1685.84}, {'start_time': 1685.84, 'end_time': 2682.0}],
+ },
+ }, {
+ 'url': 'https://mediasetinfinity.mediaset.it/video/matrix/puntata-del-25-maggio_F309013801000501',
+ 'md5': '1276f966ac423d16ba255ce867de073e',
+ 'info_dict': {
+ 'id': 'F309013801000501',
+ 'ext': 'mp4',
+ 'title': 'Puntata del 25 maggio',
+ 'description': 'md5:ee2e456e3eb1dba5e814596655bb5296',
+ 'thumbnail': r're:^https?://.*\.jpg$',
+ 'duration': 6565.008,
+ 'upload_date': '20200903',
+ 'series': 'Matrix',
+ 'timestamp': 1599172492,
+ 'uploader': 'Canale 5',
+ 'uploader_id': 'C5',
+ 'season': 'Season 5',
+ 'episode': 'Episode 5',
+ 'season_number': 5,
+ 'episode_number': 5,
+ 'chapters': [{'start_time': 0.0, 'end_time': 3409.08}, {'start_time': 3409.08, 'end_time': 6565.008}],
+ 'categories': ['Informazione'],
+ },
+ }, {
+ # DRM
+ 'url': 'https://mediasetinfinity.mediaset.it/movie/selvaggi/selvaggi_F006474501000101',
+ 'info_dict': {
+ 'id': 'F006474501000101',
+ 'ext': 'mp4',
+ 'title': 'Selvaggi',
+ 'description': 'md5:cfdedbbfdd12d4d0e5dcf1fa1b75284f',
+ 'thumbnail': r're:^https?://.*\.jpg$',
+ 'duration': 5233.01,
+ 'upload_date': '20210729',
+ 'timestamp': 1627594716,
+ 'uploader': 'Cine34',
+ 'uploader_id': 'B6',
+ 'chapters': [{'start_time': 0.0, 'end_time': 1938.56}, {'start_time': 1938.56, 'end_time': 5233.01}],
+ },
+ 'params': {
+ 'ignore_no_formats_error': True,
+ },
+ 'expected_warnings': [
+ 'None of the available releases match the specified AssetType, ProtectionScheme, and/or Format preferences',
+ 'Content behind paywall and DRM',
+ ],
+ 'skip': True,
+ }, {
+ # old domain
+ 'url': 'https://www.mediasetplay.mediaset.it/video/mrwronglezionidamore/episodio-1_F310575103000102',
+ 'only_matching': True,
+ }, {
+ # iframe
+ 'url': 'https://static3.mediasetplay.mediaset.it/player/index.html?appKey=5ad3966b1de1c4000d5cec48&programGuid=FAFU000000665924&id=665924',
+ 'only_matching': True,
+ }, {
+ 'url': 'mediaset:FAFU000000665924',
+ 'only_matching': True,
+ }]
+ _WEBPAGE_TESTS = [{
+ # Mediaset embed
+ 'url': 'http://www.tgcom24.mediaset.it/politica/serracchiani-voglio-vivere-in-una-societa-aperta-reazioni-sproporzionate-_3071354-201702a.shtml',
+ 'info_dict': {
+ 'id': 'FD00000000004929',
+ 'ext': 'mp4',
+ 'title': 'Serracchiani: "Voglio vivere in una società aperta, con tutela del patto di fiducia"',
+ 'duration': 67.013,
+ 'thumbnail': r're:^https?://.*\.jpg$',
+ 'uploader': 'Mediaset Play',
+ 'uploader_id': 'QY',
+ 'upload_date': '20201005',
+ 'timestamp': 1601866168,
+ 'chapters': [],
+ },
+ 'params': {
+ 'skip_download': True,
+ },
+ 'skip': 'Dead link',
+ }, {
+ # WittyTV embed
+ 'url': 'https://www.wittytv.it/mauriziocostanzoshow/ultima-puntata-venerdi-25-novembre/',
+ 'info_dict': {
+ 'id': 'F312172801000801',
+ 'ext': 'mp4',
+ 'title': 'Ultima puntata - Venerdì 25 novembre',
+ 'description': 'Una serata all\'insegna della musica e del buonumore ma non priva di spunti di riflessione',
+ 'duration': 6203.01,
+ 'thumbnail': r're:^https?://.*\.jpg$',
+ 'uploader': 'Canale 5',
+ 'uploader_id': 'C5',
+ 'upload_date': '20221126',
+ 'timestamp': 1669428689,
+ 'chapters': list,
+ 'series': 'Maurizio Costanzo Show',
+ 'season': 'Season 12',
+ 'season_number': 12,
+ 'episode': 'Episode 8',
+ 'episode_number': 8,
+ 'categories': ['Intrattenimento'],
+ },
+ 'params': {
+ 'skip_download': True,
+ }
+ }]
+
+ def _parse_smil_formats_and_subtitles(
+ self, smil, smil_url, video_id, namespace=None, f4m_params=None, transform_rtmp_url=None):
+ for video in smil.findall(self._xpath_ns('.//video', namespace)):
+ video.attrib['src'] = re.sub(r'(https?://vod05)t(-mediaset-it\.akamaized\.net/.+?\.mpd)\?.+', r'\1\2', video.attrib['src'])
+ return super()._parse_smil_formats_and_subtitles(
+ smil, smil_url, video_id, namespace, f4m_params, transform_rtmp_url)
+
+ def _check_drm_formats(self, tp_formats, video_id):
+ has_nondrm, drm_manifest = False, ''
+ for f in tp_formats:
+ if '_sampleaes/' in (f.get('manifest_url') or ''):
+ drm_manifest = drm_manifest or f['manifest_url']
+ f['has_drm'] = True
+ if not f.get('has_drm') and f.get('manifest_url'):
+ has_nondrm = True
+
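+ # guess the unencrypted manifest URL from the SampleAES one and try it as a fallback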
+ nodrm_manifest = re.sub(r'_sampleaes/(\w+)_fp_', r'/\1_no_', drm_manifest)
+ if has_nondrm or nodrm_manifest == drm_manifest:
+ return
+
+ tp_formats.extend(self._extract_m3u8_formats(
+ nodrm_manifest, video_id, m3u8_id='hls', fatal=False) or [])
+
+ def _real_extract(self, url):
+ guid = self._match_id(url)
+ tp_path = f'PR1GhC/media/guid/2702976343/{guid}'
+ info = self._extract_theplatform_metadata(tp_path, guid)
+
+ formats = []
+ subtitles = {}
+ first_e = geo_e = None
+ asset_type = 'geoNo:HD,browser,geoIT|geoNo:HD,geoIT|geoNo:SD,browser,geoIT|geoNo:SD,geoIT|geoNo|HD|SD'
+ # TODO: fixup ISM+none manifest URLs
+ for f in ('MPEG4', 'MPEG-DASH', 'M3U'):
+ try:
+ tp_formats, tp_subtitles = self._extract_theplatform_smil(
+ update_url_query(f'http://link.theplatform.{self._TP_TLD}/s/{tp_path}', {
+ 'mbr': 'true',
+ 'formats': f,
+ 'assetTypes': asset_type,
+ }), guid, f'Downloading {f.split("+")[0]} SMIL data')
+ except ExtractorError as e:
+ if e.orig_msg == 'None of the available releases match the specified AssetType, ProtectionScheme, and/or Format preferences':
+ e.orig_msg = 'This video is DRM protected'
+ if not geo_e and isinstance(e, GeoRestrictedError):
+ geo_e = e
+ if not first_e:
+ first_e = e
+ continue
+ self._check_drm_formats(tp_formats, guid)
+ formats.extend(tp_formats)
+ subtitles = self._merge_subtitles(subtitles, tp_subtitles)
+
+ # check for errors and report them
+ if (first_e or geo_e) and not formats:
+ raise geo_e or first_e
+
+ feed_data = self._download_json(
+ f'https://feed.entertainment.tv.theplatform.eu/f/PR1GhC/mediaset-prod-all-programs-v2/guid/-/{guid}',
+ guid, fatal=False)
+ if feed_data:
+ publish_info = feed_data.get('mediasetprogram$publishInfo') or {}
+ thumbnails = feed_data.get('thumbnails') or {}
+ thumbnail = None
+ for key, value in thumbnails.items():
+ if key.startswith('image_keyframe_poster-'):
+ thumbnail = value.get('url')
+ break
+
+ info.update({
+ 'description': info.get('description') or feed_data.get('description') or feed_data.get('longDescription'),
+ 'uploader': publish_info.get('description'),
+ 'uploader_id': publish_info.get('channel'),
+ 'view_count': int_or_none(feed_data.get('mediasetprogram$numberOfViews')),
+ 'thumbnail': thumbnail,
+ })
+
+ if feed_data.get('programType') == 'episode':
+ info.update({
+ 'episode_number': int_or_none(
+ feed_data.get('tvSeasonEpisodeNumber')),
+ 'season_number': int_or_none(
+ feed_data.get('tvSeasonNumber')),
+ 'series': feed_data.get('mediasetprogram$brandTitle'),
+ })
+
+ info.update({
+ 'id': guid,
+ 'formats': formats,
+ 'subtitles': subtitles,
+ })
+ return info
+
+
+class MediasetShowIE(MediasetIE): # XXX: Do not subclass from concrete IE
+ _VALID_URL = r'''(?x)
+ (?:
+ https?://
+ (\w+\.)+mediaset\.it/
+ (?:
+ (?:fiction|programmi-tv|serie-tv|kids)/(?:.+?/)?
+ (?:[a-z-]+)_SE(?P<id>\d{12})
+ (?:,ST(?P<st>\d{12}))?
+ (?:,sb(?P<sb>\d{9}))?$
+ )
+ )
+ '''
+ _TESTS = [{
+ # TV Show webpage (general webpage)
+ 'url': 'https://mediasetinfinity.mediaset.it/programmi-tv/leiene/leiene_SE000000000061',
+ 'info_dict': {
+ 'id': '000000000061',
+ 'title': 'Le Iene 2022/2023',
+ },
+ 'playlist_mincount': 6,
+ }, {
+ # TV Show webpage (specific season)
+ 'url': 'https://mediasetinfinity.mediaset.it/programmi-tv/leiene/leiene_SE000000000061,ST000000002763',
+ 'info_dict': {
+ 'id': '000000002763',
+ 'title': 'Le Iene 2021/2022',
+ },
+ 'playlist_mincount': 7,
+ }, {
+ # TV Show specific playlist (with multiple pages)
+ 'url': 'https://mediasetinfinity.mediaset.it/programmi-tv/leiene/iservizi_SE000000000061,ST000000002763,sb100013375',
+ 'info_dict': {
+ 'id': '100013375',
+ 'title': 'I servizi',
+ },
+ 'playlist_mincount': 50,
+ }]
+
+ _BY_SUBBRAND = 'https://feed.entertainment.tv.theplatform.eu/f/PR1GhC/mediaset-prod-all-programs-v2?byCustomValue={subBrandId}{%s}&sort=:publishInfo_lastPublished|desc,tvSeasonEpisodeNumber|desc&range=%d-%d'
+ _PAGE_SIZE = 25
+
+ def _fetch_page(self, sb, page):
+ lower_limit = page * self._PAGE_SIZE + 1
+ upper_limit = lower_limit + self._PAGE_SIZE - 1
+ content = self._download_json(
+ self._BY_SUBBRAND % (sb, lower_limit, upper_limit), sb)
+ for entry in content.get('entries') or []:
+ yield self.url_result(
+ 'mediaset:' + entry['guid'],
+ playlist_title=entry['mediasetprogram$subBrandDescription'])
+
+ def _real_extract(self, url):
+ playlist_id, st, sb = self._match_valid_url(url).group('id', 'st', 'sb')
+ if not sb:
+ page = self._download_webpage(url, st or playlist_id)
+ entries = [self.url_result(urljoin('https://mediasetinfinity.mediaset.it', url))
+ for url in re.findall(r'href="([^<>=]+SE\d{12},ST\d{12},sb\d{9})">[^<]+<', page)]
+ title = self._html_extract_title(page).split('|')[0].strip()
+ return self.playlist_result(entries, st or playlist_id, title)
+
+ entries = OnDemandPagedList(
+ functools.partial(self._fetch_page, sb),
+ self._PAGE_SIZE)
+ title = try_get(entries, lambda x: x[0]['playlist_title'])
+
+ return self.playlist_result(entries, sb, title)
diff --git a/yt_dlp/extractor/mediasite.py b/yt_dlp/extractor/mediasite.py
new file mode 100644
index 0000000..7ea78ab
--- /dev/null
+++ b/yt_dlp/extractor/mediasite.py
@@ -0,0 +1,412 @@
+import re
+import json
+
+from .common import InfoExtractor
+from ..compat import (
+ compat_str,
+ compat_urlparse,
+)
+from ..utils import (
+ ExtractorError,
+ float_or_none,
+ mimetype2ext,
+ str_or_none,
+ try_call,
+ try_get,
+ smuggle_url,
+ unsmuggle_url,
+ url_or_none,
+ urljoin,
+)
+
+
+_ID_RE = r'(?:[0-9a-f]{32,34}|[0-9a-f]{8}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{12,14})'
+
+
+class MediasiteIE(InfoExtractor):
+ _VALID_URL = r'(?xi)https?://[^/]+/Mediasite/(?:Play|Showcase/[^/#?]+/Presentation)/(?P<id>%s)(?P<query>\?[^#]+|)' % _ID_RE
+ _EMBED_REGEX = [r'(?xi)<iframe\b[^>]+\bsrc=(["\'])(?P<url>(?:(?:https?:)?//[^/]+)?/Mediasite/Play/%s(?:\?.*?)?)\1' % _ID_RE]
+ _TESTS = [
+ {
+ 'url': 'https://hitsmediaweb.h-its.org/mediasite/Play/2db6c271681e4f199af3c60d1f82869b1d',
+ 'info_dict': {
+ 'id': '2db6c271681e4f199af3c60d1f82869b1d',
+ 'ext': 'mp4',
+ 'title': 'Lecture: Tuesday, September 20, 2016 - Sir Andrew Wiles',
+ 'description': 'Sir Andrew Wiles: “Equations in arithmetic”\\n\\nI will describe some of the interactions between modern number theory and the problem of solving equations in rational numbers or integers\\u0027.',
+ 'timestamp': 1474268400.0,
+ 'upload_date': '20160919',
+ },
+ },
+ {
+ 'url': 'http://mediasite.uib.no/Mediasite/Play/90bb363295d945d6b548c867d01181361d?catalog=a452b7df-9ae1-46b7-a3ba-aceeb285f3eb',
+ 'info_dict': {
+ 'id': '90bb363295d945d6b548c867d01181361d',
+ 'ext': 'mp4',
+ 'upload_date': '20150429',
+ 'title': '5) IT-forum 2015-Dag 1 - Dungbeetle - How and why Rain created a tiny bug tracker for Unity',
+ 'timestamp': 1430311380.0,
+ },
+ },
+ {
+ 'url': 'https://collegerama.tudelft.nl/Mediasite/Play/585a43626e544bdd97aeb71a0ec907a01d',
+ 'md5': '481fda1c11f67588c0d9d8fbdced4e39',
+ 'info_dict': {
+ 'id': '585a43626e544bdd97aeb71a0ec907a01d',
+ 'ext': 'mp4',
+ 'title': 'Een nieuwe wereld: waarden, bewustzijn en techniek van de mensheid 2.0.',
+ 'description': '',
+ 'thumbnail': r're:^https?://.*\.jpg(?:\?.*)?$',
+ 'duration': 7713.088,
+ 'timestamp': 1413309600,
+ 'upload_date': '20141014',
+ },
+ },
+ {
+ 'url': 'https://collegerama.tudelft.nl/Mediasite/Play/86a9ea9f53e149079fbdb4202b521ed21d?catalog=fd32fd35-6c99-466c-89d4-cd3c431bc8a4',
+ 'md5': 'ef1fdded95bdf19b12c5999949419c92',
+ 'info_dict': {
+ 'id': '86a9ea9f53e149079fbdb4202b521ed21d',
+ 'ext': 'wmv',
+ 'title': '64ste Vakantiecursus: Afvalwater',
+ 'description': 'md5:7fd774865cc69d972f542b157c328305',
+ 'thumbnail': r're:^https?://.*\.jpg(?:\?.*?)?$',
+ 'duration': 10853,
+ 'timestamp': 1326446400,
+ 'upload_date': '20120113',
+ },
+ },
+ {
+ 'url': 'http://digitalops.sandia.gov/Mediasite/Play/24aace4429fc450fb5b38cdbf424a66e1d',
+ 'md5': '9422edc9b9a60151727e4b6d8bef393d',
+ 'info_dict': {
+ 'id': '24aace4429fc450fb5b38cdbf424a66e1d',
+ 'ext': 'mp4',
+ 'title': 'Xyce Software Training - Section 1',
+ 'description': r're:(?s)SAND Number: SAND 2013-7800.{200,}',
+ 'upload_date': '20120409',
+ 'timestamp': 1333983600,
+ 'duration': 7794,
+ }
+ },
+ {
+ 'url': 'https://collegerama.tudelft.nl/Mediasite/Showcase/livebroadcast/Presentation/ada7020854f743c49fbb45c9ec7dbb351d',
+ 'only_matching': True,
+ },
+ {
+ 'url': 'https://mediasite.ntnu.no/Mediasite/Showcase/default/Presentation/7d8b913259334b688986e970fae6fcb31d',
+ 'only_matching': True,
+ },
+ {
+ # dashed id
+ 'url': 'https://hitsmediaweb.h-its.org/mediasite/Play/2db6c271-681e-4f19-9af3-c60d1f82869b1d',
+ 'only_matching': True,
+ }
+ ]
+
+ # look in Mediasite.Core.js (Mediasite.ContentStreamType[*])
+ _STREAM_TYPES = {
+ 0: 'video1', # the main video
+ 2: 'slide',
+ 3: 'presentation',
+ 4: 'video2', # screencast?
+ 5: 'video3',
+ }
+
+ @classmethod
+ def _extract_embed_urls(cls, url, webpage):
+ for embed_url in super()._extract_embed_urls(url, webpage):
+ yield smuggle_url(embed_url, {'UrlReferrer': url})
+
+ def __extract_slides(self, *, stream_id, snum, Stream, duration, images):
+ slide_base_url = Stream['SlideBaseUrl']
+
+ fname_template = Stream['SlideImageFileNameTemplate']
+ if fname_template != 'slide_{0:D4}.jpg':
+ self.report_warning('Unusual slide file name template; report a bug if slide downloading fails')
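+ # Convert .NET-style numeric format specifiers to Python ones,
+ # e.g. 'slide_{0:D4}.jpg' -> 'slide_{0:04}.jpg', so that str.format()
+ # zero-pads the slide number the way the server expects.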
+ fname_template = re.sub(r'\{0:D([0-9]+)\}', r'{0:0\1}', fname_template)
+
+ fragments = []
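+ # If the first slide does not start at t=0, pad the gap with the
+ # player's default slide/stream image so that fragment timings stay
+ # aligned with the video.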
+ for i, slide in enumerate(Stream['Slides']):
+ if i == 0:
+ if slide['Time'] > 0:
+ default_slide = images.get('DefaultSlide')
+ if default_slide is None:
+ default_slide = images.get('DefaultStreamImage')
+ if default_slide is not None:
+ default_slide = default_slide['ImageFilename']
+ if default_slide is not None:
+ fragments.append({
+ 'path': default_slide,
+ 'duration': slide['Time'] / 1000,
+ })
+
+ next_time = try_call(
+ lambda: Stream['Slides'][i + 1]['Time'],
+ lambda: duration,
+ lambda: slide['Time'],
+ expected_type=(int, float))
+
+ fragments.append({
+ 'path': fname_template.format(slide.get('Number', i + 1)),
+ 'duration': (next_time - slide['Time']) / 1000
+ })
+
+ return {
+ 'format_id': '%s-%u.slides' % (stream_id, snum),
+ 'ext': 'mhtml',
+ 'url': slide_base_url,
+ 'protocol': 'mhtml',
+ 'acodec': 'none',
+ 'vcodec': 'none',
+ 'format_note': 'Slides',
+ 'fragments': fragments,
+ 'fragment_base_url': slide_base_url,
+ }
+
+ def _real_extract(self, url):
+ url, data = unsmuggle_url(url, {})
+ mobj = self._match_valid_url(url)
+ resource_id = mobj.group('id')
+ query = mobj.group('query')
+
+ webpage, urlh = self._download_webpage_handle(url, resource_id) # XXX: add UrlReferrer?
+ redirect_url = urlh.url
+
+ # XXX: might have also extracted UrlReferrer and QueryString from the html
+ service_path = compat_urlparse.urljoin(redirect_url, self._html_search_regex(
+ r'<div[^>]+\bid=["\']ServicePath[^>]+>(.+?)</div>', webpage, resource_id,
+ default='/Mediasite/PlayerService/PlayerService.svc/json'))
+
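+ # GetPlayerOptions is an ASP.NET .svc JSON endpoint; such services wrap
+ # the response payload in a 'd' envelope, hence the ['d'] below.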
+ player_options = self._download_json(
+ '%s/GetPlayerOptions' % service_path, resource_id,
+ headers={
+ 'Content-type': 'application/json; charset=utf-8',
+ 'X-Requested-With': 'XMLHttpRequest',
+ },
+ data=json.dumps({
+ 'getPlayerOptionsRequest': {
+ 'ResourceId': resource_id,
+ 'QueryString': query,
+ 'UrlReferrer': data.get('UrlReferrer', ''),
+ 'UseScreenReader': False,
+ }
+ }).encode('utf-8'))['d']
+
+ presentation = player_options['Presentation']
+ if presentation is None:
+ raise ExtractorError(
+ 'Mediasite says: %s' % player_options['PlayerPresentationStatusMessage'],
+ expected=True)
+ title = presentation['Title']
+
+ thumbnails = []
+ formats = []
+ for snum, Stream in enumerate(presentation['Streams']):
+ stream_type = Stream.get('StreamType')
+ if stream_type is None:
+ continue
+
+ video_urls = Stream.get('VideoUrls')
+ if not isinstance(video_urls, list):
+ video_urls = []
+
+ stream_id = self._STREAM_TYPES.get(
+ stream_type, 'type%u' % stream_type)
+
+ stream_formats = []
+ for unum, VideoUrl in enumerate(video_urls):
+ video_url = url_or_none(VideoUrl.get('Location'))
+ if not video_url:
+ continue
+ # XXX: if Stream.get('CanChangeScheme', False), switch scheme to HTTP/HTTPS
+
+ media_type = VideoUrl.get('MediaType')
+ if media_type == 'SS':
+ stream_formats.extend(self._extract_ism_formats(
+ video_url, resource_id,
+ ism_id='%s-%u.%u' % (stream_id, snum, unum),
+ fatal=False))
+ elif media_type == 'Dash':
+ stream_formats.extend(self._extract_mpd_formats(
+ video_url, resource_id,
+ mpd_id='%s-%u.%u' % (stream_id, snum, unum),
+ fatal=False))
+ else:
+ stream_formats.append({
+ 'format_id': '%s-%u.%u' % (stream_id, snum, unum),
+ 'url': video_url,
+ 'ext': mimetype2ext(VideoUrl.get('MimeType')),
+ })
+
+ if Stream.get('HasSlideContent', False):
+ images = player_options['PlayerLayoutOptions']['Images']
+ stream_formats.append(self.__extract_slides(
+ stream_id=stream_id,
+ snum=snum,
+ Stream=Stream,
+ duration=presentation.get('Duration'),
+ images=images,
+ ))
+
+ # disprefer 'secondary' streams
+ if stream_type != 0:
+ for fmt in stream_formats:
+ fmt['quality'] = -10
+
+ thumbnail_url = Stream.get('ThumbnailUrl')
+ if thumbnail_url:
+ thumbnails.append({
+ 'id': '%s-%u' % (stream_id, snum),
+ 'url': urljoin(redirect_url, thumbnail_url),
+ 'preference': -1 if stream_type != 0 else 0,
+ })
+ formats.extend(stream_formats)
+
+ # XXX: Presentation['Presenters']
+ # XXX: Presentation['Transcript']
+
+ return {
+ 'id': resource_id,
+ 'title': title,
+ 'description': presentation.get('Description'),
+ 'duration': float_or_none(presentation.get('Duration'), 1000),
+ 'timestamp': float_or_none(presentation.get('UnixTime'), 1000),
+ 'formats': formats,
+ 'thumbnails': thumbnails,
+ }
+
+
+class MediasiteCatalogIE(InfoExtractor):
+ _VALID_URL = r'''(?xi)
+ (?P<url>https?://[^/]+/Mediasite)
+ /Catalog/Full/
+ (?P<catalog_id>{0})
+ (?:
+ /(?P<current_folder_id>{0})
+ /(?P<root_dynamic_folder_id>{0})
+ )?
+ '''.format(_ID_RE)
+ _TESTS = [{
+ 'url': 'http://events7.mediasite.com/Mediasite/Catalog/Full/631f9e48530d454381549f955d08c75e21',
+ 'info_dict': {
+ 'id': '631f9e48530d454381549f955d08c75e21',
+ 'title': 'WCET Summit: Adaptive Learning in Higher Ed: Improving Outcomes Dynamically',
+ },
+ 'playlist_count': 6,
+ 'expected_warnings': ['is not a supported codec'],
+ }, {
+ # with CurrentFolderId and RootDynamicFolderId
+ 'url': 'https://medaudio.medicine.iu.edu/Mediasite/Catalog/Full/9518c4a6c5cf4993b21cbd53e828a92521/97a9db45f7ab47428c77cd2ed74bb98f14/9518c4a6c5cf4993b21cbd53e828a92521',
+ 'info_dict': {
+ 'id': '9518c4a6c5cf4993b21cbd53e828a92521',
+ 'title': 'IUSM Family and Friends Sessions',
+ },
+ 'playlist_count': 2,
+ }, {
+ 'url': 'http://uipsyc.mediasite.com/mediasite/Catalog/Full/d5d79287c75243c58c50fef50174ec1b21',
+ 'only_matching': True,
+ }, {
+ # no AntiForgeryToken
+ 'url': 'https://live.libraries.psu.edu/Mediasite/Catalog/Full/8376d4b24dd1457ea3bfe4cf9163feda21',
+ 'only_matching': True,
+ }, {
+ 'url': 'https://medaudio.medicine.iu.edu/Mediasite/Catalog/Full/9518c4a6c5cf4993b21cbd53e828a92521/97a9db45f7ab47428c77cd2ed74bb98f14/9518c4a6c5cf4993b21cbd53e828a92521',
+ 'only_matching': True,
+ }, {
+ # dashed id
+ 'url': 'http://events7.mediasite.com/Mediasite/Catalog/Full/631f9e48-530d-4543-8154-9f955d08c75e',
+ 'only_matching': True,
+ }]
+
+ def _real_extract(self, url):
+ mobj = self._match_valid_url(url)
+ mediasite_url = mobj.group('url')
+ catalog_id = mobj.group('catalog_id')
+ current_folder_id = mobj.group('current_folder_id') or catalog_id
+ root_dynamic_folder_id = mobj.group('root_dynamic_folder_id')
+
+ webpage = self._download_webpage(url, catalog_id)
+
+ # AntiForgeryToken is optional (e.g. [1])
+ # 1. https://live.libraries.psu.edu/Mediasite/Catalog/Full/8376d4b24dd1457ea3bfe4cf9163feda21
+ anti_forgery_token = self._search_regex(
+ r'AntiForgeryToken\s*:\s*(["\'])(?P<value>(?:(?!\1).)+)\1',
+ webpage, 'anti forgery token', default=None, group='value')
+ if anti_forgery_token:
+ anti_forgery_header = self._search_regex(
+ r'AntiForgeryHeaderName\s*:\s*(["\'])(?P<value>(?:(?!\1).)+)\1',
+ webpage, 'anti forgery header name',
+ default='X-SOFO-AntiForgeryHeader', group='value')
+
+ data = {
+ 'IsViewPage': True,
+ 'IsNewFolder': True,
+ 'AuthTicket': None,
+ 'CatalogId': catalog_id,
+ 'CurrentFolderId': current_folder_id,
+ 'RootDynamicFolderId': root_dynamic_folder_id,
+ 'ItemsPerPage': 1000,
+ 'PageIndex': 0,
+ 'PermissionMask': 'Execute',
+ 'CatalogSearchType': 'SearchInFolder',
+ 'SortBy': 'Date',
+ 'SortDirection': 'Descending',
+ 'StartDate': None,
+ 'EndDate': None,
+ 'StatusFilterList': None,
+ 'PreviewKey': None,
+ 'Tags': [],
+ }
+
+ headers = {
+ 'Content-Type': 'application/json; charset=UTF-8',
+ 'Referer': url,
+ 'X-Requested-With': 'XMLHttpRequest',
+ }
+ if anti_forgery_token:
+ headers[anti_forgery_header] = anti_forgery_token
+
+ catalog = self._download_json(
+ '%s/Catalog/Data/GetPresentationsForFolder' % mediasite_url,
+ catalog_id, data=json.dumps(data).encode(), headers=headers)
+
+ entries = []
+ for video in catalog['PresentationDetailsList']:
+ if not isinstance(video, dict):
+ continue
+ video_id = str_or_none(video.get('Id'))
+ if not video_id:
+ continue
+ entries.append(self.url_result(
+ '%s/Play/%s' % (mediasite_url, video_id),
+ ie=MediasiteIE.ie_key(), video_id=video_id))
+
+ title = try_get(
+ catalog, lambda x: x['CurrentFolder']['Name'], compat_str)
+
+ return self.playlist_result(entries, catalog_id, title)
+
+
+class MediasiteNamedCatalogIE(InfoExtractor):
+ _VALID_URL = r'(?xi)(?P<url>https?://[^/]+/Mediasite)/Catalog/catalogs/(?P<catalog_name>[^/?#&]+)'
+ _TESTS = [{
+ 'url': 'https://msite.misis.ru/Mediasite/Catalog/catalogs/2016-industrial-management-skriabin-o-o',
+ 'only_matching': True,
+ }]
+
+ def _real_extract(self, url):
+ mobj = self._match_valid_url(url)
+ mediasite_url = mobj.group('url')
+ catalog_name = mobj.group('catalog_name')
+
+ webpage = self._download_webpage(url, catalog_name)
+
+ catalog_id = self._search_regex(
+ r'CatalogId\s*:\s*["\'](%s)' % _ID_RE, webpage, 'catalog id')
+
+ return self.url_result(
+ '%s/Catalog/Full/%s' % (mediasite_url, catalog_id),
+ ie=MediasiteCatalogIE.ie_key(), video_id=catalog_id)
diff --git a/yt_dlp/extractor/mediastream.py b/yt_dlp/extractor/mediastream.py
new file mode 100644
index 0000000..ae0fb2a
--- /dev/null
+++ b/yt_dlp/extractor/mediastream.py
@@ -0,0 +1,226 @@
+import re
+
+from .common import InfoExtractor
+from ..utils import (
+ clean_html,
+ filter_dict,
+ parse_qs,
+ remove_end,
+ traverse_obj,
+ update_url_query,
+ urljoin,
+)
+
+
+class MediaStreamBaseIE(InfoExtractor):
+ _EMBED_BASE_URL = 'https://mdstrm.com/embed'
+ _BASE_URL_RE = r'https?://mdstrm\.com/(?:embed|live-stream)'
+
+ def _extract_mediastream_urls(self, webpage):
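+ # Embeds are discovered from, in order: JSON-LD VideoObject
+ # embedUrl/contentUrl values, legacy playerMdStream script calls,
+ # plain <iframe> embeds, and data-video-id attributes on
+ # MediaStreamVideoPlayer <div>/<ps-mediastream> elements.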
+ yield from traverse_obj(list(self._yield_json_ld(webpage, None, fatal=False)), (
+ lambda _, v: v['@type'] == 'VideoObject', ('embedUrl', 'contentUrl'),
+ {lambda x: x if re.match(rf'{self._BASE_URL_RE}/\w+', x) else None}))
+
+ for mobj in re.finditer(r'<script[^>]+>[^>]*playerMdStream\.mdstreamVideo\(\s*[\'"](?P<video_id>\w+)', webpage):
+ yield f'{self._EMBED_BASE_URL}/{mobj.group("video_id")}'
+
+ yield from re.findall(
+ rf'<iframe[^>]+\bsrc="({self._BASE_URL_RE}/\w+)', webpage)
+
+ for mobj in re.finditer(
+ r'''(?x)
+ <(?:div|ps-mediastream)[^>]+
+ (class="[^"]*MediaStreamVideoPlayer)[^"]*"[^>]+
+ data-video-id="(?P<video_id>\w+)"
+ (?:\s*data-video-type="(?P<video_type>[^"]+))?
+ (?:[^>]*>\s*<div[^>]+\1[^"]*"[^>]+data-mediastream=["\'][^>]+
+ https://mdstrm\.com/(?P<live>live-stream))?
+ ''', webpage):
+
+ video_type = 'live-stream' if mobj.group('video_type') == 'live' or mobj.group('live') else 'embed'
+ yield f'https://mdstrm.com/{video_type}/{mobj.group("video_id")}'
+
+
+class MediaStreamIE(MediaStreamBaseIE):
+ _VALID_URL = MediaStreamBaseIE._BASE_URL_RE + r'/(?P<id>\w+)'
+
+ _TESTS = [{
+ 'url': 'https://mdstrm.com/embed/6318e3f1d1d316083ae48831',
+ 'md5': '97b4f2634b8e8612cc574dfcd504df05',
+ 'info_dict': {
+ 'id': '6318e3f1d1d316083ae48831',
+ 'title': 'Video: Así fue el despido de Thomas Tuchel del Chelsea',
+ 'description': 'md5:358ce1e1396010d50a1ece1be3633c95',
+ 'thumbnail': r're:^https?://[^?#]+6318e3f1d1d316083ae48831',
+ 'ext': 'mp4',
+ },
+ 'params': {'skip_download': 'm3u8'},
+ }]
+
+ _WEBPAGE_TESTS = [{
+ 'url': 'https://www.multimedios.com/video/costa-rica-tv-en-vivo/v2616',
+ 'info_dict': {
+ 'id': '5a7b1e63a8da282c34d65445',
+ 'title': 're:mmtv-costarica',
+ 'description': 'mmtv-costarica',
+ 'thumbnail': 're:^https?://[^?#]+5a7b1e63a8da282c34d65445',
+ 'ext': 'mp4',
+ 'live_status': 'is_live',
+ },
+ 'params': {'skip_download': 'Livestream'},
+ }, {
+ 'url': 'https://www.multimedios.com/television/clases-de-llaves-y-castigos-quien-sabe-mas',
+ 'md5': 'de31f0b1ecc321fb35bf22d58734ea40',
+ 'info_dict': {
+ 'id': '63731bab8ec9b308a2c9ed28',
+ 'title': 'Clases de llaves y castigos ¿Quién sabe más?',
+ 'description': 'md5:1b49aa1ee5a4b32fbd66104b2d629e9d',
+ 'thumbnail': 're:^https?://[^?#]+63731bab8ec9b308a2c9ed28',
+ 'ext': 'mp4',
+ },
+ 'params': {'skip_download': 'm3u8'},
+ }, {
+ 'url': 'https://www.americatv.com.pe/videos/esto-es-guerra/facundo-gonzalez-sufrio-fuerte-golpe-durante-competencia-frente-hugo-garcia-eeg-noticia-139120',
+ 'info_dict': {
+ 'id': '63756df1c638b008a5659dec',
+ 'title': 'Facundo González sufrió fuerte golpe durante competencia frente a Hugo García en EEG',
+ 'description': 'md5:9490c034264afd756eef7b2c3adee69e',
+ 'thumbnail': 're:^https?://[^?#]+63756df1c638b008a5659dec',
+ 'ext': 'mp4',
+ },
+ 'params': {'skip_download': 'm3u8'},
+ }, {
+ 'url': 'https://www.americatv.com.pe/videos/al-fondo-hay-sitio/nuevas-lomas-town-bernardo-mata-se-enfrento-sujeto-luchar-amor-macarena-noticia-139083',
+ 'info_dict': {
+ 'id': '637307669609130f74cd3a6e',
+ 'title': 'Las Nuevas Lomas Town: Bernardo De La Mata se enfrentó a sujeto para luchar por el amor de Macarena',
+ 'description': 'md5:60d71772f1e1496923539ae58aa17124',
+ 'thumbnail': 're:^https?://[^?#]+637307669609130f74cd3a6e',
+ 'ext': 'mp4',
+ },
+ 'params': {'skip_download': 'm3u8'},
+ }]
+
+ def _extract_from_webpage(self, url, webpage):
+ for embed_url in self._extract_mediastream_urls(webpage):
+ yield self.url_result(embed_url, MediaStreamIE, None)
+
+ def _real_extract(self, url):
+ video_id = self._match_id(url)
+ webpage = self._download_webpage(url, video_id)
+
+ for message in [
+ 'Debido a tu ubicación no puedes ver el contenido',
+ 'You are not allowed to watch this video: Geo Fencing Restriction',
+ 'Este contenido no está disponible en tu zona geográfica.',
+ 'El contenido sólo está disponible dentro de',
+ ]:
+ if message in webpage:
+ self.raise_geo_restricted()
+
+ player_config = self._search_json(r'window\.MDSTRM\.OPTIONS\s*=', webpage, 'metadata', video_id)
+
+ formats, subtitles = [], {}
+ for video_format in player_config['src']:
+ if video_format == 'hls':
+ params = {
+ 'at': 'web-app',
+ 'access_token': traverse_obj(parse_qs(url), ('access_token', 0)),
+ }
+ for name, key in (('MDSTRMUID', 'uid'), ('MDSTRMSID', 'sid'), ('MDSTRMPID', 'pid'), ('VERSION', 'av')):
+ params[key] = self._search_regex(
+ rf'window\.{name}\s*=\s*["\']([^"\']+)["\'];', webpage, key, default=None)
+
+ fmts, subs = self._extract_m3u8_formats_and_subtitles(
+ update_url_query(player_config['src'][video_format], filter_dict(params)), video_id)
+ formats.extend(fmts)
+ self._merge_subtitles(subs, target=subtitles)
+ elif video_format == 'mpd':
+ fmts, subs = self._extract_mpd_formats_and_subtitles(player_config['src'][video_format], video_id)
+ formats.extend(fmts)
+ self._merge_subtitles(subs, target=subtitles)
+ else:
+ formats.append({
+ 'url': player_config['src'][video_format],
+ })
+
+ return {
+ 'id': video_id,
+ 'title': self._og_search_title(webpage) or player_config.get('title'),
+ 'description': self._og_search_description(webpage),
+ 'formats': formats,
+ 'subtitles': subtitles,
+ 'is_live': player_config.get('type') == 'live',
+ 'thumbnail': self._og_search_thumbnail(webpage),
+ }
+
+
+class WinSportsVideoIE(MediaStreamBaseIE):
+ _VALID_URL = r'https?://www\.winsports\.co/videos/(?P<id>[\w-]+)'
+
+ _TESTS = [{
+ 'url': 'https://www.winsports.co/videos/siempre-castellanos-gran-atajada-del-portero-cardenal-para-evitar-la-caida-de-su-arco-60536',
+ 'info_dict': {
+ 'id': '62dc8357162c4b0821fcfb3c',
+ 'display_id': 'siempre-castellanos-gran-atajada-del-portero-cardenal-para-evitar-la-caida-de-su-arco-60536',
+ 'title': '¡Siempre Castellanos! Gran atajada del portero \'cardenal\' para evitar la caída de su arco',
+ 'description': 'md5:eb811b2b2882bdc59431732c06b905f2',
+ 'thumbnail': r're:^https?://[^?#]+62dc8357162c4b0821fcfb3c',
+ 'ext': 'mp4',
+ },
+ 'params': {'skip_download': 'm3u8'},
+ }, {
+ 'url': 'https://www.winsports.co/videos/observa-aqui-los-goles-del-empate-entre-tolima-y-nacional-60548',
+ 'info_dict': {
+ 'id': '62dcb875ef12a5526790b552',
+ 'display_id': 'observa-aqui-los-goles-del-empate-entre-tolima-y-nacional-60548',
+ 'title': 'Observa aquí los goles del empate entre Tolima y Nacional',
+ 'description': 'md5:b19402ba6e46558b93fd24b873eea9c9',
+ 'thumbnail': r're:^https?://[^?#]+62dcb875ef12a5526790b552',
+ 'ext': 'mp4',
+ },
+ 'params': {'skip_download': 'm3u8'},
+ }, {
+ 'url': 'https://www.winsports.co/videos/equidad-vuelve-defender-su-arco-de-remates-de-junior',
+ 'info_dict': {
+ 'id': '63fa7eca72f1741ad3a4d515',
+ 'display_id': 'equidad-vuelve-defender-su-arco-de-remates-de-junior',
+ 'title': '⚽ Equidad vuelve a defender su arco de remates de Junior',
+ 'description': 'Remate de Sierra',
+ 'thumbnail': r're:^https?://[^?#]+63fa7eca72f1741ad3a4d515',
+ 'ext': 'mp4',
+ },
+ 'params': {'skip_download': 'm3u8'},
+ }, {
+ 'url': 'https://www.winsports.co/videos/bucaramanga-se-quedo-con-el-grito-de-gol-en-la-garganta',
+ 'info_dict': {
+ 'id': '6402adb62bbf3b18d454e1b0',
+ 'display_id': 'bucaramanga-se-quedo-con-el-grito-de-gol-en-la-garganta',
+ 'title': '⚽Bucaramanga se quedó con el grito de gol en la garganta',
+ 'description': 'Gol anulado Bucaramanga',
+ 'thumbnail': r're:^https?://[^?#]+6402adb62bbf3b18d454e1b0',
+ 'ext': 'mp4',
+ },
+ 'params': {'skip_download': 'm3u8'},
+ }]
+
+ def _real_extract(self, url):
+ display_id = self._match_id(url)
+ webpage = self._download_webpage(url, display_id)
+ data = self._search_json(
+ r'<script\s*[^>]+data-drupal-selector="drupal-settings-json">', webpage, 'data', display_id)
+
+ mediastream_url = urljoin(f'{self._EMBED_BASE_URL}/', (
+ traverse_obj(data, (
+ (('settings', 'mediastream_formatter', ..., 'mediastream_id'), 'url'), {str}), get_all=False)
+ or next(self._extract_mediastream_urls(webpage), None)))
+
+ if not mediastream_url:
+ self.raise_no_formats('No MediaStream embed found in webpage')
+
+ title = clean_html(remove_end(
+ self._search_json_ld(webpage, display_id, expected_type='VideoObject', default={}).get('title')
+ or self._og_search_title(webpage), '| Win Sports'))
+
+ return self.url_result(
+ mediastream_url, MediaStreamIE, display_id, url_transparent=True, display_id=display_id, video_title=title)
diff --git a/yt_dlp/extractor/mediaworksnz.py b/yt_dlp/extractor/mediaworksnz.py
new file mode 100644
index 0000000..62e37d2
--- /dev/null
+++ b/yt_dlp/extractor/mediaworksnz.py
@@ -0,0 +1,103 @@
+import re
+
+from .common import InfoExtractor
+from ..utils import (
+ bug_reports_message,
+ float_or_none,
+ traverse_obj,
+ unified_timestamp,
+)
+
+
+class MediaWorksNZVODIE(InfoExtractor):
+ _VALID_URL_BASE_RE = r'https?://vodupload-api\.mediaworks\.nz/library/asset/published/'
+ _VALID_URL_ID_RE = r'(?P<id>[A-Za-z0-9-]+)'
+ _VALID_URL = rf'{_VALID_URL_BASE_RE}{_VALID_URL_ID_RE}'
+ _TESTS = [{
+ 'url': 'https://vodupload-api.mediaworks.nz/library/asset/published/VID00359',
+ 'info_dict': {
+ 'id': 'VID00359',
+ 'ext': 'mp4',
+ 'title': 'GRG Jacinda Ardern safe drug testing 1920x1080',
+ 'description': 'md5:d4d7dc366742e86d8130b257dcb520ba',
+ 'duration': 142.76,
+ 'timestamp': 1604268608,
+ 'upload_date': '20201101',
+ 'thumbnail': r're:^https?://.*\.jpg$',
+ 'channel': 'George FM'
+ }
+ }, {
+ # has audio-only format
+ 'url': 'https://vodupload-api.mediaworks.nz/library/asset/published/VID02627',
+ 'info_dict': {
+ 'id': 'VID02627',
+ 'ext': 'mp3',
+ 'title': 'Tova O\'Brien meets Ukraine President Volodymyr Zelensky',
+ 'channel': 'Today FM',
+ 'description': 'Watch in full the much anticipated interview of Volodymyr Zelensky',
+ 'duration': 2061.16,
+ 'thumbnail': r're:^https?://.*\.jpg$',
+ 'upload_date': '20220822',
+ 'timestamp': 1661152289,
+ },
+ 'params': {'format': 'ba[ext=mp3]'}
+ }]
+
+ _WEBPAGE_TESTS = [{
+ 'url': 'https://www.rova.nz/home/podcasts/socrates-walks-into-a-bar/the-trolley-problem---episode-1.html',
+ 'info_dict': {
+ 'id': 'VID02494',
+ 'ext': 'mp4',
+ 'title': 'The Trolley Problem',
+ 'duration': 2843.56,
+ 'channel': 'Other',
+ 'timestamp': 1658356489,
+ 'thumbnail': r're:^https?://.*\.jpg$',
+ 'description': 'Socrates Walks Into A Bar Podcast Episode 1',
+ 'upload_date': '20220720',
+ }
+ }]
+
+ @classmethod
+ def _extract_embed_urls(cls, url, webpage):
+ for mobj in re.finditer(
+ rf'''(?x)<div\s+\bid=["']Player-Attributes-JWID[^>]+\b
+ data-request-url=["']{cls._VALID_URL_BASE_RE}["'][^>]+\b
+ data-asset-id=["']{cls._VALID_URL_ID_RE}["']''', webpage
+ ):
+ yield f'https://vodupload-api.mediaworks.nz/library/asset/published/{mobj.group("id")}'
+
+ def _real_extract(self, url):
+ video_id = self._match_id(url)
+ asset = self._download_json(url, video_id)['asset']
+
+ if asset.get('drm') not in ('NonDRM', None):
+ self.report_drm(video_id)
+
+ content_type = asset.get('type')
+ if content_type and content_type != 'video':
+ self.report_warning(f'Unknown content type: {content_type}' + bug_reports_message(), video_id)
+
+ formats, subtitles = self._extract_m3u8_formats_and_subtitles(asset['streamingUrl'], video_id)
+
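+ # 'palyoutPathAudio' appears to be misspelt in the API response itself,
+ # so both spellings are checked.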
+ audio_streaming_url = traverse_obj(
+ asset, 'palyoutPathAudio', 'playoutpathaudio', expected_type=str)
+ if audio_streaming_url:
+ audio_formats = self._extract_m3u8_formats(audio_streaming_url, video_id, fatal=False, ext='mp3')
+ for audio_format in audio_formats:
+ # all the audio streams appear to be aac
+ audio_format.setdefault('vcodec', 'none')
+ audio_format.setdefault('acodec', 'aac')
+ formats.append(audio_format)
+
+ return {
+ 'id': video_id,
+ 'title': asset.get('title'),
+ 'description': asset.get('description'),
+ 'duration': float_or_none(asset.get('duration')),
+ 'timestamp': unified_timestamp(asset.get('dateadded')),
+ 'channel': asset.get('brand'),
+ 'thumbnails': [{'url': thumbnail_url} for thumbnail_url in asset.get('thumbnails') or []],
+ 'formats': formats,
+ 'subtitles': subtitles,
+ }
diff --git a/yt_dlp/extractor/medici.py b/yt_dlp/extractor/medici.py
new file mode 100644
index 0000000..328ccd2
--- /dev/null
+++ b/yt_dlp/extractor/medici.py
@@ -0,0 +1,67 @@
+from .common import InfoExtractor
+from ..utils import (
+ unified_strdate,
+ update_url_query,
+ urlencode_postdata,
+)
+
+
+class MediciIE(InfoExtractor):
+ _VALID_URL = r'https?://(?:www\.)?medici\.tv/#!/(?P<id>[^?#&]+)'
+ _TEST = {
+ 'url': 'http://www.medici.tv/#!/daniel-harding-frans-helmerson-verbier-festival-music-camp',
+ 'md5': '004c21bb0a57248085b6ff3fec72719d',
+ 'info_dict': {
+ 'id': '3059',
+ 'ext': 'flv',
+ 'title': 'Daniel Harding conducts the Verbier Festival Music Camp \u2013 With Frans Helmerson',
+ 'description': 'md5:322a1e952bafb725174fd8c1a8212f58',
+ 'thumbnail': r're:^https?://.*\.jpg$',
+ 'upload_date': '20170408',
+ },
+ }
+
+ def _real_extract(self, url):
+ video_id = self._match_id(url)
+
+ # Sets csrftoken cookie
+ self._download_webpage(url, video_id)
+
+ MEDICI_URL = 'http://www.medici.tv/'
+
+ data = self._download_json(
+ MEDICI_URL, video_id,
+ data=urlencode_postdata({
+ 'json': 'true',
+ 'page': '/%s' % video_id,
+ 'timezone_offset': -420,
+ }), headers={
+ 'X-CSRFToken': self._get_cookies(url)['csrftoken'].value,
+ 'X-Requested-With': 'XMLHttpRequest',
+ 'Referer': MEDICI_URL,
+ 'Content-Type': 'application/x-www-form-urlencoded',
+ })
+
+ video = data['video']['videos']['video1']
+
+ title = video.get('nom') or data['title']
+
+ video_id = video.get('id') or video_id
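+ # Akamai HDS servers typically expect the hdcore/plugin query
+ # parameters on the f4m manifest URL.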
+ formats = self._extract_f4m_formats(
+ update_url_query(video['url_akamai'], {
+ 'hdcore': '3.1.0',
+ 'plugin': 'aasp-3.1.0.43.124',
+ }), video_id, f4m_id='hds')
+
+ description = data.get('meta_description')
+ thumbnail = video.get('url_thumbnail') or data.get('main_image')
+ upload_date = unified_strdate(data['video'].get('date'))
+
+ return {
+ 'id': video_id,
+ 'title': title,
+ 'description': description,
+ 'thumbnail': thumbnail,
+ 'upload_date': upload_date,
+ 'formats': formats,
+ }
diff --git a/yt_dlp/extractor/megaphone.py b/yt_dlp/extractor/megaphone.py
new file mode 100644
index 0000000..d249a84
--- /dev/null
+++ b/yt_dlp/extractor/megaphone.py
@@ -0,0 +1,46 @@
+from .common import InfoExtractor
+from ..utils import js_to_json
+
+
+class MegaphoneIE(InfoExtractor):
+ IE_NAME = 'megaphone.fm'
+ IE_DESC = 'megaphone.fm embedded players'
+ _VALID_URL = r'https?://player\.megaphone\.fm/(?P<id>[A-Z0-9]+)'
+ _EMBED_REGEX = [rf'<iframe[^>]*?\ssrc=["\'](?P<url>{_VALID_URL})']
+ _TEST = {
+ 'url': 'https://player.megaphone.fm/GLT9749789991',
+ 'md5': '4816a0de523eb3e972dc0dda2c191f96',
+ 'info_dict': {
+ 'id': 'GLT9749789991',
+ 'ext': 'mp3',
+ 'title': '#97 What Kind Of Idiot Gets Phished?',
+ 'thumbnail': r're:^https://.*\.png.*$',
+ 'duration': 1998.36,
+ 'creators': ['Reply All'],
+ },
+ }
+
+ def _real_extract(self, url):
+ video_id = self._match_id(url)
+ webpage = self._download_webpage(url, video_id)
+
+ title = self._og_search_property('audio:title', webpage)
+ author = self._og_search_property('audio:artist', webpage)
+ thumbnail = self._og_search_thumbnail(webpage)
+
+ episode_json = self._search_regex(r'(?s)var\s+episode\s*=\s*(\{.+?\});', webpage, 'episode JSON')
+ episode_data = self._parse_json(episode_json, video_id, js_to_json)
+ video_url = self._proto_relative_url(episode_data['mediaUrl'], 'https:')
+
+ formats = [{
+ 'url': video_url,
+ }]
+
+ return {
+ 'id': video_id,
+ 'thumbnail': thumbnail,
+ 'title': title,
+ 'creators': [author] if author else None,
+ 'duration': episode_data['duration'],
+ 'formats': formats,
+ }
diff --git a/yt_dlp/extractor/megatvcom.py b/yt_dlp/extractor/megatvcom.py
new file mode 100644
index 0000000..93c7e7d
--- /dev/null
+++ b/yt_dlp/extractor/megatvcom.py
@@ -0,0 +1,164 @@
+import re
+
+from .common import InfoExtractor
+from ..networking import HEADRequest
+from ..utils import (
+ ExtractorError,
+ clean_html,
+ determine_ext,
+ extract_attributes,
+ get_element_by_class,
+ get_element_html_by_id,
+ parse_qs,
+ unescapeHTML,
+ unified_timestamp,
+)
+
+
+class MegaTVComBaseIE(InfoExtractor):
+ _PLAYER_DIV_ID = 'player_div_id'
+
+ def _extract_player_attrs(self, webpage):
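+ # Player options are exposed as data-* (or data-kwik_*) attributes on
+ # the player <div>; strip the prefixes so that e.g. data-kwik_source
+ # becomes 'source'.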
+ player_el = get_element_html_by_id(self._PLAYER_DIV_ID, webpage)
+ return {
+ re.sub(r'^data-(?:kwik_)?', '', k): v
+ for k, v in extract_attributes(player_el).items()
+ if k not in ('id',)
+ }
+
+
+class MegaTVComIE(MegaTVComBaseIE):
+ IE_NAME = 'megatvcom'
+ IE_DESC = 'megatv.com videos'
+ _VALID_URL = r'https?://(?:www\.)?megatv\.com/(?:\d{4}/\d{2}/\d{2}|[^/]+/(?P<id>\d+))/(?P<slug>[^/]+)'
+
+ _TESTS = [{
+ 'url': 'https://www.megatv.com/2021/10/23/egkainia-gia-ti-nea-skini-omega-tou-dimotikou-theatrou-peiraia/',
+ 'md5': '6546a1a37fff0dd51c9dce5f490b7d7d',
+ 'info_dict': {
+ 'id': '520979',
+ 'ext': 'mp4',
+ 'title': 'md5:70eef71a9cd2c1ecff7ee428354dded2',
+ 'description': 'md5:0209fa8d318128569c0d256a5c404db1',
+ 'timestamp': 1634975747,
+ 'upload_date': '20211023',
+ 'display_id': 'egkainia-gia-ti-nea-skini-omega-tou-dimotikou-theatrou-peiraia',
+ 'thumbnail': 'https://www.megatv.com/wp-content/uploads/2021/10/ΠΕΙΡΑΙΑΣ-1024x450.jpg',
+ },
+ }, {
+ 'url': 'https://www.megatv.com/tvshows/527800/epeisodio-65-12/',
+ 'md5': 'cba2085d45c1abeb8e7e9b7e1d6c0072',
+ 'info_dict': {
+ 'id': '527800',
+ 'ext': 'mp4',
+ 'title': 'md5:fc322cb51f682eecfe2f54cd5ab3a157',
+ 'description': 'md5:b2b7ed3690a78f2a0156eb790fdc00df',
+ 'timestamp': 1636048859,
+ 'upload_date': '20211104',
+ 'display_id': 'epeisodio-65-12',
+ 'thumbnail': 'https://www.megatv.com/wp-content/uploads/2021/11/16-1-1.jpg',
+ },
+ }]
+
+ def _real_extract(self, url):
+ video_id, display_id = self._match_valid_url(url).group('id', 'slug')
+ _is_article = video_id is None
+ webpage = self._download_webpage(url, video_id or display_id)
+ if _is_article:
+ video_id = self._search_regex(
+ r'<article[^>]*\sid=["\']Article_(\d+)["\']', webpage, 'article id')
+ player_attrs = self._extract_player_attrs(webpage)
+ title = player_attrs.get('label') or self._og_search_title(webpage)
+ description = get_element_by_class(
+ 'article-wrapper' if _is_article else 'story_content',
+ webpage)
+ description = clean_html(re.sub(r'<script[^>]*>[^<]+</script>', '', description or ''))
+ if not description:
+ description = self._og_search_description(webpage)
+ thumbnail = player_attrs.get('image') or self._og_search_thumbnail(webpage)
+ timestamp = unified_timestamp(self._html_search_meta(
+ 'article:published_time', webpage))
+ source = player_attrs.get('source')
+ if not source:
+ raise ExtractorError('No source found', video_id=video_id)
+ if determine_ext(source) == 'm3u8':
+ formats, subs = self._extract_m3u8_formats_and_subtitles(source, video_id, 'mp4')
+ else:
+ formats, subs = [{'url': source}], {}
+ if player_attrs.get('subs'):
+ self._merge_subtitles({'und': [{'url': player_attrs['subs']}]}, target=subs)
+ return {
+ 'id': video_id,
+ 'display_id': display_id,
+ 'title': title,
+ 'description': description,
+ 'thumbnail': thumbnail,
+ 'timestamp': timestamp,
+ 'formats': formats,
+ 'subtitles': subs,
+ }
+
+
+class MegaTVComEmbedIE(MegaTVComBaseIE):
+ IE_NAME = 'megatvcom:embed'
+ IE_DESC = 'megatv.com embedded videos'
+ _VALID_URL = r'(?:https?:)?//(?:www\.)?megatv\.com/embed/?\?p=(?P<id>\d+)'
+ _EMBED_REGEX = [rf'''<iframe[^>]+?src=(?P<_q1>["'])(?P<url>{_VALID_URL})(?P=_q1)''']
+
+ _TESTS = [{
+ 'url': 'https://www.megatv.com/embed/?p=2020520979',
+ 'md5': '6546a1a37fff0dd51c9dce5f490b7d7d',
+ 'info_dict': {
+ 'id': '520979',
+ 'ext': 'mp4',
+ 'title': 'md5:70eef71a9cd2c1ecff7ee428354dded2',
+ 'description': 'md5:0209fa8d318128569c0d256a5c404db1',
+ 'timestamp': 1634975747,
+ 'upload_date': '20211023',
+ 'display_id': 'egkainia-gia-ti-nea-skini-omega-tou-dimotikou-theatrou-peiraia',
+ 'thumbnail': 'https://www.megatv.com/wp-content/uploads/2021/10/ΠΕΙΡΑΙΑΣ-1024x450.jpg',
+ },
+ }, {
+ 'url': 'https://www.megatv.com/embed/?p=2020534081',
+ 'md5': '6ac8b3ce4dc6120c802f780a1e6b3812',
+ 'info_dict': {
+ 'id': '534081',
+ 'ext': 'mp4',
+ 'title': 'md5:062e9d5976ef854d8bdc1f5724d9b2d0',
+ 'description': 'md5:36dbe4c3762d2ede9513eea8d07f6d52',
+ 'timestamp': 1636376351,
+ 'upload_date': '20211108',
+ 'display_id': 'neo-rekor-stin-timi-tou-ilektrikou-reymatos-pano-apo-ta-200e-i-xondriki-timi-tou-ilektrikou',
+ 'thumbnail': 'https://www.megatv.com/wp-content/uploads/2021/11/Capture-266.jpg',
+ },
+ }]
+
+ def _match_canonical_url(self, webpage):
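+ # <link> attributes may appear in any order, so match each attribute
+ # independently and only accept tags where both rel=canonical and an
+ # href were captured.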
+ LINK_RE = r'''(?x)
+ <link(?:
+ rel=(?P<_q1>["'])(?P<canonical>canonical)(?P=_q1)|
+ href=(?P<_q2>["'])(?P<href>(?:(?!(?P=_q2)).)+)(?P=_q2)|
+ [^>]*?
+ )+>
+ '''
+ for mobj in re.finditer(LINK_RE, webpage):
+ canonical, href = mobj.group('canonical', 'href')
+ if canonical and href:
+ return unescapeHTML(href)
+
+ def _real_extract(self, url):
+ video_id = self._match_id(url)
+ webpage = self._download_webpage(url, video_id)
+ player_attrs = self._extract_player_attrs(webpage)
+ canonical_url = player_attrs.get('share_url') or self._match_canonical_url(webpage)
+ if not canonical_url:
+ raise ExtractorError('canonical URL not found')
+ video_id = parse_qs(canonical_url)['p'][0]
+
+ # Defer to megatvcom, since the metadata extracted from the embeddable
+ # page is sometimes slightly different for the same video
+ canonical_url = self._request_webpage(
+ HEADRequest(canonical_url), video_id,
+ note='Resolve canonical URL',
+ errnote='Could not resolve canonical URL').url
+ return self.url_result(canonical_url, MegaTVComIE.ie_key(), video_id)
diff --git a/yt_dlp/extractor/meipai.py b/yt_dlp/extractor/meipai.py
new file mode 100644
index 0000000..1a6f3cd
--- /dev/null
+++ b/yt_dlp/extractor/meipai.py
@@ -0,0 +1,99 @@
+from .common import InfoExtractor
+from ..utils import (
+ int_or_none,
+ parse_duration,
+ unified_timestamp,
+)
+
+
+class MeipaiIE(InfoExtractor):
+ IE_DESC = '美拍'
+ _VALID_URL = r'https?://(?:www\.)?meipai\.com/media/(?P<id>[0-9]+)'
+ _TESTS = [{
+ # regular uploaded video
+ 'url': 'http://www.meipai.com/media/531697625',
+ 'md5': 'e3e9600f9e55a302daecc90825854b4f',
+ 'info_dict': {
+ 'id': '531697625',
+ 'ext': 'mp4',
+ 'title': '#葉子##阿桑##余姿昀##超級女聲#',
+ 'description': '#葉子##阿桑##余姿昀##超級女聲#',
+ 'thumbnail': r're:^https?://.*\.jpg$',
+ 'duration': 152,
+ 'timestamp': 1465492420,
+ 'upload_date': '20160609',
+ 'view_count': 35511,
+ 'creator': '她她-TATA',
+ 'tags': ['葉子', '阿桑', '余姿昀', '超級女聲'],
+ }
+ }, {
+ # record of live streaming
+ 'url': 'http://www.meipai.com/media/585526361',
+ 'md5': 'ff7d6afdbc6143342408223d4f5fb99a',
+ 'info_dict': {
+ 'id': '585526361',
+ 'ext': 'mp4',
+ 'title': '姿昀和善願 練歌練琴啦😁😁😁',
+ 'description': '姿昀和善願 練歌練琴啦😁😁😁',
+ 'thumbnail': r're:^https?://.*\.jpg$',
+ 'duration': 5975,
+ 'timestamp': 1474311799,
+ 'upload_date': '20160919',
+ 'view_count': 1215,
+ 'creator': '她她-TATA',
+ }
+ }]
+
+ def _real_extract(self, url):
+ video_id = self._match_id(url)
+ webpage = self._download_webpage(url, video_id)
+
+ title = self._generic_title('', webpage)
+
+ formats = []
+
+ # recorded playback of live streaming
+ m3u8_url = self._html_search_regex(
+ r'file:\s*encodeURIComponent\((["\'])(?P<url>(?:(?!\1).)+)\1\)',
+ webpage, 'm3u8 url', group='url', default=None)
+ if m3u8_url:
+ formats.extend(self._extract_m3u8_formats(
+ m3u8_url, video_id, 'mp4', entry_protocol='m3u8_native',
+ m3u8_id='hls', fatal=False))
+
+ if not formats:
+ # regular uploaded video
+ video_url = self._search_regex(
+ r'data-video=(["\'])(?P<url>(?:(?!\1).)+)\1', webpage, 'video url',
+ group='url', default=None)
+ if video_url:
+ formats.append({
+ 'url': video_url,
+ 'format_id': 'http',
+ })
+
+ timestamp = unified_timestamp(self._og_search_property(
+ 'video:release_date', webpage, 'release date', fatal=False))
+
+ tags = [tag for tag in self._og_search_property(
+ 'video:tag', webpage, 'tags', default='').split(',') if tag]
+
+ view_count = int_or_none(self._html_search_meta(
+ 'interactionCount', webpage, 'view count'))
+ duration = parse_duration(self._html_search_meta(
+ 'duration', webpage, 'duration'))
+ creator = self._og_search_property(
+ 'video:director', webpage, 'creator', fatal=False)
+
+ return {
+ 'id': video_id,
+ 'title': title,
+ 'description': self._og_search_description(webpage),
+ 'thumbnail': self._og_search_thumbnail(webpage),
+ 'duration': duration,
+ 'timestamp': timestamp,
+ 'view_count': view_count,
+ 'creator': creator,
+ 'tags': tags,
+ 'formats': formats,
+ }
diff --git a/yt_dlp/extractor/melonvod.py b/yt_dlp/extractor/melonvod.py
new file mode 100644
index 0000000..1d3fff8
--- /dev/null
+++ b/yt_dlp/extractor/melonvod.py
@@ -0,0 +1,68 @@
+from .common import InfoExtractor
+from ..utils import (
+ int_or_none,
+ urljoin,
+)
+
+
+class MelonVODIE(InfoExtractor):
+ _VALID_URL = r'https?://vod\.melon\.com/video/detail2\.html?\?.*?mvId=(?P<id>[0-9]+)'
+ _TEST = {
+ 'url': 'http://vod.melon.com/video/detail2.htm?mvId=50158734',
+ 'info_dict': {
+ 'id': '50158734',
+ 'ext': 'mp4',
+ 'title': "Jessica 'Wonderland' MV Making Film",
+ 'thumbnail': r're:^https?://.*\.jpg$',
+ 'artist': 'Jessica (제시카)',
+ 'upload_date': '20161212',
+ 'duration': 203,
+ },
+ 'params': {
+ 'skip_download': 'm3u8 download',
+ }
+ }
+
+ def _real_extract(self, url):
+ video_id = self._match_id(url)
+
+ play_info = self._download_json(
+ 'http://vod.melon.com/video/playerInfo.json', video_id,
+ note='Downloading player info JSON', query={'mvId': video_id})
+
+ title = play_info['mvInfo']['MVTITLE']
+
+ info = self._download_json(
+ 'http://vod.melon.com/delivery/streamingInfo.json', video_id,
+ note='Downloading streaming info JSON',
+ query={
+ 'contsId': video_id,
+ 'contsType': 'VIDEO',
+ })
+
+ stream_info = info['streamingInfo']
+
+ formats = self._extract_m3u8_formats(
+ stream_info['encUrl'], video_id, 'mp4', m3u8_id='hls')
+
+ artist_list = play_info.get('artistList')
+ artist = None
+ if isinstance(artist_list, list):
+ artist = ', '.join(
+ [a['ARTISTNAMEWEBLIST']
+ for a in artist_list if a.get('ARTISTNAMEWEBLIST')])
+
+ thumbnail = urljoin(info.get('staticDomain'), stream_info.get('imgPath'))
+
+ duration = int_or_none(stream_info.get('playTime'))
+ upload_date = stream_info.get('mvSvcOpenDt', '')[:8] or None
+
+ return {
+ 'id': video_id,
+ 'title': title,
+ 'artist': artist,
+ 'thumbnail': thumbnail,
+ 'upload_date': upload_date,
+ 'duration': duration,
+ 'formats': formats
+ }
diff --git a/yt_dlp/extractor/metacritic.py b/yt_dlp/extractor/metacritic.py
new file mode 100644
index 0000000..1441054
--- /dev/null
+++ b/yt_dlp/extractor/metacritic.py
@@ -0,0 +1,62 @@
+import re
+
+from .common import InfoExtractor
+from ..utils import (
+ fix_xml_ampersands,
+)
+
+
+class MetacriticIE(InfoExtractor):
+ _VALID_URL = r'https?://(?:www\.)?metacritic\.com/.+?/trailers/(?P<id>\d+)'
+
+ _TESTS = [{
+ 'url': 'http://www.metacritic.com/game/playstation-4/infamous-second-son/trailers/3698222',
+ 'info_dict': {
+ 'id': '3698222',
+ 'ext': 'mp4',
+ 'title': 'inFamous: Second Son - inSide Sucker Punch: Smoke & Mirrors',
+ 'description': 'Take a peak behind-the-scenes to see how Sucker Punch brings smoke into the universe of inFAMOUS Second Son on the PS4.',
+ 'duration': 221,
+ },
+ 'skip': 'Not providing trailers anymore',
+ }, {
+ 'url': 'http://www.metacritic.com/game/playstation-4/tales-from-the-borderlands-a-telltale-game-series/trailers/5740315',
+ 'info_dict': {
+ 'id': '5740315',
+ 'ext': 'mp4',
+ 'title': 'Tales from the Borderlands - Finale: The Vault of the Traveler',
+ 'description': 'In the final episode of the season, all hell breaks loose. Jack is now in control of Helios\' systems, and he\'s ready to reclaim his rightful place as king of Hyperion (with or without you).',
+ 'duration': 114,
+ },
+ }]
+
+ def _real_extract(self, url):
+ mobj = self._match_valid_url(url)
+ video_id = mobj.group('id')
+ webpage = self._download_webpage(url, video_id)
+ # The XML is not well-formed: it contains raw '&' characters
+ info = self._download_xml('http://www.metacritic.com/video_data?video=' + video_id,
+ video_id, 'Downloading info xml', transform_source=fix_xml_ampersands)
+
+ clip = next(c for c in info.findall('playList/clip') if c.find('id').text == video_id)
+ formats = []
+ for videoFile in clip.findall('httpURI/videoFile'):
+ rate_str = videoFile.find('rate').text
+ video_url = videoFile.find('filePath').text
+ formats.append({
+ 'url': video_url,
+ 'ext': 'mp4',
+ 'format_id': rate_str,
+ 'tbr': int(rate_str),
+ })
+
+ description = self._html_search_regex(r'<b>Description:</b>(.*?)</p>',
+ webpage, 'description', flags=re.DOTALL)
+
+ return {
+ 'id': video_id,
+ 'title': clip.find('title').text,
+ 'formats': formats,
+ 'description': description,
+ 'duration': int(clip.find('duration').text),
+ }
diff --git a/yt_dlp/extractor/mgtv.py b/yt_dlp/extractor/mgtv.py
new file mode 100644
index 0000000..31ccf00
--- /dev/null
+++ b/yt_dlp/extractor/mgtv.py
@@ -0,0 +1,165 @@
+import base64
+import time
+import uuid
+
+from .common import InfoExtractor
+from ..networking.exceptions import HTTPError
+from ..utils import (
+ ExtractorError,
+ int_or_none,
+ parse_resolution,
+ traverse_obj,
+ try_get,
+ url_or_none,
+ urljoin,
+)
+
+
+class MGTVIE(InfoExtractor):
+ _VALID_URL = r'https?://(?:w(?:ww)?\.)?mgtv\.com/(v|b)/(?:[^/]+/)*(?P<id>\d+)\.html'
+ IE_DESC = '芒果TV'
+ IE_NAME = 'MangoTV'
+
+ _TESTS = [{
+ 'url': 'http://www.mgtv.com/v/1/290525/f/3116640.html',
+ 'info_dict': {
+ 'id': '3116640',
+ 'ext': 'mp4',
+ 'title': '我是歌手 第四季',
+ 'description': '我是歌手第四季双年巅峰会',
+ 'duration': 7461,
+ 'thumbnail': r're:^https?://.*\.jpg$',
+ },
+ 'params': {'skip_download': 'm3u8'},
+ }, {
+ 'url': 'https://w.mgtv.com/b/427837/15588271.html',
+ 'info_dict': {
+ 'id': '15588271',
+ 'ext': 'mp4',
+ 'title': '春日迟迟再出发 沉浸版第1期:陆莹结婚半年查出肾炎被离婚 吴雅婷把一半票根退给前夫',
+ 'description': 'md5:a7a05a05b1aa87bd50cae619b19bbca6',
+ 'thumbnail': r're:^https?://.+\.jpg',
+ 'duration': 4026,
+ },
+ 'params': {'skip_download': 'm3u8'},
+ }, {
+ 'url': 'https://w.mgtv.com/b/333652/7329822.html',
+ 'info_dict': {
+ 'id': '7329822',
+ 'ext': 'mp4',
+ 'title': '拜托,请你爱我',
+ 'description': 'md5:cd81be6499bafe32e4d143abd822bf9c',
+ 'thumbnail': r're:^https?://.+\.jpg',
+ 'duration': 2656,
+ },
+ 'params': {'skip_download': 'm3u8'},
+ }, {
+ 'url': 'https://w.mgtv.com/b/427837/15591647.html',
+ 'only_matching': True,
+ }, {
+ 'url': 'https://w.mgtv.com/b/388252/15634192.html?fpa=33318&fpos=4&lastp=ch_home',
+ 'only_matching': True,
+ }, {
+ 'url': 'http://www.mgtv.com/b/301817/3826653.html',
+ 'only_matching': True,
+ }, {
+ 'url': 'https://w.mgtv.com/b/301817/3826653.html',
+ 'only_matching': True,
+ }]
+
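+ # Quality labels as returned by the API, mapped to (generic height
+ # label, exact 16:9 resolution); index 1 is used when the stream
+ # reports a 16:9 scale.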
+ _RESOLUTIONS = {
+ '标清': ('480p', '854x480'),
+ '高清': ('540p', '960x540'),
+ '超清': ('720p', '1280x720'),
+ '蓝光': ('1080p', '1920x1080'),
+ }
+
+ def _real_extract(self, url):
+ video_id = self._match_id(url)
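+ # tk2 is the reversed urlsafe-base64 encoding of a pipe-separated
+ # string carrying a random device id, a platform number, a client
+ # version and the current unix time.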
+ tk2 = base64.urlsafe_b64encode(
+ f'did={uuid.uuid4()}|pno=1030|ver=0.3.0301|clit={int(time.time())}'.encode())[::-1]
+ try:
+ api_data = self._download_json(
+ 'https://pcweb.api.mgtv.com/player/video', video_id, query={
+ 'tk2': tk2,
+ 'video_id': video_id,
+ 'type': 'pch5'
+ }, headers=self.geo_verification_headers())['data']
+ except ExtractorError as e:
+ if isinstance(e.cause, HTTPError) and e.cause.status == 401:
+ error = self._parse_json(e.cause.response.read().decode(), None)
+ if error.get('code') == 40005:
+ self.raise_geo_restricted(countries=self._GEO_COUNTRIES)
+ raise ExtractorError(error['msg'], expected=True)
+ raise
+
+ stream_data = self._download_json(
+ 'https://pcweb.api.mgtv.com/player/getSource', video_id, query={
+ 'tk2': tk2,
+ 'pm2': api_data['atc']['pm2'],
+ 'video_id': video_id,
+ 'type': 'pch5',
+ 'src': 'intelmgtv',
+ }, headers=self.geo_verification_headers())['data']
+ stream_domain = traverse_obj(stream_data, ('stream_domain', ..., {url_or_none}), get_all=False)
+
+ formats = []
+ for idx, stream in enumerate(traverse_obj(stream_data, ('stream', lambda _, v: v['url']))):
+ stream_name = traverse_obj(stream, 'name', 'standardName', 'barName', expected_type=str)
+ resolution = traverse_obj(
+ self._RESOLUTIONS, (stream_name, 1 if stream.get('scale') == '16:9' else 0))
+ format_url = traverse_obj(self._download_json(
+ urljoin(stream_domain, stream['url']), video_id, fatal=False,
+ note=f'Downloading video info for format {resolution or stream_name}'),
+ ('info', {url_or_none}))
+ if not format_url:
+ continue
+ tbr = int_or_none(stream.get('filebitrate') or self._search_regex(
+ r'_(\d+)_mp4/', format_url, 'tbr', default=None))
+ formats.append({
+ 'format_id': str(tbr or idx),
+ 'url': format_url,
+ 'ext': 'mp4',
+ 'tbr': tbr,
+ 'vcodec': stream.get('videoFormat'),
+ 'acodec': stream.get('audioFormat'),
+ **parse_resolution(resolution),
+ 'protocol': 'm3u8_native',
+ 'http_headers': {
+ 'Referer': url,
+ },
+ 'format_note': stream_name,
+ })
+
+ return {
+ 'id': video_id,
+ 'formats': formats,
+ **traverse_obj(api_data, ('info', {
+ 'title': ('title', {str.strip}),
+ 'description': ('desc', {str}),
+ 'duration': ('duration', {int_or_none}),
+ 'thumbnail': ('thumb', {url_or_none}),
+ })),
+ 'subtitles': self.extract_subtitles(video_id, stream_domain),
+ }
+
+ def _get_subtitles(self, video_id, domain):
+ info = self._download_json(f'https://pcweb.api.mgtv.com/video/title?videoId={video_id}',
+ video_id, fatal=False) or {}
+ subtitles = {}
+ for sub in try_get(info, lambda x: x['data']['title']) or []:
+ url_sub = sub.get('url')
+ if not url_sub:
+ continue
+ locale = sub.get('captionSimpleName') or 'en'
+ sub = self._download_json(f'{domain}{url_sub}', video_id, fatal=False,
+ note=f'Download subtitle for locale {sub.get("name")} ({locale})') or {}
+ sub_url = url_or_none(sub.get('info'))
+ if not sub_url:
+ continue
+ subtitles.setdefault(locale.lower(), []).append({
+ 'url': sub_url,
+ 'name': sub.get('name'),
+ 'ext': 'srt'
+ })
+ return subtitles
diff --git a/yt_dlp/extractor/microsoftembed.py b/yt_dlp/extractor/microsoftembed.py
new file mode 100644
index 0000000..f71ab3e
--- /dev/null
+++ b/yt_dlp/extractor/microsoftembed.py
@@ -0,0 +1,65 @@
+from .common import InfoExtractor
+from ..utils import int_or_none, traverse_obj, unified_timestamp
+
+
+class MicrosoftEmbedIE(InfoExtractor):
+ _VALID_URL = r'https?://(?:www\.)?microsoft\.com/(?:[^/]+/)?videoplayer/embed/(?P<id>[a-z0-9A-Z]+)'
+
+ _TESTS = [{
+ 'url': 'https://www.microsoft.com/en-us/videoplayer/embed/RWL07e',
+ 'md5': 'eb0ae9007f9b305f9acd0a03e74cb1a9',
+ 'info_dict': {
+ 'id': 'RWL07e',
+ 'title': 'Microsoft for Public Health and Social Services',
+ 'ext': 'mp4',
+ 'thumbnail': 'http://img-prod-cms-rt-microsoft-com.akamaized.net/cms/api/am/imageFileData/RWL7Ju?ver=cae5',
+ 'age_limit': 0,
+ 'timestamp': 1631658316,
+ 'upload_date': '20210914'
+ }
+ }]
+ _API_URL = 'https://prod-video-cms-rt-microsoft-com.akamaized.net/vhs/api/videos/'
+
+ def _real_extract(self, url):
+ video_id = self._match_id(url)
+ metadata = self._download_json(self._API_URL + video_id, video_id)
+
+ formats = []
+ for source_type, source in metadata['streams'].items():
+ if source_type == 'smooth_Streaming':
+ formats.extend(self._extract_ism_formats(source['url'], video_id, 'mss'))
+ elif source_type == 'apple_HTTP_Live_Streaming':
+ formats.extend(self._extract_m3u8_formats(source['url'], video_id, 'mp4'))
+ elif source_type == 'mPEG_DASH':
+ formats.extend(self._extract_mpd_formats(source['url'], video_id))
+ else:
+ formats.append({
+ 'format_id': source_type,
+ 'url': source['url'],
+ 'height': source.get('heightPixels'),
+ 'width': source.get('widthPixels'),
+ })
+
+ subtitles = {
+ lang: [{
+ 'url': data.get('url'),
+ 'ext': 'vtt',
+ }] for lang, data in traverse_obj(metadata, 'captions', default={}).items()
+ }
+
+ thumbnails = [{
+ 'url': thumb.get('url'),
+ 'width': thumb.get('width') or None,
+ 'height': thumb.get('height') or None,
+ } for thumb in traverse_obj(metadata, ('snippet', 'thumbnails', ...))]
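+ # _remove_duplicate_formats dedups by 'url', so it works just as well
+ # on thumbnail dicts.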
+ self._remove_duplicate_formats(thumbnails)
+
+ return {
+ 'id': video_id,
+ 'title': traverse_obj(metadata, ('snippet', 'title')),
+ 'timestamp': unified_timestamp(traverse_obj(metadata, ('snippet', 'activeStartDate'))),
+ 'age_limit': int_or_none(traverse_obj(metadata, ('snippet', 'minimumAge'))) or 0,
+ 'formats': formats,
+ 'subtitles': subtitles,
+ 'thumbnails': thumbnails,
+ }
diff --git a/yt_dlp/extractor/microsoftstream.py b/yt_dlp/extractor/microsoftstream.py
new file mode 100644
index 0000000..9b50996
--- /dev/null
+++ b/yt_dlp/extractor/microsoftstream.py
@@ -0,0 +1,121 @@
+from base64 import b64decode
+
+from .common import InfoExtractor
+from ..utils import (
+ merge_dicts,
+ parse_iso8601,
+ parse_duration,
+ parse_resolution,
+ try_get,
+ url_basename,
+)
+
+
+class MicrosoftStreamIE(InfoExtractor):
+ IE_NAME = 'microsoftstream'
+ IE_DESC = 'Microsoft Stream'
+ _VALID_URL = r'https?://(?:web|www|msit)\.microsoftstream\.com/video/(?P<id>[\da-f]{8}-[\da-f]{4}-[\da-f]{4}-[\da-f]{4}-[\da-f]{12})'
+
+ _TESTS = [{
+ 'url': 'https://web.microsoftstream.com/video/6e51d928-4f46-4f1c-b141-369925e37b62?list=user&userId=f5491e02-e8fe-4e34-b67c-ec2e79a6ecc0',
+ 'only_matching': True,
+ }, {
+ 'url': 'https://msit.microsoftstream.com/video/b60f5987-aabd-4e1c-a42f-c559d138f2ca',
+ 'only_matching': True,
+ }]
+
+ def _get_all_subtitles(self, api_url, video_id, headers):
+ subtitles = {}
+ automatic_captions = {}
+ text_tracks = (self._download_json(
+ f'{api_url}/videos/{video_id}/texttracks', video_id,
+ note='Downloading subtitles JSON', fatal=False, headers=headers,
+ query={'api-version': '1.4-private'}) or {}).get('value') or []
+ for track in text_tracks:
+ if not track.get('language') or not track.get('url'):
+ continue
+ sub_dict = automatic_captions if track.get('autoGenerated') else subtitles
+ sub_dict.setdefault(track['language'], []).append({
+ 'ext': 'vtt',
+ 'url': track.get('url')
+ })
+ return {
+ 'subtitles': subtitles,
+ 'automatic_captions': automatic_captions
+ }
+
+ def extract_all_subtitles(self, *args, **kwargs):
+ if (self.get_param('writesubtitles', False)
+ or self.get_param('writeautomaticsub', False)
+ or self.get_param('listsubtitles')):
+ return self._get_all_subtitles(*args, **kwargs)
+ return {}
+
+ def _real_extract(self, url):
+ video_id = self._match_id(url)
+ webpage = self._download_webpage(url, video_id)
+ if '<title>Microsoft Stream</title>' not in webpage:
+ self.raise_login_required(method='cookies')
+
+ access_token = self._html_search_regex(r'"AccessToken":"(.+?)"', webpage, 'access token')
+ api_url = self._html_search_regex(r'"ApiGatewayUri":"(.+?)"', webpage, 'api url')
+
+ headers = {'Authorization': f'Bearer {access_token}'}
+
+ video_data = self._download_json(
+ f'{api_url}/videos/{video_id}', video_id,
+ headers=headers, query={
+ '$expand': 'creator,tokens,status,liveEvent,extensions',
+ 'api-version': '1.4-private'
+ })
+ video_id = video_data.get('id') or video_id
+ language = video_data.get('language')
+
+ thumbnails = []
+ for thumbnail_id in ('extraSmall', 'small', 'medium', 'large'):
+ thumbnail_url = try_get(video_data, lambda x: x['posterImage'][thumbnail_id]['url'], str)
+ if not thumbnail_url:
+ continue
+ thumb = {
+ 'id': thumbnail_id,
+ 'url': thumbnail_url,
+ }
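+ # The thumbnail basename is unpadded base64; restore the '=' padding
+ # to a multiple of 4 before decoding, then parse the WxH resolution
+ # embedded in the decoded name.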
+ thumb_name = url_basename(thumbnail_url)
+ thumb_name = str(b64decode(thumb_name + '=' * (-len(thumb_name) % 4)))
+ thumb.update(parse_resolution(thumb_name))
+ thumbnails.append(thumb)
+
+ formats = []
+ for playlist in video_data['playbackUrls']:
+ if playlist['mimeType'] == 'application/vnd.apple.mpegurl':
+ formats.extend(self._extract_m3u8_formats(
+ playlist['playbackUrl'], video_id,
+ ext='mp4', entry_protocol='m3u8_native', m3u8_id='hls',
+ fatal=False, headers=headers))
+ elif playlist['mimeType'] == 'application/dash+xml':
+ formats.extend(self._extract_mpd_formats(
+ playlist['playbackUrl'], video_id, mpd_id='dash',
+ fatal=False, headers=headers))
+ elif playlist['mimeType'] == 'application/vnd.ms-sstr+xml':
+ formats.extend(self._extract_ism_formats(
+ playlist['playbackUrl'], video_id, ism_id='mss',
+ fatal=False, headers=headers))
+ formats = [merge_dicts(f, {'language': language}) for f in formats]
+
+ return {
+ 'id': video_id,
+ 'title': video_data['name'],
+ 'description': video_data.get('description'),
+ 'uploader': try_get(video_data, lambda x: x['creator']['name'], str),
+ 'uploader_id': try_get(video_data, (lambda x: x['creator']['mail'],
+ lambda x: x['creator']['id']), str),
+ 'thumbnails': thumbnails,
+ **self.extract_all_subtitles(api_url, video_id, headers),
+ 'timestamp': parse_iso8601(video_data.get('created')),
+ 'duration': parse_duration(try_get(video_data, lambda x: x['media']['duration'])),
+ 'webpage_url': f'https://web.microsoftstream.com/video/{video_id}',
+ 'view_count': try_get(video_data, lambda x: x['metrics']['views'], int),
+ 'like_count': try_get(video_data, lambda x: x['metrics']['likes'], int),
+ 'comment_count': try_get(video_data, lambda x: x['metrics']['comments'], int),
+ 'formats': formats,
+ }
diff --git a/yt_dlp/extractor/microsoftvirtualacademy.py b/yt_dlp/extractor/microsoftvirtualacademy.py
new file mode 100644
index 0000000..b759b18
--- /dev/null
+++ b/yt_dlp/extractor/microsoftvirtualacademy.py
@@ -0,0 +1,189 @@
+import re
+
+from .common import InfoExtractor
+from ..utils import (
+ int_or_none,
+ parse_duration,
+ smuggle_url,
+ unsmuggle_url,
+ xpath_text,
+)
+
+
+class MicrosoftVirtualAcademyBaseIE(InfoExtractor):
+ def _extract_base_url(self, course_id, display_id):
+ return self._download_json(
+ 'https://api-mlxprod.microsoft.com/services/products/anonymous/%s' % course_id,
+ display_id, 'Downloading course base URL')
+
+ def _extract_chapter_and_title(self, title):
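+ # Course item titles look like '3 | Some Title'; split off the leading
+ # chapter number when present.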
+ if not title:
+ return None, None
+ m = re.search(r'(?P<chapter>\d+)\s*\|\s*(?P<title>.+)', title)
+ return (int(m.group('chapter')), m.group('title')) if m else (None, title)
+
+
+class MicrosoftVirtualAcademyIE(MicrosoftVirtualAcademyBaseIE):
+ IE_NAME = 'mva'
+ IE_DESC = 'Microsoft Virtual Academy videos'
+ _VALID_URL = r'(?:%s:|https?://(?:mva\.microsoft|(?:www\.)?microsoftvirtualacademy)\.com/[^/]+/training-courses/[^/?#&]+-)(?P<course_id>\d+)(?::|\?l=)(?P<id>[\da-zA-Z]+_\d+)' % IE_NAME
+
+ _TESTS = [{
+ 'url': 'https://mva.microsoft.com/en-US/training-courses/microsoft-azure-fundamentals-virtual-machines-11788?l=gfVXISmEB_6804984382',
+ 'md5': '7826c44fc31678b12ad8db11f6b5abb9',
+ 'info_dict': {
+ 'id': 'gfVXISmEB_6804984382',
+ 'ext': 'mp4',
+ 'title': 'Course Introduction',
+ 'formats': 'mincount:3',
+ 'subtitles': {
+ 'en': [{
+ 'ext': 'ttml',
+ }],
+ },
+ }
+ }, {
+ 'url': 'mva:11788:gfVXISmEB_6804984382',
+ 'only_matching': True,
+ }]
+
+ def _real_extract(self, url):
+ url, smuggled_data = unsmuggle_url(url, {})
+
+ mobj = self._match_valid_url(url)
+ course_id = mobj.group('course_id')
+ video_id = mobj.group('id')
+
+ base_url = smuggled_data.get('base_url') or self._extract_base_url(course_id, video_id)
+
+ settings = self._download_xml(
+ '%s/content/content_%s/videosettings.xml?v=1' % (base_url, video_id),
+ video_id, 'Downloading video settings XML')
+
+ _, title = self._extract_chapter_and_title(xpath_text(
+ settings, './/Title', 'title', fatal=True))
+
+ formats = []
+
+ for sources in settings.findall('.//MediaSources'):
+ sources_type = sources.get('videoType')
+ for source in sources.findall('./MediaSource'):
+ video_url = source.text
+ if not video_url or not video_url.startswith('http'):
+ continue
+ if sources_type == 'smoothstreaming':
+ formats.extend(self._extract_ism_formats(
+ video_url, video_id, 'mss', fatal=False))
+ continue
+ video_mode = source.get('videoMode')
+ height = int_or_none(self._search_regex(
+ r'^(\d+)[pP]$', video_mode or '', 'height', default=None))
+ codec = source.get('codec')
+ acodec, vcodec = [None] * 2
+ if codec:
+ codecs = codec.split(',')
+ if len(codecs) == 2:
+ acodec, vcodec = codecs
+ elif len(codecs) == 1:
+ vcodec = codecs[0]
+ formats.append({
+ 'url': video_url,
+ 'format_id': video_mode,
+ 'height': height,
+ 'acodec': acodec,
+ 'vcodec': vcodec,
+ })
+
+ subtitles = {}
+ for source in settings.findall('.//MarkerResourceSource'):
+ subtitle_url = source.text
+ if not subtitle_url:
+ continue
+ subtitles.setdefault('en', []).append({
+ 'url': '%s/%s' % (base_url, subtitle_url),
+ 'ext': source.get('type'),
+ })
+
+ return {
+ 'id': video_id,
+ 'title': title,
+ 'subtitles': subtitles,
+ 'formats': formats
+ }
+
+
+class MicrosoftVirtualAcademyCourseIE(MicrosoftVirtualAcademyBaseIE):
+ IE_NAME = 'mva:course'
+ IE_DESC = 'Microsoft Virtual Academy courses'
+ _VALID_URL = r'(?:%s:|https?://(?:mva\.microsoft|(?:www\.)?microsoftvirtualacademy)\.com/[^/]+/training-courses/(?P<display_id>[^/?#&]+)-)(?P<id>\d+)' % IE_NAME
+
+ _TESTS = [{
+ 'url': 'https://mva.microsoft.com/en-US/training-courses/microsoft-azure-fundamentals-virtual-machines-11788',
+ 'info_dict': {
+ 'id': '11788',
+ 'title': 'Microsoft Azure Fundamentals: Virtual Machines',
+ },
+ 'playlist_count': 36,
+ }, {
+ # with emphasized chapters
+ 'url': 'https://mva.microsoft.com/en-US/training-courses/developing-windows-10-games-with-construct-2-16335',
+ 'info_dict': {
+ 'id': '16335',
+ 'title': 'Developing Windows 10 Games with Construct 2',
+ },
+ 'playlist_count': 10,
+ }, {
+ 'url': 'https://www.microsoftvirtualacademy.com/en-US/training-courses/microsoft-azure-fundamentals-virtual-machines-11788',
+ 'only_matching': True,
+ }, {
+ 'url': 'mva:course:11788',
+ 'only_matching': True,
+ }]
+
+ @classmethod
+ def suitable(cls, url):
+ return False if MicrosoftVirtualAcademyIE.suitable(url) else super(
+ MicrosoftVirtualAcademyCourseIE, cls).suitable(url)
+
+ def _real_extract(self, url):
+ mobj = self._match_valid_url(url)
+ course_id = mobj.group('id')
+ display_id = mobj.group('display_id')
+
+ base_url = self._extract_base_url(course_id, display_id)
+
+ manifest = self._download_json(
+ '%s/imsmanifestlite.json' % base_url,
+ display_id, 'Downloading course manifest JSON')['manifest']
+
+ organization = manifest['organizations']['organization'][0]
+
+ entries = []
+ for chapter in organization['item']:
+ chapter_number, chapter_title = self._extract_chapter_and_title(chapter.get('title'))
+ chapter_id = chapter.get('@identifier')
+ for item in chapter.get('item', []):
+ item_id = item.get('@identifier')
+ if not item_id:
+ continue
+ metadata = item.get('resource', {}).get('metadata') or {}
+ if metadata.get('learningresourcetype') != 'Video':
+ continue
+ _, title = self._extract_chapter_and_title(item.get('title'))
+ duration = parse_duration(metadata.get('duration'))
+ description = metadata.get('description')
+ entries.append({
+ '_type': 'url_transparent',
+ 'url': smuggle_url(
+ 'mva:%s:%s' % (course_id, item_id), {'base_url': base_url}),
+ 'title': title,
+ 'description': description,
+ 'duration': duration,
+ 'chapter': chapter_title,
+ 'chapter_number': chapter_number,
+ 'chapter_id': chapter_id,
+ })
+
+ title = organization.get('title') or manifest.get('metadata', {}).get('title')
+
+ return self.playlist_result(entries, course_id, title)
diff --git a/yt_dlp/extractor/mildom.py b/yt_dlp/extractor/mildom.py
new file mode 100644
index 0000000..f64d575
--- /dev/null
+++ b/yt_dlp/extractor/mildom.py
@@ -0,0 +1,291 @@
+import functools
+import json
+import uuid
+
+from .common import InfoExtractor
+from ..utils import (
+ determine_ext,
+ dict_get,
+ ExtractorError,
+ float_or_none,
+ OnDemandPagedList,
+ traverse_obj,
+)
+
+
+class MildomBaseIE(InfoExtractor):
+ _GUEST_ID = None
+
+ def _call_api(self, url, video_id, query=None, note='Downloading JSON metadata', body=None):
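+ # A random guest ID is generated once and sent with every API call for the session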
+ if not self._GUEST_ID:
+ self._GUEST_ID = f'pc-gp-{uuid.uuid4()}'
+
+ content = self._download_json(
+ url, video_id, note=note, data=json.dumps(body).encode() if body else None,
+ headers={'Content-Type': 'application/json'} if body else {},
+ query={
+ '__guest_id': self._GUEST_ID,
+ '__platform': 'web',
+ **(query or {}),
+ })
+
+ if content['code'] != 0:
+ raise ExtractorError(
+ f'Mildom says: {content["message"]} (code {content["code"]})',
+ expected=True)
+ return content['body']
+
+
+class MildomIE(MildomBaseIE):
+ IE_NAME = 'mildom'
+ IE_DESC = 'Record ongoing live streams of a specific Mildom user'
+ _VALID_URL = r'https?://(?:(?:www|m)\.)mildom\.com/(?P<id>\d+)'
+
+ def _real_extract(self, url):
+ video_id = self._match_id(url)
+ webpage = self._download_webpage(f'https://www.mildom.com/{video_id}', video_id)
+
+ enterstudio = self._call_api(
+ 'https://cloudac.mildom.com/nonolive/gappserv/live/enterstudio', video_id,
+ note='Downloading live metadata', query={'user_id': video_id})
+ result_video_id = enterstudio.get('log_id', video_id)
+
+ servers = self._call_api(
+ 'https://cloudac.mildom.com/nonolive/gappserv/live/liveserver', result_video_id,
+ note='Downloading live server list', query={
+ 'user_id': video_id,
+ 'live_server_type': 'hls',
+ })
+
+ playback_token = self._call_api(
+ 'https://cloudac.mildom.com/nonolive/gappserv/live/token', result_video_id,
+ note='Obtaining live playback token', body={'host_id': video_id, 'type': 'hls'})
+ playback_token = traverse_obj(playback_token, ('data', ..., 'token'), get_all=False)
+ if not playback_token:
+ raise ExtractorError('Failed to obtain live playback token')
+
+ formats = self._extract_m3u8_formats(
+ f'{servers["stream_server"]}/{video_id}_master.m3u8?{playback_token}',
+ result_video_id, 'mp4', headers={
+ 'Referer': 'https://www.mildom.com/',
+ 'Origin': 'https://www.mildom.com',
+ })
+
+ for fmt in formats:
+ fmt.setdefault('http_headers', {})['Referer'] = 'https://www.mildom.com/'
+
+ return {
+ 'id': result_video_id,
+ 'title': self._html_search_meta('twitter:description', webpage, default=None) or traverse_obj(enterstudio, 'anchor_intro'),
+ 'description': traverse_obj(enterstudio, 'intro', 'live_intro', expected_type=str),
+ 'timestamp': float_or_none(enterstudio.get('live_start_ms'), scale=1000),
+ 'uploader': self._html_search_meta('twitter:title', webpage, default=None) or traverse_obj(enterstudio, 'loginname'),
+ 'uploader_id': video_id,
+ 'formats': formats,
+ 'is_live': True,
+ }
+
+
+class MildomVodIE(MildomBaseIE):
+ IE_NAME = 'mildom:vod'
+ IE_DESC = 'VOD in Mildom'
+ _VALID_URL = r'https?://(?:(?:www|m)\.)mildom\.com/playback/(?P<user_id>\d+)/(?P<id>(?P=user_id)-[a-zA-Z0-9]+-?[0-9]*)'
+ _TESTS = [{
+ 'url': 'https://www.mildom.com/playback/10882672/10882672-1597662269',
+ 'info_dict': {
+ 'id': '10882672-1597662269',
+ 'ext': 'mp4',
+ 'title': '始めてのミルダム配信じゃぃ!',
+ 'thumbnail': r're:^https?://.*\.(png|jpg)$',
+ 'upload_date': '20200817',
+ 'duration': 4138.37,
+ 'description': 'ゲームをしたくて!',
+ 'timestamp': 1597662269.0,
+ 'uploader_id': '10882672',
+ 'uploader': 'kson組長(けいそん)',
+ },
+ }, {
+ 'url': 'https://www.mildom.com/playback/10882672/10882672-1597758589870-477',
+ 'info_dict': {
+ 'id': '10882672-1597758589870-477',
+ 'ext': 'mp4',
+ 'title': '【kson】感染メイズ!麻酔銃で無双する',
+ 'thumbnail': r're:^https?://.*\.(png|jpg)$',
+ 'timestamp': 1597759093.0,
+ 'uploader': 'kson組長(けいそん)',
+ 'duration': 4302.58,
+ 'uploader_id': '10882672',
+ 'description': 'このステージ絶対乗り越えたい',
+ 'upload_date': '20200818',
+ },
+ }, {
+ 'url': 'https://www.mildom.com/playback/10882672/10882672-buha9td2lrn97fk2jme0',
+ 'info_dict': {
+ 'id': '10882672-buha9td2lrn97fk2jme0',
+ 'ext': 'mp4',
+ 'title': '【kson組長】CART RACER!!!',
+ 'thumbnail': r're:^https?://.*\.(png|jpg)$',
+ 'uploader_id': '10882672',
+ 'uploader': 'kson組長(けいそん)',
+ 'upload_date': '20201104',
+ 'timestamp': 1604494797.0,
+ 'duration': 4657.25,
+ 'description': 'WTF',
+ },
+ }]
+
+ def _real_extract(self, url):
+ user_id, video_id = self._match_valid_url(url).group('user_id', 'id')
+ webpage = self._download_webpage(f'https://www.mildom.com/playback/{user_id}/{video_id}', video_id)
+
+ autoplay = self._call_api(
+ 'https://cloudac.mildom.com/nonolive/videocontent/playback/getPlaybackDetail', video_id,
+ note='Downloading playback metadata', query={
+ 'v_id': video_id,
+ })['playback']
+
+ formats = [{
+ 'url': autoplay['audio_url'],
+ 'format_id': 'audio',
+ 'protocol': 'm3u8_native',
+ 'vcodec': 'none',
+ 'acodec': 'aac',
+ 'ext': 'm4a'
+ }]
+ for fmt in autoplay['video_link']:
+ formats.append({
+ 'format_id': 'video-%s' % fmt['name'],
+ 'url': fmt['url'],
+ 'protocol': 'm3u8_native',
+ 'width': fmt['level'] * autoplay['video_width'] // autoplay['video_height'],
+ 'height': fmt['level'],
+ 'vcodec': 'h264',
+ 'acodec': 'aac',
+ 'ext': 'mp4'
+ })
+
+ return {
+ 'id': video_id,
+ 'title': self._html_search_meta(('og:description', 'description'), webpage, default=None) or autoplay.get('title'),
+ 'description': traverse_obj(autoplay, 'video_intro'),
+ 'timestamp': float_or_none(autoplay.get('publish_time'), scale=1000),
+ 'duration': float_or_none(autoplay.get('video_length'), scale=1000),
+ 'thumbnail': dict_get(autoplay, ('upload_pic', 'video_pic')),
+ 'uploader': traverse_obj(autoplay, ('author_info', 'login_name')),
+ 'uploader_id': user_id,
+ 'formats': formats,
+ }
+
+
+class MildomClipIE(MildomBaseIE):
+ IE_NAME = 'mildom:clip'
+ IE_DESC = 'Clip in Mildom'
+ _VALID_URL = r'https?://(?:(?:www|m)\.)mildom\.com/clip/(?P<id>(?P<user_id>\d+)-[a-zA-Z0-9]+)'
+ _TESTS = [{
+ 'url': 'https://www.mildom.com/clip/10042245-63921673e7b147ebb0806d42b5ba5ce9',
+ 'info_dict': {
+ 'id': '10042245-63921673e7b147ebb0806d42b5ba5ce9',
+ 'title': '全然違ったよ',
+ 'timestamp': 1619181890,
+ 'duration': 59,
+ 'thumbnail': r're:https?://.+',
+ 'uploader': 'ざきんぽ',
+ 'uploader_id': '10042245',
+ },
+ }, {
+ 'url': 'https://www.mildom.com/clip/10111524-ebf4036e5aa8411c99fb3a1ae0902864',
+ 'info_dict': {
+ 'id': '10111524-ebf4036e5aa8411c99fb3a1ae0902864',
+ 'title': 'かっこいい',
+ 'timestamp': 1621094003,
+ 'duration': 59,
+ 'thumbnail': r're:https?://.+',
+ 'uploader': '(ルーキー',
+ 'uploader_id': '10111524',
+ },
+ }, {
+ 'url': 'https://www.mildom.com/clip/10660174-2c539e6e277c4aaeb4b1fbe8d22cb902',
+ 'info_dict': {
+ 'id': '10660174-2c539e6e277c4aaeb4b1fbe8d22cb902',
+ 'title': 'あ',
+ 'timestamp': 1614769431,
+ 'duration': 31,
+ 'thumbnail': r're:https?://.+',
+ 'uploader': 'ドルゴルスレンギーン=ダグワドルジ',
+ 'uploader_id': '10660174',
+ },
+ }]
+
+ def _real_extract(self, url):
+ user_id, video_id = self._match_valid_url(url).group('user_id', 'id')
+ webpage = self._download_webpage(f'https://www.mildom.com/clip/{video_id}', video_id)
+
+ clip_detail = self._call_api(
+ 'https://cloudac-cf-jp.mildom.com/nonolive/videocontent/clip/detail', video_id,
+ note='Downloading playback metadata', query={
+ 'clip_id': video_id,
+ })
+
+ return {
+ 'id': video_id,
+ 'title': self._html_search_meta(
+ ('og:description', 'description'), webpage, default=None) or clip_detail.get('title'),
+ 'timestamp': float_or_none(clip_detail.get('create_time')),
+ 'duration': float_or_none(clip_detail.get('length')),
+ 'thumbnail': clip_detail.get('cover'),
+ 'uploader': traverse_obj(clip_detail, ('user_info', 'loginname')),
+ 'uploader_id': user_id,
+
+ 'url': clip_detail['url'],
+ 'ext': determine_ext(clip_detail.get('url'), 'mp4'),
+ }
+
+
+class MildomUserVodIE(MildomBaseIE):
+ IE_NAME = 'mildom:user:vod'
+ IE_DESC = 'Download all VODs of a specific Mildom user'
+ _VALID_URL = r'https?://(?:(?:www|m)\.)mildom\.com/profile/(?P<id>\d+)'
+ _TESTS = [{
+ 'url': 'https://www.mildom.com/profile/10093333',
+ 'info_dict': {
+ 'id': '10093333',
+ 'title': 'Uploads from ねこばたけ',
+ },
+ 'playlist_mincount': 732,
+ }, {
+ 'url': 'https://www.mildom.com/profile/10882672',
+ 'info_dict': {
+ 'id': '10882672',
+ 'title': 'Uploads from kson組長(けいそん)',
+ },
+ 'playlist_mincount': 201,
+ }]
+
+ def _fetch_page(self, user_id, page):
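+ # OnDemandPagedList passes 0-based page indices, while the Mildom API expects 1-based pages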
+ page += 1
+ reply = self._call_api(
+ 'https://cloudac.mildom.com/nonolive/videocontent/profile/playbackList',
+ user_id, note=f'Downloading page {page}', query={
+ 'user_id': user_id,
+ 'page': page,
+ 'limit': '30',
+ })
+ if not reply:
+ return
+ for x in reply:
+ v_id = x.get('v_id')
+ if not v_id:
+ continue
+ yield self.url_result(f'https://www.mildom.com/playback/{user_id}/{v_id}')
+
+ def _real_extract(self, url):
+ user_id = self._match_id(url)
+ self.to_screen(f'This will download all VODs belonging to the user. To download an ongoing live video, use "https://www.mildom.com/{user_id}" instead')
+
+ profile = self._call_api(
+ 'https://cloudac.mildom.com/nonolive/gappserv/user/profileV2', user_id,
+ query={'user_id': user_id}, note='Downloading user profile')['user_info']
+
+ return self.playlist_result(
+ OnDemandPagedList(functools.partial(self._fetch_page, user_id), 30),
+ user_id, f'Uploads from {profile["loginname"]}')
diff --git a/yt_dlp/extractor/minds.py b/yt_dlp/extractor/minds.py
new file mode 100644
index 0000000..27a6e38
--- /dev/null
+++ b/yt_dlp/extractor/minds.py
@@ -0,0 +1,193 @@
+from .common import InfoExtractor
+from ..compat import compat_str
+from ..utils import (
+ clean_html,
+ format_field,
+ int_or_none,
+ str_or_none,
+ strip_or_none,
+)
+
+
+class MindsBaseIE(InfoExtractor):
+ _VALID_URL_BASE = r'https?://(?:www\.)?minds\.com/'
+
+ def _call_api(self, path, video_id, resource, query=None):
+ api_url = 'https://www.minds.com/api/' + path
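+ # Minds requires the XSRF-TOKEN cookie value to be echoed back in the X-XSRF-TOKEN header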
+ token = self._get_cookies(api_url).get('XSRF-TOKEN')
+ return self._download_json(
+ api_url, video_id, 'Downloading %s JSON metadata' % resource, headers={
+ 'Referer': 'https://www.minds.com/',
+ 'X-XSRF-TOKEN': token.value if token else '',
+ }, query=query)
+
+
+class MindsIE(MindsBaseIE):
+ IE_NAME = 'minds'
+ _VALID_URL = MindsBaseIE._VALID_URL_BASE + r'(?:media|newsfeed|archive/view)/(?P<id>[0-9]+)'
+ _TESTS = [{
+ 'url': 'https://www.minds.com/media/100000000000086822',
+ 'md5': '215a658184a419764852239d4970b045',
+ 'info_dict': {
+ 'id': '100000000000086822',
+ 'ext': 'mp4',
+ 'title': 'Minds intro sequence',
+ 'thumbnail': r're:https?://.+\.png',
+ 'uploader_id': 'ottman',
+ 'upload_date': '20130524',
+ 'timestamp': 1369404826,
+ 'uploader': 'Bill Ottman',
+ 'view_count': int,
+ 'like_count': int,
+ 'dislike_count': int,
+ 'tags': ['animation'],
+ 'comment_count': int,
+ 'license': 'attribution-cc',
+ },
+ }, {
+ # entity.type == 'activity' and empty title
+ 'url': 'https://www.minds.com/newsfeed/798025111988506624',
+ 'md5': 'b2733a74af78d7fd3f541c4cbbaa5950',
+ 'info_dict': {
+ 'id': '798022190320226304',
+ 'ext': 'mp4',
+ 'title': '798022190320226304',
+ 'uploader': 'ColinFlaherty',
+ 'upload_date': '20180111',
+ 'timestamp': 1515639316,
+ 'uploader_id': 'ColinFlaherty',
+ },
+ }, {
+ 'url': 'https://www.minds.com/archive/view/715172106794442752',
+ 'only_matching': True,
+ }, {
+ # youtube perma_url
+ 'url': 'https://www.minds.com/newsfeed/1197131838022602752',
+ 'only_matching': True,
+ }]
+
+ def _real_extract(self, url):
+ entity_id = self._match_id(url)
+ entity = self._call_api(
+ 'v1/entities/entity/' + entity_id, entity_id, 'entity')['entity']
+ if entity.get('type') == 'activity':
+ if entity.get('custom_type') == 'video':
+ video_id = entity['entity_guid']
+ else:
+ return self.url_result(entity['perma_url'])
+ else:
+ assert entity['subtype'] == 'video'
+ video_id = entity_id
+ # 1080p and webm formats available only on the sources array
+ video = self._call_api(
+ 'v2/media/video/' + video_id, video_id, 'video')
+
+ formats = []
+ for source in (video.get('sources') or []):
+ src = source.get('src')
+ if not src:
+ continue
+ formats.append({
+ 'format_id': source.get('label'),
+ 'height': int_or_none(source.get('size')),
+ 'url': src,
+ })
+
+ entity = video.get('entity') or entity
+ owner = entity.get('ownerObj') or {}
+ uploader_id = owner.get('username')
+
+ tags = entity.get('tags')
+ if tags and isinstance(tags, compat_str):
+ tags = [tags]
+
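+ # Request the poster URL and follow redirects so the final resolved location is used as the thumbnail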
+ thumbnail = None
+ poster = video.get('poster') or entity.get('thumbnail_src')
+ if poster:
+ urlh = self._request_webpage(poster, video_id, fatal=False)
+ if urlh:
+ thumbnail = urlh.url
+
+ return {
+ 'id': video_id,
+ 'title': entity.get('title') or video_id,
+ 'formats': formats,
+ 'description': clean_html(entity.get('description')) or None,
+ 'license': str_or_none(entity.get('license')),
+ 'timestamp': int_or_none(entity.get('time_created')),
+ 'uploader': strip_or_none(owner.get('name')),
+ 'uploader_id': uploader_id,
+ 'uploader_url': format_field(uploader_id, None, 'https://www.minds.com/%s'),
+ 'view_count': int_or_none(entity.get('play:count')),
+ 'like_count': int_or_none(entity.get('thumbs:up:count')),
+ 'dislike_count': int_or_none(entity.get('thumbs:down:count')),
+ 'tags': tags,
+ 'comment_count': int_or_none(entity.get('comments:count')),
+ 'thumbnail': thumbnail,
+ }
+
+
+class MindsFeedBaseIE(MindsBaseIE):
+ _PAGE_SIZE = 150
+
+ def _entries(self, feed_id):
+ query = {'limit': self._PAGE_SIZE, 'sync': 1}
+ i = 1
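+ # The feed API pages via a 'load-next' timestamp cursor; stop once a page comes back short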
+ while True:
+ data = self._call_api(
+ 'v2/feeds/container/%s/videos' % feed_id,
+ feed_id, 'page %s' % i, query)
+ entities = data.get('entities') or []
+ for entity in entities:
+ guid = entity.get('guid')
+ if not guid:
+ continue
+ yield self.url_result(
+ 'https://www.minds.com/newsfeed/' + guid,
+ MindsIE.ie_key(), guid)
+ query['from_timestamp'] = data['load-next']
+ if not (query['from_timestamp'] and len(entities) == self._PAGE_SIZE):
+ break
+ i += 1
+
+ def _real_extract(self, url):
+ feed_id = self._match_id(url)
+ feed = self._call_api(
+ 'v1/%s/%s' % (self._FEED_PATH, feed_id),
+ feed_id, self._FEED_TYPE)[self._FEED_TYPE]
+
+ return self.playlist_result(
+ self._entries(feed['guid']), feed_id,
+ strip_or_none(feed.get('name')),
+ feed.get('briefdescription'))
+
+
+class MindsChannelIE(MindsFeedBaseIE):
+ _FEED_TYPE = 'channel'
+ IE_NAME = 'minds:' + _FEED_TYPE
+ _VALID_URL = MindsBaseIE._VALID_URL_BASE + r'(?!(?:newsfeed|media|api|archive|groups)/)(?P<id>[^/?&#]+)'
+ _FEED_PATH = 'channel'
+ _TEST = {
+ 'url': 'https://www.minds.com/ottman',
+ 'info_dict': {
+ 'id': 'ottman',
+ 'title': 'Bill Ottman',
+ 'description': 'Co-creator & CEO @minds',
+ },
+ 'playlist_mincount': 54,
+ }
+
+
+class MindsGroupIE(MindsFeedBaseIE):
+ _FEED_TYPE = 'group'
+ IE_NAME = 'minds:' + _FEED_TYPE
+ _VALID_URL = MindsBaseIE._VALID_URL_BASE + r'groups/profile/(?P<id>[0-9]+)'
+ _FEED_PATH = 'groups/group'
+ _TEST = {
+ 'url': 'https://www.minds.com/groups/profile/785582576369672204/feed/videos',
+ 'info_dict': {
+ 'id': '785582576369672204',
+ 'title': 'Cooking Videos',
+ },
+ 'playlist_mincount': 1,
+ }
diff --git a/yt_dlp/extractor/minoto.py b/yt_dlp/extractor/minoto.py
new file mode 100644
index 0000000..032bf3b
--- /dev/null
+++ b/yt_dlp/extractor/minoto.py
@@ -0,0 +1,45 @@
+from .common import InfoExtractor
+from ..utils import (
+ int_or_none,
+ parse_codecs,
+)
+
+
+class MinotoIE(InfoExtractor):
+ _VALID_URL = r'(?:minoto:|https?://(?:play|iframe|embed)\.minoto-video\.com/(?P<player_id>[0-9]+)/)(?P<id>[a-zA-Z0-9]+)'
+
+ def _real_extract(self, url):
+ mobj = self._match_valid_url(url)
+ player_id = mobj.group('player_id') or '1'
+ video_id = mobj.group('id')
+ video_data = self._download_json('http://play.minoto-video.com/%s/%s.js' % (player_id, video_id), video_id)
+ video_metadata = video_data['video-metadata']
+ formats = []
+ for fmt in video_data['video-files']:
+ fmt_url = fmt.get('url')
+ if not fmt_url:
+ continue
+ container = fmt.get('container')
+ if container == 'hls':
+ formats.extend(self._extract_m3u8_formats(fmt_url, video_id, 'mp4', m3u8_id='hls', fatal=False))
+ else:
+ fmt_profile = fmt.get('profile') or {}
+ formats.append({
+ 'format_id': fmt_profile.get('name-short'),
+ 'format_note': fmt_profile.get('name'),
+ 'url': fmt_url,
+ 'container': container,
+ 'tbr': int_or_none(fmt.get('bitrate')),
+ 'filesize': int_or_none(fmt.get('filesize')),
+ 'width': int_or_none(fmt.get('width')),
+ 'height': int_or_none(fmt.get('height')),
+ **parse_codecs(fmt.get('codecs')),
+ })
+
+ return {
+ 'id': video_id,
+ 'title': video_metadata['title'],
+ 'description': video_metadata.get('description'),
+ 'thumbnail': video_metadata.get('video-poster', {}).get('url'),
+ 'formats': formats,
+ }
diff --git a/yt_dlp/extractor/mirrativ.py b/yt_dlp/extractor/mirrativ.py
new file mode 100644
index 0000000..0a8ee0c
--- /dev/null
+++ b/yt_dlp/extractor/mirrativ.py
@@ -0,0 +1,118 @@
+from .common import InfoExtractor
+from ..utils import (
+ ExtractorError,
+ dict_get,
+ traverse_obj,
+ try_get,
+)
+
+
+class MirrativBaseIE(InfoExtractor):
+ def assert_error(self, response):
+ error_message = traverse_obj(response, ('status', 'error'))
+ if error_message:
+ raise ExtractorError('Mirrativ says: %s' % error_message, expected=True)
+
+
+class MirrativIE(MirrativBaseIE):
+ IE_NAME = 'mirrativ'
+ _VALID_URL = r'https?://(?:www\.)?mirrativ\.com/live/(?P<id>[^/?#&]+)'
+
+ _TESTS = [{
+ 'url': 'https://mirrativ.com/live/UQomuS7EMgHoxRHjEhNiHw',
+ 'info_dict': {
+ 'id': 'UQomuS7EMgHoxRHjEhNiHw',
+ 'title': 'ねむいぃ、。『参加型』🔰jcが初めてやるCOD✨初見さん大歓迎💗',
+ 'is_live': True,
+ 'description': 'md5:bfcd8f77f2fab24c3c672e5620f3f16e',
+ 'thumbnail': r're:https?://.+',
+ 'uploader': '# あ ち ゅ 。💡',
+ 'uploader_id': '118572165',
+ 'duration': None,
+ 'view_count': 1241,
+ 'release_timestamp': 1646229192,
+ 'timestamp': 1646229167,
+ 'was_live': False,
+ },
+ 'skip': 'livestream',
+ }, {
+ 'url': 'https://mirrativ.com/live/POxyuG1KmW2982lqlDTuPw',
+ 'only_matching': True,
+ }]
+
+ def _real_extract(self, url):
+ video_id = self._match_id(url)
+ webpage = self._download_webpage('https://www.mirrativ.com/live/%s' % video_id, video_id)
+ live_response = self._download_json(f'https://www.mirrativ.com/api/live/live?live_id={video_id}', video_id)
+ self.assert_error(live_response)
+
+ hls_url = dict_get(live_response, ('archive_url_hls', 'streaming_url_hls'))
+ is_live = bool(live_response.get('is_live'))
+ if not hls_url:
+ raise ExtractorError('Neither archive nor live is available.', expected=True)
+
+ formats = self._extract_m3u8_formats(
+ hls_url, video_id,
+ ext='mp4', entry_protocol='m3u8_native',
+ m3u8_id='hls', live=is_live)
+
+ return {
+ 'id': video_id,
+ 'title': self._og_search_title(webpage, default=None) or self._search_regex(
+ r'<title>\s*(.+?) - Mirrativ\s*</title>', webpage) or live_response.get('title'),
+ 'is_live': is_live,
+ 'description': live_response.get('description'),
+ 'formats': formats,
+ 'thumbnail': live_response.get('image_url'),
+ 'uploader': traverse_obj(live_response, ('owner', 'name')),
+ 'uploader_id': traverse_obj(live_response, ('owner', 'user_id')),
+ 'duration': try_get(live_response, lambda x: x['ended_at'] - x['started_at']) if not is_live else None,
+ 'view_count': live_response.get('total_viewer_num'),
+ 'release_timestamp': live_response.get('started_at'),
+ 'timestamp': live_response.get('created_at'),
+ 'was_live': bool(live_response.get('is_archive')),
+ }
+
+
+class MirrativUserIE(MirrativBaseIE):
+ IE_NAME = 'mirrativ:user'
+ _VALID_URL = r'https?://(?:www\.)?mirrativ\.com/user/(?P<id>\d+)'
+
+ _TESTS = [{
+ # Live archive is available up to 3 days
+ # see: https://helpfeel.com/mirrativ/%E9%8C%B2%E7%94%BB-5e26d3ad7b59ef0017fb49ac (Japanese)
+ 'url': 'https://www.mirrativ.com/user/110943130',
+ 'note': 'multiple archives available',
+ 'only_matching': True,
+ }]
+
+ def _entries(self, user_id):
+ page = 1
+ while page is not None:
+ api_response = self._download_json(
+ f'https://www.mirrativ.com/api/live/live_history?user_id={user_id}&page={page}', user_id,
+ note=f'Downloading page {page}')
+ self.assert_error(api_response)
+ lives = api_response.get('lives')
+ if not lives:
+ break
+ for live in lives:
+ if not live.get('is_archive') and not live.get('is_live'):
+ # skip entries that have neither an archive nor an ongoing live stream;
+ # requesting them can get the client's IP address banned for a while
+ continue
+ live_id = live.get('live_id')
+ url = 'https://www.mirrativ.com/live/%s' % live_id
+ yield self.url_result(url, video_id=live_id, video_title=live.get('title'))
+ page = api_response.get('next_page')
+
+ def _real_extract(self, url):
+ user_id = self._match_id(url)
+ user_info = self._download_json(
+ f'https://www.mirrativ.com/api/user/profile?user_id={user_id}', user_id,
+ note='Downloading user info', fatal=False)
+ self.assert_error(user_info)
+
+ return self.playlist_result(
+ self._entries(user_id), user_id,
+ user_info.get('name'), user_info.get('description'))
diff --git a/yt_dlp/extractor/mirrorcouk.py b/yt_dlp/extractor/mirrorcouk.py
new file mode 100644
index 0000000..7b4f95b
--- /dev/null
+++ b/yt_dlp/extractor/mirrorcouk.py
@@ -0,0 +1,98 @@
+from .common import InfoExtractor
+from ..utils import unescapeHTML
+
+
+class MirrorCoUKIE(InfoExtractor):
+ _VALID_URL = r'https?://(?:www\.)?mirror\.co\.uk/[/+[\w-]+-(?P<id>\d+)'
+ _TESTS = [{
+ 'url': 'https://www.mirror.co.uk/tv/tv-news/love-island-fans-baffled-after-27163139',
+ 'info_dict': {
+ 'id': 'voyyS7SV',
+ 'ext': 'mp4',
+ 'title': 'Love Island: Gemma Owen enters the villa',
+ 'description': 'Love Island: Michael Owen\'s daughter Gemma Owen enters the villa.',
+ 'thumbnail': 'https://cdn.jwplayer.com/v2/media/voyyS7SV/poster.jpg?width=720',
+ 'display_id': '27163139',
+ 'timestamp': 1654547895,
+ 'duration': 57.0,
+ 'upload_date': '20220606',
+ },
+ }, {
+ 'url': 'https://www.mirror.co.uk/3am/celebrity-news/michael-jacksons-son-blankets-new-25344890',
+ 'info_dict': {
+ 'id': 'jyXpdvxp',
+ 'ext': 'mp4',
+ 'title': 'Michael Jackson’s son Bigi calls for action on climate change',
+ 'description': 'md5:d39ceaba2b7a615b4ca6557e7bc40222',
+ 'thumbnail': 'https://cdn.jwplayer.com/v2/media/jyXpdvxp/poster.jpg?width=720',
+ 'display_id': '25344890',
+ 'timestamp': 1635749907,
+ 'duration': 56.0,
+ 'upload_date': '20211101',
+ },
+ }, {
+ 'url': 'https://www.mirror.co.uk/sport/football/news/antonio-conte-next-tottenham-manager-25346042',
+ 'info_dict': {
+ 'id': 'q6FkKa4p',
+ 'ext': 'mp4',
+ 'title': 'Nuno sacked by Tottenham after fifth Premier League defeat of the season',
+ 'description': 'Nuno Espirito Santo has been sacked as Tottenham boss after only four months in charge.',
+ 'thumbnail': 'https://cdn.jwplayer.com/v2/media/q6FkKa4p/poster.jpg?width=720',
+ 'display_id': '25346042',
+ 'timestamp': 1635763157,
+ 'duration': 40.0,
+ 'upload_date': '20211101',
+ },
+ }, {
+ 'url': 'https://www.mirror.co.uk/3am/celebrity-news/johnny-depp-splashes-50k-curry-27160737',
+ 'info_dict': {
+ 'id': 'IT0oa1nH',
+ 'ext': 'mp4',
+ 'title': 'Johnny Depp Leaves The Grand Hotel in Birmingham',
+ 'description': 'Johnny Depp Leaves The Grand Hotel in Birmingham.',
+ 'thumbnail': 'https://cdn.jwplayer.com/v2/media/IT0oa1nH/poster.jpg?width=720',
+ 'display_id': '27160737',
+ 'timestamp': 1654524120,
+ 'duration': 65.0,
+ 'upload_date': '20220606',
+ },
+ }, {
+ 'url': 'https://www.mirror.co.uk/tv/tv-news/love-islands-liam-could-first-27162602',
+ 'info_dict': {
+ 'id': 'EaPr5Z2j',
+ 'ext': 'mp4',
+ 'title': 'Love Island: Davide reveals plot twist after receiving text',
+ 'description': 'Love Island: Davide reveals plot twist after receiving text',
+ 'thumbnail': 'https://cdn.jwplayer.com/v2/media/EaPr5Z2j/poster.jpg?width=720',
+ 'display_id': '27162602',
+ 'timestamp': 1654552597,
+ 'duration': 23.0,
+ 'upload_date': '20220606',
+ },
+ }, {
+ 'url': 'https://www.mirror.co.uk/news/uk-news/william-kate-sent-message-george-27160572',
+ 'info_dict': {
+ 'id': 'ygtceXIu',
+ 'ext': 'mp4',
+ 'title': 'Prince William and Kate arrive in Wales with George and Charlotte',
+ 'description': 'Prince William and Kate Middleton arrive in Wales with children Prince George and Princess Charlotte.',
+ 'thumbnail': 'https://cdn.jwplayer.com/v2/media/ygtceXIu/poster.jpg?width=720',
+ 'display_id': '27160572',
+ 'timestamp': 1654349678,
+ 'duration': 106.0,
+ 'upload_date': '20220604',
+ },
+ }]
+
+ def _real_extract(self, url):
+ display_id = self._match_id(url)
+ webpage = self._download_webpage(url, display_id)
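+ # The page embeds HTML-escaped JSON in a data-json attribute; unescapeHTML restores it before parsing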
+ data = self._search_json(r'div\s+class="json-placeholder"\s+data-json="',
+ webpage, 'data', display_id, transform_source=unescapeHTML)['videoData']
+
+ return {
+ '_type': 'url_transparent',
+ 'url': f'jwplatform:{data["videoId"]}',
+ 'ie_key': 'JWPlatform',
+ 'display_id': display_id,
+ }
diff --git a/yt_dlp/extractor/mit.py b/yt_dlp/extractor/mit.py
new file mode 100644
index 0000000..38cc0c2
--- /dev/null
+++ b/yt_dlp/extractor/mit.py
@@ -0,0 +1,130 @@
+import json
+import re
+
+from .common import InfoExtractor
+from .youtube import YoutubeIE
+from ..utils import (
+ clean_html,
+ ExtractorError,
+ get_element_by_id,
+)
+
+
+class TechTVMITIE(InfoExtractor):
+ IE_NAME = 'techtv.mit.edu'
+ _VALID_URL = r'https?://techtv\.mit\.edu/(?:videos|embeds)/(?P<id>\d+)'
+
+ _TEST = {
+ 'url': 'http://techtv.mit.edu/videos/25418-mit-dna-learning-center-set',
+ 'md5': '00a3a27ee20d44bcaa0933ccec4a2cf7',
+ 'info_dict': {
+ 'id': '25418',
+ 'ext': 'mp4',
+ 'title': 'MIT DNA and Protein Sets',
+ 'description': 'md5:46f5c69ce434f0a97e7c628cc142802d',
+ },
+ }
+
+ def _real_extract(self, url):
+ video_id = self._match_id(url)
+ raw_page = self._download_webpage(
+ 'http://techtv.mit.edu/videos/%s' % video_id, video_id)
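+ # Strip HTML comments so get_element_by_id below does not match commented-out markup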
+ clean_page = re.compile(r'<!--.*?-->', re.S).sub('', raw_page)
+
+ base_url = self._proto_relative_url(self._search_regex(
+ r'ipadUrl: \'(.+?cloudfront.net/)', raw_page, 'base url'), 'http:')
+ formats_json = self._search_regex(
+ r'bitrates: (\[.+?\])', raw_page, 'video formats')
+ formats_mit = json.loads(formats_json)
+ formats = [
+ {
+ 'format_id': f['label'],
+ 'url': base_url + f['url'].partition(':')[2],
+ 'ext': f['url'].partition(':')[0],
+ 'format': f['label'],
+ 'width': f['width'],
+ 'vbr': f['bitrate'],
+ }
+ for f in formats_mit
+ ]
+
+ title = get_element_by_id('edit-title', clean_page)
+ description = clean_html(get_element_by_id('edit-description', clean_page))
+ thumbnail = self._search_regex(
+ r'playlist:.*?url: \'(.+?)\'',
+ raw_page, 'thumbnail', flags=re.DOTALL)
+
+ return {
+ 'id': video_id,
+ 'title': title,
+ 'formats': formats,
+ 'description': description,
+ 'thumbnail': thumbnail,
+ }
+
+
+class OCWMITIE(InfoExtractor):
+ IE_NAME = 'ocw.mit.edu'
+ _VALID_URL = r'^https?://ocw\.mit\.edu/courses/(?P<topic>[a-z0-9\-]+)'
+ _BASE_URL = 'http://ocw.mit.edu/'
+
+ _TESTS = [
+ {
+ 'url': 'http://ocw.mit.edu/courses/electrical-engineering-and-computer-science/6-041-probabilistic-systems-analysis-and-applied-probability-fall-2010/video-lectures/lecture-7-multiple-variables-expectations-independence/',
+ 'info_dict': {
+ 'id': 'EObHWIEKGjA',
+ 'ext': 'webm',
+ 'title': 'Lecture 7: Multiple Discrete Random Variables: Expectations, Conditioning, Independence',
+ 'description': 'In this lecture, the professor discussed multiple random variables, expectations, and binomial distribution.',
+ 'upload_date': '20121109',
+ 'uploader_id': 'MIT',
+ 'uploader': 'MIT OpenCourseWare',
+ }
+ },
+ {
+ 'url': 'http://ocw.mit.edu/courses/mathematics/18-01sc-single-variable-calculus-fall-2010/1.-differentiation/part-a-definition-and-basic-rules/session-1-introduction-to-derivatives/',
+ 'info_dict': {
+ 'id': '7K1sB05pE0A',
+ 'ext': 'mp4',
+ 'title': 'Session 1: Introduction to Derivatives',
+ 'upload_date': '20090818',
+ 'uploader_id': 'MIT',
+ 'uploader': 'MIT OpenCourseWare',
+ 'description': 'This section contains lecture video excerpts, lecture notes, an interactive mathlet with supporting documents, and problem solving videos.',
+ }
+ }
+ ]
+
+ def _real_extract(self, url):
+ mobj = self._match_valid_url(url)
+ topic = mobj.group('topic')
+
+ webpage = self._download_webpage(url, topic)
+ title = self._html_search_meta('WT.cg_s', webpage)
+ description = self._html_search_meta('Description', webpage)
+
+ # search for call to ocw_embed_chapter_media(container_id, media_url, provider, page_url, image_url, start, stop, captions_file)
+ embed_chapter_media = re.search(r'ocw_embed_chapter_media\((.+?)\)', webpage)
+ if embed_chapter_media:
+ metadata = re.sub(r'[\'"]', '', embed_chapter_media.group(1))
+ metadata = re.split(r', ?', metadata)
+ yt = metadata[1]
+ else:
+ # search for call to ocw_embed_media(container_id, media_url, provider, page_url, image_url, captions_file)
+ embed_media = re.search(r'ocw_embed_media\((.+?)\)', webpage)
+ if embed_media:
+ metadata = re.sub(r'[\'"]', '', embed_media.group(1))
+ metadata = re.split(r', ?', metadata)
+ yt = metadata[1]
+ else:
+ raise ExtractorError('Unable to find embedded YouTube video.')
+ video_id = YoutubeIE.extract_id(yt)
+
+ return {
+ '_type': 'url_transparent',
+ 'id': video_id,
+ 'title': title,
+ 'description': description,
+ 'url': yt,
+ 'ie_key': 'Youtube',
+ }
diff --git a/yt_dlp/extractor/mitele.py b/yt_dlp/extractor/mitele.py
new file mode 100644
index 0000000..ea29986
--- /dev/null
+++ b/yt_dlp/extractor/mitele.py
@@ -0,0 +1,82 @@
+from .telecinco import TelecincoIE
+from ..utils import (
+ int_or_none,
+ parse_iso8601,
+)
+
+
+class MiTeleIE(TelecincoIE): # XXX: Do not subclass from concrete IE
+ IE_DESC = 'mitele.es'
+ _VALID_URL = r'https?://(?:www\.)?mitele\.es/(?:[^/]+/)+(?P<id>[^/]+)/player'
+
+ _TESTS = [{
+ 'url': 'http://www.mitele.es/programas-tv/diario-de/57b0dfb9c715da65618b4afa/player',
+ 'info_dict': {
+ 'id': 'FhYW1iNTE6J6H7NkQRIEzfne6t2quqPg',
+ 'ext': 'mp4',
+ 'title': 'Diario de La redacción Programa 144',
+ 'description': 'md5:07c35a7b11abb05876a6a79185b58d27',
+ 'series': 'Diario de',
+ 'season': 'Season 14',
+ 'season_number': 14,
+ 'episode': 'Tor, la web invisible',
+ 'episode_number': 3,
+ 'thumbnail': r're:(?i)^https?://.*\.jpg$',
+ 'duration': 2913,
+ 'age_limit': 16,
+ 'timestamp': 1471209401,
+ 'upload_date': '20160814',
+ },
+ }, {
+ # no explicit title
+ 'url': 'http://www.mitele.es/programas-tv/cuarto-milenio/57b0de3dc915da14058b4876/player',
+ 'info_dict': {
+ 'id': 'oyNG1iNTE6TAPP-JmCjbwfwJqqMMX3Vq',
+ 'ext': 'mp4',
+ 'title': 'Cuarto Milenio Temporada 6 Programa 226',
+ 'description': 'md5:5ff132013f0cd968ffbf1f5f3538a65f',
+ 'series': 'Cuarto Milenio',
+ 'season': 'Season 6',
+ 'season_number': 6,
+ 'episode': 'Episode 24',
+ 'episode_number': 24,
+ 'thumbnail': r're:(?i)^https?://.*\.jpg$',
+ 'duration': 7313,
+ 'age_limit': 12,
+ 'timestamp': 1471209021,
+ 'upload_date': '20160814',
+ },
+ 'params': {
+ 'skip_download': True,
+ },
+ }, {
+ 'url': 'http://www.mitele.es/series-online/la-que-se-avecina/57aac5c1c915da951a8b45ed/player',
+ 'only_matching': True,
+ }, {
+ 'url': 'https://www.mitele.es/programas-tv/diario-de/la-redaccion/programa-144-40_1006364575251/player/',
+ 'only_matching': True,
+ }]
+
+ def _real_extract(self, url):
+ display_id = self._match_id(url)
+ webpage = self._download_webpage(url, display_id)
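+ # The page ships metadata in a serialized React state object (prePlayer_mtweb)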
+ pre_player = self._parse_json(self._search_regex(
+ r'window\.\$REACTBASE_STATE\.prePlayer_mtweb\s*=\s*({.+})',
+ webpage, 'Pre Player'), display_id)['prePlayer']
+ title = pre_player['title']
+ video_info = self._parse_content(pre_player['video'], url)
+ content = pre_player.get('content') or {}
+ info = content.get('info') or {}
+
+ video_info.update({
+ 'title': title,
+ 'description': info.get('synopsis'),
+ 'series': content.get('title'),
+ 'season_number': int_or_none(info.get('season_number')),
+ 'episode': content.get('subtitle'),
+ 'episode_number': int_or_none(info.get('episode_number')),
+ 'duration': int_or_none(info.get('duration')),
+ 'age_limit': int_or_none(info.get('rating')),
+ 'timestamp': parse_iso8601(pre_player.get('publishedTime')),
+ })
+ return video_info
diff --git a/yt_dlp/extractor/mixch.py b/yt_dlp/extractor/mixch.py
new file mode 100644
index 0000000..4be6947
--- /dev/null
+++ b/yt_dlp/extractor/mixch.py
@@ -0,0 +1,81 @@
+from .common import InfoExtractor
+from ..utils import UserNotLive, traverse_obj
+
+
+class MixchIE(InfoExtractor):
+ IE_NAME = 'mixch'
+ _VALID_URL = r'https?://(?:www\.)?mixch\.tv/u/(?P<id>\d+)'
+
+ _TESTS = [{
+ 'url': 'https://mixch.tv/u/16236849/live',
+ 'skip': 'don\'t know if this live persists',
+ 'info_dict': {
+ 'id': '16236849',
+ 'title': '24配信シェア⭕️投票🙏💦',
+ 'comment_count': 13145,
+ 'view_count': 28348,
+ 'timestamp': 1636189377,
+ 'uploader': '🦥伊咲👶🏻#フレアワ',
+ 'uploader_id': '16236849',
+ }
+ }, {
+ 'url': 'https://mixch.tv/u/16137876/live',
+ 'only_matching': True,
+ }]
+
+ def _real_extract(self, url):
+ video_id = self._match_id(url)
+ webpage = self._download_webpage(f'https://mixch.tv/u/{video_id}/live', video_id)
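+ # Live metadata is embedded in window.__INITIAL_JS_STATE__; a missing liveInfo key means the user is offline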
+
+ initial_js_state = self._parse_json(self._search_regex(
+ r'(?m)^\s*window\.__INITIAL_JS_STATE__\s*=\s*(\{.+?\});\s*$', webpage, 'initial JS state'), video_id)
+ if not initial_js_state.get('liveInfo'):
+ raise UserNotLive(video_id=video_id)
+
+ return {
+ 'id': video_id,
+ 'title': traverse_obj(initial_js_state, ('liveInfo', 'title')),
+ 'comment_count': traverse_obj(initial_js_state, ('liveInfo', 'comments')),
+ 'view_count': traverse_obj(initial_js_state, ('liveInfo', 'visitor')),
+ 'timestamp': traverse_obj(initial_js_state, ('liveInfo', 'created')),
+ 'uploader': traverse_obj(initial_js_state, ('broadcasterInfo', 'name')),
+ 'uploader_id': video_id,
+ 'formats': [{
+ 'format_id': 'hls',
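+ # Prefer the HLS URL reported in liveInfo; otherwise fall back to the predictable CloudFront URL pattern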
+ 'url': (traverse_obj(initial_js_state, ('liveInfo', 'hls'))
+ or f'https://d1hd0ww6piyb43.cloudfront.net/hls/torte_{video_id}.m3u8'),
+ 'ext': 'mp4',
+ 'protocol': 'm3u8',
+ }],
+ 'is_live': True,
+ }
+
+
+class MixchArchiveIE(InfoExtractor):
+ IE_NAME = 'mixch:archive'
+ _VALID_URL = r'https?://(?:www\.)?mixch\.tv/archive/(?P<id>\d+)'
+
+ _TESTS = [{
+ 'url': 'https://mixch.tv/archive/421',
+ 'skip': 'paid video, no DRM. expires at Jan 23',
+ 'info_dict': {
+ 'id': '421',
+ 'title': '96NEKO SHOW TIME',
+ }
+ }]
+
+ def _real_extract(self, url):
+ video_id = self._match_id(url)
+ webpage = self._download_webpage(url, video_id)
+
+ html5_videos = self._parse_html5_media_entries(
+ url, webpage.replace('video-js', 'video'), video_id, 'hls')
+ if not html5_videos:
+ self.raise_login_required(method='cookies')
+ infodict = html5_videos[0]
+ infodict.update({
+ 'id': video_id,
+ 'title': self._html_search_regex(r'class="archive-title">(.+?)</', webpage, 'title')
+ })
+
+ return infodict
diff --git a/yt_dlp/extractor/mixcloud.py b/yt_dlp/extractor/mixcloud.py
new file mode 100644
index 0000000..8a95d1a
--- /dev/null
+++ b/yt_dlp/extractor/mixcloud.py
@@ -0,0 +1,379 @@
+import itertools
+
+from .common import InfoExtractor
+from ..compat import (
+ compat_b64decode,
+ compat_ord,
+ compat_str,
+ compat_urllib_parse_unquote,
+)
+from ..utils import (
+ ExtractorError,
+ int_or_none,
+ parse_iso8601,
+ strip_or_none,
+ try_get,
+)
+
+
+class MixcloudBaseIE(InfoExtractor):
+ def _call_api(self, object_type, object_fields, display_id, username, slug=None):
+ lookup_key = object_type + 'Lookup'
+ return self._download_json(
+ 'https://app.mixcloud.com/graphql', display_id, query={
+ 'query': '''{
+ %s(lookup: {username: "%s"%s}) {
+ %s
+ }
+}''' % (lookup_key, username, ', slug: "%s"' % slug if slug else '', object_fields)
+ })['data'][lookup_key]
+
+
+class MixcloudIE(MixcloudBaseIE):
+ _VALID_URL = r'https?://(?:(?:www|beta|m)\.)?mixcloud\.com/([^/]+)/(?!stream|uploads|favorites|listens|playlists)([^/]+)'
+ IE_NAME = 'mixcloud'
+
+ _TESTS = [{
+ 'url': 'http://www.mixcloud.com/dholbach/cryptkeeper/',
+ 'info_dict': {
+ 'id': 'dholbach_cryptkeeper',
+ 'ext': 'm4a',
+ 'title': 'Cryptkeeper',
+ 'description': 'After quite a long silence from myself, finally another Drum\'n\'Bass mix with my favourite current dance floor bangers.',
+ 'uploader': 'Daniel Holbach',
+ 'uploader_id': 'dholbach',
+ 'thumbnail': r're:https?://.*\.jpg',
+ 'view_count': int,
+ 'timestamp': 1321359578,
+ 'upload_date': '20111115',
+ 'uploader_url': 'https://www.mixcloud.com/dholbach/',
+ 'artist': 'Submorphics & Chino , Telekinesis, Porter Robinson, Enei, Breakage ft Jess Mills',
+ 'duration': 3723,
+ 'tags': [],
+ 'comment_count': int,
+ 'repost_count': int,
+ 'like_count': int,
+ },
+ 'params': {'skip_download': 'm3u8'},
+ }, {
+ 'url': 'http://www.mixcloud.com/gillespeterson/caribou-7-inch-vinyl-mix-chat/',
+ 'info_dict': {
+ 'id': 'gillespeterson_caribou-7-inch-vinyl-mix-chat',
+ 'ext': 'mp3',
+ 'title': 'Caribou 7 inch Vinyl Mix & Chat',
+ 'description': 'md5:2b8aec6adce69f9d41724647c65875e8',
+ 'uploader': 'Gilles Peterson Worldwide',
+ 'uploader_id': 'gillespeterson',
+ 'thumbnail': 're:https?://.*',
+ 'view_count': int,
+ 'timestamp': 1422987057,
+ 'upload_date': '20150203',
+ 'uploader_url': 'https://www.mixcloud.com/gillespeterson/',
+ 'duration': 2992,
+ 'tags': [],
+ 'comment_count': int,
+ 'repost_count': int,
+ 'like_count': int,
+ },
+ 'params': {'skip_download': '404 playback error on site'},
+ }, {
+ 'url': 'https://beta.mixcloud.com/RedLightRadio/nosedrip-15-red-light-radio-01-18-2016/',
+ 'only_matching': True,
+ }]
+ _DECRYPTION_KEY = 'IFYOUWANTTHEARTISTSTOGETPAIDDONOTDOWNLOADFROMMIXCLOUD'
+
+ @staticmethod
+ def _decrypt_xor_cipher(key, ciphertext):
+ """Encrypt/Decrypt XOR cipher. Both ways are possible because it's XOR."""
+ return ''.join([
+ chr(compat_ord(ch) ^ compat_ord(k))
+ for ch, k in zip(ciphertext, itertools.cycle(key))])
+
+ def _real_extract(self, url):
+ username, slug = self._match_valid_url(url).groups()
+ username, slug = compat_urllib_parse_unquote(username), compat_urllib_parse_unquote(slug)
+ track_id = '%s_%s' % (username, slug)
+
+ cloudcast = self._call_api('cloudcast', '''audioLength
+ comments(first: 100) {
+ edges {
+ node {
+ comment
+ created
+ user {
+ displayName
+ username
+ }
+ }
+ }
+ totalCount
+ }
+ description
+ favorites {
+ totalCount
+ }
+ featuringArtistList
+ isExclusive
+ name
+ owner {
+ displayName
+ url
+ username
+ }
+ picture(width: 1024, height: 1024) {
+ url
+ }
+ plays
+ publishDate
+ reposts {
+ totalCount
+ }
+ streamInfo {
+ dashUrl
+ hlsUrl
+ url
+ }
+ tags {
+ tag {
+ name
+ }
+ }
+ restrictedReason
+ id''', track_id, username, slug)
+
+ if not cloudcast:
+ raise ExtractorError('Track not found', expected=True)
+
+ reason = cloudcast.get('restrictedReason')
+ if reason == 'tracklist':
+ raise ExtractorError('Track unavailable in your country due to licensing restrictions', expected=True)
+ elif reason == 'repeat_play':
+ raise ExtractorError('You have reached your play limit for this track', expected=True)
+ elif reason:
+ raise ExtractorError('Track is restricted', expected=True)
+
+ title = cloudcast['name']
+
+ stream_info = cloudcast['streamInfo']
+ formats = []
+
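+ # Stream URLs are base64-encoded and XOR-obfuscated with the static key above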
+ for url_key in ('url', 'hlsUrl', 'dashUrl'):
+ format_url = stream_info.get(url_key)
+ if not format_url:
+ continue
+ decrypted = self._decrypt_xor_cipher(
+ self._DECRYPTION_KEY, compat_b64decode(format_url))
+ if url_key == 'hlsUrl':
+ formats.extend(self._extract_m3u8_formats(
+ decrypted, track_id, 'mp4', entry_protocol='m3u8_native',
+ m3u8_id='hls', fatal=False))
+ elif url_key == 'dashUrl':
+ formats.extend(self._extract_mpd_formats(
+ decrypted, track_id, mpd_id='dash', fatal=False))
+ else:
+ formats.append({
+ 'format_id': 'http',
+ 'url': decrypted,
+ 'vcodec': 'none',
+ 'downloader_options': {
+ # Mixcloud starts throttling at >~5M
+ 'http_chunk_size': 5242880,
+ },
+ })
+
+ if not formats and cloudcast.get('isExclusive'):
+ self.raise_login_required(metadata_available=True)
+
+ comments = []
+ for edge in (try_get(cloudcast, lambda x: x['comments']['edges']) or []):
+ node = edge.get('node') or {}
+ text = strip_or_none(node.get('comment'))
+ if not text:
+ continue
+ user = node.get('user') or {}
+ comments.append({
+ 'author': user.get('displayName'),
+ 'author_id': user.get('username'),
+ 'text': text,
+ 'timestamp': parse_iso8601(node.get('created')),
+ })
+
+ tags = []
+ for t in cloudcast.get('tags') or []:
+ tag = try_get(t, lambda x: x['tag']['name'], compat_str)
+ if not tag:
+ continue
+ tags.append(tag)
+
+ get_count = lambda x: int_or_none(try_get(cloudcast, lambda y: y[x]['totalCount']))
+
+ owner = cloudcast.get('owner') or {}
+
+ return {
+ 'id': track_id,
+ 'title': title,
+ 'formats': formats,
+ 'description': cloudcast.get('description'),
+ 'thumbnail': try_get(cloudcast, lambda x: x['picture']['url'], compat_str),
+ 'uploader': owner.get('displayName'),
+ 'timestamp': parse_iso8601(cloudcast.get('publishDate')),
+ 'uploader_id': owner.get('username'),
+ 'uploader_url': owner.get('url'),
+ 'duration': int_or_none(cloudcast.get('audioLength')),
+ 'view_count': int_or_none(cloudcast.get('plays')),
+ 'like_count': get_count('favorites'),
+ 'repost_count': get_count('reposts'),
+ 'comment_count': get_count('comments'),
+ 'comments': comments,
+ 'tags': tags,
+ 'artist': ', '.join(cloudcast.get('featuringArtistList') or []) or None,
+ }
+
+
+class MixcloudPlaylistBaseIE(MixcloudBaseIE):
+ def _get_cloudcast(self, node):
+ return node
+
+ def _get_playlist_title(self, title, slug):
+ return title
+
+ def _real_extract(self, url):
+ username, slug = self._match_valid_url(url).groups()
+ username = compat_urllib_parse_unquote(username)
+ if not slug:
+ slug = 'uploads'
+ else:
+ slug = compat_urllib_parse_unquote(slug)
+ playlist_id = '%s_%s' % (username, slug)
+
+ is_playlist_type = self._ROOT_TYPE == 'playlist'
+ playlist_type = 'items' if is_playlist_type else slug
+ list_filter = ''
+
+ has_next_page = True
+ entries = []
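+ # Page through the GraphQL connection, passing endCursor back until hasNextPage is false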
+ while has_next_page:
+ playlist = self._call_api(
+ self._ROOT_TYPE, '''%s
+ %s
+ %s(first: 100%s) {
+ edges {
+ node {
+ %s
+ }
+ }
+ pageInfo {
+ endCursor
+ hasNextPage
+ }
+ }''' % (self._TITLE_KEY, self._DESCRIPTION_KEY, playlist_type, list_filter, self._NODE_TEMPLATE),
+ playlist_id, username, slug if is_playlist_type else None)
+
+ items = playlist.get(playlist_type) or {}
+ for edge in items.get('edges', []):
+ cloudcast = self._get_cloudcast(edge.get('node') or {})
+ cloudcast_url = cloudcast.get('url')
+ if not cloudcast_url:
+ continue
+ item_slug = try_get(cloudcast, lambda x: x['slug'], compat_str)
+ owner_username = try_get(cloudcast, lambda x: x['owner']['username'], compat_str)
+ video_id = f'{owner_username}_{item_slug}' if item_slug and owner_username else None
+ entries.append(self.url_result(
+ cloudcast_url, MixcloudIE.ie_key(), video_id))
+
+ page_info = items['pageInfo']
+ has_next_page = page_info['hasNextPage']
+ list_filter = ', after: "%s"' % page_info['endCursor']
+
+ return self.playlist_result(
+ entries, playlist_id,
+ self._get_playlist_title(playlist[self._TITLE_KEY], slug),
+ playlist.get(self._DESCRIPTION_KEY))
+
+
+class MixcloudUserIE(MixcloudPlaylistBaseIE):
+ _VALID_URL = r'https?://(?:www\.)?mixcloud\.com/(?P<id>[^/]+)/(?P<type>uploads|favorites|listens|stream)?/?$'
+ IE_NAME = 'mixcloud:user'
+
+ _TESTS = [{
+ 'url': 'http://www.mixcloud.com/dholbach/',
+ 'info_dict': {
+ 'id': 'dholbach_uploads',
+ 'title': 'Daniel Holbach (uploads)',
+ 'description': 'md5:a3f468a60ac8c3e1f8616380fc469b2b',
+ },
+ 'playlist_mincount': 36,
+ }, {
+ 'url': 'http://www.mixcloud.com/dholbach/uploads/',
+ 'info_dict': {
+ 'id': 'dholbach_uploads',
+ 'title': 'Daniel Holbach (uploads)',
+ 'description': 'md5:a3f468a60ac8c3e1f8616380fc469b2b',
+ },
+ 'playlist_mincount': 36,
+ }, {
+ 'url': 'http://www.mixcloud.com/dholbach/favorites/',
+ 'info_dict': {
+ 'id': 'dholbach_favorites',
+ 'title': 'Daniel Holbach (favorites)',
+ 'description': 'md5:a3f468a60ac8c3e1f8616380fc469b2b',
+ },
+ # 'params': {
+ # 'playlist_items': '1-100',
+ # },
+ 'playlist_mincount': 396,
+ }, {
+ 'url': 'http://www.mixcloud.com/dholbach/listens/',
+ 'info_dict': {
+ 'id': 'dholbach_listens',
+ 'title': 'Daniel Holbach (listens)',
+ 'description': 'md5:b60d776f0bab534c5dabe0a34e47a789',
+ },
+ # 'params': {
+ # 'playlist_items': '1-100',
+ # },
+ 'playlist_mincount': 1623,
+ 'skip': 'Large list',
+ }, {
+ 'url': 'https://www.mixcloud.com/FirstEar/stream/',
+ 'info_dict': {
+ 'id': 'FirstEar_stream',
+ 'title': 'First Ear (stream)',
+ 'description': 'we maraud for ears',
+ },
+ 'playlist_mincount': 269,
+ }]
+
+ _TITLE_KEY = 'displayName'
+ _DESCRIPTION_KEY = 'biog'
+ _ROOT_TYPE = 'user'
+ _NODE_TEMPLATE = '''slug
+ url
+ owner { username }'''
+
+ def _get_playlist_title(self, title, slug):
+ return '%s (%s)' % (title, slug)
+
+
+class MixcloudPlaylistIE(MixcloudPlaylistBaseIE):
+ _VALID_URL = r'https?://(?:www\.)?mixcloud\.com/(?P<user>[^/]+)/playlists/(?P<playlist>[^/]+)/?$'
+ IE_NAME = 'mixcloud:playlist'
+
+ _TESTS = [{
+ 'url': 'https://www.mixcloud.com/maxvibes/playlists/jazzcat-on-ness-radio/',
+ 'info_dict': {
+ 'id': 'maxvibes_jazzcat-on-ness-radio',
+ 'title': 'Ness Radio sessions',
+ },
+ 'playlist_mincount': 59,
+ }]
+ _TITLE_KEY = 'name'
+ _DESCRIPTION_KEY = 'description'
+ _ROOT_TYPE = 'playlist'
+ _NODE_TEMPLATE = '''cloudcast {
+ slug
+ url
+ owner { username }
+ }'''
+
+ def _get_cloudcast(self, node):
+ return node.get('cloudcast') or {}
diff --git a/yt_dlp/extractor/mlb.py b/yt_dlp/extractor/mlb.py
new file mode 100644
index 0000000..d715b97
--- /dev/null
+++ b/yt_dlp/extractor/mlb.py
@@ -0,0 +1,379 @@
+import re
+import urllib.parse
+import uuid
+
+from .common import InfoExtractor
+from ..utils import (
+ determine_ext,
+ int_or_none,
+ join_nonempty,
+ parse_duration,
+ parse_iso8601,
+ traverse_obj,
+ try_get,
+)
+
+
+class MLBBaseIE(InfoExtractor):
+ def _real_extract(self, url):
+ display_id = self._match_id(url)
+ video = self._download_video_data(display_id)
+ video_id = video['id']
+ title = video['title']
+ feed = self._get_feed(video)
+
+ formats = []
+ for playback in (feed.get('playbacks') or []):
+ playback_url = playback.get('url')
+ if not playback_url:
+ continue
+ name = playback.get('name')
+ ext = determine_ext(playback_url)
+ if ext == 'm3u8':
+ formats.extend(self._extract_m3u8_formats(
+ playback_url, video_id, 'mp4',
+ 'm3u8_native', m3u8_id=name, fatal=False))
+ else:
+ f = {
+ 'format_id': name,
+ 'url': playback_url,
+ }
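+ # Progressive playback names and URLs embed bitrate and dimensions; parse them when present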
+ mobj = re.search(r'_(\d+)K_(\d+)X(\d+)', name)
+ if mobj:
+ f.update({
+ 'height': int(mobj.group(3)),
+ 'tbr': int(mobj.group(1)),
+ 'width': int(mobj.group(2)),
+ })
+ mobj = re.search(r'_(\d+)x(\d+)_(\d+)_(\d+)K\.mp4', playback_url)
+ if mobj:
+ f.update({
+ 'fps': int(mobj.group(3)),
+ 'height': int(mobj.group(2)),
+ 'tbr': int(mobj.group(4)),
+ 'width': int(mobj.group(1)),
+ })
+ formats.append(f)
+
+ thumbnails = []
+ for cut in (try_get(feed, lambda x: x['image']['cuts'], list) or []):
+ src = cut.get('src')
+ if not src:
+ continue
+ thumbnails.append({
+ 'height': int_or_none(cut.get('height')),
+ 'url': src,
+ 'width': int_or_none(cut.get('width')),
+ })
+
+ language = (video.get('language') or 'EN').lower()
+
+ return {
+ 'id': video_id,
+ 'title': title,
+ 'formats': formats,
+ 'description': video.get('description'),
+ 'duration': parse_duration(feed.get('duration')),
+ 'thumbnails': thumbnails,
+ 'timestamp': parse_iso8601(video.get(self._TIMESTAMP_KEY)),
+ 'subtitles': self._extract_mlb_subtitles(feed, language),
+ }
+
+
+class MLBIE(MLBBaseIE):
+ _VALID_URL = r'''(?x)
+ https?://
+ (?:[\da-z_-]+\.)*mlb\.com/
+ (?:
+ (?:
+ (?:[^/]+/)*video/[^/]+/c-|
+ (?:
+ shared/video/embed/(?:embed|m-internal-embed)\.html|
+ (?:[^/]+/)+(?:play|index)\.jsp|
+ )\?.*?\bcontent_id=
+ )
+ (?P<id>\d+)
+ )
+ '''
+ _EMBED_REGEX = [
+ r'<iframe[^>]+?src=(["\'])(?P<url>https?://m(?:lb)?\.mlb\.com/shared/video/embed/embed\.html\?.+?)\1',
+ r'data-video-link=["\'](?P<url>http://m\.mlb\.com/video/[^"\']+)',
+ ]
+ _TESTS = [
+ {
+ 'url': 'https://www.mlb.com/mariners/video/ackleys-spectacular-catch/c-34698933',
+ 'md5': '632358dacfceec06bad823b83d21df2d',
+ 'info_dict': {
+ 'id': '34698933',
+ 'ext': 'mp4',
+ 'title': "Ackley's spectacular catch",
+ 'description': 'md5:7f5a981eb4f3cbc8daf2aeffa2215bf0',
+ 'duration': 66,
+ 'timestamp': 1405995000,
+ 'upload_date': '20140722',
+ 'thumbnail': r're:^https?://.*\.jpg$',
+ },
+ },
+ {
+ 'url': 'https://www.mlb.com/video/stanton-prepares-for-derby/c-34496663',
+ 'md5': 'bf2619bf9cacc0a564fc35e6aeb9219f',
+ 'info_dict': {
+ 'id': '34496663',
+ 'ext': 'mp4',
+ 'title': 'Stanton prepares for Derby',
+ 'description': 'md5:d00ce1e5fd9c9069e9c13ab4faedfa57',
+ 'duration': 46,
+ 'timestamp': 1405120200,
+ 'upload_date': '20140711',
+ 'thumbnail': r're:^https?://.*\.jpg$',
+ },
+ },
+ {
+ 'url': 'https://www.mlb.com/video/cespedes-repeats-as-derby-champ/c-34578115',
+ 'md5': '99bb9176531adc600b90880fb8be9328',
+ 'info_dict': {
+ 'id': '34578115',
+ 'ext': 'mp4',
+ 'title': 'Cespedes repeats as Derby champ',
+ 'description': 'md5:08df253ce265d4cf6fb09f581fafad07',
+ 'duration': 488,
+ 'timestamp': 1405414336,
+ 'upload_date': '20140715',
+ 'thumbnail': r're:^https?://.*\.jpg$',
+ },
+ },
+ {
+ 'url': 'https://www.mlb.com/video/bautista-on-home-run-derby/c-34577915',
+ 'md5': 'da8b57a12b060e7663ee1eebd6f330ec',
+ 'info_dict': {
+ 'id': '34577915',
+ 'ext': 'mp4',
+ 'title': 'Bautista on Home Run Derby',
+ 'description': 'md5:b80b34031143d0986dddc64a8839f0fb',
+ 'duration': 52,
+ 'timestamp': 1405405122,
+ 'upload_date': '20140715',
+ 'thumbnail': r're:^https?://.*\.jpg$',
+ },
+ },
+ {
+ 'url': 'https://www.mlb.com/video/hargrove-homers-off-caldwell/c-1352023483?tid=67793694',
+ 'only_matching': True,
+ },
+ {
+ 'url': 'http://m.mlb.com/shared/video/embed/embed.html?content_id=35692085&topic_id=6479266&width=400&height=224&property=mlb',
+ 'only_matching': True,
+ },
+ {
+ 'url': 'http://mlb.mlb.com/shared/video/embed/embed.html?content_id=36599553',
+ 'only_matching': True,
+ },
+ {
+ 'url': 'http://mlb.mlb.com/es/video/play.jsp?content_id=36599553',
+ 'only_matching': True,
+ },
+ {
+ 'url': 'https://www.mlb.com/cardinals/video/piscottys-great-sliding-catch/c-51175783',
+ 'only_matching': True,
+ },
+ {
+ # From http://m.mlb.com/news/article/118550098/blue-jays-kevin-pillar-goes-spidey-up-the-wall-to-rob-tim-beckham-of-a-homer
+ 'url': 'http://mlb.mlb.com/shared/video/embed/m-internal-embed.html?content_id=75609783&property=mlb&autoplay=true&hashmode=false&siteSection=mlb/multimedia/article_118550098/article_embed&club=mlb',
+ 'only_matching': True,
+ },
+ ]
+ _TIMESTAMP_KEY = 'date'
+
+ @staticmethod
+ def _get_feed(video):
+ return video
+
+ @staticmethod
+ def _extract_mlb_subtitles(feed, language):
+ subtitles = {}
+ for keyword in (feed.get('keywordsAll') or []):
+ keyword_type = keyword.get('type')
+ if keyword_type and keyword_type.startswith('closed_captions_location_'):
+ cc_location = keyword.get('value')
+ if cc_location:
+ subtitles.setdefault(language, []).append({
+ 'url': cc_location,
+ })
+ return subtitles
+
+ def _download_video_data(self, display_id):
+ return self._download_json(
+ 'http://content.mlb.com/mlb/item/id/v1/%s/details/web-v1.json' % display_id,
+ display_id)
+
+
+class MLBVideoIE(MLBBaseIE):
+ _VALID_URL = r'https?://(?:www\.)?mlb\.com/(?:[^/]+/)*video/(?P<id>[^/?&#]+)'
+ _TEST = {
+ 'url': 'https://www.mlb.com/mariners/video/ackley-s-spectacular-catch-c34698933',
+ 'md5': '632358dacfceec06bad823b83d21df2d',
+ 'info_dict': {
+ 'id': 'c04a8863-f569-42e6-9f87-992393657614',
+ 'ext': 'mp4',
+ 'title': "Ackley's spectacular catch",
+ 'description': 'md5:7f5a981eb4f3cbc8daf2aeffa2215bf0',
+ 'duration': 66,
+ 'timestamp': 1405995000,
+ 'upload_date': '20140722',
+ 'thumbnail': r're:^https?://.+',
+ },
+ }
+ _TIMESTAMP_KEY = 'timestamp'
+
+ @classmethod
+ def suitable(cls, url):
+ return False if MLBIE.suitable(url) else super(MLBVideoIE, cls).suitable(url)
+
+ @staticmethod
+ def _get_feed(video):
+ return video['feeds'][0]
+
+ @staticmethod
+ def _extract_mlb_subtitles(feed, language):
+ subtitles = {}
+ for cc_location in (feed.get('closedCaptions') or []):
+ subtitles.setdefault(language, []).append({
+ 'url': cc_location,
+ })
+ return subtitles
+
+ def _download_video_data(self, display_id):
+ # https://www.mlb.com/data-service/en/videos/[SLUG]
+ return self._download_json(
+ 'https://fastball-gateway.mlb.com/graphql',
+ display_id, query={
+ 'query': '''{
+ mediaPlayback(ids: "%s") {
+ description
+ feeds(types: CMS) {
+ closedCaptions
+ duration
+ image {
+ cuts {
+ width
+ height
+ src
+ }
+ }
+ playbacks {
+ name
+ url
+ }
+ }
+ id
+ timestamp
+ title
+ }
+}''' % display_id,
+ })['data']['mediaPlayback'][0]
+
+
+class MLBTVIE(InfoExtractor):
+ _VALID_URL = r'https?://(?:www\.)?mlb\.com/tv/g(?P<id>\d{6})'
+ _NETRC_MACHINE = 'mlb'
+
+ _TESTS = [{
+ 'url': 'https://www.mlb.com/tv/g661581/vee2eff5f-a7df-4c20-bdb4-7b926fa12638',
+ 'info_dict': {
+ 'id': '661581',
+ 'ext': 'mp4',
+ 'title': '2022-07-02 - St. Louis Cardinals @ Philadelphia Phillies',
+ },
+ 'params': {
+ 'skip_download': True,
+ },
+ }]
+ _access_token = None
+
+ def _real_initialize(self):
+ if not self._access_token:
+ self.raise_login_required(
+ 'All videos are only available to registered users', method='password')
+
+ def _perform_login(self, username, password):
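+ # Login chains three token exchanges: an OAuth password grant at ids.mlb.com,
+ # an entitlement JWT from media-entitlement.mlb.com, then a BAMTech (bamgrid) device token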
+ data = f'grant_type=password&username={urllib.parse.quote(username)}&password={urllib.parse.quote(password)}&scope=openid offline_access&client_id=0oa3e1nutA1HLzAKG356'
+ access_token = self._download_json(
+ 'https://ids.mlb.com/oauth2/aus1m088yK07noBfh356/v1/token', None,
+ headers={
+ 'User-Agent': 'okhttp/3.12.1',
+ 'Content-Type': 'application/x-www-form-urlencoded'
+ }, data=data.encode())['access_token']
+
+ entitlement = self._download_webpage(
+ f'https://media-entitlement.mlb.com/api/v3/jwt?os=Android&appname=AtBat&did={uuid.uuid4()}', None,
+ headers={
+ 'User-Agent': 'okhttp/3.12.1',
+ 'Authorization': f'Bearer {access_token}'
+ })
+
+ data = f'grant_type=urn:ietf:params:oauth:grant-type:token-exchange&subject_token={entitlement}&subject_token_type=urn:ietf:params:oauth:token-type:jwt&platform=android-tv'
+ self._access_token = self._download_json(
+ 'https://us.edge.bamgrid.com/token', None,
+ headers={
+ 'Accept': 'application/json',
+ 'Authorization': 'Bearer bWxidHYmYW5kcm9pZCYxLjAuMA.6LZMbH2r--rbXcgEabaDdIslpo4RyZrlVfWZhsAgXIk',
+ 'Content-Type': 'application/x-www-form-urlencoded'
+ }, data=data.encode())['access_token']
+
+ def _real_extract(self, url):
+ video_id = self._match_id(url)
+ airings = self._download_json(
+ f'https://search-api-mlbtv.mlb.com/svc/search/v2/graphql/persisted/query/core/Airings?variables=%7B%22partnerProgramIds%22%3A%5B%22{video_id}%22%5D%2C%22applyEsniMediaRightsLabels%22%3Atrue%7D',
+ video_id)['data']['Airings']
+
+ formats, subtitles = [], {}
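+ # Each airing carries its own playback URL; formats and subtitles are merged across all airings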
+ for airing in airings:
+ m3u8_url = self._download_json(
+ airing['playbackUrls'][0]['href'].format(scenario='browser~csai'), video_id,
+ headers={
+ 'Authorization': self._access_token,
+ 'Accept': 'application/vnd.media-service+json; version=2'
+ })['stream']['complete']
+ f, s = self._extract_m3u8_formats_and_subtitles(
+ m3u8_url, video_id, 'mp4', m3u8_id=join_nonempty(airing.get('feedType'), airing.get('feedLanguage')))
+ formats.extend(f)
+ self._merge_subtitles(s, target=subtitles)
+
+ return {
+ 'id': video_id,
+ 'title': traverse_obj(airings, (..., 'titles', 0, 'episodeName'), get_all=False),
+ 'is_live': traverse_obj(airings, (..., 'mediaConfig', 'productType'), get_all=False) == 'LIVE',
+ 'formats': formats,
+ 'subtitles': subtitles,
+ 'http_headers': {'Authorization': f'Bearer {self._access_token}'},
+ }
+
+
+class MLBArticleIE(InfoExtractor):
+ _VALID_URL = r'https?://www\.mlb\.com/news/(?P<id>[\w-]+)'
+ _TESTS = [{
+ 'url': 'https://www.mlb.com/news/manny-machado-robs-guillermo-heredia-reacts',
+ 'info_dict': {
+ 'id': '36db7394-343c-4ea3-b8ca-ead2e61bca9a',
+ 'title': 'Machado\'s grab draws hilarious irate reaction',
+ 'modified_timestamp': 1675888370,
+ 'description': 'md5:a19d4eb0487b2cb304e9a176f6b67676',
+ 'modified_date': '20230208',
+ },
+ 'playlist_mincount': 2,
+ }]
+
+ def _real_extract(self, url):
+ display_id = self._match_id(url)
+ webpage = self._download_webpage(url, display_id)
+ apollo_cache_json = self._search_json(r'window\.initState\s*=', webpage, 'window.initState', display_id)['apolloCache']
+
+ content_real_info = traverse_obj(
+ apollo_cache_json, ('ROOT_QUERY', lambda k, _: k.startswith('getArticle')), get_all=False)
+
+ return self.playlist_from_matches(
+ traverse_obj(content_real_info, ('parts', lambda _, v: v['__typename'] == 'Video' or v['type'] == 'video')),
+ getter=lambda x: f'https://www.mlb.com/video/{x["slug"]}',
+ ie=MLBVideoIE, playlist_id=content_real_info.get('translationId'),
+ title=self._html_search_meta('og:title', webpage),
+ description=content_real_info.get('summary'),
+ modified_timestamp=parse_iso8601(content_real_info.get('lastUpdatedDate')))
diff --git a/yt_dlp/extractor/mlssoccer.py b/yt_dlp/extractor/mlssoccer.py
new file mode 100644
index 0000000..9383f13
--- /dev/null
+++ b/yt_dlp/extractor/mlssoccer.py
@@ -0,0 +1,114 @@
+from .common import InfoExtractor
+
+
+class MLSSoccerIE(InfoExtractor):
+ _VALID_DOMAINS = r'(?:(?:cfmontreal|intermiamicf|lagalaxy|lafc|houstondynamofc|dcunited|atlutd|mlssoccer|fcdallas|columbuscrew|coloradorapids|fccincinnati|chicagofirefc|austinfc|nashvillesc|whitecapsfc|sportingkc|soundersfc|sjearthquakes|rsl|timbers|philadelphiaunion|orlandocitysc|newyorkredbulls|nycfc)\.com|(?:torontofc)\.ca|(?:revolutionsoccer)\.net)'
+ _VALID_URL = r'https?://(?:www\.)?%s/video/#?(?P<id>[^/&$#?]+)' % _VALID_DOMAINS
+
+ _TESTS = [{
+ 'url': 'https://www.mlssoccer.com/video/the-octagon-can-alphonso-davies-lead-canada-to-first-world-cup-since-1986#the-octagon-can-alphonso-davies-lead-canada-to-first-world-cup-since-1986',
+ 'info_dict': {
+ 'id': '6276033198001',
+ 'ext': 'mp4',
+ 'title': 'The Octagon | Can Alphonso Davies lead Canada to first World Cup since 1986?',
+ 'description': 'md5:f0a883ee33592a0221798f451a98be8f',
+ 'thumbnail': 'https://cf-images.us-east-1.prod.boltdns.net/v1/static/5530036772001/1bbc44f6-c63c-4981-82fa-46b0c1f891e0/5c1ca44a-a033-4e98-b531-ff24c4947608/160x90/match/image.jpg',
+ 'duration': 350.165,
+ 'timestamp': 1633627291,
+ 'uploader_id': '5530036772001',
+ 'tags': ['club/canada'],
+ 'is_live': False,
+ 'upload_date': '20211007',
+ 'filesize_approx': 255193528.83200002
+ },
+ 'params': {'skip_download': True}
+ }, {
+ 'url': 'https://www.whitecapsfc.com/video/highlights-san-jose-earthquakes-vs-vancouver-whitecaps-fc-october-23-2021#highlights-san-jose-earthquakes-vs-vancouver-whitecaps-fc-october-23-2021',
+ 'only_matching': True
+ }, {
+ 'url': 'https://www.torontofc.ca/video/highlights-toronto-fc-vs-cf-montreal-october-23-2021-x6733#highlights-toronto-fc-vs-cf-montreal-october-23-2021-x6733',
+ 'only_matching': True
+ }, {
+ 'url': 'https://www.sportingkc.com/video/post-match-press-conference-john-pulskamp-oct-27-2021#post-match-press-conference-john-pulskamp-oct-27-2021',
+ 'only_matching': True
+ }, {
+ 'url': 'https://www.soundersfc.com/video/highlights-seattle-sounders-fc-vs-sporting-kansas-city-october-23-2021',
+ 'only_matching': True
+ }, {
+ 'url': 'https://www.sjearthquakes.com/video/#highlights-austin-fc-vs-san-jose-earthquakes-june-19-2021',
+ 'only_matching': True
+ }, {
+ 'url': 'https://www.rsl.com/video/2021-u-of-u-health-mic-d-up-vs-colorado-10-16-21#2021-u-of-u-health-mic-d-up-vs-colorado-10-16-21',
+ 'only_matching': True
+ }, {
+ 'url': 'https://www.timbers.com/video/highlights-d-chara-asprilla-with-goals-in-portland-timbers-2-0-win-over-san-jose#highlights-d-chara-asprilla-with-goals-in-portland-timbers-2-0-win-over-san-jose',
+ 'only_matching': True
+ }, {
+ 'url': 'https://www.philadelphiaunion.com/video/highlights-torvphi',
+ 'only_matching': True
+ }, {
+ 'url': 'https://www.orlandocitysc.com/video/highlight-columbus-crew-vs-orlando-city-sc',
+ 'only_matching': True
+ }, {
+ 'url': 'https://www.newyorkredbulls.com/video/all-access-matchday-double-derby-week#all-access-matchday-double-derby-week',
+ 'only_matching': True
+ }, {
+ 'url': 'https://www.nycfc.com/video/highlights-nycfc-1-0-chicago-fire-fc#highlights-nycfc-1-0-chicago-fire-fc',
+ 'only_matching': True
+ }, {
+ 'url': 'https://www.revolutionsoccer.net/video/two-minute-highlights-revs-1-rapids-0-october-27-2021#two-minute-highlights-revs-1-rapids-0-october-27-2021',
+ 'only_matching': True
+ }, {
+ 'url': 'https://www.nashvillesc.com/video/goal-c-j-sapong-nashville-sc-92nd-minute',
+ 'only_matching': True
+ }, {
+ 'url': 'https://www.cfmontreal.com/video/faits-saillants-tor-v-mtl#faits-saillants-orl-v-mtl-x5645',
+ 'only_matching': True
+ }, {
+ 'url': 'https://www.intermiamicf.com/video/all-access-victory-vs-nashville-sc-by-ukg#all-access-victory-vs-nashville-sc-by-ukg',
+ 'only_matching': True
+ }, {
+ 'url': 'https://www.lagalaxy.com/video/#moment-of-the-month-presented-by-san-manuel-casino-rayan-raveloson-scores-his-se',
+ 'only_matching': True
+ }, {
+ 'url': 'https://www.lafc.com/video/breaking-down-lafc-s-final-6-matches-of-the-2021-mls-regular-season#breaking-down-lafc-s-final-6-matches-of-the-2021-mls-regular-season',
+ 'only_matching': True
+ }, {
+ 'url': 'https://www.houstondynamofc.com/video/postgame-press-conference-michael-nelson-presented-by-coushatta-casino-res-x9660#postgame-press-conference-michael-nelson-presented-by-coushatta-casino-res-x9660',
+ 'only_matching': True
+ }, {
+ 'url': 'https://www.dcunited.com/video/tony-alfaro-my-family-pushed-me-to-believe-everything-was-possible',
+ 'only_matching': True
+ }, {
+ 'url': 'https://www.fcdallas.com/video/highlights-fc-dallas-vs-minnesota-united-fc-october-02-2021#highlights-fc-dallas-vs-minnesota-united-fc-october-02-2021',
+ 'only_matching': True
+ }, {
+ 'url': 'https://www.columbuscrew.com/video/match-rewind-columbus-crew-vs-new-york-red-bulls-october-23-2021',
+ 'only_matching': True
+ }, {
+ 'url': 'https://www.coloradorapids.com/video/postgame-reaction-robin-fraser-october-27#postgame-reaction-robin-fraser-october-27',
+ 'only_matching': True
+ }, {
+ 'url': 'https://www.fccincinnati.com/video/#keeping-cincy-chill-presented-by-coors-lite',
+ 'only_matching': True
+ }, {
+ 'url': 'https://www.chicagofirefc.com/video/all-access-fire-score-dramatic-road-win-in-cincy#all-access-fire-score-dramatic-road-win-in-cincy',
+ 'only_matching': True
+ }, {
+ 'url': 'https://www.austinfc.com/video/highlights-colorado-rapids-vs-austin-fc-september-29-2021#highlights-colorado-rapids-vs-austin-fc-september-29-2021',
+ 'only_matching': True
+ }, {
+ 'url': 'https://www.atlutd.com/video/goal-josef-martinez-scores-in-the-73rd-minute#goal-josef-martinez-scores-in-the-73rd-minute',
+ 'only_matching': True
+ }]
+
+ def _real_extract(self, url):
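+        # the player config is an HTML-escaped JSON blob in the data-options
+        # attribute; the first videoList entry carries the Brightcove account
+        # and video IDs used to build the embed URL below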
+        video_id = self._match_id(url)
+        webpage = self._download_webpage(url, video_id)
+        data_json = self._parse_json(self._html_search_regex(r'data-options="([^"]+)"', webpage, 'json'), video_id)['videoList'][0]
+        return {
+            'id': video_id,
+ '_type': 'url',
+ 'url': 'https://players.brightcove.net/%s/default_default/index.html?videoId=%s' % (data_json['accountId'], data_json['videoId']),
+ 'ie_key': 'BrightcoveNew',
+ }
diff --git a/yt_dlp/extractor/mocha.py b/yt_dlp/extractor/mocha.py
new file mode 100644
index 0000000..2fbc0e9
--- /dev/null
+++ b/yt_dlp/extractor/mocha.py
@@ -0,0 +1,64 @@
+from .common import InfoExtractor
+from ..utils import int_or_none, traverse_obj
+
+
+class MochaVideoIE(InfoExtractor):
+ _VALID_URL = r'https?://video\.mocha\.com\.vn/(?P<video_slug>[\w-]+)'
+ _TESTS = [{
+ 'url': 'http://video.mocha.com.vn/chuyen-meo-gia-su-tu-thong-diep-cuoc-song-v18694039',
+ 'info_dict': {
+ 'id': '18694039',
+ 'title': 'Chuyện mèo giả sư tử | Thông điệp cuộc sống',
+ 'ext': 'mp4',
+ 'view_count': int,
+ 'like_count': int,
+ 'dislike_count': int,
+ 'display_id': 'chuyen-meo-gia-su-tu-thong-diep-cuoc-song',
+ 'thumbnail': 'http://mcvideomd1fr.keeng.net/playnow/images/20220505/ad0a055d-2f69-42ca-b888-4790041fe6bc_640x480.jpg',
+ 'description': '',
+ 'duration': 70,
+ 'timestamp': 1652254203,
+ 'upload_date': '20220511',
+ 'comment_count': int,
+ 'categories': ['Kids']
+ }
+ }]
+
+ def _real_extract(self, url):
+ video_slug = self._match_valid_url(url).group('video_slug')
+ json_data = self._download_json(
+ 'http://apivideo.mocha.com.vn:8081/onMediaBackendBiz/mochavideo/getVideoDetail',
+ video_slug, query={'url': url, 'token': ''})['data']['videoDetail']
+ video_id = str(json_data['id'])
+ video_urls = (json_data.get('list_resolution') or []) + [json_data.get('original_path')]
+
+ formats, subtitles = [], {}
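+        # video_urls mixes plain strings (original_path, a direct MP4 link)
+        # with dicts from list_resolution that point at HLS manifests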
+ for video in video_urls:
+ if isinstance(video, str):
+                formats.append({'url': video, 'ext': 'mp4'})
+ else:
+ fmts, subs = self._extract_m3u8_formats_and_subtitles(
+ video.get('video_path'), video_id, ext='mp4')
+ formats.extend(fmts)
+ self._merge_subtitles(subs, target=subtitles)
+
+ return {
+ 'id': video_id,
+ 'display_id': json_data.get('slug') or video_slug,
+ 'title': json_data.get('name'),
+ 'formats': formats,
+ 'subtitles': subtitles,
+ 'description': json_data.get('description'),
+ 'duration': json_data.get('durationS'),
+ 'view_count': json_data.get('total_view'),
+ 'like_count': json_data.get('total_like'),
+ 'dislike_count': json_data.get('total_unlike'),
+ 'thumbnail': json_data.get('image_path_thumb'),
+ 'timestamp': int_or_none(json_data.get('publish_time'), scale=1000),
+ 'is_live': json_data.get('isLive'),
+ 'channel': traverse_obj(json_data, ('channels', '0', 'name')),
+ 'channel_id': traverse_obj(json_data, ('channels', '0', 'id')),
+ 'channel_follower_count': traverse_obj(json_data, ('channels', '0', 'numfollow')),
+ 'categories': traverse_obj(json_data, ('categories', ..., 'categoryname')),
+ 'comment_count': json_data.get('total_comment'),
+ }
diff --git a/yt_dlp/extractor/mojvideo.py b/yt_dlp/extractor/mojvideo.py
new file mode 100644
index 0000000..d47ad07
--- /dev/null
+++ b/yt_dlp/extractor/mojvideo.py
@@ -0,0 +1,52 @@
+from .common import InfoExtractor
+from ..utils import (
+ ExtractorError,
+ parse_duration,
+)
+
+
+class MojvideoIE(InfoExtractor):
+ _VALID_URL = r'https?://(?:www\.)?mojvideo\.com/video-(?P<display_id>[^/]+)/(?P<id>[a-f0-9]+)'
+ _TEST = {
+ 'url': 'http://www.mojvideo.com/video-v-avtu-pred-mano-rdecelaska-alfi-nipic/3d1ed4497707730b2906',
+ 'md5': 'f7fd662cc8ce2be107b0d4f2c0483ae7',
+ 'info_dict': {
+ 'id': '3d1ed4497707730b2906',
+ 'display_id': 'v-avtu-pred-mano-rdecelaska-alfi-nipic',
+ 'ext': 'mp4',
+ 'title': 'V avtu pred mano rdečelaska - Alfi Nipič',
+ 'thumbnail': r're:^http://.*\.jpg$',
+ 'duration': 242,
+ }
+ }
+
+ def _real_extract(self, url):
+ mobj = self._match_valid_url(url)
+ video_id = mobj.group('id')
+ display_id = mobj.group('display_id')
+
+        # the playerapi response is malformed XML, so download it as plain
+        # text and scrape the fields with regexes instead of _download_xml
+ playerapi = self._download_webpage(
+ 'http://www.mojvideo.com/playerapi.php?v=%s&t=1' % video_id, display_id)
+
+ if '<error>true</error>' in playerapi:
+ error_desc = self._html_search_regex(
+ r'<errordesc>([^<]*)</errordesc>', playerapi, 'error description', fatal=False)
+ raise ExtractorError('%s said: %s' % (self.IE_NAME, error_desc), expected=True)
+
+ title = self._html_extract_title(playerapi)
+ video_url = self._html_search_regex(
+ r'<file>([^<]+)</file>', playerapi, 'video URL')
+ thumbnail = self._html_search_regex(
+ r'<preview>([^<]+)</preview>', playerapi, 'thumbnail', fatal=False)
+ duration = parse_duration(self._html_search_regex(
+ r'<duration>([^<]+)</duration>', playerapi, 'duration', fatal=False))
+
+ return {
+ 'id': video_id,
+ 'display_id': display_id,
+ 'url': video_url,
+ 'title': title,
+ 'thumbnail': thumbnail,
+ 'duration': duration,
+ }
diff --git a/yt_dlp/extractor/monstercat.py b/yt_dlp/extractor/monstercat.py
new file mode 100644
index 0000000..a69a12e
--- /dev/null
+++ b/yt_dlp/extractor/monstercat.py
@@ -0,0 +1,77 @@
+import re
+
+from .common import InfoExtractor
+from ..utils import (
+ clean_html,
+ extract_attributes,
+ get_element_by_class,
+ get_element_html_by_class,
+ get_element_text_and_html_by_tag,
+ int_or_none,
+    strip_or_none,
+    traverse_obj,
+    try_call,
+    unified_strdate,
+)
+
+
+class MonstercatIE(InfoExtractor):
+ _VALID_URL = r'https?://www\.monstercat\.com/release/(?P<id>\d+)'
+ _TESTS = [{
+ 'url': 'https://www.monstercat.com/release/742779548009',
+ 'playlist_count': 20,
+ 'info_dict': {
+ 'title': 'The Secret Language of Trees',
+ 'id': '742779548009',
+ 'thumbnail': 'https://www.monstercat.com/release/742779548009/cover',
+ 'release_date': '20230711',
+ 'album': 'The Secret Language of Trees',
+ 'album_artist': 'BT',
+ }
+ }]
+
+ def _extract_tracks(self, table, album_meta):
+        for td in re.findall(r'<tr[^<]*>((?:(?!</tr>)[\w\W])+)', table):  # regex (via ChatGPT) in lieu of a get_elements_by_tag helper
+ title = clean_html(try_call(
+ lambda: get_element_by_class('d-inline-flex flex-column', td).partition(' <span')[0]))
+ ids = extract_attributes(try_call(lambda: get_element_html_by_class('btn-play cursor-pointer mr-small', td)) or '')
+ track_id = ids.get('data-track-id')
+ release_id = ids.get('data-release-id')
+
+ track_number = int_or_none(try_call(lambda: get_element_by_class('py-xsmall', td)))
+ if not track_id or not release_id:
+ self.report_warning(f'Skipping track {track_number}, ID(s) not found')
+ self.write_debug(f'release_id={repr(release_id)} track_id={repr(track_id)}')
+ continue
+ yield {
+ **album_meta,
+ 'title': title,
+ 'track': title,
+ 'track_number': track_number,
+ 'artist': clean_html(try_call(lambda: get_element_by_class('d-block fs-xxsmall', td))),
+ 'url': f'https://www.monstercat.com/api/release/{release_id}/track-stream/{track_id}',
+ 'id': track_id,
+ 'ext': 'mp3'
+ }
+
+ def _real_extract(self, url):
+ url_id = self._match_id(url)
+ html = self._download_webpage(url, url_id)
+ # wrap all `get_elements` in `try_call`, HTMLParser has problems with site's html
+ tracklist_table = try_call(lambda: get_element_by_class('table table-small', html)) or ''
+
+ title = try_call(lambda: get_element_text_and_html_by_tag('h1', html)[0])
+        date = traverse_obj(html, (
+            {lambda h: get_element_by_class(
+                'font-italic mb-medium d-tablet-none d-phone-block', h).partition('Released ')},
+            2, {strip_or_none}, {unified_strdate}))
+
+ album_meta = {
+ 'title': title,
+ 'album': title,
+ 'thumbnail': f'https://www.monstercat.com/release/{url_id}/cover',
+ 'album_artist': try_call(
+ lambda: get_element_by_class('h-normal text-uppercase mb-desktop-medium mb-smallish', html)),
+ 'release_date': date,
+ }
+
+ return self.playlist_result(
+ self._extract_tracks(tracklist_table, album_meta), playlist_id=url_id, **album_meta)
diff --git a/yt_dlp/extractor/motherless.py b/yt_dlp/extractor/motherless.py
new file mode 100644
index 0000000..160150a
--- /dev/null
+++ b/yt_dlp/extractor/motherless.py
@@ -0,0 +1,297 @@
+import datetime
+import re
+import urllib.parse
+
+from .common import InfoExtractor
+from ..utils import (
+ ExtractorError,
+ OnDemandPagedList,
+ remove_end,
+ str_to_int,
+ unified_strdate,
+)
+
+
+class MotherlessIE(InfoExtractor):
+ _VALID_URL = r'https?://(?:www\.)?motherless\.com/(?:g/[a-z0-9_]+/|G[VIG]?[A-F0-9]+/)?(?P<id>[A-F0-9]+)'
+ _TESTS = [{
+ 'url': 'http://motherless.com/EE97006',
+ 'md5': 'cb5e7438f7a3c4e886b7bccc1292a3bc',
+ 'info_dict': {
+ 'id': 'EE97006',
+ 'ext': 'mp4',
+ 'title': 'Dogging blond Brit getting glazed (comp)',
+ 'categories': ['UK', 'slag', 'whore', 'dogging', 'cunt', 'cumhound', 'big tits', 'Pearl Necklace'],
+ 'upload_date': '20230519',
+ 'uploader_id': 'deathbird',
+ 'thumbnail': r're:https?://.*\.jpg',
+ 'age_limit': 18,
+ 'comment_count': int,
+ 'view_count': int,
+ 'like_count': int,
+ },
+ 'params': {
+ # Incomplete cert chains
+ 'nocheckcertificate': True,
+ },
+ }, {
+ 'url': 'http://motherless.com/532291B',
+ 'md5': 'bc59a6b47d1f958e61fbd38a4d31b131',
+ 'info_dict': {
+ 'id': '532291B',
+ 'ext': 'mp4',
+ 'title': 'Amazing girl playing the omegle game, PERFECT!',
+ 'categories': ['Amateur', 'webcam', 'omegle', 'pink', 'young', 'masturbate', 'teen',
+ 'game', 'hairy'],
+ 'upload_date': '20140622',
+ 'uploader_id': 'Sulivana7x',
+ 'thumbnail': r're:https?://.*\.jpg',
+ 'age_limit': 18,
+ },
+ 'skip': '404',
+ }, {
+ 'url': 'http://motherless.com/g/cosplay/633979F',
+ 'md5': '0b2a43f447a49c3e649c93ad1fafa4a0',
+ 'info_dict': {
+ 'id': '633979F',
+ 'ext': 'mp4',
+ 'title': 'Turtlette',
+ 'categories': ['superheroine heroine superher'],
+ 'upload_date': '20140827',
+ 'uploader_id': 'shade0230',
+ 'thumbnail': r're:https?://.*\.jpg',
+ 'age_limit': 18,
+ 'like_count': int,
+ 'comment_count': int,
+ 'view_count': int,
+ },
+ 'params': {
+ 'nocheckcertificate': True,
+ },
+ }, {
+ 'url': 'http://motherless.com/8B4BBC1',
+ 'info_dict': {
+ 'id': '8B4BBC1',
+ 'ext': 'mp4',
+ 'title': 'VIDEO00441.mp4',
+ 'categories': [],
+ 'upload_date': '20160214',
+ 'uploader_id': 'NMWildGirl',
+ 'thumbnail': r're:https?://.*\.jpg',
+ 'age_limit': 18,
+ 'like_count': int,
+ 'comment_count': int,
+ 'view_count': int,
+ },
+ 'params': {
+ 'nocheckcertificate': True,
+ },
+ }, {
+ # see https://motherless.com/videos/recent for recent videos with
+ # uploaded date in "ago" format
+ 'url': 'https://motherless.com/3C3E2CF',
+ 'info_dict': {
+ 'id': '3C3E2CF',
+ 'ext': 'mp4',
+ 'title': 'a/ Hot Teens',
+ 'categories': list,
+ 'upload_date': '20210104',
+ 'uploader_id': 'anonymous',
+ 'thumbnail': r're:https?://.*\.jpg',
+ 'age_limit': 18,
+ 'like_count': int,
+ 'comment_count': int,
+ 'view_count': int,
+ },
+ 'params': {
+ 'nocheckcertificate': True,
+ },
+ }]
+
+ def _real_extract(self, url):
+ video_id = self._match_id(url)
+ webpage = self._download_webpage(url, video_id)
+
+ if any(p in webpage for p in (
+ '<title>404 - MOTHERLESS.COM<',
+ ">The page you're looking for cannot be found.<")):
+ raise ExtractorError('Video %s does not exist' % video_id, expected=True)
+
+ if '>The content you are trying to view is for friends only.' in webpage:
+ raise ExtractorError('Video %s is for friends only' % video_id, expected=True)
+
+ title = self._html_search_regex(
+ (r'(?s)<div[^>]+\bclass=["\']media-meta-title[^>]+>(.+?)</div>',
+ r'id="view-upload-title">\s+([^<]+)<'), webpage, 'title')
+ video_url = (self._html_search_regex(
+ (r'setup\(\{\s*["\']file["\']\s*:\s*(["\'])(?P<url>(?:(?!\1).)+)\1',
+ r'fileurl\s*=\s*(["\'])(?P<url>(?:(?!\1).)+)\1'),
+ webpage, 'video URL', default=None, group='url')
+ or 'http://cdn4.videos.motherlessmedia.com/videos/%s.mp4?fs=opencloud' % video_id)
+ age_limit = self._rta_search(webpage)
+ view_count = str_to_int(self._html_search_regex(
+ (r'>([\d,.]+)\s+Views<', r'<strong>Views</strong>\s+([^<]+)<'),
+ webpage, 'view count', fatal=False))
+ like_count = str_to_int(self._html_search_regex(
+ (r'>([\d,.]+)\s+Favorites<',
+ r'<strong>Favorited</strong>\s+([^<]+)<'),
+ webpage, 'like count', fatal=False))
+
+ upload_date = unified_strdate(self._search_regex(
+ r'class=["\']count[^>]+>(\d+\s+[a-zA-Z]{3}\s+\d{4})<', webpage,
+ 'upload date', default=None))
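+        # recent uploads only expose a relative age such as "12h ago" or
+        # "3d ago"; approximate upload_date by subtracting it from now (UTC)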
+ if not upload_date:
+ uploaded_ago = self._search_regex(
+ r'>\s*(\d+[hd])\s+[aA]go\b', webpage, 'uploaded ago',
+ default=None)
+ if uploaded_ago:
+ delta = int(uploaded_ago[:-1])
+ _AGO_UNITS = {
+ 'h': 'hours',
+ 'd': 'days',
+ }
+ kwargs = {_AGO_UNITS.get(uploaded_ago[-1]): delta}
+ upload_date = (datetime.datetime.now(datetime.timezone.utc) - datetime.timedelta(**kwargs)).strftime('%Y%m%d')
+
+ comment_count = len(re.findall(r'''class\s*=\s*['"]media-comment-contents\b''', webpage))
+ uploader_id = self._html_search_regex(
+ (r'''<span\b[^>]+\bclass\s*=\s*["']username\b[^>]*>([^<]+)</span>''',
+ r'''(?s)['"](?:media-meta-member|thumb-member-username)\b[^>]+>\s*<a\b[^>]+\bhref\s*=\s*['"]/m/([^"']+)'''),
+ webpage, 'uploader_id', fatal=False)
+ categories = self._html_search_meta('keywords', webpage, default='')
+ categories = [cat.strip() for cat in categories.split(',') if cat.strip()]
+
+ return {
+ 'id': video_id,
+ 'title': title,
+ 'upload_date': upload_date,
+ 'uploader_id': uploader_id,
+ 'thumbnail': self._og_search_thumbnail(webpage),
+ 'categories': categories,
+ 'view_count': view_count,
+ 'like_count': like_count,
+ 'comment_count': comment_count,
+ 'age_limit': age_limit,
+ 'url': video_url,
+ }
+
+
+class MotherlessPaginatedIE(InfoExtractor):
+ _EXTRA_QUERY = {}
+ _PAGE_SIZE = 60
+
+ def _correct_path(self, url, item_id):
+ raise NotImplementedError('This method must be implemented by subclasses')
+
+ def _extract_entries(self, webpage, base):
+ for mobj in re.finditer(r'href="[^"]*(?P<href>/[A-F0-9]+)"\s+title="(?P<title>[^"]+)',
+ webpage):
+ video_url = urllib.parse.urljoin(base, mobj.group('href'))
+ video_id = MotherlessIE.get_temp_id(video_url)
+
+ if video_id:
+ yield self.url_result(video_url, MotherlessIE, video_id, mobj.group('title'))
+
+ def _real_extract(self, url):
+ item_id = self._match_id(url)
+ real_url = self._correct_path(url, item_id)
+ webpage = self._download_webpage(real_url, item_id, 'Downloading page 1')
+
+ def get_page(idx):
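+            # idx is 0-based; page 1 was already fetched above, so reuse it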
+ page = idx + 1
+ current_page = webpage if not idx else self._download_webpage(
+ real_url, item_id, note=f'Downloading page {page}', query={'page': page, **self._EXTRA_QUERY})
+ yield from self._extract_entries(current_page, real_url)
+
+ return self.playlist_result(
+ OnDemandPagedList(get_page, self._PAGE_SIZE), item_id,
+ remove_end(self._html_extract_title(webpage), ' | MOTHERLESS.COM ™'))
+
+
+class MotherlessGroupIE(MotherlessPaginatedIE):
+ _VALID_URL = r'https?://(?:www\.)?motherless\.com/g[vifm]?/(?P<id>[a-z0-9_]+)/?(?:$|[#?])'
+ _TESTS = [{
+ 'url': 'http://motherless.com/gv/movie_scenes',
+ 'info_dict': {
+ 'id': 'movie_scenes',
+ 'title': 'Movie Scenes - Videos - Hot and sexy scenes from "regular" movies... Beautiful actresses fully',
+ },
+ 'playlist_mincount': 540,
+ }, {
+ 'url': 'http://motherless.com/g/sex_must_be_funny',
+ 'info_dict': {
+ 'id': 'sex_must_be_funny',
+ 'title': 'Sex must be funny',
+ },
+ 'playlist_count': 0,
+ }, {
+ 'url': 'https://motherless.com/gv/beautiful_cock',
+ 'info_dict': {
+ 'id': 'beautiful_cock',
+ 'title': 'Beautiful Cock',
+ },
+ 'playlist_mincount': 2040,
+ }]
+
+ def _correct_path(self, url, item_id):
+ return urllib.parse.urljoin(url, f'/gv/{item_id}')
+
+
+class MotherlessGalleryIE(MotherlessPaginatedIE):
+ _VALID_URL = r'https?://(?:www\.)?motherless\.com/G[VIG]?(?P<id>[A-F0-9]+)/?(?:$|[#?])'
+ _TESTS = [{
+ 'url': 'https://motherless.com/GV338999F',
+ 'info_dict': {
+ 'id': '338999F',
+ 'title': 'Random',
+ },
+ 'playlist_mincount': 171,
+ }, {
+ 'url': 'https://motherless.com/GVABD6213',
+ 'info_dict': {
+ 'id': 'ABD6213',
+ 'title': 'Cuties',
+ },
+ 'playlist_mincount': 2,
+ }, {
+ 'url': 'https://motherless.com/GVBCF7622',
+ 'info_dict': {
+ 'id': 'BCF7622',
+ 'title': 'Vintage',
+ },
+ 'playlist_count': 0,
+ }, {
+ 'url': 'https://motherless.com/G035DE2F',
+ 'info_dict': {
+ 'id': '035DE2F',
+ 'title': 'General',
+ },
+ 'playlist_mincount': 420,
+ }]
+
+ def _correct_path(self, url, item_id):
+ return urllib.parse.urljoin(url, f'/GV{item_id}')
+
+
+class MotherlessUploaderIE(MotherlessPaginatedIE):
+ _VALID_URL = r'https?://(?:www\.)?motherless\.com/u/(?P<id>\w+)/?(?:$|[?#])'
+ _TESTS = [{
+ 'url': 'https://motherless.com/u/Mrgo4hrs2023',
+ 'info_dict': {
+ 'id': 'Mrgo4hrs2023',
+ 'title': "Mrgo4hrs2023's Uploads - Videos",
+ },
+ 'playlist_mincount': 32,
+ }, {
+ 'url': 'https://motherless.com/u/Happy_couple?t=v',
+ 'info_dict': {
+ 'id': 'Happy_couple',
+ 'title': "Happy_couple's Uploads - Videos",
+ },
+ 'playlist_mincount': 8,
+ }]
+
+ _EXTRA_QUERY = {'t': 'v'}
+
+ def _correct_path(self, url, item_id):
+ return urllib.parse.urljoin(url, f'/u/{item_id}?t=v')
diff --git a/yt_dlp/extractor/motorsport.py b/yt_dlp/extractor/motorsport.py
new file mode 100644
index 0000000..167d85f
--- /dev/null
+++ b/yt_dlp/extractor/motorsport.py
@@ -0,0 +1,52 @@
+from .common import InfoExtractor
+from ..compat import compat_urlparse
+
+
+class MotorsportIE(InfoExtractor):
+ _WORKING = False
+ IE_DESC = 'motorsport.com'
+ _VALID_URL = r'https?://(?:www\.)?motorsport\.com/[^/?#]+/video/(?:[^/?#]+/)(?P<id>[^/]+)/?(?:$|[?#])'
+ _TEST = {
+ 'url': 'http://www.motorsport.com/f1/video/main-gallery/red-bull-racing-2014-rules-explained/',
+ 'info_dict': {
+ 'id': '2-T3WuR-KMM',
+ 'ext': 'mp4',
+ 'title': 'Red Bull Racing: 2014 Rules Explained',
+ 'duration': 208,
+ 'description': 'A new clip from Red Bull sees Daniel Ricciardo and Sebastian Vettel explain the 2014 Formula One regulations – which are arguably the most complex the sport has ever seen.',
+ 'uploader': 'mcomstaff',
+ 'uploader_id': 'UC334JIYKkVnyFoNCclfZtHQ',
+ 'upload_date': '20140903',
+ 'thumbnail': r're:^https?://.+\.jpg$'
+ },
+ 'add_ie': ['Youtube'],
+ 'params': {
+ 'skip_download': True,
+ },
+ }
+
+ def _real_extract(self, url):
+ display_id = self._match_id(url)
+ webpage = self._download_webpage(url, display_id)
+
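+        # older pages wrap a YouTube video in a site player iframe; newer
+        # ones embed motorsport.tv directly and are delegated as-is below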
+ iframe_path = self._html_search_regex(
+ r'<iframe id="player_iframe"[^>]+src="([^"]+)"', webpage, 'iframe path', default=None)
+
+ if iframe_path is None:
+ iframe_path = self._html_search_regex(
+ r'<iframe [^>]*\bsrc="(https://motorsport\.tv/embed/[^"]+)', webpage, 'embed iframe path')
+ return self.url_result(iframe_path)
+
+ iframe = self._download_webpage(
+ compat_urlparse.urljoin(url, iframe_path), display_id,
+ 'Downloading iframe')
+ youtube_id = self._search_regex(
+ r'www.youtube.com/embed/(.{11})', iframe, 'youtube id')
+
+ return {
+ '_type': 'url_transparent',
+ 'display_id': display_id,
+ 'url': 'https://youtube.com/watch?v=%s' % youtube_id,
+ }
diff --git a/yt_dlp/extractor/moviepilot.py b/yt_dlp/extractor/moviepilot.py
new file mode 100644
index 0000000..668c098
--- /dev/null
+++ b/yt_dlp/extractor/moviepilot.py
@@ -0,0 +1,97 @@
+from .common import InfoExtractor
+from .dailymotion import DailymotionIE
+
+
+class MoviepilotIE(InfoExtractor):
+    IE_NAME = 'moviepilot'
+    IE_DESC = 'Moviepilot trailer'
+ _VALID_URL = r'https?://(?:www\.)?moviepilot\.de/movies/(?P<id>[^/]+)'
+
+ _TESTS = [{
+ 'url': 'https://www.moviepilot.de/movies/interstellar-2/',
+ 'info_dict': {
+ 'id': 'x7xdpkk',
+ 'display_id': 'interstellar-2',
+ 'ext': 'mp4',
+ 'title': 'Interstellar',
+ 'thumbnail': r're:https://\w+\.dmcdn\.net/v/SaV-q1ZganMw4HVXg/x1080',
+ 'timestamp': 1605010596,
+ 'description': 'md5:0ae9cb452af52610c9ffc60f2fd0474c',
+ 'uploader': 'Moviepilot',
+ 'like_count': int,
+ 'view_count': int,
+ 'uploader_id': 'x6nd9k',
+ 'upload_date': '20201110',
+ 'duration': 97,
+ 'age_limit': 0,
+ 'tags': ['Alle Trailer', 'Movie', 'Verleih'],
+ },
+ }, {
+ 'url': 'https://www.moviepilot.de/movies/interstellar-2/trailer',
+ 'only_matching': True,
+ }, {
+ 'url': 'https://www.moviepilot.de/movies/interstellar-2/kinoprogramm/berlin',
+ 'only_matching': True,
+ }, {
+ 'url': 'https://www.moviepilot.de/movies/queen-slim/trailer',
+ 'info_dict': {
+ 'id': 'x7xj6o7',
+ 'display_id': 'queen-slim',
+ 'title': 'Queen & Slim',
+ 'ext': 'mp4',
+ 'thumbnail': r're:https://\w+\.dmcdn\.net/v/SbUM71ZeG2N975lf2/x1080',
+ 'timestamp': 1605555825,
+ 'description': 'md5:83228bb86f5367dd181447fdc4873989',
+ 'uploader': 'Moviepilot',
+ 'like_count': int,
+ 'view_count': int,
+ 'uploader_id': 'x6nd9k',
+ 'upload_date': '20201116',
+ 'duration': 138,
+ 'age_limit': 0,
+ 'tags': ['Movie', 'Verleih', 'Neue Trailer'],
+ },
+ }, {
+ 'url': 'https://www.moviepilot.de/movies/der-geiger-von-florenz/trailer',
+ 'info_dict': {
+ 'id': 'der-geiger-von-florenz',
+ 'title': 'Der Geiger von Florenz',
+ 'ext': 'mp4',
+ },
+ 'skip': 'No trailer for this movie.',
+ }, {
+ 'url': 'https://www.moviepilot.de/movies/muellers-buero/',
+ 'info_dict': {
+ 'id': 'x7xcw1i',
+ 'display_id': 'muellers-buero',
+ 'title': 'Müllers Büro',
+ 'ext': 'mp4',
+ 'description': 'md5:4d23a8f4ca035196cd4523863c4fe5a4',
+ 'timestamp': 1604958457,
+ 'age_limit': 0,
+ 'duration': 82,
+ 'upload_date': '20201109',
+ 'thumbnail': r're:https://\w+\.dmcdn\.net/v/SaMes1Zg3lxLv9j5u/x1080',
+ 'uploader': 'Moviepilot',
+ 'like_count': int,
+ 'view_count': int,
+ 'tags': ['Alle Trailer', 'Movie', 'Verleih'],
+ 'uploader_id': 'x6nd9k',
+ },
+ }]
+
+ def _real_extract(self, url):
+ video_id = self._match_id(url)
+
+ webpage = self._download_webpage(f'https://www.moviepilot.de/movies/{video_id}/trailer', video_id)
+
+ clip = self._search_nextjs_data(webpage, video_id)['props']['initialProps']['pageProps']
+
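+        # the trailer itself is hosted on Dailymotion; delegate to
+        # DailymotionIE via url_transparent so our own metadata is kept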
+ return {
+ '_type': 'url_transparent',
+ 'ie_key': DailymotionIE.ie_key(),
+ 'display_id': video_id,
+ 'title': clip.get('title'),
+ 'url': f'https://www.dailymotion.com/video/{clip["videoRemoteId"]}',
+ 'description': clip.get('summary'),
+ }
diff --git a/yt_dlp/extractor/moview.py b/yt_dlp/extractor/moview.py
new file mode 100644
index 0000000..678b2eb
--- /dev/null
+++ b/yt_dlp/extractor/moview.py
@@ -0,0 +1,43 @@
+from .jixie import JixieBaseIE
+
+
+class MoviewPlayIE(JixieBaseIE):
+ _VALID_URL = r'https?://www\.moview\.id/play/\d+/(?P<id>[\w-]+)'
+ _TESTS = [
+ {
+ # drm hls, only use direct link
+ 'url': 'https://www.moview.id/play/174/Candy-Monster',
+ 'info_dict': {
+ 'id': '146182',
+ 'ext': 'mp4',
+ 'display_id': 'Candy-Monster',
+ 'uploader_id': 'Mo165qXUUf',
+ 'duration': 528.2,
+ 'title': 'Candy Monster',
+ 'description': 'Mengapa Candy Monster ingin mengambil permen Chloe?',
+ 'thumbnail': 'https://video.jixie.media/1034/146182/146182_1280x720.jpg',
+ }
+ }, {
+ # non-drm hls
+ 'url': 'https://www.moview.id/play/75/Paris-Van-Java-Episode-16',
+ 'info_dict': {
+ 'id': '28210',
+ 'ext': 'mp4',
+ 'duration': 2595.666667,
+ 'display_id': 'Paris-Van-Java-Episode-16',
+ 'uploader_id': 'Mo165qXUUf',
+ 'thumbnail': 'https://video.jixie.media/1003/28210/28210_1280x720.jpg',
+ 'description': 'md5:2a5e18d98eef9b39d7895029cac96c63',
+ 'title': 'Paris Van Java Episode 16',
+ }
+ }
+ ]
+
+ def _real_extract(self, url):
+ display_id = self._match_id(url)
+ webpage = self._download_webpage(url, display_id)
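+        # the Jixie video_id is embedded in the page's JS; the shared
+        # JixieBaseIE helper does the actual metadata/format extraction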
+
+ video_id = self._search_regex(
+ r'video_id\s*=\s*"(?P<video_id>[^"]+)', webpage, 'video_id')
+
+ return self._extract_data_from_jixie_id(display_id, video_id, webpage)
diff --git a/yt_dlp/extractor/moviezine.py b/yt_dlp/extractor/moviezine.py
new file mode 100644
index 0000000..cffcdcf
--- /dev/null
+++ b/yt_dlp/extractor/moviezine.py
@@ -0,0 +1,38 @@
+from .common import InfoExtractor
+
+
+class MoviezineIE(InfoExtractor):
+ _VALID_URL = r'https?://(?:www\.)?moviezine\.se/video/(?P<id>[^?#]+)'
+
+ _TEST = {
+ 'url': 'http://www.moviezine.se/video/205866',
+ 'info_dict': {
+ 'id': '205866',
+ 'ext': 'mp4',
+ 'title': 'Oculus - Trailer 1',
+ 'description': 'md5:40cc6790fc81d931850ca9249b40e8a4',
+ 'thumbnail': r're:http://.*\.jpg',
+ },
+ }
+
+ def _real_extract(self, url):
+ mobj = self._match_valid_url(url)
+ video_id = mobj.group('id')
+
+ webpage = self._download_webpage(url, video_id)
+ jsplayer = self._download_webpage('http://www.moviezine.se/api/player.js?video=%s' % video_id, video_id, 'Downloading js api player')
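+        # the player.js response is JavaScript config; pull the file, title
+        # and image fields out of it directly with regexes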
+
+ formats = [{
+ 'format_id': 'sd',
+ 'url': self._html_search_regex(r'file: "(.+?)",', jsplayer, 'file'),
+ 'quality': 0,
+ 'ext': 'mp4',
+ }]
+
+ return {
+ 'id': video_id,
+ 'title': self._search_regex(r'title: "(.+?)",', jsplayer, 'title'),
+ 'thumbnail': self._search_regex(r'image: "(.+?)",', jsplayer, 'image'),
+ 'formats': formats,
+ 'description': self._og_search_description(webpage),
+ }
diff --git a/yt_dlp/extractor/movingimage.py b/yt_dlp/extractor/movingimage.py
new file mode 100644
index 0000000..cdd8ba4
--- /dev/null
+++ b/yt_dlp/extractor/movingimage.py
@@ -0,0 +1,50 @@
+from .common import InfoExtractor
+from ..utils import (
+    parse_duration,
+    unescapeHTML,
+)
+
+
+class MovingImageIE(InfoExtractor):
+ _VALID_URL = r'https?://movingimage\.nls\.uk/film/(?P<id>\d+)'
+ _TEST = {
+ 'url': 'http://movingimage.nls.uk/film/3561',
+ 'md5': '4caa05c2b38453e6f862197571a7be2f',
+ 'info_dict': {
+ 'id': '3561',
+ 'ext': 'mp4',
+ 'title': 'SHETLAND WOOL',
+ 'description': 'md5:c5afca6871ad59b4271e7704fe50ab04',
+ 'duration': 900,
+ 'thumbnail': r're:^https?://.*\.jpg$',
+ },
+ }
+
+ def _real_extract(self, url):
+ video_id = self._match_id(url)
+
+ webpage = self._download_webpage(url, video_id)
+
+ formats = self._extract_m3u8_formats(
+ self._html_search_regex(r'file\s*:\s*"([^"]+)"', webpage, 'm3u8 manifest URL'),
+ video_id, ext='mp4', entry_protocol='m3u8_native')
+
+        def search_field(field_name, fatal=False):
+            return self._search_regex(
+                r'<span\s+class="field_title">%s:</span>\s*<span\s+class="field_content">([^<]+)</span>' % field_name,
+                webpage, field_name, fatal=fatal)
+
+ title = unescapeHTML(search_field('Title', fatal=True)).strip('()[]')
+ description = unescapeHTML(search_field('Description'))
+ duration = parse_duration(search_field('Running time'))
+ thumbnail = self._search_regex(
+ r"image\s*:\s*'([^']+)'", webpage, 'thumbnail', fatal=False)
+
+ return {
+ 'id': video_id,
+ 'formats': formats,
+ 'title': title,
+ 'description': description,
+ 'duration': duration,
+ 'thumbnail': thumbnail,
+ }
diff --git a/yt_dlp/extractor/msn.py b/yt_dlp/extractor/msn.py
new file mode 100644
index 0000000..77d1806
--- /dev/null
+++ b/yt_dlp/extractor/msn.py
@@ -0,0 +1,168 @@
+import re
+
+from .common import InfoExtractor
+from ..compat import compat_str
+from ..utils import (
+ determine_ext,
+ ExtractorError,
+ int_or_none,
+ unescapeHTML,
+)
+
+
+class MSNIE(InfoExtractor):
+ _WORKING = False
+ _VALID_URL = r'https?://(?:(?:www|preview)\.)?msn\.com/(?:[^/]+/)+(?P<display_id>[^/]+)/[a-z]{2}-(?P<id>[\da-zA-Z]+)'
+ _TESTS = [{
+ 'url': 'https://www.msn.com/en-in/money/video/7-ways-to-get-rid-of-chest-congestion/vi-BBPxU6d',
+ 'md5': '087548191d273c5c55d05028f8d2cbcd',
+ 'info_dict': {
+ 'id': 'BBPxU6d',
+ 'display_id': '7-ways-to-get-rid-of-chest-congestion',
+ 'ext': 'mp4',
+ 'title': 'Seven ways to get rid of chest congestion',
+ 'description': '7 Ways to Get Rid of Chest Congestion',
+ 'duration': 88,
+ 'uploader': 'Health',
+ 'uploader_id': 'BBPrMqa',
+ },
+ }, {
+ # Article, multiple Dailymotion Embeds
+ 'url': 'https://www.msn.com/en-in/money/sports/hottest-football-wags-greatest-footballers-turned-managers-and-more/ar-BBpc7Nl',
+ 'info_dict': {
+ 'id': 'BBpc7Nl',
+ },
+ 'playlist_mincount': 4,
+ }, {
+ 'url': 'http://www.msn.com/en-ae/news/offbeat/meet-the-nine-year-old-self-made-millionaire/ar-BBt6ZKf',
+ 'only_matching': True,
+ }, {
+ 'url': 'http://www.msn.com/en-ae/video/watch/obama-a-lot-of-people-will-be-disappointed/vi-AAhxUMH',
+ 'only_matching': True,
+ }, {
+ # geo restricted
+ 'url': 'http://www.msn.com/en-ae/foodanddrink/joinourtable/the-first-fart-makes-you-laugh-the-last-fart-makes-you-cry/vp-AAhzIBU',
+ 'only_matching': True,
+ }, {
+ 'url': 'http://www.msn.com/en-ae/entertainment/bollywood/watch-how-salman-khan-reacted-when-asked-if-he-would-apologize-for-his-‘raped-woman’-comment/vi-AAhvzW6',
+ 'only_matching': True,
+ }, {
+ # Vidible(AOL) Embed
+ 'url': 'https://www.msn.com/en-us/money/other/jupiter-is-about-to-come-so-close-you-can-see-its-moons-with-binoculars/vi-AACqsHR',
+ 'only_matching': True,
+ }, {
+ # Dailymotion Embed
+ 'url': 'https://www.msn.com/es-ve/entretenimiento/watch/winston-salem-paire-refait-des-siennes-en-perdant-sa-raquette-au-service/vp-AAG704L',
+ 'only_matching': True,
+ }, {
+ # YouTube Embed
+ 'url': 'https://www.msn.com/en-in/money/news/meet-vikram-%E2%80%94-chandrayaan-2s-lander/vi-AAGUr0v',
+ 'only_matching': True,
+ }, {
+ # NBCSports Embed
+ 'url': 'https://www.msn.com/en-us/money/football_nfl/week-13-preview-redskins-vs-panthers/vi-BBXsCDb',
+ 'only_matching': True,
+ }]
+
+ def _real_extract(self, url):
+ display_id, page_id = self._match_valid_url(url).groups()
+
+ webpage = self._download_webpage(url, display_id)
+
+ entries = []
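+        # each embedded player is described by an HTML-escaped JSON blob in a
+        # data-metadata attribute; known third-party embeds (AOL, Dailymotion,
+        # YouTube, NBCSports) are delegated to their own extractors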
+ for _, metadata in re.findall(r'data-metadata\s*=\s*(["\'])(?P<data>.+?)\1', webpage):
+ video = self._parse_json(unescapeHTML(metadata), display_id)
+
+ provider_id = video.get('providerId')
+ player_name = video.get('playerName')
+ if player_name and provider_id:
+ entry = None
+ if player_name == 'AOL':
+ if provider_id.startswith('http'):
+ provider_id = self._search_regex(
+ r'https?://delivery\.vidible\.tv/video/redirect/([0-9a-f]{24})',
+ provider_id, 'vidible id')
+ entry = self.url_result(
+ 'aol-video:' + provider_id, 'Aol', provider_id)
+ elif player_name == 'Dailymotion':
+ entry = self.url_result(
+ 'https://www.dailymotion.com/video/' + provider_id,
+ 'Dailymotion', provider_id)
+ elif player_name == 'YouTube':
+ entry = self.url_result(
+ provider_id, 'Youtube', provider_id)
+ elif player_name == 'NBCSports':
+ entry = self.url_result(
+ 'http://vplayer.nbcsports.com/p/BxmELC/nbcsports_embed/select/media/' + provider_id,
+ 'NBCSportsVPlayer', provider_id)
+ if entry:
+ entries.append(entry)
+ continue
+
+ video_id = video['uuid']
+ title = video['title']
+
+ formats = []
+ for file_ in video.get('videoFiles', []):
+ format_url = file_.get('url')
+ if not format_url:
+ continue
+ if 'format=m3u8-aapl' in format_url:
+ # m3u8_native should not be used here until
+ # https://github.com/ytdl-org/youtube-dl/issues/9913 is fixed
+ formats.extend(self._extract_m3u8_formats(
+ format_url, display_id, 'mp4',
+ m3u8_id='hls', fatal=False))
+ elif 'format=mpd-time-csf' in format_url:
+ formats.extend(self._extract_mpd_formats(
+ format_url, display_id, 'dash', fatal=False))
+ elif '.ism' in format_url:
+ if format_url.endswith('.ism'):
+ format_url += '/manifest'
+ formats.extend(self._extract_ism_formats(
+ format_url, display_id, 'mss', fatal=False))
+ else:
+ format_id = file_.get('formatCode')
+ formats.append({
+ 'url': format_url,
+ 'ext': 'mp4',
+ 'format_id': format_id,
+ 'width': int_or_none(file_.get('width')),
+ 'height': int_or_none(file_.get('height')),
+ 'vbr': int_or_none(self._search_regex(r'_(\d+)\.mp4', format_url, 'vbr', default=None)),
+ 'quality': 1 if format_id == '1001' else None,
+ })
+
+ subtitles = {}
+ for file_ in video.get('files', []):
+ format_url = file_.get('url')
+ format_code = file_.get('formatCode')
+ if not format_url or not format_code:
+ continue
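+            # format code 3100 appears to denote closed-caption (TTML) tracks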
+ if compat_str(format_code) == '3100':
+ subtitles.setdefault(file_.get('culture', 'en'), []).append({
+ 'ext': determine_ext(format_url, 'ttml'),
+ 'url': format_url,
+ })
+
+ entries.append({
+ 'id': video_id,
+ 'display_id': display_id,
+ 'title': title,
+ 'description': video.get('description'),
+ 'thumbnail': video.get('headlineImage', {}).get('url'),
+ 'duration': int_or_none(video.get('durationSecs')),
+ 'uploader': video.get('sourceFriendly'),
+ 'uploader_id': video.get('providerId'),
+ 'creator': video.get('creator'),
+ 'subtitles': subtitles,
+ 'formats': formats,
+ })
+
+ if not entries:
+ error = unescapeHTML(self._search_regex(
+ r'data-error=(["\'])(?P<error>.+?)\1',
+ webpage, 'error', group='error'))
+ raise ExtractorError('%s said: %s' % (self.IE_NAME, error), expected=True)
+
+ return self.playlist_result(entries, page_id)
diff --git a/yt_dlp/extractor/mtv.py b/yt_dlp/extractor/mtv.py
new file mode 100644
index 0000000..404e431
--- /dev/null
+++ b/yt_dlp/extractor/mtv.py
@@ -0,0 +1,654 @@
+import re
+import xml.etree.ElementTree
+
+from .common import InfoExtractor
+from ..compat import compat_str
+from ..networking import HEADRequest, Request
+from ..utils import (
+ ExtractorError,
+ RegexNotFoundError,
+ find_xpath_attr,
+ fix_xml_ampersands,
+ float_or_none,
+ int_or_none,
+ join_nonempty,
+ strip_or_none,
+ timeconvert,
+ try_get,
+ unescapeHTML,
+ update_url_query,
+ url_basename,
+ xpath_text,
+)
+
+
+def _media_xml_tag(tag):
+ return '{http://search.yahoo.com/mrss/}%s' % tag
+
+
+class MTVServicesInfoExtractor(InfoExtractor):
+ _MOBILE_TEMPLATE = None
+ _LANG = None
+
+ @staticmethod
+ def _id_from_uri(uri):
+ return uri.split(':')[-1]
+
+ @staticmethod
+ def _remove_template_parameter(url):
+ # Remove the templates, like &device={device}
+ return re.sub(r'&[^=]*?={.*?}(?=(&|$))', '', url)
+
+ def _get_feed_url(self, uri, url=None):
+ return self._FEED_URL
+
+ def _get_thumbnail_url(self, uri, itemdoc):
+ search_path = '%s/%s' % (_media_xml_tag('group'), _media_xml_tag('thumbnail'))
+ thumb_node = itemdoc.find(search_path)
+ if thumb_node is None:
+ return None
+ return thumb_node.get('url') or thumb_node.text or None
+
+ def _extract_mobile_video_formats(self, mtvn_id):
+ webpage_url = self._MOBILE_TEMPLATE % mtvn_id
+ req = Request(webpage_url)
+ # Otherwise we get a webpage that would execute some javascript
+ req.headers['User-Agent'] = 'curl/7'
+ webpage = self._download_webpage(req, mtvn_id,
+ 'Downloading mobile page')
+ metrics_url = unescapeHTML(self._search_regex(r'<a href="(http://metrics.+?)"', webpage, 'url'))
+ req = HEADRequest(metrics_url)
+ response = self._request_webpage(req, mtvn_id, 'Resolving url')
+ url = response.url
+ # Transform the url to get the best quality:
+ url = re.sub(r'.+pxE=mp4', 'http://mtvnmobile.vo.llnwd.net/kip0/_pxn=0+_pxK=18639+_pxE=mp4', url, 1)
+ return [{'url': url, 'ext': 'mp4'}]
+
+ def _extract_video_formats(self, mdoc, mtvn_id, video_id):
+ if re.match(r'.*/(error_country_block\.swf|geoblock\.mp4|copyright_error\.flv(?:\?geo\b.+?)?)$', mdoc.find('.//src').text) is not None:
+ if mtvn_id is not None and self._MOBILE_TEMPLATE is not None:
+ self.to_screen('The normal version is not available from your '
+ 'country, trying with the mobile version')
+ return self._extract_mobile_video_formats(mtvn_id)
+ raise ExtractorError('This video is not available from your country.',
+ expected=True)
+
+ formats = []
+ for rendition in mdoc.findall('.//rendition'):
+ if rendition.get('method') == 'hls':
+ hls_url = rendition.find('./src').text
+ formats.extend(self._extract_m3u8_formats(
+ hls_url, video_id, ext='mp4', entry_protocol='m3u8_native',
+ m3u8_id='hls', fatal=False))
+ else:
+ # fms
+ try:
+ _, _, ext = rendition.attrib['type'].partition('/')
+ rtmp_video_url = rendition.find('./src').text
+ if 'error_not_available.swf' in rtmp_video_url:
+ raise ExtractorError(
+ '%s said: video is not available' % self.IE_NAME,
+ expected=True)
+ if rtmp_video_url.endswith('siteunavail.png'):
+ continue
+                    formats.append({
+                        'ext': 'flv' if rtmp_video_url.startswith('rtmp') else ext,
+                        'url': rtmp_video_url,
+                        'format_id': join_nonempty(
+                            'rtmp' if rtmp_video_url.startswith('rtmp') else None,
+                            rendition.get('bitrate')),
+                        'width': int(rendition.get('width')),
+                        'height': int(rendition.get('height')),
+                    })
+ except (KeyError, TypeError):
+ raise ExtractorError('Invalid rendition field.')
+ return formats
+
+ def _extract_subtitles(self, mdoc, mtvn_id):
+ subtitles = {}
+ for transcript in mdoc.findall('.//transcript'):
+ if transcript.get('kind') != 'captions':
+ continue
+ lang = transcript.get('srclang')
+ for typographic in transcript.findall('./typographic'):
+ sub_src = typographic.get('src')
+ if not sub_src:
+ continue
+ ext = typographic.get('format')
+ if ext == 'cea-608':
+ ext = 'scc'
+ subtitles.setdefault(lang, []).append({
+ 'url': compat_str(sub_src),
+ 'ext': ext
+ })
+ return subtitles
+
+ def _get_video_info(self, itemdoc, use_hls=True):
+ uri = itemdoc.find('guid').text
+ video_id = self._id_from_uri(uri)
+ self.report_extraction(video_id)
+ content_el = itemdoc.find('%s/%s' % (_media_xml_tag('group'), _media_xml_tag('content')))
+ mediagen_url = self._remove_template_parameter(content_el.attrib['url'])
+ mediagen_url = mediagen_url.replace('device={device}', '')
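+        # advertise the accepted delivery method to mediagen (HLS by
+        # default, legacy FMS/RTMP otherwise)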
+ if 'acceptMethods' not in mediagen_url:
+ mediagen_url += '&' if '?' in mediagen_url else '?'
+ mediagen_url += 'acceptMethods='
+ mediagen_url += 'hls' if use_hls else 'fms'
+
+ mediagen_doc = self._download_xml(
+ mediagen_url, video_id, 'Downloading video urls', fatal=False)
+
+ if not isinstance(mediagen_doc, xml.etree.ElementTree.Element):
+ return None
+
+ item = mediagen_doc.find('./video/item')
+ if item is not None and item.get('type') == 'text':
+ message = '%s returned error: ' % self.IE_NAME
+ if item.get('code') is not None:
+ message += '%s - ' % item.get('code')
+ message += item.text
+ raise ExtractorError(message, expected=True)
+
+ description = strip_or_none(xpath_text(itemdoc, 'description'))
+
+ timestamp = timeconvert(xpath_text(itemdoc, 'pubDate'))
+
+        title_el = find_xpath_attr(
+            itemdoc, './/{http://search.yahoo.com/mrss/}category',
+            'scheme', 'urn:mtvn:video_title')
+        if title_el is None:
+            title_el = itemdoc.find('.//{http://search.yahoo.com/mrss/}title')
+        if title_el is None:
+            title_el = itemdoc.find('.//title')
+
+        # find() can return None or an element without text; guard both cases
+        title = title_el.text if title_el is not None else None
+        if title is None:
+            raise ExtractorError('Could not find video title')
+        title = title.strip()
+
+ series = find_xpath_attr(
+ itemdoc, './/{http://search.yahoo.com/mrss/}category',
+ 'scheme', 'urn:mtvn:franchise')
+ season = find_xpath_attr(
+ itemdoc, './/{http://search.yahoo.com/mrss/}category',
+ 'scheme', 'urn:mtvn:seasonN')
+ episode = find_xpath_attr(
+ itemdoc, './/{http://search.yahoo.com/mrss/}category',
+ 'scheme', 'urn:mtvn:episodeN')
+ series = series.text if series is not None else None
+ season = season.text if season is not None else None
+ episode = episode.text if episode is not None else None
+ if season and episode:
+ # episode number includes season, so remove it
+ episode = re.sub(r'^%s' % season, '', episode)
+
+ # This a short id that's used in the webpage urls
+ mtvn_id = None
+ mtvn_id_node = find_xpath_attr(itemdoc, './/{http://search.yahoo.com/mrss/}category',
+ 'scheme', 'urn:mtvn:id')
+ if mtvn_id_node is not None:
+ mtvn_id = mtvn_id_node.text
+
+ formats = self._extract_video_formats(mediagen_doc, mtvn_id, video_id)
+
+ # Some parts of complete video may be missing (e.g. missing Act 3 in
+ # http://www.southpark.de/alle-episoden/s14e01-sexual-healing)
+ if not formats:
+ return None
+
+ return {
+ 'title': title,
+ 'formats': formats,
+ 'subtitles': self._extract_subtitles(mediagen_doc, mtvn_id),
+ 'id': video_id,
+ 'thumbnail': self._get_thumbnail_url(uri, itemdoc),
+ 'description': description,
+ 'duration': float_or_none(content_el.attrib.get('duration')),
+ 'timestamp': timestamp,
+ 'series': series,
+ 'season_number': int_or_none(season),
+ 'episode_number': int_or_none(episode),
+ }
+
+ def _get_feed_query(self, uri):
+ data = {'uri': uri}
+ if self._LANG:
+ data['lang'] = self._LANG
+ return data
+
+ def _get_videos_info(self, uri, use_hls=True, url=None):
+ video_id = self._id_from_uri(uri)
+ feed_url = self._get_feed_url(uri, url)
+ info_url = update_url_query(feed_url, self._get_feed_query(uri))
+ return self._get_videos_info_from_url(info_url, video_id, use_hls)
+
+ def _get_videos_info_from_url(self, url, video_id, use_hls=True):
+ idoc = self._download_xml(
+ url, video_id,
+ 'Downloading info', transform_source=fix_xml_ampersands)
+
+ title = xpath_text(idoc, './channel/title')
+ description = xpath_text(idoc, './channel/description')
+
+ entries = []
+ for item in idoc.findall('.//item'):
+ info = self._get_video_info(item, use_hls)
+ if info:
+ entries.append(info)
+
+ # TODO: should be multi-video
+ return self.playlist_result(
+ entries, playlist_title=title, playlist_description=description)
+
+ def _extract_triforce_mgid(self, webpage, data_zone=None, video_id=None):
+ triforce_feed = self._parse_json(self._search_regex(
+ r'triforceManifestFeed\s*=\s*({.+?})\s*;\s*\n', webpage,
+ 'triforce feed', default='{}'), video_id, fatal=False)
+
+ data_zone = self._search_regex(
+ r'data-zone=(["\'])(?P<zone>.+?_lc_promo.*?)\1', webpage,
+ 'data zone', default=data_zone, group='zone')
+
+ feed_url = try_get(
+ triforce_feed, lambda x: x['manifest']['zones'][data_zone]['feed'],
+ compat_str)
+ if not feed_url:
+ return
+
+ feed = self._download_json(feed_url, video_id, fatal=False)
+ if not feed:
+ return
+
+ return try_get(feed, lambda x: x['result']['data']['id'], compat_str)
+
+ @staticmethod
+ def _extract_child_with_type(parent, t):
+ for c in parent['children']:
+ if c.get('type') == t:
+ return c
+
+ def _extract_mgid(self, webpage):
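+        # the mgid can surface in several places depending on page vintage:
+        # the og:video URL, data-mgid/swfobject embeds, the sm4:video:embed
+        # meta tag, a triforce feed, or the __DATA__ React state; try each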
+ try:
+ # the url can be http://media.mtvnservices.com/fb/{mgid}.swf
+ # or http://media.mtvnservices.com/{mgid}
+ og_url = self._og_search_video_url(webpage)
+ mgid = url_basename(og_url)
+ if mgid.endswith('.swf'):
+ mgid = mgid[:-4]
+ except RegexNotFoundError:
+ mgid = None
+
+ if mgid is None or ':' not in mgid:
+ mgid = self._search_regex(
+ [r'data-mgid="(.*?)"', r'swfobject\.embedSWF\(".*?(mgid:.*?)"'],
+ webpage, 'mgid', default=None)
+
+ if not mgid:
+ sm4_embed = self._html_search_meta(
+ 'sm4:video:embed', webpage, 'sm4 embed', default='')
+ mgid = self._search_regex(
+ r'embed/(mgid:.+?)["\'&?/]', sm4_embed, 'mgid', default=None)
+
+ if not mgid:
+ mgid = self._extract_triforce_mgid(webpage)
+
+ if not mgid:
+ data = self._parse_json(self._search_regex(
+ r'__DATA__\s*=\s*({.+?});', webpage, 'data'), None)
+ main_container = self._extract_child_with_type(data, 'MainContainer')
+ ab_testing = self._extract_child_with_type(main_container, 'ABTesting')
+ video_player = self._extract_child_with_type(ab_testing or main_container, 'VideoPlayer')
+ if video_player:
+ mgid = try_get(video_player, lambda x: x['props']['media']['video']['config']['uri'])
+ else:
+ flex_wrapper = self._extract_child_with_type(ab_testing or main_container, 'FlexWrapper')
+ auth_suite_wrapper = self._extract_child_with_type(flex_wrapper, 'AuthSuiteWrapper')
+ player = self._extract_child_with_type(auth_suite_wrapper or flex_wrapper, 'Player')
+ if player:
+ mgid = try_get(player, lambda x: x['props']['videoDetail']['mgid'])
+
+ if not mgid:
+ raise ExtractorError('Could not extract mgid')
+
+ return mgid
+
+ def _real_extract(self, url):
+ title = url_basename(url)
+ webpage = self._download_webpage(url, title)
+ mgid = self._extract_mgid(webpage)
+ videos_info = self._get_videos_info(mgid, url=url)
+ return videos_info
+
+
+class MTVServicesEmbeddedIE(MTVServicesInfoExtractor):
+ IE_NAME = 'mtvservices:embedded'
+ _VALID_URL = r'https?://media\.mtvnservices\.com/embed/(?P<mgid>.+?)(\?|/|$)'
+ _EMBED_REGEX = [r'<iframe[^>]+?src=(["\'])(?P<url>(?:https?:)?//media\.mtvnservices\.com/embed/.+?)\1']
+
+ _TEST = {
+ # From http://www.thewrap.com/peter-dinklage-sums-up-game-of-thrones-in-45-seconds-video/
+ 'url': 'http://media.mtvnservices.com/embed/mgid:uma:video:mtv.com:1043906/cp~vid%3D1043906%26uri%3Dmgid%3Auma%3Avideo%3Amtv.com%3A1043906',
+ 'md5': 'cb349b21a7897164cede95bd7bf3fbb9',
+ 'info_dict': {
+ 'id': '1043906',
+ 'ext': 'mp4',
+ 'title': 'Peter Dinklage Sums Up \'Game Of Thrones\' In 45 Seconds',
+ 'description': '"Sexy sexy sexy, stabby stabby stabby, beautiful language," says Peter Dinklage as he tries summarizing "Game of Thrones" in under a minute.',
+ 'timestamp': 1400126400,
+ 'upload_date': '20140515',
+ },
+ }
+
+ def _get_feed_url(self, uri, url=None):
+ video_id = self._id_from_uri(uri)
+ config = self._download_json(
+ 'http://media.mtvnservices.com/pmt/e1/access/index.html?uri=%s&configtype=edge' % uri, video_id)
+ return self._remove_template_parameter(config['feedWithQueryParams'])
+
+ def _real_extract(self, url):
+ mobj = self._match_valid_url(url)
+ mgid = mobj.group('mgid')
+ return self._get_videos_info(mgid)
+
+
+class MTVIE(MTVServicesInfoExtractor):
+ IE_NAME = 'mtv'
+ _VALID_URL = r'https?://(?:www\.)?mtv\.com/(?:video-clips|(?:full-)?episodes)/(?P<id>[^/?#.]+)'
+ _FEED_URL = 'http://www.mtv.com/feeds/mrss/'
+
+ _TESTS = [{
+ 'url': 'http://www.mtv.com/video-clips/vl8qof/unlocking-the-truth-trailer',
+ 'md5': '1edbcdf1e7628e414a8c5dcebca3d32b',
+ 'info_dict': {
+ 'id': '5e14040d-18a4-47c4-a582-43ff602de88e',
+ 'ext': 'mp4',
+ 'title': 'Unlocking The Truth|July 18, 2016|1|101|Trailer',
+ 'description': '"Unlocking the Truth" premieres August 17th at 11/10c.',
+ 'timestamp': 1468846800,
+ 'upload_date': '20160718',
+ },
+ }, {
+ 'url': 'http://www.mtv.com/full-episodes/94tujl/unlocking-the-truth-gates-of-hell-season-1-ep-101',
+ 'only_matching': True,
+ }, {
+ 'url': 'http://www.mtv.com/episodes/g8xu7q/teen-mom-2-breaking-the-wall-season-7-ep-713',
+ 'only_matching': True,
+ }]
+
+
+class MTVJapanIE(MTVServicesInfoExtractor):
+ IE_NAME = 'mtvjapan'
+ _VALID_URL = r'https?://(?:www\.)?mtvjapan\.com/videos/(?P<id>[0-9a-z]+)'
+
+ _TEST = {
+ 'url': 'http://www.mtvjapan.com/videos/prayht/fresh-info-cadillac-escalade',
+ 'info_dict': {
+ 'id': 'bc01da03-6fe5-4284-8880-f291f4e368f5',
+ 'ext': 'mp4',
+ 'title': '【Fresh Info】Cadillac ESCALADE Sport Edition',
+ },
+ 'params': {
+ 'skip_download': True,
+ },
+ }
+ _GEO_COUNTRIES = ['JP']
+ _FEED_URL = 'http://feeds.mtvnservices.com/od/feed/intl-mrss-player-feed'
+
+ def _get_feed_query(self, uri):
+ return {
+ 'arcEp': 'mtvjapan.com',
+ 'mgid': uri,
+ }
+
+
+class MTVVideoIE(MTVServicesInfoExtractor):
+ IE_NAME = 'mtv:video'
+ _VALID_URL = r'''(?x)^https?://
+ (?:(?:www\.)?mtv\.com/videos/.+?/(?P<videoid>[0-9]+)/[^/]+$|
+ m\.mtv\.com/videos/video\.rbml\?.*?id=(?P<mgid>[^&]+))'''
+
+ _FEED_URL = 'http://www.mtv.com/player/embed/AS3/rss/'
+
+ _TESTS = [
+ {
+ 'url': 'http://www.mtv.com/videos/misc/853555/ours-vh1-storytellers.jhtml',
+ 'md5': '850f3f143316b1e71fa56a4edfd6e0f8',
+ 'info_dict': {
+ 'id': '853555',
+ 'ext': 'mp4',
+ 'title': 'Taylor Swift - "Ours (VH1 Storytellers)"',
+ 'description': 'Album: Taylor Swift performs "Ours" for VH1 Storytellers at Harvey Mudd College.',
+ 'timestamp': 1352610000,
+ 'upload_date': '20121111',
+ },
+ },
+ ]
+
+ def _get_thumbnail_url(self, uri, itemdoc):
+ return 'http://mtv.mtvnimages.com/uri/' + uri
+
+ def _real_extract(self, url):
+ mobj = self._match_valid_url(url)
+ video_id = mobj.group('videoid')
+ uri = mobj.groupdict().get('mgid')
+ if uri is None:
+ webpage = self._download_webpage(url, video_id)
+
+ # Some videos come from Vevo.com
+ m_vevo = re.search(
+ r'(?s)isVevoVideo = true;.*?vevoVideoId = "(.*?)";', webpage)
+ if m_vevo:
+ vevo_id = m_vevo.group(1)
+ self.to_screen('Vevo video detected: %s' % vevo_id)
+ return self.url_result('vevo:%s' % vevo_id, ie='Vevo')
+
+ uri = self._html_search_regex(r'/uri/(.*?)\?', webpage, 'uri')
+ return self._get_videos_info(uri)
+
+
+class MTVDEIE(MTVServicesInfoExtractor):
+ _WORKING = False
+ IE_NAME = 'mtv.de'
+ _VALID_URL = r'https?://(?:www\.)?mtv\.de/(?:musik/videoclips|folgen|news)/(?P<id>[0-9a-z]+)'
+ _TESTS = [{
+ 'url': 'http://www.mtv.de/musik/videoclips/2gpnv7/Traum',
+ 'info_dict': {
+ 'id': 'd5d472bc-f5b7-11e5-bffd-a4badb20dab5',
+ 'ext': 'mp4',
+ 'title': 'Traum',
+ 'description': 'Traum',
+ },
+ 'params': {
+ # rtmp download
+ 'skip_download': True,
+ },
+ 'skip': 'Blocked at Travis CI',
+ }, {
+ # mediagen URL without query (e.g. http://videos.mtvnn.com/mediagen/e865da714c166d18d6f80893195fcb97)
+ 'url': 'http://www.mtv.de/folgen/6b1ylu/teen-mom-2-enthuellungen-S5-F1',
+ 'info_dict': {
+ 'id': '1e5a878b-31c5-11e7-a442-0e40cf2fc285',
+ 'ext': 'mp4',
+ 'title': 'Teen Mom 2',
+ 'description': 'md5:dc65e357ef7e1085ed53e9e9d83146a7',
+ },
+ 'params': {
+ # rtmp download
+ 'skip_download': True,
+ },
+ 'skip': 'Blocked at Travis CI',
+ }, {
+ 'url': 'http://www.mtv.de/news/glolix/77491-mtv-movies-spotlight--pixels--teil-3',
+ 'info_dict': {
+ 'id': 'local_playlist-4e760566473c4c8c5344',
+ 'ext': 'mp4',
+ 'title': 'Article_mtv-movies-spotlight-pixels-teil-3_short-clips_part1',
+ 'description': 'MTV Movies Supercut',
+ },
+ 'params': {
+ # rtmp download
+ 'skip_download': True,
+ },
+ 'skip': 'Das Video kann zur Zeit nicht abgespielt werden.',
+ }]
+ _GEO_COUNTRIES = ['DE']
+ _FEED_URL = 'http://feeds.mtvnservices.com/od/feed/intl-mrss-player-feed'
+
+ def _get_feed_query(self, uri):
+ return {
+ 'arcEp': 'mtv.de',
+ 'mgid': uri,
+ }
+
+
+class MTVItaliaIE(MTVServicesInfoExtractor):
+ IE_NAME = 'mtv.it'
+ _VALID_URL = r'https?://(?:www\.)?mtv\.it/(?:episodi|video|musica)/(?P<id>[0-9a-z]+)'
+ _TESTS = [{
+ 'url': 'http://www.mtv.it/episodi/24bqab/mario-una-serie-di-maccio-capatonda-cavoli-amario-episodio-completo-S1-E1',
+ 'info_dict': {
+ 'id': '0f0fc78e-45fc-4cce-8f24-971c25477530',
+ 'ext': 'mp4',
+ 'title': 'Cavoli amario (episodio completo)',
+ 'description': 'md5:4962bccea8fed5b7c03b295ae1340660',
+ 'series': 'Mario - Una Serie Di Maccio Capatonda',
+ 'season_number': 1,
+ 'episode_number': 1,
+ },
+ 'params': {
+ 'skip_download': True,
+ },
+ }]
+ _GEO_COUNTRIES = ['IT']
+ _FEED_URL = 'http://feeds.mtvnservices.com/od/feed/intl-mrss-player-feed'
+
+ def _get_feed_query(self, uri):
+ return {
+ 'arcEp': 'mtv.it',
+ 'mgid': uri,
+ }
+
+
+class MTVItaliaProgrammaIE(MTVItaliaIE): # XXX: Do not subclass from concrete IE
+ IE_NAME = 'mtv.it:programma'
+ _VALID_URL = r'https?://(?:www\.)?mtv\.it/(?:programmi|playlist)/(?P<id>[0-9a-z]+)'
+ _TESTS = [{
+ # program page: general
+ 'url': 'http://www.mtv.it/programmi/s2rppv/mario-una-serie-di-maccio-capatonda',
+ 'info_dict': {
+ 'id': 'a6f155bc-8220-4640-aa43-9b95f64ffa3d',
+ 'title': 'Mario - Una Serie Di Maccio Capatonda',
+ 'description': 'md5:72fbffe1f77ccf4e90757dd4e3216153',
+ },
+ 'playlist_count': 2,
+ 'params': {
+ 'skip_download': True,
+ },
+ }, {
+ # program page: specific season
+ 'url': 'http://www.mtv.it/programmi/d9ncjf/mario-una-serie-di-maccio-capatonda-S2',
+ 'info_dict': {
+ 'id': '4deeb5d8-f272-490c-bde2-ff8d261c6dd1',
+ 'title': 'Mario - Una Serie Di Maccio Capatonda - Stagione 2',
+ },
+ 'playlist_count': 34,
+ 'params': {
+ 'skip_download': True,
+ },
+ }, {
+ # playlist page + redirect
+ 'url': 'http://www.mtv.it/playlist/sexy-videos/ilctal',
+ 'info_dict': {
+ 'id': 'dee8f9ee-756d-493b-bf37-16d1d2783359',
+ 'title': 'Sexy Videos',
+ },
+ 'playlist_mincount': 145,
+ 'params': {
+ 'skip_download': True,
+ },
+ }]
+ _GEO_COUNTRIES = ['IT']
+ _FEED_URL = 'http://www.mtv.it/feeds/triforce/manifest/v8'
+
+ def _get_entries(self, title, url):
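+ # Walk the paginated feed, following result.nextPageURL until it is
+ # exhausted, yielding the canonical URL of each item or season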
+ while True:
+ pg = self._search_regex(r'/(\d+)$', url, 'entries', '1')
+ entries = self._download_json(url, title, 'Downloading page %s' % pg)
+ url = try_get(
+ entries, lambda x: x['result']['nextPageURL'], compat_str)
+ entries = try_get(
+ entries, (
+ lambda x: x['result']['data']['items'],
+ lambda x: x['result']['data']['seasons']),
+ list)
+ for entry in entries or []:
+ if entry.get('canonicalURL'):
+ yield self.url_result(entry['canonicalURL'])
+ if not url:
+ break
+
+ def _real_extract(self, url):
+ query = {'url': url}
+ info_url = update_url_query(self._FEED_URL, query)
+ video_id = self._match_id(url)
+ info = self._download_json(info_url, video_id).get('manifest')
+
+ redirect = try_get(
+ info, lambda x: x['newLocation']['url'], compat_str)
+ if redirect:
+ return self.url_result(redirect)
+
+ title = info.get('title')
+ video_id = try_get(
+ info, lambda x: x['reporting']['itemId'], compat_str)
+ parent_id = try_get(
+ info, lambda x: x['reporting']['parentId'], compat_str)
+
+ playlist_url = current_url = None
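+ # The triforce manifest lists page modules under opaque names:
+ # INTL_M304/M209 carry the show info feed, INTL_M308/M317 the
+ # playlist feed, and INTL_M300 the current-season feed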
+ for z in (info.get('zones') or {}).values():
+ if z.get('moduleName') in ('INTL_M304', 'INTL_M209'):
+ info_url = z.get('feed')
+ if z.get('moduleName') in ('INTL_M308', 'INTL_M317'):
+ playlist_url = playlist_url or z.get('feed')
+ if z.get('moduleName') in ('INTL_M300',):
+ current_url = current_url or z.get('feed')
+
+ if not info_url:
+ raise ExtractorError('No info found')
+
+ if video_id == parent_id:
+ video_id = self._search_regex(
+ r'([^\/]+)/[^\/]+$', info_url, 'video_id')
+
+ info = self._download_json(info_url, video_id, 'Downloading show info')
+ info = try_get(info, lambda x: x['result']['data'], dict)
+ title = title or try_get(
+ info, (
+ lambda x: x['title'],
+ lambda x: x['headline']),
+ compat_str)
+ description = try_get(info, lambda x: x['content'], compat_str)
+
+ if current_url:
+ season = try_get(
+ self._download_json(playlist_url, video_id, 'Downloading seasons info'),
+ lambda x: x['result']['data'], dict)
+ current = try_get(
+ season, lambda x: x['currentSeason'], compat_str)
+ seasons = try_get(
+ season, lambda x: x['seasons'], list) or []
+
+ if current in [s.get('eTitle') for s in seasons]:
+ playlist_url = current_url
+
+ title = re.sub(
+ r'[-|]\s*(?:mtv\s*italia|programma|playlist)',
+ '', title, flags=re.IGNORECASE).strip()
+
+ return self.playlist_result(
+ self._get_entries(title, playlist_url),
+ video_id, title, description)
diff --git a/yt_dlp/extractor/muenchentv.py b/yt_dlp/extractor/muenchentv.py
new file mode 100644
index 0000000..934cd4f
--- /dev/null
+++ b/yt_dlp/extractor/muenchentv.py
@@ -0,0 +1,72 @@
+import json
+
+from .common import InfoExtractor
+from ..utils import (
+ determine_ext,
+ int_or_none,
+ js_to_json,
+)
+
+
+class MuenchenTVIE(InfoExtractor):
+ _WORKING = False
+ _VALID_URL = r'https?://(?:www\.)?muenchen\.tv/livestream'
+ IE_DESC = 'münchen.tv'
+ _TEST = {
+ 'url': 'http://www.muenchen.tv/livestream/',
+ 'info_dict': {
+ 'id': '5334',
+ 'display_id': 'live',
+ 'ext': 'mp4',
+ 'title': 're:^münchen.tv-Livestream [0-9]{4}-[0-9]{2}-[0-9]{2} [0-9]{2}:[0-9]{2}$',
+ 'is_live': True,
+ 'thumbnail': r're:^https?://.*\.jpg$'
+ },
+ 'params': {
+ 'skip_download': True,
+ }
+ }
+
+ def _real_extract(self, url):
+ display_id = 'live'
+ webpage = self._download_webpage(url, display_id)
+
+ title = self._og_search_title(webpage)
+
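+ # The livestream page embeds a JWPlayer-style config inline; lift
+ # the playlist array out of the JS and parse it as JSON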
+ data_js = self._search_regex(
+ r'(?s)\nplaylist:\s*(\[.*?}\]),',
+ webpage, 'playlist configuration')
+ data_json = js_to_json(data_js)
+ data = json.loads(data_json)[0]
+
+ video_id = data['mediaid']
+ thumbnail = data.get('image')
+
+ formats = []
+ for format_num, s in enumerate(data['sources']):
+ ext = determine_ext(s['file'], None)
+ label_str = s.get('label')
+ if label_str is None:
+ label_str = '_%d' % format_num
+
+ if ext is None:
+ format_id = label_str
+ else:
+ format_id = '%s-%s' % (ext, label_str)
+
+ formats.append({
+ 'url': s['file'],
+ 'tbr': int_or_none(s.get('label')),
+ 'ext': 'mp4',
+ 'format_id': format_id,
+ 'preference': -100 if '.smil' in s['file'] else 0, # Strictly inferior to all other formats?
+ })
+
+ return {
+ 'id': video_id,
+ 'display_id': display_id,
+ 'title': title,
+ 'formats': formats,
+ 'is_live': True,
+ 'thumbnail': thumbnail,
+ }
diff --git a/yt_dlp/extractor/murrtube.py b/yt_dlp/extractor/murrtube.py
new file mode 100644
index 0000000..74365c0
--- /dev/null
+++ b/yt_dlp/extractor/murrtube.py
@@ -0,0 +1,164 @@
+import functools
+import json
+
+from .common import InfoExtractor
+from ..utils import (
+ ExtractorError,
+ OnDemandPagedList,
+ determine_ext,
+ int_or_none,
+ try_get,
+)
+
+
+class MurrtubeIE(InfoExtractor):
+ _WORKING = False
+ _VALID_URL = r'''(?x)
+ (?:
+ murrtube:|
+ https?://murrtube\.net/videos/(?P<slug>[a-z0-9\-]+)\-
+ )
+ (?P<id>[a-f0-9]{8}\-[a-f0-9]{4}\-[a-f0-9]{4}\-[a-f0-9]{4}\-[a-f0-9]{12})
+ '''
+ _TEST = {
+ 'url': 'https://murrtube.net/videos/inferno-x-skyler-148b6f2a-fdcc-4902-affe-9c0f41aaaca0',
+ 'md5': '169f494812d9a90914b42978e73aa690',
+ 'info_dict': {
+ 'id': '148b6f2a-fdcc-4902-affe-9c0f41aaaca0',
+ 'ext': 'mp4',
+ 'title': 'Inferno X Skyler',
+ 'description': 'Humping a very good slutty sheppy (roomate)',
+ 'thumbnail': r're:^https?://.*\.jpg$',
+ 'duration': 284,
+ 'uploader': 'Inferno Wolf',
+ 'age_limit': 18,
+ 'comment_count': int,
+ 'view_count': int,
+ 'like_count': int,
+ 'tags': ['hump', 'breed', 'Fursuit', 'murrsuit', 'bareback'],
+ }
+ }
+
+ def _download_gql(self, video_id, op, note=None, fatal=True):
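+ # All site data is served by a single GraphQL endpoint; POST the
+ # operation as JSON and unwrap the top-level 'data' member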
+ result = self._download_json(
+ 'https://murrtube.net/graphql',
+ video_id, note, data=json.dumps(op).encode(), fatal=fatal,
+ headers={'Content-Type': 'application/json'})
+ return result['data']
+
+ def _real_extract(self, url):
+ video_id = self._match_id(url)
+ data = self._download_gql(video_id, {
+ 'operationName': 'Medium',
+ 'variables': {
+ 'id': video_id,
+ },
+ 'query': '''\
+query Medium($id: ID!) {
+ medium(id: $id) {
+ title
+ description
+ key
+ duration
+ commentsCount
+ likesCount
+ viewsCount
+ thumbnailKey
+ tagList
+ user {
+ name
+ __typename
+ }
+ __typename
+ }
+}'''})
+ meta = data['medium']
+
+ storage_url = 'https://storage.murrtube.net/murrtube/'
+ format_url = storage_url + meta.get('key', '')
+ thumbnail = storage_url + meta.get('thumbnailKey', '')
+
+ if determine_ext(format_url) == 'm3u8':
+ formats = self._extract_m3u8_formats(
+ format_url, video_id, 'mp4', entry_protocol='m3u8_native', fatal=False)
+ else:
+ formats = [{'url': format_url}]
+
+ return {
+ 'id': video_id,
+ 'title': meta.get('title'),
+ 'description': meta.get('description'),
+ 'formats': formats,
+ 'thumbnail': thumbnail,
+ 'duration': int_or_none(meta.get('duration')),
+ 'uploader': try_get(meta, lambda x: x['user']['name']),
+ 'view_count': meta.get('viewsCount'),
+ 'like_count': meta.get('likesCount'),
+ 'comment_count': meta.get('commentsCount'),
+ 'tags': meta.get('tagList'),
+ 'age_limit': 18,
+ }
+
+
+class MurrtubeUserIE(MurrtubeIE): # XXX: Do not subclass from concrete IE
+ _WORKING = False
+ IE_DESC = 'Murrtube user profile'
+ _VALID_URL = r'https?://murrtube\.net/(?P<id>[^/]+)$'
+ _TEST = {
+ 'url': 'https://murrtube.net/stormy',
+ 'info_dict': {
+ 'id': 'stormy',
+ },
+ 'playlist_mincount': 27,
+ }
+ _PAGE_SIZE = 10
+
+ def _fetch_page(self, username, user_id, page):
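+ # Page through the user's uploads via the same GraphQL endpoint,
+ # yielding one murrtube:<id> result per video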
+ data = self._download_gql(username, {
+ 'operationName': 'Media',
+ 'variables': {
+ 'limit': self._PAGE_SIZE,
+ 'offset': page * self._PAGE_SIZE,
+ 'sort': 'latest',
+ 'userId': user_id,
+ },
+ 'query': '''\
+query Media($q: String, $sort: String, $userId: ID, $offset: Int!, $limit: Int!) {
+ media(q: $q, sort: $sort, userId: $userId, offset: $offset, limit: $limit) {
+ id
+ __typename
+ }
+}'''},
+ 'Downloading page {0}'.format(page + 1))
+ if data is None:
+ raise ExtractorError(f'Failed to retrieve video list for page {page + 1}')
+
+ media = data['media']
+
+ for entry in media:
+ yield self.url_result('murrtube:{0}'.format(entry['id']), MurrtubeIE.ie_key())
+
+ def _real_extract(self, url):
+ username = self._match_id(url)
+ data = self._download_gql(username, {
+ 'operationName': 'User',
+ 'variables': {
+ 'id': username,
+ },
+ 'query': '''\
+query User($id: ID!) {
+ user(id: $id) {
+ id
+ __typename
+ }
+}'''},
+ 'Downloading user info')
+ if data is None:
+ raise ExtractorError('Failed to fetch user info')
+
+ user = data['user']
+
+ entries = OnDemandPagedList(functools.partial(
+ self._fetch_page, username, user.get('id')), self._PAGE_SIZE)
+
+ return self.playlist_result(entries, username)
diff --git a/yt_dlp/extractor/museai.py b/yt_dlp/extractor/museai.py
new file mode 100644
index 0000000..7f66928
--- /dev/null
+++ b/yt_dlp/extractor/museai.py
@@ -0,0 +1,112 @@
+import re
+
+from .common import InfoExtractor
+from ..utils import (
+ ExtractorError,
+ determine_ext,
+ float_or_none,
+ int_or_none,
+ js_to_json,
+ traverse_obj,
+ url_or_none,
+)
+
+
+class MuseAIIE(InfoExtractor):
+ _VALID_URL = r'https?://(?:www\.)?muse\.ai/(?:v|embed)/(?P<id>\w+)'
+ _TESTS = [{
+ 'url': 'https://muse.ai/embed/YdTWvUW',
+ 'md5': 'f994f9a38be1c3aaf9e37cbd7d76fe7c',
+ 'info_dict': {
+ 'id': 'YdTWvUW',
+ 'ext': 'mp4',
+ 'title': '2023-05-28-Grabien-1941111 (1)',
+ 'description': '',
+ 'uploader': 'Today News Africa',
+ 'uploader_id': 'TodayNewsAfrica',
+ 'upload_date': '20230528',
+ 'timestamp': 1685285044,
+ 'duration': 1291.3,
+ 'view_count': int,
+ 'availability': 'public',
+ },
+ }, {
+ 'url': 'https://muse.ai/v/gQ4gGAA-0756',
+ 'md5': '52dbfc78e865e56dc19a1715badc35e8',
+ 'info_dict': {
+ 'id': 'gQ4gGAA',
+ 'ext': 'mp4',
+ 'title': '0756',
+ 'description': 'md5:0ca1483f9aac423e9a96ad00bb3a0785',
+ 'uploader': 'Aerial.ie',
+ 'uploader_id': 'aerial',
+ 'upload_date': '20210306',
+ 'timestamp': 1615072842,
+ 'duration': 21.4,
+ 'view_count': int,
+ 'availability': 'public',
+ },
+ }]
+ _WEBPAGE_TESTS = [{
+ 'url': 'https://muse.ai/docs',
+ 'playlist_mincount': 4,
+ 'info_dict': {
+ 'id': 'docs',
+ 'title': 'muse.ai | docs',
+ 'description': 'md5:6c0293431481582739c82ee8902687fa',
+ 'age_limit': 0,
+ 'thumbnail': 'https://muse.ai/static/imgs/poster-img-docs.png',
+ },
+ 'params': {'allowed_extractors': ['all', '-html5']},
+ }]
+ _EMBED_REGEX = [r'<iframe[^>]*\bsrc=["\'](?P<url>https://muse\.ai/embed/\w+)']
+
+ @classmethod
+ def _extract_embed_urls(cls, url, webpage):
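+ # Besides plain <iframe> embeds, pages may boot the player from an
+ # inline MusePlayer({..., video: '<id>', ...}) script; catch those too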
+ yield from super()._extract_embed_urls(url, webpage)
+ for embed_id in re.findall(r'<script>[^<]*\bMusePlayer\(\{[^}<]*\bvideo:\s*["\'](\w+)["\']', webpage):
+ yield f'https://muse.ai/embed/{embed_id}'
+
+ def _real_extract(self, url):
+ video_id = self._match_id(url)
+ webpage = self._download_webpage(f'https://muse.ai/embed/{video_id}', video_id)
+ data = self._search_json(
+ r'player\.setData\(', webpage, 'player data', video_id, transform_source=js_to_json)
+
+ source_url = data['url']
+ if not url_or_none(source_url):
+ raise ExtractorError('Unable to extract video URL')
+
+ formats = [{
+ 'url': source_url,
+ 'format_id': 'source',
+ 'quality': 1,
+ **traverse_obj(data, {
+ 'ext': ('filename', {determine_ext}),
+ 'width': ('width', {int_or_none}),
+ 'height': ('height', {int_or_none}),
+ 'filesize': ('size', {int_or_none}),
+ }),
+ }]
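+ # A source URL ending in /data also exposes adaptive streams; swap
+ # the trailing /data for /videos to reach the HLS/DASH manifests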
+ if source_url.endswith('/data'):
+ base_url = f'{source_url[:-5]}/videos'
+ formats.extend(self._extract_m3u8_formats(
+ f'{base_url}/hls.m3u8', video_id, m3u8_id='hls', fatal=False))
+ formats.extend(self._extract_mpd_formats(
+ f'{base_url}/dash.mpd', video_id, mpd_id='dash', fatal=False))
+
+ return {
+ 'id': video_id,
+ 'formats': formats,
+ **traverse_obj(data, {
+ 'title': ('title', {str}),
+ 'description': ('description', {str}),
+ 'duration': ('duration', {float_or_none}),
+ 'timestamp': ('tcreated', {int_or_none}),
+ 'uploader': ('owner_name', {str}),
+ 'uploader_id': ('owner_username', {str}),
+ 'view_count': ('views', {int_or_none}),
+ 'age_limit': ('mature', {lambda x: 18 if x else None}),
+ 'availability': ('visibility', {lambda x: x if x in ('private', 'unlisted') else 'public'}),
+ }),
+ }
diff --git a/yt_dlp/extractor/musescore.py b/yt_dlp/extractor/musescore.py
new file mode 100644
index 0000000..289ae57
--- /dev/null
+++ b/yt_dlp/extractor/musescore.py
@@ -0,0 +1,64 @@
+from .common import InfoExtractor
+
+
+class MuseScoreIE(InfoExtractor):
+ _VALID_URL = r'https?://(?:www\.)?musescore\.com/(?:user/\d+|[^/]+)(?:/scores)?/(?P<id>[^#&?]+)'
+ _TESTS = [{
+ 'url': 'https://musescore.com/user/73797/scores/142975',
+ 'info_dict': {
+ 'id': '142975',
+ 'ext': 'mp3',
+ 'title': 'WA Mozart Marche Turque (Turkish March fingered)',
+ 'description': 'md5:7ede08230e4eaabd67a4a98bb54d07be',
+ 'thumbnail': r're:https?://(?:www\.)?musescore\.com/.*\.png[^$]+',
+ 'uploader': 'PapyPiano',
+ 'creator': 'Wolfgang Amadeus Mozart',
+ }
+ }, {
+ 'url': 'https://musescore.com/user/36164500/scores/6837638',
+ 'info_dict': {
+ 'id': '6837638',
+ 'ext': 'mp3',
+ 'title': 'Sweet Child O\' Mine – Guns N\' Roses sweet child',
+ 'description': 'md5:4dca71191c14abc312a0a4192492eace',
+ 'thumbnail': r're:https?://(?:www\.)?musescore\.com/.*\.png[^$]+',
+ 'uploader': 'roxbelviolin',
+ 'creator': 'Guns N´Roses Arr. Roxbel Violin',
+ }
+ }, {
+ 'url': 'https://musescore.com/classicman/fur-elise',
+ 'info_dict': {
+ 'id': '33816',
+ 'ext': 'mp3',
+ 'title': 'Für Elise – Beethoven',
+ 'description': 'md5:49515a3556d5ecaf9fa4b2514064ac34',
+ 'thumbnail': r're:https?://(?:www\.)?musescore\.com/.*\.png[^$]+',
+ 'uploader': 'ClassicMan',
+ 'creator': 'Ludwig van Beethoven (1770–1827)',
+ }
+ }, {
+ 'url': 'https://musescore.com/minh_cuteee/scores/6555384',
+ 'only_matching': True,
+ }]
+
+ def _real_extract(self, url):
+ webpage = self._download_webpage(url, None)
+ url = self._og_search_url(webpage) or url
+ id = self._match_id(url)
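+ # The jmuse API returns a direct mp3 URL; the authorization header
+ # is a hardcoded token, presumably the site's own web-player key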
+ mp3_url = self._download_json(f'https://musescore.com/api/jmuse?id={id}&index=0&type=mp3&v2=1', id,
+ headers={'authorization': '63794e5461e4cfa046edfbdddfccc1ac16daffd2'})['info']['url']
+ formats = [{
+ 'url': mp3_url,
+ 'ext': 'mp3',
+ 'vcodec': 'none',
+ }]
+
+ return {
+ 'id': id,
+ 'formats': formats,
+ 'title': self._og_search_title(webpage),
+ 'description': self._og_search_description(webpage),
+ 'thumbnail': self._og_search_thumbnail(webpage),
+ 'uploader': self._html_search_meta('musescore:author', webpage, 'uploader'),
+ 'creator': self._html_search_meta('musescore:composer', webpage, 'composer'),
+ }
diff --git a/yt_dlp/extractor/musicdex.py b/yt_dlp/extractor/musicdex.py
new file mode 100644
index 0000000..a863514
--- /dev/null
+++ b/yt_dlp/extractor/musicdex.py
@@ -0,0 +1,172 @@
+from .common import InfoExtractor
+from ..utils import (
+ date_from_str,
+ format_field,
+ try_get,
+ unified_strdate,
+)
+
+
+class MusicdexBaseIE(InfoExtractor):
+ def _return_info(self, track_json, album_json, id):
+ return {
+ 'id': str(id),
+ 'title': track_json.get('name'),
+ 'track': track_json.get('name'),
+ 'description': track_json.get('description'),
+ 'track_number': track_json.get('number'),
+ 'url': format_field(track_json, 'url', 'https://www.musicdex.org/%s'),
+ 'duration': track_json.get('duration'),
+ 'genres': [genre.get('name') for genre in track_json.get('genres') or []],
+ 'like_count': track_json.get('likes_count'),
+ 'view_count': track_json.get('plays'),
+ 'artists': [artist.get('name') for artist in track_json.get('artists') or []],
+ 'album_artists': [artist.get('name') for artist in album_json.get('artists') or []],
+ 'thumbnail': format_field(album_json, 'image', 'https://www.musicdex.org/%s'),
+ 'album': album_json.get('name'),
+ 'release_year': try_get(album_json, lambda x: date_from_str(unified_strdate(x['release_date'])).year),
+ 'extractor_key': MusicdexSongIE.ie_key(),
+ 'extractor': 'MusicdexSong',
+ }
+
+
+class MusicdexSongIE(MusicdexBaseIE):
+ _VALID_URL = r'https?://(?:www\.)?musicdex\.org/track/(?P<id>\d+)'
+
+ _TESTS = [{
+ 'url': 'https://www.musicdex.org/track/306/dual-existence',
+ 'info_dict': {
+ 'id': '306',
+ 'ext': 'mp3',
+ 'title': 'dual existence',
+ 'description': '#NIPPONSEI @ IRC.RIZON.NET',
+ 'track': 'dual existence',
+ 'track_number': 1,
+ 'duration': 266000,
+ 'genres': ['Anime'],
+ 'like_count': int,
+ 'view_count': int,
+ 'artists': ['fripSide'],
+ 'album_artists': ['fripSide'],
+ 'thumbnail': 'https://www.musicdex.org/storage/album/9iDIam1DHTVqUG4UclFIEq1WAFGXfPW4y0TtZa91.png',
+ 'album': 'To Aru Kagaku no Railgun T OP2 Single - dual existence',
+ 'release_year': 2020
+ },
+ 'params': {'skip_download': True}
+ }]
+
+ def _real_extract(self, url):
+ id = self._match_id(url)
+ data_json = self._download_json(f'https://www.musicdex.org/secure/tracks/{id}?defaultRelations=true', id)['track']
+ return self._return_info(data_json, data_json.get('album') or {}, id)
+
+
+class MusicdexAlbumIE(MusicdexBaseIE):
+ _VALID_URL = r'https?://(?:www\.)?musicdex\.org/album/(?P<id>\d+)'
+
+ _TESTS = [{
+ 'url': 'https://www.musicdex.org/album/56/tenmon-and-eiichiro-yanagi-minori/ef-a-tale-of-memories-original-soundtrack-2-fortissimo',
+ 'playlist_mincount': 28,
+ 'info_dict': {
+ 'id': '56',
+ 'genres': ['OST'],
+ 'view_count': int,
+ 'artists': ['TENMON & Eiichiro Yanagi / minori'],
+ 'title': 'ef - a tale of memories Original Soundtrack 2 ~fortissimo~',
+ 'release_year': 2008,
+ 'thumbnail': 'https://www.musicdex.org/storage/album/2rSHkyYBYfB7sbvElpEyTMcUn6toY7AohOgJuDlE.jpg',
+ },
+ }]
+
+ def _real_extract(self, url):
+ id = self._match_id(url)
+ data_json = self._download_json(f'https://www.musicdex.org/secure/albums/{id}?defaultRelations=true', id)['album']
+ entries = [self._return_info(track, data_json, track['id']) for track in data_json.get('tracks') or [] if track.get('id')]
+
+ return {
+ '_type': 'playlist',
+ 'id': id,
+ 'title': data_json.get('name'),
+ 'description': data_json.get('description'),
+ 'genres': [genre.get('name') for genre in data_json.get('genres') or []],
+ 'view_count': data_json.get('plays'),
+ 'artists': [artist.get('name') for artist in data_json.get('artists') or []],
+ 'thumbnail': format_field(data_json, 'image', 'https://www.musicdex.org/%s'),
+ 'release_year': try_get(data_json, lambda x: date_from_str(unified_strdate(x['release_date'])).year),
+ 'entries': entries,
+ }
+
+
+class MusicdexPageIE(MusicdexBaseIE): # XXX: Conventionally, base classes should end with BaseIE/InfoExtractor
+ def _entries(self, id):
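+ # Generic pager: keep following pagination.next_page_url, yielding
+ # each returned item, until the API stops supplying a next page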
+ next_page_url = self._API_URL % id
+ while next_page_url:
+ data_json = self._download_json(next_page_url, id)['pagination']
+ for data in data_json.get('data') or []:
+ yield data
+ next_page_url = data_json.get('next_page_url')
+
+
+class MusicdexArtistIE(MusicdexPageIE):
+ _VALID_URL = r'https?://(?:www\.)?musicdex\.org/artist/(?P<id>\d+)'
+ _API_URL = 'https://www.musicdex.org/secure/artists/%s/albums?page=1'
+
+ _TESTS = [{
+ 'url': 'https://www.musicdex.org/artist/11/fripside',
+ 'playlist_mincount': 28,
+ 'info_dict': {
+ 'id': '11',
+ 'view_count': int,
+ 'title': 'fripSide',
+ 'thumbnail': 'https://www.musicdex.org/storage/artist/ZmOz0lN2vsweegB660em3xWffCjLPmTQHqJls5Xx.jpg',
+ },
+ }]
+
+ def _real_extract(self, url):
+ id = self._match_id(url)
+ data_json = self._download_json(f'https://www.musicdex.org/secure/artists/{id}', id)['artist']
+ entries = []
+ for album in self._entries(id):
+ entries.extend(self._return_info(track, album, track['id']) for track in album.get('tracks') or [] if track.get('id'))
+
+ return {
+ '_type': 'playlist',
+ 'id': id,
+ 'title': data_json.get('name'),
+ 'view_count': data_json.get('plays'),
+ 'thumbnail': format_field(data_json, 'image_small', 'https://www.musicdex.org/%s'),
+ 'entries': entries,
+ }
+
+
+class MusicdexPlaylistIE(MusicdexPageIE):
+ _VALID_URL = r'https?://(?:www\.)?musicdex\.org/playlist/(?P<id>\d+)'
+ _API_URL = 'https://www.musicdex.org/secure/playlists/%s/tracks?perPage=10000&page=1'
+
+ _TESTS = [{
+ 'url': 'https://www.musicdex.org/playlist/9/test',
+ 'playlist_mincount': 73,
+ 'info_dict': {
+ 'id': '9',
+ 'view_count': int,
+ 'title': 'Test',
+ 'thumbnail': 'https://www.musicdex.org/storage/album/jXATI79f0IbQ2sgsKYOYRCW3zRwF3XsfHhzITCuJ.jpg',
+ 'description': 'Test 123 123 21312 32121321321321312',
+ },
+ }]
+
+ def _real_extract(self, url):
+ id = self._match_id(url)
+ data_json = self._download_json(f'https://www.musicdex.org/secure/playlists/{id}', id)['playlist']
+ entries = [self._return_info(track, track.get('album') or {}, track['id'])
+ for track in self._entries(id) or [] if track.get('id')]
+
+ return {
+ '_type': 'playlist',
+ 'id': id,
+ 'title': data_json.get('name'),
+ 'description': data_json.get('description'),
+ 'view_count': data_json.get('plays'),
+ 'thumbnail': format_field(data_json, 'image', 'https://www.musicdex.org/%s'),
+ 'entries': entries,
+ }
diff --git a/yt_dlp/extractor/mx3.py b/yt_dlp/extractor/mx3.py
new file mode 100644
index 0000000..cb9f50e
--- /dev/null
+++ b/yt_dlp/extractor/mx3.py
@@ -0,0 +1,171 @@
+import re
+
+from .common import InfoExtractor
+from ..networking import HEADRequest
+from ..utils import (
+ get_element_by_class,
+ int_or_none,
+ try_call,
+ url_or_none,
+ urlhandle_detect_ext,
+)
+from ..utils.traversal import traverse_obj
+
+
+class Mx3BaseIE(InfoExtractor):
+ _VALID_URL_TMPL = r'https?://(?:www\.)?%s/t/(?P<id>\w+)'
+ _FORMATS = [{
+ 'url': 'player_asset',
+ 'format_id': 'default',
+ 'quality': 0,
+ }, {
+ 'url': 'player_asset?quality=hd',
+ 'format_id': 'hd',
+ 'quality': 1,
+ }, {
+ 'url': 'download',
+ 'format_id': 'download',
+ 'quality': 2,
+ }, {
+ 'url': 'player_asset?quality=source',
+ 'format_id': 'source',
+ 'quality': 2,
+ }]
+
+ def _extract_formats(self, track_id):
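+ # There is no format manifest; probe each candidate variant with a
+ # HEAD request and keep those that respond with HTTP 200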
+ formats = []
+ for fmt in self._FORMATS:
+ format_url = f'https://{self._DOMAIN}/tracks/{track_id}/{fmt["url"]}'
+ urlh = self._request_webpage(
+ HEADRequest(format_url), track_id, fatal=False, expected_status=404,
+ note=f'Checking for format {fmt["format_id"]}')
+ if urlh and urlh.status == 200:
+ formats.append({
+ **fmt,
+ 'url': format_url,
+ 'ext': urlhandle_detect_ext(urlh),
+ 'filesize': int_or_none(urlh.headers.get('Content-Length')),
+ })
+ return formats
+
+ def _real_extract(self, url):
+ track_id = self._match_id(url)
+ webpage = self._download_webpage(url, track_id)
+ more_info = get_element_by_class('single-more-info', webpage)
+ data = self._download_json(f'https://{self._DOMAIN}/t/{track_id}.json', track_id, fatal=False)
+
+ def get_info_field(name):
+ return self._html_search_regex(
+ rf'<dt[^>]*>\s*{name}\s*</dt>\s*<dd[^>]*>(.*?)</dd>',
+ more_info, name, default=None, flags=re.DOTALL)
+
+ return {
+ 'id': track_id,
+ 'formats': self._extract_formats(track_id),
+ 'genre': self._html_search_regex(
+ r'<div\b[^>]+class="single-band-genre"[^>]*>([^<]+)</div>', webpage, 'genre', default=None),
+ 'release_year': int_or_none(get_info_field('Year of creation')),
+ 'description': get_info_field('Description'),
+ 'tags': try_call(lambda: get_info_field('Tag').split(', '), list),
+ **traverse_obj(data, {
+ 'title': ('title', {str}),
+ 'artist': (('performer_name', 'artist'), {str}),
+ 'album_artist': ('artist', {str}),
+ 'composer': ('composer_name', {str}),
+ 'thumbnail': (('picture_url_xlarge', 'picture_url'), {url_or_none}),
+ }, get_all=False),
+ }
+
+
+class Mx3IE(Mx3BaseIE):
+ _DOMAIN = 'mx3.ch'
+ _VALID_URL = Mx3BaseIE._VALID_URL_TMPL % re.escape(_DOMAIN)
+ _TESTS = [{
+ 'url': 'https://mx3.ch/t/1Cru',
+ 'md5': '7ba09e9826b4447d4e1ce9d69e0e295f',
+ 'info_dict': {
+ 'id': '1Cru',
+ 'ext': 'wav',
+ 'artist': 'Godina',
+ 'album_artist': 'Tortue Tortue',
+ 'composer': 'Olivier Godinat',
+ 'genre': 'Rock',
+ 'thumbnail': 'https://mx3.ch/pictures/mx3/file/0101/4643/square_xlarge/1-s-envoler-1.jpg?1630272813',
+ 'title': "S'envoler",
+ 'release_year': 2021,
+ 'tags': [],
+ }
+ }, {
+ 'url': 'https://mx3.ch/t/1LIY',
+ 'md5': '48293cb908342547827f963a5a2e9118',
+ 'info_dict': {
+ 'id': '1LIY',
+ 'ext': 'mov',
+ 'artist': 'Tania Kimfumu',
+ 'album_artist': 'The Broots',
+ 'composer': 'Emmanuel Diserens',
+ 'genre': 'Electro',
+ 'thumbnail': 'https://mx3.ch/pictures/mx3/file/0110/0003/video_xlarge/frame_0000.png?1686963670',
+ 'title': 'The Broots-Larytta remix "Begging For Help"',
+ 'release_year': 2023,
+ 'tags': ['the broots', 'cassata records', 'larytta'],
+ 'description': '"Begging for Help" Larytta Remix Official Video\nRealized By Kali Donkilie in 2023',
+ }
+ }, {
+ 'url': 'https://mx3.ch/t/1C6E',
+ 'md5': '1afcd578493ddb8e5008e94bb6d97e25',
+ 'info_dict': {
+ 'id': '1C6E',
+ 'ext': 'wav',
+ 'artist': 'Alien Bubblegum',
+ 'album_artist': 'Alien Bubblegum',
+ 'composer': 'Alien Bubblegum',
+ 'genre': 'Punk',
+ 'thumbnail': 'https://mx3.ch/pictures/mx3/file/0101/1551/square_xlarge/pandora-s-box-cover-with-title.png?1627054733',
+ 'title': 'Wide Awake',
+ 'release_year': 2021,
+ 'tags': ['alien bubblegum', 'bubblegum', 'alien', 'pop punk', 'poppunk'],
+ }
+ }]
+
+
+class Mx3NeoIE(Mx3BaseIE):
+ _DOMAIN = 'neo.mx3.ch'
+ _VALID_URL = Mx3BaseIE._VALID_URL_TMPL % re.escape(_DOMAIN)
+ _TESTS = [{
+ 'url': 'https://neo.mx3.ch/t/1hpd',
+ 'md5': '6d9986bbae5cac3296ec8813bf965eb2',
+ 'info_dict': {
+ 'id': '1hpd',
+ 'ext': 'wav',
+ 'artist': 'Baptiste Lopez',
+ 'album_artist': 'Kammerorchester Basel',
+ 'composer': 'Jannik Giger',
+ 'genre': 'Composition, Orchestra',
+ 'title': 'Troisième œil. Für Kammerorchester (2023)',
+ 'thumbnail': 'https://neo.mx3.ch/pictures/neo/file/0000/0241/square_xlarge/kammerorchester-basel-group-photo-2_c_-lukasz-rajchert.jpg?1560341252',
+ 'release_year': 2023,
+ 'tags': [],
+ }
+ }]
+
+
+class Mx3VolksmusikIE(Mx3BaseIE):
+ _DOMAIN = 'volksmusik.mx3.ch'
+ _VALID_URL = Mx3BaseIE._VALID_URL_TMPL % re.escape(_DOMAIN)
+ _TESTS = [{
+ 'url': 'https://volksmusik.mx3.ch/t/Zx',
+ 'md5': 'dd967a7b0c1ef898f3e072cf9c2eae3c',
+ 'info_dict': {
+ 'id': 'Zx',
+ 'ext': 'mp3',
+ 'artist': 'Ländlerkapelle GrischArt',
+ 'album_artist': 'Ländlerkapelle GrischArt',
+ 'composer': 'Urs Glauser',
+ 'genre': 'Instrumental, Graubünden',
+ 'title': 'Chämilouf',
+ 'thumbnail': 'https://volksmusik.mx3.ch/pictures/vxm/file/0000/3815/square_xlarge/grischart1.jpg?1450530120',
+ 'release_year': 2012,
+ 'tags': [],
+ }
+ }]
diff --git a/yt_dlp/extractor/mxplayer.py b/yt_dlp/extractor/mxplayer.py
new file mode 100644
index 0000000..1fdb08e
--- /dev/null
+++ b/yt_dlp/extractor/mxplayer.py
@@ -0,0 +1,241 @@
+from .common import InfoExtractor
+from ..compat import compat_str
+from ..utils import (
+ int_or_none,
+ traverse_obj,
+ try_get,
+ urljoin,
+)
+
+
+class MxplayerIE(InfoExtractor):
+ _VALID_URL = r'https?://(?:www\.)?mxplayer\.in/(?P<type>movie|show/[-\w]+/[-\w]+)/(?P<display_id>[-\w]+)-(?P<id>\w+)'
+ _TESTS = [{
+ 'url': 'https://www.mxplayer.in/show/watch-my-girlfriend-is-an-alien-hindi-dubbed/season-1/episode-1-online-9d2013d31d5835bb8400e3b3c5e7bb72',
+ 'info_dict': {
+ 'id': '9d2013d31d5835bb8400e3b3c5e7bb72',
+ 'display_id': 'episode-1-online',
+ 'ext': 'mp4',
+ 'title': 'Episode 1',
+ 'description': 'md5:62ed43eb9fec5efde5cf3bd1040b7670',
+ 'season_number': 1,
+ 'episode_number': 1,
+ 'duration': 2451,
+ 'season': 'Season 1',
+ 'series': 'My Girlfriend Is An Alien (Hindi Dubbed)',
+ 'episode': 'Episode 1'
+ },
+ 'params': {
+ 'format': 'bv',
+ 'skip_download': True,
+ },
+ }, {
+ 'url': 'https://www.mxplayer.in/movie/watch-knock-knock-hindi-dubbed-movie-online-b9fa28df3bfb8758874735bbd7d2655a?watch=true',
+ 'info_dict': {
+ 'id': 'b9fa28df3bfb8758874735bbd7d2655a',
+ 'display_id': 'episode-1-online',
+ 'ext': 'mp4',
+ 'title': 'Knock Knock (Hindi Dubbed)',
+ 'description': 'md5:4160f2dfc3b87c524261366f6b736329',
+ 'duration': 5970,
+ },
+ 'params': {
+ 'format': 'bv',
+ 'skip_download': True,
+ },
+ 'skip': 'No longer available',
+ }, {
+ 'url': 'https://www.mxplayer.in/show/watch-shaitaan/season-1/the-infamous-taxi-gang-of-meerut-online-45055d5bcff169ad48f2ad7552a83d6c',
+ 'info_dict': {
+ 'id': '45055d5bcff169ad48f2ad7552a83d6c',
+ 'ext': 'mp4',
+ 'title': 'The infamous taxi gang of Meerut',
+ 'description': 'md5:033a0a7e3fd147be4fb7e07a01a3dc28',
+ 'season_number': 1,
+ 'episode_number': 1,
+ 'duration': 2332,
+ 'season': 'Season 1',
+ 'series': 'Shaitaan',
+ 'episode': 'Episode 1'
+ },
+ 'params': {
+ 'format': 'best',
+ 'skip_download': True,
+ },
+ 'skip': 'No longer available.'
+ }, {
+ 'url': 'https://www.mxplayer.in/show/watch-aashram/chapter-1/duh-swapna-online-d445579792b0135598ba1bc9088a84cb',
+ 'info_dict': {
+ 'id': 'd445579792b0135598ba1bc9088a84cb',
+ 'display_id': 'duh-swapna-online',
+ 'ext': 'mp4',
+ 'title': 'Duh Swapna',
+ 'description': 'md5:35ff39c4bdac403c53be1e16a04192d8',
+ 'season_number': 1,
+ 'episode_number': 3,
+ 'duration': 2568,
+ 'season': 'Season 1',
+ 'series': 'Aashram',
+ 'episode': 'Episode 3'
+ },
+ 'params': {
+ 'format': 'bv',
+ 'skip_download': True,
+ },
+ }, {
+ 'url': 'https://www.mxplayer.in/show/watch-dangerous/season-1/chapter-1-online-5a351b4f9fb69436f6bd6ae3a1a75292',
+ 'info_dict': {
+ 'id': '5a351b4f9fb69436f6bd6ae3a1a75292',
+ 'display_id': 'chapter-1-online',
+ 'ext': 'mp4',
+ 'title': 'Chapter 1',
+ 'description': 'md5:233886b8598bc91648ac098abe1d288f',
+ 'season_number': 1,
+ 'episode_number': 1,
+ 'duration': 1305,
+ 'season': 'Season 1',
+ 'series': 'Dangerous',
+ 'episode': 'Episode 1'
+ },
+ 'params': {
+ 'format': 'bv',
+ 'skip_download': True,
+ },
+ }, {
+ 'url': 'https://www.mxplayer.in/movie/watch-the-attacks-of-2611-movie-online-0452f0d80226c398d63ce7e3ea40fa2d',
+ 'info_dict': {
+ 'id': '0452f0d80226c398d63ce7e3ea40fa2d',
+ 'ext': 'mp4',
+ 'title': 'The Attacks of 26/11',
+ 'description': 'md5:689bacd29e97b3f31eaf519eb14127e5',
+ 'duration': 6085,
+ },
+ 'params': {
+ 'format': 'best',
+ 'skip_download': True,
+ },
+ 'skip': 'No longer available. Cannot be played on browser'
+ }, {
+ 'url': 'https://www.mxplayer.in/movie/watch-kitne-door-kitne-paas-movie-online-a9e9c76c566205955f70d8b2cb88a6a2',
+ 'info_dict': {
+ 'id': 'a9e9c76c566205955f70d8b2cb88a6a2',
+ 'display_id': 'watch-kitne-door-kitne-paas-movie-online',
+ 'title': 'Kitne Door Kitne Paas',
+ 'duration': 8458,
+ 'ext': 'mp4',
+ 'description': 'md5:fb825f3c542513088024dcafef0921b4',
+ },
+ 'params': {
+ 'format': 'bv',
+ 'skip_download': True,
+ },
+ }, {
+ 'url': 'https://www.mxplayer.in/show/watch-ek-thi-begum-hindi/season-2/game-of-power-online-5e5305c28f1409847cdc4520b6ad77cf',
+ 'info_dict': {
+ 'id': '5e5305c28f1409847cdc4520b6ad77cf',
+ 'display_id': 'game-of-power-online',
+ 'title': 'Game Of Power',
+ 'duration': 1845,
+ 'ext': 'mp4',
+ 'description': 'md5:1d0948d2a5312d7013792d53542407f9',
+ 'series': 'Ek Thi Begum (Hindi)',
+ 'season': 'Season 2',
+ 'season_number': 2,
+ 'episode': 'Episode 2',
+ 'episode_number': 2,
+ },
+ 'params': {
+ 'format': 'bv',
+ 'skip_download': True,
+ },
+ }, {
+ 'url': 'https://www.mxplayer.in/movie/watch-deewane-huye-paagal-movie-online-4f9175c40a11c3994182a65afdd37ec6?watch=true',
+ 'info_dict': {
+ 'id': '4f9175c40a11c3994182a65afdd37ec6',
+ 'display_id': 'watch-deewane-huye-paagal-movie-online',
+ 'title': 'Deewane Huye Paagal',
+ 'duration': 9037,
+ 'ext': 'mp4',
+ 'description': 'md5:d17bd5c651016c4ed2e6f8a4ace15534',
+ },
+ 'params': {'skip_download': 'm3u8'},
+ }]
+
+ def _real_extract(self, url):
+ video_type, display_id, video_id = self._match_valid_url(url).group('type', 'display_id', 'id')
+ if 'show' in video_type:
+ video_type = 'episode'
+
+ data_json = self._download_json(
+ f'https://api.mxplay.com/v1/web/detail/video?type={video_type}&id={video_id}', display_id)
+
+ formats, subtitles = [], {}
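+ # Prefer the third-party CDN URLs and fall back to the in-house
+ # 'high' variants, for both HLS and DASH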
+ m3u8_url = urljoin('https://llvod.mxplay.com/', traverse_obj(
+ data_json, ('stream', (('thirdParty', 'hlsUrl'), ('hls', 'high'))), get_all=False))
+ if m3u8_url:
+ formats, subtitles = self._extract_m3u8_formats_and_subtitles(m3u8_url, display_id, 'mp4', fatal=False)
+ mpd_url = urljoin('https://llvod.mxplay.com/', traverse_obj(
+ data_json, ('stream', (('thirdParty', 'dashUrl'), ('dash', 'high'))), get_all=False))
+ if mpd_url:
+ fmts, subs = self._extract_mpd_formats_and_subtitles(mpd_url, display_id, fatal=False)
+ formats.extend(fmts)
+ self._merge_subtitles(subs, target=subtitles)
+
+ season = traverse_obj(data_json, ('container', 'title'))
+ return {
+ 'id': video_id,
+ 'title': data_json.get('title'),
+ 'formats': formats,
+ 'subtitles': subtitles,
+ 'display_id': display_id,
+ 'duration': data_json.get('duration'),
+ 'series': traverse_obj(data_json, ('container', 'container', 'title')),
+ 'description': data_json.get('description'),
+ 'season': season,
+ 'season_number': int_or_none(
+ self._search_regex(r'Season (\d+)', season, 'Season Number', default=None)),
+ 'episode_number': data_json.get('sequence') or None,
+ }
+
+
+class MxplayerShowIE(InfoExtractor):
+ _VALID_URL = r'https?://(?:www\.)?mxplayer\.in/show/(?P<display_id>[-\w]+)-(?P<id>\w+)/?(?:$|[#?])'
+ _TESTS = [{
+ 'url': 'https://www.mxplayer.in/show/watch-chakravartin-ashoka-samrat-series-online-a8f44e3cc0814b5601d17772cedf5417',
+ 'playlist_mincount': 440,
+ 'info_dict': {
+ 'id': 'a8f44e3cc0814b5601d17772cedf5417',
+ 'title': 'Watch Chakravartin Ashoka Samrat Series Online',
+ }
+ }]
+
+ _API_SHOW_URL = "https://api.mxplay.com/v1/web/detail/tab/tvshowseasons?type=tv_show&id={}&device-density=2&platform=com.mxplay.desktop&content-languages=hi,en"
+ _API_EPISODES_URL = "https://api.mxplay.com/v1/web/detail/tab/tvshowepisodes?type=season&id={}&device-density=1&platform=com.mxplay.desktop&content-languages=hi,en&{}"
+
+ def _entries(self, show_id):
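+ # Enumerate seasons from the show endpoint, then page through each
+ # season's episodes by following the API's 'next' cursor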
+ show_json = self._download_json(
+ self._API_SHOW_URL.format(show_id),
+ video_id=show_id, headers={'Referer': 'https://mxplayer.in'})
+ page_num = 0
+ for season in show_json.get('items') or []:
+ season_id = try_get(season, lambda x: x['id'], compat_str)
+ next_url = ''
+ while next_url is not None:
+ page_num += 1
+ season_json = self._download_json(
+ self._API_EPISODES_URL.format(season_id, next_url),
+ video_id=season_id,
+ headers={'Referer': 'https://mxplayer.in'},
+ note='Downloading JSON metadata page %d' % page_num)
+ for episode in season_json.get('items') or []:
+ video_url = episode['webUrl']
+ yield self.url_result(
+ 'https://mxplayer.in%s' % video_url,
+ ie=MxplayerIE.ie_key(), video_id=video_url.split('-')[-1])
+ next_url = season_json.get('next')
+
+ def _real_extract(self, url):
+ display_id, show_id = self._match_valid_url(url).groups()
+ return self.playlist_result(
+ self._entries(show_id), playlist_id=show_id,
+ playlist_title=display_id.replace('-', ' ').title())
diff --git a/yt_dlp/extractor/myspace.py b/yt_dlp/extractor/myspace.py
new file mode 100644
index 0000000..3451098
--- /dev/null
+++ b/yt_dlp/extractor/myspace.py
@@ -0,0 +1,195 @@
+import re
+
+from .common import InfoExtractor
+from ..utils import (
+ ExtractorError,
+ int_or_none,
+ parse_iso8601,
+)
+
+
+class MySpaceIE(InfoExtractor):
+ _VALID_URL = r'''(?x)
+ https?://
+ myspace\.com/[^/]+/
+ (?P<mediatype>
+ video/[^/]+/(?P<video_id>\d+)|
+ music/song/[^/?#&]+-(?P<song_id>\d+)-\d+(?:[/?#&]|$)
+ )
+ '''
+
+ _TESTS = [{
+ 'url': 'https://myspace.com/fiveminutestothestage/video/little-big-town/109594919',
+ 'md5': '9c1483c106f4a695c47d2911feed50a7',
+ 'info_dict': {
+ 'id': '109594919',
+ 'ext': 'mp4',
+ 'title': 'Little Big Town',
+ 'description': 'This country quartet was all smiles while playing a sold out show at the Pacific Amphitheatre in Orange County, California.',
+ 'uploader': 'Five Minutes to the Stage',
+ 'uploader_id': 'fiveminutestothestage',
+ 'timestamp': 1414108751,
+ 'upload_date': '20141023',
+ },
+ }, {
+ # songs
+ 'url': 'https://myspace.com/killsorrow/music/song/of-weakened-soul...-93388656-103880681',
+ 'md5': '1d7ee4604a3da226dd69a123f748b262',
+ 'info_dict': {
+ 'id': '93388656',
+ 'ext': 'm4a',
+ 'title': 'Of weakened soul...',
+ 'uploader': 'Killsorrow',
+ 'uploader_id': 'killsorrow',
+ },
+ }, {
+ 'url': 'https://myspace.com/starset2/music/song/first-light-95799905-106964426',
+ 'only_matching': True,
+ }, {
+ 'url': 'https://myspace.com/thelargemouthbassband/music/song/02-pure-eyes.mp3-94422330-105113388',
+ 'only_matching': True,
+ }]
+
+ def _real_extract(self, url):
+ mobj = self._match_valid_url(url)
+ video_id = mobj.group('video_id') or mobj.group('song_id')
+ is_song = mobj.group('mediatype').startswith('music/song')
+ webpage = self._download_webpage(url, video_id)
+ player_url = self._search_regex(
+ r'videoSwf":"([^"?]*)', webpage, 'player URL', fatal=False)
+
+ def formats_from_stream_urls(stream_url, hls_stream_url, http_stream_url, width=None, height=None):
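+ # Build formats from whichever of the three stream URLs are present
+ # (HLS, legacy RTMP, plain HTTP); song formats are audio-only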
+ formats = []
+ vcodec = 'none' if is_song else None
+ if hls_stream_url:
+ formats.append({
+ 'format_id': 'hls',
+ 'url': hls_stream_url,
+ 'protocol': 'm3u8_native',
+ 'ext': 'm4a' if is_song else 'mp4',
+ 'vcodec': vcodec,
+ })
+ if stream_url and player_url:
+ rtmp_url, play_path = stream_url.split(';', 1)
+ formats.append({
+ 'format_id': 'rtmp',
+ 'url': rtmp_url,
+ 'play_path': play_path,
+ 'player_url': player_url,
+ 'protocol': 'rtmp',
+ 'ext': 'flv',
+ 'width': width,
+ 'height': height,
+ 'vcodec': vcodec,
+ })
+ if http_stream_url:
+ formats.append({
+ 'format_id': 'http',
+ 'url': http_stream_url,
+ 'width': width,
+ 'height': height,
+ 'vcodec': vcodec,
+ })
+ return formats
+
+ if is_song:
+ # songs don't store any useful info in the 'context' variable
+ song_data = self._search_regex(
+ r'''<button.*data-song-id=(["\'])%s\1.*''' % video_id,
+ webpage, 'song_data', default=None, group=0)
+ if song_data is None:
+ # some songs in an album are not playable
+ self.report_warning(
+ '%s: No downloadable song on this page' % video_id)
+ return
+
+ def search_data(name):
+ return self._search_regex(
+ r'''data-%s=([\'"])(?P<data>.*?)\1''' % name,
+ song_data, name, default='', group='data')
+ formats = formats_from_stream_urls(
+ search_data('stream-url'), search_data('hls-stream-url'),
+ search_data('http-stream-url'))
+ if not formats:
+ vevo_id = search_data('vevo-id')
+ youtube_id = search_data('youtube-id')
+ if vevo_id:
+ self.to_screen('Vevo video detected: %s' % vevo_id)
+ return self.url_result('vevo:%s' % vevo_id, ie='Vevo')
+ elif youtube_id:
+ self.to_screen('Youtube video detected: %s' % youtube_id)
+ return self.url_result(youtube_id, ie='Youtube')
+ else:
+ raise ExtractorError(
+ 'Found song but don\'t know how to download it')
+ return {
+ 'id': video_id,
+ 'title': self._og_search_title(webpage),
+ 'uploader': search_data('artist-name'),
+ 'uploader_id': search_data('artist-username'),
+ 'thumbnail': self._og_search_thumbnail(webpage),
+ 'duration': int_or_none(search_data('duration')),
+ 'formats': formats,
+ }
+ else:
+ video = self._parse_json(self._search_regex(
+ r'context = ({.*?});', webpage, 'context'),
+ video_id)['video']
+ formats = formats_from_stream_urls(
+ video.get('streamUrl'), video.get('hlsStreamUrl'),
+ video.get('mp4StreamUrl'), int_or_none(video.get('width')),
+ int_or_none(video.get('height')))
+ return {
+ 'id': video_id,
+ 'title': video['title'],
+ 'description': video.get('description'),
+ 'thumbnail': video.get('imageUrl'),
+ 'uploader': video.get('artistName'),
+ 'uploader_id': video.get('artistUsername'),
+ 'duration': int_or_none(video.get('duration')),
+ 'timestamp': parse_iso8601(video.get('dateAdded')),
+ 'formats': formats,
+ }
+
+
+class MySpaceAlbumIE(InfoExtractor):
+ IE_NAME = 'MySpace:album'
+ _VALID_URL = r'https?://myspace\.com/([^/]+)/music/album/(?P<title>.*-)(?P<id>\d+)'
+
+ _TESTS = [{
+ 'url': 'https://myspace.com/starset2/music/album/transmissions-19455773',
+ 'info_dict': {
+ 'title': 'Transmissions',
+ 'id': '19455773',
+ },
+ 'playlist_count': 14,
+ 'skip': 'this album is only available in some countries',
+ }, {
+ 'url': 'https://myspace.com/killsorrow/music/album/the-demo-18596029',
+ 'info_dict': {
+ 'title': 'The Demo',
+ 'id': '18596029',
+ },
+ 'playlist_count': 5,
+ }]
+
+ def _real_extract(self, url):
+ mobj = self._match_valid_url(url)
+ playlist_id = mobj.group('id')
+ display_id = mobj.group('title') + playlist_id
+ webpage = self._download_webpage(url, display_id)
+ tracks_paths = re.findall(r'"music:song" content="(.*?)"', webpage)
+ if not tracks_paths:
+ raise ExtractorError(
+ '%s: No songs found, try using proxy' % display_id,
+ expected=True)
+ entries = [
+ self.url_result(t_path, ie=MySpaceIE.ie_key())
+ for t_path in tracks_paths]
+ return {
+ '_type': 'playlist',
+ 'id': playlist_id,
+ 'display_id': display_id,
+ 'title': self._og_search_title(webpage),
+ 'entries': entries,
+ }
diff --git a/yt_dlp/extractor/myspass.py b/yt_dlp/extractor/myspass.py
new file mode 100644
index 0000000..28ac982
--- /dev/null
+++ b/yt_dlp/extractor/myspass.py
@@ -0,0 +1,92 @@
+from .common import InfoExtractor
+from ..compat import compat_str
+from ..utils import (
+ int_or_none,
+ parse_duration,
+ xpath_text,
+)
+
+
+class MySpassIE(InfoExtractor):
+ _VALID_URL = r'https?://(?:www\.)?myspass\.de/(?:[^/]+/)*(?P<id>\d+)/?[^/]*$'
+ _TESTS = [{
+ 'url': 'http://www.myspass.de/myspass/shows/tvshows/absolute-mehrheit/Absolute-Mehrheit-vom-17022013-Die-Highlights-Teil-2--/11741/',
+ 'md5': '0b49f4844a068f8b33f4b7c88405862b',
+ 'info_dict': {
+ 'id': '11741',
+ 'ext': 'mp4',
+ 'description': 'md5:9f0db5044c8fe73f528a390498f7ce9b',
+ 'title': '17.02.2013 - Die Highlights, Teil 2',
+ 'thumbnail': r're:.*\.jpg',
+ 'duration': 323.0,
+ 'episode': '17.02.2013 - Die Highlights, Teil 2',
+ 'season_id': '544',
+ 'episode_number': 1,
+ 'series': 'Absolute Mehrheit',
+ 'season_number': 2,
+ 'season': 'Season 2',
+ },
+ },
+ {
+ 'url': 'https://www.myspass.de/shows/tvshows/tv-total/Novak-Puffovic-bei-bester-Laune--/44996/',
+ 'md5': 'eb28b7c5e254192046e86ebaf7deac8f',
+ 'info_dict': {
+ 'id': '44996',
+ 'ext': 'mp4',
+ 'description': 'md5:74c7f886e00834417f1e427ab0da6121',
+ 'title': 'Novak Puffovic bei bester Laune',
+ 'thumbnail': r're:.*\.jpg',
+ 'episode_number': 8,
+ 'episode': 'Novak Puffovic bei bester Laune',
+ 'series': 'TV total',
+ 'season': 'Season 19',
+ 'season_id': '987',
+ 'duration': 2941.0,
+ 'season_number': 19,
+ },
+ },
+ {
+ 'url': 'https://www.myspass.de/channels/tv-total-raabigramm/17033/20831/',
+ 'md5': '7b293a6b9f3a7acdd29304c8d0dbb7cc',
+ 'info_dict': {
+ 'id': '20831',
+ 'ext': 'mp4',
+ 'description': 'Gefühle pur: Schaut euch die ungeschnittene Version von Stefans Liebesbeweis an die Moderationsgrazie von Welt, Verona Feldbusch, an.',
+ 'title': 'Raabigramm Verona Feldbusch',
+ 'thumbnail': r're:.*\.jpg',
+ 'episode_number': 6,
+ 'episode': 'Raabigramm Verona Feldbusch',
+ 'series': 'TV total',
+ 'season': 'Season 1',
+ 'season_id': '34',
+ 'duration': 105.0,
+ 'season_number': 1,
+ },
+ }]
+
+ def _real_extract(self, url):
+ video_id = self._match_id(url)
+
+ metadata = self._download_xml('http://www.myspass.de/myspass/includes/apps/video/getvideometadataxml.php?id=' + video_id, video_id)
+
+ title = xpath_text(metadata, 'title', fatal=True)
+ video_url = xpath_text(metadata, 'url_flv', 'download url', True)
+ video_id_int = int(video_id)
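+ # Numeric path components in the flv URL that exceed the video id
+ # apparently need to be integer-divided by it to obtain the real
+ # download location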
+ for group in self._search_regex(r'/myspass2009/\d+/(\d+)/(\d+)/(\d+)/', video_url, 'myspass', group=(1, 2, 3), default=[]):
+ group_int = int(group)
+ if group_int > video_id_int:
+ video_url = video_url.replace(group, compat_str(group_int // video_id_int))
+
+ return {
+ 'id': video_id,
+ 'url': video_url,
+ 'title': title,
+ 'thumbnail': xpath_text(metadata, 'imagePreview'),
+ 'description': xpath_text(metadata, 'description'),
+ 'duration': parse_duration(xpath_text(metadata, 'duration')),
+ 'series': xpath_text(metadata, 'format'),
+ 'season_number': int_or_none(xpath_text(metadata, 'season')),
+ 'season_id': xpath_text(metadata, 'season_id'),
+ 'episode': title,
+ 'episode_number': int_or_none(xpath_text(metadata, 'episode')),
+ }
diff --git a/yt_dlp/extractor/myvideoge.py b/yt_dlp/extractor/myvideoge.py
new file mode 100644
index 0000000..3e0bb24
--- /dev/null
+++ b/yt_dlp/extractor/myvideoge.py
@@ -0,0 +1,81 @@
+import re
+
+from .common import InfoExtractor
+from ..utils import (
+ MONTH_NAMES,
+ clean_html,
+ get_element_by_class,
+ get_element_by_id,
+ int_or_none,
+ js_to_json,
+ qualities,
+ unified_strdate,
+)
+
+
+class MyVideoGeIE(InfoExtractor):
+ _VALID_URL = r'https?://(?:www\.)?myvideo\.ge/v/(?P<id>[0-9]+)'
+ _TEST = {
+ 'url': 'https://www.myvideo.ge/v/3941048',
+ 'md5': '8c192a7d2b15454ba4f29dc9c9a52ea9',
+ 'info_dict': {
+ 'id': '3941048',
+ 'ext': 'mp4',
+ 'title': 'The best prikol',
+ 'upload_date': '20200611',
+ 'thumbnail': r're:^https?://.*\.jpg$',
+ 'uploader': 'chixa33',
+ 'description': 'md5:5b067801318e33c2e6eea4ab90b1fdd3',
+ },
+ }
+ _MONTH_NAMES_KA = ['იანვარი', 'თებერვალი', 'მარტი', 'აპრილი', 'მაისი', 'ივნისი', 'ივლისი', 'აგვისტო', 'სექტემბერი', 'ოქტომბერი', 'ნოემბერი', 'დეკემბერი']
+
+ _quality = staticmethod(qualities(('SD', 'HD')))
+
+ def _real_extract(self, url):
+ video_id = self._match_id(url)
+ webpage = self._download_webpage(url, video_id)
+
+ title = (
+ self._og_search_title(webpage, default=None)
+ or clean_html(get_element_by_class('my_video_title', webpage))
+ or self._html_search_regex(r'<title\b[^>]*>([^<]+)</title\b', webpage, 'title'))
+
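+ # The player is initialized via an inline jwplayer('mvplayer').setup()
+ # call; lift its sources array and parse it as JSON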
+ jwplayer_sources = self._parse_json(
+ self._search_regex(
+ r'''(?s)jwplayer\s*\(\s*['"]mvplayer['"]\s*\)\s*\.\s*setup\s*\(.*?\bsources\s*:\s*(\[.*?])\s*[,});]''', webpage, 'jwplayer sources', fatal=False)
+ or '',
+ video_id, transform_source=js_to_json, fatal=False)
+
+ formats = self._parse_jwplayer_formats(jwplayer_sources or [], video_id)
+ for f in formats or []:
+ f['quality'] = self._quality(f['format_id'])
+
+ description = (
+ self._og_search_description(webpage)
+ or get_element_by_id('long_desc_holder', webpage)
+ or self._html_search_meta('description', webpage))
+
+ uploader = self._search_regex(r'<a[^>]+class="mv_user_name"[^>]*>([^<]+)<', webpage, 'uploader', fatal=False)
+
+ upload_date = get_element_by_class('mv_vid_upl_date', webpage)
+ # the ka locale may not be available, so do the date conversion locally
+ upload_date = (unified_strdate(
+ # translate any Georgian (ka) month name to its English equivalent
+ re.sub('|'.join(self._MONTH_NAMES_KA),
+ lambda m: MONTH_NAMES['en'][self._MONTH_NAMES_KA.index(m.group(0))],
+ upload_date, flags=re.I))
+ if upload_date else None)
+
+ return {
+ 'id': video_id,
+ 'title': title,
+ 'description': description,
+ 'uploader': uploader,
+ 'formats': formats,
+ 'thumbnail': self._og_search_thumbnail(webpage),
+ 'upload_date': upload_date,
+ 'view_count': int_or_none(get_element_by_class('mv_vid_views', webpage)),
+ 'like_count': int_or_none(get_element_by_id('likes_count', webpage)),
+ 'dislike_count': int_or_none(get_element_by_id('dislikes_count', webpage)),
+ }
diff --git a/yt_dlp/extractor/myvidster.py b/yt_dlp/extractor/myvidster.py
new file mode 100644
index 0000000..e3b700d
--- /dev/null
+++ b/yt_dlp/extractor/myvidster.py
@@ -0,0 +1,27 @@
+from .common import InfoExtractor
+
+
+class MyVidsterIE(InfoExtractor):
+ _VALID_URL = r'https?://(?:www\.)?myvidster\.com/video/(?P<id>\d+)'
+
+ _TEST = {
+ 'url': 'http://www.myvidster.com/video/32059805/Hot_chemistry_with_raw_love_making',
+ 'md5': '95296d0231c1363222c3441af62dc4ca',
+ 'info_dict': {
+ 'id': '3685814',
+ 'title': 'md5:7d8427d6d02c4fbcef50fe269980c749',
+ 'upload_date': '20141027',
+ 'uploader': 'utkualp',
+ 'ext': 'mp4',
+ 'age_limit': 18,
+ },
+ 'add_ie': ['XHamster'],
+ }
+
+ def _real_extract(self, url):
+ video_id = self._match_id(url)
+ webpage = self._download_webpage(url, video_id)
+
+ return self.url_result(self._html_search_regex(
+ r'rel="videolink" href="(?P<real_url>.*)">',
+ webpage, 'real video url'))
diff --git a/yt_dlp/extractor/mzaalo.py b/yt_dlp/extractor/mzaalo.py
new file mode 100644
index 0000000..1996368
--- /dev/null
+++ b/yt_dlp/extractor/mzaalo.py
@@ -0,0 +1,95 @@
+from .common import InfoExtractor
+from ..utils import (
+ parse_age_limit,
+ parse_duration,
+ traverse_obj,
+ url_or_none,
+)
+
+
+class MzaaloIE(InfoExtractor):
+ _VALID_URL = r'(?i)https?://(?:www\.)?mzaalo\.com/(?:play|watch)/(?P<type>movie|original|clip)/(?P<id>[a-f0-9-]+)/[\w-]+'
+ _TESTS = [{
+ # Movies
+ 'url': 'https://www.mzaalo.com/play/movie/c0958d9f-f90e-4503-a755-44358758921d/Jamun',
+ 'info_dict': {
+ 'id': 'c0958d9f-f90e-4503-a755-44358758921d',
+ 'title': 'Jamun',
+ 'ext': 'mp4',
+ 'description': 'md5:24fe9ebb9bbe5b36f7b54b90ab1e2f31',
+ 'thumbnails': 'count:15',
+ 'thumbnail': r're:^https?://.*\.jpg$',
+ 'duration': 5527.0,
+ 'language': 'hin',
+ 'categories': ['Drama'],
+ 'age_limit': 13,
+ },
+ 'params': {'skip_download': 'm3u8'}
+ }, {
+ # Shows
+ 'url': 'https://www.mzaalo.com/play/original/93d42b2b-f373-4c2d-bca4-997412cb069d/Modi-Season-2-CM-TO-PM/Episode-1:Decision,-Not-Promises',
+ 'info_dict': {
+ 'id': '93d42b2b-f373-4c2d-bca4-997412cb069d',
+ 'title': 'Episode 1:Decision, Not Promises',
+ 'ext': 'mp4',
+ 'description': 'md5:16f76058432a54774fbb2561a1955652',
+ 'thumbnails': 'count:22',
+ 'thumbnail': r're:^https?://.*\.jpg$',
+ 'duration': 2040.0,
+ 'language': 'hin',
+ 'categories': ['Drama'],
+ 'age_limit': 13,
+ },
+ 'params': {'skip_download': 'm3u8'}
+ }, {
+ # Streams/Clips
+ 'url': 'https://www.mzaalo.com/play/clip/83cdbcb5-400a-42f1-a1d2-459053cfbda5/Manto-Ki-Kahaaniya',
+ 'info_dict': {
+ 'id': '83cdbcb5-400a-42f1-a1d2-459053cfbda5',
+ 'title': 'Manto Ki Kahaaniya',
+ 'ext': 'mp4',
+ 'description': 'md5:c3c5f1d05f0fd1bfcb05b673d1cc9f2f',
+ 'thumbnails': 'count:3',
+ 'thumbnail': r're:^https?://.*\.jpg$',
+ 'duration': 1937.0,
+ 'language': 'hin',
+ },
+ 'params': {'skip_download': 'm3u8'}
+ }, {
+ 'url': 'https://mzaalo.com/watch/MOVIE/389c892d-0b65-4019-bf73-d4edcb1c014f/Chalo-Dilli',
+ 'only_matching': True,
+ }]
+
+ def _real_extract(self, url):
+ video_id, type_ = self._match_valid_url(url).group('id', 'type')
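+ # Clips use the legacy streamurl endpoint; movies and originals go
+ # through the v2 player-details API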
+ path = (f'partner/streamurl?&assetId={video_id}&getClipDetails=YES' if type_ == 'clip'
+ else f'api/v2/player/details?assetType={type_.upper()}&assetId={video_id}')
+ data = self._download_json(
+ f'https://production.mzaalo.com/platform/{path}', video_id, headers={
+ 'Ocp-Apim-Subscription-Key': '1d0caac2702049b89a305929fdf4cbae',
+ })['data']
+
+ formats = self._extract_m3u8_formats(data['streamURL'], video_id)
+
+ subtitles = {}
+ for subs_lang, subs_url in traverse_obj(data, ('subtitles', {dict.items}, ...)):
+ if url_or_none(subs_url):
+ subtitles[subs_lang] = [{'url': subs_url, 'ext': 'vtt'}]
+
+ lang = traverse_obj(data, ('language', {str.lower}))
+ for f in formats:
+ f['language'] = lang
+
+ return {
+ 'id': video_id,
+ 'formats': formats,
+ 'subtitles': subtitles,
+ **traverse_obj(data, {
+ 'title': ('title', {str}),
+ 'description': ('description', {str}),
+ 'duration': ('duration', {parse_duration}),
+ 'age_limit': ('maturity_rating', {parse_age_limit}),
+ 'thumbnails': ('images', ..., {'url': {url_or_none}}),
+ 'categories': ('genre', ..., {str}),
+ }),
+ }
diff --git a/yt_dlp/extractor/n1.py b/yt_dlp/extractor/n1.py
new file mode 100644
index 0000000..edc4144
--- /dev/null
+++ b/yt_dlp/extractor/n1.py
@@ -0,0 +1,163 @@
+import re
+
+from .common import InfoExtractor
+from ..utils import (
+ unified_timestamp,
+ extract_attributes,
+)
+
+
+class N1InfoAssetIE(InfoExtractor):
+ _VALID_URL = r'https?://best-vod\.umn\.cdn\.united\.cloud/stream\?asset=(?P<id>[^&]+)'
+ _TESTS = [{
+ 'url': 'https://best-vod.umn.cdn.united.cloud/stream?asset=ljsottomazilirija3060921-n1info-si-worldwide&stream=hp1400&t=0&player=m3u8v&sp=n1info&u=n1info&p=n1Sh4redSecre7iNf0',
+ 'md5': '28b08b32aeaff2b8562736ccd5a66fe7',
+ 'info_dict': {
+ 'id': 'ljsottomazilirija3060921-n1info-si-worldwide',
+ 'ext': 'mp4',
+ 'title': 'ljsottomazilirija3060921-n1info-si-worldwide',
+ }
+ }]
+
+ def _real_extract(self, url):
+ video_id = self._match_id(url)
+ formats = self._extract_m3u8_formats(
+ url, video_id, 'mp4', entry_protocol='m3u8_native', m3u8_id='hls', fatal=False)
+
+ return {
+ 'id': video_id,
+ 'title': video_id,
+ 'formats': formats,
+ }
+
+
+class N1InfoIIE(InfoExtractor):
+ IE_NAME = 'N1Info:article'
+ _VALID_URL = r'https?://(?:(?:\w+\.)?n1info\.\w+|nova\.rs)/(?:[^/?#]+/){1,2}(?P<id>[^/?#]+)'
+ _TESTS = [{
+ # Youtube embedded
+ 'url': 'https://rs.n1info.com/sport-klub/tenis/kako-je-djokovic-propustio-istorijsku-priliku-video/',
+ 'md5': '01ddb6646d0fd9c4c7d990aa77fe1c5a',
+ 'info_dict': {
+ 'id': 'L5Hd4hQVUpk',
+ 'ext': 'mp4',
+ 'upload_date': '20210913',
+ 'title': 'Ozmo i USO21, ep. 13: Novak Đoković – Danil Medvedev | Ključevi Poraza, Budućnost | SPORT KLUB TENIS',
+ 'description': 'md5:467f330af1effedd2e290f10dc31bb8e',
+ 'uploader': 'Sport Klub',
+ 'uploader_id': 'sportklub',
+ }
+ }, {
+ 'url': 'https://rs.n1info.com/vesti/djilas-los-plan-za-metro-nece-resiti-nijedan-saobracajni-problem/',
+ 'info_dict': {
+ 'id': 'bgmetrosot2409zta20210924174316682-n1info-rs-worldwide',
+ 'ext': 'mp4',
+ 'title': 'Đilas: Predlog izgradnje metroa besmislen; SNS odbacuje navode',
+ 'upload_date': '20210924',
+ 'timestamp': 1632481347,
+ },
+ 'params': {
+ 'skip_download': True,
+ },
+ }, {
+ 'url': 'https://n1info.si/novice/slovenija/zadnji-dnevi-na-kopaliscu-ilirija-ilirija-ni-umrla-ubili-so-jo/',
+ 'info_dict': {
+ 'id': 'ljsottomazilirija3060921-n1info-si-worldwide',
+ 'ext': 'mp4',
+ 'title': 'Zadnji dnevi na kopališču Ilirija: “Ilirija ni umrla, ubili so jo”',
+ 'timestamp': 1632567630,
+ 'upload_date': '20210925',
+ },
+ 'params': {
+ 'skip_download': True,
+ },
+ }, {
+ # Reddit embedded
+ 'url': 'https://ba.n1info.com/lifestyle/vucic-bolji-od-tita-ako-izgubi-ja-cu-da-crknem-jugoslavija-je-gotova/',
+ 'info_dict': {
+ 'id': '2wmfee9eycp71',
+ 'ext': 'mp4',
+ 'title': '"Ako Vučić izgubi izbore, ja ću da crknem, Jugoslavija je gotova"',
+ 'upload_date': '20210924',
+ 'timestamp': 1632448649.0,
+ 'uploader': 'YouLotWhatDontStop',
+ },
+ 'params': {
+ 'skip_download': True,
+ },
+ }, {
+ 'url': 'https://nova.rs/vesti/politika/zaklina-tatalovic-ani-brnabic-pricate-lazi-video/',
+ 'info_dict': {
+ 'id': 'tnjganabrnabicizaklinatatalovic100danavladegp-novas-worldwide',
+ 'ext': 'mp4',
+ 'title': 'Žaklina Tatalović Ani Brnabić: Pričate laži (VIDEO)',
+ 'upload_date': '20211102',
+ 'timestamp': 1635861677,
+ },
+ }, {
+ 'url': 'https://n1info.rs/vesti/cuta-biti-u-kosovskoj-mitrovici-znaci-da-te-docekaju-eksplozivnim-napravama/',
+ 'info_dict': {
+ 'id': '1332368',
+ 'ext': 'mp4',
+ 'title': 'Ćuta: Biti u Kosovskoj Mitrovici znači da te dočekaju eksplozivnim napravama',
+ 'upload_date': '20230620',
+ 'timestamp': 1687290536,
+ 'thumbnail': 'https://cdn.brid.tv/live/partners/26827/snapshot/1332368_th_6492013a8356f_1687290170.jpg'
+ },
+ }, {
+ 'url': 'https://hr.n1info.com/vijesti/pravobraniteljica-o-ubojstvu-u-zagrebu-radi-se-o-doista-nezapamcenoj-situaciji/',
+ 'only_matching': True,
+ }]
+
+ def _real_extract(self, url):
+ video_id = self._match_id(url)
+ webpage = self._download_webpage(url, video_id)
+
+ title = self._html_search_regex(r'<h1[^>]+>(.+?)</h1>', webpage, 'title')
+ timestamp = unified_timestamp(self._html_search_meta('article:published_time', webpage))
+ plugin_data = self._html_search_meta('BridPlugin', webpage)
+ entries = []
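+ # Newer articles embed a Brid player via $bp(...) calls; older ones
+ # still ship plain <video> tags handled by N1InfoAssetIE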
+ if plugin_data:
+ site_id = self._html_search_regex(r'site:(\d+)', webpage, 'site id')
+ for video_data in re.findall(r'\$bp\("Brid_\d+", (.+)\);', webpage):
+ video_id = self._parse_json(video_data, title)['video']
+ entries.append({
+ 'id': video_id,
+ 'title': title,
+ 'timestamp': timestamp,
+ 'thumbnail': self._html_search_meta('thumbnailURL', webpage),
+ 'formats': self._extract_m3u8_formats(
+ f'https://cdn-uc.brid.tv/live/partners/{site_id}/streaming/{video_id}/{video_id}.m3u8',
+ video_id, fatal=False),
+ })
+ else:
+ # Old player still present in older articles
+ videos = re.findall(r'(?m)(<video[^>]+>)', webpage)
+ for video in videos:
+ video_data = extract_attributes(video)
+ entries.append({
+ '_type': 'url_transparent',
+ 'url': video_data.get('data-url'),
+ 'id': video_data.get('id'),
+ 'title': title,
+ 'thumbnail': video_data.get('data-thumbnail'),
+ 'timestamp': timestamp,
+ 'ie_key': 'N1InfoAsset',
+ })
+
+ embedded_videos = re.findall(r'(<iframe[^>]+>)', webpage)
+ for embedded_video in embedded_videos:
+ video_data = extract_attributes(embedded_video)
+ url = video_data.get('src') or ''
+ if url.startswith('https://www.youtube.com'):
+ entries.append(self.url_result(url, ie='Youtube'))
+ elif url.startswith('https://www.redditmedia.com'):
+ entries.append(self.url_result(url, ie='Reddit'))
+
+ return {
+ '_type': 'playlist',
+ 'id': video_id,
+ 'title': title,
+ 'timestamp': timestamp,
+ 'entries': entries,
+ }
diff --git a/yt_dlp/extractor/nate.py b/yt_dlp/extractor/nate.py
new file mode 100644
index 0000000..5e74caa
--- /dev/null
+++ b/yt_dlp/extractor/nate.py
@@ -0,0 +1,121 @@
+import itertools
+
+from .common import InfoExtractor
+from ..utils import (
+ int_or_none,
+ str_or_none,
+ traverse_obj,
+ unified_strdate,
+)
+
+
+class NateIE(InfoExtractor):
+ _VALID_URL = r'https?://tv\.nate\.com/clip/(?P<id>[0-9]+)'
+
+ _TESTS = [{
+ 'url': 'https://tv.nate.com/clip/1848976',
+ 'info_dict': {
+ 'id': '1848976',
+ 'ext': 'mp4',
+ 'title': '[결승 오프닝 타이틀] 2018 LCK 서머 스플릿 결승전 kt Rolster VS Griffin',
+ 'description': 'md5:e1b79a7dcf0d8d586443f11366f50e6f',
+ 'thumbnail': r're:^https?://.*\.jpg',
+ 'upload_date': '20180908',
+ 'age_limit': 15,
+ 'duration': 73,
+ 'uploader': '2018 LCK 서머 스플릿(롤챔스)',
+ 'channel': '2018 LCK 서머 스플릿(롤챔스)',
+ 'channel_id': '3606',
+ 'uploader_id': '3606',
+ 'tags': 'count:59',
+ },
+ 'params': {'skip_download': True}
+ }, {
+ 'url': 'https://tv.nate.com/clip/4300566',
+ 'info_dict': {
+ 'id': '4300566',
+ 'ext': 'mp4',
+ 'title': '[심쿵엔딩] 이준호x이세영, 서로를 기억하며 끌어안는 두 사람!💕, MBC 211204 방송',
+ 'description': 'md5:be1653502d9c13ce344ddf7828e089fa',
+ 'thumbnail': r're:^https?://.*\.jpg',
+ 'upload_date': '20211204',
+ 'age_limit': 15,
+ 'duration': 201,
+ 'uploader': '옷소매 붉은 끝동',
+ 'channel': '옷소매 붉은 끝동',
+ 'channel_id': '27987',
+ 'uploader_id': '27987',
+ 'tags': 'count:20',
+ },
+ 'params': {'skip_download': True}
+ }]
+
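+ # The last two digits of each stream URL in smcUriList encode its quality tier; map them to frame heights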
+ _QUALITY = {
+ '36': 2160,
+ '35': 1080,
+ '34': 720,
+ '33': 480,
+ '32': 360,
+ '31': 270,
+ }
+
+ def _real_extract(self, url):
+ video_id = self._match_id(url)
+ video_data = self._download_json(f'https://tv.nate.com/api/v1/clip/{video_id}', video_id)
+ formats = [{
+ 'format_id': f_url[-2:],
+ 'url': f_url,
+ 'height': self._QUALITY.get(f_url[-2:]),
+ 'quality': int_or_none(f_url[-2:]),
+ } for f_url in video_data.get('smcUriList') or []]
+ return {
+ 'id': video_id,
+ 'title': video_data.get('clipTitle'),
+ 'description': video_data.get('synopsis'),
+ 'thumbnail': video_data.get('contentImg'),
+ 'upload_date': unified_strdate(traverse_obj(video_data, 'broadDate', 'regDate')),
+ 'age_limit': video_data.get('targetAge'),
+ 'duration': video_data.get('playTime'),
+ 'formats': formats,
+ 'uploader': video_data.get('programTitle'),
+ 'channel': video_data.get('programTitle'),
+ 'channel_id': str_or_none(video_data.get('programSeq')),
+ 'uploader_id': str_or_none(video_data.get('programSeq')),
+ 'tags': video_data['hashTag'].split(',') if video_data.get('hashTag') else None,
+ }
+
+
+class NateProgramIE(InfoExtractor):
+ _VALID_URL = r'https?://tv\.nate\.com/program/clips/(?P<id>[0-9]+)'
+
+ _TESTS = [{
+ 'url': 'https://tv.nate.com/program/clips/27987',
+ 'playlist_mincount': 191,
+ 'info_dict': {
+ 'id': '27987',
+ },
+ }, {
+ 'url': 'https://tv.nate.com/program/clips/3606',
+ 'playlist_mincount': 15,
+ 'info_dict': {
+ 'id': '3606',
+ },
+ }]
+
+ def _entries(self, program_id):
+ for page_num in itertools.count(1):
+ program_data = self._download_json(f'https://tv.nate.com/api/v1/program/{program_id}/clip/ranking?size=20&page={page_num}',
+ program_id, note=f'Downloading page {page_num}')
+ for clip in program_data.get('content') or []:
+ clip_id = clip.get('clipSeq')
+ if clip_id:
+ yield self.url_result(
+ 'https://tv.nate.com/clip/%s' % clip_id,
+ ie=NateIE.ie_key(), video_id=clip_id)
+ if program_data.get('last'):
+ break
+
+ def _real_extract(self, url):
+ program_id = self._match_id(url)
+ return self.playlist_result(self._entries(program_id), playlist_id=program_id)
diff --git a/yt_dlp/extractor/nationalgeographic.py b/yt_dlp/extractor/nationalgeographic.py
new file mode 100644
index 0000000..6f046bc
--- /dev/null
+++ b/yt_dlp/extractor/nationalgeographic.py
@@ -0,0 +1,84 @@
+from .common import InfoExtractor
+from .fox import FOXIE
+from ..utils import (
+ smuggle_url,
+ url_basename,
+)
+
+
+class NationalGeographicVideoIE(InfoExtractor):
+ IE_NAME = 'natgeo:video'
+ _VALID_URL = r'https?://video\.nationalgeographic\.com/.*?'
+
+ _TESTS = [
+ {
+ 'url': 'http://video.nationalgeographic.com/video/news/150210-news-crab-mating-vin?source=featuredvideo',
+ 'md5': '730855d559abbad6b42c2be1fa584917',
+ 'info_dict': {
+ 'id': '0000014b-70a1-dd8c-af7f-f7b559330001',
+ 'ext': 'mp4',
+ 'title': 'Mating Crabs Busted by Sharks',
+ 'description': 'md5:16f25aeffdeba55aaa8ec37e093ad8b3',
+ 'timestamp': 1423523799,
+ 'upload_date': '20150209',
+ 'uploader': 'NAGS',
+ },
+ 'add_ie': ['ThePlatform'],
+ 'skip': 'Redirects to main page',
+ },
+ {
+ 'url': 'http://video.nationalgeographic.com/wild/when-sharks-attack/the-real-jaws',
+ 'md5': '6a3105eb448c070503b3105fb9b320b5',
+ 'info_dict': {
+ 'id': 'ngc-I0IauNSWznb_UV008GxSbwY35BZvgi2e',
+ 'ext': 'mp4',
+ 'title': 'The Real Jaws',
+ 'description': 'md5:8d3e09d9d53a85cd397b4b21b2c77be6',
+ 'timestamp': 1433772632,
+ 'upload_date': '20150608',
+ 'uploader': 'NAGS',
+ },
+ 'add_ie': ['ThePlatform'],
+ 'skip': 'Redirects to main page',
+ },
+ ]
+
+ def _real_extract(self, url):
+ name = url_basename(url)
+
+ webpage = self._download_webpage(url, name)
+ guid = self._search_regex(
+ r'id="(?:videoPlayer|player-container)"[^>]+data-guid="([^"]+)"',
+ webpage, 'guid')
+
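+ # Delegate to ThePlatform using the scraped GUID; force_smil_url makes ThePlatformIE fetch the SMIL endpoint directly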
+ return {
+ '_type': 'url_transparent',
+ 'ie_key': 'ThePlatform',
+ 'url': smuggle_url(
+ 'http://link.theplatform.com/s/ngs/media/guid/2423130747/%s?mbr=true' % guid,
+ {'force_smil_url': True}),
+ 'id': guid,
+ }
+
+
+class NationalGeographicTVIE(FOXIE): # XXX: Do not subclass from concrete IE
+ _VALID_URL = r'https?://(?:www\.)?nationalgeographic\.com/tv/watch/(?P<id>[\da-fA-F]+)'
+ _TESTS = [{
+ 'url': 'https://www.nationalgeographic.com/tv/watch/6a875e6e734b479beda26438c9f21138/',
+ 'info_dict': {
+ 'id': '6a875e6e734b479beda26438c9f21138',
+ 'ext': 'mp4',
+ 'title': 'Why Nat Geo? Valley of the Boom',
+ 'description': 'The lives of prominent figures in the tech world, including their friendships, rivalries, victories and failures.',
+ 'timestamp': 1542662458,
+ 'upload_date': '20181119',
+ 'age_limit': 14,
+ },
+ 'params': {
+ 'skip_download': True,
+ },
+ 'skip': 'Content not available',
+ }]
+ _HOME_PAGE_URL = 'https://www.nationalgeographic.com/tv/'
+ _API_KEY = '238bb0a0c2aba67922c48709ce0c06fd'
diff --git a/yt_dlp/extractor/naver.py b/yt_dlp/extractor/naver.py
new file mode 100644
index 0000000..806b790
--- /dev/null
+++ b/yt_dlp/extractor/naver.py
@@ -0,0 +1,408 @@
+import base64
+import hashlib
+import hmac
+import itertools
+import json
+import re
+import time
+from urllib.parse import parse_qs, urlparse
+
+from .common import InfoExtractor
+from ..utils import (
+ ExtractorError,
+ dict_get,
+ int_or_none,
+ join_nonempty,
+ merge_dicts,
+ parse_iso8601,
+ traverse_obj,
+ try_get,
+ unified_timestamp,
+ update_url_query,
+ url_or_none,
+)
+
+
+class NaverBaseIE(InfoExtractor):
+ _CAPTION_EXT_RE = r'\.(?:ttml|vtt)'
+
+ @staticmethod # NB: Used in WeverseIE
+ def process_subtitles(vod_data, process_url):
+ ret = {'subtitles': {}, 'automatic_captions': {}}
+ for caption in traverse_obj(vod_data, ('captions', 'list', ...)):
+ caption_url = caption.get('source')
+ if not caption_url:
+ continue
+ type_ = 'automatic_captions' if caption.get('type') == 'auto' else 'subtitles'
+ lang = caption.get('locale') or join_nonempty('language', 'country', from_dict=caption) or 'und'
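+ # Fan-made subtitles may share a locale, so suffix each extra track with _fan1, _fan2, ...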
+ if caption.get('type') == 'fan':
+ lang += '_fan%d' % next(i for i in itertools.count(1) if f'{lang}_fan{i}' not in ret[type_])
+ ret[type_].setdefault(lang, []).extend({
+ 'url': sub_url,
+ 'name': join_nonempty('label', 'fanName', from_dict=caption, delim=' - '),
+ } for sub_url in process_url(caption_url))
+ return ret
+
+ def _extract_video_info(self, video_id, vid, key):
+ video_data = self._download_json(
+ 'http://play.rmcnmv.naver.com/vod/play/v2.0/' + vid,
+ video_id, query={
+ 'key': key,
+ })
+ meta = video_data['meta']
+ title = meta['subject']
+ formats = []
+ get_list = lambda x: try_get(video_data, lambda y: y[x + 's']['list'], list) or []
+
+ def extract_formats(streams, stream_type, query={}):
+ for stream in streams:
+ stream_url = stream.get('source')
+ if not stream_url:
+ continue
+ stream_url = update_url_query(stream_url, query)
+ encoding_option = stream.get('encodingOption', {})
+ bitrate = stream.get('bitrate', {})
+ formats.append({
+ 'format_id': '%s_%s' % (stream.get('type') or stream_type, dict_get(encoding_option, ('name', 'id'))),
+ 'url': stream_url,
+ 'ext': 'mp4',
+ 'width': int_or_none(encoding_option.get('width')),
+ 'height': int_or_none(encoding_option.get('height')),
+ 'vbr': int_or_none(bitrate.get('video')),
+ 'abr': int_or_none(bitrate.get('audio')),
+ 'filesize': int_or_none(stream.get('size')),
+ 'protocol': 'm3u8_native' if stream_type == 'HLS' else None,
+ })
+
+ extract_formats(get_list('video'), 'H264')
+ for stream_set in video_data.get('streams', []):
+ query = {}
+ for param in stream_set.get('keys', []):
+ query[param['name']] = param['value']
+ stream_type = stream_set.get('type')
+ videos = stream_set.get('videos')
+ if videos:
+ extract_formats(videos, stream_type, query)
+ elif stream_type == 'HLS':
+ stream_url = stream_set.get('source')
+ if not stream_url:
+ continue
+ formats.extend(self._extract_m3u8_formats(
+ update_url_query(stream_url, query), video_id,
+ 'mp4', 'm3u8_native', m3u8_id=stream_type, fatal=False))
+
+ replace_ext = lambda x, y: re.sub(self._CAPTION_EXT_RE, '.' + y, x)
+
+ def get_subs(caption_url):
+ if re.search(self._CAPTION_EXT_RE, caption_url):
+ return [
+ replace_ext(caption_url, 'ttml'),
+ replace_ext(caption_url, 'vtt'),
+ ]
+ return [caption_url]
+
+ user = meta.get('user', {})
+
+ return {
+ 'id': video_id,
+ 'title': title,
+ 'formats': formats,
+ 'thumbnail': try_get(meta, lambda x: x['cover']['source']),
+ 'view_count': int_or_none(meta.get('count')),
+ 'uploader_id': user.get('id'),
+ 'uploader': user.get('name'),
+ 'uploader_url': user.get('url'),
+ **self.process_subtitles(video_data, get_subs),
+ }
+
+ def _call_api(self, path, video_id):
+ api_endpoint = f'https://apis.naver.com/now_web2/now_web_api/v1{path}'
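+ # Requests must be signed: md is the base64-encoded HMAC-SHA1 of the URL (capped at 255 chars) plus the millisecond timestamp msgpad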
+ key = b'nbxvs5nwNG9QKEWK0ADjYA4JZoujF4gHcIwvoCxFTPAeamq5eemvt5IWAYXxrbYM'
+ msgpad = int(time.time() * 1000)
+ md = base64.b64encode(hmac.HMAC(
+ key, f'{api_endpoint[:255]}{msgpad}'.encode(), digestmod=hashlib.sha1).digest()).decode()
+
+ return self._download_json(api_endpoint, video_id=video_id, headers=self.geo_verification_headers(), query={
+ 'msgpad': msgpad,
+ 'md': md,
+ })['result']
+
+
+class NaverIE(NaverBaseIE):
+ _VALID_URL = r'https?://(?:m\.)?tv(?:cast)?\.naver\.com/(?:v|embed)/(?P<id>\d+)'
+ _GEO_BYPASS = False
+ _TESTS = [{
+ 'url': 'http://tv.naver.com/v/81652',
+ 'info_dict': {
+ 'id': '81652',
+ 'ext': 'mp4',
+ 'title': '[9월 모의고사 해설강의][수학_김상희] 수학 A형 16~20번',
+ 'description': '메가스터디 수학 김상희 선생님이 9월 모의고사 수학A형 16번에서 20번까지 해설강의를 공개합니다.',
+ 'timestamp': 1378200754,
+ 'upload_date': '20130903',
+ 'uploader': '메가스터디, 합격불변의 법칙',
+ 'uploader_id': 'megastudy',
+ 'uploader_url': 'https://tv.naver.com/megastudy',
+ 'view_count': int,
+ 'like_count': int,
+ 'comment_count': int,
+ 'duration': 2118,
+ 'thumbnail': r're:^https?://.*\.jpg',
+ },
+ }, {
+ 'url': 'http://tv.naver.com/v/395837',
+ 'md5': '7791205fa89dbed2f5e3eb16d287ff05',
+ 'info_dict': {
+ 'id': '395837',
+ 'ext': 'mp4',
+ 'title': '9년이 지나도 아픈 기억, 전효성의 아버지',
+ 'description': 'md5:c76be23e21403a6473d8119678cdb5cb',
+ 'timestamp': 1432030253,
+ 'upload_date': '20150519',
+ 'uploader': '4가지쇼',
+ 'uploader_id': '4show',
+ 'uploader_url': 'https://tv.naver.com/4show',
+ 'view_count': int,
+ 'like_count': int,
+ 'comment_count': int,
+ 'duration': 277,
+ 'thumbnail': r're:^https?://.*\.jpg',
+ },
+ }, {
+ 'url': 'http://tvcast.naver.com/v/81652',
+ 'only_matching': True,
+ }]
+
+ def _real_extract(self, url):
+ video_id = self._match_id(url)
+ data = self._call_api(f'/clips/{video_id}/play-info', video_id)
+
+ vid = traverse_obj(data, ('clip', 'videoId', {str}))
+ in_key = traverse_obj(data, ('play', 'inKey', {str}))
+
+ if not vid or not in_key:
+ raise ExtractorError('Unable to extract video info')
+
+ info = self._extract_video_info(video_id, vid, in_key)
+ info.update(traverse_obj(data, ('clip', {
+ 'title': 'title',
+ 'description': 'description',
+ 'timestamp': ('firstExposureDatetime', {parse_iso8601}),
+ 'duration': ('playTime', {int_or_none}),
+ 'like_count': ('likeItCount', {int_or_none}),
+ 'view_count': ('playCount', {int_or_none}),
+ 'comment_count': ('commentCount', {int_or_none}),
+ 'thumbnail': ('thumbnailImageUrl', {url_or_none}),
+ 'uploader': 'channelName',
+ 'uploader_id': 'channelId',
+ 'uploader_url': ('channelUrl', {url_or_none}),
+ 'age_limit': ('adultVideo', {lambda x: 19 if x else None}),
+ })))
+ return info
+
+
+class NaverLiveIE(NaverBaseIE):
+ IE_NAME = 'Naver:live'
+ _VALID_URL = r'https?://(?:m\.)?tv(?:cast)?\.naver\.com/l/(?P<id>\d+)'
+ _GEO_BYPASS = False
+ _TESTS = [{
+ 'url': 'https://tv.naver.com/l/127062',
+ 'info_dict': {
+ 'id': '127062',
+ 'ext': 'mp4',
+ 'live_status': 'is_live',
+ 'channel': '뉴스는 YTN',
+ 'channel_id': 'ytnnews24',
+ 'title': 're:^대한민국 24시간 뉴스 채널 [0-9]{4}-[0-9]{2}-[0-9]{2} [0-9]{2}:[0-9]{2}$',
+ 'description': 'md5:f938b5956711beab6f882314ffadf4d5',
+ 'start_time': 1677752280,
+ 'thumbnail': r're:^https?://.*\.(jpg|jpeg|png)',
+ 'like_count': int,
+ },
+ }, {
+ 'url': 'https://tv.naver.com/l/140535',
+ 'info_dict': {
+ 'id': '140535',
+ 'ext': 'mp4',
+ 'live_status': 'is_live',
+ 'channel': 'KBS뉴스',
+ 'channel_id': 'kbsnews',
+ 'start_time': 1696867320,
+ 'title': 're:^언제 어디서나! KBS 뉴스 24 [0-9]{4}-[0-9]{2}-[0-9]{2} [0-9]{2}:[0-9]{2}$',
+ 'description': 'md5:6ad419c0bf2f332829bda3f79c295284',
+ 'thumbnail': r're:^https?://.*\.(jpg|jpeg|png)',
+ 'like_count': int,
+ },
+ }, {
+ 'url': 'https://tv.naver.com/l/54887',
+ 'only_matching': True,
+ }]
+
+ def _real_extract(self, url):
+ video_id = self._match_id(url)
+ data = self._call_api(f'/live-end/normal/{video_id}/play-info?renewLastPlayDate=true', video_id)
+
+ status = traverse_obj(data, ('live', 'liveStatus'))
+ if status == 'CLOSED':
+ raise ExtractorError('Stream is offline.', expected=True)
+ elif status != 'OPENED':
+ raise ExtractorError(f'Unknown status {status!r}')
+
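+ # playbackBody is a JSON string whose first media entry holds the live HLS manifest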
+ return {
+ 'id': video_id,
+ 'formats': self._extract_m3u8_formats(
+ traverse_obj(data, ('playbackBody', {json.loads}, 'media', 0, 'path')), video_id, live=True),
+ **traverse_obj(data, ('live', {
+ 'title': 'title',
+ 'channel': 'channelName',
+ 'channel_id': 'channelId',
+ 'description': 'description',
+ 'like_count': (('likeCount', 'likeItCount'), {int_or_none}),
+ 'thumbnail': ('thumbnailImageUrl', {url_or_none}),
+ 'start_time': (('startTime', 'startDateTime', 'startYmdt'), {parse_iso8601}),
+ }), get_all=False),
+ 'is_live': True
+ }
+
+
+class NaverNowIE(NaverBaseIE):
+ IE_NAME = 'navernow'
+ _VALID_URL = r'https?://now\.naver\.com/s/now\.(?P<id>\w+)'
+ _API_URL = 'https://apis.naver.com/now_web/oldnow_web/v4'
+ _TESTS = [{
+ 'url': 'https://now.naver.com/s/now.4759?shareReplayId=26331132#replay=',
+ 'md5': 'e05854162c21c221481de16b2944a0bc',
+ 'info_dict': {
+ 'id': '4759-26331132',
+ 'title': '아이키X노제\r\n💖꽁냥꽁냥💖(1)',
+ 'ext': 'mp4',
+ 'thumbnail': r're:^https?://.*\.jpg',
+ 'timestamp': 1650369600,
+ 'upload_date': '20220419',
+ 'uploader_id': 'now',
+ 'view_count': int,
+ 'uploader_url': 'https://now.naver.com/show/4759',
+ 'uploader': '아이키의 떰즈업',
+ },
+ 'params': {
+ 'noplaylist': True,
+ }
+ }, {
+ 'url': 'https://now.naver.com/s/now.4759?shareHightlight=26601461#highlight=',
+ 'md5': '9f6118e398aa0f22b2152f554ea7851b',
+ 'info_dict': {
+ 'id': '4759-26601461',
+ 'title': '아이키: 나 리정한테 흔들렸어,,, 질투 폭발하는 노제 여보😾 [아이키의 떰즈업]ㅣ네이버 NOW.',
+ 'ext': 'mp4',
+ 'thumbnail': r're:^https?://.*\.jpg',
+ 'upload_date': '20220504',
+ 'timestamp': 1651648311,
+ 'uploader_id': 'now',
+ 'view_count': int,
+ 'uploader_url': 'https://now.naver.com/show/4759',
+ 'uploader': '아이키의 떰즈업',
+ },
+ 'params': {
+ 'noplaylist': True,
+ },
+ }, {
+ 'url': 'https://now.naver.com/s/now.4759',
+ 'info_dict': {
+ 'id': '4759',
+ 'title': '아이키의 떰즈업',
+ },
+ 'playlist_mincount': 101
+ }, {
+ 'url': 'https://now.naver.com/s/now.4759?shareReplayId=26331132#replay',
+ 'info_dict': {
+ 'id': '4759',
+ 'title': '아이키의 떰즈업',
+ },
+ 'playlist_mincount': 101,
+ }, {
+ 'url': 'https://now.naver.com/s/now.4759?shareHightlight=26601461#highlight=',
+ 'info_dict': {
+ 'id': '4759',
+ 'title': '아이키의 떰즈업',
+ },
+ 'playlist_mincount': 101,
+ }, {
+ 'url': 'https://now.naver.com/s/now.kihyunplay?shareReplayId=30573291#replay',
+ 'only_matching': True,
+ }]
+
+ def _extract_replay(self, show_id, replay_id):
+ vod_info = self._download_json(f'{self._API_URL}/shows/now.{show_id}/vod/{replay_id}', replay_id)
+ in_key = self._download_json(f'{self._API_URL}/shows/now.{show_id}/vod/{replay_id}/inkey', replay_id)['inKey']
+ return merge_dicts({
+ 'id': f'{show_id}-{replay_id}',
+ 'title': traverse_obj(vod_info, ('episode', 'title')),
+ 'timestamp': unified_timestamp(traverse_obj(vod_info, ('episode', 'start_time'))),
+ 'thumbnail': vod_info.get('thumbnail_image_url'),
+ }, self._extract_video_info(replay_id, vod_info['video_id'], in_key))
+
+ def _extract_show_replays(self, show_id):
+ page_size = 15
+ page = 1
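+ # Page through the VOD list until a page comes back shorter than page_size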
+ while True:
+ show_vod_info = self._download_json(
+ f'{self._API_URL}/vod-shows/now.{show_id}', show_id,
+ query={'page': page, 'page_size': page_size},
+ note=f'Downloading JSON vod list for show {show_id} - page {page}'
+ )['response']['result']
+ for v in show_vod_info.get('vod_list') or []:
+ yield self._extract_replay(show_id, v['id'])
+
+ if len(show_vod_info.get('vod_list') or []) < page_size:
+ break
+ page += 1
+
+ def _extract_show_highlights(self, show_id, highlight_id=None):
+ page_size = 10
+ page = 1
+ while True:
+ highlights_videos = self._download_json(
+ f'{self._API_URL}/shows/now.{show_id}/highlights/videos/', show_id,
+ query={'page': page, 'page_size': page_size},
+ note=f'Downloading JSON highlights for show {show_id} - page {page}')
+
+ for highlight in highlights_videos.get('results') or []:
+ if highlight_id and highlight.get('clip_no') != int(highlight_id):
+ continue
+ yield merge_dicts({
+ 'id': f'{show_id}-{highlight["clip_no"]}',
+ 'title': highlight.get('title'),
+ 'timestamp': unified_timestamp(highlight.get('regdate')),
+ 'thumbnail': highlight.get('thumbnail_url'),
+ }, self._extract_video_info(highlight['clip_no'], highlight['video_id'], highlight['video_inkey']))
+
+ if len(highlights_videos.get('results') or []) < page_size:
+ break
+ page += 1
+
+ def _extract_highlight(self, show_id, highlight_id):
+ try:
+ return next(self._extract_show_highlights(show_id, highlight_id))
+ except StopIteration:
+ raise ExtractorError(f'Unable to find highlight {highlight_id} for show {show_id}')
+
+ def _real_extract(self, url):
+ show_id = self._match_id(url)
+ qs = parse_qs(urlparse(url).query)
+
+ if not self._yes_playlist(show_id, qs.get('shareHightlight')):
+ return self._extract_highlight(show_id, qs['shareHightlight'][0])
+ elif not self._yes_playlist(show_id, qs.get('shareReplayId')):
+ return self._extract_replay(show_id, qs['shareReplayId'][0])
+
+ show_info = self._download_json(
+ f'{self._API_URL}/shows/now.{show_id}/', show_id,
+ note=f'Downloading JSON vod list for show {show_id}')
+
+ return self.playlist_result(
+ itertools.chain(self._extract_show_replays(show_id), self._extract_show_highlights(show_id)),
+ show_id, show_info.get('title'))
diff --git a/yt_dlp/extractor/nba.py b/yt_dlp/extractor/nba.py
new file mode 100644
index 0000000..81d11e3
--- /dev/null
+++ b/yt_dlp/extractor/nba.py
@@ -0,0 +1,422 @@
+import functools
+import re
+
+from .turner import TurnerBaseIE
+from ..compat import (
+ compat_str,
+ compat_urllib_parse_unquote,
+)
+from ..utils import (
+ int_or_none,
+ merge_dicts,
+ OnDemandPagedList,
+ parse_duration,
+ parse_iso8601,
+ parse_qs,
+ try_get,
+ update_url_query,
+ urljoin,
+)
+
+
+class NBACVPBaseIE(TurnerBaseIE):
+ def _extract_nba_cvp_info(self, path, video_id, fatal=False):
+ return self._extract_cvp_info(
+ 'http://secure.nba.com/%s' % path, video_id, {
+ 'default': {
+ 'media_src': 'http://nba.cdn.turner.com/nba/big',
+ },
+ 'm3u8': {
+ 'media_src': 'http://nbavod-f.akamaihd.net',
+ },
+ }, fatal=fatal)
+
+
+class NBAWatchBaseIE(NBACVPBaseIE):
+ _VALID_URL_BASE = r'https?://(?:(?:www\.)?nba\.com(?:/watch)?|watch\.nba\.com)/'
+
+ def _extract_video(self, filter_key, filter_value):
+ video = self._download_json(
+ 'https://neulionscnbav2-a.akamaihd.net/solr/nbad_program/usersearch',
+ filter_value, query={
+ 'fl': 'description,image,name,pid,releaseDate,runtime,tags,seoName',
+ 'q': filter_key + ':' + filter_value,
+ 'wt': 'json',
+ })['response']['docs'][0]
+
+ video_id = str(video['pid'])
+ title = video['name']
+
+ formats = []
+ m3u8_url = (self._download_json(
+ 'https://watch.nba.com/service/publishpoint', video_id, query={
+ 'type': 'video',
+ 'format': 'json',
+ 'id': video_id,
+ }, headers={
+ 'User-Agent': 'Mozilla/5.0 (iPhone; CPU iPhone OS 11_0_1 like Mac OS X) AppleWebKit/604.1.38 (KHTML, like Gecko) Version/11.0 Mobile/15A402 Safari/604.1',
+ }, fatal=False) or {}).get('path')
+ if m3u8_url:
+ m3u8_formats = self._extract_m3u8_formats(
+ re.sub(r'_(?:pc|iphone)\.', '.', m3u8_url), video_id, 'mp4',
+ 'm3u8_native', m3u8_id='hls', fatal=False)
+ formats.extend(m3u8_formats)
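+ # Each HLS rendition also has a progressive HTTP counterpart at the same URL minus the .m3u8 suffix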
+ for f in m3u8_formats:
+ http_f = f.copy()
+ http_f.update({
+ 'format_id': http_f['format_id'].replace('hls-', 'http-'),
+ 'protocol': 'http',
+ 'url': http_f['url'].replace('.m3u8', ''),
+ })
+ formats.append(http_f)
+
+ info = {
+ 'id': video_id,
+ 'title': title,
+ 'thumbnail': urljoin('https://nbadsdmt.akamaized.net/media/nba/nba/thumbs/', video.get('image')),
+ 'description': video.get('description'),
+ 'duration': int_or_none(video.get('runtime')),
+ 'timestamp': parse_iso8601(video.get('releaseDate')),
+ 'tags': video.get('tags'),
+ }
+
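+ # Videos whose seoName embeds a date path also carry legacy Turner CVP metadata with additional formats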
+ seo_name = video.get('seoName')
+ if seo_name and re.search(r'\d{4}/\d{2}/\d{2}/', seo_name):
+ base_path = ''
+ if seo_name.startswith('teams/'):
+ base_path += seo_name.split('/')[1] + '/'
+ base_path += 'video/'
+ cvp_info = self._extract_nba_cvp_info(
+ base_path + seo_name + '.xml', video_id, False)
+ if cvp_info:
+ formats.extend(cvp_info['formats'])
+ info = merge_dicts(info, cvp_info)
+
+ info['formats'] = formats
+ return info
+
+
+class NBAWatchEmbedIE(NBAWatchBaseIE):
+ IE_NAME = 'nba:watch:embed'
+ _VALID_URL = NBAWatchBaseIE._VALID_URL_BASE + r'embed\?.*?\bid=(?P<id>\d+)'
+ _TESTS = [{
+ 'url': 'http://watch.nba.com/embed?id=659395',
+ 'md5': 'b7e3f9946595f4ca0a13903ce5edd120',
+ 'info_dict': {
+ 'id': '659395',
+ 'ext': 'mp4',
+ 'title': 'Mix clip: More than 7 points of Joe Ingles, Luc Mbah a Moute, Blake Griffin and 6 more in Utah Jazz vs. the Clippers, 4/15/2017',
+ 'description': 'Mix clip: More than 7 points of Joe Ingles, Luc Mbah a Moute, Blake Griffin and 6 more in Utah Jazz vs. the Clippers, 4/15/2017',
+ 'timestamp': 1492228800,
+ 'upload_date': '20170415',
+ },
+ }]
+
+ def _real_extract(self, url):
+ video_id = self._match_id(url)
+ return self._extract_video('pid', video_id)
+
+
+class NBAWatchIE(NBAWatchBaseIE):
+ IE_NAME = 'nba:watch'
+ _VALID_URL = NBAWatchBaseIE._VALID_URL_BASE + r'(?:nba/)?video/(?P<id>.+?(?=/index\.html)|(?:[^/]+/)*[^/?#&]+)'
+ _TESTS = [{
+ 'url': 'http://www.nba.com/video/games/nets/2012/12/04/0021200253-okc-bkn-recap.nba/index.html',
+ 'md5': '9d902940d2a127af3f7f9d2f3dc79c96',
+ 'info_dict': {
+ 'id': '70946',
+ 'ext': 'mp4',
+ 'title': 'Thunder vs. Nets',
+ 'description': 'Kevin Durant scores 32 points and dishes out six assists as the Thunder beat the Nets in Brooklyn.',
+ 'duration': 181,
+ 'timestamp': 1354597200,
+ 'upload_date': '20121204',
+ },
+ }, {
+ 'url': 'http://www.nba.com/video/games/hornets/2014/12/05/0021400276-nyk-cha-play5.nba/',
+ 'only_matching': True,
+ }, {
+ 'url': 'http://watch.nba.com/video/channels/playoffs/2015/05/20/0041400301-cle-atl-recap.nba',
+ 'md5': 'b2b39b81cf28615ae0c3360a3f9668c4',
+ 'info_dict': {
+ 'id': '330865',
+ 'ext': 'mp4',
+ 'title': 'Hawks vs. Cavaliers Game 1',
+ 'description': 'md5:8094c3498d35a9bd6b1a8c396a071b4d',
+ 'duration': 228,
+ 'timestamp': 1432094400,
+ 'upload_date': '20150521',
+ },
+ }, {
+ 'url': 'http://watch.nba.com/nba/video/channels/nba_tv/2015/06/11/YT_go_big_go_home_Game4_061115',
+ 'only_matching': True,
+ }, {
+ # only CVP mp4 format available
+ 'url': 'https://watch.nba.com/video/teams/cavaliers/2012/10/15/sloan121015mov-2249106',
+ 'only_matching': True,
+ }, {
+ 'url': 'https://watch.nba.com/video/top-100-dunks-from-the-2019-20-season?plsrc=nba&collection=2019-20-season-highlights',
+ 'only_matching': True,
+ }]
+
+ def _real_extract(self, url):
+ display_id = self._match_id(url)
+ collection_id = parse_qs(url).get('collection', [None])[0]
+ if self._yes_playlist(collection_id, display_id):
+ return self.url_result(
+ 'https://www.nba.com/watch/list/collection/' + collection_id,
+ NBAWatchCollectionIE.ie_key(), collection_id)
+ return self._extract_video('seoName', display_id)
+
+
+class NBAWatchCollectionIE(NBAWatchBaseIE):
+ IE_NAME = 'nba:watch:collection'
+ _VALID_URL = NBAWatchBaseIE._VALID_URL_BASE + r'list/collection/(?P<id>[^/?#&]+)'
+ _TESTS = [{
+ 'url': 'https://watch.nba.com/list/collection/season-preview-2020',
+ 'info_dict': {
+ 'id': 'season-preview-2020',
+ },
+ 'playlist_mincount': 43,
+ }]
+ _PAGE_SIZE = 100
+
+ def _fetch_page(self, collection_id, page):
+ page += 1
+ videos = self._download_json(
+ 'https://content-api-prod.nba.com/public/1/endeavor/video-list/collection/' + collection_id,
+ collection_id, 'Downloading page %d JSON metadata' % page, query={
+ 'count': self._PAGE_SIZE,
+ 'page': page,
+ })['results']['videos']
+ for video in videos:
+ program = video.get('program') or {}
+ seo_name = program.get('seoName') or program.get('slug')
+ if not seo_name:
+ continue
+ yield {
+ '_type': 'url',
+ 'id': program.get('id'),
+ 'title': program.get('title') or video.get('title'),
+ 'url': 'https://www.nba.com/watch/video/' + seo_name,
+ 'thumbnail': video.get('image'),
+ 'description': program.get('description') or video.get('description'),
+ 'duration': parse_duration(program.get('runtimeHours')),
+ 'timestamp': parse_iso8601(video.get('releaseDate')),
+ }
+
+ def _real_extract(self, url):
+ collection_id = self._match_id(url)
+ entries = OnDemandPagedList(
+ functools.partial(self._fetch_page, collection_id),
+ self._PAGE_SIZE)
+ return self.playlist_result(entries, collection_id)
+
+
+class NBABaseIE(NBACVPBaseIE):
+ _VALID_URL_BASE = r'''(?x)
+ https?://(?:www\.)?nba\.com/
+ (?P<team>
+ blazers|
+ bucks|
+ bulls|
+ cavaliers|
+ celtics|
+ clippers|
+ grizzlies|
+ hawks|
+ heat|
+ hornets|
+ jazz|
+ kings|
+ knicks|
+ lakers|
+ magic|
+ mavericks|
+ nets|
+ nuggets|
+ pacers|
+ pelicans|
+ pistons|
+ raptors|
+ rockets|
+ sixers|
+ spurs|
+ suns|
+ thunder|
+ timberwolves|
+ warriors|
+ wizards
+ )
+ (?:/play\#)?/'''
+ _CHANNEL_PATH_REGEX = r'video/channel|series'
+
+ def _embed_url_result(self, team, content_id):
+ return self.url_result(update_url_query(
+ 'https://secure.nba.com/assets/amp/include/video/iframe.html', {
+ 'contentId': content_id,
+ 'team': team,
+ }), NBAEmbedIE.ie_key())
+
+ def _call_api(self, team, content_id, query, resource):
+ return self._download_json(
+ 'https://api.nba.net/2/%s/video,imported_video,wsc/' % team,
+ content_id, 'Downloading %s JSON metadata' % resource,
+ query=query, headers={
+ 'accessToken': 'internal|bb88df6b4c2244e78822812cecf1ee1b',
+ })['response']['result']
+
+ def _extract_video(self, video, team, extract_all=True):
+ video_id = compat_str(video['nid'])
+ team = video['brand']
+
+ info = {
+ 'id': video_id,
+ 'title': video.get('title') or video.get('headline') or video['shortHeadline'],
+ 'description': video.get('description'),
+ 'timestamp': parse_iso8601(video.get('published')),
+ }
+
+ subtitles = {}
+ captions = try_get(video, lambda x: x['videoCaptions']['sidecars'], dict) or {}
+ for caption_url in captions.values():
+ subtitles.setdefault('en', []).append({'url': caption_url})
+
+ formats = []
+ mp4_url = video.get('mp4')
+ if mp4_url:
+ formats.append({
+ 'url': mp4_url,
+ })
+
+ if extract_all:
+ source_url = video.get('videoSource')
+ if source_url and not source_url.startswith('s3://') and self._is_valid_url(source_url, video_id, 'source'):
+ formats.append({
+ 'format_id': 'source',
+ 'url': source_url,
+ 'quality': 1,
+ })
+
+ m3u8_url = video.get('m3u8')
+ if m3u8_url:
+ if '.akamaihd.net/i/' in m3u8_url:
+ formats.extend(self._extract_akamai_formats(
+ m3u8_url, video_id, {'http': 'pmd.cdn.turner.com'}))
+ else:
+ formats.extend(self._extract_m3u8_formats(
+ m3u8_url, video_id, 'mp4',
+ 'm3u8_native', m3u8_id='hls', fatal=False))
+
+ content_xml = video.get('contentXml')
+ if team and content_xml:
+ cvp_info = self._extract_nba_cvp_info(
+ team + content_xml, video_id, fatal=False)
+ if cvp_info:
+ formats.extend(cvp_info['formats'])
+ subtitles = self._merge_subtitles(subtitles, cvp_info['subtitles'])
+ info = merge_dicts(info, cvp_info)
+
+ else:
+ info.update(self._embed_url_result(team, video['videoId']))
+
+ info.update({
+ 'formats': formats,
+ 'subtitles': subtitles,
+ })
+
+ return info
+
+ def _real_extract(self, url):
+ team, display_id = self._match_valid_url(url).groups()
+ if '/play#/' in url:
+ display_id = compat_urllib_parse_unquote(display_id)
+ else:
+ webpage = self._download_webpage(url, display_id)
+ display_id = self._search_regex(
+ self._CONTENT_ID_REGEX + r'\s*:\s*"([^"]+)"', webpage, 'video id')
+ return self._extract_url_results(team, display_id)
+
+
+class NBAEmbedIE(NBABaseIE):
+ IE_NAME = 'nba:embed'
+ _VALID_URL = r'https?://secure\.nba\.com/assets/amp/include/video/(?:topI|i)frame\.html\?.*?\bcontentId=(?P<id>[^?#&]+)'
+ _TESTS = [{
+ 'url': 'https://secure.nba.com/assets/amp/include/video/topIframe.html?contentId=teams/bulls/2020/12/04/3478774/1607105587854-20201204_SCHEDULE_RELEASE_FINAL_DRUPAL-3478774&team=bulls&adFree=false&profile=71&videoPlayerName=TAMPCVP&baseUrl=&videoAdsection=nba.com_mobile_web_teamsites_chicagobulls&ampEnv=',
+ 'only_matching': True,
+ }, {
+ 'url': 'https://secure.nba.com/assets/amp/include/video/iframe.html?contentId=2016/10/29/0021600027boschaplay7&adFree=false&profile=71&team=&videoPlayerName=LAMPCVP',
+ 'only_matching': True,
+ }]
+
+ def _real_extract(self, url):
+ qs = parse_qs(url)
+ content_id = qs['contentId'][0]
+ team = qs.get('team', [None])[0]
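+ # Embeds without a team parameter belong to NBA Watch rather than a team site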
+ if not team:
+ return self.url_result(
+ 'https://watch.nba.com/video/' + content_id, NBAWatchIE.ie_key())
+ video = self._call_api(team, content_id, {'videoid': content_id}, 'video')[0]
+ return self._extract_video(video, team)
+
+
+class NBAIE(NBABaseIE):
+ IE_NAME = 'nba'
+ _VALID_URL = NBABaseIE._VALID_URL_BASE + '(?!%s)video/(?P<id>(?:[^/]+/)*[^/?#&]+)' % NBABaseIE._CHANNEL_PATH_REGEX
+ _TESTS = [{
+ 'url': 'https://www.nba.com/bulls/video/teams/bulls/2020/12/04/3478774/1607105587854-20201204schedulereleasefinaldrupal-3478774',
+ 'info_dict': {
+ 'id': '45039',
+ 'ext': 'mp4',
+ 'title': 'AND WE BACK.',
+ 'description': 'Part 1 of our 2020-21 schedule is here! Watch our games on NBC Sports Chicago.',
+ 'duration': 94,
+ 'timestamp': 1607112000,
+ 'upload_date': '20201218',
+ },
+ }, {
+ 'url': 'https://www.nba.com/bucks/play#/video/teams%2Fbucks%2F2020%2F12%2F17%2F64860%2F1608252863446-Op_Dream_16x9-64860',
+ 'only_matching': True,
+ }, {
+ 'url': 'https://www.nba.com/bucks/play#/video/wsc%2Fteams%2F2787C911AA1ACD154B5377F7577CCC7134B2A4B0',
+ 'only_matching': True,
+ }]
+ _CONTENT_ID_REGEX = r'videoID'
+
+ def _extract_url_results(self, team, content_id):
+ return self._embed_url_result(team, content_id)
+
+
+class NBAChannelIE(NBABaseIE):
+ IE_NAME = 'nba:channel'
+ _VALID_URL = NBABaseIE._VALID_URL_BASE + '(?:%s)/(?P<id>[^/?#&]+)' % NBABaseIE._CHANNEL_PATH_REGEX
+ _TESTS = [{
+ 'url': 'https://www.nba.com/blazers/video/channel/summer_league',
+ 'info_dict': {
+ 'title': 'Summer League',
+ },
+ 'playlist_mincount': 138,
+ }, {
+ 'url': 'https://www.nba.com/bucks/play#/series/On%20This%20Date',
+ 'only_matching': True,
+ }]
+ _CONTENT_ID_REGEX = r'videoSubCategory'
+ _PAGE_SIZE = 100
+
+ def _fetch_page(self, team, channel, page):
+ results = self._call_api(team, channel, {
+ 'channels': channel,
+ 'count': self._PAGE_SIZE,
+ 'offset': page * self._PAGE_SIZE,
+ }, 'page %d' % (page + 1))
+ for video in results:
+ yield self._extract_video(video, team, False)
+
+ def _extract_url_results(self, team, content_id):
+ entries = OnDemandPagedList(
+ functools.partial(self._fetch_page, team, content_id),
+ self._PAGE_SIZE)
+ return self.playlist_result(entries, playlist_title=content_id)
diff --git a/yt_dlp/extractor/nbc.py b/yt_dlp/extractor/nbc.py
new file mode 100644
index 0000000..267fa83
--- /dev/null
+++ b/yt_dlp/extractor/nbc.py
@@ -0,0 +1,855 @@
+import base64
+import json
+import re
+import xml.etree.ElementTree
+
+from .common import InfoExtractor
+from .theplatform import ThePlatformIE, default_ns
+from .adobepass import AdobePassIE
+from ..compat import compat_urllib_parse_unquote
+from ..networking import HEADRequest
+from ..utils import (
+ ExtractorError,
+ RegexNotFoundError,
+ UserNotLive,
+ clean_html,
+ determine_ext,
+ float_or_none,
+ int_or_none,
+ mimetype2ext,
+ parse_age_limit,
+ parse_duration,
+ remove_end,
+ smuggle_url,
+ traverse_obj,
+ try_get,
+ unescapeHTML,
+ unified_timestamp,
+ update_url_query,
+ url_basename,
+)
+
+
+class NBCIE(ThePlatformIE): # XXX: Do not subclass from concrete IE
+ _VALID_URL = r'https?(?P<permalink>://(?:www\.)?nbc\.com/(?:classic-tv/)?[^/]+/video/[^/]+/(?P<id>(?:NBCE|n)?\d+))'
+
+ _TESTS = [
+ {
+ 'url': 'http://www.nbc.com/the-tonight-show/video/jimmy-fallon-surprises-fans-at-ben-jerrys/2848237',
+ 'info_dict': {
+ 'id': '2848237',
+ 'ext': 'mp4',
+ 'title': 'Jimmy Fallon Surprises Fans at Ben & Jerry\'s',
+ 'description': 'Jimmy gives out free scoops of his new "Tonight Dough" ice cream flavor by surprising customers at the Ben & Jerry\'s scoop shop.',
+ 'timestamp': 1424246400,
+ 'upload_date': '20150218',
+ 'uploader': 'NBCU-COM',
+ 'episode': 'Jimmy Fallon Surprises Fans at Ben & Jerry\'s',
+ 'episode_number': 86,
+ 'season': 'Season 2',
+ 'season_number': 2,
+ 'series': 'Tonight Show: Jimmy Fallon',
+ 'duration': 237.0,
+ 'chapters': 'count:1',
+ 'tags': 'count:4',
+ 'thumbnail': r're:https?://.+\.jpg',
+ 'categories': ['Series/The Tonight Show Starring Jimmy Fallon'],
+ 'media_type': 'Full Episode',
+ },
+ 'params': {
+ 'skip_download': 'm3u8',
+ },
+ },
+ {
+ 'url': 'http://www.nbc.com/saturday-night-live/video/star-wars-teaser/2832821',
+ 'info_dict': {
+ 'id': '2832821',
+ 'ext': 'mp4',
+ 'title': 'Star Wars Teaser',
+ 'description': 'md5:0b40f9cbde5b671a7ff62fceccc4f442',
+ 'timestamp': 1417852800,
+ 'upload_date': '20141206',
+ 'uploader': 'NBCU-COM',
+ },
+ 'skip': 'page not found',
+ },
+ {
+ # HLS streams requires the 'hdnea3' cookie
+ 'url': 'http://www.nbc.com/Kings/video/goliath/n1806',
+ 'info_dict': {
+ 'id': '101528f5a9e8127b107e98c5e6ce4638',
+ 'ext': 'mp4',
+ 'title': 'Goliath',
+ 'description': 'When an unknown soldier saves the life of the King\'s son in battle, he\'s thrust into the limelight and politics of the kingdom.',
+ 'timestamp': 1237100400,
+ 'upload_date': '20090315',
+ 'uploader': 'NBCU-COM',
+ },
+ 'skip': 'page not found',
+ },
+ {
+ # manifest url does not have extension
+ 'url': 'https://www.nbc.com/the-golden-globe-awards/video/oprah-winfrey-receives-cecil-b-de-mille-award-at-the-2018-golden-globes/3646439',
+ 'info_dict': {
+ 'id': '3646439',
+ 'ext': 'mp4',
+ 'title': 'Oprah Winfrey Receives Cecil B. de Mille Award at the 2018 Golden Globes',
+ 'episode': 'Oprah Winfrey Receives Cecil B. de Mille Award at the 2018 Golden Globes',
+ 'episode_number': 1,
+ 'season': 'Season 75',
+ 'season_number': 75,
+ 'series': 'The Golden Globe Awards',
+ 'description': 'Oprah Winfrey receives the Cecil B. de Mille Award at the 75th Annual Golden Globe Awards.',
+ 'uploader': 'NBCU-COM',
+ 'upload_date': '20180107',
+ 'timestamp': 1515312000,
+ 'duration': 570.0,
+ 'tags': 'count:8',
+ 'thumbnail': r're:https?://.+\.jpg',
+ 'chapters': 'count:1',
+ },
+ 'params': {
+ 'skip_download': 'm3u8',
+ },
+ },
+ {
+ # new video_id format
+ 'url': 'https://www.nbc.com/quantum-leap/video/bens-first-leap-nbcs-quantum-leap/NBCE125189978',
+ 'info_dict': {
+ 'id': 'NBCE125189978',
+ 'ext': 'mp4',
+ 'title': 'Ben\'s First Leap | NBC\'s Quantum Leap',
+ 'description': 'md5:a82762449b7ec4bb83291a7b355ebf8e',
+ 'uploader': 'NBCU-COM',
+ 'series': 'Quantum Leap',
+ 'season': 'Season 1',
+ 'season_number': 1,
+ 'episode': 'Ben\'s First Leap | NBC\'s Quantum Leap',
+ 'episode_number': 1,
+ 'duration': 170.171,
+ 'chapters': [],
+ 'timestamp': 1663956155,
+ 'upload_date': '20220923',
+ 'tags': 'count:10',
+ 'age_limit': 0,
+ 'thumbnail': r're:https?://.+\.jpg',
+ 'categories': ['Series/Quantum Leap 2022'],
+ 'media_type': 'Highlight',
+ },
+ 'params': {
+ 'skip_download': 'm3u8',
+ },
+ },
+ {
+ 'url': 'https://www.nbc.com/classic-tv/charles-in-charge/video/charles-in-charge-pilot/n3310',
+ 'only_matching': True,
+ },
+ {
+ # Percent escaped url
+ 'url': 'https://www.nbc.com/up-all-night/video/day-after-valentine%27s-day/n2189',
+ 'only_matching': True,
+ }
+ ]
+
+ def _real_extract(self, url):
+ permalink, video_id = self._match_valid_url(url).groups()
+ permalink = 'http' + compat_urllib_parse_unquote(permalink)
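+ # Video metadata is served by NBC's GraphQL endpoint; the bonanzaPage query is keyed by the page permalink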
+ video_data = self._download_json(
+ 'https://friendship.nbc.co/v2/graphql', video_id, query={
+ 'query': '''query bonanzaPage(
+ $app: NBCUBrands! = nbc
+ $name: String!
+ $oneApp: Boolean
+ $platform: SupportedPlatforms! = web
+ $type: EntityPageType! = VIDEO
+ $userId: String!
+) {
+ bonanzaPage(
+ app: $app
+ name: $name
+ oneApp: $oneApp
+ platform: $platform
+ type: $type
+ userId: $userId
+ ) {
+ metadata {
+ ... on VideoPageData {
+ description
+ episodeNumber
+ keywords
+ locked
+ mpxAccountId
+ mpxGuid
+ rating
+ resourceId
+ seasonNumber
+ secondaryTitle
+ seriesShortTitle
+ }
+ }
+ }
+}''',
+ 'variables': json.dumps({
+ 'name': permalink,
+ 'oneApp': True,
+ 'userId': '0',
+ }),
+ })['data']['bonanzaPage']['metadata']
+ query = {
+ 'mbr': 'true',
+ 'manifest': 'm3u',
+ 'switch': 'HLSServiceSecure',
+ }
+ video_id = video_data['mpxGuid']
+ tp_path = 'NnzsPC/media/guid/%s/%s' % (video_data.get('mpxAccountId') or '2410887629', video_id)
+ tpm = self._download_theplatform_metadata(tp_path, video_id)
+ title = tpm.get('title') or video_data.get('secondaryTitle')
+ if video_data.get('locked'):
+ resource = self._get_mvpd_resource(
+ video_data.get('resourceId') or 'nbcentertainment',
+ title, video_id, video_data.get('rating'))
+ query['auth'] = self._extract_mvpd_auth(
+ url, video_id, 'nbcentertainment', resource)
+ theplatform_url = smuggle_url(update_url_query(
+ 'http://link.theplatform.com/s/NnzsPC/media/guid/%s/%s' % (video_data.get('mpxAccountId') or '2410887629', video_id),
+ query), {'force_smil_url': True})
+
+ # Empty string or 0 can be valid values for these. So the check must be `is None`
+ description = video_data.get('description')
+ if description is None:
+ description = tpm.get('description')
+ episode_number = int_or_none(video_data.get('episodeNumber'))
+ if episode_number is None:
+ episode_number = int_or_none(tpm.get('nbcu$airOrder'))
+ rating = video_data.get('rating')
+ if rating is None:
+ rating = try_get(tpm, lambda x: x['ratings'][0]['rating'])
+ season_number = int_or_none(video_data.get('seasonNumber'))
+ if season_number is None:
+ season_number = int_or_none(tpm.get('nbcu$seasonNumber'))
+ series = video_data.get('seriesShortTitle')
+ if series is None:
+ series = tpm.get('nbcu$seriesShortTitle')
+ tags = video_data.get('keywords')
+ if tags is None or len(tags) == 0:
+ tags = tpm.get('keywords')
+
+ return {
+ '_type': 'url_transparent',
+ 'age_limit': parse_age_limit(rating),
+ 'description': description,
+ 'episode': title,
+ 'episode_number': episode_number,
+ 'id': video_id,
+ 'ie_key': 'ThePlatform',
+ 'season_number': season_number,
+ 'series': series,
+ 'tags': tags,
+ 'title': title,
+ 'url': theplatform_url,
+ }
+
+
+class NBCSportsVPlayerIE(InfoExtractor):
+ _VALID_URL_BASE = r'https?://(?:vplayer\.nbcsports\.com|(?:www\.)?nbcsports\.com/vplayer)/'
+ _VALID_URL = _VALID_URL_BASE + r'(?:[^/]+/)+(?P<id>[0-9a-zA-Z_]+)'
+ _EMBED_REGEX = [r'(?:iframe[^>]+|var video|div[^>]+data-(?:mpx-)?)[sS]rc\s?=\s?"(?P<url>%s[^\"]+)' % _VALID_URL_BASE]
+
+ _TESTS = [{
+ 'url': 'https://vplayer.nbcsports.com/p/BxmELC/nbcsports_embed/select/9CsDKds0kvHI',
+ 'info_dict': {
+ 'id': '9CsDKds0kvHI',
+ 'ext': 'mp4',
+ 'description': 'md5:df390f70a9ba7c95ff1daace988f0d8d',
+ 'title': 'Tyler Kalinoski hits buzzer-beater to lift Davidson',
+ 'timestamp': 1426270238,
+ 'upload_date': '20150313',
+ 'uploader': 'NBCU-SPORTS',
+ 'duration': 72.818,
+ 'chapters': [],
+ 'thumbnail': r're:^https?://.*\.jpg$'
+ }
+ }, {
+ 'url': 'https://vplayer.nbcsports.com/p/BxmELC/nbcsports_embed/select/media/PEgOtlNcC_y2',
+ 'only_matching': True,
+ }, {
+ 'url': 'https://www.nbcsports.com/vplayer/p/BxmELC/nbcsports/select/PHJSaFWbrTY9?form=html&autoPlay=true',
+ 'only_matching': True,
+ }]
+
+ def _real_extract(self, url):
+ video_id = self._match_id(url)
+ webpage = self._download_webpage(url, video_id)
+ theplatform_url = self._html_search_regex(r'tp:releaseUrl="(.+?)"', webpage, 'url')
+ return self.url_result(theplatform_url, 'ThePlatform')
+
+
+class NBCSportsIE(InfoExtractor):
+ _VALID_URL = r'https?://(?:www\.)?nbcsports\.com//?(?!vplayer/)(?:[^/]+/)+(?P<id>[0-9a-z-]+)'
+
+ _TESTS = [{
+ # iframe src
+ 'url': 'https://www.nbcsports.com/watch/nfl/profootballtalk/pft-pm/unpacking-addisons-reckless-driving-citation',
+ 'info_dict': {
+ 'id': 'PHJSaFWbrTY9',
+ 'ext': 'mp4',
+ 'title': 'Tom Izzo, Michigan St. has \'so much respect\' for Duke',
+ 'description': 'md5:ecb459c9d59e0766ac9c7d5d0eda8113',
+ 'uploader': 'NBCU-SPORTS',
+ 'upload_date': '20150330',
+ 'timestamp': 1427726529,
+ 'chapters': [],
+ 'thumbnail': 'https://hdliveextra-a.akamaihd.net/HD/image_sports/NBCU_Sports_Group_-_nbcsports/253/303/izzodps.jpg',
+ 'duration': 528.395,
+ }
+ }, {
+ # data-mpx-src
+ 'url': 'https://www.nbcsports.com/philadelphia/philadelphia-phillies/bruce-bochy-hector-neris-hes-idiot',
+ 'only_matching': True,
+ }, {
+ # data-src
+ 'url': 'https://www.nbcsports.com/boston/video/report-card-pats-secondary-no-match-josh-allen',
+ 'only_matching': True,
+ }]
+
+ def _real_extract(self, url):
+ video_id = self._match_id(url)
+ webpage = self._download_webpage(url, video_id)
+ return self.url_result(
+ NBCSportsVPlayerIE._extract_url(webpage), 'NBCSportsVPlayer')
+
+
+class NBCSportsStreamIE(AdobePassIE):
+ _VALID_URL = r'https?://stream\.nbcsports\.com/.+?\bpid=(?P<id>\d+)'
+ _TEST = {
+ 'url': 'http://stream.nbcsports.com/nbcsn/generic?pid=206559',
+ 'info_dict': {
+ 'id': '206559',
+ 'ext': 'mp4',
+ 'title': 'Amgen Tour of California Women\'s Recap',
+ 'description': 'md5:66520066b3b5281ada7698d0ea2aa894',
+ },
+ 'params': {
+ # m3u8 download
+ 'skip_download': True,
+ },
+ 'skip': 'Requires Adobe Pass Authentication',
+ }
+
+ def _real_extract(self, url):
+ video_id = self._match_id(url)
+ live_source = self._download_json(
+ 'http://stream.nbcsports.com/data/live_sources_%s.json' % video_id,
+ video_id)
+ video_source = live_source['videoSources'][0]
+ title = video_source['title']
+ source_url = None
+ for k in ('source', 'msl4source', 'iossource', 'hlsv4'):
+ sk = k + 'Url'
+ source_url = video_source.get(sk) or video_source.get(sk + 'Alt')
+ if source_url:
+ break
+ else:
+ source_url = video_source['ottStreamUrl']
+ is_live = video_source.get('type') == 'live' or video_source.get('status') == 'Live'
+ resource = self._get_mvpd_resource('nbcsports', title, video_id, '')
+ token = self._extract_mvpd_auth(url, video_id, 'nbcsports', resource)
+ tokenized_url = self._download_json(
+ 'https://token.playmakerservices.com/cdn',
+ video_id, data=json.dumps({
+ 'requestorId': 'nbcsports',
+ 'pid': video_id,
+ 'application': 'NBCSports',
+ 'version': 'v1',
+ 'platform': 'desktop',
+ 'cdn': 'akamai',
+ 'url': video_source['sourceUrl'],
+ 'token': base64.b64encode(token.encode()).decode(),
+ 'resourceId': base64.b64encode(resource.encode()).decode(),
+ }).encode())['tokenizedUrl']
+ formats = self._extract_m3u8_formats(tokenized_url, video_id, 'mp4')
+ return {
+ 'id': video_id,
+ 'title': title,
+ 'description': live_source.get('description'),
+ 'formats': formats,
+ 'is_live': is_live,
+ }
+
+
+class NBCNewsIE(ThePlatformIE): # XXX: Do not subclass from concrete IE
+ _VALID_URL = r'(?x)https?://(?:www\.)?(?:nbcnews|today|msnbc)\.com/([^/]+/)*(?:.*-)?(?P<id>[^/?]+)'
+ _EMBED_REGEX = [r'<iframe[^>]+src=(["\'])(?P<url>(?:https?:)?//www\.nbcnews\.com/widget/video-embed/[^"\']+)\1']
+
+ _TESTS = [
+ {
+ 'url': 'http://www.nbcnews.com/watch/nbcnews-com/how-twitter-reacted-to-the-snowden-interview-269389891880',
+ 'md5': 'fb3dcd2d7b1dd9804305fa2fc95ab610', # md5 tends to fluctuate
+ 'info_dict': {
+ 'id': '269389891880',
+ 'ext': 'mp4',
+ 'title': 'How Twitter Reacted To The Snowden Interview',
+ 'description': 'md5:65a0bd5d76fe114f3c2727aa3a81fe64',
+ 'timestamp': 1401363060,
+ 'upload_date': '20140529',
+ 'duration': 46.0,
+ 'thumbnail': 'https://media-cldnry.s-nbcnews.com/image/upload/MSNBC/Components/Video/140529/p_tweet_snow_140529.jpg',
+ },
+ },
+ {
+ 'url': 'http://www.nbcnews.com/feature/dateline-full-episodes/full-episode-family-business-n285156',
+ 'md5': 'fdbf39ab73a72df5896b6234ff98518a',
+ 'info_dict': {
+ 'id': '529953347624',
+ 'ext': 'mp4',
+ 'title': 'FULL EPISODE: Family Business',
+ 'description': 'md5:757988edbaae9d7be1d585eb5d55cc04',
+ },
+ 'skip': 'This page is unavailable.',
+ },
+ {
+ 'url': 'http://www.nbcnews.com/nightly-news/video/nightly-news-with-brian-williams-full-broadcast-february-4-394064451844',
+ 'md5': '40d0e48c68896359c80372306ece0fc3',
+ 'info_dict': {
+ 'id': '394064451844',
+ 'ext': 'mp4',
+ 'title': 'Nightly News with Brian Williams Full Broadcast (February 4)',
+ 'description': 'md5:1c10c1eccbe84a26e5debb4381e2d3c5',
+ 'timestamp': 1423104900,
+ 'upload_date': '20150205',
+ 'duration': 1236.0,
+ 'thumbnail': 'https://media-cldnry.s-nbcnews.com/image/upload/MSNBC/Components/Video/__NEW/nn_netcast_150204.jpg',
+ },
+ },
+ {
+ 'url': 'http://www.nbcnews.com/business/autos/volkswagen-11-million-vehicles-could-have-suspect-software-emissions-scandal-n431456',
+ 'md5': 'ffb59bcf0733dc3c7f0ace907f5e3939',
+ 'info_dict': {
+ 'id': 'n431456',
+ 'ext': 'mp4',
+ 'title': "Volkswagen U.S. Chief: We 'Totally Screwed Up'",
+ 'description': 'md5:d22d1281a24f22ea0880741bb4dd6301',
+ 'upload_date': '20150922',
+ 'timestamp': 1442917800,
+ 'duration': 37.0,
+ 'thumbnail': 'https://media-cldnry.s-nbcnews.com/image/upload/MSNBC/Components/Video/__NEW/x_lon_vwhorn_150922.jpg',
+ },
+ },
+ {
+ 'url': 'http://www.today.com/video/see-the-aurora-borealis-from-space-in-stunning-new-nasa-video-669831235788',
+ 'md5': '693d1fa21d23afcc9b04c66b227ed9ff',
+ 'info_dict': {
+ 'id': '669831235788',
+ 'ext': 'mp4',
+ 'title': 'See the aurora borealis from space in stunning new NASA video',
+ 'description': 'md5:74752b7358afb99939c5f8bb2d1d04b1',
+ 'upload_date': '20160420',
+ 'timestamp': 1461152093,
+ 'duration': 69.0,
+ 'thumbnail': 'https://media-cldnry.s-nbcnews.com/image/upload/MSNBC/Components/Video/201604/2016-04-20T11-35-09-133Z--1280x720.jpg',
+ },
+ },
+ {
+ 'url': 'http://www.msnbc.com/all-in-with-chris-hayes/watch/the-chaotic-gop-immigration-vote-314487875924',
+ 'md5': '6d236bf4f3dddc226633ce6e2c3f814d',
+ 'info_dict': {
+ 'id': '314487875924',
+ 'ext': 'mp4',
+ 'title': 'The chaotic GOP immigration vote',
+ 'description': 'The Republican House votes on a border bill that has no chance of getting through the Senate or signed by the President and is drawing criticism from all sides.',
+ 'thumbnail': r're:^https?://.*\.jpg$',
+ 'timestamp': 1406937606,
+ 'upload_date': '20140802',
+ 'duration': 940.0,
+ },
+ },
+ {
+ 'url': 'http://www.nbcnews.com/watch/dateline/full-episode--deadly-betrayal-386250819952',
+ 'only_matching': True,
+ },
+ {
+ # From http://www.vulture.com/2016/06/letterman-couldnt-care-less-about-late-night.html
+ 'url': 'http://www.nbcnews.com/widget/video-embed/701714499682',
+ 'only_matching': True,
+ },
+ ]
+
+ def _real_extract(self, url):
+ video_id = self._match_id(url)
+ webpage = self._download_webpage(url, video_id)
+
+ data = self._search_nextjs_data(webpage, video_id)['props']['initialState']
+ video_data = try_get(data, lambda x: x['video']['current'], dict)
+ if not video_data:
+ video_data = data['article']['content'][0]['primaryMedia']['video']
+ title = video_data['headline']['primary']
+
+ formats = []
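+ # videoAssets mixes M3U (HLS) playlists with progressive MP4 renditions at various bitrates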
+ for va in video_data.get('videoAssets', []):
+ public_url = va.get('publicUrl')
+ if not public_url:
+ continue
+ if '://link.theplatform.com/' in public_url:
+ public_url = update_url_query(public_url, {'format': 'redirect'})
+ format_id = va.get('format')
+ if format_id == 'M3U':
+ formats.extend(self._extract_m3u8_formats(
+ public_url, video_id, 'mp4', 'm3u8_native',
+ m3u8_id=format_id, fatal=False))
+ continue
+ tbr = int_or_none(va.get('bitrate'), 1000)
+ if tbr:
+ format_id += '-%d' % tbr
+ formats.append({
+ 'format_id': format_id,
+ 'url': public_url,
+ 'width': int_or_none(va.get('width')),
+ 'height': int_or_none(va.get('height')),
+ 'tbr': tbr,
+ 'ext': 'mp4',
+ })
+
+ subtitles = {}
+ closed_captioning = video_data.get('closedCaptioning')
+ if closed_captioning:
+ for cc_url in closed_captioning.values():
+ if not cc_url:
+ continue
+ subtitles.setdefault('en', []).append({
+ 'url': cc_url,
+ })
+
+ return {
+ 'id': video_id,
+ 'title': title,
+ 'description': try_get(video_data, lambda x: x['description']['primary']),
+ 'thumbnail': try_get(video_data, lambda x: x['primaryImage']['url']['primary']),
+ 'duration': parse_duration(video_data.get('duration')),
+ 'timestamp': unified_timestamp(video_data.get('datePublished')),
+ 'formats': formats,
+ 'subtitles': subtitles,
+ }
+
+
+class NBCOlympicsIE(InfoExtractor):
+ IE_NAME = 'nbcolympics'
+ _VALID_URL = r'https?://www\.nbcolympics\.com/videos?/(?P<id>[0-9a-z-]+)'
+
+ _TEST = {
+ # Geo-restricted to US
+ 'url': 'http://www.nbcolympics.com/video/justin-roses-son-leo-was-tears-after-his-dad-won-gold',
+ 'md5': '54fecf846d05429fbaa18af557ee523a',
+ 'info_dict': {
+ 'id': 'WjTBzDXx5AUq',
+ 'display_id': 'justin-roses-son-leo-was-tears-after-his-dad-won-gold',
+ 'ext': 'mp4',
+ 'title': 'Rose\'s son Leo was in tears after his dad won gold',
+ 'description': 'Olympic gold medalist Justin Rose gets emotional talking to the impact his win in men\'s golf has already had on his children.',
+ 'timestamp': 1471274964,
+ 'upload_date': '20160815',
+ 'uploader': 'NBCU-SPORTS',
+ },
+ 'skip': '404 Not Found',
+ }
+
+ def _real_extract(self, url):
+ display_id = self._match_id(url)
+
+ webpage = self._download_webpage(url, display_id)
+
+ try:
+ drupal_settings = self._parse_json(self._search_regex(
+ r'jQuery\.extend\(Drupal\.settings\s*,\s*({.+?})\);',
+ webpage, 'drupal settings'), display_id)
+
+ iframe_url = drupal_settings['vod']['iframe_url']
+ theplatform_url = iframe_url.replace(
+ 'vplayer.nbcolympics.com', 'player.theplatform.com')
+ except RegexNotFoundError:
+ theplatform_url = self._search_regex(
+ r"([\"'])embedUrl\1: *([\"'])(?P<embedUrl>.+)\2",
+ webpage, 'embedding URL', group="embedUrl")
+
+ return {
+ '_type': 'url_transparent',
+ 'url': theplatform_url,
+ 'ie_key': ThePlatformIE.ie_key(),
+ 'display_id': display_id,
+ }
+
+
+class NBCOlympicsStreamIE(AdobePassIE):
+ IE_NAME = 'nbcolympics:stream'
+ _VALID_URL = r'https?://stream\.nbcolympics\.com/(?P<id>[0-9a-z-]+)'
+ _TESTS = [
+ {
+ 'note': 'Tokenized m3u8 source URL',
+ 'url': 'https://stream.nbcolympics.com/womens-soccer-group-round-11',
+ 'info_dict': {
+ 'id': '2019740',
+ 'ext': 'mp4',
+ 'title': r"re:Women's Group Stage - Netherlands vs\. Brazil [0-9]{4}-[0-9]{2}-[0-9]{2} [0-9]{2}:[0-9]{2}$",
+ },
+ 'params': {
+ 'skip_download': 'm3u8',
+ },
+ 'skip': 'Livestream',
+ }, {
+ 'note': 'Plain m3u8 source URL',
+ 'url': 'https://stream.nbcolympics.com/gymnastics-event-finals-mens-floor-pommel-horse-womens-vault-bars',
+ 'info_dict': {
+ 'id': '2021729',
+ 'ext': 'mp4',
+ 'title': r're:Event Finals: M Floor, W Vault, M Pommel, W Uneven Bars [0-9]{4}-[0-9]{2}-[0-9]{2} [0-9]{2}:[0-9]{2}$',
+ },
+ 'params': {
+ 'skip_download': 'm3u8',
+ },
+ 'skip': 'Livestream',
+ },
+ ]
+
+ def _real_extract(self, url):
+ display_id = self._match_id(url)
+ webpage = self._download_webpage(url, display_id)
+ pid = self._search_regex(r'pid\s*=\s*(\d+);', webpage, 'pid')
+
+ event_config = self._download_json(
+ f'http://stream.nbcolympics.com/data/event_config_{pid}.json',
+ pid, 'Downloading event config')['eventConfig']
+
+ title = event_config['eventTitle']
+ is_live = {'live': True, 'replay': False}.get(event_config.get('eventStatus'))
+
+ source_url = self._download_json(
+ f'https://api-leap.nbcsports.com/feeds/assets/{pid}?application=NBCOlympics&platform=desktop&format=nbc-player&env=staging',
+ pid, 'Downloading leap config'
+ )['videoSources'][0]['cdnSources']['primary'][0]['sourceUrl']
+
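+ # Protected events require exchanging the Adobe Pass media token for a tokenized Akamai URL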
+ if event_config.get('cdnToken'):
+ ap_resource = self._get_mvpd_resource(
+ event_config.get('resourceId', 'NBCOlympics'),
+ re.sub(r'[^\w\d ]+', '', event_config['eventTitle']), pid,
+ event_config.get('ratingId', 'NO VALUE'))
+ media_token = self._extract_mvpd_auth(url, pid, event_config.get('requestorId', 'NBCOlympics'), ap_resource)
+
+ source_url = self._download_json(
+ 'https://tokens.playmakerservices.com/', pid, 'Retrieving tokenized URL',
+ data=json.dumps({
+ 'application': 'NBCSports',
+ 'authentication-type': 'adobe-pass',
+ 'cdn': 'akamai',
+ 'pid': pid,
+ 'platform': 'desktop',
+ 'requestorId': 'NBCOlympics',
+ 'resourceId': base64.b64encode(ap_resource.encode()).decode(),
+ 'token': base64.b64encode(media_token.encode()).decode(),
+ 'url': source_url,
+ 'version': 'v1',
+ }).encode(),
+ )['akamai'][0]['tokenizedUrl']
+
+ formats = self._extract_m3u8_formats(source_url, pid, 'mp4', live=is_live)
+ for f in formats:
+ # -http_seekable requires ffmpeg 4.3+, but it doesn't seem possible to
+ # download with ffmpeg without this option
+ f['downloader_options'] = {'ffmpeg_args': ['-seekable', '0', '-http_seekable', '0', '-icy', '0']}
+
+ return {
+ 'id': pid,
+ 'display_id': display_id,
+ 'title': title,
+ 'formats': formats,
+ 'is_live': is_live,
+ }
+
+
+class NBCStationsIE(InfoExtractor):
+ _DOMAIN_RE = '|'.join(map(re.escape, (
+ 'nbcbayarea', 'nbcboston', 'nbcchicago', 'nbcconnecticut', 'nbcdfw', 'nbclosangeles',
+ 'nbcmiami', 'nbcnewyork', 'nbcphiladelphia', 'nbcsandiego', 'nbcwashington',
+ 'necn', 'telemundo52', 'telemundoarizona', 'telemundochicago', 'telemundonuevainglaterra',
+ )))
+ _VALID_URL = rf'https?://(?:www\.)?(?P<site>{_DOMAIN_RE})\.com/(?:[^/?#]+/)*(?P<id>[^/?#]+)/?(?:$|[#?])'
+
+ _TESTS = [{
+ 'url': 'https://www.nbclosangeles.com/news/local/large-structure-fire-in-downtown-la-prompts-smoke-odor-advisory/2968618/',
+ 'info_dict': {
+ 'id': '2968618',
+ 'ext': 'mp4',
+ 'title': 'Large Structure Fire in Downtown LA Prompts Smoke Odor Advisory',
+ 'description': 'md5:417ed3c2d91fe9d301e6db7b0942f182',
+ 'duration': 112.513,
+ 'timestamp': 1661135892,
+ 'upload_date': '20220822',
+ 'uploader': 'NBC 4',
+ 'channel_id': 'KNBC',
+ 'channel': 'nbclosangeles',
+ },
+ 'params': {
+ 'skip_download': 'm3u8',
+ },
+ }, {
+ 'url': 'https://www.telemundoarizona.com/responde/huracan-complica-reembolso-para-televidente-de-tucson/2247002/',
+ 'info_dict': {
+ 'id': '2247002',
+ 'ext': 'mp4',
+ 'title': 'Huracán complica que televidente de Tucson reciba reembolso',
+ 'description': 'md5:af298dc73aab74d4fca6abfb12acb6cf',
+ 'duration': 172.406,
+ 'timestamp': 1660886507,
+ 'upload_date': '20220819',
+ 'uploader': 'Telemundo Arizona',
+ 'channel_id': 'KTAZ',
+ 'channel': 'telemundoarizona',
+ },
+ 'params': {
+ 'skip_download': 'm3u8',
+ },
+ }, {
+ # direct mp4 link
+ 'url': 'https://www.nbcboston.com/weather/video-weather/highs-near-freezing-in-boston-on-wednesday/2961135/',
+ 'md5': '9bf8c41dc7abbb75b1a44f1491a4cc85',
+ 'info_dict': {
+ 'id': '2961135',
+ 'ext': 'mp4',
+ 'title': 'Highs Near Freezing in Boston on Wednesday',
+ 'description': 'md5:3ec486609a926c99f00a3512e6c0e85b',
+ 'duration': 235.669,
+ 'timestamp': 1675268656,
+ 'upload_date': '20230201',
+ 'uploader': '',
+ 'channel_id': 'WBTS',
+ 'channel': 'nbcboston',
+ },
+ }]
+
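+    # Known height -> width pairs, used to infer the width of direct mp4 formats
+    # whose filenames only carry a '-<height>p' suffix.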
+ _RESOLUTIONS = {
+ '1080': '1920',
+ '720': '1280',
+ '540': '960',
+ '360': '640',
+ '234': '416',
+ }
+
+ def _real_extract(self, url):
+ channel, video_id = self._match_valid_url(url).group('site', 'id')
+ webpage = self._download_webpage(url, video_id)
+
+ nbc_data = self._search_json(
+ r'<script>\s*var\s+nbc\s*=', webpage, 'NBC JSON data', video_id)
+ pdk_acct = nbc_data.get('pdkAcct') or 'Yh1nAC'
+ fw_ssid = traverse_obj(nbc_data, ('video', 'fwSSID'))
+
+ video_data = self._search_json(
+ r'data-videos="\[', webpage, 'video data', video_id, default={}, transform_source=unescapeHTML)
+ video_data.update(self._search_json(
+ r'data-meta="', webpage, 'metadata', video_id, default={}, transform_source=unescapeHTML))
+ if not video_data:
+ raise ExtractorError('No video metadata found in webpage', expected=True)
+
+ info, formats = {}, []
+ is_live = int_or_none(video_data.get('mpx_is_livestream')) == 1
+ query = {
+ 'formats': 'MPEG-DASH none,M3U none,MPEG-DASH none,MPEG4,MP3',
+ 'format': 'SMIL',
+ 'fwsitesection': fw_ssid,
+ 'fwNetworkID': traverse_obj(nbc_data, ('video', 'fwNetworkID'), default='382114'),
+ 'pprofile': 'ots_desktop_html',
+ 'sensitive': 'false',
+ 'w': '1920',
+ 'h': '1080',
+ 'mode': 'LIVE' if is_live else 'on-demand',
+ 'vpaid': 'script',
+ 'schema': '2.0',
+ 'sdk': 'PDK 6.1.3',
+ }
+
+ if is_live:
+ player_id = traverse_obj(video_data, ((None, ('video', 'meta')), (
+ 'mpx_m3upid', 'mpx_pid', 'pid_streaming_web_medium')), get_all=False)
+ info['title'] = f'{channel} livestream'
+
+ else:
+ player_id = traverse_obj(video_data, (
+ (None, ('video', 'meta')), ('pid_streaming_web_high', 'mpx_pid')), get_all=False)
+
+ date_string = traverse_obj(video_data, 'date_string', 'date_gmt')
+ if date_string:
+ date_string = self._search_regex(
+ r'datetime="([^"]+)"', date_string, 'date string', fatal=False)
+ else:
+ date_string = traverse_obj(
+ nbc_data, ('dataLayer', 'adobe', ('prop70', 'eVar70', 'eVar59')), get_all=False)
+
+ video_url = traverse_obj(video_data, ((None, ('video', 'meta')), 'mp4_url'), get_all=False)
+ if video_url:
+ ext = determine_ext(video_url)
+ height = self._search_regex(r'\d+-(\d+)p', url_basename(video_url), 'height', default=None)
+ formats.append({
+ 'url': video_url,
+ 'ext': ext,
+ 'width': int_or_none(self._RESOLUTIONS.get(height)),
+ 'height': int_or_none(height),
+ 'format_id': f'http-{ext}',
+ })
+
+ info.update({
+ 'title': video_data.get('title') or traverse_obj(nbc_data, (
+ 'dataLayer', (None, 'adobe'), ('contenttitle', 'title', 'prop22')), get_all=False),
+ 'description':
+ traverse_obj(video_data, 'summary', 'excerpt', 'video_hero_text')
+ or clean_html(traverse_obj(nbc_data, ('dataLayer', 'summary'))),
+ 'timestamp': unified_timestamp(date_string),
+ })
+
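+        # With both a player ID and a FreeWheel site section ID (fwSSID) available,
+        # request the SMIL manifest from thePlatform; it lists the HLS and/or
+        # progressive sources as well as subtitle tracks.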
+ smil = None
+ if player_id and fw_ssid:
+ smil = self._download_xml(
+ f'https://link.theplatform.com/s/{pdk_acct}/{player_id}', video_id,
+ note='Downloading SMIL data', query=query, fatal=is_live)
+ if not isinstance(smil, xml.etree.ElementTree.Element):
+ smil = None
+ subtitles = self._parse_smil_subtitles(smil, default_ns) if smil is not None else {}
+ for video in smil.findall(self._xpath_ns('.//video', default_ns)) if smil is not None else []:
+ info['duration'] = float_or_none(remove_end(video.get('dur'), 'ms'), 1000)
+ video_src_url = video.get('src')
+ ext = mimetype2ext(video.get('type'), default=determine_ext(video_src_url))
+ if ext == 'm3u8':
+ fmts, subs = self._extract_m3u8_formats_and_subtitles(
+ video_src_url, video_id, 'mp4', m3u8_id='hls', fatal=is_live,
+ live=is_live, errnote='No HLS formats found')
+ formats.extend(fmts)
+ self._merge_subtitles(subs, target=subtitles)
+ elif video_src_url:
+ formats.append({
+ 'url': video_src_url,
+ 'format_id': f'https-{ext}',
+ 'ext': ext,
+ 'width': int_or_none(video.get('width')),
+ 'height': int_or_none(video.get('height')),
+ })
+
+ if not formats:
+ self.raise_no_formats('No video content found in webpage', expected=True)
+ elif is_live:
+ try:
+ self._request_webpage(
+ HEADRequest(formats[0]['url']), video_id, note='Checking live status')
+ except ExtractorError:
+ raise UserNotLive(video_id=channel)
+
+ return {
+ 'id': video_id,
+ 'channel': channel,
+ 'channel_id': nbc_data.get('callLetters'),
+ 'uploader': nbc_data.get('on_air_name'),
+ 'formats': formats,
+ 'subtitles': subtitles,
+ 'is_live': is_live,
+ **info,
+ }
diff --git a/yt_dlp/extractor/ndr.py b/yt_dlp/extractor/ndr.py
new file mode 100644
index 0000000..41ea362
--- /dev/null
+++ b/yt_dlp/extractor/ndr.py
@@ -0,0 +1,471 @@
+import re
+
+from .common import InfoExtractor
+from ..compat import compat_urllib_parse_urlparse
+from ..utils import (
+ determine_ext,
+ ExtractorError,
+ int_or_none,
+ merge_dicts,
+ parse_iso8601,
+ qualities,
+ try_get,
+ urljoin,
+)
+
+
+class NDRBaseIE(InfoExtractor):
+ def _real_extract(self, url):
+ mobj = self._match_valid_url(url)
+ display_id = next(group for group in mobj.groups() if group)
+ webpage = self._download_webpage(url, display_id)
+ return self._extract_embed(webpage, display_id, url)
+
+
+class NDRIE(NDRBaseIE):
+ IE_NAME = 'ndr'
+ IE_DESC = 'NDR.de - Norddeutscher Rundfunk'
+ _VALID_URL = r'https?://(?:\w+\.)*ndr\.de/(?:[^/]+/)*(?P<id>[^/?#]+),[\da-z]+\.html'
+ _TESTS = [{
+ # httpVideo, same content id
+ 'url': 'http://www.ndr.de/fernsehen/Party-Poette-und-Parade,hafengeburtstag988.html',
+ 'md5': '6515bc255dc5c5f8c85bbc38e035a659',
+ 'info_dict': {
+ 'id': 'hafengeburtstag988',
+ 'display_id': 'Party-Poette-und-Parade',
+ 'ext': 'mp4',
+ 'title': 'Party, Pötte und Parade',
+ 'description': 'md5:ad14f9d2f91d3040b6930c697e5f6b4c',
+ 'uploader': 'ndrtv',
+ 'timestamp': 1431255671,
+ 'upload_date': '20150510',
+ 'duration': 3498,
+ },
+ 'params': {
+ 'skip_download': True,
+ },
+ 'expected_warnings': ['Unable to download f4m manifest'],
+ }, {
+ # httpVideo, different content id
+ 'url': 'http://www.ndr.de/sport/fussball/40-Osnabrueck-spielt-sich-in-einen-Rausch,osna270.html',
+ 'md5': '1043ff203eab307f0c51702ec49e9a71',
+ 'info_dict': {
+ 'id': 'osna272',
+ 'display_id': '40-Osnabrueck-spielt-sich-in-einen-Rausch',
+ 'ext': 'mp4',
+ 'title': 'Osnabrück - Wehen Wiesbaden: Die Highlights',
+ 'description': 'md5:32e9b800b3d2d4008103752682d5dc01',
+ 'uploader': 'ndrtv',
+ 'timestamp': 1442059200,
+ 'upload_date': '20150912',
+ 'duration': 510,
+ },
+ 'params': {
+ 'skip_download': True,
+ },
+ 'skip': 'No longer available',
+ }, {
+ # httpAudio, same content id
+ 'url': 'http://www.ndr.de/info/La-Valette-entgeht-der-Hinrichtung,audio51535.html',
+ 'md5': 'bb3cd38e24fbcc866d13b50ca59307b8',
+ 'info_dict': {
+ 'id': 'audio51535',
+ 'display_id': 'La-Valette-entgeht-der-Hinrichtung',
+ 'ext': 'mp3',
+ 'title': 'La Valette entgeht der Hinrichtung',
+ 'description': 'md5:22f9541913a40fe50091d5cdd7c9f536',
+ 'uploader': 'ndrinfo',
+ 'timestamp': 1631711863,
+ 'upload_date': '20210915',
+ 'duration': 884,
+ },
+ 'params': {
+ 'skip_download': True,
+ },
+ }, {
+ # with subtitles
+ 'url': 'https://www.ndr.de/fernsehen/sendungen/extra_3/extra-3-Satiremagazin-mit-Christian-Ehring,sendung1091858.html',
+ 'info_dict': {
+ 'id': 'extra18674',
+ 'display_id': 'extra-3-Satiremagazin-mit-Christian-Ehring',
+ 'ext': 'mp4',
+ 'title': 'Extra 3 vom 11.11.2020 mit Christian Ehring',
+ 'description': 'md5:700f6de264010585012a72f97b0ac0c9',
+ 'uploader': 'ndrtv',
+ 'upload_date': '20201207',
+ 'timestamp': 1614349457,
+ 'duration': 1749,
+ 'subtitles': {
+ 'de': [{
+ 'ext': 'ttml',
+ 'url': r're:^https://www\.ndr\.de.+',
+ }],
+ },
+ },
+ 'params': {
+ 'skip_download': True,
+ },
+ 'expected_warnings': ['Unable to download f4m manifest'],
+ }, {
+ 'url': 'https://www.ndr.de/Fettes-Brot-Ferris-MC-und-Thees-Uhlmann-live-on-stage,festivalsommer116.html',
+ 'only_matching': True,
+ }]
+
+ def _extract_embed(self, webpage, display_id, url):
+ embed_url = (
+ self._html_search_meta(
+ 'embedURL', webpage, 'embed URL',
+ default=None)
+ or self._search_regex(
+ r'\bembedUrl["\']\s*:\s*(["\'])(?P<url>(?:(?!\1).)+)\1', webpage,
+ 'embed URL', group='url', default=None)
+ or self._search_regex(
+ r'\bvar\s*sophoraID\s*=\s*(["\'])(?P<url>(?:(?!\1).)+)\1', webpage,
+ 'embed URL', group='url', default=''))
+ # some more work needed if we only found sophoraID
+ if re.match(r'^[a-z]+\d+$', embed_url):
+            # get the initial part of the URL path, e.g. /panorama/archiv/2022/
+ parsed_url = compat_urllib_parse_urlparse(url)
+ path = self._search_regex(r'(.+/)%s' % display_id, parsed_url.path or '', 'embed URL', default='')
+ # find tell-tale image with the actual ID
+ ndr_id = self._search_regex(r'%s([a-z]+\d+)(?!\.)\b' % (path, ), webpage, 'embed URL', default=None)
+ # or try to use special knowledge!
+ NDR_INFO_URL_TPL = 'https://www.ndr.de/info/%s-player.html'
+ embed_url = 'ndr:%s' % (ndr_id, ) if ndr_id else NDR_INFO_URL_TPL % (embed_url, )
+ if not embed_url:
+ raise ExtractorError('Unable to extract embedUrl')
+
+ description = self._search_regex(
+ r'<p[^>]+itemprop="description">([^<]+)</p>',
+ webpage, 'description', default=None) or self._og_search_description(webpage)
+ timestamp = parse_iso8601(
+ self._search_regex(
+ (r'<span[^>]+itemprop="(?:datePublished|uploadDate)"[^>]+content="(?P<cont>[^"]+)"',
+ r'\bvar\s*pdt\s*=\s*(?P<q>["\'])(?P<cont>(?:(?!(?P=q)).)+)(?P=q)', ),
+ webpage, 'upload date', group='cont', default=None))
+ info = self._search_json_ld(webpage, display_id, default={})
+ return merge_dicts({
+ '_type': 'url_transparent',
+ 'url': embed_url,
+ 'display_id': display_id,
+ 'description': description,
+ 'timestamp': timestamp,
+ }, info)
+
+
+class NJoyIE(NDRBaseIE):
+ IE_NAME = 'njoy'
+ IE_DESC = 'N-JOY'
+ _VALID_URL = r'https?://(?:www\.)?n-joy\.de/(?:[^/]+/)*(?:(?P<display_id>[^/?#]+),)?(?P<id>[\da-z]+)\.html'
+ _TESTS = [{
+ # httpVideo, same content id
+ 'url': 'http://www.n-joy.de/entertainment/comedy/comedy_contest/Benaissa-beim-NDR-Comedy-Contest,comedycontest2480.html',
+ 'md5': 'cb63be60cd6f9dd75218803146d8dc67',
+ 'info_dict': {
+ 'id': 'comedycontest2480',
+ 'display_id': 'Benaissa-beim-NDR-Comedy-Contest',
+ 'ext': 'mp4',
+ 'title': 'Benaissa beim NDR Comedy Contest',
+ 'description': 'md5:f057a6c4e1c728b10d33b5ffd36ddc39',
+ 'uploader': 'ndrtv',
+ 'upload_date': '20141129',
+ 'duration': 654,
+ },
+ 'params': {
+ 'skip_download': True,
+ },
+ 'skip': 'No longer available',
+ }, {
+ # httpVideo, different content id
+ 'url': 'http://www.n-joy.de/musik/Das-frueheste-DJ-Set-des-Nordens-live-mit-Felix-Jaehn-,felixjaehn168.html',
+ 'md5': '417660fffa90e6df2fda19f1b40a64d8',
+ 'info_dict': {
+ 'id': 'livestream283',
+ 'display_id': 'Das-frueheste-DJ-Set-des-Nordens-live-mit-Felix-Jaehn-',
+ 'ext': 'mp3',
+ 'title': 'Das frueheste DJ Set des Nordens live mit Felix Jaehn',
+ 'description': 'md5:681698f527b8601e511e7b79edde7d2c',
+ 'uploader': 'njoy',
+ 'upload_date': '20210830',
+ },
+ 'params': {
+ 'skip_download': True,
+ },
+ }, {
+ 'url': 'http://www.n-joy.de/radio/webradio/morningshow209.html',
+ 'only_matching': True,
+ }]
+
+ def _extract_embed(self, webpage, display_id, url=None):
+ # find tell-tale URL with the actual ID, or ...
+ video_id = self._search_regex(
+ (r'''\bsrc\s*=\s*["']?(?:/\w+)+/([a-z]+\d+)(?!\.)\b''',
+ r'<iframe[^>]+id="pp_([\da-z]+)"', ),
+ webpage, 'NDR id', default=None)
+
+ description = (
+ self._html_search_meta('description', webpage)
+ or self._search_regex(
+ r'<div[^>]+class="subline"[^>]*>[^<]+</div>\s*<p>([^<]+)</p>',
+ webpage, 'description', fatal=False))
+ return {
+ '_type': 'url_transparent',
+ 'ie_key': 'NDREmbedBase',
+ 'url': 'ndr:%s' % video_id,
+ 'display_id': display_id,
+ 'description': description,
+ 'title': display_id.replace('-', ' ').strip(),
+ }
+
+
+class NDREmbedBaseIE(InfoExtractor):  # XXX: Conventionally, concrete class names do not end in BaseIE
+ IE_NAME = 'ndr:embed:base'
+ _VALID_URL = r'(?:ndr:(?P<id_s>[\da-z]+)|https?://www\.ndr\.de/(?P<id>[\da-z]+)-ppjson\.json)'
+ _TESTS = [{
+ 'url': 'ndr:soundcheck3366',
+ 'only_matching': True,
+ }, {
+ 'url': 'http://www.ndr.de/soundcheck3366-ppjson.json',
+ 'only_matching': True,
+ }]
+
+ def _real_extract(self, url):
+ mobj = self._match_valid_url(url)
+ video_id = mobj.group('id') or mobj.group('id_s')
+
+ ppjson = self._download_json(
+ 'http://www.ndr.de/%s-ppjson.json' % video_id, video_id)
+
+ playlist = ppjson['playlist']
+
+ formats = []
+ quality_key = qualities(('xs', 's', 'm', 'l', 'xl'))
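+        # Quality labels range from worst ('xs') to best ('xl'); qualities() maps
+        # them to sortable preference values (reused for thumbnails below).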
+
+ for format_id, f in playlist.items():
+ src = f.get('src')
+ if not src:
+ continue
+ ext = determine_ext(src, None)
+ if ext == 'f4m':
+ formats.extend(self._extract_f4m_formats(
+ src + '?hdcore=3.7.0&plugin=aasp-3.7.0.39.44', video_id,
+ f4m_id='hds', fatal=False))
+ elif ext == 'm3u8':
+ formats.extend(self._extract_m3u8_formats(
+ src, video_id, 'mp4', m3u8_id='hls',
+ entry_protocol='m3u8_native', fatal=False))
+ else:
+ quality = f.get('quality')
+ ff = {
+ 'url': src,
+ 'format_id': quality or format_id,
+ 'quality': quality_key(quality),
+ }
+ type_ = f.get('type')
+ if type_ and type_.split('/')[0] == 'audio':
+ ff['vcodec'] = 'none'
+ ff['ext'] = ext or 'mp3'
+ formats.append(ff)
+
+ config = playlist['config']
+
+        live = config.get('streamType') in ['httpVideoLive', 'httpAudioLive']
+ title = config['title']
+ uploader = ppjson.get('config', {}).get('branding')
+ upload_date = ppjson.get('config', {}).get('publicationDate')
+ duration = int_or_none(config.get('duration'))
+
+ thumbnails = []
+ poster = try_get(config, lambda x: x['poster'], dict) or {}
+ for thumbnail_id, thumbnail in poster.items():
+ thumbnail_url = urljoin(url, thumbnail.get('src'))
+ if not thumbnail_url:
+ continue
+ thumbnails.append({
+ 'id': thumbnail.get('quality') or thumbnail_id,
+ 'url': thumbnail_url,
+ 'preference': quality_key(thumbnail.get('quality')),
+ })
+
+ subtitles = {}
+ tracks = config.get('tracks')
+ if tracks and isinstance(tracks, list):
+ for track in tracks:
+ if not isinstance(track, dict):
+ continue
+ track_url = urljoin(url, track.get('src'))
+ if not track_url:
+ continue
+ subtitles.setdefault(track.get('srclang') or 'de', []).append({
+ 'url': track_url,
+ 'ext': 'ttml',
+ })
+
+ return {
+ 'id': video_id,
+ 'title': title,
+ 'is_live': live,
+ 'uploader': uploader if uploader != '-' else None,
+ 'upload_date': upload_date[0:8] if upload_date else None,
+ 'duration': duration,
+ 'thumbnails': thumbnails,
+ 'formats': formats,
+ 'subtitles': subtitles,
+ }
+
+
+class NDREmbedIE(NDREmbedBaseIE): # XXX: Do not subclass from concrete IE
+ IE_NAME = 'ndr:embed'
+ _VALID_URL = r'https?://(?:\w+\.)*ndr\.de/(?:[^/]+/)*(?P<id>[\da-z]+)-(?:(?:ard)?player|externalPlayer)\.html'
+ _TESTS = [{
+ 'url': 'http://www.ndr.de/fernsehen/sendungen/ndr_aktuell/ndraktuell28488-player.html',
+ 'md5': '8b9306142fe65bbdefb5ce24edb6b0a9',
+ 'info_dict': {
+ 'id': 'ndraktuell28488',
+ 'ext': 'mp4',
+ 'title': 'Norddeutschland begrüßt Flüchtlinge',
+ 'is_live': False,
+ 'uploader': 'ndrtv',
+ 'upload_date': '20150907',
+ 'duration': 132,
+ },
+ 'skip': 'No longer available',
+ }, {
+ 'url': 'http://www.ndr.de/ndr2/events/soundcheck/soundcheck3366-player.html',
+ 'md5': '002085c44bae38802d94ae5802a36e78',
+ 'info_dict': {
+ 'id': 'soundcheck3366',
+ 'ext': 'mp4',
+ 'title': 'Ella Henderson braucht Vergleiche nicht zu scheuen',
+ 'is_live': False,
+ 'uploader': 'ndr2',
+ 'upload_date': '20150912',
+ 'duration': 3554,
+ },
+ 'params': {
+ 'skip_download': True,
+ },
+ 'skip': 'No longer available',
+ }, {
+ 'url': 'http://www.ndr.de/info/audio51535-player.html',
+ 'md5': 'bb3cd38e24fbcc866d13b50ca59307b8',
+ 'info_dict': {
+ 'id': 'audio51535',
+ 'ext': 'mp3',
+ 'title': 'La Valette entgeht der Hinrichtung',
+ 'is_live': False,
+ 'uploader': 'ndrinfo',
+ 'upload_date': '20210915',
+ 'duration': 884,
+ },
+ 'params': {
+ 'skip_download': True,
+ },
+ }, {
+ 'url': 'http://www.ndr.de/fernsehen/sendungen/visite/visite11010-externalPlayer.html',
+ 'md5': 'ae57f80511c1e1f2fd0d0d3d31aeae7c',
+ 'info_dict': {
+ 'id': 'visite11010',
+ 'ext': 'mp4',
+ 'title': 'Visite - die ganze Sendung',
+ 'is_live': False,
+ 'uploader': 'ndrtv',
+ 'upload_date': '20150902',
+ 'duration': 3525,
+ },
+ 'params': {
+ 'skip_download': True,
+ },
+ 'skip': 'No longer available',
+ }, {
+ # httpVideoLive
+ 'url': 'http://www.ndr.de/fernsehen/livestream/livestream217-externalPlayer.html',
+ 'info_dict': {
+ 'id': 'livestream217',
+ 'ext': 'mp4',
+ 'title': r're:^NDR Fernsehen Niedersachsen \d{4}-\d{2}-\d{2} \d{2}:\d{2}$',
+ 'is_live': True,
+ 'upload_date': '20210409',
+ 'uploader': 'ndrtv',
+ },
+ 'params': {
+ 'skip_download': True,
+ },
+ }, {
+ 'url': 'http://www.ndr.de/ndrkultur/audio255020-player.html',
+ 'only_matching': True,
+ }, {
+ 'url': 'http://www.ndr.de/fernsehen/sendungen/nordtour/nordtour7124-player.html',
+ 'only_matching': True,
+ }, {
+ 'url': 'http://www.ndr.de/kultur/film/videos/videoimport10424-player.html',
+ 'only_matching': True,
+ }, {
+ 'url': 'http://www.ndr.de/fernsehen/sendungen/hamburg_journal/hamj43006-player.html',
+ 'only_matching': True,
+ }, {
+ 'url': 'http://www.ndr.de/fernsehen/sendungen/weltbilder/weltbilder4518-player.html',
+ 'only_matching': True,
+ }, {
+ 'url': 'http://www.ndr.de/fernsehen/doku952-player.html',
+ 'only_matching': True,
+ }]
+
+
+class NJoyEmbedIE(NDREmbedBaseIE): # XXX: Do not subclass from concrete IE
+ IE_NAME = 'njoy:embed'
+ _VALID_URL = r'https?://(?:www\.)?n-joy\.de/(?:[^/]+/)*(?P<id>[\da-z]+)-(?:player|externalPlayer)_[^/]+\.html'
+ _TESTS = [{
+ # httpVideo
+ 'url': 'http://www.n-joy.de/events/reeperbahnfestival/doku948-player_image-bc168e87-5263-4d6d-bd27-bb643005a6de_theme-n-joy.html',
+ 'md5': '8483cbfe2320bd4d28a349d62d88bd74',
+ 'info_dict': {
+ 'id': 'doku948',
+ 'ext': 'mp4',
+ 'title': 'Zehn Jahre Reeperbahn Festival - die Doku',
+ 'is_live': False,
+ 'upload_date': '20200826',
+ 'duration': 1011,
+ },
+ 'expected_warnings': ['Unable to download f4m manifest'],
+ }, {
+ # httpAudio
+ 'url': 'http://www.n-joy.de/news_wissen/stefanrichter100-player_image-d5e938b1-f21a-4b9a-86b8-aaba8bca3a13_theme-n-joy.html',
+ 'md5': 'd989f80f28ac954430f7b8a48197188a',
+ 'info_dict': {
+ 'id': 'stefanrichter100',
+ 'ext': 'mp3',
+ 'title': 'Interview mit einem Augenzeugen',
+ 'is_live': False,
+ 'uploader': 'njoy',
+ 'upload_date': '20150909',
+ 'duration': 140,
+ },
+ 'params': {
+ 'skip_download': True,
+ },
+ 'skip': 'No longer available',
+ }, {
+ # httpAudioLive, no explicit ext
+ 'url': 'http://www.n-joy.de/news_wissen/webradioweltweit100-player_image-3fec0484-2244-4565-8fb8-ed25fd28b173_theme-n-joy.html',
+ 'info_dict': {
+ 'id': 'webradioweltweit100',
+ 'ext': 'mp3',
+ 'title': r're:^N-JOY Weltweit \d{4}-\d{2}-\d{2} \d{2}:\d{2}$',
+ 'is_live': True,
+ 'uploader': 'njoy',
+ 'upload_date': '20210830',
+ },
+ 'params': {
+ 'skip_download': True,
+ },
+ }, {
+ 'url': 'http://www.n-joy.de/musik/dockville882-player_image-3905259e-0803-4764-ac72-8b7de077d80a_theme-n-joy.html',
+ 'only_matching': True,
+ }, {
+ 'url': 'http://www.n-joy.de/radio/sendungen/morningshow/urlaubsfotos190-player_image-066a5df1-5c95-49ec-a323-941d848718db_theme-n-joy.html',
+ 'only_matching': True,
+ }, {
+ 'url': 'http://www.n-joy.de/entertainment/comedy/krudetv290-player_image-ab261bfe-51bf-4bf3-87ba-c5122ee35b3d_theme-n-joy.html',
+ 'only_matching': True,
+ }]
diff --git a/yt_dlp/extractor/ndtv.py b/yt_dlp/extractor/ndtv.py
new file mode 100644
index 0000000..d099db3
--- /dev/null
+++ b/yt_dlp/extractor/ndtv.py
@@ -0,0 +1,107 @@
+import urllib.parse
+
+from .common import InfoExtractor
+from ..utils import parse_duration, remove_end, unified_strdate, urljoin
+
+
+class NDTVIE(InfoExtractor):
+ _WORKING = False
+ _VALID_URL = r'https?://(?:[^/]+\.)?ndtv\.com/(?:[^/]+/)*videos?/?(?:[^/]+/)*[^/?^&]+-(?P<id>\d+)'
+
+ _TESTS = [
+ {
+ 'url': 'https://khabar.ndtv.com/video/show/prime-time/prime-time-ill-system-and-poor-education-468818',
+ 'md5': '78efcf3880ef3fd9b83d405ca94a38eb',
+ 'info_dict': {
+ 'id': '468818',
+ 'ext': 'mp4',
+ 'title': "प्राइम टाइम: सिस्टम बीमार, स्कूल बदहाल",
+ 'description': 'md5:f410512f1b49672e5695dea16ef2731d',
+ 'upload_date': '20170928',
+ 'duration': 2218,
+ 'thumbnail': r're:https?://.*\.jpg',
+ }
+ },
+ {
+            # __filename is a URL
+ 'url': 'http://movies.ndtv.com/videos/cracker-free-diwali-wishes-from-karan-johar-kriti-sanon-other-stars-470304',
+ 'md5': 'f1d709352305b44443515ac56b45aa46',
+ 'info_dict': {
+ 'id': '470304',
+ 'ext': 'mp4',
+ 'title': "Cracker-Free Diwali Wishes From Karan Johar, Kriti Sanon & Other Stars",
+ 'description': 'md5:f115bba1adf2f6433fa7c1ade5feb465',
+ 'upload_date': '20171019',
+ 'duration': 137,
+ 'thumbnail': r're:https?://.*\.jpg',
+ }
+ },
+ {
+ 'url': 'https://www.ndtv.com/video/news/news/delhi-s-air-quality-status-report-after-diwali-is-very-poor-470372',
+ 'only_matching': True
+ },
+ {
+ 'url': 'https://auto.ndtv.com/videos/the-cnb-daily-october-13-2017-469935',
+ 'only_matching': True
+ },
+ {
+ 'url': 'https://sports.ndtv.com/cricket/videos/2nd-t20i-rock-thrown-at-australia-cricket-team-bus-after-win-over-india-469764',
+ 'only_matching': True
+ },
+ {
+ 'url': 'http://gadgets.ndtv.com/videos/uncharted-the-lost-legacy-review-465568',
+ 'only_matching': True
+ },
+ {
+ 'url': 'http://profit.ndtv.com/videos/news/video-indian-economy-on-very-solid-track-international-monetary-fund-chief-470040',
+ 'only_matching': True
+ },
+ {
+ 'url': 'http://food.ndtv.com/video-basil-seeds-coconut-porridge-419083',
+ 'only_matching': True
+ },
+ {
+ 'url': 'https://doctor.ndtv.com/videos/top-health-stories-of-the-week-467396',
+ 'only_matching': True
+ },
+ {
+ 'url': 'https://swirlster.ndtv.com/video/how-to-make-friends-at-work-469324',
+ 'only_matching': True
+ }
+ ]
+
+ def _real_extract(self, url):
+ video_id = self._match_id(url)
+ webpage = self._download_webpage(url, video_id)
+
+ # '__title' does not contain extra words such as sub-site name, "Video" etc.
+ title = urllib.parse.unquote_plus(
+ self._search_regex(r"__title\s*=\s*'([^']+)'", webpage, 'title', default=None)
+ or self._og_search_title(webpage))
+
+ filename = self._search_regex(
+ r"(?:__)?filename\s*[:=]\s*'([^']+)'", webpage, 'video filename')
+ # in "movies" sub-site pages, filename is URL
+ video_url = urljoin('https://ndtvod.bc-ssl.cdn.bitgravity.com/23372/ndtv/', filename.lstrip('/'))
+
+ # "doctor" sub-site has MM:SS format
+ duration = parse_duration(self._search_regex(
+ r"(?:__)?duration\s*[:=]\s*'([^']+)'", webpage, 'duration', fatal=False))
+
+ # "sports", "doctor", "swirlster" sub-sites don't have 'publish-date'
+ upload_date = unified_strdate(self._html_search_meta(
+ 'publish-date', webpage, 'upload date', default=None) or self._html_search_meta(
+ 'uploadDate', webpage, 'upload date', default=None) or self._search_regex(
+ r'datePublished"\s*:\s*"([^"]+)"', webpage, 'upload date', fatal=False))
+
+ description = remove_end(self._og_search_description(webpage), ' (Read more)')
+
+ return {
+ 'id': video_id,
+ 'url': video_url,
+ 'title': title,
+ 'description': description,
+ 'thumbnail': self._og_search_thumbnail(webpage),
+ 'duration': duration,
+ 'upload_date': upload_date,
+ }
diff --git a/yt_dlp/extractor/nebula.py b/yt_dlp/extractor/nebula.py
new file mode 100644
index 0000000..cb8f6a6
--- /dev/null
+++ b/yt_dlp/extractor/nebula.py
@@ -0,0 +1,468 @@
+import itertools
+import json
+
+from .art19 import Art19IE
+from .common import InfoExtractor
+from ..networking.exceptions import HTTPError
+from ..utils import (
+ ExtractorError,
+ int_or_none,
+ make_archive_id,
+ parse_iso8601,
+ smuggle_url,
+ try_call,
+ unsmuggle_url,
+ update_url_query,
+ url_or_none,
+ urljoin,
+)
+from ..utils.traversal import traverse_obj
+
+_BASE_URL_RE = r'https?://(?:www\.|beta\.)?(?:watchnebula\.com|nebula\.app|nebula\.tv)'
+
+
+class NebulaBaseIE(InfoExtractor):
+ _NETRC_MACHINE = 'watchnebula'
+ _token = _api_token = None
+
+ def _perform_login(self, username, password):
+ try:
+ response = self._download_json(
+ 'https://nebula.tv/auth/login/', None,
+ 'Logging in to Nebula', 'Login failed',
+ data=json.dumps({'email': username, 'password': password}).encode(),
+ headers={'content-type': 'application/json'})
+ except ExtractorError as e:
+ if isinstance(e.cause, HTTPError) and e.cause.status == 400:
+ raise ExtractorError('Login failed: Invalid username or password', expected=True)
+ raise
+ self._api_token = traverse_obj(response, ('key', {str}))
+ if not self._api_token:
+ raise ExtractorError('Login failed: No token')
+
+ def _call_api(self, *args, **kwargs):
+ if self._token:
+ kwargs.setdefault('headers', {})['Authorization'] = f'Bearer {self._token}'
+ try:
+ return self._download_json(*args, **kwargs)
+ except ExtractorError as e:
+ if not isinstance(e.cause, HTTPError) or e.cause.status not in (401, 403):
+ raise
+ self.to_screen(
+ f'Reauthorizing with Nebula and retrying, because last API call resulted in error {e.cause.status}')
+ self._real_initialize()
+ if self._token:
+ kwargs.setdefault('headers', {})['Authorization'] = f'Bearer {self._token}'
+ return self._download_json(*args, **kwargs)
+
+ def _real_initialize(self):
+ if not self._api_token:
+ self._api_token = try_call(
+ lambda: self._get_cookies('https://nebula.tv')['nebula_auth.apiToken'].value)
+ self._token = self._download_json(
+ 'https://users.api.nebula.app/api/v1/authorization/', None,
+ headers={'Authorization': f'Token {self._api_token}'} if self._api_token else None,
+ note='Authorizing to Nebula', data=b'')['token']
+
+ def _extract_formats(self, content_id, slug):
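+        # Retries at most once: a 403 from the manifest endpoint is taken to mean
+        # the bearer token has expired, so _real_initialize() is re-run to refresh
+        # it before the second attempt; a 401 means login is required.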
+ for retry in (False, True):
+ try:
+ fmts, subs = self._extract_m3u8_formats_and_subtitles(
+ f'https://content.api.nebula.app/{content_id.split(":")[0]}s/{content_id}/manifest.m3u8',
+ slug, 'mp4', query={
+ 'token': self._token,
+ 'app_version': '23.10.0',
+ 'platform': 'ios',
+ })
+ return {'formats': fmts, 'subtitles': subs}
+ except ExtractorError as e:
+ if isinstance(e.cause, HTTPError) and e.cause.status == 401:
+ self.raise_login_required()
+ if not retry and isinstance(e.cause, HTTPError) and e.cause.status == 403:
+ self.to_screen('Reauthorizing with Nebula and retrying, because fetching video resulted in error')
+ self._real_initialize()
+ continue
+ raise
+
+ def _extract_video_metadata(self, episode):
+ channel_url = traverse_obj(
+ episode, (('channel_slug', 'class_slug'), {lambda x: urljoin('https://nebula.tv/', x)}), get_all=False)
+ return {
+ 'id': episode['id'].partition(':')[2],
+ **traverse_obj(episode, {
+ 'display_id': 'slug',
+ 'title': 'title',
+ 'description': 'description',
+ 'timestamp': ('published_at', {parse_iso8601}),
+ 'duration': ('duration', {int_or_none}),
+ 'channel_id': 'channel_slug',
+ 'uploader_id': 'channel_slug',
+ 'channel': 'channel_title',
+ 'uploader': 'channel_title',
+ 'series': 'channel_title',
+ 'creator': 'channel_title',
+ 'thumbnail': ('images', 'thumbnail', 'src', {url_or_none}),
+ 'episode_number': ('order', {int_or_none}),
+ # Old code was wrongly setting extractor_key from NebulaSubscriptionsIE
+ '_old_archive_ids': ('zype_id', {lambda x: [
+ make_archive_id(NebulaIE, x), make_archive_id(NebulaSubscriptionsIE, x)] if x else None}),
+ }),
+ 'channel_url': channel_url,
+ 'uploader_url': channel_url,
+ }
+
+
+class NebulaIE(NebulaBaseIE):
+ IE_NAME = 'nebula:video'
+ _VALID_URL = rf'{_BASE_URL_RE}/videos/(?P<id>[\w-]+)'
+ _TESTS = [{
+ 'url': 'https://nebula.tv/videos/that-time-disney-remade-beauty-and-the-beast',
+ 'info_dict': {
+ 'id': '84ed544d-4afd-4723-8cd5-2b95261f0abf',
+ 'ext': 'mp4',
+ 'title': 'That Time Disney Remade Beauty and the Beast',
+ 'description': 'md5:2aae3c4cfc5ee09a1ecdff0909618cf4',
+ 'upload_date': '20180731',
+ 'timestamp': 1533009600,
+ 'channel': 'Lindsay Ellis',
+ 'channel_id': 'lindsayellis',
+ 'uploader': 'Lindsay Ellis',
+ 'uploader_id': 'lindsayellis',
+ 'uploader_url': r're:https://nebula\.(tv|app)/lindsayellis',
+ 'series': 'Lindsay Ellis',
+ 'display_id': 'that-time-disney-remade-beauty-and-the-beast',
+ 'channel_url': r're:https://nebula\.(tv|app)/lindsayellis',
+ 'creator': 'Lindsay Ellis',
+ 'duration': 2212,
+ 'thumbnail': r're:https://\w+\.cloudfront\.net/[\w-]+',
+ '_old_archive_ids': ['nebula 5c271b40b13fd613090034fd', 'nebulasubscriptions 5c271b40b13fd613090034fd'],
+ },
+ 'params': {'skip_download': 'm3u8'},
+ }, {
+ 'url': 'https://nebula.tv/videos/the-logistics-of-d-day-landing-craft-how-the-allies-got-ashore',
+ 'md5': 'd05739cf6c38c09322422f696b569c23',
+ 'info_dict': {
+ 'id': '7e623145-1b44-4ca3-aa0b-ed25a247ea34',
+ 'ext': 'mp4',
+ 'title': 'Landing Craft - How The Allies Got Ashore',
+ 'description': r're:^In this episode we explore the unsung heroes of D-Day, the landing craft.',
+ 'upload_date': '20200327',
+ 'timestamp': 1585348140,
+ 'channel': 'Real Engineering — The Logistics of D-Day',
+ 'channel_id': 'd-day',
+ 'uploader': 'Real Engineering — The Logistics of D-Day',
+ 'uploader_id': 'd-day',
+ 'series': 'Real Engineering — The Logistics of D-Day',
+ 'display_id': 'the-logistics-of-d-day-landing-craft-how-the-allies-got-ashore',
+ 'creator': 'Real Engineering — The Logistics of D-Day',
+ 'duration': 841,
+ 'channel_url': 'https://nebula.tv/d-day',
+ 'uploader_url': 'https://nebula.tv/d-day',
+ 'thumbnail': r're:https://\w+\.cloudfront\.net/[\w-]+',
+ '_old_archive_ids': ['nebula 5e7e78171aaf320001fbd6be', 'nebulasubscriptions 5e7e78171aaf320001fbd6be'],
+ },
+ 'params': {'skip_download': 'm3u8'},
+ }, {
+ 'url': 'https://nebula.tv/videos/money-episode-1-the-draw',
+ 'md5': 'ebe28a7ad822b9ee172387d860487868',
+ 'info_dict': {
+ 'id': 'b96c5714-9e2b-4ec3-b3f1-20f6e89cc553',
+ 'ext': 'mp4',
+ 'title': 'Episode 1: The Draw',
+ 'description': r'contains:There’s free money on offer… if the players can all work together.',
+ 'upload_date': '20200323',
+ 'timestamp': 1584980400,
+ 'channel': 'Tom Scott Presents: Money',
+ 'channel_id': 'tom-scott-presents-money',
+ 'uploader': 'Tom Scott Presents: Money',
+ 'uploader_id': 'tom-scott-presents-money',
+ 'uploader_url': 'https://nebula.tv/tom-scott-presents-money',
+ 'duration': 825,
+ 'channel_url': 'https://nebula.tv/tom-scott-presents-money',
+ 'series': 'Tom Scott Presents: Money',
+ 'display_id': 'money-episode-1-the-draw',
+ 'thumbnail': r're:https://\w+\.cloudfront\.net/[\w-]+',
+ 'creator': 'Tom Scott Presents: Money',
+ '_old_archive_ids': ['nebula 5e779ebdd157bc0001d1c75a', 'nebulasubscriptions 5e779ebdd157bc0001d1c75a'],
+ },
+ 'params': {'skip_download': 'm3u8'},
+ }, {
+ 'url': 'https://watchnebula.com/videos/money-episode-1-the-draw',
+ 'only_matching': True,
+ }, {
+ 'url': 'https://nebula.tv/videos/tldrnewseu-did-the-us-really-blow-up-the-nordstream-pipelines',
+ 'info_dict': {
+ 'id': 'e389af9d-1dab-44f2-8788-ee24deb7ff0d',
+ 'ext': 'mp4',
+ 'display_id': 'tldrnewseu-did-the-us-really-blow-up-the-nordstream-pipelines',
+ 'title': 'Did the US Really Blow Up the NordStream Pipelines?',
+ 'description': 'md5:b4e2a14e3ff08f546a3209c75261e789',
+ 'upload_date': '20230223',
+ 'timestamp': 1677144070,
+ 'channel': 'TLDR News EU',
+ 'channel_id': 'tldrnewseu',
+ 'uploader': 'TLDR News EU',
+ 'uploader_id': 'tldrnewseu',
+ 'uploader_url': r're:https://nebula\.(tv|app)/tldrnewseu',
+ 'duration': 524,
+ 'channel_url': r're:https://nebula\.(tv|app)/tldrnewseu',
+ 'series': 'TLDR News EU',
+ 'thumbnail': r're:https://\w+\.cloudfront\.net/[\w-]+',
+ 'creator': 'TLDR News EU',
+ '_old_archive_ids': ['nebula 63f64c74366fcd00017c1513', 'nebulasubscriptions 63f64c74366fcd00017c1513'],
+ },
+ 'params': {'skip_download': 'm3u8'},
+ }, {
+ 'url': 'https://beta.nebula.tv/videos/money-episode-1-the-draw',
+ 'only_matching': True,
+ }]
+
+ def _real_extract(self, url):
+ slug = self._match_id(url)
+ url, smuggled_data = unsmuggle_url(url, {})
+ if smuggled_data.get('id'):
+ return {
+ 'id': smuggled_data['id'],
+ 'display_id': slug,
+ 'title': '',
+ **self._extract_formats(smuggled_data['id'], slug),
+ }
+
+ metadata = self._call_api(
+ f'https://content.api.nebula.app/content/videos/{slug}',
+ slug, note='Fetching video metadata')
+ return {
+ **self._extract_video_metadata(metadata),
+ **self._extract_formats(metadata['id'], slug),
+ }
+
+
+class NebulaClassIE(NebulaBaseIE):
+ IE_NAME = 'nebula:media'
+ _VALID_URL = rf'{_BASE_URL_RE}/(?!(?:myshows|library|videos)/)(?P<id>[\w-]+)/(?P<ep>[\w-]+)/?(?:$|[?#])'
+ _TESTS = [{
+ 'url': 'https://nebula.tv/copyright-for-fun-and-profit/14',
+ 'info_dict': {
+ 'id': 'd7432cdc-c608-474d-942c-f74345daed7b',
+ 'ext': 'mp4',
+ 'display_id': '14',
+ 'channel_url': 'https://nebula.tv/copyright-for-fun-and-profit',
+ 'episode_number': 14,
+ 'thumbnail': 'https://dj423fildxgac.cloudfront.net/d533718d-9307-42d4-8fb0-e283285e99c9',
+ 'uploader_url': 'https://nebula.tv/copyright-for-fun-and-profit',
+ 'duration': 646,
+ 'episode': 'Episode 14',
+ 'title': 'Photos, Sculpture, and Video',
+ },
+ 'params': {'skip_download': 'm3u8'},
+ }, {
+ 'url': 'https://nebula.tv/extremitiespodcast/pyramiden-the-high-arctic-soviet-ghost-town',
+ 'info_dict': {
+ 'ext': 'mp3',
+ 'id': '018f65f0-0033-4021-8f87-2d132beb19aa',
+ 'description': 'md5:05d2b23ab780c955e2511a2b9127acff',
+ 'series_id': '335e8159-d663-491a-888f-1732285706ac',
+ 'modified_timestamp': 1599091504,
+ 'episode_id': '018f65f0-0033-4021-8f87-2d132beb19aa',
+ 'series': 'Extremities',
+ 'modified_date': '20200903',
+ 'upload_date': '20200902',
+ 'title': 'Pyramiden: The High-Arctic Soviet Ghost Town',
+ 'release_timestamp': 1571237958,
+ 'thumbnail': r're:^https?://content\.production\.cdn\.art19\.com.*\.jpeg$',
+ 'duration': 1546.05714,
+ 'timestamp': 1599085608,
+ 'release_date': '20191016',
+ },
+ }, {
+ 'url': 'https://nebula.tv/thelayover/the-layover-episode-1',
+ 'info_dict': {
+ 'ext': 'mp3',
+ 'id': '9d74a762-00bb-45a8-9e8d-9ed47c04a1d0',
+ 'episode_number': 1,
+ 'thumbnail': r're:^https?://content\.production\.cdn\.art19\.com.*\.jpeg$',
+ 'release_date': '20230304',
+ 'modified_date': '20230403',
+ 'series': 'The Layover',
+ 'episode_id': '9d74a762-00bb-45a8-9e8d-9ed47c04a1d0',
+ 'modified_timestamp': 1680554566,
+ 'duration': 3130.46401,
+ 'release_timestamp': 1677943800,
+ 'title': 'The Layover — Episode 1',
+ 'series_id': '874303a5-4900-4626-a4b6-2aacac34466a',
+ 'upload_date': '20230303',
+ 'episode': 'Episode 1',
+ 'timestamp': 1677883672,
+ 'description': 'md5:002cca89258e3bc7c268d5b8c24ba482',
+ },
+ }]
+
+ def _real_extract(self, url):
+ slug, episode = self._match_valid_url(url).group('id', 'ep')
+ url, smuggled_data = unsmuggle_url(url, {})
+ if smuggled_data.get('id'):
+ return {
+ 'id': smuggled_data['id'],
+ 'display_id': slug,
+ 'title': '',
+ **self._extract_formats(smuggled_data['id'], slug),
+ }
+
+ metadata = self._call_api(
+ f'https://content.api.nebula.app/content/{slug}/{episode}/?include=lessons',
+ slug, note='Fetching class/podcast metadata')
+ content_type = metadata.get('type')
+ if content_type == 'lesson':
+ return {
+ **self._extract_video_metadata(metadata),
+ **self._extract_formats(metadata['id'], slug),
+ }
+ elif content_type == 'podcast_episode':
+ episode_url = metadata['episode_url']
+ if not episode_url and metadata.get('premium'):
+ self.raise_login_required()
+
+ if Art19IE.suitable(episode_url):
+ return self.url_result(episode_url, Art19IE)
+ return traverse_obj(metadata, {
+ 'id': ('id', {str}),
+ 'url': ('episode_url', {url_or_none}),
+ 'title': ('title', {str}),
+ 'description': ('description', {str}),
+ 'timestamp': ('published_at', {parse_iso8601}),
+ 'duration': ('duration', {int_or_none}),
+ 'channel_id': ('channel_id', {str}),
+            'channel': ('channel_title', {str}),
+ 'thumbnail': ('assets', 'regular', {url_or_none}),
+ })
+
+ raise ExtractorError(f'Unexpected content type {content_type!r}')
+
+
+class NebulaSubscriptionsIE(NebulaBaseIE):
+ IE_NAME = 'nebula:subscriptions'
+ _VALID_URL = rf'{_BASE_URL_RE}/(?P<id>myshows|library/latest-videos)/?(?:$|[?#])'
+ _TESTS = [{
+ 'url': 'https://nebula.tv/myshows',
+ 'playlist_mincount': 1,
+ 'info_dict': {
+ 'id': 'myshows',
+ },
+ }]
+
+ def _generate_playlist_entries(self):
+ next_url = update_url_query('https://content.api.nebula.app/video_episodes/', {
+ 'following': 'true',
+ 'include': 'engagement',
+ 'ordering': '-published_at',
+ })
+ for page_num in itertools.count(1):
+ channel = self._call_api(
+ next_url, 'myshows', note=f'Retrieving subscriptions page {page_num}')
+ for episode in channel['results']:
+ metadata = self._extract_video_metadata(episode)
+ yield self.url_result(smuggle_url(
+ f'https://nebula.tv/videos/{metadata["display_id"]}',
+ {'id': episode['id']}), NebulaIE, url_transparent=True, **metadata)
+ next_url = channel.get('next')
+ if not next_url:
+ return
+
+ def _real_extract(self, url):
+ return self.playlist_result(self._generate_playlist_entries(), 'myshows')
+
+
+class NebulaChannelIE(NebulaBaseIE):
+ IE_NAME = 'nebula:channel'
+ _VALID_URL = rf'{_BASE_URL_RE}/(?!myshows|library|videos)(?P<id>[\w-]+)/?(?:$|[?#])'
+ _TESTS = [{
+ 'url': 'https://nebula.tv/tom-scott-presents-money',
+ 'info_dict': {
+ 'id': 'tom-scott-presents-money',
+ 'title': 'Tom Scott Presents: Money',
+ 'description': 'Tom Scott hosts a series all about trust, negotiation and money.',
+ },
+ 'playlist_count': 5,
+ }, {
+ 'url': 'https://nebula.tv/lindsayellis',
+ 'info_dict': {
+ 'id': 'lindsayellis',
+ 'title': 'Lindsay Ellis',
+ 'description': 'Enjoy these hottest of takes on Disney, Transformers, and Musicals.',
+ },
+ 'playlist_mincount': 2,
+ }, {
+ 'url': 'https://nebula.tv/johnnyharris',
+ 'info_dict': {
+ 'id': 'johnnyharris',
+ 'title': 'Johnny Harris',
+ 'description': 'I make videos about maps and many other things.',
+ },
+ 'playlist_mincount': 90,
+ }, {
+ 'url': 'https://nebula.tv/copyright-for-fun-and-profit',
+ 'info_dict': {
+ 'id': 'copyright-for-fun-and-profit',
+ 'title': 'Copyright for Fun and Profit',
+ 'description': 'md5:6690248223eed044a9f11cd5a24f9742',
+ },
+ 'playlist_count': 23,
+ }, {
+ 'url': 'https://nebula.tv/trussissuespodcast',
+ 'info_dict': {
+ 'id': 'trussissuespodcast',
+ 'title': 'The TLDR News Podcast',
+ 'description': 'md5:a08c4483bc0b705881d3e0199e721385',
+ },
+ 'playlist_mincount': 80,
+ }]
+
+ def _generate_playlist_entries(self, collection_id, collection_slug):
+ next_url = f'https://content.api.nebula.app/video_channels/{collection_id}/video_episodes/?ordering=-published_at'
+ for page_num in itertools.count(1):
+ episodes = self._call_api(next_url, collection_slug, note=f'Retrieving channel page {page_num}')
+ for episode in episodes['results']:
+ metadata = self._extract_video_metadata(episode)
+ yield self.url_result(smuggle_url(
+ episode.get('share_url') or f'https://nebula.tv/videos/{metadata["display_id"]}',
+ {'id': episode['id']}), NebulaIE, url_transparent=True, **metadata)
+ next_url = episodes.get('next')
+ if not next_url:
+ break
+
+ def _generate_class_entries(self, channel):
+ for lesson in channel['lessons']:
+ metadata = self._extract_video_metadata(lesson)
+ yield self.url_result(smuggle_url(
+ lesson.get('share_url') or f'https://nebula.tv/{metadata["class_slug"]}/{metadata["slug"]}',
+ {'id': lesson['id']}), NebulaClassIE, url_transparent=True, **metadata)
+
+ def _generate_podcast_entries(self, collection_id, collection_slug):
+ next_url = f'https://content.api.nebula.app/podcast_channels/{collection_id}/podcast_episodes/?ordering=-published_at&premium=true'
+ for page_num in itertools.count(1):
+ episodes = self._call_api(next_url, collection_slug, note=f'Retrieving podcast page {page_num}')
+
+ for episode in traverse_obj(episodes, ('results', lambda _, v: url_or_none(v['share_url']))):
+ yield self.url_result(episode['share_url'], NebulaClassIE)
+ next_url = episodes.get('next')
+ if not next_url:
+ break
+
+ def _real_extract(self, url):
+ collection_slug = self._match_id(url)
+ channel = self._call_api(
+ f'https://content.api.nebula.app/content/{collection_slug}/?include=lessons',
+ collection_slug, note='Retrieving channel')
+
+ if channel.get('type') == 'class':
+ entries = self._generate_class_entries(channel)
+ elif channel.get('type') == 'podcast_channel':
+ entries = self._generate_podcast_entries(channel['id'], collection_slug)
+ else:
+ entries = self._generate_playlist_entries(channel['id'], collection_slug)
+
+ return self.playlist_result(
+ entries=entries,
+ playlist_id=collection_slug,
+ playlist_title=channel.get('title'),
+ playlist_description=channel.get('description'))
diff --git a/yt_dlp/extractor/nekohacker.py b/yt_dlp/extractor/nekohacker.py
new file mode 100644
index 0000000..24b6657
--- /dev/null
+++ b/yt_dlp/extractor/nekohacker.py
@@ -0,0 +1,213 @@
+import re
+
+from .common import InfoExtractor
+from ..utils import (
+ ExtractorError,
+ determine_ext,
+ extract_attributes,
+ get_element_by_class,
+ get_element_text_and_html_by_tag,
+ parse_duration,
+ traverse_obj,
+ try_call,
+ url_or_none,
+)
+
+
+class NekoHackerIE(InfoExtractor):
+ _VALID_URL = r'https?://(?:www\.)?nekohacker\.com/(?P<id>(?!free-dl)[\w-]+)'
+ _TESTS = [{
+ 'url': 'https://nekohacker.com/nekoverse/',
+ 'info_dict': {
+ 'id': 'nekoverse',
+ 'title': 'Nekoverse',
+ },
+ 'playlist': [
+ {
+ 'url': 'https://nekohacker.com/wp-content/uploads/2022/11/01-Spaceship.mp3',
+ 'md5': '44223701ebedba0467ebda4cc07fb3aa',
+ 'info_dict': {
+ 'id': '1712',
+ 'ext': 'mp3',
+ 'title': 'Spaceship',
+ 'thumbnail': 'https://nekohacker.com/wp-content/uploads/2022/11/Nekoverse_Artwork-1024x1024.jpg',
+ 'vcodec': 'none',
+ 'acodec': 'mp3',
+ 'release_date': '20221101',
+ 'album': 'Nekoverse',
+ 'artist': 'Neko Hacker',
+ 'track': 'Spaceship',
+ 'track_number': 1,
+ 'duration': 195.0
+ }
+ },
+ {
+ 'url': 'https://nekohacker.com/wp-content/uploads/2022/11/02-City-Runner.mp3',
+ 'md5': '8f853c71719389d32bbbd3f1a87b3f08',
+ 'info_dict': {
+ 'id': '1713',
+ 'ext': 'mp3',
+ 'title': 'City Runner',
+ 'thumbnail': 'https://nekohacker.com/wp-content/uploads/2022/11/Nekoverse_Artwork-1024x1024.jpg',
+ 'vcodec': 'none',
+ 'acodec': 'mp3',
+ 'release_date': '20221101',
+ 'album': 'Nekoverse',
+ 'artist': 'Neko Hacker',
+ 'track': 'City Runner',
+ 'track_number': 2,
+ 'duration': 148.0
+ }
+ },
+ {
+ 'url': 'https://nekohacker.com/wp-content/uploads/2022/11/03-Nature-Talk.mp3',
+ 'md5': '5a8a8ae852720cee4c0ac95c7d1a7450',
+ 'info_dict': {
+ 'id': '1714',
+ 'ext': 'mp3',
+ 'title': 'Nature Talk',
+ 'thumbnail': 'https://nekohacker.com/wp-content/uploads/2022/11/Nekoverse_Artwork-1024x1024.jpg',
+ 'vcodec': 'none',
+ 'acodec': 'mp3',
+ 'release_date': '20221101',
+ 'album': 'Nekoverse',
+ 'artist': 'Neko Hacker',
+ 'track': 'Nature Talk',
+ 'track_number': 3,
+ 'duration': 174.0
+ }
+ },
+ {
+ 'url': 'https://nekohacker.com/wp-content/uploads/2022/11/04-Crystal-World.mp3',
+ 'md5': 'd8e59a48061764e50d92386a294abd50',
+ 'info_dict': {
+ 'id': '1715',
+ 'ext': 'mp3',
+ 'title': 'Crystal World',
+ 'thumbnail': 'https://nekohacker.com/wp-content/uploads/2022/11/Nekoverse_Artwork-1024x1024.jpg',
+ 'vcodec': 'none',
+ 'acodec': 'mp3',
+ 'release_date': '20221101',
+ 'album': 'Nekoverse',
+ 'artist': 'Neko Hacker',
+ 'track': 'Crystal World',
+ 'track_number': 4,
+ 'duration': 199.0
+ }
+ }
+ ]
+ }, {
+ 'url': 'https://nekohacker.com/susume/',
+ 'info_dict': {
+ 'id': 'susume',
+ 'title': '進め!むじなカンパニー',
+ },
+ 'playlist': [
+ {
+ 'url': 'https://nekohacker.com/wp-content/uploads/2021/01/進め!むじなカンパニー-feat.-六科なじむ-CV_-日高里菜-割戶真友-CV_-金元寿子-軽井沢ユキ-CV_-上坂すみれ-出稼ぎガルシア-CV_-金子彩花-.mp3',
+ 'md5': 'fb13f008aa81f26ba48f91fd2d6186ce',
+ 'info_dict': {
+ 'id': '711',
+ 'ext': 'mp3',
+ 'title': 'md5:1a5fcbc96ca3c3265b1c6f9f79f30fd0',
+ 'thumbnail': 'https://nekohacker.com/wp-content/uploads/2021/01/OP表-1024x1024.png',
+ 'vcodec': 'none',
+ 'acodec': 'mp3',
+ 'release_date': '20210115',
+ 'album': '進め!むじなカンパニー',
+ 'artist': 'Neko Hacker',
+ 'track': 'md5:1a5fcbc96ca3c3265b1c6f9f79f30fd0',
+ 'track_number': 1,
+ }
+ },
+ {
+ 'url': 'https://nekohacker.com/wp-content/uploads/2021/01/むじな-de-なじむ-feat.-六科なじむ-CV_-日高里菜-.mp3',
+ 'md5': '028803f70241df512b7764e73396fdd1',
+ 'info_dict': {
+ 'id': '709',
+ 'ext': 'mp3',
+ 'title': 'むじな de なじむ feat. 六科なじむ (CV: 日高里菜 )',
+ 'thumbnail': 'https://nekohacker.com/wp-content/uploads/2021/01/OP表-1024x1024.png',
+ 'vcodec': 'none',
+ 'acodec': 'mp3',
+ 'release_date': '20210115',
+ 'album': '進め!むじなカンパニー',
+ 'artist': 'Neko Hacker',
+ 'track': 'むじな de なじむ feat. 六科なじむ (CV: 日高里菜 )',
+ 'track_number': 2,
+ }
+ },
+ {
+ 'url': 'https://nekohacker.com/wp-content/uploads/2021/01/進め!むじなカンパニー-instrumental.mp3',
+ 'md5': 'adde9e9a16e1da5e602b579c247d0fb9',
+ 'info_dict': {
+ 'id': '710',
+ 'ext': 'mp3',
+ 'title': '進め!むじなカンパニー (instrumental)',
+ 'thumbnail': 'https://nekohacker.com/wp-content/uploads/2021/01/OP表-1024x1024.png',
+ 'vcodec': 'none',
+ 'acodec': 'mp3',
+ 'release_date': '20210115',
+ 'album': '進め!むじなカンパニー',
+ 'artist': 'Neko Hacker',
+ 'track': '進め!むじなカンパニー (instrumental)',
+ 'track_number': 3,
+ }
+ },
+ {
+ 'url': 'https://nekohacker.com/wp-content/uploads/2021/01/むじな-de-なじむ-instrumental.mp3',
+ 'md5': 'ebb0443039cf5f9ff7fd557ed9b23599',
+ 'info_dict': {
+ 'id': '712',
+ 'ext': 'mp3',
+ 'title': 'むじな de なじむ (instrumental)',
+ 'thumbnail': 'https://nekohacker.com/wp-content/uploads/2021/01/OP表-1024x1024.png',
+ 'vcodec': 'none',
+ 'acodec': 'mp3',
+ 'release_date': '20210115',
+ 'album': '進め!むじなカンパニー',
+ 'artist': 'Neko Hacker',
+ 'track': 'むじな de なじむ (instrumental)',
+ 'track_number': 4,
+ }
+ }
+ ]
+ }]
+
+ def _real_extract(self, url):
+ playlist_id = self._match_id(url)
+
+ webpage = self._download_webpage(url, playlist_id)
+ playlist = get_element_by_class('playlist', webpage)
+
+ if not playlist:
+ iframe = try_call(lambda: get_element_text_and_html_by_tag('iframe', webpage)[1]) or ''
+ iframe_src = url_or_none(extract_attributes(iframe).get('src'))
+ if not iframe_src:
+ raise ExtractorError('No playlist or embed found in webpage')
+ elif re.match(r'https?://(?:\w+\.)?spotify\.com/', iframe_src):
+ raise ExtractorError('Spotify embeds are not supported', expected=True)
+ return self.url_result(url, 'Generic')
+
+ entries = []
+ for track_number, track in enumerate(re.findall(r'(<li[^>]+data-audiopath[^>]+>)', playlist), 1):
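+            # Each matched <li> carries its track metadata as data-* attributes;
+            # data-releasedate appears to be dotted (e.g. '2022.11.01') and is
+            # normalized to YYYYMMDD by stripping dots and matching 8 digits.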
+ entry = traverse_obj(extract_attributes(track), {
+ 'url': ('data-audiopath', {url_or_none}),
+ 'ext': ('data-audiopath', {determine_ext}),
+ 'id': 'data-trackid',
+ 'title': 'data-tracktitle',
+ 'track': 'data-tracktitle',
+ 'album': 'data-albumtitle',
+ 'duration': ('data-tracktime', {parse_duration}),
+ 'release_date': ('data-releasedate', {lambda x: re.match(r'\d{8}', x.replace('.', ''))}, 0),
+ 'thumbnail': ('data-albumart', {url_or_none}),
+ })
+ entries.append({
+ **entry,
+ 'track_number': track_number,
+ 'artist': 'Neko Hacker',
+ 'vcodec': 'none',
+ 'acodec': 'mp3' if entry['ext'] == 'mp3' else None,
+ })
+
+ return self.playlist_result(entries, playlist_id, traverse_obj(entries, (0, 'album')))
diff --git a/yt_dlp/extractor/nerdcubed.py b/yt_dlp/extractor/nerdcubed.py
new file mode 100644
index 0000000..5f5607a
--- /dev/null
+++ b/yt_dlp/extractor/nerdcubed.py
@@ -0,0 +1,38 @@
+from .common import InfoExtractor
+from .youtube import YoutubeIE
+from ..utils import parse_iso8601, url_or_none
+from ..utils.traversal import traverse_obj
+
+
+class NerdCubedFeedIE(InfoExtractor):
+ _VALID_URL = r'https?://(?:www\.)?nerdcubed\.co\.uk/?(?:$|[#?])'
+ _TEST = {
+ 'url': 'http://www.nerdcubed.co.uk/',
+ 'info_dict': {
+ 'id': 'nerdcubed-feed',
+ 'title': 'nerdcubed.co.uk feed',
+ },
+ 'playlist_mincount': 5500,
+ }
+
+ def _extract_video(self, feed_entry):
+ return self.url_result(
+ f'https://www.youtube.com/watch?v={feed_entry["id"]}', YoutubeIE,
+ **traverse_obj(feed_entry, {
+ 'id': ('id', {str}),
+ 'title': ('title', {str}),
+ 'description': ('description', {str}),
+ 'timestamp': ('publishedAt', {parse_iso8601}),
+ 'channel': ('source', 'name', {str}),
+ 'channel_id': ('source', 'id', {str}),
+ 'channel_url': ('source', 'url', {str}),
+ 'thumbnail': ('thumbnail', 'source', {url_or_none}),
+ }), url_transparent=True)
+
+ def _real_extract(self, url):
+ video_id = 'nerdcubed-feed'
+ feed = self._download_json('https://www.nerdcubed.co.uk/_/cdn/videos.json', video_id)
+
+ return self.playlist_result(
+ map(self._extract_video, traverse_obj(feed, ('videos', lambda _, v: v['id']))),
+ video_id, 'nerdcubed.co.uk feed')
diff --git a/yt_dlp/extractor/neteasemusic.py b/yt_dlp/extractor/neteasemusic.py
new file mode 100644
index 0000000..d332b84
--- /dev/null
+++ b/yt_dlp/extractor/neteasemusic.py
@@ -0,0 +1,615 @@
+import itertools
+import json
+import re
+import time
+from hashlib import md5
+from random import randint
+
+from .common import InfoExtractor
+from ..aes import aes_ecb_encrypt, pkcs7_padding
+from ..utils import (
+ ExtractorError,
+ int_or_none,
+ join_nonempty,
+ str_or_none,
+ strftime_or_none,
+ traverse_obj,
+ unified_strdate,
+ url_or_none,
+ urljoin,
+ variadic,
+)
+
+
+class NetEaseMusicBaseIE(InfoExtractor):
+ _FORMATS = ['bMusic', 'mMusic', 'hMusic']
+ _API_BASE = 'http://music.163.com/api/'
+ _GEO_BYPASS = False
+
+ @staticmethod
+ def kilo_or_none(value):
+ return int_or_none(value, scale=1000)
+
+ def _create_eapi_cipher(self, api_path, query_body, cookies):
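+        # eapi request signing: an MD5 digest of 'nobody{path}use{body}md5forencrypt'
+        # is appended to the path and body (joined by '-36cd479b6b5-'), the whole
+        # string is AES-ECB encrypted with the static key 'e82ckenh8dichen8' after
+        # PKCS#7 padding, and the uppercase hex becomes the 'params' form field.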
+ request_text = json.dumps({**query_body, 'header': cookies}, separators=(',', ':'))
+
+ message = f'nobody{api_path}use{request_text}md5forencrypt'.encode('latin1')
+ msg_digest = md5(message).hexdigest()
+
+ data = pkcs7_padding(list(str.encode(
+ f'{api_path}-36cd479b6b5-{request_text}-36cd479b6b5-{msg_digest}')))
+ encrypted = bytes(aes_ecb_encrypt(data, list(b'e82ckenh8dichen8')))
+ return f'params={encrypted.hex().upper()}'.encode()
+
+ def _download_eapi_json(self, path, video_id, query_body, headers={}, **kwargs):
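+        # The eapi endpoints appear to validate a full set of client cookies, so a
+        # desktop client is spoofed here; only the MUSIC_U login cookie is carried
+        # over from the real cookie jar when present.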
+ cookies = {
+ 'osver': 'undefined',
+ 'deviceId': 'undefined',
+ 'appver': '8.0.0',
+ 'versioncode': '140',
+ 'mobilename': 'undefined',
+ 'buildver': '1623435496',
+ 'resolution': '1920x1080',
+ '__csrf': '',
+ 'os': 'pc',
+ 'channel': 'undefined',
+ 'requestId': f'{int(time.time() * 1000)}_{randint(0, 1000):04}',
+ **traverse_obj(self._get_cookies(self._API_BASE), {
+ 'MUSIC_U': ('MUSIC_U', {lambda i: i.value}),
+ })
+ }
+ return self._download_json(
+ urljoin('https://interface3.music.163.com/', f'/eapi{path}'), video_id,
+ data=self._create_eapi_cipher(f'/api{path}', query_body, cookies), headers={
+ 'Referer': 'https://music.163.com',
+ 'Cookie': '; '.join([f'{k}={v}' for k, v in cookies.items()]),
+ **headers,
+ }, **kwargs)
+
+ def _call_player_api(self, song_id, bitrate):
+ return self._download_eapi_json(
+ '/song/enhance/player/url', song_id, {'ids': f'[{song_id}]', 'br': bitrate},
+ note=f'Downloading song URL info: bitrate {bitrate}')
+
+ def extract_formats(self, info):
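+        # Probe every quality tier present in the song metadata; each tier's bitrate
+        # is passed to the player API, and the first non-zero error code from an
+        # unusable entry is kept for the error message below.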
+ err = 0
+ formats = []
+ song_id = info['id']
+ for song_format in self._FORMATS:
+ details = info.get(song_format)
+ if not details:
+ continue
+ bitrate = int_or_none(details.get('bitrate')) or 999000
+ for song in traverse_obj(self._call_player_api(song_id, bitrate), ('data', lambda _, v: url_or_none(v['url']))):
+ song_url = song['url']
+ if self._is_valid_url(song_url, info['id'], 'song'):
+ formats.append({
+ 'url': song_url,
+ 'format_id': song_format,
+ 'asr': traverse_obj(details, ('sr', {int_or_none})),
+ **traverse_obj(song, {
+ 'ext': ('type', {str}),
+ 'abr': ('br', {self.kilo_or_none}),
+ 'filesize': ('size', {int_or_none}),
+ }),
+ })
+ elif err == 0:
+ err = traverse_obj(song, ('code', {int})) or 0
+
+ if not formats:
+ if err != 0 and (err < 200 or err >= 400):
+ raise ExtractorError(f'No media links found (site code {err})', expected=True)
+ else:
+ self.raise_geo_restricted(
+ 'No media links found: probably due to geo restriction.', countries=['CN'])
+ return formats
+
+ def query_api(self, endpoint, video_id, note):
+ result = self._download_json(
+ f'{self._API_BASE}{endpoint}', video_id, note, headers={'Referer': self._API_BASE})
+ code = traverse_obj(result, ('code', {int}))
+ message = traverse_obj(result, ('message', {str})) or ''
+ if code == -462:
+ self.raise_login_required(f'Login required to download: {message}')
+ elif code != 200:
+ raise ExtractorError(f'Failed to get meta info: {code} {message}')
+ return result
+
+ def _get_entries(self, songs_data, entry_keys=None, id_key='id', name_key='name'):
+ for song in traverse_obj(songs_data, (
+ *variadic(entry_keys, (str, bytes, dict, set)),
+ lambda _, v: int_or_none(v[id_key]) is not None)):
+ song_id = str(song[id_key])
+ yield self.url_result(
+ f'http://music.163.com/#/song?id={song_id}', NetEaseMusicIE,
+ song_id, traverse_obj(song, (name_key, {str})))
+
+
+class NetEaseMusicIE(NetEaseMusicBaseIE):
+ IE_NAME = 'netease:song'
+ IE_DESC = '网易云音乐'
+ _VALID_URL = r'https?://(y\.)?music\.163\.com/(?:[#m]/)?song\?.*?\bid=(?P<id>[0-9]+)'
+ _TESTS = [{
+ 'url': 'https://music.163.com/#/song?id=548648087',
+ 'info_dict': {
+ 'id': '548648087',
+ 'ext': 'mp3',
+ 'title': '戒烟 (Live)',
+ 'creator': '李荣浩 / 朱正廷 / 陈立农 / 尤长靖 / ONER灵超 / ONER木子洋 / 杨非同 / 陆定昊',
+ 'timestamp': 1522944000,
+ 'upload_date': '20180405',
+ 'description': 'md5:3650af9ee22c87e8637cb2dde22a765c',
+ 'subtitles': {'lyrics': [{'ext': 'lrc'}]},
+ "duration": 256,
+ 'thumbnail': r're:^http.*\.jpg',
+ 'album': '偶像练习生 表演曲目合集',
+ 'average_rating': int,
+ 'album_artist': '偶像练习生',
+ },
+ }, {
+ 'note': 'No lyrics.',
+ 'url': 'http://music.163.com/song?id=17241424',
+ 'info_dict': {
+ 'id': '17241424',
+ 'ext': 'mp3',
+ 'title': 'Opus 28',
+ 'creator': 'Dustin O\'Halloran',
+ 'upload_date': '20080211',
+ 'timestamp': 1202745600,
+ 'duration': 263,
+ 'thumbnail': r're:^http.*\.jpg',
+ 'album': 'Piano Solos Vol. 2',
+ 'album_artist': 'Dustin O\'Halloran',
+ 'average_rating': int,
+ },
+ }, {
+ 'url': 'https://y.music.163.com/m/song?app_version=8.8.45&id=95670&uct2=sKnvS4+0YStsWkqsPhFijw%3D%3D&dlt=0846',
+ 'md5': '95826c73ea50b1c288b22180ec9e754d',
+ 'info_dict': {
+ 'id': '95670',
+ 'ext': 'mp3',
+ 'title': '国际歌',
+ 'creator': '马备',
+ 'upload_date': '19911130',
+ 'timestamp': 691516800,
+ 'description': 'md5:1ba2f911a2b0aa398479f595224f2141',
+ 'subtitles': {'lyrics': [{'ext': 'lrc'}]},
+ 'duration': 268,
+ 'alt_title': '伴唱:现代人乐队 合唱:总政歌舞团',
+ 'thumbnail': r're:^http.*\.jpg',
+ 'average_rating': int,
+ 'album': '红色摇滚',
+ 'album_artist': '侯牧人',
+ },
+ }, {
+ 'url': 'http://music.163.com/#/song?id=32102397',
+ 'md5': '3e909614ce09b1ccef4a3eb205441190',
+ 'info_dict': {
+ 'id': '32102397',
+ 'ext': 'mp3',
+ 'title': 'Bad Blood',
+ 'creator': 'Taylor Swift / Kendrick Lamar',
+ 'upload_date': '20150516',
+ 'timestamp': 1431792000,
+ 'description': 'md5:21535156efb73d6d1c355f95616e285a',
+ 'subtitles': {'lyrics': [{'ext': 'lrc'}]},
+ 'duration': 199,
+ 'thumbnail': r're:^http.*\.jpg',
+ 'album': 'Bad Blood',
+ 'average_rating': int,
+ 'album_artist': 'Taylor Swift',
+ },
+ 'skip': 'Blocked outside Mainland China',
+ }, {
+ 'note': 'Has translated name.',
+ 'url': 'http://music.163.com/#/song?id=22735043',
+ 'info_dict': {
+ 'id': '22735043',
+ 'ext': 'mp3',
+ 'title': '소원을 말해봐 (Genie)',
+ 'creator': '少女时代',
+ 'upload_date': '20100127',
+ 'timestamp': 1264608000,
+ 'description': 'md5:03d1ffebec3139aa4bafe302369269c5',
+ 'subtitles': {'lyrics': [{'ext': 'lrc'}]},
+ 'duration': 229,
+ 'alt_title': '说出愿望吧(Genie)',
+ 'thumbnail': r're:^http.*\.jpg',
+ 'average_rating': int,
+ 'album': 'Oh!',
+ 'album_artist': '少女时代',
+ },
+ 'skip': 'Blocked outside Mainland China',
+ }]
+
+ def _process_lyrics(self, lyrics_info):
+ original = traverse_obj(lyrics_info, ('lrc', 'lyric', {str}))
+ translated = traverse_obj(lyrics_info, ('tlyric', 'lyric', {str}))
+
+ if not original or original == '[99:00.00]纯音乐,请欣赏\n':
+ return None
+
+ if not translated:
+ return {
+ 'lyrics': [{'data': original, 'ext': 'lrc'}],
+ }
+
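+        # Both LRC texts are keyed by their '[mm:ss.xx]' timestamps so each original
+        # line can be paired with its translation, producing lines like
+        #   [00:12.34]原文 / translated text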
+ lyrics_expr = r'(\[[0-9]{2}:[0-9]{2}\.[0-9]{2,}\])([^\n]+)'
+ original_ts_texts = re.findall(lyrics_expr, original)
+ translation_ts_dict = dict(re.findall(lyrics_expr, translated))
+
+ merged = '\n'.join(
+ join_nonempty(f'{timestamp}{text}', translation_ts_dict.get(timestamp, ''), delim=' / ')
+ for timestamp, text in original_ts_texts)
+
+ return {
+ 'lyrics_merged': [{'data': merged, 'ext': 'lrc'}],
+ 'lyrics': [{'data': original, 'ext': 'lrc'}],
+ 'lyrics_translated': [{'data': translated, 'ext': 'lrc'}],
+ }
+
+ def _real_extract(self, url):
+ song_id = self._match_id(url)
+
+ info = self.query_api(
+ f'song/detail?id={song_id}&ids=%5B{song_id}%5D', song_id, 'Downloading song info')['songs'][0]
+
+ formats = self.extract_formats(info)
+
+ lyrics = self._process_lyrics(self.query_api(
+ f'song/lyric?id={song_id}&lv=-1&tv=-1', song_id, 'Downloading lyrics data'))
+ lyric_data = {
+ 'description': traverse_obj(lyrics, (('lyrics_merged', 'lyrics'), 0, 'data'), get_all=False),
+ 'subtitles': lyrics,
+ } if lyrics else {}
+
+ return {
+ 'id': song_id,
+ 'formats': formats,
+ 'alt_title': '/'.join(traverse_obj(info, (('transNames', 'alias'), ...))) or None,
+ 'creator': ' / '.join(traverse_obj(info, ('artists', ..., 'name'))) or None,
+ 'album_artist': ' / '.join(traverse_obj(info, ('album', 'artists', ..., 'name'))) or None,
+ **lyric_data,
+ **traverse_obj(info, {
+ 'title': ('name', {str}),
+ 'timestamp': ('album', 'publishTime', {self.kilo_or_none}),
+ 'thumbnail': ('album', 'picUrl', {url_or_none}),
+ 'duration': ('duration', {self.kilo_or_none}),
+ 'album': ('album', 'name', {str}),
+ 'average_rating': ('score', {int_or_none}),
+ }),
+ }
+
+
+class NetEaseMusicAlbumIE(NetEaseMusicBaseIE):
+ IE_NAME = 'netease:album'
+ IE_DESC = '网易云音乐 - 专辑'
+ _VALID_URL = r'https?://music\.163\.com/(#/)?album\?id=(?P<id>[0-9]+)'
+ _TESTS = [{
+ 'url': 'https://music.163.com/#/album?id=133153666',
+ 'info_dict': {
+ 'id': '133153666',
+ 'title': '桃几的翻唱',
+ 'upload_date': '20210913',
+ 'description': '桃几2021年翻唱合集',
+ 'thumbnail': r're:^http.*\.jpg',
+ },
+ 'playlist_mincount': 13,
+ }, {
+ 'url': 'http://music.163.com/#/album?id=220780',
+ 'info_dict': {
+ 'id': '220780',
+ 'title': 'B\'Day',
+ 'upload_date': '20060904',
+ 'description': 'md5:71a74e1d8f392d88cf1bbe48879ad0b0',
+ 'thumbnail': r're:^http.*\.jpg',
+ },
+ 'playlist_count': 23,
+ }]
+
+ def _real_extract(self, url):
+ album_id = self._match_id(url)
+ webpage = self._download_webpage(f'https://music.163.com/album?id={album_id}', album_id)
+
+ songs = self._search_json(
+ r'<textarea[^>]+\bid="song-list-pre-data"[^>]*>', webpage, 'metainfo', album_id,
+ end_pattern=r'</textarea>', contains_pattern=r'\[(?s:.+)\]')
+ metainfo = {
+ 'title': self._og_search_property('title', webpage, 'title', fatal=False),
+ 'description': self._html_search_regex(
+ (rf'<div[^>]+\bid="album-desc-{suffix}"[^>]*>(.*?)</div>' for suffix in ('more', 'dot')),
+ webpage, 'description', flags=re.S, fatal=False),
+ 'thumbnail': self._og_search_property('image', webpage, 'thumbnail', fatal=False),
+ 'upload_date': unified_strdate(self._html_search_meta('music:release_date', webpage, 'date', fatal=False)),
+ }
+ return self.playlist_result(self._get_entries(songs), album_id, **metainfo)
+
+
+class NetEaseMusicSingerIE(NetEaseMusicBaseIE):
+ IE_NAME = 'netease:singer'
+ IE_DESC = '网易云音乐 - 歌手'
+ _VALID_URL = r'https?://music\.163\.com/(#/)?artist\?id=(?P<id>[0-9]+)'
+ _TESTS = [{
+ 'note': 'Singer has aliases.',
+ 'url': 'http://music.163.com/#/artist?id=10559',
+ 'info_dict': {
+ 'id': '10559',
+ 'title': '张惠妹 - aMEI;阿妹;阿密特',
+ },
+ 'playlist_count': 50,
+ }, {
+ 'note': 'Singer has translated name.',
+ 'url': 'http://music.163.com/#/artist?id=124098',
+ 'info_dict': {
+ 'id': '124098',
+ 'title': '李昇基 - 이승기',
+ },
+ 'playlist_count': 50,
+ }, {
+        'note': 'Singer has both a translated name and aliases.',
+ 'url': 'https://music.163.com/#/artist?id=159692',
+ 'info_dict': {
+ 'id': '159692',
+ 'title': '初音ミク - 初音未来;Hatsune Miku',
+ },
+ 'playlist_count': 50,
+ }]
+
+ def _real_extract(self, url):
+ singer_id = self._match_id(url)
+
+ info = self.query_api(
+ f'artist/{singer_id}?id={singer_id}', singer_id, note='Downloading singer data')
+
+ name = join_nonempty(
+ traverse_obj(info, ('artist', 'name', {str})),
+ join_nonempty(*traverse_obj(info, ('artist', ('trans', ('alias', ...)), {str})), delim=';'),
+ delim=' - ')
+
+ return self.playlist_result(self._get_entries(info, 'hotSongs'), singer_id, name)
+
+
+class NetEaseMusicListIE(NetEaseMusicBaseIE):
+ IE_NAME = 'netease:playlist'
+ IE_DESC = '网易云音乐 - 歌单'
+ _VALID_URL = r'https?://music\.163\.com/(#/)?(playlist|discover/toplist)\?id=(?P<id>[0-9]+)'
+ _TESTS = [{
+ 'url': 'http://music.163.com/#/playlist?id=79177352',
+ 'info_dict': {
+ 'id': '79177352',
+ 'title': 'Billboard 2007 Top 100',
+ 'description': 'md5:12fd0819cab2965b9583ace0f8b7b022',
+ 'tags': ['欧美'],
+ 'uploader': '浑然破灭',
+ 'uploader_id': '67549805',
+ 'timestamp': int,
+ 'upload_date': r're:\d{8}',
+ },
+ 'playlist_mincount': 95,
+ }, {
+ 'note': 'Toplist/Charts sample',
+ 'url': 'https://music.163.com/#/discover/toplist?id=60198',
+ 'info_dict': {
+ 'id': '60198',
+ 'title': 're:美国Billboard榜 [0-9]{4}-[0-9]{2}-[0-9]{2}',
+ 'description': '美国Billboard排行榜',
+ 'tags': ['流行', '欧美', '榜单'],
+ 'uploader': 'Billboard公告牌',
+ 'uploader_id': '48171',
+ 'timestamp': int,
+ 'upload_date': r're:\d{8}',
+ },
+ 'playlist_count': 100,
+ }, {
+ 'note': 'Toplist/Charts sample',
+ 'url': 'http://music.163.com/#/discover/toplist?id=3733003',
+ 'info_dict': {
+ 'id': '3733003',
+ 'title': 're:韩国Melon排行榜周榜 [0-9]{4}-[0-9]{2}-[0-9]{2}',
+ 'description': 'md5:73ec782a612711cadc7872d9c1e134fc',
+ },
+ 'playlist_count': 50,
+ 'skip': 'Blocked outside Mainland China',
+ }]
+
+ def _real_extract(self, url):
+ list_id = self._match_id(url)
+
+ info = self._download_eapi_json(
+ '/v3/playlist/detail', list_id,
+ {'id': list_id, 't': '-1', 'n': '500', 's': '0'},
+ note="Downloading playlist info")
+
+ metainfo = traverse_obj(info, ('playlist', {
+ 'title': ('name', {str}),
+ 'description': ('description', {str}),
+ 'tags': ('tags', ..., {str}),
+ 'uploader': ('creator', 'nickname', {str}),
+ 'uploader_id': ('creator', 'userId', {str_or_none}),
+ 'timestamp': ('updateTime', {self.kilo_or_none}),
+ }))
+ if traverse_obj(info, ('playlist', 'specialType')) == 10:
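+            # specialType 10 playlists (assumed: auto-updating charts/toplists)
+            # carry a generic name, so append the last-update date to the title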
+ metainfo['title'] = f'{metainfo.get("title")} {strftime_or_none(metainfo.get("timestamp"), "%Y-%m-%d")}'
+
+ return self.playlist_result(self._get_entries(info, ('playlist', 'tracks')), list_id, **metainfo)
+
+
+class NetEaseMusicMvIE(NetEaseMusicBaseIE):
+ IE_NAME = 'netease:mv'
+ IE_DESC = '网易云音乐 - MV'
+ _VALID_URL = r'https?://music\.163\.com/(#/)?mv\?id=(?P<id>[0-9]+)'
+ _TESTS = [{
+ 'url': 'https://music.163.com/#/mv?id=10958064',
+ 'info_dict': {
+ 'id': '10958064',
+ 'ext': 'mp4',
+ 'title': '交换余生',
+ 'description': 'md5:e845872cff28820642a2b02eda428fea',
+ 'creator': '林俊杰',
+ 'upload_date': '20200916',
+ 'thumbnail': r're:http.*\.jpg',
+ 'duration': 364,
+ 'view_count': int,
+ 'like_count': int,
+ 'comment_count': int,
+ },
+ }, {
+ 'url': 'http://music.163.com/#/mv?id=415350',
+ 'info_dict': {
+ 'id': '415350',
+ 'ext': 'mp4',
+ 'title': '이럴거면 그러지말지',
+ 'description': '白雅言自作曲唱甜蜜爱情',
+ 'creator': '白娥娟',
+ 'upload_date': '20150520',
+ 'thumbnail': r're:http.*\.jpg',
+ 'duration': 216,
+ 'view_count': int,
+ 'like_count': int,
+ 'comment_count': int,
+ },
+ }]
+
+ def _real_extract(self, url):
+ mv_id = self._match_id(url)
+
+ info = self.query_api(
+ f'mv/detail?id={mv_id}&type=mp4', mv_id, 'Downloading mv info')['data']
+
+ formats = [
+ {'url': mv_url, 'ext': 'mp4', 'format_id': f'{brs}p', 'height': int_or_none(brs)}
+ for brs, mv_url in info['brs'].items()
+ ]
+
+ return {
+ 'id': mv_id,
+ 'formats': formats,
+ **traverse_obj(info, {
+ 'title': ('name', {str}),
+ 'description': (('desc', 'briefDesc'), {str}, {lambda x: x or None}),
+ 'creator': ('artistName', {str}),
+ 'upload_date': ('publishTime', {unified_strdate}),
+ 'thumbnail': ('cover', {url_or_none}),
+ 'duration': ('duration', {self.kilo_or_none}),
+ 'view_count': ('playCount', {int_or_none}),
+ 'like_count': ('likeCount', {int_or_none}),
+ 'comment_count': ('commentCount', {int_or_none}),
+ }, get_all=False),
+ }
+
+
+class NetEaseMusicProgramIE(NetEaseMusicBaseIE):
+ IE_NAME = 'netease:program'
+ IE_DESC = '网易云音乐 - 电台节目'
+    _VALID_URL = r'https?://music\.163\.com/(#/)?program\?id=(?P<id>[0-9]+)'
+ _TESTS = [{
+ 'url': 'http://music.163.com/#/program?id=10109055',
+ 'info_dict': {
+ 'id': '32593346',
+ 'ext': 'mp3',
+ 'title': '不丹足球背后的故事',
+ 'description': '喜马拉雅人的足球梦 ...',
+ 'creator': '大话西藏',
+ 'timestamp': 1434179287,
+ 'upload_date': '20150613',
+ 'thumbnail': r're:http.*\.jpg',
+ 'duration': 900,
+ },
+ }, {
+ 'note': 'This program has accompanying songs.',
+ 'url': 'http://music.163.com/#/program?id=10141022',
+ 'info_dict': {
+ 'id': '10141022',
+ 'title': '滚滚电台的有声节目',
+ 'description': 'md5:8d594db46cc3e6509107ede70a4aaa3b',
+ 'creator': '滚滚电台ORZ',
+ 'timestamp': 1434450733,
+ 'upload_date': '20150616',
+ 'thumbnail': r're:http.*\.jpg',
+ },
+ 'playlist_count': 4,
+ }, {
+ 'note': 'This program has accompanying songs.',
+ 'url': 'http://music.163.com/#/program?id=10141022',
+ 'info_dict': {
+ 'id': '32647209',
+ 'ext': 'mp3',
+ 'title': '滚滚电台的有声节目',
+ 'description': 'md5:8d594db46cc3e6509107ede70a4aaa3b',
+ 'creator': '滚滚电台ORZ',
+ 'timestamp': 1434450733,
+ 'upload_date': '20150616',
+ 'thumbnail': r're:http.*\.jpg',
+ 'duration': 1104,
+ },
+ 'params': {
+            'noplaylist': True,
+ },
+ }]
+
+ def _real_extract(self, url):
+ program_id = self._match_id(url)
+
+ info = self.query_api(
+ f'dj/program/detail?id={program_id}', program_id, note='Downloading program info')['program']
+
+ metainfo = traverse_obj(info, {
+ 'title': ('name', {str}),
+ 'description': ('description', {str}),
+ 'creator': ('dj', 'brand', {str}),
+ 'thumbnail': ('coverUrl', {url_or_none}),
+ 'timestamp': ('createTime', {self.kilo_or_none}),
+ })
+
+ if not self._yes_playlist(info['songs'] and program_id, info['mainSong']['id']):
+ formats = self.extract_formats(info['mainSong'])
+
+ return {
+ 'id': str(info['mainSong']['id']),
+ 'formats': formats,
+ 'duration': traverse_obj(info, ('mainSong', 'duration', {self.kilo_or_none})),
+ **metainfo,
+ }
+
+ songs = traverse_obj(info, (('mainSong', ('songs', ...)),))
+ return self.playlist_result(self._get_entries(songs), program_id, **metainfo)
+
+
+class NetEaseMusicDjRadioIE(NetEaseMusicBaseIE):
+ IE_NAME = 'netease:djradio'
+ IE_DESC = '网易云音乐 - 电台'
+ _VALID_URL = r'https?://music\.163\.com/(#/)?djradio\?id=(?P<id>[0-9]+)'
+ _TEST = {
+ 'url': 'http://music.163.com/#/djradio?id=42',
+ 'info_dict': {
+ 'id': '42',
+ 'title': '声音蔓延',
+            'description': 'md5:c7381ebd7989f9f367668a5aee7d5f08',
+ },
+ 'playlist_mincount': 40,
+ }
+ _PAGE_SIZE = 1000
+
+ def _real_extract(self, url):
+ dj_id = self._match_id(url)
+
+ metainfo = {}
+ entries = []
+ for offset in itertools.count(start=0, step=self._PAGE_SIZE):
+ info = self.query_api(
+ f'dj/program/byradio?asc=false&limit={self._PAGE_SIZE}&radioId={dj_id}&offset={offset}',
+ dj_id, note=f'Downloading dj programs - {offset}')
+
+ entries.extend(self.url_result(
+ f'http://music.163.com/#/program?id={program["id"]}', NetEaseMusicProgramIE,
+ program['id'], program.get('name')) for program in info['programs'])
+ if not metainfo:
+ metainfo = traverse_obj(info, ('programs', 0, 'radio', {
+ 'title': ('name', {str}),
+ 'description': ('desc', {str}),
+ }))
+
+ if not info['more']:
+ break
+
+ return self.playlist_result(entries, dj_id, **metainfo)
diff --git a/yt_dlp/extractor/netverse.py b/yt_dlp/extractor/netverse.py
new file mode 100644
index 0000000..ef53e15
--- /dev/null
+++ b/yt_dlp/extractor/netverse.py
@@ -0,0 +1,281 @@
+import itertools
+
+from .common import InfoExtractor, SearchInfoExtractor
+from .dailymotion import DailymotionIE
+from ..utils import smuggle_url, traverse_obj
+
+
+class NetverseBaseIE(InfoExtractor):
+ _ENDPOINTS = {
+ 'watch': 'watchvideo',
+ 'video': 'watchvideo',
+ 'webseries': 'webseries',
+ 'season': 'webseason_videos',
+ }
+
+    def _call_api(self, slug, endpoint, query=None, season_id='', display_id=None):
+        # Use None instead of a mutable default argument for the query dict
+        return self._download_json(
+            f'https://api.netverse.id/medias/api/v2/{self._ENDPOINTS[endpoint]}/{slug}/{season_id}',
+            display_id or slug, query=query or {})
+
+ def _get_comments(self, video_id):
+ last_page_number = None
+ for i in itertools.count(1):
+ comment_data = self._download_json(
+ f'https://api.netverse.id/mediadetails/api/v3/videos/comments/{video_id}',
+ video_id, data=b'', fatal=False, query={'page': i},
+ note=f'Downloading JSON comment metadata page {i}') or {}
+ yield from traverse_obj(comment_data, ('response', 'comments', 'data', ..., {
+ 'id': '_id',
+ 'text': 'comment',
+ 'author_id': 'customer_id',
+ 'author': ('customer', 'name'),
+ 'author_thumbnail': ('customer', 'profile_picture'),
+ }))
+
+ if not last_page_number:
+ last_page_number = traverse_obj(comment_data, ('response', 'comments', 'last_page'))
+ if i >= (last_page_number or 0):
+ break
+
+
+class NetverseIE(NetverseBaseIE):
+ _VALID_URL = r'https?://(?:\w+\.)?netverse\.id/(?P<type>watch|video)/(?P<display_id>[^/?#&]+)'
+ _TESTS = [{
+ # Watch video
+ 'url': 'https://www.netverse.id/watch/waktu-indonesia-bercanda-edisi-spesial-lebaran-2016',
+ 'info_dict': {
+ 'id': 'k4yhqUwINAGtmHx3NkL',
+ 'title': 'Waktu Indonesia Bercanda - Edisi Spesial Lebaran 2016',
+ 'ext': 'mp4',
+ 'season': 'Season 2016',
+ 'description': 'md5:d41d8cd98f00b204e9800998ecf8427e',
+ 'thumbnail': r're:https?://s\d+\.dmcdn\.net/v/[^/]+/x1080',
+ 'episode_number': 22,
+ 'episode': 'Episode 22',
+ 'uploader_id': 'x2ir3vq',
+ 'age_limit': 0,
+ 'tags': [],
+ 'view_count': int,
+ 'display_id': 'waktu-indonesia-bercanda-edisi-spesial-lebaran-2016',
+ 'duration': 2990,
+ 'upload_date': '20210722',
+ 'timestamp': 1626919804,
+ 'like_count': int,
+ 'uploader': 'Net Prime',
+        },
+ }, {
+ # series
+ 'url': 'https://www.netverse.id/watch/jadoo-seorang-model',
+ 'info_dict': {
+ 'id': 'x88izwc',
+ 'title': 'Jadoo Seorang Model',
+ 'ext': 'mp4',
+ 'season': 'Season 2',
+ 'description': 'md5:8a74f70812cca267e19ee0635f0af835',
+ 'thumbnail': r're:https?://s\d+\.dmcdn\.net/v/[^/]+/x1080',
+ 'episode_number': 2,
+ 'episode': 'Episode 2',
+ 'view_count': int,
+ 'like_count': int,
+ 'display_id': 'jadoo-seorang-model',
+ 'uploader_id': 'x2ir3vq',
+ 'duration': 635,
+ 'timestamp': 1646372927,
+ 'tags': ['PG069497-hellojadooseason2eps2'],
+ 'upload_date': '20220304',
+ 'uploader': 'Net Prime',
+ 'age_limit': 0,
+ },
+        'skip': 'This video is geo-blocked in some countries',
+ }, {
+ # non www host
+ 'url': 'https://netverse.id/watch/tetangga-baru',
+ 'info_dict': {
+ 'id': 'k4CNGz7V0HJ7vfwZbXy',
+ 'ext': 'mp4',
+ 'title': 'Tetangga Baru',
+ 'season': 'Season 1',
+ 'description': 'md5:23fcf70e97d461d3029d25d59b2ccfb9',
+ 'thumbnail': r're:https?://s\d+\.dmcdn\.net/v/[^/]+/x1080',
+ 'episode_number': 1,
+ 'episode': 'Episode 1',
+ 'timestamp': 1624538169,
+ 'view_count': int,
+ 'upload_date': '20210624',
+ 'age_limit': 0,
+ 'uploader_id': 'x2ir3vq',
+ 'like_count': int,
+ 'uploader': 'Net Prime',
+ 'tags': ['PG008534', 'tetangga', 'Baru'],
+ 'display_id': 'tetangga-baru',
+ 'duration': 1406,
+ },
+ }, {
+ # /video url
+ 'url': 'https://www.netverse.id/video/pg067482-hellojadoo-season1',
+ 'info_dict': {
+ 'id': 'x887jzz',
+ 'ext': 'mp4',
+ 'thumbnail': r're:https?://s\d+\.dmcdn\.net/v/[^/]+/x1080',
+ 'season': 'Season 1',
+ 'episode_number': 1,
+ 'description': 'md5:d4f627b3e7a3f9acdc55f6cdd5ea41d5',
+ 'title': 'Namaku Choi Jadoo',
+ 'episode': 'Episode 1',
+ 'age_limit': 0,
+ 'like_count': int,
+ 'view_count': int,
+ 'tags': ['PG067482', 'PG067482-HelloJadoo-season1'],
+ 'duration': 780,
+ 'display_id': 'pg067482-hellojadoo-season1',
+ 'uploader_id': 'x2ir3vq',
+ 'uploader': 'Net Prime',
+ 'timestamp': 1645764984,
+ 'upload_date': '20220225',
+ },
+        'skip': 'This video is geo-blocked in some countries',
+ }, {
+ # video with comments
+ 'url': 'https://netverse.id/video/episode-1-season-2016-ok-food',
+ 'info_dict': {
+ 'id': 'k6hetBPiQMljSxxvAy7',
+ 'ext': 'mp4',
+ 'thumbnail': r're:https?://s\d+\.dmcdn\.net/v/[^/]+/x1080',
+ 'display_id': 'episode-1-season-2016-ok-food',
+ 'like_count': int,
+ 'description': '',
+ 'duration': 1471,
+ 'age_limit': 0,
+ 'timestamp': 1642405848,
+ 'episode_number': 1,
+ 'season': 'Season 2016',
+ 'uploader_id': 'x2ir3vq',
+ 'title': 'Episode 1 - Season 2016 - Ok Food',
+ 'upload_date': '20220117',
+ 'tags': [],
+ 'view_count': int,
+ 'episode': 'Episode 1',
+ 'uploader': 'Net Prime',
+ 'comment_count': int,
+ },
+ 'params': {
+            'getcomments': True,
+        },
+ }, {
+ # video with multiple page comment
+ 'url': 'https://netverse.id/video/match-island-eps-1-fix',
+ 'info_dict': {
+ 'id': 'x8aznjc',
+ 'ext': 'mp4',
+ 'like_count': int,
+ 'tags': ['Match-Island', 'Pd00111'],
+ 'display_id': 'match-island-eps-1-fix',
+ 'view_count': int,
+ 'episode': 'Episode 1',
+ 'uploader': 'Net Prime',
+ 'duration': 4070,
+ 'timestamp': 1653068165,
+ 'description': 'md5:e9cf3b480ad18e9c33b999e3494f223f',
+ 'age_limit': 0,
+ 'title': 'Welcome To Match Island',
+ 'upload_date': '20220520',
+ 'episode_number': 1,
+ 'thumbnail': r're:https?://s\d+\.dmcdn\.net/v/[^/]+/x1080',
+ 'uploader_id': 'x2ir3vq',
+ 'season': 'Season 1',
+ 'comment_count': int,
+ },
+ 'params': {
+            'getcomments': True,
+        },
+ }]
+
+ def _real_extract(self, url):
+ display_id, sites_type = self._match_valid_url(url).group('display_id', 'type')
+ program_json = self._call_api(display_id, sites_type)
+ videos = program_json['response']['videos']
+
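+        # Playback is delegated to Dailymotion; the embedder origin is smuggled
+        # along since the embeds appear to be restricted to the Netverse site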
+ return {
+ '_type': 'url_transparent',
+ 'ie_key': DailymotionIE.ie_key(),
+ 'url': smuggle_url(videos['dailymotion_url'], {'query': {'embedder': 'https://www.netverse.id'}}),
+ 'display_id': display_id,
+ 'title': videos.get('title'),
+ 'season': videos.get('season_name'),
+ 'thumbnail': traverse_obj(videos, ('program_detail', 'thumbnail_image')),
+ 'description': traverse_obj(videos, ('program_detail', 'description')),
+ 'episode_number': videos.get('episode_order'),
+ '__post_extractor': self.extract_comments(display_id),
+ }
+
+
+class NetversePlaylistIE(NetverseBaseIE):
+ _VALID_URL = r'https?://(?:\w+\.)?netverse\.id/(?P<type>webseries)/(?P<display_id>[^/?#&]+)'
+ _TESTS = [{
+ # multiple season
+ 'url': 'https://netverse.id/webseries/tetangga-masa-gitu',
+ 'info_dict': {
+ 'id': 'tetangga-masa-gitu',
+ 'title': 'Tetangga Masa Gitu',
+ },
+ 'playlist_count': 519,
+ }, {
+ # single season
+ 'url': 'https://netverse.id/webseries/kelas-internasional',
+ 'info_dict': {
+ 'id': 'kelas-internasional',
+ 'title': 'Kelas Internasional',
+ },
+ 'playlist_count': 203,
+ }]
+
+ def parse_playlist(self, json_data, playlist_id):
+ slug_sample = traverse_obj(json_data, ('related', 'data', ..., 'slug'))[0]
+ for season in traverse_obj(json_data, ('seasons', ..., 'id')):
+ playlist_json = self._call_api(
+ slug_sample, 'season', display_id=playlist_id, season_id=season)
+
+ for current_page in range(playlist_json['response']['season_list']['last_page']):
+ playlist_json = self._call_api(slug_sample, 'season', query={'page': current_page + 1},
+ season_id=season, display_id=playlist_id)
+ for slug in traverse_obj(playlist_json, ('response', ..., 'data', ..., 'slug')):
+ yield self.url_result(f'https://www.netverse.id/video/{slug}', NetverseIE)
+
+ def _real_extract(self, url):
+ playlist_id, sites_type = self._match_valid_url(url).group('display_id', 'type')
+ playlist_data = self._call_api(playlist_id, sites_type)
+
+ return self.playlist_result(
+ self.parse_playlist(playlist_data['response'], playlist_id),
+ traverse_obj(playlist_data, ('response', 'webseries_info', 'slug')),
+ traverse_obj(playlist_data, ('response', 'webseries_info', 'title')))
+
+
+class NetverseSearchIE(SearchInfoExtractor):
+ _SEARCH_KEY = 'netsearch'
+
+ _TESTS = [{
+ 'url': 'netsearch10:tetangga',
+ 'info_dict': {
+ 'id': 'tetangga',
+ 'title': 'tetangga',
+ },
+ 'playlist_count': 10,
+ }]
+
+ def _search_results(self, query):
+ last_page = None
+ for i in itertools.count(1):
+ search_data = self._download_json(
+ 'https://api.netverse.id/search/elastic/search', query,
+ query={'q': query, 'page': i}, note=f'Downloading page {i}')
+
+ videos = traverse_obj(search_data, ('response', 'data', ...))
+ for video in videos:
+ yield self.url_result(f'https://netverse.id/video/{video["slug"]}', NetverseIE)
+
+ last_page = last_page or traverse_obj(search_data, ('response', 'lastpage'))
+ if not videos or i >= (last_page or 0):
+ break
diff --git a/yt_dlp/extractor/netzkino.py b/yt_dlp/extractor/netzkino.py
new file mode 100644
index 0000000..e9422ee
--- /dev/null
+++ b/yt_dlp/extractor/netzkino.py
@@ -0,0 +1,85 @@
+from .common import InfoExtractor
+from ..utils import (
+ clean_html,
+ int_or_none,
+ js_to_json,
+ parse_iso8601,
+)
+
+
+class NetzkinoIE(InfoExtractor):
+ _WORKING = False
+ _VALID_URL = r'https?://(?:www\.)?netzkino\.de/\#!/[^/]+/(?P<id>[^/]+)'
+
+ _TESTS = [{
+ 'url': 'https://www.netzkino.de/#!/scifikino/rakete-zum-mond',
+ 'md5': '92a3f8b76f8d7220acce5377ea5d4873',
+ 'info_dict': {
+ 'id': 'rakete-zum-mond',
+ 'ext': 'mp4',
+ 'title': 'Rakete zum Mond \u2013 Jules Verne',
+ 'description': 'md5:f0a8024479618ddbfa450ff48ffa6c60',
+ 'upload_date': '20120813',
+ 'thumbnail': r're:https?://.*\.jpg$',
+ 'timestamp': 1344858571,
+ 'age_limit': 12,
+ },
+ 'params': {
+ 'skip_download': 'Download only works from Germany',
+        },
+ }, {
+ 'url': 'https://www.netzkino.de/#!/filme/dr-jekyll-mrs-hyde-2',
+ 'md5': 'c7728b2dadd04ff6727814847a51ef03',
+ 'info_dict': {
+ 'id': 'dr-jekyll-mrs-hyde-2',
+ 'ext': 'mp4',
+ 'title': 'Dr. Jekyll & Mrs. Hyde 2',
+ 'description': 'md5:c2e9626ebd02de0a794b95407045d186',
+ 'upload_date': '20190130',
+ 'thumbnail': r're:https?://.*\.jpg$',
+ 'timestamp': 1548849437,
+ 'age_limit': 18,
+ },
+ 'params': {
+ 'skip_download': 'Download only works from Germany',
+        },
+ }]
+
+ def _real_extract(self, url):
+ mobj = self._match_valid_url(url)
+ video_id = mobj.group('id')
+
+ api_url = 'https://api.netzkino.de.simplecache.net/capi-2.0a/movies/%s.json?d=www' % video_id
+ info = self._download_json(api_url, video_id)
+ custom_fields = info['custom_fields']
+
+ production_js = self._download_webpage(
+ 'http://www.netzkino.de/beta/dist/production.min.js', video_id,
+ note='Downloading player code')
+ avo_js = self._search_regex(
+ r'var urlTemplate=(\{.*?"\})',
+ production_js, 'URL templates')
+ templates = self._parse_json(
+ avo_js, video_id, transform_source=js_to_json)
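+        # urlTemplate (assumed, from the player code) maps CDN keys such as
+        # 'hds'/'hls'/'pmd' to URL templates with a '{}' filename placeholder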
+
+ suffix = {
+ 'hds': '.mp4/manifest.f4m',
+ 'hls': '.mp4/master.m3u8',
+ 'pmd': '.mp4',
+ }
+ film_fn = custom_fields['Streaming'][0]
+ formats = [{
+ 'format_id': key,
+ 'ext': 'mp4',
+ 'url': tpl.replace('{}', film_fn) + suffix[key],
+ } for key, tpl in templates.items()]
+
+ return {
+ 'id': video_id,
+ 'formats': formats,
+ 'title': info['title'],
+        'age_limit': int_or_none((custom_fields.get('FSK') or [None])[0]),
+ 'timestamp': parse_iso8601(info.get('date'), delimiter=' '),
+ 'description': clean_html(info.get('content')),
+ 'thumbnail': info.get('thumbnail'),
+ }
diff --git a/yt_dlp/extractor/newgrounds.py b/yt_dlp/extractor/newgrounds.py
new file mode 100644
index 0000000..67e52ef
--- /dev/null
+++ b/yt_dlp/extractor/newgrounds.py
@@ -0,0 +1,311 @@
+import functools
+import re
+
+from .common import InfoExtractor
+from ..networking.exceptions import HTTPError
+from ..utils import (
+ ExtractorError,
+ OnDemandPagedList,
+ clean_html,
+ extract_attributes,
+ get_element_by_id,
+ int_or_none,
+ parse_count,
+ parse_duration,
+ unified_timestamp,
+ url_or_none,
+ urlencode_postdata,
+ urljoin,
+)
+from ..utils.traversal import traverse_obj
+
+
+class NewgroundsIE(InfoExtractor):
+ _NETRC_MACHINE = 'newgrounds'
+ _VALID_URL = r'https?://(?:www\.)?newgrounds\.com/(?:audio/listen|portal/view)/(?P<id>\d+)(?:/format/flash)?'
+ _TESTS = [{
+ 'url': 'https://www.newgrounds.com/audio/listen/549479',
+ 'md5': 'fe6033d297591288fa1c1f780386f07a',
+ 'info_dict': {
+ 'id': '549479',
+ 'ext': 'mp3',
+ 'title': 'B7 - BusMode',
+ 'uploader': 'Burn7',
+ 'timestamp': 1378892945,
+ 'upload_date': '20130911',
+ 'duration': 143,
+ 'view_count': int,
+ 'description': 'md5:b8b3c2958875189f07d8e313462e8c4f',
+ 'age_limit': 0,
+ 'thumbnail': r're:^https://aicon\.ngfiles\.com/549/549479\.png',
+ },
+ }, {
+ 'url': 'https://www.newgrounds.com/portal/view/1',
+ 'md5': 'fbfb40e2dc765a7e830cb251d370d981',
+ 'info_dict': {
+ 'id': '1',
+ 'ext': 'mp4',
+ 'title': 'Scrotum 1',
+ 'uploader': 'Brian-Beaton',
+ 'timestamp': 955078533,
+ 'upload_date': '20000407',
+ 'view_count': int,
+ 'description': 'Scrotum plays "catch."',
+ 'age_limit': 17,
+ 'thumbnail': r're:^https://picon\.ngfiles\.com/0/flash_1_card\.png',
+ },
+ }, {
+ # source format unavailable, additional mp4 formats
+ 'url': 'http://www.newgrounds.com/portal/view/689400',
+ 'info_dict': {
+ 'id': '689400',
+ 'ext': 'mp4',
+ 'title': 'ZTV News Episode 8',
+ 'uploader': 'ZONE-SAMA',
+ 'timestamp': 1487983183,
+ 'upload_date': '20170225',
+ 'view_count': int,
+ 'description': 'md5:aff9b330ec2e78ed93b1ad6d017accc6',
+ 'age_limit': 17,
+ 'thumbnail': r're:^https://picon\.ngfiles\.com/689000/flash_689400_card\.png',
+ },
+ 'params': {
+ 'skip_download': True,
+ },
+ }, {
+ 'url': 'https://www.newgrounds.com/portal/view/297383',
+ 'md5': '2c11f5fd8cb6b433a63c89ba3141436c',
+ 'info_dict': {
+ 'id': '297383',
+ 'ext': 'mp4',
+ 'title': 'Metal Gear Awesome',
+ 'uploader': 'Egoraptor',
+ 'timestamp': 1140681292,
+ 'upload_date': '20060223',
+ 'view_count': int,
+ 'description': 'md5:9246c181614e23754571995104da92e0',
+ 'age_limit': 13,
+ 'thumbnail': r're:^https://picon\.ngfiles\.com/297000/flash_297383_card\.png',
+        },
+ }, {
+ 'url': 'https://www.newgrounds.com/portal/view/297383/format/flash',
+ 'md5': '5d05585a9a0caca059f5abfbd3865524',
+ 'info_dict': {
+ 'id': '297383',
+ 'ext': 'swf',
+ 'title': 'Metal Gear Awesome',
+ 'description': 'Metal Gear Awesome',
+ 'uploader': 'Egoraptor',
+ 'upload_date': '20060223',
+ 'timestamp': 1140681292,
+ 'view_count': int,
+ 'age_limit': 13,
+ 'thumbnail': r're:^https://picon\.ngfiles\.com/297000/flash_297383_card\.png',
+        },
+ }, {
+ 'url': 'https://www.newgrounds.com/portal/view/823109',
+ 'info_dict': {
+ 'id': '823109',
+ 'ext': 'mp4',
+ 'title': 'Rouge Futa Fleshlight Fuck',
+ 'description': 'I made a fleshlight model and I wanted to use it in an animation. Based on a video by CDNaturally.',
+ 'uploader': 'DefaultUser12',
+ 'upload_date': '20211122',
+ 'timestamp': 1637611540,
+ 'view_count': int,
+ 'age_limit': 18,
+ 'thumbnail': r're:^https://picon\.ngfiles\.com/823000/flash_823109_card\.png',
+        },
+ }]
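+    # Newgrounds content ratings: e=Everyone, t=Teen, m=Mature, a=Adults only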
+ _AGE_LIMIT = {
+ 'e': 0,
+ 't': 13,
+ 'm': 17,
+ 'a': 18,
+ }
+ _LOGIN_URL = 'https://www.newgrounds.com/passport'
+
+ def _perform_login(self, username, password):
+ login_webpage = self._download_webpage(self._LOGIN_URL, None, 'Downloading login page')
+ login_url = urljoin(self._LOGIN_URL, self._search_regex(
+ r'<form action="([^"]+)"', login_webpage, 'login endpoint', default=None))
+ result = self._download_json(login_url, None, 'Logging in', headers={
+ 'Accept': 'application/json',
+ 'Referer': self._LOGIN_URL,
+            'X-Requested-With': 'XMLHttpRequest',
+ }, data=urlencode_postdata({
+ **self._hidden_inputs(login_webpage),
+ 'username': username,
+ 'password': password,
+ }))
+ if errors := traverse_obj(result, ('errors', ..., {str})):
+ raise ExtractorError(', '.join(errors) or 'Unknown Error', expected=True)
+
+ def _real_extract(self, url):
+ media_id = self._match_id(url)
+ try:
+ webpage = self._download_webpage(url, media_id)
+ except ExtractorError as error:
+ if isinstance(error.cause, HTTPError) and error.cause.status == 401:
+ self.raise_login_required()
+ raise
+
+ media_url_string = self._search_regex(
+ r'embedController\(\[{"url"\s*:\s*("[^"]+"),', webpage, 'media url', default=None)
+ if media_url_string:
+ uploader = None
+ formats = [{
+ 'url': self._parse_json(media_url_string, media_id),
+ 'format_id': 'source',
+ 'quality': 1,
+ }]
+
+ else:
+ json_video = self._download_json(f'https://www.newgrounds.com/portal/video/{media_id}', media_id, headers={
+ 'Accept': 'application/json',
+ 'Referer': url,
+                'X-Requested-With': 'XMLHttpRequest',
+ })
+
+ formats = []
+ uploader = traverse_obj(json_video, ('author', {str}))
+ for format_id, sources in traverse_obj(json_video, ('sources', {dict.items}, ...)):
+ quality = int_or_none(format_id[:-1])
+ formats.extend({
+ 'format_id': format_id,
+ 'quality': quality,
+ 'url': url,
+ } for url in traverse_obj(sources, (..., 'src', {url_or_none})))
+
+ if not uploader:
+ uploader = self._html_search_regex(
+ (r'(?s)<h4[^>]*>(.+?)</h4>.*?<em>\s*(?:Author|Artist)\s*</em>',
+ r'(?:Author|Writer)\s*<a[^>]+>([^<]+)'), webpage, 'uploader',
+ fatal=False)
+
+ if len(formats) == 1:
+ formats[0]['filesize'] = int_or_none(self._html_search_regex(
+ r'"filesize"\s*:\s*["\']?([\d]+)["\']?,', webpage, 'filesize', default=None))
+
+ video_type_description = self._html_search_regex(
+ r'"description"\s*:\s*["\']?([^"\']+)["\']?,', webpage, 'media type', default=None)
+ if video_type_description == 'Audio File':
+ formats[0]['vcodec'] = 'none'
+
+ self._check_formats(formats, media_id)
+ return {
+ 'id': media_id,
+ 'title': self._html_extract_title(webpage),
+ 'uploader': uploader,
+ 'timestamp': unified_timestamp(self._search_regex(
+ r'itemprop="(?:uploadDate|datePublished)"\s+content="([^"]+)"',
+ webpage, 'timestamp', default=None)),
+ 'duration': parse_duration(self._html_search_regex(
+ r'"duration"\s*:\s*["\']?(\d+)["\']?', webpage, 'duration', default=None)),
+ 'formats': formats,
+ 'thumbnail': self._og_search_thumbnail(webpage),
+ 'description': (
+ clean_html(get_element_by_id('author_comments', webpage))
+ or self._og_search_description(webpage)),
+ 'age_limit': self._AGE_LIMIT.get(self._html_search_regex(
+ r'<h2\s+class=["\']rated-([etma])["\']', webpage, 'age_limit', default='e')),
+ 'view_count': parse_count(self._html_search_regex(
+ r'(?s)<dt>\s*(?:Views|Listens)\s*</dt>\s*<dd>([\d\.,]+)</dd>',
+ webpage, 'view count', default=None)),
+ }
+
+
+class NewgroundsPlaylistIE(InfoExtractor):
+ IE_NAME = 'Newgrounds:playlist'
+ _VALID_URL = r'https?://(?:www\.)?newgrounds\.com/(?:collection|[^/]+/search/[^/]+)/(?P<id>[^/?#&]+)'
+ _TESTS = [{
+ 'url': 'https://www.newgrounds.com/collection/cats',
+ 'info_dict': {
+ 'id': 'cats',
+ 'title': 'Cats',
+ },
+ 'playlist_mincount': 45,
+ }, {
+ 'url': 'https://www.newgrounds.com/collection/dogs',
+ 'info_dict': {
+ 'id': 'dogs',
+ 'title': 'Dogs',
+ },
+ 'playlist_mincount': 26,
+ }, {
+ 'url': 'http://www.newgrounds.com/audio/search/title/cats',
+ 'only_matching': True,
+ }]
+
+ def _real_extract(self, url):
+ playlist_id = self._match_id(url)
+
+ webpage = self._download_webpage(url, playlist_id)
+
+ title = self._html_extract_title(webpage, default=None)
+
+ # cut left menu
+ webpage = self._search_regex(
+ r'(?s)<div[^>]+\bclass=["\']column wide(.+)',
+ webpage, 'wide column', default=webpage)
+
+ entries = []
+ for a, path, media_id in re.findall(
+ r'(<a[^>]+\bhref=["\'][^"\']+((?:portal/view|audio/listen)/(\d+))[^>]+>)',
+ webpage):
+ a_class = extract_attributes(a).get('class')
+ if a_class not in ('item-portalsubmission', 'item-audiosubmission'):
+ continue
+ entries.append(
+ self.url_result(
+ f'https://www.newgrounds.com/{path}',
+ ie=NewgroundsIE.ie_key(), video_id=media_id))
+
+ return self.playlist_result(entries, playlist_id, title)
+
+
+class NewgroundsUserIE(InfoExtractor):
+ IE_NAME = 'Newgrounds:user'
+ _VALID_URL = r'https?://(?P<id>[^\.]+)\.newgrounds\.com/(?:movies|audio)/?(?:[#?]|$)'
+ _TESTS = [{
+ 'url': 'https://burn7.newgrounds.com/audio',
+ 'info_dict': {
+ 'id': 'burn7',
+ },
+ 'playlist_mincount': 150,
+ }, {
+ 'url': 'https://burn7.newgrounds.com/movies',
+ 'info_dict': {
+ 'id': 'burn7',
+ },
+ 'playlist_mincount': 2,
+ }, {
+ 'url': 'https://brian-beaton.newgrounds.com/movies',
+ 'info_dict': {
+ 'id': 'brian-beaton',
+ },
+ 'playlist_mincount': 10,
+ }]
+ _PAGE_SIZE = 30
+
+ def _fetch_page(self, channel_id, url, page):
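+        # OnDemandPagedList passes 0-based page numbers; the site API is 1-based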
+ page += 1
+ posts_info = self._download_json(
+ f'{url}?page={page}', channel_id,
+ note=f'Downloading page {page}', headers={
+ 'Accept': 'application/json, text/javascript, */*; q = 0.01',
+ 'X-Requested-With': 'XMLHttpRequest',
+ })
+ for post in traverse_obj(posts_info, ('items', ..., ..., {str})):
+ path, media_id = self._search_regex(
+ r'<a[^>]+\bhref=["\'][^"\']+((?:portal/view|audio/listen)/(\d+))[^>]+>',
+ post, 'url', group=(1, 2))
+ yield self.url_result(f'https://www.newgrounds.com/{path}', NewgroundsIE.ie_key(), media_id)
+
+ def _real_extract(self, url):
+ channel_id = self._match_id(url)
+
+ entries = OnDemandPagedList(functools.partial(
+ self._fetch_page, channel_id, url), self._PAGE_SIZE)
+
+ return self.playlist_result(entries, channel_id)
diff --git a/yt_dlp/extractor/newspicks.py b/yt_dlp/extractor/newspicks.py
new file mode 100644
index 0000000..4a1cb0a
--- /dev/null
+++ b/yt_dlp/extractor/newspicks.py
@@ -0,0 +1,53 @@
+import re
+
+from .common import InfoExtractor
+from ..utils import ExtractorError
+
+
+class NewsPicksIE(InfoExtractor):
+ _VALID_URL = r'https?://newspicks\.com/movie-series/(?P<channel_id>\d+)\?movieId=(?P<id>\d+)'
+
+ _TESTS = [{
+ 'url': 'https://newspicks.com/movie-series/11?movieId=1813',
+ 'info_dict': {
+ 'id': '1813',
+ 'title': '日本の課題を破壊せよ【ゲスト:成田悠輔】',
+ 'description': 'md5:09397aad46d6ded6487ff13f138acadf',
+ 'channel': 'HORIE ONE',
+ 'channel_id': '11',
+ 'release_date': '20220117',
+ 'thumbnail': r're:https://.+jpg',
+ 'ext': 'mp4',
+ },
+ }]
+
+ def _real_extract(self, url):
+ video_id, channel_id = self._match_valid_url(url).group('id', 'channel_id')
+ webpage = self._download_webpage(url, video_id)
+ entries = self._parse_html5_media_entries(
+ url, webpage.replace('movie-for-pc', 'movie'), video_id, 'hls')
+ if not entries:
+ raise ExtractorError('No HTML5 media elements found')
+ info = entries[0]
+
+ title = self._html_search_meta('og:title', webpage, fatal=False)
+ description = self._html_search_meta(
+        ('og:description', 'twitter:description'), webpage, fatal=False)
+    channel = self._html_search_regex(
+        rf'value="{channel_id}".+?<div\s+class="title">(.+?)</div', webpage, 'channel name', fatal=False)
+ if not title or not channel:
+        title, channel = re.split(r'\s*\|\s*', self._html_extract_title(webpage))
+
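+    # The on-air date is rendered as Japanese 年/月/日; the captured numbers
+    # are reassembled as YYYYMMDD below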
+ release_date = self._search_regex(
+ r'<span\s+class="on-air-date">\s*(\d+)年(\d+)月(\d+)日\s*</span>',
+ webpage, 'release date', fatal=False, group=(1, 2, 3))
+
+ info.update({
+ 'id': video_id,
+ 'title': title,
+ 'description': description,
+ 'channel': channel,
+ 'channel_id': channel_id,
+ 'release_date': ('%04d%02d%02d' % tuple(map(int, release_date))) if release_date else None,
+ })
+ return info
diff --git a/yt_dlp/extractor/newsy.py b/yt_dlp/extractor/newsy.py
new file mode 100644
index 0000000..a5a7b16
--- /dev/null
+++ b/yt_dlp/extractor/newsy.py
@@ -0,0 +1,47 @@
+from .common import InfoExtractor
+from ..utils import (
+ js_to_json,
+ merge_dicts,
+)
+
+
+class NewsyIE(InfoExtractor):
+ _VALID_URL = r'https?://(?:www\.)?newsy\.com/stories/(?P<id>[^/?#$&]+)'
+
+ _TESTS = [{
+ 'url': 'https://www.newsy.com/stories/nft-trend-leads-to-fraudulent-art-auctions/',
+ 'info_dict': {
+ 'id': '609d65125b086c24fb529312',
+ 'ext': 'mp4',
+ 'title': 'NFT Art Auctions Have A Piracy Problem',
+ 'description': 'md5:971e52ab8bc97e50305475cde8284c83',
+ 'display_id': 'nft-trend-leads-to-fraudulent-art-auctions',
+ 'timestamp': 1621339200,
+ 'duration': 339630,
+ 'thumbnail': 'https://cdn.newsy.com/images/videos/x/1620927824_xyrrP4.jpg',
+            'upload_date': '20210518',
+        },
+        'params': {'skip_download': True},
+ }]
+
+ def _real_extract(self, url):
+ display_id = self._match_id(url)
+ webpage = self._download_webpage(url, display_id)
+ data_json = self._parse_json(self._html_search_regex(
+ r'data-video-player\s?=\s?"({[^"]+})">', webpage, 'data'), display_id, js_to_json)
+ ld_json = self._search_json_ld(webpage, display_id, fatal=False)
+
+ formats, subtitles = [], {}
+ if data_json.get('stream'):
+ fmts, subs = self._extract_m3u8_formats_and_subtitles(data_json['stream'], display_id)
+ formats.extend(fmts)
+ subtitles = self._merge_subtitles(subtitles, subs)
+ return merge_dicts(ld_json, {
+ 'id': data_json['id'],
+ 'display_id': display_id,
+ 'title': data_json.get('headline'),
+ 'duration': data_json.get('duration'),
+ 'thumbnail': data_json.get('image'),
+ 'formats': formats,
+ 'subtitles': subtitles,
+ })
diff --git a/yt_dlp/extractor/nextmedia.py b/yt_dlp/extractor/nextmedia.py
new file mode 100644
index 0000000..871d3e6
--- /dev/null
+++ b/yt_dlp/extractor/nextmedia.py
@@ -0,0 +1,237 @@
+from .common import InfoExtractor
+from ..compat import compat_urlparse
+from ..utils import (
+ clean_html,
+ get_element_by_class,
+ int_or_none,
+ parse_iso8601,
+ remove_start,
+ unified_timestamp,
+)
+
+
+class NextMediaIE(InfoExtractor):
+ IE_DESC = '蘋果日報'
+ _VALID_URL = r'https?://hk\.apple\.nextmedia\.com/[^/]+/[^/]+/(?P<date>\d+)/(?P<id>\d+)'
+ _TESTS = [{
+ 'url': 'http://hk.apple.nextmedia.com/realtime/news/20141108/53109199',
+ 'md5': 'dff9fad7009311c421176d1ac90bfe4f',
+ 'info_dict': {
+ 'id': '53109199',
+ 'ext': 'mp4',
+ 'title': '【佔領金鐘】50外國領事議員撐場 讚學生勇敢香港有希望',
+ 'thumbnail': r're:^https?://.*\.jpg$',
+ 'description': 'md5:28222b9912b6665a21011b034c70fcc7',
+ 'timestamp': 1415456273,
+ 'upload_date': '20141108',
+ }
+ }]
+
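+    # The video URL is embedded in the page JavaScript as { url: '...' }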
+ _URL_PATTERN = r'\{ url: \'(.+)\' \}'
+
+ def _real_extract(self, url):
+ news_id = self._match_id(url)
+ page = self._download_webpage(url, news_id)
+ return self._extract_from_nextmedia_page(news_id, url, page)
+
+ def _extract_from_nextmedia_page(self, news_id, url, page):
+ redirection_url = self._search_regex(
+ r'window\.location\.href\s*=\s*([\'"])(?P<url>(?!\1).+)\1',
+ page, 'redirection URL', default=None, group='url')
+ if redirection_url:
+ return self.url_result(compat_urlparse.urljoin(url, redirection_url))
+
+ title = self._fetch_title(page)
+ video_url = self._search_regex(self._URL_PATTERN, page, 'video url')
+
+ attrs = {
+ 'id': news_id,
+ 'title': title,
+ 'url': video_url, # ext can be inferred from url
+ 'thumbnail': self._fetch_thumbnail(page),
+ 'description': self._fetch_description(page),
+ }
+
+ timestamp = self._fetch_timestamp(page)
+ if timestamp:
+ attrs['timestamp'] = timestamp
+ else:
+ attrs['upload_date'] = self._fetch_upload_date(url)
+
+ return attrs
+
+ def _fetch_title(self, page):
+ return self._og_search_title(page)
+
+ def _fetch_thumbnail(self, page):
+ return self._og_search_thumbnail(page)
+
+ def _fetch_timestamp(self, page):
+        date_created = self._search_regex('"dateCreated":"([^"]+)"', page, 'created time')
+        return parse_iso8601(date_created)
+
+ def _fetch_upload_date(self, url):
+ return self._search_regex(self._VALID_URL, url, 'upload date', group='date')
+
+ def _fetch_description(self, page):
+ return self._og_search_property('description', page)
+
+
+class NextMediaActionNewsIE(NextMediaIE): # XXX: Do not subclass from concrete IE
+ IE_DESC = '蘋果日報 - 動新聞'
+ _VALID_URL = r'https?://hk\.dv\.nextmedia\.com/actionnews/[^/]+/(?P<date>\d+)/(?P<id>\d+)/\d+'
+ _TESTS = [{
+ 'url': 'http://hk.dv.nextmedia.com/actionnews/hit/20150121/19009428/20061460',
+ 'md5': '05fce8ffeed7a5e00665d4b7cf0f9201',
+ 'info_dict': {
+ 'id': '19009428',
+ 'ext': 'mp4',
+ 'title': '【壹週刊】細10年男友偷食 50歲邵美琪再失戀',
+ 'thumbnail': r're:^https?://.*\.jpg$',
+ 'description': 'md5:cd802fad1f40fd9ea178c1e2af02d659',
+ 'timestamp': 1421791200,
+ 'upload_date': '20150120',
+ }
+ }]
+
+ def _real_extract(self, url):
+ news_id = self._match_id(url)
+ actionnews_page = self._download_webpage(url, news_id)
+ article_url = self._og_search_url(actionnews_page)
+ article_page = self._download_webpage(article_url, news_id)
+ return self._extract_from_nextmedia_page(news_id, url, article_page)
+
+
+class AppleDailyIE(NextMediaIE): # XXX: Do not subclass from concrete IE
+ IE_DESC = '臺灣蘋果日報'
+ _VALID_URL = r'https?://(www|ent)\.appledaily\.com\.tw/[^/]+/[^/]+/[^/]+/(?P<date>\d+)/(?P<id>\d+)(/.*)?'
+ _TESTS = [{
+ 'url': 'http://ent.appledaily.com.tw/enews/article/entertainment/20150128/36354694',
+ 'md5': 'a843ab23d150977cc55ef94f1e2c1e4d',
+ 'info_dict': {
+ 'id': '36354694',
+ 'ext': 'mp4',
+ 'title': '周亭羽走過摩鐵陰霾2男陪吃 九把刀孤寒看醫生',
+ 'thumbnail': r're:^https?://.*\.jpg$',
+ 'description': 'md5:2acd430e59956dc47cd7f67cb3c003f4',
+ 'upload_date': '20150128',
+ }
+ }, {
+ 'url': 'http://www.appledaily.com.tw/realtimenews/article/strange/20150128/550549/%E4%B8%8D%E6%BB%BF%E8%A2%AB%E8%B8%A9%E8%85%B3%E3%80%80%E5%B1%B1%E6%9D%B1%E5%85%A9%E5%A4%A7%E5%AA%BD%E4%B8%80%E8%B7%AF%E6%89%93%E4%B8%8B%E8%BB%8A',
+ 'md5': '86b4e9132d158279c7883822d94ccc49',
+ 'info_dict': {
+ 'id': '550549',
+ 'ext': 'mp4',
+ 'title': '不滿被踩腳 山東兩大媽一路打下車',
+ 'thumbnail': r're:^https?://.*\.jpg$',
+ 'description': 'md5:175b4260c1d7c085993474217e4ab1b4',
+ 'upload_date': '20150128',
+ }
+ }, {
+ 'url': 'http://www.appledaily.com.tw/animation/realtimenews/new/20150128/5003671',
+ 'md5': '03df296d95dedc2d5886debbb80cb43f',
+ 'info_dict': {
+ 'id': '5003671',
+ 'ext': 'mp4',
+ 'title': '20正妹熱舞 《刀龍傳說Online》火辣上市',
+ 'thumbnail': r're:^https?://.*\.jpg$',
+ 'description': 'md5:23c0aac567dc08c9c16a3161a2c2e3cd',
+ 'upload_date': '20150128',
+ },
+ 'skip': 'redirect to http://www.appledaily.com.tw/animation/',
+ }, {
+ # No thumbnail
+ 'url': 'http://www.appledaily.com.tw/animation/realtimenews/new/20150128/5003673/',
+ 'md5': 'b06182cd386ea7bc6115ec7ff0f72aeb',
+ 'info_dict': {
+ 'id': '5003673',
+ 'ext': 'mp4',
+ 'title': '半夜尿尿 好像會看到___',
+ 'description': 'md5:61d2da7fe117fede148706cdb85ac066',
+ 'upload_date': '20150128',
+ },
+ 'expected_warnings': [
+ 'video thumbnail',
+ ],
+ 'skip': 'redirect to http://www.appledaily.com.tw/animation/',
+ }, {
+ 'url': 'http://www.appledaily.com.tw/appledaily/article/supplement/20140417/35770334/',
+ 'md5': 'eaa20e6b9df418c912d7f5dec2ba734d',
+ 'info_dict': {
+ 'id': '35770334',
+ 'ext': 'mp4',
+ 'title': '咖啡占卜測 XU裝熟指數',
+ 'thumbnail': r're:^https?://.*\.jpg$',
+ 'description': 'md5:7b859991a6a4fedbdf3dd3b66545c748',
+ 'upload_date': '20140417',
+ },
+ }, {
+ 'url': 'http://www.appledaily.com.tw/actionnews/appledaily/7/20161003/960588/',
+ 'only_matching': True,
+ }, {
+ # Redirected from http://ent.appledaily.com.tw/enews/article/entertainment/20150128/36354694
+ 'url': 'http://ent.appledaily.com.tw/section/article/headline/20150128/36354694',
+ 'only_matching': True,
+ }]
+
+ _URL_PATTERN = r'\{url: \'(.+)\'\}'
+
+ def _fetch_title(self, page):
+ return (self._html_search_regex(r'<h1 id="h1">([^<>]+)</h1>', page, 'news title', default=None)
+ or self._html_search_meta('description', page, 'news title'))
+
+ def _fetch_thumbnail(self, page):
+ return self._html_search_regex(r"setInitialImage\(\'([^']+)'\)", page, 'video thumbnail', fatal=False)
+
+ def _fetch_timestamp(self, page):
+ return None
+
+ def _fetch_description(self, page):
+ return self._html_search_meta('description', page, 'news description')
+
+
+class NextTVIE(InfoExtractor):
+ _WORKING = False
+ _ENABLED = None # XXX: pass through to GenericIE
+ IE_DESC = '壹電視'
+ _VALID_URL = r'https?://(?:www\.)?nexttv\.com\.tw/(?:[^/]+/)+(?P<id>\d+)'
+
+ _TEST = {
+ 'url': 'http://www.nexttv.com.tw/news/realtime/politics/11779671',
+ 'info_dict': {
+ 'id': '11779671',
+ 'ext': 'mp4',
+ 'title': '「超收稅」近4千億! 藍議員籲發消費券',
+ 'thumbnail': r're:^https?://.*\.jpg$',
+ 'timestamp': 1484825400,
+ 'upload_date': '20170119',
+ 'view_count': int,
+ },
+ }
+
+ def _real_extract(self, url):
+ video_id = self._match_id(url)
+
+ webpage = self._download_webpage(url, video_id)
+
+ title = self._html_search_regex(
+ r'<h1[^>]*>([^<]+)</h1>', webpage, 'title')
+
+ data = self._hidden_inputs(webpage)
+
+ video_url = data['ntt-vod-src-detailview']
+
+ date_str = get_element_by_class('date', webpage)
+ timestamp = unified_timestamp(date_str + '+0800') if date_str else None
+
+ view_count = int_or_none(remove_start(
+ clean_html(get_element_by_class('click', webpage)), '點閱:'))
+
+ return {
+ 'id': video_id,
+ 'title': title,
+ 'url': video_url,
+ 'thumbnail': data.get('ntt-vod-img-src'),
+ 'timestamp': timestamp,
+ 'view_count': view_count,
+ }
diff --git a/yt_dlp/extractor/nexx.py b/yt_dlp/extractor/nexx.py
new file mode 100644
index 0000000..b4874c8
--- /dev/null
+++ b/yt_dlp/extractor/nexx.py
@@ -0,0 +1,525 @@
+import hashlib
+import random
+import re
+import time
+
+from .common import InfoExtractor
+from ..compat import compat_str
+from ..utils import (
+ ExtractorError,
+ int_or_none,
+ parse_duration,
+ srt_subtitles_timecode,
+ traverse_obj,
+ try_get,
+ urlencode_postdata,
+)
+
+
+class NexxIE(InfoExtractor):
+ _VALID_URL = r'''(?x)
+ (?:
+ https?://api\.nexx(?:\.cloud|cdn\.com)/v3(?:\.\d)?/(?P<domain_id>\d+)/videos/byid/|
+ nexx:(?:(?P<domain_id_s>\d+):)?|
+ https?://arc\.nexx\.cloud/api/video/
+ )
+ (?P<id>\d+)
+ '''
+ _TESTS = [{
+ # movie
+ 'url': 'https://api.nexx.cloud/v3/748/videos/byid/128907',
+ 'md5': '31899fd683de49ad46f4ee67e53e83fe',
+ 'info_dict': {
+ 'id': '128907',
+ 'ext': 'mp4',
+ 'title': 'Stiftung Warentest',
+ 'alt_title': 'Wie ein Test abläuft',
+ 'description': 'md5:d1ddb1ef63de721132abd38639cc2fd2',
+ 'creator': 'SPIEGEL TV',
+ 'thumbnail': r're:^https?://.*\.jpg$',
+ 'duration': 2509,
+ 'timestamp': 1384264416,
+ 'upload_date': '20131112',
+ },
+        'skip': 'Spiegel nexx CDNs are now disabled',
+ }, {
+ # episode with captions
+ 'url': 'https://api.nexx.cloud/v3.1/741/videos/byid/1701834',
+ 'info_dict': {
+ 'id': '1701834',
+ 'ext': 'mp4',
+ 'title': 'Mein Leben mit \'nem TikTok E-Boy 😤',
+ 'alt_title': 'Mein Leben mit \'nem TikTok E-Boy 😤',
+ 'description': 'md5:f84f395a881fd143f952c892deab528d',
+ 'thumbnail': r're:^https?://.*\.jpg$',
+ 'duration': 770,
+ 'timestamp': 1595600027,
+ 'upload_date': '20200724',
+ 'episode_number': 2,
+ 'season_number': 2,
+ 'episode': 'Episode 2',
+ 'season': 'Season 2',
+ },
+ 'params': {
+ 'skip_download': True,
+ },
+ }, {
+ 'url': 'nexx:741:1269984',
+ 'md5': 'd5f14e14b592501e51addd5abef95a7f',
+ 'info_dict': {
+ 'id': '1269984',
+ 'ext': 'mp4',
+ 'title': '1 TAG ohne KLO... wortwörtlich! ?',
+ 'alt_title': '1 TAG ohne KLO... wortwörtlich! ?',
+ 'description': 'md5:2016393a31991a900946432ccdd09a6f',
+ 'thumbnail': r're:^https?://.*\.jpg$',
+ 'duration': 607,
+ 'timestamp': 1518614955,
+ 'upload_date': '20180214',
+ },
+ }, {
+ # free cdn from http://www.spiegel.de/video/eifel-zoo-aufregung-um-ausgebrochene-raubtiere-video-99018031.html
+ 'url': 'nexx:747:1533779',
+ 'md5': '6bf6883912b82b7069fb86c2297e9893',
+ 'info_dict': {
+ 'id': '1533779',
+ 'ext': 'mp4',
+ 'title': 'Aufregung um ausgebrochene Raubtiere',
+ 'alt_title': 'Eifel-Zoo',
+ 'description': 'md5:f21375c91c74ad741dcb164c427999d2',
+ 'thumbnail': r're:^https?://.*\.jpg$',
+ 'duration': 111,
+ 'timestamp': 1527874460,
+ 'upload_date': '20180601',
+ },
+        'skip': 'Spiegel nexx CDNs are now disabled',
+ }, {
+ 'url': 'https://api.nexxcdn.com/v3/748/videos/byid/128907',
+ 'only_matching': True,
+ }, {
+ 'url': 'nexx:748:128907',
+ 'only_matching': True,
+ }, {
+ 'url': 'nexx:128907',
+ 'only_matching': True,
+ }, {
+ 'url': 'https://arc.nexx.cloud/api/video/128907.json',
+ 'only_matching': True,
+ }]
+
+ @staticmethod
+ def _extract_domain_id(webpage):
+ mobj = re.search(
+ r'<script\b[^>]+\bsrc=["\'](?:https?:)?//(?:require|arc)\.nexx(?:\.cloud|cdn\.com)/(?:sdk/)?(?P<id>\d+)',
+ webpage)
+ return mobj.group('id') if mobj else None
+
+ @classmethod
+ def _extract_embed_urls(cls, url, webpage):
+ # Reference:
+ # 1. https://nx-s.akamaized.net/files/201510/44.pdf
+
+ entries = []
+
+ # JavaScript Integration
+ domain_id = NexxIE._extract_domain_id(webpage)
+ if domain_id:
+ for video_id in re.findall(
+ r'(?is)onPLAYReady.+?_play\.(?:init|(?:control\.)?addPlayer)\s*\(.+?\s*,\s*["\']?(\d+)',
+ webpage):
+ entries.append(
+ 'https://api.nexx.cloud/v3/%s/videos/byid/%s'
+ % (domain_id, video_id))
+
+ # TODO: support more embed formats
+
+ return entries
+
+ def _handle_error(self, response):
+ if traverse_obj(response, ('metadata', 'notice'), expected_type=str):
+ self.report_warning('%s said: %s' % (self.IE_NAME, response['metadata']['notice']))
+ status = int_or_none(try_get(
+ response, lambda x: x['metadata']['status']) or 200)
+ if 200 <= status < 300:
+ return
+ raise ExtractorError(
+ '%s said: %s' % (self.IE_NAME, response['metadata']['errorhint']),
+ expected=True)
+
+ def _call_api(self, domain_id, path, video_id, data=None, headers={}):
+ headers['Content-Type'] = 'application/x-www-form-urlencoded; charset=UTF-8'
+ result = self._download_json(
+ 'https://api.nexx.cloud/v3/%s/%s' % (domain_id, path), video_id,
+ 'Downloading %s JSON' % path, data=urlencode_postdata(data),
+ headers=headers)
+ self._handle_error(result)
+ return result['result']
+
+ def _extract_free_formats(self, video, video_id):
+ stream_data = video['streamdata']
+ cdn = stream_data['cdnType']
+ assert cdn == 'free'
+
+ hash = video['general']['hash']
+
+ ps = compat_str(stream_data['originalDomain'])
+ if stream_data['applyFolderHierarchy'] == 1:
+ s = ('%04d' % int(video_id))[::-1]
+ ps += '/%s/%s' % (s[0:2], s[2:4])
+ ps += '/%s/%s_' % (video_id, hash)
+
+ t = 'http://%s' + ps
+ fd = stream_data['azureFileDistribution'].split(',')
+ cdn_provider = stream_data['cdnProvider']
+
+ def p0(p):
+ return '_%s' % p if stream_data['applyAzureStructure'] == 1 else ''
+
+ formats = []
+ if cdn_provider == 'ak':
+ t += ','
+ for i in fd:
+ p = i.split(':')
+ t += p[1] + p0(int(p[0])) + ','
+ t += '.mp4.csmil/master.%s'
+ elif cdn_provider == 'ce':
+ k = t.split('/')
+ h = k.pop()
+ http_base = t = '/'.join(k)
+ http_base = http_base % stream_data['cdnPathHTTP']
+ t += '/asset.ism/manifest.%s?dcp_ver=aos4&videostream='
+ for i in fd:
+ p = i.split(':')
+ tbr = int(p[0])
+ filename = '%s%s%s.mp4' % (h, p[1], p0(tbr))
+ f = {
+ 'url': http_base + '/' + filename,
+ 'format_id': '%s-http-%d' % (cdn, tbr),
+ 'tbr': tbr,
+ }
+ width_height = p[1].split('x')
+ if len(width_height) == 2:
+ f.update({
+ 'width': int_or_none(width_height[0]),
+ 'height': int_or_none(width_height[1]),
+ })
+ formats.append(f)
+ a = filename + ':%s' % (tbr * 1000)
+ t += a + ','
+ t = t[:-1] + '&audiostream=' + a.split(':')[0]
+ else:
+ assert False
+
+ if cdn_provider == 'ce':
+ formats.extend(self._extract_mpd_formats(
+ t % (stream_data['cdnPathDASH'], 'mpd'), video_id,
+ mpd_id='%s-dash' % cdn, fatal=False))
+ formats.extend(self._extract_m3u8_formats(
+ t % (stream_data['cdnPathHLS'], 'm3u8'), video_id, 'mp4',
+ entry_protocol='m3u8_native', m3u8_id='%s-hls' % cdn, fatal=False))
+
+ return formats
+
+ def _extract_3q_formats(self, video, video_id):
+ stream_data = video['streamdata']
+ cdn = stream_data['cdnType']
+ assert cdn == '3q'
+
+ q_acc, q_prefix, q_locator, q_hash = stream_data['qAccount'], stream_data['qPrefix'], stream_data['qLocator'], stream_data['qHash']
+ protection_key = traverse_obj(
+ video, ('protectiondata', 'key'), expected_type=str)
+
+ def get_cdn_shield_base(shield_type=''):
+ for secure in ('', 's'):
+ cdn_shield = stream_data.get('cdnShield%sHTTP%s' % (shield_type, secure.upper()))
+ if cdn_shield:
+ return 'http%s://%s' % (secure, cdn_shield)
+ return f'http://sdn-global-{"prog" if shield_type.lower() == "prog" else "streaming"}-cache.3qsdn.com/' + (f's/{protection_key}/' if protection_key else '')
+
+ stream_base = get_cdn_shield_base()
+
+ formats = []
+ formats.extend(self._extract_m3u8_formats(
+ f'{stream_base}{q_acc}/files/{q_prefix}/{q_locator}/{q_acc}-{stream_data.get("qHEVCHash") or q_hash}.ism/manifest.m3u8',
+ video_id, 'mp4', m3u8_id=f'{cdn}-hls', fatal=False))
+ formats.extend(self._extract_mpd_formats(
+ f'{stream_base}{q_acc}/files/{q_prefix}/{q_locator}/{q_acc}-{q_hash}.ism/manifest.mpd',
+ video_id, mpd_id=f'{cdn}-dash', fatal=False))
+
+ progressive_base = get_cdn_shield_base('Prog')
+ q_references = stream_data.get('qReferences') or ''
+ fds = q_references.split(',')
+ for fd in fds:
+ ss = fd.split(':')
+ if len(ss) != 3:
+ continue
+ tbr = int_or_none(ss[1], scale=1000)
+ formats.append({
+ 'url': f'{progressive_base}{q_acc}/uploads/{q_acc}-{ss[2]}.webm',
+ 'format_id': f'{cdn}-{ss[0]}{"-%s" % tbr if tbr else ""}',
+ 'tbr': tbr,
+ })
+
+ azure_file_distribution = stream_data.get('azureFileDistribution') or ''
+ fds = azure_file_distribution.split(',')
+ for fd in fds:
+ ss = fd.split(':')
+ if len(ss) != 3:
+ continue
+ tbr = int_or_none(ss[0])
+ width, height = ss[1].split('x') if len(ss[1].split('x')) == 2 else (None, None)
+ f = {
+ 'url': f'{progressive_base}{q_acc}/files/{q_prefix}/{q_locator}/{ss[2]}.mp4',
+                'format_id': f'{cdn}-http{"-%s" % tbr if tbr else ""}',
+ 'tbr': tbr,
+ 'width': int_or_none(width),
+ 'height': int_or_none(height),
+ }
+ formats.append(f)
+
+ return formats
+
+ def _extract_azure_formats(self, video, video_id):
+ stream_data = video['streamdata']
+ cdn = stream_data['cdnType']
+ assert cdn == 'azure'
+
+ azure_locator = stream_data['azureLocator']
+
+ def get_cdn_shield_base(shield_type='', static=False):
+ for secure in ('', 's'):
+ cdn_shield = stream_data.get('cdnShield%sHTTP%s' % (shield_type, secure.upper()))
+ if cdn_shield:
+ return 'http%s://%s' % (secure, cdn_shield)
+ else:
+ if 'fb' in stream_data['azureAccount']:
+ prefix = 'df' if static else 'f'
+ else:
+ prefix = 'd' if static else 'p'
+ account = int(stream_data['azureAccount'].replace('nexxplayplus', '').replace('nexxplayfb', ''))
+ return 'http://nx-%s%02d.akamaized.net/' % (prefix, account)
+
+ language = video['general'].get('language_raw') or ''
+
+ azure_stream_base = get_cdn_shield_base()
+ is_ml = ',' in language
+ azure_manifest_url = '%s%s/%s_src%s.ism/Manifest' % (
+ azure_stream_base, azure_locator, video_id, ('_manifest' if is_ml else '')) + '%s'
+
+ protection_token = try_get(
+ video, lambda x: x['protectiondata']['token'], compat_str)
+ if protection_token:
+ azure_manifest_url += '?hdnts=%s' % protection_token
+
+ formats = self._extract_m3u8_formats(
+ azure_manifest_url % '(format=m3u8-aapl)',
+ video_id, 'mp4', 'm3u8_native',
+ m3u8_id='%s-hls' % cdn, fatal=False)
+ formats.extend(self._extract_mpd_formats(
+ azure_manifest_url % '(format=mpd-time-csf)',
+ video_id, mpd_id='%s-dash' % cdn, fatal=False))
+ formats.extend(self._extract_ism_formats(
+ azure_manifest_url % '', video_id, ism_id='%s-mss' % cdn, fatal=False))
+
+ azure_progressive_base = get_cdn_shield_base('Prog', True)
+ azure_file_distribution = stream_data.get('azureFileDistribution')
+ if azure_file_distribution:
+ fds = azure_file_distribution.split(',')
+ if fds:
+ for fd in fds:
+ ss = fd.split(':')
+ if len(ss) == 2:
+ tbr = int_or_none(ss[0])
+ if tbr:
+ f = {
+ 'url': '%s%s/%s_src_%s_%d.mp4' % (
+ azure_progressive_base, azure_locator, video_id, ss[1], tbr),
+ 'format_id': '%s-http-%d' % (cdn, tbr),
+ 'tbr': tbr,
+ }
+ width_height = ss[1].split('x')
+ if len(width_height) == 2:
+ f.update({
+ 'width': int_or_none(width_height[0]),
+ 'height': int_or_none(width_height[1]),
+ })
+ formats.append(f)
+
+ return formats
+
+ def _real_extract(self, url):
+ mobj = self._match_valid_url(url)
+ domain_id = mobj.group('domain_id') or mobj.group('domain_id_s')
+ video_id = mobj.group('id')
+
+ video = None
+
+ def find_video(result):
+ if isinstance(result, dict):
+ return result
+ elif isinstance(result, list):
+ vid = int(video_id)
+ for v in result:
+ if try_get(v, lambda x: x['general']['ID'], int) == vid:
+ return v
+ return None
+
+ response = self._download_json(
+ 'https://arc.nexx.cloud/api/video/%s.json' % video_id,
+ video_id, fatal=False)
+ if response and isinstance(response, dict):
+ result = response.get('result')
+ if result:
+ video = find_video(result)
+
+ # not all videos work via arc, e.g. nexx:741:1269984
+ if not video:
+ # Reverse engineered from JS code (see getDeviceID function)
+ device_id = '%d:%d:%d%d' % (
+ random.randint(1, 4), int(time.time()),
+                random.randint(10000, 99999), random.randint(1, 9))
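+            # e.g. (illustrative) '2:1700000000:123459'; the first and last
+            # digits are reused below to slice the domain token into the secret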
+
+ result = self._call_api(domain_id, 'session/init', video_id, data={
+ 'nxp_devh': device_id,
+ 'nxp_userh': '',
+ 'precid': '0',
+ 'playlicense': '0',
+ 'screenx': '1920',
+ 'screeny': '1080',
+ 'playerversion': '6.0.00',
+ 'gateway': 'html5',
+ 'adGateway': '',
+ 'explicitlanguage': 'en-US',
+ 'addTextTemplates': '1',
+ 'addDomainData': '1',
+ 'addAdModel': '1',
+ }, headers={
+ 'X-Request-Enable-Auth-Fallback': '1',
+ })
+
+ cid = result['general']['cid']
+
+            # As described in [1], the X-Request-Token generation algorithm is
+            # as follows:
+            #   md5( operation + domain_id + domain_secret )
+            # where domain_secret is supposedly a static value provided by
+            # nexx.tv as per [1]. In reality, the "secret" is derived at runtime
+            # (reversed from the _play._factory.data.getDomainData function;
+            # search for domaintoken or enableAPIAccess), so it is neither
+            # static nor much of a secret.
+ # 1. https://nexxtvstorage.blob.core.windows.net/files/201610/27.pdf
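+            # The first digit of device_id selects how many characters to drop
+            # from the start of domaintoken, the last digit how many from the end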
+ secret = result['device']['domaintoken'][int(device_id[0]):]
+ secret = secret[0:len(secret) - int(device_id[-1])]
+
+ op = 'byid'
+
+ # Reversed from JS code for _play.api.call function (search for
+ # X-Request-Token)
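+            # e.g. op 'byid' with domain_id '741' hashes 'byid741<secret>'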
+ request_token = hashlib.md5(
+ ''.join((op, domain_id, secret)).encode('utf-8')).hexdigest()
+
+ result = self._call_api(
+ domain_id, 'videos/%s/%s' % (op, video_id), video_id, data={
+ 'additionalfields': 'language,channel,format,licenseby,slug,fileversion,episode,season',
+ 'addInteractionOptions': '1',
+ 'addStatusDetails': '1',
+ 'addStreamDetails': '1',
+ 'addFeatures': '1',
+ # Caption format selection doesn't seem to be enforced?
+ 'addCaptions': 'vtt',
+ 'addScenes': '1',
+ 'addChapters': '1',
+ 'addHotSpots': '1',
+ 'addConnectedMedia': 'persons',
+ 'addBumpers': '1',
+ }, headers={
+ 'X-Request-CID': cid,
+ 'X-Request-Token': request_token,
+ })
+ video = find_video(result)
+
+ general = video['general']
+ title = general['title']
+
+ cdn = video['streamdata']['cdnType']
+
+ if cdn == 'azure':
+ formats = self._extract_azure_formats(video, video_id)
+ elif cdn == 'free':
+ formats = self._extract_free_formats(video, video_id)
+ elif cdn == '3q':
+ formats = self._extract_3q_formats(video, video_id)
+ else:
+ self.raise_no_formats(f'{cdn} formats are currently not supported', video_id)
+
+ subtitles = {}
+ for sub in video.get('captiondata') or []:
+ if sub.get('data'):
+ subtitles.setdefault(sub.get('language', 'en'), []).append({
+ 'ext': 'srt',
+ 'data': '\n\n'.join(
+ f'{i + 1}\n{srt_subtitles_timecode(line["fromms"] / 1000)} --> {srt_subtitles_timecode(line["toms"] / 1000)}\n{line["caption"]}'
+ for i, line in enumerate(sub['data'])),
+ 'name': sub.get('language_long') or sub.get('title')
+ })
+ elif sub.get('url'):
+ subtitles.setdefault(sub.get('language', 'en'), []).append({
+ 'url': sub['url'],
+ 'ext': sub.get('format'),
+ 'name': sub.get('language_long') or sub.get('title')
+ })
+
+ return {
+ 'id': video_id,
+ 'title': title,
+ 'alt_title': general.get('subtitle'),
+ 'description': general.get('description'),
+ 'release_year': int_or_none(general.get('year')),
+ 'creator': general.get('studio') or general.get('studio_adref') or None,
+ 'thumbnail': try_get(
+ video, lambda x: x['imagedata']['thumb'], compat_str),
+ 'duration': parse_duration(general.get('runtime')),
+ 'timestamp': int_or_none(general.get('uploaded')),
+ 'episode_number': traverse_obj(
+ video, (('episodedata', 'general'), 'episode'), expected_type=int, get_all=False),
+ 'season_number': traverse_obj(
+ video, (('episodedata', 'general'), 'season'), expected_type=int, get_all=False),
+ 'cast': traverse_obj(video, ('connectedmedia', ..., 'title'), expected_type=str),
+ 'formats': formats,
+ 'subtitles': subtitles,
+ }
+
+
+class NexxEmbedIE(InfoExtractor):
+ _VALID_URL = r'https?://embed\.nexx(?:\.cloud|cdn\.com)/\d+/(?:video/)?(?P<id>[^/?#&]+)'
+    # Reference: https://nx-s.akamaized.net/files/201510/44.pdf
+ _EMBED_REGEX = [r'<iframe[^>]+\bsrc=(["\'])(?P<url>(?:https?:)?//embed\.nexx(?:\.cloud|cdn\.com)/\d+/(?:(?!\1).)+)\1']
+ _TESTS = [{
+ 'url': 'http://embed.nexx.cloud/748/KC1614647Z27Y7T?autoplay=1',
+ 'md5': '16746bfc28c42049492385c989b26c4a',
+ 'info_dict': {
+ 'id': '161464',
+ 'ext': 'mp4',
+ 'title': 'Nervenkitzel Achterbahn',
+ 'alt_title': 'Karussellbauer in Deutschland',
+ 'description': 'md5:ffe7b1cc59a01f585e0569949aef73cc',
+ 'creator': 'SPIEGEL TV',
+ 'thumbnail': r're:^https?://.*\.jpg$',
+ 'duration': 2761,
+ 'timestamp': 1394021479,
+ 'upload_date': '20140305',
+ },
+ 'params': {
+ 'skip_download': True,
+ },
+ }, {
+ 'url': 'https://embed.nexx.cloud/11888/video/DSRTO7UVOX06S7',
+ 'only_matching': True,
+ }]
+
+ def _real_extract(self, url):
+ embed_id = self._match_id(url)
+
+ webpage = self._download_webpage(url, embed_id)
+
+ return self.url_result(NexxIE._extract_url(webpage), ie=NexxIE.ie_key())
diff --git a/yt_dlp/extractor/nfb.py b/yt_dlp/extractor/nfb.py
new file mode 100644
index 0000000..6f78728
--- /dev/null
+++ b/yt_dlp/extractor/nfb.py
@@ -0,0 +1,300 @@
+from .common import InfoExtractor
+from ..utils import (
+ int_or_none,
+ join_nonempty,
+ merge_dicts,
+ parse_count,
+ url_or_none,
+ urljoin,
+)
+from ..utils.traversal import traverse_obj
+
+
+class NFBBaseIE(InfoExtractor):
+ _VALID_URL_BASE = r'https?://(?:www\.)?(?P<site>nfb|onf)\.ca'
+ _GEO_COUNTRIES = ['CA']
+
+ def _extract_ep_data(self, webpage, video_id, fatal=False):
+ return self._search_json(
+ r'const\s+episodesData\s*=', webpage, 'episode data', video_id,
+ contains_pattern=r'\[\s*{(?s:.+)}\s*\]', fatal=fatal) or []
+
+ def _extract_ep_info(self, data, video_id, slug=None):
+ info = traverse_obj(data, (lambda _, v: video_id in v['embed_url'], {
+ 'description': ('description', {str}),
+ 'thumbnail': ('thumbnail_url', {url_or_none}),
+ 'uploader': ('data_layer', 'episodeMaker', {str}),
+ 'release_year': ('data_layer', 'episodeYear', {int_or_none}),
+ 'episode': ('data_layer', 'episodeTitle', {str}),
+ 'season': ('data_layer', 'seasonTitle', {str}),
+ 'season_number': ('data_layer', 'seasonTitle', {parse_count}),
+ 'series': ('data_layer', 'seriesTitle', {str}),
+ }), get_all=False)
+
+ return {
+ **info,
+ 'id': video_id,
+ 'title': join_nonempty('series', 'episode', from_dict=info, delim=' - '),
+ 'episode_number': int_or_none(self._search_regex(
+ r'[/-]e(?:pisode)?-?(\d+)(?:[/-]|$)', slug or video_id, 'episode number', default=None)),
+ }
+
+
+class NFBIE(NFBBaseIE):
+ IE_NAME = 'nfb'
+ IE_DESC = 'nfb.ca and onf.ca films and episodes'
+ _VALID_URL = [
+ rf'{NFBBaseIE._VALID_URL_BASE}/(?P<type>film)/(?P<id>[^/?#&]+)',
+ rf'{NFBBaseIE._VALID_URL_BASE}/(?P<type>series?)/(?P<id>[^/?#&]+/s(?:ea|ai)son\d+/episode\d+)',
+ ]
+ _TESTS = [{
+ 'note': 'NFB film',
+ 'url': 'https://www.nfb.ca/film/trafficopter/',
+ 'info_dict': {
+ 'id': 'trafficopter',
+ 'ext': 'mp4',
+ 'title': 'Trafficopter',
+ 'description': 'md5:060228455eb85cf88785c41656776bc0',
+ 'thumbnail': r're:^https?://.*\.jpg$',
+ 'uploader': 'Barrie Howells',
+ 'release_year': 1972,
+ 'duration': 600.0,
+ },
+ 'params': {'skip_download': 'm3u8'},
+ }, {
+ 'note': 'ONF film',
+ 'url': 'https://www.onf.ca/film/mal-du-siecle/',
+ 'info_dict': {
+ 'id': 'mal-du-siecle',
+ 'ext': 'mp4',
+ 'title': 'Le mal du siècle',
+ 'description': 'md5:1abf774d77569ebe603419f2d344102b',
+ 'thumbnail': r're:^https?://.*\.jpg$',
+ 'uploader': 'Catherine Lepage',
+ 'release_year': 2019,
+ 'duration': 300.0,
+ },
+ 'params': {'skip_download': 'm3u8'},
+ }, {
+ 'note': 'NFB episode with English title',
+ 'url': 'https://www.nfb.ca/series/true-north-inside-the-rise-of-toronto-basketball/season1/episode9/',
+ 'info_dict': {
+ 'id': 'true-north-episode9-true-north-finale-making-it',
+ 'ext': 'mp4',
+ 'title': 'True North: Inside the Rise of Toronto Basketball - Finale: Making It',
+ 'description': 'We catch up with each player in the midst of their journey as they reflect on their road ahead.',
+ 'series': 'True North: Inside the Rise of Toronto Basketball',
+ 'release_year': 2018,
+ 'season': 'Season 1',
+ 'season_number': 1,
+ 'episode': 'Finale: Making It',
+ 'episode_number': 9,
+ 'uploader': 'Ryan Sidhoo',
+ 'thumbnail': r're:^https?://.*\.jpg$',
+ },
+ 'params': {'skip_download': 'm3u8'},
+ }, {
+ 'note': 'ONF episode with French title',
+ 'url': 'https://www.onf.ca/serie/direction-nord-la-montee-du-basketball-a-toronto/saison1/episode9/',
+ 'info_dict': {
+ 'id': 'direction-nord-episode-9',
+ 'ext': 'mp4',
+ 'title': 'Direction nord – La montée du basketball à Toronto - Finale : Réussir',
+ 'description': 'md5:349a57419b71432b97bf6083d92b029d',
+ 'series': 'Direction nord – La montée du basketball à Toronto',
+ 'release_year': 2018,
+ 'season': 'Saison 1',
+ 'season_number': 1,
+ 'episode': 'Finale : Réussir',
+ 'episode_number': 9,
+ 'uploader': 'Ryan Sidhoo',
+ 'thumbnail': r're:^https?://.*\.jpg$',
+ },
+ 'params': {'skip_download': 'm3u8'},
+ }, {
+ 'note': 'NFB episode with French title (needs geo-bypass)',
+ 'url': 'https://www.nfb.ca/series/etoile-du-nord/saison1/episode1/',
+ 'info_dict': {
+ 'id': 'etoile-du-nord-episode-1-lobservation',
+ 'ext': 'mp4',
+ 'title': 'Étoile du Nord - L\'observation',
+ 'description': 'md5:161a4617260dee3de70f509b2c9dd21b',
+ 'series': 'Étoile du Nord',
+ 'release_year': 2023,
+ 'season': 'Saison 1',
+ 'season_number': 1,
+ 'episode': 'L\'observation',
+ 'episode_number': 1,
+ 'uploader': 'Patrick Bossé',
+ 'thumbnail': r're:^https?://.*\.jpg$',
+ },
+ 'params': {'skip_download': 'm3u8'},
+ }, {
+ 'note': 'ONF episode with English title (needs geo-bypass)',
+ 'url': 'https://www.onf.ca/serie/north-star/season1/episode1/',
+ 'info_dict': {
+ 'id': 'north-star-episode-1-observation',
+ 'ext': 'mp4',
+ 'title': 'North Star - Observation',
+ 'description': 'md5:c727f370839d8a817392b9e3f23655c7',
+ 'series': 'North Star',
+ 'release_year': 2023,
+ 'season': 'Season 1',
+ 'season_number': 1,
+ 'episode': 'Observation',
+ 'episode_number': 1,
+ 'uploader': 'Patrick Bossé',
+ 'thumbnail': r're:^https?://.*\.jpg$',
+ },
+ 'params': {'skip_download': 'm3u8'},
+ }, {
+ 'note': 'NFB episode with /film/ URL and English title (needs geo-bypass)',
+ 'url': 'https://www.nfb.ca/film/north-star-episode-1-observation/',
+ 'info_dict': {
+ 'id': 'north-star-episode-1-observation',
+ 'ext': 'mp4',
+ 'title': 'North Star - Observation',
+ 'description': 'md5:c727f370839d8a817392b9e3f23655c7',
+ 'series': 'North Star',
+ 'release_year': 2023,
+ 'season': 'Season 1',
+ 'season_number': 1,
+ 'episode': 'Observation',
+ 'episode_number': 1,
+ 'uploader': 'Patrick Bossé',
+ 'thumbnail': r're:^https?://.*\.jpg$',
+ },
+ 'params': {'skip_download': 'm3u8'},
+ }, {
+ 'note': 'ONF episode with /film/ URL and French title (needs geo-bypass)',
+ 'url': 'https://www.onf.ca/film/etoile-du-nord-episode-1-lobservation/',
+ 'info_dict': {
+ 'id': 'etoile-du-nord-episode-1-lobservation',
+ 'ext': 'mp4',
+ 'title': 'Étoile du Nord - L\'observation',
+ 'description': 'md5:161a4617260dee3de70f509b2c9dd21b',
+ 'series': 'Étoile du Nord',
+ 'release_year': 2023,
+ 'season': 'Saison 1',
+ 'season_number': 1,
+ 'episode': 'L\'observation',
+ 'episode_number': 1,
+ 'uploader': 'Patrick Bossé',
+ 'thumbnail': r're:^https?://.*\.jpg$',
+ },
+ 'params': {'skip_download': 'm3u8'},
+ }, {
+ 'note': 'Season 2 episode w/o episode num in id, extract from json ld',
+ 'url': 'https://www.onf.ca/film/liste-des-choses-qui-existent-saison-2-ours',
+ 'info_dict': {
+ 'id': 'liste-des-choses-qui-existent-saison-2-ours',
+ 'ext': 'mp4',
+ 'title': 'La liste des choses qui existent - L\'ours en peluche',
+ 'description': 'md5:d5e8d8fc5f3a7385a9cf0f509b37e28a',
+ 'series': 'La liste des choses qui existent',
+ 'release_year': 2022,
+ 'season': 'Saison 2',
+ 'season_number': 2,
+ 'episode': 'L\'ours en peluche',
+ 'episode_number': 12,
+ 'uploader': 'Francis Papillon',
+ 'thumbnail': r're:^https?://.*\.jpg$',
+ },
+ 'params': {'skip_download': 'm3u8'},
+ }, {
+ 'note': 'NFB film /embed/player/ page',
+ 'url': 'https://www.nfb.ca/film/afterlife/embed/player/',
+ 'info_dict': {
+ 'id': 'afterlife',
+ 'ext': 'mp4',
+ 'title': 'Afterlife',
+ 'description': 'md5:84951394f594f1fb1e62d9c43242fdf5',
+ 'release_year': 1978,
+ 'duration': 420.0,
+ 'uploader': 'Ishu Patel',
+ 'thumbnail': r're:^https?://.*\.jpg$',
+ },
+ 'params': {'skip_download': 'm3u8'},
+ }]
+
+ def _real_extract(self, url):
+ site, type_, slug = self._match_valid_url(url).group('site', 'type', 'id')
+ # Need to construct the URL since we match /embed/player/ URLs as well
+ webpage, urlh = self._download_webpage_handle(f'https://www.{site}.ca/{type_}/{slug}/', slug)
+ # type_ can change from film to serie(s) after redirect; new slug may have episode number
+ type_, slug = self._match_valid_url(urlh.url).group('type', 'id')
+
+ embed_url = urljoin(f'https://www.{site}.ca', self._html_search_regex(
+ r'<[^>]+\bid=["\']player-iframe["\'][^>]*\bsrc=["\']([^"\']+)', webpage, 'embed url'))
+ video_id = self._match_id(embed_url) # embed url has unique slug
+ player = self._download_webpage(embed_url, video_id, 'Downloading player page')
+ if 'MESSAGE_GEOBLOCKED' in player:
+ self.raise_geo_restricted(countries=self._GEO_COUNTRIES)
+
+ formats, subtitles = self._extract_m3u8_formats_and_subtitles(
+ self._html_search_regex(r'source:\s*\'([^\']+)', player, 'm3u8 url'),
+ video_id, 'mp4', m3u8_id='hls')
+
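+        # optional described-video (audio description) rendition, ranked below
+        # the regular formats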
+ if dv_source := self._html_search_regex(r'dvSource:\s*\'([^\']+)', player, 'dv', default=None):
+ fmts, subs = self._extract_m3u8_formats_and_subtitles(
+ dv_source, video_id, 'mp4', m3u8_id='dv', preference=-2, fatal=False)
+ for fmt in fmts:
+ fmt['format_note'] = 'described video'
+ formats.extend(fmts)
+ self._merge_subtitles(subs, target=subtitles)
+
+ info = {
+ 'id': video_id,
+ 'title': self._html_search_regex(
+ r'<[^>]+\bid=["\']titleHeader["\'][^>]*>\s*<h1[^>]*>\s*([^<]+?)\s*</h1>',
+ webpage, 'title', default=None),
+ 'description': self._html_search_regex(
+ r'<[^>]+\bid=["\']tabSynopsis["\'][^>]*>\s*<p[^>]*>\s*([^<]+)',
+ webpage, 'description', default=None),
+ 'thumbnail': self._html_search_regex(
+ r'poster:\s*\'([^\']+)', player, 'thumbnail', default=None),
+ 'uploader': self._html_search_regex(
+ r'<[^>]+\bitemprop=["\']name["\'][^>]*>([^<]+)', webpage, 'uploader', default=None),
+ 'release_year': int_or_none(self._html_search_regex(
+ r'<[^>]+\bitemprop=["\']datePublished["\'][^>]*>([^<]+)',
+ webpage, 'release_year', default=None)),
+ } if type_ == 'film' else self._extract_ep_info(self._extract_ep_data(webpage, video_id, slug), video_id)
+
+ return merge_dicts({
+ 'formats': formats,
+ 'subtitles': subtitles,
+ }, info, self._search_json_ld(webpage, video_id, default={}))
+
+
+class NFBSeriesIE(NFBBaseIE):
+ IE_NAME = 'nfb:series'
+ IE_DESC = 'nfb.ca and onf.ca series'
+ _VALID_URL = rf'{NFBBaseIE._VALID_URL_BASE}/(?P<type>series?)/(?P<id>[^/?#&]+)/?(?:[?#]|$)'
+ _TESTS = [{
+ 'url': 'https://www.nfb.ca/series/true-north-inside-the-rise-of-toronto-basketball/',
+ 'playlist_mincount': 9,
+ 'info_dict': {
+ 'id': 'true-north-inside-the-rise-of-toronto-basketball',
+ },
+ }, {
+ 'url': 'https://www.onf.ca/serie/la-liste-des-choses-qui-existent-serie/',
+ 'playlist_mincount': 26,
+ 'info_dict': {
+ 'id': 'la-liste-des-choses-qui-existent-serie',
+ },
+ }]
+
+ def _entries(self, episodes):
+ for episode in traverse_obj(episodes, lambda _, v: NFBIE.suitable(v['embed_url'])):
+ mobj = NFBIE._match_valid_url(episode['embed_url'])
+ yield self.url_result(
+ mobj[0], NFBIE, **self._extract_ep_info([episode], mobj.group('id')))
+
+ def _real_extract(self, url):
+ site, type_, series_id = self._match_valid_url(url).group('site', 'type', 'id')
+ season_path = 'saison' if type_ == 'serie' else 'season'
+ webpage = self._download_webpage(
+ f'https://www.{site}.ca/{type_}/{series_id}/{season_path}1/episode1', series_id)
+ episodes = self._extract_ep_data(webpage, series_id, fatal=True)
+
+ return self.playlist_result(self._entries(episodes), series_id)
diff --git a/yt_dlp/extractor/nfhsnetwork.py b/yt_dlp/extractor/nfhsnetwork.py
new file mode 100644
index 0000000..febad8f
--- /dev/null
+++ b/yt_dlp/extractor/nfhsnetwork.py
@@ -0,0 +1,141 @@
+from .common import InfoExtractor
+from ..utils import (
+    try_get,
+    unified_strdate,
+    unified_timestamp,
+)
+
+
+class NFHSNetworkIE(InfoExtractor):
+ IE_NAME = 'NFHSNetwork'
+    _VALID_URL = r'https?://(?:www\.)?nfhsnetwork\.com/events/[\w-]+/(?P<id>(?:gam|evt|dd)?\w{0,10})'
+ _TESTS = [{
+ # Auto-generated two-team sport (pixellot)
+ 'url': 'https://www.nfhsnetwork.com/events/rockford-high-school-rockford-mi/gamcf7e54cfbc',
+ 'info_dict': {
+ 'id': 'gamcf7e54cfbc',
+ 'ext': 'mp4',
+ 'title': 'Rockford vs Spring Lake - Girls Varsity Lacrosse 03/27/2021',
+ 'uploader': 'MHSAA - Michigan: Rockford High School, Rockford, MI',
+ 'uploader_id': 'cd2622cf76',
+ 'uploader_url': 'https://www.nfhsnetwork.com/schools/rockford-high-school-rockford-mi',
+ 'location': 'Rockford, Michigan',
+ 'timestamp': 1616859000,
+ 'upload_date': '20210327'
+ },
+ 'params': {
+ # m3u8 download
+ 'skip_download': True,
+ }
+ }, {
+ # Non-sport activity with description
+ 'url': 'https://www.nfhsnetwork.com/events/limon-high-school-limon-co/evt4a30e3726c',
+ 'info_dict': {
+ 'id': 'evt4a30e3726c',
+ 'ext': 'mp4',
+ 'title': 'Drama Performance Limon High School vs. Limon High School - 12/13/2020',
+ 'description': 'Join the broadcast of the Limon High School Musical Performance at 2 PM.',
+ 'uploader': 'CHSAA: Limon High School, Limon, CO',
+ 'uploader_id': '7d2d121332',
+ 'uploader_url': 'https://www.nfhsnetwork.com/schools/limon-high-school-limon-co',
+ 'location': 'Limon, Colorado',
+ 'timestamp': 1607893200,
+ 'upload_date': '20201213'
+ },
+ 'params': {
+ # m3u8 download
+ 'skip_download': True,
+ }
+ }, {
+ # Postseason game
+ 'url': 'https://www.nfhsnetwork.com/events/nfhs-network-special-events/dd8de71d45',
+ 'info_dict': {
+ 'id': 'dd8de71d45',
+ 'ext': 'mp4',
+ 'title': '2015 UA Holiday Classic Tournament: National Division - 12/26/2015',
+ 'uploader': 'SoCal Sports Productions',
+ 'uploader_id': '063dba0150',
+ 'uploader_url': 'https://www.nfhsnetwork.com/affiliates/socal-sports-productions',
+ 'location': 'San Diego, California',
+ 'timestamp': 1451187000,
+ 'upload_date': '20151226'
+ },
+ 'params': {
+ # m3u8 download
+ 'skip_download': True,
+ }
+ }, {
+ # Video with no broadcasts object
+ 'url': 'https://www.nfhsnetwork.com/events/wiaa-wi/9aa2f92f82',
+ 'info_dict': {
+ 'id': '9aa2f92f82',
+ 'ext': 'mp4',
+ 'title': 'Competitive Equity - 01/21/2015',
+ 'description': 'Committee members discuss points of their research regarding a competitive equity plan',
+ 'uploader': 'WIAA - Wisconsin: Wisconsin Interscholastic Athletic Association',
+ 'uploader_id': 'a49f7d1002',
+ 'uploader_url': 'https://www.nfhsnetwork.com/associations/wiaa-wi',
+ 'location': 'Stevens Point, Wisconsin',
+ 'timestamp': 1421856000,
+ 'upload_date': '20150121'
+ },
+ 'params': {
+ # m3u8 download
+ 'skip_download': True,
+ }
+ }
+ ]
+
+ def _real_extract(self, url):
+ video_id = self._match_id(url)
+ webpage = self._download_webpage(url, video_id)
+ data = self._download_json(
+ 'https://cfunity.nfhsnetwork.com/v2/game_or_event/' + video_id,
+ video_id)
+ publisher = data.get('publishers')[0] # always exists
+ broadcast = (publisher.get('broadcasts') or publisher.get('vods'))[0] # some (older) videos don't have a broadcasts object
+ uploader = publisher.get('formatted_name') or publisher.get('name')
+ uploaderID = publisher.get('publisher_key')
+ pubType = publisher.get('type')
+        uploaderPrefix = (
+            'schools' if pubType == 'school'
+            else 'associations' if 'association' in pubType
+            else 'affiliates' if pubType in ('publisher', 'affiliate')
+            else 'schools')
+ uploaderPage = 'https://www.nfhsnetwork.com/%s/%s' % (uploaderPrefix, publisher.get('slug'))
+ location = '%s, %s' % (data.get('city'), data.get('state_name'))
+ description = broadcast.get('description')
+ isLive = broadcast.get('on_air') or broadcast.get('status') == 'on_air' or False
+
+ timestamp = unified_timestamp(data.get('local_start_time'))
+ upload_date = unified_strdate(data.get('local_start_time'))
+
+ title = (
+ self._og_search_title(webpage)
+ or self._html_search_regex(r'<h1 class="sr-hidden">(.*?)</h1>', webpage, 'title'))
+ title = title.split('|')[0].strip()
+
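+        # live events stream from the broadcasts endpoint; finished events fall
+        # back to the first VOD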
+ video_type = 'broadcasts' if isLive else 'vods'
+ key = broadcast.get('key') if isLive else try_get(publisher, lambda x: x['vods'][0]['key'])
+ m3u8_url = self._download_json(
+ 'https://cfunity.nfhsnetwork.com/v2/%s/%s/url' % (video_type, key),
+ video_id).get('video_url')
+
+ formats = self._extract_m3u8_formats(m3u8_url, video_id, 'mp4', live=isLive)
+
+ return {
+ 'id': video_id,
+ 'title': title,
+ 'formats': formats,
+ 'description': description,
+ 'timestamp': timestamp,
+ 'uploader': uploader,
+ 'uploader_id': uploaderID,
+ 'uploader_url': uploaderPage,
+ 'location': location,
+ 'upload_date': upload_date,
+ 'is_live': isLive,
+ '_format_sort_fields': ('res', 'tbr'),
+ }
diff --git a/yt_dlp/extractor/nfl.py b/yt_dlp/extractor/nfl.py
new file mode 100644
index 0000000..3f83cd2
--- /dev/null
+++ b/yt_dlp/extractor/nfl.py
@@ -0,0 +1,373 @@
+import base64
+import json
+import re
+import time
+import uuid
+
+from .anvato import AnvatoIE
+from .common import InfoExtractor
+from ..utils import (
+ ExtractorError,
+ clean_html,
+ determine_ext,
+ get_element_by_class,
+ traverse_obj,
+ urlencode_postdata,
+)
+
+
+class NFLBaseIE(InfoExtractor):
+ _VALID_URL_BASE = r'''(?x)
+ https?://
+ (?P<host>
+ (?:www\.)?
+ (?:
+ (?:
+ nfl|
+ buffalobills|
+ miamidolphins|
+ patriots|
+ newyorkjets|
+ baltimoreravens|
+ bengals|
+ clevelandbrowns|
+ steelers|
+ houstontexans|
+ colts|
+ jaguars|
+ (?:titansonline|tennesseetitans)|
+ denverbroncos|
+ (?:kc)?chiefs|
+ raiders|
+ chargers|
+ dallascowboys|
+ giants|
+ philadelphiaeagles|
+ (?:redskins|washingtonfootball)|
+ chicagobears|
+ detroitlions|
+ packers|
+ vikings|
+ atlantafalcons|
+ panthers|
+ neworleanssaints|
+ buccaneers|
+ azcardinals|
+ (?:stlouis|the)rams|
+ 49ers|
+ seahawks
+ )\.com|
+ .+?\.clubs\.nfl\.com
+ )
+ )/
+ '''
+ _VIDEO_CONFIG_REGEX = r'<script[^>]+id="[^"]*video-config-[0-9a-f]{8}-(?:[0-9a-f]{4}-){3}[0-9a-f]{12}[^"]*"[^>]*>\s*({.+});?\s*</script>'
+ _ANVATO_PREFIX = 'anvato:GXvEgwyJeWem8KCYXfeoHWknwP48Mboj:'
+
+ _CLIENT_DATA = {
+ 'clientKey': '4cFUW6DmwJpzT9L7LrG3qRAcABG5s04g',
+ 'clientSecret': 'CZuvCL49d9OwfGsR',
+ 'deviceId': str(uuid.uuid4()),
+ 'deviceInfo': base64.b64encode(json.dumps({
+ 'model': 'desktop',
+ 'version': 'Chrome',
+ 'osName': 'Windows',
+ 'osVersion': '10.0',
+ }, separators=(',', ':')).encode()).decode(),
+ 'networkType': 'other',
+ 'nflClaimGroupsToAdd': [],
+ 'nflClaimGroupsToRemove': [],
+ }
+ _ACCOUNT_INFO = {}
+ _API_KEY = None
+
+ _TOKEN = None
+ _TOKEN_EXPIRY = 0
+
+ def _get_account_info(self, url, slug):
+ if not self._API_KEY:
+ webpage = self._download_webpage(url, slug, fatal=False) or ''
+ self._API_KEY = self._search_regex(
+ r'window\.gigyaApiKey\s*=\s*["\'](\w+)["\'];', webpage, 'API key',
+ fatal=False) or '3_Qa8TkWpIB8ESCBT8tY2TukbVKgO5F6BJVc7N1oComdwFzI7H2L9NOWdm11i_BY9f'
+
+ cookies = self._get_cookies('https://auth-id.nfl.com/')
+ login_token = traverse_obj(cookies, (
+ (f'glt_{self._API_KEY}', lambda k, _: k.startswith('glt_')), {lambda x: x.value}), get_all=False)
+ if not login_token:
+ self.raise_login_required()
+ if 'ucid' not in cookies:
+ raise ExtractorError(
+ 'Required cookies for the auth-id.nfl.com domain were not found among passed cookies. '
+ 'If using --cookies, these cookies must be exported along with .nfl.com cookies, '
+ 'or else try using --cookies-from-browser instead', expected=True)
+
+ account = self._download_json(
+ 'https://auth-id.nfl.com/accounts.getAccountInfo', slug,
+ note='Downloading account info', data=urlencode_postdata({
+ 'include': 'profile,data',
+ 'lang': 'en',
+ 'APIKey': self._API_KEY,
+ 'sdk': 'js_latest',
+ 'login_token': login_token,
+ 'authMode': 'cookie',
+ 'pageURL': url,
+ 'sdkBuild': traverse_obj(cookies, (
+ 'gig_canary_ver', {lambda x: x.value.partition('-')[0]}), default='15170'),
+ 'format': 'json',
+ }), headers={'Content-Type': 'application/x-www-form-urlencoded'})
+
+ self._ACCOUNT_INFO = traverse_obj(account, {
+ 'signatureTimestamp': 'signatureTimestamp',
+ 'uid': 'UID',
+ 'uidSignature': 'UIDSignature',
+ })
+
+ if len(self._ACCOUNT_INFO) != 3:
+ raise ExtractorError('Failed to retrieve account info with provided cookies', expected=True)
+
+ def _get_auth_token(self, url, slug):
+ if self._TOKEN and self._TOKEN_EXPIRY > int(time.time() + 30):
+ return
+
+ if not self._ACCOUNT_INFO:
+ self._get_account_info(url, slug)
+
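+        # once a refreshToken has been obtained, later calls use the /refresh endpoint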
+ token = self._download_json(
+ 'https://api.nfl.com/identity/v3/token%s' % (
+ '/refresh' if self._ACCOUNT_INFO.get('refreshToken') else ''),
+ slug, headers={'Content-Type': 'application/json'}, note='Downloading access token',
+ data=json.dumps({**self._CLIENT_DATA, **self._ACCOUNT_INFO}, separators=(',', ':')).encode())
+
+ self._TOKEN = token['accessToken']
+ self._TOKEN_EXPIRY = token['expiresIn']
+ self._ACCOUNT_INFO['refreshToken'] = token['refreshToken']
+
+ def _parse_video_config(self, video_config, display_id):
+ video_config = self._parse_json(video_config, display_id)
+ item = video_config['playlist'][0]
+ mcp_id = item.get('mcpID')
+ if mcp_id:
+ info = self.url_result(f'{self._ANVATO_PREFIX}{mcp_id}', AnvatoIE, mcp_id)
+ else:
+ media_id = item.get('id') or item['entityId']
+ title = item.get('title')
+ item_url = item['url']
+ info = {'id': media_id}
+ ext = determine_ext(item_url)
+ if ext == 'm3u8':
+ info['formats'] = self._extract_m3u8_formats(item_url, media_id, 'mp4')
+ else:
+ info['url'] = item_url
+ if item.get('audio') is True:
+ info['vcodec'] = 'none'
+ is_live = video_config.get('live') is True
+ thumbnails = None
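+        # imageSrc/posterImage appear to hold the name of the item key that
+        # contains the actual image URL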
+ image_url = item.get(item.get('imageSrc')) or item.get(item.get('posterImage'))
+ if image_url:
+ thumbnails = [{
+ 'url': image_url,
+ 'ext': determine_ext(image_url, 'jpg'),
+ }]
+ info.update({
+ 'title': title,
+ 'is_live': is_live,
+ 'description': clean_html(item.get('description')),
+ 'thumbnails': thumbnails,
+ })
+ return info
+
+
+class NFLIE(NFLBaseIE):
+ IE_NAME = 'nfl.com'
+ _VALID_URL = NFLBaseIE._VALID_URL_BASE + r'(?:videos?|listen|audio)/(?P<id>[^/#?&]+)'
+ _TESTS = [{
+ 'url': 'https://www.nfl.com/videos/baker-mayfield-s-game-changing-plays-from-3-td-game-week-14',
+ 'info_dict': {
+ 'id': '899441',
+ 'ext': 'mp4',
+ 'title': "Baker Mayfield's game-changing plays from 3-TD game Week 14",
+ 'description': 'md5:85e05a3cc163f8c344340f220521136d',
+ 'upload_date': '20201215',
+ 'timestamp': 1608009755,
+ 'thumbnail': r're:^https?://.*\.jpg$',
+ 'uploader': 'NFL',
+ 'tags': 'count:6',
+ 'duration': 157,
+ 'categories': 'count:3',
+ }
+ }, {
+ 'url': 'https://www.chiefs.com/listen/patrick-mahomes-travis-kelce-react-to-win-over-dolphins-the-breakdown',
+ 'md5': '6886b32c24b463038c760ceb55a34566',
+ 'info_dict': {
+ 'id': 'd87e8790-3e14-11eb-8ceb-ff05c2867f99',
+ 'ext': 'mp3',
+ 'title': 'Patrick Mahomes, Travis Kelce React to Win Over Dolphins | The Breakdown',
+ 'description': 'md5:12ada8ee70e6762658c30e223e095075',
+ },
+ 'skip': 'HTTP Error 404: Not Found',
+ }, {
+ 'url': 'https://www.buffalobills.com/video/buffalo-bills-military-recognition-week-14',
+ 'only_matching': True,
+ }, {
+ 'url': 'https://www.raiders.com/audio/instant-reactions-raiders-week-14-loss-to-indianapolis-colts-espn-jason-fitz',
+ 'only_matching': True,
+ }]
+
+ def _real_extract(self, url):
+ display_id = self._match_id(url)
+ webpage = self._download_webpage(url, display_id)
+ return self._parse_video_config(self._search_regex(
+ self._VIDEO_CONFIG_REGEX, webpage, 'video config'), display_id)
+
+
+class NFLArticleIE(NFLBaseIE):
+ IE_NAME = 'nfl.com:article'
+ _VALID_URL = NFLBaseIE._VALID_URL_BASE + r'news/(?P<id>[^/#?&]+)'
+ _TEST = {
+ 'url': 'https://www.buffalobills.com/news/the-only-thing-we-ve-earned-is-the-noise-bills-coaches-discuss-handling-rising-e',
+ 'info_dict': {
+ 'id': 'the-only-thing-we-ve-earned-is-the-noise-bills-coaches-discuss-handling-rising-e',
+ 'title': "'The only thing we've earned is the noise' | Bills coaches discuss handling rising expectations",
+ },
+ 'playlist_count': 4,
+ }
+
+ def _real_extract(self, url):
+ display_id = self._match_id(url)
+ webpage = self._download_webpage(url, display_id)
+ entries = []
+ for video_config in re.findall(self._VIDEO_CONFIG_REGEX, webpage):
+ entries.append(self._parse_video_config(video_config, display_id))
+ title = clean_html(get_element_by_class(
+ 'nfl-c-article__title', webpage)) or self._html_search_meta(
+ ['og:title', 'twitter:title'], webpage)
+ return self.playlist_result(entries, display_id, title)
+
+
+class NFLPlusReplayIE(NFLBaseIE):
+ IE_NAME = 'nfl.com:plus:replay'
+ _VALID_URL = r'https?://(?:www\.)?nfl\.com/plus/games/(?P<slug>[\w-]+)(?:/(?P<id>\d+))?'
+ _TESTS = [{
+ 'url': 'https://www.nfl.com/plus/games/giants-at-vikings-2022-post-1/1572108',
+ 'info_dict': {
+ 'id': '1572108',
+ 'ext': 'mp4',
+ 'title': 'New York Giants at Minnesota Vikings',
+ 'description': 'New York Giants play the Minnesota Vikings at U.S. Bank Stadium on January 15, 2023',
+ 'uploader': 'NFL',
+ 'upload_date': '20230116',
+ 'timestamp': 1673864520,
+ 'duration': 7157,
+ 'categories': ['Game Highlights'],
+ 'tags': ['Minnesota Vikings', 'New York Giants', 'Minnesota Vikings vs. New York Giants'],
+ 'thumbnail': r're:^https?://.*\.jpg',
+ },
+ 'params': {'skip_download': 'm3u8'},
+ }, {
+ 'note': 'Subscription required',
+ 'url': 'https://www.nfl.com/plus/games/giants-at-vikings-2022-post-1',
+ 'playlist_count': 4,
+ 'info_dict': {
+ 'id': 'giants-at-vikings-2022-post-1',
+ },
+ }, {
+ 'note': 'Subscription required',
+ 'url': 'https://www.nfl.com/plus/games/giants-at-patriots-2011-pre-4',
+ 'playlist_count': 2,
+ 'info_dict': {
+ 'id': 'giants-at-patriots-2011-pre-4',
+ },
+ }, {
+ 'note': 'Subscription required',
+ 'url': 'https://www.nfl.com/plus/games/giants-at-patriots-2011-pre-4',
+ 'info_dict': {
+ 'id': '950701',
+ 'ext': 'mp4',
+ 'title': 'Giants @ Patriots',
+ 'description': 'Giants at Patriots on September 01, 2011',
+ 'uploader': 'NFL',
+ 'upload_date': '20210724',
+ 'timestamp': 1627085874,
+ 'duration': 1532,
+ 'categories': ['Game Highlights'],
+ 'tags': ['play-by-play'],
+ 'thumbnail': r're:^https?://.*\.jpg',
+ },
+ 'params': {
+ 'skip_download': 'm3u8',
+ 'extractor_args': {'nflplusreplay': {'type': ['condensed_game']}},
+ },
+ }]
+
+ _REPLAY_TYPES = {
+ 'full_game': 'Full Game',
+ 'full_game_spanish': 'Full Game - Spanish',
+ 'condensed_game': 'Condensed Game',
+ 'all_22': 'All-22',
+ }
+
+ def _real_extract(self, url):
+ slug, video_id = self._match_valid_url(url).group('slug', 'id')
+ requested_types = self._configuration_arg('type', ['all'])
+ if 'all' in requested_types:
+ requested_types = list(self._REPLAY_TYPES.keys())
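+        # map the requested type keys to the subType names used by the replays API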
+ requested_types = traverse_obj(self._REPLAY_TYPES, (None, requested_types))
+
+ if not video_id:
+ self._get_auth_token(url, slug)
+ headers = {'Authorization': f'Bearer {self._TOKEN}'}
+ game_id = self._download_json(
+ f'https://api.nfl.com/football/v2/games/externalId/slug/{slug}', slug,
+ 'Downloading game ID', query={'withExternalIds': 'true'}, headers=headers)['id']
+ replays = self._download_json(
+ 'https://api.nfl.com/content/v1/videos/replays', slug, 'Downloading replays JSON',
+ query={'gameId': game_id}, headers=headers)
+ if len(requested_types) == 1:
+ video_id = traverse_obj(replays, (
+ 'items', lambda _, v: v['subType'] == requested_types[0], 'mcpPlaybackId'), get_all=False)
+
+ if video_id:
+ return self.url_result(f'{self._ANVATO_PREFIX}{video_id}', AnvatoIE, video_id)
+
+ def entries():
+ for replay in traverse_obj(
+ replays, ('items', lambda _, v: v['mcpPlaybackId'] and v['subType'] in requested_types)
+ ):
+ video_id = replay['mcpPlaybackId']
+ yield self.url_result(f'{self._ANVATO_PREFIX}{video_id}', AnvatoIE, video_id)
+
+ return self.playlist_result(entries(), slug)
+
+
+class NFLPlusEpisodeIE(NFLBaseIE):
+ IE_NAME = 'nfl.com:plus:episode'
+ _VALID_URL = r'https?://(?:www\.)?nfl\.com/plus/episodes/(?P<id>[\w-]+)'
+ _TESTS = [{
+ 'note': 'Subscription required',
+ 'url': 'https://www.nfl.com/plus/episodes/kurt-s-qb-insider-conference-championships',
+ 'info_dict': {
+ 'id': '1576832',
+ 'ext': 'mp4',
+ 'title': 'Conference Championships',
+ 'description': 'md5:944f7fab56f7a37430bf8473f5473857',
+ 'uploader': 'NFL',
+ 'upload_date': '20230127',
+ 'timestamp': 1674782760,
+ 'duration': 730,
+ 'categories': ['Analysis'],
+ 'tags': ['Cincinnati Bengals at Kansas City Chiefs (2022-POST-3)'],
+ 'thumbnail': r're:^https?://.*\.jpg',
+ },
+ 'params': {'skip_download': 'm3u8'},
+ }]
+
+ def _real_extract(self, url):
+ slug = self._match_id(url)
+ self._get_auth_token(url, slug)
+ video_id = self._download_json(
+ f'https://api.nfl.com/content/v1/videos/episodes/{slug}', slug, headers={
+ 'Authorization': f'Bearer {self._TOKEN}',
+ })['mcpPlaybackId']
+
+ return self.url_result(f'{self._ANVATO_PREFIX}{video_id}', AnvatoIE, video_id)
diff --git a/yt_dlp/extractor/nhk.py b/yt_dlp/extractor/nhk.py
new file mode 100644
index 0000000..7cf5b24
--- /dev/null
+++ b/yt_dlp/extractor/nhk.py
@@ -0,0 +1,708 @@
+import re
+
+from .common import InfoExtractor
+from ..utils import (
+ ExtractorError,
+ clean_html,
+ get_element_by_class,
+ int_or_none,
+ join_nonempty,
+ parse_duration,
+ traverse_obj,
+ try_call,
+ unescapeHTML,
+ unified_timestamp,
+ url_or_none,
+ urljoin,
+)
+
+
+class NhkBaseIE(InfoExtractor):
+ _API_URL_TEMPLATE = 'https://nwapi.nhk.jp/nhkworld/%sod%slist/v7b/%s/%s/%s/all%s.json'
+ _BASE_URL_REGEX = r'https?://www3\.nhk\.or\.jp/nhkworld/(?P<lang>[a-z]{2})/ondemand'
+ _TYPE_REGEX = r'/(?P<type>video|audio)/'
+
+ def _call_api(self, m_id, lang, is_video, is_episode, is_clip):
+ return self._download_json(
+ self._API_URL_TEMPLATE % (
+ 'v' if is_video else 'r',
+ 'clip' if is_clip else 'esd',
+ 'episode' if is_episode else 'program',
+ m_id, lang, '/all' if is_video else ''),
+ m_id, query={'apikey': 'EJfK8jdS57GqlupFgAfAAwr573q01y6k'})['data']['episodes'] or []
+
+ def _get_api_info(self, refresh=True):
+ if not refresh:
+ return self.cache.load('nhk', 'api_info')
+
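+        # clear the cached info first so a failed refresh does not leave stale
+        # credentials behind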
+ self.cache.store('nhk', 'api_info', {})
+ movie_player_js = self._download_webpage(
+ 'https://movie-a.nhk.or.jp/world/player/js/movie-player.js', None,
+ note='Downloading stream API information')
+ api_info = {
+ 'url': self._search_regex(
+ r'prod:[^;]+\bapiUrl:\s*[\'"]([^\'"]+)[\'"]', movie_player_js, None, 'stream API url'),
+ 'token': self._search_regex(
+ r'prod:[^;]+\btoken:\s*[\'"]([^\'"]+)[\'"]', movie_player_js, None, 'stream API token'),
+ }
+ self.cache.store('nhk', 'api_info', api_info)
+ return api_info
+
+ def _extract_stream_info(self, vod_id):
+ for refresh in (False, True):
+ api_info = self._get_api_info(refresh)
+ if not api_info:
+ continue
+
+ api_url = api_info.pop('url')
+ meta = traverse_obj(
+ self._download_json(
+ api_url, vod_id, 'Downloading stream url info', fatal=False, query={
+ **api_info,
+ 'type': 'json',
+ 'optional_id': vod_id,
+ 'active_flg': 1,
+ }), ('meta', 0))
+ stream_url = traverse_obj(
+ meta, ('movie_url', ('mb_auto', 'auto_sp', 'auto_pc'), {url_or_none}), get_all=False)
+
+ if stream_url:
+ formats, subtitles = self._extract_m3u8_formats_and_subtitles(stream_url, vod_id)
+ return {
+ **traverse_obj(meta, {
+ 'duration': ('duration', {int_or_none}),
+ 'timestamp': ('publication_date', {unified_timestamp}),
+ 'release_timestamp': ('insert_date', {unified_timestamp}),
+ 'modified_timestamp': ('update_date', {unified_timestamp}),
+ }),
+ 'formats': formats,
+ 'subtitles': subtitles,
+ }
+ raise ExtractorError('Unable to extract stream url')
+
+ def _extract_episode_info(self, url, episode=None):
+ fetch_episode = episode is None
+ lang, m_type, episode_id = NhkVodIE._match_valid_url(url).group('lang', 'type', 'id')
+ is_video = m_type == 'video'
+
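+        # video IDs consist of a 4-character program ID plus an episode suffix;
+        # the API expects them hyphenated ('9999' denotes a clip)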
+ if is_video:
+ episode_id = episode_id[:4] + '-' + episode_id[4:]
+
+ if fetch_episode:
+ episode = self._call_api(
+ episode_id, lang, is_video, True, episode_id[:4] == '9999')[0]
+
+ def get_clean_field(key):
+ return clean_html(episode.get(key + '_clean') or episode.get(key))
+
+ title = get_clean_field('sub_title')
+ series = get_clean_field('title')
+
+ thumbnails = []
+ for s, w, h in [('', 640, 360), ('_l', 1280, 720)]:
+ img_path = episode.get('image' + s)
+ if not img_path:
+ continue
+ thumbnails.append({
+ 'id': '%dp' % h,
+ 'height': h,
+ 'width': w,
+ 'url': 'https://www3.nhk.or.jp' + img_path,
+ })
+
+ episode_name = title
+ if series and title:
+ title = f'{series} - {title}'
+ elif series and not title:
+ title = series
+ series = None
+ episode_name = None
+ else: # title, no series
+ episode_name = None
+
+ info = {
+ 'id': episode_id + '-' + lang,
+ 'title': title,
+ 'description': get_clean_field('description'),
+ 'thumbnails': thumbnails,
+ 'series': series,
+ 'episode': episode_name,
+ }
+
+ if is_video:
+ vod_id = episode['vod_id']
+ info.update({
+ **self._extract_stream_info(vod_id),
+ 'id': vod_id,
+ })
+
+ else:
+ if fetch_episode:
+ audio_path = episode['audio']['audio']
+ info['formats'] = self._extract_m3u8_formats(
+ 'https://nhkworld-vh.akamaihd.net/i%s/master.m3u8' % audio_path,
+ episode_id, 'm4a', entry_protocol='m3u8_native',
+ m3u8_id='hls', fatal=False)
+ for f in info['formats']:
+ f['language'] = lang
+ else:
+ info.update({
+ '_type': 'url_transparent',
+ 'ie_key': NhkVodIE.ie_key(),
+ 'url': url,
+ })
+ return info
+
+
+class NhkVodIE(NhkBaseIE):
+    # the 7-character IDs can have alphabetic chars too: assume [a-z] rather
+    # than just [a-f], e.g. the 9999a34 test below
+ _VALID_URL = [rf'{NhkBaseIE._BASE_URL_REGEX}/(?P<type>video)/(?P<id>[0-9a-z]+)',
+ rf'{NhkBaseIE._BASE_URL_REGEX}/(?P<type>audio)/(?P<id>[^/?#]+?-\d{{8}}-[0-9a-z]+)']
+ # Content available only for a limited period of time. Visit
+ # https://www3.nhk.or.jp/nhkworld/en/ondemand/ for working samples.
+ _TESTS = [{
+ 'url': 'https://www3.nhk.or.jp/nhkworld/en/ondemand/video/2049126/',
+ 'info_dict': {
+ 'id': 'nw_vod_v_en_2049_126_20230413233000_01_1681398302',
+ 'ext': 'mp4',
+ 'title': 'Japan Railway Journal - The Tohoku Shinkansen: Full Speed Ahead',
+ 'description': 'md5:49f7c5b206e03868a2fdf0d0814b92f6',
+ 'thumbnail': 'md5:51bcef4a21936e7fea1ff4e06353f463',
+ 'episode': 'The Tohoku Shinkansen: Full Speed Ahead',
+ 'series': 'Japan Railway Journal',
+ 'modified_timestamp': 1694243656,
+ 'timestamp': 1681428600,
+ 'release_timestamp': 1693883728,
+ 'duration': 1679,
+ 'upload_date': '20230413',
+ 'modified_date': '20230909',
+ 'release_date': '20230905',
+ },
+ }, {
+ # video clip
+ 'url': 'https://www3.nhk.or.jp/nhkworld/en/ondemand/video/9999011/',
+ 'md5': '153c3016dfd252ba09726588149cf0e7',
+ 'info_dict': {
+ 'id': 'lpZXIwaDE6_Z-976CPsFdxyICyWUzlT5',
+ 'ext': 'mp4',
+ 'title': 'Dining with the Chef - Chef Saito\'s Family recipe: MENCHI-KATSU',
+ 'description': 'md5:5aee4a9f9d81c26281862382103b0ea5',
+ 'thumbnail': 'md5:d6a4d9b6e9be90aaadda0bcce89631ed',
+ 'series': 'Dining with the Chef',
+ 'episode': 'Chef Saito\'s Family recipe: MENCHI-KATSU',
+ 'duration': 148,
+ 'upload_date': '20190816',
+ 'release_date': '20230902',
+ 'release_timestamp': 1693619292,
+ 'modified_timestamp': 1694168033,
+ 'modified_date': '20230908',
+ 'timestamp': 1565997540,
+ },
+ }, {
+ # radio
+ 'url': 'https://www3.nhk.or.jp/nhkworld/en/ondemand/audio/livinginjapan-20231001-1/',
+ 'info_dict': {
+ 'id': 'livinginjapan-20231001-1-en',
+ 'ext': 'm4a',
+ 'title': 'Living in Japan - Tips for Travelers to Japan / Ramen Vending Machines',
+ 'series': 'Living in Japan',
+ 'description': 'md5:0a0e2077d8f07a03071e990a6f51bfab',
+ 'thumbnail': 'md5:960622fb6e06054a4a1a0c97ea752545',
+ 'episode': 'Tips for Travelers to Japan / Ramen Vending Machines'
+ },
+ }, {
+ 'url': 'https://www3.nhk.or.jp/nhkworld/en/ondemand/video/2015173/',
+ 'only_matching': True,
+ }, {
+ 'url': 'https://www3.nhk.or.jp/nhkworld/en/ondemand/audio/plugin-20190404-1/',
+ 'only_matching': True,
+ }, {
+ 'url': 'https://www3.nhk.or.jp/nhkworld/fr/ondemand/audio/plugin-20190404-1/',
+ 'only_matching': True,
+ }, {
+ 'url': 'https://www3.nhk.or.jp/nhkworld/en/ondemand/audio/j_art-20150903-1/',
+ 'only_matching': True,
+ }, {
+ # video, alphabetic character in ID #29670
+ 'url': 'https://www3.nhk.or.jp/nhkworld/en/ondemand/video/9999a34/',
+ 'info_dict': {
+ 'id': 'qfjay6cg',
+ 'ext': 'mp4',
+ 'title': 'DESIGN TALKS plus - Fishermen’s Finery',
+ 'description': 'md5:8a8f958aaafb0d7cb59d38de53f1e448',
+ 'thumbnail': r're:^https?:/(/[a-z0-9.-]+)+\.jpg\?w=1920&h=1080$',
+ 'upload_date': '20210615',
+ 'timestamp': 1623722008,
+ },
+ 'skip': '404 Not Found',
+ }, {
+ # japanese-language, longer id than english
+ 'url': 'https://www3.nhk.or.jp/nhkworld/ja/ondemand/video/0020271111/',
+ 'info_dict': {
+ 'id': 'nw_ja_v_jvod_ohayou_20231008',
+ 'ext': 'mp4',
+ 'title': 'おはよう日本(7時台) - 10月8日放送',
+ 'series': 'おはよう日本(7時台)',
+ 'episode': '10月8日放送',
+ 'thumbnail': 'md5:d733b1c8e965ab68fb02b2d347d0e9b4',
+ 'description': 'md5:9c1d6cbeadb827b955b20e99ab920ff0',
+ },
+ 'skip': 'expires 2023-10-15',
+ }, {
+ # a one-off (single-episode series). title from the api is just '<p></p>'
+ 'url': 'https://www3.nhk.or.jp/nhkworld/en/ondemand/video/3004952/',
+ 'info_dict': {
+ 'id': 'nw_vod_v_en_3004_952_20230723091000_01_1690074552',
+ 'ext': 'mp4',
+ 'title': 'Barakan Discovers AMAMI OSHIMA: Isson\'s Treasure Island',
+ 'description': 'md5:5db620c46a0698451cc59add8816b797',
+ 'thumbnail': 'md5:67d9ff28009ba379bfa85ad1aaa0e2bd',
+ 'release_date': '20230905',
+ 'timestamp': 1690103400,
+ 'duration': 2939,
+ 'release_timestamp': 1693898699,
+ 'modified_timestamp': 1698057495,
+ 'modified_date': '20231023',
+ 'upload_date': '20230723',
+ },
+ }]
+
+ def _real_extract(self, url):
+ return self._extract_episode_info(url)
+
+
+class NhkVodProgramIE(NhkBaseIE):
+ _VALID_URL = rf'{NhkBaseIE._BASE_URL_REGEX}/program{NhkBaseIE._TYPE_REGEX}(?P<id>\w+)(?:.+?\btype=(?P<episode_type>clip|(?:radio|tv)Episode))?'
+ _TESTS = [{
+ # video program episodes
+ 'url': 'https://www3.nhk.or.jp/nhkworld/en/ondemand/program/video/sumo',
+ 'info_dict': {
+ 'id': 'sumo',
+ 'title': 'GRAND SUMO Highlights',
+ 'description': 'md5:fc20d02dc6ce85e4b72e0273aa52fdbf',
+ },
+ 'playlist_mincount': 0,
+ }, {
+ 'url': 'https://www3.nhk.or.jp/nhkworld/en/ondemand/program/video/japanrailway',
+ 'info_dict': {
+ 'id': 'japanrailway',
+ 'title': 'Japan Railway Journal',
+ 'description': 'md5:ea39d93af7d05835baadf10d1aae0e3f',
+ },
+ 'playlist_mincount': 12,
+ }, {
+ # video program clips
+ 'url': 'https://www3.nhk.or.jp/nhkworld/en/ondemand/program/video/japanrailway/?type=clip',
+ 'info_dict': {
+ 'id': 'japanrailway',
+ 'title': 'Japan Railway Journal',
+ 'description': 'md5:ea39d93af7d05835baadf10d1aae0e3f',
+ },
+ 'playlist_mincount': 5,
+ }, {
+ 'url': 'https://www3.nhk.or.jp/nhkworld/en/ondemand/program/video/10yearshayaomiyazaki/',
+ 'only_matching': True,
+ }, {
+ # audio program
+ 'url': 'https://www3.nhk.or.jp/nhkworld/en/ondemand/program/audio/listener/',
+ 'only_matching': True,
+ }]
+
+ def _real_extract(self, url):
+ lang, m_type, program_id, episode_type = self._match_valid_url(url).group('lang', 'type', 'id', 'episode_type')
+ episodes = self._call_api(
+ program_id, lang, m_type == 'video', False, episode_type == 'clip')
+
+ entries = []
+ for episode in episodes:
+ episode_path = episode.get('url')
+ if not episode_path:
+ continue
+ entries.append(self._extract_episode_info(
+ urljoin(url, episode_path), episode))
+
+ html = self._download_webpage(url, program_id)
+ program_title = clean_html(get_element_by_class('p-programDetail__title', html))
+ program_description = clean_html(get_element_by_class('p-programDetail__text', html))
+
+ return self.playlist_result(entries, program_id, program_title, program_description)
+
+
+class NhkForSchoolBangumiIE(InfoExtractor):
+ _VALID_URL = r'https?://www2\.nhk\.or\.jp/school/movie/(?P<type>bangumi|clip)\.cgi\?das_id=(?P<id>[a-zA-Z0-9_-]+)'
+ _TESTS = [{
+ 'url': 'https://www2.nhk.or.jp/school/movie/bangumi.cgi?das_id=D0005150191_00000',
+ 'info_dict': {
+ 'id': 'D0005150191_00003',
+ 'title': 'にている かな',
+ 'duration': 599.999,
+            'timestamp': 1396414800,
+            'upload_date': '20140402',
+            'ext': 'mp4',
+            'chapters': 'count:12',
+        },
+ 'params': {
+ # m3u8 download
+ 'skip_download': True,
+ },
+ }]
+
+ def _real_extract(self, url):
+ program_type, video_id = self._match_valid_url(url).groups()
+
+ webpage = self._download_webpage(
+ f'https://www2.nhk.or.jp/school/movie/{program_type}.cgi?das_id={video_id}', video_id)
+
+        # collect all top-level string variables
+ base_values = {g.group(1): g.group(2) for g in re.finditer(r'var\s+([a-zA-Z_]+)\s*=\s*"([^"]+?)";', webpage)}
+ # and programObj values too
+ program_values = {g.group(1): g.group(3) for g in re.finditer(r'(?:program|clip)Obj\.([a-zA-Z_]+)\s*=\s*(["\'])([^"]+?)\2;', webpage)}
+ # extract all chapters
+ chapter_durations = [parse_duration(g.group(1)) for g in re.finditer(r'chapterTime\.push\(\'([0-9:]+?)\'\);', webpage)]
+ chapter_titles = [' '.join([g.group(1) or '', unescapeHTML(g.group(2))]).strip() for g in re.finditer(r'<div class="cpTitle"><span>(scene\s*\d+)?</span>([^<]+?)</div>', webpage)]
+
+        # this is what player_core.js actually does (!)
+ version = base_values.get('r_version') or program_values.get('version')
+ if version:
+ video_id = f'{video_id.split("_")[0]}_{version}'
+
+ formats = self._extract_m3u8_formats(
+ f'https://nhks-vh.akamaihd.net/i/das/{video_id[0:8]}/{video_id}_V_000.f4v/master.m3u8',
+ video_id, ext='mp4', m3u8_id='hls')
+
+ duration = parse_duration(base_values.get('r_duration'))
+
+ chapters = None
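+        # chapterTime entries are start offsets; each chapter ends where the
+        # next begins, and the final one at the total duration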
+ if chapter_durations and chapter_titles and len(chapter_durations) == len(chapter_titles):
+ start_time = chapter_durations
+ end_time = chapter_durations[1:] + [duration]
+ chapters = [{
+ 'start_time': s,
+ 'end_time': e,
+ 'title': t,
+ } for s, e, t in zip(start_time, end_time, chapter_titles)]
+
+ return {
+ 'id': video_id,
+ 'title': program_values.get('name'),
+            'duration': duration,
+ 'timestamp': unified_timestamp(base_values['r_upload']),
+ 'formats': formats,
+ 'chapters': chapters,
+ }
+
+
+class NhkForSchoolSubjectIE(InfoExtractor):
+ IE_DESC = 'Portal page for each school subjects, like Japanese (kokugo, 国語) or math (sansuu/suugaku or 算数・数学)'
+ KNOWN_SUBJECTS = (
+ 'rika', 'syakai', 'kokugo',
+ 'sansuu', 'seikatsu', 'doutoku',
+ 'ongaku', 'taiiku', 'zukou',
+ 'gijutsu', 'katei', 'sougou',
+ 'eigo', 'tokkatsu',
+ 'tokushi', 'sonota',
+ )
+ _VALID_URL = r'https?://www\.nhk\.or\.jp/school/(?P<id>%s)/?(?:[\?#].*)?$' % '|'.join(re.escape(s) for s in KNOWN_SUBJECTS)
+
+ _TESTS = [{
+ 'url': 'https://www.nhk.or.jp/school/sougou/',
+ 'info_dict': {
+ 'id': 'sougou',
+ 'title': '総合的な学習の時間',
+ },
+ 'playlist_mincount': 16,
+ }, {
+ 'url': 'https://www.nhk.or.jp/school/rika/',
+ 'info_dict': {
+ 'id': 'rika',
+ 'title': '理科',
+ },
+ 'playlist_mincount': 15,
+ }]
+
+ def _real_extract(self, url):
+ subject_id = self._match_id(url)
+ webpage = self._download_webpage(url, subject_id)
+
+ return self.playlist_from_matches(
+ re.finditer(rf'href="((?:https?://www\.nhk\.or\.jp)?/school/{re.escape(subject_id)}/[^/]+/)"', webpage),
+ subject_id,
+ self._html_search_regex(r'(?s)<span\s+class="subjectName">\s*<img\s*[^<]+>\s*([^<]+?)</span>', webpage, 'title', fatal=False),
+ lambda g: urljoin(url, g.group(1)))
+
+
+class NhkForSchoolProgramListIE(InfoExtractor):
+ _VALID_URL = r'https?://www\.nhk\.or\.jp/school/(?P<id>(?:%s)/[a-zA-Z0-9_-]+)' % (
+ '|'.join(re.escape(s) for s in NhkForSchoolSubjectIE.KNOWN_SUBJECTS)
+ )
+ _TESTS = [{
+ 'url': 'https://www.nhk.or.jp/school/sougou/q/',
+ 'info_dict': {
+ 'id': 'sougou/q',
+ 'title': 'Q~こどものための哲学',
+ },
+ 'playlist_mincount': 20,
+ }]
+
+ def _real_extract(self, url):
+ program_id = self._match_id(url)
+
+ webpage = self._download_webpage(f'https://www.nhk.or.jp/school/{program_id}/', program_id)
+
+ title = (self._generic_title('', webpage)
+ or self._html_search_regex(r'<h3>([^<]+?)とは?\s*</h3>', webpage, 'title', fatal=False))
+ title = re.sub(r'\s*\|\s*NHK\s+for\s+School\s*$', '', title) if title else None
+ description = self._html_search_regex(
+ r'(?s)<div\s+class="programDetail\s*">\s*<p>[^<]+</p>',
+ webpage, 'description', fatal=False, group=0)
+
+ bangumi_list = self._download_json(
+ f'https://www.nhk.or.jp/school/{program_id}/meta/program.json', program_id)
+ # they're always bangumi
+ bangumis = [
+ self.url_result(f'https://www2.nhk.or.jp/school/movie/bangumi.cgi?das_id={x}')
+ for x in traverse_obj(bangumi_list, ('part', ..., 'part-video-dasid')) or []]
+
+ return self.playlist_result(bangumis, program_id, title, description)
+
+
+class NhkRadiruIE(InfoExtractor):
+ _GEO_COUNTRIES = ['JP']
+ IE_DESC = 'NHK らじる (Radiru/Rajiru)'
+ _VALID_URL = r'https?://www\.nhk\.or\.jp/radio/(?:player/ondemand|ondemand/detail)\.html\?p=(?P<site>[\da-zA-Z]+)_(?P<corner>[\da-zA-Z]+)(?:_(?P<headline>[\da-zA-Z]+))?'
+ _TESTS = [{
+ 'url': 'https://www.nhk.or.jp/radio/player/ondemand.html?p=0449_01_3926210',
+ 'skip': 'Episode expired on 2024-02-24',
+ 'info_dict': {
+ 'title': 'ジャズ・トゥナイト シリーズJAZZジャイアンツ 56 ジョニー・ホッジス',
+ 'id': '0449_01_3926210',
+ 'ext': 'm4a',
+ 'series': 'ジャズ・トゥナイト',
+ 'uploader': 'NHK-FM',
+ 'channel': 'NHK-FM',
+ 'thumbnail': 'https://www.nhk.or.jp/prog/img/449/g449.jpg',
+ 'release_date': '20240217',
+ 'description': 'md5:a456ee8e5e59e6dd2a7d32e62386e811',
+ 'timestamp': 1708185600,
+ 'release_timestamp': 1708178400,
+ 'upload_date': '20240217',
+ },
+ }, {
+ # playlist, airs every weekday so it should _hopefully_ be okay forever
+ 'url': 'https://www.nhk.or.jp/radio/ondemand/detail.html?p=0458_01',
+ 'info_dict': {
+ 'id': '0458_01',
+ 'title': 'ベストオブクラシック',
+ 'description': '世界中の上質な演奏会をじっくり堪能する本格派クラシック番組。',
+ 'channel': 'NHK-FM',
+ 'uploader': 'NHK-FM',
+ 'thumbnail': 'https://www.nhk.or.jp/prog/img/458/g458.jpg',
+ },
+ 'playlist_mincount': 3,
+ }, {
+ # one with letters in the id
+ 'url': 'https://www.nhk.or.jp/radio/player/ondemand.html?p=F300_06_3738470',
+ 'note': 'Expires on 2024-03-31',
+ 'info_dict': {
+ 'id': 'F300_06_3738470',
+ 'ext': 'm4a',
+ 'title': '有島武郎「一房のぶどう」',
+ 'description': '朗読:川野一宇(ラジオ深夜便アンカー)\r\n\r\n(2016年12月8日放送「ラジオ深夜便『アンカー朗読シリーズ』」より)',
+ 'channel': 'NHKラジオ第1、NHK-FM',
+ 'uploader': 'NHKラジオ第1、NHK-FM',
+ 'timestamp': 1635757200,
+ 'thumbnail': 'https://www.nhk.or.jp/radioondemand/json/F300/img/corner/box_109_thumbnail.jpg',
+ 'release_date': '20161207',
+ 'series': 'らじる文庫 by ラジオ深夜便 ',
+ 'release_timestamp': 1481126700,
+ 'upload_date': '20211101',
+ },
+ 'expected_warnings': ['Unable to download JSON metadata', 'Failed to get extended description'],
+ }, {
+ # news
+ 'url': 'https://www.nhk.or.jp/radio/player/ondemand.html?p=F261_01_3855109',
+ 'skip': 'Expires on 2023-04-17',
+ 'info_dict': {
+ 'id': 'F261_01_3855109',
+ 'ext': 'm4a',
+ 'channel': 'NHKラジオ第1',
+ 'uploader': 'NHKラジオ第1',
+ 'timestamp': 1681635900,
+ 'release_date': '20230416',
+ 'series': 'NHKラジオニュース',
+ 'title': '午後6時のNHKニュース',
+ 'thumbnail': 'https://www.nhk.or.jp/radioondemand/json/F261/img/RADIONEWS_640.jpg',
+ 'upload_date': '20230416',
+ 'release_timestamp': 1681635600,
+ },
+ }]
+
+ _API_URL_TMPL = None
+
+ def _extract_extended_description(self, episode_id, episode):
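+        # aa_vinfo2 is a comma-separated '<service>,<area>' pair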
+ service, _, area = traverse_obj(episode, ('aa_vinfo2', {str}, {lambda x: (x or '').partition(',')}))
+ aa_vinfo3 = traverse_obj(episode, ('aa_vinfo3', {str}))
+ detail_url = try_call(
+ lambda: self._API_URL_TMPL.format(service=service, area=area, dateid=aa_vinfo3))
+ if not detail_url:
+ return
+
+ full_meta = traverse_obj(
+ self._download_json(detail_url, episode_id, 'Downloading extended metadata', fatal=False),
+ ('list', service, 0, {dict})) or {}
+ return join_nonempty('subtitle', 'content', 'act', 'music', delim='\n\n', from_dict=full_meta)
+
+ def _extract_episode_info(self, headline, programme_id, series_meta):
+ episode_id = f'{programme_id}_{headline["headline_id"]}'
+ episode = traverse_obj(headline, ('file_list', 0, {dict}))
+ description = self._extract_extended_description(episode_id, episode)
+ if not description:
+ self.report_warning('Failed to get extended description, falling back to summary')
+ description = traverse_obj(episode, ('file_title_sub', {str}))
+
+ return {
+ **series_meta,
+ 'id': episode_id,
+ 'formats': self._extract_m3u8_formats(episode.get('file_name'), episode_id, fatal=False),
+ 'container': 'm4a_dash', # force fixup, AAC-only HLS
+ 'was_live': True,
+ 'series': series_meta.get('title'),
+ 'thumbnail': url_or_none(headline.get('headline_image')) or series_meta.get('thumbnail'),
+ 'description': description,
+ **traverse_obj(episode, {
+ 'title': 'file_title',
+ 'timestamp': ('open_time', {unified_timestamp}),
+ 'release_timestamp': ('aa_vinfo4', {lambda x: x.split('_')[0]}, {unified_timestamp}),
+ }),
+ }
+
+ def _real_initialize(self):
+ if self._API_URL_TMPL:
+ return
+ api_config = self._download_xml(
+ 'https://www.nhk.or.jp/radio/config/config_web.xml', None, 'Downloading API config', fatal=False)
+ NhkRadiruIE._API_URL_TMPL = try_call(lambda: f'https:{api_config.find(".//url_program_detail").text}')
+
+ def _real_extract(self, url):
+ site_id, corner_id, headline_id = self._match_valid_url(url).group('site', 'corner', 'headline')
+ programme_id = f'{site_id}_{corner_id}'
+
+ if site_id == 'F261':
+ json_url = 'https://www.nhk.or.jp/s-media/news/news-site/list/v1/all.json'
+ else:
+ json_url = f'https://www.nhk.or.jp/radioondemand/json/{site_id}/bangumi_{programme_id}.json'
+
+ meta = self._download_json(json_url, programme_id)['main']
+
+ series_meta = traverse_obj(meta, {
+ 'title': 'program_name',
+ 'channel': 'media_name',
+ 'uploader': 'media_name',
+ 'thumbnail': (('thumbnail_c', 'thumbnail_p'), {url_or_none}),
+ }, get_all=False)
+
+ if headline_id:
+ return self._extract_episode_info(
+ traverse_obj(meta, (
+ 'detail_list', lambda _, v: v['headline_id'] == headline_id), get_all=False),
+ programme_id, series_meta)
+
+ def entries():
+ for headline in traverse_obj(meta, ('detail_list', ..., {dict})):
+ yield self._extract_episode_info(headline, programme_id, series_meta)
+
+ return self.playlist_result(
+ entries(), programme_id, playlist_description=meta.get('site_detail'), **series_meta)
+
+
+class NhkRadioNewsPageIE(InfoExtractor):
+ _VALID_URL = r'https?://www\.nhk\.or\.jp/radionews/?(?:$|[?#])'
+ _TESTS = [{
+ # airs daily, on-the-hour most hours
+ 'url': 'https://www.nhk.or.jp/radionews/',
+ 'playlist_mincount': 5,
+ 'info_dict': {
+ 'id': 'F261_01',
+ 'thumbnail': 'https://www.nhk.or.jp/radioondemand/json/F261/img/RADIONEWS_640.jpg',
+ 'description': 'md5:bf2c5b397e44bc7eb26de98d8f15d79d',
+ 'channel': 'NHKラジオ第1',
+ 'uploader': 'NHKラジオ第1',
+ 'title': 'NHKラジオニュース',
+ }
+ }]
+
+ def _real_extract(self, url):
+ return self.url_result('https://www.nhk.or.jp/radio/ondemand/detail.html?p=F261_01', NhkRadiruIE)
+
+
+class NhkRadiruLiveIE(InfoExtractor):
+ _GEO_COUNTRIES = ['JP']
+ _VALID_URL = r'https?://www\.nhk\.or\.jp/radio/player/\?ch=(?P<id>r[12]|fm)'
+ _TESTS = [{
+ # radio 1, no area specified
+ 'url': 'https://www.nhk.or.jp/radio/player/?ch=r1',
+ 'info_dict': {
+ 'id': 'r1-tokyo',
+ 'title': 're:^NHKネットラジオ第1 東京.+$',
+ 'ext': 'm4a',
+ 'thumbnail': 'https://www.nhk.or.jp/common/img/media/r1-200x200.png',
+ 'live_status': 'is_live',
+ },
+ }, {
+ # radio 2, area specified
+        # (the area doesn't actually matter; r2 is national)
+ 'url': 'https://www.nhk.or.jp/radio/player/?ch=r2',
+ 'params': {'extractor_args': {'nhkradirulive': {'area': ['fukuoka']}}},
+ 'info_dict': {
+ 'id': 'r2-fukuoka',
+ 'title': 're:^NHKネットラジオ第2 福岡.+$',
+ 'ext': 'm4a',
+ 'thumbnail': 'https://www.nhk.or.jp/common/img/media/r2-200x200.png',
+ 'live_status': 'is_live',
+ },
+ }, {
+ # fm, area specified
+ 'url': 'https://www.nhk.or.jp/radio/player/?ch=fm',
+ 'params': {'extractor_args': {'nhkradirulive': {'area': ['sapporo']}}},
+ 'info_dict': {
+ 'id': 'fm-sapporo',
+ 'title': 're:^NHKネットラジオFM 札幌.+$',
+ 'ext': 'm4a',
+ 'thumbnail': 'https://www.nhk.or.jp/common/img/media/fm-200x200.png',
+ 'live_status': 'is_live',
+ }
+ }]
+
+ _NOA_STATION_IDS = {'r1': 'n1', 'r2': 'n2', 'fm': 'n3'}
+
+ def _real_extract(self, url):
+ station = self._match_id(url)
+ area = self._configuration_arg('area', ['tokyo'])[0]
+
+ config = self._download_xml(
+ 'https://www.nhk.or.jp/radio/config/config_web.xml', station, 'Downloading area information')
+ data = config.find(f'.//data//area[.="{area}"]/..')
+
+ if not data:
+ raise ExtractorError('Invalid area. Valid areas are: %s' % ', '.join(
+ [i.text for i in config.findall('.//data//area')]), expected=True)
+
+ noa_info = self._download_json(
+ f'https:{config.find(".//url_program_noa").text}'.format(area=data.find('areakey').text),
+ station, note=f'Downloading {area} station metadata', fatal=False)
+ present_info = traverse_obj(noa_info, ('nowonair_list', self._NOA_STATION_IDS.get(station), 'present'))
+
+ return {
+ 'title': ' '.join(traverse_obj(present_info, (('service', 'area',), 'name', {str}))),
+ 'id': join_nonempty(station, area),
+ 'thumbnails': traverse_obj(present_info, ('service', 'images', ..., {
+ 'url': 'url',
+ 'width': ('width', {int_or_none}),
+ 'height': ('height', {int_or_none}),
+ })),
+ 'formats': self._extract_m3u8_formats(data.find(f'{station}hls').text, station),
+ 'is_live': True,
+ }
diff --git a/yt_dlp/extractor/nhl.py b/yt_dlp/extractor/nhl.py
new file mode 100644
index 0000000..2521c40
--- /dev/null
+++ b/yt_dlp/extractor/nhl.py
@@ -0,0 +1,123 @@
+from .common import InfoExtractor
+from ..compat import compat_str
+from ..utils import (
+ determine_ext,
+ int_or_none,
+ parse_iso8601,
+ parse_duration,
+)
+
+
+class NHLBaseIE(InfoExtractor):
+ def _real_extract(self, url):
+ site, tmp_id = self._match_valid_url(url).groups()
+ video_data = self._download_json(
+ 'https://%s/%s/%sid/v1/%s/details/web-v1.json'
+ % (self._CONTENT_DOMAIN, site[:3], 'item/' if site == 'mlb' else '', tmp_id), tmp_id)
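+ # e.g. for site 'nhl' and tmp_id '43663503' this resolves to
+ # https://nhl.bamcontent.com/nhl/id/v1/43663503/details/web-v1.json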
+ if video_data.get('type') != 'video':
+ video_data = video_data['media']
+ video = video_data.get('video')
+ if video:
+ video_data = video
+ else:
+ videos = video_data.get('videos')
+ if videos:
+ video_data = videos[0]
+
+ video_id = compat_str(video_data['id'])
+ title = video_data['title']
+
+ formats = []
+ for playback in video_data.get('playbacks', []):
+ playback_url = playback.get('url')
+ if not playback_url:
+ continue
+ ext = determine_ext(playback_url)
+ if ext == 'm3u8':
+ m3u8_formats = self._extract_m3u8_formats(
+ playback_url, video_id, 'mp4', 'm3u8_native',
+ m3u8_id=playback.get('name', 'hls'), fatal=False)
+ self._check_formats(m3u8_formats, video_id)
+ formats.extend(m3u8_formats)
+ else:
+ height = int_or_none(playback.get('height'))
+ formats.append({
+ 'format_id': playback.get('name', 'http' + ('-%dp' % height if height else '')),
+ 'url': playback_url,
+ 'width': int_or_none(playback.get('width')),
+ 'height': height,
+ 'tbr': int_or_none(self._search_regex(r'_(\d+)[kK]', playback_url, 'bitrate', default=None)),
+ })
+
+ thumbnails = []
+ cuts = video_data.get('image', {}).get('cuts') or []
+ if isinstance(cuts, dict):
+ cuts = cuts.values()
+ for thumbnail_data in cuts:
+ thumbnail_url = thumbnail_data.get('src')
+ if not thumbnail_url:
+ continue
+ thumbnails.append({
+ 'url': thumbnail_url,
+ 'width': int_or_none(thumbnail_data.get('width')),
+ 'height': int_or_none(thumbnail_data.get('height')),
+ })
+
+ return {
+ 'id': video_id,
+ 'title': title,
+ 'description': video_data.get('description'),
+ 'timestamp': parse_iso8601(video_data.get('date')),
+ 'duration': parse_duration(video_data.get('duration')),
+ 'thumbnails': thumbnails,
+ 'formats': formats,
+ }
+
+
+class NHLIE(NHLBaseIE):
+ IE_NAME = 'nhl.com'
+ _VALID_URL = r'https?://(?:www\.)?(?P<site>nhl|wch2016)\.com/(?:[^/]+/)*c-(?P<id>\d+)'
+ _CONTENT_DOMAIN = 'nhl.bamcontent.com'
+ _TESTS = [{
+ # type=video
+ 'url': 'https://www.nhl.com/video/anisimov-cleans-up-mess/t-277752844/c-43663503',
+ 'md5': '0f7b9a8f986fb4b4eeeece9a56416eaf',
+ 'info_dict': {
+ 'id': '43663503',
+ 'ext': 'mp4',
+ 'title': 'Anisimov cleans up mess',
+ 'description': 'md5:a02354acdfe900e940ce40706939ca63',
+ 'timestamp': 1461288600,
+ 'upload_date': '20160422',
+ },
+ }, {
+ # type=article
+ 'url': 'https://www.nhl.com/news/dennis-wideman-suspended/c-278258934',
+ 'md5': '1f39f4ea74c1394dea110699a25b366c',
+ 'info_dict': {
+ 'id': '40784403',
+ 'ext': 'mp4',
+ 'title': 'Wideman suspended by NHL',
+ 'description': 'Flames defenseman Dennis Wideman was banned 20 games for violation of Rule 40 (Physical Abuse of Officials)',
+ 'upload_date': '20160204',
+ 'timestamp': 1454544904,
+ },
+ }, {
+ # Some m3u8 URLs are invalid (https://github.com/ytdl-org/youtube-dl/issues/10713)
+ 'url': 'https://www.nhl.com/predators/video/poile-laviolette-on-subban-trade/t-277437416/c-44315003',
+ 'md5': '50b2bb47f405121484dda3ccbea25459',
+ 'info_dict': {
+ 'id': '44315003',
+ 'ext': 'mp4',
+ 'title': 'Poile, Laviolette on Subban trade',
+ 'description': 'General manager David Poile and head coach Peter Laviolette share their thoughts on acquiring P.K. Subban from Montreal (06/29/16)',
+ 'timestamp': 1467242866,
+ 'upload_date': '20160629',
+ },
+ }, {
+ 'url': 'https://www.wch2016.com/video/caneur-best-of-game-2-micd-up/t-281230378/c-44983703',
+ 'only_matching': True,
+ }, {
+ 'url': 'https://www.wch2016.com/news/3-stars-team-europe-vs-team-canada/c-282195068',
+ 'only_matching': True,
+ }]
diff --git a/yt_dlp/extractor/nick.py b/yt_dlp/extractor/nick.py
new file mode 100644
index 0000000..165d8ce
--- /dev/null
+++ b/yt_dlp/extractor/nick.py
@@ -0,0 +1,224 @@
+from .mtv import MTVServicesInfoExtractor
+from ..utils import update_url_query
+
+
+class NickIE(MTVServicesInfoExtractor):
+ IE_NAME = 'nick.com'
+ _VALID_URL = r'https?://(?P<domain>(?:www\.)?nick(?:jr)?\.com)/(?:[^/]+/)?(?P<type>videos/clip|[^/]+/videos|episodes/[^/]+)/(?P<id>[^/?#.]+)'
+ _FEED_URL = 'http://udat.mtvnservices.com/service1/dispatch.htm'
+ _GEO_COUNTRIES = ['US']
+ _TESTS = [{
+ 'url': 'https://www.nick.com/episodes/sq47rw/spongebob-squarepants-a-place-for-pets-lockdown-for-love-season-13-ep-1',
+ 'info_dict': {
+ 'description': 'md5:0650a9eb88955609d5c1d1c79292e234',
+ 'title': 'A Place for Pets/Lockdown for Love',
+ },
+ 'playlist': [
+ {
+ 'md5': 'cb8a2afeafb7ae154aca5a64815ec9d6',
+ 'info_dict': {
+ 'id': '85ee8177-d6ce-48f8-9eee-a65364f8a6df',
+ 'ext': 'mp4',
+ 'title': 'SpongeBob SquarePants: "A Place for Pets/Lockdown for Love" S1',
+ 'description': 'A Place for Pets/Lockdown for Love: When customers bring pets into the Krusty Krab, Mr. Krabs realizes pets are more profitable than owners. Plankton ruins another date with Karen, so she puts the Chum Bucket on lockdown until he proves his affection.',
+ }
+ },
+ {
+ 'md5': '839a04f49900a1fcbf517020d94e0737',
+ 'info_dict': {
+ 'id': '2e2a9960-8fd4-411d-868b-28eb1beb7fae',
+ 'ext': 'mp4',
+ 'title': 'SpongeBob SquarePants: "A Place for Pets/Lockdown for Love" S2',
+ 'description': 'A Place for Pets/Lockdown for Love: When customers bring pets into the Krusty Krab, Mr. Krabs realizes pets are more profitable than owners. Plankton ruins another date with Karen, so she puts the Chum Bucket on lockdown until he proves his affection.',
+ }
+ },
+ {
+ 'md5': 'f1145699f199770e2919ee8646955d46',
+ 'info_dict': {
+ 'id': 'dc91c304-6876-40f7-84a6-7aece7baa9d0',
+ 'ext': 'mp4',
+ 'title': 'SpongeBob SquarePants: "A Place for Pets/Lockdown for Love" S3',
+ 'description': 'A Place for Pets/Lockdown for Love: When customers bring pets into the Krusty Krab, Mr. Krabs realizes pets are more profitable than owners. Plankton ruins another date with Karen, so she puts the Chum Bucket on lockdown until he proves his affection.',
+ }
+ },
+ {
+ 'md5': 'd463116875aee2585ee58de3b12caebd',
+ 'info_dict': {
+ 'id': '5d929486-cf4c-42a1-889a-6e0d183a101a',
+ 'ext': 'mp4',
+ 'title': 'SpongeBob SquarePants: "A Place for Pets/Lockdown for Love" S4',
+ 'description': 'A Place for Pets/Lockdown for Love: When customers bring pets into the Krusty Krab, Mr. Krabs realizes pets are more profitable than owners. Plankton ruins another date with Karen, so she puts the Chum Bucket on lockdown until he proves his affection.',
+ }
+ },
+ ],
+ }, {
+ 'url': 'http://www.nickjr.com/blues-clues-and-you/videos/blues-clues-and-you-original-209-imagination-station/',
+ 'info_dict': {
+ 'id': '31631529-2fc5-430b-b2ef-6a74b4609abd',
+ 'ext': 'mp4',
+ 'description': 'md5:9d65a66df38e02254852794b2809d1cf',
+ 'title': 'Blue\'s Imagination Station',
+ },
+ 'skip': 'Not accessible?'
+ }]
+
+ def _get_feed_query(self, uri):
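+ # The MTV base class is expected to attach this query to _FEED_URL,
+ # requesting roughly:
+ # http://udat.mtvnservices.com/service1/dispatch.htm?feed=nick_arc_player_prime&mgid=<uri>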
+ return {
+ 'feed': 'nick_arc_player_prime',
+ 'mgid': uri,
+ }
+
+ def _real_extract(self, url):
+ domain, video_type, display_id = self._match_valid_url(url).groups()
+ if video_type.startswith('episodes'):
+ return super()._real_extract(url)
+ video_data = self._download_json(
+ 'http://%s/data/video.endLevel.json' % domain,
+ display_id, query={
+ 'urlKey': display_id,
+ })
+ return self._get_videos_info(video_data['player'] + video_data['id'])
+
+
+class NickBrIE(MTVServicesInfoExtractor):
+ IE_NAME = 'nickelodeon:br'
+ _VALID_URL = r'''(?x)
+ https?://
+ (?:
+ (?P<domain>(?:www\.)?nickjr|mundonick\.uol)\.com\.br|
+ (?:www\.)?nickjr\.[a-z]{2}|
+ (?:www\.)?nickelodeonjunior\.fr
+ )
+ /(?:programas/)?[^/]+/videos/(?:episodios/)?(?P<id>[^/?\#.]+)
+ '''
+ _TESTS = [{
+ 'url': 'http://www.nickjr.com.br/patrulha-canina/videos/210-labirinto-de-pipoca/',
+ 'only_matching': True,
+ }, {
+ 'url': 'http://mundonick.uol.com.br/programas/the-loud-house/videos/muitas-irmas/7ljo9j',
+ 'only_matching': True,
+ }, {
+ 'url': 'http://www.nickjr.nl/paw-patrol/videos/311-ge-wol-dig-om-terug-te-zijn/',
+ 'only_matching': True,
+ }, {
+ 'url': 'http://www.nickjr.de/blaze-und-die-monster-maschinen/videos/f6caaf8f-e4e8-4cc1-b489-9380d6dcd059/',
+ 'only_matching': True,
+ }, {
+ 'url': 'http://www.nickelodeonjunior.fr/paw-patrol-la-pat-patrouille/videos/episode-401-entier-paw-patrol/',
+ 'only_matching': True,
+ }]
+
+ def _real_extract(self, url):
+ domain, display_id = self._match_valid_url(url).groups()
+ webpage = self._download_webpage(url, display_id)
+ uri = self._search_regex(
+ r'data-(?:contenturi|mgid)="([^"]+)', webpage, 'mgid')
+ video_id = self._id_from_uri(uri)
+ config = self._download_json(
+ 'http://media.mtvnservices.com/pmt/e1/access/index.html',
+ video_id, query={
+ 'uri': uri,
+ 'configtype': 'edge',
+ }, headers={
+ 'Referer': url,
+ })
+ info_url = self._remove_template_parameter(config['feedWithQueryParams'])
+ if info_url == 'None':
+ if domain.startswith('www.'):
+ domain = domain[4:]
+ content_domain = {
+ 'mundonick.uol': 'mundonick.com.br',
+ 'nickjr': 'br.nickelodeonjunior.tv',
+ }[domain]
+ query = {
+ 'mgid': uri,
+ 'imageEp': content_domain,
+ 'arcEp': content_domain,
+ }
+ if domain == 'nickjr':  # the captured domain omits 'www.' and '.com.br'
+ query['ep'] = 'c4b16088'
+ info_url = update_url_query(
+ 'http://feeds.mtvnservices.com/od/feed/intl-mrss-player-feed', query)
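+ # e.g. for the nickjr case this yields roughly:
+ # http://feeds.mtvnservices.com/od/feed/intl-mrss-player-feed?mgid=<uri>&imageEp=br.nickelodeonjunior.tv&arcEp=br.nickelodeonjunior.tv&ep=c4b16088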
+ return self._get_videos_info_from_url(info_url, video_id)
+
+
+class NickDeIE(MTVServicesInfoExtractor):
+ IE_NAME = 'nick.de'
+ _VALID_URL = r'https?://(?:www\.)?(?P<host>nick\.(?:de|com\.pl|ch)|nickelodeon\.(?:nl|be|at|dk|no|se))/[^/]+/(?:[^/]+/)*(?P<id>[^/?#&]+)'
+ _TESTS = [{
+ 'url': 'http://www.nick.de/playlist/3773-top-videos/videos/episode/17306-zu-wasser-und-zu-land-rauchende-erdnusse',
+ 'only_matching': True,
+ }, {
+ 'url': 'http://www.nick.de/shows/342-icarly',
+ 'only_matching': True,
+ }, {
+ 'url': 'http://www.nickelodeon.nl/shows/474-spongebob/videos/17403-een-kijkje-in-de-keuken-met-sandy-van-binnenuit',
+ 'only_matching': True,
+ }, {
+ 'url': 'http://www.nickelodeon.at/playlist/3773-top-videos/videos/episode/77993-das-letzte-gefecht',
+ 'only_matching': True,
+ }, {
+ 'url': 'http://www.nick.com.pl/seriale/474-spongebob-kanciastoporty/wideo/17412-teatr-to-jest-to-rodeo-oszolom',
+ 'only_matching': True,
+ }, {
+ 'url': 'http://www.nickelodeon.no/program/2626-bulderhuset/videoer/90947-femteklasse-veronica-vs-vanzilla',
+ 'only_matching': True,
+ }, {
+ 'url': 'http://www.nickelodeon.dk/serier/2626-hojs-hus/videoer/761-tissepause',
+ 'only_matching': True,
+ }, {
+ 'url': 'http://www.nickelodeon.se/serier/2626-lugn-i-stormen/videos/998-',
+ 'only_matching': True,
+ }, {
+ 'url': 'http://www.nick.ch/shows/2304-adventure-time-abenteuerzeit-mit-finn-und-jake',
+ 'only_matching': True,
+ }, {
+ 'url': 'http://www.nickelodeon.be/afspeellijst/4530-top-videos/videos/episode/73917-inval-broodschapper-lariekoek-arie',
+ 'only_matching': True,
+ }]
+
+ def _get_feed_url(self, uri, url=None):
+ video_id = self._id_from_uri(uri)
+ config = self._download_json(
+ 'http://media.mtvnservices.com/pmt/e1/access/index.html?uri=%s&configtype=edge&ref=%s' % (uri, url), video_id)
+ return self._remove_template_parameter(config['feedWithQueryParams'])
+
+
+class NickRuIE(MTVServicesInfoExtractor):
+ IE_NAME = 'nickelodeonru'
+ _VALID_URL = r'https?://(?:www\.)nickelodeon\.(?:ru|fr|es|pt|ro|hu|com\.tr)/[^/]+/(?:[^/]+/)*(?P<id>[^/?#&]+)'
+ _TESTS = [{
+ 'url': 'http://www.nickelodeon.ru/shows/henrydanger/videos/episodes/3-sezon-15-seriya-licenziya-na-polyot/pmomfb#playlist/7airc6',
+ 'only_matching': True,
+ }, {
+ 'url': 'http://www.nickelodeon.ru/videos/smotri-na-nickelodeon-v-iyule/g9hvh7',
+ 'only_matching': True,
+ }, {
+ 'url': 'http://www.nickelodeon.fr/programmes/bob-l-eponge/videos/le-marathon-de-booh-kini-bottom-mardi-31-octobre/nfn7z0',
+ 'only_matching': True,
+ }, {
+ 'url': 'http://www.nickelodeon.es/videos/nickelodeon-consejos-tortitas/f7w7xy',
+ 'only_matching': True,
+ }, {
+ 'url': 'http://www.nickelodeon.pt/series/spongebob-squarepants/videos/a-bolha-de-tinta-gigante/xutq1b',
+ 'only_matching': True,
+ }, {
+ 'url': 'http://www.nickelodeon.ro/emisiuni/shimmer-si-shine/video/nahal-din-bomboane/uw5u2k',
+ 'only_matching': True,
+ }, {
+ 'url': 'http://www.nickelodeon.hu/musorok/spongyabob-kockanadrag/videok/episodes/buborekfujas-az-elszakadt-nadrag/q57iob#playlist/k6te4y',
+ 'only_matching': True,
+ }, {
+ 'url': 'http://www.nickelodeon.com.tr/programlar/sunger-bob/videolar/kayip-yatak/mgqbjy',
+ 'only_matching': True,
+ }]
+
+ def _real_extract(self, url):
+ video_id = self._match_id(url)
+ webpage = self._download_webpage(url, video_id)
+ mgid = self._extract_mgid(webpage, url)
+ return self.url_result('http://media.mtvnservices.com/embed/%s' % mgid)
diff --git a/yt_dlp/extractor/niconico.py b/yt_dlp/extractor/niconico.py
new file mode 100644
index 0000000..6a46246
--- /dev/null
+++ b/yt_dlp/extractor/niconico.py
@@ -0,0 +1,1061 @@
+import datetime
+import functools
+import itertools
+import json
+import re
+import time
+
+from urllib.parse import urlparse
+
+from .common import InfoExtractor, SearchInfoExtractor
+from ..networking import Request
+from ..networking.exceptions import HTTPError
+from ..utils import (
+ ExtractorError,
+ OnDemandPagedList,
+ clean_html,
+ float_or_none,
+ int_or_none,
+ join_nonempty,
+ parse_duration,
+ parse_iso8601,
+ parse_resolution,
+ qualities,
+ remove_start,
+ str_or_none,
+ traverse_obj,
+ try_get,
+ unescapeHTML,
+ update_url_query,
+ url_or_none,
+ urlencode_postdata,
+ urljoin,
+)
+
+
+class NiconicoIE(InfoExtractor):
+ IE_NAME = 'niconico'
+ IE_DESC = 'ニコニコ動画'
+ _GEO_COUNTRIES = ['JP']
+ _GEO_BYPASS = False
+
+ _TESTS = [{
+ 'url': 'http://www.nicovideo.jp/watch/sm22312215',
+ 'md5': 'd1a75c0823e2f629128c43e1212760f9',
+ 'info_dict': {
+ 'id': 'sm22312215',
+ 'ext': 'mp4',
+ 'title': 'Big Buck Bunny',
+ 'thumbnail': r're:https?://.*',
+ 'uploader': 'takuya0301',
+ 'uploader_id': '2698420',
+ 'upload_date': '20131123',
+ 'timestamp': int, # timestamp is unstable
+ 'description': '(c) copyright 2008, Blender Foundation / www.bigbuckbunny.org',
+ 'duration': 33,
+ 'view_count': int,
+ 'comment_count': int,
+ 'genres': ['未設定'],
+ 'tags': [],
+ 'expected_protocol': str,
+ },
+ }, {
+ # Files downloaded with and without credentials differ, so omit
+ # the md5 field
+ 'url': 'http://www.nicovideo.jp/watch/nm14296458',
+ 'info_dict': {
+ 'id': 'nm14296458',
+ 'ext': 'mp4',
+ 'title': '【Kagamine Rin】Dance on media【Original】take2!',
+ 'description': 'md5:9368f2b1f4178de64f2602c2f3d6cbf5',
+ 'thumbnail': r're:https?://.*',
+ 'uploader': 'りょうた',
+ 'uploader_id': '18822557',
+ 'upload_date': '20110429',
+ 'timestamp': 1304065916,
+ 'duration': 208.0,
+ 'comment_count': int,
+ 'view_count': int,
+ 'genres': ['音楽・サウンド'],
+ 'tags': ['Translation_Request', 'Kagamine_Rin', 'Rin_Original'],
+ 'expected_protocol': str,
+ },
+ }, {
+ # video exists but is marked as "deleted"
+ # md5 is unstable
+ 'url': 'http://www.nicovideo.jp/watch/sm10000',
+ 'info_dict': {
+ 'id': 'sm10000',
+ 'ext': 'unknown_video',
+ 'description': 'deleted',
+ 'title': 'ドラえもんエターナル第3話「決戦第3新東京市」<前編>',
+ 'thumbnail': r're:https?://.*',
+ 'upload_date': '20071224',
+ 'timestamp': int, # timestamp field has different value if logged in
+ 'duration': 304,
+ 'view_count': int,
+ },
+ 'skip': 'Requires an account',
+ }, {
+ 'url': 'http://www.nicovideo.jp/watch/so22543406',
+ 'info_dict': {
+ 'id': '1388129933',
+ 'ext': 'mp4',
+ 'title': '【第1回】RADIOアニメロミックス ラブライブ!~のぞえりRadio Garden~',
+ 'description': 'md5:b27d224bb0ff53d3c8269e9f8b561cf1',
+ 'thumbnail': r're:https?://.*',
+ 'timestamp': 1388851200,
+ 'upload_date': '20140104',
+ 'uploader': 'アニメロチャンネル',
+ 'uploader_id': '312',
+ },
+ 'skip': 'The viewing period of the video you were searching for has expired.',
+ }, {
+ # video not available via `getflv`; "old" HTML5 video
+ 'url': 'http://www.nicovideo.jp/watch/sm1151009',
+ 'md5': 'f95a3d259172667b293530cc2e41ebda',
+ 'info_dict': {
+ 'id': 'sm1151009',
+ 'ext': 'mp4',
+ 'title': 'マスターシステム本体内蔵のスペハリのメインテーマ(PSG版)',
+ 'description': 'md5:f95a3d259172667b293530cc2e41ebda',
+ 'thumbnail': r're:https?://.*',
+ 'duration': 184,
+ 'timestamp': 1190835883,
+ 'upload_date': '20070926',
+ 'uploader': 'denden2',
+ 'uploader_id': '1392194',
+ 'view_count': int,
+ 'comment_count': int,
+ 'genres': ['ゲーム'],
+ 'tags': [],
+ 'expected_protocol': str,
+ },
+ }, {
+ # "New" HTML5 video
+ # md5 is unstable
+ 'url': 'http://www.nicovideo.jp/watch/sm31464864',
+ 'info_dict': {
+ 'id': 'sm31464864',
+ 'ext': 'mp4',
+ 'title': '新作TVアニメ「戦姫絶唱シンフォギアAXZ」PV 最高画質',
+ 'description': 'md5:e52974af9a96e739196b2c1ca72b5feb',
+ 'timestamp': 1498481660,
+ 'upload_date': '20170626',
+ 'uploader': 'no-namamae',
+ 'uploader_id': '40826363',
+ 'thumbnail': r're:https?://.*',
+ 'duration': 198,
+ 'view_count': int,
+ 'comment_count': int,
+ 'genres': ['アニメ'],
+ 'tags': [],
+ 'expected_protocol': str,
+ },
+ }, {
+ # Video without owner
+ 'url': 'http://www.nicovideo.jp/watch/sm18238488',
+ 'md5': 'd265680a1f92bdcbbd2a507fc9e78a9e',
+ 'info_dict': {
+ 'id': 'sm18238488',
+ 'ext': 'mp4',
+ 'title': '【実写版】ミュータントタートルズ',
+ 'description': 'md5:15df8988e47a86f9e978af2064bf6d8e',
+ 'timestamp': 1341128008,
+ 'upload_date': '20120701',
+ 'thumbnail': r're:https?://.*',
+ 'duration': 5271,
+ 'view_count': int,
+ 'comment_count': int,
+ 'genres': ['エンターテイメント'],
+ 'tags': [],
+ 'expected_protocol': str,
+ },
+ }, {
+ 'url': 'http://sp.nicovideo.jp/watch/sm28964488?ss_pos=1&cp_in=wt_tg',
+ 'only_matching': True,
+ }, {
+ 'note': 'a video that is only served as encrypted HLS',
+ 'url': 'https://www.nicovideo.jp/watch/so38016254',
+ 'only_matching': True,
+ }]
+
+ _VALID_URL = r'https?://(?:(?:www\.|secure\.|sp\.)?nicovideo\.jp/watch|nico\.ms)/(?P<id>(?:[a-z]{2})?[0-9]+)'
+ _NETRC_MACHINE = 'niconico'
+ _API_HEADERS = {
+ 'X-Frontend-ID': '6',
+ 'X-Frontend-Version': '0',
+ 'X-Niconico-Language': 'en-us',
+ 'Referer': 'https://www.nicovideo.jp/',
+ 'Origin': 'https://www.nicovideo.jp',
+ }
+
+ def _perform_login(self, username, password):
+ login_ok = True
+ login_form_strs = {
+ 'mail_tel': username,
+ 'password': password,
+ }
+ self._request_webpage(
+ 'https://account.nicovideo.jp/login', None,
+ note='Acquiring login session')
+ page = self._download_webpage(
+ 'https://account.nicovideo.jp/login/redirector?show_button_twitter=1&site=niconico&show_button_facebook=1', None,
+ note='Logging in', errnote='Unable to log in',
+ data=urlencode_postdata(login_form_strs),
+ headers={
+ 'Referer': 'https://account.nicovideo.jp/login',
+ 'Content-Type': 'application/x-www-form-urlencoded',
+ })
+ if 'oneTimePw' in page:
+ post_url = self._search_regex(
+ r'<form[^>]+action=(["\'])(?P<url>.+?)\1', page, 'post url', group='url')
+ page = self._download_webpage(
+ urljoin('https://account.nicovideo.jp', post_url), None,
+ note='Performing MFA', errnote='Unable to complete MFA',
+ data=urlencode_postdata({
+ 'otp': self._get_tfa_info('6 digits code')
+ }), headers={
+ 'Content-Type': 'application/x-www-form-urlencoded',
+ })
+ if 'oneTimePw' in page or 'formError' in page:
+ err_msg = self._html_search_regex(
+ r'formError["\']+>(.*?)</div>', page, 'form_error',
+ default='There\'s an error but the message can\'t be parsed.',
+ flags=re.DOTALL)
+ self.report_warning(f'Unable to log in: MFA challenge failed, "{err_msg}"')
+ return False
+ login_ok = 'class="notice error"' not in page
+ if not login_ok:
+ self.report_warning('Unable to log in: bad username or password')
+ return login_ok
+
+ def _get_heartbeat_info(self, info_dict):
+ video_id, video_src_id, audio_src_id = info_dict['url'].split(':')[1].split('/')
+ dmc_protocol = info_dict['expected_protocol']
+
+ api_data = (
+ info_dict.get('_api_data')
+ or self._parse_json(
+ self._html_search_regex(
+ 'data-api-data="([^"]+)"',
+ self._download_webpage('https://www.nicovideo.jp/watch/' + video_id, video_id),
+ 'API data', default='{}'),
+ video_id))
+
+ session_api_data = try_get(api_data, lambda x: x['media']['delivery']['movie']['session'])
+ session_api_endpoint = try_get(session_api_data, lambda x: x['urls'][0])
+
+ def ping():
+ tracking_id = traverse_obj(api_data, ('media', 'delivery', 'trackingId'))
+ if tracking_id:
+ tracking_url = update_url_query('https://nvapi.nicovideo.jp/v1/2ab0cbaa/watch', {'t': tracking_id})
+ watch_request_response = self._download_json(
+ tracking_url, video_id,
+ note='Acquiring permission for downloading video', fatal=False,
+ headers=self._API_HEADERS)
+ if traverse_obj(watch_request_response, ('meta', 'status')) != 200:
+ self.report_warning('Failed to acquire permission for playing video. Video download may fail.')
+
+ yesno = lambda x: 'yes' if x else 'no'
+
+ if dmc_protocol == 'http':
+ protocol = 'http'
+ protocol_parameters = {
+ 'http_output_download_parameters': {
+ 'use_ssl': yesno(session_api_data['urls'][0]['isSsl']),
+ 'use_well_known_port': yesno(session_api_data['urls'][0]['isWellKnownPort']),
+ }
+ }
+ elif dmc_protocol == 'hls':
+ protocol = 'm3u8'
+ segment_duration = try_get(self._configuration_arg('segment_duration'), lambda x: int(x[0])) or 6000
+ parsed_token = self._parse_json(session_api_data['token'], video_id)
+ encryption = traverse_obj(api_data, ('media', 'delivery', 'encryption'))
+ protocol_parameters = {
+ 'hls_parameters': {
+ 'segment_duration': segment_duration,
+ 'transfer_preset': '',
+ 'use_ssl': yesno(session_api_data['urls'][0]['isSsl']),
+ 'use_well_known_port': yesno(session_api_data['urls'][0]['isWellKnownPort']),
+ }
+ }
+ if 'hls_encryption' in parsed_token and encryption:
+ protocol_parameters['hls_parameters']['encryption'] = {
+ parsed_token['hls_encryption']: {
+ 'encrypted_key': encryption['encryptedKey'],
+ 'key_uri': encryption['keyUri'],
+ }
+ }
+ else:
+ protocol = 'm3u8_native'
+ else:
+ raise ExtractorError(f'Unsupported DMC protocol: {dmc_protocol}')
+
+ session_response = self._download_json(
+ session_api_endpoint['url'], video_id,
+ query={'_format': 'json'},
+ headers={'Content-Type': 'application/json'},
+ note='Downloading JSON metadata for %s' % info_dict['format_id'],
+ data=json.dumps({
+ 'session': {
+ 'client_info': {
+ 'player_id': session_api_data.get('playerId'),
+ },
+ 'content_auth': {
+ 'auth_type': try_get(session_api_data, lambda x: x['authTypes'][session_api_data['protocols'][0]]),
+ 'content_key_timeout': session_api_data.get('contentKeyTimeout'),
+ 'service_id': 'nicovideo',
+ 'service_user_id': session_api_data.get('serviceUserId')
+ },
+ 'content_id': session_api_data.get('contentId'),
+ 'content_src_id_sets': [{
+ 'content_src_ids': [{
+ 'src_id_to_mux': {
+ 'audio_src_ids': [audio_src_id],
+ 'video_src_ids': [video_src_id],
+ }
+ }]
+ }],
+ 'content_type': 'movie',
+ 'content_uri': '',
+ 'keep_method': {
+ 'heartbeat': {
+ 'lifetime': session_api_data.get('heartbeatLifetime')
+ }
+ },
+ 'priority': session_api_data['priority'],
+ 'protocol': {
+ 'name': 'http',
+ 'parameters': {
+ 'http_parameters': {
+ 'parameters': protocol_parameters
+ }
+ }
+ },
+ 'recipe_id': session_api_data.get('recipeId'),
+ 'session_operation_auth': {
+ 'session_operation_auth_by_signature': {
+ 'signature': session_api_data.get('signature'),
+ 'token': session_api_data.get('token'),
+ }
+ },
+ 'timing_constraint': 'unlimited'
+ }
+ }).encode())
+
+ info_dict['url'] = session_response['data']['session']['content_uri']
+ info_dict['protocol'] = protocol
+
+ # get heartbeat info
+ heartbeat_info_dict = {
+ 'url': session_api_endpoint['url'] + '/' + session_response['data']['session']['id'] + '?_format=json&_method=PUT',
+ 'data': json.dumps(session_response['data']),
+ # interval: convert heartbeatLifetime from milliseconds to seconds, then take a third of it as a safety buffer (hence scale=3000)
+ 'interval': float_or_none(session_api_data.get('heartbeatLifetime'), scale=3000),
+ 'ping': ping
+ }
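+ # The caller is expected to keep the DMC session alive by POSTing `data`
+ # to `url` (an emulated PUT) every `interval` seconds, with `ping` run
+ # once to acquire playback permission. This is a sketch of the contract;
+ # the actual heartbeat loop lives in the niconico_dmc downloader.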
+
+ return info_dict, heartbeat_info_dict
+
+ def _extract_format_for_quality(self, video_id, audio_quality, video_quality, dmc_protocol):
+
+ if not audio_quality.get('isAvailable') or not video_quality.get('isAvailable'):
+ return None
+
+ format_id = '-'.join(
+ [remove_start(s['id'], 'archive_') for s in (video_quality, audio_quality)] + [dmc_protocol])
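+ # e.g. video id 'archive_h264_1080p' and audio id 'archive_aac_192kbps'
+ # over hls (hypothetical ids) produce 'h264_1080p-aac_192kbps-hls'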
+
+ vid_qual_label = traverse_obj(video_quality, ('metadata', 'label'))
+
+ return {
+ 'url': 'niconico_dmc:%s/%s/%s' % (video_id, video_quality['id'], audio_quality['id']),
+ 'format_id': format_id,
+ 'format_note': join_nonempty('DMC', vid_qual_label, dmc_protocol.upper(), delim=' '),
+ 'ext': 'mp4', # Session API are used in HTML5, which always serves mp4
+ 'acodec': 'aac',
+ 'vcodec': 'h264',
+ **traverse_obj(audio_quality, ('metadata', {
+ 'abr': ('bitrate', {functools.partial(float_or_none, scale=1000)}),
+ 'asr': ('samplingRate', {int_or_none}),
+ })),
+ **traverse_obj(video_quality, ('metadata', {
+ 'vbr': ('bitrate', {functools.partial(float_or_none, scale=1000)}),
+ 'height': ('resolution', 'height', {int_or_none}),
+ 'width': ('resolution', 'width', {int_or_none}),
+ })),
+ 'quality': -2 if 'low' in video_quality['id'] else None,
+ 'protocol': 'niconico_dmc',
+ 'expected_protocol': dmc_protocol, # XXX: This is not a documented field
+ 'http_headers': {
+ 'Origin': 'https://www.nicovideo.jp',
+ 'Referer': 'https://www.nicovideo.jp/watch/' + video_id,
+ }
+ }
+
+ def _yield_dmc_formats(self, api_data, video_id):
+ dmc_data = traverse_obj(api_data, ('media', 'delivery', 'movie'))
+ audios = traverse_obj(dmc_data, ('audios', ..., {dict}))
+ videos = traverse_obj(dmc_data, ('videos', ..., {dict}))
+ protocols = traverse_obj(dmc_data, ('session', 'protocols', ..., {str}))
+ if not all((audios, videos, protocols)):
+ return
+
+ for audio_quality, video_quality, protocol in itertools.product(audios, videos, protocols):
+ if fmt := self._extract_format_for_quality(video_id, audio_quality, video_quality, protocol):
+ yield fmt
+
+ def _yield_dms_formats(self, api_data, video_id):
+ fmt_filter = lambda _, v: v['isAvailable'] and v['id']
+ videos = traverse_obj(api_data, ('media', 'domand', 'videos', fmt_filter))
+ audios = traverse_obj(api_data, ('media', 'domand', 'audios', fmt_filter))
+ access_key = traverse_obj(api_data, ('media', 'domand', 'accessRightKey', {str}))
+ track_id = traverse_obj(api_data, ('client', 'watchTrackId', {str}))
+ if not all((videos, audios, access_key, track_id)):
+ return
+
+ dms_m3u8_url = self._download_json(
+ f'https://nvapi.nicovideo.jp/v1/watch/{video_id}/access-rights/hls', video_id,
+ data=json.dumps({
+ 'outputs': list(itertools.product((v['id'] for v in videos), (a['id'] for a in audios)))
+ }).encode(), query={'actionTrackId': track_id}, headers={
+ 'x-access-right-key': access_key,
+ 'x-frontend-id': 6,
+ 'x-frontend-version': 0,
+ 'x-request-with': 'https://www.nicovideo.jp',
+ })['data']['contentUrl']
+ # Getting all audio formats results in duplicate video formats which we filter out later
+ dms_fmts = self._extract_m3u8_formats(dms_m3u8_url, video_id)
+
+ # m3u8 extraction does not provide audio bitrates, so extract from the API data and fix
+ for audio_fmt in traverse_obj(dms_fmts, lambda _, v: v['vcodec'] == 'none'):
+ yield {
+ **audio_fmt,
+ **traverse_obj(audios, (lambda _, v: audio_fmt['format_id'].startswith(v['id']), {
+ 'format_id': ('id', {str}),
+ 'abr': ('bitRate', {functools.partial(float_or_none, scale=1000)}),
+ 'asr': ('samplingRate', {int_or_none}),
+ }), get_all=False),
+ 'acodec': 'aac',
+ 'ext': 'm4a',
+ }
+
+ # Sort before removing dupes to keep the format dicts with the lowest tbr
+ video_fmts = sorted((fmt for fmt in dms_fmts if fmt['vcodec'] != 'none'), key=lambda f: f['tbr'])
+ self._remove_duplicate_formats(video_fmts)
+ # Calculate the true vbr/tbr by subtracting the lowest abr
+ min_abr = min(traverse_obj(audios, (..., 'bitRate', {float_or_none})), default=0) / 1000
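+ # e.g. with audio bitRates of 192000 and 96000, min_abr is 96.0, so a mux
+ # reported at 1104 kbps gets tbr 1008 and format_id 'video-1008'
+ # (hypothetical numbers)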
+ for video_fmt in video_fmts:
+ video_fmt['tbr'] -= min_abr
+ video_fmt['format_id'] = f'video-{video_fmt["tbr"]:.0f}'
+ yield video_fmt
+
+ def _real_extract(self, url):
+ video_id = self._match_id(url)
+
+ try:
+ webpage, handle = self._download_webpage_handle(
+ 'https://www.nicovideo.jp/watch/' + video_id, video_id)
+ if video_id.startswith('so'):
+ video_id = self._match_id(handle.url)
+
+ api_data = self._parse_json(self._html_search_regex(
+ 'data-api-data="([^"]+)"', webpage,
+ 'API data', default='{}'), video_id)
+ except ExtractorError as e:
+ try:
+ api_data = self._download_json(
+ 'https://www.nicovideo.jp/api/watch/v3/%s?_frontendId=6&_frontendVersion=0&actionTrackId=AAAAAAAAAA_%d' % (video_id, round(time.time() * 1000)), video_id,
+ note='Downloading API JSON', errnote='Unable to fetch data')['data']
+ except ExtractorError:
+ if not isinstance(e.cause, HTTPError):
+ raise
+ webpage = e.cause.response.read().decode('utf-8', 'replace')
+ error_msg = self._html_search_regex(
+ r'(?s)<section\s+class="(?:(?:ErrorMessage|WatchExceptionPage-message)\s*)+">(.+?)</section>',
+ webpage, 'error reason', default=None)
+ if not error_msg:
+ raise
+ raise ExtractorError(clean_html(error_msg), expected=True)
+
+ availability = self._availability(**(traverse_obj(api_data, ('payment', 'video', {
+ 'needs_premium': ('isPremium', {bool}),
+ 'needs_subscription': ('isAdmission', {bool}),
+ })) or {'needs_auth': True}))
+ formats = [*self._yield_dmc_formats(api_data, video_id),
+ *self._yield_dms_formats(api_data, video_id)]
+ if not formats:
+ fail_msg = clean_html(self._html_search_regex(
+ r'<p[^>]+\bclass="fail-message"[^>]*>(?P<msg>.+?)</p>',
+ webpage, 'fail message', default=None, group='msg'))
+ if fail_msg:
+ self.to_screen(f'Niconico said: {fail_msg}')
+ if fail_msg and 'された地域と同じ地域からのみ視聴できます。' in fail_msg:
+ availability = None
+ self.raise_geo_restricted(countries=self._GEO_COUNTRIES, metadata_available=True)
+ elif availability == 'premium_only':
+ self.raise_login_required('This video requires premium', metadata_available=True)
+ elif availability == 'subscriber_only':
+ self.raise_login_required('This video is for members only', metadata_available=True)
+ elif availability == 'needs_auth':
+ self.raise_login_required(metadata_available=False)
+
+ # Start extracting information
+ tags = None
+ if webpage:
+ # use og:video:tag (not logged in)
+ og_video_tags = re.finditer(r'<meta\s+property="og:video:tag"\s*content="(.*?)">', webpage)
+ tags = list(filter(None, (clean_html(x.group(1)) for x in og_video_tags)))
+ if not tags:
+ # use keywords and split with comma (not logged in)
+ kwds = self._html_search_meta('keywords', webpage, default=None)
+ if kwds:
+ tags = [x for x in kwds.split(',') if x]
+ if not tags:
+ # find in json (logged in)
+ tags = traverse_obj(api_data, ('tag', 'items', ..., 'name'))
+
+ thumb_prefs = qualities(['url', 'middleUrl', 'largeUrl', 'player', 'ogp'])
+
+ def get_video_info(*items, get_first=True, **kwargs):
+ return traverse_obj(api_data, ('video', *items), get_all=not get_first, **kwargs)
+
+ return {
+ 'id': video_id,
+ '_api_data': api_data,
+ 'title': get_video_info(('originalTitle', 'title')) or self._og_search_title(webpage, default=None),
+ 'formats': formats,
+ 'availability': availability,
+ 'thumbnails': [{
+ 'id': key,
+ 'url': url,
+ 'ext': 'jpg',
+ 'preference': thumb_prefs(key),
+ **parse_resolution(url, lenient=True),
+ } for key, url in (get_video_info('thumbnail') or {}).items() if url],
+ 'description': clean_html(get_video_info('description')),
+ 'uploader': traverse_obj(api_data, ('owner', 'nickname'), ('channel', 'name'), ('community', 'name')),
+ 'uploader_id': str_or_none(traverse_obj(api_data, ('owner', 'id'), ('channel', 'id'), ('community', 'id'))),
+ 'timestamp': parse_iso8601(get_video_info('registeredAt')) or parse_iso8601(
+ self._html_search_meta('video:release_date', webpage, 'date published', default=None)),
+ 'channel': traverse_obj(api_data, ('channel', 'name'), ('community', 'name')),
+ 'channel_id': traverse_obj(api_data, ('channel', 'id'), ('community', 'id')),
+ 'view_count': int_or_none(get_video_info('count', 'view')),
+ 'tags': tags,
+ 'genre': traverse_obj(api_data, ('genre', 'label'), ('genre', 'key')),
+ 'comment_count': get_video_info('count', 'comment', expected_type=int),
+ 'duration': (
+ parse_duration(self._html_search_meta('video:duration', webpage, 'video duration', default=None))
+ or get_video_info('duration')),
+ 'webpage_url': url_or_none(url) or f'https://www.nicovideo.jp/watch/{video_id}',
+ 'subtitles': self.extract_subtitles(video_id, api_data),
+ }
+
+ def _get_subtitles(self, video_id, api_data):
+ comments_info = traverse_obj(api_data, ('comment', 'nvComment', {dict})) or {}
+ if not comments_info.get('server'):
+ return
+
+ danmaku = traverse_obj(self._download_json(
+ f'{comments_info["server"]}/v1/threads', video_id, data=json.dumps({
+ 'additionals': {},
+ 'params': comments_info.get('params'),
+ 'threadKey': comments_info.get('threadKey'),
+ }).encode(), fatal=False,
+ headers={
+ 'Referer': 'https://www.nicovideo.jp/',
+ 'Origin': 'https://www.nicovideo.jp',
+ 'Content-Type': 'text/plain;charset=UTF-8',
+ 'x-client-os-type': 'others',
+ 'x-frontend-id': '6',
+ 'x-frontend-version': '0',
+ },
+ note='Downloading comments', errnote='Failed to download comments'),
+ ('data', 'threads', ..., 'comments', ...))
+
+ return {
+ 'comments': [{
+ 'ext': 'json',
+ 'data': json.dumps(danmaku),
+ }],
+ }
+
+
+class NiconicoPlaylistBaseIE(InfoExtractor):
+ _PAGE_SIZE = 100
+
+ _API_HEADERS = {
+ 'X-Frontend-ID': '6',
+ 'X-Frontend-Version': '0',
+ 'X-Niconico-Language': 'en-us'
+ }
+
+ def _call_api(self, list_id, resource, query):
+ raise NotImplementedError('Must be implemented in subclasses')
+
+ @staticmethod
+ def _parse_owner(item):
+ return {
+ 'uploader': traverse_obj(item, ('owner', 'name')),
+ 'uploader_id': traverse_obj(item, ('owner', 'id')),
+ }
+
+ def _fetch_page(self, list_id, page):
+ page += 1
+ resp = self._call_api(list_id, 'page %d' % page, {
+ 'page': page,
+ 'pageSize': self._PAGE_SIZE,
+ })
+ # this is needed to support both mylist and user
+ for video in traverse_obj(resp, ('items', ..., ('video', None))) or []:
+ video_id = video.get('id')
+ if not video_id:
+ # the ('video', None) path also yields the outer wrapper dict, which lacks a top-level 'id'; skip it
+ continue
+ count = video.get('count') or {}
+ get_count = lambda x: int_or_none(count.get(x))
+ yield {
+ '_type': 'url',
+ 'id': video_id,
+ 'title': video.get('title'),
+ 'url': f'https://www.nicovideo.jp/watch/{video_id}',
+ 'description': video.get('shortDescription'),
+ 'duration': int_or_none(video.get('duration')),
+ 'view_count': get_count('view'),
+ 'comment_count': get_count('comment'),
+ 'thumbnail': traverse_obj(video, ('thumbnail', ('nHdUrl', 'largeUrl', 'listingUrl', 'url'))),
+ 'ie_key': NiconicoIE.ie_key(),
+ **self._parse_owner(video),
+ }
+
+ def _entries(self, list_id):
+ return OnDemandPagedList(functools.partial(self._fetch_page, list_id), self._PAGE_SIZE)
+
+
+class NiconicoPlaylistIE(NiconicoPlaylistBaseIE):
+ IE_NAME = 'niconico:playlist'
+ _VALID_URL = r'https?://(?:(?:www\.|sp\.)?nicovideo\.jp|nico\.ms)/(?:user/\d+/)?(?:my/)?mylist/(?:#/)?(?P<id>\d+)'
+
+ _TESTS = [{
+ 'url': 'http://www.nicovideo.jp/mylist/27411728',
+ 'info_dict': {
+ 'id': '27411728',
+ 'title': 'AKB48のオールナイトニッポン',
+ 'description': 'md5:d89694c5ded4b6c693dea2db6e41aa08',
+ 'uploader': 'のっく',
+ 'uploader_id': '805442',
+ },
+ 'playlist_mincount': 291,
+ }, {
+ 'url': 'https://www.nicovideo.jp/user/805442/mylist/27411728',
+ 'only_matching': True,
+ }, {
+ 'url': 'https://www.nicovideo.jp/my/mylist/#/68048635',
+ 'only_matching': True,
+ }]
+
+ def _call_api(self, list_id, resource, query):
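+ # e.g. https://nvapi.nicovideo.jp/v2/mylists/27411728?page=1&pageSize=100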
+ return self._download_json(
+ f'https://nvapi.nicovideo.jp/v2/mylists/{list_id}', list_id,
+ f'Downloading {resource}', query=query,
+ headers=self._API_HEADERS)['data']['mylist']
+
+ def _real_extract(self, url):
+ list_id = self._match_id(url)
+ mylist = self._call_api(list_id, 'list', {
+ 'pageSize': 1,
+ })
+ return self.playlist_result(
+ self._entries(list_id), list_id,
+ mylist.get('name'), mylist.get('description'), **self._parse_owner(mylist))
+
+
+class NiconicoSeriesIE(InfoExtractor):
+ IE_NAME = 'niconico:series'
+ _VALID_URL = r'https?://(?:(?:www\.|sp\.)?nicovideo\.jp(?:/user/\d+)?|nico\.ms)/series/(?P<id>\d+)'
+
+ _TESTS = [{
+ 'url': 'https://www.nicovideo.jp/user/44113208/series/110226',
+ 'info_dict': {
+ 'id': '110226',
+ 'title': 'ご立派ァ!のシリーズ',
+ },
+ 'playlist_mincount': 10,
+ }, {
+ 'url': 'https://www.nicovideo.jp/series/12312/',
+ 'info_dict': {
+ 'id': '12312',
+ 'title': 'バトルスピリッツ お勧めカード紹介(調整中)',
+ },
+ 'playlist_mincount': 103,
+ }, {
+ 'url': 'https://nico.ms/series/203559',
+ 'only_matching': True,
+ }]
+
+ def _real_extract(self, url):
+ list_id = self._match_id(url)
+ webpage = self._download_webpage(url, list_id)
+
+ title = self._search_regex(
+ (r'<title>「(.+)(全',
+ r'<div class="TwitterShareButton"\s+data-text="(.+)\s+https:'),
+ webpage, 'title', fatal=False)
+ if title:
+ title = unescapeHTML(title)
+ json_data = next(self._yield_json_ld(webpage, None, fatal=False))
+ return self.playlist_from_matches(
+ traverse_obj(json_data, ('itemListElement', ..., 'url')), list_id, title, ie=NiconicoIE)
+
+
+class NiconicoHistoryIE(NiconicoPlaylistBaseIE):
+ IE_NAME = 'niconico:history'
+ IE_DESC = 'NicoNico user history or likes. Requires cookies.'
+ _VALID_URL = r'https?://(?:www\.|sp\.)?nicovideo\.jp/my/(?P<id>history(?:/like)?)'
+
+ _TESTS = [{
+ 'note': 'PC page, with /video',
+ 'url': 'https://www.nicovideo.jp/my/history/video',
+ 'only_matching': True,
+ }, {
+ 'note': 'PC page, without /video',
+ 'url': 'https://www.nicovideo.jp/my/history',
+ 'only_matching': True,
+ }, {
+ 'note': 'mobile page, with /video',
+ 'url': 'https://sp.nicovideo.jp/my/history/video',
+ 'only_matching': True,
+ }, {
+ 'note': 'mobile page, without /video',
+ 'url': 'https://sp.nicovideo.jp/my/history',
+ 'only_matching': True,
+ }, {
+ 'note': 'PC page',
+ 'url': 'https://www.nicovideo.jp/my/history/like',
+ 'only_matching': True,
+ }, {
+ 'note': 'Mobile page',
+ 'url': 'https://sp.nicovideo.jp/my/history/like',
+ 'only_matching': True,
+ }]
+
+ def _call_api(self, list_id, resource, query):
+ path = 'likes' if list_id == 'history/like' else 'watch/history'
+ return self._download_json(
+ f'https://nvapi.nicovideo.jp/v1/users/me/{path}', list_id,
+ f'Downloading {resource}', query=query, headers=self._API_HEADERS)['data']
+
+ def _real_extract(self, url):
+ list_id = self._match_id(url)
+ try:
+ mylist = self._call_api(list_id, 'list', {'pageSize': 1})
+ except ExtractorError as e:
+ if isinstance(e.cause, HTTPError) and e.cause.status == 401:
+ self.raise_login_required('You have to be logged in to get your history')
+ raise
+ return self.playlist_result(self._entries(list_id), list_id, **self._parse_owner(mylist))
+
+
+class NicovideoSearchBaseIE(InfoExtractor):
+ _SEARCH_TYPE = 'search'
+
+ def _entries(self, url, item_id, query=None, note='Downloading page %(page)s'):
+ query = query or {}
+ pages = [query['page']] if 'page' in query else itertools.count(1)
+ for page_num in pages:
+ query['page'] = str(page_num)
+ webpage = self._download_webpage(url, item_id, query=query, note=note % {'page': page_num})
+ results = re.findall(r'(?<=data-video-id=)["\']?(?P<videoid>.*?)(?=["\'])', webpage)
+ for item in results:
+ yield self.url_result(f'https://www.nicovideo.jp/watch/{item}', 'Niconico', item)
+ if not results:
+ break
+
+ def _search_results(self, query):
+ return self._entries(
+ self._proto_relative_url(f'//www.nicovideo.jp/{self._SEARCH_TYPE}/{query}'), query)
+
+
+class NicovideoSearchIE(NicovideoSearchBaseIE, SearchInfoExtractor):
+ IE_DESC = 'Nico video search'
+ IE_NAME = 'nicovideo:search'
+ _SEARCH_KEY = 'nicosearch'
+
+
+class NicovideoSearchURLIE(NicovideoSearchBaseIE):
+ IE_NAME = f'{NicovideoSearchIE.IE_NAME}_url'
+ IE_DESC = 'Nico video search URLs'
+ _VALID_URL = r'https?://(?:www\.)?nicovideo\.jp/search/(?P<id>[^?#&]+)?'
+ _TESTS = [{
+ 'url': 'http://www.nicovideo.jp/search/sm9',
+ 'info_dict': {
+ 'id': 'sm9',
+ 'title': 'sm9'
+ },
+ 'playlist_mincount': 40,
+ }, {
+ 'url': 'https://www.nicovideo.jp/search/sm9?sort=h&order=d&end=2020-12-31&start=2020-01-01',
+ 'info_dict': {
+ 'id': 'sm9',
+ 'title': 'sm9'
+ },
+ 'playlist_count': 31,
+ }]
+
+ def _real_extract(self, url):
+ query = self._match_id(url)
+ return self.playlist_result(self._entries(url, query), query, query)
+
+
+class NicovideoSearchDateIE(NicovideoSearchBaseIE, SearchInfoExtractor):
+ IE_DESC = 'Nico video search, newest first'
+ IE_NAME = f'{NicovideoSearchIE.IE_NAME}:date'
+ _SEARCH_KEY = 'nicosearchdate'
+ _TESTS = [{
+ 'url': 'nicosearchdateall:a',
+ 'info_dict': {
+ 'id': 'a',
+ 'title': 'a'
+ },
+ 'playlist_mincount': 1610,
+ }]
+
+ _START_DATE = datetime.date(2007, 1, 1)
+ _RESULTS_PER_PAGE = 32
+ _MAX_PAGES = 50
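+ # the search page appears to serve at most _MAX_PAGES pages of
+ # _RESULTS_PER_PAGE results (1600 videos) per query, hence the
+ # date-interval bisection in _entries below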
+
+ def _entries(self, url, item_id, start_date=None, end_date=None):
+ start_date, end_date = start_date or self._START_DATE, end_date or datetime.datetime.now().date()
+
+ # If the last page has a full page of videos, we need to break down the query interval further
+ last_page_len = len(list(self._get_entries_for_date(
+ url, item_id, start_date, end_date, self._MAX_PAGES,
+ note=f'Checking number of videos from {start_date} to {end_date}')))
+ if (last_page_len == self._RESULTS_PER_PAGE and start_date != end_date):
+ midpoint = start_date + ((end_date - start_date) // 2)
+ yield from self._entries(url, item_id, midpoint, end_date)
+ yield from self._entries(url, item_id, start_date, midpoint)
+ else:
+ self.to_screen(f'{item_id}: Downloading results from {start_date} to {end_date}')
+ yield from self._get_entries_for_date(
+ url, item_id, start_date, end_date, note=' Downloading page %(page)s')
+
+ def _get_entries_for_date(self, url, item_id, start_date, end_date=None, page_num=None, note=None):
+ query = {
+ 'start': str(start_date),
+ 'end': str(end_date or start_date),
+ 'sort': 'f',
+ 'order': 'd',
+ }
+ if page_num:
+ query['page'] = str(page_num)
+
+ yield from super()._entries(url, item_id, query=query, note=note)
+
+
+class NicovideoTagURLIE(NicovideoSearchBaseIE):
+ IE_NAME = 'niconico:tag'
+ IE_DESC = 'NicoNico video tag URLs'
+ _SEARCH_TYPE = 'tag'
+ _VALID_URL = r'https?://(?:www\.)?nicovideo\.jp/tag/(?P<id>[^?#&]+)?'
+ _TESTS = [{
+ 'url': 'https://www.nicovideo.jp/tag/ドキュメンタリー淫夢',
+ 'info_dict': {
+ 'id': 'ドキュメンタリー淫夢',
+ 'title': 'ドキュメンタリー淫夢'
+ },
+ 'playlist_mincount': 400,
+ }]
+
+ def _real_extract(self, url):
+ query = self._match_id(url)
+ return self.playlist_result(self._entries(url, query), query, query)
+
+
+class NiconicoUserIE(InfoExtractor):
+ _VALID_URL = r'https?://(?:www\.)?nicovideo\.jp/user/(?P<id>\d+)/?(?:$|[#?])'
+ _TEST = {
+ 'url': 'https://www.nicovideo.jp/user/419948',
+ 'info_dict': {
+ 'id': '419948',
+ },
+ 'playlist_mincount': 101,
+ }
+ _API_URL = 'https://nvapi.nicovideo.jp/v1/users/%s/videos?sortKey=registeredAt&sortOrder=desc&pageSize=%s&page=%s'
+ _PAGE_SIZE = 100
+
+ _API_HEADERS = {
+ 'X-Frontend-ID': '6',
+ 'X-Frontend-Version': '0'
+ }
+
+ def _entries(self, list_id):
+ total_count = 1
+ count = page_num = 0
+ while count < total_count:
+ json_parsed = self._download_json(
+ self._API_URL % (list_id, self._PAGE_SIZE, page_num + 1), list_id,
+ headers=self._API_HEADERS,
+ note='Downloading JSON metadata%s' % (' page %d' % page_num if page_num else ''))
+ if not page_num:
+ total_count = int_or_none(json_parsed['data'].get('totalCount'))
+ for entry in json_parsed['data']['items']:
+ count += 1
+ yield self.url_result('https://www.nicovideo.jp/watch/%s' % entry['id'])
+ page_num += 1
+
+ def _real_extract(self, url):
+ list_id = self._match_id(url)
+ return self.playlist_result(self._entries(list_id), list_id, ie=NiconicoIE.ie_key())
+
+
+class NiconicoLiveIE(InfoExtractor):
+ IE_NAME = 'niconico:live'
+ IE_DESC = 'ニコニコ生放送'
+ _VALID_URL = r'https?://(?:sp\.)?live2?\.nicovideo\.jp/(?:watch|gate)/(?P<id>lv\d+)'
+ _TESTS = [{
+ 'note': 'this test case includes invisible characters in the title, pasted as-is',
+ 'url': 'https://live.nicovideo.jp/watch/lv339533123',
+ 'info_dict': {
+ 'id': 'lv339533123',
+ 'title': '激辛ペヤング食べます‪( ;ᯅ; )‬(歌枠オーディション参加中)',
+ 'view_count': 1526,
+ 'comment_count': 1772,
+ 'description': '初めましてもかって言います❕\nのんびり自由に適当に暮らしてます',
+ 'uploader': 'もか',
+ 'channel': 'ゲストさんのコミュニティ',
+ 'channel_id': 'co5776900',
+ 'channel_url': 'https://com.nicovideo.jp/community/co5776900',
+ 'timestamp': 1670677328,
+ 'is_live': True,
+ },
+ 'skip': 'livestream',
+ }, {
+ 'url': 'https://live2.nicovideo.jp/watch/lv339533123',
+ 'only_matching': True,
+ }, {
+ 'url': 'https://sp.live.nicovideo.jp/watch/lv339533123',
+ 'only_matching': True,
+ }, {
+ 'url': 'https://sp.live2.nicovideo.jp/watch/lv339533123',
+ 'only_matching': True,
+ }]
+
+ _KNOWN_LATENCY = ('high', 'low')
+
+ def _real_extract(self, url):
+ video_id = self._match_id(url)
+ webpage, urlh = self._download_webpage_handle(f'https://live.nicovideo.jp/watch/{video_id}', video_id)
+
+ embedded_data = self._parse_json(unescapeHTML(self._search_regex(
+ r'<script\s+id="embedded-data"\s*data-props="(.+?)"', webpage, 'embedded data')), video_id)
+
+ ws_url = traverse_obj(embedded_data, ('site', 'relive', 'webSocketUrl'))
+ if not ws_url:
+ raise ExtractorError('The live stream hasn\'t started yet or has already ended.', expected=True)
+ ws_url = update_url_query(ws_url, {
+ 'frontend_id': traverse_obj(embedded_data, ('site', 'frontendId')) or '9',
+ })
+
+ hostname = remove_start(urlparse(urlh.url).hostname, 'sp.')
+ latency = try_get(self._configuration_arg('latency'), lambda x: x[0])
+ if latency not in self._KNOWN_LATENCY:
+ latency = 'high'
+
+ ws = self._request_webpage(
+ Request(ws_url, headers={'Origin': f'https://{hostname}'}),
+ video_id=video_id, note='Connecting to WebSocket server')
+
+ self.write_debug('Sending HLS server request')
+ ws.send(json.dumps({
+ 'type': 'startWatching',
+ 'data': {
+ 'stream': {
+ 'quality': 'abr',
+ 'protocol': 'hls+fmp4',
+ 'latency': latency,
+ 'chasePlay': False
+ },
+ 'room': {
+ 'protocol': 'webSocket',
+ 'commentable': True
+ },
+ 'reconnect': False,
+ }
+ }))
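+ # the server should eventually answer with a message shaped roughly like:
+ # {"type": "stream", "data": {"uri": "https://.../master.m3u8?...",
+ # "availableQualities": ["abr", "super_high", ...]}}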
+
+ while True:
+ recv = ws.recv()
+ if not recv:
+ continue
+ data = json.loads(recv)
+ if not isinstance(data, dict):
+ continue
+ if data.get('type') == 'stream':
+ m3u8_url = data['data']['uri']
+ qualities = data['data']['availableQualities']
+ break
+ elif data.get('type') == 'disconnect':
+ self.write_debug(recv)
+ raise ExtractorError('Disconnected in the middle of extraction')
+ elif data.get('type') == 'error':
+ self.write_debug(recv)
+ message = traverse_obj(data, ('body', 'code')) or recv
+ raise ExtractorError(message)
+ elif self.get_param('verbose', False):
+ if len(recv) > 100:
+ recv = recv[:100] + '...'
+ self.write_debug('Server said: %s' % recv)
+
+ title = traverse_obj(embedded_data, ('program', 'title')) or self._html_search_meta(
+ ('og:title', 'twitter:title'), webpage, 'live title', fatal=False)
+
+ raw_thumbs = traverse_obj(embedded_data, ('program', 'thumbnail')) or {}
+ thumbnails = []
+ for name, value in raw_thumbs.items():
+ if not isinstance(value, dict):
+ thumbnails.append({
+ 'id': name,
+ 'url': value,
+ **parse_resolution(value, lenient=True),
+ })
+ continue
+
+ for k, img_url in value.items():
+ res = parse_resolution(k, lenient=True) or parse_resolution(img_url, lenient=True)
+ width, height = res.get('width'), res.get('height')
+
+ thumbnails.append({
+ 'id': f'{name}_{width}x{height}',
+ 'url': img_url,
+ **res,
+ })
+
+ formats = self._extract_m3u8_formats(m3u8_url, video_id, ext='mp4', live=True)
+ for fmt, q in zip(formats, reversed(qualities[1:])):
+ fmt.update({
+ 'format_id': q,
+ 'protocol': 'niconico_live',
+ 'ws': ws,
+ 'video_id': video_id,
+ 'live_latency': latency,
+ 'origin': hostname,
+ })
+
+ return {
+ 'id': video_id,
+ 'title': title,
+ **traverse_obj(embedded_data, {
+ 'view_count': ('program', 'statistics', 'watchCount'),
+ 'comment_count': ('program', 'statistics', 'commentCount'),
+ 'uploader': ('program', 'supplier', 'name'),
+ 'channel': ('socialGroup', 'name'),
+ 'channel_id': ('socialGroup', 'id'),
+ 'channel_url': ('socialGroup', 'socialGroupPageUrl'),
+ }),
+ 'description': clean_html(traverse_obj(embedded_data, ('program', 'description'))),
+ 'timestamp': int_or_none(traverse_obj(embedded_data, ('program', 'openTime'))),
+ 'is_live': True,
+ 'thumbnails': thumbnails,
+ 'formats': formats,
+ }
diff --git a/yt_dlp/extractor/niconicochannelplus.py b/yt_dlp/extractor/niconicochannelplus.py
new file mode 100644
index 0000000..89af3f7
--- /dev/null
+++ b/yt_dlp/extractor/niconicochannelplus.py
@@ -0,0 +1,426 @@
+import functools
+import json
+
+from .common import InfoExtractor
+from ..utils import (
+ ExtractorError,
+ OnDemandPagedList,
+ filter_dict,
+ int_or_none,
+ parse_qs,
+ str_or_none,
+ traverse_obj,
+ unified_timestamp,
+ url_or_none,
+)
+
+
+class NiconicoChannelPlusBaseIE(InfoExtractor):
+ _WEBPAGE_BASE_URL = 'https://nicochannel.jp'
+
+ def _call_api(self, path, item_id, *args, **kwargs):
+ return self._download_json(
+ f'https://nfc-api.nicochannel.jp/fc/{path}', video_id=item_id, *args, **kwargs)
+
+ def _find_fanclub_site_id(self, channel_name):
+ fanclub_list_json = self._call_api(
+ 'content_providers/channels', item_id=f'channels/{channel_name}',
+ note='Fetching channel list', errnote='Unable to fetch channel list',
+ )['data']['content_providers']
+ fanclub_id = traverse_obj(fanclub_list_json, (
+ lambda _, v: v['domain'] == f'{self._WEBPAGE_BASE_URL}/{channel_name}', 'id'),
+ get_all=False)
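+ # e.g. for channel_name 'kaorin' this selects the provider whose domain
+ # is 'https://nicochannel.jp/kaorin'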
+ if not fanclub_id:
+ raise ExtractorError(f'Channel {channel_name} does not exist', expected=True)
+ return fanclub_id
+
+ def _get_channel_base_info(self, fanclub_site_id):
+ return traverse_obj(self._call_api(
+ f'fanclub_sites/{fanclub_site_id}/page_base_info', item_id=f'fanclub_sites/{fanclub_site_id}',
+ note='Fetching channel base info', errnote='Unable to fetch channel base info', fatal=False,
+ ), ('data', 'fanclub_site', {dict})) or {}
+
+ def _get_channel_user_info(self, fanclub_site_id):
+ return traverse_obj(self._call_api(
+ f'fanclub_sites/{fanclub_site_id}/user_info', item_id=f'fanclub_sites/{fanclub_site_id}',
+ note='Fetching channel user info', errnote='Unable to fetch channel user info', fatal=False,
+ data=json.dumps('null').encode('ascii'),
+ ), ('data', 'fanclub_site', {dict})) or {}
+
+
+class NiconicoChannelPlusIE(NiconicoChannelPlusBaseIE):
+ IE_NAME = 'NiconicoChannelPlus'
+ IE_DESC = 'ニコニコチャンネルプラス'
+ _VALID_URL = r'https?://nicochannel\.jp/(?P<channel>[\w.-]+)/(?:video|live)/(?P<code>sm\w+)'
+ _TESTS = [{
+ 'url': 'https://nicochannel.jp/kaorin/video/smsDd8EdFLcVZk9yyAhD6H7H',
+ 'info_dict': {
+ 'id': 'smsDd8EdFLcVZk9yyAhD6H7H',
+ 'title': '前田佳織里はニコ生がしたい!',
+ 'ext': 'mp4',
+ 'channel': '前田佳織里の世界攻略計画',
+ 'channel_id': 'kaorin',
+ 'channel_url': 'https://nicochannel.jp/kaorin',
+ 'live_status': 'not_live',
+ 'thumbnail': 'https://nicochannel.jp/public_html/contents/video_pages/74/thumbnail_path',
+ 'description': '2021年11月に放送された\n「前田佳織里はニコ生がしたい!」アーカイブになります。',
+ 'timestamp': 1641360276,
+ 'duration': 4097,
+ 'comment_count': int,
+ 'view_count': int,
+ 'tags': [],
+ 'upload_date': '20220105',
+ },
+ 'params': {
+ 'skip_download': True,
+ },
+ }, {
+ # age limited video; test purpose channel.
+ 'url': 'https://nicochannel.jp/testman/video/smDXbcrtyPNxLx9jc4BW69Ve',
+ 'info_dict': {
+ 'id': 'smDXbcrtyPNxLx9jc4BW69Ve',
+ 'title': 'test oshiro',
+ 'ext': 'mp4',
+ 'channel': '本番チャンネルプラステストマン',
+ 'channel_id': 'testman',
+ 'channel_url': 'https://nicochannel.jp/testman',
+ 'age_limit': 18,
+ 'live_status': 'was_live',
+ 'timestamp': 1666344616,
+ 'duration': 86465,
+ 'comment_count': int,
+ 'view_count': int,
+ 'tags': [],
+ 'upload_date': '20221021',
+ },
+ 'params': {
+ 'skip_download': True,
+ },
+ }]
+
+ def _real_extract(self, url):
+ content_code, channel_id = self._match_valid_url(url).group('code', 'channel')
+ fanclub_site_id = self._find_fanclub_site_id(channel_id)
+
+ data_json = self._call_api(
+ f'video_pages/{content_code}', item_id=content_code, headers={'fc_use_device': 'null'},
+ note='Fetching video page info', errnote='Unable to fetch video page info',
+ )['data']['video_page']
+
+ live_status, session_id = self._get_live_status_and_session_id(content_code, data_json)
+
+ release_timestamp_str = data_json.get('live_scheduled_start_at')
+
+ formats = []
+
+ if live_status == 'is_upcoming':
+ if release_timestamp_str:
+ msg = f'This live event will begin at {release_timestamp_str} UTC'
+ else:
+ msg = 'This event has not started yet'
+ self.raise_no_formats(msg, expected=True, video_id=content_code)
+ else:
+ formats = self._extract_m3u8_formats(
+ # "authenticated_url" is a format string that contains "{session_id}".
+ m3u8_url=data_json['video_stream']['authenticated_url'].format(session_id=session_id),
+ video_id=content_code)
+
+ return {
+ 'id': content_code,
+ 'formats': formats,
+ '_format_sort_fields': ('tbr', 'vcodec', 'acodec'),
+ 'channel': self._get_channel_base_info(fanclub_site_id).get('fanclub_site_name'),
+ 'channel_id': channel_id,
+ 'channel_url': f'{self._WEBPAGE_BASE_URL}/{channel_id}',
+ 'age_limit': traverse_obj(self._get_channel_user_info(fanclub_site_id), ('content_provider', 'age_limit')),
+ 'live_status': live_status,
+ 'release_timestamp': unified_timestamp(release_timestamp_str),
+ **traverse_obj(data_json, {
+ 'title': ('title', {str}),
+ 'thumbnail': ('thumbnail_url', {url_or_none}),
+ 'description': ('description', {str}),
+ 'timestamp': ('released_at', {unified_timestamp}),
+ 'duration': ('active_video_filename', 'length', {int_or_none}),
+ 'comment_count': ('video_aggregate_info', 'number_of_comments', {int_or_none}),
+ 'view_count': ('video_aggregate_info', 'total_views', {int_or_none}),
+ 'tags': ('video_tags', ..., 'tag', {str}),
+ }),
+ '__post_extractor': self.extract_comments(
+ content_code=content_code,
+ comment_group_id=traverse_obj(data_json, ('video_comment_setting', 'comment_group_id'))),
+ }
+
+ def _get_comments(self, content_code, comment_group_id):
+ item_id = f'{content_code}/comments'
+
+ if not comment_group_id:
+ return None
+
+ comment_access_token = self._call_api(
+ f'video_pages/{content_code}/comments_user_token', item_id,
+ note='Getting comment token', errnote='Unable to get comment token',
+ )['data']['access_token']
+
+ comment_list = self._download_json(
+ 'https://comm-api.sheeta.com/messages.history', video_id=item_id,
+ note='Fetching comments', errnote='Unable to fetch comments',
+ headers={'Content-Type': 'application/json'},
+ query={
+ 'sort_direction': 'asc',
+ 'limit': int_or_none(self._configuration_arg('max_comments', [''])[0]) or 120,
+ },
+ data=json.dumps({
+ 'token': comment_access_token,
+ 'group_id': comment_group_id,
+ }).encode('ascii'))
+
+ for comment in traverse_obj(comment_list, ...):
+ yield traverse_obj(comment, {
+ 'author': ('nickname', {str}),
+ 'author_id': ('sender_id', {str_or_none}),
+ 'id': ('id', {str_or_none}),
+ 'text': ('message', {str}),
+ 'timestamp': (('updated_at', 'sent_at', 'created_at'), {unified_timestamp}),
+ 'author_is_uploader': ('sender_id', {lambda x: x == '-1'}),
+ }, get_all=False)
+
+ def _get_live_status_and_session_id(self, content_code, data_json):
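+ # Decision table: vod + live_finished_at -> was_live; vod alone -> not_live;
+ # live without live_started_at -> is_upcoming (no session id needed);
+ # live unfinished -> is_live; live finished -> was_live via a DVR session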
+ video_type = data_json.get('type')
+ live_finished_at = data_json.get('live_finished_at')
+
+ payload = {}
+ if video_type == 'vod':
+ if live_finished_at:
+ live_status = 'was_live'
+ else:
+ live_status = 'not_live'
+ elif video_type == 'live':
+ if not data_json.get('live_started_at'):
+ return 'is_upcoming', ''
+
+ if not live_finished_at:
+ live_status = 'is_live'
+ else:
+ live_status = 'was_live'
+ payload = {'broadcast_type': 'dvr'}
+
+ video_allow_dvr_flg = traverse_obj(data_json, ('video', 'allow_dvr_flg'))
+ video_convert_to_vod_flg = traverse_obj(data_json, ('video', 'convert_to_vod_flg'))
+
+ self.write_debug(f'allow_dvr_flg = {video_allow_dvr_flg}, convert_to_vod_flg = {video_convert_to_vod_flg}.')
+
+ if not (video_allow_dvr_flg and video_convert_to_vod_flg):
+ raise ExtractorError(
+ 'The live stream has ended and no video is available for download.', video_id=content_code, expected=True)
+ else:
+ raise ExtractorError(f'Unknown type: {video_type}', video_id=content_code, expected=False)
+
+ self.write_debug(f'{content_code}: video_type={video_type}, live_status={live_status}')
+
+ session_id = self._call_api(
+ f'video_pages/{content_code}/session_ids', item_id=f'{content_code}/session',
+ data=json.dumps(payload).encode('ascii'), headers={
+ 'Content-Type': 'application/json',
+ 'fc_use_device': 'null',
+ 'origin': 'https://nicochannel.jp',
+ },
+ note='Getting session id', errnote='Unable to get session id',
+ )['data']['session_id']
+
+ return live_status, session_id
+
+
+class NiconicoChannelPlusChannelBaseIE(NiconicoChannelPlusBaseIE):
+ _PAGE_SIZE = 12
+
+ def _fetch_paged_channel_video_list(self, path, query, channel_name, item_id, page):
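+ # `page` is the 0-based index supplied by OnDemandPagedList; the API
+ # expects 1-based page numbers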
+ response = self._call_api(
+ path, item_id, query={
+ **query,
+ 'page': (page + 1),
+ 'per_page': self._PAGE_SIZE,
+ },
+ headers={'fc_use_device': 'null'},
+ note=f'Getting channel info (page {page + 1})',
+ errnote=f'Unable to get channel info (page {page + 1})')
+
+ for content_code in traverse_obj(response, ('data', 'video_pages', 'list', ..., 'content_code')):
+ # "video/{content_code}" works for both VOD and live, but "live/{content_code}" doesn't work for VOD
+ yield self.url_result(
+ f'{self._WEBPAGE_BASE_URL}/{channel_name}/video/{content_code}', NiconicoChannelPlusIE)
+
+
+class NiconicoChannelPlusChannelVideosIE(NiconicoChannelPlusChannelBaseIE):
+ IE_NAME = 'NiconicoChannelPlus:channel:videos'
+ IE_DESC = 'ニコニコチャンネルプラス - チャンネル - 動画リスト. nicochannel.jp/channel/videos'
+ _VALID_URL = r'https?://nicochannel\.jp/(?P<id>[a-z\d\._-]+)/videos(?:\?.*)?'
+ _TESTS = [{
+ # query: None
+ 'url': 'https://nicochannel.jp/testman/videos',
+ 'info_dict': {
+ 'id': 'testman-videos',
+ 'title': '本番チャンネルプラステストマン-videos',
+ },
+ 'playlist_mincount': 18,
+ }, {
+ # query: None
+ 'url': 'https://nicochannel.jp/testtarou/videos',
+ 'info_dict': {
+ 'id': 'testtarou-videos',
+ 'title': 'チャンネルプラステスト太郎-videos',
+ },
+ 'playlist_mincount': 2,
+ }, {
+ # query: None
+ 'url': 'https://nicochannel.jp/testjirou/videos',
+ 'info_dict': {
+ 'id': 'testjirou-videos',
+ 'title': 'チャンネルプラステスト二郎-videos',
+ },
+ 'playlist_mincount': 12,
+ }, {
+ # query: tag
+ 'url': 'https://nicochannel.jp/testman/videos?tag=%E6%A4%9C%E8%A8%BC%E7%94%A8',
+ 'info_dict': {
+ 'id': 'testman-videos',
+ 'title': '本番チャンネルプラステストマン-videos',
+ },
+ 'playlist_mincount': 6,
+ }, {
+ # query: vodType
+ 'url': 'https://nicochannel.jp/testman/videos?vodType=1',
+ 'info_dict': {
+ 'id': 'testman-videos',
+ 'title': '本番チャンネルプラステストマン-videos',
+ },
+ 'playlist_mincount': 18,
+ }, {
+ # query: sort
+ 'url': 'https://nicochannel.jp/testman/videos?sort=-released_at',
+ 'info_dict': {
+ 'id': 'testman-videos',
+ 'title': '本番チャンネルプラステストマン-videos',
+ },
+ 'playlist_mincount': 18,
+ }, {
+ # query: tag, vodType
+ 'url': 'https://nicochannel.jp/testman/videos?tag=%E6%A4%9C%E8%A8%BC%E7%94%A8&vodType=1',
+ 'info_dict': {
+ 'id': 'testman-videos',
+ 'title': '本番チャンネルプラステストマン-videos',
+ },
+ 'playlist_mincount': 6,
+ }, {
+ # query: tag, sort
+ 'url': 'https://nicochannel.jp/testman/videos?tag=%E6%A4%9C%E8%A8%BC%E7%94%A8&sort=-released_at',
+ 'info_dict': {
+ 'id': 'testman-videos',
+ 'title': '本番チャンネルプラステストマン-videos',
+ },
+ 'playlist_mincount': 6,
+ }, {
+ # query: vodType, sort
+ 'url': 'https://nicochannel.jp/testman/videos?vodType=1&sort=-released_at',
+ 'info_dict': {
+ 'id': 'testman-videos',
+ 'title': '本番チャンネルプラステストマン-videos',
+ },
+ 'playlist_mincount': 18,
+ }, {
+ # query: tag, vodType, sort
+ 'url': 'https://nicochannel.jp/testman/videos?tag=%E6%A4%9C%E8%A8%BC%E7%94%A8&vodType=1&sort=-released_at',
+ 'info_dict': {
+ 'id': 'testman-videos',
+ 'title': '本番チャンネルプラステストマン-videos',
+ },
+ 'playlist_mincount': 6,
+ }]
+
+ def _real_extract(self, url):
+ """
+ API parameters:
+ sort:
+ -released_at 公開日が新しい順 (newest to oldest)
+ released_at 公開日が古い順 (oldest to newest)
+ -number_of_vod_views 再生数が多い順 (most play count)
+ number_of_vod_views 再生数が少ない順 (least play count)
+ vod_type (is "vodType" in "url"):
+ 0 すべて (all)
+ 1 会員限定 (members only)
+ 2 一部無料 (partially free)
+ 3 レンタル (rental)
+ 4 生放送アーカイブ (live archives)
+ 5 アップロード動画 (uploaded videos)
+ """
+
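+ # e.g. .../testman/videos?tag=foo&vodType=1 yields
+ # query={'tag': 'foo', 'sort': '-released_at', 'vod_type': '1'}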
+ channel_id = self._match_id(url)
+ fanclub_site_id = self._find_fanclub_site_id(channel_id)
+ channel_name = self._get_channel_base_info(fanclub_site_id).get('fanclub_site_name')
+ qs = parse_qs(url)
+
+ return self.playlist_result(
+ OnDemandPagedList(
+ functools.partial(
+ self._fetch_paged_channel_video_list, f'fanclub_sites/{fanclub_site_id}/video_pages',
+ filter_dict({
+ 'tag': traverse_obj(qs, ('tag', 0)),
+ 'sort': traverse_obj(qs, ('sort', 0), default='-released_at'),
+ 'vod_type': traverse_obj(qs, ('vodType', 0), default='0'),
+ }),
+ channel_id, f'{channel_id}/videos'),
+ self._PAGE_SIZE),
+ playlist_id=f'{channel_id}-videos', playlist_title=f'{channel_name}-videos')
+
+
+class NiconicoChannelPlusChannelLivesIE(NiconicoChannelPlusChannelBaseIE):
+ IE_NAME = 'NiconicoChannelPlus:channel:lives'
+ IE_DESC = 'ニコニコチャンネルプラス - チャンネル - ライブリスト. nicochannel.jp/channel/lives'
+ _VALID_URL = r'https?://nicochannel\.jp/(?P<id>[a-z\d\._-]+)/lives'
+ _TESTS = [{
+ 'url': 'https://nicochannel.jp/testman/lives',
+ 'info_dict': {
+ 'id': 'testman-lives',
+ 'title': '本番チャンネルプラステストマン-lives',
+ },
+ 'playlist_mincount': 18,
+ }, {
+ 'url': 'https://nicochannel.jp/testtarou/lives',
+ 'info_dict': {
+ 'id': 'testtarou-lives',
+ 'title': 'チャンネルプラステスト太郎-lives',
+ },
+ 'playlist_mincount': 2,
+ }, {
+ 'url': 'https://nicochannel.jp/testjirou/lives',
+ 'info_dict': {
+ 'id': 'testjirou-lives',
+ 'title': 'チャンネルプラステスト二郎-lives',
+ },
+ 'playlist_mincount': 6,
+ }]
+
+ def _real_extract(self, url):
+ """
+ API parameters:
+ live_type:
+ 1 放送中 (on air)
+ 2 放送予定 (scheduled live streams, oldest to newest)
+ 3 過去の放送 - すべて (all ended live streams, newest to oldest)
+ 4 過去の放送 - 生放送アーカイブ (all archives for live streams, oldest to newest)
+ We use "4" instead of "3" because some recently ended live streams could not be downloaded.
+ """
+
+ channel_id = self._match_id(url)
+ fanclub_site_id = self._find_fanclub_site_id(channel_id)
+ channel_name = self._get_channel_base_info(fanclub_site_id).get('fanclub_site_name')
+
+ return self.playlist_result(
+ OnDemandPagedList(
+ functools.partial(
+ self._fetch_paged_channel_video_list, f'fanclub_sites/{fanclub_site_id}/live_pages',
+ {
+ 'live_type': 4,
+ },
+ channel_id, f'{channel_id}/lives'),
+ self._PAGE_SIZE),
+ playlist_id=f'{channel_id}-lives', playlist_title=f'{channel_name}-lives')
diff --git a/yt_dlp/extractor/ninaprotocol.py b/yt_dlp/extractor/ninaprotocol.py
new file mode 100644
index 0000000..ea57c5f
--- /dev/null
+++ b/yt_dlp/extractor/ninaprotocol.py
@@ -0,0 +1,225 @@
+from .common import InfoExtractor
+from ..utils import int_or_none, mimetype2ext, parse_iso8601, url_or_none
+from ..utils.traversal import traverse_obj
+
+
+class NinaProtocolIE(InfoExtractor):
+ _VALID_URL = r'https?://(?:www\.)?ninaprotocol\.com/releases/(?P<id>[^/#?]+)'
+ _TESTS = [{
+ 'url': 'https://www.ninaprotocol.com/releases/3SvsMM3y4oTPZ5DXFJnLkCAqkxz34hjzFxqms1vu9XBJ',
+ 'info_dict': {
+ 'id': '3SvsMM3y4oTPZ5DXFJnLkCAqkxz34hjzFxqms1vu9XBJ',
+ 'title': 'The Spatulas - March Chant',
+ 'tags': ['punk', 'postpresentmedium', 'cambridge'],
+ 'uploader_id': '2bGjgdKUddJoj2shYGqfNcUfoSoABP21RJoiwGMZDq3A',
+ 'channel': 'ppm',
+ 'description': 'md5:bb9f9d39d8f786449cd5d0ff7c5772db',
+ 'album': 'The Spatulas - March Chant',
+ 'thumbnail': 'https://www.arweave.net/VyZA6CBeUuqP174khvSrD44Eosi3MLVyWN42uaQKg50',
+ 'timestamp': 1701417610,
+ 'uploader': 'ppmrecs',
+ 'channel_id': '4ceG4zsb7VVxBTGPtZMqDZWGHo3VUg2xRvzC2b17ymWP',
+ 'display_id': 'the-spatulas-march-chant',
+ 'upload_date': '20231201',
+ 'album_artist': 'Post Present Medium ',
+ },
+ 'playlist': [{
+ 'info_dict': {
+ 'id': '3SvsMM3y4oTPZ5DXFJnLkCAqkxz34hjzFxqms1vu9XBJ_1',
+ 'title': 'March Chant In April',
+ 'track': 'March Chant In April',
+ 'ext': 'mp3',
+ 'duration': 152,
+ 'track_number': 1,
+ 'uploader_id': '2bGjgdKUddJoj2shYGqfNcUfoSoABP21RJoiwGMZDq3A',
+ 'uploader': 'ppmrecs',
+ 'thumbnail': 'https://www.arweave.net/VyZA6CBeUuqP174khvSrD44Eosi3MLVyWN42uaQKg50',
+ 'timestamp': 1701417610,
+ 'channel': 'ppm',
+ 'album': 'The Spatulas - March Chant',
+ 'tags': ['punk', 'postpresentmedium', 'cambridge'],
+ 'channel_id': '4ceG4zsb7VVxBTGPtZMqDZWGHo3VUg2xRvzC2b17ymWP',
+ 'upload_date': '20231201',
+ 'album_artist': 'Post Present Medium ',
+ }
+ }, {
+ 'info_dict': {
+ 'id': '3SvsMM3y4oTPZ5DXFJnLkCAqkxz34hjzFxqms1vu9XBJ_2',
+ 'title': 'Rescue Mission',
+ 'track': 'Rescue Mission',
+ 'ext': 'mp3',
+ 'duration': 212,
+ 'track_number': 2,
+ 'album_artist': 'Post Present Medium ',
+ 'uploader': 'ppmrecs',
+ 'tags': ['punk', 'postpresentmedium', 'cambridge'],
+ 'thumbnail': 'https://www.arweave.net/VyZA6CBeUuqP174khvSrD44Eosi3MLVyWN42uaQKg50',
+ 'channel': 'ppm',
+ 'upload_date': '20231201',
+ 'channel_id': '4ceG4zsb7VVxBTGPtZMqDZWGHo3VUg2xRvzC2b17ymWP',
+ 'timestamp': 1701417610,
+ 'album': 'The Spatulas - March Chant',
+ 'uploader_id': '2bGjgdKUddJoj2shYGqfNcUfoSoABP21RJoiwGMZDq3A',
+ }
+ }, {
+ 'info_dict': {
+ 'id': '3SvsMM3y4oTPZ5DXFJnLkCAqkxz34hjzFxqms1vu9XBJ_3',
+ 'title': 'Slinger Style',
+ 'track': 'Slinger Style',
+ 'ext': 'mp3',
+ 'duration': 179,
+ 'track_number': 3,
+ 'timestamp': 1701417610,
+ 'upload_date': '20231201',
+ 'channel_id': '4ceG4zsb7VVxBTGPtZMqDZWGHo3VUg2xRvzC2b17ymWP',
+ 'uploader_id': '2bGjgdKUddJoj2shYGqfNcUfoSoABP21RJoiwGMZDq3A',
+ 'thumbnail': 'https://www.arweave.net/VyZA6CBeUuqP174khvSrD44Eosi3MLVyWN42uaQKg50',
+ 'album_artist': 'Post Present Medium ',
+ 'album': 'The Spatulas - March Chant',
+ 'tags': ['punk', 'postpresentmedium', 'cambridge'],
+ 'uploader': 'ppmrecs',
+ 'channel': 'ppm',
+ }
+ }, {
+ 'info_dict': {
+ 'id': '3SvsMM3y4oTPZ5DXFJnLkCAqkxz34hjzFxqms1vu9XBJ_4',
+ 'title': 'Psychic Signal',
+ 'track': 'Psychic Signal',
+ 'ext': 'mp3',
+ 'duration': 220,
+ 'track_number': 4,
+ 'tags': ['punk', 'postpresentmedium', 'cambridge'],
+ 'upload_date': '20231201',
+ 'album': 'The Spatulas - March Chant',
+ 'thumbnail': 'https://www.arweave.net/VyZA6CBeUuqP174khvSrD44Eosi3MLVyWN42uaQKg50',
+ 'timestamp': 1701417610,
+ 'album_artist': 'Post Present Medium ',
+ 'channel_id': '4ceG4zsb7VVxBTGPtZMqDZWGHo3VUg2xRvzC2b17ymWP',
+ 'channel': 'ppm',
+ 'uploader_id': '2bGjgdKUddJoj2shYGqfNcUfoSoABP21RJoiwGMZDq3A',
+ 'uploader': 'ppmrecs',
+ }
+ }, {
+ 'info_dict': {
+ 'id': '3SvsMM3y4oTPZ5DXFJnLkCAqkxz34hjzFxqms1vu9XBJ_5',
+ 'title': 'Curvy Color',
+ 'track': 'Curvy Color',
+ 'ext': 'mp3',
+ 'duration': 148,
+ 'track_number': 5,
+ 'timestamp': 1701417610,
+ 'uploader_id': '2bGjgdKUddJoj2shYGqfNcUfoSoABP21RJoiwGMZDq3A',
+ 'thumbnail': 'https://www.arweave.net/VyZA6CBeUuqP174khvSrD44Eosi3MLVyWN42uaQKg50',
+ 'album': 'The Spatulas - March Chant',
+ 'album_artist': 'Post Present Medium ',
+ 'channel': 'ppm',
+ 'tags': ['punk', 'postpresentmedium', 'cambridge'],
+ 'uploader': 'ppmrecs',
+ 'channel_id': '4ceG4zsb7VVxBTGPtZMqDZWGHo3VUg2xRvzC2b17ymWP',
+ 'upload_date': '20231201',
+ }
+ }, {
+ 'info_dict': {
+ 'id': '3SvsMM3y4oTPZ5DXFJnLkCAqkxz34hjzFxqms1vu9XBJ_6',
+ 'title': 'Caveman Star',
+ 'track': 'Caveman Star',
+ 'ext': 'mp3',
+ 'duration': 121,
+ 'track_number': 6,
+ 'channel_id': '4ceG4zsb7VVxBTGPtZMqDZWGHo3VUg2xRvzC2b17ymWP',
+ 'thumbnail': 'https://www.arweave.net/VyZA6CBeUuqP174khvSrD44Eosi3MLVyWN42uaQKg50',
+ 'tags': ['punk', 'postpresentmedium', 'cambridge'],
+ 'album_artist': 'Post Present Medium ',
+ 'uploader': 'ppmrecs',
+ 'timestamp': 1701417610,
+ 'uploader_id': '2bGjgdKUddJoj2shYGqfNcUfoSoABP21RJoiwGMZDq3A',
+ 'album': 'The Spatulas - March Chant',
+ 'channel': 'ppm',
+ 'upload_date': '20231201',
+ },
+ }],
+ }, {
+ 'url': 'https://www.ninaprotocol.com/releases/f-g-s-american-shield',
+ 'info_dict': {
+ 'id': '76PZnJwaMgViQHYfA4NYJXds7CmW6vHQKAtQUxGene6J',
+ 'description': 'md5:63f08d5db558b4b36e1896f317062721',
+ 'title': 'F.G.S. - American Shield',
+ 'uploader_id': 'Ej3rozs11wYqFk1Gs6oggGCkGLz8GzBhmJfnUxf6gPci',
+ 'channel_id': '6JuksCZPXuP16wJ1BUfwuukJzh42C7guhLrFPPkVJfyE',
+ 'channel': 'tinkscough',
+ 'tags': [],
+ 'album_artist': 'F.G.S.',
+ 'album': 'F.G.S. - American Shield',
+ 'thumbnail': 'https://www.arweave.net/YJpgImkXLT9SbpFb576KuZ5pm6bdvs452LMs3Rx6lm8',
+ 'display_id': 'f-g-s-american-shield',
+ 'uploader': 'flannerysilva',
+ 'timestamp': 1702395858,
+ 'upload_date': '20231212',
+ },
+ 'playlist_count': 1,
+ }, {
+ 'url': 'https://www.ninaprotocol.com/releases/time-to-figure-things-out',
+ 'info_dict': {
+ 'id': '6Zi1nC5hj6b13NkpxVYwRhFy6mYA7oLBbe9DMrgGDcYh',
+ 'display_id': 'time-to-figure-things-out',
+ 'description': 'md5:960202ed01c3134bb8958f1008527e35',
+ 'timestamp': 1706283607,
+ 'title': 'DJ STEPDAD - time to figure things out',
+ 'album_artist': 'DJ STEPDAD',
+ 'uploader': 'tddvsss',
+ 'upload_date': '20240126',
+ 'album': 'time to figure things out',
+ 'uploader_id': 'AXQNRgTyYsySyAMFDwxzumuGjfmoXshorCesjpquwCBi',
+ 'thumbnail': 'https://www.arweave.net/O4i8bcKVqJVZvNeHHFp6r8knpFGh9ZwEgbeYacr4nss',
+ 'tags': [],
+ },
+ 'playlist_count': 4,
+ }]
+
+ def _real_extract(self, url):
+ video_id = self._match_id(url)
+ release = self._download_json(
+ f'https://api.ninaprotocol.com/v1/releases/{video_id}', video_id)['release']
+
+ video_id = release.get('publicKey') or video_id
+
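+ # metadata shared by the release and every track entry; get_all=False
+ # keeps the first match for the alternative album_artist paths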
+ common_info = traverse_obj(release, {
+ 'album': ('metadata', 'properties', 'title', {str}),
+ 'album_artist': ((('hub', 'data'), 'publisherAccount'), 'displayName', {str}),
+ 'timestamp': ('datetime', {parse_iso8601}),
+ 'thumbnail': ('metadata', 'image', {url_or_none}),
+ 'uploader': ('publisherAccount', 'handle', {str}),
+ 'uploader_id': ('publisherAccount', 'publicKey', {str}),
+ 'channel': ('hub', 'handle', {str}),
+ 'channel_id': ('hub', 'publicKey', {str}),
+ }, get_all=False)
+ common_info['tags'] = traverse_obj(release, ('metadata', 'properties', 'tags', ..., {str}))
+
+ entries = []
+ for track_num, track in enumerate(traverse_obj(release, (
+ 'metadata', 'properties', 'files', lambda _, v: url_or_none(v['uri']))), 1):
+ entries.append({
+ 'id': f'{video_id}_{track_num}',
+ 'url': track['uri'],
+ **traverse_obj(track, {
+ 'title': ('track_title', {str}),
+ 'track': ('track_title', {str}),
+ 'ext': ('type', {mimetype2ext}),
+ 'track_number': ('track', {int_or_none}),
+ 'duration': ('duration', {int_or_none}),
+ }),
+ 'vcodec': 'none',
+ **common_info,
+ })
+
+ return {
+ '_type': 'playlist',
+ 'id': video_id,
+ 'entries': entries,
+ **traverse_obj(release, {
+ 'display_id': ('slug', {str}),
+ 'title': ('metadata', 'name', {str}),
+ 'description': ('metadata', 'description', {str}),
+ }),
+ **common_info,
+ }
diff --git a/yt_dlp/extractor/ninecninemedia.py b/yt_dlp/extractor/ninecninemedia.py
new file mode 100644
index 0000000..579370f
--- /dev/null
+++ b/yt_dlp/extractor/ninecninemedia.py
@@ -0,0 +1,130 @@
+from .common import InfoExtractor
+from ..utils import (
+ float_or_none,
+ int_or_none,
+ parse_iso8601,
+ str_or_none,
+ try_get,
+)
+
+
+class NineCNineMediaIE(InfoExtractor):
+ IE_NAME = '9c9media'
+ _GEO_COUNTRIES = ['CA']
+ _VALID_URL = r'9c9media:(?P<destination_code>[^:]+):(?P<id>\d+)'
+ _API_BASE_TEMPLATE = 'http://capi.9c9media.com/destinations/%s/platforms/desktop/contents/%s/'
+
+ def _real_extract(self, url):
+ destination_code, content_id = self._match_valid_url(url).groups()
+ api_base_url = self._API_BASE_TEMPLATE % (destination_code, content_id)
+ content = self._download_json(api_base_url, content_id, query={
+ '$include': '[Media.Name,Season,ContentPackages.Duration,ContentPackages.Id]',
+ })
+ title = content['Name']
+ content_package = content['ContentPackages'][0]
+ package_id = content_package['Id']
+ content_package_url = api_base_url + 'contentpackages/%s/' % package_id
+ content_package = self._download_json(
+ content_package_url, content_id, query={
+ '$include': '[HasClosedCaptions]',
+ })
+
+ if (not self.get_param('allow_unplayable_formats')
+ and try_get(content_package, lambda x: x['Constraints']['Security']['Type'])):
+ self.report_drm(content_id)
+
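+ # the same content package endpoint serves HLS, HDS and DASH manifests,
+ # selected by the extension appended below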
+ manifest_base_url = content_package_url + 'manifest.'
+ formats = []
+ formats.extend(self._extract_m3u8_formats(
+ manifest_base_url + 'm3u8', content_id, 'mp4',
+ 'm3u8_native', m3u8_id='hls', fatal=False))
+ formats.extend(self._extract_f4m_formats(
+ manifest_base_url + 'f4m', content_id,
+ f4m_id='hds', fatal=False))
+ formats.extend(self._extract_mpd_formats(
+ manifest_base_url + 'mpd', content_id,
+ mpd_id='dash', fatal=False))
+
+ thumbnails = []
+ for image in (content.get('Images') or []):
+ image_url = image.get('Url')
+ if not image_url:
+ continue
+ thumbnails.append({
+ 'url': image_url,
+ 'width': int_or_none(image.get('Width')),
+ 'height': int_or_none(image.get('Height')),
+ })
+
+ tags, categories = [], []
+ for source_name, container in (('Tags', tags), ('Genres', categories)):
+ for e in content.get(source_name, []):
+ e_name = e.get('Name')
+ if not e_name:
+ continue
+ container.append(e_name)
+
+ season = content.get('Season') or {}
+
+ info = {
+ 'id': content_id,
+ 'title': title,
+ 'description': content.get('Desc') or content.get('ShortDesc'),
+ 'timestamp': parse_iso8601(content.get('BroadcastDateTime')),
+ 'episode_number': int_or_none(content.get('Episode')),
+ 'season': season.get('Name'),
+ 'season_number': int_or_none(season.get('Number')),
+ 'season_id': str_or_none(season.get('Id')),
+ 'series': try_get(content, lambda x: x['Media']['Name']),
+ 'tags': tags,
+ 'categories': categories,
+ 'duration': float_or_none(content_package.get('Duration')),
+ 'formats': formats,
+ 'thumbnails': thumbnails,
+ }
+
+ if content_package.get('HasClosedCaptions'):
+ info['subtitles'] = {
+ 'en': [{
+ 'url': manifest_base_url + 'vtt',
+ 'ext': 'vtt',
+ }, {
+ 'url': manifest_base_url + 'srt',
+ 'ext': 'srt',
+ }]
+ }
+
+ return info
+
+
+class CPTwentyFourIE(InfoExtractor):
+ IE_NAME = 'cp24'
+ _GEO_COUNTRIES = ['CA']
+ _VALID_URL = r'https?://(?:www\.)?cp24\.com/news/(?P<id>[^?#]+)'
+
+ _TESTS = [{
+ 'url': 'https://www.cp24.com/news/video-shows-atm-being-ripped-out-of-business-by-pickup-truck-driver-in-mississauga-1.5676877',
+ 'info_dict': {
+ 'id': '2328005',
+ 'ext': 'mp4',
+ 'title': 'WATCH: Truck rips ATM from Mississauga business',
+ 'description': 'md5:cf7498480885f080a754389a2b2f7073',
+ 'timestamp': 1637618377,
+ 'season': 'Season 0',
+ 'season_number': 0,
+ 'season_id': '57974',
+ 'series': 'CTV News Toronto',
+ 'duration': 26.86,
+ 'thumbnail': 'http://images2.9c9media.com/image_asset/2014_11_5_2eb609a0-475b-0132-fbd6-34b52f6f1279_jpg_2000x1125.jpg',
+ 'upload_date': '20211122',
+ },
+ 'params': {'skip_download': True, 'format': 'bv'}
+ }]
+
+ def _real_extract(self, url):
+ display_id = self._match_id(url)
+ webpage = self._download_webpage(url, display_id)
+ video_id, destination = self._search_regex(
+ r'getAuthStates\("(?P<id>[^"]+)",\s?"(?P<destination>[^"]+)"\);',
+ webpage, 'video id and destination', group=('id', 'destination'))
+ return self.url_result(f'9c9media:{destination}:{video_id}', ie=NineCNineMediaIE.ie_key(), video_id=video_id)
diff --git a/yt_dlp/extractor/ninegag.py b/yt_dlp/extractor/ninegag.py
new file mode 100644
index 0000000..865ad99
--- /dev/null
+++ b/yt_dlp/extractor/ninegag.py
@@ -0,0 +1,148 @@
+from .common import InfoExtractor
+from ..utils import (
+ ExtractorError,
+ determine_ext,
+ int_or_none,
+ traverse_obj,
+ unescapeHTML,
+ url_or_none,
+)
+
+
+class NineGagIE(InfoExtractor):
+ IE_NAME = '9gag'
+ IE_DESC = '9GAG'
+ _VALID_URL = r'https?://(?:www\.)?9gag\.com/gag/(?P<id>[^/?&#]+)'
+
+ _TESTS = [{
+ 'url': 'https://9gag.com/gag/ae5Ag7B',
+ 'info_dict': {
+ 'id': 'ae5Ag7B',
+ 'ext': 'webm',
+ 'title': 'Capybara Agility Training',
+ 'upload_date': '20191108',
+ 'timestamp': 1573237208,
+ 'thumbnail': 'https://img-9gag-fun.9cache.com/photo/ae5Ag7B_460s.jpg',
+ 'categories': ['Awesome'],
+ 'tags': ['Awesome'],
+ 'duration': 44,
+ 'like_count': int,
+ 'dislike_count': int,
+ 'comment_count': int,
+ }
+ }, {
+ # HTML escaped title
+ 'url': 'https://9gag.com/gag/av5nvyb',
+ 'only_matching': True,
+ }, {
+ # Non Anonymous Uploader
+ 'url': 'https://9gag.com/gag/ajgp66G',
+ 'info_dict': {
+ 'id': 'ajgp66G',
+ 'ext': 'webm',
+ 'title': 'Master Shifu! Or Splinter! You decide:',
+ 'upload_date': '20220806',
+ 'timestamp': 1659803411,
+ 'thumbnail': 'https://img-9gag-fun.9cache.com/photo/ajgp66G_460s.jpg',
+ 'categories': ['Funny'],
+ 'tags': ['Funny'],
+ 'duration': 26,
+ 'like_count': int,
+ 'dislike_count': int,
+ 'comment_count': int,
+ 'uploader': 'Peter Klaus',
+ 'uploader_id': 'peterklaus12',
+ 'uploader_url': 'https://9gag.com/u/peterklaus12',
+ }
+ }]
+
+ def _real_extract(self, url):
+ post_id = self._match_id(url)
+ post = self._download_json(
+ 'https://9gag.com/v1/post', post_id, query={
+ 'id': post_id
+ })['data']['post']
+
+ if post.get('type') != 'Animated':
+ raise ExtractorError(
+ 'The given URL does not contain a video',
+ expected=True)
+
+ duration = None
+ formats = []
+ thumbnails = []
+ for key, image in (post.get('images') or {}).items():
+ image_url = url_or_none(image.get('url'))
+ if not image_url:
+ continue
+ ext = determine_ext(image_url)
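+ # note: str.strip('image') strips any of the characters i/m/a/g/e from
+ # both ends of the key, not the literal prefix 'image'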
+ image_id = key.strip('image')
+ common = {
+ 'url': image_url,
+ 'width': int_or_none(image.get('width')),
+ 'height': int_or_none(image.get('height')),
+ }
+ if ext in ('jpg', 'png'):
+ webp_url = image.get('webpUrl')
+ if webp_url:
+ t = common.copy()
+ t.update({
+ 'id': image_id + '-webp',
+ 'url': webp_url,
+ })
+ thumbnails.append(t)
+ common.update({
+ 'id': image_id,
+ 'ext': ext,
+ })
+ thumbnails.append(common)
+ elif ext in ('webm', 'mp4'):
+ if not duration:
+ duration = int_or_none(image.get('duration'))
+ common['acodec'] = 'none' if image.get('hasAudio') == 0 else None
+ for vcodec in ('vp8', 'vp9', 'h265'):
+ c_url = image.get(vcodec + 'Url')
+ if not c_url:
+ continue
+ c_f = common.copy()
+ c_f.update({
+ 'format_id': image_id + '-' + vcodec,
+ 'url': c_url,
+ 'vcodec': vcodec,
+ })
+ formats.append(c_f)
+ common.update({
+ 'ext': ext,
+ 'format_id': image_id,
+ })
+ formats.append(common)
+
+ section = traverse_obj(post, ('postSection', 'name'))
+
+ tags = None
+ post_tags = post.get('tags')
+ if post_tags:
+ tags = []
+ for tag in post_tags:
+ tag_key = tag.get('key')
+ if not tag_key:
+ continue
+ tags.append(tag_key)
+
+ return {
+ 'id': post_id,
+ 'title': unescapeHTML(post.get('title')),
+ 'timestamp': int_or_none(post.get('creationTs')),
+ 'duration': duration,
+ 'uploader': traverse_obj(post, ('creator', 'fullName')),
+ 'uploader_id': traverse_obj(post, ('creator', 'username')),
+ 'uploader_url': url_or_none(traverse_obj(post, ('creator', 'profileUrl'))),
+ 'formats': formats,
+ 'thumbnails': thumbnails,
+ 'like_count': int_or_none(post.get('upVoteCount')),
+ 'dislike_count': int_or_none(post.get('downVoteCount')),
+ 'comment_count': int_or_none(post.get('commentsCount')),
+ 'age_limit': 18 if post.get('nsfw') == 1 else None,
+ 'categories': [section] if section else None,
+ 'tags': tags,
+ }
diff --git a/yt_dlp/extractor/ninenews.py b/yt_dlp/extractor/ninenews.py
new file mode 100644
index 0000000..900d9ba
--- /dev/null
+++ b/yt_dlp/extractor/ninenews.py
@@ -0,0 +1,72 @@
+from .common import InfoExtractor
+from .brightcove import BrightcoveNewIE
+from ..utils import ExtractorError
+from ..utils.traversal import traverse_obj
+
+
+class NineNewsIE(InfoExtractor):
+ IE_NAME = '9News'
+ _VALID_URL = r'https?://(?:www\.)?9news\.com\.au/(?:[\w-]+/){2,3}(?P<id>[\w-]+)/?(?:$|[?#])'
+ _TESTS = [{
+ 'url': 'https://www.9news.com.au/videos/national/fair-trading-pulls-dozens-of-toys-from-shelves/clqgc7dvj000y0jnvfism0w5m',
+ 'md5': 'd1a65b2e9d126e5feb9bc5cb96e62c80',
+ 'info_dict': {
+ 'id': '6343717246112',
+ 'ext': 'mp4',
+ 'title': 'Fair Trading pulls dozens of toys from shelves',
+ 'description': 'Fair Trading Australia have been forced to pull dozens of toys from shelves over hazard fears.',
+ 'thumbnail': 'md5:bdbe44294e2323b762d97acf8843f66c',
+ 'duration': 93.44,
+ 'timestamp': 1703231748,
+ 'upload_date': '20231222',
+ 'uploader_id': '664969388001',
+ 'tags': ['networkclip', 'aunews_aunationalninenews', 'christmas presents', 'toys', 'fair trading', 'au_news'],
+ }
+ }, {
+ 'url': 'https://www.9news.com.au/world/tape-reveals-donald-trump-pressured-michigan-officials-not-to-certify-2020-vote-a-new-report-says/0b8b880e-7d3c-41b9-b2bd-55bc7e492259',
+ 'md5': 'a885c44d20898c3e70e9a53e8188cea1',
+ 'info_dict': {
+ 'id': '6343587450112',
+ 'ext': 'mp4',
+ 'title': 'Trump found ineligible to run for president by state court',
+ 'description': 'md5:40e6e7db7a4ac6be0e960569a5af6066',
+ 'thumbnail': 'md5:3e132c48c186039fd06c10787de9bff2',
+ 'duration': 104.64,
+ 'timestamp': 1703058034,
+ 'upload_date': '20231220',
+ 'uploader_id': '664969388001',
+ 'tags': ['networkclip', 'aunews_aunationalninenews', 'ineligible', 'presidential candidate', 'donald trump', 'au_news'],
+ }
+ }, {
+ 'url': 'https://www.9news.com.au/national/outrage-as-parents-banned-from-giving-gifts-to-kindergarten-teachers/e19b49d4-a1a4-4533-9089-6e10e2d9386a',
+ 'info_dict': {
+ 'id': '6343716797112',
+ 'ext': 'mp4',
+ 'title': 'Outrage as parents banned from giving gifts to kindergarten teachers',
+ 'description': 'md5:7a8b0ed2f9e08875fd9a3e86e462bc46',
+ 'thumbnail': 'md5:5ee4d66717bdd0dee9fc9a705ef041b8',
+ 'duration': 91.307,
+ 'timestamp': 1703229584,
+ 'upload_date': '20231222',
+ 'uploader_id': '664969388001',
+ 'tags': ['networkclip', 'aunews_aunationalninenews', 'presents', 'teachers', 'kindergarten', 'au_news'],
+ },
+ }]
+
+ def _real_extract(self, url):
+ article_id = self._match_id(url)
+ webpage = self._download_webpage(url, article_id)
+ initial_state = self._search_json(
+ r'var\s+__INITIAL_STATE__\s*=', webpage, 'initial state', article_id)
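+ # prefer the current video's Brightcove id; fall back to the urn of the
+ # first video media item embedded in the article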
+ video_id = traverse_obj(
+ initial_state, ('videoIndex', 'currentVideo', 'brightcoveId', {str}),
+ ('article', ..., 'media', lambda _, v: v['type'] == 'video', 'urn', {str}), get_all=False)
+ account = traverse_obj(initial_state, (
+ 'videoIndex', 'config', (None, 'video'), 'account', {str}), get_all=False)
+
+ if not video_id or not account:
+ raise ExtractorError('Unable to get the required video data')
+
+ return self.url_result(
+ f'https://players.brightcove.net/{account}/default_default/index.html?videoId={video_id}',
+ BrightcoveNewIE, video_id)
diff --git a/yt_dlp/extractor/ninenow.py b/yt_dlp/extractor/ninenow.py
new file mode 100644
index 0000000..c655b75
--- /dev/null
+++ b/yt_dlp/extractor/ninenow.py
@@ -0,0 +1,122 @@
+from .common import InfoExtractor
+from ..compat import compat_str
+from ..utils import (
+ ExtractorError,
+ int_or_none,
+ float_or_none,
+ smuggle_url,
+ str_or_none,
+ try_get,
+ unified_strdate,
+ unified_timestamp,
+)
+
+
+class NineNowIE(InfoExtractor):
+ IE_NAME = '9now.com.au'
+ _VALID_URL = r'https?://(?:www\.)?9now\.com\.au/(?:[^/]+/){2}(?P<id>[^/?#]+)'
+ _GEO_COUNTRIES = ['AU']
+ _TESTS = [{
+ # clip
+ 'url': 'https://www.9now.com.au/afl-footy-show/2016/clip-ciql02091000g0hp5oktrnytc',
+ 'md5': '17cf47d63ec9323e562c9957a968b565',
+ 'info_dict': {
+ 'id': '16801',
+ 'ext': 'mp4',
+ 'title': 'St. Kilda\'s Joey Montagna on the potential for a player\'s strike',
+ 'description': 'Is a boycott of the NAB Cup "on the table"?',
+ 'uploader_id': '4460760524001',
+ 'upload_date': '20160713',
+ 'timestamp': 1468421266,
+ },
+ 'skip': 'Only available in Australia',
+ }, {
+ # episode
+ 'url': 'https://www.9now.com.au/afl-footy-show/2016/episode-19',
+ 'only_matching': True,
+ }, {
+ # DRM protected
+ 'url': 'https://www.9now.com.au/andrew-marrs-history-of-the-world/season-1/episode-1',
+ 'only_matching': True,
+ }, {
+ # episode of series
+ 'url': 'https://www.9now.com.au/lego-masters/season-3/episode-3',
+ 'info_dict': {
+ 'id': '6249614030001',
+ 'title': 'Episode 3',
+ 'ext': 'mp4',
+ 'season_number': 3,
+ 'episode_number': 3,
+ 'description': 'In the first elimination of the competition, teams will have 10 hours to build a world inside a snow globe.',
+ 'uploader_id': '4460760524001',
+ 'timestamp': 1619002200,
+ 'upload_date': '20210421',
+ },
+ 'expected_warnings': ['Ignoring subtitle tracks'],
+ 'params': {
+ 'skip_download': True,
+ }
+ }]
+ BRIGHTCOVE_URL_TEMPLATE = 'http://players.brightcove.net/4460760524001/default_default/index.html?videoId=%s'
+
+ def _real_extract(self, url):
+ display_id = self._match_id(url)
+ webpage = self._download_webpage(url, display_id)
+ page_data = self._parse_json(self._search_regex(
+ r'window\.__data\s*=\s*({.*?});', webpage,
+ 'page data', default='{}'), display_id, fatal=False)
+ if not page_data:
+ page_data = self._parse_json(self._parse_json(self._search_regex(
+ r'window\.__data\s*=\s*JSON\.parse\s*\(\s*(".+?")\s*\)\s*;',
+ webpage, 'page data'), display_id), display_id)
+
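+ # check the episode cache first, then the clip cache; the for-else
+ # below raises only when neither contains usable data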
+ for kind in ('episode', 'clip'):
+ current_key = page_data.get(kind, {}).get(
+ 'current%sKey' % kind.capitalize())
+ if not current_key:
+ continue
+ cache = page_data.get(kind, {}).get('%sCache' % kind, {})
+ if not cache:
+ continue
+ current = cache.get(current_key) or list(cache.values())[0]
+ common_data = {
+ 'episode': current[kind],
+ 'season': current.get('season'),
+ }
+ break
+ else:
+ raise ExtractorError('Unable to find video data')
+
+ if not self.get_param('allow_unplayable_formats') and try_get(common_data, lambda x: x['episode']['video']['drm'], bool):
+ self.report_drm(display_id)
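+ # Brightcove also accepts reference ids prefixed with 'ref:' in place
+ # of numeric video ids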
+ brightcove_id = try_get(
+ common_data, lambda x: x['episode']['video']['brightcoveId'], compat_str) or 'ref:%s' % common_data['episode']['video']['referenceId']
+ video_id = str_or_none(try_get(common_data, lambda x: x['episode']['video']['id'])) or brightcove_id
+
+ title = try_get(common_data, lambda x: x['episode']['name'], compat_str)
+ season_number = try_get(common_data, lambda x: x['season']['seasonNumber'], int)
+ episode_number = try_get(common_data, lambda x: x['episode']['episodeNumber'], int)
+ timestamp = unified_timestamp(try_get(common_data, lambda x: x['episode']['airDate'], compat_str))
+ release_date = unified_strdate(try_get(common_data, lambda x: x['episode']['availability'], compat_str))
+ thumbnails_data = try_get(common_data, lambda x: x['episode']['image']['sizes'], dict) or {}
+ thumbnails = [{
+ 'id': thumbnail_id,
+ 'url': thumbnail_url,
+ 'width': int_or_none(thumbnail_id[1:]),
+ } for thumbnail_id, thumbnail_url in thumbnails_data.items()]
+
+ return {
+ '_type': 'url_transparent',
+ 'url': smuggle_url(
+ self.BRIGHTCOVE_URL_TEMPLATE % brightcove_id,
+ {'geo_countries': self._GEO_COUNTRIES}),
+ 'id': video_id,
+ 'title': title,
+ 'description': try_get(common_data, lambda x: x['episode']['description'], compat_str),
+ 'duration': float_or_none(try_get(common_data, lambda x: x['episode']['video']['duration'], float), 1000),
+ 'thumbnails': thumbnails,
+ 'ie_key': 'BrightcoveNew',
+ 'season_number': season_number,
+ 'episode_number': episode_number,
+ 'timestamp': timestamp,
+ 'release_date': release_date,
+ }
diff --git a/yt_dlp/extractor/nintendo.py b/yt_dlp/extractor/nintendo.py
new file mode 100644
index 0000000..853a169
--- /dev/null
+++ b/yt_dlp/extractor/nintendo.py
@@ -0,0 +1,131 @@
+import json
+import urllib.parse
+
+from .common import InfoExtractor
+from ..utils import (
+ ExtractorError,
+ make_archive_id,
+ unified_timestamp,
+ urljoin,
+)
+from ..utils.traversal import traverse_obj
+
+
+class NintendoIE(InfoExtractor):
+ _VALID_URL = r'https?://(?:www\.)?nintendo\.com/(?:(?P<locale>\w{2}(?:-\w{2})?)/)?nintendo-direct/(?P<slug>[^/?#]+)'
+ _TESTS = [{
+ 'url': 'https://www.nintendo.com/nintendo-direct/09-04-2019/',
+ 'info_dict': {
+ 'ext': 'mp4',
+ 'id': '2oPmiviVePUA1IqAZzjuVh',
+ 'display_id': '09-04-2019',
+ 'title': 'Nintendo Direct 9.4.2019',
+ 'timestamp': 1567580400,
+ 'description': 'md5:8aac2780361d8cb772b6d1de66d7d6f4',
+ 'upload_date': '20190904',
+ 'age_limit': 17,
+ '_old_archive_ids': ['nintendo J2bXdmaTE6fe3dWJTPcc7m23FNbc_A1V'],
+ },
+ }, {
+ 'url': 'https://www.nintendo.com/en-ca/nintendo-direct/08-31-2023/',
+ 'info_dict': {
+ 'ext': 'mp4',
+ 'id': '2TB2w2rJhNYF84qQ9E57hU',
+ 'display_id': '08-31-2023',
+ 'title': 'Super Mario Bros. Wonder Direct 8.31.2023',
+ 'timestamp': 1693465200,
+ 'description': 'md5:3067c5b824bcfdae9090a7f38ab2d200',
+ 'tags': ['Mild Fantasy Violence', 'In-Game Purchases'],
+ 'upload_date': '20230831',
+ 'age_limit': 6,
+ },
+ }, {
+ 'url': 'https://www.nintendo.com/us/nintendo-direct/50-fact-extravaganza/',
+ 'info_dict': {
+ 'ext': 'mp4',
+ 'id': 'j0BBGzfw0pQ',
+ 'channel_follower_count': int,
+ 'view_count': int,
+ 'description': 'Learn new details about Super Smash Bros. for Wii U, which launches on November 21.',
+ 'duration': 2123,
+ 'availability': 'public',
+ 'thumbnail': 'https://i.ytimg.com/vi_webp/j0BBGzfw0pQ/maxresdefault.webp',
+ 'timestamp': 1414047600,
+ 'channel_id': 'UCGIY_O-8vW4rfX98KlMkvRg',
+ 'chapters': 'count:53',
+ 'heatmap': 'count:100',
+ 'upload_date': '20141023',
+ 'uploader_id': '@NintendoAmerica',
+ 'playable_in_embed': True,
+ 'categories': ['Gaming'],
+ 'display_id': '50-fact-extravaganza',
+ 'channel': 'Nintendo of America',
+ 'tags': ['Comic Mischief', 'Cartoon Violence', 'Mild Suggestive Themes'],
+ 'like_count': int,
+ 'channel_url': 'https://www.youtube.com/channel/UCGIY_O-8vW4rfX98KlMkvRg',
+ 'age_limit': 10,
+ 'uploader_url': 'https://www.youtube.com/@NintendoAmerica',
+ 'comment_count': int,
+ 'live_status': 'not_live',
+ 'uploader': 'Nintendo of America',
+ 'title': '50-FACT Extravaganza',
+ },
+ }]
+
+ def _create_asset_url(self, path):
+ return urljoin('https://assets.nintendo.com/', urllib.parse.quote(path))
+
+ def _real_extract(self, url):
+ locale, slug = self._match_valid_url(url).group('locale', 'slug')
+
+ language, _, country = (locale or 'US').rpartition('-')
+ parsed_locale = f'{language.lower() or "en"}_{country.upper()}'
+ self.write_debug(f'Using locale {parsed_locale} (from {locale})', only_once=True)
+
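+ # GraphQL persisted query: instead of the query text, the client sends
+ # a sha256 hash that the server resolves to a stored query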
+ response = self._download_json('https://graph.nintendo.com/', slug, query={
+ 'operationName': 'NintendoDirect',
+ 'variables': json.dumps({
+ 'locale': parsed_locale,
+ 'slug': slug,
+ }, separators=(',', ':')),
+ 'extensions': json.dumps({
+ 'persistedQuery': {
+ 'version': 1,
+ 'sha256Hash': '969b16fe9f08b686fa37bc44d1fd913b6188e65794bb5e341c54fa683a8004cb'
+ },
+ }, separators=(',', ':')),
+ })
+ # API returns `{"data": {"direct": null}}` if no matching id
+ direct_info = traverse_obj(response, ('data', 'direct', {dict}))
+ if not direct_info:
+ raise ExtractorError(f'No Nintendo Direct with id {slug} exists', expected=True)
+
+ errors = ', '.join(traverse_obj(response, ('errors', ..., 'message')))
+ if errors:
+ raise ExtractorError(f'GraphQL API error: {errors}')
+
+ result = traverse_obj(direct_info, {
+ 'id': ('id', {str}),
+ 'title': ('name', {str}),
+ 'timestamp': ('startDate', {unified_timestamp}),
+ 'description': ('description', 'text', {str}),
+ 'age_limit': ('contentRating', 'order', {int}),
+ 'tags': ('contentDescriptors', ..., 'label', {str}),
+ 'thumbnail': ('thumbnail', {self._create_asset_url}),
+ })
+ result['display_id'] = slug
+
+ asset_id = traverse_obj(direct_info, ('video', 'publicId', {str}))
+ if not asset_id:
+ youtube_id = traverse_obj(direct_info, ('liveStream', {str}))
+ if not youtube_id:
+ self.raise_no_formats('Could not find any video formats', video_id=slug)
+
+ return self.url_result(youtube_id, **result, url_transparent=True)
+
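+ # asset_id[14:] strips the 'Legacy Videos/' prefix (14 characters) to
+ # recover the id used by the old extractor for archive matching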
+ if asset_id.startswith('Legacy Videos/'):
+ result['_old_archive_ids'] = [make_archive_id(self, asset_id[14:])]
+ result['formats'] = self._extract_m3u8_formats(
+ self._create_asset_url(f'/video/upload/sp_full_hd/v1/{asset_id}.m3u8'), slug)
+
+ return result
diff --git a/yt_dlp/extractor/nitter.py b/yt_dlp/extractor/nitter.py
new file mode 100644
index 0000000..35d1311
--- /dev/null
+++ b/yt_dlp/extractor/nitter.py
@@ -0,0 +1,360 @@
+from .common import InfoExtractor
+from ..compat import compat_urlparse
+from ..utils import (
+ parse_count,
+ unified_timestamp,
+ remove_end,
+ determine_ext,
+)
+import re
+import random
+
+
+class NitterIE(InfoExtractor):
+ # Taken from https://github.com/zedeus/nitter/wiki/Instances
+
+ NON_HTTP_INSTANCES = (
+ '3nzoldnxplag42gqjs23xvghtzf6t6yzssrtytnntc6ppc7xxuoneoad.onion',
+ 'nitter.l4qlywnpwqsluw65ts7md3khrivpirse744un3x7mlskqauz5pyuzgqd.onion',
+ 'nitter7bryz3jv7e3uekphigvmoyoem4al3fynerxkj22dmoxoq553qd.onion',
+ 'npf37k3mtzwxreiw52ccs5ay4e6qt2fkcs2ndieurdyn2cuzzsfyfvid.onion',
+ 'nitter.v6vgyqpa7yefkorazmg5d5fimstmvm2vtbirt6676mt7qmllrcnwycqd.onion',
+ 'i23nv6w3juvzlw32xzoxcqzktegd4i4fu3nmnc2ewv4ggiu4ledwklad.onion',
+ '26oq3gioiwcmfojub37nz5gzbkdiqp7fue5kvye7d4txv4ny6fb4wwid.onion',
+ 'vfaomgh4jxphpbdfizkm5gbtjahmei234giqj4facbwhrfjtcldauqad.onion',
+ 'iwgu3cv7ywf3gssed5iqtavmrlszgsxazkmwwnt4h2kdait75thdyrqd.onion',
+ 'erpnncl5nhyji3c32dcfmztujtl3xaddqb457jsbkulq24zqq7ifdgad.onion',
+ 'ckzuw5misyahmg7j5t5xwwuj3bwy62jfolxyux4brfflramzsvvd3syd.onion',
+ 'jebqj47jgxleaiosfcxfibx2xdahjettuydlxbg64azd4khsxv6kawid.onion',
+ 'nttr2iupbb6fazdpr2rgbooon2tzbbsvvkagkgkwohhodjzj43stxhad.onion',
+ 'nitraeju2mipeziu2wtcrqsxg7h62v5y4eqgwi75uprynkj74gevvuqd.onion',
+ 'nitter.lqs5fjmajyp7rvp4qvyubwofzi6d4imua7vs237rkc4m5qogitqwrgyd.onion',
+ 'ibsboeui2im5o7dxnik3s5yghufumgy5abevtij5nbizequfpu4qi4ad.onion',
+ 'ec5nvbycpfa5k6ro77blxgkyrzbkv7uy6r5cngcbkadtjj2733nm3uyd.onion',
+
+ 'nitter.i2p',
+ 'u6ikd6zndl3c4dsdq4mmujpntgeevdk5qzkfb57r4tnfeccrn2qa.b32.i2p',
+
+ 'nitterlgj3n5fgwesu3vxc5h67ruku33nqaoeoocae2mvlzhsu6k7fqd.onion',
+ )
+
+ HTTP_INSTANCES = (
+ 'nitter.lacontrevoie.fr',
+ 'nitter.fdn.fr',
+ 'nitter.1d4.us',
+ 'nitter.kavin.rocks',
+ 'nitter.unixfox.eu',
+ 'nitter.domain.glass',
+ 'nitter.namazso.eu',
+ 'birdsite.xanny.family',
+ 'nitter.moomoo.me',
+ 'bird.trom.tf',
+ 'nitter.it',
+ 'twitter.censors.us',
+ 'nitter.grimneko.de',
+ 'twitter.076.ne.jp',
+ 'nitter.fly.dev',
+ 'notabird.site',
+ 'nitter.weiler.rocks',
+ 'nitter.sethforprivacy.com',
+ 'nitter.cutelab.space',
+ 'nitter.nl',
+ 'nitter.mint.lgbt',
+ 'nitter.bus-hit.me',
+ 'nitter.esmailelbob.xyz',
+ 'tw.artemislena.eu',
+ 'nitter.winscloud.net',
+ 'nitter.tiekoetter.com',
+ 'nitter.spaceint.fr',
+ 'nitter.privacy.com.de',
+ 'nitter.poast.org',
+ 'nitter.bird.froth.zone',
+ 'nitter.dcs0.hu',
+ 'twitter.dr460nf1r3.org',
+ 'nitter.garudalinux.org',
+ 'twitter.femboy.hu',
+ 'nitter.cz',
+ 'nitter.privacydev.net',
+ 'nitter.evil.site',
+ 'tweet.lambda.dance',
+ 'nitter.kylrth.com',
+ 'nitter.foss.wtf',
+ 'nitter.priv.pw',
+ 'nitter.tokhmi.xyz',
+ 'nitter.catalyst.sx',
+ 'unofficialbird.com',
+ 'nitter.projectsegfau.lt',
+ 'nitter.eu.projectsegfau.lt',
+ 'singapore.unofficialbird.com',
+ 'canada.unofficialbird.com',
+ 'india.unofficialbird.com',
+ 'nederland.unofficialbird.com',
+ 'uk.unofficialbird.com',
+ 'n.l5.ca',
+ 'nitter.slipfox.xyz',
+ 'nitter.soopy.moe',
+ 'nitter.qwik.space',
+ 'read.whatever.social',
+ 'nitter.rawbit.ninja',
+ 'nt.vern.cc',
+ 'ntr.odyssey346.dev',
+ 'nitter.ir',
+ 'nitter.privacytools.io',
+ 'nitter.sneed.network',
+ 'n.sneed.network',
+ 'nitter.manasiwibi.com',
+ 'nitter.smnz.de',
+ 'nitter.twei.space',
+ 'nitter.inpt.fr',
+ 'nitter.d420.de',
+ 'nitter.caioalonso.com',
+ 'nitter.at',
+ 'nitter.drivet.xyz',
+ 'nitter.pw',
+ 'nitter.nicfab.eu',
+ 'bird.habedieeh.re',
+ 'nitter.hostux.net',
+ 'nitter.adminforge.de',
+ 'nitter.platypush.tech',
+ 'nitter.mask.sh',
+ 'nitter.pufe.org',
+ 'nitter.us.projectsegfau.lt',
+ 'nitter.arcticfoxes.net',
+ 't.com.sb',
+ 'nitter.kling.gg',
+ 'nitter.ktachibana.party',
+ 'nitter.riverside.rocks',
+ 'nitter.girlboss.ceo',
+ 'nitter.lunar.icu',
+ 'twitter.moe.ngo',
+ 'nitter.freedit.eu',
+ 'ntr.frail.duckdns.org',
+ 'nitter.librenode.org',
+ 'n.opnxng.com',
+ 'nitter.plus.st',
+ )
+
+ DEAD_INSTANCES = (
+ # maintenance
+ 'nitter.ethibox.fr',
+
+ # official, rate limited
+ 'nitter.net',
+ # offline
+ 'is-nitter.resolv.ee',
+ 'lu-nitter.resolv.ee',
+ 'nitter.13ad.de',
+ 'nitter.40two.app',
+ 'nitter.cattube.org',
+ 'nitter.cc',
+ 'nitter.dark.fail',
+ 'nitter.himiko.cloud',
+ 'nitter.koyu.space',
+ 'nitter.mailstation.de',
+ 'nitter.mastodont.cat',
+ 'nitter.tedomum.net',
+ 'nitter.tokhmi.xyz',
+ 'nitter.weaponizedhumiliation.com',
+ 'nitter.vxempire.xyz',
+ 'tweet.lambda.dance',
+ 'nitter.ca',
+ 'nitter.42l.fr',
+ 'nitter.pussthecat.org',
+ 'nitter.nixnet.services',
+ 'nitter.eu',
+ 'nitter.actionsack.com',
+ 'nitter.hu',
+ 'twitr.gq',
+ 'nittereu.moomoo.me',
+ 'bird.from.tf',
+ 'twitter.grimneko.de',
+ 'nitter.alefvanoon.xyz',
+ 'n.hyperborea.cloud',
+ 'twitter.mstdn.social',
+ 'nitter.silkky.cloud',
+ 'nttr.stream',
+ 'fuckthesacklers.network',
+ 'nitter.govt.land',
+ 'nitter.datatunnel.xyz',
+ 'de.nttr.stream',
+ 'twtr.bch.bar',
+ 'nitter.exonip.de',
+ 'nitter.mastodon.pro',
+ 'nitter.notraxx.ch',
+ 'nitter.skrep.in',
+ 'nitter.snopyta.org',
+ )
+
+ INSTANCES = NON_HTTP_INSTANCES + HTTP_INSTANCES + DEAD_INSTANCES
+
+ _INSTANCES_RE = f'(?:{"|".join(map(re.escape, INSTANCES))})'
+ _VALID_URL = fr'https?://{_INSTANCES_RE}/(?P<uploader_id>.+)/status/(?P<id>[0-9]+)(#.)?'
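+ # picked once at class-definition time; only used to parameterize the
+ # test URLs below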
+ current_instance = random.choice(HTTP_INSTANCES)
+
+ _TESTS = [
+ {
+ # GIF (wrapped in mp4)
+ 'url': f'https://{current_instance}/firefox/status/1314279897502629888#m',
+ 'info_dict': {
+ 'id': '1314279897502629888',
+ 'ext': 'mp4',
+ 'title': 'md5:7890a9277da4639ab624dd899424c5d8',
+ 'description': 'md5:5fea96a4d3716c350f8b95b21b3111fe',
+ 'thumbnail': r're:^https?://.*\.jpg$',
+ 'uploader': 'Firefox 🔥',
+ 'uploader_id': 'firefox',
+ 'uploader_url': f'https://{current_instance}/firefox',
+ 'upload_date': '20201008',
+ 'timestamp': 1602183720,
+ 'like_count': int,
+ 'repost_count': int,
+ 'comment_count': int,
+ },
+ }, { # normal video
+ 'url': f'https://{current_instance}/Le___Doc/status/1299715685392756737#m',
+ 'info_dict': {
+ 'id': '1299715685392756737',
+ 'ext': 'mp4',
+ 'title': 're:^.* - "Je ne prédis jamais rien"\nD Raoult, Août 2020...',
+ 'description': '"Je ne prédis jamais rien"\nD Raoult, Août 2020...',
+ 'thumbnail': r're:^https?://.*\.jpg$',
+ 'uploader': 're:^Le *Doc',
+ 'uploader_id': 'Le___Doc',
+ 'uploader_url': f'https://{current_instance}/Le___Doc',
+ 'upload_date': '20200829',
+ 'timestamp': 1598711340,
+ 'view_count': int,
+ 'like_count': int,
+ 'repost_count': int,
+ 'comment_count': int,
+ },
+ }, { # video embed in a "Streaming Political Ads" box
+ 'url': f'https://{current_instance}/mozilla/status/1321147074491092994#m',
+ 'info_dict': {
+ 'id': '1321147074491092994',
+ 'ext': 'mp4',
+ 'title': 'md5:8290664aabb43b9189145c008386bf12',
+ 'description': 'md5:9cf2762d49674bc416a191a689fb2aaa',
+ 'thumbnail': r're:^https?://.*\.jpg$',
+ 'uploader': 'Mozilla',
+ 'uploader_id': 'mozilla',
+ 'uploader_url': f'https://{current_instance}/mozilla',
+ 'upload_date': '20201027',
+ 'timestamp': 1603820940,
+ 'view_count': int,
+ 'like_count': int,
+ 'repost_count': int,
+ 'comment_count': int,
+ },
+ 'expected_warnings': ['Ignoring subtitle tracks found in the HLS manifest'],
+ }, { # not the first tweet but main-tweet
+ 'url': f'https://{current_instance}/firefox/status/1354848277481414657#m',
+ 'info_dict': {
+ 'id': '1354848277481414657',
+ 'ext': 'mp4',
+ 'title': 'md5:bef647f03bd1c6b15b687ea70dfc9700',
+ 'description': 'md5:5efba25e2f9dac85ebcd21160cb4341f',
+ 'thumbnail': r're:^https?://.*\.jpg$',
+ 'uploader': 'Firefox 🔥',
+ 'uploader_id': 'firefox',
+ 'uploader_url': f'https://{current_instance}/firefox',
+ 'upload_date': '20210128',
+ 'timestamp': 1611855960,
+ 'view_count': int,
+ 'like_count': int,
+ 'repost_count': int,
+ 'comment_count': int,
+ }
+ }, { # no OpenGraph title
+ 'url': f'https://{current_instance}/LocalBateman/status/1678455464038735895#m',
+ 'info_dict': {
+ 'id': '1678455464038735895',
+ 'ext': 'mp4',
+ 'title': 'Your Typical Local Man - Local man, what did Romanians ever do to you?',
+ 'description': 'Local man, what did Romanians ever do to you?',
+ 'thumbnail': r're:^https?://.*\.jpg$',
+ 'uploader': 'Your Typical Local Man',
+ 'uploader_id': 'LocalBateman',
+ 'uploader_url': f'https://{current_instance}/LocalBateman',
+ 'upload_date': '20230710',
+ 'timestamp': 1689009900,
+ 'view_count': int,
+ 'like_count': int,
+ 'repost_count': int,
+ 'comment_count': int,
+ },
+ 'expected_warnings': ['Ignoring subtitle tracks found in the HLS manifest'],
+ 'params': {'skip_download': 'm3u8'},
+ }
+ ]
+
+ def _real_extract(self, url):
+ video_id, uploader_id = self._match_valid_url(url).group('id', 'uploader_id')
+ parsed_url = compat_urlparse.urlparse(url)
+ base_url = f'{parsed_url.scheme}://{parsed_url.netloc}'
+
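+ # enable Nitter's HLS playback preference so that video URLs are
+ # embedded in the page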
+ self._set_cookie(parsed_url.netloc, 'hlsPlayback', 'on')
+ full_webpage = webpage = self._download_webpage(url, video_id)
+
+ main_tweet_start = full_webpage.find('class="main-tweet"')
+ if main_tweet_start > 0:
+ webpage = full_webpage[main_tweet_start:]
+
+ video_url = '%s%s' % (base_url, self._html_search_regex(
+ r'(?:<video[^>]+data-url|<source[^>]+src)="([^"]+)"', webpage, 'video url'))
+ ext = determine_ext(video_url)
+
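+ # determine_ext() falls back to 'unknown_video' for extension-less
+ # URLs, which here indicates an HLS playlist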
+ if ext == 'unknown_video':
+ formats = self._extract_m3u8_formats(video_url, video_id, ext='mp4')
+ else:
+ formats = [{
+ 'url': video_url,
+ 'ext': ext
+ }]
+
+ title = description = self._og_search_description(full_webpage, default=None) or self._html_search_regex(
+ r'<div class="tweet-content[^>]+>([^<]+)</div>', webpage, 'title', fatal=False)
+
+ uploader_id = self._html_search_regex(
+ r'<a class="username"[^>]+title="@([^"]+)"', webpage, 'uploader id', fatal=False) or uploader_id
+
+ uploader = self._html_search_regex(
+ r'<a class="fullname"[^>]+title="([^"]+)"', webpage, 'uploader name', fatal=False)
+ if uploader:
+ title = f'{uploader} - {title}'
+
+ counts = {
+ f'{x[0]}_count': self._html_search_regex(
+ fr'<span[^>]+class="icon-{x[1]}[^>]*></span>([^<]*)</div>',
+ webpage, f'{x[0]} count', fatal=False)
+ for x in (('view', 'play'), ('like', 'heart'), ('repost', 'retweet'), ('comment', 'comment'))
+ }
+ counts = {field: 0 if count == '' else parse_count(count) for field, count in counts.items()}
+
+ thumbnail = (
+ self._html_search_meta('og:image', full_webpage, 'thumbnail url')
+ or remove_end('%s%s' % (base_url, self._html_search_regex(
+ r'<video[^>]+poster="([^"]+)"', webpage, 'thumbnail url', fatal=False)), '%3Asmall'))
+
+ thumbnails = [
+ {'id': size, 'url': f'{thumbnail}%3A{size}'}
+ for size in ('thumb', 'small', 'large', 'medium', 'orig')
+ ]
+
+ date = self._html_search_regex(
+ r'<span[^>]+class="tweet-date"[^>]*><a[^>]+title="([^"]+)"',
+ webpage, 'upload date', default='').replace('·', '')
+
+ return {
+ 'id': video_id,
+ 'title': title,
+ 'description': description,
+ 'uploader': uploader,
+ 'timestamp': unified_timestamp(date),
+ 'uploader_id': uploader_id,
+ 'uploader_url': f'{base_url}/{uploader_id}',
+ 'formats': formats,
+ 'thumbnails': thumbnails,
+ 'thumbnail': thumbnail,
+ **counts,
+ }
diff --git a/yt_dlp/extractor/nobelprize.py b/yt_dlp/extractor/nobelprize.py
new file mode 100644
index 0000000..cddc72f
--- /dev/null
+++ b/yt_dlp/extractor/nobelprize.py
@@ -0,0 +1,59 @@
+from .common import InfoExtractor
+from ..utils import (
+ js_to_json,
+ mimetype2ext,
+ determine_ext,
+ update_url_query,
+ get_element_by_attribute,
+ int_or_none,
+)
+
+
+class NobelPrizeIE(InfoExtractor):
+ _WORKING = False
+ _VALID_URL = r'https?://(?:www\.)?nobelprize\.org/mediaplayer.*?\bid=(?P<id>\d+)'
+ _TEST = {
+ 'url': 'http://www.nobelprize.org/mediaplayer/?id=2636',
+ 'md5': '04c81e5714bb36cc4e2232fee1d8157f',
+ 'info_dict': {
+ 'id': '2636',
+ 'ext': 'mp4',
+ 'title': 'Announcement of the 2016 Nobel Prize in Physics',
+ 'description': 'md5:05beba57f4f5a4bbd4cf2ef28fcff739',
+ }
+ }
+
+ def _real_extract(self, url):
+ video_id = self._match_id(url)
+ webpage = self._download_webpage(url, video_id)
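+ # the config is a JavaScript object literal, so run it through
+ # js_to_json before JSON-parsing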
+ media = self._parse_json(self._search_regex(
+ r'(?s)var\s*config\s*=\s*({.+?});', webpage,
+ 'config'), video_id, js_to_json)['media']
+ title = media['title']
+
+ formats = []
+ for source in media.get('source', []):
+ source_src = source.get('src')
+ if not source_src:
+ continue
+ ext = mimetype2ext(source.get('type')) or determine_ext(source_src)
+ if ext == 'm3u8':
+ formats.extend(self._extract_m3u8_formats(
+ source_src, video_id, 'mp4', 'm3u8_native',
+ m3u8_id='hls', fatal=False))
+ elif ext == 'f4m':
+ formats.extend(self._extract_f4m_formats(
+ update_url_query(source_src, {'hdcore': '3.7.0'}),
+ video_id, f4m_id='hds', fatal=False))
+ else:
+ formats.append({
+ 'url': source_src,
+ })
+
+ return {
+ 'id': video_id,
+ 'title': title,
+ 'description': get_element_by_attribute('itemprop', 'description', webpage),
+ 'duration': int_or_none(media.get('duration')),
+ 'formats': formats,
+ }
diff --git a/yt_dlp/extractor/noice.py b/yt_dlp/extractor/noice.py
new file mode 100644
index 0000000..e6e3433
--- /dev/null
+++ b/yt_dlp/extractor/noice.py
@@ -0,0 +1,116 @@
+from .common import InfoExtractor
+from ..utils import (
+ clean_html,
+ determine_ext,
+ int_or_none,
+ parse_iso8601,
+ traverse_obj,
+ variadic,
+)
+
+
+class NoicePodcastIE(InfoExtractor):
+ _VALID_URL = r'https?://open\.noice\.id/content/(?P<id>[a-fA-F0-9-]+)'
+ _TESTS = [{
+ 'url': 'https://open.noice.id/content/7694bb04-ff0f-40fa-a60b-5b39f29584b2',
+ 'info_dict': {
+ 'id': '7694bb04-ff0f-40fa-a60b-5b39f29584b2',
+ 'ext': 'm4a',
+ 'season': 'Season 1',
+ 'description': 'md5:58d1274e6857b6fbbecf47075885380d',
+ 'release_date': '20221115',
+ 'timestamp': 1668496642,
+ 'season_number': 1,
+ 'upload_date': '20221115',
+ 'release_timestamp': 1668496642,
+ 'title': 'Eps 1. Belajar dari Wishnutama: Kreatif Bukan Followers! (bersama Wishnutama)',
+ 'modified_date': '20221121',
+ 'categories': ['Bisnis dan Keuangan'],
+ 'duration': 3567,
+ 'modified_timestamp': 1669030647,
+ 'thumbnail': 'https://images.noiceid.cc/catalog/content-1668496302560',
+ 'channel_id': '9dab1024-5b92-4265-ae1c-63da87359832',
+ 'like_count': int,
+ 'channel': 'Noice Space Talks',
+ 'comment_count': int,
+ 'dislike_count': int,
+ 'channel_follower_count': int,
+ }
+ }, {
+ 'url': 'https://open.noice.id/content/222134e4-99f2-456f-b8a2-b8be404bf063',
+ 'info_dict': {
+ 'id': '222134e4-99f2-456f-b8a2-b8be404bf063',
+ 'ext': 'm4a',
+ 'release_timestamp': 1653488220,
+ 'description': 'md5:35074f6190cef52b05dd133bb2ef460e',
+ 'upload_date': '20220525',
+ 'timestamp': 1653460637,
+ 'release_date': '20220525',
+ 'thumbnail': 'https://images.noiceid.cc/catalog/content-1653460337625',
+ 'title': 'Eps 1: Dijodohin Sama Anak Pak RT',
+ 'modified_timestamp': 1669030647,
+ 'season_number': 1,
+ 'modified_date': '20221121',
+ 'categories': ['Cerita dan Drama'],
+ 'duration': 1830,
+ 'season': 'Season 1',
+ 'channel_id': '60193f6b-d24d-4b23-913b-ceed5a731e74',
+ 'dislike_count': int,
+ 'like_count': int,
+ 'comment_count': int,
+ 'channel': 'Dear Jerome',
+ 'channel_follower_count': int,
+ }
+ }]
+
+ def _get_formats_and_subtitles(self, media_url, video_id):
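+ # media_url may be a single URL or a list; variadic() wraps a lone URL
+ # so both cases iterate uniformly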
+ formats, subtitles = [], {}
+ for url in variadic(media_url):
+ ext = determine_ext(url)
+ if ext == 'm3u8':
+ fmts, subs = self._extract_m3u8_formats_and_subtitles(url, video_id)
+ formats.extend(fmts)
+ self._merge_subtitles(subs, target=subtitles)
+ else:
+ formats.append({
+ 'url': url,
+ 'ext': 'mp3',
+ 'vcodec': 'none',
+ 'acodec': 'mp3',
+ })
+ return formats, subtitles
+
+ def _real_extract(self, url):
+ display_id = self._match_id(url)
+ webpage = self._download_webpage(url, display_id)
+
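+ # all metadata lives in the Next.js __NEXT_DATA__ payload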
+ nextjs_data = self._search_nextjs_data(webpage, display_id)['props']['pageProps']['contentDetails']
+
+ media_url_list = traverse_obj(nextjs_data, (('rawContentUrl', 'url'),))
+ formats, subtitles = self._get_formats_and_subtitles(media_url_list, display_id)
+
+ return {
+ 'id': nextjs_data.get('id') or display_id,
+ 'title': nextjs_data.get('title') or self._html_search_meta('og:title', webpage),
+ 'formats': formats,
+ 'subtitles': subtitles,
+ 'description': (nextjs_data.get('description') or clean_html(nextjs_data.get('htmlDescription'))
+ or self._html_search_meta(['description', 'og:description'], webpage)),
+ 'thumbnail': nextjs_data.get('image') or self._html_search_meta('og:image', webpage),
+ 'timestamp': parse_iso8601(nextjs_data.get('createdAt')),
+ 'release_timestamp': parse_iso8601(nextjs_data.get('publishedAt')),
+ 'modified_timestamp': parse_iso8601(
+ nextjs_data.get('updatedAt') or self._html_search_meta('og:updated_time', webpage)),
+ 'duration': int_or_none(nextjs_data.get('duration')),
+ 'categories': traverse_obj(nextjs_data, ('genres', ..., 'name')),
+ 'season': nextjs_data.get('seasonName'),
+ 'season_number': int_or_none(nextjs_data.get('seasonNumber')),
+ 'channel': traverse_obj(nextjs_data, ('catalog', 'title')),
+ 'channel_id': traverse_obj(nextjs_data, ('catalog', 'id'), 'catalogId'),
+ **traverse_obj(nextjs_data, ('meta', 'aggregations', {
+ 'like_count': 'likes',
+ 'dislike_count': 'dislikes',
+ 'comment_count': 'comments',
+ 'channel_follower_count': 'followers',
+ }))
+ }
diff --git a/yt_dlp/extractor/nonktube.py b/yt_dlp/extractor/nonktube.py
new file mode 100644
index 0000000..f191be3
--- /dev/null
+++ b/yt_dlp/extractor/nonktube.py
@@ -0,0 +1,36 @@
+from .nuevo import NuevoBaseIE
+
+
+class NonkTubeIE(NuevoBaseIE):
+ _VALID_URL = r'https?://(?:www\.)?nonktube\.com/(?:(?:video|embed)/|media/nuevo/embed\.php\?.*?\bid=)(?P<id>\d+)'
+ _TESTS = [{
+ 'url': 'https://www.nonktube.com/video/118636/sensual-wife-uncensored-fucked-in-hairy-pussy-and-facialized',
+ 'info_dict': {
+ 'id': '118636',
+ 'ext': 'mp4',
+ 'title': 'Sensual Wife Uncensored Fucked In Hairy Pussy And Facialized',
+ 'age_limit': 18,
+ 'duration': 1150.98,
+ },
+ 'params': {
+ 'skip_download': True,
+ }
+ }, {
+ 'url': 'https://www.nonktube.com/embed/118636',
+ 'only_matching': True,
+ }]
+
+ def _real_extract(self, url):
+ video_id = self._match_id(url)
+
+ webpage = self._download_webpage(url, video_id)
+
+ title = self._og_search_title(webpage)
+ info = self._parse_html5_media_entries(url, webpage, video_id)[0]
+
+ info.update({
+ 'id': video_id,
+ 'title': title,
+ 'age_limit': 18,
+ })
+ return info
diff --git a/yt_dlp/extractor/noodlemagazine.py b/yt_dlp/extractor/noodlemagazine.py
new file mode 100644
index 0000000..1c1a763
--- /dev/null
+++ b/yt_dlp/extractor/noodlemagazine.py
@@ -0,0 +1,80 @@
+from .common import InfoExtractor
+from ..utils import (
+ int_or_none,
+ parse_count,
+ parse_duration,
+ unified_strdate,
+ urljoin,
+)
+from ..utils.traversal import traverse_obj
+
+
+class NoodleMagazineIE(InfoExtractor):
+ _VALID_URL = r'https?://(?:(?:www|adult)\.)?noodlemagazine\.com/watch/(?P<id>[0-9-_]+)'
+ _TEST = {
+ 'url': 'https://adult.noodlemagazine.com/watch/-67421364_456239604',
+ 'md5': '9e02aa763612929d0b4b850591a9248b',
+ 'info_dict': {
+ 'id': '-67421364_456239604',
+ 'title': 'Aria alexander manojob',
+ 'thumbnail': r're:^https://.*\.jpg',
+ 'ext': 'mp4',
+ 'duration': 903,
+ 'view_count': int,
+ 'like_count': int,
+ 'description': 'Aria alexander manojob',
+ 'tags': ['aria', 'alexander', 'manojob'],
+ 'upload_date': '20190218',
+ 'age_limit': 18
+ }
+ }
+
+ def _real_extract(self, url):
+ video_id = self._match_id(url)
+ webpage = self._download_webpage(url, video_id)
+ title = self._og_search_title(webpage)
+ duration = parse_duration(self._html_search_meta('video:duration', webpage, 'duration', default=None))
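+ # the suffix stripped below, typo included, is boilerplate the site
+ # appends to every description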
+ description = self._og_search_property('description', webpage, default='').replace(' watch online hight quality video', '')
+ tags = self._html_search_meta('video:tag', webpage, default='').split(', ')
+ view_count = parse_count(self._html_search_meta('ya:ovs:views_total', webpage, default=None))
+ like_count = parse_count(self._html_search_meta('ya:ovs:likes', webpage, default=None))
+ upload_date = unified_strdate(self._html_search_meta('ya:ovs:upload_date', webpage, default=''))
+
+ def build_url(url_or_path):
+ return urljoin('https://adult.noodlemagazine.com', url_or_path)
+
+ headers = {'Referer': url}
+ player_path = self._html_search_regex(
+ r'<iframe[^>]+\bid="iplayer"[^>]+\bsrc="([^"]+)"', webpage, 'player path')
+ player_iframe = self._download_webpage(
+ build_url(player_path), video_id, 'Downloading iframe page', headers=headers)
+ playlist_url = self._search_regex(
+ r'window\.playlistUrl\s*=\s*["\']([^"\']+)["\']', player_iframe, 'playlist url')
+ playlist_info = self._download_json(build_url(playlist_url), video_id, headers=headers)
+
+ formats = []
+ for source in traverse_obj(playlist_info, ('sources', lambda _, v: v['file'])):
+ if source.get('type') == 'hls':
+ formats.extend(self._extract_m3u8_formats(
+ build_url(source['file']), video_id, 'mp4', fatal=False, m3u8_id='hls'))
+ else:
+ formats.append(traverse_obj(source, {
+ 'url': ('file', {build_url}),
+ 'format_id': 'label',
+ 'height': ('label', {int_or_none}),
+ 'ext': 'type',
+ }))
+
+ return {
+ 'id': video_id,
+ 'formats': formats,
+ 'title': title,
+ 'thumbnail': self._og_search_property('image', webpage, default=None) or playlist_info.get('image'),
+ 'duration': duration,
+ 'description': description,
+ 'tags': tags,
+ 'view_count': view_count,
+ 'like_count': like_count,
+ 'upload_date': upload_date,
+ 'age_limit': 18
+ }
diff --git a/yt_dlp/extractor/noovo.py b/yt_dlp/extractor/noovo.py
new file mode 100644
index 0000000..acbb74c
--- /dev/null
+++ b/yt_dlp/extractor/noovo.py
@@ -0,0 +1,101 @@
+from .brightcove import BrightcoveNewIE
+from .common import InfoExtractor
+from ..compat import compat_str
+from ..utils import (
+ int_or_none,
+ js_to_json,
+ smuggle_url,
+ try_get,
+)
+
+
+class NoovoIE(InfoExtractor):
+ _VALID_URL = r'https?://(?:[^/]+\.)?noovo\.ca/videos/(?P<id>[^/]+/[^/?#&]+)'
+ _TESTS = [{
+ # clip
+ 'url': 'http://noovo.ca/videos/rpm-plus/chrysler-imperial',
+ 'info_dict': {
+ 'id': '5386045029001',
+ 'ext': 'mp4',
+ 'title': 'Chrysler Imperial',
+ 'description': 'md5:de3c898d1eb810f3e6243e08c8b4a056',
+ 'timestamp': 1491399228,
+ 'upload_date': '20170405',
+ 'uploader_id': '618566855001',
+ 'series': 'RPM+',
+ },
+ 'params': {
+ 'skip_download': True,
+ },
+ }, {
+ # episode
+ 'url': 'http://noovo.ca/videos/l-amour-est-dans-le-pre/episode-13-8',
+ 'info_dict': {
+ 'id': '5395865725001',
+ 'title': 'Épisode 13 : Les retrouvailles',
+ 'description': 'md5:888c3330f0c1b4476c5bc99a1c040473',
+ 'ext': 'mp4',
+ 'timestamp': 1492019320,
+ 'upload_date': '20170412',
+ 'uploader_id': '618566855001',
+ 'series': "L'amour est dans le pré",
+ 'season_number': 5,
+ 'episode': 'Épisode 13',
+ 'episode_number': 13,
+ },
+ 'params': {
+ 'skip_download': True,
+ },
+ }]
+ BRIGHTCOVE_URL_TEMPLATE = 'http://players.brightcove.net/618566855001/default_default/index.html?videoId=%s'
+
+ def _real_extract(self, url):
+ video_id = self._match_id(url)
+
+ webpage = self._download_webpage(url, video_id)
+
+ brightcove_id = self._search_regex(
+ r'data-video-id=["\'](\d+)', webpage, 'brightcove id')
+
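+ # Page metadata lives in a JS dataLayer.push() call; js_to_json makes
+ # the object literal parseable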
+ data = self._parse_json(
+ self._search_regex(
+ r'(?s)dataLayer\.push\(\s*({.+?})\s*\);', webpage, 'data',
+ default='{}'),
+ video_id, transform_source=js_to_json, fatal=False)
+
+ title = try_get(
+ data, lambda x: x['video']['nom'],
+ compat_str) or self._html_search_meta(
+ 'dcterms.Title', webpage, 'title', fatal=True)
+
+ description = self._html_search_meta(
+ ('dcterms.Description', 'description'), webpage, 'description')
+
+ series = try_get(
+ data, lambda x: x['emission']['nom']) or self._search_regex(
+ r'<div[^>]+class="banner-card__subtitle h4"[^>]*>([^<]+)',
+ webpage, 'series', default=None)
+
+ season_el = try_get(data, lambda x: x['emission']['saison'], dict) or {}
+ season = try_get(season_el, lambda x: x['nom'], compat_str)
+ season_number = int_or_none(try_get(season_el, lambda x: x['numero']))
+
+ episode_el = try_get(season_el, lambda x: x['episode'], dict) or {}
+ episode = try_get(episode_el, lambda x: x['nom'], compat_str)
+ episode_number = int_or_none(try_get(episode_el, lambda x: x['numero']))
+
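+ # Hand off to the Brightcove extractor; smuggling geo_countries lets it
+ # apply its geo-bypass for Canada if the video is geo-restricted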
+ return {
+ '_type': 'url_transparent',
+ 'ie_key': BrightcoveNewIE.ie_key(),
+ 'url': smuggle_url(
+ self.BRIGHTCOVE_URL_TEMPLATE % brightcove_id,
+ {'geo_countries': ['CA']}),
+ 'id': brightcove_id,
+ 'title': title,
+ 'description': description,
+ 'series': series,
+ 'season': season,
+ 'season_number': season_number,
+ 'episode': episode,
+ 'episode_number': episode_number,
+ }
diff --git a/yt_dlp/extractor/nosnl.py b/yt_dlp/extractor/nosnl.py
new file mode 100644
index 0000000..cea54c9
--- /dev/null
+++ b/yt_dlp/extractor/nosnl.py
@@ -0,0 +1,115 @@
+from .common import InfoExtractor
+from ..utils import parse_duration, parse_iso8601, traverse_obj
+
+
+class NOSNLArticleIE(InfoExtractor):
+ _VALID_URL = r'https?://nos\.nl/(?P<type>video|(\w+/)?\w+)/?\d+-(?P<display_id>[\w-]+)'
+ _TESTS = [
+ {
+ # only 1 video
+ 'url': 'https://nos.nl/nieuwsuur/artikel/2440353-verzakking-door-droogte-dreigt-tot-een-miljoen-kwetsbare-huizen',
+ 'info_dict': {
+ 'id': '2440340',
+ 'ext': 'mp4',
+ 'description': 'md5:5f83185d902ac97af3af4bed7ece3db5',
+ 'title': '\'We hebben een huis vol met scheuren\'',
+ 'duration': 95.0,
+ 'thumbnail': 'https://cdn.nos.nl/image/2022/08/12/887149/3840x2160a.jpg',
+ }
+ }, {
+ # more than 1 video
+ 'url': 'https://nos.nl/artikel/2440409-vannacht-sliepen-weer-enkele-honderden-asielzoekers-in-ter-apel-buiten',
+ 'info_dict': {
+ 'id': '2440409',
+ 'title': 'Vannacht sliepen weer enkele honderden asielzoekers in Ter Apel buiten',
+ 'description': 'md5:72b1e1674d798460e79d78fa37e9f56d',
+ 'tags': ['aanmeldcentrum', 'Centraal Orgaan opvang asielzoekers', 'COA', 'asielzoekers', 'Ter Apel'],
+ 'modified_timestamp': 1660452773,
+ 'modified_date': '20220814',
+ 'upload_date': '20220813',
+ 'thumbnail': 'https://cdn.nos.nl/image/2022/07/18/880346/1024x576a.jpg',
+ 'timestamp': 1660401384,
+ 'categories': ['Regionaal nieuws', 'Binnenland'],
+ },
+ 'playlist_count': 2,
+ }, {
+ # audio + video
+ 'url': 'https://nos.nl/artikel/2440789-wekdienst-16-8-groningse-acties-tien-jaar-na-zware-aardbeving-femke-bol-in-actie-op-ek-atletiek',
+ 'info_dict': {
+ 'id': '2440789',
+ 'title': 'Wekdienst 16/8: Groningse acties tien jaar na zware aardbeving • Femke Bol in actie op EK atletiek ',
+ 'description': 'md5:0bd277ed7a44fc15cb12a9d27d8f6641',
+ 'tags': ['wekdienst'],
+ 'modified_date': '20220816',
+ 'modified_timestamp': 1660625449,
+ 'timestamp': 1660625449,
+ 'upload_date': '20220816',
+ 'thumbnail': 'https://cdn.nos.nl/image/2022/08/16/888178/1024x576a.jpg',
+ 'categories': ['Binnenland', 'Buitenland'],
+ },
+ 'playlist_count': 2,
+ }, {
+ # video url
+ 'url': 'https://nos.nl/video/2452718-xi-en-trudeau-botsen-voor-de-camera-op-g20-top-je-hebt-gelekt',
+ 'info_dict': {
+ 'id': '2452718',
+ 'title': 'Xi en Trudeau botsen voor de camera op G20-top: \'Je hebt gelekt\'',
+ 'modified_date': '20221117',
+ 'description': 'md5:61907dac576f75c11bf8ffffd4a3cc0f',
+ 'tags': ['Xi', 'Trudeau', 'G20', 'indonesié'],
+ 'upload_date': '20221117',
+ 'thumbnail': 'https://cdn.nos.nl/image/2022/11/17/916155/1024x576a.jpg',
+ 'modified_timestamp': 1668663388,
+ 'timestamp': 1668663388,
+ 'categories': ['Buitenland'],
+ },
+ 'playlist_mincount': 1,
+ }
+ ]
+
+ def _entries(self, nextjs_json, display_id):
+ for item in nextjs_json:
+ if item.get('type') == 'video':
+ formats, subtitle = self._extract_m3u8_formats_and_subtitles(
+ traverse_obj(item, ('source', 'url')), display_id, ext='mp4')
+ yield {
+ 'id': str(item['id']),
+ 'title': item.get('title'),
+ 'description': item.get('description'),
+ 'formats': formats,
+ 'subtitles': subtitle,
+ 'duration': parse_duration(item.get('duration')),
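+ # imagesByRatio groups thumbnails by aspect ratio; expose each
+ # size from the first ratio group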
+ 'thumbnails': [{
+ 'url': traverse_obj(image, ('url', ...), get_all=False),
+ 'width': image.get('width'),
+ 'height': image.get('height')
+ } for image in traverse_obj(item, ('imagesByRatio', ...))[0]],
+ }
+
+ elif item.get('type') == 'audio':
+ yield {
+ 'id': str(item['id']),
+ 'title': item.get('title'),
+ 'url': traverse_obj(item, ('media', 'src')),
+ 'ext': 'mp3',
+ }
+
+ def _real_extract(self, url):
+ site_type, display_id = self._match_valid_url(url).group('type', 'display_id')
+ webpage = self._download_webpage(url, display_id)
+
+ nextjs_json = self._search_nextjs_data(webpage, display_id)['props']['pageProps']['data']
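+ # /video/ pages carry a single item under 'video'; article pages carry
+ # a list under 'items' that may mix video and audio entries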
+ return {
+ '_type': 'playlist',
+ 'entries': self._entries(
+ [nextjs_json['video']] if site_type == 'video' else nextjs_json['items'], display_id),
+ 'id': str(nextjs_json['id']),
+ 'title': nextjs_json.get('title') or self._html_search_meta(['title', 'og:title', 'twitter:title'], webpage),
+ 'description': (nextjs_json.get('description')
+ or self._html_search_meta(['description', 'twitter:description', 'og:description'], webpage)),
+ 'tags': nextjs_json.get('keywords'),
+ 'modified_timestamp': parse_iso8601(nextjs_json.get('modifiedAt')),
+ 'thumbnail': nextjs_json.get('shareImageSrc') or self._html_search_meta(['og:image', 'twitter:image'], webpage),
+ 'timestamp': parse_iso8601(nextjs_json.get('publishedAt')),
+ 'categories': traverse_obj(nextjs_json, ('categories', ..., 'label')),
+ }
diff --git a/yt_dlp/extractor/nova.py b/yt_dlp/extractor/nova.py
new file mode 100644
index 0000000..72884aa
--- /dev/null
+++ b/yt_dlp/extractor/nova.py
@@ -0,0 +1,307 @@
+import re
+
+from .common import InfoExtractor
+from ..utils import (
+ clean_html,
+ determine_ext,
+ int_or_none,
+ js_to_json,
+ traverse_obj,
+ unified_strdate,
+ url_or_none,
+)
+
+
+class NovaEmbedIE(InfoExtractor):
+ _VALID_URL = r'https?://media(?:tn)?\.cms\.nova\.cz/embed/(?P<id>[^/?#&]+)'
+ _TESTS = [{
+ 'url': 'https://media.cms.nova.cz/embed/8o0n0r?autoplay=1',
+ 'info_dict': {
+ 'id': '8o0n0r',
+ 'title': '2180. díl',
+ 'thumbnail': r're:^https?://.*\.jpg',
+ 'duration': 2578,
+ },
+ 'params': {
+ 'skip_download': True,
+ 'ignore_no_formats_error': True,
+ },
+ 'expected_warnings': ['DRM protected', 'Requested format is not available'],
+ }, {
+ 'url': 'https://media.cms.nova.cz/embed/KybpWYvcgOa',
+ 'info_dict': {
+ 'id': 'KybpWYvcgOa',
+ 'ext': 'mp4',
+ 'title': 'Borhyová oslavila 60? Soutěžící z pořadu odboural moderátora Ondřeje Sokola',
+ 'thumbnail': r're:^https?://.*\.jpg',
+ 'duration': 114,
+ },
+ 'params': {'skip_download': 'm3u8'},
+ }, {
+ 'url': 'https://mediatn.cms.nova.cz/embed/EU5ELEsmOHt?autoplay=1',
+ 'info_dict': {
+ 'id': 'EU5ELEsmOHt',
+ 'ext': 'mp4',
+ 'title': 'Haptické křeslo, bionická ruka nebo roboti. Reportérka se podívala na Týden inovací',
+ 'thumbnail': r're:^https?://.*\.jpg',
+ 'duration': 1780,
+ },
+ 'params': {'skip_download': 'm3u8'},
+ }]
+
+ def _real_extract(self, url):
+ video_id = self._match_id(url)
+
+ webpage = self._download_webpage(url, video_id)
+
+ has_drm = False
+ duration = None
+ formats = []
+
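+ # Shared by both the current player JSON path and the pre-August-2023
+ # fallback below; appends into formats and flags DRM via the enclosing
+ # scope (hence the nonlocal declaration)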
+ def process_format_list(format_list, format_id=''):
+ nonlocal formats, has_drm
+ if not isinstance(format_list, list):
+ format_list = [format_list]
+ for format_dict in format_list:
+ if not isinstance(format_dict, dict):
+ continue
+ if (not self.get_param('allow_unplayable_formats')
+ and traverse_obj(format_dict, ('drm', 'keySystem'))):
+ has_drm = True
+ continue
+ format_url = url_or_none(format_dict.get('src'))
+ format_type = format_dict.get('type')
+ ext = determine_ext(format_url)
+ if (format_type == 'application/x-mpegURL'
+ or format_id == 'HLS' or ext == 'm3u8'):
+ formats.extend(self._extract_m3u8_formats(
+ format_url, video_id, 'mp4',
+ entry_protocol='m3u8_native', m3u8_id='hls',
+ fatal=False))
+ elif (format_type == 'application/dash+xml'
+ or format_id == 'DASH' or ext == 'mpd'):
+ formats.extend(self._extract_mpd_formats(
+ format_url, video_id, mpd_id='dash', fatal=False))
+ else:
+ formats.append({
+ 'url': format_url,
+ })
+
+ player = self._search_json(
+ r'player:', webpage, 'player', video_id, fatal=False, end_pattern=r';\s*</script>')
+ if player:
+ for src in traverse_obj(player, ('lib', 'source', 'sources', ...)):
+ process_format_list(src)
+ duration = traverse_obj(player, ('sourceInfo', 'duration', {int_or_none}))
+ if not formats and not has_drm:
+ # older code path, in use before August 2023
+ player = self._parse_json(
+ self._search_regex(
+ (r'(?:(?:replacePlaceholders|processAdTagModifier).*?:\s*)?(?:replacePlaceholders|processAdTagModifier)\s*\(\s*(?P<json>{.*?})\s*\)(?:\s*\))?\s*,',
+ r'Player\.init\s*\([^,]+,(?P<cndn>\s*\w+\s*\?)?\s*(?P<json>{(?(cndn).+?|.+)})\s*(?(cndn):|,\s*{.+?}\s*\)\s*;)'),
+ webpage, 'player', group='json'), video_id)
+ if player:
+ for format_id, format_list in player['tracks'].items():
+ process_format_list(format_list, format_id)
+ duration = int_or_none(player.get('duration'))
+
+ if not formats and has_drm:
+ self.report_drm(video_id)
+
+ title = self._og_search_title(
+ webpage, default=None) or self._search_regex(
+ (r'<value>(?P<title>[^<]+)',
+ r'videoTitle\s*:\s*(["\'])(?P<value>(?:(?!\1).)+)\1'), webpage,
+ 'title', group='value')
+ thumbnail = self._og_search_thumbnail(
+ webpage, default=None) or self._search_regex(
+ r'poster\s*:\s*(["\'])(?P<value>(?:(?!\1).)+)\1', webpage,
+ 'thumbnail', fatal=False, group='value')
+ duration = int_or_none(self._search_regex(
+ r'videoDuration\s*:\s*(\d+)', webpage, 'duration',
+ default=duration))
+
+ return {
+ 'id': video_id,
+ 'title': title,
+ 'thumbnail': thumbnail,
+ 'duration': duration,
+ 'formats': formats,
+ }
+
+
+class NovaIE(InfoExtractor):
+ IE_DESC = 'TN.cz, Prásk.tv, Nova.cz, Novaplus.cz, FANDA.tv, Krásná.cz and Doma.cz'
+ _VALID_URL = r'https?://(?:[^.]+\.)?(?P<site>tv(?:noviny)?|tn|novaplus|vymena|fanda|krasna|doma|prask)\.nova\.cz/(?:[^/]+/)+(?P<id>[^/]+?)(?:\.html|/|$)'
+ _TESTS = [{
+ 'url': 'http://tn.nova.cz/clanek/tajemstvi-ukryte-v-podzemi-specialni-nemocnice-v-prazske-krci.html#player_13260',
+ 'md5': 'da8f3f1fcdaf9fb0f112a32a165760a3',
+ 'info_dict': {
+ 'id': '8OvQqEvV3MW',
+ 'display_id': '8OvQqEvV3MW',
+ 'ext': 'mp4',
+ 'title': 'Podzemní nemocnice v pražské Krči',
+ 'description': 'md5:f0a42dd239c26f61c28f19e62d20ef53',
+ 'thumbnail': r're:^https?://.*\.(?:jpg)',
+ 'duration': 151,
+ }
+ }, {
+ 'url': 'http://fanda.nova.cz/clanek/fun-and-games/krvavy-epos-zaklinac-3-divoky-hon-vychazi-vyhrajte-ho-pro-sebe.html',
+ 'info_dict': {
+ 'id': '1753621',
+ 'ext': 'mp4',
+ 'title': 'Zaklínač 3: Divoký hon',
+ 'description': 're:.*Pokud se stejně jako my nemůžete.*',
+ 'thumbnail': r're:https?://.*\.jpg(\?.*)?',
+ 'upload_date': '20150521',
+ },
+ 'params': {
+ # rtmp download
+ 'skip_download': True,
+ },
+ 'skip': 'gone',
+ }, {
+ # media.cms.nova.cz embed
+ 'url': 'https://novaplus.nova.cz/porad/ulice/epizoda/18760-2180-dil',
+ 'info_dict': {
+ 'id': '8o0n0r',
+ 'ext': 'mp4',
+ 'title': '2180. díl',
+ 'thumbnail': r're:^https?://.*\.jpg',
+ 'duration': 2578,
+ },
+ 'params': {
+ 'skip_download': True,
+ },
+ 'add_ie': [NovaEmbedIE.ie_key()],
+ 'skip': 'CHYBA 404: STRÁNKA NENALEZENA',
+ }, {
+ 'url': 'http://sport.tn.nova.cz/clanek/sport/hokej/nhl/zivot-jde-dal-hodnotil-po-vyrazeni-z-playoff-jiri-sekac.html',
+ 'only_matching': True,
+ }, {
+ 'url': 'http://fanda.nova.cz/clanek/fun-and-games/krvavy-epos-zaklinac-3-divoky-hon-vychazi-vyhrajte-ho-pro-sebe.html',
+ 'only_matching': True,
+ }, {
+ 'url': 'http://doma.nova.cz/clanek/zdravi/prijdte-se-zapsat-do-registru-kostni-drene-jiz-ve-stredu-3-cervna.html',
+ 'only_matching': True,
+ }, {
+ 'url': 'http://prask.nova.cz/clanek/novinky/co-si-na-sobe-nase-hvezdy-nechaly-pojistit.html',
+ 'only_matching': True,
+ }, {
+ 'url': 'http://tv.nova.cz/clanek/novinky/zivot-je-zivot-bondovsky-trailer.html',
+ 'only_matching': True,
+ }]
+
+ def _real_extract(self, url):
+ mobj = self._match_valid_url(url)
+ display_id = mobj.group('id')
+ site = mobj.group('site')
+
+ webpage = self._download_webpage(url, display_id)
+
+ description = clean_html(self._og_search_description(webpage, default=None))
+ if site == 'novaplus':
+ upload_date = unified_strdate(self._search_regex(
+ r'(\d{1,2}-\d{1,2}-\d{4})$', display_id, 'upload date', default=None))
+ elif site == 'fanda':
+ upload_date = unified_strdate(self._search_regex(
+ r'<span class="date_time">(\d{1,2}\.\d{1,2}\.\d{4})', webpage, 'upload date', default=None))
+ else:
+ upload_date = None
+
+ # media.cms.nova.cz embed (novaplus and other portals)
+ embed_id = self._search_regex(
+ r'<iframe[^>]+\bsrc=["\'](?:https?:)?//media(?:tn)?\.cms\.nova\.cz/embed/([^/?#&"\']+)',
+ webpage, 'embed url', default=None)
+ if embed_id:
+ return {
+ '_type': 'url_transparent',
+ 'url': 'https://media.cms.nova.cz/embed/%s' % embed_id,
+ 'ie_key': NovaEmbedIE.ie_key(),
+ 'id': embed_id,
+ 'description': description,
+ 'upload_date': upload_date
+ }
+
+ video_id = self._search_regex(
+ [r"(?:media|video_id)\s*:\s*'(\d+)'",
+ r'media=(\d+)',
+ r'id="article_video_(\d+)"',
+ r'id="player_(\d+)"'],
+ webpage, 'video id')
+
+ config_url = self._search_regex(
+ r'src="(https?://(?:tn|api)\.nova\.cz/bin/player/videojs/config\.php\?[^"]+)"',
+ webpage, 'config url', default=None)
+ config_params = {}
+
+ if not config_url:
+ player = self._parse_json(
+ self._search_regex(
+ r'(?s)Player\s*\(.+?\s*,\s*({.+?\bmedia\b["\']?\s*:\s*["\']?\d+.+?})\s*\)', webpage,
+ 'player', default='{}'),
+ video_id, transform_source=js_to_json, fatal=False)
+ if player:
+ config_url = url_or_none(player.get('configUrl'))
+ params = player.get('configParams')
+ if isinstance(params, dict):
+ config_params = params
+
+ if not config_url:
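+ # Last resort: build the config request ourselves, using a site id
+ # either scraped from the page or hard-coded per portal below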
+ DEFAULT_SITE_ID = '23000'
+ SITES = {
+ 'tvnoviny': DEFAULT_SITE_ID,
+ 'novaplus': DEFAULT_SITE_ID,
+ 'vymena': DEFAULT_SITE_ID,
+ 'krasna': DEFAULT_SITE_ID,
+ 'fanda': '30',
+ 'tn': '30',
+ 'doma': '30',
+ }
+
+ site_id = self._search_regex(
+ r'site=(\d+)', webpage, 'site id', default=None) or SITES.get(
+ site, DEFAULT_SITE_ID)
+
+ config_url = 'https://api.nova.cz/bin/player/videojs/config.php'
+ config_params = {
+ 'site': site_id,
+ 'media': video_id,
+ 'quality': 3,
+ 'version': 1,
+ }
+
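+ # The config endpoint returns JSON wrapped in JavaScript; slice from
+ # the first '{' to the last '}' before parsing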
+ config = self._download_json(
+ config_url, display_id,
+ 'Downloading config JSON', query=config_params,
+ transform_source=lambda s: s[s.index('{'):s.rindex('}') + 1])
+
+ mediafile = config['mediafile']
+ video_url = mediafile['src']
+
+ m = re.search(r'^(?P<url>rtmpe?://[^/]+/(?P<app>[^/]+?))/&*(?P<playpath>.+)$', video_url)
+ if m:
+ formats = [{
+ 'url': m.group('url'),
+ 'app': m.group('app'),
+ 'play_path': m.group('playpath'),
+ 'player_path': 'http://tvnoviny.nova.cz/static/shared/app/videojs/video-js.swf',
+ 'ext': 'flv',
+ }]
+ else:
+ formats = [{
+ 'url': video_url,
+ }]
+
+ title = mediafile.get('meta', {}).get('title') or self._og_search_title(webpage)
+ thumbnail = config.get('poster')
+
+ return {
+ 'id': video_id,
+ 'display_id': display_id,
+ 'title': title,
+ 'description': description,
+ 'upload_date': upload_date,
+ 'thumbnail': thumbnail,
+ 'formats': formats,
+ }
diff --git a/yt_dlp/extractor/novaplay.py b/yt_dlp/extractor/novaplay.py
new file mode 100644
index 0000000..adab33f
--- /dev/null
+++ b/yt_dlp/extractor/novaplay.py
@@ -0,0 +1,67 @@
+from .common import InfoExtractor
+from ..utils import int_or_none, parse_duration, parse_iso8601
+
+
+class NovaPlayIE(InfoExtractor):
+ _VALID_URL = r'https?://play\.nova\.bg/video/[^?#]+/(?P<id>\d+)'
+ _TESTS = [
+ {
+ 'url': 'https://play.nova.bg/video/ochakvaite/season-0/ochakvaite-2022-07-22-sybudi-se-sat/606627',
+ 'md5': 'd79dff2d09d196c595a7290f48e33399',
+ 'info_dict': {
+ 'id': '606627',
+ 'ext': 'mp4',
+ 'title': 'Събуди се - събота по NOVA (23.07.2022)',
+ 'alt_title': 'ochakvaite/season-0/ochakvaite-2022-07-22-sybudi-se-sat',
+ 'duration': 29.0,
+ 'timestamp': 1658491547,
+ 'upload_date': '20220722',
+ 'thumbnail': 'https://nbg-img.fite.tv/img/606627_460x260.jpg',
+ 'description': '29 сек',
+ },
+ },
+ {
+ 'url': 'https://play.nova.bg/video/ochakvaite/season-0/ochakvaite-2022-07-22-cherry-tazi/606609',
+ 'md5': 'f3e973e2ed1a5b9b3f498b1ab82d01b3',
+ 'info_dict': {
+ 'id': '606609',
+ 'ext': 'mp4',
+ 'title': 'Черешката на тортата - тази вечер по NOVA (22.07.2022)',
+ 'alt_title': 'ochakvaite/season-0/ochakvaite-2022-07-22-cherry-tazi',
+ 'duration': 29.0,
+ 'timestamp': 1658476303,
+ 'upload_date': '20220722',
+ 'thumbnail': 'https://nbg-img.fite.tv/img/606609_460x260.jpg',
+ 'description': '29 сек',
+ },
+ }
+ ]
+
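+ # Bearer token for the streams API, fetched on first use and cached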
+ _access_token = None
+
+ def _real_extract(self, url):
+ video_id = self._match_id(url)
+ webpage = self._download_webpage(url, video_id)
+ self._access_token = self._access_token or self._download_json(
+ 'https://play.nova.bg/api/client', None, note='Fetching access token')['accessToken']
+ video_props = self._search_nextjs_data(webpage, video_id)['props']['pageProps']['video']
+ m3u8_url = self._download_json(
+ f'https://nbg-api.fite.tv/api/v2/videos/{video_id}/streams',
+ video_id, headers={
+ 'x-flipps-user-agent': 'Flipps/75/9.7',
+ 'x-flipps-version': '2022-05-17',
+ 'Authorization': f'Bearer {self._access_token}'
+ })[0]['links']['play']['href']
+ formats = self._extract_m3u8_formats(m3u8_url, video_id, 'mp4', m3u8_id='hls')
+
+ return {
+ 'id': video_id,
+ 'title': video_props['title'],
+ 'alt_title': video_props.get('slug'),
+ 'thumbnail': self._og_search_thumbnail(webpage),
+ 'description': self._og_search_description(webpage),
+ 'formats': formats,
+ 'duration': parse_duration(video_props['duration']),
+ 'timestamp': parse_iso8601(video_props['published_at']),
+ 'view_count': int_or_none(video_props['view_count']),
+ }
diff --git a/yt_dlp/extractor/nowness.py b/yt_dlp/extractor/nowness.py
new file mode 100644
index 0000000..a3c29f6
--- /dev/null
+++ b/yt_dlp/extractor/nowness.py
@@ -0,0 +1,142 @@
+from .brightcove import (
+ BrightcoveLegacyIE,
+ BrightcoveNewIE,
+)
+from .common import InfoExtractor
+from ..compat import compat_str
+from ..networking import Request
+from ..utils import ExtractorError
+
+
+class NownessBaseIE(InfoExtractor):
+ def _extract_url_result(self, post):
+ if post['type'] == 'video':
+ for media in post['media']:
+ if media['type'] == 'video':
+ video_id = media['content']
+ source = media['source']
+ if source == 'brightcove':
+ player_code = self._download_webpage(
+ 'http://www.nowness.com/iframe?id=%s' % video_id, video_id,
+ note='Downloading player JavaScript',
+ errnote='Unable to download player JavaScript')
+ bc_url = BrightcoveLegacyIE._extract_brightcove_url(player_code)
+ if bc_url:
+ return self.url_result(bc_url, BrightcoveLegacyIE.ie_key())
+ bc_url = BrightcoveNewIE._extract_url(self, player_code)
+ if bc_url:
+ return self.url_result(bc_url, BrightcoveNewIE.ie_key())
+ raise ExtractorError('Could not find player definition')
+ elif source == 'vimeo':
+ return self.url_result('http://vimeo.com/%s' % video_id, 'Vimeo')
+ elif source == 'youtube':
+ return self.url_result(video_id, 'Youtube')
+ elif source == 'cinematique':
+ # yt-dlp currently doesn't support cinematique
+ # return self.url_result('http://cinematique.com/embed/%s' % video_id, 'Cinematique')
+ pass
+
+ def _api_request(self, url, request_path):
+ display_id = self._match_id(url)
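+ # The API localizes metadata: request zh-cn for the Chinese site,
+ # en-us otherwise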
+ request = Request(
+ 'http://api.nowness.com/api/' + request_path % display_id,
+ headers={
+ 'X-Nowness-Language': 'zh-cn' if 'cn.nowness.com' in url else 'en-us',
+ })
+ return display_id, self._download_json(request, display_id)
+
+
+class NownessIE(NownessBaseIE):
+ IE_NAME = 'nowness'
+ _VALID_URL = r'https?://(?:(?:www|cn)\.)?nowness\.com/(?:story|(?:series|category)/[^/]+)/(?P<id>[^/]+?)(?:$|[?#])'
+ _TESTS = [{
+ 'url': 'https://www.nowness.com/story/candor-the-art-of-gesticulation',
+ 'md5': '068bc0202558c2e391924cb8cc470676',
+ 'info_dict': {
+ 'id': '2520295746001',
+ 'ext': 'mp4',
+ 'title': 'Candor: The Art of Gesticulation',
+ 'description': 'Candor: The Art of Gesticulation',
+ 'thumbnail': r're:^https?://.*\.jpg',
+ 'timestamp': 1446745676,
+ 'upload_date': '20151105',
+ 'uploader_id': '2385340575001',
+ },
+ 'add_ie': ['BrightcoveNew'],
+ }, {
+ 'url': 'https://cn.nowness.com/story/kasper-bjorke-ft-jaakko-eino-kalevi-tnr',
+ 'md5': 'e79cf125e387216f86b2e0a5b5c63aa3',
+ 'info_dict': {
+ 'id': '3716354522001',
+ 'ext': 'mp4',
+ 'title': 'Kasper Bjørke ft. Jaakko Eino Kalevi: TNR',
+ 'description': 'Kasper Bjørke ft. Jaakko Eino Kalevi: TNR',
+ 'thumbnail': r're:^https?://.*\.jpg',
+ 'timestamp': 1407315371,
+ 'upload_date': '20140806',
+ 'uploader_id': '2385340575001',
+ },
+ 'add_ie': ['BrightcoveNew'],
+ }, {
+ # vimeo
+ 'url': 'https://www.nowness.com/series/nowness-picks/jean-luc-godard-supercut',
+ 'md5': '9a5a6a8edf806407e411296ab6bc2a49',
+ 'info_dict': {
+ 'id': '130020913',
+ 'ext': 'mp4',
+ 'title': 'Bleu, Blanc, Rouge - A Godard Supercut',
+ 'description': 'md5:f0ea5f1857dffca02dbd37875d742cec',
+ 'thumbnail': r're:^https?://.*\.jpg',
+ 'upload_date': '20150607',
+ 'uploader': 'Cinema Sem Lei',
+ 'uploader_id': 'cinemasemlei',
+ },
+ 'add_ie': ['Vimeo'],
+ }]
+
+ def _real_extract(self, url):
+ _, post = self._api_request(url, 'post/getBySlug/%s')
+ return self._extract_url_result(post)
+
+
+class NownessPlaylistIE(NownessBaseIE):
+ IE_NAME = 'nowness:playlist'
+ _VALID_URL = r'https?://(?:(?:www|cn)\.)?nowness\.com/playlist/(?P<id>\d+)'
+ _TEST = {
+ 'url': 'https://www.nowness.com/playlist/3286/i-guess-thats-why-they-call-it-the-blues',
+ 'info_dict': {
+ 'id': '3286',
+ },
+ 'playlist_mincount': 8,
+ }
+
+ def _real_extract(self, url):
+ playlist_id, playlist = self._api_request(url, 'post?PlaylistId=%s')
+ entries = [self._extract_url_result(item) for item in playlist['items']]
+ return self.playlist_result(entries, playlist_id)
+
+
+class NownessSeriesIE(NownessBaseIE):
+ IE_NAME = 'nowness:series'
+ _VALID_URL = r'https?://(?:(?:www|cn)\.)?nowness\.com/series/(?P<id>[^/]+?)(?:$|[?#])'
+ _TEST = {
+ 'url': 'https://www.nowness.com/series/60-seconds',
+ 'info_dict': {
+ 'id': '60',
+ 'title': '60 Seconds',
+ 'description': 'One-minute wisdom in a new NOWNESS series',
+ },
+ 'playlist_mincount': 4,
+ }
+
+ def _real_extract(self, url):
+ display_id, series = self._api_request(url, 'series/getBySlug/%s')
+ entries = [self._extract_url_result(post) for post in series['posts']]
+ series_title = None
+ series_description = None
+ translations = series.get('translations', [])
+ if translations:
+ series_title = translations[0].get('title') or translations[0]['seoTitle']
+ series_description = translations[0].get('seoDescription')
+ return self.playlist_result(
+ entries, compat_str(series['id']), series_title, series_description)
diff --git a/yt_dlp/extractor/noz.py b/yt_dlp/extractor/noz.py
new file mode 100644
index 0000000..c7b8038
--- /dev/null
+++ b/yt_dlp/extractor/noz.py
@@ -0,0 +1,83 @@
+from .common import InfoExtractor
+from ..compat import compat_urllib_parse_unquote
+from ..utils import (
+ find_xpath_attr,
+ int_or_none,
+ update_url_query,
+ xpath_text,
+)
+
+
+class NozIE(InfoExtractor):
+ _WORKING = False
+ _VALID_URL = r'https?://(?:www\.)?noz\.de/video/(?P<id>[0-9]+)/'
+ _TESTS = [{
+ 'url': 'http://www.noz.de/video/25151/32-Deutschland-gewinnt-Badminton-Lnderspiel-in-Melle',
+ 'info_dict': {
+ 'id': '25151',
+ 'ext': 'mp4',
+ 'duration': 215,
+ 'title': '3:2 - Deutschland gewinnt Badminton-Länderspiel in Melle',
+ 'description': 'Vor rund 370 Zuschauern gewinnt die deutsche Badminton-Nationalmannschaft am Donnerstag ein EM-Vorbereitungsspiel gegen Frankreich in Melle. Video Moritz Frankenberg.',
+ 'thumbnail': r're:^http://.*\.jpg',
+ },
+ }]
+
+ def _real_extract(self, url):
+ video_id = self._match_id(url)
+ webpage = self._download_webpage(url, video_id)
+ description = self._og_search_description(webpage)
+
+ edge_url = self._html_search_regex(
+ r'<script\s+(?:type="text/javascript"\s+)?src="(.*?/videojs_.*?)"',
+ webpage, 'edge URL')
+ edge_content = self._download_webpage(edge_url, 'meta configuration')
+
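+ # The player script passes a percent-encoded config URL through
+ # so.addVariable("config_url", ...); extract and unquote it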
+ config_url_encoded = self._search_regex(
+ r'so\.addVariable\("config_url","[^,]*,(.*?)"',
+ edge_content, 'config URL')
+ config_url = compat_urllib_parse_unquote(config_url_encoded)
+
+ doc = self._download_xml(config_url, 'video configuration')
+ title = xpath_text(doc, './/title')
+ thumbnail = xpath_text(doc, './/article/thumbnail/url')
+ duration = int_or_none(xpath_text(
+ doc, './/article/movie/file/duration'))
+ formats = []
+ for qnode in doc.findall('.//article/movie/file/qualities/qual'):
+ http_url_ele = find_xpath_attr(
+ qnode, './html_urls/video_url', 'format', 'video/mp4')
+ http_url = http_url_ele.text if http_url_ele is not None else None
+ if http_url:
+ formats.append({
+ 'url': http_url,
+ 'format_name': xpath_text(qnode, './name'),
+ 'format_id': '%s-%s' % ('http', xpath_text(qnode, './id')),
+ 'height': int_or_none(xpath_text(qnode, './height')),
+ 'width': int_or_none(xpath_text(qnode, './width')),
+ 'tbr': int_or_none(xpath_text(qnode, './bitrate'), scale=1000),
+ })
+ else:
+ f4m_url = xpath_text(qnode, 'url_hd2')
+ if f4m_url:
+ formats.extend(self._extract_f4m_formats(
+ update_url_query(f4m_url, {'hdcore': '3.4.0'}),
+ video_id, f4m_id='hds', fatal=False))
+ m3u8_url_ele = find_xpath_attr(
+ qnode, './html_urls/video_url',
+ 'format', 'application/vnd.apple.mpegurl')
+ m3u8_url = m3u8_url_ele.text if m3u8_url_ele is not None else None
+ if m3u8_url:
+ formats.extend(self._extract_m3u8_formats(
+ m3u8_url, video_id, 'mp4', 'm3u8_native',
+ m3u8_id='hls', fatal=False))
+
+ return {
+ 'id': video_id,
+ 'formats': formats,
+ 'title': title,
+ 'duration': duration,
+ 'description': description,
+ 'thumbnail': thumbnail,
+ }
diff --git a/yt_dlp/extractor/npo.py b/yt_dlp/extractor/npo.py
new file mode 100644
index 0000000..4d5ff50
--- /dev/null
+++ b/yt_dlp/extractor/npo.py
@@ -0,0 +1,612 @@
+import random
+import re
+import urllib.parse
+
+from .common import InfoExtractor
+from ..utils import (
+ determine_ext,
+ int_or_none,
+ merge_dicts,
+ orderedSet,
+ str_or_none,
+ try_call,
+ unified_timestamp,
+ url_or_none,
+ urlencode_postdata,
+)
+
+
+class NPOIE(InfoExtractor):
+ IE_NAME = 'npo'
+ IE_DESC = 'npo.nl, ntr.nl, omroepwnl.nl, zapp.nl and npo3.nl'
+ _VALID_URL = r'''(?x)
+ (?:
+ npo:|
+ https?://
+ (?:www\.)?
+ (?:
+ npo\.nl/(?:[^/]+/)*|
+ (?:ntr|npostart)\.nl/(?:[^/]+/){2,}|
+ omroepwnl\.nl/video/fragment/[^/]+__|
+ (?:zapp|npo3)\.nl/(?:[^/]+/){2,}
+ )
+ )
+ (?P<id>[^/?#]+)
+ '''
+
+ _TESTS = [{
+ 'url': 'http://www.npo.nl/nieuwsuur/22-06-2014/VPWON_1220719',
+ 'md5': '4b3f9c429157ec4775f2c9cb7b911016',
+ 'info_dict': {
+ 'id': 'VPWON_1220719',
+ 'ext': 'm4v',
+ 'title': 'Nieuwsuur',
+ 'description': 'Dagelijks tussen tien en elf: nieuws, sport en achtergronden.',
+ 'upload_date': '20140622',
+ },
+ 'skip': 'Video was removed',
+ }, {
+ 'url': 'http://www.npo.nl/de-mega-mike-mega-thomas-show/27-02-2009/VARA_101191800',
+ 'md5': 'da50a5787dbfc1603c4ad80f31c5120b',
+ 'info_dict': {
+ 'id': 'VARA_101191800',
+ 'ext': 'm4v',
+ 'title': 'De Mega Mike & Mega Thomas show: The best of.',
+ 'description': 'md5:3b74c97fc9d6901d5a665aac0e5400f4',
+ 'upload_date': '20090227',
+ 'duration': 2400,
+ },
+ 'skip': 'Video was removed',
+ }, {
+ 'url': 'http://www.npo.nl/tegenlicht/25-02-2013/VPWON_1169289',
+ 'md5': '1b279c0547f6b270e014c576415268c5',
+ 'info_dict': {
+ 'id': 'VPWON_1169289',
+ 'ext': 'mp4',
+ 'title': 'Zwart geld: de toekomst komt uit Afrika',
+ 'description': 'md5:dffaf3d628a9c36f78ca48d834246261',
+ 'upload_date': '20130225',
+ 'duration': 3000,
+ 'creator': 'NED2',
+ 'series': 'Tegenlicht',
+ 'timestamp': 1361822340,
+ 'thumbnail': 'https://images.npo.nl/tile/1280x720/142854.jpg',
+ 'episode': 'Zwart geld: de toekomst komt uit Afrika',
+ 'episode_number': 18,
+ },
+ }, {
+ 'url': 'http://www.npo.nl/de-nieuwe-mens-deel-1/21-07-2010/WO_VPRO_043706',
+ 'info_dict': {
+ 'id': 'WO_VPRO_043706',
+ 'ext': 'mp4',
+ 'title': 'De nieuwe mens - Deel 1',
+ 'description': 'md5:518ae51ba1293ffb80d8d8ce90b74e4b',
+ 'duration': 4680,
+ 'episode': 'De nieuwe mens - Deel 1',
+ 'thumbnail': 'https://images.npo.nl/tile/1280x720/6289.jpg',
+ 'timestamp': 1279716057,
+ 'series': 'De nieuwe mens - Deel 1',
+ 'upload_date': '20100721',
+ },
+ 'params': {
+ 'skip_download': True,
+ },
+ }, {
+ # non-ASF streams
+ 'url': 'http://www.npo.nl/hoe-gaat-europa-verder-na-parijs/10-01-2015/WO_NOS_762771',
+ 'info_dict': {
+ 'id': 'WO_NOS_762771',
+ 'ext': 'mp4',
+ 'title': 'Hoe gaat Europa verder na Parijs?',
+ },
+ 'params': {
+ 'skip_download': True,
+ },
+ 'skip': 'Video was removed',
+ }, {
+ 'url': 'http://www.ntr.nl/Aap-Poot-Pies/27/detail/Aap-poot-pies/VPWON_1233944#content',
+ 'info_dict': {
+ 'id': 'VPWON_1233944',
+ 'ext': 'mp4',
+ 'title': 'Aap, poot, pies',
+ 'description': 'md5:4b46b1b9553b4c036a04d2a532a137e6',
+ 'upload_date': '20150508',
+ 'duration': 599,
+ 'episode': 'Aap, poot, pies',
+ 'thumbnail': 'https://images.poms.omroep.nl/image/s1280/c1280x720/608118.jpg',
+ 'timestamp': 1431064200,
+ 'series': 'Aap, poot, pies',
+ },
+ 'params': {
+ 'skip_download': True,
+ },
+ }, {
+ 'url': 'http://www.omroepwnl.nl/video/fragment/vandaag-de-dag-verkiezingen__POMS_WNL_853698',
+ 'info_dict': {
+ 'id': 'POW_00996502',
+ 'ext': 'm4v',
+ 'title': '''"Dit is wel een 'landslide'..."''',
+ 'description': 'md5:f8d66d537dfb641380226e31ca57b8e8',
+ 'upload_date': '20150508',
+ 'duration': 462,
+ },
+ 'params': {
+ 'skip_download': True,
+ },
+ 'skip': 'Video was removed',
+ }, {
+ # audio
+ 'url': 'http://www.npo.nl/jouw-stad-rotterdam/29-01-2017/RBX_FUNX_6683215/RBX_FUNX_7601437',
+ 'info_dict': {
+ 'id': 'RBX_FUNX_6683215',
+ 'ext': 'mp3',
+ 'title': 'Jouw Stad Rotterdam',
+ 'description': 'md5:db251505244f097717ec59fabc372d9f',
+ },
+ 'params': {
+ 'skip_download': True,
+ },
+ 'skip': 'Video was removed',
+ }, {
+ 'url': 'http://www.zapp.nl/de-bzt-show/gemist/KN_1687547',
+ 'only_matching': True,
+ }, {
+ 'url': 'http://www.zapp.nl/de-bzt-show/filmpjes/POMS_KN_7315118',
+ 'only_matching': True,
+ }, {
+ 'url': 'http://www.zapp.nl/beste-vrienden-quiz/extra-video-s/WO_NTR_1067990',
+ 'only_matching': True,
+ }, {
+ 'url': 'https://www.npo3.nl/3onderzoekt/16-09-2015/VPWON_1239870',
+ 'only_matching': True,
+ }, {
+ # live stream
+ 'url': 'npo:LI_NL1_4188102',
+ 'only_matching': True,
+ }, {
+ 'url': 'http://www.npo.nl/radio-gaga/13-06-2017/BNN_101383373',
+ 'only_matching': True,
+ }, {
+ 'url': 'https://www.zapp.nl/1803-skelterlab/instructie-video-s/740-instructievideo-s/POMS_AT_11736927',
+ 'only_matching': True,
+ }, {
+ 'url': 'https://www.npostart.nl/broodje-gezond-ei/28-05-2018/KN_1698996',
+ 'only_matching': True,
+ }, {
+ 'url': 'https://npo.nl/KN_1698996',
+ 'only_matching': True,
+ }, {
+ 'url': 'https://www.npo3.nl/the-genius/21-11-2022/VPWON_1341105',
+ 'info_dict': {
+ 'id': 'VPWON_1341105',
+ 'ext': 'mp4',
+ 'duration': 2658,
+ 'series': 'The Genius',
+ 'description': 'md5:db02f1456939ca63f7c408f858044e94',
+ 'title': 'The Genius',
+ 'timestamp': 1669062000,
+ 'creator': 'NED3',
+ 'episode': 'The Genius',
+ 'thumbnail': 'https://images.npo.nl/tile/1280x720/1827650.jpg',
+ 'episode_number': 8,
+ 'upload_date': '20221121',
+ },
+ 'params': {
+ 'skip_download': True,
+ },
+ }]
+
+ @classmethod
+ def suitable(cls, url):
+ return (False if any(ie.suitable(url)
+ for ie in (NPOLiveIE, NPORadioIE, NPORadioFragmentIE))
+ else super().suitable(url))
+
+ def _real_extract(self, url):
+ video_id = self._match_id(url)
+ if urllib.parse.urlparse(url).netloc in ['www.ntr.nl', 'ntr.nl']:
+ player = self._download_json(
+ f'https://www.ntr.nl/ajax/player/embed/{video_id}', video_id,
+ 'Downloading player JSON', query={
+ 'parameters[elementId]': f'npo{random.randint(0, 999)}',
+ 'parameters[sterReferralUrl]': url,
+ 'parameters[autoplay]': 0,
+ })
+ else:
+ self._request_webpage(
+ 'https://www.npostart.nl/api/token', video_id,
+ 'Downloading token', headers={
+ 'Referer': url,
+ 'X-Requested-With': 'XMLHttpRequest',
+ })
+ player = self._download_json(
+ f'https://www.npostart.nl/player/{video_id}', video_id,
+ 'Downloading player JSON', data=urlencode_postdata({
+ 'autoplay': 0,
+ 'share': 1,
+ 'pageUrl': url,
+ 'hasAdConsent': 0,
+ }), headers={
+ 'x-xsrf-token': try_call(lambda: urllib.parse.unquote(
+ self._get_cookies('https://www.npostart.nl')['XSRF-TOKEN'].value))
+ })
+
+ player_token = player['token']
+
+ drm = False
+ format_urls = set()
+ formats = []
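+ # Try each delivery profile in turn; identical stream URLs can come
+ # back for several profiles, so format_urls deduplicates them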
+ for profile in ('hls', 'dash-widevine', 'dash-playready', 'smooth'):
+ streams = self._download_json(
+ 'https://start-player.npo.nl/video/%s/streams' % video_id,
+ video_id, 'Downloading %s profile JSON' % profile, fatal=False,
+ query={
+ 'profile': profile,
+ 'quality': 'npoplus',
+ 'tokenId': player_token,
+ 'streamType': 'broadcast',
+ }, data=b'') # endpoint requires POST
+ if not streams:
+ continue
+ stream = streams.get('stream')
+ if not isinstance(stream, dict):
+ continue
+ stream_url = url_or_none(stream.get('src'))
+ if not stream_url or stream_url in format_urls:
+ continue
+ format_urls.add(stream_url)
+ if stream.get('protection') is not None or stream.get('keySystemOptions') is not None:
+ drm = True
+ continue
+ stream_type = stream.get('type')
+ stream_ext = determine_ext(stream_url)
+ if stream_type == 'application/dash+xml' or stream_ext == 'mpd':
+ formats.extend(self._extract_mpd_formats(
+ stream_url, video_id, mpd_id='dash', fatal=False))
+ elif stream_type == 'application/vnd.apple.mpegurl' or stream_ext == 'm3u8':
+ formats.extend(self._extract_m3u8_formats(
+ stream_url, video_id, ext='mp4',
+ entry_protocol='m3u8_native', m3u8_id='hls', fatal=False))
+ elif re.search(r'\.isml?/Manifest', stream_url):
+ formats.extend(self._extract_ism_formats(
+ stream_url, video_id, ism_id='mss', fatal=False))
+ else:
+ formats.append({
+ 'url': stream_url,
+ })
+
+ if not formats:
+ if not self.get_param('allow_unplayable_formats') and drm:
+ self.report_drm(video_id)
+
+ info = {
+ 'id': video_id,
+ 'title': video_id,
+ 'formats': formats,
+ }
+
+ embed_url = url_or_none(player.get('embedUrl'))
+ if embed_url:
+ webpage = self._download_webpage(
+ embed_url, video_id, 'Downloading embed page', fatal=False)
+ if webpage:
+ video = self._parse_json(
+ self._search_regex(
+ r'\bvideo\s*=\s*({.+?})\s*;', webpage, 'video',
+ default='{}'), video_id)
+ if video:
+ title = video.get('episodeTitle')
+ subtitles = {}
+ subtitles_list = video.get('subtitles')
+ if isinstance(subtitles_list, list):
+ for cc in subtitles_list:
+ cc_url = url_or_none(cc.get('src'))
+ if not cc_url:
+ continue
+ lang = str_or_none(cc.get('language')) or 'nl'
+ subtitles.setdefault(lang, []).append({
+ 'url': cc_url,
+ })
+ return merge_dicts({
+ 'title': title,
+ 'description': video.get('description'),
+ 'thumbnail': url_or_none(
+ video.get('still_image_url') or video.get('orig_image_url')),
+ 'duration': int_or_none(video.get('duration')),
+ 'timestamp': unified_timestamp(video.get('broadcastDate')),
+ 'creator': video.get('channel'),
+ 'series': video.get('title'),
+ 'episode': title,
+ 'episode_number': int_or_none(video.get('episodeNumber')),
+ 'subtitles': subtitles,
+ }, info)
+
+ return info
+
+
+class NPOLiveIE(InfoExtractor):
+ IE_NAME = 'npo.nl:live'
+ _VALID_URL = r'https?://(?:www\.)?npo(?:start)?\.nl/live(?:/(?P<id>[^/?#&]+))?'
+
+ _TESTS = [{
+ 'url': 'http://www.npo.nl/live/npo-1',
+ 'info_dict': {
+ 'id': 'LI_NL1_4188102',
+ 'display_id': 'npo-1',
+ 'ext': 'mp4',
+ 'title': 're:^NPO 1 [0-9]{4}-[0-9]{2}-[0-9]{2} [0-9]{2}:[0-9]{2}$',
+ 'is_live': True,
+ },
+ 'params': {
+ 'skip_download': True,
+ }
+ }, {
+ 'url': 'http://www.npo.nl/live',
+ 'only_matching': True,
+ }, {
+ 'url': 'https://www.npostart.nl/live/npo-1',
+ 'only_matching': True,
+ }]
+
+ def _real_extract(self, url):
+ display_id = self._match_id(url) or 'npo-1'
+
+ webpage = self._download_webpage(url, display_id)
+
+ live_id = self._search_regex(
+ [r'media-id="([^"]+)"', r'data-prid="([^"]+)"'], webpage, 'live id')
+
+ return {
+ '_type': 'url_transparent',
+ 'url': 'npo:%s' % live_id,
+ 'ie_key': NPOIE.ie_key(),
+ 'id': live_id,
+ 'display_id': display_id,
+ }
+
+
+class NPORadioIE(InfoExtractor):
+ IE_NAME = 'npo.nl:radio'
+ _VALID_URL = r'https?://(?:www\.)?npo\.nl/radio/(?P<id>[^/]+)'
+
+ _TEST = {
+ 'url': 'http://www.npo.nl/radio/radio-1',
+ 'info_dict': {
+ 'id': 'radio-1',
+ 'ext': 'mp3',
+ 'title': 're:^NPO Radio 1 [0-9]{4}-[0-9]{2}-[0-9]{2} [0-9]{2}:[0-9]{2}$',
+ 'is_live': True,
+ },
+ 'params': {
+ 'skip_download': True,
+ }
+ }
+
+ @classmethod
+ def suitable(cls, url):
+ return False if NPORadioFragmentIE.suitable(url) else super().suitable(url)
+
+ @staticmethod
+ def _html_get_attribute_regex(attribute):
+ return r'{0}\s*=\s*\'([^\']+)\''.format(attribute)
+
+ def _real_extract(self, url):
+ video_id = self._match_id(url)
+
+ webpage = self._download_webpage(url, video_id)
+
+ title = self._html_search_regex(
+ self._html_get_attribute_regex('data-channel'), webpage, 'title')
+
+ stream = self._parse_json(
+ self._html_search_regex(self._html_get_attribute_regex('data-streams'), webpage, 'data-streams'),
+ video_id)
+
+ codec = stream.get('codec')
+
+ return {
+ 'id': video_id,
+ 'url': stream['url'],
+ 'title': title,
+ 'acodec': codec,
+ 'ext': codec,
+ 'is_live': True,
+ }
+
+
+class NPORadioFragmentIE(InfoExtractor):
+ IE_NAME = 'npo.nl:radio:fragment'
+ _VALID_URL = r'https?://(?:www\.)?npo\.nl/radio/[^/]+/fragment/(?P<id>\d+)'
+
+ _TEST = {
+ 'url': 'http://www.npo.nl/radio/radio-5/fragment/174356',
+ 'md5': 'dd8cc470dad764d0fdc70a9a1e2d18c2',
+ 'info_dict': {
+ 'id': '174356',
+ 'ext': 'mp3',
+ 'title': 'Jubileumconcert Willeke Alberti',
+ },
+ }
+
+ def _real_extract(self, url):
+ audio_id = self._match_id(url)
+
+ webpage = self._download_webpage(url, audio_id)
+
+ title = self._html_search_regex(
+ r'href="/radio/[^/]+/fragment/%s" title="([^"]+)"' % audio_id,
+ webpage, 'title')
+
+ audio_url = self._search_regex(
+ r"data-streams='([^']+)'", webpage, 'audio url')
+
+ return {
+ 'id': audio_id,
+ 'url': audio_url,
+ 'title': title,
+ }
+
+
+class NPODataMidEmbedIE(InfoExtractor): # XXX: Conventionally, base classes should end with BaseIE/InfoExtractor
+ def _real_extract(self, url):
+ display_id = self._match_id(url)
+ webpage = self._download_webpage(url, display_id)
+ video_id = self._search_regex(
+ r'data-mid=(["\'])(?P<id>(?:(?!\1).)+)\1', webpage, 'video_id', group='id')
+ return {
+ '_type': 'url_transparent',
+ 'ie_key': 'NPO',
+ 'url': 'npo:%s' % video_id,
+ 'display_id': display_id
+ }
+
+
+class SchoolTVIE(NPODataMidEmbedIE):
+ IE_NAME = 'schooltv'
+ _VALID_URL = r'https?://(?:www\.)?schooltv\.nl/video/(?P<id>[^/?#&]+)'
+
+ _TEST = {
+ 'url': 'http://www.schooltv.nl/video/ademhaling-de-hele-dag-haal-je-adem-maar-wat-gebeurt-er-dan-eigenlijk-in-je-lichaam/',
+ 'info_dict': {
+ 'id': 'WO_NTR_429477',
+ 'display_id': 'ademhaling-de-hele-dag-haal-je-adem-maar-wat-gebeurt-er-dan-eigenlijk-in-je-lichaam',
+ 'title': 'Ademhaling: De hele dag haal je adem. Maar wat gebeurt er dan eigenlijk in je lichaam?',
+ 'ext': 'mp4',
+ 'description': 'md5:abfa0ff690adb73fd0297fd033aaa631'
+ },
+ 'params': {
+ # Skip because of m3u8 download
+ 'skip_download': True
+ }
+ }
+
+
+class HetKlokhuisIE(NPODataMidEmbedIE):
+ IE_NAME = 'hetklokhuis'
+ _VALID_URL = r'https?://(?:www\.)?hetklokhuis\.nl/[^/]+/\d+/(?P<id>[^/?#&]+)'
+
+ _TEST = {
+ 'url': 'http://hetklokhuis.nl/tv-uitzending/3471/Zwaartekrachtsgolven',
+ 'info_dict': {
+ 'id': 'VPWON_1260528',
+ 'display_id': 'Zwaartekrachtsgolven',
+ 'ext': 'm4v',
+ 'title': 'Het Klokhuis: Zwaartekrachtsgolven',
+ 'description': 'md5:c94f31fb930d76c2efa4a4a71651dd48',
+ 'upload_date': '20170223',
+ },
+ 'params': {
+ 'skip_download': True
+ }
+ }
+
+
+class NPOPlaylistBaseIE(NPOIE): # XXX: Do not subclass from concrete IE
+ def _real_extract(self, url):
+ playlist_id = self._match_id(url)
+
+ webpage = self._download_webpage(url, playlist_id)
+
+ entries = [
+ self.url_result('npo:%s' % video_id if not video_id.startswith('http') else video_id)
+ for video_id in orderedSet(re.findall(self._PLAYLIST_ENTRY_RE, webpage))
+ ]
+
+ playlist_title = self._html_search_regex(
+ self._PLAYLIST_TITLE_RE, webpage, 'playlist title',
+ default=None) or self._og_search_title(webpage)
+
+ return self.playlist_result(entries, playlist_id, playlist_title)
+
+
+class VPROIE(NPOPlaylistBaseIE):
+ IE_NAME = 'vpro'
+ _VALID_URL = r'https?://(?:www\.)?(?:(?:tegenlicht\.)?vpro|2doc)\.nl/(?:[^/]+/)*(?P<id>[^/]+)\.html'
+ _PLAYLIST_TITLE_RE = (r'<h1[^>]+class=["\'].*?\bmedia-platform-title\b.*?["\'][^>]*>([^<]+)',
+ r'<h5[^>]+class=["\'].*?\bmedia-platform-subtitle\b.*?["\'][^>]*>([^<]+)')
+ _PLAYLIST_ENTRY_RE = r'data-media-id="([^"]+)"'
+
+ _TESTS = [
+ {
+ 'url': 'http://tegenlicht.vpro.nl/afleveringen/2012-2013/de-toekomst-komt-uit-afrika.html',
+ 'md5': 'f8065e4e5a7824068ed3c7e783178f2c',
+ 'info_dict': {
+ 'id': 'VPWON_1169289',
+ 'ext': 'm4v',
+ 'title': 'De toekomst komt uit Afrika',
+ 'description': 'md5:52cf4eefbc96fffcbdc06d024147abea',
+ 'upload_date': '20130225',
+ },
+ 'skip': 'Video gone',
+ },
+ {
+ 'url': 'http://www.vpro.nl/programmas/2doc/2015/sergio-herman.html',
+ 'info_dict': {
+ 'id': 'sergio-herman',
+ 'title': 'sergio herman: fucking perfect',
+ },
+ 'playlist_count': 2,
+ },
+ {
+ # playlist with youtube embed
+ 'url': 'http://www.vpro.nl/programmas/2doc/2015/education-education.html',
+ 'info_dict': {
+ 'id': 'education-education',
+ 'title': 'education education',
+ },
+ 'playlist_count': 2,
+ },
+ {
+ 'url': 'http://www.2doc.nl/documentaires/series/2doc/2015/oktober/de-tegenprestatie.html',
+ 'info_dict': {
+ 'id': 'de-tegenprestatie',
+ 'title': 'De Tegenprestatie',
+ },
+ 'playlist_count': 2,
+ }, {
+ 'url': 'http://www.2doc.nl/speel~VARA_101375237~mh17-het-verdriet-van-nederland~.html',
+ 'info_dict': {
+ 'id': 'VARA_101375237',
+ 'ext': 'm4v',
+ 'title': 'MH17: Het verdriet van Nederland',
+ 'description': 'md5:09e1a37c1fdb144621e22479691a9f18',
+ 'upload_date': '20150716',
+ },
+ 'params': {
+ # Skip because of m3u8 download
+ 'skip_download': True
+ },
+ }
+ ]
+
+
+class WNLIE(NPOPlaylistBaseIE):
+ IE_NAME = 'wnl'
+ _VALID_URL = r'https?://(?:www\.)?omroepwnl\.nl/video/detail/(?P<id>[^/]+)__\d+'
+ _PLAYLIST_TITLE_RE = r'(?s)<h1[^>]+class="subject"[^>]*>(.+?)</h1>'
+ _PLAYLIST_ENTRY_RE = r'<a[^>]+href="([^"]+)"[^>]+class="js-mid"[^>]*>Deel \d+'
+
+ _TESTS = [{
+ 'url': 'http://www.omroepwnl.nl/video/detail/vandaag-de-dag-6-mei__060515',
+ 'info_dict': {
+ 'id': 'vandaag-de-dag-6-mei',
+ 'title': 'Vandaag de Dag 6 mei',
+ },
+ 'playlist_count': 4,
+ }]
+
+
+class AndereTijdenIE(NPOPlaylistBaseIE):
+ IE_NAME = 'anderetijden'
+ _VALID_URL = r'https?://(?:www\.)?anderetijden\.nl/programma/(?:[^/]+/)+(?P<id>[^/?#&]+)'
+ _PLAYLIST_TITLE_RE = r'(?s)<h1[^>]+class=["\'].*?\bpage-title\b.*?["\'][^>]*>(.+?)</h1>'
+ _PLAYLIST_ENTRY_RE = r'<figure[^>]+class=["\']episode-container episode-page["\'][^>]+data-prid=["\'](.+?)["\']'
+
+ _TESTS = [{
+ 'url': 'http://anderetijden.nl/programma/1/Andere-Tijden/aflevering/676/Duitse-soldaten-over-de-Slag-bij-Arnhem',
+ 'info_dict': {
+ 'id': 'Duitse-soldaten-over-de-Slag-bij-Arnhem',
+ 'title': 'Duitse soldaten over de Slag bij Arnhem',
+ },
+ 'playlist_count': 3,
+ }]
diff --git a/yt_dlp/extractor/npr.py b/yt_dlp/extractor/npr.py
new file mode 100644
index 0000000..4b6855c
--- /dev/null
+++ b/yt_dlp/extractor/npr.py
@@ -0,0 +1,132 @@
+from .common import InfoExtractor
+from ..utils import int_or_none, qualities, traverse_obj, url_or_none
+
+
+class NprIE(InfoExtractor):
+ _VALID_URL = r'https?://(?:www\.)?npr\.org/(?:sections/[^/]+/)?\d{4}/\d{2}/\d{2}/(?P<id>\d+)'
+ _TESTS = [{
+ 'url': 'https://www.npr.org/sections/allsongs/2015/10/21/449974205/new-music-from-beach-house-chairlift-cmj-discoveries-and-more',
+ 'info_dict': {
+ 'id': '449974205',
+ 'title': 'New Music From Beach House, Chairlift, CMJ Discoveries And More'
+ },
+ 'playlist_count': 7,
+ }, {
+ 'url': 'https://www.npr.org/sections/deceptivecadence/2015/10/09/446928052/music-from-the-shadows-ancient-armenian-hymns-and-piano-jazz',
+ 'info_dict': {
+ 'id': '446928052',
+ 'title': "Songs We Love: Tigran Hamasyan, 'Your Mercy is Boundless'"
+ },
+ 'playlist': [{
+ 'md5': '12fa60cb2d3ed932f53609d4aeceabf1',
+ 'info_dict': {
+ 'id': '446929930',
+ 'ext': 'mp3',
+ 'title': 'Your Mercy is Boundless (Bazum en Qo gtutyunqd)',
+ 'duration': 402,
+ },
+ }],
+ }, {
+ # multimedia entry with no media title (falls back to the story title)
+ 'url': 'https://www.npr.org/2017/06/19/533198237/tigers-jaw-tiny-desk-concert',
+ 'info_dict': {
+ 'id': '533198237',
+ 'title': 'Tigers Jaw: Tiny Desk Concert',
+ },
+ 'playlist': [{
+ 'md5': '12fa60cb2d3ed932f53609d4aeceabf1',
+ 'info_dict': {
+ 'id': '533201718',
+ 'ext': 'mp4',
+ 'title': 'Tigers Jaw: Tiny Desk Concert',
+ 'duration': 402,
+ },
+ }],
+ 'expected_warnings': ['Failed to download m3u8 information'],
+ }, {
+ # multimedia, no formats, stream
+ 'url': 'https://www.npr.org/2020/02/14/805476846/laura-stevenson-tiny-desk-concert',
+ 'only_matching': True,
+ }, {
+ 'url': 'https://www.npr.org/2022/03/15/1084896560/bonobo-tiny-desk-home-concert',
+ 'info_dict': {
+ 'id': '1086468851',
+ 'ext': 'mp4',
+ 'title': 'Bonobo: Tiny Desk (Home) Concert',
+ 'duration': 1061,
+ 'thumbnail': r're:^https?://media.npr.org/assets/img/.*\.jpg$',
+ },
+ }]
+
+ def _real_extract(self, url):
+ playlist_id = self._match_id(url)
+
+ story = self._download_json(
+ 'http://api.npr.org/query', playlist_id, query={
+ 'id': playlist_id,
+ 'fields': 'audio,multimedia,title',
+ 'format': 'json',
+ 'apiKey': 'MDAzMzQ2MjAyMDEyMzk4MTU1MDg3ZmM3MQ010',
+ })['list']['story'][0]
+ playlist_title = story.get('title', {}).get('$text')
+
+ KNOWN_FORMATS = ('threegp', 'm3u8', 'smil', 'mp4', 'mp3')
+ quality = qualities(KNOWN_FORMATS)
+
+ entries = []
+ for media in story.get('audio', []) + story.get('multimedia', []):
+ media_id = media['id']
+
+ formats = []
+ for format_id, formats_entry in media.get('format', {}).items():
+ if not formats_entry:
+ continue
+ if isinstance(formats_entry, list):
+ formats_entry = formats_entry[0]
+ format_url = formats_entry.get('$text')
+ if not format_url:
+ continue
+ if format_id in KNOWN_FORMATS:
+ if format_id == 'm3u8':
+ formats.extend(self._extract_m3u8_formats(
+ format_url, media_id, 'mp4', 'm3u8_native',
+ m3u8_id='hls', fatal=False))
+ elif format_id == 'smil':
+ smil_formats = self._extract_smil_formats(
+ format_url, media_id, transform_source=lambda s: s.replace(
+ 'rtmp://flash.npr.org/ondemand/', 'https://ondemand.npr.org/'),
+ fatal=False)
+ self._check_formats(smil_formats, media_id)
+ formats.extend(smil_formats)
+ else:
+ formats.append({
+ 'url': format_url,
+ 'format_id': format_id,
+ 'quality': quality(format_id),
+ })
+ for stream_id, stream_entry in media.get('stream', {}).items():
+ if not isinstance(stream_entry, dict):
+ continue
+ if stream_id != 'hlsUrl':
+ continue
+ stream_url = url_or_none(stream_entry.get('$text'))
+ if not stream_url:
+ continue
+ formats.extend(self._extract_m3u8_formats(
+ stream_url, stream_id, 'mp4', 'm3u8_native',
+ m3u8_id='hls', fatal=False))
+
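+ # Some stories may only expose their stream as a JSON-LD embedUrl on
+ # the page itself; fall back to that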
+ if not formats:
+ raw_json_ld = self._yield_json_ld(self._download_webpage(url, playlist_id), playlist_id, fatal=False)
+ m3u8_url = traverse_obj(list(raw_json_ld), (..., 'subjectOf', ..., 'embedUrl'), get_all=False)
+ formats = self._extract_m3u8_formats(m3u8_url, media_id, 'mp4', m3u8_id='hls', fatal=False)
+
+ entries.append({
+ 'id': media_id,
+ 'title': media.get('title', {}).get('$text') or playlist_title,
+ 'thumbnail': media.get('altImageUrl', {}).get('$text'),
+ 'duration': int_or_none(media.get('duration', {}).get('$text')),
+ 'formats': formats,
+ })
+
+ return self.playlist_result(entries, playlist_id, playlist_title)
diff --git a/yt_dlp/extractor/nrk.py b/yt_dlp/extractor/nrk.py
new file mode 100644
index 0000000..384865a
--- /dev/null
+++ b/yt_dlp/extractor/nrk.py
@@ -0,0 +1,875 @@
+import itertools
+import random
+import re
+
+from .common import InfoExtractor
+from ..compat import compat_str
+from ..networking.exceptions import HTTPError
+from ..utils import (
+ ExtractorError,
+ determine_ext,
+ int_or_none,
+ parse_duration,
+ parse_iso8601,
+ str_or_none,
+ try_get,
+ url_or_none,
+ urljoin,
+)
+
+
+class NRKBaseIE(InfoExtractor):
+ _GEO_COUNTRIES = ['NO']
+ _CDN_REPL_REGEX = r'''(?x)://
+ (?:
+ nrkod\d{1,2}-httpcache0-47115-cacheod0\.dna\.ip-only\.net/47115-cacheod0|
+ nrk-od-no\.telenorcdn\.net|
+ minicdn-od\.nrk\.no/od/nrkhd-osl-rr\.netwerk\.no/no
+ )/'''
+
+ def _extract_nrk_formats(self, asset_url, video_id):
+ if re.match(r'https?://[^/]+\.akamaihd\.net/i/', asset_url):
+ return self._extract_akamai_formats(asset_url, video_id)
+ asset_url = re.sub(r'(?:bw_(?:low|high)=\d+|no_audio_only)&?', '', asset_url)
+ formats = self._extract_m3u8_formats(
+ asset_url, video_id, 'mp4', 'm3u8_native', fatal=False)
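+ # If the legacy CDN URL yields nothing, retry the same path on a
+ # random nrk-od-NN.akamaized.net shard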
+ if not formats and re.search(self._CDN_REPL_REGEX, asset_url):
+ formats = self._extract_m3u8_formats(
+ re.sub(self._CDN_REPL_REGEX, '://nrk-od-%02d.akamaized.net/no/' % random.randint(0, 99), asset_url),
+ video_id, 'mp4', 'm3u8_native', fatal=False)
+ return formats
+
+ def _raise_error(self, data):
+ MESSAGES = {
+ 'ProgramRightsAreNotReady': 'Du kan dessverre ikke se eller høre programmet',
+ 'ProgramRightsHasExpired': 'Programmet har gått ut',
+ 'NoProgramRights': 'Ikke tilgjengelig',
+ 'ProgramIsGeoBlocked': 'NRK har ikke rettigheter til å vise dette programmet utenfor Norge',
+ }
+ message_type = data.get('messageType', '')
+ # Can be ProgramIsGeoBlocked or ChannelIsGeoBlocked*
+ if 'IsGeoBlocked' in message_type or try_get(data, lambda x: x['usageRights']['isGeoBlocked']) is True:
+ self.raise_geo_restricted(
+ msg=MESSAGES.get('ProgramIsGeoBlocked'),
+ countries=self._GEO_COUNTRIES)
+ message = data.get('endUserMessage') or MESSAGES.get(message_type, message_type)
+ raise ExtractorError('%s said: %s' % (self.IE_NAME, message), expected=True)
+
+ def _call_api(self, path, video_id, item=None, note=None, fatal=True, query=None):
+ return self._download_json(
+ urljoin('https://psapi.nrk.no/', path),
+ video_id, note or 'Downloading %s JSON' % item,
+ fatal=fatal, query=query)
+
+
+class NRKIE(NRKBaseIE):
+ _VALID_URL = r'''(?x)
+ (?:
+ nrk:|
+ https?://
+ (?:
+ (?:www\.)?nrk\.no/video/(?:PS\*|[^_]+_)|
+ v8[-.]psapi\.nrk\.no/mediaelement/
+ )
+ )
+ (?P<id>[^?\#&]+)
+ '''
+
+ _TESTS = [{
+ # video
+ 'url': 'http://www.nrk.no/video/PS*150533',
+ 'md5': 'f46be075326e23ad0e524edfcb06aeb6',
+ 'info_dict': {
+ 'id': '150533',
+ 'ext': 'mp4',
+ 'title': 'Dompap og andre fugler i Piip-Show',
+ 'description': 'md5:d9261ba34c43b61c812cb6b0269a5c8f',
+ 'duration': 262,
+ }
+ }, {
+ # audio
+ 'url': 'http://www.nrk.no/video/PS*154915',
+ # MD5 is unstable
+ 'info_dict': {
+ 'id': '154915',
+ 'ext': 'mp4',
+ 'title': 'Slik høres internett ut når du er blind',
+ 'description': 'md5:a621f5cc1bd75c8d5104cb048c6b8568',
+ 'duration': 20,
+ }
+ }, {
+ 'url': 'nrk:ecc1b952-96dc-4a98-81b9-5296dc7a98d9',
+ 'only_matching': True,
+ }, {
+ 'url': 'nrk:clip/7707d5a3-ebe7-434a-87d5-a3ebe7a34a70',
+ 'only_matching': True,
+ }, {
+ 'url': 'https://v8-psapi.nrk.no/mediaelement/ecc1b952-96dc-4a98-81b9-5296dc7a98d9',
+ 'only_matching': True,
+ }, {
+ 'url': 'https://www.nrk.no/video/dompap-og-andre-fugler-i-piip-show_150533',
+ 'only_matching': True,
+ }, {
+ 'url': 'https://www.nrk.no/video/humor/kommentatorboksen-reiser-til-sjos_d1fda11f-a4ad-437a-a374-0398bc84e999',
+ 'only_matching': True,
+ }, {
+ # podcast
+ 'url': 'nrk:l_96f4f1b0-de54-4e6a-b4f1-b0de54fe6af8',
+ 'only_matching': True,
+ }, {
+ 'url': 'nrk:podcast/l_96f4f1b0-de54-4e6a-b4f1-b0de54fe6af8',
+ 'only_matching': True,
+ }, {
+ # clip
+ 'url': 'nrk:150533',
+ 'only_matching': True,
+ }, {
+ 'url': 'nrk:clip/150533',
+ 'only_matching': True,
+ }, {
+ # program
+ 'url': 'nrk:MDDP12000117',
+ 'only_matching': True,
+ }, {
+ 'url': 'nrk:program/ENRK10100318',
+ 'only_matching': True,
+ }, {
+ # direkte
+ 'url': 'nrk:nrk1',
+ 'only_matching': True,
+ }, {
+ 'url': 'nrk:channel/nrk1',
+ 'only_matching': True,
+ }]
+
+ def _real_extract(self, url):
+ video_id = self._match_id(url).split('/')[-1]
+
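+ # Retry the bare playback endpoint when the programme-scoped path
+ # returns HTTP 400 (e.g. for clips or podcasts)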
+ def call_playback_api(item, query=None):
+ try:
+ return self._call_api(f'playback/{item}/program/{video_id}', video_id, item, query=query)
+ except ExtractorError as e:
+ if isinstance(e.cause, HTTPError) and e.cause.status == 400:
+ return self._call_api(f'playback/{item}/{video_id}', video_id, item, query=query)
+ raise
+
+ # known values for preferredCdn: akamai, iponly, minicdn and telenor
+ manifest = call_playback_api('manifest', {'preferredCdn': 'akamai'})
+
+ video_id = try_get(manifest, lambda x: x['id'], compat_str) or video_id
+
+ if manifest.get('playability') == 'nonPlayable':
+ self._raise_error(manifest['nonPlayable'])
+
+ playable = manifest['playable']
+
+ formats = []
+ for asset in playable['assets']:
+ if not isinstance(asset, dict):
+ continue
+ if asset.get('encrypted'):
+ continue
+ format_url = url_or_none(asset.get('url'))
+ if not format_url:
+ continue
+ asset_format = (asset.get('format') or '').lower()
+ if asset_format == 'hls' or determine_ext(format_url) == 'm3u8':
+ formats.extend(self._extract_nrk_formats(format_url, video_id))
+ elif asset_format == 'mp3':
+ formats.append({
+ 'url': format_url,
+ 'format_id': asset_format,
+ 'vcodec': 'none',
+ })
+
+ data = call_playback_api('metadata')
+
+ preplay = data['preplay']
+ titles = preplay['titles']
+ title = titles['title']
+ alt_title = titles.get('subtitle')
+
+ description = try_get(preplay, lambda x: x['description'].replace('\r', '\n'))
+ duration = parse_duration(playable.get('duration')) or parse_duration(data.get('duration'))
+
+ thumbnails = []
+ for image in try_get(
+ preplay, lambda x: x['poster']['images'], list) or []:
+ if not isinstance(image, dict):
+ continue
+ image_url = url_or_none(image.get('url'))
+ if not image_url:
+ continue
+ thumbnails.append({
+ 'url': image_url,
+ 'width': int_or_none(image.get('pixelWidth')),
+ 'height': int_or_none(image.get('pixelHeight')),
+ })
+
+ subtitles = {}
+ for sub in try_get(playable, lambda x: x['subtitles'], list) or []:
+ if not isinstance(sub, dict):
+ continue
+ sub_url = url_or_none(sub.get('webVtt'))
+ if not sub_url:
+ continue
+ sub_key = str_or_none(sub.get('language')) or 'nb'
+ sub_type = str_or_none(sub.get('type'))
+ if sub_type:
+ sub_key += '-%s' % sub_type
+ subtitles.setdefault(sub_key, []).append({
+ 'url': sub_url,
+ })
+
+ legal_age = try_get(
+ data, lambda x: x['legalAge']['body']['rating']['code'], compat_str)
+ # https://en.wikipedia.org/wiki/Norwegian_Media_Authority
+ age_limit = None
+ if legal_age:
+ if legal_age == 'A':
+ age_limit = 0
+ elif legal_age.isdigit():
+ age_limit = int_or_none(legal_age)
+
+ is_series = try_get(data, lambda x: x['_links']['series']['name']) == 'series'
+
+ info = {
+ 'id': video_id,
+ 'title': title,
+ 'alt_title': alt_title,
+ 'description': description,
+ 'duration': duration,
+ 'thumbnails': thumbnails,
+ 'age_limit': age_limit,
+ 'formats': formats,
+ 'subtitles': subtitles,
+ 'timestamp': parse_iso8601(try_get(manifest, lambda x: x['availability']['onDemand']['from'], str)),
+ }
+
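+ # For series episodes, pull series/season/episode metadata from the programs API, falling back to patterns in the title and description.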
+ if is_series:
+ series = season_id = season_number = episode = episode_number = None
+ programs = self._call_api(
+ 'programs/%s' % video_id, video_id, 'programs', fatal=False)
+ if programs and isinstance(programs, dict):
+ series = str_or_none(programs.get('seriesTitle'))
+ season_id = str_or_none(programs.get('seasonId'))
+ season_number = int_or_none(programs.get('seasonNumber'))
+ episode = str_or_none(programs.get('episodeTitle'))
+ episode_number = int_or_none(programs.get('episodeNumber'))
+ if not series:
+ series = title
+ if alt_title:
+ title += ' - %s' % alt_title
+ if not season_number:
+ season_number = int_or_none(self._search_regex(
+ r'Sesong\s+(\d+)', description or '', 'season number',
+ default=None))
+ if not episode:
+ episode = alt_title if is_series else None
+ if not episode_number:
+ episode_number = int_or_none(self._search_regex(
+ r'^(\d+)\.', episode or '', 'episode number',
+ default=None))
+ if not episode_number:
+ episode_number = int_or_none(self._search_regex(
+ r'\((\d+)\s*:\s*\d+\)', description or '',
+ 'episode number', default=None))
+ info.update({
+ 'title': title,
+ 'series': series,
+ 'season_id': season_id,
+ 'season_number': season_number,
+ 'episode': episode,
+ 'episode_number': episode_number,
+ })
+
+ return info
+
+
+class NRKTVIE(InfoExtractor):
+ IE_DESC = 'NRK TV and NRK Radio'
+ _EPISODE_RE = r'(?P<id>[a-zA-Z]{4}\d{8})'
+ _VALID_URL = r'https?://(?:tv|radio)\.nrk(?:super)?\.no/(?:[^/]+/)*%s' % _EPISODE_RE
+ _TESTS = [{
+ 'url': 'https://tv.nrk.no/program/MDDP12000117',
+ 'md5': 'c4a5960f1b00b40d47db65c1064e0ab1',
+ 'info_dict': {
+ 'id': 'MDDP12000117',
+ 'ext': 'mp4',
+ 'title': 'Alarm Trolltunga',
+ 'description': 'md5:46923a6e6510eefcce23d5ef2a58f2ce',
+ 'duration': 2223.44,
+ 'age_limit': 6,
+ 'subtitles': {
+ 'nb-nor': [{
+ 'ext': 'vtt',
+ }],
+ 'nb-ttv': [{
+ 'ext': 'vtt',
+ }]
+ },
+ },
+ }, {
+ 'url': 'https://tv.nrk.no/serie/20-spoersmaal-tv/MUHH48000314/23-05-2014',
+ 'md5': '8d40dab61cea8ab0114e090b029a0565',
+ 'info_dict': {
+ 'id': 'MUHH48000314',
+ 'ext': 'mp4',
+ 'title': '20 spørsmål - 23. mai 2014',
+ 'alt_title': '23. mai 2014',
+ 'description': 'md5:bdea103bc35494c143c6a9acdd84887a',
+ 'duration': 1741,
+ 'series': '20 spørsmål',
+ 'episode': '23. mai 2014',
+ 'age_limit': 0,
+ },
+ }, {
+ 'url': 'https://tv.nrk.no/program/mdfp15000514',
+ 'info_dict': {
+ 'id': 'MDFP15000514',
+ 'ext': 'mp4',
+ 'title': 'Kunnskapskanalen - Grunnlovsjubiléet - Stor ståhei for ingenting',
+ 'description': 'md5:89290c5ccde1b3a24bb8050ab67fe1db',
+ 'duration': 4605.08,
+ 'series': 'Kunnskapskanalen',
+ 'episode': 'Grunnlovsjubiléet - Stor ståhei for ingenting',
+ 'age_limit': 0,
+ },
+ 'params': {
+ 'skip_download': True,
+ },
+ }, {
+ # single playlist video
+ 'url': 'https://tv.nrk.no/serie/tour-de-ski/MSPO40010515/06-01-2015#del=2',
+ 'info_dict': {
+ 'id': 'MSPO40010515',
+ 'ext': 'mp4',
+ 'title': 'Sprint fri teknikk, kvinner og menn 06.01.2015',
+ 'description': 'md5:c03aba1e917561eface5214020551b7a',
+ 'age_limit': 0,
+ },
+ 'params': {
+ 'skip_download': True,
+ },
+ 'expected_warnings': ['Failed to download m3u8 information'],
+ 'skip': 'particular part is not supported currently',
+ }, {
+ 'url': 'https://tv.nrk.no/serie/tour-de-ski/MSPO40010515/06-01-2015',
+ 'info_dict': {
+ 'id': 'MSPO40010515',
+ 'ext': 'mp4',
+ 'title': 'Sprint fri teknikk, kvinner og menn 06.01.2015',
+ 'description': 'md5:c03aba1e917561eface5214020551b7a',
+ 'age_limit': 0,
+ },
+ 'expected_warnings': ['Failed to download m3u8 information'],
+ 'skip': 'Ikke tilgjengelig utenfor Norge',
+ }, {
+ 'url': 'https://tv.nrk.no/serie/anno/KMTE50001317/sesong-3/episode-13',
+ 'info_dict': {
+ 'id': 'KMTE50001317',
+ 'ext': 'mp4',
+ 'title': 'Anno - 13. episode',
+ 'description': 'md5:11d9613661a8dbe6f9bef54e3a4cbbfa',
+ 'duration': 2340,
+ 'series': 'Anno',
+ 'episode': '13. episode',
+ 'season_number': 3,
+ 'episode_number': 13,
+ 'age_limit': 0,
+ },
+ 'params': {
+ 'skip_download': True,
+ },
+ }, {
+ 'url': 'https://tv.nrk.no/serie/nytt-paa-nytt/MUHH46000317/27-01-2017',
+ 'info_dict': {
+ 'id': 'MUHH46000317',
+ 'ext': 'mp4',
+ 'title': 'Nytt på Nytt 27.01.2017',
+ 'description': 'md5:5358d6388fba0ea6f0b6d11c48b9eb4b',
+ 'duration': 1796,
+ 'series': 'Nytt på nytt',
+ 'episode': '27.01.2017',
+ 'age_limit': 0,
+ },
+ 'params': {
+ 'skip_download': True,
+ },
+ 'skip': 'ProgramRightsHasExpired',
+ }, {
+ 'url': 'https://radio.nrk.no/serie/dagsnytt/NPUB21019315/12-07-2015#',
+ 'only_matching': True,
+ }, {
+ 'url': 'https://tv.nrk.no/serie/lindmo/2018/MUHU11006318/avspiller',
+ 'only_matching': True,
+ }, {
+ 'url': 'https://radio.nrk.no/serie/dagsnytt/sesong/201507/NPUB21019315',
+ 'only_matching': True,
+ }]
+
+ def _real_extract(self, url):
+ video_id = self._match_id(url)
+ return self.url_result(
+ 'nrk:%s' % video_id, ie=NRKIE.ie_key(), video_id=video_id)
+
+
+class NRKTVEpisodeIE(InfoExtractor):
+ _VALID_URL = r'https?://tv\.nrk\.no/serie/(?P<id>[^/]+/sesong/(?P<season_number>\d+)/episode/(?P<episode_number>\d+))'
+ _TESTS = [{
+ 'url': 'https://tv.nrk.no/serie/hellums-kro/sesong/1/episode/2',
+ 'info_dict': {
+ 'id': 'MUHH36005220',
+ 'ext': 'mp4',
+ 'title': 'Hellums kro - 2. Kro, krig og kjærlighet',
+ 'description': 'md5:ad92ddffc04cea8ce14b415deef81787',
+ 'duration': 1563.92,
+ 'series': 'Hellums kro',
+ 'season_number': 1,
+ 'episode_number': 2,
+ 'episode': '2. Kro, krig og kjærlighet',
+ 'age_limit': 6,
+ },
+ 'params': {
+ 'skip_download': True,
+ },
+ }, {
+ 'url': 'https://tv.nrk.no/serie/backstage/sesong/1/episode/8',
+ 'info_dict': {
+ 'id': 'MSUI14000816',
+ 'ext': 'mp4',
+ 'title': 'Backstage - 8. episode',
+ 'description': 'md5:de6ca5d5a2d56849e4021f2bf2850df4',
+ 'duration': 1320,
+ 'series': 'Backstage',
+ 'season_number': 1,
+ 'episode_number': 8,
+ 'episode': '8. episode',
+ 'age_limit': 0,
+ },
+ 'params': {
+ 'skip_download': True,
+ },
+ 'skip': 'ProgramRightsHasExpired',
+ }]
+
+ def _real_extract(self, url):
+ display_id, season_number, episode_number = self._match_valid_url(url).groups()
+
+ webpage = self._download_webpage(url, display_id)
+
+ info = self._search_json_ld(webpage, display_id, default={})
+ nrk_id = info.get('@id') or self._html_search_meta(
+ 'nrk:program-id', webpage, default=None) or self._search_regex(
+ r'data-program-id=["\'](%s)' % NRKTVIE._EPISODE_RE, webpage,
+ 'nrk id')
+ assert re.match(NRKTVIE._EPISODE_RE, nrk_id)
+
+ info.update({
+ '_type': 'url',
+ 'id': nrk_id,
+ 'url': 'nrk:%s' % nrk_id,
+ 'ie_key': NRKIE.ie_key(),
+ 'season_number': int(season_number),
+ 'episode_number': int(episode_number),
+ })
+ return info
+
+
+class NRKTVSerieBaseIE(NRKBaseIE):
+ def _extract_entries(self, entry_list):
+ if not isinstance(entry_list, list):
+ return []
+ entries = []
+ for episode in entry_list:
+ nrk_id = episode.get('prfId') or episode.get('episodeId')
+ if not nrk_id or not isinstance(nrk_id, compat_str):
+ continue
+ entries.append(self.url_result(
+ 'nrk:%s' % nrk_id, ie=NRKIE.ie_key(), video_id=nrk_id))
+ return entries
+
+ _ASSETS_KEYS = ('episodes', 'instalments',)
+
+ def _extract_assets_key(self, embedded):
+ for asset_key in self._ASSETS_KEYS:
+ if embedded.get(asset_key):
+ return asset_key
+
+ @staticmethod
+ def _catalog_name(serie_kind):
+ return 'podcast' if serie_kind in ('podcast', 'podkast') else 'series'
+
+ def _entries(self, data, display_id):
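+ # Walk HAL-style pagination: yield the embedded assets, then follow the _links.next reference until exhausted.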
+ for page_num in itertools.count(1):
+ embedded = data.get('_embedded') or data
+ if not isinstance(embedded, dict):
+ break
+ assets_key = self._extract_assets_key(embedded)
+ if not assets_key:
+ break
+ # Extract entries
+ entries = try_get(
+ embedded,
+ (lambda x: x[assets_key]['_embedded'][assets_key],
+ lambda x: x[assets_key]),
+ list)
+ for e in self._extract_entries(entries):
+ yield e
+ # Find next URL
+ next_url_path = try_get(
+ data,
+ (lambda x: x['_links']['next']['href'],
+ lambda x: x['_embedded'][assets_key]['_links']['next']['href']),
+ compat_str)
+ if not next_url_path:
+ break
+ data = self._call_api(
+ next_url_path, display_id,
+ note='Downloading %s JSON page %d' % (assets_key, page_num),
+ fatal=False)
+ if not data:
+ break
+
+
+class NRKTVSeasonIE(NRKTVSerieBaseIE):
+ _VALID_URL = r'''(?x)
+ https?://
+ (?P<domain>tv|radio)\.nrk\.no/
+ (?P<serie_kind>serie|pod[ck]ast)/
+ (?P<serie>[^/]+)/
+ (?:
+ (?:sesong/)?(?P<id>\d+)|
+ sesong/(?P<id_2>[^/?#&]+)
+ )
+ '''
+ _TESTS = [{
+ 'url': 'https://tv.nrk.no/serie/backstage/sesong/1',
+ 'info_dict': {
+ 'id': 'backstage/1',
+ 'title': 'Sesong 1',
+ },
+ 'playlist_mincount': 30,
+ }, {
+ # no /sesong/ in path
+ 'url': 'https://tv.nrk.no/serie/lindmo/2016',
+ 'info_dict': {
+ 'id': 'lindmo/2016',
+ 'title': '2016',
+ },
+ 'playlist_mincount': 29,
+ }, {
+ # weird nested _embedded in catalog JSON response
+ 'url': 'https://radio.nrk.no/serie/dickie-dick-dickens/sesong/1',
+ 'info_dict': {
+ 'id': 'dickie-dick-dickens/1',
+ 'title': 'Sesong 1',
+ },
+ 'playlist_mincount': 11,
+ }, {
+ # 841 entries, multi page
+ 'url': 'https://radio.nrk.no/serie/dagsnytt/sesong/201509',
+ 'info_dict': {
+ 'id': 'dagsnytt/201509',
+ 'title': 'September 2015',
+ },
+ 'playlist_mincount': 841,
+ }, {
+ # 180 entries, single page
+ 'url': 'https://tv.nrk.no/serie/spangas/sesong/1',
+ 'only_matching': True,
+ }, {
+ 'url': 'https://radio.nrk.no/podkast/hele_historien/sesong/diagnose-kverulant',
+ 'info_dict': {
+ 'id': 'hele_historien/diagnose-kverulant',
+ 'title': 'Diagnose kverulant',
+ },
+ 'playlist_mincount': 3,
+ }, {
+ 'url': 'https://radio.nrk.no/podkast/loerdagsraadet/sesong/202101',
+ 'only_matching': True,
+ }]
+
+ @classmethod
+ def suitable(cls, url):
+ return (False if NRKTVIE.suitable(url) or NRKTVEpisodeIE.suitable(url) or NRKRadioPodkastIE.suitable(url)
+ else super(NRKTVSeasonIE, cls).suitable(url))
+
+ def _real_extract(self, url):
+ mobj = self._match_valid_url(url)
+ domain = mobj.group('domain')
+ serie_kind = mobj.group('serie_kind')
+ serie = mobj.group('serie')
+ season_id = mobj.group('id') or mobj.group('id_2')
+ display_id = '%s/%s' % (serie, season_id)
+
+ data = self._call_api(
+ '%s/catalog/%s/%s/seasons/%s'
+ % (domain, self._catalog_name(serie_kind), serie, season_id),
+ display_id, 'season', query={'pageSize': 50})
+
+ title = try_get(data, lambda x: x['titles']['title'], compat_str) or display_id
+ return self.playlist_result(
+ self._entries(data, display_id),
+ display_id, title)
+
+
+class NRKTVSeriesIE(NRKTVSerieBaseIE):
+ _VALID_URL = r'https?://(?P<domain>(?:tv|radio)\.nrk|(?:tv\.)?nrksuper)\.no/(?P<serie_kind>serie|pod[ck]ast)/(?P<id>[^/]+)'
+ _TESTS = [{
+ # new layout, instalments
+ 'url': 'https://tv.nrk.no/serie/groenn-glede',
+ 'info_dict': {
+ 'id': 'groenn-glede',
+ 'title': 'Grønn glede',
+ 'description': 'md5:7576e92ae7f65da6993cf90ee29e4608',
+ },
+ 'playlist_mincount': 90,
+ }, {
+ # new layout, instalments, more entries
+ 'url': 'https://tv.nrk.no/serie/lindmo',
+ 'only_matching': True,
+ }, {
+ 'url': 'https://tv.nrk.no/serie/blank',
+ 'info_dict': {
+ 'id': 'blank',
+ 'title': 'Blank',
+ 'description': 'md5:7664b4e7e77dc6810cd3bca367c25b6e',
+ },
+ 'playlist_mincount': 30,
+ }, {
+ # new layout, seasons
+ 'url': 'https://tv.nrk.no/serie/backstage',
+ 'info_dict': {
+ 'id': 'backstage',
+ 'title': 'Backstage',
+ 'description': 'md5:63692ceb96813d9a207e9910483d948b',
+ },
+ 'playlist_mincount': 60,
+ }, {
+ # old layout
+ 'url': 'https://tv.nrksuper.no/serie/labyrint',
+ 'info_dict': {
+ 'id': 'labyrint',
+ 'title': 'Labyrint',
+ 'description': 'I Daidalos sin undersjøiske Labyrint venter spennende oppgaver, skumle robotskapninger og slim.',
+ },
+ 'playlist_mincount': 3,
+ }, {
+ 'url': 'https://tv.nrk.no/serie/broedrene-dal-og-spektralsteinene',
+ 'only_matching': True,
+ }, {
+ 'url': 'https://tv.nrk.no/serie/saving-the-human-race',
+ 'only_matching': True,
+ }, {
+ 'url': 'https://tv.nrk.no/serie/postmann-pat',
+ 'only_matching': True,
+ }, {
+ 'url': 'https://radio.nrk.no/serie/dickie-dick-dickens',
+ 'info_dict': {
+ 'id': 'dickie-dick-dickens',
+ 'title': 'Dickie Dick Dickens',
+ 'description': 'md5:19e67411ffe57f7dce08a943d7a0b91f',
+ },
+ 'playlist_mincount': 8,
+ }, {
+ 'url': 'https://nrksuper.no/serie/labyrint',
+ 'only_matching': True,
+ }, {
+ 'url': 'https://radio.nrk.no/podkast/ulrikkes_univers',
+ 'info_dict': {
+ 'id': 'ulrikkes_univers',
+ },
+ 'playlist_mincount': 10,
+ }, {
+ 'url': 'https://radio.nrk.no/podkast/ulrikkes_univers/nrkno-poddkast-26588-134079-05042018030000',
+ 'only_matching': True,
+ }]
+
+ @classmethod
+ def suitable(cls, url):
+ return (
+ False if any(ie.suitable(url)
+ for ie in (NRKTVIE, NRKTVEpisodeIE, NRKRadioPodkastIE, NRKTVSeasonIE))
+ else super(NRKTVSeriesIE, cls).suitable(url))
+
+ def _real_extract(self, url):
+ site, serie_kind, series_id = self._match_valid_url(url).groups()
+ is_radio = site == 'radio.nrk'
+ domain = 'radio' if is_radio else 'tv'
+
+ size_prefix = 'p' if is_radio else 'embeddedInstalmentsP'
+ series = self._call_api(
+ '%s/catalog/%s/%s'
+ % (domain, self._catalog_name(serie_kind), series_id),
+ series_id, 'serie', query={size_prefix + 'ageSize': 50})
+ titles = try_get(series, [
+ lambda x: x['titles'],
+ lambda x: x[x['type']]['titles'],
+ lambda x: x[x['seriesType']]['titles'],
+ ]) or {}
+
+ entries = []
+ entries.extend(self._entries(series, series_id))
+ embedded = series.get('_embedded') or {}
+ linked_seasons = try_get(series, lambda x: x['_links']['seasons']) or []
+ embedded_seasons = embedded.get('seasons') or []
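+ # When the catalog links more seasons than it embeds, fetch each season page separately; otherwise use the embedded season data.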
+ if len(linked_seasons) > len(embedded_seasons):
+ for season in linked_seasons:
+ season_url = urljoin(url, season.get('href'))
+ if not season_url:
+ season_name = season.get('name')
+ if season_name and isinstance(season_name, compat_str):
+ season_url = 'https://%s.nrk.no/serie/%s/sesong/%s' % (domain, series_id, season_name)
+ if season_url:
+ entries.append(self.url_result(
+ season_url, ie=NRKTVSeasonIE.ie_key(),
+ video_title=season.get('title')))
+ else:
+ for season in embedded_seasons:
+ entries.extend(self._entries(season, series_id))
+ entries.extend(self._entries(
+ embedded.get('extraMaterial') or {}, series_id))
+
+ return self.playlist_result(
+ entries, series_id, titles.get('title'), titles.get('subtitle'))
+
+
+class NRKTVDirekteIE(NRKTVIE): # XXX: Do not subclass from concrete IE
+ IE_DESC = 'NRK TV Direkte and NRK Radio Direkte'
+ _VALID_URL = r'https?://(?:tv|radio)\.nrk\.no/direkte/(?P<id>[^/?#&]+)'
+
+ _TESTS = [{
+ 'url': 'https://tv.nrk.no/direkte/nrk1',
+ 'only_matching': True,
+ }, {
+ 'url': 'https://radio.nrk.no/direkte/p1_oslo_akershus',
+ 'only_matching': True,
+ }]
+
+
+class NRKRadioPodkastIE(InfoExtractor):
+ _VALID_URL = r'https?://radio\.nrk\.no/pod[ck]ast/(?:[^/]+/)+(?P<id>l_[\da-f]{8}-[\da-f]{4}-[\da-f]{4}-[\da-f]{4}-[\da-f]{12})'
+
+ _TESTS = [{
+ 'url': 'https://radio.nrk.no/podkast/ulrikkes_univers/l_96f4f1b0-de54-4e6a-b4f1-b0de54fe6af8',
+ 'md5': '8d40dab61cea8ab0114e090b029a0565',
+ 'info_dict': {
+ 'id': 'MUHH48000314AA',
+ 'ext': 'mp4',
+ 'title': '20 spørsmål 23.05.2014',
+ 'description': 'md5:bdea103bc35494c143c6a9acdd84887a',
+ 'duration': 1741,
+ 'series': '20 spørsmål',
+ 'episode': '23.05.2014',
+ },
+ }, {
+ 'url': 'https://radio.nrk.no/podcast/ulrikkes_univers/l_96f4f1b0-de54-4e6a-b4f1-b0de54fe6af8',
+ 'only_matching': True,
+ }, {
+ 'url': 'https://radio.nrk.no/podkast/ulrikkes_univers/sesong/1/l_96f4f1b0-de54-4e6a-b4f1-b0de54fe6af8',
+ 'only_matching': True,
+ }, {
+ 'url': 'https://radio.nrk.no/podkast/hele_historien/sesong/bortfoert-i-bergen/l_774d1a2c-7aa7-4965-8d1a-2c7aa7d9652c',
+ 'only_matching': True,
+ }]
+
+ def _real_extract(self, url):
+ video_id = self._match_id(url)
+ return self.url_result(
+ 'nrk:%s' % video_id, ie=NRKIE.ie_key(), video_id=video_id)
+
+
+class NRKPlaylistBaseIE(InfoExtractor):
+ def _extract_description(self, webpage):
+ pass
+
+ def _real_extract(self, url):
+ playlist_id = self._match_id(url)
+
+ webpage = self._download_webpage(url, playlist_id)
+
+ entries = [
+ self.url_result('nrk:%s' % video_id, NRKIE.ie_key())
+ for video_id in re.findall(self._ITEM_RE, webpage)
+ ]
+
+ playlist_title = self._extract_title(webpage)
+ playlist_description = self._extract_description(webpage)
+
+ return self.playlist_result(
+ entries, playlist_id, playlist_title, playlist_description)
+
+
+class NRKPlaylistIE(NRKPlaylistBaseIE):
+ _VALID_URL = r'https?://(?:www\.)?nrk\.no/(?!video|skole)(?:[^/]+/)+(?P<id>[^/]+)'
+ _ITEM_RE = r'class="[^"]*\brich\b[^"]*"[^>]+data-video-id="([^"]+)"'
+ _TESTS = [{
+ 'url': 'http://www.nrk.no/troms/gjenopplev-den-historiske-solformorkelsen-1.12270763',
+ 'info_dict': {
+ 'id': 'gjenopplev-den-historiske-solformorkelsen-1.12270763',
+ 'title': 'Gjenopplev den historiske solformørkelsen',
+ 'description': 'md5:c2df8ea3bac5654a26fc2834a542feed',
+ },
+ 'playlist_count': 2,
+ }, {
+ 'url': 'http://www.nrk.no/kultur/bok/rivertonprisen-til-karin-fossum-1.12266449',
+ 'info_dict': {
+ 'id': 'rivertonprisen-til-karin-fossum-1.12266449',
+ 'title': 'Rivertonprisen til Karin Fossum',
+ 'description': 'Første kvinne på 15 år til å vinne krimlitteraturprisen.',
+ },
+ 'playlist_count': 2,
+ }]
+
+ def _extract_title(self, webpage):
+ return self._og_search_title(webpage, fatal=False)
+
+ def _extract_description(self, webpage):
+ return self._og_search_description(webpage)
+
+
+class NRKTVEpisodesIE(NRKPlaylistBaseIE):
+ _VALID_URL = r'https?://tv\.nrk\.no/program/[Ee]pisodes/[^/]+/(?P<id>\d+)'
+ _ITEM_RE = r'data-episode=["\']%s' % NRKTVIE._EPISODE_RE
+ _TESTS = [{
+ 'url': 'https://tv.nrk.no/program/episodes/nytt-paa-nytt/69031',
+ 'info_dict': {
+ 'id': '69031',
+ 'title': 'Nytt på nytt, sesong: 201210',
+ },
+ 'playlist_count': 4,
+ }]
+
+ def _extract_title(self, webpage):
+ return self._html_search_regex(
+ r'<h1>([^<]+)</h1>', webpage, 'title', fatal=False)
+
+
+class NRKSkoleIE(InfoExtractor):
+ IE_DESC = 'NRK Skole'
+ _VALID_URL = r'https?://(?:www\.)?nrk\.no/skole/?\?.*\bmediaId=(?P<id>\d+)'
+
+ _TESTS = [{
+ 'url': 'https://www.nrk.no/skole/?page=search&q=&mediaId=14099',
+ 'md5': '18c12c3d071953c3bf8d54ef6b2587b7',
+ 'info_dict': {
+ 'id': '6021',
+ 'ext': 'mp4',
+ 'title': 'Genetikk og eneggede tvillinger',
+ 'description': 'md5:3aca25dcf38ec30f0363428d2b265f8d',
+ 'duration': 399,
+ },
+ }, {
+ 'url': 'https://www.nrk.no/skole/?page=objectives&subject=naturfag&objective=K15114&mediaId=19355',
+ 'only_matching': True,
+ }]
+
+ def _real_extract(self, url):
+ video_id = self._match_id(url)
+
+ nrk_id = self._download_json(
+ 'https://nrkno-skole-prod.kube.nrk.no/skole/api/media/%s' % video_id,
+ video_id)['psId']
+
+ return self.url_result('nrk:%s' % nrk_id)
diff --git a/yt_dlp/extractor/nrl.py b/yt_dlp/extractor/nrl.py
new file mode 100644
index 0000000..1e8cf0b
--- /dev/null
+++ b/yt_dlp/extractor/nrl.py
@@ -0,0 +1,27 @@
+from .common import InfoExtractor
+
+
+class NRLTVIE(InfoExtractor):
+ _WORKING = False
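+ # Delegates to Ooyala, whose platform has since been discontinued; hence marked as not working.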
+ _VALID_URL = r'https?://(?:www\.)?nrl\.com/tv(/[^/]+)*/(?P<id>[^/?&#]+)'
+ _TEST = {
+ 'url': 'https://www.nrl.com/tv/news/match-highlights-titans-v-knights-862805/',
+ 'info_dict': {
+ 'id': 'YyNnFuaDE6kPJqlDhG4CGQ_w89mKTau4',
+ 'ext': 'mp4',
+ 'title': 'Match Highlights: Titans v Knights',
+ },
+ 'params': {
+ # m3u8 download
+ 'skip_download': True,
+ },
+ }
+
+ def _real_extract(self, url):
+ display_id = self._match_id(url)
+ webpage = self._download_webpage(url, display_id)
+ q_data = self._parse_json(self._html_search_regex(
+ r'(?s)q-data="({.+?})"', webpage, 'player data'), display_id)
+ ooyala_id = q_data['videoId']
+ return self.url_result(
+ 'ooyala:' + ooyala_id, 'Ooyala', ooyala_id, q_data.get('title'))
diff --git a/yt_dlp/extractor/ntvcojp.py b/yt_dlp/extractor/ntvcojp.py
new file mode 100644
index 0000000..422ec6e
--- /dev/null
+++ b/yt_dlp/extractor/ntvcojp.py
@@ -0,0 +1,55 @@
+from .common import InfoExtractor
+from ..utils import (
+ ExtractorError,
+ smuggle_url,
+ traverse_obj,
+)
+
+
+class NTVCoJpCUIE(InfoExtractor):
+ IE_NAME = 'cu.ntv.co.jp'
+ IE_DESC = 'Nippon Television Network'
+ _VALID_URL = r'https?://cu\.ntv\.co\.jp/(?!program)(?P<id>[^/?&#]+)'
+ _TEST = {
+ 'url': 'https://cu.ntv.co.jp/televiva-chill-gohan_181031/',
+ 'info_dict': {
+ 'id': '5978891207001',
+ 'ext': 'mp4',
+ 'title': '桜エビと炒り卵がポイント! 「中華風 エビチリおにぎり」──『美虎』五十嵐美幸',
+ 'upload_date': '20181213',
+ 'description': 'md5:1985b51a9abc285df0104d982a325f2a',
+ 'uploader_id': '3855502814001',
+ 'timestamp': 1544669941,
+ },
+ 'params': {
+ # m3u8 download
+ 'skip_download': True,
+ },
+ }
+
+ BRIGHTCOVE_URL_TEMPLATE = 'http://players.brightcove.net/%s/default_default/index.html?videoId=%s'
+
+ def _real_extract(self, url):
+ display_id = self._match_id(url)
+ webpage = self._download_webpage(url, display_id)
+ player_config = self._search_nuxt_data(webpage, display_id)
+ video_id = traverse_obj(player_config, ('movie', 'video_id'))
+ if not video_id:
+ raise ExtractorError('Failed to extract video ID for Brightcove')
+ account_id = traverse_obj(player_config, ('player', 'account')) or '3855502814001'
+ title = traverse_obj(player_config, ('movie', 'name'))
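+ # Fall back to og:title or the player title, keeping only the text before the first '(' (likely a site suffix).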
+ if not title:
+ og_title = self._og_search_title(webpage, fatal=False) or traverse_obj(player_config, ('player', 'title'))
+ if og_title:
+ title = og_title.split('(', 1)[0].strip()
+ description = (traverse_obj(player_config, ('movie', 'description'))
+ or self._html_search_meta(['description', 'og:description'], webpage))
+ return {
+ '_type': 'url_transparent',
+ 'id': video_id,
+ 'display_id': display_id,
+ 'title': title,
+ 'description': description,
+ 'url': smuggle_url(self.BRIGHTCOVE_URL_TEMPLATE % (account_id, video_id), {'geo_countries': ['JP']}),
+ 'ie_key': 'BrightcoveNew',
+ }
diff --git a/yt_dlp/extractor/ntvde.py b/yt_dlp/extractor/ntvde.py
new file mode 100644
index 0000000..9f3a498
--- /dev/null
+++ b/yt_dlp/extractor/ntvde.py
@@ -0,0 +1,83 @@
+import re
+
+from .common import InfoExtractor
+from ..utils import (
+ int_or_none,
+ js_to_json,
+ url_or_none,
+)
+from ..utils.traversal import traverse_obj
+
+
+class NTVDeIE(InfoExtractor):
+ IE_NAME = 'n-tv.de'
+ _VALID_URL = r'https?://(?:www\.)?n-tv\.de/mediathek/(?:videos|magazine)/[^/?#]+/[^/?#]+-article(?P<id>[^/?#]+)\.html'
+
+ _TESTS = [{
+ 'url': 'http://www.n-tv.de/mediathek/videos/panorama/Schnee-und-Glaette-fuehren-zu-zahlreichen-Unfaellen-und-Staus-article14438086.html',
+ 'md5': '6bcf2a6638cb83f45d5561659a1cb498',
+ 'info_dict': {
+ 'id': '14438086',
+ 'ext': 'mp4',
+ 'thumbnail': r're:^https?://.*\.jpg$',
+ 'title': 'Schnee und Glätte führen zu zahlreichen Unfällen und Staus',
+ 'alt_title': 'Winterchaos auf deutschen Straßen',
+ 'description': 'Schnee und Glätte sorgen deutschlandweit für einen chaotischen Start in die Woche: Auf den Straßen kommt es zu kilometerlangen Staus und Dutzenden Glätteunfällen. In Düsseldorf und München wirbelt der Schnee zudem den Flugplan durcheinander. Dutzende Flüge landen zu spät, einige fallen ganz aus.',
+ 'duration': 67,
+ 'timestamp': 1422892797,
+ 'upload_date': '20150202',
+ },
+ }, {
+ 'url': 'https://www.n-tv.de/mediathek/magazine/auslandsreport/Juedische-Siedler-wollten-Rache-die-wollten-nur-toeten-article24523089.html',
+ 'md5': 'c5c6014c014ccc3359470e1d34472bfd',
+ 'info_dict': {
+ 'id': '24523089',
+ 'ext': 'mp4',
+ 'thumbnail': r're:^https?://.*\.jpg$',
+ 'title': 'Jüdische Siedler "wollten Rache, die wollten nur töten"',
+ 'alt_title': 'Israelische Gewalt fern von Gaza',
+ 'description': 'Vier Tage nach dem Massaker der Hamas greifen jüdische Siedler das Haus einer palästinensischen Familie im Westjordanland an. Die Überlebenden berichten, sie waren unbewaffnet, die Angreifer seien nur auf "Rache und Töten" aus gewesen. Als die Toten beerdigt werden sollen, eröffnen die Siedler erneut das Feuer.',
+ 'duration': 326,
+ 'timestamp': 1699688294,
+ 'upload_date': '20231111',
+ },
+ }]
+
+ def _real_extract(self, url):
+ video_id = self._match_id(url)
+ webpage = self._download_webpage(url, video_id)
+
+ info = self._search_json(
+ r'article:', webpage, 'info', video_id, transform_source=js_to_json)
+
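+ # The player config is attached via jQuery .data() as a JS literal, not JSON; drop the ivw tracking entry before running it through js_to_json.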
+ vdata = self._search_json(
+ r'\$\(\s*"#playerwrapper"\s*\)\s*\.data\(\s*"player",',
+ webpage, 'player data', video_id,
+ transform_source=lambda s: js_to_json(re.sub(r'ivw:[^},]+', '', s)))['setup']['source']
+
+ formats = []
+ if vdata.get('progressive'):
+ formats.append({
+ 'format_id': 'http',
+ 'url': vdata['progressive'],
+ })
+ if vdata.get('hls'):
+ formats.extend(self._extract_m3u8_formats(
+ vdata['hls'], video_id, 'mp4', m3u8_id='hls', fatal=False))
+ if vdata.get('dash'):
+ formats.extend(self._extract_mpd_formats(vdata['dash'], video_id, fatal=False, mpd_id='dash'))
+
+ return {
+ 'id': video_id,
+ **traverse_obj(info, {
+ 'title': 'headline',
+ 'description': 'intro',
+ 'alt_title': 'kicker',
+ 'timestamp': ('publishedDateAsUnixTimeStamp', {int_or_none}),
+ }),
+ **traverse_obj(vdata, {
+ 'thumbnail': ('poster', {url_or_none}),
+ 'duration': ('length', {int_or_none}),
+ }),
+ 'formats': formats,
+ }
diff --git a/yt_dlp/extractor/ntvru.py b/yt_dlp/extractor/ntvru.py
new file mode 100644
index 0000000..fe39657
--- /dev/null
+++ b/yt_dlp/extractor/ntvru.py
@@ -0,0 +1,142 @@
+from .common import InfoExtractor
+from ..utils import (
+ int_or_none,
+ strip_or_none,
+ unescapeHTML,
+ xpath_text,
+)
+
+
+class NTVRuIE(InfoExtractor):
+ IE_NAME = 'ntv.ru'
+ _VALID_URL = r'https?://(?:www\.)?ntv\.ru/(?:[^/]+/)*(?P<id>[^/?#&]+)'
+
+ _TESTS = [{
+ 'url': 'http://www.ntv.ru/novosti/863142/',
+ 'md5': 'ba7ea172a91cb83eb734cad18c10e723',
+ 'info_dict': {
+ 'id': '746000',
+ 'ext': 'mp4',
+ 'title': 'Командующий Черноморским флотом провел переговоры в штабе ВМС Украины',
+ 'description': 'Командующий Черноморским флотом провел переговоры в штабе ВМС Украины',
+ 'thumbnail': r're:^http://.*\.jpg',
+ 'duration': 136,
+ 'view_count': int,
+ },
+ }, {
+ 'url': 'http://www.ntv.ru/video/novosti/750370/',
+ 'md5': 'adecff79691b4d71e25220a191477124',
+ 'info_dict': {
+ 'id': '750370',
+ 'ext': 'mp4',
+ 'title': 'Родные пассажиров пропавшего Boeing не верят в трагический исход',
+ 'description': 'Родные пассажиров пропавшего Boeing не верят в трагический исход',
+ 'thumbnail': r're:^http://.*\.jpg',
+ 'duration': 172,
+ 'view_count': int,
+ },
+ 'skip': '404 Not Found',
+ }, {
+ 'url': 'http://www.ntv.ru/peredacha/segodnya/m23700/o232416',
+ 'md5': '82dbd49b38e3af1d00df16acbeab260c',
+ 'info_dict': {
+ 'id': '747480',
+ 'ext': 'mp4',
+ 'title': '«Сегодня». 21 марта 2014 года. 16:00',
+ 'description': '«Сегодня». 21 марта 2014 года. 16:00',
+ 'thumbnail': r're:^http://.*\.jpg',
+ 'duration': 1496,
+ 'view_count': int,
+ },
+ }, {
+ 'url': 'https://www.ntv.ru/kino/Koma_film/m70281/o336036/video/',
+ 'md5': 'e9c7cde24d9d3eaed545911a04e6d4f4',
+ 'info_dict': {
+ 'id': '1126480',
+ 'ext': 'mp4',
+ 'title': 'Остросюжетный фильм «Кома»',
+ 'description': 'Остросюжетный фильм «Кома»',
+ 'thumbnail': r're:^http://.*\.jpg',
+ 'duration': 5592,
+ 'view_count': int,
+ },
+ }, {
+ 'url': 'http://www.ntv.ru/serial/Delo_vrachey/m31760/o233916/',
+ 'md5': '9320cd0e23f3ea59c330dc744e06ff3b',
+ 'info_dict': {
+ 'id': '751482',
+ 'ext': 'mp4',
+ 'title': '«Дело врачей»: «Деревце жизни»',
+ 'description': '«Дело врачей»: «Деревце жизни»',
+ 'thumbnail': r're:^http://.*\.jpg',
+ 'duration': 2590,
+ 'view_count': int,
+ },
+ }, {
+ # Schemeless file URL
+ 'url': 'https://www.ntv.ru/video/1797442',
+ 'only_matching': True,
+ }]
+
+ _VIDEO_ID_REGEXES = [
+ r'<meta property="og:url" content="https?://www\.ntv\.ru/video/(\d+)',
+ r'<meta property="og:video:(?:url|iframe)" content="https?://www\.ntv\.ru/embed/(\d+)',
+ r'<video embed=[^>]+><id>(\d+)</id>',
+ r'<video restriction[^>]+><key>(\d+)</key>',
+ ]
+
+ def _real_extract(self, url):
+ video_id = self._match_id(url)
+
+ webpage = self._download_webpage(url, video_id)
+
+ video_url = self._og_search_property(
+ ('video', 'video:iframe'), webpage, default=None)
+ if video_url:
+ video_id = self._search_regex(
+ r'https?://(?:www\.)?ntv\.ru/video/(?:embed/)?(\d+)',
+ video_url, 'video id', default=None)
+
+ if not video_id:
+ video_id = self._html_search_regex(
+ self._VIDEO_ID_REGEXES, webpage, 'video id')
+
+ player = self._download_xml(
+ 'http://www.ntv.ru/vi%s/' % video_id,
+ video_id, 'Downloading video XML')
+
+ title = strip_or_none(unescapeHTML(xpath_text(player, './data/title', 'title', fatal=True)))
+
+ video = player.find('./data/video')
+
+ formats = []
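+ # XML element names are prefixed per quality: <file>, <hifile> and <webmfile>.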
+ for format_id in ['', 'hi', 'webm']:
+ file_ = xpath_text(video, './%sfile' % format_id)
+ if not file_:
+ continue
+ if file_.startswith('//'):
+ file_ = self._proto_relative_url(file_)
+ elif not file_.startswith('http'):
+ file_ = 'http://media.ntv.ru/vod/' + file_
+ formats.append({
+ 'url': file_,
+ 'filesize': int_or_none(xpath_text(video, './%ssize' % format_id)),
+ })
+ hls_manifest = xpath_text(video, './playback/hls')
+ if hls_manifest:
+ formats.extend(self._extract_m3u8_formats(
+ hls_manifest, video_id, m3u8_id='hls', fatal=False))
+ dash_manifest = xpath_text(video, './playback/dash')
+ if dash_manifest:
+ formats.extend(self._extract_mpd_formats(
+ dash_manifest, video_id, mpd_id='dash', fatal=False))
+
+ return {
+ 'id': xpath_text(video, './id'),
+ 'title': title,
+ 'description': strip_or_none(unescapeHTML(xpath_text(player, './data/description'))),
+ 'thumbnail': xpath_text(video, './splash'),
+ 'duration': int_or_none(xpath_text(video, './totaltime')),
+ 'view_count': int_or_none(xpath_text(video, './views')),
+ 'formats': formats,
+ }
diff --git a/yt_dlp/extractor/nubilesporn.py b/yt_dlp/extractor/nubilesporn.py
new file mode 100644
index 0000000..1d630f5
--- /dev/null
+++ b/yt_dlp/extractor/nubilesporn.py
@@ -0,0 +1,99 @@
+import re
+
+from .common import InfoExtractor
+from ..utils import (
+ clean_html,
+ float_or_none,
+ format_field,
+ get_element_by_class,
+ get_element_by_id,
+ get_element_html_by_class,
+ get_elements_by_class,
+ int_or_none,
+ try_call,
+ unified_timestamp,
+ urlencode_postdata,
+)
+
+
+class NubilesPornIE(InfoExtractor):
+ _NETRC_MACHINE = 'nubiles-porn'
+ _VALID_URL = r'''(?x)
+ https://members\.nubiles-porn\.com/video/watch/(?P<id>\d+)
+ (?:/(?P<display_id>[\w\-]+-s(?P<season>\d+)e(?P<episode>\d+)))?
+ '''
+
+ _TESTS = [{
+ 'url': 'https://members.nubiles-porn.com/video/watch/165320/trying-to-focus-my-one-track-mind-s3e1',
+ 'md5': 'fa7f09da8027c35e4bdf0f94f55eac82',
+ 'info_dict': {
+ 'id': '165320',
+ 'title': 'Trying To Focus My One Track Mind - S3:E1',
+ 'ext': 'mp4',
+ 'display_id': 'trying-to-focus-my-one-track-mind-s3e1',
+ 'thumbnail': 'https://images.nubiles-porn.com/videos/trying_to_focus_my_one_track_mind/samples/cover1280.jpg',
+ 'description': 'md5:81f3d4372e0e39bff5c801da277a5141',
+ 'timestamp': 1676160000,
+ 'upload_date': '20230212',
+ 'channel': 'Younger Mommy',
+ 'channel_id': '64',
+ 'channel_url': 'https://members.nubiles-porn.com/video/website/64',
+ 'like_count': int,
+ 'average_rating': float,
+ 'age_limit': 18,
+ 'categories': ['Big Boobs', 'Big Naturals', 'Blowjob', 'Brunette', 'Cowgirl', 'Girl Orgasm', 'Girl-Boy',
+ 'Glasses', 'Hardcore', 'Milf', 'Shaved Pussy', 'Tattoos', 'YoungerMommy.com'],
+ 'tags': list,
+ 'cast': ['Kenzie Love'],
+ 'availability': 'needs_auth',
+ 'series': 'Younger Mommy',
+ 'series_id': '64',
+ 'season': 'Season 3',
+ 'season_number': 3,
+ 'episode': 'Episode 1',
+ 'episode_number': 1,
+ },
+ }]
+
+ def _perform_login(self, username, password):
+ login_webpage = self._download_webpage('https://nubiles-porn.com/login', video_id=None)
+ inputs = self._hidden_inputs(login_webpage)
+ inputs.update({'username': username, 'password': password})
+ self._request_webpage('https://nubiles-porn.com/authentication/login', None, data=urlencode_postdata(inputs))
+
+ def _real_extract(self, url):
+ url_match = self._match_valid_url(url)
+ video_id = url_match.group('id')
+ page = self._download_webpage(url, video_id)
+
+ media_entries = self._parse_html5_media_entries(
+ url, get_element_by_class('watch-page-video-wrapper', page), video_id)[0]
+
+ channel_id, channel_name = self._search_regex(
+ r'/video/website/(?P<id>\d+).+>(?P<name>\w+).com', get_element_html_by_class('site-link', page),
+ 'channel', fatal=False, group=('id', 'name')) or (None, None)
+ # Split the CamelCase channel name, e.g. 'YoungerMommy' -> 'Younger Mommy'
+ if channel_name:
+ channel_name = re.sub(r'([^A-Z]+)([A-Z]+)', r'\1 \2', channel_name)
+
+ return {
+ 'id': video_id,
+ 'title': self._search_regex('<h2>([^<]+)</h2>', page, 'title', fatal=False),
+ 'formats': media_entries.get('formats'),
+ 'display_id': url_match.group('display_id'),
+ 'thumbnail': media_entries.get('thumbnail'),
+ 'description': clean_html(get_element_html_by_class('content-pane-description', page)),
+ 'timestamp': unified_timestamp(get_element_by_class('date', page)),
+ 'channel': channel_name,
+ 'channel_id': channel_id,
+ 'channel_url': format_field(channel_id, None, 'https://members.nubiles-porn.com/video/website/%s'),
+ 'like_count': int_or_none(get_element_by_id('likecount', page)),
+ 'average_rating': float_or_none(get_element_by_class('score', page)),
+ 'age_limit': 18,
+ 'categories': try_call(lambda: list(map(clean_html, get_elements_by_class('btn', get_element_by_class('categories', page))))),
+ 'tags': try_call(lambda: list(map(clean_html, get_elements_by_class('btn', get_elements_by_class('tags', page)[1])))),
+ 'cast': get_elements_by_class('content-pane-performer', page),
+ 'availability': 'needs_auth',
+ 'series': channel_name,
+ 'series_id': channel_id,
+ 'season_number': int_or_none(url_match.group('season')),
+ 'episode_number': int_or_none(url_match.group('episode')),
+ }
diff --git a/yt_dlp/extractor/nuevo.py b/yt_dlp/extractor/nuevo.py
new file mode 100644
index 0000000..ec54041
--- /dev/null
+++ b/yt_dlp/extractor/nuevo.py
@@ -0,0 +1,36 @@
+from .common import InfoExtractor
+
+from ..utils import (
+ float_or_none,
+ xpath_text,
+)
+
+
+class NuevoBaseIE(InfoExtractor):
+ def _extract_nuevo(self, config_url, video_id, headers={}):
+ config = self._download_xml(
+ config_url, video_id, transform_source=lambda s: s.strip(),
+ headers=headers)
+
+ title = xpath_text(config, './title', 'title', fatal=True).strip()
+ video_id = xpath_text(config, './mediaid', default=video_id)
+ thumbnail = xpath_text(config, ['./image', './thumb'])
+ duration = float_or_none(xpath_text(config, './duration'))
+
+ formats = []
+ for element_name, format_id in (('file', 'sd'), ('filehd', 'hd')):
+ video_url = xpath_text(config, element_name)
+ if video_url:
+ formats.append({
+ 'url': video_url,
+ 'format_id': format_id,
+ })
+ self._check_formats(formats, video_id)
+
+ return {
+ 'id': video_id,
+ 'title': title,
+ 'thumbnail': thumbnail,
+ 'duration': duration,
+ 'formats': formats
+ }
diff --git a/yt_dlp/extractor/nuum.py b/yt_dlp/extractor/nuum.py
new file mode 100644
index 0000000..3db663d
--- /dev/null
+++ b/yt_dlp/extractor/nuum.py
@@ -0,0 +1,199 @@
+import functools
+
+from .common import InfoExtractor
+from ..utils import (
+ ExtractorError,
+ OnDemandPagedList,
+ UserNotLive,
+ filter_dict,
+ int_or_none,
+ parse_iso8601,
+ str_or_none,
+ url_or_none,
+)
+from ..utils.traversal import traverse_obj
+
+
+class NuumBaseIE(InfoExtractor):
+ def _call_api(self, path, video_id, description, query={}):
+ response = self._download_json(
+ f'https://nuum.ru/api/v2/{path}', video_id, query=query,
+ note=f'Downloading {description} metadata',
+ errnote=f'Unable to download {description} metadata')
+ if error := response.get('error'):
+ raise ExtractorError(f'API returned error: {error!r}')
+ return response['result']
+
+ def _get_channel_info(self, channel_name):
+ return self._call_api(
+ 'broadcasts/public', video_id=channel_name, description='channel',
+ query={
+ 'with_extra': 'true',
+ 'channel_name': channel_name,
+ 'with_deleted': 'true',
+ })
+
+ def _parse_video_data(self, container, extract_formats=True):
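+ # For finished broadcasts an archive (VOD) URL is present; prefer it over the live media URL.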
+ stream = traverse_obj(container, ('media_container_streams', 0, {dict})) or {}
+ media = traverse_obj(stream, ('stream_media', 0, {dict})) or {}
+ media_url = traverse_obj(media, (
+ 'media_meta', ('media_archive_url', 'media_url'), {url_or_none}), get_all=False)
+
+ video_id = str(container['media_container_id'])
+ is_live = media.get('media_status') == 'RUNNING'
+
+ formats, subtitles = None, None
+ if extract_formats:
+ formats, subtitles = self._extract_m3u8_formats_and_subtitles(
+ media_url, video_id, 'mp4', live=is_live)
+
+ return filter_dict({
+ 'id': video_id,
+ 'is_live': is_live,
+ 'formats': formats,
+ 'subtitles': subtitles,
+ **traverse_obj(container, {
+ 'title': ('media_container_name', {str}),
+ 'description': ('media_container_description', {str}),
+ 'timestamp': ('created_at', {parse_iso8601}),
+ 'channel': ('media_container_channel', 'channel_name', {str}),
+ 'channel_id': ('media_container_channel', 'channel_id', {str_or_none}),
+ }),
+ **traverse_obj(stream, {
+ 'view_count': ('stream_total_viewers', {int_or_none}),
+ 'concurrent_view_count': ('stream_current_viewers', {int_or_none}),
+ }),
+ **traverse_obj(media, {
+ 'duration': ('media_duration', {int_or_none}),
+ 'thumbnail': ('media_meta', ('media_preview_archive_url', 'media_preview_url'), {url_or_none}),
+ }, get_all=False),
+ })
+
+
+class NuumMediaIE(NuumBaseIE):
+ IE_NAME = 'nuum:media'
+ _VALID_URL = r'https?://nuum\.ru/(?:streams|videos|clips)/(?P<id>[\d]+)'
+ _TESTS = [{
+ 'url': 'https://nuum.ru/streams/1592713-7-days-to-die',
+ 'only_matching': True,
+ }, {
+ 'url': 'https://nuum.ru/videos/1567547-toxi-hurtz',
+ 'md5': 'f1d9118a30403e32b702a204eb03aca3',
+ 'info_dict': {
+ 'id': '1567547',
+ 'ext': 'mp4',
+ 'title': 'Toxi$ - Hurtz',
+ 'description': '',
+ 'timestamp': 1702631651,
+ 'upload_date': '20231215',
+ 'thumbnail': r're:^https?://.+\.jpg',
+ 'view_count': int,
+ 'concurrent_view_count': int,
+ 'channel_id': '6911',
+ 'channel': 'toxis',
+ 'duration': 116,
+ },
+ }, {
+ 'url': 'https://nuum.ru/clips/1552564-pro-misu',
+ 'md5': 'b248ae1565b1e55433188f11beeb0ca1',
+ 'info_dict': {
+ 'id': '1552564',
+ 'ext': 'mp4',
+ 'title': 'Про Мису 🙃',
+ 'timestamp': 1701971828,
+ 'upload_date': '20231207',
+ 'thumbnail': r're:^https?://.+\.jpg',
+ 'view_count': int,
+ 'concurrent_view_count': int,
+ 'channel_id': '3320',
+ 'channel': 'Misalelik',
+ 'duration': 41,
+ },
+ }]
+
+ def _real_extract(self, url):
+ video_id = self._match_id(url)
+ video_data = self._call_api(f'media-containers/{video_id}', video_id, 'media')
+
+ return self._parse_video_data(video_data)
+
+
+class NuumLiveIE(NuumBaseIE):
+ IE_NAME = 'nuum:live'
+ _VALID_URL = r'https?://nuum\.ru/channel/(?P<id>[^/#?]+)/?(?:$|[#?])'
+ _TESTS = [{
+ 'url': 'https://nuum.ru/channel/mts_live',
+ 'only_matching': True,
+ }]
+
+ def _real_extract(self, url):
+ channel = self._match_id(url)
+ channel_info = self._get_channel_info(channel)
+ if traverse_obj(channel_info, ('channel', 'channel_is_live')) is False:
+ raise UserNotLive(video_id=channel)
+
+ info = self._parse_video_data(channel_info['media_container'])
+ return {
+ 'webpage_url': f'https://nuum.ru/streams/{info["id"]}',
+ 'extractor_key': NuumMediaIE.ie_key(),
+ 'extractor': NuumMediaIE.IE_NAME,
+ **info,
+ }
+
+
+class NuumTabIE(NuumBaseIE):
+ IE_NAME = 'nuum:tab'
+ _VALID_URL = r'https?://nuum\.ru/channel/(?P<id>[^/#?]+)/(?P<type>streams|videos|clips)'
+ _TESTS = [{
+ 'url': 'https://nuum.ru/channel/dankon_/clips',
+ 'info_dict': {
+ 'id': 'dankon__clips',
+ 'title': 'Dankon_',
+ },
+ 'playlist_mincount': 29,
+ }, {
+ 'url': 'https://nuum.ru/channel/dankon_/videos',
+ 'info_dict': {
+ 'id': 'dankon__videos',
+ 'title': 'Dankon_',
+ },
+ 'playlist_mincount': 2,
+ }, {
+ 'url': 'https://nuum.ru/channel/dankon_/streams',
+ 'info_dict': {
+ 'id': 'dankon__streams',
+ 'title': 'Dankon_',
+ },
+ 'playlist_mincount': 1,
+ }]
+
+ _PAGE_SIZE = 50
+
+ def _fetch_page(self, channel_id, tab_type, tab_id, page):
+ CONTAINER_TYPES = {
+ 'clips': ['SHORT_VIDEO', 'REVIEW_VIDEO'],
+ 'videos': ['LONG_VIDEO'],
+ 'streams': ['SINGLE'],
+ }
+
+ media_containers = self._call_api(
+ 'media-containers', video_id=tab_id, description=f'{tab_type} tab page {page + 1}',
+ query={
+ 'limit': self._PAGE_SIZE,
+ 'offset': page * self._PAGE_SIZE,
+ 'channel_id': channel_id,
+ 'media_container_status': 'STOPPED',
+ 'media_container_type': CONTAINER_TYPES[tab_type],
+ })
+ for container in traverse_obj(media_containers, (..., {dict})):
+ metadata = self._parse_video_data(container, extract_formats=False)
+ yield self.url_result(f'https://nuum.ru/videos/{metadata["id"]}', NuumMediaIE, **metadata)
+
+ def _real_extract(self, url):
+ channel_name, tab_type = self._match_valid_url(url).group('id', 'type')
+ tab_id = f'{channel_name}_{tab_type}'
+ channel_data = self._get_channel_info(channel_name)['channel']
+
+ return self.playlist_result(OnDemandPagedList(functools.partial(
+ self._fetch_page, channel_data['channel_id'], tab_type, tab_id), self._PAGE_SIZE),
+ playlist_id=tab_id, playlist_title=channel_data.get('channel_name'))
diff --git a/yt_dlp/extractor/nuvid.py b/yt_dlp/extractor/nuvid.py
new file mode 100644
index 0000000..6ac351c
--- /dev/null
+++ b/yt_dlp/extractor/nuvid.py
@@ -0,0 +1,99 @@
+import re
+
+from .common import InfoExtractor
+from ..utils import (
+ int_or_none,
+ parse_duration,
+ strip_or_none,
+ traverse_obj,
+ url_or_none,
+)
+
+
+class NuvidIE(InfoExtractor):
+ _VALID_URL = r'https?://(?:www|m)\.nuvid\.com/video/(?P<id>[0-9]+)'
+ _TESTS = [{
+ 'url': 'https://www.nuvid.com/video/6513023/italian-babe',
+ 'md5': '772d2f8288f3d3c5c45f7a41761c7844',
+ 'info_dict': {
+ 'id': '6513023',
+ 'ext': 'mp4',
+ 'title': 'italian babe',
+ 'duration': 321.0,
+ 'age_limit': 18,
+ 'thumbnail': r're:https?://.+\.jpg',
+ }
+ }, {
+ 'url': 'https://m.nuvid.com/video/6523263',
+ 'md5': 'ebd22ce8e47e1d9a4d0756a15c67da52',
+ 'info_dict': {
+ 'id': '6523263',
+ 'ext': 'mp4',
+ 'title': 'Slut brunette college student anal dorm',
+ 'duration': 421.0,
+ 'age_limit': 18,
+ 'thumbnail': r're:https?://.+\.jpg',
+ 'thumbnails': list,
+ }
+ }, {
+ 'url': 'http://m.nuvid.com/video/6415801/',
+ 'md5': '638d5ececb138d5753593f751ae3f697',
+ 'info_dict': {
+ 'id': '6415801',
+ 'ext': 'mp4',
+ 'title': 'My best friend wanted to fuck my wife for a long time',
+ 'duration': 1882,
+ 'age_limit': 18,
+ 'thumbnail': r're:https?://.+\.jpg',
+ }
+ }]
+
+ def _real_extract(self, url):
+ video_id = self._match_id(url)
+
+ qualities = {
+ 'lq': '360p',
+ 'hq': '720p',
+ }
+
+ json_url = f'https://www.nuvid.com/player_config_json/?vid={video_id}&aid=0&domain_id=0&embed=0&check_speed=0'
+ video_data = self._download_json(
+ json_url, video_id, headers={
+ 'Accept': 'application/json, text/javascript, */*; q = 0.01',
+ 'Content-Type': 'application/x-www-form-urlencoded; charset=utf-8',
+ })
+
+ webpage = self._download_webpage(
+ 'http://m.nuvid.com/video/%s' % video_id,
+ video_id, 'Downloading video page', fatal=False) or ''
+
+ title = strip_or_none(video_data.get('title') or self._html_search_regex(
+ (r'''<span\s[^>]*?\btitle\s*=\s*(?P<q>"|'|\b)(?P<title>[^"]+)(?P=q)\s*>''',
+ r'''<div\s[^>]*?\bclass\s*=\s*(?P<q>"|'|\b)thumb-holder video(?P=q)>\s*<h5\b[^>]*>(?P<title>[^<]+)</h5''',
+ r'''<span\s[^>]*?\bclass\s*=\s*(?P<q>"|'|\b)title_thumb(?P=q)>(?P<title>[^<]+)</span'''),
+ webpage, 'title', group='title'))
+
+ formats = [{
+ 'url': source,
+ 'format_id': qualities.get(quality),
+ 'height': int_or_none(qualities.get(quality, '')[:-1]),
+ } for quality, source in (video_data.get('files') or {}).items() if source]
+
+ self._check_formats(formats, video_id)
+
+ duration = parse_duration(traverse_obj(video_data, 'duration', 'duration_format'))
+ thumbnails = [
+ {'url': thumb_url} for thumb_url in re.findall(
+ r'<div\s+class\s*=\s*"video-tmb-wrap"\s*>\s*<img\s+src\s*=\s*"([^"]+)"\s*/>', webpage)
+ if url_or_none(thumb_url)]
+ if url_or_none(video_data.get('poster')):
+ thumbnails.append({'url': video_data['poster'], 'preference': 1})
+
+ return {
+ 'id': video_id,
+ 'formats': formats,
+ 'title': title,
+ 'thumbnails': thumbnails,
+ 'duration': duration,
+ 'age_limit': 18,
+ }
diff --git a/yt_dlp/extractor/nytimes.py b/yt_dlp/extractor/nytimes.py
new file mode 100644
index 0000000..3019202
--- /dev/null
+++ b/yt_dlp/extractor/nytimes.py
@@ -0,0 +1,420 @@
+import json
+import uuid
+
+from .common import InfoExtractor
+from ..utils import (
+ ExtractorError,
+ clean_html,
+ determine_ext,
+ extract_attributes,
+ float_or_none,
+ get_elements_html_by_class,
+ int_or_none,
+ merge_dicts,
+ mimetype2ext,
+ parse_iso8601,
+ remove_end,
+ remove_start,
+ str_or_none,
+ traverse_obj,
+ url_or_none,
+)
+
+
+class NYTimesBaseIE(InfoExtractor):
+ _DNS_NAMESPACE = uuid.UUID('36dd619a-56dc-595b-9e09-37f4152c7b5d')
+ _TOKEN = 'MIIBIjANBgkqhkiG9w0BAQEFAAOCAQ8AMIIBCgKCAQEAuNIzKBOFB77aT/jN/FQ+/QVKWq5V1ka1AYmCR9hstz1pGNPH5ajOU9gAqta0T89iPnhjwla+3oec/Z3kGjxbpv6miQXufHFq3u2RC6HyU458cLat5kVPSOQCe3VVB5NRpOlRuwKHqn0txfxnwSSj8mqzstR997d3gKB//RO9zE16y3PoWlDQXkASngNJEWvL19iob/xwAkfEWCjyRILWFY0JYX3AvLMSbq7wsqOCE5srJpo7rRU32zsByhsp1D5W9OYqqwDmflsgCEQy2vqTsJjrJohuNg+urMXNNZ7Y3naMoqttsGDrWVxtPBafKMI8pM2ReNZBbGQsQXRzQNo7+QIDAQAB'
+ _GRAPHQL_API = 'https://samizdat-graphql.nytimes.com/graphql/v2'
+ _GRAPHQL_QUERY = '''query VideoQuery($id: String!) {
+ video(id: $id) {
+ ... on Video {
+ bylines {
+ renderedRepresentation
+ }
+ duration
+ firstPublished
+ promotionalHeadline
+ promotionalMedia {
+ ... on Image {
+ crops {
+ name
+ renditions {
+ name
+ width
+ height
+ url
+ }
+ }
+ }
+ }
+ renditions {
+ type
+ width
+ height
+ url
+ bitrate
+ }
+ summary
+ }
+ }
+}'''
+
+ def _call_api(self, media_id):
+ # reference: `id-to-uri.js`
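+ # Numeric media IDs map deterministically to nyt://video/<uuid> identifiers via nested UUIDv5.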
+ video_uuid = uuid.uuid5(self._DNS_NAMESPACE, 'video')
+ media_uuid = uuid.uuid5(video_uuid, media_id)
+
+ return traverse_obj(self._download_json(
+ self._GRAPHQL_API, media_id, 'Downloading JSON from GraphQL API', data=json.dumps({
+ 'query': self._GRAPHQL_QUERY,
+ 'variables': {'id': f'nyt://video/{media_uuid}'},
+ }, separators=(',', ':')).encode(), headers={
+ 'Content-Type': 'application/json',
+ 'Nyt-App-Type': 'vhs',
+ 'Nyt-App-Version': 'v3.52.21',
+ 'Nyt-Token': self._TOKEN,
+ 'Origin': 'https://nytimes.com',
+ }, fatal=False), ('data', 'video', {dict})) or {}
+
+ def _extract_thumbnails(self, thumbs):
+ return traverse_obj(thumbs, (lambda _, v: url_or_none(v['url']), {
+ 'url': 'url',
+ 'width': ('width', {int_or_none}),
+ 'height': ('height', {int_or_none}),
+ }), default=None)
+
+ def _extract_formats_and_subtitles(self, video_id, content_media_json):
+ urls = []
+ formats = []
+ subtitles = {}
+ for video in traverse_obj(content_media_json, ('renditions', ..., {dict})):
+ video_url = video.get('url')
+ format_id = video.get('type')
+ if not video_url or format_id == 'thumbs' or video_url in urls:
+ continue
+ urls.append(video_url)
+ ext = mimetype2ext(video.get('mimetype')) or determine_ext(video_url)
+ if ext == 'm3u8':
+ m3u8_fmts, m3u8_subs = self._extract_m3u8_formats_and_subtitles(
+ video_url, video_id, 'mp4', 'm3u8_native',
+ m3u8_id=format_id or 'hls', fatal=False)
+ formats.extend(m3u8_fmts)
+ self._merge_subtitles(m3u8_subs, target=subtitles)
+ elif ext == 'mpd':
+ continue # all mpd urls give 404 errors
+ else:
+ formats.append({
+ 'url': video_url,
+ 'format_id': format_id,
+ 'vcodec': video.get('videoencoding') or video.get('video_codec'),
+ 'width': int_or_none(video.get('width')),
+ 'height': int_or_none(video.get('height')),
+ 'filesize': traverse_obj(video, (
+ ('file_size', 'fileSize'), (None, 'value'), {int_or_none}), get_all=False),
+ 'tbr': int_or_none(video.get('bitrate'), 1000) or None,
+ 'ext': ext,
+ })
+
+ return formats, subtitles
+
+ def _extract_video(self, media_id):
+ data = self._call_api(media_id)
+ formats, subtitles = self._extract_formats_and_subtitles(media_id, data)
+
+ return {
+ 'id': media_id,
+ 'title': data.get('promotionalHeadline'),
+ 'description': data.get('summary'),
+ 'timestamp': parse_iso8601(data.get('firstPublished')),
+ 'duration': float_or_none(data.get('duration'), scale=1000),
+ 'creator': ', '.join(traverse_obj(data, ( # TODO: change to 'creators'
+ 'bylines', ..., 'renderedRepresentation', {lambda x: remove_start(x, 'By ')}))),
+ 'formats': formats,
+ 'subtitles': subtitles,
+ 'thumbnails': self._extract_thumbnails(
+ traverse_obj(data, ('promotionalMedia', 'crops', ..., 'renditions', ...))),
+ }
+
+
+class NYTimesIE(NYTimesBaseIE):
+ _VALID_URL = r'https?://(?:(?:www\.)?nytimes\.com/video/(?:[^/]+/)+?|graphics8\.nytimes\.com/bcvideo/\d+(?:\.\d+)?/iframe/embed\.html\?videoId=)(?P<id>\d+)'
+ _EMBED_REGEX = [r'<iframe[^>]+src=(["\'])(?P<url>(?:https?:)?//graphics8\.nytimes\.com/bcvideo/[^/]+/iframe/embed\.html.+?)\1>']
+ _TESTS = [{
+ 'url': 'http://www.nytimes.com/video/opinion/100000002847155/verbatim-what-is-a-photocopier.html?playlistId=100000001150263',
+ 'md5': 'a553aa344014e3723d33893d89d4defc',
+ 'info_dict': {
+ 'id': '100000002847155',
+ 'ext': 'mp4',
+ 'title': 'Verbatim: What Is a Photocopier?',
+ 'description': 'md5:93603dada88ddbda9395632fdc5da260',
+ 'timestamp': 1398646132,
+ 'upload_date': '20140428',
+ 'creator': 'Brett Weiner',
+ 'thumbnail': r're:https?://\w+\.nyt.com/images/.+\.jpg',
+ 'duration': 419,
+ },
+ }, {
+ 'url': 'http://www.nytimes.com/video/travel/100000003550828/36-hours-in-dubai.html',
+ 'only_matching': True,
+ }]
+
+ def _real_extract(self, url):
+ video_id = self._match_id(url)
+
+ return self._extract_video(video_id)
+
+
+class NYTimesArticleIE(NYTimesBaseIE):
+ _VALID_URL = r'https?://(?:www\.)?nytimes\.com/\d{4}/\d{2}/\d{2}/(?!books|podcasts)[^/?#]+/(?:\w+/)?(?P<id>[^./?#]+)(?:\.html)?'
+ _TESTS = [{
+ 'url': 'http://www.nytimes.com/2015/04/14/business/owner-of-gravity-payments-a-credit-card-processor-is-setting-a-new-minimum-wage-70000-a-year.html?_r=0',
+ 'md5': '3eb5ddb1d6f86254fe4f233826778737',
+ 'info_dict': {
+ 'id': '100000003628438',
+ 'ext': 'mp4',
+ 'title': 'One Company’s New Minimum Wage: $70,000 a Year',
+ 'description': 'md5:89ba9ab67ca767bb92bf823d1f138433',
+ 'timestamp': 1429047468,
+ 'upload_date': '20150414',
+ 'uploader': 'Matthew Williams',
+ 'creator': 'Patricia Cohen',
+ 'thumbnail': r're:https?://\w+\.nyt.com/images/.*\.jpg',
+ 'duration': 119.0,
+ },
+ }, {
+ # article with audio and no video
+ 'url': 'https://www.nytimes.com/2023/09/29/health/mosquitoes-genetic-engineering.html',
+ 'md5': '2365b3555c8aa7f4dd34ca735ad02e6a',
+ 'info_dict': {
+ 'id': '100000009110381',
+ 'ext': 'mp3',
+ 'title': 'The Gamble: Can Genetically Modified Mosquitoes End Disease?',
+ 'description': 'md5:9ff8b47acbaf7f3ca8c732f5c815be2e',
+ 'timestamp': 1695960700,
+ 'upload_date': '20230929',
+ 'creator': 'Stephanie Nolen, Natalija Gormalova',
+ 'thumbnail': r're:https?://\w+\.nyt.com/images/.*\.jpg',
+ 'duration': 1322,
+ },
+ }, {
+ 'url': 'https://www.nytimes.com/2023/11/29/business/dealbook/kamala-harris-biden-voters.html',
+ 'md5': '3eb5ddb1d6f86254fe4f233826778737',
+ 'info_dict': {
+ 'id': '100000009202270',
+ 'ext': 'mp4',
+ 'title': 'Kamala Harris Defends Biden Policies, but Says ‘More Work’ Needed to Reach Voters',
+ 'description': 'md5:de4212a7e19bb89e4fb14210ca915f1f',
+ 'timestamp': 1701290997,
+ 'upload_date': '20231129',
+ 'uploader': 'By The New York Times',
+ 'creator': 'Katie Rogers',
+ 'thumbnail': r're:https?://\w+\.nyt.com/images/.*\.jpg',
+ 'duration': 97.631,
+ },
+ 'params': {
+ 'skip_download': 'm3u8',
+ },
+ }, {
+ # multiple videos in the same article
+ 'url': 'https://www.nytimes.com/2023/12/02/business/air-traffic-controllers-safety.html',
+ 'info_dict': {
+ 'id': 'air-traffic-controllers-safety',
+ 'title': 'Drunk and Asleep on the Job: Air Traffic Controllers Pushed to the Brink',
+ 'description': 'md5:549e5a5e935bf7d048be53ba3d2c863d',
+ 'upload_date': '20231202',
+ 'creator': 'Emily Steel, Sydney Ember',
+ 'timestamp': 1701511264,
+ },
+ 'playlist_count': 3,
+ }, {
+ 'url': 'https://www.nytimes.com/2023/12/02/business/media/netflix-squid-game-challenge.html',
+ 'only_matching': True,
+ }]
+
+ def _extract_content_from_block(self, block):
+ details = traverse_obj(block, {
+ 'id': ('sourceId', {str}),
+ 'uploader': ('bylines', ..., 'renderedRepresentation', {str}),
+ 'duration': (None, (('duration', {lambda x: float_or_none(x, scale=1000)}), ('length', {int_or_none}))),
+ 'timestamp': ('firstPublished', {parse_iso8601}),
+ 'series': ('podcastSeries', {str}),
+ }, get_all=False)
+
+ formats, subtitles = self._extract_formats_and_subtitles(details.get('id'), block)
+ # audio articles will have a fileUrl and no formats
+ url = traverse_obj(block, ('fileUrl', {url_or_none}))
+ if not formats and url:
+ formats.append({'url': url, 'vcodec': 'none'})
+
+ return {
+ **details,
+ 'thumbnails': self._extract_thumbnails(traverse_obj(
+ block, ('promotionalMedia', 'crops', ..., 'renditions', ...))),
+ 'formats': formats,
+ 'subtitles': subtitles
+ }
+
+ def _real_extract(self, url):
+ page_id = self._match_id(url)
+ webpage = self._download_webpage(url, page_id)
+ art_json = self._search_json(
+ r'window\.__preloadedData\s*=', webpage, 'media details', page_id,
+ transform_source=lambda x: x.replace('undefined', 'null'))['initialData']['data']['article']
+
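+ # Collect all Video/Audio blocks from the article body, including any lede media.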
+ blocks = traverse_obj(art_json, (
+ 'sprinkledBody', 'content', ..., ('ledeMedia', None),
+ lambda _, v: v['__typename'] in ('Video', 'Audio')))
+ if not blocks:
+ raise ExtractorError('Unable to extract any media blocks from webpage')
+
+ common_info = {
+ 'title': remove_end(self._html_extract_title(webpage), ' - The New York Times'),
+ 'description': traverse_obj(art_json, (
+ 'sprinkledBody', 'content', ..., 'summary', 'content', ..., 'text', {str}),
+ get_all=False) or self._html_search_meta(['og:description', 'twitter:description'], webpage),
+ 'timestamp': traverse_obj(art_json, ('firstPublished', {parse_iso8601})),
+ 'creator': ', '.join(
+ traverse_obj(art_json, ('bylines', ..., 'creators', ..., 'displayName'))), # TODO: change to 'creators' (list)
+ 'thumbnails': self._extract_thumbnails(traverse_obj(
+ art_json, ('promotionalMedia', 'assetCrops', ..., 'renditions', ...))),
+ }
+
+ entries = []
+ for block in blocks:
+ entries.append(merge_dicts(self._extract_content_from_block(block), common_info))
+
+ if len(entries) > 1:
+ return self.playlist_result(entries, page_id, **common_info)
+
+ return {
+ 'id': page_id,
+ **entries[0],
+ }
+
+
+class NYTimesCookingIE(NYTimesBaseIE):
+ IE_NAME = 'NYTimesCookingGuide'
+ _VALID_URL = r'https?://cooking\.nytimes\.com/guides/(?P<id>[\w-]+)'
+ _TESTS = [{
+ 'url': 'https://cooking.nytimes.com/guides/13-how-to-cook-a-turkey',
+ 'info_dict': {
+ 'id': '13-how-to-cook-a-turkey',
+ 'title': 'How to Cook a Turkey',
+ 'description': 'md5:726cfd3f9b161bdf5c279879e8050ca0',
+ },
+ 'playlist_count': 2,
+ }, {
+ # single video example
+ 'url': 'https://cooking.nytimes.com/guides/50-how-to-make-mac-and-cheese',
+ 'md5': '64415805fe0b8640fce6b0b9def5989a',
+ 'info_dict': {
+ 'id': '100000005835845',
+ 'ext': 'mp4',
+ 'title': 'How to Make Mac and Cheese',
+ 'description': 'md5:b8f2f33ec1fb7523b21367147c9594f1',
+ 'timestamp': 1522950315,
+ 'upload_date': '20180405',
+ 'duration': 9.51,
+ 'creator': 'Alison Roman',
+ 'thumbnail': r're:https?://\w+\.nyt.com/images/.*\.jpg',
+ },
+ }, {
+ 'url': 'https://cooking.nytimes.com/guides/20-how-to-frost-a-cake',
+ 'md5': '64415805fe0b8640fce6b0b9def5989a',
+ 'info_dict': {
+ 'id': '20-how-to-frost-a-cake',
+ 'title': 'How to Frost a Cake',
+ 'description': 'md5:a31fe3b98a8ce7b98aae097730c269cd',
+ },
+ 'playlist_count': 8,
+ }]
+
+ def _real_extract(self, url):
+ page_id = self._match_id(url)
+ webpage = self._download_webpage(url, page_id)
+ title = self._html_search_meta(['og:title', 'twitter:title'], webpage)
+ description = self._html_search_meta(['og:description', 'twitter:description'], webpage)
+
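+ # A guide page has one lead video player and may list additional 'video-item' entries.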
+ lead_video_id = self._search_regex(
+ r'data-video-player-id="(\d+)"></div>', webpage, 'lead video')
+ media_ids = traverse_obj(
+ get_elements_html_by_class('video-item', webpage), (..., {extract_attributes}, 'data-video-id'))
+
+ if media_ids:
+ media_ids.append(lead_video_id)
+ return self.playlist_result(
+ [self._extract_video(media_id) for media_id in media_ids], page_id, title, description)
+
+ return {
+ **self._extract_video(lead_video_id),
+ 'title': title,
+ 'description': description,
+ 'creator': self._search_regex( # TODO: change to 'creators'
+ r'<span itemprop="author">([^<]+)</span></p>', webpage, 'author', default=None),
+ }
+
+
+class NYTimesCookingRecipeIE(InfoExtractor):
+ _VALID_URL = r'https?://cooking\.nytimes\.com/recipes/(?P<id>\d+)'
+ _TESTS = [{
+ 'url': 'https://cooking.nytimes.com/recipes/1017817-cranberry-curd-tart',
+ 'md5': '579e83bbe8e61e9de67f80edba8a78a8',
+ 'info_dict': {
+ 'id': '1017817',
+ 'ext': 'mp4',
+ 'title': 'Cranberry Curd Tart',
+ 'description': 'md5:ad77a3fc321db636256d4343c5742152',
+ 'timestamp': 1447804800,
+ 'upload_date': '20151118',
+ 'creator': 'David Tanis',
+ 'thumbnail': r're:https?://\w+\.nyt.com/images/.*\.jpg',
+ },
+ }, {
+ 'url': 'https://cooking.nytimes.com/recipes/1024781-neapolitan-checkerboard-cookies',
+ 'md5': '58df35998241dcf0620e99e646331b42',
+ 'info_dict': {
+ 'id': '1024781',
+ 'ext': 'mp4',
+ 'title': 'Neapolitan Checkerboard Cookies',
+ 'description': 'md5:ba12394c585ababea951cb6d2fcc6631',
+ 'timestamp': 1701302400,
+ 'upload_date': '20231130',
+ 'creator': 'Sue Li',
+ 'thumbnail': r're:https?://\w+\.nyt.com/images/.*\.jpg',
+ },
+ }, {
+ 'url': 'https://cooking.nytimes.com/recipes/1019516-overnight-oats',
+ 'md5': '2fe7965a3adc899913b8e25ada360823',
+ 'info_dict': {
+ 'id': '1019516',
+ 'ext': 'mp4',
+ 'timestamp': 1546387200,
+ 'description': 'md5:8856ce10239161bd2596ac335b9f9bfb',
+ 'upload_date': '20190102',
+ 'title': 'Overnight Oats',
+ 'creator': 'Genevieve Ko',
+ 'thumbnail': r're:https?://\w+\.nyt.com/images/.*\.jpg',
+ },
+ }]
+
+ def _real_extract(self, url):
+ page_id = self._match_id(url)
+ webpage = self._download_webpage(url, page_id)
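+        # recipe pages are rendered with Next.js; the video source and
+        # metadata live in the embedded page props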
+ recipe_data = self._search_nextjs_data(webpage, page_id)['props']['pageProps']['recipe']
+
+ formats, subtitles = self._extract_m3u8_formats_and_subtitles(
+ recipe_data['videoSrc'], page_id, 'mp4', m3u8_id='hls')
+
+ return {
+ **traverse_obj(recipe_data, {
+ 'id': ('id', {str_or_none}),
+ 'title': ('title', {str}),
+ 'description': ('topnote', {clean_html}),
+ 'timestamp': ('publishedAt', {int_or_none}),
+ 'creator': ('contentAttribution', 'cardByline', {str}),
+ }),
+ 'formats': formats,
+ 'subtitles': subtitles,
+ 'thumbnails': [{'url': thumb_url} for thumb_url in traverse_obj(
+ recipe_data, ('image', 'crops', 'recipe', ..., {url_or_none}))],
+ }
diff --git a/yt_dlp/extractor/nzherald.py b/yt_dlp/extractor/nzherald.py
new file mode 100644
index 0000000..062f9a8
--- /dev/null
+++ b/yt_dlp/extractor/nzherald.py
@@ -0,0 +1,123 @@
+import json
+
+from .brightcove import BrightcoveNewIE
+from .common import InfoExtractor
+from ..compat import compat_str
+from ..utils import (
+ ExtractorError,
+ traverse_obj
+)
+
+
+class NZHeraldIE(InfoExtractor):
+ IE_NAME = 'nzherald'
+    _VALID_URL = r'https?://(?:www\.)?nzherald\.co\.nz/[\w/-]+/(?P<id>[A-Z0-9]+)'
+ _TESTS = [
+ {
+ # Video accessible under 'video' key
+ 'url': 'https://www.nzherald.co.nz/nz/queen-elizabeth-death-nz-public-holiday-announced-for-september-26/CEOPBSXO2JDCLNK3H7E3BIE2FA/',
+ 'info_dict': {
+ 'id': '6312191736112',
+ 'ext': 'mp4',
+ 'title': 'Focus: PM holds post-Cabinet press conference',
+ 'duration': 238.08,
+ 'upload_date': '20220912',
+ 'uploader_id': '1308227299001',
+ 'timestamp': 1662957159,
+ 'tags': [],
+ 'thumbnail': r're:https?://.*\.jpg$',
+ 'description': 'md5:2f17713fcbfcfbe38bb9e7dfccbb0f2e',
+ }
+ }, {
+ # Webpage has brightcove embed player url
+ 'url': 'https://www.nzherald.co.nz/travel/pencarrow-coastal-trail/HDVTPJEPP46HJ2UEMK4EGD2DFI/',
+ 'info_dict': {
+ 'id': '6261791733001',
+ 'ext': 'mp4',
+ 'title': 'Pencarrow Coastal Trail',
+ 'timestamp': 1625102897,
+ 'upload_date': '20210701',
+ 'uploader_id': '1308227299001',
+ 'description': 'md5:d361aaa0c6498f7ac1bc4fc0a0aec1e4',
+ 'thumbnail': r're:https?://.*\.jpg$',
+ 'tags': ['travel', 'video'],
+ 'duration': 43.627,
+ }
+ }, {
+ # two video embeds of the same video
+ 'url': 'https://www.nzherald.co.nz/nz/truck-driver-captured-cutting-off-motorist-on-state-highway-1-in-canterbury/FIHNJB7PLLPHWQPK4S7ZBDUC4I/',
+ 'info_dict': {
+ 'id': '6251114530001',
+ 'ext': 'mp4',
+ 'title': 'Truck travelling north from Rakaia runs car off road',
+ 'timestamp': 1619730509,
+ 'upload_date': '20210429',
+ 'uploader_id': '1308227299001',
+ 'description': 'md5:4cae7dfb7613ac4c73b9e73a75c6b5d7'
+ },
+ 'skip': 'video removed',
+ }, {
+ # customVideo embed requiring additional API call
+ 'url': 'https://www.nzherald.co.nz/nz/politics/reserve-bank-rejects-political-criticisms-stands-by-review/2JO5Q4WLZRCBBNWTLACZMOP4RA/',
+ 'info_dict': {
+ 'id': '6315123873112',
+ 'ext': 'mp4',
+ 'timestamp': 1667862725,
+ 'title': 'Focus: Luxon on re-appointment of Reserve Bank governor Adrian Orr',
+ 'upload_date': '20221107',
+ 'description': 'md5:df2f1f7033a8160c66e28e4743f5d934',
+ 'uploader_id': '1308227299001',
+ 'tags': ['video', 'nz herald focus', 'politics', 'politics videos'],
+ 'thumbnail': r're:https?://.*\.jpg$',
+ 'duration': 99.584,
+ }
+ }, {
+ 'url': 'https://www.nzherald.co.nz/kahu/kaupapa-companies-my-taiao-supporting-maori-in-study-and-business/PQBO2J25WCG77VGRX7W7BVYEAI/',
+ 'only_matching': True
+ }, {
+ 'url': 'https://nzherald.co.nz/the-country/video/focus-nzs-first-mass-covid-19-vaccination-event/N5I7IL3BRFLZSD33TLDLYJDGK4/',
+ 'only_matching': True
+ }, {
+ 'url': 'https://www.nzherald.co.nz/the-vision-is-clear/news/tvic-damian-roper-planting-trees-an-addiction/AN2AAEPNRK5VLISDWQAJZB6ATQ',
+ 'only_matching': True
+ }
+ ]
+
+ BRIGHTCOVE_URL_TEMPLATE = 'http://players.brightcove.net/1308227299001/S1BXZn8t_default/index.html?videoId=%s'
+
+ def _extract_bc_embed_url(self, webpage):
+ """The initial webpage may include the brightcove player embed url"""
+ bc_url = BrightcoveNewIE._extract_url(self, webpage)
+ return bc_url or self._search_regex(
+ r'(?:embedUrl)\"\s*:\s*\"(?P<embed_url>%s)' % BrightcoveNewIE._VALID_URL,
+ webpage, 'embed url', default=None, group='embed_url')
+
+ def _real_extract(self, url):
+ article_id = self._match_id(url)
+ webpage = self._download_webpage(url, article_id)
+ bc_url = self._extract_bc_embed_url(webpage)
+
+ if not bc_url:
+ fusion_metadata = self._parse_json(
+ self._search_regex(r'Fusion\.globalContent\s*=\s*({.+?})\s*;', webpage, 'fusion metadata'), article_id)
+
+ video_metadata = fusion_metadata.get('video')
+ if not video_metadata:
+ custom_video_id = traverse_obj(fusion_metadata, ('customVideo', 'embed', 'id'), expected_type=str)
+ if custom_video_id:
+ video_metadata = self._download_json(
+ 'https://www.nzherald.co.nz/pf/api/v3/content/fetch/full-content-by-id', article_id,
+ query={'query': json.dumps({'id': custom_video_id, 'site': 'nzh'}), '_website': 'nzh'})
+ bc_video_id = traverse_obj(
+ video_metadata or fusion_metadata, # fusion metadata is the video metadata for video-only pages
+ 'brightcoveId', ('content_elements', ..., 'referent', 'id'),
+ get_all=False, expected_type=compat_str)
+
+ if not bc_video_id:
+ if isinstance(video_metadata, dict) and len(video_metadata) == 0:
+ raise ExtractorError('This article does not have a video.', expected=True)
+ else:
+ raise ExtractorError('Failed to extract brightcove video id')
+ bc_url = self.BRIGHTCOVE_URL_TEMPLATE % bc_video_id
+
+ return self.url_result(bc_url, 'BrightcoveNew')
diff --git a/yt_dlp/extractor/nzonscreen.py b/yt_dlp/extractor/nzonscreen.py
new file mode 100644
index 0000000..bf2dbca
--- /dev/null
+++ b/yt_dlp/extractor/nzonscreen.py
@@ -0,0 +1,93 @@
+from .common import InfoExtractor
+from ..utils import (
+ float_or_none,
+ int_or_none,
+ remove_end,
+ strip_or_none,
+ traverse_obj,
+ url_or_none,
+)
+
+
+class NZOnScreenIE(InfoExtractor):
+    _VALID_URL = r'https?://www\.nzonscreen\.com/title/(?P<id>[^/?#]+)'
+ _TESTS = [{
+ 'url': 'https://www.nzonscreen.com/title/shoop-shoop-diddy-wop-cumma-cumma-wang-dang-1982',
+ 'info_dict': {
+ 'id': '726ed6585c6bfb30',
+ 'ext': 'mp4',
+ 'format_id': 'hi',
+ 'display_id': 'shoop-shoop-diddy-wop-cumma-cumma-wang-dang-1982',
+ 'title': 'Monte Video - "Shoop Shoop, Diddy Wop"',
+ 'description': 'Monte Video - "Shoop Shoop, Diddy Wop"',
+ 'alt_title': 'Shoop Shoop Diddy Wop Cumma Cumma Wang Dang | Music Video',
+ 'thumbnail': r're:https://www\.nzonscreen\.com/content/images/.+\.jpg',
+ 'duration': 158,
+ },
+ 'params': {'skip_download': 'm3u8'},
+ }, {
+ 'url': 'https://www.nzonscreen.com/title/shes-a-mod-1964?collection=best-of-the-60s',
+ 'info_dict': {
+ 'id': '3dbe709ff03c36f1',
+ 'ext': 'mp4',
+ 'format_id': 'hi',
+ 'display_id': 'shes-a-mod-1964',
+ 'title': 'Ray Columbus - \'She\'s A Mod\'',
+ 'description': 'Ray Columbus - \'She\'s A Mod\'',
+ 'alt_title': 'She\'s a Mod | Music Video',
+ 'thumbnail': r're:https://www\.nzonscreen\.com/content/images/.+\.jpg',
+ 'duration': 130,
+ },
+ 'params': {'skip_download': 'm3u8'},
+ }, {
+ 'url': 'https://www.nzonscreen.com/title/puha-and-pakeha-1968/overview',
+ 'info_dict': {
+ 'id': 'f86342544385ad8a',
+ 'ext': 'mp4',
+ 'format_id': 'hi',
+ 'display_id': 'puha-and-pakeha-1968',
+ 'title': 'Looking At New Zealand - Puha and Pakeha',
+ 'alt_title': 'Looking at New Zealand - \'Pūhā and Pākehā\' | Television',
+ 'description': 'An excerpt from this television programme.',
+ 'duration': 212,
+ 'thumbnail': r're:https://www\.nzonscreen\.com/content/images/.+\.jpg',
+ },
+ 'params': {'skip_download': 'm3u8'},
+ }]
+
+ def _extract_formats(self, playlist):
+ for quality, (id_, url) in enumerate(traverse_obj(
+ playlist, ('h264', {'lo': 'lo_res', 'hi': 'hi_res'}), expected_type=url_or_none).items()):
+ yield {
+ 'url': url,
+ 'format_id': id_,
+ 'ext': 'mp4',
+ 'quality': quality,
+ 'height': int_or_none(playlist.get('height')) if id_ == 'hi' else None,
+ 'width': int_or_none(playlist.get('width')) if id_ == 'hi' else None,
+ 'filesize_approx': float_or_none(traverse_obj(playlist, ('h264', f'{id_}_res_mb')), invscale=1024**2),
+ }
+
+ def _real_extract(self, url):
+ video_id = self._match_id(url)
+ webpage = self._download_webpage(url, video_id)
+
+ playlist = self._parse_json(self._html_search_regex(
+ r'data-video-config=\'([^\']+)\'', webpage, 'media data'), video_id)
+
+ return {
+ 'id': playlist['uuid'],
+ 'display_id': video_id,
+ 'title': strip_or_none(playlist.get('label')),
+ 'description': strip_or_none(playlist.get('description')),
+ 'alt_title': strip_or_none(remove_end(
+ self._html_extract_title(webpage, default=None) or self._og_search_title(webpage),
+ ' | NZ On Screen')),
+ 'thumbnail': traverse_obj(playlist, ('thumbnail', 'path')),
+ 'duration': float_or_none(playlist.get('duration')),
+ 'formats': list(self._extract_formats(playlist)),
+ 'http_headers': {
+ 'Referer': 'https://www.nzonscreen.com/',
+ 'Origin': 'https://www.nzonscreen.com/',
+ }
+ }
diff --git a/yt_dlp/extractor/nzz.py b/yt_dlp/extractor/nzz.py
new file mode 100644
index 0000000..ac3b731
--- /dev/null
+++ b/yt_dlp/extractor/nzz.py
@@ -0,0 +1,40 @@
+import re
+
+from .common import InfoExtractor
+from ..utils import (
+ extract_attributes,
+)
+
+
+class NZZIE(InfoExtractor):
+ _VALID_URL = r'https?://(?:www\.)?nzz\.ch/(?:[^/]+/)*[^/?#]+-ld\.(?P<id>\d+)'
+ _TESTS = [{
+ 'url': 'http://www.nzz.ch/zuerich/gymizyte/gymizyte-schreiben-schueler-heute-noch-diktate-ld.9153',
+ 'info_dict': {
+ 'id': '9153',
+ },
+ 'playlist_mincount': 6,
+ }, {
+ 'url': 'https://www.nzz.ch/video/nzz-standpunkte/cvp-auf-der-suche-nach-dem-mass-der-mitte-ld.1368112',
+ 'info_dict': {
+ 'id': '1368112',
+ },
+ 'playlist_count': 1,
+ }]
+
+ def _real_extract(self, url):
+ page_id = self._match_id(url)
+ webpage = self._download_webpage(url, page_id)
+
+ entries = []
+ for player_element in re.findall(
+ r'(<[^>]+class="kalturaPlayer[^"]*"[^>]*>)', webpage):
+ player_params = extract_attributes(player_element)
+ if player_params.get('data-type') not in ('kaltura_singleArticle',):
+ self.report_warning('Unsupported player type')
+ continue
+ entry_id = player_params['data-id']
+ entries.append(self.url_result(
+ 'kaltura:1750922:' + entry_id, 'Kaltura', entry_id))
+
+ return self.playlist_result(entries, page_id)
diff --git a/yt_dlp/extractor/odkmedia.py b/yt_dlp/extractor/odkmedia.py
new file mode 100644
index 0000000..b852160
--- /dev/null
+++ b/yt_dlp/extractor/odkmedia.py
@@ -0,0 +1,105 @@
+import json
+
+from .common import InfoExtractor
+from ..networking.exceptions import HTTPError
+from ..utils import (
+ ExtractorError,
+ GeoRestrictedError,
+ float_or_none,
+ traverse_obj,
+ try_call
+)
+
+
+class OnDemandChinaEpisodeIE(InfoExtractor):
+ _VALID_URL = r'https?://www\.ondemandchina\.com/\w+/watch/(?P<series>[\w-]+)/(?P<id>ep-(?P<ep>\d+))'
+ _TESTS = [{
+ 'url': 'https://www.ondemandchina.com/en/watch/together-against-covid-19/ep-1',
+ 'info_dict': {
+ 'id': '264394',
+ 'ext': 'mp4',
+ 'duration': 3256.88,
+ 'title': 'EP 1 The Calling',
+ 'alt_title': '第1集 令出如山',
+ 'thumbnail': 'https://d2y2efdi5wgkcl.cloudfront.net/fit-in/256x256/media-io/2020/9/11/image.d9816e81.jpg',
+ 'description': '疫情严峻,党政军民学、东西南北中协同应考',
+ 'tags': ['Social Humanities', 'Documentary', 'Medical', 'Social'],
+ }
+ }]
+
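+    # GraphQL query resolving an episode's numeric id together with its
+    # multilingual title/synopsis variants and thumbnail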
+ _QUERY = '''
+ query Episode($programSlug: String!, $episodeNumber: Int!) {
+ episode(
+ programSlug: $programSlug
+ episodeNumber: $episodeNumber
+ kind: "series"
+ part: null
+ ) {
+ id
+ title
+ titleEn
+ titleKo
+ titleZhHans
+ titleZhHant
+ synopsis
+ synopsisEn
+ synopsisKo
+ synopsisZhHans
+ synopsisZhHant
+ videoDuration
+ images {
+ thumbnail
+ }
+ }
+ }'''
+
+ def _real_extract(self, url):
+ program_slug, display_id, ep_number = self._match_valid_url(url).group('series', 'id', 'ep')
+ webpage = self._download_webpage(url, display_id)
+
+ video_info = self._download_json(
+ 'https://odc-graphql.odkmedia.io/graphql', display_id,
+ headers={'Content-type': 'application/json'},
+ data=json.dumps({
+ 'operationName': 'Episode',
+ 'query': self._QUERY,
+ 'variables': {
+ 'programSlug': program_slug,
+ 'episodeNumber': int(ep_number),
+ },
+ }).encode())['data']['episode']
+
+ try:
+ source_json = self._download_json(
+ f'https://odkmedia.io/odc/api/v2/playback/{video_info["id"]}/', display_id,
+ headers={'Authorization': '', 'service-name': 'odc'})
+        except ExtractorError as e:
+            if isinstance(e.cause, HTTPError):
+                error_data = self._parse_json(e.cause.response.read(), display_id)['detail']
+                raise GeoRestrictedError(error_data)
+            raise
+
+ formats, subtitles = [], {}
+ for source in traverse_obj(source_json, ('sources', ...)):
+ if source.get('type') == 'hls':
+ fmts, subs = self._extract_m3u8_formats_and_subtitles(source.get('url'), display_id)
+ formats.extend(fmts)
+ self._merge_subtitles(subs, target=subtitles)
+ else:
+ self.report_warning(f'Unsupported format {source.get("type")}', display_id)
+
+ return {
+ 'id': str(video_info['id']),
+ 'duration': float_or_none(video_info.get('videoDuration'), 1000),
+ 'thumbnail': (traverse_obj(video_info, ('images', 'thumbnail'))
+ or self._html_search_meta(['og:image', 'twitter:image'], webpage)),
+ 'title': (traverse_obj(video_info, 'title', 'titleEn')
+ or self._html_search_meta(['og:title', 'twitter:title'], webpage)
+ or self._html_extract_title(webpage)),
+ 'alt_title': traverse_obj(video_info, 'titleKo', 'titleZhHans', 'titleZhHant'),
+ 'description': (traverse_obj(
+            video_info, 'synopsisEn', 'synopsisKo', 'synopsisZhHans', 'synopsisZhHant', 'synopsis')
+ or self._html_search_meta(['og:description', 'twitter:description', 'description'], webpage)),
+ 'formats': formats,
+ 'subtitles': subtitles,
+ 'tags': try_call(lambda: self._html_search_meta('keywords', webpage).split(', '))
+ }
diff --git a/yt_dlp/extractor/odnoklassniki.py b/yt_dlp/extractor/odnoklassniki.py
new file mode 100644
index 0000000..1be45d8
--- /dev/null
+++ b/yt_dlp/extractor/odnoklassniki.py
@@ -0,0 +1,464 @@
+import urllib.parse
+
+from .common import InfoExtractor
+from ..compat import (
+ compat_etree_fromstring,
+ compat_parse_qs,
+ compat_urllib_parse_unquote,
+ compat_urllib_parse_urlparse,
+)
+from ..networking import HEADRequest
+from ..utils import (
+ ExtractorError,
+ float_or_none,
+ int_or_none,
+ qualities,
+ smuggle_url,
+ traverse_obj,
+ unescapeHTML,
+ unified_strdate,
+ unsmuggle_url,
+ url_or_none,
+ urlencode_postdata,
+)
+
+
+class OdnoklassnikiIE(InfoExtractor):
+ _VALID_URL = r'''(?x)
+ https?://
+ (?:(?:www|m|mobile)\.)?
+ (?:odnoklassniki|ok)\.ru/
+ (?:
+ video(?P<embed>embed)?/|
+ web-api/video/moviePlayer/|
+ live/|
+ dk\?.*?st\.mvId=
+ )
+ (?P<id>[\d-]+)
+ '''
+ _EMBED_REGEX = [r'<iframe[^>]+src=(["\'])(?P<url>(?:https?:)?//(?:odnoklassniki|ok)\.ru/videoembed/.+?)\1']
+ _TESTS = [{
+ 'note': 'Coub embedded',
+ 'url': 'http://ok.ru/video/1484130554189',
+ 'info_dict': {
+ 'id': '1keok9',
+ 'ext': 'mp4',
+ 'timestamp': 1545580896,
+ 'view_count': int,
+ 'thumbnail': r're:^https?://.*\.jpg$',
+ 'title': 'Народная забава',
+ 'uploader': 'Nevata',
+ 'upload_date': '20181223',
+ 'age_limit': 0,
+ 'uploader_id': 'nevata.s',
+ 'like_count': int,
+ 'duration': 8.08,
+ 'repost_count': int,
+ },
+ }, {
+ 'note': 'vk.com embedded',
+ 'url': 'https://ok.ru/video/3568183087575',
+ 'info_dict': {
+ 'id': '-165101755_456243749',
+ 'ext': 'mp4',
+ 'uploader_id': '-165101755',
+ 'duration': 132,
+ 'timestamp': 1642869935,
+ 'upload_date': '20220122',
+ 'thumbnail': str,
+ 'title': str,
+ 'uploader': str,
+ },
+ 'skip': 'vk extractor error',
+ }, {
+ # metadata in JSON, webm_dash with Firefox UA
+ 'url': 'http://ok.ru/video/20079905452',
+ 'md5': '8f477d8931c531374a3e36daec617b2c',
+ 'info_dict': {
+ 'id': '20079905452',
+ 'ext': 'webm',
+ 'title': 'Культура меняет нас (прекрасный ролик!))',
+ 'thumbnail': str,
+ 'duration': 100,
+ 'upload_date': '20141207',
+ 'uploader_id': '330537914540',
+ 'uploader': 'Виталий Добровольский',
+ 'like_count': int,
+ 'age_limit': 0,
+ },
+ 'params': {
+ 'format': 'bv[ext=webm]',
+ 'http_headers': {'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; rv:102.0) Gecko/20100101 Firefox/102.0'},
+ },
+ }, {
+ # metadataUrl
+ 'url': 'http://ok.ru/video/63567059965189-0?fromTime=5',
+ 'md5': '2bae2f58eefe1b3d26f3926c4a64d2f3',
+ 'info_dict': {
+ 'id': '63567059965189-0',
+ 'ext': 'mp4',
+ 'title': 'Девушка без комплексов ...',
+ 'thumbnail': str,
+ 'duration': 191,
+ 'upload_date': '20150518',
+ 'uploader_id': '534380003155',
+ 'uploader': '☭ Андрей Мещанинов ☭',
+ 'like_count': int,
+ 'age_limit': 0,
+ 'start_time': 5,
+ },
+ 'params': {'skip_download': 'm3u8'},
+ }, {
+ # YouTube embed (metadataUrl, provider == USER_YOUTUBE)
+ 'url': 'https://ok.ru/video/3952212382174',
+ 'md5': '5fb5f83ce16cb212d6bf887282b5da53',
+ 'info_dict': {
+ 'id': '5axVgHHDBvU',
+ 'ext': 'mp4',
+ 'title': 'Youtube-dl 101: What is it and HOW to use it! Full Download Walkthrough and Guide',
+ 'description': 'md5:b57209eeb9d5c2f20c984dfb58862097',
+ 'uploader': 'Lod Mer',
+ 'uploader_id': '575186401502',
+ 'duration': 1529,
+ 'age_limit': 0,
+ 'upload_date': '20210405',
+ 'comment_count': int,
+ 'live_status': 'not_live',
+ 'view_count': int,
+ 'thumbnail': 'https://i.mycdn.me/i?r=AEHujHvw2RjEbemUCNEorZbxYpb_p_9AcN2FmGik64Krkcmz37YtlY093oAM5-HIEAt7Zi9s0CiBOSDmbngC-I-k&fn=external_8',
+ 'uploader_url': 'https://www.youtube.com/@MrKewlkid94',
+ 'channel_follower_count': int,
+ 'tags': ['youtube-dl', 'youtube playlists', 'download videos', 'download audio'],
+ 'channel_id': 'UCVGtvURtEURYHtJFUegdSug',
+ 'like_count': int,
+ 'availability': 'public',
+ 'channel_url': 'https://www.youtube.com/channel/UCVGtvURtEURYHtJFUegdSug',
+ 'categories': ['Education'],
+ 'playable_in_embed': True,
+ 'channel': 'BornToReact',
+ },
+ }, {
+ # YouTube embed (metadata, provider == USER_YOUTUBE, no metadata.movie.title field)
+ 'url': 'http://ok.ru/video/62036049272859-0',
+ 'info_dict': {
+ 'id': '62036049272859-0',
+ 'ext': 'mp4',
+ 'title': 'МУЗЫКА ДОЖДЯ .',
+ 'description': 'md5:6f1867132bd96e33bf53eda1091e8ed0',
+ 'upload_date': '20120106',
+ 'uploader_id': '473534735899',
+ 'uploader': 'МARINA D',
+ 'age_limit': 0,
+ },
+ 'params': {
+ 'skip_download': True,
+ },
+ 'skip': 'Video has not been found',
+ }, {
+ 'note': 'Only available in mobile webpage',
+ 'url': 'https://m.ok.ru/video/2361249957145',
+ 'info_dict': {
+ 'id': '2361249957145',
+ 'ext': 'mp4',
+ 'title': 'Быковское крещение',
+ 'duration': 3038.181,
+ 'thumbnail': r're:^https?://i\.mycdn\.me/videoPreview\?.+',
+ },
+ }, {
+ 'note': 'subtitles',
+ 'url': 'https://ok.ru/video/4249587550747',
+ 'info_dict': {
+ 'id': '4249587550747',
+ 'ext': 'mp4',
+ 'title': 'Small Country An African Childhood (2020) (1080p) +subtitle',
+ 'uploader': 'Sunflower Movies',
+ 'uploader_id': '595802161179',
+ 'upload_date': '20220816',
+ 'duration': 6728,
+ 'age_limit': 0,
+ 'thumbnail': r're:^https?://i\.mycdn\.me/videoPreview\?.+',
+ 'like_count': int,
+ 'subtitles': dict,
+ },
+ 'params': {
+ 'skip_download': True,
+ },
+ }, {
+ 'url': 'http://ok.ru/web-api/video/moviePlayer/20079905452',
+ 'only_matching': True,
+ }, {
+ 'url': 'http://www.ok.ru/video/20648036891',
+ 'only_matching': True,
+ }, {
+ 'url': 'http://www.ok.ru/videoembed/20648036891',
+ 'only_matching': True,
+ }, {
+ 'url': 'http://m.ok.ru/video/20079905452',
+ 'only_matching': True,
+ }, {
+ 'url': 'http://mobile.ok.ru/video/20079905452',
+ 'only_matching': True,
+ }, {
+ 'url': 'https://www.ok.ru/live/484531969818',
+ 'only_matching': True,
+ }, {
+ 'url': 'https://m.ok.ru/dk?st.cmd=movieLayer&st.discId=863789452017&st.retLoc=friend&st.rtu=%2Fdk%3Fst.cmd%3DfriendMovies%26st.mode%3Down%26st.mrkId%3D%257B%2522uploadedMovieMarker%2522%253A%257B%2522marker%2522%253A%25221519410114503%2522%252C%2522hasMore%2522%253Atrue%257D%252C%2522sharedMovieMarker%2522%253A%257B%2522marker%2522%253Anull%252C%2522hasMore%2522%253Afalse%257D%257D%26st.friendId%3D561722190321%26st.frwd%3Don%26_prevCmd%3DfriendMovies%26tkn%3D7257&st.discType=MOVIE&st.mvId=863789452017&_prevCmd=friendMovies&tkn=3648#lst#',
+ 'only_matching': True,
+ }, {
+ # Paid video
+ 'url': 'https://ok.ru/video/954886983203',
+ 'only_matching': True,
+ }, {
+ 'url': 'https://ok.ru/videoembed/2932705602075',
+ 'info_dict': {
+ 'id': '2932705602075',
+ 'ext': 'mp4',
+ 'thumbnail': 'https://i.mycdn.me/videoPreview?id=1369902483995&type=37&idx=2&tkn=fqlnoQD_xwq5ovIlKfgNyU08qmM&fn=external_8',
+ 'title': 'Boosty для тебя!',
+ 'uploader_id': '597811038747',
+ 'like_count': 0,
+ 'duration': 35,
+ },
+ }]
+
+ _WEBPAGE_TESTS = [{
+ 'url': 'https://boosty.to/ikakprosto/posts/56cedaca-b56a-4dfd-b3ed-98c79cfa0167',
+ 'info_dict': {
+ 'id': '3950343629563',
+ 'ext': 'mp4',
+ 'thumbnail': 'https://i.mycdn.me/videoPreview?id=2776238394107&type=37&idx=11&tkn=F3ejkUFcpuI4DnMRxrDGcH5YcmM&fn=external_8',
+ 'title': 'Заяц Бусти.mp4',
+ 'uploader_id': '571368965883',
+ 'like_count': 0,
+ 'duration': 10444,
+ },
+ 'skip': 'Site no longer embeds',
+ }]
+
+ def _clear_cookies(self, cdn_url):
+ # Direct http downloads will fail if CDN cookies are set
+ # so we need to reset them after each format extraction
+ self.cookiejar.clear(domain='.mycdn.me')
+ self.cookiejar.clear(domain=urllib.parse.urlparse(cdn_url).hostname)
+
+ @classmethod
+ def _extract_embed_urls(cls, url, webpage):
+ for x in super()._extract_embed_urls(url, webpage):
+ yield smuggle_url(x, {'referrer': url})
+
+ def _real_extract(self, url):
+ try:
+ return self._extract_desktop(url)
+ except ExtractorError as e:
+ try:
+ return self._extract_mobile(url)
+ except ExtractorError:
+ # error message of desktop webpage is in English
+ raise e
+
+ def _extract_desktop(self, url):
+ start_time = int_or_none(compat_parse_qs(
+ compat_urllib_parse_urlparse(url).query).get('fromTime', [None])[0])
+
+ url, smuggled = unsmuggle_url(url, {})
+ video_id, is_embed = self._match_valid_url(url).group('id', 'embed')
+ mode = 'videoembed' if is_embed else 'video'
+
+ webpage = self._download_webpage(
+ f'https://ok.ru/{mode}/{video_id}', video_id,
+ note='Downloading desktop webpage',
+ headers={'Referer': smuggled['referrer']} if smuggled.get('referrer') else {})
+
+ error = self._search_regex(
+ r'[^>]+class="vp_video_stub_txt"[^>]*>([^<]+)<',
+ webpage, 'error', default=None)
+ # Direct link from boosty
+ if (error == 'The author of this video has not been found or is blocked'
+ and not smuggled.get('referrer') and mode == 'videoembed'):
+ return self._extract_desktop(smuggle_url(url, {'referrer': 'https://boosty.to'}))
+ elif error:
+ raise ExtractorError(error, expected=True)
+
+ player = self._parse_json(
+ unescapeHTML(self._search_regex(
+ r'data-options=(?P<quote>["\'])(?P<player>{.+?%s.+?})(?P=quote)' % video_id,
+ webpage, 'player', group='player')),
+ video_id)
+
+ # embedded external player
+ if player.get('isExternalPlayer') and player.get('url'):
+ return self.url_result(player['url'])
+
+ flashvars = player['flashvars']
+
+ metadata = flashvars.get('metadata')
+ if metadata:
+ metadata = self._parse_json(metadata, video_id)
+ else:
+ data = {}
+ st_location = flashvars.get('location')
+ if st_location:
+ data['st.location'] = st_location
+ metadata = self._download_json(
+ compat_urllib_parse_unquote(flashvars['metadataUrl']),
+ video_id, 'Downloading metadata JSON',
+ data=urlencode_postdata(data))
+
+ movie = metadata['movie']
+
+ # Some embedded videos may not contain title in movie dict (e.g.
+ # http://ok.ru/video/62036049272859-0) thus we allow missing title
+ # here and it's going to be extracted later by an extractor that
+ # will process the actual embed.
+ provider = metadata.get('provider')
+ title = movie['title'] if provider == 'UPLOADED_ODKL' else movie.get('title')
+
+ thumbnail = movie.get('poster')
+ duration = int_or_none(movie.get('duration'))
+
+ author = metadata.get('author', {})
+ uploader_id = author.get('id')
+ uploader = author.get('name')
+
+ upload_date = unified_strdate(self._html_search_meta(
+ 'ya:ovs:upload_date', webpage, 'upload date', default=None))
+
+ age_limit = None
+ adult = self._html_search_meta(
+ 'ya:ovs:adult', webpage, 'age limit', default=None)
+ if adult:
+ age_limit = 18 if adult == 'true' else 0
+
+ like_count = int_or_none(metadata.get('likeCount'))
+
+ subtitles = {}
+ for sub in traverse_obj(metadata, ('movie', 'subtitleTracks', ...), expected_type=dict):
+ sub_url = sub.get('url')
+ if not sub_url:
+ continue
+ subtitles.setdefault(sub.get('language') or 'en', []).append({
+ 'url': sub_url,
+ 'ext': 'vtt',
+ })
+
+ info = {
+ 'id': video_id,
+ 'title': title,
+ 'thumbnail': thumbnail,
+ 'duration': duration,
+ 'upload_date': upload_date,
+ 'uploader': uploader,
+ 'uploader_id': uploader_id,
+ 'like_count': like_count,
+ 'age_limit': age_limit,
+ 'start_time': start_time,
+ 'subtitles': subtitles,
+ }
+
+ # pladform
+ if provider == 'OPEN_GRAPH':
+ info.update({
+ '_type': 'url_transparent',
+ 'url': movie['contentId'],
+ })
+ return info
+
+ if provider == 'USER_YOUTUBE':
+ info.update({
+ '_type': 'url_transparent',
+ 'url': movie['contentId'],
+ })
+ return info
+
+        assert title
+        if provider == 'LIVE_TV_APP':
+            info['is_live'] = True
+
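+        # ok.ru encodes the rendition in a 'type' URL parameter; type 4
+        # ranks lowest and type 7 highest when ordering formats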
+ quality = qualities(('4', '0', '1', '2', '3', '5', '6', '7'))
+
+ formats = [{
+ 'url': f['url'],
+ 'ext': 'mp4',
+ 'format_id': f.get('name'),
+ } for f in traverse_obj(metadata, ('videos', lambda _, v: url_or_none(v['url'])))]
+
+ m3u8_url = traverse_obj(metadata, 'hlsManifestUrl', 'ondemandHls')
+ if m3u8_url:
+ formats.extend(self._extract_m3u8_formats(
+ m3u8_url, video_id, 'mp4', 'm3u8_native',
+ m3u8_id='hls', fatal=False))
+ self._clear_cookies(m3u8_url)
+
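+        # the webm DASH manifest appears to be served only with a Firefox
+        # User-Agent (see the webm_dash test case above)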
+ for mpd_id, mpd_key in [('dash', 'ondemandDash'), ('webm', 'metadataWebmUrl')]:
+ mpd_url = metadata.get(mpd_key)
+ if mpd_url:
+ formats.extend(self._extract_mpd_formats(
+ mpd_url, video_id, mpd_id=mpd_id, fatal=False))
+ self._clear_cookies(mpd_url)
+
+ dash_manifest = metadata.get('metadataEmbedded')
+ if dash_manifest:
+ formats.extend(self._parse_mpd_formats(
+ compat_etree_fromstring(dash_manifest), 'mpd'))
+
+ for fmt in formats:
+ fmt_type = self._search_regex(
+ r'\btype[/=](\d)', fmt['url'],
+ 'format type', default=None)
+ if fmt_type:
+ fmt['quality'] = quality(fmt_type)
+
+ # Live formats
+ m3u8_url = metadata.get('hlsMasterPlaylistUrl')
+ if m3u8_url:
+ formats.extend(self._extract_m3u8_formats(
+ m3u8_url, video_id, 'mp4', m3u8_id='hls', fatal=False))
+ self._clear_cookies(m3u8_url)
+ rtmp_url = metadata.get('rtmpUrl')
+ if rtmp_url:
+ formats.append({
+ 'url': rtmp_url,
+ 'format_id': 'rtmp',
+ 'ext': 'flv',
+ })
+
+ if not formats:
+ payment_info = metadata.get('paymentInfo')
+ if payment_info:
+ self.raise_no_formats('This video is paid, subscribe to download it', expected=True)
+
+ info['formats'] = formats
+ return info
+
+ def _extract_mobile(self, url):
+ video_id = self._match_id(url)
+
+ webpage = self._download_webpage(
+ 'http://m.ok.ru/video/%s' % video_id, video_id,
+ note='Downloading mobile webpage')
+
+ error = self._search_regex(
+ r'видео</a>\s*<div\s+class="empty">(.+?)</div>',
+ webpage, 'error', default=None)
+ if error:
+ raise ExtractorError(error, expected=True)
+
+ json_data = self._search_regex(
+ r'data-video="(.+?)"', webpage, 'json data')
+ json_data = self._parse_json(unescapeHTML(json_data), video_id) or {}
+
+ redirect_url = self._request_webpage(HEADRequest(
+ json_data['videoSrc']), video_id, 'Requesting download URL').url
+ self._clear_cookies(redirect_url)
+
+ return {
+ 'id': video_id,
+ 'title': json_data.get('videoName'),
+ 'duration': float_or_none(json_data.get('videoDuration'), scale=1000),
+ 'thumbnail': json_data.get('videoPosterSrc'),
+ 'formats': [{
+ 'format_id': 'mobile',
+ 'url': redirect_url,
+ 'ext': 'mp4',
+ }]
+ }
diff --git a/yt_dlp/extractor/oftv.py b/yt_dlp/extractor/oftv.py
new file mode 100644
index 0000000..4cac518
--- /dev/null
+++ b/yt_dlp/extractor/oftv.py
@@ -0,0 +1,54 @@
+from .common import InfoExtractor
+from .zype import ZypeIE
+from ..utils import traverse_obj
+
+
+class OfTVIE(InfoExtractor):
+ _VALID_URL = r'https?://(?:www\.)?of\.tv/video/(?P<id>\w+)'
+ _TESTS = [{
+ 'url': 'https://of.tv/video/627d7d95b353db0001dadd1a',
+ 'md5': 'cb9cd5db3bb9ee0d32bfd7e373d6ef0a',
+ 'info_dict': {
+ 'id': '627d7d95b353db0001dadd1a',
+ 'ext': 'mp4',
+ 'title': 'E1: Jacky vs Eric',
+ 'thumbnail': r're:^https?://.*\.jpg',
+ 'average_rating': 0,
+ 'description': 'md5:dd16e3e2a8d27d922e7a989f85986853',
+ 'display_id': '',
+ 'duration': 1423,
+ 'timestamp': 1652391300,
+ 'upload_date': '20220512',
+ 'view_count': 0,
+ 'creator': 'This is Fire'
+ }
+ }]
+
+ def _real_extract(self, url):
+ video_id = self._match_id(url)
+ webpage = self._download_webpage(url, video_id)
+ info = next(ZypeIE.extract_from_webpage(self._downloader, url, webpage))
+ info['_type'] = 'url_transparent'
+ info['creator'] = self._search_regex(r'<a[^>]+class=\"creator-name\"[^>]+>([^<]+)', webpage, 'creator')
+ return info
+
+
+class OfTVPlaylistIE(InfoExtractor):
+ _VALID_URL = r'https?://(?:www\.)?of\.tv/creators/(?P<id>[a-zA-Z0-9-]+)/?(?:$|[?#])'
+ _TESTS = [{
+ 'url': 'https://of.tv/creators/this-is-fire/',
+ 'playlist_count': 8,
+ 'info_dict': {
+ 'id': 'this-is-fire'
+ }
+ }]
+
+ def _real_extract(self, url):
+ playlist_id = self._match_id(url)
+ webpage = self._download_webpage(url, playlist_id)
+
+ json_match = self._search_json(
+ r'var\s*remaining_videos\s*=', webpage, 'oftv playlists', playlist_id, contains_pattern=r'\[.+\]')
+
+ return self.playlist_from_matches(
+ traverse_obj(json_match, (..., 'discovery_url')), playlist_id)
diff --git a/yt_dlp/extractor/oktoberfesttv.py b/yt_dlp/extractor/oktoberfesttv.py
new file mode 100644
index 0000000..e0ac856
--- /dev/null
+++ b/yt_dlp/extractor/oktoberfesttv.py
@@ -0,0 +1,44 @@
+from .common import InfoExtractor
+
+
+class OktoberfestTVIE(InfoExtractor):
+ _VALID_URL = r'https?://(?:www\.)?oktoberfest-tv\.de/[^/]+/[^/]+/video/(?P<id>[^/?#]+)'
+
+ _TEST = {
+ 'url': 'http://www.oktoberfest-tv.de/de/kameras/video/hb-zelt',
+ 'info_dict': {
+ 'id': 'hb-zelt',
+ 'ext': 'mp4',
+ 'title': 're:^Live-Kamera: Hofbräuzelt [0-9]{4}-[0-9]{2}-[0-9]{2} [0-9]{2}:[0-9]{2}$',
+ 'thumbnail': r're:^https?://.*\.jpg$',
+ 'is_live': True,
+ },
+ 'params': {
+ 'skip_download': True,
+ }
+ }
+
+ def _real_extract(self, url):
+ video_id = self._match_id(url)
+ webpage = self._download_webpage(url, video_id)
+
+ title = self._html_search_regex(
+ r'<h1><strong>.*?</strong>(.*?)</h1>', webpage, 'title')
+
+ clip = self._search_regex(
+ r"clip:\s*\{\s*url:\s*'([^']+)'", webpage, 'clip')
+ ncurl = self._search_regex(
+ r"netConnectionUrl:\s*'([^']+)'", webpage, 'rtmp base')
+ video_url = ncurl + clip
+ thumbnail = self._search_regex(
+ r"canvas:\s*\{\s*backgroundImage:\s*'url\(([^)]+)\)'", webpage,
+ 'thumbnail', fatal=False)
+
+ return {
+ 'id': video_id,
+ 'title': title,
+ 'url': video_url,
+ 'ext': 'mp4',
+ 'is_live': True,
+ 'thumbnail': thumbnail,
+ }
diff --git a/yt_dlp/extractor/olympics.py b/yt_dlp/extractor/olympics.py
new file mode 100644
index 0000000..61d1f40
--- /dev/null
+++ b/yt_dlp/extractor/olympics.py
@@ -0,0 +1,65 @@
+from .common import InfoExtractor
+from ..utils import (
+ int_or_none,
+ try_get
+)
+
+
+class OlympicsReplayIE(InfoExtractor):
+ _VALID_URL = r'https?://(?:www\.)?olympics\.com(?:/tokyo-2020)?/[a-z]{2}/(?:replay|video)/(?P<id>[^/#&?]+)'
+ _TESTS = [{
+ 'url': 'https://olympics.com/fr/video/men-s-109kg-group-a-weightlifting-tokyo-2020-replays',
+ 'info_dict': {
+ 'id': 'f6a0753c-8e6f-4b7d-a435-027054a4f8e9',
+ 'ext': 'mp4',
+ 'title': '+109kg (H) Groupe A - Haltérophilie | Replay de Tokyo 2020',
+ 'upload_date': '20210801',
+ 'timestamp': 1627783200,
+ 'description': 'md5:c66af4a5bc7429dbcc43d15845ff03b3',
+ 'uploader': 'International Olympic Committee',
+ },
+ 'params': {
+ 'skip_download': True,
+ },
+ }, {
+ 'url': 'https://olympics.com/tokyo-2020/en/replay/bd242924-4b22-49a5-a846-f1d4c809250d/mens-bronze-medal-match-hun-esp',
+ 'only_matching': True,
+ }]
+
+ def _real_extract(self, url):
+        display_id = self._match_id(url)
+
+        webpage = self._download_webpage(url, display_id)
+ title = self._html_search_meta(('title', 'og:title', 'twitter:title'), webpage)
+ uuid = self._html_search_meta('episode_uid', webpage)
+ m3u8_url = self._html_search_meta('video_url', webpage)
+ json_ld = self._search_json_ld(webpage, uuid)
+ thumbnails_list = json_ld.get('image')
+ if not thumbnails_list:
+ thumbnails_list = self._html_search_regex(
+ r'["\']image["\']:\s*["\']([^"\']+)["\']', webpage, 'images', default='')
+ thumbnails_list = thumbnails_list.replace('[', '').replace(']', '').split(',')
+ thumbnails_list = [thumbnail.strip() for thumbnail in thumbnails_list]
+ thumbnails = []
+ for thumbnail in thumbnails_list:
+ width_a, height_a, width = self._search_regex(
+ r'/images/image/private/t_(?P<width_a>\d+)-(?P<height_a>\d+)_(?P<width>\d+)/primary/[\W\w\d]+',
+ thumbnail, 'thumb', group=(1, 2, 3), default=(None, None, None))
+ width_a, height_a, width = int_or_none(width_a), int_or_none(height_a), int_or_none(width)
+ thumbnails.append({
+ 'url': thumbnail,
+ 'width': width,
+ 'height': int_or_none(try_get(width, lambda x: x * height_a / width_a))
+ })
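+        # the page's video_url meta value is exchanged for a tokenized m3u8
+        # URL via the tokenGenerator endpoint before the manifest is fetched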
+ m3u8_url = self._download_json(
+ f'https://olympics.com/tokenGenerator?url={m3u8_url}', uuid, note='Downloading m3u8 url')
+ formats, subtitles = self._extract_m3u8_formats_and_subtitles(m3u8_url, uuid, 'mp4', m3u8_id='hls')
+
+ return {
+ 'id': uuid,
+ 'title': title,
+ 'thumbnails': thumbnails,
+ 'formats': formats,
+ 'subtitles': subtitles,
+ **json_ld
+ }
diff --git a/yt_dlp/extractor/on24.py b/yt_dlp/extractor/on24.py
new file mode 100644
index 0000000..9a4abc9
--- /dev/null
+++ b/yt_dlp/extractor/on24.py
@@ -0,0 +1,87 @@
+from .common import InfoExtractor
+from ..utils import (
+    int_or_none,
+    str_or_none,
+    strip_or_none,
+    try_get,
+    urljoin,
+)
+
+
+class On24IE(InfoExtractor):
+ IE_NAME = 'on24'
+ IE_DESC = 'ON24'
+
+ _VALID_URL = r'''(?x)
+ https?://event\.on24\.com/(?:
+ wcc/r/(?P<id_1>\d{7})/(?P<key_1>[0-9A-F]{32})|
+ eventRegistration/(?:console/EventConsoleApollo|EventLobbyServlet\?target=lobby30)
+ \.jsp\?(?:[^/#?]*&)?eventid=(?P<id_2>\d{7})[^/#?]*&key=(?P<key_2>[0-9A-F]{32})
+ )'''
+
+ _TESTS = [{
+ 'url': 'https://event.on24.com/eventRegistration/console/EventConsoleApollo.jsp?uimode=nextgeneration&eventid=2197467&sessionid=1&key=5DF57BE53237F36A43B478DD36277A84&contenttype=A&eventuserid=305999&playerwidth=1000&playerheight=650&caller=previewLobby&text_language_id=en&format=fhaudio&newConsole=false',
+ 'info_dict': {
+ 'id': '2197467',
+ 'ext': 'wav',
+ 'title': 'Pearson Test of English General/Pearson English International Certificate Teacher Training Guide',
+ 'upload_date': '20200219',
+ 'timestamp': 1582149600.0,
+ 'view_count': int,
+ }
+ }, {
+ 'url': 'https://event.on24.com/wcc/r/2639291/82829018E813065A122363877975752E?mode=login&email=johnsmith@gmail.com',
+ 'only_matching': True,
+ }, {
+ 'url': 'https://event.on24.com/eventRegistration/console/EventConsoleApollo.jsp?&eventid=2639291&sessionid=1&username=&partnerref=&format=fhvideo1&mobile=&flashsupportedmobiledevice=&helpcenter=&key=82829018E813065A122363877975752E&newConsole=true&nxChe=true&newTabCon=true&text_language_id=en&playerwidth=748&playerheight=526&eventuserid=338788762&contenttype=A&mediametricsessionid=384764716&mediametricid=3558192&usercd=369267058&mode=launch',
+ 'only_matching': True,
+ }]
+
+ def _real_extract(self, url):
+ mobj = self._match_valid_url(url)
+ event_id = mobj.group('id_1') or mobj.group('id_2')
+ event_key = mobj.group('key_1') or mobj.group('key_2')
+
+ event_data = self._download_json(
+ 'https://event.on24.com/apic/utilApp/EventConsoleCachedServlet',
+ event_id, query={
+ 'eventId': event_id,
+ 'displayProfile': 'player',
+ 'key': event_key,
+ 'contentType': 'A'
+ })
+        event_id = str_or_none(try_get(event_data, lambda x: x['presentationLogInfo']['eventid'])) or event_id
+ language = event_data.get('localelanguagecode')
+
+ formats = []
+ for media in event_data.get('mediaUrlInfo', []):
+            media_url = urljoin('https://event.on24.com/media/news/corporatevideo/events/', str_or_none(media.get('url')))
+ if not media_url:
+ continue
+ media_type = media.get('code')
+ if media_type == 'fhvideo1':
+ formats.append({
+ 'format_id': 'video',
+ 'url': media_url,
+ 'language': language,
+ 'ext': 'mp4',
+ 'vcodec': 'avc1.640020',
+ 'acodec': 'mp4a.40.2',
+ })
+ elif media_type == 'audio':
+ formats.append({
+ 'format_id': 'audio',
+ 'url': media_url,
+ 'language': language,
+ 'ext': 'wav',
+ 'vcodec': 'none',
+ 'acodec': 'wav'
+ })
+
+ return {
+ 'id': event_id,
+ 'title': strip_or_none(event_data.get('description')),
+ 'timestamp': int_or_none(try_get(event_data, lambda x: x['session']['startdate']), 1000),
+ 'webpage_url': f'https://event.on24.com/wcc/r/{event_id}/{event_key}',
+ 'view_count': event_data.get('registrantcount'),
+ 'formats': formats,
+ }
diff --git a/yt_dlp/extractor/once.py b/yt_dlp/extractor/once.py
new file mode 100644
index 0000000..989f10a
--- /dev/null
+++ b/yt_dlp/extractor/once.py
@@ -0,0 +1,40 @@
+import re
+
+from .common import InfoExtractor
+
+
+class OnceIE(InfoExtractor): # XXX: Conventionally, base classes should end with BaseIE/InfoExtractor
+ _VALID_URL = r'https?://.+?\.unicornmedia\.com/now/(?:ads/vmap/)?[^/]+/[^/]+/(?P<domain_id>[^/]+)/(?P<application_id>[^/]+)/(?:[^/]+/)?(?P<media_item_id>[^/]+)/content\.(?:once|m3u8|mp4)'
+ ADAPTIVE_URL_TEMPLATE = 'http://once.unicornmedia.com/now/master/playlist/%s/%s/%s/content.m3u8'
+ PROGRESSIVE_URL_TEMPLATE = 'http://once.unicornmedia.com/now/media/progressive/%s/%s/%s/%s/content.mp4'
+
+ def _extract_once_formats(self, url, http_formats_preference=None):
+ domain_id, application_id, media_item_id = re.match(
+ OnceIE._VALID_URL, url).groups()
+ formats = self._extract_m3u8_formats(
+ self.ADAPTIVE_URL_TEMPLATE % (
+ domain_id, application_id, media_item_id),
+ media_item_id, 'mp4', m3u8_id='hls', fatal=False)
+ progressive_formats = []
+ for adaptive_format in formats:
+            # Prevent advertisements from being embedded into the m3u8 playlist (see
+            # https://github.com/ytdl-org/youtube-dl/issues/8893#issuecomment-199912684)
+ adaptive_format['url'] = re.sub(
+ r'\badsegmentlength=\d+', r'adsegmentlength=0', adaptive_format['url'])
+ rendition_id = self._search_regex(
+ r'/now/media/playlist/[^/]+/[^/]+/([^/]+)',
+                adaptive_format['url'], 'rendition id', default=None)
+ if rendition_id:
+ progressive_format = adaptive_format.copy()
+ progressive_format.update({
+ 'url': self.PROGRESSIVE_URL_TEMPLATE % (
+ domain_id, application_id, rendition_id, media_item_id),
+ 'format_id': adaptive_format['format_id'].replace(
+ 'hls', 'http'),
+ 'protocol': 'http',
+ 'preference': http_formats_preference,
+ })
+ progressive_formats.append(progressive_format)
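+        # progressive URLs are derived from the HLS ones rather than
+        # advertised, so verify they actually exist before offering them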
+ self._check_formats(progressive_formats, media_item_id)
+ formats.extend(progressive_formats)
+ return formats
diff --git a/yt_dlp/extractor/ondemandkorea.py b/yt_dlp/extractor/ondemandkorea.py
new file mode 100644
index 0000000..591b414
--- /dev/null
+++ b/yt_dlp/extractor/ondemandkorea.py
@@ -0,0 +1,169 @@
+import functools
+import re
+import uuid
+
+from .common import InfoExtractor
+from ..utils import (
+ ExtractorError,
+ OnDemandPagedList,
+ float_or_none,
+ int_or_none,
+ join_nonempty,
+ parse_age_limit,
+ parse_qs,
+ str_or_none,
+ unified_strdate,
+ url_or_none,
+)
+from ..utils.traversal import traverse_obj
+
+
+class OnDemandKoreaIE(InfoExtractor):
+ _VALID_URL = r'https?://(?:www\.)?ondemandkorea\.com/(?:en/)?player/vod/[a-z0-9-]+\?(?:[^#]+&)?contentId=(?P<id>\d+)'
+ _GEO_COUNTRIES = ['US', 'CA']
+
+ _TESTS = [{
+ 'url': 'https://www.ondemandkorea.com/player/vod/ask-us-anything?contentId=686471',
+ 'md5': 'e2ff77255d989e3135bde0c5889fbce8',
+ 'info_dict': {
+ 'id': '686471',
+ 'ext': 'mp4',
+ 'title': 'Ask Us Anything: Jung Sung-ho, Park Seul-gi, Kim Bo-min, Yang Seung-won',
+ 'thumbnail': r're:^https?://.*\.(jpg|jpeg|png)',
+ 'duration': 5486.955,
+ 'release_date': '20220924',
+ 'series': 'Ask Us Anything',
+ 'series_id': '11790',
+ 'episode_number': 351,
+ 'episode': 'Jung Sung-ho, Park Seul-gi, Kim Bo-min, Yang Seung-won',
+ },
+ }, {
+ 'url': 'https://www.ondemandkorea.com/player/vod/breakup-probation-a-week?contentId=1595796',
+ 'md5': '57266c720006962be7ff415b24775caa',
+ 'info_dict': {
+ 'id': '1595796',
+ 'ext': 'mp4',
+ 'title': 'Breakup Probation, A Week: E08',
+ 'thumbnail': r're:^https?://.*\.(jpg|jpeg|png)',
+ 'duration': 1586.0,
+ 'release_date': '20231001',
+ 'series': 'Breakup Probation, A Week',
+ 'series_id': '22912',
+ 'episode_number': 8,
+ 'episode': 'E08',
+ },
+ }, {
+ 'url': 'https://www.ondemandkorea.com/player/vod/the-outlaws?contentId=369531',
+ 'md5': 'fa5523b87aa1f6d74fc622a97f2b47cd',
+ 'info_dict': {
+ 'id': '369531',
+ 'ext': 'mp4',
+ 'release_date': '20220519',
+ 'duration': 7267.0,
+ 'title': 'The Outlaws: Main Movie',
+ 'thumbnail': r're:^https?://.*\.(jpg|jpeg|png)',
+ 'age_limit': 18,
+ },
+ }, {
+ 'url': 'https://www.ondemandkorea.com/en/player/vod/capture-the-moment-how-is-that-possible?contentId=1605006',
+ 'only_matching': True,
+ }]
+
+ def _real_extract(self, url):
+ video_id = self._match_id(url)
+
+ data = self._download_json(
+ f'https://odkmedia.io/odx/api/v3/playback/{video_id}/', video_id, fatal=False,
+ headers={'service-name': 'odk'}, query={'did': str(uuid.uuid4())}, expected_status=(403, 404))
+ if not traverse_obj(data, ('result', {dict})):
+ msg = traverse_obj(data, ('messages', '__default'), 'title', expected_type=str)
+ raise ExtractorError(msg or 'Got empty response from playback API', expected=True)
+
+ data = data['result']
+
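+        # some entries wrap the real manifest in a URL whose 'stream_url'
+        # query parameter carries the direct stream; unwrap it when present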
+ def try_geo_bypass(url):
+ return traverse_obj(url, ({parse_qs}, 'stream_url', 0, {url_or_none})) or url
+
+ formats = []
+ for m3u8_url in traverse_obj(data, (('sources', 'manifest'), ..., 'url', {url_or_none}, {try_geo_bypass})):
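+            # probe for a 1080p rendition by rewriting the advertised 720p
+            # manifest URL, falling back to the original on failure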
+ mod_url = re.sub(r'_720(p?)\.m3u8', r'_1080\1.m3u8', m3u8_url)
+ if mod_url != m3u8_url:
+ mod_format = self._extract_m3u8_formats(
+ mod_url, video_id, note='Checking for higher quality format',
+ errnote='No higher quality format found', fatal=False)
+ if mod_format:
+ formats.extend(mod_format)
+ continue
+ formats.extend(self._extract_m3u8_formats(m3u8_url, video_id, fatal=False))
+
+ subtitles = {}
+ for track in traverse_obj(data, ('text_tracks', lambda _, v: url_or_none(v['url']))):
+ subtitles.setdefault(track.get('language', 'und'), []).append({
+ 'url': track['url'],
+ 'ext': track.get('codec'),
+ 'name': track.get('label'),
+ })
+
+ def if_series(key=None):
+ return lambda obj: obj[key] if key and obj['kind'] == 'series' else None
+
+ return {
+ 'id': video_id,
+ 'title': join_nonempty(
+ ('episode', 'program', 'title'),
+ ('episode', 'title'), from_dict=data, delim=': '),
+ **traverse_obj(data, {
+ 'thumbnail': ('episode', 'images', 'thumbnail', {url_or_none}),
+ 'release_date': ('episode', 'release_date', {lambda x: x.replace('-', '')}, {unified_strdate}),
+ 'duration': ('duration', {functools.partial(float_or_none, scale=1000)}),
+ 'age_limit': ('age_rating', 'name', {lambda x: x.replace('R', '')}, {parse_age_limit}),
+ 'series': ('episode', {if_series(key='program')}, 'title'),
+ 'series_id': ('episode', {if_series(key='program')}, 'id', {str_or_none}),
+ 'episode': ('episode', {if_series(key='title')}),
+ 'episode_number': ('episode', {if_series(key='number')}, {int_or_none}),
+ }, get_all=False),
+ 'formats': formats,
+ 'subtitles': subtitles,
+ }
+
+
+class OnDemandKoreaProgramIE(InfoExtractor):
+ _VALID_URL = r'https?://(?:www\.)?ondemandkorea\.com/(?:en/)?player/vod/(?P<id>[a-z0-9-]+)(?:$|#)'
+ _GEO_COUNTRIES = ['US', 'CA']
+
+ _TESTS = [{
+ 'url': 'https://www.ondemandkorea.com/player/vod/uskn-news',
+ 'info_dict': {
+ 'id': 'uskn-news',
+ },
+ 'playlist_mincount': 755,
+ }, {
+ 'url': 'https://www.ondemandkorea.com/en/player/vod/the-land',
+ 'info_dict': {
+ 'id': 'the-land',
+ },
+ 'playlist_count': 52,
+ }]
+
+ _PAGE_SIZE = 100
+
+ def _fetch_page(self, display_id, page):
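+        # OnDemandPagedList passes 0-based page numbers; the API is 1-based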
+ page += 1
+ page_data = self._download_json(
+ f'https://odkmedia.io/odx/api/v3/program/{display_id}/episodes/', display_id,
+ headers={'service-name': 'odk'}, query={
+ 'page': page,
+ 'page_size': self._PAGE_SIZE,
+ }, note=f'Downloading page {page}', expected_status=404)
+ for episode in traverse_obj(page_data, ('result', 'results', ...)):
+ yield self.url_result(
+ f'https://www.ondemandkorea.com/player/vod/{display_id}?contentId={episode["id"]}',
+ ie=OnDemandKoreaIE, video_title=episode.get('title'))
+
+ def _real_extract(self, url):
+ display_id = self._match_id(url)
+
+ entries = OnDemandPagedList(functools.partial(
+ self._fetch_page, display_id), self._PAGE_SIZE)
+
+ return self.playlist_result(entries, display_id)
diff --git a/yt_dlp/extractor/onefootball.py b/yt_dlp/extractor/onefootball.py
new file mode 100644
index 0000000..e1b7268
--- /dev/null
+++ b/yt_dlp/extractor/onefootball.py
@@ -0,0 +1,51 @@
+from .common import InfoExtractor
+from .jwplatform import JWPlatformIE
+from ..utils import make_archive_id
+
+
+class OneFootballIE(InfoExtractor):
+ _VALID_URL = r'https?://(?:www\.)?onefootball\.com/[a-z]{2}/video/[^/&?#]+-(?P<id>\d+)'
+
+ _TESTS = [{
+ 'url': 'https://onefootball.com/en/video/highlights-fc-zuerich-3-3-fc-basel-34012334',
+ 'info_dict': {
+ 'id': 'Y2VtcWAT',
+ 'ext': 'mp4',
+ 'title': 'Highlights: FC Zürich 3-3 FC Basel',
+ 'description': 'md5:33d9855cb790702c4fe42a513700aba8',
+ 'thumbnail': 'https://cdn.jwplayer.com/v2/media/Y2VtcWAT/poster.jpg?width=720',
+ 'timestamp': 1635874895,
+ 'upload_date': '20211102',
+ 'duration': 375.0,
+ 'tags': ['Football', 'Soccer', 'OneFootball'],
+ '_old_archive_ids': ['onefootball 34012334'],
+ },
+ 'params': {'skip_download': True},
+ 'expected_warnings': ['Failed to download m3u8 information'],
+ }, {
+ 'url': 'https://onefootball.com/en/video/klopp-fumes-at-var-decisions-in-west-ham-defeat-34041020',
+ 'info_dict': {
+ 'id': 'leVJrMho',
+ 'ext': 'mp4',
+ 'title': 'Klopp fumes at VAR decisions in West Ham defeat',
+ 'description': 'md5:9c50371095a01ad3f63311c73d8f51a5',
+ 'thumbnail': 'https://cdn.jwplayer.com/v2/media/leVJrMho/poster.jpg?width=720',
+ 'timestamp': 1636315232,
+ 'upload_date': '20211107',
+ 'duration': 93.0,
+ 'tags': ['Football', 'Soccer', 'OneFootball'],
+ '_old_archive_ids': ['onefootball 34041020'],
+ },
+ 'params': {'skip_download': True}
+ }]
+
+ def _real_extract(self, url):
+ video_id = self._match_id(url)
+ webpage = self._download_webpage(url, video_id)
+ data_json = self._search_json_ld(webpage, video_id, fatal=False)
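+        # drop the JSON-LD 'url' so it cannot override the manifest URL in
+        # the url_transparent result below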
+ data_json.pop('url', None)
+ m3u8_url = self._html_search_regex(r'(https://cdn\.jwplayer\.com/manifests/\w+\.m3u8)', webpage, 'm3u8_url')
+
+ return self.url_result(
+ m3u8_url, JWPlatformIE, video_id, _old_archive_ids=[make_archive_id(self, video_id)],
+ **data_json, url_transparent=True)
diff --git a/yt_dlp/extractor/onenewsnz.py b/yt_dlp/extractor/onenewsnz.py
new file mode 100644
index 0000000..a46211e
--- /dev/null
+++ b/yt_dlp/extractor/onenewsnz.py
@@ -0,0 +1,111 @@
+from .brightcove import BrightcoveNewIE
+from .common import InfoExtractor
+
+from ..utils import (
+ ExtractorError,
+ traverse_obj
+)
+
+
+class OneNewsNZIE(InfoExtractor):
+ IE_NAME = '1News'
+ IE_DESC = '1news.co.nz article videos'
+ _VALID_URL = r'https?://(?:www\.)?(?:1|one)news\.co\.nz/\d+/\d+/\d+/(?P<id>[^/?#&]+)'
+ _TESTS = [
+ { # Brightcove video
+ 'url': 'https://www.1news.co.nz/2022/09/29/cows-painted-green-on-parliament-lawn-in-climate-protest/',
+ 'info_dict': {
+ 'id': 'cows-painted-green-on-parliament-lawn-in-climate-protest',
+ 'title': '\'Cows\' painted green on Parliament lawn in climate protest',
+ },
+ 'playlist': [{
+ 'info_dict': {
+ 'id': '6312993358112',
+ 'title': 'Activists dressed as cows painted green outside Parliament in climate protest',
+ 'ext': 'mp4',
+ 'tags': 'count:6',
+ 'uploader_id': '963482464001',
+ 'timestamp': 1664416255,
+ 'upload_date': '20220929',
+ 'duration': 38.272,
+ 'thumbnail': r're:^https?://.*\.jpg$',
+ 'description': 'Greenpeace accused the Government of "greenwashing" instead of taking climate action.',
+ }
+ }]
+ }, {
+ # YouTube video
+ 'url': 'https://www.1news.co.nz/2022/09/30/now-is-the-time-to-care-about-womens-rugby/',
+ 'info_dict': {
+ 'id': 'now-is-the-time-to-care-about-womens-rugby',
+ 'title': 'Now is the time to care about women\'s rugby',
+ },
+ 'playlist': [{
+ 'info_dict': {
+ 'id': 's4wEB9neTfU',
+ 'title': 'Why I love women’s rugby: Black Fern Ruahei Demant',
+ 'ext': 'mp4',
+ 'channel_follower_count': int,
+ 'channel_url': 'https://www.youtube.com/channel/UC2BQ3U9IxoYIJyulv0bN5PQ',
+ 'tags': 'count:12',
+ 'uploader': 'Re: News',
+ 'upload_date': '20211215',
+ 'uploader_id': 'UC2BQ3U9IxoYIJyulv0bN5PQ',
+ 'uploader_url': 'http://www.youtube.com/channel/UC2BQ3U9IxoYIJyulv0bN5PQ',
+ 'channel_id': 'UC2BQ3U9IxoYIJyulv0bN5PQ',
+ 'channel': 'Re: News',
+ 'like_count': int,
+ 'thumbnail': 'https://i.ytimg.com/vi/s4wEB9neTfU/maxresdefault.jpg',
+ 'age_limit': 0,
+ 'view_count': int,
+ 'categories': ['Sports'],
+ 'duration': 222,
+ 'description': 'md5:8874410e5740ed1d8fd0df839f849813',
+ 'availability': 'public',
+ 'playable_in_embed': True,
+ 'live_status': 'not_live',
+ }
+ }]
+ }, {
+ # 2 Brightcove videos
+ 'url': 'https://www.1news.co.nz/2022/09/29/raw-videos-capture-hurricane-ians-fury-as-it-slams-florida/',
+ 'info_dict': {
+ 'id': 'raw-videos-capture-hurricane-ians-fury-as-it-slams-florida',
+ 'title': 'Raw videos capture Hurricane Ian\'s fury as it slams Florida',
+ },
+ 'playlist_mincount': 2,
+ }, {
+ 'url': 'https://www.onenews.co.nz/2022/09/29/cows-painted-green-on-parliament-lawn-in-climate-protest/',
+ 'only_matching': True,
+ }]
+
+ BRIGHTCOVE_URL_TEMPLATE = 'http://players.brightcove.net/%s/0xpHIR6IB_default/index.html?videoId=%s'
+
+ def _real_extract(self, url):
+ display_id = self._match_id(url)
+ webpage = self._download_webpage(url, display_id)
+
+ fusion_metadata = self._search_json(r'Fusion\.globalContent\s*=', webpage, 'fusion metadata', display_id)
+
+ entries = []
+ for item in traverse_obj(fusion_metadata, 'content_elements') or []:
+ item_type = traverse_obj(item, 'subtype')
+ if item_type == 'video':
+ brightcove_config = traverse_obj(item, ('embed', 'config'))
+ brightcove_url = self.BRIGHTCOVE_URL_TEMPLATE % (
+ traverse_obj(brightcove_config, 'brightcoveAccount') or '963482464001',
+ traverse_obj(brightcove_config, 'brightcoveVideoId')
+ )
+ entries.append(self.url_result(brightcove_url, BrightcoveNewIE))
+ elif item_type == 'youtube':
+ video_id_or_url = traverse_obj(item, ('referent', 'id'), ('raw_oembed', '_id'))
+ if video_id_or_url:
+ entries.append(self.url_result(video_id_or_url, ie='Youtube'))
+
+ if not entries:
+ raise ExtractorError('This article does not have a video.', expected=True)
+
+ playlist_title = (
+ traverse_obj(fusion_metadata, ('headlines', 'basic'))
+ or self._generic_title('', webpage)
+ )
+ return self.playlist_result(entries, display_id, playlist_title)
diff --git a/yt_dlp/extractor/oneplace.py b/yt_dlp/extractor/oneplace.py
new file mode 100644
index 0000000..86337ad
--- /dev/null
+++ b/yt_dlp/extractor/oneplace.py
@@ -0,0 +1,43 @@
+from .common import InfoExtractor
+
+
+class OnePlacePodcastIE(InfoExtractor):
+ _VALID_URL = r'https?://www\.oneplace\.com/[\w]+/[^/]+/listen/[\w-]+-(?P<id>\d+)'
+ _TESTS = [{
+ 'url': 'https://www.oneplace.com/ministries/a-daily-walk/listen/living-in-the-last-days-part-2-958461.html',
+ 'info_dict': {
+ 'id': '958461',
+ 'ext': 'mp3',
+ 'title': 'Living in the Last Days Part 2 | A Daily Walk with John Randall',
+ 'description': 'md5:fbb8f1cf21447ac54ecaa2887fc20c6e',
+ }
+ }, {
+ 'url': 'https://www.oneplace.com/ministries/ankerberg-show/listen/ep-3-relying-on-the-constant-companionship-of-the-holy-spirit-part-2-922513.html',
+ 'info_dict': {
+ 'id': '922513',
+ 'ext': 'mp3',
+ 'description': 'md5:8b810b4349aa40a5d033b4536fe428e1',
+ 'title': 'md5:ce10f7d8d5ddcf485ed8905ef109659d',
+ }
+ }]
+
+ def _real_extract(self, url):
+ video_id = self._match_id(url)
+ webpage = self._download_webpage(url, video_id)
+
+ return {
+ 'id': video_id,
+ 'url': self._search_regex((
+ r'mp3-url\s*=\s*"([^"]+)',
+ r'<div[^>]+id\s*=\s*"player"[^>]+data-media-url\s*=\s*"(?P<media_url>[^"]+)',
+ ), webpage, 'media url'),
+ 'ext': 'mp3',
+ 'vcodec': 'none',
+ 'title': self._html_search_regex((
+ r'<div[^>]class\s*=\s*"details"[^>]+>[^<]<h2[^>]+>(?P<content>[^>]+)>',
+ self._meta_regex('og:title'), self._meta_regex('title'),
+ ), webpage, 'title', group='content', default=None),
+ 'description': self._html_search_regex(
+ r'<div[^>]+class="[^"]+epDesc"[^>]*>\s*(?P<desc>.+?)\s*</div>',
+ webpage, 'description', default=None),
+ }
diff --git a/yt_dlp/extractor/onet.py b/yt_dlp/extractor/onet.py
new file mode 100644
index 0000000..0d59e8c
--- /dev/null
+++ b/yt_dlp/extractor/onet.py
@@ -0,0 +1,259 @@
+import re
+
+from .common import InfoExtractor
+from ..utils import (
+ determine_ext,
+ ExtractorError,
+ float_or_none,
+ get_element_by_class,
+ int_or_none,
+ js_to_json,
+ NO_DEFAULT,
+ parse_iso8601,
+ remove_start,
+ strip_or_none,
+ url_basename,
+)
+
+
+class OnetBaseIE(InfoExtractor):
+ _URL_BASE_RE = r'https?://(?:(?:www\.)?onet\.tv|onet100\.vod\.pl)/[a-z]/'
+
+    def _search_mvp_id(self, webpage, default=NO_DEFAULT):
+        return self._search_regex(
+            r'id=(["\'])mvp:(?P<id>.+?)\1', webpage, 'mvp id',
+            default=default, group='id')
+
+ def _extract_from_id(self, video_id, webpage=None):
+ response = self._download_json(
+ 'http://qi.ckm.onetapi.pl/', video_id,
+ query={
+ 'body[id]': video_id,
+ 'body[jsonrpc]': '2.0',
+ 'body[method]': 'get_asset_detail',
+ 'body[params][ID_Publikacji]': video_id,
+ 'body[params][Service]': 'www.onet.pl',
+ 'content-type': 'application/jsonp',
+ 'x-onet-app': 'player.front.onetapi.pl',
+ })
+
+ error = response.get('error')
+ if error:
+ raise ExtractorError(
+ '%s said: %s' % (self.IE_NAME, error['message']), expected=True)
+
+ video = response['result'].get('0')
+
+ formats = []
+ for format_type, formats_dict in video['formats'].items():
+ if not isinstance(formats_dict, dict):
+ continue
+ for format_id, format_list in formats_dict.items():
+ if not isinstance(format_list, list):
+ continue
+ for f in format_list:
+ video_url = f.get('url')
+ if not video_url:
+ continue
+ ext = determine_ext(video_url)
+ if format_id.startswith('ism'):
+ formats.extend(self._extract_ism_formats(
+ video_url, video_id, 'mss', fatal=False))
+ elif ext == 'mpd':
+ formats.extend(self._extract_mpd_formats(
+ video_url, video_id, mpd_id='dash', fatal=False))
+ elif format_id.startswith('hls'):
+ formats.extend(self._extract_m3u8_formats(
+ video_url, video_id, 'mp4', 'm3u8_native',
+ m3u8_id='hls', fatal=False))
+ else:
+ http_f = {
+ 'url': video_url,
+ 'format_id': format_id,
+ 'abr': float_or_none(f.get('audio_bitrate')),
+ }
+ if format_type == 'audio':
+ http_f['vcodec'] = 'none'
+ else:
+ http_f.update({
+ 'height': int_or_none(f.get('vertical_resolution')),
+ 'width': int_or_none(f.get('horizontal_resolution')),
+ 'vbr': float_or_none(f.get('video_bitrate')),
+ })
+ formats.append(http_f)
+
+ meta = video.get('meta', {})
+
+ title = (self._og_search_title(
+ webpage, default=None) if webpage else None) or meta['title']
+ description = (self._og_search_description(
+ webpage, default=None) if webpage else None) or meta.get('description')
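+ # fall back to the misspelled 'lenght' key that some API responses apparently use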
+ duration = meta.get('length') or meta.get('lenght')
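+ # 'addDate' separates date and time with a space, hence the explicit delimiter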
+ timestamp = parse_iso8601(meta.get('addDate'), ' ')
+
+ return {
+ 'id': video_id,
+ 'title': title,
+ 'description': description,
+ 'duration': duration,
+ 'timestamp': timestamp,
+ 'formats': formats,
+ }
+
+
+class OnetMVPIE(OnetBaseIE):
+ _VALID_URL = r'onetmvp:(?P<id>\d+\.\d+)'
+
+ _TEST = {
+ 'url': 'onetmvp:381027.1509591944',
+ 'only_matching': True,
+ }
+
+ def _real_extract(self, url):
+ return self._extract_from_id(self._match_id(url))
+
+
+class OnetIE(OnetBaseIE):
+ _VALID_URL = OnetBaseIE._URL_BASE_RE + r'[a-z]+/(?P<display_id>[0-9a-z-]+)/(?P<id>[0-9a-z]+)'
+ IE_NAME = 'onet.tv'
+
+ _TESTS = [{
+ 'url': 'http://onet.tv/k/openerfestival/open-er-festival-2016-najdziwniejsze-wymagania-gwiazd/qbpyqc',
+ 'md5': '436102770fb095c75b8bb0392d3da9ff',
+ 'info_dict': {
+ 'id': 'qbpyqc',
+ 'display_id': 'open-er-festival-2016-najdziwniejsze-wymagania-gwiazd',
+ 'ext': 'mp4',
+ 'title': 'Open\'er Festival 2016: najdziwniejsze wymagania gwiazd',
+ 'description': 'Trzy samochody, których nigdy nie użyto, prywatne spa, hotel dekorowany czarnym suknem czy nielegalne używki. Organizatorzy koncertów i festiwali muszą stawać przed nie lada wyzwaniem zapraszając gwia...',
+ 'upload_date': '20160705',
+ 'timestamp': 1467721580,
+ },
+ }, {
+ 'url': 'https://onet100.vod.pl/k/openerfestival/open-er-festival-2016-najdziwniejsze-wymagania-gwiazd/qbpyqc',
+ 'only_matching': True,
+ }]
+
+ def _real_extract(self, url):
+ mobj = self._match_valid_url(url)
+ display_id, video_id = mobj.group('display_id', 'id')
+
+ webpage = self._download_webpage(url, display_id)
+
+ mvp_id = self._search_mvp_id(webpage)
+
+ info_dict = self._extract_from_id(mvp_id, webpage)
+ info_dict.update({
+ 'id': video_id,
+ 'display_id': display_id,
+ })
+
+ return info_dict
+
+
+class OnetChannelIE(OnetBaseIE):
+ _VALID_URL = OnetBaseIE._URL_BASE_RE + r'(?P<id>[a-z]+)(?:[?#]|$)'
+ IE_NAME = 'onet.tv:channel'
+
+ _TESTS = [{
+ 'url': 'http://onet.tv/k/openerfestival',
+ 'info_dict': {
+ 'id': 'openerfestival',
+ 'title': "Open'er Festival",
+ 'description': "Tak było na Open'er Festival 2016! Oglądaj nasze reportaże i wywiady z artystami.",
+ },
+ 'playlist_mincount': 35,
+ }, {
+ 'url': 'https://onet100.vod.pl/k/openerfestival',
+ 'only_matching': True,
+ }]
+
+ def _real_extract(self, url):
+ channel_id = self._match_id(url)
+
+ webpage = self._download_webpage(url, channel_id)
+
+ current_clip_info = self._parse_json(self._search_regex(
+ r'var\s+currentClip\s*=\s*({[^}]+})', webpage, 'video info'), channel_id,
+ transform_source=lambda s: js_to_json(re.sub(r'\'\s*\+\s*\'', '', s)))
+ video_id = remove_start(current_clip_info['ckmId'], 'mvp:')
+ video_name = url_basename(current_clip_info['url'])
+
+ if not self._yes_playlist(channel_id, video_name, playlist_label='channel'):
+ return self._extract_from_id(video_id, webpage)
+
+ matches = re.findall(
+ r'<a[^>]+href=[\'"](%s[a-z]+/[0-9a-z-]+/[0-9a-z]+)' % self._URL_BASE_RE,
+ webpage)
+ entries = [
+ self.url_result(video_link, OnetIE.ie_key())
+ for video_link in matches]
+
+ channel_title = strip_or_none(get_element_by_class('o_channelName', webpage))
+ channel_description = strip_or_none(get_element_by_class('o_channelDesc', webpage))
+ return self.playlist_result(entries, channel_id, channel_title, channel_description)
+
+
+class OnetPlIE(InfoExtractor):
+ _VALID_URL = r'https?://(?:[^/]+\.)?(?:onet|businessinsider\.com|plejada)\.pl/(?:[^/]+/)+(?P<id>[0-9a-z]+)'
+ IE_NAME = 'onet.pl'
+
+ _TESTS = [{
+ 'url': 'http://eurosport.onet.pl/zimowe/skoki-narciarskie/ziobro-wygral-kwalifikacje-w-pjongczangu/9ckrly',
+ 'md5': 'b94021eb56214c3969380388b6e73cb0',
+ 'info_dict': {
+ 'id': '1561707.1685479',
+ 'ext': 'mp4',
+ 'title': 'Ziobro wygrał kwalifikacje w Pjongczangu',
+ 'description': 'md5:61fb0740084d2d702ea96512a03585b4',
+ 'upload_date': '20170214',
+ 'timestamp': 1487078046,
+ },
+ }, {
+ # embedded via pulsembed
+ 'url': 'http://film.onet.pl/pensjonat-nad-rozlewiskiem-relacja-z-planu-serialu/y428n0',
+ 'info_dict': {
+ 'id': '501235.965429946',
+ 'ext': 'mp4',
+ 'title': '"Pensjonat nad rozlewiskiem": relacja z planu serialu',
+ 'upload_date': '20170622',
+ 'timestamp': 1498159955,
+ },
+ 'params': {
+ 'skip_download': True,
+ },
+ }, {
+ 'url': 'http://film.onet.pl/zwiastuny/ghost-in-the-shell-drugi-zwiastun-pl/5q6yl3',
+ 'only_matching': True,
+ }, {
+ 'url': 'http://moto.onet.pl/jak-wybierane-sa-miejsca-na-fotoradary/6rs04e',
+ 'only_matching': True,
+ }, {
+ 'url': 'http://businessinsider.com.pl/wideo/scenariusz-na-koniec-swiata-wedlug-nasa/dwnqptk',
+ 'only_matching': True,
+ }, {
+ 'url': 'http://plejada.pl/weronika-rosati-o-swoim-domniemanym-slubie/n2bq89',
+ 'only_matching': True,
+ }]
+
+ def _search_mvp_id(self, webpage, default=NO_DEFAULT):
+ return self._search_regex(
+ r'data-(?:params-)?mvp=["\'](\d+\.\d+)', webpage, 'mvp id',
+ default=default)
+
+ def _real_extract(self, url):
+ video_id = self._match_id(url)
+
+ webpage = self._download_webpage(url, video_id)
+
+ mvp_id = self._search_mvp_id(webpage, default=None)
+
+ if not mvp_id:
+ pulsembed_url = self._search_regex(
+ r'data-src=(["\'])(?P<url>(?:https?:)?//pulsembed\.eu/.+?)\1',
+ webpage, 'pulsembed url', group='url')
+ webpage = self._download_webpage(
+ pulsembed_url, video_id, 'Downloading pulsembed webpage')
+ mvp_id = self._search_mvp_id(webpage)
+
+ return self.url_result(
+ 'onetmvp:%s' % mvp_id, OnetMVPIE.ie_key(), video_id=mvp_id)
diff --git a/yt_dlp/extractor/onionstudios.py b/yt_dlp/extractor/onionstudios.py
new file mode 100644
index 0000000..5fa49e1
--- /dev/null
+++ b/yt_dlp/extractor/onionstudios.py
@@ -0,0 +1,42 @@
+from .common import InfoExtractor
+from ..compat import compat_str
+from ..utils import js_to_json
+
+
+class OnionStudiosIE(InfoExtractor):
+ _VALID_URL = r'https?://(?:www\.)?onionstudios\.com/(?:video(?:s/[^/]+-|/)|embed\?.*\bid=)(?P<id>\d+)(?!-)'
+ _EMBED_REGEX = [r'(?s)<(?:iframe|bulbs-video)[^>]+?src=(["\'])(?P<url>(?:https?:)?//(?:www\.)?onionstudios\.com/(?:embed.+?|video/\d+\.json))\1']
+
+ _TESTS = [{
+ 'url': 'http://www.onionstudios.com/videos/hannibal-charges-forward-stops-for-a-cocktail-2937',
+ 'md5': '5a118d466d62b5cd03647cf2c593977f',
+ 'info_dict': {
+ 'id': '3459881',
+ 'ext': 'mp4',
+ 'title': 'Hannibal charges forward, stops for a cocktail',
+ 'description': 'md5:545299bda6abf87e5ec666548c6a9448',
+ 'thumbnail': r're:^https?://.*\.jpg$',
+ 'uploader': 'a.v. club',
+ 'upload_date': '20150619',
+ 'timestamp': 1434728546,
+ },
+ }, {
+ 'url': 'http://www.onionstudios.com/embed?id=2855&autoplay=true',
+ 'only_matching': True,
+ }, {
+ 'url': 'http://www.onionstudios.com/video/6139.json',
+ 'only_matching': True,
+ }]
+
+ def _real_extract(self, url):
+ video_id = self._match_id(url)
+
+ webpage = self._download_webpage(
+ 'http://onionstudios.com/embed/dc94dc2899fe644c0e7241fa04c1b732.js',
+ video_id)
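+ # the embed script assigns a mapping of video ids to Kinja MCP ids
+ # to window.mcpMapping; normalize the JS object with js_to_json before parsing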
+ mcp_id = compat_str(self._parse_json(self._search_regex(
+ r'window\.mcpMapping\s*=\s*({.+?});', webpage,
+ 'MCP Mapping'), video_id, js_to_json)[video_id]['mcp_id'])
+ return self.url_result(
+ 'http://kinja.com/ajax/inset/iframe?id=mcp-' + mcp_id,
+ 'KinjaEmbed', mcp_id)
diff --git a/yt_dlp/extractor/opencast.py b/yt_dlp/extractor/opencast.py
new file mode 100644
index 0000000..1fafd9a
--- /dev/null
+++ b/yt_dlp/extractor/opencast.py
@@ -0,0 +1,183 @@
+import re
+
+from .common import InfoExtractor
+from ..utils import (
+ determine_ext,
+ ExtractorError,
+ int_or_none,
+ parse_iso8601,
+ traverse_obj,
+ variadic,
+)
+
+
+class OpencastBaseIE(InfoExtractor):
+ _INSTANCES_RE = r'''(?:
+ opencast\.informatik\.kit\.edu|
+ electures\.uni-muenster\.de|
+ oc-presentation\.ltcc\.tuwien\.ac\.at|
+ medien\.ph-noe\.ac\.at|
+ oc-video\.ruhr-uni-bochum\.de|
+ oc-video1\.ruhr-uni-bochum\.de|
+ opencast\.informatik\.uni-goettingen\.de|
+ heicast\.uni-heidelberg\.de|
+ opencast\.hawk\.de:8080|
+ opencast\.hs-osnabrueck\.de|
+ video[0-9]+\.virtuos\.uni-osnabrueck\.de|
+ opencast\.uni-koeln\.de|
+ media\.opencast\.hochschule-rhein-waal\.de|
+ matterhorn\.dce\.harvard\.edu|
+ hs-harz\.opencast\.uni-halle\.de|
+ videocampus\.urz\.uni-leipzig\.de|
+ media\.uct\.ac\.za|
+ vid\.igb\.illinois\.edu|
+ cursosabertos\.c3sl\.ufpr\.br|
+ mcmedia\.missioncollege\.org|
+ clases\.odon\.edu\.uy
+ )'''
+ _UUID_RE = r'[\da-fA-F]{8}-[\da-fA-F]{4}-[\da-fA-F]{4}-[\da-fA-F]{4}-[\da-fA-F]{12}'
+
+ def _call_api(self, host, video_id, **kwargs):
+ return self._download_json(self._API_BASE % (host, video_id), video_id, **kwargs)
+
+ def _parse_mediapackage(self, video):
+ video_id = video.get('id')
+ if video_id is None:
+ raise ExtractorError('Video id was not found')
+
+ formats = []
+ for track in variadic(traverse_obj(video, ('media', 'track')) or []):
+ href = track.get('url')
+ if href is None:
+ continue
+ ext = determine_ext(href, None)
+
+ transport = track.get('transport')
+
+ if transport == 'DASH' or ext == 'mpd':
+ formats.extend(self._extract_mpd_formats(href, video_id, mpd_id='dash', fatal=False))
+ elif transport == 'HLS' or ext == 'm3u8':
+ formats.extend(self._extract_m3u8_formats(
+ href, video_id, m3u8_id='hls', entry_protocol='m3u8_native', fatal=False))
+ elif transport == 'HDS' or ext == 'f4m':
+ formats.extend(self._extract_f4m_formats(href, video_id, f4m_id='hds', fatal=False))
+ elif transport == 'SMOOTH':
+ formats.extend(self._extract_ism_formats(href, video_id, ism_id='smooth', fatal=False))
+ elif ext == 'smil':
+ formats.extend(self._extract_smil_formats(href, video_id, fatal=False))
+ else:
+ track_obj = {
+ 'url': href,
+ 'ext': ext,
+ 'format_note': track.get('transport'),
+ 'resolution': traverse_obj(track, ('video', 'resolution')),
+ 'fps': int_or_none(traverse_obj(track, ('video', 'framerate'))),
+ 'vbr': int_or_none(traverse_obj(track, ('video', 'bitrate')), scale=1000),
+ 'vcodec': traverse_obj(track, ('video', 'encoder', 'type')) if track.get('video') else 'none',
+ 'abr': int_or_none(traverse_obj(track, ('audio', 'bitrate')), scale=1000),
+ 'asr': int_or_none(traverse_obj(track, ('audio', 'samplingrate'))),
+ 'acodec': traverse_obj(track, ('audio', 'encoder', 'type')) if track.get('audio') else 'none',
+ }
+
+ if transport == 'RTMP':
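+ # split rtmp://host/app/ext:playpath into app, ext and playpath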
+ m_obj = re.search(r'(?:rtmp://[^/]+/(?P<app>[^/]+))/(?P<ext>.+):(?P<playpath>.+)', href)
+ if not m_obj:
+ continue
+ track_obj.update({
+ 'app': m_obj.group('app'),
+ 'ext': m_obj.group('ext'),
+ 'play_path': m_obj.group('ext') + ':' + m_obj.group('playpath'),
+ 'rtmp_live': True,
+ 'preference': -2,
+ })
+ formats.append(track_obj)
+
+ return {
+ 'id': video_id,
+ 'formats': formats,
+ 'title': video.get('title'),
+ 'series': video.get('seriestitle'),
+ 'season_id': video.get('series'),
+ 'creator': traverse_obj(video, ('creators', 'creator')),
+ 'timestamp': parse_iso8601(video.get('start')),
+ 'thumbnail': traverse_obj(video, ('attachments', 'attachment', ..., 'url'), get_all=False),
+ }
+
+
+class OpencastIE(OpencastBaseIE):
+ _VALID_URL = rf'''(?x)
+ https?://(?P<host>{OpencastBaseIE._INSTANCES_RE})/paella/ui/watch\.html\?
+ (?:[^#]+&)?id=(?P<id>{OpencastBaseIE._UUID_RE})'''
+
+ _API_BASE = 'https://%s/search/episode.json?id=%s'
+
+ _TESTS = [
+ {
+ 'url': 'https://oc-video1.ruhr-uni-bochum.de/paella/ui/watch.html?id=ed063cd5-72c8-46b5-a60a-569243edcea8',
+ 'md5': '554c8e99a90f7be7e874619fcf2a3bc9',
+ 'info_dict': {
+ 'id': 'ed063cd5-72c8-46b5-a60a-569243edcea8',
+ 'ext': 'mp4',
+ 'title': '11 - Kryptographie - 24.11.2015',
+ 'thumbnail': r're:^https?://.*\.jpg$',
+ 'timestamp': 1606208400,
+ 'upload_date': '20201124',
+ 'season_id': 'cf68a4a1-36b1-4a53-a6ba-61af5705a0d0',
+ 'series': 'Kryptographie - WiSe 15/16',
+ 'creator': 'Alexander May',
+ },
+ }
+ ]
+
+ def _real_extract(self, url):
+ host, video_id = self._match_valid_url(url).group('host', 'id')
+ return self._parse_mediapackage(
+ self._call_api(host, video_id)['search-results']['result']['mediapackage'])
+
+
+class OpencastPlaylistIE(OpencastBaseIE):
+ _VALID_URL = rf'''(?x)
+ https?://(?P<host>{OpencastBaseIE._INSTANCES_RE})(?:
+ /engage/ui/index\.html\?(?:[^#]+&)?epFrom=|
+ /ltitools/index\.html\?(?:[^#]+&)?series=
+ )(?P<id>{OpencastBaseIE._UUID_RE})'''
+
+ _API_BASE = 'https://%s/search/episode.json?sid=%s'
+
+ _TESTS = [
+ {
+ 'url': 'https://oc-video1.ruhr-uni-bochum.de/engage/ui/index.html?epFrom=cf68a4a1-36b1-4a53-a6ba-61af5705a0d0',
+ 'info_dict': {
+ 'id': 'cf68a4a1-36b1-4a53-a6ba-61af5705a0d0',
+ 'title': 'Kryptographie - WiSe 15/16',
+ },
+ 'playlist_mincount': 29,
+ },
+ {
+ 'url': 'https://oc-video1.ruhr-uni-bochum.de/ltitools/index.html?subtool=series&series=cf68a4a1-36b1-4a53-a6ba-61af5705a0d0&lng=de',
+ 'info_dict': {
+ 'id': 'cf68a4a1-36b1-4a53-a6ba-61af5705a0d0',
+ 'title': 'Kryptographie - WiSe 15/16',
+ },
+ 'playlist_mincount': 29,
+ },
+ {
+ 'url': 'https://electures.uni-muenster.de/engage/ui/index.html?e=1&p=1&epFrom=39391d10-a711-4d23-b21d-afd2ed7d758c',
+ 'info_dict': {
+ 'id': '39391d10-a711-4d23-b21d-afd2ed7d758c',
+ 'title': '021670 Theologische Themen bei Hans Blumenberg WiSe 2017/18',
+ },
+ 'playlist_mincount': 13,
+ },
+ ]
+
+ def _real_extract(self, url):
+ host, video_id = self._match_valid_url(url).group('host', 'id')
+
+ entries = [
+ self._parse_mediapackage(episode['mediapackage'])
+ for episode in variadic(self._call_api(host, video_id)['search-results']['result'])
+ if episode.get('mediapackage')
+ ]
+
+ return self.playlist_result(entries, video_id, traverse_obj(entries, (0, 'series')))
diff --git a/yt_dlp/extractor/openload.py b/yt_dlp/extractor/openload.py
new file mode 100644
index 0000000..56b8330
--- /dev/null
+++ b/yt_dlp/extractor/openload.py
@@ -0,0 +1,243 @@
+import collections
+import contextlib
+import json
+import os
+import subprocess
+import tempfile
+
+from ..compat import compat_urlparse
+from ..utils import (
+ ExtractorError,
+ Popen,
+ check_executable,
+ format_field,
+ get_exe_version,
+ is_outdated_version,
+ shell_quote,
+)
+
+
+def cookie_to_dict(cookie):
+ cookie_dict = {
+ 'name': cookie.name,
+ 'value': cookie.value,
+ }
+ if cookie.port_specified:
+ cookie_dict['port'] = cookie.port
+ if cookie.domain_specified:
+ cookie_dict['domain'] = cookie.domain
+ if cookie.path_specified:
+ cookie_dict['path'] = cookie.path
+ if cookie.expires is not None:
+ cookie_dict['expires'] = cookie.expires
+ if cookie.secure is not None:
+ cookie_dict['secure'] = cookie.secure
+ if cookie.discard is not None:
+ cookie_dict['discard'] = cookie.discard
+ with contextlib.suppress(TypeError):
+ if (cookie.has_nonstandard_attr('httpOnly')
+ or cookie.has_nonstandard_attr('httponly')
+ or cookie.has_nonstandard_attr('HttpOnly')):
+ cookie_dict['httponly'] = True
+ return cookie_dict
+
+
+def cookie_jar_to_list(cookie_jar):
+ return [cookie_to_dict(cookie) for cookie in cookie_jar]
+
+
+class PhantomJSwrapper:
+ """PhantomJS wrapper class
+
+ This class is experimental.
+ """
+
+ INSTALL_HINT = 'Please download it from https://phantomjs.org/download.html'
+
+ _BASE_JS = R'''
+ phantom.onError = function(msg, trace) {{
+ var msgStack = ['PHANTOM ERROR: ' + msg];
+ if(trace && trace.length) {{
+ msgStack.push('TRACE:');
+ trace.forEach(function(t) {{
+ msgStack.push(' -> ' + (t.file || t.sourceURL) + ': ' + t.line
+ + (t.function ? ' (in function ' + t.function +')' : ''));
+ }});
+ }}
+ console.error(msgStack.join('\n'));
+ phantom.exit(1);
+ }};
+ '''
+
+ _TEMPLATE = R'''
+ var page = require('webpage').create();
+ var fs = require('fs');
+ var read = {{ mode: 'r', charset: 'utf-8' }};
+ var write = {{ mode: 'w', charset: 'utf-8' }};
+ JSON.parse(fs.read("{cookies}", read)).forEach(function(x) {{
+ phantom.addCookie(x);
+ }});
+ page.settings.resourceTimeout = {timeout};
+ page.settings.userAgent = "{ua}";
+ page.onLoadStarted = function() {{
+ page.evaluate(function() {{
+ delete window._phantom;
+ delete window.callPhantom;
+ }});
+ }};
+ var saveAndExit = function() {{
+ fs.write("{html}", page.content, write);
+ fs.write("{cookies}", JSON.stringify(phantom.cookies), write);
+ phantom.exit();
+ }};
+ page.onLoadFinished = function(status) {{
+ if(page.url === "") {{
+ page.setContent(fs.read("{html}", read), "{url}");
+ }}
+ else {{
+ {jscode}
+ }}
+ }};
+ page.open("");
+ '''
+
+ _TMP_FILE_NAMES = ['script', 'html', 'cookies']
+
+ @staticmethod
+ def _version():
+ return get_exe_version('phantomjs', version_re=r'([0-9.]+)')
+
+ def __init__(self, extractor, required_version=None, timeout=10000):
+ self._TMP_FILES = {}
+
+ self.exe = check_executable('phantomjs', ['-v'])
+ if not self.exe:
+ raise ExtractorError(f'PhantomJS not found, {self.INSTALL_HINT}', expected=True)
+
+ self.extractor = extractor
+
+ if required_version:
+ version = self._version()
+ if is_outdated_version(version, required_version):
+ self.extractor._downloader.report_warning(
+ 'Your copy of PhantomJS is outdated, update it to version '
+ '%s or newer if you encounter any errors.' % required_version)
+
+ for name in self._TMP_FILE_NAMES:
+ tmp = tempfile.NamedTemporaryFile(delete=False)
+ tmp.close()
+ self._TMP_FILES[name] = tmp
+
+ self.options = collections.ChainMap({
+ 'timeout': timeout,
+ }, {
+ x: self._TMP_FILES[x].name.replace('\\', '\\\\').replace('"', '\\"')
+ for x in self._TMP_FILE_NAMES
+ })
+
+ def __del__(self):
+ for name in self._TMP_FILE_NAMES:
+ with contextlib.suppress(OSError, KeyError):
+ os.remove(self._TMP_FILES[name].name)
+
+ def _save_cookies(self, url):
+ cookies = cookie_jar_to_list(self.extractor.cookiejar)
+ for cookie in cookies:
+ if 'path' not in cookie:
+ cookie['path'] = '/'
+ if 'domain' not in cookie:
+ cookie['domain'] = compat_urlparse.urlparse(url).netloc
+ with open(self._TMP_FILES['cookies'].name, 'wb') as f:
+ f.write(json.dumps(cookies).encode('utf-8'))
+
+ def _load_cookies(self):
+ with open(self._TMP_FILES['cookies'].name, 'rb') as f:
+ cookies = json.loads(f.read().decode('utf-8'))
+ for cookie in cookies:
+ if cookie['httponly'] is True:
+ cookie['rest'] = {'httpOnly': None}
+ if 'expiry' in cookie:
+ cookie['expire_time'] = cookie['expiry']
+ self.extractor._set_cookie(**cookie)
+
+ def get(self, url, html=None, video_id=None, note=None, note2='Executing JS on webpage', headers={}, jscode='saveAndExit();'):
+ """
+ Downloads webpage (if needed) and executes JS
+
+ Params:
+ url: website url
+ html: optional, html code of website
+ video_id: video id
+ note: optional, displayed when downloading webpage
+ note2: optional, displayed when executing JS
+ headers: custom http headers
+ jscode: code to be executed when page is loaded
+
+ Returns a tuple with:
+ * the downloaded website (after JS execution)
+ * anything you print with `console.log` (but not inside `page.evaluate`!)
+
+ In most cases you don't need to add any `jscode`.
+ It is executed in `page.onLoadFinished`.
+ `saveAndExit();` is mandatory; use it instead of `phantom.exit()`.
+ It is possible to wait for some element on the webpage, e.g.:
+ var check = function() {
+ var elementFound = page.evaluate(function() {
+ return document.querySelector('#b.done') !== null;
+ });
+ if(elementFound)
+ saveAndExit();
+ else
+ window.setTimeout(check, 500);
+ }
+
+ page.evaluate(function(){
+ document.querySelector('#a').click();
+ });
+ check();
+ """
+ if 'saveAndExit();' not in jscode:
+ raise ExtractorError('`saveAndExit();` not found in `jscode`')
+ if not html:
+ html = self.extractor._download_webpage(url, video_id, note=note, headers=headers)
+ with open(self._TMP_FILES['html'].name, 'wb') as f:
+ f.write(html.encode('utf-8'))
+
+ self._save_cookies(url)
+
+ user_agent = headers.get('User-Agent') or self.extractor.get_param('http_headers')['User-Agent']
+ jscode = self._TEMPLATE.format_map(self.options.new_child({
+ 'url': url,
+ 'ua': user_agent.replace('"', '\\"'),
+ 'jscode': jscode,
+ }))
+
+ stdout = self.execute(jscode, video_id, note=note2)
+
+ with open(self._TMP_FILES['html'].name, 'rb') as f:
+ html = f.read().decode('utf-8')
+ self._load_cookies()
+
+ return html, stdout
+
+ def execute(self, jscode, video_id=None, *, note='Executing JS'):
+ """Execute JS and return stdout"""
+ if 'phantom.exit();' not in jscode:
+ jscode += ';\nphantom.exit();'
+ jscode = self._BASE_JS + jscode
+
+ with open(self._TMP_FILES['script'].name, 'w', encoding='utf-8') as f:
+ f.write(jscode)
+ self.extractor.to_screen(f'{format_field(video_id, None, "%s: ")}{note}')
+
+ cmd = [self.exe, '--ssl-protocol=any', self._TMP_FILES['script'].name]
+ self.extractor.write_debug(f'PhantomJS command line: {shell_quote(cmd)}')
+ try:
+ stdout, stderr, returncode = Popen.run(cmd, timeout=self.options['timeout'] / 1000,
+ text=True, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
+ except Exception as e:
+ raise ExtractorError(f'{note} failed: Unable to run PhantomJS binary', cause=e)
+ if returncode:
+ raise ExtractorError(f'{note} failed with returncode {returncode}:\n{stderr.strip()}')
+
+ return stdout
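+
+
+# A minimal usage sketch (with a hypothetical extractor instance `ie` and a
+# page `url`/`video_id`); the wrapper is normally driven from an extractor:
+#
+#   phantom = PhantomJSwrapper(ie, required_version='2.0')
+#   # the default jscode 'saveAndExit();' simply dumps the rendered page
+#   html, stdout = phantom.get(url, video_id=video_id)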
diff --git a/yt_dlp/extractor/openrec.py b/yt_dlp/extractor/openrec.py
new file mode 100644
index 0000000..82a81c6
--- /dev/null
+++ b/yt_dlp/extractor/openrec.py
@@ -0,0 +1,151 @@
+from .common import InfoExtractor
+from ..compat import compat_str
+from ..utils import (
+ ExtractorError,
+ get_first,
+ int_or_none,
+ traverse_obj,
+ try_get,
+ unified_strdate,
+ unified_timestamp,
+)
+
+
+class OpenRecBaseIE(InfoExtractor):
+ _M3U8_HEADERS = {'Referer': 'https://www.openrec.tv/'}
+
+ def _extract_pagestore(self, webpage, video_id):
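+ # the page state is embedded as a JS object assigned to window.pageStore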
+ return self._parse_json(
+ self._search_regex(r'(?m)window\.pageStore\s*=\s*(\{.+?\});$', webpage, 'window.pageStore'), video_id)
+
+ def _expand_media(self, video_id, media):
+ for name, m3u8_url in (media or {}).items():
+ if not m3u8_url:
+ continue
+ yield from self._extract_m3u8_formats(
+ m3u8_url, video_id, ext='mp4', m3u8_id=name, headers=self._M3U8_HEADERS)
+
+ def _extract_movie(self, webpage, video_id, name, is_live):
+ window_stores = self._extract_pagestore(webpage, video_id)
+ movie_stores = [
+ # extract all three relevant data stores (most of the data is duplicated across them, with slight differences)
+ traverse_obj(window_stores, ('v8', 'state', 'movie'), expected_type=dict),
+ traverse_obj(window_stores, ('v8', 'movie'), expected_type=dict),
+ traverse_obj(window_stores, 'movieStore', expected_type=dict),
+ ]
+ if not any(movie_stores):
+ raise ExtractorError(f'Failed to extract {name} info')
+
+ formats = list(self._expand_media(video_id, get_first(movie_stores, 'media')))
+ if not formats:
+ # archived livestreams or subscriber-only videos
+ cookies = self._get_cookies('https://www.openrec.tv/')
+ detail = self._download_json(
+ f'https://apiv5.openrec.tv/api/v5/movies/{video_id}/detail', video_id,
+ headers={
+ 'Origin': 'https://www.openrec.tv',
+ 'Referer': 'https://www.openrec.tv/',
+ 'access-token': try_get(cookies, lambda x: x.get('access_token').value),
+ 'uuid': try_get(cookies, lambda x: x.get('uuid').value),
+ })
+ new_media = traverse_obj(detail, ('data', 'items', ..., 'media'), get_all=False)
+ formats = list(self._expand_media(video_id, new_media))
+ is_live = False
+
+ return {
+ 'id': video_id,
+ 'title': get_first(movie_stores, 'title'),
+ 'description': get_first(movie_stores, 'introduction'),
+ 'thumbnail': get_first(movie_stores, 'thumbnailUrl'),
+ 'formats': formats,
+ 'uploader': get_first(movie_stores, ('channel', 'user', 'name')),
+ 'uploader_id': get_first(movie_stores, ('channel', 'user', 'id')),
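+ # 'publishedAt'/'time' may hold epoch milliseconds (hence scale=1000)
+ # or an ISO 8601 string, depending on which store provided it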
+ 'timestamp': int_or_none(get_first(movie_stores, ['publishedAt', 'time']), scale=1000) or unified_timestamp(get_first(movie_stores, 'publishedAt')),
+ 'is_live': is_live,
+ 'http_headers': self._M3U8_HEADERS,
+ }
+
+
+class OpenRecIE(OpenRecBaseIE):
+ IE_NAME = 'openrec'
+ _VALID_URL = r'https?://(?:www\.)?openrec\.tv/live/(?P<id>[^/]+)'
+ _TESTS = [{
+ 'url': 'https://www.openrec.tv/live/2p8v31qe4zy',
+ 'only_matching': True,
+ }, {
+ 'url': 'https://www.openrec.tv/live/wez93eqvjzl',
+ 'only_matching': True,
+ }]
+
+ def _real_extract(self, url):
+ video_id = self._match_id(url)
+ webpage = self._download_webpage(f'https://www.openrec.tv/live/{video_id}', video_id)
+
+ return self._extract_movie(webpage, video_id, 'live', True)
+
+
+class OpenRecCaptureIE(OpenRecBaseIE):
+ IE_NAME = 'openrec:capture'
+ _VALID_URL = r'https?://(?:www\.)?openrec\.tv/capture/(?P<id>[^/]+)'
+ _TESTS = [{
+ 'url': 'https://www.openrec.tv/capture/l9nk2x4gn14',
+ 'only_matching': True,
+ }, {
+ 'url': 'https://www.openrec.tv/capture/mldjr82p7qk',
+ 'info_dict': {
+ 'id': 'mldjr82p7qk',
+ 'title': 'たいじの恥ずかしい英語力',
+ 'uploader': 'たいちゃんねる',
+ 'uploader_id': 'Yaritaiji',
+ 'upload_date': '20210803',
+ },
+ }]
+
+ def _real_extract(self, url):
+ video_id = self._match_id(url)
+ webpage = self._download_webpage(f'https://www.openrec.tv/capture/{video_id}', video_id)
+
+ window_stores = self._extract_pagestore(webpage, video_id)
+ movie_store = window_stores.get('movie')
+
+ capture_data = window_stores.get('capture')
+ if not capture_data:
+ raise ExtractorError('Cannot extract capture data')
+
+ formats = self._extract_m3u8_formats(
+ capture_data.get('source'), video_id, ext='mp4', headers=self._M3U8_HEADERS)
+
+ return {
+ 'id': video_id,
+ 'title': capture_data.get('title'),
+ 'thumbnail': capture_data.get('thumbnailUrl'),
+ 'formats': formats,
+ 'timestamp': unified_timestamp(traverse_obj(movie_store, 'createdAt', expected_type=compat_str)),
+ 'uploader': traverse_obj(movie_store, ('channel', 'name'), expected_type=compat_str),
+ 'uploader_id': traverse_obj(movie_store, ('channel', 'id'), expected_type=compat_str),
+ 'upload_date': unified_strdate(capture_data.get('createdAt')),
+ 'http_headers': self._M3U8_HEADERS,
+ }
+
+
+class OpenRecMovieIE(OpenRecBaseIE):
+ IE_NAME = 'openrec:movie'
+ _VALID_URL = r'https?://(?:www\.)?openrec\.tv/movie/(?P<id>[^/]+)'
+ _TESTS = [{
+ 'url': 'https://www.openrec.tv/movie/nqz5xl5km8v',
+ 'info_dict': {
+ 'id': 'nqz5xl5km8v',
+ 'title': '限定コミュニティ(Discord)参加方法ご説明動画',
+ 'description': 'md5:ebd563e5f5b060cda2f02bf26b14d87f',
+ 'thumbnail': r're:https://.+',
+ 'uploader': 'タイキとカズヒロ',
+ 'uploader_id': 'taiki_to_kazuhiro',
+ 'timestamp': 1638856800,
+ },
+ }]
+
+ def _real_extract(self, url):
+ video_id = self._match_id(url)
+ webpage = self._download_webpage(f'https://www.openrec.tv/movie/{video_id}', video_id)
+
+ return self._extract_movie(webpage, video_id, 'movie', False)
diff --git a/yt_dlp/extractor/ora.py b/yt_dlp/extractor/ora.py
new file mode 100644
index 0000000..d49909d
--- /dev/null
+++ b/yt_dlp/extractor/ora.py
@@ -0,0 +1,71 @@
+import re
+
+from .common import InfoExtractor
+from ..compat import compat_urlparse
+from ..utils import (
+ get_element_by_attribute,
+ qualities,
+ unescapeHTML,
+)
+
+
+class OraTVIE(InfoExtractor):
+ _VALID_URL = r'https?://(?:www\.)?(?:ora\.tv|unsafespeech\.com)/([^/]+/)*(?P<id>[^/\?#]+)'
+ _TESTS = [{
+ 'url': 'https://www.ora.tv/larrykingnow/2015/12/16/vine-youtube-stars-zach-king-king-bach-on-their-viral-videos-0_36jupg6090pq',
+ 'md5': 'fa33717591c631ec93b04b0e330df786',
+ 'info_dict': {
+ 'id': '50178',
+ 'ext': 'mp4',
+ 'title': 'Vine & YouTube Stars Zach King & King Bach On Their Viral Videos!',
+ 'description': 'md5:ebbc5b1424dd5dba7be7538148287ac1',
+ }
+ }, {
+ 'url': 'http://www.unsafespeech.com/video/2016/5/10/student-self-censorship-and-the-thought-police-on-university-campuses-0_6622bnkppw4d',
+ 'only_matching': True,
+ }]
+
+ def _real_extract(self, url):
+ display_id = self._match_id(url)
+ webpage = self._download_webpage(url, display_id)
+
+ video_data = self._search_regex(
+ r'"(?:video|current)"\s*:\s*({[^}]+?})', webpage, 'current video')
+ m3u8_url = self._search_regex(
+ r'hls_stream"?\s*:\s*"([^"]+)', video_data, 'm3u8 url', None)
+ if m3u8_url:
+ formats = self._extract_m3u8_formats(
+ m3u8_url, display_id, 'mp4', 'm3u8_native',
+ m3u8_id='hls', fatal=False)
+ # similar to GameSpotIE
+ m3u8_path = compat_urlparse.urlparse(m3u8_url).path
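+ # the m3u8 path embeds a comma-separated quality list; substituting
+ # each quality back into the path yields progressive HTTP URLs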
+ QUALITIES_RE = r'((,[a-z]+\d+)+,?)'
+ available_qualities = self._search_regex(
+ QUALITIES_RE, m3u8_path, 'qualities').strip(',').split(',')
+ http_path = m3u8_path[1:].split('/', 1)[1]
+ http_template = re.sub(QUALITIES_RE, r'%s', http_path)
+ http_template = http_template.replace('.csmil/master.m3u8', '')
+ http_template = compat_urlparse.urljoin(
+ 'http://videocdn-pmd.ora.tv/', http_template)
+ preference = qualities(
+ ['mobile400', 'basic400', 'basic600', 'sd900', 'sd1200', 'sd1500', 'hd720', 'hd1080'])
+ for q in available_qualities:
+ formats.append({
+ 'url': http_template % q,
+ 'format_id': q,
+ 'quality': preference(q),
+ })
+ else:
+ return self.url_result(self._search_regex(
+ r'"youtube_id"\s*:\s*"([^"]+)', webpage, 'youtube id'), 'Youtube')
+
+ return {
+ 'id': self._search_regex(
+ r'"id"\s*:\s*(\d+)', video_data, 'video id', default=display_id),
+ 'display_id': display_id,
+ 'title': unescapeHTML(self._og_search_title(webpage)),
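+ # 'decription' [sic] matches the class name as it appears on the page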
+ 'description': get_element_by_attribute(
+ 'class', 'video_txt_decription', webpage),
+ 'thumbnail': self._proto_relative_url(self._search_regex(
+ r'"thumb"\s*:\s*"([^"]+)', video_data, 'thumbnail', None)),
+ 'formats': formats,
+ }
diff --git a/yt_dlp/extractor/orf.py b/yt_dlp/extractor/orf.py
new file mode 100644
index 0000000..526e9ac
--- /dev/null
+++ b/yt_dlp/extractor/orf.py
@@ -0,0 +1,630 @@
+import base64
+import functools
+import re
+
+from .common import InfoExtractor
+from ..networking import HEADRequest
+from ..utils import (
+ InAdvancePagedList,
+ clean_html,
+ determine_ext,
+ float_or_none,
+ int_or_none,
+ join_nonempty,
+ make_archive_id,
+ mimetype2ext,
+ orderedSet,
+ remove_end,
+ smuggle_url,
+ strip_jsonp,
+ try_call,
+ unescapeHTML,
+ unified_strdate,
+ unsmuggle_url,
+ url_or_none,
+)
+from ..utils.traversal import traverse_obj
+
+
+class ORFTVthekIE(InfoExtractor):
+ IE_NAME = 'orf:tvthek'
+ IE_DESC = 'ORF TVthek'
+ _VALID_URL = r'(?P<url>https?://tvthek\.orf\.at/(?:(?:[^/]+/){2}){1,2}(?P<id>\d+))(/[^/]+/(?P<vid>\d+))?(?:$|[?#])'
+
+ _TESTS = [{
+ 'url': 'https://tvthek.orf.at/profile/ZIB-2/1211/ZIB-2/14121079',
+ 'info_dict': {
+ 'id': '14121079',
+ },
+ 'playlist_count': 11,
+ 'params': {'noplaylist': True}
+ }, {
+ 'url': 'https://tvthek.orf.at/profile/ZIB-2/1211/ZIB-2/14121079/Umfrage-Welches-Tier-ist-Sebastian-Kurz/15083150',
+ 'info_dict': {
+ 'id': '14121079',
+ },
+ 'playlist_count': 1,
+ 'params': {'playlist_items': '5'}
+ }, {
+ 'url': 'https://tvthek.orf.at/profile/ZIB-2/1211/ZIB-2/14121079/Umfrage-Welches-Tier-ist-Sebastian-Kurz/15083150',
+ 'info_dict': {
+ 'id': '14121079',
+ },
+ 'playlist': [{
+ 'info_dict': {
+ 'id': '15083150',
+ 'ext': 'mp4',
+ 'description': 'md5:7be1c485425f5f255a5e4e4815e77d04',
+ 'thumbnail': 'https://api-tvthek.orf.at/uploads/media/segments/0130/59/824271ea35cd8931a0fb08ab316a5b0a1562342c.jpeg',
+ 'title': 'Umfrage: Welches Tier ist Sebastian Kurz?',
+ }
+ }],
+ 'playlist_count': 1,
+ 'params': {'noplaylist': True, 'skip_download': 'm3u8'}
+ }, {
+ 'url': 'http://tvthek.orf.at/program/Aufgetischt/2745173/Aufgetischt-Mit-der-Steirischen-Tafelrunde/8891389',
+ 'playlist': [{
+ 'md5': '2942210346ed779588f428a92db88712',
+ 'info_dict': {
+ 'id': '8896777',
+ 'ext': 'mp4',
+ 'title': 'Aufgetischt: Mit der Steirischen Tafelrunde',
+ 'description': 'md5:c1272f0245537812d4e36419c207b67d',
+ 'duration': 2668,
+ 'upload_date': '20141208',
+ },
+ }],
+ 'skip': 'Blocked outside of Austria / Germany',
+ }, {
+ 'url': 'http://tvthek.orf.at/topic/Im-Wandel-der-Zeit/8002126/Best-of-Ingrid-Thurnher/7982256',
+ 'info_dict': {
+ 'id': '7982259',
+ 'ext': 'mp4',
+ 'title': 'Best of Ingrid Thurnher',
+ 'upload_date': '20140527',
+ 'description': 'Viele Jahre war Ingrid Thurnher das "Gesicht" der ZIB 2. Vor ihrem Wechsel zur ZIB 2 im Jahr 1995 moderierte sie unter anderem "Land und Leute", "Österreich-Bild" und "Niederösterreich heute".',
+ },
+ 'params': {
+ 'skip_download': True, # rtsp downloads
+ },
+ 'skip': 'Blocked outside of Austria / Germany',
+ }, {
+ 'url': 'http://tvthek.orf.at/topic/Fluechtlingskrise/10463081/Heimat-Fremde-Heimat/13879132/Senioren-betreuen-Migrantenkinder/13879141',
+ 'only_matching': True,
+ }, {
+ 'url': 'http://tvthek.orf.at/profile/Universum/35429',
+ 'only_matching': True,
+ }]
+
+ def _pagefunc(self, url, data_jsb, n, *, image=None):
+ sd = data_jsb[n]
+ video_id, title = str(sd['id']), sd['title']
+ formats = []
+ for fd in sd['sources']:
+ src = url_or_none(fd.get('src'))
+ if not src:
+ continue
+ format_id = join_nonempty('delivery', 'quality', 'quality_string', from_dict=fd)
+ ext = determine_ext(src)
+ if ext == 'm3u8':
+ m3u8_formats = self._extract_m3u8_formats(
+ src, video_id, 'mp4', m3u8_id=format_id, fatal=False, note=f'Downloading {format_id} m3u8 manifest')
+ if any('/geoprotection' in f['url'] for f in m3u8_formats):
+ self.raise_geo_restricted()
+ formats.extend(m3u8_formats)
+ elif ext == 'f4m':
+ formats.extend(self._extract_f4m_formats(
+ src, video_id, f4m_id=format_id, fatal=False))
+ elif ext == 'mpd':
+ formats.extend(self._extract_mpd_formats(
+ src, video_id, mpd_id=format_id, fatal=False, note=f'Downloading {format_id} mpd manifest'))
+ else:
+ formats.append({
+ 'format_id': format_id,
+ 'url': src,
+ 'protocol': fd.get('protocol'),
+ })
+
+ # Check for geoblocking.
+ # There is an is_geoprotection property, but it is always false.
+ geo_str = sd.get('geoprotection_string')
+ http_url = next(
+ (f['url'] for f in formats if re.match(r'^https?://.*\.mp4$', f['url'])),
+ None) if geo_str else None
+ if http_url:
+ self._request_webpage(
+ HEADRequest(http_url), video_id, fatal=False, note='Testing for geoblocking',
+ errnote=f'This video seems to be blocked outside of {geo_str}. You may want to try the streaming-* formats')
+
+ subtitles = {}
+ for sub in sd.get('subtitles', []):
+ sub_src = sub.get('src')
+ if not sub_src:
+ continue
+ subtitles.setdefault(sub.get('lang', 'de-AT'), []).append({
+ 'url': sub_src,
+ })
+
+ upload_date = unified_strdate(sd.get('created_date'))
+
+ thumbnails = []
+ preview = sd.get('preview_image_url')
+ if preview:
+ thumbnails.append({
+ 'id': 'preview',
+ 'url': preview,
+ 'preference': 0,
+ })
+ image = sd.get('image_full_url') or image
+ if image:
+ thumbnails.append({
+ 'id': 'full',
+ 'url': image,
+ 'preference': 1,
+ })
+
+ yield {
+ 'id': video_id,
+ 'title': title,
+ 'webpage_url': smuggle_url(f'{url}/part/{video_id}', {'force_noplaylist': True}),
+ 'formats': formats,
+ 'subtitles': subtitles,
+ 'description': sd.get('description'),
+ 'duration': int_or_none(sd.get('duration_in_seconds')),
+ 'upload_date': upload_date,
+ 'thumbnails': thumbnails,
+ }
+
+ def _real_extract(self, url):
+ url, smuggled_data = unsmuggle_url(url)
+ playlist_id, video_id, base_url = self._match_valid_url(url).group('id', 'vid', 'url')
+ webpage = self._download_webpage(url, playlist_id)
+
+ data_jsb = self._parse_json(
+ self._search_regex(
+ r'<div[^>]+class=(["\']).*?VideoPlaylist.*?\1[^>]+data-jsb=(["\'])(?P<json>.+?)\2',
+ webpage, 'playlist', group='json'),
+ playlist_id, transform_source=unescapeHTML)['playlist']['videos']
+
+ if not self._yes_playlist(playlist_id, video_id, smuggled_data):
+ data_jsb = [sd for sd in data_jsb if str(sd.get('id')) == video_id]
+
+ playlist_count = len(data_jsb)
+ image = self._og_search_thumbnail(webpage) if playlist_count == 1 else None
+
+ page_func = functools.partial(self._pagefunc, base_url, data_jsb, image=image)
+ return {
+ '_type': 'playlist',
+ 'entries': InAdvancePagedList(page_func, playlist_count, 1),
+ 'id': playlist_id,
+ }
+
+
+class ORFRadioIE(InfoExtractor):
+ IE_NAME = 'orf:radio'
+
+ STATION_INFO = {
+ 'fm4': ('fm4', 'fm4', 'orffm4'),
+ 'noe': ('noe', 'oe2n', 'orfnoe'),
+ 'wien': ('wie', 'oe2w', 'orfwie'),
+ 'burgenland': ('bgl', 'oe2b', 'orfbgl'),
+ 'ooe': ('ooe', 'oe2o', 'orfooe'),
+ 'steiermark': ('stm', 'oe2st', 'orfstm'),
+ 'kaernten': ('ktn', 'oe2k', 'orfktn'),
+ 'salzburg': ('sbg', 'oe2s', 'orfsbg'),
+ 'tirol': ('tir', 'oe2t', 'orftir'),
+ 'vorarlberg': ('vbg', 'oe2v', 'orfvbg'),
+ 'oe3': ('oe3', 'oe3', 'orfoe3'),
+ 'oe1': ('oe1', 'oe1', 'orfoe1'),
+ }
+ _STATION_RE = '|'.join(map(re.escape, STATION_INFO.keys()))
+
+ _VALID_URL = rf'''(?x)
+ https?://(?:
+ (?P<station>{_STATION_RE})\.orf\.at/player|
+ radiothek\.orf\.at/(?P<station2>{_STATION_RE})
+ )/(?P<date>[0-9]+)/(?P<show>\w+)'''
+
+ _TESTS = [{
+ 'url': 'https://radiothek.orf.at/ooe/20220801/OGMO',
+ 'info_dict': {
+ 'id': 'OGMO',
+ 'title': 'Guten Morgen OÖ',
+ 'description': 'md5:a3f6083399ef92b8cbe2d421b180835a',
+ },
+ 'playlist': [{
+ 'md5': 'f33147d954a326e338ea52572c2810e8',
+ 'info_dict': {
+ 'id': '2022-08-01_0459_tl_66_7DaysMon1_319062',
+ 'ext': 'mp3',
+ 'title': 'Guten Morgen OÖ',
+ 'upload_date': '20220801',
+ 'duration': 18000,
+ 'timestamp': 1659322789,
+ 'description': 'md5:a3f6083399ef92b8cbe2d421b180835a',
+ }
+ }]
+ }, {
+ 'url': 'https://ooe.orf.at/player/20220801/OGMO',
+ 'info_dict': {
+ 'id': 'OGMO',
+ 'title': 'Guten Morgen OÖ',
+ 'description': 'md5:a3f6083399ef92b8cbe2d421b180835a',
+ },
+ 'playlist': [{
+ 'md5': 'f33147d954a326e338ea52572c2810e8',
+ 'info_dict': {
+ 'id': '2022-08-01_0459_tl_66_7DaysMon1_319062',
+ 'ext': 'mp3',
+ 'title': 'Guten Morgen OÖ',
+ 'upload_date': '20220801',
+ 'duration': 18000,
+ 'timestamp': 1659322789,
+ 'description': 'md5:a3f6083399ef92b8cbe2d421b180835a',
+ }
+ }]
+ }, {
+ 'url': 'http://fm4.orf.at/player/20170107/4CC',
+ 'only_matching': True,
+ }, {
+ 'url': 'https://noe.orf.at/player/20200423/NGM',
+ 'only_matching': True,
+ }, {
+ 'url': 'https://wien.orf.at/player/20200423/WGUM',
+ 'only_matching': True,
+ }, {
+ 'url': 'https://burgenland.orf.at/player/20200423/BGM',
+ 'only_matching': True,
+ }, {
+ 'url': 'https://steiermark.orf.at/player/20200423/STGMS',
+ 'only_matching': True,
+ }, {
+ 'url': 'https://kaernten.orf.at/player/20200423/KGUMO',
+ 'only_matching': True,
+ }, {
+ 'url': 'https://salzburg.orf.at/player/20200423/SGUM',
+ 'only_matching': True,
+ }, {
+ 'url': 'https://tirol.orf.at/player/20200423/TGUMO',
+ 'only_matching': True,
+ }, {
+ 'url': 'https://vorarlberg.orf.at/player/20200423/VGUM',
+ 'only_matching': True,
+ }, {
+ 'url': 'https://oe3.orf.at/player/20200424/3WEK',
+ 'only_matching': True,
+ }, {
+ 'url': 'http://oe1.orf.at/player/20170108/456544',
+ 'md5': '34d8a6e67ea888293741c86a099b745b',
+ 'info_dict': {
+ 'id': '2017-01-08_0759_tl_51_7DaysSun6_256141',
+ 'ext': 'mp3',
+ 'title': 'Morgenjournal',
+ 'duration': 609,
+ 'timestamp': 1483858796,
+ 'upload_date': '20170108',
+ },
+ 'skip': 'Shows from ORF radios are only available for 7 days.'
+ }]
+
+ def _entries(self, data, station):
+ _, loop_station, old_ie = self.STATION_INFO[station]
+ for info in data['streams']:
+ item_id = info.get('loopStreamId')
+ if not item_id:
+ continue
+ video_id = item_id.replace('.mp3', '')
+ yield {
+ 'id': video_id,
+ 'ext': 'mp3',
+ 'url': f'https://loopstream01.apa.at/?channel={loop_station}&id={item_id}',
+ '_old_archive_ids': [make_archive_id(old_ie, video_id)],
+ 'title': data.get('title'),
+ 'description': clean_html(data.get('subtitle')),
+ 'duration': try_call(lambda: (info['end'] - info['start']) / 1000),
+ 'timestamp': int_or_none(info.get('start'), scale=1000),
+ 'series': data.get('programTitle'),
+ }
+
+ def _real_extract(self, url):
+ station, station2, show_date, show_id = self._match_valid_url(url).group('station', 'station2', 'date', 'show')
+ api_station, _, _ = self.STATION_INFO[station or station2]
+ data = self._download_json(
+ f'http://audioapi.orf.at/{api_station}/api/json/current/broadcast/{show_id}/{show_date}', show_id)
+
+ return self.playlist_result(
+ self._entries(data, station or station2), show_id, data.get('title'), clean_html(data.get('subtitle')))
+
+
+class ORFPodcastIE(InfoExtractor):
+ IE_NAME = 'orf:podcast'
+ _STATION_RE = '|'.join(map(re.escape, (
+ 'bgl', 'fm4', 'ktn', 'noe', 'oe1', 'oe3',
+ 'ooe', 'sbg', 'stm', 'tir', 'tv', 'vbg', 'wie')))
+ _VALID_URL = rf'https?://sound\.orf\.at/podcast/(?P<station>{_STATION_RE})/(?P<show>[\w-]+)/(?P<id>[\w-]+)'
+ _TESTS = [{
+ 'url': 'https://sound.orf.at/podcast/oe3/fruehstueck-bei-mir/nicolas-stockhammer-15102023',
+ 'md5': '526a5700e03d271a1505386a8721ab9b',
+ 'info_dict': {
+ 'id': 'nicolas-stockhammer-15102023',
+ 'ext': 'mp3',
+ 'title': 'Nicolas Stockhammer (15.10.2023)',
+ 'duration': 3396.0,
+ 'series': 'Frühstück bei mir',
+ },
+ 'skip': 'ORF podcasts are only available for a limited time'
+ }]
+
+ def _real_extract(self, url):
+ station, show, show_id = self._match_valid_url(url).group('station', 'show', 'id')
+ data = self._download_json(
+ f'https://audioapi.orf.at/radiothek/api/2.0/podcast/{station}/{show}/{show_id}', show_id)
+
+ return {
+ 'id': show_id,
+ 'ext': 'mp3',
+ 'vcodec': 'none',
+ **traverse_obj(data, ('payload', {
+ 'url': ('enclosures', 0, 'url'),
+ 'ext': ('enclosures', 0, 'type', {mimetype2ext}),
+ 'title': 'title',
+ 'description': ('description', {clean_html}),
+ 'duration': ('duration', {functools.partial(float_or_none, scale=1000)}),
+ 'series': ('podcast', 'title'),
+ })),
+ }
+
+
+class ORFIPTVIE(InfoExtractor):
+ IE_NAME = 'orf:iptv'
+ IE_DESC = 'iptv.ORF.at'
+ _VALID_URL = r'https?://iptv\.orf\.at/(?:#/)?stories/(?P<id>\d+)'
+
+ _TEST = {
+ 'url': 'http://iptv.orf.at/stories/2275236/',
+ 'md5': 'c8b22af4718a4b4af58342529453e3e5',
+ 'info_dict': {
+ 'id': '350612',
+ 'ext': 'flv',
+ 'title': 'Weitere Evakuierungen um Vulkan Calbuco',
+ 'description': 'md5:d689c959bdbcf04efeddedbf2299d633',
+ 'duration': 68.197,
+ 'thumbnail': r're:^https?://.*\.jpg$',
+ 'upload_date': '20150425',
+ },
+ }
+
+ def _real_extract(self, url):
+ story_id = self._match_id(url)
+
+ webpage = self._download_webpage(
+ 'http://iptv.orf.at/stories/%s' % story_id, story_id)
+
+ video_id = self._search_regex(
+ r'data-video(?:id)?="(\d+)"', webpage, 'video id')
+
+ data = self._download_json(
+ 'http://bits.orf.at/filehandler/static-api/json/current/data.json?file=%s' % video_id,
+ video_id)[0]
+
+ duration = float_or_none(data['duration'], 1000)
+
+ video = data['sources']['default']
+ load_balancer_url = video['loadBalancerUrl']
+ abr = int_or_none(video.get('audioBitrate'))
+ vbr = int_or_none(video.get('bitrate'))
+ fps = int_or_none(video.get('videoFps'))
+ width = int_or_none(video.get('videoWidth'))
+ height = int_or_none(video.get('videoHeight'))
+ thumbnail = video.get('preview')
+
+ rendition = self._download_json(
+ load_balancer_url, video_id, transform_source=strip_jsonp)
+
+ f = {
+ 'abr': abr,
+ 'vbr': vbr,
+ 'fps': fps,
+ 'width': width,
+ 'height': height,
+ }
+
+ formats = []
+ for format_id, format_url in rendition['redirect'].items():
+ if format_id == 'rtmp':
+ ff = f.copy()
+ ff.update({
+ 'url': format_url,
+ 'format_id': format_id,
+ })
+ formats.append(ff)
+ elif determine_ext(format_url) == 'f4m':
+ formats.extend(self._extract_f4m_formats(
+ format_url, video_id, f4m_id=format_id))
+ elif determine_ext(format_url) == 'm3u8':
+ formats.extend(self._extract_m3u8_formats(
+ format_url, video_id, 'mp4', m3u8_id=format_id))
+ else:
+ continue
+
+ title = remove_end(self._og_search_title(webpage), ' - iptv.ORF.at')
+ description = self._og_search_description(webpage)
+ upload_date = unified_strdate(self._html_search_meta(
+ 'dc.date', webpage, 'upload date'))
+
+ return {
+ 'id': video_id,
+ 'title': title,
+ 'description': description,
+ 'duration': duration,
+ 'thumbnail': thumbnail,
+ 'upload_date': upload_date,
+ 'formats': formats,
+ }
+
+
+class ORFFM4StoryIE(InfoExtractor):
+ IE_NAME = 'orf:fm4:story'
+ IE_DESC = 'fm4.orf.at stories'
+ _VALID_URL = r'https?://fm4\.orf\.at/stories/(?P<id>\d+)'
+
+ _TEST = {
+ 'url': 'http://fm4.orf.at/stories/2865738/',
+ 'playlist': [{
+ 'md5': 'e1c2c706c45c7b34cf478bbf409907ca',
+ 'info_dict': {
+ 'id': '547792',
+ 'ext': 'flv',
+ 'title': 'Manu Delago und Inner Tongue live',
+ 'description': 'Manu Delago und Inner Tongue haben bei der FM4 Soundpark Session live alles gegeben. Hier gibt es Fotos und die gesamte Session als Video.',
+ 'duration': 1748.52,
+ 'thumbnail': r're:^https?://.*\.jpg$',
+ 'upload_date': '20170913',
+ },
+ }, {
+ 'md5': 'c6dd2179731f86f4f55a7b49899d515f',
+ 'info_dict': {
+ 'id': '547798',
+ 'ext': 'flv',
+ 'title': 'Manu Delago und Inner Tongue live (2)',
+ 'duration': 1504.08,
+ 'thumbnail': r're:^https?://.*\.jpg$',
+ 'upload_date': '20170913',
+ 'description': 'Manu Delago und Inner Tongue haben bei der FM4 Soundpark Session live alles gegeben. Hier gibt es Fotos und die gesamte Session als Video.',
+ },
+ }],
+ }
+
+ def _real_extract(self, url):
+ story_id = self._match_id(url)
+ webpage = self._download_webpage(url, story_id)
+
+ entries = []
+ all_ids = orderedSet(re.findall(r'data-video(?:id)?="(\d+)"', webpage))
+ for idx, video_id in enumerate(all_ids):
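+ # the result may be a single dict or a list of episodes;
+ # variadic() normalizes both cases to a list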
+ data = self._download_json(
+ 'http://bits.orf.at/filehandler/static-api/json/current/data.json?file=%s' % video_id,
+ video_id)[0]
+
+ duration = float_or_none(data['duration'], 1000)
+
+ video = data['sources']['q8c']
+ load_balancer_url = video['loadBalancerUrl']
+ abr = int_or_none(video.get('audioBitrate'))
+ vbr = int_or_none(video.get('bitrate'))
+ fps = int_or_none(video.get('videoFps'))
+ width = int_or_none(video.get('videoWidth'))
+ height = int_or_none(video.get('videoHeight'))
+ thumbnail = video.get('preview')
+
+ rendition = self._download_json(
+ load_balancer_url, video_id, transform_source=strip_jsonp)
+
+ f = {
+ 'abr': abr,
+ 'vbr': vbr,
+ 'fps': fps,
+ 'width': width,
+ 'height': height,
+ }
+
+ formats = []
+ for format_id, format_url in rendition['redirect'].items():
+ if format_id == 'rtmp':
+ ff = f.copy()
+ ff.update({
+ 'url': format_url,
+ 'format_id': format_id,
+ })
+ formats.append(ff)
+ elif determine_ext(format_url) == 'f4m':
+ formats.extend(self._extract_f4m_formats(
+ format_url, video_id, f4m_id=format_id))
+ elif determine_ext(format_url) == 'm3u8':
+ formats.extend(self._extract_m3u8_formats(
+ format_url, video_id, 'mp4', m3u8_id=format_id))
+ else:
+ continue
+
+ title = remove_end(self._og_search_title(webpage), ' - fm4.ORF.at')
+ if idx >= 1:
+ # Titles are duplicated; make them unique
+ title += ' (' + str(idx + 1) + ')'
+ description = self._og_search_description(webpage)
+ upload_date = unified_strdate(self._html_search_meta(
+ 'dc.date', webpage, 'upload date'))
+
+ entries.append({
+ 'id': video_id,
+ 'title': title,
+ 'description': description,
+ 'duration': duration,
+ 'thumbnail': thumbnail,
+ 'upload_date': upload_date,
+ 'formats': formats,
+ })
+
+ return self.playlist_result(entries)
+
+
+class ORFONIE(InfoExtractor):
+ IE_NAME = 'orf:on'
+ _VALID_URL = r'https?://on\.orf\.at/video/(?P<id>\d{8})/(?P<slug>[\w-]+)'
+ _TESTS = [{
+ 'url': 'https://on.orf.at/video/14210000/school-of-champions-48',
+ 'info_dict': {
+ 'id': '14210000',
+ 'ext': 'mp4',
+ 'duration': 2651.08,
+ 'thumbnail': 'https://api-tvthek.orf.at/assets/segments/0167/98/thumb_16697671_segments_highlight_teaser.jpeg',
+ 'title': 'School of Champions (4/8)',
+ 'description': 'md5:d09ad279fc2e8502611e7648484b6afd',
+ 'media_type': 'episode',
+ 'timestamp': 1706472362,
+ 'upload_date': '20240128',
+ }
+ }]
+
+ def _extract_video(self, video_id, display_id):
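+ # the API's 'encrypted' episode ids are base64 of a static prefix + video id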
+ encrypted_id = base64.b64encode(f'3dSlfek03nsLKdj4Jsd{video_id}'.encode()).decode()
+ api_json = self._download_json(
+ f'https://api-tvthek.orf.at/api/v4.3/public/episode/encrypted/{encrypted_id}', display_id)
+
+ formats, subtitles = [], {}
+ for manifest_type in traverse_obj(api_json, ('sources', {dict.keys}, ...)):
+ for manifest_url in traverse_obj(api_json, ('sources', manifest_type, ..., 'src', {url_or_none})):
+ if manifest_type == 'hls':
+ fmts, subs = self._extract_m3u8_formats_and_subtitles(
+ manifest_url, display_id, fatal=False, m3u8_id='hls')
+ elif manifest_type == 'dash':
+ fmts, subs = self._extract_mpd_formats_and_subtitles(
+ manifest_url, display_id, fatal=False, mpd_id='dash')
+ else:
+ continue
+ formats.extend(fmts)
+ self._merge_subtitles(subs, target=subtitles)
+
+ return {
+ 'id': video_id,
+ 'formats': formats,
+ 'subtitles': subtitles,
+ **traverse_obj(api_json, {
+ 'duration': ('duration_second', {float_or_none}),
+ 'title': (('title', 'headline'), {str}),
+ 'description': (('description', 'teaser_text'), {str}),
+ 'media_type': ('video_type', {str}),
+ }, get_all=False),
+ }
+
+ def _real_extract(self, url):
+ video_id, display_id = self._match_valid_url(url).group('id', 'slug')
+ webpage = self._download_webpage(url, display_id)
+
+ return {
+ 'id': video_id,
+ 'title': self._html_search_meta(['og:title', 'twitter:title'], webpage, default=None),
+ 'description': self._html_search_meta(
+ ['description', 'og:description', 'twitter:description'], webpage, default=None),
+ **self._search_json_ld(webpage, display_id, fatal=False),
+ **self._extract_video(video_id, display_id),
+ }
diff --git a/yt_dlp/extractor/outsidetv.py b/yt_dlp/extractor/outsidetv.py
new file mode 100644
index 0000000..b1fcbd6
--- /dev/null
+++ b/yt_dlp/extractor/outsidetv.py
@@ -0,0 +1,25 @@
+from .common import InfoExtractor
+
+
+class OutsideTVIE(InfoExtractor):
+ _VALID_URL = r'https?://(?:www\.)?outsidetv\.com/(?:[^/]+/)*?play/[a-zA-Z0-9]{8}/\d+/\d+/(?P<id>[a-zA-Z0-9]{8})'
+ _TESTS = [{
+ 'url': 'http://www.outsidetv.com/category/snow/play/ZjQYboH6/1/10/Hdg0jukV/4',
+ 'md5': '192d968fedc10b2f70ec31865ffba0da',
+ 'info_dict': {
+ 'id': 'Hdg0jukV',
+ 'ext': 'mp4',
+ 'title': 'Home - Jackson Ep 1 | Arbor Snowboards',
+ 'description': 'md5:41a12e94f3db3ca253b04bb1e8d8f4cd',
+ 'upload_date': '20181225',
+ 'timestamp': 1545742800,
+ }
+ }, {
+ 'url': 'http://www.outsidetv.com/home/play/ZjQYboH6/1/10/Hdg0jukV/4',
+ 'only_matching': True,
+ }]
+
+ def _real_extract(self, url):
+ jw_media_id = self._match_id(url)
+ return self.url_result(
+ 'jwplatform:' + jw_media_id, 'JWPlatform', jw_media_id)
diff --git a/yt_dlp/extractor/owncloud.py b/yt_dlp/extractor/owncloud.py
new file mode 100644
index 0000000..79fd830
--- /dev/null
+++ b/yt_dlp/extractor/owncloud.py
@@ -0,0 +1,80 @@
+import re
+import urllib.parse
+
+from .common import InfoExtractor
+from ..utils import (
+ ExtractorError,
+ determine_ext,
+ url_or_none,
+ urlencode_postdata,
+)
+
+
+class OwnCloudIE(InfoExtractor):
+ _INSTANCES_RE = '|'.join((
+ r'(?:[^\.]+\.)?sciebo\.de',
+ r'cloud\.uni-koblenz-landau\.de',
+ ))
+ _VALID_URL = rf'https?://(?:{_INSTANCES_RE})/s/(?P<id>[\w.-]+)'
+
+ _TESTS = [
+ {
+ 'url': 'https://ruhr-uni-bochum.sciebo.de/s/wWhqZzh9jTumVFN',
+ 'info_dict': {
+ 'id': 'wWhqZzh9jTumVFN',
+ 'ext': 'mp4',
+ 'title': 'CmvpJST.mp4',
+ },
+ },
+ {
+ 'url': 'https://ruhr-uni-bochum.sciebo.de/s/WNDuFu0XuFtmm3f',
+ 'info_dict': {
+ 'id': 'WNDuFu0XuFtmm3f',
+ 'ext': 'mp4',
+ 'title': 'CmvpJST.mp4',
+ },
+ 'params': {
+ 'videopassword': '12345',
+ },
+ },
+ ]
+
+ def _real_extract(self, url):
+ video_id = self._match_id(url)
+ webpage, urlh = self._download_webpage_handle(url, video_id)
+
+ if re.search(r'<label[^>]+for="password"', webpage):
+ webpage = self._verify_video_password(webpage, urlh.url, video_id)
+
+ hidden_inputs = self._hidden_inputs(webpage)
+ title = hidden_inputs.get('filename')
+ parsed_url = urllib.parse.urlparse(url)
+
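+ # prefer the explicit downloadURL; otherwise replace the last path
+ # segment of the share URL with 'download'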
+ return {
+ 'id': video_id,
+ 'title': title,
+ 'url': url_or_none(hidden_inputs.get('downloadURL')) or parsed_url._replace(
+ path=urllib.parse.urljoin(parsed_url.path, 'download')).geturl(),
+ 'ext': determine_ext(title),
+ }
+
+ def _verify_video_password(self, webpage, url, video_id):
+ password = self.get_param('videopassword')
+ if password is None:
+ raise ExtractorError(
+ 'This video is protected by a password, use the --video-password option',
+ expected=True)
+
+ validation_response = self._download_webpage(
+ url, video_id, 'Validating Password', 'Wrong password?',
+ data=urlencode_postdata({
+ 'requesttoken': self._hidden_inputs(webpage)['requesttoken'],
+ 'password': password,
+ }))
+
+ if re.search(r'<label[^>]+for="password"', validation_response):
+ warning = self._search_regex(
+ r'<div[^>]+class="warning">([^<]*)</div>', validation_response,
+ 'warning', default='The password is wrong')
+ raise ExtractorError(f'Opening the video failed, {self.IE_NAME} said: {warning!r}', expected=True)
+ return validation_response
diff --git a/yt_dlp/extractor/packtpub.py b/yt_dlp/extractor/packtpub.py
new file mode 100644
index 0000000..5620330
--- /dev/null
+++ b/yt_dlp/extractor/packtpub.py
@@ -0,0 +1,155 @@
+import json
+
+from .common import InfoExtractor
+from ..networking.exceptions import HTTPError
+from ..utils import (
+ clean_html,
+ ExtractorError,
+ # remove_end,
+ str_or_none,
+ strip_or_none,
+ unified_timestamp,
+ # urljoin,
+)
+
+
+class PacktPubBaseIE(InfoExtractor):
+ # _PACKT_BASE = 'https://www.packtpub.com'
+ _STATIC_PRODUCTS_BASE = 'https://static.packt-cdn.com/products/'
+
+
+class PacktPubIE(PacktPubBaseIE):
+ _VALID_URL = r'https?://(?:(?:www\.)?packtpub\.com/mapt|subscription\.packtpub\.com)/video/[^/]+/(?P<course_id>\d+)/(?P<chapter_id>[^/]+)/(?P<id>[^/]+)(?:/(?P<display_id>[^/?&#]+))?'
+
+ _TESTS = [{
+ 'url': 'https://www.packtpub.com/mapt/video/web-development/9781787122215/20528/20530/Project+Intro',
+ 'md5': '1e74bd6cfd45d7d07666f4684ef58f70',
+ 'info_dict': {
+ 'id': '20530',
+ 'ext': 'mp4',
+ 'title': 'Project Intro',
+ 'thumbnail': r're:(?i)^https?://.*\.jpg',
+ 'timestamp': 1490918400,
+ 'upload_date': '20170331',
+ },
+ }, {
+ 'url': 'https://subscription.packtpub.com/video/web_development/9781787122215/20528/20530/project-intro',
+ 'only_matching': True,
+ }, {
+ 'url': 'https://subscription.packtpub.com/video/programming/9781838988906/p1/video1_1/business-card-project',
+ 'only_matching': True,
+ }]
+ _NETRC_MACHINE = 'packtpub'
+ _TOKEN = None
+
+ def _perform_login(self, username, password):
+ try:
+ self._TOKEN = self._download_json(
+ 'https://services.packtpub.com/auth-v1/users/tokens', None,
+ 'Downloading Authorization Token', data=json.dumps({
+ 'username': username,
+ 'password': password,
+ }).encode())['data']['access']
+ except ExtractorError as e:
+ if isinstance(e.cause, HTTPError) and e.cause.status in (400, 401, 404):
+ message = self._parse_json(e.cause.response.read().decode(), None)['message']
+ raise ExtractorError(message, expected=True)
+ raise
+
+ def _real_extract(self, url):
+ course_id, chapter_id, video_id, display_id = self._match_valid_url(url).groups()
+
+ headers = {}
+ if self._TOKEN:
+ headers['Authorization'] = 'Bearer ' + self._TOKEN
+ try:
+ video_url = self._download_json(
+ 'https://services.packtpub.com/products-v1/products/%s/%s/%s' % (course_id, chapter_id, video_id), video_id,
+ 'Downloading JSON video', headers=headers)['data']
+ except ExtractorError as e:
+ if isinstance(e.cause, HTTPError) and e.cause.status == 400:
+ self.raise_login_required('This video is locked')
+ raise
+
+ # TODO: find a better way to avoid duplicating course requests
+ # metadata = self._download_json(
+ # '%s/products/%s/chapters/%s/sections/%s/metadata'
+ # % (self._MAPT_REST, course_id, chapter_id, video_id),
+ # video_id)['data']
+
+ # title = metadata['pageTitle']
+ # course_title = metadata.get('title')
+ # if course_title:
+ # title = remove_end(title, ' - %s' % course_title)
+ # timestamp = unified_timestamp(metadata.get('publicationDate'))
+ # thumbnail = urljoin(self._PACKT_BASE, metadata.get('filepath'))
+
+ return {
+ 'id': video_id,
+ 'url': video_url,
+ 'title': display_id or video_id, # title,
+ # 'thumbnail': thumbnail,
+ # 'timestamp': timestamp,
+ }
+
+
+class PacktPubCourseIE(PacktPubBaseIE):
+ _VALID_URL = r'(?P<url>https?://(?:(?:www\.)?packtpub\.com/mapt|subscription\.packtpub\.com)/video/[^/]+/(?P<id>\d+))'
+ _TESTS = [{
+ 'url': 'https://www.packtpub.com/mapt/video/web-development/9781787122215',
+ 'info_dict': {
+ 'id': '9781787122215',
+ 'title': 'Learn Nodejs by building 12 projects [Video]',
+ 'description': 'md5:489da8d953f416e51927b60a1c7db0aa',
+ },
+ 'playlist_count': 90,
+ }, {
+ 'url': 'https://subscription.packtpub.com/video/web_development/9781787122215',
+ 'only_matching': True,
+ }]
+
+ @classmethod
+ def suitable(cls, url):
+ return False if PacktPubIE.suitable(url) else super().suitable(url)
+
+ def _real_extract(self, url):
+ mobj = self._match_valid_url(url)
+ url, course_id = mobj.group('url', 'id')
+
+ course = self._download_json(
+ self._STATIC_PRODUCTS_BASE + '%s/toc' % course_id, course_id)
+ metadata = self._download_json(
+ self._STATIC_PRODUCTS_BASE + '%s/summary' % course_id,
+ course_id, fatal=False) or {}
+
+ entries = []
+ for chapter_num, chapter in enumerate(course['chapters'], 1):
+ chapter_id = str_or_none(chapter.get('id'))
+ sections = chapter.get('sections')
+ if not chapter_id or not isinstance(sections, list):
+ continue
+ chapter_info = {
+ 'chapter': chapter.get('title'),
+ 'chapter_number': chapter_num,
+ 'chapter_id': chapter_id,
+ }
+ for section in sections:
+ section_id = str_or_none(section.get('id'))
+ if not section_id or section.get('contentType') != 'video':
+ continue
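+                # the course URL plus the chapter and section IDs together
+                # form a PacktPubIE video URL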
+ entry = {
+ '_type': 'url_transparent',
+ 'url': '/'.join([url, chapter_id, section_id]),
+ 'title': strip_or_none(section.get('title')),
+ 'description': clean_html(section.get('summary')),
+ 'thumbnail': metadata.get('coverImage'),
+ 'timestamp': unified_timestamp(metadata.get('publicationDate')),
+ 'ie_key': PacktPubIE.ie_key(),
+ }
+ entry.update(chapter_info)
+ entries.append(entry)
+
+ return self.playlist_result(
+ entries, course_id, metadata.get('title'),
+ clean_html(metadata.get('about')))
diff --git a/yt_dlp/extractor/palcomp3.py b/yt_dlp/extractor/palcomp3.py
new file mode 100644
index 0000000..4b0801c
--- /dev/null
+++ b/yt_dlp/extractor/palcomp3.py
@@ -0,0 +1,143 @@
+from .common import InfoExtractor
+from ..compat import compat_str
+from ..utils import (
+ int_or_none,
+ str_or_none,
+ try_get,
+)
+
+
+class PalcoMP3BaseIE(InfoExtractor):
+ _GQL_QUERY_TMPL = '''{
+ artist(slug: "%s") {
+ %s
+ }
+}'''
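+    # NB: '%%s' below survives the first %-substitution (done in
+    # _real_initialize, which fills the music fields into '%s') and only
+    # then becomes '%s', to be filled with the music slug in _real_extract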
+ _ARTIST_FIELDS_TMPL = '''music(slug: "%%s") {
+ %s
+ }'''
+ _MUSIC_FIELDS = '''duration
+ hls
+ mp3File
+ musicID
+ plays
+ title'''
+
+ def _call_api(self, artist_slug, artist_fields):
+ return self._download_json(
+ 'https://www.palcomp3.com.br/graphql/', artist_slug, query={
+ 'query': self._GQL_QUERY_TMPL % (artist_slug, artist_fields),
+ })['data']
+
+ def _parse_music(self, music):
+ music_id = compat_str(music['musicID'])
+ title = music['title']
+
+ formats = []
+ hls_url = music.get('hls')
+ if hls_url:
+ formats.append({
+ 'url': hls_url,
+ 'protocol': 'm3u8_native',
+ 'ext': 'mp4',
+ })
+ mp3_file = music.get('mp3File')
+ if mp3_file:
+ formats.append({
+ 'url': mp3_file,
+ })
+
+ return {
+ 'id': music_id,
+ 'title': title,
+ 'formats': formats,
+ 'duration': int_or_none(music.get('duration')),
+ 'view_count': int_or_none(music.get('plays')),
+ }
+
+ def _real_initialize(self):
+ self._ARTIST_FIELDS_TMPL = self._ARTIST_FIELDS_TMPL % self._MUSIC_FIELDS
+
+ def _real_extract(self, url):
+ artist_slug, music_slug = self._match_valid_url(url).groups()
+ artist_fields = self._ARTIST_FIELDS_TMPL % music_slug
+ music = self._call_api(artist_slug, artist_fields)['artist']['music']
+ return self._parse_music(music)
+
+
+class PalcoMP3IE(PalcoMP3BaseIE):
+ IE_NAME = 'PalcoMP3:song'
+ _VALID_URL = r'https?://(?:www\.)?palcomp3\.com(?:\.br)?/(?P<artist>[^/]+)/(?P<id>[^/?&#]+)'
+ _TESTS = [{
+ 'url': 'https://www.palcomp3.com/maiaraemaraisaoficial/nossas-composicoes-cuida-bem-dela/',
+ 'md5': '99fd6405b2d8fd589670f6db1ba3b358',
+ 'info_dict': {
+ 'id': '3162927',
+ 'ext': 'mp3',
+ 'title': 'Nossas Composições - CUIDA BEM DELA',
+ 'duration': 210,
+ 'view_count': int,
+ }
+ }]
+
+ @classmethod
+ def suitable(cls, url):
+        return False if PalcoMP3VideoIE.suitable(url) else super().suitable(url)
+
+
+class PalcoMP3ArtistIE(PalcoMP3BaseIE):
+ IE_NAME = 'PalcoMP3:artist'
+ _VALID_URL = r'https?://(?:www\.)?palcomp3\.com(?:\.br)?/(?P<id>[^/?&#]+)'
+ _TESTS = [{
+ 'url': 'https://www.palcomp3.com.br/condedoforro/',
+ 'info_dict': {
+ 'id': '358396',
+ 'title': 'Conde do Forró',
+ },
+ 'playlist_mincount': 188,
+ }]
+ _ARTIST_FIELDS_TMPL = '''artistID
+ musics {
+ nodes {
+ %s
+ }
+ }
+ name'''
+
+ @classmethod
+ def suitable(cls, url):
+        return False if PalcoMP3IE._match_valid_url(url) else super().suitable(url)
+
+ def _real_extract(self, url):
+ artist_slug = self._match_id(url)
+ artist = self._call_api(artist_slug, self._ARTIST_FIELDS_TMPL)['artist']
+
+ def entries():
+ for music in (try_get(artist, lambda x: x['musics']['nodes'], list) or []):
+ yield self._parse_music(music)
+
+ return self.playlist_result(
+ entries(), str_or_none(artist.get('artistID')), artist.get('name'))
+
+
+class PalcoMP3VideoIE(PalcoMP3BaseIE):
+ IE_NAME = 'PalcoMP3:video'
+ _VALID_URL = r'https?://(?:www\.)?palcomp3\.com(?:\.br)?/(?P<artist>[^/]+)/(?P<id>[^/?&#]+)/?#clipe'
+ _TESTS = [{
+ 'url': 'https://www.palcomp3.com/maiaraemaraisaoficial/maiara-e-maraisa-voce-faz-falta-aqui-ao-vivo-em-vicosa-mg/#clipe',
+ 'add_ie': ['Youtube'],
+ 'info_dict': {
+ 'id': '_pD1nR2qqPg',
+ 'ext': 'mp4',
+ 'title': 'Maiara e Maraisa - Você Faz Falta Aqui - DVD Ao Vivo Em Campo Grande',
+ 'description': 'md5:7043342c09a224598e93546e98e49282',
+ 'upload_date': '20161107',
+ 'uploader_id': 'maiaramaraisaoficial',
+ 'uploader': 'Maiara e Maraisa',
+ }
+ }]
+ _MUSIC_FIELDS = 'youtubeID'
+
+ def _parse_music(self, music):
+ youtube_id = music['youtubeID']
+ return self.url_result(youtube_id, 'Youtube', youtube_id)
diff --git a/yt_dlp/extractor/panopto.py b/yt_dlp/extractor/panopto.py
new file mode 100644
index 0000000..ddea32d
--- /dev/null
+++ b/yt_dlp/extractor/panopto.py
@@ -0,0 +1,600 @@
+import calendar
+import functools
+import json
+from datetime import datetime, timezone
+from random import random
+
+from .common import InfoExtractor
+from ..compat import (
+ compat_urllib_parse_urlparse,
+    compat_urlparse,
+)
+from ..utils import (
+ bug_reports_message,
+ ExtractorError,
+ get_first,
+ int_or_none,
+ OnDemandPagedList,
+ parse_qs,
+ srt_subtitles_timecode,
+ traverse_obj,
+)
+
+
+class PanoptoBaseIE(InfoExtractor):
+    BASE_URL_RE = r'(?P<base_url>https?://[\w.-]+\.panopto\.(?:com|eu)/Panopto)'
+
+ # see panopto core.js
+ _SUB_LANG_MAPPING = {
+ 0: 'en-US',
+ 1: 'en-GB',
+ 2: 'es-MX',
+ 3: 'es-ES',
+ 4: 'de-DE',
+ 5: 'fr-FR',
+ 6: 'nl-NL',
+ 7: 'th-TH',
+ 8: 'zh-CN',
+ 9: 'zh-TW',
+ 10: 'ko-KR',
+ 11: 'ja-JP',
+ 12: 'ru-RU',
+ 13: 'pt-PT',
+ 14: 'pl-PL',
+ 15: 'en-AU',
+ 16: 'da-DK',
+ 17: 'fi-FI',
+ 18: 'hu-HU',
+ 19: 'nb-NO',
+ 20: 'sv-SE',
+ 21: 'it-IT'
+ }
+
+ def _call_api(self, base_url, path, video_id, data=None, fatal=True, **kwargs):
+ response = self._download_json(
+ base_url + path, video_id, data=json.dumps(data).encode('utf8') if data else None,
+ fatal=fatal, headers={'accept': 'application/json', 'content-type': 'application/json'}, **kwargs)
+ if not response:
+ return
+ error_code = traverse_obj(response, 'ErrorCode')
+ if error_code == 2:
+ self.raise_login_required(method='cookies')
+ elif error_code is not None:
+ msg = f'Panopto said: {response.get("ErrorMessage")}'
+ if fatal:
+ raise ExtractorError(msg, video_id=video_id, expected=True)
+ else:
+ self.report_warning(msg, video_id=video_id)
+ return response
+
+ @staticmethod
+ def _parse_fragment(url):
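+        # Panopto list pages keep their state in the URL fragment as a query
+        # string whose values are JSON-encoded,
+        # e.g. #folderID="..."&view=2 -> {'folderID': '...', 'view': 2}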
+ return {k: json.loads(v[0]) for k, v in compat_urlparse.parse_qs(compat_urllib_parse_urlparse(url).fragment).items()}
+
+
+class PanoptoIE(PanoptoBaseIE):
+ _VALID_URL = PanoptoBaseIE.BASE_URL_RE + r'/Pages/(Viewer|Embed)\.aspx.*(?:\?|&)id=(?P<id>[a-f0-9-]+)'
+ _EMBED_REGEX = [rf'<iframe[^>]+src=["\'](?P<url>{PanoptoBaseIE.BASE_URL_RE}/Pages/(Viewer|Embed|Sessions/List)\.aspx[^"\']+)']
+ _TESTS = [
+ {
+ 'url': 'https://demo.hosted.panopto.com/Panopto/Pages/Viewer.aspx?id=26b3ae9e-4a48-4dcc-96ba-0befba08a0fb',
+ 'info_dict': {
+ 'id': '26b3ae9e-4a48-4dcc-96ba-0befba08a0fb',
+ 'title': 'Panopto for Business - Use Cases',
+ 'timestamp': 1459184200,
+ 'thumbnail': r're:https://demo\.hosted\.panopto\.com/.+',
+ 'upload_date': '20160328',
+ 'ext': 'mp4',
+ 'cast': [],
+ 'chapters': [],
+ 'duration': 88.17099999999999,
+ 'average_rating': int,
+ 'uploader_id': '2db6b718-47a0-4b0b-9e17-ab0b00f42b1e',
+ 'channel_id': 'e4c6a2fc-1214-4ca0-8fb7-aef2e29ff63a',
+ 'channel': 'Showcase Videos'
+ },
+ },
+ {
+ 'url': 'https://demo.hosted.panopto.com/Panopto/Pages/Viewer.aspx?id=ed01b077-c9e5-4c7b-b8ff-15fa306d7a59',
+ 'info_dict': {
+ 'id': 'ed01b077-c9e5-4c7b-b8ff-15fa306d7a59',
+ 'title': 'Overcoming Top 4 Challenges of Enterprise Video',
+ 'uploader': 'Panopto Support',
+ 'timestamp': 1449409251,
+ 'thumbnail': r're:https://demo\.hosted\.panopto\.com/.+',
+ 'upload_date': '20151206',
+ 'ext': 'mp4',
+ 'chapters': 'count:12',
+ 'cast': ['Panopto Support'],
+ 'uploader_id': 'a96d1a31-b4de-489b-9eee-b4a5b414372c',
+ 'average_rating': int,
+ 'description': 'md5:4391837802b3fc856dadf630c4b375d1',
+ 'duration': 1088.2659999999998,
+ 'channel_id': '9f3c1921-43bb-4bda-8b3a-b8d2f05a8546',
+ 'channel': 'Webcasts',
+ },
+ },
+ {
+ # Extra params in URL
+ 'url': 'https://howtovideos.hosted.panopto.com/Panopto/Pages/Viewer.aspx?randomparam=thisisnotreal&id=5fa74e93-3d87-4694-b60e-aaa4012214ed&advance=true',
+ 'info_dict': {
+ 'id': '5fa74e93-3d87-4694-b60e-aaa4012214ed',
+ 'ext': 'mp4',
+ 'duration': 129.513,
+ 'cast': ['Kathryn Kelly'],
+ 'uploader_id': '316a0a58-7fa2-4cd9-be1c-64270d284a56',
+ 'timestamp': 1569845768,
+ 'tags': ['Viewer', 'Enterprise'],
+ 'chapters': [],
+ 'upload_date': '20190930',
+ 'thumbnail': r're:https://howtovideos\.hosted\.panopto\.com/.+',
+ 'description': 'md5:2d844aaa1b1a14ad0e2601a0993b431f',
+ 'title': 'Getting Started: View a Video',
+ 'average_rating': int,
+ 'uploader': 'Kathryn Kelly',
+ 'channel_id': 'fb93bc3c-6750-4b80-a05b-a921013735d3',
+ 'channel': 'Getting Started',
+ }
+ },
+ {
+            # Does not allow the normal Viewer.aspx page. The AUDIO livestream has no URL, so it should be skipped, leaving only one stream.
+ 'url': 'https://unisa.au.panopto.com/Panopto/Pages/Embed.aspx?id=9d9a0fa3-e99a-4ebd-a281-aac2017f4da4',
+ 'info_dict': {
+ 'id': '9d9a0fa3-e99a-4ebd-a281-aac2017f4da4',
+ 'ext': 'mp4',
+ 'cast': ['LTS CLI Script'],
+ 'chapters': [],
+ 'duration': 2178.45,
+ 'description': 'md5:ee5cf653919f55b72bce2dbcf829c9fa',
+ 'channel_id': 'b23e673f-c287-4cb1-8344-aae9005a69f8',
+ 'average_rating': int,
+ 'uploader_id': '38377323-6a23-41e2-9ff6-a8e8004bf6f7',
+ 'uploader': 'LTS CLI Script',
+ 'timestamp': 1572458134,
+ 'title': 'WW2 Vets Interview 3 Ronald Stanley George',
+ 'thumbnail': r're:https://unisa\.au\.panopto\.com/.+',
+ 'channel': 'World War II Veteran Interviews',
+ 'upload_date': '20191030',
+ },
+ },
+ {
+ # Slides/storyboard
+ 'url': 'https://demo.hosted.panopto.com/Panopto/Pages/Viewer.aspx?id=a7f12f1d-3872-4310-84b0-f8d8ab15326b',
+ 'info_dict': {
+ 'id': 'a7f12f1d-3872-4310-84b0-f8d8ab15326b',
+ 'ext': 'mhtml',
+ 'timestamp': 1448798857,
+ 'duration': 4712.681,
+ 'title': 'Cache Memory - CompSci 15-213, Lecture 12',
+ 'channel_id': 'e4c6a2fc-1214-4ca0-8fb7-aef2e29ff63a',
+ 'uploader_id': 'a96d1a31-b4de-489b-9eee-b4a5b414372c',
+ 'upload_date': '20151129',
+ 'average_rating': 0,
+ 'uploader': 'Panopto Support',
+ 'channel': 'Showcase Videos',
+ 'description': 'md5:55e51d54233ddb0e6c2ed388ca73822c',
+ 'cast': ['ISR Videographer', 'Panopto Support'],
+ 'chapters': 'count:28',
+ 'thumbnail': r're:https://demo\.hosted\.panopto\.com/.+',
+ },
+ 'params': {'format': 'mhtml', 'skip_download': True}
+ },
+ {
+ 'url': 'https://na-training-1.hosted.panopto.com/Panopto/Pages/Viewer.aspx?id=8285224a-9a2b-4957-84f2-acb0000c4ea9',
+ 'info_dict': {
+ 'id': '8285224a-9a2b-4957-84f2-acb0000c4ea9',
+ 'ext': 'mp4',
+ 'chapters': [],
+ 'title': 'Company Policy',
+ 'average_rating': 0,
+ 'timestamp': 1615058901,
+ 'channel': 'Human Resources',
+ 'tags': ['HumanResources'],
+ 'duration': 1604.243,
+ 'thumbnail': r're:https://na-training-1\.hosted\.panopto\.com/.+',
+ 'uploader_id': '8e8ba0a3-424f-40df-a4f1-ab3a01375103',
+ 'uploader': 'Cait M.',
+ 'upload_date': '20210306',
+ 'cast': ['Cait M.'],
+ 'subtitles': {'en-US': [{'ext': 'srt', 'data': 'md5:a3f4d25963fdeace838f327097c13265'}],
+ 'es-ES': [{'ext': 'srt', 'data': 'md5:57e9dad365fd0fbaf0468eac4949f189'}]},
+ },
+ 'params': {'writesubtitles': True, 'skip_download': True}
+ }, {
+        # Panopto serves two subtitle tracks here: "Default" and "en-US"; "en-US" is blank and should be skipped.
+ 'url': 'https://na-training-1.hosted.panopto.com/Panopto/Pages/Viewer.aspx?id=940cbd41-f616-4a45-b13e-aaf1000c915b',
+ 'info_dict': {
+ 'id': '940cbd41-f616-4a45-b13e-aaf1000c915b',
+ 'ext': 'mp4',
+ 'subtitles': 'count:1',
+ 'title': 'HR Benefits Review Meeting*',
+ 'cast': ['Panopto Support'],
+ 'chapters': [],
+ 'timestamp': 1575024251,
+ 'thumbnail': r're:https://na-training-1\.hosted\.panopto\.com/.+',
+ 'channel': 'Zoom',
+ 'description': 'md5:04f90a9c2c68b7828144abfb170f0106',
+ 'uploader': 'Panopto Support',
+ 'average_rating': 0,
+ 'duration': 409.34499999999997,
+ 'uploader_id': 'b6ac04ad-38b8-4724-a004-a851004ea3df',
+ 'upload_date': '20191129',
+ },
+ 'params': {'writesubtitles': True, 'skip_download': True}
+ },
+ {
+ 'url': 'https://ucc.cloud.panopto.eu/Panopto/Pages/Viewer.aspx?id=0e8484a4-4ceb-4d98-a63f-ac0200b455cb',
+ 'only_matching': True
+ },
+ {
+ 'url': 'https://brown.hosted.panopto.com/Panopto/Pages/Embed.aspx?id=0b3ff73b-36a0-46c5-8455-aadf010a3638',
+ 'only_matching': True
+ },
+ ]
+
+ @classmethod
+ def suitable(cls, url):
+ return False if PanoptoPlaylistIE.suitable(url) else super().suitable(url)
+
+ def _mark_watched(self, base_url, video_id, delivery_info):
+ duration = traverse_obj(delivery_info, ('Delivery', 'Duration'), expected_type=float)
+ invocation_id = delivery_info.get('InvocationId')
+ stream_id = traverse_obj(delivery_info, ('Delivery', 'Streams', ..., 'PublicID'), get_all=False, expected_type=str)
+ if invocation_id and stream_id and duration:
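+            # ASP.NET JSON date format: /Date(<milliseconds since epoch>)/;
+            # appending '000' converts the seconds value to milliseconds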
+ timestamp_str = f'/Date({calendar.timegm(datetime.now(timezone.utc).timetuple())}000)/'
+ data = {
+ 'streamRequests': [
+ {
+ 'ClientTimeStamp': timestamp_str,
+ 'ID': 0,
+ 'InvocationID': invocation_id,
+ 'PlaybackSpeed': 1,
+ 'SecondsListened': duration - 1,
+ 'SecondsRejected': 0,
+ 'StartPosition': 0,
+ 'StartReason': 2,
+ 'StopReason': None,
+ 'StreamID': stream_id,
+ 'TimeStamp': timestamp_str,
+ 'UpdatesRejected': 0
+ },
+ ]}
+
+ self._download_webpage(
+ base_url + '/Services/Analytics.svc/AddStreamRequests', video_id,
+ fatal=False, data=json.dumps(data).encode('utf8'), headers={'content-type': 'application/json'},
+ note='Marking watched', errnote='Unable to mark watched')
+
+ @staticmethod
+ def _extract_chapters(timestamps):
+ chapters = []
+ for timestamp in timestamps or []:
+ caption = timestamp.get('Caption')
+ start, duration = int_or_none(timestamp.get('Time')), int_or_none(timestamp.get('Duration'))
+ if not caption or start is None or duration is None:
+ continue
+ chapters.append({
+ 'start_time': start,
+ 'end_time': start + duration,
+ 'title': caption
+ })
+ return chapters
+
+ @staticmethod
+ def _extract_mhtml_formats(base_url, timestamps):
+ image_frags = {}
+ for timestamp in timestamps or []:
+ duration = timestamp.get('Duration')
+            obj_id, obj_sn = timestamp.get('ObjectIdentifier'), timestamp.get('ObjectSequenceNumber')
+ if timestamp.get('EventTargetType') == 'PowerPoint' and obj_id is not None and obj_sn is not None:
+ image_frags.setdefault('slides', []).append({
+ 'url': base_url + f'/Pages/Viewer/Image.aspx?id={obj_id}&number={obj_sn}',
+ 'duration': duration
+ })
+
+ obj_pid, session_id, abs_time = timestamp.get('ObjectPublicIdentifier'), timestamp.get('SessionID'), timestamp.get('AbsoluteTime')
+ if None not in (obj_pid, session_id, abs_time):
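+                # note: obj_sn may still be None here, since only obj_pid,
+                # session_id and abs_time are validated above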
+ image_frags.setdefault('chapter', []).append({
+ 'url': base_url + f'/Pages/Viewer/Thumb.aspx?eventTargetPID={obj_pid}&sessionPID={session_id}&number={obj_sn}&isPrimary=false&absoluteTime={abs_time}',
+ 'duration': duration,
+ })
+ for name, fragments in image_frags.items():
+ yield {
+ 'format_id': name,
+ 'ext': 'mhtml',
+ 'protocol': 'mhtml',
+ 'acodec': 'none',
+ 'vcodec': 'none',
+ 'url': 'about:invalid',
+ 'fragments': fragments
+ }
+
+ @staticmethod
+ def _json2srt(data, delivery):
+ def _gen_lines():
+ for i, line in enumerate(data):
+ start_time = line['Time']
+ duration = line.get('Duration')
+ if duration:
+ end_time = start_time + duration
+ else:
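+                    # no explicit duration: run until the next caption starts,
+                    # or until the end of the delivery for the last line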
+ end_time = traverse_obj(data, (i + 1, 'Time')) or delivery['Duration']
+ yield f'{i + 1}\n{srt_subtitles_timecode(start_time)} --> {srt_subtitles_timecode(end_time)}\n{line["Caption"]}'
+ return '\n\n'.join(_gen_lines())
+
+ def _get_subtitles(self, base_url, video_id, delivery):
+ subtitles = {}
+ for lang in delivery.get('AvailableLanguages') or []:
+ response = self._call_api(
+ base_url, '/Pages/Viewer/DeliveryInfo.aspx', video_id, fatal=False,
+ note='Downloading captions JSON metadata', query={
+ 'deliveryId': video_id,
+ 'getCaptions': True,
+ 'language': str(lang),
+ 'responseType': 'json'
+ }
+ )
+ if not isinstance(response, list):
+ continue
+ subtitles.setdefault(self._SUB_LANG_MAPPING.get(lang) or 'default', []).append({
+ 'ext': 'srt',
+ 'data': self._json2srt(response, delivery),
+ })
+ return subtitles
+
+ def _extract_streams_formats_and_subtitles(self, video_id, streams, **fmt_kwargs):
+ formats = []
+ subtitles = {}
+ for stream in streams or []:
+ stream_formats = []
+ http_stream_url = stream.get('StreamHttpUrl')
+ stream_url = stream.get('StreamUrl')
+
+ if http_stream_url:
+ stream_formats.append({'url': http_stream_url})
+
+ if stream_url:
+ media_type = stream.get('ViewerMediaFileTypeName')
+ if media_type in ('hls', ):
+ m3u8_formats, stream_subtitles = self._extract_m3u8_formats_and_subtitles(stream_url, video_id)
+ stream_formats.extend(m3u8_formats)
+ subtitles = self._merge_subtitles(subtitles, stream_subtitles)
+ else:
+ stream_formats.append({
+ 'url': stream_url
+ })
+ for fmt in stream_formats:
+ fmt.update({
+ 'format_note': stream.get('Tag'),
+ **fmt_kwargs
+ })
+ formats.extend(stream_formats)
+
+ return formats, subtitles
+
+ def _real_extract(self, url):
+ base_url, video_id = self._match_valid_url(url).group('base_url', 'id')
+ delivery_info = self._call_api(
+ base_url, '/Pages/Viewer/DeliveryInfo.aspx', video_id,
+ query={
+ 'deliveryId': video_id,
+ 'invocationId': '',
+ 'isLiveNotes': 'false',
+ 'refreshAuthCookie': 'true',
+ 'isActiveBroadcast': 'false',
+ 'isEditing': 'false',
+ 'isKollectiveAgentInstalled': 'false',
+ 'isEmbed': 'false',
+ 'responseType': 'json',
+ }
+ )
+
+ delivery = delivery_info['Delivery']
+ session_start_time = int_or_none(delivery.get('SessionStartTime'))
+ timestamps = delivery.get('Timestamps')
+
+        # The podcast stream usually combines the individual streams, so prefer it by default.
+ podcast_formats, podcast_subtitles = self._extract_streams_formats_and_subtitles(
+ video_id, delivery.get('PodcastStreams'), format_note='PODCAST')
+
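+        # individual streams (e.g. separate camera/screen feeds) are
+        # deprioritized in favour of the combined podcast stream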
+ streams_formats, streams_subtitles = self._extract_streams_formats_and_subtitles(
+ video_id, delivery.get('Streams'), preference=-10)
+
+ formats = podcast_formats + streams_formats
+ formats.extend(self._extract_mhtml_formats(base_url, timestamps))
+ subtitles = self._merge_subtitles(
+ podcast_subtitles, streams_subtitles, self.extract_subtitles(base_url, video_id, delivery))
+
+ self.mark_watched(base_url, video_id, delivery_info)
+
+ return {
+ 'id': video_id,
+ 'title': delivery.get('SessionName'),
+ 'cast': traverse_obj(delivery, ('Contributors', ..., 'DisplayName'), expected_type=lambda x: x or None),
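+            # SessionStartTime appears to be seconds since an epoch offset by
+            # roughly 369 years from Unix time (Windows FILETIME-style); the
+            # constant below shifts it to a Unix timestamp (assumption)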
+ 'timestamp': session_start_time - 11640000000 if session_start_time else None,
+ 'duration': delivery.get('Duration'),
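+            # the random query parameter appears to act as a cache-buster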
+ 'thumbnail': base_url + f'/Services/FrameGrabber.svc/FrameRedirect?objectId={video_id}&mode=Delivery&random={random()}',
+ 'average_rating': delivery.get('AverageRating'),
+ 'chapters': self._extract_chapters(timestamps),
+ 'uploader': delivery.get('OwnerDisplayName') or None,
+ 'uploader_id': delivery.get('OwnerId'),
+ 'description': delivery.get('SessionAbstract'),
+ 'tags': traverse_obj(delivery, ('Tags', ..., 'Content')),
+ 'channel_id': delivery.get('SessionGroupPublicID'),
+ 'channel': traverse_obj(delivery, 'SessionGroupLongName', 'SessionGroupShortName', get_all=False),
+ 'formats': formats,
+ 'subtitles': subtitles
+ }
+
+
+class PanoptoPlaylistIE(PanoptoBaseIE):
+ _VALID_URL = PanoptoBaseIE.BASE_URL_RE + r'/Pages/(Viewer|Embed)\.aspx.*(?:\?|&)pid=(?P<id>[a-f0-9-]+)'
+ _TESTS = [
+ {
+ 'url': 'https://howtovideos.hosted.panopto.com/Panopto/Pages/Viewer.aspx?pid=f3b39fcf-882f-4849-93d6-a9f401236d36&id=5fa74e93-3d87-4694-b60e-aaa4012214ed&advance=true',
+ 'info_dict': {
+ 'title': 'Featured Video Tutorials',
+ 'id': 'f3b39fcf-882f-4849-93d6-a9f401236d36',
+ 'description': '',
+ },
+ 'playlist_mincount': 36
+ },
+ {
+ 'url': 'https://utsa.hosted.panopto.com/Panopto/Pages/Viewer.aspx?pid=e2900555-3ad4-4bdb-854d-ad2401686190',
+ 'info_dict': {
+ 'title': 'Library Website Introduction Playlist',
+ 'id': 'e2900555-3ad4-4bdb-854d-ad2401686190',
+ 'description': 'md5:f958bca50a1cbda15fdc1e20d32b3ecb',
+ },
+ 'playlist_mincount': 4
+ },
+ ]
+
+ def _entries(self, base_url, playlist_id, session_list_id):
+ session_list_info = self._call_api(
+ base_url, f'/Api/SessionLists/{session_list_id}?collections[0].maxCount=500&collections[0].name=items', playlist_id)
+
+ items = session_list_info['Items']
+ for item in items:
+ if item.get('TypeName') != 'Session':
+ self.report_warning('Got an item in the playlist that is not a Session' + bug_reports_message(), only_once=True)
+ continue
+ yield {
+ '_type': 'url',
+ 'id': item.get('Id'),
+ 'url': item.get('ViewerUri'),
+ 'title': item.get('Name'),
+ 'description': item.get('Description'),
+ 'duration': item.get('Duration'),
+ 'channel': traverse_obj(item, ('Parent', 'Name')),
+ 'channel_id': traverse_obj(item, ('Parent', 'Id'))
+ }
+
+ def _real_extract(self, url):
+ base_url, playlist_id = self._match_valid_url(url).group('base_url', 'id')
+
+ video_id = get_first(parse_qs(url), 'id')
+ if video_id:
+ if self.get_param('noplaylist'):
+ self.to_screen('Downloading just video %s because of --no-playlist' % video_id)
+ return self.url_result(base_url + f'/Pages/Viewer.aspx?id={video_id}', ie_key=PanoptoIE.ie_key(), video_id=video_id)
+ else:
+ self.to_screen(f'Downloading playlist {playlist_id}; add --no-playlist to just download video {video_id}')
+
+ playlist_info = self._call_api(base_url, f'/Api/Playlists/{playlist_id}', playlist_id)
+ return self.playlist_result(
+ self._entries(base_url, playlist_id, playlist_info['SessionListId']),
+ playlist_id=playlist_id, playlist_title=playlist_info.get('Name'),
+ playlist_description=playlist_info.get('Description'))
+
+
+class PanoptoListIE(PanoptoBaseIE):
+ _VALID_URL = PanoptoBaseIE.BASE_URL_RE + r'/Pages/Sessions/List\.aspx'
+ _PAGE_SIZE = 250
+ _TESTS = [
+ {
+ 'url': 'https://demo.hosted.panopto.com/Panopto/Pages/Sessions/List.aspx#folderID=%22e4c6a2fc-1214-4ca0-8fb7-aef2e29ff63a%22',
+ 'info_dict': {
+ 'id': 'e4c6a2fc-1214-4ca0-8fb7-aef2e29ff63a',
+ 'title': 'Showcase Videos'
+ },
+ 'playlist_mincount': 140
+ },
+ {
+ 'url': 'https://demo.hosted.panopto.com/Panopto/Pages/Sessions/List.aspx#view=2&maxResults=250',
+ 'info_dict': {
+ 'id': 'panopto_list',
+ 'title': 'panopto_list'
+ },
+ 'playlist_mincount': 300
+ },
+ {
+ # Folder that contains 8 folders and a playlist
+ 'url': 'https://howtovideos.hosted.panopto.com/Panopto/Pages/Sessions/List.aspx?noredirect=true#folderID=%224b9de7ae-0080-4158-8496-a9ba01692c2e%22',
+ 'info_dict': {
+ 'id': '4b9de7ae-0080-4158-8496-a9ba01692c2e',
+ 'title': 'Video Tutorials'
+ },
+ 'playlist_mincount': 9
+ }
+ ]
+
+ def _fetch_page(self, base_url, query_params, display_id, page):
+ params = {
+ 'sortColumn': 1,
+ 'getFolderData': True,
+ 'includePlaylists': True,
+ **query_params,
+ 'page': page,
+ 'maxResults': self._PAGE_SIZE,
+ }
+
+ response = self._call_api(
+ base_url, '/Services/Data.svc/GetSessions', f'{display_id} page {page + 1}',
+ data={'queryParameters': params}, fatal=False)
+
+ for result in get_first(response, 'Results', default=[]):
+ # This could be a video, playlist (or maybe something else)
+ item_id = result.get('DeliveryID')
+ yield {
+ '_type': 'url',
+ 'id': item_id,
+ 'title': result.get('SessionName'),
+ 'url': traverse_obj(result, 'ViewerUrl', 'EmbedUrl', get_all=False) or (base_url + f'/Pages/Viewer.aspx?id={item_id}'),
+ 'duration': result.get('Duration'),
+ 'channel': result.get('FolderName'),
+ 'channel_id': result.get('FolderID'),
+ }
+
+ for folder in get_first(response, 'Subfolders', default=[]):
+ folder_id = folder.get('ID')
+ yield self.url_result(
+ base_url + f'/Pages/Sessions/List.aspx#folderID="{folder_id}"',
+ ie_key=PanoptoListIE.ie_key(), video_id=folder_id, title=folder.get('Name'))
+
+ def _extract_folder_metadata(self, base_url, folder_id):
+ response = self._call_api(
+ base_url, '/Services/Data.svc/GetFolderInfo', folder_id,
+ data={'folderID': folder_id}, fatal=False)
+ return {
+ 'title': get_first(response, 'Name')
+ }
+
+ def _real_extract(self, url):
+ mobj = self._match_valid_url(url)
+ base_url = mobj.group('base_url')
+
+ query_params = self._parse_fragment(url)
+ folder_id, display_id = query_params.get('folderID'), 'panopto_list'
+
+ if query_params.get('isSubscriptionsPage'):
+ display_id = 'subscriptions'
+ if not query_params.get('subscribableTypes'):
+ query_params['subscribableTypes'] = [0, 1, 2]
+ elif query_params.get('isSharedWithMe'):
+ display_id = 'sharedwithme'
+ elif folder_id:
+ display_id = folder_id
+
+ query = query_params.get('query')
+ if query:
+ display_id += f': query "{query}"'
+
+ info = {
+ '_type': 'playlist',
+ 'id': display_id,
+ 'title': display_id,
+ }
+ if folder_id:
+ info.update(self._extract_folder_metadata(base_url, folder_id))
+
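+        # entries are fetched lazily, _PAGE_SIZE results per request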
+ info['entries'] = OnDemandPagedList(
+ functools.partial(self._fetch_page, base_url, query_params, display_id), self._PAGE_SIZE)
+
+ return info
diff --git a/yt_dlp/extractor/paramountplus.py b/yt_dlp/extractor/paramountplus.py
new file mode 100644
index 0000000..7e472a6
--- /dev/null
+++ b/yt_dlp/extractor/paramountplus.py
@@ -0,0 +1,201 @@
+import itertools
+
+from .common import InfoExtractor
+from .cbs import CBSBaseIE
+from ..utils import (
+ ExtractorError,
+ int_or_none,
+ url_or_none,
+)
+
+
+class ParamountPlusIE(CBSBaseIE):
+ _VALID_URL = r'''(?x)
+ (?:
+ paramountplus:|
+ https?://(?:www\.)?(?:
+ paramountplus\.com/(?:shows|movies)/(?:video|[^/]+/video|[^/]+)/
+ )(?P<id>[\w-]+))'''
+
+ # All tests are blocked outside US
+ _TESTS = [{
+ 'url': 'https://www.paramountplus.com/shows/video/Oe44g5_NrlgiZE3aQVONleD6vXc8kP0k/',
+ 'info_dict': {
+ 'id': 'Oe44g5_NrlgiZE3aQVONleD6vXc8kP0k',
+ 'ext': 'mp4',
+ 'title': 'CatDog - Climb Every CatDog/The Canine Mutiny',
+ 'description': 'md5:7ac835000645a69933df226940e3c859',
+ 'duration': 1426,
+ 'timestamp': 920264400,
+ 'upload_date': '19990301',
+ 'uploader': 'CBSI-NEW',
+ 'episode_number': 5,
+ 'thumbnail': r're:https?://.+\.jpg$',
+ 'season': 'Season 2',
+ 'chapters': 'count:3',
+ 'episode': 'Episode 5',
+ 'season_number': 2,
+ 'series': 'CatDog',
+ },
+ 'params': {
+ 'skip_download': 'm3u8',
+ },
+ }, {
+ 'url': 'https://www.paramountplus.com/shows/video/6hSWYWRrR9EUTz7IEe5fJKBhYvSUfexd/',
+ 'info_dict': {
+ 'id': '6hSWYWRrR9EUTz7IEe5fJKBhYvSUfexd',
+ 'ext': 'mp4',
+ 'title': '7/23/21 WEEK IN REVIEW (Rep. Jahana Hayes/Howard Fineman/Sen. Michael Bennet/Sheera Frenkel & Cecilia Kang)',
+ 'description': 'md5:f4adcea3e8b106192022e121f1565bae',
+ 'duration': 2506,
+ 'timestamp': 1627063200,
+ 'upload_date': '20210723',
+ 'uploader': 'CBSI-NEW',
+ 'episode_number': 81,
+ 'thumbnail': r're:https?://.+\.jpg$',
+ 'season': 'Season 2',
+ 'chapters': 'count:4',
+ 'episode': 'Episode 81',
+ 'season_number': 2,
+ 'series': 'Tooning Out The News',
+ },
+ 'params': {
+ 'skip_download': 'm3u8',
+ },
+ }, {
+ 'url': 'https://www.paramountplus.com/movies/video/vM2vm0kE6vsS2U41VhMRKTOVHyQAr6pC/',
+ 'info_dict': {
+ 'id': 'vM2vm0kE6vsS2U41VhMRKTOVHyQAr6pC',
+ 'ext': 'mp4',
+ 'title': 'Daddy\'s Home',
+ 'upload_date': '20151225',
+ 'description': 'md5:9a6300c504d5e12000e8707f20c54745',
+ 'uploader': 'CBSI-NEW',
+ 'timestamp': 1451030400,
+ 'thumbnail': r're:https?://.+\.jpg$',
+ 'chapters': 'count:0',
+ 'duration': 5761,
+ 'series': 'Paramount+ Movies',
+ },
+ 'params': {
+ 'skip_download': 'm3u8',
+ },
+ 'skip': 'DRM',
+ }, {
+ 'url': 'https://www.paramountplus.com/movies/video/5EKDXPOzdVf9voUqW6oRuocyAEeJGbEc/',
+ 'info_dict': {
+ 'id': '5EKDXPOzdVf9voUqW6oRuocyAEeJGbEc',
+ 'ext': 'mp4',
+ 'uploader': 'CBSI-NEW',
+ 'description': 'md5:bc7b6fea84ba631ef77a9bda9f2ff911',
+ 'timestamp': 1577865600,
+ 'title': 'Sonic the Hedgehog',
+ 'upload_date': '20200101',
+ 'thumbnail': r're:https?://.+\.jpg$',
+ 'chapters': 'count:0',
+ 'duration': 5932,
+ 'series': 'Paramount+ Movies',
+ },
+ 'params': {
+ 'skip_download': 'm3u8',
+ },
+ 'skip': 'DRM',
+ }, {
+ 'url': 'https://www.paramountplus.com/shows/the-real-world/video/mOVeHeL9ub9yWdyzSZFYz8Uj4ZBkVzQg/the-real-world-reunion/',
+ 'only_matching': True,
+ }, {
+ 'url': 'https://www.paramountplus.com/shows/video/mOVeHeL9ub9yWdyzSZFYz8Uj4ZBkVzQg/',
+ 'only_matching': True,
+ }, {
+ 'url': 'https://www.paramountplus.com/movies/video/W0VyStQqUnqKzJkrpSAIARuCc9YuYGNy/',
+ 'only_matching': True,
+ }, {
+ 'url': 'https://www.paramountplus.com/movies/paw-patrol-the-movie/W0VyStQqUnqKzJkrpSAIARuCc9YuYGNy/',
+ 'only_matching': True,
+ }]
+
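+    # mpx_acc is the ThePlatform (MPX) account ID used for Paramount+ content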
+ def _extract_video_info(self, content_id, mpx_acc=2198311517):
+ items_data = self._download_json(
+ f'https://www.paramountplus.com/apps-api/v2.0/androidtv/video/cid/{content_id}.json',
+ content_id, query={
+ 'locale': 'en-us',
+ 'at': 'ABCXgPuoStiPipsK0OHVXIVh68zNys+G4f7nW9R6qH68GDOcneW6Kg89cJXGfiQCsj0=',
+ }, headers=self.geo_verification_headers())
+
+ asset_types = {
+ item.get('assetType'): {
+ 'format': 'SMIL',
+ 'formats': 'M3U+none,MPEG4', # '+none' specifies ProtectionScheme (no DRM)
+ } for item in items_data['itemList']
+ }
+ item = items_data['itemList'][-1]
+
+ info, error = {}, None
+ metadata = {
+ 'title': item.get('title'),
+ 'series': item.get('seriesTitle'),
+ 'season_number': int_or_none(item.get('seasonNum')),
+ 'episode_number': int_or_none(item.get('episodeNum')),
+ 'duration': int_or_none(item.get('duration')),
+ 'thumbnail': url_or_none(item.get('thumbnail')),
+ }
+ try:
+ info = self._extract_common_video_info(content_id, asset_types, mpx_acc, extra_info=metadata)
+ except ExtractorError as e:
+ error = e
+
+ # Check for DRM formats to give appropriate error
+ if not info.get('formats'):
+ for query in asset_types.values():
+ query['formats'] = 'MPEG-DASH,M3U,MPEG4' # allows DRM formats
+
+ try:
+ drm_info = self._extract_common_video_info(content_id, asset_types, mpx_acc, extra_info=metadata)
+ except ExtractorError:
+ if error:
+ raise error from None
+ raise
+ if drm_info['formats']:
+ self.report_drm(content_id)
+ elif error:
+ raise error
+
+ return info
+
+
+class ParamountPlusSeriesIE(InfoExtractor):
+ _VALID_URL = r'https?://(?:www\.)?paramountplus\.com/shows/(?P<id>[a-zA-Z0-9-_]+)/?(?:[#?]|$)'
+ _TESTS = [{
+ 'url': 'https://www.paramountplus.com/shows/drake-josh',
+ 'playlist_mincount': 50,
+ 'info_dict': {
+ 'id': 'drake-josh',
+ }
+ }, {
+ 'url': 'https://www.paramountplus.com/shows/hawaii_five_0/',
+ 'playlist_mincount': 240,
+ 'info_dict': {
+ 'id': 'hawaii_five_0',
+ }
+ }, {
+ 'url': 'https://www.paramountplus.com/shows/spongebob-squarepants/',
+ 'playlist_mincount': 248,
+ 'info_dict': {
+ 'id': 'spongebob-squarepants',
+ }
+ }]
+
+ def _entries(self, show_name):
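+        # the site's own XHR endpoint pages episodes 50 at a time;
+        # 'success' appears to turn false once we run past the last page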
+ for page in itertools.count():
+ show_json = self._download_json(
+ f'https://www.paramountplus.com/shows/{show_name}/xhr/episodes/page/{page}/size/50/xs/0/season/0', show_name)
+ if not show_json.get('success'):
+ return
+ for episode in show_json['result']['data']:
+ yield self.url_result(
+ 'https://www.paramountplus.com%s' % episode['url'],
+ ie=ParamountPlusIE.ie_key(), video_id=episode['content_id'])
+
+ def _real_extract(self, url):
+ show_name = self._match_id(url)
+ return self.playlist_result(self._entries(show_name), playlist_id=show_name)
diff --git a/yt_dlp/extractor/parler.py b/yt_dlp/extractor/parler.py
new file mode 100644
index 0000000..563012f
--- /dev/null
+++ b/yt_dlp/extractor/parler.py
@@ -0,0 +1,91 @@
+import functools
+
+from .common import InfoExtractor
+from .youtube import YoutubeIE
+from ..utils import (
+ clean_html,
+ int_or_none,
+ strip_or_none,
+ traverse_obj,
+ unified_timestamp,
+ urljoin,
+)
+
+
+class ParlerIE(InfoExtractor):
+ IE_DESC = 'Posts on parler.com'
+ _VALID_URL = r'https?://parler\.com/feed/(?P<id>[0-9a-f]{8}-(?:[0-9a-f]{4}-){3}[0-9a-f]{12})'
+ _TESTS = [
+ {
+ 'url': 'https://parler.com/feed/df79fdba-07cc-48fe-b085-3293897520d7',
+ 'md5': '16e0f447bf186bb3cf64de5bbbf4d22d',
+ 'info_dict': {
+ 'id': 'df79fdba-07cc-48fe-b085-3293897520d7',
+ 'ext': 'mp4',
+ 'thumbnail': 'https://bl-images.parler.com/videos/6ce7cdf3-a27a-4d72-bf9c-d3e17ce39a66/thumbnail.jpeg',
+ 'title': 'Parler video #df79fdba-07cc-48fe-b085-3293897520d7',
+ 'description': 'md5:6f220bde2df4a97cbb89ac11f1fd8197',
+ 'timestamp': 1659785481,
+ 'upload_date': '20220806',
+ 'uploader': 'Tulsi Gabbard',
+ 'uploader_id': 'TulsiGabbard',
+ 'uploader_url': 'https://parler.com/TulsiGabbard',
+ 'view_count': int,
+ 'comment_count': int,
+ 'repost_count': int,
+ },
+ },
+ {
+ 'url': 'https://parler.com/feed/f23b85c1-6558-470f-b9ff-02c145f28da5',
+ 'md5': 'eaba1ff4a10fe281f5ce74e930ab2cb4',
+ 'info_dict': {
+ 'id': 'r5vkSaz8PxQ',
+ 'ext': 'mp4',
+ 'live_status': 'not_live',
+ 'comment_count': int,
+ 'duration': 1267,
+ 'like_count': int,
+ 'channel_follower_count': int,
+ 'channel_id': 'UCox6YeMSY1PQInbCtTaZj_w',
+ 'upload_date': '20220716',
+ 'thumbnail': 'https://i.ytimg.com/vi/r5vkSaz8PxQ/maxresdefault.jpg',
+ 'tags': 'count:17',
+ 'availability': 'public',
+ 'categories': ['Entertainment'],
+ 'playable_in_embed': True,
+ 'channel': 'Who Knows What! With Mahesh & Friends',
+ 'title': 'Tom MacDonald Names Reaction',
+ 'uploader': 'Who Knows What! With Mahesh & Friends',
+ 'uploader_id': '@maheshchookolingo',
+ 'age_limit': 0,
+ 'description': 'md5:33c21f0d35ae6dc2edf3007d6696baea',
+ 'channel_url': 'https://www.youtube.com/channel/UCox6YeMSY1PQInbCtTaZj_w',
+ 'view_count': int,
+ 'uploader_url': 'http://www.youtube.com/@maheshchookolingo',
+ },
+ },
+ ]
+
+ def _real_extract(self, url):
+ video_id = self._match_id(url)
+ data = self._download_json(f'https://api.parler.com/v0/public/parleys/{video_id}',
+ video_id)['data']
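+        # posts that merely share an external video expose it via 'link';
+        # these are assumed to be YouTube URLs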
+ if data.get('link'):
+ return self.url_result(data['link'], YoutubeIE)
+
+ return {
+ 'id': video_id,
+ 'title': strip_or_none(data.get('title')) or '',
+ **traverse_obj(data, {
+ 'url': ('video', 'videoSrc'),
+ 'thumbnail': ('video', 'thumbnailUrl'),
+ 'description': ('body', {clean_html}),
+ 'timestamp': ('date_created', {unified_timestamp}),
+ 'uploader': ('user', 'name', {strip_or_none}),
+ 'uploader_id': ('user', 'username', {str}),
+ 'uploader_url': ('user', 'username', {functools.partial(urljoin, 'https://parler.com/')}),
+ 'view_count': ('views', {int_or_none}),
+ 'comment_count': ('total_comments', {int_or_none}),
+ 'repost_count': ('echos', {int_or_none}),
+ })
+ }
diff --git a/yt_dlp/extractor/parlview.py b/yt_dlp/extractor/parlview.py
new file mode 100644
index 0000000..777b008
--- /dev/null
+++ b/yt_dlp/extractor/parlview.py
@@ -0,0 +1,64 @@
+from .common import InfoExtractor
+from ..compat import compat_str
+from ..utils import (
+ int_or_none,
+ try_get,
+ unified_timestamp,
+)
+
+
+class ParlviewIE(InfoExtractor):
+ _WORKING = False
+ _VALID_URL = r'https?://(?:www\.)?parlview\.aph\.gov\.au/(?:[^/]+)?\bvideoID=(?P<id>\d{6})'
+ _TESTS = [{
+ 'url': 'https://parlview.aph.gov.au/mediaPlayer.php?videoID=542661',
+ 'info_dict': {
+ 'id': '542661',
+ 'ext': 'mp4',
+ 'title': "Australia's Family Law System [Part 2]",
+ 'duration': 5799,
+ 'description': 'md5:7099883b391619dbae435891ca871a62',
+ 'timestamp': 1621430700,
+ 'upload_date': '20210519',
+ 'uploader': 'Joint Committee',
+ },
+ 'params': {
+ 'skip_download': True,
+ }
+ }, {
+ 'url': 'https://parlview.aph.gov.au/mediaPlayer.php?videoID=539936',
+ 'only_matching': True,
+ }]
+ _API_URL = 'https://parlview.aph.gov.au/api_v3/1/playback/getUniversalPlayerConfig?videoID=%s&format=json'
+ _MEDIA_INFO_URL = 'https://parlview.aph.gov.au/ajaxPlayer.php?videoID=%s&tabNum=4&action=loadTab'
+
+ def _real_extract(self, url):
+ video_id = self._match_id(url)
+ webpage = self._download_webpage(url, video_id)
+ media = self._download_json(self._API_URL % video_id, video_id).get('media')
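+        # timecode_offsets[0] appears to look like '<offset>/<date_with_underscores>';
+        # the '/' fallback keeps the split below from raising on missing data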
+ timestamp = try_get(media, lambda x: x['timeMap']['source']['timecode_offsets'][0], compat_str) or '/'
+
+ stream = try_get(media, lambda x: x['renditions'][0], dict)
+ if not stream:
+ self.raise_no_formats('No streams were detected')
+ elif stream.get('streamType') != 'VOD':
+ self.raise_no_formats('Unknown type of stream was detected: "%s"' % str(stream.get('streamType')))
+ formats = self._extract_m3u8_formats(stream['url'], video_id, 'mp4', 'm3u8_native')
+
+ media_info = self._download_webpage(
+ self._MEDIA_INFO_URL % video_id, video_id, note='Downloading media info', fatal=False)
+
+ return {
+ 'id': video_id,
+ 'url': url,
+ 'title': self._html_search_regex(r'<h2>([^<]+)<', webpage, 'title', fatal=False),
+ 'formats': formats,
+ 'duration': int_or_none(media.get('duration')),
+ 'timestamp': unified_timestamp(timestamp.split('/', 1)[1].replace('_', ' ')),
+ 'description': self._html_search_regex(
+ r'<div[^>]+class="descripti?on"[^>]*>[^>]+<strong>[^>]+>[^>]+>([^<]+)',
+ webpage, 'description', fatal=False),
+ 'uploader': self._html_search_regex(
+ r'<td>[^>]+>Channel:[^>]+>([^<]+)', media_info, 'channel', fatal=False),
+ 'thumbnail': media.get('staticImage'),
+ }
diff --git a/yt_dlp/extractor/patreon.py b/yt_dlp/extractor/patreon.py
new file mode 100644
index 0000000..d2ddb72
--- /dev/null
+++ b/yt_dlp/extractor/patreon.py
@@ -0,0 +1,454 @@
+import itertools
+
+from .common import InfoExtractor
+from .vimeo import VimeoIE
+from ..compat import compat_urllib_parse_unquote
+from ..networking.exceptions import HTTPError
+from ..utils import (
+ KNOWN_EXTENSIONS,
+ ExtractorError,
+ clean_html,
+ determine_ext,
+ int_or_none,
+ mimetype2ext,
+ parse_iso8601,
+ str_or_none,
+ traverse_obj,
+ try_get,
+ url_or_none,
+ urljoin,
+)
+
+
+class PatreonBaseIE(InfoExtractor):
+ USER_AGENT = 'Patreon/7.6.28 (Android; Android 11; Scale/2.10)'
+
+ def _call_api(self, ep, item_id, query=None, headers=None, fatal=True, note=None):
+ if headers is None:
+ headers = {}
+ if 'User-Agent' not in headers:
+ headers['User-Agent'] = self.USER_AGENT
+ if query:
+ query.update({'json-api-version': 1.0})
+
+ try:
+ return self._download_json(
+ f'https://www.patreon.com/api/{ep}',
+                item_id, note=note or 'Downloading API JSON',
+ query=query, fatal=fatal, headers=headers)
+ except ExtractorError as e:
+ if not isinstance(e.cause, HTTPError) or mimetype2ext(e.cause.response.headers.get('Content-Type')) != 'json':
+ raise
+ err_json = self._parse_json(self._webpage_read_content(e.cause.response, None, item_id), item_id, fatal=False)
+ err_message = traverse_obj(err_json, ('errors', ..., 'detail'), get_all=False)
+ if err_message:
+ raise ExtractorError(f'Patreon said: {err_message}', expected=True)
+ raise
+
+
+class PatreonIE(PatreonBaseIE):
+ _VALID_URL = r'https?://(?:www\.)?patreon\.com/(?:creation\?hid=|posts/(?:[\w-]+-)?)(?P<id>\d+)'
+ _TESTS = [{
+ 'url': 'http://www.patreon.com/creation?hid=743933',
+ 'md5': 'e25505eec1053a6e6813b8ed369875cc',
+ 'info_dict': {
+ 'id': '743933',
+ 'ext': 'mp3',
+ 'title': 'Episode 166: David Smalley of Dogma Debate',
+ 'description': 'md5:34d207dd29aa90e24f1b3f58841b81c7',
+ 'uploader': 'Cognitive Dissonance Podcast',
+ 'thumbnail': 're:^https?://.*$',
+ 'timestamp': 1406473987,
+ 'upload_date': '20140727',
+ 'uploader_id': '87145',
+ 'like_count': int,
+ 'comment_count': int,
+ 'uploader_url': 'https://www.patreon.com/dissonancepod',
+ 'channel_id': '80642',
+ 'channel_url': 'https://www.patreon.com/dissonancepod',
+ 'channel_follower_count': int,
+ },
+ }, {
+ 'url': 'http://www.patreon.com/creation?hid=754133',
+ 'md5': '3eb09345bf44bf60451b8b0b81759d0a',
+ 'info_dict': {
+ 'id': '754133',
+ 'ext': 'mp3',
+ 'title': 'CD 167 Extra',
+ 'uploader': 'Cognitive Dissonance Podcast',
+ 'thumbnail': 're:^https?://.*$',
+ 'like_count': int,
+ 'comment_count': int,
+ 'uploader_url': 'https://www.patreon.com/dissonancepod',
+ },
+ 'skip': 'Patron-only content',
+ }, {
+ 'url': 'https://www.patreon.com/creation?hid=1682498',
+ 'info_dict': {
+ 'id': 'SU4fj_aEMVw',
+ 'ext': 'mp4',
+ 'title': 'I\'m on Patreon!',
+ 'uploader': 'TraciJHines',
+ 'thumbnail': 're:^https?://.*$',
+ 'upload_date': '20150211',
+ 'description': 'md5:8af6425f50bd46fbf29f3db0fc3a8364',
+ 'uploader_id': 'TraciJHines',
+ 'categories': ['Entertainment'],
+ 'duration': 282,
+ 'view_count': int,
+ 'tags': 'count:39',
+ 'age_limit': 0,
+ 'channel': 'TraciJHines',
+ 'channel_url': 'https://www.youtube.com/channel/UCGLim4T2loE5rwCMdpCIPVg',
+ 'live_status': 'not_live',
+ 'like_count': int,
+ 'channel_id': 'UCGLim4T2loE5rwCMdpCIPVg',
+ 'availability': 'public',
+ 'channel_follower_count': int,
+ 'playable_in_embed': True,
+ 'uploader_url': 'http://www.youtube.com/user/TraciJHines',
+ 'comment_count': int,
+ },
+ 'params': {
+ 'noplaylist': True,
+ 'skip_download': True,
+ }
+ }, {
+ 'url': 'https://www.patreon.com/posts/episode-166-of-743933',
+ 'only_matching': True,
+ }, {
+ 'url': 'https://www.patreon.com/posts/743933',
+ 'only_matching': True,
+ }, {
+ 'url': 'https://www.patreon.com/posts/kitchen-as-seen-51706779',
+ 'md5': '96656690071f6d64895866008484251b',
+ 'info_dict': {
+ 'id': '555089736',
+ 'ext': 'mp4',
+ 'title': 'KITCHEN AS SEEN ON DEEZ NUTS EXTENDED!',
+ 'uploader': 'Cold Ones',
+ 'thumbnail': 're:^https?://.*$',
+ 'upload_date': '20210526',
+ 'description': 'md5:557a409bd79d3898689419094934ba79',
+ 'uploader_id': '14936315',
+ },
+ 'skip': 'Patron-only content'
+ }, {
+ # m3u8 video (https://github.com/yt-dlp/yt-dlp/issues/2277)
+ 'url': 'https://www.patreon.com/posts/video-sketchbook-32452882',
+ 'info_dict': {
+ 'id': '32452882',
+ 'ext': 'mp4',
+ 'comment_count': int,
+ 'uploader_id': '4301314',
+ 'like_count': int,
+ 'timestamp': 1576696962,
+ 'upload_date': '20191218',
+ 'thumbnail': r're:^https?://.*$',
+ 'uploader_url': 'https://www.patreon.com/loish',
+ 'description': 'md5:e2693e97ee299c8ece47ffdb67e7d9d2',
+ 'title': 'VIDEO // sketchbook flipthrough',
+ 'uploader': 'Loish ',
+ 'tags': ['sketchbook', 'video'],
+ 'channel_id': '1641751',
+ 'channel_url': 'https://www.patreon.com/loish',
+ 'channel_follower_count': int,
+ }
+ }, {
+        # Broken videos are listed under 'media' (when it is included); the real one is under 'post_file'
+ 'url': 'https://www.patreon.com/posts/premium-access-70282931',
+ 'info_dict': {
+ 'id': '70282931',
+ 'ext': 'mp4',
+ 'title': '[Premium Access + Uncut] The Office - 2x6 The Fight - Group Reaction',
+ 'channel_url': 'https://www.patreon.com/thenormies',
+ 'channel_id': '573397',
+ 'uploader_id': '2929435',
+ 'uploader': 'The Normies',
+ 'description': 'md5:79c9fd8778e2cef84049a94c058a5e23',
+ 'comment_count': int,
+ 'upload_date': '20220809',
+ 'thumbnail': r're:^https?://.*$',
+ 'channel_follower_count': int,
+ 'like_count': int,
+ 'timestamp': 1660052820,
+ 'tags': ['The Office', 'early access', 'uncut'],
+ 'uploader_url': 'https://www.patreon.com/thenormies',
+ },
+ 'skip': 'Patron-only content',
+ }]
+
+ def _real_extract(self, url):
+ video_id = self._match_id(url)
+ post = self._call_api(
+ f'posts/{video_id}', video_id, query={
+ 'fields[media]': 'download_url,mimetype,size_bytes',
+ 'fields[post]': 'comment_count,content,embed,image,like_count,post_file,published_at,title,current_user_can_view',
+ 'fields[user]': 'full_name,url',
+ 'fields[post_tag]': 'value',
+ 'fields[campaign]': 'url,name,patron_count',
+ 'json-api-use-default-includes': 'false',
+ 'include': 'audio,user,user_defined_tags,campaign,attachments_media',
+ })
+ attributes = post['data']['attributes']
+ title = attributes['title'].strip()
+ image = attributes.get('image') or {}
+ info = {
+ 'id': video_id,
+ 'title': title,
+ 'description': clean_html(attributes.get('content')),
+ 'thumbnail': image.get('large_url') or image.get('url'),
+ 'timestamp': parse_iso8601(attributes.get('published_at')),
+ 'like_count': int_or_none(attributes.get('like_count')),
+ 'comment_count': int_or_none(attributes.get('comment_count')),
+ }
+ can_view_post = traverse_obj(attributes, 'current_user_can_view')
+ if can_view_post and info['comment_count']:
+ info['__post_extractor'] = self.extract_comments(video_id)
+
+ for i in post.get('included', []):
+ i_type = i.get('type')
+ if i_type == 'media':
+ media_attributes = i.get('attributes') or {}
+ download_url = media_attributes.get('download_url')
+ ext = mimetype2ext(media_attributes.get('mimetype'))
+
+ # if size_bytes is None, this media file is likely unavailable
+ # See: https://github.com/yt-dlp/yt-dlp/issues/4608
+ size_bytes = int_or_none(media_attributes.get('size_bytes'))
+ if download_url and ext in KNOWN_EXTENSIONS and size_bytes is not None:
+ # XXX: what happens if there are multiple attachments?
+ return {
+ **info,
+ 'ext': ext,
+ 'filesize': size_bytes,
+ 'url': download_url,
+ }
+ elif i_type == 'user':
+ user_attributes = i.get('attributes')
+ if user_attributes:
+ info.update({
+ 'uploader': user_attributes.get('full_name'),
+ 'uploader_id': str_or_none(i.get('id')),
+ 'uploader_url': user_attributes.get('url'),
+ })
+
+ elif i_type == 'post_tag':
+ info.setdefault('tags', []).append(traverse_obj(i, ('attributes', 'value')))
+
+ elif i_type == 'campaign':
+ info.update({
+ 'channel': traverse_obj(i, ('attributes', 'title')),
+ 'channel_id': str_or_none(i.get('id')),
+ 'channel_url': traverse_obj(i, ('attributes', 'url')),
+ 'channel_follower_count': int_or_none(traverse_obj(i, ('attributes', 'patron_count'))),
+ })
+
+ # handle Vimeo embeds
+ if try_get(attributes, lambda x: x['embed']['provider']) == 'Vimeo':
+ embed_html = try_get(attributes, lambda x: x['embed']['html'])
+ v_url = url_or_none(compat_urllib_parse_unquote(
+ self._search_regex(r'(https(?:%3A%2F%2F|://)player\.vimeo\.com.+app_id(?:=|%3D)+\d+)', embed_html, 'vimeo url', fatal=False)))
+ if v_url:
+ return {
+ **info,
+ '_type': 'url_transparent',
+ 'url': VimeoIE._smuggle_referrer(v_url, 'https://patreon.com'),
+ 'ie_key': 'Vimeo',
+ }
+
+ embed_url = try_get(attributes, lambda x: x['embed']['url'])
+ if embed_url:
+ return {
+ **info,
+ '_type': 'url',
+ 'url': embed_url,
+ }
+
+ post_file = traverse_obj(attributes, 'post_file')
+ if post_file:
+ name = post_file.get('name')
+ ext = determine_ext(name)
+ if ext in KNOWN_EXTENSIONS:
+ return {
+ **info,
+ 'ext': ext,
+ 'url': post_file['url'],
+ }
+ elif name == 'video' or determine_ext(post_file.get('url')) == 'm3u8':
+ formats, subtitles = self._extract_m3u8_formats_and_subtitles(post_file['url'], video_id)
+ return {
+ **info,
+ 'formats': formats,
+ 'subtitles': subtitles,
+ }
+
+ if can_view_post is False:
+ self.raise_no_formats('You do not have access to this post', video_id=video_id, expected=True)
+ else:
+ self.raise_no_formats('No supported media found in this post', video_id=video_id, expected=True)
+ return info
+
+ def _get_comments(self, post_id):
+ cursor = None
+ count = 0
+ params = {
+ 'page[count]': 50,
+ 'include': 'parent.commenter.campaign,parent.post.user,parent.post.campaign.creator,parent.replies.parent,parent.replies.commenter.campaign,parent.replies.post.user,parent.replies.post.campaign.creator,commenter.campaign,post.user,post.campaign.creator,replies.parent,replies.commenter.campaign,replies.post.user,replies.post.campaign.creator,on_behalf_of_campaign',
+ 'fields[comment]': 'body,created,is_by_creator',
+ 'fields[user]': 'image_url,full_name,url',
+ 'filter[flair]': 'image_tiny_url,name',
+ 'sort': '-created',
+ 'json-api-version': 1.0,
+ 'json-api-use-default-includes': 'false',
+ }
+
+ for page in itertools.count(1):
+ params.update({'page[cursor]': cursor} if cursor else {})
+ response = self._call_api(
+ f'posts/{post_id}/comments', post_id, query=params, note='Downloading comments page %d' % page)
+
+ cursor = None
+ for comment in traverse_obj(response, (('data', ('included', lambda _, v: v['type'] == 'comment')), ...)):
+ count += 1
+ comment_id = comment.get('id')
+ attributes = comment.get('attributes') or {}
+ if comment_id is None:
+ continue
+ author_id = traverse_obj(comment, ('relationships', 'commenter', 'data', 'id'))
+ author_info = traverse_obj(
+ response, ('included', lambda _, v: v['id'] == author_id and v['type'] == 'user', 'attributes'),
+ get_all=False, expected_type=dict, default={})
+
+ yield {
+ 'id': comment_id,
+ 'text': attributes.get('body'),
+ 'timestamp': parse_iso8601(attributes.get('created')),
+ 'parent': traverse_obj(comment, ('relationships', 'parent', 'data', 'id'), default='root'),
+ 'author_is_uploader': attributes.get('is_by_creator'),
+ 'author_id': author_id,
+ 'author': author_info.get('full_name'),
+ 'author_thumbnail': author_info.get('image_url'),
+ }
+
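+            # paginate using the last comment ID as the cursor until
+            # meta.count comments have been fetched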
+            if count < traverse_obj(response, ('meta', 'count'), default=0):
+ cursor = traverse_obj(response, ('data', -1, 'id'))
+
+ if cursor is None:
+ break
+
+
+class PatreonCampaignIE(PatreonBaseIE):
+ _VALID_URL = r'https?://(?:www\.)?patreon\.com/(?!rss)(?:(?:m/(?P<campaign_id>\d+))|(?P<vanity>[-\w]+))'
+ _TESTS = [{
+ 'url': 'https://www.patreon.com/dissonancepod/',
+ 'info_dict': {
+ 'title': 'Cognitive Dissonance Podcast',
+ 'channel_url': 'https://www.patreon.com/dissonancepod',
+ 'id': '80642',
+ 'description': 'md5:eb2fa8b83da7ab887adeac34da6b7af7',
+ 'channel_id': '80642',
+ 'channel': 'Cognitive Dissonance Podcast',
+ 'age_limit': 0,
+ 'channel_follower_count': int,
+ 'uploader_id': '87145',
+ 'uploader_url': 'https://www.patreon.com/dissonancepod',
+ 'uploader': 'Cognitive Dissonance Podcast',
+ 'thumbnail': r're:^https?://.*$',
+ },
+ 'playlist_mincount': 68,
+ }, {
+ 'url': 'https://www.patreon.com/m/4767637/posts',
+ 'info_dict': {
+ 'title': 'Not Just Bikes',
+ 'channel_follower_count': int,
+ 'id': '4767637',
+ 'channel_id': '4767637',
+ 'channel_url': 'https://www.patreon.com/notjustbikes',
+ 'description': 'md5:595c6e7dca76ae615b1d38c298a287a1',
+ 'age_limit': 0,
+ 'channel': 'Not Just Bikes',
+ 'uploader_url': 'https://www.patreon.com/notjustbikes',
+ 'uploader': 'Not Just Bikes',
+ 'uploader_id': '37306634',
+ 'thumbnail': r're:^https?://.*$',
+ },
+ 'playlist_mincount': 71
+ }, {
+ 'url': 'https://www.patreon.com/dissonancepod/posts',
+ 'only_matching': True
+ }, {
+ 'url': 'https://www.patreon.com/m/5932659',
+ 'only_matching': True
+ }]
+
+ @classmethod
+ def suitable(cls, url):
+        return False if PatreonIE.suitable(url) else super().suitable(url)
+
+ def _entries(self, campaign_id):
+ cursor = None
+ params = {
+ 'fields[post]': 'patreon_url,url',
+ 'filter[campaign_id]': campaign_id,
+ 'filter[is_draft]': 'false',
+ 'sort': '-published_at',
+ 'json-api-use-default-includes': 'false',
+ }
+
+ for page in itertools.count(1):
+ params.update({'page[cursor]': cursor} if cursor else {})
+ posts_json = self._call_api('posts', campaign_id, query=params, note='Downloading posts page %d' % page)
+
+ cursor = traverse_obj(posts_json, ('meta', 'pagination', 'cursors', 'next'))
+ for post_url in traverse_obj(posts_json, ('data', ..., 'attributes', 'patreon_url')):
+ yield self.url_result(urljoin('https://www.patreon.com/', post_url), PatreonIE)
+
+ if cursor is None:
+ break
+
+ def _real_extract(self, url):
+ campaign_id, vanity = self._match_valid_url(url).group('campaign_id', 'vanity')
+ if campaign_id is None:
+ webpage = self._download_webpage(url, vanity, headers={'User-Agent': self.USER_AGENT})
+            campaign_id = self._search_regex(r'https://www\.patreon\.com/api/campaigns/(\d+)/?', webpage, 'Campaign ID')
+
+ params = {
+ 'json-api-use-default-includes': 'false',
+ 'fields[user]': 'full_name,url',
+ 'fields[campaign]': 'name,summary,url,patron_count,creation_count,is_nsfw,avatar_photo_url',
+ 'include': 'creator'
+ }
+
+ campaign_response = self._call_api(
+ f'campaigns/{campaign_id}', campaign_id,
+ note='Downloading campaign info', fatal=False,
+ query=params) or {}
+
+ campaign_info = campaign_response.get('data') or {}
+ channel_name = traverse_obj(campaign_info, ('attributes', 'name'))
+ user_info = traverse_obj(
+ campaign_response, ('included', lambda _, v: v['type'] == 'user'),
+ default={}, expected_type=dict, get_all=False)
+
+ return {
+ '_type': 'playlist',
+ 'id': campaign_id,
+ 'title': channel_name,
+ 'entries': self._entries(campaign_id),
+ 'description': clean_html(traverse_obj(campaign_info, ('attributes', 'summary'))),
+ 'channel_url': traverse_obj(campaign_info, ('attributes', 'url')),
+ 'channel_follower_count': int_or_none(traverse_obj(campaign_info, ('attributes', 'patron_count'))),
+ 'channel_id': campaign_id,
+ 'channel': channel_name,
+ 'uploader_url': traverse_obj(user_info, ('attributes', 'url')),
+ 'uploader_id': str_or_none(user_info.get('id')),
+ 'uploader': traverse_obj(user_info, ('attributes', 'full_name')),
+ 'playlist_count': traverse_obj(campaign_info, ('attributes', 'creation_count')),
+ 'age_limit': 18 if traverse_obj(campaign_info, ('attributes', 'is_nsfw')) else 0,
+ 'thumbnail': url_or_none(traverse_obj(campaign_info, ('attributes', 'avatar_photo_url'))),
+ }
diff --git a/yt_dlp/extractor/pbs.py b/yt_dlp/extractor/pbs.py
new file mode 100644
index 0000000..2bb2ea9
--- /dev/null
+++ b/yt_dlp/extractor/pbs.py
@@ -0,0 +1,757 @@
+import re
+
+from .common import InfoExtractor
+from ..compat import compat_str
+from ..utils import (
+ ExtractorError,
+ determine_ext,
+ int_or_none,
+ float_or_none,
+ js_to_json,
+ orderedSet,
+ strip_jsonp,
+ strip_or_none,
+ traverse_obj,
+ unified_strdate,
+ url_or_none,
+ US_RATINGS,
+)
+
+
+class PBSIE(InfoExtractor):
+ _STATIONS = (
+ (r'(?:video|www|player)\.pbs\.org', 'PBS: Public Broadcasting Service'), # http://www.pbs.org/
+ (r'video\.aptv\.org', 'APT - Alabama Public Television (WBIQ)'), # http://aptv.org/
+ (r'video\.gpb\.org', 'GPB/Georgia Public Broadcasting (WGTV)'), # http://www.gpb.org/
+ (r'video\.mpbonline\.org', 'Mississippi Public Broadcasting (WMPN)'), # http://www.mpbonline.org
+ (r'video\.wnpt\.org', 'Nashville Public Television (WNPT)'), # http://www.wnpt.org
+ (r'video\.wfsu\.org', 'WFSU-TV (WFSU)'), # http://wfsu.org/
+ (r'video\.wsre\.org', 'WSRE (WSRE)'), # http://www.wsre.org
+ (r'video\.wtcitv\.org', 'WTCI (WTCI)'), # http://www.wtcitv.org
+ (r'video\.pba\.org', 'WPBA/Channel 30 (WPBA)'), # http://pba.org/
+ (r'video\.alaskapublic\.org', 'Alaska Public Media (KAKM)'), # http://alaskapublic.org/kakm
+ # (r'kuac\.org', 'KUAC (KUAC)'), # http://kuac.org/kuac-tv/
+ # (r'ktoo\.org', '360 North (KTOO)'), # http://www.ktoo.org/
+ # (r'azpm\.org', 'KUAT 6 (KUAT)'), # http://www.azpm.org/
+ (r'video\.azpbs\.org', 'Arizona PBS (KAET)'), # http://www.azpbs.org
+ (r'portal\.knme\.org', 'KNME-TV/Channel 5 (KNME)'), # http://www.newmexicopbs.org/
+ (r'video\.vegaspbs\.org', 'Vegas PBS (KLVX)'), # http://vegaspbs.org/
+ (r'watch\.aetn\.org', 'AETN/ARKANSAS ETV NETWORK (KETS)'), # http://www.aetn.org/
+ (r'video\.ket\.org', 'KET (WKLE)'), # http://www.ket.org/
+ (r'video\.wkno\.org', 'WKNO/Channel 10 (WKNO)'), # http://www.wkno.org/
+ (r'video\.lpb\.org', 'LPB/LOUISIANA PUBLIC BROADCASTING (WLPB)'), # http://www.lpb.org/
+ (r'videos\.oeta\.tv', 'OETA (KETA)'), # http://www.oeta.tv
+ (r'video\.optv\.org', 'Ozarks Public Television (KOZK)'), # http://www.optv.org/
+ (r'watch\.wsiu\.org', 'WSIU Public Broadcasting (WSIU)'), # http://www.wsiu.org/
+ (r'video\.keet\.org', 'KEET TV (KEET)'), # http://www.keet.org
+ (r'pbs\.kixe\.org', 'KIXE/Channel 9 (KIXE)'), # http://kixe.org/
+ (r'video\.kpbs\.org', 'KPBS San Diego (KPBS)'), # http://www.kpbs.org/
+ (r'video\.kqed\.org', 'KQED (KQED)'), # http://www.kqed.org
+ (r'vids\.kvie\.org', 'KVIE Public Television (KVIE)'), # http://www.kvie.org
+ (r'video\.pbssocal\.org', 'PBS SoCal/KOCE (KOCE)'), # http://www.pbssocal.org/
+ (r'video\.valleypbs\.org', 'ValleyPBS (KVPT)'), # http://www.valleypbs.org/
+ (r'video\.cptv\.org', 'CONNECTICUT PUBLIC TELEVISION (WEDH)'), # http://cptv.org
+ (r'watch\.knpb\.org', 'KNPB Channel 5 (KNPB)'), # http://www.knpb.org/
+ (r'video\.soptv\.org', 'SOPTV (KSYS)'), # http://www.soptv.org
+ # (r'klcs\.org', 'KLCS/Channel 58 (KLCS)'), # http://www.klcs.org
+ # (r'krcb\.org', 'KRCB Television & Radio (KRCB)'), # http://www.krcb.org
+ # (r'kvcr\.org', 'KVCR TV/DT/FM :: Vision for the Future (KVCR)'), # http://kvcr.org
+ (r'video\.rmpbs\.org', 'Rocky Mountain PBS (KRMA)'), # http://www.rmpbs.org
+ (r'video\.kenw\.org', 'KENW-TV3 (KENW)'), # http://www.kenw.org
+ (r'video\.kued\.org', 'KUED Channel 7 (KUED)'), # http://www.kued.org
+ (r'video\.wyomingpbs\.org', 'Wyoming PBS (KCWC)'), # http://www.wyomingpbs.org
+ (r'video\.cpt12\.org', 'Colorado Public Television / KBDI 12 (KBDI)'), # http://www.cpt12.org/
+ (r'video\.kbyueleven\.org', 'KBYU-TV (KBYU)'), # http://www.kbyutv.org/
+ (r'video\.thirteen\.org', 'Thirteen/WNET New York (WNET)'), # http://www.thirteen.org
+ (r'video\.wgbh\.org', 'WGBH/Channel 2 (WGBH)'), # http://wgbh.org
+ (r'video\.wgby\.org', 'WGBY (WGBY)'), # http://www.wgby.org
+ (r'watch\.njtvonline\.org', 'NJTV Public Media NJ (WNJT)'), # http://www.njtvonline.org/
+ # (r'ripbs\.org', 'Rhode Island PBS (WSBE)'), # http://www.ripbs.org/home/
+ (r'watch\.wliw\.org', 'WLIW21 (WLIW)'), # http://www.wliw.org/
+ (r'video\.mpt\.tv', 'mpt/Maryland Public Television (WMPB)'), # http://www.mpt.org
+ (r'watch\.weta\.org', 'WETA Television and Radio (WETA)'), # http://www.weta.org
+ (r'video\.whyy\.org', 'WHYY (WHYY)'), # http://www.whyy.org
+ (r'video\.wlvt\.org', 'PBS 39 (WLVT)'), # http://www.wlvt.org/
+ (r'video\.wvpt\.net', 'WVPT - Your Source for PBS and More! (WVPT)'), # http://www.wvpt.net
+ (r'video\.whut\.org', 'Howard University Television (WHUT)'), # http://www.whut.org
+ (r'video\.wedu\.org', 'WEDU PBS (WEDU)'), # http://www.wedu.org
+ (r'video\.wgcu\.org', 'WGCU Public Media (WGCU)'), # http://www.wgcu.org/
+ # (r'wjct\.org', 'WJCT Public Broadcasting (WJCT)'), # http://www.wjct.org
+ (r'video\.wpbt2\.org', 'WPBT2 (WPBT)'), # http://www.wpbt2.org
+ (r'video\.wucftv\.org', 'WUCF TV (WUCF)'), # http://wucftv.org
+ (r'video\.wuft\.org', 'WUFT/Channel 5 (WUFT)'), # http://www.wuft.org
+ (r'watch\.wxel\.org', 'WXEL/Channel 42 (WXEL)'), # http://www.wxel.org/home/
+ (r'video\.wlrn\.org', 'WLRN/Channel 17 (WLRN)'), # http://www.wlrn.org/
+ (r'video\.wusf\.usf\.edu', 'WUSF Public Broadcasting (WUSF)'), # http://wusf.org/
+ (r'video\.scetv\.org', 'ETV (WRLK)'), # http://www.scetv.org
+ (r'video\.unctv\.org', 'UNC-TV (WUNC)'), # http://www.unctv.org/
+ # (r'pbsguam\.org', 'PBS Guam (KGTF)'), # http://www.pbsguam.org/
+ (r'video\.pbshawaii\.org', 'PBS Hawaii - Oceanic Cable Channel 10 (KHET)'), # http://www.pbshawaii.org/
+ (r'video\.idahoptv\.org', 'Idaho Public Television (KAID)'), # http://idahoptv.org
+ (r'video\.ksps\.org', 'KSPS (KSPS)'), # http://www.ksps.org/home/
+ (r'watch\.opb\.org', 'OPB (KOPB)'), # http://www.opb.org
+ (r'watch\.nwptv\.org', 'KWSU/Channel 10 & KTNW/Channel 31 (KWSU)'), # http://www.kwsu.org
+ (r'video\.will\.illinois\.edu', 'WILL-TV (WILL)'), # http://will.illinois.edu/
+ (r'video\.networkknowledge\.tv', 'Network Knowledge - WSEC/Springfield (WSEC)'), # http://www.wsec.tv
+ (r'video\.wttw\.com', 'WTTW11 (WTTW)'), # http://www.wttw.com/
+ # (r'wtvp\.org', 'WTVP & WTVP.org, Public Media for Central Illinois (WTVP)'), # http://www.wtvp.org/
+ (r'video\.iptv\.org', 'Iowa Public Television/IPTV (KDIN)'), # http://www.iptv.org/
+ (r'video\.ninenet\.org', 'Nine Network (KETC)'), # http://www.ninenet.org
+ (r'video\.wfwa\.org', 'PBS39 Fort Wayne (WFWA)'), # http://wfwa.org/
+ (r'video\.wfyi\.org', 'WFYI Indianapolis (WFYI)'), # http://www.wfyi.org
+ (r'video\.mptv\.org', 'Milwaukee Public Television (WMVS)'), # http://www.mptv.org
+ (r'video\.wnin\.org', 'WNIN (WNIN)'), # http://www.wnin.org/
+ (r'video\.wnit\.org', 'WNIT Public Television (WNIT)'), # http://www.wnit.org/
+ (r'video\.wpt\.org', 'WPT (WPNE)'), # http://www.wpt.org/
+ (r'video\.wvut\.org', 'WVUT/Channel 22 (WVUT)'), # http://wvut.org/
+ (r'video\.weiu\.net', 'WEIU/Channel 51 (WEIU)'), # http://www.weiu.net
+ (r'video\.wqpt\.org', 'WQPT-TV (WQPT)'), # http://www.wqpt.org
+ (r'video\.wycc\.org', 'WYCC PBS Chicago (WYCC)'), # http://www.wycc.org
+ # (r'lakeshorepublicmedia\.org', 'Lakeshore Public Television (WYIN)'), # http://lakeshorepublicmedia.org/
+ (r'video\.wipb\.org', 'WIPB-TV (WIPB)'), # http://wipb.org
+ (r'video\.indianapublicmedia\.org', 'WTIU (WTIU)'), # http://indianapublicmedia.org/tv/
+ (r'watch\.cetconnect\.org', 'CET (WCET)'), # http://www.cetconnect.org
+ (r'video\.thinktv\.org', 'ThinkTVNetwork (WPTD)'), # http://www.thinktv.org
+ (r'video\.wbgu\.org', 'WBGU-TV (WBGU)'), # http://wbgu.org
+ (r'video\.wgvu\.org', 'WGVU TV (WGVU)'), # http://www.wgvu.org/
+ (r'video\.netnebraska\.org', 'NET1 (KUON)'), # http://netnebraska.org
+ (r'video\.pioneer\.org', 'Pioneer Public Television (KWCM)'), # http://www.pioneer.org
+ (r'watch\.sdpb\.org', 'SDPB Television (KUSD)'), # http://www.sdpb.org
+ (r'video\.tpt\.org', 'TPT (KTCA)'), # http://www.tpt.org
+ (r'watch\.ksmq\.org', 'KSMQ (KSMQ)'), # http://www.ksmq.org/
+ (r'watch\.kpts\.org', 'KPTS/Channel 8 (KPTS)'), # http://www.kpts.org/
+ (r'watch\.ktwu\.org', 'KTWU/Channel 11 (KTWU)'), # http://ktwu.org
+ # (r'shptv\.org', 'Smoky Hills Public Television (KOOD)'), # http://www.shptv.org
+ # (r'kcpt\.org', 'KCPT Kansas City Public Television (KCPT)'), # http://kcpt.org/
+ # (r'blueridgepbs\.org', 'Blue Ridge PBS (WBRA)'), # http://www.blueridgepbs.org/
+ (r'watch\.easttennesseepbs\.org', 'East Tennessee PBS (WSJK)'), # http://easttennesseepbs.org
+ (r'video\.wcte\.tv', 'WCTE-TV (WCTE)'), # http://www.wcte.org
+ (r'video\.wljt\.org', 'WLJT, Channel 11 (WLJT)'), # http://wljt.org/
+ (r'video\.wosu\.org', 'WOSU TV (WOSU)'), # http://wosu.org/
+ (r'video\.woub\.org', 'WOUB/WOUC (WOUB)'), # http://woub.org/tv/index.php?section=5
+ (r'video\.wvpublic\.org', 'WVPB (WVPB)'), # http://wvpublic.org/
+ (r'video\.wkyupbs\.org', 'WKYU-PBS (WKYU)'), # http://www.wkyupbs.org
+ # (r'wyes\.org', 'WYES-TV/New Orleans (WYES)'), # http://www.wyes.org
+ (r'video\.kera\.org', 'KERA 13 (KERA)'), # http://www.kera.org/
+ (r'video\.mpbn\.net', 'MPBN (WCBB)'), # http://www.mpbn.net/
+ (r'video\.mountainlake\.org', 'Mountain Lake PBS (WCFE)'), # http://www.mountainlake.org/
+ (r'video\.nhptv\.org', 'NHPTV (WENH)'), # http://nhptv.org/
+ (r'video\.vpt\.org', 'Vermont PBS (WETK)'), # http://www.vpt.org
+ (r'video\.witf\.org', 'witf (WITF)'), # http://www.witf.org
+ (r'watch\.wqed\.org', 'WQED Multimedia (WQED)'), # http://www.wqed.org/
+ (r'video\.wmht\.org', 'WMHT Educational Telecommunications (WMHT)'), # http://www.wmht.org/home/
+ (r'video\.deltabroadcasting\.org', 'Q-TV (WDCQ)'), # http://www.deltabroadcasting.org
+ (r'video\.dptv\.org', 'WTVS Detroit Public TV (WTVS)'), # http://www.dptv.org/
+ (r'video\.wcmu\.org', 'CMU Public Television (WCMU)'), # http://www.wcmu.org
+ (r'video\.wkar\.org', 'WKAR-TV (WKAR)'), # http://wkar.org/
+ (r'wnmuvideo\.nmu\.edu', 'WNMU-TV Public TV 13 (WNMU)'), # http://wnmutv.nmu.edu
+ (r'video\.wdse\.org', 'WDSE - WRPT (WDSE)'), # http://www.wdse.org/
+ (r'video\.wgte\.org', 'WGTE TV (WGTE)'), # http://www.wgte.org
+ (r'video\.lptv\.org', 'Lakeland Public Television (KAWE)'), # http://www.lakelandptv.org
+ # (r'prairiepublic\.org', 'PRAIRIE PUBLIC (KFME)'), # http://www.prairiepublic.org/
+ (r'video\.kmos\.org', 'KMOS-TV - Channels 6.1, 6.2 and 6.3 (KMOS)'), # http://www.kmos.org/
+ (r'watch\.montanapbs\.org', 'MontanaPBS (KUSM)'), # http://montanapbs.org
+ (r'video\.krwg\.org', 'KRWG/Channel 22 (KRWG)'), # http://www.krwg.org
+ (r'video\.kacvtv\.org', 'KACV (KACV)'), # http://www.panhandlepbs.org/home/
+ (r'video\.kcostv\.org', 'KCOS/Channel 13 (KCOS)'), # http://www.kcostv.org
+ (r'video\.wcny\.org', 'WCNY/Channel 24 (WCNY)'), # http://www.wcny.org
+ (r'video\.wned\.org', 'WNED (WNED)'), # http://www.wned.org/
+ (r'watch\.wpbstv\.org', 'WPBS (WPBS)'), # http://www.wpbstv.org
+ (r'video\.wskg\.org', 'WSKG Public TV (WSKG)'), # http://wskg.org
+ (r'video\.wxxi\.org', 'WXXI (WXXI)'), # http://wxxi.org
+ (r'video\.wpsu\.org', 'WPSU (WPSU)'), # http://www.wpsu.org
+ # (r'wqln\.org', 'WQLN/Channel 54 (WQLN)'), # http://www.wqln.org
+ (r'on-demand\.wvia\.org', 'WVIA Public Media Studios (WVIA)'), # http://www.wvia.org/
+ (r'video\.wtvi\.org', 'WTVI (WTVI)'), # http://www.wtvi.org/
+ # (r'whro\.org', 'WHRO (WHRO)'), # http://whro.org
+ (r'video\.westernreservepublicmedia\.org', 'Western Reserve PBS (WNEO)'), # http://www.WesternReservePublicMedia.org/
+ (r'video\.ideastream\.org', 'WVIZ/PBS ideastream (WVIZ)'), # http://www.wviz.org/
+ (r'video\.kcts9\.org', 'KCTS 9 (KCTS)'), # http://kcts9.org/
+ (r'video\.basinpbs\.org', 'Basin PBS (KPBT)'), # http://www.basinpbs.org
+ (r'video\.houstonpbs\.org', 'KUHT / Channel 8 (KUHT)'), # http://www.houstonpublicmedia.org/
+ # (r'tamu\.edu', 'KAMU - TV (KAMU)'), # http://KAMU.tamu.edu
+ # (r'kedt\.org', 'KEDT/Channel 16 (KEDT)'), # http://www.kedt.org
+ (r'video\.klrn\.org', 'KLRN (KLRN)'), # http://www.klrn.org
+ (r'video\.klru\.tv', 'KLRU (KLRU)'), # http://www.klru.org
+ # (r'kmbh\.org', 'KMBH-TV (KMBH)'), # http://www.kmbh.org
+ # (r'knct\.org', 'KNCT (KNCT)'), # http://www.knct.org
+ # (r'ktxt\.org', 'KTTZ-TV (KTXT)'), # http://www.ktxt.org
+ (r'video\.wtjx\.org', 'WTJX Channel 12 (WTJX)'), # http://www.wtjx.org/
+ (r'video\.ideastations\.org', 'WCVE PBS (WCVE)'), # http://ideastations.org/
+ (r'video\.kbtc\.org', 'KBTC Public Television (KBTC)'), # http://kbtc.org
+ )
+
+ IE_NAME = 'pbs'
+ IE_DESC = 'Public Broadcasting Service (PBS) and member stations: %s' % ', '.join(list(zip(*_STATIONS))[1])
+
+ _VALID_URL = r'''(?x)https?://
+ (?:
+ # Direct video URL
+ (?:%s)/(?:(?:vir|port)alplayer|video)/(?P<id>[0-9]+)(?:[?/]|$) |
+ # Article with embedded player (or direct video)
+ (?:www\.)?pbs\.org/(?:[^/]+/){1,5}(?P<presumptive_id>[^/]+?)(?:\.html)?/?(?:$|[?\#]) |
+ # Player
+ (?:video|player)\.pbs\.org/(?:widget/)?partnerplayer/(?P<player_id>[^/]+)
+ )
+ ''' % '|'.join(list(zip(*_STATIONS))[0])
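+ # A sketch of what the interpolation above assembles: zip(*_STATIONS)
+ # transposes the (regex, name) pairs, so row 0 holds every station host
+ # pattern and row 1 every display name. For example:
+ #   '|'.join(list(zip(*_STATIONS))[0])
+ #   -> r'(?:video|www|player)\.pbs\.org|video\.aptv\.org|video\.gpb\.org|...'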
+
+ _GEO_COUNTRIES = ['US']
+
+ _TESTS = [
+ {
+ 'url': 'http://www.pbs.org/tpt/constitution-usa-peter-sagal/watch/a-more-perfect-union/',
+ 'md5': '173dc391afd361fa72eab5d3d918968d',
+ 'info_dict': {
+ 'id': '2365006249',
+ 'ext': 'mp4',
+ 'title': 'Constitution USA with Peter Sagal - A More Perfect Union',
+ 'description': 'md5:31b664af3c65fd07fa460d306b837d00',
+ 'duration': 3190,
+ },
+ },
+ {
+ 'url': 'http://www.pbs.org/wgbh/pages/frontline/losing-iraq/',
+ 'md5': '6f722cb3c3982186d34b0f13374499c7',
+ 'info_dict': {
+ 'id': '2365297690',
+ 'ext': 'mp4',
+ 'title': 'FRONTLINE - Losing Iraq',
+ 'description': 'md5:5979a4d069b157f622d02bff62fbe654',
+ 'duration': 5050,
+ },
+ },
+ {
+ 'url': 'http://www.pbs.org/newshour/bb/education-jan-june12-cyberschools_02-23/',
+ 'md5': 'b19856d7f5351b17a5ab1dc6a64be633',
+ 'info_dict': {
+ 'id': '2201174722',
+ 'ext': 'mp4',
+ 'title': 'PBS NewsHour - Cyber Schools Gain Popularity, but Quality Questions Persist',
+ 'description': 'md5:86ab9a3d04458b876147b355788b8781',
+ 'duration': 801,
+ },
+ },
+ {
+ 'url': 'http://www.pbs.org/wnet/gperf/dudamel-conducts-verdi-requiem-hollywood-bowl-full-episode/3374/',
+ 'md5': 'c62859342be2a0358d6c9eb306595978',
+ 'info_dict': {
+ 'id': '2365297708',
+ 'ext': 'mp4',
+ 'title': 'Great Performances - Dudamel Conducts Verdi Requiem at the Hollywood Bowl - Full',
+ 'description': 'md5:657897370e09e2bc6bf0f8d2cd313c6b',
+ 'duration': 6559,
+ 'thumbnail': r're:^https?://.*\.jpg$',
+ },
+ },
+ {
+ 'url': 'http://www.pbs.org/wgbh/nova/earth/killer-typhoon.html',
+ 'md5': '908f3e5473a693b266b84e25e1cf9703',
+ 'info_dict': {
+ 'id': '2365160389',
+ 'display_id': 'killer-typhoon',
+ 'ext': 'mp4',
+ 'description': 'md5:c741d14e979fc53228c575894094f157',
+ 'title': 'NOVA - Killer Typhoon',
+ 'duration': 3172,
+ 'thumbnail': r're:^https?://.*\.jpg$',
+ 'upload_date': '20140122',
+ 'age_limit': 10,
+ },
+ },
+ {
+ 'url': 'http://www.pbs.org/wgbh/pages/frontline/united-states-of-secrets/',
+ 'info_dict': {
+ 'id': 'united-states-of-secrets',
+ },
+ 'playlist_count': 2,
+ },
+ {
+ 'url': 'http://www.pbs.org/wgbh/americanexperience/films/great-war/',
+ 'info_dict': {
+ 'id': 'great-war',
+ },
+ 'playlist_count': 3,
+ },
+ {
+ 'url': 'http://www.pbs.org/wgbh/americanexperience/films/death/player/',
+ 'info_dict': {
+ 'id': '2276541483',
+ 'display_id': 'player',
+ 'ext': 'mp4',
+ 'title': 'American Experience - Death and the Civil War, Chapter 1',
+ 'description': 'md5:67fa89a9402e2ee7d08f53b920674c18',
+ 'duration': 682,
+ 'thumbnail': r're:^https?://.*\.jpg$',
+ },
+ 'params': {
+ 'skip_download': True, # requires ffmpeg
+ },
+ },
+ {
+ 'url': 'http://www.pbs.org/video/2365245528/',
+ 'md5': '115223d41bd55cda8ae5cd5ed4e11497',
+ 'info_dict': {
+ 'id': '2365245528',
+ 'display_id': '2365245528',
+ 'ext': 'mp4',
+ 'title': 'FRONTLINE - United States of Secrets (Part One)',
+ 'description': 'md5:55756bd5c551519cc4b7703e373e217e',
+ 'duration': 6851,
+ 'thumbnail': r're:^https?://.*\.jpg$',
+ },
+ },
+ {
+ # Video embedded in an iframe whose attribute value contains angle brackets (e.g.
+ # "<iframe style='position: absolute;<br />\ntop: 0; left: 0;' ...", see
+ # https://github.com/ytdl-org/youtube-dl/issues/7059)
+ 'url': 'http://www.pbs.org/food/features/a-chefs-life-season-3-episode-5-prickly-business/',
+ 'md5': '59b0ef5009f9ac8a319cc5efebcd865e',
+ 'info_dict': {
+ 'id': '2365546844',
+ 'display_id': 'a-chefs-life-season-3-episode-5-prickly-business',
+ 'ext': 'mp4',
+ 'title': "A Chef's Life - Season 3, Ep. 5: Prickly Business",
+ 'description': 'md5:c0ff7475a4b70261c7e58f493c2792a5',
+ 'duration': 1480,
+ 'thumbnail': r're:^https?://.*\.jpg$',
+ },
+ },
+ {
+ # Frontline video embedded via flp2012.js
+ 'url': 'http://www.pbs.org/wgbh/pages/frontline/the-atomic-artists',
+ 'info_dict': {
+ 'id': '2070868960',
+ 'display_id': 'the-atomic-artists',
+ 'ext': 'mp4',
+ 'title': 'FRONTLINE - The Atomic Artists',
+ 'description': 'md5:f677e4520cfacb4a5ce1471e31b57800',
+ 'duration': 723,
+ 'thumbnail': r're:^https?://.*\.jpg$',
+ },
+ 'params': {
+ 'skip_download': True, # requires ffmpeg
+ },
+ },
+ {
+ # Serves HD only via the widget/partnerplayer page
+ 'url': 'http://www.pbs.org/video/2365641075/',
+ 'md5': 'fdf907851eab57211dd589cf12006666',
+ 'info_dict': {
+ 'id': '2365641075',
+ 'ext': 'mp4',
+ 'title': 'FRONTLINE - Netanyahu at War',
+ 'duration': 6852,
+ 'thumbnail': r're:^https?://.*\.jpg$',
+ 'formats': 'mincount:8',
+ },
+ },
+ {
+ # https://github.com/ytdl-org/youtube-dl/issues/13801
+ 'url': 'https://www.pbs.org/video/pbs-newshour-full-episode-july-31-2017-1501539057/',
+ 'info_dict': {
+ 'id': '3003333873',
+ 'ext': 'mp4',
+ 'title': 'PBS NewsHour - full episode July 31, 2017',
+ 'description': 'md5:d41d8cd98f00b204e9800998ecf8427e',
+ 'duration': 3265,
+ 'thumbnail': r're:^https?://.*\.jpg$',
+ },
+ 'params': {
+ 'skip_download': True,
+ },
+ },
+ {
+ 'url': 'http://www.pbs.org/wgbh/roadshow/watch/episode/2105-indianapolis-hour-2/',
+ 'info_dict': {
+ 'id': '2365936247',
+ 'ext': 'mp4',
+ 'title': 'Antiques Roadshow - Indianapolis, Hour 2',
+ 'description': 'md5:524b32249db55663e7231b6b8d1671a2',
+ 'duration': 3180,
+ 'thumbnail': r're:^https?://.*\.jpg$',
+ },
+ 'params': {
+ 'skip_download': True,
+ },
+ 'expected_warnings': ['HTTP Error 403: Forbidden'],
+ },
+ {
+ 'url': 'https://www.pbs.org/wgbh/masterpiece/episodes/victoria-s2-e1/',
+ 'info_dict': {
+ 'id': '3007193718',
+ 'ext': 'mp4',
+ 'title': "Victoria - A Soldier's Daughter / The Green-Eyed Monster",
+ 'description': 'md5:37efbac85e0c09b009586523ec143652',
+ 'duration': 6292,
+ 'thumbnail': r're:^https?://.*\.(?:jpg|JPG)$',
+ },
+ 'params': {
+ 'skip_download': True,
+ },
+ 'expected_warnings': ['HTTP Error 403: Forbidden'],
+ },
+ {
+ 'url': 'https://player.pbs.org/partnerplayer/tOz9tM5ljOXQqIIWke53UA==/',
+ 'info_dict': {
+ 'id': '3011407934',
+ 'ext': 'mp4',
+ 'title': 'Stories from the Stage - Road Trip',
+ 'duration': 1619,
+ 'thumbnail': r're:^https?://.*\.(?:jpg|JPG)$',
+ },
+ 'params': {
+ 'skip_download': True,
+ },
+ 'expected_warnings': ['HTTP Error 403: Forbidden'],
+ },
+ {
+ 'url': 'http://player.pbs.org/widget/partnerplayer/2365297708/?start=0&end=0&chapterbar=false&endscreen=false&topbar=true',
+ 'only_matching': True,
+ },
+ {
+ 'url': 'http://watch.knpb.org/video/2365616055/',
+ 'only_matching': True,
+ },
+ {
+ 'url': 'https://player.pbs.org/portalplayer/3004638221/?uid=',
+ 'only_matching': True,
+ }
+ ]
+ _ERRORS = {
+ 101: 'We\'re sorry, but this video is not yet available.',
+ 403: 'We\'re sorry, but this video is not available in your region due to rights restrictions.',
+ 404: 'We are experiencing technical difficulties that are preventing us from playing the video at this time. Please check back again soon.',
+ 410: 'This video has expired and is no longer available for online streaming.',
+ }
+
+ def _real_initialize(self):
+ cookie = (self._download_json(
+ 'http://localization.services.pbs.org/localize/auto/cookie/',
+ None, headers=self.geo_verification_headers(), fatal=False) or {}).get('cookie')
+ if cookie:
+ station = self._search_regex(r'#?s=\["([^"]+)"', cookie, 'station')
+ if station:
+ self._set_cookie('.pbs.org', 'pbsol.station', station)
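+ # Hedged sketch of the cookie this parses (format not documented by PBS):
+ # the localization endpoint is assumed to return JSON like
+ #   {"cookie": "...#s=[\"KQED\", ...]"}
+ # from which the regex above extracts the station callsign ('KQED').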
+
+ def _extract_webpage(self, url):
+ mobj = self._match_valid_url(url)
+
+ description = None
+
+ presumptive_id = mobj.group('presumptive_id')
+ display_id = presumptive_id
+ if presumptive_id:
+ webpage = self._download_webpage(url, display_id)
+
+ description = strip_or_none(self._og_search_description(
+ webpage, default=None) or self._html_search_meta(
+ 'description', webpage, default=None))
+ upload_date = unified_strdate(self._search_regex(
+ r'<input type="hidden" id="air_date_[0-9]+" value="([^"]+)"',
+ webpage, 'upload date', default=None))
+
+ # tabbed frontline videos
+ MULTI_PART_REGEXES = (
+ r'<div[^>]+class="videotab[^"]*"[^>]+vid="(\d+)"',
+ r'<a[^>]+href=["\']#(?:video-|part)\d+["\'][^>]+data-cove[Ii]d=["\'](\d+)',
+ )
+ for p in MULTI_PART_REGEXES:
+ tabbed_videos = orderedSet(re.findall(p, webpage))
+ if tabbed_videos:
+ return tabbed_videos, presumptive_id, upload_date, description
+
+ MEDIA_ID_REGEXES = [
+ r"div\s*:\s*'videoembed'\s*,\s*mediaid\s*:\s*'(\d+)'", # frontline video embed
+ r'class="coveplayerid">([^<]+)<', # coveplayer
+ r'<section[^>]+data-coveid="(\d+)"', # coveplayer from http://www.pbs.org/wgbh/frontline/film/real-csi/
+ r'<input type="hidden" id="pbs_video_id_[0-9]+" value="([0-9]+)"/>', # jwplayer
+ r"(?s)window\.PBS\.playerConfig\s*=\s*{.*?id\s*:\s*'([0-9]+)',",
+ r'<div[^>]+\bdata-cove-id=["\'](\d+)"', # http://www.pbs.org/wgbh/roadshow/watch/episode/2105-indianapolis-hour-2/
+ r'<iframe[^>]+\bsrc=["\'](?:https?:)?//video\.pbs\.org/widget/partnerplayer/(\d+)', # https://www.pbs.org/wgbh/masterpiece/episodes/victoria-s2-e1/
+ ]
+
+ media_id = self._search_regex(
+ MEDIA_ID_REGEXES, webpage, 'media ID', fatal=False, default=None)
+ if media_id:
+ return media_id, presumptive_id, upload_date, description
+
+ # Frontline video embedded via flp
+ video_id = self._search_regex(
+ r'videoid\s*:\s*"([\d+a-z]{7,})"', webpage, 'videoid', default=None)
+ if video_id:
+ # prg_id calculation is reverse engineered from
+ # http://www.pbs.org/wgbh/pages/frontline/js/flp2012.js
+ prg_id = self._search_regex(
+ r'videoid\s*:\s*"([\d+a-z]{7,})"', webpage, 'videoid')[7:]
+ if 'q' in prg_id:
+ prg_id = prg_id.split('q')[1]
+ prg_id = int(prg_id, 16)
+ getdir = self._download_json(
+ 'http://www.pbs.org/wgbh/pages/frontline/.json/getdir/getdir%d.json' % prg_id,
+ presumptive_id, 'Downloading getdir JSON',
+ transform_source=strip_jsonp)
+ return getdir['mid'], presumptive_id, upload_date, description
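+ # Worked example of the above with a hypothetical videoid 'frol02s319a6':
+ # [7:] leaves '319a6', which contains no 'q', so prg_id = int('319a6', 16)
+ # == 203174 and the metadata comes from .../getdir/getdir203174.json.
+ # When a 'q' is present, only the part after it is parsed as hex.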
+
+ for iframe in re.findall(r'(?s)<iframe(.+?)></iframe>', webpage):
+ url = self._search_regex(
+ r'src=(["\'])(?P<url>.+?partnerplayer.+?)\1', iframe,
+ 'player URL', default=None, group='url')
+ if url:
+ break
+
+ if not url:
+ url = self._og_search_url(webpage)
+
+ mobj = re.match(
+ self._VALID_URL, self._proto_relative_url(url.strip()))
+
+ player_id = mobj.group('player_id')
+ if not display_id:
+ display_id = player_id
+ if player_id:
+ player_page = self._download_webpage(
+ url, display_id, note='Downloading player page',
+ errnote='Could not download player page')
+ video_id = self._search_regex(
+ r'<div\s+id=["\']video_(\d+)', player_page, 'video ID',
+ default=None)
+ if not video_id:
+ video_info = self._extract_video_data(
+ player_page, 'video data', display_id)
+ video_id = compat_str(
+ video_info.get('id') or video_info['contentID'])
+ else:
+ video_id = mobj.group('id')
+ display_id = video_id
+
+ return video_id, display_id, None, description
+
+ def _extract_video_data(self, string, name, video_id, fatal=True):
+ return self._parse_json(
+ self._search_regex(
+ [r'(?s)PBS\.videoData\s*=\s*({.+?});\n',
+ r'window\.videoBridge\s*=\s*({.+?});'],
+ string, name, default='{}'),
+ video_id, transform_source=js_to_json, fatal=fatal)
+
+ def _real_extract(self, url):
+ video_id, display_id, upload_date, description = self._extract_webpage(url)
+
+ if isinstance(video_id, list):
+ entries = [self.url_result(
+ 'http://video.pbs.org/video/%s' % vid_id, 'PBS', vid_id)
+ for vid_id in video_id]
+ return self.playlist_result(entries, display_id)
+
+ info = {}
+ redirects = []
+ redirect_urls = set()
+
+ def extract_redirect_urls(info):
+ for encoding_name in ('recommended_encoding', 'alternate_encoding'):
+ redirect = info.get(encoding_name)
+ if not redirect:
+ continue
+ redirect_url = redirect.get('url')
+ if redirect_url and redirect_url not in redirect_urls:
+ redirects.append(redirect)
+ redirect_urls.add(redirect_url)
+ encodings = info.get('encodings')
+ if isinstance(encodings, list):
+ for encoding in encodings:
+ encoding_url = url_or_none(encoding)
+ if encoding_url and encoding_url not in redirect_urls:
+ redirects.append({'url': encoding_url})
+ redirect_urls.add(encoding_url)
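+ # Hedged sketch of the video data this walks (values are made up):
+ #   {'recommended_encoding': {'eeid': 'FRL-001', 'url': 'https://...'},
+ #    'alternate_encoding': {'eeid': 'FRL-002', 'url': 'https://...'},
+ #    'encodings': ['https://...']}
+ # Each unique URL is queued exactly once for the '?format=json'
+ # resolution performed below.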
+
+ chapters = []
+ # Player pages may also serve different qualities
+ for page in ('widget/partnerplayer', 'portalplayer'):
+ player = self._download_webpage(
+ 'http://player.pbs.org/%s/%s' % (page, video_id),
+ display_id, 'Downloading %s page' % page, fatal=False)
+ if player:
+ video_info = self._extract_video_data(
+ player, '%s video data' % page, display_id, fatal=False)
+ if video_info:
+ extract_redirect_urls(video_info)
+ if not info:
+ info = video_info
+ if not chapters:
+ raw_chapters = video_info.get('chapters') or []
+ if not raw_chapters:
+ for chapter_data in re.findall(r'(?s)chapters\.push\(({.*?})\)', player):
+ chapter = self._parse_json(chapter_data, video_id, js_to_json, fatal=False)
+ if not chapter:
+ continue
+ raw_chapters.append(chapter)
+ for chapter in raw_chapters:
+ start_time = float_or_none(chapter.get('start_time'), 1000)
+ duration = float_or_none(chapter.get('duration'), 1000)
+ if start_time is None or duration is None:
+ continue
+ chapters.append({
+ 'start_time': start_time,
+ 'end_time': start_time + duration,
+ 'title': chapter.get('title'),
+ })
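+ # float_or_none(v, 1000) scales milliseconds to seconds, so a raw
+ # chapter like {'start_time': 120000, 'duration': 60000} (hypothetical
+ # values) becomes {'start_time': 120.0, 'end_time': 180.0}.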
+
+ formats = []
+ http_url = None
+ hls_subs = {}
+ for num, redirect in enumerate(redirects):
+ redirect_id = redirect.get('eeid')
+
+ redirect_info = self._download_json(
+ '%s?format=json' % redirect['url'], display_id,
+ 'Downloading %s video url info' % (redirect_id or num),
+ headers=self.geo_verification_headers())
+
+ if redirect_info['status'] == 'error':
+ message = self._ERRORS.get(
+ redirect_info['http_code'], redirect_info['message'])
+ if redirect_info['http_code'] == 403:
+ self.raise_geo_restricted(
+ msg=message, countries=self._GEO_COUNTRIES)
+ raise ExtractorError(
+ '%s said: %s' % (self.IE_NAME, message), expected=True)
+
+ format_url = redirect_info.get('url')
+ if not format_url:
+ continue
+
+ if determine_ext(format_url) == 'm3u8':
+ hls_formats, hls_subs = self._extract_m3u8_formats_and_subtitles(
+ format_url, display_id, 'mp4', m3u8_id='hls', fatal=False)
+ formats.extend(hls_formats)
+ else:
+ formats.append({
+ 'url': format_url,
+ 'format_id': redirect_id,
+ })
+ if re.search(r'^https?://.*(?:\d+k|baseline)', format_url):
+ http_url = format_url
+ self._remove_duplicate_formats(formats)
+ m3u8_formats = list(filter(
+ lambda f: f.get('protocol') == 'm3u8' and f.get('vcodec') != 'none',
+ formats))
+ if http_url:
+ for m3u8_format in m3u8_formats:
+ bitrate = self._search_regex(r'(\d+)k', m3u8_format['url'], 'bitrate', default=None)
+ # Lower qualities (150k and 192k) are not available as HTTP formats (see [1]),
+ # so we don't try to extract them.
+ # Since summer 2016, higher-quality formats (4500k and 6500k) are also
+ # available, although they are not documented in [2].
+ # 1. https://github.com/ytdl-org/youtube-dl/commit/cbc032c8b70a038a69259378c92b4ba97b42d491#commitcomment-17313656
+ # 2. https://projects.pbs.org/confluence/display/coveapi/COVE+Video+Specifications
+ if not bitrate or int(bitrate) < 400:
+ continue
+ f_url = re.sub(r'\d+k|baseline', bitrate + 'k', http_url)
+ # This may produce invalid links sometimes (e.g.
+ # http://www.pbs.org/wgbh/frontline/film/suicide-plan)
+ if not self._is_valid_url(f_url, display_id, 'http-%sk video' % bitrate):
+ continue
+ f = m3u8_format.copy()
+ f.update({
+ 'url': f_url,
+ 'format_id': m3u8_format['format_id'].replace('hls', 'http'),
+ 'protocol': 'http',
+ })
+ formats.append(f)
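+ # Sketch of the substitution above with a made-up URL: if http_url is
+ # 'http://ga.video.cdn.pbs.org/.../video-800k.mp4' and the HLS variant
+ # URL carries '1200k', re.sub(r'\d+k|baseline', '1200k', http_url)
+ # yields '.../video-1200k.mp4' -- hence the _is_valid_url() probe above.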
+ for f in formats:
+ if (f.get('format_note') or '').endswith(' AD'): # Audio description
+ f['language_preference'] = -10
+
+ rating_str = info.get('rating')
+ if rating_str is not None:
+ rating_str = rating_str.rpartition('-')[2]
+ age_limit = US_RATINGS.get(rating_str)
+
+ subtitles = {}
+ captions = info.get('cc') or {}
+ for caption_url in captions.values():
+ subtitles.setdefault('en', []).append({
+ 'url': caption_url
+ })
+ subtitles = self._merge_subtitles(subtitles, hls_subs)
+
+ # info['title'] is often incomplete (e.g. 'Full Episode' or 'Episode 5'),
+ # so try converting it to the 'program - title' naming scheme if possible
+ alt_title = info.get('program', {}).get('title')
+ if alt_title:
+ info['title'] = alt_title + ' - ' + re.sub(r'^' + alt_title + r'[\s\-:]+', '', info['title'])
+
+ description = info.get('description') or info.get(
+ 'program', {}).get('description') or description
+
+ return {
+ 'id': video_id,
+ 'display_id': display_id,
+ 'title': info['title'],
+ 'description': description,
+ 'thumbnail': info.get('image_url'),
+ 'duration': int_or_none(info.get('duration')),
+ 'age_limit': age_limit,
+ 'upload_date': upload_date,
+ 'formats': formats,
+ 'subtitles': subtitles,
+ 'chapters': chapters,
+ }
+
+
+class PBSKidsIE(InfoExtractor):
+ _VALID_URL = r'https?://(?:www\.)?pbskids\.org/video/[\w-]+/(?P<id>\d+)'
+ _TESTS = [
+ {
+ 'url': 'https://pbskids.org/video/molly-of-denali/3030407927',
+ 'md5': '1ded20a017cc6b53446238f1804ce4c7',
+ 'info_dict': {
+ 'id': '3030407927',
+ 'title': 'Bird in the Hand/Bye-Bye Birdie',
+ 'channel': 'molly-of-denali',
+ 'duration': 1540,
+ 'ext': 'mp4',
+ 'series': 'Molly of Denali',
+ 'description': 'md5:d006b2211633685d8ebc8d03b6d5611e',
+ 'categories': ['Episode'],
+ 'upload_date': '20190718',
+ }
+ },
+ {
+ 'url': 'https://pbskids.org/video/plum-landing/2365205059',
+ 'md5': '92e5d189851a64ae1d0237a965be71f5',
+ 'info_dict': {
+ 'id': '2365205059',
+ 'title': 'Cooper\'s Favorite Place in Nature',
+ 'channel': 'plum-landing',
+ 'duration': 67,
+ 'ext': 'mp4',
+ 'series': 'Plum Landing',
+ 'description': 'md5:657e5fc4356a84ead1c061eb280ff05d',
+ 'categories': ['Episode'],
+ 'upload_date': '20140302',
+ }
+ }
+ ]
+
+ def _real_extract(self, url):
+ video_id = self._match_id(url)
+ webpage = self._download_webpage(url, video_id)
+ meta = self._search_json(r'window\._PBS_KIDS_DEEPLINK\s*=', webpage, 'video info', video_id)
+ formats, subtitles = self._extract_m3u8_formats_and_subtitles(
+ traverse_obj(meta, ('video_obj', 'URI', {url_or_none})), video_id, ext='mp4')
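+ # traverse_obj() walks meta['video_obj']['URI'] and keeps the value only
+ # if url_or_none() accepts it, so a missing or malformed URI surfaces
+ # as None here instead of a KeyError.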
+
+ return {
+ 'id': video_id,
+ 'formats': formats,
+ 'subtitles': subtitles,
+ **traverse_obj(meta, {
+ 'categories': ('video_obj', 'video_type', {str}, {lambda x: [x] if x else None}),
+ 'channel': ('show_slug', {str}),
+ 'description': ('video_obj', 'description', {str}),
+ 'duration': ('video_obj', 'duration', {int_or_none}),
+ 'series': ('video_obj', 'program_title', {str}),
+ 'title': ('video_obj', 'title', {str}),
+ 'upload_date': ('video_obj', 'air_date', {unified_strdate}),
+ })
+ }
diff --git a/yt_dlp/extractor/pearvideo.py b/yt_dlp/extractor/pearvideo.py
new file mode 100644
index 0000000..e27e5a7
--- /dev/null
+++ b/yt_dlp/extractor/pearvideo.py
@@ -0,0 +1,68 @@
+import re
+
+from .common import InfoExtractor
+from ..utils import (
+ qualities,
+ unified_timestamp,
+ traverse_obj,
+)
+
+
+class PearVideoIE(InfoExtractor):
+ _VALID_URL = r'https?://(?:www\.)?pearvideo\.com/video_(?P<id>\d+)'
+ _TEST = {
+ 'url': 'http://www.pearvideo.com/video_1076290',
+ 'info_dict': {
+ 'id': '1076290',
+ 'ext': 'mp4',
+ 'title': '小浣熊在主人家玻璃上滚石头:没砸',
+ 'description': 'md5:01d576b747de71be0ee85eb7cac25f9d',
+ 'timestamp': 1494275280,
+ 'upload_date': '20170508',
+ }
+ }
+
+ def _real_extract(self, url):
+ video_id = self._match_id(url)
+
+ webpage = self._download_webpage(url, video_id)
+
+ quality = qualities(
+ ('ldflv', 'ld', 'sdflv', 'sd', 'hdflv', 'hd', 'src'))
+
+ formats = [{
+ 'url': mobj.group('url'),
+ 'format_id': mobj.group('id'),
+ 'quality': quality(mobj.group('id')),
+ } for mobj in re.finditer(
+ r'(?P<id>[a-zA-Z]+)Url\s*=\s*(["\'])(?P<url>(?:https?:)?//.+?)\2',
+ webpage)]
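+ # qualities() returns a ranking function: ids later in the tuple rank
+ # higher, so e.g. quality('hd') > quality('sd') > quality('ld'), and the
+ # 'src' original is preferred when several *Url variables are present.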
+ if not formats:
+ info = self._download_json(
+ 'https://www.pearvideo.com/videoStatus.jsp', video_id=video_id,
+ query={'contId': video_id}, headers={'Referer': url})
+ formats = [{
+ 'format_id': k,
+ 'url': v.replace(info['systemTime'], f'cont-{video_id}') if k == 'srcUrl' else v
+ } for k, v in traverse_obj(info, ('videoInfo', 'videos'), default={}).items() if v]
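+ # Hypothetical illustration of the replacement above: videoStatus.jsp
+ # keys srcUrl to the request time, e.g.
+ #   'https://video.pearvideo.com/mp4/.../1638000000000-12345678.mp4'
+ # with info['systemTime'] == '1638000000000'; substituting
+ # f'cont-{video_id}' for that token yields the stable, playable URL.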
+
+ title = self._search_regex(
+ (r'<h1[^>]+\bclass=(["\'])video-tt\1[^>]*>(?P<value>[^<]+)',
+ r'<[^>]+\bdata-title=(["\'])(?P<value>(?:(?!\1).)+)\1'),
+ webpage, 'title', group='value')
+ description = self._search_regex(
+ (r'<div[^>]+\bclass=(["\'])summary\1[^>]*>(?P<value>[^<]+)',
+ r'<[^>]+\bdata-summary=(["\'])(?P<value>(?:(?!\1).)+)\1'),
+ webpage, 'description', default=None,
+ group='value') or self._html_search_meta('Description', webpage)
+ timestamp = unified_timestamp(self._search_regex(
+ r'<div[^>]+\bclass=["\']date["\'][^>]*>([^<]+)',
+ webpage, 'timestamp', fatal=False))
+
+ return {
+ 'id': video_id,
+ 'title': title,
+ 'description': description,
+ 'timestamp': timestamp,
+ 'formats': formats,
+ }
diff --git a/yt_dlp/extractor/peekvids.py b/yt_dlp/extractor/peekvids.py
new file mode 100644
index 0000000..939c26d
--- /dev/null
+++ b/yt_dlp/extractor/peekvids.py
@@ -0,0 +1,188 @@
+import re
+
+from .common import InfoExtractor
+from ..utils import (
+ ExtractorError,
+ get_element_by_class,
+ int_or_none,
+ merge_dicts,
+ url_or_none,
+)
+
+
+class PeekVidsBaseIE(InfoExtractor):
+ def _real_extract(self, url):
+ domain, video_id = self._match_valid_url(url).group('domain', 'id')
+ webpage = self._download_webpage(url, video_id, expected_status=429)
+ if '>Rate Limit Exceeded' in webpage:
+ raise ExtractorError(
+ f'You are suspected of being a bot. Wait, or solve the captcha on the site and provide cookies. {self._login_hint()}',
+ video_id=video_id, expected=True)
+
+ title = self._html_search_regex(r'(?s)<h1\b[^>]*>(.+?)</h1>', webpage, 'title')
+
+ display_id = video_id
+ video_id = self._search_regex(r'(?s)<video\b[^>]+\bdata-id\s*=\s*["\']?([\w-]+)', webpage, 'short video ID')
+ srcs = self._download_json(
+ f'https://www.{domain}/v-alt/{video_id}', video_id,
+ note='Downloading list of source files')
+
+ formats = []
+ for k, v in srcs.items():
+ f_url = url_or_none(v)
+ if not f_url:
+ continue
+
+ height = self._search_regex(r'^data-src(\d{3,})$', k, 'height', default=None)
+ if not height:
+ continue
+
+ formats.append({
+ 'url': f_url,
+ 'format_id': height,
+ 'height': int_or_none(height),
+ })
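+ # e.g. a (hypothetical) source entry {'data-src1080': 'https://.../v.mp4'}
+ # matches r'^data-src(\d{3,})$' and becomes a format with format_id
+ # '1080' and height 1080; keys without a trailing resolution are skipped.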
+
+ if not formats:
+ formats = [{'url': url} for url in srcs.values()]
+
+ info = self._search_json_ld(webpage, video_id, expected_type='VideoObject', default={})
+ info.pop('url', None)
+
+ # _search_json_ld() may not have found the thumbnail if it was given as a list in the ld+json
+ info.setdefault('thumbnail', self._og_search_thumbnail(webpage))
+ detail = (get_element_by_class('detail-video-block', webpage)
+ or get_element_by_class('detail-block', webpage) or '')
+ info['description'] = self._html_search_regex(
+ rf'(?s)(.+?)(?:{re.escape(info.get("description", ""))}\s*<|<ul\b)',
+ detail, 'description', default=None) or None
+ info['title'] = re.sub(r'\s*[,-][^,-]+$', '', info.get('title') or title) or self._generic_title(url)
+
+ def cat_tags(name, html):
+ l = self._html_search_regex(
+ rf'(?s)<span\b[^>]*>\s*{re.escape(name)}\s*:\s*</span>(.+?)</li>',
+ html, name, default='')
+ return list(filter(None, re.split(r'\s+', l)))
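+ # For example, cat_tags('Tags', '<span>Tags:</span> outdoor hd </li>')
+ # (hypothetical markup) captures ' outdoor hd ' and returns
+ # ['outdoor', 'hd'].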
+
+ return merge_dicts({
+ 'id': video_id,
+ 'display_id': display_id,
+ 'age_limit': 18,
+ 'formats': formats,
+ 'categories': cat_tags('Categories', detail),
+ 'tags': cat_tags('Tags', detail),
+ 'uploader': self._html_search_regex(r'[Uu]ploaded\s+by\s(.+?)"', webpage, 'uploader', default=None),
+ }, info)
+
+
+class PeekVidsIE(PeekVidsBaseIE):
+ _VALID_URL = r'''(?x)
+ https?://(?:www\.)?(?P<domain>peekvids\.com)/
+ (?:(?:[^/?#]+/){2}|embed/?\?(?:[^#]*&)?v=)
+ (?P<id>[^/?&#]*)
+ '''
+ _TESTS = [{
+ 'url': 'https://peekvids.com/pc/dane-jones-cute-redhead-with-perfect-tits-with-mini-vamp/BSyLMbN0YCd',
+ 'md5': '2ff6a357a9717dc9dc9894b51307e9a2',
+ 'info_dict': {
+ 'id': '1262717',
+ 'display_id': 'BSyLMbN0YCd',
+ 'title': ' Dane Jones - Cute redhead with perfect tits with Mini Vamp',
+ 'ext': 'mp4',
+ 'thumbnail': r're:^https?://.*\.jpg$',
+ 'description': 'md5:0a61df3620de26c0af8963b1a730cd69',
+ 'timestamp': 1642579329,
+ 'upload_date': '20220119',
+ 'duration': 416,
+ 'view_count': int,
+ 'age_limit': 18,
+ 'uploader': 'SEXYhub.com',
+ 'categories': list,
+ 'tags': list,
+ },
+ }]
+
+
+class PlayVidsIE(PeekVidsBaseIE):
+ _VALID_URL = r'https?://(?:www\.)?(?P<domain>playvids\.com)/(?:embed/|\w\w?/)?(?P<id>[^/?#]*)'
+ _TESTS = [{
+ 'url': 'https://www.playvids.com/U3pBrYhsjXM/pc/dane-jones-cute-redhead-with-perfect-tits-with-mini-vamp',
+ 'md5': '2f12e50213dd65f142175da633c4564c',
+ 'info_dict': {
+ 'id': '1978030',
+ 'display_id': 'U3pBrYhsjXM',
+ 'title': ' Dane Jones - Cute redhead with perfect tits with Mini Vamp',
+ 'ext': 'mp4',
+ 'thumbnail': r're:^https?://.*\.jpg$',
+ 'description': 'md5:0a61df3620de26c0af8963b1a730cd69',
+ 'timestamp': 1640435839,
+ 'upload_date': '20211225',
+ 'duration': 416,
+ 'view_count': int,
+ 'age_limit': 18,
+ 'uploader': 'SEXYhub.com',
+ 'categories': list,
+ 'tags': list,
+ },
+ }, {
+ 'url': 'https://www.playvids.com/es/U3pBrYhsjXM/pc/dane-jones-cute-redhead-with-perfect-tits-with-mini-vamp',
+ 'only_matching': True,
+ }, {
+ 'url': 'https://www.playvids.com/embed/U3pBrYhsjXM',
+ 'only_matching': True,
+ }, {
+ 'url': 'https://www.playvids.com/bKmGLe3IwjZ/sv/brazzers-800-phone-sex-madison-ivy-always-on-the-line',
+ 'md5': 'e783986e596cafbf46411a174ab42ba6',
+ 'info_dict': {
+ 'id': '762385',
+ 'display_id': 'bKmGLe3IwjZ',
+ 'ext': 'mp4',
+ 'title': 'Brazzers - 1 800 Phone Sex: Madison Ivy Always On The Line 6',
+ 'description': 'md5:bdcd2db2b8ad85831a491d7c8605dcef',
+ 'timestamp': 1516958544,
+ 'upload_date': '20180126',
+ 'thumbnail': r're:^https?://.*\.jpg$',
+ 'duration': 480,
+ 'uploader': 'Brazzers',
+ 'age_limit': 18,
+ 'view_count': int,
+ 'categories': list,
+ 'tags': list,
+ },
+ }, {
+ 'url': 'https://www.playvids.com/v/47iUho33toY',
+ 'md5': 'b056b5049d34b648c1e86497cf4febce',
+ 'info_dict': {
+ 'id': '700621',
+ 'display_id': '47iUho33toY',
+ 'ext': 'mp4',
+ 'title': 'KATEE OWEN STRIPTIASE IN SEXY RED LINGERIE',
+ 'timestamp': 1507052209,
+ 'upload_date': '20171003',
+ 'thumbnail': r're:^https?://.*\.jpg$',
+ 'duration': 332,
+ 'uploader': 'Cacerenele',
+ 'age_limit': 18,
+ 'view_count': int,
+ 'categories': list,
+ 'tags': list,
+ },
+ }, {
+ 'url': 'https://www.playvids.com/z3_7iwWCmqt/sexy-teen-filipina-striptease-beautiful-pinay-bargirl-strips-and-dances',
+ 'md5': 'efa09be9f031314b7b7e3bc6510cd0df',
+ 'info_dict': {
+ 'id': '1523518',
+ 'display_id': 'z3_7iwWCmqt',
+ 'ext': 'mp4',
+ 'title': 'SEXY TEEN FILIPINA STRIPTEASE - Beautiful Pinay Bargirl Strips and Dances',
+ 'timestamp': 1607470323,
+ 'upload_date': '20201208',
+ 'thumbnail': r're:^https?://.*\.jpg$',
+ 'duration': 593,
+ 'uploader': 'yorours',
+ 'age_limit': 18,
+ 'view_count': int,
+ 'categories': list,
+ 'tags': list,
+ },
+ }]
diff --git a/yt_dlp/extractor/peertube.py b/yt_dlp/extractor/peertube.py
new file mode 100644
index 0000000..730b239
--- /dev/null
+++ b/yt_dlp/extractor/peertube.py
@@ -0,0 +1,1647 @@
+import functools
+import re
+
+from .common import InfoExtractor
+from ..compat import compat_str
+from ..utils import (
+ format_field,
+ int_or_none,
+ parse_resolution,
+ str_or_none,
+ try_get,
+ unified_timestamp,
+ url_or_none,
+ urljoin,
+ OnDemandPagedList,
+)
+
+
+class PeerTubeIE(InfoExtractor):
+ _INSTANCES_RE = r'''(?:
+ # Taken from https://instances.joinpeertube.org/instances
+ 0ch\.tv|
+ 3dctube\.3dcandy\.social|
+ all\.electric\.kitchen|
+ alterscope\.fr|
+ anarchy\.tube|
+ apathy\.tv|
+ apertatube\.net|
+ archive\.nocopyrightintended\.tv|
+ archive\.reclaim\.tv|
+ area51\.media|
+ astrotube-ufe\.obspm\.fr|
+ astrotube\.obspm\.fr|
+ audio\.freediverse\.com|
+ azxtube\.youssefc\.tn|
+ bark\.video|
+ battlepenguin\.video|
+ bava\.tv|
+ bee-tube\.fr|
+ beetoons\.tv|
+ biblion\.refchat\.net|
+ biblioteca\.theowlclub\.net|
+ bideoak\.argia\.eus|
+ bideoteka\.eus|
+ birdtu\.be|
+ bitcointv\.com|
+ bonn\.video|
+ breeze\.tube|
+ brioco\.live|
+ brocosoup\.fr|
+ canal\.facil\.services|
+ canard\.tube|
+ cdn01\.tilvids\.com|
+ celluloid-media\.huma-num\.fr|
+ chicago1\.peertube\.support|
+ cliptube\.org|
+ cloudtube\.ise\.fraunhofer\.de|
+ comf\.tube|
+ comics\.peertube\.biz|
+ commons\.tube|
+ communitymedia\.video|
+ conspiracydistillery\.com|
+ crank\.recoil\.org|
+ dalek\.zone|
+ dalliance\.network|
+ dangly\.parts|
+ darkvapor\.nohost\.me|
+ daschauher\.aksel\.rocks|
+ digitalcourage\.video|
+ displayeurope\.video|
+ ds106\.tv|
+ dud-video\.inf\.tu-dresden\.de|
+ dud175\.inf\.tu-dresden\.de|
+ dytube\.com|
+ ebildungslabor\.video|
+ evangelisch\.video|
+ fair\.tube|
+ fedi\.video|
+ fedimovie\.com|
+ fediverse\.tv|
+ film\.k-prod\.fr|
+ flipboard\.video|
+ foss\.video|
+ fossfarmers\.company|
+ fotogramas\.politicaconciencia\.org|
+ freediverse\.com|
+ freesoto-u2151\.vm\.elestio\.app|
+ freesoto\.tv|
+ garr\.tv|
+ greatview\.video|
+ grypstube\.uni-greifswald\.de|
+ habratube\.site|
+ ilbjach\.ru|
+ infothema\.net|
+ itvplus\.iiens\.net|
+ johnydeep\.net|
+ juggling\.digital|
+ jupiter\.tube|
+ kadras\.live|
+ kino\.kompot\.si|
+ kino\.schuerz\.at|
+ kinowolnosc\.pl|
+ kirche\.peertube-host\.de|
+ kiwi\.froggirl\.club|
+ kodcast\.com|
+ kolektiva\.media|
+ kpop\.22x22\.ru|
+ kumi\.tube|
+ la2\.peertube\.support|
+ la3\.peertube\.support|
+ la4\.peertube\.support|
+ lastbreach\.tv|
+ lawsplaining\.peertube\.biz|
+ leopard\.tube|
+ live\.codinglab\.ch|
+ live\.libratoi\.org|
+ live\.oldskool\.fi|
+ live\.solari\.com|
+ lucarne\.balsamine\.be|
+ luxtube\.lu|
+ makertube\.net|
+ media\.econoalchemist\.com|
+ media\.exo\.cat|
+ media\.fsfe\.org|
+ media\.gzevd\.de|
+ media\.interior\.edu\.uy|
+ media\.krashboyz\.org|
+ media\.mzhd\.de|
+ media\.smz-ma\.de|
+ media\.theplattform\.net|
+ media\.undeadnetwork\.de|
+ medias\.debrouillonet\.org|
+ medias\.pingbase\.net|
+ mediatube\.fermalo\.fr|
+ melsungen\.peertube-host\.de|
+ merci-la-police\.fr|
+ mindlyvideos\.com|
+ mirror\.peertube\.metalbanana\.net|
+ mirrored\.rocks|
+ mix\.video|
+ mountaintown\.video|
+ movies\.metricsmaster\.eu|
+ mtube\.mooo\.com|
+ mytube\.kn-cloud\.de|
+ mytube\.le5emeaxe\.fr|
+ mytube\.madzel\.de|
+ nadajemy\.com|
+ nanawel-peertube\.dyndns\.org|
+ neat\.tube|
+ nethack\.tv|
+ nicecrew\.tv|
+ nightshift\.minnix\.dev|
+ nolog\.media|
+ nyltube\.nylarea\.com|
+ ocfedtest\.hosted\.spacebear\.ee|
+ openmedia\.edunova\.it|
+ p2ptv\.ru|
+ p\.eertu\.be|
+ p\.lu|
+ pastafriday\.club|
+ patriottube\.sonsofliberty\.red|
+ pcbu\.nl|
+ peer\.azurs\.fr|
+ peer\.d0g4\.me|
+ peer\.lukeog\.com|
+ peer\.madiator\.cloud|
+ peer\.raise-uav\.com|
+ peershare\.togart\.de|
+ peertube-blablalinux\.be|
+ peertube-demo\.learning-hub\.fr|
+ peertube-docker\.cpy\.re|
+ peertube-eu\.howlround\.com|
+ peertube-u5014\.vm\.elestio\.app|
+ peertube-us\.howlround\.com|
+ peertube\.020\.pl|
+ peertube\.0x5e\.eu|
+ peertube\.1984\.cz|
+ peertube\.2i2l\.net|
+ peertube\.adjutor\.xyz|
+ peertube\.adresse\.data\.gouv\.fr|
+ peertube\.alpharius\.io|
+ peertube\.am-networks\.fr|
+ peertube\.anduin\.net|
+ peertube\.anti-logic\.com|
+ peertube\.arch-linux\.cz|
+ peertube\.art3mis\.de|
+ peertube\.artsrn\.ualberta\.ca|
+ peertube\.askan\.info|
+ peertube\.astral0pitek\.synology\.me|
+ peertube\.atsuchan\.page|
+ peertube\.automat\.click|
+ peertube\.b38\.rural-it\.org|
+ peertube\.be|
+ peertube\.beeldengeluid\.nl|
+ peertube\.bgzashtita\.es|
+ peertube\.bike|
+ peertube\.bildung-ekhn\.de|
+ peertube\.biz|
+ peertube\.br0\.fr|
+ peertube\.bridaahost\.ynh\.fr|
+ peertube\.bubbletea\.dev|
+ peertube\.bubuit\.net|
+ peertube\.cabaal\.net|
+ peertube\.chatinbit\.com|
+ peertube\.chaunchy\.com|
+ peertube\.chir\.rs|
+ peertube\.christianpacaud\.com|
+ peertube\.chtisurel\.net|
+ peertube\.chuggybumba\.com|
+ peertube\.cipherbliss\.com|
+ peertube\.cirkau\.art|
+ peertube\.cloud\.nerdraum\.de|
+ peertube\.cloud\.sans\.pub|
+ peertube\.coko\.foundation|
+ peertube\.communecter\.org|
+ peertube\.concordia\.social|
+ peertube\.corrigan\.xyz|
+ peertube\.cpge-brizeux\.fr|
+ peertube\.ctseuro\.com|
+ peertube\.cuatrolibertades\.org|
+ peertube\.cube4fun\.net|
+ peertube\.dair-institute\.org|
+ peertube\.davigge\.com|
+ peertube\.dc\.pini\.fr|
+ peertube\.deadtom\.me|
+ peertube\.debian\.social|
+ peertube\.delta0189\.xyz|
+ peertube\.demonix\.fr|
+ peertube\.designersethiques\.org|
+ peertube\.desmu\.fr|
+ peertube\.devol\.it|
+ peertube\.dk|
+ peertube\.doesstuff\.social|
+ peertube\.eb8\.org|
+ peertube\.education-forum\.com|
+ peertube\.elforcer\.ru|
+ peertube\.em\.id\.lv|
+ peertube\.ethibox\.fr|
+ peertube\.eu\.org|
+ peertube\.european-pirates\.eu|
+ peertube\.eus|
+ peertube\.euskarabildua\.eus|
+ peertube\.expi\.studio|
+ peertube\.familie-berner\.de|
+ peertube\.familleboisteau\.fr|
+ peertube\.fedihost\.website|
+ peertube\.fenarinarsa\.com|
+ peertube\.festnoz\.de|
+ peertube\.forteza\.fr|
+ peertube\.freestorm\.online|
+ peertube\.functional\.cafe|
+ peertube\.gaminglinux\.fr|
+ peertube\.gargantia\.fr|
+ peertube\.geekgalaxy\.fr|
+ peertube\.gemlog\.ca|
+ peertube\.genma\.fr|
+ peertube\.get-racing\.de|
+ peertube\.ghis94\.ovh|
+ peertube\.gidikroon\.eu|
+ peertube\.giftedmc\.com|
+ peertube\.grosist\.fr|
+ peertube\.gruntwerk\.org|
+ peertube\.gsugambit\.com|
+ peertube\.hackerfoo\.com|
+ peertube\.hellsite\.net|
+ peertube\.helvetet\.eu|
+ peertube\.histoirescrepues\.fr|
+ peertube\.home\.x0r\.fr|
+ peertube\.hyperfreedom\.org|
+ peertube\.ichigo\.everydayimshuflin\.com|
+ peertube\.ifwo\.eu|
+ peertube\.in\.ua|
+ peertube\.inapurna\.org|
+ peertube\.informaction\.info|
+ peertube\.interhop\.org|
+ peertube\.it|
+ peertube\.it-arts\.net|
+ peertube\.jensdiemer\.de|
+ peertube\.johntheserg\.al|
+ peertube\.kaleidos\.net|
+ peertube\.kalua\.im|
+ peertube\.kcore\.org|
+ peertube\.keazilla\.net|
+ peertube\.klaewyss\.fr|
+ peertube\.kleph\.eu|
+ peertube\.kodein\.be|
+ peertube\.kooperatywa\.tech|
+ peertube\.kriom\.net|
+ peertube\.kx\.studio|
+ peertube\.kyriog\.eu|
+ peertube\.la-famille-muller\.fr|
+ peertube\.labeuropereunion\.eu|
+ peertube\.lagvoid\.com|
+ peertube\.lhc\.net\.br|
+ peertube\.libresolutions\.network|
+ peertube\.libretic\.fr|
+ peertube\.librosphere\.fr|
+ peertube\.logilab\.fr|
+ peertube\.lon\.tv|
+ peertube\.louisematic\.site|
+ peertube\.luckow\.org|
+ peertube\.luga\.at|
+ peertube\.lyceeconnecte\.fr|
+ peertube\.madixam\.xyz|
+ peertube\.magicstone\.dev|
+ peertube\.marienschule\.de|
+ peertube\.marud\.fr|
+ peertube\.maxweiss\.io|
+ peertube\.miguelcr\.me|
+ peertube\.mikemestnik\.net|
+ peertube\.mobilsicher\.de|
+ peertube\.monlycee\.net|
+ peertube\.mxinfo\.fr|
+ peertube\.naln1\.ca|
+ peertube\.netzbegruenung\.de|
+ peertube\.nicolastissot\.fr|
+ peertube\.nogafam\.fr|
+ peertube\.normalgamingcommunity\.cz|
+ peertube\.nz|
+ peertube\.offerman\.com|
+ peertube\.ohioskates\.com|
+ peertube\.onionstorm\.net|
+ peertube\.opencloud\.lu|
+ peertube\.otakufarms\.com|
+ peertube\.paladyn\.org|
+ peertube\.pix-n-chill\.fr|
+ peertube\.r2\.enst\.fr|
+ peertube\.r5c3\.fr|
+ peertube\.redpill-insight\.com|
+ peertube\.researchinstitute\.at|
+ peertube\.revelin\.fr|
+ peertube\.rlp\.schule|
+ peertube\.rokugan\.fr|
+ peertube\.rougevertbleu\.tv|
+ peertube\.roundpond\.net|
+ peertube\.rural-it\.org|
+ peertube\.satoshishop\.de|
+ peertube\.scyldings\.com|
+ peertube\.securitymadein\.lu|
+ peertube\.semperpax\.com|
+ peertube\.semweb\.pro|
+ peertube\.sensin\.eu|
+ peertube\.sidh\.bzh|
+ peertube\.skorpil\.cz|
+ peertube\.smertrios\.com|
+ peertube\.sqweeb\.net|
+ peertube\.stattzeitung\.org|
+ peertube\.stream|
+ peertube\.su|
+ peertube\.swrs\.net|
+ peertube\.takeko\.cyou|
+ peertube\.taxinachtegel\.de|
+ peertube\.teftera\.com|
+ peertube\.teutronic-services\.de|
+ peertube\.ti-fr\.com|
+ peertube\.tiennot\.net|
+ peertube\.tmp\.rcp\.tf|
+ peertube\.tspu\.edu\.ru|
+ peertube\.tv|
+ peertube\.tweb\.tv|
+ peertube\.underworld\.fr|
+ peertube\.vapronva\.pw|
+ peertube\.veen\.world|
+ peertube\.vesdia\.eu|
+ peertube\.virtual-assembly\.org|
+ peertube\.viviers-fibre\.net|
+ peertube\.vlaki\.cz|
+ peertube\.wiesbaden\.social|
+ peertube\.wivodaim\.net|
+ peertube\.wtf|
+ peertube\.wtfayla\.net|
+ peertube\.xrcb\.cat|
+ peertube\.xwiki\.com|
+ peertube\.zd\.do|
+ peertube\.zetamc\.net|
+ peertube\.zmuuf\.org|
+ peertube\.zoz-serv\.org|
+ peertube\.zwindler\.fr|
+ peervideo\.ru|
+ periscope\.numenaute\.org|
+ pete\.warpnine\.de|
+ petitlutinartube\.fr|
+ phijkchu\.com|
+ phoenixproject\.group|
+ piraten\.space|
+ pirtube\.calut\.fr|
+ pityu\.flaki\.hu|
+ play\.mittdata\.se|
+ player\.ojamajo\.moe|
+ podlibre\.video|
+ portal\.digilab\.nfa\.cz|
+ private\.fedimovie\.com|
+ pt01\.lehrerfortbildung-bw\.de|
+ pt\.diaspodon\.fr|
+ pt\.freedomwolf\.cc|
+ pt\.gordons\.gen\.nz|
+ pt\.ilyamikcoder\.com|
+ pt\.irnok\.net|
+ pt\.mezzo\.moe|
+ pt\.na4\.eu|
+ pt\.netcraft\.ch|
+ pt\.rwx\.ch|
+ pt\.sfunk1x\.com|
+ pt\.thishorsie\.rocks|
+ pt\.vern\.cc|
+ ptb\.lunarviews\.net|
+ ptube\.de|
+ ptube\.ranranhome\.info|
+ puffy\.tube|
+ puppet\.zone|
+ qtube\.qlyoung\.net|
+ quantube\.win|
+ rankett\.net|
+ replay\.jres\.org|
+ review\.peertube\.biz|
+ sdmtube\.fr|
+ secure\.direct-live\.net|
+ secure\.scanovid\.com|
+ seka\.pona\.la|
+ serv3\.wiki-tube\.de|
+ skeptube\.fr|
+ social\.fedimovie\.com|
+ socpeertube\.ru|
+ sovran\.video|
+ special\.videovortex\.tv|
+ spectra\.video|
+ stl1988\.peertube-host\.de|
+ stream\.biovisata\.lt|
+ stream\.conesphere\.cloud|
+ stream\.elven\.pw|
+ stream\.jurnalfm\.md|
+ stream\.k-prod\.fr|
+ stream\.litera\.tools|
+ stream\.nuemedia\.se|
+ stream\.rlp-media\.de|
+ stream\.vrse\.be|
+ studios\.racer159\.com|
+ styxhexenhammer666\.com|
+ syrteplay\.obspm\.fr|
+ t\.0x0\.st|
+ tbh\.co-shaoghal\.net|
+ test-fab\.ynh\.fr|
+ testube\.distrilab\.fr|
+ tgi\.hosted\.spacebear\.ee|
+ theater\.ethernia\.net|
+ thecool\.tube|
+ thevideoverse\.com|
+ tilvids\.com|
+ tinkerbetter\.tube|
+ tinsley\.video|
+ trailers\.ddigest\.com|
+ tube-action-educative\.apps\.education\.fr|
+ tube-arts-lettres-sciences-humaines\.apps\.education\.fr|
+ tube-cycle-2\.apps\.education\.fr|
+ tube-cycle-3\.apps\.education\.fr|
+ tube-education-physique-et-sportive\.apps\.education\.fr|
+ tube-enseignement-professionnel\.apps\.education\.fr|
+ tube-institutionnel\.apps\.education\.fr|
+ tube-langues-vivantes\.apps\.education\.fr|
+ tube-maternelle\.apps\.education\.fr|
+ tube-numerique-educatif\.apps\.education\.fr|
+ tube-sciences-technologies\.apps\.education\.fr|
+ tube-test\.apps\.education\.fr|
+ tube1\.perron-service\.de|
+ tube\.9minuti\.it|
+ tube\.abolivier\.bzh|
+ tube\.alado\.space|
+ tube\.amic37\.fr|
+ tube\.area404\.cloud|
+ tube\.arthack\.nz|
+ tube\.asulia\.fr|
+ tube\.awkward\.company|
+ tube\.azbyka\.ru|
+ tube\.azkware\.net|
+ tube\.bartrip\.me\.uk|
+ tube\.belowtoxic\.media|
+ tube\.bingle\.plus|
+ tube\.bit-friends\.de|
+ tube\.bstly\.de|
+ tube\.chosto\.me|
+ tube\.cms\.garden|
+ tube\.communia\.org|
+ tube\.cyberia\.club|
+ tube\.cybershock\.life|
+ tube\.dembased\.xyz|
+ tube\.dev\.displ\.eu|
+ tube\.digitalesozialearbeit\.de|
+ tube\.distrilab\.fr|
+ tube\.doortofreedom\.org|
+ tube\.dsocialize\.net|
+ tube\.e-jeremy\.com|
+ tube\.ebin\.club|
+ tube\.elemac\.fr|
+ tube\.erzbistum-hamburg\.de|
+ tube\.exozy\.me|
+ tube\.fdn\.fr|
+ tube\.fedi\.quebec|
+ tube\.fediverse\.at|
+ tube\.felinn\.org|
+ tube\.flokinet\.is|
+ tube\.foad\.me\.uk|
+ tube\.freepeople\.fr|
+ tube\.friloux\.me|
+ tube\.froth\.zone|
+ tube\.fulda\.social|
+ tube\.futuretic\.fr|
+ tube\.g1zm0\.de|
+ tube\.g4rf\.net|
+ tube\.gaiac\.io|
+ tube\.geekyboo\.net|
+ tube\.genb\.de|
+ tube\.ghk-academy\.info|
+ tube\.gi-it\.de|
+ tube\.grap\.coop|
+ tube\.graz\.social|
+ tube\.grin\.hu|
+ tube\.hokai\.lol|
+ tube\.int5\.net|
+ tube\.interhacker\.space|
+ tube\.invisible\.ch|
+ tube\.io18\.top|
+ tube\.itsg\.host|
+ tube\.jeena\.net|
+ tube\.kh-berlin\.de|
+ tube\.kockatoo\.org|
+ tube\.kotur\.org|
+ tube\.koweb\.fr|
+ tube\.la-dina\.net|
+ tube\.lab\.nrw|
+ tube\.lacaveatonton\.ovh|
+ tube\.laurent-malys\.fr|
+ tube\.leetdreams\.ch|
+ tube\.linkse\.media|
+ tube\.lokad\.com|
+ tube\.lucie-philou\.com|
+ tube\.media-techport\.de|
+ tube\.morozoff\.pro|
+ tube\.neshweb\.net|
+ tube\.nestor\.coop|
+ tube\.network\.europa\.eu|
+ tube\.nicfab\.eu|
+ tube\.nieuwwestbrabant\.nl|
+ tube\.nogafa\.org|
+ tube\.novg\.net|
+ tube\.nox-rhea\.org|
+ tube\.nuagelibre\.fr|
+ tube\.numerique\.gouv\.fr|
+ tube\.nuxnik\.com|
+ tube\.nx12\.net|
+ tube\.octaplex\.net|
+ tube\.oisux\.org|
+ tube\.okcinfo\.news|
+ tube\.onlinekirche\.net|
+ tube\.opportunis\.me|
+ tube\.oraclefilms\.com|
+ tube\.org\.il|
+ tube\.pacapime\.ovh|
+ tube\.parinux\.org|
+ tube\.pastwind\.top|
+ tube\.picasoft\.net|
+ tube\.pilgerweg-21\.de|
+ tube\.pmj\.rocks|
+ tube\.pol\.social|
+ tube\.ponsonaille\.fr|
+ tube\.portes-imaginaire\.org|
+ tube\.public\.apolut\.net|
+ tube\.pustule\.org|
+ tube\.pyngu\.com|
+ tube\.querdenken-711\.de|
+ tube\.rebellion\.global|
+ tube\.reseau-canope\.fr|
+ tube\.rhythms-of-resistance\.org|
+ tube\.risedsky\.ovh|
+ tube\.rooty\.fr|
+ tube\.rsi\.cnr\.it|
+ tube\.ryne\.moe|
+ tube\.schleuss\.online|
+ tube\.schule\.social|
+ tube\.sekretaerbaer\.net|
+ tube\.shanti\.cafe|
+ tube\.shela\.nu|
+ tube\.skrep\.in|
+ tube\.sleeping\.town|
+ tube\.sp-codes\.de|
+ tube\.spdns\.org|
+ tube\.systerserver\.net|
+ tube\.systest\.eu|
+ tube\.tappret\.fr|
+ tube\.techeasy\.org|
+ tube\.thierrytalbert\.fr|
+ tube\.tinfoil-hat\.net|
+ tube\.toldi\.eu|
+ tube\.tpshd\.de|
+ tube\.trax\.im|
+ tube\.troopers\.agency|
+ tube\.ttk\.is|
+ tube\.tuxfriend\.fr|
+ tube\.tylerdavis\.xyz|
+ tube\.ullihome\.de|
+ tube\.ulne\.be|
+ tube\.undernet\.uy|
+ tube\.vrpnet\.org|
+ tube\.wolfe\.casa|
+ tube\.xd0\.de|
+ tube\.xn--baw-joa\.social|
+ tube\.xy-space\.de|
+ tube\.yapbreak\.fr|
+ tubedu\.org|
+ tubulus\.openlatin\.org|
+ turtleisland\.video|
+ tututu\.tube|
+ tv\.adast\.dk|
+ tv\.adn\.life|
+ tv\.arns\.lt|
+ tv\.atmx\.ca|
+ tv\.based\.quest|
+ tv\.farewellutopia\.com|
+ tv\.filmfreedom\.net|
+ tv\.gravitons\.org|
+ tv\.io\.seg\.br|
+ tv\.lumbung\.space|
+ tv\.pirateradio\.social|
+ tv\.pirati\.cz|
+ tv\.santic-zombie\.ru|
+ tv\.undersco\.re|
+ tv\.zonepl\.net|
+ tvox\.ru|
+ twctube\.twc-zone\.eu|
+ twobeek\.com|
+ urbanists\.video|
+ v\.9tail\.net|
+ v\.basspistol\.org|
+ v\.j4\.lc|
+ v\.kisombrella\.top|
+ v\.koa\.im|
+ v\.kyaru\.xyz|
+ v\.lor\.sh|
+ v\.mkp\.ca|
+ v\.posm\.gay|
+ v\.slaycer\.top|
+ veedeo\.org|
+ vhs\.absturztau\.be|
+ vid\.cthos\.dev|
+ vid\.kinuseka\.us|
+ vid\.mkp\.ca|
+ vid\.nocogabriel\.fr|
+ vid\.norbipeti\.eu|
+ vid\.northbound\.online|
+ vid\.ohboii\.de|
+ vid\.plantplotting\.co\.uk|
+ vid\.pretok\.tv|
+ vid\.prometheus\.systems|
+ vid\.soafen\.love|
+ vid\.twhtv\.club|
+ vid\.wildeboer\.net|
+ video-cave-v2\.de|
+ video-liberty\.com|
+ video\.076\.ne\.jp|
+ video\.1146\.nohost\.me|
+ video\.9wd\.eu|
+ video\.abraum\.de|
+ video\.ados\.accoord\.fr|
+ video\.amiga-ng\.org|
+ video\.anartist\.org|
+ video\.asgardius\.company|
+ video\.audiovisuel-participatif\.org|
+ video\.bards\.online|
+ video\.barkoczy\.social|
+ video\.benetou\.fr|
+ video\.beyondwatts\.social|
+ video\.bgeneric\.net|
+ video\.bilecik\.edu\.tr|
+ video\.blast-info\.fr|
+ video\.bmu\.cloud|
+ video\.catgirl\.biz|
+ video\.causa-arcana\.com|
+ video\.chasmcity\.net|
+ video\.chbmeyer\.de|
+ video\.cigliola\.com|
+ video\.citizen4\.eu|
+ video\.clumsy\.computer|
+ video\.cnnumerique\.fr|
+ video\.cnr\.it|
+ video\.cnt\.social|
+ video\.coales\.co|
+ video\.comune\.trento\.it|
+ video\.coyp\.us|
+ video\.csc49\.fr|
+ video\.davduf\.net|
+ video\.davejansen\.com|
+ video\.dlearning\.nl|
+ video\.dnfi\.no|
+ video\.dresden\.network|
+ video\.drgnz\.club|
+ video\.dudenas\.lt|
+ video\.eientei\.org|
+ video\.ellijaymakerspace\.org|
+ video\.emergeheart\.info|
+ video\.eradicatinglove\.xyz|
+ video\.everythingbagel\.me|
+ video\.extremelycorporate\.ca|
+ video\.fabiomanganiello\.com|
+ video\.fedi\.bzh|
+ video\.fhtagn\.org|
+ video\.firehawk-systems\.com|
+ video\.fox-romka\.ru|
+ video\.fuss\.bz\.it|
+ video\.glassbeadcollective\.org|
+ video\.graine-pdl\.org|
+ video\.gyt\.is|
+ video\.hainry\.fr|
+ video\.hardlimit\.com|
+ video\.hostux\.net|
+ video\.igem\.org|
+ video\.infojournal\.fr|
+ video\.internet-czas-dzialac\.pl|
+ video\.interru\.io|
+ video\.ipng\.ch|
+ video\.ironsysadmin\.com|
+ video\.islameye\.com|
+ video\.jacen\.moe|
+ video\.jadin\.me|
+ video\.jeffmcbride\.net|
+ video\.jigmedatse\.com|
+ video\.kuba-orlik\.name|
+ video\.lacalligramme\.fr|
+ video\.lanceurs-alerte\.fr|
+ video\.laotra\.red|
+ video\.lapineige\.fr|
+ video\.laraffinerie\.re|
+ video\.lavolte\.net|
+ video\.liberta\.vip|
+ video\.libreti\.net|
+ video\.licentia\.net|
+ video\.linc\.systems|
+ video\.linux\.it|
+ video\.linuxtrent\.it|
+ video\.liveitlive\.show|
+ video\.lono\.space|
+ video\.lrose\.de|
+ video\.lunago\.net|
+ video\.lundi\.am|
+ video\.lycee-experimental\.org|
+ video\.maechler\.cloud|
+ video\.marcorennmaus\.de|
+ video\.mass-trespass\.uk|
+ video\.matomocamp\.org|
+ video\.medienzentrum-harburg\.de|
+ video\.mentality\.rip|
+ video\.metaversum\.wtf|
+ video\.midreality\.com|
+ video\.mttv\.it|
+ video\.mugoreve\.fr|
+ video\.mxtthxw\.art|
+ video\.mycrowd\.ca|
+ video\.niboe\.info|
+ video\.nogafam\.es|
+ video\.nstr\.no|
+ video\.occm\.cc|
+ video\.off-investigation\.fr|
+ video\.olos311\.org|
+ video\.ordinobsolete\.fr|
+ video\.osvoj\.ru|
+ video\.ourcommon\.cloud|
+ video\.ozgurkon\.org|
+ video\.pcf\.fr|
+ video\.pcgaldo\.com|
+ video\.phyrone\.de|
+ video\.poul\.org|
+ video\.publicspaces\.net|
+ video\.pullopen\.xyz|
+ video\.r3s\.nrw|
+ video\.rainevixen\.com|
+ video\.resolutions\.it|
+ video\.retroedge\.tech|
+ video\.rhizome\.org|
+ video\.rlp-media\.de|
+ video\.rs-einrich\.de|
+ video\.rubdos\.be|
+ video\.sadmin\.io|
+ video\.sftblw\.moe|
+ video\.shitposter\.club|
+ video\.simplex-software\.ru|
+ video\.slipfox\.xyz|
+ video\.snug\.moe|
+ video\.software-fuer-engagierte\.de|
+ video\.soi\.ch|
+ video\.sonet\.ws|
+ video\.surazal\.net|
+ video\.taskcards\.eu|
+ video\.team-lcbs\.eu|
+ video\.techforgood\.social|
+ video\.telemillevaches\.net|
+ video\.thepolarbear\.co\.uk|
+ video\.thinkof\.name|
+ video\.tii\.space|
+ video\.tkz\.es|
+ video\.trankil\.info|
+ video\.triplea\.fr|
+ video\.tum\.social|
+ video\.turbo\.chat|
+ video\.uriopss-pdl\.fr|
+ video\.ustim\.ru|
+ video\.ut0pia\.org|
+ video\.vaku\.org\.ua|
+ video\.vegafjord\.me|
+ video\.veloma\.org|
+ video\.violoncello\.ch|
+ video\.voidconspiracy\.band|
+ video\.wakkeren\.nl|
+ video\.windfluechter\.org|
+ video\.ziez\.eu|
+ videos-passages\.huma-num\.fr|
+ videos\.aadtp\.be|
+ videos\.ahp-numerique\.fr|
+ videos\.alamaisondulibre\.org|
+ videos\.archigny\.net|
+ videos\.aroaduntraveled\.com|
+ videos\.b4tech\.org|
+ videos\.benjaminbrady\.ie|
+ videos\.bik\.opencloud\.lu|
+ videos\.cloudron\.io|
+ videos\.codingotaku\.com|
+ videos\.coletivos\.org|
+ videos\.collate\.social|
+ videos\.danksquad\.org|
+ videos\.digitaldragons\.eu|
+ videos\.dromeadhere\.fr|
+ videos\.explain-it\.org|
+ videos\.factsonthegroundshow\.com|
+ videos\.foilen\.com|
+ videos\.fsci\.in|
+ videos\.gamercast\.net|
+ videos\.gianmarco\.gg|
+ videos\.globenet\.org|
+ videos\.grafo\.zone|
+ videos\.hauspie\.fr|
+ videos\.hush\.is|
+ videos\.hyphalfusion\.network|
+ videos\.icum\.to|
+ videos\.im\.allmendenetz\.de|
+ videos\.jacksonchen666\.com|
+ videos\.john-livingston\.fr|
+ videos\.knazarov\.com|
+ videos\.kuoushi\.com|
+ videos\.laliguepaysdelaloire\.org|
+ videos\.lemouvementassociatif-pdl\.org|
+ videos\.leslionsfloorball\.fr|
+ videos\.librescrum\.org|
+ videos\.mastodont\.cat|
+ videos\.metus\.ca|
+ videos\.miolo\.org|
+ videos\.offroad\.town|
+ videos\.openmandriva\.org|
+ videos\.parleur\.net|
+ videos\.pcorp\.us|
+ videos\.pop\.eu\.com|
+ videos\.rampin\.org|
+ videos\.rauten\.co\.za|
+ videos\.ritimo\.org|
+ videos\.sarcasmstardust\.com|
+ videos\.scanlines\.xyz|
+ videos\.shmalls\.pw|
+ videos\.stadtfabrikanten\.org|
+ videos\.supertuxkart\.net|
+ videos\.testimonia\.org|
+ videos\.thinkerview\.com|
+ videos\.torrenezzi10\.xyz|
+ videos\.trom\.tf|
+ videos\.utsukta\.org|
+ videos\.viorsan\.com|
+ videos\.wherelinux\.xyz|
+ videos\.wikilibriste\.fr|
+ videos\.yesil\.club|
+ videos\.yeswiki\.net|
+ videotube\.duckdns\.org|
+ vids\.capypara\.de|
+ vids\.roshless\.me|
+ vids\.stary\.pc\.pl|
+ vids\.tekdmn\.me|
+ vidz\.julien\.ovh|
+ views\.southfox\.me|
+ virtual-girls-are\.definitely-for\.me|
+ viste\.pt|
+ vnchich\.com|
+ vnop\.org|
+ vod\.newellijay\.tv|
+ voluntarytube\.com|
+ vtr\.chikichiki\.tube|
+ vulgarisation-informatique\.fr|
+ watch\.easya\.solutions|
+ watch\.goodluckgabe\.life|
+ watch\.ignorance\.eu|
+ watch\.jimmydore\.com|
+ watch\.libertaria\.space|
+ watch\.nuked\.social|
+ watch\.ocaml\.org|
+ watch\.thelema\.social|
+ watch\.tubelab\.video|
+ web-fellow\.de|
+ webtv\.vandoeuvre\.net|
+ wetubevid\.online|
+ wikileaks\.video|
+ wiwi\.video|
+ wow\.such\.disappointment\.fail|
+ www\.jvideos\.net|
+ www\.kotikoff\.net|
+ www\.makertube\.net|
+ www\.mypeer\.tube|
+ www\.nadajemy\.com|
+ www\.neptube\.io|
+ www\.rocaguinarda\.tv|
+ www\.vnshow\.net|
+ xxivproduction\.video|
+ yt\.orokoro\.ru|
+ ytube\.retronerd\.at|
+ zumvideo\.de|
+
+ # from youtube-dl
+ peertube\.rainbowswingers\.net|
+ tube\.stanisic\.nl|
+ peer\.suiri\.us|
+ medias\.libox\.fr|
+ videomensoif\.ynh\.fr|
+ peertube\.travelpandas\.eu|
+ peertube\.rachetjay\.fr|
+ peertube\.montecsys\.fr|
+ tube\.eskuero\.me|
+ peer\.tube|
+ peertube\.umeahackerspace\.se|
+ tube\.nx-pod\.de|
+ video\.monsieurbidouille\.fr|
+ tube\.openalgeria\.org|
+ vid\.lelux\.fi|
+ video\.anormallostpod\.ovh|
+ tube\.crapaud-fou\.org|
+ peertube\.stemy\.me|
+ lostpod\.space|
+ exode\.me|
+ peertube\.snargol\.com|
+ vis\.ion\.ovh|
+ videosdulib\.re|
+ v\.mbius\.io|
+ videos\.judrey\.eu|
+ peertube\.osureplayviewer\.xyz|
+ peertube\.mathieufamily\.ovh|
+ www\.videos-libr\.es|
+ fightforinfo\.com|
+ peertube\.fediverse\.ru|
+ peertube\.oiseauroch\.fr|
+ video\.nesven\.eu|
+ v\.bearvideo\.win|
+ video\.qoto\.org|
+ justporn\.cc|
+ video\.vny\.fr|
+ peervideo\.club|
+ tube\.taker\.fr|
+ peertube\.chantierlibre\.org|
+ tube\.ipfixe\.info|
+ tube\.kicou\.info|
+ tube\.dodsorf\.as|
+ videobit\.cc|
+ video\.yukari\.moe|
+ videos\.elbinario\.net|
+ hkvideo\.live|
+ pt\.tux\.tf|
+ www\.hkvideo\.live|
+ FIGHTFORINFO\.com|
+ pt\.765racing\.com|
+ peertube\.gnumeria\.eu\.org|
+ nordenmedia\.com|
+ peertube\.co\.uk|
+ tube\.darfweb\.eu|
+ tube\.kalah-france\.org|
+ 0ch\.in|
+ vod\.mochi\.academy|
+ film\.node9\.org|
+ peertube\.hatthieves\.es|
+ video\.fitchfamily\.org|
+ peertube\.ddns\.net|
+ video\.ifuncle\.kr|
+ video\.fdlibre\.eu|
+ tube\.22decembre\.eu|
+ peertube\.harmoniescreatives\.com|
+ tube\.fabrigli\.fr|
+ video\.thedwyers\.co|
+ video\.bruitbruit\.com|
+ peertube\.foxfam\.club|
+ peer\.philoxweb\.be|
+ videos\.bugs\.social|
+ peertube\.malbert\.xyz|
+ peertube\.bilange\.ca|
+ libretube\.net|
+ diytelevision\.com|
+ peertube\.fedilab\.app|
+ libre\.video|
+ video\.mstddntfdn\.online|
+ us\.tv|
+ peertube\.sl-network\.fr|
+ peertube\.dynlinux\.io|
+ peertube\.david\.durieux\.family|
+ peertube\.linuxrocks\.online|
+ peerwatch\.xyz|
+ v\.kretschmann\.social|
+ tube\.otter\.sh|
+ yt\.is\.nota\.live|
+ tube\.dragonpsi\.xyz|
+ peertube\.boneheadmedia\.com|
+ videos\.funkwhale\.audio|
+ watch\.44con\.com|
+ peertube\.gcaillaut\.fr|
+ peertube\.icu|
+ pony\.tube|
+ spacepub\.space|
+ tube\.stbr\.io|
+ v\.mom-gay\.faith|
+ tube\.port0\.xyz|
+ peertube\.simounet\.net|
+ play\.jergefelt\.se|
+ peertube\.zeteo\.me|
+ tube\.danq\.me|
+ peertube\.kerenon\.com|
+ tube\.fab-l3\.org|
+ tube\.calculate\.social|
+ peertube\.mckillop\.org|
+ tube\.netzspielplatz\.de|
+ vod\.ksite\.de|
+ peertube\.laas\.fr|
+ tube\.govital\.net|
+ peertube\.stephenson\.cc|
+ bistule\.nohost\.me|
+ peertube\.kajalinifi\.de|
+ video\.ploud\.jp|
+ video\.omniatv\.com|
+ peertube\.ffs2play\.fr|
+ peertube\.leboulaire\.ovh|
+ peertube\.tronic-studio\.com|
+ peertube\.public\.cat|
+ peertube\.metalbanana\.net|
+ video\.1000i100\.fr|
+ peertube\.alter-nativ-voll\.de|
+ tube\.pasa\.tf|
+ tube\.worldofhauru\.xyz|
+ pt\.kamp\.site|
+ peertube\.teleassist\.fr|
+ videos\.mleduc\.xyz|
+ conf\.tube|
+ media\.privacyinternational\.org|
+ pt\.forty-two\.nl|
+ video\.halle-leaks\.de|
+ video\.grosskopfgames\.de|
+ peertube\.schaeferit\.de|
+ peertube\.jackbot\.fr|
+ tube\.extinctionrebellion\.fr|
+ peertube\.f-si\.org|
+ video\.subak\.ovh|
+ videos\.koweb\.fr|
+ peertube\.zergy\.net|
+ peertube\.roflcopter\.fr|
+ peertube\.floss-marketing-school\.com|
+ vloggers\.social|
+ peertube\.iriseden\.eu|
+ videos\.ubuntu-paris\.org|
+ peertube\.mastodon\.host|
+ armstube\.com|
+ peertube\.s2s\.video|
+ peertube\.lol|
+ tube\.open-plug\.eu|
+ open\.tube|
+ peertube\.ch|
+ peertube\.normandie-libre\.fr|
+ peertube\.slat\.org|
+ video\.lacaveatonton\.ovh|
+ peertube\.uno|
+ peertube\.servebeer\.com|
+ peertube\.fedi\.quebec|
+ tube\.h3z\.jp|
+ tube\.plus200\.com|
+ peertube\.eric\.ovh|
+ tube\.metadocs\.cc|
+ tube\.unmondemeilleur\.eu|
+ gouttedeau\.space|
+ video\.antirep\.net|
+ nrop\.cant\.at|
+ tube\.ksl-bmx\.de|
+ tube\.plaf\.fr|
+ tube\.tchncs\.de|
+ video\.devinberg\.com|
+ hitchtube\.fr|
+ peertube\.kosebamse\.com|
+ yunopeertube\.myddns\.me|
+ peertube\.varney\.fr|
+ peertube\.anon-kenkai\.com|
+ tube\.maiti\.info|
+ tubee\.fr|
+ videos\.dinofly\.com|
+ toobnix\.org|
+ videotape\.me|
+ voca\.tube|
+ video\.heromuster\.com|
+ video\.lemediatv\.fr|
+ video\.up\.edu\.ph|
+ balafon\.video|
+ video\.ivel\.fr|
+ thickrips\.cloud|
+ pt\.laurentkruger\.fr|
+ video\.monarch-pass\.net|
+ peertube\.artica\.center|
+ video\.alternanet\.fr|
+ indymotion\.fr|
+ fanvid\.stopthatimp\.net|
+ video\.farci\.org|
+ v\.lesterpig\.com|
+ video\.okaris\.de|
+ tube\.pawelko\.net|
+ peertube\.mablr\.org|
+ tube\.fede\.re|
+ pytu\.be|
+ evertron\.tv|
+ devtube\.dev-wiki\.de|
+ raptube\.antipub\.org|
+ video\.selea\.se|
+ peertube\.mygaia\.org|
+ video\.oh14\.de|
+ peertube\.livingutopia\.org|
+ peertube\.the-penguin\.de|
+ tube\.thechangebook\.org|
+ tube\.anjara\.eu|
+ pt\.pube\.tk|
+ video\.samedi\.pm|
+ mplayer\.demouliere\.eu|
+ widemus\.de|
+ peertube\.me|
+ peertube\.zapashcanon\.fr|
+ video\.latavernedejohnjohn\.fr|
+ peertube\.pcservice46\.fr|
+ peertube\.mazzonetto\.eu|
+ video\.irem\.univ-paris-diderot\.fr|
+ video\.livecchi\.cloud|
+ alttube\.fr|
+ video\.coop\.tools|
+ video\.cabane-libre\.org|
+ peertube\.openstreetmap\.fr|
+ videos\.alolise\.org|
+ irrsinn\.video|
+ video\.antopie\.org|
+ scitech\.video|
+ tube2\.nemsia\.org|
+ video\.amic37\.fr|
+ peertube\.freeforge\.eu|
+ video\.arbitrarion\.com|
+ video\.datsemultimedia\.com|
+ stoptrackingus\.tv|
+ peertube\.ricostrongxxx\.com|
+ docker\.videos\.lecygnenoir\.info|
+ peertube\.togart\.de|
+ tube\.postblue\.info|
+ videos\.domainepublic\.net|
+ peertube\.cyber-tribal\.com|
+ video\.gresille\.org|
+ peertube\.dsmouse\.net|
+ cinema\.yunohost\.support|
+ tube\.theocevaer\.fr|
+ repro\.video|
+ tube\.4aem\.com|
+ quaziinc\.com|
+ peertube\.metawurst\.space|
+ videos\.wakapo\.com|
+ video\.ploud\.fr|
+ video\.freeradical\.zone|
+ tube\.valinor\.fr|
+ refuznik\.video|
+ pt\.kircheneuenburg\.de|
+ peertube\.asrun\.eu|
+ peertube\.lagob\.fr|
+ videos\.side-ways\.net|
+ 91video\.online|
+ video\.valme\.io|
+ video\.taboulisme\.com|
+ videos-libr\.es|
+ tv\.mooh\.fr|
+ nuage\.acostey\.fr|
+ video\.monsieur-a\.fr|
+ peertube\.librelois\.fr|
+ videos\.pair2jeux\.tube|
+ videos\.pueseso\.club|
+ peer\.mathdacloud\.ovh|
+ media\.assassinate-you\.net|
+ vidcommons\.org|
+ ptube\.rousset\.nom\.fr|
+ tube\.cyano\.at|
+ videos\.squat\.net|
+ video\.iphodase\.fr|
+ peertube\.makotoworkshop\.org|
+ peertube\.serveur\.slv-valbonne\.fr|
+ vault\.mle\.party|
+ hostyour\.tv|
+ videos\.hack2g2\.fr|
+ libre\.tube|
+ pire\.artisanlogiciel\.net|
+ videos\.numerique-en-commun\.fr|
+ video\.netsyms\.com|
+ video\.die-partei\.social|
+ video\.writeas\.org|
+ peertube\.swarm\.solvingmaz\.es|
+ tube\.pericoloso\.ovh|
+ watching\.cypherpunk\.observer|
+ videos\.adhocmusic\.com|
+ tube\.rfc1149\.net|
+ peertube\.librelabucm\.org|
+ videos\.numericoop\.fr|
+ peertube\.koehn\.com|
+ peertube\.anarchmusicall\.net|
+ tube\.kampftoast\.de|
+ vid\.y-y\.li|
+ peertube\.xtenz\.xyz|
+ diode\.zone|
+ tube\.egf\.mn|
+ peertube\.nomagic\.uk|
+ visionon\.tv|
+ videos\.koumoul\.com|
+ video\.rastapuls\.com|
+ video\.mantlepro\.com|
+ video\.deadsuperhero\.com|
+ peertube\.musicstudio\.pro|
+ peertube\.we-keys\.fr|
+ artitube\.artifaille\.fr|
+ peertube\.ethernia\.net|
+ tube\.midov\.pl|
+ peertube\.fr|
+ watch\.snoot\.tube|
+ peertube\.donnadieu\.fr|
+ argos\.aquilenet\.fr|
+ tube\.nemsia\.org|
+ tube\.bruniau\.net|
+ videos\.darckoune\.moe|
+ tube\.traydent\.info|
+ dev\.videos\.lecygnenoir\.info|
+ peertube\.nayya\.org|
+ peertube\.live|
+ peertube\.mofgao\.space|
+ video\.lequerrec\.eu|
+ peertube\.amicale\.net|
+ aperi\.tube|
+ tube\.ac-lyon\.fr|
+ video\.lw1\.at|
+ www\.yiny\.org|
+ videos\.pofilo\.fr|
+ tube\.lou\.lt|
+ choob\.h\.etbus\.ch|
+ tube\.hoga\.fr|
+ peertube\.heberge\.fr|
+ video\.obermui\.de|
+ videos\.cloudfrancois\.fr|
+ betamax\.video|
+ video\.typica\.us|
+ tube\.piweb\.be|
+ video\.blender\.org|
+ peertube\.cat|
+ tube\.kdy\.ch|
+ pe\.ertu\.be|
+ peertube\.social|
+ videos\.lescommuns\.org|
+ tv\.datamol\.org|
+ videonaute\.fr|
+ dialup\.express|
+ peertube\.nogafa\.org|
+ megatube\.lilomoino\.fr|
+ peertube\.tamanoir\.foucry\.net|
+ peertube\.devosi\.org|
+ peertube\.1312\.media|
+ tube\.bootlicker\.party|
+ skeptikon\.fr|
+ video\.blueline\.mg|
+ tube\.homecomputing\.fr|
+ tube\.ouahpiti\.info|
+ video\.tedomum\.net|
+ video\.g3l\.org|
+ fontube\.fr|
+ peertube\.gaialabs\.ch|
+ tube\.kher\.nl|
+ peertube\.qtg\.fr|
+ video\.migennes\.net|
+ tube\.p2p\.legal|
+ troll\.tv|
+ videos\.iut-orsay\.fr|
+ peertube\.solidev\.net|
+ videos\.cemea\.org|
+ video\.passageenseine\.fr|
+ videos\.festivalparminous\.org|
+ peertube\.touhoppai\.moe|
+ sikke\.fi|
+ peer\.hostux\.social|
+ share\.tube|
+ peertube\.walkingmountains\.fr|
+ videos\.benpro\.fr|
+ peertube\.parleur\.net|
+ peertube\.heraut\.eu|
+ tube\.aquilenet\.fr|
+ peertube\.gegeweb\.eu|
+ framatube\.org|
+ thinkerview\.video|
+ tube\.conferences-gesticulees\.net|
+ peertube\.datagueule\.tv|
+ video\.lqdn\.fr|
+ tube\.mochi\.academy|
+ media\.zat\.im|
+ video\.colibris-outilslibres\.org|
+ tube\.svnet\.fr|
+ peertube\.video|
+ peertube2\.cpy\.re|
+ peertube3\.cpy\.re|
+ videos\.tcit\.fr|
+ peertube\.cpy\.re|
+ canard\.tube
+ )'''
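+ # Video IDs are either 22-character short UUIDs or canonical RFC 4122 UUIDs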
+ _UUID_RE = r'[\da-zA-Z]{22}|[\da-fA-F]{8}-[\da-fA-F]{4}-[\da-fA-F]{4}-[\da-fA-F]{4}-[\da-fA-F]{12}'
+ _API_BASE = 'https://%s/api/v1/videos/%s/%s'
+ _VALID_URL = r'''(?x)
+ (?:
+ peertube:(?P<host>[^:]+):|
+ https?://(?P<host_2>%s)/(?:videos/(?:watch|embed)|api/v\d/videos|w)/
+ )
+ (?P<id>%s)
+ ''' % (_INSTANCES_RE, _UUID_RE)
+ _EMBED_REGEX = [rf'''(?x)<iframe[^>]+\bsrc=["\'](?P<url>(?:https?:)?//{_INSTANCES_RE}/videos/embed/(?:{_UUID_RE}))''']
+ _TESTS = [{
+ 'url': 'https://framatube.org/videos/watch/9c9de5e8-0a1e-484a-b099-e80766180a6d',
+ 'md5': '8563064d245a4be5705bddb22bb00a28',
+ 'info_dict': {
+ 'id': '9c9de5e8-0a1e-484a-b099-e80766180a6d',
+ 'ext': 'mp4',
+ 'title': 'What is PeerTube?',
+ 'description': 'md5:3fefb8dde2b189186ce0719fda6f7b10',
+ 'thumbnail': r're:https?://.*\.(?:jpg|png)',
+ 'timestamp': 1538391166,
+ 'upload_date': '20181001',
+ 'uploader': 'Framasoft',
+ 'uploader_id': '3',
+ 'uploader_url': 'https://framatube.org/accounts/framasoft',
+ 'channel': 'A propos de PeerTube',
+ 'channel_id': '2215',
+ 'channel_url': 'https://framatube.org/video-channels/joinpeertube',
+ 'language': 'en',
+ 'license': 'Attribution - Share Alike',
+ 'duration': 113,
+ 'view_count': int,
+ 'like_count': int,
+ 'dislike_count': int,
+ 'tags': ['framasoft', 'peertube'],
+ 'categories': ['Science & Technology'],
+ }
+ }, {
+ 'url': 'https://peertube2.cpy.re/w/122d093a-1ede-43bd-bd34-59d2931ffc5e',
+ 'info_dict': {
+ 'id': '122d093a-1ede-43bd-bd34-59d2931ffc5e',
+ 'ext': 'mp4',
+ 'title': 'E2E tests',
+ 'uploader_id': '37855',
+ 'timestamp': 1589276219,
+ 'upload_date': '20200512',
+ 'uploader': 'chocobozzz',
+ }
+ }, {
+ 'url': 'https://peertube2.cpy.re/w/3fbif9S3WmtTP8gGsC5HBd',
+ 'info_dict': {
+ 'id': '3fbif9S3WmtTP8gGsC5HBd',
+ 'ext': 'mp4',
+ 'title': 'E2E tests',
+ 'uploader_id': '37855',
+ 'timestamp': 1589276219,
+ 'upload_date': '20200512',
+ 'uploader': 'chocobozzz',
+ },
+ }, {
+ 'url': 'https://peertube2.cpy.re/api/v1/videos/3fbif9S3WmtTP8gGsC5HBd',
+ 'info_dict': {
+ 'id': '3fbif9S3WmtTP8gGsC5HBd',
+ 'ext': 'mp4',
+ 'title': 'E2E tests',
+ 'uploader_id': '37855',
+ 'timestamp': 1589276219,
+ 'upload_date': '20200512',
+ 'uploader': 'chocobozzz',
+ },
+ }, {
+ # Issue #26002
+ 'url': 'peertube:spacepub.space:d8943b2d-8280-497b-85ec-bc282ec2afdc',
+ 'info_dict': {
+ 'id': 'd8943b2d-8280-497b-85ec-bc282ec2afdc',
+ 'ext': 'mp4',
+ 'title': 'Dot matrix printer shell demo',
+ 'uploader_id': '3',
+ 'timestamp': 1587401293,
+ 'upload_date': '20200420',
+ 'uploader': 'Drew DeVault',
+ }
+ }, {
+ 'url': 'https://peertube.debian.social/videos/watch/0b04f13d-1e18-4f1d-814e-4979aa7c9c44',
+ 'only_matching': True,
+ }, {
+ # nsfw
+ 'url': 'https://vod.ksite.de/videos/watch/9bb88cd3-9959-46d9-9ab9-33d2bb704c39',
+ 'only_matching': True,
+ }, {
+ 'url': 'https://vod.ksite.de/videos/embed/fed67262-6edb-4d1c-833b-daa9085c71d7',
+ 'only_matching': True,
+ }, {
+ 'url': 'https://peertube.tv/api/v1/videos/c1875674-97d0-4c94-a058-3f7e64c962e8',
+ 'only_matching': True,
+ }, {
+ 'url': 'peertube:framatube.org:b37a5b9f-e6b5-415c-b700-04a5cd6ec205',
+ 'only_matching': True,
+ }]
+
+ @staticmethod
+ def _extract_peertube_url(webpage, source_url):
+ mobj = re.match(
+ r'https?://(?P<host>[^/]+)/(?:videos/(?:watch|embed)|w)/(?P<id>%s)'
+ % PeerTubeIE._UUID_RE, source_url)
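+ # Only claim a generic URL when the page carries one of the stock PeerTube frontend markers below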
+ if mobj and any(p in webpage for p in (
+ 'meta property="og:platform" content="PeerTube"',
+ '<title>PeerTube<',
+ 'There will be other non JS-based clients to access PeerTube',
+ '>We are sorry but it seems that PeerTube is not compatible with your web browser.<')):
+ return 'peertube:%s:%s' % mobj.group('host', 'id')
+
+ @classmethod
+ def _extract_embed_urls(cls, url, webpage):
+ embeds = tuple(super()._extract_embed_urls(url, webpage))
+ if embeds:
+ return embeds
+
+ peertube_url = cls._extract_peertube_url(webpage, url)
+ if peertube_url:
+ return [peertube_url]
+
+ def _call_api(self, host, video_id, path, note=None, errnote=None, fatal=True):
+ return self._download_json(
+ self._API_BASE % (host, video_id, path), video_id,
+ note=note, errnote=errnote, fatal=fatal)
+
+ def _get_subtitles(self, host, video_id):
+ captions = self._call_api(
+ host, video_id, 'captions', note='Downloading captions JSON',
+ fatal=False)
+ if not isinstance(captions, dict):
+ return
+ data = captions.get('data')
+ if not isinstance(data, list):
+ return
+ subtitles = {}
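+ # captionPath is host-relative; resolve it against the instance and bucket each track by language id ('en' fallback)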
+ for e in data:
+ language_id = try_get(e, lambda x: x['language']['id'], compat_str)
+ caption_url = urljoin('https://%s' % host, e.get('captionPath'))
+ if not caption_url:
+ continue
+ subtitles.setdefault(language_id or 'en', []).append({
+ 'url': caption_url,
+ })
+ return subtitles
+
+ def _real_extract(self, url):
+ mobj = self._match_valid_url(url)
+ host = mobj.group('host') or mobj.group('host_2')
+ video_id = mobj.group('id')
+
+ video = self._call_api(
+ host, video_id, '', note='Downloading video JSON')
+
+ title = video['name']
+
+ formats = []
+ files = video.get('files') or []
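+ # Streaming playlists (HLS) carry their own 'files' lists; fold them into the top-level progressive files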
+ for playlist in (video.get('streamingPlaylists') or []):
+ if not isinstance(playlist, dict):
+ continue
+ playlist_files = playlist.get('files')
+ if not (playlist_files and isinstance(playlist_files, list)):
+ continue
+ files.extend(playlist_files)
+ for file_ in files:
+ if not isinstance(file_, dict):
+ continue
+ file_url = url_or_none(file_.get('fileUrl'))
+ if not file_url:
+ continue
+ file_size = int_or_none(file_.get('size'))
+ format_id = try_get(
+ file_, lambda x: x['resolution']['label'], compat_str)
+ f = parse_resolution(format_id)
+ f.update({
+ 'url': file_url,
+ 'format_id': format_id,
+ 'filesize': file_size,
+ })
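+ # A '0p' resolution label marks an audio-only rendition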
+ if format_id == '0p':
+ f['vcodec'] = 'none'
+ else:
+ f['fps'] = int_or_none(file_.get('fps'))
+ formats.append(f)
+
+ description = video.get('description')
+ if description and len(description) >= 250:
+ # description is shortened
+ full_description = self._call_api(
+ host, video_id, 'description', note='Downloading description JSON',
+ fatal=False)
+
+ if isinstance(full_description, dict):
+ description = str_or_none(full_description.get('description')) or description
+
+ subtitles = self.extract_subtitles(host, video_id)
+
+ def data(section, field, type_):
+ return try_get(video, lambda x: x[section][field], type_)
+
+ def account_data(field, type_):
+ return data('account', field, type_)
+
+ def channel_data(field, type_):
+ return data('channel', field, type_)
+
+ category = data('category', 'label', compat_str)
+ categories = [category] if category else None
+
+ nsfw = video.get('nsfw')
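+ # Only trust an explicit boolean flag; anything else leaves the age limit undetermined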
+ if isinstance(nsfw, bool):
+ age_limit = 18 if nsfw else 0
+ else:
+ age_limit = None
+
+ webpage_url = 'https://%s/videos/watch/%s' % (host, video_id)
+
+ return {
+ 'id': video_id,
+ 'title': title,
+ 'description': description,
+ 'thumbnail': urljoin(webpage_url, video.get('thumbnailPath')),
+ 'timestamp': unified_timestamp(video.get('publishedAt')),
+ 'uploader': account_data('displayName', compat_str),
+ 'uploader_id': str_or_none(account_data('id', int)),
+ 'uploader_url': url_or_none(account_data('url', compat_str)),
+ 'channel': channel_data('displayName', compat_str),
+ 'channel_id': str_or_none(channel_data('id', int)),
+ 'channel_url': url_or_none(channel_data('url', compat_str)),
+ 'language': data('language', 'id', compat_str),
+ 'license': data('licence', 'label', compat_str),
+ 'duration': int_or_none(video.get('duration')),
+ 'view_count': int_or_none(video.get('views')),
+ 'like_count': int_or_none(video.get('likes')),
+ 'dislike_count': int_or_none(video.get('dislikes')),
+ 'age_limit': age_limit,
+ 'tags': try_get(video, lambda x: x['tags'], list),
+ 'categories': categories,
+ 'formats': formats,
+ 'subtitles': subtitles,
+ 'webpage_url': webpage_url,
+ }
+
+
+class PeerTubePlaylistIE(InfoExtractor):
+ IE_NAME = 'PeerTube:Playlist'
+ _TYPES = {
+ 'a': 'accounts',
+ 'c': 'video-channels',
+ 'w/p': 'video-playlists',
+ }
+ _VALID_URL = r'''(?x)
+ https?://(?P<host>%s)/(?P<type>(?:%s))/
+ (?P<id>[^/]+)
+ ''' % (PeerTubeIE._INSTANCES_RE, '|'.join(_TYPES.keys()))
+ _TESTS = [{
+ 'url': 'https://peertube.debian.social/w/p/hFdJoTuyhNJVa1cDWd1d12',
+ 'info_dict': {
+ 'id': 'hFdJoTuyhNJVa1cDWd1d12',
+ 'description': 'Diversas palestras do Richard Stallman no Brasil.',
+ 'title': 'Richard Stallman no Brasil',
+ 'timestamp': 1599676222,
+ },
+ 'playlist_mincount': 9,
+ }, {
+ 'url': 'https://peertube2.cpy.re/a/chocobozzz/videos',
+ 'info_dict': {
+ 'id': 'chocobozzz',
+ 'timestamp': 1553874564,
+ 'title': 'chocobozzz',
+ },
+ 'playlist_mincount': 2,
+ }, {
+ 'url': 'https://framatube.org/c/bf54d359-cfad-4935-9d45-9d6be93f63e8/videos',
+ 'info_dict': {
+ 'id': 'bf54d359-cfad-4935-9d45-9d6be93f63e8',
+ 'timestamp': 1519917377,
+ 'title': 'Les vidéos de Framasoft',
+ },
+ 'playlist_mincount': 345,
+ }, {
+ 'url': 'https://peertube2.cpy.re/c/blender_open_movies@video.blender.org/videos',
+ 'info_dict': {
+ 'id': 'blender_open_movies@video.blender.org',
+ 'timestamp': 1542287810,
+ 'title': 'Official Blender Open Movies',
+ },
+ 'playlist_mincount': 11,
+ }]
+ _API_BASE = 'https://%s/api/v1/%s/%s%s'
+ _PAGE_SIZE = 30
+
+ def call_api(self, host, name, path, base, **kwargs):
+ return self._download_json(
+ self._API_BASE % (host, base, name, path), name, **kwargs)
+
+ def fetch_page(self, host, id, type, page):
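+ # Page indices from OnDemandPagedList are 0-based; bump so the log note reads 1-based while 'start' keeps the original offset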
+ page += 1
+ video_data = self.call_api(
+ host, id,
+ f'/videos?sort=-createdAt&start={self._PAGE_SIZE * (page - 1)}&count={self._PAGE_SIZE}&nsfw=both',
+ type, note=f'Downloading page {page}').get('data', [])
+ for video in video_data:
+ shortUUID = video.get('shortUUID') or try_get(video, lambda x: x['video']['shortUUID'])
+ video_title = video.get('name') or try_get(video, lambda x: x['video']['name'])
+ yield self.url_result(
+ f'https://{host}/w/{shortUUID}', PeerTubeIE.ie_key(),
+ video_id=shortUUID, video_title=video_title)
+
+ def _extract_playlist(self, host, type, id):
+ info = self.call_api(host, id, '', type, note='Downloading playlist information', fatal=False) or {}
+
+ playlist_title = info.get('displayName')
+ playlist_description = info.get('description')
+ playlist_timestamp = unified_timestamp(info.get('createdAt'))
+ channel = try_get(info, lambda x: x['ownerAccount']['name']) or info.get('displayName')
+ channel_id = try_get(info, lambda x: x['ownerAccount']['id']) or info.get('id')
+ thumbnail = format_field(info, 'thumbnailPath', f'https://{host}%s')
+
+ entries = OnDemandPagedList(functools.partial(
+ self.fetch_page, host, id, type), self._PAGE_SIZE)
+
+ return self.playlist_result(
+ entries, id, playlist_title, playlist_description,
+ timestamp=playlist_timestamp, channel=channel, channel_id=channel_id, thumbnail=thumbnail)
+
+ def _real_extract(self, url):
+ type, host, id = self._match_valid_url(url).group('type', 'host', 'id')
+ type = self._TYPES[type]
+ return self._extract_playlist(host, type, id)
diff --git a/yt_dlp/extractor/peertv.py b/yt_dlp/extractor/peertv.py
new file mode 100644
index 0000000..a709e21
--- /dev/null
+++ b/yt_dlp/extractor/peertv.py
@@ -0,0 +1,52 @@
+from .common import InfoExtractor
+from ..utils import js_to_json
+
+
+class PeerTVIE(InfoExtractor):
+ IE_NAME = 'peer.tv'
+ _VALID_URL = r'https?://(?:www\.)?peer\.tv/(?:de|it|en)/(?P<id>\d+)'
+ _TESTS = [{
+ 'url': 'https://www.peer.tv/de/841',
+ 'info_dict': {
+ 'id': '841',
+ 'ext': 'mp4',
+ 'title': 'Die Brunnenburg',
+ 'description': 'md5:4395f6142b090338340ab88a3aae24ed',
+ },
+ }, {
+ 'url': 'https://www.peer.tv/it/404',
+ 'info_dict': {
+ 'id': '404',
+ 'ext': 'mp4',
+ 'title': 'Cascate di ghiaccio in Val Gardena',
+ 'description': 'md5:e8e5907f236171842674e8090e3577b8',
+ },
+ }]
+
+ def _real_extract(self, url):
+ video_id = self._match_id(url)
+ webpage = self._download_webpage(url, video_id)
+
+ video_key = self._html_search_regex(r'player\.peer\.tv/js/([a-zA-Z0-9]+)', webpage, 'video key')
+
+ js = self._download_webpage(f'https://player.peer.tv/js/{video_key}/', video_id,
+ headers={'Referer': 'https://www.peer.tv/'}, note='Downloading session id')
+
+ session_id = self._search_regex(r'["\']session_id["\']:\s*["\']([a-zA-Z0-9]+)["\']', js, 'session id')
+
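+ # 'jsr' is the base64-encoded referrer page URL (this one decodes to 'https://www.peer.tv/de/841')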
+ player_webpage = self._download_webpage(
+ f'https://player.peer.tv/jsc/{video_key}/{session_id}?jsr=aHR0cHM6Ly93d3cucGVlci50di9kZS84NDE=&cs=UTF-8&mq=2&ua=0&webm=p&mp4=p&hls=1',
+ video_id, note='Downloading player webpage')
+
+ m3u8_url = self._search_regex(r'["\']playlist_url["\']:\s*(["\'][^"\']+["\'])', player_webpage, 'm3u8 url')
+ m3u8_url = self._parse_json(m3u8_url, video_id, transform_source=js_to_json)
+
+ formats = self._extract_m3u8_formats(m3u8_url, video_id, m3u8_id='hls')
+
+ return {
+ 'id': video_id,
+ 'title': self._html_search_regex(r'<h1>(.+?)</h1>', webpage, 'title').replace('\xa0', ' '),
+ 'formats': formats,
+ 'description': self._html_search_meta(('og:description', 'description'), webpage),
+ 'thumbnail': self._html_search_meta(('og:image', 'image'), webpage)
+ }
diff --git a/yt_dlp/extractor/peloton.py b/yt_dlp/extractor/peloton.py
new file mode 100644
index 0000000..7864299
--- /dev/null
+++ b/yt_dlp/extractor/peloton.py
@@ -0,0 +1,215 @@
+import json
+import re
+import urllib.parse
+
+from .common import InfoExtractor
+from ..networking.exceptions import HTTPError
+from ..utils import (
+ ExtractorError,
+ float_or_none,
+ str_or_none,
+ traverse_obj,
+ url_or_none,
+)
+
+
+class PelotonIE(InfoExtractor):
+ IE_NAME = 'peloton'
+ _NETRC_MACHINE = 'peloton'
+ _VALID_URL = r'https?://members\.onepeloton\.com/classes/player/(?P<id>[a-f0-9]+)'
+ _TESTS = [{
+ 'url': 'https://members.onepeloton.com/classes/player/0e9653eb53544eeb881298c8d7a87b86',
+ 'info_dict': {
+ 'id': '0e9653eb53544eeb881298c8d7a87b86',
+ 'title': '20 min Chest & Back Strength',
+ 'ext': 'mp4',
+ 'thumbnail': r're:^https?://.+\.jpg',
+ 'description': 'md5:fcd5be9b9eda0194b470e13219050a66',
+ 'creator': 'Chase Tucker',
+ 'release_timestamp': 1556141400,
+ 'timestamp': 1556141400,
+ 'upload_date': '20190424',
+ 'duration': 1389,
+ 'categories': ['Strength'],
+ 'tags': ['Workout Mat', 'Light Weights', 'Medium Weights'],
+ 'is_live': False,
+ 'chapters': 'count:1',
+ 'subtitles': {'en': [{
+ 'url': r're:^https?://.+',
+ 'ext': 'vtt'
+ }]},
+ }, 'params': {
+ 'skip_download': 'm3u8',
+ },
+ 'skip': 'Account needed'
+ }, {
+ 'url': 'https://members.onepeloton.com/classes/player/26603d53d6bb4de1b340514864a6a6a8',
+ 'info_dict': {
+ 'id': '26603d53d6bb4de1b340514864a6a6a8',
+ 'title': '30 min Earth Day Run',
+ 'ext': 'm4a',
+ 'thumbnail': r're:https://.+\.jpg',
+ 'description': 'md5:adc065a073934d7ee0475d217afe0c3d',
+ 'creator': 'Selena Samuela',
+ 'release_timestamp': 1587567600,
+ 'timestamp': 1587567600,
+ 'upload_date': '20200422',
+ 'duration': 1802,
+ 'categories': ['Running'],
+ 'is_live': False,
+ 'chapters': 'count:3'
+ }, 'params': {
+ 'skip_download': 'm3u8',
+ },
+ 'skip': 'Account needed'
+ }]
+
+ _MANIFEST_URL_TEMPLATE = '%s?hdnea=%s'
+
+ def _start_session(self, video_id):
+ self._download_webpage('https://api.onepeloton.com/api/started_client_session', video_id, note='Starting session')
+
+ def _login(self, video_id):
+ username, password = self._get_login_info()
+ if not (username and password):
+ self.raise_login_required()
+ try:
+ self._download_json(
+ 'https://api.onepeloton.com/auth/login', video_id, note='Logging in',
+ data=json.dumps({
+ 'username_or_email': username,
+ 'password': password,
+ 'with_pubsub': False
+ }).encode(),
+ headers={'Content-Type': 'application/json', 'User-Agent': 'web'})
+ except ExtractorError as e:
+ if isinstance(e.cause, HTTPError) and e.cause.status == 401:
+ json_string = self._webpage_read_content(e.cause.response, None, video_id)
+ res = self._parse_json(json_string, video_id)
+ raise ExtractorError(res['message'], expected=res['message'] == 'Login failed')
+ else:
+ raise
+
+ def _get_token(self, video_id):
+ try:
+ subscription = self._download_json(
+ 'https://api.onepeloton.com/api/subscription/stream', video_id, note='Downloading token',
+ data=json.dumps({}).encode(), headers={'Content-Type': 'application/json'})
+ except ExtractorError as e:
+ if isinstance(e.cause, HTTPError) and e.cause.status == 403:
+ json_string = self._webpage_read_content(e.cause.response, None, video_id)
+ res = self._parse_json(json_string, video_id)
+ raise ExtractorError(res['message'], expected=res['message'] == 'Stream limit reached')
+ else:
+ raise
+ return subscription['token']
+
+ def _real_extract(self, url):
+ video_id = self._match_id(url)
+ try:
+ self._start_session(video_id)
+ except ExtractorError as e:
+ if isinstance(e.cause, HTTPError) and e.cause.status == 401:
+ self._login(video_id)
+ self._start_session(video_id)
+ else:
+ raise
+
+ metadata = self._download_json('https://api.onepeloton.com/api/ride/%s/details?stream_source=multichannel' % video_id, video_id)
+ ride_data = metadata.get('ride')
+ if not ride_data:
+ raise ExtractorError('Missing stream metadata')
+ token = self._get_token(video_id)
+
+ is_live = False
+ if ride_data.get('content_format') == 'audio':
+ url = self._MANIFEST_URL_TEMPLATE % (ride_data.get('vod_stream_url'), urllib.parse.quote(token))
+ formats = [{
+ 'url': url,
+ 'ext': 'm4a',
+ 'format_id': 'audio',
+ 'vcodec': 'none',
+ }]
+ subtitles = {}
+ else:
+ if ride_data.get('vod_stream_url'):
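+ # Caption codes are stripped to bare language subtags ('en-US' -> 'en'); the token is
+ # percent-encoded twice since it ends up inside a URL passed as the proxy's 'url' parameter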
+ url = 'https://members.onepeloton.com/.netlify/functions/m3u8-proxy?displayLanguage=en&acceptedSubtitles=%s&url=%s?hdnea=%s' % (
+ ','.join([re.sub('^([a-z]+)-([A-Z]+)$', r'\1', caption) for caption in ride_data['captions']]),
+ ride_data['vod_stream_url'],
+ urllib.parse.quote(urllib.parse.quote(token)))
+ elif ride_data.get('live_stream_url'):
+ url = self._MANIFEST_URL_TEMPLATE % (ride_data.get('live_stream_url'), urllib.parse.quote(token))
+ is_live = True
+ else:
+ raise ExtractorError('Missing video URL')
+ formats, subtitles = self._extract_m3u8_formats_and_subtitles(url, video_id, 'mp4')
+
+ if metadata.get('instructor_cues'):
+ subtitles['cues'] = [{
+ 'data': json.dumps(metadata.get('instructor_cues')),
+ 'ext': 'json'
+ }]
+
+ category = ride_data.get('fitness_discipline_display_name')
+ chapters = [{
+ 'start_time': segment.get('start_time_offset'),
+ 'end_time': segment.get('start_time_offset') + segment.get('length'),
+ 'title': segment.get('name')
+ } for segment in traverse_obj(metadata, ('segments', 'segment_list'))]
+
+ return {
+ 'id': video_id,
+ 'title': ride_data.get('title'),
+ 'formats': formats,
+ 'thumbnail': url_or_none(ride_data.get('image_url')),
+ 'description': str_or_none(ride_data.get('description')),
+ 'creator': traverse_obj(ride_data, ('instructor', 'name')),
+ 'release_timestamp': ride_data.get('original_air_time'),
+ 'timestamp': ride_data.get('original_air_time'),
+ 'subtitles': subtitles,
+ 'duration': float_or_none(ride_data.get('length')),
+ 'categories': [category] if category else None,
+ 'tags': traverse_obj(ride_data, ('equipment_tags', ..., 'name')),
+ 'is_live': is_live,
+ 'chapters': chapters
+ }
+
+
+class PelotonLiveIE(InfoExtractor):
+ IE_NAME = 'peloton:live'
+ IE_DESC = 'Peloton Live'
+ _VALID_URL = r'https?://members\.onepeloton\.com/player/live/(?P<id>[a-f0-9]+)'
+ _TEST = {
+ 'url': 'https://members.onepeloton.com/player/live/eedee2d19f804a9788f53aa8bd38eb1b',
+ 'info_dict': {
+ 'id': '32edc92d28044be5bf6c7b6f1f8d1cbc',
+ 'title': '30 min HIIT Ride: Live from Home',
+ 'ext': 'mp4',
+ 'thumbnail': r're:^https?://.+\.png',
+ 'description': 'md5:f0d7d8ed3f901b7ee3f62c1671c15817',
+ 'creator': 'Alex Toussaint',
+ 'release_timestamp': 1587736620,
+ 'timestamp': 1587736620,
+ 'upload_date': '20200424',
+ 'duration': 2014,
+ 'categories': ['Cycling'],
+ 'is_live': False,
+ 'chapters': 'count:3'
+ },
+ 'params': {
+ 'skip_download': 'm3u8',
+ },
+ 'skip': 'Account needed'
+ }
+
+ def _real_extract(self, url):
+ workout_id = self._match_id(url)
+ peloton = self._download_json(f'https://api.onepeloton.com/api/peloton/{workout_id}', workout_id)
+
+ if peloton.get('ride_id'):
+ if not peloton.get('is_live') or peloton.get('is_encore') or peloton.get('status') != 'PRE_START':
+ return self.url_result('https://members.onepeloton.com/classes/player/%s' % peloton['ride_id'])
+ else:
+ raise ExtractorError('Ride has not started', expected=True)
+ else:
+ raise ExtractorError('Missing video ID')
diff --git a/yt_dlp/extractor/performgroup.py b/yt_dlp/extractor/performgroup.py
new file mode 100644
index 0000000..f4d7f22
--- /dev/null
+++ b/yt_dlp/extractor/performgroup.py
@@ -0,0 +1,77 @@
+from .common import InfoExtractor
+from ..utils import int_or_none
+
+
+class PerformGroupIE(InfoExtractor):
+ _VALID_URL = r'https?://player\.performgroup\.com/eplayer(?:/eplayer\.html|\.js)#/?(?P<id>[0-9a-f]{26})\.(?P<auth_token>[0-9a-z]{26})'
+ _TESTS = [{
+ # http://www.faz.net/aktuell/sport/fussball/wm-2018-playoffs-schweiz-besiegt-nordirland-1-0-15286104.html
+ 'url': 'http://player.performgroup.com/eplayer/eplayer.html#d478c41c5d192f56b9aa859de8.1w4crrej5w14e1ed4s1ce4ykab',
+ 'md5': '259cb03d142e2e52471e8837ecacb29f',
+ 'info_dict': {
+ 'id': 'xgrwobuzumes1lwjxtcdpwgxd',
+ 'ext': 'mp4',
+ 'title': 'Liga MX: Keine Einsicht nach Horrorfoul',
+ 'description': 'md5:7cd3b459c82725b021e046ab10bf1c5b',
+ 'timestamp': 1511533477,
+ 'upload_date': '20171124',
+ }
+ }]
+
+ def _call_api(self, service, auth_token, content_id, referer_url):
+ return self._download_json(
+ 'http://ep3.performfeeds.com/ep%s/%s/%s/' % (service, auth_token, content_id),
+ content_id, headers={
+ 'Referer': referer_url,
+ 'Origin': 'http://player.performgroup.com',
+ }, query={
+ '_fmt': 'json',
+ })
+
+ def _real_extract(self, url):
+ player_id, auth_token = self._match_valid_url(url).groups()
+ bootstrap = self._call_api('bootstrap', auth_token, player_id, url)
+ video = bootstrap['config']['dataSource']['sourceItems'][0]['videos'][0]
+ video_id = video['uuid']
+ vod = self._call_api('vod', auth_token, video_id, url)
+ media = vod['videos']['video'][0]['media']
+
+ formats = []
+ hls_url = media.get('hls', {}).get('url')
+ if hls_url:
+ formats.extend(self._extract_m3u8_formats(hls_url, video_id, 'mp4', 'm3u8_native', m3u8_id='hls', fatal=False))
+
+ hds_url = media.get('hds', {}).get('url')
+ if hds_url:
+ formats.extend(self._extract_f4m_formats(hds_url + '?hdcore', video_id, f4m_id='hds', fatal=False))
+
+ for c in media.get('content', []):
+ c_url = c.get('url')
+ if not c_url:
+ continue
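+ # bitrate/videoRate/audioRate appear to be in bps; scaling by 1000 yields the kbps values yt-dlp expects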
+ tbr = int_or_none(c.get('bitrate'), 1000)
+ format_id = 'http'
+ if tbr:
+ format_id += '-%d' % tbr
+ formats.append({
+ 'format_id': format_id,
+ 'url': c_url,
+ 'tbr': tbr,
+ 'width': int_or_none(c.get('width')),
+ 'height': int_or_none(c.get('height')),
+ 'filesize': int_or_none(c.get('fileSize')),
+ 'vcodec': c.get('type'),
+ 'fps': int_or_none(c.get('videoFrameRate')),
+ 'vbr': int_or_none(c.get('videoRate'), 1000),
+ 'abr': int_or_none(c.get('audioRate'), 1000),
+ })
+
+ return {
+ 'id': video_id,
+ 'title': video['title'],
+ 'description': video.get('description'),
+ 'thumbnail': video.get('poster'),
+ 'duration': int_or_none(video.get('duration')),
+ 'timestamp': int_or_none(video.get('publishedTime'), 1000),
+ 'formats': formats,
+ }
diff --git a/yt_dlp/extractor/periscope.py b/yt_dlp/extractor/periscope.py
new file mode 100644
index 0000000..d2351df
--- /dev/null
+++ b/yt_dlp/extractor/periscope.py
@@ -0,0 +1,188 @@
+from .common import InfoExtractor
+from ..utils import (
+ int_or_none,
+ parse_iso8601,
+ unescapeHTML,
+)
+from ..utils.traversal import traverse_obj
+
+
+class PeriscopeBaseIE(InfoExtractor):
+ _M3U8_HEADERS = {
+ 'Referer': 'https://www.periscope.tv/'
+ }
+
+ def _call_api(self, method, query, item_id):
+ return self._download_json(
+ 'https://api.periscope.tv/api/v2/%s' % method,
+ item_id, query=query)
+
+ def _parse_broadcast_data(self, broadcast, video_id):
+ title = broadcast.get('status') or 'Periscope Broadcast'
+ uploader = broadcast.get('user_display_name') or broadcast.get('username')
+ title = '%s - %s' % (uploader, title) if uploader else title
+ thumbnails = [{
+ 'url': broadcast[image],
+ } for image in ('image_url', 'image_url_medium', 'image_url_small') if broadcast.get(image)]
+
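+ # Broadcast state maps to live_status: running -> is_live, not_started -> is_upcoming, anything else counts as finished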
+ return {
+ 'id': broadcast.get('id') or video_id,
+ 'title': title,
+ 'timestamp': parse_iso8601(broadcast.get('created_at')) or int_or_none(
+ broadcast.get('created_at_ms'), scale=1000),
+ 'release_timestamp': int_or_none(broadcast.get('scheduled_start_ms'), scale=1000),
+ 'uploader': uploader,
+ 'uploader_id': broadcast.get('user_id') or broadcast.get('username'),
+ 'thumbnails': thumbnails,
+ 'view_count': int_or_none(broadcast.get('total_watched')),
+ 'concurrent_view_count': int_or_none(broadcast.get('total_watching')),
+ 'tags': broadcast.get('tags'),
+ 'live_status': {
+ 'running': 'is_live',
+ 'not_started': 'is_upcoming',
+ }.get(traverse_obj(broadcast, ('state', {str.lower}))) or 'was_live'
+ }
+
+ @staticmethod
+ def _extract_common_format_info(broadcast):
+ return broadcast.get('state').lower(), int_or_none(broadcast.get('width')), int_or_none(broadcast.get('height'))
+
+ @staticmethod
+ def _add_width_and_height(f, width, height):
+ for key, val in (('width', width), ('height', height)):
+ if not f.get(key):
+ f[key] = val
+
+ def _extract_pscp_m3u8_formats(self, m3u8_url, video_id, format_id, state, width, height, fatal=True):
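+ # Replays ('ended'/'timed_out') are static playlists fetched natively; live streams use the m3u8 protocol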
+ m3u8_formats = self._extract_m3u8_formats(
+ m3u8_url, video_id, 'mp4',
+ entry_protocol='m3u8_native'
+ if state in ('ended', 'timed_out') else 'm3u8',
+ m3u8_id=format_id, fatal=fatal, headers=self._M3U8_HEADERS)
+ if len(m3u8_formats) == 1:
+ self._add_width_and_height(m3u8_formats[0], width, height)
+ for f in m3u8_formats:
+ f.setdefault('http_headers', {}).update(self._M3U8_HEADERS)
+ return m3u8_formats
+
+
+class PeriscopeIE(PeriscopeBaseIE):
+ IE_DESC = 'Periscope'
+ IE_NAME = 'periscope'
+ _VALID_URL = r'https?://(?:www\.)?(?:periscope|pscp)\.tv/[^/]+/(?P<id>[^/?#]+)'
+ _EMBED_REGEX = [r'<iframe[^>]+src=([\'"])(?P<url>(?:https?:)?//(?:www\.)?(?:periscope|pscp)\.tv/(?:(?!\1).)+)\1']
+ # Live (unexpired) example URLs can be found at https://www.periscope.tv/
+ _TESTS = [{
+ 'url': 'https://www.periscope.tv/w/aJUQnjY3MjA3ODF8NTYxMDIyMDl2zCg2pECBgwTqRpQuQD352EMPTKQjT4uqlM3cgWFA-g==',
+ 'md5': '65b57957972e503fcbbaeed8f4fa04ca',
+ 'info_dict': {
+ 'id': '56102209',
+ 'ext': 'mp4',
+ 'title': 'Bec Boop - 🚠✈️🇬🇧 Fly above #London in Emirates Air Line cable car at night 🇬🇧✈️🚠 #BoopScope 🎀💗',
+ 'timestamp': 1438978559,
+ 'upload_date': '20150807',
+ 'uploader': 'Bec Boop',
+ 'uploader_id': '1465763',
+ },
+ 'skip': 'Expires in 24 hours',
+ }, {
+ 'url': 'https://www.periscope.tv/w/1ZkKzPbMVggJv',
+ 'only_matching': True,
+ }, {
+ 'url': 'https://www.periscope.tv/bastaakanoggano/1OdKrlkZZjOJX',
+ 'only_matching': True,
+ }]
+
+ def _real_extract(self, url):
+ token = self._match_id(url)
+
+ stream = self._call_api(
+ 'accessVideoPublic', {'broadcast_id': token}, token)
+
+ broadcast = stream['broadcast']
+ info = self._parse_broadcast_data(broadcast, token)
+
+ state = broadcast.get('state').lower()
+ width = int_or_none(broadcast.get('width'))
+ height = int_or_none(broadcast.get('height'))
+
+ def add_width_and_height(f):
+ for key, val in (('width', width), ('height', height)):
+ if not f.get(key):
+ f[key] = val
+
+ video_urls = set()
+ formats = []
+ for format_id in ('replay', 'rtmp', 'hls', 'https_hls', 'lhls', 'lhlsweb'):
+ video_url = stream.get(format_id + '_url')
+ if not video_url or video_url in video_urls:
+ continue
+ video_urls.add(video_url)
+ if format_id != 'rtmp':
+ m3u8_formats = self._extract_pscp_m3u8_formats(
+ video_url, token, format_id, state, width, height, False)
+ formats.extend(m3u8_formats)
+ continue
+ rtmp_format = {
+ 'url': video_url,
+ 'ext': 'flv' if format_id == 'rtmp' else 'mp4',
+ }
+ add_width_and_height(rtmp_format)
+ formats.append(rtmp_format)
+
+ info['formats'] = formats
+ return info
+
+
+class PeriscopeUserIE(PeriscopeBaseIE):
+ _VALID_URL = r'https?://(?:www\.)?(?:periscope|pscp)\.tv/(?P<id>[^/]+)/?$'
+ IE_DESC = 'Periscope user videos'
+ IE_NAME = 'periscope:user'
+
+ _TEST = {
+ 'url': 'https://www.periscope.tv/LularoeHusbandMike/',
+ 'info_dict': {
+ 'id': 'LularoeHusbandMike',
+ 'title': 'LULAROE HUSBAND MIKE',
+ 'description': 'md5:6cf4ec8047768098da58e446e82c82f0',
+ },
+ # Periscope only shows videos in the last 24 hours, so it's possible to
+ # get 0 videos
+ 'playlist_mincount': 0,
+ }
+
+ def _real_extract(self, url):
+ user_name = self._match_id(url)
+
+ webpage = self._download_webpage(url, user_name)
+
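+ # The profile page serializes its state as JSON in a data-store attribute; the user record and a session token live inside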
+ data_store = self._parse_json(
+ unescapeHTML(self._search_regex(
+ r'data-store=(["\'])(?P<data>.+?)\1',
+ webpage, 'data store', default='{}', group='data')),
+ user_name)
+
+ user = list(data_store['UserCache']['users'].values())[0]['user']
+ user_id = user['id']
+ session_id = data_store['SessionToken']['public']['broadcastHistory']['token']['session_id']
+
+ broadcasts = self._call_api(
+ 'getUserBroadcastsPublic',
+ {'user_id': user_id, 'session_id': session_id},
+ user_name)['broadcasts']
+
+ broadcast_ids = [
+ broadcast['id'] for broadcast in broadcasts if broadcast.get('id')]
+
+ title = user.get('display_name') or user.get('username') or user_name
+ description = user.get('description')
+
+ entries = [
+ self.url_result(
+ 'https://www.periscope.tv/%s/%s' % (user_name, broadcast_id))
+ for broadcast_id in broadcast_ids]
+
+ return self.playlist_result(entries, user_id, title, description)
diff --git a/yt_dlp/extractor/pgatour.py b/yt_dlp/extractor/pgatour.py
new file mode 100644
index 0000000..36c2c62
--- /dev/null
+++ b/yt_dlp/extractor/pgatour.py
@@ -0,0 +1,47 @@
+from .brightcove import BrightcoveNewIE
+from .common import InfoExtractor
+
+
+class PGATourIE(InfoExtractor):
+ _VALID_URL = r'https?://(?:www\.)?pgatour\.com/video/[\w-]+/(?P<tc>T)?(?P<id>\d+)'
+ _TESTS = [{
+ 'url': 'https://www.pgatour.com/video/competition/T6322447785112/adam-hadwin-2023-the-players-round-4-18th-hole-shot-1',
+ 'info_dict': {
+ 'id': '6322447785112',
+ 'ext': 'mp4',
+ 'title': 'Adam Hadwin | 2023 THE PLAYERS | Round 4 | 18th hole | Shot 1',
+ 'uploader_id': '6116716431001',
+ 'upload_date': '20230312',
+ 'timestamp': 1678653136,
+ 'duration': 20.011,
+ 'thumbnail': r're:^https://.+\.jpg',
+ 'tags': 'count:7',
+ },
+ 'params': {'skip_download': 'm3u8'},
+ }, {
+ 'url': 'https://www.pgatour.com/video/features/6322506425112/follow-the-players-trophy-on-championship-sunday',
+ 'info_dict': {
+ 'id': '6322506425112',
+ 'ext': 'mp4',
+ 'title': 'Follow THE PLAYERS trophy on Championship Sunday',
+ 'description': 'md5:4d29e4bdfa03694a0ebfd08950398568',
+ 'uploader_id': '6082840763001',
+ 'upload_date': '20230313',
+ 'timestamp': 1678739835,
+ 'duration': 123.435,
+ 'thumbnail': r're:^https://.+\.jpg',
+ 'tags': 'count:8',
+ },
+ 'params': {'skip_download': 'm3u8'},
+ }]
+
+ def _real_extract(self, url):
+ video_id, is_tourcast = self._match_valid_url(url).group('id', 'tc')
+
+ # From https://www.pgatour.com/_next/static/chunks/pages/_app-8bcf849560daf38d.js
+ account_id = '6116716431001' if is_tourcast else '6082840763001'
+ player_id = 'Vsd5Umu8r' if is_tourcast else 'FWIBYMBPj'
+
+ return self.url_result(
+ f'https://players.brightcove.net/{account_id}/{player_id}_default/index.html?videoId={video_id}',
+ BrightcoveNewIE)
diff --git a/yt_dlp/extractor/philharmoniedeparis.py b/yt_dlp/extractor/philharmoniedeparis.py
new file mode 100644
index 0000000..e8494a0
--- /dev/null
+++ b/yt_dlp/extractor/philharmoniedeparis.py
@@ -0,0 +1,97 @@
+from .common import InfoExtractor
+from ..compat import compat_str
+from ..utils import try_get
+
+
+class PhilharmonieDeParisIE(InfoExtractor):
+ IE_DESC = 'Philharmonie de Paris'
+ _VALID_URL = r'''(?x)
+ https?://
+ (?:
+ live\.philharmoniedeparis\.fr/(?:[Cc]oncert/|embed(?:app)?/|misc/Playlist\.ashx\?id=)|
+ pad\.philharmoniedeparis\.fr/(?:doc/CIMU/|player\.aspx\?id=)|
+ philharmoniedeparis\.fr/fr/live/concert/|
+ otoplayer\.philharmoniedeparis\.fr/fr/embed/
+ )
+ (?P<id>\d+)
+ '''
+ _TESTS = [{
+ 'url': 'https://philharmoniedeparis.fr/fr/live/concert/1129666-danses-symphoniques',
+ 'md5': '24bdb7e86c200c107680e1f7770330ae',
+ 'info_dict': {
+ 'id': '1129666',
+ 'ext': 'mp4',
+ 'title': 'Danses symphoniques. Orchestre symphonique Divertimento - Zahia Ziouani. Bizet, de Falla, Stravinski, Moussorgski, Saint-Saëns',
+ },
+ }, {
+ 'url': 'https://philharmoniedeparis.fr/fr/live/concert/1032066-akademie-fur-alte-musik-berlin-rias-kammerchor-rene-jacobs-passion-selon-saint-jean-de-johann',
+ 'info_dict': {
+ 'id': '1032066',
+ 'title': 'Akademie für alte Musik Berlin, Rias Kammerchor, René Jacobs : Passion selon saint Jean de Johann Sebastian Bach',
+ },
+ 'playlist_mincount': 2,
+ }, {
+ 'url': 'https://philharmoniedeparis.fr/fr/live/concert/1030324-orchestre-philharmonique-de-radio-france-myung-whun-chung-renaud-capucon-pascal-dusapin-johannes',
+ 'only_matching': True,
+ }, {
+ 'url': 'http://live.philharmoniedeparis.fr/misc/Playlist.ashx?id=1030324&track=&lang=fr',
+ 'only_matching': True,
+ }, {
+ 'url': 'https://live.philharmoniedeparis.fr/embedapp/1098406/berlioz-fantastique-lelio-les-siecles-national-youth-choir-of.html?lang=fr-FR',
+ 'only_matching': True,
+ }, {
+ 'url': 'https://otoplayer.philharmoniedeparis.fr/fr/embed/1098406?lang=fr-FR',
+ 'only_matching': True,
+ }]
+
+ def _real_extract(self, url):
+ video_id = self._match_id(url)
+
+ config = self._download_json(
+ 'https://otoplayer.philharmoniedeparis.fr/fr/config/%s.json' % video_id, video_id, query={
+ 'id': video_id,
+ 'lang': 'fr-FR',
+ })
+
+ def extract_entry(source):
+ if not isinstance(source, dict):
+ return
+ title = source.get('title')
+ if not title:
+ return
+ files = source.get('files')
+ if not isinstance(files, dict):
+ return
+ format_urls = set()
+ formats = []
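+ # 'mobile' and 'desktop' may point at the same manifest; dedupe before extracting HLS formats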
+ for format_id in ('mobile', 'desktop'):
+ format_url = try_get(
+ files, lambda x: x[format_id]['file'], compat_str)
+ if not format_url or format_url in format_urls:
+ continue
+ format_urls.add(format_url)
+ formats.extend(self._extract_m3u8_formats(
+ format_url, video_id, 'mp4', entry_protocol='m3u8_native',
+ m3u8_id='hls', fatal=False))
+ if not formats and not self.get_param('ignore_no_formats_error'):
+ return
+ return {
+ 'title': title,
+ 'formats': formats,
+ 'thumbnail': files.get('thumbnail'),
+ }
+ info = extract_entry(config)
+ if info:
+ info.update({
+ 'id': video_id,
+ })
+ return info
+ entries = []
+ for num, chapter in enumerate(config['chapters'], start=1):
+ entry = extract_entry(chapter)
+ if entry is None:
+ continue
+ entry['id'] = '%s-%d' % (video_id, num)
+ entries.append(entry)
+
+ return self.playlist_result(entries, video_id, config.get('title'))
diff --git a/yt_dlp/extractor/phoenix.py b/yt_dlp/extractor/phoenix.py
new file mode 100644
index 0000000..5fa133a
--- /dev/null
+++ b/yt_dlp/extractor/phoenix.py
@@ -0,0 +1,130 @@
+import re
+
+from .youtube import YoutubeIE
+from .zdf import ZDFBaseIE
+from ..compat import compat_str
+from ..utils import (
+ int_or_none,
+ merge_dicts,
+ try_get,
+ unified_timestamp,
+ urljoin,
+)
+
+
+class PhoenixIE(ZDFBaseIE):
+ IE_NAME = 'phoenix.de'
+ _VALID_URL = r'https?://(?:www\.)?phoenix\.de/(?:[^/]+/)*[^/?#&]*-a-(?P<id>\d+)\.html'
+ _TESTS = [{
+ # Same as https://www.zdf.de/politik/phoenix-sendungen/wohin-fuehrt-der-protest-in-der-pandemie-100.html
+ 'url': 'https://www.phoenix.de/sendungen/ereignisse/corona-nachgehakt/wohin-fuehrt-der-protest-in-der-pandemie-a-2050630.html',
+ 'md5': '34ec321e7eb34231fd88616c65c92db0',
+ 'info_dict': {
+ 'id': '210222_phx_nachgehakt_corona_protest',
+ 'ext': 'mp4',
+ 'title': 'Wohin führt der Protest in der Pandemie?',
+ 'description': 'md5:7d643fe7f565e53a24aac036b2122fbd',
+ 'duration': 1691,
+ 'timestamp': 1613902500,
+ 'upload_date': '20210221',
+ 'uploader': 'Phoenix',
+ 'series': 'corona nachgehakt',
+ 'episode': 'Wohin führt der Protest in der Pandemie?',
+ },
+ }, {
+ # Youtube embed
+ 'url': 'https://www.phoenix.de/sendungen/gespraeche/phoenix-streitgut-brennglas-corona-a-1965505.html',
+ 'info_dict': {
+ 'id': 'hMQtqFYjomk',
+ 'ext': 'mp4',
+ 'title': 'phoenix streitgut: Brennglas Corona - Wie gerecht ist unsere Gesellschaft?',
+ 'description': 'md5:ac7a02e2eb3cb17600bc372e4ab28fdd',
+ 'duration': 3509,
+ 'upload_date': '20201219',
+ 'uploader': 'phoenix',
+ 'uploader_id': 'phoenix',
+ },
+ 'params': {
+ 'skip_download': True,
+ },
+ }, {
+ 'url': 'https://www.phoenix.de/entwicklungen-in-russland-a-2044720.html',
+ 'only_matching': True,
+ }, {
+ # no media
+ 'url': 'https://www.phoenix.de/sendungen/dokumentationen/mit-dem-jumbo-durch-die-nacht-a-89625.html',
+ 'only_matching': True,
+ }, {
+ # Same as https://www.zdf.de/politik/phoenix-sendungen/die-gesten-der-maechtigen-100.html
+ 'url': 'https://www.phoenix.de/sendungen/dokumentationen/gesten-der-maechtigen-i-a-89468.html?ref=suche',
+ 'only_matching': True,
+ }]
+
+ def _real_extract(self, url):
+ article_id = self._match_id(url)
+
+ article = self._download_json(
+ 'https://www.phoenix.de/response/id/%s' % article_id, article_id,
+ 'Downloading article JSON')
+
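+ # 'absaetze' (German: paragraphs) holds the article body; the first block carries the embedded media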
+ video = article['absaetze'][0]
+ title = video.get('titel') or article.get('subtitel')
+
+ if video.get('typ') == 'video-youtube':
+ video_id = video['id']
+ return self.url_result(
+ video_id, ie=YoutubeIE.ie_key(), video_id=video_id,
+ video_title=title)
+
+ video_id = compat_str(video.get('basename') or video.get('content'))
+
+ details = self._download_json(
+ 'https://www.phoenix.de/php/mediaplayer/data/beitrags_details.php',
+ video_id, 'Downloading details JSON', query={
+ 'ak': 'web',
+ 'ptmd': 'true',
+ 'id': video_id,
+ 'profile': 'player2',
+ })
+
+ title = title or details['title']
+ content_id = details['tracking']['nielsen']['content']['assetid']
+
+ info = self._extract_ptmd(
+ 'https://tmd.phoenix.de/tmd/2/ngplayer_2_3/vod/ptmd/phoenix/%s' % content_id,
+ content_id, None, url)
+
+ duration = int_or_none(try_get(
+ details, lambda x: x['tracking']['nielsen']['content']['length']))
+ timestamp = unified_timestamp(details.get('editorialDate'))
+ series = try_get(
+ details, lambda x: x['tracking']['nielsen']['content']['program'],
+ compat_str)
+ episode = title if details.get('contentType') == 'episode' else None
+
+ thumbnails = []
+ teaser_images = try_get(details, lambda x: x['teaserImageRef']['layouts'], dict) or {}
+ for thumbnail_key, thumbnail_url in teaser_images.items():
+ thumbnail_url = urljoin(url, thumbnail_url)
+ if not thumbnail_url:
+ continue
+ thumbnail = {
+ 'url': thumbnail_url,
+ }
+ m = re.match('^([0-9]+)x([0-9]+)$', thumbnail_key)
+ if m:
+ thumbnail['width'] = int(m.group(1))
+ thumbnail['height'] = int(m.group(2))
+ thumbnails.append(thumbnail)
+
+ return merge_dicts(info, {
+ 'id': content_id,
+ 'title': title,
+ 'description': details.get('leadParagraph'),
+ 'duration': duration,
+ 'thumbnails': thumbnails,
+ 'timestamp': timestamp,
+ 'uploader': details.get('tvService'),
+ 'series': series,
+ 'episode': episode,
+ })
diff --git a/yt_dlp/extractor/photobucket.py b/yt_dlp/extractor/photobucket.py
new file mode 100644
index 0000000..71e9a48
--- /dev/null
+++ b/yt_dlp/extractor/photobucket.py
@@ -0,0 +1,43 @@
+import json
+
+from .common import InfoExtractor
+from ..compat import compat_urllib_parse_unquote
+
+
+class PhotobucketIE(InfoExtractor):
+ _VALID_URL = r'https?://(?:[a-z0-9]+\.)?photobucket\.com/.*(([\?\&]current=)|_)(?P<id>.*)\.(?P<ext>(flv)|(mp4))'
+ _TEST = {
+ 'url': 'http://media.photobucket.com/user/rachaneronas/media/TiredofLinkBuildingTryBacklinkMyDomaincom_zpsc0c3b9fa.mp4.html?filters[term]=search&filters[primary]=videos&filters[secondary]=images&sort=1&o=0',
+ 'md5': '7dabfb92b0a31f6c16cebc0f8e60ff99',
+ 'info_dict': {
+ 'id': 'zpsc0c3b9fa',
+ 'ext': 'mp4',
+ 'timestamp': 1367669341,
+ 'upload_date': '20130504',
+ 'uploader': 'rachaneronas',
+ 'title': 'Tired of Link Building? Try BacklinkMyDomain.com!',
+ }
+ }
+
+ def _real_extract(self, url):
+ mobj = self._match_valid_url(url)
+ video_id = mobj.group('id')
+ video_extension = mobj.group('ext')
+
+ webpage = self._download_webpage(url, video_id)
+
+ # Extract URL, uploader, and title from webpage
+ self.report_extraction(video_id)
+ info_json = self._search_regex(r'Pb\.Data\.Shared\.put\(Pb\.Data\.Shared\.MEDIA, (.*?)\);',
+ webpage, 'info json')
+ info = json.loads(info_json)
+ url = compat_urllib_parse_unquote(self._html_search_regex(r'file=(.+\.mp4)', info['linkcodes']['html'], 'url'))
+ return {
+ 'id': video_id,
+ 'url': url,
+ 'uploader': info['username'],
+ 'timestamp': info['creationDate'],
+ 'title': info['title'],
+ 'ext': video_extension,
+ 'thumbnail': info['thumbUrl'],
+ }
diff --git a/yt_dlp/extractor/piapro.py b/yt_dlp/extractor/piapro.py
new file mode 100644
index 0000000..3ae985d
--- /dev/null
+++ b/yt_dlp/extractor/piapro.py
@@ -0,0 +1,121 @@
+from .common import InfoExtractor
+from ..compat import compat_urlparse
+from ..utils import (
+ ExtractorError,
+ parse_duration,
+ parse_filesize,
+ str_to_int,
+ unified_timestamp,
+ urlencode_postdata,
+)
+
+
+class PiaproIE(InfoExtractor):
+ _NETRC_MACHINE = 'piapro'
+ _VALID_URL = r'https?://piapro\.jp/(?:t|content)/(?P<id>[\w-]+)/?'
+ _TESTS = [{
+ 'url': 'https://piapro.jp/t/NXYR',
+ 'md5': 'f7c0f760913fb1d44a1c45a4af793909',
+ 'info_dict': {
+ 'id': 'NXYR',
+ 'ext': 'mp3',
+ 'uploader': 'wowaka',
+ 'uploader_id': 'wowaka',
+ 'title': '裏表ラバーズ',
+ 'description': 'http://www.nicovideo.jp/watch/sm8082467',
+ 'duration': 189.0,
+ 'timestamp': 1251785475,
+ 'thumbnail': r're:^https?://.*\.(?:png|jpg)$',
+ 'upload_date': '20090901',
+ 'view_count': int,
+ }
+ }, {
+ 'note': 'There are line breaks in the description, mandating the (?s) flag',
+ 'url': 'https://piapro.jp/t/9cSd',
+ 'md5': '952bb6d1e8de95050206408a87790676',
+ 'info_dict': {
+ 'id': '9cSd',
+ 'ext': 'mp3',
+ 'title': '青に溶けた風船 / 初音ミク',
+ 'description': 'md5:d395a9bd151447631a5a1460bc7f9132',
+ 'uploader': 'シアン・キノ',
+ 'duration': 229.0,
+ 'timestamp': 1644030039,
+ 'upload_date': '20220205',
+ 'view_count': int,
+ 'thumbnail': r're:^https?://.*\.(?:png|jpg)$',
+ 'uploader_id': 'cyankino',
+ }
+ }, {
+ 'url': 'https://piapro.jp/content/hcw0z3a169wtemz6',
+ 'only_matching': True
+ }, {
+ 'url': 'https://piapro.jp/t/-SO-',
+ 'only_matching': True
+ }]
+
+ _login_status = False
+
+ def _perform_login(self, username, password):
+ login_ok = True
+ login_form_strs = {
+ '_username': username,
+ '_password': password,
+ '_remember_me': 'on',
+ 'login': 'ログイン'
+ }
+ self._request_webpage('https://piapro.jp/login/', None)
+ urlh = self._request_webpage(
+ 'https://piapro.jp/login/exe', None,
+ note='Logging in', errnote='Unable to log in',
+ data=urlencode_postdata(login_form_strs))
+ if urlh is False:
+ login_ok = False
+ else:
+ parts = compat_urlparse.urlparse(urlh.url)
+ if parts.path != '/':
+ login_ok = False
+ if not login_ok:
+ self.report_warning(
+ 'unable to log in: bad username or password')
+ self._login_status = login_ok
+
+ def _real_extract(self, url):
+ video_id = self._match_id(url)
+ webpage = self._download_webpage(url, video_id)
+
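+ # piapro hosts more than audio (illustrations, lyrics, 3D models); only these category IDs appear to be audio works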
+ category_id = self._search_regex(r'categoryId=(.+)">', webpage, 'category ID')
+ if category_id not in ('1', '2', '21', '22', '23', '24', '25'):
+ raise ExtractorError('The URL does not contain audio.', expected=True)
+
+ str_duration, str_filesize = self._search_regex(
+ r'サイズ:</span>(.+?)/\(([0-9,]+?[KMG]?B)\)', webpage, 'duration and size',
+ group=(1, 2), default=(None, None))
+ str_viewcount = self._search_regex(r'閲覧数:</span>([0-9,]+)\s+', webpage, 'view count', fatal=False)
+
+ uploader_id, uploader = self._search_regex(
+ r'<a\s+class="cd_user-name"\s+href="/(.*)">([^<]+)さん<', webpage, 'uploader',
+ group=(1, 2), default=(None, None))
+ content_id = self._search_regex(r'contentId\:\'(.+)\'', webpage, 'content ID')
+ create_date = self._search_regex(r'createDate\:\'(.+)\'', webpage, 'timestamp')
+
+ player_webpage = self._download_webpage(
+ f'https://piapro.jp/html5_player_popup/?id={content_id}&cdate={create_date}',
+ video_id, note='Downloading player webpage')
+
+ return {
+ 'id': video_id,
+ 'title': self._html_search_regex(r'<h1\s+class="cd_works-title">(.+?)</h1>', webpage, 'title', fatal=False),
+ 'description': self._html_search_regex(r'(?s)<p\s+class="cd_dtl_cap">(.+?)</p>\s*<div', webpage, 'description', fatal=False),
+ 'uploader': uploader,
+ 'uploader_id': uploader_id,
+ 'timestamp': unified_timestamp(create_date, False),
+ 'duration': parse_duration(str_duration),
+ 'view_count': str_to_int(str_viewcount),
+ 'thumbnail': self._html_search_meta('twitter:image', webpage),
+
+ 'filesize_approx': parse_filesize(str_filesize.replace(',', '')),
+ 'url': self._search_regex(r'mp3:\s*\'(.*?)\'\}', player_webpage, 'url'),
+ 'ext': 'mp3',
+ 'vcodec': 'none',
+ }
diff --git a/yt_dlp/extractor/piaulizaportal.py b/yt_dlp/extractor/piaulizaportal.py
new file mode 100644
index 0000000..1eb6d92
--- /dev/null
+++ b/yt_dlp/extractor/piaulizaportal.py
@@ -0,0 +1,70 @@
+from .common import InfoExtractor
+from ..utils import (
+ ExtractorError,
+ int_or_none,
+ parse_qs,
+ time_seconds,
+ traverse_obj,
+)
+
+
+class PIAULIZAPortalIE(InfoExtractor):
+ IE_DESC = 'ulizaportal.jp - PIA LIVE STREAM'
+ _VALID_URL = r'https?://(?:www\.)?ulizaportal\.jp/pages/(?P<id>[\da-f]{8}-(?:[\da-f]{4}-){3}[\da-f]{12})'
+ _TESTS = [{
+ 'url': 'https://ulizaportal.jp/pages/005f18b7-e810-5618-cb82-0987c5755d44',
+ 'info_dict': {
+ 'id': '005f18b7-e810-5618-cb82-0987c5755d44',
+ 'title': 'プレゼンテーションプレイヤーのサンプル',
+ 'live_status': 'not_live',
+ },
+ 'params': {
+ 'skip_download': True,
+ 'ignore_no_formats_error': True,
+ },
+ }, {
+ 'url': 'https://ulizaportal.jp/pages/005e1b23-fe93-5780-19a0-98e917cc4b7d?expires=4102412400&signature=f422a993b683e1068f946caf406d211c17d1ef17da8bef3df4a519502155aa91&version=1',
+ 'info_dict': {
+ 'id': '005e1b23-fe93-5780-19a0-98e917cc4b7d',
+ 'title': '【確認用】視聴サンプルページ(ULIZA)',
+ 'live_status': 'not_live',
+ },
+ 'params': {
+ 'skip_download': True,
+ 'ignore_no_formats_error': True,
+ },
+ }]
+
+ def _real_extract(self, url):
+ video_id = self._match_id(url)
+
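+ # Signed links carry an 'expires' UNIX timestamp in the query string; fail early when it has passed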
+ expires = int_or_none(traverse_obj(parse_qs(url), ('expires', 0)))
+ if expires and expires <= time_seconds():
+ raise ExtractorError('The link is expired.', video_id=video_id, expected=True)
+
+ webpage = self._download_webpage(url, video_id)
+
+ player_data = self._download_webpage(
+ self._search_regex(
+ r'<script [^>]*\bsrc="(https://player-api\.p\.uliza\.jp/v1/players/[^"]+)"',
+ webpage, 'player data url'),
+ video_id, headers={'Referer': 'https://ulizaportal.jp/'},
+ note='Fetching player data', errnote='Unable to fetch player data')
+
+ formats = self._extract_m3u8_formats(
+ self._search_regex(
+ r'["\'](https://vms-api\.p\.uliza\.jp/v1/prog-index\.m3u8[^"\']+)', player_data,
+ 'm3u8 url', default=None),
+ video_id, fatal=False)
+ m3u8_type = self._search_regex(
+ r'/hls/(dvr|video)/', traverse_obj(formats, (0, 'url')), 'm3u8 type', default=None)
+
+ return {
+ 'id': video_id,
+ 'title': self._html_extract_title(webpage),
+ 'formats': formats,
+ 'live_status': {
+ 'video': 'is_live',
+ 'dvr': 'was_live', # short-term archives
+ }.get(m3u8_type, 'not_live'), # VOD or long-term archives
+ }
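For reference, the live-status classification above reduces to a small standalone function; the /hls/(dvr|video)/ heuristic is the one the extractor matches, and the sample URL is made up:

import re

def live_status_from_url(m3u8_url):
    # /hls/video/ -> ongoing live, /hls/dvr/ -> short-term archive,
    # anything else -> VOD or long-term archive
    mobj = re.search(r'/hls/(dvr|video)/', m3u8_url or '')
    m3u8_type = mobj.group(1) if mobj else None
    return {'video': 'is_live', 'dvr': 'was_live'}.get(m3u8_type, 'not_live')

print(live_status_from_url('https://example.com/hls/video/prog-index.m3u8'))  # -> is_live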
diff --git a/yt_dlp/extractor/picarto.py b/yt_dlp/extractor/picarto.py
new file mode 100644
index 0000000..d415ba2
--- /dev/null
+++ b/yt_dlp/extractor/picarto.py
@@ -0,0 +1,152 @@
+import urllib.parse
+
+from .common import InfoExtractor
+from ..utils import (
+ ExtractorError,
+ str_or_none,
+ traverse_obj,
+)
+
+
+class PicartoIE(InfoExtractor):
+    _VALID_URL = r'https?://(?:www\.)?picarto\.tv/(?P<id>[a-zA-Z0-9]+)'
+ _TEST = {
+ 'url': 'https://picarto.tv/Setz',
+ 'info_dict': {
+ 'id': 'Setz',
+ 'ext': 'mp4',
+ 'title': 're:^Setz [0-9]{4}-[0-9]{2}-[0-9]{2} [0-9]{2}:[0-9]{2}$',
+ 'timestamp': int,
+ 'is_live': True
+ },
+ 'skip': 'Stream is offline',
+ }
+
+ @classmethod
+ def suitable(cls, url):
+        return False if PicartoVodIE.suitable(url) else super().suitable(url)
+
+ def _real_extract(self, url):
+ channel_id = self._match_id(url)
+
+ data = self._download_json(
+ 'https://ptvintern.picarto.tv/ptvapi', channel_id, query={
+ 'query': '''{
+ channel(name: "%s") {
+ adult
+ id
+ online
+ stream_name
+ title
+ }
+ getLoadBalancerUrl(channel_name: "%s") {
+ url
+ }
+}''' % (channel_id, channel_id),
+ })['data']
+ metadata = data['channel']
+
+ if metadata.get('online') == 0:
+ raise ExtractorError('Stream is offline', expected=True)
+ title = metadata['title']
+
+ cdn_data = self._download_json(
+ data['getLoadBalancerUrl']['url'] + '/stream/json_' + metadata['stream_name'] + '.js',
+ channel_id, 'Downloading load balancing info')
+
+ formats = []
+ for source in (cdn_data.get('source') or []):
+ source_url = source.get('url')
+ if not source_url:
+ continue
+ source_type = source.get('type')
+ if source_type == 'html5/application/vnd.apple.mpegurl':
+ formats.extend(self._extract_m3u8_formats(
+ source_url, channel_id, 'mp4', m3u8_id='hls', fatal=False))
+ elif source_type == 'html5/video/mp4':
+ formats.append({
+ 'url': source_url,
+ })
+
+ mature = metadata.get('adult')
+ if mature is None:
+ age_limit = None
+ else:
+ age_limit = 18 if mature is True else 0
+
+ return {
+ 'id': channel_id,
+ 'title': title.strip(),
+ 'is_live': True,
+ 'channel': channel_id,
+ 'channel_id': metadata.get('id'),
+ 'channel_url': 'https://picarto.tv/%s' % channel_id,
+ 'age_limit': age_limit,
+ 'formats': formats,
+ }
+
+
+class PicartoVodIE(InfoExtractor):
+ _VALID_URL = r'https?://(?:www\.)?picarto\.tv/(?:videopopout|\w+/videos)/(?P<id>[^/?#&]+)'
+ _TESTS = [{
+ 'url': 'https://picarto.tv/videopopout/ArtofZod_2017.12.12.00.13.23.flv',
+ 'md5': '3ab45ba4352c52ee841a28fb73f2d9ca',
+ 'info_dict': {
+ 'id': 'ArtofZod_2017.12.12.00.13.23.flv',
+ 'ext': 'mp4',
+ 'title': 'ArtofZod_2017.12.12.00.13.23.flv',
+ 'thumbnail': r're:^https?://.*\.jpg'
+ },
+ 'skip': 'The VOD does not exist',
+ }, {
+ 'url': 'https://picarto.tv/ArtofZod/videos/772650',
+ 'md5': '00067a0889f1f6869cc512e3e79c521b',
+ 'info_dict': {
+ 'id': '772650',
+ 'ext': 'mp4',
+ 'title': 'Art of Zod - Drawing and Painting',
+ 'thumbnail': r're:^https?://.*\.jpg',
+ 'channel': 'ArtofZod',
+ 'age_limit': 18,
+ }
+ }, {
+ 'url': 'https://picarto.tv/videopopout/Plague',
+ 'only_matching': True,
+ }]
+
+ def _real_extract(self, url):
+ video_id = self._match_id(url)
+
+ data = self._download_json(
+ 'https://ptvintern.picarto.tv/ptvapi', video_id, query={
+ 'query': f'''{{
+ video(id: "{video_id}") {{
+ id
+ title
+ adult
+ file_name
+ video_recording_image_url
+ channel {{
+ name
+ }}
+ }}
+}}'''
+ })['data']['video']
+
+ file_name = data['file_name']
+ netloc = urllib.parse.urlparse(data['video_recording_image_url']).netloc
+
+ formats = self._extract_m3u8_formats(
+ f'https://{netloc}/stream/hls/{file_name}/index.m3u8', video_id, 'mp4', m3u8_id='hls')
+
+ return {
+ 'id': video_id,
+ **traverse_obj(data, {
+ 'id': ('id', {str_or_none}),
+ 'title': ('title', {str}),
+ 'thumbnail': 'video_recording_image_url',
+ 'channel': ('channel', 'name', {str}),
+ 'age_limit': ('adult', {lambda x: 18 if x else 0}),
+ }),
+ 'formats': formats,
+ }
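Both Picarto classes above talk to the same GraphQL endpoint by passing the query document as an ordinary query-string parameter. A minimal stdlib-only sketch of that call, assuming the endpoint and response shape shown in the extractor:

import json
import urllib.parse
import urllib.request

def picarto_graphql(query):
    # the API accepts the GraphQL document as a plain 'query' parameter
    url = 'https://ptvintern.picarto.tv/ptvapi?' + urllib.parse.urlencode({'query': query})
    with urllib.request.urlopen(url) as resp:
        return json.load(resp)['data']

# e.g.: picarto_graphql('{ channel(name: "Setz") { online title } }')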
diff --git a/yt_dlp/extractor/piksel.py b/yt_dlp/extractor/piksel.py
new file mode 100644
index 0000000..97a9bf5
--- /dev/null
+++ b/yt_dlp/extractor/piksel.py
@@ -0,0 +1,174 @@
+import re
+
+from .common import InfoExtractor
+from ..utils import (
+ dict_get,
+ ExtractorError,
+ int_or_none,
+ join_nonempty,
+ parse_iso8601,
+ traverse_obj,
+ try_get,
+ unescapeHTML,
+ urljoin,
+)
+
+
+class PikselIE(InfoExtractor):
+ _VALID_URL = r'''(?x)https?://
+ (?:
+ (?:
+ player\.
+ (?:
+ olympusattelecom|
+ vibebyvista
+ )|
+ (?:api|player)\.multicastmedia|
+ (?:api-ovp|player)\.piksel
+ )\.com|
+ (?:
+ mz-edge\.stream\.co|
+ movie-s\.nhk\.or
+ )\.jp|
+ vidego\.baltimorecity\.gov
+ )/v/(?:refid/(?P<refid>[^/]+)/prefid/)?(?P<id>[\w-]+)'''
+ _EMBED_REGEX = [r'<iframe[^>]+src=["\'](?P<url>(?:https?:)?//player\.piksel\.com/v/[a-z0-9]+)']
+ _TESTS = [
+ {
+ 'url': 'http://player.piksel.com/v/ums2867l',
+ 'md5': '34e34c8d89dc2559976a6079db531e85',
+ 'info_dict': {
+ 'id': 'ums2867l',
+ 'ext': 'mp4',
+ 'title': 'GX-005 with Caption',
+ 'timestamp': 1481335659,
+ 'upload_date': '20161210'
+ }
+ },
+ {
+ # Original source: http://www.uscourts.gov/cameras-courts/state-washington-vs-donald-j-trump-et-al
+ 'url': 'https://player.piksel.com/v/v80kqp41',
+ 'md5': '753ddcd8cc8e4fa2dda4b7be0e77744d',
+ 'info_dict': {
+ 'id': 'v80kqp41',
+ 'ext': 'mp4',
+ 'title': 'WAW- State of Washington vs. Donald J. Trump, et al',
+ 'description': 'State of Washington vs. Donald J. Trump, et al, Case Number 17-CV-00141-JLR, TRO Hearing, Civil Rights Case, 02/3/2017, 1:00 PM (PST), Seattle Federal Courthouse, Seattle, WA, Judge James L. Robart presiding.',
+ 'timestamp': 1486171129,
+ 'upload_date': '20170204'
+ }
+ },
+ {
+ # https://www3.nhk.or.jp/nhkworld/en/ondemand/video/2019240/
+ 'url': 'http://player.piksel.com/v/refid/nhkworld/prefid/nw_vod_v_en_2019_240_20190823233000_02_1566873477',
+ 'only_matching': True,
+ }
+ ]
+
+ def _call_api(self, app_token, resource, display_id, query, host='https://player.piksel.com', fatal=True):
+ url = urljoin(host, f'/ws/ws_{resource}/api/{app_token}/mode/json/apiv/5')
+ response = traverse_obj(
+ self._download_json(url, display_id, query=query, fatal=fatal), ('response', {dict})) or {}
+ failure = traverse_obj(response, ('failure', 'reason')) if response else 'Empty response from API'
+ if failure:
+ if fatal:
+ raise ExtractorError(failure, expected=True)
+ self.report_warning(failure)
+ return response
+
+ def _real_extract(self, url):
+ ref_id, display_id = self._match_valid_url(url).groups()
+ webpage = self._download_webpage(url, display_id)
+ app_token = self._search_regex([
+ r'clientAPI\s*:\s*"([^"]+)"',
+ r'data-de-api-key\s*=\s*"([^"]+)"'
+ ], webpage, 'app token')
+ query = {'refid': ref_id, 'prefid': display_id} if ref_id else {'v': display_id}
+ program = self._call_api(
+ app_token, 'program', display_id, query, url)['WsProgramResponse']['program']
+ video_id = program['uuid']
+ video_data = program['asset']
+ title = video_data['title']
+ asset_type = dict_get(video_data, ['assetType', 'asset_type'])
+
+ formats = []
+
+ def process_asset_file(asset_file):
+ if not asset_file:
+ return
+ # TODO: extract rtmp formats
+ http_url = asset_file.get('http_url')
+ if not http_url:
+ return
+ tbr = None
+ vbr = int_or_none(asset_file.get('videoBitrate'), 1024)
+ abr = int_or_none(asset_file.get('audioBitrate'), 1024)
+            if asset_type == 'video' and vbr and abr:
+ tbr = vbr + abr
+ elif asset_type == 'audio':
+ tbr = abr
+
+ formats.append({
+ 'format_id': join_nonempty('http', tbr),
+ 'url': unescapeHTML(http_url),
+ 'vbr': vbr,
+ 'abr': abr,
+ 'width': int_or_none(asset_file.get('videoWidth')),
+ 'height': int_or_none(asset_file.get('videoHeight')),
+ 'filesize': int_or_none(asset_file.get('filesize')),
+ 'tbr': tbr,
+ })
+
+ def process_asset_files(asset_files):
+ for asset_file in (asset_files or []):
+ process_asset_file(asset_file)
+
+ process_asset_files(video_data.get('assetFiles'))
+ process_asset_file(video_data.get('referenceFile'))
+ if not formats:
+ asset_id = video_data.get('assetid') or program.get('assetid')
+ if asset_id:
+ process_asset_files(try_get(self._call_api(
+ app_token, 'asset_file', display_id, {
+ 'assetid': asset_id,
+ }, url, False), lambda x: x['WsAssetFileResponse']['AssetFiles']))
+
+ m3u8_url = dict_get(video_data, [
+ 'm3u8iPadURL',
+ 'ipadM3u8Url',
+ 'm3u8AndroidURL',
+ 'm3u8iPhoneURL',
+ 'iphoneM3u8Url'])
+ if m3u8_url:
+ formats.extend(self._extract_m3u8_formats(
+ m3u8_url, video_id, 'mp4', 'm3u8_native',
+ m3u8_id='hls', fatal=False))
+
+ smil_url = dict_get(video_data, ['httpSmil', 'hdSmil', 'rtmpSmil'])
+ if smil_url:
+ transform_source = None
+ if ref_id == 'nhkworld':
+ # TODO: figure out if this is something to be fixed in urljoin,
+ # _parse_smil_formats or keep it here
+ transform_source = lambda x: x.replace('src="/', 'src="').replace('/media"', '/media/"')
+ formats.extend(self._extract_smil_formats(
+ re.sub(r'/od/[^/]+/', '/od/http/', smil_url), video_id,
+ transform_source=transform_source, fatal=False))
+
+ subtitles = {}
+ for caption in video_data.get('captions', []):
+ caption_url = caption.get('url')
+ if caption_url:
+ subtitles.setdefault(caption.get('locale', 'en'), []).append({
+ 'url': caption_url})
+
+ return {
+ 'id': video_id,
+ 'title': title,
+ 'description': video_data.get('description'),
+ 'thumbnail': video_data.get('thumbnailUrl'),
+ 'timestamp': parse_iso8601(video_data.get('dateadd')),
+ 'formats': formats,
+ 'subtitles': subtitles,
+ '_format_sort_fields': ('tbr', ), # Incomplete resolution information
+ }
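One easily missed detail above: for SMIL sources, the /od/<segment>/ path element is forced to /od/http/ before the manifest is fetched. The rewrite in isolation (the sample URL is hypothetical):

import re

smil_url = 'https://movie-s.nhk.or.jp/v/od/abc123/media.smil'  # hypothetical
print(re.sub(r'/od/[^/]+/', '/od/http/', smil_url))
# -> https://movie-s.nhk.or.jp/v/od/http/media.smil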
diff --git a/yt_dlp/extractor/pinkbike.py b/yt_dlp/extractor/pinkbike.py
new file mode 100644
index 0000000..e4e1caa
--- /dev/null
+++ b/yt_dlp/extractor/pinkbike.py
@@ -0,0 +1,93 @@
+import re
+
+from .common import InfoExtractor
+from ..utils import (
+ int_or_none,
+ remove_end,
+ remove_start,
+ str_to_int,
+ unified_strdate,
+)
+
+
+class PinkbikeIE(InfoExtractor):
+ _VALID_URL = r'https?://(?:(?:www\.)?pinkbike\.com/video/|es\.pinkbike\.org/i/kvid/kvid-y5\.swf\?id=)(?P<id>[0-9]+)'
+ _TESTS = [{
+ 'url': 'http://www.pinkbike.com/video/402811/',
+ 'md5': '4814b8ca7651034cd87e3361d5c2155a',
+ 'info_dict': {
+ 'id': '402811',
+ 'ext': 'mp4',
+ 'title': 'Brandon Semenuk - RAW 100',
+ 'description': 'Official release: www.redbull.ca/rupertwalker',
+ 'thumbnail': r're:^https?://.*\.jpg$',
+ 'duration': 100,
+ 'upload_date': '20150406',
+ 'uploader': 'revelco',
+ 'location': 'Victoria, British Columbia, Canada',
+ 'view_count': int,
+ 'comment_count': int,
+ }
+ }, {
+ 'url': 'http://es.pinkbike.org/i/kvid/kvid-y5.swf?id=406629',
+ 'only_matching': True,
+ }]
+
+ def _real_extract(self, url):
+ video_id = self._match_id(url)
+
+ webpage = self._download_webpage(
+ 'http://www.pinkbike.com/video/%s' % video_id, video_id)
+
+ formats = []
+ for _, format_id, src in re.findall(
+ r'data-quality=((?:\\)?["\'])(.+?)\1[^>]+src=\1(.+?)\1', webpage):
+ height = int_or_none(self._search_regex(
+ r'^(\d+)[pP]$', format_id, 'height', default=None))
+ formats.append({
+ 'url': src,
+ 'format_id': format_id,
+ 'height': height,
+ })
+
+ title = remove_end(self._og_search_title(webpage), ' Video - Pinkbike')
+ description = self._html_search_regex(
+ r'(?s)id="media-description"[^>]*>(.+?)<',
+ webpage, 'description', default=None) or remove_start(
+ self._og_search_description(webpage), title + '. ')
+ thumbnail = self._og_search_thumbnail(webpage)
+ duration = int_or_none(self._html_search_meta(
+ 'video:duration', webpage, 'duration'))
+
+ uploader = self._search_regex(
+ r'<a[^>]+\brel=["\']author[^>]+>([^<]+)', webpage,
+ 'uploader', fatal=False)
+ upload_date = unified_strdate(self._search_regex(
+ r'class="fullTime"[^>]+title="([^"]+)"',
+ webpage, 'upload date', fatal=False))
+
+ location = self._html_search_regex(
+ r'(?s)<dt>Location</dt>\s*<dd>(.+?)<',
+ webpage, 'location', fatal=False)
+
+ def extract_count(webpage, label):
+ return str_to_int(self._search_regex(
+ r'<span[^>]+class="stat-num"[^>]*>([\d,.]+)</span>\s*<span[^>]+class="stat-label"[^>]*>%s' % label,
+ webpage, label, fatal=False))
+
+ view_count = extract_count(webpage, 'Views')
+ comment_count = extract_count(webpage, 'Comments')
+
+ return {
+ 'id': video_id,
+ 'title': title,
+ 'description': description,
+ 'thumbnail': thumbnail,
+ 'duration': duration,
+ 'upload_date': upload_date,
+ 'uploader': uploader,
+ 'location': location,
+ 'view_count': view_count,
+ 'comment_count': comment_count,
+ 'formats': formats
+ }
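The format scrape above uses a backreference (\1) so a single pattern handles both plain and backslash-escaped quoting. A quick check against a hypothetical snippet:

import re

snippet = '<source data-quality="720p" type="video/mp4" src="https://example.com/v.mp4">'
for _, format_id, src in re.findall(
        r'data-quality=((?:\\)?["\'])(.+?)\1[^>]+src=\1(.+?)\1', snippet):
    mobj = re.match(r'^(\d+)[pP]$', format_id)
    print(format_id, int(mobj.group(1)) if mobj else None, src)
# -> 720p 720 https://example.com/v.mp4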
diff --git a/yt_dlp/extractor/pinterest.py b/yt_dlp/extractor/pinterest.py
new file mode 100644
index 0000000..8361fbb
--- /dev/null
+++ b/yt_dlp/extractor/pinterest.py
@@ -0,0 +1,248 @@
+import json
+
+from .common import InfoExtractor
+from ..utils import (
+ determine_ext,
+ float_or_none,
+ int_or_none,
+ str_or_none,
+ strip_or_none,
+ traverse_obj,
+ unified_timestamp,
+ url_or_none,
+)
+
+
+class PinterestBaseIE(InfoExtractor):
+ _VALID_URL_BASE = r'''(?x)
+ https?://(?:[^/]+\.)?pinterest\.(?:
+ com|fr|de|ch|jp|cl|ca|it|co\.uk|nz|ru|com\.au|at|pt|co\.kr|es|com\.mx|
+ dk|ph|th|com\.uy|co|nl|info|kr|ie|vn|com\.vn|ec|mx|in|pe|co\.at|hu|
+ co\.in|co\.nz|id|com\.ec|com\.py|tw|be|uk|com\.bo|com\.pe)'''
+
+ def _call_api(self, resource, video_id, options):
+ return self._download_json(
+ 'https://www.pinterest.com/resource/%sResource/get/' % resource,
+            video_id, 'Downloading %s JSON metadata' % resource, query={
+ 'data': json.dumps({'options': options})
+ })['resource_response']
+
+ def _extract_video(self, data, extract_formats=True):
+ video_id = data['id']
+ thumbnails = []
+ images = data.get('images')
+ if isinstance(images, dict):
+ for thumbnail_id, thumbnail in images.items():
+ if not isinstance(thumbnail, dict):
+ continue
+ thumbnail_url = url_or_none(thumbnail.get('url'))
+ if not thumbnail_url:
+ continue
+ thumbnails.append({
+ 'url': thumbnail_url,
+ 'width': int_or_none(thumbnail.get('width')),
+ 'height': int_or_none(thumbnail.get('height')),
+ })
+
+ info = {
+ 'title': strip_or_none(traverse_obj(data, 'title', 'grid_title', default='')),
+ 'description': traverse_obj(data, 'seo_description', 'description'),
+ 'timestamp': unified_timestamp(data.get('created_at')),
+ 'thumbnails': thumbnails,
+ 'uploader': traverse_obj(data, ('closeup_attribution', 'full_name')),
+ 'uploader_id': str_or_none(traverse_obj(data, ('closeup_attribution', 'id'))),
+ 'repost_count': int_or_none(data.get('repin_count')),
+ 'comment_count': int_or_none(data.get('comment_count')),
+ 'categories': traverse_obj(data, ('pin_join', 'visual_annotation'), expected_type=list),
+ 'tags': traverse_obj(data, 'hashtags', expected_type=list),
+ }
+
+ urls = []
+ formats = []
+ duration = None
+ domain = data.get('domain', '')
+ if domain.lower() != 'uploaded by user' and traverse_obj(data, ('embed', 'src')):
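+            # an empty title ('') would survive the url_transparent merge, so
+            # normalize it to None and let the embedded extractor's title win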
+ if not info['title']:
+ info['title'] = None
+ return {
+ '_type': 'url_transparent',
+ 'url': data['embed']['src'],
+ **info,
+ }
+
+ elif extract_formats:
+ video_list = traverse_obj(
+ data, ('videos', 'video_list'),
+ ('story_pin_data', 'pages', ..., 'blocks', ..., 'video', 'video_list'),
+ expected_type=dict, get_all=False, default={})
+ for format_id, format_dict in video_list.items():
+ if not isinstance(format_dict, dict):
+ continue
+ format_url = url_or_none(format_dict.get('url'))
+ if not format_url or format_url in urls:
+ continue
+ urls.append(format_url)
+ duration = float_or_none(format_dict.get('duration'), scale=1000)
+ ext = determine_ext(format_url)
+ if 'hls' in format_id.lower() or ext == 'm3u8':
+ formats.extend(self._extract_m3u8_formats(
+ format_url, video_id, 'mp4', entry_protocol='m3u8_native',
+ m3u8_id=format_id, fatal=False))
+ else:
+ formats.append({
+ 'url': format_url,
+ 'format_id': format_id,
+ 'width': int_or_none(format_dict.get('width')),
+ 'height': int_or_none(format_dict.get('height')),
+ 'duration': duration,
+ })
+
+ return {
+ 'id': video_id,
+ 'formats': formats,
+ 'duration': duration,
+ 'webpage_url': f'https://www.pinterest.com/pin/{video_id}/',
+ 'extractor_key': PinterestIE.ie_key(),
+ 'extractor': PinterestIE.IE_NAME,
+ **info,
+ }
+
+
+class PinterestIE(PinterestBaseIE):
+ _VALID_URL = r'%s/pin/(?P<id>\d+)' % PinterestBaseIE._VALID_URL_BASE
+ _TESTS = [{
+ # formats found in data['videos']
+ 'url': 'https://www.pinterest.com/pin/664281013778109217/',
+ 'md5': '6550c2af85d6d9f3fe3b88954d1577fc',
+ 'info_dict': {
+ 'id': '664281013778109217',
+ 'ext': 'mp4',
+ 'title': 'Origami',
+ 'description': 'md5:e29801cab7d741ea8c741bc50c8d00ab',
+ 'duration': 57.7,
+ 'timestamp': 1593073622,
+ 'upload_date': '20200625',
+ 'repost_count': int,
+ 'comment_count': int,
+ 'categories': list,
+ 'tags': list,
+ 'thumbnail': r're:^https?://.*\.(?:jpg|png)$',
+ },
+ }, {
+ # formats found in data['story_pin_data']
+ 'url': 'https://www.pinterest.com/pin/1084663891475263837/',
+ 'md5': '069ac19919ab9e1e13fa60de46290b03',
+ 'info_dict': {
+ 'id': '1084663891475263837',
+ 'ext': 'mp4',
+ 'title': 'Gadget, Cool products, Amazon product, technology, Kitchen gadgets',
+ 'description': 'md5:d0a4b6ae996ff0c6eed83bc869598d13',
+ 'uploader': 'CoolCrazyGadgets',
+ 'uploader_id': '1084664028912989237',
+ 'upload_date': '20211003',
+ 'timestamp': 1633246654.0,
+ 'duration': 14.9,
+ 'comment_count': int,
+ 'repost_count': int,
+ 'categories': 'count:9',
+ 'tags': list,
+ 'thumbnail': r're:^https?://.*\.(?:jpg|png)$',
+ },
+ }, {
+ # vimeo.com embed
+ 'url': 'https://www.pinterest.ca/pin/441282463481903715/',
+ 'info_dict': {
+ 'id': '111691128',
+ 'ext': 'mp4',
+ 'title': 'Tonite Let\'s All Make Love In London (1967)',
+ 'description': 'md5:8190f37b3926807809ec57ec21aa77b2',
+ 'uploader': 'Vimeo',
+ 'uploader_id': '473792960706651251',
+ 'upload_date': '20180120',
+ 'timestamp': 1516409040,
+ 'duration': 3404,
+ 'comment_count': int,
+ 'repost_count': int,
+ 'categories': 'count:9',
+ 'tags': [],
+ 'thumbnail': r're:^https?://.*\.(?:jpg|png)$',
+ 'uploader_url': 'https://vimeo.com/willardandrade',
+ },
+ 'params': {
+ 'skip_download': 'm3u8',
+ },
+ }, {
+ 'url': 'https://co.pinterest.com/pin/824721750502199491/',
+ 'only_matching': True,
+ }]
+
+ def _real_extract(self, url):
+ video_id = self._match_id(url)
+ data = self._call_api(
+ 'Pin', video_id, {
+ 'field_set_key': 'unauth_react_main_pin',
+ 'id': video_id,
+ })['data']
+ return self._extract_video(data)
+
+
+class PinterestCollectionIE(PinterestBaseIE):
+ _VALID_URL = r'%s/(?P<username>[^/]+)/(?P<id>[^/?#&]+)' % PinterestBaseIE._VALID_URL_BASE
+ _TESTS = [{
+ 'url': 'https://www.pinterest.ca/mashal0407/cool-diys/',
+ 'info_dict': {
+ 'id': '585890301462791043',
+ 'title': 'cool diys',
+ },
+ 'playlist_count': 8,
+ }, {
+ 'url': 'https://www.pinterest.ca/fudohub/videos/',
+ 'info_dict': {
+ 'id': '682858430939307450',
+ 'title': 'VIDEOS',
+ },
+ 'playlist_mincount': 365,
+ 'skip': 'Test with extract_formats=False',
+ }]
+
+ @classmethod
+ def suitable(cls, url):
+        return False if PinterestIE.suitable(url) else super().suitable(url)
+
+ def _real_extract(self, url):
+ username, slug = self._match_valid_url(url).groups()
+ board = self._call_api(
+ 'Board', slug, {
+ 'slug': slug,
+ 'username': username
+ })['data']
+ board_id = board['id']
+ options = {
+ 'board_id': board_id,
+ 'page_size': 250,
+ }
+ bookmark = None
+ entries = []
+ while True:
+ if bookmark:
+ options['bookmarks'] = [bookmark]
+ board_feed = self._call_api('BoardFeed', board_id, options)
+ for item in (board_feed.get('data') or []):
+ if not isinstance(item, dict) or item.get('type') != 'pin':
+ continue
+ video_id = item.get('id')
+ if video_id:
+ # Some pins may not be available anonymously via pin URL
+ # video = self._extract_video(item, extract_formats=False)
+ # video.update({
+ # '_type': 'url_transparent',
+ # 'url': 'https://www.pinterest.com/pin/%s/' % video_id,
+ # })
+ # entries.append(video)
+ entries.append(self._extract_video(item))
+ bookmark = board_feed.get('bookmark')
+ if not bookmark:
+ break
+ return self.playlist_result(
+ entries, playlist_id=board_id, playlist_title=board.get('name'))
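The board feed above pages with an opaque cursor: each response carries a 'bookmark' token that is fed back through the 'bookmarks' option until the API stops returning one. The same loop as a self-contained sketch with a fake fetcher:

def paginate(fetch_page):
    # fetch_page(bookmark) -> (items, next_bookmark); faked below
    bookmark = None
    while True:
        items, bookmark = fetch_page(bookmark)
        yield from items
        if not bookmark:
            break

pages = {None: ([1, 2], 'cursor-1'), 'cursor-1': ([3], None)}
print(list(paginate(lambda b: pages[b])))  # -> [1, 2, 3]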
diff --git a/yt_dlp/extractor/pixivsketch.py b/yt_dlp/extractor/pixivsketch.py
new file mode 100644
index 0000000..850c6f2
--- /dev/null
+++ b/yt_dlp/extractor/pixivsketch.py
@@ -0,0 +1,118 @@
+from .common import InfoExtractor
+from ..utils import (
+ ExtractorError,
+    str_or_none,
+    traverse_obj,
+ unified_timestamp,
+)
+
+
+class PixivSketchBaseIE(InfoExtractor):
+ def _call_api(self, video_id, path, referer, note='Downloading JSON metadata'):
+ response = self._download_json(f'https://sketch.pixiv.net/api/{path}', video_id, note=note, headers={
+ 'Referer': referer,
+ 'X-Requested-With': referer,
+ })
+ errors = traverse_obj(response, ('errors', ..., 'message'))
+ if errors:
+ raise ExtractorError(' '.join(f'{e}.' for e in errors))
+ return response.get('data') or {}
+
+
+class PixivSketchIE(PixivSketchBaseIE):
+ IE_NAME = 'pixiv:sketch'
+ _VALID_URL = r'https?://sketch\.pixiv\.net/@(?P<uploader_id>[a-zA-Z0-9_-]+)/lives/(?P<id>\d+)/?'
+ _TESTS = [{
+ 'url': 'https://sketch.pixiv.net/@nuhutya/lives/3654620468641830507',
+ 'info_dict': {
+ 'id': '7370666691623196569',
+ 'title': 'まにあえクリスマス!',
+ 'uploader': 'ぬふちゃ',
+ 'uploader_id': 'nuhutya',
+ 'channel_id': '9844815',
+ 'age_limit': 0,
+ 'timestamp': 1640351536,
+ },
+ 'skip': True,
+ }, {
+        # these two (age_limit > 0) require a login on the website, but it's not actually needed for the download
+ 'url': 'https://sketch.pixiv.net/@namahyou/lives/4393103321546851377',
+ 'info_dict': {
+ 'id': '4907995960957946943',
+ 'title': 'クリスマスなんて知らん🖕',
+ 'uploader': 'すゃもり',
+ 'uploader_id': 'suya2mori2',
+ 'channel_id': '31169300',
+ 'age_limit': 15,
+ 'timestamp': 1640347640,
+ },
+ 'skip': True,
+ }, {
+ 'url': 'https://sketch.pixiv.net/@8aki/lives/3553803162487249670',
+ 'info_dict': {
+ 'id': '1593420639479156945',
+ 'title': 'おまけ本作業(リョナ有)',
+ 'uploader': 'おぶい / Obui',
+ 'uploader_id': 'oving',
+ 'channel_id': '17606',
+ 'age_limit': 18,
+ 'timestamp': 1640330263,
+ },
+ 'skip': True,
+ }]
+
+ def _real_extract(self, url):
+ video_id, uploader_id = self._match_valid_url(url).group('id', 'uploader_id')
+ data = self._call_api(video_id, f'lives/{video_id}.json', url)
+
+ if not traverse_obj(data, 'is_broadcasting'):
+            raise ExtractorError(f'This live is offline. Use https://sketch.pixiv.net/@{uploader_id} for an ongoing live stream.', expected=True)
+
+ m3u8_url = traverse_obj(data, ('owner', 'hls_movie', 'url'))
+ formats = self._extract_m3u8_formats(
+ m3u8_url, video_id, ext='mp4',
+ entry_protocol='m3u8_native', m3u8_id='hls')
+
+ return {
+ 'id': video_id,
+ 'title': data.get('name'),
+ 'formats': formats,
+ 'uploader': traverse_obj(data, ('user', 'name'), ('owner', 'user', 'name')),
+ 'uploader_id': traverse_obj(data, ('user', 'unique_name'), ('owner', 'user', 'unique_name')),
+            'channel_id': str_or_none(traverse_obj(data, ('user', 'pixiv_user_id'), ('owner', 'user', 'pixiv_user_id'))),
+ 'age_limit': 18 if data.get('is_r18') else 15 if data.get('is_r15') else 0,
+ 'timestamp': unified_timestamp(data.get('created_at')),
+ 'is_live': True
+ }
+
+
+class PixivSketchUserIE(PixivSketchBaseIE):
+ IE_NAME = 'pixiv:sketch:user'
+ _VALID_URL = r'https?://sketch\.pixiv\.net/@(?P<id>[a-zA-Z0-9_-]+)/?'
+ _TESTS = [{
+ 'url': 'https://sketch.pixiv.net/@nuhutya',
+ 'only_matching': True,
+ }, {
+ 'url': 'https://sketch.pixiv.net/@namahyou',
+ 'only_matching': True,
+ }, {
+ 'url': 'https://sketch.pixiv.net/@8aki',
+ 'only_matching': True,
+ }]
+
+ @classmethod
+ def suitable(cls, url):
+        return super().suitable(url) and not PixivSketchIE.suitable(url)
+
+ def _real_extract(self, url):
+ user_id = self._match_id(url)
+ data = self._call_api(user_id, f'lives/users/@{user_id}.json', url)
+
+ if not traverse_obj(data, 'is_broadcasting'):
+ try:
+ self._call_api(user_id, 'users/current.json', url, 'Investigating reason for request failure')
+ except ExtractorError as ex:
+ if ex.cause and ex.cause.code == 401:
+                    self.raise_login_required(f'Please log in, or use a direct link like https://sketch.pixiv.net/@{user_id}/1234567890', method='cookies')
+ raise ExtractorError('This user is offline', expected=True)
+
+ return self.url_result(f'https://sketch.pixiv.net/@{user_id}/lives/{data["id"]}')
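The age gate above maps the API's rating flags straight onto yt-dlp's age_limit field; as the test comments note, rated streams are tagged but still downloadable anonymously. The mapping on its own:

def age_limit(data):
    # is_r18 -> 18, is_r15 -> 15, otherwise all ages
    return 18 if data.get('is_r18') else 15 if data.get('is_r15') else 0

assert age_limit({'is_r18': True}) == 18
assert age_limit({'is_r15': True}) == 15
assert age_limit({}) == 0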
diff --git a/yt_dlp/extractor/pladform.py b/yt_dlp/extractor/pladform.py
new file mode 100644
index 0000000..d67f600
--- /dev/null
+++ b/yt_dlp/extractor/pladform.py
@@ -0,0 +1,135 @@
+from .common import InfoExtractor
+from ..utils import (
+ determine_ext,
+ ExtractorError,
+ int_or_none,
+ parse_qs,
+ xpath_text,
+ qualities,
+)
+
+
+class PladformIE(InfoExtractor):
+ _VALID_URL = r'''(?x)
+ https?://
+ (?:
+ (?:
+ out\.pladform\.ru/player|
+ static\.pladform\.ru/player\.swf
+ )
+ \?.*\bvideoid=|
+ video\.pladform\.ru/catalog/video/videoid/
+ )
+ (?P<id>\d+)
+ '''
+ _EMBED_REGEX = [r'<iframe[^>]+src=(["\'])(?P<url>(?:https?:)?//out\.pladform\.ru/player\?.+?)\1']
+ _TESTS = [{
+ 'url': 'http://out.pladform.ru/player?pl=18079&type=html5&videoid=100231282',
+ 'info_dict': {
+ 'id': '6216d548e755edae6e8280667d774791',
+ 'ext': 'mp4',
+ 'timestamp': 1406117012,
+ 'title': 'Гарик Мартиросян и Гарик Харламов - Кастинг на концерт ко Дню милиции',
+ 'age_limit': 0,
+ 'upload_date': '20140723',
+ 'thumbnail': str,
+ 'view_count': int,
+ 'description': str,
+ 'uploader_id': '12082',
+ 'uploader': 'Comedy Club',
+ 'duration': 367,
+ },
+ 'expected_warnings': ['HTTP Error 404: Not Found']
+ }, {
+ 'url': 'https://out.pladform.ru/player?pl=64471&videoid=3777899&vk_puid15=0&vk_puid34=0',
+ 'md5': '53362fac3a27352da20fa2803cc5cd6f',
+ 'info_dict': {
+ 'id': '3777899',
+ 'ext': 'mp4',
+ 'title': 'СТУДИЯ СОЮЗ • Шоу Студия Союз, 24 выпуск (01.02.2018) Нурлан Сабуров и Слава Комиссаренко',
+ 'description': 'md5:05140e8bf1b7e2d46e7ba140be57fd95',
+ 'thumbnail': r're:^https?://.*\.jpg$',
+ 'duration': 3190,
+ },
+ }, {
+ 'url': 'http://static.pladform.ru/player.swf?pl=21469&videoid=100183293&vkcid=0',
+ 'only_matching': True,
+ }, {
+ 'url': 'http://video.pladform.ru/catalog/video/videoid/100183293/vkcid/0',
+ 'only_matching': True,
+ }]
+
+ def _real_extract(self, url):
+ video_id = self._match_id(url)
+
+ qs = parse_qs(url)
+ pl = qs.get('pl', ['1'])[0]
+
+ video = self._download_xml(
+ 'http://out.pladform.ru/getVideo', video_id, query={
+ 'pl': pl,
+ 'videoid': video_id,
+ }, fatal=False)
+
+ def fail(text):
+ raise ExtractorError(
+ '%s returned error: %s' % (self.IE_NAME, text),
+ expected=True)
+
+ if not video:
+            target_url = self._request_webpage(url, video_id, note='Resolving final URL').url
+            if target_url == url:
+                raise ExtractorError('Can\'t parse page')
+            return self.url_result(target_url)
+
+ if video.tag == 'error':
+ fail(video.text)
+
+ quality = qualities(('ld', 'sd', 'hd'))
+
+ formats = []
+ for src in video.findall('./src'):
+ if src is None:
+ continue
+ format_url = src.text
+ if not format_url:
+ continue
+ if src.get('type') == 'hls' or determine_ext(format_url) == 'm3u8':
+ formats.extend(self._extract_m3u8_formats(
+ format_url, video_id, 'mp4', entry_protocol='m3u8_native',
+ m3u8_id='hls', fatal=False))
+ else:
+ formats.append({
+                    'url': format_url,
+ 'format_id': src.get('quality'),
+ 'quality': quality(src.get('quality')),
+ })
+
+ if not formats:
+ error = xpath_text(video, './cap', 'error', default=None)
+ if error:
+ fail(error)
+
+ webpage = self._download_webpage(
+ 'http://video.pladform.ru/catalog/video/videoid/%s' % video_id,
+ video_id)
+
+ title = self._og_search_title(webpage, fatal=False) or xpath_text(
+ video, './/title', 'title', fatal=True)
+ description = self._search_regex(
+ r'</h3>\s*<p>([^<]+)</p>', webpage, 'description', fatal=False)
+ thumbnail = self._og_search_thumbnail(webpage) or xpath_text(
+ video, './/cover', 'cover')
+
+ duration = int_or_none(xpath_text(video, './/time', 'duration'))
+ age_limit = int_or_none(xpath_text(video, './/age18', 'age limit'))
+
+ return {
+ 'id': video_id,
+ 'title': title,
+ 'description': description,
+ 'thumbnail': thumbnail,
+ 'duration': duration,
+ 'age_limit': age_limit,
+ 'formats': formats,
+ }
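qualities() above turns an ordered tuple into a ranking function. A simplified reimplementation shows the idea (lower-quality labels rank first, unknown labels lowest):

def qualities(order):
    # simplified sketch of yt_dlp.utils.qualities: the index in the
    # ordered tuple is the rank; unknown labels get -1
    return lambda q: order.index(q) if q in order else -1

quality = qualities(('ld', 'sd', 'hd'))
print(sorted(['sd', 'hd', 'ld'], key=quality))  # -> ['ld', 'sd', 'hd']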
diff --git a/yt_dlp/extractor/planetmarathi.py b/yt_dlp/extractor/planetmarathi.py
new file mode 100644
index 0000000..a4b612a
--- /dev/null
+++ b/yt_dlp/extractor/planetmarathi.py
@@ -0,0 +1,71 @@
+from .common import InfoExtractor
+from ..utils import (
+ try_get,
+ unified_strdate,
+)
+
+
+class PlanetMarathiIE(InfoExtractor):
+ _VALID_URL = r'https?://(?:www\.)?planetmarathi\.com/titles/(?P<id>[^/#&?$]+)'
+ _TESTS = [{
+ 'url': 'https://www.planetmarathi.com/titles/ek-unad-divas',
+ 'playlist_mincount': 2,
+ 'info_dict': {
+ 'id': 'ek-unad-divas',
+ },
+ 'playlist': [{
+ 'info_dict': {
+ 'id': 'ASSETS-MOVIE-ASSET-01_ek-unad-divas',
+ 'ext': 'mp4',
+ 'title': 'ek unad divas',
+ 'alt_title': 'चित्रपट',
+ 'description': 'md5:41c7ed6b041c2fea9820a3f3125bd881',
+ 'episode_number': 1,
+ 'duration': 5539,
+ 'upload_date': '20210829',
+ },
+ }] # Trailer skipped
+ }, {
+ 'url': 'https://www.planetmarathi.com/titles/baap-beep-baap-season-1',
+ 'playlist_mincount': 10,
+ 'info_dict': {
+ 'id': 'baap-beep-baap-season-1',
+ },
+ 'playlist': [{
+ 'info_dict': {
+ 'id': 'ASSETS-CHARACTER-PROFILE-SEASON-01-ASSET-01_baap-beep-baap-season-1',
+ 'ext': 'mp4',
+ 'title': 'Manohar Kanhere',
+ 'alt_title': 'मनोहर कान्हेरे',
+ 'description': 'md5:285ed45d5c0ab5522cac9a043354ebc6',
+ 'season_number': 1,
+ 'episode_number': 1,
+ 'duration': 29,
+ 'upload_date': '20210829',
+ },
+ }] # Trailers, Episodes, other Character profiles skipped
+ }]
+
+ def _real_extract(self, url):
+        playlist_id = self._match_id(url)
+        entries = []
+        json_data = self._download_json(f'https://www.planetmarathi.com/api/v1/titles/{playlist_id}/assets', playlist_id)['assets']
+        for asset in json_data:
+            asset_title = asset['mediaAssetName']['en']
+            if asset_title == 'Movie':
+                asset_title = playlist_id.replace('-', ' ')
+            asset_id = f'{asset["sk"]}_{playlist_id}'.replace('#', '-')
+            formats, subtitles = self._extract_m3u8_formats_and_subtitles(asset['mediaAssetURL'], asset_id)
+            entries.append({
+                'id': asset_id,
+                'title': asset_title,
+                'alt_title': try_get(asset, lambda x: x['mediaAssetName']['mr']),
+                'description': try_get(asset, lambda x: x['mediaAssetDescription']['en']),
+                'season_number': asset.get('mediaAssetSeason'),
+                'episode_number': asset.get('mediaAssetIndexForAssetType'),
+                'duration': asset.get('mediaAssetDurationInSeconds'),
+                'upload_date': unified_strdate(asset.get('created')),
+                'formats': formats,
+                'subtitles': subtitles,
+            })
+        return self.playlist_result(entries, playlist_id=playlist_id)
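The asset id above is derived from the API's sort key ('sk'), whose '#' separators are swapped for '-'. With a hypothetical asset record, this yields exactly the ids seen in the tests:

asset = {'sk': 'ASSETS#MOVIE#ASSET#01'}  # hypothetical API record
print(f"{asset['sk']}_ek-unad-divas".replace('#', '-'))
# -> ASSETS-MOVIE-ASSET-01_ek-unad-divas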
diff --git a/yt_dlp/extractor/platzi.py b/yt_dlp/extractor/platzi.py
new file mode 100644
index 0000000..166b98c
--- /dev/null
+++ b/yt_dlp/extractor/platzi.py
@@ -0,0 +1,213 @@
+from .common import InfoExtractor
+from ..compat import (
+ compat_b64decode,
+ compat_str,
+)
+from ..utils import (
+ clean_html,
+ ExtractorError,
+ int_or_none,
+ str_or_none,
+ try_get,
+ url_or_none,
+ urlencode_postdata,
+ urljoin,
+)
+
+
+class PlatziBaseIE(InfoExtractor):
+ _LOGIN_URL = 'https://platzi.com/login/'
+ _NETRC_MACHINE = 'platzi'
+
+ def _perform_login(self, username, password):
+ login_page = self._download_webpage(
+ self._LOGIN_URL, None, 'Downloading login page')
+
+ login_form = self._hidden_inputs(login_page)
+
+ login_form.update({
+ 'email': username,
+ 'password': password,
+ })
+
+ urlh = self._request_webpage(
+ self._LOGIN_URL, None, 'Logging in',
+ data=urlencode_postdata(login_form),
+ headers={'Referer': self._LOGIN_URL})
+
+ # login succeeded
+ if 'platzi.com/login' not in urlh.url:
+ return
+
+ login_error = self._webpage_read_content(
+ urlh, self._LOGIN_URL, None, 'Downloading login error page')
+
+ login = self._parse_json(
+ self._search_regex(
+ r'login\s*=\s*({.+?})(?:\s*;|\s*</script)', login_error, 'login'),
+ None)
+
+ for kind in ('error', 'password', 'nonFields'):
+ error = str_or_none(login.get('%sError' % kind))
+ if error:
+ raise ExtractorError(
+ 'Unable to login: %s' % error, expected=True)
+ raise ExtractorError('Unable to log in')
+
+
+class PlatziIE(PlatziBaseIE):
+ _VALID_URL = r'''(?x)
+ https?://
+ (?:
+ platzi\.com/clases| # es version
+ courses\.platzi\.com/classes # en version
+ )/[^/]+/(?P<id>\d+)-[^/?\#&]+
+ '''
+
+ _TESTS = [{
+ 'url': 'https://platzi.com/clases/1311-next-js/12074-creando-nuestra-primera-pagina/',
+ 'md5': '8f56448241005b561c10f11a595b37e3',
+ 'info_dict': {
+ 'id': '12074',
+ 'ext': 'mp4',
+ 'title': 'Creando nuestra primera página',
+ 'description': 'md5:4c866e45034fc76412fbf6e60ae008bc',
+ 'duration': 420,
+ },
+ 'skip': 'Requires platzi account credentials',
+ }, {
+ 'url': 'https://courses.platzi.com/classes/1367-communication-codestream/13430-background/',
+ 'info_dict': {
+ 'id': '13430',
+ 'ext': 'mp4',
+ 'title': 'Background',
+ 'description': 'md5:49c83c09404b15e6e71defaf87f6b305',
+ 'duration': 360,
+ },
+ 'skip': 'Requires platzi account credentials',
+ 'params': {
+ 'skip_download': True,
+ },
+ }]
+
+ def _real_extract(self, url):
+ lecture_id = self._match_id(url)
+
+ webpage = self._download_webpage(url, lecture_id)
+
+ data = self._parse_json(
+ self._search_regex(
+                # client_data may contain "};", so try the stricter regex first
+ (r'client_data\s*=\s*({.+?})\s*;\s*\n',
+ r'client_data\s*=\s*({.+?})\s*;'),
+ webpage, 'client data'),
+ lecture_id)
+
+ material = data['initialState']['material']
+ desc = material['description']
+ title = desc['title']
+
+ formats = []
+ for server_id, server in material['videos'].items():
+ if not isinstance(server, dict):
+ continue
+ for format_id in ('hls', 'dash'):
+ format_url = url_or_none(server.get(format_id))
+ if not format_url:
+ continue
+ if format_id == 'hls':
+ formats.extend(self._extract_m3u8_formats(
+ format_url, lecture_id, 'mp4',
+ entry_protocol='m3u8_native', m3u8_id=format_id,
+ note='Downloading %s m3u8 information' % server_id,
+ fatal=False))
+ elif format_id == 'dash':
+ formats.extend(self._extract_mpd_formats(
+ format_url, lecture_id, mpd_id=format_id,
+ note='Downloading %s MPD manifest' % server_id,
+ fatal=False))
+
+ content = str_or_none(desc.get('content'))
+ description = (clean_html(compat_b64decode(content).decode('utf-8'))
+ if content else None)
+ duration = int_or_none(material.get('duration'), invscale=60)
+
+ return {
+ 'id': lecture_id,
+ 'title': title,
+ 'description': description,
+ 'duration': duration,
+ 'formats': formats,
+ }
+
+
+class PlatziCourseIE(PlatziBaseIE):
+ _VALID_URL = r'''(?x)
+ https?://
+ (?:
+ platzi\.com/clases| # es version
+ courses\.platzi\.com/classes # en version
+ )/(?P<id>[^/?\#&]+)
+ '''
+ _TESTS = [{
+ 'url': 'https://platzi.com/clases/next-js/',
+ 'info_dict': {
+ 'id': '1311',
+ 'title': 'Curso de Next.js',
+ },
+ 'playlist_count': 22,
+ }, {
+ 'url': 'https://courses.platzi.com/classes/communication-codestream/',
+ 'info_dict': {
+ 'id': '1367',
+ 'title': 'Codestream Course',
+ },
+ 'playlist_count': 14,
+ }]
+
+ @classmethod
+ def suitable(cls, url):
+        return False if PlatziIE.suitable(url) else super().suitable(url)
+
+ def _real_extract(self, url):
+ course_name = self._match_id(url)
+
+ webpage = self._download_webpage(url, course_name)
+
+ props = self._parse_json(
+ self._search_regex(r'data\s*=\s*({.+?})\s*;', webpage, 'data'),
+ course_name)['initialProps']
+
+ entries = []
+ for chapter_num, chapter in enumerate(props['concepts'], 1):
+ if not isinstance(chapter, dict):
+ continue
+ materials = chapter.get('materials')
+ if not materials or not isinstance(materials, list):
+ continue
+ chapter_title = chapter.get('title')
+ chapter_id = str_or_none(chapter.get('id'))
+ for material in materials:
+ if not isinstance(material, dict):
+ continue
+ if material.get('material_type') != 'video':
+ continue
+ video_url = urljoin(url, material.get('url'))
+ if not video_url:
+ continue
+ entries.append({
+ '_type': 'url_transparent',
+ 'url': video_url,
+ 'title': str_or_none(material.get('name')),
+ 'id': str_or_none(material.get('id')),
+ 'ie_key': PlatziIE.ie_key(),
+ 'chapter': chapter_title,
+ 'chapter_number': chapter_num,
+ 'chapter_id': chapter_id,
+ })
+
+        course_id = str_or_none(try_get(props, lambda x: x['course']['id']))
+ course_title = try_get(props, lambda x: x['course']['name'], compat_str)
+
+ return self.playlist_result(entries, course_id, course_title)
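Lecture descriptions above arrive base64-encoded and are HTML-cleaned after decoding. The decode step alone, with a made-up payload:

import base64

content = base64.b64encode('<p>Hola, Platzi</p>'.encode()).decode()  # made-up payload
print(base64.b64decode(content).decode('utf-8'))
# -> <p>Hola, Platzi</p>  (clean_html() then strips the markup)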
diff --git a/yt_dlp/extractor/playplustv.py b/yt_dlp/extractor/playplustv.py
new file mode 100644
index 0000000..a4439c8
--- /dev/null
+++ b/yt_dlp/extractor/playplustv.py
@@ -0,0 +1,100 @@
+import json
+
+from .common import InfoExtractor
+from ..networking import PUTRequest
+from ..networking.exceptions import HTTPError
+from ..utils import ExtractorError, clean_html, int_or_none
+
+
+class PlayPlusTVIE(InfoExtractor):
+ _VALID_URL = r'https?://(?:www\.)?playplus\.(?:com|tv)/VOD/(?P<project_id>[0-9]+)/(?P<id>[0-9a-f]{32})'
+ _TEST = {
+ 'url': 'https://www.playplus.tv/VOD/7572/db8d274a5163424e967f35a30ddafb8e',
+ 'md5': 'd078cb89d7ab6b9df37ce23c647aef72',
+ 'info_dict': {
+ 'id': 'db8d274a5163424e967f35a30ddafb8e',
+ 'ext': 'mp4',
+ 'title': 'Capítulo 179 - Final',
+ 'description': 'md5:01085d62d8033a1e34121d3c3cabc838',
+ 'timestamp': 1529992740,
+ 'upload_date': '20180626',
+ },
+ 'skip': 'Requires account credential',
+ }
+ _NETRC_MACHINE = 'playplustv'
+ _GEO_COUNTRIES = ['BR']
+ _token = None
+    _profile = None
+
+ def _call_api(self, resource, video_id=None, query=None):
+ return self._download_json('https://api.playplus.tv/api/media/v2/get' + resource, video_id, headers={
+ 'Authorization': 'Bearer ' + self._token,
+ }, query=query)
+
+ def _perform_login(self, username, password):
+ req = PUTRequest(
+ 'https://api.playplus.tv/api/web/login', json.dumps({
+ 'email': username,
+ 'password': password,
+ }).encode(), {
+ 'Content-Type': 'application/json; charset=utf-8',
+ })
+
+ try:
+ self._token = self._download_json(req, None)['token']
+ except ExtractorError as e:
+ if isinstance(e.cause, HTTPError) and e.cause.status == 401:
+ raise ExtractorError(self._parse_json(
+ e.cause.response.read(), None)['errorMessage'], expected=True)
+ raise
+
+ self._profile = self._call_api('Profiles')['list'][0]['_id']
+
+ def _real_initialize(self):
+ if not self._token:
+ self.raise_login_required(method='password')
+
+ def _real_extract(self, url):
+ project_id, media_id = self._match_valid_url(url).groups()
+ media = self._call_api(
+ 'Media', media_id, {
+ 'profileId': self._profile,
+ 'projectId': project_id,
+ 'mediaId': media_id,
+ })['obj']
+ title = media['title']
+
+ formats = []
+ for f in media.get('files', []):
+ f_url = f.get('url')
+ if not f_url:
+ continue
+ file_info = f.get('fileInfo') or {}
+ formats.append({
+ 'url': f_url,
+ 'width': int_or_none(file_info.get('width')),
+ 'height': int_or_none(file_info.get('height')),
+ })
+
+ thumbnails = []
+ for thumb in media.get('thumbs', []):
+ thumb_url = thumb.get('url')
+ if not thumb_url:
+ continue
+ thumbnails.append({
+ 'url': thumb_url,
+ 'width': int_or_none(thumb.get('width')),
+ 'height': int_or_none(thumb.get('height')),
+ })
+
+ return {
+ 'id': media_id,
+ 'title': title,
+ 'formats': formats,
+ 'thumbnails': thumbnails,
+ 'description': clean_html(media.get('description')) or media.get('shortDescription'),
+ 'timestamp': int_or_none(media.get('publishDate'), 1000),
+ 'view_count': int_or_none(media.get('numberOfViews')),
+ 'comment_count': int_or_none(media.get('numberOfComments')),
+ 'tags': media.get('tags'),
+ }
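The login above is an HTTP PUT with a JSON body, which is why the PUTRequest helper is needed. A stdlib-only sketch of the same call; the endpoint and response key are taken from the extractor, and the credentials are placeholders:

import json
import urllib.request

def playplus_login(email, password):
    req = urllib.request.Request(
        'https://api.playplus.tv/api/web/login',
        data=json.dumps({'email': email, 'password': password}).encode(),
        headers={'Content-Type': 'application/json; charset=utf-8'},
        method='PUT')
    with urllib.request.urlopen(req) as resp:
        return json.load(resp)['token']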
diff --git a/yt_dlp/extractor/playsuisse.py b/yt_dlp/extractor/playsuisse.py
new file mode 100644
index 0000000..7c5cad1
--- /dev/null
+++ b/yt_dlp/extractor/playsuisse.py
@@ -0,0 +1,234 @@
+import json
+
+from .common import InfoExtractor
+from ..utils import (
+ ExtractorError,
+ int_or_none,
+ parse_qs,
+ traverse_obj,
+ update_url_query,
+ urlencode_postdata,
+)
+
+
+class PlaySuisseIE(InfoExtractor):
+ _NETRC_MACHINE = 'playsuisse'
+ _VALID_URL = r'https?://(?:www\.)?playsuisse\.ch/(?:watch|detail)/(?:[^#]*[?&]episodeId=)?(?P<id>[0-9]+)'
+ _TESTS = [
+ {
+ # Old URL
+ 'url': 'https://www.playsuisse.ch/watch/763211/0',
+ 'only_matching': True,
+ },
+ {
+ # episode in a series
+ 'url': 'https://www.playsuisse.ch/watch/763182?episodeId=763211',
+ 'md5': '82df2a470b2dfa60c2d33772a8a60cf8',
+ 'info_dict': {
+ 'id': '763211',
+ 'ext': 'mp4',
+ 'title': 'Knochen',
+ 'description': 'md5:8ea7a8076ba000cd9e8bc132fd0afdd8',
+ 'duration': 3344,
+ 'series': 'Wilder',
+ 'season': 'Season 1',
+ 'season_number': 1,
+ 'episode': 'Knochen',
+ 'episode_number': 1,
+ 'thumbnail': 're:https://playsuisse-img.akamaized.net/',
+ }
+ }, {
+ # film
+ 'url': 'https://www.playsuisse.ch/watch/808675',
+ 'md5': '818b94c1d2d7c4beef953f12cb8f3e75',
+ 'info_dict': {
+ 'id': '808675',
+ 'ext': 'mp4',
+ 'title': 'Der Läufer',
+ 'description': 'md5:9f61265c7e6dcc3e046137a792b275fd',
+ 'duration': 5280,
+ 'thumbnail': 're:https://playsuisse-img.akamaized.net/',
+ }
+ }, {
+ # series (treated as a playlist)
+ 'url': 'https://www.playsuisse.ch/detail/1115687',
+ 'info_dict': {
+ 'description': 'md5:e4a2ae29a8895823045b5c3145a02aa3',
+ 'id': '1115687',
+ 'series': 'They all came out to Montreux',
+ 'title': 'They all came out to Montreux',
+ },
+ 'playlist': [{
+ 'info_dict': {
+ 'description': 'md5:f2462744834b959a31adc6292380cda2',
+ 'duration': 3180,
+ 'episode': 'Folge 1',
+ 'episode_number': 1,
+ 'id': '1112663',
+ 'season': 'Season 1',
+ 'season_number': 1,
+ 'series': 'They all came out to Montreux',
+ 'thumbnail': 're:https://playsuisse-img.akamaized.net/',
+ 'title': 'Folge 1',
+ 'ext': 'mp4'
+ },
+ }, {
+ 'info_dict': {
+ 'description': 'md5:9dfd308699fe850d3bce12dc1bad9b27',
+ 'duration': 2935,
+ 'episode': 'Folge 2',
+ 'episode_number': 2,
+ 'id': '1112661',
+ 'season': 'Season 1',
+ 'season_number': 1,
+ 'series': 'They all came out to Montreux',
+ 'thumbnail': 're:https://playsuisse-img.akamaized.net/',
+ 'title': 'Folge 2',
+ 'ext': 'mp4'
+ },
+ }, {
+ 'info_dict': {
+ 'description': 'md5:14a93a3356b2492a8f786ab2227ef602',
+ 'duration': 2994,
+ 'episode': 'Folge 3',
+ 'episode_number': 3,
+ 'id': '1112664',
+ 'season': 'Season 1',
+ 'season_number': 1,
+ 'series': 'They all came out to Montreux',
+ 'thumbnail': 're:https://playsuisse-img.akamaized.net/',
+ 'title': 'Folge 3',
+ 'ext': 'mp4'
+ }
+ }],
+ }
+ ]
+
+ _GRAPHQL_QUERY = '''
+ query AssetWatch($assetId: ID!) {
+ assetV2(id: $assetId) {
+ ...Asset
+ episodes {
+ ...Asset
+ }
+ }
+ }
+ fragment Asset on AssetV2 {
+ id
+ name
+ description
+ duration
+ episodeNumber
+ seasonNumber
+ seriesName
+ medias {
+ type
+ url
+ }
+ thumbnail16x9 {
+ ...ImageDetails
+ }
+ thumbnail2x3 {
+ ...ImageDetails
+ }
+ thumbnail16x9WithTitle {
+ ...ImageDetails
+ }
+ thumbnail2x3WithTitle {
+ ...ImageDetails
+ }
+ }
+ fragment ImageDetails on AssetImage {
+ id
+ url
+ }'''
+ _LOGIN_BASE_URL = 'https://login.srgssr.ch/srgssrlogin.onmicrosoft.com'
+ _LOGIN_PATH = 'B2C_1A__SignInV2'
+ _ID_TOKEN = None
+
+ def _perform_login(self, username, password):
+ login_page = self._download_webpage(
+ 'https://www.playsuisse.ch/api/sso/login', None, note='Downloading login page',
+ query={'x': 'x', 'locale': 'de', 'redirectUrl': 'https://www.playsuisse.ch/'})
+ settings = self._search_json(r'var\s+SETTINGS\s*=', login_page, 'settings', None)
+
+ csrf_token = settings['csrf']
+ query = {'tx': settings['transId'], 'p': self._LOGIN_PATH}
+
+ status = traverse_obj(self._download_json(
+ f'{self._LOGIN_BASE_URL}/{self._LOGIN_PATH}/SelfAsserted', None, 'Logging in',
+ query=query, headers={'X-CSRF-TOKEN': csrf_token}, data=urlencode_postdata({
+ 'request_type': 'RESPONSE',
+ 'signInName': username,
+ 'password': password
+ }), expected_status=400), ('status', {int_or_none}))
+ if status == 400:
+ raise ExtractorError('Invalid username or password', expected=True)
+
+ urlh = self._request_webpage(
+ f'{self._LOGIN_BASE_URL}/{self._LOGIN_PATH}/api/CombinedSigninAndSignup/confirmed',
+ None, 'Downloading ID token', query={
+ 'rememberMe': 'false',
+ 'csrf_token': csrf_token,
+ **query,
+ 'diags': '',
+ })
+
+ self._ID_TOKEN = traverse_obj(parse_qs(urlh.url), ('id_token', 0))
+ if not self._ID_TOKEN:
+ raise ExtractorError('Login failed')
+
+ def _get_media_data(self, media_id):
+        # NOTE: in the web app, the 'locale' header is used to switch between
+        # languages; however, it doesn't seem to take effect when passed here.
+ response = self._download_json(
+ 'https://www.playsuisse.ch/api/graphql',
+ media_id, data=json.dumps({
+ 'operationName': 'AssetWatch',
+ 'query': self._GRAPHQL_QUERY,
+ 'variables': {'assetId': media_id}
+ }).encode('utf-8'),
+ headers={'Content-Type': 'application/json', 'locale': 'de'})
+
+ return response['data']['assetV2']
+
+ def _real_extract(self, url):
+ if not self._ID_TOKEN:
+ self.raise_login_required(method='password')
+
+ media_id = self._match_id(url)
+ media_data = self._get_media_data(media_id)
+ info = self._extract_single(media_data)
+ if media_data.get('episodes'):
+ info.update({
+ '_type': 'playlist',
+ 'entries': map(self._extract_single, media_data['episodes']),
+ })
+ return info
+
+ def _extract_single(self, media_data):
+ thumbnails = traverse_obj(media_data, lambda k, _: k.startswith('thumbnail'))
+
+ formats, subtitles = [], {}
+ for media in traverse_obj(media_data, 'medias', default=[]):
+ if not media.get('url') or media.get('type') != 'HLS':
+ continue
+ f, subs = self._extract_m3u8_formats_and_subtitles(
+ update_url_query(media['url'], {'id_token': self._ID_TOKEN}),
+ media_data['id'], 'mp4', m3u8_id='HLS', fatal=False)
+ formats.extend(f)
+ self._merge_subtitles(subs, target=subtitles)
+
+ return {
+ 'id': media_data['id'],
+ 'title': media_data.get('name'),
+ 'description': media_data.get('description'),
+ 'thumbnails': thumbnails,
+ 'duration': int_or_none(media_data.get('duration')),
+ 'formats': formats,
+ 'subtitles': subtitles,
+ 'series': media_data.get('seriesName'),
+ 'season_number': int_or_none(media_data.get('seasonNumber')),
+ 'episode': media_data.get('name') if media_data.get('episodeNumber') else None,
+ 'episode_number': int_or_none(media_data.get('episodeNumber')),
+ }
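Every HLS URL above is signed by appending the SSO id_token as a query parameter (update_url_query). The equivalent with the standard library, using a placeholder token:

import urllib.parse

def sign_m3u8(url, id_token):
    parts = urllib.parse.urlparse(url)
    query = urllib.parse.parse_qs(parts.query)
    query['id_token'] = [id_token]
    return parts._replace(query=urllib.parse.urlencode(query, doseq=True)).geturl()

print(sign_m3u8('https://example.com/master.m3u8', 'TOKEN'))  # placeholder token
# -> https://example.com/master.m3u8?id_token=TOKEN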
diff --git a/yt_dlp/extractor/playtvak.py b/yt_dlp/extractor/playtvak.py
new file mode 100644
index 0000000..c418f88
--- /dev/null
+++ b/yt_dlp/extractor/playtvak.py
@@ -0,0 +1,185 @@
+from .common import InfoExtractor
+from ..compat import (
+ compat_urlparse,
+ compat_urllib_parse_urlencode,
+)
+from ..utils import (
+ ExtractorError,
+ int_or_none,
+ parse_iso8601,
+ qualities,
+)
+
+
+class PlaytvakIE(InfoExtractor):
+ IE_DESC = 'Playtvak.cz, iDNES.cz and Lidovky.cz'
+ _VALID_URL = r'https?://(?:.+?\.)?(?:playtvak|idnes|lidovky|metro)\.cz/.*\?(?:c|idvideo)=(?P<id>[^&]+)'
+ _TESTS = [{
+ 'url': 'http://www.playtvak.cz/vyzente-vosy-a-srsne-ze-zahrady-dn5-/hodinovy-manzel.aspx?c=A150730_150323_hodinovy-manzel_kuko',
+ 'md5': '4525ae312c324b4be2f4603cc78ceb4a',
+ 'info_dict': {
+ 'id': 'A150730_150323_hodinovy-manzel_kuko',
+ 'ext': 'mp4',
+ 'title': 'Vyžeňte vosy a sršně ze zahrady',
+ 'description': 'md5:4436e61b7df227a093778efb7e373571',
+ 'thumbnail': r're:(?i)^https?://.*\.(?:jpg|png)$',
+ 'duration': 279,
+ 'timestamp': 1438732860,
+ 'upload_date': '20150805',
+ 'is_live': False,
+ }
+ }, { # live video test
+ 'url': 'http://slowtv.playtvak.cz/planespotting-0pr-/planespotting.aspx?c=A150624_164934_planespotting_cat',
+ 'info_dict': {
+ 'id': 'A150624_164934_planespotting_cat',
+ 'ext': 'flv',
+ 'title': 're:^Planespotting [0-9]{4}-[0-9]{2}-[0-9]{2} [0-9]{2}:[0-9]{2}$',
+ 'description': 'Sledujte provoz na ranveji Letiště Václava Havla v Praze',
+ 'is_live': True,
+ },
+ 'params': {
+ 'skip_download': True, # requires rtmpdump
+ },
+ }, { # another live stream, this one without Misc.videoFLV
+ 'url': 'https://slowtv.playtvak.cz/zive-sledujte-vlaky-v-primem-prenosu-dwi-/hlavni-nadrazi.aspx?c=A151218_145728_hlavni-nadrazi_plap',
+ 'info_dict': {
+ 'id': 'A151218_145728_hlavni-nadrazi_plap',
+ 'ext': 'flv',
+ 'title': 're:^Hlavní nádraží [0-9]{4}-[0-9]{2}-[0-9]{2} [0-9]{2}:[0-9]{2}$',
+ 'is_live': True,
+ },
+ 'params': {
+ 'skip_download': True, # requires rtmpdump
+ },
+ }, { # idnes.cz
+ 'url': 'http://zpravy.idnes.cz/pes-zavreny-v-aute-rozbijeni-okynek-v-aute-fj5-/domaci.aspx?c=A150809_104116_domaci_pku',
+ 'md5': '819832ba33cd7016e58a6658577fe289',
+ 'info_dict': {
+ 'id': 'A150809_104116_domaci_pku',
+ 'ext': 'mp4',
+ 'title': 'Zavřeli jsme mraženou pizzu do auta. Upekla se',
+ 'description': 'md5:01e73f02329e2e5760bd5eed4d42e3c2',
+ 'thumbnail': r're:(?i)^https?://.*\.(?:jpg|png)$',
+ 'duration': 39,
+ 'timestamp': 1438969140,
+ 'upload_date': '20150807',
+ 'is_live': False,
+ }
+ }, { # lidovky.cz
+ 'url': 'http://www.lidovky.cz/dalsi-demonstrace-v-praze-o-migraci-duq-/video.aspx?c=A150808_214044_ln-video_ELE',
+ 'md5': 'c7209ac4ba9d234d4ad5bab7485bcee8',
+ 'info_dict': {
+ 'id': 'A150808_214044_ln-video_ELE',
+ 'ext': 'mp4',
+ 'title': 'Táhni! Demonstrace proti imigrantům budila emoce',
+ 'description': 'md5:97c81d589a9491fbfa323c9fa3cca72c',
+ 'thumbnail': r're:(?i)^https?://.*\.(?:jpg|png)$',
+ 'timestamp': 1439052180,
+ 'upload_date': '20150808',
+ 'is_live': False,
+ }
+ }, { # metro.cz
+ 'url': 'http://www.metro.cz/video-pod-billboardem-se-na-vltavske-roztocil-kolotoc-deti-vozil-jen-par-hodin-1hx-/metro-extra.aspx?c=A141111_173251_metro-extra_row',
+ 'md5': '84fc1deedcac37b7d4a6ccae7c716668',
+ 'info_dict': {
+ 'id': 'A141111_173251_metro-extra_row',
+ 'ext': 'mp4',
+ 'title': 'Recesisté udělali z billboardu kolotoč',
+ 'description': 'md5:7369926049588c3989a66c9c1a043c4c',
+ 'thumbnail': r're:(?i)^https?://.*\.(?:jpg|png)$',
+ 'timestamp': 1415725500,
+ 'upload_date': '20141111',
+ 'is_live': False,
+ }
+ }, {
+ 'url': 'http://www.playtvak.cz/embed.aspx?idvideo=V150729_141549_play-porad_kuko',
+ 'only_matching': True,
+ }]
+
+ def _real_extract(self, url):
+ video_id = self._match_id(url)
+
+ webpage = self._download_webpage(url, video_id)
+
+ info_url = self._html_search_regex(
+ r'Misc\.video(?:FLV)?\(\s*{\s*data\s*:\s*"([^"]+)"', webpage, 'info url')
+
+ parsed_url = compat_urlparse.urlparse(info_url)
+
+ qs = compat_urlparse.parse_qs(parsed_url.query)
+ qs.update({
+ 'reklama': ['0'],
+ 'type': ['js'],
+ })
+
+ info_url = compat_urlparse.urlunparse(
+ parsed_url._replace(query=compat_urllib_parse_urlencode(qs, True)))
+
+ json_info = self._download_json(
+ info_url, video_id,
+ transform_source=lambda s: s[s.index('{'):s.rindex('}') + 1])
+
+ item = None
+ for i in json_info['items']:
+            if i.get('type') in ('video', 'stream'):
+ item = i
+ break
+ if not item:
+ raise ExtractorError('No suitable stream found')
+
+ quality = qualities(('low', 'middle', 'high'))
+
+ formats = []
+ for fmt in item['video']:
+ video_url = fmt.get('file')
+ if not video_url:
+ continue
+
+ format_ = fmt['format']
+ format_id = '%s_%s' % (format_, fmt['quality'])
+ preference = None
+
+ if format_ in ('mp4', 'webm'):
+ ext = format_
+ elif format_ == 'rtmp':
+ ext = 'flv'
+ elif format_ == 'apple':
+ ext = 'mp4'
+ # Some streams have mp3 audio which does not play
+ # well with ffmpeg filter aac_adtstoasc
+ preference = -10
+ elif format_ == 'adobe': # f4m manifest fails with 404 in 80% of requests
+ continue
+ else: # Other formats not supported yet
+ continue
+
+ formats.append({
+ 'url': video_url,
+ 'ext': ext,
+ 'format_id': format_id,
+ 'quality': quality(fmt.get('quality')),
+ 'preference': preference,
+ })
+
+ title = item['title']
+ is_live = item['type'] == 'stream'
+ description = self._og_search_description(webpage, default=None) or self._html_search_meta(
+ 'description', webpage, 'description', default=None)
+ timestamp = None
+ duration = None
+ if not is_live:
+ duration = int_or_none(item.get('length'))
+ timestamp = item.get('published')
+ if timestamp:
+ timestamp = parse_iso8601(timestamp[:-5])
+
+ return {
+ 'id': video_id,
+ 'title': title,
+ 'description': description,
+ 'thumbnail': item.get('image'),
+ 'duration': duration,
+ 'timestamp': timestamp,
+ 'is_live': is_live,
+ 'formats': formats,
+ }
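The info endpoint above returns JSON wrapped in JavaScript, which the transform_source lambda strips by slicing from the first '{' to the last '}'. The same trick standalone (the wrapper text is hypothetical):

import json

payload = 'Misc.video({"items": []});'  # hypothetical JS wrapper
print(json.loads(payload[payload.index('{'):payload.rindex('}') + 1]))
# -> {'items': []}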
diff --git a/yt_dlp/extractor/playwire.py b/yt_dlp/extractor/playwire.py
new file mode 100644
index 0000000..1057bff
--- /dev/null
+++ b/yt_dlp/extractor/playwire.py
@@ -0,0 +1,72 @@
+from .common import InfoExtractor
+from ..utils import (
+ dict_get,
+ float_or_none,
+)
+
+
+class PlaywireIE(InfoExtractor):
+ _VALID_URL = r'https?://(?:config|cdn)\.playwire\.com(?:/v2)?/(?P<publisher_id>\d+)/(?:videos/v2|embed|config)/(?P<id>\d+)'
+ _EMBED_REGEX = [r'<script[^>]+data-config=(["\'])(?P<url>(?:https?:)?//config\.playwire\.com/.+?)\1']
+
+ _TESTS = [{
+ 'url': 'http://config.playwire.com/14907/videos/v2/3353705/player.json',
+ 'md5': 'e6398701e3595888125729eaa2329ed9',
+ 'info_dict': {
+ 'id': '3353705',
+ 'ext': 'mp4',
+ 'title': 'S04_RM_UCL_Rus',
+ 'thumbnail': r're:^https?://.*\.png$',
+ 'duration': 145.94,
+ },
+ }, {
+ # m3u8 in f4m
+ 'url': 'http://config.playwire.com/21772/videos/v2/4840492/zeus.json',
+ 'info_dict': {
+ 'id': '4840492',
+ 'ext': 'mp4',
+ 'title': 'ITV EL SHOW FULL',
+ },
+ 'params': {
+ # m3u8 download
+ 'skip_download': True,
+ },
+ }, {
+ # Multiple resolutions while bitrates missing
+ 'url': 'http://cdn.playwire.com/11625/embed/85228.html',
+ 'only_matching': True,
+ }, {
+ 'url': 'http://config.playwire.com/12421/videos/v2/3389892/zeus.json',
+ 'only_matching': True,
+ }, {
+ 'url': 'http://cdn.playwire.com/v2/12342/config/1532636.json',
+ 'only_matching': True,
+ }]
+
+ def _real_extract(self, url):
+ mobj = self._match_valid_url(url)
+ publisher_id, video_id = mobj.group('publisher_id'), mobj.group('id')
+
+ player = self._download_json(
+ 'http://config.playwire.com/%s/videos/v2/%s/zeus.json' % (publisher_id, video_id),
+ video_id)
+
+ title = player['settings']['title']
+ duration = float_or_none(player.get('duration'), 1000)
+
+ content = player['content']
+ thumbnail = content.get('poster')
+ src = content['media']['f4m']
+
+ formats = self._extract_f4m_formats(src, video_id, m3u8_id='hls')
+ for a_format in formats:
+ if not dict_get(a_format, ['tbr', 'width', 'height']):
+ a_format['quality'] = 1 if '-hd.' in a_format['url'] else 0
+
+ return {
+ 'id': video_id,
+ 'title': title,
+ 'thumbnail': thumbnail,
+ 'duration': duration,
+ 'formats': formats,
+ }
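When a Playwire format reports neither bitrate nor dimensions, the only quality hint left is the '-hd.' marker in the URL; the fallback above boils down to:

def fallback_quality(fmt):
    # rank '-hd.' variants above everything else when tbr/width/height are absent
    if not any(fmt.get(k) for k in ('tbr', 'width', 'height')):
        fmt['quality'] = 1 if '-hd.' in fmt['url'] else 0
    return fmt

print(fallback_quality({'url': 'https://cdn.example.com/video-hd.mp4'}))
# -> {'url': 'https://cdn.example.com/video-hd.mp4', 'quality': 1}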
diff --git a/yt_dlp/extractor/pluralsight.py b/yt_dlp/extractor/pluralsight.py
new file mode 100644
index 0000000..809b656
--- /dev/null
+++ b/yt_dlp/extractor/pluralsight.py
@@ -0,0 +1,491 @@
+import collections
+import json
+import os
+import random
+import re
+
+from .common import InfoExtractor
+from ..compat import (
+ compat_str,
+ compat_urlparse,
+)
+from ..utils import (
+ dict_get,
+ ExtractorError,
+ float_or_none,
+ int_or_none,
+ parse_duration,
+ parse_qs,
+ qualities,
+ srt_subtitles_timecode,
+ try_get,
+ update_url_query,
+ urlencode_postdata,
+)
+
+
+class PluralsightBaseIE(InfoExtractor):
+ _API_BASE = 'https://app.pluralsight.com'
+
+ _GRAPHQL_EP = '%s/player/api/graphql' % _API_BASE
+ _GRAPHQL_HEADERS = {
+ 'Content-Type': 'application/json;charset=UTF-8',
+ }
+ _GRAPHQL_COURSE_TMPL = '''
+query BootstrapPlayer {
+ rpc {
+ bootstrapPlayer {
+ profile {
+ firstName
+ lastName
+ email
+ username
+ userHandle
+ authed
+ isAuthed
+ plan
+ }
+ course(courseId: "%s") {
+ name
+ title
+ courseHasCaptions
+ translationLanguages {
+ code
+ name
+ }
+ supportsWideScreenVideoFormats
+ timestamp
+ modules {
+ name
+ title
+ duration
+ formattedDuration
+ author
+ authorized
+ clips {
+ authorized
+ clipId
+ duration
+ formattedDuration
+ id
+ index
+ moduleIndex
+ moduleTitle
+ name
+ title
+ watched
+ }
+ }
+ }
+ }
+ }
+}'''
+
+ def _download_course(self, course_id, url, display_id):
+ try:
+ return self._download_course_rpc(course_id, url, display_id)
+ except ExtractorError:
+ # Old API fallback
+ return self._download_json(
+ 'https://app.pluralsight.com/player/user/api/v1/player/payload',
+ display_id, data=urlencode_postdata({'courseId': course_id}),
+ headers={'Referer': url})
+
+ def _download_course_rpc(self, course_id, url, display_id):
+ response = self._download_json(
+ self._GRAPHQL_EP, display_id, data=json.dumps({
+ 'query': self._GRAPHQL_COURSE_TMPL % course_id,
+ 'variables': {}
+ }).encode('utf-8'), headers=self._GRAPHQL_HEADERS)
+
+ course = try_get(
+ response, lambda x: x['data']['rpc']['bootstrapPlayer']['course'],
+ dict)
+ if course:
+ return course
+
+ raise ExtractorError(
+ '%s said: %s' % (self.IE_NAME, response['error']['message']),
+ expected=True)
+
+
+class PluralsightIE(PluralsightBaseIE):
+ IE_NAME = 'pluralsight'
+ _VALID_URL = r'https?://(?:(?:www|app)\.)?pluralsight\.com/(?:training/)?player\?'
+ _LOGIN_URL = 'https://app.pluralsight.com/id/'
+
+ _NETRC_MACHINE = 'pluralsight'
+
+ _TESTS = [{
+ 'url': 'http://www.pluralsight.com/training/player?author=mike-mckeown&name=hosting-sql-server-windows-azure-iaas-m7-mgmt&mode=live&clip=3&course=hosting-sql-server-windows-azure-iaas',
+ 'md5': '4d458cf5cf4c593788672419a8dd4cf8',
+ 'info_dict': {
+ 'id': 'hosting-sql-server-windows-azure-iaas-m7-mgmt-04',
+ 'ext': 'mp4',
+ 'title': 'Demo Monitoring',
+ 'duration': 338,
+ },
+ 'skip': 'Requires pluralsight account credentials',
+ }, {
+ 'url': 'https://app.pluralsight.com/training/player?course=angularjs-get-started&author=scott-allen&name=angularjs-get-started-m1-introduction&clip=0&mode=live',
+ 'only_matching': True,
+ }, {
+ # available without pluralsight account
+ 'url': 'http://app.pluralsight.com/training/player?author=scott-allen&name=angularjs-get-started-m1-introduction&mode=live&clip=0&course=angularjs-get-started',
+ 'only_matching': True,
+ }, {
+ 'url': 'https://app.pluralsight.com/player?course=ccna-intro-networking&author=ross-bagurdes&name=ccna-intro-networking-m06&clip=0',
+ 'only_matching': True,
+ }]
+
+ GRAPHQL_VIEWCLIP_TMPL = '''
+query viewClip {
+ viewClip(input: {
+ author: "%(author)s",
+ clipIndex: %(clipIndex)d,
+ courseName: "%(courseName)s",
+ includeCaptions: %(includeCaptions)s,
+ locale: "%(locale)s",
+ mediaType: "%(mediaType)s",
+ moduleName: "%(moduleName)s",
+ quality: "%(quality)s"
+ }) {
+ urls {
+ url
+ cdn
+ rank
+ source
+ },
+ status
+ }
+}'''
+
+ def _perform_login(self, username, password):
+ login_page = self._download_webpage(
+ self._LOGIN_URL, None, 'Downloading login page')
+
+ login_form = self._hidden_inputs(login_page)
+
+ login_form.update({
+ 'Username': username,
+ 'Password': password,
+ })
+
+ post_url = self._search_regex(
+ r'<form[^>]+action=(["\'])(?P<url>.+?)\1', login_page,
+ 'post url', default=self._LOGIN_URL, group='url')
+
+ if not post_url.startswith('http'):
+ post_url = compat_urlparse.urljoin(self._LOGIN_URL, post_url)
+
+ response = self._download_webpage(
+ post_url, None, 'Logging in',
+ data=urlencode_postdata(login_form),
+ headers={'Content-Type': 'application/x-www-form-urlencoded'})
+
+ error = self._search_regex(
+ r'<span[^>]+class="field-validation-error"[^>]*>([^<]+)</span>',
+ response, 'error message', default=None)
+ if error:
+ raise ExtractorError('Unable to log in: %s' % error, expected=True)
+
+ if all(not re.search(p, response) for p in (
+ r'__INITIAL_STATE__', r'["\']currentUser["\']',
+ # new layout?
+ r'>\s*Sign out\s*<')):
+ BLOCKED = 'Your account has been blocked due to suspicious activity'
+ if BLOCKED in response:
+ raise ExtractorError(
+ 'Unable to log in: %s' % BLOCKED, expected=True)
+ MUST_AGREE = 'To continue using Pluralsight, you must agree to'
+ if any(p in response for p in (MUST_AGREE, '>Disagree<', '>Agree<')):
+ raise ExtractorError(
+ 'Unable to log in: %s some documents. Go to pluralsight.com, '
+ 'log in and agree to what Pluralsight requires.'
+ % MUST_AGREE, expected=True)
+
+ raise ExtractorError('Unable to log in')
+
+ def _get_subtitles(self, author, clip_idx, clip_id, lang, name, duration, video_id):
+ captions = None
+ if clip_id:
+ captions = self._download_json(
+ '%s/transcript/api/v1/caption/json/%s/%s'
+ % (self._API_BASE, clip_id, lang), video_id,
+ 'Downloading captions JSON', 'Unable to download captions JSON',
+ fatal=False)
+ if not captions:
+ captions_post = {
+ 'a': author,
+ 'cn': int(clip_idx),
+ 'lc': lang,
+ 'm': name,
+ }
+ captions = self._download_json(
+ '%s/player/retrieve-captions' % self._API_BASE, video_id,
+ 'Downloading captions JSON', 'Unable to download captions JSON',
+ fatal=False, data=json.dumps(captions_post).encode('utf-8'),
+ headers={'Content-Type': 'application/json;charset=utf-8'})
+ if captions:
+ return {
+ lang: [{
+ 'ext': 'json',
+ 'data': json.dumps(captions),
+ }, {
+ 'ext': 'srt',
+ 'data': self._convert_subtitles(duration, captions),
+ }]
+ }
+
+ @staticmethod
+ def _convert_subtitles(duration, subs):
+ srt = ''
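+ # caption JSON key casing differs between API versions, hence the paired key tuples below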
+ TIME_OFFSET_KEYS = ('displayTimeOffset', 'DisplayTimeOffset')
+ TEXT_KEYS = ('text', 'Text')
+ for num, current in enumerate(subs):
+ start, text = (
+ float_or_none(dict_get(current, TIME_OFFSET_KEYS, skip_false_values=False)),
+ dict_get(current, TEXT_KEYS))
+ if start is None or text is None:
+ continue
+ end = duration if num == len(subs) - 1 else float_or_none(
+ dict_get(subs[num + 1], TIME_OFFSET_KEYS, skip_false_values=False))
+ if end is None:
+ continue
+ srt += os.linesep.join(
+ (
+ '%d' % (num + 1),  # SRT counters are 1-based
+ '%s --> %s' % (
+ srt_subtitles_timecode(start),
+ srt_subtitles_timecode(end)),
+ text,
+ os.linesep,
+ ))
+ return srt
+
+ def _real_extract(self, url):
+ qs = parse_qs(url)
+
+ author = qs.get('author', [None])[0]
+ name = qs.get('name', [None])[0]
+ clip_idx = qs.get('clip', [None])[0]
+ course_name = qs.get('course', [None])[0]
+
+ if any(not f for f in (author, name, clip_idx, course_name,)):
+ raise ExtractorError('Invalid URL', expected=True)
+
+ display_id = '%s-%s' % (name, clip_idx)
+
+ course = self._download_course(course_name, url, display_id)
+
+ collection = course['modules']
+
+ clip = None
+
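+ # field names differ between the GraphQL and legacy course payloads, so check both variants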
+ for module_ in collection:
+ if name in (module_.get('moduleName'), module_.get('name')):
+ for clip_ in module_.get('clips', []):
+ clip_index = clip_.get('clipIndex')
+ if clip_index is None:
+ clip_index = clip_.get('index')
+ if clip_index is None:
+ continue
+ if compat_str(clip_index) == clip_idx:
+ clip = clip_
+ break
+
+ if not clip:
+ raise ExtractorError('Unable to resolve clip')
+
+ title = clip['title']
+ clip_id = clip.get('clipName') or clip.get('name') or clip['clipId']
+
+ QUALITIES = {
+ 'low': {'width': 640, 'height': 480},
+ 'medium': {'width': 848, 'height': 640},
+ 'high': {'width': 1024, 'height': 768},
+ 'high-widescreen': {'width': 1280, 'height': 720},
+ }
+
+ QUALITIES_PREFERENCE = ('low', 'medium', 'high', 'high-widescreen',)
+ quality_key = qualities(QUALITIES_PREFERENCE)
+
+ AllowedQuality = collections.namedtuple('AllowedQuality', ['ext', 'qualities'])
+
+ ALLOWED_QUALITIES = (
+ AllowedQuality('webm', ['high', ]),
+ AllowedQuality('mp4', ['low', 'medium', 'high', ]),
+ )
+
+ # Some courses also offer widescreen resolution for high quality (see
+ # https://github.com/ytdl-org/youtube-dl/issues/7766)
+ widescreen = course.get('supportsWideScreenVideoFormats') is True
+ best_quality = 'high-widescreen' if widescreen else 'high'
+ if widescreen:
+ for allowed_quality in ALLOWED_QUALITIES:
+ allowed_quality.qualities.append(best_quality)
+
+ # To minimize the number of calls to the ViewClip API and reduce the
+ # probability of being throttled or banned by Pluralsight, request only a
+ # single format unless a formats listing was explicitly requested.
+ if self.get_param('listformats', False):
+ allowed_qualities = ALLOWED_QUALITIES
+ else:
+ def guess_allowed_qualities():
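+ # requested format ids look like "<ext>-<quality>", e.g. "mp4-high-widescreen"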
+ req_format = self.get_param('format') or 'best'
+ req_format_split = req_format.split('-', 1)
+ if len(req_format_split) > 1:
+ req_ext, req_quality = req_format_split
+ req_quality = '-'.join(req_quality.split('-')[:2])
+ for allowed_quality in ALLOWED_QUALITIES:
+ if req_ext == allowed_quality.ext and req_quality in allowed_quality.qualities:
+ return (AllowedQuality(req_ext, (req_quality, )), )
+ req_ext = 'webm' if self.get_param('prefer_free_formats') else 'mp4'
+ return (AllowedQuality(req_ext, (best_quality, )), )
+ allowed_qualities = guess_allowed_qualities()
+
+ formats = []
+ for ext, qualities_ in allowed_qualities:
+ for quality in qualities_:
+ f = QUALITIES[quality].copy()
+ clip_post = {
+ 'author': author,
+ 'includeCaptions': 'false',
+ 'clipIndex': int(clip_idx),
+ 'courseName': course_name,
+ 'locale': 'en',
+ 'moduleName': name,
+ 'mediaType': ext,
+ 'quality': '%dx%d' % (f['width'], f['height']),
+ }
+ format_id = '%s-%s' % (ext, quality)
+
+ try:
+ viewclip = self._download_json(
+ self._GRAPHQL_EP, display_id,
+ 'Downloading %s viewclip graphql' % format_id,
+ data=json.dumps({
+ 'query': self.GRAPHQL_VIEWCLIP_TMPL % clip_post,
+ 'variables': {}
+ }).encode('utf-8'),
+ headers=self._GRAPHQL_HEADERS)['data']['viewClip']
+ except ExtractorError:
+ # Still works, but will most likely be removed soon
+ viewclip = self._download_json(
+ '%s/video/clips/viewclip' % self._API_BASE, display_id,
+ 'Downloading %s viewclip JSON' % format_id, fatal=False,
+ data=json.dumps(clip_post).encode('utf-8'),
+ headers={'Content-Type': 'application/json;charset=utf-8'})
+
+ # Pluralsight tracks multiple sequential calls to the ViewClip API and starts
+ # returning 429 HTTP errors after some time (see
+ # https://github.com/ytdl-org/youtube-dl/pull/6989). It may even lead to an
+ # account ban (see https://github.com/ytdl-org/youtube-dl/issues/6842).
+ # To somewhat reduce the probability of these consequences, sleep a random
+ # amount of time before each call to ViewClip.
+ self._sleep(
+ random.randint(5, 10), display_id,
+ '%(video_id)s: Waiting for %(timeout)s seconds to avoid throttling')
+
+ if not viewclip:
+ continue
+
+ clip_urls = viewclip.get('urls')
+ if not isinstance(clip_urls, list):
+ continue
+
+ for clip_url_data in clip_urls:
+ clip_url = clip_url_data.get('url')
+ if not clip_url:
+ continue
+ cdn = clip_url_data.get('cdn')
+ clip_f = f.copy()
+ clip_f.update({
+ 'url': clip_url,
+ 'ext': ext,
+ 'format_id': '%s-%s' % (format_id, cdn) if cdn else format_id,
+ 'quality': quality_key(quality),
+ 'source_preference': int_or_none(clip_url_data.get('rank')),
+ })
+ formats.append(clip_f)
+
+ duration = int_or_none(
+ clip.get('duration')) or parse_duration(clip.get('formattedDuration'))
+
+ # TODO: other languages?
+ subtitles = self.extract_subtitles(
+ author, clip_idx, clip.get('clipId'), 'en', name, duration, display_id)
+
+ return {
+ 'id': clip_id,
+ 'title': title,
+ 'duration': duration,
+ 'creator': author,
+ 'formats': formats,
+ 'subtitles': subtitles,
+ }
+
+
+class PluralsightCourseIE(PluralsightBaseIE):
+ IE_NAME = 'pluralsight:course'
+ _VALID_URL = r'https?://(?:(?:www|app)\.)?pluralsight\.com/(?:library/)?courses/(?P<id>[^/]+)'
+ _TESTS = [{
+ # Free course from Pluralsight Starter Subscription for Microsoft TechNet
+ # https://offers.pluralsight.com/technet?loc=zTS3z&prod=zOTprodz&tech=zOttechz&prog=zOTprogz&type=zSOz&media=zOTmediaz&country=zUSz
+ 'url': 'http://www.pluralsight.com/courses/hosting-sql-server-windows-azure-iaas',
+ 'info_dict': {
+ 'id': 'hosting-sql-server-windows-azure-iaas',
+ 'title': 'Hosting SQL Server in Microsoft Azure IaaS Fundamentals',
+ 'description': 'md5:61b37e60f21c4b2f91dc621a977d0986',
+ },
+ 'playlist_count': 31,
+ }, {
+ # available without pluralsight account
+ 'url': 'https://www.pluralsight.com/courses/angularjs-get-started',
+ 'only_matching': True,
+ }, {
+ 'url': 'https://app.pluralsight.com/library/courses/understanding-microsoft-azure-amazon-aws/table-of-contents',
+ 'only_matching': True,
+ }]
+
+ def _real_extract(self, url):
+ course_id = self._match_id(url)
+
+ # TODO: PSM cookie
+
+ course = self._download_course(course_id, url, course_id)
+
+ title = course['title']
+ course_name = course['name']
+ course_data = course['modules']
+ description = course.get('description') or course.get('shortDescription')
+
+ entries = []
+ for num, module in enumerate(course_data, 1):
+ author = module.get('author')
+ module_name = module.get('name')
+ if not author or not module_name:
+ continue
+ for clip in module.get('clips', []):
+ clip_index = int_or_none(clip.get('index'))
+ if clip_index is None:
+ continue
+ clip_url = update_url_query(
+ '%s/player' % self._API_BASE, query={
+ 'mode': 'live',
+ 'course': course_name,
+ 'author': author,
+ 'name': module_name,
+ 'clip': clip_index,
+ })
+ entries.append({
+ '_type': 'url_transparent',
+ 'url': clip_url,
+ 'ie_key': PluralsightIE.ie_key(),
+ 'chapter': module.get('title'),
+ 'chapter_number': num,
+ 'chapter_id': module.get('moduleRef'),
+ })
+
+ return self.playlist_result(entries, course_id, title, description)
diff --git a/yt_dlp/extractor/plutotv.py b/yt_dlp/extractor/plutotv.py
new file mode 100644
index 0000000..5898d92
--- /dev/null
+++ b/yt_dlp/extractor/plutotv.py
@@ -0,0 +1,195 @@
+import re
+import uuid
+
+from .common import InfoExtractor
+from ..compat import (
+ compat_str,
+ compat_urlparse,
+)
+from ..utils import (
+ ExtractorError,
+ float_or_none,
+ int_or_none,
+ try_get,
+ url_or_none,
+)
+
+
+class PlutoTVIE(InfoExtractor):
+ _WORKING = False
+ _VALID_URL = r'''(?x)
+ https?://(?:www\.)?pluto\.tv(?:/[^/]+)?/on-demand
+ /(?P<video_type>movies|series)
+ /(?P<series_or_movie_slug>[^/]+)
+ (?:
+ (?:/seasons?/(?P<season_no>\d+))?
+ (?:/episode/(?P<episode_slug>[^/]+))?
+ )?
+ /?(?:$|[#?])'''
+
+ _INFO_URL = 'https://service-vod.clusters.pluto.tv/v3/vod/slugs/'
+ _INFO_QUERY_PARAMS = {
+ 'appName': 'web',
+ 'appVersion': 'na',
+ 'clientID': compat_str(uuid.uuid1()),
+ 'clientModelNumber': 'na',
+ 'serverSideAds': 'false',
+ 'deviceMake': 'unknown',
+ 'deviceModel': 'web',
+ 'deviceType': 'web',
+ 'deviceVersion': 'unknown',
+ 'sid': compat_str(uuid.uuid1()),
+ }
+ _TESTS = [
+ {
+ 'url': 'https://pluto.tv/on-demand/series/i-love-money/season/2/episode/its-in-the-cards-2009-2-3',
+ 'md5': 'ebcdd8ed89aaace9df37924f722fd9bd',
+ 'info_dict': {
+ 'id': '5de6c598e9379ae4912df0a8',
+ 'ext': 'mp4',
+ 'title': 'It\'s In The Cards',
+ 'episode': 'It\'s In The Cards',
+ 'description': 'The teams face off against each other in a 3-on-2 soccer showdown. Strategy comes into play, though, as each team gets to select their opposing teams’ two defenders.',
+ 'series': 'I Love Money',
+ 'season_number': 2,
+ 'episode_number': 3,
+ 'duration': 3600,
+ }
+ }, {
+ 'url': 'https://pluto.tv/on-demand/series/i-love-money/season/1/',
+ 'playlist_count': 11,
+ 'info_dict': {
+ 'id': '5de6c582e9379ae4912dedbd',
+ 'title': 'I Love Money - Season 1',
+ }
+ }, {
+ 'url': 'https://pluto.tv/on-demand/series/i-love-money/',
+ 'playlist_count': 26,
+ 'info_dict': {
+ 'id': '5de6c582e9379ae4912dedbd',
+ 'title': 'I Love Money',
+ }
+ }, {
+ 'url': 'https://pluto.tv/on-demand/movies/arrival-2015-1-1',
+ 'md5': '3cead001d317a018bf856a896dee1762',
+ 'info_dict': {
+ 'id': '5e83ac701fa6a9001bb9df24',
+ 'ext': 'mp4',
+ 'title': 'Arrival',
+ 'description': 'When mysterious spacecraft touch down across the globe, an elite team - led by expert translator Louise Banks (Academy Award® nominee Amy Adams) – races against time to decipher their intent.',
+ 'duration': 9000,
+ }
+ }, {
+ 'url': 'https://pluto.tv/en/on-demand/series/manhunters-fugitive-task-force/seasons/1/episode/third-times-the-charm-1-1',
+ 'only_matching': True,
+ }, {
+ 'url': 'https://pluto.tv/it/on-demand/series/csi-vegas/episode/legacy-2021-1-1',
+ 'only_matching': True,
+ },
+ {
+ 'url': 'https://pluto.tv/en/on-demand/movies/attack-of-the-killer-tomatoes-1977-1-1-ptv1',
+ 'md5': '7db56369c0da626a32d505ec6eb3f89f',
+ 'info_dict': {
+ 'id': '5b190c7bb0875c36c90c29c4',
+ 'ext': 'mp4',
+ 'title': 'Attack of the Killer Tomatoes',
+ 'description': 'A group of scientists band together to save the world from mutated tomatoes that KILL! (1978)',
+ 'duration': 5700,
+ }
+ }
+ ]
+
+ def _to_ad_free_formats(self, video_id, formats, subtitles):
+ ad_free_formats, ad_free_subtitles, m3u8_urls = [], {}, set()
+ for fmt in formats:
+ res = self._download_webpage(
+ fmt.get('url'), video_id, note='Downloading m3u8 playlist',
+ fatal=False)
+ if not res:
+ continue
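+ # stitched (ad-inserted) playlists reference segments like <base>/0-end/<name>.ts;
+ # rebuild the ad-free master playlist URL from that segment base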
+ first_segment_url = re.search(
+ r'^(https?://.*/)0\-(end|[0-9]+)/[^/]+\.ts$', res,
+ re.MULTILINE)
+ if first_segment_url:
+ m3u8_urls.add(
+ compat_urlparse.urljoin(first_segment_url.group(1), '0-end/master.m3u8'))
+ continue
+ first_segment_url = re.search(
+ r'^(https?://.*/).+\-0+[0-1]0\.ts$', res,
+ re.MULTILINE)
+ if first_segment_url:
+ m3u8_urls.add(
+ compat_urlparse.urljoin(first_segment_url.group(1), 'master.m3u8'))
+ continue
+
+ for m3u8_url in m3u8_urls:
+ fmts, subs = self._extract_m3u8_formats_and_subtitles(
+ m3u8_url, video_id, 'mp4', 'm3u8_native', m3u8_id='hls', fatal=False)
+ ad_free_formats.extend(fmts)
+ ad_free_subtitles = self._merge_subtitles(ad_free_subtitles, subs)
+ if ad_free_formats:
+ formats, subtitles = ad_free_formats, ad_free_subtitles
+ else:
+ self.report_warning('Unable to find ad-free formats')
+ return formats, subtitles
+
+ def _get_video_info(self, video_json, slug, series_name=None):
+ video_id = video_json.get('_id', slug)
+ formats, subtitles = [], {}
+ for video_url in try_get(video_json, lambda x: x['stitched']['urls'], list) or []:
+ if video_url.get('type') != 'hls':
+ continue
+ url = url_or_none(video_url.get('url'))
+ if not url:
+ continue
+
+ fmts, subs = self._extract_m3u8_formats_and_subtitles(
+ url, video_id, 'mp4', 'm3u8_native', m3u8_id='hls', fatal=False)
+ formats.extend(fmts)
+ subtitles = self._merge_subtitles(subtitles, subs)
+
+ formats, subtitles = self._to_ad_free_formats(video_id, formats, subtitles)
+
+ info = {
+ 'id': video_id,
+ 'formats': formats,
+ 'subtitles': subtitles,
+ 'title': video_json.get('name'),
+ 'description': video_json.get('description'),
+ 'duration': float_or_none(video_json.get('duration'), scale=1000),
+ }
+ if series_name:
+ info.update({
+ 'series': series_name,
+ 'episode': video_json.get('name'),
+ 'season_number': int_or_none(video_json.get('season')),
+ 'episode_number': int_or_none(video_json.get('number')),
+ })
+ return info
+
+ def _real_extract(self, url):
+ mobj = self._match_valid_url(url).groupdict()
+ info_slug = mobj['series_or_movie_slug']
+ video_json = self._download_json(self._INFO_URL + info_slug, info_slug, query=self._INFO_QUERY_PARAMS)
+
+ if mobj['video_type'] == 'series':
+ series_name = video_json.get('name', info_slug)
+ season_number, episode_slug = mobj.get('season_number'), mobj.get('episode_slug')
+
+ videos = []
+ for season in video_json['seasons']:
+ if season_number is not None and season_number != int_or_none(season.get('number')):
+ continue
+ for episode in season['episodes']:
+ if episode_slug is not None and episode_slug != episode.get('slug'):
+ continue
+ videos.append(self._get_video_info(episode, episode_slug, series_name))
+ if not videos:
+ raise ExtractorError('Failed to find any videos to extract')
+ if episode_slug is not None and len(videos) == 1:
+ return videos[0]
+ playlist_title = series_name
+ if season_number is not None:
+ playlist_title += ' - Season %d' % season_number
+ return self.playlist_result(videos,
+ playlist_id=video_json.get('_id', info_slug),
+ playlist_title=playlist_title)
+ return self._get_video_info(video_json, info_slug)
diff --git a/yt_dlp/extractor/podbayfm.py b/yt_dlp/extractor/podbayfm.py
new file mode 100644
index 0000000..2a26fd2
--- /dev/null
+++ b/yt_dlp/extractor/podbayfm.py
@@ -0,0 +1,75 @@
+from .common import InfoExtractor
+from ..utils import OnDemandPagedList, int_or_none, jwt_decode_hs256, try_call
+
+
+def result_from_props(props, episode_id=None):
+ return {
+ 'id': props.get('podcast_id') or episode_id,
+ 'title': props.get('title'),
+ 'url': props['mediaURL'],
+ 'ext': 'mp3',
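+ # the 'image' prop is a JWT whose payload carries the actual thumbnail URL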
+ 'thumbnail': try_call(lambda: jwt_decode_hs256(props['image'])['url']),
+ 'timestamp': props.get('timestamp'),
+ 'duration': int_or_none(props.get('duration')),
+ }
+
+
+class PodbayFMIE(InfoExtractor):
+ _VALID_URL = r'https?://podbay\.fm/p/[^/]*/e/(?P<id>[^/]*)/?(?:[\?#].*)?$'
+ _TESTS = [{
+ 'url': 'https://podbay.fm/p/behind-the-bastards/e/1647338400',
+ 'md5': '98b41285dcf7989d105a4ed0404054cf',
+ 'info_dict': {
+ 'id': '1647338400',
+ 'title': 'Part One: Kissinger',
+ 'ext': 'mp3',
+ 'thumbnail': r're:^https?://.*\.jpg',
+ 'timestamp': 1647338400,
+ 'duration': 5001,
+ 'upload_date': '20220315',
+ },
+ }]
+
+ def _real_extract(self, url):
+ episode_id = self._match_id(url)
+ webpage = self._download_webpage(url, episode_id)
+ data = self._search_nextjs_data(webpage, episode_id)
+ return result_from_props(data['props']['pageProps']['episode'], episode_id)
+
+
+class PodbayFMChannelIE(InfoExtractor):
+ _VALID_URL = r'https?://podbay\.fm/p/(?P<id>[^/]*)/?(?:[\?#].*)?$'
+ _TESTS = [{
+ 'url': 'https://podbay.fm/p/behind-the-bastards',
+ 'info_dict': {
+ 'id': 'behind-the-bastards',
+ 'title': 'Behind the Bastards',
+ },
+ }]
+ _PAGE_SIZE = 10
+
+ def _fetch_page(self, channel_id, pagenum):
+ return self._download_json(
+ f'https://podbay.fm/api/podcast?reverse=true&page={pagenum}&slug={channel_id}',
+ channel_id)['podcast']
+
+ @staticmethod
+ def _results_from_page(channel_id, page):
+ return [{
+ **result_from_props(e),
+ 'extractor': PodbayFMIE.IE_NAME,
+ 'extractor_key': PodbayFMIE.ie_key(),
+ # somehow they use timestamps as the episode identifier
+ 'webpage_url': f'https://podbay.fm/p/{channel_id}/e/{e["timestamp"]}',
+ } for e in page['episodes']]
+
+ def _real_extract(self, url):
+ channel_id = self._match_id(url)
+
+ first_page = self._fetch_page(channel_id, 0)
+ entries = OnDemandPagedList(
+ lambda pagenum: self._results_from_page(
+ channel_id, self._fetch_page(channel_id, pagenum) if pagenum else first_page),
+ self._PAGE_SIZE)
+
+ return self.playlist_result(entries, channel_id, first_page.get('title'))
diff --git a/yt_dlp/extractor/podchaser.py b/yt_dlp/extractor/podchaser.py
new file mode 100644
index 0000000..fc2d407
--- /dev/null
+++ b/yt_dlp/extractor/podchaser.py
@@ -0,0 +1,97 @@
+import functools
+import json
+
+from .common import InfoExtractor
+from ..utils import (
+ OnDemandPagedList,
+ float_or_none,
+ str_or_none,
+ str_to_int,
+ traverse_obj,
+ unified_timestamp,
+)
+
+
+class PodchaserIE(InfoExtractor):
+ _VALID_URL = r'https?://(?:www\.)?podchaser\.com/podcasts/[\w-]+-(?P<podcast_id>\d+)(?:/episodes/[\w-]+-(?P<id>\d+))?'
+ _PAGE_SIZE = 100
+ _TESTS = [{
+ 'url': 'https://www.podchaser.com/podcasts/cum-town-36924/episodes/ep-285-freeze-me-off-104365585',
+ 'info_dict': {
+ 'id': '104365585',
+ 'title': 'Ep. 285 – freeze me off',
+ 'description': 'cam ahn',
+ 'thumbnail': r're:^https?://.*\.jpg$',
+ 'ext': 'mp3',
+ 'categories': ['Comedy'],
+ 'tags': ['comedy', 'dark humor'],
+ 'series': 'Cum Town',
+ 'duration': 3708,
+ 'timestamp': 1636531259,
+ 'upload_date': '20211110',
+ 'average_rating': 4.0
+ }
+ }, {
+ 'url': 'https://www.podchaser.com/podcasts/the-bone-zone-28853',
+ 'info_dict': {
+ 'id': '28853',
+ 'title': 'The Bone Zone',
+ 'description': 'Podcast by The Bone Zone',
+ },
+ 'playlist_count': 275
+ }, {
+ 'url': 'https://www.podchaser.com/podcasts/sean-carrolls-mindscape-scienc-699349/episodes',
+ 'info_dict': {
+ 'id': '699349',
+ 'title': 'Sean Carroll\'s Mindscape: Science, Society, Philosophy, Culture, Arts, and Ideas',
+ 'description': 'md5:2cbd8f4749891a84dc8235342e0b5ff1'
+ },
+ 'playlist_mincount': 225
+ }]
+
+ @staticmethod
+ def _parse_episode(episode, podcast):
+ return {
+ 'id': str(episode.get('id')),
+ 'title': episode.get('title'),
+ 'description': episode.get('description'),
+ 'url': episode.get('audio_url'),
+ 'thumbnail': episode.get('image_url'),
+ 'duration': str_to_int(episode.get('length')),
+ 'timestamp': unified_timestamp(episode.get('air_date')),
+ 'average_rating': float_or_none(episode.get('rating')),
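+ # categories may appear at the top level or under 'summary'; merge both and dedupe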
+ 'categories': list(set(traverse_obj(podcast, (('summary', None), 'categories', ..., 'text')))),
+ 'tags': traverse_obj(podcast, ('tags', ..., 'text')),
+ 'series': podcast.get('title'),
+ }
+
+ def _call_api(self, path, *args, **kwargs):
+ return self._download_json(f'https://api.podchaser.com/{path}', *args, **kwargs)
+
+ def _fetch_page(self, podcast_id, podcast, page):
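+ # the list/episode endpoint pages by absolute offset: start = page * count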
+ json_response = self._call_api(
+ 'list/episode', podcast_id,
+ headers={'Content-Type': 'application/json;charset=utf-8'},
+ data=json.dumps({
+ 'start': page * self._PAGE_SIZE,
+ 'count': self._PAGE_SIZE,
+ 'sort_order': 'SORT_ORDER_RECENT',
+ 'filters': {
+ 'podcast_id': podcast_id
+ },
+ 'options': {}
+ }).encode())
+
+ for episode in json_response['entities']:
+ yield self._parse_episode(episode, podcast)
+
+ def _real_extract(self, url):
+ podcast_id, episode_id = self._match_valid_url(url).group('podcast_id', 'id')
+ podcast = self._call_api(f'podcasts/{podcast_id}', episode_id or podcast_id)
+ if not episode_id:
+ return self.playlist_result(
+ OnDemandPagedList(functools.partial(self._fetch_page, podcast_id, podcast), self._PAGE_SIZE),
+ str_or_none(podcast.get('id')), podcast.get('title'), podcast.get('description'))
+
+ episode = self._call_api(f'episodes/{episode_id}', episode_id)
+ return self._parse_episode(episode, podcast)
diff --git a/yt_dlp/extractor/podomatic.py b/yt_dlp/extractor/podomatic.py
new file mode 100644
index 0000000..37b6869
--- /dev/null
+++ b/yt_dlp/extractor/podomatic.py
@@ -0,0 +1,74 @@
+import json
+
+from .common import InfoExtractor
+from ..utils import int_or_none
+
+
+class PodomaticIE(InfoExtractor):
+ _WORKING = False
+ IE_NAME = 'podomatic'
+ _VALID_URL = r'''(?x)
+ (?P<proto>https?)://
+ (?:
+ (?P<channel>[^.]+)\.podomatic\.com/entry|
+ (?:www\.)?podomatic\.com/podcasts/(?P<channel_2>[^/]+)/episodes
+ )/
+ (?P<id>[^/?#&]+)
+ '''
+
+ _TESTS = [{
+ 'url': 'http://scienceteachingtips.podomatic.com/entry/2009-01-02T16_03_35-08_00',
+ 'md5': '84bb855fcf3429e6bf72460e1eed782d',
+ 'info_dict': {
+ 'id': '2009-01-02T16_03_35-08_00',
+ 'ext': 'mp3',
+ 'uploader': 'Science Teaching Tips',
+ 'uploader_id': 'scienceteachingtips',
+ 'title': '64. When the Moon Hits Your Eye',
+ 'duration': 446,
+ }
+ }, {
+ 'url': 'http://ostbahnhof.podomatic.com/entry/2013-11-15T16_31_21-08_00',
+ 'md5': 'd2cf443931b6148e27638650e2638297',
+ 'info_dict': {
+ 'id': '2013-11-15T16_31_21-08_00',
+ 'ext': 'mp3',
+ 'uploader': 'Ostbahnhof / Techno Mix',
+ 'uploader_id': 'ostbahnhof',
+ 'title': 'Einunddreizig',
+ 'duration': 3799,
+ }
+ }, {
+ 'url': 'https://www.podomatic.com/podcasts/scienceteachingtips/episodes/2009-01-02T16_03_35-08_00',
+ 'only_matching': True,
+ }]
+
+ def _real_extract(self, url):
+ mobj = self._match_valid_url(url)
+ video_id = mobj.group('id')
+ channel = mobj.group('channel') or mobj.group('channel_2')
+
+ json_url = '%s://%s.podomatic.com/entry/embed_params/%s?permalink=true&rtmp=0' % (
+ mobj.group('proto'), channel, video_id)
+ data_json = self._download_webpage(
+ json_url, video_id, 'Downloading video info')
+ data = json.loads(data_json)
+
+ video_url = data['downloadLink']
+ if not video_url:
+ video_url = '%s/%s' % (data['streamer'].replace('rtmp', 'http'), data['mediaLocation'])
+ uploader = data['podcast']
+ title = data['title']
+ thumbnail = data['imageLocation']
+ duration = int_or_none(data.get('length'), 1000)
+
+ return {
+ 'id': video_id,
+ 'url': video_url,
+ 'title': title,
+ 'uploader': uploader,
+ 'uploader_id': channel,
+ 'thumbnail': thumbnail,
+ 'duration': duration,
+ }
diff --git a/yt_dlp/extractor/pokemon.py b/yt_dlp/extractor/pokemon.py
new file mode 100644
index 0000000..0911893
--- /dev/null
+++ b/yt_dlp/extractor/pokemon.py
@@ -0,0 +1,136 @@
+from .common import InfoExtractor
+from ..utils import (
+ ExtractorError,
+ extract_attributes,
+ int_or_none,
+ js_to_json,
+ merge_dicts,
+)
+
+
+class PokemonIE(InfoExtractor):
+ _VALID_URL = r'https?://(?:www\.)?pokemon\.com/[a-z]{2}(?:.*?play=(?P<id>[a-z0-9]{32})|/(?:[^/]+/)+(?P<display_id>[^/?#&]+))'
+ _TESTS = [{
+ 'url': 'https://www.pokemon.com/us/pokemon-episodes/20_30-the-ol-raise-and-switch/',
+ 'md5': '2fe8eaec69768b25ef898cda9c43062e',
+ 'info_dict': {
+ 'id': 'afe22e30f01c41f49d4f1d9eab5cd9a4',
+ 'ext': 'mp4',
+ 'title': 'The Ol’ Raise and Switch!',
+ 'description': 'md5:7db77f7107f98ba88401d3adc80ff7af',
+ },
+ 'add_ie': ['LimelightMedia'],
+ }, {
+ # no data-video-title
+ 'url': 'https://www.pokemon.com/fr/episodes-pokemon/films-pokemon/pokemon-lascension-de-darkrai-2008',
+ 'info_dict': {
+ 'id': 'dfbaf830d7e54e179837c50c0c6cc0e1',
+ 'ext': 'mp4',
+ 'title': "Pokémon : L'ascension de Darkrai",
+ 'description': 'md5:d1dbc9e206070c3e14a06ff557659fb5',
+ },
+ 'add_ie': ['LimelightMedia'],
+ 'params': {
+ 'skip_download': True,
+ },
+ }, {
+ 'url': 'http://www.pokemon.com/uk/pokemon-episodes/?play=2e8b5c761f1d4a9286165d7748c1ece2',
+ 'only_matching': True,
+ }, {
+ 'url': 'http://www.pokemon.com/fr/episodes-pokemon/18_09-un-hiver-inattendu/',
+ 'only_matching': True,
+ }, {
+ 'url': 'http://www.pokemon.com/de/pokemon-folgen/01_20-bye-bye-smettbo/',
+ 'only_matching': True,
+ }]
+
+ def _real_extract(self, url):
+ video_id, display_id = self._match_valid_url(url).groups()
+ webpage = self._download_webpage(url, video_id or display_id)
+ video_data = extract_attributes(self._search_regex(
+ r'(<[^>]+data-video-id="%s"[^>]*>)' % (video_id if video_id else '[a-z0-9]{32}'),
+ webpage, 'video data element'))
+ video_id = video_data['data-video-id']
+ title = video_data.get('data-video-title') or self._html_search_meta(
+ 'pkm-title', webpage, 'title', default=None) or self._search_regex(
+ r'<h1[^>]+\bclass=["\']us-title[^>]+>([^<]+)', webpage, 'title')
+ return {
+ '_type': 'url_transparent',
+ 'id': video_id,
+ 'url': 'limelight:media:%s' % video_id,
+ 'title': title,
+ 'description': video_data.get('data-video-summary'),
+ 'thumbnail': video_data.get('data-video-poster'),
+ 'series': 'Pokémon',
+ 'season_number': int_or_none(video_data.get('data-video-season')),
+ 'episode': title,
+ 'episode_number': int_or_none(video_data.get('data-video-episode')),
+ 'ie_key': 'LimelightMedia',
+ }
+
+
+class PokemonWatchIE(InfoExtractor):
+ _VALID_URL = r'https?://watch\.pokemon\.com/[a-z]{2}-[a-z]{2}/(?:#/)?player(?:\.html)?\?id=(?P<id>[a-z0-9]{32})'
+ _API_URL = 'https://www.pokemon.com/api/pokemontv/v2/channels/{0:}'
+ _TESTS = [{
+ 'url': 'https://watch.pokemon.com/en-us/player.html?id=8309a40969894a8e8d5bc1311e9c5667',
+ 'md5': '62833938a31e61ab49ada92f524c42ff',
+ 'info_dict': {
+ 'id': '8309a40969894a8e8d5bc1311e9c5667',
+ 'ext': 'mp4',
+ 'title': 'Lillier and the Staff!',
+ 'description': 'md5:338841b8c21b283d24bdc9b568849f04',
+ }
+ }, {
+ 'url': 'https://watch.pokemon.com/en-us/#/player?id=3fe7752ba09141f0b0f7756d1981c6b2',
+ 'only_matching': True
+ }, {
+ 'url': 'https://watch.pokemon.com/de-de/player.html?id=b3c402e111a4459eb47e12160ab0ba07',
+ 'only_matching': True
+ }]
+
+ def _extract_media(self, channel_array, video_id):
+ for channel in channel_array:
+ for media in channel.get('media') or []:
+ if media.get('id') == video_id:
+ return media
+ return None
+
+ def _real_extract(self, url):
+ video_id = self._match_id(url)
+
+ info = {
+ '_type': 'url',
+ 'id': video_id,
+ 'url': 'limelight:media:%s' % video_id,
+ 'ie_key': 'LimelightMedia',
+ }
+
+ # API call can be avoided entirely if we are listing formats
+ if self.get_param('listformats', False):
+ return info
+
+ webpage = self._download_webpage(url, video_id)
+ build_vars = self._parse_json(self._search_regex(
+ r'(?s)buildVars\s*=\s*({.*?})', webpage, 'build vars'),
+ video_id, transform_source=js_to_json)
+ region = build_vars.get('region')
+ channel_array = self._download_json(self._API_URL.format(region), video_id)
+ video_data = self._extract_media(channel_array, video_id)
+
+ if video_data is None:
+ raise ExtractorError(
+ 'Video %s does not exist' % video_id, expected=True)
+
+ info['_type'] = 'url_transparent'
+ images = video_data.get('images') or {}
+
+ return merge_dicts(info, {
+ 'title': video_data.get('title'),
+ 'description': video_data.get('description'),
+ 'thumbnail': images.get('medium') or images.get('small'),
+ 'series': 'Pokémon',
+ 'season_number': int_or_none(video_data.get('season')),
+ 'episode': video_data.get('title'),
+ 'episode_number': int_or_none(video_data.get('episode')),
+ })
diff --git a/yt_dlp/extractor/pokergo.py b/yt_dlp/extractor/pokergo.py
new file mode 100644
index 0000000..5c7baad
--- /dev/null
+++ b/yt_dlp/extractor/pokergo.py
@@ -0,0 +1,106 @@
+import base64
+
+from .common import InfoExtractor
+from ..utils import (
+ ExtractorError,
+ try_get,
+)
+
+
+class PokerGoBaseIE(InfoExtractor):
+ _NETRC_MACHINE = 'pokergo'
+ _AUTH_TOKEN = None
+ _PROPERTY_ID = '1dfb3940-7d53-4980-b0b0-f28b369a000d'
+
+ def _perform_login(self, username, password):
+ if self._AUTH_TOKEN:
+ return
+ self.report_login()
+ PokerGoBaseIE._AUTH_TOKEN = self._download_json(
+ f'https://subscription.pokergo.com/properties/{self._PROPERTY_ID}/sign-in', None,
+ headers={'authorization': f'Basic {base64.b64encode(f"{username}:{password}".encode()).decode()}'},
+ data=b'')['meta']['token']
+ if not self._AUTH_TOKEN:
+ raise ExtractorError('Unable to get Auth Token.', expected=True)
+
+ def _real_initialize(self):
+ if not self._AUTH_TOKEN:
+ self.raise_login_required(method='password')
+
+
+class PokerGoIE(PokerGoBaseIE):
+ _VALID_URL = r'https?://(?:www\.)?pokergo\.com/videos/(?P<id>[^&$#/?]+)'
+
+ _TESTS = [{
+ 'url': 'https://www.pokergo.com/videos/2a70ec4e-4a80-414b-97ec-725d9b72a7dc',
+ 'info_dict': {
+ 'id': 'aVLOxDzY',
+ 'ext': 'mp4',
+ 'title': 'Poker After Dark | Season 12 (2020) | Cry Me a River | Episode 2',
+ 'description': 'md5:c7a8c29556cbfb6eb3c0d5d622251b71',
+ 'thumbnail': 'https://cdn.jwplayer.com/v2/media/aVLOxDzY/poster.jpg?width=720',
+ 'timestamp': 1608085715,
+ 'duration': 2700.12,
+ 'season_number': 12,
+ 'episode_number': 2,
+ 'series': 'poker after dark',
+ 'upload_date': '20201216',
+ 'season': 'Season 12',
+ 'episode': 'Episode 2',
+ 'display_id': '2a70ec4e-4a80-414b-97ec-725d9b72a7dc',
+ },
+ 'params': {'skip_download': True}
+ }]
+
+ def _real_extract(self, url):
+ id = self._match_id(url)
+ data_json = self._download_json(f'https://api.pokergo.com/v2/properties/{self._PROPERTY_ID}/videos/{id}', id,
+ headers={'authorization': f'Bearer {self._AUTH_TOKEN}'})['data']
+ v_id = data_json['source']
+
+ thumbnails = [{
+ 'url': image['url'],
+ 'id': image.get('label'),
+ 'width': image.get('width'),
+ 'height': image.get('height')
+ } for image in data_json.get('images') or [] if image.get('url')]
+ series_json = next((dct for dct in data_json.get('show_tags') or [] if dct.get('video_id') == id), None) or {}
+
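+ # the media itself is hosted on JW Player; hand off via url_transparent to the JW Platform extractor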
+ return {
+ '_type': 'url_transparent',
+ 'display_id': id,
+ 'title': data_json.get('title'),
+ 'description': data_json.get('description'),
+ 'duration': data_json.get('duration'),
+ 'thumbnails': thumbnails,
+ 'season_number': series_json.get('season'),
+ 'episode_number': series_json.get('episode_number'),
+ 'series': try_get(series_json, lambda x: x['tag']['name']),
+ 'url': f'https://cdn.jwplayer.com/v2/media/{v_id}'
+ }
+
+
+class PokerGoCollectionIE(PokerGoBaseIE):
+ _VALID_URL = r'https?://(?:www\.)?pokergo\.com/collections/(?P<id>[^&$#/?]+)'
+
+ _TESTS = [{
+ 'url': 'https://www.pokergo.com/collections/19ffe481-5dae-481a-8869-75cc0e3c4700',
+ 'playlist_mincount': 13,
+ 'info_dict': {
+ 'id': '19ffe481-5dae-481a-8869-75cc0e3c4700',
+ },
+ }]
+
+ def _entries(self, id):
+ data_json = self._download_json(f'https://api.pokergo.com/v2/properties/{self._PROPERTY_ID}/collections/{id}?include=entities',
+ id, headers={'authorization': f'Bearer {self._AUTH_TOKEN}'})['data']
+ for video in data_json.get('collection_video') or []:
+ video_id = video.get('id')
+ if video_id:
+ yield self.url_result(
+ f'https://www.pokergo.com/videos/{video_id}',
+ ie=PokerGoIE.ie_key(), video_id=video_id)
+
+ def _real_extract(self, url):
+ id = self._match_id(url)
+ return self.playlist_result(self._entries(id), playlist_id=id)
diff --git a/yt_dlp/extractor/polsatgo.py b/yt_dlp/extractor/polsatgo.py
new file mode 100644
index 0000000..1524a1f
--- /dev/null
+++ b/yt_dlp/extractor/polsatgo.py
@@ -0,0 +1,86 @@
+from uuid import uuid4
+import json
+
+from .common import InfoExtractor
+from ..utils import (
+ int_or_none,
+ try_get,
+ url_or_none,
+ ExtractorError,
+)
+
+
+class PolsatGoIE(InfoExtractor):
+ _VALID_URL = r'https?://(?:www\.)?polsat(?:box)?go\.pl/.+/(?P<id>[0-9a-fA-F]+)(?:[/#?]|$)'
+ _TESTS = [{
+ 'url': 'https://polsatgo.pl/wideo/seriale/swiat-wedlug-kiepskich/5024045/sezon-1/5028300/swiat-wedlug-kiepskich-odcinek-88/4121',
+ 'info_dict': {
+ 'id': '4121',
+ 'ext': 'mp4',
+ 'title': 'Świat według Kiepskich - Odcinek 88',
+ 'age_limit': 12,
+ },
+ }]
+
+ def _extract_formats(self, sources, video_id):
+ for source in sources or []:
+ if not source.get('id'):
+ continue
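+ # each media source requires its own getPseudoLicense RPC, which returns the stream URL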
+ url = url_or_none(self._call_api(
+ 'drm', video_id, 'getPseudoLicense',
+ {'mediaId': video_id, 'sourceId': source['id']}).get('url'))
+ if not url:
+ continue
+ yield {
+ 'url': url,
+ 'height': int_or_none(try_get(source, lambda x: x['quality'][:-1]))
+ }
+
+ def _real_extract(self, url):
+ video_id = self._match_id(url)
+ media = self._call_api('navigation', video_id, 'prePlayData', {'mediaId': video_id})['mediaItem']
+
+ formats = list(self._extract_formats(
+ try_get(media, lambda x: x['playback']['mediaSources']), video_id))
+
+ return {
+ 'id': video_id,
+ 'title': media['displayInfo']['title'],
+ 'formats': formats,
+ 'age_limit': int_or_none(media['displayInfo']['ageGroup'])
+ }
+
+ def _call_api(self, endpoint, media_id, method, params):
+ rand_uuid = str(uuid4())
+ res = self._download_json(
+ f'https://b2c-mobile.redefine.pl/rpc/{endpoint}/', media_id,
+ note=f'Downloading {method} JSON metadata',
+ data=json.dumps({
+ 'method': method,
+ 'id': '2137',
+ 'jsonrpc': '2.0',
+ 'params': {
+ **params,
+ 'userAgentData': {
+ 'deviceType': 'mobile',
+ 'application': 'native',
+ 'os': 'android',
+ 'build': 10003,
+ 'widevine': False,
+ 'portal': 'pg',
+ 'player': 'cpplayer',
+ },
+ 'deviceId': {
+ 'type': 'other',
+ 'value': rand_uuid,
+ },
+ 'clientId': rand_uuid,
+ 'cpid': 1,
+ },
+ }).encode('utf-8'),
+ headers={'Content-type': 'application/json'})
+ if not res.get('result'):
+ if res['error']['code'] == 13404:
+ raise ExtractorError('This video is either unavailable in your region or is DRM protected', expected=True)
+ raise ExtractorError(f'Solorz said: {res["error"]["message"]} - {res["error"]["data"]["userMessage"]}')
+ return res['result']
diff --git a/yt_dlp/extractor/polskieradio.py b/yt_dlp/extractor/polskieradio.py
new file mode 100644
index 0000000..e0b22ff
--- /dev/null
+++ b/yt_dlp/extractor/polskieradio.py
@@ -0,0 +1,610 @@
+import itertools
+import json
+import math
+import re
+import urllib.parse
+
+from .common import InfoExtractor
+from ..compat import compat_str
+from ..utils import (
+ ExtractorError,
+ InAdvancePagedList,
+ determine_ext,
+ extract_attributes,
+ int_or_none,
+ js_to_json,
+ parse_iso8601,
+ strip_or_none,
+ traverse_obj,
+ unescapeHTML,
+ unified_timestamp,
+ url_or_none,
+ urljoin,
+)
+
+
+class PolskieRadioBaseExtractor(InfoExtractor):
+ def _extract_webpage_player_entries(self, webpage, playlist_id, base_data):
+ media_urls = set()
+
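+ # legacy players are embedded as elements carrying a JSON-encoded data-media attribute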
+ for data_media in re.findall(r'<[^>]+data-media="?({[^>]+})"?', webpage):
+ media = self._parse_json(data_media, playlist_id, transform_source=unescapeHTML, fatal=False)
+ if not media or not media.get('file') or not media.get('desc'):
+ continue
+ media_url = self._proto_relative_url(media['file'])
+ if media_url in media_urls:
+ continue
+ media_urls.add(media_url)
+ entry = base_data.copy()
+ entry.update({
+ 'id': compat_str(media['id']),
+ 'url': media_url,
+ 'duration': int_or_none(media.get('length')),
+ 'vcodec': 'none' if media.get('provider') == 'audio' else None,
+ })
+ entry_title = urllib.parse.unquote(media['desc'])
+ if entry_title:
+ entry['title'] = entry_title
+ yield entry
+
+
+class PolskieRadioLegacyIE(PolskieRadioBaseExtractor):
+ # legacy sites
+ IE_NAME = 'polskieradio:legacy'
+ _VALID_URL = r'https?://(?:www\.)?polskieradio(?:24)?\.pl/\d+/\d+/[Aa]rtykul/(?P<id>\d+)'
+ _TESTS = [{
+ 'url': 'https://www.polskieradio.pl/8/2382/Artykul/2534482,Zagarysci-Poezja-jak-spoiwo',
+ 'info_dict': {
+ 'id': '2534482',
+ 'title': 'Żagaryści. Poezja jak spoiwo',
+ 'description': 'md5:f18d95d5dcba747a09b635e21a4c0695',
+ },
+ 'playlist': [{
+ 'md5': 'd07559829f61d5a93a75755987ded760',
+ 'info_dict': {
+ 'id': '2516679',
+ 'ext': 'mp3',
+ 'title': 'md5:c6e1234e0b747ad883cb91b7ad06b98c',
+ 'timestamp': 1592654400,
+ 'upload_date': '20200620',
+ 'duration': 1430,
+ 'thumbnail': r're:^https?://static\.prsa\.pl/images/.*\.jpg$'
+ },
+ }],
+ }, {
+ # PR4 audition - other frontend
+ 'url': 'https://www.polskieradio.pl/10/6071/Artykul/2610977,Poglos-29-pazdziernika-godz-2301',
+ 'info_dict': {
+ 'id': '2610977',
+ 'ext': 'mp3',
+ 'title': 'Pogłos 29 października godz. 23:01',
+ },
+ }, {
+ 'url': 'https://polskieradio24.pl/130/4503/Artykul/2621876,Narusza-nasza-suwerennosc-Publicysci-o-uzaleznieniu-funduszy-UE-od-praworzadnosci',
+ 'only_matching': True,
+ }]
+
+ def _real_extract(self, url):
+ playlist_id = self._match_id(url)
+
+ webpage, urlh = self._download_webpage_handle(url, playlist_id)
+ if PolskieRadioIE.suitable(urlh.url):
+ return self.url_result(urlh.url, PolskieRadioIE, playlist_id)
+
+ content = self._search_regex(
+ r'(?s)<div[^>]+class="\s*this-article\s*"[^>]*>(.+?)<div[^>]+class="tags"[^>]*>',
+ webpage, 'content', default=None)
+
+ timestamp = unified_timestamp(self._html_search_regex(
+ r'(?s)<span[^>]+id="datetime2"[^>]*>(.+?)</span>',
+ webpage, 'timestamp', default=None))
+
+ thumbnail_url = self._og_search_thumbnail(webpage, default=None)
+
+ title = self._og_search_title(webpage).strip()
+
+ description = strip_or_none(self._og_search_description(webpage, default=None))
+ description = description.replace('\xa0', ' ') if description is not None else None
+
+ if not content:
+ return {
+ 'id': playlist_id,
+ 'url': self._proto_relative_url(
+ self._search_regex(
+ r"source:\s*'(//static\.prsa\.pl/[^']+)'",
+ webpage, 'audition record url')),
+ 'title': title,
+ 'description': description,
+ 'timestamp': timestamp,
+ 'thumbnail': thumbnail_url,
+ }
+
+ entries = self._extract_webpage_player_entries(content, playlist_id, {
+ 'title': title,
+ 'timestamp': timestamp,
+ 'thumbnail': thumbnail_url,
+ })
+
+ return self.playlist_result(entries, playlist_id, title, description)
+
+
+class PolskieRadioIE(PolskieRadioBaseExtractor):
+ # new next.js sites
+ _VALID_URL = r'https?://(?:[^/]+\.)?(?:polskieradio(?:24)?|radiokierowcow)\.pl/artykul/(?P<id>\d+)'
+ _TESTS = [{
+ # articleData, attachments
+ 'url': 'https://jedynka.polskieradio.pl/artykul/1587943',
+ 'info_dict': {
+ 'id': '1587943',
+ 'title': 'Prof. Andrzej Nowak: o historii nie da się myśleć beznamiętnie',
+ 'description': 'md5:12f954edbf3120c5e7075e17bf9fc5c5',
+ },
+ 'playlist': [{
+ 'md5': '2984ee6ce9046d91fc233bc1a864a09a',
+ 'info_dict': {
+ 'id': '7a85d429-5356-4def-a347-925e4ae7406b',
+ 'ext': 'mp3',
+ 'title': 'md5:d4623290d4ac983bf924061c75c23a0d',
+ },
+ }],
+ }, {
+ # post, legacy html players
+ 'url': 'https://trojka.polskieradio.pl/artykul/2589163,Czy-wciaz-otrzymujemy-zdjecia-z-sond-Voyager',
+ 'info_dict': {
+ 'id': '2589163',
+ 'title': 'Czy wciąż otrzymujemy zdjęcia z sond Voyager?',
+ 'description': 'md5:cf1a7f348d63a2db9c0d7a63d1669473',
+ },
+ 'playlist': [{
+ 'info_dict': {
+ 'id': '2577880',
+ 'ext': 'mp3',
+ 'title': 'md5:a57d10a0c02abd34dd675cb33707ad5a',
+ 'duration': 321,
+ },
+ }],
+ }, {
+ # data, legacy
+ 'url': 'https://radiokierowcow.pl/artykul/2694529',
+ 'info_dict': {
+ 'id': '2694529',
+ 'title': 'Zielona fala reliktem przeszłości?',
+ 'description': 'md5:f20a9a7ed9cb58916c54add94eae3bc0',
+ },
+ 'playlist_count': 3,
+ }, {
+ 'url': 'https://trojka.polskieradio.pl/artykul/1632955',
+ 'only_matching': True,
+ }, {
+ # with mp4 video
+ 'url': 'https://trojka.polskieradio.pl/artykul/1634903',
+ 'only_matching': True,
+ }, {
+ 'url': 'https://jedynka.polskieradio.pl/artykul/3042436,Polityka-wschodnia-ojca-i-syna-Wladyslawa-Lokietka-i-Kazimierza-Wielkiego',
+ 'only_matching': True,
+ }]
+
+ def _real_extract(self, url):
+ playlist_id = self._match_id(url)
+
+ webpage = self._download_webpage(url, playlist_id)
+
+ article_data = traverse_obj(
+ self._search_nextjs_data(webpage, playlist_id), (
+ 'props', 'pageProps', (('data', 'articleData'), 'post', 'data')), get_all=False)
+
+ title = strip_or_none(article_data['title'])
+
+ description = strip_or_none(article_data.get('lead'))
+
+ entries = [{
+ 'url': entry['file'],
+ 'ext': determine_ext(entry.get('fileName')),
+ 'id': self._search_regex(
+ r'([a-f\d]{8}-(?:[a-f\d]{4}-){3}[a-f\d]{12})', entry['file'], 'entry id'),
+ 'title': strip_or_none(entry.get('description')) or title,
+ } for entry in article_data.get('attachments') or () if entry.get('fileType') in ('Audio', )]
+
+ if not entries:
+ # some legacy articles have no json attachments, but players in body
+ entries = self._extract_webpage_player_entries(article_data['content'], playlist_id, {
+ 'title': title,
+ })
+
+ return self.playlist_result(entries, playlist_id, title, description)
+
+
+class PolskieRadioAuditionIE(InfoExtractor):
+ # new next.js sites
+ IE_NAME = 'polskieradio:audition'
+ _VALID_URL = r'https?://(?:[^/]+\.)?polskieradio\.pl/audycj[ae]/(?P<id>\d+)'
+ _TESTS = [{
+ # articles, PR1
+ 'url': 'https://jedynka.polskieradio.pl/audycje/5102',
+ 'info_dict': {
+ 'id': '5102',
+ 'title': 'Historia żywa',
+ 'thumbnail': r're:https://static\.prsa\.pl/images/.+',
+ },
+ 'playlist_mincount': 38,
+ }, {
+ # episodes, PR1
+ 'url': 'https://jedynka.polskieradio.pl/audycje/5769',
+ 'info_dict': {
+ 'id': '5769',
+ 'title': 'AgroFakty',
+ 'thumbnail': r're:https://static\.prsa\.pl/images/.+',
+ },
+ 'playlist_mincount': 269,
+ }, {
+ # both episodes and articles, PR3
+ 'url': 'https://trojka.polskieradio.pl/audycja/8906',
+ 'info_dict': {
+ 'id': '8906',
+ 'title': 'Trójka budzi',
+ 'thumbnail': r're:https://static\.prsa\.pl/images/.+',
+ },
+ 'playlist_mincount': 722,
+ }, {
+ # some articles were "promoted to main page" and thus link to old frontend
+ 'url': 'https://trojka.polskieradio.pl/audycja/305',
+ 'info_dict': {
+ 'id': '305',
+ 'title': 'Co w mowie piszczy?',
+ 'thumbnail': r're:https://static\.prsa\.pl/images/.+',
+ },
+ 'playlist_count': 1523,
+ }]
+
+ def _call_lp3(self, path, query, video_id, note):
+ return self._download_json(
+ f'https://lp3test.polskieradio.pl/{path}', video_id, note,
+ query=query, headers={'x-api-key': '9bf6c5a2-a7d0-4980-9ed7-a3f7291f2a81'})
+
+ def _entries(self, playlist_id, has_episodes, has_articles):
+ for i in itertools.count(0) if has_episodes else []:
+ page = self._call_lp3(
+ 'AudioArticle/GetListByCategoryId', {
+ 'categoryId': playlist_id,
+ 'PageSize': 10,
+ 'skip': i,
+ 'format': 400,
+ }, playlist_id, f'Downloading episode list page {i + 1}')
+ if not traverse_obj(page, 'data'):
+ break
+ for episode in page['data']:
+ yield {
+ 'id': str(episode['id']),
+ 'url': episode['file'],
+ 'title': episode.get('title'),
+ 'duration': int_or_none(episode.get('duration')),
+ 'timestamp': parse_iso8601(episode.get('datePublic')),
+ }
+
+ for i in itertools.count(0) if has_articles else []:
+ page = self._call_lp3(
+ 'Article/GetListByCategoryId', {
+ 'categoryId': playlist_id,
+ 'PageSize': 9,
+ 'skip': i,
+ 'format': 400,
+ }, playlist_id, f'Downloading article list page {i + 1}')
+ if not traverse_obj(page, 'data'):
+ break
+ for article in page['data']:
+ yield {
+ '_type': 'url_transparent',
+ 'id': str(article['id']),
+ 'url': article['url'],
+ 'title': article.get('shortTitle'),
+ 'description': traverse_obj(article, ('description', 'lead')),
+ 'timestamp': parse_iso8601(article.get('datePublic')),
+ }
+
+ def _real_extract(self, url):
+ playlist_id = self._match_id(url)
+
+ page_props = traverse_obj(
+ self._search_nextjs_data(self._download_webpage(url, playlist_id), playlist_id),
+ ('props', 'pageProps', ('data', None)), get_all=False)
+
+ has_episodes = bool(traverse_obj(page_props, 'episodes', 'audios'))
+ has_articles = bool(traverse_obj(page_props, 'articles'))
+
+ return self.playlist_result(
+ self._entries(playlist_id, has_episodes, has_articles), playlist_id,
+ title=traverse_obj(page_props, ('details', 'name')),
+ description=traverse_obj(page_props, ('details', 'description', 'lead')),
+ thumbnail=traverse_obj(page_props, ('details', 'photo')))
+
+
+class PolskieRadioCategoryIE(InfoExtractor):
+ # legacy sites
+ IE_NAME = 'polskieradio:category'
+ _VALID_URL = r'https?://(?:www\.)?polskieradio\.pl/(?:\d+(?:,[^/]+)?/|[^/]+/Tag)(?P<id>\d+)'
+ _TESTS = [{
+ 'url': 'http://www.polskieradio.pl/37,RedakcjaKatolicka/4143,Kierunek-Krakow',
+ 'info_dict': {
+ 'id': '4143',
+ 'title': 'Kierunek Kraków',
+ },
+ 'playlist_mincount': 61
+ }, {
+ 'url': 'http://www.polskieradio.pl/10,czworka/214,muzyka',
+ 'info_dict': {
+ 'id': '214',
+ 'title': 'Muzyka',
+ },
+ 'playlist_mincount': 61
+ }, {
+ # billennium tabs
+ 'url': 'https://www.polskieradio.pl/8/2385',
+ 'info_dict': {
+ 'id': '2385',
+ 'title': 'Droga przez mąkę',
+ },
+ 'playlist_mincount': 111,
+ }, {
+ 'url': 'https://www.polskieradio.pl/10/4930',
+ 'info_dict': {
+ 'id': '4930',
+ 'title': 'Teraz K-pop!',
+ },
+ 'playlist_mincount': 392,
+ }, {
+ # post back pages, audio content directly without articles
+ 'url': 'https://www.polskieradio.pl/8,dwojka/7376,nowa-mowa',
+ 'info_dict': {
+ 'id': '7376',
+ 'title': 'Nowa mowa',
+ },
+ 'playlist_mincount': 244,
+ }, {
+ 'url': 'https://www.polskieradio.pl/Krzysztof-Dziuba/Tag175458',
+ 'info_dict': {
+ 'id': '175458',
+ 'title': 'Krzysztof Dziuba',
+ },
+ 'playlist_mincount': 420,
+ }, {
+ 'url': 'http://www.polskieradio.pl/8,Dwojka/196,Publicystyka',
+ 'only_matching': True,
+ }]
+
+ @classmethod
+ def suitable(cls, url):
+ return False if PolskieRadioLegacyIE.suitable(url) else super().suitable(url)
+
+ def _entries(self, url, page, category_id):
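+ # legacy category pages paginate in one of three ways: "billennium" tab loads,
+ # ASP.NET __doPostBack forms, or plain "next" links; detect which one applies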
+ content = page
+ is_billennium_tabs = 'onclick="TB_LoadTab(' in page
+ is_post_back = 'onclick="__doPostBack(' in page
+ pagination = page if is_billennium_tabs else None
+ for page_num in itertools.count(2):
+ for a_entry, entry_id in re.findall(
+ r'(?s)<article[^>]+>.*?(<a[^>]+href=["\'](?:(?:https?)?://[^/]+)?/\d+/\d+/Artykul/(\d+)[^>]+>).*?</article>',
+ content):
+ entry = extract_attributes(a_entry)
+ if entry.get('href'):
+ yield self.url_result(
+ urljoin(url, entry['href']), PolskieRadioLegacyIE, entry_id, entry.get('title'))
+ for a_entry in re.findall(r'<span data-media=({[^ ]+})', content):
+ yield traverse_obj(self._parse_json(a_entry, category_id), {
+ 'url': 'file',
+ 'id': 'uid',
+ 'duration': 'length',
+ 'title': ('title', {urllib.parse.unquote}),
+ 'description': ('desc', {urllib.parse.unquote}),
+ })
+ if is_billennium_tabs:
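+ # the TB_LoadTab onclick carries pagination params as a JS argument list;
+ # prepend '[' so it parses as a JSON array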
+ params = self._search_json(
+ r'<div[^>]+class=["\']next["\'][^>]*>\s*<a[^>]+onclick=["\']TB_LoadTab\(',
+ pagination, 'next page params', category_id, default=None, close_objects=1,
+ contains_pattern='.+', transform_source=lambda x: '[%s' % js_to_json(unescapeHTML(x)))
+ if not params:
+ break
+ tab_content = self._download_json(
+ 'https://www.polskieradio.pl/CMS/TemplateBoxesManagement/TemplateBoxTabContent.aspx/GetTabContent',
+ category_id, f'Downloading page {page_num}', headers={'content-type': 'application/json'},
+ data=json.dumps(dict(zip((
+ 'boxInstanceId', 'tabId', 'categoryType', 'sectionId', 'categoryId', 'pagerMode',
+ 'subjectIds', 'tagIndexId', 'queryString', 'name', 'openArticlesInParentTemplate',
+ 'idSectionFromUrl', 'maxDocumentAge', 'showCategoryForArticle', 'pageNumber'
+ ), params))).encode())['d']
+ content, pagination = tab_content['Content'], tab_content.get('PagerContent')
+ elif is_post_back:
+ target = self._search_regex(
+ r'onclick=(?:["\'])__doPostBack\((?P<q1>["\'])(?P<target>[\w$]+)(?P=q1)\s*,\s*(?P<q2>["\'])Next(?P=q2)',
+ content, 'pagination postback target', group='target', default=None)
+ if not target:
+ break
+ content = self._download_webpage(
+ url, category_id, f'Downloading page {page_num}',
+ data=urllib.parse.urlencode({
+ **self._hidden_inputs(content),
+ '__EVENTTARGET': target,
+ '__EVENTARGUMENT': 'Next',
+ }).encode())
+ else:
+ next_url = urljoin(url, self._search_regex(
+ r'<div[^>]+class=["\']next["\'][^>]*>\s*<a[^>]+href=(["\'])(?P<url>(?:(?!\1).)+)\1',
+ content, 'next page url', group='url', default=None))
+ if not next_url:
+ break
+ content = self._download_webpage(next_url, category_id, f'Downloading page {page_num}')
+
+ def _real_extract(self, url):
+ category_id = self._match_id(url)
+ webpage, urlh = self._download_webpage_handle(url, category_id)
+ if PolskieRadioAuditionIE.suitable(urlh.url):
+ return self.url_result(urlh.url, PolskieRadioAuditionIE, category_id)
+ title = self._html_search_regex(
+ r'<title>([^<]+)(?: - [^<]+ - [^<]+| w [Pp]olskie[Rr]adio\.pl\s*)</title>',
+ webpage, 'title', fatal=False)
+ return self.playlist_result(
+ self._entries(url, webpage, category_id),
+ category_id, title)
+
+
+class PolskieRadioPlayerIE(InfoExtractor):
+ IE_NAME = 'polskieradio:player'
+ _VALID_URL = r'https?://player\.polskieradio\.pl/anteny/(?P<id>[^/]+)'
+
+ _BASE_URL = 'https://player.polskieradio.pl'
+ _PLAYER_URL = 'https://player.polskieradio.pl/main.bundle.js'
+ _STATIONS_API_URL = 'https://apipr.polskieradio.pl/api/stacje'
+
+ _TESTS = [{
+ 'url': 'https://player.polskieradio.pl/anteny/trojka',
+ 'info_dict': {
+ 'id': '3',
+ 'ext': 'm4a',
+ 'title': 'Trójka',
+ },
+ 'params': {
+ 'format': 'bestaudio',
+ 'skip_download': 'endless stream',
+ },
+ }]
+
+ def _get_channel_list(self, channel_url='no_channel'):
+ player_code = self._download_webpage(
+ self._PLAYER_URL, channel_url,
+ note='Downloading js player')
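+ # the channel list is embedded in the player JS bundle as a JS array literal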
+ channel_list = js_to_json(self._search_regex(
+ r';var r="anteny",a=(\[.+?\])},', player_code, 'channel list'))
+ return self._parse_json(channel_list, channel_url)
+
+ def _real_extract(self, url):
+ channel_url = self._match_id(url)
+ channel_list = self._get_channel_list(channel_url)
+
+ channel = next((c for c in channel_list if c.get('url') == channel_url), None)
+
+ if not channel:
+ raise ExtractorError('Channel not found')
+
+ station_list = self._download_json(self._STATIONS_API_URL, channel_url,
+ note='Downloading stream url list',
+ headers={
+ 'Accept': 'application/json',
+ 'Referer': url,
+ 'Origin': self._BASE_URL,
+ })
+ station = next((s for s in station_list
+ if s.get('Name') == (channel.get('streamName') or channel.get('name'))), None)
+ if not station:
+ raise ExtractorError('Station not found even though the channel was extracted')
+
+ formats = []
+ for stream_url in station['Streams']:
+ stream_url = self._proto_relative_url(stream_url)
+ if stream_url.endswith('/playlist.m3u8'):
+ formats.extend(self._extract_m3u8_formats(stream_url, channel_url, live=True))
+ elif stream_url.endswith('/manifest.f4m'):
+ # .f4m manifests are HDS, not DASH, so use the f4m extractor here
+ formats.extend(self._extract_f4m_formats(stream_url, channel_url))
+ elif stream_url.endswith('/Manifest'):
+ formats.extend(self._extract_ism_formats(stream_url, channel_url))
+ else:
+ formats.append({
+ 'url': stream_url,
+ })
+
+ return {
+ 'id': compat_str(channel['id']),
+ 'formats': formats,
+ 'title': channel.get('name') or channel.get('streamName'),
+ 'display_id': channel_url,
+ 'thumbnail': f'{self._BASE_URL}/images/{channel_url}-color-logo.png',
+ 'is_live': True,
+ }
+
+
+class PolskieRadioPodcastBaseExtractor(InfoExtractor):
+ _API_BASE = 'https://apipodcasts.polskieradio.pl/api'
+
+ def _parse_episode(self, data):
+ return {
+ 'id': data['guid'],
+ 'formats': [{
+ 'url': data['url'],
+ 'filesize': int_or_none(data.get('fileSize')),
+ }],
+ 'title': data['title'],
+ 'description': data.get('description'),
+ 'duration': int_or_none(data.get('length')),
+ 'timestamp': parse_iso8601(data.get('publishDate')),
+ 'thumbnail': url_or_none(data.get('image')),
+ 'series': data.get('podcastTitle'),
+ 'episode': data['title'],
+ }
+
+
+class PolskieRadioPodcastListIE(PolskieRadioPodcastBaseExtractor):
+ IE_NAME = 'polskieradio:podcast:list'
+ _VALID_URL = r'https?://podcasty\.polskieradio\.pl/podcast/(?P<id>\d+)'
+ _TESTS = [{
+ 'url': 'https://podcasty.polskieradio.pl/podcast/8/',
+ 'info_dict': {
+ 'id': '8',
+ 'title': 'Śniadanie w Trójce',
+ 'description': 'md5:57abcc27bc4c6a6b25baa3061975b9ef',
+ 'uploader': 'Beata Michniewicz',
+ },
+ 'playlist_mincount': 714,
+ }]
+ _PAGE_SIZE = 10
+
+ def _call_api(self, podcast_id, page):
+ return self._download_json(
+ f'{self._API_BASE}/Podcasts/{podcast_id}/?pageSize={self._PAGE_SIZE}&page={page}',
+ podcast_id, f'Downloading page {page}')
+
+ def _real_extract(self, url):
+ podcast_id = self._match_id(url)
+ data = self._call_api(podcast_id, 1)
+
+ def get_page(page_num):
+ page_data = self._call_api(podcast_id, page_num + 1) if page_num else data
+ yield from (self._parse_episode(ep) for ep in page_data['items'])
+
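+        # InAdvancePagedList lazily fetches math.ceil(itemCount / _PAGE_SIZE)
+        # pages; the first page reuses the metadata response downloaded above.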
+ return {
+ '_type': 'playlist',
+ 'entries': InAdvancePagedList(
+ get_page, math.ceil(data['itemCount'] / self._PAGE_SIZE), self._PAGE_SIZE),
+ 'id': str(data['id']),
+ 'title': data.get('title'),
+ 'description': data.get('description'),
+ 'uploader': data.get('announcer'),
+ }
+
+
+class PolskieRadioPodcastIE(PolskieRadioPodcastBaseExtractor):
+ IE_NAME = 'polskieradio:podcast'
+ _VALID_URL = r'https?://podcasty\.polskieradio\.pl/track/(?P<id>[a-f\d]{8}(?:-[a-f\d]{4}){4}[a-f\d]{8})'
+ _TESTS = [{
+ 'url': 'https://podcasty.polskieradio.pl/track/6eafe403-cb8f-4756-b896-4455c3713c32',
+ 'info_dict': {
+ 'id': '6eafe403-cb8f-4756-b896-4455c3713c32',
+ 'ext': 'mp3',
+ 'title': 'Theresa May rezygnuje. Co dalej z brexitem?',
+ 'description': 'md5:e41c409a29d022b70ef0faa61dbded60',
+ 'episode': 'Theresa May rezygnuje. Co dalej z brexitem?',
+ 'duration': 2893,
+ 'thumbnail': 'https://static.prsa.pl/images/58649376-c8a0-4ba2-a714-78b383285f5f.jpg',
+ 'series': 'Raport o stanie świata',
+ },
+ }]
+
+ def _real_extract(self, url):
+ podcast_id = self._match_id(url)
+ data = self._download_json(
+ f'{self._API_BASE}/audio',
+ podcast_id, 'Downloading podcast metadata',
+ data=json.dumps({
+ 'guids': [podcast_id],
+ }).encode('utf-8'),
+ headers={
+ 'Content-Type': 'application/json',
+ })
+ return self._parse_episode(data[0])
diff --git a/yt_dlp/extractor/popcorntimes.py b/yt_dlp/extractor/popcorntimes.py
new file mode 100644
index 0000000..ddc5ec8
--- /dev/null
+++ b/yt_dlp/extractor/popcorntimes.py
@@ -0,0 +1,91 @@
+from .common import InfoExtractor
+from ..compat import compat_b64decode
+from ..utils import int_or_none
+
+
+class PopcorntimesIE(InfoExtractor):
+ _VALID_URL = r'https?://popcorntimes\.tv/[^/]+/m/(?P<id>[^/]+)/(?P<display_id>[^/?#&]+)'
+ _TEST = {
+ 'url': 'https://popcorntimes.tv/de/m/A1XCFvz/haensel-und-gretel-opera-fantasy',
+ 'md5': '93f210991ad94ba8c3485950a2453257',
+ 'info_dict': {
+ 'id': 'A1XCFvz',
+ 'display_id': 'haensel-und-gretel-opera-fantasy',
+ 'ext': 'mp4',
+ 'title': 'Hänsel und Gretel',
+ 'description': 'md5:1b8146791726342e7b22ce8125cf6945',
+ 'thumbnail': r're:^https?://.*\.jpg$',
+ 'creator': 'John Paul',
+ 'release_date': '19541009',
+ 'duration': 4260,
+ 'tbr': 5380,
+ 'width': 720,
+ 'height': 540,
+ },
+ }
+
+ def _real_extract(self, url):
+ mobj = self._match_valid_url(url)
+ video_id, display_id = mobj.group('id', 'display_id')
+
+ webpage = self._download_webpage(url, display_id)
+
+ title = self._search_regex(
+ r'<h1>([^<]+)', webpage, 'title',
+ default=None) or self._html_search_meta(
+ 'ya:ovs:original_name', webpage, 'title', fatal=True)
+
+ loc = self._search_regex(
+ r'PCTMLOC\s*=\s*(["\'])(?P<value>(?:(?!\1).)+)\1', webpage, 'loc',
+ group='value')
+
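+        # PCTMLOC appears to hold the video URL, base64-encoded and then
+        # ROT13-rotated: letters are rotated back here while digits and base64
+        # punctuation pass through unchanged, e.g. 'nUE0' -> 'aHR0' (which
+        # base64-decodes to 'htt').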
+ loc_b64 = ''
+ for c in loc:
+ c_ord = ord(c)
+ if ord('a') <= c_ord <= ord('z') or ord('A') <= c_ord <= ord('Z'):
+ upper = ord('Z') if c_ord <= ord('Z') else ord('z')
+ c_ord += 13
+ if upper < c_ord:
+ c_ord -= 26
+ loc_b64 += chr(c_ord)
+
+ video_url = compat_b64decode(loc_b64).decode('utf-8')
+
+ description = self._html_search_regex(
+ r'(?s)<div[^>]+class=["\']pt-movie-desc[^>]+>(.+?)</div>', webpage,
+ 'description', fatal=False)
+
+ thumbnail = self._search_regex(
+ r'<img[^>]+class=["\']video-preview[^>]+\bsrc=(["\'])(?P<value>(?:(?!\1).)+)\1',
+ webpage, 'thumbnail', default=None,
+ group='value') or self._og_search_thumbnail(webpage)
+
+ creator = self._html_search_meta(
+ 'video:director', webpage, 'creator', default=None)
+
+ release_date = self._html_search_meta(
+ 'video:release_date', webpage, default=None)
+ if release_date:
+ release_date = release_date.replace('-', '')
+
+ def int_meta(name):
+ return int_or_none(self._html_search_meta(
+ name, webpage, default=None))
+
+ return {
+ 'id': video_id,
+ 'display_id': display_id,
+ 'url': video_url,
+ 'title': title,
+ 'description': description,
+ 'thumbnail': thumbnail,
+ 'creator': creator,
+ 'release_date': release_date,
+ 'duration': int_meta('video:duration'),
+ 'tbr': int_meta('ya:ovs:bitrate'),
+ 'width': int_meta('og:video:width'),
+ 'height': int_meta('og:video:height'),
+ 'http_headers': {
+ 'Referer': url,
+ },
+ }
diff --git a/yt_dlp/extractor/popcorntv.py b/yt_dlp/extractor/popcorntv.py
new file mode 100644
index 0000000..7798462
--- /dev/null
+++ b/yt_dlp/extractor/popcorntv.py
@@ -0,0 +1,72 @@
+from .common import InfoExtractor
+from ..utils import (
+ extract_attributes,
+ int_or_none,
+ unified_timestamp,
+)
+
+
+class PopcornTVIE(InfoExtractor):
+ _VALID_URL = r'https?://[^/]+\.popcorntv\.it/guarda/(?P<display_id>[^/]+)/(?P<id>\d+)'
+ _TESTS = [{
+ 'url': 'https://animemanga.popcorntv.it/guarda/food-wars-battaglie-culinarie-episodio-01/9183',
+ 'md5': '47d65a48d147caf692ab8562fe630b45',
+ 'info_dict': {
+ 'id': '9183',
+ 'display_id': 'food-wars-battaglie-culinarie-episodio-01',
+ 'ext': 'mp4',
+ 'title': 'Food Wars, Battaglie Culinarie | Episodio 01',
+ 'description': 'md5:b8bea378faae4651d3b34c6e112463d0',
+ 'thumbnail': r're:^https?://.*\.jpg$',
+ 'timestamp': 1497610857,
+ 'upload_date': '20170616',
+ 'duration': 1440,
+ 'view_count': int,
+ },
+ }, {
+ 'url': 'https://cinema.popcorntv.it/guarda/smash-cut/10433',
+ 'only_matching': True,
+ }]
+
+ def _real_extract(self, url):
+ mobj = self._match_valid_url(url)
+ display_id, video_id = mobj.group('display_id', 'id')
+
+ webpage = self._download_webpage(url, display_id)
+
+ m3u8_url = extract_attributes(
+ self._search_regex(
+ r'(<link[^>]+itemprop=["\'](?:content|embed)Url[^>]*>)',
+ webpage, 'content'
+ ))['href']
+
+ formats = self._extract_m3u8_formats(
+ m3u8_url, display_id, 'mp4', entry_protocol='m3u8_native',
+ m3u8_id='hls')
+
+ title = self._search_regex(
+ r'<h1[^>]+itemprop=["\']name[^>]*>([^<]+)', webpage,
+ 'title', default=None) or self._og_search_title(webpage)
+
+ description = self._html_search_regex(
+ r'(?s)<article[^>]+itemprop=["\']description[^>]*>(.+?)</article>',
+ webpage, 'description', fatal=False)
+ thumbnail = self._og_search_thumbnail(webpage)
+ timestamp = unified_timestamp(self._html_search_meta(
+ 'uploadDate', webpage, 'timestamp'))
+ duration = int_or_none(self._html_search_meta(
+ 'duration', webpage), invscale=60)
+ view_count = int_or_none(self._html_search_meta(
+ 'interactionCount', webpage, 'view count'))
+
+ return {
+ 'id': video_id,
+ 'display_id': display_id,
+ 'title': title,
+ 'description': description,
+ 'thumbnail': thumbnail,
+ 'timestamp': timestamp,
+ 'duration': duration,
+ 'view_count': view_count,
+ 'formats': formats,
+ }
diff --git a/yt_dlp/extractor/porn91.py b/yt_dlp/extractor/porn91.py
new file mode 100644
index 0000000..7d16a16
--- /dev/null
+++ b/yt_dlp/extractor/porn91.py
@@ -0,0 +1,95 @@
+import urllib.parse
+from .common import InfoExtractor
+from ..utils import (
+ determine_ext,
+ int_or_none,
+ parse_duration,
+ remove_end,
+ unified_strdate,
+ ExtractorError,
+)
+
+
+class Porn91IE(InfoExtractor):
+ IE_NAME = '91porn'
+ _VALID_URL = r'(?:https?://)(?:www\.|)91porn\.com/view_video.php\?([^#]+&)?viewkey=(?P<id>\w+)'
+
+ _TESTS = [{
+ 'url': 'http://91porn.com/view_video.php?viewkey=7e42283b4f5ab36da134',
+ 'md5': 'd869db281402e0ef4ddef3c38b866f86',
+ 'info_dict': {
+ 'id': '7e42283b4f5ab36da134',
+ 'title': '18岁大一漂亮学妹,水嫩性感,再爽一次!',
+ 'description': 'md5:1ff241f579b07ae936a54e810ad2e891',
+ 'ext': 'mp4',
+ 'duration': 431,
+ 'upload_date': '20150520',
+ 'comment_count': int,
+ 'view_count': int,
+ 'age_limit': 18,
+ }
+ }, {
+ 'url': 'https://91porn.com/view_video.php?viewkey=7ef0cf3d362c699ab91c',
+ 'md5': 'f8fd50540468a6d795378cd778b40226',
+ 'info_dict': {
+ 'id': '7ef0cf3d362c699ab91c',
+ 'title': '真实空乘,冲上云霄第二部',
+ 'description': 'md5:618bf9652cafcc66cd277bd96789baea',
+ 'ext': 'mp4',
+ 'duration': 248,
+ 'upload_date': '20221119',
+ 'comment_count': int,
+ 'view_count': int,
+ 'age_limit': 18,
+ }
+ }]
+
+ def _real_extract(self, url):
+ video_id = self._match_id(url)
+ self._set_cookie('91porn.com', 'language', 'cn_CN')
+
+ webpage = self._download_webpage(
+ 'http://91porn.com/view_video.php?viewkey=%s' % video_id, video_id)
+
+ if '视频不存在,可能已经被删除或者被举报为不良内容!' in webpage:
+ raise ExtractorError('91 Porn says: Video does not exist', expected=True)
+
+ daily_limit = self._search_regex(
+ r'作为游客,你每天只可观看([\d]+)个视频', webpage, 'exceeded daily limit', default=None, fatal=False)
+ if daily_limit:
+ raise ExtractorError(f'91 Porn says: Daily limit {daily_limit} videos exceeded', expected=True)
+
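+        # The player writes the real video element via
+        # document.write(strencode2(...)) with a percent-encoded argument;
+        # capture it, unquote it, then pull the src attribute out of the
+        # decoded tag.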
+ video_link_url = self._search_regex(
+ r'document\.write\(\s*strencode2\s*\(\s*((?:"[^"]+")|(?:\'[^\']+\'))', webpage, 'video link')
+ video_link_url = self._search_regex(
+ r'src=["\']([^"\']+)["\']', urllib.parse.unquote(video_link_url), 'unquoted video link')
+
+ formats, subtitles = self._get_formats_and_subtitle(video_link_url, video_id)
+
+ return {
+ 'id': video_id,
+ 'title': remove_end(self._html_extract_title(webpage).replace('\n', ''), 'Chinese homemade video').strip(),
+ 'formats': formats,
+ 'subtitles': subtitles,
+ 'upload_date': unified_strdate(self._search_regex(
+ r'<span\s+class=["\']title-yakov["\']>(\d{4}-\d{2}-\d{2})</span>', webpage, 'upload_date', fatal=False)),
+ 'description': self._html_search_regex(
+ r'<span\s+class=["\']more title["\']>\s*([^<]+)', webpage, 'description', fatal=False),
+ 'duration': parse_duration(self._search_regex(
+ r'时长:\s*<span[^>]*>\s*(\d+(?::\d+){1,2})', webpage, 'duration', fatal=False)),
+ 'comment_count': int_or_none(self._search_regex(
+ r'留言:\s*<span[^>]*>\s*(\d+)\s*</span>', webpage, 'comment count', fatal=False)),
+ 'view_count': int_or_none(self._search_regex(
+ r'热度:\s*<span[^>]*>\s*(\d+)\s*</span>', webpage, 'view count', fatal=False)),
+ 'age_limit': 18,
+ }
+
+ def _get_formats_and_subtitle(self, video_link_url, video_id):
+ ext = determine_ext(video_link_url)
+ if ext == 'm3u8':
+ formats, subtitles = self._extract_m3u8_formats_and_subtitles(video_link_url, video_id, ext='mp4')
+ else:
+ formats = [{'url': video_link_url, 'ext': ext}]
+ subtitles = {}
+
+ return formats, subtitles
diff --git a/yt_dlp/extractor/pornbox.py b/yt_dlp/extractor/pornbox.py
new file mode 100644
index 0000000..c381382
--- /dev/null
+++ b/yt_dlp/extractor/pornbox.py
@@ -0,0 +1,113 @@
+from .common import InfoExtractor
+from ..compat import functools
+from ..utils import (
+ int_or_none,
+ parse_duration,
+ parse_iso8601,
+ qualities,
+ str_or_none,
+ traverse_obj,
+ url_or_none,
+)
+
+
+class PornboxIE(InfoExtractor):
+ _VALID_URL = r'https?://(?:www\.)?pornbox\.com/application/watch-page/(?P<id>[0-9]+)'
+ _TESTS = [{
+ 'url': 'https://pornbox.com/application/watch-page/212108',
+ 'md5': '3ff6b6e206f263be4c5e987a3162ac6e',
+ 'info_dict': {
+ 'id': '212108',
+ 'ext': 'mp4',
+ 'title': 'md5:ececc5c6e6c9dd35d290c45fed05fd49',
+ 'uploader': 'Lily Strong',
+ 'timestamp': 1665871200,
+ 'upload_date': '20221015',
+ 'age_limit': 18,
+ 'availability': 'needs_auth',
+ 'duration': 1505,
+ 'cast': ['Lily Strong', 'John Strong'],
+ 'tags': 'count:11',
+ 'description': 'md5:589c7f33e183aa8aa939537300efb859',
+ 'thumbnail': r're:^https?://cdn-image\.gtflixtv\.com.*\.jpg.*$'
+ }
+ }, {
+ 'url': 'https://pornbox.com/application/watch-page/216045',
+ 'info_dict': {
+ 'id': '216045',
+ 'title': 'md5:3e48528e73a9a2b12f7a2772ed0b26a2',
+ 'description': 'md5:3e631dcaac029f15ed434e402d1b06c7',
+ 'uploader': 'VK Studio',
+ 'timestamp': 1618264800,
+ 'upload_date': '20210412',
+ 'age_limit': 18,
+ 'availability': 'premium_only',
+ 'duration': 2710,
+ 'cast': 'count:3',
+ 'tags': 'count:29',
+ 'thumbnail': r're:^https?://cdn-image\.gtflixtv\.com.*\.jpg.*$',
+ 'subtitles': 'count:6'
+ },
+ 'params': {
+ 'skip_download': True,
+ 'ignore_no_formats_error': True
+ },
+ 'expected_warnings': [
+ 'You are either not logged in or do not have access to this scene',
+ 'No video formats found', 'Requested format is not available']
+ }]
+
+ def _real_extract(self, url):
+ video_id = self._match_id(url)
+
+ public_data = self._download_json(f'https://pornbox.com/contents/{video_id}', video_id)
+
+ subtitles = {country_code: [{
+ 'url': f'https://pornbox.com/contents/{video_id}/subtitles/{country_code}',
+ 'ext': 'srt'
+ }] for country_code in traverse_obj(public_data, ('subtitles', ..., {str}))}
+
+ is_free_scene = traverse_obj(
+ public_data, ('price', 'is_available_for_free', {bool}), default=False)
+
+ metadata = {
+ 'id': video_id,
+ **traverse_obj(public_data, {
+ 'title': ('scene_name', {str.strip}),
+ 'description': ('small_description', {str.strip}),
+ 'uploader': 'studio',
+ 'duration': ('runtime', {parse_duration}),
+ 'cast': (('models', 'male_models'), ..., 'model_name'),
+ 'thumbnail': ('player_poster', {url_or_none}),
+ 'tags': ('niches', ..., 'niche'),
+ }),
+ 'age_limit': 18,
+ 'timestamp': parse_iso8601(traverse_obj(
+ public_data, ('studios', 'release_date'), 'publish_date')),
+ 'availability': self._availability(needs_auth=True, needs_premium=not is_free_scene),
+ 'subtitles': subtitles,
+ }
+
+        if not public_data.get('is_purchased') and not is_free_scene:
+ self.raise_login_required(
+ 'You are either not logged in or do not have access to this scene', metadata_available=True)
+ return metadata
+
+ media_id = traverse_obj(public_data, (
+ 'medias', lambda _, v: v['title'] == 'Full video', 'media_id', {int}), get_all=False)
+ if not media_id:
+ self.raise_no_formats('Could not find stream id', video_id=video_id)
+
+ stream_data = self._download_json(
+ f'https://pornbox.com/media/{media_id}/stream', video_id=video_id, note='Getting manifest urls')
+
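+        # Rank formats using the site's quality ladder (web < vga < ... < 8k);
+        # 'size' seems to carry a trailing unit character, hence the
+        # int(x[:-1]) below to obtain the width.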
+ get_quality = qualities(['web', 'vga', 'hd', '1080p', '4k', '8k'])
+ metadata['formats'] = traverse_obj(stream_data, ('qualities', lambda _, v: v['src'], {
+ 'url': 'src',
+ 'vbr': ('bitrate', {functools.partial(int_or_none, scale=1000)}),
+ 'format_id': ('quality', {str_or_none}),
+ 'quality': ('quality', {get_quality}),
+ 'width': ('size', {lambda x: int(x[:-1])}),
+ }))
+
+ return metadata
diff --git a/yt_dlp/extractor/pornflip.py b/yt_dlp/extractor/pornflip.py
new file mode 100644
index 0000000..51a9cf3
--- /dev/null
+++ b/yt_dlp/extractor/pornflip.py
@@ -0,0 +1,77 @@
+from .common import InfoExtractor
+from ..utils import (
+ int_or_none,
+ parse_duration,
+ parse_iso8601
+)
+
+
+class PornFlipIE(InfoExtractor):
+ _VALID_URL = r'https?://(?:www\.)?pornflip\.com/(?:(embed|sv|v)/)?(?P<id>[^/]+)'
+ _TESTS = [
+ {
+ 'url': 'https://www.pornflip.com/dzv9Mtw1qj2/sv/brazzers-double-dare-two-couples-fucked-jenna-reid-maya-bijou',
+ 'info_dict': {
+ 'id': 'dzv9Mtw1qj2',
+ 'ext': 'mp4',
+ 'title': 'Brazzers - Double Dare Two couples fucked Jenna Reid Maya Bijou',
+ 'description': 'md5:d2b69e6cc743c5fd158e162aa7f05821',
+ 'duration': 476,
+ 'like_count': int,
+ 'dislike_count': int,
+ 'view_count': int,
+ 'timestamp': 1617846819,
+ 'upload_date': '20210408',
+ 'uploader': 'Brazzers',
+ 'age_limit': 18,
+ },
+ 'params': {
+ 'skip_download': True,
+ },
+ },
+ {
+ 'url': 'https://www.pornflip.com/v/IrJEC40i21L',
+ 'only_matching': True,
+ },
+ {
+ 'url': 'https://www.pornflip.com/Z3jzbChC5-P/sexintaxi-e-sereyna-gomez-czech-naked-couple',
+ 'only_matching': True,
+ },
+ {
+ 'url': 'https://www.pornflip.com/embed/bLcDFxnrZnU',
+ 'only_matching': True,
+ },
+ ]
+ _HOST = 'www.pornflip.com'
+
+ def _real_extract(self, url):
+ video_id = self._match_id(url)
+ webpage = self._download_webpage(
+ 'https://{}/sv/{}'.format(self._HOST, video_id), video_id, headers={'host': self._HOST})
+ description = self._html_search_regex(r'&p\[summary\]=(.*?)\s*&p', webpage, 'description', fatal=False)
+ duration = self._search_regex(r'"duration":\s+"([^"]+)",', webpage, 'duration', fatal=False)
+ view_count = self._search_regex(r'"interactionCount":\s+"([^"]+)"', webpage, 'view_count', fatal=False)
+ title = self._html_search_regex(r'id="mediaPlayerTitleLink"[^>]*>(.+)</a>', webpage, 'title', fatal=False)
+ uploader = self._html_search_regex(r'class="title-chanel"[^>]*>[^<]*<a[^>]*>([^<]+)<', webpage, 'uploader', fatal=False)
+ upload_date = self._search_regex(r'"uploadDate":\s+"([^"]+)",', webpage, 'upload_date', fatal=False)
+ likes = self._html_search_regex(
+ r'class="btn btn-up-rating[^>]*>[^<]*<i[^>]*>[^<]*</i>[^>]*<span[^>]*>[^0-9]*([0-9]+)[^<0-9]*<', webpage, 'like_count', fatal=False)
+ dislikes = self._html_search_regex(
+ r'class="btn btn-down-rating[^>]*>[^<]*<i[^>]*>[^<]*</i>[^>]*<span[^>]*>[^0-9]*([0-9]+)[^<0-9]*<', webpage, 'dislike_count', fatal=False)
+ mpd_url = self._search_regex(r'"([^"]+userscontent.net/dash/[0-9]+/manifest.mpd[^"]*)"', webpage, 'mpd_url').replace('&amp;', '&')
+ formats = self._extract_mpd_formats(mpd_url, video_id, mpd_id='dash')
+
+ return {
+ 'age_limit': 18,
+ 'description': description,
+ 'dislike_count': int_or_none(dislikes),
+ 'duration': parse_duration(duration),
+ 'formats': formats,
+ 'id': video_id,
+ 'like_count': int_or_none(likes),
+ 'timestamp': parse_iso8601(upload_date),
+ 'thumbnail': self._og_search_thumbnail(webpage),
+ 'title': title,
+ 'uploader': uploader,
+ 'view_count': int_or_none(view_count),
+ }
diff --git a/yt_dlp/extractor/pornhub.py b/yt_dlp/extractor/pornhub.py
new file mode 100644
index 0000000..29a3e43
--- /dev/null
+++ b/yt_dlp/extractor/pornhub.py
@@ -0,0 +1,825 @@
+import functools
+import itertools
+import math
+import operator
+import re
+
+from .common import InfoExtractor
+from .openload import PhantomJSwrapper
+from ..compat import compat_str
+from ..networking import Request
+from ..networking.exceptions import HTTPError
+from ..utils import (
+ NO_DEFAULT,
+ ExtractorError,
+ clean_html,
+ determine_ext,
+ format_field,
+ int_or_none,
+ merge_dicts,
+ orderedSet,
+ remove_quotes,
+ remove_start,
+ str_to_int,
+ update_url_query,
+ url_or_none,
+ urlencode_postdata,
+)
+
+
+class PornHubBaseIE(InfoExtractor):
+ _NETRC_MACHINE = 'pornhub'
+ _PORNHUB_HOST_RE = r'(?:(?P<host>pornhub(?:premium)?\.(?:com|net|org))|pornhubvybmsymdol4iibwgwtkpwmeyd6luq2gxajgjzfjvotyt5zhyd\.onion)'
+
+ def _download_webpage_handle(self, *args, **kwargs):
+ def dl(*args, **kwargs):
+ return super(PornHubBaseIE, self)._download_webpage_handle(*args, **kwargs)
+
+ ret = dl(*args, **kwargs)
+
+ if not ret:
+ return ret
+
+ webpage, urlh = ret
+
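+        # These markers indicate an anti-bot interstitial that computes an
+        # RNKEY cookie in JS before reloading; run it through PhantomJS once
+        # so the cookie lands in the jar, then retry the original request.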
+ if any(re.search(p, webpage) for p in (
+ r'<body\b[^>]+\bonload=["\']go\(\)',
+ r'document\.cookie\s*=\s*["\']RNKEY=',
+ r'document\.location\.reload\(true\)')):
+ url_or_request = args[0]
+ url = (url_or_request.url
+ if isinstance(url_or_request, Request)
+ else url_or_request)
+ phantom = PhantomJSwrapper(self, required_version='2.0')
+ phantom.get(url, html=webpage)
+ webpage, urlh = dl(*args, **kwargs)
+
+ return webpage, urlh
+
+ def _real_initialize(self):
+ self._logged_in = False
+
+ def _set_age_cookies(self, host):
+ self._set_cookie(host, 'age_verified', '1')
+ self._set_cookie(host, 'accessAgeDisclaimerPH', '1')
+ self._set_cookie(host, 'accessAgeDisclaimerUK', '1')
+ self._set_cookie(host, 'accessPH', '1')
+
+ def _login(self, host):
+ if self._logged_in:
+ return
+
+ site = host.split('.')[0]
+
+        # pornhub and pornhubpremium use separate accounts, so credentials
+        # for each can be supplied under different netrc machines. Some
+        # videos are available under the same video id on both sites, so
+        # both sites are kept in this single extractor to identify them as
+        # the same video.
+ username, password = self._get_login_info(netrc_machine=site)
+ if username is None:
+ return
+
+ login_url = 'https://www.%s/%slogin' % (host, 'premium/' if 'premium' in host else '')
+ login_page = self._download_webpage(
+ login_url, None, 'Downloading %s login page' % site)
+
+ def is_logged(webpage):
+ return any(re.search(p, webpage) for p in (
+ r'id="profileMenuDropdown"',
+ r'class="ph-icon-logout"'))
+
+ if is_logged(login_page):
+ self._logged_in = True
+ return
+
+ login_form = self._hidden_inputs(login_page)
+
+ login_form.update({
+ 'username': username,
+ 'password': password,
+ })
+
+ response = self._download_json(
+ 'https://www.%s/front/authenticate' % host, None,
+ 'Logging in to %s' % site,
+ data=urlencode_postdata(login_form),
+ headers={
+ 'Content-Type': 'application/x-www-form-urlencoded; charset=UTF-8',
+ 'Referer': login_url,
+ 'X-Requested-With': 'XMLHttpRequest',
+ })
+
+ if response.get('success') == '1':
+ self._logged_in = True
+ return
+
+ message = response.get('message')
+ if message is not None:
+ raise ExtractorError(
+ 'Unable to login: %s' % message, expected=True)
+
+ raise ExtractorError('Unable to log in')
+
+
+class PornHubIE(PornHubBaseIE):
+ IE_DESC = 'PornHub and Thumbzilla'
+ _VALID_URL = r'''(?x)
+ https?://
+ (?:
+ (?:[^/]+\.)?
+ %s
+ /(?:(?:view_video\.php|video/show)\?viewkey=|embed/)|
+ (?:www\.)?thumbzilla\.com/video/
+ )
+ (?P<id>[\da-z]+)
+ ''' % PornHubBaseIE._PORNHUB_HOST_RE
+ _EMBED_REGEX = [r'<iframe[^>]+?src=["\'](?P<url>(?:https?:)?//(?:www\.)?pornhub(?:premium)?\.(?:com|net|org)/embed/[\da-z]+)']
+ _TESTS = [{
+ 'url': 'http://www.pornhub.com/view_video.php?viewkey=648719015',
+ 'md5': 'a6391306d050e4547f62b3f485dd9ba9',
+ 'info_dict': {
+ 'id': '648719015',
+ 'ext': 'mp4',
+ 'title': 'Seductive Indian beauty strips down and fingers her pink pussy',
+ 'uploader': 'Babes',
+ 'upload_date': '20130628',
+ 'timestamp': 1372447216,
+ 'duration': 361,
+ 'view_count': int,
+ 'like_count': int,
+ 'dislike_count': int,
+ 'comment_count': int,
+ 'age_limit': 18,
+ 'tags': list,
+ 'categories': list,
+ 'cast': list,
+ },
+ }, {
+ # non-ASCII title
+ 'url': 'http://www.pornhub.com/view_video.php?viewkey=1331683002',
+ 'info_dict': {
+ 'id': '1331683002',
+ 'ext': 'mp4',
+ 'title': '重庆婷婷女王足交',
+ 'upload_date': '20150213',
+ 'timestamp': 1423804862,
+ 'duration': 1753,
+ 'view_count': int,
+ 'like_count': int,
+ 'dislike_count': int,
+ 'comment_count': int,
+ 'age_limit': 18,
+ 'tags': list,
+ 'categories': list,
+ },
+ 'params': {
+ 'skip_download': True,
+ },
+ 'skip': 'Video has been flagged for verification in accordance with our trust and safety policy',
+ }, {
+ # subtitles
+ 'url': 'https://www.pornhub.com/view_video.php?viewkey=ph5af5fef7c2aa7',
+ 'info_dict': {
+ 'id': 'ph5af5fef7c2aa7',
+ 'ext': 'mp4',
+ 'title': 'BFFS - Cute Teen Girls Share Cock On the Floor',
+ 'uploader': 'BFFs',
+ 'duration': 622,
+ 'view_count': int,
+ 'like_count': int,
+ 'dislike_count': int,
+ 'comment_count': int,
+ 'age_limit': 18,
+ 'tags': list,
+ 'categories': list,
+ 'subtitles': {
+ 'en': [{
+ "ext": 'srt'
+ }]
+ },
+ },
+ 'params': {
+ 'skip_download': True,
+ },
+ 'skip': 'This video has been disabled',
+ }, {
+ 'url': 'http://www.pornhub.com/view_video.php?viewkey=ph601dc30bae19a',
+ 'info_dict': {
+ 'id': 'ph601dc30bae19a',
+ 'uploader': 'Projekt Melody',
+ 'uploader_id': 'projekt-melody',
+ 'upload_date': '20210205',
+ 'title': '"Welcome to My Pussy Mansion" - CB Stream (02/03/21)',
+ 'thumbnail': r're:https?://.+',
+ },
+ }, {
+ 'url': 'http://www.pornhub.com/view_video.php?viewkey=ph557bbb6676d2d',
+ 'only_matching': True,
+ }, {
+ # removed at the request of cam4.com
+ 'url': 'http://fr.pornhub.com/view_video.php?viewkey=ph55ca2f9760862',
+ 'only_matching': True,
+ }, {
+ # removed at the request of the copyright owner
+ 'url': 'http://www.pornhub.com/view_video.php?viewkey=788152859',
+ 'only_matching': True,
+ }, {
+ # removed by uploader
+ 'url': 'http://www.pornhub.com/view_video.php?viewkey=ph572716d15a111',
+ 'only_matching': True,
+ }, {
+ # private video
+ 'url': 'http://www.pornhub.com/view_video.php?viewkey=ph56fd731fce6b7',
+ 'only_matching': True,
+ }, {
+ 'url': 'https://www.thumbzilla.com/video/ph56c6114abd99a/horny-girlfriend-sex',
+ 'only_matching': True,
+ }, {
+ 'url': 'http://www.pornhub.com/video/show?viewkey=648719015',
+ 'only_matching': True,
+ }, {
+ 'url': 'https://www.pornhub.net/view_video.php?viewkey=203640933',
+ 'only_matching': True,
+ }, {
+ 'url': 'https://www.pornhub.org/view_video.php?viewkey=203640933',
+ 'only_matching': True,
+ }, {
+ 'url': 'https://www.pornhubpremium.com/view_video.php?viewkey=ph5e4acdae54a82',
+ 'only_matching': True,
+ }, {
+ # Some videos are available with the same id on both premium
+ # and non-premium sites (e.g. this and the following test)
+ 'url': 'https://www.pornhub.com/view_video.php?viewkey=ph5f75b0f4b18e3',
+ 'only_matching': True,
+ }, {
+ 'url': 'https://www.pornhubpremium.com/view_video.php?viewkey=ph5f75b0f4b18e3',
+ 'only_matching': True,
+ }, {
+ # geo restricted
+ 'url': 'https://www.pornhub.com/view_video.php?viewkey=ph5a9813bfa7156',
+ 'only_matching': True,
+ }, {
+ 'url': 'http://pornhubvybmsymdol4iibwgwtkpwmeyd6luq2gxajgjzfjvotyt5zhyd.onion/view_video.php?viewkey=ph5a9813bfa7156',
+ 'only_matching': True,
+ }]
+
+ def _extract_count(self, pattern, webpage, name):
+ return str_to_int(self._search_regex(pattern, webpage, '%s count' % name, default=None))
+
+ def _real_extract(self, url):
+ mobj = self._match_valid_url(url)
+ host = mobj.group('host') or 'pornhub.com'
+ video_id = mobj.group('id')
+
+ self._login(host)
+ self._set_age_cookies(host)
+
+ def dl_webpage(platform):
+ self._set_cookie(host, 'platform', platform)
+ return self._download_webpage(
+ 'https://www.%s/view_video.php?viewkey=%s' % (host, video_id),
+ video_id, 'Downloading %s webpage' % platform)
+
+ webpage = dl_webpage('pc')
+
+ error_msg = self._html_search_regex(
+ (r'(?s)<div[^>]+class=(["\'])(?:(?!\1).)*\b(?:removed|userMessageSection)\b(?:(?!\1).)*\1[^>]*>(?P<error>.+?)</div>',
+ r'(?s)<section[^>]+class=["\']noVideo["\'][^>]*>(?P<error>.+?)</section>'),
+ webpage, 'error message', default=None, group='error')
+ if error_msg:
+ error_msg = re.sub(r'\s+', ' ', error_msg)
+ raise ExtractorError(
+ 'PornHub said: %s' % error_msg,
+ expected=True, video_id=video_id)
+
+ if any(re.search(p, webpage) for p in (
+ r'class=["\']geoBlocked["\']',
+ r'>\s*This content is unavailable in your country')):
+ self.raise_geo_restricted()
+
+        # video_title from flashvars contains whitespace instead of non-ASCII
+        # characters (see http://www.pornhub.com/view_video.php?viewkey=1331683002),
+        # so we no longer rely on it.
+ title = self._html_search_meta(
+ 'twitter:title', webpage, default=None) or self._html_search_regex(
+ (r'(?s)<h1[^>]+class=["\']title["\'][^>]*>(?P<title>.+?)</h1>',
+ r'<div[^>]+data-video-title=(["\'])(?P<title>(?:(?!\1).)+)\1',
+ r'shareTitle["\']\s*[=:]\s*(["\'])(?P<title>(?:(?!\1).)+)\1'),
+ webpage, 'title', group='title')
+
+ video_urls = []
+ video_urls_set = set()
+ subtitles = {}
+
+ flashvars = self._parse_json(
+ self._search_regex(
+ r'var\s+flashvars_\d+\s*=\s*({.+?});', webpage, 'flashvars', default='{}'),
+ video_id)
+ if flashvars:
+ subtitle_url = url_or_none(flashvars.get('closedCaptionsFile'))
+ if subtitle_url:
+ subtitles.setdefault('en', []).append({
+ 'url': subtitle_url,
+ 'ext': 'srt',
+ })
+ thumbnail = flashvars.get('image_url')
+ duration = int_or_none(flashvars.get('video_duration'))
+ media_definitions = flashvars.get('mediaDefinitions')
+ if isinstance(media_definitions, list):
+ for definition in media_definitions:
+ if not isinstance(definition, dict):
+ continue
+ video_url = definition.get('videoUrl')
+ if not video_url or not isinstance(video_url, compat_str):
+ continue
+ if video_url in video_urls_set:
+ continue
+ video_urls_set.add(video_url)
+ video_urls.append(
+ (video_url, int_or_none(definition.get('quality'))))
+ else:
+ thumbnail, duration = [None] * 2
+
+ def extract_js_vars(webpage, pattern, default=NO_DEFAULT):
+ assignments = self._search_regex(
+ pattern, webpage, 'encoded url', default=default)
+ if not assignments:
+ return {}
+
+ assignments = assignments.split(';')
+
+ js_vars = {}
+
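+            # Media URLs are obfuscated by splitting them across several JS
+            # string variables joined with '+'; resolve each operand either as
+            # a previously assigned variable or as a quoted literal.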
+ def parse_js_value(inp):
+ inp = re.sub(r'/\*(?:(?!\*/).)*?\*/', '', inp)
+ if '+' in inp:
+ inps = inp.split('+')
+ return functools.reduce(
+ operator.concat, map(parse_js_value, inps))
+ inp = inp.strip()
+ if inp in js_vars:
+ return js_vars[inp]
+ return remove_quotes(inp)
+
+ for assn in assignments:
+ assn = assn.strip()
+ if not assn:
+ continue
+ assn = re.sub(r'var\s+', '', assn)
+ vname, value = assn.split('=', 1)
+ js_vars[vname] = parse_js_value(value)
+ return js_vars
+
+ def add_video_url(video_url):
+ v_url = url_or_none(video_url)
+ if not v_url:
+ return
+ if v_url in video_urls_set:
+ return
+ video_urls.append((v_url, None))
+ video_urls_set.add(v_url)
+
+ def parse_quality_items(quality_items):
+ q_items = self._parse_json(quality_items, video_id, fatal=False)
+ if not isinstance(q_items, list):
+ return
+ for item in q_items:
+ if isinstance(item, dict):
+ add_video_url(item.get('url'))
+
+ if not video_urls:
+ FORMAT_PREFIXES = ('media', 'quality', 'qualityItems')
+ js_vars = extract_js_vars(
+ webpage, r'(var\s+(?:%s)_.+)' % '|'.join(FORMAT_PREFIXES),
+ default=None)
+ if js_vars:
+ for key, format_url in js_vars.items():
+ if key.startswith(FORMAT_PREFIXES[-1]):
+ parse_quality_items(format_url)
+ elif any(key.startswith(p) for p in FORMAT_PREFIXES[:2]):
+ add_video_url(format_url)
+ if not video_urls and re.search(
+ r'<[^>]+\bid=["\']lockedPlayer', webpage):
+ raise ExtractorError(
+ 'Video %s is locked' % video_id, expected=True)
+
+ if not video_urls:
+ js_vars = extract_js_vars(
+ dl_webpage('tv'), r'(var.+?mediastring.+?)</script>')
+ add_video_url(js_vars['mediastring'])
+
+ for mobj in re.finditer(
+ r'<a[^>]+\bclass=["\']downloadBtn\b[^>]+\bhref=(["\'])(?P<url>(?:(?!\1).)+)\1',
+ webpage):
+ video_url = mobj.group('url')
+ if video_url not in video_urls_set:
+ video_urls.append((video_url, None))
+ video_urls_set.add(video_url)
+
+ upload_date = None
+ formats = []
+
+ def add_format(format_url, height=None):
+ ext = determine_ext(format_url)
+ if ext == 'mpd':
+ formats.extend(self._extract_mpd_formats(
+ format_url, video_id, mpd_id='dash', fatal=False))
+ return
+ if ext == 'm3u8':
+ formats.extend(self._extract_m3u8_formats(
+ format_url, video_id, 'mp4', entry_protocol='m3u8_native',
+ m3u8_id='hls', fatal=False))
+ return
+ if not height:
+ height = int_or_none(self._search_regex(
+ r'(?P<height>\d+)[pP]?_\d+[kK]', format_url, 'height',
+ default=None))
+ formats.append({
+ 'url': format_url,
+ 'format_id': format_field(height, None, '%dp'),
+ 'height': height,
+ })
+
+ for video_url, height in video_urls:
+ if not upload_date:
+ upload_date = self._search_regex(
+                    r'/(\d{6}/\d{2})/', video_url, 'upload date', default=None)
+ if upload_date:
+ upload_date = upload_date.replace('/', '')
+ if '/video/get_media' in video_url:
+ medias = self._download_json(video_url, video_id, fatal=False)
+ if isinstance(medias, list):
+ for media in medias:
+ if not isinstance(media, dict):
+ continue
+ video_url = url_or_none(media.get('videoUrl'))
+ if not video_url:
+ continue
+ height = int_or_none(media.get('quality'))
+ add_format(video_url, height)
+ continue
+ add_format(video_url)
+
+ model_profile = self._search_json(
+ r'var\s+MODEL_PROFILE\s*=', webpage, 'model profile', video_id, fatal=False)
+ video_uploader = self._html_search_regex(
+ r'(?s)From:&nbsp;.+?<(?:a\b[^>]+\bhref=["\']/(?:(?:user|channel)s|model|pornstar)/|span\b[^>]+\bclass=["\']username)[^>]+>(.+?)<',
+ webpage, 'uploader', default=None) or model_profile.get('username')
+
+ def extract_vote_count(kind, name):
+ return self._extract_count(
+ (r'<span[^>]+\bclass="votes%s"[^>]*>([\d,\.]+)</span>' % kind,
+ r'<span[^>]+\bclass=["\']votes%s["\'][^>]*\bdata-rating=["\'](\d+)' % kind),
+ webpage, name)
+
+ view_count = self._extract_count(
+ r'<span class="count">([\d,\.]+)</span> [Vv]iews', webpage, 'view')
+ like_count = extract_vote_count('Up', 'like')
+ dislike_count = extract_vote_count('Down', 'dislike')
+ comment_count = self._extract_count(
+ r'All Comments\s*<span>\(([\d,.]+)\)', webpage, 'comment')
+
+ def extract_list(meta_key):
+ div = self._search_regex(
+ r'(?s)<div[^>]+\bclass=["\'].*?\b%sWrapper[^>]*>(.+?)</div>'
+ % meta_key, webpage, meta_key, default=None)
+ if div:
+ return [clean_html(x).strip() for x in re.findall(r'(?s)<a[^>]+\bhref=[^>]+>.+?</a>', div)]
+
+ info = self._search_json_ld(webpage, video_id, default={})
+ # description provided in JSON-LD is irrelevant
+ info['description'] = None
+
+ return merge_dicts({
+ 'id': video_id,
+ 'uploader': video_uploader,
+ 'uploader_id': remove_start(model_profile.get('modelProfileLink'), '/model/'),
+ 'upload_date': upload_date,
+ 'title': title,
+ 'thumbnail': thumbnail,
+ 'duration': duration,
+ 'view_count': view_count,
+ 'like_count': like_count,
+ 'dislike_count': dislike_count,
+ 'comment_count': comment_count,
+ 'formats': formats,
+ 'age_limit': 18,
+ 'tags': extract_list('tags'),
+ 'categories': extract_list('categories'),
+ 'cast': extract_list('pornstars'),
+ 'subtitles': subtitles,
+ }, info)
+
+
+class PornHubPlaylistBaseIE(PornHubBaseIE):
+ def _extract_page(self, url):
+ return int_or_none(self._search_regex(
+ r'\bpage=(\d+)', url, 'page', default=None))
+
+ def _extract_entries(self, webpage, host):
+        # Only process the container div with the main playlist content,
+        # skipping the drop-down menu that uses a similar markup pattern for
+        # videos (see https://github.com/ytdl-org/youtube-dl/issues/11594).
+ container = self._search_regex(
+ r'(?s)(<div[^>]+class=["\']container.+)', webpage,
+ 'container', default=webpage)
+
+ return [
+ self.url_result(
+ 'http://www.%s/%s' % (host, video_url),
+ PornHubIE.ie_key(), video_title=title)
+ for video_url, title in orderedSet(re.findall(
+ r'href="/?(view_video\.php\?.*\bviewkey=[\da-z]+[^"]*)"[^>]*\s+title="([^"]+)"',
+ container))
+ ]
+
+
+class PornHubUserIE(PornHubPlaylistBaseIE):
+ _VALID_URL = r'(?P<url>https?://(?:[^/]+\.)?%s/(?:(?:user|channel)s|model|pornstar)/(?P<id>[^/?#&]+))(?:[?#&]|/(?!videos)|$)' % PornHubBaseIE._PORNHUB_HOST_RE
+ _TESTS = [{
+ 'url': 'https://www.pornhub.com/model/zoe_ph',
+ 'playlist_mincount': 118,
+ }, {
+ 'url': 'https://www.pornhub.com/pornstar/liz-vicious',
+ 'info_dict': {
+ 'id': 'liz-vicious',
+ },
+ 'playlist_mincount': 118,
+ }, {
+ 'url': 'https://www.pornhub.com/users/russianveet69',
+ 'only_matching': True,
+ }, {
+ 'url': 'https://www.pornhub.com/channels/povd',
+ 'only_matching': True,
+ }, {
+ 'url': 'https://www.pornhub.com/model/zoe_ph?abc=1',
+ 'only_matching': True,
+ }, {
+ # Unavailable via /videos page, but available with direct pagination
+ # on pornstar page (see [1]), requires premium
+ # 1. https://github.com/ytdl-org/youtube-dl/issues/27853
+ 'url': 'https://www.pornhubpremium.com/pornstar/sienna-west',
+ 'only_matching': True,
+ }, {
+ # Same as before, multi page
+ 'url': 'https://www.pornhubpremium.com/pornstar/lily-labeau',
+ 'only_matching': True,
+ }, {
+ 'url': 'https://pornhubvybmsymdol4iibwgwtkpwmeyd6luq2gxajgjzfjvotyt5zhyd.onion/model/zoe_ph',
+ 'only_matching': True,
+ }]
+
+ def _real_extract(self, url):
+ mobj = self._match_valid_url(url)
+ user_id = mobj.group('id')
+ videos_url = '%s/videos' % mobj.group('url')
+ self._set_age_cookies(mobj.group('host'))
+ page = self._extract_page(url)
+ if page:
+ videos_url = update_url_query(videos_url, {'page': page})
+ return self.url_result(
+ videos_url, ie=PornHubPagedVideoListIE.ie_key(), video_id=user_id)
+
+
+class PornHubPagedPlaylistBaseIE(PornHubPlaylistBaseIE):
+ @staticmethod
+ def _has_more(webpage):
+ return re.search(
+ r'''(?x)
+ <li[^>]+\bclass=["\']page_next|
+ <link[^>]+\brel=["\']next|
+ <button[^>]+\bid=["\']moreDataBtn
+ ''', webpage) is not None
+
+ def _entries(self, url, host, item_id):
+ page = self._extract_page(url)
+
+ VIDEOS = '/videos'
+
+ def download_page(base_url, num, fallback=False):
+ note = 'Downloading page %d%s' % (num, ' (switch to fallback)' if fallback else '')
+ return self._download_webpage(
+ base_url, item_id, note, query={'page': num})
+
+ def is_404(e):
+ return isinstance(e.cause, HTTPError) and e.cause.status == 404
+
+ base_url = url
+ has_page = page is not None
+ first_page = page if has_page else 1
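+        # When the URL pins a page, extract only that page; otherwise keep
+        # paginating until a page 404s, comes back empty or reports no next page.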
+ for page_num in (first_page, ) if has_page else itertools.count(first_page):
+ try:
+ try:
+ webpage = download_page(base_url, page_num)
+ except ExtractorError as e:
+                    # Some sources may not be available via the /videos page,
+                    # so try falling back to main page pagination (see [1])
+                    # 1. https://github.com/ytdl-org/youtube-dl/issues/27853
+ if is_404(e) and page_num == first_page and VIDEOS in base_url:
+ base_url = base_url.replace(VIDEOS, '')
+ webpage = download_page(base_url, page_num, fallback=True)
+ else:
+ raise
+ except ExtractorError as e:
+ if is_404(e) and page_num != first_page:
+ break
+ raise
+ page_entries = self._extract_entries(webpage, host)
+ if not page_entries:
+ break
+ for e in page_entries:
+ yield e
+ if not self._has_more(webpage):
+ break
+
+ def _real_extract(self, url):
+ mobj = self._match_valid_url(url)
+ host = mobj.group('host')
+ item_id = mobj.group('id')
+
+ self._login(host)
+ self._set_age_cookies(host)
+
+ return self.playlist_result(self._entries(url, host, item_id), item_id)
+
+
+class PornHubPagedVideoListIE(PornHubPagedPlaylistBaseIE):
+ _VALID_URL = r'https?://(?:[^/]+\.)?%s/(?!playlist/)(?P<id>(?:[^/]+/)*[^/?#&]+)' % PornHubBaseIE._PORNHUB_HOST_RE
+ _TESTS = [{
+ 'url': 'https://www.pornhub.com/model/zoe_ph/videos',
+ 'only_matching': True,
+ }, {
+ 'url': 'http://www.pornhub.com/users/rushandlia/videos',
+ 'only_matching': True,
+ }, {
+ 'url': 'https://www.pornhub.com/pornstar/jenny-blighe/videos',
+ 'info_dict': {
+ 'id': 'pornstar/jenny-blighe/videos',
+ },
+ 'playlist_mincount': 149,
+ }, {
+ 'url': 'https://www.pornhub.com/pornstar/jenny-blighe/videos?page=3',
+ 'info_dict': {
+ 'id': 'pornstar/jenny-blighe/videos',
+ },
+ 'playlist_mincount': 40,
+ }, {
+        # default sorting is Top Rated Videos
+ 'url': 'https://www.pornhub.com/channels/povd/videos',
+ 'info_dict': {
+ 'id': 'channels/povd/videos',
+ },
+ 'playlist_mincount': 293,
+ }, {
+ # Top Rated Videos
+ 'url': 'https://www.pornhub.com/channels/povd/videos?o=ra',
+ 'only_matching': True,
+ }, {
+ # Most Recent Videos
+ 'url': 'https://www.pornhub.com/channels/povd/videos?o=da',
+ 'only_matching': True,
+ }, {
+ # Most Viewed Videos
+ 'url': 'https://www.pornhub.com/channels/povd/videos?o=vi',
+ 'only_matching': True,
+ }, {
+ 'url': 'http://www.pornhub.com/users/zoe_ph/videos/public',
+ 'only_matching': True,
+ }, {
+ # Most Viewed Videos
+ 'url': 'https://www.pornhub.com/pornstar/liz-vicious/videos?o=mv',
+ 'only_matching': True,
+ }, {
+ # Top Rated Videos
+ 'url': 'https://www.pornhub.com/pornstar/liz-vicious/videos?o=tr',
+ 'only_matching': True,
+ }, {
+ # Longest Videos
+ 'url': 'https://www.pornhub.com/pornstar/liz-vicious/videos?o=lg',
+ 'only_matching': True,
+ }, {
+ # Newest Videos
+ 'url': 'https://www.pornhub.com/pornstar/liz-vicious/videos?o=cm',
+ 'only_matching': True,
+ }, {
+ 'url': 'https://www.pornhub.com/pornstar/liz-vicious/videos/paid',
+ 'only_matching': True,
+ }, {
+ 'url': 'https://www.pornhub.com/pornstar/liz-vicious/videos/fanonly',
+ 'only_matching': True,
+ }, {
+ 'url': 'https://www.pornhub.com/video',
+ 'only_matching': True,
+ }, {
+ 'url': 'https://www.pornhub.com/video?page=3',
+ 'only_matching': True,
+ }, {
+ 'url': 'https://www.pornhub.com/video/search?search=123',
+ 'only_matching': True,
+ }, {
+ 'url': 'https://www.pornhub.com/categories/teen',
+ 'only_matching': True,
+ }, {
+ 'url': 'https://www.pornhub.com/categories/teen?page=3',
+ 'only_matching': True,
+ }, {
+ 'url': 'https://www.pornhub.com/hd',
+ 'only_matching': True,
+ }, {
+ 'url': 'https://www.pornhub.com/hd?page=3',
+ 'only_matching': True,
+ }, {
+ 'url': 'https://www.pornhub.com/described-video',
+ 'only_matching': True,
+ }, {
+ 'url': 'https://www.pornhub.com/described-video?page=2',
+ 'only_matching': True,
+ }, {
+ 'url': 'https://www.pornhub.com/video/incategories/60fps-1/hd-porn',
+ 'only_matching': True,
+ }, {
+ 'url': 'https://pornhubvybmsymdol4iibwgwtkpwmeyd6luq2gxajgjzfjvotyt5zhyd.onion/model/zoe_ph/videos',
+ 'only_matching': True,
+ }]
+
+ @classmethod
+ def suitable(cls, url):
+ return (False
+ if PornHubIE.suitable(url) or PornHubUserIE.suitable(url) or PornHubUserVideosUploadIE.suitable(url)
+ else super(PornHubPagedVideoListIE, cls).suitable(url))
+
+
+class PornHubUserVideosUploadIE(PornHubPagedPlaylistBaseIE):
+ _VALID_URL = r'(?P<url>https?://(?:[^/]+\.)?%s/(?:(?:user|channel)s|model|pornstar)/(?P<id>[^/]+)/videos/upload)' % PornHubBaseIE._PORNHUB_HOST_RE
+ _TESTS = [{
+ 'url': 'https://www.pornhub.com/pornstar/jenny-blighe/videos/upload',
+ 'info_dict': {
+ 'id': 'jenny-blighe',
+ },
+ 'playlist_mincount': 129,
+ }, {
+ 'url': 'https://www.pornhub.com/model/zoe_ph/videos/upload',
+ 'only_matching': True,
+ }, {
+ 'url': 'http://pornhubvybmsymdol4iibwgwtkpwmeyd6luq2gxajgjzfjvotyt5zhyd.onion/pornstar/jenny-blighe/videos/upload',
+ 'only_matching': True,
+ }]
+
+
+class PornHubPlaylistIE(PornHubPlaylistBaseIE):
+ _VALID_URL = r'(?P<url>https?://(?:[^/]+\.)?%s/playlist/(?P<id>[^/?#&]+))' % PornHubBaseIE._PORNHUB_HOST_RE
+ _TESTS = [{
+ 'url': 'https://www.pornhub.com/playlist/44121572',
+ 'info_dict': {
+ 'id': '44121572',
+ },
+ 'playlist_count': 77,
+ }, {
+ 'url': 'https://www.pornhub.com/playlist/4667351',
+ 'only_matching': True,
+ }, {
+ 'url': 'https://de.pornhub.com/playlist/4667351',
+ 'only_matching': True,
+ }, {
+ 'url': 'https://de.pornhub.com/playlist/4667351?page=2',
+ 'only_matching': True,
+ }]
+
+ def _entries(self, url, host, item_id):
+ webpage = self._download_webpage(url, item_id, 'Downloading page 1')
+ playlist_id = self._search_regex(r'var\s+playlistId\s*=\s*"([^"]+)"', webpage, 'playlist_id')
+ video_count = int_or_none(
+ self._search_regex(r'var\s+itemsCount\s*=\s*([0-9]+)\s*\|\|', webpage, 'video_count'))
+ token = self._search_regex(r'var\s+token\s*=\s*"([^"]+)"', webpage, 'token')
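+        # The first page appears to embed 36 entries while viewChunked pages
+        # return 40 each, hence the page count below.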
+ page_count = math.ceil((video_count - 36) / 40.) + 1
+ page_entries = self._extract_entries(webpage, host)
+
+ def download_page(page_num):
+ note = 'Downloading page {}'.format(page_num)
+ page_url = 'https://www.{}/playlist/viewChunked'.format(host)
+ return self._download_webpage(page_url, item_id, note, query={
+ 'id': playlist_id,
+ 'page': page_num,
+ 'token': token,
+ })
+
+ for page_num in range(1, page_count + 1):
+ if page_num > 1:
+ webpage = download_page(page_num)
+ page_entries = self._extract_entries(webpage, host)
+ if not page_entries:
+ break
+ for e in page_entries:
+ yield e
+
+ def _real_extract(self, url):
+ mobj = self._match_valid_url(url)
+ host = mobj.group('host')
+ item_id = mobj.group('id')
+
+ self._login(host)
+ self._set_age_cookies(host)
+
+ return self.playlist_result(self._entries(mobj.group('url'), host, item_id), item_id)
diff --git a/yt_dlp/extractor/pornotube.py b/yt_dlp/extractor/pornotube.py
new file mode 100644
index 0000000..e0960f4
--- /dev/null
+++ b/yt_dlp/extractor/pornotube.py
@@ -0,0 +1,83 @@
+import json
+
+from .common import InfoExtractor
+from ..utils import int_or_none
+
+
+class PornotubeIE(InfoExtractor):
+ _VALID_URL = r'https?://(?:\w+\.)?pornotube\.com/(?:[^?#]*?)/video/(?P<id>[0-9]+)'
+ _TEST = {
+ 'url': 'http://www.pornotube.com/orientation/straight/video/4964/title/weird-hot-and-wet-science',
+ 'md5': '60fc5a4f0d93a97968fc7999d98260c9',
+ 'info_dict': {
+ 'id': '4964',
+ 'ext': 'mp4',
+ 'upload_date': '20141203',
+ 'title': 'Weird Hot and Wet Science',
+ 'description': 'md5:a8304bef7ef06cb4ab476ca6029b01b0',
+ 'categories': ['Adult Humor', 'Blondes'],
+ 'uploader': 'Alpha Blue Archives',
+ 'thumbnail': r're:^https?://.*\.jpg$',
+ 'timestamp': 1417582800,
+ 'age_limit': 18,
+ }
+ }
+
+ def _real_extract(self, url):
+ video_id = self._match_id(url)
+
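+        # Pornotube fronts the AEBN API: fetch an auth token for the
+        # 'Clip Application' origin first, then pass it as the Authorization
+        # header for the delivery and metadata calls below.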
+ token = self._download_json(
+ 'https://api.aebn.net/auth/v2/origins/authenticate',
+ video_id, note='Downloading token',
+ data=json.dumps({'credentials': 'Clip Application'}).encode('utf-8'),
+ headers={
+ 'Content-Type': 'application/json',
+ 'Origin': 'http://www.pornotube.com',
+ })['tokenKey']
+
+ video_url = self._download_json(
+ 'https://api.aebn.net/delivery/v1/clips/%s/MP4' % video_id,
+ video_id, note='Downloading delivery information',
+ headers={'Authorization': token})['mediaUrl']
+
+ FIELDS = (
+ 'title', 'description', 'startSecond', 'endSecond', 'publishDate',
+ 'studios{name}', 'categories{name}', 'movieId', 'primaryImageNumber'
+ )
+
+ info = self._download_json(
+ 'https://api.aebn.net/content/v2/clips/%s?fields=%s'
+ % (video_id, ','.join(FIELDS)), video_id,
+ note='Downloading metadata',
+ headers={'Authorization': token})
+
+ if isinstance(info, list):
+ info = info[0]
+
+ title = info['title']
+
+ timestamp = int_or_none(info.get('publishDate'), scale=1000)
+ uploader = info.get('studios', [{}])[0].get('name')
+ movie_id = info.get('movieId')
+ primary_image_number = info.get('primaryImageNumber')
+ thumbnail = None
+ if movie_id and primary_image_number:
+ thumbnail = 'http://pic.aebn.net/dis/t/%s/%s_%08d.jpg' % (
+ movie_id, movie_id, primary_image_number)
+ start = int_or_none(info.get('startSecond'))
+ end = int_or_none(info.get('endSecond'))
+ duration = end - start if start and end else None
+ categories = [c['name'] for c in info.get('categories', []) if c.get('name')]
+
+ return {
+ 'id': video_id,
+ 'url': video_url,
+ 'title': title,
+ 'description': info.get('description'),
+ 'duration': duration,
+ 'timestamp': timestamp,
+ 'uploader': uploader,
+ 'thumbnail': thumbnail,
+ 'categories': categories,
+ 'age_limit': 18,
+ }
diff --git a/yt_dlp/extractor/pornovoisines.py b/yt_dlp/extractor/pornovoisines.py
new file mode 100644
index 0000000..2e51b4f
--- /dev/null
+++ b/yt_dlp/extractor/pornovoisines.py
@@ -0,0 +1,103 @@
+from .common import InfoExtractor
+from ..utils import (
+ int_or_none,
+ float_or_none,
+ unified_strdate,
+)
+
+
+class PornoVoisinesIE(InfoExtractor):
+ _WORKING = False
+ _VALID_URL = r'https?://(?:www\.)?pornovoisines\.com/videos/show/(?P<id>\d+)/(?P<display_id>[^/.]+)'
+
+ _TEST = {
+ 'url': 'http://www.pornovoisines.com/videos/show/919/recherche-appartement.html',
+ 'md5': '6f8aca6a058592ab49fe701c8ba8317b',
+ 'info_dict': {
+ 'id': '919',
+ 'display_id': 'recherche-appartement',
+ 'ext': 'mp4',
+ 'title': 'Recherche appartement',
+ 'description': 'md5:fe10cb92ae2dd3ed94bb4080d11ff493',
+ 'thumbnail': r're:^https?://.*\.jpg$',
+ 'upload_date': '20140925',
+ 'duration': 120,
+ 'view_count': int,
+ 'average_rating': float,
+ 'categories': ['Débutante', 'Débutantes', 'Scénario', 'Sodomie'],
+ 'age_limit': 18,
+ 'subtitles': {
+ 'fr': [{
+ 'ext': 'vtt',
+ }]
+ },
+ }
+ }
+
+ def _real_extract(self, url):
+ mobj = self._match_valid_url(url)
+ video_id = mobj.group('id')
+ display_id = mobj.group('display_id')
+
+ settings_url = self._download_json(
+ 'http://www.pornovoisines.com/api/video/%s/getsettingsurl/' % video_id,
+ video_id, note='Getting settings URL')['video_settings_url']
+ settings = self._download_json(settings_url, video_id)['data']
+
+ formats = []
+ for kind, data in settings['variants'].items():
+ if kind == 'HLS':
+ formats.extend(self._extract_m3u8_formats(
+ data, video_id, ext='mp4', entry_protocol='m3u8_native', m3u8_id='hls'))
+ elif kind == 'MP4':
+ for item in data:
+ formats.append({
+ 'url': item['url'],
+ 'height': item.get('height'),
+ 'bitrate': item.get('bitrate'),
+ })
+
+ webpage = self._download_webpage(url, video_id)
+
+ title = self._og_search_title(webpage)
+ description = self._og_search_description(webpage)
+
+ # The webpage has a bug - there's no space between "thumb" and src=
+ thumbnail = self._html_search_regex(
+ r'<img[^>]+class=([\'"])thumb\1[^>]*src=([\'"])(?P<url>[^"]+)\2',
+ webpage, 'thumbnail', fatal=False, group='url')
+
+ upload_date = unified_strdate(self._search_regex(
+ r'Le\s*<b>([\d/]+)', webpage, 'upload date', fatal=False))
+ duration = settings.get('main', {}).get('duration')
+ view_count = int_or_none(self._search_regex(
+ r'(\d+) vues', webpage, 'view count', fatal=False))
+ average_rating = self._search_regex(
+ r'Note\s*:\s*(\d+(?:,\d+)?)', webpage, 'average rating', fatal=False)
+ if average_rating:
+ average_rating = float_or_none(average_rating.replace(',', '.'))
+
+ categories = self._html_search_regex(
+ r'(?s)Catégories\s*:\s*<b>(.+?)</b>', webpage, 'categories', fatal=False)
+ if categories:
+ categories = [category.strip() for category in categories.split(',')]
+
+ subtitles = {'fr': [{
+ 'url': subtitle,
+ } for subtitle in settings.get('main', {}).get('vtt_tracks', {}).values()]}
+
+ return {
+ 'id': video_id,
+ 'display_id': display_id,
+ 'formats': formats,
+ 'title': title,
+ 'description': description,
+ 'thumbnail': thumbnail,
+ 'upload_date': upload_date,
+ 'duration': duration,
+ 'view_count': view_count,
+ 'average_rating': average_rating,
+ 'categories': categories,
+ 'age_limit': 18,
+ 'subtitles': subtitles,
+ }
diff --git a/yt_dlp/extractor/pornoxo.py b/yt_dlp/extractor/pornoxo.py
new file mode 100644
index 0000000..049feb4
--- /dev/null
+++ b/yt_dlp/extractor/pornoxo.py
@@ -0,0 +1,55 @@
+from .common import InfoExtractor
+from ..utils import (
+ str_to_int,
+)
+
+
+class PornoXOIE(InfoExtractor):
+ _WORKING = False
+ _VALID_URL = r'https?://(?:www\.)?pornoxo\.com/videos/(?P<id>\d+)/(?P<display_id>[^/]+)\.html'
+ _TEST = {
+ 'url': 'http://www.pornoxo.com/videos/7564/striptease-from-sexy-secretary.html',
+ 'md5': '582f28ecbaa9e6e24cb90f50f524ce87',
+ 'info_dict': {
+ 'id': '7564',
+ 'ext': 'flv',
+ 'title': 'Striptease From Sexy Secretary!',
+ 'display_id': 'striptease-from-sexy-secretary',
+ 'description': 'md5:0ee35252b685b3883f4a1d38332f9980',
+ 'categories': list, # NSFW
+ 'thumbnail': r're:https?://.*\.jpg$',
+ 'age_limit': 18,
+ }
+ }
+
+ def _real_extract(self, url):
+ mobj = self._match_valid_url(url)
+ video_id, display_id = mobj.groups()
+
+ webpage = self._download_webpage(url, video_id)
+ video_data = self._extract_jwplayer_data(webpage, video_id, require_title=False)
+
+ title = self._html_search_regex(
+ r'<title>([^<]+)\s*-\s*PornoXO', webpage, 'title')
+
+ view_count = str_to_int(self._html_search_regex(
+ r'[vV]iews:\s*([0-9,]+)', webpage, 'view count', fatal=False))
+
+ categories_str = self._html_search_regex(
+ r'<meta name="description" content=".*featuring\s*([^"]+)"',
+ webpage, 'categories', fatal=False)
+ categories = (
+ None if categories_str is None
+ else categories_str.split(','))
+
+ video_data.update({
+ 'id': video_id,
+ 'title': title,
+ 'display_id': display_id,
+ 'description': self._html_search_meta('description', webpage),
+ 'categories': categories,
+ 'view_count': view_count,
+ 'age_limit': 18,
+ })
+
+ return video_data
diff --git a/yt_dlp/extractor/pr0gramm.py b/yt_dlp/extractor/pr0gramm.py
new file mode 100644
index 0000000..66f8a5f
--- /dev/null
+++ b/yt_dlp/extractor/pr0gramm.py
@@ -0,0 +1,201 @@
+import json
+from urllib.parse import unquote
+
+from .common import InfoExtractor
+from ..compat import functools
+from ..utils import (
+ ExtractorError,
+ float_or_none,
+ int_or_none,
+ make_archive_id,
+ mimetype2ext,
+ str_or_none,
+ urljoin,
+)
+from ..utils.traversal import traverse_obj
+
+
+class Pr0grammIE(InfoExtractor):
+ _VALID_URL = r'https?://pr0gramm\.com\/(?:[^/?#]+/)+(?P<id>[\d]+)(?:[/?#:]|$)'
+ _TESTS = [{
+ 'url': 'https://pr0gramm.com/new/video/5466437',
+ 'info_dict': {
+ 'id': '5466437',
+ 'ext': 'mp4',
+ 'title': 'pr0gramm-5466437 by g11st',
+ 'tags': ['Neon Genesis Evangelion', 'Touhou Project', 'Fly me to the Moon', 'Marisad', 'Marisa Kirisame', 'video', 'sound', 'Marisa', 'Anime'],
+ 'uploader': 'g11st',
+ 'uploader_id': '394718',
+ 'timestamp': 1671590240,
+ 'upload_date': '20221221',
+ 'like_count': int,
+ 'dislike_count': int,
+ 'age_limit': 0,
+ 'thumbnail': r're:^https://thumb\.pr0gramm\.com/.*\.jpg',
+ '_old_archive_ids': ['pr0grammstatic 5466437'],
+ },
+ }, {
+ 'url': 'https://pr0gramm.com/new/3052805:comment28391322',
+ 'info_dict': {
+ 'id': '3052805',
+ 'ext': 'mp4',
+ 'title': 'pr0gramm-3052805 by Hansking1',
+ 'tags': 'count:15',
+ 'uploader': 'Hansking1',
+ 'uploader_id': '385563',
+ 'timestamp': 1552930408,
+ 'upload_date': '20190318',
+ 'like_count': int,
+ 'dislike_count': int,
+ 'age_limit': 0,
+ 'thumbnail': r're:^https://thumb\.pr0gramm\.com/.*\.jpg',
+ '_old_archive_ids': ['pr0grammstatic 3052805'],
+ },
+ }, {
+ # Requires verified account
+ 'url': 'https://pr0gramm.com/new/Gianna%20Michaels/5848332',
+ 'info_dict': {
+ 'id': '5848332',
+ 'ext': 'mp4',
+ 'title': 'pr0gramm-5848332 by erd0pfel',
+ 'tags': 'count:18',
+ 'uploader': 'erd0pfel',
+ 'uploader_id': '349094',
+ 'timestamp': 1694489652,
+ 'upload_date': '20230912',
+ 'like_count': int,
+ 'dislike_count': int,
+ 'age_limit': 18,
+ 'thumbnail': r're:^https://thumb\.pr0gramm\.com/.*\.jpg',
+ '_old_archive_ids': ['pr0grammstatic 5848332'],
+ },
+ }, {
+ 'url': 'https://pr0gramm.com/top/5895149',
+ 'info_dict': {
+ 'id': '5895149',
+ 'ext': 'mp4',
+ 'title': 'pr0gramm-5895149 by algoholigSeeManThrower',
+ 'tags': 'count:19',
+ 'uploader': 'algoholigSeeManThrower',
+ 'uploader_id': '457556',
+ 'timestamp': 1697580902,
+ 'upload_date': '20231018',
+ 'like_count': int,
+ 'dislike_count': int,
+ 'age_limit': 0,
+ 'thumbnail': 'https://thumb.pr0gramm.com/2023/10/18/db47bb3db5e1a1b3.jpg',
+ '_old_archive_ids': ['pr0grammstatic 5895149'],
+ },
+ }, {
+ 'url': 'https://pr0gramm.com/static/5466437',
+ 'only_matching': True,
+ }, {
+ 'url': 'https://pr0gramm.com/new/rowan%20atkinson%20herr%20bohne/3052805',
+ 'only_matching': True,
+ }, {
+ 'url': 'https://pr0gramm.com/user/froschler/dafur-ist-man-hier/5091290',
+ 'only_matching': True,
+ }]
+
+ BASE_URL = 'https://pr0gramm.com'
+
+ @functools.cached_property
+ def _is_logged_in(self):
+ return 'pp' in self._get_cookies(self.BASE_URL)
+
+ @functools.cached_property
+ def _maximum_flags(self):
+        # We need to guess the flags for the content, otherwise the API will
+        # raise an error. The maximum flags allowed for the account can be
+        # inferred from the cookies.
+        # Bitflags are (MSB first): pol, nsfp, nsfl, nsfw, sfw
+ flags = 0b10001
+ if self._is_logged_in:
+ flags |= 0b01000
+ cookies = self._get_cookies(self.BASE_URL)
+ if 'me' not in cookies:
+ self._download_webpage(self.BASE_URL, None, 'Refreshing verification information')
+ if traverse_obj(cookies, ('me', {lambda x: x.value}, {unquote}, {json.loads}, 'verified')):
+ flags |= 0b00110
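+        # e.g. anonymous: 0b10001 (sfw|pol); logged in: 0b11001 (adds nsfw);
+        # logged in and verified: 0b11111 (all flags)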
+
+ return flags
+
+ def _call_api(self, endpoint, video_id, query={}, note='Downloading API json'):
+ data = self._download_json(
+ f'https://pr0gramm.com/api/items/{endpoint}',
+ video_id, note, query=query, expected_status=403)
+
+ error = traverse_obj(data, ('error', {str}))
+ if error in ('nsfwRequired', 'nsflRequired', 'nsfpRequired', 'verificationRequired'):
+ if not self._is_logged_in:
+ self.raise_login_required()
+ raise ExtractorError(f'Unverified account cannot access NSFW/NSFL ({error})', expected=True)
+ elif error:
+ message = traverse_obj(data, ('msg', {str})) or error
+ raise ExtractorError(f'API returned error: {message}', expected=True)
+
+ return data
+
+ @staticmethod
+ def _create_source_url(path):
+ return urljoin('https://img.pr0gramm.com', path)
+
+ def _real_extract(self, url):
+ video_id = self._match_id(url)
+ video_info = traverse_obj(
+ self._call_api('get', video_id, {'id': video_id, 'flags': self._maximum_flags}),
+ ('items', 0, {dict}))
+
+ source = video_info.get('image')
+ if not source or not source.endswith('mp4'):
+ self.raise_no_formats('Could not extract a video', expected=bool(source), video_id=video_id)
+
+ metadata = self._call_api('info', video_id, {'itemId': video_id}, note='Downloading tags')
+ tags = traverse_obj(metadata, ('tags', ..., 'tag', {str}))
+ # Sorted by "confidence", higher confidence = earlier in list
+ confidences = traverse_obj(metadata, ('tags', ..., 'confidence', ({int}, {float})))
+ if confidences:
+ tags = [tag for _, tag in sorted(zip(confidences, tags), reverse=True)]
+
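+        # Items may expose pre-transcoded 'variants'; otherwise fall back to
+        # the original 'image' path as a single source format.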
+ formats = traverse_obj(video_info, ('variants', ..., {
+ 'format_id': ('name', {str}),
+ 'url': ('path', {self._create_source_url}),
+ 'ext': ('mimeType', {mimetype2ext}),
+ 'vcodec': ('codec', {str}),
+ 'width': ('width', {int_or_none}),
+ 'height': ('height', {int_or_none}),
+ 'bitrate': ('bitRate', {float_or_none}),
+ 'filesize': ('fileSize', {int_or_none}),
+ })) if video_info.get('variants') else [{
+ 'ext': 'mp4',
+ 'format_id': 'source',
+ **traverse_obj(video_info, {
+ 'url': ('image', {self._create_source_url}),
+ 'width': ('width', {int_or_none}),
+ 'height': ('height', {int_or_none}),
+ }),
+ }]
+
+ subtitles = {}
+ for subtitle in traverse_obj(video_info, ('subtitles', lambda _, v: v['language'])):
+ subtitles.setdefault(subtitle['language'], []).append(traverse_obj(subtitle, {
+ 'url': ('path', {self._create_source_url}),
+ 'note': ('label', {str}),
+ }))
+
+ return {
+ 'id': video_id,
+ 'title': f'pr0gramm-{video_id} by {video_info.get("user")}',
+ 'tags': tags,
+ 'formats': formats,
+ 'subtitles': subtitles,
+ 'age_limit': 18 if traverse_obj(video_info, ('flags', {0b110.__and__})) else 0,
+ '_old_archive_ids': [make_archive_id('Pr0grammStatic', video_id)],
+ **traverse_obj(video_info, {
+ 'uploader': ('user', {str}),
+ 'uploader_id': ('userId', {str_or_none}),
+ 'like_count': ('up', {int}),
+ 'dislike_count': ('down', {int}),
+ 'timestamp': ('created', {int}),
+ 'thumbnail': ('thumb', {lambda x: urljoin('https://thumb.pr0gramm.com', x)})
+ }),
+ }
diff --git a/yt_dlp/extractor/prankcast.py b/yt_dlp/extractor/prankcast.py
new file mode 100644
index 0000000..56cd40d
--- /dev/null
+++ b/yt_dlp/extractor/prankcast.py
@@ -0,0 +1,137 @@
+import json
+
+from .common import InfoExtractor
+from ..utils import float_or_none, parse_iso8601, str_or_none, try_call
+from ..utils.traversal import traverse_obj
+
+
+class PrankCastIE(InfoExtractor):
+ _VALID_URL = r'https?://(?:www\.)?prankcast\.com/[^/?#]+/showreel/(?P<id>\d+)-(?P<display_id>[^/?#]+)'
+ _TESTS = [{
+ 'url': 'https://prankcast.com/Devonanustart/showreel/1561-Beverly-is-back-like-a-heart-attack-',
+ 'info_dict': {
+ 'id': '1561',
+ 'ext': 'mp3',
+ 'title': 'Beverly is back like a heart attack!',
+ 'display_id': 'Beverly-is-back-like-a-heart-attack-',
+ 'timestamp': 1661391575,
+ 'uploader': 'Devonanustart',
+ 'channel_id': '4',
+ 'duration': 7918,
+ 'cast': ['Devonanustart', 'Phonelosers'],
+ 'description': '',
+ 'categories': ['prank'],
+ 'tags': ['prank call', 'prank', 'live show'],
+ 'upload_date': '20220825'
+ }
+ }, {
+ 'url': 'https://prankcast.com/phonelosers/showreel/2048-NOT-COOL',
+ 'info_dict': {
+ 'id': '2048',
+ 'ext': 'mp3',
+ 'title': 'NOT COOL',
+ 'display_id': 'NOT-COOL',
+ 'timestamp': 1665028364,
+ 'uploader': 'phonelosers',
+ 'channel_id': '6',
+ 'duration': 4044,
+ 'cast': ['phonelosers'],
+ 'description': '',
+ 'categories': ['prank'],
+ 'tags': ['prank call', 'prank', 'live show'],
+ 'upload_date': '20221006'
+ }
+ }]
+
+ def _real_extract(self, url):
+ video_id, display_id = self._match_valid_url(url).group('id', 'display_id')
+
+ webpage = self._download_webpage(url, video_id)
+ json_info = self._search_nextjs_data(webpage, video_id)['props']['pageProps']['ssr_data_showreel']
+
+ uploader = json_info.get('user_name')
+ guests_json = self._parse_json(json_info.get('guests_json') or '{}', video_id)
+ start_date = parse_iso8601(json_info.get('start_date'))
+
+ return {
+ 'id': video_id,
+ 'title': json_info.get('broadcast_title') or self._og_search_title(webpage),
+ 'display_id': display_id,
+ 'url': f'{json_info["broadcast_url"]}{json_info["recording_hash"]}.mp3',
+ 'timestamp': start_date,
+ 'uploader': uploader,
+ 'channel_id': str_or_none(json_info.get('user_id')),
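+            # no explicit duration field; derive it from the ISO 8601 start/end timestamps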
+ 'duration': try_call(lambda: parse_iso8601(json_info['end_date']) - start_date),
+ 'cast': list(filter(None, [uploader] + traverse_obj(guests_json, (..., 'name')))),
+ 'description': json_info.get('broadcast_description'),
+ 'categories': [json_info.get('broadcast_category')],
+ 'tags': try_call(lambda: json_info['broadcast_tags'].split(','))
+ }
+
+
+class PrankCastPostIE(InfoExtractor):
+ _VALID_URL = r'https?://(?:www\.)?prankcast\.com/[^/?#]+/posts/(?P<id>\d+)-(?P<display_id>[^/?#]+)'
+ _TESTS = [{
+ 'url': 'https://prankcast.com/devonanustart/posts/6214-happy-national-rachel-day-',
+ 'info_dict': {
+ 'id': '6214',
+ 'ext': 'mp3',
+ 'title': 'Happy National Rachel Day!',
+ 'display_id': 'happy-national-rachel-day-',
+ 'timestamp': 1704333938,
+ 'uploader': 'Devonanustart',
+ 'channel_id': '4',
+ 'duration': 13175,
+ 'cast': ['Devonanustart'],
+ 'description': '',
+ 'categories': ['prank call'],
+ 'upload_date': '20240104'
+ }
+ }, {
+ 'url': 'https://prankcast.com/despicabledogs/posts/6217-jake-the-work-crow-',
+ 'info_dict': {
+ 'id': '6217',
+ 'ext': 'mp3',
+ 'title': 'Jake the Work Crow!',
+ 'display_id': 'jake-the-work-crow-',
+ 'timestamp': 1704346592,
+ 'uploader': 'despicabledogs',
+ 'channel_id': '957',
+ 'duration': 263.287,
+ 'cast': ['despicabledogs'],
+ 'description': 'https://imgur.com/a/vtxLvKU',
+ 'categories': [],
+ 'upload_date': '20240104'
+ }
+ }]
+
+ def _real_extract(self, url):
+ video_id, display_id = self._match_valid_url(url).group('id', 'display_id')
+
+ webpage = self._download_webpage(url, video_id)
+ post = self._search_nextjs_data(webpage, video_id)['props']['pageProps']['ssr_data_posts']
+ content = self._parse_json(post['post_contents_json'], video_id)[0]
+
+ uploader = post.get('user_name')
+ guests_json = traverse_obj(content, ('guests_json', {json.loads}, {dict})) or {}
+
+ return {
+ 'id': video_id,
+ 'title': post.get('post_title') or self._og_search_title(webpage),
+ 'display_id': display_id,
+ 'url': content.get('url'),
+ 'timestamp': parse_iso8601(content.get('start_date') or content.get('crdate'), ' '),
+ 'uploader': uploader,
+ 'channel_id': str_or_none(post.get('user_id')),
+ 'duration': float_or_none(content.get('duration')),
+ 'cast': list(filter(None, [uploader] + traverse_obj(guests_json, (..., 'name')))),
+ 'description': post.get('post_body'),
+ 'categories': list(filter(None, [content.get('category')])),
+            'tags': try_call(lambda: list(filter(None, post['post_tags'].split(',')))),
+ 'subtitles': {
+ 'live_chat': [{
+ 'url': f'https://prankcast.com/api/private/chat/select-broadcast?id={post["content_id"]}&cache=',
+ 'ext': 'json',
+ }],
+ } if post.get('content_id') else None
+ }
diff --git a/yt_dlp/extractor/premiershiprugby.py b/yt_dlp/extractor/premiershiprugby.py
new file mode 100644
index 0000000..67d41fd
--- /dev/null
+++ b/yt_dlp/extractor/premiershiprugby.py
@@ -0,0 +1,39 @@
+from .common import InfoExtractor
+from ..utils import int_or_none, traverse_obj
+
+
+class PremiershipRugbyIE(InfoExtractor):
+    _VALID_URL = r'https?://(?:\w+\.)premiershiprugby\.com/watch/(?P<id>[\w-]+)'
+ _TESTS = [{
+ 'url': 'https://www.premiershiprugby.com/watch/full-match-harlequins-v-newcastle-falcons',
+ 'info_dict': {
+ 'id': '0_mbkb7ldt',
+ 'title': 'Full Match: Harlequins v Newcastle Falcons',
+ 'ext': 'mp4',
+ 'thumbnail': 'https://open.http.mp.streamamg.com/p/3000914/sp/300091400/thumbnail/entry_id/0_mbkb7ldt//width/960/height/540/type/1/quality/75',
+ 'duration': 6093.0,
+ 'tags': ['video'],
+ 'categories': ['Full Match', 'Harlequins', 'Newcastle Falcons', 'gallaher premiership'],
+ }
+ }]
+
+ def _real_extract(self, url):
+ display_id = self._match_id(url)
+ json_data = self._download_json(
+ f'https://article-cms-api.incrowdsports.com/v2/articles/slug/{display_id}',
+ display_id, query={'clientId': 'PRL'})['data']['article']
+
+ formats, subs = self._extract_m3u8_formats_and_subtitles(
+ json_data['heroMedia']['content']['videoLink'], display_id)
+
+ return {
+ 'id': json_data['heroMedia']['content']['sourceSystemId'],
+ 'display_id': display_id,
+ 'title': traverse_obj(json_data, ('heroMedia', 'title')),
+ 'formats': formats,
+ 'subtitles': subs,
+ 'thumbnail': traverse_obj(json_data, ('heroMedia', 'content', 'videoThumbnail')),
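+            # 'msDuration' is in milliseconds; scale=1000 converts it to seconds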
+ 'duration': int_or_none(traverse_obj(json_data, ('heroMedia', 'content', 'metadata', 'msDuration')), scale=1000),
+ 'tags': json_data.get('tags'),
+ 'categories': traverse_obj(json_data, ('categories', ..., 'text')),
+ }
diff --git a/yt_dlp/extractor/presstv.py b/yt_dlp/extractor/presstv.py
new file mode 100644
index 0000000..26ce74a
--- /dev/null
+++ b/yt_dlp/extractor/presstv.py
@@ -0,0 +1,69 @@
+from .common import InfoExtractor
+from ..utils import remove_start
+
+
+class PressTVIE(InfoExtractor):
+ _VALID_URL = r'https?://(?:www\.)?presstv\.ir/[^/]+/(?P<y>\d+)/(?P<m>\d+)/(?P<d>\d+)/(?P<id>\d+)/(?P<display_id>[^/]+)?'
+
+ _TEST = {
+ 'url': 'http://www.presstv.ir/Detail/2016/04/09/459911/Australian-sewerage-treatment-facility-/',
+ 'md5': '5d7e3195a447cb13e9267e931d8dd5a5',
+ 'info_dict': {
+ 'id': '459911',
+ 'display_id': 'Australian-sewerage-treatment-facility-',
+ 'ext': 'mp4',
+ 'title': 'Organic mattresses used to clean waste water',
+ 'upload_date': '20160409',
+ 'thumbnail': r're:^https?://.*\.jpg',
+ 'description': 'md5:20002e654bbafb6908395a5c0cfcd125'
+ }
+ }
+
+ def _real_extract(self, url):
+ mobj = self._match_valid_url(url)
+ video_id = mobj.group('id')
+ display_id = mobj.group('display_id') or video_id
+
+ webpage = self._download_webpage(url, display_id)
+
+ # extract video URL from webpage
+ video_url = self._hidden_inputs(webpage)['inpPlayback']
+
+ # build list of available formats
+ # specified in http://www.presstv.ir/Scripts/playback.js
+ base_url = 'http://192.99.219.222:82/presstv'
+ _formats = [
+ (180, '_low200.mp4'),
+ (360, '_low400.mp4'),
+ (720, '_low800.mp4'),
+ (1080, '.mp4')
+ ]
+
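+        # the hidden input's URL always ends in '.mp4'; strip that suffix and
+        # append each variant's extension to build the format URLs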
+ formats = [{
+ 'url': base_url + video_url[:-4] + extension,
+ 'format_id': '%dp' % height,
+ 'height': height,
+ } for height, extension in _formats]
+
+ # extract video metadata
+ title = remove_start(
+ self._html_search_meta('title', webpage, fatal=True), 'PressTV-')
+
+ thumbnail = self._og_search_thumbnail(webpage)
+ description = self._og_search_description(webpage)
+
+ upload_date = '%04d%02d%02d' % (
+ int(mobj.group('y')),
+ int(mobj.group('m')),
+ int(mobj.group('d')),
+ )
+
+ return {
+ 'id': video_id,
+ 'display_id': display_id,
+ 'title': title,
+ 'formats': formats,
+ 'thumbnail': thumbnail,
+ 'upload_date': upload_date,
+ 'description': description
+ }
diff --git a/yt_dlp/extractor/projectveritas.py b/yt_dlp/extractor/projectveritas.py
new file mode 100644
index 0000000..daf1405
--- /dev/null
+++ b/yt_dlp/extractor/projectveritas.py
@@ -0,0 +1,52 @@
+from .common import InfoExtractor
+from ..utils import (
+ ExtractorError,
+ traverse_obj,
+ unified_strdate,
+)
+
+
+class ProjectVeritasIE(InfoExtractor):
+ _WORKING = False
+ _VALID_URL = r'https?://(?:www\.)?projectveritas\.com/(?P<type>news|video)/(?P<id>[^/?#]+)'
+ _TESTS = [{
+ 'url': 'https://www.projectveritas.com/news/exclusive-inside-the-new-york-and-new-jersey-hospitals-battling-coronavirus/',
+ 'info_dict': {
+ 'id': '51910aab-365a-5cf1-88f2-8eb1ca5fd3c6',
+ 'ext': 'mp4',
+ 'title': 'Exclusive: Inside The New York and New Jersey Hospitals Battling Coronavirus',
+ 'upload_date': '20200327',
+ 'thumbnail': 'md5:6076477fe50b03eb8708be9415e18e1c',
+ }
+ }, {
+ 'url': 'https://www.projectveritas.com/video/ilhan-omar-connected-ballot-harvester-in-cash-for-ballots-scheme-car-is-full/',
+ 'info_dict': {
+ 'id': 'c5aab304-a56b-54b1-9f0b-03b77bc5f2f6',
+ 'ext': 'mp4',
+ 'title': 'Ilhan Omar connected Ballot Harvester in cash-for-ballots scheme: "Car is full" of absentee ballots',
+ 'upload_date': '20200927',
+ 'thumbnail': 'md5:194b8edf0e2ba64f25500ff4378369a4',
+ }
+ }]
+
+    def _real_extract(self, url):
+        display_id, page_type = self._match_valid_url(url).group('id', 'type')
+        api_url = f'https://www.projectveritas.com/page-data/{page_type}/{display_id}/page-data.json'
+        data_json = self._download_json(api_url, display_id)['result']['data']
+ main_data = traverse_obj(data_json, 'video', 'post')
+ video_id = main_data['id']
+ thumbnail = traverse_obj(main_data, ('image', 'ogImage', 'src'))
+ mux_asset = traverse_obj(main_data,
+ 'muxAsset', ('body', 'json', 'content', ..., 'data', 'target', 'fields', 'muxAsset'),
+ get_all=False, expected_type=dict)
+ if not mux_asset:
+            raise ExtractorError('No video found at the provided URL', expected=True)
+ playback_id = traverse_obj(mux_asset, 'playbackId', ('en-US', 'playbackId'))
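+        # videos are hosted on Mux; the HLS manifest is addressed by playback ID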
+ formats = self._extract_m3u8_formats(f'https://stream.mux.com/{playback_id}.m3u8', video_id)
+ return {
+ 'id': video_id,
+ 'title': main_data['title'],
+ 'upload_date': unified_strdate(main_data.get('date')),
+ 'thumbnail': thumbnail.replace('//', ''),
+ 'formats': formats,
+ }
diff --git a/yt_dlp/extractor/prosiebensat1.py b/yt_dlp/extractor/prosiebensat1.py
new file mode 100644
index 0000000..46e2e8a
--- /dev/null
+++ b/yt_dlp/extractor/prosiebensat1.py
@@ -0,0 +1,496 @@
+import re
+from hashlib import sha1
+
+from .common import InfoExtractor
+from ..compat import compat_str
+from ..utils import (
+ ExtractorError,
+ determine_ext,
+ float_or_none,
+ int_or_none,
+ merge_dicts,
+ unified_strdate,
+)
+
+
+class ProSiebenSat1BaseIE(InfoExtractor):
+ _GEO_BYPASS = False
+ _ACCESS_ID = None
+ _SUPPORTED_PROTOCOLS = 'dash:clear,hls:clear,progressive:clear'
+ _V4_BASE_URL = 'https://vas-v4.p7s1video.net/4.0/get'
+
+ def _extract_video_info(self, url, clip_id):
+ client_location = url
+
+ video = self._download_json(
+ 'http://vas.sim-technik.de/vas/live/v2/videos',
+ clip_id, 'Downloading videos JSON', query={
+ 'access_token': self._TOKEN,
+ 'client_location': client_location,
+ 'client_name': self._CLIENT_NAME,
+ 'ids': clip_id,
+ })[0]
+
+ if not self.get_param('allow_unplayable_formats') and video.get('is_protected') is True:
+ self.report_drm(clip_id)
+
+ formats = []
+ if self._ACCESS_ID:
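+            # v4 API requests are signed with a client_token: the hex SHA-1 of
+            # static key material concatenated with the request parameters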
+ raw_ct = self._ENCRYPTION_KEY + clip_id + self._IV + self._ACCESS_ID
+ protocols = self._download_json(
+ self._V4_BASE_URL + 'protocols', clip_id,
+ 'Downloading protocols JSON',
+ headers=self.geo_verification_headers(), query={
+ 'access_id': self._ACCESS_ID,
+ 'client_token': sha1((raw_ct).encode()).hexdigest(),
+ 'video_id': clip_id,
+ }, fatal=False, expected_status=(403,)) or {}
+ error = protocols.get('error') or {}
+ if error.get('title') == 'Geo check failed':
+ self.raise_geo_restricted(countries=['AT', 'CH', 'DE'])
+ server_token = protocols.get('server_token')
+ if server_token:
+ urls = (self._download_json(
+ self._V4_BASE_URL + 'urls', clip_id, 'Downloading urls JSON', query={
+ 'access_id': self._ACCESS_ID,
+ 'client_token': sha1((raw_ct + server_token + self._SUPPORTED_PROTOCOLS).encode()).hexdigest(),
+ 'protocols': self._SUPPORTED_PROTOCOLS,
+ 'server_token': server_token,
+ 'video_id': clip_id,
+ }, fatal=False) or {}).get('urls') or {}
+ for protocol, variant in urls.items():
+ source_url = variant.get('clear', {}).get('url')
+ if not source_url:
+ continue
+ if protocol == 'dash':
+ formats.extend(self._extract_mpd_formats(
+ source_url, clip_id, mpd_id=protocol, fatal=False))
+ elif protocol == 'hls':
+ formats.extend(self._extract_m3u8_formats(
+ source_url, clip_id, 'mp4', 'm3u8_native',
+ m3u8_id=protocol, fatal=False))
+ else:
+ formats.append({
+ 'url': source_url,
+ 'format_id': protocol,
+ })
+ if not formats:
+ source_ids = [compat_str(source['id']) for source in video['sources']]
+
+ client_id = self._SALT[:2] + sha1(''.join([clip_id, self._SALT, self._TOKEN, client_location, self._SALT, self._CLIENT_NAME]).encode('utf-8')).hexdigest()
+
+ sources = self._download_json(
+ 'http://vas.sim-technik.de/vas/live/v2/videos/%s/sources' % clip_id,
+ clip_id, 'Downloading sources JSON', query={
+ 'access_token': self._TOKEN,
+ 'client_id': client_id,
+ 'client_location': client_location,
+ 'client_name': self._CLIENT_NAME,
+ })
+ server_id = sources['server_id']
+
+ def fix_bitrate(bitrate):
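+                # the API mixes bit/s and kbit/s; values evenly divisible by
+                # 1000 are assumed to be bit/s and are scaled down to kbit/s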
+ bitrate = int_or_none(bitrate)
+ if not bitrate:
+ return None
+ return (bitrate // 1000) if bitrate % 1000 == 0 else bitrate
+
+ for source_id in source_ids:
+ client_id = self._SALT[:2] + sha1(''.join([self._SALT, clip_id, self._TOKEN, server_id, client_location, source_id, self._SALT, self._CLIENT_NAME]).encode('utf-8')).hexdigest()
+ urls = self._download_json(
+ 'http://vas.sim-technik.de/vas/live/v2/videos/%s/sources/url' % clip_id,
+ clip_id, 'Downloading urls JSON', fatal=False, query={
+ 'access_token': self._TOKEN,
+ 'client_id': client_id,
+ 'client_location': client_location,
+ 'client_name': self._CLIENT_NAME,
+ 'server_id': server_id,
+ 'source_ids': source_id,
+ })
+ if not urls:
+ continue
+ if urls.get('status_code') != 0:
+ raise ExtractorError('This video is unavailable', expected=True)
+ urls_sources = urls['sources']
+ if isinstance(urls_sources, dict):
+ urls_sources = urls_sources.values()
+ for source in urls_sources:
+ source_url = source.get('url')
+ if not source_url:
+ continue
+ protocol = source.get('protocol')
+ mimetype = source.get('mimetype')
+ if mimetype == 'application/f4m+xml' or 'f4mgenerator' in source_url or determine_ext(source_url) == 'f4m':
+ formats.extend(self._extract_f4m_formats(
+ source_url, clip_id, f4m_id='hds', fatal=False))
+ elif mimetype == 'application/x-mpegURL':
+ formats.extend(self._extract_m3u8_formats(
+ source_url, clip_id, 'mp4', 'm3u8_native',
+ m3u8_id='hls', fatal=False))
+ elif mimetype == 'application/dash+xml':
+ formats.extend(self._extract_mpd_formats(
+ source_url, clip_id, mpd_id='dash', fatal=False))
+ else:
+ tbr = fix_bitrate(source['bitrate'])
+ if protocol in ('rtmp', 'rtmpe'):
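+                            # split the stream URL into app and play path at
+                            # the last 'mp4:' marker for the RTMP downloader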
+ mobj = re.search(r'^(?P<url>rtmpe?://[^/]+)/(?P<path>.+)$', source_url)
+ if not mobj:
+ continue
+ path = mobj.group('path')
+ mp4colon_index = path.rfind('mp4:')
+ app = path[:mp4colon_index]
+ play_path = path[mp4colon_index:]
+ formats.append({
+ 'url': '%s/%s' % (mobj.group('url'), app),
+ 'app': app,
+ 'play_path': play_path,
+ 'player_url': 'http://livepassdl.conviva.com/hf/ver/2.79.0.17083/LivePassModuleMain.swf',
+ 'page_url': 'http://www.prosieben.de',
+ 'tbr': tbr,
+ 'ext': 'flv',
+ 'format_id': 'rtmp%s' % ('-%d' % tbr if tbr else ''),
+ })
+ else:
+ formats.append({
+ 'url': source_url,
+ 'tbr': tbr,
+ 'format_id': 'http%s' % ('-%d' % tbr if tbr else ''),
+ })
+
+ return {
+ 'duration': float_or_none(video.get('duration')),
+ 'formats': formats,
+ }
+
+
+class ProSiebenSat1IE(ProSiebenSat1BaseIE):
+ IE_NAME = 'prosiebensat1'
+ IE_DESC = 'ProSiebenSat.1 Digital'
+ _VALID_URL = r'''(?x)
+ https?://
+ (?:www\.)?
+ (?:
+ (?:beta\.)?
+ (?:
+ prosieben(?:maxx)?|sixx|sat1(?:gold)?|kabeleins(?:doku)?|the-voice-of-germany|advopedia
+ )\.(?:de|at|ch)|
+ ran\.de|fem\.com|advopedia\.de|galileo\.tv/video
+ )
+ /(?P<id>.+)
+ '''
+
+ _TESTS = [
+ {
+ # Tests changes introduced in https://github.com/ytdl-org/youtube-dl/pull/6242
+ # in response to fixing https://github.com/ytdl-org/youtube-dl/issues/6215:
+ # - malformed f4m manifest support
+ # - proper handling of URLs starting with `https?://` in 2.0 manifests
+ # - recursive child f4m manifests extraction
+ 'url': 'http://www.prosieben.de/tv/circus-halligalli/videos/218-staffel-2-episode-18-jahresrueckblick-ganze-folge',
+ 'info_dict': {
+ 'id': '2104602',
+ 'ext': 'mp4',
+ 'title': 'CIRCUS HALLIGALLI - Episode 18 - Staffel 2',
+ 'description': 'md5:8733c81b702ea472e069bc48bb658fc1',
+ 'upload_date': '20131231',
+ 'duration': 5845.04,
+ 'series': 'CIRCUS HALLIGALLI',
+ 'season_number': 2,
+ 'episode': 'Episode 18 - Staffel 2',
+ 'episode_number': 18,
+ },
+ },
+ {
+ 'url': 'http://www.prosieben.de/videokatalog/Gesellschaft/Leben/Trends/video-Lady-Umstyling-f%C3%BCr-Audrina-Rebekka-Audrina-Fergen-billig-aussehen-Battal-Modica-700544.html',
+ 'info_dict': {
+ 'id': '2570327',
+ 'ext': 'mp4',
+ 'title': 'Lady-Umstyling für Audrina',
+ 'description': 'md5:4c16d0c17a3461a0d43ea4084e96319d',
+ 'upload_date': '20131014',
+ 'duration': 606.76,
+ },
+ 'params': {
+ # rtmp download
+ 'skip_download': True,
+ },
+ 'skip': 'Seems to be broken',
+ },
+ {
+ 'url': 'http://www.prosiebenmaxx.de/tv/experience/video/144-countdown-fuer-die-autowerkstatt-ganze-folge',
+ 'info_dict': {
+ 'id': '2429369',
+ 'ext': 'mp4',
+ 'title': 'Countdown für die Autowerkstatt',
+ 'description': 'md5:809fc051a457b5d8666013bc40698817',
+ 'upload_date': '20140223',
+ 'duration': 2595.04,
+ },
+ 'params': {
+ # rtmp download
+ 'skip_download': True,
+ },
+ 'skip': 'This video is unavailable',
+ },
+ {
+ 'url': 'http://www.sixx.de/stars-style/video/sexy-laufen-in-ugg-boots-clip',
+ 'info_dict': {
+ 'id': '2904997',
+ 'ext': 'mp4',
+ 'title': 'Sexy laufen in Ugg Boots',
+ 'description': 'md5:edf42b8bd5bc4e5da4db4222c5acb7d6',
+ 'upload_date': '20140122',
+ 'duration': 245.32,
+ },
+ 'params': {
+ # rtmp download
+ 'skip_download': True,
+ },
+ 'skip': 'This video is unavailable',
+ },
+ {
+ 'url': 'http://www.sat1.de/film/der-ruecktritt/video/im-interview-kai-wiesinger-clip',
+ 'info_dict': {
+ 'id': '2906572',
+ 'ext': 'mp4',
+ 'title': 'Im Interview: Kai Wiesinger',
+ 'description': 'md5:e4e5370652ec63b95023e914190b4eb9',
+ 'upload_date': '20140203',
+ 'duration': 522.56,
+ },
+ 'params': {
+ # rtmp download
+ 'skip_download': True,
+ },
+ 'skip': 'This video is unavailable',
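+        # pyvideo's metadata lives in the pyvideo/data GitHub repository;
+        # fall back to scraping the page when no JSON entry exists there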
+ },
+ {
+ 'url': 'http://www.kabeleins.de/tv/rosins-restaurants/videos/jagd-auf-fertigkost-im-elsthal-teil-2-ganze-folge',
+ 'info_dict': {
+ 'id': '2992323',
+ 'ext': 'mp4',
+ 'title': 'Jagd auf Fertigkost im Elsthal - Teil 2',
+ 'description': 'md5:2669cde3febe9bce13904f701e774eb6',
+ 'upload_date': '20141014',
+ 'duration': 2410.44,
+ },
+ 'params': {
+ # rtmp download
+ 'skip_download': True,
+ },
+ 'skip': 'This video is unavailable',
+ },
+ {
+ 'url': 'http://www.ran.de/fussball/bundesliga/video/schalke-toennies-moechte-raul-zurueck-ganze-folge',
+ 'info_dict': {
+ 'id': '3004256',
+ 'ext': 'mp4',
+ 'title': 'Schalke: Tönnies möchte Raul zurück',
+ 'description': 'md5:4b5b271d9bcde223b54390754c8ece3f',
+ 'upload_date': '20140226',
+ 'duration': 228.96,
+ },
+ 'params': {
+ # rtmp download
+ 'skip_download': True,
+ },
+ 'skip': 'This video is unavailable',
+ },
+ {
+ 'url': 'http://www.the-voice-of-germany.de/video/31-andreas-kuemmert-rocket-man-clip',
+ 'info_dict': {
+ 'id': '2572814',
+ 'ext': 'mp4',
+ 'title': 'The Voice of Germany - Andreas Kümmert: Rocket Man',
+ 'description': 'md5:6ddb02b0781c6adf778afea606652e38',
+ 'timestamp': 1382041620,
+ 'upload_date': '20131017',
+ 'duration': 469.88,
+ },
+ 'params': {
+ 'skip_download': True,
+ },
+ },
+ {
+ 'url': 'http://www.fem.com/videos/beauty-lifestyle/kurztrips-zum-valentinstag',
+ 'info_dict': {
+ 'id': '2156342',
+ 'ext': 'mp4',
+ 'title': 'Kurztrips zum Valentinstag',
+ 'description': 'Romantischer Kurztrip zum Valentinstag? Nina Heinemann verrät, was sich hier wirklich lohnt.',
+ 'duration': 307.24,
+ },
+ 'params': {
+ 'skip_download': True,
+ },
+ },
+ {
+ 'url': 'http://www.prosieben.de/tv/joko-gegen-klaas/videos/playlists/episode-8-ganze-folge-playlist',
+ 'info_dict': {
+ 'id': '439664',
+ 'title': 'Episode 8 - Ganze Folge - Playlist',
+ 'description': 'md5:63b8963e71f481782aeea877658dec84',
+ },
+ 'playlist_count': 2,
+ 'skip': 'This video is unavailable',
+ },
+ {
+ # title in <h2 class="subtitle">
+ 'url': 'http://www.prosieben.de/stars/oscar-award/videos/jetzt-erst-enthuellt-das-geheimnis-von-emma-stones-oscar-robe-clip',
+ 'info_dict': {
+ 'id': '4895826',
+ 'ext': 'mp4',
+ 'title': 'Jetzt erst enthüllt: Das Geheimnis von Emma Stones Oscar-Robe',
+ 'description': 'md5:e5ace2bc43fadf7b63adc6187e9450b9',
+ 'upload_date': '20170302',
+ },
+ 'params': {
+ 'skip_download': True,
+ },
+ 'skip': 'geo restricted to Germany',
+ },
+ {
+ # geo restricted to Germany
+ 'url': 'http://www.kabeleinsdoku.de/tv/mayday-alarm-im-cockpit/video/102-notlandung-im-hudson-river-ganze-folge',
+ 'only_matching': True,
+ },
+ {
+ # geo restricted to Germany
+ 'url': 'http://www.sat1gold.de/tv/edel-starck/video/11-staffel-1-episode-1-partner-wider-willen-ganze-folge',
+ 'only_matching': True,
+ },
+ {
+ # geo restricted to Germany
+ 'url': 'https://www.galileo.tv/video/diese-emojis-werden-oft-missverstanden',
+ 'only_matching': True,
+ },
+ {
+ 'url': 'http://www.sat1gold.de/tv/edel-starck/playlist/die-gesamte-1-staffel',
+ 'only_matching': True,
+ },
+ {
+ 'url': 'http://www.advopedia.de/videos/lenssen-klaert-auf/lenssen-klaert-auf-folge-8-staffel-3-feiertage-und-freie-tage',
+ 'only_matching': True,
+ },
+ ]
+
+ _TOKEN = 'prosieben'
+ _SALT = '01!8d8F_)r9]4s[qeuXfP%'
+ _CLIENT_NAME = 'kolibri-2.0.19-splec4'
+
+ _ACCESS_ID = 'x_prosiebenmaxx-de'
+ _ENCRYPTION_KEY = 'Eeyeey9oquahthainoofashoyoikosag'
+ _IV = 'Aeluchoc6aevechuipiexeeboowedaok'
+
+ _CLIPID_REGEXES = [
+ r'"clip_id"\s*:\s+"(\d+)"',
+ r'clipid: "(\d+)"',
+ r'clip[iI]d=(\d+)',
+ r'clip[iI][dD]\s*=\s*["\'](\d+)',
+ r"'itemImageUrl'\s*:\s*'/dynamic/thumbnails/full/\d+/(\d+)",
+ r'proMamsId&quot;\s*:\s*&quot;(\d+)',
+ r'proMamsId"\s*:\s*"(\d+)',
+ ]
+ _TITLE_REGEXES = [
+ r'<h2 class="subtitle" itemprop="name">\s*(.+?)</h2>',
+ r'<header class="clearfix">\s*<h3>(.+?)</h3>',
+ r'<!-- start video -->\s*<h1>(.+?)</h1>',
+ r'<h1 class="att-name">\s*(.+?)</h1>',
+ r'<header class="module_header">\s*<h2>([^<]+)</h2>\s*</header>',
+ r'<h2 class="video-title" itemprop="name">\s*(.+?)</h2>',
+ r'<div[^>]+id="veeseoTitle"[^>]*>(.+?)</div>',
+ r'<h2[^>]+class="subtitle"[^>]*>([^<]+)</h2>',
+ ]
+ _DESCRIPTION_REGEXES = [
+ r'<p itemprop="description">\s*(.+?)</p>',
+ r'<div class="videoDecription">\s*<p><strong>Beschreibung</strong>: (.+?)</p>',
+ r'<div class="g-plusone" data-size="medium"></div>\s*</div>\s*</header>\s*(.+?)\s*<footer>',
+ r'<p class="att-description">\s*(.+?)\s*</p>',
+ r'<p class="video-description" itemprop="description">\s*(.+?)</p>',
+ r'<div[^>]+id="veeseoDescription"[^>]*>(.+?)</div>',
+ ]
+ _UPLOAD_DATE_REGEXES = [
+ r'<span>\s*(\d{2}\.\d{2}\.\d{4} \d{2}:\d{2}) \|\s*<span itemprop="duration"',
+ r'<footer>\s*(\d{2}\.\d{2}\.\d{4}) \d{2}:\d{2} Uhr',
+ r'<span style="padding-left: 4px;line-height:20px; color:#404040">(\d{2}\.\d{2}\.\d{4})</span>',
+ r'(\d{2}\.\d{2}\.\d{4}) \| \d{2}:\d{2} Min<br/>',
+ ]
+ _PAGE_TYPE_REGEXES = [
+ r'<meta name="page_type" content="([^"]+)">',
+ r"'itemType'\s*:\s*'([^']*)'",
+ ]
+ _PLAYLIST_ID_REGEXES = [
+ r'content[iI]d=(\d+)',
+ r"'itemId'\s*:\s*'([^']*)'",
+ ]
+ _PLAYLIST_CLIP_REGEXES = [
+ r'(?s)data-qvt=.+?<a href="([^"]+)"',
+ ]
+
+ def _extract_clip(self, url, webpage):
+ clip_id = self._html_search_regex(
+ self._CLIPID_REGEXES, webpage, 'clip id')
+ title = self._html_search_regex(
+ self._TITLE_REGEXES, webpage, 'title',
+ default=None) or self._og_search_title(webpage)
+ info = self._extract_video_info(url, clip_id)
+ description = self._html_search_regex(
+ self._DESCRIPTION_REGEXES, webpage, 'description', default=None)
+ if description is None:
+ description = self._og_search_description(webpage)
+ thumbnail = self._og_search_thumbnail(webpage)
+ upload_date = unified_strdate(
+ self._html_search_meta('og:published_time', webpage,
+ 'upload date', default=None)
+ or self._html_search_regex(self._UPLOAD_DATE_REGEXES,
+ webpage, 'upload date', default=None))
+
+ json_ld = self._search_json_ld(webpage, clip_id, default={})
+
+ return merge_dicts(info, {
+ 'id': clip_id,
+ 'title': title,
+ 'description': description,
+ 'thumbnail': thumbnail,
+ 'upload_date': upload_date,
+ }, json_ld)
+
+ def _extract_playlist(self, url, webpage):
+ playlist_id = self._html_search_regex(
+ self._PLAYLIST_ID_REGEXES, webpage, 'playlist id')
+ playlist = self._parse_json(
+ self._search_regex(
+ r'var\s+contentResources\s*=\s*(\[.+?\]);\s*</script',
+ webpage, 'playlist'),
+ playlist_id)
+ entries = []
+ for item in playlist:
+ clip_id = item.get('id') or item.get('upc')
+ if not clip_id:
+ continue
+ info = self._extract_video_info(url, clip_id)
+ info.update({
+ 'id': clip_id,
+ 'title': item.get('title') or item.get('teaser', {}).get('headline'),
+ 'description': item.get('teaser', {}).get('description'),
+ 'thumbnail': item.get('poster'),
+ 'duration': float_or_none(item.get('duration')),
+ 'series': item.get('tvShowTitle'),
+ 'uploader': item.get('broadcastPublisher'),
+ })
+ entries.append(info)
+ return self.playlist_result(entries, playlist_id)
+
+ def _real_extract(self, url):
+ video_id = self._match_id(url)
+ webpage = self._download_webpage(url, video_id)
+ page_type = self._search_regex(
+ self._PAGE_TYPE_REGEXES, webpage,
+ 'page type', default='clip').lower()
+ if page_type == 'clip':
+ return self._extract_clip(url, webpage)
+ elif page_type == 'playlist':
+ return self._extract_playlist(url, webpage)
+ else:
+ raise ExtractorError(
+ 'Unsupported page type %s' % page_type, expected=True)
diff --git a/yt_dlp/extractor/prx.py b/yt_dlp/extractor/prx.py
new file mode 100644
index 0000000..5bb1832
--- /dev/null
+++ b/yt_dlp/extractor/prx.py
@@ -0,0 +1,428 @@
+import itertools
+
+from .common import InfoExtractor, SearchInfoExtractor
+from ..utils import (
+    clean_html,
+    int_or_none,
+    mimetype2ext,
+    str_or_none,
+    traverse_obj,
+    unified_timestamp,
+    url_or_none,
+    urljoin,
+)
+
+
+class PRXBaseIE(InfoExtractor):
+    PRX_BASE_URL_RE = r'https?://(?:(?:beta|listen)\.)?prx\.org/%s'
+
+ def _call_api(self, item_id, path, query=None, fatal=True, note='Downloading CMS API JSON'):
+ return self._download_json(
+ urljoin('https://cms.prx.org/api/v1/', path), item_id, query=query, fatal=fatal, note=note)
+
+ @staticmethod
+ def _get_prx_embed_response(response, section):
+ return traverse_obj(response, ('_embedded', f'prx:{section}'))
+
+ @staticmethod
+ def _extract_file_link(response):
+ return url_or_none(traverse_obj(
+ response, ('_links', 'enclosure', 'href'), expected_type=str))
+
+ @classmethod
+ def _extract_image(cls, image_response):
+ if not isinstance(image_response, dict):
+ return
+ return {
+ 'id': str_or_none(image_response.get('id')),
+ 'filesize': image_response.get('size'),
+ 'width': image_response.get('width'),
+ 'height': image_response.get('height'),
+ 'url': cls._extract_file_link(image_response)
+ }
+
+ @classmethod
+ def _extract_base_info(cls, response):
+ if not isinstance(response, dict):
+ return
+ item_id = str_or_none(response.get('id'))
+ if not item_id:
+ return
+ thumbnail_dict = cls._extract_image(cls._get_prx_embed_response(response, 'image'))
+ description = (
+ clean_html(response.get('description'))
+ or response.get('shortDescription'))
+ return {
+ 'id': item_id,
+ 'title': response.get('title') or item_id,
+ 'thumbnails': [thumbnail_dict] if thumbnail_dict else None,
+ 'description': description,
+ 'release_timestamp': unified_timestamp(response.get('releasedAt')),
+ 'timestamp': unified_timestamp(response.get('createdAt')),
+ 'modified_timestamp': unified_timestamp(response.get('updatedAt')),
+ 'duration': int_or_none(response.get('duration')),
+ 'tags': response.get('tags'),
+ 'episode_number': int_or_none(response.get('episodeIdentifier')),
+ 'season_number': int_or_none(response.get('seasonIdentifier'))
+ }
+
+ @classmethod
+ def _extract_series_info(cls, series_response):
+ base_info = cls._extract_base_info(series_response)
+ if not base_info:
+ return
+ account_info = cls._extract_account_info(
+ cls._get_prx_embed_response(series_response, 'account')) or {}
+ return {
+ **base_info,
+ 'channel_id': account_info.get('channel_id'),
+ 'channel_url': account_info.get('channel_url'),
+ 'channel': account_info.get('channel'),
+ 'series': base_info.get('title'),
+ 'series_id': base_info.get('id'),
+ }
+
+ @classmethod
+ def _extract_account_info(cls, account_response):
+ base_info = cls._extract_base_info(account_response)
+ if not base_info:
+ return
+ name = account_response.get('name')
+ return {
+ **base_info,
+ 'title': name,
+ 'channel_id': base_info.get('id'),
+ 'channel_url': 'https://beta.prx.org/accounts/%s' % base_info.get('id'),
+ 'channel': name,
+ }
+
+ @classmethod
+ def _extract_story_info(cls, story_response):
+ base_info = cls._extract_base_info(story_response)
+ if not base_info:
+ return
+ series = cls._extract_series_info(
+ cls._get_prx_embed_response(story_response, 'series')) or {}
+ account = cls._extract_account_info(
+ cls._get_prx_embed_response(story_response, 'account')) or {}
+ return {
+ **base_info,
+ 'series': series.get('series'),
+ 'series_id': series.get('series_id'),
+ 'channel_id': account.get('channel_id'),
+ 'channel_url': account.get('channel_url'),
+ 'channel': account.get('channel')
+ }
+
+ def _entries(self, item_id, endpoint, entry_func, query=None):
+ """
+ Extract entries from paginated list API
+ @param entry_func: Function to generate entry from response item
+ """
+ total = 0
+ for page in itertools.count(1):
+ response = self._call_api(f'{item_id}: page {page}', endpoint, query={
+ **(query or {}),
+ 'page': page,
+ 'per': 100
+ })
+ items = self._get_prx_embed_response(response, 'items')
+ if not response or not items:
+ break
+
+ yield from filter(None, map(entry_func, items))
+
+ total += response['count']
+ if total >= response['total']:
+ break
+
+ def _story_playlist_entry(self, response):
+ story = self._extract_story_info(response)
+ if not story:
+ return
+ story.update({
+ '_type': 'url',
+ 'url': 'https://beta.prx.org/stories/%s' % story['id'],
+ 'ie_key': PRXStoryIE.ie_key()
+ })
+ return story
+
+ def _series_playlist_entry(self, response):
+ series = self._extract_series_info(response)
+ if not series:
+ return
+ series.update({
+ '_type': 'url',
+ 'url': 'https://beta.prx.org/series/%s' % series['id'],
+ 'ie_key': PRXSeriesIE.ie_key()
+ })
+ return series
+
+
+class PRXStoryIE(PRXBaseIE):
+ _VALID_URL = PRXBaseIE.PRX_BASE_URL_RE % r'stories/(?P<id>\d+)'
+
+ _TESTS = [
+ {
+ # Story with season and episode details
+ 'url': 'https://beta.prx.org/stories/399200',
+ 'info_dict': {
+ 'id': '399200',
+ 'title': 'Fly Me To The Moon',
+ 'description': 'md5:43230168390b95d3322048d8a56bf2bb',
+ 'release_timestamp': 1640250000,
+ 'timestamp': 1640208972,
+ 'modified_timestamp': 1641318202,
+ 'duration': 1004,
+ 'tags': 'count:7',
+ 'episode_number': 8,
+ 'season_number': 5,
+ 'series': 'AirSpace',
+ 'series_id': '38057',
+ 'channel_id': '220986',
+ 'channel_url': 'https://beta.prx.org/accounts/220986',
+ 'channel': 'Air and Space Museum',
+ },
+ 'playlist': [{
+ 'info_dict': {
+ 'id': '399200_part1',
+ 'title': 'Fly Me To The Moon',
+ 'description': 'md5:43230168390b95d3322048d8a56bf2bb',
+ 'release_timestamp': 1640250000,
+ 'timestamp': 1640208972,
+ 'modified_timestamp': 1641318202,
+ 'duration': 530,
+ 'tags': 'count:7',
+ 'episode_number': 8,
+ 'season_number': 5,
+ 'series': 'AirSpace',
+ 'series_id': '38057',
+ 'channel_id': '220986',
+ 'channel_url': 'https://beta.prx.org/accounts/220986',
+ 'channel': 'Air and Space Museum',
+ 'ext': 'mp3',
+ 'upload_date': '20211222',
+ 'episode': 'Episode 8',
+ 'release_date': '20211223',
+ 'season': 'Season 5',
+ 'modified_date': '20220104'
+ }
+ }, {
+ 'info_dict': {
+ 'id': '399200_part2',
+ 'title': 'Fly Me To The Moon',
+ 'description': 'md5:43230168390b95d3322048d8a56bf2bb',
+ 'release_timestamp': 1640250000,
+ 'timestamp': 1640208972,
+ 'modified_timestamp': 1641318202,
+ 'duration': 474,
+ 'tags': 'count:7',
+ 'episode_number': 8,
+ 'season_number': 5,
+ 'series': 'AirSpace',
+ 'series_id': '38057',
+ 'channel_id': '220986',
+ 'channel_url': 'https://beta.prx.org/accounts/220986',
+ 'channel': 'Air and Space Museum',
+ 'ext': 'mp3',
+ 'upload_date': '20211222',
+ 'episode': 'Episode 8',
+ 'release_date': '20211223',
+ 'season': 'Season 5',
+ 'modified_date': '20220104'
+ }
+            }]
+ }, {
+ # Story with only split audio
+ 'url': 'https://beta.prx.org/stories/326414',
+ 'info_dict': {
+ 'id': '326414',
+ 'title': 'Massachusetts v EPA',
+ 'description': 'md5:744fffba08f19f4deab69fa8d49d5816',
+ 'timestamp': 1592509124,
+ 'modified_timestamp': 1592510457,
+ 'duration': 3088,
+ 'tags': 'count:0',
+ 'series': 'Outside/In',
+ 'series_id': '36252',
+ 'channel_id': '206',
+ 'channel_url': 'https://beta.prx.org/accounts/206',
+ 'channel': 'New Hampshire Public Radio',
+ },
+ 'playlist_count': 4
+ }, {
+ # Story with single combined audio
+ 'url': 'https://beta.prx.org/stories/400404',
+ 'info_dict': {
+ 'id': '400404',
+ 'title': 'Cafe Chill (Episode 2022-01)',
+ 'thumbnails': 'count:1',
+ 'description': 'md5:9f1b5a3cbd64fb159d08c3baa31f1539',
+ 'timestamp': 1641233952,
+ 'modified_timestamp': 1641234248,
+ 'duration': 3540,
+ 'series': 'Café Chill',
+ 'series_id': '37762',
+ 'channel_id': '5767',
+ 'channel_url': 'https://beta.prx.org/accounts/5767',
+ 'channel': 'C89.5 - KNHC Seattle',
+ 'ext': 'mp3',
+ 'tags': 'count:0',
+ 'thumbnail': r're:https?://cms\.prx\.org/pub/\w+/0/web/story_image/767965/medium/Aurora_Over_Trees\.jpg',
+ 'upload_date': '20220103',
+ 'modified_date': '20220103'
+ }
+ }, {
+ 'url': 'https://listen.prx.org/stories/399200',
+ 'only_matching': True
+    }]
+
+ def _extract_audio_pieces(self, audio_response):
+ return [{
+ 'format_id': str_or_none(piece_response.get('id')),
+ 'format_note': str_or_none(piece_response.get('label')),
+ 'filesize': int_or_none(piece_response.get('size')),
+ 'duration': int_or_none(piece_response.get('duration')),
+ 'ext': mimetype2ext(piece_response.get('contentType')),
+ 'asr': int_or_none(piece_response.get('frequency'), scale=1000),
+ 'abr': int_or_none(piece_response.get('bitRate')),
+ 'url': self._extract_file_link(piece_response),
+ 'vcodec': 'none'
+ } for piece_response in sorted(
+ self._get_prx_embed_response(audio_response, 'items') or [],
+ key=lambda p: int_or_none(p.get('position')))]
+
+ def _extract_story(self, story_response):
+ info = self._extract_story_info(story_response)
+ if not info:
+ return
+ audio_pieces = self._extract_audio_pieces(
+ self._get_prx_embed_response(story_response, 'audio'))
+ if len(audio_pieces) == 1:
+ return {
+ 'formats': audio_pieces,
+ **info
+ }
+
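+        # stories split into multiple audio pieces are exposed as a
+        # multi_video playlist with one entry per piece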
+ entries = [{
+ **info,
+ 'id': '%s_part%d' % (info['id'], (idx + 1)),
+ 'formats': [fmt],
+ } for idx, fmt in enumerate(audio_pieces)]
+ return {
+ '_type': 'multi_video',
+ 'entries': entries,
+ **info
+ }
+
+ def _real_extract(self, url):
+ story_id = self._match_id(url)
+ response = self._call_api(story_id, f'stories/{story_id}')
+ return self._extract_story(response)
+
+
+class PRXSeriesIE(PRXBaseIE):
+ _VALID_URL = PRXBaseIE.PRX_BASE_URL_RE % r'series/(?P<id>\d+)'
+ _TESTS = [
+ {
+ 'url': 'https://beta.prx.org/series/36252',
+ 'info_dict': {
+ 'id': '36252',
+ 'title': 'Outside/In',
+ 'thumbnails': 'count:1',
+ 'description': 'md5:a6bedc5f810777bcb09ab30ff9059114',
+ 'timestamp': 1470684964,
+ 'modified_timestamp': 1582308830,
+ 'channel_id': '206',
+ 'channel_url': 'https://beta.prx.org/accounts/206',
+ 'channel': 'New Hampshire Public Radio',
+ 'series': 'Outside/In',
+ 'series_id': '36252'
+ },
+ 'playlist_mincount': 39
+ }, {
+ # Blank series
+ 'url': 'https://beta.prx.org/series/25038',
+ 'info_dict': {
+ 'id': '25038',
+ 'title': '25038',
+ 'timestamp': 1207612800,
+ 'modified_timestamp': 1207612800,
+ 'channel_id': '206',
+ 'channel_url': 'https://beta.prx.org/accounts/206',
+ 'channel': 'New Hampshire Public Radio',
+ 'series': '25038',
+ 'series_id': '25038'
+ },
+ 'playlist_count': 0
+ }
+ ]
+
+ def _extract_series(self, series_response):
+ info = self._extract_series_info(series_response)
+ return {
+ '_type': 'playlist',
+ 'entries': self._entries(info['id'], 'series/%s/stories' % info['id'], self._story_playlist_entry),
+ **info
+ }
+
+ def _real_extract(self, url):
+ series_id = self._match_id(url)
+ response = self._call_api(series_id, f'series/{series_id}')
+ return self._extract_series(response)
+
+
+class PRXAccountIE(PRXBaseIE):
+ _VALID_URL = PRXBaseIE.PRX_BASE_URL_RE % r'accounts/(?P<id>\d+)'
+ _TESTS = [{
+ 'url': 'https://beta.prx.org/accounts/206',
+ 'info_dict': {
+ 'id': '206',
+ 'title': 'New Hampshire Public Radio',
+ 'description': 'md5:277f2395301d0aca563c80c70a18ee0a',
+ 'channel_id': '206',
+ 'channel_url': 'https://beta.prx.org/accounts/206',
+ 'channel': 'New Hampshire Public Radio',
+ 'thumbnails': 'count:1'
+ },
+ 'playlist_mincount': 380
+ }]
+
+ def _extract_account(self, account_response):
+ info = self._extract_account_info(account_response)
+ series = self._entries(
+ info['id'], f'accounts/{info["id"]}/series', self._series_playlist_entry)
+ stories = self._entries(
+ info['id'], f'accounts/{info["id"]}/stories', self._story_playlist_entry)
+ return {
+ '_type': 'playlist',
+ 'entries': itertools.chain(series, stories),
+ **info
+ }
+
+ def _real_extract(self, url):
+ account_id = self._match_id(url)
+ response = self._call_api(account_id, f'accounts/{account_id}')
+ return self._extract_account(response)
+
+
+class PRXStoriesSearchIE(PRXBaseIE, SearchInfoExtractor):
+ IE_DESC = 'PRX Stories Search'
+ IE_NAME = 'prxstories:search'
+ _SEARCH_KEY = 'prxstories'
+
+ def _search_results(self, query):
+ yield from self._entries(
+ f'query {query}', 'stories/search', self._story_playlist_entry, query={'q': query})
+
+
+class PRXSeriesSearchIE(PRXBaseIE, SearchInfoExtractor):
+ IE_DESC = 'PRX Series Search'
+ IE_NAME = 'prxseries:search'
+ _SEARCH_KEY = 'prxseries'
+
+ def _search_results(self, query):
+ yield from self._entries(
+ f'query {query}', 'series/search', self._series_playlist_entry, query={'q': query})
diff --git a/yt_dlp/extractor/puhutv.py b/yt_dlp/extractor/puhutv.py
new file mode 100644
index 0000000..4b8e5e9
--- /dev/null
+++ b/yt_dlp/extractor/puhutv.py
@@ -0,0 +1,233 @@
+from .common import InfoExtractor
+from ..compat import compat_str
+from ..networking.exceptions import HTTPError
+from ..utils import (
+ ExtractorError,
+    float_or_none,
+    int_or_none,
+ parse_resolution,
+ str_or_none,
+ try_get,
+ unified_timestamp,
+ url_or_none,
+ urljoin,
+)
+
+
+class PuhuTVIE(InfoExtractor):
+ _VALID_URL = r'https?://(?:www\.)?puhutv\.com/(?P<id>[^/?#&]+)-izle'
+ IE_NAME = 'puhutv'
+ _TESTS = [{
+ # film
+ 'url': 'https://puhutv.com/sut-kardesler-izle',
+ 'md5': 'a347470371d56e1585d1b2c8dab01c96',
+ 'info_dict': {
+ 'id': '5085',
+ 'display_id': 'sut-kardesler',
+ 'ext': 'mp4',
+ 'title': 'Süt Kardeşler',
+ 'description': 'md5:ca09da25b7e57cbb5a9280d6e48d17aa',
+ 'thumbnail': r're:^https?://.*\.jpg$',
+ 'duration': 4832.44,
+ 'creator': 'Arzu Film',
+ 'timestamp': 1561062602,
+ 'upload_date': '20190620',
+ 'release_year': 1976,
+ 'view_count': int,
+ 'tags': list,
+ },
+ }, {
+ # episode, geo restricted, bypassable with --geo-verification-proxy
+ 'url': 'https://puhutv.com/jet-sosyete-1-bolum-izle',
+ 'only_matching': True,
+ }, {
+ # 4k, with subtitles
+ 'url': 'https://puhutv.com/dip-1-bolum-izle',
+ 'only_matching': True,
+ }]
+ _SUBTITLE_LANGS = {
+ 'English': 'en',
+ 'Deutsch': 'de',
+ 'عربى': 'ar'
+ }
+
+ def _real_extract(self, url):
+ display_id = self._match_id(url)
+
+ info = self._download_json(
+ urljoin(url, '/api/slug/%s-izle' % display_id),
+ display_id)['data']
+
+ video_id = compat_str(info['id'])
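+        # the slug API's 'title' field holds the parent show/film object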
+ show = info.get('title') or {}
+ title = info.get('name') or show['name']
+ if info.get('display_name'):
+ title = '%s %s' % (title, info['display_name'])
+
+ try:
+ videos = self._download_json(
+ 'https://puhutv.com/api/assets/%s/videos' % video_id,
+ display_id, 'Downloading video JSON',
+ headers=self.geo_verification_headers())
+ except ExtractorError as e:
+ if isinstance(e.cause, HTTPError) and e.cause.status == 403:
+ self.raise_geo_restricted()
+ raise
+
+ urls = []
+ formats = []
+
+ for video in videos['data']['videos']:
+ media_url = url_or_none(video.get('url'))
+ if not media_url or media_url in urls:
+ continue
+ urls.append(media_url)
+
+ playlist = video.get('is_playlist')
+ if (video.get('stream_type') == 'hls' and playlist is True) or 'playlist.m3u8' in media_url:
+ formats.extend(self._extract_m3u8_formats(
+ media_url, video_id, 'mp4', entry_protocol='m3u8_native',
+ m3u8_id='hls', fatal=False))
+ continue
+
+ quality = int_or_none(video.get('quality'))
+ f = {
+ 'url': media_url,
+ 'ext': 'mp4',
+ 'height': quality
+ }
+ video_format = video.get('video_format')
+ is_hls = (video_format == 'hls' or '/hls/' in media_url or '/chunklist.m3u8' in media_url) and playlist is False
+ if is_hls:
+ format_id = 'hls'
+ f['protocol'] = 'm3u8_native'
+ elif video_format == 'mp4':
+ format_id = 'http'
+ else:
+ continue
+ if quality:
+ format_id += '-%sp' % quality
+ f['format_id'] = format_id
+ formats.append(f)
+
+ creator = try_get(
+ show, lambda x: x['producer']['name'], compat_str)
+
+ content = info.get('content') or {}
+
+ images = try_get(
+ content, lambda x: x['images']['wide'], dict) or {}
+ thumbnails = []
+ for image_id, image_url in images.items():
+ if not isinstance(image_url, compat_str):
+ continue
+ if not image_url.startswith(('http', '//')):
+ image_url = 'https://%s' % image_url
+ t = parse_resolution(image_id)
+ t.update({
+ 'id': image_id,
+ 'url': image_url
+ })
+ thumbnails.append(t)
+
+ tags = []
+ for genre in show.get('genres') or []:
+ if not isinstance(genre, dict):
+ continue
+ genre_name = genre.get('name')
+ if genre_name and isinstance(genre_name, compat_str):
+ tags.append(genre_name)
+
+ subtitles = {}
+ for subtitle in content.get('subtitles') or []:
+ if not isinstance(subtitle, dict):
+ continue
+ lang = subtitle.get('language')
+ sub_url = url_or_none(subtitle.get('url') or subtitle.get('file'))
+ if not lang or not isinstance(lang, compat_str) or not sub_url:
+ continue
+ subtitles[self._SUBTITLE_LANGS.get(lang, lang)] = [{
+ 'url': sub_url
+ }]
+
+ return {
+ 'id': video_id,
+ 'display_id': display_id,
+ 'title': title,
+ 'description': info.get('description') or show.get('description'),
+ 'season_id': str_or_none(info.get('season_id')),
+ 'season_number': int_or_none(info.get('season_number')),
+ 'episode_number': int_or_none(info.get('episode_number')),
+ 'release_year': int_or_none(show.get('released_at')),
+ 'timestamp': unified_timestamp(info.get('created_at')),
+ 'creator': creator,
+ 'view_count': int_or_none(content.get('watch_count')),
+ 'duration': float_or_none(content.get('duration_in_ms'), 1000),
+ 'tags': tags,
+ 'subtitles': subtitles,
+ 'thumbnails': thumbnails,
+ 'formats': formats
+ }
+
+
+class PuhuTVSerieIE(InfoExtractor):
+ _VALID_URL = r'https?://(?:www\.)?puhutv\.com/(?P<id>[^/?#&]+)-detay'
+ IE_NAME = 'puhutv:serie'
+ _TESTS = [{
+ 'url': 'https://puhutv.com/deniz-yildizi-detay',
+ 'info_dict': {
+ 'title': 'Deniz Yıldızı',
+ 'id': 'deniz-yildizi',
+ },
+ 'playlist_mincount': 205,
+ }, {
+        # a film detail page that uses the same URL pattern as a series page
+ 'url': 'https://puhutv.com/kaybedenler-kulubu-detay',
+ 'only_matching': True,
+ }]
+
+ def _extract_entries(self, seasons):
+ for season in seasons:
+ season_id = season.get('id')
+ if not season_id:
+ continue
+ page = 1
+ has_more = True
+ while has_more is True:
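+                # page through the season until the API reports no more episodes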
+ season = self._download_json(
+ 'https://galadriel.puhutv.com/seasons/%s' % season_id,
+ season_id, 'Downloading page %s' % page, query={
+ 'page': page,
+ 'per': 40,
+ })
+ episodes = season.get('episodes')
+ if isinstance(episodes, list):
+ for ep in episodes:
+ slug_path = str_or_none(ep.get('slugPath'))
+ if not slug_path:
+ continue
+ video_id = str_or_none(int_or_none(ep.get('id')))
+ yield self.url_result(
+ 'https://puhutv.com/%s' % slug_path,
+ ie=PuhuTVIE.ie_key(), video_id=video_id,
+ video_title=ep.get('name') or ep.get('eventLabel'))
+ page += 1
+ has_more = season.get('hasMore')
+
+ def _real_extract(self, url):
+ playlist_id = self._match_id(url)
+
+ info = self._download_json(
+ urljoin(url, '/api/slug/%s-detay' % playlist_id),
+ playlist_id)['data']
+
+ seasons = info.get('seasons')
+ if seasons:
+ return self.playlist_result(
+ self._extract_entries(seasons), playlist_id, info.get('name'))
+
+        # Films use the same URL pattern as series
+ video_id = info.get('slug') or info['assets'][0]['slug']
+ return self.url_result(
+ 'https://puhutv.com/%s-izle' % video_id,
+ PuhuTVIE.ie_key(), video_id)
diff --git a/yt_dlp/extractor/puls4.py b/yt_dlp/extractor/puls4.py
new file mode 100644
index 0000000..38c5d11
--- /dev/null
+++ b/yt_dlp/extractor/puls4.py
@@ -0,0 +1,51 @@
+from .prosiebensat1 import ProSiebenSat1BaseIE
+from ..compat import compat_str
+from ..utils import parse_duration, unified_strdate
+
+
+class Puls4IE(ProSiebenSat1BaseIE):
+ _VALID_URL = r'https?://(?:www\.)?puls4\.com/(?P<id>[^?#&]+)'
+ _TESTS = [{
+ 'url': 'http://www.puls4.com/2-minuten-2-millionen/staffel-3/videos/2min2miotalk/Tobias-Homberger-von-myclubs-im-2min2miotalk-118118',
+ 'md5': 'fd3c6b0903ac72c9d004f04bc6bb3e03',
+ 'info_dict': {
+ 'id': '118118',
+ 'ext': 'flv',
+ 'title': 'Tobias Homberger von myclubs im #2min2miotalk',
+ 'description': 'md5:f9def7c5e8745d6026d8885487d91955',
+ 'upload_date': '20160830',
+ 'uploader': 'PULS_4',
+ },
+ }, {
+ 'url': 'http://www.puls4.com/pro-und-contra/wer-wird-prasident/Ganze-Folgen/Wer-wird-Praesident.-Norbert-Hofer',
+ 'only_matching': True,
+ }, {
+ 'url': 'http://www.puls4.com/pro-und-contra/wer-wird-prasident/Ganze-Folgen/Wer-wird-Praesident-Analyse-des-Interviews-mit-Norbert-Hofer-416598',
+ 'only_matching': True,
+ }]
+ _TOKEN = 'puls4'
+ _SALT = '01!kaNgaiNgah1Ie4AeSha'
+ _CLIENT_NAME = ''
+
+ def _real_extract(self, url):
+ path = self._match_id(url)
+ content_path = self._download_json(
+ 'http://www.puls4.com/api/json-fe/page/' + path, path)['content'][0]['url']
+ media = self._download_json(
+ 'http://www.puls4.com' + content_path,
+ content_path)['mediaCurrent']
+ player_content = media['playerContent']
+ info = self._extract_video_info(url, player_content['id'])
+ info.update({
+ 'id': compat_str(media['objectId']),
+ 'title': player_content['title'],
+ 'description': media.get('description'),
+ 'thumbnail': media.get('previewLink'),
+ 'upload_date': unified_strdate(media.get('date')),
+ 'duration': parse_duration(player_content.get('duration')),
+ 'episode': player_content.get('episodePartName'),
+ 'show': media.get('channel'),
+ 'season_id': player_content.get('seasonId'),
+ 'uploader': player_content.get('sourceCompany'),
+ })
+ return info
diff --git a/yt_dlp/extractor/pyvideo.py b/yt_dlp/extractor/pyvideo.py
new file mode 100644
index 0000000..7b25166
--- /dev/null
+++ b/yt_dlp/extractor/pyvideo.py
@@ -0,0 +1,70 @@
+import re
+
+from .common import InfoExtractor
+from ..compat import compat_str
+from ..utils import int_or_none
+
+
+class PyvideoIE(InfoExtractor):
+ _VALID_URL = r'https?://(?:www\.)?pyvideo\.org/(?P<category>[^/]+)/(?P<id>[^/?#&.]+)'
+
+ _TESTS = [{
+ 'url': 'http://pyvideo.org/pycon-us-2013/become-a-logging-expert-in-30-minutes.html',
+ 'info_dict': {
+ 'id': 'become-a-logging-expert-in-30-minutes',
+ },
+ 'playlist_count': 2,
+ }, {
+ 'url': 'http://pyvideo.org/pygotham-2012/gloriajw-spotifywitherikbernhardsson182m4v.html',
+ 'md5': '5fe1c7e0a8aa5570330784c847ff6d12',
+ 'info_dict': {
+ 'id': '2542',
+ 'ext': 'm4v',
+ 'title': 'Gloriajw-SpotifyWithErikBernhardsson182.m4v',
+ },
+ }]
+
+ def _real_extract(self, url):
+ mobj = self._match_valid_url(url)
+ category = mobj.group('category')
+ video_id = mobj.group('id')
+
+ entries = []
+
+ data = self._download_json(
+ 'https://raw.githubusercontent.com/pyvideo/data/master/%s/videos/%s.json'
+ % (category, video_id), video_id, fatal=False)
+
+ if data:
+ for video in data['videos']:
+ video_url = video.get('url')
+ if video_url:
+ if video.get('type') == 'youtube':
+ entries.append(self.url_result(video_url, 'Youtube'))
+ else:
+ entries.append({
+ 'id': compat_str(data.get('id') or video_id),
+ 'url': video_url,
+ 'title': data['title'],
+ 'description': data.get('description') or data.get('summary'),
+ 'thumbnail': data.get('thumbnail_url'),
+ 'duration': int_or_none(data.get('duration')),
+ })
+ else:
+ webpage = self._download_webpage(url, video_id)
+ title = self._og_search_title(webpage)
+ media_urls = self._search_regex(
+ r'(?s)Media URL:(.+?)</li>', webpage, 'media urls')
+ for m in re.finditer(
+ r'<a[^>]+href=(["\'])(?P<url>http.+?)\1', media_urls):
+ media_url = m.group('url')
+ if re.match(r'https?://www\.youtube\.com/watch\?v=.*', media_url):
+ entries.append(self.url_result(media_url, 'Youtube'))
+ else:
+ entries.append({
+ 'id': video_id,
+ 'url': media_url,
+ 'title': title,
+ })
+
+ return self.playlist_result(entries, video_id)
diff --git a/yt_dlp/extractor/qdance.py b/yt_dlp/extractor/qdance.py
new file mode 100644
index 0000000..934ebbf
--- /dev/null
+++ b/yt_dlp/extractor/qdance.py
@@ -0,0 +1,171 @@
+import json
+import time
+
+from .common import InfoExtractor
+from ..utils import (
+ ExtractorError,
+ int_or_none,
+ jwt_decode_hs256,
+ str_or_none,
+ traverse_obj,
+ try_call,
+ url_or_none,
+)
+
+
+class QDanceIE(InfoExtractor):
+ _NETRC_MACHINE = 'qdance'
+ _VALID_URL = r'https?://(?:www\.)?q-dance\.com/network/(?:library|live)/(?P<id>[\w-]+)'
+ _TESTS = [{
+ 'note': 'vod',
+ 'url': 'https://www.q-dance.com/network/library/146542138',
+ 'info_dict': {
+ 'id': '146542138',
+ 'ext': 'mp4',
+ 'title': 'Sound Rush [LIVE] | Defqon.1 Weekend Festival 2022 | Friday | RED',
+ 'display_id': 'sound-rush-live-v3-defqon-1-weekend-festival-2022-friday-red',
+ 'description': 'Relive Defqon.1 - Primal Energy 2022 with the sounds of Sound Rush LIVE at the RED on Friday! 🔥',
+ 'season': 'Defqon.1 Weekend Festival 2022',
+ 'season_id': '31840632',
+ 'series': 'Defqon.1',
+ 'series_id': '31840378',
+ 'thumbnail': 'https://images.q-dance.network/1674829540-20220624171509-220624171509_delio_dn201093-2.jpg',
+ 'availability': 'premium_only',
+ 'duration': 1829,
+ },
+ 'params': {'skip_download': 'm3u8'},
+ }, {
+ 'note': 'livestream',
+ 'url': 'https://www.q-dance.com/network/live/149170353',
+ 'info_dict': {
+ 'id': '149170353',
+ 'ext': 'mp4',
+ 'title': r're:^Defqon\.1 2023 - Friday - RED',
+ 'display_id': 'defqon-1-2023-friday-red',
+ 'description': 'md5:3c73fbbd4044e578e696adfc64019163',
+ 'season': 'Defqon.1 Weekend Festival 2023',
+ 'season_id': '141735599',
+ 'series': 'Defqon.1',
+ 'series_id': '31840378',
+ 'thumbnail': 'https://images.q-dance.network/1686849069-area-thumbs_red.png',
+ 'availability': 'subscriber_only',
+ 'live_status': 'is_live',
+ 'channel_id': 'qdancenetwork.video_149170353',
+ },
+ 'skip': 'Completed livestream',
+ }, {
+ 'note': 'vod with alphanumeric id',
+ 'url': 'https://www.q-dance.com/network/library/WhDleSIWSfeT3Q9ObBKBeA',
+ 'info_dict': {
+ 'id': 'WhDleSIWSfeT3Q9ObBKBeA',
+ 'ext': 'mp4',
+ 'title': 'Aftershock I Defqon.1 Weekend Festival 2023 I Sunday I BLUE',
+ 'display_id': 'naam-i-defqon-1-weekend-festival-2023-i-dag-i-podium',
+ 'description': 'Relive Defqon.1 Path of the Warrior with Aftershock at the BLUE 🔥',
+ 'series': 'Defqon.1',
+ 'series_id': '31840378',
+ 'season': 'Defqon.1 Weekend Festival 2023',
+ 'season_id': '141735599',
+ 'duration': 3507,
+ 'availability': 'premium_only',
+ 'thumbnail': 'https://images.q-dance.network/1698158361-230625-135716-defqon-1-aftershock.jpg',
+ },
+ 'params': {'skip_download': 'm3u8'},
+ }, {
+ 'url': 'https://www.q-dance.com/network/library/-uRFKXwmRZGVnve7av9uqA',
+ 'only_matching': True,
+ }]
+
+ _access_token = None
+ _refresh_token = None
+
+ def _call_login_api(self, data, note='Logging in'):
+ login = self._download_json(
+ 'https://members.id-t.com/api/auth/login', None, note, headers={
+ 'content-type': 'application/json',
+ 'brand': 'qdance',
+ 'origin': 'https://www.q-dance.com',
+ 'referer': 'https://www.q-dance.com/',
+ }, data=json.dumps(data, separators=(',', ':')).encode(),
+ expected_status=lambda x: True)
+
+ tokens = traverse_obj(login, ('data', {
+ '_id-t-accounts-token': ('accessToken', {str}),
+ '_id-t-accounts-refresh': ('refreshToken', {str}),
+ '_id-t-accounts-id-token': ('idToken', {str}),
+ }))
+
+ if not tokens.get('_id-t-accounts-token'):
+ error = ': '.join(traverse_obj(login, ('error', ('code', 'message'), {str})))
+ if 'validation_error' not in error:
+ raise ExtractorError(f'Q-Dance API said "{error}"')
+ msg = 'Invalid username or password' if 'email' in data else 'Refresh token has expired'
+ raise ExtractorError(msg, expected=True)
+
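+        # persist the tokens as cookies so later requests and runs can reuse them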
+ for name, value in tokens.items():
+ self._set_cookie('.q-dance.com', name, value)
+
+ def _perform_login(self, username, password):
+ self._call_login_api({'email': username, 'password': password})
+
+ def _real_initialize(self):
+ cookies = self._get_cookies('https://www.q-dance.com/')
+ self._refresh_token = try_call(lambda: cookies['_id-t-accounts-refresh'].value)
+ self._access_token = try_call(lambda: cookies['_id-t-accounts-token'].value)
+ if not self._access_token:
+ self.raise_login_required()
+
+ def _get_auth(self):
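+        # the access token is a JWT; treat it as stale once its 'exp' claim
+        # is more than two minutes in the past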
+ if (try_call(lambda: jwt_decode_hs256(self._access_token)['exp']) or 0) <= int(time.time() - 120):
+ if not self._refresh_token:
+ raise ExtractorError(
+ 'Cannot refresh access token, login with yt-dlp or refresh cookies in browser')
+ self._call_login_api({'refreshToken': self._refresh_token}, note='Refreshing access token')
+ self._real_initialize()
+
+ return {'Authorization': self._access_token}
+
+ def _real_extract(self, url):
+ video_id = self._match_id(url)
+ webpage = self._download_webpage(url, video_id)
+ data = self._search_nuxt_data(webpage, video_id, traverse=('data', 0, 'data'))
+
+ def extract_availability(level):
+ level = int_or_none(level) or 0
+ return self._availability(
+ needs_premium=(level >= 20), needs_subscription=(level >= 15), needs_auth=True)
+
+ info = traverse_obj(data, {
+ 'title': ('title', {str.strip}),
+ 'description': ('description', {str.strip}),
+ 'display_id': ('slug', {str}),
+ 'thumbnail': ('thumbnail', {url_or_none}),
+ 'duration': ('durationInSeconds', {int_or_none}, {lambda x: x or None}),
+ 'availability': ('subscription', 'level', {extract_availability}),
+ 'is_live': ('type', {lambda x: x.lower() == 'live'}),
+ 'artist': ('acts', ..., {str}),
+ 'series': ('event', 'title', {str.strip}),
+ 'series_id': ('event', 'id', {str_or_none}),
+ 'season': ('eventEdition', 'title', {str.strip}),
+ 'season_id': ('eventEdition', 'id', {str_or_none}),
+ 'channel_id': ('pubnub', 'channelName', {str}),
+ })
+
+ stream = self._download_json(
+ f'https://dc9h6qmsoymbq.cloudfront.net/api/content/videos/{video_id}/url',
+ video_id, headers=self._get_auth(), expected_status=401)
+
+ m3u8_url = traverse_obj(stream, ('data', 'url', {url_or_none}))
+ if not m3u8_url and traverse_obj(stream, ('error', 'code')) == 'unauthorized':
+ raise ExtractorError('Your account does not have access to this content', expected=True)
+
+ formats = self._extract_m3u8_formats(
+ m3u8_url, video_id, fatal=False, live=True) if m3u8_url else []
+ if not formats:
+ self.raise_no_formats('No active streams found', expected=bool(info.get('is_live')))
+
+ return {
+ **info,
+ 'id': video_id,
+ 'formats': formats,
+ }
diff --git a/yt_dlp/extractor/qingting.py b/yt_dlp/extractor/qingting.py
new file mode 100644
index 0000000..aa690d4
--- /dev/null
+++ b/yt_dlp/extractor/qingting.py
@@ -0,0 +1,47 @@
+from .common import InfoExtractor
+from ..utils import traverse_obj
+
+
+class QingTingIE(InfoExtractor):
+ _VALID_URL = r'https?://(?:www\.|m\.)?(?:qingting\.fm|qtfm\.cn)/v?channels/(?P<channel>\d+)/programs/(?P<id>\d+)'
+ _TESTS = [{
+ 'url': 'https://www.qingting.fm/channels/378005/programs/22257411/',
+ 'md5': '47e6a94f4e621ed832c316fd1888fb3c',
+ 'info_dict': {
+ 'id': '22257411',
+ 'title': '用了十年才修改,谁在乎教科书?',
+ 'channel_id': '378005',
+ 'channel': '睡前消息',
+ 'uploader': '马督工',
+ 'ext': 'm4a',
+ }
+ }, {
+ 'url': 'https://m.qtfm.cn/vchannels/378005/programs/23023573/',
+ 'md5': '2703120b6abe63b5fa90b975a58f4c0e',
+ 'info_dict': {
+ 'id': '23023573',
+ 'title': '【睡前消息488】重庆山火之后,有图≠真相',
+ 'channel_id': '378005',
+ 'channel': '睡前消息',
+ 'uploader': '马督工',
+ 'ext': 'm4a',
+ }
+ }]
+
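+ # The mobile page (m.qtfm.cn) embeds a window.__initStores JSON blob that
+ # contains the direct audio URL, so it is fetched regardless of the input URL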
+ def _real_extract(self, url):
+ channel_id, pid = self._match_valid_url(url).group('channel', 'id')
+ webpage = self._download_webpage(
+ f'https://m.qtfm.cn/vchannels/{channel_id}/programs/{pid}/', pid)
+ info = self._search_json(r'window\.__initStores\s*=', webpage, 'program info', pid)
+ return {
+ 'id': pid,
+ 'title': traverse_obj(info, ('ProgramStore', 'programInfo', 'title')),
+ 'channel_id': channel_id,
+ 'channel': traverse_obj(info, ('ProgramStore', 'channelInfo', 'title')),
+ 'uploader': traverse_obj(info, ('ProgramStore', 'podcasterInfo', 'podcaster', 'nickname')),
+ 'url': traverse_obj(info, ('ProgramStore', 'programInfo', 'audioUrl')),
+ 'vcodec': 'none',
+ 'acodec': 'm4a',
+ 'ext': 'm4a',
+ }
diff --git a/yt_dlp/extractor/qqmusic.py b/yt_dlp/extractor/qqmusic.py
new file mode 100644
index 0000000..9285825
--- /dev/null
+++ b/yt_dlp/extractor/qqmusic.py
@@ -0,0 +1,365 @@
+import random
+import re
+import time
+
+from .common import InfoExtractor
+from ..utils import (
+ clean_html,
+ ExtractorError,
+ strip_jsonp,
+ unescapeHTML,
+)
+
+
+class QQMusicIE(InfoExtractor):
+ IE_NAME = 'qqmusic'
+ IE_DESC = 'QQ音乐'
+ _VALID_URL = r'https?://y\.qq\.com/n/yqq/song/(?P<id>[0-9A-Za-z]+)\.html'
+ _TESTS = [{
+ 'url': 'https://y.qq.com/n/yqq/song/004295Et37taLD.html',
+ 'md5': '5f1e6cea39e182857da7ffc5ef5e6bb8',
+ 'info_dict': {
+ 'id': '004295Et37taLD',
+ 'ext': 'mp3',
+ 'title': '可惜没如果',
+ 'release_date': '20141227',
+ 'creator': '林俊杰',
+ 'description': 'md5:d85afb3051952ecc50a1ee8a286d1eac',
+ 'thumbnail': r're:^https?://.*\.jpg$',
+ }
+ }, {
+ 'note': 'There is no mp3-320 version of this song.',
+ 'url': 'https://y.qq.com/n/yqq/song/004MsGEo3DdNxV.html',
+ 'md5': 'fa3926f0c585cda0af8fa4f796482e3e',
+ 'info_dict': {
+ 'id': '004MsGEo3DdNxV',
+ 'ext': 'mp3',
+ 'title': '如果',
+ 'release_date': '20050626',
+ 'creator': '李季美',
+ 'description': 'md5:46857d5ed62bc4ba84607a805dccf437',
+ 'thumbnail': r're:^https?://.*\.jpg$',
+ }
+ }, {
+ 'note': 'lyrics not in .lrc format',
+ 'url': 'https://y.qq.com/n/yqq/song/001JyApY11tIp6.html',
+ 'info_dict': {
+ 'id': '001JyApY11tIp6',
+ 'ext': 'mp3',
+ 'title': 'Shadows Over Transylvania',
+ 'release_date': '19970225',
+ 'creator': 'Dark Funeral',
+ 'description': 'md5:c9b20210587cbcd6836a1c597bab4525',
+ 'thumbnail': r're:^https?://.*\.jpg$',
+ },
+ 'params': {
+ 'skip_download': True,
+ },
+ }]
+
+ _FORMATS = {
+ 'mp3-320': {'prefix': 'M800', 'ext': 'mp3', 'preference': 40, 'abr': 320},
+ 'mp3-128': {'prefix': 'M500', 'ext': 'mp3', 'preference': 30, 'abr': 128},
+ 'm4a': {'prefix': 'C200', 'ext': 'm4a', 'preference': 10}
+ }
+
+ # Reference: m_r_GetRUin() in top_player.js
+ # http://imgcache.gtimg.cn/music/portal_v3/y/top_player.js
+ @staticmethod
+ def m_r_get_ruin():
+ curMs = int(time.time() * 1000) % 1000
+ return int(round(random.random() * 2147483647) * curMs % 1E10)
+
+ def _real_extract(self, url):
+ mid = self._match_id(url)
+
+ detail_info_page = self._download_webpage(
+ 'http://s.plcloud.music.qq.com/fcgi-bin/fcg_yqq_song_detail_info.fcg?songmid=%s&play=0' % mid,
+ mid, note='Download song detail info',
+ errnote='Unable to get song detail info', encoding='gbk')
+
+ song_name = self._html_search_regex(
+ r"songname:\s*'([^']+)'", detail_info_page, 'song name')
+
+ publish_time = self._html_search_regex(
+ r'发行时间:(\d{4}-\d{2}-\d{2})', detail_info_page,
+ 'publish time', default=None)
+ if publish_time:
+ publish_time = publish_time.replace('-', '')
+
+ singer = self._html_search_regex(
+ r"singer:\s*'([^']+)", detail_info_page, 'singer', default=None)
+
+ lrc_content = self._html_search_regex(
+ r'<div class="content" id="lrc_content"[^<>]*>([^<>]+)</div>',
+ detail_info_page, 'LRC lyrics', default=None)
+ if lrc_content:
+ lrc_content = lrc_content.replace('\\n', '\n')
+
+ thumbnail_url = None
+ albummid = self._search_regex(
+ [r'albummid:\'([0-9a-zA-Z]+)\'', r'"albummid":"([0-9a-zA-Z]+)"'],
+ detail_info_page, 'album mid', default=None)
+ if albummid:
+ thumbnail_url = 'http://i.gtimg.cn/music/photo/mid_album_500/%s/%s/%s.jpg' \
+ % (albummid[-2:-1], albummid[-1], albummid)
+
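+ # Generate a pseudo-random guid and exchange it for a vkey; the CDN stream
+ # URLs below must carry the matching vkey/guid pair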
+ guid = self.m_r_get_ruin()
+
+ vkey = self._download_json(
+ 'http://base.music.qq.com/fcgi-bin/fcg_musicexpress.fcg?json=3&guid=%s' % guid,
+ mid, note='Retrieve vkey', errnote='Unable to get vkey',
+ transform_source=strip_jsonp)['key']
+
+ formats = []
+ for format_id, details in self._FORMATS.items():
+ formats.append({
+ 'url': 'http://cc.stream.qqmusic.qq.com/%s%s.%s?vkey=%s&guid=%s&fromtag=0'
+ % (details['prefix'], mid, details['ext'], vkey, guid),
+ 'format': format_id,
+ 'format_id': format_id,
+ 'quality': details['preference'],
+ 'abr': details.get('abr'),
+ })
+ self._check_formats(formats, mid)
+
+ actual_lrc_lyrics = ''.join(
+ line + '\n' for line in re.findall(
+ r'(?m)^(\[[0-9]{2}:[0-9]{2}(?:\.[0-9]{2,})?\][^\n]*|\[[^\]]*\])', lrc_content or ''))
+
+ info_dict = {
+ 'id': mid,
+ 'formats': formats,
+ 'title': song_name,
+ 'release_date': publish_time,
+ 'creator': singer,
+ 'description': lrc_content,
+ 'thumbnail': thumbnail_url
+ }
+ if actual_lrc_lyrics:
+ info_dict['subtitles'] = {
+ 'origin': [{
+ 'ext': 'lrc',
+ 'data': actual_lrc_lyrics,
+ }]
+ }
+ return info_dict
+
+
+class QQPlaylistBaseIE(InfoExtractor):
+ @staticmethod
+ def qq_static_url(category, mid):
+ return 'http://y.qq.com/y/static/%s/%s/%s/%s.html' % (category, mid[-2], mid[-1], mid)
+
+ def get_singer_all_songs(self, singmid, num):
+ return self._download_webpage(
+ r'https://c.y.qq.com/v8/fcg-bin/fcg_v8_singer_track_cp.fcg', singmid,
+ query={
+ 'format': 'json',
+ 'inCharset': 'utf8',
+ 'outCharset': 'utf-8',
+ 'platform': 'yqq',
+ 'needNewCode': 0,
+ 'singermid': singmid,
+ 'order': 'listen',
+ 'begin': 0,
+ 'num': num,
+ 'songstatus': 1,
+ })
+
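+ # First request a single song only to learn the total count, then re-request
+ # the complete track list in one call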
+ def get_entries_from_page(self, singmid):
+ entries = []
+
+ default_num = 1
+ json_text = self.get_singer_all_songs(singmid, default_num)
+ json_obj_all_songs = self._parse_json(json_text, singmid)
+
+ if json_obj_all_songs['code'] == 0:
+ total = json_obj_all_songs['data']['total']
+ json_text = self.get_singer_all_songs(singmid, total)
+ json_obj_all_songs = self._parse_json(json_text, singmid)
+
+ for item in json_obj_all_songs['data']['list']:
+ if item['musicData'].get('songmid') is not None:
+ songmid = item['musicData']['songmid']
+ entries.append(self.url_result(
+ r'https://y.qq.com/n/yqq/song/%s.html' % songmid, 'QQMusic', songmid))
+
+ return entries
+
+
+class QQMusicSingerIE(QQPlaylistBaseIE):
+ IE_NAME = 'qqmusic:singer'
+ IE_DESC = 'QQ音乐 - 歌手'
+ _VALID_URL = r'https?://y\.qq\.com/n/yqq/singer/(?P<id>[0-9A-Za-z]+)\.html'
+ _TEST = {
+ 'url': 'https://y.qq.com/n/yqq/singer/001BLpXF2DyJe2.html',
+ 'info_dict': {
+ 'id': '001BLpXF2DyJe2',
+ 'title': '林俊杰',
+ 'description': 'md5:870ec08f7d8547c29c93010899103751',
+ },
+ 'playlist_mincount': 12,
+ }
+
+ def _real_extract(self, url):
+ mid = self._match_id(url)
+
+ entries = self.get_entries_from_page(mid)
+ singer_page = self._download_webpage(url, mid, 'Download singer page')
+ singer_name = self._html_search_regex(
+ r"singername\s*:\s*'(.*?)'", singer_page, 'singer name', default=None)
+ singer_desc = None
+
+ if mid:
+ singer_desc_page = self._download_xml(
+ 'http://s.plcloud.music.qq.com/fcgi-bin/fcg_get_singer_desc.fcg', mid,
+ 'Download singer description XML',
+ query={'utf8': 1, 'outCharset': 'utf-8', 'format': 'xml', 'singermid': mid},
+ headers={'Referer': 'https://y.qq.com/n/yqq/singer/'})
+
+ singer_desc = singer_desc_page.find('./data/info/desc').text
+
+ return self.playlist_result(entries, mid, singer_name, singer_desc)
+
+
+class QQMusicAlbumIE(QQPlaylistBaseIE):
+ IE_NAME = 'qqmusic:album'
+ IE_DESC = 'QQ音乐 - 专辑'
+ _VALID_URL = r'https?://y\.qq\.com/n/yqq/album/(?P<id>[0-9A-Za-z]+)\.html'
+
+ _TESTS = [{
+ 'url': 'https://y.qq.com/n/yqq/album/000gXCTb2AhRR1.html',
+ 'info_dict': {
+ 'id': '000gXCTb2AhRR1',
+ 'title': '我们都是这样长大的',
+ 'description': 'md5:179c5dce203a5931970d306aa9607ea6',
+ },
+ 'playlist_count': 4,
+ }, {
+ 'url': 'https://y.qq.com/n/yqq/album/002Y5a3b3AlCu3.html',
+ 'info_dict': {
+ 'id': '002Y5a3b3AlCu3',
+ 'title': '그리고...',
+ 'description': 'md5:a48823755615508a95080e81b51ba729',
+ },
+ 'playlist_count': 8,
+ }]
+
+ def _real_extract(self, url):
+ mid = self._match_id(url)
+
+ album = self._download_json(
+ 'http://i.y.qq.com/v8/fcg-bin/fcg_v8_album_info_cp.fcg?albummid=%s&format=json' % mid,
+ mid, 'Download album page')['data']
+
+ entries = [
+ self.url_result(
+ 'https://y.qq.com/n/yqq/song/' + song['songmid'] + '.html', 'QQMusic', song['songmid']
+ ) for song in album['list']
+ ]
+ album_name = album.get('name')
+ album_detail = album.get('desc')
+ if album_detail is not None:
+ album_detail = album_detail.strip()
+
+ return self.playlist_result(entries, mid, album_name, album_detail)
+
+
+class QQMusicToplistIE(QQPlaylistBaseIE):
+ IE_NAME = 'qqmusic:toplist'
+ IE_DESC = 'QQ音乐 - 排行榜'
+ _VALID_URL = r'https?://y\.qq\.com/n/yqq/toplist/(?P<id>[0-9]+)\.html'
+
+ _TESTS = [{
+ 'url': 'https://y.qq.com/n/yqq/toplist/123.html',
+ 'info_dict': {
+ 'id': '123',
+ 'title': '美国iTunes榜',
+ 'description': 'md5:89db2335fdbb10678dee2d43fe9aba08',
+ },
+ 'playlist_count': 100,
+ }, {
+ 'url': 'https://y.qq.com/n/yqq/toplist/3.html',
+ 'info_dict': {
+ 'id': '3',
+ 'title': '巅峰榜·欧美',
+ 'description': 'md5:5a600d42c01696b26b71f8c4d43407da',
+ },
+ 'playlist_count': 100,
+ }, {
+ 'url': 'https://y.qq.com/n/yqq/toplist/106.html',
+ 'info_dict': {
+ 'id': '106',
+ 'title': '韩国Mnet榜',
+ 'description': 'md5:cb84b325215e1d21708c615cac82a6e7',
+ },
+ 'playlist_count': 50,
+ }]
+
+ def _real_extract(self, url):
+ list_id = self._match_id(url)
+
+ toplist_json = self._download_json(
+ 'http://i.y.qq.com/v8/fcg-bin/fcg_v8_toplist_cp.fcg', list_id,
+ note='Download toplist page',
+ query={'type': 'toplist', 'topid': list_id, 'format': 'json'})
+
+ entries = [self.url_result(
+ 'https://y.qq.com/n/yqq/song/' + song['data']['songmid'] + '.html', 'QQMusic',
+ song['data']['songmid'])
+ for song in toplist_json['songlist']]
+
+ topinfo = toplist_json.get('topinfo', {})
+ list_name = topinfo.get('ListName')
+ list_description = topinfo.get('info')
+ return self.playlist_result(entries, list_id, list_name, list_description)
+
+
+class QQMusicPlaylistIE(QQPlaylistBaseIE):
+ IE_NAME = 'qqmusic:playlist'
+ IE_DESC = 'QQ音乐 - 歌单'
+ _VALID_URL = r'https?://y\.qq\.com/n/yqq/playlist/(?P<id>[0-9]+)\.html'
+
+ _TESTS = [{
+ 'url': 'http://y.qq.com/n/yqq/playlist/3462654915.html',
+ 'info_dict': {
+ 'id': '3462654915',
+ 'title': '韩国5月新歌精选下旬',
+ 'description': 'md5:d2c9d758a96b9888cf4fe82f603121d4',
+ },
+ 'playlist_count': 40,
+ 'skip': 'playlist gone',
+ }, {
+ 'url': 'https://y.qq.com/n/yqq/playlist/1374105607.html',
+ 'info_dict': {
+ 'id': '1374105607',
+ 'title': '易入人心的华语民谣',
+ 'description': '民谣的歌曲易于传唱、、歌词朗朗伤口、旋律简单温馨。属于那种才入耳孔。却上心头的感觉。没有太多的复杂情绪。简单而直接地表达乐者的情绪,就是这样的简单才易入人心。',
+ },
+ 'playlist_count': 20,
+ }]
+
+ def _real_extract(self, url):
+ list_id = self._match_id(url)
+
+ list_json = self._download_json(
+ 'http://i.y.qq.com/qzone-music/fcg-bin/fcg_ucc_getcdinfo_byids_cp.fcg',
+ list_id, 'Download list page',
+ query={'type': 1, 'json': 1, 'utf8': 1, 'onlysong': 0, 'disstid': list_id},
+ transform_source=strip_jsonp)
+ if not list_json.get('cdlist'):
+ if list_json.get('code'):
+ raise ExtractorError(
+ 'QQ Music said: error %d in fetching playlist info' % list_json['code'],
+ expected=True)
+ raise ExtractorError('Unable to get playlist info')
+
+ cdlist = list_json['cdlist'][0]
+ entries = [self.url_result(
+ 'https://y.qq.com/n/yqq/song/' + song['songmid'] + '.html', 'QQMusic', song['songmid'])
+ for song in cdlist['songlist']]
+
+ list_name = cdlist.get('dissname')
+ list_description = clean_html(unescapeHTML(cdlist.get('desc')))
+ return self.playlist_result(entries, list_id, list_name, list_description)
diff --git a/yt_dlp/extractor/r7.py b/yt_dlp/extractor/r7.py
new file mode 100644
index 0000000..36f0b52
--- /dev/null
+++ b/yt_dlp/extractor/r7.py
@@ -0,0 +1,112 @@
+from .common import InfoExtractor
+from ..utils import int_or_none
+
+
+class R7IE(InfoExtractor):
+ _WORKING = False
+ _ENABLED = None # XXX: pass through to GenericIE
+ _VALID_URL = r'''(?x)
+ https?://
+ (?:
+ (?:[a-zA-Z]+)\.r7\.com(?:/[^/]+)+/idmedia/|
+ noticias\.r7\.com(?:/[^/]+)+/[^/]+-|
+ player\.r7\.com/video/i/
+ )
+ (?P<id>[\da-f]{24})
+ '''
+ _TESTS = [{
+ 'url': 'http://videos.r7.com/policiais-humilham-suspeito-a-beira-da-morte-morre-com-dignidade-/idmedia/54e7050b0cf2ff57e0279389.html',
+ 'md5': '403c4e393617e8e8ddc748978ee8efde',
+ 'info_dict': {
+ 'id': '54e7050b0cf2ff57e0279389',
+ 'ext': 'mp4',
+ 'title': 'Policiais humilham suspeito à beira da morte: "Morre com dignidade"',
+ 'description': 'md5:01812008664be76a6479aa58ec865b72',
+ 'thumbnail': r're:^https?://.*\.jpg$',
+ 'duration': 98,
+ 'like_count': int,
+ 'view_count': int,
+ },
+ }, {
+ 'url': 'http://esportes.r7.com/videos/cigano-manda-recado-aos-fas/idmedia/4e176727b51a048ee6646a1b.html',
+ 'only_matching': True,
+ }, {
+ 'url': 'http://noticias.r7.com/record-news/video/representante-do-instituto-sou-da-paz-fala-sobre-fim-do-estatuto-do-desarmamento-5480fc580cf2285b117f438d/',
+ 'only_matching': True,
+ }, {
+ 'url': 'http://player.r7.com/video/i/54e7050b0cf2ff57e0279389?play=true&video=http://vsh.r7.com/54e7050b0cf2ff57e0279389/ER7_RE_BG_MORTE_JOVENS_570kbps_2015-02-2009f17818-cc82-4c8f-86dc-89a66934e633-ATOS_copy.mp4&linkCallback=http://videos.r7.com/policiais-humilham-suspeito-a-beira-da-morte-morre-com-dignidade-/idmedia/54e7050b0cf2ff57e0279389.html&thumbnail=http://vtb.r7.com/ER7_RE_BG_MORTE_JOVENS_570kbps_2015-02-2009f17818-cc82-4c8f-86dc-89a66934e633-thumb.jpg&idCategory=192&share=true&layout=full&full=true',
+ 'only_matching': True,
+ }]
+
+ def _real_extract(self, url):
+ video_id = self._match_id(url)
+
+ video = self._download_json(
+ 'http://player-api.r7.com/video/i/%s' % video_id, video_id)
+
+ title = video['title']
+
+ formats = []
+ media_url_hls = video.get('media_url_hls')
+ if media_url_hls:
+ formats.extend(self._extract_m3u8_formats(
+ media_url_hls, video_id, 'mp4', entry_protocol='m3u8_native',
+ m3u8_id='hls', fatal=False))
+ media_url = video.get('media_url')
+ if media_url:
+ f = {
+ 'url': media_url,
+ 'format_id': 'http',
+ }
+ # The m3u8 format always matches the http format, so copy metadata from
+ # one to the other
+ m3u8_formats = list(filter(
+ lambda f: f.get('vcodec') != 'none', formats))
+ if len(m3u8_formats) == 1:
+ f_copy = m3u8_formats[0].copy()
+ f_copy.update(f)
+ f_copy['protocol'] = 'http'
+ f = f_copy
+ formats.append(f)
+
+ description = video.get('description')
+ thumbnail = video.get('thumb')
+ duration = int_or_none(video.get('media_duration'))
+ like_count = int_or_none(video.get('likes'))
+ view_count = int_or_none(video.get('views'))
+
+ return {
+ 'id': video_id,
+ 'title': title,
+ 'description': description,
+ 'thumbnail': thumbnail,
+ 'duration': duration,
+ 'like_count': like_count,
+ 'view_count': view_count,
+ 'formats': formats,
+ }
+
+
+class R7ArticleIE(InfoExtractor):
+ _WORKING = False
+ _ENABLED = None # XXX: pass through to GenericIE
+ _VALID_URL = r'https?://(?:[a-zA-Z]+)\.r7\.com/(?:[^/]+/)+[^/?#&]+-(?P<id>\d+)'
+ _TEST = {
+ 'url': 'http://tv.r7.com/record-play/balanco-geral/videos/policiais-humilham-suspeito-a-beira-da-morte-morre-com-dignidade-16102015',
+ 'only_matching': True,
+ }
+
+ @classmethod
+ def suitable(cls, url):
+ return False if R7IE.suitable(url) else super().suitable(url)
+
+ def _real_extract(self, url):
+ display_id = self._match_id(url)
+
+ webpage = self._download_webpage(url, display_id)
+
+ video_id = self._search_regex(
+ r'<div[^>]+(?:id=["\']player-|class=["\']embed["\'][^>]+id=["\'])([\da-f]{24})',
+ webpage, 'video id')
+
+ return self.url_result('http://player.r7.com/video/i/%s' % video_id, R7IE.ie_key())
diff --git a/yt_dlp/extractor/radiko.py b/yt_dlp/extractor/radiko.py
new file mode 100644
index 0000000..f013582
--- /dev/null
+++ b/yt_dlp/extractor/radiko.py
@@ -0,0 +1,261 @@
+import base64
+import random
+import re
+import urllib.parse
+
+from .common import InfoExtractor
+from ..utils import (
+ ExtractorError,
+ clean_html,
+ time_seconds,
+ try_call,
+ unified_timestamp,
+ update_url_query,
+)
+from ..utils.traversal import traverse_obj
+
+
+class RadikoBaseIE(InfoExtractor):
+ _GEO_BYPASS = False
+ _FULL_KEY = None
+ _HOSTS_FOR_TIME_FREE_FFMPEG_UNSUPPORTED = (
+ 'https://c-rpaa.smartstream.ne.jp',
+ 'https://si-c-radiko.smartstream.ne.jp',
+ 'https://tf-f-rpaa-radiko.smartstream.ne.jp',
+ 'https://tf-c-rpaa-radiko.smartstream.ne.jp',
+ 'https://si-f-radiko.smartstream.ne.jp',
+ 'https://rpaa.smartstream.ne.jp',
+ )
+ _HOSTS_FOR_TIME_FREE_FFMPEG_SUPPORTED = (
+ 'https://rd-wowza-radiko.radiko-cf.com',
+ 'https://radiko.jp',
+ 'https://f-radiko.smartstream.ne.jp',
+ )
+ # The following URL forcibly connects to the live stream rather than Time Free
+ _HOSTS_FOR_LIVE = (
+ 'https://c-radiko.smartstream.ne.jp',
+ )
+
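+ # radiko's auth handshake: auth1 hands out a token plus an offset/length into
+ # the static app key; auth2 expects base64(full_key[ko:ko + kl]) back and
+ # replies with the listener's area id (or 'OUT' when outside Japan)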
+ def _negotiate_token(self):
+ _, auth1_handle = self._download_webpage_handle(
+ 'https://radiko.jp/v2/api/auth1', None, 'Downloading authentication page',
+ headers={
+ 'x-radiko-app': 'pc_html5',
+ 'x-radiko-app-version': '0.0.1',
+ 'x-radiko-device': 'pc',
+ 'x-radiko-user': 'dummy_user',
+ })
+ auth1_header = auth1_handle.headers
+
+ auth_token = auth1_header['X-Radiko-AuthToken']
+ kl = int(auth1_header['X-Radiko-KeyLength'])
+ ko = int(auth1_header['X-Radiko-KeyOffset'])
+ raw_partial_key = self._extract_full_key()[ko:ko + kl]
+ partial_key = base64.b64encode(raw_partial_key).decode()
+
+ area_id = self._download_webpage(
+ 'https://radiko.jp/v2/api/auth2', None, 'Authenticating',
+ headers={
+ 'x-radiko-device': 'pc',
+ 'x-radiko-user': 'dummy_user',
+ 'x-radiko-authtoken': auth_token,
+ 'x-radiko-partialkey': partial_key,
+ }).split(',')[0]
+
+ if area_id == 'OUT':
+ self.raise_geo_restricted(countries=['JP'])
+
+ auth_data = (auth_token, area_id)
+ self.cache.store('radiko', 'auth_data', auth_data)
+ return auth_data
+
+ def _auth_client(self):
+ cachedata = self.cache.load('radiko', 'auth_data')
+ if cachedata is not None:
+ response = self._download_webpage(
+ 'https://radiko.jp/v2/api/auth_check', None, 'Checking cached token', expected_status=401,
+ headers={'X-Radiko-AuthToken': cachedata[0], 'X-Radiko-AreaId': cachedata[1]})
+ if response == 'OK':
+ return cachedata
+ return self._negotiate_token()
+
+ def _extract_full_key(self):
+ if self._FULL_KEY:
+ return self._FULL_KEY
+
+ jscode = self._download_webpage(
+ 'https://radiko.jp/apps/js/playerCommon.js', None,
+ note='Downloading player js code')
+ full_key = self._search_regex(
+ (r"RadikoJSPlayer\([^,]*,\s*(['\"])pc_html5\1,\s*(['\"])(?P<fullkey>[0-9a-f]+)\2,\s*{"),
+ jscode, 'full key', fatal=False, group='fullkey')
+
+ if full_key:
+ full_key = full_key.encode()
+ else: # fall back to the only full key ever known
+ full_key = b'bcd151073c03b352e1ef2fd66c32209da9ca0afa'
+
+ self._FULL_KEY = full_key
+ return full_key
+
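+ # Scan the station's weekly programme XML for the entry whose [ft, to) time
+ # window contains the requested timestamp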
+ def _find_program(self, video_id, station, cursor):
+ station_program = self._download_xml(
+ 'https://radiko.jp/v3/program/station/weekly/%s.xml' % station, video_id,
+ note='Downloading radio program for %s station' % station)
+
+ prog = None
+ for p in station_program.findall('.//prog'):
+ ft_str, to_str = p.attrib['ft'], p.attrib['to']
+ ft = unified_timestamp(ft_str, False)
+ to = unified_timestamp(to_str, False)
+ if ft <= cursor < to:
+ prog = p
+ break
+ if not prog:
+ raise ExtractorError('Cannot identify radio program to download!')
+ assert ft and to
+ return prog, station_program, ft, ft_str, to_str
+
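+ # Time-free programmes are seeked into via ffmpeg's -ss, so hosts whose
+ # playlists ffmpeg cannot handle are deprioritized below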
+ def _extract_formats(self, video_id, station, is_onair, ft, cursor, auth_token, area_id, query):
+ m3u8_playlist_data = self._download_xml(
+ f'https://radiko.jp/v3/station/stream/pc_html5/{station}.xml', video_id,
+ note='Downloading stream information')
+
+ formats = []
+ found = set()
+
+ timefree_int = 0 if is_onair else 1
+
+ for element in m3u8_playlist_data.findall(f'.//url[@timefree="{timefree_int}"]/playlist_create_url'):
+ pcu = element.text
+ if pcu in found:
+ continue
+ found.add(pcu)
+ playlist_url = update_url_query(pcu, {
+ 'station_id': station,
+ **query,
+ 'l': '15',
+ 'lsid': ''.join(random.choices('0123456789abcdef', k=32)),
+ 'type': 'b',
+ })
+
+ time_to_skip = None if is_onair else cursor - ft
+
+ domain = urllib.parse.urlparse(playlist_url).netloc
+ subformats = self._extract_m3u8_formats(
+ playlist_url, video_id, ext='m4a',
+ live=True, fatal=False, m3u8_id=domain,
+ note=f'Downloading m3u8 information from {domain}',
+ headers={
+ 'X-Radiko-AreaId': area_id,
+ 'X-Radiko-AuthToken': auth_token,
+ })
+ for sf in subformats:
+ if (is_onair ^ pcu.startswith(self._HOSTS_FOR_LIVE)) or (
+ not is_onair and pcu.startswith(self._HOSTS_FOR_TIME_FREE_FFMPEG_UNSUPPORTED)):
+ sf['preference'] = -100
+ sf['format_note'] = 'not preferred'
+ if not is_onair and timefree_int == 1 and time_to_skip:
+ sf['downloader_options'] = {'ffmpeg_args': ['-ss', str(time_to_skip)]}
+ formats.extend(subformats)
+
+ return formats
+
+ def _extract_performers(self, prog):
+ return traverse_obj(prog, (
+ 'pfm/text()', ..., {lambda x: re.split(r'[//、 ,,]', x)}, ..., {str.strip})) or None
+
+
+class RadikoIE(RadikoBaseIE):
+ _VALID_URL = r'https?://(?:www\.)?radiko\.jp/#!/ts/(?P<station>[A-Z0-9-]+)/(?P<id>\d+)'
+
+ _TESTS = [{
+ # QRR (文化放送) station provides <desc>
+ 'url': 'https://radiko.jp/#!/ts/QRR/20210425101300',
+ 'only_matching': True,
+ }, {
+ # FMT (TOKYO FM) station does not provide <desc>
+ 'url': 'https://radiko.jp/#!/ts/FMT/20210810150000',
+ 'only_matching': True,
+ }, {
+ 'url': 'https://radiko.jp/#!/ts/JOAK-FM/20210509090000',
+ 'only_matching': True,
+ }]
+
+ def _real_extract(self, url):
+ station, video_id = self._match_valid_url(url).groups()
+ vid_int = unified_timestamp(video_id, False)
+ prog, station_program, ft, radio_begin, radio_end = self._find_program(video_id, station, vid_int)
+
+ auth_token, area_id = self._auth_client()
+
+ return {
+ 'id': video_id,
+ 'title': try_call(lambda: prog.find('title').text),
+ 'cast': self._extract_performers(prog),
+ 'description': clean_html(try_call(lambda: prog.find('info').text)),
+ 'uploader': try_call(lambda: station_program.find('.//name').text),
+ 'uploader_id': station,
+ 'timestamp': vid_int,
+ 'duration': try_call(lambda: unified_timestamp(radio_end, False) - unified_timestamp(radio_begin, False)),
+ 'is_live': True,
+ 'formats': self._extract_formats(
+ video_id=video_id, station=station, is_onair=False,
+ ft=ft, cursor=vid_int, auth_token=auth_token, area_id=area_id,
+ query={
+ 'start_at': radio_begin,
+ 'ft': radio_begin,
+ 'end_at': radio_end,
+ 'to': radio_end,
+ 'seek': video_id
+ }
+ ),
+ }
+
+
+class RadikoRadioIE(RadikoBaseIE):
+ _VALID_URL = r'https?://(?:www\.)?radiko\.jp/#!/live/(?P<id>[A-Z0-9-]+)'
+
+ _TESTS = [{
+ # QRR (文化放送) station provides <desc>
+ 'url': 'https://radiko.jp/#!/live/QRR',
+ 'only_matching': True,
+ }, {
+ # FMT (TOKYO FM) station does not provide <desc>
+ 'url': 'https://radiko.jp/#!/live/FMT',
+ 'only_matching': True,
+ }, {
+ 'url': 'https://radiko.jp/#!/live/JOAK-FM',
+ 'only_matching': True,
+ }]
+
+ def _real_extract(self, url):
+ station = self._match_id(url)
+ self.report_warning('Downloader will not stop at the end of the program! Press Ctrl+C to stop')
+
+ auth_token, area_id = self._auth_client()
+ # get current time in JST (GMT+9:00 w/o DST)
+ vid_now = time_seconds(hours=9)
+
+ prog, station_program, ft, _, _ = self._find_program(station, station, vid_now)
+
+ title = prog.find('title').text
+ description = clean_html(prog.find('info').text)
+ station_name = station_program.find('.//name').text
+
+ formats = self._extract_formats(
+ video_id=station, station=station, is_onair=True,
+ ft=ft, cursor=vid_now, auth_token=auth_token, area_id=area_id,
+ query={})
+
+ return {
+ 'id': station,
+ 'title': title,
+ 'cast': self._extract_performers(prog),
+ 'description': description,
+ 'uploader': station_name,
+ 'uploader_id': station,
+ 'timestamp': ft,
+ 'formats': formats,
+ 'is_live': True,
+ }
diff --git a/yt_dlp/extractor/radiocanada.py b/yt_dlp/extractor/radiocanada.py
new file mode 100644
index 0000000..1a5a635
--- /dev/null
+++ b/yt_dlp/extractor/radiocanada.py
@@ -0,0 +1,165 @@
+from .common import InfoExtractor
+from ..networking.exceptions import HTTPError
+from ..utils import (
+ determine_ext,
+ ExtractorError,
+ int_or_none,
+ unified_strdate,
+)
+
+
+class RadioCanadaIE(InfoExtractor):
+ IE_NAME = 'radiocanada'
+ _VALID_URL = r'(?:radiocanada:|https?://ici\.radio-canada\.ca/widgets/mediaconsole/)(?P<app_code>[^:/]+)[:/](?P<id>[0-9]+)'
+ _TESTS = [
+ {
+ 'url': 'http://ici.radio-canada.ca/widgets/mediaconsole/medianet/7184272',
+ 'info_dict': {
+ 'id': '7184272',
+ 'ext': 'mp4',
+ 'title': 'Le parcours du tireur capté sur vidéo',
+ 'description': 'Images des caméras de surveillance fournies par la GRC montrant le parcours du tireur d\'Ottawa',
+ 'upload_date': '20141023',
+ },
+ 'params': {
+ # m3u8 download
+ 'skip_download': True,
+ }
+ },
+ {
+ # empty Title
+ 'url': 'http://ici.radio-canada.ca/widgets/mediaconsole/medianet/7754998/',
+ 'info_dict': {
+ 'id': '7754998',
+ 'ext': 'mp4',
+ 'title': 'letelejournal22h',
+ 'description': 'INTEGRALE WEB 22H-TJ',
+ 'upload_date': '20170720',
+ },
+ 'params': {
+ # m3u8 download
+ 'skip_download': True,
+ },
+ },
+ {
+ # with protectionType but not actually DRM protected
+ 'url': 'radiocanada:toutv:140872',
+ 'info_dict': {
+ 'id': '140872',
+ 'title': 'Épisode 1',
+ 'series': 'District 31',
+ },
+ 'only_matching': True,
+ }
+ ]
+ _GEO_COUNTRIES = ['CA']
+ _access_token = None
+ _claims = None
+
+ def _call_api(self, path, video_id=None, app_code=None, query=None):
+ if not query:
+ query = {}
+ query.update({
+ 'client_key': '773aea60-0e80-41bb-9c7f-e6d7c3ad17fb',
+ 'output': 'json',
+ })
+ if video_id:
+ query.update({
+ 'appCode': app_code,
+ 'idMedia': video_id,
+ })
+ if self._access_token:
+ query['access_token'] = self._access_token
+ try:
+ return self._download_json(
+ 'https://services.radio-canada.ca/media/' + path, video_id, query=query)
+ except ExtractorError as e:
+ if isinstance(e.cause, HTTPError) and e.cause.status in (401, 422):
+ data = self._parse_json(e.cause.response.read().decode(), None)
+ error = data.get('error_description') or data['errorMessage']['text']
+ raise ExtractorError(error, expected=True)
+ raise
+
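+ # The metadata endpoint returns a flat list of {name, text} pairs; get_meta
+ # below does a linear scan over it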
+ def _extract_info(self, app_code, video_id):
+ metas = self._call_api('meta/v1/index.ashx', video_id, app_code)['Metas']
+
+ def get_meta(name):
+ for meta in metas:
+ if meta.get('name') == name:
+ text = meta.get('text')
+ if text:
+ return text
+
+ # protectionType does not necessarily mean the video is DRM protected (see
+ # https://github.com/ytdl-org/youtube-dl/pull/18609).
+ if get_meta('protectionType'):
+ self.report_warning('This video is probably DRM protected.')
+
+ query = {
+ 'connectionType': 'hd',
+ 'deviceType': 'ipad',
+ 'multibitrate': 'true',
+ }
+ if self._claims:
+ query['claims'] = self._claims
+ v_data = self._call_api('validation/v2/', video_id, app_code, query)
+ v_url = v_data.get('url')
+ if not v_url:
+ error = v_data['message']
+ if error == "Le contenu sélectionné n'est pas disponible dans votre pays":
+ raise self.raise_geo_restricted(error, self._GEO_COUNTRIES)
+ if error == 'Le contenu sélectionné est disponible seulement en premium':
+ self.raise_login_required(error)
+ raise ExtractorError(
+ '%s said: %s' % (self.IE_NAME, error), expected=True)
+ formats = self._extract_m3u8_formats(v_url, video_id, 'mp4')
+
+ subtitles = {}
+ closed_caption_url = get_meta('closedCaption') or get_meta('closedCaptionHTML5')
+ if closed_caption_url:
+ subtitles['fr'] = [{
+ 'url': closed_caption_url,
+ 'ext': determine_ext(closed_caption_url, 'vtt'),
+ }]
+
+ return {
+ 'id': video_id,
+ 'title': get_meta('Title') or get_meta('AV-nomEmission'),
+ 'description': get_meta('Description') or get_meta('ShortDescription'),
+ 'thumbnail': get_meta('imageHR') or get_meta('imageMR') or get_meta('imageBR'),
+ 'duration': int_or_none(get_meta('length')),
+ 'series': get_meta('Emission'),
+ 'season_number': int_or_none(get_meta('SrcSaison')),
+ 'episode_number': int_or_none(get_meta('SrcEpisode')),
+ 'upload_date': unified_strdate(get_meta('Date')),
+ 'subtitles': subtitles,
+ 'formats': formats,
+ }
+
+ def _real_extract(self, url):
+ return self._extract_info(*self._match_valid_url(url).groups())
+
+
+class RadioCanadaAudioVideoIE(InfoExtractor):
+ IE_NAME = 'radiocanada:audiovideo'
+ _VALID_URL = r'https?://ici\.radio-canada\.ca/([^/]+/)*media-(?P<id>[0-9]+)'
+ _TESTS = [{
+ 'url': 'http://ici.radio-canada.ca/audio-video/media-7527184/barack-obama-au-vietnam',
+ 'info_dict': {
+ 'id': '7527184',
+ 'ext': 'mp4',
+ 'title': 'Barack Obama au Vietnam',
+ 'description': 'Les États-Unis lèvent l\'embargo sur la vente d\'armes qui datait de la guerre du Vietnam',
+ 'upload_date': '20160523',
+ },
+ 'params': {
+ # m3u8 download
+ 'skip_download': True,
+ },
+ }, {
+ 'url': 'https://ici.radio-canada.ca/info/videos/media-7527184/barack-obama-au-vietnam',
+ 'only_matching': True,
+ }]
+
+ def _real_extract(self, url):
+ return self.url_result('radiocanada:medianet:%s' % self._match_id(url))
diff --git a/yt_dlp/extractor/radiocomercial.py b/yt_dlp/extractor/radiocomercial.py
new file mode 100644
index 0000000..38f8cf7
--- /dev/null
+++ b/yt_dlp/extractor/radiocomercial.py
@@ -0,0 +1,154 @@
+import itertools
+
+from .common import InfoExtractor
+from ..networking.exceptions import HTTPError
+from ..utils import (
+ ExtractorError,
+ extract_attributes,
+ get_element_by_class,
+ get_element_html_by_class,
+ get_element_text_and_html_by_tag,
+ get_elements_html_by_class,
+ int_or_none,
+ join_nonempty,
+ try_call,
+ unified_strdate,
+ update_url,
+ urljoin
+)
+from ..utils.traversal import traverse_obj
+
+
+class RadioComercialIE(InfoExtractor):
+ _VALID_URL = r'https?://(?:www\.)?radiocomercial\.pt/podcasts/[^/?#]+/t?(?P<season>\d+)/(?P<id>[\w-]+)'
+ _TESTS = [{
+ 'url': 'https://radiocomercial.pt/podcasts/o-homem-que-mordeu-o-cao/t6/taylor-swift-entranhando-se-que-nem-uma-espada-no-ventre-dos-fas#page-content-wrapper',
+ 'md5': '5f4fe8e485b29d2e8fd495605bc2c7e4',
+ 'info_dict': {
+ 'id': 'taylor-swift-entranhando-se-que-nem-uma-espada-no-ventre-dos-fas',
+ 'ext': 'mp3',
+ 'title': 'Taylor Swift entranhando-se que nem uma espada no ventre dos fãs.',
+ 'release_date': '20231025',
+ 'thumbnail': r're:https://radiocomercial.pt/upload/[^.]+.jpg',
+ 'season': 'Season 6',
+ 'season_number': 6,
+ }
+ }, {
+ 'url': 'https://radiocomercial.pt/podcasts/convenca-me-num-minuto/t3/convenca-me-num-minuto-que-os-lobisomens-existem',
+ 'md5': '47e96c273aef96a8eb160cd6cf46d782',
+ 'info_dict': {
+ 'id': 'convenca-me-num-minuto-que-os-lobisomens-existem',
+ 'ext': 'mp3',
+ 'title': 'Convença-me num minuto que os lobisomens existem',
+ 'release_date': '20231026',
+ 'thumbnail': r're:https://radiocomercial.pt/upload/[^.]+.jpg',
+ 'season': 'Season 3',
+ 'season_number': 3,
+ }
+ }, {
+ 'url': 'https://radiocomercial.pt/podcasts/inacreditavel-by-ines-castel-branco/t2/o-desastre-de-aviao',
+ 'md5': '69be64255420fec23b7259955d771e54',
+ 'info_dict': {
+ 'id': 'o-desastre-de-aviao',
+ 'ext': 'mp3',
+ 'title': 'O desastre de avião',
+ 'description': 'md5:8a82beeb372641614772baab7246245f',
+ 'release_date': '20231101',
+ 'thumbnail': r're:https://radiocomercial.pt/upload/[^.]+.jpg',
+ 'season': 'Season 2',
+ 'season_number': 2,
+ },
+ 'params': {
+ # inconsistent md5
+ 'skip_download': True,
+ },
+ }, {
+ 'url': 'https://radiocomercial.pt/podcasts/tnt-todos-no-top/2023/t-n-t-29-de-outubro',
+ 'md5': '91d32d4d4b1407272068b102730fc9fa',
+ 'info_dict': {
+ 'id': 't-n-t-29-de-outubro',
+ 'ext': 'mp3',
+ 'title': 'T.N.T 29 de outubro',
+ 'release_date': '20231029',
+ 'thumbnail': r're:https://radiocomercial.pt/upload/[^.]+.jpg',
+ 'season': 'Season 2023',
+ 'season_number': 2023,
+ }
+ }]
+
+ def _real_extract(self, url):
+ video_id, season = self._match_valid_url(url).group('id', 'season')
+ webpage = self._download_webpage(url, video_id)
+ return {
+ 'id': video_id,
+ 'title': self._html_extract_title(webpage),
+ 'description': self._og_search_description(webpage, default=None),
+ 'release_date': unified_strdate(get_element_by_class(
+ 'date', get_element_html_by_class('descriptions', webpage) or '')),
+ 'thumbnail': self._og_search_thumbnail(webpage),
+ 'season_number': int_or_none(season),
+ 'url': extract_attributes(get_element_html_by_class('audiofile', webpage) or '').get('href'),
+ }
+
+
+class RadioComercialPlaylistIE(InfoExtractor):
+ _VALID_URL = r'https?://(?:www\.)?radiocomercial\.pt/podcasts/(?P<id>[\w-]+)(?:/t?(?P<season>\d+))?/?(?:$|[?#])'
+ _TESTS = [{
+ 'url': 'https://radiocomercial.pt/podcasts/convenca-me-num-minuto/t3',
+ 'info_dict': {
+ 'id': 'convenca-me-num-minuto_t3',
+ 'title': 'Convença-me num Minuto - Temporada 3',
+ },
+ 'playlist_mincount': 32
+ }, {
+ 'url': 'https://radiocomercial.pt/podcasts/o-homem-que-mordeu-o-cao',
+ 'info_dict': {
+ 'id': 'o-homem-que-mordeu-o-cao',
+ 'title': 'O Homem Que Mordeu o Cão',
+ },
+ 'playlist_mincount': 19
+ }, {
+ 'url': 'https://radiocomercial.pt/podcasts/as-minhas-coisas-favoritas',
+ 'info_dict': {
+ 'id': 'as-minhas-coisas-favoritas',
+ 'title': 'As Minhas Coisas Favoritas',
+ },
+ 'playlist_mincount': 131
+ }, {
+ 'url': 'https://radiocomercial.pt/podcasts/tnt-todos-no-top/t2023',
+ 'info_dict': {
+ 'id': 'tnt-todos-no-top_t2023',
+ 'title': 'TNT - Todos No Top - Temporada 2023',
+ },
+ 'playlist_mincount': 39
+ }]
+
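+ # Episode pages are numbered /1, /2, ...; stop on the first 404 or on a page
+ # without any episode links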
+ def _entries(self, url, playlist_id):
+ for page in itertools.count(1):
+ try:
+ webpage = self._download_webpage(
+ f'{url}/{page}', playlist_id, f'Downloading page {page}')
+ except ExtractorError as e:
+ if isinstance(e.cause, HTTPError) and e.cause.status == 404:
+ break
+ raise
+
+ episodes = get_elements_html_by_class('tm-ouvir-podcast', webpage)
+ if not episodes:
+ break
+ for url_path in traverse_obj(episodes, (..., {extract_attributes}, 'href')):
+ episode_url = urljoin(url, url_path)
+ if RadioComercialIE.suitable(episode_url):
+ yield episode_url
+
+ def _real_extract(self, url):
+ podcast, season = self._match_valid_url(url).group('id', 'season')
+ playlist_id = join_nonempty(podcast, season, delim='_t')
+ url = update_url(url, query=None, fragment=None)
+ webpage = self._download_webpage(url, playlist_id)
+
+ name = try_call(lambda: get_element_text_and_html_by_tag('h1', webpage)[0])
+ title = name if name == season else join_nonempty(name, season, delim=' - Temporada ')
+
+ return self.playlist_from_matches(
+ self._entries(url, playlist_id), playlist_id, title, ie=RadioComercialIE)
diff --git a/yt_dlp/extractor/radiode.py b/yt_dlp/extractor/radiode.py
new file mode 100644
index 0000000..7262078
--- /dev/null
+++ b/yt_dlp/extractor/radiode.py
@@ -0,0 +1,50 @@
+from .common import InfoExtractor
+
+
+class RadioDeIE(InfoExtractor):
+ _WORKING = False
+ IE_NAME = 'radio.de'
+ _VALID_URL = r'https?://(?P<id>.+?)\.(?:radio\.(?:de|at|fr|pt|es|pl|it)|rad\.io)'
+ _TEST = {
+ 'url': 'http://ndr2.radio.de/',
+ 'info_dict': {
+ 'id': 'ndr2',
+ 'ext': 'mp3',
+ 'title': 're:^NDR 2 [0-9]{4}-[0-9]{2}-[0-9]{2} [0-9]{2}:[0-9]{2}$',
+ 'description': 'md5:591c49c702db1a33751625ebfb67f273',
+ 'thumbnail': r're:^https?://.*\.png',
+ 'is_live': True,
+ },
+ 'params': {
+ 'skip_download': True,
+ }
+ }
+
+ def _real_extract(self, url):
+ radio_id = self._match_id(url)
+ webpage = self._download_webpage(url, radio_id)
+ jscode = self._search_regex(
+ r"'components/station/stationService':\s*\{\s*'?station'?:\s*(\{.*?\s*\}),\n",
+ webpage, 'broadcast')
+
+ broadcast = self._parse_json(jscode, radio_id)
+ title = broadcast['name']
+ description = broadcast.get('description') or broadcast.get('shortDescription')
+ thumbnail = broadcast.get('picture4Url') or broadcast.get('picture4TransUrl') or broadcast.get('logo100x100')
+
+ formats = [{
+ 'url': stream['streamUrl'],
+ 'ext': stream['streamContentFormat'].lower(),
+ 'acodec': stream['streamContentFormat'],
+ 'abr': stream['bitRate'],
+ 'asr': stream['sampleRate']
+ } for stream in broadcast['streamUrls']]
+
+ return {
+ 'id': radio_id,
+ 'title': title,
+ 'description': description,
+ 'thumbnail': thumbnail,
+ 'is_live': True,
+ 'formats': formats,
+ }
diff --git a/yt_dlp/extractor/radiofrance.py b/yt_dlp/extractor/radiofrance.py
new file mode 100644
index 0000000..6bd6fe9
--- /dev/null
+++ b/yt_dlp/extractor/radiofrance.py
@@ -0,0 +1,473 @@
+import itertools
+import re
+import urllib.parse
+
+from .common import InfoExtractor
+from ..utils import (
+ int_or_none,
+ join_nonempty,
+ js_to_json,
+ parse_duration,
+ strftime_or_none,
+ traverse_obj,
+ unified_strdate,
+ urljoin,
+)
+
+
+class RadioFranceIE(InfoExtractor):
+ _VALID_URL = r'^https?://maison\.radiofrance\.fr/radiovisions/(?P<id>[^?#]+)'
+ IE_NAME = 'radiofrance'
+
+ _TEST = {
+ 'url': 'http://maison.radiofrance.fr/radiovisions/one-one',
+ 'md5': 'bdbb28ace95ed0e04faab32ba3160daf',
+ 'info_dict': {
+ 'id': 'one-one',
+ 'ext': 'ogg',
+ 'title': 'One to one',
+ 'description': "Plutôt que d'imaginer la radio de demain comme technologie ou comme création de contenu, je veux montrer que quelles que soient ses évolutions, j'ai l'intime conviction que la radio continuera d'être un grand média de proximité pour les auditeurs.",
+ 'uploader': 'Thomas Hercouët',
+ },
+ }
+
+ def _real_extract(self, url):
+ m = self._match_valid_url(url)
+ video_id = m.group('id')
+
+ webpage = self._download_webpage(url, video_id)
+ title = self._html_search_regex(r'<h1>(.*?)</h1>', webpage, 'title')
+ description = self._html_search_regex(
+ r'<div class="bloc_page_wrapper"><div class="text">(.*?)</div>',
+ webpage, 'description', fatal=False)
+ uploader = self._html_search_regex(
+ r'<div class="credit">&nbsp;&nbsp;&copy;&nbsp;(.*?)</div>',
+ webpage, 'uploader', fatal=False)
+
+ formats_str = self._html_search_regex(
+ r'class="jp-jplayer[^"]*" data-source="([^"]+)">',
+ webpage, 'audio URLs')
+ formats = [
+ {
+ 'format_id': fm[0],
+ 'url': fm[1],
+ 'vcodec': 'none',
+ 'quality': i,
+ }
+ for i, fm in
+ enumerate(re.findall(r"([a-z0-9]+)\s*:\s*'([^']+)'", formats_str))
+ ]
+
+ return {
+ 'id': video_id,
+ 'title': title,
+ 'formats': formats,
+ 'description': description,
+ 'uploader': uploader,
+ }
+
+
+class RadioFranceBaseIE(InfoExtractor):
+ _VALID_URL_BASE = r'https?://(?:www\.)?radiofrance\.fr'
+
+ _STATIONS_RE = '|'.join(map(re.escape, (
+ 'franceculture',
+ 'franceinfo',
+ 'franceinter',
+ 'francemusique',
+ 'fip',
+ 'mouv',
+ )))
+
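+ # radiofrance.fr pages embed a `const data = [...]` JS array; convert it to
+ # JSON and pick the first object carrying the requested key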
+ def _extract_data_from_webpage(self, webpage, display_id, key):
+ return traverse_obj(self._search_json(
+ r'\bconst\s+data\s*=', webpage, key, display_id,
+ contains_pattern=r'\[\{(?s:.+)\}\]', transform_source=js_to_json),
+ (..., 'data', key, {dict}), get_all=False) or {}
+
+
+class FranceCultureIE(RadioFranceBaseIE):
+ _VALID_URL = rf'''(?x)
+ {RadioFranceBaseIE._VALID_URL_BASE}
+ /(?:{RadioFranceBaseIE._STATIONS_RE})
+ /podcasts/(?:[^?#]+/)?(?P<display_id>[^?#]+)-(?P<id>\d{{6,}})(?:$|[?#])
+ '''
+
+ _TESTS = [
+ {
+ 'url': 'https://www.radiofrance.fr/franceculture/podcasts/science-en-questions/la-physique-d-einstein-aiderait-elle-a-comprendre-le-cerveau-8440487',
+ 'info_dict': {
+ 'id': '8440487',
+ 'display_id': 'la-physique-d-einstein-aiderait-elle-a-comprendre-le-cerveau',
+ 'ext': 'mp3',
+ 'title': 'La physique d’Einstein aiderait-elle à comprendre le cerveau ?',
+ 'description': 'Existerait-il un pont conceptuel entre la physique de l’espace-temps et les neurosciences ?',
+ 'thumbnail': r're:^https?://.*\.(?:jpg|png)',
+ 'upload_date': '20220514',
+ 'duration': 2750,
+ },
+ },
+ {
+ 'url': 'https://www.radiofrance.fr/franceinter/podcasts/le-7-9-30/le-7-9-30-du-vendredi-10-mars-2023-2107675',
+ 'info_dict': {
+ 'id': '2107675',
+ 'display_id': 'le-7-9-30-du-vendredi-10-mars-2023',
+ 'title': 'Inflation alimentaire : comment en sortir ? - Régis Debray et Claude Grange - Cybèle Idelot',
+ 'description': 'md5:36ee74351ede77a314fdebb94026b916',
+ 'thumbnail': r're:^https?://.*\.(?:jpg|png)',
+ 'upload_date': '20230310',
+ 'duration': 8977,
+ 'ext': 'mp3',
+ },
+ },
+ {
+ 'url': 'https://www.radiofrance.fr/franceinter/podcasts/la-rafle-du-vel-d-hiv-une-affaire-d-etat/les-racines-du-crime-episode-1-3715507',
+ 'only_matching': True,
+ }, {
+ 'url': 'https://www.radiofrance.fr/franceinfo/podcasts/le-billet-sciences/sante-bientot-un-vaccin-contre-l-asthme-allergique-3057200',
+ 'only_matching': True,
+ }
+ ]
+
+ def _real_extract(self, url):
+ video_id, display_id = self._match_valid_url(url).group('id', 'display_id')
+ webpage = self._download_webpage(url, display_id)
+
+ # _search_json_ld doesn't correctly handle this. See https://github.com/yt-dlp/yt-dlp/pull/3874#discussion_r891903846
+ video_data = self._search_json('', webpage, 'audio data', display_id, contains_pattern=r'{\s*"@type"\s*:\s*"AudioObject".+}')
+
+ return {
+ 'id': video_id,
+ 'display_id': display_id,
+ 'url': video_data['contentUrl'],
+ 'vcodec': 'none' if video_data.get('encodingFormat') == 'mp3' else None,
+ 'duration': parse_duration(video_data.get('duration')),
+ 'title': self._html_search_regex(r'(?s)<h1[^>]*itemprop="[^"]*name[^"]*"[^>]*>(.+?)</h1>',
+ webpage, 'title', default=self._og_search_title(webpage)),
+ 'description': self._html_search_regex(
+ r'(?s)<meta name="description"\s*content="([^"]+)', webpage, 'description', default=None),
+ 'thumbnail': self._og_search_thumbnail(webpage),
+ 'uploader': self._html_search_regex(
+ r'(?s)<span class="author">(.*?)</span>', webpage, 'uploader', default=None),
+ 'upload_date': unified_strdate(self._search_regex(
+ r'"datePublished"\s*:\s*"([^"]+)', webpage, 'timestamp', fatal=False))
+ }
+
+
+class RadioFranceLiveIE(RadioFranceBaseIE):
+ _VALID_URL = rf'''(?x)
+ https?://(?:www\.)?radiofrance\.fr
+ /(?P<id>{RadioFranceBaseIE._STATIONS_RE})
+ /?(?P<substation_id>radio-[\w-]+)?(?:[#?]|$)
+ '''
+
+ _TESTS = [{
+ 'url': 'https://www.radiofrance.fr/franceinter/',
+ 'info_dict': {
+ 'id': 'franceinter',
+ 'title': str,
+ 'live_status': 'is_live',
+ 'ext': 'aac',
+ },
+ 'params': {
+ 'skip_download': 'Livestream',
+ },
+ }, {
+ 'url': 'https://www.radiofrance.fr/franceculture',
+ 'info_dict': {
+ 'id': 'franceculture',
+ 'title': str,
+ 'live_status': 'is_live',
+ 'ext': 'aac',
+ },
+ 'params': {
+ 'skip_download': 'Livestream',
+ },
+ }, {
+ 'url': 'https://www.radiofrance.fr/mouv/radio-musique-kids-family',
+ 'info_dict': {
+ 'id': 'mouv-radio-musique-kids-family',
+ 'title': str,
+ 'live_status': 'is_live',
+ 'ext': 'aac',
+ },
+ 'params': {
+ 'skip_download': 'Livestream',
+ },
+ }, {
+ 'url': 'https://www.radiofrance.fr/mouv/radio-rnb-soul',
+ 'info_dict': {
+ 'id': 'mouv-radio-rnb-soul',
+ 'title': str,
+ 'live_status': 'is_live',
+ 'ext': 'aac',
+ },
+ 'params': {
+ 'skip_download': 'Livestream',
+ },
+ }, {
+ 'url': 'https://www.radiofrance.fr/mouv/radio-musique-mix',
+ 'info_dict': {
+ 'id': 'mouv-radio-musique-mix',
+ 'title': str,
+ 'live_status': 'is_live',
+ 'ext': 'aac',
+ },
+ 'params': {
+ 'skip_download': 'Livestream',
+ },
+ }, {
+ 'url': 'https://www.radiofrance.fr/fip/radio-rock',
+ 'info_dict': {
+ 'id': 'fip-radio-rock',
+ 'title': str,
+ 'live_status': 'is_live',
+ 'ext': 'aac',
+ },
+ 'params': {
+ 'skip_download': 'Livestream',
+ },
+ }, {
+ 'url': 'https://www.radiofrance.fr/mouv',
+ 'only_matching': True,
+ }]
+
+ def _real_extract(self, url):
+ station_id, substation_id = self._match_valid_url(url).group('id', 'substation_id')
+
+ if substation_id:
+ webpage = self._download_webpage(url, station_id)
+ api_response = self._extract_data_from_webpage(webpage, station_id, 'webRadioData')
+ else:
+ api_response = self._download_json(
+ f'https://www.radiofrance.fr/{station_id}/api/live', station_id)
+
+ formats, subtitles = [], {}
+ for media_source in traverse_obj(api_response, (('now', None), 'media', 'sources', lambda _, v: v['url'])):
+ if media_source.get('format') == 'hls':
+ fmts, subs = self._extract_m3u8_formats_and_subtitles(media_source['url'], station_id, fatal=False)
+ formats.extend(fmts)
+ self._merge_subtitles(subs, target=subtitles)
+ else:
+ formats.append({
+ 'url': media_source['url'],
+ 'abr': media_source.get('bitrate'),
+ })
+
+ return {
+ 'id': join_nonempty(station_id, substation_id),
+ 'title': traverse_obj(api_response, ('visual', 'legend')) or join_nonempty(
+ ('now', 'firstLine', 'title'), ('now', 'secondLine', 'title'), from_dict=api_response, delim=' - '),
+ 'formats': formats,
+ 'subtitles': subtitles,
+ 'is_live': True,
+ }
+
+
+class RadioFrancePlaylistBaseIE(RadioFranceBaseIE):
+ """Subclasses must set _METADATA_KEY"""
+
+ def _call_api(self, content_id, cursor, page_num):
+ raise NotImplementedError('This method must be implemented by subclasses')
+
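+ # The first page of items comes bundled with the path metadata; subsequent
+ # pages are fetched through _call_api using the cursor from 'next'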
+ def _generate_playlist_entries(self, content_id, content_response):
+ for page_num in itertools.count(2):
+ for entry in content_response['items']:
+ yield self.url_result(
+ f'https://www.radiofrance.fr/{entry["path"]}', url_transparent=True, **traverse_obj(entry, {
+ 'title': 'title',
+ 'description': 'standFirst',
+ 'timestamp': ('publishedDate', {int_or_none}),
+ 'thumbnail': ('visual', 'src'),
+ }))
+
+ next_cursor = traverse_obj(content_response, (('pagination', None), 'next'), get_all=False)
+ if not next_cursor:
+ break
+
+ content_response = self._call_api(content_id, next_cursor, page_num)
+
+ def _real_extract(self, url):
+ display_id = self._match_id(url)
+
+ metadata = self._download_json(
+ 'https://www.radiofrance.fr/api/v2.1/path', display_id,
+ query={'value': urllib.parse.urlparse(url).path})['content']
+
+ content_id = metadata['id']
+
+ return self.playlist_result(
+ self._generate_playlist_entries(content_id, metadata[self._METADATA_KEY]), content_id,
+ display_id=display_id, **{**traverse_obj(metadata, {
+ 'title': 'title',
+ 'description': 'standFirst',
+ 'thumbnail': ('visual', 'src'),
+ }), **traverse_obj(metadata, {
+ 'title': 'name',
+ 'description': 'role',
+ })})
+
+
+class RadioFrancePodcastIE(RadioFrancePlaylistBaseIE):
+ _VALID_URL = rf'''(?x)
+ {RadioFranceBaseIE._VALID_URL_BASE}
+ /(?:{RadioFranceBaseIE._STATIONS_RE})
+ /podcasts/(?P<id>[\w-]+)/?(?:[?#]|$)
+ '''
+
+ _TESTS = [{
+ 'url': 'https://www.radiofrance.fr/franceinfo/podcasts/le-billet-vert',
+ 'info_dict': {
+ 'id': 'eaf6ef81-a980-4f1c-a7d1-8a75ecd54b17',
+ 'display_id': 'le-billet-vert',
+ 'title': 'Le billet sciences',
+ 'description': 'md5:eb1007b34b0c0a680daaa71525bbd4c1',
+ 'thumbnail': r're:^https?://.*\.(?:jpg|png)',
+ },
+ 'playlist_mincount': 11,
+ }, {
+ 'url': 'https://www.radiofrance.fr/franceinter/podcasts/jean-marie-le-pen-l-obsession-nationale',
+ 'info_dict': {
+ 'id': '566fd524-3074-4fbc-ac69-8696f2152a54',
+ 'display_id': 'jean-marie-le-pen-l-obsession-nationale',
+ 'title': 'Jean-Marie Le Pen, l\'obsession nationale',
+ 'description': 'md5:a07c0cfb894f6d07a62d0ad12c4b7d73',
+ 'thumbnail': r're:^https?://.*\.(?:jpg|png)',
+ },
+ 'playlist_count': 7,
+ }, {
+ 'url': 'https://www.radiofrance.fr/franceculture/podcasts/serie-thomas-grjebine',
+ 'info_dict': {
+ 'id': '63c1ddc9-9f15-457a-98b2-411bac63f48d',
+ 'display_id': 'serie-thomas-grjebine',
+ 'title': 'Thomas Grjebine',
+ },
+ 'playlist_count': 1,
+ }, {
+ 'url': 'https://www.radiofrance.fr/fip/podcasts/certains-l-aiment-fip',
+ 'info_dict': {
+ 'id': '143dff38-e956-4a5d-8576-1c0b7242b99e',
+ 'display_id': 'certains-l-aiment-fip',
+ 'title': 'Certains l’aiment Fip',
+ 'description': 'md5:ff974672ba00d4fd5be80fb001c5b27e',
+ 'thumbnail': r're:^https?://.*\.(?:jpg|png)',
+ },
+ 'playlist_mincount': 321,
+ }, {
+ 'url': 'https://www.radiofrance.fr/franceinter/podcasts/le-7-9',
+ 'only_matching': True,
+ }, {
+ 'url': 'https://www.radiofrance.fr/mouv/podcasts/dirty-mix',
+ 'only_matching': True,
+ }]
+
+ _METADATA_KEY = 'expressions'
+
+ def _call_api(self, podcast_id, cursor, page_num):
+ return self._download_json(
+ f'https://www.radiofrance.fr/api/v2.1/concepts/{podcast_id}/expressions', podcast_id,
+ note=f'Downloading page {page_num}', query={'pageCursor': cursor})
+
+
+class RadioFranceProfileIE(RadioFrancePlaylistBaseIE):
+ _VALID_URL = rf'{RadioFranceBaseIE._VALID_URL_BASE}/personnes/(?P<id>[\w-]+)'
+
+ _TESTS = [{
+ 'url': 'https://www.radiofrance.fr/personnes/thomas-pesquet?p=3',
+ 'info_dict': {
+ 'id': '86c62790-e481-11e2-9f7b-782bcb6744eb',
+ 'display_id': 'thomas-pesquet',
+ 'title': 'Thomas Pesquet',
+ 'description': 'Astronaute à l\'agence spatiale européenne',
+ },
+ 'playlist_mincount': 212,
+ }, {
+ 'url': 'https://www.radiofrance.fr/personnes/eugenie-bastie',
+ 'info_dict': {
+ 'id': '9593050b-0183-4972-a0b5-d8f699079e02',
+ 'display_id': 'eugenie-bastie',
+ 'title': 'Eugénie Bastié',
+ 'description': 'Journaliste et essayiste',
+ 'thumbnail': r're:^https?://.*\.(?:jpg|png)',
+ },
+ 'playlist_mincount': 39,
+ }, {
+ 'url': 'https://www.radiofrance.fr/personnes/lea-salame',
+ 'only_matching': True,
+ }]
+
+ _METADATA_KEY = 'documents'
+
+ def _call_api(self, profile_id, cursor, page_num):
+ resp = self._download_json(
+ f'https://www.radiofrance.fr/api/v2.1/taxonomy/{profile_id}/documents', profile_id,
+ note=f'Downloading page {page_num}', query={
+ 'relation': 'personality',
+ 'cursor': cursor,
+ })
+
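+ # Mirror the cursor to the top level, where _generate_playlist_entries
+ # looks for it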
+ resp['next'] = traverse_obj(resp, ('pagination', 'next'))
+ return resp
+
+
+class RadioFranceProgramScheduleIE(RadioFranceBaseIE):
+ _VALID_URL = rf'''(?x)
+ {RadioFranceBaseIE._VALID_URL_BASE}
+ /(?P<station>{RadioFranceBaseIE._STATIONS_RE})
+ /grille-programmes(?:\?date=(?P<date>[\d-]+))?
+ '''
+
+ _TESTS = [{
+ 'url': 'https://www.radiofrance.fr/franceinter/grille-programmes?date=17-02-2023',
+ 'info_dict': {
+ 'id': 'franceinter-program-20230217',
+ 'upload_date': '20230217',
+ },
+ 'playlist_count': 25,
+ }, {
+ 'url': 'https://www.radiofrance.fr/franceculture/grille-programmes?date=01-02-2023',
+ 'info_dict': {
+ 'id': 'franceculture-program-20230201',
+ 'upload_date': '20230201',
+ },
+ 'playlist_count': 25,
+ }, {
+ 'url': 'https://www.radiofrance.fr/mouv/grille-programmes?date=19-03-2023',
+ 'info_dict': {
+ 'id': 'mouv-program-20230319',
+ 'upload_date': '20230319',
+ },
+ 'playlist_count': 3,
+ }, {
+ 'url': 'https://www.radiofrance.fr/francemusique/grille-programmes?date=18-03-2023',
+ 'info_dict': {
+ 'id': 'francemusique-program-20230318',
+ 'upload_date': '20230318',
+ },
+ 'playlist_count': 15,
+ }, {
+ 'url': 'https://www.radiofrance.fr/franceculture/grille-programmes',
+ 'only_matching': True,
+ }]
+
+ def _generate_playlist_entries(self, webpage_url, api_response):
+ for entry in traverse_obj(api_response, ('steps', lambda _, v: v['expression']['path'])):
+ yield self.url_result(
+ urljoin(webpage_url, f'/{entry["expression"]["path"]}'), ie=FranceCultureIE,
+ url_transparent=True, **traverse_obj(entry, {
+ 'title': ('expression', 'title'),
+ 'thumbnail': ('expression', 'visual', 'src'),
+ 'timestamp': ('startTime', {int_or_none}),
+ 'series_id': ('concept', 'id'),
+ 'series': ('concept', 'title'),
+ }))
+
+ def _real_extract(self, url):
+ station, date = self._match_valid_url(url).group('station', 'date')
+ webpage = self._download_webpage(url, station)
+ grid_data = self._extract_data_from_webpage(webpage, station, 'grid')
+ upload_date = strftime_or_none(grid_data.get('date'), '%Y%m%d')
+
+ return self.playlist_result(
+ self._generate_playlist_entries(url, grid_data),
+ join_nonempty(station, 'program', upload_date), upload_date=upload_date)
diff --git a/yt_dlp/extractor/radiojavan.py b/yt_dlp/extractor/radiojavan.py
new file mode 100644
index 0000000..b3befae
--- /dev/null
+++ b/yt_dlp/extractor/radiojavan.py
@@ -0,0 +1,81 @@
+import re
+
+from .common import InfoExtractor
+from ..utils import (
+ parse_resolution,
+ str_to_int,
+ unified_strdate,
+ urlencode_postdata,
+ urljoin,
+)
+
+
+class RadioJavanIE(InfoExtractor):
+ _WORKING = False
+ _VALID_URL = r'https?://(?:www\.)?radiojavan\.com/videos/video/(?P<id>[^/]+)/?'
+ _TEST = {
+ 'url': 'http://www.radiojavan.com/videos/video/chaartaar-ashoobam',
+ 'md5': 'e85208ffa3ca8b83534fca9fe19af95b',
+ 'info_dict': {
+ 'id': 'chaartaar-ashoobam',
+ 'ext': 'mp4',
+ 'title': 'Chaartaar - Ashoobam',
+ 'thumbnail': r're:^https?://.*\.jpe?g$',
+ 'upload_date': '20150215',
+ 'view_count': int,
+ 'like_count': int,
+ 'dislike_count': int,
+ }
+ }
+
+ def _real_extract(self, url):
+ video_id = self._match_id(url)
+
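+ # The site assigns a media host per request; the relative video paths found
+ # in the page are joined against it below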
+ download_host = self._download_json(
+ 'https://www.radiojavan.com/videos/video_host', video_id,
+ data=urlencode_postdata({'id': video_id}),
+ headers={
+ 'Content-Type': 'application/x-www-form-urlencoded',
+ 'Referer': url,
+ }).get('host', 'https://host1.rjmusicmedia.com')
+
+ webpage = self._download_webpage(url, video_id)
+
+ formats = []
+ for format_id, _, video_path in re.findall(
+ r'RJ\.video(?P<format_id>\d+[pPkK])\s*=\s*(["\'])(?P<url>(?:(?!\2).)+)\2',
+ webpage):
+ f = parse_resolution(format_id)
+ f.update({
+ 'url': urljoin(download_host, video_path),
+ 'format_id': format_id,
+ })
+ formats.append(f)
+
+ title = self._og_search_title(webpage)
+ thumbnail = self._og_search_thumbnail(webpage)
+
+ upload_date = unified_strdate(self._search_regex(
+ r'class="date_added">Date added: ([^<]+)<',
+ webpage, 'upload date', fatal=False))
+
+ view_count = str_to_int(self._search_regex(
+ r'class="views">Plays: ([\d,]+)',
+ webpage, 'view count', fatal=False))
+ like_count = str_to_int(self._search_regex(
+ r'class="rating">([\d,]+) likes',
+ webpage, 'like count', fatal=False))
+ dislike_count = str_to_int(self._search_regex(
+ r'class="rating">([\d,]+) dislikes',
+ webpage, 'dislike count', fatal=False))
+
+ return {
+ 'id': video_id,
+ 'title': title,
+ 'thumbnail': thumbnail,
+ 'upload_date': upload_date,
+ 'view_count': view_count,
+ 'like_count': like_count,
+ 'dislike_count': dislike_count,
+ 'formats': formats,
+ }
diff --git a/yt_dlp/extractor/radiokapital.py b/yt_dlp/extractor/radiokapital.py
new file mode 100644
index 0000000..8f9737a
--- /dev/null
+++ b/yt_dlp/extractor/radiokapital.py
@@ -0,0 +1,97 @@
+import itertools
+from urllib.parse import urlencode
+
+from .common import InfoExtractor
+from ..utils import (
+ clean_html,
+ traverse_obj,
+ unescapeHTML,
+)
+
+
+class RadioKapitalBaseIE(InfoExtractor):
+ def _call_api(self, resource, video_id, note='Downloading JSON metadata', qs=None):
+ return self._download_json(
+ f'https://www.radiokapital.pl/wp-json/kapital/v1/{resource}?{urlencode(qs or {})}',
+ video_id, note=note)
+
+ def _parse_episode(self, data):
+ release = '%s%s%s' % (data['published'][6:11], data['published'][3:6], data['published'][:3])
+ return {
+ '_type': 'url_transparent',
+ 'url': data['mixcloud_url'],
+ 'ie_key': 'Mixcloud',
+ 'title': unescapeHTML(data['title']),
+ 'description': clean_html(data.get('content')),
+ 'tags': traverse_obj(data, ('tags', ..., 'name')),
+ 'release_date': release,
+ 'series': traverse_obj(data, ('show', 'title')),
+ }
+
+
+class RadioKapitalIE(RadioKapitalBaseIE):
+ IE_NAME = 'radiokapital'
+ _VALID_URL = r'https?://(?:www\.)?radiokapital\.pl/shows/[a-z\d-]+/(?P<id>[a-z\d-]+)'
+
+ _TESTS = [{
+ 'url': 'https://radiokapital.pl/shows/tutaj-sa-smoki/5-its-okay-to-be-immaterial',
+ 'info_dict': {
+ 'id': 'radiokapital_radio-kapitał-tutaj-są-smoki-5-its-okay-to-be-immaterial-2021-05-20',
+ 'ext': 'm4a',
+ 'title': '#5: It’s okay to\xa0be\xa0immaterial',
+ 'description': 'md5:2499da5fbfb0e88333b7d37ec8e9e4c4',
+ 'uploader': 'Radio Kapitał',
+ 'uploader_id': 'radiokapital',
+ 'timestamp': 1621640164,
+ 'upload_date': '20210521',
+ },
+ }]
+
+ def _real_extract(self, url):
+ video_id = self._match_id(url)
+
+        episode = self._call_api(f'episodes/{video_id}', video_id)
+ return self._parse_episode(episode)
+
+
+class RadioKapitalShowIE(RadioKapitalBaseIE):
+ IE_NAME = 'radiokapital:show'
+ _VALID_URL = r'https?://(?:www\.)?radiokapital\.pl/shows/(?P<id>[a-z\d-]+)/?(?:$|[?#])'
+
+ _TESTS = [{
+ 'url': 'https://radiokapital.pl/shows/wesz',
+ 'info_dict': {
+ 'id': '100',
+ 'title': 'WĘSZ',
+ 'description': 'md5:3a557a1e0f31af612b0dcc85b1e0ca5c',
+ },
+ 'playlist_mincount': 17,
+ }]
+
+ def _get_episode_list(self, series_id, page_no):
+ return self._call_api(
+ 'episodes', series_id,
+ f'Downloading episode list page #{page_no}', qs={
+ 'show': series_id,
+ 'page': page_no,
+ })
+
+ def _entries(self, series_id):
+ for page_no in itertools.count(1):
+ episode_list = self._get_episode_list(series_id, page_no)
+ yield from (self._parse_episode(ep) for ep in episode_list['items'])
+ if episode_list['next'] is None:
+ break
+
+ def _real_extract(self, url):
+ series_id = self._match_id(url)
+
+ show = self._call_api(f'shows/{series_id}', series_id, 'Downloading show metadata')
+ entries = self._entries(series_id)
+ return {
+ '_type': 'playlist',
+ 'entries': entries,
+ 'id': str(show['id']),
+ 'title': show.get('title'),
+ 'description': clean_html(show.get('content')),
+ }
diff --git a/yt_dlp/extractor/radiozet.py b/yt_dlp/extractor/radiozet.py
new file mode 100644
index 0000000..6752017
--- /dev/null
+++ b/yt_dlp/extractor/radiozet.py
@@ -0,0 +1,50 @@
+from .common import InfoExtractor
+from ..utils import (
+    strip_or_none,
+    traverse_obj,
+)
+
+
+class RadioZetPodcastIE(InfoExtractor):
+    _VALID_URL = r'https?://player\.radiozet\.pl/Podcasty/.*?/(?P<id>.+)'
+ _TEST = {
+ 'url': 'https://player.radiozet.pl/Podcasty/Nie-Ma-Za-Co/O-przedmiotach-szkolnych-ktore-przydaja-sie-w-zyciu',
+ 'md5': 'e03665c316b4fbc5f6a8f232948bbba3',
+ 'info_dict': {
+ 'id': '42154',
+ 'display_id': 'O-przedmiotach-szkolnych-ktore-przydaja-sie-w-zyciu',
+ 'title': 'O przedmiotach szkolnych, które przydają się w życiu',
+ 'description': 'md5:fa72bed49da334b09e5b2f79851f185c',
+ 'release_timestamp': 1592985480,
+ 'ext': 'mp3',
+ 'thumbnail': r're:^https?://.*\.png$',
+ 'duration': 83,
+ 'series': 'Nie Ma Za Co',
+ 'creator': 'Katarzyna Pakosińska',
+ }
+ }
+
+ def _call_api(self, podcast_id, display_id):
+ return self._download_json(
+ f'https://player.radiozet.pl/api/podcasts/getPodcast/(node)/{podcast_id}/(station)/radiozet',
+ display_id)
+
+ def _real_extract(self, url):
+ display_id = self._match_id(url)
+ webpage = self._download_webpage(url, display_id)
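+        # the numeric podcast id is exposed on the player element's data-id attribute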
+ podcast_id = self._html_search_regex(r'<div.*?\sid="player".*?\sdata-id=[\'"]([^\'"]+)[\'"]',
+ webpage, 'podcast id')
+ data = self._call_api(podcast_id, display_id)['data'][0]
+
+ return {
+ 'id': podcast_id,
+ 'display_id': display_id,
+ 'title': strip_or_none(data.get('title')),
+ 'description': strip_or_none(traverse_obj(data, ('program', 'desc'))),
+ 'release_timestamp': data.get('published_date'),
+ 'url': traverse_obj(data, ('player', 'stream')),
+ 'thumbnail': traverse_obj(data, ('program', 'image', 'original')),
+ 'duration': traverse_obj(data, ('player', 'duration')),
+ 'series': strip_or_none(traverse_obj(data, ('program', 'title'))),
+ 'creator': strip_or_none(traverse_obj(data, ('presenter', 0, 'title'))),
+ }
diff --git a/yt_dlp/extractor/radlive.py b/yt_dlp/extractor/radlive.py
new file mode 100644
index 0000000..3c00183
--- /dev/null
+++ b/yt_dlp/extractor/radlive.py
@@ -0,0 +1,180 @@
+import json
+
+from .common import InfoExtractor
+from ..utils import (
+    ExtractorError,
+    format_field,
+    traverse_obj,
+    try_get,
+    unified_timestamp,
+)
+
+
+class RadLiveIE(InfoExtractor):
+ IE_NAME = 'radlive'
+ _VALID_URL = r'https?://(?:www\.)?rad\.live/content/(?P<content_type>feature|episode)/(?P<id>[a-f0-9-]+)'
+ _TESTS = [{
+ 'url': 'https://rad.live/content/feature/dc5acfbc-761b-4bec-9564-df999905116a',
+ 'md5': '6219d5d31d52de87d21c9cf5b7cb27ff',
+ 'info_dict': {
+ 'id': 'dc5acfbc-761b-4bec-9564-df999905116a',
+ 'ext': 'mp4',
+ 'title': 'Deathpact - Digital Mirage 2 [Full Set]',
+ 'language': 'en',
+ 'thumbnail': 'https://static.12core.net/cb65ae077a079c68380e38f387fbc438.png',
+ 'description': '',
+ 'release_timestamp': 1600185600.0,
+ 'channel': 'Proximity',
+ 'channel_id': '9ce6dd01-70a4-4d59-afb6-d01f807cd009',
+ 'channel_url': 'https://rad.live/content/channel/9ce6dd01-70a4-4d59-afb6-d01f807cd009',
+ }
+ }, {
+ 'url': 'https://rad.live/content/episode/bbcf66ec-0d02-4ca0-8dc0-4213eb2429bf',
+ 'md5': '40b2175f347592125d93e9a344080125',
+ 'info_dict': {
+ 'id': 'bbcf66ec-0d02-4ca0-8dc0-4213eb2429bf',
+ 'ext': 'mp4',
+ 'title': 'E01: Bad Jokes 1',
+ 'language': 'en',
+ 'thumbnail': 'https://lsp.littlstar.com/channels/WHISTLE/BAD_JOKES/SEASON_1/BAD_JOKES_101/poster.jpg',
+ 'description': 'Bad Jokes - Champions, Adam Pally, Super Troopers, Team Edge and 2Hype',
+ 'episode': 'E01: Bad Jokes 1',
+ 'episode_number': 1,
+ 'episode_id': '336',
+ },
+ }]
+
+ def _real_extract(self, url):
+ content_type, video_id = self._match_valid_url(url).groups()
+
+ webpage = self._download_webpage(url, video_id)
+
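+        # the page state is embedded as JSON in a <script type="application/json"> tag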
+ content_info = json.loads(self._search_regex(
+ r'<script[^>]*type=([\'"])application/json\1[^>]*>(?P<json>{.+?})</script>',
+ webpage, 'video info', group='json'))['props']['pageProps']['initialContentData']
+ video_info = content_info[content_type]
+
+ if not video_info:
+ raise ExtractorError('Unable to extract video info, make sure the URL is valid')
+
+ formats = self._extract_m3u8_formats(video_info['assets']['videos'][0]['url'], video_id)
+
+ data = video_info.get('structured_data', {})
+
+        release_timestamp = unified_timestamp(traverse_obj(data, ('releasedEvent', 'startDate')))
+ channel = next(iter(content_info.get('channels', [])), {})
+ channel_id = channel.get('lrn', '').split(':')[-1] or None
+
+ result = {
+ 'id': video_id,
+ 'title': video_info['title'],
+ 'formats': formats,
+ 'language': traverse_obj(data, ('potentialAction', 'target', 'inLanguage')),
+ 'thumbnail': traverse_obj(data, ('image', 'contentUrl')),
+ 'description': data.get('description'),
+            'release_timestamp': release_timestamp,
+ 'channel': channel.get('name'),
+ 'channel_id': channel_id,
+            'channel_url': format_field(channel_id, None, 'https://rad.live/content/channel/%s'),
+        }
+ if content_type == 'episode':
+ result.update({
+ # TODO: Get season number when downloading single episode
+ 'episode': video_info.get('title'),
+ 'episode_number': video_info.get('number'),
+ 'episode_id': video_info.get('id'),
+ })
+
+ return result
+
+
+class RadLiveSeasonIE(RadLiveIE): # XXX: Do not subclass from concrete IE
+ IE_NAME = 'radlive:season'
+ _VALID_URL = r'https?://(?:www\.)?rad\.live/content/season/(?P<id>[a-f0-9-]+)'
+ _TESTS = [{
+ 'url': 'https://rad.live/content/season/08a290f7-c9ef-4e22-9105-c255995a2e75',
+ 'md5': '40b2175f347592125d93e9a344080125',
+ 'info_dict': {
+ 'id': '08a290f7-c9ef-4e22-9105-c255995a2e75',
+ 'title': 'Bad Jokes - Season 1',
+ },
+ 'playlist_mincount': 5,
+ }]
+
+ @classmethod
+ def suitable(cls, url):
+        return False if RadLiveIE.suitable(url) else super().suitable(url)
+
+ def _real_extract(self, url):
+ season_id = self._match_id(url)
+ webpage = self._download_webpage(url, season_id)
+
+ content_info = json.loads(self._search_regex(
+ r'<script[^>]*type=([\'"])application/json\1[^>]*>(?P<json>{.+?})</script>',
+ webpage, 'video info', group='json'))['props']['pageProps']['initialContentData']
+ video_info = content_info['season']
+
+ entries = [{
+ '_type': 'url_transparent',
+ 'id': episode['structured_data']['url'].split('/')[-1],
+ 'url': episode['structured_data']['url'],
+ 'series': try_get(content_info, lambda x: x['series']['title']),
+ 'season': video_info['title'],
+ 'season_number': video_info.get('number'),
+ 'season_id': video_info.get('id'),
+ 'ie_key': RadLiveIE.ie_key(),
+ } for episode in video_info['episodes']]
+
+ return self.playlist_result(entries, season_id, video_info.get('title'))
+
+
+class RadLiveChannelIE(RadLiveIE): # XXX: Do not subclass from concrete IE
+ IE_NAME = 'radlive:channel'
+ _VALID_URL = r'https?://(?:www\.)?rad\.live/content/channel/(?P<id>[a-f0-9-]+)'
+ _TESTS = [{
+ 'url': 'https://rad.live/content/channel/5c4d8df4-6fa0-413c-81e3-873479b49274',
+ 'md5': '625156a08b7f2b0b849f234e664457ac',
+ 'info_dict': {
+ 'id': '5c4d8df4-6fa0-413c-81e3-873479b49274',
+ 'title': 'Whistle Sports',
+ },
+ 'playlist_mincount': 7,
+ }]
+
+ _QUERY = '''
+query WebChannelListing ($lrn: ID!) {
+ channel (id:$lrn) {
+ name
+ features {
+ structured_data
+ }
+ }
+}'''
+
+ @classmethod
+ def suitable(cls, url):
+        return False if RadLiveIE.suitable(url) else super().suitable(url)
+
+ def _real_extract(self, url):
+ channel_id = self._match_id(url)
+
+ graphql = self._download_json(
+ 'https://content.mhq.12core.net/graphql', channel_id,
+ headers={'Content-Type': 'application/json'},
+ data=json.dumps({
+ 'query': self._QUERY,
+ 'variables': {'lrn': f'lrn:12core:media:content:channel:{channel_id}'}
+ }).encode('utf-8'))
+
+ data = traverse_obj(graphql, ('data', 'channel'))
+ if not data:
+ raise ExtractorError('Unable to extract video info, make sure the URL is valid')
+
+ entries = [{
+ '_type': 'url_transparent',
+ 'url': feature['structured_data']['url'],
+ 'ie_key': RadLiveIE.ie_key(),
+ } for feature in data['features']]
+
+ return self.playlist_result(entries, channel_id, data.get('name'))
diff --git a/yt_dlp/extractor/rai.py b/yt_dlp/extractor/rai.py
new file mode 100644
index 0000000..c1fc65c
--- /dev/null
+++ b/yt_dlp/extractor/rai.py
@@ -0,0 +1,816 @@
+import math
+import re
+
+from .common import InfoExtractor
+from ..networking import HEADRequest
+from ..utils import (
+ clean_html,
+ determine_ext,
+ ExtractorError,
+ filter_dict,
+ GeoRestrictedError,
+ int_or_none,
+ join_nonempty,
+ parse_duration,
+ remove_start,
+ strip_or_none,
+ traverse_obj,
+ try_get,
+ unified_strdate,
+ unified_timestamp,
+ update_url_query,
+ urljoin,
+ xpath_text,
+)
+
+
+class RaiBaseIE(InfoExtractor):
+ _UUID_RE = r'[\da-f]{8}-[\da-f]{4}-[\da-f]{4}-[\da-f]{4}-[\da-f]{12}'
+ _GEO_COUNTRIES = ['IT']
+ _GEO_BYPASS = False
+
+ def _fix_m3u8_formats(self, media_url, video_id):
+ fmts = self._extract_m3u8_formats(
+ media_url, video_id, 'mp4', m3u8_id='hls', fatal=False)
+
+ # Fix malformed m3u8 manifests by setting audio-only/video-only formats
+ for f in fmts:
+ if not f.get('acodec'):
+ f['acodec'] = 'mp4a'
+ if not f.get('vcodec'):
+ f['vcodec'] = 'avc1'
+ man_url = f['url']
+ if re.search(r'chunklist(?:_b\d+)*_ao[_.]', man_url): # audio only
+ f['vcodec'] = 'none'
+ elif re.search(r'chunklist(?:_b\d+)*_vo[_.]', man_url): # video only
+ f['acodec'] = 'none'
+ else: # video+audio
+ if f['acodec'] == 'none':
+ f['acodec'] = 'mp4a'
+ if f['vcodec'] == 'none':
+ f['vcodec'] = 'avc1'
+
+ return fmts
+
+ def _extract_relinker_info(self, relinker_url, video_id, audio_only=False):
+ def fix_cdata(s):
+ # remove \r\n\t before and after <![CDATA[ ]]> to avoid
+ # polluted text with xpath_text
+ s = re.sub(r'(\]\]>)[\r\n\t]+(</)', '\\1\\2', s)
+ return re.sub(r'(>)[\r\n\t]+(<!\[CDATA\[)', '\\1\\2', s)
+
+ if not re.match(r'https?://', relinker_url):
+ return {'formats': [{'url': relinker_url}]}
+
+ # set User-Agent to generic 'Rai' to avoid quality filtering from
+ # the media server and get the maximum qualities available
+ relinker = self._download_xml(
+ relinker_url, video_id, note='Downloading XML metadata',
+ transform_source=fix_cdata, query={'output': 64},
+ headers={**self.geo_verification_headers(), 'User-Agent': 'Rai'})
+
+ if xpath_text(relinker, './license_url', default='{}') != '{}':
+ self.report_drm(video_id)
+
+ is_live = xpath_text(relinker, './is_live', default='N') == 'Y'
+ duration = parse_duration(xpath_text(relinker, './duration', default=None))
+ media_url = xpath_text(relinker, './url[@type="content"]', default=None)
+
+ if not media_url:
+ self.raise_no_formats('The relinker returned no media url')
+
+ # geo flag is a bit unreliable and not properly set all the time
+ geoprotection = xpath_text(relinker, './geoprotection', default='N') == 'Y'
+
+ ext = determine_ext(media_url)
+ formats = []
+
+ if ext == 'mp3':
+ formats.append({
+ 'url': media_url,
+ 'vcodec': 'none',
+ 'acodec': 'mp3',
+ 'format_id': 'https-mp3',
+ })
+ elif ext == 'm3u8' or 'format=m3u8' in media_url:
+ formats.extend(self._fix_m3u8_formats(media_url, video_id))
+ elif ext == 'f4m':
+ # very likely no longer needed. Cannot find any url that uses it.
+ manifest_url = update_url_query(
+ media_url.replace('manifest#live_hds.f4m', 'manifest.f4m'),
+ {'hdcore': '3.7.0', 'plugin': 'aasp-3.7.0.39.44'})
+ formats.extend(self._extract_f4m_formats(
+ manifest_url, video_id, f4m_id='hds', fatal=False))
+ elif ext == 'mp4':
+ bitrate = int_or_none(xpath_text(relinker, './bitrate'))
+ formats.append({
+ 'url': media_url,
+                'tbr': bitrate if bitrate and bitrate > 0 else None,
+ 'format_id': join_nonempty('https', bitrate, delim='-'),
+ })
+ else:
+ raise ExtractorError('Unrecognized media file found')
+
+        if (not formats and geoprotection is True) or '/video_no_available.mp4' in (media_url or ''):
+ self.raise_geo_restricted(countries=self._GEO_COUNTRIES, metadata_available=True)
+
+ if not audio_only and not is_live:
+ formats.extend(self._create_http_urls(media_url, relinker_url, formats, video_id))
+
+ return filter_dict({
+ 'is_live': is_live,
+ 'duration': duration,
+ 'formats': formats,
+ })
+
+ def _create_http_urls(self, manifest_url, relinker_url, fmts, video_id):
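+        # the relinker also serves progressive MP4s: appending
+        # '&overrideUserAgentRule=mp4-<quality>' to the relinker URL yields a
+        # direct download; probe availability once with '*', then emit one
+        # https format per quality advertised in the HLS manifest path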
+ _MANIFEST_REG = r'/(?P<id>\w+)(?:_(?P<quality>[\d\,]+))?(?:\.mp4)?(?:\.csmil)?/playlist\.m3u8'
+ _MP4_TMPL = '%s&overrideUserAgentRule=mp4-%s'
+ _QUALITY = {
+ # tbr: w, h
+ 250: [352, 198],
+ 400: [512, 288],
+ 600: [512, 288],
+ 700: [512, 288],
+ 800: [700, 394],
+ 1200: [736, 414],
+ 1500: [920, 518],
+ 1800: [1024, 576],
+ 2400: [1280, 720],
+ 3200: [1440, 810],
+ 3600: [1440, 810],
+ 5000: [1920, 1080],
+ 10000: [1920, 1080],
+ }
+
+        def percentage(number, target, pc=20, roof=125):
+            '''check whether target lies within pc% of number, with the absolute tolerance capped at roof'''
+ if not number or number < 0:
+ return False
+ return abs(target - number) < min(float(number) * float(pc) / 100.0, roof)
+
+        def get_format_info(tbr):
+            br = int_or_none(tbr)
+ if len(fmts) == 1 and not br:
+ br = fmts[0].get('tbr')
+ if br and br > 300:
+ tbr = math.floor(br / 100) * 100
+ else:
+ tbr = 250
+
+ # try extracting info from available m3u8 formats
+ format_copy = [None, None]
+ for f in fmts:
+ if f.get('tbr'):
+ if percentage(tbr, f['tbr']):
+ format_copy[0] = f.copy()
+ if [f.get('width'), f.get('height')] == _QUALITY.get(tbr):
+ format_copy[1] = f.copy()
+ format_copy[1]['tbr'] = tbr
+
+ # prefer format with similar bitrate because there might be
+ # multiple video with the same resolution but different bitrate
+ format_copy = format_copy[0] or format_copy[1] or {}
+ return {
+ 'format_id': f'https-{tbr}',
+ 'width': format_copy.get('width'),
+ 'height': format_copy.get('height'),
+ 'tbr': format_copy.get('tbr') or tbr,
+ 'vcodec': format_copy.get('vcodec') or 'avc1',
+ 'acodec': format_copy.get('acodec') or 'mp4a',
+ 'fps': format_copy.get('fps') or 25,
+ } if format_copy else {
+ 'format_id': f'https-{tbr}',
+ 'width': _QUALITY[tbr][0],
+ 'height': _QUALITY[tbr][1],
+ 'tbr': tbr,
+ 'vcodec': 'avc1',
+ 'acodec': 'mp4a',
+ 'fps': 25,
+ }
+
+ # Check if MP4 download is available
+ try:
+ self._request_webpage(
+ HEADRequest(_MP4_TMPL % (relinker_url, '*')), video_id, 'Checking MP4 availability')
+ except ExtractorError as e:
+ self.to_screen(f'{video_id}: MP4 direct download is not available: {e.cause}')
+ return []
+
+        # keep only muxed formats; skip audio-only and video-only streams
+        fmts = [f for f in fmts
+                if f.get('vcodec') != 'none' and f.get('acodec') != 'none']
+
+ mobj = re.search(_MANIFEST_REG, manifest_url)
+ if not mobj:
+ return []
+ available_qualities = mobj.group('quality').split(',') if mobj.group('quality') else ['*']
+
+ formats = []
+ for q in filter(None, available_qualities):
+ self.write_debug(f'Creating https format for quality {q}')
+ formats.append({
+ 'url': _MP4_TMPL % (relinker_url, q),
+ 'protocol': 'https',
+ 'ext': 'mp4',
+ **get_format_info(q)
+ })
+ return formats
+
+ @staticmethod
+ def _get_thumbnails_list(thumbs, url):
+ return [{
+ 'url': urljoin(url, thumb_url),
+ } for thumb_url in (thumbs or {}).values() if thumb_url]
+
+ @staticmethod
+ def _extract_subtitles(url, video_data):
+ STL_EXT = 'stl'
+ SRT_EXT = 'srt'
+ subtitles = {}
+ subtitles_array = video_data.get('subtitlesArray') or video_data.get('subtitleList') or []
+ for k in ('subtitles', 'subtitlesUrl'):
+ subtitles_array.append({'url': video_data.get(k)})
+ for subtitle in subtitles_array:
+ sub_url = subtitle.get('url')
+ if sub_url and isinstance(sub_url, str):
+ sub_lang = subtitle.get('language') or 'it'
+ sub_url = urljoin(url, sub_url)
+ sub_ext = determine_ext(sub_url, SRT_EXT)
+ subtitles.setdefault(sub_lang, []).append({
+ 'ext': sub_ext,
+ 'url': sub_url,
+ })
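+                    # an SRT rendition is usually served alongside each STL subtitle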
+ if STL_EXT == sub_ext:
+ subtitles[sub_lang].append({
+ 'ext': SRT_EXT,
+ 'url': sub_url[:-len(STL_EXT)] + SRT_EXT,
+ })
+ return subtitles
+
+
+class RaiPlayIE(RaiBaseIE):
+ _VALID_URL = rf'(?P<base>https?://(?:www\.)?raiplay\.it/.+?-(?P<id>{RaiBaseIE._UUID_RE}))\.(?:html|json)'
+ _TESTS = [{
+ 'url': 'https://www.raiplay.it/video/2014/04/Report-del-07042014-cb27157f-9dd0-4aee-b788-b1f67643a391.html',
+ 'md5': '8970abf8caf8aef4696e7b1f2adfc696',
+ 'info_dict': {
+ 'id': 'cb27157f-9dd0-4aee-b788-b1f67643a391',
+ 'ext': 'mp4',
+ 'title': 'Report del 07/04/2014',
+ 'alt_title': 'St 2013/14 - Report - Espresso nel caffè - 07/04/2014',
+ 'description': 'md5:d730c168a58f4bb35600fc2f881ec04e',
+ 'thumbnail': r're:^https?://www\.raiplay\.it/.+\.jpg',
+ 'uploader': 'Rai 3',
+ 'creator': 'Rai 3',
+ 'duration': 6160,
+ 'series': 'Report',
+ 'season': '2013/14',
+ 'subtitles': {'it': 'count:4'},
+ 'release_year': 2024,
+ 'episode': 'Espresso nel caffè - 07/04/2014',
+ 'timestamp': 1396919880,
+ 'upload_date': '20140408',
+ 'formats': 'count:4',
+ },
+ 'params': {'skip_download': True},
+ }, {
+ # 1080p
+ 'url': 'https://www.raiplay.it/video/2021/11/Blanca-S1E1-Senza-occhi-b1255a4a-8e72-4a2f-b9f3-fc1308e00736.html',
+ 'md5': 'aeda7243115380b2dd5e881fd42d949a',
+ 'info_dict': {
+ 'id': 'b1255a4a-8e72-4a2f-b9f3-fc1308e00736',
+ 'ext': 'mp4',
+ 'title': 'Blanca - S1E1 - Senza occhi',
+ 'alt_title': 'St 1 Ep 1 - Blanca - Senza occhi',
+ 'description': 'md5:75f95d5c030ec8bac263b1212322e28c',
+ 'thumbnail': r're:^https://www\.raiplay\.it/dl/img/.+\.jpg',
+ 'uploader': 'Rai Premium',
+ 'creator': 'Rai Fiction',
+ 'duration': 6493,
+ 'series': 'Blanca',
+ 'season': 'Season 1',
+ 'episode_number': 1,
+ 'release_year': 2021,
+ 'season_number': 1,
+ 'episode': 'Senza occhi',
+ 'timestamp': 1637318940,
+ 'upload_date': '20211119',
+ 'formats': 'count:7',
+ },
+ 'params': {'skip_download': True},
+ 'expected_warnings': ['Video not available. Likely due to geo-restriction.']
+ }, {
+ # 1500 quality
+ 'url': 'https://www.raiplay.it/video/2012/09/S1E11---Tutto-cio-che-luccica-0cab3323-732e-45d6-8e86-7704acab6598.html',
+ 'md5': 'a634d20e8ab2d43724c273563f6bf87a',
+ 'info_dict': {
+ 'id': '0cab3323-732e-45d6-8e86-7704acab6598',
+ 'ext': 'mp4',
+ 'title': 'Mia and Me - S1E11 - Tutto ciò che luccica',
+ 'alt_title': 'St 1 Ep 11 - Mia and Me - Tutto ciò che luccica',
+ 'description': 'md5:4969e594184b1920c4c1f2b704da9dea',
+ 'thumbnail': r're:^https?://.*\.jpg$',
+ 'uploader': 'Rai Gulp',
+ 'series': 'Mia and Me',
+ 'season': 'Season 1',
+ 'episode_number': 11,
+ 'release_year': 2015,
+ 'season_number': 1,
+ 'episode': 'Tutto ciò che luccica',
+ 'timestamp': 1348495020,
+ 'upload_date': '20120924',
+ },
+ }, {
+ 'url': 'http://www.raiplay.it/video/2016/11/gazebotraindesi-efebe701-969c-4593-92f3-285f0d1ce750.html?',
+ 'only_matching': True,
+ }, {
+ # subtitles at 'subtitlesArray' key (see #27698)
+ 'url': 'https://www.raiplay.it/video/2020/12/Report---04-01-2021-2e90f1de-8eee-4de4-ac0e-78d21db5b600.html',
+ 'only_matching': True,
+ }, {
+ # DRM protected
+ 'url': 'https://www.raiplay.it/video/2021/06/Lo-straordinario-mondo-di-Zoey-S2E1-Lo-straordinario-ritorno-di-Zoey-3ba992de-2332-41ad-9214-73e32ab209f4.html',
+ 'only_matching': True,
+ }]
+
+ def _real_extract(self, url):
+ base, video_id = self._match_valid_url(url).groups()
+
+ media = self._download_json(
+ f'{base}.json', video_id, 'Downloading video JSON')
+
+ if not self.get_param('allow_unplayable_formats'):
+ if traverse_obj(media, (('program_info', None), 'rights_management', 'rights', 'drm')):
+ self.report_drm(video_id)
+
+ video = media['video']
+ relinker_info = self._extract_relinker_info(video['content_url'], video_id)
+ date_published = join_nonempty(
+ media.get('date_published'), media.get('time_published'), delim=' ')
+ season = media.get('season')
+ alt_title = join_nonempty(media.get('subtitle'), media.get('toptitle'), delim=' - ')
+
+ return {
+ 'id': remove_start(media.get('id'), 'ContentItem-') or video_id,
+ 'display_id': video_id,
+ 'title': media.get('name'),
+ 'alt_title': strip_or_none(alt_title or None),
+ 'description': media.get('description'),
+ 'uploader': strip_or_none(
+ traverse_obj(media, ('program_info', 'channel'))
+ or media.get('channel') or None),
+ 'creator': strip_or_none(
+ traverse_obj(media, ('program_info', 'editor'))
+ or media.get('editor') or None),
+ 'duration': parse_duration(video.get('duration')),
+ 'timestamp': unified_timestamp(date_published),
+ 'thumbnails': self._get_thumbnails_list(media.get('images'), url),
+ 'series': traverse_obj(media, ('program_info', 'name')),
+ 'season_number': int_or_none(season),
+ 'season': season if (season and not season.isdigit()) else None,
+ 'episode': media.get('episode_title'),
+ 'episode_number': int_or_none(media.get('episode')),
+ 'subtitles': self._extract_subtitles(url, video),
+ 'release_year': int_or_none(traverse_obj(media, ('track_info', 'edit_year'))),
+ **relinker_info
+ }
+
+
+class RaiPlayLiveIE(RaiPlayIE): # XXX: Do not subclass from concrete IE
+ _VALID_URL = r'(?P<base>https?://(?:www\.)?raiplay\.it/dirette/(?P<id>[^/?#&]+))'
+ _TESTS = [{
+ 'url': 'http://www.raiplay.it/dirette/rainews24',
+ 'info_dict': {
+ 'id': 'd784ad40-e0ae-4a69-aa76-37519d238a9c',
+ 'display_id': 'rainews24',
+ 'ext': 'mp4',
+ 'title': 're:^Diretta di Rai News 24 [0-9]{4}-[0-9]{2}-[0-9]{2} [0-9]{2}:[0-9]{2}$',
+ 'description': 'md5:4d00bcf6dc98b27c6ec480de329d1497',
+ 'uploader': 'Rai News 24',
+ 'creator': 'Rai News 24',
+ 'is_live': True,
+ 'live_status': 'is_live',
+ 'upload_date': '20090502',
+ 'timestamp': 1241276220,
+ 'formats': 'count:3',
+ },
+ 'params': {'skip_download': True},
+ }]
+
+
+class RaiPlayPlaylistIE(InfoExtractor):
+ _VALID_URL = r'(?P<base>https?://(?:www\.)?raiplay\.it/programmi/(?P<id>[^/?#&]+))(?:/(?P<extra_id>[^?#&]+))?'
+ _TESTS = [{
+ # entire series episodes + extras...
+ 'url': 'https://www.raiplay.it/programmi/nondirloalmiocapo/',
+ 'info_dict': {
+ 'id': 'nondirloalmiocapo',
+ 'title': 'Non dirlo al mio capo',
+ 'description': 'md5:98ab6b98f7f44c2843fd7d6f045f153b',
+ },
+ 'playlist_mincount': 30,
+ }, {
+ # single season
+ 'url': 'https://www.raiplay.it/programmi/nondirloalmiocapo/episodi/stagione-2/',
+ 'info_dict': {
+ 'id': 'nondirloalmiocapo',
+ 'title': 'Non dirlo al mio capo - Stagione 2',
+ 'description': 'md5:98ab6b98f7f44c2843fd7d6f045f153b',
+ },
+ 'playlist_count': 12,
+ }]
+
+ def _real_extract(self, url):
+ base, playlist_id, extra_id = self._match_valid_url(url).groups()
+
+ program = self._download_json(
+ f'{base}.json', playlist_id, 'Downloading program JSON')
+
+ if extra_id:
+ extra_id = extra_id.upper().rstrip('/')
+
+ playlist_title = program.get('name')
+ entries = []
+ for b in (program.get('blocks') or []):
+ for s in (b.get('sets') or []):
+ if extra_id:
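+                    # skip sets whose 'block-name/set-name' slug does not match the extra path from the URL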
+ if extra_id != join_nonempty(
+ b.get('name'), s.get('name'), delim='/').replace(' ', '-').upper():
+ continue
+ playlist_title = join_nonempty(playlist_title, s.get('name'), delim=' - ')
+
+ s_id = s.get('id')
+ if not s_id:
+ continue
+ medias = self._download_json(
+ f'{base}/{s_id}.json', s_id,
+ 'Downloading content set JSON', fatal=False)
+ if not medias:
+ continue
+ for m in (medias.get('items') or []):
+ path_id = m.get('path_id')
+ if not path_id:
+ continue
+ video_url = urljoin(url, path_id)
+ entries.append(self.url_result(
+ video_url, ie=RaiPlayIE.ie_key(),
+ video_id=RaiPlayIE._match_id(video_url)))
+
+ return self.playlist_result(
+ entries, playlist_id, playlist_title,
+ try_get(program, lambda x: x['program_info']['description']))
+
+
+class RaiPlaySoundIE(RaiBaseIE):
+ _VALID_URL = rf'(?P<base>https?://(?:www\.)?raiplaysound\.it/.+?-(?P<id>{RaiBaseIE._UUID_RE}))\.(?:html|json)'
+ _TESTS = [{
+ 'url': 'https://www.raiplaysound.it/audio/2021/12/IL-RUGGITO-DEL-CONIGLIO-1ebae2a7-7cdb-42bb-842e-fe0d193e9707.html',
+ 'md5': '8970abf8caf8aef4696e7b1f2adfc696',
+ 'info_dict': {
+ 'id': '1ebae2a7-7cdb-42bb-842e-fe0d193e9707',
+ 'ext': 'mp3',
+ 'title': 'Il Ruggito del Coniglio del 10/12/2021',
+ 'alt_title': 'md5:0e6476cd57858bb0f3fcc835d305b455',
+ 'description': 'md5:2a17d2107e59a4a8faa0e18334139ee2',
+ 'thumbnail': r're:^https?://.+\.jpg$',
+ 'uploader': 'rai radio 2',
+ 'duration': 5685,
+ 'series': 'Il Ruggito del Coniglio',
+ 'episode': 'Il Ruggito del Coniglio del 10/12/2021',
+ 'creator': 'rai radio 2',
+ 'timestamp': 1638346620,
+ 'upload_date': '20211201',
+ },
+ 'params': {'skip_download': True},
+ }]
+
+ def _real_extract(self, url):
+ base, audio_id = self._match_valid_url(url).group('base', 'id')
+ media = self._download_json(f'{base}.json', audio_id, 'Downloading audio JSON')
+ uid = try_get(media, lambda x: remove_start(remove_start(x['uniquename'], 'ContentItem-'), 'Page-'))
+
+ info = {}
+ formats = []
+ relinkers = set(traverse_obj(media, (('downloadable_audio', 'audio', ('live', 'cards', 0, 'audio')), 'url')))
+ for r in relinkers:
+ info = self._extract_relinker_info(r, audio_id, True)
+ formats.extend(info.get('formats'))
+
+ date_published = try_get(media, (lambda x: f'{x["create_date"]} {x.get("create_time") or ""}',
+ lambda x: x['live']['create_date']))
+
+ podcast_info = traverse_obj(media, 'podcast_info', ('live', 'cards', 0)) or {}
+
+ return {
+ **info,
+ 'id': uid or audio_id,
+ 'display_id': audio_id,
+ 'title': traverse_obj(media, 'title', 'episode_title'),
+ 'alt_title': traverse_obj(media, ('track_info', 'media_name'), expected_type=strip_or_none),
+ 'description': media.get('description'),
+ 'uploader': traverse_obj(media, ('track_info', 'channel'), expected_type=strip_or_none),
+ 'creator': traverse_obj(media, ('track_info', 'editor'), expected_type=strip_or_none),
+ 'timestamp': unified_timestamp(date_published),
+ 'thumbnails': self._get_thumbnails_list(podcast_info.get('images'), url),
+ 'series': podcast_info.get('title'),
+ 'season_number': int_or_none(media.get('season')),
+ 'episode': media.get('episode_title'),
+ 'episode_number': int_or_none(media.get('episode')),
+ 'formats': formats,
+ }
+
+
+class RaiPlaySoundLiveIE(RaiPlaySoundIE): # XXX: Do not subclass from concrete IE
+ _VALID_URL = r'(?P<base>https?://(?:www\.)?raiplaysound\.it/(?P<id>[^/?#&]+)$)'
+ _TESTS = [{
+ 'url': 'https://www.raiplaysound.it/radio2',
+ 'info_dict': {
+ 'id': 'b00a50e6-f404-4af6-8f8c-ff3b9af73a44',
+ 'display_id': 'radio2',
+ 'ext': 'mp4',
+ 'title': r're:Rai Radio 2 \d+-\d+-\d+ \d+:\d+',
+ 'thumbnail': r're:^https://www\.raiplaysound\.it/dl/img/.+\.png',
+ 'uploader': 'rai radio 2',
+ 'series': 'Rai Radio 2',
+ 'creator': 'raiplaysound',
+ 'is_live': True,
+ 'live_status': 'is_live',
+ },
+ 'params': {'skip_download': True},
+ }]
+
+
+class RaiPlaySoundPlaylistIE(InfoExtractor):
+ _VALID_URL = r'(?P<base>https?://(?:www\.)?raiplaysound\.it/(?:programmi|playlist|audiolibri)/(?P<id>[^/?#&]+))(?:/(?P<extra_id>[^?#&]+))?'
+ _TESTS = [{
+ # entire show
+ 'url': 'https://www.raiplaysound.it/programmi/ilruggitodelconiglio',
+ 'info_dict': {
+ 'id': 'ilruggitodelconiglio',
+ 'title': 'Il Ruggito del Coniglio',
+ 'description': 'md5:62a627b3a2d0635d08fa8b6e0a04f27e',
+ },
+ 'playlist_mincount': 65,
+ }, {
+ # single season
+ 'url': 'https://www.raiplaysound.it/programmi/ilruggitodelconiglio/puntate/prima-stagione-1995',
+ 'info_dict': {
+ 'id': 'ilruggitodelconiglio_puntate_prima-stagione-1995',
+ 'title': 'Prima Stagione 1995',
+ },
+ 'playlist_count': 1,
+ }]
+
+ def _real_extract(self, url):
+ base, playlist_id, extra_id = self._match_valid_url(url).group('base', 'id', 'extra_id')
+ url = f'{base}.json'
+ program = self._download_json(url, playlist_id, 'Downloading program JSON')
+
+ if extra_id:
+ extra_id = extra_id.rstrip('/')
+ playlist_id += '_' + extra_id.replace('/', '_')
+            path = next(c['path_id'] for c in program.get('filters') or [] if extra_id in (c.get('weblink') or ''))
+ program = self._download_json(
+ urljoin('https://www.raiplaysound.it', path), playlist_id, 'Downloading program secondary JSON')
+
+ entries = [
+ self.url_result(urljoin(base, c['path_id']), ie=RaiPlaySoundIE.ie_key())
+ for c in traverse_obj(program, 'cards', ('block', 'cards')) or []
+ if c.get('path_id')]
+
+ return self.playlist_result(entries, playlist_id, program.get('title'),
+ traverse_obj(program, ('podcast_info', 'description')))
+
+
+class RaiIE(RaiBaseIE):
+ _VALID_URL = rf'https?://[^/]+\.(?:rai\.(?:it|tv))/.+?-(?P<id>{RaiBaseIE._UUID_RE})(?:-.+?)?\.html'
+ _TESTS = [{
+ 'url': 'https://www.raisport.rai.it/dl/raiSport/media/rassegna-stampa-04a9f4bd-b563-40cf-82a6-aad3529cb4a9.html',
+ 'info_dict': {
+ 'id': '04a9f4bd-b563-40cf-82a6-aad3529cb4a9',
+ 'ext': 'mp4',
+ 'title': 'TG PRIMO TEMPO',
+ 'thumbnail': r're:^https?://.*\.jpg',
+ 'duration': 1758,
+ 'upload_date': '20140612',
+ },
+ 'params': {'skip_download': True},
+ 'expected_warnings': ['Video not available. Likely due to geo-restriction.']
+ }, {
+ 'url': 'https://www.rai.it/dl/RaiTV/programmi/media/ContentItem-efb17665-691c-45d5-a60c-5301333cbb0c.html',
+ 'info_dict': {
+ 'id': 'efb17665-691c-45d5-a60c-5301333cbb0c',
+ 'ext': 'mp4',
+ 'title': 'TG1 ore 20:00 del 03/11/2016',
+ 'description': 'TG1 edizione integrale ore 20:00 del giorno 03/11/2016',
+ 'thumbnail': r're:^https?://.*\.jpg$',
+ 'duration': 2214,
+ 'upload_date': '20161103'
+ },
+ 'params': {'skip_download': True},
+ }, {
+ # Direct MMS: Media URL no longer works.
+ 'url': 'http://www.rai.it/dl/RaiTV/programmi/media/ContentItem-b63a4089-ac28-48cf-bca5-9f5b5bc46df5.html',
+ 'only_matching': True,
+ }]
+
+ def _real_extract(self, url):
+ content_id = self._match_id(url)
+ media = self._download_json(
+ f'https://www.rai.tv/dl/RaiTV/programmi/media/ContentItem-{content_id}.html?json',
+ content_id, 'Downloading video JSON', fatal=False, expected_status=404)
+
+ if media is None:
+ return None
+
+ if 'Audio' in media['type']:
+ relinker_info = {
+ 'formats': [{
+ 'format_id': join_nonempty('https', media.get('formatoAudio'), delim='-'),
+ 'url': media['audioUrl'],
+ 'ext': media.get('formatoAudio'),
+ 'vcodec': 'none',
+ 'acodec': media.get('formatoAudio'),
+ }]
+ }
+ elif 'Video' in media['type']:
+ relinker_info = self._extract_relinker_info(media['mediaUri'], content_id)
+ else:
+ raise ExtractorError('not a media file')
+
+ thumbnails = self._get_thumbnails_list(
+ {image_type: media.get(image_type) for image_type in (
+ 'image', 'image_medium', 'image_300')}, url)
+
+ return {
+ 'id': content_id,
+ 'title': strip_or_none(media.get('name') or media.get('title')),
+ 'description': strip_or_none(media.get('desc')) or None,
+ 'thumbnails': thumbnails,
+ 'uploader': strip_or_none(media.get('author')) or None,
+ 'upload_date': unified_strdate(media.get('date')),
+ 'duration': parse_duration(media.get('length')),
+ 'subtitles': self._extract_subtitles(url, media),
+ **relinker_info
+ }
+
+
+class RaiNewsIE(RaiBaseIE):
+ _VALID_URL = rf'https?://(www\.)?rainews\.it/(?!articoli)[^?#]+-(?P<id>{RaiBaseIE._UUID_RE})(?:-[^/?#]+)?\.html'
+ _EMBED_REGEX = [rf'<iframe[^>]+data-src="(?P<url>/iframe/[^?#]+?{RaiBaseIE._UUID_RE}\.html)']
+ _TESTS = [{
+ # new rainews player (#3911)
+ 'url': 'https://www.rainews.it/video/2024/02/membri-della-croce-rossa-evacuano-gli-abitanti-di-un-villaggio-nella-regione-ucraina-di-kharkiv-il-filmato-dallucraina--31e8017c-845c-43f5-9c48-245b43c3a079.html',
+ 'info_dict': {
+ 'id': '31e8017c-845c-43f5-9c48-245b43c3a079',
+ 'ext': 'mp4',
+ 'title': 'md5:1e81364b09de4a149042bac3c7d36f0b',
+ 'duration': 196,
+ 'upload_date': '20240225',
+ 'uploader': 'rainews',
+ 'formats': 'count:2',
+ },
+ 'params': {'skip_download': True},
+ }, {
+ # old content with fallback method to extract media urls
+ 'url': 'https://www.rainews.it/dl/rainews/media/Weekend-al-cinema-da-Hollywood-arriva-il-thriller-di-Tate-Taylor-La-ragazza-del-treno-1632c009-c843-4836-bb65-80c33084a64b.html',
+ 'info_dict': {
+ 'id': '1632c009-c843-4836-bb65-80c33084a64b',
+ 'ext': 'mp4',
+ 'title': 'Weekend al cinema, da Hollywood arriva il thriller di Tate Taylor "La ragazza del treno"',
+ 'description': 'I film in uscita questa settimana.',
+ 'thumbnail': r're:^https?://.*\.png$',
+ 'duration': 833,
+ 'upload_date': '20161103',
+ 'formats': 'count:8',
+ },
+ 'params': {'skip_download': True},
+ 'expected_warnings': ['unable to extract player_data'],
+ }, {
+ # iframe + drm
+ 'url': 'https://www.rainews.it/iframe/video/2022/07/euro2022-europei-calcio-femminile-italia-belgio-gol-0-1-video-4de06a69-de75-4e32-a657-02f0885f8118.html',
+ 'only_matching': True,
+ }]
+ _PLAYER_TAG = 'news'
+
+ def _real_extract(self, url):
+ video_id = self._match_id(url)
+
+ webpage = self._download_webpage(url, video_id)
+
+ player_data = self._search_json(
+ rf'<rai{self._PLAYER_TAG}-player\s*data=\'', webpage, 'player_data', video_id,
+ transform_source=clean_html, default={})
+        track_info = player_data.get('track_info') or {}
+ relinker_url = traverse_obj(player_data, 'mediapolis', 'content_url')
+
+ if not relinker_url:
+ # fallback on old implementation for some old content
+ try:
+ return RaiIE._real_extract(self, url)
+ except GeoRestrictedError:
+ raise
+ except ExtractorError as e:
+ raise ExtractorError('Relinker URL not found', cause=e)
+
+ relinker_info = self._extract_relinker_info(urljoin(url, relinker_url), video_id)
+
+ return {
+ 'id': video_id,
+ 'title': player_data.get('title') or track_info.get('title') or self._og_search_title(webpage),
+ 'upload_date': unified_strdate(track_info.get('date')),
+ 'uploader': strip_or_none(track_info.get('editor') or None),
+ **relinker_info
+ }
+
+
+class RaiCulturaIE(RaiNewsIE): # XXX: Do not subclass from concrete IE
+ _VALID_URL = rf'https?://(www\.)?raicultura\.it/(?!articoli)[^?#]+-(?P<id>{RaiBaseIE._UUID_RE})(?:-[^/?#]+)?\.html'
+ _EMBED_REGEX = [rf'<iframe[^>]+data-src="(?P<url>/iframe/[^?#]+?{RaiBaseIE._UUID_RE}\.html)']
+ _TESTS = [{
+ 'url': 'https://www.raicultura.it/letteratura/articoli/2018/12/Alberto-Asor-Rosa-Letteratura-e-potere-05ba8775-82b5-45c5-a89d-dd955fbde1fb.html',
+ 'info_dict': {
+ 'id': '05ba8775-82b5-45c5-a89d-dd955fbde1fb',
+ 'ext': 'mp4',
+ 'title': 'Alberto Asor Rosa: Letteratura e potere',
+ 'duration': 1756,
+ 'upload_date': '20181206',
+ 'uploader': 'raicultura',
+ 'formats': 'count:2',
+ },
+ 'params': {'skip_download': True},
+ }]
+ _PLAYER_TAG = 'cultura'
+
+
+class RaiSudtirolIE(RaiBaseIE):
+ _VALID_URL = r'https?://raisudtirol\.rai\.it/.+media=(?P<id>\w+)'
+ _TESTS = [{
+ # mp4 file
+ 'url': 'https://raisudtirol.rai.it/la/index.php?media=Ptv1619729460',
+ 'info_dict': {
+ 'id': 'Ptv1619729460',
+ 'ext': 'mp4',
+ 'title': 'Euro: trasmisciun d\'economia - 29-04-2021 20:51',
+ 'series': 'Euro: trasmisciun d\'economia',
+ 'upload_date': '20210429',
+ 'thumbnail': r're:https://raisudtirol\.rai\.it/img/.+\.jpg',
+ 'uploader': 'raisudtirol',
+ 'formats': 'count:1',
+ },
+ 'params': {'skip_download': True},
+ }, {
+ # m3u manifest
+ 'url': 'https://raisudtirol.rai.it/it/kidsplayer.php?lang=it&media=GUGGUG_P1.smil',
+ 'info_dict': {
+ 'id': 'GUGGUG_P1',
+ 'ext': 'mp4',
+ 'title': 'GUGGUG! La Prospettiva - Die Perspektive',
+ 'uploader': 'raisudtirol',
+ 'formats': 'count:6',
+ },
+ 'params': {'skip_download': True},
+ }]
+
+ def _real_extract(self, url):
+ video_id = self._match_id(url)
+ webpage = self._download_webpage(url, video_id)
+
+ video_date = self._html_search_regex(
+ r'<span class="med_data">(.+?)</span>', webpage, 'video_date', default=None)
+ video_title = self._html_search_regex([
+ r'<span class="med_title">(.+?)</span>', r'title: \'(.+?)\','],
+ webpage, 'video_title', default=None)
+ video_url = self._html_search_regex([
+ r'sources:\s*\[\{file:\s*"(.+?)"\}\]',
+ r'<source\s+src="(.+?)"\s+type="application/x-mpegURL"'],
+ webpage, 'video_url', default=None)
+
+ ext = determine_ext(video_url)
+ if ext == 'm3u8':
+ formats = self._extract_m3u8_formats(video_url, video_id)
+ elif ext == 'mp4':
+ formats = [{
+ 'format_id': 'https-mp4',
+ 'url': self._proto_relative_url(video_url),
+ 'width': 1024,
+ 'height': 576,
+ 'fps': 25,
+ 'vcodec': 'avc1',
+ 'acodec': 'mp4a',
+ }]
+ else:
+ formats = []
+ self.raise_no_formats(f'Unrecognized media file: {video_url}')
+
+ return {
+ 'id': video_id,
+ 'title': join_nonempty(video_title, video_date, delim=' - '),
+ 'series': video_title if video_date else None,
+ 'upload_date': unified_strdate(video_date),
+ 'thumbnail': urljoin('https://raisudtirol.rai.it/', self._html_search_regex(
+ r'image: \'(.+?)\'', webpage, 'video_thumb', default=None)),
+ 'uploader': 'raisudtirol',
+ 'formats': formats,
+ }
diff --git a/yt_dlp/extractor/raywenderlich.py b/yt_dlp/extractor/raywenderlich.py
new file mode 100644
index 0000000..e0e3c3e
--- /dev/null
+++ b/yt_dlp/extractor/raywenderlich.py
@@ -0,0 +1,177 @@
+import re
+
+from .common import InfoExtractor
+from .vimeo import VimeoIE
+from ..compat import compat_str
+from ..utils import (
+ ExtractorError,
+ int_or_none,
+ merge_dicts,
+ try_get,
+ unescapeHTML,
+ unified_timestamp,
+ urljoin,
+)
+
+
+class RayWenderlichIE(InfoExtractor):
+ _VALID_URL = r'''(?x)
+ https?://
+ (?:
+ videos\.raywenderlich\.com/courses|
+ (?:www\.)?raywenderlich\.com
+ )/
+ (?P<course_id>[^/]+)/lessons/(?P<id>\d+)
+ '''
+
+ _TESTS = [{
+ 'url': 'https://www.raywenderlich.com/3530-testing-in-ios/lessons/1',
+ 'info_dict': {
+ 'id': '248377018',
+ 'ext': 'mp4',
+ 'title': 'Introduction',
+ 'description': 'md5:804d031b3efa9fcb49777d512d74f722',
+ 'timestamp': 1513906277,
+ 'upload_date': '20171222',
+ 'duration': 133,
+ 'uploader': 'Ray Wenderlich',
+ 'uploader_id': 'user3304672',
+ },
+ 'params': {
+ 'noplaylist': True,
+ 'skip_download': True,
+ },
+ 'add_ie': [VimeoIE.ie_key()],
+ 'expected_warnings': ['HTTP Error 403: Forbidden'],
+ }, {
+ 'url': 'https://videos.raywenderlich.com/courses/105-testing-in-ios/lessons/1',
+ 'only_matching': True,
+ }]
+
+    @staticmethod
+    def _extract_video_id(data, lesson_id):
+        if not data:
+            return
+        groups = try_get(data, lambda x: x['groups'], list) or []
+        if not groups:
+            return
+        for group in groups:
+            if not isinstance(group, dict):
+                continue
+            # lesson contents live inside each group, not on the collection root
+            contents = try_get(group, lambda x: x['contents'], list) or []
+            for content in contents:
+                if not isinstance(content, dict):
+                    continue
+                ordinal = int_or_none(content.get('ordinal'))
+                # lesson_id comes from the URL as a string
+                if ordinal != int_or_none(lesson_id):
+                    continue
+                video_id = content.get('identifier')
+                if video_id:
+                    return compat_str(video_id)
+
+ def _real_extract(self, url):
+ mobj = self._match_valid_url(url)
+ course_id, lesson_id = mobj.group('course_id', 'id')
+ display_id = '%s/%s' % (course_id, lesson_id)
+
+ webpage = self._download_webpage(url, display_id)
+
+ thumbnail = self._og_search_thumbnail(
+ webpage, default=None) or self._html_search_meta(
+ 'twitter:image', webpage, 'thumbnail')
+
+ if '>Subscribe to unlock' in webpage:
+ raise ExtractorError(
+ 'This content is only available for subscribers',
+ expected=True)
+
+ info = {
+ 'thumbnail': thumbnail,
+ }
+
+ vimeo_id = self._search_regex(
+ r'data-vimeo-id=["\'](\d+)', webpage, 'vimeo id', default=None)
+
+ if not vimeo_id:
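+            # fall back to the lesson collection JSON embedded in the page and
+            # resolve the clip's Vimeo id through the videos API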
+ data = self._parse_json(
+ self._search_regex(
+ r'data-collection=(["\'])(?P<data>{.+?})\1', webpage,
+ 'data collection', default='{}', group='data'),
+ display_id, transform_source=unescapeHTML, fatal=False)
+ video_id = self._extract_video_id(
+ data, lesson_id) or self._search_regex(
+ r'/videos/(\d+)/', thumbnail, 'video id')
+ headers = {
+ 'Referer': url,
+ 'X-Requested-With': 'XMLHttpRequest',
+ }
+ csrf_token = self._html_search_meta(
+ 'csrf-token', webpage, 'csrf token', default=None)
+ if csrf_token:
+ headers['X-CSRF-Token'] = csrf_token
+ video = self._download_json(
+ 'https://videos.raywenderlich.com/api/v1/videos/%s.json'
+ % video_id, display_id, headers=headers)['video']
+ vimeo_id = video['clips'][0]['provider_id']
+ info.update({
+ '_type': 'url_transparent',
+ 'title': video.get('name'),
+ 'description': video.get('description') or video.get(
+ 'meta_description'),
+ 'duration': int_or_none(video.get('duration')),
+ 'timestamp': unified_timestamp(video.get('created_at')),
+ })
+
+ return merge_dicts(info, self.url_result(
+ VimeoIE._smuggle_referrer(
+ 'https://player.vimeo.com/video/%s' % vimeo_id, url),
+ ie=VimeoIE.ie_key(), video_id=vimeo_id))
+
+
+class RayWenderlichCourseIE(InfoExtractor):
+ _VALID_URL = r'''(?x)
+ https?://
+ (?:
+ videos\.raywenderlich\.com/courses|
+ (?:www\.)?raywenderlich\.com
+ )/
+ (?P<id>[^/]+)
+ '''
+
+ _TEST = {
+ 'url': 'https://www.raywenderlich.com/3530-testing-in-ios',
+ 'info_dict': {
+ 'title': 'Testing in iOS',
+ 'id': '3530-testing-in-ios',
+ },
+ 'params': {
+ 'noplaylist': False,
+ },
+ 'playlist_count': 29,
+ }
+
+ @classmethod
+ def suitable(cls, url):
+        return False if RayWenderlichIE.suitable(url) else super().suitable(url)
+
+ def _real_extract(self, url):
+ course_id = self._match_id(url)
+
+ webpage = self._download_webpage(url, course_id)
+
+ entries = []
+ lesson_urls = set()
+ for lesson_url in re.findall(
+ r'<a[^>]+\bhref=["\'](/%s/lessons/\d+)' % course_id, webpage):
+ if lesson_url in lesson_urls:
+ continue
+ lesson_urls.add(lesson_url)
+ entries.append(self.url_result(
+ urljoin(url, lesson_url), ie=RayWenderlichIE.ie_key()))
+
+ title = self._og_search_title(
+ webpage, default=None) or self._html_search_meta(
+ 'twitter:title', webpage, 'title', default=None)
+
+ return self.playlist_result(entries, course_id, title)
diff --git a/yt_dlp/extractor/rbgtum.py b/yt_dlp/extractor/rbgtum.py
new file mode 100644
index 0000000..54f194c
--- /dev/null
+++ b/yt_dlp/extractor/rbgtum.py
@@ -0,0 +1,142 @@
+import re
+
+from .common import InfoExtractor
+from ..utils import ExtractorError, parse_qs, remove_start, traverse_obj
+
+
+class RbgTumIE(InfoExtractor):
+ _VALID_URL = r'https?://(?:live\.rbg\.tum\.de|tum\.live)/w/(?P<id>[^?#]+)'
+ _TESTS = [{
+ # Combined view
+ 'url': 'https://live.rbg.tum.de/w/cpp/22128',
+ 'md5': '53a5e7b3e07128e33bbf36687fe1c08f',
+ 'info_dict': {
+ 'id': 'cpp/22128',
+ 'ext': 'mp4',
+ 'title': 'Lecture: October 18. 2022',
+ 'series': 'Concepts of C++ programming (IN2377)',
+ }
+ }, {
+ # Presentation only
+ 'url': 'https://live.rbg.tum.de/w/I2DL/12349/PRES',
+ 'md5': '36c584272179f3e56b0db5d880639cba',
+ 'info_dict': {
+ 'id': 'I2DL/12349/PRES',
+ 'ext': 'mp4',
+ 'title': 'Lecture 3: Introduction to Neural Networks',
+ 'series': 'Introduction to Deep Learning (IN2346)',
+ }
+ }, {
+ # Camera only
+ 'url': 'https://live.rbg.tum.de/w/fvv-info/16130/CAM',
+ 'md5': 'e04189d92ff2f56aedf5cede65d37aad',
+ 'info_dict': {
+ 'id': 'fvv-info/16130/CAM',
+ 'ext': 'mp4',
+ 'title': 'Fachschaftsvollversammlung',
+ 'series': 'Fachschaftsvollversammlung Informatik',
+ }
+ }, {
+ 'url': 'https://tum.live/w/linalginfo/27102',
+ 'only_matching': True,
+    }]
+
+ def _real_extract(self, url):
+ video_id = self._match_id(url)
+ webpage = self._download_webpage(url, video_id)
+
+ m3u8 = self._html_search_regex(r'"(https://[^"]+\.m3u8[^"]*)', webpage, 'm3u8')
+ lecture_title = self._html_search_regex(r'<h1[^>]*>([^<]+)</h1>', webpage, 'title', fatal=False)
+ lecture_series_title = remove_start(self._html_extract_title(webpage), 'TUM-Live | ')
+
+ formats = self._extract_m3u8_formats(m3u8, video_id, 'mp4', entry_protocol='m3u8_native', m3u8_id='hls')
+
+ return {
+ 'id': video_id,
+ 'title': lecture_title,
+ 'series': lecture_series_title,
+ 'formats': formats,
+ }
+
+
+class RbgTumCourseIE(InfoExtractor):
+ _VALID_URL = r'https?://(?P<hostname>(?:live\.rbg\.tum\.de|tum\.live))/old/course/(?P<id>(?P<year>\d+)/(?P<term>\w+)/(?P<slug>[^/?#]+))'
+ _TESTS = [{
+ 'url': 'https://live.rbg.tum.de/old/course/2022/S/fpv',
+ 'info_dict': {
+ 'title': 'Funktionale Programmierung und Verifikation (IN0003)',
+ 'id': '2022/S/fpv',
+ },
+ 'params': {
+ 'noplaylist': False,
+ },
+ 'playlist_count': 13,
+ }, {
+ 'url': 'https://live.rbg.tum.de/old/course/2022/W/set',
+ 'info_dict': {
+ 'title': 'SET FSMPIC',
+ 'id': '2022/W/set',
+ },
+ 'params': {
+ 'noplaylist': False,
+ },
+ 'playlist_count': 6,
+ }, {
+ 'url': 'https://tum.live/old/course/2023/S/linalginfo',
+ 'only_matching': True,
+    }]
+
+ def _real_extract(self, url):
+ course_id, hostname, year, term, slug = self._match_valid_url(url).group('id', 'hostname', 'year', 'term', 'slug')
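+        # prefer the JSON API; fall back to scraping the old HTML course page below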
+ meta = self._download_json(
+ f'https://{hostname}/api/courses/{slug}/', course_id, fatal=False,
+ query={'year': year, 'term': term}) or {}
+ lecture_series_title = meta.get('Name')
+ lectures = [self.url_result(f'https://{hostname}/w/{slug}/{stream_id}', RbgTumIE)
+ for stream_id in traverse_obj(meta, ('Streams', ..., 'ID'))]
+
+ if not lectures:
+ webpage = self._download_webpage(url, course_id)
+ lecture_series_title = remove_start(self._html_extract_title(webpage), 'TUM-Live | ')
+ lectures = [self.url_result(f'https://{hostname}{lecture_path}', RbgTumIE)
+ for lecture_path in re.findall(r'href="(/w/[^/"]+/[^/"]+)"', webpage)]
+
+ return self.playlist_result(lectures, course_id, lecture_series_title)
+
+
+class RbgTumNewCourseIE(InfoExtractor):
+ _VALID_URL = r'https?://(?P<hostname>(?:live\.rbg\.tum\.de|tum\.live))/\?'
+ _TESTS = [{
+ 'url': 'https://live.rbg.tum.de/?year=2022&term=S&slug=fpv&view=3',
+ 'info_dict': {
+ 'title': 'Funktionale Programmierung und Verifikation (IN0003)',
+ 'id': '2022/S/fpv',
+ },
+ 'params': {
+ 'noplaylist': False,
+ },
+ 'playlist_count': 13,
+ }, {
+ 'url': 'https://live.rbg.tum.de/?year=2022&term=W&slug=set&view=3',
+ 'info_dict': {
+ 'title': 'SET FSMPIC',
+ 'id': '2022/W/set',
+ },
+ 'params': {
+ 'noplaylist': False,
+ },
+ 'playlist_count': 6,
+ }, {
+ 'url': 'https://tum.live/?year=2023&term=S&slug=linalginfo&view=3',
+ 'only_matching': True,
+ }]
+
+ def _real_extract(self, url):
+ query = parse_qs(url)
+ errors = [key for key in ('year', 'term', 'slug') if not query.get(key)]
+ if errors:
+ raise ExtractorError(f'Input URL is missing query parameters: {", ".join(errors)}')
+ year, term, slug = query['year'][0], query['term'][0], query['slug'][0]
+ hostname = self._match_valid_url(url).group('hostname')
+
+ return self.url_result(f'https://{hostname}/old/course/{year}/{term}/{slug}', RbgTumCourseIE)
diff --git a/yt_dlp/extractor/rcs.py b/yt_dlp/extractor/rcs.py
new file mode 100644
index 0000000..b865f63
--- /dev/null
+++ b/yt_dlp/extractor/rcs.py
@@ -0,0 +1,372 @@
+import re
+
+from .common import InfoExtractor
+from ..networking import HEADRequest
+from ..utils import (
+ ExtractorError,
+ base_url,
+ clean_html,
+ extract_attributes,
+ get_element_html_by_class,
+ get_element_html_by_id,
+ int_or_none,
+ js_to_json,
+ mimetype2ext,
+ sanitize_url,
+ traverse_obj,
+ try_call,
+ url_basename,
+ urljoin,
+)
+
+
+class RCSBaseIE(InfoExtractor):
+ # based on VideoPlayerLoader.prototype.getVideoSrc
+ # and VideoPlayerLoader.prototype.transformSrc from
+ # https://js2.corriereobjects.it/includes2013/LIBS/js/corriere_video.sjs
+ _UUID_RE = r'[\da-f]{8}-[\da-f]{4}-[\da-f]{4}-[\da-f]{4}-[\da-f]{12}'
+ _RCS_ID_RE = r'[\w-]+-\d{10}'
+ _MIGRATION_MAP = {
+ 'videoamica-vh.akamaihd': 'amica',
+ 'media2-amica-it.akamaized': 'amica',
+ 'corrierevam-vh.akamaihd': 'corriere',
+ 'media2vam-corriere-it.akamaized': 'corriere',
+ 'cormezzogiorno-vh.akamaihd': 'corrieredelmezzogiorno',
+ 'media2vam-mezzogiorno-corriere-it.akamaized': 'corrieredelmezzogiorno',
+ 'corveneto-vh.akamaihd': 'corrieredelveneto',
+ 'media2vam-veneto-corriere-it.akamaized': 'corrieredelveneto',
+ 'corbologna-vh.akamaihd': 'corrieredibologna',
+ 'media2vam-bologna-corriere-it.akamaized': 'corrieredibologna',
+ 'corfiorentino-vh.akamaihd': 'corrierefiorentino',
+ 'media2vam-fiorentino-corriere-it.akamaized': 'corrierefiorentino',
+ 'corinnovazione-vh.akamaihd': 'corriereinnovazione',
+ 'media2-gazzanet-gazzetta-it.akamaized': 'gazzanet',
+ 'videogazzanet-vh.akamaihd': 'gazzanet',
+ 'videogazzaworld-vh.akamaihd': 'gazzaworld',
+ 'gazzettavam-vh.akamaihd': 'gazzetta',
+ 'media2vam-gazzetta-it.akamaized': 'gazzetta',
+ 'videoiodonna-vh.akamaihd': 'iodonna',
+ 'media2-leitv-it.akamaized': 'leitv',
+ 'videoleitv-vh.akamaihd': 'leitv',
+ 'videoliving-vh.akamaihd': 'living',
+ 'media2-living-corriere-it.akamaized': 'living',
+ 'media2-oggi-it.akamaized': 'oggi',
+ 'videooggi-vh.akamaihd': 'oggi',
+ 'media2-quimamme-it.akamaized': 'quimamme',
+ 'quimamme-vh.akamaihd': 'quimamme',
+ 'videorunning-vh.akamaihd': 'running',
+ 'media2-style-corriere-it.akamaized': 'style',
+ 'style-vh.akamaihd': 'style',
+ 'videostyle-vh.akamaihd': 'style',
+ 'media2-stylepiccoli-it.akamaized': 'stylepiccoli',
+ 'stylepiccoli-vh.akamaihd': 'stylepiccoli',
+ 'doveviaggi-vh.akamaihd': 'viaggi',
+ 'media2-doveviaggi-it.akamaized': 'viaggi',
+ 'media2-vivimilano-corriere-it.akamaized': 'vivimilano',
+ 'vivimilano-vh.akamaihd': 'vivimilano',
+        'media2-youreporter-it.akamaized': 'youreporter',
+ }
+
+ def _get_video_src(self, video):
+ for source in traverse_obj(video, (
+ 'mediaProfile', 'mediaFile', lambda _, v: v.get('mimeType'))):
+ url = source['value']
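+            # normalize legacy Akamai edgesuite hostnames to their current CDN and force https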
+ for s, r in (
+ ('media2vam.corriere.it.edgesuite.net', 'media2vam-corriere-it.akamaized.net'),
+ ('media.youreporter.it.edgesuite.net', 'media-youreporter-it.akamaized.net'),
+ ('corrierepmd.corriere.it.edgesuite.net', 'corrierepmd-corriere-it.akamaized.net'),
+ ('media2vam-corriere-it.akamaized.net/fcs.quotidiani/vr/videos/', 'video.corriere.it/vr360/videos/'),
+ ('http://', 'https://'),
+ ):
+ url = url.replace(s, r)
+
+ type_ = mimetype2ext(source['mimeType'])
+ if type_ == 'm3u8' and '-vh.akamaihd' in url:
+ # still needed for some old content: see _TESTS #3
+ matches = re.search(r'(?:https?:)?//(?P<host>[\w\.\-]+)\.net/i(?P<path>.+)$', url)
+ if matches:
+ url = f'https://vod.rcsobjects.it/hls/{self._MIGRATION_MAP[matches.group("host")]}{matches.group("path")}'
+ if traverse_obj(video, ('mediaProfile', 'geoblocking')) or (
+ type_ == 'm3u8' and 'fcs.quotidiani_!' in url):
+ url = url.replace('vod.rcsobjects', 'vod-it.rcsobjects')
+ if type_ == 'm3u8' and 'vod' in url:
+ url = url.replace('.csmil', '.urlset')
+ if type_ == 'mp3':
+ url = url.replace('media2vam-corriere-it.akamaized.net', 'vod.rcsobjects.it/corriere')
+
+ yield {
+ 'type': type_,
+ 'url': url,
+ 'bitrate': source.get('bitrate')
+ }
+
+ def _create_http_formats(self, m3u8_formats, video_id):
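+        # derive progressive https variants from the HLS formats by rewriting the
+        # manifest URL to its bare MP4 path and validating each with a HEAD request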
+ for f in m3u8_formats:
+ if f['vcodec'] == 'none':
+ continue
+ http_url = re.sub(r'(https?://[^/]+)/hls/([^?#]+?\.mp4).+', r'\g<1>/\g<2>', f['url'])
+ if http_url == f['url']:
+ continue
+
+ http_f = f.copy()
+ del http_f['manifest_url']
+ format_id = try_call(lambda: http_f['format_id'].replace('hls-', 'https-'))
+ urlh = self._request_webpage(HEADRequest(http_url), video_id, fatal=False,
+ note=f'Check filesize for {format_id}')
+ if not urlh:
+ continue
+
+ http_f.update({
+ 'format_id': format_id,
+ 'url': http_url,
+ 'protocol': 'https',
+                'filesize_approx': int_or_none(urlh.headers.get('Content-Length')),
+ })
+ yield http_f
+
+ def _create_formats(self, sources, video_id):
+ for source in sources:
+ if source['type'] == 'm3u8':
+ m3u8_formats = self._extract_m3u8_formats(
+ source['url'], video_id, 'mp4', m3u8_id='hls', fatal=False)
+ yield from m3u8_formats
+ yield from self._create_http_formats(m3u8_formats, video_id)
+ elif source['type'] == 'mp3':
+ yield {
+ 'format_id': 'https-mp3',
+ 'ext': 'mp3',
+ 'acodec': 'mp3',
+ 'vcodec': 'none',
+ 'abr': source.get('bitrate'),
+ 'url': source['url'],
+ }
+
+ def _real_extract(self, url):
+ cdn, video_id = self._match_valid_url(url).group('cdn', 'id')
+ display_id, video_data = None, None
+
+ if re.match(self._UUID_RE, video_id) or re.match(self._RCS_ID_RE, video_id):
+ url = f'https://video.{cdn}/video-json/{video_id}'
+ else:
+ webpage = self._download_webpage(url, video_id)
+ data_config = get_element_html_by_id('divVideoPlayer', webpage) or get_element_html_by_class('divVideoPlayer', webpage)
+
+ if data_config:
+ data_config = self._parse_json(
+ extract_attributes(data_config).get('data-config'),
+ video_id, fatal=False) or {}
+ if data_config.get('newspaper'):
+ cdn = f'{data_config["newspaper"]}.it'
+ display_id, video_id = video_id, data_config.get('uuid') or video_id
+ url = f'https://video.{cdn}/video-json/{video_id}'
+ else:
+ json_url = self._search_regex(
+ r'''(?x)url\s*=\s*(["'])
+ (?P<url>
+ (?:https?:)?//video\.rcs\.it
+ /fragment-includes/video-includes/[^"']+?\.json
+ )\1;''',
+ webpage, video_id, group='url', default=None)
+ if json_url:
+ video_data = self._download_json(sanitize_url(json_url, scheme='https'), video_id)
+ display_id, video_id = video_id, video_data.get('id') or video_id
+
+ if not video_data:
+ webpage = self._download_webpage(url, video_id)
+
+ video_data = self._search_json(
+ '##start-video##', webpage, 'video data', video_id, default=None,
+ end_pattern='##end-video##', transform_source=js_to_json)
+
+ if not video_data:
+ # try search for iframes
+ emb = RCSEmbedsIE._extract_url(webpage)
+ if emb:
+ return {
+ '_type': 'url_transparent',
+ 'url': emb,
+ 'ie_key': RCSEmbedsIE.ie_key()
+ }
+
+ if not video_data:
+ raise ExtractorError('Video data not found in the page')
+
+ return {
+ 'id': video_id,
+ 'display_id': display_id,
+ 'title': video_data.get('title'),
+ 'description': (clean_html(video_data.get('description'))
+ or clean_html(video_data.get('htmlDescription'))
+ or self._html_search_meta('description', webpage)),
+ 'uploader': video_data.get('provider') or cdn,
+ 'formats': list(self._create_formats(self._get_video_src(video_data), video_id)),
+ }
+
+
+class RCSEmbedsIE(RCSBaseIE):
+ _VALID_URL = r'''(?x)
+ https?://(?P<vid>video)\.
+ (?P<cdn>
+ (?:
+ rcs|
+ (?:corriere\w+\.)?corriere|
+ (?:gazzanet\.)?gazzetta
+ )\.it)
+ /video-embed/(?P<id>[^/=&\?]+?)(?:$|\?)'''
+ _EMBED_REGEX = [r'''(?x)
+ (?:
+ data-frame-src=|
+ <iframe[^\n]+src=
+ )
+ (["'])
+ (?P<url>(?:https?:)?//video\.
+ (?:
+ rcs|
+ (?:corriere\w+\.)?corriere|
+ (?:gazzanet\.)?gazzetta
+ )
+ \.it/video-embed/.+?)
+ \1''']
+ _TESTS = [{
+ 'url': 'https://video.rcs.it/video-embed/iodonna-0001585037',
+ 'md5': '0faca97df525032bb9847f690bc3720c',
+ 'info_dict': {
+ 'id': 'iodonna-0001585037',
+ 'ext': 'mp4',
+ 'title': 'Sky Arte racconta Madonna nella serie "Artist to icon"',
+ 'description': 'md5:65b09633df9ffee57f48b39e34c9e067',
+ 'uploader': 'rcs.it',
+ }
+ }, {
+ 'url': 'https://video.gazzanet.gazzetta.it/video-embed/gazzanet-mo05-0000260789',
+ 'only_matching': True
+ }, {
+ 'url': 'https://video.gazzetta.it/video-embed/49612410-00ca-11eb-bcd8-30d4253e0140',
+ 'only_matching': True
+ }]
+ _WEBPAGE_TESTS = [{
+ 'url': 'https://www.iodonna.it/video-iodonna/personaggi-video/monica-bellucci-piu-del-lavoro-oggi-per-me-sono-importanti-lamicizia-e-la-famiglia/',
+ 'info_dict': {
+ 'id': 'iodonna-0002033648',
+ 'ext': 'mp4',
+ 'title': 'Monica Bellucci: «Più del lavoro, oggi per me sono importanti l\'amicizia e la famiglia»',
+ 'description': 'md5:daea6d9837351e56b1ab615c06bebac1',
+ 'uploader': 'rcs.it',
+ }
+ }]
+
+ @staticmethod
+ def _sanitize_url(url):
+ url = sanitize_url(url, scheme='https')
+ return urljoin(base_url(url), url_basename(url))
+
+ @classmethod
+ def _extract_embed_urls(cls, url, webpage):
+ return map(cls._sanitize_url, super()._extract_embed_urls(url, webpage))
+
+
+class RCSIE(RCSBaseIE):
+ _VALID_URL = r'''(?x)https?://(?P<vid>video|viaggi)\.
+ (?P<cdn>
+ (?:
+ corrieredelmezzogiorno\.
+ |corrieredelveneto\.
+ |corrieredibologna\.
+ |corrierefiorentino\.
+ )?corriere\.it
+ |(?:gazzanet\.)?gazzetta\.it)
+ /(?!video-embed/)[^?#]+?/(?P<id>[^/\?]+)(?=\?|/$|$)'''
+ _TESTS = [{
+ # json iframe directly from id
+ 'url': 'https://video.corriere.it/sport/formula-1/vettel-guida-ferrari-sf90-mugello-suo-fianco-c-elecrerc-bendato-video-esilarante/b727632a-f9d0-11ea-91b0-38d50a849abb',
+ 'md5': '14946840dec46ecfddf66ba4eea7d2b2',
+ 'info_dict': {
+ 'id': 'b727632a-f9d0-11ea-91b0-38d50a849abb',
+ 'ext': 'mp4',
+ 'title': 'Vettel guida la Ferrari SF90 al Mugello e al suo fianco c\'è Leclerc (bendato): il video è esilarante',
+ 'description': 'md5:3915ce5ebb3d2571deb69a5eb85ac9b5',
+ 'uploader': 'Corriere Tv',
+ }
+ }, {
+ # search for video id inside the page
+ 'url': 'https://viaggi.corriere.it/video/norvegia-il-nuovo-ponte-spettacolare-sopra-la-cascata-di-voringsfossen/',
+ 'md5': 'f22a92d9e666e80f2fffbf2825359c81',
+ 'info_dict': {
+ 'id': '5b7cd134-e2c1-11ea-89b3-b56dd0df2aa2',
+ 'display_id': 'norvegia-il-nuovo-ponte-spettacolare-sopra-la-cascata-di-voringsfossen',
+ 'ext': 'mp4',
+ 'title': 'La nuova spettacolare attrazione in Norvegia: il ponte sopra Vøringsfossen',
+ 'description': 'md5:18b35a291f6746c0c8dacd16e5f5f4f8',
+ 'uploader': 'DOVE Viaggi',
+ }
+ }, {
+ # only audio format https://github.com/yt-dlp/yt-dlp/issues/5683
+ 'url': 'https://video.corriere.it/cronaca/audio-telefonata-il-papa-becciu-santita-lettera-che-mi-ha-inviato-condanna/b94c0d20-70c2-11ed-9572-e4b947a0ebd2',
+ 'md5': 'aaffb08d02f2ce4292a4654694c78150',
+ 'info_dict': {
+ 'id': 'b94c0d20-70c2-11ed-9572-e4b947a0ebd2',
+ 'ext': 'mp3',
+ 'title': 'L\'audio della telefonata tra il Papa e Becciu: «Santità, la lettera che mi ha inviato è una condanna»',
+ 'description': 'md5:c0ddb61bd94a8d4e0d4bb9cda50a689b',
+ 'uploader': 'Corriere Tv',
+ 'formats': [{'format_id': 'https-mp3', 'ext': 'mp3'}],
+ }
+ }, {
+ # old content still needs cdn migration
+ 'url': 'https://viaggi.corriere.it/video/milano-varallo-sesia-sul-treno-a-vapore/',
+ 'md5': '2dfdce7af249654ad27eeba03fe1e08d',
+ 'info_dict': {
+ 'id': 'd8f6c8d0-f7d7-11e8-bfca-f74cf4634191',
+ 'display_id': 'milano-varallo-sesia-sul-treno-a-vapore',
+ 'ext': 'mp4',
+ 'title': 'Milano-Varallo Sesia sul treno a vapore',
+ 'description': 'md5:6348f47aac230397fe341a74f7678d53',
+ 'uploader': 'DOVE Viaggi',
+ }
+ }, {
+ 'url': 'https://video.corriere.it/video-360/metro-copenaghen-tutta-italiana/a248a7f0-e2db-11e9-9830-af2de6b1f945',
+ 'only_matching': True
+ }]
+
+
+class RCSVariousIE(RCSBaseIE):
+ _VALID_URL = r'''(?x)https?://www\.
+ (?P<cdn>
+ leitv\.it|
+ youreporter\.it|
+ amica\.it
+ )/(?:[^/]+/)?(?P<id>[^/]+?)(?:$|\?|/)'''
+ _TESTS = [{
+ 'url': 'https://www.leitv.it/benessere/mal-di-testa/',
+ 'md5': '3b7a683d105a7313ec7513b014443631',
+ 'info_dict': {
+ 'id': 'leitv-0000125151',
+ 'display_id': 'mal-di-testa',
+ 'ext': 'mp4',
+ 'title': 'Cervicalgia e mal di testa, il video con i suggerimenti dell\'esperto',
+ 'description': 'md5:ae21418f34cee0b8d02a487f55bcabb5',
+ 'uploader': 'leitv.it',
+ }
+ }, {
+ 'url': 'https://www.youreporter.it/fiume-sesia-3-ottobre-2020/',
+ 'md5': '3989b6d603482611a2abd2f32b79f739',
+ 'info_dict': {
+ 'id': 'youreporter-0000332574',
+ 'display_id': 'fiume-sesia-3-ottobre-2020',
+ 'ext': 'mp4',
+ 'title': 'Fiume Sesia 3 ottobre 2020',
+ 'description': 'md5:0070eef1cc884d13c970a4125063de55',
+ 'uploader': 'youreporter.it',
+ }
+ }, {
+ 'url': 'https://www.amica.it/video-post/saint-omer-al-cinema-il-film-leone-dargento-che-ribalta-gli-stereotipi/',
+ 'md5': '187cce524dfd0343c95646c047375fc4',
+ 'info_dict': {
+ 'id': 'amica-0001225365',
+ 'display_id': 'saint-omer-al-cinema-il-film-leone-dargento-che-ribalta-gli-stereotipi',
+ 'ext': 'mp4',
+ 'title': '"Saint Omer": al cinema il film Leone d\'argento che ribalta gli stereotipi',
+ 'description': 'md5:b1c8869c2dcfd6073a2a311ba0008aa8',
+ 'uploader': 'rcs.it',
+ }
+ }]
diff --git a/yt_dlp/extractor/rcti.py b/yt_dlp/extractor/rcti.py
new file mode 100644
index 0000000..6a7c7f3
--- /dev/null
+++ b/yt_dlp/extractor/rcti.py
@@ -0,0 +1,373 @@
+import json
+import random
+import time
+
+from .common import InfoExtractor
+from ..networking.exceptions import HTTPError
+from ..utils import (
+ dict_get,
+ ExtractorError,
+ strip_or_none,
+ traverse_obj,
+    try_get,
+)
+
+
+class RCTIPlusBaseIE(InfoExtractor):
+ def _real_initialize(self):
+ self._AUTH_KEY = self._download_json(
+ 'https://api.rctiplus.com/api/v1/visitor?platform=web', # platform can be web, mweb, android, ios
+ None, 'Fetching authorization key')['data']['access_token']
+
+ def _call_api(self, url, video_id, note=None):
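+        # RCTI+ wraps every response as {'status': {...}, 'data': ..., 'meta': ...};
+        # a non-zero status code signals an API-side error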
+ json = self._download_json(
+ url, video_id, note=note, headers={'Authorization': self._AUTH_KEY})
+ if json.get('status', {}).get('code', 0) != 0:
+ raise ExtractorError(f'{self.IE_NAME} said: {json["status"]["message_client"]}', cause=json)
+ return json.get('data'), json.get('meta')
+
+
+class RCTIPlusIE(RCTIPlusBaseIE):
+ _VALID_URL = r'https?://www\.rctiplus\.com/(?:programs/\d+?/.*?/)?(?P<type>episode|clip|extra|live-event|missed-event)/(?P<id>\d+)/(?P<display_id>[^/?#&]+)'
+ _TESTS = [{
+ 'url': 'https://www.rctiplus.com/programs/1259/kiko-untuk-lola/episode/22124/untuk-lola',
+ 'md5': '56ed45affad45fa18d5592a1bc199997',
+ 'info_dict': {
+ 'id': 'v_e22124',
+ 'title': 'Untuk Lola',
+ 'display_id': 'untuk-lola',
+ 'description': 'md5:2b809075c0b1e071e228ad6d13e41deb',
+ 'ext': 'mp4',
+ 'duration': 1400,
+ 'timestamp': 1615978800,
+ 'upload_date': '20210317',
+ 'series': 'Kiko : Untuk Lola',
+ 'season_number': 1,
+ 'episode_number': 1,
+ 'channel': 'RCTI',
+ },
+ 'params': {
+ 'fixup': 'never',
+ },
+ }, { # Clip; Series title doesn't appear on metadata JSON
+ 'url': 'https://www.rctiplus.com/programs/316/cahaya-terindah/clip/3921/make-a-wish',
+ 'md5': 'd179b2ff356f0e91a53bcc6a4d8504f0',
+ 'info_dict': {
+ 'id': 'v_c3921',
+ 'title': 'Make A Wish',
+ 'display_id': 'make-a-wish',
+ 'description': 'Make A Wish',
+ 'ext': 'mp4',
+ 'duration': 288,
+ 'timestamp': 1571652600,
+ 'upload_date': '20191021',
+ 'series': 'Cahaya Terindah',
+ 'channel': 'RCTI',
+ },
+ 'params': {
+ 'fixup': 'never',
+ },
+ }, { # Extra
+ 'url': 'https://www.rctiplus.com/programs/616/inews-malam/extra/9438/diungkapkan-melalui-surat-terbuka-ceo-ruangguru-belva-devara-mundur-dari-staf-khusus-presiden',
+ 'md5': 'c48106afdbce609749f5e0c007d9278a',
+ 'info_dict': {
+ 'id': 'v_ex9438',
+ 'title': 'md5:2ede828c0f8bde249e0912be150314ca',
+ 'display_id': 'md5:62b8d4e9ff096db527a1ad797e8a9933',
+ 'description': 'md5:2ede828c0f8bde249e0912be150314ca',
+ 'ext': 'mp4',
+ 'duration': 93,
+ 'timestamp': 1587561540,
+ 'upload_date': '20200422',
+ 'series': 'iNews Malam',
+ 'channel': 'INews',
+ },
+ }, { # Missed event/replay
+ 'url': 'https://www.rctiplus.com/missed-event/2507/mou-signing-ceremony-27-juli-2021-1400-wib',
+ 'md5': '649c5f27250faed1452ca8b91e06922d',
+ 'info_dict': {
+ 'id': 'v_pe2507',
+ 'title': 'MOU Signing Ceremony | 27 Juli 2021 | 14.00 WIB',
+ 'display_id': 'mou-signing-ceremony-27-juli-2021-1400-wib',
+ 'ext': 'mp4',
+ 'timestamp': 1627142400,
+ 'upload_date': '20210724',
+ 'was_live': True,
+ 'release_timestamp': 1627369200,
+ },
+ 'params': {
+ 'fixup': 'never',
+ },
+ }, { # Live event; Cloudfront CDN
+ 'url': 'https://www.rctiplus.com/live-event/2530/dai-muda-charging-imun-dengan-iman-4-agustus-2021-1600-wib',
+ 'info_dict': {
+ 'id': 'v_le2530',
+ 'title': 'Dai Muda : Charging Imun dengan Iman | 4 Agustus 2021 | 16.00 WIB',
+ 'display_id': 'dai-muda-charging-imun-dengan-iman-4-agustus-2021-1600-wib',
+ 'ext': 'mp4',
+ 'timestamp': 1627898400,
+ 'upload_date': '20210802',
+ 'release_timestamp': 1628067600,
+ },
+ 'params': {
+ 'skip_download': True,
+ },
+ 'skip': 'This live event has ended.',
+ }, { # TV; live_at is null
+ 'url': 'https://www.rctiplus.com/live-event/1/rcti',
+ 'info_dict': {
+ 'id': 'v_lt1',
+ 'title': 'RCTI',
+ 'display_id': 'rcti',
+ 'ext': 'mp4',
+ 'timestamp': 1546344000,
+ 'upload_date': '20190101',
+ 'is_live': True,
+ },
+ 'params': {
+ 'skip_download': True,
+ },
+ }]
+ _CONVIVA_JSON_TEMPLATE = {
+ 't': 'CwsSessionHb',
+ 'cid': 'ff84ae928c3b33064b76dec08f12500465e59a6f',
+ 'clid': '0',
+ 'sid': 0,
+ 'seq': 0,
+ 'caps': 0,
+ 'sf': 7,
+ 'sdk': True,
+ }
+
+ def _real_extract(self, url):
+ match = self._match_valid_url(url).groupdict()
+ video_type, video_id, display_id = match['type'], match['id'], match['display_id']
+
+ url_api_version = 'v2' if video_type == 'missed-event' else 'v1'
+ appier_id = '23984824_' + str(random.randint(0, 10000000000)) # Based on the webpage's uuidRandom generator
+ video_json = self._call_api(
+ f'https://api.rctiplus.com/api/{url_api_version}/{video_type}/{video_id}/url?appierid={appier_id}', display_id, 'Downloading video URL JSON')[0]
+ video_url = video_json['url']
+
+ is_upcoming = try_get(video_json, lambda x: x['current_date'] < x['live_at'])
+ if is_upcoming is None:
+ is_upcoming = try_get(video_json, lambda x: x['current_date'] < x['start_date'])
+ if is_upcoming:
+ self.raise_no_formats(
+ 'This event will start at %s.' % video_json['live_label'] if video_json.get('live_label') else 'This event has not started yet.', expected=True)
+ if 'akamaized' in video_url:
+ # For some videos hosted on Akamai's CDN (possibly AES-encrypted ones?), a session needs to at least be made via Conviva's API
+ conviva_json_data = {
+ **self._CONVIVA_JSON_TEMPLATE,
+ 'url': video_url,
+ 'sst': int(time.time())
+ }
+ conviva_json_res = self._download_json(
+ 'https://ff84ae928c3b33064b76dec08f12500465e59a6f.cws.conviva.com/0/wsg', display_id,
+ 'Creating Conviva session', 'Failed to create Conviva session',
+ fatal=False, data=json.dumps(conviva_json_data).encode('utf-8'))
+ if conviva_json_res and conviva_json_res.get('err') != 'ok':
+ self.report_warning('Conviva said: %s' % str(conviva_json_res.get('err')))
+
+ video_meta, meta_paths = self._call_api(
+ 'https://api.rctiplus.com/api/v1/%s/%s' % (video_type, video_id), display_id, 'Downloading video metadata')
+
+ thumbnails, image_path = [], meta_paths.get('image_path', 'https://rstatic.akamaized.net/media/')
+ if video_meta.get('portrait_image'):
+ thumbnails.append({
+ 'id': 'portrait_image',
+ 'url': '%s%d%s' % (image_path, 2000, video_meta['portrait_image']) # 2000px seems to be the highest resolution that can be given
+ })
+ if video_meta.get('landscape_image'):
+ thumbnails.append({
+ 'id': 'landscape_image',
+ 'url': '%s%d%s' % (image_path, 2000, video_meta['landscape_image'])
+ })
+        formats = []
+        try:
+            formats = self._extract_m3u8_formats(video_url, display_id, 'mp4', headers={'Referer': 'https://www.rctiplus.com/'})
+        except ExtractorError as e:
+            if isinstance(e.cause, HTTPError) and e.cause.status == 403:
+                # metadata_available only downgrades this to a warning when
+                # --ignore-no-formats-error is set; formats is pre-initialized
+                # above so the loop below cannot hit a NameError in that case
+                self.raise_geo_restricted(countries=['ID'], metadata_available=True)
+            else:
+                raise
+ for f in formats:
+ if 'akamaized' in f['url'] or 'cloudfront' in f['url']:
+ f.setdefault('http_headers', {})['Referer'] = 'https://www.rctiplus.com/' # Referer header is required for akamai/cloudfront CDNs
+
+ return {
+ 'id': video_meta.get('product_id') or video_json.get('product_id'),
+ 'title': dict_get(video_meta, ('title', 'name')) or dict_get(video_json, ('content_name', 'assets_name')),
+ 'display_id': display_id,
+ 'description': video_meta.get('summary'),
+ 'timestamp': video_meta.get('release_date') or video_json.get('start_date'),
+ 'duration': video_meta.get('duration'),
+ 'categories': [video_meta['genre']] if video_meta.get('genre') else None,
+ 'average_rating': video_meta.get('star_rating'),
+ 'series': video_meta.get('program_title') or video_json.get('program_title'),
+ 'season_number': video_meta.get('season'),
+ 'episode_number': video_meta.get('episode'),
+ 'channel': video_json.get('tv_name'),
+ 'channel_id': video_json.get('tv_id'),
+ 'formats': formats,
+ 'thumbnails': thumbnails,
+ 'is_live': video_type == 'live-event' and not is_upcoming,
+ 'was_live': video_type == 'missed-event',
+ 'live_status': 'is_upcoming' if is_upcoming else None,
+ 'release_timestamp': video_json.get('live_at'),
+ }
+
+
+class RCTIPlusSeriesIE(RCTIPlusBaseIE):
+ _VALID_URL = r'https?://www\.rctiplus\.com/programs/(?P<id>\d+)/(?P<display_id>[^/?#&]+)(?:/(?P<type>episodes|extras|clips))?'
+ _TESTS = [{
+ 'url': 'https://www.rctiplus.com/programs/829/putri-untuk-pangeran',
+ 'playlist_mincount': 1019,
+ 'info_dict': {
+ 'id': '829',
+ 'title': 'Putri Untuk Pangeran',
+ 'description': 'md5:aca7b54d05bd95a67d4f4613cc1d622d',
+ 'age_limit': 2,
+ 'cast': ['Verrel Bramasta', 'Ranty Maria', 'Riza Syah', 'Ivan Fadilla', 'Nicole Parham', 'Dll', 'Aviv Elham'],
+ 'display_id': 'putri-untuk-pangeran',
+ 'tags': 'count:18',
+ },
+ }, { # No episodes
+ 'url': 'https://www.rctiplus.com/programs/615/inews-pagi',
+ 'playlist_mincount': 388,
+ 'info_dict': {
+ 'id': '615',
+ 'title': 'iNews Pagi',
+ 'description': 'md5:f18ee3d4643cfb41c358e5a9b693ee04',
+ 'age_limit': 2,
+ 'tags': 'count:11',
+ 'display_id': 'inews-pagi',
+ }
+ }]
+    _AGE_RATINGS = {  # Based on https://id.wikipedia.org/wiki/Sistem_rating_konten_televisi, with additional ratings
+ 'S-SU': 2,
+ 'SU': 2,
+ 'P': 2,
+ 'A': 7,
+ 'R': 13,
+ 'R-R/1': 17, # Labelled as 17+ despite being R
+ 'D': 18,
+ }
+
+ @classmethod
+ def suitable(cls, url):
+        return False if RCTIPlusIE.suitable(url) else super().suitable(url)
+
+ def _entries(self, url, display_id=None, note='Downloading entries JSON', metadata={}):
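+        # The listing API pages in batches of 20; page 0 is fetched first purely to
+        # learn the total page count before iterating over the real pages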
+ total_pages = 0
+ try:
+ total_pages = self._call_api(
+ '%s&length=20&page=0' % url,
+ display_id, note)[1]['pagination']['total_page']
+ except ExtractorError as e:
+ if 'not found' in str(e):
+ return []
+            raise
+ if total_pages <= 0:
+ return []
+
+ for page_num in range(1, total_pages + 1):
+ episode_list = self._call_api(
+ '%s&length=20&page=%s' % (url, page_num),
+ display_id, '%s page %s' % (note, page_num))[0] or []
+
+ for video_json in episode_list:
+ yield {
+ '_type': 'url',
+ 'url': video_json['share_link'],
+ 'ie_key': RCTIPlusIE.ie_key(),
+ 'id': video_json.get('product_id'),
+ 'title': video_json.get('title'),
+ 'display_id': video_json.get('title_code').replace('_', '-'),
+ 'description': video_json.get('summary'),
+ 'timestamp': video_json.get('release_date'),
+ 'duration': video_json.get('duration'),
+ 'season_number': video_json.get('season'),
+ 'episode_number': video_json.get('episode'),
+ **metadata
+ }
+
+ def _series_entries(self, series_id, display_id=None, video_type=None, metadata={}):
+        if not video_type or video_type == 'episodes':
+ try:
+ seasons_list = self._call_api(
+ f'https://api.rctiplus.com/api/v1/program/{series_id}/season',
+ display_id, 'Downloading seasons list JSON')[0]
+ except ExtractorError as e:
+ if 'not found' not in str(e):
+ raise
+ seasons_list = []
+ for season in seasons_list:
+ yield from self._entries(
+ f'https://api.rctiplus.com/api/v2/program/{series_id}/episode?season={season["season"]}',
+ display_id, f'Downloading season {season["season"]} episode entries', metadata)
+        if not video_type or video_type == 'extras':
+ yield from self._entries(
+ f'https://api.rctiplus.com/api/v2/program/{series_id}/extra?content_id=0',
+ display_id, 'Downloading extra entries', metadata)
+        if not video_type or video_type == 'clips':
+ yield from self._entries(
+ f'https://api.rctiplus.com/api/v2/program/{series_id}/clip?content_id=0',
+ display_id, 'Downloading clip entries', metadata)
+
+ def _real_extract(self, url):
+ series_id, display_id, video_type = self._match_valid_url(url).group('id', 'display_id', 'type')
+ if video_type:
+ self.report_warning(
+ f'Only {video_type} will be downloaded. '
+ f'To download everything from the series, remove "/{video_type}" from the URL')
+
+ series_meta, meta_paths = self._call_api(
+ f'https://api.rctiplus.com/api/v1/program/{series_id}/detail', display_id, 'Downloading series metadata')
+ metadata = {
+ 'age_limit': try_get(series_meta, lambda x: self._AGE_RATINGS[x['age_restriction'][0]['code']]),
+ 'cast': traverse_obj(series_meta, (('starring', 'creator', 'writer'), ..., 'name'),
+ expected_type=lambda x: strip_or_none(x) or None),
+ 'tags': traverse_obj(series_meta, ('tag', ..., 'name'),
+ expected_type=lambda x: strip_or_none(x) or None),
+ }
+ return self.playlist_result(
+ self._series_entries(series_id, display_id, video_type, metadata), series_id,
+ series_meta.get('title'), series_meta.get('summary'), display_id=display_id, **metadata)
+
+
+class RCTIPlusTVIE(RCTIPlusBaseIE):
+ _VALID_URL = r'https?://www\.rctiplus\.com/((tv/(?P<tvname>\w+))|(?P<eventname>live-event|missed-event))'
+ _TESTS = [{
+ 'url': 'https://www.rctiplus.com/tv/rcti',
+ 'info_dict': {
+ 'id': 'v_lt1',
+ 'title': 'RCTI',
+ 'ext': 'mp4',
+ 'timestamp': 1546344000,
+ 'upload_date': '20190101',
+ },
+ 'params': {
+ 'skip_download': True,
+ }
+ }, {
+ # Returned video will always change
+ 'url': 'https://www.rctiplus.com/live-event',
+ 'only_matching': True,
+ }, {
+ # Returned video will also always change
+ 'url': 'https://www.rctiplus.com/missed-event',
+ 'only_matching': True,
+ }]
+
+ @classmethod
+ def suitable(cls, url):
+        return False if RCTIPlusIE.suitable(url) else super().suitable(url)
+
+ def _real_extract(self, url):
+ match = self._match_valid_url(url).groupdict()
+ tv_id = match.get('tvname') or match.get('eventname')
+ webpage = self._download_webpage(url, tv_id)
+ video_type, video_id = self._search_regex(
+ r'url\s*:\s*["\']https://api\.rctiplus\.com/api/v./(?P<type>[^/]+)/(?P<id>\d+)/url',
+ webpage, 'video link', group=('type', 'id'))
+ return self.url_result(f'https://www.rctiplus.com/{video_type}/{video_id}/{tv_id}', 'RCTIPlus')
diff --git a/yt_dlp/extractor/rds.py b/yt_dlp/extractor/rds.py
new file mode 100644
index 0000000..1a1c663
--- /dev/null
+++ b/yt_dlp/extractor/rds.py
@@ -0,0 +1,68 @@
+from .common import InfoExtractor
+from ..compat import compat_str
+from ..utils import (
+    js_to_json,
+    parse_duration,
+    parse_iso8601,
+)
+
+
+class RDSIE(InfoExtractor):
+ _WORKING = False
+ IE_DESC = 'RDS.ca'
+ _VALID_URL = r'https?://(?:www\.)?rds\.ca/vid(?:[eé]|%C3%A9)os/(?:[^/]+/)*(?P<id>[^/]+)-\d+\.\d+'
+
+ _TESTS = [{
+ # has two 9c9media ContentPackages, the web player selects the first ContentPackage
+ 'url': 'https://www.rds.ca/videos/Hockey/NationalHockeyLeague/teams/9/forum-du-5-a-7-jesperi-kotkaniemi-de-retour-de-finlande-3.1377606',
+ 'info_dict': {
+ 'id': '2083309',
+ 'display_id': 'forum-du-5-a-7-jesperi-kotkaniemi-de-retour-de-finlande',
+ 'ext': 'flv',
+ 'title': 'Forum du 5 à 7 : Kotkaniemi de retour de Finlande',
+ 'description': 'md5:83fa38ecc4a79b19e433433254077f25',
+ 'timestamp': 1606129030,
+ 'upload_date': '20201123',
+ 'duration': 773.039,
+ }
+ }, {
+ 'url': 'http://www.rds.ca/vid%C3%A9os/un-voyage-positif-3.877934',
+ 'only_matching': True,
+ }]
+
+ def _real_extract(self, url):
+ display_id = self._match_id(url)
+
+ webpage = self._download_webpage(url, display_id)
+
+        item = self._parse_json(self._search_regex(
+            r'(?s)itemToPush\s*=\s*({.+?});', webpage, 'item'), display_id, js_to_json)
+ video_id = compat_str(item['id'])
+ title = item.get('title') or self._og_search_title(webpage) or self._html_search_meta(
+ 'title', webpage, 'title', fatal=True)
+ description = self._og_search_description(webpage) or self._html_search_meta(
+ 'description', webpage, 'description')
+ thumbnail = item.get('urlImageBig') or self._og_search_thumbnail(webpage) or self._search_regex(
+ [r'<link[^>]+itemprop="thumbnailUrl"[^>]+href="([^"]+)"',
+ r'<span[^>]+itemprop="thumbnailUrl"[^>]+content="([^"]+)"'],
+ webpage, 'thumbnail', fatal=False)
+ timestamp = parse_iso8601(self._search_regex(
+ r'<span[^>]+itemprop="uploadDate"[^>]+content="([^"]+)"',
+ webpage, 'upload date', fatal=False))
+ duration = parse_duration(self._search_regex(
+ r'<span[^>]+itemprop="duration"[^>]+content="([^"]+)"',
+ webpage, 'duration', fatal=False))
+ age_limit = self._family_friendly_search(webpage)
+
+ return {
+ '_type': 'url_transparent',
+ 'id': video_id,
+ 'display_id': display_id,
+ 'url': '9c9media:rds_web:%s' % video_id,
+ 'title': title,
+ 'description': description,
+ 'thumbnail': thumbnail,
+ 'timestamp': timestamp,
+ 'duration': duration,
+ 'age_limit': age_limit,
+ 'ie_key': 'NineCNineMedia',
+ }
diff --git a/yt_dlp/extractor/redbee.py b/yt_dlp/extractor/redbee.py
new file mode 100644
index 0000000..4d71133
--- /dev/null
+++ b/yt_dlp/extractor/redbee.py
@@ -0,0 +1,380 @@
+import json
+import re
+import time
+import urllib.parse
+import uuid
+
+from .common import InfoExtractor
+from ..utils import (
+ ExtractorError,
+ float_or_none,
+ int_or_none,
+ strip_or_none,
+ traverse_obj,
+ try_call,
+ unified_timestamp,
+)
+
+
+class RedBeeBaseIE(InfoExtractor):
+ _DEVICE_ID = str(uuid.uuid4())
+
+ @property
+ def _API_URL(self):
+ """
+ Ref: https://apidocs.emp.ebsd.ericsson.net
+ Subclasses must set _REDBEE_CUSTOMER, _REDBEE_BUSINESS_UNIT
+ """
+ return f'https://exposure.api.redbee.live/v2/customer/{self._REDBEE_CUSTOMER}/businessunit/{self._REDBEE_BUSINESS_UNIT}'
+
+ def _get_bearer_token(self, asset_id, jwt=None):
+ request = {
+ 'deviceId': self._DEVICE_ID,
+ 'device': {
+ 'deviceId': self._DEVICE_ID,
+ 'name': 'Mozilla Firefox 102',
+ 'type': 'WEB',
+ },
+ }
+ if jwt:
+ request['jwt'] = jwt
+
+ return self._download_json(
+ f'{self._API_URL}/auth/{"gigyaLogin" if jwt else "anonymous"}',
+ asset_id, data=json.dumps(request).encode('utf-8'), headers={
+ 'Content-Type': 'application/json;charset=utf-8'
+ })['sessionToken']
+
+ def _get_formats_and_subtitles(self, asset_id, **kwargs):
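+        # Each entry of the entitlement response carries a 'mediaLocator' manifest URL;
+        # DASH, Smooth Streaming and HLS locators are each handled below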
+ bearer_token = self._get_bearer_token(asset_id, **kwargs)
+ api_response = self._download_json(
+ f'{self._API_URL}/entitlement/{asset_id}/play',
+ asset_id, headers={
+ 'Authorization': f'Bearer {bearer_token}',
+ 'Accept': 'application/json, text/plain, */*'
+ })
+
+ formats, subtitles = [], {}
+ for format in api_response['formats']:
+ if not format.get('mediaLocator'):
+ continue
+
+ fmts, subs = [], {}
+ if format.get('format') == 'DASH':
+ fmts, subs = self._extract_mpd_formats_and_subtitles(
+ format['mediaLocator'], asset_id, fatal=False)
+ elif format.get('format') == 'SMOOTHSTREAMING':
+ fmts, subs = self._extract_ism_formats_and_subtitles(
+ format['mediaLocator'], asset_id, fatal=False)
+ elif format.get('format') == 'HLS':
+ fmts, subs = self._extract_m3u8_formats_and_subtitles(
+ format['mediaLocator'], asset_id, fatal=False)
+
+ if format.get('drm'):
+ for f in fmts:
+ f['has_drm'] = True
+
+ formats.extend(fmts)
+ self._merge_subtitles(subs, target=subtitles)
+
+ return formats, subtitles
+
+
+class ParliamentLiveUKIE(RedBeeBaseIE):
+ IE_NAME = 'parliamentlive.tv'
+ IE_DESC = 'UK parliament videos'
+ _VALID_URL = r'(?i)https?://(?:www\.)?parliamentlive\.tv/Event/Index/(?P<id>[\da-f]{8}-[\da-f]{4}-[\da-f]{4}-[\da-f]{4}-[\da-f]{12})'
+
+ _REDBEE_CUSTOMER = 'UKParliament'
+ _REDBEE_BUSINESS_UNIT = 'ParliamentLive'
+
+ _TESTS = [{
+ 'url': 'http://parliamentlive.tv/Event/Index/c1e9d44d-fd6c-4263-b50f-97ed26cc998b',
+ 'info_dict': {
+ 'id': 'c1e9d44d-fd6c-4263-b50f-97ed26cc998b',
+ 'ext': 'mp4',
+ 'title': 'Home Affairs Committee',
+ 'timestamp': 1395153872,
+ 'upload_date': '20140318',
+ 'thumbnail': r're:https?://[^?#]+c1e9d44d-fd6c-4263-b50f-97ed26cc998b[^/]*/thumbnail',
+ },
+ }, {
+ 'url': 'http://parliamentlive.tv/event/index/3f24936f-130f-40bf-9a5d-b3d6479da6a4',
+ 'only_matching': True,
+ }, {
+ 'url': 'https://parliamentlive.tv/Event/Index/27cf25e4-e77b-42a3-93c5-c815cd6d7377',
+ 'info_dict': {
+ 'id': '27cf25e4-e77b-42a3-93c5-c815cd6d7377',
+ 'ext': 'mp4',
+ 'title': 'House of Commons',
+ 'timestamp': 1658392447,
+ 'upload_date': '20220721',
+ 'thumbnail': r're:https?://[^?#]+27cf25e4-e77b-42a3-93c5-c815cd6d7377[^/]*/thumbnail',
+ },
+ }]
+
+ def _real_extract(self, url):
+ video_id = self._match_id(url)
+
+ formats, subtitles = self._get_formats_and_subtitles(video_id)
+
+ video_info = self._download_json(
+ f'https://www.parliamentlive.tv/Event/GetShareVideo/{video_id}', video_id, fatal=False)
+
+ return {
+ 'id': video_id,
+ 'formats': formats,
+ 'subtitles': subtitles,
+ 'title': traverse_obj(video_info, ('event', 'title')),
+ 'thumbnail': traverse_obj(video_info, 'thumbnailUrl'),
+ 'timestamp': traverse_obj(
+ video_info, ('event', 'publishedStartTime'), expected_type=unified_timestamp),
+ '_format_sort_fields': ('res', 'proto'),
+ }
+
+
+class RTBFIE(RedBeeBaseIE):
+ _WORKING = False
+ _VALID_URL = r'''(?x)
+ https?://(?:www\.)?rtbf\.be/
+ (?:
+ video/[^?]+\?.*\bid=|
+ ouftivi/(?:[^/]+/)*[^?]+\?.*\bvideoId=|
+ auvio/[^/]+\?.*\b(?P<live>l)?id=
+ )(?P<id>\d+)'''
+ _NETRC_MACHINE = 'rtbf'
+
+ _REDBEE_CUSTOMER = 'RTBF'
+ _REDBEE_BUSINESS_UNIT = 'Auvio'
+
+ _TESTS = [{
+ 'url': 'https://www.rtbf.be/video/detail_les-diables-au-coeur-episode-2?id=1921274',
+ 'md5': '8c876a1cceeb6cf31b476461ade72384',
+ 'info_dict': {
+ 'id': '1921274',
+ 'ext': 'mp4',
+ 'title': 'Les Diables au coeur (épisode 2)',
+ 'description': '(du 25/04/2014)',
+ 'duration': 3099.54,
+ 'upload_date': '20140425',
+ 'timestamp': 1398456300,
+ },
+ 'skip': 'No longer available',
+ }, {
+ # geo restricted
+ 'url': 'http://www.rtbf.be/ouftivi/heros/detail_scooby-doo-mysteres-associes?id=1097&videoId=2057442',
+ 'only_matching': True,
+ }, {
+ 'url': 'http://www.rtbf.be/ouftivi/niouzz?videoId=2055858',
+ 'only_matching': True,
+ }, {
+ 'url': 'http://www.rtbf.be/auvio/detail_jeudi-en-prime-siegfried-bracke?id=2102996',
+ 'only_matching': True,
+ }, {
+ # Live
+ 'url': 'https://www.rtbf.be/auvio/direct_pure-fm?lid=134775',
+ 'only_matching': True,
+ }, {
+ # Audio
+ 'url': 'https://www.rtbf.be/auvio/detail_cinq-heures-cinema?id=2360811',
+ 'only_matching': True,
+ }, {
+ # With Subtitle
+ 'url': 'https://www.rtbf.be/auvio/detail_les-carnets-du-bourlingueur?id=2361588',
+ 'only_matching': True,
+ }, {
+ 'url': 'https://www.rtbf.be/auvio/detail_investigation?id=2921926',
+ 'md5': 'd5d11bb62169fef38d7ce7ac531e034f',
+ 'info_dict': {
+ 'id': '2921926',
+ 'ext': 'mp4',
+ 'title': 'Le handicap un confinement perpétuel - Maladie de Lyme',
+ 'description': 'md5:dcbd5dcf6015488c9069b057c15ccc52',
+ 'duration': 5258.8,
+ 'upload_date': '20220727',
+ 'timestamp': 1658934000,
+ 'series': '#Investigation',
+ 'thumbnail': r're:^https?://[^?&]+\.jpg$',
+ },
+ }, {
+ 'url': 'https://www.rtbf.be/auvio/detail_la-belgique-criminelle?id=2920492',
+ 'md5': '054f9f143bc79c89647c35e5a7d35fa8',
+ 'info_dict': {
+ 'id': '2920492',
+ 'ext': 'mp4',
+ 'title': '04 - Le crime de la rue Royale',
+ 'description': 'md5:0c3da1efab286df83f2ab3f8f96bd7a6',
+ 'duration': 1574.6,
+ 'upload_date': '20220723',
+ 'timestamp': 1658596887,
+ 'series': 'La Belgique criminelle - TV',
+ 'thumbnail': r're:^https?://[^?&]+\.jpg$',
+ },
+ }]
+
+ _IMAGE_HOST = 'http://ds1.ds.static.rtbf.be'
+ _PROVIDERS = {
+ 'YOUTUBE': 'Youtube',
+ 'DAILYMOTION': 'Dailymotion',
+ 'VIMEO': 'Vimeo',
+ }
+ _QUALITIES = [
+ ('mobile', 'SD'),
+ ('web', 'MD'),
+ ('high', 'HD'),
+ ]
+ _LOGIN_URL = 'https://login.rtbf.be/accounts.login'
+ _GIGYA_API_KEY = '3_kWKuPgcdAybqnqxq_MvHVk0-6PN8Zk8pIIkJM_yXOu-qLPDDsGOtIDFfpGivtbeO'
+ _LOGIN_COOKIE_ID = f'glt_{_GIGYA_API_KEY}'
+
+ def _perform_login(self, username, password):
+ if self._get_cookies(self._LOGIN_URL).get(self._LOGIN_COOKIE_ID):
+ return
+
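+        # Gigya's login endpoint expects a 'gmid' device cookie to already exist;
+        # a placeholder version-4 value appears to be accepted (assumption)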
+ self._set_cookie('.rtbf.be', 'gmid', 'gmid.ver4', secure=True, expire_time=time.time() + 3600)
+
+ login_response = self._download_json(
+ self._LOGIN_URL, None, data=urllib.parse.urlencode({
+ 'loginID': username,
+ 'password': password,
+ 'APIKey': self._GIGYA_API_KEY,
+ 'targetEnv': 'jssdk',
+ 'sessionExpiration': '-2',
+ }).encode('utf-8'), headers={
+ 'Content-Type': 'application/x-www-form-urlencoded',
+ })
+
+ if login_response['statusCode'] != 200:
+ raise ExtractorError('Login failed. Server message: %s' % login_response['errorMessage'], expected=True)
+
+ self._set_cookie('.rtbf.be', self._LOGIN_COOKIE_ID, login_response['sessionInfo']['login_token'],
+ secure=True, expire_time=time.time() + 3600)
+
+ def _get_formats_and_subtitles(self, url, media_id):
+ login_token = self._get_cookies(url).get(self._LOGIN_COOKIE_ID)
+ if not login_token:
+ self.raise_login_required()
+
+ session_jwt = try_call(lambda: self._get_cookies(url)['rtbf_jwt'].value) or self._download_json(
+ 'https://login.rtbf.be/accounts.getJWT', media_id, query={
+ 'login_token': login_token.value,
+ 'APIKey': self._GIGYA_API_KEY,
+ 'sdk': 'js_latest',
+ 'authMode': 'cookie',
+ 'pageURL': url,
+ 'sdkBuild': '13273',
+ 'format': 'json',
+ })['id_token']
+
+ return super()._get_formats_and_subtitles(media_id, jwt=session_jwt)
+
+ def _real_extract(self, url):
+ live, media_id = self._match_valid_url(url).groups()
+ embed_page = self._download_webpage(
+ 'https://www.rtbf.be/auvio/embed/' + ('direct' if live else 'media'),
+ media_id, query={'id': media_id})
+
+ media_data = self._html_search_regex(r'data-media="([^"]+)"', embed_page, 'media data', fatal=False)
+ if not media_data:
+ if re.search(r'<div[^>]+id="js-error-expired"[^>]+class="(?![^"]*hidden)', embed_page):
+ raise ExtractorError('Livestream has ended.', expected=True)
+ if re.search(r'<div[^>]+id="js-sso-connect"[^>]+class="(?![^"]*hidden)', embed_page):
+ self.raise_login_required()
+
+ raise ExtractorError('Could not find media data')
+
+ data = self._parse_json(media_data, media_id)
+
+ error = data.get('error')
+ if error:
+ raise ExtractorError('%s said: %s' % (self.IE_NAME, error), expected=True)
+
+ provider = data.get('provider')
+ if provider in self._PROVIDERS:
+ return self.url_result(data['url'], self._PROVIDERS[provider])
+
+ title = traverse_obj(data, 'subtitle', 'title')
+ is_live = data.get('isLive')
+ height_re = r'-(\d+)p\.'
+ formats, subtitles = [], {}
+
+        # The old API still returns m3u8 and mpd manifests for livestreams, but these are
+        # 'fake': all they contain is an unrelated 20-second clip.
+ # https://github.com/yt-dlp/yt-dlp/issues/4656#issuecomment-1214461092
+ m3u8_url = None if data.get('isLive') else traverse_obj(data, 'urlHlsAes128', 'urlHls')
+ if m3u8_url:
+ fmts, subs = self._extract_m3u8_formats_and_subtitles(
+ m3u8_url, media_id, 'mp4', m3u8_id='hls', fatal=False)
+ formats.extend(fmts)
+ self._merge_subtitles(subs, target=subtitles)
+
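+        # Progressive URLs under /geo/drm/ are served from the rtbf.be host rather
+        # than rtbf-vod.be, hence the rewrite below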
+ fix_url = lambda x: x.replace('//rtbf-vod.', '//rtbf.') if '/geo/drm/' in x else x
+ http_url = data.get('url')
+ if formats and http_url and re.search(height_re, http_url):
+ http_url = fix_url(http_url)
+ for m3u8_f in formats[:]:
+ height = m3u8_f.get('height')
+ if not height:
+ continue
+ f = m3u8_f.copy()
+ del f['protocol']
+ f.update({
+ 'format_id': m3u8_f['format_id'].replace('hls-', 'http-'),
+ 'url': re.sub(height_re, '-%dp.' % height, http_url),
+ })
+ formats.append(f)
+ else:
+ sources = data.get('sources') or {}
+ for key, format_id in self._QUALITIES:
+ format_url = sources.get(key)
+ if not format_url:
+ continue
+ height = int_or_none(self._search_regex(
+ height_re, format_url, 'height', default=None))
+ formats.append({
+ 'format_id': format_id,
+ 'url': fix_url(format_url),
+ 'height': height,
+ })
+
+ mpd_url = None if data.get('isLive') else data.get('urlDash')
+ if mpd_url and (self.get_param('allow_unplayable_formats') or not data.get('drm')):
+ fmts, subs = self._extract_mpd_formats_and_subtitles(
+ mpd_url, media_id, mpd_id='dash', fatal=False)
+ formats.extend(fmts)
+ self._merge_subtitles(subs, target=subtitles)
+
+ audio_url = data.get('urlAudio')
+ if audio_url:
+ formats.append({
+ 'format_id': 'audio',
+ 'url': audio_url,
+ 'vcodec': 'none',
+ })
+
+ for track in (data.get('tracks') or {}).values():
+ sub_url = track.get('url')
+ if not sub_url:
+ continue
+ subtitles.setdefault(track.get('lang') or 'fr', []).append({
+ 'url': sub_url,
+ })
+
+ if not formats:
+ fmts, subs = self._get_formats_and_subtitles(url, f'live_{media_id}' if is_live else media_id)
+ formats.extend(fmts)
+ self._merge_subtitles(subs, target=subtitles)
+
+ return {
+ 'id': media_id,
+ 'formats': formats,
+ 'title': title,
+ 'description': strip_or_none(data.get('description')),
+ 'thumbnail': data.get('thumbnail'),
+ 'duration': float_or_none(data.get('realDuration')),
+ 'timestamp': int_or_none(data.get('liveFrom')),
+ 'series': data.get('programLabel'),
+ 'subtitles': subtitles,
+ 'is_live': is_live,
+ '_format_sort_fields': ('res', 'proto'),
+ }
diff --git a/yt_dlp/extractor/redbulltv.py b/yt_dlp/extractor/redbulltv.py
new file mode 100644
index 0000000..d1de249
--- /dev/null
+++ b/yt_dlp/extractor/redbulltv.py
@@ -0,0 +1,224 @@
+from .common import InfoExtractor
+from ..networking.exceptions import HTTPError
+from ..utils import (
+    ExtractorError,
+    float_or_none,
+)
+
+
+class RedBullTVIE(InfoExtractor):
+ _VALID_URL = r'https?://(?:www\.)?redbull(?:\.tv|\.com(?:/[^/]+)?(?:/tv)?)(?:/events/[^/]+)?/(?:videos?|live|(?:film|episode)s)/(?P<id>AP-\w+)'
+ _TESTS = [{
+ # film
+ 'url': 'https://www.redbull.tv/video/AP-1Q6XCDTAN1W11',
+ 'md5': 'fb0445b98aa4394e504b413d98031d1f',
+ 'info_dict': {
+ 'id': 'AP-1Q6XCDTAN1W11',
+ 'ext': 'mp4',
+ 'title': 'ABC of... WRC - ABC of... S1E6',
+ 'description': 'md5:5c7ed8f4015c8492ecf64b6ab31e7d31',
+ 'duration': 1582.04,
+ },
+ }, {
+ # episode
+ 'url': 'https://www.redbull.tv/video/AP-1PMHKJFCW1W11',
+ 'info_dict': {
+ 'id': 'AP-1PMHKJFCW1W11',
+ 'ext': 'mp4',
+ 'title': 'Grime - Hashtags S2E4',
+ 'description': 'md5:5546aa612958c08a98faaad4abce484d',
+ 'duration': 904,
+ },
+ 'params': {
+ 'skip_download': True,
+ },
+ }, {
+ 'url': 'https://www.redbull.com/int-en/tv/video/AP-1UWHCAR9S1W11/rob-meets-sam-gaze?playlist=playlists::3f81040a-2f31-4832-8e2e-545b1d39d173',
+ 'only_matching': True,
+ }, {
+ 'url': 'https://www.redbull.com/us-en/videos/AP-1YM9QCYE52111',
+ 'only_matching': True,
+ }, {
+ 'url': 'https://www.redbull.com/us-en/events/AP-1XV2K61Q51W11/live/AP-1XUJ86FDH1W11',
+ 'only_matching': True,
+ }, {
+ 'url': 'https://www.redbull.com/int-en/films/AP-1ZSMAW8FH2111',
+ 'only_matching': True,
+ }, {
+ 'url': 'https://www.redbull.com/int-en/episodes/AP-1TQWK7XE11W11',
+ 'only_matching': True,
+ }]
+
+ def extract_info(self, video_id):
+ session = self._download_json(
+ 'https://api.redbull.tv/v3/session', video_id,
+ note='Downloading access token', query={
+ 'category': 'personal_computer',
+ 'os_family': 'http',
+ })
+ if session.get('code') == 'error':
+ raise ExtractorError('%s said: %s' % (
+ self.IE_NAME, session['message']))
+ token = session['token']
+
+ try:
+ video = self._download_json(
+ 'https://api.redbull.tv/v3/products/' + video_id,
+ video_id, note='Downloading video information',
+ headers={'Authorization': token}
+ )
+ except ExtractorError as e:
+ if isinstance(e.cause, HTTPError) and e.cause.status == 404:
+ error_message = self._parse_json(
+ e.cause.response.read().decode(), video_id)['error']
+ raise ExtractorError('%s said: %s' % (
+ self.IE_NAME, error_message), expected=True)
+ raise
+
+ title = video['title'].strip()
+
+ formats, subtitles = self._extract_m3u8_formats_and_subtitles(
+ 'https://dms.redbull.tv/v3/%s/%s/playlist.m3u8' % (video_id, token),
+ video_id, 'mp4', entry_protocol='m3u8_native', m3u8_id='hls')
+
+ for resource in video.get('resources', []):
+ if resource.startswith('closed_caption_'):
+                split_resource = resource.split('_')
+                if split_resource[2]:
+ subtitles.setdefault('en', []).append({
+ 'url': 'https://resources.redbull.tv/%s/%s' % (video_id, resource),
+                        'ext': split_resource[2],
+ })
+
+ subheading = video.get('subheading')
+ if subheading:
+ title += ' - %s' % subheading
+
+ return {
+ 'id': video_id,
+ 'title': title,
+ 'description': video.get('long_description') or video.get(
+ 'short_description'),
+ 'duration': float_or_none(video.get('duration'), scale=1000),
+ 'formats': formats,
+ 'subtitles': subtitles,
+ }
+
+ def _real_extract(self, url):
+ video_id = self._match_id(url)
+ return self.extract_info(video_id)
+
+
+class RedBullEmbedIE(RedBullTVIE):  # XXX: Do not subclass from concrete IE
+ _VALID_URL = r'https?://(?:www\.)?redbull\.com/embed/(?P<id>rrn:content:[^:]+:[\da-f]{8}-[\da-f]{4}-[\da-f]{4}-[\da-f]{4}-[\da-f]{12}:[a-z]{2}-[A-Z]{2,3})'
+ _TESTS = [{
+ # HLS manifest accessible only using assetId
+ 'url': 'https://www.redbull.com/embed/rrn:content:episode-videos:f3021f4f-3ed4-51ac-915a-11987126e405:en-INT',
+ 'only_matching': True,
+ }]
+ _VIDEO_ESSENSE_TMPL = '''... on %s {
+ videoEssence {
+ attributes
+ }
+ }'''
+
+ def _real_extract(self, url):
+ rrn_id = self._match_id(url)
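+        # Resolve the rrn: content ID to a playable asset ID via Red Bull's GraphQL
+        # API; the resource may be a LiveVideo or a VideoResource, so both are queried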
+ asset_id = self._download_json(
+ 'https://edge-graphql.crepo-production.redbullaws.com/v1/graphql',
+ rrn_id, headers={
+ 'Accept': 'application/json',
+ 'API-KEY': 'e90a1ff11335423998b100c929ecc866',
+ }, query={
+ 'query': '''{
+ resource(id: "%s", enforceGeoBlocking: false) {
+ %s
+ %s
+ }
+}''' % (rrn_id, self._VIDEO_ESSENSE_TMPL % 'LiveVideo', self._VIDEO_ESSENSE_TMPL % 'VideoResource'),
+ })['data']['resource']['videoEssence']['attributes']['assetId']
+ return self.extract_info(asset_id)
+
+
+class RedBullTVRrnContentIE(InfoExtractor):
+ _VALID_URL = r'https?://(?:www\.)?redbull\.com/(?P<region>[a-z]{2,3})-(?P<lang>[a-z]{2})/tv/(?:video|live|film)/(?P<id>rrn:content:[^:]+:[\da-f]{8}-[\da-f]{4}-[\da-f]{4}-[\da-f]{4}-[\da-f]{12})'
+ _TESTS = [{
+ 'url': 'https://www.redbull.com/int-en/tv/video/rrn:content:live-videos:e3e6feb4-e95f-50b7-962a-c70f8fd13c73/mens-dh-finals-fort-william',
+ 'only_matching': True,
+ }, {
+ 'url': 'https://www.redbull.com/int-en/tv/video/rrn:content:videos:a36a0f36-ff1b-5db8-a69d-ee11a14bf48b/tn-ts-style?playlist=rrn:content:event-profiles:83f05926-5de8-5389-b5e4-9bb312d715e8:extras',
+ 'only_matching': True,
+ }, {
+ 'url': 'https://www.redbull.com/int-en/tv/film/rrn:content:films:d1f4d00e-4c04-5d19-b510-a805ffa2ab83/follow-me',
+ 'only_matching': True,
+ }]
+
+ def _real_extract(self, url):
+ region, lang, rrn_id = self._match_valid_url(url).groups()
+ rrn_id += ':%s-%s' % (lang, region.upper())
+ return self.url_result(
+ 'https://www.redbull.com/embed/' + rrn_id,
+ RedBullEmbedIE.ie_key(), rrn_id)
+
+
+class RedBullIE(InfoExtractor):
+ _VALID_URL = r'https?://(?:www\.)?redbull\.com/(?P<region>[a-z]{2,3})-(?P<lang>[a-z]{2})/(?P<type>(?:episode|film|(?:(?:recap|trailer)-)?video)s|live)/(?!AP-|rrn:content:)(?P<id>[^/?#&]+)'
+ _TESTS = [{
+ 'url': 'https://www.redbull.com/int-en/episodes/grime-hashtags-s02-e04',
+ 'md5': 'db8271a7200d40053a1809ed0dd574ff',
+ 'info_dict': {
+ 'id': 'AA-1MT8DQWA91W14',
+ 'ext': 'mp4',
+ 'title': 'Grime - Hashtags S2E4',
+ 'description': 'md5:5546aa612958c08a98faaad4abce484d',
+ },
+ }, {
+ 'url': 'https://www.redbull.com/int-en/films/kilimanjaro-mountain-of-greatness',
+ 'only_matching': True,
+ }, {
+ 'url': 'https://www.redbull.com/int-en/recap-videos/uci-mountain-bike-world-cup-2017-mens-xco-finals-from-vallnord',
+ 'only_matching': True,
+ }, {
+ 'url': 'https://www.redbull.com/int-en/trailer-videos/kings-of-content',
+ 'only_matching': True,
+ }, {
+ 'url': 'https://www.redbull.com/int-en/videos/tnts-style-red-bull-dance-your-style-s1-e12',
+ 'only_matching': True,
+ }, {
+ 'url': 'https://www.redbull.com/int-en/live/mens-dh-finals-fort-william',
+ 'only_matching': True,
+ }, {
+        # only available on the int-en website, so a fallback is needed for the API
+ # https://www.redbull.com/v3/api/graphql/v1/v3/query/en-GB>en-INT?filter[uriSlug]=fia-wrc-saturday-recap-estonia&rb3Schema=v1:hero
+ 'url': 'https://www.redbull.com/gb-en/live/fia-wrc-saturday-recap-estonia',
+ 'only_matching': True,
+ }]
+ _INT_FALLBACK_LIST = ['de', 'en', 'es', 'fr']
+ _LAT_FALLBACK_MAP = ['ar', 'bo', 'car', 'cl', 'co', 'mx', 'pe']
+
+ def _real_extract(self, url):
+ region, lang, filter_type, display_id = self._match_valid_url(url).groups()
+ if filter_type == 'episodes':
+ filter_type = 'episode-videos'
+ elif filter_type == 'live':
+ filter_type = 'live-videos'
+
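+        # Build a locale fallback chain (e.g. 'en-GB>en-INT') so that content only
+        # published on the international site can still be resolved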
+ regions = [region.upper()]
+ if region != 'int':
+ if region in self._LAT_FALLBACK_MAP:
+ regions.append('LAT')
+ if lang in self._INT_FALLBACK_LIST:
+ regions.append('INT')
+ locale = '>'.join(['%s-%s' % (lang, reg) for reg in regions])
+
+ rrn_id = self._download_json(
+ 'https://www.redbull.com/v3/api/graphql/v1/v3/query/' + locale,
+ display_id, query={
+ 'filter[type]': filter_type,
+ 'filter[uriSlug]': display_id,
+ 'rb3Schema': 'v1:hero',
+ })['data']['id']
+
+ return self.url_result(
+ 'https://www.redbull.com/embed/' + rrn_id,
+ RedBullEmbedIE.ie_key(), rrn_id)
diff --git a/yt_dlp/extractor/reddit.py b/yt_dlp/extractor/reddit.py
new file mode 100644
index 0000000..62f669f
--- /dev/null
+++ b/yt_dlp/extractor/reddit.py
@@ -0,0 +1,353 @@
+import urllib.parse
+
+from .common import InfoExtractor
+from ..utils import (
+ ExtractorError,
+ float_or_none,
+ int_or_none,
+ traverse_obj,
+ try_get,
+ unescapeHTML,
+ urlencode_postdata,
+ url_or_none,
+)
+
+
+class RedditIE(InfoExtractor):
+ _NETRC_MACHINE = 'reddit'
+ _VALID_URL = r'https?://(?P<host>(?:\w+\.)?reddit(?:media)?\.com)/(?P<slug>(?:(?:r|user)/[^/]+/)?comments/(?P<id>[^/?#&]+))'
+ _TESTS = [{
+ 'url': 'https://www.reddit.com/r/videos/comments/6rrwyj/that_small_heart_attack/',
+ 'info_dict': {
+ 'id': 'zv89llsvexdz',
+ 'ext': 'mp4',
+ 'display_id': '6rrwyj',
+ 'title': 'That small heart attack.',
+ 'thumbnail': r're:^https?://.*\.(?:jpg|png)',
+ 'thumbnails': 'count:4',
+ 'timestamp': 1501941939,
+ 'upload_date': '20170805',
+ 'uploader': 'Antw87',
+ 'duration': 12,
+ 'like_count': int,
+ 'dislike_count': int,
+ 'comment_count': int,
+ 'age_limit': 0,
+ 'channel_id': 'videos',
+ },
+ 'params': {
+ 'skip_download': True,
+ },
+ }, {
+ # 1080p fallback format
+ 'url': 'https://www.reddit.com/r/aww/comments/90bu6w/heat_index_was_110_degrees_so_we_offered_him_a/',
+ 'md5': '8b5902cfda3006bf90faea7adf765a49',
+ 'info_dict': {
+ 'id': 'gyh95hiqc0b11',
+ 'ext': 'mp4',
+ 'display_id': '90bu6w',
+ 'title': 'Heat index was 110 degrees so we offered him a cold drink. He went for a full body soak instead',
+ 'thumbnail': r're:^https?://.*\.(?:jpg|png)',
+ 'thumbnails': 'count:7',
+ 'timestamp': 1532051078,
+ 'upload_date': '20180720',
+ 'uploader': 'FootLoosePickleJuice',
+ 'duration': 14,
+ 'like_count': int,
+ 'dislike_count': int,
+ 'comment_count': int,
+ 'age_limit': 0,
+ 'channel_id': 'aww',
+ },
+ }, {
+ # User post
+ 'url': 'https://www.reddit.com/user/creepyt0es/comments/nip71r/i_plan_to_make_more_stickers_and_prints_check/',
+ 'info_dict': {
+ 'id': 'zasobba6wp071',
+ 'ext': 'mp4',
+ 'display_id': 'nip71r',
+ 'title': 'I plan to make more stickers and prints! Check them out on my Etsy! Or get them through my Patreon. Links below.',
+ 'thumbnail': r're:^https?://.*\.(?:jpg|png)',
+ 'thumbnails': 'count:5',
+ 'timestamp': 1621709093,
+ 'upload_date': '20210522',
+ 'uploader': 'creepyt0es',
+ 'duration': 6,
+ 'like_count': int,
+ 'dislike_count': int,
+ 'comment_count': int,
+ 'age_limit': 0,
+ 'channel_id': 'u_creepyt0es',
+ },
+ 'params': {
+ 'skip_download': True,
+ },
+ }, {
+ # videos embedded in reddit text post
+ 'url': 'https://www.reddit.com/r/KamenRider/comments/wzqkxp/finale_kamen_rider_revice_episode_50_family_to/',
+ 'playlist_count': 2,
+ 'info_dict': {
+ 'id': 'wzqkxp',
+ 'title': 'md5:72d3d19402aa11eff5bd32fc96369b37',
+ },
+ }, {
+ # crossposted reddit-hosted media
+ 'url': 'https://www.reddit.com/r/dumbfuckers_club/comments/zjjw82/cringe/',
+ 'md5': '746180895c7b75a9d6b05341f507699a',
+ 'info_dict': {
+ 'id': 'a1oneun6pa5a1',
+ 'ext': 'mp4',
+ 'display_id': 'zjjw82',
+ 'title': 'Cringe',
+ 'uploader': 'Otaku-senpai69420',
+ 'thumbnail': r're:^https?://.*\.(?:jpg|png)',
+ 'upload_date': '20221212',
+ 'timestamp': 1670812309,
+ 'duration': 16,
+ 'like_count': int,
+ 'dislike_count': int,
+ 'comment_count': int,
+ 'age_limit': 0,
+ 'channel_id': 'dumbfuckers_club',
+ },
+ }, {
+ # post link without subreddit
+ 'url': 'https://www.reddit.com/comments/124pp33',
+ 'md5': '15eec9d828adcef4468b741a7e45a395',
+ 'info_dict': {
+ 'id': 'antsenjc2jqa1',
+ 'ext': 'mp4',
+ 'display_id': '124pp33',
+ 'title': 'Harmless prank of some old friends',
+ 'uploader': 'Dudezila',
+ 'channel_id': 'ContagiousLaughter',
+ 'duration': 17,
+ 'upload_date': '20230328',
+ 'timestamp': 1680012043,
+ 'thumbnail': r're:^https?://.*\.(?:jpg|png)',
+ 'age_limit': 0,
+ 'comment_count': int,
+ 'dislike_count': int,
+ 'like_count': int,
+ },
+ }, {
+ # quarantined subreddit post
+ 'url': 'https://old.reddit.com/r/GenZedong/comments/12fujy3/based_hasan/',
+ 'md5': '3156ea69e3c1f1b6259683c5abd36e71',
+ 'info_dict': {
+ 'id': '8bwtclfggpsa1',
+ 'ext': 'mp4',
+ 'display_id': '12fujy3',
+ 'title': 'Based Hasan?',
+ 'uploader': 'KingNigelXLII',
+ 'channel_id': 'GenZedong',
+ 'duration': 16,
+ 'upload_date': '20230408',
+ 'timestamp': 1680979138,
+ 'age_limit': 0,
+ 'comment_count': int,
+ 'dislike_count': int,
+ 'like_count': int,
+ },
+ 'skip': 'Requires account that has opted-in to the GenZedong subreddit',
+ }, {
+ 'url': 'https://www.reddit.com/r/videos/comments/6rrwyj',
+ 'only_matching': True,
+ }, {
+ # imgur
+ 'url': 'https://www.reddit.com/r/MadeMeSmile/comments/6t7wi5/wait_for_it/',
+ 'only_matching': True,
+ }, {
+ # imgur @ old reddit
+ 'url': 'https://old.reddit.com/r/MadeMeSmile/comments/6t7wi5/wait_for_it/',
+ 'only_matching': True,
+ }, {
+ # streamable
+ 'url': 'https://www.reddit.com/r/videos/comments/6t7sg9/comedians_hilarious_joke_about_the_guam_flag/',
+ 'only_matching': True,
+ }, {
+ # youtube
+ 'url': 'https://www.reddit.com/r/videos/comments/6t75wq/southern_man_tries_to_speak_without_an_accent/',
+ 'only_matching': True,
+ }, {
+ # reddit video @ nm reddit
+ 'url': 'https://nm.reddit.com/r/Cricket/comments/8idvby/lousy_cameraman_finds_himself_in_cairns_line_of/',
+ 'only_matching': True,
+ }, {
+ 'url': 'https://www.redditmedia.com/r/serbia/comments/pu9wbx/ako_vu%C4%8Di%C4%87_izgubi_izbore_ja_%C4%87u_da_crknem/',
+ 'only_matching': True,
+ }]
+
+ def _perform_login(self, username, password):
+ captcha = self._download_json(
+ 'https://www.reddit.com/api/requires_captcha/login.json', None,
+ 'Checking login requirement')['required']
+ if captcha:
+ raise ExtractorError('Reddit is requiring captcha before login', expected=True)
+ login = self._download_json(
+ f'https://www.reddit.com/api/login/{username}', None, data=urlencode_postdata({
+ 'op': 'login-main',
+ 'user': username,
+ 'passwd': password,
+ 'api_type': 'json',
+ }), note='Logging in', errnote='Login request failed')
+ errors = '; '.join(traverse_obj(login, ('json', 'errors', ..., 1)))
+ if errors:
+ raise ExtractorError(f'Unable to login, Reddit API says {errors}', expected=True)
+ elif not traverse_obj(login, ('json', 'data', 'cookie', {str})):
+ raise ExtractorError('Unable to login, no cookie was returned')
+
+ def _real_extract(self, url):
+ host, slug, video_id = self._match_valid_url(url).group('host', 'slug', 'id')
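+        # Appending '/.json' to a post URL yields the post's raw JSON listing;
+        # 403 responses are tolerated so the refusal reason can be inspected below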
+
+ data = self._download_json(
+ f'https://{host}/{slug}/.json', video_id, fatal=False, expected_status=403)
+ if not data:
+ fallback_host = 'old.reddit.com' if host != 'old.reddit.com' else 'www.reddit.com'
+ self.to_screen(f'{host} request failed, retrying with {fallback_host}')
+ data = self._download_json(
+ f'https://{fallback_host}/{slug}/.json', video_id, expected_status=403)
+
+ if traverse_obj(data, 'error') == 403:
+ reason = data.get('reason')
+ if reason == 'quarantined':
+ self.raise_login_required('Quarantined subreddit; an account that has opted in is required')
+ elif reason == 'private':
+ self.raise_login_required('Private subreddit; an account that has been approved is required')
+ else:
+ raise ExtractorError(f'HTTP Error 403 Forbidden; reason given: {reason}')
+
+ data = data[0]['data']['children'][0]['data']
+ video_url = data['url']
+
+ over_18 = data.get('over_18')
+ if over_18 is True:
+ age_limit = 18
+ elif over_18 is False:
+ age_limit = 0
+ else:
+ age_limit = None
+
+ thumbnails = []
+
+ def add_thumbnail(src):
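+            # Preview URLs come back HTML-escaped (e.g. '&amp;' in query strings),
+            # so they are unescaped before use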
+ if not isinstance(src, dict):
+ return
+ thumbnail_url = url_or_none(src.get('url'))
+ if not thumbnail_url:
+ return
+ thumbnails.append({
+ 'url': unescapeHTML(thumbnail_url),
+ 'width': int_or_none(src.get('width')),
+ 'height': int_or_none(src.get('height')),
+ 'http_headers': {'Accept': '*/*'},
+ })
+
+ for image in try_get(data, lambda x: x['preview']['images']) or []:
+ if not isinstance(image, dict):
+ continue
+ add_thumbnail(image.get('source'))
+ resolutions = image.get('resolutions')
+ if isinstance(resolutions, list):
+ for resolution in resolutions:
+ add_thumbnail(resolution)
+
+ info = {
+ 'title': data.get('title'),
+ 'thumbnails': thumbnails,
+ 'timestamp': float_or_none(data.get('created_utc')),
+ 'uploader': data.get('author'),
+ 'channel_id': data.get('subreddit'),
+ 'like_count': int_or_none(data.get('ups')),
+ 'dislike_count': int_or_none(data.get('downs')),
+ 'comment_count': int_or_none(data.get('num_comments')),
+ 'age_limit': age_limit,
+ }
+
+ parsed_url = urllib.parse.urlparse(video_url)
+
+ # Check for embeds in text posts, or else raise to avoid recursing into the same reddit URL
+ if 'reddit.com' in parsed_url.netloc and f'/{video_id}/' in parsed_url.path:
+ entries = []
+ for media in traverse_obj(data, ('media_metadata', ...), expected_type=dict):
+ if not media.get('id') or media.get('e') != 'RedditVideo':
+ continue
+ formats = []
+ if media.get('hlsUrl'):
+ formats.extend(self._extract_m3u8_formats(
+ unescapeHTML(media['hlsUrl']), video_id, 'mp4', m3u8_id='hls', fatal=False))
+ if media.get('dashUrl'):
+ formats.extend(self._extract_mpd_formats(
+ unescapeHTML(media['dashUrl']), video_id, mpd_id='dash', fatal=False))
+ if formats:
+ entries.append({
+ 'id': media['id'],
+ 'display_id': video_id,
+ 'formats': formats,
+ **info,
+ })
+ if entries:
+ return self.playlist_result(entries, video_id, info.get('title'))
+ raise ExtractorError('No media found', expected=True)
+
+ # Check if media is hosted on reddit:
+ reddit_video = traverse_obj(data, (
+ (None, ('crosspost_parent_list', ...)), ('secure_media', 'media'), 'reddit_video'), get_all=False)
+ if reddit_video:
+ playlist_urls = [
+ try_get(reddit_video, lambda x: unescapeHTML(x[y]))
+ for y in ('dash_url', 'hls_url')
+ ]
+
+ # Update video_id
+ display_id = video_id
+ video_id = self._search_regex(
+ r'https?://v\.redd\.it/(?P<id>[^/?#&]+)', reddit_video['fallback_url'],
+ 'video_id', default=display_id)
+
+ dash_playlist_url = playlist_urls[0] or f'https://v.redd.it/{video_id}/DASHPlaylist.mpd'
+ hls_playlist_url = playlist_urls[1] or f'https://v.redd.it/{video_id}/HLSPlaylist.m3u8'
+
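+            # The fallback URL is a progressive, video-only MP4 (hence acodec 'none');
+            # audio is only available through the DASH/HLS manifests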
+ formats = [{
+ 'url': unescapeHTML(reddit_video['fallback_url']),
+ 'height': int_or_none(reddit_video.get('height')),
+ 'width': int_or_none(reddit_video.get('width')),
+ 'tbr': int_or_none(reddit_video.get('bitrate_kbps')),
+ 'acodec': 'none',
+ 'vcodec': 'h264',
+ 'ext': 'mp4',
+ 'format_id': 'fallback',
+ 'format_note': 'DASH video, mp4_dash',
+ }]
+ hls_fmts, subtitles = self._extract_m3u8_formats_and_subtitles(
+ hls_playlist_url, display_id, 'mp4', m3u8_id='hls', fatal=False)
+ formats.extend(hls_fmts)
+ dash_fmts, dash_subs = self._extract_mpd_formats_and_subtitles(
+ dash_playlist_url, display_id, mpd_id='dash', fatal=False)
+ formats.extend(dash_fmts)
+ self._merge_subtitles(dash_subs, target=subtitles)
+
+ return {
+ **info,
+ 'id': video_id,
+ 'display_id': display_id,
+ 'formats': formats,
+ 'subtitles': subtitles,
+ 'duration': int_or_none(reddit_video.get('duration')),
+ }
+
+ if parsed_url.netloc == 'v.redd.it':
+ self.raise_no_formats('This video is processing', expected=True, video_id=video_id)
+ return {
+ **info,
+ 'id': parsed_url.path.split('/')[1],
+ 'display_id': video_id,
+ }
+
+ # Not hosted on reddit, must continue extraction
+ return {
+ **info,
+ 'display_id': video_id,
+ '_type': 'url_transparent',
+ 'url': video_url,
+ }
diff --git a/yt_dlp/extractor/redge.py b/yt_dlp/extractor/redge.py
new file mode 100644
index 0000000..875d6f8
--- /dev/null
+++ b/yt_dlp/extractor/redge.py
@@ -0,0 +1,135 @@
+import functools
+
+from .common import InfoExtractor
+from ..networking import HEADRequest
+from ..utils import (
+ float_or_none,
+ int_or_none,
+ join_nonempty,
+ parse_qs,
+ update_url_query,
+)
+from ..utils.traversal import traverse_obj
+
+
+class RedCDNLivxIE(InfoExtractor):
+ _VALID_URL = r'https?://[^.]+\.(?:dcs\.redcdn|atmcdn)\.pl/(?:live(?:dash|hls|ss)|nvr)/o2/(?P<tenant>[^/?#]+)/(?P<id>[^?#]+)\.livx'
+ IE_NAME = 'redcdnlivx'
+
+ _TESTS = [{
+ 'url': 'https://r.dcs.redcdn.pl/livedash/o2/senat/ENC02/channel.livx?indexMode=true&startTime=638272860000&stopTime=638292544000',
+ 'info_dict': {
+ 'id': 'ENC02-638272860000-638292544000',
+ 'ext': 'mp4',
+ 'title': 'ENC02',
+ 'duration': 19683.982,
+ 'live_status': 'was_live',
+ },
+ }, {
+ 'url': 'https://r.dcs.redcdn.pl/livedash/o2/sejm/ENC18/live.livx?indexMode=true&startTime=722333096000&stopTime=722335562000',
+ 'info_dict': {
+ 'id': 'ENC18-722333096000-722335562000',
+ 'ext': 'mp4',
+ 'title': 'ENC18',
+ 'duration': 2463.995,
+ 'live_status': 'was_live',
+ },
+ }, {
+ 'url': 'https://r.dcs.redcdn.pl/livehls/o2/sportevolution/live/triathlon2018/warsaw.livx/playlist.m3u8?startTime=550305000000&stopTime=550327620000',
+ 'info_dict': {
+ 'id': 'triathlon2018-warsaw-550305000000-550327620000',
+ 'ext': 'mp4',
+ 'title': 'triathlon2018/warsaw',
+ 'duration': 22619.98,
+ 'live_status': 'was_live',
+ },
+ }, {
+ 'url': 'https://n-25-12.dcs.redcdn.pl/nvr/o2/sejm/Migacz-ENC01/1.livx?startTime=722347200000&stopTime=722367345000',
+ 'only_matching': True,
+ }, {
+ 'url': 'https://redir.atmcdn.pl/nvr/o2/sejm/ENC08/1.livx?startTime=503831270000&stopTime=503840040000',
+ 'only_matching': True,
+ }]
+
+ """
+ Known methods (first in url path):
+ - `livedash` - DASH MPD
+ - `livehls` - HTTP Live Streaming
+ - `livess` - IIS Smooth Streaming
+ - `nvr` - CCTV mode, directly returns a file, typically flv, avc1, aac
+ - `sc` - shoutcast/icecast (audio streams, like radio)
+ """
+
+ def _real_extract(self, url):
+ tenant, path = self._match_valid_url(url).group('tenant', 'id')
+ qs = parse_qs(url)
+ start_time = traverse_obj(qs, ('startTime', 0, {int_or_none}))
+ stop_time = traverse_obj(qs, ('stopTime', 0, {int_or_none}))
+
+ def livx_mode(mode):
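+            # Build the manifest/file URL for the given delivery method (see the
+            # list of known methods above), forwarding the recording time window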
+ suffix = ''
+ if mode == 'livess':
+ suffix = '/manifest'
+ elif mode == 'livehls':
+ suffix = '/playlist.m3u8'
+ file_qs = {}
+ if start_time:
+ file_qs['startTime'] = start_time
+ if stop_time:
+ file_qs['stopTime'] = stop_time
+ if mode == 'nvr':
+ file_qs['nolimit'] = 1
+ elif mode != 'sc':
+ file_qs['indexMode'] = 'true'
+ return update_url_query(f'https://r.dcs.redcdn.pl/{mode}/o2/{tenant}/{path}.livx{suffix}', file_qs)
+
+        # A transmission carries no intrinsic ID or title, so synthesize both from the URL path
+        title = (path
+                 .replace('/live', '').replace('live/', '')
+                 .replace('/channel', '').replace('channel/', '')
+                 .strip('/'))
+ video_id = join_nonempty(title.replace('/', '-'), start_time, stop_time)
+
+ formats = []
+        # Download the manifest manually (rather than via _extract_ism_formats) so that stream metadata can also be read from it
+ ism_res = self._download_xml_handle(
+ livx_mode('livess'), video_id,
+ note='Downloading ISM manifest',
+ errnote='Failed to download ISM manifest',
+ fatal=False)
+ ism_doc = None
+ if ism_res is not False:
+ ism_doc, ism_urlh = ism_res
+ formats, _ = self._parse_ism_formats_and_subtitles(ism_doc, ism_urlh.url, 'ss')
+
+ nvr_urlh = self._request_webpage(
+ HEADRequest(livx_mode('nvr')), video_id, 'Follow flv file redirect', fatal=False,
+ expected_status=lambda _: True)
+ if nvr_urlh and nvr_urlh.status == 200:
+ formats.append({
+ 'url': nvr_urlh.url,
+ 'ext': 'flv',
+ 'format_id': 'direct-0',
+ 'preference': -1, # might be slow
+ })
+ formats.extend(self._extract_mpd_formats(livx_mode('livedash'), video_id, mpd_id='dash', fatal=False))
+ formats.extend(self._extract_m3u8_formats(
+ livx_mode('livehls'), video_id, m3u8_id='hls', ext='mp4', fatal=False))
+
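+        # Smooth Streaming durations are expressed in TimeScale units
+        # (10,000,000 ticks per second unless the manifest says otherwise)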
+ time_scale = traverse_obj(ism_doc, ('@TimeScale', {int_or_none})) or 10000000
+ duration = traverse_obj(
+ ism_doc, ('@Duration', {functools.partial(float_or_none, scale=time_scale)})) or None
+
+ live_status = None
+ if traverse_obj(ism_doc, '@IsLive') == 'TRUE':
+ live_status = 'is_live'
+ elif duration:
+ live_status = 'was_live'
+
+ return {
+ 'id': video_id,
+ 'title': title,
+ 'formats': formats,
+ 'duration': duration,
+ 'live_status': live_status,
+ }
diff --git a/yt_dlp/extractor/redgifs.py b/yt_dlp/extractor/redgifs.py
new file mode 100644
index 0000000..f945320
--- /dev/null
+++ b/yt_dlp/extractor/redgifs.py
@@ -0,0 +1,260 @@
+import functools
+
+from .common import InfoExtractor
+from ..compat import compat_parse_qs
+from ..networking.exceptions import HTTPError
+from ..utils import (
+    ExtractorError,
+    int_or_none,
+    OnDemandPagedList,
+    qualities,
+    try_get,
+)
+
+
+class RedGifsBaseInfoExtractor(InfoExtractor):
+ _FORMATS = {
+ 'gif': 250,
+ 'sd': 480,
+ 'hd': None,
+ }
+
+ _API_HEADERS = {
+ 'referer': 'https://www.redgifs.com/',
+ 'origin': 'https://www.redgifs.com',
+ 'content-type': 'application/json',
+ }
+
+ def _parse_gif_data(self, gif_data):
+ video_id = gif_data.get('id')
+ quality = qualities(tuple(self._FORMATS.keys()))
+
+ orig_height = int_or_none(gif_data.get('height'))
+ aspect_ratio = try_get(gif_data, lambda x: orig_height / x['width'])
+
+ formats = []
+ for format_id, height in self._FORMATS.items():
+ video_url = gif_data['urls'].get(format_id)
+ if not video_url:
+ continue
+ height = min(orig_height, height or orig_height)
+ formats.append({
+ 'url': video_url,
+ 'format_id': format_id,
+                'width': height / aspect_ratio if aspect_ratio else None,  # aspect_ratio is height/width, so divide
+ 'height': height,
+ 'quality': quality(format_id),
+ })
+
+ return {
+ 'id': video_id,
+ 'webpage_url': f'https://redgifs.com/watch/{video_id}',
+ 'extractor_key': RedGifsIE.ie_key(),
+ 'extractor': 'RedGifs',
+ 'title': ' '.join(gif_data.get('tags') or []) or 'RedGifs',
+ 'timestamp': int_or_none(gif_data.get('createDate')),
+ 'uploader': gif_data.get('userName'),
+ 'duration': int_or_none(gif_data.get('duration')),
+ 'view_count': int_or_none(gif_data.get('views')),
+ 'like_count': int_or_none(gif_data.get('likes')),
+ 'categories': gif_data.get('tags') or [],
+ 'tags': gif_data.get('tags'),
+ 'age_limit': 18,
+ 'formats': formats,
+ }
+
+ def _fetch_oauth_token(self, video_id):
+ # https://github.com/Redgifs/api/wiki/Temporary-tokens
+ auth = self._download_json('https://api.redgifs.com/v2/auth/temporary',
+ video_id, note='Fetching temporary token')
+ if not auth.get('token'):
+ raise ExtractorError('Unable to get temporary token')
+ self._API_HEADERS['authorization'] = f'Bearer {auth["token"]}'
+
+ def _call_api(self, ep, video_id, *args, **kwargs):
+ for first_attempt in True, False:
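+            # First pass reuses any cached token; on HTTP 401 the stale token
+            # is dropped and the request is retried once with a fresh one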
+ if 'authorization' not in self._API_HEADERS:
+ self._fetch_oauth_token(video_id)
+ try:
+ headers = dict(self._API_HEADERS)
+ headers['x-customheader'] = f'https://www.redgifs.com/watch/{video_id}'
+ data = self._download_json(
+ f'https://api.redgifs.com/v2/{ep}', video_id, headers=headers, *args, **kwargs)
+ break
+ except ExtractorError as e:
+ if first_attempt and isinstance(e.cause, HTTPError) and e.cause.status == 401:
+ del self._API_HEADERS['authorization'] # refresh the token
+ continue
+ raise
+
+ if 'error' in data:
+ raise ExtractorError(f'RedGifs said: {data["error"]}', expected=True, video_id=video_id)
+ return data
+
+ def _fetch_page(self, ep, video_id, query, page):
+ query['page'] = page + 1
+ data = self._call_api(
+ ep, video_id, query=query, note=f'Downloading JSON metadata page {page + 1}')
+
+ for entry in data['gifs']:
+ yield self._parse_gif_data(entry)
+
+ def _prepare_api_query(self, query, fields):
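+        # Translate recognized URL query parameters into API parameters,
+        # applying the given defaults and dropping unset values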
+ api_query = [
+ (field_name, query.get(field_name, (default,))[0])
+ for field_name, default in fields.items()]
+
+ return {key: val for key, val in api_query if val is not None}
+
+ def _paged_entries(self, ep, item_id, query, fields):
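+        # A 'page' parameter in the original URL pins extraction to that
+        # single page; otherwise all pages are fetched lazily on demand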
+ page = int_or_none(query.get('page', (None,))[0])
+ page_fetcher = functools.partial(
+ self._fetch_page, ep, item_id, self._prepare_api_query(query, fields))
+ return page_fetcher(page) if page else OnDemandPagedList(page_fetcher, self._PAGE_SIZE)
+
+
+class RedGifsIE(RedGifsBaseInfoExtractor):
+ _VALID_URL = r'https?://(?:(?:www\.)?redgifs\.com/watch/|thumbs2\.redgifs\.com/)(?P<id>[^-/?#\.]+)'
+ _TESTS = [{
+ 'url': 'https://www.redgifs.com/watch/squeakyhelplesswisent',
+ 'info_dict': {
+ 'id': 'squeakyhelplesswisent',
+ 'ext': 'mp4',
+ 'title': 'Hotwife Legs Thick',
+ 'timestamp': 1636287915,
+ 'upload_date': '20211107',
+ 'uploader': 'ignored52',
+ 'duration': 16,
+ 'view_count': int,
+ 'like_count': int,
+ 'categories': list,
+ 'age_limit': 18,
+ 'tags': list,
+ }
+ }, {
+ 'url': 'https://thumbs2.redgifs.com/SqueakyHelplessWisent-mobile.mp4#t=0',
+ 'info_dict': {
+ 'id': 'squeakyhelplesswisent',
+ 'ext': 'mp4',
+ 'title': 'Hotwife Legs Thick',
+ 'timestamp': 1636287915,
+ 'upload_date': '20211107',
+ 'uploader': 'ignored52',
+ 'duration': 16,
+ 'view_count': int,
+ 'like_count': int,
+ 'categories': list,
+ 'age_limit': 18,
+ 'tags': list,
+ }
+ }]
+
+ def _real_extract(self, url):
+ video_id = self._match_id(url).lower()
+ video_info = self._call_api(
+ f'gifs/{video_id}?views=yes', video_id, note='Downloading video info')
+ return self._parse_gif_data(video_info['gif'])
+
+
+class RedGifsSearchIE(RedGifsBaseInfoExtractor):
+ IE_DESC = 'Redgifs search'
+ _VALID_URL = r'https?://(?:www\.)?redgifs\.com/browse\?(?P<query>[^#]+)'
+ _PAGE_SIZE = 80
+ _TESTS = [
+ {
+ 'url': 'https://www.redgifs.com/browse?tags=Lesbian',
+ 'info_dict': {
+ 'id': 'tags=Lesbian',
+ 'title': 'Lesbian',
+ 'description': 'RedGifs search for Lesbian, ordered by trending'
+ },
+ 'playlist_mincount': 100,
+ },
+ {
+ 'url': 'https://www.redgifs.com/browse?type=g&order=latest&tags=Lesbian',
+ 'info_dict': {
+ 'id': 'type=g&order=latest&tags=Lesbian',
+ 'title': 'Lesbian',
+ 'description': 'RedGifs search for Lesbian, ordered by latest'
+ },
+ 'playlist_mincount': 100,
+ },
+ {
+ 'url': 'https://www.redgifs.com/browse?type=g&order=latest&tags=Lesbian&page=2',
+ 'info_dict': {
+ 'id': 'type=g&order=latest&tags=Lesbian&page=2',
+ 'title': 'Lesbian',
+ 'description': 'RedGifs search for Lesbian, ordered by latest'
+ },
+ 'playlist_count': 80,
+ }
+ ]
+
+ def _real_extract(self, url):
+ query_str = self._match_valid_url(url).group('query')
+ query = compat_parse_qs(query_str)
+ if not query.get('tags'):
+ raise ExtractorError('Invalid query tags', expected=True)
+
+ tags = query.get('tags')[0]
+ order = query.get('order', ('trending',))[0]
+
+ query['search_text'] = [tags]
+ entries = self._paged_entries('gifs/search', query_str, query, {
+ 'search_text': None,
+ 'order': 'trending',
+ 'type': None,
+ })
+
+ return self.playlist_result(
+ entries, query_str, tags, f'RedGifs search for {tags}, ordered by {order}')
+
+
+class RedGifsUserIE(RedGifsBaseInfoExtractor):
+ IE_DESC = 'Redgifs user'
+ _VALID_URL = r'https?://(?:www\.)?redgifs\.com/users/(?P<username>[^/?#]+)(?:\?(?P<query>[^#]+))?'
+ _PAGE_SIZE = 30
+ _TESTS = [
+ {
+ 'url': 'https://www.redgifs.com/users/lamsinka89',
+ 'info_dict': {
+ 'id': 'lamsinka89',
+ 'title': 'lamsinka89',
+ 'description': 'RedGifs user lamsinka89, ordered by recent'
+ },
+ 'playlist_mincount': 100,
+ },
+ {
+ 'url': 'https://www.redgifs.com/users/lamsinka89?page=3',
+ 'info_dict': {
+ 'id': 'lamsinka89?page=3',
+ 'title': 'lamsinka89',
+ 'description': 'RedGifs user lamsinka89, ordered by recent'
+ },
+ 'playlist_count': 30,
+ },
+ {
+ 'url': 'https://www.redgifs.com/users/lamsinka89?order=best&type=g',
+ 'info_dict': {
+ 'id': 'lamsinka89?order=best&type=g',
+ 'title': 'lamsinka89',
+ 'description': 'RedGifs user lamsinka89, ordered by best'
+ },
+ 'playlist_mincount': 100,
+ }
+ ]
+
+ def _real_extract(self, url):
+ username, query_str = self._match_valid_url(url).group('username', 'query')
+ playlist_id = f'{username}?{query_str}' if query_str else username
+
+ query = compat_parse_qs(query_str)
+ order = query.get('order', ('recent',))[0]
+
+ entries = self._paged_entries(f'users/{username}/search', playlist_id, query, {
+ 'order': 'recent',
+ 'type': None,
+ })
+
+ return self.playlist_result(
+ entries, playlist_id, username, f'RedGifs user {username}, ordered by {order}')
diff --git a/yt_dlp/extractor/redtube.py b/yt_dlp/extractor/redtube.py
new file mode 100644
index 0000000..965abbe
--- /dev/null
+++ b/yt_dlp/extractor/redtube.py
@@ -0,0 +1,144 @@
+from .common import InfoExtractor
+from ..utils import (
+ determine_ext,
+ ExtractorError,
+ int_or_none,
+ merge_dicts,
+ str_to_int,
+ unified_strdate,
+ url_or_none,
+ urljoin,
+)
+
+
+class RedTubeIE(InfoExtractor):
+ _VALID_URL = r'https?://(?:(?:\w+\.)?redtube\.com(?:\.br)?/|embed\.redtube\.com/\?.*?\bid=)(?P<id>[0-9]+)'
+ _EMBED_REGEX = [r'<iframe[^>]+?src=["\'](?P<url>(?:https?:)?//embed\.redtube\.com/\?.*?\bid=\d+)']
+ _TESTS = [{
+ 'url': 'https://www.redtube.com/38864951',
+ 'md5': '4fba70cbca3aefd25767ab4b523c9878',
+ 'info_dict': {
+ 'id': '38864951',
+ 'ext': 'mp4',
+ 'title': 'Public Sex on the Balcony in Freezing Paris! Amateur Couple LeoLulu',
+ 'description': 'Watch video Public Sex on the Balcony in Freezing Paris! Amateur Couple LeoLulu on Redtube, home of free Blowjob porn videos and Blonde sex movies online. Video length: (10:46) - Uploaded by leolulu - Verified User - Starring Pornstar: Leolulu',
+ 'upload_date': '20210111',
+ 'timestamp': 1610343109,
+ 'duration': 646,
+ 'view_count': int,
+ 'age_limit': 18,
+ 'thumbnail': r're:https://\wi-ph\.rdtcdn\.com/videos/.+/.+\.jpg',
+ },
+ }, {
+ 'url': 'http://embed.redtube.com/?bgcolor=000000&id=1443286',
+ 'only_matching': True,
+ }, {
+ 'url': 'http://it.redtube.com/66418',
+ 'only_matching': True,
+ }, {
+ 'url': 'https://www.redtube.com.br/103224331',
+ 'only_matching': True,
+ }]
+
+ def _real_extract(self, url):
+ video_id = self._match_id(url)
+ webpage = self._download_webpage(
+ f'https://www.redtube.com/{video_id}', video_id)
+
+ ERRORS = (
+ (('video-deleted-info', '>This video has been removed'), 'has been removed'),
+ (('private_video_text', '>This video is private', '>Send a friend request to its owner to be able to view it'), 'is private'),
+ )
+
+ for patterns, message in ERRORS:
+ if any(p in webpage for p in patterns):
+ raise ExtractorError(
+                    f'Video {video_id} {message}', expected=True)
+
+ info = self._search_json_ld(webpage, video_id, default={})
+
+ if not info.get('title'):
+ info['title'] = self._html_search_regex(
+ (r'<h(\d)[^>]+class="(?:video_title_text|videoTitle|video_title)[^"]*">(?P<title>(?:(?!\1).)+)</h\1>',
+ r'(?:videoTitle|title)\s*:\s*(["\'])(?P<title>(?:(?!\1).)+)\1',),
+ webpage, 'title', group='title',
+ default=None) or self._og_search_title(webpage)
+
+ formats = []
+ sources = self._parse_json(
+ self._search_regex(
+ r'sources\s*:\s*({.+?})', webpage, 'source', default='{}'),
+ video_id, fatal=False)
+ if sources and isinstance(sources, dict):
+ for format_id, format_url in sources.items():
+ if format_url:
+ formats.append({
+ 'url': format_url,
+ 'format_id': format_id,
+ 'height': int_or_none(format_id),
+ })
+ medias = self._parse_json(
+ self._search_regex(
+ r'mediaDefinition["\']?\s*:\s*(\[.+?}\s*\])', webpage,
+ 'media definitions', default='{}'),
+ video_id, fatal=False)
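+        # Each media definition either carries a direct videoUrl or points at
+        # a JSON sub-manifest (for hls, or mp4 without a quality) that lists
+        # the per-quality URLs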
+ for media in medias if isinstance(medias, list) else []:
+ format_url = urljoin('https://www.redtube.com', media.get('videoUrl'))
+ if not format_url:
+ continue
+ format_id = media.get('format')
+ quality = media.get('quality')
+ if format_id == 'hls' or (format_id == 'mp4' and not quality):
+ more_media = self._download_json(format_url, video_id, fatal=False)
+ else:
+ more_media = [media]
+ for media in more_media if isinstance(more_media, list) else []:
+ format_url = url_or_none(media.get('videoUrl'))
+ if not format_url:
+ continue
+ format_id = media.get('format')
+ if format_id == 'hls' or determine_ext(format_url) == 'm3u8':
+ formats.extend(self._extract_m3u8_formats(
+ format_url, video_id, 'mp4',
+ entry_protocol='m3u8_native', m3u8_id=format_id or 'hls',
+ fatal=False))
+ continue
+ format_id = media.get('quality')
+ formats.append({
+ 'url': format_url,
+ 'ext': 'mp4',
+ 'format_id': format_id,
+ 'height': int_or_none(format_id),
+ })
+ if not formats:
+ video_url = self._html_search_regex(
+ r'<source src="(.+?)" type="video/mp4">', webpage, 'video URL')
+ formats.append({'url': video_url, 'ext': 'mp4'})
+
+ thumbnail = self._og_search_thumbnail(webpage)
+ upload_date = unified_strdate(self._search_regex(
+ r'<span[^>]+>(?:ADDED|Published on) ([^<]+)<',
+ webpage, 'upload date', default=None))
+ duration = int_or_none(self._og_search_property(
+ 'video:duration', webpage, default=None) or self._search_regex(
+ r'videoDuration\s*:\s*(\d+)', webpage, 'duration', default=None))
+ view_count = str_to_int(self._search_regex(
+ (r'<div[^>]*>Views</div>\s*<div[^>]*>\s*([\d,.]+)',
+ r'<span[^>]*>VIEWS</span>\s*</td>\s*<td>\s*([\d,.]+)',
+ r'<span[^>]+\bclass=["\']video_view_count[^>]*>\s*([\d,.]+)'),
+ webpage, 'view count', default=None))
+
+ # No self-labeling, but they describe themselves as
+ # "Home of Videos Porno"
+ age_limit = 18
+
+ return merge_dicts(info, {
+ 'id': video_id,
+ 'ext': 'mp4',
+ 'thumbnail': thumbnail,
+ 'upload_date': upload_date,
+ 'duration': duration,
+ 'view_count': view_count,
+ 'age_limit': age_limit,
+ 'formats': formats,
+ })
diff --git a/yt_dlp/extractor/rentv.py b/yt_dlp/extractor/rentv.py
new file mode 100644
index 0000000..abb537c
--- /dev/null
+++ b/yt_dlp/extractor/rentv.py
@@ -0,0 +1,104 @@
+from .common import InfoExtractor
+from ..compat import compat_str
+from ..utils import (
+ determine_ext,
+ int_or_none,
+ url_or_none,
+)
+
+
+class RENTVIE(InfoExtractor):
+ _WORKING = False
+ _VALID_URL = r'(?:rentv:|https?://(?:www\.)?ren\.tv/(?:player|video/epizod)/)(?P<id>\d+)'
+ _TESTS = [{
+ 'url': 'http://ren.tv/video/epizod/118577',
+ 'md5': 'd91851bf9af73c0ad9b2cdf76c127fbb',
+ 'info_dict': {
+ 'id': '118577',
+ 'ext': 'mp4',
+ 'title': 'Документальный спецпроект: "Промывка мозгов. Технологии XXI века"',
+ 'timestamp': 1472230800,
+ 'upload_date': '20160826',
+ }
+ }, {
+ 'url': 'http://ren.tv/player/118577',
+ 'only_matching': True,
+ }, {
+ 'url': 'rentv:118577',
+ 'only_matching': True,
+ }]
+
+ def _real_extract(self, url):
+ video_id = self._match_id(url)
+ webpage = self._download_webpage('http://ren.tv/player/' + video_id, video_id)
+ config = self._parse_json(self._search_regex(
+ r'config\s*=\s*({.+})\s*;', webpage, 'config'), video_id)
+ title = config['title']
+ formats = []
+ for video in config['src']:
+ src = url_or_none(video.get('src'))
+ if not src:
+ continue
+ ext = determine_ext(src)
+ if ext == 'm3u8':
+ formats.extend(self._extract_m3u8_formats(
+ src, video_id, 'mp4', entry_protocol='m3u8_native',
+ m3u8_id='hls', fatal=False))
+ else:
+ formats.append({
+ 'url': src,
+ })
+ return {
+ 'id': video_id,
+ 'title': title,
+ 'description': config.get('description'),
+ 'thumbnail': config.get('image'),
+ 'duration': int_or_none(config.get('duration')),
+ 'timestamp': int_or_none(config.get('date')),
+ 'formats': formats,
+ }
+
+
+class RENTVArticleIE(InfoExtractor):
+ _WORKING = False
+ _VALID_URL = r'https?://(?:www\.)?ren\.tv/novosti/\d{4}-\d{2}-\d{2}/(?P<id>[^/?#]+)'
+ _TESTS = [{
+ 'url': 'http://ren.tv/novosti/2016-10-26/video-mikroavtobus-popavshiy-v-dtp-s-gruzovikami-v-podmoskove-prevratilsya-v',
+ 'md5': 'ebd63c4680b167693745ab91343df1d6',
+ 'info_dict': {
+ 'id': '136472',
+ 'ext': 'mp4',
+ 'title': 'Видео: микроавтобус, попавший в ДТП с грузовиками в Подмосковье, превратился в груду металла',
+ 'description': 'Жертвами столкновения двух фур и микроавтобуса, по последним данным, стали семь человек.',
+ }
+ }, {
+ # TODO: invalid m3u8
+ 'url': 'http://ren.tv/novosti/2015-09-25/sluchaynyy-prohozhiy-poymal-avtougonshchika-v-murmanske-video',
+ 'info_dict': {
+ 'id': 'playlist',
+ 'ext': 'mp4',
+ 'title': 'Случайный прохожий поймал автоугонщика в Мурманске. ВИДЕО | РЕН ТВ',
+ 'uploader': 'ren.tv',
+ },
+ 'params': {
+ # m3u8 downloads
+ 'skip_download': True,
+ },
+ 'skip': True,
+ }]
+
+ def _real_extract(self, url):
+ display_id = self._match_id(url)
+ webpage = self._download_webpage(url, display_id)
+ drupal_settings = self._parse_json(self._search_regex(
+ r'jQuery\.extend\(Drupal\.settings\s*,\s*({.+?})\);',
+ webpage, 'drupal settings'), display_id)
+
+ entries = []
+ for config_profile in drupal_settings.get('ren_jwplayer', {}).values():
+ media_id = config_profile.get('mediaid')
+ if not media_id:
+ continue
+ media_id = compat_str(media_id)
+ entries.append(self.url_result('rentv:' + media_id, 'RENTV', media_id))
+ return self.playlist_result(entries, display_id)
diff --git a/yt_dlp/extractor/restudy.py b/yt_dlp/extractor/restudy.py
new file mode 100644
index 0000000..f49262a
--- /dev/null
+++ b/yt_dlp/extractor/restudy.py
@@ -0,0 +1,41 @@
+from .common import InfoExtractor
+
+
+class RestudyIE(InfoExtractor):
+ _WORKING = False
+ _VALID_URL = r'https?://(?:(?:www|portal)\.)?restudy\.dk/video/[^/]+/id/(?P<id>[0-9]+)'
+ _TESTS = [{
+ 'url': 'https://www.restudy.dk/video/play/id/1637',
+ 'info_dict': {
+ 'id': '1637',
+ 'ext': 'flv',
+ 'title': 'Leiden-frosteffekt',
+ 'description': 'Denne video er et eksperiment med flydende kvælstof.',
+ },
+ 'params': {
+ # rtmp download
+ 'skip_download': True,
+ }
+ }, {
+ 'url': 'https://portal.restudy.dk/video/leiden-frosteffekt/id/1637',
+ 'only_matching': True,
+ }]
+
+ def _real_extract(self, url):
+ video_id = self._match_id(url)
+
+ webpage = self._download_webpage(url, video_id)
+
+ title = self._og_search_title(webpage).strip()
+ description = self._og_search_description(webpage).strip()
+
+ formats = self._extract_smil_formats(
+ 'https://cdn.portal.restudy.dk/dynamic/themes/front/awsmedia/SmilDirectory/video_%s.xml' % video_id,
+ video_id)
+
+ return {
+ 'id': video_id,
+ 'title': title,
+ 'description': description,
+ 'formats': formats,
+ }
diff --git a/yt_dlp/extractor/reuters.py b/yt_dlp/extractor/reuters.py
new file mode 100644
index 0000000..0a8f13b
--- /dev/null
+++ b/yt_dlp/extractor/reuters.py
@@ -0,0 +1,66 @@
+import re
+
+from .common import InfoExtractor
+from ..utils import (
+    int_or_none,
+    js_to_json,
+    unescapeHTML,
+)
+
+
+class ReutersIE(InfoExtractor):
+ _WORKING = False
+ _VALID_URL = r'https?://(?:www\.)?reuters\.com/.*?\?.*?videoId=(?P<id>[0-9]+)'
+ _TEST = {
+ 'url': 'http://www.reuters.com/video/2016/05/20/san-francisco-police-chief-resigns?videoId=368575562',
+ 'md5': '8015113643a0b12838f160b0b81cc2ee',
+ 'info_dict': {
+ 'id': '368575562',
+ 'ext': 'mp4',
+ 'title': 'San Francisco police chief resigns',
+ }
+ }
+
+ def _real_extract(self, url):
+ video_id = self._match_id(url)
+ webpage = self._download_webpage(
+ 'http://www.reuters.com/assets/iframe/yovideo?videoId=%s' % video_id, video_id)
+ video_data = js_to_json(self._search_regex(
+ r'(?s)Reuters\.yovideo\.drawPlayer\(({.*?})\);',
+ webpage, 'video data'))
+
+ def get_json_value(key, fatal=False):
+ return self._search_regex(r'"%s"\s*:\s*"([^"]+)"' % key, video_data, key, fatal=fatal)
+
+ title = unescapeHTML(get_json_value('title', fatal=True))
+ mmid, fid = re.search(r',/(\d+)\?f=(\d+)', get_json_value('flv', fatal=True)).groups()
+
+ mas_data = self._download_json(
+ 'http://mas-e.cds1.yospace.com/mas/%s/%s?trans=json' % (mmid, fid),
+ video_id, transform_source=js_to_json)
+ formats = []
+ for f in mas_data:
+ f_url = f.get('url')
+ if not f_url:
+ continue
+ method = f.get('method')
+ if method == 'hls':
+ formats.extend(self._extract_m3u8_formats(
+ f_url, video_id, 'mp4', 'm3u8_native', m3u8_id='hls', fatal=False))
+ else:
+ container = f.get('container')
+ ext = '3gp' if method == 'mobile' else container
+ formats.append({
+ 'format_id': ext,
+ 'url': f_url,
+ 'ext': ext,
+ 'container': container if method != 'mobile' else None,
+ })
+
+ return {
+ 'id': video_id,
+ 'title': title,
+ 'thumbnail': get_json_value('thumb'),
+ 'duration': int_or_none(get_json_value('seconds')),
+ 'formats': formats,
+ }
diff --git a/yt_dlp/extractor/reverbnation.py b/yt_dlp/extractor/reverbnation.py
new file mode 100644
index 0000000..06b6c3c
--- /dev/null
+++ b/yt_dlp/extractor/reverbnation.py
@@ -0,0 +1,51 @@
+from .common import InfoExtractor
+from ..utils import (
+ qualities,
+ str_or_none,
+)
+
+
+class ReverbNationIE(InfoExtractor):
+ _VALID_URL = r'^https?://(?:www\.)?reverbnation\.com/.*?/song/(?P<id>\d+).*?$'
+ _TESTS = [{
+ 'url': 'http://www.reverbnation.com/alkilados/song/16965047-mona-lisa',
+ 'md5': 'c0aaf339bcee189495fdf5a8c8ba8645',
+ 'info_dict': {
+ 'id': '16965047',
+ 'ext': 'mp3',
+ 'title': 'MONA LISA',
+ 'uploader': 'ALKILADOS',
+ 'uploader_id': '216429',
+ 'thumbnail': r're:^https?://.*\.jpg',
+ },
+ }]
+
+ def _real_extract(self, url):
+ song_id = self._match_id(url)
+
+ api_res = self._download_json(
+ 'https://api.reverbnation.com/song/%s' % song_id,
+ song_id,
+ note='Downloading information of song %s' % song_id
+ )
+
+ THUMBNAILS = ('thumbnail', 'image')
+ quality = qualities(THUMBNAILS)
+ thumbnails = []
+ for thumb_key in THUMBNAILS:
+ if api_res.get(thumb_key):
+ thumbnails.append({
+ 'url': api_res[thumb_key],
+ 'preference': quality(thumb_key)
+ })
+
+ return {
+ 'id': song_id,
+ 'title': api_res['name'],
+ 'url': api_res['url'],
+ 'uploader': api_res.get('artist', {}).get('name'),
+ 'uploader_id': str_or_none(api_res.get('artist', {}).get('id')),
+ 'thumbnails': thumbnails,
+ 'ext': 'mp3',
+ 'vcodec': 'none',
+ }
diff --git a/yt_dlp/extractor/rheinmaintv.py b/yt_dlp/extractor/rheinmaintv.py
new file mode 100644
index 0000000..c3b352d
--- /dev/null
+++ b/yt_dlp/extractor/rheinmaintv.py
@@ -0,0 +1,94 @@
+from .common import InfoExtractor
+from ..utils import extract_attributes, merge_dicts, remove_end
+
+
+class RheinMainTVIE(InfoExtractor):
+ _VALID_URL = r'https?://(?:www\.)?rheinmaintv\.de/sendungen/(?:[\w-]+/)*(?P<video_id>(?P<display_id>[\w-]+)/vom-\d{2}\.\d{2}\.\d{4}(?:/\d+)?)'
+ _TESTS = [{
+ 'url': 'https://www.rheinmaintv.de/sendungen/beitrag-video/auf-dem-weg-zur-deutschen-meisterschaft/vom-07.11.2022/',
+ 'info_dict': {
+ 'id': 'auf-dem-weg-zur-deutschen-meisterschaft-vom-07.11.2022',
+ 'ext': 'ismv', # ismv+isma will be merged into mp4
+ 'alt_title': 'Auf dem Weg zur Deutschen Meisterschaft',
+ 'title': 'Auf dem Weg zur Deutschen Meisterschaft',
+ 'upload_date': '20221108',
+ 'view_count': int,
+ 'display_id': 'auf-dem-weg-zur-deutschen-meisterschaft',
+ 'thumbnail': r're:^https://.+\.jpg',
+ 'description': 'md5:48c59b74192bc819a9b34af1d5ed1eb9',
+ 'timestamp': 1667933057,
+ 'duration': 243.0,
+ },
+ 'params': {'skip_download': 'ism'},
+ }, {
+ 'url': 'https://www.rheinmaintv.de/sendungen/beitrag-video/formationsgemeinschaft-rhein-main-bei-den-deutschen-meisterschaften/vom-14.11.2022/',
+ 'info_dict': {
+ 'id': 'formationsgemeinschaft-rhein-main-bei-den-deutschen-meisterschaften-vom-14.11.2022',
+ 'ext': 'ismv',
+ 'title': 'Formationsgemeinschaft Rhein-Main bei den Deutschen Meisterschaften',
+ 'timestamp': 1668526214,
+ 'display_id': 'formationsgemeinschaft-rhein-main-bei-den-deutschen-meisterschaften',
+ 'alt_title': 'Formationsgemeinschaft Rhein-Main bei den Deutschen Meisterschaften',
+ 'view_count': int,
+ 'thumbnail': r're:^https://.+\.jpg',
+ 'duration': 345.0,
+ 'description': 'md5:9370ba29526984006c2cba1372e5c5a0',
+ 'upload_date': '20221115',
+ },
+ 'params': {'skip_download': 'ism'},
+ }, {
+ 'url': 'https://www.rheinmaintv.de/sendungen/beitrag-video/casino-mainz-bei-den-deutschen-meisterschaften/vom-14.11.2022/',
+ 'info_dict': {
+ 'id': 'casino-mainz-bei-den-deutschen-meisterschaften-vom-14.11.2022',
+ 'ext': 'ismv',
+ 'title': 'Casino Mainz bei den Deutschen Meisterschaften',
+ 'view_count': int,
+ 'timestamp': 1668527402,
+ 'alt_title': 'Casino Mainz bei den Deutschen Meisterschaften',
+ 'upload_date': '20221115',
+ 'display_id': 'casino-mainz-bei-den-deutschen-meisterschaften',
+ 'duration': 348.0,
+ 'thumbnail': r're:^https://.+\.jpg',
+ 'description': 'md5:70fc1660eeba96da17199e5bdff4c0aa',
+ },
+ 'params': {'skip_download': 'ism'},
+ }, {
+ 'url': 'https://www.rheinmaintv.de/sendungen/beitrag-video/bricks4kids/vom-22.06.2022/',
+ 'only_matching': True,
+ }]
+
+ def _real_extract(self, url):
+ mobj = self._match_valid_url(url)
+ display_id = mobj.group('display_id')
+ video_id = mobj.group('video_id').replace('/', '-')
+ webpage = self._download_webpage(url, video_id)
+
+ source, img = self._search_regex(r'(?s)(?P<source><source[^>]*>)(?P<img><img[^>]*>)',
+ webpage, 'video', group=('source', 'img'))
+ source = extract_attributes(source)
+ img = extract_attributes(img)
+
+ raw_json_ld = list(self._yield_json_ld(webpage, video_id))
+ json_ld = self._json_ld(raw_json_ld, video_id)
+ json_ld.pop('url', None)
+
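+        # Prefer the <source> element's src; fall back to the embedUrl of the
+        # first VideoObject in the page's JSON-LD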
+ ism_manifest_url = (
+ source.get('src')
+ or next(json_ld.get('embedUrl') for json_ld in raw_json_ld if json_ld.get('@type') == 'VideoObject')
+ )
+ formats, subtitles = self._extract_ism_formats_and_subtitles(ism_manifest_url, video_id)
+
+ return merge_dicts({
+ 'id': video_id,
+ 'display_id': display_id,
+ 'title':
+ self._html_search_regex(r'<h1><span class="title">([^<]*)</span>',
+ webpage, 'headline', default=None)
+ or img.get('title') or json_ld.get('title') or self._og_search_title(webpage)
+ or remove_end(self._html_extract_title(webpage), ' -'),
+ 'alt_title': img.get('alt'),
+ 'description': json_ld.get('description') or self._og_search_description(webpage),
+ 'formats': formats,
+ 'subtitles': subtitles,
+ 'thumbnails': [{'url': img['src']}] if 'src' in img else json_ld.get('thumbnails'),
+ }, json_ld)
diff --git a/yt_dlp/extractor/ridehome.py b/yt_dlp/extractor/ridehome.py
new file mode 100644
index 0000000..78f838a
--- /dev/null
+++ b/yt_dlp/extractor/ridehome.py
@@ -0,0 +1,96 @@
+from .art19 import Art19IE
+from .common import InfoExtractor
+from ..utils import extract_attributes, get_elements_html_by_class
+from ..utils.traversal import traverse_obj
+
+
+class RideHomeIE(InfoExtractor):
+ _VALID_URL = r'https?://(?:www\.)?ridehome\.info/show/[\w-]+/(?P<id>[\w-]+)/?(?:$|[?#])'
+ _TESTS = [{
+ 'url': 'https://www.ridehome.info/show/techmeme-ride-home/thu-1228-will-2024-be-the-year-apple-gets-serious-about-gaming-on-macs/',
+ 'info_dict': {
+ 'id': 'thu-1228-will-2024-be-the-year-apple-gets-serious-about-gaming-on-macs',
+ },
+ 'playlist_count': 1,
+ 'playlist': [{
+ 'md5': 'c84ea3cc96950a9ab86fe540f3edc588',
+ 'info_dict': {
+ 'id': '540e5493-9fe6-4c14-a488-dc508d8794b2',
+ 'ext': 'mp3',
+ 'title': 'Thu. 12/28 – Will 2024 Be The Year Apple Gets Serious About Gaming On Macs?',
+ 'description': 'md5:9dba86ae9b5047a8150eceddeeb629c2',
+ 'series': 'Techmeme Ride Home',
+ 'series_id': '3c30e8f4-ab48-415b-9421-1ae06cd4058b',
+ 'upload_date': '20231228',
+ 'timestamp': 1703780995,
+ 'modified_date': '20231230',
+ 'episode_id': '540e5493-9fe6-4c14-a488-dc508d8794b2',
+ 'modified_timestamp': 1703912404,
+ 'release_date': '20231228',
+ 'release_timestamp': 1703782800,
+ 'duration': 1000.1502,
+ 'thumbnail': r're:^https?://content\.production\.cdn\.art19\.com/images/.*\.jpeg$',
+ },
+ }],
+ }, {
+ 'url': 'https://www.ridehome.info/show/techmeme-ride-home/portfolio-profile-sensel-with-ilyarosenberg/',
+ 'info_dict': {
+ 'id': 'portfolio-profile-sensel-with-ilyarosenberg',
+ },
+ 'playlist_count': 1,
+ 'playlist': [{
+ 'md5': 'bf9d6efad221008ce71aea09d5533cf6',
+ 'info_dict': {
+ 'id': '6beed803-b1ef-4536-9fef-c23cf6b4dcac',
+ 'ext': 'mp3',
+ 'title': '(Portfolio Profile) Sensel - With @IlyaRosenberg',
+ 'description': 'md5:e1e4a970bce04290e0ba6f030b0125db',
+ 'series': 'Techmeme Ride Home',
+ 'series_id': '3c30e8f4-ab48-415b-9421-1ae06cd4058b',
+ 'upload_date': '20220108',
+ 'timestamp': 1641656064,
+ 'modified_date': '20230418',
+ 'episode_id': '6beed803-b1ef-4536-9fef-c23cf6b4dcac',
+ 'modified_timestamp': 1681843318,
+ 'release_date': '20220108',
+ 'release_timestamp': 1641672000,
+ 'duration': 2789.38122,
+ 'thumbnail': r're:^https?://content\.production\.cdn\.art19\.com/images/.*\.jpeg$'
+ },
+ }],
+ }, {
+ 'url': 'https://www.ridehome.info/show/spacecasts/big-tech-news-apples-macbook-pro-event/',
+ 'info_dict': {
+ 'id': 'big-tech-news-apples-macbook-pro-event',
+ },
+ 'playlist_count': 1,
+ 'playlist': [{
+ 'md5': 'b1428530c6e03904a8271e978007fc05',
+ 'info_dict': {
+ 'id': 'f4780044-6c4b-4ce0-8215-8a86cc66bff7',
+ 'ext': 'mp3',
+ 'title': 'md5:e6c05d44d59b6577a4145ac339de5040',
+ 'description': 'md5:14152f7228c8a301a77e3d6bc891b145',
+ 'series': 'SpaceCasts',
+ 'series_id': '8e3e837d-7fe0-4a23-8e11-894917e07e17',
+ 'upload_date': '20211026',
+ 'timestamp': 1635271450,
+ 'modified_date': '20230502',
+ 'episode_id': 'f4780044-6c4b-4ce0-8215-8a86cc66bff7',
+ 'modified_timestamp': 1683057500,
+ 'release_date': '20211026',
+ 'release_timestamp': 1635272124,
+ 'duration': 2266.30531,
+ 'thumbnail': r're:^https?://content\.production\.cdn\.art19\.com/images/.*\.jpeg$'
+ },
+ }],
+ }]
+
+ def _real_extract(self, url):
+ article_id = self._match_id(url)
+ webpage = self._download_webpage(url, article_id)
+
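+        # Collect the Art19 embed URLs from the data-src attributes of the
+        # page's iframe containers and hand them off to the Art19 extractor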
+ urls = traverse_obj(
+ get_elements_html_by_class('iframeContainer', webpage),
+ (..., {extract_attributes}, lambda k, v: k == 'data-src' and Art19IE.suitable(v)))
+ return self.playlist_from_matches(urls, article_id, ie=Art19IE)
diff --git a/yt_dlp/extractor/rinsefm.py b/yt_dlp/extractor/rinsefm.py
new file mode 100644
index 0000000..f87b895
--- /dev/null
+++ b/yt_dlp/extractor/rinsefm.py
@@ -0,0 +1,89 @@
+from .common import InfoExtractor
+from ..utils import (
+ MEDIA_EXTENSIONS,
+ determine_ext,
+ parse_iso8601,
+ traverse_obj,
+ url_or_none,
+)
+
+
+class RinseFMBaseIE(InfoExtractor):
+ @staticmethod
+ def _parse_entry(entry):
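+        # Map a Next.js 'entry' object onto yt-dlp fields; the thumbnail and
+        # canonical webpage URLs are rebuilt from the image filename and slug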
+ return {
+ **traverse_obj(entry, {
+ 'id': ('id', {str}),
+ 'title': ('title', {str}),
+ 'url': ('fileUrl', {url_or_none}),
+ 'release_timestamp': ('episodeDate', {parse_iso8601}),
+ 'thumbnail': ('featuredImage', 0, 'filename', {str},
+ {lambda x: x and f'https://rinse.imgix.net/media/{x}'}),
+ 'webpage_url': ('slug', {str},
+ {lambda x: x and f'https://rinse.fm/episodes/{x}'}),
+ }),
+ 'vcodec': 'none',
+ 'extractor_key': RinseFMIE.ie_key(),
+ 'extractor': RinseFMIE.IE_NAME,
+ }
+
+
+class RinseFMIE(RinseFMBaseIE):
+ _VALID_URL = r'https?://(?:www\.)?rinse\.fm/episodes/(?P<id>[^/?#]+)'
+ _TESTS = [{
+ 'url': 'https://rinse.fm/episodes/club-glow-15-12-2023-2000/',
+ 'md5': '76ee0b719315617df42e15e710f46c7b',
+ 'info_dict': {
+ 'id': '1536535',
+ 'ext': 'mp3',
+ 'title': 'Club Glow - 15/12/2023 - 20:00',
+ 'thumbnail': r're:^https://.+\.(?:jpg|JPG)$',
+ 'release_timestamp': 1702598400,
+ 'release_date': '20231215'
+ }
+ }]
+
+ def _real_extract(self, url):
+ display_id = self._match_id(url)
+ webpage = self._download_webpage(url, display_id)
+ entry = self._search_nextjs_data(webpage, display_id)['props']['pageProps']['entry']
+
+ return self._parse_entry(entry)
+
+
+class RinseFMArtistPlaylistIE(RinseFMBaseIE):
+ _VALID_URL = r'https?://(?:www\.)?rinse\.fm/shows/(?P<id>[^/?#]+)'
+ _TESTS = [{
+ 'url': 'https://rinse.fm/shows/resources/',
+ 'info_dict': {
+ 'id': 'resources',
+ 'title': '[re]sources',
+ 'description': '[re]sources est un label parisien piloté par le DJ et producteur Tommy Kid.'
+ },
+ 'playlist_mincount': 40
+ }, {
+ 'url': 'https://rinse.fm/shows/ivy/',
+ 'info_dict': {
+ 'id': 'ivy',
+ 'title': '[IVY]',
+ 'description': 'A dedicated space for DNB/Turbo House and 4x4.'
+ },
+ 'playlist_mincount': 7
+ }]
+
+ def _entries(self, data):
+ for episode in traverse_obj(data, (
+ 'props', 'pageProps', 'episodes', lambda _, v: determine_ext(v['fileUrl']) in MEDIA_EXTENSIONS.audio)
+ ):
+ yield self._parse_entry(episode)
+
+ def _real_extract(self, url):
+ playlist_id = self._match_id(url)
+ webpage = self._download_webpage(url, playlist_id)
+ title = self._og_search_title(webpage) or self._html_search_meta('title', webpage)
+ description = self._og_search_description(webpage) or self._html_search_meta(
+ 'description', webpage)
+ data = self._search_nextjs_data(webpage, playlist_id)
+
+ return self.playlist_result(
+ self._entries(data), playlist_id, title, description=description)
diff --git a/yt_dlp/extractor/rmcdecouverte.py b/yt_dlp/extractor/rmcdecouverte.py
new file mode 100644
index 0000000..8d29b30
--- /dev/null
+++ b/yt_dlp/extractor/rmcdecouverte.py
@@ -0,0 +1,71 @@
+from .common import InfoExtractor
+from .brightcove import BrightcoveLegacyIE
+from ..compat import (
+ compat_parse_qs,
+ compat_urlparse,
+)
+from ..utils import smuggle_url
+
+
+class RMCDecouverteIE(InfoExtractor):
+ _VALID_URL = r'https?://rmcdecouverte\.bfmtv\.com/(?:[^?#]*_(?P<id>\d+)|mediaplayer-direct)/?(?:[#?]|$)'
+
+ _TESTS = [{
+ 'url': 'https://rmcdecouverte.bfmtv.com/vestiges-de-guerre_22240/les-bunkers-secrets-domaha-beach_25303/',
+ 'info_dict': {
+ 'id': '6250879771001',
+ 'ext': 'mp4',
+ 'title': 'LES BUNKERS SECRETS D´OMAHA BEACH',
+ 'uploader_id': '1969646226001',
+ 'description': 'md5:aed573ca24abde62a148e0eba909657d',
+ 'timestamp': 1619622984,
+ 'upload_date': '20210428',
+ },
+ 'params': {
+ 'skip_download': True,
+ },
+ }, {
+ 'url': 'https://rmcdecouverte.bfmtv.com/wheeler-dealers-occasions-a-saisir/program_2566/',
+ 'info_dict': {
+ 'id': '5983675500001',
+ 'ext': 'mp4',
+ 'title': 'CORVETTE',
+ 'description': 'md5:c1e8295521e45ffebf635d6a7658f506',
+ 'uploader_id': '1969646226001',
+ 'upload_date': '20181226',
+ 'timestamp': 1545861635,
+ },
+ 'params': {
+ 'skip_download': True,
+ },
+ 'skip': 'only available for a week',
+ }, {
+ 'url': 'https://rmcdecouverte.bfmtv.com/avions-furtifs-la-technologie-de-lextreme_10598',
+ 'only_matching': True,
+ }, {
+ # The website accepts any URL as long as it has _\d+ at the end
+ 'url': 'https://rmcdecouverte.bfmtv.com/any/thing/can/go/here/_10598',
+ 'only_matching': True,
+ }, {
+ # live, geo restricted, bypassable
+ 'url': 'https://rmcdecouverte.bfmtv.com/mediaplayer-direct/',
+ 'only_matching': True,
+ }]
+ BRIGHTCOVE_URL_TEMPLATE = 'http://players.brightcove.net/1969646226001/default_default/index.html?videoId=%s'
+
+ def _real_extract(self, url):
+ mobj = self._match_valid_url(url)
+ display_id = mobj.group('id') or 'direct'
+ webpage = self._download_webpage(url, display_id)
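+        # Prefer the legacy Brightcove embed URL found in the page; otherwise
+        # fall back to the player's data-video-id attribute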
+ brightcove_legacy_url = BrightcoveLegacyIE._extract_brightcove_url(webpage)
+ if brightcove_legacy_url:
+ brightcove_id = compat_parse_qs(compat_urlparse.urlparse(
+ brightcove_legacy_url).query)['@videoPlayer'][0]
+ else:
+ brightcove_id = self._search_regex(
+ r'data-video-id=["\'](\d+)', webpage, 'brightcove id')
+ return self.url_result(
+ smuggle_url(
+ self.BRIGHTCOVE_URL_TEMPLATE % brightcove_id,
+ {'geo_countries': ['FR']}),
+ 'BrightcoveNew', brightcove_id)
diff --git a/yt_dlp/extractor/rockstargames.py b/yt_dlp/extractor/rockstargames.py
new file mode 100644
index 0000000..1662243
--- /dev/null
+++ b/yt_dlp/extractor/rockstargames.py
@@ -0,0 +1,65 @@
+from .common import InfoExtractor
+from ..utils import (
+ int_or_none,
+ parse_iso8601,
+)
+
+
+class RockstarGamesIE(InfoExtractor):
+ _WORKING = False
+ _VALID_URL = r'https?://(?:www\.)?rockstargames\.com/videos(?:/video/|#?/?\?.*\bvideo=)(?P<id>\d+)'
+ _TESTS = [{
+ 'url': 'https://www.rockstargames.com/videos/video/11544/',
+ 'md5': '03b5caa6e357a4bd50e3143fc03e5733',
+ 'info_dict': {
+ 'id': '11544',
+ 'ext': 'mp4',
+ 'title': 'Further Adventures in Finance and Felony Trailer',
+ 'description': 'md5:6d31f55f30cb101b5476c4a379e324a3',
+ 'thumbnail': r're:^https?://.*\.jpg$',
+ 'timestamp': 1464876000,
+ 'upload_date': '20160602',
+ }
+ }, {
+ 'url': 'http://www.rockstargames.com/videos#/?video=48',
+ 'only_matching': True,
+ }]
+
+ def _real_extract(self, url):
+ video_id = self._match_id(url)
+
+ video = self._download_json(
+ 'https://www.rockstargames.com/videoplayer/videos/get-video.json',
+ video_id, query={
+ 'id': video_id,
+ 'locale': 'en_us',
+ })['video']
+
+ title = video['title']
+
+ formats = []
+ for v in video['files_processed']['video/mp4']:
+ if not v.get('src'):
+ continue
+ resolution = v.get('resolution')
+ height = int_or_none(self._search_regex(
+ r'^(\d+)[pP]$', resolution or '', 'height', default=None))
+ formats.append({
+ 'url': self._proto_relative_url(v['src']),
+ 'format_id': resolution,
+ 'height': height,
+ })
+
+ if not formats:
+ youtube_id = video.get('youtube_id')
+ if youtube_id:
+ return self.url_result(youtube_id, 'Youtube')
+
+ return {
+ 'id': video_id,
+ 'title': title,
+ 'description': video.get('description'),
+ 'thumbnail': self._proto_relative_url(video.get('screencap')),
+ 'timestamp': parse_iso8601(video.get('created')),
+ 'formats': formats,
+ }
diff --git a/yt_dlp/extractor/rokfin.py b/yt_dlp/extractor/rokfin.py
new file mode 100644
index 0000000..5099f3a
--- /dev/null
+++ b/yt_dlp/extractor/rokfin.py
@@ -0,0 +1,455 @@
+import itertools
+import json
+import re
+import urllib.parse
+from datetime import datetime
+
+from .common import InfoExtractor, SearchInfoExtractor
+from ..utils import (
+ ExtractorError,
+ determine_ext,
+ float_or_none,
+ format_field,
+ int_or_none,
+ str_or_none,
+ traverse_obj,
+ try_get,
+ unescapeHTML,
+ unified_timestamp,
+ url_or_none,
+ urlencode_postdata,
+)
+
+_API_BASE_URL = 'https://prod-api-v2.production.rokfin.com/api/v2/public/'
+
+
+class RokfinIE(InfoExtractor):
+ _VALID_URL = r'https?://(?:www\.)?rokfin\.com/(?P<id>(?P<type>post|stream)/\d+)'
+ _NETRC_MACHINE = 'rokfin'
+ _AUTH_BASE = 'https://secure.rokfin.com/auth/realms/rokfin-web/protocol/openid-connect'
+ _access_mgmt_tokens = {} # OAuth 2.0: RFC 6749, Sec. 1.4-5
+ _TESTS = [{
+ 'url': 'https://www.rokfin.com/post/57548/Mitt-Romneys-Crazy-Solution-To-Climate-Change',
+ 'info_dict': {
+ 'id': 'post/57548',
+ 'ext': 'mp4',
+ 'title': 'Mitt Romney\'s Crazy Solution To Climate Change',
+ 'thumbnail': r're:https://img\.production\.rokfin\.com/.+',
+ 'upload_date': '20211023',
+ 'timestamp': 1634998029,
+ 'channel': 'Jimmy Dore',
+ 'channel_id': '65429',
+ 'channel_url': 'https://rokfin.com/TheJimmyDoreShow',
+ 'availability': 'public',
+ 'live_status': 'not_live',
+ 'dislike_count': int,
+ 'like_count': int,
+ 'duration': 213,
+ }
+ }, {
+ 'url': 'https://rokfin.com/post/223/Julian-Assange-Arrested-Streaming-In-Real-Time',
+ 'info_dict': {
+ 'id': 'post/223',
+ 'ext': 'mp4',
+ 'title': 'Julian Assange Arrested: Streaming In Real Time',
+ 'thumbnail': r're:https://img\.production\.rokfin\.com/.+',
+ 'upload_date': '20190412',
+ 'timestamp': 1555052644,
+ 'channel': 'Ron Placone',
+ 'channel_id': '10',
+ 'channel_url': 'https://rokfin.com/RonPlacone',
+ 'availability': 'public',
+ 'live_status': 'not_live',
+ 'dislike_count': int,
+ 'like_count': int,
+ 'tags': ['FreeThinkingMedia^', 'RealProgressives^'],
+ }
+ }, {
+ 'url': 'https://www.rokfin.com/stream/10543/Its-A-Crazy-Mess-Regional-Director-Blows-Whistle-On-Pfizers-Vaccine-Trial-Data',
+ 'info_dict': {
+ 'id': 'stream/10543',
+ 'ext': 'mp4',
+ 'title': '"It\'s A Crazy Mess" Regional Director Blows Whistle On Pfizer\'s Vaccine Trial Data',
+ 'thumbnail': r're:https://img\.production\.rokfin\.com/.+',
+ 'description': 'md5:324ce2d3e3b62e659506409e458b9d8e',
+ 'channel': 'TLAVagabond',
+ 'channel_id': '53856',
+ 'channel_url': 'https://rokfin.com/TLAVagabond',
+ 'availability': 'public',
+ 'is_live': False,
+ 'was_live': True,
+ 'live_status': 'was_live',
+ 'timestamp': 1635874720,
+ 'release_timestamp': 1635874720,
+ 'release_date': '20211102',
+ 'upload_date': '20211102',
+ 'dislike_count': int,
+ 'like_count': int,
+ 'tags': ['FreeThinkingMedia^'],
+ }
+ }, {
+ 'url': 'https://rokfin.com/post/126703/Brave-New-World--Aldous-Huxley-DEEPDIVE--Chpts-13--Quite-Frankly--Jay-Dyer',
+ 'info_dict': {
+ 'id': 'post/126703',
+ 'ext': 'mp4',
+ 'title': 'Brave New World - Aldous Huxley DEEPDIVE! (Chpts 1-3) - Quite Frankly & Jay Dyer',
+ 'thumbnail': r're:https://img\.production\.rokfin\.com/.+',
+ 'channel': 'Jay Dyer',
+ 'channel_id': '186881',
+ 'channel_url': 'https://rokfin.com/jaydyer',
+ 'availability': 'premium_only',
+ 'live_status': 'not_live',
+ 'dislike_count': int,
+ 'like_count': int,
+ 'timestamp': 1678213357,
+ 'upload_date': '20230307',
+ 'tags': ['FreeThinkingMedia^', 'OpenMind^'],
+ 'description': 'md5:cb04e32e68326c9b2b251b297bacff35',
+ 'duration': 3100,
+ }
+ }, {
+ 'url': 'https://rokfin.com/stream/31332/The-Grayzone-live-on-Nordstream-blame-game',
+ 'info_dict': {
+ 'id': 'stream/31332',
+ 'ext': 'mp4',
+ 'title': 'The Grayzone live on Nordstream blame game',
+ 'thumbnail': r're:https://image\.v\.rokfin\.com/.+',
+ 'channel': 'Max Blumenthal',
+ 'channel_id': '248902',
+ 'channel_url': 'https://rokfin.com/MaxBlumenthal',
+ 'availability': 'premium_only',
+ 'live_status': 'was_live',
+ 'dislike_count': int,
+ 'like_count': int,
+ 'timestamp': 1678475166,
+ 'release_timestamp': 1678475166.0,
+ 'release_date': '20230310',
+ 'upload_date': '20230310',
+ 'tags': ['FreeThinkingMedia^'],
+ }
+ }]
+
+ def _real_extract(self, url):
+ video_id, video_type = self._match_valid_url(url).group('id', 'type')
+ metadata = self._download_json_using_access_token(f'{_API_BASE_URL}{video_id}', video_id)
+
+ scheduled = unified_timestamp(metadata.get('scheduledAt'))
+ live_status = ('was_live' if metadata.get('stoppedAt')
+ else 'is_upcoming' if scheduled
+ else 'is_live' if video_type == 'stream'
+ else 'not_live')
+
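+        # The API sometimes returns a placeholder URL ('fake.m3u8'); in that
+        # case, recover the media id from the storyboard VTT URL and build the
+        # stream.v.rokfin.com m3u8 URL from it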
+ video_url = traverse_obj(metadata, 'url', ('content', 'contentUrl'), expected_type=url_or_none)
+ if video_url in (None, 'fake.m3u8'):
+ video_url = format_field(self._search_regex(
+            r'https?://[^/]+/([^/]+)/storyboard\.vtt',
+ traverse_obj(metadata, 'timelineUrl', ('content', 'timelineUrl'), expected_type=url_or_none),
+ video_id, default=None), None, 'https://stream.v.rokfin.com/%s.m3u8')
+
+ formats, subtitles = [{'url': video_url}] if video_url else [], {}
+ if determine_ext(video_url) == 'm3u8':
+ formats, subtitles = self._extract_m3u8_formats_and_subtitles(
+ video_url, video_id, fatal=False, live=live_status == 'is_live')
+
+ if not formats:
+ if traverse_obj(metadata, 'premiumPlan', 'premium'):
+ self.raise_login_required('This video is only available to premium users', True, method='cookies')
+ elif scheduled:
+ self.raise_no_formats(
+ f'Stream is offline; scheduled for {datetime.fromtimestamp(scheduled).strftime("%Y-%m-%d %H:%M:%S")}',
+ video_id=video_id, expected=True)
+
+ uploader = traverse_obj(metadata, ('createdBy', 'username'), ('creator', 'username'))
+ timestamp = (scheduled or float_or_none(metadata.get('postedAtMilli'), 1000)
+ or unified_timestamp(metadata.get('creationDateTime')))
+ return {
+ 'id': video_id,
+ 'formats': formats,
+ 'subtitles': subtitles,
+ 'title': str_or_none(traverse_obj(metadata, 'title', ('content', 'contentTitle'))),
+ 'duration': float_or_none(traverse_obj(metadata, ('content', 'duration'))),
+ 'thumbnail': url_or_none(traverse_obj(metadata, 'thumbnail', ('content', 'thumbnailUrl1'))),
+ 'description': str_or_none(traverse_obj(metadata, 'description', ('content', 'contentDescription'))),
+ 'like_count': int_or_none(metadata.get('likeCount')),
+ 'dislike_count': int_or_none(metadata.get('dislikeCount')),
+ 'channel': str_or_none(traverse_obj(metadata, ('createdBy', 'name'), ('creator', 'name'))),
+ 'channel_id': str_or_none(traverse_obj(metadata, ('createdBy', 'id'), ('creator', 'id'))),
+ 'channel_url': url_or_none(f'https://rokfin.com/{uploader}') if uploader else None,
+ 'timestamp': timestamp,
+ 'release_timestamp': timestamp if live_status != 'not_live' else None,
+ 'tags': traverse_obj(metadata, ('tags', ..., 'title'), expected_type=str_or_none),
+ 'live_status': live_status,
+ 'availability': self._availability(
+ needs_premium=bool(traverse_obj(metadata, 'premiumPlan', 'premium')),
+ is_private=False, needs_subscription=False, needs_auth=False, is_unlisted=False),
+ # 'comment_count': metadata.get('numComments'), # Data provided by website is wrong
+ '__post_extractor': self.extract_comments(video_id) if video_type == 'post' else None,
+ }
+
+ def _get_comments(self, video_id):
+ pages_total = None
+ for page_n in itertools.count():
+ raw_comments = self._download_json(
+ f'{_API_BASE_URL}comment?postId={video_id[5:]}&page={page_n}&size=50',
+ video_id, note=f'Downloading viewer comments page {page_n + 1}{format_field(pages_total, None, " of %s")}',
+ fatal=False) or {}
+
+ for comment in raw_comments.get('content') or []:
+ yield {
+ 'text': str_or_none(comment.get('comment')),
+ 'author': str_or_none(comment.get('name')),
+ 'id': comment.get('commentId'),
+ 'author_id': comment.get('userId'),
+ 'parent': 'root',
+ 'like_count': int_or_none(comment.get('numLikes')),
+ 'dislike_count': int_or_none(comment.get('numDislikes')),
+ 'timestamp': unified_timestamp(comment.get('postedAt'))
+ }
+
+ pages_total = int_or_none(raw_comments.get('totalPages')) or None
+ is_last = raw_comments.get('last')
+ if not raw_comments.get('content') or is_last or (page_n > pages_total if pages_total else is_last is not False):
+ return
+
+ def _perform_login(self, username, password):
+ # https://openid.net/specs/openid-connect-core-1_0.html#CodeFlowAuth (Sec. 3.1)
+ login_page = self._download_webpage(
+ f'{self._AUTH_BASE}/auth?client_id=web&redirect_uri=https%3A%2F%2Frokfin.com%2Ffeed&response_mode=fragment&response_type=code&scope=openid',
+ None, note='loading login page', errnote='error loading login page')
+ authentication_point_url = unescapeHTML(self._search_regex(
+ r'<form\s+[^>]+action\s*=\s*"(https://secure\.rokfin\.com/auth/realms/rokfin-web/login-actions/authenticate\?[^"]+)"',
+ login_page, name='Authentication URL'))
+
+ resp_body = self._download_webpage(
+ authentication_point_url, None, note='logging in', fatal=False, expected_status=404,
+ data=urlencode_postdata({'username': username, 'password': password, 'rememberMe': 'off', 'credentialId': ''}))
+ if not self._authentication_active():
+ if re.search(r'(?i)(invalid\s+username\s+or\s+password)', resp_body or ''):
+ raise ExtractorError('invalid username/password', expected=True)
+ raise ExtractorError('Login failed')
+
+ urlh = self._request_webpage(
+ f'{self._AUTH_BASE}/auth', None,
+ note='granting user authorization', errnote='user authorization rejected by Rokfin',
+ query={
+ 'client_id': 'web',
+ 'prompt': 'none',
+ 'redirect_uri': 'https://rokfin.com/silent-check-sso.html',
+ 'response_mode': 'fragment',
+ 'response_type': 'code',
+ 'scope': 'openid',
+ })
+ self._access_mgmt_tokens = self._download_json(
+ f'{self._AUTH_BASE}/token', None,
+ note='getting access credentials', errnote='error getting access credentials',
+ data=urlencode_postdata({
+ 'code': urllib.parse.parse_qs(urllib.parse.urldefrag(urlh.url).fragment).get('code')[0],
+ 'client_id': 'web',
+ 'grant_type': 'authorization_code',
+ 'redirect_uri': 'https://rokfin.com/silent-check-sso.html'
+ }))
+
+ def _authentication_active(self):
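+        # Logged in only while all four Keycloak session cookies are present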
+ return not (
+ {'KEYCLOAK_IDENTITY', 'KEYCLOAK_IDENTITY_LEGACY', 'KEYCLOAK_SESSION', 'KEYCLOAK_SESSION_LEGACY'}
+ - set(self._get_cookies(self._AUTH_BASE)))
+
+ def _get_auth_token(self):
+ return try_get(self._access_mgmt_tokens, lambda x: ' '.join([x['token_type'], x['access_token']]))
+
+ def _download_json_using_access_token(self, url_or_request, video_id, headers={}, query={}):
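+        # Request with the current access token; on HTTP 401, refresh it via
+        # the OAuth refresh_token grant and retry once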
+ assert 'authorization' not in headers
+ headers = headers.copy()
+ auth_token = self._get_auth_token()
+ refresh_token = self._access_mgmt_tokens.get('refresh_token')
+ if auth_token:
+ headers['authorization'] = auth_token
+
+ json_string, urlh = self._download_webpage_handle(
+ url_or_request, video_id, headers=headers, query=query, expected_status=401)
+ if not auth_token or urlh.status != 401 or refresh_token is None:
+ return self._parse_json(json_string, video_id)
+
+ self._access_mgmt_tokens = self._download_json(
+ f'{self._AUTH_BASE}/token', video_id,
+ note='User authorization expired or canceled by Rokfin. Re-authorizing ...', errnote='Failed to re-authorize',
+ data=urlencode_postdata({
+ 'grant_type': 'refresh_token',
+ 'refresh_token': refresh_token,
+ 'client_id': 'web'
+ }))
+ headers['authorization'] = self._get_auth_token()
+ if headers['authorization'] is None:
+ raise ExtractorError('User authorization lost', expected=True)
+
+ return self._download_json(url_or_request, video_id, headers=headers, query=query)
+
+
+class RokfinPlaylistBaseIE(InfoExtractor):
+ _TYPES = {
+ 'video': 'post',
+ 'audio': 'post',
+ 'stream': 'stream',
+ 'dead_stream': 'stream',
+ 'stack': 'stack',
+ }
+
+ def _get_video_data(self, metadata):
+ for content in metadata.get('content') or []:
+ media_type = self._TYPES.get(content.get('mediaType'))
+ video_id = content.get('id') if media_type == 'post' else content.get('mediaId')
+ if not media_type or not video_id:
+ continue
+
+ yield self.url_result(f'https://rokfin.com/{media_type}/{video_id}', video_id=f'{media_type}/{video_id}',
+ video_title=str_or_none(traverse_obj(content, ('content', 'contentTitle'))))
+
+
+class RokfinStackIE(RokfinPlaylistBaseIE):
+ IE_NAME = 'rokfin:stack'
+ IE_DESC = 'Rokfin Stacks'
+ _VALID_URL = r'https?://(?:www\.)?rokfin\.com/stack/(?P<id>[^/]+)'
+ _TESTS = [{
+ 'url': 'https://www.rokfin.com/stack/271/Tulsi-Gabbard-Portsmouth-Townhall-FULL--Feb-9-2020',
+ 'playlist_count': 8,
+ 'info_dict': {
+ 'id': '271',
+ },
+ }]
+
+ def _real_extract(self, url):
+ list_id = self._match_id(url)
+ return self.playlist_result(self._get_video_data(
+ self._download_json(f'{_API_BASE_URL}stack/{list_id}', list_id)), list_id)
+
+
+class RokfinChannelIE(RokfinPlaylistBaseIE):
+ IE_NAME = 'rokfin:channel'
+ IE_DESC = 'Rokfin Channels'
+ _VALID_URL = r'https?://(?:www\.)?rokfin\.com/(?!((feed/?)|(discover/?)|(channels/?))$)(?P<id>[^/]+)/?$'
+ _TESTS = [{
+ 'url': 'https://rokfin.com/TheConvoCouch',
+ 'playlist_mincount': 100,
+ 'info_dict': {
+ 'id': '12071-new',
+ 'title': 'TheConvoCouch - New',
+ 'description': 'md5:bb622b1bca100209b91cd685f7847f06',
+ },
+ }]
+
+ _TABS = {
+ 'new': 'posts',
+ 'top': 'top',
+ 'videos': 'video',
+ 'podcasts': 'audio',
+ 'streams': 'stream',
+ 'stacks': 'stack',
+ }
+
+ def _real_initialize(self):
+ self._validate_extractor_args()
+
+ def _validate_extractor_args(self):
+ requested_tabs = self._configuration_arg('tab', None)
+ if requested_tabs is not None and (len(requested_tabs) > 1 or requested_tabs[0] not in self._TABS):
+ raise ExtractorError(f'Invalid extractor-arg "tab". Must be one of {", ".join(self._TABS)}', expected=True)
+
+ def _entries(self, channel_id, channel_name, tab):
+ pages_total = None
+ for page_n in itertools.count(0):
+ if tab in ('posts', 'top'):
+ data_url = f'{_API_BASE_URL}user/{channel_name}/{tab}?page={page_n}&size=50'
+ else:
+ data_url = f'{_API_BASE_URL}post/search/{tab}?page={page_n}&size=50&creator={channel_id}'
+ metadata = self._download_json(
+ data_url, channel_name,
+ note=f'Downloading video metadata page {page_n + 1}{format_field(pages_total, None, " of %s")}')
+
+ yield from self._get_video_data(metadata)
+ pages_total = int_or_none(metadata.get('totalPages')) or None
+ is_last = metadata.get('last')
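+            # Stop once the API flags the last page or, failing that, once the
+            # page counter runs past the reported total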
+ if is_last or (page_n > pages_total if pages_total else is_last is not False):
+ return
+
+ def _real_extract(self, url):
+ channel_name = self._match_id(url)
+ channel_info = self._download_json(f'{_API_BASE_URL}user/{channel_name}', channel_name)
+ channel_id = channel_info['id']
+ tab = self._configuration_arg('tab', default=['new'])[0]
+
+ return self.playlist_result(
+ self._entries(channel_id, channel_name, self._TABS[tab]),
+ f'{channel_id}-{tab}', f'{channel_name} - {tab.title()}', str_or_none(channel_info.get('description')))
+
+
+class RokfinSearchIE(SearchInfoExtractor):
+ IE_NAME = 'rokfin:search'
+ IE_DESC = 'Rokfin Search'
+ _SEARCH_KEY = 'rkfnsearch'
+ _TYPES = {
+ 'video': (('id', 'raw'), 'post'),
+ 'audio': (('id', 'raw'), 'post'),
+ 'stream': (('content_id', 'raw'), 'stream'),
+ 'dead_stream': (('content_id', 'raw'), 'stream'),
+ 'stack': (('content_id', 'raw'), 'stack'),
+ }
+ _TESTS = [{
+ 'url': 'rkfnsearch5:"zelenko"',
+ 'playlist_count': 5,
+ 'info_dict': {
+ 'id': '"zelenko"',
+ 'title': '"zelenko"',
+ }
+ }]
+ _db_url = None
+ _db_access_key = None
+
+ def _real_initialize(self):
+ self._db_url, self._db_access_key = self.cache.load(self.ie_key(), 'auth', default=(None, None))
+ if not self._db_url:
+ self._get_db_access_credentials()
+
+ def _search_results(self, query):
+ total_pages = None
+ for page_number in itertools.count(1):
+ search_results = self._run_search_query(
+ query, data={'query': query, 'page': {'size': 100, 'current': page_number}},
+ note=f'Downloading page {page_number}{format_field(total_pages, None, " of ~%s")}')
+ total_pages = traverse_obj(search_results, ('meta', 'page', 'total_pages'), expected_type=int_or_none)
+
+ for result in search_results.get('results') or []:
+ video_id_key, video_type = self._TYPES.get(traverse_obj(result, ('content_type', 'raw')), (None, None))
+ video_id = traverse_obj(result, video_id_key, expected_type=int_or_none)
+ if video_id and video_type:
+ yield self.url_result(url=f'https://rokfin.com/{video_type}/{video_id}')
+ if not search_results.get('results'):
+ return
+
+ def _run_search_query(self, video_id, data, **kwargs):
+ data = json.dumps(data).encode()
+ for attempt in range(2):
+ search_results = self._download_json(
+ self._db_url, video_id, data=data, fatal=(attempt == 1),
+ headers={'authorization': self._db_access_key}, **kwargs)
+ if search_results:
+ return search_results
+ self.write_debug('Updating access credentials')
+ self._get_db_access_credentials(video_id)
+
+ def _get_db_access_credentials(self, video_id=None):
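+        # The search endpoint and API key are build-time REACT_APP_* constants
+        # embedded in the site's JavaScript bundles, so scrape them from there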
+ auth_data = {'SEARCH_KEY': None, 'ENDPOINT_BASE': None}
+ notfound_err_page = self._download_webpage(
+ 'https://rokfin.com/discover', video_id, expected_status=404, note='Downloading home page')
+ for js_file_path in re.findall(r'<script\b[^>]*\ssrc\s*=\s*"(/static/js/[^">]+)"', notfound_err_page):
+ js_content = self._download_webpage(
+ f'https://rokfin.com{js_file_path}', video_id, note='Downloading JavaScript file', fatal=False)
+ auth_data.update(re.findall(
+ rf'REACT_APP_({"|".join(auth_data.keys())})\s*:\s*"([^"]+)"', js_content or ''))
+ if not all(auth_data.values()):
+ continue
+
+ self._db_url = url_or_none(f'{auth_data["ENDPOINT_BASE"]}/api/as/v1/engines/rokfin-search/search.json')
+ self._db_access_key = f'Bearer {auth_data["SEARCH_KEY"]}'
+ self.cache.store(self.ie_key(), 'auth', (self._db_url, self._db_access_key))
+ return
+ raise ExtractorError('Unable to extract access credentials')
diff --git a/yt_dlp/extractor/roosterteeth.py b/yt_dlp/extractor/roosterteeth.py
new file mode 100644
index 0000000..5c62239
--- /dev/null
+++ b/yt_dlp/extractor/roosterteeth.py
@@ -0,0 +1,352 @@
+from .common import InfoExtractor
+from ..networking.exceptions import HTTPError
+from ..utils import (
+ ExtractorError,
+ LazyList,
+ int_or_none,
+ join_nonempty,
+ parse_iso8601,
+ parse_qs,
+ smuggle_url,
+ str_or_none,
+ url_or_none,
+ urlencode_postdata,
+ urljoin,
+)
+from ..utils.traversal import traverse_obj
+
+
+class RoosterTeethBaseIE(InfoExtractor):
+ _NETRC_MACHINE = 'roosterteeth'
+ _API_BASE = 'https://svod-be.roosterteeth.com'
+ _API_BASE_URL = f'{_API_BASE}/api/v1'
+
+ def _perform_login(self, username, password):
+ if self._get_cookies(self._API_BASE_URL).get('rt_access_token'):
+ return
+
+ try:
+ self._download_json(
+ 'https://auth.roosterteeth.com/oauth/token',
+ None, 'Logging in', data=urlencode_postdata({
+ 'client_id': '4338d2b4bdc8db1239360f28e72f0d9ddb1fd01e7a38fbb07b4b1f4ba4564cc5',
+ 'grant_type': 'password',
+ 'username': username,
+ 'password': password,
+ }))
+ except ExtractorError as e:
+ msg = 'Unable to login'
+ if isinstance(e.cause, HTTPError) and e.cause.status == 401:
+ resp = self._parse_json(e.cause.response.read().decode(), None, fatal=False)
+ if resp:
+ error = resp.get('extra_info') or resp.get('error_description') or resp.get('error')
+ if error:
+ msg += ': ' + error
+ self.report_warning(msg)
+
+ def _extract_video_info(self, data):
+ thumbnails = []
+ for image in traverse_obj(data, ('included', 'images')):
+ if image.get('type') not in ('episode_image', 'bonus_feature_image'):
+ continue
+ thumbnails.extend([{
+ 'id': name,
+ 'url': url,
+ } for name, url in (image.get('attributes') or {}).items() if url_or_none(url)])
+
+ attributes = data.get('attributes') or {}
+ title = traverse_obj(attributes, 'title', 'display_title')
+ sub_only = attributes.get('is_sponsors_only')
+
+ episode_id = str_or_none(data.get('uuid'))
+ video_id = str_or_none(data.get('id'))
+ if video_id and 'parent_content_id' in attributes: # parent_content_id is a bonus-only key
+ video_id += '-bonus' # there are collisions with bonus ids and regular ids
+ elif not video_id:
+ video_id = episode_id
+
+ return {
+ 'id': video_id,
+ 'display_id': attributes.get('slug'),
+ 'title': title,
+ 'description': traverse_obj(attributes, 'description', 'caption'),
+ 'series': traverse_obj(attributes, 'show_title', 'parent_content_title'),
+ 'season_number': int_or_none(attributes.get('season_number')),
+ 'season_id': str_or_none(attributes.get('season_id')),
+ 'episode': title,
+ 'episode_number': int_or_none(attributes.get('number')),
+ 'episode_id': episode_id,
+ 'channel_id': attributes.get('channel_id'),
+ 'duration': int_or_none(attributes.get('length')),
+ 'release_timestamp': parse_iso8601(attributes.get('original_air_date')),
+ 'thumbnails': thumbnails,
+ 'availability': self._availability(
+ needs_premium=sub_only, needs_subscription=sub_only, needs_auth=sub_only,
+ is_private=False, is_unlisted=False),
+ 'tags': attributes.get('genres')
+ }
+
+
+class RoosterTeethIE(RoosterTeethBaseIE):
+ _VALID_URL = r'https?://(?:.+?\.)?roosterteeth\.com/(?:bonus-feature|episode|watch)/(?P<id>[^/?#&]+)'
+ _TESTS = [{
+ 'url': 'http://roosterteeth.com/episode/million-dollars-but-season-2-million-dollars-but-the-game-announcement',
+ 'info_dict': {
+ 'id': '9156',
+ 'display_id': 'million-dollars-but-season-2-million-dollars-but-the-game-announcement',
+ 'ext': 'mp4',
+ 'title': 'Million Dollars, But... The Game Announcement',
+ 'description': 'md5:168a54b40e228e79f4ddb141e89fe4f5',
+ 'thumbnail': r're:^https?://.*\.png$',
+ 'series': 'Million Dollars, But...',
+ 'episode': 'Million Dollars, But... The Game Announcement',
+ 'tags': ['Game Show', 'Sketch'],
+ 'season_number': 2,
+ 'availability': 'public',
+ 'episode_number': 10,
+ 'episode_id': '00374575-464e-11e7-a302-065410f210c4',
+ 'season': 'Season 2',
+ 'season_id': 'ffa27d48-464d-11e7-a302-065410f210c4',
+ 'channel_id': '92b6bb21-91d2-4b1b-bf95-3268fa0d9939',
+ 'duration': 145,
+ 'release_timestamp': 1462982400,
+ 'release_date': '20160511',
+ },
+ 'params': {'skip_download': True},
+ }, {
+ 'url': 'https://roosterteeth.com/watch/rwby-bonus-25',
+ 'info_dict': {
+ 'id': '40432',
+ 'display_id': 'rwby-bonus-25',
+ 'title': 'Grimm',
+ 'description': 'md5:f30ff570741213418a8d2c19868b93ab',
+ 'episode': 'Grimm',
+ 'channel_id': '92f780eb-ebfe-4bf5-a3b5-c6ad5460a5f1',
+ 'thumbnail': r're:^https?://.*\.(png|jpe?g)$',
+ 'ext': 'mp4',
+ 'availability': 'public',
+ 'episode_id': 'f8117b13-f068-499e-803e-eec9ea2dec8c',
+ 'episode_number': 3,
+ 'tags': ['Animation'],
+ 'season_id': '4b8f0a9e-12c4-41ed-8caa-fed15a85bab8',
+ 'season': 'Season 1',
+ 'series': 'RWBY: World of Remnant',
+ 'season_number': 1,
+ 'duration': 216,
+ 'release_timestamp': 1413489600,
+ 'release_date': '20141016',
+ },
+ 'params': {'skip_download': True},
+ }, {
+ # bonus feature with /watch/ url
+ 'url': 'https://roosterteeth.com/watch/rwby-bonus-21',
+ 'info_dict': {
+ 'id': '33-bonus',
+ 'display_id': 'rwby-bonus-21',
+ 'title': 'Volume 5 Yang Character Short',
+ 'description': 'md5:8c2440bc763ea90c52cfe0a68093e1f7',
+ 'episode': 'Volume 5 Yang Character Short',
+ 'channel_id': '92f780eb-ebfe-4bf5-a3b5-c6ad5460a5f1',
+ 'thumbnail': r're:^https?://.*\.(png|jpe?g)$',
+ 'ext': 'mp4',
+ 'availability': 'public',
+ 'episode_id': 'f2a9f132-1fe2-44ad-8956-63d7c0267720',
+ 'episode_number': 55,
+ 'series': 'RWBY',
+ 'duration': 255,
+ 'release_timestamp': 1507993200,
+ 'release_date': '20171014',
+ },
+ 'params': {'skip_download': True},
+ }, {
+ # only works with video_data['attributes']['url'] m3u8 url
+ 'url': 'https://www.roosterteeth.com/watch/achievement-hunter-achievement-hunter-fatality-walkthrough-deathstroke-lex-luthor-captain-marvel-green-lantern-and-wonder-woman',
+ 'info_dict': {
+ 'id': '25394',
+ 'ext': 'mp4',
+ 'title': 'Fatality Walkthrough: Deathstroke, Lex Luthor, Captain Marvel, Green Lantern, and Wonder Woman',
+ 'description': 'md5:91bb934698344fb9647b1c7351f16964',
+ 'availability': 'public',
+ 'thumbnail': r're:^https?://.*\.(png|jpe?g)$',
+ 'episode': 'Fatality Walkthrough: Deathstroke, Lex Luthor, Captain Marvel, Green Lantern, and Wonder Woman',
+ 'episode_number': 71,
+ 'episode_id': 'ffaec998-464d-11e7-a302-065410f210c4',
+ 'season': 'Season 2008',
+ 'tags': ['Gaming'],
+ 'series': 'Achievement Hunter',
+ 'display_id': 'md5:4465ce4f001735f9d7a2ae529a543d31',
+ 'season_id': 'ffa13340-464d-11e7-a302-065410f210c4',
+ 'season_number': 2008,
+ 'channel_id': '2cb2a70c-be50-46f5-93d7-84a1baabb4f7',
+ 'duration': 189,
+ 'release_timestamp': 1228317300,
+ 'release_date': '20081203',
+ },
+ 'params': {'skip_download': True},
+ }, {
+ # brightcove fallback extraction needed
+ 'url': 'https://roosterteeth.com/watch/lets-play-2013-126',
+ 'info_dict': {
+ 'id': '17845',
+ 'ext': 'mp4',
+ 'title': 'WWE \'13',
+ 'availability': 'public',
+ 'series': 'Let\'s Play',
+ 'episode_number': 10,
+ 'season_id': 'ffa23d9c-464d-11e7-a302-065410f210c4',
+ 'channel_id': '75ba87e8-06fd-4482-bad9-52a4da2c6181',
+ 'episode': 'WWE \'13',
+ 'episode_id': 'ffdbe55e-464d-11e7-a302-065410f210c4',
+ 'thumbnail': r're:^https?://.*\.(png|jpe?g)$',
+ 'tags': ['Gaming', 'Our Favorites'],
+ 'description': 'md5:b4a5226d2bbcf0dafbde11a2ba27262d',
+ 'display_id': 'lets-play-2013-126',
+ 'season_number': 3,
+ 'season': 'Season 3',
+ 'release_timestamp': 1359999840,
+ 'release_date': '20130204',
+ },
+ 'expected_warnings': ['Direct m3u8 URL returned HTTP Error 403'],
+ 'params': {'skip_download': True},
+ }, {
+ 'url': 'http://achievementhunter.roosterteeth.com/episode/off-topic-the-achievement-hunter-podcast-2016-i-didn-t-think-it-would-pass-31',
+ 'only_matching': True,
+ }, {
+ 'url': 'http://funhaus.roosterteeth.com/episode/funhaus-shorts-2016-austin-sucks-funhaus-shorts',
+ 'only_matching': True,
+ }, {
+ 'url': 'http://screwattack.roosterteeth.com/episode/death-battle-season-3-mewtwo-vs-shadow',
+ 'only_matching': True,
+ }, {
+ 'url': 'http://theknow.roosterteeth.com/episode/the-know-game-news-season-1-boring-steam-sales-are-better',
+ 'only_matching': True,
+ }, {
+ # only available for FIRST members
+ 'url': 'http://roosterteeth.com/episode/rt-docs-the-world-s-greatest-head-massage-the-world-s-greatest-head-massage-an-asmr-journey-part-one',
+ 'only_matching': True,
+ }, {
+ 'url': 'https://roosterteeth.com/watch/million-dollars-but-season-2-million-dollars-but-the-game-announcement',
+ 'only_matching': True,
+ }, {
+ 'url': 'https://roosterteeth.com/bonus-feature/camp-camp-soundtrack-another-rap-song-about-foreign-cars-richie-branson',
+ 'only_matching': True,
+ }]
+
+ _BRIGHTCOVE_ACCOUNT_ID = '6203312018001'
+
+ def _extract_brightcove_formats_and_subtitles(self, bc_id, url, m3u8_url):
+ account_id = self._search_regex(
+ r'/accounts/(\d+)/videos/', m3u8_url, 'account id', default=self._BRIGHTCOVE_ACCOUNT_ID)
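+        # Brightcove m3u8 URLs look like .../accounts/<account_id>/videos/...;
+        # falls back to the hardcoded RT account id when the pattern is absent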
+ info = self._downloader.get_info_extractor('BrightcoveNew').extract(smuggle_url(
+ f'https://players.brightcove.net/{account_id}/default_default/index.html?videoId={bc_id}',
+ {'referrer': url}))
+ return info['formats'], info['subtitles']
+
+ def _real_extract(self, url):
+ display_id = self._match_id(url)
+ api_episode_url = f'{self._API_BASE_URL}/watch/{display_id}'
+
+ try:
+ video_data = self._download_json(
+ api_episode_url + '/videos', display_id, 'Downloading video JSON metadata',
+                headers={'Client-Type': 'web'})['data'][0]  # web client-type yields ad-free streams
+ except ExtractorError as e:
+ if isinstance(e.cause, HTTPError) and e.cause.status == 403:
+ if self._parse_json(e.cause.response.read().decode(), display_id).get('access') is False:
+ self.raise_login_required(
+ '%s is only available for FIRST members' % display_id)
+ raise
+
+ # XXX: additional ad-free URL at video_data['links']['download'] but often gives 403 errors
+ m3u8_url = video_data['attributes']['url']
+ is_brightcove = traverse_obj(video_data, ('attributes', 'encoding_pipeline')) == 'brightcove'
+ bc_id = traverse_obj(video_data, ('attributes', 'uid', {str}))
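+        # Brightcove-encoded videos carry their Brightcove video id in 'uid';
+        # both values feed the HTTP 403 fallback below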
+
+ try:
+ formats, subtitles = self._extract_m3u8_formats_and_subtitles(
+ m3u8_url, display_id, 'mp4', 'm3u8_native', m3u8_id='hls')
+ except ExtractorError as e:
+ if is_brightcove and bc_id and isinstance(e.cause, HTTPError) and e.cause.status == 403:
+ self.report_warning(
+ 'Direct m3u8 URL returned HTTP Error 403; retrying with Brightcove extraction')
+ formats, subtitles = self._extract_brightcove_formats_and_subtitles(bc_id, url, m3u8_url)
+ else:
+ raise
+
+ episode = self._download_json(
+ api_episode_url, display_id,
+ 'Downloading episode JSON metadata')['data'][0]
+
+ return {
+ 'display_id': display_id,
+ 'formats': formats,
+ 'subtitles': subtitles,
+            **self._extract_video_info(episode),
+ }
+
+
+class RoosterTeethSeriesIE(RoosterTeethBaseIE):
+ _VALID_URL = r'https?://(?:.+?\.)?roosterteeth\.com/series/(?P<id>[^/?#&]+)'
+ _TESTS = [{
+ 'url': 'https://roosterteeth.com/series/rwby?season=7',
+ 'playlist_count': 13,
+ 'info_dict': {
+ 'id': 'rwby-7',
+ 'title': 'RWBY - Season 7',
+ },
+ }, {
+ 'url': 'https://roosterteeth.com/series/the-weird-place',
+ 'playlist_count': 7,
+ 'info_dict': {
+ 'id': 'the-weird-place',
+ 'title': 'The Weird Place',
+ },
+ }, {
+ 'url': 'https://roosterteeth.com/series/role-initiative',
+ 'playlist_mincount': 16,
+ 'info_dict': {
+ 'id': 'role-initiative',
+ 'title': 'Role Initiative',
+ },
+ }, {
+ 'url': 'https://roosterteeth.com/series/let-s-play-minecraft?season=9',
+ 'playlist_mincount': 50,
+ 'info_dict': {
+ 'id': 'let-s-play-minecraft-9',
+ 'title': 'Let\'s Play Minecraft - Season 9',
+ },
+ }]
+
+ def _entries(self, series_id, season_number):
+ display_id = join_nonempty(series_id, season_number)
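+        # e.g. series 'rwby' with ?season=7 yields display id 'rwby-7' (cf. the first test)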
+
+ def yield_episodes(data):
+ for episode in traverse_obj(data, ('data', lambda _, v: v['canonical_links']['self'])):
+ yield self.url_result(
+ urljoin('https://www.roosterteeth.com', episode['canonical_links']['self']),
+ RoosterTeethIE, **self._extract_video_info(episode))
+
+ series_data = self._download_json(
+ f'{self._API_BASE_URL}/shows/{series_id}/seasons?order=asc&order_by', display_id)
+ for season_data in traverse_obj(series_data, ('data', lambda _, v: v['links']['episodes'])):
+ idx = traverse_obj(season_data, ('attributes', 'number'))
+ if season_number is not None and idx != season_number:
+ continue
+ yield from yield_episodes(self._download_json(
+ urljoin(self._API_BASE, season_data['links']['episodes']), display_id,
+ f'Downloading season {idx} JSON metadata', query={'per_page': 1000}))
+
+        if season_number is None:  # extract series-level bonus features
+ yield from yield_episodes(self._download_json(
+ f'{self._API_BASE_URL}/shows/{series_id}/bonus_features?order=asc&order_by&per_page=1000',
+ display_id, 'Downloading bonus features JSON metadata', fatal=False))
+
+ def _real_extract(self, url):
+ series_id = self._match_id(url)
+ season_number = traverse_obj(parse_qs(url), ('season', 0), expected_type=int_or_none)
+
+ entries = LazyList(self._entries(series_id, season_number))
+ return self.playlist_result(
+ entries,
+ join_nonempty(series_id, season_number),
+ join_nonempty(entries[0].get('series'), season_number, delim=' - Season '))
diff --git a/yt_dlp/extractor/rottentomatoes.py b/yt_dlp/extractor/rottentomatoes.py
new file mode 100644
index 0000000..e357175
--- /dev/null
+++ b/yt_dlp/extractor/rottentomatoes.py
@@ -0,0 +1,82 @@
+from .common import InfoExtractor
+from ..utils import (
+ ExtractorError,
+ clean_html,
+ float_or_none,
+ get_element_by_class,
+ join_nonempty,
+ traverse_obj,
+ url_or_none,
+)
+
+
+class RottenTomatoesIE(InfoExtractor):
+ _VALID_URL = r'https?://(?:www\.)?rottentomatoes\.com/m/(?P<playlist>[^/]+)(?:/(?P<tr>trailers)(?:/(?P<id>\w+))?)?'
+
+ _TESTS = [{
+ 'url': 'http://www.rottentomatoes.com/m/toy_story_3/trailers/11028566/',
+ 'info_dict': {
+ 'id': '11028566',
+ 'ext': 'mp4',
+ 'title': 'Toy Story 3',
+ 'description': 'From the creators of the beloved TOY STORY films, comes a story that will reunite the gang in a whole new way.'
+ },
+ 'skip': 'No longer available',
+ }, {
+ 'url': 'https://www.rottentomatoes.com/m/toy_story_3/trailers/VycaVoBKhGuk',
+ 'info_dict': {
+ 'id': 'VycaVoBKhGuk',
+ 'ext': 'mp4',
+ 'title': 'Toy Story 3: Trailer 2',
+ 'description': '',
+ 'thumbnail': r're:^https?://.*\.jpg$',
+ 'duration': 149.941
+ },
+ }, {
+ 'url': 'http://www.rottentomatoes.com/m/toy_story_3',
+ 'info_dict': {
+ 'id': 'toy_story_3',
+ 'title': 'Toy Story 3',
+ },
+ 'playlist_mincount': 4,
+ }, {
+ 'url': 'http://www.rottentomatoes.com/m/toy_story_3/trailers',
+ 'info_dict': {
+ 'id': 'toy_story_3-trailers',
+ },
+ 'playlist_mincount': 5,
+ }]
+
+ def _extract_videos(self, data, display_id):
+ for video in traverse_obj(data, (lambda _, v: v['publicId'] and v['file'] and v['type'] == 'hls')):
+ yield {
+ 'formats': self._extract_m3u8_formats(
+ video['file'], display_id, 'mp4', m3u8_id='hls', fatal=False),
+ **traverse_obj(video, {
+ 'id': 'publicId',
+ 'title': 'title',
+ 'description': 'description',
+ 'duration': ('durationInSeconds', {float_or_none}),
+ 'thumbnail': ('image', {url_or_none}),
+ }),
+ }
+
+ def _real_extract(self, url):
+ playlist_id, trailers, video_id = self._match_valid_url(url).group('playlist', 'tr', 'id')
+ playlist_id = join_nonempty(playlist_id, trailers)
+ webpage = self._download_webpage(url, playlist_id)
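+        # the metadata lives in a JSON array embedded in a <script id="videos"> or
+        # <script id="heroVideos"> tag, extracted by the pattern below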
+ data = self._search_json(
+ r'<script[^>]+\bid=["\'](?:heroV|v)ideos["\'][^>]*>', webpage,
+ 'data', playlist_id, contains_pattern=r'\[{(?s:.+)}\]')
+
+ if video_id:
+ video_data = traverse_obj(data, lambda _, v: v['publicId'] == video_id)
+ if not video_data:
+ raise ExtractorError('Unable to extract video from webpage')
+ return next(self._extract_videos(video_data, video_id))
+
+ return self.playlist_result(
+ self._extract_videos(data, playlist_id), playlist_id,
+ clean_html(get_element_by_class('scoreboard__title', webpage)))
diff --git a/yt_dlp/extractor/rozhlas.py b/yt_dlp/extractor/rozhlas.py
new file mode 100644
index 0000000..411a625
--- /dev/null
+++ b/yt_dlp/extractor/rozhlas.py
@@ -0,0 +1,368 @@
+import itertools
+
+from .common import InfoExtractor
+from ..networking.exceptions import HTTPError
+from ..utils import (
+ ExtractorError,
+ extract_attributes,
+ int_or_none,
+ remove_start,
+ str_or_none,
+ traverse_obj,
+ unified_timestamp,
+ url_or_none,
+)
+
+
+class RozhlasIE(InfoExtractor):
+ _VALID_URL = r'https?://(?:www\.)?prehravac\.rozhlas\.cz/audio/(?P<id>[0-9]+)'
+ _TESTS = [{
+ 'url': 'http://prehravac.rozhlas.cz/audio/3421320',
+ 'md5': '504c902dbc9e9a1fd50326eccf02a7e2',
+ 'info_dict': {
+ 'id': '3421320',
+ 'ext': 'mp3',
+ 'title': 'Echo Pavla Klusáka (30.06.2015 21:00)',
+ 'description': 'Osmdesátiny Terryho Rileyho jsou skvělou příležitostí proletět se elektronickými i akustickými díly zakladatatele minimalismu, který je aktivní už přes padesát let'
+ }
+ }, {
+ 'url': 'http://prehravac.rozhlas.cz/audio/3421320/embed',
+ 'only_matching': True,
+ }]
+
+ def _real_extract(self, url):
+ audio_id = self._match_id(url)
+
+ webpage = self._download_webpage(
+ 'http://prehravac.rozhlas.cz/audio/%s' % audio_id, audio_id)
+
+ title = self._html_search_regex(
+ r'<h3>(.+?)</h3>\s*<p[^>]*>.*?</p>\s*<div[^>]+id=["\']player-track',
+ webpage, 'title', default=None) or remove_start(
+ self._og_search_title(webpage), 'Radio Wave - ')
+ description = self._html_search_regex(
+ r'<p[^>]+title=(["\'])(?P<url>(?:(?!\1).)+)\1[^>]*>.*?</p>\s*<div[^>]+id=["\']player-track',
+ webpage, 'description', fatal=False, group='url')
+ duration = int_or_none(self._search_regex(
+ r'data-duration=["\'](\d+)', webpage, 'duration', default=None))
+
+ return {
+ 'id': audio_id,
+ 'url': 'http://media.rozhlas.cz/_audio/%s.mp3' % audio_id,
+ 'title': title,
+ 'description': description,
+ 'duration': duration,
+ 'vcodec': 'none',
+ }
+
+
+class RozhlasBaseIE(InfoExtractor):
+ def _extract_formats(self, entry, audio_id):
+ formats = []
+ for audio in traverse_obj(entry, ('audioLinks', lambda _, v: url_or_none(v['url']))):
+ ext = audio.get('variant')
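+            # 'variant' doubles as the manifest discriminator: 'dash' and 'hls' are
+            # expanded into manifest formats below, anything else is a direct URL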
+ for retry in self.RetryManager():
+ if retry.attempt > 1:
+ self._sleep(1, audio_id)
+ try:
+ if ext == 'dash':
+ formats.extend(self._extract_mpd_formats(
+ audio['url'], audio_id, mpd_id=ext))
+ elif ext == 'hls':
+ formats.extend(self._extract_m3u8_formats(
+ audio['url'], audio_id, 'm4a', m3u8_id=ext))
+ else:
+ formats.append({
+ 'url': audio['url'],
+ 'ext': ext,
+ 'format_id': ext,
+ 'abr': int_or_none(audio.get('bitrate')),
+ 'acodec': ext,
+ 'vcodec': 'none',
+ })
+ except ExtractorError as e:
+ if isinstance(e.cause, HTTPError) and e.cause.status == 429:
+ retry.error = e.cause
+ else:
+ self.report_warning(e.msg)
+
+ return formats
+
+
+class RozhlasVltavaIE(RozhlasBaseIE):
+ _VALID_URL = r'https?://(?:\w+\.rozhlas|english\.radio)\.cz/[\w-]+-(?P<id>\d+)'
+ _TESTS = [{
+ 'url': 'https://wave.rozhlas.cz/papej-masicko-porcujeme-a-bilancujeme-filmy-a-serialy-ktere-letos-zabily-8891337',
+ 'md5': 'ba2fdbc1242fc16771c7695d271ec355',
+ 'info_dict': {
+ 'id': '8891337',
+ 'title': 'md5:21f99739d04ab49d8c189ec711eef4ec',
+ },
+ 'playlist_count': 1,
+ 'playlist': [{
+ 'md5': 'ba2fdbc1242fc16771c7695d271ec355',
+ 'info_dict': {
+ 'id': '10520988',
+ 'ext': 'mp3',
+ 'title': 'Papej masíčko! Porcujeme a bilancujeme filmy a seriály, které to letos zabily',
+ 'description': 'md5:1c6d29fb9564e1f17fc1bb83ae7da0bc',
+ 'duration': 1574,
+ 'artist': 'Aleš Stuchlý',
+ 'channel_id': 'radio-wave',
+ },
+ }]
+ }, {
+ 'url': 'https://wave.rozhlas.cz/poslechnete-si-neklid-podcastovy-thriller-o-vine-strachu-a-vztahu-ktery-zasel-8554744',
+ 'info_dict': {
+ 'id': '8554744',
+ 'title': 'Poslechněte si Neklid. Podcastový thriller o vině, strachu a vztahu, který zašel příliš daleko',
+ },
+ 'playlist_count': 5,
+ 'playlist': [{
+ 'md5': '93d4109cf8f40523699ae9c1d4600bdd',
+ 'info_dict': {
+ 'id': '9890713',
+ 'ext': 'mp3',
+ 'title': 'Neklid #1',
+ 'description': '1. díl: Neklid: 1. díl',
+ 'duration': 1025,
+ 'artist': 'Josef Kokta',
+ 'channel_id': 'radio-wave',
+ 'chapter': 'Neklid #1',
+ 'chapter_number': 1,
+ },
+ }, {
+ 'md5': 'e9763235be4a6dcf94bc8a5bac1ca126',
+ 'info_dict': {
+ 'id': '9890716',
+ 'ext': 'mp3',
+ 'title': 'Neklid #2',
+ 'description': '2. díl: Neklid: 2. díl',
+ 'duration': 768,
+ 'artist': 'Josef Kokta',
+ 'channel_id': 'radio-wave',
+ 'chapter': 'Neklid #2',
+ 'chapter_number': 2,
+ },
+ }, {
+ 'md5': '00b642ea94b78cc949ac84da09f87895',
+ 'info_dict': {
+ 'id': '9890722',
+ 'ext': 'mp3',
+ 'title': 'Neklid #3',
+ 'description': '3. díl: Neklid: 3. díl',
+ 'duration': 607,
+ 'artist': 'Josef Kokta',
+ 'channel_id': 'radio-wave',
+ 'chapter': 'Neklid #3',
+ 'chapter_number': 3,
+ },
+ }, {
+ 'md5': 'faef97b1b49da7df874740f118c19dea',
+ 'info_dict': {
+ 'id': '9890728',
+ 'ext': 'mp3',
+ 'title': 'Neklid #4',
+ 'description': '4. díl: Neklid: 4. díl',
+ 'duration': 621,
+ 'artist': 'Josef Kokta',
+ 'channel_id': 'radio-wave',
+ 'chapter': 'Neklid #4',
+ 'chapter_number': 4,
+ },
+ }, {
+ 'md5': '6e729fa39b647325b868d419c76f3efa',
+ 'info_dict': {
+ 'id': '9890734',
+ 'ext': 'mp3',
+ 'title': 'Neklid #5',
+ 'description': '5. díl: Neklid: 5. díl',
+ 'duration': 908,
+ 'artist': 'Josef Kokta',
+ 'channel_id': 'radio-wave',
+ 'chapter': 'Neklid #5',
+ 'chapter_number': 5,
+ },
+ }]
+ }, {
+ 'url': 'https://dvojka.rozhlas.cz/karel-siktanc-cerny-jezdec-bily-kun-napinava-pohadka-o-tajemnem-prizraku-8946969',
+ 'info_dict': {
+ 'id': '8946969',
+ 'title': 'Karel Šiktanc: Černý jezdec, bílý kůň. Napínavá pohádka o tajemném přízraku',
+ },
+ 'playlist_count': 1,
+ 'playlist': [{
+ 'info_dict': {
+ 'id': '10631121',
+ 'ext': 'm4a',
+ 'title': 'Karel Šiktanc: Černý jezdec, bílý kůň. Napínavá pohádka o tajemném přízraku',
+ 'description': 'Karel Šiktanc: Černý jezdec, bílý kůň',
+ 'duration': 2656,
+ 'artist': 'Tvůrčí skupina Drama a literatura',
+ 'channel_id': 'dvojka',
+ },
+ }],
+ 'params': {'skip_download': 'dash'},
+ }]
+
+ def _extract_video(self, entry):
+ audio_id = entry['meta']['ga']['contentId']
+ chapter_number = traverse_obj(entry, ('meta', 'ga', 'contentSerialPart', {int_or_none}))
+
+ return {
+ 'id': audio_id,
+ 'chapter': traverse_obj(entry, ('meta', 'ga', 'contentNameShort')) if chapter_number else None,
+ 'chapter_number': chapter_number,
+ 'formats': self._extract_formats(entry, audio_id),
+ **traverse_obj(entry, {
+ 'title': ('meta', 'ga', 'contentName'),
+ 'description': 'title',
+ 'duration': ('duration', {int_or_none}),
+ 'artist': ('meta', 'ga', 'contentAuthor'),
+ 'channel_id': ('meta', 'ga', 'contentCreator'),
+ })
+ }
+
+ def _real_extract(self, url):
+ video_id = self._match_id(url)
+ webpage = self._download_webpage(url, video_id)
+
+ # FIXME: Use get_element_text_and_html_by_tag when it accepts less strict html
+ data = self._parse_json(extract_attributes(self._search_regex(
+ r'(<div class="mujRozhlasPlayer" data-player=\'[^\']+\'>)',
+ webpage, 'player'))['data-player'], video_id)['data']
+
+ return {
+ '_type': 'playlist',
+ 'id': str_or_none(data.get('embedId')) or video_id,
+ 'title': traverse_obj(data, ('series', 'title')),
+ 'entries': map(self._extract_video, data['playlist']),
+ }
+
+
+class MujRozhlasIE(RozhlasBaseIE):
+ _VALID_URL = r'https?://(?:www\.)?mujrozhlas\.cz/(?:[^/]+/)*(?P<id>[^/?#&]+)'
+ _TESTS = [{
+ # single episode extraction
+ 'url': 'https://www.mujrozhlas.cz/vykopavky/ach-jo-zase-teleci-rizek-je-mnohem-min-cesky-nez-jsme-si-mysleli',
+ 'md5': '6f8fd68663e64936623e67c152a669e0',
+ 'info_dict': {
+ 'id': '10787730',
+ 'ext': 'mp3',
+ 'title': 'Ach jo, zase to telecí! Řízek je mnohem míň český, než jsme si mysleli',
+ 'description': 'md5:db7141e9caaedc9041ec7cefb9a62908',
+ 'timestamp': 1684915200,
+ 'modified_timestamp': 1687550432,
+ 'series': 'Vykopávky',
+ 'thumbnail': 'https://portal.rozhlas.cz/sites/default/files/images/84377046610af6ddc54d910b1dd7a22b.jpg',
+ 'channel_id': 'radio-wave',
+ 'upload_date': '20230524',
+ 'modified_date': '20230623',
+ },
+ }, {
+ # serial extraction
+ 'url': 'https://www.mujrozhlas.cz/radiokniha/jaroslava-janackova-pribeh-tajemneho-psani-o-pramenech-genezi-babicky',
+ 'playlist_mincount': 7,
+ 'info_dict': {
+ 'id': 'bb2b5f4e-ffb4-35a6-a34a-046aa62d6f6b',
+ 'title': 'Jaroslava Janáčková: Příběh tajemného psaní. O pramenech a genezi Babičky',
+ 'description': 'md5:7434d8fac39ac9fee6df098e11dfb1be',
+ },
+ }, {
+ # show extraction
+ 'url': 'https://www.mujrozhlas.cz/nespavci',
+ 'playlist_mincount': 14,
+ 'info_dict': {
+ 'id': '09db9b37-d0f4-368c-986a-d3439f741f08',
+ 'title': 'Nespavci',
+ 'description': 'md5:c430adcbf9e2b9eac88b745881e814dc',
+ },
+ }, {
+ # serialPart
+ 'url': 'https://www.mujrozhlas.cz/povidka/gustavo-adolfo-becquer-hora-duchu',
+ 'info_dict': {
+ 'id': '8889035',
+ 'ext': 'm4a',
+ 'title': 'Gustavo Adolfo Bécquer: Hora duchů',
+ 'description': 'md5:343a15257b376c276e210b78e900ffea',
+ 'chapter': 'Hora duchů a Polibek – dva tajemné příběhy Gustava Adolfa Bécquera',
+ 'thumbnail': 'https://portal.rozhlas.cz/sites/default/files/images/2adfe1387fb140634be725c1ccf26214.jpg',
+ 'timestamp': 1708173000,
+ 'episode': 'Episode 1',
+ 'episode_number': 1,
+ 'series': 'Povídka',
+ 'modified_date': '20240217',
+ 'upload_date': '20240217',
+ 'modified_timestamp': 1708173198,
+ 'channel_id': 'vltava',
+ },
+ 'params': {'skip_download': 'dash'},
+ }]
+
+ def _call_api(self, path, item_id, msg='API JSON'):
+ return self._download_json(
+ f'https://api.mujrozhlas.cz/{path}/{item_id}', item_id,
+ note=f'Downloading {msg}', errnote=f'Failed to download {msg}')['data']
+
+ def _extract_audio_entry(self, entry):
+ audio_id = entry['meta']['ga']['contentId']
+
+ return {
+ 'id': audio_id,
+ 'formats': self._extract_formats(entry['attributes'], audio_id),
+ **traverse_obj(entry, {
+ 'title': ('attributes', 'title'),
+ 'description': ('attributes', 'description'),
+ 'episode_number': ('attributes', 'part'),
+ 'series': ('attributes', 'mirroredShow', 'title'),
+ 'chapter': ('attributes', 'mirroredSerial', 'title'),
+ 'artist': ('meta', 'ga', 'contentAuthor'),
+ 'channel_id': ('meta', 'ga', 'contentCreator'),
+ 'timestamp': ('attributes', 'since', {unified_timestamp}),
+ 'modified_timestamp': ('attributes', 'updated', {unified_timestamp}),
+ 'thumbnail': ('attributes', 'asset', 'url', {url_or_none}),
+ })
+ }
+
+ def _entries(self, api_url, playlist_id):
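+        # cursor-style pagination: keep following the 'links.next' URL until it is absent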
+ for page in itertools.count(1):
+ episodes = self._download_json(
+ api_url, playlist_id, note=f'Downloading episodes page {page}',
+ errnote=f'Failed to download episodes page {page}', fatal=False)
+ for episode in traverse_obj(episodes, ('data', lambda _, v: v['meta']['ga']['contentId'])):
+ yield self._extract_audio_entry(episode)
+ api_url = traverse_obj(episodes, ('links', 'next', {url_or_none}))
+ if not api_url:
+ break
+
+ def _real_extract(self, url):
+ display_id = self._match_id(url)
+ webpage = self._download_webpage(url, display_id)
+ info = self._search_json(r'\bvar\s+dl\s*=', webpage, 'info json', display_id)
+
+ entity = info['siteEntityBundle']
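+        # 'siteEntityBundle' discriminates the page type: episode/serialPart resolve to
+        # a single audio, show/serial to playlists; 'person' is not yet supported below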
+
+ if entity in ('episode', 'serialPart'):
+ return self._extract_audio_entry(self._call_api(
+ 'episodes', info['contentId'], 'episode info API JSON'))
+
+ elif entity in ('show', 'serial'):
+ playlist_id = info['contentShow'].split(':')[0] if entity == 'show' else info['contentId']
+ data = self._call_api(f'{entity}s', playlist_id, f'{entity} playlist JSON')
+ api_url = data['relationships']['episodes']['links']['related']
+ return self.playlist_result(
+ self._entries(api_url, playlist_id), playlist_id,
+ **traverse_obj(data, ('attributes', {
+ 'title': 'title',
+ 'description': 'description',
+ })))
+
+ else:
+ # `entity == 'person'` not implemented yet by API, ref:
+ # https://api.mujrozhlas.cz/persons/8367e456-2a57-379a-91bb-e699619bea49/participation
+ raise ExtractorError(f'Unsupported entity type "{entity}"')
diff --git a/yt_dlp/extractor/rte.py b/yt_dlp/extractor/rte.py
new file mode 100644
index 0000000..7ba80d4
--- /dev/null
+++ b/yt_dlp/extractor/rte.py
@@ -0,0 +1,164 @@
+import re
+
+from .common import InfoExtractor
+from ..networking.exceptions import HTTPError
+from ..utils import (
+    ExtractorError,
+    float_or_none,
+    parse_iso8601,
+    str_or_none,
+    try_get,
+    unescapeHTML,
+    url_or_none,
+)
+
+
+class RteBaseIE(InfoExtractor):
+ def _real_extract(self, url):
+ item_id = self._match_id(url)
+
+ info_dict = {}
+ formats = []
+
+ ENDPOINTS = (
+ 'https://feeds.rasset.ie/rteavgen/player/playlist?type=iptv&format=json&showId=',
+ 'http://www.rte.ie/rteavgen/getplaylist/?type=web&format=json&id=',
+ )
+
+ for num, ep_url in enumerate(ENDPOINTS, start=1):
+ try:
+ data = self._download_json(ep_url + item_id, item_id)
+ except ExtractorError as ee:
+ if num < len(ENDPOINTS) or formats:
+ continue
+ if isinstance(ee.cause, HTTPError) and ee.cause.status == 404:
+ error_info = self._parse_json(ee.cause.response.read().decode(), item_id, fatal=False)
+ if error_info:
+ raise ExtractorError(
+ '%s said: %s' % (self.IE_NAME, error_info['message']),
+ expected=True)
+ raise
+
+ # NB the string values in the JSON are stored using XML escaping(!)
+ show = try_get(data, lambda x: x['shows'][0], dict)
+ if not show:
+ continue
+
+ if not info_dict:
+ title = unescapeHTML(show['title'])
+ description = unescapeHTML(show.get('description'))
+ thumbnail = show.get('thumbnail')
+ duration = float_or_none(show.get('duration'), 1000)
+ timestamp = parse_iso8601(show.get('published'))
+ info_dict = {
+ 'id': item_id,
+ 'title': title,
+ 'description': description,
+ 'thumbnail': thumbnail,
+ 'timestamp': timestamp,
+ 'duration': duration,
+ }
+
+ mg = try_get(show, lambda x: x['media:group'][0], dict)
+ if not mg:
+ continue
+
+ if mg.get('url'):
+ m = re.match(r'(?P<url>rtmpe?://[^/]+)/(?P<app>.+)/(?P<playpath>mp4:.*)', mg['url'])
+ if m:
+ m = m.groupdict()
+ formats.append({
+ 'url': m['url'] + '/' + m['app'],
+ 'app': m['app'],
+ 'play_path': m['playpath'],
+ 'player_url': url,
+ 'ext': 'flv',
+ 'format_id': 'rtmp',
+ })
+
+ if mg.get('hls_server') and mg.get('hls_url'):
+ formats.extend(self._extract_m3u8_formats(
+ mg['hls_server'] + mg['hls_url'], item_id, 'mp4',
+ entry_protocol='m3u8_native', m3u8_id='hls', fatal=False))
+
+ if mg.get('hds_server') and mg.get('hds_url'):
+ formats.extend(self._extract_f4m_formats(
+ mg['hds_server'] + mg['hds_url'], item_id,
+ f4m_id='hds', fatal=False))
+
+ mg_rte_server = str_or_none(mg.get('rte:server'))
+ mg_url = str_or_none(mg.get('url'))
+ if mg_rte_server and mg_url:
+ hds_url = url_or_none(mg_rte_server + mg_url)
+ if hds_url:
+ formats.extend(self._extract_f4m_formats(
+ hds_url, item_id, f4m_id='hds', fatal=False))
+
+ info_dict['formats'] = formats
+ return info_dict
+
+
+class RteIE(RteBaseIE):
+ IE_NAME = 'rte'
+ IE_DESC = 'Raidió Teilifís Éireann TV'
+ _VALID_URL = r'https?://(?:www\.)?rte\.ie/player/[^/]{2,3}/show/[^/]+/(?P<id>[0-9]+)'
+ _TEST = {
+ 'url': 'http://www.rte.ie/player/ie/show/iwitness-862/10478715/',
+ 'md5': '4a76eb3396d98f697e6e8110563d2604',
+ 'info_dict': {
+ 'id': '10478715',
+ 'ext': 'mp4',
+ 'title': 'iWitness',
+ 'thumbnail': r're:^https?://.*\.jpg$',
+ 'description': 'The spirit of Ireland, one voice and one minute at a time.',
+ 'duration': 60.046,
+ 'upload_date': '20151012',
+ 'timestamp': 1444694160,
+ },
+ }
+
+
+class RteRadioIE(RteBaseIE):
+ IE_NAME = 'rte:radio'
+ IE_DESC = 'Raidió Teilifís Éireann radio'
+ # Radioplayer URLs have two distinct specifier formats,
+ # the old format #!rii=<channel_id>:<id>:<playable_item_id>:<date>:
+ # the new format #!rii=b<channel_id>_<id>_<playable_item_id>_<date>_
+ # where the IDs are int/empty, the date is DD-MM-YYYY, and the specifier may be truncated.
+ # An <id> uniquely defines an individual recording, and is the only part we require.
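+    # e.g. old: #!rii=16:10507902:2414:27-12-2015:  ->  id 10507902
+    #      new: #!rii=b16_3250678_8861_06-04-2012_  ->  id 3250678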
+ _VALID_URL = r'https?://(?:www\.)?rte\.ie/radio/utils/radioplayer/rteradioweb\.html#!rii=(?:b?[0-9]*)(?:%3A|:|%5F|_)(?P<id>[0-9]+)'
+
+ _TESTS = [{
+ # Old-style player URL; HLS and RTMPE formats
+ 'url': 'http://www.rte.ie/radio/utils/radioplayer/rteradioweb.html#!rii=16:10507902:2414:27-12-2015:',
+ 'md5': 'c79ccb2c195998440065456b69760411',
+ 'info_dict': {
+ 'id': '10507902',
+ 'ext': 'mp4',
+ 'title': 'Gloria',
+ 'thumbnail': r're:^https?://.*\.jpg$',
+ 'description': 'md5:9ce124a7fb41559ec68f06387cabddf0',
+ 'timestamp': 1451203200,
+ 'upload_date': '20151227',
+ 'duration': 7230.0,
+ },
+ }, {
+ # New-style player URL; RTMPE formats only
+ 'url': 'http://rte.ie/radio/utils/radioplayer/rteradioweb.html#!rii=b16_3250678_8861_06-04-2012_',
+ 'info_dict': {
+ 'id': '3250678',
+ 'ext': 'flv',
+ 'title': 'The Lyric Concert with Paul Herriott',
+ 'thumbnail': r're:^https?://.*\.jpg$',
+ 'description': '',
+ 'timestamp': 1333742400,
+ 'upload_date': '20120406',
+ 'duration': 7199.016,
+ },
+ 'params': {
+ # rtmp download
+ 'skip_download': True,
+ },
+ }]
diff --git a/yt_dlp/extractor/rtl2.py b/yt_dlp/extractor/rtl2.py
new file mode 100644
index 0000000..07e1aa3
--- /dev/null
+++ b/yt_dlp/extractor/rtl2.py
@@ -0,0 +1,97 @@
+import re
+
+from .common import InfoExtractor
+from ..utils import int_or_none
+
+
+class RTL2IE(InfoExtractor):
+ IE_NAME = 'rtl2'
+ _VALID_URL = r'https?://(?:www\.)?rtl2\.de/sendung/[^/]+/(?:video/(?P<vico_id>\d+)[^/]+/(?P<vivi_id>\d+)-|folge/)(?P<id>[^/?#]+)'
+ _TESTS = [{
+ 'url': 'http://www.rtl2.de/sendung/grip-das-motormagazin/folge/folge-203-0',
+ 'info_dict': {
+ 'id': 'folge-203-0',
+ 'ext': 'f4v',
+ 'title': 'GRIP sucht den Sommerkönig',
+ 'description': 'md5:e3adbb940fd3c6e76fa341b8748b562f'
+ },
+ 'params': {
+ # rtmp download
+ 'skip_download': True,
+ },
+ 'expected_warnings': ['Unable to download f4m manifest', 'Failed to download m3u8 information'],
+ }, {
+ 'url': 'http://www.rtl2.de/sendung/koeln-50667/video/5512-anna/21040-anna-erwischt-alex/',
+ 'info_dict': {
+ 'id': 'anna-erwischt-alex',
+ 'ext': 'mp4',
+ 'title': 'Anna erwischt Alex!',
+ 'description': 'Anna nimmt ihrem Vater nicht ab, dass er nicht spielt. Und tatsächlich erwischt sie ihn auf frischer Tat.'
+ },
+ 'params': {
+ # rtmp download
+ 'skip_download': True,
+ },
+ 'expected_warnings': ['Unable to download f4m manifest', 'Failed to download m3u8 information'],
+ }]
+
+ def _real_extract(self, url):
+ vico_id, vivi_id, display_id = self._match_valid_url(url).groups()
+ if not vico_id:
+ webpage = self._download_webpage(url, display_id)
+
+ mobj = re.search(
+ r'data-collection="(?P<vico_id>\d+)"[^>]+data-video="(?P<vivi_id>\d+)"',
+ webpage)
+ if mobj:
+ vico_id = mobj.group('vico_id')
+ vivi_id = mobj.group('vivi_id')
+ else:
+ vico_id = self._html_search_regex(
+ r'vico_id\s*:\s*([0-9]+)', webpage, 'vico_id')
+ vivi_id = self._html_search_regex(
+ r'vivi_id\s*:\s*([0-9]+)', webpage, 'vivi_id')
+
+ info = self._download_json(
+ 'https://service.rtl2.de/api-player-vipo/video.php',
+ display_id, query={
+ 'vico_id': vico_id,
+ 'vivi_id': vivi_id,
+ })
+ video_info = info['video']
+ title = video_info['titel']
+
+ formats = []
+
+ rtmp_url = video_info.get('streamurl')
+ if rtmp_url:
+ rtmp_url = rtmp_url.replace('\\', '')
+ stream_url = 'mp4:' + self._html_search_regex(r'/ondemand/(.+)', rtmp_url, 'stream URL')
+ rtmp_conn = ['S:connect', 'O:1', 'NS:pageUrl:' + url, 'NB:fpad:0', 'NN:videoFunction:1', 'O:0']
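+        # extra RTMP connect() arguments mimicking the site's Flash player handshake
+        # (page URL, fpad and videoFunction flags); presumably required by the CDN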
+
+ formats.append({
+ 'format_id': 'rtmp',
+ 'url': rtmp_url,
+ 'play_path': stream_url,
+ 'player_url': 'https://www.rtl2.de/sites/default/modules/rtl2/jwplayer/jwplayer-7.6.0/jwplayer.flash.swf',
+ 'page_url': url,
+ 'flash_version': 'LNX 11,2,202,429',
+ 'rtmp_conn': rtmp_conn,
+ 'no_resume': True,
+ 'quality': 1,
+ })
+
+ m3u8_url = video_info.get('streamurl_hls')
+ if m3u8_url:
+ formats.extend(self._extract_akamai_formats(m3u8_url, display_id))
+
+ return {
+ 'id': display_id,
+ 'title': title,
+ 'thumbnail': video_info.get('image'),
+ 'description': video_info.get('beschreibung'),
+ 'duration': int_or_none(video_info.get('duration')),
+ 'formats': formats,
+ }
diff --git a/yt_dlp/extractor/rtlnl.py b/yt_dlp/extractor/rtlnl.py
new file mode 100644
index 0000000..724cb64
--- /dev/null
+++ b/yt_dlp/extractor/rtlnl.py
@@ -0,0 +1,296 @@
+from .common import InfoExtractor
+from ..utils import (
+ int_or_none,
+ parse_duration,
+)
+
+
+class RtlNlIE(InfoExtractor):
+ IE_NAME = 'rtl.nl'
+ IE_DESC = 'rtl.nl and rtlxl.nl'
+ _EMBED_REGEX = [r'<iframe[^>]+?\bsrc=(?P<q1>[\'"])(?P<url>(?:https?:)?//(?:(?:www|static)\.)?rtl\.nl/(?:system/videoplayer/[^"]+(?:video_)?)?embed[^"]+)(?P=q1)']
+ _VALID_URL = r'''(?x)
+ https?://(?:(?:www|static)\.)?
+ (?:
+ rtlxl\.nl/(?:[^\#]*\#!|programma)/[^/]+/|
+ rtl\.nl/(?:(?:system/videoplayer/(?:[^/]+/)+(?:video_)?embed\.html|embed)\b.+?\buuid=|video/)|
+ embed\.rtl\.nl/\#uuid=
+ )
+ (?P<id>[0-9a-f-]+)'''
+
+ _TESTS = [{
+ # new URL schema
+ 'url': 'https://www.rtlxl.nl/programma/rtl-nieuws/0bd1384d-d970-3086-98bb-5c104e10c26f',
+ 'md5': '490428f1187b60d714f34e1f2e3af0b6',
+ 'info_dict': {
+ 'id': '0bd1384d-d970-3086-98bb-5c104e10c26f',
+ 'ext': 'mp4',
+ 'title': 'RTL Nieuws',
+ 'description': 'md5:d41d8cd98f00b204e9800998ecf8427e',
+ 'timestamp': 1593293400,
+ 'upload_date': '20200627',
+ 'duration': 661.08,
+ },
+ }, {
+ # old URL schema
+ 'url': 'http://www.rtlxl.nl/#!/rtl-nieuws-132237/82b1aad1-4a14-3d7b-b554-b0aed1b2c416',
+ 'md5': '473d1946c1fdd050b2c0161a4b13c373',
+ 'info_dict': {
+ 'id': '82b1aad1-4a14-3d7b-b554-b0aed1b2c416',
+ 'ext': 'mp4',
+ 'title': 'RTL Nieuws',
+ 'description': 'md5:d41d8cd98f00b204e9800998ecf8427e',
+ 'timestamp': 1461951000,
+ 'upload_date': '20160429',
+ 'duration': 1167.96,
+ },
+ 'skip': '404',
+ }, {
+ # best format available a3t
+ 'url': 'http://www.rtl.nl/system/videoplayer/derden/rtlnieuws/video_embed.html#uuid=84ae5571-ac25-4225-ae0c-ef8d9efb2aed/autoplay=false',
+ 'md5': 'dea7474214af1271d91ef332fb8be7ea',
+ 'info_dict': {
+ 'id': '84ae5571-ac25-4225-ae0c-ef8d9efb2aed',
+ 'ext': 'mp4',
+ 'timestamp': 1424039400,
+ 'title': 'RTL Nieuws - Nieuwe beelden Kopenhagen: chaos direct na aanslag',
+ 'thumbnail': r're:^https?://screenshots\.rtl\.nl/(?:[^/]+/)*sz=[0-9]+x[0-9]+/uuid=84ae5571-ac25-4225-ae0c-ef8d9efb2aed$',
+ 'upload_date': '20150215',
+ 'description': 'Er zijn nieuwe beelden vrijgegeven die vlak na de aanslag in Kopenhagen zijn gemaakt. Op de video is goed te zien hoe omstanders zich bekommeren om één van de slachtoffers, terwijl de eerste agenten ter plaatse komen.',
+ }
+ }, {
+ # empty synopsis and missing episodes (see https://github.com/ytdl-org/youtube-dl/issues/6275)
+ # best format available nettv
+ 'url': 'http://www.rtl.nl/system/videoplayer/derden/rtlnieuws/video_embed.html#uuid=f536aac0-1dc3-4314-920e-3bd1c5b3811a/autoplay=false',
+ 'info_dict': {
+ 'id': 'f536aac0-1dc3-4314-920e-3bd1c5b3811a',
+ 'ext': 'mp4',
+ 'title': 'RTL Nieuws - Meer beelden van overval juwelier',
+ 'thumbnail': r're:^https?://screenshots\.rtl\.nl/(?:[^/]+/)*sz=[0-9]+x[0-9]+/uuid=f536aac0-1dc3-4314-920e-3bd1c5b3811a$',
+ 'timestamp': 1437233400,
+ 'upload_date': '20150718',
+ 'duration': 30.474,
+ },
+ 'params': {
+ 'skip_download': True,
+ },
+ }, {
+ # encrypted m3u8 streams, georestricted
+ 'url': 'http://www.rtlxl.nl/#!/afl-2-257632/52a74543-c504-4cde-8aa8-ec66fe8d68a7',
+ 'only_matching': True,
+ }, {
+ 'url': 'http://www.rtl.nl/system/videoplayer/derden/embed.html#!/uuid=bb0353b0-d6a4-1dad-90e9-18fe75b8d1f0',
+ 'only_matching': True,
+ }, {
+ 'url': 'http://rtlxl.nl/?_ga=1.204735956.572365465.1466978370#!/rtl-nieuws-132237/3c487912-023b-49ac-903e-2c5d79f8410f',
+ 'only_matching': True,
+ }, {
+ 'url': 'https://www.rtl.nl/video/c603c9c2-601d-4b5e-8175-64f1e942dc7d/',
+ 'only_matching': True,
+ }, {
+ 'url': 'https://static.rtl.nl/embed/?uuid=1a2970fc-5c0b-43ff-9fdc-927e39e6d1bc&autoplay=false&publicatiepunt=rtlnieuwsnl',
+ 'only_matching': True,
+ }, {
+ # new embed URL schema
+ 'url': 'https://embed.rtl.nl/#uuid=84ae5571-ac25-4225-ae0c-ef8d9efb2aed/autoplay=false',
+ 'only_matching': True,
+ }]
+
+ def _real_extract(self, url):
+ uuid = self._match_id(url)
+ info = self._download_json(
+ 'http://www.rtl.nl/system/s4m/vfd/version=2/uuid=%s/fmt=adaptive/' % uuid,
+ uuid)
+
+ material = info['material'][0]
+ title = info['abstracts'][0]['name']
+ subtitle = material.get('title')
+ if subtitle:
+ title += ' - %s' % subtitle
+ description = material.get('synopsis')
+
+ meta = info.get('meta', {})
+
+ videopath = material['videopath']
+ m3u8_url = meta.get('videohost', 'http://manifest.us.rtl.nl') + videopath
+
+ formats = self._extract_m3u8_formats(
+ m3u8_url, uuid, 'mp4', m3u8_id='hls', fatal=False)
+
+ thumbnails = []
+
+        for p in ('poster_base_url', 'thumb_base_url'):
+ if not meta.get(p):
+ continue
+
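+            # base URLs embed the size as .../sz=<w>x<h>/ (cf. the screenshots.rtl.nl
+            # thumbnails in the tests above); width and height are parsed back out below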
+ thumbnails.append({
+ 'url': self._proto_relative_url(meta[p] + uuid),
+ 'width': int_or_none(self._search_regex(
+ r'/sz=([0-9]+)', meta[p], 'thumbnail width', fatal=False)),
+ 'height': int_or_none(self._search_regex(
+ r'/sz=[0-9]+x([0-9]+)',
+ meta[p], 'thumbnail height', fatal=False))
+ })
+
+ return {
+ 'id': uuid,
+ 'title': title,
+ 'formats': formats,
+ 'timestamp': material['original_date'],
+ 'description': description,
+ 'duration': parse_duration(material.get('duration')),
+ 'thumbnails': thumbnails,
+ }
+
+
+class RTLLuBaseIE(InfoExtractor):
+ _MEDIA_REGEX = {
+ 'video': r'<rtl-player\s[^>]*\bhls\s*=\s*"([^"]+)',
+ 'audio': r'<rtl-audioplayer\s[^>]*\bsrc\s*=\s*"([^"]+)',
+ 'thumbnail': r'<rtl-player\s[^>]*\bposter\s*=\s*"([^"]+)',
+ }
+
+ def get_media_url(self, webpage, video_id, media_type):
+ return self._search_regex(self._MEDIA_REGEX[media_type], webpage, f'{media_type} url', default=None)
+
+ def get_formats_and_subtitles(self, webpage, video_id):
+ video_url, audio_url = self.get_media_url(webpage, video_id, 'video'), self.get_media_url(webpage, video_id, 'audio')
+
+ formats, subtitles = [], {}
+ if video_url is not None:
+ formats, subtitles = self._extract_m3u8_formats_and_subtitles(video_url, video_id)
+ if audio_url is not None:
+ formats.append({'url': audio_url, 'ext': 'mp3', 'vcodec': 'none'})
+
+ return formats, subtitles
+
+ def _real_extract(self, url):
+ video_id = self._match_id(url)
+ is_live = video_id in ('live', 'live-2', 'lauschteren')
+
+ # TODO: extract comment from https://www.rtl.lu/comments?status=1&order=desc&context=news|article|<video_id>
+        # the context can be taken from <rtl-comments context=<context>> in the webpage
+ webpage = self._download_webpage(url, video_id)
+
+ formats, subtitles = self.get_formats_and_subtitles(webpage, video_id)
+
+ return {
+ 'id': video_id,
+ 'title': self._og_search_title(webpage),
+ 'description': self._og_search_description(webpage, default=None),
+ 'formats': formats,
+ 'subtitles': subtitles,
+ 'thumbnail': self.get_media_url(webpage, video_id, 'thumbnail') or self._og_search_thumbnail(webpage, default=None),
+ 'is_live': is_live,
+ }
+
+
+class RTLLuTeleVODIE(RTLLuBaseIE):
+ IE_NAME = 'rtl.lu:tele-vod'
+ _VALID_URL = r'https?://(?:www\.)?rtl\.lu/(tele/(?P<slug>[\w-]+)/v/|video/)(?P<id>\d+)(\.html)?'
+ _TESTS = [{
+ 'url': 'https://www.rtl.lu/tele/de-journal-vun-der-tele/v/3266757.html',
+ 'info_dict': {
+ 'id': '3266757',
+ 'title': 'Informatiounsversammlung Héichwaasser',
+ 'ext': 'mp4',
+ 'thumbnail': 'https://replay-assets.rtl.lu/2021/11/16/d3647fc4-470d-11ec-adc2-3a00abd6e90f_00008.jpg',
+ 'description': 'md5:b1db974408cc858c9fd241812e4a2a14',
+ }
+ }, {
+ 'url': 'https://www.rtl.lu/video/3295215',
+ 'info_dict': {
+ 'id': '3295215',
+ 'title': 'Kulturassisen iwwer d\'Bestandsopnam vum Lëtzebuerger Konscht',
+ 'ext': 'mp4',
+ 'thumbnail': 'https://replay-assets.rtl.lu/2022/06/28/0000_3295215_0000.jpg',
+ 'description': 'md5:85bcd4e0490aa6ec969d9bf16927437b',
+ }
+ }]
+
+
+class RTLLuArticleIE(RTLLuBaseIE):
+ IE_NAME = 'rtl.lu:article'
+ _VALID_URL = r'https?://(?:(www|5minutes|today)\.)rtl\.lu/(?:[\w-]+)/(?:[\w-]+)/a/(?P<id>\d+)\.html'
+ _TESTS = [{
+ # Audio-only
+ 'url': 'https://www.rtl.lu/sport/news/a/1934360.html',
+ 'info_dict': {
+ 'id': '1934360',
+ 'ext': 'mp3',
+ 'thumbnail': 'https://static.rtl.lu/rtl2008.lu/nt/p/2022/06/28/19/e4b37d66ddf00bab4c45617b91a5bb9b.jpeg',
+ 'description': 'md5:5eab4a2a911c1fff7efc1682a38f9ef7',
+ 'title': 'md5:40aa85f135578fbd549d3c9370321f99',
+ }
+ }, {
+ # 5minutes
+ 'url': 'https://5minutes.rtl.lu/espace-frontaliers/frontaliers-en-questions/a/1853173.html',
+ 'info_dict': {
+ 'id': '1853173',
+ 'ext': 'mp4',
+ 'description': 'md5:ac031da0740e997a5cf4633173634fee',
+ 'title': 'md5:87e17722ed21af0f24be3243f4ec0c46',
+ 'thumbnail': 'https://replay-assets.rtl.lu/2022/01/26/screenshot_20220126104933_3274749_12b249833469b0d6e4440a1dec83cdfa.jpg',
+ }
+ }, {
+ # today.lu
+ 'url': 'https://today.rtl.lu/entertainment/news/a/1936203.html',
+ 'info_dict': {
+ 'id': '1936203',
+ 'ext': 'mp4',
+ 'title': 'Once Upon A Time...zu Lëtzebuerg: The Three Witches\' Tower',
+ 'description': 'The witchy theme continues in the latest episode of Once Upon A Time...',
+ 'thumbnail': 'https://replay-assets.rtl.lu/2022/07/02/screenshot_20220702122859_3290019_412dc5185951b7f6545a4039c8be9235.jpg',
+ }
+ }]
+
+
+class RTLLuLiveIE(RTLLuBaseIE):
+ _VALID_URL = r'https?://www\.rtl\.lu/(?:tele|radio)/(?P<id>live(?:-\d+)?|lauschteren)'
+ _TESTS = [{
+ # Tele:live
+ 'url': 'https://www.rtl.lu/tele/live',
+ 'info_dict': {
+ 'id': 'live',
+ 'ext': 'mp4',
+ 'live_status': 'is_live',
+ 'title': r're:RTL - Télé LIVE \d{4}-\d{2}-\d{2} \d{2}:\d{2}',
+ 'thumbnail': 'https://static.rtl.lu/livestream/channel1.jpg',
+ }
+ }, {
+ # Tele:live-2
+ 'url': 'https://www.rtl.lu/tele/live-2',
+ 'info_dict': {
+ 'id': 'live-2',
+ 'ext': 'mp4',
+ 'live_status': 'is_live',
+ 'title': r're:RTL - Télé LIVE \d{4}-\d{2}-\d{2} \d{2}:\d{2}',
+ 'thumbnail': 'https://static.rtl.lu/livestream/channel2.jpg',
+ }
+ }, {
+ # Radio:lauschteren
+ 'url': 'https://www.rtl.lu/radio/lauschteren',
+ 'info_dict': {
+ 'id': 'lauschteren',
+ 'ext': 'mp4',
+ 'live_status': 'is_live',
+ 'title': r're:RTL - Radio LIVE \d{4}-\d{2}-\d{2} \d{2}:\d{2}',
+ 'thumbnail': 'https://static.rtl.lu/livestream/rtlradiowebtv.jpg',
+ }
+ }]
+
+
+class RTLLuRadioIE(RTLLuBaseIE):
+ _VALID_URL = r'https?://www\.rtl\.lu/radio/(?:[\w-]+)/s/(?P<id>\d+)(\.html)?'
+ _TESTS = [{
+ 'url': 'https://www.rtl.lu/radio/5-vir-12/s/4033058.html',
+ 'info_dict': {
+ 'id': '4033058',
+ 'ext': 'mp3',
+ 'description': 'md5:f855a4f3e3235393ae47ed1db5d934b9',
+ 'title': '5 vir 12 - Stau um Stau',
+ 'thumbnail': 'https://static.rtl.lu/rtlg//2022/06/24/c9c19e5694a14be46a3647a3760e1f62.jpg',
+ }
+ }]
diff --git a/yt_dlp/extractor/rtnews.py b/yt_dlp/extractor/rtnews.py
new file mode 100644
index 0000000..6be9945
--- /dev/null
+++ b/yt_dlp/extractor/rtnews.py
@@ -0,0 +1,196 @@
+import re
+
+from .common import InfoExtractor
+from ..utils import js_to_json
+
+
+class RTNewsIE(InfoExtractor):
+ _VALID_URL = r'https?://(?:www\.)?rt\.com/[^/]+/(?:[^/]+/)?(?P<id>\d+)'
+
+ _TESTS = [{
+ 'url': 'https://www.rt.com/sport/546301-djokovic-arrives-belgrade-crowds/',
+ 'playlist_mincount': 2,
+ 'info_dict': {
+ 'id': '546301',
+ 'title': 'Crowds gather to greet deported Djokovic as he returns to Serbia (VIDEO)',
+ 'description': 'md5:1d5bfe1a988d81fd74227cfdf93d314d',
+ 'thumbnail': 'https://cdni.rt.com/files/2022.01/article/61e587a085f540102c3386c1.png'
+ },
+ }, {
+ 'url': 'https://www.rt.com/shows/in-question/535980-plot-to-assassinate-julian-assange/',
+ 'playlist_mincount': 1,
+ 'info_dict': {
+ 'id': '535980',
+ 'title': 'The plot to assassinate Julian Assange',
+ 'description': 'md5:55279ce5e4441dc1d16e2e4a730152cd',
+ 'thumbnail': 'https://cdni.rt.com/files/2021.09/article/615226f42030274e8879b53d.png'
+ },
+ 'playlist': [{
+ 'info_dict': {
+ 'id': '6152271d85f5400464496162',
+ 'ext': 'mp4',
+ 'title': '6152271d85f5400464496162',
+ },
+ }]
+ }]
+
+ def _entries(self, webpage):
+ video_urls = set(re.findall(r'https://cdnv\.rt\.com/.*[a-f0-9]+\.mp4', webpage))
+ for v_url in video_urls:
+ v_id = re.search(r'([a-f0-9]+)\.mp4', v_url).group(1)
+ if v_id:
+ yield {
+ 'id': v_id,
+ 'title': v_id,
+ 'url': v_url,
+ }
+
+    def _real_extract(self, url):
+        playlist_id = self._match_id(url)
+        webpage = self._download_webpage(url, playlist_id)
+
+        return {
+            '_type': 'playlist',
+            'id': playlist_id,
+            'entries': self._entries(webpage),
+            'title': self._og_search_title(webpage),
+            'description': self._og_search_description(webpage),
+            'thumbnail': self._og_search_thumbnail(webpage),
+        }
+
+
+class RTDocumentryIE(InfoExtractor):
+ _VALID_URL = r'https?://rtd\.rt\.com/(?:(?:series|shows)/[^/]+|films)/(?P<id>[^/?$&#]+)'
+
+ _TESTS = [{
+ 'url': 'https://rtd.rt.com/films/escobars-hitman/',
+ 'info_dict': {
+ 'id': 'escobars-hitman',
+ 'ext': 'mp4',
+ 'title': "Escobar's Hitman. Former drug-gang killer, now loved and loathed in Colombia",
+ 'description': 'md5:647c76984b7cb9a8b52a567e87448d88',
+ 'thumbnail': 'https://cdni.rt.com/rtd-files/films/escobars-hitman/escobars-hitman_11.jpg',
+ 'average_rating': 8.53,
+ 'duration': 3134.0
+ },
+ 'params': {'skip_download': True}
+ }, {
+ 'url': 'https://rtd.rt.com/shows/the-kalashnikova-show-military-secrets-anna-knishenko/iskander-tactical-system-natos-headache/',
+ 'info_dict': {
+ 'id': 'iskander-tactical-system-natos-headache',
+ 'ext': 'mp4',
+ 'title': "Iskander tactical system. NATO's headache | The Kalashnikova Show. Episode 10",
+ 'description': 'md5:da7c24a0aa67bc2bb88c86658508ca87',
+ 'thumbnail': 'md5:89de8ce38c710b7c501ff02d47e2aa89',
+ 'average_rating': 9.27,
+ 'duration': 274.0,
+ 'timestamp': 1605726000,
+ 'view_count': int,
+ 'upload_date': '20201118'
+ },
+ 'params': {'skip_download': True}
+ }, {
+ 'url': 'https://rtd.rt.com/series/i-am-hacked-trailer/introduction-to-safe-digital-life-ep2/',
+ 'info_dict': {
+ 'id': 'introduction-to-safe-digital-life-ep2',
+ 'ext': 'mp4',
+ 'title': 'How to Keep your Money away from Hackers | I am Hacked. Episode 2',
+ 'description': 'md5:c46fa9a5af86c0008c45a3940a8cce87',
+ 'thumbnail': 'md5:a5e81b9bf5aed8f5e23d9c053601b825',
+ 'average_rating': 10.0,
+ 'duration': 1524.0,
+ 'timestamp': 1636977600,
+ 'view_count': int,
+ 'upload_date': '20211115'
+ },
+ 'params': {'skip_download': True}
+ }]
+
+    def _real_extract(self, url):
+        video_id = self._match_id(url)
+        webpage = self._download_webpage(url, video_id)
+        ld_json = self._search_json_ld(webpage, None, fatal=False)
+        if not ld_json:
+            self.raise_no_formats('No video/audio found at the provided url.', expected=True)
+        media_json = self._parse_json(
+            self._search_regex(r'(?s)\'Med\'\s*:\s*\[\s*({.+})\s*\]\s*};', webpage, 'media info'),
+            video_id, transform_source=js_to_json)
+        if 'title' not in ld_json and 'title' in media_json:
+            ld_json['title'] = media_json['title']
+        formats = [{'url': src['file']} for src in media_json.get('sources') or [] if src.get('file')]
+
+        return {
+            'id': video_id,
+            'thumbnail': media_json.get('image'),
+            'formats': formats,
+            **ld_json,
+        }
+
+
+class RTDocumentryPlaylistIE(InfoExtractor):
+ _VALID_URL = r'https?://rtd\.rt\.com/(?:series|shows)/(?P<id>[^/]+)/$'
+
+ _TESTS = [{
+ 'url': 'https://rtd.rt.com/series/i-am-hacked-trailer/',
+ 'playlist_mincount': 6,
+ 'info_dict': {
+ 'id': 'i-am-hacked-trailer',
+ },
+ }, {
+ 'url': 'https://rtd.rt.com/shows/the-kalashnikova-show-military-secrets-anna-knishenko/',
+ 'playlist_mincount': 34,
+ 'info_dict': {
+ 'id': 'the-kalashnikova-show-military-secrets-anna-knishenko',
+ },
+ }]
+
+    def _entries(self, webpage, playlist_id):
+        video_urls = set(re.findall(r'list-2__link\s*"\s*href="([^"]+)"', webpage))
+        for v_url in video_urls:
+            if playlist_id not in v_url:
+                continue
+            yield self.url_result(
+                'https://rtd.rt.com%s' % v_url,
+                ie=RTDocumentryIE.ie_key())
+
+    def _real_extract(self, url):
+        playlist_id = self._match_id(url)
+        webpage = self._download_webpage(url, playlist_id)
+
+        return {
+            '_type': 'playlist',
+            'id': playlist_id,
+            'entries': self._entries(webpage, playlist_id),
+        }
+
+
+class RuptlyIE(InfoExtractor):
+ _VALID_URL = r'https?://(?:www\.)?ruptly\.tv/[a-z]{2}/videos/(?P<id>\d+-\d+)'
+
+ _TESTS = [{
+ 'url': 'https://www.ruptly.tv/en/videos/20220112-020-Japan-Double-trouble-Tokyo-zoo-presents-adorable-panda-twins',
+ 'info_dict': {
+ 'id': '20220112-020',
+ 'ext': 'mp4',
+ 'title': 'Japan: Double trouble! Tokyo zoo presents adorable panda twins | Video Ruptly',
+ 'description': 'md5:85a8da5fdb31486f0562daf4360ce75a',
+ 'thumbnail': 'https://storage.ruptly.tv/thumbnails/20220112-020/i6JQKnTNpYuqaXsR/i6JQKnTNpYuqaXsR.jpg'
+ },
+ 'params': {'skip_download': True}
+ }]
+
+    def _real_extract(self, url):
+        video_id = self._match_id(url)
+        webpage = self._download_webpage(url, video_id)
+        m3u8_url = self._search_regex(r'preview_url"\s?:\s?"(https?://storage\.ruptly\.tv/video_projects/.+\.m3u8)"', webpage, 'm3u8 url', fatal=False)
+        if not m3u8_url:
+            self.raise_no_formats('No video/audio found at the provided url.', expected=True)
+        formats, subs = self._extract_m3u8_formats_and_subtitles(m3u8_url, video_id, ext='mp4')
+        return {
+            'id': video_id,
+            'formats': formats,
+            'subtitles': subs,
+            'title': self._og_search_title(webpage),
+            'description': self._og_search_description(webpage),
+            'thumbnail': self._og_search_thumbnail(webpage),
+        }
diff --git a/yt_dlp/extractor/rtp.py b/yt_dlp/extractor/rtp.py
new file mode 100644
index 0000000..5928a20
--- /dev/null
+++ b/yt_dlp/extractor/rtp.py
@@ -0,0 +1,102 @@
+import base64
+import json
+import re
+import urllib.parse
+
+from .common import InfoExtractor
+from ..utils import js_to_json
+
+
+class RTPIE(InfoExtractor):
+ _VALID_URL = r'https?://(?:www\.)?rtp\.pt/play/p(?P<program_id>[0-9]+)/(?P<id>[^/?#]+)/?'
+ _TESTS = [{
+ 'url': 'http://www.rtp.pt/play/p405/e174042/paixoes-cruzadas',
+ 'md5': 'e736ce0c665e459ddb818546220b4ef8',
+ 'info_dict': {
+ 'id': 'e174042',
+ 'ext': 'mp3',
+ 'title': 'Paixões Cruzadas',
+ 'description': 'As paixões musicais de António Cartaxo e António Macedo',
+ 'thumbnail': r're:^https?://.*\.jpg',
+ },
+ }, {
+ 'url': 'http://www.rtp.pt/play/p831/a-quimica-das-coisas',
+ 'only_matching': True,
+ }]
+
+ _RX_OBFUSCATION = re.compile(r'''(?xs)
+ atob\s*\(\s*decodeURIComponent\s*\(\s*
+ (\[[0-9A-Za-z%,'"]*\])
+ \s*\.\s*join\(\s*(?:""|'')\s*\)\s*\)\s*\)
+ ''')
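+    # Illustrative (assumed) shape of the obfuscation this matches in the player JS:
+    #   atob(decodeURIComponent(["aHR0cHM6","Ly9leGFtcGxl"].join("")))
+    # __unobfuscate() below percent-decodes the joined chunks, base64-decodes the
+    # result and re-embeds it as a JSON string literal before js_to_json() runs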
+
+ def __unobfuscate(self, data, *, video_id):
+ if data.startswith('{'):
+ data = self._RX_OBFUSCATION.sub(
+ lambda m: json.dumps(
+ base64.b64decode(urllib.parse.unquote(
+ ''.join(self._parse_json(m.group(1), video_id))
+ )).decode('iso-8859-1')),
+ data)
+ return js_to_json(data)
+
+ def _real_extract(self, url):
+ video_id = self._match_id(url)
+
+ webpage = self._download_webpage(url, video_id)
+ title = self._html_search_meta(
+ 'twitter:title', webpage, display_name='title', fatal=True)
+
+ f, config = self._search_regex(
+ r'''(?sx)
+ var\s+f\s*=\s*(?P<f>".*?"|{[^;]+?});\s*
+ var\s+player1\s+=\s+new\s+RTPPlayer\s*\((?P<config>{(?:(?!\*/).)+?})\);(?!\s*\*/)
+ ''', webpage,
+ 'player config', group=('f', 'config'))
+
+ f = self._parse_json(
+ f, video_id,
+ lambda data: self.__unobfuscate(data, video_id=video_id))
+ config = self._parse_json(
+ config, video_id,
+ lambda data: self.__unobfuscate(data, video_id=video_id))
+
+ formats = []
+ if isinstance(f, dict):
+ f_hls = f.get('hls')
+ if f_hls is not None:
+ formats.extend(self._extract_m3u8_formats(
+ f_hls, video_id, 'mp4', 'm3u8_native', m3u8_id='hls'))
+
+ f_dash = f.get('dash')
+ if f_dash is not None:
+ formats.extend(self._extract_mpd_formats(f_dash, video_id, mpd_id='dash'))
+ else:
+ formats.append({
+ 'format_id': 'f',
+ 'url': f,
+ 'vcodec': 'none' if config.get('mediaType') == 'audio' else None,
+ })
+
+ subtitles = {}
+
+ vtt = config.get('vtt')
+ if vtt is not None:
+            for lcode, lname, sub_url in vtt:
+                subtitles.setdefault(lcode, []).append({
+                    'name': lname,
+                    'url': sub_url,
+                })
+
+ return {
+ 'id': video_id,
+ 'title': title,
+ 'formats': formats,
+ 'description': self._html_search_meta(['description', 'twitter:description'], webpage),
+ 'thumbnail': config.get('poster') or self._og_search_thumbnail(webpage),
+ 'subtitles': subtitles,
+ }
diff --git a/yt_dlp/extractor/rtrfm.py b/yt_dlp/extractor/rtrfm.py
new file mode 100644
index 0000000..7381d82
--- /dev/null
+++ b/yt_dlp/extractor/rtrfm.py
@@ -0,0 +1,66 @@
+from .common import InfoExtractor
+
+
+class RTRFMIE(InfoExtractor):
+ _VALID_URL = r'https?://(?:www\.)?rtrfm\.com\.au/(?:shows|show-episode)/(?P<id>[^/?\#&]+)'
+ _TESTS = [
+ {
+ 'url': 'https://rtrfm.com.au/shows/breakfast/',
+ 'md5': '46168394d3a5ce237cf47e85d0745413',
+ 'info_dict': {
+ 'id': 'breakfast-2021-11-16',
+ 'ext': 'mp3',
+ 'series': 'Breakfast with Taylah',
+ 'title': r're:^Breakfast with Taylah \d{4}-\d{2}-\d{2}$',
+ 'description': 'md5:0979c3ab1febfbec3f1ccb743633c611',
+ },
+ 'skip': 'ID and md5 changes daily',
+ },
+ {
+ 'url': 'https://rtrfm.com.au/show-episode/breakfast-2021-11-11/',
+ 'md5': '396bedf1e40f96c62b30d4999202a790',
+ 'info_dict': {
+ 'id': 'breakfast-2021-11-11',
+ 'ext': 'mp3',
+ 'series': 'Breakfast with Taylah',
+ 'title': 'Breakfast with Taylah 2021-11-11',
+ 'description': 'md5:0979c3ab1febfbec3f1ccb743633c611',
+ },
+ },
+ {
+ 'url': 'https://rtrfm.com.au/show-episode/breakfast-2020-06-01/',
+ 'md5': '594027f513ec36a24b15d65007a24dff',
+ 'info_dict': {
+ 'id': 'breakfast-2020-06-01',
+ 'ext': 'mp3',
+ 'series': 'Breakfast with Taylah',
+ 'title': 'Breakfast with Taylah 2020-06-01',
+ 'description': r're:^Breakfast with Taylah ',
+ },
+ 'skip': 'This audio has expired',
+ },
+ ]
+
+ def _real_extract(self, url):
+ display_id = self._match_id(url)
+ webpage = self._download_webpage(url, display_id)
+ show, date, title = self._search_regex(
+ r'''\.playShow(?:From)?\(['"](?P<show>[^'"]+)['"],\s*['"](?P<date>[0-9]{4}-[0-9]{2}-[0-9]{2})['"],\s*['"](?P<title>[^'"]+)['"]''',
+ webpage, 'details', group=('show', 'date', 'title'))
+ url = self._download_json(
+ 'https://restreams.rtrfm.com.au/rzz',
+ show, 'Downloading MP3 URL', query={'n': show, 'd': date})['u']
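+        # response shape (assumed from the ['u'] access above): {"u": "<episode media URL>"}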
+        # An mp4 URL is the only indicator of an error before the download is actually
+        # attempted; mp4 downloads always fail (403 for current episodes, 404 for missing ones).
+ if '.mp4' in url:
+ url = None
+ self.raise_no_formats('Expired or no episode on this date', expected=True)
+ return {
+ 'id': '%s-%s' % (show, date),
+ 'title': '%s %s' % (title, date),
+ 'series': title,
+ 'url': url,
+ 'release_date': date,
+ 'description': self._og_search_description(webpage),
+ }
diff --git a/yt_dlp/extractor/rts.py b/yt_dlp/extractor/rts.py
new file mode 100644
index 0000000..bce5cba
--- /dev/null
+++ b/yt_dlp/extractor/rts.py
@@ -0,0 +1,232 @@
+import re
+
+from .srgssr import SRGSSRIE
+from ..compat import compat_str
+from ..utils import (
+ determine_ext,
+ int_or_none,
+ parse_duration,
+ parse_iso8601,
+ unescapeHTML,
+ urljoin,
+)
+
+
+class RTSIE(SRGSSRIE):  # XXX: Do not subclass from concrete IE
+ _WORKING = False
+ IE_DESC = 'RTS.ch'
+ _VALID_URL = r'rts:(?P<rts_id>\d+)|https?://(?:.+?\.)?rts\.ch/(?:[^/]+/){2,}(?P<id>[0-9]+)-(?P<display_id>.+?)\.html'
+
+ _TESTS = [
+ {
+ 'url': 'http://www.rts.ch/archives/tv/divers/3449373-les-enfants-terribles.html',
+ 'md5': '753b877968ad8afaeddccc374d4256a5',
+ 'info_dict': {
+ 'id': '3449373',
+ 'display_id': 'les-enfants-terribles',
+ 'ext': 'mp4',
+ 'duration': 1488,
+ 'title': 'Les Enfants Terribles',
+ 'description': 'France Pommier et sa soeur Luce Feral, les deux filles de ce groupe de 5.',
+ 'uploader': 'Divers',
+ 'upload_date': '19680921',
+ 'timestamp': -40280400,
+ 'thumbnail': r're:^https?://.*\.image',
+ 'view_count': int,
+ },
+ 'expected_warnings': ['Unable to download f4m manifest', 'Failed to download m3u8 information'],
+ },
+ {
+ 'url': 'http://www.rts.ch/emissions/passe-moi-les-jumelles/5624067-entre-ciel-et-mer.html',
+ 'info_dict': {
+ 'id': '5624065',
+ 'title': 'Passe-moi les jumelles',
+ },
+ 'playlist_mincount': 4,
+ },
+ {
+ 'url': 'http://www.rts.ch/video/sport/hockey/5745975-1-2-kloten-fribourg-5-2-second-but-pour-gotteron-par-kwiatowski.html',
+ 'info_dict': {
+ 'id': '5745975',
+ 'display_id': '1-2-kloten-fribourg-5-2-second-but-pour-gotteron-par-kwiatowski',
+ 'ext': 'mp4',
+ 'duration': 48,
+ 'title': '1/2, Kloten - Fribourg (5-2): second but pour Gottéron par Kwiatowski',
+ 'description': 'Hockey - Playoff',
+ 'uploader': 'Hockey',
+ 'upload_date': '20140403',
+ 'timestamp': 1396556882,
+ 'thumbnail': r're:^https?://.*\.image',
+ 'view_count': int,
+ },
+ 'params': {
+ # m3u8 download
+ 'skip_download': True,
+ },
+ 'expected_warnings': ['Unable to download f4m manifest', 'Failed to download m3u8 information'],
+ 'skip': 'Blocked outside Switzerland',
+ },
+ {
+ 'url': 'http://www.rts.ch/video/info/journal-continu/5745356-londres-cachee-par-un-epais-smog.html',
+ 'md5': '9bb06503773c07ce83d3cbd793cebb91',
+ 'info_dict': {
+ 'id': '5745356',
+ 'display_id': 'londres-cachee-par-un-epais-smog',
+ 'ext': 'mp4',
+ 'duration': 33,
+ 'title': 'Londres cachée par un épais smog',
+ 'description': 'Un important voile de smog recouvre Londres depuis mercredi, provoqué par la pollution et du sable du Sahara.',
+ 'uploader': 'L\'actu en vidéo',
+ 'upload_date': '20140403',
+ 'timestamp': 1396537322,
+ 'thumbnail': r're:^https?://.*\.image',
+ 'view_count': int,
+ },
+ 'expected_warnings': ['Unable to download f4m manifest', 'Failed to download m3u8 information'],
+ },
+ {
+ 'url': 'http://www.rts.ch/audio/couleur3/programmes/la-belle-video-de-stephane-laurenceau/5706148-urban-hippie-de-damien-krisl-03-04-2014.html',
+ 'md5': 'dd8ef6a22dff163d063e2a52bc8adcae',
+ 'info_dict': {
+ 'id': '5706148',
+ 'display_id': 'urban-hippie-de-damien-krisl-03-04-2014',
+ 'ext': 'mp3',
+ 'duration': 123,
+ 'title': '"Urban Hippie", de Damien Krisl',
+ 'description': 'Des Hippies super glam.',
+ 'upload_date': '20140403',
+ 'timestamp': 1396551600,
+ },
+ },
+ {
+ # article with videos on the right-hand side
+ 'url': 'http://www.rts.ch/sport/hockey/6693917-hockey-davos-decroche-son-31e-titre-de-champion-de-suisse.html',
+ 'info_dict': {
+ 'id': '6693917',
+ 'title': 'Hockey: Davos décroche son 31e titre de champion de Suisse',
+ },
+ 'playlist_mincount': 5,
+ },
+ {
+ 'url': 'http://pages.rts.ch/emissions/passe-moi-les-jumelles/5624065-entre-ciel-et-mer.html',
+ 'only_matching': True,
+ }
+ ]
+
+ def _real_extract(self, url):
+ m = self._match_valid_url(url)
+ media_id = m.group('rts_id') or m.group('id')
+ display_id = m.group('display_id') or media_id
+
+ def download_json(internal_id):
+ return self._download_json(
+ 'http://www.rts.ch/a/%s.html?f=json/article' % internal_id,
+ display_id)
+
+ all_info = download_json(media_id)
+
+ # The media_id extracted from the URL is not always a real media id
+ if 'video' not in all_info and 'audio' not in all_info:
+ entries = []
+
+ for item in all_info.get('items', []):
+ item_url = item.get('url')
+ if not item_url:
+ continue
+ entries.append(self.url_result(item_url, 'RTS'))
+
+ if not entries:
+ page, urlh = self._download_webpage_handle(url, display_id)
+ if re.match(self._VALID_URL, urlh.url).group('id') != media_id:
+ return self.url_result(urlh.url, 'RTS')
+
+ # article with videos on the right-hand side
+ videos = re.findall(
+ r'<article[^>]+class="content-item"[^>]*>\s*<a[^>]+data-video-urn="urn:([^"]+)"',
+ page)
+ if not videos:
+ videos = re.findall(
+ r'(?s)<iframe[^>]+class="srg-player"[^>]+src="[^"]+urn:([^"]+)"',
+ page)
+ if videos:
+ entries = [self.url_result('srgssr:%s' % video_urn, 'SRGSSR') for video_urn in videos]
+
+ if entries:
+ return self.playlist_result(entries, media_id, all_info.get('title'))
+
+ internal_id = self._html_search_regex(
+ r'<(?:video|audio) data-id="([0-9]+)"', page,
+ 'internal video id')
+ all_info = download_json(internal_id)
+
+ media_type = 'video' if 'video' in all_info else 'audio'
+
+ # Check for errors (the SRGSSR media data request raises for blocked media)
+ self._get_media_data('rts', media_type, media_id)
+
+ info = all_info['video']['JSONinfo'] if 'video' in all_info else all_info['audio']
+
+ title = info['title']
+
+ def extract_bitrate(url):
+ return int_or_none(self._search_regex(
+ r'-([0-9]+)k\.', url, 'bitrate', default=None))
+
+ formats = []
+ streams = info.get('streams', {})
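+ # Skip the SD-only HDS/HLS variants whenever the corresponding full
+ # variant is also available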
+ for format_id, format_url in streams.items():
+ if format_id == 'hds_sd' and 'hds' in streams:
+ continue
+ if format_id == 'hls_sd' and 'hls' in streams:
+ continue
+ ext = determine_ext(format_url)
+ if ext in ('m3u8', 'f4m'):
+ format_url = self._get_tokenized_src(format_url, media_id, format_id)
+ if ext == 'f4m':
+ formats.extend(self._extract_f4m_formats(
+ format_url + ('?' if '?' not in format_url else '&') + 'hdcore=3.4.0',
+ media_id, f4m_id=format_id, fatal=False))
+ else:
+ formats.extend(self._extract_m3u8_formats(
+ format_url, media_id, 'mp4', 'm3u8_native', m3u8_id=format_id, fatal=False))
+ else:
+ formats.append({
+ 'format_id': format_id,
+ 'url': format_url,
+ 'tbr': extract_bitrate(format_url),
+ })
+
+ download_base = 'http://rtsww%s-d.rts.ch/' % ('-a' if media_type == 'audio' else '')
+ for media in info.get('media', []):
+ media_url = media.get('url')
+ if not media_url or re.match(r'https?://', media_url):
+ continue
+ rate = media.get('rate')
+ ext = media.get('ext') or determine_ext(media_url, 'mp4')
+ format_id = ext
+ if rate:
+ format_id += '-%dk' % rate
+ formats.append({
+ 'format_id': format_id,
+ 'url': urljoin(download_base, media_url),
+ 'tbr': rate or extract_bitrate(media_url),
+ })
+
+ self._check_formats(formats, media_id)
+
+ duration = info.get('duration') or info.get('cutout') or info.get('cutduration')
+ if isinstance(duration, compat_str):
+ duration = parse_duration(duration)
+
+ return {
+ 'id': media_id,
+ 'display_id': display_id,
+ 'formats': formats,
+ 'title': title,
+ 'description': info.get('intro'),
+ 'duration': duration,
+ 'view_count': int_or_none(info.get('plays')),
+ 'uploader': info.get('programName'),
+ 'timestamp': parse_iso8601(info.get('broadcast_date')),
+ 'thumbnail': unescapeHTML(info.get('preview_image_url')),
+ }
diff --git a/yt_dlp/extractor/rtvcplay.py b/yt_dlp/extractor/rtvcplay.py
new file mode 100644
index 0000000..741c472
--- /dev/null
+++ b/yt_dlp/extractor/rtvcplay.py
@@ -0,0 +1,285 @@
+import re
+
+from .common import InfoExtractor, ExtractorError
+from ..utils import (
+ clean_html,
+ determine_ext,
+ int_or_none,
+ float_or_none,
+ js_to_json,
+ mimetype2ext,
+ traverse_obj,
+ urljoin,
+ url_or_none,
+)
+
+
+class RTVCPlayBaseIE(InfoExtractor):
+ _BASE_VALID_URL = r'https?://(?:www\.)?rtvcplay\.co'
+
+ def _extract_player_config(self, webpage, video_id):
+ return self._search_json(
+ r'<script\b[^>]*>[^<]*(?:var|let|const)\s+config\s*=', re.sub(r'"\s*\+\s*"', '', webpage),
+ 'player_config', video_id, transform_source=js_to_json)
+
+ def _extract_formats_and_subtitles_player_config(self, player_config, video_id):
+ formats, subtitles = [], {}
+ for source in traverse_obj(player_config, ('sources', ..., lambda _, v: url_or_none(v['url']))):
+ ext = mimetype2ext(source.get('mimetype'), default=determine_ext(source['url']))
+ if ext == 'm3u8':
+ fmts, subs = self._extract_m3u8_formats_and_subtitles(
+ source['url'], video_id, 'mp4', fatal=False)
+ formats.extend(fmts)
+ self._merge_subtitles(subs, target=subtitles)
+ else:
+ formats.append({
+ 'url': source['url'],
+ 'ext': ext,
+ })
+
+ return formats, subtitles
+
+
+class RTVCPlayIE(RTVCPlayBaseIE):
+ _VALID_URL = RTVCPlayBaseIE._BASE_VALID_URL + r'/(?P<category>(?!embed)[^/]+)/(?:[^?#]+/)?(?P<id>[\w-]+)'
+
+ _TESTS = [{
+ 'url': 'https://www.rtvcplay.co/en-vivo/canal-institucional',
+ 'info_dict': {
+ 'id': 'canal-institucional',
+ 'title': r're:^Canal Institucional',
+ 'description': 'md5:eff9e548394175928059320c006031ea',
+ 'thumbnail': r're:^https?://.*\.(?:jpg|png)',
+ 'live_status': 'is_live',
+ 'ext': 'mp4',
+ },
+ 'params': {
+ 'skip_download': 'Livestream',
+ },
+ }, {
+ 'url': 'https://www.rtvcplay.co/en-vivo/senal-colombia',
+ 'info_dict': {
+ 'id': 'senal-colombia',
+ 'title': r're:^Señal Colombia',
+ 'description': 'md5:799f16a401d97f40c33a2c6a3e2a507b',
+ 'thumbnail': r're:^https?://.*\.(?:jpg|png)',
+ 'live_status': 'is_live',
+ 'ext': 'mp4',
+ },
+ 'params': {
+ 'skip_download': 'Livestream',
+ },
+ }, {
+ 'url': 'https://www.rtvcplay.co/en-vivo/radio-nacional',
+ 'info_dict': {
+ 'id': 'radio-nacional',
+ 'title': r're:^Radio Nacional',
+ 'description': 'md5:5de009bc6a9fa79d2a6cf0b73f977d53',
+ 'thumbnail': r're:^https?://.*\.(?:jpg|png)',
+ 'live_status': 'is_live',
+ 'ext': 'mp4',
+ },
+ 'params': {
+ 'skip_download': 'Livestream',
+ },
+ }, {
+ 'url': 'https://www.rtvcplay.co/peliculas-ficcion/senoritas',
+ 'md5': '1288ee6f6d1330d880f98bff2ed710a3',
+ 'info_dict': {
+ 'id': 'senoritas',
+ 'title': 'Señoritas',
+ 'description': 'md5:f095a2bb52cb6cf279daf6302f86fb32',
+ 'thumbnail': r're:^https?://.*\.(?:jpg|png)',
+ 'ext': 'mp4',
+ },
+ }, {
+ 'url': 'https://www.rtvcplay.co/competencias-basicas-ciudadanas-y-socioemocionales/profe-en-tu-casa/james-regresa-clases-28022022',
+ 'md5': 'f040a7380a269ad633cf837384d5e9fc',
+ 'info_dict': {
+ 'id': 'james-regresa-clases-28022022',
+ 'title': 'James regresa a clases - 28/02/2022',
+ 'description': 'md5:c5dcdf757c7ab29305e8763c6007e675',
+ 'ext': 'mp4',
+ },
+ }, {
+ 'url': 'https://www.rtvcplay.co/peliculas-documentales/llinas-el-cerebro-y-el-universo',
+ 'info_dict': {
+ 'id': 'llinas-el-cerebro-y-el-universo',
+ 'title': 'Llinás, el cerebro y el universo',
+ 'description': 'md5:add875bf2309bb52b3e8b9b06116d9b0',
+ 'thumbnail': r're:^https?://.*\.(?:jpg|png)',
+ },
+ 'playlist_mincount': 3,
+ }, {
+ 'url': 'https://www.rtvcplay.co/competencias-basicas-ciudadanas-y-socioemocionales/profe-en-tu-casa',
+ 'info_dict': {
+ 'id': 'profe-en-tu-casa',
+ 'title': 'Profe en tu casa',
+ 'description': 'md5:47dbe20e263194413b1db2a2805a4f2e',
+ 'thumbnail': r're:^https?://.*\.(?:jpg|png)',
+ },
+ 'playlist_mincount': 537,
+ }, {
+ 'url': 'https://www.rtvcplay.co/series-al-oido/relato-de-un-naufrago-una-travesia-del-periodismo-a-la-literatura',
+ 'info_dict': {
+ 'id': 'relato-de-un-naufrago-una-travesia-del-periodismo-a-la-literatura',
+ 'title': 'Relato de un náufrago: una travesía del periodismo a la literatura',
+ 'description': 'md5:6da28fdca4a5a568ea47ef65ef775603',
+ 'thumbnail': r're:^https?://.*\.(?:jpg|png)',
+ },
+ 'playlist_mincount': 5,
+ }, {
+ 'url': 'https://www.rtvcplay.co/series-al-oido/diez-versiones',
+ 'info_dict': {
+ 'id': 'diez-versiones',
+ 'title': 'Diez versiones',
+ 'description': 'md5:997471ed971cb3fd8e41969457675306',
+ 'thumbnail': r're:^https?://.*\.(?:jpg|png)',
+ },
+ 'playlist_mincount': 20,
+ }]
+
+ def _real_extract(self, url):
+ video_id, category = self._match_valid_url(url).group('id', 'category')
+ webpage = self._download_webpage(url, video_id)
+
+ hydration = self._search_json(
+ r'window\.__RTVCPLAY_STATE__\s*=', webpage, 'hydration',
+ video_id, transform_source=js_to_json)['content']['currentContent']
+
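+ # VOD pages expose an asset id that is substituted into an HLS URL
+ # template; live channels expose their HLS URL directly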
+ asset_id = traverse_obj(hydration, ('video', 'assetid'))
+ if asset_id:
+ hls_url = hydration['base_url_hls'].replace('[node:field_asset_id]', asset_id)
+ else:
+ hls_url = traverse_obj(hydration, ('channel', 'hls'))
+
+ metadata = traverse_obj(hydration, {
+ 'title': 'title',
+ 'description': 'description',
+ 'thumbnail': ((('channel', 'image', 'logo'), ('resource', 'image', 'cover_desktop')), 'path'),
+ }, get_all=False)
+
+ # No direct HLS URL: this is probably a program (playlist) page
+ if not hls_url:
+ seasons = traverse_obj(
+ hydration, ('widgets', lambda _, y: y['type'] == 'seasonList', 'contents'),
+ get_all=False)
+ if not seasons:
+ podcast_episodes = hydration.get('audios')
+ if not podcast_episodes:
+ raise ExtractorError('Could not find an asset id, a program playlist or any podcast episodes')
+
+ return self.playlist_result([
+ self.url_result(episode['file'], url_transparent=True, **traverse_obj(episode, {
+ 'title': 'title',
+ 'description': ('description', {clean_html}),
+ 'episode_number': ('chapter_number', {float_or_none}, {int_or_none}),
+ 'season_number': ('season', {int_or_none}),
+ })) for episode in podcast_episodes], video_id, **metadata)
+
+ entries = [self.url_result(
+ urljoin(url, episode['slug']), url_transparent=True,
+ **traverse_obj(season, {
+ 'season': 'title',
+ 'season_number': ('season', {int_or_none}),
+ }), **traverse_obj(episode, {
+ 'title': 'title',
+ 'thumbnail': ('image', 'cover', 'path'),
+ 'episode_number': ('chapter_number', {int_or_none}),
+ })) for season in seasons for episode in traverse_obj(season, ('contents', ...))]
+
+ return self.playlist_result(entries, video_id, **metadata)
+
+ formats, subtitles = self._extract_m3u8_formats_and_subtitles(hls_url, video_id, 'mp4')
+
+ return {
+ 'id': video_id,
+ 'formats': formats,
+ 'subtitles': subtitles,
+ 'is_live': category == 'en-vivo',
+ **metadata,
+ }
+
+
+class RTVCPlayEmbedIE(RTVCPlayBaseIE):
+ _VALID_URL = RTVCPlayBaseIE._BASE_VALID_URL + r'/embed/(?P<id>[\w-]+)'
+
+ _TESTS = [{
+ 'url': 'https://www.rtvcplay.co/embed/72b0e699-248b-4929-a4a8-3782702fa7f9',
+ 'md5': 'ed529aeaee7aa2a72afe91ac7d1177a8',
+ 'info_dict': {
+ 'id': '72b0e699-248b-4929-a4a8-3782702fa7f9',
+ 'title': 'Tráiler: Señoritas',
+ 'thumbnail': r're:^https?://.*\.(?:jpg|png)',
+ 'ext': 'mp4',
+ }
+ }]
+
+ def _real_extract(self, url):
+ video_id = self._match_id(url)
+ webpage = self._download_webpage(url, video_id)
+
+ player_config = self._extract_player_config(webpage, video_id)
+ formats, subtitles = self._extract_formats_and_subtitles_player_config(player_config, video_id)
+
+ asset_id = traverse_obj(player_config, ('rtvcplay', 'assetid'))
+ metadata = {} if not asset_id else self._download_json(
+ f'https://cms.rtvcplay.co/api/v1/video/asset-id/{asset_id}', video_id, fatal=False)
+
+ return {
+ 'id': video_id,
+ 'formats': formats,
+ 'subtitles': subtitles,
+ **traverse_obj(metadata, {
+ 'title': 'title',
+ 'description': 'description',
+ 'thumbnail': ('image', ..., 'thumbnail', 'path'),
+ }, get_all=False)
+ }
+
+
+class RTVCKalturaIE(RTVCPlayBaseIE):
+ _VALID_URL = r'https?://media\.rtvc\.gov\.co/kalturartvc/(?P<id>[\w-]+)'
+
+ _TESTS = [{
+ 'url': 'https://media.rtvc.gov.co/kalturartvc/indexSC.html',
+ 'info_dict': {
+ 'id': 'indexSC',
+ 'title': r're:^Señal Colombia',
+ 'description': 'md5:799f16a401d97f40c33a2c6a3e2a507b',
+ 'thumbnail': r're:^https?://.*\.(?:jpg|png)',
+ 'live_status': 'is_live',
+ 'ext': 'mp4',
+ },
+ 'params': {
+ 'skip_download': 'Livestream',
+ },
+ }]
+
+ def _real_extract(self, url):
+ video_id = self._match_id(url)
+ webpage = self._download_webpage(url, video_id)
+
+ player_config = self._extract_player_config(webpage, video_id)
+ formats, subtitles = self._extract_formats_and_subtitles_player_config(player_config, video_id)
+
+ channel_id = traverse_obj(player_config, ('rtvcplay', 'channelId'))
+ metadata = {} if not channel_id else self._download_json(
+ f'https://cms.rtvcplay.co/api/v1/taxonomy_term/streaming/{channel_id}', video_id, fatal=False)
+
+ fmts, subs = self._extract_m3u8_formats_and_subtitles(
+ traverse_obj(metadata, ('channel', 'hls')), video_id, 'mp4', fatal=False)
+ formats.extend(fmts)
+ self._merge_subtitles(subs, target=subtitles)
+
+ return {
+ 'id': video_id,
+ 'formats': formats,
+ 'subtitles': subtitles,
+ 'is_live': True,
+ **traverse_obj(metadata, {
+ 'title': 'title',
+ 'description': 'description',
+ 'thumbnail': ('channel', 'image', 'logo', 'path'),
+ })
+ }
diff --git a/yt_dlp/extractor/rtve.py b/yt_dlp/extractor/rtve.py
new file mode 100644
index 0000000..a99a266
--- /dev/null
+++ b/yt_dlp/extractor/rtve.py
@@ -0,0 +1,344 @@
+import base64
+import io
+import struct
+
+from .common import InfoExtractor
+from ..compat import compat_b64decode
+from ..utils import (
+ ExtractorError,
+ determine_ext,
+ float_or_none,
+ qualities,
+ remove_end,
+ remove_start,
+ try_get,
+)
+
+
+class RTVEALaCartaIE(InfoExtractor):
+ IE_NAME = 'rtve.es:alacarta'
+ IE_DESC = 'RTVE a la carta'
+ _VALID_URL = r'https?://(?:www\.)?rtve\.es/(m/)?(alacarta/videos|filmoteca)/[^/]+/[^/]+/(?P<id>\d+)'
+
+ _TESTS = [{
+ 'url': 'http://www.rtve.es/alacarta/videos/balonmano/o-swiss-cup-masculina-final-espana-suecia/2491869/',
+ 'md5': '1d49b7e1ca7a7502c56a4bf1b60f1b43',
+ 'info_dict': {
+ 'id': '2491869',
+ 'ext': 'mp4',
+ 'title': 'Balonmano - Swiss Cup masculina. Final: España-Suecia',
+ 'duration': 5024.566,
+ 'series': 'Balonmano',
+ },
+ 'expected_warnings': ['Failed to download MPD manifest', 'Failed to download m3u8 information'],
+ }, {
+ 'note': 'Live stream',
+ 'url': 'http://www.rtve.es/alacarta/videos/television/24h-live/1694255/',
+ 'info_dict': {
+ 'id': '1694255',
+ 'ext': 'mp4',
+ 'title': 're:^24H LIVE [0-9]{4}-[0-9]{2}-[0-9]{2} [0-9]{2}:[0-9]{2}$',
+ 'is_live': True,
+ },
+ 'params': {
+ 'skip_download': 'live stream',
+ },
+ }, {
+ 'url': 'http://www.rtve.es/alacarta/videos/servir-y-proteger/servir-proteger-capitulo-104/4236788/',
+ 'md5': 'd850f3c8731ea53952ebab489cf81cbf',
+ 'info_dict': {
+ 'id': '4236788',
+ 'ext': 'mp4',
+ 'title': 'Servir y proteger - Capítulo 104',
+ 'duration': 3222.0,
+ },
+ 'expected_warnings': ['Failed to download MPD manifest', 'Failed to download m3u8 information'],
+ }, {
+ 'url': 'http://www.rtve.es/m/alacarta/videos/cuentame-como-paso/cuentame-como-paso-t16-ultimo-minuto-nuestra-vida-capitulo-276/2969138/?media=tve',
+ 'only_matching': True,
+ }, {
+ 'url': 'http://www.rtve.es/filmoteca/no-do/not-1-introduccion-primer-noticiario-espanol/1465256/',
+ 'only_matching': True,
+ }]
+
+ def _real_initialize(self):
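+ # RTVE assigns a ztnr 'manager' (thumbnail host variant) per client,
+ # obtained by sending the base64-encoded User-Agent to the odin/loki endpoint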
+ user_agent_b64 = base64.b64encode(self.get_param('http_headers')['User-Agent'].encode('utf-8')).decode('utf-8')
+ self._manager = self._download_json(
+ 'http://www.rtve.es/odin/loki/' + user_agent_b64,
+ None, 'Fetching manager info')['manager']
+
+ @staticmethod
+ def _decrypt_url(png):
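+ # The stream URLs are hidden inside a PNG: skip the 8-byte signature,
+ # then walk the (length, type, data, CRC) chunk layout, decoding each
+ # tEXt chunk until IEND is reached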
+ encrypted_data = io.BytesIO(compat_b64decode(png)[8:])
+ while True:
+ length = struct.unpack('!I', encrypted_data.read(4))[0]
+ chunk_type = encrypted_data.read(4)
+ if chunk_type == b'IEND':
+ break
+ data = encrypted_data.read(length)
+ if chunk_type == b'tEXt':
+ alphabet_data, text = data.split(b'\0')
+ quality, url_data = text.split(b'%%')
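+ # Each tEXt payload is '<alphabet>\0<quality>%%<url_data>': a
+ # down-sampled alphabet followed by the URL encoded as two-digit
+ # indices into it, interleaved with junk characters that the
+ # counters below skip over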
+ alphabet = []
+ e = 0
+ d = 0
+ for l in alphabet_data.decode('iso-8859-1'):
+ if d == 0:
+ alphabet.append(l)
+ d = e = (e + 1) % 4
+ else:
+ d -= 1
+ url = ''
+ f = 0
+ e = 3
+ b = 1
+ for letter in url_data.decode('iso-8859-1'):
+ if f == 0:
+ l = int(letter) * 10
+ f = 1
+ else:
+ if e == 0:
+ l += int(letter)
+ url += alphabet[l]
+ e = (b + 3) % 4
+ f = 0
+ b += 1
+ else:
+ e -= 1
+
+ yield quality.decode(), url
+ encrypted_data.read(4) # CRC
+
+ def _extract_png_formats(self, video_id):
+ png = self._download_webpage(
+ 'http://www.rtve.es/ztnr/movil/thumbnail/%s/videos/%s.png' % (self._manager, video_id),
+ video_id, 'Downloading url information', query={'q': 'v2'})
+ q = qualities(['Media', 'Alta', 'HQ', 'HD_READY', 'HD_FULL'])
+ formats = []
+ for quality, video_url in self._decrypt_url(png):
+ ext = determine_ext(video_url)
+ if ext == 'm3u8':
+ formats.extend(self._extract_m3u8_formats(
+ video_url, video_id, 'mp4', 'm3u8_native',
+ m3u8_id='hls', fatal=False))
+ elif ext == 'mpd':
+ formats.extend(self._extract_mpd_formats(
+ video_url, video_id, 'dash', fatal=False))
+ else:
+ formats.append({
+ 'format_id': quality,
+ 'quality': q(quality),
+ 'url': video_url,
+ })
+ return formats
+
+ def _real_extract(self, url):
+ video_id = self._match_id(url)
+ info = self._download_json(
+ 'http://www.rtve.es/api/videos/%s/config/alacarta_videos.json' % video_id,
+ video_id)['page']['items'][0]
+ if info['state'] == 'DESPU':
+ raise ExtractorError('The video is no longer available', expected=True)
+ title = info['title'].strip()
+ formats = self._extract_png_formats(video_id)
+
+ subtitles = None
+ sbt_file = info.get('sbtFile')
+ if sbt_file:
+ subtitles = self.extract_subtitles(video_id, sbt_file)
+
+ is_live = info.get('live') is True
+
+ return {
+ 'id': video_id,
+ 'title': title,
+ 'formats': formats,
+ 'thumbnail': info.get('image'),
+ 'subtitles': subtitles,
+ 'duration': float_or_none(info.get('duration'), 1000),
+ 'is_live': is_live,
+ 'series': info.get('programTitle'),
+ }
+
+ def _get_subtitles(self, video_id, sub_file):
+ subs = self._download_json(
+ sub_file + '.json', video_id,
+ 'Downloading subtitles info')['page']['items']
+ return {
+ s['lang']: [{'ext': 'vtt', 'url': s['src']}]
+ for s in subs}
+
+
+class RTVEAudioIE(RTVEALaCartaIE): # XXX: Do not subclass from concrete IE
+ IE_NAME = 'rtve.es:audio'
+ IE_DESC = 'RTVE audio'
+ _VALID_URL = r'https?://(?:www\.)?rtve\.es/(alacarta|play)/audios/[^/]+/[^/]+/(?P<id>[0-9]+)'
+
+ _TESTS = [{
+ 'url': 'https://www.rtve.es/alacarta/audios/a-hombros-de-gigantes/palabra-ingeniero-codigos-informaticos-27-04-21/5889192/',
+ 'md5': 'ae06d27bff945c4e87a50f89f6ce48ce',
+ 'info_dict': {
+ 'id': '5889192',
+ 'ext': 'mp3',
+ 'title': 'Códigos informáticos',
+ 'thumbnail': r're:https?://.+/1598856591583.jpg',
+ 'duration': 349.440,
+ 'series': 'A hombros de gigantes',
+ },
+ }, {
+ 'url': 'https://www.rtve.es/play/audios/en-radio-3/ignatius-farray/5791165/',
+ 'md5': '072855ab89a9450e0ba314c717fa5ebc',
+ 'info_dict': {
+ 'id': '5791165',
+ 'ext': 'mp3',
+ 'title': 'Ignatius Farray',
+ 'thumbnail': r're:https?://.+/1613243011863.jpg',
+ 'duration': 3559.559,
+ 'series': 'En Radio 3'
+ },
+ }, {
+ 'url': 'https://www.rtve.es/play/audios/frankenstein-o-el-moderno-prometeo/capitulo-26-ultimo-muerte-victor-juan-jose-plans-mary-shelley/6082623/',
+ 'md5': '0eadab248cc8dd193fa5765712e84d5c',
+ 'info_dict': {
+ 'id': '6082623',
+ 'ext': 'mp3',
+ 'title': 'Capítulo 26 y último: La muerte de Victor',
+ 'thumbnail': r're:https?://.+/1632147445707.jpg',
+ 'duration': 3174.086,
+ 'series': 'Frankenstein o el moderno Prometeo'
+ },
+ }]
+
+ def _extract_png_formats(self, audio_id):
+ """
+ Retrieve the media-related PNG thumbnail, which obfuscates valuable
+ information about the media. The PNG is decrypted via the base class's
+ _decrypt_url method, yielding the media quality and media URL.
+ """
+ png = self._download_webpage(
+ 'http://www.rtve.es/ztnr/movil/thumbnail/%s/audios/%s.png' %
+ (self._manager, audio_id),
+ audio_id, 'Downloading url information', query={'q': 'v2'})
+ q = qualities(['Media', 'Alta', 'HQ', 'HD_READY', 'HD_FULL'])
+ formats = []
+ for quality, audio_url in self._decrypt_url(png):
+ ext = determine_ext(audio_url)
+ if ext == 'm3u8':
+ formats.extend(self._extract_m3u8_formats(
+ audio_url, audio_id, 'mp4', 'm3u8_native',
+ m3u8_id='hls', fatal=False))
+ elif ext == 'mpd':
+ formats.extend(self._extract_mpd_formats(
+ audio_url, audio_id, 'dash', fatal=False))
+ else:
+ formats.append({
+ 'format_id': quality,
+ 'quality': q(quality),
+ 'url': audio_url,
+ })
+ return formats
+
+ def _real_extract(self, url):
+ audio_id = self._match_id(url)
+ info = self._download_json(
+ 'https://www.rtve.es/api/audios/%s.json' % audio_id,
+ audio_id)['page']['items'][0]
+
+ return {
+ 'id': audio_id,
+ 'title': info['title'].strip(),
+ 'thumbnail': info.get('thumbnail'),
+ 'duration': float_or_none(info.get('duration'), 1000),
+ 'series': try_get(info, lambda x: x['programInfo']['title']),
+ 'formats': self._extract_png_formats(audio_id),
+ }
+
+
+class RTVEInfantilIE(RTVEALaCartaIE): # XXX: Do not subclass from concrete IE
+ IE_NAME = 'rtve.es:infantil'
+ IE_DESC = 'RTVE infantil'
+ _VALID_URL = r'https?://(?:www\.)?rtve\.es/infantil/serie/[^/]+/video/[^/]+/(?P<id>[0-9]+)/'
+
+ _TESTS = [{
+ 'url': 'http://www.rtve.es/infantil/serie/cleo/video/maneras-vivir/3040283/',
+ 'md5': '5747454717aedf9f9fdf212d1bcfc48d',
+ 'info_dict': {
+ 'id': '3040283',
+ 'ext': 'mp4',
+ 'title': 'Maneras de vivir',
+ 'thumbnail': r're:https?://.+/1426182947956\.JPG',
+ 'duration': 357.958,
+ },
+ 'expected_warnings': ['Failed to download MPD manifest', 'Failed to download m3u8 information'],
+ }]
+
+
+class RTVELiveIE(RTVEALaCartaIE): # XXX: Do not subclass from concrete IE
+ IE_NAME = 'rtve.es:live'
+ IE_DESC = 'RTVE.es live streams'
+ _VALID_URL = r'https?://(?:www\.)?rtve\.es/directo/(?P<id>[a-zA-Z0-9-]+)'
+
+ _TESTS = [{
+ 'url': 'http://www.rtve.es/directo/la-1/',
+ 'info_dict': {
+ 'id': 'la-1',
+ 'ext': 'mp4',
+ 'title': 're:^La 1 [0-9]{4}-[0-9]{2}-[0-9]{2} [0-9]{2}:[0-9]{2}$',
+ },
+ 'params': {
+ 'skip_download': 'live stream',
+ }
+ }]
+
+ def _real_extract(self, url):
+ mobj = self._match_valid_url(url)
+ video_id = mobj.group('id')
+
+ webpage = self._download_webpage(url, video_id)
+ title = remove_end(self._og_search_title(webpage), ' en directo en RTVE.es')
+ title = remove_start(title, 'Estoy viendo ')
+
+ vidplayer_id = self._search_regex(
+ (r'playerId=player([0-9]+)',
+ r'class=["\'].*?\blive_mod\b.*?["\'][^>]+data-assetid=["\'](\d+)',
+ r'data-id=["\'](\d+)'),
+ webpage, 'internal video ID')
+
+ return {
+ 'id': video_id,
+ 'title': title,
+ 'formats': self._extract_png_formats(vidplayer_id),
+ 'is_live': True,
+ }
+
+
+class RTVETelevisionIE(InfoExtractor):
+ IE_NAME = 'rtve.es:television'
+ _VALID_URL = r'https?://(?:www\.)?rtve\.es/television/[^/]+/[^/]+/(?P<id>\d+).shtml'
+
+ _TEST = {
+ 'url': 'http://www.rtve.es/television/20160628/revolucion-del-movil/1364141.shtml',
+ 'info_dict': {
+ 'id': '3069778',
+ 'ext': 'mp4',
+ 'title': 'Documentos TV - La revolución del móvil',
+ 'duration': 3496.948,
+ },
+ 'params': {
+ 'skip_download': True,
+ },
+ }
+
+ def _real_extract(self, url):
+ page_id = self._match_id(url)
+ webpage = self._download_webpage(url, page_id)
+
+ alacarta_url = self._search_regex(
+ r'data-location="alacarta_videos"[^<]+url&quot;:&quot;(http://www\.rtve\.es/alacarta.+?)&',
+ webpage, 'alacarta url', default=None)
+ if alacarta_url is None:
+ raise ExtractorError(
+ 'The webpage doesn\'t contain any video', expected=True)
+
+ return self.url_result(alacarta_url, ie=RTVEALaCartaIE.ie_key())
diff --git a/yt_dlp/extractor/rtvs.py b/yt_dlp/extractor/rtvs.py
new file mode 100644
index 0000000..a84a78d
--- /dev/null
+++ b/yt_dlp/extractor/rtvs.py
@@ -0,0 +1,85 @@
+import re
+
+from .common import InfoExtractor
+from ..utils import (
+ parse_duration,
+ traverse_obj,
+ unified_timestamp,
+)
+
+
+class RTVSIE(InfoExtractor):
+ _VALID_URL = r'https?://(?:www\.)?rtvs\.sk/(?:radio|televizia)/archiv(?:/\d+)?/(?P<id>\d+)/?(?:[#?]|$)'
+ _TESTS = [{
+ # radio archive
+ 'url': 'http://www.rtvs.sk/radio/archiv/11224/414872',
+ 'md5': '134d5d6debdeddf8a5d761cbc9edacb8',
+ 'info_dict': {
+ 'id': '414872',
+ 'ext': 'mp3',
+ 'title': 'Ostrov pokladov 1 časť.mp3',
+ 'duration': 2854,
+ 'thumbnail': 'https://www.rtvs.sk/media/a501/image/file/2/0000/b1R8.rtvs.jpg',
+ 'display_id': '135331',
+ }
+ }, {
+ # tv archive
+ 'url': 'http://www.rtvs.sk/televizia/archiv/8249/63118',
+ 'info_dict': {
+ 'id': '63118',
+ 'ext': 'mp4',
+ 'title': 'Amaro Džives - Náš deň',
+ 'description': 'Galavečer pri príležitosti Medzinárodného dňa Rómov.',
+ 'thumbnail': 'https://www.rtvs.sk/media/a501/image/file/2/0031/L7Qm.amaro_dzives_png.jpg',
+ 'timestamp': 1428555900,
+ 'upload_date': '20150409',
+ 'duration': 4986,
+ }
+ }, {
+ # tv archive
+ 'url': 'https://www.rtvs.sk/televizia/archiv/18083?utm_source=web&utm_medium=rozcestnik&utm_campaign=Robin',
+ 'info_dict': {
+ 'id': '18083',
+ 'ext': 'mp4',
+ 'title': 'Robin',
+ 'description': 'md5:2f70505a7b8364491003d65ff7a0940a',
+ 'timestamp': 1636652760,
+ 'display_id': '307655',
+ 'duration': 831,
+ 'upload_date': '20211111',
+ 'thumbnail': 'https://www.rtvs.sk/media/a501/image/file/2/0916/robin.jpg',
+ }
+ }]
+
+ def _real_extract(self, url):
+ video_id = self._match_id(url)
+
+ webpage = self._download_webpage(url, video_id)
+ iframe_id = self._search_regex(
+ r'<iframe[^>]+id\s*=\s*"player_[^_]+_([0-9]+)"', webpage, 'Iframe ID')
+ iframe_url = self._search_regex(
+ fr'<iframe[^>]+id\s*=\s*"player_[^_]+_{re.escape(iframe_id)}"[^>]+src\s*=\s*"([^"]+)"', webpage, 'Iframe URL')
+
+ webpage = self._download_webpage(iframe_url, video_id, 'Downloading iframe')
+ json_url = self._search_regex(r'var\s+url\s*=\s*"([^"]+)"\s*\+\s*ruurl', webpage, 'json URL')
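+ # The player appends browser-fingerprint query parameters (ruurl) to
+ # this base URL; supply fixed values in their place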
+ data = self._download_json(f'https:{json_url}b=mozilla&p=win&v=97&f=0&d=1', video_id)
+
+ if data.get('clip'):
+ data['playlist'] = [data['clip']]
+
+ if traverse_obj(data, ('playlist', 0, 'sources', 0, 'type')) == 'audio/mp3':
+ formats = [{'url': traverse_obj(data, ('playlist', 0, 'sources', 0, 'src'))}]
+ else:
+ formats = self._extract_m3u8_formats(traverse_obj(data, ('playlist', 0, 'sources', 0, 'src')), video_id)
+
+ return {
+ 'id': video_id,
+ 'display_id': iframe_id,
+ 'title': traverse_obj(data, ('playlist', 0, 'title')),
+ 'description': traverse_obj(data, ('playlist', 0, 'description')),
+ 'duration': parse_duration(traverse_obj(data, ('playlist', 0, 'length'))),
+ 'thumbnail': traverse_obj(data, ('playlist', 0, 'image')),
+ 'timestamp': unified_timestamp(traverse_obj(data, ('playlist', 0, 'datetime_create'))),
+ 'formats': formats
+ }
diff --git a/yt_dlp/extractor/rtvslo.py b/yt_dlp/extractor/rtvslo.py
new file mode 100644
index 0000000..39ace7c
--- /dev/null
+++ b/yt_dlp/extractor/rtvslo.py
@@ -0,0 +1,166 @@
+from .common import InfoExtractor
+from ..utils import (
+ ExtractorError,
+ int_or_none,
+ parse_duration,
+ traverse_obj,
+ unified_timestamp,
+ url_or_none,
+)
+
+
+class RTVSLOIE(InfoExtractor):
+ IE_NAME = 'rtvslo.si'
+ _VALID_URL = r'''(?x)
+ https?://(?:
+ (?:365|4d)\.rtvslo.si/arhiv/[^/?#&;]+|
+ (?:www\.)?rtvslo\.si/rtv365/arhiv
+ )/(?P<id>\d+)'''
+ _GEO_COUNTRIES = ['SI']
+
+ _API_BASE = 'https://api.rtvslo.si/ava/{}/{}?client_id=82013fb3a531d5414f478747c1aca622'
+ SUB_LANGS_MAP = {'Slovenski': 'sl'}
+
+ _TESTS = [
+ {
+ 'url': 'https://www.rtvslo.si/rtv365/arhiv/174842550?s=tv',
+ 'info_dict': {
+ 'id': '174842550',
+ 'ext': 'mp4',
+ 'release_timestamp': 1643140032,
+ 'upload_date': '20220125',
+ 'series': 'Dnevnik',
+ 'thumbnail': 'https://img.rtvcdn.si/_up/ava/ava_misc/show_logos/92/dnevnik_3_wide2.jpg',
+ 'description': 'md5:76a18692757aeb8f0f51221106277dd2',
+ 'timestamp': 1643137046,
+ 'title': 'Dnevnik',
+ 'series_id': '92',
+ 'release_date': '20220125',
+ 'duration': 1789,
+ },
+ }, {
+ 'url': 'https://365.rtvslo.si/arhiv/utrip/174843754',
+ 'info_dict': {
+ 'id': '174843754',
+ 'ext': 'mp4',
+ 'series_id': '94',
+ 'release_date': '20220129',
+ 'timestamp': 1643484455,
+ 'title': 'Utrip',
+ 'duration': 813,
+ 'thumbnail': 'https://img.rtvcdn.si/_up/ava/ava_misc/show_logos/94/utrip_1_wide2.jpg',
+ 'description': 'md5:77f2892630c7b17bb7a5bb84319020c9',
+ 'release_timestamp': 1643485825,
+ 'upload_date': '20220129',
+ 'series': 'Utrip',
+ },
+ }, {
+ 'url': 'https://365.rtvslo.si/arhiv/il-giornale-della-sera/174844609',
+ 'info_dict': {
+ 'id': '174844609',
+ 'ext': 'mp3',
+ 'series_id': '106615841',
+ 'title': 'Il giornale della sera',
+ 'duration': 1328,
+ 'series': 'Il giornale della sera',
+ 'timestamp': 1643743800,
+ 'release_timestamp': 1643745424,
+ 'thumbnail': 'https://img.rtvcdn.si/_up/ava/ava_misc/show_logos/il-giornale-della-sera_wide2.jpg',
+ 'upload_date': '20220201',
+ 'tbr': 128000,
+ 'release_date': '20220201',
+ },
+ }, {
+ 'url': 'https://365.rtvslo.si/arhiv/razred-zase/148350750',
+ 'info_dict': {
+ 'id': '148350750',
+ 'ext': 'mp4',
+ 'title': 'Prvi šolski dan, mozaična oddaja za mlade',
+ 'series': 'Razred zase',
+ 'series_id': '148185730',
+ 'duration': 1481,
+ 'upload_date': '20121019',
+ 'timestamp': 1350672122,
+ 'release_date': '20121019',
+ 'release_timestamp': 1350672122,
+ 'thumbnail': 'https://img.rtvcdn.si/_up/ava/ava_misc/show_logos/148185730/razred_zase_2014_logo_4d_wide2.jpg',
+ },
+ }, {
+ 'url': 'https://4d.rtvslo.si/arhiv/dnevnik/174842550',
+ 'only_matching': True
+ }
+ ]
+
+ def _real_extract(self, url):
+ v_id = self._match_id(url)
+ meta = self._download_json(self._API_BASE.format('getRecordingDrm', v_id), v_id)['response']
+
+ thumbs = [{'id': k, 'url': v, 'http_headers': {'Accept': 'image/jpeg'}}
+ for k, v in (meta.get('images') or {}).items()]
+
+ subs = {}
+ for s in traverse_obj(meta, 'subs', 'subtitles', default=[]):
+ lang = self.SUB_LANGS_MAP.get(s.get('language'), s.get('language') or 'und')
+ subs.setdefault(lang, []).append({
+ 'url': s.get('file'),
+ 'ext': traverse_obj(s, 'format', expected_type=str.lower),
+ })
+
+ jwt = meta.get('jwt')
+ if not jwt:
+ raise ExtractorError('Site did not provide an authentication token; cannot proceed')
+
+ media = self._download_json(self._API_BASE.format('getMedia', v_id), v_id, query={'jwt': jwt})['response']
+
+ formats = []
+ skip_protocols = ['smil', 'f4m', 'dash']
+ adaptive_url = traverse_obj(media, ('addaptiveMedia', 'hls_sec'), expected_type=url_or_none)
+ if adaptive_url:
+ formats = self._extract_wowza_formats(adaptive_url, v_id, skip_protocols=skip_protocols)
+
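+ # 'addaptiveMedia_sl' (the API's own spelling) carries the rendition
+ # with sign-language interpretation; its audio tracks are seemingly
+ # mislabelled 'eng', so relabel them as Slovenian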
+ adaptive_url = traverse_obj(media, ('addaptiveMedia_sl', 'hls_sec'), expected_type=url_or_none)
+ if adaptive_url:
+ for f in self._extract_wowza_formats(adaptive_url, v_id, skip_protocols=skip_protocols):
+ formats.append({
+ **f,
+ 'format_id': 'sign-' + f['format_id'],
+ 'format_note': 'Sign language interpretation', 'preference': -10,
+ 'language': (
+ 'slv' if f.get('language') == 'eng' and f.get('acodec') != 'none'
+ else f.get('language'))
+ })
+
+ for mediafile in traverse_obj(media, ('mediaFiles', lambda _, v: url_or_none(v['streams']['https']))):
+ formats.append(traverse_obj(mediafile, {
+ 'url': ('streams', 'https'),
+ 'ext': ('mediaType', {str.lower}),
+ 'width': ('width', {int_or_none}),
+ 'height': ('height', {int_or_none}),
+ 'tbr': ('bitrate', {int_or_none}),
+ 'filesize': ('filesize', {int_or_none}),
+ }))
+
+ for mediafile in traverse_obj(media, ('mediaFiles', lambda _, v: url_or_none(v['streams']['hls_sec']))):
+ formats.extend(self._extract_wowza_formats(
+ mediafile['streams']['hls_sec'], v_id, skip_protocols=skip_protocols))
+
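+ # The CDN appears to serve sentinel files rather than HTTP errors:
+ # 'intermission.mp4' for geo-blocked clips, 'dummy_720p.mp4' for
+ # missing ones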
+ if any('intermission.mp4' in x['url'] for x in formats):
+ self.raise_geo_restricted(countries=self._GEO_COUNTRIES, metadata_available=True)
+ if any('dummy_720p.mp4' in x.get('manifest_url', '') for x in formats) and meta.get('stub') == 'error':
+ raise ExtractorError(f'{self.IE_NAME} said: Clip not available', expected=True)
+
+ return {
+ 'id': v_id,
+ 'webpage_url': ''.join(traverse_obj(meta, ('canonical', ('domain', 'path')))),
+ 'title': meta.get('title'),
+ 'formats': formats,
+ 'subtitles': subs,
+ 'thumbnails': thumbs,
+ 'description': meta.get('description'),
+ 'timestamp': unified_timestamp(traverse_obj(meta, 'broadcastDate', ('broadcastDates', 0))),
+ 'release_timestamp': unified_timestamp(meta.get('recordingDate')),
+ 'duration': meta.get('duration') or parse_duration(meta.get('length')),
+ 'tags': meta.get('genre'),
+ 'series': meta.get('showName'),
+ 'series_id': meta.get('showId'),
+ }
diff --git a/yt_dlp/extractor/rudovideo.py b/yt_dlp/extractor/rudovideo.py
new file mode 100644
index 0000000..1b85955
--- /dev/null
+++ b/yt_dlp/extractor/rudovideo.py
@@ -0,0 +1,135 @@
+from .common import InfoExtractor
+from ..utils import (
+ ExtractorError,
+ determine_ext,
+ js_to_json,
+ traverse_obj,
+ update_url_query,
+ url_or_none,
+)
+
+
+class RudoVideoIE(InfoExtractor):
+ _VALID_URL = r'https?://rudo\.video/(?P<type>vod|podcast|live)/(?P<id>[^/?&#]+)'
+ _EMBED_REGEX = [r'<iframe[^>]+src=[\'"](?P<url>(?:https?:)//rudo\.video/(?:vod|podcast|live)/[^\'"]+)']
+ _TESTS = [{
+ 'url': 'https://rudo.video/podcast/cz2wrUy8l0o',
+ 'md5': '28ed82b477708dc5e12e072da2449221',
+ 'info_dict': {
+ 'id': 'cz2wrUy8l0o',
+ 'title': 'Diego Cabot',
+ 'ext': 'mp4',
+ 'thumbnail': r're:^(?:https?:)?//.*\.(png|jpg)$',
+ },
+ }, {
+ 'url': 'https://rudo.video/podcast/bQkt07',
+ 'md5': '36b22a9863de0f47f00fc7532a32a898',
+ 'info_dict': {
+ 'id': 'bQkt07',
+ 'title': 'Tubular Bells',
+ 'ext': 'mp4',
+ 'thumbnail': r're:^(?:https?:)?//.*\.(png|jpg)$',
+ },
+ }, {
+ 'url': 'https://rudo.video/podcast/b42ZUznHX0',
+ 'md5': 'b91c70d832938871367f8ad10c895821',
+ 'info_dict': {
+ 'id': 'b42ZUznHX0',
+ 'title': 'Columna Ruperto Concha',
+ 'ext': 'mp3',
+ 'thumbnail': r're:^(?:https?:)?//.*\.(png|jpg)$',
+ },
+ }, {
+ 'url': 'https://rudo.video/vod/bN5AaJ',
+ 'md5': '01324a329227e2591530ecb4f555c881',
+ 'info_dict': {
+ 'id': 'bN5AaJ',
+ 'title': 'Ucrania 19.03',
+ 'creator': 'La Tercera',
+ 'ext': 'mp4',
+ 'thumbnail': r're:^(?:https?:)?//.*\.(png|jpg)$',
+ },
+ }, {
+ 'url': 'https://rudo.video/live/bbtv',
+ 'info_dict': {
+ 'id': 'bbtv',
+ 'ext': 'mp4',
+ 'creator': 'BioBioTV',
+ 'live_status': 'is_live',
+ 'title': r're:^LIVE BBTV\s\d{4}-\d{2}-\d{2}\s\d{2}:\d{2}$',
+ 'thumbnail': r're:^(?:https?:)?//.*\.(png|jpg)$',
+ },
+ }, {
+ 'url': 'https://rudo.video/live/c13',
+ 'info_dict': {
+ 'id': 'c13',
+ 'title': 'CANAL13',
+ 'ext': 'mp4',
+ },
+ 'skip': 'Geo-restricted to Chile',
+ }, {
+ 'url': 'https://rudo.video/live/t13-13cl',
+ 'info_dict': {
+ 'id': 't13-13cl',
+ 'title': 'T13',
+ 'ext': 'mp4',
+ },
+ 'skip': 'Geo-restricted to Chile',
+ }]
+
+ def _real_extract(self, url):
+ video_id, type_ = self._match_valid_url(url).group('id', 'type')
+ is_live = type_ == 'live'
+
+ webpage = self._download_webpage(url, video_id)
+ if 'Streaming is not available in your area' in webpage:
+ self.raise_geo_restricted()
+
+ media_url = (
+ self._search_regex(
+ r'var\s+streamURL\s*=\s*[\'"]([^?\'"]+)', webpage, 'stream url', default=None)
+ # Source URL must be used only if streamURL is unavailable
+ or self._search_regex(
+ r'<source[^>]+src=[\'"]([^\'"]+)', webpage, 'source url', default=None))
+ if not media_url:
+ youtube_url = self._search_regex(r'file:\s*[\'"]((?:https?:)//(?:www\.)?youtube\.com[^\'"]+)',
+ webpage, 'youtube url', default=None)
+ if youtube_url:
+ return self.url_result(youtube_url, 'Youtube')
+ raise ExtractorError('Unable to extract stream url')
+
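+ # The page embeds an obfuscated string array (var _$_...); its first
+ # URL-like entry is the endpoint that issues the auth token needed to
+ # sign the stream URL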
+ token_array = self._search_json(
+ r'<script>var\s+_\$_[a-zA-Z0-9]+\s*=', webpage, 'access token array', video_id,
+ contains_pattern=r'\[(?s:.+)\]', default=None, transform_source=js_to_json)
+ if token_array:
+ token_url = traverse_obj(token_array, (..., {url_or_none}), get_all=False)
+ if not token_url:
+ raise ExtractorError('Invalid access token array')
+ access_token = self._download_json(
+ token_url, video_id, note='Downloading access token')['data']['authToken']
+ media_url = update_url_query(media_url, {'auth-token': access_token})
+
+ ext = determine_ext(media_url)
+ if ext == 'm3u8':
+ formats = self._extract_m3u8_formats(media_url, video_id, live=is_live)
+ elif ext == 'mp3':
+ formats = [{
+ 'url': media_url,
+ 'vcodec': 'none',
+ }]
+ else:
+ formats = [{'url': media_url}]
+
+ return {
+ 'id': video_id,
+ 'title': (self._search_regex(r'var\s+titleVideo\s*=\s*[\'"]([^\'"]+)',
+ webpage, 'title', default=None)
+ or self._og_search_title(webpage)),
+ 'creator': self._search_regex(r'var\s+videoAuthor\s*=\s*[\'"]([^?\'"]+)',
+ webpage, 'videoAuthor', default=None),
+ 'thumbnail': (self._search_regex(r'var\s+posterIMG\s*=\s*[\'"]([^?\'"]+)',
+ webpage, 'thumbnail', default=None)
+ or self._og_search_thumbnail(webpage)),
+ 'formats': formats,
+ 'is_live': is_live,
+ }
diff --git a/yt_dlp/extractor/rule34video.py b/yt_dlp/extractor/rule34video.py
new file mode 100644
index 0000000..11095b2
--- /dev/null
+++ b/yt_dlp/extractor/rule34video.py
@@ -0,0 +1,123 @@
+import re
+
+from .common import InfoExtractor
+from ..utils import (
+ clean_html,
+ extract_attributes,
+ get_element_by_attribute,
+ get_element_by_class,
+ get_element_html_by_class,
+ get_elements_by_class,
+ int_or_none,
+ parse_count,
+ parse_duration,
+ unescapeHTML,
+)
+from ..utils.traversal import traverse_obj
+
+
+class Rule34VideoIE(InfoExtractor):
+ _VALID_URL = r'https?://(?:www\.)?rule34video\.com/videos?/(?P<id>\d+)'
+ _TESTS = [
+ {
+ 'url': 'https://rule34video.com/video/3065157/shot-it-mmd-hmv/',
+ 'md5': 'ffccac2c23799dabbd192621ae4d04f3',
+ 'info_dict': {
+ 'id': '3065157',
+ 'ext': 'mp4',
+ 'title': 'Shot It-(mmd hmv)',
+ 'thumbnail': 'https://rule34video.com/contents/videos_screenshots/3065000/3065157/preview.jpg',
+ 'duration': 347.0,
+ 'age_limit': 18,
+ 'view_count': int,
+ 'like_count': int,
+ 'comment_count': int,
+ 'timestamp': 1639872000,
+ 'description': 'https://discord.gg/aBqPrHSHvv',
+ 'upload_date': '20211219',
+ 'uploader': 'Sweet HMV',
+ 'uploader_url': 'https://rule34video.com/members/22119/',
+ 'categories': ['3D', 'MMD', 'iwara'],
+ 'tags': 'mincount:10'
+ }
+ },
+ {
+ 'url': 'https://rule34video.com/videos/3065296/lara-in-trouble-ep-7-wildeerstudio/',
+ 'md5': '6bb5169f9f6b38cd70882bf2e64f6b86',
+ 'info_dict': {
+ 'id': '3065296',
+ 'ext': 'mp4',
+ 'title': 'Lara in Trouble Ep. 7 [WildeerStudio]',
+ 'thumbnail': 'https://rule34video.com/contents/videos_screenshots/3065000/3065296/preview.jpg',
+ 'duration': 938.0,
+ 'age_limit': 18,
+ 'view_count': int,
+ 'like_count': int,
+ 'comment_count': int,
+ 'timestamp': 1640131200,
+ 'description': '',
+ 'creators': ['WildeerStudio'],
+ 'upload_date': '20211222',
+ 'uploader': 'CerZule',
+ 'uploader_url': 'https://rule34video.com/members/36281/',
+ 'categories': ['3D', 'Tomb Raider'],
+ 'tags': 'mincount:40'
+ }
+ },
+ ]
+
+ def _real_extract(self, url):
+ video_id = self._match_id(url)
+ webpage = self._download_webpage(url, video_id)
+
+ formats = []
+
+ for mobj in re.finditer(r'<a[^>]+href="(?P<video_url>[^"]+download=true[^"]+)".*>(?P<ext>[^\s]+) (?P<quality>[^<]+)p</a>', webpage):
+ url, ext, quality = mobj.groups()
+ formats.append({
+ 'url': url,
+ 'ext': ext.lower(),
+ 'quality': quality,
+ })
+
+ categories, creators, uploader, uploader_url = [None] * 4
+ for col in get_elements_by_class('col', webpage):
+ label = clean_html(get_element_by_class('label', col))
+ if label == 'Categories:':
+ categories = list(map(clean_html, get_elements_by_class('item', col)))
+ elif label == 'Artist:':
+ creators = list(map(clean_html, get_elements_by_class('item', col)))
+ elif label == 'Uploaded By:':
+ uploader = clean_html(get_element_by_class('name', col))
+ uploader_url = extract_attributes(get_element_html_by_class('name', col) or '').get('href')
+
+ return {
+ **traverse_obj(self._search_json_ld(webpage, video_id, default={}), ({
+ 'title': 'title',
+ 'view_count': 'view_count',
+ 'like_count': 'like_count',
+ 'duration': 'duration',
+ 'timestamp': 'timestamp',
+ 'description': 'description',
+ 'thumbnail': ('thumbnails', 0, 'url'),
+ })),
+ 'id': video_id,
+ 'formats': formats,
+ 'title': self._html_extract_title(webpage),
+ 'thumbnail': self._html_search_regex(
+ r'preview_url:\s+\'([^\']+)\'', webpage, 'thumbnail', default=None),
+ 'duration': parse_duration(self._html_search_regex(
+ r'"icon-clock"></i>\s+<span>((?:\d+:?)+)', webpage, 'duration', default=None)),
+ 'view_count': int_or_none(self._html_search_regex(
+ r'"icon-eye"></i>\s+<span>([ \d]+)', webpage, 'views', default='').replace(' ', '')),
+ 'like_count': parse_count(get_element_by_class('voters count', webpage)),
+ 'comment_count': int_or_none(self._search_regex(
+ r'[^(]+\((\d+)\)', get_element_by_attribute('href', '#tab_comments', webpage), 'comment count', fatal=False)),
+ 'age_limit': 18,
+ 'creators': creators,
+ 'uploader': uploader,
+ 'uploader_url': uploader_url,
+ 'categories': categories,
+ 'tags': list(map(unescapeHTML, re.findall(
+ r'<a class="tag_item"[^>]+\bhref="https://rule34video\.com/tags/\d+/"[^>]*>(?P<tag>[^>]*)</a>', webpage))),
+ }
diff --git a/yt_dlp/extractor/rumble.py b/yt_dlp/extractor/rumble.py
new file mode 100644
index 0000000..837a324
--- /dev/null
+++ b/yt_dlp/extractor/rumble.py
@@ -0,0 +1,390 @@
+import itertools
+import re
+
+from .common import InfoExtractor
+from ..networking.exceptions import HTTPError
+from ..utils import (
+ ExtractorError,
+ UnsupportedError,
+ clean_html,
+ determine_ext,
+ format_field,
+ get_element_by_class,
+ int_or_none,
+ join_nonempty,
+ parse_count,
+ parse_iso8601,
+ traverse_obj,
+ unescapeHTML,
+)
+
+
+class RumbleEmbedIE(InfoExtractor):
+ _VALID_URL = r'https?://(?:www\.)?rumble\.com/embed/(?:[0-9a-z]+\.)?(?P<id>[0-9a-z]+)'
+ _EMBED_REGEX = [fr'(?:<(?:script|iframe)[^>]+\bsrc=|["\']embedUrl["\']\s*:\s*)["\'](?P<url>{_VALID_URL})']
+ _TESTS = [{
+ 'url': 'https://rumble.com/embed/v5pv5f',
+ 'md5': '36a18a049856720189f30977ccbb2c34',
+ 'info_dict': {
+ 'id': 'v5pv5f',
+ 'ext': 'mp4',
+ 'title': 'WMAR 2 News Latest Headlines | October 20, 6pm',
+ 'timestamp': 1571611968,
+ 'upload_date': '20191020',
+ 'channel_url': 'https://rumble.com/c/WMAR',
+ 'channel': 'WMAR',
+ 'thumbnail': 'https://sp.rmbl.ws/s8/1/5/M/z/1/5Mz1a.qR4e-small-WMAR-2-News-Latest-Headline.jpg',
+ 'duration': 234,
+ 'uploader': 'WMAR',
+ 'live_status': 'not_live',
+ }
+ }, {
+ 'url': 'https://rumble.com/embed/vslb7v',
+ 'md5': '7418035de1a30a178b8af34dc2b6a52b',
+ 'info_dict': {
+ 'id': 'vslb7v',
+ 'ext': 'mp4',
+ 'title': 'Defense Sec. says US Commitment to NATO Defense \'Ironclad\'',
+ 'timestamp': 1645142135,
+ 'upload_date': '20220217',
+ 'channel_url': 'https://rumble.com/c/CyberTechNews',
+ 'channel': 'CTNews',
+ 'thumbnail': 'https://sp.rmbl.ws/s8/6/7/i/9/h/7i9hd.OvCc.jpg',
+ 'duration': 901,
+ 'uploader': 'CTNews',
+ 'live_status': 'not_live',
+ }
+ }, {
+ 'url': 'https://rumble.com/embed/vunh1h',
+ 'info_dict': {
+ 'id': 'vunh1h',
+ 'ext': 'mp4',
+ 'title': '‘Gideon, op zoek naar de waarheid’ including ENG SUBS',
+ 'timestamp': 1647197663,
+ 'upload_date': '20220313',
+ 'channel_url': 'https://rumble.com/user/BLCKBX',
+ 'channel': 'BLCKBX',
+ 'thumbnail': r're:https://.+\.jpg',
+ 'duration': 5069,
+ 'uploader': 'BLCKBX',
+ 'live_status': 'not_live',
+ 'subtitles': {
+ 'en': [
+ {
+ 'url': r're:https://.+\.vtt',
+ 'name': 'English',
+ 'ext': 'vtt'
+ }
+ ]
+ },
+ },
+ 'params': {'skip_download': True}
+ }, {
+ 'url': 'https://rumble.com/embed/v1essrt',
+ 'info_dict': {
+ 'id': 'v1essrt',
+ 'ext': 'mp4',
+ 'title': 'startswith:lofi hip hop radio 📚 - beats to relax/study to',
+ 'timestamp': 1661519399,
+ 'upload_date': '20220826',
+ 'channel_url': 'https://rumble.com/c/LofiGirl',
+ 'channel': 'Lofi Girl',
+ 'thumbnail': r're:https://.+\.jpg',
+ 'uploader': 'Lofi Girl',
+ 'live_status': 'is_live',
+ },
+ 'params': {'skip_download': True}
+ }, {
+ 'url': 'https://rumble.com/embed/v1amumr',
+ 'info_dict': {
+ 'id': 'v1amumr',
+ 'ext': 'mp4',
+ 'fps': 60,
+ 'title': 'Turning Point USA 2022 Student Action Summit DAY 1 - Rumble Exclusive Live',
+ 'timestamp': 1658518457,
+ 'upload_date': '20220722',
+ 'channel_url': 'https://rumble.com/c/RumbleEvents',
+ 'channel': 'Rumble Events',
+ 'thumbnail': r're:https://.+\.jpg',
+ 'duration': 16427,
+ 'uploader': 'Rumble Events',
+ 'live_status': 'was_live',
+ },
+ 'params': {'skip_download': True}
+ }, {
+ 'url': 'https://rumble.com/embed/ufe9n.v5pv5f',
+ 'only_matching': True,
+ }]
+
+ _WEBPAGE_TESTS = [
+ {
+ 'note': 'Rumble JS embed',
+ 'url': 'https://therightscoop.com/what-does-9-plus-1-plus-1-equal-listen-to-this-audio-of-attempted-kavanaugh-assassins-call-and-youll-get-it',
+ 'md5': '4701209ac99095592e73dbba21889690',
+ 'info_dict': {
+ 'id': 'v15eqxl',
+ 'ext': 'mp4',
+ 'channel': 'Mr Producer Media',
+ 'duration': 92,
+ 'title': '911 Audio From The Man Who Wanted To Kill Supreme Court Justice Kavanaugh',
+ 'channel_url': 'https://rumble.com/c/RichSementa',
+ 'thumbnail': 'https://sp.rmbl.ws/s8/1/P/j/f/A/PjfAe.qR4e-small-911-Audio-From-The-Man-Who-.jpg',
+ 'timestamp': 1654892716,
+ 'uploader': 'Mr Producer Media',
+ 'upload_date': '20220610',
+ 'live_status': 'not_live',
+ }
+ },
+ ]
+
+ @classmethod
+ def _extract_embed_urls(cls, url, webpage):
+ embeds = tuple(super()._extract_embed_urls(url, webpage))
+ if embeds:
+ return embeds
+ return [f'https://rumble.com/embed/{mobj.group("id")}' for mobj in re.finditer(
+ r'<script>[^<]*\bRumble\(\s*"play"\s*,\s*{[^}]*[\'"]?video[\'"]?\s*:\s*[\'"](?P<id>[0-9a-z]+)[\'"]', webpage)]
+
+ def _real_extract(self, url):
+ video_id = self._match_id(url)
+ video = self._download_json(
+ 'https://rumble.com/embedJS/u3/', video_id,
+ query={'request': 'video', 'ver': 2, 'v': video_id})
+
+ sys_msg = traverse_obj(video, ('sys', 'msg'))
+ if sys_msg:
+ self.report_warning(sys_msg, video_id=video_id)
+
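+ # video['live']: 0 = VOD ('was_live' if a livestream DVR exists),
+ # 1 = upcoming or recently ended, 2 = currently live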
+ if video.get('live') == 0:
+ live_status = 'not_live' if video.get('livestream_has_dvr') is None else 'was_live'
+ elif video.get('live') == 1:
+ live_status = 'is_upcoming' if video.get('livestream_has_dvr') else 'was_live'
+ elif video.get('live') == 2:
+ live_status = 'is_live'
+ else:
+ live_status = None
+
+ formats = []
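+ # video['ua'] maps each delivery method to either a list of variants
+ # or a dict keyed by height; normalize the dict form, backfilling the
+ # height into the variant metadata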
+ for ext, ext_info in (video.get('ua') or {}).items():
+ if isinstance(ext_info, dict):
+ for height, video_info in ext_info.items():
+ if not traverse_obj(video_info, ('meta', 'h', {int_or_none})):
+ video_info.setdefault('meta', {})['h'] = height
+ ext_info = ext_info.values()
+
+ for video_info in ext_info:
+ meta = video_info.get('meta') or {}
+ if not video_info.get('url'):
+ continue
+ if ext == 'hls':
+ if meta.get('live') is True and video.get('live') == 1:
+ live_status = 'post_live'
+ formats.extend(self._extract_m3u8_formats(
+ video_info['url'], video_id,
+ ext='mp4', m3u8_id='hls', fatal=False, live=live_status == 'is_live'))
+ continue
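+ # 'timeline' entries are seek-preview storyboards, hence video-only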
+ timeline = ext == 'timeline'
+ if timeline:
+ ext = determine_ext(video_info['url'])
+ formats.append({
+ 'ext': ext,
+ 'acodec': 'none' if timeline else None,
+ 'url': video_info['url'],
+ 'format_id': join_nonempty(ext, format_field(meta, 'h', '%sp')),
+ 'format_note': 'Timeline' if timeline else None,
+ 'fps': None if timeline else video.get('fps'),
+ **traverse_obj(meta, {
+ 'tbr': 'bitrate',
+ 'filesize': 'size',
+ 'width': 'w',
+ 'height': 'h',
+ }, expected_type=lambda x: int(x) or None)
+ })
+
+ subtitles = {
+ lang: [{
+ 'url': sub_info['path'],
+ 'name': sub_info.get('language') or '',
+ }] for lang, sub_info in (video.get('cc') or {}).items() if sub_info.get('path')
+ }
+
+ author = video.get('author') or {}
+ thumbnails = traverse_obj(video, ('t', ..., {'url': 'i', 'width': 'w', 'height': 'h'}))
+ if not thumbnails and video.get('i'):
+ thumbnails = [{'url': video['i']}]
+
+ if live_status in {'is_live', 'post_live'}:
+ duration = None
+ else:
+ duration = int_or_none(video.get('duration'))
+
+ return {
+ 'id': video_id,
+ 'title': unescapeHTML(video.get('title')),
+ 'formats': formats,
+ 'subtitles': subtitles,
+ 'thumbnails': thumbnails,
+ 'timestamp': parse_iso8601(video.get('pubDate')),
+ 'channel': author.get('name'),
+ 'channel_url': author.get('url'),
+ 'duration': duration,
+ 'uploader': author.get('name'),
+ 'live_status': live_status,
+ }
+
+
+class RumbleIE(InfoExtractor):
+ _VALID_URL = r'https?://(?:www\.)?rumble\.com/(?P<id>v(?!ideos)[\w.-]+)[^/]*$'
+ _EMBED_REGEX = [
+ r'<a class=video-item--a href=(?P<url>/v[\w.-]+\.html)>',
+ r'<a[^>]+class="videostream__link link"[^>]+href=(?P<url>/v[\w.-]+\.html)[^>]*>']
+ _TESTS = [{
+ 'add_ie': ['RumbleEmbed'],
+ 'url': 'https://rumble.com/vdmum1-moose-the-dog-helps-girls-dig-a-snow-fort.html',
+ 'md5': '53af34098a7f92c4e51cf0bd1c33f009',
+ 'info_dict': {
+ 'id': 'vb0ofn',
+ 'ext': 'mp4',
+ 'timestamp': 1612662578,
+ 'uploader': 'LovingMontana',
+ 'channel': 'LovingMontana',
+ 'upload_date': '20210207',
+ 'title': 'Winter-loving dog helps girls dig a snow fort ',
+ 'description': 'Moose the dog is more than happy to help with digging out this epic snow fort. Great job, Moose!',
+ 'channel_url': 'https://rumble.com/c/c-546523',
+ 'thumbnail': r're:https://.+\.jpg',
+ 'duration': 103,
+ 'like_count': int,
+ 'dislike_count': int,
+ 'view_count': int,
+ 'live_status': 'not_live',
+ }
+ }, {
+ 'url': 'http://www.rumble.com/vDMUM1?key=value',
+ 'only_matching': True,
+ }, {
+ 'note': 'timeline format',
+ 'url': 'https://rumble.com/v2ea9qb-the-u.s.-cannot-hide-this-in-ukraine-anymore-redacted-with-natali-and-clayt.html',
+ 'md5': '40d61fec6c0945bca3d0e1dc1aa53d79',
+ 'params': {'format': 'wv'},
+ 'info_dict': {
+ 'id': 'v2bou5f',
+ 'ext': 'mp4',
+ 'uploader': 'Redacted News',
+ 'upload_date': '20230322',
+ 'timestamp': 1679445010,
+ 'title': 'The U.S. CANNOT hide this in Ukraine anymore | Redacted with Natali and Clayton Morris',
+ 'duration': 892,
+ 'channel': 'Redacted News',
+ 'description': 'md5:aaad0c5c3426d7a361c29bdaaced7c42',
+ 'channel_url': 'https://rumble.com/c/Redacted',
+ 'live_status': 'not_live',
+ 'thumbnail': 'https://sp.rmbl.ws/s8/1/d/x/2/O/dx2Oi.qR4e-small-The-U.S.-CANNOT-hide-this-i.jpg',
+ 'like_count': int,
+ 'dislike_count': int,
+ 'view_count': int,
+ },
+ }, {
+ 'url': 'https://rumble.com/v2e7fju-the-covid-twitter-files-drop-protecting-fauci-while-censoring-the-truth-wma.html',
+ 'info_dict': {
+ 'id': 'v2blzyy',
+ 'ext': 'mp4',
+ 'live_status': 'was_live',
+ 'release_timestamp': 1679446804,
+ 'description': 'md5:2ac4908ccfecfb921f8ffa4b30c1e636',
+ 'release_date': '20230322',
+ 'timestamp': 1679445692,
+ 'duration': 4435,
+ 'upload_date': '20230322',
+ 'title': 'The Covid Twitter Files Drop: Protecting Fauci While Censoring The Truth w/Matt Taibbi',
+ 'uploader': 'Kim Iversen',
+ 'channel_url': 'https://rumble.com/c/KimIversen',
+ 'channel': 'Kim Iversen',
+ 'thumbnail': 'https://sp.rmbl.ws/s8/1/6/b/w/O/6bwOi.qR4e-small-The-Covid-Twitter-Files-Dro.jpg',
+ 'like_count': int,
+ 'dislike_count': int,
+ 'view_count': int,
+ },
+ }]
+
+ _WEBPAGE_TESTS = [{
+ 'url': 'https://rumble.com/videos?page=2',
+ 'playlist_mincount': 24,
+ 'info_dict': {
+ 'id': 'videos?page=2',
+ 'title': 'All videos',
+ 'description': 'Browse videos uploaded to Rumble.com',
+ 'age_limit': 0,
+ },
+ }, {
+ 'url': 'https://rumble.com/browse/live',
+ 'playlist_mincount': 25,
+ 'info_dict': {
+ 'id': 'live',
+ 'title': 'Browse',
+ 'age_limit': 0,
+ },
+ }, {
+ 'url': 'https://rumble.com/search/video?q=rumble&sort=views',
+ 'playlist_mincount': 24,
+ 'info_dict': {
+ 'id': 'video?q=rumble&sort=views',
+ 'title': 'Search results for: rumble',
+ 'age_limit': 0,
+ },
+ }]
+
+ def _real_extract(self, url):
+ page_id = self._match_id(url)
+ webpage = self._download_webpage(url, page_id)
+ url_info = next(RumbleEmbedIE.extract_from_webpage(self._downloader, url, webpage), None)
+ if not url_info:
+ raise UnsupportedError(url)
+
+ return {
+ '_type': 'url_transparent',
+ 'ie_key': url_info['ie_key'],
+ 'url': url_info['url'],
+ 'release_timestamp': parse_iso8601(self._search_regex(
+ r'(?:Livestream begins|Streamed on):\s+<time datetime="([^"]+)', webpage, 'release date', default=None)),
+ 'view_count': int_or_none(self._search_regex(
+ r'"userInteractionCount"\s*:\s*(\d+)', webpage, 'view count', default=None)),
+ 'like_count': parse_count(self._search_regex(
+ r'<span data-js="rumbles_up_votes">\s*([\d,.KM]+)', webpage, 'like count', default=None)),
+ 'dislike_count': parse_count(self._search_regex(
+ r'<span data-js="rumbles_down_votes">\s*([\d,.KM]+)', webpage, 'dislike count', default=None)),
+ 'description': clean_html(get_element_by_class('media-description', webpage))
+ }
+
+
+class RumbleChannelIE(InfoExtractor):
+ _VALID_URL = r'(?P<url>https?://(?:www\.)?rumble\.com/(?:c|user)/(?P<id>[^&?#$/]+))'
+
+ _TESTS = [{
+ 'url': 'https://rumble.com/c/Styxhexenhammer666',
+ 'playlist_mincount': 1160,
+ 'info_dict': {
+ 'id': 'Styxhexenhammer666',
+ },
+ }, {
+ 'url': 'https://rumble.com/user/goldenpoodleharleyeuna',
+ 'playlist_mincount': 4,
+ 'info_dict': {
+ 'id': 'goldenpoodleharleyeuna',
+ },
+ }]
+
+ def entries(self, url, playlist_id):
+ for page in itertools.count(1):
+ try:
+ webpage = self._download_webpage(f'{url}?page={page}', playlist_id, note=f'Downloading page {page}')
+ except ExtractorError as e:
+ if isinstance(e.cause, HTTPError) and e.cause.status == 404:
+ break
+ raise
+ for video_url in re.findall(r'class="[^>"]*videostream__link[^>]+href="([^"]+\.html)"', webpage):
+ yield self.url_result('https://rumble.com' + video_url)
+
+ def _real_extract(self, url):
+ url, playlist_id = self._match_valid_url(url).groups()
+ return self.playlist_result(self.entries(url, playlist_id), playlist_id=playlist_id)
diff --git a/yt_dlp/extractor/rutube.py b/yt_dlp/extractor/rutube.py
new file mode 100644
index 0000000..287824d
--- /dev/null
+++ b/yt_dlp/extractor/rutube.py
@@ -0,0 +1,365 @@
+import itertools
+
+from .common import InfoExtractor
+from ..compat import compat_str
+from ..utils import (
+ bool_or_none,
+ determine_ext,
+ int_or_none,
+ parse_qs,
+ try_get,
+ unified_timestamp,
+ url_or_none,
+)
+
+
+class RutubeBaseIE(InfoExtractor):
+ def _download_api_info(self, video_id, query=None):
+ if not query:
+ query = {}
+ query['format'] = 'json'
+ return self._download_json(
+ 'http://rutube.ru/api/video/%s/' % video_id,
+ video_id, 'Downloading video JSON',
+ 'Unable to download video JSON', query=query)
+
+ def _extract_info(self, video, video_id=None, require_title=True):
+ title = video['title'] if require_title else video.get('title')
+
+ age_limit = video.get('is_adult')
+ if age_limit is not None:
+ age_limit = 18 if age_limit is True else 0
+
+ uploader_id = try_get(video, lambda x: x['author']['id'])
+ category = try_get(video, lambda x: x['category']['name'])
+ description = video.get('description')
+ duration = int_or_none(video.get('duration'))
+
+ return {
+ 'id': video.get('id') or video_id if video_id else video['id'],
+ 'title': title,
+ 'description': description,
+ 'thumbnail': video.get('thumbnail_url'),
+ 'duration': duration,
+ 'uploader': try_get(video, lambda x: x['author']['name']),
+ 'uploader_id': compat_str(uploader_id) if uploader_id else None,
+ 'timestamp': unified_timestamp(video.get('created_ts')),
+ 'categories': [category] if category else None,
+ 'age_limit': age_limit,
+ 'view_count': int_or_none(video.get('hits')),
+ 'comment_count': int_or_none(video.get('comments_count')),
+ 'is_live': bool_or_none(video.get('is_livestream')),
+ 'chapters': self._extract_chapters_from_description(description, duration),
+ }
+
+ def _download_and_extract_info(self, video_id, query=None):
+ return self._extract_info(
+ self._download_api_info(video_id, query=query), video_id)
+
+ def _download_api_options(self, video_id, query=None):
+ if not query:
+ query = {}
+ query['format'] = 'json'
+ return self._download_json(
+ 'http://rutube.ru/api/play/options/%s/' % video_id,
+ video_id, 'Downloading options JSON',
+ 'Unable to download options JSON',
+ headers=self.geo_verification_headers(), query=query)
+
+ def _extract_formats(self, options, video_id):
+ formats = []
+ for format_id, format_url in options['video_balancer'].items():
+ ext = determine_ext(format_url)
+ if ext == 'm3u8':
+ formats.extend(self._extract_m3u8_formats(
+ format_url, video_id, 'mp4', m3u8_id=format_id, fatal=False))
+ elif ext == 'f4m':
+ formats.extend(self._extract_f4m_formats(
+ format_url, video_id, f4m_id=format_id, fatal=False))
+ else:
+ formats.append({
+ 'url': format_url,
+ 'format_id': format_id,
+ })
+ return formats
+
+ def _download_and_extract_formats(self, video_id, query=None):
+ return self._extract_formats(
+ self._download_api_options(video_id, query=query), video_id)
+
+
+class RutubeIE(RutubeBaseIE):
+ IE_NAME = 'rutube'
+ IE_DESC = 'Rutube videos'
+ _VALID_URL = r'https?://rutube\.ru/(?:video(?:/private)?|(?:play/)?embed)/(?P<id>[\da-z]{32})'
+ _EMBED_REGEX = [r'<iframe[^>]+?src=(["\'])(?P<url>(?:https?:)?//rutube\.ru/(?:play/)?embed/[\da-z]{32}.*?)\1']
+
+ _TESTS = [{
+ 'url': 'http://rutube.ru/video/3eac3b4561676c17df9132a9a1e62e3e/',
+ 'md5': 'e33ac625efca66aba86cbec9851f2692',
+ 'info_dict': {
+ 'id': '3eac3b4561676c17df9132a9a1e62e3e',
+ 'ext': 'mp4',
+ 'title': 'Раненный кенгуру забежал в аптеку',
+ 'description': 'http://www.ntdtv.ru ',
+ 'duration': 81,
+ 'uploader': 'NTDRussian',
+ 'uploader_id': '29790',
+ 'timestamp': 1381943602,
+ 'upload_date': '20131016',
+ 'age_limit': 0,
+ 'view_count': int,
+ 'thumbnail': 'http://pic.rutubelist.ru/video/d2/a0/d2a0aec998494a396deafc7ba2c82add.jpg',
+ 'categories': ['Новости и СМИ'],
+ 'chapters': [],
+ },
+ 'expected_warnings': ['Unable to download f4m'],
+ }, {
+ 'url': 'http://rutube.ru/play/embed/a10e53b86e8f349080f718582ce4c661',
+ 'only_matching': True,
+ }, {
+ 'url': 'http://rutube.ru/embed/a10e53b86e8f349080f718582ce4c661',
+ 'only_matching': True,
+ }, {
+ 'url': 'http://rutube.ru/video/3eac3b4561676c17df9132a9a1e62e3e/?pl_id=4252',
+ 'only_matching': True,
+ }, {
+ 'url': 'https://rutube.ru/video/10b3a03fc01d5bbcc632a2f3514e8aab/?pl_type=source',
+ 'only_matching': True,
+ }, {
+ 'url': 'https://rutube.ru/video/private/884fb55f07a97ab673c7d654553e0f48/?p=x2QojCumHTS3rsKHWXN8Lg',
+ 'md5': 'd106225f15d625538fe22971158e896f',
+ 'info_dict': {
+ 'id': '884fb55f07a97ab673c7d654553e0f48',
+ 'ext': 'mp4',
+ 'title': 'Яцуноками, Nioh2',
+ 'description': 'Nioh2: финал сражения с боссом Яцуноками',
+ 'duration': 15,
+ 'uploader': 'mexus',
+ 'uploader_id': '24222106',
+ 'timestamp': 1670646232,
+ 'upload_date': '20221210',
+ 'age_limit': 0,
+ 'view_count': int,
+ 'thumbnail': 'http://pic.rutubelist.ru/video/f2/d4/f2d42b54be0a6e69c1c22539e3152156.jpg',
+ 'categories': ['Видеоигры'],
+ 'chapters': [],
+ },
+ 'expected_warnings': ['Unable to download f4m'],
+ }, {
+ 'url': 'https://rutube.ru/video/c65b465ad0c98c89f3b25cb03dcc87c6/',
+ 'info_dict': {
+ 'id': 'c65b465ad0c98c89f3b25cb03dcc87c6',
+ 'ext': 'mp4',
+ 'chapters': 'count:4',
+ 'categories': ['Бизнес и предпринимательство'],
+ 'description': 'md5:252feac1305257d8c1bab215cedde75d',
+ 'thumbnail': 'http://pic.rutubelist.ru/video/71/8f/718f27425ea9706073eb80883dd3787b.png',
+ 'duration': 782,
+ 'age_limit': 0,
+ 'uploader_id': '23491359',
+ 'timestamp': 1677153329,
+ 'view_count': int,
+ 'upload_date': '20230223',
+ 'title': 'Бизнес с нуля: найм сотрудников. Интервью с директором строительной компании',
+ 'uploader': 'Стас Быков',
+ },
+ 'expected_warnings': ['Unable to download f4m'],
+ }]
+
+ @classmethod
+ def suitable(cls, url):
+        return False if RutubePlaylistIE.suitable(url) else super().suitable(url)
+
+ def _real_extract(self, url):
+ video_id = self._match_id(url)
+ query = parse_qs(url)
+ info = self._download_and_extract_info(video_id, query)
+ info['formats'] = self._download_and_extract_formats(video_id, query)
+ return info
+
+
+class RutubeEmbedIE(RutubeBaseIE):
+ IE_NAME = 'rutube:embed'
+ IE_DESC = 'Rutube embedded videos'
+ _VALID_URL = r'https?://rutube\.ru/(?:video|play)/embed/(?P<id>[0-9]+)'
+
+ _TESTS = [{
+ 'url': 'http://rutube.ru/video/embed/6722881?vk_puid37=&vk_puid38=',
+ 'info_dict': {
+ 'id': 'a10e53b86e8f349080f718582ce4c661',
+ 'ext': 'mp4',
+ 'timestamp': 1387830582,
+ 'upload_date': '20131223',
+ 'uploader_id': '297833',
+ 'description': 'Видео группы ★http://vk.com/foxkidsreset★ музей Fox Kids и Jetix<br/><br/> восстановлено и сделано в шикоформате subziro89 http://vk.com/subziro89',
+ 'uploader': 'subziro89 ILya',
+ 'title': 'Мистический городок Эйри в Индиан 5 серия озвучка subziro89',
+ },
+ 'params': {
+ 'skip_download': True,
+ },
+ }, {
+ 'url': 'http://rutube.ru/play/embed/8083783',
+ 'only_matching': True,
+ }, {
+ # private video
+ 'url': 'https://rutube.ru/play/embed/10631925?p=IbAigKqWd1do4mjaM5XLIQ',
+ 'only_matching': True,
+ }]
+
+ def _real_extract(self, url):
+ embed_id = self._match_id(url)
+ # Query may contain private videos token and should be passed to API
+ # requests (see #19163)
+ query = parse_qs(url)
+ options = self._download_api_options(embed_id, query)
+ video_id = options['effective_video']
+ formats = self._extract_formats(options, video_id)
+ info = self._download_and_extract_info(video_id, query)
+ info.update({
+ 'extractor_key': 'Rutube',
+ 'formats': formats,
+ })
+ return info
+
+
+class RutubePlaylistBaseIE(RutubeBaseIE):
+ def _next_page_url(self, page_num, playlist_id, *args, **kwargs):
+ return self._PAGE_TEMPLATE % (playlist_id, page_num)
+
+ def _entries(self, playlist_id, *args, **kwargs):
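+        # Page through the API, preferring the server-supplied 'next' cursor over the templated page URL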
+ next_page_url = None
+ for pagenum in itertools.count(1):
+ page = self._download_json(
+ next_page_url or self._next_page_url(
+ pagenum, playlist_id, *args, **kwargs),
+ playlist_id, 'Downloading page %s' % pagenum)
+
+ results = page.get('results')
+ if not results or not isinstance(results, list):
+ break
+
+ for result in results:
+ video_url = url_or_none(result.get('video_url'))
+ if not video_url:
+ continue
+ entry = self._extract_info(result, require_title=False)
+ entry.update({
+ '_type': 'url',
+ 'url': video_url,
+ 'ie_key': RutubeIE.ie_key(),
+ })
+ yield entry
+
+ next_page_url = page.get('next')
+ if not next_page_url or not page.get('has_next'):
+ break
+
+ def _extract_playlist(self, playlist_id, *args, **kwargs):
+ return self.playlist_result(
+ self._entries(playlist_id, *args, **kwargs),
+ playlist_id, kwargs.get('playlist_name'))
+
+ def _real_extract(self, url):
+ return self._extract_playlist(self._match_id(url))
+
+
+class RutubeTagsIE(RutubePlaylistBaseIE):
+ IE_NAME = 'rutube:tags'
+ IE_DESC = 'Rutube tags'
+ _VALID_URL = r'https?://rutube\.ru/tags/video/(?P<id>\d+)'
+ _TESTS = [{
+ 'url': 'http://rutube.ru/tags/video/1800/',
+ 'info_dict': {
+ 'id': '1800',
+ },
+ 'playlist_mincount': 68,
+ }]
+
+ _PAGE_TEMPLATE = 'http://rutube.ru/api/tags/video/%s/?page=%s&format=json'
+
+
+class RutubeMovieIE(RutubePlaylistBaseIE):
+ IE_NAME = 'rutube:movie'
+ IE_DESC = 'Rutube movies'
+ _VALID_URL = r'https?://rutube\.ru/metainfo/tv/(?P<id>\d+)'
+
+ _MOVIE_TEMPLATE = 'http://rutube.ru/api/metainfo/tv/%s/?format=json'
+ _PAGE_TEMPLATE = 'http://rutube.ru/api/metainfo/tv/%s/video?page=%s&format=json'
+
+ def _real_extract(self, url):
+ movie_id = self._match_id(url)
+ movie = self._download_json(
+ self._MOVIE_TEMPLATE % movie_id, movie_id,
+ 'Downloading movie JSON')
+ return self._extract_playlist(
+ movie_id, playlist_name=movie.get('name'))
+
+
+class RutubePersonIE(RutubePlaylistBaseIE):
+ IE_NAME = 'rutube:person'
+ IE_DESC = 'Rutube person videos'
+ _VALID_URL = r'https?://rutube\.ru/video/person/(?P<id>\d+)'
+ _TESTS = [{
+ 'url': 'http://rutube.ru/video/person/313878/',
+ 'info_dict': {
+ 'id': '313878',
+ },
+ 'playlist_mincount': 37,
+ }]
+
+ _PAGE_TEMPLATE = 'http://rutube.ru/api/video/person/%s/?page=%s&format=json'
+
+
+class RutubePlaylistIE(RutubePlaylistBaseIE):
+ IE_NAME = 'rutube:playlist'
+ IE_DESC = 'Rutube playlists'
+ _VALID_URL = r'https?://rutube\.ru/(?:video|(?:play/)?embed)/[\da-z]{32}/\?.*?\bpl_id=(?P<id>\d+)'
+ _TESTS = [{
+ 'url': 'https://rutube.ru/video/cecd58ed7d531fc0f3d795d51cee9026/?pl_id=3097&pl_type=tag',
+ 'info_dict': {
+ 'id': '3097',
+ },
+ 'playlist_count': 27,
+ }, {
+ 'url': 'https://rutube.ru/video/10b3a03fc01d5bbcc632a2f3514e8aab/?pl_id=4252&pl_type=source',
+ 'only_matching': True,
+ }]
+
+ _PAGE_TEMPLATE = 'http://rutube.ru/api/playlist/%s/%s/?page=%s&format=json'
+
+ @classmethod
+ def suitable(cls, url):
+        if not super().suitable(url):
+ return False
+ params = parse_qs(url)
+ return params.get('pl_type', [None])[0] and int_or_none(params.get('pl_id', [None])[0])
+
+ def _next_page_url(self, page_num, playlist_id, item_kind):
+ return self._PAGE_TEMPLATE % (item_kind, playlist_id, page_num)
+
+ def _real_extract(self, url):
+ qs = parse_qs(url)
+ playlist_kind = qs['pl_type'][0]
+ playlist_id = qs['pl_id'][0]
+ return self._extract_playlist(playlist_id, item_kind=playlist_kind)
+
+
+class RutubeChannelIE(RutubePlaylistBaseIE):
+ IE_NAME = 'rutube:channel'
+ IE_DESC = 'Rutube channel'
+ _VALID_URL = r'https?://rutube\.ru/channel/(?P<id>\d+)/videos'
+ _TESTS = [{
+ 'url': 'https://rutube.ru/channel/639184/videos/',
+ 'info_dict': {
+ 'id': '639184',
+ },
+ 'playlist_mincount': 133,
+ }]
+
+ _PAGE_TEMPLATE = 'http://rutube.ru/api/video/person/%s/?page=%s&format=json'
diff --git a/yt_dlp/extractor/rutv.py b/yt_dlp/extractor/rutv.py
new file mode 100644
index 0000000..d7f9a73
--- /dev/null
+++ b/yt_dlp/extractor/rutv.py
@@ -0,0 +1,203 @@
+import re
+
+from .common import InfoExtractor
+from ..utils import (
+ ExtractorError,
+ int_or_none,
+    str_to_int,
+)
+
+
+class RUTVIE(InfoExtractor):
+ IE_DESC = 'RUTV.RU'
+ _VALID_URL = r'''(?x)
+ https?://
+ (?:test)?player\.(?:rutv\.ru|vgtrk\.com)/
+ (?P<path>
+ flash\d+v/container\.swf\?id=|
+ iframe/(?P<type>swf|video|live)/id/|
+ index/iframe/cast_id/
+ )
+ (?P<id>\d+)
+ '''
+ _EMBED_URLS = [
+ r'<iframe[^>]+?src=(["\'])(?P<url>https?://(?:test)?player\.(?:rutv\.ru|vgtrk\.com)/(?:iframe/(?:swf|video|live)/id|index/iframe/cast_id)/.+?)\1',
+ r'<meta[^>]+?property=(["\'])og:video\1[^>]+?content=(["\'])(?P<url>https?://(?:test)?player\.(?:rutv\.ru|vgtrk\.com)/flash\d+v/container\.swf\?id=.+?\2)',
+ ]
+
+ _TESTS = [
+ {
+ 'url': 'http://player.rutv.ru/flash2v/container.swf?id=774471&sid=kultura&fbv=true&isPlay=true&ssl=false&i=560&acc_video_id=episode_id/972347/video_id/978186/brand_id/31724',
+ 'info_dict': {
+ 'id': '774471',
+ 'ext': 'mp4',
+ 'title': 'Монологи на все времена',
+ 'description': 'md5:18d8b5e6a41fb1faa53819471852d5d5',
+ 'duration': 2906,
+ },
+ 'params': {
+ # m3u8 download
+ 'skip_download': True,
+ },
+ },
+ {
+ 'url': 'https://player.vgtrk.com/flash2v/container.swf?id=774016&sid=russiatv&fbv=true&isPlay=true&ssl=false&i=560&acc_video_id=episode_id/972098/video_id/977760/brand_id/57638',
+ 'info_dict': {
+ 'id': '774016',
+ 'ext': 'mp4',
+ 'title': 'Чужой в семье Сталина',
+ 'description': '',
+ 'duration': 2539,
+ },
+ 'params': {
+ # m3u8 download
+ 'skip_download': True,
+ },
+ },
+ {
+ 'url': 'http://player.rutv.ru/iframe/swf/id/766888/sid/hitech/?acc_video_id=4000',
+ 'info_dict': {
+ 'id': '766888',
+ 'ext': 'mp4',
+ 'title': 'Вести.net: интернет-гиганты начали перетягивание программных "одеял"',
+ 'description': 'md5:65ddd47f9830c4f42ed6475f8730c995',
+ 'duration': 279,
+ },
+ 'params': {
+ # m3u8 download
+ 'skip_download': True,
+ },
+ },
+ {
+ 'url': 'http://player.rutv.ru/iframe/video/id/771852/start_zoom/true/showZoomBtn/false/sid/russiatv/?acc_video_id=episode_id/970443/video_id/975648/brand_id/5169',
+ 'info_dict': {
+ 'id': '771852',
+ 'ext': 'mp4',
+ 'title': 'Прямой эфир. Жертвы загадочной болезни: смерть от старости в 17 лет',
+ 'description': 'md5:b81c8c55247a4bd996b43ce17395b2d8',
+ 'duration': 3096,
+ },
+ 'params': {
+ # m3u8 download
+ 'skip_download': True,
+ },
+ },
+ {
+ 'url': 'http://player.rutv.ru/iframe/live/id/51499/showZoomBtn/false/isPlay/true/sid/sochi2014',
+ 'info_dict': {
+ 'id': '51499',
+ 'ext': 'flv',
+ 'title': 'Сочи-2014. Биатлон. Индивидуальная гонка. Мужчины ',
+ 'description': 'md5:9e0ed5c9d2fa1efbfdfed90c9a6d179c',
+ },
+ 'skip': 'Translation has finished',
+ },
+ {
+ 'url': 'http://player.rutv.ru/iframe/live/id/21/showZoomBtn/false/isPlay/true/',
+ 'info_dict': {
+ 'id': '21',
+ 'ext': 'mp4',
+ 'title': 're:^Россия 24. Прямой эфир [0-9]{4}-[0-9]{2}-[0-9]{2} [0-9]{2}:[0-9]{2}$',
+ 'is_live': True,
+ },
+ 'params': {
+ # m3u8 download
+ 'skip_download': True,
+ },
+ },
+ {
+ 'url': 'https://testplayer.vgtrk.com/iframe/live/id/19201/showZoomBtn/false/isPlay/true/',
+ 'only_matching': True,
+ },
+ ]
+
+ def _real_extract(self, url):
+ mobj = self._match_valid_url(url)
+ video_id = mobj.group('id')
+ video_path = mobj.group('path')
+
+ if re.match(r'flash\d+v', video_path):
+ video_type = 'video'
+ elif video_path.startswith('iframe'):
+ video_type = mobj.group('type')
+ if video_type == 'swf':
+ video_type = 'video'
+ elif video_path.startswith('index/iframe/cast_id'):
+ video_type = 'live'
+
+ is_live = video_type == 'live'
+
+ json_data = self._download_json(
+ 'http://player.vgtrk.com/iframe/data%s/id/%s' % ('live' if is_live else 'video', video_id),
+ video_id, 'Downloading JSON')
+
+ if json_data['errors']:
+ raise ExtractorError('%s said: %s' % (self.IE_NAME, json_data['errors']), expected=True)
+
+ playlist = json_data['data']['playlist']
+ medialist = playlist['medialist']
+ media = medialist[0]
+
+ if media['errors']:
+ raise ExtractorError('%s said: %s' % (self.IE_NAME, media['errors']), expected=True)
+
+ view_count = int_or_none(playlist.get('count_views'))
+ priority_transport = playlist['priority_transport']
+
+ thumbnail = media['picture']
+ width = int_or_none(media['width'])
+ height = int_or_none(media['height'])
+ description = media['anons']
+ title = media['title']
+ duration = int_or_none(media.get('duration'))
+
+ formats = []
+ subtitles = {}
+
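+        # Walk every transport (rtmp/m3u8/plain HTTP); the playlist's priority transport gets the higher source preference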
+ for transport, links in media['sources'].items():
+ for quality, url in links.items():
+ preference = -1 if priority_transport == transport else -2
+ if transport == 'rtmp':
+ mobj = re.search(r'^(?P<url>rtmp://[^/]+/(?P<app>.+))/(?P<playpath>.+)$', url)
+ if not mobj:
+ continue
+ fmt = {
+ 'url': mobj.group('url'),
+ 'play_path': mobj.group('playpath'),
+ 'app': mobj.group('app'),
+ 'page_url': 'http://player.rutv.ru',
+ 'player_url': 'http://player.rutv.ru/flash3v/osmf.swf?i=22',
+ 'rtmp_live': True,
+ 'ext': 'flv',
+ 'vbr': str_to_int(quality),
+ }
+ elif transport == 'm3u8':
+ fmt, subs = self._extract_m3u8_formats_and_subtitles(
+ url, video_id, 'mp4', quality=preference, m3u8_id='hls')
+ formats.extend(fmt)
+ self._merge_subtitles(subs, target=subtitles)
+ continue
+ else:
+ fmt = {
+ 'url': url
+ }
+ fmt.update({
+ 'width': int_or_none(quality, default=height, invscale=width, scale=height),
+ 'height': int_or_none(quality, default=height),
+ 'format_id': '%s-%s' % (transport, quality),
+ 'source_preference': preference,
+ })
+ formats.append(fmt)
+
+ return {
+ 'id': video_id,
+ 'title': title,
+ 'description': description,
+ 'thumbnail': thumbnail,
+ 'view_count': view_count,
+ 'duration': duration,
+ 'formats': formats,
+ 'subtitles': subtitles,
+ 'is_live': is_live,
+ '_format_sort_fields': ('source', ),
+ }
diff --git a/yt_dlp/extractor/ruutu.py b/yt_dlp/extractor/ruutu.py
new file mode 100644
index 0000000..33f6652
--- /dev/null
+++ b/yt_dlp/extractor/ruutu.py
@@ -0,0 +1,262 @@
+import json
+import re
+
+from .common import InfoExtractor
+from ..compat import compat_urllib_parse_urlparse
+from ..utils import (
+ determine_ext,
+ ExtractorError,
+ find_xpath_attr,
+ int_or_none,
+ traverse_obj,
+ try_call,
+ unified_strdate,
+ url_or_none,
+ xpath_attr,
+ xpath_text,
+)
+
+
+class RuutuIE(InfoExtractor):
+ _VALID_URL = r'''(?x)
+ https?://
+ (?:
+ (?:www\.)?(?:ruutu|supla)\.fi/(?:video|supla|audio)/|
+ static\.nelonenmedia\.fi/player/misc/embed_player\.html\?.*?\bnid=
+ )
+ (?P<id>\d+)
+ '''
+ _TESTS = [
+ {
+ 'url': 'http://www.ruutu.fi/video/2058907',
+ 'md5': 'ab2093f39be1ca8581963451b3c0234f',
+ 'info_dict': {
+ 'id': '2058907',
+ 'ext': 'mp4',
+ 'title': 'Oletko aina halunnut tietää mitä tapahtuu vain hetki ennen lähetystä? - Nyt se selvisi!',
+ 'description': 'md5:cfc6ccf0e57a814360df464a91ff67d6',
+ 'thumbnail': r're:^https?://.*\.jpg$',
+ 'duration': 114,
+ 'age_limit': 0,
+ 'upload_date': '20150508',
+ },
+ },
+ {
+ 'url': 'http://www.ruutu.fi/video/2057306',
+ 'md5': '065a10ae4d5b8cfd9d0c3d332465e3d9',
+ 'info_dict': {
+ 'id': '2057306',
+ 'ext': 'mp4',
+ 'title': 'Superpesis: katso koko kausi Ruudussa',
+ 'description': 'md5:bfb7336df2a12dc21d18fa696c9f8f23',
+ 'thumbnail': r're:^https?://.*\.jpg$',
+ 'duration': 40,
+ 'age_limit': 0,
+ 'upload_date': '20150507',
+ 'series': 'Superpesis',
+ 'categories': ['Urheilu'],
+ },
+ },
+ {
+ 'url': 'http://www.supla.fi/supla/2231370',
+ 'md5': 'df14e782d49a2c0df03d3be2a54ef949',
+ 'info_dict': {
+ 'id': '2231370',
+ 'ext': 'mp4',
+ 'title': 'Osa 1: Mikael Jungner',
+ 'description': 'md5:7d90f358c47542e3072ff65d7b1bcffe',
+ 'thumbnail': r're:^https?://.*\.jpg$',
+ 'age_limit': 0,
+ 'upload_date': '20151012',
+ 'series': 'Läpivalaisu',
+ },
+ },
+ # Episode where <SourceFile> is "NOT-USED", but has other
+ # downloadable sources available.
+ {
+ 'url': 'http://www.ruutu.fi/video/3193728',
+ 'only_matching': True,
+ },
+ {
+ # audio podcast
+ 'url': 'https://www.supla.fi/supla/3382410',
+ 'md5': 'b9d7155fed37b2ebf6021d74c4b8e908',
+ 'info_dict': {
+ 'id': '3382410',
+ 'ext': 'mp3',
+ 'title': 'Mikä ihmeen poltergeist?',
+ 'description': 'md5:bbb6963df17dfd0ecd9eb9a61bf14b52',
+ 'thumbnail': r're:^https?://.*\.jpg$',
+ 'age_limit': 0,
+ 'upload_date': '20190320',
+ 'series': 'Mysteeritarinat',
+ 'duration': 1324,
+ },
+ 'expected_warnings': [
+ 'HTTP Error 502: Bad Gateway',
+ 'Failed to download m3u8 information',
+ ],
+ },
+ {
+ 'url': 'http://www.supla.fi/audio/2231370',
+ 'only_matching': True,
+ },
+ {
+ 'url': 'https://static.nelonenmedia.fi/player/misc/embed_player.html?nid=3618790',
+ 'only_matching': True,
+ },
+ {
+ # episode
+ 'url': 'https://www.ruutu.fi/video/3401964',
+ 'info_dict': {
+ 'id': '3401964',
+ 'ext': 'mp4',
+ 'title': 'Temptation Island Suomi - Kausi 5 - Jakso 17',
+ 'description': 'md5:87cf01d5e1e88adf0c8a2937d2bd42ba',
+ 'thumbnail': r're:^https?://.*\.jpg$',
+ 'duration': 2582,
+ 'age_limit': 12,
+ 'upload_date': '20190508',
+ 'series': 'Temptation Island Suomi',
+ 'season_number': 5,
+ 'episode_number': 17,
+ 'categories': ['Reality ja tositapahtumat', 'Kotimaiset suosikit', 'Romantiikka ja parisuhde'],
+ },
+ 'params': {
+ 'skip_download': True,
+ },
+ },
+ {
+ # premium
+ 'url': 'https://www.ruutu.fi/video/3618715',
+ 'only_matching': True,
+ },
+ ]
+ _API_BASE = 'https://gatling.nelonenmedia.fi'
+
+ @classmethod
+ def _extract_embed_urls(cls, url, webpage):
+ # nelonen.fi
+ settings = try_call(
+ lambda: json.loads(re.search(
+ r'jQuery\.extend\(Drupal\.settings, ({.+?})\);', webpage).group(1), strict=False))
+ if settings:
+ video_id = traverse_obj(settings, (
+ 'mediaCrossbowSettings', 'file', 'field_crossbow_video_id', 'und', 0, 'value'))
+ if video_id:
+ return [f'http://www.ruutu.fi/video/{video_id}']
+ # hs.fi and is.fi
+ settings = try_call(
+ lambda: json.loads(re.search(
+ '(?s)<script[^>]+id=[\'"]__NEXT_DATA__[\'"][^>]*>([^<]+)</script>',
+ webpage).group(1), strict=False))
+ if settings:
+ video_ids = set(traverse_obj(settings, (
+ 'props', 'pageProps', 'page', 'assetData', 'splitBody', ..., 'video', 'sourceId')) or [])
+ if video_ids:
+ return [f'http://www.ruutu.fi/video/{v}' for v in video_ids]
+ video_id = traverse_obj(settings, (
+ 'props', 'pageProps', 'page', 'assetData', 'mainVideo', 'sourceId'))
+ if video_id:
+ return [f'http://www.ruutu.fi/video/{video_id}']
+
+    _EMBED_REGEX = [r'''(?x)
+ video_id = self._match_id(url)
+
+ video_xml = self._download_xml(
+ '%s/media-xml-cache' % self._API_BASE, video_id,
+ query={'id': video_id})
+
+ formats = []
+ processed_urls = []
+
+ def extract_formats(node):
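+            # The clip XML nests *Files containers holding *File leaves; recurse into containers and emit one format per usable leaf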
+ for child in node:
+ if child.tag.endswith('Files'):
+ extract_formats(child)
+ elif child.tag.endswith('File'):
+ video_url = child.text
+ if (not video_url or video_url in processed_urls
+ or any(p in video_url for p in ('NOT_USED', 'NOT-USED'))):
+ continue
+ processed_urls.append(video_url)
+ ext = determine_ext(video_url)
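+                    # Try to exchange the raw stream URL for an authenticated one; keep the original URL if the exchange fails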
+ auth_video_url = url_or_none(self._download_webpage(
+ '%s/auth/access/v2' % self._API_BASE, video_id,
+ note='Downloading authenticated %s stream URL' % ext,
+ fatal=False, query={'stream': video_url}))
+ if auth_video_url:
+ processed_urls.append(auth_video_url)
+ video_url = auth_video_url
+ if ext == 'm3u8':
+ formats.extend(self._extract_m3u8_formats(
+ video_url, video_id, 'mp4',
+ entry_protocol='m3u8_native', m3u8_id='hls',
+ fatal=False))
+ elif ext == 'f4m':
+ formats.extend(self._extract_f4m_formats(
+ video_url, video_id, f4m_id='hds', fatal=False))
+ elif ext == 'mpd':
+ # video-only and audio-only streams are of different
+ # duration resulting in out of sync issue
+                        # DASH extraction is intentionally disabled; the call below is kept for reference
+                        continue
+                        formats.extend(self._extract_mpd_formats(
+                            video_url, video_id, mpd_id='dash', fatal=False))
+ elif ext == 'mp3' or child.tag == 'AudioMediaFile':
+ formats.append({
+ 'format_id': 'audio',
+ 'url': video_url,
+ 'vcodec': 'none',
+ })
+ else:
+ proto = compat_urllib_parse_urlparse(video_url).scheme
+ if not child.tag.startswith('HTTP') and proto != 'rtmp':
+ continue
+ preference = -1 if proto == 'rtmp' else 1
+ label = child.get('label')
+ tbr = int_or_none(child.get('bitrate'))
+ format_id = '%s-%s' % (proto, label if label else tbr) if label or tbr else proto
+ if not self._is_valid_url(video_url, video_id, format_id):
+ continue
+ width, height = [int_or_none(x) for x in child.get('resolution', 'x').split('x')[:2]]
+ formats.append({
+ 'format_id': format_id,
+ 'url': video_url,
+ 'width': width,
+ 'height': height,
+ 'tbr': tbr,
+ 'preference': preference,
+ })
+
+ extract_formats(video_xml.find('./Clip'))
+
+ def pv(name):
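+            # Look up a named passthrough variable in the clip metadata; the API uses 'NA' for absent values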
+ value = try_call(lambda: find_xpath_attr(
+ video_xml, './Clip/PassthroughVariables/variable', 'name', name).get('value'))
+ if value != 'NA':
+ return value or None
+
+ if not formats:
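+            # No playable formats: report DRM if flagged, otherwise a paywall when 'ns_st_cds' is not 'free'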
+ if (not self.get_param('allow_unplayable_formats')
+ and xpath_text(video_xml, './Clip/DRM', default=None)):
+ self.report_drm(video_id)
+ ns_st_cds = pv('ns_st_cds')
+ if ns_st_cds != 'free':
+ raise ExtractorError('This video is %s.' % ns_st_cds, expected=True)
+
+ themes = pv('themes')
+
+ return {
+ 'id': video_id,
+ 'title': xpath_attr(video_xml, './/Behavior/Program', 'program_name', 'title', fatal=True),
+ 'description': xpath_attr(video_xml, './/Behavior/Program', 'description', 'description'),
+ 'thumbnail': xpath_attr(video_xml, './/Behavior/Startpicture', 'href', 'thumbnail'),
+ 'duration': int_or_none(xpath_text(video_xml, './/Runtime', 'duration')) or int_or_none(pv('runtime')),
+ 'age_limit': int_or_none(xpath_text(video_xml, './/AgeLimit', 'age limit')),
+ 'upload_date': unified_strdate(pv('date_start')),
+ 'series': pv('series_name'),
+ 'season_number': int_or_none(pv('season_number')),
+ 'episode_number': int_or_none(pv('episode_number')),
+ 'categories': themes.split(',') if themes else None,
+ 'formats': formats,
+ }
diff --git a/yt_dlp/extractor/ruv.py b/yt_dlp/extractor/ruv.py
new file mode 100644
index 0000000..12499d6
--- /dev/null
+++ b/yt_dlp/extractor/ruv.py
@@ -0,0 +1,186 @@
+from .common import InfoExtractor
+from ..utils import (
+ determine_ext,
+ parse_duration,
+ traverse_obj,
+ unified_timestamp,
+)
+
+
+class RuvIE(InfoExtractor):
+ _VALID_URL = r'https?://(?:www\.)?ruv\.is/(?:sarpurinn/[^/]+|node)/(?P<id>[^/]+(?:/\d+)?)'
+ _TESTS = [{
+ # m3u8
+ 'url': 'http://ruv.is/sarpurinn/ruv-aukaras/fh-valur/20170516',
+ 'md5': '66347652f4e13e71936817102acc1724',
+ 'info_dict': {
+ 'id': '1144499',
+ 'display_id': 'fh-valur/20170516',
+ 'ext': 'mp4',
+ 'title': 'FH - Valur',
+ 'description': 'Bein útsending frá 3. leik FH og Vals í úrslitum Olísdeildar karla í handbolta.',
+ 'timestamp': 1494963600,
+ 'upload_date': '20170516',
+ },
+ }, {
+ # mp3
+ 'url': 'http://ruv.is/sarpurinn/ras-2/morgunutvarpid/20170619',
+ 'md5': '395ea250c8a13e5fdb39d4670ef85378',
+ 'info_dict': {
+ 'id': '1153630',
+ 'display_id': 'morgunutvarpid/20170619',
+ 'ext': 'mp3',
+ 'title': 'Morgunútvarpið',
+ 'description': 'md5:a4cf1202c0a1645ca096b06525915418',
+ 'timestamp': 1497855000,
+ 'upload_date': '20170619',
+ },
+ }, {
+ 'url': 'http://ruv.is/sarpurinn/ruv/frettir/20170614',
+ 'only_matching': True,
+ }, {
+ 'url': 'http://www.ruv.is/node/1151854',
+ 'only_matching': True,
+ }, {
+ 'url': 'http://ruv.is/sarpurinn/klippa/secret-soltice-hefst-a-morgun',
+ 'only_matching': True,
+ }, {
+ 'url': 'http://ruv.is/sarpurinn/ras-1/morgunvaktin/20170619',
+ 'only_matching': True,
+ }]
+
+ def _real_extract(self, url):
+ display_id = self._match_id(url)
+
+ webpage = self._download_webpage(url, display_id)
+
+ title = self._og_search_title(webpage)
+
+ FIELD_RE = r'video\.%s\s*=\s*(["\'])(?P<url>(?:(?!\1).)+)\1'
+
+ media_url = self._html_search_regex(
+ FIELD_RE % 'src', webpage, 'video URL', group='url')
+
+ video_id = self._search_regex(
+ r'<link\b[^>]+\bhref=["\']https?://www\.ruv\.is/node/(\d+)',
+ webpage, 'video id', default=display_id)
+
+ ext = determine_ext(media_url)
+
+ if ext == 'm3u8':
+ formats = self._extract_m3u8_formats(
+ media_url, video_id, 'mp4', entry_protocol='m3u8_native',
+ m3u8_id='hls')
+ elif ext == 'mp3':
+ formats = [{
+ 'format_id': 'mp3',
+ 'url': media_url,
+ 'vcodec': 'none',
+ }]
+ else:
+ formats = [{
+ 'url': media_url,
+ }]
+
+ description = self._og_search_description(webpage, default=None)
+ thumbnail = self._og_search_thumbnail(
+ webpage, default=None) or self._search_regex(
+ FIELD_RE % 'poster', webpage, 'thumbnail', fatal=False)
+ timestamp = unified_timestamp(self._html_search_meta(
+ 'article:published_time', webpage, 'timestamp', fatal=False))
+
+ return {
+ 'id': video_id,
+ 'display_id': display_id,
+ 'title': title,
+ 'description': description,
+ 'thumbnail': thumbnail,
+ 'timestamp': timestamp,
+ 'formats': formats,
+ }
+
+
+class RuvSpilaIE(InfoExtractor):
+ IE_NAME = 'ruv.is:spila'
+ _VALID_URL = r'https?://(?:www\.)?ruv\.is/(?:(?:sjon|ut)varp|(?:krakka|ung)ruv)/spila/.+/(?P<series_id>[0-9]+)/(?P<id>[a-z0-9]+)'
+ _TESTS = [{
+ 'url': 'https://www.ruv.is/sjonvarp/spila/ithrottir/30657/9jcnd4',
+ 'info_dict': {
+ 'id': '9jcnd4',
+ 'ext': 'mp4',
+ 'title': '01.02.2022',
+ 'chapters': 'count:4',
+ 'timestamp': 1643743500,
+ 'upload_date': '20220201',
+ 'thumbnail': 'https://d38kdhuogyllre.cloudfront.net/fit-in/1960x/filters:quality(65)/hd_posters/94boog-iti3jg.jpg',
+ 'description': 'Íþróttafréttir.',
+ 'age_limit': 0,
+ },
+ }, {
+ 'url': 'https://www.ruv.is/utvarp/spila/i-ljosi-sogunnar/23795/7hqkre',
+ 'info_dict': {
+ 'id': '7hqkre',
+ 'ext': 'mp3',
+ 'thumbnail': 'https://d38kdhuogyllre.cloudfront.net/fit-in/1960x/filters:quality(65)/hd_posters/7hqkre-7uepao.jpg',
+ 'description': 'md5:8d7046549daff35e9a3190dc9901a120',
+ 'chapters': [],
+ 'upload_date': '20220204',
+ 'timestamp': 1643965500,
+ 'title': 'Nellie Bly II',
+ 'age_limit': 0,
+ },
+ }, {
+ 'url': 'https://www.ruv.is/ungruv/spila/ungruv/28046/8beuph',
+        'only_matching': True,
+ }, {
+ 'url': 'https://www.ruv.is/krakkaruv/spila/krakkafrettir/30712/9jbgb0',
+        'only_matching': True,
+ }]
+
+ def _real_extract(self, url):
+ display_id, series_id = self._match_valid_url(url).group('id', 'series_id')
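+        # Fetch the programme plus the single requested episode through RÚV's GraphQL endpoint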
+ program = self._download_json(
+ 'https://www.ruv.is/gql/', display_id, query={'query': '''{
+ Program(id: %s){
+ title image description short_description
+ episodes(id: {value: "%s"}) {
+ rating title duration file image firstrun description
+ clips {
+ time text
+ }
+ subtitles {
+ name value
+ }
+ }
+ }
+ }''' % (series_id, display_id)})['data']['Program']
+ episode = program['episodes'][0]
+
+ subs = {}
+        for trk in episode.get('subtitles') or []:
+ if trk.get('name') and trk.get('value'):
+ subs.setdefault(trk['name'], []).append({'url': trk['value'], 'ext': 'vtt'})
+
+ media_url = episode['file']
+ if determine_ext(media_url) == 'm3u8':
+ formats = self._extract_m3u8_formats(media_url, display_id)
+ else:
+ formats = [{'url': media_url}]
+
+ clips = [
+ {'start_time': parse_duration(c.get('time')), 'title': c.get('text')}
+ for c in episode.get('clips') or []]
+
+ return {
+ 'id': display_id,
+ 'title': traverse_obj(program, ('episodes', 0, 'title'), 'title'),
+ 'description': traverse_obj(
+ program, ('episodes', 0, 'description'), 'description', 'short_description',
+ expected_type=lambda x: x or None),
+ 'subtitles': subs,
+ 'thumbnail': episode.get('image', '').replace('$$IMAGESIZE$$', '1960') or None,
+ 'timestamp': unified_timestamp(episode.get('firstrun')),
+ 'formats': formats,
+ 'age_limit': episode.get('rating'),
+            'chapters': clips,
+ }
diff --git a/yt_dlp/extractor/s4c.py b/yt_dlp/extractor/s4c.py
new file mode 100644
index 0000000..67eff72
--- /dev/null
+++ b/yt_dlp/extractor/s4c.py
@@ -0,0 +1,103 @@
+from .common import InfoExtractor
+from ..utils import traverse_obj, url_or_none
+
+
+class S4CIE(InfoExtractor):
+ _VALID_URL = r'https?://(?:www\.)?s4c\.cymru/clic/programme/(?P<id>\d+)'
+ _TESTS = [{
+ 'url': 'https://www.s4c.cymru/clic/programme/861362209',
+ 'info_dict': {
+ 'id': '861362209',
+ 'ext': 'mp4',
+ 'title': 'Y Swn',
+ 'description': 'md5:f7681a30e4955b250b3224aa9fe70cf0',
+ 'duration': 5340,
+ 'thumbnail': 'https://www.s4c.cymru/amg/1920x1080/Y_Swn_2023S4C_099_ii.jpg'
+ },
+ }, {
+ 'url': 'https://www.s4c.cymru/clic/programme/856636948',
+ 'info_dict': {
+ 'id': '856636948',
+ 'ext': 'mp4',
+ 'title': 'Am Dro',
+ 'duration': 2880,
+ 'description': 'md5:100d8686fc9a632a0cb2db52a3433ffe',
+ 'thumbnail': 'https://www.s4c.cymru/amg/1920x1080/Am_Dro_2022-23S4C_P6_4005.jpg'
+ },
+ }]
+
+ def _real_extract(self, url):
+ video_id = self._match_id(url)
+ details = self._download_json(
+ f'https://www.s4c.cymru/df/full_prog_details?lang=e&programme_id={video_id}',
+ video_id, fatal=False)
+
+ player_config = self._download_json(
+ 'https://player-api.s4c-cdn.co.uk/player-configuration/prod', video_id, query={
+ 'programme_id': video_id,
+ 'signed': '0',
+ 'lang': 'en',
+ 'mode': 'od',
+ 'appId': 'clic',
+ 'streamName': '',
+ }, note='Downloading player config JSON')
+ subtitles = {}
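+        # The player config keys each subtitle track numerically: '0' = URL, '1' = label, '3' = language code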
+ for sub in traverse_obj(player_config, ('subtitles', lambda _, v: url_or_none(v['0']))):
+ subtitles.setdefault(sub.get('3', 'en'), []).append({
+ 'url': sub['0'],
+ 'name': sub.get('1'),
+ })
+ m3u8_url = self._download_json(
+ 'https://player-api.s4c-cdn.co.uk/streaming-urls/prod', video_id, query={
+ 'mode': 'od',
+ 'application': 'clic',
+ 'region': 'WW',
+ 'extra': 'false',
+ 'thirdParty': 'false',
+ 'filename': player_config['filename'],
+ }, note='Downloading streaming urls JSON')['hls']
+
+ return {
+ 'id': video_id,
+ 'formats': self._extract_m3u8_formats(m3u8_url, video_id, 'mp4', m3u8_id='hls'),
+ 'subtitles': subtitles,
+ 'thumbnail': url_or_none(player_config.get('poster')),
+ **traverse_obj(details, ('full_prog_details', 0, {
+ 'title': (('programme_title', 'series_title'), {str}),
+ 'description': ('full_billing', {str.strip}),
+ 'duration': ('duration', {lambda x: int(x) * 60}),
+ }), get_all=False),
+ }
+
+
+class S4CSeriesIE(InfoExtractor):
+ _VALID_URL = r'https?://(?:www\.)?s4c\.cymru/clic/series/(?P<id>\d+)'
+ _TESTS = [{
+ 'url': 'https://www.s4c.cymru/clic/series/864982911',
+ 'playlist_mincount': 6,
+ 'info_dict': {
+ 'id': '864982911',
+ 'title': 'Iaith ar Daith',
+ },
+ }, {
+ 'url': 'https://www.s4c.cymru/clic/series/866852587',
+ 'playlist_mincount': 8,
+ 'info_dict': {
+ 'id': '866852587',
+ 'title': 'FFIT Cymru',
+ },
+ }]
+
+ def _real_extract(self, url):
+ series_id = self._match_id(url)
+ series_details = self._download_json(
+ 'https://www.s4c.cymru/df/series_details', series_id, query={
+ 'lang': 'e',
+ 'series_id': series_id,
+ 'show_prog_in_series': 'Y'
+ }, note='Downloading series details JSON')
+
+ return self.playlist_result(
+ [self.url_result(f'https://www.s4c.cymru/clic/programme/{episode_id}', S4CIE, episode_id)
+ for episode_id in traverse_obj(series_details, ('other_progs_in_series', ..., 'id'))],
+ series_id, traverse_obj(series_details, ('full_prog_details', 0, 'series_title', {str})))
diff --git a/yt_dlp/extractor/safari.py b/yt_dlp/extractor/safari.py
new file mode 100644
index 0000000..8d322d7
--- /dev/null
+++ b/yt_dlp/extractor/safari.py
@@ -0,0 +1,259 @@
+import json
+import re
+
+from .common import InfoExtractor
+
+from ..compat import (
+ compat_parse_qs,
+ compat_urlparse,
+)
+from ..utils import (
+ ExtractorError,
+ update_url_query,
+)
+
+
+class SafariBaseIE(InfoExtractor):
+ _LOGIN_URL = 'https://learning.oreilly.com/accounts/login/'
+ _NETRC_MACHINE = 'safari'
+
+ _API_BASE = 'https://learning.oreilly.com/api/v1'
+ _API_FORMAT = 'json'
+
+ LOGGED_IN = False
+
+ def _perform_login(self, username, password):
+ _, urlh = self._download_webpage_handle(
+ 'https://learning.oreilly.com/accounts/login-check/', None,
+ 'Downloading login page')
+
+ def is_logged(urlh):
+ return 'learning.oreilly.com/home/' in urlh.url
+
+ if is_logged(urlh):
+ self.LOGGED_IN = True
+ return
+
+ redirect_url = urlh.url
+ parsed_url = compat_urlparse.urlparse(redirect_url)
+ qs = compat_parse_qs(parsed_url.query)
+ next_uri = compat_urlparse.urljoin(
+ 'https://api.oreilly.com', qs['next'][0])
+
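+        # The auth endpoint answers rejected logins with HTTP 400 and an error payload, hence expected_status=400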
+ auth, urlh = self._download_json_handle(
+ 'https://www.oreilly.com/member/auth/login/', None, 'Logging in',
+ data=json.dumps({
+ 'email': username,
+ 'password': password,
+ 'redirect_uri': next_uri,
+ }).encode(), headers={
+ 'Content-Type': 'application/json',
+ 'Referer': redirect_url,
+ }, expected_status=400)
+
+ credentials = auth.get('credentials')
+ if (not auth.get('logged_in') and not auth.get('redirect_uri')
+ and credentials):
+ raise ExtractorError(
+ 'Unable to login: %s' % credentials, expected=True)
+
+ # oreilly serves two same instances of the following cookies
+ # in Set-Cookie header and expects first one to be actually set
+ for cookie in ('groot_sessionid', 'orm-jwt', 'orm-rt'):
+ self._apply_first_set_cookie_header(urlh, cookie)
+
+ _, urlh = self._download_webpage_handle(
+            auth.get('redirect_uri') or next_uri, None, 'Completing login')
+
+ if is_logged(urlh):
+ self.LOGGED_IN = True
+ return
+
+ raise ExtractorError('Unable to log in')
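+        # Attribute the result to the main Rutube extractor, since the canonical 32-character id replaces the numeric embed id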
+
+
+class SafariIE(SafariBaseIE):
+ IE_NAME = 'safari'
+ IE_DESC = 'safaribooksonline.com online video'
+ _VALID_URL = r'''(?x)
+ https?://
+ (?:www\.)?(?:safaribooksonline|(?:learning\.)?oreilly)\.com/
+ (?:
+ library/view/[^/]+/(?P<course_id>[^/]+)/(?P<part>[^/?\#&]+)\.html|
+ videos/[^/]+/[^/]+/(?P<reference_id>[^-]+-[^/?\#&]+)
+ )
+ '''
+
+ _TESTS = [{
+ 'url': 'https://www.safaribooksonline.com/library/view/hadoop-fundamentals-livelessons/9780133392838/part00.html',
+ 'md5': 'dcc5a425e79f2564148652616af1f2a3',
+ 'info_dict': {
+ 'id': '0_qbqx90ic',
+ 'ext': 'mp4',
+ 'title': 'Introduction to Hadoop Fundamentals LiveLessons',
+ 'timestamp': 1437758058,
+ 'upload_date': '20150724',
+ 'uploader_id': 'stork',
+ },
+ }, {
+ # non-digits in course id
+ 'url': 'https://www.safaribooksonline.com/library/view/create-a-nodejs/100000006A0210/part00.html',
+ 'only_matching': True,
+ }, {
+ 'url': 'https://www.safaribooksonline.com/library/view/learning-path-red/9780134664057/RHCE_Introduction.html',
+ 'only_matching': True,
+ }, {
+ 'url': 'https://www.safaribooksonline.com/videos/python-programming-language/9780134217314/9780134217314-PYMC_13_00',
+ 'only_matching': True,
+ }, {
+ 'url': 'https://learning.oreilly.com/videos/hadoop-fundamentals-livelessons/9780133392838/9780133392838-00_SeriesIntro',
+ 'only_matching': True,
+ }, {
+ 'url': 'https://www.oreilly.com/library/view/hadoop-fundamentals-livelessons/9780133392838/00_SeriesIntro.html',
+ 'only_matching': True,
+ }]
+
+ _PARTNER_ID = '1926081'
+ _UICONF_ID = '29375172'
+
+ def _real_extract(self, url):
+ mobj = self._match_valid_url(url)
+
+ reference_id = mobj.group('reference_id')
+ if reference_id:
+ video_id = reference_id
+ partner_id = self._PARTNER_ID
+ ui_id = self._UICONF_ID
+ else:
+ video_id = '%s-%s' % (mobj.group('course_id'), mobj.group('part'))
+
+ webpage, urlh = self._download_webpage_handle(url, video_id)
+
+ mobj = re.match(self._VALID_URL, urlh.url)
+ reference_id = mobj.group('reference_id')
+ if not reference_id:
+ reference_id = self._search_regex(
+ r'data-reference-id=(["\'])(?P<id>(?:(?!\1).)+)\1',
+ webpage, 'kaltura reference id', group='id')
+ partner_id = self._search_regex(
+ r'data-partner-id=(["\'])(?P<id>(?:(?!\1).)+)\1',
+ webpage, 'kaltura widget id', default=self._PARTNER_ID,
+ group='id')
+ ui_id = self._search_regex(
+ r'data-ui-id=(["\'])(?P<id>(?:(?!\1).)+)\1',
+ webpage, 'kaltura uiconf id', default=self._UICONF_ID,
+ group='id')
+
+ query = {
+ 'wid': '_%s' % partner_id,
+ 'uiconf_id': ui_id,
+ 'flashvars[referenceId]': reference_id,
+ }
+
+ if self.LOGGED_IN:
+ kaltura_session = self._download_json(
+ '%s/player/kaltura_session/?reference_id=%s' % (self._API_BASE, reference_id),
+ video_id, 'Downloading kaltura session JSON',
+ 'Unable to download kaltura session JSON', fatal=False,
+ headers={'Accept': 'application/json'})
+ if kaltura_session:
+ session = kaltura_session.get('session')
+ if session:
+ query['flashvars[ks]'] = session
+
+ return self.url_result(update_url_query(
+ 'https://cdnapisec.kaltura.com/html5/html5lib/v2.37.1/mwEmbedFrame.php', query),
+ 'Kaltura')
+
+
+class SafariApiIE(SafariBaseIE):
+ IE_NAME = 'safari:api'
+ _VALID_URL = r'https?://(?:www\.)?(?:safaribooksonline|(?:learning\.)?oreilly)\.com/api/v1/book/(?P<course_id>[^/]+)/chapter(?:-content)?/(?P<part>[^/?#&]+)\.html'
+
+ _TESTS = [{
+ 'url': 'https://www.safaribooksonline.com/api/v1/book/9780133392838/chapter/part00.html',
+ 'only_matching': True,
+ }, {
+ 'url': 'https://www.safaribooksonline.com/api/v1/book/9780134664057/chapter/RHCE_Introduction.html',
+ 'only_matching': True,
+ }]
+
+ def _real_extract(self, url):
+ mobj = self._match_valid_url(url)
+ part = self._download_json(
+ url, '%s/%s' % (mobj.group('course_id'), mobj.group('part')),
+ 'Downloading part JSON')
+ web_url = part['web_url']
+ if 'library/view' in web_url:
+ web_url = web_url.replace('library/view', 'videos')
+ natural_keys = part['natural_key']
+ web_url = f'{web_url.rsplit("/", 1)[0]}/{natural_keys[0]}-{natural_keys[1][:-5]}'
+ return self.url_result(web_url, SafariIE.ie_key())
+
+
+class SafariCourseIE(SafariBaseIE):
+ IE_NAME = 'safari:course'
+ IE_DESC = 'safaribooksonline.com online courses'
+
+ _VALID_URL = r'''(?x)
+ https?://
+ (?:
+ (?:www\.)?(?:safaribooksonline|(?:learning\.)?oreilly)\.com/
+ (?:
+ library/view/[^/]+|
+ api/v1/book|
+ videos/[^/]+
+ )|
+ techbus\.safaribooksonline\.com
+ )
+ /(?P<id>[^/]+)
+ '''
+
+ _TESTS = [{
+ 'url': 'https://www.safaribooksonline.com/library/view/hadoop-fundamentals-livelessons/9780133392838/',
+ 'info_dict': {
+ 'id': '9780133392838',
+ 'title': 'Hadoop Fundamentals LiveLessons',
+ },
+ 'playlist_count': 22,
+ 'skip': 'Requires safaribooksonline account credentials',
+ }, {
+ 'url': 'https://www.safaribooksonline.com/api/v1/book/9781449396459/?override_format=json',
+ 'only_matching': True,
+ }, {
+ 'url': 'http://techbus.safaribooksonline.com/9780134426365',
+ 'only_matching': True,
+ }, {
+ 'url': 'https://www.safaribooksonline.com/videos/python-programming-language/9780134217314',
+ 'only_matching': True,
+ }, {
+ 'url': 'https://learning.oreilly.com/videos/hadoop-fundamentals-livelessons/9780133392838',
+ 'only_matching': True,
+ }, {
+ 'url': 'https://www.oreilly.com/library/view/hadoop-fundamentals-livelessons/9780133392838/',
+ 'only_matching': True,
+ }]
+
+ @classmethod
+ def suitable(cls, url):
+ return (False if SafariIE.suitable(url) or SafariApiIE.suitable(url)
+                else super().suitable(url))
+
+ def _real_extract(self, url):
+ course_id = self._match_id(url)
+
+ course_json = self._download_json(
+ '%s/book/%s/?override_format=%s' % (self._API_BASE, course_id, self._API_FORMAT),
+ course_id, 'Downloading course JSON')
+
+ if 'chapters' not in course_json:
+ raise ExtractorError(
+ 'No chapters found for course %s' % course_id, expected=True)
+
+ entries = [
+ self.url_result(chapter, SafariApiIE.ie_key())
+ for chapter in course_json['chapters']]
+
+ course_title = course_json['title']
+
+ return self.playlist_result(entries, course_id, course_title)
diff --git a/yt_dlp/extractor/saitosan.py b/yt_dlp/extractor/saitosan.py
new file mode 100644
index 0000000..a5f05e1
--- /dev/null
+++ b/yt_dlp/extractor/saitosan.py
@@ -0,0 +1,75 @@
+from .common import InfoExtractor
+from ..utils import ExtractorError, try_get
+
+
+class SaitosanIE(InfoExtractor):
+ _WORKING = False
+ IE_NAME = 'Saitosan'
+ _VALID_URL = r'https?://(?:www\.)?saitosan\.net/bview.html\?id=(?P<id>[0-9]+)'
+ _TESTS = [{
+ 'url': 'http://www.saitosan.net/bview.html?id=10031846',
+ 'info_dict': {
+ 'id': '10031846',
+ 'ext': 'mp4',
+ 'title': '井下原 和弥',
+ 'uploader': '井下原 和弥',
+ 'thumbnail': 'http://111.171.196.85:8088/921f916f-7f55-4c97-b92e-5d9d0fef8f5f/thumb',
+ 'is_live': True,
+ },
+ 'params': {
+ # m3u8 download
+ 'skip_download': True,
+ },
+ 'skip': 'Broadcasts are ephemeral',
+ },
+ {
+ 'url': 'http://www.saitosan.net/bview.html?id=10031795',
+ 'info_dict': {
+ 'id': '10031795',
+ 'ext': 'mp4',
+ 'title': '橋本',
+ 'uploader': '橋本',
+ 'thumbnail': 'http://111.171.196.85:8088/1a3933e1-a01a-483b-8931-af15f37f8082/thumb',
+ 'is_live': True,
+ },
+ 'params': {
+ # m3u8 download
+ 'skip_download': True,
+ },
+ 'skip': 'Broadcasts are ephemeral',
+ }]
+
+ def _real_extract(self, url):
+ b_id = self._match_id(url)
+
+ base = 'http://hankachi.saitosan-api.net:8002/socket.io/?transport=polling&EIO=3'
+ sid = self._download_socket_json(base, b_id, note='Opening socket').get('sid')
+ base += '&sid=' + sid
+
+ self._download_webpage(base, b_id, note='Polling socket')
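+        # socket.io long-polling frames are length-prefixed ("<len>:<packet>"); the "42" prefix denotes an event packet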
+ payload = '420["room_start_join",{"room_id":"%s"}]' % b_id
+ payload = '%s:%s' % (len(payload), payload)
+
+ self._download_webpage(base, b_id, data=payload, note='Polling socket with payload')
+ response = self._download_socket_json(base, b_id, note='Polling socket')
+ if not response.get('ok'):
+ err = response.get('error') or {}
+ raise ExtractorError(
+ '%s said: %s - %s' % (self.IE_NAME, err.get('code', '?'), err.get('msg', 'Unknown')) if err
+ else 'The socket reported that the broadcast could not be joined. Maybe it\'s offline or the URL is incorrect',
+ expected=True, video_id=b_id)
+
+ self._download_webpage(base, b_id, data='26:421["room_finish_join",{}]', note='Polling socket')
+ b_data = self._download_socket_json(base, b_id, note='Getting broadcast metadata from socket')
+ m3u8_url = b_data.get('url')
+
+ self._download_webpage(base, b_id, data='1:1', note='Closing socket', fatal=False)
+
+ return {
+ 'id': b_id,
+ 'title': b_data.get('name'),
+ 'formats': self._extract_m3u8_formats(m3u8_url, b_id, 'mp4', live=True),
+ 'thumbnail': m3u8_url.replace('av.m3u8', 'thumb'),
+ 'uploader': try_get(b_data, lambda x: x['broadcast_user']['name']), # same as title
+            'is_live': True,
+ }
diff --git a/yt_dlp/extractor/samplefocus.py b/yt_dlp/extractor/samplefocus.py
new file mode 100644
index 0000000..e9f5c22
--- /dev/null
+++ b/yt_dlp/extractor/samplefocus.py
@@ -0,0 +1,97 @@
+import re
+
+from .common import InfoExtractor
+from ..utils import (
+ extract_attributes,
+ get_element_by_attribute,
+ int_or_none,
+)
+
+
+class SampleFocusIE(InfoExtractor):
+ _VALID_URL = r'https?://(?:www\.)?samplefocus\.com/samples/(?P<id>[^/?&#]+)'
+ _TESTS = [{
+ 'url': 'https://samplefocus.com/samples/lil-peep-sad-emo-guitar',
+ 'md5': '48c8d62d60be467293912e0e619a5120',
+ 'info_dict': {
+ 'id': '40316',
+ 'display_id': 'lil-peep-sad-emo-guitar',
+ 'ext': 'mp3',
+ 'title': 'Lil Peep Sad Emo Guitar',
+ 'thumbnail': r're:^https?://.+\.png',
+ 'license': 'Standard License',
+ 'uploader': 'CapsCtrl',
+ 'uploader_id': 'capsctrl',
+ 'like_count': int,
+ 'comment_count': int,
+ 'categories': ['Samples', 'Guitar', 'Electric guitar'],
+ },
+ }, {
+ 'url': 'https://samplefocus.com/samples/dababy-style-bass-808',
+        'only_matching': True,
+ }, {
+ 'url': 'https://samplefocus.com/samples/young-chop-kick',
+        'only_matching': True,
+ }]
+
+ def _real_extract(self, url):
+ display_id = self._match_id(url)
+ webpage = self._download_webpage(url, display_id)
+
+ sample_id = self._search_regex(
+ r'<input[^>]+id=(["\'])sample_id\1[^>]+value=(?:["\'])(?P<id>\d+)',
+ webpage, 'sample id', group='id')
+
+ title = self._og_search_title(webpage, fatal=False) or self._html_search_regex(
+ r'<h1>(.+?)</h1>', webpage, 'title')
+
+ mp3_url = self._search_regex(
+ r'<input[^>]+id=(["\'])sample_mp3\1[^>]+value=(["\'])(?P<url>(?:(?!\2).)+)',
+ webpage, 'mp3', fatal=False, group='url') or extract_attributes(self._search_regex(
+ r'<meta[^>]+itemprop=(["\'])contentUrl\1[^>]*>',
+ webpage, 'mp3 url', group=0))['content']
+
+ thumbnail = self._og_search_thumbnail(webpage) or self._html_search_regex(
+ r'<img[^>]+class=(?:["\'])waveform responsive-img[^>]+src=(["\'])(?P<url>(?:(?!\1).)+)',
+ webpage, 'mp3', fatal=False, group='url')
+
+ comments = []
+ for author_id, author, body in re.findall(r'(?s)<p[^>]+class="comment-author"><a[^>]+href="/users/([^"]+)">([^"]+)</a>.+?<p[^>]+class="comment-body">([^>]+)</p>', webpage):
+ comments.append({
+ 'author': author,
+ 'author_id': author_id,
+ 'text': body,
+ })
+
+ uploader_id = uploader = None
+ mobj = re.search(r'>By <a[^>]+href="/users/([^"]+)"[^>]*>([^<]+)', webpage)
+ if mobj:
+ uploader_id, uploader = mobj.groups()
+
+ breadcrumb = get_element_by_attribute('typeof', 'BreadcrumbList', webpage)
+ categories = []
+ if breadcrumb:
+ for _, name in re.findall(r'<span[^>]+property=(["\'])name\1[^>]*>([^<]+)', breadcrumb):
+ categories.append(name)
+
+ def extract_count(klass):
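+            # Counts are rendered in the page as <span class="<name>-count">N</span> elements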
+ return int_or_none(self._html_search_regex(
+ r'<span[^>]+class=(?:["\'])?%s-count[^>]*>(\d+)' % klass,
+ webpage, klass, fatal=False))
+
+ return {
+ 'id': sample_id,
+ 'title': title,
+ 'url': mp3_url,
+ 'display_id': display_id,
+ 'thumbnail': thumbnail,
+ 'uploader': uploader,
+ 'license': self._html_search_regex(
+ r'<a[^>]+href=(["\'])/license\1[^>]*>(?P<license>[^<]+)<',
+ webpage, 'license', fatal=False, group='license'),
+ 'uploader_id': uploader_id,
+ 'like_count': extract_count('sample-%s-favorites' % sample_id),
+ 'comment_count': extract_count('comments'),
+ 'comments': comments,
+ 'categories': categories,
+ }
diff --git a/yt_dlp/extractor/sapo.py b/yt_dlp/extractor/sapo.py
new file mode 100644
index 0000000..beffaee
--- /dev/null
+++ b/yt_dlp/extractor/sapo.py
@@ -0,0 +1,114 @@
+import re
+
+from .common import InfoExtractor
+from ..utils import (
+ parse_duration,
+ unified_strdate,
+)
+
+
+class SapoIE(InfoExtractor):
+ IE_DESC = 'SAPO Vídeos'
+ _VALID_URL = r'https?://(?:(?:v2|www)\.)?videos\.sapo\.(?:pt|cv|ao|mz|tl)/(?P<id>[\da-zA-Z]{20})'
+
+ _TESTS = [
+ {
+ 'url': 'http://videos.sapo.pt/UBz95kOtiWYUMTA5Ghfi',
+ 'md5': '79ee523f6ecb9233ac25075dee0eda83',
+ 'note': 'SD video',
+ 'info_dict': {
+ 'id': 'UBz95kOtiWYUMTA5Ghfi',
+ 'ext': 'mp4',
+ 'title': 'Benfica - Marcas na Hitória',
+ 'description': 'md5:c9082000a128c3fd57bf0299e1367f22',
+ 'duration': 264,
+ 'uploader': 'tiago_1988',
+ 'upload_date': '20080229',
+ 'categories': ['benfica', 'cabral', 'desporto', 'futebol', 'geovanni', 'hooijdonk', 'joao', 'karel', 'lisboa', 'miccoli'],
+ },
+ },
+ {
+ 'url': 'http://videos.sapo.pt/IyusNAZ791ZdoCY5H5IF',
+ 'md5': '90a2f283cfb49193fe06e861613a72aa',
+ 'note': 'HD video',
+ 'info_dict': {
+ 'id': 'IyusNAZ791ZdoCY5H5IF',
+ 'ext': 'mp4',
+ 'title': 'Codebits VII - Report',
+ 'description': 'md5:6448d6fd81ce86feac05321f354dbdc8',
+ 'duration': 144,
+ 'uploader': 'codebits',
+ 'upload_date': '20140427',
+ 'categories': ['codebits', 'codebits2014'],
+ },
+ },
+ {
+ 'url': 'http://v2.videos.sapo.pt/yLqjzPtbTimsn2wWBKHz',
+ 'md5': 'e5aa7cc0bdc6db9b33df1a48e49a15ac',
+ 'note': 'v2 video',
+ 'info_dict': {
+ 'id': 'yLqjzPtbTimsn2wWBKHz',
+ 'ext': 'mp4',
+ 'title': 'Hipnose Condicionativa 4',
+ 'description': 'md5:ef0481abf8fb4ae6f525088a6dadbc40',
+ 'duration': 692,
+ 'uploader': 'sapozen',
+ 'upload_date': '20090609',
+ 'categories': ['condicionativa', 'heloisa', 'hipnose', 'miranda', 'sapo', 'zen'],
+ },
+ },
+ ]
+
+ def _real_extract(self, url):
+ mobj = self._match_valid_url(url)
+ video_id = mobj.group('id')
+
+ item = self._download_xml(
+ 'http://rd3.videos.sapo.pt/%s/rss2' % video_id, video_id).find('./channel/item')
+
+ title = item.find('./title').text
+ description = item.find('./{http://videos.sapo.pt/mrss/}synopse').text
+ thumbnail = item.find('./{http://search.yahoo.com/mrss/}content').get('url')
+ duration = parse_duration(item.find('./{http://videos.sapo.pt/mrss/}time').text)
+ uploader = item.find('./{http://videos.sapo.pt/mrss/}author').text
+ upload_date = unified_strdate(item.find('./pubDate').text)
+ view_count = int(item.find('./{http://videos.sapo.pt/mrss/}views').text)
+ comment_count = int(item.find('./{http://videos.sapo.pt/mrss/}comment_count').text)
+ tags = item.find('./{http://videos.sapo.pt/mrss/}tags').text
+ categories = tags.split() if tags else []
+ age_limit = 18 if item.find('./{http://videos.sapo.pt/mrss/}m18').text == 'true' else 0
+
+ video_url = item.find('./{http://videos.sapo.pt/mrss/}videoFile').text
+ video_size = item.find('./{http://videos.sapo.pt/mrss/}videoSize').text.split('x')
+
+ formats = [{
+ 'url': video_url,
+ 'ext': 'mp4',
+ 'format_id': 'sd',
+ 'width': int(video_size[0]),
+ 'height': int(video_size[1]),
+ }]
+
+ if item.find('./{http://videos.sapo.pt/mrss/}HD').text == 'true':
+ formats.append({
+ 'url': re.sub(r'/mov/1$', '/mov/39', video_url),
+ 'ext': 'mp4',
+ 'format_id': 'hd',
+ 'width': 1280,
+ 'height': 720,
+ })
+
+ return {
+ 'id': video_id,
+ 'title': title,
+ 'description': description,
+ 'thumbnail': thumbnail,
+ 'duration': duration,
+ 'uploader': uploader,
+ 'upload_date': upload_date,
+ 'view_count': view_count,
+ 'comment_count': comment_count,
+ 'categories': categories,
+ 'age_limit': age_limit,
+ 'formats': formats,
+ }
diff --git a/yt_dlp/extractor/sbs.py b/yt_dlp/extractor/sbs.py
new file mode 100644
index 0000000..8d61e22
--- /dev/null
+++ b/yt_dlp/extractor/sbs.py
@@ -0,0 +1,156 @@
+from .common import InfoExtractor
+from ..networking import HEADRequest
+from ..utils import (
+ float_or_none,
+ int_or_none,
+ parse_duration,
+ parse_iso8601,
+ traverse_obj,
+ update_url_query,
+ url_or_none,
+)
+
+
+class SBSIE(InfoExtractor):
+ IE_DESC = 'sbs.com.au'
+ _VALID_URL = r'''(?x)
+ https?://(?:www\.)?sbs\.com\.au/(?:
+ ondemand(?:
+ /video/(?:single/)?|
+ /(?:movie|tv-program)/[^/]+/|
+ /(?:tv|news)-series/(?:[^/]+/){3}|
+ .*?\bplay=|/watch/
+ )|news/(?:embeds/)?video/
+ )(?P<id>[0-9]+)'''
+ _EMBED_REGEX = [r'''(?x)]
+ (?:
+ <meta\s+property="og:video"\s+content=|
+ <iframe[^>]+?src=
+ )
+ (["\'])(?P<url>https?://(?:www\.)?sbs\.com\.au/ondemand/video/.+?)\1''']
+
+ _TESTS = [{
+ # Original URL is handled by the generic IE which finds the iframe:
+ # http://www.sbs.com.au/thefeed/blog/2014/08/21/dingo-conservation
+ 'url': 'http://www.sbs.com.au/ondemand/video/single/320403011771/?source=drupal&vertical=thefeed',
+ 'md5': '31f84a7a19b53635db63c73f8ab0c4a7',
+ 'info_dict': {
+ 'id': '320403011771', # '_rFBPRPO4pMR',
+ 'ext': 'mp4',
+ 'title': 'Dingo Conservation (The Feed)',
+ 'description': 'md5:f250a9856fca50d22dec0b5b8015f8a5',
+ 'thumbnail': r're:https?://.*\.jpg',
+ 'duration': 308,
+ 'timestamp': 1408613220,
+ 'upload_date': '20140821',
+ 'uploader': 'SBSC',
+ },
+ 'expected_warnings': ['Unable to download JSON metadata'],
+ }, {
+ 'url': 'http://www.sbs.com.au/ondemand/video/320403011771/Dingo-Conservation-The-Feed',
+ 'only_matching': True,
+ }, {
+ 'url': 'http://www.sbs.com.au/news/video/471395907773/The-Feed-July-9',
+ 'only_matching': True,
+ }, {
+ 'url': 'https://www.sbs.com.au/ondemand/?play=1836638787723',
+ 'only_matching': True,
+ }, {
+ 'url': 'https://www.sbs.com.au/ondemand/program/inside-windsor-castle?play=1283505731842',
+ 'only_matching': True,
+ }, {
+ 'url': 'https://www.sbs.com.au/news/embeds/video/1840778819866',
+ 'only_matching': True,
+ }, {
+ 'url': 'https://www.sbs.com.au/ondemand/watch/1698704451971',
+ 'only_matching': True,
+ }, {
+ 'url': 'https://www.sbs.com.au/ondemand/movie/coherence/1469404227931',
+ 'only_matching': True,
+ }, {
+ 'note': 'Live stream',
+ 'url': 'https://www.sbs.com.au/ondemand/video/1726824003663/sbs-24x7-live-stream-nsw',
+ 'only_matching': True,
+ }, {
+ 'url': 'https://www.sbs.com.au/ondemand/news-series/dateline/dateline-2022/dateline-s2022-ep26/2072245827515',
+ 'only_matching': True,
+ }, {
+ 'url': 'https://www.sbs.com.au/ondemand/tv-series/the-handmaids-tale/season-5/the-handmaids-tale-s5-ep1/2065631811776',
+ 'only_matching': True,
+ }, {
+ 'url': 'https://www.sbs.com.au/ondemand/tv-program/autun-romes-forgotten-sister/2116212803602',
+ 'only_matching': True,
+ }]
+
+ _GEO_COUNTRIES = ['AU']
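+    # Map Australian TV classification codes to minimum viewer ages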
+ _AUS_TV_PARENTAL_GUIDELINES = {
+ 'P': 0,
+ 'C': 7,
+ 'G': 0,
+ 'PG': 0,
+ 'M': 14,
+ 'MA15+': 15,
+ 'MAV15+': 15,
+ 'R18+': 18,
+ }
+ _PLAYER_API = 'https://www.sbs.com.au/api/v3'
+
+ def _real_extract(self, url):
+ video_id = self._match_id(url)
+ formats, subtitles = self._extract_smil_formats_and_subtitles(
+ update_url_query(f'{self._PLAYER_API}/video_smil', {'id': video_id}), video_id)
+
+ if not formats:
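+            # An empty SMIL response usually indicates geo-blocking; probe the CDN to surface a proper geo error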
+ urlh = self._request_webpage(
+ HEADRequest('https://sbs-vod-prod-01.akamaized.net/'), video_id,
+ note='Checking geo-restriction', fatal=False, expected_status=403)
+ if urlh:
+ error_reasons = urlh.headers.get_all('x-error-reason') or []
+ if 'geo-blocked' in error_reasons:
+ self.raise_geo_restricted(countries=['AU'])
+ self.raise_no_formats('No formats are available', video_id=video_id)
+
+ media = traverse_obj(self._download_json(
+ f'{self._PLAYER_API}/video_stream', video_id, fatal=False,
+ query={'id': video_id, 'context': 'tv'}), ('video_object', {dict})) or {}
+
+ media.update(self._download_json(
+ f'https://catalogue.pr.sbsod.com/mpx-media/{video_id}',
+ video_id, fatal=not media) or {})
+
+ # For named episodes, use the catalogue's title to set episode, rather than generic 'Episode N'.
+ if traverse_obj(media, ('partOfSeries', {dict})):
+ media['epName'] = traverse_obj(media, ('title', {str}))
+
+ return {
+ 'id': video_id,
+ **traverse_obj(media, {
+ 'title': ('name', {str}),
+ 'description': ('description', {str}),
+ 'channel': ('taxonomy', 'channel', 'name', {str}),
+ 'series': ((('partOfSeries', 'name'), 'seriesTitle'), {str}),
+ 'series_id': ((('partOfSeries', 'uuid'), 'seriesID'), {str}),
+ 'season_number': ('seasonNumber', {int_or_none}),
+ 'episode': ('epName', {str}),
+ 'episode_number': ('episodeNumber', {int_or_none}),
+ 'timestamp': (('datePublished', ('publication', 'startDate')), {parse_iso8601}),
+ 'release_year': ('releaseYear', {int_or_none}),
+ 'duration': ('duration', ({float_or_none}, {parse_duration})),
+ 'is_live': ('liveStream', {bool}),
+ 'age_limit': (('classificationID', 'contentRating'), {str.upper}, {
+ lambda x: self._AUS_TV_PARENTAL_GUIDELINES.get(x)}), # dict.get is unhashable in py3.7
+ }, get_all=False),
+ **traverse_obj(media, {
+ 'categories': (('genres', ...), ('taxonomy', ('genre', 'subgenre'), 'name'), {str}),
+ 'tags': (('consumerAdviceTexts', ('sbsSubCertification', 'consumerAdvice')), ..., {str}),
+ 'thumbnails': ('thumbnails', lambda _, v: url_or_none(v['contentUrl']), {
+ 'id': ('name', {str}),
+ 'url': 'contentUrl',
+ 'width': ('width', {int_or_none}),
+ 'height': ('height', {int_or_none}),
+ }),
+ }),
+ 'formats': formats,
+ 'subtitles': subtitles,
+ 'uploader': 'SBSC',
+ }
diff --git a/yt_dlp/extractor/sbscokr.py b/yt_dlp/extractor/sbscokr.py
new file mode 100644
index 0000000..001d19e
--- /dev/null
+++ b/yt_dlp/extractor/sbscokr.py
@@ -0,0 +1,200 @@
+from .common import InfoExtractor
+from ..utils import (
+ clean_html,
+ int_or_none,
+ parse_iso8601,
+ parse_resolution,
+ url_or_none,
+)
+from ..utils.traversal import traverse_obj
+
+
+class SBSCoKrIE(InfoExtractor):
+ IE_NAME = 'sbs.co.kr'
+ _VALID_URL = [r'https?://allvod\.sbs\.co\.kr/allvod/vod(?:Package)?EndPage\.do\?(?:[^#]+&)?mdaId=(?P<id>\d+)',
+ r'https?://programs\.sbs\.co\.kr/(?:enter|drama|culture|sports|plus|mtv|kth)/[a-z0-9]+/(?:vod|clip|movie)/\d+/(?P<id>(?:OC)?\d+)']
+
+ _TESTS = [{
+ 'url': 'https://programs.sbs.co.kr/enter/dongsang2/clip/52007/OC467706746?div=main_pop_clip',
+ 'md5': 'c3f6d45e1fb5682039d94cda23c36f19',
+ 'info_dict': {
+ 'id': 'OC467706746',
+ 'ext': 'mp4',
+ 'title': '‘아슬아슬’ 박군♥한영의 새 집 인테리어 대첩♨',
+ 'description': 'md5:6a71eb1979ee4a94ea380310068ccab4',
+ 'thumbnail': 'https://img2.sbs.co.kr/ops_clip_img/2023/10/10/34c4c0f9-a9a5-4ff6-a92e-9bb4b5f6fa65915w1280.jpg',
+ 'release_timestamp': 1696889400,
+ 'release_date': '20231009',
+ 'view_count': int,
+ 'like_count': int,
+ 'duration': 238,
+ 'age_limit': 15,
+ 'series': '동상이몽2_너는 내 운명',
+ 'episode': '레이디제인, ‘혼전임신설’ ‘3개월’ 앞당긴 결혼식 비하인드 스토리 최초 공개!',
+ 'episode_number': 311,
+ },
+ }, {
+ 'url': 'https://allvod.sbs.co.kr/allvod/vodPackageEndPage.do?mdaId=22000489324&combiId=PA000000284&packageType=A&isFreeYN=',
+ 'md5': 'bf46b2e89fda7ae7de01f5743cef7236',
+ 'info_dict': {
+ 'id': '22000489324',
+ 'ext': 'mp4',
+ 'title': '[다시보기] 트롤리 15회',
+ 'description': 'md5:0e55d74bef1ac55c61ae90c73ac485f4',
+ 'thumbnail': 'https://img2.sbs.co.kr/img/sbs_cms/WE/2023/02/14/arC1676333794938-1280-720.jpg',
+ 'release_timestamp': 1676325600,
+ 'release_date': '20230213',
+ 'view_count': int,
+ 'like_count': int,
+ 'duration': 5931,
+ 'age_limit': 15,
+ 'series': '트롤리',
+ 'episode': '이거 다 거짓말이야',
+ 'episode_number': 15,
+ },
+ }, {
+ 'url': 'https://programs.sbs.co.kr/enter/fourman/vod/69625/22000508948',
+ 'md5': '41e8ae4cc6c8424f4e4d76661a4becbf',
+ 'info_dict': {
+ 'id': '22000508948',
+ 'ext': 'mp4',
+ 'title': '[다시보기] 신발 벗고 돌싱포맨 104회',
+ 'description': 'md5:c6a247383c4dd661e4b956bf4d3b586e',
+ 'thumbnail': 'https://img2.sbs.co.kr/img/sbs_cms/WE/2023/08/30/2vb1693355446261-1280-720.jpg',
+ 'release_timestamp': 1693342800,
+ 'release_date': '20230829',
+ 'view_count': int,
+ 'like_count': int,
+ 'duration': 7036,
+ 'age_limit': 15,
+ 'series': '신발 벗고 돌싱포맨',
+ 'episode': '돌싱포맨 저격수들 등장!',
+ 'episode_number': 104,
+ },
+ }]
+
+ def _call_api(self, video_id, rscuse=''):
+ return self._download_json(
+ f'https://api.play.sbs.co.kr/1.0/sbs_vodall/{video_id}', video_id,
+ note=f'Downloading m3u8 information {rscuse}',
+ query={
+ 'platform': 'pcweb',
+ 'protocol': 'download',
+ 'absolute_show': 'Y',
+ 'service': 'program',
+ 'ssl': 'Y',
+ 'rscuse': rscuse,
+ })
+
+ def _real_extract(self, url):
+ video_id = self._match_id(url)
+
+ details = self._call_api(video_id)
+ source = traverse_obj(details, ('vod', 'source', 'mediasource', {dict})) or {}
+
+ formats = []
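+ # Some mediasourcelist entries carry no direct URL; re-query the API with that
+ # entry's "rscuse" to resolve one, skipping duplicates of the default source.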
+ for stream in traverse_obj(details, (
+ 'vod', 'source', 'mediasourcelist', lambda _, v: v['mediaurl'] or v['mediarscuse']
+ ), default=[source]):
+ if not stream.get('mediaurl'):
+ new_source = traverse_obj(
+ self._call_api(video_id, rscuse=stream['mediarscuse']),
+ ('vod', 'source', 'mediasource', {dict})) or {}
+ if new_source.get('mediarscuse') == source.get('mediarscuse') or not new_source.get('mediaurl'):
+ continue
+ stream = new_source
+ formats.append({
+ 'url': stream['mediaurl'],
+ 'format_id': stream.get('mediarscuse'),
+ 'format_note': stream.get('medianame'),
+ **parse_resolution(stream.get('quality')),
+ 'preference': int_or_none(stream.get('mediarscuse'))
+ })
+
+ caption_url = traverse_obj(details, ('vod', 'source', 'subtitle', {url_or_none}))
+
+ return {
+ 'id': video_id,
+ **traverse_obj(details, ('vod', {
+ 'title': ('info', 'title'),
+ 'duration': ('info', 'duration', {int_or_none}),
+ 'view_count': ('info', 'viewcount', {int_or_none}),
+ 'like_count': ('info', 'likecount', {int_or_none}),
+ 'description': ('info', 'synopsis', {clean_html}),
+ 'episode': ('info', 'content', ('contenttitle', 'title')),
+ 'episode_number': ('info', 'content', 'number', {int_or_none}),
+ 'series': ('info', 'program', 'programtitle'),
+ 'age_limit': ('info', 'targetage', {int_or_none}),
+ 'release_timestamp': ('info', 'broaddate', {parse_iso8601}),
+ 'thumbnail': ('source', 'thumbnail', 'origin', {url_or_none}),
+ }), get_all=False),
+ 'formats': formats,
+ 'subtitles': {'ko': [{'url': caption_url}]} if caption_url else None,
+ }
+
+
+class SBSCoKrAllvodProgramIE(InfoExtractor):
+ IE_NAME = 'sbs.co.kr:allvod_program'
+ _VALID_URL = r'https?://allvod\.sbs\.co\.kr/allvod/vod(?:Free)?ProgramDetail\.do\?(?:[^#]+&)?pgmId=(?P<id>P?\d+)'
+
+ _TESTS = [{
+ 'url': 'https://allvod.sbs.co.kr/allvod/vodFreeProgramDetail.do?type=legend&pgmId=22000010159&listOrder=vodCntAsc',
+ 'info_dict': {
+ '_type': 'playlist',
+ 'id': '22000010159',
+ },
+ 'playlist_count': 18,
+ }, {
+ 'url': 'https://allvod.sbs.co.kr/allvod/vodProgramDetail.do?pgmId=P460810577',
+ 'info_dict': {
+ '_type': 'playlist',
+ 'id': 'P460810577',
+ },
+ 'playlist_count': 13,
+ }]
+
+ def _real_extract(self, url):
+ program_id = self._match_id(url)
+
+ details = self._download_json(
+ 'https://allvod.sbs.co.kr/allvod/vodProgramDetail/vodProgramDetailAjax.do',
+ program_id, note='Downloading program details',
+ query={
+ 'pgmId': program_id,
+ 'currentCount': '10000',
+ })
+
+ return self.playlist_result(
+ [self.url_result(f'https://allvod.sbs.co.kr/allvod/vodEndPage.do?mdaId={video_id}', SBSCoKrIE)
+ for video_id in traverse_obj(details, ('list', ..., 'mdaId'))], program_id)
+
+
+class SBSCoKrProgramsVodIE(InfoExtractor):
+ IE_NAME = 'sbs.co.kr:programs_vod'
+ _VALID_URL = r'https?://programs\.sbs\.co\.kr/(?:enter|drama|culture|sports|plus|mtv)/(?P<id>[a-z0-9]+)/vods'
+
+ _TESTS = [{
+ 'url': 'https://programs.sbs.co.kr/culture/morningwide/vods/65007',
+ 'info_dict': {
+ '_type': 'playlist',
+ 'id': '00000210215',
+ },
+ 'playlist_mincount': 9782,
+ }, {
+ 'url': 'https://programs.sbs.co.kr/enter/dongsang2/vods/52006',
+ 'info_dict': {
+ '_type': 'playlist',
+ 'id': '22000010476',
+ },
+ 'playlist_mincount': 312,
+ }]
+
+ def _real_extract(self, url):
+ program_slug = self._match_id(url)
+
+ program_id = self._download_json(
+ f'https://static.apis.sbs.co.kr/program-api/1.0/menu/{program_slug}', program_slug,
+ note='Downloading program menu data')['program']['programid']
+
+ return self.url_result(
+ f'https://allvod.sbs.co.kr/allvod/vodProgramDetail.do?pgmId={program_id}', SBSCoKrAllvodProgramIE)
diff --git a/yt_dlp/extractor/screen9.py b/yt_dlp/extractor/screen9.py
new file mode 100644
index 0000000..5ab0b6c
--- /dev/null
+++ b/yt_dlp/extractor/screen9.py
@@ -0,0 +1,62 @@
+from .common import InfoExtractor
+from ..utils import traverse_obj
+
+
+class Screen9IE(InfoExtractor):
+ _VALID_URL = r'https?://(?:\w+\.screen9\.(?:tv|com)|play\.su\.se)/(?:embed|media)/(?P<id>[^?#/]+)'
+ _TESTS = [
+ {
+ 'url': 'https://api.screen9.com/embed/8kTNEjvoXGM33dmWwF0uDA',
+ 'md5': 'd60d23f8980583b930724b01fa6ddb41',
+ 'info_dict': {
+ 'id': '8kTNEjvoXGM33dmWwF0uDA',
+ 'ext': 'mp4',
+ 'title': 'Östersjön i förändrat klimat',
+ 'thumbnail': r're:^https?://.+\.jpg',
+ },
+ },
+ {
+ 'url': 'https://folkhogskolekanalen.screen9.tv/media/gy35PKLHe-5K29RYHga2bw/ett-starkare-samhalle-en-snabbguide-om-sveriges-folkhogskolor',
+ 'md5': 'c9389806e78573ea34fc48b6f94465dc',
+ 'info_dict': {
+ 'id': 'gy35PKLHe-5K29RYHga2bw',
+ 'ext': 'mp4',
+ 'title': 'Ett starkare samhälle - en snabbguide om Sveriges folkhögskolor',
+ 'thumbnail': r're:^https?://.+\.jpg',
+ },
+ },
+ {
+ 'url': 'https://play.su.se/media/H1YA0EYNCxiesrSU1kaRBQ/baltic-breakfast',
+ 'md5': '2b817647c3058002526269deff4c0683',
+ 'info_dict': {
+ 'id': 'H1YA0EYNCxiesrSU1kaRBQ',
+ 'ext': 'mp4',
+ 'title': 'Baltic Breakfast',
+ 'thumbnail': r're:^https?://.+\.jpg',
+ },
+ },
+ ]
+
+ def _real_extract(self, url):
+ video_id = self._match_id(url)
+ webpage = self._download_webpage(f'https://api.screen9.com/embed/{video_id}', video_id)
+ config = self._search_json(r'var\s+config\s*=', webpage, 'config', video_id)
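+ # The embed page inlines a JS player config listing HLS and progressive MP4 sources.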
+ formats, subtitles = self._extract_m3u8_formats_and_subtitles(
+ traverse_obj(config, ('src', lambda _, v: v['type'] == 'application/x-mpegURL', 'src'), get_all=False),
+ video_id, ext='mp4')
+ formats.append({
+ 'url': traverse_obj(config, ('src', lambda _, v: v['type'] == 'video/mp4', 'src'), get_all=False),
+ 'format': 'mp4',
+ })
+
+ return {
+ 'id': video_id,
+ 'title': traverse_obj(
+ config,
+ ('plugins', (('title', 'title'), ('googleAnalytics', 'title'), ('share', 'mediaTitle'))),
+ get_all=False),
+ 'description': traverse_obj(config, ('plugins', 'title', 'description')),
+ 'thumbnail': traverse_obj(config, 'poster'),
+ 'formats': formats,
+ 'subtitles': subtitles,
+ }
diff --git a/yt_dlp/extractor/screencast.py b/yt_dlp/extractor/screencast.py
new file mode 100644
index 0000000..df5e79b
--- /dev/null
+++ b/yt_dlp/extractor/screencast.py
@@ -0,0 +1,117 @@
+import urllib.parse
+
+from .common import InfoExtractor
+from ..compat import compat_parse_qs
+from ..utils import ExtractorError
+
+
+class ScreencastIE(InfoExtractor):
+ _VALID_URL = r'https?://(?:www\.)?screencast\.com/t/(?P<id>[a-zA-Z0-9]+)'
+ _TESTS = [{
+ 'url': 'http://www.screencast.com/t/3ZEjQXlT',
+ 'md5': '917df1c13798a3e96211dd1561fded83',
+ 'info_dict': {
+ 'id': '3ZEjQXlT',
+ 'ext': 'm4v',
+ 'title': 'Color Measurement with Ocean Optics Spectrometers',
+ 'description': 'md5:240369cde69d8bed61349a199c5fb153',
+ 'thumbnail': r're:^https?://.*\.(?:gif|jpg)$',
+ }
+ }, {
+ 'url': 'http://www.screencast.com/t/V2uXehPJa1ZI',
+ 'md5': 'e8e4b375a7660a9e7e35c33973410d34',
+ 'info_dict': {
+ 'id': 'V2uXehPJa1ZI',
+ 'ext': 'mov',
+ 'title': 'The Amadeus Spectrometer',
+ 'description': 're:^In this video, our friends at.*To learn more about Amadeus, visit',
+ 'thumbnail': r're:^https?://.*\.(?:gif|jpg)$',
+ }
+ }, {
+ 'url': 'http://www.screencast.com/t/aAB3iowa',
+ 'md5': 'dedb2734ed00c9755761ccaee88527cd',
+ 'info_dict': {
+ 'id': 'aAB3iowa',
+ 'ext': 'mp4',
+ 'title': 'Google Earth Export',
+ 'description': 'Provides a demo of a CommunityViz export to Google Earth, one of the 3D viewing options.',
+ 'thumbnail': r're:^https?://.*\.(?:gif|jpg)$',
+ }
+ }, {
+ 'url': 'http://www.screencast.com/t/X3ddTrYh',
+ 'md5': '669ee55ff9c51988b4ebc0877cc8b159',
+ 'info_dict': {
+ 'id': 'X3ddTrYh',
+ 'ext': 'wmv',
+ 'title': 'Toolkit 6 User Group Webinar (2014-03-04) - Default Judgment and First Impression',
+ 'description': 'md5:7b9f393bc92af02326a5c5889639eab0',
+ 'thumbnail': r're:^https?://.*\.(?:gif|jpg)$',
+ }
+ }, {
+ 'url': 'http://screencast.com/t/aAB3iowa',
+ 'only_matching': True,
+ }]
+
+ def _real_extract(self, url):
+ video_id = self._match_id(url)
+ webpage = self._download_webpage(url, video_id)
+
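+ # The page may embed its media in several historical formats; try each in turn:
+ # QuickTime <embed>, Flash vars, og:video meta and MediaContentUrl JSON.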
+ video_url = self._html_search_regex(
+ r'<embed name="Video".*?src="([^"]+)"', webpage,
+ 'QuickTime embed', default=None)
+
+ if video_url is None:
+ flash_vars_s = self._html_search_regex(
+ r'<param name="flashVars" value="([^"]+)"', webpage, 'flash vars',
+ default=None)
+ if not flash_vars_s:
+ flash_vars_s = self._html_search_regex(
+ r'<param name="initParams" value="([^"]+)"', webpage, 'flash vars',
+ default=None)
+ if flash_vars_s:
+ flash_vars_s = flash_vars_s.replace(',', '&')
+ if flash_vars_s:
+ flash_vars = compat_parse_qs(flash_vars_s)
+ video_url_raw = urllib.parse.quote(flash_vars['content'][0])
+ video_url = video_url_raw.replace('http%3A', 'http:')
+
+ if video_url is None:
+ video_meta = self._html_search_meta(
+ 'og:video', webpage, default=None)
+ if video_meta:
+ video_url = self._search_regex(
+ r'src=(.*?)(?:$|&)', video_meta,
+ 'meta tag video URL', default=None)
+
+ if video_url is None:
+ video_url = self._html_search_regex(
+ r'MediaContentUrl["\']\s*:(["\'])(?P<url>(?:(?!\1).)+)\1',
+ webpage, 'video url', default=None, group='url')
+
+ if video_url is None:
+ video_url = self._html_search_meta(
+ 'og:video', webpage, default=None)
+
+ if video_url is None:
+ raise ExtractorError('Cannot find video')
+
+ title = self._og_search_title(webpage, default=None)
+ if title is None:
+ title = self._html_search_regex(
+ [r'<b>Title:</b> ([^<]+)</div>',
+ r'class="tabSeperator">></span><span class="tabText">(.+?)<',
+ r'<title>([^<]+)</title>'],
+ webpage, 'title')
+ thumbnail = self._og_search_thumbnail(webpage)
+ description = self._og_search_description(webpage, default=None)
+ if description is None:
+ description = self._html_search_meta('description', webpage)
+
+ return {
+ 'id': video_id,
+ 'url': video_url,
+ 'title': title,
+ 'description': description,
+ 'thumbnail': thumbnail,
+ }
diff --git a/yt_dlp/extractor/screencastify.py b/yt_dlp/extractor/screencastify.py
new file mode 100644
index 0000000..3c43043
--- /dev/null
+++ b/yt_dlp/extractor/screencastify.py
@@ -0,0 +1,70 @@
+import urllib.parse
+
+from .common import InfoExtractor
+from ..utils import traverse_obj, update_url_query
+
+
+class ScreencastifyIE(InfoExtractor):
+ _VALID_URL = [
+ r'https?://watch\.screencastify\.com/v/(?P<id>[^/?#]+)',
+ r'https?://app\.screencastify\.com/v[23]/watch/(?P<id>[^/?#]+)',
+ ]
+ _TESTS = [{
+ 'url': 'https://watch.screencastify.com/v/sYVkZip3quLKhHw4Ybk8',
+ 'info_dict': {
+ 'id': 'sYVkZip3quLKhHw4Ybk8',
+ 'ext': 'mp4',
+ 'title': 'Inserting and Aligning the Case Top and Bottom',
+ 'description': '',
+ 'uploader': 'Paul Gunn',
+ 'extra_param_to_segment_url': str,
+ },
+ 'params': {
+ 'skip_download': 'm3u8',
+ },
+ }, {
+ 'url': 'https://app.screencastify.com/v3/watch/J5N7H11wofDN1jZUCr3t',
+ 'info_dict': {
+ 'id': 'J5N7H11wofDN1jZUCr3t',
+ 'ext': 'mp4',
+ 'uploader': 'Scott Piesen',
+ 'description': '',
+ 'title': 'Lesson Recording 1-17 Burrr...',
+ },
+ 'params': {
+ 'skip_download': 'm3u8',
+ },
+ }, {
+ 'url': 'https://app.screencastify.com/v2/watch/BQ26VbUdfbQLhKzkktOk',
+ 'only_matching': True,
+ }]
+
+ def _real_extract(self, url):
+ video_id = self._match_id(url)
+ info = self._download_json(
+ f'https://umbrella.svc.screencastify.com/api/umbrellaService/watch/{video_id}', video_id)
+
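+ # The manifest's "auth" query string must be propagated both to the manifest
+ # requests and to every media segment URL (via extra_param_to_segment_url).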
+ query_string = traverse_obj(info, ('manifest', 'auth', 'query'))
+ query = urllib.parse.parse_qs(query_string)
+ formats = []
+ dash_manifest_url = traverse_obj(info, ('manifest', 'url'))
+ if dash_manifest_url:
+ formats.extend(
+ self._extract_mpd_formats(
+ dash_manifest_url, video_id, mpd_id='dash', query=query, fatal=False))
+ hls_manifest_url = traverse_obj(info, ('manifest', 'hlsUrl'))
+ if hls_manifest_url:
+ formats.extend(
+ self._extract_m3u8_formats(
+ hls_manifest_url, video_id, ext='mp4', m3u8_id='hls', query=query, fatal=False))
+ for f in formats:
+ f['url'] = update_url_query(f['url'], query)
+
+ return {
+ 'id': video_id,
+ 'title': info.get('title'),
+ 'description': info.get('description'),
+ 'uploader': info.get('userName'),
+ 'formats': formats,
+ 'extra_param_to_segment_url': query_string,
+ }
diff --git a/yt_dlp/extractor/screencastomatic.py b/yt_dlp/extractor/screencastomatic.py
new file mode 100644
index 0000000..28e25e9
--- /dev/null
+++ b/yt_dlp/extractor/screencastomatic.py
@@ -0,0 +1,72 @@
+from .common import InfoExtractor
+from ..utils import (
+ ExtractorError,
+ get_element_by_class,
+ int_or_none,
+ remove_start,
+ strip_or_none,
+ unified_strdate,
+ urlencode_postdata,
+)
+
+
+class ScreencastOMaticIE(InfoExtractor):
+ _VALID_URL = r'https?://screencast-o-matic\.com/(?:(?:watch|player)/|embed\?.*?\bsc=)(?P<id>[0-9a-zA-Z]+)'
+ _TESTS = [{
+ 'url': 'http://screencast-o-matic.com/watch/c2lD3BeOPl',
+ 'md5': '483583cb80d92588f15ccbedd90f0c18',
+ 'info_dict': {
+ 'id': 'c2lD3BeOPl',
+ 'ext': 'mp4',
+ 'title': 'Welcome to 3-4 Philosophy @ DECV!',
+ 'thumbnail': r're:^https?://.*\.jpg$',
+ 'description': 'as the title says! also: some general info re 1) VCE philosophy and 2) distance learning.',
+ 'duration': 369,
+ 'upload_date': '20141216',
+ }
+ }, {
+ 'url': 'http://screencast-o-matic.com/player/c2lD3BeOPl',
+ 'only_matching': True,
+ }, {
+ 'url': 'http://screencast-o-matic.com/embed?ff=true&sc=cbV2r4Q5TL&fromPH=true&a=1',
+ 'only_matching': True,
+ }]
+
+ def _real_extract(self, url):
+ video_id = self._match_id(url)
+ webpage = self._download_webpage(
+ 'https://screencast-o-matic.com/player/' + video_id, video_id)
+
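+ # Password-protected videos replace the player with a login form; re-submit its
+ # hidden inputs together with the --video-password value to unlock the page.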
+ if (self._html_extract_title(webpage) == 'Protected Content'
+ or 'This video is private and requires a password' in webpage):
+ password = self.get_param('videopassword')
+
+ if not password:
+ raise ExtractorError('Password protected video, use --video-password <password>', expected=True)
+
+ form = self._search_regex(
+ r'(?is)<form[^>]*>(?P<form>.+?)</form>', webpage, 'login form', group='form')
+ form_data = self._hidden_inputs(form)
+ form_data.update({
+ 'scPassword': password,
+ })
+
+ webpage = self._download_webpage(
+ 'https://screencast-o-matic.com/player/password', video_id, 'Logging in',
+ data=urlencode_postdata(form_data))
+
+ if '<small class="text-danger">Invalid password</small>' in webpage:
+ raise ExtractorError('Unable to login: Invalid password', expected=True)
+
+ info = self._parse_html5_media_entries(url, webpage, video_id)[0]
+ info.update({
+ 'id': video_id,
+ 'title': get_element_by_class('overlayTitle', webpage),
+ 'description': strip_or_none(get_element_by_class('overlayDescription', webpage)) or None,
+ 'duration': int_or_none(self._search_regex(
+ r'player\.duration\s*=\s*function\(\)\s*{\s*return\s+(\d+);\s*};',
+ webpage, 'duration', default=None)),
+ 'upload_date': unified_strdate(remove_start(
+ get_element_by_class('overlayPublished', webpage), 'Published: ')),
+ })
+ return info
diff --git a/yt_dlp/extractor/scrippsnetworks.py b/yt_dlp/extractor/scrippsnetworks.py
new file mode 100644
index 0000000..3912f77
--- /dev/null
+++ b/yt_dlp/extractor/scrippsnetworks.py
@@ -0,0 +1,155 @@
+import hashlib
+import json
+
+from .aws import AWSIE
+from .anvato import AnvatoIE
+from .common import InfoExtractor
+from ..utils import (
+ smuggle_url,
+ urlencode_postdata,
+ xpath_text,
+)
+
+
+class ScrippsNetworksWatchIE(AWSIE):
+ IE_NAME = 'scrippsnetworks:watch'
+ _VALID_URL = r'''(?x)
+ https?://
+ watch\.
+ (?P<site>geniuskitchen)\.com/
+ (?:
+ player\.[A-Z0-9]+\.html\#|
+ show/(?:[^/]+/){2}|
+ player/
+ )
+ (?P<id>\d+)
+ '''
+ _TESTS = [{
+ 'url': 'http://watch.geniuskitchen.com/player/3787617/Ample-Hills-Ice-Cream-Bike/',
+ 'info_dict': {
+ 'id': '4194875',
+ 'ext': 'mp4',
+ 'title': 'Ample Hills Ice Cream Bike',
+ 'description': 'Courtney Rada churns up a signature GK Now ice cream with The Scoopmaster.',
+ 'uploader': 'ANV',
+ 'upload_date': '20171011',
+ 'timestamp': 1507698000,
+ },
+ 'params': {
+ 'skip_download': True,
+ },
+ 'add_ie': [AnvatoIE.ie_key()],
+ 'skip': '404 Not Found',
+ }]
+
+ _SNI_TABLE = {
+ 'geniuskitchen': 'genius',
+ }
+
+ _AWS_API_KEY = 'E7wSQmq0qK6xPrF13WmzKiHo4BQ7tip4pQcSXVl1'
+ _AWS_PROXY_HOST = 'web.api.video.snidigital.com'
+
+ _AWS_USER_AGENT = 'aws-sdk-js/2.80.0 callback'
+
+ def _real_extract(self, url):
+ mobj = self._match_valid_url(url)
+ site_id, video_id = mobj.group('site', 'id')
+
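+ # Three-step AWS auth: fetch an OpenID token from Cognito, exchange it for
+ # temporary STS credentials, then use those to sign the SNI Digital API request.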
+ aws_identity_id_json = json.dumps({
+ 'IdentityId': '%s:7655847c-0ae7-4d9b-80d6-56c062927eb3' % self._AWS_REGION
+ }).encode('utf-8')
+ token = self._download_json(
+ 'https://cognito-identity.%s.amazonaws.com/' % self._AWS_REGION, video_id,
+ data=aws_identity_id_json,
+ headers={
+ 'Accept': '*/*',
+ 'Content-Type': 'application/x-amz-json-1.1',
+ 'Referer': url,
+ 'X-Amz-Content-Sha256': hashlib.sha256(aws_identity_id_json).hexdigest(),
+ 'X-Amz-Target': 'AWSCognitoIdentityService.GetOpenIdToken',
+ 'X-Amz-User-Agent': self._AWS_USER_AGENT,
+ })['Token']
+
+ sts = self._download_xml(
+ 'https://sts.amazonaws.com/', video_id, data=urlencode_postdata({
+ 'Action': 'AssumeRoleWithWebIdentity',
+ 'RoleArn': 'arn:aws:iam::710330595350:role/Cognito_WebAPIUnauth_Role',
+ 'RoleSessionName': 'web-identity',
+ 'Version': '2011-06-15',
+ 'WebIdentityToken': token,
+ }), headers={
+ 'Referer': url,
+ 'X-Amz-User-Agent': self._AWS_USER_AGENT,
+ 'Content-Type': 'application/x-www-form-urlencoded; charset=utf-8',
+ })
+
+ def get(key):
+ return xpath_text(
+ sts, './/{https://sts.amazonaws.com/doc/2011-06-15/}%s' % key,
+ fatal=True)
+
+ mcp_id = self._aws_execute_api({
+ 'uri': '/1/web/brands/%s/episodes/scrid/%s' % (self._SNI_TABLE[site_id], video_id),
+ 'access_key': get('AccessKeyId'),
+ 'secret_key': get('SecretAccessKey'),
+ 'session_token': get('SessionToken'),
+ }, video_id)['results'][0]['mcpId']
+
+ return self.url_result(
+ smuggle_url(
+ 'anvato:anvato_scripps_app_web_prod_0837996dbe373629133857ae9eb72e740424d80a:%s' % mcp_id,
+ {'geo_countries': ['US']}),
+ AnvatoIE.ie_key(), video_id=mcp_id)
+
+
+class ScrippsNetworksIE(InfoExtractor):
+ _VALID_URL = r'https?://(?:www\.)?(?P<site>cookingchanneltv|discovery|(?:diy|food)network|hgtv|travelchannel)\.com/videos/[0-9a-z-]+-(?P<id>\d+)'
+ _TESTS = [{
+ 'url': 'https://www.cookingchanneltv.com/videos/the-best-of-the-best-0260338',
+ 'info_dict': {
+ 'id': '0260338',
+ 'ext': 'mp4',
+ 'title': 'The Best of the Best',
+ 'description': 'Catch a new episode of MasterChef Canada Tuedsay at 9/8c.',
+ 'timestamp': 1475678834,
+ 'upload_date': '20161005',
+ 'uploader': 'SCNI-SCND',
+ 'tags': 'count:10',
+ 'creator': 'Cooking Channel',
+ 'duration': 29.995,
+ 'chapters': [{'start_time': 0.0, 'end_time': 29.995, 'title': '<Untitled Chapter 1>'}],
+ 'thumbnail': 'https://images.dds.discovery.com/up/tp/Scripps_-_Food_Category_Prod/122/987/0260338_630x355.jpg',
+ },
+ 'add_ie': ['ThePlatform'],
+ 'expected_warnings': ['No HLS formats found'],
+ }, {
+ 'url': 'https://www.diynetwork.com/videos/diy-barnwood-tablet-stand-0265790',
+ 'only_matching': True,
+ }, {
+ 'url': 'https://www.foodnetwork.com/videos/chocolate-strawberry-cake-roll-7524591',
+ 'only_matching': True,
+ }, {
+ 'url': 'https://www.hgtv.com/videos/cookie-decorating-101-0301929',
+ 'only_matching': True,
+ }, {
+ 'url': 'https://www.travelchannel.com/videos/two-climates-one-bag-5302184',
+ 'only_matching': True,
+ }, {
+ 'url': 'https://www.discovery.com/videos/guardians-of-the-glades-cooking-with-tom-cobb-5578368',
+ 'only_matching': True,
+ }]
+ _ACCOUNT_MAP = {
+ 'cookingchanneltv': 2433005105,
+ 'discovery': 2706091867,
+ 'diynetwork': 2433004575,
+ 'foodnetwork': 2433005105,
+ 'hgtv': 2433004575,
+ 'travelchannel': 2433005739,
+ }
+ _TP_TEMPL = 'https://link.theplatform.com/s/ip77QC/media/guid/%d/%s?mbr=true'
+
+ def _real_extract(self, url):
+ site, guid = self._match_valid_url(url).groups()
+ return self.url_result(smuggle_url(
+ self._TP_TEMPL % (self._ACCOUNT_MAP[site], guid),
+ {'force_smil_url': True}), 'ThePlatform', guid)
diff --git a/yt_dlp/extractor/scrolller.py b/yt_dlp/extractor/scrolller.py
new file mode 100644
index 0000000..4f9fa14
--- /dev/null
+++ b/yt_dlp/extractor/scrolller.py
@@ -0,0 +1,102 @@
+import json
+
+from .common import InfoExtractor
+from ..utils import determine_ext, int_or_none
+
+
+class ScrolllerIE(InfoExtractor):
+ _VALID_URL = r'https?://(?:www\.)?scrolller\.com/(?P<id>[\w-]+)'
+ _TESTS = [{
+ 'url': 'https://scrolller.com/a-helping-hand-1k9pxikxkw',
+ 'info_dict': {
+ 'id': 'a-helping-hand-1k9pxikxkw',
+ 'ext': 'mp4',
+ 'thumbnail': 'https://zepto.scrolller.com/a-helping-hand-3ty9q8x094-540x960.jpg',
+ 'title': 'A helping hand',
+ 'age_limit': 0,
+ }
+ }, {
+ 'url': 'https://scrolller.com/tigers-chasing-a-drone-c5d1f2so6j',
+ 'info_dict': {
+ 'id': 'tigers-chasing-a-drone-c5d1f2so6j',
+ 'ext': 'mp4',
+ 'thumbnail': 'https://zepto.scrolller.com/tigers-chasing-a-drone-az9pkpguwe-540x303.jpg',
+ 'title': 'Tigers chasing a drone',
+ 'age_limit': 0,
+ }
+ }, {
+ 'url': 'https://scrolller.com/baby-rhino-smells-something-9chhugsv9p',
+ 'info_dict': {
+ 'id': 'baby-rhino-smells-something-9chhugsv9p',
+ 'ext': 'mp4',
+ 'thumbnail': 'https://atto.scrolller.com/hmm-whats-that-smell-bh54mf2c52-300x224.jpg',
+ 'title': 'Baby rhino smells something',
+ 'age_limit': 0,
+ }
+ }, {
+ 'url': 'https://scrolller.com/its-all-fun-and-games-cco8jjmoh7',
+ 'info_dict': {
+ 'id': 'its-all-fun-and-games-cco8jjmoh7',
+ 'ext': 'mp4',
+ 'thumbnail': 'https://atto.scrolller.com/its-all-fun-and-games-3amk9vg7m3-540x649.jpg',
+ 'title': 'It\'s all fun and games...',
+ 'age_limit': 0,
+ }
+ }, {
+ 'url': 'https://scrolller.com/may-the-force-be-with-you-octokuro-yeytg1fs7a',
+ 'info_dict': {
+ 'id': 'may-the-force-be-with-you-octokuro-yeytg1fs7a',
+ 'ext': 'mp4',
+ 'thumbnail': 'https://thumbs2.redgifs.com/DarkStarchyNautilus-poster.jpg',
+ 'title': 'May the force be with you (Octokuro)',
+ 'age_limit': 18,
+ }
+ }]
+
+ def _real_extract(self, url):
+ video_id = self._match_id(url)
+
+ query = {
+ 'query': '''{
+ getSubredditPost(url:"/%s"){
+ id
+ title
+ isNsfw
+ mediaSources{
+ url
+ width
+ height
+ }
+ }
+ }''' % video_id
+ }
+
+ video_data = self._download_json(
+ 'https://api.scrolller.com/api/v2/graphql', video_id, data=json.dumps(query).encode(),
+ headers={'Content-Type': 'application/json'})['data']['getSubredditPost']
+
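+ # mediaSources mixes preview images and video renditions; split them by extension.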
+ formats, thumbnails = [], []
+ for source in video_data['mediaSources']:
+ if determine_ext(source.get('url')) in ('jpg', 'png'):
+ thumbnails.append({
+ 'url': source['url'],
+ 'width': int_or_none(source.get('width')),
+ 'height': int_or_none(source.get('height')),
+ })
+ elif source.get('url'):
+ formats.append({
+ 'url': source['url'],
+ 'width': int_or_none(source.get('width')),
+ 'height': int_or_none(source.get('height')),
+ })
+
+ if not formats:
+ self.raise_no_formats('There is no video.', expected=True, video_id=video_id)
+
+ return {
+ 'id': video_id,
+ 'title': video_data.get('title'),
+ 'thumbnails': thumbnails,
+ 'formats': formats,
+ 'age_limit': 18 if video_data.get('isNsfw') else 0
+ }
diff --git a/yt_dlp/extractor/scte.py b/yt_dlp/extractor/scte.py
new file mode 100644
index 0000000..9c2ca8c
--- /dev/null
+++ b/yt_dlp/extractor/scte.py
@@ -0,0 +1,137 @@
+import re
+
+from .common import InfoExtractor
+from ..utils import (
+ decode_packed_codes,
+ ExtractorError,
+ urlencode_postdata,
+)
+
+
+class SCTEBaseIE(InfoExtractor):
+ _LOGIN_URL = 'https://www.scte.org/SCTE/Sign_In.aspx'
+ _NETRC_MACHINE = 'scte'
+
+ def _perform_login(self, username, password):
+ login_popup = self._download_webpage(
+ self._LOGIN_URL, None, 'Downloading login popup')
+
+ def is_logged(webpage):
+ return any(re.search(p, webpage) for p in (
+ r'class=["\']welcome\b', r'>Sign Out<'))
+
+ # already logged in
+ if is_logged(login_popup):
+ return
+
+ login_form = self._hidden_inputs(login_popup)
+
+ login_form.update({
+ 'ctl01$TemplateBody$WebPartManager1$gwpciNewContactSignInCommon$ciNewContactSignInCommon$signInUserName': username,
+ 'ctl01$TemplateBody$WebPartManager1$gwpciNewContactSignInCommon$ciNewContactSignInCommon$signInPassword': password,
+ 'ctl01$TemplateBody$WebPartManager1$gwpciNewContactSignInCommon$ciNewContactSignInCommon$RememberMe': 'on',
+ })
+
+ response = self._download_webpage(
+ self._LOGIN_URL, None, 'Logging in',
+ data=urlencode_postdata(login_form))
+
+ if '|pageRedirect|' not in response and not is_logged(response):
+ error = self._html_search_regex(
+ r'(?s)<[^>]+class=["\']AsiError["\'][^>]*>(.+?)</',
+ response, 'error message', default=None)
+ if error:
+ raise ExtractorError('Unable to login: %s' % error, expected=True)
+ raise ExtractorError('Unable to log in')
+
+
+class SCTEIE(SCTEBaseIE):
+ _WORKING = False
+ _VALID_URL = r'https?://learning\.scte\.org/mod/scorm/view\.php?.*?\bid=(?P<id>\d+)'
+ _TESTS = [{
+ 'url': 'https://learning.scte.org/mod/scorm/view.php?id=31484',
+ 'info_dict': {
+ 'title': 'Introduction to DOCSIS Engineering Professional',
+ 'id': '31484',
+ },
+ 'playlist_count': 5,
+ 'skip': 'Requires account credentials',
+ }]
+
+ def _real_extract(self, url):
+ video_id = self._match_id(url)
+
+ webpage = self._download_webpage(url, video_id)
+
+ title = self._search_regex(r'<h1>(.+?)</h1>', webpage, 'title')
+
+ context_id = self._search_regex(r'context-(\d+)', webpage, 'context id')
+ content_base = 'https://learning.scte.org/pluginfile.php/%s/mod_scorm/content/8/' % context_id
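+ # data.js is P.A.C.K.E.R.-obfuscated JavaScript wrapping an XML asset manifest
+ # (the CreateData argument); unpack it before parsing.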
+ context = decode_packed_codes(self._download_webpage(
+ '%smobile/data.js' % content_base, video_id))
+
+ data = self._parse_xml(
+ self._search_regex(
+ r'CreateData\(\s*"(.+?)"', context, 'data').replace(r"\'", "'"),
+ video_id)
+
+ entries = []
+ for asset in data.findall('.//asset'):
+ asset_url = asset.get('url')
+ if not asset_url or not asset_url.endswith('.mp4'):
+ continue
+ asset_id = self._search_regex(
+ r'video_([^_]+)_', asset_url, 'asset id', default=None)
+ if not asset_id:
+ continue
+ entries.append({
+ 'id': asset_id,
+ 'title': title,
+ 'url': content_base + asset_url,
+ })
+
+ return self.playlist_result(entries, video_id, title)
+
+
+class SCTECourseIE(SCTEBaseIE):
+ _WORKING = False
+ _VALID_URL = r'https?://learning\.scte\.org/(?:mod/sub)?course/view\.php?.*?\bid=(?P<id>\d+)'
+ _TESTS = [{
+ 'url': 'https://learning.scte.org/mod/subcourse/view.php?id=31491',
+ 'only_matching': True,
+ }, {
+ 'url': 'https://learning.scte.org/course/view.php?id=3639',
+ 'only_matching': True,
+ }, {
+ 'url': 'https://learning.scte.org/course/view.php?id=3073',
+ 'only_matching': True,
+ }]
+
+ def _real_extract(self, url):
+ course_id = self._match_id(url)
+
+ webpage = self._download_webpage(url, course_id)
+
+ title = self._search_regex(
+ r'<h1>(.+?)</h1>', webpage, 'title', default=None)
+
+ entries = []
+ for mobj in re.finditer(
+ r'''(?x)
+ <a[^>]+
+ href=(["\'])
+ (?P<url>
+ https?://learning\.scte\.org/mod/
+ (?P<kind>scorm|subcourse)/view\.php?(?:(?!\1).)*?
+ \bid=\d+
+ )
+ ''',
+ webpage):
+ item_url = mobj.group('url')
+ if item_url == url:
+ continue
+ ie = (SCTEIE.ie_key() if mobj.group('kind') == 'scorm'
+ else SCTECourseIE.ie_key())
+ entries.append(self.url_result(item_url, ie=ie))
+
+ return self.playlist_result(entries, course_id, title)
diff --git a/yt_dlp/extractor/sejmpl.py b/yt_dlp/extractor/sejmpl.py
new file mode 100644
index 0000000..29cb015
--- /dev/null
+++ b/yt_dlp/extractor/sejmpl.py
@@ -0,0 +1,218 @@
+import datetime
+
+from .common import InfoExtractor
+from .redge import RedCDNLivxIE
+from ..utils import (
+ clean_html,
+ join_nonempty,
+ js_to_json,
+ strip_or_none,
+ update_url_query,
+)
+from ..utils.traversal import traverse_obj
+
+
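+# Approximates EU DST rules for Poland: in effect from the last Sunday of March,
+# 02:00, until the last Sunday of October, 03:00.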
+def is_dst(date):
+ last_march = datetime.datetime(date.year, 3, 31)
+ last_october = datetime.datetime(date.year, 10, 31)
+ last_sunday_march = last_march - datetime.timedelta(days=last_march.isoweekday() % 7)
+ last_sunday_october = last_october - datetime.timedelta(days=last_october.isoweekday() % 7)
+ return last_sunday_march.replace(hour=2) <= date <= last_sunday_october.replace(hour=3)
+
+
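+# Converts an RFC 3339 date to the Atende/RedCDN timestamp format: milliseconds
+# since 2001-01-01 00:00 UTC (Unix time 978307200), shifted to Polish local time.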
+def rfc3339_to_atende(date):
+ date = datetime.datetime.fromisoformat(date)
+ date = date + datetime.timedelta(hours=1 if is_dst(date) else 0)
+ return int((date.timestamp() - 978307200) * 1000)
+
+
+class SejmIE(InfoExtractor):
+ _VALID_URL = (
+ r'https?://(?:www\.)?sejm\.gov\.pl/[Ss]ejm(?P<term>\d+)\.nsf/transmisje(?:_arch)?\.xsp(?:\?[^#]*)?#(?P<id>[\dA-F]+)',
+ r'https?://(?:www\.)?sejm\.gov\.pl/[Ss]ejm(?P<term>\d+)\.nsf/transmisje(?:_arch)?\.xsp\?(?:[^#]+&)?unid=(?P<id>[\dA-F]+)',
+ r'https?://sejm-embed\.redcdn\.pl/[Ss]ejm(?P<term>\d+)\.nsf/VideoFrame\.xsp/(?P<id>[\dA-F]+)',
+ )
+ IE_NAME = 'sejm'
+
+ _TESTS = [{
+ # multiple cameras, polish SL iterpreter
+ 'url': 'https://www.sejm.gov.pl/Sejm10.nsf/transmisje_arch.xsp#6181EF1AD9CEEBB5C1258A6D006452B5',
+ 'info_dict': {
+ 'id': '6181EF1AD9CEEBB5C1258A6D006452B5',
+ 'title': '1. posiedzenie Sejmu X kadencji',
+ 'duration': 20145,
+ 'live_status': 'was_live',
+ 'location': 'Sala Posiedzeń',
+ },
+ 'playlist': [{
+ 'info_dict': {
+ 'id': 'ENC01-722340000000-722360145000',
+ 'ext': 'mp4',
+ 'duration': 20145,
+ 'title': '1. posiedzenie Sejmu X kadencji - ENC01',
+ 'live_status': 'was_live',
+ },
+ }, {
+ 'info_dict': {
+ 'id': 'ENC30-722340000000-722360145000',
+ 'ext': 'mp4',
+ 'duration': 20145,
+ 'title': '1. posiedzenie Sejmu X kadencji - ENC30',
+ 'live_status': 'was_live',
+ },
+ }, {
+ 'info_dict': {
+ 'id': 'ENC31-722340000000-722360145000',
+ 'ext': 'mp4',
+ 'duration': 20145,
+ 'title': '1. posiedzenie Sejmu X kadencji - ENC31',
+ 'live_status': 'was_live',
+ },
+ }, {
+ 'info_dict': {
+ 'id': 'ENC32-722340000000-722360145000',
+ 'ext': 'mp4',
+ 'duration': 20145,
+ 'title': '1. posiedzenie Sejmu X kadencji - ENC32',
+ 'live_status': 'was_live',
+ },
+ }, {
+ # sign lang interpreter
+ 'info_dict': {
+ 'id': 'Migacz-ENC01-1-722340000000-722360145000',
+ 'ext': 'mp4',
+ 'duration': 20145,
+ 'title': '1. posiedzenie Sejmu X kadencji - Migacz-ENC01',
+ 'live_status': 'was_live',
+ },
+ }],
+ }, {
+ 'url': 'https://www.sejm.gov.pl/Sejm8.nsf/transmisje.xsp?unid=9377A9D65518E9A5C125808E002E9FF2',
+ 'info_dict': {
+ 'id': '9377A9D65518E9A5C125808E002E9FF2',
+ 'title': 'Debata "Lepsza Polska: obywatelska"',
+ 'description': 'KP .Nowoczesna',
+ 'duration': 8770,
+ 'live_status': 'was_live',
+ 'location': 'sala kolumnowa im. Kazimierza Pużaka (bud. C-D)',
+ },
+ 'playlist': [{
+ 'info_dict': {
+ 'id': 'ENC08-1-503831270000-503840040000',
+ 'ext': 'mp4',
+ 'duration': 8770,
+ 'title': 'Debata "Lepsza Polska: obywatelska" - ENC08',
+ 'live_status': 'was_live',
+ },
+ }],
+ }, {
+ # 7th term is very special, since it does not use redcdn livx
+ 'url': 'https://www.sejm.gov.pl/sejm7.nsf/transmisje_arch.xsp?rok=2015&month=11#A6E6D475ECCC6FE5C1257EF90034817F',
+ 'info_dict': {
+ 'id': 'A6E6D475ECCC6FE5C1257EF90034817F',
+ 'title': 'Konferencja prasowa - Stanowisko SLD ws. składu nowego rządu',
+ 'description': 'SLD - Biuro Prasowe Klubu',
+ 'duration': 514,
+ 'location': 'sala 101/bud. C',
+ 'live_status': 'was_live',
+ },
+ 'playlist': [{
+ 'info_dict': {
+ 'id': 'A6E6D475ECCC6FE5C1257EF90034817F',
+ 'ext': 'mp4',
+ 'title': 'Konferencja prasowa - Stanowisko SLD ws. składu nowego rządu',
+ 'duration': 514,
+ },
+ }],
+ }, {
+ 'url': 'https://sejm-embed.redcdn.pl/Sejm10.nsf/VideoFrame.xsp/FED58EABB97FBD53C1258A7400386492',
+ 'only_matching': True,
+ }]
+
+ def _real_extract(self, url):
+ term, video_id = self._match_valid_url(url).group('term', 'id')
+ frame = self._download_webpage(
+ f'https://sejm-embed.redcdn.pl/Sejm{term}.nsf/VideoFrame.xsp/{video_id}',
+ video_id)
+ # despite being named "transmisje_arch", this endpoint works for live streams too!
+ data = self._download_json(
+ f'https://www.sejm.gov.pl/Sejm{term}.nsf/transmisje_arch.xsp/json/{video_id}',
+ video_id)
+ params = data['params']
+
+ title = strip_or_none(data.get('title'))
+
+ if data.get('status') == 'VIDEO_ENDED':
+ live_status = 'was_live'
+ elif data.get('status') == 'VIDEO_PLAYING':
+ live_status = 'is_live'
+ else:
+ live_status = None
+ self.report_warning(f'unknown status: {data.get("status")}')
+
+ start_time = rfc3339_to_atende(params['start'])
+ # current streams have a stop time of *expected* end of session, but actual times
+ # can change during the transmission. setting a stop_time would artificially
+ # end the stream at that time, while the session actually keeps going.
+ if live_status == 'was_live':
+ stop_time = rfc3339_to_atende(params['stop'])
+ duration = (stop_time - start_time) // 1000
+ else:
+ stop_time, duration = None, None
+
+ entries = []
+
+ def add_entry(file, legacy_file=False):
+ if not file:
+ return
+ file = self._proto_relative_url(file)
+ if not legacy_file:
+ file = update_url_query(file, {'startTime': start_time})
+ if stop_time is not None:
+ file = update_url_query(file, {'stopTime': stop_time})
+ stream_id = self._search_regex(r'/o2/sejm/([^/]+)/[^./]+\.livx', file, 'stream id')
+ common_info = {
+ 'url': file,
+ 'duration': duration,
+ }
+ if legacy_file:
+ entries.append({
+ **common_info,
+ 'id': video_id,
+ 'title': title,
+ })
+ else:
+ entries.append({
+ **common_info,
+ '_type': 'url_transparent',
+ 'ie_key': RedCDNLivxIE.ie_key(),
+ 'id': stream_id,
+ 'title': join_nonempty(title, stream_id, delim=' - '),
+ })
+
+ cameras = self._search_json(
+ r'var\s+cameras\s*=', frame, 'camera list', video_id,
+ contains_pattern=r'\[(?s:.+)\]', transform_source=js_to_json,
+ fatal=False) or []
+ for camera_file in traverse_obj(cameras, (..., 'file', {dict})):
+ if camera_file.get('flv'):
+ add_entry(camera_file['flv'])
+ elif camera_file.get('mp4'):
+ # plain MP4 is only a thing in the 7th term: there are no streams before it, and from the 8th term onwards it's RedCDN livx
+ add_entry(camera_file['mp4'], legacy_file=True)
+ else:
+ self.report_warning('Unknown camera stream type found')
+
+ if params.get('mig'):
+ add_entry(self._search_regex(r"var sliUrl\s*=\s*'([^']+)'", frame, 'sign language interpreter url', fatal=False))
+
+ return {
+ '_type': 'playlist',
+ 'entries': entries,
+ 'id': video_id,
+ 'title': title,
+ 'description': clean_html(data.get('desc')) or None,
+ 'duration': duration,
+ 'live_status': live_status,
+ 'location': strip_or_none(data.get('location')),
+ }
diff --git a/yt_dlp/extractor/senalcolombia.py b/yt_dlp/extractor/senalcolombia.py
new file mode 100644
index 0000000..b2f354f
--- /dev/null
+++ b/yt_dlp/extractor/senalcolombia.py
@@ -0,0 +1,32 @@
+from .common import InfoExtractor
+from .rtvcplay import RTVCKalturaIE
+
+
+class SenalColombiaLiveIE(InfoExtractor):
+ _WORKING = False
+ _VALID_URL = r'https?://(?:www\.)?senalcolombia\.tv/(?P<id>senal-en-vivo)'
+
+ _TESTS = [{
+ 'url': 'https://www.senalcolombia.tv/senal-en-vivo',
+ 'info_dict': {
+ 'id': 'indexSC',
+ 'title': 're:^Señal Colombia',
+ 'description': 'md5:799f16a401d97f40c33a2c6a3e2a507b',
+ 'thumbnail': r're:^https?://.*\.(?:jpg|png)',
+ 'live_status': 'is_live',
+ 'ext': 'mp4',
+ },
+ 'params': {
+ 'skip_download': 'Livestream',
+ },
+ }]
+
+ def _real_extract(self, url):
+ display_id = self._match_id(url)
+ webpage = self._download_webpage(url, display_id)
+
+ hydration = self._search_json(
+ r'<script\b[^>]*data-drupal-selector\s*=\s*"[^"]*drupal-settings-json[^"]*"[^>]*>',
+ webpage, 'hydration', display_id)
+
+ return self.url_result(hydration['envivosrc'], RTVCKalturaIE, display_id)
diff --git a/yt_dlp/extractor/senategov.py b/yt_dlp/extractor/senategov.py
new file mode 100644
index 0000000..7ff0cf5
--- /dev/null
+++ b/yt_dlp/extractor/senategov.py
@@ -0,0 +1,200 @@
+import re
+
+from .common import InfoExtractor
+from ..compat import (
+ compat_parse_qs,
+ compat_urlparse,
+)
+from ..utils import (
+ ExtractorError,
+ parse_qs,
+ unsmuggle_url,
+)
+
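+# Maps a committee slug to its (Akamai stream number, CDN domain); the archive
+# ('arch') pseudo-committee has no stream number.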
+_COMMITTEES = {
+ 'ag': ('76440', 'http://ag-f.akamaihd.net'),
+ 'aging': ('76442', 'http://aging-f.akamaihd.net'),
+ 'approps': ('76441', 'http://approps-f.akamaihd.net'),
+ 'arch': ('', 'http://ussenate-f.akamaihd.net'),
+ 'armed': ('76445', 'http://armed-f.akamaihd.net'),
+ 'banking': ('76446', 'http://banking-f.akamaihd.net'),
+ 'budget': ('76447', 'http://budget-f.akamaihd.net'),
+ 'cecc': ('76486', 'http://srs-f.akamaihd.net'),
+ 'commerce': ('80177', 'http://commerce1-f.akamaihd.net'),
+ 'csce': ('75229', 'http://srs-f.akamaihd.net'),
+ 'dpc': ('76590', 'http://dpc-f.akamaihd.net'),
+ 'energy': ('76448', 'http://energy-f.akamaihd.net'),
+ 'epw': ('76478', 'http://epw-f.akamaihd.net'),
+ 'ethics': ('76449', 'http://ethics-f.akamaihd.net'),
+ 'finance': ('76450', 'http://finance-f.akamaihd.net'),
+ 'foreign': ('76451', 'http://foreign-f.akamaihd.net'),
+ 'govtaff': ('76453', 'http://govtaff-f.akamaihd.net'),
+ 'help': ('76452', 'http://help-f.akamaihd.net'),
+ 'indian': ('76455', 'http://indian-f.akamaihd.net'),
+ 'intel': ('76456', 'http://intel-f.akamaihd.net'),
+ 'intlnarc': ('76457', 'http://intlnarc-f.akamaihd.net'),
+ 'jccic': ('85180', 'http://jccic-f.akamaihd.net'),
+ 'jec': ('76458', 'http://jec-f.akamaihd.net'),
+ 'judiciary': ('76459', 'http://judiciary-f.akamaihd.net'),
+ 'rpc': ('76591', 'http://rpc-f.akamaihd.net'),
+ 'rules': ('76460', 'http://rules-f.akamaihd.net'),
+ 'saa': ('76489', 'http://srs-f.akamaihd.net'),
+ 'smbiz': ('76461', 'http://smbiz-f.akamaihd.net'),
+ 'srs': ('75229', 'http://srs-f.akamaihd.net'),
+ 'uscc': ('76487', 'http://srs-f.akamaihd.net'),
+ 'vetaff': ('76462', 'http://vetaff-f.akamaihd.net'),
+}
+
+
+class SenateISVPIE(InfoExtractor):
+ IE_NAME = 'senate.gov:isvp'
+ _VALID_URL = r'https?://(?:www\.)?senate\.gov/isvp/?\?(?P<qs>.+)'
+ _EMBED_REGEX = [r"<iframe[^>]+src=['\"](?P<url>https?://www\.senate\.gov/isvp/?\?[^'\"]+)['\"]"]
+
+ _TESTS = [{
+ 'url': 'http://www.senate.gov/isvp/?comm=judiciary&type=live&stt=&filename=judiciary031715&auto_play=false&wmode=transparent&poster=http%3A%2F%2Fwww.judiciary.senate.gov%2Fthemes%2Fjudiciary%2Fimages%2Fvideo-poster-flash-fit.png',
+ 'info_dict': {
+ 'id': 'judiciary031715',
+ 'ext': 'mp4',
+ 'title': 'Integrated Senate Video Player',
+ 'thumbnail': r're:^https?://.*\.(?:jpg|png)$',
+ },
+ 'params': {
+ # m3u8 download
+ 'skip_download': True,
+ },
+ }, {
+ 'url': 'http://www.senate.gov/isvp/?type=live&comm=commerce&filename=commerce011514.mp4&auto_play=false',
+ 'info_dict': {
+ 'id': 'commerce011514',
+ 'ext': 'mp4',
+ 'title': 'Integrated Senate Video Player'
+ },
+ 'params': {
+ # m3u8 download
+ 'skip_download': True,
+ },
+ }, {
+ 'url': 'http://www.senate.gov/isvp/?type=arch&comm=intel&filename=intel090613&hc_location=ufi',
+ # checksum differs each time
+ 'info_dict': {
+ 'id': 'intel090613',
+ 'ext': 'mp4',
+ 'title': 'Integrated Senate Video Player'
+ }
+ }, {
+ # From http://www.c-span.org/video/?96791-1
+ 'url': 'http://www.senate.gov/isvp?type=live&comm=banking&filename=banking012715',
+ 'only_matching': True,
+ }]
+
+ def _real_extract(self, url):
+ url, smuggled_data = unsmuggle_url(url, {})
+
+ qs = compat_parse_qs(self._match_valid_url(url).group('qs'))
+ if not qs.get('filename') or not qs.get('type') or not qs.get('comm'):
+ raise ExtractorError('Invalid URL', expected=True)
+
+ video_id = re.sub(r'\.mp4$', '', qs['filename'][0])
+
+ webpage = self._download_webpage(url, video_id)
+
+ if smuggled_data.get('force_title'):
+ title = smuggled_data['force_title']
+ else:
+ title = self._html_extract_title(webpage)
+ poster = qs.get('poster')
+ thumbnail = poster[0] if poster else None
+
+ video_type = qs['type'][0]
+ committee = video_type if video_type == 'arch' else qs['comm'][0]
+
+ stream_num, domain = _COMMITTEES[committee]
+
+ formats = []
+ if video_type == 'arch':
+ filename = video_id if '.' in video_id else video_id + '.mp4'
+ m3u8_url = compat_urlparse.urljoin(domain, 'i/' + filename + '/master.m3u8')
+ formats = self._extract_m3u8_formats(m3u8_url, video_id, ext='mp4', m3u8_id='m3u8')
+ else:
+ hdcore_sign = 'hdcore=3.1.0'
+ url_params = (domain, video_id, stream_num)
+ f4m_url = '%s/z/%s_1@%s/manifest.f4m?%s' % (*url_params, hdcore_sign)
+ m3u8_url = '%s/i/%s_1@%s/master.m3u8' % url_params
+ for entry in self._extract_f4m_formats(f4m_url, video_id, f4m_id='f4m'):
+ # URLs without the extra param induce a 404 error
+ entry.update({'extra_param_to_segment_url': hdcore_sign})
+ formats.append(entry)
+ for entry in self._extract_m3u8_formats(m3u8_url, video_id, ext='mp4', m3u8_id='m3u8'):
+ mobj = re.search(r'(?P<tag>(?:-p|-b)).m3u8', entry['url'])
+ if mobj:
+ entry['format_id'] += mobj.group('tag')
+ formats.append(entry)
+
+ return {
+ 'id': video_id,
+ 'title': title,
+ 'formats': formats,
+ 'thumbnail': thumbnail,
+ }
+
+
+class SenateGovIE(InfoExtractor):
+ IE_NAME = 'senate.gov'
+ _VALID_URL = r'https?://(?:www\.)?(?:help|appropriations|judiciary|banking|armed-services|finance)\.senate\.gov'
+ _TESTS = [{
+ 'url': 'https://www.help.senate.gov/hearings/vaccines-saving-lives-ensuring-confidence-and-protecting-public-health',
+ 'info_dict': {
+ 'id': 'help090920',
+ 'display_id': 'vaccines-saving-lives-ensuring-confidence-and-protecting-public-health',
+ 'title': 'Vaccines: Saving Lives, Ensuring Confidence, and Protecting Public Health',
+ 'description': 'The U.S. Senate Committee on Health, Education, Labor & Pensions',
+ 'ext': 'mp4',
+ },
+ 'params': {'skip_download': 'm3u8'},
+ }, {
+ 'url': 'https://www.appropriations.senate.gov/hearings/watch?hearingid=B8A25434-5056-A066-6020-1F68CB75F0CD',
+ 'info_dict': {
+ 'id': 'appropsA051518',
+ 'display_id': 'watch?hearingid=B8A25434-5056-A066-6020-1F68CB75F0CD',
+ 'title': 'Review of the FY2019 Budget Request for the U.S. Army',
+ 'ext': 'mp4',
+ },
+ 'params': {'skip_download': 'm3u8'},
+ }, {
+ 'url': 'https://www.banking.senate.gov/hearings/21st-century-communities-public-transportation-infrastructure-investment-and-fast-act-reauthorization',
+ 'info_dict': {
+ 'id': 'banking041521',
+ 'display_id': '21st-century-communities-public-transportation-infrastructure-investment-and-fast-act-reauthorization',
+ 'title': '21st Century Communities: Public Transportation Infrastructure Investment and FAST Act Reauthorization',
+ 'description': 'The Official website of The United States Committee on Banking, Housing, and Urban Affairs',
+ 'ext': 'mp4',
+ },
+ 'params': {'skip_download': 'm3u8'},
+ }]
+
+ def _real_extract(self, url):
+ display_id = self._generic_id(url)
+ webpage = self._download_webpage(url, display_id)
+ parse_info = parse_qs(self._search_regex(
+ r'<iframe class="[^>"]*streaminghearing[^>"]*"\s[^>]*\bsrc="([^">]*)', webpage, 'hearing URL'))
+
+ stream_num, stream_domain = _COMMITTEES[parse_info['comm'][-1]]
+ filename = parse_info['filename'][-1]
+
+ formats = self._extract_m3u8_formats(
+ f'{stream_domain}/i/{filename}_1@{stream_num}/master.m3u8',
+ display_id, ext='mp4')
+
+ title = self._html_search_regex(
+ (*self._og_regexes('title'), r'(?s)<title>([^<]*?)</title>'), webpage, 'video title')
+
+ return {
+ 'id': re.sub(r'\.mp4$', '', filename),
+ 'display_id': display_id,
+ 'title': re.sub(r'\s+', ' ', title.split('|')[0]).strip(),
+ 'description': self._og_search_description(webpage, default=None),
+ 'thumbnail': self._og_search_thumbnail(webpage, default=None),
+ 'age_limit': self._rta_search(webpage),
+ 'formats': formats
+ }
diff --git a/yt_dlp/extractor/sendtonews.py b/yt_dlp/extractor/sendtonews.py
new file mode 100644
index 0000000..1ecea71
--- /dev/null
+++ b/yt_dlp/extractor/sendtonews.py
@@ -0,0 +1,105 @@
+import re
+
+from .common import InfoExtractor
+from ..utils import (
+ determine_protocol,
+ float_or_none,
+ int_or_none,
+ parse_iso8601,
+ unescapeHTML,
+ update_url_query,
+)
+
+
+class SendtoNewsIE(InfoExtractor):
+ _WORKING = False
+ _VALID_URL = r'https?://embed\.sendtonews\.com/player2/embedplayer\.php\?.*\bSC=(?P<id>[0-9A-Za-z-]+)'
+
+ _TEST = {
+ # From http://cleveland.cbslocal.com/2016/05/16/indians-score-season-high-15-runs-in-blowout-win-over-reds-rapid-reaction/
+ 'url': 'http://embed.sendtonews.com/player2/embedplayer.php?SC=GxfCe0Zo7D-175909-5588&type=single&autoplay=on&sound=YES',
+ 'info_dict': {
+ 'id': 'GxfCe0Zo7D-175909-5588'
+ },
+ 'playlist_count': 8,
+ # test the first video only to prevent lengthy tests
+ 'playlist': [{
+ 'info_dict': {
+ 'id': '240385',
+ 'ext': 'mp4',
+ 'title': 'Indians introduce Encarnacion',
+ 'description': 'Indians president of baseball operations Chris Antonetti and Edwin Encarnacion discuss the slugger\'s three-year contract with Cleveland',
+ 'duration': 137.898,
+ 'thumbnail': r're:https?://.*\.jpg$',
+ 'upload_date': '20170105',
+ 'timestamp': 1483649762,
+ },
+ }],
+ 'params': {
+ # m3u8 download
+ 'skip_download': True,
+ },
+ }
+
+ _URL_TEMPLATE = '//embed.sendtonews.com/player2/embedplayer.php?SC=%s'
+
+ @classmethod
+ def _extract_embed_urls(cls, url, webpage):
+ mobj = re.search(r'''(?x)<script[^>]+src=([\'"])
+ (?:https?:)?//embed\.sendtonews\.com/player/responsiveembed\.php\?
+ .*\bSC=(?P<SC>[0-9a-zA-Z-]+).*
+ \1>''', webpage)
+ if mobj:
+ sc = mobj.group('SC')
+ yield cls._URL_TEMPLATE % sc
+
+ def _real_extract(self, url):
+ playlist_id = self._match_id(url)
+
+ data_url = update_url_query(
+ url.replace('embedplayer.php', 'data_read.php'),
+ {'cmd': 'loadInitial'})
+ playlist_data = self._download_json(data_url, playlist_id)
+
+ entries = []
+ for video in playlist_data['playlistData'][0]:
+ info_dict = self._parse_jwplayer_data(
+ video['jwconfiguration'],
+ require_title=False, m3u8_id='hls', rtmp_params={'no_resume': True})
+
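+ # The JWPlayer config omits bitrates; recover tbr from the "/<kbps>k/" path
+ # component of each format URL.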
+ for f in info_dict['formats']:
+ if f.get('tbr'):
+ continue
+ tbr = int_or_none(self._search_regex(
+ r'/(\d+)k/', f['url'], 'bitrate', default=None))
+ if not tbr:
+ continue
+ f.update({
+ 'format_id': '%s-%d' % (determine_protocol(f), tbr),
+ 'tbr': tbr,
+ })
+
+ thumbnails = []
+ if video.get('thumbnailUrl'):
+ thumbnails.append({
+ 'id': 'normal',
+ 'url': video['thumbnailUrl'],
+ })
+ if video.get('smThumbnailUrl'):
+ thumbnails.append({
+ 'id': 'small',
+ 'url': video['smThumbnailUrl'],
+ })
+ info_dict.update({
+ 'title': video['S_headLine'].strip(),
+ 'description': unescapeHTML(video.get('S_fullStory')),
+ 'thumbnails': thumbnails,
+ 'duration': float_or_none(video.get('SM_length')),
+ 'timestamp': parse_iso8601(video.get('S_sysDate'), delimiter=' '),
+ # 'tbr' was explicitly set to be preferred over 'height' originally,
+ # so this is kept unless someone can confirm it is unnecessary
+ '_format_sort_fields': ('tbr', 'res')
+ })
+ entries.append(info_dict)
+
+ return self.playlist_result(entries, playlist_id)
diff --git a/yt_dlp/extractor/servus.py b/yt_dlp/extractor/servus.py
new file mode 100644
index 0000000..dda1958
--- /dev/null
+++ b/yt_dlp/extractor/servus.py
@@ -0,0 +1,135 @@
+from .common import InfoExtractor
+from ..utils import (
+ ExtractorError,
+ float_or_none,
+ format_field,
+ int_or_none,
+ join_nonempty,
+ traverse_obj,
+ unescapeHTML,
+ unified_timestamp,
+)
+
+
+class ServusIE(InfoExtractor):
+ _VALID_URL = r'''(?x)
+ https?://
+ (?:www\.)?
+ (?:
+ servus\.com/(?:(?:at|de)/p/[^/]+|tv/videos)|
+ (?:servustv|pm-wissen)\.com/(?:[^/]+/)?v(?:ideos)?
+ )
+ /(?P<id>[aA]{2}-?\w+|\d+-\d+)
+ '''
+ _TESTS = [{
+ # URL schema v3
+ 'url': 'https://www.servustv.com/natur/v/aa-28bycqnh92111/',
+ 'info_dict': {
+ 'id': 'AA-28BYCQNH92111',
+ 'ext': 'mp4',
+ 'title': 'Klettersteige in den Alpen',
+ 'description': 'md5:25e47ddd83a009a0f9789ba18f2850ce',
+ 'thumbnail': r're:^https?://.*\.jpg',
+ 'duration': 2823,
+ 'timestamp': 1655752333,
+ 'upload_date': '20220620',
+ 'series': 'Bergwelten',
+ 'season': 'Season 11',
+ 'season_number': 11,
+ 'episode': 'Episode 8 - Vie Ferrate – Klettersteige in den Alpen',
+ 'episode_number': 8,
+ },
+ 'params': {'skip_download': 'm3u8'}
+ }, {
+ 'url': 'https://www.servustv.com/natur/v/aa-1xg5xwmgw2112/',
+ 'only_matching': True,
+ }, {
+ 'url': 'https://www.servustv.com/natur/v/aansszcx3yi9jmlmhdc1/',
+ 'only_matching': True,
+ }, {
+ # URL schema v2
+ 'url': 'https://www.servustv.com/videos/aa-1t6vbu5pw1w12/',
+ 'only_matching': True,
+ }, {
+ # URL schema v1
+ 'url': 'https://www.servus.com/de/p/Die-Gr%C3%BCnen-aus-Sicht-des-Volkes/AA-1T6VBU5PW1W12/',
+ 'only_matching': True,
+ }, {
+ 'url': 'https://www.servus.com/at/p/Wie-das-Leben-beginnt/1309984137314-381415152/',
+ 'only_matching': True,
+ }, {
+ 'url': 'https://www.servus.com/tv/videos/aa-1t6vbu5pw1w12/',
+ 'only_matching': True,
+ }, {
+ 'url': 'https://www.servus.com/tv/videos/1380889096408-1235196658/',
+ 'only_matching': True,
+ }, {
+ 'url': 'https://www.pm-wissen.com/videos/aa-24mus4g2w2112/',
+ 'only_matching': True,
+ }]
+
+ def _real_extract(self, url):
+ video_id = self._match_id(url).upper()
+
+ video = self._download_json(
+ 'https://api-player.redbull.com/stv/servus-tv?timeZone=Europe/Berlin',
+ video_id, 'Downloading video JSON', query={'videoId': video_id})
+ if not video.get('videoUrl'):
+ self._report_errors(video)
+ formats, subtitles = self._extract_m3u8_formats_and_subtitles(
+ video['videoUrl'], video_id, 'mp4', m3u8_id='hls')
+
+ season = video.get('season')
+ season_number = int_or_none(self._search_regex(
+ r'Season (\d+)', season or '', 'season number', default=None))
+ episode = video.get('chapter')
+ episode_number = int_or_none(self._search_regex(
+ r'Episode (\d+)', episode or '', 'episode number', default=None))
+
+ return {
+ 'id': video_id,
+ 'title': video.get('title'),
+ 'description': self._get_description(video_id) or video.get('description'),
+ 'thumbnail': video.get('poster'),
+ 'duration': float_or_none(video.get('duration')),
+ 'timestamp': unified_timestamp(video.get('currentSunrise')),
+ 'series': video.get('label'),
+ 'season': season,
+ 'season_number': season_number,
+ 'episode': episode,
+ 'episode_number': episode_number,
+ 'formats': formats,
+ 'subtitles': subtitles,
+ }
+
+ def _get_description(self, video_id):
+ info = self._download_json(
+ f'https://backend.servustv.com/wp-json/rbmh/v2/media_asset/aa_id/{video_id}?fieldset=page',
+ video_id, fatal=False)
+
+ return join_nonempty(*traverse_obj(info, (
+ ('stv_short_description', 'stv_long_description'),
+ {lambda x: unescapeHTML(x.replace('\n\n', '\n'))})), delim='\n\n')
+
+ def _report_errors(self, video):
+ playability_errors = traverse_obj(video, ('playabilityErrors', ...))
+ if not playability_errors:
+ raise ExtractorError('No videoUrl and no information about errors')
+
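+ # FSK_BLOCKED: the FSK age rating limits playback to a nightly time window
+ # and/or a minimum viewer age.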
+ elif 'FSK_BLOCKED' in playability_errors:
+ details = traverse_obj(video, ('playabilityErrorDetails', 'FSK_BLOCKED'), expected_type=dict)
+ message = format_field(''.join((
+ format_field(details, 'minEveningHour', ' from %02d:00'),
+ format_field(details, 'maxMorningHour', ' to %02d:00'),
+ format_field(details, 'minAge', ' (Minimum age %d)'),
+ )), None, 'Only available%s') or 'Blocked by FSK with unknown availability'
+
+ elif 'NOT_YET_AVAILABLE' in playability_errors:
+ message = format_field(
+ video, (('playabilityErrorDetails', 'NOT_YET_AVAILABLE', 'availableFrom'), 'currentSunrise'),
+ 'Only available from %s') or 'Video not yet available with unknown availability'
+
+ else:
+ message = f'Video unavailable: {", ".join(playability_errors)}'
+
+ raise ExtractorError(message, expected=True)
diff --git a/yt_dlp/extractor/sevenplus.py b/yt_dlp/extractor/sevenplus.py
new file mode 100644
index 0000000..6c688d1
--- /dev/null
+++ b/yt_dlp/extractor/sevenplus.py
@@ -0,0 +1,132 @@
+import json
+import re
+
+from .brightcove import BrightcoveNewBaseIE
+from ..compat import compat_str
+from ..networking.exceptions import HTTPError
+from ..utils import (
+ ExtractorError,
+ try_get,
+ update_url_query,
+)
+
+
+class SevenPlusIE(BrightcoveNewBaseIE):
+ IE_NAME = '7plus'
+ _VALID_URL = r'https?://(?:www\.)?7plus\.com\.au/(?P<path>[^?]+\?.*?\bepisode-id=(?P<id>[^&#]+))'
+ _TESTS = [{
+ 'url': 'https://7plus.com.au/MTYS?episode-id=MTYS7-003',
+ 'info_dict': {
+ 'id': 'MTYS7-003',
+ 'ext': 'mp4',
+ 'title': 'S7 E3 - Wind Surf',
+ 'description': 'md5:29c6a69f21accda7601278f81b46483d',
+ 'uploader_id': '5303576322001',
+ 'upload_date': '20171201',
+ 'timestamp': 1512106377,
+ 'series': 'Mighty Ships',
+ 'season_number': 7,
+ 'episode_number': 3,
+ 'episode': 'Wind Surf',
+ },
+ 'params': {
+ 'skip_download': True,
+ }
+ }, {
+ 'url': 'https://7plus.com.au/UUUU?episode-id=AUMS43-001',
+ 'only_matching': True,
+ }]
+
+ def _real_initialize(self):
+ self.token = None
+
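+ # Reuse an existing Gigya session: the glt_<APIKey> cookie holds a login token
+ # that accounts.getJWT turns into a JWT, which is then exchanged for a 7plus
+ # bearer token.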
+ cookies = self._get_cookies('https://7plus.com.au')
+ api_key = next((x for x in cookies if x.startswith('glt_')), '')[4:]
+ if not api_key: # Not signed in (no Gigya login-token cookie), skip login
+ return
+
+ login_resp = self._download_json(
+ 'https://login.7plus.com.au/accounts.getJWT', None, 'Logging in', fatal=False,
+ query={
+ 'APIKey': api_key,
+ 'sdk': 'js_latest',
+ 'login_token': cookies[f'glt_{api_key}'].value,
+ 'authMode': 'cookie',
+ 'pageURL': 'https://7plus.com.au/',
+ 'sdkBuild': '12471',
+ 'format': 'json',
+ }) or {}
+
+ if 'errorMessage' in login_resp:
+ self.report_warning(f'Unable to login: 7plus said: {login_resp["errorMessage"]}')
+ return
+ id_token = login_resp.get('id_token')
+ if not id_token:
+ self.report_warning('Unable to login: Could not extract id token')
+ return
+
+ token_resp = self._download_json(
+ 'https://7plus.com.au/auth/token', None, 'Getting auth token', fatal=False,
+ headers={'Content-Type': 'application/json'}, data=json.dumps({
+ 'idToken': id_token,
+ 'platformId': 'web',
+ 'regSource': '7plus',
+ }).encode('utf-8')) or {}
+ self.token = token_resp.get('token')
+ if not self.token:
+ self.report_warning('Unable to log in: Could not extract auth token')
+
+ def _real_extract(self, url):
+ path, episode_id = self._match_valid_url(url).groups()
+
+ headers = {}
+ if self.token:
+ headers['Authorization'] = f'Bearer {self.token}'
+
+ try:
+ media = self._download_json(
+ 'https://videoservice.swm.digital/playback', episode_id, query={
+ 'appId': '7plus',
+ 'deviceType': 'web',
+ 'platformType': 'web',
+ 'accountId': 5303576322001,
+ 'referenceId': 'ref:' + episode_id,
+ 'deliveryId': 'csai',
+ 'videoType': 'vod',
+ }, headers=headers)['media']
+ except ExtractorError as e:
+ if isinstance(e.cause, HTTPError) and e.cause.status == 403:
+ raise ExtractorError(self._parse_json(
+ e.cause.response.read().decode(), episode_id)[0]['error_code'], expected=True)
+ raise
+
+ for source in media.get('sources') or []:
+ src = source.get('src')
+ if not src:
+ continue
+ source['src'] = update_url_query(src, {'rule': ''})
+
+ info = self._parse_brightcove_metadata(media, episode_id)
+
+ content = self._download_json(
+ 'https://component-cdn.swm.digital/content/' + path,
+ episode_id, headers={
+ 'market-id': 4,
+ }, fatal=False) or {}
+ for item in content.get('items') or []:
+ if item.get('componentData', {}).get('componentType') == 'infoPanel':
+ for src_key, dst_key in [('title', 'title'), ('shortSynopsis', 'description')]:
+ value = item.get(src_key)
+ if value:
+ info[dst_key] = value
+ info['series'] = try_get(
+ item, lambda x: x['seriesLogo']['name'], compat_str)
+ mobj = re.search(r'^S(\d+)\s+E(\d+)\s+-\s+(.+)$', info['title'])
+ if mobj:
+ info.update({
+ 'season_number': int(mobj.group(1)),
+ 'episode_number': int(mobj.group(2)),
+ 'episode': mobj.group(3),
+ })
+
+ return info
diff --git a/yt_dlp/extractor/sexu.py b/yt_dlp/extractor/sexu.py
new file mode 100644
index 0000000..989b63c
--- /dev/null
+++ b/yt_dlp/extractor/sexu.py
@@ -0,0 +1,61 @@
+from .common import InfoExtractor
+
+
+class SexuIE(InfoExtractor):
+ _WORKING = False
+ _VALID_URL = r'https?://(?:www\.)?sexu\.com/(?P<id>\d+)'
+ _TEST = {
+ 'url': 'http://sexu.com/961791/',
+ 'md5': 'ff615aca9691053c94f8f10d96cd7884',
+ 'info_dict': {
+ 'id': '961791',
+ 'ext': 'mp4',
+ 'title': 'md5:4d05a19a5fc049a63dbbaf05fb71d91b',
+ 'description': 'md5:2b75327061310a3afb3fbd7d09e2e403',
+ 'categories': list, # NSFW
+ 'thumbnail': r're:https?://.*\.jpg$',
+ 'age_limit': 18,
+ }
+ }
+
+ def _real_extract(self, url):
+ video_id = self._match_id(url)
+ webpage = self._download_webpage(url, video_id)
+
+ jwvideo = self._parse_json(
+ self._search_regex(r'\.setup\(\s*({.+?})\s*\);', webpage, 'jwvideo'),
+ video_id)
+
+ sources = jwvideo['sources']
+
+ formats = [{
+ 'url': source['file'].replace('\\', ''),
+ 'format_id': source.get('label'),
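+ # Labels look like "720p"; derive the height from them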
+ 'height': int(self._search_regex(
+ r'^(\d+)[pP]', source.get('label', ''), 'height',
+ default=None)),
+ } for source in sources if source.get('file')]
+
+ title = self._html_search_regex(
+ r'<title>([^<]+)\s*-\s*Sexu\.Com</title>', webpage, 'title')
+
+ description = self._html_search_meta(
+ 'description', webpage, 'description')
+
+ thumbnail = jwvideo.get('image')
+
+ categories_str = self._html_search_meta(
+ 'keywords', webpage, 'categories')
+ categories = (
+ None if categories_str is None
+ else categories_str.split(','))
+
+ return {
+ 'id': video_id,
+ 'title': title,
+ 'description': description,
+ 'thumbnail': thumbnail,
+ 'categories': categories,
+ 'formats': formats,
+ 'age_limit': 18,
+ }
diff --git a/yt_dlp/extractor/seznamzpravy.py b/yt_dlp/extractor/seznamzpravy.py
new file mode 100644
index 0000000..79e8885
--- /dev/null
+++ b/yt_dlp/extractor/seznamzpravy.py
@@ -0,0 +1,157 @@
+from .common import InfoExtractor
+from ..compat import (
+ compat_str,
+ compat_urllib_parse_urlparse,
+)
+from ..utils import (
+ int_or_none,
+ parse_codecs,
+ parse_qs,
+ try_get,
+ urljoin,
+)
+
+
+def _raw_id(src_url):
+ return compat_urllib_parse_urlparse(src_url).path.split('/')[-1]
+
+
+class SeznamZpravyIE(InfoExtractor):
+ _VALID_URL = r'https?://(?:www\.)?seznamzpravy\.cz/iframe/player\?.*\bsrc='
+ _EMBED_REGEX = [r'<iframe\b[^>]+\bsrc=(["\'])(?P<url>(?:https?:)?//(?:www\.)?seznamzpravy\.cz/iframe/player\?.*?)\1']
+ _TESTS = [{
+ 'url': 'https://www.seznamzpravy.cz/iframe/player?duration=241&serviceSlug=zpravy&src=https%3A%2F%2Fv39-a.sdn.szn.cz%2Fv_39%2Fvmd%2F5999c902ea707c67d8e267a9%3Ffl%3Dmdk%2C432f65a0%7C&itemType=video&autoPlay=false&title=Sv%C4%9Bt%20bez%20obalu%3A%20%C4%8Ce%C5%A1t%C3%AD%20voj%C3%A1ci%20na%20mis%C3%ADch%20(kr%C3%A1tk%C3%A1%20verze)&series=Sv%C4%9Bt%20bez%20obalu&serviceName=Seznam%20Zpr%C3%A1vy&poster=%2F%2Fd39-a.sdn.szn.cz%2Fd_39%2Fc_img_F_I%2FR5puJ.jpeg%3Ffl%3Dcro%2C0%2C0%2C1920%2C1080%7Cres%2C1200%2C%2C1%7Cjpg%2C80%2C%2C1&width=1920&height=1080&cutFrom=0&cutTo=0&splVersion=VOD&contentId=170889&contextId=35990&showAdvert=true&collocation=&autoplayPossible=true&embed=&isVideoTooShortForPreroll=false&isVideoTooLongForPostroll=true&videoCommentOpKey=&videoCommentId=&version=4.0.76&dotService=zpravy&gemiusPrismIdentifier=bVc1ZIb_Qax4W2v5xOPGpMeCP31kFfrTzj0SqPTLh_b.Z7&zoneIdPreroll=seznam.pack.videospot&skipOffsetPreroll=5&sectionPrefixPreroll=%2Fzpravy',
+ 'info_dict': {
+ 'id': '170889',
+ 'ext': 'mp4',
+ 'title': 'Svět bez obalu: Čeští vojáci na misích (krátká verze)',
+ 'thumbnail': r're:^https?://.*\.jpe?g',
+ 'duration': 241,
+ 'series': 'Svět bez obalu',
+ },
+ 'params': {
+ 'skip_download': True,
+ },
+ }, {
+ # with Location key
+ 'url': 'https://www.seznamzpravy.cz/iframe/player?duration=null&serviceSlug=zpravy&src=https%3A%2F%2Flive-a.sdn.szn.cz%2Fv_39%2F59e468fe454f8472a96af9fa%3Ffl%3Dmdk%2C5c1e2840%7C&itemType=livevod&autoPlay=false&title=P%C5%99edseda%20KDU-%C4%8CSL%20Pavel%20B%C4%9Blobr%C3%A1dek%20ve%20volebn%C3%AD%20V%C3%BDzv%C4%9B%20Seznamu&series=V%C3%BDzva&serviceName=Seznam%20Zpr%C3%A1vy&poster=%2F%2Fd39-a.sdn.szn.cz%2Fd_39%2Fc_img_G_J%2FjTBCs.jpeg%3Ffl%3Dcro%2C0%2C0%2C1280%2C720%7Cres%2C1200%2C%2C1%7Cjpg%2C80%2C%2C1&width=16&height=9&cutFrom=0&cutTo=0&splVersion=VOD&contentId=185688&contextId=38489&showAdvert=true&collocation=&hideFullScreen=false&hideSubtitles=false&embed=&isVideoTooShortForPreroll=false&isVideoTooShortForPreroll2=false&isVideoTooLongForPostroll=false&fakePostrollZoneID=seznam.clanky.zpravy.preroll&fakePrerollZoneID=seznam.clanky.zpravy.preroll&videoCommentId=&trim=default_16x9&noPrerollVideoLength=30&noPreroll2VideoLength=undefined&noMidrollVideoLength=0&noPostrollVideoLength=999999&autoplayPossible=true&version=5.0.41&dotService=zpravy&gemiusPrismIdentifier=zD3g7byfW5ekpXmxTVLaq5Srjw5i4hsYo0HY1aBwIe..27&zoneIdPreroll=seznam.pack.videospot&skipOffsetPreroll=5&sectionPrefixPreroll=%2Fzpravy%2Fvyzva&zoneIdPostroll=seznam.pack.videospot&skipOffsetPostroll=5&sectionPrefixPostroll=%2Fzpravy%2Fvyzva&regression=false',
+ 'info_dict': {
+ 'id': '185688',
+ 'ext': 'mp4',
+ 'title': 'Předseda KDU-ČSL Pavel Bělobrádek ve volební Výzvě Seznamu',
+ 'thumbnail': r're:^https?://.*\.jpe?g',
+ 'series': 'Výzva',
+ },
+ 'params': {
+ 'skip_download': True,
+ },
+ }]
+
+ def _extract_sdn_formats(self, sdn_url, video_id):
+ sdn_data = self._download_json(sdn_url, video_id)
+
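+ # Some SDN responses only contain a redirect target under the "Location" key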
+ if sdn_data.get('Location'):
+ sdn_url = sdn_data['Location']
+ sdn_data = self._download_json(sdn_url, video_id)
+
+ formats = []
+ mp4_formats = try_get(sdn_data, lambda x: x['data']['mp4'], dict) or {}
+ for format_id, format_data in mp4_formats.items():
+ relative_url = format_data.get('url')
+ if not relative_url:
+ continue
+
+ try:
+ width, height = format_data.get('resolution')
+ except (TypeError, ValueError):
+ width, height = None, None
+
+ f = {
+ 'url': urljoin(sdn_url, relative_url),
+ 'format_id': 'http-%s' % format_id,
+ 'tbr': int_or_none(format_data.get('bandwidth'), scale=1000),
+ 'width': int_or_none(width),
+ 'height': int_or_none(height),
+ }
+ f.update(parse_codecs(format_data.get('codec')))
+ formats.append(f)
+
+ pls = sdn_data.get('pls', {})
+
+ def get_url(format_id):
+ return try_get(pls, lambda x: x[format_id]['url'], compat_str)
+
+ dash_rel_url = get_url('dash')
+ if dash_rel_url:
+ formats.extend(self._extract_mpd_formats(
+ urljoin(sdn_url, dash_rel_url), video_id, mpd_id='dash',
+ fatal=False))
+
+ hls_rel_url = get_url('hls')
+ if hls_rel_url:
+ formats.extend(self._extract_m3u8_formats(
+ urljoin(sdn_url, hls_rel_url), video_id, ext='mp4',
+ m3u8_id='hls', fatal=False))
+
+ return formats
+
+ def _real_extract(self, url):
+ params = parse_qs(url)
+
+ src = params['src'][0]
+ title = params['title'][0]
+ video_id = params.get('contentId', [_raw_id(src)])[0]
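+ # Append the SDN "spl2,2,VOD" spec to the src URL to request the JSON manifest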
+ formats = self._extract_sdn_formats(src + 'spl2,2,VOD', video_id)
+
+ duration = int_or_none(params.get('duration', [None])[0])
+ series = params.get('series', [None])[0]
+ thumbnail = params.get('poster', [None])[0]
+
+ return {
+ 'id': video_id,
+ 'title': title,
+ 'thumbnail': thumbnail,
+ 'duration': duration,
+ 'series': series,
+ 'formats': formats,
+ }
+
+
+class SeznamZpravyArticleIE(InfoExtractor):
+ _VALID_URL = r'https?://(?:www\.)?(?:seznam\.cz/zpravy|seznamzpravy\.cz)/clanek/(?:[^/?#&]+)-(?P<id>\d+)'
+ _API_URL = 'https://apizpravy.seznam.cz/'
+
+ _TESTS = [{
+ # two videos on one page, with SDN URL
+ 'url': 'https://www.seznamzpravy.cz/clanek/jejich-svet-na-nas-utoci-je-lepsi-branit-se-na-jejich-pisecku-rika-reziser-a-major-v-zaloze-marhoul-35990',
+ 'info_dict': {
+ 'id': '35990',
+ 'title': 'md5:6011c877a36905f28f271fcd8dcdb0f2',
+ 'description': 'md5:933f7b06fa337a814ba199d3596d27ba',
+ },
+ 'playlist_count': 2,
+ }, {
+ # video with live stream URL
+ 'url': 'https://www.seznam.cz/zpravy/clanek/znovu-do-vlady-s-ano-pavel-belobradek-ve-volebnim-specialu-seznamu-38489',
+ 'info_dict': {
+ 'id': '38489',
+ 'title': 'md5:8fa1afdc36fd378cf0eba2b74c5aca60',
+ 'description': 'md5:428e7926a1a81986ec7eb23078004fb4',
+ },
+ 'playlist_count': 1,
+ }]
+
+ def _real_extract(self, url):
+ article_id = self._match_id(url)
+
+ webpage = self._download_webpage(url, article_id)
+
+ info = self._search_json_ld(webpage, article_id, default={})
+
+ title = info.get('title') or self._og_search_title(webpage, fatal=False)
+ description = info.get('description') or self._og_search_description(webpage)
+
+ return self.playlist_result([
+ self.url_result(entry_url, ie=SeznamZpravyIE.ie_key())
+ for entry_url in SeznamZpravyIE._extract_embed_urls(url, webpage)],
+ article_id, title, description)
diff --git a/yt_dlp/extractor/shahid.py b/yt_dlp/extractor/shahid.py
new file mode 100644
index 0000000..d509e88
--- /dev/null
+++ b/yt_dlp/extractor/shahid.py
@@ -0,0 +1,217 @@
+import json
+import math
+import re
+
+from .aws import AWSIE
+from ..networking.exceptions import HTTPError
+from ..utils import (
+ clean_html,
+ ExtractorError,
+ InAdvancePagedList,
+ int_or_none,
+ parse_iso8601,
+ str_or_none,
+ urlencode_postdata,
+)
+
+
+class ShahidBaseIE(AWSIE):
+ _AWS_PROXY_HOST = 'api2.shahid.net'
+ _AWS_API_KEY = '2RRtuMHx95aNI1Kvtn2rChEuwsCogUd4samGPjLh'
+ _VALID_URL_BASE = r'https?://shahid\.mbc\.net/[a-z]{2}/'
+
+ def _handle_error(self, e):
+ fail_data = self._parse_json(
+ e.cause.response.read().decode('utf-8'), None, fatal=False)
+ if fail_data:
+ faults = fail_data.get('faults', [])
+ faults_message = ', '.join([clean_html(fault['userMessage']) for fault in faults if fault.get('userMessage')])
+ if faults_message:
+ raise ExtractorError(faults_message, expected=True)
+
+ def _call_api(self, path, video_id, request=None):
+ query = {}
+ if request:
+ query['request'] = json.dumps(request)
+ try:
+ return self._aws_execute_api({
+ 'uri': '/proxy/v2/' + path,
+ 'access_key': 'AKIAI6X4TYCIXM2B7MUQ',
+ 'secret_key': '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',
+ }, video_id, query)
+ except ExtractorError as e:
+ if isinstance(e.cause, HTTPError):
+ self._handle_error(e)
+ raise
+
+
+class ShahidIE(ShahidBaseIE):
+ _NETRC_MACHINE = 'shahid'
+ _VALID_URL = ShahidBaseIE._VALID_URL_BASE + r'(?:serie|show|movie)s/[^/]+/(?P<type>episode|clip|movie)-(?P<id>\d+)'
+ _TESTS = [{
+ 'url': 'https://shahid.mbc.net/ar/shows/%D9%85%D8%AA%D8%AD%D9%81-%D8%A7%D9%84%D8%AF%D8%AD%D9%8A%D8%AD-%D8%A7%D9%84%D9%85%D9%88%D8%B3%D9%85-1-%D9%83%D9%84%D9%8A%D8%A8-1/clip-816924',
+ 'info_dict': {
+ 'id': '816924',
+ 'ext': 'mp4',
+ 'title': 'متحف الدحيح الموسم 1 كليب 1',
+ 'timestamp': 1602806400,
+ 'upload_date': '20201016',
+ 'description': 'برومو',
+ 'duration': 22,
+ 'categories': ['كوميديا'],
+ },
+ 'params': {
+ # m3u8 download
+ 'skip_download': True,
+ }
+ }, {
+ 'url': 'https://shahid.mbc.net/ar/movies/%D8%A7%D9%84%D9%82%D9%86%D8%A7%D8%B5%D8%A9/movie-151746',
+ 'only_matching': True
+ }, {
+ # shahid plus subscriber only
+ 'url': 'https://shahid.mbc.net/ar/series/%D9%85%D8%B1%D8%A7%D9%8A%D8%A7-2011-%D8%A7%D9%84%D9%85%D9%88%D8%B3%D9%85-1-%D8%A7%D9%84%D8%AD%D9%84%D9%82%D8%A9-1/episode-90511',
+ 'only_matching': True
+ }, {
+ 'url': 'https://shahid.mbc.net/en/shows/Ramez-Fi-Al-Shallal-season-1-episode-1/episode-359319',
+ 'only_matching': True
+ }]
+
+ def _perform_login(self, username, password):
+ try:
+ user_data = self._download_json(
+ 'https://shahid.mbc.net/wd/service/users/login',
+ None, 'Logging in', data=json.dumps({
+ 'email': username,
+ 'password': password,
+ 'basic': 'false',
+ }).encode('utf-8'), headers={
+ 'Content-Type': 'application/json; charset=UTF-8',
+ })['user']
+ except ExtractorError as e:
+ if isinstance(e.cause, HTTPError):
+ self._handle_error(e)
+ raise
+
+ self._download_webpage(
+ 'https://shahid.mbc.net/populateContext',
+ None, 'Populate Context', data=urlencode_postdata({
+ 'firstName': user_data['firstName'],
+ 'lastName': user_data['lastName'],
+ 'userName': user_data['email'],
+ 'csg_user_name': user_data['email'],
+ 'subscriberId': user_data['id'],
+ 'sessionId': user_data['sessionId'],
+ }))
+
+ def _real_extract(self, url):
+ page_type, video_id = self._match_valid_url(url).groups()
+ if page_type == 'clip':
+ page_type = 'episode'
+
+ playout = self._call_api(
+ 'playout/new/url/' + video_id, video_id)['playout']
+
+ if not self.get_param('allow_unplayable_formats') and playout.get('drm'):
+ self.report_drm(video_id)
+
+ formats = self._extract_m3u8_formats(re.sub(
+ # https://docs.aws.amazon.com/mediapackage/latest/ug/manifest-filtering.html
+ r'aws\.manifestfilter=[\w:;,-]+&?',
+ '', playout['url']), video_id, 'mp4')
+
+ # video = self._call_api(
+ # 'product/id', video_id, {
+ # 'id': video_id,
+ # 'productType': 'ASSET',
+ # 'productSubType': page_type.upper()
+ # })['productModel']
+
+ response = self._download_json(
+ 'http://api.shahid.net/api/v1_1/%s/%s' % (page_type, video_id),
+ video_id, 'Downloading video JSON', query={
+ 'apiKey': 'sh@hid0nlin3',
+ 'hash': 'b2wMCTHpSmyxGqQjJFOycRmLSex+BpTK/ooxy6vHaqs=',
+ })
+ data = response.get('data', {})
+ error = data.get('error')
+ if error:
+ raise ExtractorError(
+ '%s returned error: %s' % (self.IE_NAME, '\n'.join(error.values())),
+ expected=True)
+
+ video = data[page_type]
+ title = video['title']
+ categories = [
+ category['name']
+ for category in video.get('genres', []) if 'name' in category]
+
+ return {
+ 'id': video_id,
+ 'title': title,
+ 'description': video.get('description'),
+ 'thumbnail': video.get('thumbnailUrl'),
+ 'duration': int_or_none(video.get('duration')),
+ 'timestamp': parse_iso8601(video.get('referenceDate')),
+ 'categories': categories,
+ 'series': video.get('showTitle') or video.get('showName'),
+ 'season': video.get('seasonTitle'),
+ 'season_number': int_or_none(video.get('seasonNumber')),
+ 'season_id': str_or_none(video.get('seasonId')),
+ 'episode_number': int_or_none(video.get('number')),
+ 'episode_id': video_id,
+ 'formats': formats,
+ }
+
+
+class ShahidShowIE(ShahidBaseIE):
+ _VALID_URL = ShahidBaseIE._VALID_URL_BASE + r'(?:show|serie)s/[^/]+/(?:show|series)-(?P<id>\d+)'
+ _TESTS = [{
+ 'url': 'https://shahid.mbc.net/ar/shows/%D8%B1%D8%A7%D9%85%D8%B2-%D9%82%D8%B1%D8%B4-%D8%A7%D9%84%D8%A8%D8%AD%D8%B1/show-79187',
+ 'info_dict': {
+ 'id': '79187',
+ 'title': 'رامز قرش البحر',
+ 'description': 'md5:c88fa7e0f02b0abd39d417aee0d046ff',
+ },
+ 'playlist_mincount': 32,
+ }, {
+ 'url': 'https://shahid.mbc.net/ar/series/How-to-live-Longer-(The-Big-Think)/series-291861',
+ 'only_matching': True
+ }]
+ _PAGE_SIZE = 30
+
+ def _real_extract(self, url):
+ show_id = self._match_id(url)
+
+ product = self._call_api(
+ 'playableAsset', show_id, {'showId': show_id})['productModel']
+ playlist = product['playlist']
+ playlist_id = playlist['id']
+ show = product.get('show', {})
+
+ def page_func(page_num):
+ playlist = self._call_api(
+ 'product/playlist', show_id, {
+ 'playListId': playlist_id,
+ 'pageNumber': page_num,
+ 'pageSize': self._PAGE_SIZE,
+ 'sorts': [{
+ 'order': 'DESC',
+ 'type': 'SORTDATE'
+ }],
+ })
+ for product in playlist.get('productList', {}).get('products', []):
+ product_url = (product.get('productUrl') or {}).get('url')
+ if not product_url:
+ continue
+ yield self.url_result(
+ product_url, 'Shahid',
+ str_or_none(product.get('id')),
+ product.get('title'))
+
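+ # The total episode count is known upfront, so pages can be fetched lazily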
+ entries = InAdvancePagedList(
+ page_func,
+ math.ceil(playlist['count'] / self._PAGE_SIZE),
+ self._PAGE_SIZE)
+
+ return self.playlist_result(
+ entries, show_id, show.get('title'), show.get('description'))
diff --git a/yt_dlp/extractor/sharevideos.py b/yt_dlp/extractor/sharevideos.py
new file mode 100644
index 0000000..3132c7a
--- /dev/null
+++ b/yt_dlp/extractor/sharevideos.py
@@ -0,0 +1,6 @@
+from .common import InfoExtractor
+
+
+class ShareVideosEmbedIE(InfoExtractor):
+ _VALID_URL = False
+ _EMBED_REGEX = [r'<iframe[^>]+?\bsrc\s*=\s*(["\'])(?P<url>(?:https?:)?//embed\.share-videos\.se/auto/embed/\d+\?.*?\buid=\d+.*?)\1']
diff --git a/yt_dlp/extractor/shemaroome.py b/yt_dlp/extractor/shemaroome.py
new file mode 100644
index 0000000..ec9938b
--- /dev/null
+++ b/yt_dlp/extractor/shemaroome.py
@@ -0,0 +1,102 @@
+from .common import InfoExtractor
+from ..aes import aes_cbc_decrypt, unpad_pkcs7
+from ..compat import (
+ compat_b64decode,
+)
+from ..utils import (
+ bytes_to_intlist,
+ ExtractorError,
+ intlist_to_bytes,
+ unified_strdate,
+)
+
+
+class ShemarooMeIE(InfoExtractor):
+ _VALID_URL = r'https?://(?:www\.)?shemaroome\.com/(?:movies|shows)/(?P<id>[^?#]+)'
+ _TESTS = [{
+ 'url': 'https://www.shemaroome.com/movies/dil-hai-tumhaara',
+ 'info_dict': {
+ 'id': 'dil-hai-tumhaara',
+ 'ext': 'mp4',
+ 'title': 'Dil Hai Tumhaara',
+ 'release_date': '20020906',
+ 'thumbnail': r're:^https?://.*\.jpg$',
+ 'description': 'md5:2782c4127807103cf5a6ae2ca33645ce',
+ },
+ 'params': {
+ 'skip_download': True
+ }
+ }, {
+ 'url': 'https://www.shemaroome.com/shows/jurm-aur-jazbaat/laalach',
+ 'info_dict': {
+ 'id': 'jurm-aur-jazbaat_laalach',
+ 'ext': 'mp4',
+ 'title': 'Laalach',
+ 'description': 'md5:92b79c2dcb539b0ab53f9fa5a048f53c',
+ 'thumbnail': r're:^https?://.*\.jpg$',
+ 'release_date': '20210507',
+ },
+ 'params': {
+ 'skip_download': True
+ },
+ 'skip': 'Premium videos cannot be downloaded yet.'
+ }, {
+ 'url': 'https://www.shemaroome.com/shows/jai-jai-jai-bajrang-bali/jai-jai-jai-bajrang-bali-episode-99',
+ 'info_dict': {
+ 'id': 'jai-jai-jai-bajrang-bali_jai-jai-jai-bajrang-bali-episode-99',
+ 'ext': 'mp4',
+ 'title': 'Jai Jai Jai Bajrang Bali Episode 99',
+ 'description': 'md5:850d127a18ee3f9529d7fbde2f49910d',
+ 'thumbnail': r're:^https?://.*\.jpg$',
+ 'release_date': '20110101',
+ },
+ 'params': {
+ 'skip_download': True
+ }
+ }]
+
+ def _real_extract(self, url):
+ video_id = self._match_id(url).replace('/', '_')
+ webpage = self._download_webpage(url, video_id)
+ title = self._search_regex(r'id="ma_title" value="([^"]+)', webpage, 'title')
+ thumbnail = self._og_search_thumbnail(webpage)
+ content_def = self._search_regex(r'id="content_definition" value="([^"]+)', webpage, 'content_def')
+ catalog_id = self._search_regex(r'id="catalog_id" value="([^"]+)', webpage, 'catalog_id')
+ item_category = self._search_regex(r'id="item_category" value="([^"]+)', webpage, 'item_category')
+ content_id = self._search_regex(r'id="content_id" value="([^"]+)', webpage, 'content_id')
+
+ data = f'catalog_id={catalog_id}&content_id={content_id}&category={item_category}&content_def={content_def}'
+ data_json = self._download_json('https://www.shemaroome.com/users/user_all_lists', video_id, data=data.encode())
+ if not data_json.get('status'):
+ raise ExtractorError('Premium videos cannot be downloaded yet.', expected=True)
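+ # new_play_url and key are base64-encoded; the URL is AES-CBC encrypted with a zero IV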
+ url_data = bytes_to_intlist(compat_b64decode(data_json['new_play_url']))
+ key = bytes_to_intlist(compat_b64decode(data_json['key']))
+ iv = [0] * 16
+ m3u8_url = unpad_pkcs7(intlist_to_bytes(aes_cbc_decrypt(url_data, key, iv))).decode('ascii')
+ headers = {'stream_key': data_json['stream_key']}
+ formats, m3u8_subs = self._extract_m3u8_formats_and_subtitles(m3u8_url, video_id, fatal=False, headers=headers)
+ for fmt in formats:
+ fmt['http_headers'] = headers
+
+ release_date = self._html_search_regex(
+ (r'itemprop="uploadDate">\s*([\d-]+)', r'id="release_date" value="([\d-]+)'),
+ webpage, 'release date', fatal=False)
+
+ subtitles = {}
+ sub_url = data_json.get('subtitle')
+ if sub_url:
+ subtitles.setdefault('EN', []).append({
+ 'url': self._proto_relative_url(sub_url),
+ })
+ subtitles = self._merge_subtitles(subtitles, m3u8_subs)
+ description = self._html_search_regex(r'(?s)>Synopsis(</.+?)</', webpage, 'description', fatal=False)
+
+ return {
+ 'id': video_id,
+ 'formats': formats,
+ 'title': title,
+ 'thumbnail': thumbnail,
+ 'release_date': unified_strdate(release_date),
+ 'description': description,
+ 'subtitles': subtitles,
+ }
diff --git a/yt_dlp/extractor/showroomlive.py b/yt_dlp/extractor/showroomlive.py
new file mode 100644
index 0000000..ab18953
--- /dev/null
+++ b/yt_dlp/extractor/showroomlive.py
@@ -0,0 +1,80 @@
+from .common import InfoExtractor
+from ..compat import compat_str
+from ..utils import (
+ ExtractorError,
+ int_or_none,
+ urljoin,
+)
+
+
+class ShowRoomLiveIE(InfoExtractor):
+ _VALID_URL = r'https?://(?:www\.)?showroom-live\.com/(?!onlive|timetable|event|campaign|news|ranking|room)(?P<id>[^/?#&]+)'
+ _TEST = {
+ 'url': 'https://www.showroom-live.com/48_Nana_Okada',
+ 'only_matching': True,
+ }
+
+ def _real_extract(self, url):
+ broadcaster_id = self._match_id(url)
+
+ webpage = self._download_webpage(url, broadcaster_id)
+
+ room_id = self._search_regex(
+ (r'SrGlobal\.roomId\s*=\s*(\d+)',
+ r'(?:profile|room)\?room_id\=(\d+)'), webpage, 'room_id')
+
+ room = self._download_json(
+ urljoin(url, '/api/room/profile?room_id=%s' % room_id),
+ broadcaster_id)
+
+ is_live = room.get('is_onlive')
+ if is_live is not True:
+ raise ExtractorError('%s is offline' % broadcaster_id, expected=True)
+
+ uploader = room.get('performer_name') or broadcaster_id
+ title = room.get('room_name') or room.get('main_name') or uploader
+
+ streaming_url_list = self._download_json(
+ urljoin(url, '/api/live/streaming_url?room_id=%s' % room_id),
+ broadcaster_id)['streaming_url_list']
+
+ formats = []
+ for stream in streaming_url_list:
+ stream_url = stream.get('url')
+ if not stream_url:
+ continue
+ stream_type = stream.get('type')
+ if stream_type == 'hls':
+ m3u8_formats = self._extract_m3u8_formats(
+ stream_url, broadcaster_id, ext='mp4', m3u8_id='hls',
+ live=True)
+ for f in m3u8_formats:
+ f['quality'] = int_or_none(stream.get('quality', 100))
+ formats.extend(m3u8_formats)
+ elif stream_type == 'rtmp':
+ stream_name = stream.get('stream_name')
+ if not stream_name:
+ continue
+ formats.append({
+ 'url': stream_url,
+ 'play_path': stream_name,
+ 'page_url': url,
+ 'player_url': 'https://www.showroom-live.com/assets/swf/v3/ShowRoomLive.swf',
+ 'rtmp_live': True,
+ 'ext': 'flv',
+ 'format_id': 'rtmp',
+ 'format_note': stream.get('label'),
+ 'quality': int_or_none(stream.get('quality', 100)),
+ })
+
+ return {
+ 'id': compat_str(room.get('live_id') or broadcaster_id),
+ 'title': title,
+ 'description': room.get('description'),
+ 'timestamp': int_or_none(room.get('current_live_started_at')),
+ 'uploader': uploader,
+ 'uploader_id': broadcaster_id,
+ 'view_count': int_or_none(room.get('view_num')),
+ 'formats': formats,
+ 'is_live': True,
+ }
diff --git a/yt_dlp/extractor/sibnet.py b/yt_dlp/extractor/sibnet.py
new file mode 100644
index 0000000..73bb75d
--- /dev/null
+++ b/yt_dlp/extractor/sibnet.py
@@ -0,0 +1,17 @@
+from .common import InfoExtractor
+
+
+class SibnetEmbedIE(InfoExtractor):
+ # Ref: https://help.sibnet.ru/?sibnet_video_embed
+ _VALID_URL = False
+ _EMBED_REGEX = [r'<iframe\b[^>]+\bsrc=(["\'])(?P<url>(?:https?:)?//video\.sibnet\.ru/shell\.php\?.*?\bvideoid=\d+.*?)\1']
+ _WEBPAGE_TESTS = [{
+ 'url': 'https://phpbb3.x-tk.ru/bbcode-video-sibnet-t24.html',
+ 'info_dict': {
+ 'id': 'shell', # FIXME?
+ 'ext': 'mp4',
+ 'age_limit': 0,
+ 'thumbnail': 'https://video.sibnet.ru/upload/cover/video_1887072_0.jpg',
+ 'title': 'КВН Москва не сразу строилась - Девушка впервые играет в Mortal Kombat',
+ }
+ }]
diff --git a/yt_dlp/extractor/simplecast.py b/yt_dlp/extractor/simplecast.py
new file mode 100644
index 0000000..ec349dd
--- /dev/null
+++ b/yt_dlp/extractor/simplecast.py
@@ -0,0 +1,151 @@
+from .common import InfoExtractor
+from ..utils import (
+ clean_podcast_url,
+ int_or_none,
+ parse_iso8601,
+ strip_or_none,
+ try_get,
+ urlencode_postdata,
+)
+
+
+class SimplecastBaseIE(InfoExtractor):
+ _UUID_REGEX = r'[\da-f]{8}-(?:[\da-f]{4}-){3}[\da-f]{12}'
+ _API_BASE = 'https://api.simplecast.com/'
+
+ def _call_api(self, path_tmpl, video_id):
+ return self._download_json(
+ self._API_BASE + path_tmpl % video_id, video_id)
+
+ def _call_search_api(self, resource, resource_id, resource_url):
+ return self._download_json(
+ 'https://api.simplecast.com/%ss/search' % resource, resource_id,
+ data=urlencode_postdata({'url': resource_url}))
+
+ def _parse_episode(self, episode):
+ episode_id = episode['id']
+ title = episode['title'].strip()
+ audio_file = episode.get('audio_file') or {}
+ audio_file_url = audio_file.get('url') or episode.get('audio_file_url') or episode['enclosure_url']
+
+ season = episode.get('season') or {}
+ season_href = season.get('href')
+ season_id = None
+ if season_href:
+ season_id = self._search_regex(
+ r'https?://api\.simplecast\.com/seasons/(%s)' % self._UUID_REGEX,
+ season_href, 'season id', default=None)
+
+ webpage_url = episode.get('episode_url')
+ channel_url = None
+ if webpage_url:
+ channel_url = self._search_regex(
+ r'(https?://[^/]+\.simplecast\.com)',
+ webpage_url, 'channel url', default=None)
+
+ return {
+ 'id': episode_id,
+ 'display_id': episode.get('slug'),
+ 'title': title,
+ 'url': clean_podcast_url(audio_file_url),
+ 'webpage_url': webpage_url,
+ 'channel_url': channel_url,
+ 'series': try_get(episode, lambda x: x['podcast']['title']),
+ 'season_number': int_or_none(season.get('number')),
+ 'season_id': season_id,
+ 'thumbnail': episode.get('image_url'),
+ 'episode_id': episode_id,
+ 'episode_number': int_or_none(episode.get('number')),
+ 'description': strip_or_none(episode.get('description')),
+ 'timestamp': parse_iso8601(episode.get('published_at')),
+ 'duration': int_or_none(episode.get('duration')),
+ 'filesize': int_or_none(audio_file.get('size') or episode.get('audio_file_size')),
+ }
+
+
+class SimplecastIE(SimplecastBaseIE):
+ IE_NAME = 'simplecast'
+ _VALID_URL = r'https?://(?:api\.simplecast\.com/episodes|player\.simplecast\.com)/(?P<id>%s)' % SimplecastBaseIE._UUID_REGEX
+ _EMBED_REGEX = [rf'''(?x)<iframe[^>]+src=["\']
+ (?P<url>https?://(?:
+ embed\.simplecast\.com/[0-9a-f]{{8}}|
+ player\.simplecast\.com/{SimplecastBaseIE._UUID_REGEX}
+ ))''']
+ _COMMON_TEST_INFO = {
+ 'display_id': 'errant-signal-chris-franklin-new-wave-video-essays',
+ 'id': 'b6dc49a2-9404-4853-9aa9-9cfc097be876',
+ 'ext': 'mp3',
+ 'title': 'Errant Signal - Chris Franklin & New Wave Video Essays',
+ 'episode_number': 1,
+ 'episode_id': 'b6dc49a2-9404-4853-9aa9-9cfc097be876',
+ 'description': 'md5:34752789d3d2702e2d2c975fbd14f357',
+ 'season_number': 1,
+ 'season_id': 'e23df0da-bae4-4531-8bbf-71364a88dc13',
+ 'series': 'The RE:BIND.io Podcast',
+ 'duration': 5343,
+ 'timestamp': 1580979475,
+ 'upload_date': '20200206',
+ 'webpage_url': r're:^https?://the-re-bind-io-podcast\.simplecast\.com/episodes/errant-signal-chris-franklin-new-wave-video-essays',
+ 'channel_url': r're:^https?://the-re-bind-io-podcast\.simplecast\.com$',
+ }
+ _TESTS = [{
+ 'url': 'https://api.simplecast.com/episodes/b6dc49a2-9404-4853-9aa9-9cfc097be876',
+ 'md5': '8c93be7be54251bf29ee97464eabe61c',
+ 'info_dict': _COMMON_TEST_INFO,
+ }, {
+ 'url': 'https://player.simplecast.com/b6dc49a2-9404-4853-9aa9-9cfc097be876',
+ 'only_matching': True,
+ }]
+
+ def _real_extract(self, url):
+ episode_id = self._match_id(url)
+ episode = self._call_api('episodes/%s', episode_id)
+ return self._parse_episode(episode)
+
+
+class SimplecastEpisodeIE(SimplecastBaseIE):
+ IE_NAME = 'simplecast:episode'
+ _VALID_URL = r'https?://(?!api\.)[^/]+\.simplecast\.com/episodes/(?P<id>[^/?&#]+)'
+ _TEST = {
+ 'url': 'https://the-re-bind-io-podcast.simplecast.com/episodes/errant-signal-chris-franklin-new-wave-video-essays',
+ 'md5': '8c93be7be54251bf29ee97464eabe61c',
+ 'info_dict': SimplecastIE._COMMON_TEST_INFO,
+ }
+
+ def _real_extract(self, url):
+ mobj = self._match_valid_url(url)
+ episode = self._call_search_api(
+ 'episode', mobj.group(1), mobj.group(0))
+ return self._parse_episode(episode)
+
+
+class SimplecastPodcastIE(SimplecastBaseIE):
+ IE_NAME = 'simplecast:podcast'
+ _VALID_URL = r'https?://(?!(?:api|cdn|embed|feeds|player)\.)(?P<id>[^/]+)\.simplecast\.com(?!/episodes/[^/?&#]+)'
+ _TESTS = [{
+ 'url': 'https://the-re-bind-io-podcast.simplecast.com',
+ 'playlist_mincount': 33,
+ 'info_dict': {
+ 'id': '07d28d26-7522-42eb-8c53-2bdcfc81c43c',
+ 'title': 'The RE:BIND.io Podcast',
+ },
+ }, {
+ 'url': 'https://the-re-bind-io-podcast.simplecast.com/episodes',
+ 'only_matching': True,
+ }]
+
+ def _real_extract(self, url):
+ subdomain = self._match_id(url)
+ site = self._call_search_api('site', subdomain, url)
+ podcast = site['podcast']
+ podcast_id = podcast['id']
+ podcast_title = podcast.get('title')
+
+ def entries():
+ episodes = self._call_api('podcasts/%s/episodes', podcast_id)
+ for episode in (episodes.get('collection') or []):
+ info = self._parse_episode(episode)
+ info['series'] = podcast_title
+ yield info
+
+ return self.playlist_result(entries(), podcast_id, podcast_title)
diff --git a/yt_dlp/extractor/sina.py b/yt_dlp/extractor/sina.py
new file mode 100644
index 0000000..eeb9ebb
--- /dev/null
+++ b/yt_dlp/extractor/sina.py
@@ -0,0 +1,109 @@
+from .common import InfoExtractor
+from ..networking import HEADRequest
+from ..utils import (
+ ExtractorError,
+ clean_html,
+ get_element_by_attribute,
+ int_or_none,
+ qualities,
+ update_url_query,
+)
+
+
+class SinaIE(InfoExtractor):
+ _VALID_URL = r'''(?x)https?://(?:[^/?#]+\.)?video\.sina\.com\.cn/
+ (?:
+ (?:view/|.*\#)(?P<id>\d+)|
+ .+?/(?P<pseudo_id>[^/?#]+)(?:\.s?html)|
+ # This is used by external sites like Weibo
+ api/sinawebApi/outplay.php/(?P<token>.+?)\.swf
+ )
+ '''
+
+ _TESTS = [
+ {
+ 'url': 'http://video.sina.com.cn/news/spj/topvideoes20160504/?opsubject_id=top1#250576622',
+ 'md5': 'd38433e2fc886007729735650ae4b3e9',
+ 'info_dict': {
+ 'id': '250576622',
+ 'ext': 'mp4',
+ 'title': '现场:克鲁兹宣布退选 特朗普将稳获提名',
+ }
+ },
+ {
+ 'url': 'http://video.sina.com.cn/v/b/101314253-1290078633.html',
+ 'info_dict': {
+ 'id': '101314253',
+ 'ext': 'flv',
+ 'title': '军方提高对朝情报监视级别',
+ },
+ 'skip': 'the page does not exist or has been deleted',
+ },
+ {
+ 'url': 'http://video.sina.com.cn/view/250587748.html',
+ 'md5': '3d1807a25c775092aab3bc157fff49b4',
+ 'info_dict': {
+ 'id': '250587748',
+ 'ext': 'mp4',
+ 'title': '瞬间泪目:8年前汶川地震珍贵视频首曝光',
+ },
+ },
+ ]
+
+ def _real_extract(self, url):
+ mobj = self._match_valid_url(url)
+
+ video_id = mobj.group('id')
+ if not video_id:
+ if mobj.group('token') is not None:
+ # The video id is in the redirected url
+ self.to_screen('Getting video id')
+ request = HEADRequest(url)
+ _, urlh = self._download_webpage_handle(request, 'NA', False)
+ return self._real_extract(urlh.url)
+ else:
+ pseudo_id = mobj.group('pseudo_id')
+ webpage = self._download_webpage(url, pseudo_id)
+ error = get_element_by_attribute('class', 'errtitle', webpage)
+ if error:
+ raise ExtractorError('%s said: %s' % (
+ self.IE_NAME, clean_html(error)), expected=True)
+ video_id = self._search_regex(
+ r"video_id\s*:\s*'(\d+)'", webpage, 'video id')
+
+ video_data = self._download_json(
+ 'http://s.video.sina.com.cn/video/h5play',
+ video_id, query={'video_id': video_id})
+ if video_data['code'] != 1:
+ raise ExtractorError('%s said: %s' % (
+ self.IE_NAME, video_data['message']), expected=True)
+ video_data = video_data['data']
+ title = video_data['title']
+ description = video_data.get('description')
+ if description:
+ description = description.strip()
+
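+ # Quality ids ordered from worst to best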
+ preference = qualities(['cif', 'sd', 'hd', 'fhd', 'ffd'])
+ formats = []
+ for quality_id, quality in video_data.get('videos', {}).get('mp4', {}).items():
+ file_api = quality.get('file_api')
+ file_id = quality.get('file_id')
+ if not file_api or not file_id:
+ continue
+ formats.append({
+ 'format_id': quality_id,
+ 'url': update_url_query(file_api, {'vid': file_id}),
+ 'quality': preference(quality_id),
+ 'ext': 'mp4',
+ })
+
+ return {
+ 'id': video_id,
+ 'title': title,
+ 'description': description,
+ 'thumbnail': video_data.get('image'),
+ 'duration': int_or_none(video_data.get('length')),
+ 'timestamp': int_or_none(video_data.get('create_time')),
+ 'formats': formats,
+ }
diff --git a/yt_dlp/extractor/sixplay.py b/yt_dlp/extractor/sixplay.py
new file mode 100644
index 0000000..ef93b92
--- /dev/null
+++ b/yt_dlp/extractor/sixplay.py
@@ -0,0 +1,122 @@
+from .common import InfoExtractor
+from ..compat import (
+ compat_str,
+)
+from ..utils import (
+ determine_ext,
+ int_or_none,
+ parse_qs,
+ try_get,
+ qualities,
+)
+
+
+class SixPlayIE(InfoExtractor):
+ IE_NAME = '6play'
+ _VALID_URL = r'(?:6play:|https?://(?:www\.)?(?P<domain>6play\.fr|rtlplay\.be|play\.rtl\.hr|rtlmost\.hu)/.+?-c_)(?P<id>[0-9]+)'
+ _TESTS = [{
+ 'url': 'https://www.6play.fr/minute-par-minute-p_9533/le-but-qui-a-marque-lhistoire-du-football-francais-c_12041051',
+ 'md5': '31fcd112637baa0c2ab92c4fcd8baf27',
+ 'info_dict': {
+ 'id': '12041051',
+ 'ext': 'mp4',
+ 'title': 'Le but qui a marqué l\'histoire du football français !',
+ 'description': 'md5:b59e7e841d646ef1eb42a7868eb6a851',
+ },
+ }, {
+ 'url': 'https://www.rtlplay.be/rtl-info-13h-p_8551/les-titres-du-rtlinfo-13h-c_12045869',
+ 'only_matching': True,
+ }, {
+ 'url': 'https://play.rtl.hr/pj-masks-p_9455/epizoda-34-sezona-1-catboyevo-cudo-na-dva-kotaca-c_11984989',
+ 'only_matching': True,
+ }, {
+ 'url': 'https://www.rtlmost.hu/megtorve-p_14167/megtorve-6-resz-c_12397787',
+ 'only_matching': True,
+ }]
+
+ def _real_extract(self, url):
+ domain, video_id = self._match_valid_url(url).groups()
+ service, consumer_name = {
+ '6play.fr': ('6play', 'm6web'),
+ 'rtlplay.be': ('rtlbe_rtl_play', 'rtlbe'),
+ 'play.rtl.hr': ('rtlhr_rtl_play', 'rtlhr'),
+ 'rtlmost.hu': ('rtlhu_rtl_most', 'rtlhu'),
+ }.get(domain, ('6play', 'm6web'))
+
+ data = self._download_json(
+ 'https://pc.middleware.6play.fr/6play/v2/platforms/m6group_web/services/%s/videos/clip_%s' % (service, video_id),
+ video_id, headers={
+ 'x-customer-name': consumer_name
+ }, query={
+ 'csa': 5,
+ 'with': 'clips',
+ })
+
+ clip_data = data['clips'][0]
+ title = clip_data['title']
+
+ urls = []
+ quality_key = qualities(['lq', 'sd', 'hq', 'hd'])
+ formats = []
+ subtitles = {}
+ assets = clip_data.get('assets') or []
+ for asset in assets:
+ asset_url = asset.get('full_physical_path')
+ protocol = asset.get('protocol')
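+ # Skip duplicates and DRM-only PrimeTime/HLSfp assets that lack a _drmnp/_unpnp ISM path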
+ if not asset_url or ((protocol == 'primetime' or asset.get('type') == 'usp_hlsfp_h264') and not ('_drmnp.ism/' in asset_url or '_unpnp.ism/' in asset_url)) or asset_url in urls:
+ continue
+ urls.append(asset_url)
+ container = asset.get('video_container')
+ ext = determine_ext(asset_url)
+ if protocol == 'http_subtitle' or ext == 'vtt':
+ subtitles.setdefault('fr', []).append({'url': asset_url})
+ continue
+ if container == 'm3u8' or ext == 'm3u8':
+ if protocol == 'usp':
+ if parse_qs(asset_url).get('token', [None])[0]:
+ urlh = self._request_webpage(
+ asset_url, video_id, fatal=False,
+ headers=self.geo_verification_headers())
+ if not urlh:
+ continue
+ asset_url = urlh.url
+ asset_url = asset_url.replace('_drmnp.ism/', '_unpnp.ism/')
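+ # Retry alternative _sd<N> renditions (3 down to 1) until HLS formats are found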
+ for i in range(3, 0, -1):
+ asset_url = asset_url.replace('_sd1/', '_sd%d/' % i)
+ m3u8_formats = self._extract_m3u8_formats(
+ asset_url, video_id, 'mp4', 'm3u8_native',
+ m3u8_id='hls', fatal=False)
+ formats.extend(m3u8_formats)
+ formats.extend(self._extract_mpd_formats(
+ asset_url.replace('.m3u8', '.mpd'),
+ video_id, mpd_id='dash', fatal=False))
+ if m3u8_formats:
+ break
+ else:
+ formats.extend(self._extract_m3u8_formats(
+ asset_url, video_id, 'mp4', 'm3u8_native',
+ m3u8_id='hls', fatal=False))
+ elif container == 'mp4' or ext == 'mp4':
+ quality = asset.get('video_quality')
+ formats.append({
+ 'url': asset_url,
+ 'format_id': quality,
+ 'quality': quality_key(quality),
+ 'ext': ext,
+ })
+
+ def get(getter):
+ for src in (data, clip_data):
+ v = try_get(src, getter, compat_str)
+ if v:
+ return v
+
+ return {
+ 'id': video_id,
+ 'title': title,
+ 'description': get(lambda x: x['description']),
+ 'duration': int_or_none(clip_data.get('duration')),
+ 'series': get(lambda x: x['program']['title']),
+ 'formats': formats,
+ 'subtitles': subtitles,
+ }
diff --git a/yt_dlp/extractor/skeb.py b/yt_dlp/extractor/skeb.py
new file mode 100644
index 0000000..54dfdc4
--- /dev/null
+++ b/yt_dlp/extractor/skeb.py
@@ -0,0 +1,140 @@
+from .common import InfoExtractor
+from ..utils import ExtractorError, determine_ext, parse_qs, traverse_obj
+
+
+class SkebIE(InfoExtractor):
+ _VALID_URL = r'https?://skeb\.jp/@[^/]+/works/(?P<id>\d+)'
+
+ _TESTS = [{
+ 'url': 'https://skeb.jp/@riiru_wm/works/10',
+ 'info_dict': {
+ 'id': '466853',
+ 'title': '内容はおまかせします! by 姫ノ森りぃる@一周年',
+ 'description': 'md5:1ec50901efc3437cfbfe3790468d532d',
+ 'uploader': '姫ノ森りぃる@一周年',
+ 'uploader_id': 'riiru_wm',
+ 'age_limit': 0,
+ 'tags': [],
+ 'url': r're:https://skeb.+',
+ 'thumbnail': r're:https://skeb.+',
+ 'subtitles': {
+ 'jpn': [{
+ 'url': r're:https://skeb.+',
+ 'ext': 'vtt'
+ }]
+ },
+ 'width': 720,
+ 'height': 405,
+ 'duration': 313,
+ 'fps': 30,
+ 'ext': 'mp4',
+ },
+ }, {
+ 'url': 'https://skeb.jp/@furukawa_nob/works/3',
+ 'info_dict': {
+ 'id': '489408',
+ 'title': 'いつもお世話になってお... by 古川ノブ@音楽とVlo...',
+ 'description': 'md5:5adc2e41d06d33b558bf7b1faeb7b9c2',
+ 'uploader': '古川ノブ@音楽とVlogのVtuber',
+ 'uploader_id': 'furukawa_nob',
+ 'age_limit': 0,
+ 'tags': [
+ 'よろしく', '大丈夫', 'お願い', 'でした',
+ '是非', 'O', 'バー', '遊び', 'おはよう',
+ 'オーバ', 'ボイス',
+ ],
+ 'url': r're:https://skeb.+',
+ 'thumbnail': r're:https://skeb.+',
+ 'subtitles': {
+ 'jpn': [{
+ 'url': r're:https://skeb.+',
+ 'ext': 'vtt'
+ }]
+ },
+ 'duration': 98,
+ 'ext': 'mp3',
+ 'vcodec': 'none',
+ 'abr': 128,
+ },
+ }, {
+ 'url': 'https://skeb.jp/@mollowmollow/works/6',
+ 'info_dict': {
+ 'id': '6',
+ 'title': 'ヒロ。\n\n私のキャラク... by 諸々',
+ 'description': 'md5:aa6cbf2ba320b50bce219632de195f07',
+ '_type': 'playlist',
+ 'entries': [{
+ 'id': '486430',
+ 'title': 'ヒロ。\n\n私のキャラク... by 諸々',
+ 'description': 'md5:aa6cbf2ba320b50bce219632de195f07',
+ }, {
+ 'id': '486431',
+ 'title': 'ヒロ。\n\n私のキャラク... by 諸々',
+ }]
+ }
+ }]
+
+ def _real_extract(self, url):
+ video_id = self._match_id(url)
+ nuxt_data = self._search_nuxt_data(self._download_webpage(url, video_id), video_id)
+
+ parent = {
+ 'id': video_id,
+ 'title': nuxt_data.get('title'),
+ 'description': nuxt_data.get('description'),
+ 'uploader': traverse_obj(nuxt_data, ('creator', 'name')),
+ 'uploader_id': traverse_obj(nuxt_data, ('creator', 'screen_name')),
+ 'age_limit': 18 if nuxt_data.get('nsfw') else 0,
+ 'tags': nuxt_data.get('tag_list'),
+ }
+
+ entries = []
+ for item in nuxt_data.get('previews') or []:
+ vid_url = item.get('url')
+ if not vid_url or not item.get('id'):
+ continue
+ given_ext = traverse_obj(item, ('information', 'extension'))
+ preview_ext = determine_ext(vid_url, default_ext=None)
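+ # The URL may lack an extension; recover it from the response-content-disposition filename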
+ if not preview_ext:
+ content_disposition = parse_qs(vid_url)['response-content-disposition'][0]
+ preview_ext = self._search_regex(
+ r'filename="[^"]+\.([^\.]+?)"', content_disposition,
+ 'preview file extension', fatal=False, group=1)
+ if preview_ext not in ('mp4', 'mp3'):
+ continue
+ width, height = traverse_obj(item, ('information', 'width')), traverse_obj(item, ('information', 'height'))
+ if width is not None and height is not None:
+ # the longest side is at most 720px for non-client viewers
+ max_size = max(width, height)
+ width, height = list(x * 720 // max_size for x in (width, height))
+ entries.append({
+ **parent,
+ 'id': str(item['id']),
+ 'url': vid_url,
+ 'thumbnail': item.get('poster_url'),
+ 'subtitles': {
+ 'jpn': [{
+ 'url': item.get('vtt_url'),
+ 'ext': 'vtt',
+ }]
+ } if item.get('vtt_url') else None,
+ 'width': width,
+ 'height': height,
+ 'duration': traverse_obj(item, ('information', 'duration')),
+ 'fps': traverse_obj(item, ('information', 'frame_rate')),
+ 'ext': preview_ext or given_ext,
+ 'vcodec': 'none' if preview_ext == 'mp3' else None,
+ # you'll always get 128kbps MP3 for non-client viewers
+ 'abr': 128 if preview_ext == 'mp3' else None,
+ })
+
+ if not entries:
+ raise ExtractorError('No video/audio attachment found in this commission.', expected=True)
+ elif len(entries) == 1:
+ return entries[0]
+ else:
+ parent.update({
+ '_type': 'playlist',
+ 'entries': entries,
+ })
+ return parent
diff --git a/yt_dlp/extractor/sky.py b/yt_dlp/extractor/sky.py
new file mode 100644
index 0000000..574ac21
--- /dev/null
+++ b/yt_dlp/extractor/sky.py
@@ -0,0 +1,135 @@
+import re
+
+from .common import InfoExtractor
+from ..utils import (
+ extract_attributes,
+ strip_or_none,
+)
+
+
+class SkyBaseIE(InfoExtractor):
+ BRIGHTCOVE_URL_TEMPLATE = 'http://players.brightcove.net/%s/%s_default/index.html?videoId=%s'
+ _SDC_EL_REGEX = r'(?s)(<div[^>]+data-(?:component-name|fn)="sdc-(?:articl|sit)e-video"[^>]*>)'
+
+ def _process_video_element(self, webpage, sdc_el, url):
+ sdc = extract_attributes(sdc_el)
+ provider = sdc.get('data-provider')
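+ # Only Brightcove-hosted videos are handled; other providers yield no entry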
+ if provider == 'brightcove':
+ video_id = sdc['data-video-id']
+ account_id = sdc.get('data-account-id') or '6058004172001'
+ player_id = sdc.get('data-player-id') or 'RC9PQUaJ6'
+ video_url = self.BRIGHTCOVE_URL_TEMPLATE % (account_id, player_id, video_id)
+ ie_key = 'BrightcoveNew'
+
+ return {
+ '_type': 'url_transparent',
+ 'id': video_id,
+ 'url': video_url,
+ 'ie_key': ie_key,
+ }
+
+ def _real_extract(self, url):
+ video_id = self._match_id(url)
+ webpage = self._download_webpage(url, video_id)
+ info = self._process_video_element(webpage, self._search_regex(
+ self._SDC_EL_REGEX, webpage, 'sdc element'), url)
+ info.update({
+ 'title': self._og_search_title(webpage),
+ 'description': strip_or_none(self._og_search_description(webpage)),
+ })
+ return info
+
+
+class SkySportsIE(SkyBaseIE):
+ IE_NAME = 'sky:sports'
+ _VALID_URL = r'https?://(?:www\.)?skysports\.com/watch/video/([^/]+/)*(?P<id>[0-9]+)'
+ _TESTS = [{
+ 'url': 'http://www.skysports.com/watch/video/10328419/bale-its-our-time-to-shine',
+ 'md5': '77d59166cddc8d3cb7b13e35eaf0f5ec',
+ 'info_dict': {
+ 'id': 'o3eWJnNDE6l7kfNO8BOoBlRxXRQ4ANNQ',
+ 'ext': 'mp4',
+ 'title': 'Bale: It\'s our time to shine',
+ 'description': 'md5:e88bda94ae15f7720c5cb467e777bb6d',
+ },
+ 'add_ie': ['BrightcoveNew'],
+ }, {
+ 'url': 'https://www.skysports.com/watch/video/sports/f1/12160544/abu-dhabi-gp-the-notebook',
+ 'only_matching': True,
+ }, {
+ 'url': 'https://www.skysports.com/watch/video/tv-shows/12118508/rainford-brent-how-ace-programme-helps',
+ 'only_matching': True,
+ }]
+
+
+class SkyNewsIE(SkyBaseIE):
+ IE_NAME = 'sky:news'
+ _VALID_URL = r'https?://news\.sky\.com/video/[0-9a-z-]+-(?P<id>[0-9]+)'
+ _TEST = {
+ 'url': 'https://news.sky.com/video/russian-plane-inspected-after-deadly-fire-11712962',
+ 'md5': '411e8893fd216c75eaf7e4c65d364115',
+ 'info_dict': {
+ 'id': 'ref:1ua21xaDE6lCtZDmbYfl8kwsKLooJbNM',
+ 'ext': 'mp4',
+ 'title': 'Russian plane inspected after deadly fire',
+ 'description': 'The Russian Investigative Committee has released video of the wreckage of a passenger plane which caught fire near Moscow.',
+ 'uploader_id': '6058004172001',
+ 'timestamp': 1567112345,
+ 'upload_date': '20190829',
+ },
+ 'add_ie': ['BrightcoveNew'],
+ }
+
+
+class SkyNewsStoryIE(SkyBaseIE):
+ IE_NAME = 'sky:news:story'
+ _VALID_URL = r'https?://news\.sky\.com/story/[0-9a-z-]+-(?P<id>[0-9]+)'
+ _TEST = {
+ 'url': 'https://news.sky.com/story/budget-2021-chancellor-rishi-sunak-vows-address-will-deliver-strong-economy-fit-for-a-new-age-of-optimism-12445425',
+ 'info_dict': {
+ 'id': 'ref:0714acb9-123d-42c8-91b8-5c1bc6c73f20',
+ 'title': 'md5:e408dd7aad63f31a1817bbe40c7d276f',
+ 'description': 'md5:a881e12f49212f92be2befe4a09d288a',
+ 'ext': 'mp4',
+ 'upload_date': '20211027',
+ 'timestamp': 1635317494,
+ 'uploader_id': '6058004172001',
+ }
+ }
+
+ def _real_extract(self, url):
+ article_id = self._match_id(url)
+ webpage = self._download_webpage(url, article_id)
+
+ entries = [self._process_video_element(webpage, sdc_el, url)
+ for sdc_el in re.findall(self._SDC_EL_REGEX, webpage)]
+
+ return self.playlist_result(
+ entries, article_id, self._og_search_title(webpage),
+ self._html_search_meta(['og:description', 'description'], webpage))
+
+
+class SkySportsNewsIE(SkyBaseIE):
+ IE_NAME = 'sky:sports:news'
+ _VALID_URL = r'https?://(?:www\.)?skysports\.com/([^/]+/)*news/\d+/(?P<id>\d+)'
+ _TEST = {
+ 'url': 'http://www.skysports.com/golf/news/12176/10871916/dustin-johnson-ready-to-conquer-players-championship-at-tpc-sawgrass',
+ 'info_dict': {
+ 'id': '10871916',
+ 'title': 'Dustin Johnson ready to conquer Players Championship at TPC Sawgrass',
+ 'description': 'Dustin Johnson is confident he can continue his dominant form in 2017 by adding the Players Championship to his list of victories.',
+ },
+ 'playlist_count': 2,
+ }
+
+ def _real_extract(self, url):
+ article_id = self._match_id(url)
+ webpage = self._download_webpage(url, article_id)
+
+ entries = []
+ for sdc_el in re.findall(self._SDC_EL_REGEX, webpage):
+ entries.append(self._process_video_element(webpage, sdc_el, url))
+
+ return self.playlist_result(
+ entries, article_id, self._og_search_title(webpage),
+ self._html_search_meta(['og:description', 'description'], webpage))
diff --git a/yt_dlp/extractor/skyit.py b/yt_dlp/extractor/skyit.py
new file mode 100644
index 0000000..42d30f7
--- /dev/null
+++ b/yt_dlp/extractor/skyit.py
@@ -0,0 +1,227 @@
+from .common import InfoExtractor
+from ..compat import (
+ compat_parse_qs,
+ compat_urllib_parse_urlparse,
+)
+from ..utils import (
+ dict_get,
+ int_or_none,
+ parse_duration,
+ unified_timestamp,
+)
+
+
+class SkyItPlayerIE(InfoExtractor):
+ IE_NAME = 'player.sky.it'
+ _VALID_URL = r'https?://player\.sky\.it/player/(?:external|social)\.html\?.*?\bid=(?P<id>\d+)'
+ _GEO_BYPASS = False
+ _DOMAIN = 'sky'
+ _PLAYER_TMPL = 'https://player.sky.it/player/external.html?id=%s&domain=%s'
+ # http://static.sky.it/static/skyplayer/conf.json
+ _TOKEN_MAP = {
+ 'cielo': 'Hh9O7M8ks5yi6nSROL7bKYz933rdf3GhwZlTLMgvy4Q',
+ 'hotclub': 'kW020K2jq2lk2eKRJD2vWEg832ncx2EivZlTLQput2C',
+ 'mtv8': 'A5Nn9GGb326CI7vP5e27d7E4PIaQjota',
+ 'salesforce': 'C6D585FD1615272C98DE38235F38BD86',
+ 'sitocommerciale': 'VJwfFuSGnLKnd9Phe9y96WkXgYDCguPMJ2dLhGMb2RE',
+ 'sky': 'F96WlOd8yoFmLQgiqv6fNQRvHZcsWk5jDaYnDvhbiJk',
+ 'skyarte': 'LWk29hfiU39NNdq87ePeRach3nzTSV20o0lTv2001Cd',
+ 'theupfront': 'PRSGmDMsg6QMGc04Obpoy7Vsbn7i2Whp',
+ }
+
+ def _player_url_result(self, video_id):
+ return self.url_result(
+ self._PLAYER_TMPL % (video_id, self._DOMAIN),
+ SkyItPlayerIE.ie_key(), video_id)
+
+ def _parse_video(self, video, video_id):
+ title = video['title']
+ is_live = video.get('type') == 'live'
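+ # Live items use "streaming_url"/"geoblock"; VOD items use "hls_url"/"geob"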
+ hls_url = video.get(('streaming' if is_live else 'hls') + '_url')
+ if not hls_url and video.get('geoblock' if is_live else 'geob'):
+ self.raise_geo_restricted(countries=['IT'])
+
+ formats = self._extract_m3u8_formats(hls_url, video_id, 'mp4')
+
+ return {
+ 'id': video_id,
+ 'title': title,
+ 'formats': formats,
+ 'thumbnail': dict_get(video, ('video_still', 'video_still_medium', 'thumb')),
+ 'description': video.get('short_desc') or None,
+ 'timestamp': unified_timestamp(video.get('create_date')),
+ 'duration': int_or_none(video.get('duration_sec')) or parse_duration(video.get('duration')),
+ 'is_live': is_live,
+ }
+
+ def _real_extract(self, url):
+ video_id = self._match_id(url)
+ domain = compat_parse_qs(compat_urllib_parse_urlparse(
+ url).query).get('domain', [None])[0]
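+ # Fall back to the generic "sky" token when the embed domain is not in the map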
+ token = dict_get(self._TOKEN_MAP, (domain, 'sky'))
+ video = self._download_json(
+ 'https://apid.sky.it/vdp/v1/getVideoData',
+ video_id, query={
+ 'caller': 'sky',
+ 'id': video_id,
+ 'token': token
+ }, headers=self.geo_verification_headers())
+ return self._parse_video(video, video_id)
+
+
+class SkyItVideoIE(SkyItPlayerIE): # XXX: Do not subclass from concrete IE
+ IE_NAME = 'video.sky.it'
+ _VALID_URL = r'https?://(?:masterchef|video|xfactor)\.sky\.it(?:/[^/]+)*/video/[0-9a-z-]+-(?P<id>\d+)'
+ _TESTS = [{
+ 'url': 'https://video.sky.it/news/mondo/video/uomo-ucciso-da-uno-squalo-in-australia-631227',
+ 'md5': '5b858a62d9ffe2ab77b397553024184a',
+ 'info_dict': {
+ 'id': '631227',
+ 'ext': 'mp4',
+ 'title': 'Uomo ucciso da uno squalo in Australia',
+ 'timestamp': 1606036192,
+ 'upload_date': '20201122',
+ 'duration': 26,
+ 'thumbnail': 'https://video.sky.it/captures/thumbs/631227/631227_thumb_880x494.jpg',
+ },
+ 'params': {'skip_download': 'm3u8'},
+ }, {
+ 'url': 'https://xfactor.sky.it/video/x-factor-2020-replay-audizioni-1-615820',
+ 'only_matching': True,
+ }, {
+ 'url': 'https://masterchef.sky.it/video/masterchef-9-cosa-e-successo-nella-prima-puntata-562831',
+ 'only_matching': True,
+ }]
+
+ def _real_extract(self, url):
+ video_id = self._match_id(url)
+ return self._player_url_result(video_id)
+
+
+class SkyItVideoLiveIE(SkyItPlayerIE): # XXX: Do not subclass from concrete IE
+ IE_NAME = 'video.sky.it:live'
+ _VALID_URL = r'https?://video\.sky\.it/diretta/(?P<id>[^/?&#]+)'
+ _TEST = {
+ 'url': 'https://video.sky.it/diretta/tg24',
+ 'info_dict': {
+ 'id': '1',
+ 'ext': 'mp4',
+ 'title': r're:Diretta TG24 \d{4}-\d{2}-\d{2} \d{2}:\d{2}',
+ 'description': r're:(?:Clicca play e )?[Gg]uarda la diretta streaming di SkyTg24, segui con Sky tutti gli appuntamenti e gli speciali di Tg24\.',
+ 'live_status': 'is_live',
+ },
+ 'params': {
+ # m3u8 download
+ 'skip_download': True,
+ },
+ }
+
+ def _real_extract(self, url):
+ display_id = self._match_id(url)
+ webpage = self._download_webpage(url, display_id)
+ asset_id = str(self._search_nextjs_data(webpage, display_id)['props']['initialState']['livePage']['content']['asset_id'])
+ livestream = self._download_json(
+ 'https://apid.sky.it/vdp/v1/getLivestream',
+ asset_id, query={'id': asset_id})
+ return self._parse_video(livestream, asset_id)
+
+
+class SkyItIE(SkyItPlayerIE): # XXX: Do not subclass from concrete IE
+ IE_NAME = 'sky.it'
+ _VALID_URL = r'https?://(?:sport|tg24)\.sky\.it(?:/[^/]+)*/\d{4}/\d{2}/\d{2}/(?P<id>[^/?&#]+)'
+ _TESTS = [{
+ 'url': 'https://sport.sky.it/calcio/serie-a/2022/11/03/brozovic-inter-news',
+ 'info_dict': {
+ 'id': '789222',
+ 'ext': 'mp4',
+ 'title': 'Brozovic con il gruppo: verso convocazione per Juve-Inter',
+ 'upload_date': '20221103',
+ 'timestamp': 1667484130,
+ 'duration': 22,
+ 'thumbnail': 'https://videoplatform.sky.it/still/2022/11/03/1667480526353_brozovic_videostill_1.jpg',
+ },
+ 'params': {'skip_download': 'm3u8'},
+ }, {
+ 'url': 'https://tg24.sky.it/mondo/2020/11/22/australia-squalo-uccide-uomo',
+ 'md5': 'fe5c91e59a84a3437eaa0bca6e134ccd',
+ 'info_dict': {
+ 'id': '631227',
+ 'ext': 'mp4',
+ 'title': 'Uomo ucciso da uno squalo in Australia',
+ 'timestamp': 1606036192,
+ 'upload_date': '20201122',
+ 'duration': 26,
+ 'thumbnail': 'https://video.sky.it/captures/thumbs/631227/631227_thumb_880x494.jpg',
+ },
+ 'params': {'skip_download': 'm3u8'},
+ }]
+ _VIDEO_ID_REGEX = r'data-videoid="(\d+)"'
+
+ def _real_extract(self, url):
+ display_id = self._match_id(url)
+ webpage = self._download_webpage(url, display_id)
+ video_id = self._search_regex(
+ self._VIDEO_ID_REGEX, webpage, 'video id')
+ return self._player_url_result(video_id)
+
+
+class SkyItArteIE(SkyItIE): # XXX: Do not subclass from concrete IE
+ IE_NAME = 'arte.sky.it'
+ _VALID_URL = r'https?://arte\.sky\.it/video/(?P<id>[^/?&#]+)'
+ _TESTS = [{
+ 'url': 'https://arte.sky.it/video/oliviero-toscani-torino-galleria-mazzoleni-788962',
+ 'md5': '515aee97b87d7a018b6c80727d3e7e17',
+ 'info_dict': {
+ 'id': '788962',
+ 'ext': 'mp4',
+ 'title': 'La fotografia di Oliviero Toscani conquista Torino',
+ 'upload_date': '20221102',
+ 'timestamp': 1667399996,
+ 'duration': 12,
+ 'thumbnail': 'https://videoplatform.sky.it/still/2022/11/02/1667396388552_oliviero-toscani-torino-galleria-mazzoleni_videostill_1.jpg',
+ },
+ 'params': {'skip_download': 'm3u8'},
+ }]
+ _DOMAIN = 'skyarte'
+ _VIDEO_ID_REGEX = r'"embedUrl"\s*:\s*"(?:https:)?//player\.sky\.it/player/external\.html\?[^"]*\bid=(\d+)'
+
+
+class CieloTVItIE(SkyItIE): # XXX: Do not subclass from concrete IE
+ IE_NAME = 'cielotv.it'
+ _VALID_URL = r'https?://(?:www\.)?cielotv\.it/video/(?P<id>[^.]+)\.html'
+ _TESTS = [{
+ 'url': 'https://www.cielotv.it/video/Il-lunedi-e-sempre-un-dramma.html',
+ 'md5': 'c4deed77552ba901c2a0d9258320304b',
+ 'info_dict': {
+ 'id': '499240',
+ 'ext': 'mp4',
+ 'title': 'Il lunedì è sempre un dramma',
+ 'upload_date': '20190329',
+ 'timestamp': 1553862178,
+ 'duration': 30,
+ 'thumbnail': 'https://videoplatform.sky.it/still/2019/03/29/1553858575610_lunedi_dramma_mant_videostill_1.jpg',
+ },
+ 'params': {'skip_download': 'm3u8'},
+ }]
+ _DOMAIN = 'cielo'
+ _VIDEO_ID_REGEX = r'videoId\s*=\s*"(\d+)"'
+
+
+class TV8ItIE(SkyItVideoIE): # XXX: Do not subclass from concrete IE
+ IE_NAME = 'tv8.it'
+ _VALID_URL = r'https?://(?:www\.)?tv8\.it/(?:show)?video/[0-9a-z-]+-(?P<id>\d+)'
+ _TESTS = [{
+ 'url': 'https://www.tv8.it/video/ogni-mattina-ucciso-asino-di-andrea-lo-cicero-630529',
+ 'md5': '9ab906a3f75ea342ed928442f9dabd21',
+ 'info_dict': {
+ 'id': '630529',
+ 'ext': 'mp4',
+ 'title': 'Ogni mattina - Ucciso asino di Andrea Lo Cicero',
+ 'timestamp': 1605721374,
+ 'upload_date': '20201118',
+ 'duration': 114,
+ 'thumbnail': 'https://videoplatform.sky.it/still/2020/11/18/1605717753954_ogni-mattina-ucciso-asino-di-andrea-lo-cicero_videostill_1.jpg',
+ },
+ 'params': {'skip_download': 'm3u8'},
+ }]
+ _DOMAIN = 'mtv8'
diff --git a/yt_dlp/extractor/skylinewebcams.py b/yt_dlp/extractor/skylinewebcams.py
new file mode 100644
index 0000000..197407c
--- /dev/null
+++ b/yt_dlp/extractor/skylinewebcams.py
@@ -0,0 +1,40 @@
+from .common import InfoExtractor
+
+
+class SkylineWebcamsIE(InfoExtractor):
+ _WORKING = False
+ _VALID_URL = r'https?://(?:www\.)?skylinewebcams\.com/[^/]+/webcam/(?:[^/]+/)+(?P<id>[^/]+)\.html'
+ _TEST = {
+ 'url': 'https://www.skylinewebcams.com/it/webcam/italia/lazio/roma/scalinata-piazza-di-spagna-barcaccia.html',
+ 'info_dict': {
+ 'id': 'scalinata-piazza-di-spagna-barcaccia',
+ 'ext': 'mp4',
+ 'title': 're:^Live Webcam Scalinata di Piazza di Spagna - La Barcaccia [0-9]{4}-[0-9]{2}-[0-9]{2} [0-9]{2}:[0-9]{2}$',
+ 'description': 'Roma, veduta sulla Scalinata di Piazza di Spagna e sulla Barcaccia',
+ 'is_live': True,
+ },
+ 'params': {
+ 'skip_download': True,
+ }
+ }
+
+ def _real_extract(self, url):
+ video_id = self._match_id(url)
+
+ webpage = self._download_webpage(url, video_id)
+
+ stream_url = self._search_regex(
+ r'(?:url|source)\s*:\s*(["\'])(?P<url>(?:https?:)?//.+?\.m3u8.*?)\1', webpage,
+ 'stream url', group='url')
+
+ title = self._og_search_title(webpage)
+ description = self._og_search_description(webpage)
+
+ return {
+ 'id': video_id,
+ 'url': stream_url,
+ 'ext': 'mp4',
+ 'title': title,
+ 'description': description,
+ 'is_live': True,
+ }
diff --git a/yt_dlp/extractor/skynewsarabia.py b/yt_dlp/extractor/skynewsarabia.py
new file mode 100644
index 0000000..8677827
--- /dev/null
+++ b/yt_dlp/extractor/skynewsarabia.py
@@ -0,0 +1,116 @@
+from .common import InfoExtractor
+from ..compat import compat_str
+from ..utils import (
+ parse_iso8601,
+ parse_duration,
+)
+
+
+class SkyNewsArabiaBaseIE(InfoExtractor):
+ _IMAGE_BASE_URL = 'http://www.skynewsarabia.com/web/images'
+
+ def _call_api(self, path, value):
+ return self._download_json('http://api.skynewsarabia.com/web/rest/v2/%s/%s.json' % (path, value), value)
+
+ def _get_limelight_media_id(self, url):
+ return self._search_regex(r'/media/[^/]+/([a-z0-9]{32})', url, 'limelight media id')
+
+ def _get_image_url(self, image_path_template, width='1600', height='1200'):
+ return self._IMAGE_BASE_URL + image_path_template.format(width=width, height=height)
+
+ def _extract_video_info(self, video_data):
+ video_id = compat_str(video_data['id'])
+ topic = video_data.get('topicTitle')
+ return {
+ '_type': 'url_transparent',
+ 'url': 'limelight:media:%s' % self._get_limelight_media_id(video_data['videoUrl'][0]['url']),
+ 'id': video_id,
+ 'title': video_data['headline'],
+ 'description': video_data.get('summary'),
+ 'thumbnail': self._get_image_url(video_data['mediaAsset']['imageUrl']),
+ 'timestamp': parse_iso8601(video_data.get('date')),
+ 'duration': parse_duration(video_data.get('runTime')),
+ 'tags': video_data.get('tags', []),
+ 'categories': [topic] if topic else [],
+ 'webpage_url': 'http://www.skynewsarabia.com/web/video/%s' % video_id,
+ 'ie_key': 'LimelightMedia',
+ }
+
+
+class SkyNewsArabiaIE(SkyNewsArabiaBaseIE):
+ _WORKING = False
+ IE_NAME = 'skynewsarabia:video'
+ _VALID_URL = r'https?://(?:www\.)?skynewsarabia\.com/web/video/(?P<id>[0-9]+)'
+ _TEST = {
+ 'url': 'http://www.skynewsarabia.com/web/video/794902/%D9%86%D8%B5%D9%81-%D9%85%D9%84%D9%8A%D9%88%D9%86-%D9%85%D8%B5%D8%A8%D8%A7%D8%AD-%D8%B4%D8%AC%D8%B1%D8%A9-%D9%83%D8%B1%D9%8A%D8%B3%D9%85%D8%A7%D8%B3',
+ 'info_dict': {
+ 'id': '794902',
+ 'ext': 'flv',
+ 'title': 'نصف مليون مصباح على شجرة كريسماس',
+ 'description': 'md5:22f1b27f0850eeb10c7e59b1f16eb7c6',
+ 'upload_date': '20151128',
+ 'timestamp': 1448697198,
+ 'duration': 2119,
+ },
+ 'params': {
+ # rtmp download
+ 'skip_download': True,
+ },
+ }
+
+ def _real_extract(self, url):
+ video_id = self._match_id(url)
+ video_data = self._call_api('video', video_id)
+ return self._extract_video_info(video_data)
+
+
+class SkyNewsArabiaArticleIE(SkyNewsArabiaBaseIE):
+ _WORKING = False
+ IE_NAME = 'skynewsarabia:article'
+ _VALID_URL = r'https?://(?:www\.)?skynewsarabia\.com/web/article/(?P<id>[0-9]+)'
+ _TESTS = [{
+ 'url': 'http://www.skynewsarabia.com/web/article/794549/%D8%A7%D9%94%D8%AD%D8%AF%D8%A7%D8%AB-%D8%A7%D9%84%D8%B4%D8%B1%D9%82-%D8%A7%D9%84%D8%A7%D9%94%D9%88%D8%B3%D8%B7-%D8%AE%D8%B1%D9%8A%D8%B7%D8%A9-%D8%A7%D9%84%D8%A7%D9%94%D9%84%D8%B9%D8%A7%D8%A8-%D8%A7%D9%84%D8%B0%D9%83%D9%8A%D8%A9',
+ 'info_dict': {
+ 'id': '794549',
+ 'ext': 'flv',
+ 'title': 'بالفيديو.. ألعاب ذكية تحاكي واقع المنطقة',
+ 'description': 'md5:0c373d29919a851e080ee4edd0c5d97f',
+ 'upload_date': '20151126',
+ 'timestamp': 1448559336,
+ 'duration': 281.6,
+ },
+ 'params': {
+ # rtmp download
+ 'skip_download': True,
+ },
+ }, {
+ 'url': 'http://www.skynewsarabia.com/web/article/794844/%D8%A7%D8%B3%D8%AA%D9%87%D8%AF%D8%A7%D9%81-%D9%82%D9%88%D8%A7%D8%B1%D8%A8-%D8%A7%D9%94%D8%B3%D9%84%D8%AD%D8%A9-%D9%84%D9%85%D9%8A%D9%84%D9%8A%D8%B4%D9%8A%D8%A7%D8%AA-%D8%A7%D9%84%D8%AD%D9%88%D8%AB%D9%8A-%D9%88%D8%B5%D8%A7%D9%84%D8%AD',
+ 'info_dict': {
+ 'id': '794844',
+ 'title': 'إحباط تهريب أسلحة لميليشيات الحوثي وصالح بجنوب اليمن',
+ 'description': 'md5:5c927b8b2e805796e7f693538d96fc7e',
+ },
+ 'playlist_mincount': 2,
+ }]
+
+ def _real_extract(self, url):
+ article_id = self._match_id(url)
+ article_data = self._call_api('article', article_id)
+ media_asset = article_data['mediaAsset']
+ if media_asset['type'] == 'VIDEO':
+ topic = article_data.get('topicTitle')
+ return {
+ '_type': 'url_transparent',
+ 'url': 'limelight:media:%s' % self._get_limelight_media_id(media_asset['videoUrl'][0]['url']),
+ 'id': article_id,
+ 'title': article_data['headline'],
+ 'description': article_data.get('summary'),
+ 'thumbnail': self._get_image_url(media_asset['imageUrl']),
+ 'timestamp': parse_iso8601(article_data.get('date')),
+ 'tags': article_data.get('tags', []),
+ 'categories': [topic] if topic else [],
+ 'webpage_url': url,
+ 'ie_key': 'LimelightMedia',
+ }
+ entries = [self._extract_video_info(item) for item in article_data.get('inlineItems', []) if item['type'] == 'VIDEO']
+ return self.playlist_result(entries, article_id, article_data['headline'], article_data.get('summary'))
diff --git a/yt_dlp/extractor/skynewsau.py b/yt_dlp/extractor/skynewsau.py
new file mode 100644
index 0000000..43a9c82
--- /dev/null
+++ b/yt_dlp/extractor/skynewsau.py
@@ -0,0 +1,43 @@
+from .common import InfoExtractor
+from ..utils import (
+ try_get,
+ unified_strdate,
+)
+
+
+class SkyNewsAUIE(InfoExtractor):
+ _VALID_URL = r'https?://(?:www\.)?skynews\.com\.au/[^/]+/[^/]+/[^/]+/video/(?P<id>[a-z0-9]+)'
+
+ _TESTS = [{
+ 'url': 'https://www.skynews.com.au/world-news/united-states/incredible-vision-shows-lava-overflowing-from-spains-la-palma-volcano/video/0f4c6243d6903502c01251f228b91a71',
+ 'info_dict': {
+ 'id': '6277184925001',
+ 'ext': 'mp4',
+ 'title': 'md5:60594f1ea6d5ae93e292900f4d34e9ae',
+ 'description': 'md5:60594f1ea6d5ae93e292900f4d34e9ae',
+ 'thumbnail': r're:^https?://.*\.jpg',
+ 'duration': 76.394,
+ 'timestamp': 1634271300,
+ 'uploader_id': '5348771529001',
+ 'tags': ['fblink', 'msn', 'usa', 'world', 'yt'],
+ 'upload_date': '20211015',
+ },
+ 'params': {'skip_download': True, 'format': 'bv'}
+ }]
+
+ _API_KEY = '6krsj3w249nk779d8fukqx9f'
+
+    def _real_extract(self, url):
+        video_id = self._match_id(url)
+        webpage = self._download_webpage(url, video_id)
+        embedcode = self._search_regex(r'embedcode\s?=\s?"([^"]+)"', webpage, 'embedcode')
+        data_json = self._download_json(
+            f'https://content.api.news/v3/videos/brightcove/{embedcode}?api_key={self._API_KEY}', video_id)['content']
+        return {
+            'id': video_id,
+ '_type': 'url_transparent',
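+        # 'embedcode' has the form '<brightcove_account_id>-<brightcove_video_id>',
+        # e.g. '5348771529001-6277184925001' for the test above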
+ 'url': 'https://players.brightcove.net/%s/default_default/index.html?videoId=%s' % tuple(embedcode.split('-')),
+ 'ie_key': 'BrightcoveNew',
+ 'title': data_json.get('caption'),
+ 'upload_date': unified_strdate(try_get(data_json, lambda x: x['date']['created'])),
+ }
diff --git a/yt_dlp/extractor/slideshare.py b/yt_dlp/extractor/slideshare.py
new file mode 100644
index 0000000..ab9dad0
--- /dev/null
+++ b/yt_dlp/extractor/slideshare.py
@@ -0,0 +1,53 @@
+import json
+
+from .common import InfoExtractor
+from ..compat import (
+ compat_urlparse,
+)
+from ..utils import (
+ ExtractorError,
+ get_element_by_id,
+)
+
+
+class SlideshareIE(InfoExtractor):
+ _VALID_URL = r'https?://(?:www\.)?slideshare\.net/[^/]+?/(?P<title>.+?)($|\?)'
+
+ _TEST = {
+ 'url': 'http://www.slideshare.net/Dataversity/keynote-presentation-managing-scale-and-complexity',
+ 'info_dict': {
+ 'id': '25665706',
+ 'ext': 'mp4',
+ 'title': 'Managing Scale and Complexity',
+ 'description': 'This was a keynote presentation at the NoSQL Now! 2013 Conference & Expo (http://www.nosqlnow.com). This presentation was given by Adrian Cockcroft from Netflix.',
+ },
+ }
+
+ def _real_extract(self, url):
+ mobj = self._match_valid_url(url)
+ page_title = mobj.group('title')
+ webpage = self._download_webpage(url, page_title)
+ slideshare_obj = self._search_regex(
+ r'\$\.extend\(.*?slideshare_object,\s*(\{.*?\})\);',
+ webpage, 'slideshare object')
+ info = json.loads(slideshare_obj)
+ if info['slideshow']['type'] != 'video':
+ raise ExtractorError('Webpage type is "%s": only video extraction is supported for Slideshare' % info['slideshow']['type'], expected=True)
+
+ doc = info['doc']
+ bucket = info['jsplayer']['video_bucket']
+ ext = info['jsplayer']['video_extension']
+ video_url = compat_urlparse.urljoin(bucket, doc + '-SD.' + ext)
+ description = get_element_by_id('slideshow-description-paragraph', webpage) or self._html_search_regex(
+ r'(?s)<p[^>]+itemprop="description"[^>]*>(.+?)</p>', webpage,
+ 'description', fatal=False)
+
+ return {
+ '_type': 'video',
+ 'id': info['slideshow']['id'],
+ 'title': info['slideshow']['title'],
+ 'ext': ext,
+ 'url': video_url,
+ 'thumbnail': info['slideshow']['pin_image_url'],
+ 'description': description.strip() if description else None,
+ }
diff --git a/yt_dlp/extractor/slideslive.py b/yt_dlp/extractor/slideslive.py
new file mode 100644
index 0000000..a1328de
--- /dev/null
+++ b/yt_dlp/extractor/slideslive.py
@@ -0,0 +1,554 @@
+import re
+import urllib.parse
+import xml.etree.ElementTree
+
+from .common import InfoExtractor
+from ..utils import (
+ ExtractorError,
+ int_or_none,
+ parse_qs,
+ smuggle_url,
+ traverse_obj,
+ unified_timestamp,
+ update_url_query,
+ url_or_none,
+ xpath_text,
+)
+
+
+class SlidesLiveIE(InfoExtractor):
+ _VALID_URL = r'https?://slideslive\.com/(?:embed/(?:presentation/)?)?(?P<id>[0-9]+)'
+ _TESTS = [{
+ # service_name = yoda, only XML slides info
+ 'url': 'https://slideslive.com/38902413/gcc-ia16-backend',
+ 'info_dict': {
+ 'id': '38902413',
+ 'ext': 'mp4',
+ 'title': 'GCC IA16 backend',
+ 'timestamp': 1697793372,
+ 'upload_date': '20231020',
+ 'thumbnail': r're:^https?://.*\.jpg',
+ 'thumbnails': 'count:42',
+ 'chapters': 'count:41',
+ 'duration': 1638,
+ },
+ 'params': {
+ 'skip_download': 'm3u8',
+ },
+ }, {
+ # service_name = yoda, /v7/ slides
+ 'url': 'https://slideslive.com/38935785',
+ 'info_dict': {
+ 'id': '38935785',
+ 'ext': 'mp4',
+ 'title': 'Offline Reinforcement Learning: From Algorithms to Practical Challenges',
+ 'upload_date': '20231020',
+ 'timestamp': 1697807002,
+ 'thumbnail': r're:^https?://.*\.(?:jpg|png)',
+ 'thumbnails': 'count:640',
+ 'chapters': 'count:639',
+ 'duration': 9832,
+ },
+ 'params': {
+ 'skip_download': 'm3u8',
+ },
+ }, {
+ # service_name = yoda, /v1/ slides
+ 'url': 'https://slideslive.com/38973182/how-should-a-machine-learning-researcher-think-about-ai-ethics',
+ 'info_dict': {
+ 'id': '38973182',
+ 'ext': 'mp4',
+ 'title': 'How Should a Machine Learning Researcher Think About AI Ethics?',
+ 'upload_date': '20231020',
+ 'thumbnail': r're:^https?://.*\.jpg',
+ 'timestamp': 1697822521,
+ 'thumbnails': 'count:3',
+ 'chapters': 'count:2',
+ 'duration': 5889,
+ },
+ 'params': {
+ 'skip_download': 'm3u8',
+ },
+ }, {
+ # formerly youtube, converted to native
+ 'url': 'https://slideslive.com/38897546/special-metaprednaska-petra-ludwiga-hodnoty-pro-lepsi-spolecnost',
+ 'md5': '8a79b5e3d700837f40bd2afca3c8fa01',
+ 'info_dict': {
+ 'id': '38897546',
+ 'ext': 'mp4',
+ 'title': 'SPECIÁL: Meta-přednáška Petra Ludwiga - Hodnoty pro lepší společnost',
+ 'thumbnail': r're:^https?://.*\.jpg',
+ 'upload_date': '20231029',
+ 'timestamp': 1698588144,
+ 'thumbnails': 'count:169',
+ 'chapters': 'count:168',
+ 'duration': 6827,
+ },
+ 'params': {
+ 'skip_download': 'm3u8',
+ },
+ }, {
+ # embed-only presentation, only XML slides info
+ 'url': 'https://slideslive.com/embed/presentation/38925850',
+ 'info_dict': {
+ 'id': '38925850',
+ 'ext': 'mp4',
+ 'title': 'Towards a Deep Network Architecture for Structured Smoothness',
+ 'thumbnail': r're:^https?://.*\.jpg',
+ 'thumbnails': 'count:8',
+ 'timestamp': 1697803109,
+ 'upload_date': '20231020',
+ 'chapters': 'count:7',
+ 'duration': 326,
+ },
+ 'params': {
+ 'skip_download': 'm3u8',
+ },
+ }, {
+ # embed-only presentation, only JSON slides info, /v5/ slides (.png)
+ 'url': 'https://slideslive.com/38979920/',
+ 'info_dict': {
+ 'id': '38979920',
+ 'ext': 'mp4',
+ 'title': 'MoReL: Multi-omics Relational Learning',
+ 'thumbnail': r're:^https?://.*\.(?:jpg|png)',
+ 'thumbnails': 'count:7',
+ 'timestamp': 1697824939,
+ 'upload_date': '20231020',
+ 'chapters': 'count:6',
+ 'duration': 171,
+ },
+ 'params': {
+ 'skip_download': 'm3u8',
+ },
+ }, {
+ # /v2/ slides (.jpg)
+ 'url': 'https://slideslive.com/38954074',
+ 'info_dict': {
+ 'id': '38954074',
+ 'ext': 'mp4',
+ 'title': 'Decentralized Attribution of Generative Models',
+ 'thumbnail': r're:^https?://.*\.jpg',
+ 'thumbnails': 'count:16',
+ 'timestamp': 1697814901,
+ 'upload_date': '20231020',
+ 'chapters': 'count:15',
+ 'duration': 306,
+ },
+ 'params': {
+ 'skip_download': 'm3u8',
+ },
+ }, {
+ # /v4/ slides (.png)
+ 'url': 'https://slideslive.com/38979570/',
+ 'info_dict': {
+ 'id': '38979570',
+ 'ext': 'mp4',
+ 'title': 'Efficient Active Search for Combinatorial Optimization Problems',
+ 'thumbnail': r're:^https?://.*\.(?:jpg|png)',
+ 'thumbnails': 'count:9',
+ 'timestamp': 1697824757,
+ 'upload_date': '20231020',
+ 'chapters': 'count:8',
+ 'duration': 295,
+ },
+ 'params': {
+ 'skip_download': 'm3u8',
+ },
+ }, {
+ # /v10/ slides
+ 'url': 'https://slideslive.com/embed/presentation/38979880?embed_parent_url=https%3A%2F%2Fedit.videoken.com%2F',
+ 'info_dict': {
+ 'id': '38979880',
+ 'ext': 'mp4',
+ 'title': 'The Representation Power of Neural Networks',
+ 'timestamp': 1697824919,
+ 'thumbnail': r're:^https?://.*\.(?:jpg|png)',
+ 'thumbnails': 'count:22',
+ 'upload_date': '20231020',
+ 'chapters': 'count:21',
+ 'duration': 294,
+ },
+ 'params': {
+ 'skip_download': 'm3u8',
+ },
+ }, {
+ # /v7/ slides, 2 video slides
+ 'url': 'https://slideslive.com/embed/presentation/38979682?embed_container_origin=https%3A%2F%2Fedit.videoken.com',
+ 'playlist_count': 3,
+ 'info_dict': {
+ 'id': '38979682-playlist',
+ 'title': 'LoRA: Low-Rank Adaptation of Large Language Models',
+ },
+ 'playlist': [{
+ 'info_dict': {
+ 'id': '38979682',
+ 'ext': 'mp4',
+ 'title': 'LoRA: Low-Rank Adaptation of Large Language Models',
+ 'timestamp': 1697824815,
+ 'thumbnail': r're:^https?://.*\.(?:jpg|png)',
+ 'thumbnails': 'count:30',
+ 'upload_date': '20231020',
+ 'chapters': 'count:31',
+ 'duration': 272,
+ },
+ }, {
+ 'info_dict': {
+ 'id': '38979682-021',
+ 'ext': 'mp4',
+ 'title': 'LoRA: Low-Rank Adaptation of Large Language Models - Slide 021',
+ 'duration': 3,
+ 'timestamp': 1697824815,
+ 'upload_date': '20231020',
+ },
+ }, {
+ 'info_dict': {
+ 'id': '38979682-024',
+ 'ext': 'mp4',
+ 'title': 'LoRA: Low-Rank Adaptation of Large Language Models - Slide 024',
+ 'duration': 4,
+ 'timestamp': 1697824815,
+ 'upload_date': '20231020',
+ },
+ }],
+ 'params': {
+ 'skip_download': 'm3u8',
+ },
+ }, {
+ # /v6/ slides, 1 video slide, edit.videoken.com embed
+ 'url': 'https://slideslive.com/38979481/',
+ 'playlist_count': 2,
+ 'info_dict': {
+ 'id': '38979481-playlist',
+ 'title': 'How to Train Your MAML to Excel in Few-Shot Classification',
+ },
+ 'playlist': [{
+ 'info_dict': {
+ 'id': '38979481',
+ 'ext': 'mp4',
+ 'title': 'How to Train Your MAML to Excel in Few-Shot Classification',
+ 'timestamp': 1697824716,
+ 'thumbnail': r're:^https?://.*\.(?:jpg|png)',
+ 'thumbnails': 'count:43',
+ 'upload_date': '20231020',
+ 'chapters': 'count:43',
+ 'duration': 315,
+ },
+ }, {
+ 'info_dict': {
+ 'id': '38979481-013',
+ 'ext': 'mp4',
+ 'title': 'How to Train Your MAML to Excel in Few-Shot Classification - Slide 013',
+ 'duration': 3,
+ 'timestamp': 1697824716,
+ 'upload_date': '20231020',
+ },
+ }],
+ 'params': {
+ 'skip_download': 'm3u8',
+ },
+ }, {
+ # /v3/ slides, .jpg and .png, service_name = youtube
+ 'url': 'https://slideslive.com/embed/38932460/',
+ 'info_dict': {
+ 'id': 'RTPdrgkyTiE',
+ 'display_id': '38932460',
+ 'ext': 'mp4',
+ 'title': 'Active Learning for Hierarchical Multi-Label Classification',
+ 'description': 'Watch full version of this video at https://slideslive.com/38932460.',
+ 'channel': 'SlidesLive Videos - A',
+ 'channel_id': 'UC62SdArr41t_-_fX40QCLRw',
+ 'channel_url': 'https://www.youtube.com/channel/UC62SdArr41t_-_fX40QCLRw',
+ 'uploader': 'SlidesLive Videos - A',
+ 'uploader_id': '@slideslivevideos-a6075',
+ 'uploader_url': 'https://www.youtube.com/@slideslivevideos-a6075',
+ 'upload_date': '20200903',
+ 'timestamp': 1697805922,
+ 'duration': 942,
+ 'age_limit': 0,
+ 'live_status': 'not_live',
+ 'playable_in_embed': True,
+ 'availability': 'unlisted',
+ 'categories': ['People & Blogs'],
+ 'tags': [],
+ 'channel_follower_count': int,
+ 'like_count': int,
+ 'view_count': int,
+ 'thumbnail': r're:^https?://.*\.(?:jpg|png|webp)',
+ 'thumbnails': 'count:21',
+ 'chapters': 'count:20',
+ },
+ 'params': {
+ 'skip_download': 'm3u8',
+ },
+ }, {
+ # /v3/ slides, .png only, service_name = yoda
+ 'url': 'https://slideslive.com/38983994',
+ 'info_dict': {
+ 'id': '38983994',
+ 'ext': 'mp4',
+ 'title': 'Zero-Shot AutoML with Pretrained Models',
+ 'timestamp': 1697826708,
+ 'upload_date': '20231020',
+ 'thumbnail': r're:^https?://.*\.(?:jpg|png)',
+ 'thumbnails': 'count:23',
+ 'chapters': 'count:22',
+ 'duration': 295,
+ },
+ 'params': {
+ 'skip_download': 'm3u8',
+ },
+ }, {
+ # service_name = yoda
+ 'url': 'https://slideslive.com/38903721/magic-a-scientific-resurrection-of-an-esoteric-legend',
+ 'only_matching': True,
+ }, {
+ # dead link, service_name = url
+ 'url': 'https://slideslive.com/38922070/learning-transferable-skills-1',
+ 'only_matching': True,
+ }, {
+ # dead link, service_name = vimeo
+ 'url': 'https://slideslive.com/38921896/retrospectives-a-venue-for-selfreflection-in-ml-research-3',
+ 'only_matching': True,
+ }]
+
+ _WEBPAGE_TESTS = [{
+ # only XML slides info
+ 'url': 'https://iclr.cc/virtual_2020/poster_Hklr204Fvr.html',
+ 'info_dict': {
+ 'id': '38925850',
+ 'ext': 'mp4',
+ 'title': 'Towards a Deep Network Architecture for Structured Smoothness',
+ 'thumbnail': r're:^https?://.*\.jpg',
+ 'thumbnails': 'count:8',
+ 'timestamp': 1697803109,
+ 'upload_date': '20231020',
+ 'chapters': 'count:7',
+ 'duration': 326,
+ },
+ 'params': {
+ 'skip_download': 'm3u8',
+ },
+ }]
+
+ @classmethod
+ def _extract_embed_urls(cls, url, webpage):
+ # Reference: https://slideslive.com/embed_presentation.js
+ for embed_id in re.findall(r'(?s)new\s+SlidesLiveEmbed\s*\([^)]+\bpresentationId:\s*["\'](\d+)["\']', webpage):
+ url_parsed = urllib.parse.urlparse(url)
+ origin = f'{url_parsed.scheme}://{url_parsed.netloc}'
+ yield update_url_query(
+ f'https://slideslive.com/embed/presentation/{embed_id}', {
+ 'embed_parent_url': url,
+ 'embed_container_origin': origin,
+ })
+
+ def _download_embed_webpage_handle(self, video_id, headers):
+ return self._download_webpage_handle(
+ f'https://slideslive.com/embed/presentation/{video_id}', video_id,
+ headers=headers, query=traverse_obj(headers, {
+ 'embed_parent_url': 'Referer',
+ 'embed_container_origin': 'Origin',
+ }))
+
+ def _extract_custom_m3u8_info(self, m3u8_data):
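+        # The player endpoint responds with an m3u8-style playlist whose metadata
+        # is carried in custom '#EXT-SL-' tags, e.g. (illustrative values only):
+        #   #EXT-SL-PRESENTATION-TITLE:Example Talk
+        #   #EXT-SL-VOD-VIDEO-SERVICE-NAME:yoda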
+ m3u8_dict = {}
+
+ lookup = {
+ 'PRESENTATION-TITLE': 'title',
+ 'PRESENTATION-UPDATED-AT': 'timestamp',
+ 'PRESENTATION-THUMBNAIL': 'thumbnail',
+ 'PLAYLIST-TYPE': 'playlist_type',
+ 'VOD-VIDEO-SERVICE-NAME': 'service_name',
+ 'VOD-VIDEO-ID': 'service_id',
+ 'VOD-VIDEO-SERVERS': 'video_servers',
+ 'VOD-SUBTITLES': 'subtitles',
+ 'VOD-SLIDES-JSON-URL': 'slides_json_url',
+ 'VOD-SLIDES-XML-URL': 'slides_xml_url',
+ }
+
+ for line in m3u8_data.splitlines():
+ if not line.startswith('#EXT-SL-'):
+ continue
+ tag, _, value = line.partition(':')
+ key = lookup.get(tag[8:])
+ if not key:
+ continue
+ m3u8_dict[key] = value
+
+ # Some values are stringified JSON arrays
+ for key in ('video_servers', 'subtitles'):
+ if key in m3u8_dict:
+ m3u8_dict[key] = self._parse_json(m3u8_dict[key], None, fatal=False) or []
+
+ return m3u8_dict
+
+ def _extract_formats_and_duration(self, cdn_hostname, path, video_id, skip_duration=False):
+ formats, duration = [], None
+
+ hls_formats = self._extract_m3u8_formats(
+ f'https://{cdn_hostname}/{path}/master.m3u8',
+ video_id, 'mp4', m3u8_id='hls', fatal=False, live=True)
+ if hls_formats:
+ if not skip_duration:
+ duration = self._extract_m3u8_vod_duration(
+ hls_formats[0]['url'], video_id, note='Extracting duration from HLS manifest')
+ formats.extend(hls_formats)
+
+ dash_formats = self._extract_mpd_formats(
+ f'https://{cdn_hostname}/{path}/master.mpd', video_id, mpd_id='dash', fatal=False)
+ if dash_formats:
+ if not duration and not skip_duration:
+ duration = self._extract_mpd_vod_duration(
+ f'https://{cdn_hostname}/{path}/master.mpd', video_id,
+ note='Extracting duration from DASH manifest')
+ formats.extend(dash_formats)
+
+ return formats, duration
+
+ def _real_extract(self, url):
+ video_id = self._match_id(url)
+ webpage, urlh = self._download_embed_webpage_handle(
+ video_id, headers=traverse_obj(parse_qs(url), {
+ 'Referer': ('embed_parent_url', -1),
+ 'Origin': ('embed_container_origin', -1)}))
+ redirect_url = urlh.url
+ if 'domain_not_allowed' in redirect_url:
+ domain = traverse_obj(parse_qs(redirect_url), ('allowed_domains[]', ...), get_all=False)
+ if not domain:
+ raise ExtractorError(
+ 'This is an embed-only presentation. Try passing --referer', expected=True)
+ webpage, _ = self._download_embed_webpage_handle(video_id, headers={
+ 'Referer': f'https://{domain}/',
+ 'Origin': f'https://{domain}',
+ })
+
+ player_token = self._search_regex(r'data-player-token="([^"]+)"', webpage, 'player token')
+ player_data = self._download_webpage(
+ f'https://ben.slideslive.com/player/{video_id}', video_id,
+ note='Downloading player info', query={'player_token': player_token})
+ player_info = self._extract_custom_m3u8_info(player_data)
+
+ service_name = player_info['service_name'].lower()
+ assert service_name in ('url', 'yoda', 'vimeo', 'youtube')
+ service_id = player_info['service_id']
+
+ slide_url_template = 'https://slides.slideslive.com/%s/slides/original/%s%s'
+ slides, slides_info = {}, []
+
+ if player_info.get('slides_json_url'):
+ slides = self._download_json(
+ player_info['slides_json_url'], video_id, fatal=False,
+ note='Downloading slides JSON', errnote=False) or {}
+ slide_ext_default = '.png'
+ slide_quality = traverse_obj(slides, ('slide_qualities', 0))
+ if slide_quality:
+ slide_ext_default = '.jpg'
+ slide_url_template = f'https://cdn.slideslive.com/data/presentations/%s/slides/{slide_quality}/%s%s'
+ for slide_id, slide in enumerate(traverse_obj(slides, ('slides', ...), expected_type=dict), 1):
+ slides_info.append((
+ slide_id, traverse_obj(slide, ('image', 'name')),
+ traverse_obj(slide, ('image', 'extname'), default=slide_ext_default),
+ int_or_none(slide.get('time'), scale=1000)))
+
+ if not slides and player_info.get('slides_xml_url'):
+ slides = self._download_xml(
+ player_info['slides_xml_url'], video_id, fatal=False,
+ note='Downloading slides XML', errnote='Failed to download slides info')
+ if isinstance(slides, xml.etree.ElementTree.Element):
+ slide_url_template = 'https://cdn.slideslive.com/data/presentations/%s/slides/big/%s%s'
+ for slide_id, slide in enumerate(slides.findall('./slide')):
+ slides_info.append((
+ slide_id, xpath_text(slide, './slideName', 'name'), '.jpg',
+ int_or_none(xpath_text(slide, './timeSec', 'time'))))
+
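+        # each slide doubles as a thumbnail and as a chapter marker at its start time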
+ chapters, thumbnails = [], []
+ if url_or_none(player_info.get('thumbnail')):
+ thumbnails.append({'id': 'cover', 'url': player_info['thumbnail']})
+ for slide_id, slide_path, slide_ext, start_time in slides_info:
+ if slide_path:
+ thumbnails.append({
+ 'id': f'{slide_id:03d}',
+ 'url': slide_url_template % (video_id, slide_path, slide_ext),
+ })
+ chapters.append({
+ 'title': f'Slide {slide_id:03d}',
+ 'start_time': start_time,
+ })
+
+ subtitles = {}
+ for sub in traverse_obj(player_info, ('subtitles', ...), expected_type=dict):
+ webvtt_url = url_or_none(sub.get('webvtt_url'))
+ if not webvtt_url:
+ continue
+ subtitles.setdefault(sub.get('language') or 'en', []).append({
+ 'url': webvtt_url,
+ 'ext': 'vtt',
+ })
+
+ info = {
+ 'id': video_id,
+ 'title': player_info.get('title') or self._html_search_meta('title', webpage, default=''),
+ 'timestamp': unified_timestamp(player_info.get('timestamp')),
+ 'is_live': player_info.get('playlist_type') != 'vod',
+ 'thumbnails': thumbnails,
+ 'chapters': chapters,
+ 'subtitles': subtitles,
+ }
+
+ if service_name == 'url':
+ info['url'] = service_id
+ elif service_name == 'yoda':
+ formats, duration = self._extract_formats_and_duration(
+ player_info['video_servers'][0], service_id, video_id)
+ info.update({
+ 'duration': duration,
+ 'formats': formats,
+ })
+ else:
+ info.update({
+ '_type': 'url_transparent',
+ 'url': service_id,
+ 'ie_key': service_name.capitalize(),
+ 'display_id': video_id,
+ })
+ if service_name == 'vimeo':
+ info['url'] = smuggle_url(
+ f'https://player.vimeo.com/video/{service_id}',
+ {'referer': url})
+
+ video_slides = traverse_obj(slides, ('slides', ..., 'video', 'id'))
+ if not video_slides:
+ return info
+
+ def entries():
+ yield info
+
+ service_data = self._download_json(
+ f'https://ben.slideslive.com/player/{video_id}/slides_video_service_data',
+ video_id, fatal=False, query={
+ 'player_token': player_token,
+ 'videos': ','.join(video_slides),
+ }, note='Downloading video slides info', errnote='Failed to download video slides info') or {}
+
+ for slide_id, slide in enumerate(traverse_obj(slides, ('slides', ...)), 1):
+            if traverse_obj(slide, ('video', 'service')) != 'yoda':
+ continue
+ video_path = traverse_obj(slide, ('video', 'id'))
+ cdn_hostname = traverse_obj(service_data, (
+ video_path, 'video_servers', ...), get_all=False)
+ if not cdn_hostname or not video_path:
+ continue
+ formats, _ = self._extract_formats_and_duration(
+ cdn_hostname, video_path, video_id, skip_duration=True)
+ if not formats:
+ continue
+ yield {
+ 'id': f'{video_id}-{slide_id:03d}',
+ 'title': f'{info["title"]} - Slide {slide_id:03d}',
+ 'timestamp': info['timestamp'],
+ 'duration': int_or_none(traverse_obj(slide, ('video', 'duration_ms')), scale=1000),
+ 'formats': formats,
+ }
+
+ return self.playlist_result(entries(), f'{video_id}-playlist', info['title'])
diff --git a/yt_dlp/extractor/slutload.py b/yt_dlp/extractor/slutload.py
new file mode 100644
index 0000000..8e6e89c
--- /dev/null
+++ b/yt_dlp/extractor/slutload.py
@@ -0,0 +1,63 @@
+from .common import InfoExtractor
+
+
+class SlutloadIE(InfoExtractor):
+ _VALID_URL = r'https?://(?:\w+\.)?slutload\.com/(?:video/[^/]+|embed_player|watch)/(?P<id>[^/]+)'
+ _TESTS = [{
+ 'url': 'http://www.slutload.com/video/virginie-baisee-en-cam/TD73btpBqSxc/',
+ 'md5': '868309628ba00fd488cf516a113fd717',
+ 'info_dict': {
+ 'id': 'TD73btpBqSxc',
+ 'ext': 'mp4',
+ 'title': 'virginie baisee en cam',
+ 'age_limit': 18,
+ 'thumbnail': r're:https?://.*?\.jpg'
+ },
+ }, {
+ # mobile site
+ 'url': 'http://mobile.slutload.com/video/masturbation-solo/fviFLmc6kzJ/',
+ 'only_matching': True,
+ }, {
+ 'url': 'http://www.slutload.com/embed_player/TD73btpBqSxc/',
+ 'only_matching': True,
+ }, {
+ 'url': 'http://www.slutload.com/watch/TD73btpBqSxc/Virginie-Baisee-En-Cam.html',
+ 'only_matching': True,
+ }]
+
+ def _real_extract(self, url):
+ video_id = self._match_id(url)
+
+ embed_page = self._download_webpage(
+ 'http://www.slutload.com/embed_player/%s' % video_id, video_id,
+ 'Downloading embed page', fatal=False)
+
+ if embed_page:
+ def extract(what):
+ return self._html_search_regex(
+ r'data-video-%s=(["\'])(?P<url>(?:(?!\1).)+)\1' % what,
+ embed_page, 'video %s' % what, default=None, group='url')
+
+ video_url = extract('url')
+ if video_url:
+ title = self._html_search_regex(
+ r'<title>([^<]+)', embed_page, 'title', default=video_id)
+ return {
+ 'id': video_id,
+ 'url': video_url,
+ 'title': title,
+ 'thumbnail': extract('preview'),
+ 'age_limit': 18
+ }
+
+ webpage = self._download_webpage(
+ 'http://www.slutload.com/video/_/%s/' % video_id, video_id)
+ title = self._html_search_regex(
+ r'<h1><strong>([^<]+)</strong>', webpage, 'title').strip()
+ info = self._parse_html5_media_entries(url, webpage, video_id)[0]
+ info.update({
+ 'id': video_id,
+ 'title': title,
+ 'age_limit': 18,
+ })
+ return info
diff --git a/yt_dlp/extractor/smotrim.py b/yt_dlp/extractor/smotrim.py
new file mode 100644
index 0000000..d3f1b69
--- /dev/null
+++ b/yt_dlp/extractor/smotrim.py
@@ -0,0 +1,65 @@
+from .common import InfoExtractor
+from ..utils import ExtractorError
+
+
+class SmotrimIE(InfoExtractor):
+ _VALID_URL = r'https?://smotrim\.ru/(?P<type>brand|video|article|live)/(?P<id>[0-9]+)'
+ _TESTS = [{ # video
+ 'url': 'https://smotrim.ru/video/1539617',
+ 'md5': 'b1923a533c8cab09679789d720d0b1c5',
+ 'info_dict': {
+ 'id': '1539617',
+ 'ext': 'mp4',
+ 'title': 'Полиглот. Китайский с нуля за 16 часов! Урок №16',
+ 'description': '',
+ },
+ 'add_ie': ['RUTV'],
+ }, { # article (geo-restricted? plays fine from the US and JP)
+ 'url': 'https://smotrim.ru/article/2813445',
+ 'md5': 'e0ac453952afbc6a2742e850b4dc8e77',
+ 'info_dict': {
+ 'id': '2431846',
+ 'ext': 'mp4',
+ 'title': 'Новости культуры. Съёмки первой программы "Большие и маленькие"',
+ 'description': 'md5:94a4a22472da4252bf5587a4ee441b99',
+ },
+ 'add_ie': ['RUTV'],
+ }, { # brand, redirect
+ 'url': 'https://smotrim.ru/brand/64356',
+ 'md5': '740472999ccff81d7f6df79cecd91c18',
+ 'info_dict': {
+ 'id': '2354523',
+ 'ext': 'mp4',
+ 'title': 'Большие и маленькие. Лучшее. 4-й выпуск',
+ 'description': 'md5:84089e834429008371ea41ea3507b989',
+ },
+ 'add_ie': ['RUTV'],
+ }, { # live
+ 'url': 'https://smotrim.ru/live/19201',
+ 'info_dict': {
+ 'id': '19201',
+ 'ext': 'mp4',
+ # this looks like a TV channel name
+ 'title': 'Россия Культура. Прямой эфир',
+ 'description': '',
+ },
+ 'add_ie': ['RUTV'],
+ }]
+
+ def _real_extract(self, url):
+ video_id, typ = self._match_valid_url(url).group('id', 'type')
+ rutv_type = 'video'
+ if typ not in ('video', 'live'):
+ webpage = self._download_webpage(url, video_id, f'Resolving {typ} link')
+            # two cases match the regex:
+            # 1. "embedUrl" in the JSON-LD (/brand/ pages)
+            # 2. the "src" attribute of an iframe (/article/ pages)
+            video_id = self._search_regex(
+                r'"https://player\.smotrim\.ru/iframe/video/id/(?P<video_id>\d+)/',
+                webpage, 'video_id', default=None)
+            if not video_id:
+                raise ExtractorError('There is no video on this page.', expected=True)
+ elif typ == 'live':
+ rutv_type = 'live'
+
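+        # hand off to the RUTV extractor via the VGTRK player URL (cf. 'add_ie' in the tests)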
+ return self.url_result(f'https://player.vgtrk.com/iframe/{rutv_type}/id/{video_id}')
diff --git a/yt_dlp/extractor/snotr.py b/yt_dlp/extractor/snotr.py
new file mode 100644
index 0000000..6889f19
--- /dev/null
+++ b/yt_dlp/extractor/snotr.py
@@ -0,0 +1,68 @@
+from .common import InfoExtractor
+from ..utils import (
+ parse_duration,
+ parse_filesize,
+ str_to_int,
+)
+
+
+class SnotrIE(InfoExtractor):
+    _VALID_URL = r'https?://(?:www\.)?snotr\.com/video/(?P<id>\d+)/(\w+)'
+ _TESTS = [{
+ 'url': 'http://www.snotr.com/video/13708/Drone_flying_through_fireworks',
+ 'info_dict': {
+ 'id': '13708',
+ 'ext': 'mp4',
+ 'title': 'Drone flying through fireworks!',
+ 'duration': 248,
+ 'filesize_approx': 40700000,
+ 'description': 'A drone flying through Fourth of July Fireworks',
+ 'thumbnail': r're:^https?://.*\.jpg$',
+ },
+ 'expected_warnings': ['description'],
+ }, {
+ 'url': 'http://www.snotr.com/video/530/David_Letteman_-_George_W_Bush_Top_10',
+ 'info_dict': {
+ 'id': '530',
+ 'ext': 'mp4',
+ 'title': 'David Letteman - George W. Bush Top 10',
+ 'duration': 126,
+ 'filesize_approx': 8500000,
+ 'description': 'The top 10 George W. Bush moments, brought to you by David Letterman!',
+ 'thumbnail': r're:^https?://.*\.jpg$',
+ }
+ }]
+
+ def _real_extract(self, url):
+ mobj = self._match_valid_url(url)
+ video_id = mobj.group('id')
+
+ webpage = self._download_webpage(url, video_id)
+ title = self._og_search_title(webpage)
+
+ description = self._og_search_description(webpage)
+ info_dict = self._parse_html5_media_entries(
+ url, webpage, video_id, m3u8_entry_protocol='m3u8_native')[0]
+
+ view_count = str_to_int(self._html_search_regex(
+ r'<p[^>]*>\s*<strong[^>]*>Views:</strong>\s*<span[^>]*>([\d,\.]+)',
+ webpage, 'view count', fatal=False))
+
+ duration = parse_duration(self._html_search_regex(
+ r'<p[^>]*>\s*<strong[^>]*>Length:</strong>\s*<span[^>]*>([\d:]+)',
+ webpage, 'duration', fatal=False))
+
+ filesize_approx = parse_filesize(self._html_search_regex(
+ r'<p[^>]*>\s*<strong[^>]*>Filesize:</strong>\s*<span[^>]*>([^<]+)',
+ webpage, 'filesize', fatal=False))
+
+ info_dict.update({
+ 'id': video_id,
+ 'description': description,
+ 'title': title,
+ 'view_count': view_count,
+ 'duration': duration,
+ 'filesize_approx': filesize_approx,
+ })
+
+ return info_dict
diff --git a/yt_dlp/extractor/sohu.py b/yt_dlp/extractor/sohu.py
new file mode 100644
index 0000000..c0ff4f9
--- /dev/null
+++ b/yt_dlp/extractor/sohu.py
@@ -0,0 +1,293 @@
+import base64
+import re
+
+from .common import InfoExtractor
+from ..compat import (
+ compat_str,
+ compat_urllib_parse_urlencode,
+)
+from ..utils import (
+    ExtractorError,
+    float_or_none,
+    int_or_none,
+    traverse_obj,
+    try_get,
+    unified_timestamp,
+    url_or_none,
+    urljoin,
+)
+
+
+class SohuIE(InfoExtractor):
+ _VALID_URL = r'https?://(?P<mytv>my\.)?tv\.sohu\.com/.+?/(?(mytv)|n)(?P<id>\d+)\.shtml.*?'
+
+ # Sohu videos give different MD5 sums on Travis CI and my machine
+ _TESTS = [{
+ 'note': 'This video is available only in Mainland China',
+ 'url': 'http://tv.sohu.com/20130724/n382479172.shtml#super',
+ 'info_dict': {
+ 'id': '382479172',
+ 'ext': 'mp4',
+ 'title': 'MV:Far East Movement《The Illest》',
+ },
+        'skip': 'Only available in China',
+ }, {
+ 'url': 'http://tv.sohu.com/20150305/n409385080.shtml',
+ 'info_dict': {
+ 'id': '409385080',
+ 'ext': 'mp4',
+ 'title': '《2015湖南卫视羊年元宵晚会》唐嫣《花好月圆》',
+ },
+ 'skip': 'no longer available',
+ }, {
+ 'url': 'http://my.tv.sohu.com/us/232799889/78693464.shtml',
+ 'info_dict': {
+ 'id': '78693464',
+ 'ext': 'mp4',
+ 'title': '【爱范品】第31期:MWC见不到的奇葩手机',
+ 'uploader': '爱范儿视频',
+ 'duration': 213,
+ 'timestamp': 1425519600,
+ 'upload_date': '20150305',
+ 'thumbnail': 'http://e3f49eaa46b57.cdn.sohucs.com//group1/M10/83/FA/MTAuMTAuODguODA=/6_14cbccdde5eg104SysCutcloud_78693464_7_0b.jpg',
+ 'tags': ['爱范儿', '爱范品', 'MWC', '手机'],
+ }
+ }, {
+ 'note': 'Multipart video',
+ 'url': 'http://my.tv.sohu.com/pl/8384802/78910339.shtml',
+ 'info_dict': {
+ 'id': '78910339',
+ 'title': '【神探苍实战秘籍】第13期 战争之影 赫卡里姆',
+ 'uploader': '小苍cany',
+ 'duration': 744.0,
+ 'timestamp': 1426269360,
+ 'upload_date': '20150313',
+ 'thumbnail': 'http://e3f49eaa46b57.cdn.sohucs.com//group1/M11/89/57/MTAuMTAuODguODA=/6_14cea022a1dg102SysCutcloud_78910339_8_0b.jpg',
+ 'tags': ['小苍MM', '英雄联盟', '实战秘籍'],
+ },
+ 'playlist': [{
+ 'info_dict': {
+ 'id': '78910339_part1',
+ 'ext': 'mp4',
+ 'duration': 294,
+ 'title': '【神探苍实战秘籍】第13期 战争之影 赫卡里姆',
+ }
+ }, {
+ 'info_dict': {
+ 'id': '78910339_part2',
+ 'ext': 'mp4',
+ 'duration': 300,
+ 'title': '【神探苍实战秘籍】第13期 战争之影 赫卡里姆',
+ }
+ }, {
+ 'info_dict': {
+ 'id': '78910339_part3',
+ 'ext': 'mp4',
+ 'duration': 150,
+ 'title': '【神探苍实战秘籍】第13期 战争之影 赫卡里姆',
+ }
+ }]
+ }, {
+ 'note': 'Video with title containing dash',
+ 'url': 'http://my.tv.sohu.com/us/249884221/78932792.shtml',
+ 'info_dict': {
+ 'id': '78932792',
+ 'ext': 'mp4',
+ 'title': 'youtube-dl testing video',
+ 'duration': 360,
+ 'timestamp': 1426348620,
+ 'upload_date': '20150314',
+ 'thumbnail': 'http://e3f49eaa46b57.cdn.sohucs.com//group1/M02/8A/00/MTAuMTAuODguNzk=/6_14cee1be192g102SysCutcloud_78932792_7_7b.jpg',
+ 'tags': [],
+ },
+ 'params': {
+ 'skip_download': True
+ }
+ }]
+
+ def _real_extract(self, url):
+
+ def _fetch_data(vid_id, mytv=False):
+ if mytv:
+ base_data_url = 'http://my.tv.sohu.com/play/videonew.do?vid='
+ else:
+ base_data_url = 'http://hot.vrs.sohu.com/vrs_flash.action?vid='
+
+ return self._download_json(
+ base_data_url + vid_id, video_id,
+ 'Downloading JSON data for %s' % vid_id,
+ headers=self.geo_verification_headers())
+
+ mobj = self._match_valid_url(url)
+ video_id = mobj.group('id')
+ mytv = mobj.group('mytv') is not None
+
+ webpage = self._download_webpage(url, video_id)
+
+ title = re.sub(r'( - 高清正版在线观看)? - 搜狐视频$', '', self._og_search_title(webpage))
+
+ vid = self._html_search_regex(
+ r'var vid ?= ?["\'](\d+)["\']',
+ webpage, 'video path')
+ vid_data = _fetch_data(vid, mytv)
+ if vid_data['play'] != 1:
+ if vid_data.get('status') == 12:
+ raise ExtractorError(
+                    '%s said: There\'s something wrong with the video.' % self.IE_NAME,
+ expected=True)
+ else:
+ self.raise_geo_restricted(
+ '%s said: The video is only licensed to users in Mainland China.' % self.IE_NAME)
+
+ formats_json = {}
+ for format_id in ('nor', 'high', 'super', 'ori', 'h2644k', 'h2654k'):
+ vid_id = vid_data['data'].get('%sVid' % format_id)
+ if not vid_id:
+ continue
+ vid_id = compat_str(vid_id)
+ formats_json[format_id] = vid_data if vid == vid_id else _fetch_data(vid_id, mytv)
+
+ part_count = vid_data['data']['totalBlocks']
+
+ playlist = []
+ for i in range(part_count):
+ formats = []
+ for format_id, format_data in formats_json.items():
+ allot = format_data['allot']
+
+ data = format_data['data']
+ clip_url = traverse_obj(data, (('clipsURL', 'mp4PlayUrl'), i, {url_or_none}), get_all=False)
+ if not clip_url:
+ raise ExtractorError(f'Unable to extract url for clip {i}')
+ su = data['su']
+
+ video_url = 'newflv.sohu.ccgslb.net'
+ cdnId = None
+ retries = 0
+
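+                # the URL API may answer with the placeholder host 'newflv.sohu.ccgslb.net';
+                # keep re-requesting, passing back the returned CDN id, until a real URL is issued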
+ while 'newflv.sohu.ccgslb.net' in video_url:
+ params = {
+ 'prot': 9,
+ 'file': clip_url,
+ 'new': su[i],
+ 'prod': 'h5n',
+ 'rb': 1,
+ }
+
+ if cdnId is not None:
+ params['idc'] = cdnId
+
+ download_note = 'Downloading %s video URL part %d of %d' % (
+ format_id, i + 1, part_count)
+
+ if retries > 0:
+ download_note += ' (retry #%d)' % retries
+ part_info = self._parse_json(self._download_webpage(
+ 'http://%s/?%s' % (allot, compat_urllib_parse_urlencode(params)),
+ video_id, download_note), video_id)
+
+ video_url = part_info['url']
+ cdnId = part_info.get('nid')
+
+ retries += 1
+ if retries > 5:
+ raise ExtractorError('Failed to get video URL')
+
+ formats.append({
+ 'url': video_url,
+ 'format_id': format_id,
+ 'filesize': int_or_none(
+ try_get(data, lambda x: x['clipsBytes'][i])),
+ 'width': int_or_none(data.get('width')),
+ 'height': int_or_none(data.get('height')),
+ 'fps': int_or_none(data.get('fps')),
+ })
+
+ playlist.append({
+ 'id': '%s_part%d' % (video_id, i + 1),
+ 'title': title,
+ 'duration': vid_data['data']['clipsDuration'][i],
+ 'formats': formats,
+ })
+
+ if len(playlist) == 1:
+ info = playlist[0]
+ info['id'] = video_id
+ else:
+ info = {
+ '_type': 'multi_video',
+ 'entries': playlist,
+ 'id': video_id,
+ 'title': title,
+ 'duration': traverse_obj(vid_data, ('data', 'totalDuration', {float_or_none})),
+ }
+
+ if mytv:
+ publish_time = unified_timestamp(self._search_regex(
+ r'publishTime:\s*["\'](\d+-\d+-\d+ \d+:\d+)["\']', webpage, 'publish time', fatal=False))
+ else:
+ publish_time = traverse_obj(vid_data, ('tv_application_time', {unified_timestamp}))
+
+ return {
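+            # publish times appear to be given in China Standard Time (UTC+8),
+            # hence the 8-hour correction to UTC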
+ 'timestamp': publish_time - 8 * 3600 if publish_time else None,
+ **traverse_obj(vid_data, {
+ 'alt_title': ('data', 'subName', {str}),
+ 'uploader': ('wm_data', 'wm_username', {str}),
+ 'thumbnail': ('data', 'coverImg', {url_or_none}),
+ 'tags': ('data', 'tag', {str.split}),
+ }),
+ **info,
+ }
+
+
+class SohuVIE(InfoExtractor):
+ _VALID_URL = r'https?://tv\.sohu\.com/v/(?P<id>[\w=-]+)\.html(?:$|[#?])'
+
+ _TESTS = [{
+ 'note': 'Multipart video',
+ 'url': 'https://tv.sohu.com/v/MjAyMzA2MTQvbjYwMTMxNTE5Mi5zaHRtbA==.html',
+ 'info_dict': {
+ 'id': '601315192',
+ 'title': '《淬火丹心》第1集',
+ 'alt_title': '“点天灯”发生事故',
+ 'duration': 2701.692,
+ 'timestamp': 1686758040,
+ 'upload_date': '20230614',
+ 'thumbnail': 'http://photocdn.tv.sohu.com/img/20230614/vrsa_hor_1686738763256_454010551.jpg',
+ },
+ 'playlist_mincount': 9,
+ 'skip': 'Only available in China',
+ }, {
+ 'url': 'https://tv.sohu.com/v/dXMvMjMyNzk5ODg5Lzc4NjkzNDY0LnNodG1s.html',
+ 'info_dict': {
+ 'id': '78693464',
+ 'ext': 'mp4',
+ 'title': '【爱范品】第31期:MWC见不到的奇葩手机',
+ 'uploader': '爱范儿视频',
+ 'duration': 213,
+ 'timestamp': 1425519600,
+ 'upload_date': '20150305',
+ 'thumbnail': 'http://e3f49eaa46b57.cdn.sohucs.com//group1/M10/83/FA/MTAuMTAuODguODA=/6_14cbccdde5eg104SysCutcloud_78693464_7_0b.jpg',
+ 'tags': ['爱范儿', '爱范品', 'MWC', '手机'],
+ }
+ }, {
+ 'note': 'Multipart video',
+ 'url': 'https://tv.sohu.com/v/dXMvMjQyNTYyMTYzLzc4OTEwMzM5LnNodG1s.html?src=pl',
+ 'info_dict': {
+ 'id': '78910339',
+ 'title': '【神探苍实战秘籍】第13期 战争之影 赫卡里姆',
+ 'uploader': '小苍cany',
+ 'duration': 744.0,
+ 'timestamp': 1426269360,
+ 'upload_date': '20150313',
+ 'thumbnail': 'http://e3f49eaa46b57.cdn.sohucs.com//group1/M11/89/57/MTAuMTAuODguODA=/6_14cea022a1dg102SysCutcloud_78910339_8_0b.jpg',
+ 'tags': ['小苍MM', '英雄联盟', '实战秘籍'],
+ },
+ 'playlist_mincount': 3,
+ }]
+
+ def _real_extract(self, url):
+ encoded_id = self._match_id(url)
+ path = base64.urlsafe_b64decode(encoded_id).decode()
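+        # e.g. 'dXMvMjMyNzk5ODg5Lzc4NjkzNDY0LnNodG1s' decodes to 'us/232799889/78693464.shtml'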
+ subdomain = 'tv' if re.match(r'\d+/n\d+\.shtml', path) else 'my.tv'
+ return self.url_result(urljoin(f'http://{subdomain}.sohu.com/', path), SohuIE)
diff --git a/yt_dlp/extractor/sonyliv.py b/yt_dlp/extractor/sonyliv.py
new file mode 100644
index 0000000..4379572
--- /dev/null
+++ b/yt_dlp/extractor/sonyliv.py
@@ -0,0 +1,220 @@
+import datetime
+import json
+import math
+import random
+import time
+import uuid
+
+from .common import InfoExtractor
+from ..networking.exceptions import HTTPError
+from ..utils import (
+ ExtractorError,
+ int_or_none,
+ jwt_decode_hs256,
+ try_call,
+ try_get,
+)
+
+
+class SonyLIVIE(InfoExtractor):
+ _VALID_URL = r'''(?x)
+ (?:
+ sonyliv:|
+ https?://(?:www\.)?sonyliv\.com/(?:s(?:how|port)s/[^/]+|movies|clip|trailer|music-videos)/[^/?#&]+-
+ )
+ (?P<id>\d+)
+ '''
+ _TESTS = [{
+ 'url': 'https://www.sonyliv.com/shows/bachelors-delight-1700000113/achaari-cheese-toast-1000022678?watch=true',
+ 'info_dict': {
+ 'title': 'Achaari Cheese Toast',
+ 'id': '1000022678',
+ 'ext': 'mp4',
+ 'upload_date': '20200411',
+ 'description': 'md5:3957fa31d9309bf336ceb3f37ad5b7cb',
+ 'timestamp': 1586632091,
+ 'duration': 185,
+ 'season_number': 1,
+ 'series': 'Bachelors Delight',
+ 'episode_number': 1,
+ 'release_year': 2016,
+ },
+ 'params': {
+ 'skip_download': True,
+ },
+ }, {
+ 'url': 'https://www.sonyliv.com/movies/tahalka-1000050121?watch=true',
+ 'only_matching': True,
+ }, {
+ 'url': 'https://www.sonyliv.com/clip/jigarbaaz-1000098925',
+ 'only_matching': True,
+ }, {
+ 'url': 'https://www.sonyliv.com/trailer/sandwiched-forever-1000100286?watch=true',
+ 'only_matching': True,
+ }, {
+ 'url': 'https://www.sonyliv.com/sports/india-tour-of-australia-2020-21-1700000286/cricket-hls-day-3-1st-test-aus-vs-ind-19-dec-2020-1000100959?watch=true',
+ 'only_matching': True,
+ }, {
+ 'url': 'https://www.sonyliv.com/music-videos/yeh-un-dinon-ki-baat-hai-1000018779',
+ 'only_matching': True,
+ }]
+ _GEO_COUNTRIES = ['IN']
+ _HEADERS = {}
+ _LOGIN_HINT = 'Use "--username <mobile_number>" to login using OTP or "--username token --password <auth_token>" to login using auth token.'
+ _NETRC_MACHINE = 'sonyliv'
+
+ def _get_device_id(self):
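+        # mimics the site's JS device-id generator: a UUIDv4-like template
+        # ('x' -> random hex digit, 'y' -> variant nibble) plus a millisecond timestamp suffix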
+ e = int(time.time() * 1000)
+ t = list('xxxxxxxxxxxx4xxxyxxxxxxxxxxxxxxx')
+ for i, c in enumerate(t):
+ n = int((e + 16 * random.random()) % 16) | 0
+ e = math.floor(e / 16)
+ if c == 'x':
+ t[i] = str(n)
+ elif c == 'y':
+ t[i] = '{:x}'.format(3 & n | 8)
+ return ''.join(t) + '-' + str(int(time.time() * 1000))
+
+ def _perform_login(self, username, password):
+ self._HEADERS['device_id'] = self._get_device_id()
+ self._HEADERS['content-type'] = 'application/json'
+
+ if username.lower() == 'token' and try_call(lambda: jwt_decode_hs256(password)):
+ self._HEADERS['authorization'] = password
+ self.report_login()
+ return
+ elif len(username) != 10 or not username.isdigit():
+ raise ExtractorError(f'Invalid username/password; {self._LOGIN_HINT}')
+
+ self.report_login()
+ otp_request_json = self._download_json(
+ 'https://apiv2.sonyliv.com/AGL/1.6/A/ENG/WEB/IN/HR/CREATEOTP-V2',
+ None, note='Sending OTP', headers=self._HEADERS, data=json.dumps({
+ 'mobileNumber': username,
+ 'channelPartnerID': 'MSMIND',
+ 'country': 'IN',
+                'timestamp': datetime.datetime.now().strftime('%Y-%m-%dT%H:%M:%S.%fZ'),
+ 'otpSize': 6,
+ 'loginType': 'REGISTERORSIGNIN',
+ 'isMobileMandatory': True,
+ }).encode())
+ if otp_request_json['resultCode'] == 'KO':
+ raise ExtractorError(otp_request_json['message'], expected=True)
+
+ otp_verify_json = self._download_json(
+ 'https://apiv2.sonyliv.com/AGL/2.0/A/ENG/WEB/IN/HR/CONFIRMOTP-V2',
+ None, note='Verifying OTP', headers=self._HEADERS, data=json.dumps({
+ 'channelPartnerID': 'MSMIND',
+ 'mobileNumber': username,
+ 'country': 'IN',
+ 'otp': self._get_tfa_info('OTP'),
+ 'dmaId': 'IN',
+ 'ageConfirmation': True,
+                'timestamp': datetime.datetime.now().strftime('%Y-%m-%dT%H:%M:%S.%fZ'),
+ 'isMobileMandatory': True,
+ }).encode())
+ if otp_verify_json['resultCode'] == 'KO':
+            raise ExtractorError(otp_verify_json['message'], expected=True)
+ self._HEADERS['authorization'] = otp_verify_json['resultObj']['accessToken']
+
+ def _call_api(self, version, path, video_id):
+ try:
+ return self._download_json(
+ 'https://apiv2.sonyliv.com/AGL/%s/A/ENG/WEB/%s' % (version, path),
+ video_id, headers=self._HEADERS)['resultObj']
+ except ExtractorError as e:
+ if isinstance(e.cause, HTTPError) and e.cause.status == 406 and self._parse_json(
+ e.cause.response.read().decode(), video_id)['message'] == 'Please subscribe to watch this content':
+ self.raise_login_required(self._LOGIN_HINT, method=None)
+ if isinstance(e.cause, HTTPError) and e.cause.status == 403:
+ message = self._parse_json(
+ e.cause.response.read().decode(), video_id)['message']
+ if message == 'Geoblocked Country':
+ self.raise_geo_restricted(countries=self._GEO_COUNTRIES)
+ raise ExtractorError(message)
+ raise
+
+ def _initialize_pre_login(self):
+ self._HEADERS['security_token'] = self._call_api('1.4', 'ALL/GETTOKEN', None)
+
+ def _real_extract(self, url):
+ video_id = self._match_id(url)
+ content = self._call_api(
+ '1.5', 'IN/CONTENT/VIDEOURL/VOD/' + video_id, video_id)
+ if not self.get_param('allow_unplayable_formats') and content.get('isEncrypted'):
+ self.report_drm(video_id)
+ dash_url = content['videoURL']
+ headers = {
+ 'x-playback-session-id': '%s-%d' % (uuid.uuid4().hex, time.time() * 1000)
+ }
+ formats = self._extract_mpd_formats(
+ dash_url, video_id, mpd_id='dash', headers=headers, fatal=False)
+ formats.extend(self._extract_m3u8_formats(
+ dash_url.replace('.mpd', '.m3u8').replace('/DASH/', '/HLS/'),
+ video_id, 'mp4', m3u8_id='hls', headers=headers, fatal=False))
+ for f in formats:
+ f.setdefault('http_headers', {}).update(headers)
+
+ metadata = self._call_api(
+ '1.6', 'IN/DETAIL/' + video_id, video_id)['containers'][0]['metadata']
+ title = metadata['episodeTitle']
+ subtitles = {}
+ for sub in content.get('subtitle', []):
+ sub_url = sub.get('subtitleUrl')
+ if not sub_url:
+ continue
+ subtitles.setdefault(sub.get('subtitleLanguageName', 'ENG'), []).append({
+ 'url': sub_url,
+ })
+ return {
+ 'id': video_id,
+ 'title': title,
+ 'formats': formats,
+ 'thumbnail': content.get('posterURL'),
+ 'description': metadata.get('longDescription') or metadata.get('shortDescription'),
+ 'timestamp': int_or_none(metadata.get('creationDate'), 1000),
+ 'duration': int_or_none(metadata.get('duration')),
+ 'season_number': int_or_none(metadata.get('season')),
+ 'series': metadata.get('title'),
+ 'episode_number': int_or_none(metadata.get('episodeNumber')),
+ 'release_year': int_or_none(metadata.get('year')),
+ 'subtitles': subtitles,
+ }
+
+
+class SonyLIVSeriesIE(InfoExtractor):
+ _VALID_URL = r'https?://(?:www\.)?sonyliv\.com/shows/[^/?#&]+-(?P<id>\d{10})$'
+ _TESTS = [{
+ 'url': 'https://www.sonyliv.com/shows/adaalat-1700000091',
+ 'playlist_mincount': 456,
+ 'info_dict': {
+ 'id': '1700000091',
+ },
+ }]
+    _API_SHOW_URL = 'https://apiv2.sonyliv.com/AGL/1.9/R/ENG/WEB/IN/DL/DETAIL/{}?kids_safe=false&from=0&to=49'
+    _API_EPISODES_URL = 'https://apiv2.sonyliv.com/AGL/1.4/R/ENG/WEB/IN/CONTENT/DETAIL/BUNDLE/{}?from=0&to=1000&orderBy=episodeNumber&sortOrder=asc'
+ _API_SECURITY_URL = 'https://apiv2.sonyliv.com/AGL/1.4/A/ENG/WEB/ALL/GETTOKEN'
+
+ def _entries(self, show_id):
+ headers = {
+ 'Accept': 'application/json, text/plain, */*',
+ 'Referer': 'https://www.sonyliv.com',
+ }
+ headers['security_token'] = self._download_json(
+ self._API_SECURITY_URL, video_id=show_id, headers=headers,
+ note='Downloading security token')['resultObj']
+ seasons = try_get(
+ self._download_json(self._API_SHOW_URL.format(show_id), video_id=show_id, headers=headers),
+ lambda x: x['resultObj']['containers'][0]['containers'], list)
+ for season in seasons or []:
+ season_id = season['id']
+ episodes = try_get(
+ self._download_json(self._API_EPISODES_URL.format(season_id), video_id=season_id, headers=headers),
+ lambda x: x['resultObj']['containers'][0]['containers'], list)
+ for episode in episodes or []:
+ video_id = episode.get('id')
+ yield self.url_result('sonyliv:%s' % video_id, ie=SonyLIVIE.ie_key(), video_id=video_id)
+
+ def _real_extract(self, url):
+ show_id = self._match_id(url)
+ return self.playlist_result(self._entries(show_id), playlist_id=show_id)
diff --git a/yt_dlp/extractor/soundcloud.py b/yt_dlp/extractor/soundcloud.py
new file mode 100644
index 0000000..a7c2afd
--- /dev/null
+++ b/yt_dlp/extractor/soundcloud.py
@@ -0,0 +1,948 @@
+import itertools
+import json
+import re
+# import random
+
+from .common import (
+ InfoExtractor,
+ SearchInfoExtractor
+)
+from ..compat import compat_str
+from ..networking import HEADRequest, Request
+from ..networking.exceptions import HTTPError
+from ..utils import (
+ error_to_compat_str,
+ ExtractorError,
+ float_or_none,
+ int_or_none,
+ KNOWN_EXTENSIONS,
+ mimetype2ext,
+ parse_qs,
+ str_or_none,
+ try_get,
+ unified_timestamp,
+ update_url_query,
+ url_or_none,
+ urlhandle_detect_ext,
+)
+
+
+class SoundcloudEmbedIE(InfoExtractor):
+ _VALID_URL = r'https?://(?:w|player|p)\.soundcloud\.com/player/?.*?\burl=(?P<id>.+)'
+ _EMBED_REGEX = [r'<iframe[^>]+src=(["\'])(?P<url>(?:https?://)?(?:w\.)?soundcloud\.com/player.+?)\1']
+ _TEST = {
+ # from https://www.soundi.fi/uutiset/ennakkokuuntelussa-timo-kaukolammen-station-to-station-to-station-julkaisua-juhlitaan-tanaan-g-livelabissa/
+ 'url': 'https://w.soundcloud.com/player/?visual=true&url=https%3A%2F%2Fapi.soundcloud.com%2Fplaylists%2F922213810&show_artwork=true&maxwidth=640&maxheight=960&dnt=1&secret_token=s-ziYey',
+ 'only_matching': True,
+ }
+
+ def _real_extract(self, url):
+ query = parse_qs(url)
+ api_url = query['url'][0]
+ secret_token = query.get('secret_token')
+ if secret_token:
+ api_url = update_url_query(api_url, {'secret_token': secret_token[0]})
+ return self.url_result(api_url)
+
+
+class SoundcloudBaseIE(InfoExtractor):
+ _NETRC_MACHINE = 'soundcloud'
+
+ _API_V2_BASE = 'https://api-v2.soundcloud.com/'
+ _BASE_URL = 'https://soundcloud.com/'
+ _USER_AGENT = 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/84.0.4147.105 Safari/537.36'
+ _API_AUTH_QUERY_TEMPLATE = '?client_id=%s'
+ _API_AUTH_URL_PW = 'https://api-auth.soundcloud.com/web-auth/sign-in/password%s'
+ _API_VERIFY_AUTH_TOKEN = 'https://api-auth.soundcloud.com/connect/session%s'
+ _access_token = None
+ _HEADERS = {}
+
+ _IMAGE_REPL_RE = r'-([0-9a-z]+)\.jpg'
+
+ _ARTWORK_MAP = {
+ 'mini': 16,
+ 'tiny': 20,
+ 'small': 32,
+ 'badge': 47,
+ 't67x67': 67,
+ 'large': 100,
+ 't300x300': 300,
+ 'crop': 400,
+ 't500x500': 500,
+ 'original': 0,
+ }
+
+ def _store_client_id(self, client_id):
+ self.cache.store('soundcloud', 'client_id', client_id)
+
+ def _update_client_id(self):
+ webpage = self._download_webpage('https://soundcloud.com/', None)
+ for src in reversed(re.findall(r'<script[^>]+src="([^"]+)"', webpage)):
+ script = self._download_webpage(src, None, fatal=False)
+ if script:
+ client_id = self._search_regex(
+ r'client_id\s*:\s*"([0-9a-zA-Z]{32})"',
+ script, 'client id', default=None)
+ if client_id:
+ self._CLIENT_ID = client_id
+ self._store_client_id(client_id)
+ return
+ raise ExtractorError('Unable to extract client id')
+
+ def _download_json(self, *args, **kwargs):
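+        # wraps InfoExtractor._download_json to inject the client_id into every
+        # request and, on HTTP 401/403, refresh the cached client id and retry once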
+ non_fatal = kwargs.get('fatal') is False
+ if non_fatal:
+ del kwargs['fatal']
+ query = kwargs.get('query', {}).copy()
+ for _ in range(2):
+ query['client_id'] = self._CLIENT_ID
+ kwargs['query'] = query
+ try:
+ return super()._download_json(*args, **kwargs)
+ except ExtractorError as e:
+ if isinstance(e.cause, HTTPError) and e.cause.status in (401, 403):
+ self._store_client_id(None)
+ self._update_client_id()
+ continue
+ elif non_fatal:
+ self.report_warning(error_to_compat_str(e))
+ return False
+ raise
+
+ def _initialize_pre_login(self):
+ self._CLIENT_ID = self.cache.load('soundcloud', 'client_id') or 'a3e059563d7fd3372b49b37f00a00bcf'
+
+ def _perform_login(self, username, password):
+ if username != 'oauth':
+ self.report_warning(
+ 'Login using username and password is not currently supported. '
+ 'Use "--username oauth --password <oauth_token>" to login using an oauth token')
+ self._access_token = password
+ query = self._API_AUTH_QUERY_TEMPLATE % self._CLIENT_ID
+ payload = {'session': {'access_token': self._access_token}}
+ token_verification = Request(self._API_VERIFY_AUTH_TOKEN % query, json.dumps(payload).encode('utf-8'))
+ response = self._download_json(token_verification, None, note='Verifying login token...', fatal=False)
+ if response is not False:
+ self._HEADERS = {'Authorization': 'OAuth ' + self._access_token}
+ self.report_login()
+ else:
+ self.report_warning('Provided authorization token seems to be invalid. Continue as guest')
+
+ r'''
+ def genDevId():
+ def genNumBlock():
+ return ''.join([str(random.randrange(10)) for i in range(6)])
+ return '-'.join([genNumBlock() for i in range(4)])
+
+ payload = {
+ 'client_id': self._CLIENT_ID,
+ 'recaptcha_pubkey': 'null',
+ 'recaptcha_response': 'null',
+ 'credentials': {
+ 'identifier': username,
+ 'password': password
+ },
+ 'signature': self.sign(username, password, self._CLIENT_ID),
+ 'device_id': genDevId(),
+ 'user_agent': self._USER_AGENT
+ }
+
+ query = self._API_AUTH_QUERY_TEMPLATE % self._CLIENT_ID
+ login = sanitized_Request(self._API_AUTH_URL_PW % query, json.dumps(payload).encode('utf-8'))
+ response = self._download_json(login, None)
+ self._access_token = response.get('session').get('access_token')
+ if not self._access_token:
+            self.report_warning('Unable to get access token, login may have failed')
+ else:
+ self._HEADERS = {'Authorization': 'OAuth ' + self._access_token}
+ '''
+
+ # signature generation
+ def sign(self, user, pw, clid):
+ a = 33
+ i = 1
+ s = 440123
+ w = 117
+ u = 1800000
+ l = 1042
+ b = 37
+ k = 37
+ c = 5
+ n = '0763ed7314c69015fd4a0dc16bbf4b90' # _KEY
+ y = '8' # _REV
+ r = 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/84.0.4147.105 Safari/537.36' # _USER_AGENT
+ e = user # _USERNAME
+ t = clid # _CLIENT_ID
+
+ d = '-'.join([str(mInt) for mInt in [a, i, s, w, u, l, b, k]])
+ p = n + y + d + r + e + t + d + n
+ h = p
+
+ m = 8011470
+ f = 0
+
+ for f in range(f, len(h)):
+ m = (m >> 1) + ((1 & m) << 23)
+ m += ord(h[f])
+ m &= 16777215
+
+ # c is not even needed
+ out = str(y) + ':' + str(d) + ':' + format(m, 'x') + ':' + str(c)
+
+ return out
+
+ def _extract_info_dict(self, info, full_title=None, secret_token=None, extract_flat=False):
+ track_id = compat_str(info['id'])
+ title = info['title']
+
+ format_urls = set()
+ formats = []
+ query = {'client_id': self._CLIENT_ID}
+ if secret_token:
+ query['secret_token'] = secret_token
+
+ if not extract_flat and info.get('downloadable') and info.get('has_downloads_left'):
+ download_url = update_url_query(
+ self._API_V2_BASE + 'tracks/' + track_id + '/download', query)
+ redirect_url = (self._download_json(download_url, track_id, fatal=False) or {}).get('redirectUri')
+ if redirect_url:
+ urlh = self._request_webpage(
+ HEADRequest(redirect_url), track_id, fatal=False)
+ if urlh:
+ format_url = urlh.url
+ format_urls.add(format_url)
+ formats.append({
+ 'format_id': 'download',
+ 'ext': urlhandle_detect_ext(urlh) or 'mp3',
+ 'filesize': int_or_none(urlh.headers.get('Content-Length')),
+ 'url': format_url,
+ 'quality': 10,
+ })
+
+ def invalid_url(url):
+ return not url or url in format_urls
+
+ def add_format(f, protocol, is_preview=False):
+ mobj = re.search(r'\.(?P<abr>\d+)\.(?P<ext>[0-9a-z]{3,4})(?=[/?])', stream_url)
+ if mobj:
+ for k, v in mobj.groupdict().items():
+ if not f.get(k):
+ f[k] = v
+ format_id_list = []
+ if protocol:
+ format_id_list.append(protocol)
+ ext = f.get('ext')
+ if ext == 'aac':
+ f['abr'] = '256'
+ for k in ('ext', 'abr'):
+ v = f.get(k)
+ if v:
+ format_id_list.append(v)
+ preview = is_preview or re.search(r'/(?:preview|playlist)/0/30/', f['url'])
+ if preview:
+ format_id_list.append('preview')
+ abr = f.get('abr')
+ if abr:
+ f['abr'] = int(abr)
+ if protocol == 'hls':
+ protocol = 'm3u8' if ext == 'aac' else 'm3u8_native'
+ else:
+ protocol = 'http'
+ f.update({
+ 'format_id': '_'.join(format_id_list),
+ 'protocol': protocol,
+ 'preference': -10 if preview else None,
+ })
+ formats.append(f)
+
+ # New API
+ transcodings = try_get(
+ info, lambda x: x['media']['transcodings'], list) or []
+ for t in transcodings:
+ if not isinstance(t, dict):
+ continue
+ format_url = url_or_none(t.get('url'))
+ if not format_url:
+ continue
+ stream = None if extract_flat else self._download_json(
+ format_url, track_id, query=query, fatal=False, headers=self._HEADERS)
+ if not isinstance(stream, dict):
+ continue
+ stream_url = url_or_none(stream.get('url'))
+ if invalid_url(stream_url):
+ continue
+ format_urls.add(stream_url)
+ stream_format = t.get('format') or {}
+ protocol = stream_format.get('protocol')
+ if protocol != 'hls' and '/hls' in format_url:
+ protocol = 'hls'
+ ext = None
+ preset = str_or_none(t.get('preset'))
+ if preset:
+ ext = preset.split('_')[0]
+ if ext not in KNOWN_EXTENSIONS:
+ ext = mimetype2ext(stream_format.get('mime_type'))
+ add_format({
+ 'url': stream_url,
+ 'ext': ext,
+ }, 'http' if protocol == 'progressive' else protocol,
+ t.get('snipped') or '/preview/' in format_url)
+
+ for f in formats:
+ f['vcodec'] = 'none'
+
+ if not formats and info.get('policy') == 'BLOCK':
+ self.raise_geo_restricted(metadata_available=True)
+
+ user = info.get('user') or {}
+
+ thumbnails = []
+ artwork_url = info.get('artwork_url')
+ thumbnail = artwork_url or user.get('avatar_url')
+ if isinstance(thumbnail, compat_str):
+ if re.search(self._IMAGE_REPL_RE, thumbnail):
+ for image_id, size in self._ARTWORK_MAP.items():
+ i = {
+ 'id': image_id,
+ 'url': re.sub(self._IMAGE_REPL_RE, '-%s.jpg' % image_id, thumbnail),
+ }
+ if image_id == 'tiny' and not artwork_url:
+ size = 18
+ elif image_id == 'original':
+ i['preference'] = 10
+ if size:
+ i.update({
+ 'width': size,
+ 'height': size,
+ })
+ thumbnails.append(i)
+ else:
+ thumbnails = [{'url': thumbnail}]
+
+ def extract_count(key):
+ return int_or_none(info.get('%s_count' % key))
+
+ return {
+ 'id': track_id,
+ 'uploader': user.get('username'),
+ 'uploader_id': str_or_none(user.get('id')) or user.get('permalink'),
+ 'uploader_url': user.get('permalink_url'),
+ 'timestamp': unified_timestamp(info.get('created_at')),
+ 'title': title,
+ 'description': info.get('description'),
+ 'thumbnails': thumbnails,
+ 'duration': float_or_none(info.get('duration'), 1000),
+ 'webpage_url': info.get('permalink_url'),
+ 'license': info.get('license'),
+ 'view_count': extract_count('playback'),
+ 'like_count': extract_count('favoritings') or extract_count('likes'),
+ 'comment_count': extract_count('comment'),
+ 'repost_count': extract_count('reposts'),
+ 'genre': info.get('genre'),
+            'formats': formats if not extract_flat else None,
+ }
+
+ @classmethod
+ def _resolv_url(cls, url):
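+        # e.g. https://api-v2.soundcloud.com/resolve?url=https://soundcloud.com/artist/track
+        # (illustrative; the target URL is appended without escaping)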
+ return cls._API_V2_BASE + 'resolve?url=' + url
+
+
+class SoundcloudIE(SoundcloudBaseIE):
+    """Information extractor for soundcloud.com.
+
+    To access the media, the uid of the song and a stream token must be
+    extracted from the page source, and the script must make a request to
+    media.soundcloud.com/crossdomain.xml. The media can then be fetched
+    from a URL composed of the stream token and uid.
+    """
+
+ _VALID_URL = r'''(?x)^(?:https?://)?
+ (?:(?:(?:www\.|m\.)?soundcloud\.com/
+ (?!stations/track)
+ (?P<uploader>[\w\d-]+)/
+ (?!(?:tracks|albums|sets(?:/.+?)?|reposts|likes|spotlight)/?(?:$|[?#]))
+ (?P<title>[\w\d-]+)
+ (?:/(?P<token>(?!(?:albums|sets|recommended))[^?]+?))?
+ (?:[?].*)?$)
+ |(?:api(?:-v2)?\.soundcloud\.com/tracks/(?P<track_id>\d+)
+ (?:/?\?secret_token=(?P<secret_token>[^&]+))?)
+ )
+ '''
+ IE_NAME = 'soundcloud'
+ _TESTS = [
+ {
+ 'url': 'http://soundcloud.com/ethmusic/lostin-powers-she-so-heavy',
+ 'md5': 'ebef0a451b909710ed1d7787dddbf0d7',
+ 'info_dict': {
+ 'id': '62986583',
+ 'ext': 'mp3',
+ 'title': 'Lostin Powers - She so Heavy (SneakPreview) Adrian Ackers Blueprint 1',
+ 'description': 'No Downloads untill we record the finished version this weekend, i was too pumped n i had to post it , earl is prolly gonna b hella p.o\'d',
+ 'uploader': 'E.T. ExTerrestrial Music',
+ 'uploader_id': '1571244',
+ 'timestamp': 1349920598,
+ 'upload_date': '20121011',
+ 'duration': 143.216,
+ 'license': 'all-rights-reserved',
+ 'view_count': int,
+ 'like_count': int,
+ 'comment_count': int,
+ 'repost_count': int,
+ }
+ },
+ # geo-restricted
+ {
+ 'url': 'https://soundcloud.com/the-concept-band/goldrushed-mastered?in=the-concept-band/sets/the-royal-concept-ep',
+ 'info_dict': {
+ 'id': '47127627',
+ 'ext': 'mp3',
+ 'title': 'Goldrushed',
+ 'description': 'From Stockholm Sweden\r\nPovel / Magnus / Filip / David\r\nwww.theroyalconcept.com',
+ 'uploader': 'The Royal Concept',
+ 'uploader_id': '9615865',
+ 'timestamp': 1337635207,
+ 'upload_date': '20120521',
+ 'duration': 227.155,
+ 'license': 'all-rights-reserved',
+ 'view_count': int,
+ 'like_count': int,
+ 'comment_count': int,
+ 'repost_count': int,
+ },
+ },
+ # private link
+ {
+ 'url': 'https://soundcloud.com/jaimemf/youtube-dl-test-video-a-y-baw/s-8Pjrp',
+ 'md5': 'aa0dd32bfea9b0c5ef4f02aacd080604',
+ 'info_dict': {
+ 'id': '123998367',
+ 'ext': 'mp3',
+ 'title': 'Youtube - Dl Test Video \'\' Ä↭',
+ 'description': 'test chars: \"\'/\\ä↭',
+ 'uploader': 'jaimeMF',
+ 'uploader_id': '69767071',
+ 'timestamp': 1386604920,
+ 'upload_date': '20131209',
+ 'duration': 9.927,
+ 'license': 'all-rights-reserved',
+ 'view_count': int,
+ 'like_count': int,
+ 'comment_count': int,
+ 'repost_count': int,
+ },
+ },
+ # private link (alt format)
+ {
+ 'url': 'https://api.soundcloud.com/tracks/123998367?secret_token=s-8Pjrp',
+ 'md5': 'aa0dd32bfea9b0c5ef4f02aacd080604',
+ 'info_dict': {
+ 'id': '123998367',
+ 'ext': 'mp3',
+ 'title': 'Youtube - Dl Test Video \'\' Ä↭',
+ 'description': 'test chars: \"\'/\\ä↭',
+ 'uploader': 'jaimeMF',
+ 'uploader_id': '69767071',
+ 'timestamp': 1386604920,
+ 'upload_date': '20131209',
+ 'duration': 9.927,
+ 'license': 'all-rights-reserved',
+ 'view_count': int,
+ 'like_count': int,
+ 'comment_count': int,
+ 'repost_count': int,
+ },
+ },
+ # downloadable song
+ {
+ 'url': 'https://soundcloud.com/the80m/the-following',
+ 'md5': '9ffcddb08c87d74fb5808a3c183a1d04',
+ 'info_dict': {
+ 'id': '343609555',
+ 'ext': 'wav',
+ },
+ },
+ # private link, downloadable format
+ {
+ 'url': 'https://soundcloud.com/oriuplift/uponly-238-no-talking-wav/s-AyZUd',
+ 'md5': '64a60b16e617d41d0bef032b7f55441e',
+ 'info_dict': {
+ 'id': '340344461',
+ 'ext': 'wav',
+ 'title': 'Uplifting Only 238 [No Talking] (incl. Alex Feed Guestmix) (Aug 31, 2017) [wav]',
+ 'description': 'md5:fa20ee0fca76a3d6df8c7e57f3715366',
+ 'uploader': 'Ori Uplift Music',
+ 'uploader_id': '12563093',
+ 'timestamp': 1504206263,
+ 'upload_date': '20170831',
+ 'duration': 7449.096,
+ 'license': 'all-rights-reserved',
+ 'view_count': int,
+ 'like_count': int,
+ 'comment_count': int,
+ 'repost_count': int,
+ },
+ },
+ # no album art, use avatar pic for thumbnail
+ {
+ 'url': 'https://soundcloud.com/garyvee/sideways-prod-mad-real',
+ 'md5': '59c7872bc44e5d99b7211891664760c2',
+ 'info_dict': {
+ 'id': '309699954',
+ 'ext': 'mp3',
+ 'title': 'Sideways (Prod. Mad Real)',
+ 'description': 'md5:d41d8cd98f00b204e9800998ecf8427e',
+ 'uploader': 'garyvee',
+ 'uploader_id': '2366352',
+ 'timestamp': 1488152409,
+ 'upload_date': '20170226',
+ 'duration': 207.012,
+ 'thumbnail': r're:https?://.*\.jpg',
+ 'license': 'all-rights-reserved',
+ 'view_count': int,
+ 'like_count': int,
+ 'comment_count': int,
+ 'repost_count': int,
+ },
+ 'params': {
+ 'skip_download': True,
+ },
+ },
+ {
+ 'url': 'https://soundcloud.com/giovannisarani/mezzo-valzer',
+ 'md5': 'e22aecd2bc88e0e4e432d7dcc0a1abf7',
+ 'info_dict': {
+ 'id': '583011102',
+ 'ext': 'mp3',
+ 'title': 'Mezzo Valzer',
+ 'description': 'md5:4138d582f81866a530317bae316e8b61',
+ 'uploader': 'Micronie',
+ 'uploader_id': '3352531',
+ 'timestamp': 1551394171,
+ 'upload_date': '20190228',
+ 'duration': 180.157,
+ 'thumbnail': r're:https?://.*\.jpg',
+ 'license': 'all-rights-reserved',
+ 'view_count': int,
+ 'like_count': int,
+ 'comment_count': int,
+ 'repost_count': int,
+ },
+ },
+ {
+ # AAC HQ format available (account with active subscription needed)
+ 'url': 'https://soundcloud.com/wandw/the-chainsmokers-ft-daya-dont-let-me-down-ww-remix-1',
+ 'only_matching': True,
+ },
+ {
+ # Go+ (account with active subscription needed)
+ 'url': 'https://soundcloud.com/taylorswiftofficial/look-what-you-made-me-do',
+ 'only_matching': True,
+ },
+ ]
+
+ def _real_extract(self, url):
+ mobj = self._match_valid_url(url)
+
+ track_id = mobj.group('track_id')
+
+ query = {}
+ if track_id:
+ info_json_url = self._API_V2_BASE + 'tracks/' + track_id
+ full_title = track_id
+ token = mobj.group('secret_token')
+ if token:
+ query['secret_token'] = token
+ else:
+ full_title = resolve_title = '%s/%s' % mobj.group('uploader', 'title')
+ token = mobj.group('token')
+ if token:
+ resolve_title += '/%s' % token
+ info_json_url = self._resolv_url(self._BASE_URL + resolve_title)
+
+ info = self._download_json(
+ info_json_url, full_title, 'Downloading info JSON', query=query, headers=self._HEADERS)
+
+ return self._extract_info_dict(info, full_title, token)
+
+
+class SoundcloudPlaylistBaseIE(SoundcloudBaseIE):
+ def _extract_set(self, playlist, token=None):
+ playlist_id = compat_str(playlist['id'])
+ tracks = playlist.get('tracks') or []
+ if not all([t.get('permalink_url') for t in tracks]) and token:
+ tracks = self._download_json(
+ self._API_V2_BASE + 'tracks', playlist_id,
+ 'Downloading tracks', query={
+ 'ids': ','.join([compat_str(t['id']) for t in tracks]),
+ 'playlistId': playlist_id,
+ 'playlistSecretToken': token,
+ }, headers=self._HEADERS)
+ entries = []
+ for track in tracks:
+ track_id = str_or_none(track.get('id'))
+ url = track.get('permalink_url')
+ if not url:
+ if not track_id:
+ continue
+ url = self._API_V2_BASE + 'tracks/' + track_id
+ if token:
+ url += '?secret_token=' + token
+ entries.append(self.url_result(
+ url, SoundcloudIE.ie_key(), track_id))
+ return self.playlist_result(
+ entries, playlist_id,
+ playlist.get('title'),
+ playlist.get('description'))
+
+
+class SoundcloudSetIE(SoundcloudPlaylistBaseIE):
+ _VALID_URL = r'https?://(?:(?:www|m)\.)?soundcloud\.com/(?P<uploader>[\w\d-]+)/sets/(?P<slug_title>[:\w\d-]+)(?:/(?P<token>[^?/]+))?'
+ IE_NAME = 'soundcloud:set'
+ _TESTS = [{
+ 'url': 'https://soundcloud.com/the-concept-band/sets/the-royal-concept-ep',
+ 'info_dict': {
+ 'id': '2284613',
+ 'title': 'The Royal Concept EP',
+ 'description': 'md5:71d07087c7a449e8941a70a29e34671e',
+ },
+ 'playlist_mincount': 5,
+ }, {
+ 'url': 'https://soundcloud.com/the-concept-band/sets/the-royal-concept-ep/token',
+ 'only_matching': True,
+ }, {
+ 'url': 'https://soundcloud.com/discover/sets/weekly::flacmatic',
+ 'only_matching': True,
+ }, {
+ 'url': 'https://soundcloud.com/discover/sets/charts-top:all-music:de',
+ 'only_matching': True,
+ }, {
+ 'url': 'https://soundcloud.com/discover/sets/charts-top:hiphoprap:kr',
+ 'only_matching': True,
+ }]
+
+ def _real_extract(self, url):
+ mobj = self._match_valid_url(url)
+
+ full_title = '%s/sets/%s' % mobj.group('uploader', 'slug_title')
+ token = mobj.group('token')
+ if token:
+ full_title += '/' + token
+
+ info = self._download_json(self._resolv_url(
+ self._BASE_URL + full_title), full_title, headers=self._HEADERS)
+
+ if 'errors' in info:
+ msgs = (compat_str(err['error_message']) for err in info['errors'])
+            raise ExtractorError('unable to download set: %s' % ','.join(msgs))
+
+ return self._extract_set(info, token)
+
+
+class SoundcloudPagedPlaylistBaseIE(SoundcloudBaseIE):
+ def _extract_playlist(self, base_url, playlist_id, playlist_title):
+ return {
+ '_type': 'playlist',
+ 'id': playlist_id,
+ 'title': playlist_title,
+ 'entries': self._entries(base_url, playlist_id),
+ }
+
+ def _entries(self, url, playlist_id):
+ # Per the SoundCloud documentation, the maximum limit for a linked partitioning query is 200.
+ # https://developers.soundcloud.com/blog/offset-pagination-deprecated
+ query = {
+ 'limit': 200,
+ 'linked_partitioning': '1',
+ 'offset': 0,
+ }
+
+ for i in itertools.count():
+ for retry in self.RetryManager():
+ try:
+ response = self._download_json(
+ url, playlist_id, query=query, headers=self._HEADERS,
+ note=f'Downloading track page {i + 1}')
+ break
+ except ExtractorError as e:
+ # Downloading page may result in intermittent 502 HTTP error
+ # See https://github.com/yt-dlp/yt-dlp/issues/872
+ if not isinstance(e.cause, HTTPError) or e.cause.status != 502:
+ raise
+ retry.error = e
+ continue
+
+ def resolve_entry(*candidates):
+ for cand in candidates:
+ if not isinstance(cand, dict):
+ continue
+ permalink_url = url_or_none(cand.get('permalink_url'))
+ if permalink_url:
+ return self.url_result(
+ permalink_url,
+ SoundcloudIE.ie_key() if SoundcloudIE.suitable(permalink_url) else None,
+ str_or_none(cand.get('id')), cand.get('title'))
+
+ for e in response['collection'] or []:
+ yield resolve_entry(e, e.get('track'), e.get('playlist'))
+
+ url = response.get('next_href')
+ if not url:
+ break
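+            # next_href already carries the pagination cursor, so drop the
+            # initial offset to avoid overriding it on subsequent pages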
+ query.pop('offset', None)
+
+
+class SoundcloudUserIE(SoundcloudPagedPlaylistBaseIE):
+ _VALID_URL = r'''(?x)
+ https?://
+ (?:(?:www|m)\.)?soundcloud\.com/
+ (?P<user>[^/]+)
+ (?:/
+ (?P<rsrc>tracks|albums|sets|reposts|likes|spotlight)
+ )?
+ /?(?:[?#].*)?$
+ '''
+ IE_NAME = 'soundcloud:user'
+ _TESTS = [{
+ 'url': 'https://soundcloud.com/soft-cell-official',
+ 'info_dict': {
+ 'id': '207965082',
+ 'title': 'Soft Cell (All)',
+ },
+ 'playlist_mincount': 28,
+ }, {
+ 'url': 'https://soundcloud.com/soft-cell-official/tracks',
+ 'info_dict': {
+ 'id': '207965082',
+ 'title': 'Soft Cell (Tracks)',
+ },
+ 'playlist_mincount': 27,
+ }, {
+ 'url': 'https://soundcloud.com/soft-cell-official/albums',
+ 'info_dict': {
+ 'id': '207965082',
+ 'title': 'Soft Cell (Albums)',
+ },
+ 'playlist_mincount': 1,
+ }, {
+ 'url': 'https://soundcloud.com/jcv246/sets',
+ 'info_dict': {
+ 'id': '12982173',
+ 'title': 'Jordi / cv (Sets)',
+ },
+ 'playlist_mincount': 2,
+ }, {
+ 'url': 'https://soundcloud.com/jcv246/reposts',
+ 'info_dict': {
+ 'id': '12982173',
+ 'title': 'Jordi / cv (Reposts)',
+ },
+ 'playlist_mincount': 6,
+ }, {
+ 'url': 'https://soundcloud.com/clalberg/likes',
+ 'info_dict': {
+ 'id': '11817582',
+ 'title': 'clalberg (Likes)',
+ },
+ 'playlist_mincount': 5,
+ }, {
+ 'url': 'https://soundcloud.com/grynpyret/spotlight',
+ 'info_dict': {
+ 'id': '7098329',
+ 'title': 'Grynpyret (Spotlight)',
+ },
+ 'playlist_mincount': 1,
+ }]
+
+ _BASE_URL_MAP = {
+ 'all': 'stream/users/%s',
+ 'tracks': 'users/%s/tracks',
+ 'albums': 'users/%s/albums',
+ 'sets': 'users/%s/playlists',
+ 'reposts': 'stream/users/%s/reposts',
+ 'likes': 'users/%s/likes',
+ 'spotlight': 'users/%s/spotlight',
+ }
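+    # e.g. rsrc 'tracks' for user id 123 resolves to
+    # https://api-v2.soundcloud.com/users/123/tracks (illustrative)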
+
+ def _real_extract(self, url):
+ mobj = self._match_valid_url(url)
+ uploader = mobj.group('user')
+
+ user = self._download_json(
+ self._resolv_url(self._BASE_URL + uploader),
+ uploader, 'Downloading user info', headers=self._HEADERS)
+
+ resource = mobj.group('rsrc') or 'all'
+
+ return self._extract_playlist(
+ self._API_V2_BASE + self._BASE_URL_MAP[resource] % user['id'],
+ str_or_none(user.get('id')),
+ '%s (%s)' % (user['username'], resource.capitalize()))
+
+
+class SoundcloudUserPermalinkIE(SoundcloudPagedPlaylistBaseIE):
+ _VALID_URL = r'https?://api\.soundcloud\.com/users/(?P<id>\d+)'
+ IE_NAME = 'soundcloud:user:permalink'
+ _TESTS = [{
+ 'url': 'https://api.soundcloud.com/users/30909869',
+ 'info_dict': {
+ 'id': '30909869',
+ 'title': 'neilcic',
+ },
+ 'playlist_mincount': 23,
+ }]
+
+ def _real_extract(self, url):
+ user_id = self._match_id(url)
+ user = self._download_json(
+ self._resolv_url(url), user_id, 'Downloading user info', headers=self._HEADERS)
+
+ return self._extract_playlist(
+ f'{self._API_V2_BASE}stream/users/{user["id"]}', str(user['id']), user.get('username'))
+
+
+class SoundcloudTrackStationIE(SoundcloudPagedPlaylistBaseIE):
+ _VALID_URL = r'https?://(?:(?:www|m)\.)?soundcloud\.com/stations/track/[^/]+/(?P<id>[^/?#&]+)'
+ IE_NAME = 'soundcloud:trackstation'
+ _TESTS = [{
+ 'url': 'https://soundcloud.com/stations/track/officialsundial/your-text',
+ 'info_dict': {
+ 'id': '286017854',
+ 'title': 'Track station: your text',
+ },
+ 'playlist_mincount': 47,
+ }]
+
+ def _real_extract(self, url):
+ track_name = self._match_id(url)
+
+ track = self._download_json(self._resolv_url(url), track_name, headers=self._HEADERS)
+ track_id = self._search_regex(
+ r'soundcloud:track-stations:(\d+)', track['id'], 'track id')
+
+ return self._extract_playlist(
+ self._API_V2_BASE + 'stations/%s/tracks' % track['id'],
+ track_id, 'Track station: %s' % track['title'])
+
+
+class SoundcloudRelatedIE(SoundcloudPagedPlaylistBaseIE):
+ _VALID_URL = r'https?://(?:(?:www|m)\.)?soundcloud\.com/(?P<slug>[\w\d-]+/[\w\d-]+)/(?P<relation>albums|sets|recommended)'
+ IE_NAME = 'soundcloud:related'
+ _TESTS = [{
+ 'url': 'https://soundcloud.com/wajang/sexapil-pingers-5/recommended',
+ 'info_dict': {
+ 'id': '1084577272',
+ 'title': 'Sexapil - Pingers 5 (Recommended)',
+ },
+ 'playlist_mincount': 50,
+ }, {
+ 'url': 'https://soundcloud.com/wajang/sexapil-pingers-5/albums',
+ 'info_dict': {
+ 'id': '1084577272',
+ 'title': 'Sexapil - Pingers 5 (Albums)',
+ },
+ 'playlist_mincount': 1,
+ }, {
+ 'url': 'https://soundcloud.com/wajang/sexapil-pingers-5/sets',
+ 'info_dict': {
+ 'id': '1084577272',
+ 'title': 'Sexapil - Pingers 5 (Sets)',
+ },
+ 'playlist_mincount': 4,
+ }]
+
+ _BASE_URL_MAP = {
+ 'albums': 'tracks/%s/albums',
+ 'sets': 'tracks/%s/playlists_without_albums',
+ 'recommended': 'tracks/%s/related',
+ }
+
+ def _real_extract(self, url):
+ slug, relation = self._match_valid_url(url).group('slug', 'relation')
+
+ track = self._download_json(
+ self._resolv_url(self._BASE_URL + slug),
+ slug, 'Downloading track info', headers=self._HEADERS)
+
+        if track.get('errors'):
+            error_messages = ','.join(str(err['error_message']) for err in track['errors'])
+            raise ExtractorError(f'{self.IE_NAME} said: {error_messages}', expected=True)
+
+ return self._extract_playlist(
+ self._API_V2_BASE + self._BASE_URL_MAP[relation] % track['id'], str(track['id']),
+ '%s (%s)' % (track.get('title') or slug, relation.capitalize()))
+
+
+class SoundcloudPlaylistIE(SoundcloudPlaylistBaseIE):
+ _VALID_URL = r'https?://api(?:-v2)?\.soundcloud\.com/playlists/(?P<id>[0-9]+)(?:/?\?secret_token=(?P<token>[^&]+?))?$'
+ IE_NAME = 'soundcloud:playlist'
+ _TESTS = [{
+ 'url': 'https://api.soundcloud.com/playlists/4110309',
+ 'info_dict': {
+ 'id': '4110309',
+ 'title': 'TILT Brass - Bowery Poetry Club, August \'03 [Non-Site SCR 02]',
+ 'description': 're:.*?TILT Brass - Bowery Poetry Club',
+ },
+ 'playlist_count': 6,
+ }]
+
+ def _real_extract(self, url):
+ mobj = self._match_valid_url(url)
+ playlist_id = mobj.group('id')
+
+ query = {}
+ token = mobj.group('token')
+ if token:
+ query['secret_token'] = token
+
+ data = self._download_json(
+ self._API_V2_BASE + 'playlists/' + playlist_id,
+ playlist_id, 'Downloading playlist', query=query, headers=self._HEADERS)
+
+ return self._extract_set(data, token)
+
+
+class SoundcloudSearchIE(SoundcloudBaseIE, SearchInfoExtractor):
+ IE_NAME = 'soundcloud:search'
+ IE_DESC = 'Soundcloud search'
+ _SEARCH_KEY = 'scsearch'
+ _TESTS = [{
+ 'url': 'scsearch15:post-avant jazzcore',
+ 'info_dict': {
+ 'id': 'post-avant jazzcore',
+ 'title': 'post-avant jazzcore',
+ },
+ 'playlist_count': 15,
+ }]
+
+ _MAX_RESULTS_PER_PAGE = 200
+ _DEFAULT_RESULTS_PER_PAGE = 50
+
+ def _get_collection(self, endpoint, collection_id, **query):
+ limit = min(
+ query.get('limit', self._DEFAULT_RESULTS_PER_PAGE),
+ self._MAX_RESULTS_PER_PAGE)
+ query.update({
+ 'limit': limit,
+ 'linked_partitioning': 1,
+ 'offset': 0,
+ })
+ next_url = update_url_query(self._API_V2_BASE + endpoint, query)
+
+ for i in itertools.count(1):
+ response = self._download_json(
+ next_url, collection_id, f'Downloading page {i}',
+ 'Unable to download API page', headers=self._HEADERS)
+
+ for item in response.get('collection') or []:
+ if item:
+ yield self.url_result(
+ item['uri'], SoundcloudIE.ie_key(), **self._extract_info_dict(item, extract_flat=True))
+
+ next_url = response.get('next_href')
+ if not next_url:
+ break
+
+ def _get_n_results(self, query, n):
+ return self.playlist_result(itertools.islice(
+ self._get_collection('search/tracks', query, limit=n, q=query),
+ 0, None if n == float('inf') else n), query, query)
diff --git a/yt_dlp/extractor/soundgasm.py b/yt_dlp/extractor/soundgasm.py
new file mode 100644
index 0000000..9e59c7c
--- /dev/null
+++ b/yt_dlp/extractor/soundgasm.py
@@ -0,0 +1,74 @@
+import re
+
+from .common import InfoExtractor
+
+
+class SoundgasmIE(InfoExtractor):
+ IE_NAME = 'soundgasm'
+ _VALID_URL = r'https?://(?:www\.)?soundgasm\.net/u/(?P<user>[0-9a-zA-Z_-]+)/(?P<display_id>[0-9a-zA-Z_-]+)'
+ _TEST = {
+ 'url': 'http://soundgasm.net/u/ytdl/Piano-sample',
+ 'md5': '010082a2c802c5275bb00030743e75ad',
+ 'info_dict': {
+ 'id': '88abd86ea000cafe98f96321b23cc1206cbcbcc9',
+ 'ext': 'm4a',
+ 'title': 'Piano sample',
+ 'description': 'Royalty Free Sample Music',
+ 'uploader': 'ytdl',
+ }
+ }
+
+ def _real_extract(self, url):
+ mobj = self._match_valid_url(url)
+ display_id = mobj.group('display_id')
+
+ webpage = self._download_webpage(url, display_id)
+
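+        # the page's inline player JS contains something like:
+        #   m4a: "https://media.soundgasm.net/sounds/<hash>.m4a"
+        # (illustrative shape of the assignment the regex targets)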
+ audio_url = self._html_search_regex(
+ r'(?s)m4a\s*:\s*(["\'])(?P<url>(?:(?!\1).)+)\1', webpage,
+ 'audio URL', group='url')
+
+ title = self._search_regex(
+ r'<div[^>]+\bclass=["\']jp-title[^>]+>([^<]+)',
+ webpage, 'title', default=display_id)
+
+ description = self._html_search_regex(
+ (r'(?s)<div[^>]+\bclass=["\']jp-description[^>]+>(.+?)</div>',
+ r'(?s)<li>Description:\s(.*?)<\/li>'),
+ webpage, 'description', fatal=False)
+
+ audio_id = self._search_regex(
+ r'/([^/]+)\.m4a', audio_url, 'audio id', default=display_id)
+
+ return {
+ 'id': audio_id,
+ 'display_id': display_id,
+ 'url': audio_url,
+ 'vcodec': 'none',
+ 'title': title,
+ 'description': description,
+ 'uploader': mobj.group('user'),
+ }
+
+
+class SoundgasmProfileIE(InfoExtractor):
+ IE_NAME = 'soundgasm:profile'
+ _VALID_URL = r'https?://(?:www\.)?soundgasm\.net/u/(?P<id>[^/]+)/?(?:\#.*)?$'
+ _TEST = {
+ 'url': 'http://soundgasm.net/u/ytdl',
+ 'info_dict': {
+ 'id': 'ytdl',
+ },
+ 'playlist_count': 1,
+ }
+
+ def _real_extract(self, url):
+ profile_id = self._match_id(url)
+
+ webpage = self._download_webpage(url, profile_id)
+
+ entries = [
+ self.url_result(audio_url, 'Soundgasm')
+ for audio_url in re.findall(r'href="([^"]+/u/%s/[^"]+)' % profile_id, webpage)]
+
+ return self.playlist_result(entries, profile_id)
diff --git a/yt_dlp/extractor/southpark.py b/yt_dlp/extractor/southpark.py
new file mode 100644
index 0000000..e23f192
--- /dev/null
+++ b/yt_dlp/extractor/southpark.py
@@ -0,0 +1,188 @@
+from .mtv import MTVServicesInfoExtractor
+
+
+class SouthParkIE(MTVServicesInfoExtractor):
+ IE_NAME = 'southpark.cc.com'
+ _VALID_URL = r'https?://(?:www\.)?(?P<url>southpark(?:\.cc|studios)\.com/((?:video-)?clips|(?:full-)?episodes|collections)/(?P<id>.+?)(\?|#|$))'
+
+ _FEED_URL = 'http://feeds.mtvnservices.com/od/feed/intl-mrss-player-feed'
+
+ _TESTS = [{
+ 'url': 'https://southpark.cc.com/video-clips/d7wr06/south-park-you-all-agreed-to-counseling',
+ 'info_dict': {
+ 'ext': 'mp4',
+ 'title': 'You All Agreed to Counseling',
+ 'description': 'Kenny, Cartman, Stan, and Kyle visit Mr. Mackey and ask for his help getting Mrs. Nelson to come back. Mr. Mackey reveals the only way to get things back to normal is to get the teachers vaccinated.',
+ 'timestamp': 1615352400,
+ 'upload_date': '20210310',
+ },
+ }, {
+ 'url': 'http://southpark.cc.com/collections/7758/fan-favorites/1',
+ 'only_matching': True,
+ }, {
+ 'url': 'https://www.southparkstudios.com/episodes/h4o269/south-park-stunning-and-brave-season-19-ep-1',
+ 'only_matching': True,
+ }]
+
+ def _get_feed_query(self, uri):
+ return {
+ 'accountOverride': 'intl.mtvi.com',
+ 'arcEp': 'shared.southpark.global',
+ 'ep': '90877963',
+ 'imageEp': 'shared.southpark.global',
+ 'mgid': uri,
+ }
+
+
+class SouthParkEsIE(SouthParkIE): # XXX: Do not subclass from concrete IE
+ IE_NAME = 'southpark.cc.com:español'
+ _VALID_URL = r'https?://(?:www\.)?(?P<url>southpark\.cc\.com/es/episodios/(?P<id>.+?)(\?|#|$))'
+ _LANG = 'es'
+
+ _TESTS = [{
+ 'url': 'http://southpark.cc.com/es/episodios/s01e01-cartman-consigue-una-sonda-anal#source=351c1323-0b96-402d-a8b9-40d01b2e9bde&position=1&sort=!airdate',
+ 'info_dict': {
+ 'title': 'Cartman Consigue Una Sonda Anal',
+ 'description': 'Cartman Consigue Una Sonda Anal',
+ },
+ 'playlist_count': 4,
+ 'skip': 'Geo-restricted',
+ }]
+
+
+class SouthParkDeIE(SouthParkIE): # XXX: Do not subclass from concrete IE
+ IE_NAME = 'southpark.de'
+ _VALID_URL = r'https?://(?:www\.)?(?P<url>southpark\.de/(?:(en/(videoclip|collections|episodes|video-clips))|(videoclip|collections|folgen))/(?P<id>(?P<unique_id>.+?)/.+?)(?:\?|#|$))'
+ _TESTS = [{
+ 'url': 'https://www.southpark.de/videoclip/rsribv/south-park-rueckzug-zum-gummibonbon-wald',
+ 'only_matching': True,
+ }, {
+ 'url': 'https://www.southpark.de/folgen/jiru42/south-park-verkabelung-staffel-23-ep-9',
+ 'only_matching': True,
+ }, {
+ 'url': 'https://www.southpark.de/collections/zzno5a/south-park-good-eats/7q26gp',
+ 'only_matching': True,
+ }, {
+ # clip
+ 'url': 'https://www.southpark.de/en/video-clips/ct46op/south-park-tooth-fairy-cartman',
+ 'info_dict': {
+ 'id': 'e99d45ea-ed00-11e0-aca6-0026b9414f30',
+ 'ext': 'mp4',
+ 'title': 'Tooth Fairy Cartman',
+ 'description': 'md5:db02e23818b4dc9cb5f0c5a7e8833a68',
+ },
+ }, {
+ # episode
+ 'url': 'https://www.southpark.de/en/episodes/yy0vjs/south-park-the-pandemic-special-season-24-ep-1',
+ 'info_dict': {
+ 'id': 'f5fbd823-04bc-11eb-9b1b-0e40cf2fc285',
+ 'ext': 'mp4',
+ 'title': 'South Park',
+ 'description': 'md5:ae0d875eff169dcbed16b21531857ac1',
+ },
+ }, {
+ # clip
+ 'url': 'https://www.southpark.de/videoclip/ct46op/south-park-zahnfee-cartman',
+ 'info_dict': {
+ 'id': 'e99d45ea-ed00-11e0-aca6-0026b9414f30',
+ 'ext': 'mp4',
+ 'title': 'Zahnfee Cartman',
+ 'description': 'md5:b917eec991d388811d911fd1377671ac'
+ },
+ }, {
+ # episode
+ 'url': 'https://www.southpark.de/folgen/242csn/south-park-her-mit-dem-hirn-staffel-1-ep-7',
+ 'info_dict': {
+ 'id': '607115f3-496f-40c3-8647-2b0bcff486c0',
+ 'ext': 'mp4',
+ 'title': 'md5:South Park | Pink Eye | E 0107 | HDSS0107X deu | Version: 634312 | Comedy Central S1',
+ },
+ }]
+
+ def _get_feed_url(self, uri, url=None):
+ video_id = self._id_from_uri(uri)
+ config = self._download_json(
+ 'http://media.mtvnservices.com/pmt/e1/access/index.html?uri=%s&configtype=edge&ref=%s' % (uri, url), video_id)
+ return self._remove_template_parameter(config['feedWithQueryParams'])
+
+ def _get_feed_query(self, uri):
+ return
+
+
+class SouthParkLatIE(SouthParkIE): # XXX: Do not subclass from concrete IE
+ IE_NAME = 'southpark.lat'
+ _VALID_URL = r'https?://(?:www\.)?southpark\.lat/(?:en/)?(?:video-?clips?|collections|episod(?:e|io)s)/(?P<id>[^/?#&]+)'
+ _TESTS = [{
+ 'url': 'https://www.southpark.lat/en/video-clips/ct46op/south-park-tooth-fairy-cartman',
+ 'only_matching': True,
+ }, {
+ 'url': 'https://www.southpark.lat/episodios/9h0qbg/south-park-orgia-gatuna-temporada-3-ep-7',
+ 'only_matching': True,
+ }, {
+ 'url': 'https://www.southpark.lat/en/collections/29ve08/south-park-heating-up/lydbrc',
+ 'only_matching': True,
+ }, {
+ # clip
+ 'url': 'https://www.southpark.lat/en/video-clips/ct46op/south-park-tooth-fairy-cartman',
+ 'info_dict': {
+ 'id': 'e99d45ea-ed00-11e0-aca6-0026b9414f30',
+ 'ext': 'mp4',
+ 'title': 'Tooth Fairy Cartman',
+ 'description': 'md5:db02e23818b4dc9cb5f0c5a7e8833a68',
+ },
+ }, {
+ # episode
+ 'url': 'https://www.southpark.lat/episodios/9h0qbg/south-park-orgia-gatuna-temporada-3-ep-7',
+ 'info_dict': {
+ 'id': 'f5fbd823-04bc-11eb-9b1b-0e40cf2fc285',
+ 'ext': 'mp4',
+ 'title': 'South Park',
+ 'description': 'md5:ae0d875eff169dcbed16b21531857ac1',
+ },
+ }]
+
+ def _get_feed_url(self, uri, url=None):
+ video_id = self._id_from_uri(uri)
+ config = self._download_json(
+ f'http://media.mtvnservices.com/pmt/e1/access/index.html?uri={uri}&configtype=edge&ref={url}',
+ video_id)
+ return self._remove_template_parameter(config['feedWithQueryParams'])
+
+ def _get_feed_query(self, uri):
+ return
+
+
+class SouthParkNlIE(SouthParkIE): # XXX: Do not subclass from concrete IE
+ IE_NAME = 'southpark.nl'
+ _VALID_URL = r'https?://(?:www\.)?(?P<url>southpark\.nl/(?:clips|(?:full-)?episodes|collections)/(?P<id>.+?)(\?|#|$))'
+ _FEED_URL = 'http://www.southpark.nl/feeds/video-player/mrss/'
+
+ _TESTS = [{
+ 'url': 'http://www.southpark.nl/full-episodes/s18e06-freemium-isnt-free',
+ 'info_dict': {
+ 'title': 'Freemium Isn\'t Free',
+ 'description': 'Stan is addicted to the new Terrance and Phillip mobile game.',
+ },
+ 'playlist_mincount': 3,
+ }]
+
+
+class SouthParkDkIE(SouthParkIE): # XXX: Do not subclass from concrete IE
+ IE_NAME = 'southparkstudios.dk'
+ _VALID_URL = r'https?://(?:www\.)?(?P<url>southparkstudios\.(?:dk|nu)/(?:clips|full-episodes|collections)/(?P<id>.+?)(\?|#|$))'
+ _FEED_URL = 'http://www.southparkstudios.dk/feeds/video-player/mrss/'
+
+ _TESTS = [{
+ 'url': 'http://www.southparkstudios.dk/full-episodes/s18e07-grounded-vindaloop',
+ 'info_dict': {
+ 'title': 'Grounded Vindaloop',
+ 'description': 'Butters is convinced he\'s living in a virtual reality.',
+ },
+ 'playlist_mincount': 3,
+ }, {
+ 'url': 'http://www.southparkstudios.dk/collections/2476/superhero-showdown/1',
+ 'only_matching': True,
+ }, {
+ 'url': 'http://www.southparkstudios.nu/collections/2476/superhero-showdown/1',
+ 'only_matching': True,
+ }]
diff --git a/yt_dlp/extractor/sovietscloset.py b/yt_dlp/extractor/sovietscloset.py
new file mode 100644
index 0000000..493eea2
--- /dev/null
+++ b/yt_dlp/extractor/sovietscloset.py
@@ -0,0 +1,207 @@
+from .common import InfoExtractor
+from ..utils import (
+ try_get,
+    unified_timestamp,
+)
+
+
+class SovietsClosetBaseIE(InfoExtractor):
+ MEDIADELIVERY_REFERER = {'Referer': 'https://iframe.mediadelivery.net/'}
+
+ def parse_nuxt_jsonp(self, nuxt_jsonp_url, video_id, name):
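+        # payload.js files are Nuxt static-site JSONP wrappers of the form
+        # __NUXT_JSONP__("...", {...}); _search_nuxt_data unwraps them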
+ nuxt_jsonp = self._download_webpage(nuxt_jsonp_url, video_id, note=f'Downloading {name} __NUXT_JSONP__')
+ return self._search_nuxt_data(nuxt_jsonp, video_id, '__NUXT_JSONP__')
+
+ def video_meta(self, video_id, game_name, category_name, episode_number, stream_date):
+ title = game_name
+ if category_name and category_name != 'Misc':
+ title += f' - {category_name}'
+ if episode_number:
+ title += f' #{episode_number}'
+
+ timestamp = unified_timestamp(stream_date)
+
+ return {
+ 'id': video_id,
+ 'title': title,
+ 'http_headers': self.MEDIADELIVERY_REFERER,
+ 'uploader': 'SovietWomble',
+ 'creator': 'SovietWomble',
+ 'release_timestamp': timestamp,
+ 'timestamp': timestamp,
+ 'uploader_id': 'SovietWomble',
+ 'uploader_url': 'https://www.twitch.tv/SovietWomble',
+ 'was_live': True,
+ 'availability': 'public',
+ 'series': game_name,
+ 'season': category_name,
+ 'episode_number': episode_number,
+ }
+
+
+class SovietsClosetIE(SovietsClosetBaseIE):
+ _VALID_URL = r'https?://(?:www\.)?sovietscloset\.com/video/(?P<id>[0-9]+)/?'
+ _TESTS = [
+ {
+ 'url': 'https://sovietscloset.com/video/1337',
+ 'md5': 'bd012b04b261725510ca5383074cdd55',
+ 'info_dict': {
+ 'id': '1337',
+ 'ext': 'mp4',
+ 'title': 'The Witcher #13',
+ 'thumbnail': r're:^https?://.*\.b-cdn\.net/2f0cfbf4-3588-43a9-a7d6-7c9ea3755e67/thumbnail\.jpg$',
+ 'uploader': 'SovietWomble',
+ 'creator': 'SovietWomble',
+ 'release_timestamp': 1492091580,
+ 'release_date': '20170413',
+ 'timestamp': 1492091580,
+ 'upload_date': '20170413',
+ 'uploader_id': 'SovietWomble',
+ 'uploader_url': 'https://www.twitch.tv/SovietWomble',
+ 'duration': 7007,
+ 'was_live': True,
+ 'availability': 'public',
+ 'series': 'The Witcher',
+ 'season': 'Misc',
+ 'episode_number': 13,
+ 'episode': 'Episode 13',
+ },
+ },
+ {
+ 'url': 'https://sovietscloset.com/video/1105',
+ 'md5': '89fa928f183893cb65a0b7be846d8a90',
+ 'info_dict': {
+ 'id': '1105',
+ 'ext': 'mp4',
+ 'title': 'Arma 3 - Zeus Games #5',
+ 'uploader': 'SovietWomble',
+ 'thumbnail': r're:^https?://.*\.b-cdn\.net/c0e5e76f-3a93-40b4-bf01-12343c2eec5d/thumbnail\.jpg$',
+ 'creator': 'SovietWomble',
+ 'release_timestamp': 1461157200,
+ 'release_date': '20160420',
+ 'timestamp': 1461157200,
+ 'upload_date': '20160420',
+ 'uploader_id': 'SovietWomble',
+ 'uploader_url': 'https://www.twitch.tv/SovietWomble',
+ 'duration': 8804,
+ 'was_live': True,
+ 'availability': 'public',
+ 'series': 'Arma 3',
+ 'season': 'Zeus Games',
+ 'episode_number': 5,
+ 'episode': 'Episode 5',
+ },
+ },
+ ]
+
+ def _extract_bunnycdn_iframe(self, video_id, bunnycdn_id):
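+        # mediadelivery.net (BunnyCDN) embeds require the iframe Referer;
+        # the embed HTML inlines both the m3u8 playlist and the thumbnail URL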
+ iframe = self._download_webpage(
+ f'https://iframe.mediadelivery.net/embed/5105/{bunnycdn_id}',
+ video_id, note='Downloading BunnyCDN iframe', headers=self.MEDIADELIVERY_REFERER)
+
+ m3u8_url = self._search_regex(r'(https?://.*?\.m3u8)', iframe, 'm3u8 url')
+ thumbnail_url = self._search_regex(r'(https?://.*?thumbnail\.jpg)', iframe, 'thumbnail url')
+
+ m3u8_formats = self._extract_m3u8_formats(m3u8_url, video_id, headers=self.MEDIADELIVERY_REFERER)
+
+ if not m3u8_formats:
+ duration = None
+ else:
+ duration = self._extract_m3u8_vod_duration(
+ m3u8_formats[0]['url'], video_id, headers=self.MEDIADELIVERY_REFERER)
+
+ return {
+ 'formats': m3u8_formats,
+ 'thumbnail': thumbnail_url,
+ 'duration': duration,
+ }
+
+ def _real_extract(self, url):
+ video_id = self._match_id(url)
+ webpage = self._download_webpage(url, video_id)
+
+ static_assets_base = self._search_regex(r'(/_nuxt/static/\d+)', webpage, 'staticAssetsBase')
+ static_assets_base = f'https://sovietscloset.com{static_assets_base}'
+
+ stream = self.parse_nuxt_jsonp(f'{static_assets_base}/video/{video_id}/payload.js', video_id, 'video')['stream']
+
+ return {
+ **self.video_meta(
+ video_id=video_id, game_name=stream['game']['name'],
+ category_name=try_get(stream, lambda x: x['subcategory']['name'], str),
+ episode_number=stream.get('number'), stream_date=stream.get('date')),
+ **self._extract_bunnycdn_iframe(video_id, stream['bunnyId']),
+ }
+
+
+class SovietsClosetPlaylistIE(SovietsClosetBaseIE):
+ _VALID_URL = r'https?://(?:www\.)?sovietscloset\.com/(?!video)(?P<id>[^#?]+)'
+    _TESTS = [
+        {
+ 'url': 'https://sovietscloset.com/The-Witcher',
+ 'info_dict': {
+ 'id': 'The-Witcher',
+ 'title': 'The Witcher',
+ },
+ 'playlist_mincount': 31,
+ },
+ {
+ 'url': 'https://sovietscloset.com/Arma-3/Zeus-Games',
+ 'info_dict': {
+ 'id': 'Arma-3/Zeus-Games',
+ 'title': 'Arma 3 - Zeus Games',
+ },
+ 'playlist_mincount': 3,
+ },
+ {
+ 'url': 'https://sovietscloset.com/arma-3/zeus-games/',
+ 'info_dict': {
+ 'id': 'arma-3/zeus-games',
+ 'title': 'Arma 3 - Zeus Games',
+ },
+ 'playlist_mincount': 3,
+ },
+ {
+ 'url': 'https://sovietscloset.com/Total-War-Warhammer',
+ 'info_dict': {
+ 'id': 'Total-War-Warhammer',
+ 'title': 'Total War: Warhammer - Greenskins',
+ },
+ 'playlist_mincount': 33,
+ },
+ ]
+
+ def _real_extract(self, url):
+ playlist_id = self._match_id(url)
+ if playlist_id.endswith('/'):
+ playlist_id = playlist_id[:-1]
+
+ webpage = self._download_webpage(url, playlist_id)
+
+ static_assets_base = self._search_regex(r'(/_nuxt/static/\d+)', webpage, 'staticAssetsBase')
+ static_assets_base = f'https://sovietscloset.com{static_assets_base}'
+
+ sovietscloset = self.parse_nuxt_jsonp(f'{static_assets_base}/payload.js', playlist_id, 'global')['games']
+
+ if '/' in playlist_id:
+ game_slug, category_slug = playlist_id.lower().split('/')
+ else:
+ game_slug = playlist_id.lower()
+ category_slug = 'misc'
+
+ game = next(game for game in sovietscloset if game['slug'].lower() == game_slug)
+ category = next((cat for cat in game['subcategories'] if cat.get('slug', '').lower() == category_slug),
+ game['subcategories'][0])
+ category_slug = category.get('slug', '').lower() or category_slug
+ playlist_title = game.get('name') or game_slug
+ if category_slug != 'misc':
+ playlist_title += f' - {category.get("name") or category_slug}'
+ entries = [{
+ **self.url_result(f'https://sovietscloset.com/video/{stream["id"]}', ie=SovietsClosetIE.ie_key()),
+ **self.video_meta(
+ video_id=stream['id'], game_name=game['name'], category_name=category.get('name'),
+ episode_number=i + 1, stream_date=stream.get('date')),
+ } for i, stream in enumerate(category['streams'])]
+
+ return self.playlist_result(entries, playlist_id, playlist_title)
diff --git a/yt_dlp/extractor/spankbang.py b/yt_dlp/extractor/spankbang.py
new file mode 100644
index 0000000..43da34a
--- /dev/null
+++ b/yt_dlp/extractor/spankbang.py
@@ -0,0 +1,195 @@
+import re
+
+from .common import InfoExtractor
+from ..utils import (
+ determine_ext,
+ ExtractorError,
+ merge_dicts,
+ parse_duration,
+ parse_resolution,
+ str_to_int,
+ url_or_none,
+ urlencode_postdata,
+ urljoin,
+)
+
+
+class SpankBangIE(InfoExtractor):
+ _VALID_URL = r'''(?x)
+ https?://
+ (?:[^/]+\.)?spankbang\.com/
+ (?:
+ (?P<id>[\da-z]+)/(?:video|play|embed)\b|
+ [\da-z]+-(?P<id_2>[\da-z]+)/playlist/[^/?#&]+
+ )
+ '''
+ _TESTS = [{
+ 'url': 'https://spankbang.com/56b3d/video/the+slut+maker+hmv',
+ 'md5': '2D13903DE4ECC7895B5D55930741650A',
+ 'info_dict': {
+ 'id': '56b3d',
+ 'ext': 'mp4',
+ 'title': 'The Slut Maker HMV',
+ 'description': 'Girls getting converted into cock slaves.',
+ 'thumbnail': r're:^https?://.*\.jpg$',
+ 'uploader': 'Mindself',
+ 'uploader_id': 'mindself',
+ 'timestamp': 1617109572,
+ 'upload_date': '20210330',
+ 'age_limit': 18,
+ }
+ }, {
+ # 480p only
+ 'url': 'http://spankbang.com/1vt0/video/solvane+gangbang',
+ 'only_matching': True,
+ }, {
+ # no uploader
+ 'url': 'http://spankbang.com/lklg/video/sex+with+anyone+wedding+edition+2',
+ 'only_matching': True,
+ }, {
+ # mobile page
+ 'url': 'http://m.spankbang.com/1o2de/video/can+t+remember+her+name',
+ 'only_matching': True,
+ }, {
+ # 4k
+ 'url': 'https://spankbang.com/1vwqx/video/jade+kush+solo+4k',
+ 'only_matching': True,
+ }, {
+ 'url': 'https://m.spankbang.com/3vvn/play/fantasy+solo/480p/',
+ 'only_matching': True,
+ }, {
+ 'url': 'https://m.spankbang.com/3vvn/play',
+ 'only_matching': True,
+ }, {
+ 'url': 'https://spankbang.com/2y3td/embed/',
+ 'only_matching': True,
+ }, {
+ 'url': 'https://spankbang.com/2v7ik-7ecbgu/playlist/latina+booty',
+ 'only_matching': True,
+ }]
+
+ def _real_extract(self, url):
+ mobj = self._match_valid_url(url)
+ video_id = mobj.group('id') or mobj.group('id_2')
+ webpage = self._download_webpage(
+ url.replace('/%s/embed' % video_id, '/%s/video' % video_id),
+ video_id, headers={'Cookie': 'country=US'})
+
+ if re.search(r'<[^>]+\b(?:id|class)=["\']video_removed', webpage):
+ raise ExtractorError(
+ 'Video %s is not available' % video_id, expected=True)
+
+ formats = []
+
+ def extract_format(format_id, format_url):
+ f_url = url_or_none(format_url)
+ if not f_url:
+ return
+ f = parse_resolution(format_id)
+ ext = determine_ext(f_url)
+ if format_id.startswith('m3u8') or ext == 'm3u8':
+ formats.extend(self._extract_m3u8_formats(
+ f_url, video_id, 'mp4', entry_protocol='m3u8_native',
+ m3u8_id='hls', fatal=False))
+ elif format_id.startswith('mpd') or ext == 'mpd':
+ formats.extend(self._extract_mpd_formats(
+ f_url, video_id, mpd_id='dash', fatal=False))
+ elif ext == 'mp4' or f.get('width') or f.get('height'):
+ f.update({
+ 'url': f_url,
+ 'format_id': format_id,
+ })
+ formats.append(f)
+
+ STREAM_URL_PREFIX = 'stream_url_'
+
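+        # the page defines JS variables such as stream_url_240p = 'https://...'
+        # (illustrative suffix); the named groups capture the quality id and URL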
+ for mobj in re.finditer(
+ r'%s(?P<id>[^\s=]+)\s*=\s*(["\'])(?P<url>(?:(?!\2).)+)\2'
+ % STREAM_URL_PREFIX, webpage):
+            extract_format(*mobj.group('id', 'url'))
+
+ if not formats:
+ stream_key = self._search_regex(
+ r'data-streamkey\s*=\s*(["\'])(?P<value>(?:(?!\1).)+)\1',
+ webpage, 'stream key', group='value')
+
+ stream = self._download_json(
+ 'https://spankbang.com/api/videos/stream', video_id,
+ 'Downloading stream JSON', data=urlencode_postdata({
+ 'id': stream_key,
+ 'data': 0,
+ }), headers={
+ 'Referer': url,
+ 'X-Requested-With': 'XMLHttpRequest',
+ })
+
+ for format_id, format_url in stream.items():
+ if format_url and isinstance(format_url, list):
+ format_url = format_url[0]
+ extract_format(format_id, format_url)
+
+ info = self._search_json_ld(webpage, video_id, default={})
+
+ title = self._html_search_regex(
+ r'(?s)<h1[^>]+\btitle=["\']([^"]+)["\']>', webpage, 'title', default=None)
+ description = self._search_regex(
+ r'<div[^>]+\bclass=["\']bottom[^>]+>\s*<p>[^<]*</p>\s*<p>([^<]+)',
+ webpage, 'description', default=None)
+ thumbnail = self._og_search_thumbnail(webpage, default=None)
+ uploader = self._html_search_regex(
+ r'<svg[^>]+\bclass="(?:[^"]*?user[^"]*?)">.*?</svg>([^<]+)', webpage, 'uploader', default=None)
+ uploader_id = self._html_search_regex(
+ r'<a[^>]+href="/profile/([^"]+)"', webpage, 'uploader_id', default=None)
+ duration = parse_duration(self._search_regex(
+ r'<div[^>]+\bclass=["\']right_side[^>]+>\s*<span>([^<]+)',
+ webpage, 'duration', default=None))
+ view_count = str_to_int(self._search_regex(
+ r'([\d,.]+)\s+plays', webpage, 'view count', default=None))
+
+ age_limit = self._rta_search(webpage)
+
+ return merge_dicts({
+ 'id': video_id,
+ 'title': title or video_id,
+ 'description': description,
+ 'thumbnail': thumbnail,
+ 'uploader': uploader,
+ 'uploader_id': uploader_id,
+ 'duration': duration,
+ 'view_count': view_count,
+ 'formats': formats,
+ 'age_limit': age_limit,
+        }, info)
+
+
+class SpankBangPlaylistIE(InfoExtractor):
+ _VALID_URL = r'https?://(?:[^/]+\.)?spankbang\.com/(?P<id>[\da-z]+)/playlist/(?P<display_id>[^/]+)'
+ _TEST = {
+ 'url': 'https://spankbang.com/ug0k/playlist/big+ass+titties',
+ 'info_dict': {
+ 'id': 'ug0k',
+ 'title': 'Big Ass Titties',
+ },
+ 'playlist_mincount': 40,
+ }
+
+ def _real_extract(self, url):
+ mobj = self._match_valid_url(url)
+ playlist_id = mobj.group('id')
+
+ webpage = self._download_webpage(
+ url, playlist_id, headers={'Cookie': 'country=US; mobile=on'})
+
+ entries = [self.url_result(
+ urljoin(url, mobj.group('path')),
+ ie=SpankBangIE.ie_key(), video_id=mobj.group('id'))
+ for mobj in re.finditer(
+ r'<a[^>]+\bhref=(["\'])(?P<path>/?[\da-z]+-(?P<id>[\da-z]+)/playlist/[^"\'](?:(?!\1).)*)\1',
+ webpage)]
+
+ title = self._html_search_regex(
+ r'<em>([^<]+)</em>\s+playlist\s*<', webpage, 'playlist title',
+ fatal=False)
+
+ return self.playlist_result(entries, playlist_id, title)
diff --git a/yt_dlp/extractor/spiegel.py b/yt_dlp/extractor/spiegel.py
new file mode 100644
index 0000000..3701e29
--- /dev/null
+++ b/yt_dlp/extractor/spiegel.py
@@ -0,0 +1,51 @@
+from .common import InfoExtractor
+from .jwplatform import JWPlatformIE
+
+
+class SpiegelIE(InfoExtractor):
+ _UUID_RE = r'[\da-f]{8}-[\da-f]{4}-[\da-f]{4}-[\da-f]{4}-[\da-f]{12}'
+ _VALID_URL = r'https?://(?:www\.)?(?:spiegel|manager-magazin)\.de(?:/[^/]+)+/[^/]*-(?P<id>[0-9]+|%s)(?:-embed|-iframe)?(?:\.html)?(?:$|[#?])' % _UUID_RE
+ _TESTS = [{
+ 'url': 'http://www.spiegel.de/video/vulkan-tungurahua-in-ecuador-ist-wieder-aktiv-video-1259285.html',
+ 'md5': '50c7948883ec85a3e431a0a44b7ad1d6',
+ 'info_dict': {
+ 'id': 'II0BUyxY',
+ 'display_id': '1259285',
+ 'ext': 'mp4',
+ 'title': 'Vulkan Tungurahua in Ecuador ist wieder aktiv - DER SPIEGEL - Wissenschaft',
+ 'description': 'md5:8029d8310232196eb235d27575a8b9f4',
+ 'duration': 48.0,
+ 'upload_date': '20130311',
+ 'timestamp': 1362997920,
+ },
+ }, {
+ 'url': 'http://www.spiegel.de/video/schach-wm-videoanalyse-des-fuenften-spiels-video-1309159.html',
+ 'only_matching': True,
+ }, {
+ 'url': 'https://www.spiegel.de/video/eifel-zoo-aufregung-um-ausgebrochene-raubtiere-video-99018031.html',
+ 'only_matching': True,
+ }, {
+ 'url': 'https://www.spiegel.de/panorama/urteile-im-goldmuenzenprozess-haftstrafen-fuer-clanmitglieder-a-aae8df48-43c1-4c61-867d-23f0a2d254b7',
+ 'only_matching': True,
+ }, {
+ 'url': 'http://www.spiegel.de/video/spiegel-tv-magazin-ueber-guellekrise-in-schleswig-holstein-video-99012776.html',
+ 'only_matching': True,
+ }, {
+ 'url': 'http://www.spiegel.de/sport/sonst/badminton-wm-die-randsportart-soll-populaerer-werden-a-987092.html',
+ 'only_matching': True,
+ }]
+
+ def _real_extract(self, url):
+ video_id = self._match_id(url)
+ webpage = self._download_webpage(url, video_id)
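+        # the quotes around mediaId may appear HTML-escaped (&#34;) in the
+        # embed markup, hence the alternation in the delimiter group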
+ media_id = self._html_search_regex(
+ r'(&#34;|["\'])mediaId\1\s*:\s*(&#34;|["\'])(?P<id>(?:(?!\2).)+)\2',
+ webpage, 'media id', group='id')
+ return {
+ '_type': 'url_transparent',
+ 'id': video_id,
+ 'display_id': video_id,
+ 'url': 'jwplatform:%s' % media_id,
+ 'title': self._og_search_title(webpage, default=None),
+ 'ie_key': JWPlatformIE.ie_key(),
+ }
diff --git a/yt_dlp/extractor/spike.py b/yt_dlp/extractor/spike.py
new file mode 100644
index 0000000..5c1c78d
--- /dev/null
+++ b/yt_dlp/extractor/spike.py
@@ -0,0 +1,46 @@
+from .mtv import MTVServicesInfoExtractor
+
+
+class BellatorIE(MTVServicesInfoExtractor):
+ _VALID_URL = r'https?://(?:www\.)?bellator\.com/[^/]+/[\da-z]{6}(?:[/?#&]|$)'
+ _TESTS = [{
+ 'url': 'http://www.bellator.com/fight/atwr7k/bellator-158-michael-page-vs-evangelista-cyborg',
+ 'info_dict': {
+ 'title': 'Michael Page vs. Evangelista Cyborg',
+ 'description': 'md5:0d917fc00ffd72dd92814963fc6cbb05',
+ },
+ 'playlist_count': 3,
+ }, {
+ 'url': 'http://www.bellator.com/video-clips/bw6k7n/bellator-158-foundations-michael-venom-page',
+ 'only_matching': True,
+ }]
+
+ _FEED_URL = 'http://www.bellator.com/feeds/mrss/'
+ _GEO_COUNTRIES = ['US']
+
+
+class ParamountNetworkIE(MTVServicesInfoExtractor):
+ _VALID_URL = r'https?://(?:www\.)?paramountnetwork\.com/[^/]+/[\da-z]{6}(?:[/?#&]|$)'
+ _TESTS = [{
+ 'url': 'http://www.paramountnetwork.com/episodes/j830qm/lip-sync-battle-joel-mchale-vs-jim-rash-season-2-ep-13',
+ 'info_dict': {
+ 'id': '37ace3a8-1df6-48be-85b8-38df8229e241',
+ 'ext': 'mp4',
+ 'title': 'Lip Sync Battle|April 28, 2016|2|209|Joel McHale Vs. Jim Rash|Act 1',
+ 'description': 'md5:a739ca8f978a7802f67f8016d27ce114',
+ },
+ 'params': {
+ # m3u8 download
+ 'skip_download': True,
+ },
+ }]
+
+ _FEED_URL = 'http://feeds.mtvnservices.com/od/feed/intl-mrss-player-feed'
+ _GEO_COUNTRIES = ['US']
+
+ def _get_feed_query(self, uri):
+ return {
+ 'arcEp': 'paramountnetwork.com',
+ 'imageEp': 'paramountnetwork.com',
+ 'mgid': uri,
+ }
diff --git a/yt_dlp/extractor/sport5.py b/yt_dlp/extractor/sport5.py
new file mode 100644
index 0000000..44b4067
--- /dev/null
+++ b/yt_dlp/extractor/sport5.py
@@ -0,0 +1,86 @@
+from .common import InfoExtractor
+from ..utils import ExtractorError
+
+
+class Sport5IE(InfoExtractor):
+    _VALID_URL = r'https?://(?:(?:www|vod)\.)?sport5\.co\.il/.*\b(?:Vi|docID)=(?P<id>\d+)'
+ _TESTS = [
+ {
+ 'url': 'http://vod.sport5.co.il/?Vc=147&Vi=176331&Page=1',
+ 'info_dict': {
+ 'id': 's5-Y59xx1-GUh2',
+ 'ext': 'mp4',
+ 'title': 'ולנסיה-קורדובה 0:3',
+ 'description': 'אלקאסר, גאייה ופגולי סידרו לקבוצה של נונו ניצחון על קורדובה ואת המקום הראשון בליגה',
+ 'duration': 228,
+ 'categories': list,
+ },
+ 'skip': 'Blocked outside of Israel',
+ }, {
+ 'url': 'http://www.sport5.co.il/articles.aspx?FolderID=3075&docID=176372&lang=HE',
+ 'info_dict': {
+ 'id': 's5-SiXxx1-hKh2',
+ 'ext': 'mp4',
+ 'title': 'GOALS_CELTIC_270914.mp4',
+ 'description': '',
+ 'duration': 87,
+ 'categories': list,
+ },
+ 'skip': 'Blocked outside of Israel',
+ }
+ ]
+
+ def _real_extract(self, url):
+ mobj = self._match_valid_url(url)
+ media_id = mobj.group('id')
+
+ webpage = self._download_webpage(url, media_id)
+
+ video_id = self._html_search_regex(r'clipId=([\w-]+)', webpage, 'video id')
+
+ metadata = self._download_xml(
+ 'http://sport5-metadata-rr-d.nsacdn.com/vod/vod/%s/HDS/metadata.xml' % video_id,
+ video_id)
+
+ error = metadata.find('./Error')
+ if error is not None:
+ raise ExtractorError(
+ '%s returned error: %s - %s' % (
+ self.IE_NAME,
+ error.find('./Name').text,
+ error.find('./Description').text),
+ expected=True)
+
+ title = metadata.find('./Title').text
+ description = metadata.find('./Description').text
+ duration = int(metadata.find('./Duration').text)
+
+ posters_el = metadata.find('./PosterLinks')
+ thumbnails = [{
+ 'url': thumbnail.text,
+ 'width': int(thumbnail.get('width')),
+ 'height': int(thumbnail.get('height')),
+ } for thumbnail in posters_el.findall('./PosterIMG')] if posters_el is not None else []
+
+ categories_el = metadata.find('./Categories')
+ categories = [
+ cat.get('name') for cat in categories_el.findall('./Category')
+ ] if categories_el is not None else []
+
+ formats = [{
+ 'url': fmt.text,
+ 'ext': 'mp4',
+ 'vbr': int(fmt.get('bitrate')),
+ 'width': int(fmt.get('width')),
+ 'height': int(fmt.get('height')),
+ } for fmt in metadata.findall('./PlaybackLinks/FileURL')]
+
+ return {
+ 'id': video_id,
+ 'title': title,
+ 'description': description,
+ 'thumbnails': thumbnails,
+ 'duration': duration,
+ 'categories': categories,
+ 'formats': formats,
+ }
diff --git a/yt_dlp/extractor/sportbox.py b/yt_dlp/extractor/sportbox.py
new file mode 100644
index 0000000..ccbb0e8
--- /dev/null
+++ b/yt_dlp/extractor/sportbox.py
@@ -0,0 +1,88 @@
+from .common import InfoExtractor
+from ..utils import (
+ determine_ext,
+ int_or_none,
+ js_to_json,
+ merge_dicts,
+)
+
+
+class SportBoxIE(InfoExtractor):
+ _VALID_URL = r'https?://(?:news\.sportbox|matchtv)\.ru/vdl/player(?:/[^/]+/|\?.*?\bn?id=)(?P<id>\d+)'
+ _EMBED_REGEX = [r'<iframe[^>]+src="(?P<url>https?://(?:news\.sportbox|matchtv)\.ru/vdl/player[^"]+)"']
+ _TESTS = [{
+ 'url': 'http://news.sportbox.ru/vdl/player/ci/211355',
+ 'info_dict': {
+ 'id': '109158',
+ 'ext': 'mp4',
+ 'title': 'В Новороссийске прошел детский турнир «Поле славы боевой»',
+ 'description': 'В Новороссийске прошел детский турнир «Поле славы боевой»',
+ 'thumbnail': r're:^https?://.*\.jpg$',
+ 'duration': 292,
+ 'view_count': int,
+ 'timestamp': 1426237001,
+ 'upload_date': '20150313',
+ },
+ 'params': {
+ # m3u8 download
+ 'skip_download': True,
+ },
+ }, {
+ 'url': 'http://news.sportbox.ru/vdl/player?nid=370908&only_player=1&autostart=false&playeri=2&height=340&width=580',
+ 'only_matching': True,
+ }, {
+ 'url': 'https://news.sportbox.ru/vdl/player/media/193095',
+ 'only_matching': True,
+ }, {
+ 'url': 'https://news.sportbox.ru/vdl/player/media/109158',
+ 'only_matching': True,
+ }, {
+ 'url': 'https://matchtv.ru/vdl/player/media/109158',
+ 'only_matching': True,
+ }]
+
+ def _real_extract(self, url):
+ video_id = self._match_id(url)
+
+ webpage = self._download_webpage(url, video_id)
+
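+        # playerOptions.sources is a JS array literal, so it is run through
+        # js_to_json before parsing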
+ sources = self._parse_json(
+ self._search_regex(
+ r'(?s)playerOptions\.sources(?:WithRes)?\s*=\s*(\[.+?\])\s*;\s*\n',
+ webpage, 'sources'),
+ video_id, transform_source=js_to_json)
+
+ formats = []
+ for source in sources:
+ src = source.get('src')
+ if not src:
+ continue
+ if determine_ext(src) == 'm3u8':
+ formats.extend(self._extract_m3u8_formats(
+ src, video_id, 'mp4', entry_protocol='m3u8_native',
+ m3u8_id='hls', fatal=False))
+ else:
+ formats.append({
+ 'url': src,
+ })
+
+ player = self._parse_json(
+ self._search_regex(
+ r'(?s)playerOptions\s*=\s*({.+?})\s*;\s*\n', webpage,
+ 'player options', default='{}'),
+ video_id, transform_source=js_to_json)
+ media_id = player['mediaId']
+
+ info = self._search_json_ld(webpage, media_id, default={})
+
+ view_count = int_or_none(self._search_regex(
+ r'Просмотров\s*:\s*(\d+)', webpage, 'view count', default=None))
+
+ return merge_dicts(info, {
+ 'id': media_id,
+ 'title': self._og_search_title(webpage, default=None) or media_id,
+ 'thumbnail': player.get('poster'),
+ 'duration': int_or_none(player.get('duration')),
+ 'view_count': view_count,
+ 'formats': formats,
+ })
diff --git a/yt_dlp/extractor/sportdeutschland.py b/yt_dlp/extractor/sportdeutschland.py
new file mode 100644
index 0000000..30dbcf3
--- /dev/null
+++ b/yt_dlp/extractor/sportdeutschland.py
@@ -0,0 +1,142 @@
+from .common import InfoExtractor
+from ..utils import (
+ join_nonempty,
+ strip_or_none,
+ traverse_obj,
+ unified_timestamp,
+)
+
+
+class SportDeutschlandIE(InfoExtractor):
+ _VALID_URL = r'https?://sportdeutschland\.tv/(?P<id>(?:[^/]+/)?[^?#/&]+)'
+ _TESTS = [{
+ 'url': 'https://sportdeutschland.tv/blauweissbuchholztanzsport/buchholzer-formationswochenende-2023-samstag-1-bundesliga-landesliga',
+ 'info_dict': {
+ 'id': '9839a5c7-0dbb-48a8-ab63-3b408adc7b54',
+ 'ext': 'mp4',
+ 'title': 'Buchholzer Formationswochenende 2023 - Samstag - 1. Bundesliga / Landesliga',
+ 'display_id': 'blauweissbuchholztanzsport/buchholzer-formationswochenende-2023-samstag-1-bundesliga-landesliga',
+ 'description': 'md5:a288c794a5ee69e200d8f12982f81a87',
+ 'live_status': 'was_live',
+ 'channel': 'Blau-Weiss Buchholz Tanzsport',
+ 'channel_url': 'https://sportdeutschland.tv/blauweissbuchholztanzsport',
+ 'channel_id': '93ec33c9-48be-43b6-b404-e016b64fdfa3',
+ 'duration': 32447,
+ 'upload_date': '20230114',
+ 'timestamp': 1673733618,
+ }
+ }, {
+ 'url': 'https://sportdeutschland.tv/deutscherbadmintonverband/bwf-tour-1-runde-feld-1-yonex-gainward-german-open-2022-0',
+ 'info_dict': {
+ 'id': '95c80c52-6b9a-4ae9-9197-984145adfced',
+ 'ext': 'mp4',
+ 'title': 'BWF Tour: 1. Runde Feld 1 - YONEX GAINWARD German Open 2022',
+ 'display_id': 'deutscherbadmintonverband/bwf-tour-1-runde-feld-1-yonex-gainward-german-open-2022-0',
+ 'description': 'md5:2afb5996ceb9ac0b2ac81f563d3a883e',
+ 'live_status': 'was_live',
+ 'channel': 'Deutscher Badminton Verband',
+ 'channel_url': 'https://sportdeutschland.tv/deutscherbadmintonverband',
+ 'channel_id': '93ca5866-2551-49fc-8424-6db35af58920',
+ 'duration': 41097,
+ 'upload_date': '20220309',
+ 'timestamp': 1646860727.0,
+ }
+ }, {
+ 'url': 'https://sportdeutschland.tv/ggcbremen/formationswochenende-latein-2023',
+ 'info_dict': {
+ 'id': '9889785e-55b0-4d97-a72a-ce9a9f157cce',
+ 'title': 'Formationswochenende Latein 2023 - Samstag',
+ 'display_id': 'ggcbremen/formationswochenende-latein-2023',
+ 'description': 'md5:6e4060d40ff6a8f8eeb471b51a8f08b2',
+ 'live_status': 'was_live',
+ 'channel': 'Grün-Gold-Club Bremen e.V.',
+ 'channel_id': '9888f04e-bb46-4c7f-be47-df960a4167bb',
+ 'channel_url': 'https://sportdeutschland.tv/ggcbremen',
+ },
+ 'playlist_count': 3,
+ 'playlist': [{
+ 'info_dict': {
+ 'id': '988e1fea-9d44-4fab-8c72-3085fb667547',
+ 'ext': 'mp4',
+ 'channel_url': 'https://sportdeutschland.tv/ggcbremen',
+ 'channel_id': '9888f04e-bb46-4c7f-be47-df960a4167bb',
+ 'channel': 'Grün-Gold-Club Bremen e.V.',
+ 'duration': 86,
+ 'title': 'Formationswochenende Latein 2023 - Samstag Part 1',
+ 'upload_date': '20230225',
+ 'timestamp': 1677349909,
+ 'live_status': 'was_live',
+ }
+ }]
+ }, {
+ 'url': 'https://sportdeutschland.tv/dtb/gymnastik-international-tag-1',
+ 'info_dict': {
+ 'id': '95d71b8a-370a-4b87-ad16-94680da18528',
+ 'ext': 'mp4',
+ 'title': r're:Gymnastik International - Tag 1 .+',
+ 'display_id': 'dtb/gymnastik-international-tag-1',
+ 'channel_id': '936ecef1-2f4a-4e08-be2f-68073cb7ecab',
+ 'channel': 'Deutscher Turner-Bund',
+ 'channel_url': 'https://sportdeutschland.tv/dtb',
+ 'description': 'md5:07a885dde5838a6f0796ee21dc3b0c52',
+ 'live_status': 'is_live',
+ },
+ 'skip': 'live',
+ }]
+
+ def _process_video(self, asset_id, video):
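+        # Mux-hosted playback: request a signed token for this asset, then
+        # fetch the (possibly live) HLS manifest from stream.mux.com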
+ is_live = video['type'] == 'mux_live'
+ token = self._download_json(
+ f'https://api.sportdeutschland.tv/api/frontend/asset-token/{asset_id}',
+ video['id'], query={'type': video['type'], 'playback_id': video['src']})['token']
+ formats, subtitles = self._extract_m3u8_formats_and_subtitles(
+ f'https://stream.mux.com/{video["src"]}.m3u8?token={token}', video['id'], live=is_live)
+
+ return {
+ 'is_live': is_live,
+ 'formats': formats,
+ 'subtitles': subtitles,
+ **traverse_obj(video, {
+ 'id': 'id',
+ 'duration': ('duration', {lambda x: float(x) > 0 and float(x)}),
+ 'timestamp': ('created_at', {unified_timestamp})
+ }),
+ }
+
+ def _real_extract(self, url):
+ display_id = self._match_id(url)
+ meta = self._download_json(
+ f'https://api.sportdeutschland.tv/api/stateless/frontend/assets/{display_id}',
+ display_id, query={'access_token': 'true'})
+
+ info = {
+ 'display_id': display_id,
+ **traverse_obj(meta, {
+ 'id': (('id', 'uuid'), ),
+ 'title': (('title', 'name'), {strip_or_none}),
+ 'description': 'description',
+ 'channel': ('profile', 'name'),
+ 'channel_id': ('profile', 'id'),
+ 'is_live': 'currently_live',
+ 'was_live': 'was_live',
+ 'channel_url': ('profile', 'slug', {lambda x: f'https://sportdeutschland.tv/{x}'}),
+ }, get_all=False)
+ }
+
+ parts = traverse_obj(meta, (('livestream', ('videos', ...)), ))
+ entries = [{
+ 'title': join_nonempty(info.get('title'), f'Part {i}', delim=' '),
+ **traverse_obj(info, {'channel': 'channel', 'channel_id': 'channel_id',
+ 'channel_url': 'channel_url', 'was_live': 'was_live'}),
+ **self._process_video(info['id'], video),
+ } for i, video in enumerate(parts, 1)]
+
+ return {
+ '_type': 'multi_video',
+ **info,
+ 'entries': entries,
+ } if len(entries) > 1 else {
+ **info,
+ **entries[0],
+ 'title': info.get('title'),
+ }
diff --git a/yt_dlp/extractor/spotify.py b/yt_dlp/extractor/spotify.py
new file mode 100644
index 0000000..55ce36a
--- /dev/null
+++ b/yt_dlp/extractor/spotify.py
@@ -0,0 +1,167 @@
+import functools
+import json
+import re
+
+from .common import InfoExtractor
+from ..utils import (
+ OnDemandPagedList,
+ clean_podcast_url,
+ float_or_none,
+ int_or_none,
+ strip_or_none,
+ traverse_obj,
+ try_get,
+ unified_strdate,
+)
+
+
+class SpotifyBaseIE(InfoExtractor):
+ _WORKING = False
+ _ACCESS_TOKEN = None
+ _OPERATION_HASHES = {
+ 'Episode': '8276d4423d709ae9b68ec1b74cc047ba0f7479059a37820be730f125189ac2bf',
+ 'MinimalShow': '13ee079672fad3f858ea45a55eb109553b4fb0969ed793185b2e34cbb6ee7cc0',
+ 'ShowEpisodes': 'e0e5ce27bd7748d2c59b4d44ba245a8992a05be75d6fabc3b20753fc8857444d',
+ }
+    _VALID_URL_TEMPL = r'https?://open\.spotify\.com/(?:embed-podcast/|embed/)?%s/(?P<id>[^/?&#]+)'
+ _EMBED_REGEX = [r'<iframe[^>]+src="(?P<url>https?://open\.spotify.com/embed/[^"]+)"']
+
+ def _real_initialize(self):
+ self._ACCESS_TOKEN = self._download_json(
+ 'https://open.spotify.com/get_access_token', None)['accessToken']
+
+ def _call_api(self, operation, video_id, variables, **kwargs):
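+        # the pathfinder endpoint serves persisted GraphQL queries addressed
+        # by the sha256 hashes in _OPERATION_HASHES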
+ return self._download_json(
+ 'https://api-partner.spotify.com/pathfinder/v1/query', video_id, query={
+ 'operationName': 'query' + operation,
+ 'variables': json.dumps(variables),
+ 'extensions': json.dumps({
+ 'persistedQuery': {
+ 'sha256Hash': self._OPERATION_HASHES[operation],
+ },
+ })
+ }, headers={'authorization': 'Bearer ' + self._ACCESS_TOKEN},
+ **kwargs)['data']
+
+ def _extract_episode(self, episode, series):
+ episode_id = episode['id']
+ title = episode['name'].strip()
+
+ formats = []
+ audio_preview = episode.get('audioPreview') or {}
+ audio_preview_url = audio_preview.get('url')
+ if audio_preview_url:
+ f = {
+ 'url': audio_preview_url.replace('://p.scdn.co/mp3-preview/', '://anon-podcast.scdn.co/'),
+ 'vcodec': 'none',
+ }
+ audio_preview_format = audio_preview.get('format')
+ if audio_preview_format:
+ f['format_id'] = audio_preview_format
+ mobj = re.match(r'([0-9A-Z]{3})_(?:[A-Z]+_)?(\d+)', audio_preview_format)
+ if mobj:
+ f.update({
+ 'abr': int(mobj.group(2)),
+ 'ext': mobj.group(1).lower(),
+ })
+ formats.append(f)
+
+ for item in (try_get(episode, lambda x: x['audio']['items']) or []):
+ item_url = item.get('url')
+ if not (item_url and item.get('externallyHosted')):
+ continue
+ formats.append({
+ 'url': clean_podcast_url(item_url),
+ 'vcodec': 'none',
+ })
+
+ thumbnails = []
+ for source in (try_get(episode, lambda x: x['coverArt']['sources']) or []):
+ source_url = source.get('url')
+ if not source_url:
+ continue
+ thumbnails.append({
+ 'url': source_url,
+ 'width': int_or_none(source.get('width')),
+ 'height': int_or_none(source.get('height')),
+ })
+
+ return {
+ 'id': episode_id,
+ 'title': title,
+ 'formats': formats,
+ 'thumbnails': thumbnails,
+ 'description': strip_or_none(episode.get('description')),
+ 'duration': float_or_none(try_get(
+ episode, lambda x: x['duration']['totalMilliseconds']), 1000),
+ 'release_date': unified_strdate(try_get(
+ episode, lambda x: x['releaseDate']['isoString'])),
+ 'series': series,
+ }
+
+
+class SpotifyIE(SpotifyBaseIE):
+ IE_NAME = 'spotify'
+ IE_DESC = 'Spotify episodes'
+ _VALID_URL = SpotifyBaseIE._VALID_URL_TEMPL % 'episode'
+ _TESTS = [{
+ 'url': 'https://open.spotify.com/episode/4Z7GAJ50bgctf6uclHlWKo',
+ 'md5': '74010a1e3fa4d9e1ab3aa7ad14e42d3b',
+ 'info_dict': {
+ 'id': '4Z7GAJ50bgctf6uclHlWKo',
+ 'ext': 'mp3',
+ 'title': 'From the archive: Why time management is ruining our lives',
+ 'description': 'md5:b120d9c4ff4135b42aa9b6d9cde86935',
+ 'duration': 2083.605,
+ 'release_date': '20201217',
+ 'series': "The Guardian's Audio Long Reads",
+ }
+ }, {
+ 'url': 'https://open.spotify.com/embed/episode/4TvCsKKs2thXmarHigWvXE?si=7eatS8AbQb6RxqO2raIuWA',
+ 'only_matching': True,
+ }]
+
+ def _real_extract(self, url):
+ episode_id = self._match_id(url)
+ episode = self._call_api('Episode', episode_id, {
+ 'uri': 'spotify:episode:' + episode_id
+ })['episode']
+ return self._extract_episode(
+ episode, try_get(episode, lambda x: x['podcast']['name']))
+
+
+class SpotifyShowIE(SpotifyBaseIE):
+ IE_NAME = 'spotify:show'
+ IE_DESC = 'Spotify shows'
+ _VALID_URL = SpotifyBaseIE._VALID_URL_TEMPL % 'show'
+ _TEST = {
+ 'url': 'https://open.spotify.com/show/4PM9Ke6l66IRNpottHKV9M',
+ 'info_dict': {
+ 'id': '4PM9Ke6l66IRNpottHKV9M',
+ 'title': 'The Story from the Guardian',
+ 'description': 'The Story podcast is dedicated to our finest audio documentaries, investigations and long form stories',
+ },
+ 'playlist_mincount': 36,
+ }
+ _PER_PAGE = 100
+
+ def _fetch_page(self, show_id, page=0):
+ return self._call_api('ShowEpisodes', show_id, {
+ 'limit': self._PER_PAGE,
+ 'offset': page * self._PER_PAGE,
+ 'uri': f'spotify:show:{show_id}',
+ }, note=f'Downloading page {page + 1} JSON metadata')['podcast']
+
+ def _real_extract(self, url):
+ show_id = self._match_id(url)
+ first_page = self._fetch_page(show_id)
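+ # fetch page 0 eagerly for the playlist title/description and let
+ # _entries() reuse it, avoiding a duplicate request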
+
+ def _entries(page):
+ podcast = self._fetch_page(show_id, page) if page else first_page
+ yield from map(
+ functools.partial(self._extract_episode, series=podcast.get('name')),
+ traverse_obj(podcast, ('episodes', 'items', ..., 'episode')))
+
+ return self.playlist_result(
+ OnDemandPagedList(_entries, self._PER_PAGE),
+ show_id, first_page.get('name'), first_page.get('description'))
diff --git a/yt_dlp/extractor/spreaker.py b/yt_dlp/extractor/spreaker.py
new file mode 100644
index 0000000..36a9bd2
--- /dev/null
+++ b/yt_dlp/extractor/spreaker.py
@@ -0,0 +1,173 @@
+import itertools
+
+from .common import InfoExtractor
+from ..compat import compat_str
+from ..utils import (
+ float_or_none,
+ int_or_none,
+ str_or_none,
+ try_get,
+ unified_timestamp,
+ url_or_none,
+)
+
+
+def _extract_episode(data, episode_id=None):
+ title = data['title']
+ download_url = data['download_url']
+
+ series = try_get(data, lambda x: x['show']['title'], compat_str)
+ uploader = try_get(data, lambda x: x['author']['fullname'], compat_str)
+
+ thumbnails = []
+ for image in ('image_original', 'image_medium', 'image'):
+ image_url = url_or_none(data.get('%s_url' % image))
+ if image_url:
+ thumbnails.append({'url': image_url})
+
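+ # counts are exposed either as top-level '<key>s_count' fields or
+ # nested under 'stats', depending on the response shape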
+ def stats(key):
+ return int_or_none(try_get(
+ data,
+ (lambda x: x['%ss_count' % key],
+ lambda x: x['stats']['%ss' % key])))
+
+ def duration(key):
+ return float_or_none(data.get(key), scale=1000)
+
+ return {
+ 'id': compat_str(episode_id or data['episode_id']),
+ 'url': download_url,
+ 'display_id': data.get('permalink'),
+ 'title': title,
+ 'description': data.get('description'),
+ 'timestamp': unified_timestamp(data.get('published_at')),
+ 'uploader': uploader,
+ 'uploader_id': str_or_none(data.get('author_id')),
+ 'creator': uploader,
+ 'duration': duration('duration') or duration('length'),
+ 'view_count': stats('play'),
+ 'like_count': stats('like'),
+ 'comment_count': stats('message'),
+ 'format': 'MPEG Layer 3',
+ 'format_id': 'mp3',
+ 'container': 'mp3',
+ 'ext': 'mp3',
+ 'thumbnails': thumbnails,
+ 'series': series,
+ 'extractor_key': SpreakerIE.ie_key(),
+ }
+
+
+class SpreakerIE(InfoExtractor):
+ _VALID_URL = r'''(?x)
+ https?://
+ api\.spreaker\.com/
+ (?:
+ (?:download/)?episode|
+ v2/episodes
+ )/
+ (?P<id>\d+)
+ '''
+ _TESTS = [{
+ 'url': 'https://api.spreaker.com/episode/12534508',
+ 'info_dict': {
+ 'id': '12534508',
+ 'display_id': 'swm-ep15-how-to-market-your-music-part-2',
+ 'ext': 'mp3',
+ 'title': 'EP:15 | Music Marketing (Likes) - Part 2',
+ 'description': 'md5:0588c43e27be46423e183076fa071177',
+ 'timestamp': 1502250336,
+ 'upload_date': '20170809',
+ 'uploader': 'SWM',
+ 'uploader_id': '9780658',
+ 'duration': 1063.42,
+ 'view_count': int,
+ 'like_count': int,
+ 'comment_count': int,
+ 'series': 'Success With Music (SWM)',
+ },
+ }, {
+ 'url': 'https://api.spreaker.com/download/episode/12534508/swm_ep15_how_to_market_your_music_part_2.mp3',
+ 'only_matching': True,
+ }, {
+ 'url': 'https://api.spreaker.com/v2/episodes/12534508?export=episode_segments',
+ 'only_matching': True,
+ }]
+
+ def _real_extract(self, url):
+ episode_id = self._match_id(url)
+ data = self._download_json(
+ 'https://api.spreaker.com/v2/episodes/%s' % episode_id,
+ episode_id)['response']['episode']
+ return _extract_episode(data, episode_id)
+
+
+class SpreakerPageIE(InfoExtractor):
+ _VALID_URL = r'https?://(?:www\.)?spreaker\.com/user/[^/]+/(?P<id>[^/?#&]+)'
+ _TESTS = [{
+ 'url': 'https://www.spreaker.com/user/9780658/swm-ep15-how-to-market-your-music-part-2',
+ 'only_matching': True,
+ }]
+
+ def _real_extract(self, url):
+ display_id = self._match_id(url)
+ webpage = self._download_webpage(url, display_id)
+ episode_id = self._search_regex(
+ (r'data-episode_id=["\'](?P<id>\d+)',
+ r'episode_id\s*:\s*(?P<id>\d+)'), webpage, 'episode id')
+ return self.url_result(
+ 'https://api.spreaker.com/episode/%s' % episode_id,
+ ie=SpreakerIE.ie_key(), video_id=episode_id)
+
+
+class SpreakerShowIE(InfoExtractor):
+ _VALID_URL = r'https?://api\.spreaker\.com/show/(?P<id>\d+)'
+ _TESTS = [{
+ 'url': 'https://api.spreaker.com/show/4652058',
+ 'info_dict': {
+ 'id': '4652058',
+ },
+ 'playlist_mincount': 118,
+ }]
+
+ def _entries(self, show_id):
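+ # page through the episode list until the reported last page is
+ # reached or the API stops returning results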
+ for page_num in itertools.count(1):
+ episodes = self._download_json(
+ 'https://api.spreaker.com/show/%s/episodes' % show_id,
+ show_id, note='Downloading JSON page %d' % page_num, query={
+ 'page': page_num,
+ 'max_per_page': 100,
+ })
+ pager = try_get(episodes, lambda x: x['response']['pager'], dict)
+ if not pager:
+ break
+ results = pager.get('results')
+ if not results or not isinstance(results, list):
+ break
+ for result in results:
+ if not isinstance(result, dict):
+ continue
+ yield _extract_episode(result)
+ if page_num == pager.get('last_page'):
+ break
+
+ def _real_extract(self, url):
+ show_id = self._match_id(url)
+ return self.playlist_result(self._entries(show_id), playlist_id=show_id)
+
+
+class SpreakerShowPageIE(InfoExtractor):
+ _VALID_URL = r'https?://(?:www\.)?spreaker\.com/show/(?P<id>[^/?#&]+)'
+ _TESTS = [{
+ 'url': 'https://www.spreaker.com/show/success-with-music',
+ 'only_matching': True,
+ }]
+
+ def _real_extract(self, url):
+ display_id = self._match_id(url)
+ webpage = self._download_webpage(url, display_id)
+ show_id = self._search_regex(
+ r'show_id\s*:\s*(?P<id>\d+)', webpage, 'show id')
+ return self.url_result(
+ 'https://api.spreaker.com/show/%s' % show_id,
+ ie=SpreakerShowIE.ie_key(), video_id=show_id)
diff --git a/yt_dlp/extractor/springboardplatform.py b/yt_dlp/extractor/springboardplatform.py
new file mode 100644
index 0000000..a98584a
--- /dev/null
+++ b/yt_dlp/extractor/springboardplatform.py
@@ -0,0 +1,113 @@
+import re
+
+from .common import InfoExtractor
+from ..utils import (
+ ExtractorError,
+ int_or_none,
+ xpath_attr,
+ xpath_text,
+ xpath_element,
+ unescapeHTML,
+ unified_timestamp,
+)
+
+
+class SpringboardPlatformIE(InfoExtractor):
+ _VALID_URL = r'''(?x)
+ https?://
+ cms\.springboardplatform\.com/
+ (?:
+ (?:previews|embed_iframe)/(?P<index>\d+)/video/(?P<id>\d+)|
+ xml_feeds_advanced/index/(?P<index_2>\d+)/rss3/(?P<id_2>\d+)
+ )
+ '''
+ _EMBED_REGEX = [r'<iframe\b[^>]+\bsrc=(["\'])(?P<url>(?:https?:)?//cms\.springboardplatform\.com/embed_iframe/\d+/video/\d+.*?)\1']
+ _TESTS = [{
+ 'url': 'http://cms.springboardplatform.com/previews/159/video/981017/0/0/1',
+ 'md5': '5c3cb7b5c55740d482561099e920f192',
+ 'info_dict': {
+ 'id': '981017',
+ 'ext': 'mp4',
+ 'title': 'Redman "BUD like YOU" "Usher Good Kisser" REMIX',
+ 'description': 'Redman "BUD like YOU" "Usher Good Kisser" REMIX',
+ 'thumbnail': r're:^https?://.*\.jpg$',
+ 'timestamp': 1409132328,
+ 'upload_date': '20140827',
+ 'duration': 193,
+ },
+ }, {
+ 'url': 'http://cms.springboardplatform.com/embed_iframe/159/video/981017/rab007/rapbasement.com/1/1',
+ 'only_matching': True,
+ }, {
+ 'url': 'http://cms.springboardplatform.com/embed_iframe/20/video/1731611/ki055/kidzworld.com/10',
+ 'only_matching': True,
+ }, {
+ 'url': 'http://cms.springboardplatform.com/xml_feeds_advanced/index/159/rss3/981017/0/0/1/',
+ 'only_matching': True,
+ }]
+
+ def _real_extract(self, url):
+ mobj = self._match_valid_url(url)
+ video_id = mobj.group('id') or mobj.group('id_2')
+ index = mobj.group('index') or mobj.group('index_2')
+
+ video = self._download_xml(
+ 'http://cms.springboardplatform.com/xml_feeds_advanced/index/%s/rss3/%s'
+ % (index, video_id), video_id)
+
+ item = xpath_element(video, './/item', 'item', fatal=True)
+
+ content = xpath_element(
+ item, './{http://search.yahoo.com/mrss/}content', 'content',
+ fatal=True)
+ title = unescapeHTML(xpath_text(item, './title', 'title', fatal=True))
+
+ video_url = content.attrib['url']
+
+ if 'error_video.mp4' in video_url:
+ raise ExtractorError(
+ 'Video %s no longer exists' % video_id, expected=True)
+
+ duration = int_or_none(content.get('duration'))
+ tbr = int_or_none(content.get('bitrate'))
+ filesize = int_or_none(content.get('fileSize'))
+ width = int_or_none(content.get('width'))
+ height = int_or_none(content.get('height'))
+
+ description = unescapeHTML(xpath_text(
+ item, './description', 'description'))
+ thumbnail = xpath_attr(
+ item, './{http://search.yahoo.com/mrss/}thumbnail', 'url',
+ 'thumbnail')
+
+ timestamp = unified_timestamp(xpath_text(
+ item, './{http://cms.springboardplatform.com/namespaces.html}created',
+ 'timestamp'))
+
+ formats = [{
+ 'url': video_url,
+ 'format_id': 'http',
+ 'tbr': tbr,
+ 'filesize': filesize,
+ 'width': width,
+ 'height': height,
+ }]
+
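+ # an HLS rendition appears to be available at the same path on the
+ # "hls." subdomain; derive it by rewriting the progressive URL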
+ m3u8_format = formats[0].copy()
+ m3u8_format.update({
+ 'url': re.sub(r'(https?://)cdn\.', r'\1hls.', video_url) + '.m3u8',
+ 'ext': 'mp4',
+ 'format_id': 'hls',
+ 'protocol': 'm3u8_native',
+ })
+ formats.append(m3u8_format)
+
+ return {
+ 'id': video_id,
+ 'title': title,
+ 'description': description,
+ 'thumbnail': thumbnail,
+ 'timestamp': timestamp,
+ 'duration': duration,
+ 'formats': formats,
+ }
diff --git a/yt_dlp/extractor/sprout.py b/yt_dlp/extractor/sprout.py
new file mode 100644
index 0000000..444a6c2
--- /dev/null
+++ b/yt_dlp/extractor/sprout.py
@@ -0,0 +1,61 @@
+from .adobepass import AdobePassIE
+from ..utils import (
+ int_or_none,
+ smuggle_url,
+ update_url_query,
+)
+
+
+class SproutIE(AdobePassIE):
+ _VALID_URL = r'https?://(?:www\.)?(?:sproutonline|universalkids)\.com/(?:watch|(?:[^/]+/)*videos)/(?P<id>[^/?#]+)'
+ _TESTS = [{
+ 'url': 'https://www.universalkids.com/shows/remy-and-boo/season/1/videos/robot-bike-race',
+ 'info_dict': {
+ 'id': 'bm0foJFaTKqb',
+ 'ext': 'mp4',
+ 'title': 'Robot Bike Race',
+ 'description': 'md5:436b1d97117cc437f54c383f4debc66d',
+ 'timestamp': 1606148940,
+ 'upload_date': '20201123',
+ 'uploader': 'NBCU-MPAT',
+ },
+ 'params': {
+ 'skip_download': True,
+ },
+ }, {
+ 'url': 'http://www.sproutonline.com/watch/cowboy-adventure',
+ 'only_matching': True,
+ }, {
+ 'url': 'https://www.universalkids.com/watch/robot-bike-race',
+ 'only_matching': True,
+ }]
+ _GEO_COUNTRIES = ['US']
+
+ def _real_extract(self, url):
+ display_id = self._match_id(url)
+ mpx_metadata = self._download_json(
+ # http://nbcuunikidsprod.apps.nbcuni.com/networks/universalkids/content/videos/
+ 'https://www.universalkids.com/_api/videos/' + display_id,
+ display_id)['mpxMetadata']
+ media_pid = mpx_metadata['mediaPid']
+ theplatform_url = 'https://link.theplatform.com/s/HNK2IC/' + media_pid
+ query = {
+ 'mbr': 'true',
+ 'manifest': 'm3u',
+ }
+ if mpx_metadata.get('entitlement') == 'auth':
+ query['auth'] = self._extract_mvpd_auth(url, media_pid, 'sprout', 'sprout')
+ theplatform_url = smuggle_url(
+ update_url_query(theplatform_url, query), {
+ 'force_smil_url': True,
+ 'geo_countries': self._GEO_COUNTRIES,
+ })
+ return {
+ '_type': 'url_transparent',
+ 'id': media_pid,
+ 'url': theplatform_url,
+ 'series': mpx_metadata.get('seriesName'),
+ 'season_number': int_or_none(mpx_metadata.get('seasonNumber')),
+ 'episode_number': int_or_none(mpx_metadata.get('episodeNumber')),
+ 'ie_key': 'ThePlatform',
+ }
diff --git a/yt_dlp/extractor/srgssr.py b/yt_dlp/extractor/srgssr.py
new file mode 100644
index 0000000..145f25e
--- /dev/null
+++ b/yt_dlp/extractor/srgssr.py
@@ -0,0 +1,247 @@
+from .common import InfoExtractor
+from ..utils import (
+ ExtractorError,
+ float_or_none,
+ int_or_none,
+ join_nonempty,
+ parse_iso8601,
+ qualities,
+ try_get,
+)
+
+
+class SRGSSRIE(InfoExtractor):
+ _VALID_URL = r'''(?x)
+ (?:
+ https?://tp\.srgssr\.ch/p(?:/[^/]+)+\?urn=urn|
+ srgssr
+ ):
+ (?P<bu>
+ srf|rts|rsi|rtr|swi
+ ):(?:[^:]+:)?
+ (?P<type>
+ video|audio
+ ):
+ (?P<id>
+ [0-9a-f\-]{36}|\d+
+ )
+ '''
+ _GEO_BYPASS = False
+ _GEO_COUNTRIES = ['CH']
+
+ _ERRORS = {
+ 'AGERATING12': 'To protect children under the age of 12, this video is only available between 8 p.m. and 6 a.m.',
+ 'AGERATING18': 'To protect children under the age of 18, this video is only available between 11 p.m. and 5 a.m.',
+ # 'ENDDATE': 'For legal reasons, this video was only available for a specified period of time.',
+ 'GEOBLOCK': 'For legal reasons, this video is only available in Switzerland.',
+ 'LEGAL': 'The video cannot be transmitted for legal reasons.',
+ 'STARTDATE': 'This video is not yet available. Please try again later.',
+ }
+ _DEFAULT_LANGUAGE_CODES = {
+ 'srf': 'de',
+ 'rts': 'fr',
+ 'rsi': 'it',
+ 'rtr': 'rm',
+ 'swi': 'en',
+ }
+
+ def _get_tokenized_src(self, url, video_id, format_id):
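+ # Akamai-tokenized streams need the auth params appended to the
+ # manifest URL; failure to fetch a token is non-fatal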
+ token = self._download_json(
+ 'http://tp.srgssr.ch/akahd/token?acl=*',
+ video_id, 'Downloading %s token' % format_id, fatal=False) or {}
+ auth_params = try_get(token, lambda x: x['token']['authparams'])
+ if auth_params:
+ url += ('?' if '?' not in url else '&') + auth_params
+ return url
+
+ def _get_media_data(self, bu, media_type, media_id):
+ query = {'onlyChapters': True} if media_type == 'video' else {}
+ full_media_data = self._download_json(
+ 'https://il.srgssr.ch/integrationlayer/2.0/%s/mediaComposition/%s/%s.json'
+ % (bu, media_type, media_id),
+ media_id, query=query)['chapterList']
+ try:
+ media_data = next(
+ x for x in full_media_data if x.get('id') == media_id)
+ except StopIteration:
+ raise ExtractorError('No media information found')
+
+ block_reason = media_data.get('blockReason')
+ if block_reason and block_reason in self._ERRORS:
+ message = self._ERRORS[block_reason]
+ if block_reason == 'GEOBLOCK':
+ self.raise_geo_restricted(
+ msg=message, countries=self._GEO_COUNTRIES)
+ raise ExtractorError(
+ '%s said: %s' % (self.IE_NAME, message), expected=True)
+
+ return media_data
+
+ def _real_extract(self, url):
+ bu, media_type, media_id = self._match_valid_url(url).groups()
+ media_data = self._get_media_data(bu, media_type, media_id)
+ title = media_data['title']
+
+ formats = []
+ subtitles = {}
+ q = qualities(['SD', 'HD'])
+ for source in (media_data.get('resourceList') or []):
+ format_url = source.get('url')
+ if not format_url:
+ continue
+ protocol = source.get('protocol')
+ quality = source.get('quality')
+ format_id = join_nonempty(protocol, source.get('encoding'), quality)
+
+ if protocol in ('HDS', 'HLS'):
+ if source.get('tokenType') == 'AKAMAI':
+ format_url = self._get_tokenized_src(
+ format_url, media_id, format_id)
+ fmts, subs = self._extract_akamai_formats_and_subtitles(
+ format_url, media_id)
+ formats.extend(fmts)
+ subtitles = self._merge_subtitles(subtitles, subs)
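+ # NB: unreachable, since 'HLS' is already matched by the branch above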
+ elif protocol == 'HLS':
+ m3u8_fmts, m3u8_subs = self._extract_m3u8_formats_and_subtitles(
+ format_url, media_id, 'mp4', 'm3u8_native',
+ m3u8_id=format_id, fatal=False)
+ formats.extend(m3u8_fmts)
+ subtitles = self._merge_subtitles(subtitles, m3u8_subs)
+ elif protocol in ('HTTP', 'HTTPS'):
+ formats.append({
+ 'format_id': format_id,
+ 'url': format_url,
+ 'quality': q(quality),
+ })
+
+ # This is needed because for audio media the podcast URL is usually
+ # included even when the entry is only an audio segment and not the
+ # whole episode.
+ if int_or_none(media_data.get('position')) == 0:
+ for p in ('S', 'H'):
+ podcast_url = media_data.get('podcast%sdUrl' % p)
+ if not podcast_url:
+ continue
+ quality = p + 'D'
+ formats.append({
+ 'format_id': 'PODCAST-' + quality,
+ 'url': podcast_url,
+ 'quality': q(quality),
+ })
+
+ if media_type == 'video':
+ for sub in (media_data.get('subtitleList') or []):
+ sub_url = sub.get('url')
+ if not sub_url:
+ continue
+ lang = sub.get('locale') or self._DEFAULT_LANGUAGE_CODES[bu]
+ subtitles.setdefault(lang, []).append({
+ 'url': sub_url,
+ })
+
+ return {
+ 'id': media_id,
+ 'title': title,
+ 'description': media_data.get('description'),
+ 'timestamp': parse_iso8601(media_data.get('date')),
+ 'thumbnail': media_data.get('imageUrl'),
+ 'duration': float_or_none(media_data.get('duration'), 1000),
+ 'subtitles': subtitles,
+ 'formats': formats,
+ }
+
+
+class SRGSSRPlayIE(InfoExtractor):
+ IE_DESC = 'srf.ch, rts.ch, rsi.ch, rtr.ch and swissinfo.ch play sites'
+ _VALID_URL = r'''(?x)
+ https?://
+ (?:(?:www|play)\.)?
+ (?P<bu>srf|rts|rsi|rtr|swissinfo)\.ch/play/(?:tv|radio)/
+ (?:
+ [^/]+/(?P<type>video|audio)/[^?]+|
+ popup(?P<type_2>video|audio)player
+ )
+ \?.*?\b(?:id=|urn=urn:[^:]+:video:)(?P<id>[0-9a-f\-]{36}|\d+)
+ '''
+
+ _TESTS = [{
+ 'url': 'http://www.srf.ch/play/tv/10vor10/video/snowden-beantragt-asyl-in-russland?id=28e1a57d-5b76-4399-8ab3-9097f071e6c5',
+ 'md5': '6db2226ba97f62ad42ce09783680046c',
+ 'info_dict': {
+ 'id': '28e1a57d-5b76-4399-8ab3-9097f071e6c5',
+ 'ext': 'mp4',
+ 'upload_date': '20130701',
+ 'title': 'Snowden beantragt Asyl in Russland',
+ 'timestamp': 1372708215,
+ 'duration': 113.827,
+ 'thumbnail': r're:^https?://.*1383719781\.png$',
+ },
+ 'expected_warnings': ['Unable to download f4m manifest'],
+ }, {
+ 'url': 'http://www.rtr.ch/play/radio/actualitad/audio/saira-tujetsch-tuttina-cuntinuar-cun-sedrun-muster-turissem?id=63cb0778-27f8-49af-9284-8c7a8c6d15fc',
+ 'info_dict': {
+ 'id': '63cb0778-27f8-49af-9284-8c7a8c6d15fc',
+ 'ext': 'mp3',
+ 'upload_date': '20151013',
+ 'title': 'Saira: Tujetsch - tuttina cuntinuar cun Sedrun Mustér Turissem',
+ 'timestamp': 1444709160,
+ 'duration': 336.816,
+ },
+ 'params': {
+ # rtmp download
+ 'skip_download': True,
+ },
+ }, {
+ 'url': 'http://www.rts.ch/play/tv/-/video/le-19h30?id=6348260',
+ 'md5': '67a2a9ae4e8e62a68d0e9820cc9782df',
+ 'info_dict': {
+ 'id': '6348260',
+ 'display_id': '6348260',
+ 'ext': 'mp4',
+ 'duration': 1796.76,
+ 'title': 'Le 19h30',
+ 'upload_date': '20141201',
+ 'timestamp': 1417458600,
+ 'thumbnail': r're:^https?://.*\.image',
+ },
+ 'params': {
+ # m3u8 download
+ 'skip_download': True,
+ }
+ }, {
+ 'url': 'http://play.swissinfo.ch/play/tv/business/video/why-people-were-against-tax-reforms?id=42960270',
+ 'info_dict': {
+ 'id': '42960270',
+ 'ext': 'mp4',
+ 'title': 'Why people were against tax reforms',
+ 'description': 'md5:7ac442c558e9630e947427469c4b824d',
+ 'duration': 94.0,
+ 'upload_date': '20170215',
+ 'timestamp': 1487173560,
+ 'thumbnail': r're:https?://www\.swissinfo\.ch/srgscalableimage/42961964',
+ 'subtitles': 'count:9',
+ },
+ 'params': {
+ 'skip_download': True,
+ }
+ }, {
+ 'url': 'https://www.srf.ch/play/tv/popupvideoplayer?id=c4dba0ca-e75b-43b2-a34f-f708a4932e01',
+ 'only_matching': True,
+ }, {
+ 'url': 'https://www.srf.ch/play/tv/10vor10/video/snowden-beantragt-asyl-in-russland?urn=urn:srf:video:28e1a57d-5b76-4399-8ab3-9097f071e6c5',
+ 'only_matching': True,
+ }, {
+ 'url': 'https://www.rts.ch/play/tv/19h30/video/le-19h30?urn=urn:rts:video:6348260',
+ 'only_matching': True,
+ }, {
+ # audio segment, has podcastSdUrl of the full episode
+ 'url': 'https://www.srf.ch/play/radio/popupaudioplayer?id=50b20dc8-f05b-4972-bf03-e438ff2833eb',
+ 'only_matching': True,
+ }]
+
+ def _real_extract(self, url):
+ mobj = self._match_valid_url(url)
+ bu = mobj.group('bu')
+ media_type = mobj.group('type') or mobj.group('type_2')
+ media_id = mobj.group('id')
+ return self.url_result('srgssr:%s:%s:%s' % (bu[:3], media_type, media_id), 'SRGSSR')
diff --git a/yt_dlp/extractor/srmediathek.py b/yt_dlp/extractor/srmediathek.py
new file mode 100644
index 0000000..f0b3b58
--- /dev/null
+++ b/yt_dlp/extractor/srmediathek.py
@@ -0,0 +1,57 @@
+from .ard import ARDMediathekBaseIE
+from ..utils import (
+ ExtractorError,
+ get_element_by_attribute,
+)
+
+
+class SRMediathekIE(ARDMediathekBaseIE):
+ _WORKING = False
+ IE_NAME = 'sr:mediathek'
+ IE_DESC = 'Saarländischer Rundfunk'
+ _VALID_URL = r'https?://sr-mediathek(?:\.sr-online)?\.de/index\.php\?.*?&id=(?P<id>[0-9]+)'
+
+ _TESTS = [{
+ 'url': 'http://sr-mediathek.sr-online.de/index.php?seite=7&id=28455',
+ 'info_dict': {
+ 'id': '28455',
+ 'ext': 'mp4',
+ 'title': 'sportarena (26.10.2014)',
+ 'description': 'Ringen: KSV Köllerbach gegen Aachen-Walheim; Frauen-Fußball: 1. FC Saarbrücken gegen Sindelfingen; Motorsport: Rallye in Losheim; dazu: Interview mit Timo Bernhard; Turnen: TG Saar; Reitsport: Deutscher Voltigier-Pokal; Badminton: Interview mit Michael Fuchs ',
+ 'thumbnail': r're:^https?://.*\.jpg$',
+ },
+ 'skip': 'no longer available',
+ }, {
+ 'url': 'http://sr-mediathek.sr-online.de/index.php?seite=7&id=37682',
+ 'info_dict': {
+ 'id': '37682',
+ 'ext': 'mp4',
+ 'title': 'Love, Cakes and Rock\'n\'Roll',
+ 'description': 'md5:18bf9763631c7d326c22603681e1123d',
+ },
+ 'params': {
+ # m3u8 download
+ 'skip_download': True,
+ },
+ }, {
+ 'url': 'http://sr-mediathek.de/index.php?seite=7&id=7480',
+ 'only_matching': True,
+ }]
+
+ def _real_extract(self, url):
+ video_id = self._match_id(url)
+ webpage = self._download_webpage(url, video_id)
+
+ if '>Der gew&uuml;nschte Beitrag ist leider nicht mehr verf&uuml;gbar.<' in webpage:
+ raise ExtractorError('Video %s is no longer available' % video_id, expected=True)
+
+ media_collection_url = self._search_regex(
+ r'data-mediacollection-ardplayer="([^"]+)"', webpage, 'media collection url')
+ info = self._extract_media_info(media_collection_url, webpage, video_id)
+ info.update({
+ 'id': video_id,
+ 'title': get_element_by_attribute('class', 'ardplayer-title', webpage),
+ 'description': self._og_search_description(webpage),
+ 'thumbnail': self._og_search_thumbnail(webpage),
+ })
+ return info
diff --git a/yt_dlp/extractor/stacommu.py b/yt_dlp/extractor/stacommu.py
new file mode 100644
index 0000000..1308c59
--- /dev/null
+++ b/yt_dlp/extractor/stacommu.py
@@ -0,0 +1,231 @@
+import time
+
+from .wrestleuniverse import WrestleUniverseBaseIE
+from ..utils import (
+ int_or_none,
+ traverse_obj,
+ url_or_none,
+)
+
+
+class StacommuBaseIE(WrestleUniverseBaseIE):
+ _NETRC_MACHINE = 'stacommu'
+ _API_HOST = 'api.stacommu.jp'
+ _LOGIN_QUERY = {'key': 'AIzaSyCR9czxhH2eWuijEhTNWBZ5MCcOYEUTAhg'}
+ _LOGIN_HEADERS = {
+ 'Accept': '*/*',
+ 'Content-Type': 'application/json',
+ 'X-Client-Version': 'Chrome/JsCore/9.9.4/FirebaseCore-web',
+ 'Referer': 'https://www.stacommu.jp/',
+ 'Origin': 'https://www.stacommu.jp',
+ }
+
+ @WrestleUniverseBaseIE._TOKEN.getter
+ def _TOKEN(self):
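+ # transparently refresh the cached token once it has expired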
+ if self._REAL_TOKEN and self._TOKEN_EXPIRY <= int(time.time()):
+ self._refresh_token()
+
+ return self._REAL_TOKEN
+
+ def _get_formats(self, data, path, video_id=None):
+ if not traverse_obj(data, path) and not data.get('canWatch') and not self._TOKEN:
+ self.raise_login_required(method='password')
+ return super()._get_formats(data, path, video_id)
+
+ def _extract_hls_key(self, data, path, decrypt):
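+ # encryptType 0 means the stream is served unencrypted; otherwise
+ # decrypt the AES key/IV with the per-request decryptor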
+ encryption_data = traverse_obj(data, path)
+ if traverse_obj(encryption_data, ('encryptType', {int})) == 0:
+ return None
+ return traverse_obj(encryption_data, {'key': ('key', {decrypt}), 'iv': ('iv', {decrypt})})
+
+ def _extract_vod(self, url):
+ video_id = self._match_id(url)
+ video_info = self._download_metadata(
+ url, video_id, 'ja', ('dehydratedState', 'queries', 0, 'state', 'data'))
+ hls_info, decrypt = self._call_encrypted_api(
+ video_id, ':watch', 'stream information', data={'method': 1})
+
+ return {
+ 'id': video_id,
+ 'formats': self._get_formats(hls_info, ('protocolHls', 'url', {url_or_none}), video_id),
+ 'hls_aes': self._extract_hls_key(hls_info, 'protocolHls', decrypt),
+ **traverse_obj(video_info, {
+ 'title': ('displayName', {str}),
+ 'description': ('description', {str}),
+ 'timestamp': ('watchStartTime', {int_or_none}),
+ 'thumbnail': ('keyVisualUrl', {url_or_none}),
+ 'cast': ('casts', ..., 'displayName', {str}),
+ 'duration': ('duration', {int}),
+ }),
+ }
+
+ def _extract_ppv(self, url):
+ video_id = self._match_id(url)
+ video_info = self._call_api(video_id, msg='video information', query={'al': 'ja'}, auth=False)
+ hls_info, decrypt = self._call_encrypted_api(
+ video_id, ':watchArchive', 'stream information', data={'method': 1})
+
+ return {
+ 'id': video_id,
+ 'formats': self._get_formats(hls_info, ('hls', 'urls', ..., {url_or_none}), video_id),
+ 'hls_aes': self._extract_hls_key(hls_info, 'hls', decrypt),
+ **traverse_obj(video_info, {
+ 'title': ('displayName', {str}),
+ 'timestamp': ('startTime', {int_or_none}),
+ 'thumbnail': ('keyVisualUrl', {url_or_none}),
+ 'duration': ('duration', {int_or_none}),
+ }),
+ }
+
+
+class StacommuVODIE(StacommuBaseIE):
+ _VALID_URL = r'https?://www\.stacommu\.jp/(?:en/)?videos/episodes/(?P<id>[\da-zA-Z]+)'
+ _TESTS = [{
+ # not encrypted
+ 'url': 'https://www.stacommu.jp/videos/episodes/aXcVKjHyAENEjard61soZZ',
+ 'info_dict': {
+ 'id': 'aXcVKjHyAENEjard61soZZ',
+ 'ext': 'mp4',
+ 'title': 'スタコミュAWARDの裏側、ほぼ全部見せます!〜晴れ舞台の直前ドキドキ編〜',
+ 'description': 'md5:6400275c57ae75c06da36b06f96beb1c',
+ 'timestamp': 1679652000,
+ 'upload_date': '20230324',
+ 'thumbnail': 'https://image.stacommu.jp/6eLobQan8PFtBoU4RL4uGg/6eLobQan8PFtBoU4RL4uGg',
+ 'cast': 'count:11',
+ 'duration': 250,
+ },
+ 'params': {
+ 'skip_download': 'm3u8',
+ },
+ }, {
+ # encrypted; requires a premium account
+ 'url': 'https://www.stacommu.jp/videos/episodes/3hybMByUvzMEqndSeu5LpD',
+ 'info_dict': {
+ 'id': '3hybMByUvzMEqndSeu5LpD',
+ 'ext': 'mp4',
+ 'title': 'スタプラフェス2023〜裏側ほぼ全部見せます〜#10',
+ 'description': 'md5:85494488ccf1dfa1934accdeadd7b340',
+ 'timestamp': 1682506800,
+ 'upload_date': '20230426',
+ 'thumbnail': 'https://image.stacommu.jp/eMdXtEefR4kEyJJMpAFi7x/eMdXtEefR4kEyJJMpAFi7x',
+ 'cast': 'count:55',
+ 'duration': 312,
+ 'hls_aes': {
+ 'key': '6bbaf241b8e1fd9f59ecf546a70e4ae7',
+ 'iv': '1fc9002a23166c3bb1d240b953d09de9',
+ },
+ },
+ 'params': {
+ 'skip_download': 'm3u8',
+ },
+ }, {
+ 'url': 'https://www.stacommu.jp/en/videos/episodes/aXcVKjHyAENEjard61soZZ',
+ 'only_matching': True,
+ }]
+
+ _API_PATH = 'videoEpisodes'
+
+ def _real_extract(self, url):
+ return self._extract_vod(url)
+
+
+class StacommuLiveIE(StacommuBaseIE):
+ _VALID_URL = r'https?://www\.stacommu\.jp/(?:en/)?live/(?P<id>[\da-zA-Z]+)'
+ _TESTS = [{
+ 'url': 'https://www.stacommu.jp/live/d2FJ3zLnndegZJCAEzGM3m',
+ 'info_dict': {
+ 'id': 'd2FJ3zLnndegZJCAEzGM3m',
+ 'ext': 'mp4',
+ 'title': '仲村悠菜 2023/05/04',
+ 'timestamp': 1683195647,
+ 'upload_date': '20230504',
+ 'thumbnail': 'https://image.stacommu.jp/pHGF57SPEHE2ke83FS92FN/pHGF57SPEHE2ke83FS92FN',
+ 'duration': 5322,
+ 'hls_aes': {
+ 'key': 'efbb3ec0b8246f61adf1764c5a51213a',
+ 'iv': '80621d19a1f19167b64cedb415b05d1c',
+ },
+ },
+ 'params': {
+ 'skip_download': 'm3u8',
+ },
+ }, {
+ 'url': 'https://www.stacommu.jp/en/live/d2FJ3zLnndegZJCAEzGM3m',
+ 'only_matching': True,
+ }]
+
+ _API_PATH = 'events'
+
+ def _real_extract(self, url):
+ return self._extract_ppv(url)
+
+
+class TheaterComplexTownBaseIE(StacommuBaseIE):
+ _NETRC_MACHINE = 'theatercomplextown'
+ _API_HOST = 'api.theater-complex.town'
+ _LOGIN_QUERY = {'key': 'AIzaSyAgNCqToaIz4a062EeIrkhI_xetVfAOrfc'}
+ _LOGIN_HEADERS = {
+ 'Accept': '*/*',
+ 'Content-Type': 'application/json',
+ 'X-Client-Version': 'Chrome/JsCore/9.23.0/FirebaseCore-web',
+ 'Referer': 'https://www.theater-complex.town/',
+ 'Origin': 'https://www.theater-complex.town',
+ }
+
+
+class TheaterComplexTownVODIE(TheaterComplexTownBaseIE):
+ _VALID_URL = r'https?://(?:www\.)?theater-complex\.town/(?:en/)?videos/episodes/(?P<id>\w+)'
+ IE_NAME = 'theatercomplextown:vod'
+ _TESTS = [{
+ 'url': 'https://www.theater-complex.town/videos/episodes/hoxqidYNoAn7bP92DN6p78',
+ 'info_dict': {
+ 'id': 'hoxqidYNoAn7bP92DN6p78',
+ 'ext': 'mp4',
+ 'title': '演劇ドラフトグランプリ2023 劇団『恋のぼり』〜劇団名決定秘話ラジオ',
+ 'description': 'md5:a7e2e9cf570379ea67fb630f345ff65d',
+ 'cast': ['玉城 裕規', '石川 凌雅'],
+ 'thumbnail': 'https://image.theater-complex.town/5URnXX6KCeDysuFrPkP38o/5URnXX6KCeDysuFrPkP38o',
+ 'upload_date': '20231103',
+ 'timestamp': 1699016400,
+ 'duration': 868,
+ },
+ 'params': {
+ 'skip_download': 'm3u8',
+ },
+ }, {
+ 'url': 'https://www.theater-complex.town/en/videos/episodes/6QT7XYwM9dJz5Gf9VB6K5y',
+ 'only_matching': True,
+ }]
+
+ _API_PATH = 'videoEpisodes'
+
+ def _real_extract(self, url):
+ return self._extract_vod(url)
+
+
+class TheaterComplexTownPPVIE(TheaterComplexTownBaseIE):
+ _VALID_URL = r'https?://(?:www\.)?theater-complex\.town/(?:en/)?ppv/(?P<id>\w+)'
+ IE_NAME = 'theatercomplextown:ppv'
+ _TESTS = [{
+ 'url': 'https://www.theater-complex.town/ppv/wytW3X7khrjJBUpKuV3jen',
+ 'info_dict': {
+ 'id': 'wytW3X7khrjJBUpKuV3jen',
+ 'ext': 'mp4',
+ 'title': 'BREAK FREE STARS 11月5日(日)12:30千秋楽公演',
+ 'thumbnail': 'https://image.theater-complex.town/5GWEB31JcTUfjtgdeV5t6o/5GWEB31JcTUfjtgdeV5t6o',
+ 'upload_date': '20231105',
+ 'timestamp': 1699155000,
+ 'duration': 8378,
+ },
+ 'params': {
+ 'skip_download': 'm3u8',
+ },
+ }, {
+ 'url': 'https://www.theater-complex.town/en/ppv/wytW3X7khrjJBUpKuV3jen',
+ 'only_matching': True,
+ }]
+
+ _API_PATH = 'events'
+
+ def _real_extract(self, url):
+ return self._extract_ppv(url)
diff --git a/yt_dlp/extractor/stageplus.py b/yt_dlp/extractor/stageplus.py
new file mode 100644
index 0000000..77e4362
--- /dev/null
+++ b/yt_dlp/extractor/stageplus.py
@@ -0,0 +1,515 @@
+import json
+import uuid
+
+from .common import InfoExtractor
+from ..utils import (
+ float_or_none,
+ traverse_obj,
+ try_call,
+ unified_timestamp,
+ url_or_none,
+)
+
+
+class StagePlusVODConcertIE(InfoExtractor):
+ _NETRC_MACHINE = 'stageplus'
+ _VALID_URL = r'https?://(?:www\.)?stage-plus\.com/video/(?P<id>vod_concert_\w+)'
+ _TESTS = [{
+ 'url': 'https://www.stage-plus.com/video/vod_concert_APNM8GRFDPHMASJKBSPJACG',
+ 'playlist_count': 6,
+ 'info_dict': {
+ 'id': 'vod_concert_APNM8GRFDPHMASJKBSPJACG',
+ 'title': 'Yuja Wang plays Rachmaninoff\'s Piano Concerto No. 2 – from Odeonsplatz',
+ 'description': 'md5:50f78ec180518c9bdb876bac550996fc',
+ 'artists': ['Yuja Wang', 'Lorenzo Viotti'],
+ 'upload_date': '20230331',
+ 'timestamp': 1680249600,
+ 'release_date': '20210709',
+ 'release_timestamp': 1625788800,
+ 'thumbnails': 'count:3',
+ },
+ 'playlist': [{
+ 'info_dict': {
+ 'id': 'performance_work_A1IN4PJFE9MM2RJ3CLBMUSJBBSOJAD9O',
+ 'ext': 'mp4',
+ 'title': 'Piano Concerto No. 2 in C Minor, Op. 18',
+ 'description': 'md5:50f78ec180518c9bdb876bac550996fc',
+ 'upload_date': '20230331',
+ 'timestamp': 1680249600,
+ 'release_date': '20210709',
+ 'release_timestamp': 1625788800,
+ 'duration': 2207,
+ 'chapters': 'count:5',
+ 'artists': ['Yuja Wang'],
+ 'composers': ['Sergei Rachmaninoff'],
+ 'album': 'Yuja Wang plays Rachmaninoff\'s Piano Concerto No. 2 – from Odeonsplatz',
+ 'album_artists': ['Yuja Wang', 'Lorenzo Viotti'],
+ 'track': 'Piano Concerto No. 2 in C Minor, Op. 18',
+ 'track_number': 1,
+ 'genre': 'Instrumental Concerto',
+ },
+ }],
+ 'params': {'skip_download': 'm3u8'},
+ }]
+
+ # TODO: Prune this after livestream and/or album extractors are added
+ _GRAPHQL_QUERY = '''query videoDetailPage($videoId: ID!, $sliderItemsFirst: Int = 24) {
+ node(id: $videoId) {
+ __typename
+ ...LiveConcertFields
+ ... on LiveConcert {
+ artists {
+ edges {
+ role {
+ ...RoleFields
+ }
+ node {
+ id
+ name
+ sortName
+ }
+ }
+ }
+ isAtmos
+ maxResolution
+ groups {
+ id
+ name
+ typeDisplayName
+ }
+ shortDescription
+ performanceWorks {
+ ...livePerformanceWorkFields
+ }
+ totalDuration
+ sliders {
+ ...contentContainerFields
+ }
+ vodConcert {
+ __typename
+ id
+ }
+ }
+ ...VideoFields
+ ... on Video {
+ artists {
+ edges {
+ role {
+ ...RoleFields
+ }
+ node {
+ id
+ name
+ sortName
+ }
+ }
+ }
+ isAtmos
+ maxResolution
+ isLossless
+ description
+ productionDate
+ takedownDate
+ sliders {
+ ...contentContainerFields
+ }
+ }
+ ...VodConcertFields
+ ... on VodConcert {
+ artists {
+ edges {
+ role {
+ ...RoleFields
+ }
+ node {
+ id
+ name
+ sortName
+ }
+ }
+ }
+ isAtmos
+ maxResolution
+ groups {
+ id
+ name
+ typeDisplayName
+ }
+ performanceWorks {
+ ...PerformanceWorkFields
+ }
+ shortDescription
+ productionDate
+ takedownDate
+ sliders {
+ ...contentContainerFields
+ }
+ }
+ }
+}
+
+fragment LiveConcertFields on LiveConcert {
+ endTime
+ id
+ pictures {
+ ...PictureFields
+ }
+ reruns {
+ ...liveConcertRerunFields
+ }
+ publicationLevel
+ startTime
+ streamStartTime
+ subtitle
+ title
+ typeDisplayName
+ stream {
+ ...liveStreamFields
+ }
+ trailerStream {
+ ...streamFields
+ }
+ geoAccessCountries
+ geoAccessMode
+}
+
+fragment PictureFields on Picture {
+ id
+ url
+ type
+}
+
+fragment liveConcertRerunFields on LiveConcertRerun {
+ streamStartTime
+ endTime
+ startTime
+ stream {
+ ...rerunStreamFields
+ }
+}
+
+fragment rerunStreamFields on RerunStream {
+ publicationLevel
+ streamType
+ url
+}
+
+fragment liveStreamFields on LiveStream {
+ publicationLevel
+ streamType
+ url
+}
+
+fragment streamFields on Stream {
+ publicationLevel
+ streamType
+ url
+}
+
+fragment RoleFields on Role {
+ __typename
+ id
+ type
+ displayName
+}
+
+fragment livePerformanceWorkFields on LivePerformanceWork {
+ __typename
+ id
+ artists {
+ ...artistWithRoleFields
+ }
+ groups {
+ edges {
+ node {
+ id
+ name
+ typeDisplayName
+ }
+ }
+ }
+ work {
+ ...workFields
+ }
+}
+
+fragment artistWithRoleFields on ArtistWithRoleConnection {
+ edges {
+ role {
+ ...RoleFields
+ }
+ node {
+ id
+ name
+ sortName
+ }
+ }
+}
+
+fragment workFields on Work {
+ id
+ title
+ movements {
+ id
+ title
+ }
+ composers {
+ id
+ name
+ }
+ genre {
+ id
+ title
+ }
+}
+
+fragment contentContainerFields on CuratedContentContainer {
+ __typename
+ ...SliderFields
+ ...BannerFields
+}
+
+fragment SliderFields on Slider {
+ id
+ headline
+ items(first: $sliderItemsFirst) {
+ edges {
+ node {
+ id
+ __typename
+ ...AlbumFields
+ ...ArtistFields
+ ...EpochFields
+ ...GenreFields
+ ...GroupFields
+ ...LiveConcertFields
+ ...PartnerFields
+ ...PerformanceWorkFields
+ ...VideoFields
+ ...VodConcertFields
+ }
+ }
+ }
+}
+
+fragment AlbumFields on Album {
+ artistAndGroupDisplayInfo
+ id
+ pictures {
+ ...PictureFields
+ }
+ title
+}
+
+fragment ArtistFields on Artist {
+ id
+ name
+ roles {
+ ...RoleFields
+ }
+ pictures {
+ ...PictureFields
+ }
+}
+
+fragment EpochFields on Epoch {
+ id
+ endYear
+ pictures {
+ ...PictureFields
+ }
+ startYear
+ title
+}
+
+fragment GenreFields on Genre {
+ id
+ pictures {
+ ...PictureFields
+ }
+ title
+}
+
+fragment GroupFields on Group {
+ id
+ name
+ typeDisplayName
+ pictures {
+ ...PictureFields
+ }
+}
+
+fragment PartnerFields on Partner {
+ id
+ name
+ typeDisplayName
+ subtypeDisplayName
+ pictures {
+ ...PictureFields
+ }
+}
+
+fragment PerformanceWorkFields on PerformanceWork {
+ __typename
+ id
+ artists {
+ ...artistWithRoleFields
+ }
+ groups {
+ edges {
+ node {
+ id
+ name
+ typeDisplayName
+ }
+ }
+ }
+ work {
+ ...workFields
+ }
+ stream {
+ ...streamFields
+ }
+ vodConcert {
+ __typename
+ id
+ }
+ duration
+ cuePoints {
+ mark
+ title
+ }
+}
+
+fragment VideoFields on Video {
+ id
+ archiveReleaseDate
+ title
+ subtitle
+ pictures {
+ ...PictureFields
+ }
+ stream {
+ ...streamFields
+ }
+ trailerStream {
+ ...streamFields
+ }
+ duration
+ typeDisplayName
+ geoAccessCountries
+ geoAccessMode
+ publicationLevel
+ takedownDate
+}
+
+fragment VodConcertFields on VodConcert {
+ id
+ archiveReleaseDate
+ pictures {
+ ...PictureFields
+ }
+ subtitle
+ title
+ typeDisplayName
+ totalDuration
+ geoAccessCountries
+ geoAccessMode
+ trailerStream {
+ ...streamFields
+ }
+ publicationLevel
+ takedownDate
+}
+
+fragment BannerFields on Banner {
+ description
+ link
+ pictures {
+ ...PictureFields
+ }
+ title
+}'''
+
+ _TOKEN = None
+
+ def _perform_login(self, username, password):
+ auth = self._download_json('https://audience.api.stageplus.io/oauth/token', None, headers={
+ 'Content-Type': 'application/json',
+ 'Origin': 'https://www.stage-plus.com',
+ }, data=json.dumps({
+ 'grant_type': 'password',
+ 'username': username,
+ 'password': password,
+ 'device_info': 'Chrome (Windows)',
+ 'client_device_id': str(uuid.uuid4()),
+ }, separators=(',', ':')).encode(), note='Logging in')
+
+ if auth.get('access_token'):
+ self._TOKEN = auth['access_token']
+
+ def _real_initialize(self):
+ if self._TOKEN:
+ return
+
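+ # fall back to the token from the site cookies when no credentials
+ # were supplied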
+ self._TOKEN = try_call(
+ lambda: self._get_cookies('https://www.stage-plus.com/')['dgplus_access_token'].value)
+ if not self._TOKEN:
+ self.raise_login_required()
+
+ def _real_extract(self, url):
+ concert_id = self._match_id(url)
+
+ data = self._download_json('https://audience.api.stageplus.io/graphql', concert_id, headers={
+ 'authorization': f'Bearer {self._TOKEN}',
+ 'content-type': 'application/json',
+ 'Origin': 'https://www.stage-plus.com',
+ }, data=json.dumps({
+ 'query': self._GRAPHQL_QUERY,
+ 'variables': {'videoId': concert_id},
+ 'operationName': 'videoDetailPage'
+ }, separators=(',', ':')).encode())['data']['node']
+
+ metadata = traverse_obj(data, {
+ 'title': 'title',
+ 'description': ('shortDescription', {str}),
+ 'artists': ('artists', 'edges', ..., 'node', 'name'),
+ 'timestamp': ('archiveReleaseDate', {unified_timestamp}),
+ 'release_timestamp': ('productionDate', {unified_timestamp}),
+ })
+
+ thumbnails = traverse_obj(data, ('pictures', lambda _, v: url_or_none(v['url']), {
+ 'id': 'id',
+ 'url': 'url',
+ })) or None
+
+ entries = []
+ for idx, video in enumerate(traverse_obj(data, (
+ 'performanceWorks', lambda _, v: v['id'] and url_or_none(v['stream']['url']))), 1):
+ formats, subtitles = self._extract_m3u8_formats_and_subtitles(
+ video['stream']['url'], video['id'], 'mp4', m3u8_id='hls', query={'token': self._TOKEN})
+ entries.append({
+ 'id': video['id'],
+ 'formats': formats,
+ 'subtitles': subtitles,
+ 'album': metadata.get('title'),
+ 'album_artists': metadata.get('artists'),
+ 'track_number': idx,
+ **metadata,
+ **traverse_obj(video, {
+ 'title': ('work', 'title'),
+ 'track': ('work', 'title'),
+ 'duration': ('duration', {float_or_none}),
+ 'chapters': (
+ 'cuePoints', lambda _, v: float_or_none(v['mark']) is not None, {
+ 'title': 'title',
+ 'start_time': ('mark', {float_or_none}),
+ }),
+ 'artists': ('artists', 'edges', ..., 'node', 'name'),
+ 'composers': ('work', 'composers', ..., 'name'),
+ 'genre': ('work', 'genre', 'title'),
+ }),
+ })
+
+ return self.playlist_result(entries, concert_id, thumbnails=thumbnails, **metadata)
diff --git a/yt_dlp/extractor/stanfordoc.py b/yt_dlp/extractor/stanfordoc.py
new file mode 100644
index 0000000..be0f4af
--- /dev/null
+++ b/yt_dlp/extractor/stanfordoc.py
@@ -0,0 +1,89 @@
+import re
+
+from .common import InfoExtractor
+from ..utils import (
+ ExtractorError,
+ orderedSet,
+ unescapeHTML,
+)
+
+
+class StanfordOpenClassroomIE(InfoExtractor):
+ IE_NAME = 'stanfordoc'
+ IE_DESC = 'Stanford Open ClassRoom'
+ _VALID_URL = r'https?://openclassroom\.stanford\.edu(?P<path>/?|(/MainFolder/(?:HomePage|CoursePage|VideoPage)\.php([?]course=(?P<course>[^&]+)(&video=(?P<video>[^&]+))?(&.*)?)?))$'
+ _TEST = {
+ 'url': 'http://openclassroom.stanford.edu/MainFolder/VideoPage.php?course=PracticalUnix&video=intro-environment&speed=100',
+ 'md5': '544a9468546059d4e80d76265b0443b8',
+ 'info_dict': {
+ 'id': 'PracticalUnix_intro-environment',
+ 'ext': 'mp4',
+ 'title': 'Intro Environment',
+ }
+ }
+
+ def _real_extract(self, url):
+ mobj = self._match_valid_url(url)
+
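+ # the same extractor handles three page types: a single video, a
+ # course page and the site root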
+ if mobj.group('course') and mobj.group('video'): # A specific video
+ course = mobj.group('course')
+ video = mobj.group('video')
+ info = {
+ 'id': course + '_' + video,
+ 'uploader': None,
+ 'upload_date': None,
+ }
+
+ baseUrl = 'http://openclassroom.stanford.edu/MainFolder/courses/' + course + '/videos/'
+ xmlUrl = baseUrl + video + '.xml'
+ mdoc = self._download_xml(xmlUrl, info['id'])
+ try:
+ info['title'] = mdoc.findall('./title')[0].text
+ info['url'] = baseUrl + mdoc.findall('./videoFile')[0].text
+ except IndexError:
+ raise ExtractorError('Invalid metadata XML file')
+ return info
+ elif mobj.group('course'): # A course page
+ course = mobj.group('course')
+ info = {
+ 'id': course,
+ '_type': 'playlist',
+ 'uploader': None,
+ 'upload_date': None,
+ }
+
+ coursepage = self._download_webpage(
+ url, info['id'],
+ note='Downloading course info page',
+ errnote='Unable to download course info page')
+
+ info['title'] = self._html_search_regex(
+ r'<h1>([^<]+)</h1>', coursepage, 'title', default=info['id'])
+
+ info['description'] = self._html_search_regex(
+ r'(?s)<description>([^<]+)</description>',
+ coursepage, 'description', fatal=False)
+
+ links = orderedSet(re.findall(r'<a href="(VideoPage\.php\?[^"]+)">', coursepage))
+ info['entries'] = [self.url_result(
+ 'http://openclassroom.stanford.edu/MainFolder/%s' % unescapeHTML(l)
+ ) for l in links]
+ return info
+ else: # Root page
+ info = {
+ 'id': 'Stanford OpenClassroom',
+ '_type': 'playlist',
+ 'uploader': None,
+ 'upload_date': None,
+ }
+ info['title'] = info['id']
+
+ rootURL = 'http://openclassroom.stanford.edu/MainFolder/HomePage.php'
+ rootpage = self._download_webpage(rootURL, info['id'],
+ errnote='Unable to download course info page')
+
+ links = orderedSet(re.findall(r'<a href="(CoursePage\.php\?[^"]+)">', rootpage))
+ info['entries'] = [self.url_result(
+ 'http://openclassroom.stanford.edu/MainFolder/%s' % unescapeHTML(l)
+ ) for l in links]
+ return info
diff --git a/yt_dlp/extractor/startrek.py b/yt_dlp/extractor/startrek.py
new file mode 100644
index 0000000..94efb58
--- /dev/null
+++ b/yt_dlp/extractor/startrek.py
@@ -0,0 +1,76 @@
+from .common import InfoExtractor
+from ..utils import int_or_none, urljoin
+
+
+class StarTrekIE(InfoExtractor):
+ _WORKING = False
+ _VALID_URL = r'(?P<base>https?://(?:intl|www)\.startrek\.com)/videos/(?P<id>[^/]+)'
+ _TESTS = [{
+ 'url': 'https://intl.startrek.com/videos/watch-welcoming-jess-bush-to-the-ready-room',
+ 'md5': '491df5035c9d4dc7f63c79caaf9c839e',
+ 'info_dict': {
+ 'id': 'watch-welcoming-jess-bush-to-the-ready-room',
+ 'ext': 'mp4',
+ 'title': 'WATCH: Welcoming Jess Bush to The Ready Room',
+ 'duration': 1888,
+ 'timestamp': 1655388000,
+ 'upload_date': '20220616',
+ 'description': 'md5:1ffee884e3920afbdd6dd04e926a1221',
+ 'thumbnail': r're:https://(?:intl|www)\.startrek\.com/sites/default/files/styles/video_1920x1080/public/images/2022-06/pp_14794_rr_thumb_107_yt_16x9\.jpg(?:\?.+)?',
+ 'subtitles': {'en-US': [{
+ 'url': r're:https://(?:intl|www)\.startrek\.com/sites/default/files/video/captions/2022-06/TRR_SNW_107_v4\.vtt',
+ }, {
+ 'url': 'https://media.startrek.com/2022/06/16/2043801155561/1069981_hls/trr_snw_107_v4-c4bfc25d/stream_vtt.m3u8',
+ }]},
+ }
+ }, {
+ 'url': 'https://www.startrek.com/videos/watch-ethan-peck-and-gia-sandhu-beam-down-to-the-ready-room',
+ 'md5': 'f5ad74fbb86e91e0882fc0a333178d1d',
+ 'info_dict': {
+ 'id': 'watch-ethan-peck-and-gia-sandhu-beam-down-to-the-ready-room',
+ 'ext': 'mp4',
+ 'title': 'WATCH: Ethan Peck and Gia Sandhu Beam Down to The Ready Room',
+ 'duration': 1986,
+ 'timestamp': 1654221600,
+ 'upload_date': '20220603',
+ 'description': 'md5:b3aa0edacfe119386567362dec8ed51b',
+ 'thumbnail': r're:https://www\.startrek\.com/sites/default/files/styles/video_1920x1080/public/images/2022-06/pp_14792_rr_thumb_105_yt_16x9_1.jpg(?:\?.+)?',
+ 'subtitles': {'en-US': [{
+ 'url': r're:https://(?:intl|www)\.startrek\.com/sites/default/files/video/captions/2022-06/TRR_SNW_105_v5\.vtt',
+ }]},
+ }
+ }]
+
+ def _real_extract(self, url):
+ urlbase, video_id = self._match_valid_url(url).group('base', 'id')
+ webpage = self._download_webpage(url, video_id)
+
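+ # stream, caption and poster URLs are carried as data-* attributes
+ # on the inline player <div>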
+ player = self._search_regex(
+ r'(<\s*div\s+id\s*=\s*"cvp-player-[^<]+<\s*/div\s*>)', webpage, 'player')
+
+ hls = self._html_search_regex(r'\bdata-hls\s*=\s*"([^"]+)"', player, 'HLS URL')
+ formats, subtitles = self._extract_m3u8_formats_and_subtitles(hls, video_id, 'mp4')
+
+ captions = self._html_search_regex(
+ r'\bdata-captions-url\s*=\s*"([^"]+)"', player, 'captions URL', fatal=False)
+ if captions:
+ subtitles.setdefault('en-US', [])[:0] = [{'url': urljoin(urlbase, captions)}]
+
+ # NB: Most of the data in the json_ld is undesirable
+ json_ld = self._search_json_ld(webpage, video_id, fatal=False)
+
+ return {
+ 'id': video_id,
+ 'title': self._html_search_regex(
+ r'\bdata-title\s*=\s*"([^"]+)"', player, 'title', json_ld.get('title')),
+ 'description': self._html_search_regex(
+ r'(?s)<\s*div\s+class\s*=\s*"header-body"\s*>(.+?)<\s*/div\s*>',
+ webpage, 'description', fatal=False),
+ 'duration': int_or_none(self._html_search_regex(
+ r'\bdata-duration\s*=\s*"(\d+)"', player, 'duration', fatal=False)),
+ 'formats': formats,
+ 'subtitles': subtitles,
+ 'thumbnail': urljoin(urlbase, self._html_search_regex(
+ r'\bdata-poster-url\s*=\s*"([^"]+)"', player, 'thumbnail', fatal=False)),
+ 'timestamp': json_ld.get('timestamp'),
+ }
diff --git a/yt_dlp/extractor/startv.py b/yt_dlp/extractor/startv.py
new file mode 100644
index 0000000..bb6e8f1
--- /dev/null
+++ b/yt_dlp/extractor/startv.py
@@ -0,0 +1,100 @@
+from .common import InfoExtractor
+from ..compat import (
+ compat_str,
+)
+from ..utils import (
+ clean_html,
+ ExtractorError,
+ traverse_obj,
+ int_or_none,
+)
+
+
+class StarTVIE(InfoExtractor):
+ _VALID_URL = r"""(?x)
+ https?://(?:www\.)?startv\.com\.tr/
+ (?:
+ (?:dizi|program)/(?:[^/?#&]+)/(?:bolumler|fragmanlar|ekstralar)|
+ video/arsiv/(?:dizi|program)/(?:[^/?#&]+)
+ )/
+ (?P<id>[^/?#&]+)
+ """
+ IE_NAME = 'startv'
+ _TESTS = [
+ {
+ 'url': 'https://www.startv.com.tr/dizi/cocuk/bolumler/3-bolum',
+ 'md5': '72381a32bcc2e2eb5841e8c8bf68f127',
+ 'info_dict': {
+ 'id': '904972',
+ 'display_id': '3-bolum',
+ 'ext': 'mp4',
+ 'title': '3. Bölüm',
+ 'description': 'md5:3a8049f05a75c2e8747116a673275de4',
+ 'thumbnail': r're:^https?://.*\.jpg(?:\?.*?)?$',
+ 'timestamp': 1569281400,
+ 'upload_date': '20190923'
+ },
+ },
+ {
+ 'url': 'https://www.startv.com.tr/video/arsiv/dizi/avlu/44-bolum',
+ 'only_matching': True
+ },
+ {
+ 'url': 'https://www.startv.com.tr/dizi/cocuk/fragmanlar/5-bolum-fragmani',
+ 'only_matching': True
+ },
+ {
+ 'url': 'https://www.startv.com.tr/dizi/cocuk/ekstralar/5-bolumun-nefes-kesen-final-sahnesi',
+ 'only_matching': True
+ },
+ {
+ 'url': 'https://www.startv.com.tr/program/burcu-ile-haftasonu/bolumler/1-bolum',
+ 'only_matching': True
+ },
+ {
+ 'url': 'https://www.startv.com.tr/program/burcu-ile-haftasonu/fragmanlar/2-fragman',
+ 'only_matching': True
+ },
+ {
+ 'url': 'https://www.startv.com.tr/video/arsiv/program/buyukrisk/14-bolumde-hangi-unlu-ne-sordu-',
+ 'only_matching': True
+ },
+ {
+ 'url': 'https://www.startv.com.tr/video/arsiv/program/buyukrisk/buyuk-risk-334-bolum',
+ 'only_matching': True
+ },
+ {
+ 'url': 'https://www.startv.com.tr/video/arsiv/program/dada/dada-58-bolum',
+ 'only_matching': True
+ }
+ ]
+
+ def _real_extract(self, url):
+ display_id = self._match_id(url)
+ webpage = self._download_webpage(url, display_id)
+ info_url = self._search_regex(
+ r'(["\'])videoUrl\1\s*:\s*\1(?P<url>(?:(?!\1).)+)\1\s*',
+ webpage, 'video info url', group='url')
+
+ info = traverse_obj(self._download_json(info_url, display_id), 'data', expected_type=dict)
+ if not info:
+ raise ExtractorError('Failed to extract API data')
+
+ video_id = compat_str(info.get('id'))
+ title = info.get('title') or self._og_search_title(webpage)
+ description = clean_html(info.get('description')) or self._og_search_description(webpage, default=None)
+ thumbnail = self._proto_relative_url(
+ self._og_search_thumbnail(webpage), scheme='http:')
+
+ formats = self._extract_m3u8_formats(
+ traverse_obj(info, ('flavors', 'hls')), video_id, entry_protocol='m3u8_native', m3u8_id='hls', fatal=False)
+
+ return {
+ 'id': video_id,
+ 'display_id': display_id,
+ 'title': title,
+ 'description': description,
+ 'thumbnail': thumbnail,
+ 'timestamp': int_or_none(info.get('release_date')),
+ 'formats': formats
+ }
diff --git a/yt_dlp/extractor/steam.py b/yt_dlp/extractor/steam.py
new file mode 100644
index 0000000..63da966
--- /dev/null
+++ b/yt_dlp/extractor/steam.py
@@ -0,0 +1,170 @@
+import re
+
+from .common import InfoExtractor
+from ..utils import (
+ ExtractorError,
+ extract_attributes,
+ get_element_by_class,
+ str_or_none,
+)
+
+
+class SteamIE(InfoExtractor):
+ _VALID_URL = r"""(?x)
+ https?://(?:store\.steampowered|steamcommunity)\.com/
+ (?:agecheck/)?
+ (?P<urltype>video|app)/ # If the page is only for videos or for a game
+ (?P<gameID>\d+)/?
+ (?P<videoID>\d*)(?P<extra>\??) # For urltype == video we sometimes get the videoID
+ |
+ https?://(?:www\.)?steamcommunity\.com/sharedfiles/filedetails/\?id=(?P<fileID>[0-9]+)
+ """
+ _VIDEO_PAGE_TEMPLATE = 'http://store.steampowered.com/video/%s/'
+ _AGECHECK_TEMPLATE = 'http://store.steampowered.com/agecheck/video/%s/?snr=1_agecheck_agecheck__age-gate&ageDay=1&ageMonth=January&ageYear=1970'
+ _TESTS = [{
+ 'url': 'http://store.steampowered.com/video/105600/',
+ 'playlist': [
+ {
+ 'md5': '695242613303ffa2a4c44c9374ddc067',
+ 'info_dict': {
+ 'id': '256785003',
+ 'ext': 'mp4',
+ 'title': 'Terraria video 256785003',
+ 'thumbnail': r're:^https://cdn\.[^\.]+\.steamstatic\.com',
+ }
+ },
+ {
+ 'md5': '6a294ee0c4b1f47f5bb76a65e31e3592',
+ 'info_dict': {
+ 'id': '2040428',
+ 'ext': 'mp4',
+ 'title': 'Terraria video 2040428',
+ 'thumbnail': r're:^https://cdn\.[^\.]+\.steamstatic\.com',
+ }
+ }
+ ],
+ 'info_dict': {
+ 'id': '105600',
+ 'title': 'Terraria',
+ },
+ 'params': {
+ 'playlistend': 2,
+ }
+ }, {
+ 'url': 'https://store.steampowered.com/app/271590/Grand_Theft_Auto_V/',
+ 'info_dict': {
+ 'id': '271590',
+ 'title': 'Grand Theft Auto V',
+ },
+ 'playlist_count': 23,
+ }]
+
+ def _real_extract(self, url):
+ m = self._match_valid_url(url)
+ fileID = m.group('fileID')
+ if fileID:
+ video_url = url
+ playlist_id = fileID
+ else:
+ gameID = m.group('gameID')
+ playlist_id = gameID
+ video_url = self._VIDEO_PAGE_TEMPLATE % playlist_id
+
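+ # preset cookies so that Steam's age gate and mature-content
+ # interstitials are skipped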
+ self._set_cookie('steampowered.com', 'wants_mature_content', '1')
+ self._set_cookie('steampowered.com', 'birthtime', '944006401')
+ self._set_cookie('steampowered.com', 'lastagecheckage', '1-0-2000')
+
+ webpage = self._download_webpage(video_url, playlist_id)
+
+ if re.search('<div[^>]+>Please enter your birth date to continue:</div>', webpage) is not None:
+ video_url = self._AGECHECK_TEMPLATE % playlist_id
+ self.report_age_confirmation()
+ webpage = self._download_webpage(video_url, playlist_id)
+
+ videos = re.findall(r'(<div[^>]+id=[\'"]highlight_movie_(\d+)[\'"][^>]+>)', webpage)
+ entries = []
+ playlist_title = get_element_by_class('apphub_AppName', webpage)
+ for movie, movie_id in videos:
+ if not movie:
+ continue
+ movie = extract_attributes(movie)
+ if not movie_id:
+ continue
+ entry = {
+ 'id': movie_id,
+ 'title': f'{playlist_title} video {movie_id}',
+ }
+ formats = []
+ if movie:
+ entry['thumbnail'] = movie.get('data-poster')
+ for quality in ('', '-hd'):
+ for ext in ('webm', 'mp4'):
+ video_url = movie.get('data-%s%s-source' % (ext, quality))
+ if video_url:
+ formats.append({
+ 'format_id': ext + quality,
+ 'url': video_url,
+ })
+ entry['formats'] = formats
+ entries.append(entry)
+ embedded_videos = re.findall(r'(<iframe[^>]+>)', webpage)
+ for embed in embedded_videos:
+ embed_src = extract_attributes(embed).get('src') or ''
+ video_id = self._search_regex(r'youtube\.com/embed/([0-9A-Za-z_-]{11})', embed_src, 'youtube_video_id', default=None)
+ if video_id:
+ entries.append({
+ '_type': 'url_transparent',
+ 'id': video_id,
+ 'url': video_id,
+ 'ie_key': 'Youtube',
+ })
+ if not entries:
+ raise ExtractorError('Could not find any videos')
+
+ return self.playlist_result(entries, playlist_id, playlist_title)
+
+
+class SteamCommunityBroadcastIE(InfoExtractor):
+ _VALID_URL = r'https?://steamcommunity\.(?:com)/broadcast/watch/(?P<id>\d+)'
+ _TESTS = [{
+ 'url': 'https://steamcommunity.com/broadcast/watch/76561199073851486',
+ 'info_dict': {
+ 'id': '76561199073851486',
+ 'title': r're:Steam Community :: pepperm!nt :: Broadcast 2022-06-26 \d{2}:\d{2}',
+ 'ext': 'mp4',
+ 'uploader_id': '1113585758',
+ 'uploader': 'pepperm!nt',
+ 'live_status': 'is_live',
+ },
+ 'skip': 'Stream has ended',
+ }]
+
+ def _real_extract(self, url):
+ video_id = self._match_id(url)
+ webpage = self._download_webpage(url, video_id)
+ json_data = self._download_json(
+ 'https://steamcommunity.com/broadcast/getbroadcastmpd/',
+ video_id, query={'steamid': f'{video_id}'})
+
+ formats, subs = self._extract_m3u8_formats_and_subtitles(json_data['hls_url'], video_id)
+
+ ''' # We cannot download live DASH at the moment
+ mpd_formats, mpd_subs = self._extract_mpd_formats_and_subtitles(json_data['url'], video_id)
+ formats.extend(mpd_formats)
+ self._merge_subtitles(mpd_subs, target=subs)
+ '''
+
+ uploader_json = self._download_json(
+ 'https://steamcommunity.com/actions/ajaxresolveusers',
+ video_id, query={'steamids': video_id})[0]
+
+ return {
+ 'id': video_id,
+ 'title': self._generic_title('', webpage),
+ 'formats': formats,
+ 'live_status': 'is_live',
+ 'view_count': json_data.get('num_view'),
+ 'uploader': uploader_json.get('persona_name'),
+ 'uploader_id': str_or_none(uploader_json.get('accountid')),
+ 'subtitles': subs,
+ }
diff --git a/yt_dlp/extractor/stitcher.py b/yt_dlp/extractor/stitcher.py
new file mode 100644
index 0000000..2fd200f
--- /dev/null
+++ b/yt_dlp/extractor/stitcher.py
@@ -0,0 +1,142 @@
+from .common import InfoExtractor
+from ..compat import compat_str
+from ..utils import (
+ clean_html,
+ clean_podcast_url,
+ ExtractorError,
+ int_or_none,
+ str_or_none,
+ try_get,
+ url_or_none,
+)
+
+
+class StitcherBaseIE(InfoExtractor):
+ _VALID_URL_BASE = r'https?://(?:www\.)?stitcher\.com/(?:podcast|show)/'
+
+ def _call_api(self, path, video_id, query):
+ resp = self._download_json(
+ 'https://api.prod.stitcher.com/' + path,
+ video_id, query=query)
+ error_message = try_get(resp, lambda x: x['errors'][0]['message'])
+ if error_message:
+ raise ExtractorError(error_message, expected=True)
+ return resp['data']
+
+ def _extract_description(self, data):
+ return clean_html(data.get('html_description') or data.get('description'))
+
+ def _extract_audio_url(self, episode):
+ return url_or_none(episode.get('audio_url') or episode.get('guid'))
+
+ def _extract_show_info(self, show):
+ return {
+ 'thumbnail': show.get('image_base_url'),
+ 'series': show.get('title'),
+ }
+
+ def _extract_episode(self, episode, audio_url, show_info):
+ info = {
+ 'id': compat_str(episode['id']),
+ 'display_id': episode.get('slug'),
+ 'title': episode['title'].strip(),
+ 'description': self._extract_description(episode),
+ 'duration': int_or_none(episode.get('duration')),
+ 'url': clean_podcast_url(audio_url),
+ 'vcodec': 'none',
+ 'timestamp': int_or_none(episode.get('date_published')),
+ 'season_number': int_or_none(episode.get('season')),
+ 'season_id': str_or_none(episode.get('season_id')),
+ }
+ info.update(show_info)
+ return info
+
+
+class StitcherIE(StitcherBaseIE):
+ _VALID_URL = StitcherBaseIE._VALID_URL_BASE + r'(?:[^/]+/)+e(?:pisode)?/(?:[^/#?&]+-)?(?P<id>\d+)'
+ _TESTS = [{
+ 'url': 'http://www.stitcher.com/podcast/the-talking-machines/e/40789481?autoplay=true',
+ 'md5': 'e9635098e0da10b21a0e2b85585530f6',
+ 'info_dict': {
+ 'id': '40789481',
+ 'ext': 'mp3',
+ 'title': 'Machine Learning Mastery and Cancer Clusters',
+ 'description': 'md5:547adb4081864be114ae3831b4c2b42f',
+ 'duration': 1604,
+ 'thumbnail': r're:^https?://.*\.jpg',
+ 'upload_date': '20151008',
+ 'timestamp': 1444285800,
+ 'series': 'Talking Machines',
+ },
+ }, {
+ 'url': 'http://www.stitcher.com/podcast/panoply/vulture-tv/e/the-rare-hourlong-comedy-plus-40846275?autoplay=true',
+ 'info_dict': {
+ 'id': '40846275',
+ 'display_id': 'the-rare-hourlong-comedy-plus',
+ 'ext': 'mp3',
+ 'title': "The CW's 'Crazy Ex-Girlfriend'",
+ 'description': 'md5:04f1e2f98eb3f5cbb094cea0f9e19b17',
+ 'duration': 2235,
+ 'thumbnail': r're:^https?://.*\.jpg',
+ },
+ 'params': {
+ 'skip_download': True,
+ },
+ 'skip': 'Page Not Found',
+ }, {
+ # escaped title
+ 'url': 'http://www.stitcher.com/podcast/marketplace-on-stitcher/e/40910226?autoplay=true',
+ 'only_matching': True,
+ }, {
+ 'url': 'http://www.stitcher.com/podcast/panoply/getting-in/e/episode-2a-how-many-extracurriculars-should-i-have-40876278?autoplay=true',
+ 'only_matching': True,
+ }, {
+ 'url': 'https://www.stitcher.com/show/threedom/episode/circles-on-a-stick-200212584',
+ 'only_matching': True,
+ }]
+
+ def _real_extract(self, url):
+ audio_id = self._match_id(url)
+ data = self._call_api(
+ 'shows/episodes', audio_id, {'episode_ids': audio_id})
+ episode = data['episodes'][0]
+ audio_url = self._extract_audio_url(episode)
+ if not audio_url:
+ self.raise_login_required()
+ show = try_get(data, lambda x: x['shows'][0], dict) or {}
+ return self._extract_episode(
+ episode, audio_url, self._extract_show_info(show))
+
+
+class StitcherShowIE(StitcherBaseIE):
+ _VALID_URL = StitcherBaseIE._VALID_URL_BASE + r'(?P<id>[^/#?&]+)/?(?:[?#&]|$)'
+ _TESTS = [{
+ 'url': 'http://www.stitcher.com/podcast/the-talking-machines',
+ 'info_dict': {
+ 'id': 'the-talking-machines',
+ 'title': 'Talking Machines',
+ 'description': 'md5:831f0995e40f26c10231af39cf1ebf0b',
+ },
+ 'playlist_mincount': 106,
+ }, {
+ 'url': 'https://www.stitcher.com/show/the-talking-machines',
+ 'only_matching': True,
+ }]
+
+ def _real_extract(self, url):
+ show_slug = self._match_id(url)
+ data = self._call_api(
+ 'search/show/%s/allEpisodes' % show_slug, show_slug, {'count': 10000})
+ show = try_get(data, lambda x: x['shows'][0], dict) or {}
+ show_info = self._extract_show_info(show)
+
+ entries = []
+ for episode in (data.get('episodes') or []):
+ audio_url = self._extract_audio_url(episode)
+ if not audio_url:
+ continue
+ entries.append(self._extract_episode(episode, audio_url, show_info))
+
+ return self.playlist_result(
+ entries, show_slug, show.get('title'),
+ self._extract_description(show))
diff --git a/yt_dlp/extractor/storyfire.py b/yt_dlp/extractor/storyfire.py
new file mode 100644
index 0000000..566f777
--- /dev/null
+++ b/yt_dlp/extractor/storyfire.py
@@ -0,0 +1,133 @@
+import functools
+
+from .common import InfoExtractor
+from ..utils import (
+ format_field,
+ int_or_none,
+ OnDemandPagedList,
+ smuggle_url,
+)
+
+
+class StoryFireBaseIE(InfoExtractor):
+ _VALID_URL_BASE = r'https?://(?:www\.)?storyfire\.com/'
+
+ def _call_api(self, path, video_id, resource, query=None):
+ return self._download_json(
+ 'https://storyfire.com/app/%s/%s' % (path, video_id), video_id,
+ 'Downloading %s JSON metadata' % resource, query=query)
+
+ def _parse_video(self, video):
+ title = video['title']
+ vimeo_id = self._search_regex(
+ r'https?://player\.vimeo\.com/external/(\d+)',
+ video['vimeoVideoURL'], 'vimeo id')
+
+ uploader_id = video.get('hostID')
+
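+        # videos are hosted on Vimeo; delegate extraction there, smuggling the Referer the player requires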
+ return {
+ '_type': 'url_transparent',
+ 'id': vimeo_id,
+ 'title': title,
+ 'description': video.get('description'),
+ 'url': smuggle_url(
+ 'https://player.vimeo.com/video/' + vimeo_id, {
+ 'referer': 'https://storyfire.com/',
+ }),
+ 'thumbnail': video.get('storyImage'),
+ 'view_count': int_or_none(video.get('views')),
+ 'like_count': int_or_none(video.get('likesCount')),
+ 'comment_count': int_or_none(video.get('commentsCount')),
+ 'duration': int_or_none(video.get('videoDuration')),
+ 'timestamp': int_or_none(video.get('publishDate')),
+ 'uploader': video.get('username'),
+ 'uploader_id': uploader_id,
+ 'uploader_url': format_field(uploader_id, None, 'https://storyfire.com/user/%s/video'),
+ 'episode_number': int_or_none(video.get('episodeNumber') or video.get('episode_number')),
+ }
+
+
+class StoryFireIE(StoryFireBaseIE):
+ _VALID_URL = StoryFireBaseIE._VALID_URL_BASE + r'video-details/(?P<id>[0-9a-f]{24})'
+ _TEST = {
+ 'url': 'https://storyfire.com/video-details/5df1d132b6378700117f9181',
+ 'md5': 'caec54b9e4621186d6079c7ec100c1eb',
+ 'info_dict': {
+ 'id': '378954662',
+ 'ext': 'mp4',
+ 'title': 'Buzzfeed Teaches You About Memes',
+ 'uploader_id': 'ntZAJFECERSgqHSxzonV5K2E89s1',
+ 'timestamp': 1576129028,
+ 'description': 'md5:0b4e28021548e144bed69bb7539e62ea',
+ 'uploader': 'whang!',
+ 'upload_date': '20191212',
+ 'duration': 418,
+ 'view_count': int,
+ 'like_count': int,
+ 'comment_count': int,
+ },
+ 'params': {
+ 'skip_download': True,
+ },
+ 'expected_warnings': ['Unable to download JSON metadata']
+ }
+
+ def _real_extract(self, url):
+ video_id = self._match_id(url)
+ video = self._call_api(
+ 'generic/video-detail', video_id, 'video')['video']
+ return self._parse_video(video)
+
+
+class StoryFireUserIE(StoryFireBaseIE):
+ _VALID_URL = StoryFireBaseIE._VALID_URL_BASE + r'user/(?P<id>[^/]+)/video'
+ _TEST = {
+ 'url': 'https://storyfire.com/user/UQ986nFxmAWIgnkZQ0ftVhq4nOk2/video',
+ 'info_dict': {
+ 'id': 'UQ986nFxmAWIgnkZQ0ftVhq4nOk2',
+ },
+ 'playlist_mincount': 151,
+ }
+ _PAGE_SIZE = 20
+
+ def _fetch_page(self, user_id, page):
+ videos = self._call_api(
+ 'publicVideos', user_id, 'page %d' % (page + 1), {
+ 'skip': page * self._PAGE_SIZE,
+ })['videos']
+ for video in videos:
+ yield self._parse_video(video)
+
+ def _real_extract(self, url):
+ user_id = self._match_id(url)
+ entries = OnDemandPagedList(functools.partial(
+ self._fetch_page, user_id), self._PAGE_SIZE)
+ return self.playlist_result(entries, user_id)
+
+
+class StoryFireSeriesIE(StoryFireBaseIE):
+ _VALID_URL = StoryFireBaseIE._VALID_URL_BASE + r'write/series/stories/(?P<id>[^/?&#]+)'
+ _TESTS = [{
+ 'url': 'https://storyfire.com/write/series/stories/-Lq6MsuIHLODO6d2dDkr/',
+ 'info_dict': {
+ 'id': '-Lq6MsuIHLODO6d2dDkr',
+ },
+ 'playlist_mincount': 13,
+ }, {
+ 'url': 'https://storyfire.com/write/series/stories/the_mortal_one/',
+ 'info_dict': {
+ 'id': 'the_mortal_one',
+ },
+ 'playlist_count': 0,
+ }]
+
+ def _extract_videos(self, stories):
+ for story in stories.values():
+ if story.get('hasVideo'):
+ yield self._parse_video(story)
+
+ def _real_extract(self, url):
+ series_id = self._match_id(url)
+ stories = self._call_api(
+ 'seriesStories', series_id, 'series stories')
+ return self.playlist_result(self._extract_videos(stories), series_id)
diff --git a/yt_dlp/extractor/streamable.py b/yt_dlp/extractor/streamable.py
new file mode 100644
index 0000000..462861e
--- /dev/null
+++ b/yt_dlp/extractor/streamable.py
@@ -0,0 +1,103 @@
+from .common import InfoExtractor
+from ..utils import (
+ ExtractorError,
+ float_or_none,
+ int_or_none,
+    parse_codecs,
+    try_get,
+)
+
+
+class StreamableIE(InfoExtractor):
+ _VALID_URL = r'https?://streamable\.com/(?:[es]/)?(?P<id>\w+)'
+ _EMBED_REGEX = [r'<iframe[^>]+\bsrc=(?P<q1>[\'"])(?P<url>(?:https?:)?//streamable\.com/.+?)(?P=q1)']
+ _TESTS = [
+ {
+ 'url': 'https://streamable.com/dnd1',
+ 'md5': '3e3bc5ca088b48c2d436529b64397fef',
+ 'info_dict': {
+ 'id': 'dnd1',
+ 'ext': 'mp4',
+ 'title': 'Mikel Oiarzabal scores to make it 0-3 for La Real against Espanyol',
+ 'thumbnail': r're:https?://.*\.jpg$',
+ 'uploader': 'teabaker',
+ 'timestamp': 1454964157.35115,
+ 'upload_date': '20160208',
+ 'duration': 61.516,
+ 'view_count': int,
+ }
+ },
+ # older video without bitrate, width/height, codecs, etc. info
+ {
+ 'url': 'https://streamable.com/moo',
+ 'md5': '2cf6923639b87fba3279ad0df3a64e73',
+ 'info_dict': {
+ 'id': 'moo',
+ 'ext': 'mp4',
+ 'title': '"Please don\'t eat me!"',
+ 'thumbnail': r're:https?://.*\.jpg$',
+ 'timestamp': 1426115495,
+ 'upload_date': '20150311',
+ 'duration': 12,
+ 'view_count': int,
+ }
+ },
+ {
+ 'url': 'https://streamable.com/e/dnd1',
+ 'only_matching': True,
+ },
+ {
+ 'url': 'https://streamable.com/s/okkqk/drxjds',
+ 'only_matching': True,
+ }
+ ]
+
+ def _real_extract(self, url):
+ video_id = self._match_id(url)
+
+        # Note: use the ajax API rather than the public Streamable API, since
+        # the public API sometimes fails to return video info such as the
+        # title and does not include info like the video duration
+ video = self._download_json(
+ 'https://ajax.streamable.com/videos/%s' % video_id, video_id)
+
+        # Video status values:
+ # 0 The video is being uploaded
+ # 1 The video is being processed
+ # 2 The video has at least one file ready
+ # 3 The video is unavailable due to an error
+ status = video.get('status')
+ if status != 2:
+ raise ExtractorError(
+ 'This video is currently unavailable. It may still be uploading or processing.',
+ expected=True)
+
+ title = video.get('reddit_title') or video['title']
+
+ formats = []
+ for key, info in video['files'].items():
+ if not info.get('url'):
+ continue
+ formats.append({
+ 'format_id': key,
+ 'url': self._proto_relative_url(info['url']),
+ 'width': int_or_none(info.get('width')),
+ 'height': int_or_none(info.get('height')),
+ 'filesize': int_or_none(info.get('size')),
+ 'fps': int_or_none(info.get('framerate')),
+ 'vbr': float_or_none(info.get('bitrate'), 1000),
+ 'vcodec': parse_codecs(try_get(info, lambda x: x['input_metadata']['video_codec_name'])).get('vcodec'),
+ 'acodec': parse_codecs(try_get(info, lambda x: x['input_metadata']['audio_codec_name'])).get('acodec'),
+ })
+
+ return {
+ 'id': video_id,
+ 'title': title,
+ 'description': video.get('description'),
+ 'thumbnail': self._proto_relative_url(video.get('thumbnail_url')),
+ 'uploader': video.get('owner', {}).get('user_name'),
+ 'timestamp': float_or_none(video.get('date_added')),
+ 'duration': float_or_none(video.get('duration')),
+ 'view_count': int_or_none(video.get('plays')),
+ 'formats': formats
+ }
diff --git a/yt_dlp/extractor/streamcz.py b/yt_dlp/extractor/streamcz.py
new file mode 100644
index 0000000..c4537ba
--- /dev/null
+++ b/yt_dlp/extractor/streamcz.py
@@ -0,0 +1,122 @@
+import json
+
+from .common import InfoExtractor
+from ..utils import (
+ float_or_none,
+ int_or_none,
+ parse_codecs,
+ traverse_obj,
+ urljoin,
+)
+
+
+class StreamCZIE(InfoExtractor):
+ _VALID_URL = r'https?://(?:www\.)?(?:stream|televizeseznam)\.cz/[^?#]+/(?P<display_id>[^?#]+)-(?P<id>[0-9]+)'
+ _TESTS = [{
+ 'url': 'https://www.televizeseznam.cz/video/lajna/buh-57953890',
+ 'md5': '40c41ade1464a390a0b447e333df4239',
+ 'info_dict': {
+ 'id': '57953890',
+ 'ext': 'mp4',
+ 'title': 'Bůh',
+ 'display_id': 'buh',
+ 'description': 'md5:8f5f09b9b7bc67df910486cdd88f7165',
+ 'duration': 1369.6,
+ 'view_count': int,
+ }
+ }, {
+ 'url': 'https://www.stream.cz/kdo-to-mluvi/kdo-to-mluvi-velke-odhaleni-prinasi-novy-porad-uz-od-25-srpna-64087937',
+ 'md5': '41fd358000086a1ccdb068c77809b158',
+ 'info_dict': {
+ 'id': '64087937',
+ 'ext': 'mp4',
+ 'title': 'Kdo to mluví? Velké odhalení přináší nový pořad už od 25. srpna',
+ 'display_id': 'kdo-to-mluvi-velke-odhaleni-prinasi-novy-porad-uz-od-25-srpna',
+ 'description': 'md5:97a811000a6460266029d6c1c2ebcd59',
+ 'duration': 50.2,
+ 'view_count': int,
+ }
+ }, {
+ 'url': 'https://www.stream.cz/tajemno/znicehonic-jim-skrz-strechu-prolitnul-zahadny-predmet-badatele-vse-objasnili-64147267',
+ 'md5': '3ee4d0be040e8f4a543e67e509d55e3f',
+ 'info_dict': {
+ 'id': '64147267',
+ 'ext': 'mp4',
+ 'title': 'Zničehonic jim skrz střechu prolítnul záhadný předmět. Badatelé vše objasnili',
+ 'display_id': 'znicehonic-jim-skrz-strechu-prolitnul-zahadny-predmet-badatele-vse-objasnili',
+ 'description': 'md5:4b8ada6718d34bb011c4e04ca4bc19bf',
+ 'duration': 442.84,
+ 'view_count': int,
+ }
+ }]
+
+ def _extract_formats(self, spl_url, video):
+ for ext, pref, streams in (
+ ('ts', -1, traverse_obj(video, ('http_stream', 'qualities')) or {}),
+ ('mp4', 1, video.get('mp4') or {})):
+ for format_id, stream in streams.items():
+ if not stream.get('url'):
+ continue
+ yield {
+ 'format_id': f'{format_id}-{ext}',
+ 'ext': ext,
+ 'source_preference': pref,
+ 'url': urljoin(spl_url, stream['url']),
+ 'tbr': float_or_none(stream.get('bandwidth'), scale=1000),
+ 'duration': float_or_none(stream.get('duration'), scale=1000),
+ 'width': traverse_obj(stream, ('resolution', 0)),
+ 'height': traverse_obj(stream, ('resolution', 1)) or int_or_none(format_id.replace('p', '')),
+ **parse_codecs(stream.get('codec')),
+ }
+
+ def _real_extract(self, url):
+ display_id, video_id = self._match_valid_url(url).groups()
+
+ data = self._download_json(
+ 'https://www.televizeseznam.cz/api/graphql', video_id, 'Downloading GraphQL result',
+ data=json.dumps({
+ 'variables': {'urlName': video_id},
+ 'query': '''
+ query LoadEpisode($urlName : String){ episode(urlName: $urlName){ ...VideoDetailFragmentOnEpisode } }
+ fragment VideoDetailFragmentOnEpisode on Episode {
+ id
+ spl
+ urlName
+ name
+ perex
+ duration
+ views
+ }'''
+ }).encode('utf-8'),
+ headers={'Content-Type': 'application/json;charset=UTF-8'}
+ )['data']['episode']
+
+ spl_url = data['spl'] + 'spl2,3'
+ metadata = self._download_json(spl_url, video_id, 'Downloading playlist')
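+        # the playlist endpoint may answer with a JSON redirect stub ('Location') instead of the data itself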
+ if 'Location' in metadata and 'data' not in metadata:
+ spl_url = metadata['Location']
+ metadata = self._download_json(spl_url, video_id, 'Downloading redirected playlist')
+ video = metadata['data']
+
+ subtitles = {}
+ for subs in video.get('subtitles', {}).values():
+ if not subs.get('language'):
+ continue
+        for ext, sub_url in (subs.get('urls') or {}).items():
+ subtitles.setdefault(subs['language'], []).append({
+ 'ext': ext,
+ 'url': urljoin(spl_url, sub_url)
+ })
+
+ formats = list(self._extract_formats(spl_url, video))
+
+ return {
+ 'id': video_id,
+ 'display_id': display_id,
+ 'title': data.get('name'),
+ 'description': data.get('perex'),
+ 'duration': float_or_none(data.get('duration')),
+ 'view_count': int_or_none(data.get('views')),
+ 'formats': formats,
+ 'subtitles': subtitles,
+ }
diff --git a/yt_dlp/extractor/streetvoice.py b/yt_dlp/extractor/streetvoice.py
new file mode 100644
index 0000000..a32c8bc
--- /dev/null
+++ b/yt_dlp/extractor/streetvoice.py
@@ -0,0 +1,97 @@
+from .common import InfoExtractor
+from ..utils import (
+ int_or_none,
+ parse_iso8601,
+ str_or_none,
+ strip_or_none,
+ try_get,
+ urljoin,
+)
+
+
+class StreetVoiceIE(InfoExtractor):
+ _VALID_URL = r'https?://(?:.+?\.)?streetvoice\.com/[^/]+/songs/(?P<id>[0-9]+)'
+ _TESTS = [{
+ 'url': 'https://streetvoice.com/skippylu/songs/123688/',
+ 'md5': '0eb535970629a5195685355f3ed60bfd',
+ 'info_dict': {
+ 'id': '123688',
+ 'ext': 'mp3',
+ 'title': '流浪',
+ 'description': 'md5:8eb0bfcc9dcd8aa82bd6efca66e3fea6',
+ 'thumbnail': r're:^https?://.*\.jpg',
+ 'duration': 270,
+ 'upload_date': '20100923',
+ 'uploader': 'Crispy脆樂團',
+ 'uploader_id': '627810',
+ 'uploader_url': 're:^https?://streetvoice.com/skippylu/',
+ 'timestamp': 1285261661,
+ 'view_count': int,
+ 'like_count': int,
+ 'comment_count': int,
+ 'repost_count': int,
+ 'track': '流浪',
+ 'track_id': '123688',
+ 'album': '2010',
+ }
+ }, {
+ 'url': 'http://tw.streetvoice.com/skippylu/songs/94440/',
+ 'only_matching': True,
+ }]
+
+ def _real_extract(self, url):
+ song_id = self._match_id(url)
+ base_url = 'https://streetvoice.com/api/v4/song/%s/' % song_id
+ song = self._download_json(base_url, song_id, query={
+ 'fields': 'album,comments_count,created_at,id,image,length,likes_count,name,nickname,plays_count,profile,share_count,synopsis,user,username',
+ })
+ title = song['name']
+
+ formats = []
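+        # each format endpoint must be requested with an empty POST and returns the URL under 'file'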
+ for suffix, format_id in [('hls/file', 'hls'), ('file', 'http'), ('file/original', 'original')]:
+ f_url = (self._download_json(
+ base_url + suffix + '/', song_id,
+ 'Downloading %s format URL' % format_id,
+ data=b'', fatal=False) or {}).get('file')
+ if not f_url:
+ continue
+ f = {
+ 'ext': 'mp3',
+ 'format_id': format_id,
+ 'url': f_url,
+ 'vcodec': 'none',
+ }
+ if format_id == 'hls':
+ f['protocol'] = 'm3u8_native'
+ abr = self._search_regex(r'\.mp3\.(\d+)k', f_url, 'bitrate', default=None)
+ if abr:
+ abr = int(abr)
+ f.update({
+ 'abr': abr,
+ 'tbr': abr,
+ })
+ formats.append(f)
+
+ user = song.get('user') or {}
+ username = user.get('username')
+ get_count = lambda x: int_or_none(song.get(x + '_count'))
+
+ return {
+ 'id': song_id,
+ 'formats': formats,
+ 'title': title,
+ 'description': strip_or_none(song.get('synopsis')),
+ 'thumbnail': song.get('image'),
+ 'duration': int_or_none(song.get('length')),
+ 'timestamp': parse_iso8601(song.get('created_at')),
+ 'uploader': try_get(user, lambda x: x['profile']['nickname']),
+ 'uploader_id': str_or_none(user.get('id')),
+ 'uploader_url': urljoin(url, '/%s/' % username) if username else None,
+ 'view_count': get_count('plays'),
+ 'like_count': get_count('likes'),
+ 'comment_count': get_count('comments'),
+ 'repost_count': get_count('share'),
+ 'track': title,
+ 'track_id': song_id,
+ 'album': try_get(song, lambda x: x['album']['name']),
+ }
diff --git a/yt_dlp/extractor/stretchinternet.py b/yt_dlp/extractor/stretchinternet.py
new file mode 100644
index 0000000..e438dee
--- /dev/null
+++ b/yt_dlp/extractor/stretchinternet.py
@@ -0,0 +1,35 @@
+from .common import InfoExtractor
+
+
+class StretchInternetIE(InfoExtractor):
+ _VALID_URL = r'https?://portal\.stretchinternet\.com/[^/]+/(?:portal|full)\.htm\?.*?\beventId=(?P<id>\d+)'
+ _TEST = {
+ 'url': 'https://portal.stretchinternet.com/umary/portal.htm?eventId=573272&streamType=video',
+ 'info_dict': {
+ 'id': '573272',
+ 'ext': 'mp4',
+ 'title': 'UNIVERSITY OF MARY WRESTLING VS UPPER IOWA',
+ # 'timestamp': 1575668361,
+ # 'upload_date': '20191206',
+ 'uploader_id': '99997',
+ }
+ }
+
+ def _real_extract(self, url):
+ video_id = self._match_id(url)
+
+ media_url = self._download_json(
+ 'https://core.stretchlive.com/trinity/event/tcg/' + video_id,
+ video_id)[0]['media'][0]['url']
+ event = self._download_json(
+ 'https://neo-client.stretchinternet.com/portal-ws/getEvent.json',
+ video_id, query={'eventID': video_id, 'token': 'asdf'})['event']
+
+ return {
+ 'id': video_id,
+ 'title': event['title'],
+ # TODO: parse US timezone abbreviations
+ # 'timestamp': event.get('dateTimeString'),
+ 'url': 'https://' + media_url,
+ 'uploader_id': event.get('ownerID'),
+ }
diff --git a/yt_dlp/extractor/stripchat.py b/yt_dlp/extractor/stripchat.py
new file mode 100644
index 0000000..b9523c8
--- /dev/null
+++ b/yt_dlp/extractor/stripchat.py
@@ -0,0 +1,66 @@
+from .common import InfoExtractor
+from ..utils import (
+ ExtractorError,
+ UserNotLive,
+ lowercase_escape,
+ traverse_obj
+)
+
+
+class StripchatIE(InfoExtractor):
+ _VALID_URL = r'https?://stripchat\.com/(?P<id>[^/?#]+)'
+ _TESTS = [{
+ 'url': 'https://stripchat.com/Joselin_Flower',
+ 'info_dict': {
+ 'id': 'Joselin_Flower',
+ 'ext': 'mp4',
+ 'title': 're:^Joselin_Flower [0-9]{4}-[0-9]{2}-[0-9]{2} [0-9]{2}:[0-9]{2}$',
+ 'description': str,
+ 'is_live': True,
+ 'age_limit': 18,
+ },
+ 'skip': 'Room is offline',
+ }, {
+ 'url': 'https://stripchat.com/Rakhijaan@xh',
+ 'only_matching': True
+ }]
+
+ def _real_extract(self, url):
+ video_id = self._match_id(url)
+ webpage = self._download_webpage(url, video_id, headers=self.geo_verification_headers())
+
+ data = self._parse_json(
+ self._search_regex(
+                r'<script\b[^>]*>\s*window\.__PRELOADED_STATE__\s*=(?P<value>.*?)</script>',
+ webpage, 'data', default='{}', group='value'),
+ video_id, transform_source=lowercase_escape, fatal=False)
+ if not data:
+ raise ExtractorError('Unable to find configuration for stream.')
+
+ if traverse_obj(data, ('viewCam', 'show'), expected_type=dict):
+ raise ExtractorError('Model is in private show', expected=True)
+ elif not traverse_obj(data, ('viewCam', 'model', 'isLive'), expected_type=bool):
+ raise UserNotLive(video_id=video_id)
+
+ model_id = traverse_obj(data, ('viewCam', 'model', 'id'), expected_type=int)
+
+ formats = []
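+        # try each configured HLS host, including the hlsFallback domains, until one yields formats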
+ for host in traverse_obj(data, ('config', 'data', (
+ (('features', 'featuresV2'), 'hlsFallback', 'fallbackDomains', ...), 'hlsStreamHost'))):
+ formats = self._extract_m3u8_formats(
+ f'https://edge-hls.{host}/hls/{model_id}/master/{model_id}_auto.m3u8',
+ video_id, ext='mp4', m3u8_id='hls', fatal=False, live=True)
+ if formats:
+ break
+ if not formats:
+ self.raise_no_formats('No active streams found', expected=True)
+
+ return {
+ 'id': video_id,
+ 'title': video_id,
+ 'description': self._og_search_description(webpage),
+ 'is_live': True,
+ 'formats': formats,
+            # Stripchat declares the RTA meta-tag, but in a non-standard format, so _rta_search() can't be used
+ 'age_limit': 18,
+ }
diff --git a/yt_dlp/extractor/stv.py b/yt_dlp/extractor/stv.py
new file mode 100644
index 0000000..8b3e635
--- /dev/null
+++ b/yt_dlp/extractor/stv.py
@@ -0,0 +1,89 @@
+from .common import InfoExtractor
+from ..compat import compat_str
+from ..utils import (
+ float_or_none,
+ int_or_none,
+ smuggle_url,
+ str_or_none,
+ try_get,
+)
+
+
+class STVPlayerIE(InfoExtractor):
+ IE_NAME = 'stv:player'
+ _VALID_URL = r'https?://player\.stv\.tv/(?P<type>episode|video)/(?P<id>[a-z0-9]{4})'
+ _TESTS = [{
+ # shortform
+ 'url': 'https://player.stv.tv/video/4gwd/emmerdale/60-seconds-on-set-with-laura-norton/',
+ 'md5': '5adf9439c31d554f8be0707c7abe7e0a',
+ 'info_dict': {
+ 'id': '5333973339001',
+ 'ext': 'mp4',
+ 'upload_date': '20170301',
+ 'title': '60 seconds on set with Laura Norton',
+ 'description': "How many questions can Laura - a.k.a Kerry Wyatt - answer in 60 seconds? Let\'s find out!",
+ 'timestamp': 1488388054,
+ 'uploader_id': '1486976045',
+ },
+ 'skip': 'this resource is unavailable outside of the UK',
+ }, {
+ # episodes
+ 'url': 'https://player.stv.tv/episode/4125/jennifer-saunders-memory-lane',
+ 'only_matching': True,
+ }]
+ BRIGHTCOVE_URL_TEMPLATE = 'http://players.brightcove.net/1486976045/default_default/index.html?videoId=%s'
+ _PTYPE_MAP = {
+ 'episode': 'episodes',
+ 'video': 'shortform',
+ }
+
+ def _real_extract(self, url):
+ ptype, video_id = self._match_valid_url(url).groups()
+
+ webpage = self._download_webpage(url, video_id, fatal=False) or ''
+ props = self._search_nextjs_data(webpage, video_id, default='{}').get('props') or {}
+ player_api_cache = try_get(
+ props, lambda x: x['initialReduxState']['playerApiCache']) or {}
+
+ api_path, resp = None, {}
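+        # prefer the API response already embedded in the page's Redux cache to avoid an extra request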
+ for k, v in player_api_cache.items():
+ if k.startswith('/episodes/') or k.startswith('/shortform/'):
+ api_path, resp = k, v
+ break
+ else:
+ episode_id = str_or_none(try_get(
+ props, lambda x: x['pageProps']['episodeId']))
+ api_path = '/%s/%s' % (self._PTYPE_MAP[ptype], episode_id or video_id)
+
+ result = resp.get('results')
+ if not result:
+ resp = self._download_json(
+ 'https://player.api.stv.tv/v1' + api_path, video_id)
+ result = resp['results']
+
+ video = result['video']
+ video_id = compat_str(video['id'])
+
+ subtitles = {}
+ _subtitles = result.get('_subtitles') or {}
+ for ext, sub_url in _subtitles.items():
+ subtitles.setdefault('en', []).append({
+ 'ext': 'vtt' if ext == 'webvtt' else ext,
+ 'url': sub_url,
+ })
+
+ programme = result.get('programme') or {}
+ if programme.get('drmEnabled'):
+ self.report_drm(video_id)
+
+ return {
+ '_type': 'url_transparent',
+ 'id': video_id,
+ 'url': smuggle_url(self.BRIGHTCOVE_URL_TEMPLATE % video_id, {'geo_countries': ['GB']}),
+ 'description': result.get('summary'),
+ 'duration': float_or_none(video.get('length'), 1000),
+ 'subtitles': subtitles,
+ 'view_count': int_or_none(result.get('views')),
+ 'series': programme.get('name') or programme.get('shortName'),
+ 'ie_key': 'BrightcoveNew',
+ }
diff --git a/yt_dlp/extractor/substack.py b/yt_dlp/extractor/substack.py
new file mode 100644
index 0000000..6ee3f75
--- /dev/null
+++ b/yt_dlp/extractor/substack.py
@@ -0,0 +1,108 @@
+import re
+import urllib.parse
+
+from .common import InfoExtractor
+from ..utils import js_to_json, str_or_none, traverse_obj
+
+
+class SubstackIE(InfoExtractor):
+ _VALID_URL = r'https?://(?P<username>[\w-]+)\.substack\.com/p/(?P<id>[\w-]+)'
+ _TESTS = [{
+ 'url': 'https://haleynahman.substack.com/p/i-made-a-vlog?s=r',
+ 'md5': 'f27e4fc6252001d48d479f45e65cdfd5',
+ 'info_dict': {
+ 'id': '47660949',
+ 'ext': 'mp4',
+ 'title': 'I MADE A VLOG',
+ 'description': 'md5:9248af9a759321e1027226f988f54d96',
+ 'thumbnail': 'md5:bec758a34d8ee9142d43bcebdf33af18',
+ 'uploader': 'Maybe Baby',
+ 'uploader_id': '33628',
+ }
+ }, {
+ 'url': 'https://haleynahman.substack.com/p/-dear-danny-i-found-my-boyfriends?s=r',
+ 'md5': '0a63eacec877a1171a62cfa69710fcea',
+ 'info_dict': {
+ 'id': '51045592',
+ 'ext': 'mpga',
+ 'title': "🎧 Dear Danny: I found my boyfriend's secret Twitter account",
+ 'description': 'md5:a57f2439319e56e0af92dd0c95d75797',
+ 'thumbnail': 'md5:daa40b6b79249417c14ff8103db29639',
+ 'uploader': 'Maybe Baby',
+ 'uploader_id': '33628',
+ }
+ }, {
+ 'url': 'https://andrewzimmern.substack.com/p/mussels-with-black-bean-sauce-recipe',
+ 'md5': 'fd3c07077b02444ff0130715b5f632bb',
+ 'info_dict': {
+ 'id': '47368578',
+ 'ext': 'mp4',
+ 'title': 'Mussels with Black Bean Sauce: Recipe of the Week #7',
+ 'description': 'md5:b96234a2906c7d854d5229818d889515',
+ 'thumbnail': 'md5:e30bfaa9da40e82aa62354263a9dd232',
+ 'uploader': "Andrew Zimmern's Spilled Milk ",
+ 'uploader_id': '577659',
+ }
+ }]
+
+ @classmethod
+ def _extract_embed_urls(cls, url, webpage):
+ if not re.search(r'<script[^>]+src=["\']https://substackcdn.com/[^"\']+\.js', webpage):
+ return
+
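+        # posts embedded on custom domains reference their substack subdomain in the page
+        # config; rewrite the URL to that subdomain and stop further generic extraction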
+ mobj = re.search(r'{[^}]*\\?["\']subdomain\\?["\']\s*:\s*\\?["\'](?P<subdomain>[^\\"\']+)', webpage)
+ if mobj:
+ parsed = urllib.parse.urlparse(url)
+ yield parsed._replace(netloc=f'{mobj.group("subdomain")}.substack.com').geturl()
+ raise cls.StopExtraction()
+
+ def _extract_video_formats(self, video_id, url):
+ formats, subtitles = [], {}
+ for video_format in ('hls', 'mp4'):
+ video_url = urllib.parse.urljoin(url, f'/api/v1/video/upload/{video_id}/src?type={video_format}')
+
+ if video_format == 'hls':
+ fmts, subs = self._extract_m3u8_formats_and_subtitles(video_url, video_id, 'mp4', fatal=False)
+ formats.extend(fmts)
+ self._merge_subtitles(subs, target=subtitles)
+ else:
+ formats.append({
+ 'url': video_url,
+ 'ext': video_format,
+ })
+
+ return formats, subtitles
+
+ def _real_extract(self, url):
+ display_id, username = self._match_valid_url(url).group('id', 'username')
+ webpage = self._download_webpage(url, display_id)
+
+ webpage_info = self._parse_json(self._search_json(
+ r'window\._preloads\s*=\s*JSON\.parse\(', webpage, 'json string',
+ display_id, transform_source=js_to_json, contains_pattern=r'"{(?s:.+)}"'), display_id)
+
+ canonical_url = url
+ domain = traverse_obj(webpage_info, ('domainInfo', 'customDomain', {str}))
+ if domain:
+ canonical_url = urllib.parse.urlparse(url)._replace(netloc=domain).geturl()
+
+ post_type = webpage_info['post']['type']
+ formats, subtitles = [], {}
+ if post_type == 'podcast':
+ formats, subtitles = [{'url': webpage_info['post']['podcast_url']}], {}
+ elif post_type == 'video':
+ formats, subtitles = self._extract_video_formats(webpage_info['post']['videoUpload']['id'], canonical_url)
+ else:
+ self.raise_no_formats(f'Page type "{post_type}" is not supported')
+
+ return {
+ 'id': str(webpage_info['post']['id']),
+ 'formats': formats,
+ 'subtitles': subtitles,
+ 'title': traverse_obj(webpage_info, ('post', 'title')),
+ 'description': traverse_obj(webpage_info, ('post', 'description')),
+ 'thumbnail': traverse_obj(webpage_info, ('post', 'cover_image')),
+ 'uploader': traverse_obj(webpage_info, ('pub', 'name')),
+ 'uploader_id': str_or_none(traverse_obj(webpage_info, ('post', 'publication_id'))),
+ 'webpage_url': canonical_url,
+ }
diff --git a/yt_dlp/extractor/sunporno.py b/yt_dlp/extractor/sunporno.py
new file mode 100644
index 0000000..708873a
--- /dev/null
+++ b/yt_dlp/extractor/sunporno.py
@@ -0,0 +1,75 @@
+import re
+
+from .common import InfoExtractor
+from ..utils import (
+    determine_ext,
+    int_or_none,
+    parse_duration,
+    qualities,
+)
+
+
+class SunPornoIE(InfoExtractor):
+ _VALID_URL = r'https?://(?:(?:www\.)?sunporno\.com/videos|embeds\.sunporno\.com/embed)/(?P<id>\d+)'
+ _TESTS = [{
+ 'url': 'http://www.sunporno.com/videos/807778/',
+ 'md5': '507887e29033502f29dba69affeebfc9',
+ 'info_dict': {
+ 'id': '807778',
+ 'ext': 'mp4',
+ 'title': 'md5:0a400058e8105d39e35c35e7c5184164',
+ 'description': 'md5:a31241990e1bd3a64e72ae99afb325fb',
+ 'thumbnail': r're:^https?://.*\.jpg$',
+ 'duration': 302,
+ 'age_limit': 18,
+ }
+ }, {
+ 'url': 'http://embeds.sunporno.com/embed/807778',
+ 'only_matching': True,
+ }]
+
+ def _real_extract(self, url):
+ video_id = self._match_id(url)
+
+ webpage = self._download_webpage(
+ 'http://www.sunporno.com/videos/%s' % video_id, video_id)
+
+ title = self._html_extract_title(webpage)
+ description = self._html_search_meta(
+ 'description', webpage, 'description')
+ thumbnail = self._html_search_regex(
+ r'poster="([^"]+)"', webpage, 'thumbnail', fatal=False)
+
+ duration = parse_duration(self._search_regex(
+ (r'itemprop="duration"[^>]*>\s*(\d+:\d+)\s*<',
+ r'>Duration:\s*<span[^>]+>\s*(\d+:\d+)\s*<'),
+ webpage, 'duration', fatal=False))
+
+ view_count = int_or_none(self._html_search_regex(
+ r'class="views">(?:<noscript>)?\s*(\d+)\s*<',
+ webpage, 'view count', fatal=False))
+ comment_count = int_or_none(self._html_search_regex(
+ r'(\d+)</b> Comments?',
+ webpage, 'comment count', fatal=False, default=None))
+
+ formats = []
+ quality = qualities(['mp4', 'flv'])
+ for video_url in re.findall(r'<(?:source|video) src="([^"]+)"', webpage):
+ video_ext = determine_ext(video_url)
+ formats.append({
+ 'url': video_url,
+ 'format_id': video_ext,
+ 'quality': quality(video_ext),
+ })
+
+ return {
+ 'id': video_id,
+ 'title': title,
+ 'description': description,
+ 'thumbnail': thumbnail,
+ 'duration': duration,
+ 'view_count': view_count,
+ 'comment_count': comment_count,
+ 'formats': formats,
+ 'age_limit': 18,
+ }
diff --git a/yt_dlp/extractor/sverigesradio.py b/yt_dlp/extractor/sverigesradio.py
new file mode 100644
index 0000000..01a07b3
--- /dev/null
+++ b/yt_dlp/extractor/sverigesradio.py
@@ -0,0 +1,149 @@
+from .common import InfoExtractor
+from ..utils import (
+ determine_ext,
+ extract_attributes,
+ get_element_by_id,
+ get_element_html_by_class,
+ int_or_none,
+ str_or_none,
+ traverse_obj,
+ url_or_none,
+)
+
+
+class SverigesRadioBaseIE(InfoExtractor):
+ _BASE_URL = 'https://sverigesradio.se/sida/playerajax/'
+ _QUALITIES = ['low', 'medium', 'high']
+ _EXT_TO_CODEC_MAP = {
+ 'mp3': 'mp3',
+ 'm4a': 'aac',
+ }
+ _CODING_FORMAT_TO_ABR_MAP = {
+ 5: 128,
+ 11: 192,
+ 12: 32,
+ 13: 96,
+ }
+
+ def _real_extract(self, url):
+ audio_id, display_id = self._match_valid_url(url).group('id', 'slug')
+ if not audio_id:
+ webpage = self._download_webpage(url, display_id)
+ audio_id = (
+ traverse_obj(
+ get_element_html_by_class('audio-button', webpage),
+ ({extract_attributes}, ('data-audio-id', 'data-publication-id')), get_all=False)
+ or self._parse_json(get_element_by_id('gtm-metadata', webpage), display_id)['pageId'])
+
+ query = {
+ 'id': audio_id,
+ 'type': self._AUDIO_TYPE,
+ }
+
+ item = self._download_json(
+ self._BASE_URL + 'audiometadata', audio_id,
+ 'Downloading audio JSON metadata', query=query)['items'][0]
+
+ query['format'] = 'iis'
+ urls = []
+ formats = []
+ for quality in self._QUALITIES:
+ query['quality'] = quality
+ audio_url_data = self._download_json(
+ self._BASE_URL + 'getaudiourl', audio_id,
+ 'Downloading %s format JSON metadata' % quality,
+ fatal=False, query=query) or {}
+ audio_url = audio_url_data.get('audioUrl')
+ if not audio_url or audio_url in urls:
+ continue
+ urls.append(audio_url)
+ ext = determine_ext(audio_url)
+ coding_format = audio_url_data.get('codingFormat')
+ abr = int_or_none(self._search_regex(
+ r'_a(\d+)\.m4a', audio_url, 'audio bitrate',
+ default=None)) or self._CODING_FORMAT_TO_ABR_MAP.get(coding_format)
+ formats.append({
+ 'abr': abr,
+ 'acodec': self._EXT_TO_CODEC_MAP.get(ext),
+ 'ext': ext,
+ 'format_id': str_or_none(coding_format),
+ 'vcodec': 'none',
+ 'url': audio_url,
+ })
+
+ return {
+ 'id': audio_id,
+ 'formats': formats,
+ **traverse_obj(item, {
+ 'title': 'subtitle',
+ 'series': 'title',
+ 'duration': ('duration', {int_or_none}),
+ 'thumbnail': ('displayimageurl', {url_or_none}),
+ 'description': 'description',
+ }),
+ }
+
+
+class SverigesRadioPublicationIE(SverigesRadioBaseIE):
+ IE_NAME = 'sverigesradio:publication'
+ _VALID_URL = r'https?://(?:www\.)?sverigesradio\.se/(?:sida/)?(?:artikel|gruppsida)(?:\.aspx\?.*?\bartikel=(?P<id>[0-9]+)|/(?P<slug>[\w-]+))'
+ _TESTS = [{
+ 'url': 'https://sverigesradio.se/sida/artikel.aspx?programid=83&artikel=7038546',
+ 'md5': '6a4917e1923fccb080e5a206a5afa542',
+ 'info_dict': {
+ 'id': '7038546',
+ 'ext': 'm4a',
+ 'duration': 132,
+ 'series': 'Nyheter (Ekot)',
+ 'title': 'Esa Teittinen: Sanningen har inte kommit fram',
+ 'description': 'md5:daf7ce66a8f0a53d5465a5984d3839df',
+ 'thumbnail': r're:^https?://.*\.jpg',
+ },
+ }, {
+ 'url': 'https://sverigesradio.se/artikel/tysk-fotbollsfeber-bayern-munchens-10-ariga-segersvit-kan-brytas',
+ 'md5': 'f8a914ad50f491bb74eed403ab4bfef6',
+ 'info_dict': {
+ 'id': '8360345',
+ 'ext': 'm4a',
+ 'title': 'Tysk fotbollsfeber när Bayern Münchens 10-åriga segersvit kan brytas',
+ 'series': 'Radiosporten',
+ 'description': 'md5:5254610e20ce527ecb3a6102a06dcc5f',
+ 'duration': 72,
+ 'thumbnail': r're:^https?://.*\.jpg',
+ },
+ }, {
+ 'url': 'https://sverigesradio.se/sida/gruppsida.aspx?programid=3304&grupp=6247&artikel=7146887',
+ 'only_matching': True,
+ }]
+ _AUDIO_TYPE = 'publication'
+
+
+class SverigesRadioEpisodeIE(SverigesRadioBaseIE):
+ IE_NAME = 'sverigesradio:episode'
+ _VALID_URL = r'https?://(?:www\.)?sverigesradio\.se/(?:sida/)?avsnitt/(?:(?P<id>\d+)|(?P<slug>[\w-]+))(?:$|[#?])'
+ _TESTS = [{
+ 'url': 'https://sverigesradio.se/avsnitt/1140922?programid=1300',
+ 'md5': '20dc4d8db24228f846be390b0c59a07c',
+ 'info_dict': {
+ 'id': '1140922',
+ 'ext': 'mp3',
+ 'duration': 3307,
+ 'series': 'Konflikt',
+ 'title': 'Metoo och valen',
+ 'description': 'md5:fcb5c1f667f00badcc702b196f10a27e',
+ 'thumbnail': r're:^https?://.*\.jpg',
+ },
+ }, {
+ 'url': 'https://sverigesradio.se/avsnitt/p4-live-med-first-aid-kit-scandinavium-mars-2023',
+ 'md5': 'ce17fb82520a8033dbb846993d5589fe',
+ 'info_dict': {
+ 'id': '2160416',
+ 'ext': 'm4a',
+ 'title': 'P4 Live med First Aid Kit',
+ 'description': 'md5:6d5b78eed3d2b65f6de04daa45e9285d',
+ 'thumbnail': r're:^https?://.*\.jpg',
+ 'series': 'P4 Live',
+ 'duration': 5640,
+ },
+ }]
+ _AUDIO_TYPE = 'episode'
diff --git a/yt_dlp/extractor/svt.py b/yt_dlp/extractor/svt.py
new file mode 100644
index 0000000..573147a
--- /dev/null
+++ b/yt_dlp/extractor/svt.py
@@ -0,0 +1,489 @@
+import json
+import re
+
+from .common import InfoExtractor
+from ..compat import compat_str
+from ..utils import (
+ determine_ext,
+ dict_get,
+ int_or_none,
+ traverse_obj,
+ try_get,
+ unified_timestamp,
+)
+
+
+class SVTBaseIE(InfoExtractor):
+ _GEO_COUNTRIES = ['SE']
+
+ def _extract_video(self, video_info, video_id):
+ is_live = dict_get(video_info, ('live', 'simulcast'), default=False)
+ m3u8_protocol = 'm3u8' if is_live else 'm3u8_native'
+ formats = []
+ subtitles = {}
+ for vr in video_info['videoReferences']:
+ player_type = vr.get('playerType') or vr.get('format')
+ vurl = vr['url']
+ ext = determine_ext(vurl)
+ if ext == 'm3u8':
+ fmts, subs = self._extract_m3u8_formats_and_subtitles(
+ vurl, video_id,
+ ext='mp4', entry_protocol=m3u8_protocol,
+ m3u8_id=player_type, fatal=False)
+ formats.extend(fmts)
+ self._merge_subtitles(subs, target=subtitles)
+ elif ext == 'f4m':
+ formats.extend(self._extract_f4m_formats(
+ vurl + '?hdcore=3.3.0', video_id,
+ f4m_id=player_type, fatal=False))
+ elif ext == 'mpd':
+ fmts, subs = self._extract_mpd_formats_and_subtitles(
+ vurl, video_id, mpd_id=player_type, fatal=False)
+ formats.extend(fmts)
+ self._merge_subtitles(subs, target=subtitles)
+ else:
+ formats.append({
+ 'format_id': player_type,
+ 'url': vurl,
+ })
+ rights = try_get(video_info, lambda x: x['rights'], dict) or {}
+ if not formats and rights.get('geoBlockedSweden'):
+ self.raise_geo_restricted(
+ 'This video is only available in Sweden',
+ countries=self._GEO_COUNTRIES, metadata_available=True)
+
+ subtitle_references = dict_get(video_info, ('subtitles', 'subtitleReferences'))
+ if isinstance(subtitle_references, list):
+ for sr in subtitle_references:
+ subtitle_url = sr.get('url')
+ subtitle_lang = sr.get('language', 'sv')
+ if subtitle_url:
+ sub = {
+ 'url': subtitle_url,
+ }
+ if determine_ext(subtitle_url) == 'm3u8':
+ # XXX: no way of testing, is it ever hit?
+ sub['ext'] = 'vtt'
+ subtitles.setdefault(subtitle_lang, []).append(sub)
+
+ title = video_info.get('title')
+
+ series = video_info.get('programTitle')
+ season_number = int_or_none(video_info.get('season'))
+ episode = video_info.get('episodeTitle')
+ episode_number = int_or_none(video_info.get('episodeNumber'))
+
+ timestamp = unified_timestamp(rights.get('validFrom'))
+ duration = int_or_none(dict_get(video_info, ('materialLength', 'contentDuration')))
+ age_limit = None
+ adult = dict_get(
+ video_info, ('inappropriateForChildren', 'blockedForChildren'),
+ skip_false_values=False)
+ if adult is not None:
+ age_limit = 18 if adult else 0
+
+ return {
+ 'id': video_id,
+ 'title': title,
+ 'formats': formats,
+ 'subtitles': subtitles,
+ 'duration': duration,
+ 'timestamp': timestamp,
+ 'age_limit': age_limit,
+ 'series': series,
+ 'season_number': season_number,
+ 'episode': episode,
+ 'episode_number': episode_number,
+ 'is_live': is_live,
+ }
+
+
+class SVTIE(SVTBaseIE):
+ _VALID_URL = r'https?://(?:www\.)?svt\.se/wd\?(?:.*?&)?widgetId=(?P<widget_id>\d+)&.*?\barticleId=(?P<id>\d+)'
+ _EMBED_REGEX = [r'(?:<iframe src|href)="(?P<url>%s[^"]*)"' % _VALID_URL]
+ _TEST = {
+ 'url': 'http://www.svt.se/wd?widgetId=23991&sectionId=541&articleId=2900353&type=embed&contextSectionId=123&autostart=false',
+ 'md5': '33e9a5d8f646523ce0868ecfb0eed77d',
+ 'info_dict': {
+ 'id': '2900353',
+ 'ext': 'mp4',
+ 'title': 'Stjärnorna skojar till det - under SVT-intervjun',
+ 'duration': 27,
+ 'age_limit': 0,
+ },
+ }
+
+ def _real_extract(self, url):
+ mobj = self._match_valid_url(url)
+ widget_id = mobj.group('widget_id')
+ article_id = mobj.group('id')
+
+ info = self._download_json(
+ 'http://www.svt.se/wd?widgetId=%s&articleId=%s&format=json&type=embed&output=json' % (widget_id, article_id),
+ article_id)
+
+ info_dict = self._extract_video(info['video'], article_id)
+ info_dict['title'] = info['context']['title']
+ return info_dict
+
+
+class SVTPlayBaseIE(SVTBaseIE):
+ _SVTPLAY_RE = r'root\s*\[\s*(["\'])_*svtplay\1\s*\]\s*=\s*(?P<json>{.+?})\s*;\s*\n'
+
+
+class SVTPlayIE(SVTPlayBaseIE):
+ IE_DESC = 'SVT Play and Öppet arkiv'
+ _VALID_URL = r'''(?x)
+ (?:
+ (?:
+ svt:|
+ https?://(?:www\.)?svt\.se/barnkanalen/barnplay/[^/]+/
+ )
+ (?P<svt_id>[^/?#&]+)|
+ https?://(?:www\.)?(?:svtplay|oppetarkiv)\.se/(?:video|klipp|kanaler)/(?P<id>[^/?#&]+)
+ (?:.*?(?:modalId|id)=(?P<modal_id>[\da-zA-Z-]+))?
+ )
+ '''
+ _TESTS = [{
+ 'url': 'https://www.svtplay.se/video/30479064',
+ 'md5': '2382036fd6f8c994856c323fe51c426e',
+ 'info_dict': {
+ 'id': '8zVbDPA',
+ 'ext': 'mp4',
+ 'title': 'Designdrömmar i Stenungsund',
+ 'timestamp': 1615770000,
+ 'upload_date': '20210315',
+ 'duration': 3519,
+ 'thumbnail': r're:^https?://(?:.*[\.-]jpg|www.svtstatic.se/image/.*)$',
+ 'age_limit': 0,
+ 'subtitles': {
+ 'sv': [{
+ 'ext': 'vtt',
+ }]
+ },
+ },
+ 'params': {
+ 'skip_download': 'm3u8',
+ },
+ 'skip': 'Episode is no longer available',
+ }, {
+ 'url': 'https://www.svtplay.se/video/emBxBQj',
+ 'md5': '2382036fd6f8c994856c323fe51c426e',
+ 'info_dict': {
+ 'id': 'eyBd9aj',
+ 'ext': 'mp4',
+ 'title': '1. Farlig kryssning',
+ 'timestamp': 1491019200,
+ 'upload_date': '20170401',
+ 'duration': 2566,
+ 'thumbnail': r're:^https?://(?:.*[\.-]jpg|www.svtstatic.se/image/.*)$',
+ 'age_limit': 0,
+ 'episode': '1. Farlig kryssning',
+ 'series': 'Rederiet',
+ 'subtitles': {
+ 'sv': 'count:3'
+ },
+ },
+ 'params': {
+ 'skip_download': 'm3u8',
+ },
+ }, {
+ 'url': 'https://www.svtplay.se/video/jz2rYz7/anders-hansen-moter/james-fallon?info=visa',
+ 'info_dict': {
+ 'id': 'jvXAGVb',
+ 'ext': 'mp4',
+ 'title': 'James Fallon',
+ 'timestamp': 1673917200,
+ 'upload_date': '20230117',
+ 'duration': 1081,
+ 'thumbnail': r're:^https?://(?:.*[\.-]jpg|www.svtstatic.se/image/.*)$',
+ 'age_limit': 0,
+ 'episode': 'James Fallon',
+ 'series': 'Anders Hansen möter...',
+ },
+ 'params': {
+ 'skip_download': 'dash',
+ },
+ }, {
+ 'url': 'https://www.svtplay.se/video/30479064/husdrommar/husdrommar-sasong-8-designdrommar-i-stenungsund?modalId=8zVbDPA',
+ 'only_matching': True,
+ }, {
+ 'url': 'https://www.svtplay.se/video/30684086/rapport/rapport-24-apr-18-00-7?id=e72gVpa',
+ 'only_matching': True,
+ }, {
+ # geo restricted to Sweden
+ 'url': 'http://www.oppetarkiv.se/video/5219710/trollflojten',
+ 'only_matching': True,
+ }, {
+ 'url': 'http://www.svtplay.se/klipp/9023742/stopptid-om-bjorn-borg',
+ 'only_matching': True,
+ }, {
+ 'url': 'https://www.svtplay.se/kanaler/svt1',
+ 'only_matching': True,
+ }, {
+ 'url': 'svt:1376446-003A',
+ 'only_matching': True,
+ }, {
+ 'url': 'svt:14278044',
+ 'only_matching': True,
+ }, {
+ 'url': 'https://www.svt.se/barnkanalen/barnplay/kar/eWv5MLX/',
+ 'only_matching': True,
+ }, {
+ 'url': 'svt:eWv5MLX',
+ 'only_matching': True,
+ }]
+
+ def _extract_by_video_id(self, video_id, webpage=None):
+ data = self._download_json(
+ 'https://api.svt.se/videoplayer-api/video/%s' % video_id,
+ video_id, headers=self.geo_verification_headers())
+ info_dict = self._extract_video(data, video_id)
+ if not info_dict.get('title'):
+ title = dict_get(info_dict, ('episode', 'series'))
+ if not title and webpage:
+ title = re.sub(
+ r'\s*\|\s*.+?$', '', self._og_search_title(webpage))
+ if not title:
+ title = video_id
+ info_dict['title'] = title
+ return info_dict
+
+ def _real_extract(self, url):
+ mobj = self._match_valid_url(url)
+ video_id = mobj.group('id')
+ svt_id = mobj.group('svt_id') or mobj.group('modal_id')
+
+ if svt_id:
+ return self._extract_by_video_id(svt_id)
+
+ webpage = self._download_webpage(url, video_id)
+
+ data = self._parse_json(
+ self._search_regex(
+ self._SVTPLAY_RE, webpage, 'embedded data', default='{}',
+ group='json'),
+ video_id, fatal=False)
+
+ thumbnail = self._og_search_thumbnail(webpage)
+
+ if data:
+ video_info = try_get(
+ data, lambda x: x['context']['dispatcher']['stores']['VideoTitlePageStore']['data']['video'],
+ dict)
+ if video_info:
+ info_dict = self._extract_video(video_info, video_id)
+ info_dict.update({
+ 'title': data['context']['dispatcher']['stores']['MetaStore']['title'],
+ 'thumbnail': thumbnail,
+ })
+ return info_dict
+
+ svt_id = try_get(
+ data, lambda x: x['statistics']['dataLake']['content']['id'],
+ compat_str)
+
+ if not svt_id:
+ nextjs_data = self._search_nextjs_data(webpage, video_id, fatal=False)
+ svt_id = traverse_obj(nextjs_data, (
+ 'props', 'urqlState', ..., 'data', {json.loads}, 'detailsPageByPath',
+ 'video', 'svtId', {str}), get_all=False)
+
+ if not svt_id:
+ svt_id = self._search_regex(
+ (r'<video[^>]+data-video-id=["\']([\da-zA-Z-]+)',
+ r'<[^>]+\bdata-rt=["\']top-area-play-button["\'][^>]+\bhref=["\'][^"\']*video/[\w-]+/[^"\']*\b(?:modalId|id)=([\w-]+)'),
+ webpage, 'video id')
+
+ info_dict = self._extract_by_video_id(svt_id, webpage)
+ info_dict['thumbnail'] = thumbnail
+
+ return info_dict
+
+
+class SVTSeriesIE(SVTPlayBaseIE):
+ _VALID_URL = r'https?://(?:www\.)?svtplay\.se/(?P<id>[^/?&#]+)(?:.+?\btab=(?P<season_slug>[^&#]+))?'
+ _TESTS = [{
+ 'url': 'https://www.svtplay.se/rederiet',
+ 'info_dict': {
+ 'id': '14445680',
+ 'title': 'Rederiet',
+ 'description': 'md5:d9fdfff17f5d8f73468176ecd2836039',
+ },
+ 'playlist_mincount': 318,
+ }, {
+ 'url': 'https://www.svtplay.se/rederiet?tab=season-2-14445680',
+ 'info_dict': {
+ 'id': 'season-2-14445680',
+ 'title': 'Rederiet - Säsong 2',
+ 'description': 'md5:d9fdfff17f5d8f73468176ecd2836039',
+ },
+ 'playlist_mincount': 12,
+ }]
+
+ @classmethod
+ def suitable(cls, url):
+        return False if SVTIE.suitable(url) or SVTPlayIE.suitable(url) else super().suitable(url)
+
+ def _real_extract(self, url):
+ series_slug, season_id = self._match_valid_url(url).groups()
+
+ series = self._download_json(
+ 'https://api.svt.se/contento/graphql', series_slug,
+ 'Downloading series page', query={
+ 'query': '''{
+ listablesBySlug(slugs: ["%s"]) {
+ associatedContent(include: [productionPeriod, season]) {
+ items {
+ item {
+ ... on Episode {
+ videoSvtId
+ }
+ }
+ }
+ id
+ name
+ }
+ id
+ longDescription
+ name
+ shortDescription
+ }
+}''' % series_slug,
+ })['data']['listablesBySlug'][0]
+
+ season_name = None
+
+ entries = []
+ for season in series['associatedContent']:
+ if not isinstance(season, dict):
+ continue
+ if season_id:
+ if season.get('id') != season_id:
+ continue
+ season_name = season.get('name')
+ items = season.get('items')
+ if not isinstance(items, list):
+ continue
+ for item in items:
+ video = item.get('item') or {}
+ content_id = video.get('videoSvtId')
+ if not content_id or not isinstance(content_id, compat_str):
+ continue
+ entries.append(self.url_result(
+ 'svt:' + content_id, SVTPlayIE.ie_key(), content_id))
+
+ title = series.get('name')
+ season_name = season_name or season_id
+
+ if title and season_name:
+ title = '%s - %s' % (title, season_name)
+ elif season_id:
+ title = season_id
+
+ return self.playlist_result(
+ entries, season_id or series.get('id'), title,
+ dict_get(series, ('longDescription', 'shortDescription')))
+
+
+class SVTPageIE(SVTBaseIE):
+ _VALID_URL = r'https?://(?:www\.)?svt\.se/(?:[^/?#]+/)*(?P<id>[^/?&#]+)'
+ _TESTS = [{
+ 'url': 'https://www.svt.se/nyheter/lokalt/skane/viktor-18-forlorade-armar-och-ben-i-sepsis-vill-ateruppta-karaten-och-bli-svetsare',
+ 'info_dict': {
+ 'title': 'Viktor, 18, förlorade armar och ben i sepsis – vill återuppta karaten och bli svetsare',
+ 'id': 'viktor-18-forlorade-armar-och-ben-i-sepsis-vill-ateruppta-karaten-och-bli-svetsare',
+ },
+ 'playlist_count': 2,
+ }, {
+ 'url': 'https://www.svt.se/nyheter/lokalt/skane/forsvarsmakten-om-trafikkaoset-pa-e22-kunde-inte-varit-dar-snabbare',
+ 'info_dict': {
+ 'id': 'jXvk42E',
+ 'title': 'Försvarsmakten om trafikkaoset på E22: Kunde inte varit där snabbare',
+ 'ext': 'mp4',
+ "duration": 80,
+ 'age_limit': 0,
+ 'timestamp': 1704370009,
+ 'episode': 'Försvarsmakten om trafikkaoset på E22: Kunde inte varit där snabbare',
+ 'series': 'Lokala Nyheter Skåne',
+ 'upload_date': '20240104'
+ },
+ 'params': {
+ 'skip_download': True,
+ }
+ }, {
+ 'url': 'https://www.svt.se/nyheter/svtforum/2023-tungt-ar-for-svensk-media',
+ 'info_dict': {
+ 'title': '2023 tungt år för svensk media',
+ 'id': 'ewqAZv4',
+ 'ext': 'mp4',
+ "duration": 3074,
+ 'age_limit': 0,
+ 'series': '',
+ 'timestamp': 1702980479,
+ 'upload_date': '20231219',
+ 'episode': 'Mediestudier'
+ },
+ 'params': {
+ 'skip_download': True,
+ }
+ }, {
+ 'url': 'https://www.svt.se/sport/ishockey/bakom-masken-lehners-kamp-mot-mental-ohalsa',
+ 'info_dict': {
+ 'id': '25298267',
+ 'title': 'Bakom masken – Lehners kamp mot mental ohälsa',
+ },
+ 'playlist_count': 4,
+ 'skip': 'Video is gone'
+ }, {
+ 'url': 'https://www.svt.se/nyheter/utrikes/svenska-andrea-ar-en-mil-fran-branderna-i-kalifornien',
+ 'info_dict': {
+ 'id': '24243746',
+ 'title': 'Svenska Andrea redo att fly sitt hem i Kalifornien',
+ },
+ 'playlist_count': 2,
+ 'skip': 'Video is gone'
+ }, {
+ # only programTitle
+ 'url': 'http://www.svt.se/sport/ishockey/jagr-tacklar-giroux-under-intervjun',
+ 'info_dict': {
+ 'id': '8439V2K',
+ 'ext': 'mp4',
+ 'title': 'Stjärnorna skojar till det - under SVT-intervjun',
+ 'duration': 27,
+ 'age_limit': 0,
+ },
+ 'skip': 'Video is gone'
+ }, {
+ 'url': 'https://www.svt.se/nyheter/lokalt/vast/svt-testar-tar-nagon-upp-skrapet-1',
+ 'only_matching': True,
+ }, {
+ 'url': 'https://www.svt.se/vader/manadskronikor/maj2018',
+ 'only_matching': True,
+ }]
+
+ @classmethod
+ def suitable(cls, url):
+        return False if SVTIE.suitable(url) or SVTPlayIE.suitable(url) else super().suitable(url)
+
+ def _real_extract(self, url):
+ display_id = self._match_id(url)
+
+ webpage = self._download_webpage(url, display_id)
+ title = self._og_search_title(webpage)
+
+ urql_state = self._search_json(
+ r'window\.svt\.nyh\.urqlState\s*=', webpage, 'json data', display_id)
+
+ data = traverse_obj(urql_state, (..., 'data', {str}, {json.loads}), get_all=False) or {}
+
+ def entries():
+ for video_id in set(traverse_obj(data, (
+ 'page', (('topMedia', 'svtId'), ('body', ..., 'video', 'svtId')), {str}
+ ))):
+ info = self._extract_video(
+ self._download_json(f'https://api.svt.se/video/{video_id}', video_id), video_id)
+ info['title'] = title
+ yield info
+
+ return self.playlist_result(entries(), display_id, title)
diff --git a/yt_dlp/extractor/swearnet.py b/yt_dlp/extractor/swearnet.py
new file mode 100644
index 0000000..aeaff28
--- /dev/null
+++ b/yt_dlp/extractor/swearnet.py
@@ -0,0 +1,79 @@
+from .common import InfoExtractor
+from ..utils import ExtractorError, int_or_none, traverse_obj
+
+
+class SwearnetEpisodeIE(InfoExtractor):
+ _VALID_URL = r'https?://www\.swearnet\.com/shows/(?P<id>[\w-]+)/seasons/(?P<season_num>\d+)/episodes/(?P<episode_num>\d+)'
+ _TESTS = [{
+ 'url': 'https://www.swearnet.com/shows/gettin-learnt-with-ricky/seasons/1/episodes/1',
+ 'info_dict': {
+ 'id': '232819',
+ 'ext': 'mp4',
+ 'episode_number': 1,
+ 'episode': 'Episode 1',
+ 'duration': 719,
+ 'description': 'md5:c48ef71440ce466284c07085cd7bd761',
+ 'season': 'Season 1',
+ 'title': 'Episode 1 - Grilled Cheese Sammich',
+ 'season_number': 1,
+ 'thumbnail': 'https://cdn.vidyard.com/thumbnails/232819/_RX04IKIq60a2V6rIRqq_Q_small.jpg',
+ }
+ }]
+
+ def _get_formats_and_subtitle(self, video_source, video_id):
+ video_source = video_source or {}
+ formats, subtitles = [], {}
+ for key, value in video_source.items():
+ if key == 'hls':
+ for video_hls in value:
+ fmts, subs = self._extract_m3u8_formats_and_subtitles(video_hls.get('url'), video_id)
+ formats.extend(fmts)
+ self._merge_subtitles(subs, target=subtitles)
+ else:
+ formats.extend({
+ 'url': video_mp4.get('url'),
+ 'ext': 'mp4'
+ } for video_mp4 in value)
+
+ return formats, subtitles
+
+ def _get_direct_subtitle(self, caption_json):
+ subs = {}
+ for caption in caption_json:
+ subs.setdefault(caption.get('language') or 'und', []).append({
+ 'url': caption.get('vttUrl'),
+ 'name': caption.get('name')
+ })
+
+ return subs
+
+ def _real_extract(self, url):
+ display_id, season_number, episode_number = self._match_valid_url(url).group('id', 'season_num', 'episode_num')
+ webpage = self._download_webpage(url, display_id)
+
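+        # subscriber-only episodes replace the player config with an 'Upgrade Now' banner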
+ try:
+ external_id = self._search_regex(r'externalid\s*=\s*"([^"]+)', webpage, 'externalid')
+ except ExtractorError:
+ if 'Upgrade Now' in webpage:
+ self.raise_login_required()
+ raise
+
+ json_data = self._download_json(
+ f'https://play.vidyard.com/player/{external_id}.json', display_id)['payload']['chapters'][0]
+
+ formats, subtitles = self._get_formats_and_subtitle(json_data['sources'], display_id)
+ self._merge_subtitles(self._get_direct_subtitle(json_data.get('captions')), target=subtitles)
+
+ return {
+ 'id': str(json_data['videoId']),
+ 'title': json_data.get('name') or self._html_search_meta(['og:title', 'twitter:title'], webpage),
+ 'description': (json_data.get('description')
+ or self._html_search_meta(['og:description', 'twitter:description'], webpage)),
+ 'duration': int_or_none(json_data.get('seconds')),
+ 'formats': formats,
+ 'subtitles': subtitles,
+ 'season_number': int_or_none(season_number),
+ 'episode_number': int_or_none(episode_number),
+ 'thumbnails': [{'url': thumbnail_url}
+ for thumbnail_url in traverse_obj(json_data, ('thumbnailUrls', ...))]
+ }
diff --git a/yt_dlp/extractor/syfy.py b/yt_dlp/extractor/syfy.py
new file mode 100644
index 0000000..bd2d738
--- /dev/null
+++ b/yt_dlp/extractor/syfy.py
@@ -0,0 +1,58 @@
+from .adobepass import AdobePassIE
+from ..utils import (
+ update_url_query,
+ smuggle_url,
+)
+
+
+class SyfyIE(AdobePassIE):
+ _WORKING = False
+ _VALID_URL = r'https?://(?:www\.)?syfy\.com/(?:[^/]+/)?videos/(?P<id>[^/?#]+)'
+ _TESTS = [{
+ 'url': 'http://www.syfy.com/theinternetruinedmylife/videos/the-internet-ruined-my-life-season-1-trailer',
+ 'info_dict': {
+ 'id': '2968097',
+ 'ext': 'mp4',
+ 'title': 'The Internet Ruined My Life: Season 1 Trailer',
+ 'description': 'One tweet, one post, one click, can destroy everything.',
+ 'uploader': 'NBCU-MPAT',
+ 'upload_date': '20170113',
+ 'timestamp': 1484345640,
+ },
+ 'params': {
+ # m3u8 download
+ 'skip_download': True,
+ },
+ 'add_ie': ['ThePlatform'],
+ 'skip': 'Redirects to main page',
+ }]
+
+ def _real_extract(self, url):
+ display_id = self._match_id(url)
+ webpage = self._download_webpage(url, display_id)
+ syfy_mpx = list(self._parse_json(self._search_regex(
+ r'jQuery\.extend\(Drupal\.settings\s*,\s*({.+?})\);', webpage, 'drupal settings'),
+ display_id)['syfy']['syfy_mpx'].values())[0]
+ video_id = syfy_mpx['mpxGUID']
+ title = syfy_mpx['episodeTitle']
+ query = {
+ 'mbr': 'true',
+ 'manifest': 'm3u',
+ }
+ if syfy_mpx.get('entitlement') == 'auth':
+ resource = self._get_mvpd_resource(
+ 'syfy', title, video_id,
+ syfy_mpx.get('mpxRating', 'TV-14'))
+ query['auth'] = self._extract_mvpd_auth(
+ url, video_id, 'syfy', resource)
+
+ return {
+ '_type': 'url_transparent',
+ 'ie_key': 'ThePlatform',
+ 'url': smuggle_url(update_url_query(
+ self._proto_relative_url(syfy_mpx['releaseURL']), query),
+ {'force_smil_url': True}),
+ 'title': title,
+ 'id': video_id,
+ 'display_id': display_id,
+ }
diff --git a/yt_dlp/extractor/syvdk.py b/yt_dlp/extractor/syvdk.py
new file mode 100644
index 0000000..287fb26
--- /dev/null
+++ b/yt_dlp/extractor/syvdk.py
@@ -0,0 +1,33 @@
+from .common import InfoExtractor
+from ..utils import traverse_obj
+
+
+class SYVDKIE(InfoExtractor):
+ _VALID_URL = r'https?://(?:www\.)?24syv\.dk/episode/(?P<id>[\w-]+)'
+
+ _TESTS = [{
+ 'url': 'https://24syv.dk/episode/isabella-arendt-stiller-op-for-de-konservative-2',
+ 'md5': '429ce5a423dd4b1e1d0bf3a569558089',
+ 'info_dict': {
+ 'id': '12215',
+ 'display_id': 'isabella-arendt-stiller-op-for-de-konservative-2',
+ 'ext': 'mp3',
+ 'title': 'Isabella Arendt stiller op for De Konservative',
+ 'description': 'md5:f5fa6a431813bf37284f3412ad7c6c06'
+ }
+ }]
+
+ def _real_extract(self, url):
+ video_id = self._match_id(url)
+ webpage = self._download_webpage(url, video_id)
+ info_data = self._search_nextjs_data(webpage, video_id)['props']['pageProps']['episodeDetails'][0]
+
+ return {
+ 'id': str(info_data['id']),
+ 'vcodec': 'none',
+ 'ext': 'mp3',
+ 'url': info_data['details']['enclosure'],
+ 'display_id': video_id,
+ 'title': traverse_obj(info_data, ('title', 'rendered')),
+ 'description': traverse_obj(info_data, ('details', 'post_title')),
+ }
diff --git a/yt_dlp/extractor/sztvhu.py b/yt_dlp/extractor/sztvhu.py
new file mode 100644
index 0000000..1cbc2a3
--- /dev/null
+++ b/yt_dlp/extractor/sztvhu.py
@@ -0,0 +1,38 @@
+from .common import InfoExtractor
+
+
+class SztvHuIE(InfoExtractor):
+ _VALID_URL = r'https?://(?:(?:www\.)?sztv\.hu|www\.tvszombathely\.hu)/(?:[^/]+)/.+-(?P<id>[0-9]+)'
+ _TEST = {
+ 'url': 'http://sztv.hu/hirek/cserkeszek-nepszerusitettek-a-kornyezettudatos-eletmodot-a-savaria-teren-20130909',
+ 'md5': 'a6df607b11fb07d0e9f2ad94613375cb',
+ 'info_dict': {
+ 'id': '20130909',
+ 'ext': 'mp4',
+ 'title': 'Cserkészek népszerűsítették a környezettudatos életmódot a Savaria téren',
+ 'description': 'A zöld nap játékos ismeretterjesztő programjait a Magyar Cserkész Szövetség szervezte, akik az ország nyolc városában adják át tudásukat az érdeklődőknek. A PET...',
+ },
+ }
+
+ def _real_extract(self, url):
+ video_id = self._match_id(url)
+ webpage = self._download_webpage(url, video_id)
+ video_file = self._search_regex(
+ r'file: "...:(.*?)",', webpage, 'video file')
+ title = self._html_search_regex(
+ r'<meta name="title" content="([^"]*?) - [^-]*? - [^-]*?"',
+ webpage, 'video title')
+ description = self._html_search_regex(
+ r'<meta name="description" content="([^"]*)"/>',
+ webpage, 'video description', fatal=False)
+ thumbnail = self._og_search_thumbnail(webpage)
+
+ video_url = 'http://media.sztv.hu/vod/' + video_file
+
+ return {
+ 'id': video_id,
+ 'url': video_url,
+ 'title': title,
+ 'description': description,
+ 'thumbnail': thumbnail,
+ }
diff --git a/yt_dlp/extractor/tagesschau.py b/yt_dlp/extractor/tagesschau.py
new file mode 100644
index 0000000..c69c13d
--- /dev/null
+++ b/yt_dlp/extractor/tagesschau.py
@@ -0,0 +1,164 @@
+import re
+
+from .common import InfoExtractor
+from ..utils import (
+ UnsupportedError,
+ extract_attributes,
+ int_or_none,
+ js_to_json,
+ parse_iso8601,
+ try_get,
+)
+
+
+class TagesschauIE(InfoExtractor):
+ _WORKING = False
+ _VALID_URL = r'https?://(?:www\.)?tagesschau\.de/(?P<path>[^/]+/(?:[^/]+/)*?(?P<id>[^/#?]+?(?:-?[0-9]+)?))(?:~_?[^/#?]+?)?\.html'
+
+ _TESTS = [{
+ 'url': 'http://www.tagesschau.de/multimedia/video/video-102143.html',
+ 'md5': 'ccb9359bf8c4795836e43759f3408a93',
+ 'info_dict': {
+ 'id': 'video-102143-1',
+ 'ext': 'mp4',
+ 'title': 'Regierungsumbildung in Athen: Neue Minister in Griechenland vereidigt',
+ 'duration': 138,
+ },
+ }, {
+ 'url': 'http://www.tagesschau.de/multimedia/sendung/ts-5727.html',
+ 'md5': '5c15e8f3da049e48829ec9786d835536',
+ 'info_dict': {
+ 'id': 'ts-5727-1',
+ 'ext': 'mp4',
+ 'title': 'Ganze Sendung',
+ 'duration': 932,
+ },
+ }, {
+ # exclusive audio
+ 'url': 'http://www.tagesschau.de/multimedia/audio/audio-29417.html',
+ 'md5': '4bff8f23504df56a0d86ed312d654182',
+ 'info_dict': {
+ 'id': 'audio-29417-1',
+ 'ext': 'mp3',
+ 'title': 'EU-Gipfel: Im Verbrennerstreit hat Deutschland maximalen Schaden angerichtet',
+ },
+ }, {
+ 'url': 'http://www.tagesschau.de/inland/bnd-303.html',
+ 'md5': 'f049fa1698d7564e9ca4c3325108f034',
+ 'info_dict': {
+ 'id': 'bnd-303-1',
+ 'ext': 'mp3',
+ 'title': 'Das Siegel des Bundesnachrichtendienstes | dpa',
+ },
+ }, {
+ 'url': 'http://www.tagesschau.de/inland/afd-parteitag-135.html',
+ 'info_dict': {
+ 'id': 'afd-parteitag-135',
+ 'title': 'AfD',
+ },
+ 'playlist_mincount': 15,
+ }, {
+ 'url': 'https://www.tagesschau.de/multimedia/audio/audio-29417~player.html',
+ 'info_dict': {
+ 'id': 'audio-29417-1',
+ 'ext': 'mp3',
+ 'title': 'EU-Gipfel: Im Verbrennerstreit hat Deutschland maximalen Schaden angerichtet',
+ },
+ }, {
+ 'url': 'https://www.tagesschau.de/multimedia/audio/podcast-11km-327.html',
+ 'info_dict': {
+ 'id': 'podcast-11km-327',
+ 'ext': 'mp3',
+ 'title': 'Gewalt in der Kita – Wenn Erzieher:innen schweigen',
+ 'upload_date': '20230322',
+ 'timestamp': 1679482808,
+ 'thumbnail': 'https://www.tagesschau.de/multimedia/audio/podcast-11km-329~_v-original.jpg',
+ 'description': 'md5:dad059931fe4b3693e3656e93a249848',
+ },
+ }, {
+ 'url': 'http://www.tagesschau.de/multimedia/sendung/tsg-3771.html',
+ 'only_matching': True,
+ }, {
+ 'url': 'http://www.tagesschau.de/multimedia/sendung/tt-3827.html',
+ 'only_matching': True,
+ }, {
+ 'url': 'http://www.tagesschau.de/multimedia/sendung/nm-3475.html',
+ 'only_matching': True,
+ }, {
+ 'url': 'http://www.tagesschau.de/multimedia/sendung/weltspiegel-3167.html',
+ 'only_matching': True,
+ }, {
+ 'url': 'http://www.tagesschau.de/multimedia/tsvorzwanzig-959.html',
+ 'only_matching': True,
+ }, {
+ 'url': 'http://www.tagesschau.de/multimedia/sendung/bab/bab-3299~_bab-sendung-209.html',
+ 'only_matching': True,
+ }, {
+ 'url': 'http://www.tagesschau.de/multimedia/video/video-102303~_bab-sendung-211.html',
+ 'only_matching': True,
+ }, {
+ 'url': 'http://www.tagesschau.de/100sekunden/index.html',
+ 'only_matching': True,
+ }, {
+ # playlist article with collapsing sections
+ 'url': 'http://www.tagesschau.de/wirtschaft/faq-freihandelszone-eu-usa-101.html',
+ 'only_matching': True,
+ }]
+
+ def _real_extract(self, url):
+ mobj = self._match_valid_url(url)
+ video_id = mobj.group('id') or mobj.group('path')
+ display_id = video_id.lstrip('-')
+
+ webpage = self._download_webpage(url, display_id)
+
+ title = self._html_search_regex(
+ r'<span[^>]*class="headline"[^>]*>(.+?)</span>',
+ webpage, 'title', default=None) or self._og_search_title(webpage, fatal=False)
+
+ entries = []
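+ # Each embedded player is a <div> whose data-config attribute carries the
+ # (ARD-style) player JSON; scan every div and skip any that fail to parse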
+ videos = re.findall(r'<div[^>]+>', webpage)
+ num = 0
+ for video in videos:
+ video = extract_attributes(video).get('data-config')
+ if not video:
+ continue
+ video = self._parse_json(video, video_id, transform_source=js_to_json, fatal=False)
+ video_formats = try_get(video, lambda x: x['mc']['_mediaArray'][0]['_mediaStreamArray'])
+ if not video_formats:
+ continue
+ num += 1
+ for video_format in video_formats:
+ media_url = video_format.get('_stream') or ''
+ formats = []
+ if media_url.endswith('master.m3u8'):
+ formats = self._extract_m3u8_formats(media_url, video_id, 'mp4', m3u8_id='hls')
+ elif media_url.endswith('.mp3'):
+ formats = [{
+ 'url': media_url,
+ 'vcodec': 'none',
+ }]
+ if not formats:
+ continue
+ entries.append({
+ 'id': '%s-%d' % (display_id, num),
+ 'title': try_get(video, lambda x: x['mc']['_title']),
+ 'duration': int_or_none(try_get(video, lambda x: x['mc']['_duration'])),
+ 'formats': formats
+ })
+
+ if not entries:
+ raise UnsupportedError(url)
+
+ if len(entries) > 1:
+ return self.playlist_result(entries, display_id, title)
+
+ return {
+ 'id': display_id,
+ 'title': title,
+ 'thumbnail': self._og_search_thumbnail(webpage),
+ 'formats': entries[0]['formats'],
+ 'timestamp': parse_iso8601(self._html_search_meta('date', webpage)),
+ 'description': self._og_search_description(webpage),
+ 'duration': entries[0]['duration'],
+ }
diff --git a/yt_dlp/extractor/tass.py b/yt_dlp/extractor/tass.py
new file mode 100644
index 0000000..d4c5b41
--- /dev/null
+++ b/yt_dlp/extractor/tass.py
@@ -0,0 +1,59 @@
+import json
+
+from .common import InfoExtractor
+from ..utils import (
+ js_to_json,
+ qualities,
+)
+
+
+class TassIE(InfoExtractor):
+ _WORKING = False
+ _VALID_URL = r'https?://(?:tass\.ru|itar-tass\.com)/[^/]+/(?P<id>\d+)'
+ _TESTS = [
+ {
+ 'url': 'http://tass.ru/obschestvo/1586870',
+ 'md5': '3b4cdd011bc59174596b6145cda474a4',
+ 'info_dict': {
+ 'id': '1586870',
+ 'ext': 'mp4',
+ 'title': 'Посетителям московского зоопарка показали красную панду',
+ 'description': 'Приехавшую из Дублина Зейну можно увидеть в павильоне "Кошки тропиков"',
+ 'thumbnail': r're:^https?://.*\.jpg$',
+ },
+ },
+ {
+ 'url': 'http://itar-tass.com/obschestvo/1600009',
+ 'only_matching': True,
+ },
+ ]
+
+ def _real_extract(self, url):
+ video_id = self._match_id(url)
+
+ webpage = self._download_webpage(url, video_id)
+
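+ # The player config is a JS array literal; js_to_json turns it into valid JSON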
+ sources = json.loads(js_to_json(self._search_regex(
+ r'(?s)sources\s*:\s*(\[.+?\])', webpage, 'sources')))
+
+ quality = qualities(['sd', 'hd'])
+
+ formats = []
+ for source in sources:
+ video_url = source.get('file')
+ if not video_url or not video_url.startswith('http') or not video_url.endswith('.mp4'):
+ continue
+ label = source.get('label')
+ formats.append({
+ 'url': video_url,
+ 'format_id': label,
+ 'quality': quality(label),
+ })
+
+ return {
+ 'id': video_id,
+ 'title': self._og_search_title(webpage),
+ 'description': self._og_search_description(webpage),
+ 'thumbnail': self._og_search_thumbnail(webpage),
+ 'formats': formats,
+ }
diff --git a/yt_dlp/extractor/tbs.py b/yt_dlp/extractor/tbs.py
new file mode 100644
index 0000000..808c6c7
--- /dev/null
+++ b/yt_dlp/extractor/tbs.py
@@ -0,0 +1,89 @@
+import re
+
+from .turner import TurnerBaseIE
+from ..compat import (
+ compat_urllib_parse_urlparse,
+ compat_parse_qs,
+)
+from ..utils import (
+ float_or_none,
+ int_or_none,
+ strip_or_none,
+)
+
+
+class TBSIE(TurnerBaseIE):
+ _VALID_URL = r'https?://(?:www\.)?(?P<site>tbs|tntdrama)\.com(?P<path>/(?:movies|watchtnt|watchtbs|shows/[^/]+/(?:clips|season-\d+/episode-\d+))/(?P<id>[^/?#]+))'
+ _TESTS = [{
+ 'url': 'http://www.tntdrama.com/shows/the-alienist/clips/monster',
+ 'info_dict': {
+ 'id': '8d384cde33b89f3a43ce5329de42903ed5099887',
+ 'ext': 'mp4',
+ 'title': 'Monster',
+ 'description': 'Get a first look at the theatrical trailer for TNT’s highly anticipated new psychological thriller The Alienist, which premieres January 22 on TNT.',
+ 'timestamp': 1508175329,
+ 'upload_date': '20171016',
+ },
+ 'params': {
+ # m3u8 download
+ 'skip_download': True,
+ }
+ }, {
+ 'url': 'http://www.tbs.com/shows/search-party/season-1/episode-1/explicit-the-mysterious-disappearance-of-the-girl-no-one-knew',
+ 'only_matching': True,
+ }, {
+ 'url': 'http://www.tntdrama.com/movies/star-wars-a-new-hope',
+ 'only_matching': True,
+ }]
+
+ def _real_extract(self, url):
+ site, path, display_id = self._match_valid_url(url).groups()
+ webpage = self._download_webpage(url, display_id)
+ drupal_settings = self._parse_json(self._search_regex(
+ r'<script[^>]+?data-drupal-selector="drupal-settings-json"[^>]*?>({.+?})</script>',
+ webpage, 'drupal setting'), display_id)
+ isLive = 'watchtnt' in path or 'watchtbs' in path
+ video_data = next(v for v in drupal_settings['turner_playlist'] if isLive or v.get('url') == path)
+
+ media_id = video_data['mediaID']
+ title = video_data['title']
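+ # The auth parameters for the NGTV request are taken from the query string
+ # of Drupal's tokenizer URL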
+ tokenizer_query = compat_parse_qs(compat_urllib_parse_urlparse(
+ drupal_settings['ngtv_token_url']).query)
+
+ info = self._extract_ngtv_info(
+ media_id, tokenizer_query, {
+ 'url': url,
+ 'site_name': site[:3].upper(),
+ 'auth_required': video_data.get('authRequired') == '1' or isLive,
+ 'is_live': isLive
+ })
+
+ thumbnails = []
+ for image_id, image in video_data.get('images', {}).items():
+ image_url = image.get('url')
+ if not image_url or image.get('type') != 'video':
+ continue
+ i = {
+ 'id': image_id,
+ 'url': image_url,
+ }
+ mobj = re.search(r'(\d+)x(\d+)', image_url)
+ if mobj:
+ i.update({
+ 'width': int(mobj.group(1)),
+ 'height': int(mobj.group(2)),
+ })
+ thumbnails.append(i)
+
+ info.update({
+ 'id': media_id,
+ 'title': title,
+ 'description': strip_or_none(video_data.get('descriptionNoTags') or video_data.get('shortDescriptionNoTags')),
+ 'duration': float_or_none(video_data.get('duration')) or info.get('duration'),
+ 'timestamp': int_or_none(video_data.get('created')),
+ 'season_number': int_or_none(video_data.get('season')),
+ 'episode_number': int_or_none(video_data.get('episode')),
+ 'thumbnails': thumbnails,
+ 'is_live': isLive
+ })
+ return info
diff --git a/yt_dlp/extractor/tbsjp.py b/yt_dlp/extractor/tbsjp.py
new file mode 100644
index 0000000..77ddeca
--- /dev/null
+++ b/yt_dlp/extractor/tbsjp.py
@@ -0,0 +1,152 @@
+from .common import InfoExtractor
+from ..networking.exceptions import HTTPError
+from ..utils import (
+ ExtractorError,
+ clean_html,
+ get_element_text_and_html_by_tag,
+ int_or_none,
+ str_or_none,
+ traverse_obj,
+ try_call,
+ unified_timestamp,
+ urljoin,
+)
+
+
+class TBSJPEpisodeIE(InfoExtractor):
+ _VALID_URL = r'https?://cu\.tbs\.co\.jp/episode/(?P<id>[\d_]+)'
+ _GEO_BYPASS = False
+ _TESTS = [{
+ 'url': 'https://cu.tbs.co.jp/episode/23613_2044134_1000049010',
+ 'skip': 'streams geo-restricted, Japan only. Also, will likely expire eventually',
+ 'info_dict': {
+ 'title': 'VIVANT 第三話 誤送金完結へ!絶体絶命の反撃開始',
+ 'id': '23613_2044134_1000049010',
+ 'ext': 'mp4',
+ 'upload_date': '20230728',
+ 'duration': 3517,
+ 'release_timestamp': 1691118230,
+ 'episode': '第三話 誤送金完結へ!絶体絶命の反撃開始',
+ 'release_date': '20230804',
+ 'categories': 'count:11',
+ 'episode_number': 3,
+ 'timestamp': 1690522538,
+ 'description': 'md5:2b796341af1ef772034133174ba4a895',
+ 'series': 'VIVANT',
+ },
+ }]
+
+ def _real_extract(self, url):
+ video_id = self._match_id(url)
+ webpage = self._download_webpage(url, video_id)
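+ # The page state is a Falcor cache serialized into the window.app global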
+ meta = self._search_json(r'window\.app\s*=', webpage, 'episode info', video_id, fatal=False)
+ episode = traverse_obj(meta, ('falcorCache', 'catalog', 'episode', video_id, 'value'))
+
+ tf_path = self._search_regex(
+ r'<script[^>]+src=["\'](/assets/tf\.[^"\']+\.js)["\']', webpage, 'stream API config')
+ tf_js = self._download_webpage(urljoin(url, tf_path), video_id, note='Downloading stream API config')
+ video_url = self._search_regex(r'videoPlaybackUrl:\s*[\'"]([^\'"]+)[\'"]', tf_js, 'stream API url')
+ api_key = self._search_regex(r'api_key:\s*[\'"]([^\'"]+)[\'"]', tf_js, 'stream API key')
+
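+ # Query the Streaks playback API; a 403 here indicates the stream is
+ # geo-restricted to Japan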
+ try:
+ source_meta = self._download_json(f'{video_url}ref:{video_id}', video_id,
+ headers={'X-Streaks-Api-Key': api_key},
+ note='Downloading stream metadata')
+ except ExtractorError as e:
+ if isinstance(e.cause, HTTPError) and e.cause.status == 403:
+ self.raise_geo_restricted(countries=['JP'])
+ raise
+
+ formats, subtitles = [], {}
+ for src in traverse_obj(source_meta, ('sources', ..., 'src')):
+ fmts, subs = self._extract_m3u8_formats_and_subtitles(src, video_id, fatal=False)
+ formats.extend(fmts)
+ self._merge_subtitles(subs, target=subtitles)
+
+ return {
+ 'title': try_call(lambda: clean_html(get_element_text_and_html_by_tag('h3', webpage)[0])),
+ 'id': video_id,
+ **traverse_obj(episode, {
+ 'categories': ('keywords', {list}),
+ 'id': ('content_id', {str}),
+ 'description': ('description', 0, 'value'),
+ 'timestamp': ('created_at', {unified_timestamp}),
+ 'release_timestamp': ('pub_date', {unified_timestamp}),
+ 'duration': ('tv_episode_info', 'duration', {int_or_none}),
+ 'episode_number': ('tv_episode_info', 'episode_number', {int_or_none}),
+ 'episode': ('title', lambda _, v: not v.get('is_phonetic'), 'value'),
+ 'series': ('custom_data', 'program_name'),
+ }, get_all=False),
+ 'formats': formats,
+ 'subtitles': subtitles,
+ }
+
+
+class TBSJPProgramIE(InfoExtractor):
+ _VALID_URL = r'https?://cu\.tbs\.co\.jp/program/(?P<id>\d+)'
+ _TESTS = [{
+ 'url': 'https://cu.tbs.co.jp/program/23601',
+ 'playlist_mincount': 4,
+ 'info_dict': {
+ 'id': '23601',
+ 'categories': ['エンタメ', 'ミライカプセル', '会社', '働く', 'バラエティ', '動画'],
+ 'description': '幼少期の夢は大人になって、どう成長したのだろうか?\nそしてその夢は今後、どのように広がっていくのか?\nいま話題の会社で働く人の「夢の成長」を描く',
+ 'series': 'ミライカプセル -I have a dream-',
+ 'title': 'ミライカプセル -I have a dream-'
+ }
+ }]
+
+ def _real_extract(self, url):
+ programme_id = self._match_id(url)
+ webpage = self._download_webpage(url, programme_id)
+ meta = self._search_json(r'window\.app\s*=', webpage, 'programme info', programme_id)
+
+ programme = traverse_obj(meta, ('falcorCache', 'catalog', 'program', programme_id, 'false', 'value'))
+
+ return {
+ '_type': 'playlist',
+ 'entries': [self.url_result(f'https://cu.tbs.co.jp/episode/{video_id}', TBSJPEpisodeIE, video_id)
+ for video_id in traverse_obj(programme, ('custom_data', 'seriesList', 'episodeCode', ...))],
+ 'id': programme_id,
+ **traverse_obj(programme, {
+ 'categories': ('keywords', ...),
+ 'id': ('tv_episode_info', 'show_content_id', {str_or_none}),
+ 'description': ('custom_data', 'program_description'),
+ 'series': ('custom_data', 'program_name'),
+ 'title': ('custom_data', 'program_name'),
+ }),
+ }
+
+
+class TBSJPPlaylistIE(InfoExtractor):
+ _VALID_URL = r'https?://cu\.tbs\.co\.jp/playlist/(?P<id>[\da-f]+)'
+ _TESTS = [{
+ 'url': 'https://cu.tbs.co.jp/playlist/184f9970e7ba48e4915f1b252c55015e',
+ 'playlist_mincount': 4,
+ 'info_dict': {
+ 'title': 'まもなく配信終了',
+ 'id': '184f9970e7ba48e4915f1b252c55015e',
+ }
+ }]
+
+ def _real_extract(self, url):
+ playlist_id = self._match_id(url)
+ page = self._download_webpage(url, playlist_id)
+ meta = self._search_json(r'window\.app\s*=', page, 'playlist info', playlist_id)
+ playlist = traverse_obj(meta, ('falcorCache', 'playList', playlist_id))
+
+ def entries():
+ for entry in traverse_obj(playlist, ('catalogs', 'value', lambda _, v: v['content_id'])):
+ # TODO: it's likely possible to get all metadata from the playlist page json instead
+ content_id = entry['content_id']
+ content_type = entry.get('content_type')
+ if content_type == 'tv_show':
+ yield self.url_result(
+ f'https://cu.tbs.co.jp/program/{content_id}', TBSJPProgramIE, content_id)
+ elif content_type == 'tv_episode':
+ yield self.url_result(
+ f'https://cu.tbs.co.jp/episode/{content_id}', TBSJPEpisodeIE, content_id)
+ else:
+ self.report_warning(f'Skipping "{content_id}" with unsupported content_type "{content_type}"')
+
+ return self.playlist_result(entries(), playlist_id, traverse_obj(playlist, ('display_name', 'value')))
diff --git a/yt_dlp/extractor/teachable.py b/yt_dlp/extractor/teachable.py
new file mode 100644
index 0000000..5eac9aa
--- /dev/null
+++ b/yt_dlp/extractor/teachable.py
@@ -0,0 +1,296 @@
+import re
+
+from .common import InfoExtractor
+from .wistia import WistiaIE
+from ..utils import (
+ clean_html,
+ ExtractorError,
+ int_or_none,
+ get_element_by_class,
+ strip_or_none,
+ urlencode_postdata,
+ urljoin,
+)
+
+
+class TeachableBaseIE(InfoExtractor):
+ _NETRC_MACHINE = 'teachable'
+ _URL_PREFIX = 'teachable:'
+
+ _SITES = {
+ # Only notable ones here
+ 'v1.upskillcourses.com': 'upskill',
+ 'gns3.teachable.com': 'gns3',
+ 'academyhacker.com': 'academyhacker',
+ 'stackskills.com': 'stackskills',
+ 'market.saleshacker.com': 'saleshacker',
+ 'learnability.org': 'learnability',
+ 'edurila.com': 'edurila',
+ 'courses.workitdaily.com': 'workitdaily',
+ }
+
+ _VALID_URL_SUB_TUPLE = (_URL_PREFIX, '|'.join(re.escape(site) for site in _SITES.keys()))
+
+ def _real_initialize(self):
+ self._logged_in = False
+
+ def _login(self, site):
+ if self._logged_in:
+ return
+
+ username, password = self._get_login_info(netrc_machine=self._SITES.get(site, site))
+ if username is None:
+ return
+
+ login_page, urlh = self._download_webpage_handle(
+ 'https://%s/sign_in' % site, None,
+ 'Downloading %s login page' % site)
+
+ def is_logged(webpage):
+ return any(re.search(p, webpage) for p in (
+ r'class=["\']user-signout',
+ r'<a[^>]+\bhref=["\']/sign_out',
+ r'Log\s+[Oo]ut\s*<'))
+
+ if is_logged(login_page):
+ self._logged_in = True
+ return
+
+ login_url = urlh.url
+
+ login_form = self._hidden_inputs(login_page)
+
+ login_form.update({
+ 'user[email]': username,
+ 'user[password]': password,
+ })
+
+ post_url = self._search_regex(
+ r'<form[^>]+action=(["\'])(?P<url>(?:(?!\1).)+)\1', login_page,
+ 'post url', default=login_url, group='url')
+
+ if not post_url.startswith('http'):
+ post_url = urljoin(login_url, post_url)
+
+ response = self._download_webpage(
+ post_url, None, 'Logging in to %s' % site,
+ data=urlencode_postdata(login_form),
+ headers={
+ 'Content-Type': 'application/x-www-form-urlencoded',
+ 'Referer': login_url,
+ })
+
+ if '>I accept the new Privacy Policy<' in response:
+ raise ExtractorError(
+ 'Unable to log in: %s asks you to accept the new Privacy Policy. '
+ 'Go to https://%s/ and accept.' % (site, site), expected=True)
+
+ # Successful login
+ if is_logged(response):
+ self._logged_in = True
+ return
+
+ message = get_element_by_class('alert', response)
+ if message is not None:
+ raise ExtractorError(
+ 'Unable to log in: %s' % clean_html(message), expected=True)
+
+ raise ExtractorError('Unable to log in')
+
+
+class TeachableIE(TeachableBaseIE):
+ _WORKING = False
+ _VALID_URL = r'''(?x)
+ (?:
+ %shttps?://(?P<site_t>[^/]+)|
+ https?://(?:www\.)?(?P<site>%s)
+ )
+ /courses/[^/]+/lectures/(?P<id>\d+)
+ ''' % TeachableBaseIE._VALID_URL_SUB_TUPLE
+
+ _TESTS = [{
+ 'url': 'https://gns3.teachable.com/courses/gns3-certified-associate/lectures/6842364',
+ 'info_dict': {
+ 'id': 'untlgzk1v7',
+ 'ext': 'bin',
+ 'title': 'Overview',
+ 'description': 'md5:071463ff08b86c208811130ea1c2464c',
+ 'duration': 736.4,
+ 'timestamp': 1542315762,
+ 'upload_date': '20181115',
+ 'chapter': 'Welcome',
+ 'chapter_number': 1,
+ },
+ 'params': {
+ 'skip_download': True,
+ },
+ }, {
+ 'url': 'http://v1.upskillcourses.com/courses/119763/lectures/1747100',
+ 'only_matching': True,
+ }, {
+ 'url': 'https://gns3.teachable.com/courses/423415/lectures/6885939',
+ 'only_matching': True,
+ }, {
+ 'url': 'teachable:https://v1.upskillcourses.com/courses/essential-web-developer-course/lectures/1747100',
+ 'only_matching': True,
+ }]
+
+ @staticmethod
+ def _is_teachable(webpage):
+ return 'teachableTracker.linker:autoLink' in webpage and re.search(
+ r'<link[^>]+href=["\']https?://(?:process\.fs|assets)\.teachablecdn\.com',
+ webpage)
+
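+ # Embedded pages on Teachable-hosted sites are re-queued with the
+ # 'teachable:' URL prefix so the site name can later be recovered for login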
+ @classmethod
+ def _extract_embed_urls(cls, url, webpage):
+ if cls._is_teachable(webpage):
+ if re.match(r'https?://[^/]+/(?:courses|p)', url):
+ yield f'{cls._URL_PREFIX}{url}'
+ raise cls.StopExtraction()
+
+ def _real_extract(self, url):
+ mobj = self._match_valid_url(url)
+ site = mobj.group('site') or mobj.group('site_t')
+ video_id = mobj.group('id')
+
+ self._login(site)
+
+ prefixed = url.startswith(self._URL_PREFIX)
+ if prefixed:
+ url = url[len(self._URL_PREFIX):]
+
+ webpage = self._download_webpage(url, video_id)
+
+ wistia_urls = WistiaIE._extract_embed_urls(url, webpage)
+ if not wistia_urls:
+ if any(re.search(p, webpage) for p in (
+ r'class=["\']lecture-contents-locked',
+ r'>\s*Lecture contents locked',
+ r'id=["\']lecture-locked',
+ # https://academy.tailoredtutors.co.uk/courses/108779/lectures/1955313
+ r'class=["\'](?:inner-)?lesson-locked',
+ r'>LESSON LOCKED<')):
+ self.raise_login_required('Lecture contents locked')
+ raise ExtractorError('Unable to find video URL')
+
+ title = self._og_search_title(webpage, default=None)
+
+ chapter = None
+ chapter_number = None
+ section_item = self._search_regex(
+ r'(?s)(?P<li><li[^>]+\bdata-lecture-id=["\']%s[^>]+>.+?</li>)' % video_id,
+ webpage, 'section item', default=None, group='li')
+ if section_item:
+ chapter_number = int_or_none(self._search_regex(
+ r'data-ss-position=["\'](\d+)', section_item, 'section id',
+ default=None))
+ if chapter_number is not None:
+ sections = []
+ for s in re.findall(
+ r'(?s)<div[^>]+\bclass=["\']section-title[^>]+>(.+?)</div>', webpage):
+ section = strip_or_none(clean_html(s))
+ if not section:
+ sections = []
+ break
+ sections.append(section)
+ if chapter_number <= len(sections):
+ chapter = sections[chapter_number - 1]
+
+ entries = [{
+ '_type': 'url_transparent',
+ 'url': wistia_url,
+ 'ie_key': WistiaIE.ie_key(),
+ 'title': title,
+ 'chapter': chapter,
+ 'chapter_number': chapter_number,
+ } for wistia_url in wistia_urls]
+
+ return self.playlist_result(entries, video_id, title)
+
+
+class TeachableCourseIE(TeachableBaseIE):
+ _VALID_URL = r'''(?x)
+ (?:
+ %shttps?://(?P<site_t>[^/]+)|
+ https?://(?:www\.)?(?P<site>%s)
+ )
+ /(?:courses|p)/(?:enrolled/)?(?P<id>[^/?#&]+)
+ ''' % TeachableBaseIE._VALID_URL_SUB_TUPLE
+ _TESTS = [{
+ 'url': 'http://v1.upskillcourses.com/courses/essential-web-developer-course/',
+ 'info_dict': {
+ 'id': 'essential-web-developer-course',
+ 'title': 'The Essential Web Developer Course (Free)',
+ },
+ 'playlist_count': 192,
+ }, {
+ 'url': 'http://v1.upskillcourses.com/courses/119763/',
+ 'only_matching': True,
+ }, {
+ 'url': 'http://v1.upskillcourses.com/courses/enrolled/119763',
+ 'only_matching': True,
+ }, {
+ 'url': 'https://gns3.teachable.com/courses/enrolled/423415',
+ 'only_matching': True,
+ }, {
+ 'url': 'teachable:https://learn.vrdev.school/p/gear-vr-developer-mini',
+ 'only_matching': True,
+ }, {
+ 'url': 'teachable:https://filmsimplified.com/p/davinci-resolve-15-crash-course',
+ 'only_matching': True,
+ }]
+
+ @classmethod
+ def suitable(cls, url):
+ return False if TeachableIE.suitable(url) else super().suitable(url)
+
+ def _real_extract(self, url):
+ mobj = self._match_valid_url(url)
+ site = mobj.group('site') or mobj.group('site_t')
+ course_id = mobj.group('id')
+
+ self._login(site)
+
+ prefixed = url.startswith(self._URL_PREFIX)
+ if prefixed:
+ prefix = self._URL_PREFIX
+ url = url[len(prefix):]
+
+ webpage = self._download_webpage(url, course_id)
+
+ url_base = 'https://%s/' % site
+
+ entries = []
+
+ for mobj in re.finditer(
+ r'(?s)(?P<li><li[^>]+class=(["\'])(?:(?!\2).)*?section-item[^>]+>.+?</li>)',
+ webpage):
+ li = mobj.group('li')
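+ # Keep only section items that look playable: a play icon or an mm:ss duration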
+ if 'fa-youtube-play' not in li and not re.search(r'\d{1,2}:\d{2}', li):
+ continue
+ lecture_url = self._search_regex(
+ r'<a[^>]+href=(["\'])(?P<url>(?:(?!\1).)+)\1', li,
+ 'lecture url', default=None, group='url')
+ if not lecture_url:
+ continue
+ lecture_id = self._search_regex(
+ r'/lectures/(\d+)', lecture_url, 'lecture id', default=None)
+ title = self._html_search_regex(
+ r'<span[^>]+class=["\']lecture-name[^>]+>([^<]+)', li,
+ 'title', default=None)
+ entry_url = urljoin(url_base, lecture_url)
+ if prefixed:
+ entry_url = self._URL_PREFIX + entry_url
+ entries.append(
+ self.url_result(
+ entry_url,
+ ie=TeachableIE.ie_key(), video_id=lecture_id,
+ video_title=clean_html(title)))
+
+ course_title = self._html_search_regex(
+ (r'(?s)<img[^>]+class=["\']course-image[^>]+>\s*<h\d>(.+?)</h',
+ r'(?s)<h\d[^>]+class=["\']course-title[^>]+>(.+?)</h'),
+ webpage, 'course title', fatal=False)
+
+ return self.playlist_result(entries, course_id, course_title)
diff --git a/yt_dlp/extractor/teachertube.py b/yt_dlp/extractor/teachertube.py
new file mode 100644
index 0000000..90a9762
--- /dev/null
+++ b/yt_dlp/extractor/teachertube.py
@@ -0,0 +1,126 @@
+import re
+
+from .common import InfoExtractor
+from ..utils import (
+ determine_ext,
+ ExtractorError,
+ qualities,
+)
+
+
+class TeacherTubeIE(InfoExtractor):
+ _WORKING = False
+ IE_NAME = 'teachertube'
+ IE_DESC = 'teachertube.com videos'
+
+ _VALID_URL = r'https?://(?:www\.)?teachertube\.com/(viewVideo\.php\?video_id=|music\.php\?music_id=|video/(?:[\da-z-]+-)?|audio/)(?P<id>\d+)'
+
+ _TESTS = [{
+ # flowplayer
+ 'url': 'http://www.teachertube.com/viewVideo.php?video_id=339997',
+ 'md5': 'f9434ef992fd65936d72999951ee254c',
+ 'info_dict': {
+ 'id': '339997',
+ 'ext': 'mp4',
+ 'title': 'Measures of dispersion from a frequency table',
+ 'description': 'Measures of dispersion from a frequency table',
+ 'thumbnail': r're:https?://.*\.(?:jpg|png)',
+ },
+ }, {
+ # jwplayer
+ 'url': 'http://www.teachertube.com/music.php?music_id=8805',
+ 'md5': '01e8352006c65757caf7b961f6050e21',
+ 'info_dict': {
+ 'id': '8805',
+ 'ext': 'mp3',
+ 'title': 'PER ASPERA AD ASTRA',
+ 'description': 'RADIJSKA EMISIJA ZRAKOPLOVNE TEHNI?KE ?KOLE P',
+ },
+ }, {
+ # unavailable video
+ 'url': 'http://www.teachertube.com/video/intro-video-schleicher-297790',
+ 'only_matching': True,
+ }]
+
+ def _real_extract(self, url):
+ video_id = self._match_id(url)
+ webpage = self._download_webpage(url, video_id)
+
+ error = self._search_regex(
+ r'<div\b[^>]+\bclass=["\']msgBox error[^>]+>([^<]+)', webpage,
+ 'error', default=None)
+ if error:
+ raise ExtractorError('%s said: %s' % (self.IE_NAME, error), expected=True)
+
+ title = self._html_search_meta('title', webpage, 'title', fatal=True)
+ TITLE_SUFFIX = ' - TeacherTube'
+ if title.endswith(TITLE_SUFFIX):
+ title = title[:-len(TITLE_SUFFIX)].strip()
+
+ description = self._html_search_meta('description', webpage, 'description')
+ if description:
+ description = description.strip()
+
+ quality = qualities(['mp3', 'flv', 'mp4'])
+
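+ # Media URLs may appear as data attributes, a filePath variable or JW config entries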
+ media_urls = re.findall(r'data-contenturl="([^"]+)"', webpage)
+ media_urls.extend(re.findall(r'var\s+filePath\s*=\s*"([^"]+)"', webpage))
+ media_urls.extend(re.findall(r'\'file\'\s*:\s*["\']([^"\']+)["\'],', webpage))
+
+ formats = [
+ {
+ 'url': media_url,
+ 'quality': quality(determine_ext(media_url))
+ } for media_url in set(media_urls)
+ ]
+
+ thumbnail = self._og_search_thumbnail(
+ webpage, default=None) or self._html_search_meta(
+ 'thumbnail', webpage)
+
+ return {
+ 'id': video_id,
+ 'title': title,
+ 'description': description,
+ 'thumbnail': thumbnail,
+ 'formats': formats,
+ }
+
+
+class TeacherTubeUserIE(InfoExtractor):
+ _WORKING = False
+ IE_NAME = 'teachertube:user:collection'
+ IE_DESC = 'teachertube.com user and collection videos'
+
+ _VALID_URL = r'https?://(?:www\.)?teachertube\.com/(user/profile|collection)/(?P<user>[0-9a-zA-Z]+)/?'
+
+ _MEDIA_RE = r'''(?sx)
+ class="?sidebar_thumb_time"?>[0-9:]+</div>
+ \s*
+ <a\s+href="(https?://(?:www\.)?teachertube\.com/(?:video|audio)/[^"]+)"
+ '''
+ _TEST = {
+ 'url': 'http://www.teachertube.com/user/profile/rbhagwati2',
+ 'info_dict': {
+ 'id': 'rbhagwati2'
+ },
+ 'playlist_mincount': 179,
+ }
+
+ def _real_extract(self, url):
+ mobj = self._match_valid_url(url)
+ user_id = mobj.group('user')
+
+ urls = []
+ webpage = self._download_webpage(url, user_id)
+ urls.extend(re.findall(self._MEDIA_RE, webpage))
+
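+ # Gather AJAX pagination links; the last entry appears to be redundant, hence the [:-1]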
+ pages = re.findall(r'/ajax-user/user-videos/%s\?page=([0-9]+)' % user_id, webpage)[:-1]
+ for p in pages:
+ more = 'http://www.teachertube.com/ajax-user/user-videos/%s?page=%s' % (user_id, p)
+ webpage = self._download_webpage(more, user_id, 'Downloading page %s/%s' % (p, len(pages)))
+ video_urls = re.findall(self._MEDIA_RE, webpage)
+ urls.extend(video_urls)
+
+ entries = [self.url_result(vurl, 'TeacherTube') for vurl in urls]
+ return self.playlist_result(entries, user_id)
diff --git a/yt_dlp/extractor/teachingchannel.py b/yt_dlp/extractor/teachingchannel.py
new file mode 100644
index 0000000..5791292
--- /dev/null
+++ b/yt_dlp/extractor/teachingchannel.py
@@ -0,0 +1,32 @@
+from .common import InfoExtractor
+
+
+class TeachingChannelIE(InfoExtractor):
+ _WORKING = False
+ _VALID_URL = r'https?://(?:www\.)?teachingchannel\.org/videos?/(?P<id>[^/?&#]+)'
+
+ _TEST = {
+ 'url': 'https://www.teachingchannel.org/videos/teacher-teaming-evolution',
+ 'info_dict': {
+ 'id': '3swwlzkT',
+ 'ext': 'mp4',
+ 'title': 'A History of Teaming',
+ 'description': 'md5:2a9033db8da81f2edffa4c99888140b3',
+ 'duration': 422,
+ 'upload_date': '20170316',
+ 'timestamp': 1489691297,
+ },
+ 'params': {
+ 'skip_download': True,
+ },
+ 'add_ie': ['JWPlatform'],
+ }
+
+ def _real_extract(self, url):
+ display_id = self._match_id(url)
+ webpage = self._download_webpage(url, display_id)
+ mid = self._search_regex(
+ r'(?:data-mid=["\']|id=["\']jw-video-player-)([a-zA-Z0-9]{8})',
+ webpage, 'media id')
+
+ return self.url_result('jwplatform:' + mid, 'JWPlatform', mid)
diff --git a/yt_dlp/extractor/teamcoco.py b/yt_dlp/extractor/teamcoco.py
new file mode 100644
index 0000000..d32f812
--- /dev/null
+++ b/yt_dlp/extractor/teamcoco.py
@@ -0,0 +1,280 @@
+import json
+import re
+
+from .turner import TurnerBaseIE
+from ..utils import (
+ ExtractorError,
+ clean_html,
+ determine_ext,
+ make_archive_id,
+ merge_dicts,
+ mimetype2ext,
+ parse_duration,
+ parse_qs,
+ traverse_obj,
+ unified_timestamp,
+ urljoin,
+ url_or_none,
+)
+
+
+class TeamcocoBaseIE(TurnerBaseIE):
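+ # Known progressive-MP4 quality labels mapped to (width, height)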
+ _QUALITIES = {
+ 'low': (480, 272),
+ 'sd': (640, 360),
+ 'hd': (1280, 720),
+ 'uhd': (1920, 1080),
+ }
+
+ def _get_formats_and_subtitles(self, info, video_id):
+ formats, subtitles = [], {}
+
+ for src in traverse_obj(info, ('src', ..., {dict})):
+ format_id = src.get('label')
+ src_url = src.get('src')
+ if src_url and re.match(r'https?:/[^/]', src_url):  # src may be absent
+ src_url = src_url.replace(':/', '://', 1)
+ ext = determine_ext(src_url, mimetype2ext(src.get('type')))
+
+ if not format_id or not src_url:
+ continue
+ elif format_id == 'hls' or ext == 'm3u8':
+ fmts, subs = self._extract_m3u8_formats_and_subtitles(
+ src_url, video_id, 'mp4', m3u8_id=format_id, fatal=False)
+ formats.extend(fmts)
+ self._merge_subtitles(subs, target=subtitles)
+
+ elif format_id in self._QUALITIES:
+ if src_url.startswith('/mp4:protected/'):
+ # TODO: Correct extraction for these files
+ continue
+ formats.append({
+ 'url': src_url,
+ 'ext': ext,
+ 'format_id': format_id,
+ 'width': self._QUALITIES[format_id][0],
+ 'height': self._QUALITIES[format_id][1],
+ })
+
+ return formats, subtitles
+
+
+class TeamcocoIE(TeamcocoBaseIE):
+ _VALID_URL = r'https?://(?:www\.)?teamcoco\.com/(?P<id>([^/]+/)*[^/?#]+)'
+ _TESTS = [
+ {
+ 'url': 'http://teamcoco.com/video/mary-kay-remote',
+ 'info_dict': {
+ 'id': '80187',
+ 'display_id': 'video_mary-kay-remote',
+ 'ext': 'mp4',
+ 'title': 'Conan Becomes A Mary Kay Beauty Consultant',
+ 'description': 'md5:9fb64e45b5aef6b2af1b67612b36c162',
+ 'thumbnail': 'https://teamcoco.com/image/thumb?id=80187',
+ 'upload_date': '20140402',
+ 'timestamp': 1396440000,
+ },
+ 'params': {
+ 'skip_download': 'm3u8',
+ },
+ }, {
+ 'url': 'http://teamcoco.com/video/louis-ck-interview-george-w-bush',
+ 'info_dict': {
+ 'id': '19705',
+ 'display_id': 'video_louis-ck-interview-george-w-bush',
+ 'ext': 'mp4',
+ 'title': 'Louis C.K. Interview Pt. 1 11/3/11',
+ 'description': 'Louis C.K. got starstruck by George W. Bush, so what? Part one.',
+ 'thumbnail': 'https://teamcoco.com/image/thumb?id=19705',
+ 'upload_date': '20111104',
+ 'timestamp': 1320408000,
+ },
+ 'params': {
+ 'skip_download': 'm3u8',
+ },
+ }, {
+ 'url': 'http://teamcoco.com/video/timothy-olyphant-drinking-whiskey',
+ 'info_dict': {
+ 'id': '88748',
+ 'display_id': 'video_timothy-olyphant-drinking-whiskey',
+ 'ext': 'mp4',
+ 'title': 'Timothy Olyphant Raises A Toast To “Justified”',
+ 'description': 'md5:15501f23f020e793aeca761205e42c24',
+ 'upload_date': '20150415',
+ 'timestamp': 1429099200,
+ 'thumbnail': 'https://teamcoco.com/image/thumb?id=88748',
+ },
+ }, {
+ 'url': 'http://teamcoco.com/video/full-episode-mon-6-1-joel-mchale-jake-tapper-and-musical-guest-courtney-barnett?playlist=x;eyJ0eXBlIjoidGFnIiwiaWQiOjl9',
+ 'info_dict': {
+ 'id': '89341',
+ 'ext': 'mp4',
+ 'title': 'Full Episode - Mon. 6/1 - Joel McHale, Jake Tapper, And Musical Guest Courtney Barnett',
+ 'description': 'Guests: Joel McHale, Jake Tapper, And Musical Guest Courtney Barnett',
+ },
+ 'skip': 'This video is no longer available.',
+ }, {
+ 'url': 'http://teamcoco.com/video/the-conan-audiencey-awards-for-04/25/18',
+ 'only_matching': True,
+ }, {
+ 'url': 'http://teamcoco.com/italy/conan-jordan-schlansky-hit-the-streets-of-florence',
+ 'only_matching': True,
+ }, {
+ 'url': 'http://teamcoco.com/haiti/conan-s-haitian-history-lesson',
+ 'only_matching': True,
+ }, {
+ 'url': 'http://teamcoco.com/israel/conan-hits-the-streets-beaches-of-tel-aviv',
+ 'only_matching': True,
+ },
+ ]
+
+ def _real_extract(self, url):
+ display_id = self._match_id(url).replace('/', '_')
+ webpage = self._download_webpage(url, display_id)
+ data = self._search_nextjs_data(webpage, display_id)['props']['pageProps']['pageData']
+ info = merge_dicts(*traverse_obj(data, (
+ 'blocks', lambda _, v: v['name'] in ('meta-tags', 'video-player', 'video-info'), 'props', {dict})))
+
+ thumbnail = traverse_obj(
+ info, (('image', 'poster'), {lambda x: urljoin('https://teamcoco.com/', x)}), get_all=False)
+ video_id = traverse_obj(parse_qs(thumbnail), ('id', 0)) or display_id
+
+ formats, subtitles = self._get_formats_and_subtitles(info, video_id)
+
+ return {
+ 'id': video_id,
+ 'display_id': display_id,
+ 'formats': formats,
+ 'subtitles': subtitles,
+ 'thumbnail': thumbnail,
+ **traverse_obj(info, {
+ 'title': 'title',
+ 'description': (('descriptionHtml', 'description'), {clean_html}),
+ 'timestamp': ('publishedOn', {lambda x: f'{x} 12:00AM'}, {unified_timestamp}),
+ }, get_all=False),
+ }
+
+
+class ConanClassicIE(TeamcocoBaseIE):
+ _VALID_URL = r'https?://(?:(?:www\.)?conanclassic|conan25\.teamcoco)\.com/(?P<id>([^/]+/)*[^/?#]+)'
+ _TESTS = [{
+ 'url': 'https://conanclassic.com/video/ice-cube-kevin-hart-conan-share-lyft',
+ 'info_dict': {
+ 'id': '74709',
+ 'ext': 'mp4',
+ 'title': 'Ice Cube, Kevin Hart, & Conan Share A Lyft Car',
+ 'display_id': 'video/ice-cube-kevin-hart-conan-share-lyft',
+ 'description': 'The stars of "Ride Along" teach Conan how to roll around Hollywood.',
+ 'thumbnail': 'http://cdn.teamcococdn.com/image/640x360/lyft-5bd75f82b616c.png',
+ 'duration': 570.0,
+ 'upload_date': '20131211',
+ 'timestamp': 1386721620,
+ '_old_archive_ids': ['teamcoco 74709'],
+ },
+ 'params': {'skip_download': 'm3u8'},
+ }, {
+ 'url': 'https://conan25.teamcoco.com/video/ice-cube-kevin-hart-conan-share-lyft',
+ 'only_matching': True,
+ }]
+
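+ # Single legacy GraphQL query fetching record metadata together with the
+ # Turner media ID and auth token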
+ _GRAPHQL_QUERY = '''query find($id: ID!) {
+ findRecord(id: $id) {
+
+... on MetaInterface {
+ id
+ title
+ teaser
+ publishOn
+ slug
+ thumb {
+
+... on FileInterface {
+ id
+ path
+ preview
+ mime
+}
+
+ }
+}
+
+... on Video {
+ videoType
+ duration
+ isLive
+ youtubeId
+ turnerMediaId
+ turnerMediaAuthToken
+ airDate
+}
+
+... on Episode {
+ airDate
+ seasonNumber
+ episodeNumber
+ guestNames
+}
+
+ }
+ findRecordVideoMetadata(id: $id) {
+ turnerMediaId
+ turnerMediaAuthToken
+ duration
+ src
+ }
+}'''
+
+ def _real_extract(self, url):
+ display_id = self._match_id(url)
+ webpage = self._download_webpage(url, display_id)
+ data = self._search_nextjs_data(webpage, display_id)['props']['pageProps']['pageData']
+ video_id = traverse_obj(
+ data, ('blocks', ..., 'props', 'fieldDefs', lambda _, v: v['name'] == 'incomingVideoId', 'value'),
+ ('blocks', ..., 'props', 'fields', 'incomingVideoRecord', 'id'), get_all=False)
+ if not video_id:
+ self.raise_no_formats('Unable to extract video ID from webpage', expected=True)
+
+ response = self._download_json(
+ 'https://conanclassic.com/api/legacy/graphql', video_id, data=json.dumps({
+ 'query': self._GRAPHQL_QUERY,
+ 'variables': {'id': video_id},
+ }, separators=(',', ':')).encode(), headers={
+ 'Content-Type': 'application/json',
+ })
+
+ info = traverse_obj(response, ('data', 'findRecord', {
+ 'title': 'title',
+ 'description': 'teaser',
+ 'thumbnail': ('thumb', 'preview', {url_or_none}),
+ 'duration': ('duration', {parse_duration}),
+ 'timestamp': ('publishOn', {unified_timestamp}),
+ }))
+
+ media_id = traverse_obj(
+ response, ('data', ('findRecord', 'findRecordVideoMetadata'), 'turnerMediaId'), get_all=False)
+ if media_id:
+ token = traverse_obj(
+ response, ('data', ('findRecord', 'findRecordVideoMetadata'), 'turnerMediaAuthToken'), get_all=False)
+ if not token:
+ raise ExtractorError('No Turner Media auth token found in API response')
+ self._initialize_geo_bypass({
+ 'countries': ['US'],
+ })
+ info.update(self._extract_ngtv_info(media_id, {
+ 'accessToken': token,
+ 'accessTokenType': 'jws',
+ }))
+ else:
+ formats, subtitles = self._get_formats_and_subtitles(
+ traverse_obj(response, ('data', 'findRecordVideoMetadata')), video_id)
+ info.update({
+ 'formats': formats,
+ 'subtitles': subtitles,
+ })
+
+ return {
+ 'id': video_id,
+ 'display_id': display_id,
+ '_old_archive_ids': [make_archive_id('Teamcoco', video_id)],
+ **info,
+ }
diff --git a/yt_dlp/extractor/teamtreehouse.py b/yt_dlp/extractor/teamtreehouse.py
new file mode 100644
index 0000000..dd802db
--- /dev/null
+++ b/yt_dlp/extractor/teamtreehouse.py
@@ -0,0 +1,134 @@
+import re
+
+from .common import InfoExtractor
+from ..utils import (
+ clean_html,
+ determine_ext,
+ ExtractorError,
+ float_or_none,
+ get_element_by_class,
+ get_element_by_id,
+ parse_duration,
+ remove_end,
+ urlencode_postdata,
+ urljoin,
+)
+
+
+class TeamTreeHouseIE(InfoExtractor):
+ _VALID_URL = r'https?://(?:www\.)?teamtreehouse\.com/library/(?P<id>[^/]+)'
+ _TESTS = [{
+ # Course
+ 'url': 'https://teamtreehouse.com/library/introduction-to-user-authentication-in-php',
+ 'info_dict': {
+ 'id': 'introduction-to-user-authentication-in-php',
+ 'title': 'Introduction to User Authentication in PHP',
+ 'description': 'md5:405d7b4287a159b27ddf30ca72b5b053',
+ },
+ 'playlist_mincount': 24,
+ }, {
+ # WorkShop
+ 'url': 'https://teamtreehouse.com/library/deploying-a-react-app',
+ 'info_dict': {
+ 'id': 'deploying-a-react-app',
+ 'title': 'Deploying a React App',
+ 'description': 'md5:10a82e3ddff18c14ac13581c9b8e5921',
+ },
+ 'playlist_mincount': 4,
+ }, {
+ # Video
+ 'url': 'https://teamtreehouse.com/library/application-overview-2',
+ 'info_dict': {
+ 'id': 'application-overview-2',
+ 'ext': 'mp4',
+ 'title': 'Application Overview',
+ 'description': 'md5:4b0a234385c27140a4378de5f1e15127',
+ },
+ 'expected_warnings': ['This is just a preview'],
+ }]
+ _NETRC_MACHINE = 'teamtreehouse'
+
+ def _perform_login(self, username, password):
+
+ signin_page = self._download_webpage(
+ 'https://teamtreehouse.com/signin',
+ None, 'Downloading signin page')
+ data = self._form_hidden_inputs('new_user_session', signin_page)
+ data.update({
+ 'user_session[email]': username,
+ 'user_session[password]': password,
+ })
+ error_message = get_element_by_class('error-message', self._download_webpage(
+ 'https://teamtreehouse.com/person_session',
+ None, 'Logging in', data=urlencode_postdata(data)))
+ if error_message:
+ raise ExtractorError(clean_html(error_message), expected=True)
+
+ def _real_extract(self, url):
+ display_id = self._match_id(url)
+ webpage = self._download_webpage(url, display_id)
+ title = self._html_search_meta(['og:title', 'twitter:title'], webpage)
+ description = self._html_search_meta(
+ ['description', 'og:description', 'twitter:description'], webpage)
+ entries = self._parse_html5_media_entries(url, webpage, display_id)
+ if entries:
+ info = entries[0]
+
+ for subtitles in info.get('subtitles', {}).values():
+ for subtitle in subtitles:
+ subtitle['ext'] = determine_ext(subtitle['url'], 'srt')
+
+ is_preview = 'data-preview="true"' in webpage
+ if is_preview:
+ self.report_warning(
+ 'This is just a preview. You need to be signed in with a Basic account to download the entire video.', display_id)
+ duration = 30
+ else:
+ duration = float_or_none(self._search_regex(
+ r'data-duration="(\d+)"', webpage, 'duration'), 1000)
+ if not duration:
+ duration = parse_duration(get_element_by_id(
+ 'video-duration', webpage))
+
+ info.update({
+ 'id': display_id,
+ 'title': title,
+ 'description': description,
+ 'duration': duration,
+ })
+ return info
+ else:
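+ # Course/workshop pages: collect the per-lecture links and wrap each one
+ # as a url_transparent entry resolved by this extractor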
+ def extract_urls(html, extract_info=None):
+ for path in re.findall(r'<a[^>]+href="([^"]+)"', html):
+ page_url = urljoin(url, path)
+ entry = {
+ '_type': 'url_transparent',
+ 'id': self._match_id(page_url),
+ 'url': page_url,
+ 'ie_key': self.ie_key(),
+ }
+ if extract_info:
+ entry.update(extract_info)
+ entries.append(entry)
+
+ workshop_videos = self._search_regex(
+ r'(?s)<ul[^>]+id="workshop-videos"[^>]*>(.+?)</ul>',
+ webpage, 'workshop videos', default=None)
+ if workshop_videos:
+ extract_urls(workshop_videos)
+ else:
+ stages_path = self._search_regex(
+ r'(?s)<div[^>]+id="syllabus-stages"[^>]+data-url="([^"]+)"',
+ webpage, 'stages path')
+ if stages_path:
+ stages_page = self._download_webpage(
+ urljoin(url, stages_path), display_id, 'Downloading stages page')
+ for chapter_number, (chapter, steps_list) in enumerate(re.findall(r'(?s)<h2[^>]*>\s*(.+?)\s*</h2>.+?<ul[^>]*>(.+?)</ul>', stages_page), 1):
+ extract_urls(steps_list, {
+ 'chapter': chapter,
+ 'chapter_number': chapter_number,
+ })
+ title = remove_end(title, ' Course')
+
+ return self.playlist_result(
+ entries, display_id, title, description)
diff --git a/yt_dlp/extractor/ted.py b/yt_dlp/extractor/ted.py
new file mode 100644
index 0000000..c28a154
--- /dev/null
+++ b/yt_dlp/extractor/ted.py
@@ -0,0 +1,236 @@
+import itertools
+import re
+
+from .common import InfoExtractor
+
+from ..utils import (
+ int_or_none,
+ str_to_int,
+ try_get,
+ url_or_none,
+ unified_strdate,
+ parse_duration,
+)
+
+
+class TedBaseIE(InfoExtractor):
+ _VALID_URL_BASE = r'https?://www\.ted\.com/(?:{type})(?:/lang/[^/#?]+)?/(?P<id>[\w-]+)'
+
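+ # Shared helper: yield talk URLs from a playlist node's videos.nodes list
+ # (as found in the Next.js page data)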
+ def _parse_playlist(self, playlist):
+ for entry in try_get(playlist, lambda x: x['videos']['nodes'], list):
+ if entry.get('__typename') == 'Video' and entry.get('canonicalUrl'):
+ yield self.url_result(entry['canonicalUrl'], TedTalkIE.ie_key())
+
+
+class TedTalkIE(TedBaseIE):
+ _VALID_URL = TedBaseIE._VALID_URL_BASE.format(type='talks')
+ _TESTS = [{
+ 'url': 'https://www.ted.com/talks/candace_parker_how_to_break_down_barriers_and_not_accept_limits',
+ 'md5': '47e82c666d9c3261d4fe74748a90aada',
+ 'info_dict': {
+ 'id': '86532',
+ 'ext': 'mp4',
+ 'title': 'How to break down barriers and not accept limits',
+ 'description': 'md5:000707cece219d1e165b11550d612331',
+ 'view_count': int,
+ 'tags': ['personal growth', 'equality', 'activism', 'motivation', 'social change', 'sports'],
+ 'uploader': 'Candace Parker',
+ 'duration': 676.0,
+ 'upload_date': '20220114',
+ 'release_date': '20211201',
+ 'thumbnail': r're:http.*\.jpg',
+ },
+ }]
+
+ def _real_extract(self, url):
+ display_id = self._match_id(url)
+ webpage = self._download_webpage(url, display_id)
+ talk_info = self._search_nextjs_data(webpage, display_id)['props']['pageProps']['videoData']
+ video_id = talk_info['id']
+ playerData = self._parse_json(talk_info.get('playerData'), video_id)
+
+ http_url = None
+ formats, subtitles = [], {}
+ for format_id, resources in (playerData.get('resources') or {}).items():
+ if format_id == 'hls':
+ stream_url = url_or_none(try_get(resources, lambda x: x['stream']))
+ if not stream_url:
+ continue
+ m3u8_formats, m3u8_subs = self._extract_m3u8_formats_and_subtitles(
+ stream_url, video_id, 'mp4', m3u8_id=format_id, fatal=False)
+ formats.extend(m3u8_formats)
+ subtitles = self._merge_subtitles(subtitles, m3u8_subs)
+ continue
+
+ if not isinstance(resources, list):
+ continue
+ if format_id == 'h264':
+ for resource in resources:
+ h264_url = resource.get('file')
+ if not h264_url:
+ continue
+ bitrate = int_or_none(resource.get('bitrate'))
+ formats.append({
+ 'url': h264_url,
+ 'format_id': '%s-%sk' % (format_id, bitrate),
+ 'tbr': bitrate,
+ })
+ if re.search(r'\d+k', h264_url):
+ http_url = h264_url
+ elif format_id == 'rtmp':
+ streamer = talk_info.get('streamer')
+ if not streamer:
+ continue
+ formats.extend({
+ 'format_id': '%s-%s' % (format_id, resource.get('name')),
+ 'url': streamer,
+ 'play_path': resource['file'],
+ 'ext': 'flv',
+ 'width': int_or_none(resource.get('width')),
+ 'height': int_or_none(resource.get('height')),
+ 'tbr': int_or_none(resource.get('bitrate')),
+ } for resource in resources if resource.get('file'))
+
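+ # If a progressive URL with a bitrate marker was found, synthesize direct
+ # HTTP variants by substituting each HLS rendition's bitrate into that URL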
+ if http_url:
+ m3u8_formats = [f for f in formats if f.get('protocol') == 'm3u8' and f.get('vcodec') != 'none']
+ for m3u8_format in m3u8_formats:
+ bitrate = self._search_regex(r'(\d+k)', m3u8_format['url'], 'bitrate', default=None)
+ if not bitrate:
+ continue
+ bitrate_url = re.sub(r'\d+k', bitrate, http_url)
+ if not self._is_valid_url(
+ bitrate_url, video_id, '%s bitrate' % bitrate):
+ continue
+ f = m3u8_format.copy()
+ f.update({
+ 'url': bitrate_url,
+ 'format_id': m3u8_format['format_id'].replace('hls', 'http'),
+ 'protocol': 'http',
+ })
+ if f.get('acodec') == 'none':
+ del f['acodec']
+ formats.append(f)
+
+ audio_download = talk_info.get('audioDownload')
+ if audio_download:
+ formats.append({
+ 'url': audio_download,
+ 'format_id': 'audio',
+ 'vcodec': 'none',
+ })
+
+ if not formats:
+ external = playerData.get('external') or {}
+ service = external.get('service') or ''
+ ext_url = external.get('code') if service.lower() == 'youtube' else None
+ return self.url_result(ext_url or external['uri'])
+
+ thumbnail = playerData.get('thumb') or self._og_search_property('image', webpage)
+ if thumbnail:
+ # trim thumbnail resize parameters
+ thumbnail = thumbnail.split('?')[0]
+
+ return {
+ 'id': video_id,
+ 'title': talk_info.get('title') or self._og_search_title(webpage),
+ 'uploader': talk_info.get('presenterDisplayName'),
+ 'thumbnail': thumbnail,
+ 'description': talk_info.get('description') or self._og_search_description(webpage),
+ 'subtitles': subtitles,
+ 'formats': formats,
+ 'duration': talk_info.get('duration') or parse_duration(self._og_search_property('video:duration', webpage)),
+ 'view_count': str_to_int(talk_info.get('viewedCount')),
+ 'upload_date': unified_strdate(talk_info.get('publishedAt')),
+ 'release_date': unified_strdate(talk_info.get('recordedOn')),
+ 'tags': try_get(playerData, lambda x: x['targeting']['tag'].split(',')),
+ }
+
+
+class TedSeriesIE(TedBaseIE):
+ _VALID_URL = fr'{TedBaseIE._VALID_URL_BASE.format(type=r"series")}(?:#season_(?P<season>\d+))?'
+ _TESTS = [{
+ 'url': 'https://www.ted.com/series/small_thing_big_idea',
+ 'info_dict': {
+ 'id': '3',
+ 'title': 'Small Thing Big Idea',
+ 'series': 'Small Thing Big Idea',
+ 'description': 'md5:6869ca52cec661aef72b3e9f7441c55c'
+ },
+ 'playlist_mincount': 16,
+ }, {
+ 'url': 'https://www.ted.com/series/the_way_we_work#season_2',
+ 'info_dict': {
+ 'id': '8_2',
+ 'title': 'The Way We Work Season 2',
+ 'series': 'The Way We Work',
+ 'description': 'md5:59469256e533e1a48c4aa926a382234c',
+ 'season_number': 2
+ },
+ 'playlist_mincount': 8,
+ }]
+
+ def _real_extract(self, url):
+ display_id, season = self._match_valid_url(url).group('id', 'season')
+ webpage = self._download_webpage(url, display_id, 'Downloading series webpage')
+ info = self._search_nextjs_data(webpage, display_id)['props']['pageProps']
+
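+ # Without a #season_N fragment all seasons are included; otherwise only
+ # the matching season is kept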
+ entries = itertools.chain.from_iterable(
+ self._parse_playlist(s) for s in info['seasons'] if season in [None, s.get('seasonNumber')])
+
+ series_id = try_get(info, lambda x: x['series']['id'])
+ series_name = try_get(info, lambda x: x['series']['name']) or self._og_search_title(webpage, fatal=False)
+
+ return self.playlist_result(
+ entries,
+ f'{series_id}_{season}' if season and series_id else series_id,
+ f'{series_name} Season {season}' if season else series_name,
+ self._og_search_description(webpage),
+ series=series_name, season_number=int_or_none(season))
+
+
+class TedPlaylistIE(TedBaseIE):
+ _VALID_URL = TedBaseIE._VALID_URL_BASE.format(type=r'playlists(?:/\d+)?')
+ _TESTS = [{
+ 'url': 'https://www.ted.com/playlists/171/the_most_popular_talks_of_all',
+ 'info_dict': {
+ 'id': '171',
+ 'title': 'The most popular talks of all time',
+ 'description': 'md5:d2f22831dc86c7040e733a3cb3993d78'
+ },
+ 'playlist_mincount': 25,
+ }]
+
+ def _real_extract(self, url):
+ display_id = self._match_id(url)
+ webpage = self._download_webpage(url, display_id)
+ playlist = self._search_nextjs_data(webpage, display_id)['props']['pageProps']['playlist']
+
+ return self.playlist_result(
+ self._parse_playlist(playlist), playlist.get('id'),
+ playlist.get('title') or self._og_search_title(webpage, default='').replace(' | TED Talks', '') or None,
+ self._og_search_description(webpage))
+
+
+class TedEmbedIE(InfoExtractor):
+ _VALID_URL = r'https?://embed(?:-ssl)?\.ted\.com/'
+ _EMBED_REGEX = [rf'<iframe[^>]+?src=(["\'])(?P<url>{_VALID_URL}.+?)\1']
+
+ _TESTS = [{
+ 'url': 'https://embed.ted.com/talks/janet_stovall_how_to_get_serious_about_diversity_and_inclusion_in_the_workplace',
+ 'info_dict': {
+ 'id': '21802',
+ 'ext': 'mp4',
+ 'title': 'How to get serious about diversity and inclusion in the workplace',
+ 'description': 'md5:0978aafe396e05341f8ecc795d22189d',
+ 'view_count': int,
+ 'tags': list,
+ 'uploader': 'Janet Stovall',
+ 'duration': 664.0,
+ 'upload_date': '20180822',
+ 'release_date': '20180719',
+ 'thumbnail': r're:http.*\.jpg',
+ },
+ }]
+
+ def _real_extract(self, url):
+ return self.url_result(re.sub(r'://embed(-ssl)?', '://www', url), TedTalkIE.ie_key())
diff --git a/yt_dlp/extractor/tele13.py b/yt_dlp/extractor/tele13.py
new file mode 100644
index 0000000..212af37
--- /dev/null
+++ b/yt_dlp/extractor/tele13.py
@@ -0,0 +1,84 @@
+from .common import InfoExtractor
+from .youtube import YoutubeIE
+from ..utils import (
+ js_to_json,
+ qualities,
+ determine_ext,
+)
+
+
+class Tele13IE(InfoExtractor):
+ _VALID_URL = r'^https?://(?:www\.)?t13\.cl/videos(?:/[^/]+)+/(?P<id>[\w-]+)'
+ _TESTS = [
+ {
+ 'url': 'http://www.t13.cl/videos/actualidad/el-circulo-de-hierro-de-michelle-bachelet-en-su-regreso-a-la-moneda',
+ 'md5': '4cb1fa38adcad8fea88487a078831755',
+ 'info_dict': {
+ 'id': 'el-circulo-de-hierro-de-michelle-bachelet-en-su-regreso-a-la-moneda',
+ 'ext': 'mp4',
+ 'title': 'El círculo de hierro de Michelle Bachelet en su regreso a La Moneda',
+ },
+ 'params': {
+ # HTTP Error 404: Not Found
+ 'skip_download': True,
+ },
+ },
+ {
+ 'url': 'http://www.t13.cl/videos/mundo/tendencias/video-captan-misteriosa-bola-fuego-cielos-bangkok',
+ 'md5': '867adf6a3b3fef932c68a71d70b70946',
+ 'info_dict': {
+ 'id': 'rOoKv2OMpOw',
+ 'ext': 'mp4',
+ 'title': 'Shooting star seen on 7-Sep-2015',
+ 'description': 'md5:7292ff2a34b2f673da77da222ae77e1e',
+ 'uploader': 'Porjai Jaturongkhakun',
+ 'upload_date': '20150906',
+ 'uploader_id': 'UCnLY_3ezwNcDSC_Wc6suZxw',
+ },
+ 'add_ie': ['Youtube'],
+ }
+ ]
+
+ def _real_extract(self, url):
+ display_id = self._match_id(url)
+ webpage = self._download_webpage(url, display_id)
+
+ setup_js = self._search_regex(
+ r"(?s)jwplayer\('player-vivo'\).setup\((\{.*?\})\)",
+ webpage, 'setup code')
+ sources = self._parse_json(self._search_regex(
+ r'sources\s*:\s*(\[[^\]]+\])', setup_js, 'sources'),
+ display_id, js_to_json)
+
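+ # Quality labels used by the player config, ordered lowest to highest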
+ preference = qualities(['Móvil', 'SD', 'HD'])
+ formats = []
+ urls = []
+ for f in sources:
+ format_url = f['file']
+ if format_url and format_url not in urls:
+ ext = determine_ext(format_url)
+ if ext == 'm3u8':
+ formats.extend(self._extract_m3u8_formats(
+ format_url, display_id, 'mp4', 'm3u8_native',
+ m3u8_id='hls', fatal=False))
+ elif YoutubeIE.suitable(format_url):
+ return self.url_result(format_url, 'Youtube')
+ else:
+ formats.append({
+ 'url': format_url,
+ 'format_id': f.get('label'),
+ 'quality': preference(f.get('label')),
+ 'ext': ext,
+ })
+ urls.append(format_url)
+
+ return {
+ 'id': display_id,
+ 'title': self._search_regex(
+ r'title\s*:\s*"([^"]+)"', setup_js, 'title'),
+ 'description': self._html_search_meta(
+ 'description', webpage, 'description'),
+ 'thumbnail': self._search_regex(
+ r'image\s*:\s*"([^"]+)"', setup_js, 'thumbnail', default=None),
+ 'formats': formats,
+ }
diff --git a/yt_dlp/extractor/tele5.py b/yt_dlp/extractor/tele5.py
new file mode 100644
index 0000000..72f67e4
--- /dev/null
+++ b/yt_dlp/extractor/tele5.py
@@ -0,0 +1,89 @@
+from .dplay import DPlayIE
+from ..compat import compat_urlparse
+from ..utils import (
+ ExtractorError,
+ extract_attributes,
+)
+
+
+class Tele5IE(DPlayIE): # XXX: Do not subclass from concrete IE
+ _WORKING = False
+ _VALID_URL = r'https?://(?:www\.)?tele5\.de/(?:[^/]+/)*(?P<id>[^/?#&]+)'
+ _GEO_COUNTRIES = ['DE']
+ _TESTS = [{
+ 'url': 'https://www.tele5.de/mediathek/filme-online/videos?vid=1549416',
+ 'info_dict': {
+ 'id': '1549416',
+ 'ext': 'mp4',
+ 'upload_date': '20180814',
+ 'timestamp': 1534290623,
+ 'title': 'Pandorum',
+ },
+ 'params': {
+ 'skip_download': True,
+ },
+ 'skip': 'No longer available: "404 Seite nicht gefunden"',
+ }, {
+ # jwplatform, nexx unavailable
+ 'url': 'https://www.tele5.de/filme/ghoul-das-geheimnis-des-friedhofmonsters/',
+ 'info_dict': {
+ 'id': 'WJuiOlUp',
+ 'ext': 'mp4',
+ 'upload_date': '20200603',
+ 'timestamp': 1591214400,
+ 'title': 'Ghoul - Das Geheimnis des Friedhofmonsters',
+ 'description': 'md5:42002af1d887ff3d5b2b3ca1f8137d97',
+ },
+ 'params': {
+ 'skip_download': True,
+ },
+ 'skip': 'No longer available, redirects to Filme page',
+ }, {
+ 'url': 'https://tele5.de/mediathek/angel-of-mine/',
+ 'info_dict': {
+ 'id': '1252360',
+ 'ext': 'mp4',
+ 'upload_date': '20220109',
+ 'timestamp': 1641762000,
+ 'title': 'Angel of Mine',
+ 'description': 'md5:a72546a175e1286eb3251843a52d1ad7',
+ },
+ 'params': {
+ 'format': 'bestvideo',
+ },
+ }, {
+ 'url': 'https://www.tele5.de/kalkofes-mattscheibe/video-clips/politik-und-gesellschaft?ve_id=1551191',
+ 'only_matching': True,
+ }, {
+ 'url': 'https://www.tele5.de/video-clip/?ve_id=1609440',
+ 'only_matching': True,
+ }, {
+ 'url': 'https://www.tele5.de/filme/schlefaz-dragon-crusaders/',
+ 'only_matching': True,
+ }, {
+ 'url': 'https://www.tele5.de/filme/making-of/avengers-endgame/',
+ 'only_matching': True,
+ }, {
+ 'url': 'https://www.tele5.de/star-trek/raumschiff-voyager/ganze-folge/das-vinculum/',
+ 'only_matching': True,
+ }, {
+ 'url': 'https://www.tele5.de/anders-ist-sevda/',
+ 'only_matching': True,
+ }]
+
+ def _real_extract(self, url):
+ video_id = self._match_id(url)
+ webpage = self._download_webpage(url, video_id)
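+ # Playback parameters are exposed as attributes on a <hyoga-player>
+ # element and handed to the inherited DPlay disco API helper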
+ player_element = self._search_regex(r'(<hyoga-player\b[^>]+?>)', webpage, 'video player')
+ player_info = extract_attributes(player_element)
+ asset_id, country, realm = (player_info[x] for x in ('assetid', 'locale', 'realm', ))
+ endpoint = compat_urlparse.urlparse(player_info['endpoint']).hostname
+ source_type = player_info.get('sourcetype')
+ if source_type:
+ endpoint = '%s-%s' % (source_type, endpoint)
+ try:
+ return self._get_disco_api_info(url, asset_id, endpoint, realm, country)
+ except ExtractorError as e:
+ if getattr(e, 'message', '') == 'Missing deviceId in context':
+ self.report_drm(video_id)
+ raise
diff --git a/yt_dlp/extractor/telebruxelles.py b/yt_dlp/extractor/telebruxelles.py
new file mode 100644
index 0000000..2c50a67
--- /dev/null
+++ b/yt_dlp/extractor/telebruxelles.py
@@ -0,0 +1,72 @@
+import re
+
+from .common import InfoExtractor
+
+
+class TeleBruxellesIE(InfoExtractor):
+ _VALID_URL = r'https?://(?:www\.)?(?:telebruxelles|bx1)\.be/(?:[^/]+/)*(?P<id>[^/#?]+)'
+ _TESTS = [{
+ 'url': 'http://bx1.be/news/que-risque-lauteur-dune-fausse-alerte-a-la-bombe/',
+ 'md5': 'a2a67a5b1c3e8c9d33109b902f474fd9',
+ 'info_dict': {
+ 'id': '158856',
+ 'display_id': 'que-risque-lauteur-dune-fausse-alerte-a-la-bombe',
+ 'ext': 'mp4',
+ 'title': 'Que risque l’auteur d’une fausse alerte à la bombe ?',
+ 'description': 'md5:3cf8df235d44ebc5426373050840e466',
+ },
+ }, {
+ 'url': 'http://bx1.be/sport/futsal-schaerbeek-sincline-5-3-a-thulin/',
+ 'md5': 'dfe07ecc9c153ceba8582ac912687675',
+ 'info_dict': {
+ 'id': '158433',
+ 'display_id': 'futsal-schaerbeek-sincline-5-3-a-thulin',
+ 'ext': 'mp4',
+ 'title': 'Futsal : Schaerbeek s’incline 5-3 à Thulin',
+ 'description': 'md5:fd013f1488d5e2dceb9cebe39e2d569b',
+ },
+ }, {
+ 'url': 'http://bx1.be/emission/bxenf1-gastronomie/',
+ 'only_matching': True,
+ }, {
+ 'url': 'https://bx1.be/berchem-sainte-agathe/personnel-carrefour-de-berchem-sainte-agathe-inquiet/',
+ 'only_matching': True,
+ }, {
+ 'url': 'https://bx1.be/dernier-jt/',
+ 'only_matching': True,
+ }, {
+ # live stream
+ 'url': 'https://bx1.be/lives/direct-tv/',
+ 'only_matching': True,
+ }]
+
+ def _real_extract(self, url):
+ display_id = self._match_id(url)
+ webpage = self._download_webpage(url, display_id)
+
+ article_id = self._html_search_regex(
+ r'<article[^>]+\bid=["\']post-(\d+)', webpage, 'article ID', default=None)
+ title = self._html_search_regex(
+ r'<h1[^>]*>(.+?)</h1>', webpage, 'title',
+ default=None) or self._og_search_title(webpage)
+ description = self._og_search_description(webpage, default=None)
+
+ rtmp_url = self._html_search_regex(
+ r'file["\']?\s*:\s*"(r(?:tm|mt)ps?://[^/]+/(?:vod/mp4:"\s*\+\s*"[^"]+"\s*\+\s*"\.mp4|stream/live))"',
+ webpage, 'RTMP url')
+ # Yes, they have a typo in the scheme name for live stream URLs (e.g.
+ # https://bx1.be/lives/direct-tv/)
+ rtmp_url = re.sub(r'^rmtp', 'rtmp', rtmp_url)
+ rtmp_url = re.sub(r'"\s*\+\s*"', '', rtmp_url)
+ formats = self._extract_wowza_formats(rtmp_url, article_id or display_id)
+
+ is_live = 'stream/live' in rtmp_url
+
+ return {
+ 'id': article_id or display_id,
+ 'display_id': display_id,
+ 'title': title,
+ 'description': description,
+ 'formats': formats,
+ 'is_live': is_live,
+ }
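
The two re.sub calls above repair quirks in the page's JavaScript before the
URL is usable. A standalone sketch with an assumed sample of the captured
value, showing both fixes:

    import re

    raw = 'rmtps://media.example.be/vod/mp4:" + "reportages/158856" + ".mp4'
    fixed = re.sub(r'^rmtp', 'rtmp', raw)     # repair the site's scheme typo
    fixed = re.sub(r'"\s*\+\s*"', '', fixed)  # collapse JS string concatenation
    assert fixed == 'rtmps://media.example.be/vod/mp4:reportages/158856.mp4'
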
diff --git a/yt_dlp/extractor/telecaribe.py b/yt_dlp/extractor/telecaribe.py
new file mode 100644
index 0000000..91118a1
--- /dev/null
+++ b/yt_dlp/extractor/telecaribe.py
@@ -0,0 +1,91 @@
+import re
+
+from .common import InfoExtractor
+from ..utils import traverse_obj
+
+
+class TelecaribePlayIE(InfoExtractor):
+ _VALID_URL = r'https?://(?:www\.)?play\.telecaribe\.co/(?P<id>[\w-]+)'
+ _TESTS = [{
+ 'url': 'https://www.play.telecaribe.co/breicok',
+ 'info_dict': {
+ 'id': 'breicok',
+ 'title': 'Breicok',
+ },
+ 'playlist_count': 7,
+ }, {
+ 'url': 'https://www.play.telecaribe.co/si-fue-gol-de-yepes',
+ 'info_dict': {
+ 'id': 'si-fue-gol-de-yepes',
+ 'title': 'Sí Fue Gol de Yepes',
+ },
+ 'playlist_count': 6,
+ }, {
+ 'url': 'https://www.play.telecaribe.co/ciudad-futura',
+ 'info_dict': {
+ 'id': 'ciudad-futura',
+ 'title': 'Ciudad Futura',
+ },
+ 'playlist_count': 10,
+ }, {
+ 'url': 'https://www.play.telecaribe.co/live',
+ 'info_dict': {
+ 'id': 'live',
+ 'title': r're:^Señal en vivo',
+ 'live_status': 'is_live',
+ 'ext': 'mp4',
+ },
+ 'params': {
+ 'skip_download': 'Livestream',
+ },
+ }, {
+ 'url': 'https://www.play.telecaribe.co/liveplus',
+ 'info_dict': {
+ 'id': 'liveplus',
+ 'title': r're:^Señal en vivo Plus',
+ 'live_status': 'is_live',
+ 'ext': 'mp4',
+ },
+ 'params': {
+ 'skip_download': 'Livestream',
+ },
+ 'skip': 'Geo-restricted to Colombia',
+ }]
+
+ def _download_player_webpage(self, webpage, display_id):
+ page_id = self._search_regex(
+ (r'window\.firstPageId\s*=\s*["\']([^"\']+)', r'<div[^>]+id\s*=\s*"pageBackground_([^"]+)'),
+ webpage, 'page_id')
+
+ props = self._download_json(self._search_regex(
+ rf'<link[^>]+href\s*=\s*"([^"]+)"[^>]+id\s*=\s*"features_{page_id}"',
+ webpage, 'json_props_url'), display_id)['props']['render']['compProps']
+
+ return self._download_webpage(traverse_obj(props, (..., 'url'))[-1], display_id)
+
+ def _get_clean_title(self, title):
+ return re.sub(r'\s*\|\s*Telecaribe\s*VOD', '', title or '').strip() or None
+
+ def _real_extract(self, url):
+ display_id = self._match_id(url)
+ webpage = self._download_webpage(url, display_id)
+ player = self._download_player_webpage(webpage, display_id)
+
+ livestream_url = self._search_regex(
+ r'(?:let|const|var)\s+source\s*=\s*["\']([^"\']+)', player, 'm3u8 url', default=None)
+
+ if not livestream_url:
+ return self.playlist_from_matches(
+ re.findall(r'<a[^>]+href\s*=\s*"([^"]+\.mp4)', player), display_id,
+ self._get_clean_title(self._og_search_title(webpage)))
+
+ formats, subtitles = self._extract_m3u8_formats_and_subtitles(
+ livestream_url, display_id, 'mp4', live=True)
+
+ return {
+ 'id': display_id,
+ 'title': self._get_clean_title(self._og_search_title(webpage)),
+ 'formats': formats,
+ 'subtitles': subtitles,
+ 'is_live': True,
+ }
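
_get_clean_title only strips the site's "| Telecaribe VOD" suffix from the
og:title, degrading to None when nothing usable remains. A quick standalone
check of that behaviour:

    import re

    def get_clean_title(title):  # mirrors TelecaribePlayIE._get_clean_title
        return re.sub(r'\s*\|\s*Telecaribe\s*VOD', '', title or '').strip() or None

    assert get_clean_title('Ciudad Futura | Telecaribe VOD') == 'Ciudad Futura'
    assert get_clean_title(None) is None  # missing og:title stays None
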
diff --git a/yt_dlp/extractor/telecinco.py b/yt_dlp/extractor/telecinco.py
new file mode 100644
index 0000000..a3f0c7c
--- /dev/null
+++ b/yt_dlp/extractor/telecinco.py
@@ -0,0 +1,146 @@
+import json
+import re
+
+from .common import InfoExtractor
+from ..utils import (
+ clean_html,
+ int_or_none,
+ str_or_none,
+ try_get,
+)
+
+
+class TelecincoIE(InfoExtractor):
+ IE_DESC = 'telecinco.es, cuatro.com and mediaset.es'
+ _VALID_URL = r'https?://(?:www\.)?(?:telecinco\.es|cuatro\.com|mediaset\.es)/(?:[^/]+/)+(?P<id>.+?)\.html'
+
+ _TESTS = [{
+ 'url': 'http://www.telecinco.es/robinfood/temporada-01/t01xp14/Bacalao-cocochas-pil-pil_0_1876350223.html',
+ 'info_dict': {
+ 'id': '1876350223',
+ 'title': 'Bacalao con kokotxas al pil-pil',
+ 'description': 'md5:716caf5601e25c3c5ab6605b1ae71529',
+ },
+ 'playlist': [{
+ 'md5': '7ee56d665cfd241c0e6d80fd175068b0',
+ 'info_dict': {
+ 'id': 'JEA5ijCnF6p5W08A1rNKn7',
+ 'ext': 'mp4',
+ 'title': 'Con Martín Berasategui, hacer un bacalao al pil-pil es fácil y divertido',
+ 'duration': 662,
+ },
+ }]
+ }, {
+ 'url': 'http://www.cuatro.com/deportes/futbol/barcelona/Leo_Messi-Champions-Roma_2_2052780128.html',
+ 'md5': 'c86fe0d99e3bdb46b7950d38bf6ef12a',
+ 'info_dict': {
+ 'id': 'jn24Od1zGLG4XUZcnUnZB6',
+ 'ext': 'mp4',
+ 'title': '¿Quién es este ex futbolista con el que hablan Leo Messi y Luis Suárez?',
+ 'description': 'md5:a62ecb5f1934fc787107d7b9a2262805',
+ 'duration': 79,
+ },
+ }, {
+ 'url': 'http://www.mediaset.es/12meses/campanas/doylacara/conlatratanohaytrato/Ayudame-dar-cara-trata-trato_2_1986630220.html',
+ 'md5': 'eddb50291df704ce23c74821b995bcac',
+ 'info_dict': {
+ 'id': 'aywerkD2Sv1vGNqq9b85Q2',
+ 'ext': 'mp4',
+ 'title': '#DOYLACARA. Con la trata no hay trato',
+ 'description': 'md5:2771356ff7bfad9179c5f5cd954f1477',
+ 'duration': 50,
+ },
+ }, {
+ # video in opening's content
+ 'url': 'https://www.telecinco.es/vivalavida/fiorella-sobrina-edmundo-arrocet-entrevista_18_2907195140.html',
+ 'info_dict': {
+ 'id': '2907195140',
+ 'title': 'La surrealista entrevista a la sobrina de Edmundo Arrocet: "No puedes venir aquí y tomarnos por tontos"',
+ 'description': 'md5:73f340a7320143d37ab895375b2bf13a',
+ },
+ 'playlist': [{
+ 'md5': 'adb28c37238b675dad0f042292f209a7',
+ 'info_dict': {
+ 'id': 'TpI2EttSDAReWpJ1o0NVh2',
+ 'ext': 'mp4',
+ 'title': 'La surrealista entrevista a la sobrina de Edmundo Arrocet: "No puedes venir aquí y tomarnos por tontos"',
+ 'duration': 1015,
+ },
+ }],
+ 'params': {
+ 'skip_download': True,
+ },
+ }, {
+ 'url': 'http://www.telecinco.es/informativos/nacional/Pablo_Iglesias-Informativos_Telecinco-entrevista-Pedro_Piqueras_2_1945155182.html',
+ 'only_matching': True,
+ }, {
+ 'url': 'http://www.telecinco.es/espanasinirmaslejos/Espana-gran-destino-turistico_2_1240605043.html',
+ 'only_matching': True,
+ }, {
+ 'url': 'http://www.cuatro.com/chesterinlove/a-carta/chester-chester_in_love-chester_edu_2_2331030022.html',
+ 'only_matching': True,
+ }]
+
+ def _parse_content(self, content, url):
+ video_id = content['dataMediaId']
+ config = self._download_json(
+ content['dataConfig'], video_id, 'Downloading config JSON')
+ title = config['info']['title']
+ services = config['services']
+ caronte = self._download_json(services['caronte'], video_id)
+ stream = caronte['dls'][0]['stream']
+ headers = self.geo_verification_headers()
+ headers.update({
+ 'Content-Type': 'application/json;charset=UTF-8',
+ 'Origin': re.match(r'https?://[^/]+', url).group(0),
+ })
+ cdn = self._download_json(
+ caronte['cerbero'], video_id, data=json.dumps({
+ 'bbx': caronte['bbx'],
+ 'gbx': self._download_json(services['gbx'], video_id)['gbx'],
+ }).encode(), headers=headers)['tokens']['1']['cdn']
+ formats = self._extract_m3u8_formats(
+ stream + '?' + cdn, video_id, 'mp4', 'm3u8_native', m3u8_id='hls')
+
+ return {
+ 'id': video_id,
+ 'title': title,
+ 'formats': formats,
+ 'thumbnail': content.get('dataPoster') or config.get('poster', {}).get('imageUrl'),
+ 'duration': int_or_none(content.get('dataDuration')),
+ }
+
+ def _real_extract(self, url):
+ display_id = self._match_id(url)
+ webpage = self._download_webpage(url, display_id)
+ article = self._parse_json(self._search_regex(
+ r'window\.\$REACTBASE_STATE\.article(?:_multisite)?\s*=\s*({.+})',
+ webpage, 'article'), display_id)['article']
+ title = article.get('title')
+ description = clean_html(article.get('leadParagraph')) or ''
+ if article.get('editorialType') != 'VID':
+ entries = []
+ body = [article.get('opening')]
+ body.extend(try_get(article, lambda x: x['body'], list) or [])
+ for p in body:
+ if not isinstance(p, dict):
+ continue
+ content = p.get('content')
+ if not content:
+ continue
+ type_ = p.get('type')
+ if type_ == 'paragraph':
+ content_str = str_or_none(content)
+ if content_str:
+ description += content_str
+ continue
+ if type_ == 'video' and isinstance(content, dict):
+ entries.append(self._parse_content(content, url))
+ return self.playlist_result(
+ entries, str_or_none(article.get('id')), title, description)
+ content = article['opening']['content']
+ info = self._parse_content(content, url)
+ info.update({
+ 'description': description,
+ })
+ return info
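
_parse_content performs a two-step token dance: the caronte service supplies
the stream URL plus a 'bbx' blob, and POSTing that together with the 'gbx'
blob to the cerbero endpoint yields a CDN token that becomes the query string
of the HLS manifest URL. A minimal standalone sketch of that exchange; the
endpoint URLs come from the site's config JSON and are not hard-coded here:

    import json
    import urllib.request

    def fetch_cdn_token(cerbero_url, bbx, gbx, origin):
        req = urllib.request.Request(
            cerbero_url,
            data=json.dumps({'bbx': bbx, 'gbx': gbx}).encode(),
            headers={'Content-Type': 'application/json;charset=UTF-8',
                     'Origin': origin})
        with urllib.request.urlopen(req) as resp:
            return json.load(resp)['tokens']['1']['cdn']

    # The token is then appended to the stream URL: f'{stream}?{token}'
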
diff --git a/yt_dlp/extractor/telegraaf.py b/yt_dlp/extractor/telegraaf.py
new file mode 100644
index 0000000..13e9515
--- /dev/null
+++ b/yt_dlp/extractor/telegraaf.py
@@ -0,0 +1,86 @@
+from .common import InfoExtractor
+from ..utils import (
+ determine_ext,
+ int_or_none,
+ parse_iso8601,
+ try_get,
+)
+
+
+class TelegraafIE(InfoExtractor):
+ _VALID_URL = r'https?://(?:www\.)?telegraaf\.nl/video/(?P<id>\d+)'
+ _TEST = {
+ 'url': 'https://www.telegraaf.nl/video/734366489/historisch-scheepswrak-slaat-na-100-jaar-los',
+ 'info_dict': {
+ 'id': 'gaMItuoSeUg2',
+ 'ext': 'mp4',
+ 'title': 'Historisch scheepswrak slaat na 100 jaar los',
+ 'description': 'md5:6f53b7c4f55596722ac24d6c0ec00cfb',
+ 'thumbnail': r're:^https?://.*\.jpg',
+ 'duration': 55,
+ 'timestamp': 1572805527,
+ 'upload_date': '20191103',
+ },
+ 'params': {
+ # m3u8 download
+ 'skip_download': True,
+ },
+ }
+
+ def _real_extract(self, url):
+ article_id = self._match_id(url)
+
+ video_id = self._download_json(
+ 'https://app.telegraaf.nl/graphql', article_id,
+ headers={'User-Agent': 'De Telegraaf/6.8.11 (Android 11; en_US)'},
+ query={
+ 'query': '''{
+ article(uid: %s) {
+ videos {
+ videoId
+ }
+ }
+}''' % article_id,
+ })['data']['article']['videos'][0]['videoId']
+
+ item = self._download_json(
+ 'https://content.tmgvideo.nl/playlist/item=%s/playlist.json' % video_id,
+ video_id)['items'][0]
+ title = item['title']
+
+ formats = []
+ locations = item.get('locations') or {}
+ for location in locations.get('adaptive', []):
+ manifest_url = location.get('src')
+ if not manifest_url:
+ continue
+ ext = determine_ext(manifest_url)
+ if ext == 'm3u8':
+ formats.extend(self._extract_m3u8_formats(
+ manifest_url, video_id, ext='mp4', m3u8_id='hls', fatal=False))
+ elif ext == 'mpd':
+ formats.extend(self._extract_mpd_formats(
+ manifest_url, video_id, mpd_id='dash', fatal=False))
+ else:
+ self.report_warning('Unknown adaptive format %s' % ext)
+ for location in locations.get('progressive', []):
+ src = try_get(location, lambda x: x['sources'][0]['src'])
+ if not src:
+ continue
+ label = location.get('label')
+ formats.append({
+ 'url': src,
+ 'width': int_or_none(location.get('width')),
+ 'height': int_or_none(location.get('height')),
+ 'format_id': 'http' + ('-%s' % label if label else ''),
+ })
+
+ return {
+ 'id': video_id,
+ 'title': title,
+ 'description': item.get('description'),
+ 'formats': formats,
+ 'duration': int_or_none(item.get('duration')),
+ 'thumbnail': item.get('poster'),
+ 'timestamp': parse_iso8601(item.get('datecreated'), ' '),
+ }
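
One subtlety above: the playlist item's 'datecreated' field separates date
and time with a space rather than the ISO 'T', which is why parse_iso8601 is
given an explicit delimiter. A check with an assumed sample value:

    from yt_dlp.utils import parse_iso8601

    assert parse_iso8601('2019-11-03 18:45:27', 'T') is None      # default fails
    assert parse_iso8601('2019-11-03 18:45:27', ' ') is not None  # POSIX timestamp
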
diff --git a/yt_dlp/extractor/telegram.py b/yt_dlp/extractor/telegram.py
new file mode 100644
index 0000000..5ec5485
--- /dev/null
+++ b/yt_dlp/extractor/telegram.py
@@ -0,0 +1,136 @@
+import re
+
+from .common import InfoExtractor
+from ..utils import (
+ clean_html,
+ format_field,
+ get_element_by_class,
+ parse_duration,
+ parse_qs,
+ traverse_obj,
+ unified_timestamp,
+ update_url_query,
+ url_basename,
+)
+
+
+class TelegramEmbedIE(InfoExtractor):
+ IE_NAME = 'telegram:embed'
+ _VALID_URL = r'https?://t\.me/(?P<channel_id>[^/]+)/(?P<id>\d+)'
+ _TESTS = [{
+ 'url': 'https://t.me/europa_press/613',
+ 'md5': 'dd707708aea958c11a590e8068825f22',
+ 'info_dict': {
+ 'id': '613',
+ 'ext': 'mp4',
+ 'title': 'md5:6ce2d7e8d56eda16d80607b23db7b252',
+ 'description': 'md5:6ce2d7e8d56eda16d80607b23db7b252',
+ 'channel_id': 'europa_press',
+ 'channel': 'Europa Press ✔',
+ 'thumbnail': r're:^https?://.+',
+ 'timestamp': 1635631203,
+ 'upload_date': '20211030',
+ 'duration': 61,
+ },
+ }, {
+ # 2-video post
+ 'url': 'https://t.me/vorposte/29342',
+ 'info_dict': {
+ 'id': 'vorposte-29342',
+ 'title': 'Форпост 29342',
+ 'description': 'md5:9d92e22169a3e136d5d69df25f82c3dc',
+ },
+ 'playlist_count': 2,
+ 'params': {
+ 'skip_download': True,
+ },
+ }, {
+ # 2-video post with --no-playlist
+ 'url': 'https://t.me/vorposte/29343',
+ 'md5': '1724e96053c18e788c8464038876e245',
+ 'info_dict': {
+ 'id': '29343',
+ 'ext': 'mp4',
+ 'title': 'md5:9d92e22169a3e136d5d69df25f82c3dc',
+ 'description': 'md5:9d92e22169a3e136d5d69df25f82c3dc',
+ 'channel_id': 'vorposte',
+ 'channel': 'Форпост',
+ 'thumbnail': r're:^https?://.+',
+ 'timestamp': 1666384480,
+ 'upload_date': '20221021',
+ 'duration': 35,
+ },
+ 'params': {
+ 'noplaylist': True,
+ },
+ }, {
+ # 2-video post with 'single' query param
+ 'url': 'https://t.me/vorposte/29342?single',
+ 'md5': 'd20b202f1e41400a9f43201428add18f',
+ 'info_dict': {
+ 'id': '29342',
+ 'ext': 'mp4',
+ 'title': 'md5:9d92e22169a3e136d5d69df25f82c3dc',
+ 'description': 'md5:9d92e22169a3e136d5d69df25f82c3dc',
+ 'channel_id': 'vorposte',
+ 'channel': 'Форпост',
+ 'thumbnail': r're:^https?://.+',
+ 'timestamp': 1666384480,
+ 'upload_date': '20221021',
+ 'duration': 33,
+ },
+ }]
+
+ def _real_extract(self, url):
+ channel_id, msg_id = self._match_valid_url(url).group('channel_id', 'id')
+ embed = self._download_webpage(
+ url, msg_id, query={'embed': '1', 'single': []}, note='Downloading embed frame')
+
+ def clean_text(html_class, html):
+ text = clean_html(get_element_by_class(html_class, html))
+ return text.replace('\n', ' ') if text else None
+
+ description = clean_text('tgme_widget_message_text', embed)
+ message = {
+ 'title': description or '',
+ 'description': description,
+ 'channel': clean_text('tgme_widget_message_author', embed),
+ 'channel_id': channel_id,
+ 'timestamp': unified_timestamp(self._search_regex(
+ r'<time[^>]*datetime="([^"]*)"', embed, 'timestamp', fatal=False)),
+ }
+
+ videos = []
+ for video in re.findall(r'<a class="tgme_widget_message_video_player(?s:.+?)</time>', embed):
+ video_url = self._search_regex(
+ r'<video[^>]+src="([^"]+)"', video, 'video URL', fatal=False)
+ webpage_url = self._search_regex(
+ r'<a class="tgme_widget_message_video_player[^>]+href="([^"]+)"',
+ video, 'webpage URL', fatal=False)
+ if not video_url or not webpage_url:
+ continue
+ formats = [{
+ 'url': video_url,
+ 'ext': 'mp4',
+ }]
+ videos.append({
+ 'id': url_basename(webpage_url),
+ 'webpage_url': update_url_query(webpage_url, {'single': True}),
+ 'duration': parse_duration(self._search_regex(
+ r'<time[^>]+duration[^>]*>([\d:]+)</time>', video, 'duration', fatal=False)),
+ 'thumbnail': self._search_regex(
+ r'tgme_widget_message_video_thumb"[^>]+background-image:url\(\'([^\']+)\'\)',
+ video, 'thumbnail', fatal=False),
+ 'formats': formats,
+ **message,
+ })
+
+ playlist_id = None
+ if len(videos) > 1 and 'single' not in parse_qs(url, keep_blank_values=True):
+ playlist_id = f'{channel_id}-{msg_id}'
+
+ if self._yes_playlist(playlist_id, msg_id):
+ return self.playlist_result(
+ videos, playlist_id, format_field(message, 'channel', f'%s {msg_id}'), description)
+ else:
+ return traverse_obj(videos, lambda _, x: x['id'] == msg_id, get_all=False)
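
The 'single' marker that drives the playlist decision arrives as a bare,
valueless query flag, which is why keep_blank_values=True is required above.
yt-dlp's parse_qs helper takes the full URL; the stdlib equivalent:

    from urllib.parse import parse_qs, urlparse

    query = urlparse('https://t.me/vorposte/29342?single').query
    assert 'single' not in parse_qs(query)                      # dropped by default
    assert 'single' in parse_qs(query, keep_blank_values=True)  # {'single': ['']}
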
diff --git a/yt_dlp/extractor/telemb.py b/yt_dlp/extractor/telemb.py
new file mode 100644
index 0000000..a71b14c
--- /dev/null
+++ b/yt_dlp/extractor/telemb.py
@@ -0,0 +1,75 @@
+import re
+
+from .common import InfoExtractor
+from ..utils import remove_start
+
+
+class TeleMBIE(InfoExtractor):
+ _WORKING = False
+ _VALID_URL = r'https?://(?:www\.)?telemb\.be/(?P<display_id>.+?)_d_(?P<id>\d+)\.html'
+ _TESTS = [
+ {
+ 'url': 'http://www.telemb.be/mons-cook-with-danielle-des-cours-de-cuisine-en-anglais-_d_13466.html',
+ 'md5': 'f45ea69878516ba039835794e0f8f783',
+ 'info_dict': {
+ 'id': '13466',
+ 'display_id': 'mons-cook-with-danielle-des-cours-de-cuisine-en-anglais-',
+ 'ext': 'mp4',
+ 'title': 'Mons - Cook with Danielle : des cours de cuisine en anglais ! - Les reportages',
+ 'description': 'md5:bc5225f47b17c309761c856ad4776265',
+ 'thumbnail': r're:^http://.*\.(?:jpg|png)$',
+ }
+ },
+ {
+ # non-ASCII characters in download URL
+ 'url': 'http://telemb.be/les-reportages-havre-incendie-mortel_d_13514.html',
+ 'md5': '6e9682736e5ccd4eab7f21e855350733',
+ 'info_dict': {
+ 'id': '13514',
+ 'display_id': 'les-reportages-havre-incendie-mortel',
+ 'ext': 'mp4',
+ 'title': 'Havré - Incendie mortel - Les reportages',
+ 'description': 'md5:5e54cb449acb029c2b7734e2d946bd4a',
+ 'thumbnail': r're:^http://.*\.(?:jpg|png)$',
+ }
+ },
+ ]
+
+ def _real_extract(self, url):
+ mobj = self._match_valid_url(url)
+ video_id = mobj.group('id')
+ display_id = mobj.group('display_id')
+
+ webpage = self._download_webpage(url, display_id)
+
+ formats = []
+ for video_url in re.findall(r'file\s*:\s*"([^"]+)"', webpage):
+ fmt = {
+ 'url': video_url,
+ 'format_id': video_url.split(':')[0]
+ }
+ rtmp = re.search(r'^(?P<url>rtmp://[^/]+/(?P<app>.+))/(?P<playpath>mp4:.+)$', video_url)
+ if rtmp:
+ fmt.update({
+ 'play_path': rtmp.group('playpath'),
+ 'app': rtmp.group('app'),
+ 'player_url': 'http://p.jwpcdn.com/6/10/jwplayer.flash.swf',
+ 'page_url': 'http://www.telemb.be',
+ 'preference': -10,
+ })
+ formats.append(fmt)
+
+ title = remove_start(self._og_search_title(webpage), 'TéléMB : ')
+ description = self._html_search_regex(
+ r'<meta property="og:description" content="(.+?)" />',
+ webpage, 'description', fatal=False)
+ thumbnail = self._og_search_thumbnail(webpage)
+
+ return {
+ 'id': video_id,
+ 'display_id': display_id,
+ 'title': title,
+ 'description': description,
+ 'thumbnail': thumbnail,
+ 'formats': formats,
+ }
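
For RTMP entries, the single download URL is decomposed into the connection
URL, application and play path that the RTMP downloader expects. How the
regex above splits an assumed sample URL:

    import re

    m = re.search(
        r'^(?P<url>rtmp://[^/]+/(?P<app>.+))/(?P<playpath>mp4:.+)$',
        'rtmp://media.example.be/vod/mp4:reportages/13514.mp4')
    assert m.group('url') == 'rtmp://media.example.be/vod'
    assert m.group('app') == 'vod'
    assert m.group('playpath') == 'mp4:reportages/13514.mp4'
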
diff --git a/yt_dlp/extractor/telemundo.py b/yt_dlp/extractor/telemundo.py
new file mode 100644
index 0000000..84b24de
--- /dev/null
+++ b/yt_dlp/extractor/telemundo.py
@@ -0,0 +1,50 @@
+from .common import InfoExtractor
+from ..networking import HEADRequest
+from ..utils import try_get, unified_timestamp
+
+
+class TelemundoIE(InfoExtractor):
+ _WORKING = False
+ _VALID_URL = r'https?://(?:www\.)?telemundo\.com/.+?video/[^/]+(?P<id>tmvo\d{7})'
+ _TESTS = [{
+ 'url': 'https://www.telemundo.com/noticias/noticias-telemundo-en-la-noche/empleo/video/esta-aplicacion-gratuita-esta-ayudando-los-latinos-encontrar-trabajo-en-estados-unidos-tmvo9829325',
+ 'info_dict': {
+ 'id': 'tmvo9829325',
+ 'timestamp': 1621396800,
+ 'title': 'Esta aplicación gratuita está ayudando a los latinos a encontrar trabajo en Estados Unidos',
+ 'uploader': 'Telemundo',
+ 'uploader_id': 'NBCU_Telemundo',
+ 'ext': 'mp4',
+ 'upload_date': '20210519',
+ },
+ 'params': {
+ 'skip_download': True,
+ }
+ }, {
+ 'url': 'https://www.telemundo.com/shows/al-rojo-vivo/empleo/video/personajes-de-times-square-piden-que-la-ciudad-de-nueva-york-los-deje-volver-trabajar-tmvo9816272',
+ 'only_matching': True,
+ }]
+
+ def _real_extract(self, url):
+ video_id = self._match_id(url)
+ webpage = self._download_webpage(url, video_id)
+ metadata = self._search_nextjs_data(webpage, video_id)
+ redirect_url = try_get(
+ metadata,
+ lambda x: x['props']['initialState']['video']['associatedPlaylists'][0]['videos'][0]['videoAssets'][0]['publicUrl'])
+
+ m3u8_url = self._request_webpage(HEADRequest(
+ redirect_url + '?format=redirect&manifest=m3u&Tracking=true&Embedded=true&formats=MPEG4'),
+ video_id, 'Processing m3u8').url
+ formats = self._extract_m3u8_formats(m3u8_url, video_id, 'mp4')
+ date = unified_timestamp(try_get(
+ metadata, lambda x: x['props']['initialState']['video']['associatedPlaylists'][0]['videos'][0]['datePublished'].split(' ', 1)[1]))
+ return {
+ 'url': url,
+ 'id': video_id,
+ 'title': self._search_regex(r'<h1[^>]+>([^<]+)', webpage, 'title', fatal=False),
+ 'formats': formats,
+ 'timestamp': date,
+ 'uploader': 'Telemundo',
+ 'uploader_id': self._search_regex(r'https?://(?:[^/]+/){3}video/(?P<id>[^/]+)', m3u8_url, 'Akamai account', fatal=False)
+ }
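
The asset's publicUrl is an Akamai redirector, so the extractor resolves the
real m3u8 with an HTTP HEAD request and keeps only the final URL. A
standalone sketch of the same trick:

    import urllib.request

    class HeadRequest(urllib.request.Request):
        def get_method(self):
            return 'HEAD'

    def resolve_m3u8(public_url):
        req = HeadRequest(public_url + '?format=redirect&manifest=m3u'
                          '&Tracking=true&Embedded=true&formats=MPEG4')
        with urllib.request.urlopen(req) as resp:
            return resp.url  # final URL after following redirects
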
diff --git a/yt_dlp/extractor/telequebec.py b/yt_dlp/extractor/telequebec.py
new file mode 100644
index 0000000..08a0837
--- /dev/null
+++ b/yt_dlp/extractor/telequebec.py
@@ -0,0 +1,237 @@
+from .common import InfoExtractor
+from ..compat import compat_str
+from ..utils import (
+ int_or_none,
+ smuggle_url,
+ try_get,
+ unified_timestamp,
+)
+
+
+class TeleQuebecBaseIE(InfoExtractor):
+ BRIGHTCOVE_URL_TEMPLATE = 'http://players.brightcove.net/%s/%s_default/index.html?videoId=%s'
+
+ @staticmethod
+ def _brightcove_result(brightcove_id, player_id, account_id='6150020952001'):
+ return {
+ '_type': 'url_transparent',
+ 'url': smuggle_url(TeleQuebecBaseIE.BRIGHTCOVE_URL_TEMPLATE % (account_id, player_id, brightcove_id), {'geo_countries': ['CA']}),
+ 'ie_key': 'BrightcoveNew',
+ }
+
+
+class TeleQuebecIE(TeleQuebecBaseIE):
+ _VALID_URL = r'''(?x)
+ https?://
+ (?:
+ zonevideo\.telequebec\.tv/media|
+ coucou\.telequebec\.tv/videos
+ )/(?P<id>\d+)
+ '''
+ _TESTS = [{
+ # available till 01.01.2023
+ 'url': 'http://zonevideo.telequebec.tv/media/37578/un-petit-choc-et-puis-repart/un-chef-a-la-cabane',
+ 'info_dict': {
+ 'id': '6155972771001',
+ 'ext': 'mp4',
+ 'title': 'Un petit choc et puis repart!',
+ 'description': 'md5:b04a7e6b3f74e32d7b294cffe8658374',
+ 'timestamp': 1589262469,
+ 'uploader_id': '6150020952001',
+ 'upload_date': '20200512',
+ },
+ 'add_ie': ['BrightcoveNew'],
+ }, {
+ 'url': 'https://zonevideo.telequebec.tv/media/55267/le-soleil/passe-partout',
+ 'info_dict': {
+ 'id': '6167180337001',
+ 'ext': 'mp4',
+ 'title': 'Le soleil',
+ 'description': 'md5:64289c922a8de2abbe99c354daffde02',
+ 'uploader_id': '6150020952001',
+ 'upload_date': '20200625',
+ 'timestamp': 1593090307,
+ },
+ 'add_ie': ['BrightcoveNew'],
+ }, {
+ # no description
+ 'url': 'http://zonevideo.telequebec.tv/media/30261',
+ 'only_matching': True,
+ }, {
+ 'url': 'https://coucou.telequebec.tv/videos/41788/idee-de-genie/l-heure-du-bain',
+ 'only_matching': True,
+ }]
+
+ def _real_extract(self, url):
+ media_id = self._match_id(url)
+ media = self._download_json(
+ 'https://mnmedias.api.telequebec.tv/api/v3/media/' + media_id,
+ media_id)['media']
+ source_id = next(source_info['sourceId'] for source_info in media['streamInfos'] if source_info.get('source') == 'Brightcove')
+ info = self._brightcove_result(source_id, '22gPKdt7f')
+ product = media.get('product') or {}
+ season = product.get('season') or {}
+ info.update({
+ 'description': try_get(media, lambda x: x['descriptions'][-1]['text'], compat_str),
+ 'series': try_get(season, lambda x: x['serie']['titre']),
+ 'season': season.get('name'),
+ 'season_number': int_or_none(season.get('seasonNo')),
+ 'episode': product.get('titre'),
+ 'episode_number': int_or_none(product.get('episodeNo')),
+ })
+ return info
+
+
+class TeleQuebecSquatIE(InfoExtractor):
+ _VALID_URL = r'https?://squat\.telequebec\.tv/videos/(?P<id>\d+)'
+ _TESTS = [{
+ 'url': 'https://squat.telequebec.tv/videos/9314',
+ 'info_dict': {
+ 'id': 'd59ae78112d542e793d83cc9d3a5b530',
+ 'ext': 'mp4',
+ 'title': 'Poupeflekta',
+ 'description': 'md5:2f0718f8d2f8fece1646ee25fb7bce75',
+ 'duration': 1351,
+ 'timestamp': 1569057600,
+ 'upload_date': '20190921',
+ 'series': 'Miraculous : Les Aventures de Ladybug et Chat Noir',
+ 'season': 'Saison 3',
+ 'season_number': 3,
+ 'episode_number': 57,
+ },
+ 'params': {
+ 'skip_download': True,
+ },
+ }]
+
+ def _real_extract(self, url):
+ video_id = self._match_id(url)
+
+ video = self._download_json(
+ 'https://squat.api.telequebec.tv/v1/videos/%s' % video_id,
+ video_id)
+
+ media_id = video['sourceId']
+
+ return {
+ '_type': 'url_transparent',
+ 'url': 'http://zonevideo.telequebec.tv/media/%s' % media_id,
+ 'ie_key': TeleQuebecIE.ie_key(),
+ 'id': media_id,
+ 'title': video.get('titre'),
+ 'description': video.get('description'),
+ 'timestamp': unified_timestamp(video.get('datePublication')),
+ 'series': video.get('container'),
+ 'season': video.get('saison'),
+ 'season_number': int_or_none(video.get('noSaison')),
+ 'episode_number': int_or_none(video.get('episode')),
+ }
+
+
+class TeleQuebecEmissionIE(InfoExtractor):
+ _VALID_URL = r'''(?x)
+ https?://
+ (?:
+ [^/]+\.telequebec\.tv/emissions/|
+ (?:www\.)?telequebec\.tv/
+ )
+ (?P<id>[^?#&]+)
+ '''
+ _TESTS = [{
+ 'url': 'http://lindicemcsween.telequebec.tv/emissions/100430013/des-soins-esthetiques-a-377-d-interets-annuels-ca-vous-tente',
+ 'info_dict': {
+ 'id': '6154476028001',
+ 'ext': 'mp4',
+ 'title': 'Des soins esthétiques à 377 % d’intérêts annuels, ça vous tente?',
+ 'description': 'md5:cb4d378e073fae6cce1f87c00f84ae9f',
+ 'upload_date': '20200505',
+ 'timestamp': 1588713424,
+ 'uploader_id': '6150020952001',
+ },
+ }, {
+ 'url': 'http://bancpublic.telequebec.tv/emissions/emission-49/31986/jeunes-meres-sous-pression',
+ 'only_matching': True,
+ }, {
+ 'url': 'http://www.telequebec.tv/masha-et-michka/epi059masha-et-michka-3-053-078',
+ 'only_matching': True,
+ }, {
+ 'url': 'http://www.telequebec.tv/documentaire/bebes-sur-mesure/',
+ 'only_matching': True,
+ }]
+
+ def _real_extract(self, url):
+ display_id = self._match_id(url)
+
+ webpage = self._download_webpage(url, display_id)
+
+ media_id = self._search_regex(
+ r'mediaId\s*:\s*(?P<id>\d+)', webpage, 'media id')
+
+ return self.url_result(
+ 'http://zonevideo.telequebec.tv/media/' + media_id,
+ TeleQuebecIE.ie_key())
+
+
+class TeleQuebecLiveIE(TeleQuebecBaseIE):
+ _VALID_URL = r'https?://zonevideo\.telequebec\.tv/(?P<id>endirect)'
+ _TEST = {
+ 'url': 'http://zonevideo.telequebec.tv/endirect/',
+ 'info_dict': {
+ 'id': '6159095684001',
+ 'ext': 'mp4',
+ 'title': 're:^Télé-Québec [0-9]{4}-[0-9]{2}-[0-9]{2} [0-9]{2}:[0-9]{2}$',
+ 'is_live': True,
+ 'description': 'Canal principal de Télé-Québec',
+ 'uploader_id': '6150020952001',
+ 'timestamp': 1590439901,
+ 'upload_date': '20200525',
+ },
+ 'params': {
+ 'skip_download': True,
+ },
+ }
+
+ def _real_extract(self, url):
+ return self._brightcove_result('6159095684001', 'skCsmi2Uw')
+
+
+class TeleQuebecVideoIE(TeleQuebecBaseIE):
+ _VALID_URL = r'https?://video\.telequebec\.tv/player(?:-live)?/(?P<id>\d+)'
+ _TESTS = [{
+ 'url': 'https://video.telequebec.tv/player/31110/stream',
+ 'info_dict': {
+ 'id': '6202570652001',
+ 'ext': 'mp4',
+ 'title': 'Le coût du véhicule le plus vendu au Canada / Tous les frais liés à la procréation assistée',
+ 'description': 'md5:685a7e4c450ba777c60adb6e71e41526',
+ 'upload_date': '20201019',
+ 'timestamp': 1603115930,
+ 'uploader_id': '6101674910001',
+ },
+ }, {
+ 'url': 'https://video.telequebec.tv/player-live/28527',
+ 'only_matching': True,
+ }]
+
+ def _call_api(self, path, video_id):
+ return self._download_json(
+ 'http://beacon.playback.api.brightcove.com/telequebec/api/assets/' + path,
+ video_id, query={'device_layout': 'web', 'device_type': 'web'})['data']
+
+ def _real_extract(self, url):
+ asset_id = self._match_id(url)
+ asset = self._call_api(asset_id, asset_id)['asset']
+ stream = self._call_api(
+ asset_id + '/streams/' + asset['streams'][0]['id'], asset_id)['stream']
+ stream_url = stream['url']
+ account_id = try_get(
+ stream, lambda x: x['video_provider_details']['account_id']) or '6101674910001'
+ info = self._brightcove_result(stream_url, 'default', account_id)
+ info.update({
+ 'description': asset.get('long_description') or asset.get('short_description'),
+ 'series': asset.get('series_original_name'),
+ 'season_number': int_or_none(asset.get('season_number')),
+ 'episode': asset.get('original_name'),
+ 'episode_number': int_or_none(asset.get('episode_number')),
+ })
+ return info
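
The Télé-Québec extractors above all funnel, directly or via TeleQuebecIE,
into _brightcove_result, which smuggles a geo hint into a standard Brightcove
player URL so the BrightcoveNew extractor applies it downstream. What that
result points at, using the account/player IDs from TeleQuebecIE and the
video ID from its first test case:

    from yt_dlp.utils import smuggle_url, unsmuggle_url

    TEMPLATE = 'http://players.brightcove.net/%s/%s_default/index.html?videoId=%s'
    url = smuggle_url(
        TEMPLATE % ('6150020952001', '22gPKdt7f', '6155972771001'),
        {'geo_countries': ['CA']})
    plain_url, data = unsmuggle_url(url)  # BrightcoveNew recovers the hint
    assert data == {'geo_countries': ['CA']}
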
diff --git a/yt_dlp/extractor/teletask.py b/yt_dlp/extractor/teletask.py
new file mode 100644
index 0000000..fd831f5
--- /dev/null
+++ b/yt_dlp/extractor/teletask.py
@@ -0,0 +1,52 @@
+import re
+
+from .common import InfoExtractor
+from ..utils import unified_strdate
+
+
+class TeleTaskIE(InfoExtractor):
+ _WORKING = False
+ _VALID_URL = r'https?://(?:www\.)?tele-task\.de/archive/video/html5/(?P<id>[0-9]+)'
+ _TEST = {
+ 'url': 'http://www.tele-task.de/archive/video/html5/26168/',
+ 'info_dict': {
+ 'id': '26168',
+ 'title': 'Duplicate Detection',
+ },
+ 'playlist': [{
+ 'md5': '290ef69fb2792e481169c3958dbfbd57',
+ 'info_dict': {
+ 'id': '26168-speaker',
+ 'ext': 'mp4',
+ 'title': 'Duplicate Detection',
+ 'upload_date': '20141218',
+ }
+ }, {
+ 'md5': 'e1e7218c5f0e4790015a437fcf6c71b4',
+ 'info_dict': {
+ 'id': '26168-slides',
+ 'ext': 'mp4',
+ 'title': 'Duplicate Detection',
+ 'upload_date': '20141218',
+ }
+ }]
+ }
+
+ def _real_extract(self, url):
+ lecture_id = self._match_id(url)
+ webpage = self._download_webpage(url, lecture_id)
+
+ title = self._html_search_regex(
+ r'itemprop="name">([^<]+)</a>', webpage, 'title')
+ upload_date = unified_strdate(self._html_search_regex(
+ r'Date:</td><td>([^<]+)</td>', webpage, 'date', fatal=False))
+
+ entries = [{
+ 'id': '%s-%s' % (lecture_id, format_id),
+ 'url': video_url,
+ 'title': title,
+ 'upload_date': upload_date,
+ } for format_id, video_url in re.findall(
+ r'<video class="([^"]+)"[^>]*>\s*<source src="([^"]+)"', webpage)]
+
+ return self.playlist_result(entries, lecture_id, title)
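
Each lecture page carries one <video> element per track (speaker camera and
slides), and the findall above yields (css class, source URL) pairs that
become the playlist entries. An assumed sample of the markup shape:

    import re

    html = '''
    <video class="speaker" controls>
      <source src="https://cdn.example/26168-speaker.mp4" type="video/mp4">
    </video>
    <video class="slides" controls>
      <source src="https://cdn.example/26168-slides.mp4" type="video/mp4">
    </video>
    '''
    pairs = re.findall(r'<video class="([^"]+)"[^>]*>\s*<source src="([^"]+)"', html)
    assert pairs == [('speaker', 'https://cdn.example/26168-speaker.mp4'),
                     ('slides', 'https://cdn.example/26168-slides.mp4')]
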
diff --git a/yt_dlp/extractor/telewebion.py b/yt_dlp/extractor/telewebion.py
new file mode 100644
index 0000000..9378ed0
--- /dev/null
+++ b/yt_dlp/extractor/telewebion.py
@@ -0,0 +1,133 @@
+from __future__ import annotations
+
+import json
+from functools import partial
+from textwrap import dedent
+
+from .common import InfoExtractor
+from ..utils import ExtractorError, format_field, int_or_none, parse_iso8601
+from ..utils.traversal import traverse_obj
+
+
+def _fmt_url(url):
+ return partial(format_field, template=url, default=None)
+
+
+class TelewebionIE(InfoExtractor):
+ _VALID_URL = r'https?://(?:www\.)?telewebion\.com/episode/(?P<id>(?:0x[a-fA-F\d]+|\d+))'
+ _TESTS = [{
+ 'url': 'http://www.telewebion.com/episode/0x1b3139c/',
+ 'info_dict': {
+ 'id': '0x1b3139c',
+ 'ext': 'mp4',
+ 'title': 'قرعه‌کشی لیگ قهرمانان اروپا',
+ 'series': '+ فوتبال',
+ 'series_id': '0x1b2505c',
+ 'channel': 'شبکه 3',
+ 'channel_id': '0x1b1a761',
+ 'channel_url': 'https://telewebion.com/live/tv3',
+ 'timestamp': 1425522414,
+ 'upload_date': '20150305',
+ 'release_timestamp': 1425517020,
+ 'release_date': '20150305',
+ 'duration': 420,
+ 'view_count': int,
+ 'tags': ['ورزشی', 'لیگ اروپا', 'اروپا'],
+ 'thumbnail': 'https://static.telewebion.com/episodeImages/YjFhM2MxMDBkMDNiZTU0MjE5YjQ3ZDY0Mjk1ZDE0ZmUwZWU3OTE3OWRmMDAyODNhNzNkNjdmMWMzMWIyM2NmMA/default',
+ },
+ 'params': {'skip_download': 'm3u8'},
+ }, {
+ 'url': 'https://telewebion.com/episode/162175536',
+ 'info_dict': {
+ 'id': '0x9aa9a30',
+ 'ext': 'mp4',
+ 'title': 'کارما یعنی این !',
+ 'series': 'پاورقی',
+ 'series_id': '0x29a7426',
+ 'channel': 'شبکه 2',
+ 'channel_id': '0x1b1a719',
+ 'channel_url': 'https://telewebion.com/live/tv2',
+ 'timestamp': 1699979968,
+ 'upload_date': '20231114',
+ 'release_timestamp': 1699991638,
+ 'release_date': '20231114',
+ 'duration': 78,
+ 'view_count': int,
+ 'tags': ['کلیپ های منتخب', ' کلیپ طنز ', ' کلیپ سیاست ', 'پاورقی', 'ویژه فلسطین'],
+ 'thumbnail': 'https://static.telewebion.com/episodeImages/871e9455-7567-49a5-9648-34c22c197f5f/default',
+ },
+ 'params': {'skip_download': 'm3u8'},
+ }]
+
+ def _call_graphql_api(
+ self, operation, video_id, query,
+ variables: dict[str, tuple[str, str]] | None = None,
+ note='Downloading GraphQL JSON metadata',
+ ):
+ parameters = ''
+ if variables:
+ parameters = ', '.join(f'${name}: {type_}' for name, (type_, _) in variables.items())
+ parameters = f'({parameters})'
+
+ result = self._download_json('https://graph.telewebion.com/graphql', video_id, note, data=json.dumps({
+ 'operationName': operation,
+ 'query': f'query {operation}{parameters} @cacheControl(maxAge: 60) {{{query}\n}}\n',
+ 'variables': {name: value for name, (_, value) in (variables or {}).items()}
+ }, separators=(',', ':')).encode(), headers={
+ 'Content-Type': 'application/json',
+ 'Accept': 'application/json',
+ })
+ if not result or traverse_obj(result, 'errors'):
+ message = ', '.join(traverse_obj(result, ('errors', ..., 'message', {str})))
+ raise ExtractorError(message or 'Unknown GraphQL API error')
+
+ return result['data']
+
+ def _real_extract(self, url):
+ video_id = self._match_id(url)
+ if not video_id.startswith('0x'):
+ video_id = hex(int(video_id))
+
+ episode_data = self._call_graphql_api('getEpisodeDetail', video_id, dedent('''
+ queryEpisode(filter: {EpisodeID: $EpisodeId}, first: 1) {
+ title
+ program {
+ ProgramID
+ title
+ }
+ image
+ view_count
+ duration
+ started_at
+ created_at
+ channel {
+ ChannelID
+ name
+ descriptor
+ }
+ tags {
+ name
+ }
+ }
+ '''), {'EpisodeId': ('[ID!]', video_id)})
+
+ info_dict = traverse_obj(episode_data, ('queryEpisode', 0, {
+ 'title': ('title', {str}),
+ 'view_count': ('view_count', {int_or_none}),
+ 'duration': ('duration', {int_or_none}),
+ 'tags': ('tags', ..., 'name', {str}),
+ 'release_timestamp': ('started_at', {parse_iso8601}),
+ 'timestamp': ('created_at', {parse_iso8601}),
+ 'series': ('program', 'title', {str}),
+ 'series_id': ('program', 'ProgramID', {str}),
+ 'channel': ('channel', 'name', {str}),
+ 'channel_id': ('channel', 'ChannelID', {str}),
+ 'channel_url': ('channel', 'descriptor', {_fmt_url('https://telewebion.com/live/%s')}),
+ 'thumbnail': ('image', {_fmt_url('https://static.telewebion.com/episodeImages/%s/default')}),
+ 'formats': (
+ 'channel', 'descriptor', {str},
+ {_fmt_url(f'https://cdna.telewebion.com/%s/episode/{video_id}/playlist.m3u8')},
+ {partial(self._extract_m3u8_formats, video_id=video_id, ext='mp4', m3u8_id='hls')}),
+ }))
+ info_dict['id'] = video_id
+ return info_dict
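
Two details above are easy to miss: numeric episode IDs (as in the second
test) are canonicalised to the hex form the GraphQL API expects, and _fmt_url
builds one-argument formatters that can sit inside traverse_obj paths:

    from functools import partial
    from yt_dlp.utils import format_field

    assert hex(int('162175536')) == '0x9aa9a30'  # the second test's mapping

    fmt = partial(format_field, template='https://telewebion.com/live/%s',
                  default=None)
    assert fmt('tv2') == 'https://telewebion.com/live/tv2'
    assert fmt(None) is None  # a missing descriptor degrades to None
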
diff --git a/yt_dlp/extractor/tempo.py b/yt_dlp/extractor/tempo.py
new file mode 100644
index 0000000..9318d6f
--- /dev/null
+++ b/yt_dlp/extractor/tempo.py
@@ -0,0 +1,114 @@
+import re
+
+from .common import InfoExtractor
+from ..utils import (
+ int_or_none,
+ parse_iso8601,
+ traverse_obj,
+ try_call,
+)
+
+
+class IVXPlayerIE(InfoExtractor):
+ _VALID_URL = r'ivxplayer:(?P<video_id>\d+):(?P<player_key>\w+)'
+ _TESTS = [{
+ 'url': 'ivxplayer:2366065:4a89dfe6bc8f002596b1dfbd600730b1',
+ 'info_dict': {
+ 'id': '2366065',
+ 'ext': 'mp4',
+ 'duration': 112,
+ 'upload_date': '20221204',
+ 'title': 'Film Indonesia di Disney Content Showcase Asia Pacific 2022',
+ 'timestamp': 1670151746,
+ 'thumbnail': 'https://ivx-image.ivideosmart.com/serve/image/video/2366065?width=300'
+ }
+ }]
+ _WEBPAGE_TESTS = [{
+ 'url': 'https://www.cantika.com/video/31737/film-indonesia-di-disney-content-showcase-asia-pacific-2022',
+ 'info_dict': {
+ 'id': '2374200',
+ 'ext': 'mp4',
+ 'duration': 110,
+ 'title': 'Serial Indonesia di Disney Content Showcase Asia Pacific 2022',
+ 'timestamp': 1670639416,
+ 'upload_date': '20221210',
+ 'thumbnail': 'https://ivx-image.ivideosmart.com/serve/image/video/2374200?width=300'
+ }
+ }, {
+ 'url': 'https://www.gooto.com/video/11437/wuling-suv-ramai-dikunjungi-di-giias-2018',
+ 'info_dict': {
+ 'id': '892109',
+ 'ext': 'mp4',
+ 'title': 'Wuling SUV Ramai Dikunjungi di GIIAS 2018',
+ 'upload_date': '20180811',
+ 'description': 'md5:6d901483d0aacc664aecb4489719aafa',
+ 'duration': 75,
+ 'timestamp': 1534011263,
+ 'thumbnail': 'https://ivx-image.ivideosmart.com/serve/image/video/892109?width=300'
+ }
+ }]
+
+ @classmethod
+ def _extract_embed_urls(cls, url, webpage):
+ # more info at https://player.ivideosmart.com/ivsplayer/v4/dist/js/loader.js
+ mobj = re.search(
+ r'<ivs-player\s*[^>]+data-ivs-key\s*=\s*"(?P<player_key>[\w]+)\s*[^>]+\bdata-ivs-vid="(?P<video_id>[\w-]+)',
+ webpage)
+ if mobj:
+ yield f'ivxplayer:{mobj.group("video_id")}:{mobj.group("player_key")}'
+ raise cls.StopExtraction()
+
+ def _real_extract(self, url):
+ video_id, player_key = self._match_valid_url(url).group('video_id', 'player_key')
+ json_data = self._download_json(
+ f'https://ivxplayer.ivideosmart.com/prod/video/{video_id}?key={player_key}', video_id)
+
+ formats, subtitles = self._extract_m3u8_formats_and_subtitles(
+ json_data['player']['video_url'], video_id)
+
+ return {
+ 'id': str(json_data['ivx']['id']),
+ 'title': traverse_obj(json_data, ('ivx', 'name')),
+ 'description': traverse_obj(json_data, ('ivx', 'description')),
+ 'duration': int_or_none(traverse_obj(json_data, ('ivx', 'duration'))),
+ 'timestamp': parse_iso8601(traverse_obj(json_data, ('ivx', 'published_at'))),
+ 'formats': formats,
+ 'subtitles': subtitles,
+ 'thumbnail': traverse_obj(json_data, ('ivx', 'thumbnail_url'))
+ }
+
+
+class TempoIE(InfoExtractor):
+ _VALID_URL = r'https?://video\.tempo\.co/\w+/\d+/(?P<id>[\w-]+)'
+ _TESTS = [{
+ 'url': 'https://video.tempo.co/read/30058/anies-baswedan-ajukan-banding-putusan-ptun-batalkan-ump-dki',
+ 'info_dict': {
+ 'id': '2144275',
+ 'display_id': 'anies-baswedan-ajukan-banding-putusan-ptun-batalkan-ump-dki',
+ 'ext': 'mp4',
+ 'title': 'Anies Baswedan Ajukan Banding Putusan PTUN Batalkan UMP DKI',
+ 'duration': 85,
+ 'description': 'md5:a6822b7c4c874fa7e5bd63e96a387b66',
+ 'thumbnail': 'https://statik.tempo.co/data/2022/07/27/id_1128287/1128287_720.jpg',
+ 'timestamp': 1658907970,
+ 'upload_date': '20220727',
+ 'tags': ['Anies Baswedan', ' PTUN', ' PTUN | Pengadilan Tata Usaha Negara', ' PTUN Batalkan UMP DKI', ' UMP DKI'],
+ }
+ }]
+
+ def _real_extract(self, url):
+ display_id = self._match_id(url)
+ webpage = self._download_webpage(url, display_id)
+
+ _, video_id, player_key = next(IVXPlayerIE._extract_embed_urls(url, webpage)).split(':')
+
+ json_ld_data = self._search_json_ld(webpage, display_id)
+
+ return self.url_result(
+ f'ivxplayer:{video_id}:{player_key}', display_id=display_id,
+ thumbnail=self._html_search_meta('twitter:image:src', webpage) or self._og_search_thumbnail(webpage),
+ tags=try_call(lambda: self._html_search_meta('keywords', webpage).split(',')),
+ description=(json_ld_data.get('description')
+ or self._html_search_meta(('description', 'twitter:description'), webpage)
+ or self._og_search_description(webpage)),
+ url_transparent=True)
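
The ivxplayer pseudo-URL is a plain colon-separated triple, so TempoIE can
split it straight back apart and layer page-level metadata (description,
tags, thumbnail) on top via url_transparent. Using the IDs from the
IVXPlayerIE test above:

    embed_url = 'ivxplayer:2366065:4a89dfe6bc8f002596b1dfbd600730b1'
    scheme, video_id, player_key = embed_url.split(':')
    assert (scheme, video_id, player_key) == (
        'ivxplayer', '2366065', '4a89dfe6bc8f002596b1dfbd600730b1')
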
diff --git a/yt_dlp/extractor/tencent.py b/yt_dlp/extractor/tencent.py
new file mode 100644
index 0000000..6618ea4
--- /dev/null
+++ b/yt_dlp/extractor/tencent.py
@@ -0,0 +1,490 @@
+import functools
+import random
+import re
+import string
+import time
+
+from .common import InfoExtractor
+from ..aes import aes_cbc_encrypt_bytes
+from ..utils import (
+ ExtractorError,
+ determine_ext,
+ float_or_none,
+ int_or_none,
+ js_to_json,
+ traverse_obj,
+ urljoin,
+)
+
+
+class TencentBaseIE(InfoExtractor):
+ """Subclasses must set _API_URL, _APP_VERSION, _PLATFORM, _HOST, _REFERER"""
+
+ def _check_api_response(self, api_response):
+ msg = api_response.get('msg')
+ if api_response.get('code') != '0.0' and msg is not None:
+ if msg in (
+ '您所在区域暂无此内容版权(如设置VPN请关闭后重试)',
+ 'This content is not available in your area due to copyright restrictions. Please choose other videos.'
+ ):
+ self.raise_geo_restricted()
+ raise ExtractorError(f'Tencent said: {msg}')
+
+ def _get_ckey(self, video_id, url, guid):
+ ua = self.get_param('http_headers')['User-Agent']
+
+ payload = (f'{video_id}|{int(time.time())}|mg3c3b04ba|{self._APP_VERSION}|{guid}|'
+ f'{self._PLATFORM}|{url[:48]}|{ua.lower()[:48]}||Mozilla|Netscape|Windows x86_64|00|')
+
+ return aes_cbc_encrypt_bytes(
+ bytes(f'|{sum(map(ord, payload))}|{payload}', 'utf-8'),
+ b'Ok\xda\xa3\x9e/\x8c\xb0\x7f^r-\x9e\xde\xf3\x14',
+ b'\x01PJ\xf3V\xe6\x19\xcf.B\xbb\xa6\x8c?p\xf9',
+ padding_mode='whitespace').hex().upper()
+
+ def _get_video_api_response(self, video_url, video_id, series_id, subtitle_format, video_format, video_quality):
+ guid = ''.join(random.choices(string.digits + string.ascii_lowercase, k=16))
+ ckey = self._get_ckey(video_id, video_url, guid)
+ query = {
+ 'vid': video_id,
+ 'cid': series_id,
+ 'cKey': ckey,
+ 'encryptVer': '8.1',
+ 'spcaptiontype': '1' if subtitle_format == 'vtt' else '0',
+ 'sphls': '2' if video_format == 'hls' else '0',
+ 'dtype': '3' if video_format == 'hls' else '0',
+ 'defn': video_quality,
+ 'spsrt': '2', # Enable subtitles
+ 'sphttps': '1', # Enable HTTPS
+ 'otype': 'json',
+ 'spwm': '1',
+ 'hevclv': '28', # Enable HEVC
+ 'drm': '40', # Enable DRM
+ # For HDR
+ 'spvideo': '4',
+ 'spsfrhdr': '100',
+ # For SHD
+ 'host': self._HOST,
+ 'referer': self._REFERER,
+ 'ehost': video_url,
+ 'appVer': self._APP_VERSION,
+ 'platform': self._PLATFORM,
+ # For VQQ
+ 'guid': guid,
+ 'flowid': ''.join(random.choices(string.digits + string.ascii_lowercase, k=32)),
+ }
+
+ return self._search_json(r'QZOutputJson=', self._download_webpage(
+ self._API_URL, video_id, query=query), 'api_response', video_id)
+
+ def _extract_video_formats_and_subtitles(self, api_response, video_id):
+ video_response = api_response['vl']['vi'][0]
+
+ formats, subtitles = [], {}
+ for video_format in video_response['ul']['ui']:
+ if video_format.get('hls') or determine_ext(video_format['url']) == 'm3u8':
+ fmts, subs = self._extract_m3u8_formats_and_subtitles(
+ video_format['url'] + traverse_obj(video_format, ('hls', 'pt'), default=''),
+ video_id, 'mp4', fatal=False)
+
+ formats.extend(fmts)
+ self._merge_subtitles(subs, target=subtitles)
+ else:
+ formats.append({
+ 'url': f'{video_format["url"]}{video_response["fn"]}?vkey={video_response["fvkey"]}',
+ 'ext': 'mp4',
+ })
+
+ identifier = video_response.get('br')
+ format_response = traverse_obj(
+ api_response, ('fl', 'fi', lambda _, v: v['br'] == identifier),
+ expected_type=dict, get_all=False) or {}
+ common_info = {
+ 'width': video_response.get('vw'),
+ 'height': video_response.get('vh'),
+ 'abr': float_or_none(format_response.get('audiobandwidth'), scale=1000),
+ 'vbr': float_or_none(format_response.get('bandwidth'), scale=1000),
+ 'fps': format_response.get('vfps'),
+ 'format': format_response.get('sname'),
+ 'format_id': format_response.get('name'),
+ 'format_note': format_response.get('resolution'),
+ 'dynamic_range': {'hdr10': 'hdr10'}.get(format_response.get('name'), 'sdr'),
+ 'has_drm': format_response.get('drm', 0) != 0,
+ }
+ for f in formats:
+ f.update(common_info)
+
+ return formats, subtitles
+
+ def _extract_video_native_subtitles(self, api_response):
+ subtitles = {}
+ for subtitle in traverse_obj(api_response, ('sfl', 'fi')) or ():
+ subtitles.setdefault(subtitle['lang'].lower(), []).append({
+ 'url': subtitle['url'],
+ 'ext': 'srt' if subtitle.get('captionType') == 1 else 'vtt',
+ 'protocol': 'm3u8_native' if determine_ext(subtitle['url']) == 'm3u8' else 'http',
+ })
+
+ return subtitles
+
+ def _extract_all_video_formats_and_subtitles(self, url, video_id, series_id):
+ api_responses = [self._get_video_api_response(url, video_id, series_id, 'srt', 'hls', 'hd')]
+ self._check_api_response(api_responses[0])
+ qualities = traverse_obj(api_responses, (0, 'fl', 'fi', ..., 'name')) or ('shd', 'fhd')
+ for q in qualities:
+ if q not in ('ld', 'sd', 'hd'):
+ api_responses.append(self._get_video_api_response(
+ url, video_id, series_id, 'vtt', 'hls', q))
+ self._check_api_response(api_responses[-1])
+
+ formats, subtitles = [], {}
+ for api_response in api_responses:
+ fmts, subs = self._extract_video_formats_and_subtitles(api_response, video_id)
+ native_subtitles = self._extract_video_native_subtitles(api_response)
+
+ formats.extend(fmts)
+ self._merge_subtitles(subs, native_subtitles, target=subtitles)
+
+ return formats, subtitles
+
+ def _get_clean_title(self, title):
+ return re.sub(
+ r'\s*[_\-]\s*(?:Watch online|Watch HD Video Online|WeTV|腾讯视频|(?:高清)?1080P在线观看平台).*?$',
+ '', title or '').strip() or None
+
+
+class VQQBaseIE(TencentBaseIE):
+ _VALID_URL_BASE = r'https?://v\.qq\.com'
+
+ _API_URL = 'https://h5vv6.video.qq.com/getvinfo'
+ _APP_VERSION = '3.5.57'
+ _PLATFORM = '10901'
+ _HOST = 'v.qq.com'
+ _REFERER = 'v.qq.com'
+
+ def _get_webpage_metadata(self, webpage, video_id):
+ return self._search_json(
+ r'<script[^>]*>[^<]*window\.__(?:pinia|PINIA__)\s*=',
+ webpage, 'pinia data', video_id, transform_source=js_to_json, fatal=False)
+
+
+class VQQVideoIE(VQQBaseIE):
+ IE_NAME = 'vqq:video'
+ _VALID_URL = VQQBaseIE._VALID_URL_BASE + r'/x/(?:page|cover/(?P<series_id>\w+))/(?P<id>\w+)'
+
+ _TESTS = [{
+ 'url': 'https://v.qq.com/x/page/q326831cny0.html',
+ 'md5': 'b11c9cb781df710d686b950376676e2a',
+ 'info_dict': {
+ 'id': 'q326831cny0',
+ 'ext': 'mp4',
+ 'title': '我是选手:雷霆裂阵,终极时刻',
+ 'description': 'md5:e7ed70be89244017dac2a835a10aeb1e',
+ 'thumbnail': r're:^https?://[^?#]+q326831cny0',
+ 'format_id': r're:^shd',
+ },
+ }, {
+ 'url': 'https://v.qq.com/x/page/o3013za7cse.html',
+ 'md5': 'a1bcf42c6d28c189bd2fe2d468abb287',
+ 'info_dict': {
+ 'id': 'o3013za7cse',
+ 'ext': 'mp4',
+ 'title': '欧阳娜娜VLOG',
+ 'description': 'md5:29fe847497a98e04a8c3826e499edd2e',
+ 'thumbnail': r're:^https?://[^?#]+o3013za7cse',
+ 'format_id': r're:^shd',
+ },
+ }, {
+ 'url': 'https://v.qq.com/x/cover/7ce5noezvafma27/a00269ix3l8.html',
+ 'md5': '87968df6238a65d2478f19c25adf850b',
+ 'info_dict': {
+ 'id': 'a00269ix3l8',
+ 'ext': 'mp4',
+ 'title': '鸡毛飞上天 第01集',
+ 'description': 'md5:8cae3534327315b3872fbef5e51b5c5b',
+ 'thumbnail': r're:^https?://[^?#]+7ce5noezvafma27',
+ 'series': '鸡毛飞上天',
+ 'format_id': r're:^shd',
+ },
+ 'skip': '404',
+ }, {
+ 'url': 'https://v.qq.com/x/cover/mzc00200p29k31e/s0043cwsgj0.html',
+ 'md5': 'fadd10bf88aec3420f06f19ee1d24c5b',
+ 'info_dict': {
+ 'id': 's0043cwsgj0',
+ 'ext': 'mp4',
+ 'title': '第1集:如何快乐吃糖?',
+ 'description': 'md5:1d8c3a0b8729ae3827fa5b2d3ebd5213',
+ 'thumbnail': r're:^https?://[^?#]+s0043cwsgj0',
+ 'series': '青年理工工作者生活研究所',
+ 'format_id': r're:^shd',
+ },
+ 'params': {'skip_download': 'm3u8'},
+ }, {
+ # Geo-restricted to China
+ 'url': 'https://v.qq.com/x/cover/mcv8hkc8zk8lnov/x0036x5qqsr.html',
+ 'only_matching': True,
+ }]
+
+ def _real_extract(self, url):
+ video_id, series_id = self._match_valid_url(url).group('id', 'series_id')
+ webpage = self._download_webpage(url, video_id)
+ webpage_metadata = self._get_webpage_metadata(webpage, video_id)
+
+ formats, subtitles = self._extract_all_video_formats_and_subtitles(url, video_id, series_id)
+ return {
+ 'id': video_id,
+ 'title': self._get_clean_title(self._og_search_title(webpage)
+ or traverse_obj(webpage_metadata, ('global', 'videoInfo', 'title'))),
+ 'description': (self._og_search_description(webpage)
+ or traverse_obj(webpage_metadata, ('global', 'videoInfo', 'desc'))),
+ 'formats': formats,
+ 'subtitles': subtitles,
+ 'thumbnail': (self._og_search_thumbnail(webpage)
+ or traverse_obj(webpage_metadata, ('global', 'videoInfo', 'pic160x90'))),
+ 'series': traverse_obj(webpage_metadata, ('global', 'coverInfo', 'title')),
+ }
+
+
+class VQQSeriesIE(VQQBaseIE):
+ IE_NAME = 'vqq:series'
+ _VALID_URL = VQQBaseIE._VALID_URL_BASE + r'/x/cover/(?P<id>\w+)\.html/?(?:[?#]|$)'
+
+ _TESTS = [{
+ 'url': 'https://v.qq.com/x/cover/7ce5noezvafma27.html',
+ 'info_dict': {
+ 'id': '7ce5noezvafma27',
+ 'title': '鸡毛飞上天',
+ 'description': 'md5:8cae3534327315b3872fbef5e51b5c5b',
+ },
+ 'playlist_count': 55,
+ }, {
+ 'url': 'https://v.qq.com/x/cover/oshd7r0vy9sfq8e.html',
+ 'info_dict': {
+ 'id': 'oshd7r0vy9sfq8e',
+ 'title': '恋爱细胞2',
+ 'description': 'md5:9d8a2245679f71ca828534b0f95d2a03',
+ },
+ 'playlist_count': 12,
+ }]
+
+ def _real_extract(self, url):
+ series_id = self._match_id(url)
+ webpage = self._download_webpage(url, series_id)
+ webpage_metadata = self._get_webpage_metadata(webpage, series_id)
+
+ episode_paths = [f'/x/cover/{series_id}/{video_id}.html' for video_id in re.findall(
+ r'<div[^>]+data-vid="(?P<video_id>[^"]+)"[^>]+class="[^"]+episode-item-rect--number',
+ webpage)]
+
+ return self.playlist_from_matches(
+ episode_paths, series_id, ie=VQQVideoIE, getter=functools.partial(urljoin, url),
+ title=self._get_clean_title(traverse_obj(webpage_metadata, ('coverInfo', 'title'))
+ or self._og_search_title(webpage)),
+ description=(traverse_obj(webpage_metadata, ('coverInfo', 'description'))
+ or self._og_search_description(webpage)))
+
+
+class WeTvBaseIE(TencentBaseIE):
+ _VALID_URL_BASE = r'https?://(?:www\.)?wetv\.vip/(?:[^?#]+/)?play'
+
+ _API_URL = 'https://play.wetv.vip/getvinfo'
+ _APP_VERSION = '3.5.57'
+ _PLATFORM = '4830201'
+ _HOST = 'wetv.vip'
+ _REFERER = 'wetv.vip'
+
+ def _get_webpage_metadata(self, webpage, video_id):
+ return self._parse_json(
+ traverse_obj(self._search_nextjs_data(webpage, video_id), ('props', 'pageProps', 'data')),
+ video_id, fatal=False)
+
+ def _extract_episode(self, url):
+ video_id, series_id = self._match_valid_url(url).group('id', 'series_id')
+ webpage = self._download_webpage(url, video_id)
+ webpage_metadata = self._get_webpage_metadata(webpage, video_id)
+
+ formats, subtitles = self._extract_all_video_formats_and_subtitles(url, video_id, series_id)
+ return {
+ 'id': video_id,
+ 'title': self._get_clean_title(self._og_search_title(webpage)
+ or traverse_obj(webpage_metadata, ('coverInfo', 'title'))),
+ 'description': (traverse_obj(webpage_metadata, ('coverInfo', 'description'))
+ or self._og_search_description(webpage)),
+ 'formats': formats,
+ 'subtitles': subtitles,
+ 'thumbnail': self._og_search_thumbnail(webpage),
+ 'duration': int_or_none(traverse_obj(webpage_metadata, ('videoInfo', 'duration'))),
+ 'series': traverse_obj(webpage_metadata, ('coverInfo', 'title')),
+ 'episode_number': int_or_none(traverse_obj(webpage_metadata, ('videoInfo', 'episode'))),
+ }
+
+ def _extract_series(self, url, ie):
+ series_id = self._match_id(url)
+ webpage = self._download_webpage(url, series_id)
+ webpage_metadata = self._get_webpage_metadata(webpage, series_id)
+
+ episode_paths = ([f'/play/{series_id}/{episode["vid"]}' for episode in webpage_metadata.get('videoList') or []]
+ or re.findall(r'<a[^>]+class="play-video__link"[^>]+href="(?P<path>[^"]+)', webpage))
+
+ return self.playlist_from_matches(
+ episode_paths, series_id, ie=ie, getter=functools.partial(urljoin, url),
+ title=self._get_clean_title(traverse_obj(webpage_metadata, ('coverInfo', 'title'))
+ or self._og_search_title(webpage)),
+ description=(traverse_obj(webpage_metadata, ('coverInfo', 'description'))
+ or self._og_search_description(webpage)))
+
+
+class WeTvEpisodeIE(WeTvBaseIE):
+ IE_NAME = 'wetv:episode'
+ _VALID_URL = WeTvBaseIE._VALID_URL_BASE + r'/(?P<series_id>\w+)(?:-[^?#]+)?/(?P<id>\w+)(?:-[^?#]+)?'
+
+ _TESTS = [{
+ 'url': 'https://wetv.vip/en/play/air11ooo2rdsdi3-Cute-Programmer/v0040pr89t9-EP1-Cute-Programmer',
+ 'md5': '0c70fdfaa5011ab022eebc598e64bbbe',
+ 'info_dict': {
+ 'id': 'v0040pr89t9',
+ 'ext': 'mp4',
+ 'title': 'EP1: Cute Programmer',
+ 'description': 'md5:e87beab3bf9f392d6b9e541a63286343',
+ 'thumbnail': r're:^https?://[^?#]+air11ooo2rdsdi3',
+ 'series': 'Cute Programmer',
+ 'episode': 'Episode 1',
+ 'episode_number': 1,
+ 'duration': 2835,
+ 'format_id': r're:^shd',
+ },
+ }, {
+ 'url': 'https://wetv.vip/en/play/u37kgfnfzs73kiu/p0039b9nvik',
+ 'md5': '3b3c15ca4b9a158d8d28d5aa9d7c0a49',
+ 'info_dict': {
+ 'id': 'p0039b9nvik',
+ 'ext': 'mp4',
+ 'title': 'EP1: You Are My Glory',
+ 'description': 'md5:831363a4c3b4d7615e1f3854be3a123b',
+ 'thumbnail': r're:^https?://[^?#]+u37kgfnfzs73kiu',
+ 'series': 'You Are My Glory',
+ 'episode': 'Episode 1',
+ 'episode_number': 1,
+ 'duration': 2454,
+ 'format_id': r're:^shd',
+ },
+ }, {
+ 'url': 'https://wetv.vip/en/play/lcxgwod5hapghvw-WeTV-PICK-A-BOO/i0042y00lxp-Zhao-Lusi-Describes-The-First-Experiences-She-Had-In-Who-Rules-The-World-%7C-WeTV-PICK-A-BOO',
+ 'md5': '71133f5c2d5d6cad3427e1b010488280',
+ 'info_dict': {
+ 'id': 'i0042y00lxp',
+ 'ext': 'mp4',
+ 'title': 'md5:f7a0857dbe5fbbe2e7ad630b92b54e6a',
+ 'description': 'md5:76260cb9cdc0ef76826d7ca9d92fadfa',
+ 'thumbnail': r're:^https?://[^?#]+i0042y00lxp',
+ 'series': 'WeTV PICK-A-BOO',
+ 'episode': 'Episode 0',
+ 'episode_number': 0,
+ 'duration': 442,
+ 'format_id': r're:^shd',
+ },
+ }]
+
+ def _real_extract(self, url):
+ return self._extract_episode(url)
+
+
+class WeTvSeriesIE(WeTvBaseIE):
+ _VALID_URL = WeTvBaseIE._VALID_URL_BASE + r'/(?P<id>\w+)(?:-[^/?#]+)?/?(?:[?#]|$)'
+
+ _TESTS = [{
+ 'url': 'https://wetv.vip/play/air11ooo2rdsdi3-Cute-Programmer',
+ 'info_dict': {
+ 'id': 'air11ooo2rdsdi3',
+ 'title': 'Cute Programmer',
+ 'description': 'md5:e87beab3bf9f392d6b9e541a63286343',
+ },
+ 'playlist_count': 30,
+ }, {
+ 'url': 'https://wetv.vip/en/play/u37kgfnfzs73kiu-You-Are-My-Glory',
+ 'info_dict': {
+ 'id': 'u37kgfnfzs73kiu',
+ 'title': 'You Are My Glory',
+ 'description': 'md5:831363a4c3b4d7615e1f3854be3a123b',
+ },
+ 'playlist_count': 32,
+ }]
+
+ def _real_extract(self, url):
+ return self._extract_series(url, WeTvEpisodeIE)
+
+
+class IflixBaseIE(WeTvBaseIE):
+ _VALID_URL_BASE = r'https?://(?:www\.)?iflix\.com/(?:[^?#]+/)?play'
+
+ _API_URL = 'https://vplay.iflix.com/getvinfo'
+ _APP_VERSION = '3.5.57'
+ _PLATFORM = '330201'
+ _HOST = 'www.iflix.com'
+ _REFERER = 'www.iflix.com'
+
+
+class IflixEpisodeIE(IflixBaseIE):
+ IE_NAME = 'iflix:episode'
+ _VALID_URL = IflixBaseIE._VALID_URL_BASE + r'/(?P<series_id>\w+)(?:-[^?#]+)?/(?P<id>\w+)(?:-[^?#]+)?'
+
+ _TESTS = [{
+ 'url': 'https://www.iflix.com/en/play/daijrxu03yypu0s/a0040kvgaza',
+ 'md5': '9740f9338c3a2105290d16b68fb3262f',
+ 'info_dict': {
+ 'id': 'a0040kvgaza',
+ 'ext': 'mp4',
+ 'title': 'EP1: Put Your Head On My Shoulder 2021',
+ 'description': 'md5:c095a742d3b7da6dfedd0c8170727a42',
+ 'thumbnail': r're:^https?://[^?#]+daijrxu03yypu0s',
+ 'series': 'Put Your Head On My Shoulder 2021',
+ 'episode': 'Episode 1',
+ 'episode_number': 1,
+ 'duration': 2639,
+ 'format_id': r're:^shd',
+ },
+ }, {
+ 'url': 'https://www.iflix.com/en/play/fvvrcc3ra9lbtt1-Take-My-Brother-Away/i0029sd3gm1-EP1%EF%BC%9ATake-My-Brother-Away',
+ 'md5': '375c9b8478fdedca062274b2c2f53681',
+ 'info_dict': {
+ 'id': 'i0029sd3gm1',
+ 'ext': 'mp4',
+ 'title': 'EP1:Take My Brother Away',
+ 'description': 'md5:f0f7be1606af51cd94d5627de96b0c76',
+ 'thumbnail': r're:^https?://[^?#]+fvvrcc3ra9lbtt1',
+ 'series': 'Take My Brother Away',
+ 'episode': 'Episode 1',
+ 'episode_number': 1,
+ 'duration': 228,
+ 'format_id': r're:^shd',
+ },
+ }]
+
+ def _real_extract(self, url):
+ return self._extract_episode(url)
+
+
+class IflixSeriesIE(IflixBaseIE):
+ _VALID_URL = IflixBaseIE._VALID_URL_BASE + r'/(?P<id>\w+)(?:-[^/?#]+)?/?(?:[?#]|$)'
+
+ _TESTS = [{
+ 'url': 'https://www.iflix.com/en/play/g21a6qk4u1s9x22-You-Are-My-Hero',
+ 'info_dict': {
+ 'id': 'g21a6qk4u1s9x22',
+ 'title': 'You Are My Hero',
+ 'description': 'md5:9c4d844bc0799cd3d2b5aed758a2050a',
+ },
+ 'playlist_count': 40,
+ }, {
+ 'url': 'https://www.iflix.com/play/0s682hc45t0ohll',
+ 'info_dict': {
+ 'id': '0s682hc45t0ohll',
+ 'title': 'Miss Gu Who Is Silent',
+ 'description': 'md5:a9651d0236f25af06435e845fa2f8c78',
+ },
+ 'playlist_count': 20,
+ }]
+
+ def _real_extract(self, url):
+ return self._extract_series(url, IflixEpisodeIE)
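
The ckey that gates every Tencent video API call is an AES-CBC-encrypted,
checksum-prefixed, pipe-delimited string. A sketch of the plaintext that
_get_ckey builds; all values below are placeholders except the literal
'mg3c3b04ba' version tag:

    import time

    video_id, guid = 'q326831cny0', '0123456789abcdef'
    app_version, platform = '3.5.57', '10901'
    url, ua = 'https://v.qq.com/x/page/q326831cny0.html', 'Mozilla/5.0'

    payload = (f'{video_id}|{int(time.time())}|mg3c3b04ba|{app_version}|{guid}|'
               f'{platform}|{url[:48]}|{ua.lower()[:48]}||Mozilla|Netscape|'
               'Windows x86_64|00|')
    # _get_ckey prefixes the payload with its own character-sum checksum,
    # encrypts with the key/IV literals shown above, and hex-uppercases:
    plaintext = f'|{sum(map(ord, payload))}|{payload}'
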
diff --git a/yt_dlp/extractor/tennistv.py b/yt_dlp/extractor/tennistv.py
new file mode 100644
index 0000000..c1b4a33
--- /dev/null
+++ b/yt_dlp/extractor/tennistv.py
@@ -0,0 +1,155 @@
+import urllib.parse
+
+from .common import InfoExtractor
+from ..utils import (
+ ExtractorError,
+ random_uuidv4,
+ unified_timestamp,
+ urlencode_postdata,
+)
+
+
+class TennisTVIE(InfoExtractor):
+ _VALID_URL = r'https?://(?:www\.)?tennistv\.com/videos/(?P<id>[-a-z0-9]+)'
+ _TESTS = [{
+ 'url': 'https://www.tennistv.com/videos/indian-wells-2018-verdasco-fritz',
+ 'info_dict': {
+ 'id': 'indian-wells-2018-verdasco-fritz',
+ 'ext': 'mp4',
+ 'title': 'Fernando Verdasco v Taylor Fritz',
+ 'description': 're:^After his stunning victory.{174}$',
+ 'thumbnail': 'https://atp-prod.akamaized.net/api/images/v1/images/112831/landscape/1242/0',
+ 'timestamp': 1521017381,
+ 'upload_date': '20180314',
+ },
+ 'params': {
+ 'skip_download': True,
+ },
+ 'skip': 'Requires email and password of a subscribed account',
+ }, {
+ 'url': 'https://www.tennistv.com/videos/2650480/best-matches-of-2022-part-5',
+ 'info_dict': {
+ 'id': '2650480',
+ 'ext': 'mp4',
+ 'title': 'Best Matches of 2022 - Part 5',
+ 'description': 'md5:36dec3bfae7ed74bd79e48045b17264c',
+ 'thumbnail': 'https://open.http.mp.streamamg.com/p/3001482/sp/300148200/thumbnail/entry_id/0_myef18pd/version/100001/height/1920',
+ },
+ 'params': {'skip_download': 'm3u8'},
+ 'skip': 'Requires email and password of a subscribed account',
+ }]
+ _NETRC_MACHINE = 'tennistv'
+
+ access_token, refresh_token = None, None
+ _PARTNER_ID = 3001482
+ _FORMAT_URL = 'https://open.http.mp.streamamg.com/p/{partner}/sp/{partner}00/playManifest/entryId/{entry}/format/applehttp/protocol/https/a.m3u8?ks={session}'
+ _AUTH_BASE_URL = 'https://sso.tennistv.com/auth/realms/TennisTV/protocol/openid-connect'
+ _HEADERS = {
+ 'origin': 'https://www.tennistv.com',
+ 'referer': 'https://www.tennistv.com/',
+ 'Content-Type': 'application/x-www-form-urlencoded',
+ }
+
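+ # login is a standard OpenID Connect authorization-code flow; the
+ # /auth/realms/.../openid-connect endpoints suggest a Keycloak-based SSO server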
+ def _perform_login(self, username, password):
+ login_page = self._download_webpage(
+ f'{self._AUTH_BASE_URL}/auth', None, 'Downloading login page',
+ query={
+ 'client_id': 'tennis-tv-web',
+ 'redirect_uri': 'https://tennistv.com',
+ 'response_mode': 'fragment',
+ 'response_type': 'code',
+ 'scope': 'openid'
+ })
+
+ post_url = self._html_search_regex(r'action=["\']([^"\']+?)["\']\s+method=["\']post["\']', login_page, 'login POST url')
+ temp_page = self._download_webpage(
+ post_url, None, 'Sending login data', 'Unable to send login data',
+ headers=self._HEADERS, data=urlencode_postdata({
+ 'username': username,
+ 'password': password,
+ 'submitAction': 'Log In'
+ }))
+ if 'Your username or password was incorrect' in temp_page:
+ raise ExtractorError('Your username or password was incorrect', expected=True)
+
+ handle = self._request_webpage(
+ f'{self._AUTH_BASE_URL}/auth', None, 'Logging in', headers=self._HEADERS,
+ query={
+ 'client_id': 'tennis-tv-web',
+ 'redirect_uri': 'https://www.tennistv.com/resources/v1.1.10/html/silent-check-sso.html',
+ 'state': random_uuidv4(),
+ 'response_mode': 'fragment',
+ 'response_type': 'code',
+ 'scope': 'openid',
+ 'nonce': random_uuidv4(),
+ 'prompt': 'none'
+ })
+
+ self.get_token(None, {
+ 'code': urllib.parse.parse_qs(handle.url)['code'][-1],
+ 'grant_type': 'authorization_code',
+ 'client_id': 'tennis-tv-web',
+ 'redirect_uri': 'https://www.tennistv.com/resources/v1.1.10/html/silent-check-sso.html'
+ })
+
+ def get_token(self, video_id, payload):
+ res = self._download_json(
+ f'{self._AUTH_BASE_URL}/token', video_id, 'Fetching tokens',
+ 'Unable to fetch tokens', headers=self._HEADERS, data=urlencode_postdata(payload))
+
+ self.access_token = res.get('access_token') or self.access_token
+ self.refresh_token = res.get('refresh_token') or self.refresh_token
+
+ def _real_initialize(self):
+ if self.access_token and self.refresh_token:
+ return
+
+ cookies = self._get_cookies('https://www.tennistv.com/')
+ if not cookies.get('access_token') or not cookies.get('refresh_token'):
+ self.raise_login_required()
+ self.access_token, self.refresh_token = cookies['access_token'].value, cookies['refresh_token'].value
+
+ def _download_session_json(self, video_id, entryid):
+ return self._download_json(
+ f'https://atppayments.streamamg.com/api/v1/session/ksession/?lang=en&apijwttoken={self.access_token}&entryId={entryid}',
+ video_id, 'Downloading ksession token', 'Failed to download ksession token', headers=self._HEADERS)
+
+ def _real_extract(self, url):
+ video_id = self._match_id(url)
+ webpage = self._download_webpage(url, video_id)
+
+ entryid = self._search_regex(r'data-entry-id=["\']([^"\']+)', webpage, 'entryID')
+ session_json = self._download_session_json(video_id, entryid)
+
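+ # a missing KSession usually means the access token has expired; refresh it once and retry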
+ k_session = session_json.get('KSession')
+ if k_session is None:
+ self.get_token(video_id, {
+ 'grant_type': 'refresh_token',
+ 'refresh_token': self.refresh_token,
+ 'client_id': 'tennis-tv-web'
+ })
+ k_session = self._download_session_json(video_id, entryid).get('KSession')
+ if k_session is None:
+ raise ExtractorError('Failed to get KSession, possibly a premium video', expected=True)
+
+ if session_json.get('ErrorMessage'):
+ self.report_warning(session_json['ErrorMessage'])
+
+ formats, subtitles = self._extract_m3u8_formats_and_subtitles(
+ self._FORMAT_URL.format(partner=self._PARTNER_ID, entry=entryid, session=k_session), video_id)
+
+ return {
+ 'id': video_id,
+ 'title': self._generic_title('', webpage),
+ 'description': self._html_search_regex(
+ (r'<span itemprop="description" content=["\']([^"\']+)["\']>', *self._og_regexes('description')),
+ webpage, 'description', fatal=False),
+ 'thumbnail': f'https://open.http.mp.streamamg.com/p/{self._PARTNER_ID}/sp/{self._PARTNER_ID}00/thumbnail/entry_id/{entryid}/version/100001/height/1920',
+ 'timestamp': unified_timestamp(self._html_search_regex(
+ r'<span itemprop="uploadDate" content=["\']([^"\']+)["\']>', webpage, 'upload time', fatal=False)),
+ 'series': self._html_search_regex(r'data-series\s*?=\s*?"(.*?)"', webpage, 'series', fatal=False) or None,
+ 'season': self._html_search_regex(r'data-tournament-city\s*?=\s*?"(.*?)"', webpage, 'season', fatal=False) or None,
+ 'episode': self._html_search_regex(r'data-round\s*?=\s*?"(.*?)"', webpage, 'round', fatal=False) or None,
+ 'formats': formats,
+ 'subtitles': subtitles,
+ }
diff --git a/yt_dlp/extractor/tenplay.py b/yt_dlp/extractor/tenplay.py
new file mode 100644
index 0000000..a98275d
--- /dev/null
+++ b/yt_dlp/extractor/tenplay.py
@@ -0,0 +1,174 @@
+import base64
+import functools
+import itertools
+from datetime import datetime
+
+from .common import InfoExtractor
+from ..networking import HEADRequest
+from ..utils import int_or_none, traverse_obj, urlencode_postdata, urljoin
+
+
+class TenPlayIE(InfoExtractor):
+ _VALID_URL = r'https?://(?:www\.)?10play\.com\.au/(?:[^/]+/)+(?P<id>tpv\d{6}[a-z]{5})'
+ _NETRC_MACHINE = '10play'
+ _TESTS = [{
+ 'url': 'https://10play.com.au/neighbours/web-extras/season-39/nathan-borg-is-the-first-aussie-actor-with-a-cochlear-implant-to-join-neighbours/tpv210128qupwd',
+ 'info_dict': {
+ 'id': '6226844312001',
+ 'ext': 'mp4',
+ 'title': 'Nathan Borg Is The First Aussie Actor With A Cochlear Implant To Join Neighbours',
+ 'alt_title': 'Nathan Borg Is The First Aussie Actor With A Cochlear Implant To Join Neighbours',
+ 'description': 'md5:a02d0199c901c2dd4c796f1e7dd0de43',
+ 'duration': 186,
+ 'season': 'Season 39',
+ 'season_number': 39,
+ 'series': 'Neighbours',
+ 'thumbnail': r're:https://.*\.jpg',
+ 'uploader': 'Channel 10',
+ 'age_limit': 15,
+ 'timestamp': 1611810000,
+ 'upload_date': '20210128',
+ 'uploader_id': '2199827728001',
+ },
+ 'params': {
+ 'skip_download': True,
+ },
+ 'skip': 'Only available in Australia',
+ }, {
+ 'url': 'https://10play.com.au/todd-sampsons-body-hack/episodes/season-4/episode-7/tpv200921kvngh',
+ 'info_dict': {
+ 'id': '6192880312001',
+ 'ext': 'mp4',
+ 'title': "Todd Sampson's Body Hack - S4 Ep. 2",
+ 'description': 'md5:fa278820ad90f08ea187f9458316ac74',
+ 'age_limit': 15,
+ 'timestamp': 1600770600,
+ 'upload_date': '20200922',
+ 'uploader': 'Channel 10',
+ 'uploader_id': '2199827728001'
+ },
+ 'params': {
+ 'skip_download': True,
+ }
+ }, {
+ 'url': 'https://10play.com.au/how-to-stay-married/web-extras/season-1/terrys-talks-ep-1-embracing-change/tpv190915ylupc',
+ 'only_matching': True,
+ }]
+ _GEO_BYPASS = False
+
+ _AUS_AGES = {
+ 'G': 0,
+ 'PG': 15,
+ 'M': 15,
+ 'MA': 15,
+ 'MA15+': 15,
+ 'R': 18,
+ 'X': 18
+ }
+
+ def _get_bearer_token(self, video_id):
+ username, password = self._get_login_info()
+ if username is None or password is None:
+ self.raise_login_required('Your 10play account\'s details must be provided with --username and --password.')
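+ # the X-Network-Ten-Auth header is the current date (at midnight), base64-encoded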
+ _timestamp = datetime.now().strftime('%Y%m%d000000')
+ _auth_header = base64.b64encode(_timestamp.encode('ascii')).decode('ascii')
+ data = self._download_json('https://10play.com.au/api/user/auth', video_id, 'Getting bearer token', headers={
+ 'X-Network-Ten-Auth': _auth_header,
+ }, data=urlencode_postdata({
+ 'email': username,
+ 'password': password,
+ }))
+ return 'Bearer ' + data['jwt']['accessToken']
+
+ def _real_extract(self, url):
+ content_id = self._match_id(url)
+ data = self._download_json(
+ 'https://10play.com.au/api/v1/videos/' + content_id, content_id)
+ headers = {}
+
+ if data.get('memberGated') is True:
+ _token = self._get_bearer_token(content_id)
+ headers = {'Authorization': _token}
+
+ _video_url = self._download_json(
+ data.get('playbackApiEndpoint'), content_id, 'Downloading video JSON',
+ headers=headers).get('source')
+ m3u8_url = self._request_webpage(HEADRequest(
+ _video_url), content_id).url
+ if '10play-not-in-oz' in m3u8_url:
+ self.raise_geo_restricted(countries=['AU'])
+ formats = self._extract_m3u8_formats(m3u8_url, content_id, 'mp4')
+
+ return {
+ 'formats': formats,
+ 'subtitles': {'en': [{'url': data.get('captionUrl')}]} if data.get('captionUrl') else None,
+ 'id': data.get('altId') or content_id,
+ 'duration': data.get('duration'),
+ 'title': data.get('subtitle'),
+ 'alt_title': data.get('title'),
+ 'description': data.get('description'),
+ 'age_limit': self._AUS_AGES.get(data.get('classification')),
+ 'series': data.get('tvShow'),
+ 'season_number': int_or_none(data.get('season')),
+ 'episode_number': int_or_none(data.get('episode')),
+ 'timestamp': data.get('published'),
+ 'thumbnail': data.get('imageUrl'),
+ 'uploader': 'Channel 10',
+ 'uploader_id': '2199827728001',
+ }
+
+
+class TenPlaySeasonIE(InfoExtractor):
+ _VALID_URL = r'https?://(?:www\.)?10play\.com\.au/(?P<show>[^/?#]+)/episodes/(?P<season>[^/?#]+)/?(?:$|[?#])'
+ _TESTS = [{
+ 'url': 'https://10play.com.au/masterchef/episodes/season-14',
+ 'info_dict': {
+ 'title': 'Season 14',
+ 'id': 'MjMyOTIy',
+ },
+ 'playlist_mincount': 64,
+ }, {
+ 'url': 'https://10play.com.au/the-bold-and-the-beautiful-fast-tracked/episodes/season-2022',
+ 'info_dict': {
+ 'title': 'Season 2022',
+ 'id': 'Mjc0OTIw',
+ },
+ 'playlist_mincount': 256,
+ }]
+
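+ # the carousel endpoint pages by exclusion rather than by page number:
+ # each request passes the episode IDs already seen via skipIds[]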
+ def _entries(self, load_more_url, display_id=None):
+ skip_ids = []
+ for page in itertools.count(1):
+ episodes_carousel = self._download_json(
+ load_more_url, display_id, query={'skipIds[]': skip_ids},
+ note=f'Fetching episodes page {page}')
+
+ episodes_chunk = episodes_carousel['items']
+ skip_ids.extend(ep['id'] for ep in episodes_chunk)
+
+ for ep in episodes_chunk:
+ yield ep['cardLink']
+ if not episodes_carousel['hasMore']:
+ break
+
+ def _real_extract(self, url):
+ show, season = self._match_valid_url(url).group('show', 'season')
+ season_info = self._download_json(
+ f'https://10play.com.au/api/shows/{show}/episodes/{season}', f'{show}/{season}')
+
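+ # find the "episodes" carousel among the page components, falling back to any dict component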
+ episodes_carousel = traverse_obj(season_info, (
+ 'content', 0, 'components', (
+ lambda _, v: v['title'].lower() == 'episodes',
+ (..., {dict}),
+ )), get_all=False) or {}
+
+ playlist_id = episodes_carousel['tpId']
+
+ return self.playlist_from_matches(
+ self._entries(urljoin(url, episodes_carousel['loadMoreUrl']), playlist_id),
+ playlist_id, traverse_obj(season_info, ('content', 0, 'title', {str})),
+ getter=functools.partial(urljoin, url))
diff --git a/yt_dlp/extractor/testurl.py b/yt_dlp/extractor/testurl.py
new file mode 100644
index 0000000..3cf0017
--- /dev/null
+++ b/yt_dlp/extractor/testurl.py
@@ -0,0 +1,50 @@
+import re
+
+from .common import InfoExtractor
+from ..utils import ExtractorError
+
+
+class TestURLIE(InfoExtractor):
+ """ Allows addressing of the test cases as test:yout.*be_1 """
+
+ IE_DESC = False # Do not list
+ _VALID_URL = r'test(?:url)?:(?P<extractor>.*?)(?:_(?P<num>\d+|all))?$'
+
+ def _real_extract(self, url):
+ from . import gen_extractor_classes
+
+ extractor_id, num = self._match_valid_url(url).group('extractor', 'num')
+ if not extractor_id:
+ return {'id': ':test', 'title': '', 'url': url}
+
+ rex = re.compile(extractor_id, flags=re.IGNORECASE)
+ matching_extractors = [e for e in gen_extractor_classes() if rex.search(e.IE_NAME)]
+
+ if len(matching_extractors) == 0:
+ raise ExtractorError(f'No extractors matching {extractor_id!r} found', expected=True)
+ elif len(matching_extractors) > 1:
+ extractor = next(( # Check for exact match
+ ie for ie in matching_extractors if ie.IE_NAME.lower() == extractor_id.lower()
+ ), None) or next(( # Check for exact match without plugin suffix
+ ie for ie in matching_extractors if ie.IE_NAME.split('+')[0].lower() == extractor_id.lower()
+ ), None)
+ if not extractor:
+ raise ExtractorError(
+ f'Found multiple matching extractors: {" ".join(ie.IE_NAME for ie in matching_extractors)}',
+ expected=True)
+ else:
+ extractor = matching_extractors[0]
+
+ testcases = tuple(extractor.get_testcases(True))
+ if num == 'all':
+ return self.playlist_result(
+ [self.url_result(tc['url'], extractor) for tc in testcases],
+ url, f'{extractor.IE_NAME} tests')
+ try:
+ tc = testcases[int(num or 0)]
+ except IndexError:
+ raise ExtractorError(
+ f'Test case {num or 0} not found, got only {len(testcases)} tests', expected=True)
+
+ self.to_screen(f'Test URL: {tc["url"]}')
+ return self.url_result(tc['url'], extractor)
diff --git a/yt_dlp/extractor/tf1.py b/yt_dlp/extractor/tf1.py
new file mode 100644
index 0000000..aba4927
--- /dev/null
+++ b/yt_dlp/extractor/tf1.py
@@ -0,0 +1,102 @@
+import json
+
+from .common import InfoExtractor
+from ..utils import (
+ int_or_none,
+ parse_iso8601,
+ try_get,
+)
+
+
+class TF1IE(InfoExtractor):
+ _VALID_URL = r'https?://(?:www\.)?tf1\.fr/[^/]+/(?P<program_slug>[^/]+)/videos/(?P<id>[^/?&#]+)\.html'
+ _TESTS = [{
+ 'url': 'https://www.tf1.fr/tmc/quotidien-avec-yann-barthes/videos/quotidien-premiere-partie-11-juin-2019.html',
+ 'info_dict': {
+ 'id': '13641379',
+ 'ext': 'mp4',
+ 'title': 'md5:f392bc52245dc5ad43771650c96fb620',
+ 'description': 'md5:a02cdb217141fb2d469d6216339b052f',
+ 'upload_date': '20190611',
+ 'timestamp': 1560273989,
+ 'duration': 1738,
+ 'series': 'Quotidien avec Yann Barthès',
+ 'tags': ['intégrale', 'quotidien', 'Replay'],
+ },
+ 'params': {
+ # Sometimes wat serves the whole file with the --test option
+ 'skip_download': True,
+ },
+ }, {
+ 'url': 'https://www.tf1.fr/tmc/burger-quiz/videos/burger-quiz-du-19-aout-2023-s03-episode-21-85585666.html',
+ 'info_dict': {
+ 'id': '14010600',
+ 'ext': 'mp4',
+ 'title': 'Burger Quiz - S03 EP21 avec Eye Haidara, Anne Depétrini, Jonathan Zaccaï et Pio Marmaï',
+ 'thumbnail': 'https://photos.tf1.fr/1280/720/burger-quiz-11-9adb79-0@1x.jpg',
+ 'description': 'Manu Payet recevra Eye Haidara, Anne Depétrini, Jonathan Zaccaï et Pio Marmaï.',
+ 'upload_date': '20230819',
+ 'timestamp': 1692469471,
+ 'season_number': 3,
+ 'series': 'Burger Quiz',
+ 'episode_number': 21,
+ 'season': 'Season 3',
+ 'tags': 'count:13',
+ 'episode': 'Episode 21',
+ 'duration': 2312
+ },
+ 'params': {'skip_download': 'm3u8'},
+ }, {
+ 'url': 'http://www.tf1.fr/tf1/koh-lanta/videos/replay-koh-lanta-22-mai-2015.html',
+ 'only_matching': True,
+ }, {
+ 'url': 'http://www.tf1.fr/hd1/documentaire/videos/mylene-farmer-d-une-icone.html',
+ 'only_matching': True,
+ }]
+
+ def _real_extract(self, url):
+ program_slug, slug = self._match_valid_url(url).groups()
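+ # the fixed 'id' appears to be a persisted GraphQL query hash; only 'variables' changes per request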
+ video = self._download_json(
+ 'https://www.tf1.fr/graphql/web', slug, query={
+ 'id': '9b80783950b85247541dd1d851f9cc7fa36574af015621f853ab111a679ce26f',
+ 'variables': json.dumps({
+ 'programSlug': program_slug,
+ 'slug': slug,
+ })
+ })['data']['videoBySlug']
+ wat_id = video['streamId']
+
+ tags = []
+ for tag in (video.get('tags') or []):
+ label = tag.get('label')
+ if not label:
+ continue
+ tags.append(label)
+
+ decoration = video.get('decoration') or {}
+
+ thumbnails = []
+ for source in (try_get(decoration, lambda x: x['image']['sources'], list) or []):
+ source_url = source.get('url')
+ if not source_url:
+ continue
+ thumbnails.append({
+ 'url': source_url,
+ 'width': int_or_none(source.get('width')),
+ })
+
+ return {
+ '_type': 'url_transparent',
+ 'id': wat_id,
+ 'url': 'wat:' + wat_id,
+ 'title': video.get('title'),
+ 'thumbnails': thumbnails,
+ 'description': decoration.get('description'),
+ 'timestamp': parse_iso8601(video.get('date')),
+ 'duration': int_or_none(try_get(video, lambda x: x['publicPlayingInfos']['duration'])),
+ 'tags': tags,
+ 'series': decoration.get('programLabel'),
+ 'season_number': int_or_none(video.get('season')),
+ 'episode_number': int_or_none(video.get('episode')),
+ }
diff --git a/yt_dlp/extractor/tfo.py b/yt_dlp/extractor/tfo.py
new file mode 100644
index 0000000..d417f50
--- /dev/null
+++ b/yt_dlp/extractor/tfo.py
@@ -0,0 +1,49 @@
+import json
+
+from .common import InfoExtractor
+from ..networking import HEADRequest
+from ..utils import ExtractorError, clean_html, int_or_none
+
+
+class TFOIE(InfoExtractor):
+ _GEO_COUNTRIES = ['CA']
+ _VALID_URL = r'https?://(?:www\.)?tfo\.org/(?:en|fr)/(?:[^/]+/){2}(?P<id>\d+)'
+ _TEST = {
+ 'url': 'http://www.tfo.org/en/universe/tfo-247/100463871/video-game-hackathon',
+ 'md5': 'cafbe4f47a8dae0ca0159937878100d6',
+ 'info_dict': {
+ 'id': '7da3d50e495c406b8fc0b997659cc075',
+ 'ext': 'mp4',
+ 'title': 'Video Game Hackathon',
+ 'description': 'md5:558afeba217c6c8d96c60e5421795c07',
+ }
+ }
+
+ def _real_extract(self, url):
+ video_id = self._match_id(url)
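+ # hit the homepage first so the tfo-session cookie required by the API below gets set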
+ self._request_webpage(HEADRequest('http://www.tfo.org/'), video_id)
+ infos = self._download_json(
+ 'http://www.tfo.org/api/web/video/get_infos', video_id, data=json.dumps({
+ 'product_id': video_id,
+ }).encode(), headers={
+ 'X-tfo-session': self._get_cookies('http://www.tfo.org/')['tfo-session'].value,
+ })
+ if infos.get('success') == 0:
+ if infos.get('code') == 'ErrGeoBlocked':
+ self.raise_geo_restricted(countries=self._GEO_COUNTRIES)
+ raise ExtractorError('%s said: %s' % (self.IE_NAME, clean_html(infos['msg'])), expected=True)
+ video_data = infos['data']
+
+ return {
+ '_type': 'url_transparent',
+ 'id': video_id,
+ 'url': 'limelight:media:' + video_data['llid'],
+ 'title': video_data['title'],
+ 'description': video_data.get('description'),
+ 'series': video_data.get('collection'),
+ 'season_number': int_or_none(video_data.get('season')),
+ 'episode_number': int_or_none(video_data.get('episode')),
+ 'duration': int_or_none(video_data.get('duration')),
+ 'ie_key': 'LimelightMedia',
+ }
diff --git a/yt_dlp/extractor/theguardian.py b/yt_dlp/extractor/theguardian.py
new file mode 100644
index 0000000..a231ecc
--- /dev/null
+++ b/yt_dlp/extractor/theguardian.py
@@ -0,0 +1,136 @@
+import itertools
+
+from .common import InfoExtractor
+from ..utils import (
+ clean_html,
+ extract_attributes,
+ get_element_by_class,
+ get_element_html_by_class,
+ get_elements_html_by_class,
+ parse_qs,
+ traverse_obj,
+ unified_strdate,
+ urljoin
+)
+
+
+class TheGuardianPodcastIE(InfoExtractor):
+ _VALID_URL = r'https?://(?:www\.)?theguardian\.com/\w+/audio/\d{4}/\w{3}/\d{1,2}/(?P<id>[\w-]+)'
+ _TESTS = [{
+ 'url': 'https://www.theguardian.com/news/audio/2023/nov/03/we-are-just-getting-started-the-plastic-eating-bacteria-that-could-change-the-world-podcast',
+ 'md5': 'd1771744681789b4cd7da2a08e487702',
+ 'info_dict': {
+ 'id': 'we-are-just-getting-started-the-plastic-eating-bacteria-that-could-change-the-world-podcast',
+ 'ext': 'mp3',
+ 'title': '‘We are just getting started’: the plastic-eating bacteria that could change the world – podcast',
+ 'description': 'md5:cfd3df2791d394d2ab62cd571d5207ee',
+ 'creator': 'Stephen Buranyi',
+ 'thumbnail': 'md5:73c12558fcb3b0e2a59422bfb33b3f79',
+ 'release_date': '20231103'
+ }
+ }, {
+ 'url': 'https://www.theguardian.com/news/audio/2023/oct/30/the-trials-of-robert-habeck-is-the-worlds-most-powerful-green-politician-doomed-to-fail-podcast',
+ 'md5': 'd1771744681789b4cd7da2a08e487702',
+ 'info_dict': {
+ 'id': 'the-trials-of-robert-habeck-is-the-worlds-most-powerful-green-politician-doomed-to-fail-podcast',
+ 'ext': 'mp3',
+ 'title': 'The trials of Robert Habeck: is the world’s most powerful green politician doomed to fail? – podcast',
+ 'description': 'md5:1b5cf6582d1771c6b7077784b5456994',
+ 'creator': 'Philip Oltermann',
+ 'thumbnail': 'md5:6e5c5ec43843e956e20be793722e9080',
+ 'release_date': '20231030'
+ }
+ }, {
+ 'url': 'https://www.theguardian.com/football/audio/2023/nov/06/arsenal-feel-hard-done-by-and-luton-hold-liverpool-football-weekly',
+ 'md5': 'a2fcff6f8e060a95b1483295273dc35e',
+ 'info_dict': {
+ 'id': 'arsenal-feel-hard-done-by-and-luton-hold-liverpool-football-weekly',
+ 'ext': 'mp3',
+ 'title': 'Arsenal feel hard done by and Luton hold Liverpool – Football Weekly',
+ 'description': 'md5:286a9fbddaeb7c83cc65d1c4a5330b2a',
+ 'creator': 'Max Rushden',
+ 'thumbnail': 'md5:93eb7d6440f1bb94eb3a6cad63f48afd',
+ 'release_date': '20231106'
+ }
+ }, {
+ 'url': 'https://www.theguardian.com/politics/audio/2023/nov/02/the-covid-inquiry-politics-weekly-uk-podcast',
+ 'md5': '06a0f7e9701a80c8064a5d35690481ec',
+ 'info_dict': {
+ 'id': 'the-covid-inquiry-politics-weekly-uk-podcast',
+ 'ext': 'mp3',
+ 'title': 'The Covid inquiry | Politics Weekly UK - podcast',
+ 'description': 'md5:207c98859c14903582b17d25b014046e',
+ 'creator': 'Gaby Hinsliff',
+ 'thumbnail': 'md5:28932a7b5a25b057be330d2ed70ea7f3',
+ 'release_date': '20231102'
+ }
+ }]
+
+ def _real_extract(self, url):
+ video_id = self._match_id(url)
+ webpage = self._download_webpage(url, video_id)
+ return {
+ 'id': video_id,
+ 'title': self._og_search_title(webpage) or get_element_by_class('content__headline', webpage),
+ 'description': self._og_search_description(webpage),
+ 'creator': self._html_search_meta('author', webpage),
+ 'thumbnail': self._og_search_thumbnail(webpage),
+ 'release_date': unified_strdate(self._html_search_meta('article:published_time', webpage)),
+ 'url': extract_attributes(get_element_html_by_class(
+ 'podcast__player', webpage) or '').get('data-source'),
+ }
+
+
+class TheGuardianPodcastPlaylistIE(InfoExtractor):
+ _VALID_URL = r'https?://(?:www\.)?theguardian\.com/\w+/series/(?P<id>[\w-]+)(?:\?page=\d+)?'
+ _TESTS = [{
+ 'url': 'https://www.theguardian.com/football/series/theguardianswomensfootballweekly',
+ 'info_dict': {
+ 'id': 'theguardianswomensfootballweekly',
+ 'title': "The Guardian's Women's Football Weekly",
+ 'description': 'md5:e2cc021311e582d29935a73614a43f51'
+ },
+ 'playlist_mincount': 69
+ }, {
+ 'url': 'https://www.theguardian.com/news/series/todayinfocus?page=2',
+ 'info_dict': {
+ 'id': 'todayinfocus',
+ 'title': 'Today in Focus',
+ 'description': 'md5:0f097764fc0d359e0b6eb537be0387e2'
+ },
+ 'playlist_mincount': 1261
+ }, {
+ 'url': 'https://www.theguardian.com/news/series/the-audio-long-read',
+ 'info_dict': {
+ 'id': 'the-audio-long-read',
+ 'title': 'The Audio Long Read',
+ 'description': 'md5:5462994a27527309562b25b6defc4ef3'
+ },
+ 'playlist_mincount': 996
+ }]
+
+ def _entries(self, url, playlist_id):
+ for page in itertools.count(1):
+ webpage, urlh = self._download_webpage_handle(
+ url, playlist_id, f'Downloading page {page}', query={'page': page})
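+ # past the last page the site redirects away, dropping the ?page= parameter from the final URL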
+ if 'page' not in parse_qs(urlh.url):
+ break
+
+ episodes = get_elements_html_by_class('fc-item--type-media', webpage)
+ for url_path in traverse_obj(episodes, (..., {extract_attributes}, 'data-id')):
+ yield url_path
+
+ def _real_extract(self, url):
+ podcast_id = self._match_id(url)
+
+ webpage = self._download_webpage(url, podcast_id)
+
+ title = clean_html(get_element_by_class(
+ 'index-page-header__title', webpage) or get_element_by_class('flagship-audio__title', webpage))
+ description = self._og_search_description(webpage) or self._html_search_meta(
+ 'description', webpage)
+
+ return self.playlist_from_matches(
+ self._entries(url, podcast_id), podcast_id, title, description=description,
+ ie=TheGuardianPodcastIE, getter=lambda x: urljoin('https://www.theguardian.com', x))
diff --git a/yt_dlp/extractor/theholetv.py b/yt_dlp/extractor/theholetv.py
new file mode 100644
index 0000000..a13f83b
--- /dev/null
+++ b/yt_dlp/extractor/theholetv.py
@@ -0,0 +1,35 @@
+from .common import InfoExtractor
+from ..utils import extract_attributes, remove_end
+
+
+class TheHoleTvIE(InfoExtractor):
+ _VALID_URL = r'https?://(?:www\.)?the-hole\.tv/episodes/(?P<id>[\w-]+)'
+ _TESTS = [{
+ 'url': 'https://the-hole.tv/episodes/gromkii-vopros-sergey-orlov',
+ 'md5': 'fea6682f47786f3ae5a6cbd635ec4bf9',
+ 'info_dict': {
+ 'id': 'gromkii-vopros-sergey-orlov',
+ 'ext': 'mp4',
+ 'title': 'Сергей Орлов — Громкий вопрос',
+ 'thumbnail': 'https://assets-cdn.the-hole.tv/images/t8gan4n6zn627e7wni11b2uemqts',
+ 'description': 'md5:45741a9202331f995d9fb76996759379'
+ }
+ }]
+
+ def _real_extract(self, url):
+ video_id = self._match_id(url)
+ webpage = self._download_webpage(url, video_id)
+
+ player_attrs = extract_attributes(self._search_regex(
+ r'(<div[^>]*\bdata-controller="player"[^>]*>)', webpage, 'video player'))
+ formats, subtitles = self._extract_m3u8_formats_and_subtitles(
+ player_attrs['data-player-source-value'], video_id, 'mp4')
+
+ return {
+ 'id': video_id,
+ 'title': remove_end(self._html_extract_title(webpage), ' — The Hole'),
+ 'description': self._og_search_description(webpage),
+ 'thumbnail': player_attrs.get('data-player-poster-value'),
+ 'formats': formats,
+ 'subtitles': subtitles
+ }
diff --git a/yt_dlp/extractor/theintercept.py b/yt_dlp/extractor/theintercept.py
new file mode 100644
index 0000000..a991a4d
--- /dev/null
+++ b/yt_dlp/extractor/theintercept.py
@@ -0,0 +1,46 @@
+from .common import InfoExtractor
+from ..compat import compat_str
+from ..utils import (
+ ExtractorError,
+ int_or_none,
+ parse_iso8601,
+)
+
+
+class TheInterceptIE(InfoExtractor):
+ _VALID_URL = r'https?://theintercept\.com/fieldofvision/(?P<id>[^/?#]+)'
+ _TESTS = [{
+ 'url': 'https://theintercept.com/fieldofvision/thisisacoup-episode-four-surrender-or-die/',
+ 'md5': '145f28b41d44aab2f87c0a4ac8ec95bd',
+ 'info_dict': {
+ 'id': '46214',
+ 'ext': 'mp4',
+ 'title': '#ThisIsACoup – Episode Four: Surrender or Die',
+ 'description': 'md5:74dd27f0e2fbd50817829f97eaa33140',
+ 'timestamp': 1450429239,
+ 'upload_date': '20151218',
+ 'comment_count': int,
+ }
+ }]
+
+ def _real_extract(self, url):
+ display_id = self._match_id(url)
+ webpage = self._download_webpage(url, display_id)
+
+ json_data = self._parse_json(self._search_regex(
+ r'initialStoreTree\s*=\s*(?P<json_data>{.+})', webpage,
+ 'initialStoreTree'), display_id)
+
+ for post in json_data['resources']['posts'].values():
+ if post['slug'] == display_id:
+ return {
+ '_type': 'url_transparent',
+ 'url': 'jwplatform:%s' % post['fov_videoid'],
+ 'id': compat_str(post['ID']),
+ 'display_id': display_id,
+ 'title': post['title'],
+ 'description': post.get('excerpt'),
+ 'timestamp': parse_iso8601(post.get('date')),
+ 'comment_count': int_or_none(post.get('comments_number')),
+ }
+ raise ExtractorError('Unable to find the current post')
diff --git a/yt_dlp/extractor/theplatform.py b/yt_dlp/extractor/theplatform.py
new file mode 100644
index 0000000..9160f5e
--- /dev/null
+++ b/yt_dlp/extractor/theplatform.py
@@ -0,0 +1,428 @@
+import binascii
+import hashlib
+import hmac
+import re
+import time
+
+from .adobepass import AdobePassIE
+from .once import OnceIE
+from ..networking import HEADRequest, Request
+from ..utils import (
+ ExtractorError,
+ determine_ext,
+ find_xpath_attr,
+ float_or_none,
+ int_or_none,
+ mimetype2ext,
+ parse_qs,
+ traverse_obj,
+ unsmuggle_url,
+ update_url,
+ update_url_query,
+ urlhandle_detect_ext,
+ xpath_with_ns,
+)
+
+default_ns = 'http://www.w3.org/2005/SMIL21/Language'
+_x = lambda p: xpath_with_ns(p, {'smil': default_ns})
+
+
+class ThePlatformBaseIE(OnceIE):
+ _TP_TLD = 'com'
+
+ def _extract_theplatform_smil(self, smil_url, video_id, note='Downloading SMIL data'):
+ meta = self._download_xml(
+ smil_url, video_id, note=note, query={'format': 'SMIL'},
+ headers=self.geo_verification_headers())
+ error_element = find_xpath_attr(meta, _x('.//smil:ref'), 'src')
+ if error_element is not None:
+ exception = find_xpath_attr(
+ error_element, _x('.//smil:param'), 'name', 'exception')
+ if exception is not None:
+ if exception.get('value') == 'GeoLocationBlocked':
+ self.raise_geo_restricted(error_element.attrib['abstract'])
+ elif error_element.attrib['src'].startswith(
+ 'http://link.theplatform.%s/s/errorFiles/Unavailable.'
+ % self._TP_TLD):
+ raise ExtractorError(
+ error_element.attrib['abstract'], expected=True)
+
+ smil_formats, subtitles = self._parse_smil_formats_and_subtitles(
+ meta, smil_url, video_id, namespace=default_ns,
+ # these f4m parameters come from syfy.com and also work for nbc.com;
+ # other sites may use different ones
+ f4m_params={'g': 'UXWGVKRWHFSP', 'hdcore': '3.0.3'},
+ transform_rtmp_url=lambda streamer, src: (streamer, 'mp4:' + src))
+
+ formats = []
+ for _format in smil_formats:
+ if OnceIE.suitable(_format['url']):
+ formats.extend(self._extract_once_formats(_format['url']))
+ else:
+ media_url = _format['url']
+ if determine_ext(media_url) == 'm3u8':
+ hdnea2 = self._get_cookies(media_url).get('hdnea2')
+ if hdnea2:
+ _format['url'] = update_url_query(media_url, {'hdnea3': hdnea2.value})
+
+ formats.append(_format)
+
+ return formats, subtitles
+
+ def _download_theplatform_metadata(self, path, video_id):
+ info_url = 'http://link.theplatform.%s/s/%s?format=preview' % (self._TP_TLD, path)
+ return self._download_json(info_url, video_id)
+
+ def _parse_theplatform_metadata(self, info):
+ subtitles = {}
+ captions = info.get('captions')
+ if isinstance(captions, list):
+ for caption in captions:
+ lang, src, mime = caption.get('lang', 'en'), caption.get('src'), caption.get('type')
+ subtitles.setdefault(lang, []).append({
+ 'ext': mimetype2ext(mime),
+ 'url': src,
+ })
+
+ duration = info.get('duration')
+ tp_chapters = info.get('chapters', [])
+ chapters = []
+ if tp_chapters:
+ def _add_chapter(start_time, end_time):
+ start_time = float_or_none(start_time, 1000)
+ end_time = float_or_none(end_time, 1000)
+ if start_time is None or end_time is None:
+ return
+ chapters.append({
+ 'start_time': start_time,
+ 'end_time': end_time,
+ })
+
+ for chapter in tp_chapters[:-1]:
+ _add_chapter(chapter.get('startTime'), chapter.get('endTime'))
+ _add_chapter(tp_chapters[-1].get('startTime'), tp_chapters[-1].get('endTime') or duration)
+
+ def extract_site_specific_field(field):
+ # A number of sites have custom-prefixed keys, e.g. 'cbc$seasonNumber'
+ return traverse_obj(info, lambda k, v: v and k.endswith(f'${field}'), get_all=False)
+
+ return {
+ 'title': info['title'],
+ 'subtitles': subtitles,
+ 'description': info['description'],
+ 'thumbnail': info['defaultThumbnailUrl'],
+ 'duration': float_or_none(duration, 1000),
+ 'timestamp': int_or_none(info.get('pubDate'), 1000) or None,
+ 'uploader': info.get('billingCode'),
+ 'chapters': chapters,
+ 'creator': traverse_obj(info, ('author', {str})) or None,
+ 'categories': traverse_obj(info, (
+ 'categories', lambda _, v: v.get('label') in ('category', None), 'name', {str})) or None,
+ 'tags': traverse_obj(info, ('keywords', {lambda x: re.split(r'[;,]\s?', x) if x else None})),
+ 'location': extract_site_specific_field('region'),
+ 'series': extract_site_specific_field('show'),
+ 'season_number': int_or_none(extract_site_specific_field('seasonNumber')),
+ 'media_type': extract_site_specific_field('programmingType') or extract_site_specific_field('type'),
+ }
+
+ def _extract_theplatform_metadata(self, path, video_id):
+ info = self._download_theplatform_metadata(path, video_id)
+ return self._parse_theplatform_metadata(info)
+
+
+class ThePlatformIE(ThePlatformBaseIE, AdobePassIE):
+ _VALID_URL = r'''(?x)
+ (?:https?://(?:link|player)\.theplatform\.com/[sp]/(?P<provider_id>[^/]+)/
+ (?:(?:(?:[^/]+/)+select/)?(?P<media>media/(?:guid/\d+/)?)?|(?P<config>(?:[^/\?]+/(?:swf|config)|onsite)/select/))?
+ |theplatform:)(?P<id>[^/\?&]+)'''
+ _EMBED_REGEX = [
+ r'''(?x)
+ <meta\s+
+ property=(["'])(?:og:video(?::(?:secure_)?url)?|twitter:player)\1\s+
+ content=(["'])(?P<url>https?://player\.theplatform\.com/p/.+?)\2''',
+ r'(?s)<(?:iframe|script)[^>]+src=(["\'])(?P<url>(?:https?:)?//player\.theplatform\.com/p/.+?)\1'
+ ]
+
+ _TESTS = [{
+ # from http://www.metacafe.com/watch/cb-e9I_cZgTgIPd/blackberrys_big_bold_z30/
+ 'url': 'http://link.theplatform.com/s/dJ5BDC/e9I_cZgTgIPd/meta.smil?format=smil&Tracking=true&mbr=true',
+ 'info_dict': {
+ 'id': 'e9I_cZgTgIPd',
+ 'ext': 'flv',
+ 'title': 'Blackberry\'s big, bold Z30',
+ 'description': 'The Z30 is Blackberry\'s biggest, baddest mobile messaging device yet.',
+ 'duration': 247,
+ 'timestamp': 1383239700,
+ 'upload_date': '20131031',
+ 'uploader': 'CBSI-NEW',
+ },
+ 'params': {
+ # rtmp download
+ 'skip_download': True,
+ },
+ 'skip': '404 Not Found',
+ }, {
+ # from http://www.cnet.com/videos/tesla-model-s-a-second-step-towards-a-cleaner-motoring-future/
+ 'url': 'http://link.theplatform.com/s/kYEXFC/22d_qsQ6MIRT',
+ 'info_dict': {
+ 'id': '22d_qsQ6MIRT',
+ 'ext': 'flv',
+ 'description': 'md5:ac330c9258c04f9d7512cf26b9595409',
+ 'title': 'Tesla Model S: A second step towards a cleaner motoring future',
+ 'timestamp': 1426176191,
+ 'upload_date': '20150312',
+ 'uploader': 'CBSI-NEW',
+ },
+ 'params': {
+ # rtmp download
+ 'skip_download': True,
+ },
+ 'skip': 'CNet no longer uses ThePlatform',
+ }, {
+ 'url': 'https://player.theplatform.com/p/D6x-PC/pulse_preview/embed/select/media/yMBg9E8KFxZD',
+ 'info_dict': {
+ 'id': 'yMBg9E8KFxZD',
+ 'ext': 'mp4',
+ 'description': 'md5:644ad9188d655b742f942bf2e06b002d',
+ 'title': 'HIGHLIGHTS: USA bag first ever series Cup win',
+ 'uploader': 'EGSM',
+ },
+ 'skip': 'Dead link',
+ }, {
+ 'url': 'http://player.theplatform.com/p/NnzsPC/widget/select/media/4Y0TlYUr_ZT7',
+ 'only_matching': True,
+ }, {
+ 'url': 'http://player.theplatform.com/p/2E2eJC/nbcNewsOffsite?guid=tdy_or_siri_150701',
+ 'md5': 'fb96bb3d85118930a5b055783a3bd992',
+ 'info_dict': {
+ 'id': 'tdy_or_siri_150701',
+ 'ext': 'mp4',
+ 'title': 'iPhone Siri’s sassy response to a math question has people talking',
+ 'description': 'md5:a565d1deadd5086f3331d57298ec6333',
+ 'duration': 83.0,
+ 'thumbnail': r're:^https?://.*\.jpg$',
+ 'timestamp': 1435752600,
+ 'upload_date': '20150701',
+ 'uploader': 'NBCU-NEWS',
+ },
+ 'skip': 'Error: Player PID "nbcNewsOffsite" is disabled',
+ }, {
+ # From http://www.nbc.com/the-blacklist/video/sir-crispin-crandall/2928790?onid=137781#vc137781=1
+ # geo-restricted (US), HLS encrypted with AES-128
+ 'url': 'http://player.theplatform.com/p/NnzsPC/onsite_universal/select/media/guid/2410887629/2928790?fwsitesection=nbc_the_blacklist_video_library&autoPlay=true&carouselID=137781',
+ 'only_matching': True,
+ }]
+
+ @classmethod
+ def _extract_embed_urls(cls, url, webpage):
+ # Are whitespaces ignored in URLs?
+ # https://github.com/ytdl-org/youtube-dl/issues/12044
+ for embed_url in super()._extract_embed_urls(url, webpage):
+ yield re.sub(r'\s', '', embed_url)
+
+ @staticmethod
+ def _sign_url(url, sig_key, sig_secret, life=600, include_qs=False):
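+ # signed URL layout: flags + hex expiry + HMAC-SHA1(flags + expiry + hex(path)) + hex-encoded secret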
+ flags = '10' if include_qs else '00'
+ expiration_date = '%x' % (int(time.time()) + life)
+
+ def str_to_hex(data):
+ return binascii.b2a_hex(data.encode('ascii')).decode('ascii')
+
+ def hex_to_bytes(hex_data):
+ return binascii.a2b_hex(hex_data.encode('ascii'))
+
+ relative_path = re.match(r'https?://link\.theplatform\.com/s/([^?]+)', url).group(1)
+ clear_text = hex_to_bytes(flags + expiration_date + str_to_hex(relative_path))
+ checksum = hmac.new(sig_key.encode('ascii'), clear_text, hashlib.sha1).hexdigest()
+ sig = flags + expiration_date + checksum + str_to_hex(sig_secret)
+ return '%s&sig=%s' % (url, sig)
+
+ def _real_extract(self, url):
+ url, smuggled_data = unsmuggle_url(url, {})
+ self._initialize_geo_bypass({
+ 'countries': smuggled_data.get('geo_countries'),
+ })
+
+ mobj = self._match_valid_url(url)
+ provider_id = mobj.group('provider_id')
+ video_id = mobj.group('id')
+
+ if not provider_id:
+ provider_id = 'dJ5BDC'
+
+ path = provider_id + '/'
+ if mobj.group('media'):
+ path += mobj.group('media')
+ path += video_id
+
+ qs_dict = parse_qs(url)
+ if 'guid' in qs_dict:
+ webpage = self._download_webpage(url, video_id)
+ scripts = re.findall(r'<script[^>]+src="([^"]+)"', webpage)
+ feed_id = None
+ # The feed id is usually found in the last script. There seems to be
+ # no pattern to the relevant script's filename, so try them
+ # one by one, starting from the end
+ for script in reversed(scripts):
+ feed_script = self._download_webpage(
+ self._proto_relative_url(script, 'http:'),
+ video_id, 'Downloading feed script')
+ feed_id = self._search_regex(
+ r'defaultFeedId\s*:\s*"([^"]+)"', feed_script,
+ 'default feed id', default=None)
+ if feed_id is not None:
+ break
+ if feed_id is None:
+ raise ExtractorError('Unable to find feed id')
+ return self.url_result('http://feed.theplatform.com/f/%s/%s?byGuid=%s' % (
+ provider_id, feed_id, qs_dict['guid'][0]))
+
+ if smuggled_data.get('force_smil_url', False):
+ smil_url = url
+ # Explicitly specified SMIL (see https://github.com/ytdl-org/youtube-dl/issues/7385)
+ elif '/guid/' in url:
+ headers = {}
+ source_url = smuggled_data.get('source_url')
+ if source_url:
+ headers['Referer'] = source_url
+ request = Request(url, headers=headers)
+ webpage = self._download_webpage(request, video_id)
+ smil_url = self._search_regex(
+ r'<link[^>]+href=(["\'])(?P<url>.+?)\1[^>]+type=["\']application/smil\+xml',
+ webpage, 'smil url', group='url')
+ path = self._search_regex(
+ r'link\.theplatform\.com/s/((?:[^/?#&]+/)+[^/?#&]+)', smil_url, 'path')
+ smil_url += ('?' if '?' not in smil_url else '&') + 'formats=m3u,mpeg4'
+ elif mobj.group('config'):
+ config_url = url + '&form=json'
+ config_url = config_url.replace('swf/', 'config/')
+ config_url = config_url.replace('onsite/', 'onsite/config/')
+ config = self._download_json(config_url, video_id, 'Downloading config')
+ if 'releaseUrl' in config:
+ release_url = config['releaseUrl']
+ else:
+ release_url = 'http://link.theplatform.com/s/%s?mbr=true' % path
+ smil_url = release_url + '&formats=MPEG4&manifest=f4m'
+ else:
+ smil_url = 'http://link.theplatform.com/s/%s?mbr=true' % path
+
+ sig = smuggled_data.get('sig')
+ if sig:
+ smil_url = self._sign_url(smil_url, sig['key'], sig['secret'])
+
+ formats, subtitles = self._extract_theplatform_smil(smil_url, video_id)
+
+ # With some sites, manifest URL must be forced to extract HLS formats
+ if not traverse_obj(formats, lambda _, v: v['format_id'].startswith('hls')):
+ m3u8_url = update_url(url, query='mbr=true&manifest=m3u', fragment=None)
+ urlh = self._request_webpage(
+ HEADRequest(m3u8_url), video_id, 'Checking for HLS formats', 'No HLS formats found', fatal=False)
+ if urlh and urlhandle_detect_ext(urlh) == 'm3u8':
+ m3u8_fmts, m3u8_subs = self._extract_m3u8_formats_and_subtitles(
+ m3u8_url, video_id, m3u8_id='hls', fatal=False)
+ formats.extend(m3u8_fmts)
+ self._merge_subtitles(m3u8_subs, target=subtitles)
+
+ ret = self._extract_theplatform_metadata(path, video_id)
+ combined_subtitles = self._merge_subtitles(ret.get('subtitles', {}), subtitles)
+ ret.update({
+ 'id': video_id,
+ 'formats': formats,
+ 'subtitles': combined_subtitles,
+ })
+
+ return ret
+
+
+class ThePlatformFeedIE(ThePlatformBaseIE):
+ _URL_TEMPLATE = '%s//feed.theplatform.com/f/%s/%s?form=json&%s'
+ _VALID_URL = r'https?://feed\.theplatform\.com/f/(?P<provider_id>[^/]+)/(?P<feed_id>[^?/]+)\?(?:[^&]+&)*(?P<filter>by(?:Gui|I)d=(?P<id>[^&]+))'
+ _TESTS = [{
+ # From http://player.theplatform.com/p/7wvmTC/MSNBCEmbeddedOffSite?guid=n_hardball_5biden_140207
+ 'url': 'http://feed.theplatform.com/f/7wvmTC/msnbc_video-p-test?form=json&pretty=true&range=-40&byGuid=n_hardball_5biden_140207',
+ 'md5': '6e32495b5073ab414471b615c5ded394',
+ 'info_dict': {
+ 'id': 'n_hardball_5biden_140207',
+ 'ext': 'mp4',
+ 'title': 'The Biden factor: will Joe run in 2016?',
+ 'description': 'Could Vice President Joe Biden be preparing a 2016 campaign? Mark Halperin and Sam Stein weigh in.',
+ 'thumbnail': r're:^https?://.*\.jpg$',
+ 'upload_date': '20140208',
+ 'timestamp': 1391824260,
+ 'duration': 467.0,
+ 'categories': ['MSNBC/Issues/Democrats', 'MSNBC/Issues/Elections/Election 2016'],
+ 'uploader': 'NBCU-NEWS',
+ },
+ }, {
+ 'url': 'http://feed.theplatform.com/f/2E2eJC/nnd_NBCNews?byGuid=nn_netcast_180306.Copy.01',
+ 'only_matching': True,
+ }]
+
+ def _extract_feed_info(self, provider_id, feed_id, filter_query, video_id, custom_fields=None, asset_types_query={}, account_id=None):
+ real_url = self._URL_TEMPLATE % (self.http_scheme(), provider_id, feed_id, filter_query)
+ entry = self._download_json(real_url, video_id)['entries'][0]
+ main_smil_url = 'http://link.theplatform.com/s/%s/media/guid/%d/%s' % (provider_id, account_id, entry['guid']) if account_id else entry.get('plmedia$publicUrl')
+
+ formats = []
+ subtitles = {}
+ first_video_id = None
+ duration = None
+ asset_types = []
+ for item in entry['media$content']:
+ smil_url = item['plfile$url']
+ cur_video_id = ThePlatformIE._match_id(smil_url)
+ if first_video_id is None:
+ first_video_id = cur_video_id
+ duration = float_or_none(item.get('plfile$duration'))
+ file_asset_types = item.get('plfile$assetTypes') or parse_qs(smil_url)['assetTypes']
+ for asset_type in file_asset_types:
+ if asset_type in asset_types:
+ continue
+ asset_types.append(asset_type)
+ query = {
+ 'mbr': 'true',
+ 'formats': item['plfile$format'],
+ 'assetTypes': asset_type,
+ }
+ if asset_type in asset_types_query:
+ query.update(asset_types_query[asset_type])
+ cur_formats, cur_subtitles = self._extract_theplatform_smil(update_url_query(
+ main_smil_url or smil_url, query), video_id, 'Downloading SMIL data for %s' % asset_type)
+ formats.extend(cur_formats)
+ subtitles = self._merge_subtitles(subtitles, cur_subtitles)
+
+ thumbnails = [{
+ 'url': thumbnail['plfile$url'],
+ 'width': int_or_none(thumbnail.get('plfile$width')),
+ 'height': int_or_none(thumbnail.get('plfile$height')),
+ } for thumbnail in entry.get('media$thumbnails', [])]
+
+ timestamp = int_or_none(entry.get('media$availableDate'), scale=1000)
+ categories = [item['media$name'] for item in entry.get('media$categories', [])]
+
+ ret = self._extract_theplatform_metadata('%s/%s' % (provider_id, first_video_id), video_id)
+ subtitles = self._merge_subtitles(subtitles, ret['subtitles'])
+ ret.update({
+ 'id': video_id,
+ 'formats': formats,
+ 'subtitles': subtitles,
+ 'thumbnails': thumbnails,
+ 'duration': duration,
+ 'timestamp': timestamp,
+ 'categories': categories,
+ })
+ if custom_fields:
+ ret.update(custom_fields(entry))
+
+ return ret
+
+ def _real_extract(self, url):
+ mobj = self._match_valid_url(url)
+
+ video_id = mobj.group('id')
+ provider_id = mobj.group('provider_id')
+ feed_id = mobj.group('feed_id')
+ filter_query = mobj.group('filter')
+
+ return self._extract_feed_info(provider_id, feed_id, filter_query, video_id)
diff --git a/yt_dlp/extractor/thestar.py b/yt_dlp/extractor/thestar.py
new file mode 100644
index 0000000..293c34c
--- /dev/null
+++ b/yt_dlp/extractor/thestar.py
@@ -0,0 +1,33 @@
+from .common import InfoExtractor
+
+
+class TheStarIE(InfoExtractor):
+ _VALID_URL = r'https?://(?:www\.)?thestar\.com/(?:[^/]+/)*(?P<id>.+)\.html'
+ _TEST = {
+ 'url': 'http://www.thestar.com/life/2016/02/01/mankind-why-this-woman-started-a-men-s-skincare-line.html',
+ 'md5': '2c62dd4db2027e35579fefb97a8b6554',
+ 'info_dict': {
+ 'id': '4732393888001',
+ 'ext': 'mp4',
+ 'title': 'Mankind: Why this woman started a men\'s skin care line',
+ 'description': 'Robert Cribb talks to Young Lee, the founder of Uncle Peter\'s MAN.',
+ 'uploader_id': '794267642001',
+ 'timestamp': 1454353482,
+ 'upload_date': '20160201',
+ },
+ 'params': {
+ # m3u8 download
+ 'skip_download': True,
+ }
+ }
+ BRIGHTCOVE_URL_TEMPLATE = 'http://players.brightcove.net/794267642001/default_default/index.html?videoId=%s'
+
+ def _real_extract(self, url):
+ display_id = self._match_id(url)
+ webpage = self._download_webpage(url, display_id)
+ brightcove_id = self._search_regex(
+ r'mainartBrightcoveVideoId["\']?\s*:\s*["\']?(\d+)',
+ webpage, 'brightcove id')
+ return self.url_result(
+ self.BRIGHTCOVE_URL_TEMPLATE % brightcove_id,
+ 'BrightcoveNew', brightcove_id)
diff --git a/yt_dlp/extractor/thesun.py b/yt_dlp/extractor/thesun.py
new file mode 100644
index 0000000..5edcf1c
--- /dev/null
+++ b/yt_dlp/extractor/thesun.py
@@ -0,0 +1,43 @@
+import re
+
+from .common import InfoExtractor
+from ..utils import extract_attributes
+
+
+class TheSunIE(InfoExtractor):
+ _VALID_URL = r'https?://(?:www\.)?the-?sun\.(?:co\.uk|com)/[^/]+/(?P<id>\d+)'
+ _TESTS = [{
+ 'url': 'https://www.thesun.co.uk/tvandshowbiz/2261604/orlando-bloom-and-katy-perry-post-adorable-instagram-video-together-celebrating-thanksgiving-after-split-rumours/',
+ 'info_dict': {
+ 'id': '2261604',
+ 'title': 'md5:cba22f48bad9218b64d5bbe0e16afddf',
+ },
+ 'playlist_count': 2,
+ }, {
+ 'url': 'https://www.the-sun.com/entertainment/7611415/1000lb-sisters-fans-rip-amy-dangerous-health-decision/',
+ 'info_dict': {
+ 'id': '7611415',
+ 'title': 'md5:e0b9b976f79dc770e5c80f22f40bb844',
+ },
+ 'playlist_count': 1,
+ }]
+ BRIGHTCOVE_URL_TEMPLATE = 'http://players.brightcove.net/%s/default_default/index.html?videoId=%s'
+
+ def _real_extract(self, url):
+ article_id = self._match_id(url)
+
+ webpage = self._download_webpage(url, article_id)
+
+ entries = []
+ for video in re.findall(
+ r'<video[^>]+data-video-id-pending=[^>]+>',
+ webpage):
+ attrs = extract_attributes(video)
+ video_id = attrs['data-video-id-pending']
+ account_id = attrs.get('data-account', '5067014667001')
+ entries.append(self.url_result(
+ self.BRIGHTCOVE_URL_TEMPLATE % (account_id, video_id),
+ 'BrightcoveNew', video_id))
+
+ return self.playlist_result(
+ entries, article_id, self._og_search_title(webpage, fatal=False))
diff --git a/yt_dlp/extractor/theweatherchannel.py b/yt_dlp/extractor/theweatherchannel.py
new file mode 100644
index 0000000..d1921e4
--- /dev/null
+++ b/yt_dlp/extractor/theweatherchannel.py
@@ -0,0 +1,100 @@
+import json
+
+from .theplatform import ThePlatformIE
+from ..utils import (
+ determine_ext,
+ parse_duration,
+ parse_iso8601,
+)
+
+
+class TheWeatherChannelIE(ThePlatformIE): # XXX: Do not subclass from concrete IE
+ _VALID_URL = r'https?://(?:www\.)?weather\.com(?P<asset_name>(?:/(?P<locale>[a-z]{2}-[A-Z]{2}))?/(?:[^/]+/)*video/(?P<id>[^/?#]+))'
+ _TESTS = [{
+ 'url': 'https://weather.com/storms/hurricane/video/invest-95l-in-atlantic-has-a-medium-chance-of-development',
+ 'md5': '68f0cf616435683f27ce36bd9c927394',
+ 'info_dict': {
+ 'id': '81acef2d-ee8c-4545-ba83-bff3cc80db97',
+ 'ext': 'mp4',
+ 'title': 'Invest 95L In Atlantic Has A Medium Chance Of Development',
+ 'description': 'md5:0de720fd5f0d0e32207bd4c270fff824',
+ 'uploader': 'TWC - Digital',
+ 'uploader_id': 'b5a999e0-9e04-11e1-9ee2-001d092f5a10',
+ 'upload_date': '20230721',
+ 'timestamp': 1689967343,
+ 'display_id': 'invest-95l-in-atlantic-has-a-medium-chance-of-development',
+ 'duration': 34.0,
+ }
+ }, {
+ 'url': 'https://weather.com/en-CA/international/videos/video/unidentified-object-falls-from-sky-in-india',
+ 'only_matching': True,
+ }]
+
+ def _real_extract(self, url):
+ asset_name, locale, display_id = self._match_valid_url(url).groups()
+ if not locale:
+ locale = 'en-US'
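+ # asset metadata is fetched through weather.com's batched "redux-dal" CMS API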
+ video_data = list(self._download_json(
+ 'https://weather.com/api/v1/p/redux-dal', display_id, data=json.dumps([{
+ 'name': 'getCMSAssetsUrlConfig',
+ 'params': {
+ 'language': locale.replace('-', '_'),
+ 'query': {
+ 'assetName': {
+ '$in': asset_name,
+ },
+ },
+ }
+ }]).encode(), headers={
+ 'Content-Type': 'application/json',
+ })['dal']['getCMSAssetsUrlConfig'].values())[0]['data'][0]
+ video_id = video_data['id']
+ seo_meta = video_data.get('seometa', {})
+ title = video_data.get('title') or seo_meta['title']
+
+ urls = []
+ thumbnails = []
+ formats = []
+ for variant_id, variant_url in (video_data.get('variants') or {}).items():
+ variant_url = variant_url.strip()
+ if not variant_url or variant_url in urls:
+ continue
+ urls.append(variant_url)
+ ext = determine_ext(variant_url)
+ if ext == 'jpg':
+ thumbnails.append({
+ 'url': variant_url,
+ 'id': variant_id,
+ })
+ elif ThePlatformIE.suitable(variant_url):
+ tp_formats, _ = self._extract_theplatform_smil(variant_url, video_id)
+ formats.extend(tp_formats)
+ elif ext == 'm3u8':
+ formats.extend(self._extract_m3u8_formats(
+ variant_url, video_id, 'mp4', 'm3u8_native',
+ m3u8_id=variant_id, fatal=False))
+ elif ext == 'f4m':
+ formats.extend(self._extract_f4m_formats(
+ variant_url, video_id, f4m_id=variant_id, fatal=False))
+ else:
+ formats.append({
+ 'url': variant_url,
+ 'format_id': variant_id,
+ })
+
+ cc_url = video_data.get('cc_url')
+
+ return {
+ 'id': video_id,
+ 'display_id': display_id,
+ 'title': title,
+ 'description': video_data.get('description') or seo_meta.get('description') or seo_meta.get('og:description'),
+ 'duration': parse_duration(video_data.get('duration')),
+ 'uploader': video_data.get('providername'),
+ 'uploader_id': video_data.get('providerid'),
+ 'timestamp': parse_iso8601(video_data.get('publishdate')),
+ 'subtitles': {locale[:2]: [{'url': cc_url}]} if cc_url else None,
+ 'thumbnails': thumbnails,
+ 'formats': formats,
+ }
diff --git a/yt_dlp/extractor/thisamericanlife.py b/yt_dlp/extractor/thisamericanlife.py
new file mode 100644
index 0000000..9a3d798
--- /dev/null
+++ b/yt_dlp/extractor/thisamericanlife.py
@@ -0,0 +1,38 @@
+from .common import InfoExtractor
+
+
+class ThisAmericanLifeIE(InfoExtractor):
+ _VALID_URL = r'https?://(?:www\.)?thisamericanlife\.org/(?:radio-archives/episode/|play_full\.php\?play=)(?P<id>\d+)'
+ _TESTS = [{
+ 'url': 'http://www.thisamericanlife.org/radio-archives/episode/487/harper-high-school-part-one',
+ 'md5': '8f7d2da8926298fdfca2ee37764c11ce',
+ 'info_dict': {
+ 'id': '487',
+ 'ext': 'm4a',
+ 'title': '487: Harper High School, Part One',
+ 'description': 'md5:ee40bdf3fb96174a9027f76dbecea655',
+ 'thumbnail': r're:^https?://.*\.jpg$',
+ },
+ }, {
+ 'url': 'http://www.thisamericanlife.org/play_full.php?play=487',
+ 'only_matching': True,
+ }]
+
+ def _real_extract(self, url):
+ video_id = self._match_id(url)
+
+ webpage = self._download_webpage(
+ 'http://www.thisamericanlife.org/radio-archives/episode/%s' % video_id, video_id)
+
+ return {
+ 'id': video_id,
+ 'url': 'http://stream.thisamericanlife.org/{0}/stream/{0}_64k.m3u8'.format(video_id),
+ 'protocol': 'm3u8_native',
+ 'ext': 'm4a',
+ 'acodec': 'aac',
+ 'vcodec': 'none',
+ 'abr': 64,
+ 'title': self._html_search_meta(r'twitter:title', webpage, 'title', fatal=True),
+ 'description': self._html_search_meta(r'description', webpage, 'description'),
+ 'thumbnail': self._og_search_thumbnail(webpage),
+ }
diff --git a/yt_dlp/extractor/thisoldhouse.py b/yt_dlp/extractor/thisoldhouse.py
new file mode 100644
index 0000000..15f8380
--- /dev/null
+++ b/yt_dlp/extractor/thisoldhouse.py
@@ -0,0 +1,105 @@
+import json
+
+from .common import InfoExtractor
+from .zype import ZypeIE
+from ..networking import HEADRequest
+from ..networking.exceptions import HTTPError
+from ..utils import (
+ ExtractorError,
+ filter_dict,
+ parse_qs,
+ try_call,
+ urlencode_postdata,
+)
+
+
+class ThisOldHouseIE(InfoExtractor):
+ _NETRC_MACHINE = 'thisoldhouse'
+ _VALID_URL = r'https?://(?:www\.)?thisoldhouse\.com/(?:watch|how-to|tv-episode|(?:[^/?#]+/)?\d+)/(?P<id>[^/?#]+)'
+ _TESTS = [{
+ 'url': 'https://www.thisoldhouse.com/furniture/21017078/how-to-build-a-storage-bench',
+ 'info_dict': {
+ 'id': '5dcdddf673c3f956ef5db202',
+ 'ext': 'mp4',
+ 'title': 'How to Build a Storage Bench',
+ 'description': 'In the workshop, Tom Silva and Kevin O\'Connor build a storage bench for an entryway.',
+ 'timestamp': 1442548800,
+ 'upload_date': '20150918',
+ 'duration': 674,
+ 'view_count': int,
+ 'average_rating': 0,
+ 'thumbnail': r're:^https?://.*\.jpg\?\d+$',
+ 'display_id': 'how-to-build-a-storage-bench',
+ },
+ 'params': {
+ 'skip_download': True,
+ },
+ }, {
+ # Page no longer has video
+ 'url': 'https://www.thisoldhouse.com/watch/arlington-arts-crafts-arts-and-crafts-class-begins',
+ 'only_matching': True,
+ }, {
+ # 404 Not Found
+ 'url': 'https://www.thisoldhouse.com/tv-episode/ask-toh-shelf-rough-electric',
+ 'only_matching': True,
+ }, {
+ # 404 Not Found
+ 'url': 'https://www.thisoldhouse.com/how-to/how-to-build-storage-bench',
+ 'only_matching': True,
+ }, {
+ 'url': 'https://www.thisoldhouse.com/21113884/s41-e13-paradise-lost',
+ 'only_matching': True,
+ }, {
+ # iframe www.thisoldhouse.com
+ 'url': 'https://www.thisoldhouse.com/21083431/seaside-transformation-the-westerly-project',
+ 'only_matching': True,
+ }]
+
+ _LOGIN_URL = 'https://login.thisoldhouse.com/usernamepassword/login'
+
+ def _perform_login(self, username, password):
+ self._request_webpage(
+ HEADRequest('https://www.thisoldhouse.com/insider'), None, 'Requesting session cookies')
+ urlh = self._request_webpage(
+ 'https://www.thisoldhouse.com/wp-login.php', None, 'Requesting login info',
+ errnote='Unable to login', query={'redirect_to': 'https://www.thisoldhouse.com/insider'})
+
+ try:
+ auth_form = self._download_webpage(
+ self._LOGIN_URL, None, 'Submitting credentials', headers={
+ 'Content-Type': 'application/json',
+ 'Referer': urlh.url,
+ }, data=json.dumps(filter_dict({
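+ # forward the SSO query params from the login redirect, renaming 'client' to 'client_id'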
+ **{('client_id' if k == 'client' else k): v[0] for k, v in parse_qs(urlh.url).items()},
+ 'tenant': 'thisoldhouse',
+ 'username': username,
+ 'password': password,
+ 'popup_options': {},
+ 'sso': True,
+ '_csrf': try_call(lambda: self._get_cookies(self._LOGIN_URL)['_csrf'].value),
+ '_intstate': 'deprecated',
+ }), separators=(',', ':')).encode())
+ except ExtractorError as e:
+ if isinstance(e.cause, HTTPError) and e.cause.status == 401:
+ raise ExtractorError('Invalid username or password', expected=True)
+ raise
+
+ self._request_webpage(
+ 'https://login.thisoldhouse.com/login/callback', None, 'Completing login',
+ data=urlencode_postdata(self._hidden_inputs(auth_form)))
+
+ def _real_extract(self, url):
+ display_id = self._match_id(url)
+ webpage = self._download_webpage(url, display_id)
+ if 'To Unlock This content' in webpage:
+ self.raise_login_required(
+ 'This video is only available for subscribers. '
+ 'Note that --cookies-from-browser may not work due to this site using session cookies')
+
+ video_url, video_id = self._search_regex(
+ r'<iframe[^>]+src=[\'"]((?:https?:)?//(?:www\.)?thisoldhouse\.(?:chorus\.build|com)/videos/zype/([0-9a-f]{24})[^\'"]*)[\'"]',
+ webpage, 'video url', group=(1, 2))
+ video_url = self._request_webpage(HEADRequest(video_url), video_id, 'Resolving Zype URL').url
+
+ return self.url_result(video_url, ZypeIE, video_id)
diff --git a/yt_dlp/extractor/thisvid.py b/yt_dlp/extractor/thisvid.py
new file mode 100644
index 0000000..9d3368e
--- /dev/null
+++ b/yt_dlp/extractor/thisvid.py
@@ -0,0 +1,228 @@
+import itertools
+import re
+import urllib.parse
+
+from .common import InfoExtractor
+from ..utils import (
+ clean_html,
+ get_element_by_class,
+ int_or_none,
+ url_or_none,
+ urljoin,
+)
+
+
+class ThisVidIE(InfoExtractor):
+ _VALID_URL = r'https?://(?:www\.)?thisvid\.com/(?P<type>videos|embed)/(?P<id>[A-Za-z0-9-]+)'
+ _TESTS = [{
+ 'url': 'https://thisvid.com/videos/sitting-on-ball-tight-jeans/',
+ 'md5': '839becb572995687e11a69dc4358a386',
+ 'info_dict': {
+ 'id': '3533241',
+ 'ext': 'mp4',
+ 'title': 'Sitting on ball tight jeans',
+ 'description': 'md5:372353bb995883d1b65fddf507489acd',
+ 'thumbnail': r're:https?://\w+\.thisvid\.com/(?:[^/]+/)+3533241/preview\.jpg',
+ 'uploader_id': '150629',
+ 'uploader': 'jeanslevisjeans',
+ 'display_id': 'sitting-on-ball-tight-jeans',
+ 'age_limit': 18,
+ }
+ }, {
+ 'url': 'https://thisvid.com/embed/3533241/',
+ 'md5': '839becb572995687e11a69dc4358a386',
+ 'info_dict': {
+ 'id': '3533241',
+ 'ext': 'mp4',
+ 'title': 'Sitting on ball tight jeans',
+ 'thumbnail': r're:https?://\w+\.thisvid\.com/(?:[^/]+/)+3533241/preview\.jpg',
+ 'uploader_id': '150629',
+ 'uploader': 'jeanslevisjeans',
+ 'display_id': 'sitting-on-ball-tight-jeans',
+ 'age_limit': 18,
+ }
+ }]
+
+ def _real_extract(self, url):
+ main_id, type_ = self._match_valid_url(url).group('id', 'type')
+ webpage = self._download_webpage(url, main_id)
+
+ title = self._html_search_regex(
+ r'<title\b[^>]*?>(?:Video:\s+)?(.+?)(?:\s+-\s+ThisVid(?:\.com| tube))?</title>',
+ webpage, 'title')
+
+ if type_ == 'embed':
+ # look for more metadata
+ video_alt_url = url_or_none(self._search_regex(
+ rf'''video_alt_url\s*:\s+'({self._VALID_URL}/)',''',
+ webpage, 'video_alt_url', default=None))
+ if video_alt_url and video_alt_url != url:
+ webpage = self._download_webpage(
+ video_alt_url, main_id,
+ note='Redirecting embed to main page', fatal=False) or webpage
+
+ video_holder = get_element_by_class('video-holder', webpage) or ''
+ if '>This video is a private video' in video_holder:
+ self.raise_login_required(
+ (clean_html(video_holder) or 'Private video').partition('\n')[0])
+
+ uploader = self._html_search_regex(
+ r'''(?s)<span\b[^>]*>Added by:\s*</span><a\b[^>]+\bclass\s*=\s*["']author\b[^>]+\bhref\s*=\s*["']https://thisvid\.com/members/([0-9]+/.{3,}?)\s*</a>''',
+ webpage, 'uploader', default='')
+ uploader = re.split(r'''/["'][^>]*>\s*''', uploader)
+ if len(uploader) == 2:
+ # id must be non-empty, uploader could be ''
+ uploader_id, uploader = uploader
+ uploader = uploader or None
+ else:
+ uploader_id = uploader = None
+
+ return self.url_result(
+ url, ie='Generic', url_transparent=True,
+ title=title,
+ age_limit=18,
+ uploader=uploader,
+ uploader_id=uploader_id)
+
+
+class ThisVidPlaylistBaseIE(InfoExtractor):
+ _PLAYLIST_URL_RE = None
+
+ @classmethod
+ def _find_urls(cls, html):
+ for m in re.finditer(rf'''<a\b[^>]+\bhref\s*=\s*["'](?P<url>{cls._PLAYLIST_URL_RE}\b)[^>]+>''', html):
+ yield m.group('url')
+
+ def _generate_playlist_entries(self, url, playlist_id, html=None):
+ page_url = url
+ for page in itertools.count(1):
+ if not html:
+ html = self._download_webpage(
+ page_url, playlist_id, note=f'Downloading page {page}',
+ fatal=False) or ''
+
+ yield from self._find_urls(html)
+
+ next_page = get_element_by_class('pagination-next', html) or ''
+ if next_page:
+ # member list page
+ next_page = urljoin(url, self._search_regex(
+ r'''<a\b[^>]+\bhref\s*=\s*("|')(?P<url>(?!#)(?:(?!\1).)+)''',
+ next_page, 'next page link', group='url', default=None))
+
+ # a member page's 'pagination-next' element could contain an empty link, so check for None explicitly rather than using a bare else
+ if next_page is None:
+ # playlist page
+ parsed_url = urllib.parse.urlparse(page_url)
+ base_path, _, num = parsed_url.path.rpartition('/')
+ num = int_or_none(num)
+ if num is None:
+ base_path, num = parsed_url.path.rstrip('/'), 1
+ parsed_url = parsed_url._replace(path=f'{base_path}/{num + 1}')
+ next_page = urllib.parse.urlunparse(parsed_url)
+ if page_url == next_page:
+ next_page = None
+
+ if not next_page:
+ return
+ page_url, html = next_page, None
+
+ def _make_playlist_result(self, url):
+ playlist_id = self._match_id(url)
+ webpage = self._download_webpage(url, playlist_id)
+
+ title = re.split(
+ r'(?i)\s*\|\s*ThisVid\.com\s*$',
+ self._og_search_title(webpage, default=None)
+ or self._html_search_regex(r'(?s)<title\b[^>]*>(.+?)</title', webpage, 'title', fatal=False) or '', 1)[0] or None
+
+ return self.playlist_from_matches(
+ self._generate_playlist_entries(url, playlist_id, webpage),
+ playlist_id=playlist_id, playlist_title=title, ie=ThisVidIE)
+
+
+class ThisVidMemberIE(ThisVidPlaylistBaseIE):
+ _VALID_URL = r'https?://thisvid\.com/members/(?P<id>\d+)'
+ _TESTS = [{
+ 'url': 'https://thisvid.com/members/2140501/',
+ 'info_dict': {
+ 'id': '2140501',
+ 'title': 'Rafflesia\'s Profile',
+ },
+ 'playlist_mincount': 16,
+ }, {
+ 'url': 'https://thisvid.com/members/2140501/favourite_videos/',
+ 'info_dict': {
+ 'id': '2140501',
+ 'title': 'Rafflesia\'s Favourite Videos',
+ },
+ 'playlist_mincount': 15,
+ }, {
+ 'url': 'https://thisvid.com/members/636468/public_videos/',
+ 'info_dict': {
+ 'id': '636468',
+ 'title': 'Happymouth\'s Public Videos',
+ },
+ 'playlist_mincount': 196,
+ }]
+ _PLAYLIST_URL_RE = ThisVidIE._VALID_URL
+
+ def _real_extract(self, url):
+ return self._make_playlist_result(url)
+
+
+class ThisVidPlaylistIE(ThisVidPlaylistBaseIE):
+ _VALID_URL = r'https?://thisvid\.com/playlist/(?P<id>\d+)/video/(?P<video_id>[A-Za-z0-9-]+)'
+ _TESTS = [{
+ 'url': 'https://thisvid.com/playlist/6615/video/big-italian-booty-28/',
+ 'info_dict': {
+ 'id': '6615',
+ 'title': 'Underwear Stuff',
+ },
+ 'playlist_mincount': 200,
+ }, {
+ 'url': 'https://thisvid.com/playlist/6615/video/big-italian-booty-28/',
+ 'info_dict': {
+ 'id': '1072387',
+ 'ext': 'mp4',
+ 'title': 'Big Italian Booty 28',
+ 'description': 'md5:1bccf7b13765e18fb27bf764dba7ede2',
+ 'uploader_id': '367912',
+ 'uploader': 'Jcmusclefun',
+ 'age_limit': 18,
+ 'display_id': 'big-italian-booty-28',
+ 'thumbnail': r're:https?://\w+\.thisvid\.com/(?:[^/]+/)+1072387/preview\.jpg',
+ },
+ 'params': {
+ 'noplaylist': True,
+ },
+ }]
+ _PLAYLIST_URL_RE = _VALID_URL
+
+ def _generate_playlist_entries(self, url, playlist_id, html=None):
+ for wrapped_url in super()._generate_playlist_entries(url, playlist_id, html):
+ video_id = re.match(self._VALID_URL, wrapped_url).group('video_id')
+ yield urljoin(url, f'/videos/{video_id}/')
+
+ def _real_extract(self, url):
+ playlist_id, video_id = self._match_valid_url(url).group('id', 'video_id')
+
+ if not self._yes_playlist(playlist_id, video_id):
+ redirect_url = urljoin(url, f'/videos/{video_id}/')
+ return self.url_result(redirect_url, ThisVidIE)
+
+ result = self._make_playlist_result(url)
+
+ # Fix duplicated title (`the title - the title` => `the title`)
+ title = result['title']
+ t_len = len(title)
+ if t_len > 5 and t_len % 2 != 0:
+ t_len = t_len // 2
+ if title[t_len] == '-':
+ first, second = map(str.strip, (title[:t_len], title[t_len + 1:]))
+ if first and first == second:
+ result['title'] = first
+
+ return result
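The parity check above works because a duplicated `X - X` title always has odd length, with the hyphen landing exactly at the midpoint. Restated as a self-contained sketch (sample titles taken from the tests above or invented):

    def dedupe_title(title):
        # 'foo - foo' has odd length and a '-' exactly at the midpoint
        mid = len(title) // 2
        if len(title) > 5 and len(title) % 2 and title[mid] == '-':
            first, second = title[:mid].strip(), title[mid + 1:].strip()
            if first and first == second:
                return first
        return title

    assert dedupe_title('Underwear Stuff - Underwear Stuff') == 'Underwear Stuff'
    assert dedupe_title('Big Italian Booty 28') == 'Big Italian Booty 28'  # even length, left as-is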
diff --git a/yt_dlp/extractor/threeqsdn.py b/yt_dlp/extractor/threeqsdn.py
new file mode 100644
index 0000000..7841f8d
--- /dev/null
+++ b/yt_dlp/extractor/threeqsdn.py
@@ -0,0 +1,156 @@
+from .common import InfoExtractor
+from ..networking.exceptions import HTTPError
+from ..utils import (
+ determine_ext,
+ ExtractorError,
+ float_or_none,
+ int_or_none,
+ join_nonempty,
+ parse_iso8601,
+)
+
+
+class ThreeQSDNIE(InfoExtractor):
+ IE_NAME = '3qsdn'
+ IE_DESC = '3Q SDN'
+ _VALID_URL = r'https?://playout\.3qsdn\.com/(?P<id>[\da-f]{8}-[\da-f]{4}-[\da-f]{4}-[\da-f]{4}-[\da-f]{12})'
+ _EMBED_REGEX = [r'<iframe[^>]+\b(?:data-)?src=(["\'])(?P<url>%s.*?)\1' % _VALID_URL]
+ _TESTS = [{
+ # https://player.3qsdn.com/demo.html
+ 'url': 'https://playout.3qsdn.com/7201c779-6b3c-11e7-a40e-002590c750be',
+ 'md5': '64a57396b16fa011b15e0ea60edce918',
+ 'info_dict': {
+ 'id': '7201c779-6b3c-11e7-a40e-002590c750be',
+ 'ext': 'mp4',
+ 'title': 'Video Ads',
+ 'is_live': False,
+ 'description': 'Video Ads Demo',
+ 'timestamp': 1500334803,
+ 'upload_date': '20170717',
+ 'duration': 888.032,
+ 'subtitles': {
+ 'eng': 'count:1',
+ },
+ },
+ 'expected_warnings': ['Unknown MIME type application/mp4 in DASH manifest'],
+ }, {
+ # live video stream
+ 'url': 'https://playout.3qsdn.com/66e68995-11ca-11e8-9273-002590c750be',
+ 'info_dict': {
+ 'id': '66e68995-11ca-11e8-9273-002590c750be',
+ 'ext': 'mp4',
+ 'title': 're:^66e68995-11ca-11e8-9273-002590c750be [0-9]{4}-[0-9]{2}-[0-9]{2} [0-9]{2}:[0-9]{2}$',
+ 'is_live': True,
+ },
+ 'params': {
+ 'skip_download': True, # m3u8 downloads
+ },
+ }, {
+ # live audio stream
+ 'url': 'http://playout.3qsdn.com/9edf36e0-6bf2-11e2-a16a-9acf09e2db48',
+ 'only_matching': True,
+ }, {
+ # live audio stream with some 404 URLs
+ 'url': 'http://playout.3qsdn.com/ac5c3186-777a-11e2-9c30-9acf09e2db48',
+ 'only_matching': True,
+ }, {
+ # geo restricted with 'This content is not available in your country'
+ 'url': 'http://playout.3qsdn.com/d63a3ffe-75e8-11e2-9c30-9acf09e2db48',
+ 'only_matching': True,
+ }, {
+ # geo restricted with 'playout.3qsdn.com/forbidden'
+ 'url': 'http://playout.3qsdn.com/8e330f26-6ae2-11e2-a16a-9acf09e2db48',
+ 'only_matching': True,
+ }, {
+ # live video with rtmp link
+ 'url': 'https://playout.3qsdn.com/6092bb9e-8f72-11e4-a173-002590c750be',
+ 'only_matching': True,
+ }, {
+ # ondemand from http://www.philharmonie.tv/veranstaltung/26/
+ 'url': 'http://playout.3qsdn.com/0280d6b9-1215-11e6-b427-0cc47a188158?protocol=http',
+ 'only_matching': True,
+ }, {
+ # live video stream
+ 'url': 'https://playout.3qsdn.com/d755d94b-4ab9-11e3-9162-0025907ad44f?js=true',
+ 'only_matching': True,
+ }]
+
+ def _extract_from_webpage(self, url, webpage):
+ for res in super()._extract_from_webpage(url, webpage):
+ yield {
+ **res,
+ '_type': 'url_transparent',
+ 'uploader': self._search_regex(r'^(?:https?://)?([^/]*)/.*', url, 'video uploader'),
+ }
+
+ def _real_extract(self, url):
+ video_id = self._match_id(url)
+
+ try:
+ config = self._download_json(
+ url.replace('://playout.3qsdn.com/', '://playout.3qsdn.com/config/'), video_id)
+ except ExtractorError as e:
+ if isinstance(e.cause, HTTPError) and e.cause.status == 401:
+ self.raise_geo_restricted()
+ raise
+
+ live = config.get('streamContent') == 'live'
+ aspect = float_or_none(config.get('aspect'))
+
+ formats = []
+ subtitles = {}
+ for source_type, source in (config.get('sources') or {}).items():
+ if not source:
+ continue
+ if source_type == 'dash':
+ fmts, subs = self._extract_mpd_formats_and_subtitles(
+ source, video_id, mpd_id='mpd', fatal=False)
+ formats.extend(fmts)
+ subtitles = self._merge_subtitles(subtitles, subs)
+ elif source_type == 'hls':
+ fmts, subs = self._extract_m3u8_formats_and_subtitles(
+ source, video_id, 'mp4', live=live, m3u8_id='hls', fatal=False)
+ formats.extend(fmts)
+ subtitles = self._merge_subtitles(subtitles, subs)
+ elif source_type == 'progressive':
+ for s in source:
+ src = s.get('src')
+ if not (src and self._is_valid_url(src, video_id)):
+ continue
+ ext = determine_ext(src)
+ height = int_or_none(s.get('height'))
+ formats.append({
+ 'ext': ext,
+ 'format_id': join_nonempty('http', ext, height and '%dp' % height),
+ 'height': height,
+ 'source_preference': 0,
+ 'url': src,
+ 'vcodec': 'none' if height == 0 else None,
+ 'width': int(height * aspect) if height and aspect else None,
+ })
+
+ for subtitle in (config.get('subtitles') or []):
+ src = subtitle.get('src')
+ if not src:
+ continue
+ subtitles.setdefault(subtitle.get('label') or 'eng', []).append({
+ 'url': src,
+ })
+
+ title = config.get('title') or video_id
+
+ return {
+ 'id': video_id,
+ 'title': title,
+ 'thumbnail': config.get('poster') or None,
+ 'description': config.get('description') or None,
+ 'timestamp': parse_iso8601(config.get('upload_date')),
+ 'duration': float_or_none(config.get('vlength')) or None,
+ 'is_live': live,
+ 'formats': formats,
+ 'subtitles': subtitles,
+ # It seems like this would be correctly handled by default
+ # However, unless someone can confirm this, the old
+ # behaviour is being kept as-is
+ '_format_sort_fields': ('res', 'source_preference')
+ }
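For reference, a sketch of the playout config shape that the loop above consumes — the keys match the extractor's lookups, but the values are invented:

    config = {
        'streamContent': 'ondemand',
        'aspect': 1.7777778,
        'sources': {
            'hls': 'https://example.com/master.m3u8',
            'progressive': [{'src': 'https://example.com/video_720.mp4', 'height': 720}],
        },
    }

    aspect = config['aspect']
    for s in config['sources']['progressive']:
        height = s['height']
        # same derivation as above: the width is reconstructed from height and aspect ratio
        width = int(height * aspect) if height and aspect else None
        print(s['src'], f'{width}x{height}')  # -> https://example.com/video_720.mp4 1280x720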
diff --git a/yt_dlp/extractor/threespeak.py b/yt_dlp/extractor/threespeak.py
new file mode 100644
index 0000000..dbd5090
--- /dev/null
+++ b/yt_dlp/extractor/threespeak.py
@@ -0,0 +1,93 @@
+import re
+
+from .common import InfoExtractor
+from ..utils import (
+ try_get,
+ unified_strdate,
+)
+
+
+class ThreeSpeakIE(InfoExtractor):
+ _VALID_URL = r'https?://(?:www\.)?3speak\.tv/watch\?v\=[^/]+/(?P<id>[^/$&#?]+)'
+
+ _TESTS = [{
+ 'url': 'https://3speak.tv/watch?v=dannyshine/wjgoxyfy',
+ 'info_dict': {
+ 'id': 'wjgoxyfy',
+ 'ext': 'mp4',
+ 'title': 'Can People who took the Vax think Critically',
+ 'uploader': 'dannyshine',
+ 'description': 'md5:181aa7ccb304afafa089b5af3bca7a10',
+ 'tags': ['sex', 'covid', 'antinatalism', 'comedy', 'vaccines'],
+ 'thumbnail': 'https://img.3speakcontent.co/wjgoxyfy/thumbnails/default.png',
+ 'upload_date': '20211021',
+ 'duration': 2703.867833,
+ 'filesize': 1620054781,
+ },
+ 'params': {'skip_download': True}
+ }]
+
+ def _real_extract(self, url):
+ video_id = self._match_id(url)
+ webpage = self._download_webpage(url, video_id)
+ json_str = self._html_search_regex(r'JSON\.parse\(\'([^\']+)\'\)', webpage, 'json')
+ # The JSON string is itself escaped, hence the double parsing
+ data_json = self._parse_json(self._parse_json(f'"{json_str}"', video_id), video_id)
+ video_json = self._parse_json(data_json['json_metadata'], video_id)
+ formats, subtitles = [], {}
+ og_m3u8 = self._html_search_regex(r'<meta\s?property=\"ogvideo\"\s?content=\"([^\"]+)\">', webpage, 'og m3u8', fatal=False)
+ if og_m3u8:
+ https_frmts, https_subs = self._extract_m3u8_formats_and_subtitles(og_m3u8, video_id, fatal=False, m3u8_id='https')
+ formats.extend(https_frmts)
+ subtitles = self._merge_subtitles(subtitles, https_subs)
+ ipfs_m3u8 = try_get(video_json, lambda x: x['video']['info']['ipfs'])
+ if ipfs_m3u8:
+ ipfs_frmts, ipfs_subs = self._extract_m3u8_formats_and_subtitles(
+ f'https://ipfs.3speak.tv/ipfs/{ipfs_m3u8}', video_id, fatal=False, m3u8_id='ipfs')
+ formats.extend(ipfs_frmts)
+ subtitles = self._merge_subtitles(subtitles, ipfs_subs)
+ mp4_file = try_get(video_json, lambda x: x['video']['info']['file'])
+ if mp4_file:
+ formats.append({
+ 'url': f'https://threespeakvideo.b-cdn.net/{video_id}/{mp4_file}',
+ 'ext': 'mp4',
+ 'format_id': 'https-mp4',
+ 'duration': try_get(video_json, lambda x: x['video']['info']['duration']),
+ 'filesize': try_get(video_json, lambda x: x['video']['info']['filesize']),
+ 'quality': 11,
+ 'format_note': 'Original file',
+ })
+ return {
+ 'id': video_id,
+ 'title': data_json.get('title') or data_json.get('root_title'),
+ 'uploader': data_json.get('author'),
+ 'description': try_get(video_json, lambda x: x['video']['content']['description']),
+ 'tags': try_get(video_json, lambda x: x['video']['content']['tags']),
+ 'thumbnail': try_get(video_json, lambda x: x['image'][0]),
+ 'upload_date': unified_strdate(data_json.get('created')),
+ 'formats': formats,
+ 'subtitles': subtitles,
+ }
+
+
+class ThreeSpeakUserIE(InfoExtractor):
+ _VALID_URL = r'https?://(?:www\.)?3speak\.tv/user/(?P<id>[^/$&?#]+)'
+
+ _TESTS = [{
+ 'url': 'https://3speak.tv/user/theycallmedan',
+ 'info_dict': {
+ 'id': 'theycallmedan',
+ },
+ 'playlist_mincount': 115,
+ }]
+
+ def _real_extract(self, url):
+ user_id = self._match_id(url)
+ webpage = self._download_webpage(url, user_id)
+ entries = [
+ self.url_result(
+ 'https://3speak.tv/watch?v=%s' % video,
+ ie=ThreeSpeakIE.ie_key())
+ for video in re.findall(r'data-payout\s?\=\s?\"([^\"]+)\"', webpage) if video
+ ]
+ return self.playlist_result(entries, user_id)
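The double parse above is needed because the page embeds its data as a backslash-escaped string literal inside `JSON.parse('...')`. A stdlib equivalent, with an invented payload:

    import json

    # what the regex captures from JSON.parse('...') -- still backslash-escaped
    json_str = '{\\"title\\": \\"Example\\"}'

    unescaped = json.loads(f'"{json_str}"')  # first pass: decode the escaped string literal
    data = json.loads(unescaped)             # second pass: parse the actual JSON document
    assert data == {'title': 'Example'}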
diff --git a/yt_dlp/extractor/tiktok.py b/yt_dlp/extractor/tiktok.py
new file mode 100644
index 0000000..aa83567
--- /dev/null
+++ b/yt_dlp/extractor/tiktok.py
@@ -0,0 +1,1317 @@
+import itertools
+import json
+import random
+import re
+import string
+import time
+
+from .common import InfoExtractor
+from ..compat import compat_urllib_parse_urlparse
+from ..networking import HEADRequest
+from ..utils import (
+ ExtractorError,
+ LazyList,
+ UnsupportedError,
+ UserNotLive,
+ determine_ext,
+ format_field,
+ int_or_none,
+ join_nonempty,
+ merge_dicts,
+ qualities,
+ remove_start,
+ srt_subtitles_timecode,
+ str_or_none,
+ traverse_obj,
+ try_call,
+ try_get,
+ url_or_none,
+)
+
+
+class TikTokBaseIE(InfoExtractor):
+ _APP_VERSIONS = [('26.1.3', '260103'), ('26.1.2', '260102'), ('26.1.1', '260101'), ('25.6.2', '250602')]
+ _WORKING_APP_VERSION = None
+ _APP_NAME = 'trill'
+ _AID = 1180
+ _UPLOADER_URL_FORMAT = 'https://www.tiktok.com/@%s'
+ _WEBPAGE_HOST = 'https://www.tiktok.com/'
+ QUALITIES = ('360p', '540p', '720p', '1080p')
+
+ @property
+ def _API_HOSTNAME(self):
+ return self._configuration_arg(
+ 'api_hostname', ['api16-normal-c-useast1a.tiktokv.com'], ie_key=TikTokIE)[0]
+
+ @staticmethod
+ def _create_url(user_id, video_id):
+ return f'https://www.tiktok.com/@{user_id or "_"}/video/{video_id}'
+
+ def _get_sigi_state(self, webpage, display_id):
+ return self._search_json(
+ r'<script[^>]+\bid="(?:SIGI_STATE|sigi-persisted-data)"[^>]*>', webpage,
+ 'sigi state', display_id, end_pattern=r'</script>', default={})
+
+ def _get_universal_data(self, webpage, display_id):
+ return traverse_obj(self._search_json(
+ r'<script[^>]+\bid="__UNIVERSAL_DATA_FOR_REHYDRATION__"[^>]*>', webpage,
+ 'universal data', display_id, end_pattern=r'</script>', default={}),
+ ('__DEFAULT_SCOPE__', {dict})) or {}
+
+ def _call_api_impl(self, ep, query, manifest_app_version, video_id, fatal=True,
+ note='Downloading API JSON', errnote='Unable to download API page'):
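+ # The API expects an 'odin_tt' device cookie; a random 160-character hex string appears to be accepted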
+ self._set_cookie(self._API_HOSTNAME, 'odin_tt', ''.join(random.choices('0123456789abcdef', k=160)))
+ webpage_cookies = self._get_cookies(self._WEBPAGE_HOST)
+ if webpage_cookies.get('sid_tt'):
+ self._set_cookie(self._API_HOSTNAME, 'sid_tt', webpage_cookies['sid_tt'].value)
+ return self._download_json(
+ 'https://%s/aweme/v1/%s/' % (self._API_HOSTNAME, ep), video_id=video_id,
+ fatal=fatal, note=note, errnote=errnote, headers={
+ 'User-Agent': f'com.ss.android.ugc.{self._APP_NAME}/{manifest_app_version} (Linux; U; Android 13; en_US; Pixel 7; Build/TD1A.220804.031; Cronet/58.0.2991.0)',
+ 'Accept': 'application/json',
+ }, query=query)
+
+ def _build_api_query(self, query, app_version, manifest_app_version):
+ return {
+ **query,
+ 'version_name': app_version,
+ 'version_code': manifest_app_version,
+ 'build_number': app_version,
+ 'manifest_version_code': manifest_app_version,
+ 'update_version_code': manifest_app_version,
+ 'openudid': ''.join(random.choices('0123456789abcdef', k=16)),
+ 'uuid': ''.join(random.choices(string.digits, k=16)),
+ '_rticket': int(time.time() * 1000),
+ 'ts': int(time.time()),
+ 'device_brand': 'Google',
+ 'device_type': 'Pixel 7',
+ 'device_platform': 'android',
+ 'resolution': '1080*2400',
+ 'dpi': 420,
+ 'os_version': '13',
+ 'os_api': '29',
+ 'carrier_region': 'US',
+ 'sys_region': 'US',
+ 'region': 'US',
+ 'app_name': self._APP_NAME,
+ 'app_language': 'en',
+ 'language': 'en',
+ 'timezone_name': 'America/New_York',
+ 'timezone_offset': '-14400',
+ 'channel': 'googleplay',
+ 'ac': 'wifi',
+ 'mcc_mnc': '310260',
+ 'is_my_cn': 0,
+ 'aid': self._AID,
+ 'ssmix': 'a',
+ 'as': 'a1qwert123',
+ 'cp': 'cbfhckdckkde1',
+ }
+
+ def _call_api(self, ep, query, video_id, fatal=True,
+ note='Downloading API JSON', errnote='Unable to download API page'):
+ if not self._WORKING_APP_VERSION:
+ app_version = self._configuration_arg('app_version', [''], ie_key=TikTokIE.ie_key())[0]
+ manifest_app_version = self._configuration_arg('manifest_app_version', [''], ie_key=TikTokIE.ie_key())[0]
+ if app_version and manifest_app_version:
+ self._WORKING_APP_VERSION = (app_version, manifest_app_version)
+ self.write_debug('Imported app version combo from extractor arguments')
+ elif app_version or manifest_app_version:
+ self.report_warning('Only one of the two required version parameters was passed as an extractor argument', only_once=True)
+
+ if self._WORKING_APP_VERSION:
+ app_version, manifest_app_version = self._WORKING_APP_VERSION
+ real_query = self._build_api_query(query, app_version, manifest_app_version)
+ return self._call_api_impl(ep, real_query, manifest_app_version, video_id, fatal, note, errnote)
+
+ for count, (app_version, manifest_app_version) in enumerate(self._APP_VERSIONS, start=1):
+ real_query = self._build_api_query(query, app_version, manifest_app_version)
+ try:
+ res = self._call_api_impl(ep, real_query, manifest_app_version, video_id, fatal, note, errnote)
+ self._WORKING_APP_VERSION = (app_version, manifest_app_version)
+ return res
+ except ExtractorError as e:
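+ # An empty response body surfaces as a JSONDecodeError at position 0, i.e. this app version combo was rejected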
+ if isinstance(e.cause, json.JSONDecodeError) and e.cause.pos == 0:
+ if count == len(self._APP_VERSIONS):
+ if fatal:
+ raise e
+ else:
+ self.report_warning(str(e.cause or e.msg))
+ return
+ self.report_warning('%s. Retrying... (attempt %s of %s)' % (str(e.cause or e.msg), count, len(self._APP_VERSIONS)))
+ continue
+ raise e
+
+ def _extract_aweme_app(self, aweme_id):
+ feed_list = self._call_api(
+ 'feed', {'aweme_id': aweme_id}, aweme_id, note='Downloading video feed',
+ errnote='Unable to download video feed').get('aweme_list') or []
+ aweme_detail = next((aweme for aweme in feed_list if str(aweme.get('aweme_id')) == aweme_id), None)
+ if not aweme_detail:
+ raise ExtractorError('Unable to find video in feed', video_id=aweme_id)
+ return self._parse_aweme_video_app(aweme_detail)
+
+ def _get_subtitles(self, aweme_detail, aweme_id):
+ # TODO: Extract text positioning info
+ subtitles = {}
+ # aweme/detail endpoint subs
+ captions_info = traverse_obj(
+ aweme_detail, ('interaction_stickers', ..., 'auto_video_caption_info', 'auto_captions', ...), expected_type=dict)
+ for caption in captions_info:
+ caption_url = traverse_obj(caption, ('url', 'url_list', ...), expected_type=url_or_none, get_all=False)
+ if not caption_url:
+ continue
+ caption_json = self._download_json(
+ caption_url, aweme_id, note='Downloading captions', errnote='Unable to download captions', fatal=False)
+ if not caption_json:
+ continue
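+ # Render each utterance as an SRT cue: index, 'start --> end' timecodes, then the text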
+ subtitles.setdefault(caption.get('language', 'en'), []).append({
+ 'ext': 'srt',
+ 'data': '\n\n'.join(
+ f'{i + 1}\n{srt_subtitles_timecode(line["start_time"] / 1000)} --> {srt_subtitles_timecode(line["end_time"] / 1000)}\n{line["text"]}'
+ for i, line in enumerate(caption_json['utterances']) if line.get('text'))
+ })
+ # feed endpoint subs
+ if not subtitles:
+ for caption in traverse_obj(aweme_detail, ('video', 'cla_info', 'caption_infos', ...), expected_type=dict):
+ if not caption.get('url'):
+ continue
+ subtitles.setdefault(caption.get('lang') or 'en', []).append({
+ 'ext': remove_start(caption.get('caption_format'), 'web'),
+ 'url': caption['url'],
+ })
+ # webpage subs
+ if not subtitles:
+ for caption in traverse_obj(aweme_detail, ('video', 'subtitleInfos', ...), expected_type=dict):
+ if not caption.get('Url'):
+ continue
+ subtitles.setdefault(caption.get('LanguageCodeName') or 'en', []).append({
+ 'ext': remove_start(caption.get('Format'), 'web'),
+ 'url': caption['Url'],
+ })
+ return subtitles
+
+ def _parse_aweme_video_app(self, aweme_detail):
+ aweme_id = aweme_detail['aweme_id']
+ video_info = aweme_detail['video']
+
+ def parse_url_key(url_key):
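+ # e.g. a url_key like 'v0300fg10000example_bytevc1_720p_1200000' (hypothetical) yields
+ # format_id 'bytevc1_720p_1200000', vcodec 'h265', res '720p' and tbr 1200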
+ format_id, codec, res, bitrate = self._search_regex(
+ r'v[^_]+_(?P<id>(?P<codec>[^_]+)_(?P<res>\d+p)_(?P<bitrate>\d+))', url_key,
+ 'url key', default=(None, None, None, None), group=('id', 'codec', 'res', 'bitrate'))
+ if not format_id:
+ return {}, None
+ return {
+ 'format_id': format_id,
+ 'vcodec': 'h265' if codec == 'bytevc1' else codec,
+ 'tbr': int_or_none(bitrate, scale=1000) or None,
+ 'quality': qualities(self.QUALITIES)(res),
+ }, res
+
+ known_resolutions = {}
+
+ def audio_meta(url):
+ ext = determine_ext(url, default_ext='m4a')
+ return {
+ 'format_note': 'Music track',
+ 'ext': ext,
+ 'acodec': 'aac' if ext == 'm4a' else ext,
+ 'vcodec': 'none',
+ 'width': None,
+ 'height': None,
+ } if ext == 'mp3' or '-music-' in url else {}
+
+ def extract_addr(addr, add_meta={}):
+ parsed_meta, res = parse_url_key(addr.get('url_key', ''))
+ if res:
+ known_resolutions.setdefault(res, {}).setdefault('height', int_or_none(addr.get('height')))
+ known_resolutions[res].setdefault('width', int_or_none(addr.get('width')))
+ parsed_meta.update(known_resolutions.get(res, {}))
+ add_meta.setdefault('height', int_or_none(res[:-1]))
+ return [{
+ 'url': url,
+ 'filesize': int_or_none(addr.get('data_size')),
+ 'ext': 'mp4',
+ 'acodec': 'aac',
+ 'source_preference': -2 if 'aweme/v1' in url else -1, # Downloads from API might get blocked
+ **add_meta, **parsed_meta,
+ 'format_note': join_nonempty(
+ add_meta.get('format_note'), '(API)' if 'aweme/v1' in url else None, delim=' '),
+ **audio_meta(url),
+ } for url in addr.get('url_list') or []]
+
+ # Hack: Add direct video links first to prioritize them when removing duplicate formats
+ formats = []
+ width = int_or_none(video_info.get('width'))
+ height = int_or_none(video_info.get('height'))
+ if video_info.get('play_addr'):
+ formats.extend(extract_addr(video_info['play_addr'], {
+ 'format_id': 'play_addr',
+ 'format_note': 'Direct video',
+ 'vcodec': 'h265' if traverse_obj(
+ video_info, 'is_bytevc1', 'is_h265') else 'h264', # TODO: Check for "direct iOS" videos, like https://www.tiktok.com/@cookierun_dev/video/7039716639834656002
+ 'width': width,
+ 'height': height,
+ }))
+ if video_info.get('download_addr'):
+ download_addr = video_info['download_addr']
+ dl_width = int_or_none(download_addr.get('width'))
+ formats.extend(extract_addr(download_addr, {
+ 'format_id': 'download_addr',
+ 'format_note': 'Download video%s' % (', watermarked' if video_info.get('has_watermark') else ''),
+ 'vcodec': 'h264',
+ 'width': dl_width or width,
+ 'height': try_call(lambda: int(dl_width / 0.5625)) or height, # download_addr['height'] is wrong
+ 'preference': -2 if video_info.get('has_watermark') else -1,
+ }))
+ if video_info.get('play_addr_h264'):
+ formats.extend(extract_addr(video_info['play_addr_h264'], {
+ 'format_id': 'play_addr_h264',
+ 'format_note': 'Direct video',
+ 'vcodec': 'h264',
+ }))
+ if video_info.get('play_addr_bytevc1'):
+ formats.extend(extract_addr(video_info['play_addr_bytevc1'], {
+ 'format_id': 'play_addr_bytevc1',
+ 'format_note': 'Direct video',
+ 'vcodec': 'h265',
+ }))
+
+ for bitrate in video_info.get('bit_rate', []):
+ if bitrate.get('play_addr'):
+ formats.extend(extract_addr(bitrate['play_addr'], {
+ 'format_id': bitrate.get('gear_name'),
+ 'format_note': 'Playback video',
+ 'tbr': try_get(bitrate, lambda x: x['bit_rate'] / 1000),
+ 'vcodec': 'h265' if traverse_obj(
+ bitrate, 'is_bytevc1', 'is_h265') else 'h264',
+ 'fps': bitrate.get('FPS'),
+ }))
+
+ self._remove_duplicate_formats(formats)
+ auth_cookie = self._get_cookies(self._WEBPAGE_HOST).get('sid_tt')
+ if auth_cookie:
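+ # format URLs can point at different CDN hostnames, so set the session cookie on each host individually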
+ for f in formats:
+ self._set_cookie(compat_urllib_parse_urlparse(f['url']).hostname, 'sid_tt', auth_cookie.value)
+
+ thumbnails = []
+ for cover_id in ('cover', 'ai_dynamic_cover', 'animated_cover', 'ai_dynamic_cover_bak',
+ 'origin_cover', 'dynamic_cover'):
+ for cover_url in traverse_obj(video_info, (cover_id, 'url_list', ...)):
+ thumbnails.append({
+ 'id': cover_id,
+ 'url': cover_url,
+ })
+
+ stats_info = aweme_detail.get('statistics') or {}
+ author_info = aweme_detail.get('author') or {}
+ music_info = aweme_detail.get('music') or {}
+ user_url = self._UPLOADER_URL_FORMAT % (traverse_obj(author_info,
+ 'sec_uid', 'id', 'uid', 'unique_id',
+ expected_type=str_or_none, get_all=False))
+ labels = traverse_obj(aweme_detail, ('hybrid_label', ..., 'text'), expected_type=str)
+
+ contained_music_track = traverse_obj(
+ music_info, ('matched_song', 'title'), ('matched_pgc_sound', 'title'), expected_type=str)
+ contained_music_author = traverse_obj(
+ music_info, ('matched_song', 'author'), ('matched_pgc_sound', 'author'), 'author', expected_type=str)
+
+ is_generic_og_trackname = music_info.get('is_original_sound') and music_info.get('title') == 'original sound - %s' % music_info.get('owner_handle')
+ if is_generic_og_trackname:
+ music_track, music_author = contained_music_track or 'original sound', contained_music_author
+ else:
+ music_track, music_author = music_info.get('title'), traverse_obj(music_info, ('author', {str}))
+
+ return {
+ 'id': aweme_id,
+ **traverse_obj(aweme_detail, {
+ 'title': ('desc', {str}),
+ 'description': ('desc', {str}),
+ 'timestamp': ('create_time', {int_or_none}),
+ }),
+ **traverse_obj(stats_info, {
+ 'view_count': 'play_count',
+ 'like_count': 'digg_count',
+ 'repost_count': 'share_count',
+ 'comment_count': 'comment_count',
+ }, expected_type=int_or_none),
+ **traverse_obj(author_info, {
+ 'uploader': ('unique_id', {str}),
+ 'uploader_id': ('uid', {str_or_none}),
+ 'creators': ('nickname', {str}, {lambda x: [x] if x else None}), # for compat
+ 'channel': ('nickname', {str}),
+ 'channel_id': ('sec_uid', {str}),
+ }),
+ 'uploader_url': user_url,
+ 'track': music_track,
+ 'album': str_or_none(music_info.get('album')) or None,
+ 'artists': re.split(r'(?:, | & )', music_author) if music_author else None,
+ 'formats': formats,
+ 'subtitles': self.extract_subtitles(aweme_detail, aweme_id),
+ 'thumbnails': thumbnails,
+ 'duration': int_or_none(traverse_obj(video_info, 'duration', ('download_addr', 'duration')), scale=1000),
+ 'availability': self._availability(
+ is_private='Private' in labels,
+ needs_subscription='Friends only' in labels,
+ is_unlisted='Followers only' in labels),
+ '_format_sort_fields': ('quality', 'codec', 'size', 'br'),
+ }
+
+ def _parse_aweme_video_web(self, aweme_detail, webpage_url, video_id):
+ video_info = aweme_detail['video']
+ author_info = traverse_obj(aweme_detail, 'authorInfo', 'author', expected_type=dict, default={})
+ music_info = aweme_detail.get('music') or {}
+ stats_info = aweme_detail.get('stats') or {}
+ channel_id = traverse_obj(author_info or aweme_detail, (('authorSecId', 'secUid'), {str}), get_all=False)
+ user_url = self._UPLOADER_URL_FORMAT % channel_id if channel_id else None
+
+ formats = []
+ width = int_or_none(video_info.get('width'))
+ height = int_or_none(video_info.get('height'))
+
+ for play_url in traverse_obj(video_info, ('playAddr', ((..., 'src'), None), {url_or_none})):
+ formats.append({
+ 'url': self._proto_relative_url(play_url),
+ 'ext': 'mp4',
+ 'width': width,
+ 'height': height,
+ })
+
+ for download_url in traverse_obj(video_info, (('downloadAddr', ('download', 'url')), {url_or_none})):
+ formats.append({
+ 'format_id': 'download',
+ 'url': self._proto_relative_url(download_url),
+ 'ext': 'mp4',
+ 'width': width,
+ 'height': height,
+ })
+
+ self._remove_duplicate_formats(formats)
+
+ thumbnails = []
+ for thumb_url in traverse_obj(aweme_detail, (
+ (None, 'video'), ('thumbnail', 'cover', 'dynamicCover', 'originCover'), {url_or_none})):
+ thumbnails.append({
+ 'url': self._proto_relative_url(thumb_url),
+ 'width': width,
+ 'height': height,
+ })
+
+ return {
+ 'id': video_id,
+ **traverse_obj(aweme_detail, {
+ 'title': ('desc', {str}),
+ 'description': ('desc', {str}),
+ 'duration': ('video', 'duration', {int_or_none}),
+ 'timestamp': ('createTime', {int_or_none}),
+ }),
+ **traverse_obj(author_info or aweme_detail, {
+ 'creators': ('nickname', {str}, {lambda x: [x] if x else None}), # for compat
+ 'channel': ('nickname', {str}),
+ 'uploader': (('uniqueId', 'author'), {str}),
+ 'uploader_id': (('authorId', 'uid', 'id'), {str_or_none}),
+ }, get_all=False),
+ **traverse_obj(stats_info, {
+ 'view_count': 'playCount',
+ 'like_count': 'diggCount',
+ 'repost_count': 'shareCount',
+ 'comment_count': 'commentCount',
+ }, expected_type=int_or_none),
+ **traverse_obj(music_info, {
+ 'track': ('title', {str}),
+ 'album': ('album', {str}, {lambda x: x or None}),
+ 'artists': ('authorName', {str}, {lambda x: [x] if x else None}),
+ }),
+ 'channel_id': channel_id,
+ 'uploader_url': user_url,
+ 'formats': formats,
+ 'thumbnails': thumbnails,
+ 'http_headers': {
+ 'Referer': webpage_url,
+ }
+ }
+
+
+class TikTokIE(TikTokBaseIE):
+ _VALID_URL = r'https?://www\.tiktok\.com/(?:embed|@(?P<user_id>[\w\.-]+)?/video)/(?P<id>\d+)'
+ _EMBED_REGEX = [rf'<(?:script|iframe)[^>]+\bsrc=(["\'])(?P<url>{_VALID_URL})']
+
+ _TESTS = [{
+ 'url': 'https://www.tiktok.com/@leenabhushan/video/6748451240264420610',
+ 'md5': '736bb7a466c6f0a6afeb597da1e6f5b7',
+ 'info_dict': {
+ 'id': '6748451240264420610',
+ 'ext': 'mp4',
+ 'title': '#jassmanak #lehanga #leenabhushan',
+ 'description': '#jassmanak #lehanga #leenabhushan',
+ 'duration': 13,
+ 'height': 1024,
+ 'width': 576,
+ 'uploader': 'leenabhushan',
+ 'uploader_id': '6691488002098119685',
+ 'uploader_url': 'https://www.tiktok.com/@MS4wLjABAAAA_Eb4t1vodM1IuTy_cvp9CY22RAb59xqrO0Xtz9CYQJvgXaDvZxYnZYRzDWhhgJmy',
+ 'creator': 'facestoriesbyleenabh',
+ 'thumbnail': r're:^https?://[\w\/\.\-]+(~[\w\-]+\.image)?',
+ 'upload_date': '20191016',
+ 'timestamp': 1571246252,
+ 'view_count': int,
+ 'like_count': int,
+ 'repost_count': int,
+ 'comment_count': int,
+ 'artist': 'Ysrbeats',
+ 'album': 'Lehanga',
+ 'track': 'Lehanga',
+ },
+ 'skip': '404 Not Found',
+ }, {
+ 'url': 'https://www.tiktok.com/@patroxofficial/video/6742501081818877190?langCountry=en',
+ 'md5': '6f3cf8cdd9b28cb8363fe0a9a160695b',
+ 'info_dict': {
+ 'id': '6742501081818877190',
+ 'ext': 'mp4',
+ 'title': 'md5:5e2a23877420bb85ce6521dbee39ba94',
+ 'description': 'md5:5e2a23877420bb85ce6521dbee39ba94',
+ 'duration': 27,
+ 'height': 960,
+ 'width': 540,
+ 'uploader': 'patrox',
+ 'uploader_id': '18702747',
+ 'uploader_url': 'https://www.tiktok.com/@MS4wLjABAAAAiFnldaILebi5heDoVU6bn4jBWWycX6-9U3xuNPqZ8Ws',
+ 'channel_id': 'MS4wLjABAAAAiFnldaILebi5heDoVU6bn4jBWWycX6-9U3xuNPqZ8Ws',
+ 'channel': 'patroX',
+ 'creators': ['patroX'],
+ 'thumbnail': r're:^https?://[\w\/\.\-]+(~[\w\-]+\.image)?',
+ 'upload_date': '20190930',
+ 'timestamp': 1569860870,
+ 'view_count': int,
+ 'like_count': int,
+ 'repost_count': int,
+ 'comment_count': int,
+ 'artists': ['Evan Todd', 'Jessica Keenan Wynn', 'Alice Lee', 'Barrett Wilbert Weed', 'Jon Eidson'],
+ 'track': 'Big Fun',
+ },
+ }, {
+ # Banned audio, only available on the app
+ 'url': 'https://www.tiktok.com/@barudakhb_/video/6984138651336838402',
+ 'info_dict': {
+ 'id': '6984138651336838402',
+ 'ext': 'mp4',
+ 'title': 'Balas @yolaaftwsr hayu yu ? #SquadRandom_ 🔥',
+ 'description': 'Balas @yolaaftwsr hayu yu ? #SquadRandom_ 🔥',
+ 'uploader': 'barudakhb_',
+ 'channel': 'md5:29f238c49bc0c176cb3cef1a9cea9fa6',
+ 'creators': ['md5:29f238c49bc0c176cb3cef1a9cea9fa6'],
+ 'uploader_id': '6974687867511718913',
+ 'uploader_url': 'https://www.tiktok.com/@MS4wLjABAAAAbhBwQC-R1iKoix6jDFsF-vBdfx2ABoDjaZrM9fX6arU3w71q3cOWgWuTXn1soZ7d',
+ 'channel_id': 'MS4wLjABAAAAbhBwQC-R1iKoix6jDFsF-vBdfx2ABoDjaZrM9fX6arU3w71q3cOWgWuTXn1soZ7d',
+ 'track': 'Boka Dance',
+ 'artists': ['md5:29f238c49bc0c176cb3cef1a9cea9fa6'],
+ 'timestamp': 1626121503,
+ 'duration': 18,
+ 'thumbnail': r're:^https?://[\w\/\.\-]+(~[\w\-]+\.image)?',
+ 'upload_date': '20210712',
+ 'view_count': int,
+ 'like_count': int,
+ 'repost_count': int,
+ 'comment_count': int,
+ },
+ }, {
+ # Sponsored video, only available with feed workaround
+ 'url': 'https://www.tiktok.com/@MS4wLjABAAAATh8Vewkn0LYM7Fo03iec3qKdeCUOcBIouRk1mkiag6h3o_pQu_dUXvZ2EZlGST7_/video/7042692929109986561',
+ 'info_dict': {
+ 'id': '7042692929109986561',
+ 'ext': 'mp4',
+ 'title': 'Slap and Run!',
+ 'description': 'Slap and Run!',
+ 'uploader': 'user440922249',
+ 'channel': 'Slap And Run',
+ 'creators': ['Slap And Run'],
+ 'uploader_id': '7036055384943690754',
+ 'uploader_url': 'https://www.tiktok.com/@MS4wLjABAAAATh8Vewkn0LYM7Fo03iec3qKdeCUOcBIouRk1mkiag6h3o_pQu_dUXvZ2EZlGST7_',
+ 'channel_id': 'MS4wLjABAAAATh8Vewkn0LYM7Fo03iec3qKdeCUOcBIouRk1mkiag6h3o_pQu_dUXvZ2EZlGST7_',
+ 'track': 'Promoted Music',
+ 'timestamp': 1639754738,
+ 'duration': 30,
+ 'thumbnail': r're:^https?://[\w\/\.\-]+(~[\w\-]+\.image)?',
+ 'upload_date': '20211217',
+ 'view_count': int,
+ 'like_count': int,
+ 'repost_count': int,
+ 'comment_count': int,
+ },
+ 'params': {'skip_download': True}, # XXX: unable to download video data: HTTP Error 403: Forbidden
+ }, {
+ # Video without title and description
+ 'url': 'https://www.tiktok.com/@pokemonlife22/video/7059698374567611694',
+ 'info_dict': {
+ 'id': '7059698374567611694',
+ 'ext': 'mp4',
+ 'title': 'TikTok video #7059698374567611694',
+ 'description': '',
+ 'uploader': 'pokemonlife22',
+ 'channel': 'Pokemon',
+ 'creators': ['Pokemon'],
+ 'uploader_id': '6820838815978423302',
+ 'uploader_url': 'https://www.tiktok.com/@MS4wLjABAAAA0tF1nBwQVVMyrGu3CqttkNgM68Do1OXUFuCY0CRQk8fEtSVDj89HqoqvbSTmUP2W',
+ 'channel_id': 'MS4wLjABAAAA0tF1nBwQVVMyrGu3CqttkNgM68Do1OXUFuCY0CRQk8fEtSVDj89HqoqvbSTmUP2W',
+ 'track': 'original sound',
+ 'timestamp': 1643714123,
+ 'duration': 6,
+ 'thumbnail': r're:^https?://[\w\/\.\-]+(~[\w\-]+\.image)?',
+ 'upload_date': '20220201',
+ 'artists': ['Pokemon'],
+ 'view_count': int,
+ 'like_count': int,
+ 'repost_count': int,
+ 'comment_count': int,
+ },
+ }, {
+ # hydration JSON is sent in a <script> element
+ 'url': 'https://www.tiktok.com/@denidil6/video/7065799023130643713',
+ 'info_dict': {
+ 'id': '7065799023130643713',
+ 'ext': 'mp4',
+ 'title': '#denidil#денидил',
+ 'description': '#denidil#денидил',
+ 'uploader': 'denidil6',
+ 'uploader_id': '7046664115636405250',
+ 'uploader_url': 'https://www.tiktok.com/@MS4wLjABAAAAsvMSzFdQ4ikl3uR2TEJwMBbB2yZh2Zxwhx-WCo3rbDpAharE3GQCrFuJArI3C8QJ',
+ 'artist': 'Holocron Music',
+ 'album': 'Wolf Sounds (1 Hour) Enjoy the Company of the Animal That Is the Majestic King of the Night',
+ 'track': 'Wolf Sounds (1 Hour) Enjoy the Company of the Animal That Is the Majestic King of the Night',
+ 'timestamp': 1645134536,
+ 'duration': 26,
+ 'upload_date': '20220217',
+ 'view_count': int,
+ 'like_count': int,
+ 'repost_count': int,
+ 'comment_count': int,
+ },
+ 'skip': 'This video is unavailable',
+ }, {
+ # slideshow audio-only mp3 format
+ 'url': 'https://www.tiktok.com/@_le_cannibale_/video/7139980461132074283',
+ 'info_dict': {
+ 'id': '7139980461132074283',
+ 'ext': 'mp3',
+ 'title': 'TikTok video #7139980461132074283',
+ 'description': '',
+ 'channel': 'Antaura',
+ 'creators': ['Antaura'],
+ 'uploader': '_le_cannibale_',
+ 'uploader_id': '6604511138619654149',
+ 'uploader_url': 'https://www.tiktok.com/@MS4wLjABAAAAoShJqaw_5gvy48y3azFeFcT4jeyKWbB0VVYasOCt2tTLwjNFIaDcHAM4D-QGXFOP',
+ 'channel_id': 'MS4wLjABAAAAoShJqaw_5gvy48y3azFeFcT4jeyKWbB0VVYasOCt2tTLwjNFIaDcHAM4D-QGXFOP',
+ 'artists': ['nathan !'],
+ 'track': 'grahamscott canon',
+ 'upload_date': '20220905',
+ 'timestamp': 1662406249,
+ 'view_count': int,
+ 'like_count': int,
+ 'repost_count': int,
+ 'comment_count': int,
+ 'thumbnail': r're:^https://.+\.(?:webp|jpe?g)',
+ },
+ }, {
+ # only available via web
+ 'url': 'https://www.tiktok.com/@moxypatch/video/7206382937372134662', # FIXME
+ 'md5': '6aba7fad816e8709ff2c149679ace165',
+ 'info_dict': {
+ 'id': '7206382937372134662',
+ 'ext': 'mp4',
+ 'title': 'md5:1d95c0b96560ca0e8a231af4172b2c0a',
+ 'description': 'md5:1d95c0b96560ca0e8a231af4172b2c0a',
+ 'channel': 'MoxyPatch',
+ 'creators': ['MoxyPatch'],
+ 'uploader': 'moxypatch',
+ 'uploader_id': '7039142049363379205',
+ 'uploader_url': 'https://www.tiktok.com/@MS4wLjABAAAAFhqKnngMHJSsifL0w1vFOP5kn3Ndo1ODp0XuIBkNMBCkALTvwILdpu12g3pTtL4V',
+ 'channel_id': 'MS4wLjABAAAAFhqKnngMHJSsifL0w1vFOP5kn3Ndo1ODp0XuIBkNMBCkALTvwILdpu12g3pTtL4V',
+ 'artists': ['your worst nightmare'],
+ 'track': 'original sound',
+ 'upload_date': '20230303',
+ 'timestamp': 1677866781,
+ 'duration': 10,
+ 'view_count': int,
+ 'like_count': int,
+ 'repost_count': int,
+ 'comment_count': int,
+ 'thumbnail': r're:^https://.+',
+ 'thumbnails': 'count:3',
+ },
+ 'expected_warnings': ['Unable to find video in feed'],
+ }, {
+ # 1080p format
+ 'url': 'https://www.tiktok.com/@tatemcrae/video/7107337212743830830', # FIXME
+ 'md5': '982512017a8a917124d5a08c8ae79621',
+ 'info_dict': {
+ 'id': '7107337212743830830',
+ 'ext': 'mp4',
+ 'title': 'new music video 4 don’t come backkkk🧸🖤 i hope u enjoy !! @musicontiktok',
+ 'description': 'new music video 4 don’t come backkkk🧸🖤 i hope u enjoy !! @musicontiktok',
+ 'uploader': 'tatemcrae',
+ 'uploader_id': '86328792343818240',
+ 'uploader_url': 'https://www.tiktok.com/@MS4wLjABAAAA-0bQT0CqebTRr6I4IkYvMDMKSRSJHLNPBo5HrSklJwyA2psXLSZG5FP-LMNpHnJd',
+ 'channel_id': 'MS4wLjABAAAA-0bQT0CqebTRr6I4IkYvMDMKSRSJHLNPBo5HrSklJwyA2psXLSZG5FP-LMNpHnJd',
+ 'channel': 'tate mcrae',
+ 'creators': ['tate mcrae'],
+ 'artists': ['tate mcrae'],
+ 'track': 'original sound',
+ 'upload_date': '20220609',
+ 'timestamp': 1654805899,
+ 'duration': 150,
+ 'view_count': int,
+ 'like_count': int,
+ 'repost_count': int,
+ 'comment_count': int,
+ 'thumbnail': r're:^https://.+\.webp',
+ },
+ 'skip': 'Unavailable via feed API, no formats available via web',
+ }, {
+ # Slideshow, audio-only m4a format
+ 'url': 'https://www.tiktok.com/@hara_yoimiya/video/7253412088251534594',
+ 'md5': '2ff8fe0174db2dbf49c597a7bef4e47d',
+ 'info_dict': {
+ 'id': '7253412088251534594',
+ 'ext': 'm4a',
+ 'title': 'я ред флаг простите #переписка #щитпост #тревожныйтиппривязанности #рекомендации ',
+ 'description': 'я ред флаг простите #переписка #щитпост #тревожныйтиппривязанности #рекомендации ',
+ 'uploader': 'hara_yoimiya',
+ 'uploader_id': '6582536342634676230',
+ 'uploader_url': 'https://www.tiktok.com/@MS4wLjABAAAAIAlDxriiPWLE-p8p1R_0Bx8qWKfi-7zwmGhzU8Mv25W8sNxjfIKrol31qTczzuLB',
+ 'channel_id': 'MS4wLjABAAAAIAlDxriiPWLE-p8p1R_0Bx8qWKfi-7zwmGhzU8Mv25W8sNxjfIKrol31qTczzuLB',
+ 'channel': 'лампочка',
+ 'creators': ['лампочка'],
+ 'artists': ['Øneheart'],
+ 'album': 'watching the stars',
+ 'track': 'watching the stars',
+ 'upload_date': '20230708',
+ 'timestamp': 1688816612,
+ 'view_count': int,
+ 'like_count': int,
+ 'comment_count': int,
+ 'repost_count': int,
+ 'thumbnail': r're:^https://.+\.(?:webp|jpe?g)',
+ },
+ }, {
+ # Auto-captions available
+ 'url': 'https://www.tiktok.com/@hankgreen1/video/7047596209028074758',
+ 'only_matching': True
+ }]
+
+ def _real_extract(self, url):
+ video_id, user_id = self._match_valid_url(url).group('id', 'user_id')
+ try:
+ return self._extract_aweme_app(video_id)
+ except ExtractorError as e:
+ e.expected = True
+ self.report_warning(f'{e}; trying with webpage')
+
+ url = self._create_url(user_id, video_id)
+ webpage = self._download_webpage(url, video_id, headers={'User-Agent': 'Mozilla/5.0'})
+
+ if universal_data := self._get_universal_data(webpage, video_id):
+ self.write_debug('Found universal data for rehydration')
+ status = traverse_obj(universal_data, ('webapp.video-detail', 'statusCode', {int})) or 0
+ video_data = traverse_obj(universal_data, ('webapp.video-detail', 'itemInfo', 'itemStruct', {dict}))
+
+ elif sigi_data := self._get_sigi_state(webpage, video_id):
+ self.write_debug('Found sigi state data')
+ status = traverse_obj(sigi_data, ('VideoPage', 'statusCode', {int})) or 0
+ video_data = traverse_obj(sigi_data, ('ItemModule', video_id, {dict}))
+
+ elif next_data := self._search_nextjs_data(webpage, video_id, default='{}'):
+ self.write_debug('Found next.js data')
+ status = traverse_obj(next_data, ('props', 'pageProps', 'statusCode', {int})) or 0
+ video_data = traverse_obj(next_data, ('props', 'pageProps', 'itemInfo', 'itemStruct', {dict}))
+
+ else:
+ raise ExtractorError('Unable to extract webpage video data')
+
+ if video_data and status == 0:
+ return self._parse_aweme_video_web(video_data, url, video_id)
+ elif status == 10216:
+ raise ExtractorError('This video is private', expected=True)
+ raise ExtractorError(f'Video not available, status code {status}', video_id=video_id)
+
+
+class TikTokUserIE(TikTokBaseIE):
+ IE_NAME = 'tiktok:user'
+ _VALID_URL = r'https?://(?:www\.)?tiktok\.com/@(?P<id>[\w\.-]+)/?(?:$|[#?])'
+ _WORKING = False
+ _TESTS = [{
+ 'url': 'https://tiktok.com/@corgibobaa?lang=en',
+ 'playlist_mincount': 45,
+ 'info_dict': {
+ 'id': '6935371178089399301',
+ 'title': 'corgibobaa',
+ 'thumbnail': r're:https://.+_1080x1080\.webp'
+ },
+ 'expected_warnings': ['Retrying']
+ }, {
+ 'url': 'https://www.tiktok.com/@6820838815978423302',
+ 'playlist_mincount': 5,
+ 'info_dict': {
+ 'id': '6820838815978423302',
+ 'title': '6820838815978423302',
+ 'thumbnail': r're:https://.+_1080x1080\.webp'
+ },
+ 'expected_warnings': ['Retrying']
+ }, {
+ 'url': 'https://www.tiktok.com/@meme',
+ 'playlist_mincount': 593,
+ 'info_dict': {
+ 'id': '79005827461758976',
+ 'title': 'meme',
+ 'thumbnail': r're:https://.+_1080x1080\.webp'
+ },
+ 'expected_warnings': ['Retrying']
+ }]
+
+ r''' # TODO: Fix by adding _signature to api_url
+ def _entries(self, webpage, user_id, username):
+ secuid = self._search_regex(r'\"secUid\":\"(?P<secUid>[^\"]+)', webpage, username)
+ verifyfp_cookie = self._get_cookies('https://www.tiktok.com').get('s_v_web_id')
+ if not verifyfp_cookie:
+ raise ExtractorError('Improper cookies (missing s_v_web_id).', expected=True)
+ api_url = f'https://m.tiktok.com/api/post/item_list/?aid=1988&cookie_enabled=true&count=30&verifyFp={verifyfp_cookie.value}&secUid={secuid}&cursor='
+ cursor = '0'
+ for page in itertools.count():
+ data_json = self._download_json(api_url + cursor, username, note='Downloading Page %d' % page)
+ for video in data_json.get('itemList', []):
+ video_id = video['id']
+ video_url = f'https://www.tiktok.com/@{user_id}/video/{video_id}'
+ yield self._url_result(video_url, 'TikTok', video_id, str_or_none(video.get('desc')))
+ if not data_json.get('hasMore'):
+ break
+ cursor = data_json['cursor']
+ '''
+
+ def _video_entries_api(self, webpage, user_id, username):
+ query = {
+ 'user_id': user_id,
+ 'count': 21,
+ 'max_cursor': 0,
+ 'min_cursor': 0,
+ 'retry_type': 'no_retry',
+ 'device_id': ''.join(random.choices(string.digits, k=19)), # Some endpoints don't like randomized device_id, so it isn't directly set in _call_api.
+ }
+
+ for page in itertools.count(1):
+ for retry in self.RetryManager():
+ try:
+ post_list = self._call_api(
+ 'aweme/post', query, username, note=f'Downloading user video list page {page}',
+ errnote='Unable to download user video list')
+ except ExtractorError as e:
+ if isinstance(e.cause, json.JSONDecodeError) and e.cause.pos == 0:
+ retry.error = e
+ continue
+ raise
+ yield from post_list.get('aweme_list', [])
+ if not post_list.get('has_more'):
+ break
+ query['max_cursor'] = post_list['max_cursor']
+
+ def _entries_api(self, user_id, videos):
+ for video in videos:
+ yield {
+ **self._parse_aweme_video_app(video),
+ 'extractor_key': TikTokIE.ie_key(),
+ 'extractor': 'TikTok',
+ 'webpage_url': f'https://tiktok.com/@{user_id}/video/{video["aweme_id"]}',
+ }
+
+ def _real_extract(self, url):
+ user_name = self._match_id(url)
+ webpage = self._download_webpage(url, user_name, headers={
+ 'User-Agent': 'facebookexternalhit/1.1 (+http://www.facebook.com/externalhit_uatext.php)'
+ })
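+ # the profile page embeds an app deep link (snssdk<digits>://user/profile/<id>) carrying the numeric user ID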
+ user_id = self._html_search_regex(r'snssdk\d*://user/profile/(\d+)', webpage, 'user ID', default=None) or user_name
+
+ videos = LazyList(self._video_entries_api(webpage, user_id, user_name))
+ thumbnail = traverse_obj(videos, (0, 'author', 'avatar_larger', 'url_list', 0))
+
+ return self.playlist_result(self._entries_api(user_id, videos), user_id, user_name, thumbnail=thumbnail)
+
+
+class TikTokBaseListIE(TikTokBaseIE): # XXX: Conventionally, base classes should end with BaseIE/InfoExtractor
+ def _entries(self, list_id, display_id):
+ query = {
+ self._QUERY_NAME: list_id,
+ 'cursor': 0,
+ 'count': 20,
+ 'type': 5,
+ 'device_id': ''.join(random.choices(string.digits, k=19))
+ }
+
+ for page in itertools.count(1):
+ for retry in self.RetryManager():
+ try:
+ post_list = self._call_api(
+ self._API_ENDPOINT, query, display_id, note=f'Downloading video list page {page}',
+ errnote='Unable to download video list')
+ except ExtractorError as e:
+ if isinstance(e.cause, json.JSONDecodeError) and e.cause.pos == 0:
+ retry.error = e
+ continue
+ raise
+ for video in post_list.get('aweme_list', []):
+ yield {
+ **self._parse_aweme_video_app(video),
+ 'extractor_key': TikTokIE.ie_key(),
+ 'extractor': 'TikTok',
+ 'webpage_url': f'https://tiktok.com/@_/video/{video["aweme_id"]}',
+ }
+ if not post_list.get('has_more'):
+ break
+ query['cursor'] = post_list['cursor']
+
+ def _real_extract(self, url):
+ list_id = self._match_id(url)
+ return self.playlist_result(self._entries(list_id, list_id), list_id)
+
+
+class TikTokSoundIE(TikTokBaseListIE):
+ IE_NAME = 'tiktok:sound'
+ _VALID_URL = r'https?://(?:www\.)?tiktok\.com/music/[\w\.-]+-(?P<id>[\d]+)[/?#&]?'
+ _WORKING = False
+ _QUERY_NAME = 'music_id'
+ _API_ENDPOINT = 'music/aweme'
+ _TESTS = [{
+ 'url': 'https://www.tiktok.com/music/Build-a-Btch-6956990112127585029?lang=en',
+ 'playlist_mincount': 100,
+ 'info_dict': {
+ 'id': '6956990112127585029'
+ },
+ 'expected_warnings': ['Retrying']
+ }, {
+ # Actual entries are less than listed video count
+ 'url': 'https://www.tiktok.com/music/jiefei-soap-remix-7036843036118469381',
+ 'playlist_mincount': 2182,
+ 'info_dict': {
+ 'id': '7036843036118469381'
+ },
+ 'expected_warnings': ['Retrying']
+ }]
+
+
+class TikTokEffectIE(TikTokBaseListIE):
+ IE_NAME = 'tiktok:effect'
+ _VALID_URL = r'https?://(?:www\.)?tiktok\.com/sticker/[\w\.-]+-(?P<id>[\d]+)[/?#&]?'
+ _WORKING = False
+ _QUERY_NAME = 'sticker_id'
+ _API_ENDPOINT = 'sticker/aweme'
+ _TESTS = [{
+ 'url': 'https://www.tiktok.com/sticker/MATERIAL-GWOOORL-1258156',
+ 'playlist_mincount': 100,
+ 'info_dict': {
+ 'id': '1258156',
+ },
+ 'expected_warnings': ['Retrying']
+ }, {
+ # Different entries between mobile and web, depending on region
+ 'url': 'https://www.tiktok.com/sticker/Elf-Friend-479565',
+ 'only_matching': True
+ }]
+
+
+class TikTokTagIE(TikTokBaseListIE):
+ IE_NAME = 'tiktok:tag'
+ _VALID_URL = r'https?://(?:www\.)?tiktok\.com/tag/(?P<id>[^/?#&]+)'
+ _WORKING = False
+ _QUERY_NAME = 'ch_id'
+ _API_ENDPOINT = 'challenge/aweme'
+ _TESTS = [{
+ 'url': 'https://tiktok.com/tag/hello2018',
+ 'playlist_mincount': 39,
+ 'info_dict': {
+ 'id': '46294678',
+ 'title': 'hello2018',
+ },
+ 'expected_warnings': ['Retrying']
+ }, {
+ 'url': 'https://tiktok.com/tag/fypシ?is_copy_url=0&is_from_webapp=v1',
+ 'only_matching': True
+ }]
+
+ def _real_extract(self, url):
+ display_id = self._match_id(url)
+ webpage = self._download_webpage(url, display_id, headers={
+ 'User-Agent': 'facebookexternalhit/1.1 (+http://www.facebook.com/externalhit_uatext.php)'
+ })
+ tag_id = self._html_search_regex(r'snssdk\d*://challenge/detail/(\d+)', webpage, 'tag ID')
+ return self.playlist_result(self._entries(tag_id, display_id), tag_id, display_id)
+
+
+class DouyinIE(TikTokBaseIE):
+ _VALID_URL = r'https?://(?:www\.)?douyin\.com/video/(?P<id>[0-9]+)'
+ _TESTS = [{
+ 'url': 'https://www.douyin.com/video/6961737553342991651',
+ 'md5': '9ecce7bc5b302601018ecb2871c63a75',
+ 'info_dict': {
+ 'id': '6961737553342991651',
+ 'ext': 'mp4',
+ 'title': '#杨超越 小小水手带你去远航❤️',
+ 'description': '#杨超越 小小水手带你去远航❤️',
+ 'uploader': '6897520xka',
+ 'uploader_id': '110403406559',
+ 'uploader_url': 'https://www.douyin.com/user/MS4wLjABAAAAEKnfa654JAJ_N5lgZDQluwsxmY0lhfmEYNQBBkwGG98',
+ 'channel_id': 'MS4wLjABAAAAEKnfa654JAJ_N5lgZDQluwsxmY0lhfmEYNQBBkwGG98',
+ 'channel': '杨超越',
+ 'creators': ['杨超越'],
+ 'duration': 19,
+ 'timestamp': 1620905839,
+ 'upload_date': '20210513',
+ 'track': '@杨超越创作的原声',
+ 'artists': ['杨超越'],
+ 'view_count': int,
+ 'like_count': int,
+ 'repost_count': int,
+ 'comment_count': int,
+ 'thumbnail': r're:https?://.+\.jpe?g',
+ },
+ }, {
+ 'url': 'https://www.douyin.com/video/6982497745948921092',
+ 'md5': '15c5e660b7048af3707304e3cc02bbb5',
+ 'info_dict': {
+ 'id': '6982497745948921092',
+ 'ext': 'mp4',
+ 'title': '这个夏日和小羊@杨超越 一起遇见白色幻想',
+ 'description': '这个夏日和小羊@杨超越 一起遇见白色幻想',
+ 'uploader': '0731chaoyue',
+ 'uploader_id': '408654318141572',
+ 'uploader_url': 'https://www.douyin.com/user/MS4wLjABAAAAZJpnglcjW2f_CMVcnqA_6oVBXKWMpH0F8LIHuUu8-lA',
+ 'channel_id': 'MS4wLjABAAAAZJpnglcjW2f_CMVcnqA_6oVBXKWMpH0F8LIHuUu8-lA',
+ 'channel': '杨超越工作室',
+ 'creators': ['杨超越工作室'],
+ 'duration': 42,
+ 'timestamp': 1625739481,
+ 'upload_date': '20210708',
+ 'track': '@杨超越工作室创作的原声',
+ 'artists': ['杨超越工作室'],
+ 'view_count': int,
+ 'like_count': int,
+ 'repost_count': int,
+ 'comment_count': int,
+ 'thumbnail': r're:https?://.+\.jpe?g',
+ },
+ }, {
+ 'url': 'https://www.douyin.com/video/6953975910773099811',
+ 'md5': '0e6443758b8355db9a3c34864a4276be',
+ 'info_dict': {
+ 'id': '6953975910773099811',
+ 'ext': 'mp4',
+ 'title': '#一起看海 出现在你的夏日里',
+ 'description': '#一起看海 出现在你的夏日里',
+ 'uploader': '6897520xka',
+ 'uploader_id': '110403406559',
+ 'uploader_url': 'https://www.douyin.com/user/MS4wLjABAAAAEKnfa654JAJ_N5lgZDQluwsxmY0lhfmEYNQBBkwGG98',
+ 'channel_id': 'MS4wLjABAAAAEKnfa654JAJ_N5lgZDQluwsxmY0lhfmEYNQBBkwGG98',
+ 'channel': '杨超越',
+ 'creators': ['杨超越'],
+ 'duration': 17,
+ 'timestamp': 1619098692,
+ 'upload_date': '20210422',
+ 'track': '@杨超越创作的原声',
+ 'artists': ['杨超越'],
+ 'view_count': int,
+ 'like_count': int,
+ 'repost_count': int,
+ 'comment_count': int,
+ 'thumbnail': r're:https?://.+\.jpe?g',
+ },
+ }, {
+ 'url': 'https://www.douyin.com/video/6950251282489675042',
+ 'md5': 'b4db86aec367ef810ddd38b1737d2fed',
+ 'info_dict': {
+ 'id': '6950251282489675042',
+ 'ext': 'mp4',
+ 'title': '哈哈哈,成功了哈哈哈哈哈哈',
+ 'uploader': '杨超越',
+ 'upload_date': '20210412',
+ 'timestamp': 1618231483,
+ 'uploader_id': '110403406559',
+ 'view_count': int,
+ 'like_count': int,
+ 'repost_count': int,
+ 'comment_count': int,
+ },
+ 'skip': 'No longer available',
+ }, {
+ 'url': 'https://www.douyin.com/video/6963263655114722595',
+ 'md5': '1440bcf59d8700f8e014da073a4dfea8',
+ 'info_dict': {
+ 'id': '6963263655114722595',
+ 'ext': 'mp4',
+ 'title': '#哪个爱豆的105度最甜 换个角度看看我哈哈',
+ 'description': '#哪个爱豆的105度最甜 换个角度看看我哈哈',
+ 'uploader': '6897520xka',
+ 'uploader_id': '110403406559',
+ 'uploader_url': 'https://www.douyin.com/user/MS4wLjABAAAAEKnfa654JAJ_N5lgZDQluwsxmY0lhfmEYNQBBkwGG98',
+ 'channel_id': 'MS4wLjABAAAAEKnfa654JAJ_N5lgZDQluwsxmY0lhfmEYNQBBkwGG98',
+ 'channel': '杨超越',
+ 'creators': ['杨超越'],
+ 'duration': 15,
+ 'timestamp': 1621261163,
+ 'upload_date': '20210517',
+ 'track': '@杨超越创作的原声',
+ 'artists': ['杨超越'],
+ 'view_count': int,
+ 'like_count': int,
+ 'repost_count': int,
+ 'comment_count': int,
+ 'thumbnail': r're:https?://.+\.jpe?g',
+ },
+ }]
+ _UPLOADER_URL_FORMAT = 'https://www.douyin.com/user/%s'
+ _WEBPAGE_HOST = 'https://www.douyin.com/'
+
+ def _real_extract(self, url):
+ video_id = self._match_id(url)
+
+ detail = traverse_obj(self._download_json(
+ 'https://www.douyin.com/aweme/v1/web/aweme/detail/', video_id,
+ 'Downloading web detail JSON', 'Failed to download web detail JSON',
+ query={'aweme_id': video_id}, fatal=False), ('aweme_detail', {dict}))
+ if not detail:
+ # TODO: Run verification challenge code to generate signature cookies
+ raise ExtractorError(
+ 'Fresh cookies (not necessarily logged in) are needed',
+ expected=not self._get_cookies(self._WEBPAGE_HOST).get('s_v_web_id'))
+
+ return self._parse_aweme_video_app(detail)
+
+
+class TikTokVMIE(InfoExtractor):
+ _VALID_URL = r'https?://(?:(?:vm|vt)\.tiktok\.com|(?:www\.)tiktok\.com/t)/(?P<id>\w+)'
+ IE_NAME = 'vm.tiktok'
+
+ _TESTS = [{
+ 'url': 'https://www.tiktok.com/t/ZTRC5xgJp',
+ 'info_dict': {
+ 'id': '7170520270497680683',
+ 'ext': 'mp4',
+ 'title': 'md5:c64f6152330c2efe98093ccc8597871c',
+ 'uploader_id': '6687535061741700102',
+ 'upload_date': '20221127',
+ 'view_count': int,
+ 'like_count': int,
+ 'comment_count': int,
+ 'uploader_url': 'https://www.tiktok.com/@MS4wLjABAAAAObqu3WCTXxmw2xwZ3iLEHnEecEIw7ks6rxWqOqOhaPja9BI7gqUQnjw8_5FSoDXX',
+ 'album': 'Wave of Mutilation: Best of Pixies',
+ 'thumbnail': r're:https://.+\.webp.*',
+ 'duration': 5,
+ 'timestamp': 1669516858,
+ 'repost_count': int,
+ 'artist': 'Pixies',
+ 'track': 'Where Is My Mind?',
+ 'description': 'md5:c64f6152330c2efe98093ccc8597871c',
+ 'uploader': 'sigmachaddeus',
+ 'creator': 'SigmaChad',
+ },
+ }, {
+ 'url': 'https://vm.tiktok.com/ZTR45GpSF/',
+ 'info_dict': {
+ 'id': '7106798200794926362',
+ 'ext': 'mp4',
+ 'title': 'md5:edc3e7ea587847f8537468f2fe51d074',
+ 'uploader_id': '6997695878846268418',
+ 'upload_date': '20220608',
+ 'view_count': int,
+ 'like_count': int,
+ 'comment_count': int,
+ 'thumbnail': r're:https://.+\.webp.*',
+ 'uploader_url': 'https://www.tiktok.com/@MS4wLjABAAAAdZ_NcPPgMneaGrW0hN8O_J_bwLshwNNERRF5DxOw2HKIzk0kdlLrR8RkVl1ksrMO',
+ 'duration': 29,
+ 'timestamp': 1654680400,
+ 'repost_count': int,
+ 'artist': 'Akihitoko',
+ 'track': 'original sound',
+ 'description': 'md5:edc3e7ea587847f8537468f2fe51d074',
+ 'uploader': 'akihitoko1',
+ 'creator': 'Akihitoko',
+ },
+ }, {
+ 'url': 'https://vt.tiktok.com/ZSe4FqkKd',
+ 'only_matching': True,
+ }]
+
+ def _real_extract(self, url):
+ new_url = self._request_webpage(
+ HEADRequest(url), self._match_id(url), headers={'User-Agent': 'facebookexternalhit/1.1'}).url
+ if self.suitable(new_url): # Prevent infinite loop in case redirect fails
+ raise UnsupportedError(new_url)
+ return self.url_result(new_url)
+
+
+class TikTokLiveIE(TikTokBaseIE):
+ _VALID_URL = r'''(?x)https?://(?:
+ (?:www\.)?tiktok\.com/@(?P<uploader>[\w.-]+)/live|
+ m\.tiktok\.com/share/live/(?P<id>\d+)
+ )'''
+ IE_NAME = 'tiktok:live'
+
+ _TESTS = [{
+ 'url': 'https://www.tiktok.com/@weathernewslive/live',
+ 'info_dict': {
+ 'id': '7210809319192726273',
+ 'ext': 'mp4',
+ 'title': r're:ウェザーニュースLiVE[\d\s:-]*',
+ 'creator': 'ウェザーニュースLiVE',
+ 'uploader': 'weathernewslive',
+ 'uploader_id': '6621496731283095554',
+ 'uploader_url': 'https://www.tiktok.com/@weathernewslive',
+ 'live_status': 'is_live',
+ 'concurrent_view_count': int,
+ },
+ 'params': {'skip_download': 'm3u8'},
+ }, {
+ 'url': 'https://www.tiktok.com/@pilarmagenta/live',
+ 'info_dict': {
+ 'id': '7209423610325322522',
+ 'ext': 'mp4',
+ 'title': str,
+ 'creator': 'Pilarmagenta',
+ 'uploader': 'pilarmagenta',
+ 'uploader_id': '6624846890674683909',
+ 'uploader_url': 'https://www.tiktok.com/@pilarmagenta',
+ 'live_status': 'is_live',
+ 'concurrent_view_count': int,
+ },
+ 'skip': 'Livestream',
+ }, {
+ 'url': 'https://m.tiktok.com/share/live/7209423610325322522/?language=en',
+ 'only_matching': True,
+ }, {
+ 'url': 'https://www.tiktok.com/@iris04201/live',
+ 'only_matching': True,
+ }]
+
+ def _call_api(self, url, param, room_id, uploader, key=None):
+ response = traverse_obj(self._download_json(
+ url, room_id, fatal=False, query={
+ 'aid': '1988',
+ param: room_id,
+ }), (key, {dict}), default={})
+
+        # status is 2 while the room is live and 4 once the stream has ended
+ if int_or_none(response.get('status')) == 2:
+ return response
+ # If room_id is obtained via mobile share URL and cannot be refreshed, do not wait for live
+ elif not uploader:
+ raise ExtractorError('This livestream has ended', expected=True)
+ raise UserNotLive(video_id=uploader)
+
+ def _real_extract(self, url):
+ uploader, room_id = self._match_valid_url(url).group('uploader', 'id')
+ webpage = self._download_webpage(
+ url, uploader or room_id, headers={'User-Agent': 'Mozilla/5.0'}, fatal=not room_id)
+
+ if webpage:
+ data = self._get_sigi_state(webpage, uploader or room_id)
+ room_id = (traverse_obj(data, ('UserModule', 'users', ..., 'roomId', {str_or_none}), get_all=False)
+ or self._search_regex(r'snssdk\d*://live\?room_id=(\d+)', webpage, 'room ID', default=None)
+ or room_id)
+ uploader = uploader or traverse_obj(
+ data, ('LiveRoom', 'liveRoomUserInfo', 'user', 'uniqueId'),
+ ('UserModule', 'users', ..., 'uniqueId'), get_all=False, expected_type=str)
+
+ if not room_id:
+ raise UserNotLive(video_id=uploader)
+
+ formats = []
+ live_info = self._call_api(
+ 'https://webcast.tiktok.com/webcast/room/info', 'room_id', room_id, uploader, key='data')
+
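+        # Quality names ordered worst to best; both webcast-SDK and pull-URL spellings are listed ('ORIGION' is the API's own spelling)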
+ get_quality = qualities(('SD1', 'ld', 'SD2', 'sd', 'HD1', 'hd', 'FULL_HD1', 'uhd', 'ORIGION', 'origin'))
+ parse_inner = lambda x: self._parse_json(x, None)
+
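+        # stream_data is a JSON string nested inside the JSON response, hence the extra parse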
+ for quality, stream in traverse_obj(live_info, (
+ 'stream_url', 'live_core_sdk_data', 'pull_data', 'stream_data',
+ {parse_inner}, 'data', {dict}), default={}).items():
+
+ sdk_params = traverse_obj(stream, ('main', 'sdk_params', {parse_inner}, {
+ 'vcodec': ('VCodec', {str}),
+ 'tbr': ('vbitrate', {lambda x: int_or_none(x, 1000)}),
+ 'resolution': ('resolution', {lambda x: re.match(r'(?i)\d+x\d+|\d+p', x).group().lower()}),
+ }))
+
+ flv_url = traverse_obj(stream, ('main', 'flv', {url_or_none}))
+ if flv_url:
+ formats.append({
+ 'url': flv_url,
+ 'ext': 'flv',
+ 'format_id': f'flv-{quality}',
+ 'quality': get_quality(quality),
+ **sdk_params,
+ })
+
+ hls_url = traverse_obj(stream, ('main', 'hls', {url_or_none}))
+ if hls_url:
+ formats.append({
+ 'url': hls_url,
+ 'ext': 'mp4',
+ 'protocol': 'm3u8_native',
+ 'format_id': f'hls-{quality}',
+ 'quality': get_quality(quality),
+ **sdk_params,
+ })
+
+ def get_vcodec(*keys):
+ return traverse_obj(live_info, (
+ 'stream_url', *keys, {parse_inner}, 'VCodec', {str}))
+
+ for stream in ('hls', 'rtmp'):
+ stream_url = traverse_obj(live_info, ('stream_url', f'{stream}_pull_url', {url_or_none}))
+ if stream_url:
+ formats.append({
+ 'url': stream_url,
+ 'ext': 'mp4' if stream == 'hls' else 'flv',
+ 'protocol': 'm3u8_native' if stream == 'hls' else 'https',
+ 'format_id': f'{stream}-pull',
+ 'vcodec': get_vcodec(f'{stream}_pull_url_params'),
+ 'quality': get_quality('ORIGION'),
+ })
+
+ for f_id, f_url in traverse_obj(live_info, ('stream_url', 'flv_pull_url', {dict}), default={}).items():
+ if not url_or_none(f_url):
+ continue
+ formats.append({
+ 'url': f_url,
+ 'ext': 'flv',
+ 'format_id': f'flv-{f_id}'.lower(),
+ 'vcodec': get_vcodec('flv_pull_url_params', f_id),
+ 'quality': get_quality(f_id),
+ })
+
+ # If uploader is a guest on another's livestream, primary endpoint will not have m3u8 URLs
+ if not traverse_obj(formats, lambda _, v: v['ext'] == 'mp4'):
+ live_info = merge_dicts(live_info, self._call_api(
+ 'https://www.tiktok.com/api/live/detail/', 'roomID', room_id, uploader, key='LiveRoomInfo'))
+ if url_or_none(live_info.get('liveUrl')):
+ formats.append({
+ 'url': live_info['liveUrl'],
+ 'ext': 'mp4',
+ 'protocol': 'm3u8_native',
+ 'format_id': 'hls-fallback',
+ 'vcodec': 'h264',
+ 'quality': get_quality('origin'),
+ })
+
+ uploader = uploader or traverse_obj(live_info, ('ownerInfo', 'uniqueId'), ('owner', 'display_id'))
+
+ return {
+ 'id': room_id,
+ 'uploader': uploader,
+ 'uploader_url': format_field(uploader, None, self._UPLOADER_URL_FORMAT) or None,
+ 'is_live': True,
+ 'formats': formats,
+ '_format_sort_fields': ('quality', 'ext'),
+ **traverse_obj(live_info, {
+ 'title': 'title',
+ 'uploader_id': (('ownerInfo', 'owner'), 'id', {str_or_none}),
+ 'creator': (('ownerInfo', 'owner'), 'nickname'),
+ 'concurrent_view_count': (('user_count', ('liveRoomStats', 'userCount')), {int_or_none}),
+ }, get_all=False),
+ }
diff --git a/yt_dlp/extractor/tmz.py b/yt_dlp/extractor/tmz.py
new file mode 100644
index 0000000..edd16bc
--- /dev/null
+++ b/yt_dlp/extractor/tmz.py
@@ -0,0 +1,193 @@
+import re
+
+from .common import InfoExtractor
+from ..utils import (
+ ExtractorError,
+ get_element_by_attribute,
+)
+
+
+class TMZIE(InfoExtractor):
+ _VALID_URL = r'https?://(?:www\.)?tmz\.com/.*'
+ _TESTS = [
+ {
+ 'url': 'http://www.tmz.com/videos/0-cegprt2p/',
+ 'info_dict': {
+ 'id': 'http://www.tmz.com/videos/0-cegprt2p/',
+ 'ext': 'mp4',
+ 'title': 'No Charges Against Hillary Clinton? Harvey Says It Ain\'t Over Yet',
+ 'description': 'Harvey talks about Director Comey’s decision not to prosecute Hillary Clinton.',
+ 'timestamp': 1467831837,
+ 'uploader': 'TMZ Staff',
+ 'upload_date': '20160706',
+ 'thumbnail': 'https://imagez.tmz.com/image/5e/4by3/2016/07/06/5eea7dc01baa5c2e83eb06930c170e46_xl.jpg',
+ 'duration': 772.0,
+ },
+ },
+ {
+ 'url': 'https://www.tmz.com/videos/071119-chris-morgan-women-4590005-0-zcsejvcr/',
+ 'info_dict': {
+ 'id': 'https://www.tmz.com/videos/071119-chris-morgan-women-4590005-0-zcsejvcr/',
+ 'ext': 'mp4',
+ 'title': 'Angry Bagel Shop Guy Says He Doesn\'t Trust Women',
+ 'description': 'The enraged man who went viral for ranting about women on dating sites before getting ragdolled in a bagel shop is defending his misogyny ... he says it\'s women\'s fault in the first place.',
+ 'timestamp': 1562889485,
+ 'uploader': 'TMZ Staff',
+ 'upload_date': '20190711',
+ 'thumbnail': 'https://imagez.tmz.com/image/a8/4by3/2019/07/12/a85480d27b2f50a7bfea2322151d67a5_xl.jpg',
+ 'duration': 123.0,
+ },
+ },
+ {
+ 'url': 'http://www.tmz.com/2015/04/19/bobby-brown-bobbi-kristina-awake-video-concert',
+ 'md5': '5429c85db8bde39a473a56ca8c4c5602',
+ 'info_dict': {
+ 'id': 'http://www.tmz.com/2015/04/19/bobby-brown-bobbi-kristina-awake-video-concert',
+ 'ext': 'mp4',
+ 'title': 'Bobby Brown Tells Crowd ... Bobbi Kristina is Awake',
+ 'description': 'Bobby Brown stunned his audience during a concert Saturday night, when he told the crowd, "Bobbi is awake. She\'s watching me."',
+ 'timestamp': 1429467813,
+ 'uploader': 'TMZ Staff',
+ 'upload_date': '20150419',
+ 'duration': 29.0,
+ 'thumbnail': 'https://imagez.tmz.com/image/15/4by3/2015/04/20/1539c7ae136359fc979236fa6a9449dd_xl.jpg',
+ },
+ },
+ {
+ 'url': 'http://www.tmz.com/2015/09/19/patti-labelle-concert-fan-stripping-kicked-out-nicki-minaj/',
+ 'info_dict': {
+ 'id': 'http://www.tmz.com/2015/09/19/patti-labelle-concert-fan-stripping-kicked-out-nicki-minaj/',
+ 'ext': 'mp4',
+ 'title': 'Patti LaBelle -- Goes Nuclear On Stripping Fan',
+ 'description': 'Patti LaBelle made it known loud and clear last night ... NO '
+ 'ONE gets on her stage and strips down.',
+ 'timestamp': 1442683746,
+ 'uploader': 'TMZ Staff',
+ 'upload_date': '20150919',
+ 'duration': 104.0,
+ 'thumbnail': 'https://imagez.tmz.com/image/5e/4by3/2015/09/20/5e57d7575062528082994e18ac3f0f48_xl.jpg',
+ },
+ },
+ {
+ 'url': 'http://www.tmz.com/2016/01/28/adam-silver-sting-drake-blake-griffin/',
+ 'info_dict': {
+ 'id': 'http://www.tmz.com/2016/01/28/adam-silver-sting-drake-blake-griffin/',
+ 'ext': 'mp4',
+ 'title': 'NBA\'s Adam Silver -- Blake Griffin\'s a Great Guy ... He\'ll Learn from This',
+ 'description': 'Two pretty parts of this video with NBA Commish Adam Silver.',
+ 'timestamp': 1454010989,
+ 'uploader': 'TMZ Staff',
+ 'upload_date': '20160128',
+ 'duration': 59.0,
+ 'thumbnail': 'https://imagez.tmz.com/image/38/4by3/2016/01/29/3856e83e0beb57059ec412122b842fb1_xl.jpg',
+ },
+ },
+ {
+ 'url': 'http://www.tmz.com/2016/10/27/donald-trump-star-vandal-arrested-james-otis/',
+ 'info_dict': {
+ 'id': 'http://www.tmz.com/2016/10/27/donald-trump-star-vandal-arrested-james-otis/',
+ 'ext': 'mp4',
+ 'title': 'Trump Star Vandal -- I\'m Not Afraid of Donald or the Cops!',
+ 'description': 'James Otis is the the guy who took a pickaxe to Donald Trump\'s star on the Walk of Fame, and he tells TMZ .. he\'s ready and willing to go to jail for the crime.',
+ 'timestamp': 1477500095,
+ 'uploader': 'TMZ Staff',
+ 'upload_date': '20161026',
+ 'thumbnail': 'https://imagez.tmz.com/image/0d/4by3/2016/10/27/0d904814d4a75dcf9cc3b8cfd1edc1a3_xl.jpg',
+ 'duration': 128.0,
+ },
+ },
+ {
+ 'url': 'https://www.tmz.com/videos/2020-10-31-103120-beverly-hills-protest-4878209/',
+ 'info_dict': {
+ 'id': 'https://www.tmz.com/videos/2020-10-31-103120-beverly-hills-protest-4878209/',
+ 'ext': 'mp4',
+ 'title': 'Cops Use Billy Clubs Against Pro-Trump and Anti-Fascist '
+ 'Demonstrators',
+ 'description': 'Beverly Hills may be an omen of what\'s coming next week, '
+ 'because things got crazy on the streets and cops started '
+ 'swinging their billy clubs at both Anti-Fascist and Pro-Trump '
+ 'demonstrators.',
+ 'timestamp': 1604182772,
+ 'uploader': 'TMZ Staff',
+ 'upload_date': '20201031',
+ 'duration': 96.0,
+ 'thumbnail': 'https://imagez.tmz.com/image/f3/4by3/2020/10/31/f37bd5a8aef84497866f425130c58be3_xl.jpg',
+ },
+ },
+ {
+ 'url': 'https://www.tmz.com/2020/11/05/gervonta-davis-car-crash-hit-and-run-police/',
+ 'info_dict': {
+ 'id': 'Dddb6IGe-ws',
+ 'ext': 'mp4',
+ 'title': 'SICK LAMBO GERVONTA DAVIS IN HIS NEW RIDE RIGHT AFTER KO AFTER LEO EsNews Boxing',
+ 'uploader': 'ESNEWS',
+ 'description': 'md5:49675bc58883ccf80474b8aa701e1064',
+ 'upload_date': '20201102',
+ 'uploader_id': '@ESNEWS',
+ 'uploader_url': 'https://www.youtube.com/@ESNEWS',
+ 'like_count': int,
+ 'channel_id': 'UCI-Oq7oFGakzSzHFlTtsUsQ',
+ 'channel': 'ESNEWS',
+ 'view_count': int,
+ 'duration': 225,
+ 'live_status': 'not_live',
+ 'thumbnail': 'https://i.ytimg.com/vi_webp/Dddb6IGe-ws/maxresdefault.webp',
+ 'channel_url': 'https://www.youtube.com/channel/UCI-Oq7oFGakzSzHFlTtsUsQ',
+ 'channel_follower_count': int,
+ 'playable_in_embed': True,
+ 'categories': ['Sports'],
+ 'age_limit': 0,
+ 'tags': 'count:10',
+ 'availability': 'public',
+ 'comment_count': int,
+ },
+ },
+ {
+ 'url': 'https://www.tmz.com/2020/11/19/conor-mcgregor-dustin-poirier-contract-fight-ufc-257-fight-island/',
+ 'info_dict': {
+ 'id': '1329448013937471491',
+ 'ext': 'mp4',
+ 'title': 'The Mac Life - BREAKING: Conor McGregor (@thenotoriousmma) has signed his bout agreement for his rematch with Dustin Poirier for January 23.',
+ 'uploader': 'The Mac Life',
+ 'description': 'md5:56e6009bbc3d12498e10d08a8e1f1c69',
+ 'upload_date': '20201119',
+ 'display_id': '1329450007125225473',
+ 'uploader_id': 'TheMacLife',
+ 'timestamp': 1605800556,
+ 'thumbnail': 'https://pbs.twimg.com/media/EnMmfT8XYAExgxJ.jpg?name=small',
+ 'like_count': int,
+ 'duration': 11.812,
+ 'uploader_url': 'https://twitter.com/TheMacLife',
+ 'age_limit': 0,
+ 'repost_count': int,
+ 'tags': [],
+ 'comment_count': int,
+ },
+ },
+ ]
+
+ def _real_extract(self, url):
+ webpage = self._download_webpage(url, url)
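+        # Prefer JSON-LD metadata; fall back to a YouTube player API cue, then to an embedded tweet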
+ jsonld = self._search_json_ld(webpage, url)
+ if not jsonld or 'url' not in jsonld:
+ # try to extract from YouTube Player API
+ # see https://developers.google.com/youtube/iframe_api_reference#Video_Queueing_Functions
+ match_obj = re.search(r'\.cueVideoById\(\s*(?P<quote>[\'"])(?P<id>.*?)(?P=quote)', webpage)
+ if match_obj:
+                return self.url_result(match_obj.group('id'))
+ # try to extract from twitter
+ blockquote_el = get_element_by_attribute('class', 'twitter-tweet', webpage)
+ if blockquote_el:
+ matches = re.findall(
+ r'<a[^>]+href=\s*(?P<quote>[\'"])(?P<link>.*?)(?P=quote)',
+ blockquote_el)
+                for _, match in matches:
+                    if '/status/' in match:
+                        return self.url_result(match)
+ raise ExtractorError('No video found!')
+        if 'id' not in jsonld:
+ jsonld['id'] = url
+ return jsonld
diff --git a/yt_dlp/extractor/tnaflix.py b/yt_dlp/extractor/tnaflix.py
new file mode 100644
index 0000000..535e6c8
--- /dev/null
+++ b/yt_dlp/extractor/tnaflix.py
@@ -0,0 +1,336 @@
+import re
+
+from .common import InfoExtractor
+from ..compat import compat_str
+from ..utils import (
+ fix_xml_ampersands,
+ float_or_none,
+ int_or_none,
+ parse_duration,
+ str_to_int,
+ unescapeHTML,
+ url_basename,
+ xpath_text,
+)
+
+
+class TNAFlixNetworkBaseIE(InfoExtractor):
+ # May be overridden in descendants if necessary
+ _CONFIG_REGEX = [
+ r'flashvars\.config\s*=\s*escape\("(?P<url>[^"]+)"',
+ r'<input[^>]+name="config\d?" value="(?P<url>[^"]+)"',
+ r'config\s*=\s*(["\'])(?P<url>(?:https?:)?//(?:(?!\1).)+)\1',
+ ]
+ _TITLE_REGEX = r'<input[^>]+name="title" value="([^"]+)"'
+ _DESCRIPTION_REGEX = r'<input[^>]+name="description" value="([^"]+)"'
+ _UPLOADER_REGEX = r'<input[^>]+name="username" value="([^"]+)"'
+ _VIEW_COUNT_REGEX = None
+ _COMMENT_COUNT_REGEX = None
+ _AVERAGE_RATING_REGEX = None
+ _CATEGORIES_REGEX = r'<li[^>]*>\s*<span[^>]+class="infoTitle"[^>]*>Categories:</span>\s*<span[^>]+class="listView"[^>]*>(.+?)</span>\s*</li>'
+
+ def _extract_thumbnails(self, flix_xml):
+
+ def get_child(elem, names):
+ for name in names:
+ child = elem.find(name)
+ if child is not None:
+ return child
+
+ timeline = get_child(flix_xml, ['timeline', 'rolloverBarImage'])
+ if timeline is None:
+ return
+
+ pattern_el = get_child(timeline, ['imagePattern', 'pattern'])
+ if pattern_el is None or not pattern_el.text:
+ return
+
+ first_el = get_child(timeline, ['imageFirst', 'first'])
+ last_el = get_child(timeline, ['imageLast', 'last'])
+ if first_el is None or last_el is None:
+ return
+
+ first_text = first_el.text
+ last_text = last_el.text
+ if not first_text.isdigit() or not last_text.isdigit():
+ return
+
+ first = int(first_text)
+ last = int(last_text)
+ if first > last:
+ return
+
+ width = int_or_none(xpath_text(timeline, './imageWidth', 'thumbnail width'))
+ height = int_or_none(xpath_text(timeline, './imageHeight', 'thumbnail height'))
+
+ return [{
+ 'url': self._proto_relative_url(pattern_el.text.replace('#', compat_str(i)), 'http:'),
+ 'width': width,
+ 'height': height,
+ } for i in range(first, last + 1)]
+
+ def _real_extract(self, url):
+ mobj = self._match_valid_url(url)
+ video_id, host = mobj.group('id', 'host')
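+        # Network sites name the display-ID group differently; fall back to the numeric video ID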
+ for display_id_key in ('display_id', 'display_id_2'):
+ if display_id_key in mobj.groupdict():
+ display_id = mobj.group(display_id_key)
+ if display_id:
+ break
+ else:
+ display_id = video_id
+
+ webpage = self._download_webpage(url, display_id)
+ inputs = self._hidden_inputs(webpage)
+ query = {}
+
+ # check for MovieFap-style config
+ cfg_url = self._proto_relative_url(self._html_search_regex(
+ self._CONFIG_REGEX, webpage, 'flashvars.config', default=None,
+ group='url'), 'http:')
+
+ if not cfg_url:
+ cfg_url = inputs.get('config')
+
+ # check for TNAFlix-style config
+ if not cfg_url and inputs.get('vkey') and inputs.get('nkey'):
+ cfg_url = f'http://cdn-fck.{host}.com/{host}/{inputs["vkey"]}.fid'
+ query.update({
+ 'key': inputs['nkey'],
+ 'VID': video_id,
+ 'premium': '1',
+ 'vip': '1',
+ 'alpha': '',
+ })
+
+ formats, json_ld = [], {}
+
+ # TNAFlix and MovieFap extraction
+ if cfg_url:
+ cfg_xml = self._download_xml(
+ cfg_url, display_id, 'Downloading metadata',
+ transform_source=fix_xml_ampersands, headers={'Referer': url}, query=query)
+
+ def extract_video_url(vl):
+ # Any URL modification now results in HTTP Error 403: Forbidden
+ return unescapeHTML(vl.text)
+
+ video_link = cfg_xml.find('./videoLink')
+ if video_link is not None:
+ formats.append({
+ 'url': extract_video_url(video_link),
+ 'ext': xpath_text(cfg_xml, './videoConfig/type', 'type', default='flv'),
+ })
+
+ for item in cfg_xml.findall('./quality/item'):
+ video_link = item.find('./videoLink')
+ if video_link is None:
+ continue
+ res = item.find('res')
+ format_id = None if res is None else res.text
+ height = int_or_none(self._search_regex(
+ r'^(\d+)[pP]', format_id, 'height', default=None))
+ formats.append({
+ 'url': self._proto_relative_url(extract_video_url(video_link), 'http:'),
+ 'format_id': format_id,
+ 'height': height,
+ })
+
+ thumbnails = self._extract_thumbnails(cfg_xml) or []
+ thumbnails.append({
+ 'url': self._proto_relative_url(xpath_text(cfg_xml, './startThumb', 'thumbnail'), 'http:')
+ })
+
+ # check for EMPFlix-style JSON and extract
+ else:
+ player = self._download_json(
+ f'http://www.{host}.com/ajax/video-player/{video_id}', video_id,
+ headers={'Referer': url}).get('html', '')
+ for mobj in re.finditer(r'<source src="(?P<src>[^"]+)"', player):
+ video_url = mobj.group('src')
+ height = self._search_regex(r'-(\d+)p\.', url_basename(video_url), 'height', default=None)
+ formats.append({
+ 'url': self._proto_relative_url(video_url, 'http:'),
+ 'ext': url_basename(video_url).split('.')[-1],
+ 'height': int_or_none(height),
+ 'format_id': f'{height}p' if height else url_basename(video_url).split('.')[0],
+ })
+ thumbnail = self._proto_relative_url(self._search_regex(
+ r'data-poster="([^"]+)"', player, 'thumbnail', default=None), 'http:')
+ thumbnails = [{'url': thumbnail}] if thumbnail else None
+ json_ld = self._search_json_ld(webpage, display_id, default={})
+
+ def extract_field(pattern, name):
+ return self._html_search_regex(pattern, webpage, name, default=None) if pattern else None
+
+ return {
+ 'id': video_id,
+ 'display_id': display_id,
+ 'title': (extract_field(self._TITLE_REGEX, 'title')
+ or self._og_search_title(webpage, default=None)
+ or json_ld.get('title')),
+ 'description': extract_field(self._DESCRIPTION_REGEX, 'description') or json_ld.get('description'),
+ 'thumbnails': thumbnails,
+ 'duration': parse_duration(
+ self._html_search_meta('duration', webpage, 'duration', default=None)) or json_ld.get('duration'),
+ 'age_limit': self._rta_search(webpage) or 18,
+ 'uploader': extract_field(self._UPLOADER_REGEX, 'uploader') or json_ld.get('uploader'),
+ 'view_count': str_to_int(extract_field(self._VIEW_COUNT_REGEX, 'view count')),
+ 'comment_count': str_to_int(extract_field(self._COMMENT_COUNT_REGEX, 'comment count')),
+ 'average_rating': float_or_none(extract_field(self._AVERAGE_RATING_REGEX, 'average rating')),
+ 'categories': list(map(str.strip, (extract_field(self._CATEGORIES_REGEX, 'categories') or '').split(','))),
+ 'formats': formats,
+ }
+
+
+class TNAFlixNetworkEmbedIE(TNAFlixNetworkBaseIE):
+ _VALID_URL = r'https?://player\.(?P<host>tnaflix|empflix)\.com/video/(?P<id>\d+)'
+ _EMBED_REGEX = [r'<iframe[^>]+?src=(["\'])(?P<url>(?:https?:)?//player\.(?:tna|emp)flix\.com/video/\d+)\1']
+
+ _TESTS = [{
+ 'url': 'https://player.tnaflix.com/video/6538',
+ 'info_dict': {
+ 'id': '6538',
+ 'display_id': '6538',
+ 'ext': 'mp4',
+ 'title': 'Educational xxx video (G Spot)',
+ 'description': 'md5:b4fab8f88a8621c8fabd361a173fe5b8',
+ 'thumbnail': r're:https?://.*\.jpg$',
+ 'age_limit': 18,
+ 'duration': 164,
+ 'uploader': 'bobwhite39',
+ 'categories': list,
+ },
+ 'params': {
+ 'skip_download': True,
+ },
+ }, {
+ 'url': 'http://player.empflix.com/video/33051',
+ 'only_matching': True,
+ }]
+
+ def _real_extract(self, url):
+ mobj = self._match_valid_url(url)
+ video_id, host = mobj.group('id', 'host')
+ return self.url_result(f'http://www.{host}.com/category/{video_id}/video{video_id}')
+
+
+class TNAEMPFlixBaseIE(TNAFlixNetworkBaseIE):
+ _DESCRIPTION_REGEX = r'(?s)>Description:</[^>]+>(.+?)<'
+ _UPLOADER_REGEX = r'<span>by\s*<a[^>]+\bhref=["\']/profile/[^>]+>([^<]+)<'
+ _CATEGORIES_REGEX = r'(?s)<span[^>]*>Categories:</span>(.+?)</div>'
+
+
+class TNAFlixIE(TNAEMPFlixBaseIE):
+ _VALID_URL = r'https?://(?:www\.)?(?P<host>tnaflix)\.com/[^/]+/(?P<display_id>[^/]+)/video(?P<id>\d+)'
+
+ _TITLE_REGEX = r'<title>(.+?) - (?:TNAFlix Porn Videos|TNAFlix\.com)</title>'
+
+ _TESTS = [{
+ # anonymous uploader, no categories
+ 'url': 'http://www.tnaflix.com/porn-stars/Carmella-Decesare-striptease/video553878',
+ 'md5': '7e569419fe6d69543d01e6be22f5f7c4',
+ 'info_dict': {
+ 'id': '553878',
+ 'display_id': 'Carmella-Decesare-striptease',
+ 'ext': 'mp4',
+ 'title': 'Carmella Decesare - striptease',
+ 'thumbnail': r're:https?://.*\.jpg$',
+ 'duration': 91,
+ 'age_limit': 18,
+ 'categories': list,
+ }
+ }, {
+ # non-anonymous uploader, categories
+ 'url': 'https://www.tnaflix.com/teen-porn/Educational-xxx-video/video6538',
+ 'md5': 'add5a9fa7f4da53d3e9d0845ac58f20c',
+ 'info_dict': {
+ 'id': '6538',
+ 'display_id': 'Educational-xxx-video',
+ 'ext': 'mp4',
+ 'title': 'Educational xxx video (G Spot)',
+ 'description': 'md5:b4fab8f88a8621c8fabd361a173fe5b8',
+ 'thumbnail': r're:https?://.*\.jpg$',
+ 'duration': 164,
+ 'age_limit': 18,
+ 'uploader': 'bobwhite39',
+ 'categories': list,
+ }
+ }, {
+ 'url': 'https://www.tnaflix.com/amateur-porn/bunzHD-Ms.Donk/video358632',
+ 'only_matching': True,
+ }]
+
+
+class EMPFlixIE(TNAEMPFlixBaseIE):
+ _VALID_URL = r'https?://(?:www\.)?(?P<host>empflix)\.com/(?:videos/(?P<display_id>.+?)-|[^/]+/(?P<display_id_2>[^/]+)/video)(?P<id>[0-9]+)'
+
+ _TESTS = [{
+ 'url': 'http://www.empflix.com/amateur-porn/Amateur-Finger-Fuck/video33051',
+ 'md5': 'd761c7b26601bd14476cd9512f2654fc',
+ 'info_dict': {
+ 'id': '33051',
+ 'display_id': 'Amateur-Finger-Fuck',
+ 'ext': 'mp4',
+ 'title': 'Amateur Finger Fuck',
+ 'description': 'Amateur solo finger fucking.',
+ 'thumbnail': r're:https?://.*\.jpg$',
+ 'duration': 83,
+ 'age_limit': 18,
+ 'categories': list,
+ }
+ }, {
+ 'url': 'http://www.empflix.com/videos/[AROMA][ARMD-718]-Aoi-Yoshino-Sawa-25826.html',
+ 'only_matching': True,
+ }, {
+ 'url': 'http://www.empflix.com/videos/Amateur-Finger-Fuck-33051.html',
+ 'only_matching': True,
+ }]
+
+
+class MovieFapIE(TNAFlixNetworkBaseIE):
+ _VALID_URL = r'https?://(?:www\.)?(?P<host>moviefap)\.com/videos/(?P<id>[0-9a-f]+)/(?P<display_id>[^/]+)\.html'
+
+ _VIEW_COUNT_REGEX = r'<br>Views\s*<strong>([\d,.]+)</strong>'
+ _COMMENT_COUNT_REGEX = r'<span[^>]+id="comCount"[^>]*>([\d,.]+)</span>'
+ _AVERAGE_RATING_REGEX = r'Current Rating\s*<br>\s*<strong>([\d.]+)</strong>'
+ _CATEGORIES_REGEX = r'(?s)<div[^>]+id="vid_info"[^>]*>\s*<div[^>]*>.+?</div>(.*?)<br>'
+
+ _TESTS = [{
+ # normal, multi-format video
+ 'url': 'http://www.moviefap.com/videos/be9867c9416c19f54a4a/experienced-milf-amazing-handjob.html',
+ 'md5': '26624b4e2523051b550067d547615906',
+ 'info_dict': {
+ 'id': 'be9867c9416c19f54a4a',
+ 'display_id': 'experienced-milf-amazing-handjob',
+ 'ext': 'mp4',
+ 'title': 'Experienced MILF Amazing Handjob',
+ 'description': 'Experienced MILF giving an Amazing Handjob',
+ 'thumbnail': r're:https?://.*\.jpg$',
+ 'age_limit': 18,
+ 'uploader': 'darvinfred06',
+ 'view_count': int,
+ 'comment_count': int,
+ 'average_rating': float,
+ 'categories': ['Amateur', 'Masturbation', 'Mature', 'Flashing'],
+ }
+ }, {
+ # quirky single-format case where the extension is given as fid, but the video is really an flv
+ 'url': 'http://www.moviefap.com/videos/e5da0d3edce5404418f5/jeune-couple-russe.html',
+ 'md5': 'fa56683e291fc80635907168a743c9ad',
+ 'info_dict': {
+ 'id': 'e5da0d3edce5404418f5',
+ 'display_id': 'jeune-couple-russe',
+ 'ext': 'flv',
+ 'title': 'Jeune Couple Russe',
+ 'description': 'Amateur',
+ 'thumbnail': r're:https?://.*\.jpg$',
+ 'age_limit': 18,
+ 'uploader': 'whiskeyjar',
+ 'view_count': int,
+ 'comment_count': int,
+ 'average_rating': float,
+ 'categories': ['Amateur', 'Teen'],
+ },
+ 'skip': 'This video does not exist',
+ }]
diff --git a/yt_dlp/extractor/toggle.py b/yt_dlp/extractor/toggle.py
new file mode 100644
index 0000000..7073733
--- /dev/null
+++ b/yt_dlp/extractor/toggle.py
@@ -0,0 +1,228 @@
+import json
+import re
+
+from .common import InfoExtractor
+from ..utils import (
+ determine_ext,
+ float_or_none,
+ int_or_none,
+ parse_iso8601,
+ strip_or_none,
+)
+
+
+class ToggleIE(InfoExtractor):
+ IE_NAME = 'toggle'
+ _VALID_URL = r'(?:https?://(?:(?:www\.)?mewatch|video\.toggle)\.sg/(?:en|zh)/(?:[^/]+/){2,}|toggle:)(?P<id>[0-9]+)'
+ _TESTS = [{
+ 'url': 'http://www.mewatch.sg/en/series/lion-moms-tif/trailers/lion-moms-premier/343115',
+ 'info_dict': {
+ 'id': '343115',
+ 'ext': 'mp4',
+ 'title': 'Lion Moms Premiere',
+ 'description': 'md5:aea1149404bff4d7f7b6da11fafd8e6b',
+ 'upload_date': '20150910',
+ 'timestamp': 1441858274,
+ },
+ 'params': {
+ 'skip_download': 'm3u8 download',
+ }
+ }, {
+ 'note': 'DRM-protected video',
+ 'url': 'http://www.mewatch.sg/en/movies/dug-s-special-mission/341413',
+ 'info_dict': {
+ 'id': '341413',
+ 'ext': 'wvm',
+ 'title': 'Dug\'s Special Mission',
+ 'description': 'md5:e86c6f4458214905c1772398fabc93e0',
+ 'upload_date': '20150827',
+ 'timestamp': 1440644006,
+ },
+ 'params': {
+ 'skip_download': 'DRM-protected wvm download',
+ }
+ }, {
+ # this also tests correct video id extraction
+ 'note': 'm3u8 links are geo-restricted, but Android/mp4 is okay',
+ 'url': 'http://www.mewatch.sg/en/series/28th-sea-games-5-show/28th-sea-games-5-show-ep11/332861',
+ 'info_dict': {
+ 'id': '332861',
+ 'ext': 'mp4',
+ 'title': '28th SEA Games (5 Show) - Episode 11',
+ 'description': 'md5:3cd4f5f56c7c3b1340c50a863f896faa',
+ 'upload_date': '20150605',
+ 'timestamp': 1433480166,
+ },
+ 'params': {
+ 'skip_download': 'DRM-protected wvm download',
+ },
+ 'skip': 'm3u8 links are geo-restricted'
+ }, {
+ 'url': 'http://video.toggle.sg/en/clips/seraph-sun-aloysius-will-suddenly-sing-some-old-songs-in-high-pitch-on-set/343331',
+ 'only_matching': True,
+ }, {
+ 'url': 'http://www.mewatch.sg/en/clips/seraph-sun-aloysius-will-suddenly-sing-some-old-songs-in-high-pitch-on-set/343331',
+ 'only_matching': True,
+ }, {
+ 'url': 'http://www.mewatch.sg/zh/series/zero-calling-s2-hd/ep13/336367',
+ 'only_matching': True,
+ }, {
+ 'url': 'http://www.mewatch.sg/en/series/vetri-s2/webisodes/jeeva-is-an-orphan-vetri-s2-webisode-7/342302',
+ 'only_matching': True,
+ }, {
+ 'url': 'http://www.mewatch.sg/en/movies/seven-days/321936',
+ 'only_matching': True,
+ }, {
+ 'url': 'https://www.mewatch.sg/en/tv-show/news/may-2017-cna-singapore-tonight/fri-19-may-2017/512456',
+ 'only_matching': True,
+ }, {
+ 'url': 'http://www.mewatch.sg/en/channels/eleven-plus/401585',
+ 'only_matching': True,
+ }]
+
+ _API_USER = 'tvpapi_147'
+ _API_PASS = '11111'
+
+ def _real_extract(self, url):
+ video_id = self._match_id(url)
+
+ params = {
+ 'initObj': {
+ 'Locale': {
+ 'LocaleLanguage': '',
+ 'LocaleCountry': '',
+ 'LocaleDevice': '',
+ 'LocaleUserState': 0
+ },
+ 'Platform': 0,
+ 'SiteGuid': 0,
+ 'DomainID': '0',
+ 'UDID': '',
+ 'ApiUser': self._API_USER,
+ 'ApiPass': self._API_PASS
+ },
+ 'MediaID': video_id,
+ 'mediaType': 0,
+ }
+
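+        # tvpapi.as.tvinci.com is the Tvinci (Kaltura OTT) JSON gateway that backs Toggle/mewatch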
+ info = self._download_json(
+ 'http://tvpapi.as.tvinci.com/v2_9/gateways/jsonpostgw.aspx?m=GetMediaInfo',
+ video_id, 'Downloading video info json', data=json.dumps(params).encode('utf-8'))
+
+ title = info['MediaName']
+
+ formats = []
+ for video_file in info.get('Files', []):
+ video_url, vid_format = video_file.get('URL'), video_file.get('Format')
+ if not video_url or video_url == 'NA' or not vid_format:
+ continue
+ ext = determine_ext(video_url)
+ vid_format = vid_format.replace(' ', '')
+ # if geo-restricted, m3u8 is inaccessible, but mp4 is okay
+ if ext == 'm3u8':
+ m3u8_formats = self._extract_m3u8_formats(
+ video_url, video_id, ext='mp4', m3u8_id=vid_format,
+ note='Downloading %s m3u8 information' % vid_format,
+ errnote='Failed to download %s m3u8 information' % vid_format,
+ fatal=False)
+ for f in m3u8_formats:
+ # Apple FairPlay Streaming
+ if '/fpshls/' in f['url']:
+ continue
+ formats.append(f)
+ elif ext == 'mpd':
+ formats.extend(self._extract_mpd_formats(
+ video_url, video_id, mpd_id=vid_format,
+ note='Downloading %s MPD manifest' % vid_format,
+ errnote='Failed to download %s MPD manifest' % vid_format,
+ fatal=False))
+ elif ext == 'ism':
+ formats.extend(self._extract_ism_formats(
+ video_url, video_id, ism_id=vid_format,
+ note='Downloading %s ISM manifest' % vid_format,
+ errnote='Failed to download %s ISM manifest' % vid_format,
+ fatal=False))
+ elif ext == 'mp4':
+ formats.append({
+ 'ext': ext,
+ 'url': video_url,
+ 'format_id': vid_format,
+ })
+ if not formats:
+ for meta in (info.get('Metas') or []):
+ if (not self.get_param('allow_unplayable_formats')
+ and meta.get('Key') == 'Encryption' and meta.get('Value') == '1'):
+ self.report_drm(video_id)
+            # If there are no formats and no DRM, the video is most likely geo-blocked
+
+ thumbnails = []
+ for picture in info.get('Pictures', []):
+ if not isinstance(picture, dict):
+ continue
+ pic_url = picture.get('URL')
+ if not pic_url:
+ continue
+ thumbnail = {
+ 'url': pic_url,
+ }
+ pic_size = picture.get('PicSize', '')
+ m = re.search(r'(?P<width>\d+)[xX](?P<height>\d+)', pic_size)
+ if m:
+ thumbnail.update({
+ 'width': int(m.group('width')),
+ 'height': int(m.group('height')),
+ })
+ thumbnails.append(thumbnail)
+
+ def counter(prefix):
+ return int_or_none(
+ info.get(prefix + 'Counter') or info.get(prefix.lower() + '_counter'))
+
+ return {
+ 'id': video_id,
+ 'title': title,
+ 'description': strip_or_none(info.get('Description')),
+ 'duration': int_or_none(info.get('Duration')),
+ 'timestamp': parse_iso8601(info.get('CreationDate') or None),
+ 'average_rating': float_or_none(info.get('Rating')),
+ 'view_count': counter('View'),
+ 'like_count': counter('Like'),
+ 'thumbnails': thumbnails,
+ 'formats': formats,
+ }
+
+
+class MeWatchIE(InfoExtractor):
+ IE_NAME = 'mewatch'
+ _VALID_URL = r'https?://(?:(?:www|live)\.)?mewatch\.sg/watch/[^/?#&]+-(?P<id>[0-9]+)'
+ _TESTS = [{
+ 'url': 'https://www.mewatch.sg/watch/Recipe-Of-Life-E1-179371',
+ 'info_dict': {
+ 'id': '1008625',
+ 'ext': 'mp4',
+ 'title': 'Recipe Of Life 味之道',
+ 'timestamp': 1603306526,
+ 'description': 'md5:6e88cde8af2068444fc8e1bc3ebf257c',
+ 'upload_date': '20201021',
+ },
+ 'params': {
+ 'skip_download': 'm3u8 download',
+ },
+ }, {
+ 'url': 'https://www.mewatch.sg/watch/Little-Red-Dot-Detectives-S2-搜密。打卡。小红点-S2-E1-176232',
+ 'only_matching': True,
+ }, {
+ 'url': 'https://www.mewatch.sg/watch/Little-Red-Dot-Detectives-S2-%E6%90%9C%E5%AF%86%E3%80%82%E6%89%93%E5%8D%A1%E3%80%82%E5%B0%8F%E7%BA%A2%E7%82%B9-S2-E1-176232',
+ 'only_matching': True,
+ }, {
+ 'url': 'https://live.mewatch.sg/watch/Recipe-Of-Life-E41-189759',
+ 'only_matching': True,
+ }]
+
+ def _real_extract(self, url):
+ item_id = self._match_id(url)
+ custom_id = self._download_json(
+ 'https://cdn.mewatch.sg/api/items/' + item_id,
+ item_id, query={'segments': 'all'})['customId']
+ return self.url_result(
+ 'toggle:' + custom_id, ToggleIE.ie_key(), custom_id)
diff --git a/yt_dlp/extractor/toggo.py b/yt_dlp/extractor/toggo.py
new file mode 100644
index 0000000..1ddec49
--- /dev/null
+++ b/yt_dlp/extractor/toggo.py
@@ -0,0 +1,82 @@
+from .common import InfoExtractor
+from ..utils import int_or_none, parse_qs
+
+
+class ToggoIE(InfoExtractor):
+ IE_NAME = 'toggo'
+ _VALID_URL = r'https?://(?:www\.)?toggo\.de/(?:toggolino/)?[^/?#]+/(?:folge|video)/(?P<id>[^/?#]+)'
+ _TESTS = [{
+ 'url': 'https://www.toggo.de/weihnachtsmann--co-kg/folge/ein-geschenk-fuer-zwei',
+ 'info_dict': {
+ 'id': 'VEP2977',
+ 'ext': 'mp4',
+ 'title': 'Ein Geschenk für zwei',
+ 'display_id': 'ein-geschenk-fuer-zwei',
+ 'thumbnail': r're:^https?://.*\.(?:jpg|png)',
+ 'description': 'md5:b7715915bfa47824b4e4ad33fb5962f8',
+ 'release_timestamp': 1637259179,
+ 'series': 'Weihnachtsmann & Co. KG',
+ 'season': 'Weihnachtsmann & Co. KG',
+ 'season_number': 1,
+ 'season_id': 'VST118',
+ 'episode': 'Ein Geschenk für zwei',
+ 'episode_number': 7,
+ 'episode_id': 'VEP2977',
+ 'timestamp': 1581935960,
+ 'uploader_id': '6057955896001',
+ 'upload_date': '20200217',
+ },
+ 'params': {'skip_download': True},
+ }, {
+ 'url': 'https://www.toggo.de/grizzy--die-lemminge/folge/ab-durch-die-wand-vogelfrei-rock\'n\'lemming',
+ 'only_matching': True,
+ }, {
+ 'url': 'https://www.toggo.de/toggolino/paw-patrol/folge/der-wetter-zeppelin-der-chili-kochwettbewerb',
+ 'only_matching': True,
+ }, {
+ 'url': 'https://www.toggo.de/toggolino/paw-patrol/video/paw-patrol-rettung-im-anflug',
+ 'only_matching': True,
+ }]
+
+ def _real_extract(self, url):
+ display_id = self._match_id(url)
+ data = self._download_json(
+ f'https://production-n.toggo.de/api/assetstore/vod/asset/{display_id}', display_id)['data']
+
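+        # The Brightcove video-cloud ID is stored in the asset's custom fields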
+ brightcove_id = next(
+ x['value'] for x in data['custom_fields'] if x.get('key') == 'video-cloud-id')
+ info = self._downloader.get_info_extractor('BrightcoveNew').extract(
+ f'http://players.brightcove.net/6057955896001/default_default/index.html?videoId={brightcove_id}')
+
+ for f in info['formats']:
+ if '/dash/live/cenc/' in f.get('fragment_base_url', ''):
+ # Get hidden non-DRM format
+ f['fragment_base_url'] = f['fragment_base_url'].replace('/cenc/', '/clear/')
+ f['has_drm'] = False
+
+ if '/fairplay/' in f.get('manifest_url', ''):
+ f['has_drm'] = True
+
+ thumbnails = [{
+ 'id': name,
+ 'url': url,
+ 'width': int_or_none(next(iter(parse_qs(url).get('width', [])), None)),
+ } for name, url in (data.get('images') or {}).items()]
+
+ return {
+ **info,
+ 'id': data.get('id'),
+ 'display_id': display_id,
+ 'title': data.get('title'),
+ 'language': data.get('language'),
+ 'thumbnails': thumbnails,
+ 'description': data.get('description'),
+ 'release_timestamp': data.get('earliest_start_date'),
+ 'series': data.get('series_title'),
+ 'season': data.get('season_title'),
+ 'season_number': data.get('season_no'),
+ 'season_id': data.get('season_id'),
+ 'episode': data.get('title'),
+ 'episode_number': data.get('episode_no'),
+ 'episode_id': data.get('id'),
+ }
diff --git a/yt_dlp/extractor/tonline.py b/yt_dlp/extractor/tonline.py
new file mode 100644
index 0000000..33b9a32
--- /dev/null
+++ b/yt_dlp/extractor/tonline.py
@@ -0,0 +1,53 @@
+from .common import InfoExtractor
+from ..utils import int_or_none, join_nonempty
+
+
+class TOnlineIE(InfoExtractor):
+ _WORKING = False
+ _ENABLED = None # XXX: pass through to GenericIE
+ IE_NAME = 't-online.de'
+ _VALID_URL = r'https?://(?:www\.)?t-online\.de/tv/(?:[^/]+/)*id_(?P<id>\d+)'
+ _TEST = {
+ 'url': 'http://www.t-online.de/tv/sport/fussball/id_79166266/drittes-remis-zidane-es-muss-etwas-passieren-.html',
+ 'md5': '7d94dbdde5f9d77c5accc73c39632c29',
+ 'info_dict': {
+ 'id': '79166266',
+ 'ext': 'mp4',
+ 'title': 'Drittes Remis! Zidane: "Es muss etwas passieren"',
+ 'description': 'Es läuft nicht rund bei Real Madrid. Das 1:1 gegen den SD Eibar war das dritte Unentschieden in Folge in der Liga.',
+ }
+ }
+
+ def _real_extract(self, url):
+ video_id = self._match_id(url)
+ video_data = self._download_json(
+ 'http://www.t-online.de/tv/id_%s/tid_json_video' % video_id, video_id)
+ title = video_data['subtitle']
+
+ formats = []
+ for asset in video_data.get('assets', []):
+ asset_source = asset.get('source') or asset.get('source2')
+ if not asset_source:
+ continue
+ formats.append({
+ 'format_id': join_nonempty('type', 'profile', from_dict=asset),
+ 'url': asset_source,
+ })
+
+ thumbnails = []
+ for image in video_data.get('images', []):
+ image_source = image.get('source')
+ if not image_source:
+ continue
+ thumbnails.append({
+ 'url': image_source,
+ })
+
+ return {
+ 'id': video_id,
+ 'title': title,
+ 'description': video_data.get('description'),
+ 'duration': int_or_none(video_data.get('duration')),
+ 'thumbnails': thumbnails,
+ 'formats': formats,
+ }
diff --git a/yt_dlp/extractor/toongoggles.py b/yt_dlp/extractor/toongoggles.py
new file mode 100644
index 0000000..1b8fc3a
--- /dev/null
+++ b/yt_dlp/extractor/toongoggles.py
@@ -0,0 +1,76 @@
+from .common import InfoExtractor
+from ..utils import (
+ int_or_none,
+ parse_duration,
+)
+
+
+class ToonGogglesIE(InfoExtractor):
+ _VALID_URL = r'https?://(?:www\.)?toongoggles\.com/shows/(?P<show_id>\d+)(?:/[^/]+/episodes/(?P<episode_id>\d+))?'
+ _TESTS = [{
+ 'url': 'http://www.toongoggles.com/shows/217143/bernard-season-2/episodes/217147/football',
+ 'md5': '18289fc2b951eff6b953a9d8f01e6831',
+ 'info_dict': {
+ 'id': '217147',
+ 'ext': 'mp4',
+ 'title': 'Football',
+ 'uploader_id': '1',
+ 'description': 'Bernard decides to play football in order to be better than Lloyd and tries to beat him no matter how, he even cheats.',
+ 'upload_date': '20160718',
+ 'timestamp': 1468879330,
+ }
+ }, {
+ 'url': 'http://www.toongoggles.com/shows/227759/om-nom-stories-around-the-world',
+ 'info_dict': {
+ 'id': '227759',
+ 'title': 'Om Nom Stories Around The World',
+ },
+ 'playlist_mincount': 11,
+ }]
+
+ def _call_api(self, action, page_id, query):
+ query.update({
+ 'for_ng': 1,
+ 'for_web': 1,
+ 'show_meta': 1,
+ 'version': 7.0,
+ })
+ return self._download_json('http://api.toongoggles.com/' + action, page_id, query=query)
+
+ def _parse_episode_data(self, episode_data):
+ title = episode_data['episode_name']
+
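+        # Hand playback off to the Kaltura extractor (partner 513551) via a url_transparent result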
+ return {
+ '_type': 'url_transparent',
+ 'id': episode_data['episode_id'],
+ 'title': title,
+ 'url': 'kaltura:513551:' + episode_data['entry_id'],
+ 'thumbnail': episode_data.get('thumbnail_url'),
+ 'description': episode_data.get('description'),
+ 'duration': parse_duration(episode_data.get('hms')),
+ 'series': episode_data.get('show_name'),
+ 'season_number': int_or_none(episode_data.get('season_num')),
+ 'episode_id': episode_data.get('episode_id'),
+ 'episode': title,
+ 'episode_number': int_or_none(episode_data.get('episode_num')),
+ 'categories': episode_data.get('categories'),
+ 'ie_key': 'Kaltura',
+ }
+
+ def _real_extract(self, url):
+ show_id, episode_id = self._match_valid_url(url).groups()
+ if episode_id:
+ episode_data = self._call_api('search', episode_id, {
+ 'filter': 'episode',
+ 'id': episode_id,
+ })['objects'][0]
+ return self._parse_episode_data(episode_data)
+ else:
+ show_data = self._call_api('getepisodesbyshow', show_id, {
+ 'max': 1000000000,
+ 'showid': show_id,
+ })
+ entries = []
+ for episode_data in show_data.get('objects', []):
+ entries.append(self._parse_episode_data(episode_data))
+ return self.playlist_result(entries, show_id, show_data.get('show_name'))
diff --git a/yt_dlp/extractor/toutv.py b/yt_dlp/extractor/toutv.py
new file mode 100644
index 0000000..ced1224
--- /dev/null
+++ b/yt_dlp/extractor/toutv.py
@@ -0,0 +1,87 @@
+import json
+
+from .radiocanada import RadioCanadaIE
+from ..networking.exceptions import HTTPError
+from ..utils import (
+ ExtractorError,
+ int_or_none,
+ merge_dicts,
+)
+
+
+class TouTvIE(RadioCanadaIE): # XXX: Do not subclass from concrete IE
+ _NETRC_MACHINE = 'toutv'
+ IE_NAME = 'tou.tv'
+ _VALID_URL = r'https?://ici\.tou\.tv/(?P<id>[a-zA-Z0-9_-]+(?:/S[0-9]+[EC][0-9]+)?)'
+
+ _TESTS = [{
+ 'url': 'http://ici.tou.tv/garfield-tout-court/S2015E17',
+ 'info_dict': {
+ 'id': '122017',
+ 'ext': 'mp4',
+ 'title': 'Saison 2015 Épisode 17',
+ 'description': 'La photo de famille 2',
+ 'upload_date': '20100717',
+ },
+ 'params': {
+ # m3u8 download
+ 'skip_download': True,
+ },
+ 'skip': '404 Not Found',
+ }, {
+ 'url': 'http://ici.tou.tv/hackers',
+ 'only_matching': True,
+ }, {
+ 'url': 'https://ici.tou.tv/l-age-adulte/S01C501',
+ 'only_matching': True,
+ }]
+ _CLIENT_KEY = '90505c8d-9c34-4f34-8da1-3a85bdc6d4f4'
+
+ def _perform_login(self, username, password):
+ try:
+ self._access_token = self._download_json(
+ 'https://services.radio-canada.ca/toutv/profiling/accounts/login',
+ None, 'Logging in', data=json.dumps({
+ 'ClientId': self._CLIENT_KEY,
+ 'ClientSecret': '34026772-244b-49b6-8b06-317b30ac9a20',
+ 'Email': username,
+ 'Password': password,
+ 'Scope': 'id.write media-validation.read',
+ }).encode(), headers={
+ 'Authorization': 'client-key ' + self._CLIENT_KEY,
+ 'Content-Type': 'application/json;charset=utf-8',
+ })['access_token']
+ except ExtractorError as e:
+ if isinstance(e.cause, HTTPError) and e.cause.status == 401:
+ error = self._parse_json(e.cause.response.read().decode(), None)['Message']
+ raise ExtractorError(error, expected=True)
+ raise
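+        # The inherited RadioCanada extractor uses these claims during media validation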
+ self._claims = self._call_api('validation/v2/getClaims')['claims']
+
+ def _real_extract(self, url):
+ path = self._match_id(url)
+ metadata = self._download_json(
+ 'https://services.radio-canada.ca/toutv/presentation/%s' % path, path, query={
+ 'client_key': self._CLIENT_KEY,
+ 'device': 'web',
+ 'version': 4,
+ })
+ # IsDrm does not necessarily mean the video is DRM protected (see
+ # https://github.com/ytdl-org/youtube-dl/issues/13994).
+ if not self.get_param('allow_unplayable_formats') and metadata.get('IsDrm'):
+ self.report_warning('This video is probably DRM protected.', path)
+ video_id = metadata['IdMedia']
+ details = metadata['Details']
+
+ return merge_dicts({
+ 'id': video_id,
+ 'title': details.get('OriginalTitle'),
+ 'description': details.get('Description'),
+ 'thumbnail': details.get('ImageUrl'),
+ 'duration': int_or_none(details.get('LengthInSeconds')),
+ 'series': metadata.get('ProgramTitle'),
+ 'season_number': int_or_none(metadata.get('SeasonNumber')),
+ 'season': metadata.get('SeasonTitle'),
+ 'episode_number': int_or_none(metadata.get('EpisodeNumber')),
+ 'episode': metadata.get('EpisodeTitle'),
+ }, self._extract_info(metadata.get('AppCode', 'toutv'), video_id))
diff --git a/yt_dlp/extractor/toypics.py b/yt_dlp/extractor/toypics.py
new file mode 100644
index 0000000..aa7ee6c
--- /dev/null
+++ b/yt_dlp/extractor/toypics.py
@@ -0,0 +1,89 @@
+import re
+
+from .common import InfoExtractor
+
+
+class ToypicsIE(InfoExtractor):
+ _WORKING = False
+ IE_DESC = 'Toypics video'
+ _VALID_URL = r'https?://videos\.toypics\.net/view/(?P<id>[0-9]+)'
+ _TEST = {
+ 'url': 'http://videos.toypics.net/view/514/chancebulged,-2-1/',
+ 'md5': '16e806ad6d6f58079d210fe30985e08b',
+ 'info_dict': {
+ 'id': '514',
+ 'ext': 'mp4',
+ 'title': "Chance-Bulge'd, 2",
+ 'age_limit': 18,
+ 'uploader': 'kidsune',
+ }
+ }
+
+ def _real_extract(self, url):
+ video_id = self._match_id(url)
+
+ webpage = self._download_webpage(url, video_id)
+
+ formats = self._parse_html5_media_entries(
+ url, webpage, video_id)[0]['formats']
+ title = self._html_search_regex([
+ r'<h1[^>]+class=["\']view-video-title[^>]+>([^<]+)</h',
+ r'<title>([^<]+) - Toypics</title>',
+ ], webpage, 'title')
+
+ uploader = self._html_search_regex(
+ r'More videos from <strong>([^<]+)</strong>', webpage, 'uploader',
+ fatal=False)
+
+ return {
+ 'id': video_id,
+ 'formats': formats,
+ 'title': title,
+ 'uploader': uploader,
+ 'age_limit': 18,
+ }
+
+
+class ToypicsUserIE(InfoExtractor):
+ _WORKING = False
+ IE_DESC = 'Toypics user profile'
+ _VALID_URL = r'https?://videos\.toypics\.net/(?!view)(?P<id>[^/?#&]+)'
+ _TEST = {
+ 'url': 'http://videos.toypics.net/Mikey',
+ 'info_dict': {
+ 'id': 'Mikey',
+ },
+ 'playlist_mincount': 19,
+ }
+
+ def _real_extract(self, url):
+ username = self._match_id(url)
+
+ profile_page = self._download_webpage(
+ url, username, note='Retrieving profile page')
+
+ video_count = int(self._search_regex(
+ r'public/">Public Videos \(([0-9]+)\)</a></li>', profile_page,
+ 'video count'))
+
+ PAGE_SIZE = 8
+ urls = []
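+        # Public videos are paginated PAGE_SIZE per page; derive the page count by ceiling division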
+        page_count = (video_count + PAGE_SIZE - 1) // PAGE_SIZE
+ for n in range(1, page_count + 1):
+ lpage_url = url + '/public/%d' % n
+ lpage = self._download_webpage(
+ lpage_url, username,
+ note='Downloading page %d/%d' % (n, page_count))
+ urls.extend(
+ re.findall(
+ r'<div[^>]+class=["\']preview[^>]+>\s*<a[^>]+href="(https?://videos\.toypics\.net/view/[^"]+)"',
+ lpage))
+
+ return {
+ '_type': 'playlist',
+ 'id': username,
+ 'entries': [{
+ '_type': 'url',
+ 'url': eurl,
+ 'ie_key': 'Toypics',
+ } for eurl in urls]
+ }
diff --git a/yt_dlp/extractor/traileraddict.py b/yt_dlp/extractor/traileraddict.py
new file mode 100644
index 0000000..5c4a138
--- /dev/null
+++ b/yt_dlp/extractor/traileraddict.py
@@ -0,0 +1,61 @@
+import re
+
+from .common import InfoExtractor
+
+
+class TrailerAddictIE(InfoExtractor):
+ _WORKING = False
+ _VALID_URL = r'(?:https?://)?(?:www\.)?traileraddict\.com/(?:trailer|clip)/(?P<movie>.+?)/(?P<trailer_name>.+)'
+ _TEST = {
+ 'url': 'http://www.traileraddict.com/trailer/prince-avalanche/trailer',
+ 'md5': '41365557f3c8c397d091da510e73ceb4',
+ 'info_dict': {
+ 'id': '76184',
+ 'ext': 'mp4',
+ 'title': 'Prince Avalanche Trailer',
+ 'description': 'Trailer for Prince Avalanche.\n\nTwo highway road workers spend the summer of 1988 away from their city lives. The isolated landscape becomes a place of misadventure as the men find themselves at odds with each other and the women they left behind.',
+ }
+ }
+
+ def _real_extract(self, url):
+ mobj = self._match_valid_url(url)
+ name = mobj.group('movie') + '/' + mobj.group('trailer_name')
+ webpage = self._download_webpage(url, name)
+
+ title = self._html_extract_title(webpage, 'video title').replace(' - Trailer Addict', '')
+ view_count_str = self._search_regex(
+ r'<span class="views_n">([0-9,.]+)</span>',
+ webpage, 'view count', fatal=False)
+ view_count = (
+ None if view_count_str is None
+ else int(view_count_str.replace(',', '')))
+ video_id = self._search_regex(
+ r'<param\s+name="movie"\s+value="/emb/([0-9]+)"\s*/>',
+ webpage, 'video id')
+
+ # Presence of (no)watchplus function indicates HD quality is available
+        if re.search(r'function (?:no)?watchplus\(\)', webpage):
+ fvar = 'fvarhd'
+ else:
+ fvar = 'fvar'
+
+        info_url = 'http://www.traileraddict.com/%s.php?tid=%s' % (fvar, video_id)
+ info_webpage = self._download_webpage(info_url, video_id, 'Downloading the info webpage')
+
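+        # fileurl is percent-encoded once; restore the literal '?' in the query string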
+        final_url = self._search_regex(
+            r'&fileurl=(.+)', info_webpage, 'download url').replace('%3F', '?')
+        thumbnail_url = self._search_regex(
+            r'&image=(.+?)&', info_webpage, 'thumbnail url')
+
+ description = self._html_search_regex(
+ r'(?s)<div class="synopsis">.*?<div class="movie_label_info"[^>]*>(.*?)</div>',
+ webpage, 'description', fatal=False)
+
+ return {
+ 'id': video_id,
+ 'url': final_url,
+ 'title': title,
+ 'thumbnail': thumbnail_url,
+ 'description': description,
+ 'view_count': view_count,
+ }
diff --git a/yt_dlp/extractor/triller.py b/yt_dlp/extractor/triller.py
new file mode 100644
index 0000000..56e51fe
--- /dev/null
+++ b/yt_dlp/extractor/triller.py
@@ -0,0 +1,329 @@
+import itertools
+import json
+import re
+
+from .common import InfoExtractor
+from ..networking import HEADRequest
+from ..utils import (
+ ExtractorError,
+ UnsupportedError,
+ determine_ext,
+ int_or_none,
+ parse_resolution,
+ str_or_none,
+ traverse_obj,
+ unified_timestamp,
+ url_basename,
+    url_or_none,
+    urljoin,
+)
+
+
+class TrillerBaseIE(InfoExtractor):
+ _NETRC_MACHINE = 'triller'
+ _API_BASE_URL = 'https://social.triller.co/v1.5'
+ _API_HEADERS = {'Origin': 'https://triller.co'}
+
+ def _perform_login(self, username, password):
+ if self._API_HEADERS.get('Authorization'):
+ return
+
+ headers = {**self._API_HEADERS, 'Content-Type': 'application/json'}
+ user_check = traverse_obj(self._download_json(
+ f'{self._API_BASE_URL}/api/user/is-valid-username', None, note='Checking username',
+ fatal=False, expected_status=400, headers=headers,
+ data=json.dumps({'username': username}, separators=(',', ':')).encode()), 'status')
+
+ if user_check: # endpoint returns `"status":false` if username exists
+ raise ExtractorError('Unable to login: Invalid username', expected=True)
+
+ login = self._download_json(
+ f'{self._API_BASE_URL}/user/auth', None, note='Logging in', fatal=False,
+ expected_status=400, headers=headers, data=json.dumps({
+ 'username': username,
+ 'password': password,
+ }, separators=(',', ':')).encode()) or {}
+
+ if not login.get('auth_token'):
+ if login.get('error') == 1008:
+ raise ExtractorError('Unable to login: Incorrect password', expected=True)
+ raise ExtractorError('Unable to login')
+
+ self._API_HEADERS['Authorization'] = f'Bearer {login["auth_token"]}'
+
+ def _get_comments(self, video_id, limit=15):
+ comment_info = self._download_json(
+ f'{self._API_BASE_URL}/api/videos/{video_id}/comments_v2',
+ video_id, fatal=False, note='Downloading comments API JSON',
+ headers=self._API_HEADERS, query={'limit': limit}) or {}
+ if not comment_info.get('comments'):
+ return
+ yield from traverse_obj(comment_info, ('comments', ..., {
+ 'id': ('id', {str_or_none}),
+ 'text': 'body',
+ 'author': ('author', 'username'),
+ 'author_id': ('author', 'user_id'),
+ 'timestamp': ('timestamp', {unified_timestamp}),
+ }))
+
+ def _parse_video_info(self, video_info, username, user_id, display_id=None):
+ video_id = str(video_info['id'])
+ display_id = display_id or video_info.get('video_uuid')
+
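+        # Media URLs containing '/copyright/' indicate a licensing takedown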
+ if traverse_obj(video_info, (
+ None, ('transcoded_url', 'video_url', 'stream_url', 'audio_url'),
+ {lambda x: re.search(r'/copyright/', x)}), get_all=False):
+ self.raise_no_formats('This video has been removed due to licensing restrictions', expected=True)
+
+ def format_info(url):
+ return {
+ 'url': url,
+ 'ext': determine_ext(url),
+ 'format_id': url_basename(url).split('.')[0],
+ }
+
+ formats = []
+
+ if determine_ext(video_info.get('transcoded_url')) == 'm3u8':
+ formats.extend(self._extract_m3u8_formats(
+ video_info['transcoded_url'], video_id, 'mp4', m3u8_id='hls', fatal=False))
+
+ for video in traverse_obj(video_info, ('video_set', lambda _, v: url_or_none(v['url']))):
+ formats.append({
+ **format_info(video['url']),
+ **parse_resolution(video.get('resolution')),
+ 'vcodec': video.get('codec'),
+ 'vbr': int_or_none(video.get('bitrate'), 1000),
+ })
+
+ video_url = traverse_obj(video_info, 'video_url', 'stream_url', expected_type=url_or_none)
+ if video_url:
+ formats.append({
+ **format_info(video_url),
+ 'vcodec': 'h264',
+ **traverse_obj(video_info, {
+ 'width': 'width',
+ 'height': 'height',
+ 'filesize': 'filesize',
+ }, expected_type=int_or_none),
+ })
+
+ audio_url = url_or_none(video_info.get('audio_url'))
+ if audio_url:
+ formats.append(format_info(audio_url))
+
+ comment_count = traverse_obj(video_info, ('comment_count', {int_or_none}))
+
+ return {
+ 'id': video_id,
+ 'display_id': display_id,
+ 'uploader': username,
+ 'uploader_id': user_id or traverse_obj(video_info, ('user', 'user_id', {str_or_none})),
+ 'webpage_url': urljoin(f'https://triller.co/@{username}/video/', display_id),
+ 'uploader_url': f'https://triller.co/@{username}',
+ 'extractor_key': TrillerIE.ie_key(),
+ 'extractor': TrillerIE.IE_NAME,
+ 'formats': formats,
+ 'comment_count': comment_count,
+ '__post_extractor': self.extract_comments(video_id, comment_count),
+ **traverse_obj(video_info, {
+ 'title': ('description', {lambda x: x.replace('\r\n', ' ')}),
+ 'description': 'description',
+ 'creator': ((('user'), ('users', lambda _, v: str(v['user_id']) == user_id)), 'name'),
+ 'thumbnail': ('thumbnail_url', {url_or_none}),
+ 'timestamp': ('timestamp', {unified_timestamp}),
+ 'duration': ('duration', {int_or_none}),
+ 'view_count': ('play_count', {int_or_none}),
+ 'like_count': ('likes_count', {int_or_none}),
+ 'artist': 'song_artist',
+ 'track': 'song_title',
+ }, get_all=False),
+ }
+
+
+class TrillerIE(TrillerBaseIE):
+ _VALID_URL = r'''(?x)
+ https?://(?:www\.)?triller\.co/
+ @(?P<username>[\w.]+)/video/(?P<id>[\da-f]{8}-(?:[\da-f]{4}-){3}[\da-f]{12})
+ '''
+ _TESTS = [{
+ 'url': 'https://triller.co/@theestallion/video/2358fcd7-3df2-4c77-84c8-1d091610a6cf',
+ 'md5': '228662d783923b60d78395fedddc0a20',
+ 'info_dict': {
+ 'id': '71595734',
+ 'ext': 'mp4',
+ 'title': 'md5:9a2bf9435c5c4292678996a464669416',
+ 'thumbnail': r're:^https://uploads\.cdn\.triller\.co/.+\.jpg$',
+ 'description': 'md5:9a2bf9435c5c4292678996a464669416',
+ 'uploader': 'theestallion',
+ 'uploader_id': '18992236',
+ 'creator': 'Megan Thee Stallion',
+ 'timestamp': 1660598222,
+ 'upload_date': '20220815',
+ 'duration': 47,
+ 'view_count': int,
+ 'like_count': int,
+ 'artist': 'Megan Thee Stallion',
+ 'track': 'Her',
+ 'uploader_url': 'https://triller.co/@theestallion',
+ 'comment_count': int,
+ },
+ 'skip': 'This video has been removed due to licensing restrictions',
+ }, {
+ 'url': 'https://triller.co/@charlidamelio/video/46c6fcfa-aa9e-4503-a50c-68444f44cddc',
+ 'md5': '874055f462af5b0699b9dbb527a505a0',
+ 'info_dict': {
+ 'id': '71621339',
+ 'ext': 'mp4',
+ 'title': 'md5:4c91ea82760fe0fffb71b8c3aa7295fc',
+ 'display_id': '46c6fcfa-aa9e-4503-a50c-68444f44cddc',
+ 'thumbnail': r're:^https://uploads\.cdn\.triller\.co/.+\.jpg$',
+ 'description': 'md5:4c91ea82760fe0fffb71b8c3aa7295fc',
+ 'uploader': 'charlidamelio',
+ 'uploader_id': '1875551',
+ 'creator': 'charli damelio',
+ 'timestamp': 1660773354,
+ 'upload_date': '20220817',
+ 'duration': 16,
+ 'view_count': int,
+ 'like_count': int,
+ 'artist': 'Dixie',
+ 'track': 'Someone to Blame',
+ 'uploader_url': 'https://triller.co/@charlidamelio',
+ 'comment_count': int,
+ },
+ }, {
+ 'url': 'https://triller.co/@theestallion/video/07f35f38-1f51-48e2-8c5f-f7a8e829988f',
+ 'md5': 'af7b3553e4b8bfca507636471ee2eb41',
+ 'info_dict': {
+ 'id': '71837829',
+ 'ext': 'mp4',
+ 'title': 'UNGRATEFUL VIDEO OUT NOW 👏🏾👏🏾👏🏾 💙💙 link my bio #womeninhiphop',
+ 'display_id': '07f35f38-1f51-48e2-8c5f-f7a8e829988f',
+ 'thumbnail': r're:^https://uploads\.cdn\.triller\.co/.+\.jpg$',
+ 'description': 'UNGRATEFUL VIDEO OUT NOW 👏🏾👏🏾👏🏾 💙💙 link my bio\r\n #womeninhiphop',
+ 'uploader': 'theestallion',
+ 'uploader_id': '18992236',
+ 'creator': 'Megan Thee Stallion',
+ 'timestamp': 1662486178,
+ 'upload_date': '20220906',
+ 'duration': 30,
+ 'view_count': int,
+ 'like_count': int,
+ 'artist': 'Unknown',
+ 'track': 'Unknown',
+ 'uploader_url': 'https://triller.co/@theestallion',
+ 'comment_count': int,
+ },
+ }]
+
+ def _real_extract(self, url):
+ username, display_id = self._match_valid_url(url).group('username', 'id')
+
+ video_info = self._download_json(
+ f'{self._API_BASE_URL}/api/videos/{display_id}', display_id,
+ headers=self._API_HEADERS)['videos'][0]
+
+ return self._parse_video_info(video_info, username, None, display_id)
+
+
+class TrillerUserIE(TrillerBaseIE):
+ _VALID_URL = r'https?://(?:www\.)?triller\.co/@(?P<id>[\w.]+)/?(?:$|[#?])'
+ _TESTS = [{
+ 'url': 'https://triller.co/@theestallion',
+ 'playlist_mincount': 12,
+ 'info_dict': {
+ 'id': '18992236',
+ 'title': 'theestallion',
+ 'thumbnail': r're:^https://uploads\.cdn\.triller\.co/.+\.jpg$',
+ },
+ }, {
+ 'url': 'https://triller.co/@charlidamelio',
+ 'playlist_mincount': 150,
+ 'info_dict': {
+ 'id': '1875551',
+ 'title': 'charlidamelio',
+ 'thumbnail': r're:^https://uploads\.cdn\.triller\.co/.+\.jpg$',
+ },
+ }]
+
+ def _real_initialize(self):
+ if not self._API_HEADERS.get('Authorization'):
+ guest = self._download_json(
+ f'{self._API_BASE_URL}/user/create_guest', None,
+ note='Creating guest session', data=b'', headers=self._API_HEADERS, query={
+ 'platform': 'Web',
+ 'app_version': '',
+ })
+ if not guest.get('auth_token'):
+ raise ExtractorError('Unable to fetch required auth token for user extraction')
+
+ self._API_HEADERS['Authorization'] = f'Bearer {guest["auth_token"]}'
+
+ def _entries(self, username, user_id, limit=6):
+ query = {'limit': limit}
+ for page in itertools.count(1):
+ videos = self._download_json(
+ f'{self._API_BASE_URL}/api/users/{user_id}/videos',
+ username, note=f'Downloading user video list page {page}',
+ headers=self._API_HEADERS, query=query)
+
+ for video in traverse_obj(videos, ('videos', ...)):
+ yield self._parse_video_info(video, username, user_id)
+
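+            # Cursor-style pagination: request only videos older than the last timestamp seen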
+ query['before_time'] = traverse_obj(videos, ('videos', -1, 'timestamp'))
+ if not query['before_time']:
+ break
+
+ def _real_extract(self, url):
+ username = self._match_id(url)
+
+ user_info = traverse_obj(self._download_json(
+ f'{self._API_BASE_URL}/api/users/by_username/{username}',
+ username, note='Downloading user info', headers=self._API_HEADERS), ('user', {dict})) or {}
+
+ if user_info.get('private') and user_info.get('followed_by_me') not in (True, 'true'):
+ raise ExtractorError('This user profile is private', expected=True)
+ elif traverse_obj(user_info, (('blocked_by_user', 'blocking_user'), {bool}), get_all=False):
+ raise ExtractorError('The author of the video is blocked', expected=True)
+
+ user_id = str_or_none(user_info.get('user_id'))
+ if not user_id:
+ raise ExtractorError('Unable to extract user ID')
+
+ return self.playlist_result(
+ self._entries(username, user_id), user_id, username, thumbnail=user_info.get('avatar_url'))
+
+
+class TrillerShortIE(InfoExtractor):
+ _VALID_URL = r'https?://v\.triller\.co/(?P<id>\w+)'
+ _TESTS = [{
+ 'url': 'https://v.triller.co/WWZNWk',
+ 'md5': '5eb8dc2c971bd8cd794ec9e8d5e9d101',
+ 'info_dict': {
+ 'id': '66210052',
+ 'ext': 'mp4',
+ 'title': 'md5:2dfc89d154cd91a4a18cd9582ba03e16',
+ 'display_id': 'f4480e1f-fb4e-45b9-a44c-9e6c679ce7eb',
+ 'thumbnail': r're:^https://uploads\.cdn\.triller\.co/.+\.jpg$',
+ 'description': 'md5:2dfc89d154cd91a4a18cd9582ba03e16',
+ 'uploader': 'statefairent',
+ 'uploader_id': '487545193',
+ 'creator': 'Official Summer Fair of LA',
+ 'timestamp': 1629655457,
+ 'upload_date': '20210822',
+ 'duration': 19,
+ 'view_count': int,
+ 'like_count': int,
+ 'artist': 'Unknown',
+ 'track': 'Unknown',
+ 'uploader_url': 'https://triller.co/@statefairent',
+ 'comment_count': int,
+ },
+ }]
+
+ def _real_extract(self, url):
+ real_url = self._request_webpage(HEADRequest(url), self._match_id(url)).url
+ if self.suitable(real_url): # Prevent infinite loop in case redirect fails
+ raise UnsupportedError(real_url)
+ return self.url_result(real_url)
diff --git a/yt_dlp/extractor/trovo.py b/yt_dlp/extractor/trovo.py
new file mode 100644
index 0000000..545a672
--- /dev/null
+++ b/yt_dlp/extractor/trovo.py
@@ -0,0 +1,342 @@
+import itertools
+import json
+import random
+import string
+
+from .common import InfoExtractor
+from ..utils import (
+ ExtractorError,
+ format_field,
+ int_or_none,
+ str_or_none,
+ traverse_obj,
+ try_get,
+)
+
+
+class TrovoBaseIE(InfoExtractor):
+ _VALID_URL_BASE = r'https?://(?:www\.)?trovo\.live/'
+ _HEADERS = {'Origin': 'https://trovo.live'}
+
+ def _call_api(self, video_id, data):
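+        # persisted GraphQL queries are served from the gql endpoint,
+        # while everything else goes through the generic web gateway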
+ if 'persistedQuery' in data.get('extensions', {}):
+ url = 'https://gql.trovo.live'
+ else:
+ url = 'https://api-web.trovo.live/graphql'
+
+ resp = self._download_json(
+ url, video_id, data=json.dumps([data]).encode(), headers={'Accept': 'application/json'},
+ query={
+ 'qid': ''.join(random.choices(string.ascii_uppercase + string.digits, k=16)),
+ })[0]
+ if 'errors' in resp:
+ raise ExtractorError(f'Trovo said: {resp["errors"][0]["message"]}')
+ return resp['data'][data['operationName']]
+
+ def _extract_streamer_info(self, data):
+ streamer_info = data.get('streamerInfo') or {}
+ username = streamer_info.get('userName')
+ return {
+ 'uploader': streamer_info.get('nickName'),
+ 'uploader_id': str_or_none(streamer_info.get('uid')),
+ 'uploader_url': format_field(username, None, 'https://trovo.live/%s'),
+ }
+
+
+class TrovoIE(TrovoBaseIE):
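+    # matches live channel pages only; clip/video URLs and links carrying a
+    # vid= query parameter are handled by TrovoVodIE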
+ _VALID_URL = TrovoBaseIE._VALID_URL_BASE + r'(?:s/)?(?!(?:clip|video)/)(?P<id>(?!s/)[^/?&#]+(?![^#]+[?&]vid=))'
+ _TESTS = [{
+ 'url': 'https://trovo.live/Exsl',
+ 'only_matching': True,
+ }, {
+ 'url': 'https://trovo.live/s/SkenonSLive/549759191497',
+ 'only_matching': True,
+ }, {
+ 'url': 'https://trovo.live/s/zijo987/208251706',
+ 'info_dict': {
+ 'id': '104125853_104125853_1656439572',
+ 'ext': 'flv',
+ 'uploader_url': 'https://trovo.live/zijo987',
+ 'uploader_id': '104125853',
+ 'thumbnail': 'https://livecover.trovo.live/screenshot/73846_104125853_104125853-2022-06-29-04-00-22-852x480.jpg',
+ 'uploader': 'zijo987',
+ 'title': '💥IGRAMO IGRICE UPADAJTE💥2500/5000 2022-06-28 22:01',
+ 'live_status': 'is_live',
+ },
+        'skip': 'May not be live',
+ }]
+
+ def _real_extract(self, url):
+ username = self._match_id(url)
+ live_info = self._call_api(username, data={
+ 'operationName': 'live_LiveReaderService_GetLiveInfo',
+ 'variables': {
+ 'params': {
+ 'userName': username,
+ },
+ },
+ })
+ if live_info.get('isLive') == 0:
+            raise ExtractorError(f'{username} is offline', expected=True)
+ program_info = live_info['programInfo']
+ program_id = program_info['id']
+ title = program_info['title']
+
+ formats = []
+ for stream_info in (program_info.get('streamInfo') or []):
+ play_url = stream_info.get('playUrl')
+ if not play_url:
+ continue
+ format_id = stream_info.get('desc')
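+            # desc is a quality label such as "1080p", from which the height can be parsed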
+ formats.append({
+ 'format_id': format_id,
+ 'height': int_or_none(format_id[:-1]) if format_id else None,
+ 'url': play_url,
+ 'tbr': stream_info.get('bitrate'),
+ 'http_headers': self._HEADERS,
+ })
+
+ info = {
+ 'id': program_id,
+ 'title': title,
+ 'formats': formats,
+ 'thumbnail': program_info.get('coverUrl'),
+ 'is_live': True,
+ }
+ info.update(self._extract_streamer_info(live_info))
+ return info
+
+
+class TrovoVodIE(TrovoBaseIE):
+ _VALID_URL = TrovoBaseIE._VALID_URL_BASE + r'(?:clip|video|s)/(?:[^/]+/\d+[^#]*[?&]vid=)?(?P<id>(?<!/s/)[^/?&#]+)'
+ _TESTS = [{
+ 'url': 'https://trovo.live/clip/lc-5285890818705062210?ltab=videos',
+ 'params': {'getcomments': True},
+ 'info_dict': {
+ 'id': 'lc-5285890818705062210',
+ 'ext': 'mp4',
+ 'title': 'fatal moaning for a super good🤣🤣',
+ 'uploader': 'OneTappedYou',
+ 'timestamp': 1621628019,
+ 'upload_date': '20210521',
+ 'uploader_id': '100719456',
+ 'duration': 31,
+ 'view_count': int,
+ 'like_count': int,
+ 'comment_count': int,
+ 'comments': 'mincount:1',
+ 'categories': ['Call of Duty: Mobile'],
+ 'uploader_url': 'https://trovo.live/OneTappedYou',
+ 'thumbnail': r're:^https?://.*\.jpg',
+ },
+ }, {
+ 'url': 'https://trovo.live/s/SkenonSLive/549759191497?vid=ltv-100829718_100829718_387702301737980280',
+ 'info_dict': {
+ 'id': 'ltv-100829718_100829718_387702301737980280',
+ 'ext': 'mp4',
+ 'timestamp': 1654909624,
+ 'thumbnail': 'http://vod.trovo.live/1f09baf0vodtransger1301120758/ef9ea3f0387702301737980280/coverBySnapshot/coverBySnapshot_10_0.jpg',
+ 'uploader_id': '100829718',
+ 'uploader': 'SkenonSLive',
+ 'title': 'Trovo u secanju, uz par modova i muzike :)',
+ 'uploader_url': 'https://trovo.live/SkenonSLive',
+ 'duration': 10830,
+ 'view_count': int,
+ 'like_count': int,
+ 'upload_date': '20220611',
+ 'comment_count': int,
+ 'categories': ['Minecraft'],
+ },
+ 'skip': 'Not available',
+ }, {
+ 'url': 'https://trovo.live/s/Trovo/549756886599?vid=ltv-100264059_100264059_387702304241698583',
+ 'info_dict': {
+ 'id': 'ltv-100264059_100264059_387702304241698583',
+ 'ext': 'mp4',
+ 'timestamp': 1661479563,
+ 'thumbnail': 'http://vod.trovo.live/be5ae591vodtransusw1301120758/cccb9915387702304241698583/coverBySnapshot/coverBySnapshot_10_0.jpg',
+ 'uploader_id': '100264059',
+ 'uploader': 'Trovo',
+ 'title': 'Dev Corner 8/25',
+ 'uploader_url': 'https://trovo.live/Trovo',
+ 'duration': 3753,
+ 'view_count': int,
+ 'like_count': int,
+ 'upload_date': '20220826',
+ 'comment_count': int,
+ 'categories': ['Talk Shows'],
+ },
+ }, {
+ 'url': 'https://trovo.live/video/ltv-100095501_100095501_1609596043',
+ 'only_matching': True,
+ }, {
+ 'url': 'https://trovo.live/s/SkenonSLive/549759191497?foo=bar&vid=ltv-100829718_100829718_387702301737980280',
+ 'only_matching': True,
+ }]
+
+ def _real_extract(self, url):
+ vid = self._match_id(url)
+
+ # NOTE: It is also possible to extract this info from the Nuxt data on the website,
+ # however that seems unreliable - sometimes it randomly doesn't return the data,
+ # at least when using a non-residential IP.
+ resp = self._call_api(vid, data={
+ 'operationName': 'vod_VodReaderService_BatchGetVodDetailInfo',
+ 'variables': {
+ 'params': {
+ 'vids': [vid],
+ },
+ },
+ 'extensions': {},
+ })
+
+ vod_detail_info = traverse_obj(resp, ('VodDetailInfos', vid), expected_type=dict)
+ if not vod_detail_info:
+            raise ExtractorError('This video was not found or is no longer available', expected=True)
+ vod_info = vod_detail_info.get('vodInfo')
+ title = vod_info.get('title')
+
+ if try_get(vod_info, lambda x: x['playbackRights']['playbackRights'] != 'Normal'):
+ playback_rights_setting = vod_info['playbackRights']['playbackRightsSetting']
+ if playback_rights_setting == 'SubscriberOnly':
+ raise ExtractorError('This video is only available for subscribers', expected=True)
+ else:
+ raise ExtractorError(f'This video is not available ({playback_rights_setting})', expected=True)
+
+ language = vod_info.get('languageName')
+ formats = []
+ for play_info in (vod_info.get('playInfos') or []):
+ play_url = play_info.get('playUrl')
+ if not play_url:
+ continue
+ format_id = play_info.get('desc')
+ formats.append({
+ 'ext': 'mp4',
+ 'filesize': int_or_none(play_info.get('fileSize')),
+ 'format_id': format_id,
+ 'height': int_or_none(format_id[:-1]) if format_id else None,
+ 'language': language,
+ 'protocol': 'm3u8_native',
+ 'tbr': int_or_none(play_info.get('bitrate')),
+ 'url': play_url,
+ 'http_headers': self._HEADERS,
+ })
+
+ category = vod_info.get('categoryName')
+ get_count = lambda x: int_or_none(vod_info.get(x + 'Num'))
+
+ info = {
+ 'id': vid,
+ 'title': title,
+ 'formats': formats,
+ 'thumbnail': vod_info.get('coverUrl'),
+ 'timestamp': int_or_none(vod_info.get('publishTs')),
+ 'duration': int_or_none(vod_info.get('duration')),
+ 'view_count': get_count('watch'),
+ 'like_count': get_count('like'),
+ 'comment_count': get_count('comment'),
+ 'categories': [category] if category else None,
+ '__post_extractor': self.extract_comments(vid),
+ }
+ info.update(self._extract_streamer_info(vod_detail_info))
+ return info
+
+ def _get_comments(self, vid):
+ for page in itertools.count(1):
+ comments_json = self._call_api(vid, data={
+ 'operationName': 'public_CommentProxyService_GetCommentList',
+ 'variables': {
+ 'params': {
+ 'appInfo': {
+ 'postID': vid,
+ },
+ 'preview': {},
+ 'pageSize': 99,
+ 'page': page,
+ },
+ },
+ 'extensions': {
+ 'singleReq': 'true',
+ },
+ })
+ for comment in comments_json['commentList']:
+ content = comment.get('content')
+ if not content:
+ continue
+ author = comment.get('author') or {}
+ parent = comment.get('parentID')
+ yield {
+ 'author': author.get('nickName'),
+ 'author_id': str_or_none(author.get('uid')),
+ 'id': str_or_none(comment.get('commentID')),
+ 'text': content,
+ 'timestamp': int_or_none(comment.get('createdAt')),
+ 'parent': 'root' if parent == 0 else str_or_none(parent),
+ }
+
+ if comments_json['lastPage']:
+ break
+
+
+class TrovoChannelBaseIE(TrovoBaseIE):
+ def _entries(self, spacename):
+ for page in itertools.count(1):
+ vod_json = self._call_api(spacename, data={
+ 'operationName': self._OPERATION,
+ 'variables': {
+ 'params': {
+ 'terminalSpaceID': {
+ 'spaceName': spacename,
+ },
+ 'currPage': page,
+ 'pageSize': 99,
+ },
+ },
+ 'extensions': {
+ 'singleReq': 'true',
+ },
+ })
+ vods = vod_json.get('vodInfos', [])
+ for vod in vods:
+ vid = vod.get('vid')
+ room = traverse_obj(vod, ('spaceInfo', 'roomID'))
+ yield self.url_result(
+ f'https://trovo.live/s/{spacename}/{room}?vid={vid}',
+ ie=TrovoVodIE.ie_key())
+ has_more = vod_json.get('hasMore')
+ if not has_more:
+ break
+
+ def _real_extract(self, url):
+ spacename = self._match_id(url)
+ return self.playlist_result(self._entries(spacename), playlist_id=spacename)
+
+
+class TrovoChannelVodIE(TrovoChannelBaseIE):
+ _VALID_URL = r'trovovod:(?P<id>[^\s]+)'
+ IE_DESC = 'All VODs of a trovo.live channel; "trovovod:" prefix'
+
+ _TESTS = [{
+ 'url': 'trovovod:OneTappedYou',
+ 'playlist_mincount': 24,
+ 'info_dict': {
+ 'id': 'OneTappedYou',
+ },
+ }]
+
+ _OPERATION = 'vod_VodReaderService_GetChannelLtvVideoInfos'
+
+
+class TrovoChannelClipIE(TrovoChannelBaseIE):
+ _VALID_URL = r'trovoclip:(?P<id>[^\s]+)'
+ IE_DESC = 'All Clips of a trovo.live channel; "trovoclip:" prefix'
+
+ _TESTS = [{
+ 'url': 'trovoclip:OneTappedYou',
+ 'playlist_mincount': 29,
+ 'info_dict': {
+ 'id': 'OneTappedYou',
+ },
+ }]
+
+ _OPERATION = 'vod_VodReaderService_GetChannelClipVideoInfos'
diff --git a/yt_dlp/extractor/trtcocuk.py b/yt_dlp/extractor/trtcocuk.py
new file mode 100644
index 0000000..f27f5a1
--- /dev/null
+++ b/yt_dlp/extractor/trtcocuk.py
@@ -0,0 +1,48 @@
+from .common import InfoExtractor
+from ..utils import ExtractorError, int_or_none, parse_iso8601, traverse_obj
+
+
+class TrtCocukVideoIE(InfoExtractor):
+ _VALID_URL = r'https?://www\.trtcocuk\.net\.tr/video/(?P<id>[\w-]+)'
+ _TESTS = [{
+ 'url': 'https://www.trtcocuk.net.tr/video/kaptan-pengu-ve-arkadaslari-1',
+ 'info_dict': {
+ 'id': '3789738',
+ 'ext': 'mp4',
+ 'season_number': 1,
+ 'series': '"Kaptan Pengu ve Arkadaşları"',
+ 'season': 'Season 1',
+ 'title': 'Kaptan Pengu ve Arkadaşları 1 Bölüm İzle TRT Çocuk',
+ 'release_date': '20201209',
+ 'release_timestamp': 1607513774,
+ }
+ }, {
+ 'url': 'https://www.trtcocuk.net.tr/video/sef-rokanin-lezzet-dunyasi-17',
+ 'info_dict': {
+ 'id': '10260842',
+ 'ext': 'mp4',
+ 'series': '"Şef Roka\'nın Lezzet Dünyası"',
+ 'title': 'Şef Roka\'nın Lezzet Dünyası 17 Bölüm İzle TRT Çocuk',
+ }
+ }]
+
+ def _real_extract(self, url):
+ display_id = self._match_id(url)
+ webpage = self._download_webpage(url, display_id)
+ nuxtjs_data = self._search_nuxt_data(webpage, display_id)['data']
+
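+        # `video` may be either a JSON-encoded string or a plain URL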
+ try:
+ video_url = self._parse_json(nuxtjs_data['video'], display_id)
+ except ExtractorError:
+ video_url = nuxtjs_data['video']
+ formats, subtitles = self._extract_m3u8_formats_and_subtitles(video_url, display_id)
+
+ return {
+ 'id': str(nuxtjs_data['id']),
+ 'formats': formats,
+ 'subtitles': subtitles,
+ 'season_number': int_or_none(nuxtjs_data.get('season')),
+ 'release_timestamp': parse_iso8601(nuxtjs_data.get('publishedDate')),
+ 'series': traverse_obj(nuxtjs_data, ('show', 0, 'title')),
+        'title': self._html_extract_title(webpage),  # TODO: get better title
+ }
diff --git a/yt_dlp/extractor/trtworld.py b/yt_dlp/extractor/trtworld.py
new file mode 100644
index 0000000..dbb72a4
--- /dev/null
+++ b/yt_dlp/extractor/trtworld.py
@@ -0,0 +1,101 @@
+from .common import InfoExtractor
+from ..utils import ExtractorError, determine_ext, parse_iso8601, url_or_none
+from ..utils.traversal import traverse_obj
+
+
+class TrtWorldIE(InfoExtractor):
+ _VALID_URL = r'https?://www\.trtworld\.com/video/[\w-]+/[\w-]+-(?P<id>\d+)'
+
+ _TESTS = [{
+ 'url': 'https://www.trtworld.com/video/news/turkiye-switches-to-sustainable-tourism-16067690',
+ 'info_dict': {
+ 'id': '16067690',
+ 'ext': 'mp4',
+ 'title': 'Türkiye switches to sustainable tourism',
+ 'release_timestamp': 1701529569,
+ 'release_date': '20231202',
+ 'thumbnail': 'https://cdn-i.pr.trt.com.tr/trtworld/17647563_0-0-1920-1080.jpeg',
+ 'description': 'md5:0a975c04257fb529c8f99c7b76a2cf12',
+ }
+ }, {
+ 'url': 'https://www.trtworld.com/video/one-offs/frames-from-anatolia-recreating-a-james-bond-scene-in-istanbuls-grand-bazaar-14541780',
+ 'info_dict': {
+ 'id': '14541780',
+ 'ext': 'mp4',
+ 'title': 'Frames From Anatolia: Recreating a ‘James Bond’ Scene in Istanbul’s Grand Bazaar',
+ 'release_timestamp': 1692440844,
+ 'release_date': '20230819',
+ 'thumbnail': 'https://cdn-i.pr.trt.com.tr/trtworld/16939810_0-0-1920-1080.jpeg',
+ 'description': 'md5:4050e21570cc3c40b6c9badae800a94f',
+ }
+ }, {
+ 'url': 'https://www.trtworld.com/video/the-newsmakers/can-sudan-find-peace-amidst-failed-transition-to-democracy-12904760',
+ 'info_dict': {
+ 'id': '12904760',
+ 'ext': 'mp4',
+ 'title': 'Can Sudan find peace amidst failed transition to democracy?',
+ 'release_timestamp': 1681972747,
+ 'release_date': '20230420',
+ 'thumbnail': 'http://cdni0.trtworld.com/w768/q70/154214_NMYOUTUBETEMPLATE1_1681833018736.jpg'
+ }
+ }, {
+ 'url': 'https://www.trtworld.com/video/africa-matters/locals-learning-to-cope-with-rising-tides-of-kenyas-great-lakes-16059545',
+ 'info_dict': {
+ 'id': 'zEns2dWl00w',
+ 'ext': 'mp4',
+ 'title': "Locals learning to cope with rising tides of Kenya's Great Lakes",
+ 'thumbnail': 'https://i.ytimg.com/vi/zEns2dWl00w/maxresdefault.jpg',
+ 'description': 'md5:3ad9d7c5234d752a4ead4340c79c6b8d',
+ 'channel_id': 'UC7fWeaHhqgM4Ry-RMpM2YYw',
+ 'channel_url': 'https://www.youtube.com/channel/UC7fWeaHhqgM4Ry-RMpM2YYw',
+ 'duration': 210,
+ 'view_count': int,
+ 'age_limit': 0,
+ 'webpage_url': 'https://www.youtube.com/watch?v=zEns2dWl00w',
+ 'categories': ['News & Politics'],
+ 'channel': 'TRT World',
+ 'channel_follower_count': int,
+ 'channel_is_verified': True,
+ 'uploader': 'TRT World',
+ 'uploader_id': '@trtworld',
+ 'uploader_url': 'https://www.youtube.com/@trtworld',
+ 'upload_date': '20231202',
+ 'availability': 'public',
+ 'comment_count': int,
+ 'playable_in_embed': True,
+ 'tags': [],
+ 'live_status': 'not_live',
+ 'like_count': int,
+ }
+ }]
+
+ def _real_extract(self, url):
+ display_id = self._match_id(url)
+ webpage = self._download_webpage(url, display_id)
+ nuxtjs_data = self._search_nuxt_data(webpage, display_id)['videoData']['content']['platforms']
+ formats = []
+ for media_url in traverse_obj(nuxtjs_data, (
+ ('website', 'ott'), 'metadata', ('hls_url', 'url'), {url_or_none})):
+ # NB: Website sometimes serves mp4 files under `hls_url` key
+ if determine_ext(media_url) == 'm3u8':
+ formats.extend(self._extract_m3u8_formats(media_url, display_id, fatal=False))
+ else:
+ formats.append({
+ 'format_id': 'http',
+ 'url': media_url,
+ })
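+        # fall back to the YouTube upload when the site serves no direct media URL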
+ if not formats:
+ if youtube_id := traverse_obj(nuxtjs_data, ('youtube', 'metadata', 'youtubeId')):
+ return self.url_result(youtube_id, 'Youtube')
+ raise ExtractorError('No video found', expected=True)
+
+ return {
+ 'id': display_id,
+ 'formats': formats,
+ **traverse_obj(nuxtjs_data, (('website', 'ott'), {
+ 'title': ('fields', 'title', 'text', {str}),
+ 'description': ('fields', 'description', 'text', {str}),
+ 'thumbnail': ('fields', 'thumbnail', 'url', {url_or_none}),
+ 'release_timestamp': ('published', 'date', {parse_iso8601}),
+ }), get_all=False),
+ }
diff --git a/yt_dlp/extractor/trueid.py b/yt_dlp/extractor/trueid.py
new file mode 100644
index 0000000..86f0990
--- /dev/null
+++ b/yt_dlp/extractor/trueid.py
@@ -0,0 +1,136 @@
+from .common import InfoExtractor
+from ..networking.exceptions import HTTPError
+from ..utils import (
+ determine_ext,
+ ExtractorError,
+ int_or_none,
+ parse_age_limit,
+ traverse_obj,
+ unified_timestamp,
+    url_or_none,
+)
+
+
+class TrueIDIE(InfoExtractor):
+ _VALID_URL = r'https?://(?P<domain>vn\.trueid\.net|trueid\.(?:id|ph))/(?:movie|series/[^/]+)/(?P<id>[^/?#&]+)'
+ _TESTS = [{
+ 'url': 'https://trueid.id/movie/XYNlDOZZJzL6/pengabdi-setan/',
+ 'md5': '2552c7535125885901f1a2a4bcf32ca3',
+ 'info_dict': {
+ 'id': 'XYNlDOZZJzL6',
+ 'ext': 'mp4',
+ 'title': 'Pengabdi Setan',
+ 'display_id': 'pengabdi-setan',
+ 'description': 'md5:b0b41df08601e85e5291496c9bbe52cd',
+ 'timestamp': 1600243511,
+ 'categories': ['Film Indonesia', 'Horror', 'Mystery'],
+ 'release_timestamp': 1593536400,
+ 'release_year': 1982,
+ 'cast': list,
+ 'thumbnail': 'https://cms.dmpcdn.com/movie/2020/09/18/8b6e35c0-f97f-11ea-81fe-c52fc9dd314f_original.png',
+ 'upload_date': '20200916',
+ 'release_date': '20200630',
+ },
+ 'expected_warnings': ['Video is geo restricted.']
+ }, {
+ 'url': 'https://trueid.id/series/zZOBVPb62EwR/qXY73rwyl7oj/one-piece-ep-1/',
+ 'md5': '1c6d976049bc3c89a8a25aed2c3fb081',
+ 'info_dict': {
+ 'id': 'qXY73rwyl7oj',
+ 'ext': 'mp4',
+ 'title': 'One Piece Ep. 1',
+ 'display_id': 'one-piece-ep-1',
+ 'description': 'md5:13226d603bd03c4150a1cf5758e842ea',
+ 'timestamp': 1610421085,
+ 'categories': ['Animation & Cartoon', 'Kids & Family', 'Adventure'],
+ 'release_timestamp': 1612112400,
+ 'release_year': 1999,
+ 'age_limit': 7,
+ 'cast': ['Kounosuke Uda', 'Junji Shimizu'],
+ 'thumbnail': 'https://cms.dmpcdn.com/movie/2021/01/13/f84e9e70-5562-11eb-9fe2-dd6c2099a468_original.png',
+ 'upload_date': '20210112',
+ 'release_date': '20210131',
+ },
+ 'expected_warnings': ['Video is geo restricted.']
+ }, {
+ 'url': 'https://vn.trueid.net/series/7DNPM7Bpa9wv/pwLgEQ4Xbda2/haikyu-vua-bong-chuyen-phan-1/',
+ 'info_dict': {
+ 'id': 'pwLgEQ4Xbda2',
+ 'ext': 'mp4',
+ 'title': 'Haikyu!!: Vua Bóng Chuyền Phần 1 - Tập 1',
+ 'display_id': 'haikyu-vua-bong-chuyen-phan-1-tap-1',
+ 'description': 'md5:0374dd44d247799169449ee30cca963a',
+ 'timestamp': 1629270901,
+ 'categories': ['Anime', 'Phim Hài', 'Phim Học Đường', 'Phim Thể Thao', 'Shounen'],
+ 'release_timestamp': 1629270720,
+ 'release_year': 2014,
+ 'age_limit': 13,
+ 'thumbnail': 'https://cms.dmpcdn.com/movie/2021/09/28/b6e7ec00-2039-11ec-8436-974544e5841f_webp_original.jpg',
+ 'upload_date': '20210818',
+ 'release_date': '20210818',
+ },
+ 'expected_warnings': ['Video is geo restricted.']
+ }, {
+ 'url': 'https://trueid.ph/series/l8rvvAw7Jwv8/l8rvvAw7Jwv8/naruto-trailer/',
+ 'only_matching': True,
+ }]
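+    # ratings that parse_age_limit does not understand are mapped manually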
+ _CUSTOM_RATINGS = {
+ 'PG': 7,
+ }
+
+ def _real_extract(self, url):
+ domain, video_id = self._match_valid_url(url).group('domain', 'id')
+ webpage = self._download_webpage(url, video_id)
+ initial_data = traverse_obj(
+ self._search_nextjs_data(webpage, video_id, fatal=False), ('props', 'pageProps', 'initialContentData'), default={})
+
+        stream_data = None
+        try:
+            stream_data = self._download_json(
+                f'https://{domain}/cmsPostProxy/contents/video/{video_id}/streamer?os=android', video_id, data=b'')['data']
+ except ExtractorError as e:
+ if not isinstance(e.cause, HTTPError):
+                raise
+ errmsg = self._parse_json(e.cause.response.read().decode(), video_id)['meta']['message']
+ if 'country' in errmsg:
+ self.raise_geo_restricted(
+ errmsg, [initial_data['display_country']] if initial_data.get('display_country') else None, True)
+ else:
+ self.raise_no_formats(errmsg, video_id=video_id)
+
+        formats, subs = [], {}
+        if stream_data:
+            stream_url = stream_data['stream']['stream_url']
+            stream_ext = determine_ext(stream_url)
+            if stream_ext == 'm3u8':
+                formats, subs = self._extract_m3u8_formats_and_subtitles(stream_url, video_id, 'mp4')
+            elif stream_ext == 'mpd':
+                formats, subs = self._extract_mpd_formats_and_subtitles(stream_url, video_id)
+            else:
+                formats = [{'url': stream_url}]
+
+ thumbnails = [
+ {'id': thumb_key, 'url': thumb_url}
+ for thumb_key, thumb_url in (initial_data.get('thumb_list') or {}).items()
+ if url_or_none(thumb_url)]
+
+ return {
+ 'id': video_id,
+ 'title': initial_data.get('title') or self._html_search_regex(
+ [r'Nonton (?P<name>.+) Gratis',
+ r'Xem (?P<name>.+) Miễn phí',
+ r'Watch (?P<name>.+) Free'], webpage, 'title', group='name'),
+ 'display_id': initial_data.get('slug_title'),
+ 'description': initial_data.get('synopsis'),
+ 'timestamp': unified_timestamp(initial_data.get('create_date')),
+            # 'duration': int_or_none(initial_data.get('duration'), invscale=60),  # duration must be accurate to at least the second
+ 'categories': traverse_obj(initial_data, ('article_category_details', ..., 'name')),
+ 'release_timestamp': unified_timestamp(initial_data.get('publish_date')),
+ 'release_year': int_or_none(initial_data.get('release_year')),
+ 'formats': formats,
+ 'subtitles': subs,
+ 'thumbnails': thumbnails,
+ 'age_limit': self._CUSTOM_RATINGS.get(initial_data.get('rate')) or parse_age_limit(initial_data.get('rate')),
+ 'cast': traverse_obj(initial_data, (('actor', 'director'), ...)),
+ 'view_count': int_or_none(initial_data.get('count_views')),
+ 'like_count': int_or_none(initial_data.get('count_likes')),
+ 'average_rating': int_or_none(initial_data.get('count_ratings')),
+ }
diff --git a/yt_dlp/extractor/trunews.py b/yt_dlp/extractor/trunews.py
new file mode 100644
index 0000000..d5ce86e
--- /dev/null
+++ b/yt_dlp/extractor/trunews.py
@@ -0,0 +1,32 @@
+from .common import InfoExtractor
+
+
+class TruNewsIE(InfoExtractor):
+ _VALID_URL = r'https?://(?:www\.)?trunews\.com/stream/(?P<id>[^/?#&]+)'
+ _TEST = {
+ 'url': 'https://www.trunews.com/stream/will-democrats-stage-a-circus-during-president-trump-s-state-of-the-union-speech',
+ 'info_dict': {
+ 'id': '5c5a21e65d3c196e1c0020cc',
+ 'display_id': 'will-democrats-stage-a-circus-during-president-trump-s-state-of-the-union-speech',
+ 'ext': 'mp4',
+ 'title': "Will Democrats Stage a Circus During President Trump's State of the Union Speech?",
+ 'description': 'md5:c583b72147cc92cf21f56a31aff7a670',
+ 'duration': 3685,
+ 'timestamp': 1549411440,
+ 'upload_date': '20190206',
+ },
+ 'add_ie': ['Zype'],
+ }
+ _ZYPE_TEMPL = 'https://player.zype.com/embed/%s.js?api_key=X5XnahkjCwJrT_l5zUqypnaLEObotyvtUKJWWlONxDoHVjP8vqxlArLV8llxMbyt'
+
+ def _real_extract(self, url):
+ display_id = self._match_id(url)
+
+ zype_id = self._download_json(
+ 'https://api.zype.com/videos', display_id, query={
+ 'app_key': 'PUVKp9WgGUb3-JUw6EqafLx8tFVP6VKZTWbUOR-HOm__g4fNDt1bCsm_LgYf_k9H',
+ 'per_page': 1,
+ 'active': 'true',
+ 'friendly_title': display_id,
+ })['response'][0]['_id']
+ return self.url_result(self._ZYPE_TEMPL % zype_id, 'Zype', zype_id)
diff --git a/yt_dlp/extractor/truth.py b/yt_dlp/extractor/truth.py
new file mode 100644
index 0000000..51d28d1
--- /dev/null
+++ b/yt_dlp/extractor/truth.py
@@ -0,0 +1,68 @@
+from .common import InfoExtractor
+from ..utils import (
+ clean_html,
+ format_field,
+ int_or_none,
+ strip_or_none,
+ traverse_obj,
+ unified_timestamp,
+)
+
+
+class TruthIE(InfoExtractor):
+ _VALID_URL = r'https?://truthsocial\.com/@[^/]+/posts/(?P<id>\d+)'
+ _TESTS = [
+ {
+ 'url': 'https://truthsocial.com/@realDonaldTrump/posts/108779000807761862',
+ 'md5': '4a5fb1470c192e493d9efd6f19e514d3',
+ 'info_dict': {
+ 'id': '108779000807761862',
+ 'ext': 'qt',
+ 'title': 'Truth video #108779000807761862',
+ 'timestamp': 1659835827,
+ 'upload_date': '20220807',
+ 'uploader': 'Donald J. Trump',
+ 'uploader_id': 'realDonaldTrump',
+ 'uploader_url': 'https://truthsocial.com/@realDonaldTrump',
+ 'repost_count': int,
+ 'comment_count': int,
+ 'like_count': int,
+ },
+ },
+ {
+ 'url': 'https://truthsocial.com/@ProjectVeritasAction/posts/108618228543962049',
+ 'md5': 'fd47ba68933f9dce27accc52275be9c3',
+ 'info_dict': {
+ 'id': '108618228543962049',
+ 'ext': 'mp4',
+ 'title': 'md5:debde7186cf83f60ff7b44dbb9444e35',
+ 'description': 'md5:de2fc49045bf92bb8dc97e56503b150f',
+ 'timestamp': 1657382637,
+ 'upload_date': '20220709',
+ 'uploader': 'Project Veritas Action',
+ 'uploader_id': 'ProjectVeritasAction',
+ 'uploader_url': 'https://truthsocial.com/@ProjectVeritasAction',
+ 'repost_count': int,
+ 'comment_count': int,
+ 'like_count': int,
+ },
+ },
+ ]
+
+ def _real_extract(self, url):
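+        # Truth Social is built on Mastodon, so this is the standard Mastodon statuses API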
+ video_id = self._match_id(url)
+ status = self._download_json(f'https://truthsocial.com/api/v1/statuses/{video_id}', video_id)
+ uploader_id = strip_or_none(traverse_obj(status, ('account', 'username')))
+ return {
+ 'id': video_id,
+ 'url': status['media_attachments'][0]['url'],
+ 'title': '',
+ 'description': strip_or_none(clean_html(status.get('content'))) or None,
+ 'timestamp': unified_timestamp(status.get('created_at')),
+ 'uploader': strip_or_none(traverse_obj(status, ('account', 'display_name'))),
+ 'uploader_id': uploader_id,
+ 'uploader_url': format_field(uploader_id, None, 'https://truthsocial.com/@%s'),
+ 'repost_count': int_or_none(status.get('reblogs_count')),
+ 'like_count': int_or_none(status.get('favourites_count')),
+ 'comment_count': int_or_none(status.get('replies_count')),
+ }
diff --git a/yt_dlp/extractor/trutv.py b/yt_dlp/extractor/trutv.py
new file mode 100644
index 0000000..ea0f2f4
--- /dev/null
+++ b/yt_dlp/extractor/trutv.py
@@ -0,0 +1,70 @@
+from .turner import TurnerBaseIE
+from ..utils import (
+ int_or_none,
+ parse_iso8601,
+)
+
+
+class TruTVIE(TurnerBaseIE):
+ _VALID_URL = r'https?://(?:www\.)?trutv\.com/(?:shows|full-episodes)/(?P<series_slug>[0-9A-Za-z-]+)/(?:videos/(?P<clip_slug>[0-9A-Za-z-]+)|(?P<id>\d+))'
+ _TEST = {
+ 'url': 'https://www.trutv.com/shows/the-carbonaro-effect/videos/sunlight-activated-flower.html',
+ 'info_dict': {
+ 'id': 'f16c03beec1e84cd7d1a51f11d8fcc29124cc7f1',
+ 'ext': 'mp4',
+ 'title': 'Sunlight-Activated Flower',
+ 'description': "A customer is stunned when he sees Michael's sunlight-activated flower.",
+ },
+ 'params': {
+ # m3u8 download
+ 'skip_download': True,
+ },
+ }
+
+ def _real_extract(self, url):
+ series_slug, clip_slug, video_id = self._match_valid_url(url).groups()
+
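+        # full episodes are addressed by numeric ID, clips by their slug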
+ if video_id:
+ path = 'episode'
+ display_id = video_id
+ else:
+ path = 'series/clip'
+ display_id = clip_slug
+
+ data = self._download_json(
+            f'https://api.trutv.com/v2/web/{path}/{series_slug}/{display_id}',
+ display_id)
+ video_data = data['episode'] if video_id else data['info']
+ media_id = video_data['mediaId']
+ title = video_data['title'].strip()
+
+ info = self._extract_ngtv_info(
+ media_id, {}, {
+ 'url': url,
+ 'site_name': 'truTV',
+ 'auth_required': video_data.get('isAuthRequired'),
+ })
+
+ thumbnails = []
+ for image in video_data.get('images', []):
+ image_url = image.get('srcUrl')
+ if not image_url:
+ continue
+ thumbnails.append({
+ 'url': image_url,
+ 'width': int_or_none(image.get('width')),
+ 'height': int_or_none(image.get('height')),
+ })
+
+ info.update({
+ 'id': media_id,
+ 'display_id': display_id,
+ 'title': title,
+ 'description': video_data.get('description'),
+ 'thumbnails': thumbnails,
+ 'timestamp': parse_iso8601(video_data.get('publicationDate')),
+ 'series': video_data.get('showTitle'),
+ 'season_number': int_or_none(video_data.get('seasonNum')),
+ 'episode_number': int_or_none(video_data.get('episodeNum')),
+ })
+ return info
diff --git a/yt_dlp/extractor/tube8.py b/yt_dlp/extractor/tube8.py
new file mode 100644
index 0000000..5f15b45
--- /dev/null
+++ b/yt_dlp/extractor/tube8.py
@@ -0,0 +1,170 @@
+import re
+
+from .common import InfoExtractor
+from ..aes import aes_decrypt_text
+from ..compat import compat_urllib_parse_unquote
+from ..utils import (
+ determine_ext,
+ format_field,
+ int_or_none,
+ str_to_int,
+ strip_or_none,
+ url_or_none,
+)
+
+
+class Tube8IE(InfoExtractor):
+ _WORKING = False
+ _VALID_URL = r'https?://(?:www\.)?tube8\.com/(?:[^/]+/)+(?P<display_id>[^/]+)/(?P<id>\d+)'
+ _EMBED_REGEX = [r'<iframe[^>]+\bsrc=["\'](?P<url>(?:https?:)?//(?:www\.)?tube8\.com/embed/(?:[^/]+/)+\d+)']
+ _TESTS = [{
+ 'url': 'http://www.tube8.com/teen/kasia-music-video/229795/',
+ 'md5': '65e20c48e6abff62ed0c3965fff13a39',
+ 'info_dict': {
+ 'id': '229795',
+ 'display_id': 'kasia-music-video',
+ 'ext': 'mp4',
+ 'description': 'hot teen Kasia grinding',
+ 'uploader': 'unknown',
+ 'title': 'Kasia music video',
+ 'age_limit': 18,
+ 'duration': 230,
+ 'categories': ['Teen'],
+ 'tags': ['dancing'],
+ },
+ }, {
+ 'url': 'http://www.tube8.com/shemale/teen/blonde-cd-gets-kidnapped-by-two-blacks-and-punished-for-being-a-slutty-girl/19569151/',
+ 'only_matching': True,
+ }]
+
+ def _extract_info(self, url, fatal=True):
+ mobj = self._match_valid_url(url)
+ video_id = mobj.group('id')
+ display_id = (mobj.group('display_id')
+ if 'display_id' in mobj.groupdict()
+ else None) or mobj.group('id')
+
+ webpage = self._download_webpage(
+ url, display_id, headers={'Cookie': 'age_verified=1'})
+
+ formats = []
+ format_urls = set()
+
+ title = None
+ thumbnail = None
+ duration = None
+ encrypted = False
+
+ def extract_format(format_url, height=None):
+ format_url = url_or_none(format_url)
+ if not format_url or not format_url.startswith(('http', '//')):
+ return
+ if format_url in format_urls:
+ return
+ format_urls.add(format_url)
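+            # bitrate and height are usually embedded in the URL itself
+            # (e.g. a path segment like _480P_2000K_); the regexes below pick them out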
+ tbr = int_or_none(self._search_regex(
+ r'[/_](\d+)[kK][/_]', format_url, 'tbr', default=None))
+ if not height:
+ height = int_or_none(self._search_regex(
+ r'[/_](\d+)[pP][/_]', format_url, 'height', default=None))
+            if encrypted:
+                format_url = aes_decrypt_text(
+                    format_url, title, 32).decode('utf-8')
+ formats.append({
+ 'url': format_url,
+ 'format_id': format_field(height, None, '%dp'),
+ 'height': height,
+ 'tbr': tbr,
+ })
+
+ flashvars = self._parse_json(
+ self._search_regex(
+ r'flashvars\s*=\s*({.+?});', webpage,
+ 'flashvars', default='{}'),
+ display_id, fatal=False)
+
+ if flashvars:
+ title = flashvars.get('video_title')
+ thumbnail = flashvars.get('image_url')
+ duration = int_or_none(flashvars.get('video_duration'))
+ encrypted = flashvars.get('encrypted') is True
+ for key, value in flashvars.items():
+ mobj = re.search(r'quality_(\d+)[pP]', key)
+ if mobj:
+ extract_format(value, int(mobj.group(1)))
+ video_url = flashvars.get('video_url')
+ if video_url and determine_ext(video_url, None):
+ extract_format(video_url)
+
+ video_url = self._html_search_regex(
+ r'flashvars\.video_url\s*=\s*(["\'])(?P<url>http.+?)\1',
+ webpage, 'video url', default=None, group='url')
+ if video_url:
+ extract_format(compat_urllib_parse_unquote(video_url))
+
+ if not formats:
+ if 'title="This video is no longer available"' in webpage:
+ self.raise_no_formats(
+                    f'Video {video_id} is no longer available', expected=True)
+
+ if not title:
+ title = self._html_search_regex(
+ r'<h1[^>]*>([^<]+)', webpage, 'title')
+
+ return webpage, {
+ 'id': video_id,
+ 'display_id': display_id,
+ 'title': strip_or_none(title),
+ 'thumbnail': thumbnail,
+ 'duration': duration,
+ 'age_limit': 18,
+ 'formats': formats,
+ }
+
+ def _real_extract(self, url):
+ webpage, info = self._extract_info(url)
+
+ if not info['title']:
+ info['title'] = self._html_search_regex(
+ r'videoTitle\s*=\s*"([^"]+)', webpage, 'title')
+
+ description = self._html_search_regex(
+ r'(?s)Description:</dt>\s*<dd>(.+?)</dd>', webpage, 'description', fatal=False)
+ uploader = self._html_search_regex(
+ r'<span class="username">\s*(.+?)\s*<',
+ webpage, 'uploader', fatal=False)
+
+ like_count = int_or_none(self._search_regex(
+ r'rupVar\s*=\s*"(\d+)"', webpage, 'like count', fatal=False))
+ dislike_count = int_or_none(self._search_regex(
+ r'rdownVar\s*=\s*"(\d+)"', webpage, 'dislike count', fatal=False))
+ view_count = str_to_int(self._search_regex(
+ r'Views:\s*</dt>\s*<dd>([\d,\.]+)',
+ webpage, 'view count', fatal=False))
+ comment_count = str_to_int(self._search_regex(
+ r'<span id="allCommentsCount">(\d+)</span>',
+ webpage, 'comment count', fatal=False))
+
+ category = self._search_regex(
+ r'Category:\s*</dt>\s*<dd>\s*<a[^>]+href=[^>]+>([^<]+)',
+ webpage, 'category', fatal=False)
+ categories = [category] if category else None
+
+ tags_str = self._search_regex(
+ r'(?s)Tags:\s*</dt>\s*<dd>(.+?)</(?!a)',
+ webpage, 'tags', fatal=False)
+        tags = re.findall(
+            r'<a[^>]+href=[^>]+>([^<]+)', tags_str) if tags_str else None
+
+ info.update({
+ 'description': description,
+ 'uploader': uploader,
+ 'view_count': view_count,
+ 'like_count': like_count,
+ 'dislike_count': dislike_count,
+ 'comment_count': comment_count,
+ 'categories': categories,
+ 'tags': tags,
+ })
+
+ return info
diff --git a/yt_dlp/extractor/tubetugraz.py b/yt_dlp/extractor/tubetugraz.py
new file mode 100644
index 0000000..a351e4e
--- /dev/null
+++ b/yt_dlp/extractor/tubetugraz.py
@@ -0,0 +1,252 @@
+from .common import InfoExtractor
+from ..utils import (
+ float_or_none,
+ parse_resolution,
+ traverse_obj,
+ urlencode_postdata,
+ variadic,
+)
+
+
+class TubeTuGrazBaseIE(InfoExtractor):
+ _NETRC_MACHINE = 'tubetugraz'
+
+ _API_EPISODE = 'https://tube.tugraz.at/search/episode.json'
+ _FORMAT_TYPES = ('presentation', 'presenter')
+
+ def _perform_login(self, username, password):
+ urlh = self._request_webpage(
+ 'https://tube.tugraz.at/Shibboleth.sso/Login?target=/paella/ui/index.html',
+ None, fatal=False, note='downloading login page', errnote='unable to fetch login page')
+ if not urlh:
+ return
+
+ content, urlh = self._download_webpage_handle(
+ urlh.url, None, fatal=False, headers={'referer': urlh.url},
+ note='logging in', errnote='unable to log in',
+ data=urlencode_postdata({
+ 'lang': 'de',
+ '_eventId_proceed': '',
+ 'j_username': username,
+ 'j_password': password
+ }))
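+        # being redirected straight back to the player means login succeeded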
+ if not urlh or urlh.url == 'https://tube.tugraz.at/paella/ui/index.html':
+ return
+
+ if not self._html_search_regex(
+ r'<p\b[^>]*>(Bitte geben Sie einen OTP-Wert ein:)</p>',
+ content, 'TFA prompt', default=None):
+ self.report_warning('unable to login: incorrect password')
+ return
+
+ content, urlh = self._download_webpage_handle(
+ urlh.url, None, fatal=False, headers={'referer': urlh.url},
+ note='logging in with TFA', errnote='unable to log in with TFA',
+ data=urlencode_postdata({
+ 'lang': 'de',
+ '_eventId_proceed': '',
+ 'j_tokenNumber': self._get_tfa_info(),
+ }))
+ if not urlh or urlh.url == 'https://tube.tugraz.at/paella/ui/index.html':
+ return
+
+ self.report_warning('unable to login: incorrect TFA code')
+
+ def _extract_episode(self, episode_info):
+        episode_id = episode_info.get('id')
+        formats = list(self._extract_formats(
+            traverse_obj(episode_info, ('mediapackage', 'media', 'track')), episode_id))
+
+ title = traverse_obj(episode_info, ('mediapackage', 'title'), 'dcTitle')
+ series_title = traverse_obj(episode_info, ('mediapackage', 'seriestitle'))
+ creator = ', '.join(variadic(traverse_obj(
+ episode_info, ('mediapackage', 'creators', 'creator'), 'dcCreator', default='')))
+ return {
+            'id': episode_id,
+ 'title': title,
+ 'creator': creator or None,
+ 'duration': traverse_obj(episode_info, ('mediapackage', 'duration'), 'dcExtent'),
+ 'series': series_title,
+ 'series_id': traverse_obj(episode_info, ('mediapackage', 'series'), 'dcIsPartOf'),
+ 'episode': series_title and title,
+            'formats': formats,
+ }
+
+    def _set_format_type(self, formats, format_type):
+        for f in formats:
+            f['format_note'] = format_type
+            if not format_type.startswith(self._FORMAT_TYPES[0]):
+                f['preference'] = -2
+        return formats
+
+    def _extract_formats(self, format_list, video_id):
+        has_hls, has_dash = False, False
+
+        for format_info in format_list or []:
+            url = traverse_obj(format_info, ('tags', 'url'), 'url')
+            if url is None:
+                continue
+
+            format_type = format_info.get('type') or 'unknown'
+            transport = (format_info.get('transport') or 'https').lower()
+
+            if transport == 'https':
+                formats = [{
+                    'url': url,
+                    'abr': float_or_none(traverse_obj(format_info, ('audio', 'bitrate')), 1000),
+                    'vbr': float_or_none(traverse_obj(format_info, ('video', 'bitrate')), 1000),
+                    'fps': traverse_obj(format_info, ('video', 'framerate')),
+                    **parse_resolution(traverse_obj(format_info, ('video', 'resolution'))),
+                }]
+            elif transport == 'hls':
+                has_hls, formats = True, self._extract_m3u8_formats(
+                    url, video_id, 'mp4', fatal=False, note=f'downloading {format_type} HLS manifest')
+            elif transport == 'dash':
+                has_dash, formats = True, self._extract_mpd_formats(
+                    url, video_id, fatal=False, note=f'downloading {format_type} DASH manifest')
+            else:
+                # RTMP, HDS, SMOOTH, and unknown formats
+                # - RTMP urls have failed on every entry tested so far
+                # - HDS urls have 404'd on every entry tested so far
+                # - SMOOTH urls have 404'd on every entry tested so far
+                continue
+
+            yield from self._set_format_type(formats, format_type)
+
+        # TODO: Add test for these
+        for format_type in self._FORMAT_TYPES:
+            if not has_hls:
+                hls_formats = self._extract_m3u8_formats(
+                    f'https://wowza.tugraz.at/matterhorn_engage/smil:engage-player_{video_id}_{format_type}.smil/playlist.m3u8',
+                    video_id, 'mp4', fatal=False, note=f'Downloading {format_type} HLS manifest', errnote=False) or []
+                yield from self._set_format_type(hls_formats, format_type)
+
+            if not has_dash:
+                dash_formats = self._extract_mpd_formats(
+                    f'https://wowza.tugraz.at/matterhorn_engage/smil:engage-player_{video_id}_{format_type}.smil/manifest_mpm4sav_mvlist.mpd',
+                    video_id, fatal=False, note=f'Downloading {format_type} DASH manifest', errnote=False)
+                yield from self._set_format_type(dash_formats, format_type)
+
+
+class TubeTuGrazIE(TubeTuGrazBaseIE):
+ IE_DESC = 'tube.tugraz.at'
+
+ _VALID_URL = r'''(?x)
+ https?://tube\.tugraz\.at/paella/ui/watch.html\?id=
+ (?P<id>[0-9a-fA-F]{8}-(?:[0-9a-fA-F]{4}-){3}[0-9a-fA-F]{12})
+ '''
+ _TESTS = [
+ {
+ 'url': 'https://tube.tugraz.at/paella/ui/watch.html?id=f2634392-e40e-4ac7-9ddc-47764aa23d40',
+ 'md5': 'a23a3d5c9aaca2b84932fdba66e17145',
+ 'info_dict': {
+ 'id': 'f2634392-e40e-4ac7-9ddc-47764aa23d40',
+ 'ext': 'mp4',
+ 'title': '#6 (23.11.2017)',
+ 'episode': '#6 (23.11.2017)',
+ 'series': '[INB03001UF] Einführung in die strukturierte Programmierung',
+ 'creator': 'Safran C',
+ 'duration': 3295818,
+ 'series_id': 'b1192fff-2aa7-4bf0-a5cf-7b15c3bd3b34',
+ }
+ }, {
+ 'url': 'https://tube.tugraz.at/paella/ui/watch.html?id=2df6d787-e56a-428d-8ef4-d57f07eef238',
+ 'md5': 'de0d854a56bf7318d2b693fe1adb89a5',
+ 'info_dict': {
+ 'id': '2df6d787-e56a-428d-8ef4-d57f07eef238',
+ 'title': 'TubeTuGraz video #2df6d787-e56a-428d-8ef4-d57f07eef238',
+ 'ext': 'mp4',
+ },
+ 'expected_warnings': ['Extractor failed to obtain "title"'],
+ }
+ ]
+
+ def _real_extract(self, url):
+ video_id = self._match_id(url)
+ episode_data = self._download_json(
+ self._API_EPISODE, video_id, query={'id': video_id, 'limit': 1}, note='Downloading episode metadata')
+
+ episode_info = traverse_obj(episode_data, ('search-results', 'result'), default={'id': video_id})
+ return self._extract_episode(episode_info)
+
+
+class TubeTuGrazSeriesIE(TubeTuGrazBaseIE):
+ _VALID_URL = r'''(?x)
+ https?://tube\.tugraz\.at/paella/ui/browse\.html\?series=
+ (?P<id>[0-9a-fA-F]{8}-(?:[0-9a-fA-F]{4}-){3}[0-9a-fA-F]{12})
+ '''
+ _TESTS = [{
+ 'url': 'https://tube.tugraz.at/paella/ui/browse.html?series=0e6351b7-c372-491e-8a49-2c9b7e21c5a6',
+ 'id': '0e6351b7-c372-491e-8a49-2c9b7e21c5a6',
+ 'info_dict': {
+ 'id': '0e6351b7-c372-491e-8a49-2c9b7e21c5a6',
+ 'title': '[209351] Strassenwesen',
+ },
+ 'playlist': [
+ {
+ 'info_dict': {
+ 'id': 'ee17ce5d-34e2-48b7-a76a-fed148614e11',
+ 'series_id': '0e6351b7-c372-491e-8a49-2c9b7e21c5a6',
+ 'ext': 'mp4',
+ 'title': '#4 Detailprojekt',
+ 'episode': '#4 Detailprojekt',
+ 'series': '[209351] Strassenwesen',
+ 'creator': 'Neuhold R',
+ 'duration': 6127024,
+ }
+ },
+ {
+ 'info_dict': {
+ 'id': '87350498-799a-44d3-863f-d1518a98b114',
+ 'series_id': '0e6351b7-c372-491e-8a49-2c9b7e21c5a6',
+ 'ext': 'mp4',
+ 'title': '#3 Generelles Projekt',
+ 'episode': '#3 Generelles Projekt',
+ 'series': '[209351] Strassenwesen',
+ 'creator': 'Neuhold R',
+ 'duration': 5374422,
+ }
+ },
+ {
+ 'info_dict': {
+ 'id': '778599ea-489e-4189-9e05-3b4888e19bcd',
+ 'series_id': '0e6351b7-c372-491e-8a49-2c9b7e21c5a6',
+ 'ext': 'mp4',
+ 'title': '#2 Vorprojekt',
+ 'episode': '#2 Vorprojekt',
+ 'series': '[209351] Strassenwesen',
+ 'creator': 'Neuhold R',
+ 'duration': 5566404,
+ }
+ },
+ {
+ 'info_dict': {
+ 'id': '75e4c71c-d99d-4e56-b0e6-4f2bcdf11f29',
+ 'series_id': '0e6351b7-c372-491e-8a49-2c9b7e21c5a6',
+ 'ext': 'mp4',
+ 'title': '#1 Variantenstudium',
+ 'episode': '#1 Variantenstudium',
+ 'series': '[209351] Strassenwesen',
+ 'creator': 'Neuhold R',
+ 'duration': 5420200,
+ }
+ }
+ ],
+        'playlist_mincount': 4,
+ }]
+
+    def _real_extract(self, url):
+        series_id = self._match_id(url)
+        episodes_data = self._download_json(
+            self._API_EPISODE, series_id, query={'sid': series_id}, note='Downloading episode list')
+        series_data = self._download_json(
+            'https://tube.tugraz.at/series/series.json', series_id, fatal=False,
+            note='downloading series metadata', errnote='failed to download series metadata',
+            query={
+                'seriesId': series_id,
+                'count': 1,
+                'sort': 'TITLE',
+            })
+
+        return self.playlist_result(
+            map(self._extract_episode, episodes_data['search-results']['result']), series_id,
+            traverse_obj(series_data, ('catalogs', 0, 'http://purl.org/dc/terms/', 'title', 0, 'value')))
diff --git a/yt_dlp/extractor/tubitv.py b/yt_dlp/extractor/tubitv.py
new file mode 100644
index 0000000..bd46bc3
--- /dev/null
+++ b/yt_dlp/extractor/tubitv.py
@@ -0,0 +1,168 @@
+import re
+
+from .common import InfoExtractor
+from ..networking import Request
+from ..utils import (
+ ExtractorError,
+ int_or_none,
+ js_to_json,
+ traverse_obj,
+ urlencode_postdata,
+)
+
+
+class TubiTvIE(InfoExtractor):
+ _VALID_URL = r'''(?x)
+ (?:
+ tubitv:|
+ https?://(?:www\.)?tubitv\.com/(?:video|movies|tv-shows)/
+ )
+ (?P<id>[0-9]+)'''
+ _LOGIN_URL = 'http://tubitv.com/login'
+ _NETRC_MACHINE = 'tubitv'
+ _GEO_COUNTRIES = ['US']
+ _TESTS = [{
+ 'url': 'https://tubitv.com/movies/383676/tracker',
+ 'md5': '566fa0f76870302d11af0de89511d3f0',
+ 'info_dict': {
+ 'id': '383676',
+ 'ext': 'mp4',
+ 'title': 'Tracker',
+ 'description': 'md5:ff320baf43d0ad2655e538c1d5cd9706',
+ 'uploader_id': 'f866e2677ea2f0dff719788e4f7f9195',
+ 'release_year': 2010,
+ 'thumbnail': r're:^https?://.+\.(jpe?g|png)$',
+ 'duration': 6122,
+ },
+ }, {
+ 'url': 'http://tubitv.com/video/283829/the_comedian_at_the_friday',
+ 'md5': '43ac06be9326f41912dc64ccf7a80320',
+ 'info_dict': {
+ 'id': '283829',
+ 'ext': 'mp4',
+ 'title': 'The Comedian at The Friday',
+ 'description': 'A stand up comedian is forced to look at the decisions in his life while on a one week trip to the west coast.',
+ 'uploader_id': 'bc168bee0d18dd1cb3b86c68706ab434',
+ },
+ 'skip': 'Content Unavailable'
+ }, {
+ 'url': 'http://tubitv.com/tv-shows/321886/s01_e01_on_nom_stories',
+ 'only_matching': True,
+ }, {
+ 'url': 'https://tubitv.com/movies/560057/penitentiary?start=true',
+ 'info_dict': {
+ 'id': '560057',
+ 'ext': 'mp4',
+ 'title': 'Penitentiary',
+ 'description': 'md5:8d2fc793a93cc1575ff426fdcb8dd3f9',
+ 'uploader_id': 'd8fed30d4f24fcb22ec294421b9defc2',
+ 'release_year': 1979,
+ },
+ 'skip': 'Content Unavailable'
+ }]
+
+ # DRM formats are included only to raise appropriate error
+ _UNPLAYABLE_FORMATS = ('hlsv6_widevine', 'hlsv6_widevine_nonclearlead', 'hlsv6_playready_psshv0',
+ 'hlsv6_fairplay', 'dash_widevine', 'dash_widevine_nonclearlead')
+
+ def _perform_login(self, username, password):
+ self.report_login()
+ form_data = {
+ 'username': username,
+ 'password': password,
+ }
+ payload = urlencode_postdata(form_data)
+ request = Request(self._LOGIN_URL, payload)
+ request.headers['Content-Type'] = 'application/x-www-form-urlencoded'
+ login_page = self._download_webpage(
+ request, None, False, 'Wrong login info')
+ if not re.search(r'id="tubi-logout"', login_page):
+ raise ExtractorError(
+ 'Login failed (invalid username/password)', expected=True)
+
+ def _real_extract(self, url):
+ video_id = self._match_id(url)
+ video_data = self._download_json(f'https://tubitv.com/oz/videos/{video_id}/content', video_id, query={
+ 'video_resources': ['dash', 'hlsv3', 'hlsv6', *self._UNPLAYABLE_FORMATS],
+ })
+ title = video_data['title']
+
+ formats = []
+ drm_formats = False
+
+ for resource in video_data['video_resources']:
+ if resource['type'] in ('dash', ):
+ formats += self._extract_mpd_formats(resource['manifest']['url'], video_id, mpd_id=resource['type'], fatal=False)
+ elif resource['type'] in ('hlsv3', 'hlsv6'):
+ formats += self._extract_m3u8_formats(resource['manifest']['url'], video_id, 'mp4', m3u8_id=resource['type'], fatal=False)
+ elif resource['type'] in self._UNPLAYABLE_FORMATS:
+ drm_formats = True
+
+ if not formats and drm_formats:
+ self.report_drm(video_id)
+ elif not formats and not video_data.get('policy_match'): # policy_match is False if content was removed
+ raise ExtractorError('This content is currently unavailable', expected=True)
+
+ thumbnails = []
+ for thumbnail_url in video_data.get('thumbnails', []):
+ if not thumbnail_url:
+ continue
+ thumbnails.append({
+ 'url': self._proto_relative_url(thumbnail_url),
+ })
+
+ subtitles = {}
+ for sub in video_data.get('subtitles', []):
+ sub_url = sub.get('url')
+ if not sub_url:
+ continue
+ subtitles.setdefault(sub.get('lang', 'English'), []).append({
+ 'url': self._proto_relative_url(sub_url),
+ })
+
+ season_number, episode_number, episode_title = self._search_regex(
+ r'^S(\d+):E(\d+) - (.+)', title, 'episode info', fatal=False, group=(1, 2, 3), default=(None, None, None))
+
+ return {
+ 'id': video_id,
+ 'title': title,
+ 'formats': formats,
+ 'subtitles': subtitles,
+ 'thumbnails': thumbnails,
+ 'description': video_data.get('description'),
+ 'duration': int_or_none(video_data.get('duration')),
+ 'uploader_id': video_data.get('publisher_id'),
+ 'release_year': int_or_none(video_data.get('year')),
+ 'season_number': int_or_none(season_number),
+ 'episode_number': int_or_none(episode_number),
+            'episode': episode_title,
+ }
+
+
+class TubiTvShowIE(InfoExtractor):
+ _VALID_URL = r'https?://(?:www\.)?tubitv\.com/series/[0-9]+/(?P<show_name>[^/?#]+)'
+ _TESTS = [{
+ 'url': 'https://tubitv.com/series/3936/the-joy-of-painting-with-bob-ross?start=true',
+ 'playlist_mincount': 390,
+ 'info_dict': {
+ 'id': 'the-joy-of-painting-with-bob-ross',
+ }
+ }]
+
+ def _entries(self, show_url, show_name):
+ show_webpage = self._download_webpage(show_url, show_name)
+
+ show_json = self._parse_json(self._search_regex(
+ r'window\.__data\s*=\s*({[^<]+});\s*</script>',
+ show_webpage, 'data'), show_name, transform_source=js_to_json)['video']
+
+ for episode_id in show_json['fullContentById'].keys():
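+            # entries of type 's' appear to be season/series containers rather than episodes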
+ if traverse_obj(show_json, ('byId', episode_id, 'type')) == 's':
+ continue
+ yield self.url_result(
+                f'tubitv:{episode_id}',
+ ie=TubiTvIE.ie_key(), video_id=episode_id)
+
+ def _real_extract(self, url):
+ show_name = self._match_valid_url(url).group('show_name')
+ return self.playlist_result(self._entries(url, show_name), playlist_id=show_name)
diff --git a/yt_dlp/extractor/tumblr.py b/yt_dlp/extractor/tumblr.py
new file mode 100644
index 0000000..a26bdca
--- /dev/null
+++ b/yt_dlp/extractor/tumblr.py
@@ -0,0 +1,387 @@
+from .common import InfoExtractor
+from ..utils import (
+ ExtractorError,
+ int_or_none,
+ traverse_obj,
+    urlencode_postdata,
+)
+
+
+class TumblrIE(InfoExtractor):
+ _VALID_URL = r'https?://(?P<blog_name>[^/?#&]+)\.tumblr\.com/(?:post|video)/(?P<id>[0-9]+)(?:$|[/?#])'
+ _NETRC_MACHINE = 'tumblr'
+ _LOGIN_URL = 'https://www.tumblr.com/login'
+ _OAUTH_URL = 'https://www.tumblr.com/api/v2/oauth2/token'
+ _TESTS = [{
+ 'url': 'http://tatianamaslanydaily.tumblr.com/post/54196191430/orphan-black-dvd-extra-behind-the-scenes',
+ 'md5': '479bb068e5b16462f5176a6828829767',
+ 'info_dict': {
+ 'id': '54196191430',
+ 'ext': 'mp4',
+ 'title': 'md5:dfac39636969fe6bf1caa2d50405f069',
+ 'description': 'md5:390ab77358960235b6937ab3b8528956',
+ 'uploader_id': 'tatianamaslanydaily',
+ 'uploader_url': 'https://tatianamaslanydaily.tumblr.com/',
+ 'thumbnail': r're:^https?://.*\.jpg',
+ 'duration': 127,
+ 'like_count': int,
+ 'repost_count': int,
+ 'age_limit': 0,
+ 'tags': ['Orphan Black', 'Tatiana Maslany', 'Interview', 'Video', 'OB S1 DVD Extras'],
+ }
+ }, {
+ 'note': 'multiple formats',
+ 'url': 'https://maskofthedragon.tumblr.com/post/626907179849564160/mona-talking-in-english',
+ 'md5': 'f43ff8a8861712b6cf0e0c2bd84cfc68',
+ 'info_dict': {
+ 'id': '626907179849564160',
+ 'ext': 'mp4',
+ 'title': 'Mona\xa0“talking” in\xa0“english”',
+ 'description': 'md5:082a3a621530cb786ad2b7592a6d9e2c',
+ 'uploader_id': 'maskofthedragon',
+ 'uploader_url': 'https://maskofthedragon.tumblr.com/',
+ 'thumbnail': r're:^https?://.*\.jpg',
+ 'duration': 7,
+ 'like_count': int,
+ 'repost_count': int,
+ 'age_limit': 0,
+ 'tags': 'count:19',
+ },
+ 'params': {
+ 'format': 'hd',
+ },
+ }, {
+ 'note': 'non-iframe video (with related posts)',
+ 'url': 'https://shieldfoss.tumblr.com/post/675519763813908480',
+ 'md5': '12bdb75661ef443bffe5a4dac1dbf118',
+ 'info_dict': {
+ 'id': '675519763813908480',
+ 'ext': 'mp4',
+ 'title': 'Shieldfoss',
+ 'uploader_id': 'nerviovago',
+ 'uploader_url': 'https://nerviovago.tumblr.com/',
+ 'thumbnail': r're:^https?://.*\.jpg',
+ 'like_count': int,
+ 'repost_count': int,
+ 'age_limit': 0,
+ 'tags': [],
+ }
+ }, {
+ 'note': 'dashboard only (original post)',
+ 'url': 'https://jujanon.tumblr.com/post/159704441298/my-baby-eating',
+ 'md5': '029f7c91ab386701b211e3d494d2d95e',
+ 'info_dict': {
+ 'id': '159704441298',
+ 'ext': 'mp4',
+ 'title': 'md5:ba79365861101f4911452728d2950561',
+ 'description': 'md5:773738196cea76b6996ec71e285bdabc',
+ 'uploader_id': 'jujanon',
+ 'uploader_url': 'https://jujanon.tumblr.com/',
+ 'thumbnail': r're:^https?://.*\.jpg',
+ 'like_count': int,
+ 'repost_count': int,
+ 'age_limit': 0,
+ 'tags': ['crabs', 'my video', 'my pets'],
+ }
+ }, {
+ 'note': 'dashboard only (reblog)',
+ 'url': 'https://bartlebyshop.tumblr.com/post/180294460076/duality-of-bird',
+ 'md5': '04334e7cadb1af680d162912559f51a5',
+ 'info_dict': {
+ 'id': '180294460076',
+ 'ext': 'mp4',
+ 'title': 'duality of bird',
+ 'description': 'duality of bird',
+ 'uploader_id': 'todaysbird',
+ 'uploader_url': 'https://todaysbird.tumblr.com/',
+ 'thumbnail': r're:^https?://.*\.jpg',
+ 'like_count': int,
+ 'repost_count': int,
+ 'age_limit': 0,
+ 'tags': [],
+ }
+ }, {
+ 'note': 'dashboard only (external)',
+ 'url': 'https://afloweroutofstone.tumblr.com/post/675661759168823296/the-blues-remembers-everything-the-country-forgot',
+ 'info_dict': {
+ 'id': 'q67_fd7b8SU',
+ 'ext': 'mp4',
+ 'title': 'The Blues Remembers Everything the Country Forgot',
+ 'alt_title': 'The Blues Remembers Everything the Country Forgot',
+ 'description': 'md5:1a6b4097e451216835a24c1023707c79',
+ 'release_date': '20201224',
+ 'creator': 'md5:c2239ba15430e87c3b971ba450773272',
+ 'uploader': 'Moor Mother - Topic',
+ 'upload_date': '20201223',
+ 'uploader_id': 'UCxrMtFBRkFvQJ_vVM4il08w',
+ 'uploader_url': 'http://www.youtube.com/channel/UCxrMtFBRkFvQJ_vVM4il08w',
+ 'thumbnail': r're:^https?://i.ytimg.com/.*',
+ 'channel': 'Moor Mother - Topic',
+ 'channel_id': 'UCxrMtFBRkFvQJ_vVM4il08w',
+ 'channel_url': 'https://www.youtube.com/channel/UCxrMtFBRkFvQJ_vVM4il08w',
+ 'channel_follower_count': int,
+ 'duration': 181,
+ 'view_count': int,
+ 'like_count': int,
+ 'age_limit': 0,
+ 'categories': ['Music'],
+ 'tags': 'count:7',
+ 'live_status': 'not_live',
+ 'playable_in_embed': True,
+ 'availability': 'public',
+ 'track': 'The Blues Remembers Everything the Country Forgot',
+ 'artist': 'md5:c2239ba15430e87c3b971ba450773272',
+ 'album': 'Brass',
+ 'release_year': 2020,
+ },
+ 'add_ie': ['Youtube'],
+ }, {
+ 'url': 'http://naked-yogi.tumblr.com/post/118312946248/naked-smoking-stretching',
+ 'md5': 'de07e5211d60d4f3a2c3df757ea9f6ab',
+ 'info_dict': {
+ 'id': 'Wmur',
+ 'ext': 'mp4',
+ 'title': 'naked smoking & stretching',
+ 'upload_date': '20150506',
+ 'timestamp': 1430931613,
+ 'age_limit': 18,
+ 'uploader_id': '1638622',
+ 'uploader': 'naked-yogi',
+ },
+ # 'add_ie': ['Vidme'],
+ 'skip': 'dead embedded video host'
+ }, {
+ 'url': 'https://prozdvoices.tumblr.com/post/673201091169681408/what-recording-voice-acting-sounds-like',
+ 'md5': 'a0063fc8110e6c9afe44065b4ea68177',
+ 'info_dict': {
+ 'id': 'eomhW5MLGWA',
+ 'ext': 'mp4',
+ 'title': 'what recording voice acting sounds like',
+ 'description': 'md5:1da3faa22d0e0b1d8b50216c284ee798',
+ 'uploader': 'ProZD',
+ 'upload_date': '20220112',
+ 'uploader_id': 'ProZD',
+ 'uploader_url': 'http://www.youtube.com/user/ProZD',
+ 'thumbnail': r're:^https?://i.ytimg.com/.*',
+ 'channel': 'ProZD',
+ 'channel_id': 'UC6MFZAOHXlKK1FI7V0XQVeA',
+ 'channel_url': 'https://www.youtube.com/channel/UC6MFZAOHXlKK1FI7V0XQVeA',
+ 'channel_follower_count': int,
+ 'duration': 20,
+ 'view_count': int,
+ 'like_count': int,
+ 'age_limit': 0,
+ 'categories': ['Film & Animation'],
+ 'tags': [],
+ 'live_status': 'not_live',
+ 'playable_in_embed': True,
+ 'availability': 'public',
+ },
+ 'add_ie': ['Youtube'],
+ }, {
+ 'url': 'https://dominustempori.tumblr.com/post/673572712813297664/youtubes-all-right-for-some-pretty-cool',
+ 'md5': '203e9eb8077e3f45bfaeb4c86c1467b8',
+ 'info_dict': {
+ 'id': '87816359',
+ 'ext': 'mov',
+ 'title': 'Harold Ramis',
+ 'description': 'md5:be8e68cbf56ce0785c77f0c6c6dfaf2c',
+ 'uploader': 'Resolution Productions Group',
+ 'uploader_id': 'resolutionproductions',
+ 'uploader_url': 'https://vimeo.com/resolutionproductions',
+ 'upload_date': '20140227',
+ 'thumbnail': r're:^https?://i.vimeocdn.com/video/.*',
+ 'timestamp': 1393523719,
+ 'duration': 291,
+ },
+ 'add_ie': ['Vimeo'],
+ }, {
+ 'url': 'http://sutiblr.tumblr.com/post/139638707273',
+ 'md5': '2dd184b3669e049ba40563a7d423f95c',
+ 'info_dict': {
+ 'id': 'ir7qBEIKqvq',
+ 'ext': 'mp4',
+ 'title': 'Vine by sutiblr',
+ 'alt_title': 'Vine by sutiblr',
+ 'uploader': 'sutiblr',
+ 'uploader_id': '1198993975374495744',
+ 'upload_date': '20160220',
+ 'like_count': int,
+ 'comment_count': int,
+ 'repost_count': int,
+ 'thumbnail': r're:^https?://.*\.jpg',
+ 'timestamp': 1455940159,
+ 'view_count': int,
+ },
+ 'add_ie': ['Vine'],
+ }, {
+ 'url': 'https://silami.tumblr.com/post/84250043974/my-bad-river-flows-in-you-impression-on-maschine',
+ 'md5': '3c92d7c3d867f14ccbeefa2119022277',
+ 'info_dict': {
+ 'id': 'nYtvtTPuTl',
+ 'ext': 'mp4',
+ 'title': 'Video by silbulterman',
+ 'description': '#maschine',
+ 'uploader_id': '242859024',
+ 'thumbnail': r're:^https?://.*\.jpg',
+ 'timestamp': 1398801174,
+ 'like_count': int,
+ 'uploader': 'Sil',
+ 'channel': 'silbulterman',
+ 'comment_count': int,
+ 'upload_date': '20140429',
+ },
+ 'add_ie': ['Instagram'],
+ }]
+
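+    # maps the provider names used by the API to the matching extractors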
+ _providers = {
+ 'instagram': 'Instagram',
+ 'vimeo': 'Vimeo',
+ 'vine': 'Vine',
+ 'youtube': 'Youtube',
+ }
+
+ _ACCESS_TOKEN = None
+
+ def _initialize_pre_login(self):
+ login_page = self._download_webpage(
+ self._LOGIN_URL, None, 'Downloading login page', fatal=False)
+ if login_page:
+ self._ACCESS_TOKEN = self._search_regex(
+ r'"API_TOKEN":\s*"(\w+)"', login_page, 'API access token', fatal=False)
+ if not self._ACCESS_TOKEN:
+ self.report_warning('Failed to get access token; metadata will be missing and some videos may not work')
+
+ def _perform_login(self, username, password):
+ if not self._ACCESS_TOKEN:
+ return
+
+ self._download_json(
+ self._OAUTH_URL, None, 'Logging in',
+ data=urlencode_postdata({
+ 'password': password,
+ 'grant_type': 'password',
+ 'username': username,
+ }), headers={
+ 'Content-Type': 'application/x-www-form-urlencoded',
+ 'Authorization': f'Bearer {self._ACCESS_TOKEN}',
+ },
+ errnote='Login failed', fatal=False)
+
+ def _real_extract(self, url):
+ blog, video_id = self._match_valid_url(url).groups()
+
+ url = f'http://{blog}.tumblr.com/post/{video_id}/'
+ webpage, urlh = self._download_webpage_handle(url, video_id)
+
+ redirect_url = urlh.url
+
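+        # these redirect targets indicate a post that is only viewable
+        # through the dashboard, i.e. via the API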
+ api_only = bool(self._search_regex(
+            r'(tumblr\.com|^)/(safe-mode|login_required|blog/view)',
+ redirect_url, 'redirect', default=None))
+
+ if api_only and not self._ACCESS_TOKEN:
+ raise ExtractorError('Cannot get data for dashboard-only post without access token')
+
+ post_json = {}
+ if self._ACCESS_TOKEN:
+ post_json = traverse_obj(
+ self._download_json(
+ f'https://www.tumblr.com/api/v2/blog/{blog}/posts/{video_id}/permalink',
+ video_id, headers={'Authorization': f'Bearer {self._ACCESS_TOKEN}'}, fatal=False),
+ ('response', 'timeline', 'elements', 0)) or {}
+        content_json = traverse_obj(post_json, ('trail', 0, 'content'), 'content') or []
+ video_json = next(
+ (item for item in content_json if item.get('type') == 'video'), {})
+ media_json = video_json.get('media') or {}
+ if api_only and not media_json.get('url') and not video_json.get('url'):
+ raise ExtractorError('Failed to find video data for dashboard-only post')
+
+ if not media_json.get('url') and video_json.get('url'):
+ # external video host
+ return self.url_result(
+ video_json['url'],
+ self._providers.get(video_json.get('provider'), 'Generic'))
+
+ video_url = self._og_search_video_url(webpage, default=None)
+ duration = None
+ formats = []
+
+ # iframes can supply duration and sometimes additional formats, so check for one
+ iframe_url = self._search_regex(
+ fr'src=\'(https?://www\.tumblr\.com/video/{blog}/{video_id}/[^\']+)\'',
+ webpage, 'iframe url', default=None)
+ if iframe_url:
+ iframe = self._download_webpage(
+ iframe_url, video_id, 'Downloading iframe page',
+ headers={'Referer': redirect_url})
+
+ options = self._parse_json(
+ self._search_regex(
+ r'data-crt-options=(["\'])(?P<options>.+?)\1', iframe,
+ 'hd video url', default='', group='options'),
+ video_id, fatal=False)
+ if options:
+ duration = int_or_none(options.get('duration'))
+
+ hd_url = options.get('hdUrl')
+ if hd_url:
+ # there are multiple formats; extract them
+ # ignore other sources of width/height data as they may be wrong
+ sources = []
+ sd_url = self._search_regex(
+ r'<source[^>]+src=(["\'])(?P<url>.+?)\1', iframe,
+ 'sd video url', default=None, group='url')
+ if sd_url:
+ sources.append((sd_url, 'sd'))
+ sources.append((hd_url, 'hd'))
+
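+                    # the enumerate index doubles as quality, so the hd source appended last ranks highest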
+                    formats = [{
+                        'url': fmt_url,
+                        'format_id': format_id,
+                        'height': int_or_none(self._search_regex(
+                            r'_(\d+)\.\w+$', fmt_url, 'height', default=None)),
+                        'quality': quality,
+                    } for quality, (fmt_url, format_id) in enumerate(sources)]
+
+ if not media_json.get('url') and not video_url and not iframe_url:
+ # external video host (but we weren't able to figure it out from the api)
+ iframe_url = self._search_regex(
+ r'src=["\'](https?://safe\.txmblr\.com/svc/embed/inline/[^"\']+)["\']',
+ webpage, 'embed iframe url', default=None)
+ return self.url_result(iframe_url or redirect_url, 'Generic')
+
+ formats = formats or [{
+ 'url': media_json.get('url') or video_url,
+ 'width': int_or_none(
+ media_json.get('width') or self._og_search_property('video:width', webpage, default=None)),
+ 'height': int_or_none(
+ media_json.get('height') or self._og_search_property('video:height', webpage, default=None)),
+ }]
+
+ # the url we're extracting from might be an original post or it might be a reblog.
+ # if it's a reblog, og:description will be the reblogger's comment, not the uploader's.
+ # content_json is always the op, so if it exists but has no text, there's no description
+ if content_json:
+ description = '\n\n'.join((
+ item.get('text') for item in content_json if item.get('type') == 'text')) or None
+ else:
+ description = self._og_search_description(webpage, default=None)
+ uploader_id = traverse_obj(post_json, 'reblogged_root_name', 'blog_name')
+
+ return {
+ 'id': video_id,
+ 'title': post_json.get('summary') or (blog if api_only else self._html_search_regex(
+ r'(?s)<title>(?P<title>.*?)(?: \| Tumblr)?</title>', webpage, 'title')),
+ 'description': description,
+ 'thumbnail': (traverse_obj(video_json, ('poster', 0, 'url'))
+ or self._og_search_thumbnail(webpage, default=None)),
+ 'uploader_id': uploader_id,
+ 'uploader_url': f'https://{uploader_id}.tumblr.com/' if uploader_id else None,
+ 'duration': duration,
+ 'like_count': post_json.get('like_count'),
+ 'repost_count': post_json.get('reblog_count'),
+ 'age_limit': {True: 18, False: 0}.get(post_json.get('is_nsfw')),
+ 'tags': post_json.get('tags'),
+ 'formats': formats,
+ }
diff --git a/yt_dlp/extractor/tunein.py b/yt_dlp/extractor/tunein.py
new file mode 100644
index 0000000..fd2fe13
--- /dev/null
+++ b/yt_dlp/extractor/tunein.py
@@ -0,0 +1,234 @@
+import urllib.parse
+
+from .common import InfoExtractor
+from ..utils import (
+ OnDemandPagedList,
+ determine_ext,
+ parse_iso8601,
+ traverse_obj,
+)
+
+
+class TuneInBaseIE(InfoExtractor):
+ _VALID_URL_BASE = r'https?://(?:www\.)?tunein\.com'
+
+ def _extract_metadata(self, webpage, content_id):
+        return self._search_json(r'window\.INITIAL_STATE=', webpage, 'hydration', content_id, fatal=False)
+
+ def _extract_formats_and_subtitles(self, content_id):
+ streams = self._download_json(
+ f'https://opml.radiotime.com/Tune.ashx?render=json&formats=mp3,aac,ogg,flash,hls&id={content_id}',
+ content_id)['body']
+
+ formats, subtitles = [], {}
+ for stream in streams:
+ if stream.get('media_type') == 'hls':
+ fmts, subs = self._extract_m3u8_formats_and_subtitles(stream['url'], content_id, fatal=False)
+ formats.extend(fmts)
+ self._merge_subtitles(subs, target=subtitles)
+ elif determine_ext(stream['url']) == 'pls':
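+                # a .pls playlist lists its streams as File1=, File2=, ...; take the first entry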
+ playlist_content = self._download_webpage(stream['url'], content_id)
+ formats.append({
+ 'url': self._search_regex(r'File1=(.*)', playlist_content, 'url', fatal=False),
+ 'abr': stream.get('bitrate'),
+ 'ext': stream.get('media_type'),
+ })
+ else:
+ formats.append({
+ 'url': stream['url'],
+ 'abr': stream.get('bitrate'),
+ 'ext': stream.get('media_type'),
+ })
+
+ return formats, subtitles
+
+
+class TuneInStationIE(TuneInBaseIE):
+ _VALID_URL = TuneInBaseIE._VALID_URL_BASE + r'(?:/radio/[^?#]+-|/embed/player/)(?P<id>s\d+)'
+ _EMBED_REGEX = [r'<iframe[^>]+src=["\'](?P<url>(?:https?://)?tunein\.com/embed/player/s\d+)']
+
+ _TESTS = [{
+ 'url': 'https://tunein.com/radio/Jazz24-885-s34682/',
+ 'info_dict': {
+ 'id': 's34682',
+ 'title': 're:^Jazz24',
+ 'description': 'md5:d6d0b89063fd68d529fa7058ee98619b',
+ 'thumbnail': 're:^https?://[^?&]+/s34682',
+ 'location': 'Seattle-Tacoma, US',
+ 'ext': 'mp3',
+ 'live_status': 'is_live',
+ },
+ 'params': {
+ 'skip_download': True,
+ },
+ }, {
+ 'url': 'https://tunein.com/embed/player/s6404/',
+ 'only_matching': True,
+ }, {
+ 'url': 'https://tunein.com/radio/BBC-Radio-1-988-s24939/',
+ 'info_dict': {
+ 'id': 's24939',
+ 'title': 're:^BBC Radio 1',
+ 'description': 'md5:f3f75f7423398d87119043c26e7bfb84',
+ 'thumbnail': 're:^https?://[^?&]+/s24939',
+ 'location': 'London, UK',
+ 'ext': 'mp3',
+ 'live_status': 'is_live',
+ },
+ 'params': {
+ 'skip_download': True,
+ },
+ }]
+
+ def _real_extract(self, url):
+ station_id = self._match_id(url)
+
+ webpage = self._download_webpage(url, station_id)
+ metadata = self._extract_metadata(webpage, station_id)
+
+ formats, subtitles = self._extract_formats_and_subtitles(station_id)
+ return {
+ 'id': station_id,
+ 'title': traverse_obj(metadata, ('profiles', station_id, 'title')),
+ 'description': traverse_obj(metadata, ('profiles', station_id, 'description')),
+ 'thumbnail': traverse_obj(metadata, ('profiles', station_id, 'image')),
+ 'timestamp': parse_iso8601(
+ traverse_obj(metadata, ('profiles', station_id, 'actions', 'play', 'publishTime'))),
+ 'location': traverse_obj(
+ metadata, ('profiles', station_id, 'metadata', 'properties', 'location', 'displayName'),
+ ('profiles', station_id, 'properties', 'location', 'displayName')),
+ 'formats': formats,
+ 'subtitles': subtitles,
+ 'is_live': traverse_obj(metadata, ('profiles', station_id, 'actions', 'play', 'isLive')),
+ }
+
+
+class TuneInPodcastIE(TuneInBaseIE):
+ _VALID_URL = TuneInBaseIE._VALID_URL_BASE + r'/(?:podcasts/[^?#]+-|embed/player/)(?P<id>p\d+)/?(?:#|$)'
+ _EMBED_REGEX = [r'<iframe[^>]+src=["\'](?P<url>(?:https?://)?tunein\.com/embed/player/p\d+)']
+
+ _TESTS = [{
+ 'url': 'https://tunein.com/podcasts/Technology-Podcasts/Artificial-Intelligence-p1153019',
+ 'info_dict': {
+ 'id': 'p1153019',
+ 'title': 'Lex Fridman Podcast',
+ 'description': 'md5:bedc4e5f1c94f7dec6e4317b5654b00d',
+ },
+ 'playlist_mincount': 200,
+ }, {
+ 'url': 'https://tunein.com/embed/player/p191660/',
+ 'only_matching': True
+ }, {
+ 'url': 'https://tunein.com/podcasts/World-News/BBC-News-p14/',
+ 'info_dict': {
+ 'id': 'p14',
+ 'title': 'BBC News',
+ 'description': 'md5:1218e575eeaff75f48ed978261fa2068',
+ },
+ 'playlist_mincount': 200,
+ }]
+
+ _PAGE_SIZE = 30
+
+ def _real_extract(self, url):
+ podcast_id = self._match_id(url)
+
+ webpage = self._download_webpage(url, podcast_id, fatal=False)
+ metadata = self._extract_metadata(webpage, podcast_id)
+
+ def page_func(page_num):
+ api_response = self._download_json(
+ f'https://api.tunein.com/profiles/{podcast_id}/contents', podcast_id,
+ note=f'Downloading page {page_num + 1}', query={
+ 'filter': 't:free',
+ 'offset': page_num * self._PAGE_SIZE,
+ 'limit': self._PAGE_SIZE,
+ })
+
+ return [
+ self.url_result(
+ f'https://tunein.com/podcasts/{podcast_id}?topicId={episode["GuideId"][1:]}',
+ TuneInPodcastEpisodeIE, title=episode.get('Title'))
+ for episode in api_response['Items']]
+
+ entries = OnDemandPagedList(page_func, self._PAGE_SIZE)
+ return self.playlist_result(
+ entries, playlist_id=podcast_id, title=traverse_obj(metadata, ('profiles', podcast_id, 'title')),
+ description=traverse_obj(metadata, ('profiles', podcast_id, 'description')))
+
+
+class TuneInPodcastEpisodeIE(TuneInBaseIE):
+ _VALID_URL = TuneInBaseIE._VALID_URL_BASE + r'/podcasts/(?:[^?&]+-)?(?P<podcast_id>p\d+)/?\?topicId=(?P<id>\w\d+)'
+
+ _TESTS = [{
+ 'url': 'https://tunein.com/podcasts/Technology-Podcasts/Artificial-Intelligence-p1153019/?topicId=236404354',
+ 'info_dict': {
+ 'id': 't236404354',
+ 'title': '#351 \u2013 MrBeast: Future of YouTube, Twitter, TikTok, and Instagram',
+ 'description': 'md5:e1734db6f525e472c0c290d124a2ad77',
+ 'thumbnail': 're:^https?://[^?&]+/p1153019',
+ 'timestamp': 1673458571,
+ 'upload_date': '20230111',
+ 'series_id': 'p1153019',
+ 'series': 'Lex Fridman Podcast',
+ 'ext': 'mp3',
+ },
+ }]
+
+ def _real_extract(self, url):
+ podcast_id, episode_id = self._match_valid_url(url).group('podcast_id', 'id')
+ episode_id = f't{episode_id}'
+
+ webpage = self._download_webpage(url, episode_id)
+ metadata = self._extract_metadata(webpage, episode_id)
+
+ formats, subtitles = self._extract_formats_and_subtitles(episode_id)
+ return {
+ 'id': episode_id,
+ 'title': traverse_obj(metadata, ('profiles', episode_id, 'title')),
+ 'description': traverse_obj(metadata, ('profiles', episode_id, 'description')),
+ 'thumbnail': traverse_obj(metadata, ('profiles', episode_id, 'image')),
+ 'timestamp': parse_iso8601(
+ traverse_obj(metadata, ('profiles', episode_id, 'actions', 'play', 'publishTime'))),
+ 'series_id': podcast_id,
+ 'series': traverse_obj(metadata, ('profiles', podcast_id, 'title')),
+ 'formats': formats,
+ 'subtitles': subtitles,
+ }
+
+
+class TuneInShortenerIE(InfoExtractor):
+ IE_NAME = 'tunein:shortener'
+ IE_DESC = False # Do not list
+ _VALID_URL = r'https?://tun\.in/(?P<id>[A-Za-z0-9]+)'
+
+ _TEST = {
+ # test redirection
+ 'url': 'http://tun.in/ser7s',
+ 'info_dict': {
+ 'id': 's34682',
+ 'title': 're:^Jazz24',
+ 'description': 'md5:d6d0b89063fd68d529fa7058ee98619b',
+ 'thumbnail': 're:^https?://[^?&]+/s34682',
+ 'location': 'Seattle-Tacoma, US',
+ 'ext': 'mp3',
+ 'live_status': 'is_live',
+ },
+ 'params': {
+ 'skip_download': True, # live stream
+ },
+ }
+
+ def _real_extract(self, url):
+ redirect_id = self._match_id(url)
+ # The server doesn't support HEAD requests
+ urlh = self._request_webpage(
+ url, redirect_id, note='Downloading redirect page')
+
+ url = urlh.url
+ url_parsed = urllib.parse.urlparse(url)
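+        # an explicit :443 port is redundant for https and may not match other extractors' _VALID_URLs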
+ if url_parsed.port == 443:
+ url = url_parsed._replace(netloc=url_parsed.hostname).url
+
+ self.to_screen('Following redirect: %s' % url)
+ return self.url_result(url)
diff --git a/yt_dlp/extractor/turner.py b/yt_dlp/extractor/turner.py
new file mode 100644
index 0000000..630d84b
--- /dev/null
+++ b/yt_dlp/extractor/turner.py
@@ -0,0 +1,256 @@
+import re
+
+from .adobepass import AdobePassIE
+from ..compat import compat_str
+from ..utils import (
+    ExtractorError,
+    determine_ext,
+    fix_xml_ampersands,
+    float_or_none,
+    int_or_none,
+    parse_duration,
+    strip_or_none,
+    update_url_query,
+    url_or_none,
+    xpath_attr,
+    xpath_text,
+)
+
+
+class TurnerBaseIE(AdobePassIE):
+ _AKAMAI_SPE_TOKEN_CACHE = {}
+
+ def _extract_timestamp(self, video_data):
+ return int_or_none(xpath_attr(video_data, 'dateCreated', 'uts'))
+
+ def _add_akamai_spe_token(self, tokenizer_src, video_url, content_id, ap_data, custom_tokenizer_query=None):
+ secure_path = self._search_regex(r'https?://[^/]+(.+/)', video_url, 'secure path') + '*'
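+        # SPE tokens are scoped to the secure path, so cache and reuse one token per path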
+ token = self._AKAMAI_SPE_TOKEN_CACHE.get(secure_path)
+ if not token:
+ query = {
+ 'path': secure_path,
+ }
+ if custom_tokenizer_query:
+ query.update(custom_tokenizer_query)
+ else:
+ query['videoId'] = content_id
+ if ap_data.get('auth_required'):
+ query['accessToken'] = self._extract_mvpd_auth(ap_data['url'], content_id, ap_data['site_name'], ap_data['site_name'])
+ auth = self._download_xml(
+ tokenizer_src, content_id, query=query)
+ error_msg = xpath_text(auth, 'error/msg')
+ if error_msg:
+ raise ExtractorError(error_msg, expected=True)
+ token = xpath_text(auth, 'token')
+ if not token:
+ return video_url
+ self._AKAMAI_SPE_TOKEN_CACHE[secure_path] = token
+ return video_url + '?hdnea=' + token
+
+ def _extract_cvp_info(self, data_src, video_id, path_data={}, ap_data={}, fatal=False):
+ video_data = self._download_xml(
+ data_src, video_id,
+ transform_source=lambda s: fix_xml_ampersands(s).strip(),
+ fatal=fatal)
+ if not video_data:
+ return {}
+ video_id = video_data.attrib['id']
+ title = xpath_text(video_data, 'headline', fatal=True)
+ content_id = xpath_text(video_data, 'contentId') or video_id
+ # rtmp_src = xpath_text(video_data, 'akamai/src')
+ # if rtmp_src:
+ # split_rtmp_src = rtmp_src.split(',')
+ # if len(split_rtmp_src) == 2:
+ # rtmp_src = split_rtmp_src[1]
+ # aifp = xpath_text(video_data, 'akamai/aifp', default='')
+
+ urls = []
+ formats = []
+ thumbnails = []
+ subtitles = {}
+ rex = re.compile(
+ r'(?P<width>[0-9]+)x(?P<height>[0-9]+)(?:_(?P<bitrate>[0-9]+))?')
+ # Possible formats locations: files/file, files/groupFiles/files
+ # and maybe others
+ for video_file in video_data.findall('.//file'):
+ video_url = url_or_none(video_file.text.strip())
+ if not video_url:
+ continue
+ ext = determine_ext(video_url)
+ if video_url.startswith('/mp4:protected/'):
+ continue
+ # TODO Correct extraction for these files
+ # protected_path_data = path_data.get('protected')
+ # if not protected_path_data or not rtmp_src:
+ # continue
+ # protected_path = self._search_regex(
+ # r'/mp4:(.+)\.[a-z0-9]', video_url, 'secure path')
+ # auth = self._download_webpage(
+ # protected_path_data['tokenizer_src'], query={
+ # 'path': protected_path,
+ # 'videoId': content_id,
+ # 'aifp': aifp,
+ # })
+ # token = xpath_text(auth, 'token')
+ # if not token:
+ # continue
+ # video_url = rtmp_src + video_url + '?' + token
+ elif video_url.startswith('/secure/'):
+ secure_path_data = path_data.get('secure')
+ if not secure_path_data:
+ continue
+ video_url = self._add_akamai_spe_token(
+ secure_path_data['tokenizer_src'],
+ secure_path_data['media_src'] + video_url,
+ content_id, ap_data)
+ elif not re.match('https?://', video_url):
+ base_path_data = path_data.get(ext, path_data.get('default', {}))
+ media_src = base_path_data.get('media_src')
+ if not media_src:
+ continue
+ video_url = media_src + video_url
+ if video_url in urls:
+ continue
+ urls.append(video_url)
+ format_id = video_file.get('bitrate')
+ if ext in ('scc', 'srt', 'vtt'):
+ subtitles.setdefault('en', []).append({
+ 'ext': ext,
+ 'url': video_url,
+ })
+ elif ext == 'png':
+ thumbnails.append({
+ 'id': format_id,
+ 'url': video_url,
+ })
+ elif ext == 'smil':
+ formats.extend(self._extract_smil_formats(
+ video_url, video_id, fatal=False))
+ elif re.match(r'https?://[^/]+\.akamaihd\.net/[iz]/', video_url):
+ formats.extend(self._extract_akamai_formats(
+ video_url, video_id, {
+ 'hds': path_data.get('f4m', {}).get('host'),
+ # nba.cdn.turner.com, ht.cdn.turner.com, ht2.cdn.turner.com
+ # ht3.cdn.turner.com, i.cdn.turner.com, s.cdn.turner.com
+ # ssl.cdn.turner.com
+ 'http': 'pmd.cdn.turner.com',
+ }))
+ elif ext == 'm3u8':
+ m3u8_formats = self._extract_m3u8_formats(
+ video_url, video_id, 'mp4',
+ m3u8_id=format_id or 'hls', fatal=False)
+ if '/secure/' in video_url and '?hdnea=' in video_url:
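+                        # pass -seekable 0 so ffmpeg treats these tokenized (hdnea) streams as non-seekable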
+ for f in m3u8_formats:
+ f['downloader_options'] = {'ffmpeg_args': ['-seekable', '0']}
+ formats.extend(m3u8_formats)
+ elif ext == 'f4m':
+ formats.extend(self._extract_f4m_formats(
+ update_url_query(video_url, {'hdcore': '3.7.0'}),
+ video_id, f4m_id=format_id or 'hds', fatal=False))
+ else:
+ f = {
+ 'format_id': format_id,
+ 'url': video_url,
+ 'ext': ext,
+ }
+ mobj = rex.search(video_url)
+ if mobj:
+ f.update({
+ 'width': int(mobj.group('width')),
+ 'height': int(mobj.group('height')),
+ 'tbr': int_or_none(mobj.group('bitrate')),
+ })
+ elif isinstance(format_id, compat_str):
+ if format_id.isdigit():
+ f['tbr'] = int(format_id)
+ else:
+ mobj = re.match(r'ios_(audio|[0-9]+)$', format_id)
+ if mobj:
+ if mobj.group(1) == 'audio':
+ f.update({
+ 'vcodec': 'none',
+ 'ext': 'm4a',
+ })
+ else:
+ f['tbr'] = int(mobj.group(1))
+ formats.append(f)
+
+ for source in video_data.findall('closedCaptions/source'):
+ for track in source.findall('track'):
+ track_url = url_or_none(track.get('url'))
+ if not track_url or track_url.endswith('/big'):
+ continue
+ lang = track.get('lang') or track.get('label') or 'en'
+ subtitles.setdefault(lang, []).append({
+ 'url': track_url,
+ 'ext': {
+ 'scc': 'scc',
+ 'webvtt': 'vtt',
+ 'smptett': 'tt',
+ }.get(source.get('format'))
+ })
+
+ thumbnails.extend({
+ 'id': image.get('cut') or image.get('name'),
+ 'url': image.text,
+ 'width': int_or_none(image.get('width')),
+ 'height': int_or_none(image.get('height')),
+ } for image in video_data.findall('images/image'))
+
+ is_live = xpath_text(video_data, 'isLive') == 'true'
+
+ return {
+ 'id': video_id,
+ 'title': title,
+ 'formats': formats,
+ 'subtitles': subtitles,
+ 'thumbnails': thumbnails,
+ 'thumbnail': xpath_text(video_data, 'poster'),
+ 'description': strip_or_none(xpath_text(video_data, 'description')),
+ 'duration': parse_duration(xpath_text(video_data, 'length') or xpath_text(video_data, 'trt')),
+ 'timestamp': self._extract_timestamp(video_data),
+ 'upload_date': xpath_attr(video_data, 'metas', 'version'),
+ 'series': xpath_text(video_data, 'showTitle'),
+ 'season_number': int_or_none(xpath_text(video_data, 'seasonNumber')),
+ 'episode_number': int_or_none(xpath_text(video_data, 'episodeNumber')),
+ 'is_live': is_live,
+ }
+
+ def _extract_ngtv_info(self, media_id, tokenizer_query, ap_data=None):
+        is_live = (ap_data or {}).get('is_live')
+ streams_data = self._download_json(
+ 'http://medium.ngtv.io/media/%s/tv' % media_id,
+ media_id)['media']['tv']
+ duration = None
+ chapters = []
+ formats = []
+ for supported_type in ('unprotected', 'bulkaes'):
+ stream_data = streams_data.get(supported_type, {})
+ m3u8_url = stream_data.get('secureUrl') or stream_data.get('url')
+ if not m3u8_url:
+ continue
+ if stream_data.get('playlistProtection') == 'spe':
+ m3u8_url = self._add_akamai_spe_token(
+ 'http://token.ngtv.io/token/token_spe',
+ m3u8_url, media_id, ap_data or {}, tokenizer_query)
+ formats.extend(self._extract_m3u8_formats(
+ m3u8_url, media_id, 'mp4', m3u8_id='hls', live=is_live, fatal=False))
+
+ duration = float_or_none(stream_data.get('totalRuntime'))
+
+ if not chapters and not is_live:
+ for chapter in stream_data.get('contentSegments', []):
+ start_time = float_or_none(chapter.get('start'))
+ chapter_duration = float_or_none(chapter.get('duration'))
+ if start_time is None or chapter_duration is None:
+ continue
+ chapters.append({
+ 'start_time': start_time,
+ 'end_time': start_time + chapter_duration,
+ })
+
+ return {
+ 'formats': formats,
+ 'chapters': chapters,
+ 'duration': duration,
+ }
diff --git a/yt_dlp/extractor/tv2.py b/yt_dlp/extractor/tv2.py
new file mode 100644
index 0000000..7756aa3
--- /dev/null
+++ b/yt_dlp/extractor/tv2.py
@@ -0,0 +1,324 @@
+import re
+
+from .common import InfoExtractor
+from ..networking.exceptions import HTTPError
+from ..utils import (
+    ExtractorError,
+    determine_ext,
+    float_or_none,
+    int_or_none,
+ js_to_json,
+ parse_iso8601,
+ remove_end,
+ strip_or_none,
+ try_get,
+)
+
+
+class TV2IE(InfoExtractor):
+ _VALID_URL = r'https?://(?:www\.)?tv2\.no/v(?:ideo)?\d*/(?:[^?#]+/)*(?P<id>\d+)'
+ _TESTS = [{
+ 'url': 'http://www.tv2.no/v/1791207/',
+ 'info_dict': {
+ 'id': '1791207',
+ 'ext': 'mp4',
+ 'title': 'Her kolliderer romsonden med asteroiden ',
+ 'description': 'En romsonde har krasjet inn i en asteroide i verdensrommet. Kollisjonen skjedde klokken 01:14 natt til tirsdag 27. september norsk tid. \n\nNasa kaller det sitt første forsøk på planetforsvar.',
+ 'timestamp': 1664238190,
+ 'upload_date': '20220927',
+ 'duration': 146,
+ 'thumbnail': r're:^https://.*$',
+ 'view_count': int,
+ 'categories': list,
+ },
+ }, {
+ 'url': 'http://www.tv2.no/v2/916509',
+ 'only_matching': True,
+ }, {
+ 'url': 'https://www.tv2.no/video/nyhetene/her-kolliderer-romsonden-med-asteroiden/1791207/',
+ 'only_matching': True,
+ }]
+ _PROTOCOLS = ('HLS', 'DASH')
+ _GEO_COUNTRIES = ['NO']
+
+ def _real_extract(self, url):
+ video_id = self._match_id(url)
+ asset = self._download_json('https://sumo.tv2.no/rest/assets/' + video_id, video_id,
+ 'Downloading metadata JSON')
+ title = asset['title']
+ is_live = asset.get('live') is True
+
+ formats = []
+ format_urls = []
+ for protocol in self._PROTOCOLS:
+ try:
+                data = self._download_json('https://api.sumo.tv2.no/play/%s?stream=%s' % (video_id, protocol),
+                                           video_id, 'Downloading playback JSON',
+ headers={'content-type': 'application/json'},
+ data='{"device":{"id":"1-1-1","name":"Nettleser (HTML)"}}'.encode())['playback']
+ except ExtractorError as e:
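+                # a 401 response carries a JSON body describing geo-restriction or login requirements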
+ if isinstance(e.cause, HTTPError) and e.cause.status == 401:
+ error = self._parse_json(e.cause.response.read().decode(), video_id)['error']
+ error_code = error.get('code')
+ if error_code == 'ASSET_PLAYBACK_INVALID_GEO_LOCATION':
+ self.raise_geo_restricted(countries=self._GEO_COUNTRIES)
+ elif error_code == 'SESSION_NOT_AUTHENTICATED':
+ self.raise_login_required()
+ raise ExtractorError(error['description'])
+ raise
+ items = data.get('streams', [])
+ for item in items:
+ video_url = item.get('url')
+ if not video_url or video_url in format_urls:
+ continue
+ format_id = '%s-%s' % (protocol.lower(), item.get('type'))
+ if not self._is_valid_url(video_url, video_id, format_id):
+ continue
+ format_urls.append(video_url)
+ ext = determine_ext(video_url)
+ if ext == 'f4m':
+ formats.extend(self._extract_f4m_formats(
+ video_url, video_id, f4m_id=format_id, fatal=False))
+ elif ext == 'm3u8':
+ if not data.get('drmProtected'):
+ formats.extend(self._extract_m3u8_formats(
+ video_url, video_id, 'mp4', live=is_live, m3u8_id=format_id, fatal=False))
+ elif ext == 'mpd':
+ formats.extend(self._extract_mpd_formats(
+ video_url, video_id, format_id, fatal=False))
+ elif ext == 'ism' or video_url.endswith('.ism/Manifest'):
+ pass
+ else:
+ formats.append({
+ 'url': video_url,
+ 'format_id': format_id,
+ })
+ if not formats and data.get('drmProtected'):
+ self.report_drm(video_id)
+
+ thumbnails = [{
+            'id': image_type,
+            'url': thumb_url,
+        } for image_type, thumb_url in (asset.get('images') or {}).items()]
+
+ return {
+ 'id': video_id,
+ 'url': video_url,
+ 'title': title,
+ 'description': strip_or_none(asset.get('description')),
+ 'thumbnails': thumbnails,
+ 'timestamp': parse_iso8601(asset.get('live_broadcast_time') or asset.get('update_time')),
+ 'duration': float_or_none(asset.get('accurateDuration') or asset.get('duration')),
+ 'view_count': int_or_none(asset.get('views')),
+            'categories': (asset.get('tags') or '').split(','),
+ 'formats': formats,
+ 'is_live': is_live,
+ }
+
+
+class TV2ArticleIE(InfoExtractor):
+ _VALID_URL = r'https?://(?:www\.)?tv2\.no/(?!v(?:ideo)?\d*/)[^?#]+/(?P<id>\d+)'
+ _TESTS = [{
+ 'url': 'https://www.tv2.no/underholdning/forraeder/katarina-flatland-angrer-etter-forraeder-exit/15095188/',
+ 'info_dict': {
+ 'id': '15095188',
+ 'title': 'Katarina Flatland angrer etter Forræder-exit',
+ 'description': 'SANDEFJORD (TV 2): Katarina Flatland (33) måtte følge i sine fars fotspor, da hun ble forvist fra Forræder.',
+ },
+ 'playlist_count': 2,
+ }, {
+ 'url': 'http://www.tv2.no/a/6930542',
+ 'only_matching': True,
+ }]
+
+ def _real_extract(self, url):
+ playlist_id = self._match_id(url)
+
+ webpage = self._download_webpage(url, playlist_id)
+
+ # Old embed pattern (looks unused nowadays)
+ assets = re.findall(r'data-assetid=["\'](\d+)', webpage)
+
+ if not assets:
+ # New embed pattern
+ for v in re.findall(r'(?s)(?:TV2ContentboxVideo|TV2\.TV2Video)\(({.+?})\)', webpage):
+ video = self._parse_json(
+ v, playlist_id, transform_source=js_to_json, fatal=False)
+ if not video:
+ continue
+ asset = video.get('assetId')
+ if asset:
+ assets.append(asset)
+
+ entries = [
+ self.url_result('http://www.tv2.no/v/%s' % asset_id, 'TV2')
+ for asset_id in assets]
+
+ title = remove_end(self._og_search_title(webpage), ' - TV2.no')
+ description = remove_end(self._og_search_description(webpage), ' - TV2.no')
+
+ return self.playlist_result(entries, playlist_id, title, description)
+
+
+class KatsomoIE(InfoExtractor):
+ _WORKING = False
+ _VALID_URL = r'https?://(?:www\.)?(?:katsomo|mtv(uutiset)?)\.fi/(?:sarja/[0-9a-z-]+-\d+/[0-9a-z-]+-|(?:#!/)?jakso/(?:\d+/[^/]+/)?|video/prog)(?P<id>\d+)'
+ _TESTS = [{
+ 'url': 'https://www.mtv.fi/sarja/mtv-uutiset-live-33001002003/lahden-pelicans-teki-kovan-ratkaisun-ville-nieminen-pihalle-1181321',
+ 'info_dict': {
+ 'id': '1181321',
+ 'ext': 'mp4',
+ 'title': 'Lahden Pelicans teki kovan ratkaisun – Ville Nieminen pihalle',
+ 'description': 'Päätöksen teki Pelicansin hallitus.',
+ 'timestamp': 1575116484,
+ 'upload_date': '20191130',
+ 'duration': 37.12,
+ 'view_count': int,
+ 'categories': list,
+ },
+ 'params': {
+ # m3u8 download
+ 'skip_download': True,
+ },
+ }, {
+ 'url': 'http://www.katsomo.fi/#!/jakso/33001005/studio55-fi/658521/jukka-kuoppamaki-tekee-yha-lauluja-vaikka-lentokoneessa',
+ 'only_matching': True,
+ }, {
+ 'url': 'https://www.mtvuutiset.fi/video/prog1311159',
+ 'only_matching': True,
+ }, {
+ 'url': 'https://www.katsomo.fi/#!/jakso/1311159',
+ 'only_matching': True,
+ }]
+ _API_DOMAIN = 'api.katsomo.fi'
+ _PROTOCOLS = ('HLS', 'MPD')
+ _GEO_COUNTRIES = ['FI']
+
+ def _real_extract(self, url):
+ video_id = self._match_id(url)
+ api_base = 'http://%s/api/web/asset/%s' % (self._API_DOMAIN, video_id)
+
+ asset = self._download_json(
+ api_base + '.json', video_id,
+ 'Downloading metadata JSON')['asset']
+ title = asset.get('subtitle') or asset['title']
+ is_live = asset.get('live') is True
+
+ formats = []
+ format_urls = []
+ for protocol in self._PROTOCOLS:
+ try:
+ data = self._download_json(
+ api_base + '/play.json?protocol=%s&videoFormat=SMIL+ISMUSP' % protocol,
+ video_id, 'Downloading play JSON')['playback']
+ except ExtractorError as e:
+ if isinstance(e.cause, HTTPError) and e.cause.status == 401:
+ error = self._parse_json(e.cause.response.read().decode(), video_id)['error']
+ error_code = error.get('code')
+ if error_code == 'ASSET_PLAYBACK_INVALID_GEO_LOCATION':
+ self.raise_geo_restricted(countries=self._GEO_COUNTRIES)
+ elif error_code == 'SESSION_NOT_AUTHENTICATED':
+ self.raise_login_required()
+ raise ExtractorError(error['description'])
+ raise
+ items = try_get(data, lambda x: x['items']['item'])
+ if not items:
+ continue
+ if not isinstance(items, list):
+ items = [items]
+ for item in items:
+ if not isinstance(item, dict):
+ continue
+ video_url = item.get('url')
+ if not video_url or video_url in format_urls:
+ continue
+ format_id = '%s-%s' % (protocol.lower(), item.get('mediaFormat'))
+ if not self._is_valid_url(video_url, video_id, format_id):
+ continue
+ format_urls.append(video_url)
+ ext = determine_ext(video_url)
+ if ext == 'f4m':
+ formats.extend(self._extract_f4m_formats(
+ video_url, video_id, f4m_id=format_id, fatal=False))
+ elif ext == 'm3u8':
+ if not data.get('drmProtected'):
+ formats.extend(self._extract_m3u8_formats(
+ video_url, video_id, 'mp4', live=is_live, m3u8_id=format_id, fatal=False))
+ elif ext == 'mpd':
+ formats.extend(self._extract_mpd_formats(
+ video_url, video_id, format_id, fatal=False))
+ elif ext == 'ism' or video_url.endswith('.ism/Manifest'):
+ pass
+ else:
+ formats.append({
+ 'url': video_url,
+ 'format_id': format_id,
+ 'tbr': int_or_none(item.get('bitrate')),
+ 'filesize': int_or_none(item.get('fileSize')),
+ })
+ if not formats and data.get('drmProtected'):
+ self.report_drm(video_id)
+
+ thumbnails = [{
+ 'id': thumbnail.get('@type'),
+ 'url': thumbnail.get('url'),
+        } for thumbnail in (asset.get('imageVersions') or {}).values()]
+
+ return {
+ 'id': video_id,
+ 'url': video_url,
+ 'title': title,
+ 'description': strip_or_none(asset.get('description')),
+ 'thumbnails': thumbnails,
+ 'timestamp': parse_iso8601(asset.get('createTime')),
+ 'duration': float_or_none(asset.get('accurateDuration') or asset.get('duration')),
+ 'view_count': int_or_none(asset.get('views')),
+            'categories': (asset.get('keywords') or '').split(','),
+ 'formats': formats,
+ 'is_live': is_live,
+ }
+
+
+class MTVUutisetArticleIE(InfoExtractor):
+ _WORKING = False
+    _VALID_URL = r'https?://(?:www\.)?mtvuutiset\.fi/artikkeli/[^/]+/(?P<id>\d+)'
+ _TESTS = [{
+ 'url': 'https://www.mtvuutiset.fi/artikkeli/tallaisia-vaurioita-viking-amorellassa-on-useamman-osaston-alla-vetta/7931384',
+ 'info_dict': {
+ 'id': '1311159',
+ 'ext': 'mp4',
+ 'title': 'Viking Amorellan matkustajien evakuointi on alkanut – tältä operaatio näyttää laivalla',
+ 'description': 'Viking Amorellan matkustajien evakuointi on alkanut – tältä operaatio näyttää laivalla',
+ 'timestamp': 1600608966,
+ 'upload_date': '20200920',
+ 'duration': 153.7886666,
+ 'view_count': int,
+ 'categories': list,
+ },
+ 'params': {
+ # m3u8 download
+ 'skip_download': True,
+ },
+ }, {
+ # multiple Youtube embeds
+ 'url': 'https://www.mtvuutiset.fi/artikkeli/50-vuotta-subarun-vastaiskua/6070962',
+ 'only_matching': True,
+ }]
+
+ def _real_extract(self, url):
+ article_id = self._match_id(url)
+ article = self._download_json(
+ 'http://api.mtvuutiset.fi/mtvuutiset/api/json/' + article_id,
+ article_id)
+
+ def entries():
+ for video in (article.get('videos') or []):
+ video_type = video.get('videotype')
+ video_url = video.get('url')
+ if not (video_url and video_type in ('katsomo', 'youtube')):
+ continue
+ yield self.url_result(
+ video_url, video_type.capitalize(), video.get('video_id'))
+
+ return self.playlist_result(
+ entries(), article_id, article.get('title'), article.get('description'))
diff --git a/yt_dlp/extractor/tv24ua.py b/yt_dlp/extractor/tv24ua.py
new file mode 100644
index 0000000..89905ac
--- /dev/null
+++ b/yt_dlp/extractor/tv24ua.py
@@ -0,0 +1,78 @@
+import re
+
+from .common import InfoExtractor
+from ..utils import determine_ext, js_to_json, mimetype2ext, traverse_obj
+
+
+class TV24UAVideoIE(InfoExtractor):
+ _VALID_URL = r'https?://24tv\.ua/news/showPlayer\.do.*?(?:\?|&)objectId=(?P<id>\d+)'
+ _EMBED_REGEX = [rf'<iframe[^>]+?src=["\']?(?P<url>{_VALID_URL})["\']?']
+ IE_NAME = '24tv.ua'
+ _TESTS = [{
+ 'url': 'https://24tv.ua/news/showPlayer.do?objectId=2074790&videoUrl=2022/07/2074790&w=640&h=360',
+ 'info_dict': {
+ 'id': '2074790',
+ 'ext': 'mp4',
+ 'title': 'У Харкові ворожа ракета прилетіла в будинок, де слухали пісні про "офіцерів-росіян"',
+ 'thumbnail': r're:^https?://.*\.jpe?g',
+ }
+ }, {
+ 'url': 'https://24tv.ua/news/showPlayer.do?videoUrl=2022/07/2074790&objectId=2074790&w=640&h=360',
+ 'only_matching': True,
+ }]
+
+ _WEBPAGE_TESTS = [
+ {
+ # iframe embed created from share menu.
+ 'url': 'data:text/html,%3Ciframe%20src=%22https://24tv.ua/news/showPlayer.do?objectId=1886193&videoUrl'
+ '=2022/03/1886193&w=640&h=360%22%20width=%22640%22%20height=%22360%22%20frameborder=%220%22'
+ '%20scrolling=%22no%22%3E%3C/iframe%3E',
+ 'info_dict': {
+ 'id': '1886193',
+ 'ext': 'mp4',
+ 'title': 'Росіяни руйнують Бородянку на Київщині та стріляють з літаків по мешканцях: шокуючі фото',
+ 'thumbnail': r're:^https?://.*\.jpe?g',
+ }
+ },
+ {
+ 'url': 'https://24tv.ua/vipalyuyut-nashi-mista-sela-dsns-pokazali-motoroshni-naslidki_n1883966',
+ 'info_dict': {
+ 'id': '1883966',
+ 'ext': 'mp4',
+ 'title': 'Випалюють наші міста та села, – моторошні наслідки обстрілів на Чернігівщині',
+ 'thumbnail': r're:^https?://.*\.jpe?g',
+ },
+ 'params': {'allowed_extractors': ['Generic', '24tv.ua']},
+ }
+ ]
+
+ def _real_extract(self, url):
+ video_id = self._match_id(url)
+ webpage = self._download_webpage(url, video_id)
+
+ formats = []
+ subtitles = {}
+ for j in re.findall(r'vPlayConfig\.sources\s*=\s*(?P<json>\[{\s*(?s:.+?)\s*}])', webpage):
+ sources = self._parse_json(j, video_id, fatal=False, ignore_extra=True, transform_source=js_to_json, errnote='') or []
+ for source in sources:
+ if mimetype2ext(traverse_obj(source, 'type')) == 'm3u8':
+ f, s = self._extract_m3u8_formats_and_subtitles(source['src'], video_id)
+ formats.extend(f)
+ self._merge_subtitles(subtitles, s)
+ else:
+ formats.append({
+ 'url': source['src'],
+ 'ext': determine_ext(source['src']),
+ })
+ thumbnail = traverse_obj(
+ self._search_json(
+ r'var\s*vPlayConfig\s*=\s*', webpage, 'thumbnail',
+ video_id, default=None, transform_source=js_to_json), 'poster')
+ return {
+ 'id': video_id,
+ 'formats': formats,
+ 'subtitles': subtitles,
+ 'thumbnail': thumbnail or self._og_search_thumbnail(webpage),
+ 'title': self._generic_title('', webpage),
+ 'description': self._og_search_description(webpage, default=None),
+ }
diff --git a/yt_dlp/extractor/tv2dk.py b/yt_dlp/extractor/tv2dk.py
new file mode 100644
index 0000000..35e92f1
--- /dev/null
+++ b/yt_dlp/extractor/tv2dk.py
@@ -0,0 +1,172 @@
+import json
+import re
+
+from .common import InfoExtractor
+from ..utils import (
+ determine_ext,
+ extract_attributes,
+ js_to_json,
+ url_or_none,
+)
+
+
+class TV2DKIE(InfoExtractor):
+ _VALID_URL = r'''(?x)
+ https?://
+ (?:www\.)?
+ (?:
+ tvsyd|
+ tv2ostjylland|
+ tvmidtvest|
+ tv2fyn|
+ tv2east|
+ tv2lorry|
+ tv2nord
+ )\.dk/
+                        (?:[^/]+/)*
+ (?P<id>[^/?\#&]+)
+ '''
+ _TESTS = [{
+ 'url': 'https://www.tvsyd.dk/nyheder/28-10-2019/1930/1930-28-okt-2019?autoplay=1#player',
+ 'info_dict': {
+ 'id': '0_52jmwa0p',
+ 'ext': 'mp4',
+ 'title': '19:30 - 28. okt. 2019',
+ 'timestamp': 1572290248,
+ 'upload_date': '20191028',
+ 'uploader_id': 'tvsyd',
+ 'duration': 1347,
+ 'view_count': int,
+ },
+ 'add_ie': ['Kaltura'],
+ }, {
+ 'url': 'https://www.tv2lorry.dk/gadekamp/gadekamp-6-hoejhuse-i-koebenhavn',
+ 'info_dict': {
+ 'id': '1_7iwll9n0',
+ 'ext': 'mp4',
+ 'upload_date': '20211027',
+ 'title': 'Gadekamp #6 - Højhuse i København',
+ 'uploader_id': 'tv2lorry',
+ 'timestamp': 1635345229,
+ },
+ 'add_ie': ['Kaltura'],
+ }, {
+ 'url': 'https://www.tv2ostjylland.dk/artikel/minister-gaar-ind-i-sag-om-diabetes-teknologi',
+ 'only_matching': True,
+ }, {
+ 'url': 'https://www.tv2ostjylland.dk/nyheder/28-10-2019/22/2200-nyhederne-mandag-d-28-oktober-2019?autoplay=1#player',
+ 'only_matching': True,
+ }, {
+ 'url': 'https://www.tvmidtvest.dk/nyheder/27-10-2019/1930/1930-27-okt-2019',
+ 'only_matching': True,
+ }, {
+ 'url': 'https://www.tv2fyn.dk/artikel/fyn-kan-faa-landets-foerste-fabrik-til-groent-jetbraendstof',
+ 'only_matching': True,
+ }, {
+ 'url': 'https://www.tv2east.dk/artikel/gods-faar-indleveret-tonsvis-af-aebler-100-kilo-aebler-gaar-til-en-aeblebrandy',
+ 'only_matching': True,
+ }, {
+ 'url': 'https://www.tv2lorry.dk/koebenhavn/rasmus-paludan-evakueret-til-egen-demonstration#player',
+ 'only_matching': True,
+ }, {
+ 'url': 'https://www.tv2nord.dk/artikel/dybt-uacceptabelt',
+ 'only_matching': True,
+ }]
+
+ def _real_extract(self, url):
+ video_id = self._match_id(url)
+
+ webpage = self._download_webpage(url, video_id)
+
+ entries = []
+
+ def add_entry(partner_id, kaltura_id):
+ entries.append(self.url_result(
+ 'kaltura:%s:%s' % (partner_id, kaltura_id), 'Kaltura',
+ video_id=kaltura_id))
+
+ for video_el in re.findall(r'(?s)<[^>]+\bdata-entryid\s*=[^>]*>', webpage):
+ video = extract_attributes(video_el)
+ kaltura_id = video.get('data-entryid')
+ if not kaltura_id:
+ continue
+ partner_id = video.get('data-partnerid')
+ if not partner_id:
+ continue
+ add_entry(partner_id, kaltura_id)
+ if not entries:
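+            # fall back to IDs embedded in the page's inline scripts/serialized state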
+ kaltura_id = self._search_regex(
+ (r'entry_id\s*:\s*["\']([0-9a-z_]+)',
+ r'\\u002FentryId\\u002F(\w+)\\u002F'), webpage, 'kaltura id')
+ partner_id = self._search_regex(
+ (r'\\u002Fp\\u002F(\d+)\\u002F', r'/p/(\d+)/'), webpage,
+ 'partner id')
+ add_entry(partner_id, kaltura_id)
+ if len(entries) == 1:
+ return entries[0]
+ return self.playlist_result(entries)
+
+
+class TV2DKBornholmPlayIE(InfoExtractor):
+ _VALID_URL = r'https?://play\.tv2bornholm\.dk/\?.*?\bid=(?P<id>\d+)'
+ _TEST = {
+ 'url': 'http://play.tv2bornholm.dk/?area=specifikTV&id=781021',
+ 'info_dict': {
+ 'id': '781021',
+ 'ext': 'mp4',
+ 'title': '12Nyheder-27.11.19',
+ },
+ 'params': {
+ 'skip_download': True,
+ },
+ }
+
+ def _real_extract(self, url):
+ video_id = self._match_id(url)
+
+ video = self._download_json(
+ 'https://play.tv2bornholm.dk/controls/AJAX.aspx/specifikVideo', video_id,
+ data=json.dumps({
+ 'playlist_id': video_id,
+ 'serienavn': '',
+ }).encode(), headers={
+ 'X-Requested-With': 'XMLHttpRequest',
+ 'Content-Type': 'application/json; charset=UTF-8',
+ })['d']
+
+ # TODO: generalize flowplayer
+ title = self._search_regex(
+ r'title\s*:\s*(["\'])(?P<value>(?:(?!\1).)+)\1', video, 'title',
+ group='value')
+ sources = self._parse_json(self._search_regex(
+ r'(?s)sources:\s*(\[.+?\]),', video, 'sources'),
+ video_id, js_to_json)
+
+ formats = []
+ srcs = set()
+ for source in sources:
+ src = url_or_none(source.get('src'))
+ if not src:
+ continue
+ if src in srcs:
+ continue
+ srcs.add(src)
+ ext = determine_ext(src)
+ src_type = source.get('type')
+ if src_type == 'application/x-mpegurl' or ext == 'm3u8':
+ formats.extend(self._extract_m3u8_formats(
+ src, video_id, ext='mp4', entry_protocol='m3u8_native',
+ m3u8_id='hls', fatal=False))
+ elif src_type == 'application/dash+xml' or ext == 'mpd':
+ formats.extend(self._extract_mpd_formats(
+ src, video_id, mpd_id='dash', fatal=False))
+ else:
+ formats.append({
+ 'url': src,
+ })
+
+ return {
+ 'id': video_id,
+ 'title': title,
+ 'formats': formats,
+ }
diff --git a/yt_dlp/extractor/tv2hu.py b/yt_dlp/extractor/tv2hu.py
new file mode 100644
index 0000000..9c0a111
--- /dev/null
+++ b/yt_dlp/extractor/tv2hu.py
@@ -0,0 +1,104 @@
+from .common import InfoExtractor
+from ..utils import (
+    UnsupportedError,
+    traverse_obj,
+    urljoin,
+)
+
+
+class TV2HuIE(InfoExtractor):
+ IE_NAME = 'tv2play.hu'
+ _VALID_URL = r'https?://(?:www\.)?tv2play\.hu/(?!szalag/)(?P<id>[^#&?]+)'
+ _TESTS = [{
+ 'url': 'https://tv2play.hu/mintaapak/mintaapak_213_epizod_resz',
+ 'info_dict': {
+ 'id': '249240',
+ 'ext': 'mp4',
+ 'title': 'Mintaapák - 213. epizód',
+ 'series': 'Mintaapák',
+ 'duration': 2164,
+ 'description': 'md5:7350147e75485a59598e806c47967b07',
+ 'thumbnail': r're:^https?://.*\.jpg$',
+ 'release_date': '20210825',
+ 'episode_number': 213,
+ },
+ 'params': {
+ 'skip_download': True,
+ },
+ }, {
+ 'url': 'https://tv2play.hu/taxi_2',
+ 'md5': '585e58e2e090f34603804bb2c48e98d8',
+ 'info_dict': {
+ 'id': '199363',
+ 'ext': 'mp4',
+ 'title': 'Taxi 2',
+ 'series': 'Taxi 2',
+ 'duration': 5087,
+ 'description': 'md5:47762155dc9a50241797ded101b1b08c',
+ 'thumbnail': r're:^https?://.*\.jpg$',
+ 'release_date': '20210118',
+ },
+ 'params': {
+ 'skip_download': True,
+ },
+ }]
+
+ def _real_extract(self, url):
+        display_id = self._match_id(url)
+        json_data = self._download_json(f'https://tv2play.hu/api/search/{display_id}', display_id)
+
+ if json_data['contentType'] == 'showpage':
+ ribbon_ids = traverse_obj(json_data, ('pages', ..., 'tabs', ..., 'ribbonIds'), get_all=False, expected_type=list)
+ entries = [self.url_result(f'https://tv2play.hu/szalag/{ribbon_id}',
+ ie=TV2HuSeriesIE.ie_key(), video_id=ribbon_id) for ribbon_id in ribbon_ids]
+            return self.playlist_result(entries, playlist_id=display_id)
+ elif json_data['contentType'] != 'video':
+ raise UnsupportedError(url)
+
+ video_id = str(json_data['id'])
+ player_id = json_data.get('playerId')
+ series_json = json_data.get('seriesInfo', {})
+
+ video_json_url = self._download_json(f'https://tv2play.hu/api/streaming-url?playerId={player_id}', video_id)['url']
+ video_json = self._download_json(video_json_url, video_id)
+ m3u8_url = self._proto_relative_url(traverse_obj(video_json, ('bitrates', 'hls')))
+ formats, subtitles = self._extract_m3u8_formats_and_subtitles(m3u8_url, video_id)
+
+ return {
+ 'id': video_id,
+ 'title': json_data['title'],
+ 'series': json_data.get('seriesTitle'),
+ 'duration': json_data.get('length'),
+ 'description': json_data.get('description'),
+            'thumbnail': urljoin('https://tv2play.hu', json_data.get('thumbnailUrl')),
+            'release_date': (json_data.get('uploadedAt') or '').replace('.', '') or None,
+ 'season_number': series_json.get('seasonNr'),
+ 'episode_number': series_json.get('episodeNr'),
+ 'formats': formats,
+ 'subtitles': subtitles,
+ }
+
+
+class TV2HuSeriesIE(InfoExtractor):
+ IE_NAME = 'tv2playseries.hu'
+ _VALID_URL = r'https?://(?:www\.)?tv2play\.hu/szalag/(?P<id>[^#&?]+)'
+
+ _TESTS = [{
+ 'url': 'https://tv2play.hu/szalag/59?rendezes=nepszeruseg',
+ 'playlist_mincount': 284,
+ 'info_dict': {
+ 'id': '59',
+ }
+ }]
+
+ def _real_extract(self, url):
+        playlist_id = self._match_id(url)
+        json_data = self._download_json(f'https://tv2play.hu/api/ribbons/{playlist_id}/0?size=100000', playlist_id)
+ entries = []
+ for card in json_data.get('cards', []):
+ video_id = card.get('slug')
+ if video_id:
+ entries.append(self.url_result(f'https://tv2play.hu/{video_id}',
+ ie=TV2HuIE.ie_key(), video_id=video_id))
+
+        return self.playlist_result(entries, playlist_id=playlist_id)
diff --git a/yt_dlp/extractor/tv4.py b/yt_dlp/extractor/tv4.py
new file mode 100644
index 0000000..10a2fe6
--- /dev/null
+++ b/yt_dlp/extractor/tv4.py
@@ -0,0 +1,149 @@
+import re
+
+from .common import InfoExtractor
+from ..utils import (
+ bool_or_none,
+ int_or_none,
+ parse_iso8601,
+ traverse_obj,
+ url_or_none,
+)
+
+
+class TV4IE(InfoExtractor):
+ IE_DESC = 'tv4.se and tv4play.se'
+ _VALID_URL = r'''(?x)https?://(?:www\.)?
+ (?:
+ tv4\.se/(?:[^/]+)/klipp/(?:.*)-|
+ tv4play\.se/
+ (?:
+ (?:program|barn)/(?:(?:[^/]+/){1,2}|(?:[^\?]+)\?video_id=)|
+ iframe/video/|
+ film/|
+ sport/|
+ )
+ )(?P<id>[0-9]+)'''
+ _GEO_BYPASS = False
+ _TESTS = [
+ {
+ # not geo-restricted
+ 'url': 'http://www.tv4.se/kalla-fakta/klipp/kalla-fakta-5-english-subtitles-2491650',
+ 'md5': 'cb837212f342d77cec06e6dad190e96d',
+ 'info_dict': {
+ 'id': '2491650',
+ 'ext': 'mp4',
+ 'title': 'Kalla Fakta 5 (english subtitles)',
+ 'description': '2491650',
+ 'series': 'Kalla fakta',
+ 'duration': 1335,
+ 'thumbnail': r're:^https?://[^/?#]+/api/v2/img/',
+ 'timestamp': 1385373240,
+ 'upload_date': '20131125',
+ },
+ 'params': {'skip_download': 'm3u8'},
+ 'expected_warnings': ['Unable to download f4m manifest'],
+ },
+ {
+ 'url': 'http://www.tv4play.se/iframe/video/3054113',
+ 'md5': 'cb837212f342d77cec06e6dad190e96d',
+ 'info_dict': {
+ 'id': '3054113',
+ 'ext': 'mp4',
+ 'title': 'Så här jobbar ficktjuvarna - se avslöjande bilder',
+ 'thumbnail': r're:^https?://.*\.jpg$',
+ 'description': 'Unika bilder avslöjar hur turisternas fickor vittjas mitt på Stockholms central. Två experter på ficktjuvarna avslöjar knepen du ska se upp för.',
+ 'timestamp': int,
+ 'upload_date': '20150130',
+ },
+ 'skip': '404 Not Found',
+ },
+ {
+ 'url': 'http://www.tv4play.se/sport/3060959',
+ 'only_matching': True,
+ },
+ {
+ 'url': 'http://www.tv4play.se/film/2378136',
+ 'only_matching': True,
+ },
+ {
+ 'url': 'http://www.tv4play.se/barn/looney-tunes?video_id=3062412',
+ 'only_matching': True,
+ },
+ {
+ 'url': 'http://www.tv4play.se/program/farang/3922081',
+ 'only_matching': True,
+ },
+ {
+ 'url': 'https://www.tv4play.se/program/nyheterna/avsnitt/13315940',
+ 'only_matching': True,
+ }
+ ]
+
+ def _call_api(self, endpoint, video_id, headers=None, query={}):
+ return self._download_json(
+ f'https://playback2.a2d.tv/{endpoint}/{video_id}', video_id,
+ f'Downloading {endpoint} API JSON', headers=headers, query={
+ 'service': 'tv4',
+ 'device': 'browser',
+ 'protocol': 'hls',
+ **query,
+ })
+
+ def _real_extract(self, url):
+ video_id = self._match_id(url)
+
+ info = traverse_obj(self._call_api('asset', video_id, query={
+ 'protocol': 'hls,dash',
+ 'drm': 'widevine',
+ }), ('metadata', {dict})) or {}
+
+ manifest_url = self._call_api(
+ 'play', video_id, headers=self.geo_verification_headers())['playbackItem']['manifestUrl']
+
+ formats, subtitles = [], {}
+
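+        # the play API returns an HLS manifest URL; DASH, HDS and ISM variants are probed by rewriting it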
+ fmts, subs = self._extract_m3u8_formats_and_subtitles(
+ manifest_url, video_id, 'mp4',
+ 'm3u8_native', m3u8_id='hls', fatal=False)
+ formats.extend(fmts)
+ subtitles = self._merge_subtitles(subtitles, subs)
+
+ fmts, subs = self._extract_mpd_formats_and_subtitles(
+ manifest_url.replace('.m3u8', '.mpd'),
+ video_id, mpd_id='dash', fatal=False)
+ formats.extend(fmts)
+ subtitles = self._merge_subtitles(subtitles, subs)
+
+ fmts = self._extract_f4m_formats(
+ manifest_url.replace('.m3u8', '.f4m'),
+ video_id, f4m_id='hds', fatal=False)
+ formats.extend(fmts)
+
+ fmts, subs = self._extract_ism_formats_and_subtitles(
+ re.sub(r'\.ism/.*?\.m3u8', r'.ism/Manifest', manifest_url),
+ video_id, ism_id='mss', fatal=False)
+ formats.extend(fmts)
+ subtitles = self._merge_subtitles(subtitles, subs)
+
+ if not formats and info.get('is_geo_restricted'):
+ self.raise_geo_restricted(
+ 'This video is not available from your location due to geo-restriction, or not being authenticated',
+ countries=['SE'])
+
+ return {
+ 'id': video_id,
+ 'formats': formats,
+ 'subtitles': subtitles,
+ **traverse_obj(info, {
+ 'title': ('title', {str}),
+ 'description': ('description', {str}),
+ 'timestamp': (('broadcast_date_time', 'broadcastDateTime'), {parse_iso8601}),
+ 'duration': ('duration', {int_or_none}),
+ 'thumbnail': ('image', {url_or_none}),
+ 'is_live': ('isLive', {bool_or_none}),
+ 'series': ('seriesTitle', {str}),
+ 'season_number': ('seasonNumber', {int_or_none}),
+ 'episode': ('episodeTitle', {str}),
+ 'episode_number': ('episodeNumber', {int_or_none}),
+ }, get_all=False),
+ }
diff --git a/yt_dlp/extractor/tv5mondeplus.py b/yt_dlp/extractor/tv5mondeplus.py
new file mode 100644
index 0000000..a445fae
--- /dev/null
+++ b/yt_dlp/extractor/tv5mondeplus.py
@@ -0,0 +1,190 @@
+import urllib.parse
+
+from .common import InfoExtractor
+from ..utils import (
+ determine_ext,
+ extract_attributes,
+ int_or_none,
+ parse_duration,
+ traverse_obj,
+ try_get,
+ url_or_none,
+)
+
+
+class TV5MondePlusIE(InfoExtractor):
+ IE_DESC = 'TV5MONDE+'
+ _VALID_URL = r'https?://(?:www\.)?(?:tv5mondeplus|revoir\.tv5monde)\.com/toutes-les-videos/[^/]+/(?P<id>[^/?#]+)'
+ _TESTS = [{
+ # movie
+ 'url': 'https://revoir.tv5monde.com/toutes-les-videos/cinema/les-novices',
+ 'md5': 'c86f60bf8b75436455b1b205f9745955',
+ 'info_dict': {
+ 'id': 'ZX0ipMyFQq_6D4BA7b',
+ 'display_id': 'les-novices',
+ 'ext': 'mp4',
+ 'title': 'Les novices',
+ 'description': 'md5:2e7c33ba3ad48dabfcc2a956b88bde2b',
+ 'upload_date': '20230821',
+ 'thumbnail': 'https://revoir.tv5monde.com/uploads/media/video_thumbnail/0738/60/01e952b7ccf36b7c6007ec9131588954ab651de9.jpeg',
+ 'duration': 5177,
+ 'episode': 'Les novices',
+ },
+ }, {
+ # series episode
+ 'url': 'https://revoir.tv5monde.com/toutes-les-videos/series-fictions/opj-les-dents-de-la-terre-2',
+ 'info_dict': {
+ 'id': 'wJ0eeEPozr_6D4BA7b',
+ 'display_id': 'opj-les-dents-de-la-terre-2',
+ 'ext': 'mp4',
+ 'title': "OPJ - Les dents de la Terre (2)",
+ 'description': 'md5:288f87fd68d993f814e66e60e5302d9d',
+ 'upload_date': '20230823',
+ 'series': 'OPJ',
+ 'episode': 'Les dents de la Terre (2)',
+ 'duration': 2877,
+ 'thumbnail': 'https://dl-revoir.tv5monde.com/images/1a/5753448.jpg'
+ },
+ }, {
+ # movie
+ 'url': 'https://revoir.tv5monde.com/toutes-les-videos/cinema/ceux-qui-travaillent',
+ 'md5': '32fa0cde16a4480d1251502a66856d5f',
+ 'info_dict': {
+ 'id': 'dc57a011-ec4b-4648-2a9a-4f03f8352ed3',
+ 'display_id': 'ceux-qui-travaillent',
+ 'ext': 'mp4',
+ 'title': 'Ceux qui travaillent',
+ 'description': 'md5:570e8bb688036ace873b2d50d24c026d',
+ 'upload_date': '20210819',
+ },
+ 'skip': 'no longer available',
+ }, {
+ # series episode
+ 'url': 'https://revoir.tv5monde.com/toutes-les-videos/series-fictions/vestiaires-caro-actrice',
+ 'info_dict': {
+ 'id': '9e9d599e-23af-6915-843e-ecbf62e97925',
+ 'display_id': 'vestiaires-caro-actrice',
+ 'ext': 'mp4',
+ 'title': "Vestiaires - Caro actrice",
+ 'description': 'md5:db15d2e1976641e08377f942778058ea',
+ 'upload_date': '20210819',
+ 'series': "Vestiaires",
+ 'episode': 'Caro actrice',
+ },
+ 'params': {
+ 'skip_download': True,
+ },
+ 'skip': 'no longer available',
+ }, {
+ 'url': 'https://revoir.tv5monde.com/toutes-les-videos/series-fictions/neuf-jours-en-hiver-neuf-jours-en-hiver',
+ 'only_matching': True,
+ }, {
+ 'url': 'https://revoir.tv5monde.com/toutes-les-videos/info-societe/le-journal-de-la-rts-edition-du-30-01-20-19h30',
+ 'only_matching': True,
+ }]
+ _GEO_BYPASS = False
+
+ @staticmethod
+ def _extract_subtitles(data_captions):
+ subtitles = {}
+ for f in traverse_obj(data_captions, ('files', lambda _, v: url_or_none(v['file']))):
+ subtitles.setdefault(f.get('label') or 'fra', []).append({'url': f['file']})
+ return subtitles
+
+ def _real_extract(self, url):
+ display_id = self._match_id(url)
+ webpage = self._download_webpage(url, display_id)
+
+ if ">Ce programme n'est malheureusement pas disponible pour votre zone géographique.<" in webpage:
+ self.raise_geo_restricted(countries=['FR'])
+
+ title = episode = self._html_search_regex(r'<h1>([^<]+)', webpage, 'title')
+ vpl_data = extract_attributes(self._search_regex(
+ r'(<[^>]+class="video_player_loader"[^>]+>)',
+ webpage, 'video player loader'))
+
+ video_files = self._parse_json(
+ vpl_data['data-broadcast'], display_id)
+ formats = []
+ video_id = None
+
+ def process_video_files(v):
+ nonlocal video_id
+ for video_file in v:
+ v_url = video_file.get('url')
+ if not v_url:
+ continue
+ if video_file.get('type') == 'application/deferred':
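+                    # deferred entries point at a resolver API; exchange the token for the real asset URLs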
+ d_param = urllib.parse.quote(v_url)
+ token = video_file.get('token')
+ if not token:
+ continue
+ deferred_json = self._download_json(
+ f'https://api.tv5monde.com/player/asset/{d_param}/resolve?condenseKS=true', display_id,
+ note='Downloading deferred info', headers={'Authorization': f'Bearer {token}'}, fatal=False)
+ v_url = traverse_obj(deferred_json, (0, 'url', {url_or_none}))
+ if not v_url:
+ continue
+ # data-guid from the webpage isn't stable, use the material id from the json urls
+ video_id = self._search_regex(
+ r'materials/([\da-zA-Z]{10}_[\da-fA-F]{7})/', v_url, 'video id', default=None)
+ process_video_files(deferred_json)
+
+ video_format = video_file.get('format') or determine_ext(v_url)
+ if video_format == 'm3u8':
+ formats.extend(self._extract_m3u8_formats(
+ v_url, display_id, 'mp4', 'm3u8_native',
+ m3u8_id='hls', fatal=False))
+ elif video_format == 'mpd':
+ formats.extend(self._extract_mpd_formats(
+ v_url, display_id, fatal=False))
+ else:
+ formats.append({
+ 'url': v_url,
+ 'format_id': video_format,
+ })
+
+ process_video_files(video_files)
+
+ metadata = self._parse_json(
+ vpl_data['data-metadata'], display_id)
+ duration = (int_or_none(try_get(metadata, lambda x: x['content']['duration']))
+ or parse_duration(self._html_search_meta('duration', webpage)))
+
+ description = self._html_search_regex(
+ r'(?s)<div[^>]+class=["\']episode-texte[^>]+>(.+?)</div>', webpage,
+ 'description', fatal=False)
+
+ series = self._html_search_regex(
+ r'<p[^>]+class=["\']episode-emission[^>]+>([^<]+)', webpage,
+ 'series', default=None)
+
+ if series and series != title:
+ title = '%s - %s' % (series, title)
+
+ upload_date = self._search_regex(
+ r'(?:date_publication|publish_date)["\']\s*:\s*["\'](\d{4}_\d{2}_\d{2})',
+ webpage, 'upload date', default=None)
+ if upload_date:
+ upload_date = upload_date.replace('_', '')
+
+ if not video_id:
+ video_id = self._search_regex(
+ (r'data-guid=["\']([\da-f]{8}-[\da-f]{4}-[\da-f]{4}-[\da-f]{4}-[\da-f]{12})',
+                 r'id_contenu["\']\s*:\s*(\d+)'), webpage, 'video id',
+ default=display_id)
+
+ return {
+ 'id': video_id,
+ 'display_id': display_id,
+ 'title': title,
+ 'description': description,
+ 'thumbnail': vpl_data.get('data-image'),
+ 'duration': duration,
+ 'upload_date': upload_date,
+ 'formats': formats,
+ 'subtitles': self._extract_subtitles(self._parse_json(
+ traverse_obj(vpl_data, ('data-captions', {str}), default='{}'), display_id, fatal=False)),
+ 'series': series,
+ 'episode': episode,
+ }
diff --git a/yt_dlp/extractor/tv5unis.py b/yt_dlp/extractor/tv5unis.py
new file mode 100644
index 0000000..978255b
--- /dev/null
+++ b/yt_dlp/extractor/tv5unis.py
@@ -0,0 +1,116 @@
+from .common import InfoExtractor
+from ..utils import (
+ int_or_none,
+ parse_age_limit,
+ smuggle_url,
+ try_get,
+)
+
+
+class TV5UnisBaseIE(InfoExtractor):
+ _GEO_COUNTRIES = ['CA']
+
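+    # subclasses supply _GQL_QUERY_NAME and _gql_args() to parameterize the GraphQL query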
+ def _real_extract(self, url):
+ groups = self._match_valid_url(url).groups()
+ product = self._download_json(
+ 'https://api.tv5unis.ca/graphql', groups[0], query={
+ 'query': '''{
+ %s(%s) {
+ collection {
+ title
+ }
+ episodeNumber
+ rating {
+ name
+ }
+ seasonNumber
+ tags
+ title
+ videoElement {
+ ... on Video {
+ mediaId
+ }
+ }
+ }
+}''' % (self._GQL_QUERY_NAME, self._gql_args(groups)),
+ })['data'][self._GQL_QUERY_NAME]
+ media_id = product['videoElement']['mediaId']
+
+ return {
+ '_type': 'url_transparent',
+ 'id': media_id,
+ 'title': product.get('title'),
+ 'url': smuggle_url('limelight:media:' + media_id, {'geo_countries': self._GEO_COUNTRIES}),
+ 'age_limit': parse_age_limit(try_get(product, lambda x: x['rating']['name'])),
+ 'tags': product.get('tags'),
+ 'series': try_get(product, lambda x: x['collection']['title']),
+ 'season_number': int_or_none(product.get('seasonNumber')),
+ 'episode_number': int_or_none(product.get('episodeNumber')),
+ 'ie_key': 'LimelightMedia',
+ }
+
+
+class TV5UnisVideoIE(TV5UnisBaseIE):
+ IE_NAME = 'tv5unis:video'
+ _VALID_URL = r'https?://(?:www\.)?tv5unis\.ca/videos/[^/]+/(?P<id>\d+)'
+ _TEST = {
+ 'url': 'https://www.tv5unis.ca/videos/bande-annonces/71843',
+ 'md5': '3d794164928bda97fb87a17e89923d9b',
+ 'info_dict': {
+ 'id': 'a883684aecb2486cad9bdc7bbe17f861',
+ 'ext': 'mp4',
+ 'title': 'Watatatow',
+ 'duration': 10.01,
+ }
+ }
+ _GQL_QUERY_NAME = 'productById'
+
+ @staticmethod
+ def _gql_args(groups):
+ return 'id: %s' % groups
+
+
+class TV5UnisIE(TV5UnisBaseIE):
+ IE_NAME = 'tv5unis'
+ _VALID_URL = r'https?://(?:www\.)?tv5unis\.ca/videos/(?P<id>[^/]+)(?:/saisons/(?P<season_number>\d+)/episodes/(?P<episode_number>\d+))?/?(?:[?#&]|$)'
+ _TESTS = [{
+ 'url': 'https://www.tv5unis.ca/videos/watatatow/saisons/6/episodes/1',
+ 'md5': 'a479907d2e531a73e1f8dc48d6388d02',
+ 'info_dict': {
+ 'id': 'e5ee23a586c44612a56aad61accf16ef',
+ 'ext': 'mp4',
+ 'title': 'Je ne peux pas lui résister',
+ 'description': "Atys, le nouveau concierge de l'école, a réussi à ébranler la confiance de Mado en affirmant qu\'une médaille, ce n'est que du métal. Comme Mado essaie de lui prouver que ses valeurs sont solides, il veut la mettre à l'épreuve...",
+ 'subtitles': {
+ 'fr': 'count:1',
+ },
+ 'duration': 1370,
+ 'age_limit': 8,
+ 'tags': 'count:3',
+ 'series': 'Watatatow',
+ 'season_number': 6,
+ 'episode_number': 1,
+ },
+ }, {
+ 'url': 'https://www.tv5unis.ca/videos/le-voyage-de-fanny',
+ 'md5': '9ca80ebb575c681d10cae1adff3d4774',
+ 'info_dict': {
+ 'id': '726188eefe094d8faefb13381d42bc06',
+ 'ext': 'mp4',
+ 'title': 'Le voyage de Fanny',
+ 'description': "Fanny, 12 ans, cachée dans un foyer loin de ses parents, s'occupe de ses deux soeurs. Devant fuir, Fanny prend la tête d'un groupe de huit enfants et s'engage dans un dangereux périple à travers la France occupée pour rejoindre la frontière suisse.",
+ 'subtitles': {
+ 'fr': 'count:1',
+ },
+ 'duration': 5587.034,
+ 'tags': 'count:4',
+ },
+ }]
+ _GQL_QUERY_NAME = 'productByRootProductSlug'
+
+ @staticmethod
+ def _gql_args(groups):
+ args = 'rootProductSlug: "%s"' % groups[0]
+ if groups[1]:
+ args += ', seasonNumber: %s, episodeNumber: %s' % groups[1:]
+ return args
diff --git a/yt_dlp/extractor/tva.py b/yt_dlp/extractor/tva.py
new file mode 100644
index 0000000..9afe233
--- /dev/null
+++ b/yt_dlp/extractor/tva.py
@@ -0,0 +1,85 @@
+from .common import InfoExtractor
+from ..utils import (
+ float_or_none,
+ int_or_none,
+ smuggle_url,
+ strip_or_none,
+)
+
+
+class TVAIE(InfoExtractor):
+ _VALID_URL = r'https?://videos?\.tva\.ca/details/_(?P<id>\d+)'
+ _TESTS = [{
+ 'url': 'https://videos.tva.ca/details/_5596811470001',
+ 'info_dict': {
+ 'id': '5596811470001',
+ 'ext': 'mp4',
+ 'title': 'Un extrait de l\'épisode du dimanche 8 octobre 2017 !',
+ 'uploader_id': '5481942443001',
+ 'upload_date': '20171003',
+ 'timestamp': 1507064617,
+ },
+ 'params': {
+ # m3u8 download
+ 'skip_download': True,
+ },
+ 'skip': 'HTTP Error 404: Not Found',
+ }, {
+ 'url': 'https://video.tva.ca/details/_5596811470001',
+ 'only_matching': True,
+ }]
+ BRIGHTCOVE_URL_TEMPLATE = 'http://players.brightcove.net/5481942443001/default_default/index.html?videoId=%s'
+
+ def _real_extract(self, url):
+ video_id = self._match_id(url)
+
+ return {
+ '_type': 'url_transparent',
+ 'id': video_id,
+ 'url': smuggle_url(self.BRIGHTCOVE_URL_TEMPLATE % video_id, {'geo_countries': ['CA']}),
+ 'ie_key': 'BrightcoveNew',
+ }
+
+
+class QubIE(InfoExtractor):
+ _VALID_URL = r'https?://(?:www\.)?qub\.ca/(?:[^/]+/)*[0-9a-z-]+-(?P<id>\d+)'
+ _TESTS = [{
+ 'url': 'https://www.qub.ca/tvaplus/tva/alerte-amber/saison-1/episode-01-1000036619',
+ 'md5': '949490fd0e7aee11d0543777611fbd53',
+ 'info_dict': {
+ 'id': '6084352463001',
+ 'ext': 'mp4',
+ 'title': 'Épisode 01',
+ 'uploader_id': '5481942443001',
+ 'upload_date': '20190907',
+ 'timestamp': 1567899756,
+ 'description': 'md5:9c0d7fbb90939420c651fd977df90145',
+ },
+ }, {
+ 'url': 'https://www.qub.ca/tele/video/lcn-ca-vous-regarde-rev-30s-ap369664-1009357943',
+ 'only_matching': True,
+ }]
+ # reference_id also works with the old account_id (5481942443001)
+ # BRIGHTCOVE_URL_TEMPLATE = 'http://players.brightcove.net/5813221784001/default_default/index.html?videoId=ref:%s'
+
+ def _real_extract(self, url):
+ entity_id = self._match_id(url)
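+ # the Qub API resolves the page entity to its Brightcove video ID;
+ # playback is then delegated to TVAIE below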
+ entity = self._download_json(
+ 'https://www.qub.ca/proxy/pfu/content-delivery-service/v1/entities',
+ entity_id, query={'id': entity_id})
+ video_id = entity['videoId']
+ episode = strip_or_none(entity.get('name'))
+
+ return {
+ '_type': 'url_transparent',
+ 'id': video_id,
+ 'title': episode,
+ # 'url': self.BRIGHTCOVE_URL_TEMPLATE % entity['referenceId'],
+ 'url': 'https://videos.tva.ca/details/_' + video_id,
+ 'description': entity.get('longDescription'),
+ 'duration': float_or_none(entity.get('durationMillis'), 1000),
+ 'episode': episode,
+ 'episode_number': int_or_none(entity.get('episodeNumber')),
+ # 'ie_key': 'BrightcoveNew',
+ 'ie_key': TVAIE.ie_key(),
+ }
diff --git a/yt_dlp/extractor/tvanouvelles.py b/yt_dlp/extractor/tvanouvelles.py
new file mode 100644
index 0000000..b9f5e11
--- /dev/null
+++ b/yt_dlp/extractor/tvanouvelles.py
@@ -0,0 +1,62 @@
+import re
+
+from .common import InfoExtractor
+from .brightcove import BrightcoveNewIE
+
+
+class TVANouvellesIE(InfoExtractor):
+ _VALID_URL = r'https?://(?:www\.)?tvanouvelles\.ca/videos/(?P<id>\d+)'
+ _TEST = {
+ 'url': 'http://www.tvanouvelles.ca/videos/5117035533001',
+ 'info_dict': {
+ 'id': '5117035533001',
+ 'ext': 'mp4',
+ 'title': 'L’industrie du taxi dénonce l’entente entre Québec et Uber: explications',
+ 'description': 'md5:479653b7c8cf115747bf5118066bd8b3',
+ 'uploader_id': '1741764581',
+ 'timestamp': 1473352030,
+ 'upload_date': '20160908',
+ },
+ 'add_ie': ['BrightcoveNew'],
+ }
+ BRIGHTCOVE_URL_TEMPLATE = 'http://players.brightcove.net/1741764581/default_default/index.html?videoId=%s'
+
+ def _real_extract(self, url):
+ brightcove_id = self._match_id(url)
+ return self.url_result(
+ self.BRIGHTCOVE_URL_TEMPLATE % brightcove_id,
+ BrightcoveNewIE.ie_key(), brightcove_id)
+
+
+class TVANouvellesArticleIE(InfoExtractor):
+ _VALID_URL = r'https?://(?:www\.)?tvanouvelles\.ca/(?:[^/]+/)+(?P<id>[^/?#&]+)'
+ _TEST = {
+ 'url': 'http://www.tvanouvelles.ca/2016/11/17/des-policiers-qui-ont-la-meche-un-peu-courte',
+ 'info_dict': {
+ 'id': 'des-policiers-qui-ont-la-meche-un-peu-courte',
+ 'title': 'Des policiers qui ont «la mèche un peu courte»?',
+ 'description': 'md5:92d363c8eb0f0f030de9a4a84a90a3a0',
+ },
+ 'playlist_mincount': 4,
+ }
+
+ @classmethod
+ def suitable(cls, url):
+ return False if TVANouvellesIE.suitable(url) else super().suitable(url)
+
+ def _real_extract(self, url):
+ display_id = self._match_id(url)
+
+ webpage = self._download_webpage(url, display_id)
+
+ entries = [
+ self.url_result(
+ 'http://www.tvanouvelles.ca/videos/%s' % mobj.group('id'),
+ ie=TVANouvellesIE.ie_key(), video_id=mobj.group('id'))
+ for mobj in re.finditer(
+ r'data-video-id=(["\'])?(?P<id>\d+)', webpage)]
+
+ title = self._og_search_title(webpage, fatal=False)
+ description = self._og_search_description(webpage)
+
+ return self.playlist_result(entries, display_id, title, description)
diff --git a/yt_dlp/extractor/tvc.py b/yt_dlp/extractor/tvc.py
new file mode 100644
index 0000000..caa76ab
--- /dev/null
+++ b/yt_dlp/extractor/tvc.py
@@ -0,0 +1,97 @@
+from .common import InfoExtractor
+from ..utils import (
+ clean_html,
+ int_or_none,
+)
+
+
+class TVCIE(InfoExtractor):
+ _VALID_URL = r'https?://(?:www\.)?tvc\.ru/video/iframe/id/(?P<id>\d+)'
+ _EMBED_REGEX = [r'<iframe[^>]+?src=(["\'])(?P<url>(?:http:)?//(?:www\.)?tvc\.ru/video/iframe/id/[^"]+)\1']
+ _TEST = {
+ 'url': 'http://www.tvc.ru/video/iframe/id/74622/isPlay/false/id_stat/channel/?acc_video_id=/channel/brand/id/17/show/episodes/episode_id/39702',
+ 'md5': 'bbc5ff531d1e90e856f60fc4b3afd708',
+ 'info_dict': {
+ 'id': '74622',
+ 'ext': 'mp4',
+ 'title': 'События. "События". Эфир от 22.05.2015 14:30',
+ 'thumbnail': r're:^https?://.*\.jpg$',
+ 'duration': 1122,
+ },
+ }
+
+ def _real_extract(self, url):
+ video_id = self._match_id(url)
+
+ video = self._download_json(
+ 'http://www.tvc.ru/video/json/id/%s' % video_id, video_id)
+
+ formats = []
+ for info in video.get('path', {}).get('quality', []):
+ video_url = info.get('url')
+ if not video_url:
+ continue
+ format_id = self._search_regex(
+ r'cdnvideo/([^/]+?)(?:-[^/]+?)?/', video_url,
+ 'format id', default=None)
+ formats.append({
+ 'url': video_url,
+ 'format_id': format_id,
+ 'width': int_or_none(info.get('width')),
+ 'height': int_or_none(info.get('height')),
+ 'tbr': int_or_none(info.get('bitrate')),
+ })
+
+ return {
+ 'id': video_id,
+ 'title': video['title'],
+ 'thumbnail': video.get('picture'),
+ 'duration': int_or_none(video.get('duration')),
+ 'formats': formats,
+ }
+
+
+class TVCArticleIE(InfoExtractor):
+ _VALID_URL = r'https?://(?:www\.)?tvc\.ru/(?!video/iframe/id/)(?P<id>[^?#]+)'
+ _TESTS = [{
+ 'url': 'http://www.tvc.ru/channel/brand/id/29/show/episodes/episode_id/39702/',
+ 'info_dict': {
+ 'id': '74622',
+ 'ext': 'mp4',
+ 'title': 'События. "События". Эфир от 22.05.2015 14:30',
+ 'description': 'md5:ad7aa7db22903f983e687b8a3e98c6dd',
+ 'thumbnail': r're:^https?://.*\.jpg$',
+ 'duration': 1122,
+ },
+ }, {
+ 'url': 'http://www.tvc.ru/news/show/id/69944',
+ 'info_dict': {
+ 'id': '75399',
+ 'ext': 'mp4',
+ 'title': 'Эксперты: в столице встал вопрос о максимально безопасных остановках',
+ 'description': 'md5:f2098f71e21f309e89f69b525fd9846e',
+ 'thumbnail': r're:^https?://.*\.jpg$',
+ 'duration': 278,
+ },
+ }, {
+ 'url': 'http://www.tvc.ru/channel/brand/id/47/show/episodes#',
+ 'info_dict': {
+ 'id': '2185',
+ 'ext': 'mp4',
+ 'title': 'Ещё не поздно. Эфир от 03.08.2013',
+ 'description': 'md5:51fae9f3f8cfe67abce014e428e5b027',
+ 'thumbnail': r're:^https?://.*\.jpg$',
+ 'duration': 3316,
+ },
+ }]
+
+ def _real_extract(self, url):
+ webpage = self._download_webpage(url, self._match_id(url))
+ return {
+ '_type': 'url_transparent',
+ 'ie_key': 'TVC',
+ 'url': self._og_search_video_url(webpage),
+ 'title': clean_html(self._og_search_title(webpage)),
+ 'description': clean_html(self._og_search_description(webpage)),
+ 'thumbnail': self._og_search_thumbnail(webpage),
+ }
diff --git a/yt_dlp/extractor/tver.py b/yt_dlp/extractor/tver.py
new file mode 100644
index 0000000..5f78968
--- /dev/null
+++ b/yt_dlp/extractor/tver.py
@@ -0,0 +1,103 @@
+from .common import InfoExtractor
+from ..utils import (
+ ExtractorError,
+ join_nonempty,
+ smuggle_url,
+ str_or_none,
+ strip_or_none,
+ traverse_obj,
+)
+
+
+class TVerIE(InfoExtractor):
+ _VALID_URL = r'https?://(?:www\.)?tver\.jp/(?:(?P<type>lp|corner|series|episodes?|feature|tokyo2020/video)/)+(?P<id>[a-zA-Z0-9]+)'
+ _TESTS = [{
+ 'skip': 'videos are only available for 7 days',
+ 'url': 'https://tver.jp/episodes/ep83nf3w4p',
+ 'info_dict': {
+ 'title': '家事ヤロウ!!! 売り場席巻のチーズSP&財前直見×森泉親子の脱東京暮らし密着!',
+ 'description': 'md5:dc2c06b6acc23f1e7c730c513737719b',
+ 'series': '家事ヤロウ!!!',
+ 'episode': '売り場席巻のチーズSP&財前直見×森泉親子の脱東京暮らし密着!',
+ 'alt_title': '売り場席巻のチーズSP&財前直見×森泉親子の脱東京暮らし密着!',
+ 'channel': 'テレビ朝日',
+ },
+ 'add_ie': ['BrightcoveNew'],
+ }, {
+ 'url': 'https://tver.jp/corner/f0103888',
+ 'only_matching': True,
+ }, {
+ 'url': 'https://tver.jp/lp/f0033031',
+ 'only_matching': True,
+ }]
+ BRIGHTCOVE_URL_TEMPLATE = 'http://players.brightcove.net/%s/default_default/index.html?videoId=%s'
+ _PLATFORM_UID = None
+ _PLATFORM_TOKEN = None
+
+ def _real_initialize(self):
+ create_response = self._download_json(
+ 'https://platform-api.tver.jp/v2/api/platform_users/browser/create', None,
+ note='Creating session', data=b'device_type=pc', headers={
+ 'Origin': 'https://s.tver.jp',
+ 'Referer': 'https://s.tver.jp/',
+ 'Content-Type': 'application/x-www-form-urlencoded',
+ })
+ self._PLATFORM_UID = traverse_obj(create_response, ('result', 'platform_uid'))
+ self._PLATFORM_TOKEN = traverse_obj(create_response, ('result', 'platform_token'))
+
+ def _real_extract(self, url):
+ video_id, video_type = self._match_valid_url(url).group('id', 'type')
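+ # URLs other than /series/ and /episodes/ carry a canonical tver.jp
+ # link in their markup; resolve the real video ID from it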
+ if video_type not in {'series', 'episodes'}:
+ webpage = self._download_webpage(url, video_id, note='Resolving to new URL')
+ video_id = self._match_id(self._search_regex(
+ (r'canonical"\s*href="(https?://tver\.jp/[^"]+)"', r'&link=(https?://tver\.jp/[^?&]+)[?&]'),
+ webpage, 'url regex'))
+
+ episode_info = self._download_json(
+ f'https://platform-api.tver.jp/service/api/v1/callEpisode/{video_id}?require_data=mylist,later[epefy106ur],good[epefy106ur],resume[epefy106ur]',
+ video_id, fatal=False,
+ query={
+ 'platform_uid': self._PLATFORM_UID,
+ 'platform_token': self._PLATFORM_TOKEN,
+ }, headers={
+ 'x-tver-platform-type': 'web'
+ })
+ episode_content = traverse_obj(
+ episode_info, ('result', 'episode', 'content')) or {}
+
+ video_info = self._download_json(
+ f'https://statics.tver.jp/content/episode/{video_id}.json', video_id,
+ query={
+ 'v': str_or_none(episode_content.get('version')) or '5',
+ }, headers={
+ 'Origin': 'https://tver.jp',
+ 'Referer': 'https://tver.jp/',
+ })
+ p_id = video_info['video']['accountID']
+ r_id = traverse_obj(video_info, ('video', ('videoRefID', 'videoID')), get_all=False)
+ if not r_id:
+ raise ExtractorError('Failed to extract reference ID for Brightcove')
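+ # non-numeric IDs are Brightcove reference IDs and need the 'ref:' prefix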
+ if not r_id.isdigit():
+ r_id = f'ref:{r_id}'
+
+ episode = strip_or_none(episode_content.get('title'))
+ series = str_or_none(episode_content.get('seriesTitle'))
+ title = (
+ join_nonempty(series, episode, delim=' ')
+ or str_or_none(video_info.get('title')))
+ provider = str_or_none(episode_content.get('productionProviderName'))
+ onair_label = str_or_none(episode_content.get('broadcastDateLabel'))
+
+ return {
+ '_type': 'url_transparent',
+ 'title': title,
+ 'series': series,
+ 'episode': episode,
+ # an alternative title that some viewers consider the "full title"
+ 'alt_title': join_nonempty(title, provider, onair_label, delim=' '),
+ 'channel': provider,
+ 'description': str_or_none(video_info.get('description')),
+ 'url': smuggle_url(
+ self.BRIGHTCOVE_URL_TEMPLATE % (p_id, r_id), {'geo_countries': ['JP']}),
+ 'ie_key': 'BrightcoveNew',
+ }
diff --git a/yt_dlp/extractor/tvigle.py b/yt_dlp/extractor/tvigle.py
new file mode 100644
index 0000000..6c98219
--- /dev/null
+++ b/yt_dlp/extractor/tvigle.py
@@ -0,0 +1,133 @@
+from .common import InfoExtractor
+from ..utils import (
+ ExtractorError,
+ float_or_none,
+ int_or_none,
+ parse_age_limit,
+ try_get,
+ url_or_none,
+)
+
+
+class TvigleIE(InfoExtractor):
+ IE_NAME = 'tvigle'
+ IE_DESC = 'Интернет-телевидение Tvigle.ru'
+ _VALID_URL = r'https?://(?:www\.)?(?:tvigle\.ru/(?:[^/]+/)+(?P<display_id>[^/]+)/$|cloud\.tvigle\.ru/video/(?P<id>\d+))'
+ _EMBED_REGEX = [r'<iframe[^>]+?src=(["\'])(?P<url>(?:https?:)?//cloud\.tvigle\.ru/video/.+?)\1']
+
+ _GEO_BYPASS = False
+ _GEO_COUNTRIES = ['RU']
+
+ _TESTS = [
+ {
+ 'url': 'http://www.tvigle.ru/video/sokrat/',
+ 'info_dict': {
+ 'id': '1848932',
+ 'display_id': 'sokrat',
+ 'ext': 'mp4',
+ 'title': 'Сократ',
+ 'description': 'md5:d6b92ffb7217b4b8ebad2e7665253c17',
+ 'duration': 6586,
+ 'age_limit': 12,
+ },
+ 'skip': 'georestricted',
+ },
+ {
+ 'url': 'http://www.tvigle.ru/video/vladimir-vysotskii/vedushchii-teleprogrammy-60-minut-ssha-o-vladimire-vysotskom/',
+ 'info_dict': {
+ 'id': '5142516',
+ 'ext': 'flv',
+ 'title': 'Ведущий телепрограммы «60 минут» (США) о Владимире Высоцком',
+ 'description': 'md5:027f7dc872948f14c96d19b4178428a4',
+ 'duration': 186.080,
+ 'age_limit': 0,
+ },
+ 'skip': 'georestricted',
+ }, {
+ 'url': 'https://cloud.tvigle.ru/video/5267604/',
+ 'only_matching': True,
+ }
+ ]
+
+ def _real_extract(self, url):
+ mobj = self._match_valid_url(url)
+ video_id = mobj.group('id')
+ display_id = mobj.group('display_id')
+
+ if not video_id:
+ webpage = self._download_webpage(url, display_id)
+ video_id = self._html_search_regex(
+ (r'<div[^>]+class=["\']player["\'][^>]+id=["\'](\d+)',
+ r'cloudId\s*=\s*["\'](\d+)',
+ r'class="video-preview current_playing" id="(\d+)"'),
+ webpage, 'video id')
+
+ video_data = self._download_json(
+ 'http://cloud.tvigle.ru/api/play/video/%s/' % video_id, display_id)
+
+ item = video_data['playlist']['items'][0]
+
+ videos = item.get('videos')
+
+ error_message = item.get('errorMessage')
+ if not videos and error_message:
+ if item.get('isGeoBlocked') is True:
+ self.raise_geo_restricted(
+ msg=error_message, countries=self._GEO_COUNTRIES)
+ else:
+ raise ExtractorError(
+ '%s returned error: %s' % (self.IE_NAME, error_message),
+ expected=True)
+
+ title = item['title']
+ description = item.get('description')
+ thumbnail = item.get('thumbnail')
+ duration = float_or_none(item.get('durationMilliseconds'), 1000)
+ age_limit = parse_age_limit(item.get('ageRestrictions'))
+
+ formats = []
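+ # 'videos' maps each vcodec to either a manifest URL ('hls'/'dash')
+ # or a {format_id: direct_url} mapping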
+ for vcodec, url_or_fmts in (videos or {}).items():
+ if vcodec == 'hls':
+ m3u8_url = url_or_none(url_or_fmts)
+ if not m3u8_url:
+ continue
+ formats.extend(self._extract_m3u8_formats(
+ m3u8_url, video_id, ext='mp4', entry_protocol='m3u8_native',
+ m3u8_id='hls', fatal=False))
+ elif vcodec == 'dash':
+ mpd_url = url_or_none(url_or_fmts)
+ if not mpd_url:
+ continue
+ formats.extend(self._extract_mpd_formats(
+ mpd_url, video_id, mpd_id='dash', fatal=False))
+ else:
+ if not isinstance(url_or_fmts, dict):
+ continue
+ for format_id, video_url in url_or_fmts.items():
+ if format_id == 'm3u8':
+ continue
+ video_url = url_or_none(video_url)
+ if not video_url:
+ continue
+ height = self._search_regex(
+ r'^(\d+)[pP]$', format_id, 'height', default=None)
+ filesize = int_or_none(try_get(
+ item, lambda x: x['video_files_size'][vcodec][format_id]))
+ formats.append({
+ 'url': video_url,
+ 'format_id': '%s-%s' % (vcodec, format_id),
+ 'vcodec': vcodec,
+ 'height': int_or_none(height),
+ 'filesize': filesize,
+ })
+
+ return {
+ 'id': video_id,
+ 'display_id': display_id,
+ 'title': title,
+ 'description': description,
+ 'thumbnail': thumbnail,
+ 'duration': duration,
+ 'age_limit': age_limit,
+ 'formats': formats,
+ }
diff --git a/yt_dlp/extractor/tviplayer.py b/yt_dlp/extractor/tviplayer.py
new file mode 100644
index 0000000..7e9b04d
--- /dev/null
+++ b/yt_dlp/extractor/tviplayer.py
@@ -0,0 +1,78 @@
+from .common import InfoExtractor
+from ..utils import traverse_obj
+
+
+class TVIPlayerIE(InfoExtractor):
+ _VALID_URL = r'https?://tviplayer\.iol\.pt(/programa/[\w-]+/[a-f0-9]+)?/\w+/(?P<id>\w+)'
+ _TESTS = [{
+ 'url': 'https://tviplayer.iol.pt/programa/jornal-das-8/53c6b3903004dc006243d0cf/video/61c8e8b90cf2c7ea0f0f71a9',
+ 'info_dict': {
+ 'id': '61c8e8b90cf2c7ea0f0f71a9',
+ 'ext': 'mp4',
+ 'duration': 4167,
+ 'title': 'Jornal das 8 - 26 de dezembro de 2021',
+ 'thumbnail': 'https://www.iol.pt/multimedia/oratvi/multimedia/imagem/id/61c8ee630cf2cc58e7d98d9f/',
+ 'season_number': 8,
+ 'season': 'Season 8',
+ }
+ }, {
+ 'url': 'https://tviplayer.iol.pt/programa/isabel/62b471090cf26256cd2a8594/video/62be445f0cf2ea4f0a5218e5',
+ 'info_dict': {
+ 'id': '62be445f0cf2ea4f0a5218e5',
+ 'ext': 'mp4',
+ 'duration': 3255,
+ 'season': 'Season 1',
+ 'title': 'Isabel - Episódio 1',
+ 'thumbnail': 'https://www.iol.pt/multimedia/oratvi/multimedia/imagem/id/62beac200cf2f9a86eab856b/',
+ 'season_number': 1,
+ }
+ }, {
+ # no /programa/
+ 'url': 'https://tviplayer.iol.pt/video/62c4131c0cf2f9a86eac06bb',
+ 'info_dict': {
+ 'id': '62c4131c0cf2f9a86eac06bb',
+ 'ext': 'mp4',
+ 'title': 'David e Mickael Carreira respondem: «Qual é o próximo a ser pai?»',
+ 'thumbnail': 'https://www.iol.pt/multimedia/oratvi/multimedia/imagem/id/62c416490cf2ea367d4433fd/',
+ 'season': 'Season 2',
+ 'duration': 148,
+ 'season_number': 2,
+ }
+ }, {
+ # /episodio/ (episode) URL
+ 'url': 'https://tviplayer.iol.pt/programa/para-sempre/61716c360cf2365a5ed894c4/episodio/t1e187',
+ 'info_dict': {
+ 'id': 't1e187',
+ 'ext': 'mp4',
+ 'season': 'Season 1',
+ 'title': 'Quem denunciou Pedro?',
+ 'thumbnail': 'https://www.iol.pt/multimedia/oratvi/multimedia/imagem/id/62eda30b0cf2ea367d48973b/',
+ 'duration': 1250,
+ 'season_number': 1,
+ }
+ }]
+
+ def _real_initialize(self):
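+ # the matrix endpoint returns a wmsAuthSign token that is appended
+ # to the m3u8 URL in _real_extract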
+ self.wms_auth_sign_token = self._download_webpage(
+ 'https://services.iol.pt/matrix?userId=', 'wmsAuthSign',
+ note='Trying to get wmsAuthSign token')
+
+ def _real_extract(self, url):
+ video_id = self._match_id(url)
+ webpage = self._download_webpage(url, video_id)
+
+ json_data = self._search_json(
+ r'<script>\s*jsonData\s*=', webpage, 'json_data', video_id)
+
+ formats, subtitles = self._extract_m3u8_formats_and_subtitles(
+ f'{json_data["videoUrl"]}?wmsAuthSign={self.wms_auth_sign_token}',
+ video_id, ext='mp4')
+ return {
+ 'id': video_id,
+ 'title': json_data.get('title') or self._og_search_title(webpage),
+ 'thumbnail': json_data.get('cover') or self._og_search_thumbnail(webpage),
+ 'duration': json_data.get('duration'),
+ 'formats': formats,
+ 'subtitles': subtitles,
+ 'season_number': traverse_obj(json_data, ('program', 'seasonNum')),
+ }
diff --git a/yt_dlp/extractor/tvland.py b/yt_dlp/extractor/tvland.py
new file mode 100644
index 0000000..481d5eb
--- /dev/null
+++ b/yt_dlp/extractor/tvland.py
@@ -0,0 +1,37 @@
+from .mtv import MTVServicesInfoExtractor
+
+# TODO: Remove - no longer used; the service has moved to YouTube
+
+
+class TVLandIE(MTVServicesInfoExtractor):
+ IE_NAME = 'tvland.com'
+ _VALID_URL = r'https?://(?:www\.)?tvland\.com/(?:video-clips|(?:full-)?episodes)/(?P<id>[^/?#.]+)'
+ _FEED_URL = 'http://www.tvland.com/feeds/mrss/'
+ _TESTS = [{
+ # Geo-restricted. Without a proxy, metadata is still there. With a
+ # proxy, it redirects to http://m.tvland.com/app/
+ 'url': 'https://www.tvland.com/episodes/s04pzf/everybody-loves-raymond-the-dog-season-1-ep-19',
+ 'info_dict': {
+ 'description': 'md5:84928e7a8ad6649371fbf5da5e1ad75a',
+ 'title': 'The Dog',
+ },
+ 'playlist_mincount': 5,
+ 'skip': '404 Not found',
+ }, {
+ 'url': 'https://www.tvland.com/video-clips/4n87f2/younger-a-first-look-at-younger-season-6',
+ 'md5': 'e2c6389401cf485df26c79c247b08713',
+ 'info_dict': {
+ 'id': '891f7d3c-5b5b-4753-b879-b7ba1a601757',
+ 'ext': 'mp4',
+ 'title': 'Younger|April 30, 2019|6|NO-EPISODE#|A First Look at Younger Season 6',
+ 'description': 'md5:595ea74578d3a888ae878dfd1c7d4ab2',
+ 'upload_date': '20190430',
+ 'timestamp': 1556658000,
+ },
+ 'params': {
+ 'skip_download': True,
+ },
+ }, {
+ 'url': 'http://www.tvland.com/full-episodes/iu0hz6/younger-a-kiss-is-just-a-kiss-season-3-ep-301',
+ 'only_matching': True,
+ }]
diff --git a/yt_dlp/extractor/tvn24.py b/yt_dlp/extractor/tvn24.py
new file mode 100644
index 0000000..5276813
--- /dev/null
+++ b/yt_dlp/extractor/tvn24.py
@@ -0,0 +1,100 @@
+from .common import InfoExtractor
+from ..utils import (
+ int_or_none,
+ NO_DEFAULT,
+ unescapeHTML,
+)
+
+
+class TVN24IE(InfoExtractor):
+ _WORKING = False
+ _VALID_URL = r'https?://(?:(?:[^/]+)\.)?tvn24(?:bis)?\.pl/(?:[^/]+/)*(?P<id>[^/]+)'
+ _TESTS = [{
+ 'url': 'http://www.tvn24.pl/wiadomosci-z-kraju,3/oredzie-artura-andrusa,702428.html',
+ 'md5': 'fbdec753d7bc29d96036808275f2130c',
+ 'info_dict': {
+ 'id': '1584444',
+ 'ext': 'mp4',
+ 'title': '"Święta mają być wesołe, dlatego, ludziska, wszyscy pod jemiołę"',
+ 'description': 'Wyjątkowe orędzie Artura Andrusa, jednego z gości Szkła kontaktowego.',
+ 'thumbnail': 're:https?://.*[.]jpeg',
+ }
+ }, {
+ # different layout
+ 'url': 'https://tvnmeteo.tvn24.pl/magazyny/maja-w-ogrodzie,13/odcinki-online,1,4,1,0/pnacza-ptaki-i-iglaki-odc-691-hgtv-odc-29,1771763.html',
+ 'info_dict': {
+ 'id': '1771763',
+ 'ext': 'mp4',
+ 'title': 'Pnącza, ptaki i iglaki (odc. 691 /HGTV odc. 29)',
+ 'thumbnail': 're:https?://.*',
+ },
+ 'params': {
+ 'skip_download': True,
+ },
+ }, {
+ 'url': 'http://fakty.tvn24.pl/ogladaj-online,60/53-konferencja-bezpieczenstwa-w-monachium,716431.html',
+ 'only_matching': True,
+ }, {
+ 'url': 'http://sport.tvn24.pl/pilka-nozna,105/ligue-1-kamil-glik-rozcial-glowe-monaco-tylko-remisuje-z-bastia,716522.html',
+ 'only_matching': True,
+ }, {
+ 'url': 'http://tvn24bis.pl/poranek,146,m/gen-koziej-w-tvn24-bis-wracamy-do-czasow-zimnej-wojny,715660.html',
+ 'only_matching': True,
+ }, {
+ 'url': 'https://www.tvn24.pl/magazyn-tvn24/angie-w-jednej-czwartej-polka-od-szarej-myszki-do-cesarzowej-europy,119,2158',
+ 'only_matching': True,
+ }]
+
+ def _real_extract(self, url):
+ display_id = self._match_id(url)
+
+ webpage = self._download_webpage(url, display_id)
+
+ title = self._og_search_title(
+ webpage, default=None) or self._search_regex(
+ r'<h\d+[^>]+class=["\']magazineItemHeader[^>]+>(.+?)</h',
+ webpage, 'title')
+
+ def extract_json(attr, name, default=NO_DEFAULT, fatal=True):
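+ # data-* attributes carry HTML-escaped JSON; unescape before parsing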
+ return self._parse_json(
+ self._search_regex(
+ r'\b%s=(["\'])(?P<json>(?!\1).+?)\1' % attr, webpage,
+ name, group='json', default=default, fatal=fatal) or '{}',
+ display_id, transform_source=unescapeHTML, fatal=fatal)
+
+ quality_data = extract_json('data-quality', 'formats')
+
+ formats = []
+ for format_id, format_url in quality_data.items():
+ formats.append({
+ 'url': format_url,
+ 'format_id': format_id,
+ 'height': int_or_none(format_id.rstrip('p')),
+ })
+
+ description = self._og_search_description(webpage, default=None)
+ thumbnail = self._og_search_thumbnail(
+ webpage, default=None) or self._html_search_regex(
+ r'\bdata-poster=(["\'])(?P<url>(?!\1).+?)\1', webpage,
+ 'thumbnail', group='url')
+
+ video_id = None
+
+ share_params = extract_json(
+ 'data-share-params', 'share params', default=None)
+ if isinstance(share_params, dict):
+ video_id = share_params.get('id')
+
+ if not video_id:
+ video_id = self._search_regex(
+ r'data-vid-id=["\'](\d+)', webpage, 'video id',
+ default=None) or self._search_regex(
+ r',(\d+)\.html', url, 'video id', default=display_id)
+
+ return {
+ 'id': video_id,
+ 'title': title,
+ 'description': description,
+ 'thumbnail': thumbnail,
+ 'formats': formats,
+ }
diff --git a/yt_dlp/extractor/tvnoe.py b/yt_dlp/extractor/tvnoe.py
new file mode 100644
index 0000000..917c46b
--- /dev/null
+++ b/yt_dlp/extractor/tvnoe.py
@@ -0,0 +1,46 @@
+from .common import InfoExtractor
+from ..utils import (
+ clean_html,
+ get_element_by_class,
+ js_to_json,
+)
+
+
+class TVNoeIE(InfoExtractor):
+ _WORKING = False
+ _VALID_URL = r'https?://(?:www\.)?tvnoe\.cz/video/(?P<id>[0-9]+)'
+ _TEST = {
+ 'url': 'http://www.tvnoe.cz/video/10362',
+ 'md5': 'aee983f279aab96ec45ab6e2abb3c2ca',
+ 'info_dict': {
+ 'id': '10362',
+ 'ext': 'mp4',
+ 'series': 'Noční univerzita',
+ 'title': 'prof. Tomáš Halík, Th.D. - Návrat náboženství a střet civilizací',
+ 'description': 'md5:f337bae384e1a531a52c55ebc50fff41',
+ }
+ }
+
+ def _real_extract(self, url):
+ video_id = self._match_id(url)
+ webpage = self._download_webpage(url, video_id)
+
+ iframe_url = self._search_regex(
+ r'<iframe[^>]+src="([^"]+)"', webpage, 'iframe URL')
+
+ ifs_page = self._download_webpage(iframe_url, video_id)
+ jwplayer_data = self._find_jwplayer_data(
+ ifs_page, video_id, transform_source=js_to_json)
+ info_dict = self._parse_jwplayer_data(
+ jwplayer_data, video_id, require_title=False, base_url=iframe_url)
+
+ info_dict.update({
+ 'id': video_id,
+ 'title': clean_html(get_element_by_class(
+ 'field-name-field-podnazev', webpage)),
+ 'description': clean_html(get_element_by_class(
+ 'field-name-body', webpage)),
+ 'series': clean_html(get_element_by_class('title', webpage))
+ })
+
+ return info_dict
diff --git a/yt_dlp/extractor/tvopengr.py b/yt_dlp/extractor/tvopengr.py
new file mode 100644
index 0000000..e208e57
--- /dev/null
+++ b/yt_dlp/extractor/tvopengr.py
@@ -0,0 +1,116 @@
+from .common import InfoExtractor
+from ..utils import (
+ determine_ext,
+ get_elements_text_and_html_by_attribute,
+ scale_thumbnails_to_max_format_width,
+)
+
+
+class TVOpenGrBaseIE(InfoExtractor):
+ def _return_canonical_url(self, url, video_id):
+ webpage = self._download_webpage(url, video_id)
+ canonical_url = self._og_search_url(webpage)
+ title = self._og_search_title(webpage)
+ return self.url_result(canonical_url, ie=TVOpenGrWatchIE.ie_key(), video_id=video_id, video_title=title)
+
+
+class TVOpenGrWatchIE(TVOpenGrBaseIE):
+ IE_NAME = 'tvopengr:watch'
+ IE_DESC = 'tvopen.gr (and ethnos.gr) videos'
+ _VALID_URL = r'https?://(?P<netloc>(?:www\.)?(?:tvopen|ethnos)\.gr)/watch/(?P<id>\d+)/(?P<slug>[^/]+)'
+ _API_ENDPOINT = 'https://www.tvopen.gr/templates/data/player'
+
+ _TESTS = [{
+ 'url': 'https://www.ethnos.gr/watch/101009/nikoskaprabelosdenexoymekanenanasthenhsemethmethmetallaxhomikron',
+ 'md5': '8728570e3a72e0f8d9475ba94859fdc1',
+ 'info_dict': {
+ 'id': '101009',
+ 'title': 'md5:51f68773dcb6c70498cd326f45fefdf0',
+ 'display_id': 'nikoskaprabelosdenexoymekanenanasthenhsemethmethmetallaxhomikron',
+ 'description': 'md5:78fff49f18fb3effe41b070e5c7685d6',
+ 'thumbnail': 'https://opentv-static.siliconweb.com/imgHandler/1920/d573ba71-ec5f-43c6-b4cb-d181f327d3a8.jpg',
+ 'ext': 'mp4',
+ 'upload_date': '20220109',
+ 'timestamp': 1641686400,
+ },
+ }, {
+ 'url': 'https://www.tvopen.gr/watch/100979/se28099agapaomenalla7cepeisodio267cmhthrargiapashskakias',
+ 'md5': '38f98a1be0c577db4ea2d1b1c0770c48',
+ 'info_dict': {
+ 'id': '100979',
+ 'title': 'md5:e021f3001e16088ee40fa79b20df305b',
+ 'display_id': 'se28099agapaomenalla7cepeisodio267cmhthrargiapashskakias',
+ 'description': 'md5:ba17db53954134eb8d625d199e2919fb',
+ 'thumbnail': 'https://opentv-static.siliconweb.com/imgHandler/1920/9bb71cf1-21da-43a9-9d65-367950fde4e3.jpg',
+ 'ext': 'mp4',
+ 'upload_date': '20220108',
+ 'timestamp': 1641600000,
+ },
+ }]
+
+ def _extract_formats_and_subs(self, response, video_id):
+ formats, subs = [], {}
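+ # only the 'stream', 'httpstream' and 'mpegdash' entries are used;
+ # manifests are expanded, anything else becomes a direct-URL format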
+ for format_id, format_url in response.items():
+ if format_id not in ('stream', 'httpstream', 'mpegdash'):
+ continue
+ ext = determine_ext(format_url)
+ if ext == 'm3u8':
+ formats_, subs_ = self._extract_m3u8_formats_and_subtitles(
+ format_url, video_id, 'mp4', m3u8_id=format_id,
+ fatal=False)
+ elif ext == 'mpd':
+ formats_, subs_ = self._extract_mpd_formats_and_subtitles(
+ format_url, video_id, 'mp4', fatal=False)
+ else:
+ formats.append({
+ 'url': format_url,
+ 'format_id': format_id,
+ })
+ continue
+ formats.extend(formats_)
+ self._merge_subtitles(subs_, target=subs)
+ return formats, subs
+
+ def _real_extract(self, url):
+ netloc, video_id, display_id = self._match_valid_url(url).group('netloc', 'id', 'slug')
+ if 'tvopen.gr' not in netloc:
+ return self._return_canonical_url(url, video_id)
+ webpage = self._download_webpage(url, video_id)
+ info = self._search_json_ld(webpage, video_id, expected_type='VideoObject')
+ info['formats'], info['subtitles'] = self._extract_formats_and_subs(
+ self._download_json(self._API_ENDPOINT, video_id, query={'cid': video_id}),
+ video_id)
+ info['thumbnails'] = scale_thumbnails_to_max_format_width(
+ info['formats'], info['thumbnails'], r'(?<=/imgHandler/)\d+')
+ description, _html = next(get_elements_text_and_html_by_attribute('class', 'description', webpage))
+ if description and _html.startswith('<span '):
+ info['description'] = description
+ info['id'] = video_id
+ info['display_id'] = display_id
+ return info
+
+
+class TVOpenGrEmbedIE(TVOpenGrBaseIE):
+ IE_NAME = 'tvopengr:embed'
+ IE_DESC = 'tvopen.gr embedded videos'
+ _VALID_URL = r'(?:https?:)?//(?:www\.|cdn\.)?(?:tvopen|ethnos)\.gr/embed/(?P<id>\d+)'
+ _EMBED_REGEX = [rf'''<iframe[^>]+?src=(?P<_q1>["'])(?P<url>{_VALID_URL})(?P=_q1)''']
+
+ _TESTS = [{
+ 'url': 'https://cdn.ethnos.gr/embed/100963',
+ 'md5': '2da147881f45571d81662d94d086628b',
+ 'info_dict': {
+ 'id': '100963',
+ 'display_id': 'koronoiosapotoysdieythyntestonsxoleionselftestgiaosoysdenbrhkan',
+ 'title': 'md5:2c71876fadf0cda6043da0da5fca2936',
+ 'description': 'md5:17482b4432e5ed30eccd93b05d6ea509',
+ 'thumbnail': 'https://opentv-static.siliconweb.com/imgHandler/1920/5804e07f-799a-4247-a696-33842c94ca37.jpg',
+ 'ext': 'mp4',
+ 'upload_date': '20220108',
+ 'timestamp': 1641600000,
+ },
+ }]
+
+ def _real_extract(self, url):
+ video_id = self._match_id(url)
+ return self._return_canonical_url(url, video_id)
diff --git a/yt_dlp/extractor/tvp.py b/yt_dlp/extractor/tvp.py
new file mode 100644
index 0000000..a8d00e2
--- /dev/null
+++ b/yt_dlp/extractor/tvp.py
@@ -0,0 +1,642 @@
+import itertools
+import random
+import re
+
+from .common import InfoExtractor
+from ..utils import (
+ clean_html,
+ determine_ext,
+ dict_get,
+ ExtractorError,
+ int_or_none,
+ js_to_json,
+ str_or_none,
+ strip_or_none,
+ traverse_obj,
+ try_get,
+ url_or_none,
+)
+
+
+class TVPIE(InfoExtractor):
+ IE_NAME = 'tvp'
+ IE_DESC = 'Telewizja Polska'
+ _VALID_URL = r'https?://(?:[^/]+\.)?(?:tvp(?:parlament)?\.(?:pl|info)|tvpworld\.com|swipeto\.pl)/(?:(?!\d+/)[^/]+/)*(?P<id>\d+)(?:[/?#]|$)'
+
+ _TESTS = [{
+ # TVPlayer 2 in js wrapper
+ 'url': 'https://swipeto.pl/64095316/uliczny-foxtrot-wypozyczalnia-kaset-kto-pamieta-dvdvideo',
+ 'info_dict': {
+ 'id': '64095316',
+ 'ext': 'mp4',
+ 'title': 'Uliczny Foxtrot — Wypożyczalnia kaset. Kto pamięta DVD-Video?',
+ 'age_limit': 0,
+ 'duration': 374,
+ 'thumbnail': r're:https://.+',
+ },
+ 'expected_warnings': [
+ 'Failed to download ISM manifest: HTTP Error 404: Not Found',
+ 'Failed to download m3u8 information: HTTP Error 404: Not Found',
+ ],
+ }, {
+ # TVPlayer legacy
+ 'url': 'https://www.tvp.pl/polska-press-video-uploader/wideo/62042351',
+ 'info_dict': {
+ 'id': '62042351',
+ 'ext': 'mp4',
+ 'title': 'Wideo',
+ 'description': 'Wideo Kamera',
+ 'duration': 24,
+ 'age_limit': 0,
+ 'thumbnail': r're:https://.+',
+ },
+ }, {
+ # TVPlayer 2 in iframe
+ 'url': 'https://wiadomosci.tvp.pl/50725617/dzieci-na-sprzedaz-dla-homoseksualistow',
+ 'info_dict': {
+ 'id': '50725617',
+ 'ext': 'mp4',
+ 'title': 'Dzieci na sprzedaż dla homoseksualistów',
+ 'description': 'md5:7d318eef04e55ddd9f87a8488ac7d590',
+ 'age_limit': 12,
+ 'duration': 259,
+ 'thumbnail': r're:https://.+',
+ },
+ }, {
+ # TVPlayer 2 in client-side rendered website (regional; window.__newsData)
+ 'url': 'https://warszawa.tvp.pl/25804446/studio-yayo',
+ 'info_dict': {
+ 'id': '25804446',
+ 'ext': 'mp4',
+ 'title': 'Studio Yayo',
+ 'upload_date': '20160616',
+ 'timestamp': 1466075700,
+ 'age_limit': 0,
+ 'duration': 20,
+ 'thumbnail': r're:https://.+',
+ },
+ 'skip': 'Geo-blocked outside PL',
+ }, {
+ # TVPlayer 2 in client-side rendered website (tvp.info; window.__videoData)
+ 'url': 'https://www.tvp.info/52880236/09042021-0800',
+ 'info_dict': {
+ 'id': '52880236',
+ 'ext': 'mp4',
+ 'title': '09.04.2021, 08:00',
+ 'age_limit': 0,
+ 'thumbnail': r're:https://.+',
+ },
+ 'skip': 'Geo-blocked outside PL',
+ }, {
+ # client-side rendered (regional) program (playlist) page
+ 'url': 'https://opole.tvp.pl/9660819/rozmowa-dnia',
+ 'info_dict': {
+ 'id': '9660819',
+ 'description': 'Od poniedziałku do piątku o 18:55',
+ 'title': 'Rozmowa dnia',
+ },
+ 'playlist_mincount': 1800,
+ 'params': {
+ 'skip_download': True,
+ }
+ }, {
+ # ABC-specific video embedding
+ # moved to https://bajkowakraina.tvp.pl/wideo/50981130,teleranek,51027049,zubr,51116450
+ 'url': 'https://abc.tvp.pl/48636269/zubry-odc-124',
+ 'info_dict': {
+ 'id': '48320456',
+ 'ext': 'mp4',
+ 'title': 'Teleranek, Żubr',
+ },
+ 'skip': 'unavailable',
+ }, {
+ # yet another vue page
+ 'url': 'https://jp2.tvp.pl/46925618/filmy',
+ 'info_dict': {
+ 'id': '46925618',
+ 'title': 'Filmy',
+ },
+ 'playlist_mincount': 19,
+ }, {
+ 'url': 'http://vod.tvp.pl/seriale/obyczajowe/na-sygnale/sezon-2-27-/odc-39/17834272',
+ 'only_matching': True,
+ }, {
+ 'url': 'http://wiadomosci.tvp.pl/25169746/24052016-1200',
+ 'only_matching': True,
+ }, {
+ 'url': 'http://krakow.tvp.pl/25511623/25lecie-mck-wyjatkowe-miejsce-na-mapie-krakowa',
+ 'only_matching': True,
+ }, {
+ 'url': 'http://teleexpress.tvp.pl/25522307/wierni-wzieli-udzial-w-procesjach',
+ 'only_matching': True,
+ }, {
+ 'url': 'http://sport.tvp.pl/25522165/krychowiak-uspokaja-w-sprawie-kontuzji-dwa-tygodnie-to-maksimum',
+ 'only_matching': True,
+ }, {
+ 'url': 'http://www.tvp.info/25511919/trwa-rewolucja-wladza-zdecydowala-sie-na-pogwalcenie-konstytucji',
+ 'only_matching': True,
+ }, {
+ 'url': 'https://tvp.info/49193823/teczowe-flagi-na-pomnikach-prokuratura-wszczela-postepowanie-wieszwiecej',
+ 'only_matching': True,
+ }, {
+ 'url': 'https://www.tvpparlament.pl/retransmisje-vod/inne/wizyta-premiera-mateusza-morawieckiego-w-firmie-berotu-sp-z-oo/48857277',
+ 'only_matching': True,
+ }, {
+ 'url': 'https://tvpworld.com/48583640/tescos-polish-business-bought-by-danish-chain-netto',
+ 'only_matching': True,
+ }]
+
+ def _parse_vue_website_data(self, webpage, page_id):
+ website_data = self._search_regex([
+ # website - regional sites, tvp.info
+ # directory - jp2.tvp.pl
+ r'window\.__(?:website|directory)Data\s*=\s*({(?:.|\s)+?});',
+ ], webpage, 'website data')
+ if not website_data:
+ return None
+ return self._parse_json(website_data, page_id, transform_source=js_to_json)
+
+ def _extract_vue_video(self, video_data, page_id=None):
+ if isinstance(video_data, str):
+ video_data = self._parse_json(video_data, page_id, transform_source=js_to_json)
+ thumbnails = []
+ image = video_data.get('image')
+ if image:
+ for thumb in (image if isinstance(image, list) else [image]):
+ thmb_url = str_or_none(thumb.get('url'))
+ if thmb_url:
+ thumbnails.append({
+ 'url': thmb_url,
+ })
+ is_website = video_data.get('type') == 'website'
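+ # 'website' entries link to another TVP page; everything else is a
+ # video handled via TVPEmbedIE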
+ if is_website:
+ url = video_data['url']
+ else:
+ url = 'tvp:' + str_or_none(video_data.get('_id') or page_id)
+ return {
+ '_type': 'url_transparent',
+ 'id': str_or_none(video_data.get('_id') or page_id),
+ 'url': url,
+ 'ie_key': (TVPIE if is_website else TVPEmbedIE).ie_key(),
+ 'title': str_or_none(video_data.get('title')),
+ 'description': str_or_none(video_data.get('lead')),
+ 'timestamp': int_or_none(video_data.get('release_date_long')),
+ 'duration': int_or_none(video_data.get('duration')),
+ 'thumbnails': thumbnails,
+ }
+
+ def _handle_vuejs_page(self, url, webpage, page_id):
+ # vue client-side rendered sites (all regional pages + tvp.info)
+ video_data = self._search_regex([
+ r'window\.__(?:news|video)Data\s*=\s*({(?:.|\s)+?})\s*;',
+ ], webpage, 'video data', default=None)
+ if video_data:
+ return self._extract_vue_video(video_data, page_id=page_id)
+ # paged playlists
+ website_data = self._parse_vue_website_data(webpage, page_id)
+ if website_data:
+ entries = self._vuejs_entries(url, website_data, page_id)
+
+ return {
+ '_type': 'playlist',
+ 'id': page_id,
+ 'title': str_or_none(website_data.get('title')),
+ 'description': str_or_none(website_data.get('lead')),
+ 'entries': entries,
+ }
+ raise ExtractorError('Could not extract video/website data')
+
+ def _vuejs_entries(self, url, website_data, page_id):
+
+ def extract_videos(wd):
+ if wd.get('latestVideo'):
+ yield self._extract_vue_video(wd['latestVideo'])
+ for video in wd.get('videos') or []:
+ yield self._extract_vue_video(video)
+ for video in wd.get('items') or []:
+ yield self._extract_vue_video(video)
+
+ yield from extract_videos(website_data)
+
+ if (website_data.get('items_total_count') or 0) > (website_data.get('items_per_page') or 0):
+ for page in itertools.count(2):
+ page_website_data = self._parse_vue_website_data(
+ self._download_webpage(url, page_id, note='Downloading page #%d' % page,
+ query={'page': page}),
+ page_id)
+ if not page_website_data.get('videos') and not page_website_data.get('items'):
+ break
+ yield from extract_videos(page_website_data)
+
+ def _real_extract(self, url):
+ page_id = self._match_id(url)
+ webpage, urlh = self._download_webpage_handle(url, page_id)
+
+ # The URL may redirect to a VOD
+ # example: https://vod.tvp.pl/48463890/wadowickie-spotkania-z-janem-pawlem-ii
+ for ie_cls in (TVPVODSeriesIE, TVPVODVideoIE):
+ if ie_cls.suitable(urlh.url):
+ return self.url_result(urlh.url, ie=ie_cls.ie_key(), video_id=page_id)
+
+ if re.search(
+ r'window\.__(?:video|news|website|directory)Data\s*=',
+ webpage):
+ return self._handle_vuejs_page(url, webpage, page_id)
+
+ # classic server-side rendered sites
+ video_id = self._search_regex([
+ r'<iframe[^>]+src="[^"]*?embed\.php\?(?:[^&]+&)*ID=(\d+)',
+ r'<iframe[^>]+src="[^"]*?object_id=(\d+)',
+ r"object_id\s*:\s*'(\d+)'",
+ r'data-video-id="(\d+)"',
+
+ # abc.tvp.pl - there can be more than one video ID for what seems to be
+ # the same video; the first one is referred to as "copyid" and seems
+ # to be unused by the website
+ r'<script>\s*tvpabc\.video\.init\(\s*\d+,\s*(\d+)\s*\)\s*</script>',
+ ], webpage, 'video id', default=page_id)
+ return {
+ '_type': 'url_transparent',
+ 'url': 'tvp:' + video_id,
+ 'description': self._og_search_description(
+ webpage, default=None) or (self._html_search_meta(
+ 'description', webpage, default=None)
+ if '//s.tvp.pl/files/portal/v' in webpage else None),
+ 'thumbnail': self._og_search_thumbnail(webpage, default=None),
+ 'ie_key': 'TVPEmbed',
+ }
+
+
+class TVPStreamIE(InfoExtractor):
+ IE_NAME = 'tvp:stream'
+ _VALID_URL = r'(?:tvpstream:|https?://(?:tvpstream\.vod|stream)\.tvp\.pl/(?:\?(?:[^&]+[&;])*channel_id=)?)(?P<id>\d*)'
+ _TESTS = [{
+ 'url': 'https://stream.tvp.pl/?channel_id=56969941',
+ 'only_matching': True,
+ }, {
+ # untestable as "video" id changes many times across a day
+ 'url': 'https://tvpstream.vod.tvp.pl/?channel_id=1455',
+ 'only_matching': True,
+ }, {
+ 'url': 'tvpstream:39821455',
+ 'only_matching': True,
+ }, {
+ # the default stream when no channel_id is provided, most probably TVP Info
+ 'url': 'tvpstream:',
+ 'only_matching': True,
+ }, {
+ 'url': 'https://tvpstream.vod.tvp.pl/',
+ 'only_matching': True,
+ }]
+
+ def _real_extract(self, url):
+ channel_id = self._match_id(url)
+ channel_url = self._proto_relative_url('//stream.tvp.pl/?channel_id=%s' % (channel_id or 'default'))
+ webpage = self._download_webpage(channel_url, channel_id or 'default', 'Downloading channel webpage')
+ channels = self._search_json(
+ r'window\.__channels\s*=', webpage, 'channel list', channel_id,
+ contains_pattern=r'\[\s*{(?s:.+)}\s*]')
+ channel = traverse_obj(channels, (lambda _, v: channel_id == str(v['id'])), get_all=False) if channel_id else channels[0]
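+ # pick the programme currently marked as live on the selected channel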
+ audition = traverse_obj(channel, ('items', lambda _, v: v['is_live'] is True), get_all=False)
+ return {
+ '_type': 'url_transparent',
+ 'id': channel_id or channel['id'],
+ 'url': 'tvp:%s' % audition['video_id'],
+ 'title': audition.get('title'),
+ 'alt_title': channel.get('title'),
+ 'is_live': True,
+ 'ie_key': 'TVPEmbed',
+ }
+
+
+class TVPEmbedIE(InfoExtractor):
+ IE_NAME = 'tvp:embed'
+ IE_DESC = 'Telewizja Polska'
+ _GEO_BYPASS = False
+ _VALID_URL = r'''(?x)
+ (?:
+ tvp:
+ |https?://
+ (?:[^/]+\.)?
+ (?:tvp(?:parlament)?\.pl|tvp\.info|tvpworld\.com|swipeto\.pl)/
+ (?:sess/
+ (?:tvplayer\.php\?.*?object_id
+ |TVPlayer2/(?:embed|api)\.php\?.*[Ii][Dd])
+ |shared/details\.php\?.*?object_id)
+ =)
+ (?P<id>\d+)
+ '''
+ _EMBED_REGEX = [rf'(?x)<iframe[^>]+?src=(["\'])(?P<url>{_VALID_URL[4:]})']
+
+ _TESTS = [{
+ 'url': 'tvp:194536',
+ 'info_dict': {
+ 'id': '194536',
+ 'ext': 'mp4',
+ 'title': 'Czas honoru, odc. 13 – Władek',
+ 'description': 'md5:76649d2014f65c99477be17f23a4dead',
+ 'age_limit': 12,
+ 'duration': 2652,
+ 'series': 'Czas honoru',
+ 'episode': 'Episode 13',
+ 'episode_number': 13,
+ 'season': 'sezon 1',
+ 'thumbnail': r're:https://.+',
+ },
+ }, {
+ 'url': 'https://www.tvp.pl/sess/tvplayer.php?object_id=51247504&amp;autoplay=false',
+ 'info_dict': {
+ 'id': '51247504',
+ 'ext': 'mp4',
+ 'title': 'Razmova 091220',
+ 'duration': 876,
+ 'age_limit': 0,
+ 'thumbnail': r're:https://.+',
+ },
+ }, {
+ # TVPlayer2 embed URL
+ 'url': 'https://tvp.info/sess/TVPlayer2/embed.php?ID=50595757',
+ 'only_matching': True,
+ }, {
+ 'url': 'https://wiadomosci.tvp.pl/sess/TVPlayer2/api.php?id=51233452',
+ 'only_matching': True,
+ }, {
+ # pulsembed on dziennik.pl
+ 'url': 'https://www.tvp.pl/shared/details.php?copy_id=52205981&object_id=52204505&autoplay=false&is_muted=false&allowfullscreen=true&template=external-embed/video/iframe-video.html',
+ 'only_matching': True,
+ }]
+
+ def _real_extract(self, url):
+ video_id = self._match_id(url)
+
+ # it could be anything that is a valid JS function name
+ callback = random.choice((
+ 'jebac_pis',
+ 'jebacpis',
+ 'ziobro',
+ 'sasin70',
+ 'sasin_przejebal_70_milionow_PLN',
+ 'tvp_is_a_state_propaganda_service',
+ ))
+
+ webpage = self._download_webpage(
+ ('https://www.tvp.pl/sess/TVPlayer2/api.php?id=%s'
+ + '&@method=getTvpConfig&@callback=%s') % (video_id, callback), video_id)
+
+ # stripping JSONP padding
+ datastr = webpage[15 + len(callback):-3]
+ if datastr.startswith('null,'):
+ error = self._parse_json(datastr[5:], video_id, fatal=False)
+ error_desc = traverse_obj(error, (0, 'desc'))
+
+ if error_desc == 'Obiekt wymaga płatności':
+ raise ExtractorError('Video requires payment and log-in, but log-in is not implemented')
+
+ raise ExtractorError(error_desc or 'unexpected JSON error')
+
+ content = self._parse_json(datastr, video_id)['content']
+ info = content['info']
+ is_live = try_get(info, lambda x: x['isLive'], bool)
+
+ if info.get('isGeoBlocked'):
+ # the actual country list is not provided; we just assume it's always available in PL
+ self.raise_geo_restricted(countries=['PL'])
+
+ formats = []
+ for file in content['files']:
+ video_url = url_or_none(file.get('url'))
+ if not video_url:
+ continue
+ ext = determine_ext(video_url, None)
+ if ext == 'm3u8':
+ formats.extend(self._extract_m3u8_formats(video_url, video_id, m3u8_id='hls', fatal=False, live=is_live))
+ elif ext == 'mpd':
+ if is_live:
+ # doesn't work with either ffmpeg or the native downloader
+ continue
+ formats.extend(self._extract_mpd_formats(video_url, video_id, mpd_id='dash', fatal=False))
+ elif ext == 'f4m':
+ formats.extend(self._extract_f4m_formats(video_url, video_id, f4m_id='hds', fatal=False))
+ elif video_url.endswith('.ism/manifest'):
+ formats.extend(self._extract_ism_formats(video_url, video_id, ism_id='mss', fatal=False))
+ else:
+ formats.append({
+ 'format_id': 'direct',
+ 'url': video_url,
+ 'ext': ext or file.get('type'),
+ 'fps': int_or_none(traverse_obj(file, ('quality', 'fps'))),
+ 'tbr': int_or_none(traverse_obj(file, ('quality', 'bitrate')), scale=1000),
+ 'width': int_or_none(traverse_obj(file, ('quality', 'width'))),
+ 'height': int_or_none(traverse_obj(file, ('quality', 'height'))),
+ })
+
+ title = dict_get(info, ('subtitle', 'title', 'seoTitle'))
+ description = dict_get(info, ('description', 'seoDescription'))
+ thumbnails = []
+ for thumb in content.get('posters') or ():
+ thumb_url = thumb.get('src')
+ if not thumb_url or '{width}' in thumb_url or '{height}' in thumb_url:
+ continue
+ thumbnails.append({
+ 'url': thumb_url,
+ 'width': thumb.get('width'),
+ 'height': thumb.get('height'),
+ })
+ age_limit = try_get(info, lambda x: x['ageGroup']['minAge'], int)
+ if age_limit == 1:
+ age_limit = 0
+ duration = try_get(info, lambda x: x['duration'], int) if not is_live else None
+
+ subtitles = {}
+ for sub in content.get('subtitles') or []:
+ if not sub.get('url'):
+ continue
+ subtitles.setdefault(sub['lang'], []).append({
+ 'url': sub['url'],
+ 'ext': sub.get('type'),
+ })
+
+ info_dict = {
+ 'id': video_id,
+ 'title': title,
+ 'description': description,
+ 'thumbnails': thumbnails,
+ 'age_limit': age_limit,
+ 'is_live': is_live,
+ 'duration': duration,
+ 'formats': formats,
+ 'subtitles': subtitles,
+ }
+
+ # vod.tvp.pl
+ if info.get('vortalName') == 'vod':
+ info_dict.update({
+ 'title': '%s, %s' % (info.get('title'), info.get('subtitle')),
+ 'series': info.get('title'),
+ 'season': info.get('season'),
+ 'episode_number': info.get('episode'),
+ })
+
+ return info_dict
+
+
+class TVPVODBaseIE(InfoExtractor):
+ _API_BASE_URL = 'https://vod.tvp.pl/api/products'
+
+ def _call_api(self, resource, video_id, query={}, **kwargs):
+ is_valid = lambda x: 200 <= x < 300
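+ # 4xx responses are accepted as well so the API's JSON error body
+ # can be reported instead of a bare HTTP error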
+ document, urlh = self._download_json_handle(
+ f'{self._API_BASE_URL}/{resource}', video_id,
+ query={'lang': 'pl', 'platform': 'BROWSER', **query},
+ expected_status=lambda x: is_valid(x) or 400 <= x < 500, **kwargs)
+ if is_valid(urlh.status):
+ return document
+ raise ExtractorError(f'Woronicza said: {document.get("code")} (HTTP {urlh.status})')
+
+ def _parse_video(self, video, with_url=True):
+ info_dict = traverse_obj(video, {
+ 'id': ('id', {str_or_none}),
+ 'title': 'title',
+ 'age_limit': ('rating', {int_or_none}),
+ 'duration': ('duration', {int_or_none}),
+ 'episode_number': ('number', {int_or_none}),
+ 'series': ('season', 'serial', 'title', {str_or_none}),
+ 'thumbnails': ('images', ..., ..., {'url': ('url', {url_or_none})}),
+ })
+ info_dict['description'] = clean_html(dict_get(video, ('lead', 'description')))
+ if with_url:
+ info_dict.update({
+ '_type': 'url',
+ 'url': video['webUrl'],
+ 'ie_key': TVPVODVideoIE.ie_key(),
+ })
+ return info_dict
+
+
+class TVPVODVideoIE(TVPVODBaseIE):
+ IE_NAME = 'tvp:vod'
+ _VALID_URL = r'https?://vod\.tvp\.pl/(?P<category>[a-z\d-]+,\d+)/[a-z\d-]+(?<!-odcinki)(?:-odcinki,\d+/odcinek-\d+,S\d+E\d+)?,(?P<id>\d+)/?(?:[?#]|$)'
+
+ _TESTS = [{
+ 'url': 'https://vod.tvp.pl/dla-dzieci,24/laboratorium-alchemika-odcinki,309338/odcinek-24,S01E24,311357',
+ 'info_dict': {
+ 'id': '311357',
+ 'ext': 'mp4',
+ 'title': 'Tusze termiczne. Jak zobaczyć niewidoczne. Odcinek 24',
+ 'description': 'md5:1d4098d3e537092ccbac1abf49b7cd4c',
+ 'duration': 300,
+ 'episode_number': 24,
+ 'episode': 'Episode 24',
+ 'age_limit': 0,
+ 'series': 'Laboratorium alchemika',
+ 'thumbnail': 're:https?://.+',
+ },
+ 'params': {'skip_download': 'm3u8'},
+ }, {
+ 'url': 'https://vod.tvp.pl/filmy-dokumentalne,163/ukrainski-sluga-narodu,339667',
+ 'info_dict': {
+ 'id': '339667',
+ 'ext': 'mp4',
+ 'title': 'Ukraiński sługa narodu',
+ 'description': 'md5:b7940c0a8e439b0c81653a986f544ef3',
+ 'age_limit': 12,
+ 'duration': 3051,
+ 'thumbnail': 're:https?://.+',
+ 'subtitles': 'count:2',
+ },
+ 'params': {'skip_download': 'm3u8'},
+ }, {
+ 'note': 'embed fails with "payment required"',
+ 'url': 'https://vod.tvp.pl/seriale,18/polowanie-na-cmy-odcinki,390116/odcinek-7,S01E07,398869',
+ 'info_dict': {
+ 'id': '398869',
+ 'ext': 'mp4',
+ 'title': 'odc. 7',
+ 'description': 'md5:dd2bb33f023dc5c2fbaddfbe4cb5dba0',
+ 'duration': 2750,
+ 'age_limit': 16,
+ 'series': 'Polowanie na ćmy',
+ 'episode_number': 7,
+ 'episode': 'Episode 7',
+ 'thumbnail': 're:https?://.+',
+ },
+ 'params': {'skip_download': 'm3u8'},
+ }, {
+ 'url': 'https://vod.tvp.pl/live,1/tvp-world,399731',
+ 'info_dict': {
+ 'id': '399731',
+ 'ext': 'mp4',
+ 'title': r're:TVP WORLD \d{4}-\d{2}-\d{2} \d{2}:\d{2}',
+ 'live_status': 'is_live',
+ 'thumbnail': 're:https?://.+',
+ },
+ }]
+
+ def _real_extract(self, url):
+ category, video_id = self._match_valid_url(url).group('category', 'id')
+
+ is_live = category == 'live,1'
+ entity = 'lives' if is_live else 'vods'
+ info_dict = self._parse_video(self._call_api(f'{entity}/{video_id}', video_id), with_url=False)
+
+ playlist = self._call_api(f'{video_id}/videos/playlist', video_id, query={'videoType': 'MOVIE'})
+
+ info_dict['formats'] = []
+ for manifest_url in traverse_obj(playlist, ('sources', 'HLS', ..., 'src')):
+ info_dict['formats'].extend(self._extract_m3u8_formats(manifest_url, video_id, fatal=False))
+ for manifest_url in traverse_obj(playlist, ('sources', 'DASH', ..., 'src')):
+ info_dict['formats'].extend(self._extract_mpd_formats(manifest_url, video_id, fatal=False))
+
+ info_dict['subtitles'] = {}
+ for sub in playlist.get('subtitles') or []:
+ info_dict['subtitles'].setdefault(sub.get('language') or 'und', []).append({
+ 'url': sub['url'],
+ 'ext': 'ttml',
+ })
+
+ info_dict['is_live'] = is_live
+
+ return info_dict
+
+
+class TVPVODSeriesIE(TVPVODBaseIE):
+ IE_NAME = 'tvp:vod:series'
+ _VALID_URL = r'https?://vod\.tvp\.pl/[a-z\d-]+,\d+/[a-z\d-]+-odcinki,(?P<id>\d+)(?:\?[^#]+)?(?:#.+)?$'
+
+ _TESTS = [{
+ 'url': 'https://vod.tvp.pl/seriale,18/ranczo-odcinki,316445',
+ 'info_dict': {
+ 'id': '316445',
+ 'title': 'Ranczo',
+ 'age_limit': 12,
+ 'categories': ['seriale'],
+ },
+ 'playlist_count': 130,
+ }, {
+ 'url': 'https://vod.tvp.pl/programy,88/rolnik-szuka-zony-odcinki,284514',
+ 'only_matching': True,
+ }, {
+ 'url': 'https://vod.tvp.pl/dla-dzieci,24/laboratorium-alchemika-odcinki,309338',
+ 'only_matching': True,
+ }]
+
+ def _entries(self, seasons, playlist_id):
+ for season in seasons:
+ episodes = self._call_api(
+ f'vods/serials/{playlist_id}/seasons/{season["id"]}/episodes', playlist_id,
+ note=f'Downloading episode list for {season["title"]}')
+ yield from map(self._parse_video, episodes)
+
+ def _real_extract(self, url):
+ playlist_id = self._match_id(url)
+ metadata = self._call_api(
+ f'vods/serials/{playlist_id}', playlist_id,
+ note='Downloading serial metadata')
+ seasons = self._call_api(
+ f'vods/serials/{playlist_id}/seasons', playlist_id,
+ note='Downloading season list')
+ return self.playlist_result(
+ self._entries(seasons, playlist_id), playlist_id, strip_or_none(metadata.get('title')),
+ clean_html(traverse_obj(metadata, ('description', 'lead'), expected_type=strip_or_none)),
+ categories=[traverse_obj(metadata, ('mainCategory', 'name'))],
+ age_limit=int_or_none(metadata.get('rating')),
+ )
diff --git a/yt_dlp/extractor/tvplay.py b/yt_dlp/extractor/tvplay.py
new file mode 100644
index 0000000..48a6efe
--- /dev/null
+++ b/yt_dlp/extractor/tvplay.py
@@ -0,0 +1,306 @@
+import re
+
+from .common import InfoExtractor
+from ..compat import compat_urlparse
+from ..networking.exceptions import HTTPError
+from ..utils import (
+ determine_ext,
+ ExtractorError,
+ int_or_none,
+ parse_iso8601,
+ qualities,
+ traverse_obj,
+ try_get,
+ update_url_query,
+ url_or_none,
+ urljoin,
+)
+
+
+class TVPlayIE(InfoExtractor):
+ IE_NAME = 'mtg'
+ IE_DESC = 'MTG services'
+ _VALID_URL = r'''(?x)
+ (?:
+ mtg:|
+ https?://
+ (?:www\.)?
+ (?:
+ tvplay(?:\.skaties)?\.lv(?:/parraides)?|
+ (?:tv3play|play\.tv3)\.lt(?:/programos)?|
+ tv3play(?:\.tv3)?\.ee/sisu
+ )
+ /(?:[^/]+/)+
+ )
+ (?P<id>\d+)
+ '''
+ _TESTS = [
+ {
+ 'url': 'http://www.tvplay.lv/parraides/vinas-melo-labak/418113?autostart=true',
+ 'md5': 'a1612fe0849455423ad8718fe049be21',
+ 'info_dict': {
+ 'id': '418113',
+ 'ext': 'mp4',
+ 'title': 'Kādi ir īri? - Viņas melo labāk',
+ 'description': 'Baiba apsmej īrus, kādi tie ir un ko viņi dara.',
+ 'series': 'Viņas melo labāk',
+ 'season': '2.sezona',
+ 'season_number': 2,
+ 'duration': 25,
+ 'timestamp': 1406097056,
+ 'upload_date': '20140723',
+ },
+ },
+ {
+ 'url': 'http://play.tv3.lt/programos/moterys-meluoja-geriau/409229?autostart=true',
+ 'info_dict': {
+ 'id': '409229',
+ 'ext': 'flv',
+ 'title': 'Moterys meluoja geriau',
+ 'description': 'md5:9aec0fc68e2cbc992d2a140bd41fa89e',
+ 'series': 'Moterys meluoja geriau',
+ 'episode_number': 47,
+ 'season': '1 sezonas',
+ 'season_number': 1,
+ 'duration': 1330,
+ 'timestamp': 1403769181,
+ 'upload_date': '20140626',
+ },
+ 'params': {
+ # rtmp download
+ 'skip_download': True,
+ },
+ },
+ {
+ 'url': 'http://www.tv3play.ee/sisu/kodu-keset-linna/238551?autostart=true',
+ 'info_dict': {
+ 'id': '238551',
+ 'ext': 'flv',
+ 'title': 'Kodu keset linna 398537',
+ 'description': 'md5:7df175e3c94db9e47c0d81ffa5d68701',
+ 'duration': 1257,
+ 'timestamp': 1292449761,
+ 'upload_date': '20101215',
+ },
+ 'params': {
+ # rtmp download
+ 'skip_download': True,
+ },
+ },
+ {
+ 'url': 'http://tvplay.skaties.lv/parraides/vinas-melo-labak/418113?autostart=true',
+ 'only_matching': True,
+ },
+ {
+ 'url': 'https://tvplay.skaties.lv/vinas-melo-labak/418113/?autostart=true',
+ 'only_matching': True,
+ },
+ {
+ # views is null
+ 'url': 'http://tvplay.skaties.lv/parraides/tv3-zinas/760183',
+ 'only_matching': True,
+ },
+ {
+ 'url': 'http://tv3play.tv3.ee/sisu/kodu-keset-linna/238551?autostart=true',
+ 'only_matching': True,
+ },
+ {
+ 'url': 'mtg:418113',
+ 'only_matching': True,
+ }
+ ]
+
+ def _real_extract(self, url):
+ video_id = self._match_id(url)
+ geo_country = self._search_regex(
+ r'https?://[^/]+\.([a-z]{2})', url,
+ 'geo country', default=None)
+ if geo_country:
+ self._initialize_geo_bypass({'countries': [geo_country.upper()]})
+ video = self._download_json(
+ 'http://playapi.mtgx.tv/v3/videos/%s' % video_id, video_id, 'Downloading video JSON')
+
+ title = video['title']
+
+ try:
+ streams = self._download_json(
+ 'http://playapi.mtgx.tv/v3/videos/stream/%s' % video_id,
+ video_id, 'Downloading streams JSON')
+ except ExtractorError as e:
+ if isinstance(e.cause, HTTPError) and e.cause.status == 403:
+ msg = self._parse_json(e.cause.response.read().decode('utf-8'), video_id)
+ raise ExtractorError(msg['msg'], expected=True)
+ raise
+
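+ # preference order, lowest to highest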
+ quality = qualities(['hls', 'medium', 'high'])
+ formats = []
+ for format_id, video_url in streams.get('streams', {}).items():
+ video_url = url_or_none(video_url)
+ if not video_url:
+ continue
+ ext = determine_ext(video_url)
+ if ext == 'f4m':
+ formats.extend(self._extract_f4m_formats(
+ update_url_query(video_url, {
+ 'hdcore': '3.5.0',
+ 'plugin': 'aasp-3.5.0.151.81'
+ }), video_id, f4m_id='hds', fatal=False))
+ elif ext == 'm3u8':
+ formats.extend(self._extract_m3u8_formats(
+ video_url, video_id, 'mp4', 'm3u8_native',
+ m3u8_id='hls', fatal=False))
+ else:
+ fmt = {
+ 'format_id': format_id,
+ 'quality': quality(format_id),
+ 'ext': ext,
+ }
+ if video_url.startswith('rtmp'):
+ m = re.search(
+ r'^(?P<url>rtmp://[^/]+/(?P<app>[^/]+))/(?P<playpath>.+)$', video_url)
+ if not m:
+ continue
+ fmt.update({
+ 'ext': 'flv',
+ 'url': m.group('url'),
+ 'app': m.group('app'),
+ 'play_path': m.group('playpath'),
+ 'preference': -1,
+ })
+ else:
+ fmt.update({
+ 'url': video_url,
+ })
+ formats.append(fmt)
+
+ if not formats and video.get('is_geo_blocked'):
+ self.raise_geo_restricted(
+ 'This content might not be available in your country due to copyright reasons',
+ metadata_available=True)
+
+ # TODO: webvtt in m3u8
+ subtitles = {}
+ sami_path = video.get('sami_path')
+ if sami_path:
+ lang = self._search_regex(
+ r'_([a-z]{2})\.xml', sami_path, 'lang',
+ default=compat_urlparse.urlparse(url).netloc.rsplit('.', 1)[-1])
+ subtitles[lang] = [{
+ 'url': sami_path,
+ }]
+
+ series = video.get('format_title')
+ episode_number = int_or_none(video.get('format_position', {}).get('episode'))
+ season = video.get('_embedded', {}).get('season', {}).get('title')
+ season_number = int_or_none(video.get('format_position', {}).get('season'))
+
+ return {
+ 'id': video_id,
+ 'title': title,
+ 'description': video.get('description'),
+ 'series': series,
+ 'episode_number': episode_number,
+ 'season': season,
+ 'season_number': season_number,
+ 'duration': int_or_none(video.get('duration')),
+ 'timestamp': parse_iso8601(video.get('created_at')),
+ 'view_count': try_get(video, lambda x: x['views']['total'], int),
+ 'age_limit': int_or_none(video.get('age_limit', 0)),
+ 'formats': formats,
+ 'subtitles': subtitles,
+ }
+
+
+class TVPlayHomeIE(InfoExtractor):
+ _VALID_URL = r'''(?x)
+ https?://
+ (?:tv3?)?
+ play\.(?:tv3|skaties)\.(?P<country>lv|lt|ee)/
+ (?P<live>lives/)?
+ [^?#&]+(?:episode|programme|clip)-(?P<id>\d+)
+ '''
+ _TESTS = [{
+ 'url': 'https://play.tv3.lt/series/gauju-karai-karveliai,serial-2343791/serija-8,episode-2343828',
+ 'info_dict': {
+ 'id': '2343828',
+ 'ext': 'mp4',
+ 'title': 'Gaujų karai. Karveliai (2021) | S01E08: Serija 8',
+ 'description': 'md5:f6fcfbb236429f05531131640dfa7c81',
+ 'duration': 2710,
+ 'season': 'Gaujų karai. Karveliai',
+ 'season_number': 1,
+ 'release_year': 2021,
+ 'episode': 'Serija 8',
+ 'episode_number': 8,
+ },
+ 'params': {
+ 'skip_download': 'm3u8',
+ },
+ }, {
+ 'url': 'https://play.tv3.lt/series/moterys-meluoja-geriau-n-7,serial-2574652/serija-25,episode-3284937',
+ 'info_dict': {
+ 'id': '3284937',
+ 'ext': 'mp4',
+ 'season': 'Moterys meluoja geriau [N-7]',
+ 'season_number': 14,
+ 'release_year': 2021,
+ 'episode': 'Serija 25',
+ 'episode_number': 25,
+ 'title': 'Moterys meluoja geriau [N-7] (2021) | S14|E25: Serija 25',
+ 'description': 'md5:c6926e9710f1a126f028fbe121eddb79',
+ 'duration': 2440,
+ },
+ 'skip': '404'
+ }, {
+ 'url': 'https://play.tv3.lt/lives/tv6-lt,live-2838694/optibet-a-lygos-rungtynes-marijampoles-suduva--vilniaus-riteriai,programme-3422014',
+ 'only_matching': True,
+ }, {
+ 'url': 'https://tv3play.skaties.lv/series/women-lie-better-lv,serial-1024464/women-lie-better-lv,episode-1038762',
+ 'only_matching': True,
+ }, {
+ 'url': 'https://play.tv3.ee/series/_,serial-2654462/_,episode-2654474',
+ 'only_matching': True,
+ }, {
+ 'url': 'https://tv3play.skaties.lv/clips/tv3-zinas-valsti-lidz-15novembrim-bus-majsede,clip-3464509',
+ 'only_matching': True,
+ }]
+
+ def _real_extract(self, url):
+ country, is_live, video_id = self._match_valid_url(url).groups()
+
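+ # Live programmes and VODs are served from different API paths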
+ api_path = 'lives/programmes' if is_live else 'vods'
+ data = self._download_json(
+ urljoin(url, f'/api/products/{api_path}/{video_id}?platform=BROWSER&lang={country.upper()}'),
+ video_id)
+
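+ # For live catch-up, the playlist is keyed by programRecordingId rather than by the page's video ID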
+ video_type = 'CATCHUP' if is_live else 'MOVIE'
+ stream_id = data['programRecordingId'] if is_live else video_id
+ stream = self._download_json(
+ urljoin(url, f'/api/products/{stream_id}/videos/playlist?videoType={video_type}&platform=BROWSER'), video_id)
+ formats, subtitles = self._extract_m3u8_formats_and_subtitles(
+ stream['sources']['HLS'][0]['src'], video_id, 'mp4', 'm3u8_native', m3u8_id='hls')
+
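+ # Collect unique thumbnail URLs; 'galary' matches the (misspelled) key in the API response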
+ thumbnails = set(traverse_obj(
+ data, (('galary', 'images', 'artworks'), ..., ..., ('miniUrl', 'mainUrl')), expected_type=url_or_none))
+
+ return {
+ 'id': video_id,
+ 'title': self._resolve_title(data),
+ 'description': traverse_obj(data, 'description', 'lead'),
+ 'duration': int_or_none(data.get('duration')),
+ 'season': traverse_obj(data, ('season', 'serial', 'title')),
+ 'season_number': int_or_none(traverse_obj(data, ('season', 'number'))),
+ 'episode': data.get('title'),
+ 'episode_number': int_or_none(data.get('episode')),
+ 'release_year': int_or_none(traverse_obj(data, ('season', 'serial', 'year'))),
+ 'thumbnails': [{'url': url, 'ext': 'jpg'} for url in thumbnails],
+ 'formats': formats,
+ 'subtitles': subtitles,
+ }
+
+ @staticmethod
+ def _resolve_title(data):
+ return try_get(data, lambda x: (
+ f'{data["season"]["serial"]["title"]} ({data["season"]["serial"]["year"]}) | '
+ f'S{data["season"]["number"]:02d}E{data["episode"]:02d}: {data["title"]}'
+ )) or data.get('title')
diff --git a/yt_dlp/extractor/tvplayer.py b/yt_dlp/extractor/tvplayer.py
new file mode 100644
index 0000000..228c236
--- /dev/null
+++ b/yt_dlp/extractor/tvplayer.py
@@ -0,0 +1,80 @@
+from .common import InfoExtractor
+from ..compat import compat_str
+from ..networking.exceptions import HTTPError
+from ..utils import (
+ extract_attributes,
+ try_get,
+ urlencode_postdata,
+ ExtractorError,
+)
+
+
+class TVPlayerIE(InfoExtractor):
+ _VALID_URL = r'https?://(?:www\.)?tvplayer\.com/watch/(?P<id>[^/?#]+)'
+ _TEST = {
+ 'url': 'http://tvplayer.com/watch/bbcone',
+ 'info_dict': {
+ 'id': '89',
+ 'ext': 'mp4',
+ 'title': r're:^BBC One [0-9]{4}-[0-9]{2}-[0-9]{2} [0-9]{2}:[0-9]{2}$',
+ },
+ 'params': {
+ # m3u8 download
+ 'skip_download': True,
+ }
+ }
+
+ def _real_extract(self, url):
+ display_id = self._match_id(url)
+ webpage = self._download_webpage(url, display_id)
+
+ current_channel = extract_attributes(self._search_regex(
+ r'(<div[^>]+class="[^"]*current-channel[^"]*"[^>]*>)',
+ webpage, 'channel element'))
+ title = current_channel['data-name']
+
+ resource_id = current_channel['data-id']
+
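+ # The page embeds a token (data-token) that must accompany the stream-context request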
+ token = self._search_regex(
+ r'data-token=(["\'])(?P<token>(?!\1).+)\1', webpage,
+ 'token', group='token')
+
+ context = self._download_json(
+ 'https://tvplayer.com/watch/context', display_id,
+ 'Downloading JSON context', query={
+ 'resource': resource_id,
+ 'gen': token,
+ })
+
+ validate = context['validate']
+ platform = try_get(
+ context, lambda x: x['platform']['key'], compat_str) or 'firefox'
+
+ try:
+ response = self._download_json(
+ 'http://api.tvplayer.com/api/v2/stream/live',
+ display_id, 'Downloading JSON stream', headers={
+ 'Content-Type': 'application/x-www-form-urlencoded; charset=UTF-8',
+ }, data=urlencode_postdata({
+ 'id': resource_id,
+ 'service': 1,
+ 'platform': platform,
+ 'validate': validate,
+ }))['tvplayer']['response']
+ except ExtractorError as e:
+ if isinstance(e.cause, HTTPError):
+ response = self._parse_json(
+ e.cause.response.read().decode(), resource_id)['tvplayer']['response']
+ raise ExtractorError(
+ '%s said: %s' % (self.IE_NAME, response['error']), expected=True)
+ raise
+
+ formats = self._extract_m3u8_formats(response['stream'], display_id, 'mp4')
+
+ return {
+ 'id': resource_id,
+ 'display_id': display_id,
+ 'title': title,
+ 'formats': formats,
+ 'is_live': True,
+ }
diff --git a/yt_dlp/extractor/tweakers.py b/yt_dlp/extractor/tweakers.py
new file mode 100644
index 0000000..e8e1fc6
--- /dev/null
+++ b/yt_dlp/extractor/tweakers.py
@@ -0,0 +1,59 @@
+from .common import InfoExtractor
+from ..utils import (
+ int_or_none,
+ determine_ext,
+ mimetype2ext,
+)
+
+
+class TweakersIE(InfoExtractor):
+ _VALID_URL = r'https?://tweakers\.net/video/(?P<id>\d+)'
+ _TEST = {
+ 'url': 'https://tweakers.net/video/9926/new-nintendo-3ds-xl-op-alle-fronten-beter.html',
+ 'md5': 'fe73e417c093a788e0160c4025f88b15',
+ 'info_dict': {
+ 'id': '9926',
+ 'ext': 'mp4',
+ 'title': 'New Nintendo 3DS XL - Op alle fronten beter',
+ 'description': 'md5:3789b21fed9c0219e9bcaacd43fab280',
+ 'thumbnail': r're:^https?://.*\.jpe?g$',
+ 'duration': 386,
+ 'uploader_id': 's7JeEm',
+ }
+ }
+
+ def _real_extract(self, url):
+ video_id = self._match_id(url)
+ video_data = self._download_json(
+ 'https://tweakers.net/video/s1playlist/%s/1920/1080/playlist.json' % video_id,
+ video_id)['items'][0]
+
+ title = video_data['title']
+
+ formats = []
+ for location in video_data.get('locations', {}).get('progressive', []):
+ format_id = location.get('label')
+ width = int_or_none(location.get('width'))
+ height = int_or_none(location.get('height'))
+ for source in location.get('sources', []):
+ source_url = source.get('src')
+ if not source_url:
+ continue
+ ext = mimetype2ext(source.get('type')) or determine_ext(source_url)
+ formats.append({
+ 'format_id': format_id,
+ 'url': source_url,
+ 'width': width,
+ 'height': height,
+ 'ext': ext,
+ })
+
+ return {
+ 'id': video_id,
+ 'title': title,
+ 'description': video_data.get('description'),
+ 'thumbnail': video_data.get('poster'),
+ 'duration': int_or_none(video_data.get('duration')),
+ 'uploader_id': video_data.get('account'),
+ 'formats': formats,
+ }
diff --git a/yt_dlp/extractor/twentymin.py b/yt_dlp/extractor/twentymin.py
new file mode 100644
index 0000000..74f90b0
--- /dev/null
+++ b/yt_dlp/extractor/twentymin.py
@@ -0,0 +1,80 @@
+from .common import InfoExtractor
+from ..utils import (
+ int_or_none,
+ try_get,
+)
+
+
+class TwentyMinutenIE(InfoExtractor):
+ IE_NAME = '20min'
+ _VALID_URL = r'''(?x)
+ https?://
+ (?:www\.)?20min\.ch/
+ (?:
+ videotv/*\?.*?\bvid=|
+ videoplayer/videoplayer\.html\?.*?\bvideoId@
+ )
+ (?P<id>\d+)
+ '''
+ _EMBED_REGEX = [r'<iframe[^>]+src=(["\'])(?P<url>(?:(?:https?:)?//)?(?:www\.)?20min\.ch/videoplayer/videoplayer\.html\?.*?\bvideoId@\d+.*?)\1']
+ _TESTS = [{
+ 'url': 'http://www.20min.ch/videotv/?vid=469148&cid=2',
+ 'md5': 'e7264320db31eed8c38364150c12496e',
+ 'info_dict': {
+ 'id': '469148',
+ 'ext': 'mp4',
+ 'title': '85 000 Franken für 15 perfekte Minuten',
+ 'thumbnail': r're:https?://.*\.jpg$',
+ },
+ }, {
+ 'url': 'http://www.20min.ch/videoplayer/videoplayer.html?params=client@twentyDE|videoId@523629',
+ 'info_dict': {
+ 'id': '523629',
+ 'ext': 'mp4',
+ 'title': 'So kommen Sie bei Eis und Schnee sicher an',
+ 'description': 'md5:117c212f64b25e3d95747e5276863f7d',
+ 'thumbnail': r're:https?://.*\.jpg$',
+ },
+ 'params': {
+ 'skip_download': True,
+ },
+ }, {
+ 'url': 'http://www.20min.ch/videotv/?cid=44&vid=468738',
+ 'only_matching': True,
+ }]
+
+ def _real_extract(self, url):
+ video_id = self._match_id(url)
+
+ video = self._download_json(
+ 'http://api.20min.ch/video/%s/show' % video_id,
+ video_id)['content']
+
+ title = video['title']
+
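+ # Only two progressive renditions exist; the 'h' URL suffix selects HD.
+ # enumerate() yields quality 0 for SD and 1 for HD, so HD ranks higher.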
+ formats = [{
+ 'format_id': format_id,
+ 'url': 'http://podcast.20min-tv.ch/podcast/20min/%s%s.mp4' % (video_id, p),
+ 'quality': quality,
+ } for quality, (format_id, p) in enumerate([('sd', ''), ('hd', 'h')])]
+
+ description = video.get('lead')
+ thumbnail = video.get('thumbnail')
+
+ def extract_count(kind):
+ return try_get(
+ video,
+ lambda x: int_or_none(x['communityobject']['thumbs_%s' % kind]))
+
+ like_count = extract_count('up')
+ dislike_count = extract_count('down')
+
+ return {
+ 'id': video_id,
+ 'title': title,
+ 'description': description,
+ 'thumbnail': thumbnail,
+ 'like_count': like_count,
+ 'dislike_count': dislike_count,
+ 'formats': formats,
+ }
diff --git a/yt_dlp/extractor/twentythreevideo.py b/yt_dlp/extractor/twentythreevideo.py
new file mode 100644
index 0000000..290c376
--- /dev/null
+++ b/yt_dlp/extractor/twentythreevideo.py
@@ -0,0 +1,76 @@
+from .common import InfoExtractor
+from ..utils import int_or_none
+
+
+class TwentyThreeVideoIE(InfoExtractor):
+ IE_NAME = '23video'
+ _VALID_URL = r'https?://(?P<domain>[^.]+\.(?:twentythree\.net|23video\.com|filmweb\.no))/v\.ihtml/player\.html\?(?P<query>.*?\bphoto(?:_|%5f)id=(?P<id>\d+).*)'
+ _TESTS = [{
+ 'url': 'https://video.twentythree.net/v.ihtml/player.html?showDescriptions=0&source=site&photo%5fid=20448876&autoPlay=1',
+ 'md5': '75fcf216303eb1dae9920d651f85ced4',
+ 'info_dict': {
+ 'id': '20448876',
+ 'ext': 'mp4',
+ 'title': 'Video Marketing Minute: Personalized Video',
+ 'timestamp': 1513855354,
+ 'upload_date': '20171221',
+ 'uploader_id': '12258964',
+ 'uploader': 'Rasmus Bysted',
+ }
+ }, {
+ 'url': 'https://bonnier-publications-danmark.23video.com/v.ihtml/player.html?token=f0dc46476e06e13afd5a1f84a29e31e8&source=embed&photo%5fid=36137620',
+ 'only_matching': True,
+ }]
+
+ def _real_extract(self, url):
+ domain, query, photo_id = self._match_valid_url(url).groups()
+ base_url = 'https://%s' % domain
+ photo_data = self._download_json(
+ base_url + '/api/photo/list?' + query, photo_id, query={
+ 'format': 'json',
+ }, transform_source=lambda s: self._search_regex(r'(?s)({.+})', s, 'photo data'))['photo']
+ title = photo_data['title']
+
+ formats = []
+
+ audio_path = photo_data.get('audio_download')
+ if audio_path:
+ formats.append({
+ 'format_id': 'audio',
+ 'url': base_url + audio_path,
+ 'filesize': int_or_none(photo_data.get('audio_size')),
+ 'vcodec': 'none',
+ })
+
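+ # photo_data exposes each rendition via '<prefix>download'/'<prefix>width'/'<prefix>height' keys
+ # (e.g. 'video_hd_download'), so the same helper can fill both formats and thumbnails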
+ def add_common_info_to_list(l, template, id_field, id_value):
+ f_base = template % id_value
+ f_path = photo_data.get(f_base + 'download')
+ if not f_path:
+ return
+ l.append({
+ id_field: id_value,
+ 'url': base_url + f_path,
+ 'width': int_or_none(photo_data.get(f_base + 'width')),
+ 'height': int_or_none(photo_data.get(f_base + 'height')),
+ 'filesize': int_or_none(photo_data.get(f_base + 'size')),
+ })
+
+ for f in ('mobile_high', 'medium', 'hd', '1080p', '4k'):
+ add_common_info_to_list(formats, 'video_%s_', 'format_id', f)
+
+ thumbnails = []
+ for t in ('quad16', 'quad50', 'quad75', 'quad100', 'small', 'portrait', 'standard', 'medium', 'large', 'original'):
+ add_common_info_to_list(thumbnails, '%s_', 'id', t)
+
+ return {
+ 'id': photo_id,
+ 'title': title,
+ 'timestamp': int_or_none(photo_data.get('creation_date_epoch')),
+ 'duration': int_or_none(photo_data.get('video_length')),
+ 'view_count': int_or_none(photo_data.get('view_count')),
+ 'comment_count': int_or_none(photo_data.get('number_of_comments')),
+ 'uploader_id': photo_data.get('user_id'),
+ 'uploader': photo_data.get('display_name'),
+ 'thumbnails': thumbnails,
+ 'formats': formats,
+ }
diff --git a/yt_dlp/extractor/twitcasting.py b/yt_dlp/extractor/twitcasting.py
new file mode 100644
index 0000000..28ea16c
--- /dev/null
+++ b/yt_dlp/extractor/twitcasting.py
@@ -0,0 +1,306 @@
+import base64
+import itertools
+import re
+
+from .common import InfoExtractor
+from ..dependencies import websockets
+from ..utils import (
+ ExtractorError,
+ UserNotLive,
+ clean_html,
+ float_or_none,
+ get_element_by_class,
+ get_element_by_id,
+ parse_duration,
+ qualities,
+ str_to_int,
+ traverse_obj,
+ try_get,
+ unified_timestamp,
+ urlencode_postdata,
+ urljoin,
+)
+
+
+class TwitCastingIE(InfoExtractor):
+ _VALID_URL = r'https?://(?:[^/?#]+\.)?twitcasting\.tv/(?P<uploader_id>[^/?#]+)/(?:movie|twplayer)/(?P<id>\d+)'
+ _M3U8_HEADERS = {
+ 'Origin': 'https://twitcasting.tv',
+ 'Referer': 'https://twitcasting.tv/',
+ }
+ _TESTS = [{
+ 'url': 'https://twitcasting.tv/ivetesangalo/movie/2357609',
+ 'md5': '745243cad58c4681dc752490f7540d7f',
+ 'info_dict': {
+ 'id': '2357609',
+ 'ext': 'mp4',
+ 'title': 'Live #2357609',
+ 'uploader_id': 'ivetesangalo',
+ 'description': 'Twitter Oficial da cantora brasileira Ivete Sangalo.',
+ 'thumbnail': r're:^https?://.*\.jpg$',
+ 'upload_date': '20110822',
+ 'timestamp': 1313978424,
+ 'duration': 32,
+ 'view_count': int,
+ },
+ 'params': {
+ 'skip_download': True,
+ },
+ }, {
+ 'url': 'https://twitcasting.tv/mttbernardini/movie/3689740',
+ 'info_dict': {
+ 'id': '3689740',
+ 'ext': 'mp4',
+ 'title': 'Live playing something #3689740',
+ 'uploader_id': 'mttbernardini',
+ 'description': 'md5:1dc7efa2f1ab932fcd119265cebeec69',
+ 'thumbnail': r're:^https?://.*\.jpg$',
+ 'upload_date': '20120211',
+ 'timestamp': 1328995624,
+ 'duration': 681,
+ 'view_count': int,
+ },
+ 'params': {
+ 'skip_download': True,
+ 'videopassword': 'abc',
+ },
+ }, {
+ 'url': 'https://twitcasting.tv/loft_heaven/movie/685979292',
+ 'info_dict': {
+ 'id': '685979292',
+ 'ext': 'mp4',
+ 'title': '【無料配信】南波一海のhear/here “ナタリー望月哲さんに聞く編集と「渋谷系狂騒曲」”',
+ 'uploader_id': 'loft_heaven',
+ 'description': 'md5:3a0c7b53019df987ce545c935538bacf',
+ 'upload_date': '20210604',
+ 'timestamp': 1622802114,
+ 'thumbnail': r're:^https?://.*\.jpg$',
+ 'duration': 6964,
+ 'view_count': int,
+ },
+ 'params': {
+ 'skip_download': True,
+ },
+ }]
+
+ def _parse_data_movie_playlist(self, dmp, video_id):
+ # attempt 1: parse as JSON directly
+ try:
+ return self._parse_json(dmp, video_id)
+ except ExtractorError:
+ pass
+ # attempt 2: decode reversed base64
+ decoded = base64.b64decode(dmp[::-1])
+ return self._parse_json(decoded, video_id)
+
+ def _real_extract(self, url):
+ uploader_id, video_id = self._match_valid_url(url).groups()
+
+ webpage, urlh = self._download_webpage_handle(url, video_id)
+ video_password = self.get_param('videopassword')
+ request_data = None
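+ # If a password was supplied, re-submit the page's hidden form inputs together with it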
+ if video_password:
+ request_data = urlencode_postdata({
+ 'password': video_password,
+ **self._hidden_inputs(webpage),
+ }, encoding='utf-8')
+ webpage, urlh = self._download_webpage_handle(
+ url, video_id, data=request_data,
+ headers={'Origin': 'https://twitcasting.tv'},
+ note='Trying video password')
+ if urlh.url != url and request_data:
+ webpage = self._download_webpage(
+ urlh.url, video_id, data=request_data,
+ headers={'Origin': 'https://twitcasting.tv'},
+ note='Retrying authentication')
+ # We have to check here, as the first request can return the password form even when the password is correct
+ if re.search(r'<form\s+method="POST">\s*<input\s+[^>]+?name="password"', webpage):
+ raise ExtractorError('This video is protected by a password, use the --video-password option', expected=True)
+
+ title = (clean_html(get_element_by_id('movietitle', webpage))
+ or self._html_search_meta(['og:title', 'twitter:title'], webpage, fatal=True))
+
+ video_js_data = try_get(
+ webpage,
+ lambda x: self._parse_data_movie_playlist(self._search_regex(
+ r'data-movie-playlist=\'([^\']+?)\'',
+ x, 'movie playlist', default=None), video_id)['2'], list)
+
+ thumbnail = traverse_obj(video_js_data, (0, 'thumbnailUrl')) or self._og_search_thumbnail(webpage)
+ description = clean_html(get_element_by_id(
+ 'authorcomment', webpage)) or self._html_search_meta(
+ ['description', 'og:description', 'twitter:description'], webpage)
+ duration = (try_get(video_js_data, lambda x: sum(float_or_none(y.get('duration')) for y in x) / 1000)
+ or parse_duration(clean_html(get_element_by_class('tw-player-duration-time', webpage))))
+ view_count = str_to_int(self._search_regex(
+ (r'Total\s*:\s*Views\s*([\d,]+)', r'総視聴者\s*:\s*([\d,]+)\s*</'), webpage, 'views', None))
+ timestamp = unified_timestamp(self._search_regex(
+ r'data-toggle="true"[^>]+datetime="([^"]+)"',
+ webpage, 'datetime', None))
+
+ stream_server_data = self._download_json(
+ 'https://twitcasting.tv/streamserver.php?target=%s&mode=client' % uploader_id, video_id,
+ 'Downloading live info', fatal=False)
+
+ is_live = any(f'data-{x}' in webpage for x in ['is-onlive="true"', 'live-type="live"', 'status="online"'])
+ if not traverse_obj(stream_server_data, 'llfmp4') and is_live:
+ self.raise_login_required(method='cookies')
+
+ base_dict = {
+ 'title': title,
+ 'description': description,
+ 'thumbnail': thumbnail,
+ 'timestamp': timestamp,
+ 'uploader_id': uploader_id,
+ 'duration': duration,
+ 'view_count': view_count,
+ 'is_live': is_live,
+ }
+
+ def find_dmu(x):
+ data_movie_url = self._search_regex(
+ r'data-movie-url=(["\'])(?P<url>(?:(?!\1).)+)\1',
+ x, 'm3u8 url', group='url', default=None)
+ if data_movie_url:
+ return [data_movie_url]
+
+ m3u8_urls = (try_get(webpage, find_dmu, list)
+ or traverse_obj(video_js_data, (..., 'source', 'url'))
+ or ([f'https://twitcasting.tv/{uploader_id}/metastream.m3u8'] if is_live else None))
+ if not m3u8_urls:
+ raise ExtractorError('Failed to get m3u8 playlist')
+
+ if is_live:
+ m3u8_url = m3u8_urls[0]
+ formats = self._extract_m3u8_formats(
+ m3u8_url, video_id, ext='mp4', m3u8_id='hls',
+ live=True, headers=self._M3U8_HEADERS)
+
+ if traverse_obj(stream_server_data, ('hls', 'source')):
+ formats.extend(self._extract_m3u8_formats(
+ m3u8_url, video_id, ext='mp4', m3u8_id='source',
+ live=True, query={'mode': 'source'},
+ note='Downloading source quality m3u8',
+ headers=self._M3U8_HEADERS, fatal=False))
+
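+ # With the optional websockets dependency, the low-latency fMP4-over-WebSocket streams are exposed as extra formats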
+ if websockets:
+ qq = qualities(['base', 'mobilesource', 'main'])
+ streams = traverse_obj(stream_server_data, ('llfmp4', 'streams')) or {}
+ for mode, ws_url in streams.items():
+ formats.append({
+ 'url': ws_url,
+ 'format_id': 'ws-%s' % mode,
+ 'ext': 'mp4',
+ 'quality': qq(mode),
+ 'source_preference': -10,
+ # TwitCasting simply sends fMP4 moof atoms directly over the WebSocket
+ 'protocol': 'websocket_frag',
+ })
+
+ infodict = {
+ 'formats': formats,
+ '_format_sort_fields': ('source', ),
+ }
+ elif len(m3u8_urls) == 1:
+ formats = self._extract_m3u8_formats(
+ m3u8_urls[0], video_id, 'mp4', headers=self._M3U8_HEADERS)
+ infodict = {
+ # No problem here since there's only one manifest
+ 'formats': formats,
+ 'http_headers': self._M3U8_HEADERS,
+ }
+ else:
+ infodict = {
+ '_type': 'multi_video',
+ 'entries': [{
+ 'id': f'{video_id}-{num}',
+ 'url': m3u8_url,
+ 'ext': 'mp4',
+ # Requesting the manifests here would cause the download to fail,
+ # so let ffmpeg handle them instead. See: https://github.com/yt-dlp/yt-dlp/issues/382
+ 'protocol': 'm3u8',
+ 'http_headers': self._M3U8_HEADERS,
+ **base_dict,
+ } for (num, m3u8_url) in enumerate(m3u8_urls)],
+ }
+
+ return {
+ 'id': video_id,
+ **base_dict,
+ **infodict,
+ }
+
+
+class TwitCastingLiveIE(InfoExtractor):
+ _VALID_URL = r'https?://(?:[^/?#]+\.)?twitcasting\.tv/(?P<id>[^/?#]+)/?(?:[#?]|$)'
+ _TESTS = [{
+ 'url': 'https://twitcasting.tv/ivetesangalo',
+ 'only_matching': True,
+ }, {
+ 'url': 'https://twitcasting.tv/c:unusedlive',
+ 'expected_exception': 'UserNotLive',
+ }]
+
+ def _real_extract(self, url):
+ uploader_id = self._match_id(url)
+ self.to_screen(
+ 'Downloading live video of user {0}. '
+ 'Pass "https://twitcasting.tv/{0}/show" to download the history'.format(uploader_id))
+
+ is_live = traverse_obj(self._download_json(
+ f'https://frontendapi.twitcasting.tv/watch/user/{uploader_id}',
+ uploader_id, 'Checking live status', data=b'', fatal=False), ('is_live', {bool}))
+ if is_live is False: # only raise here if the API response was as expected
+ raise UserNotLive(video_id=uploader_id)
+
+ # Use /show/ page so that password-protected and members-only livestreams can be found
+ webpage = self._download_webpage(
+ f'https://twitcasting.tv/{uploader_id}/show/', uploader_id, 'Downloading live history')
+ is_live = is_live or self._search_regex(
+ r'(?s)(<span\s*class="tw-movie-thumbnail2-badge"\s*data-status="live">\s*LIVE)',
+ webpage, 'is live?', default=False)
+ # The current live stream is always the first match
+ current_live = self._search_regex(
+ r'(?s)<a\s+class="tw-movie-thumbnail2"\s+href="/[^/"]+/movie/(?P<video_id>\d+)"',
+ webpage, 'current live ID', default=None, group='video_id')
+ if not is_live or not current_live:
+ raise UserNotLive(video_id=uploader_id)
+
+ return self.url_result(f'https://twitcasting.tv/{uploader_id}/movie/{current_live}', TwitCastingIE)
+
+
+class TwitCastingUserIE(InfoExtractor):
+ _VALID_URL = r'https?://(?:[^/?#]+\.)?twitcasting\.tv/(?P<id>[^/?#]+)/(?:show|archive)/?(?:[#?]|$)'
+ _TESTS = [{
+ 'url': 'https://twitcasting.tv/natsuiromatsuri/archive/',
+ 'info_dict': {
+ 'id': 'natsuiromatsuri',
+ 'title': 'natsuiromatsuri - Live History',
+ },
+ 'playlist_mincount': 235,
+ }, {
+ 'url': 'https://twitcasting.tv/noriyukicas/show',
+ 'only_matching': True,
+ }]
+
+ def _entries(self, uploader_id):
+ base_url = next_url = 'https://twitcasting.tv/%s/show' % uploader_id
+ for page_num in itertools.count(1):
+ webpage = self._download_webpage(
+ next_url, uploader_id, query={'filter': 'watchable'}, note='Downloading page %d' % page_num)
+ matches = re.finditer(
+ r'(?s)<a\s+class="tw-movie-thumbnail2"\s+href="(?P<url>/[^/"]+/movie/\d+)"', webpage)
+ for mobj in matches:
+ yield self.url_result(urljoin(base_url, mobj.group('url')))
+
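+ # The next-page link has the form /<user>/show/<page>-<offset>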
+ next_url = self._search_regex(
+ r'<a href="(/%s/show/%d-\d+)[?"]' % (re.escape(uploader_id), page_num),
+ webpage, 'next url', default=None)
+ next_url = urljoin(base_url, next_url)
+ if not next_url:
+ return
+
+ def _real_extract(self, url):
+ uploader_id = self._match_id(url)
+ return self.playlist_result(
+ self._entries(uploader_id), uploader_id, '%s - Live History' % uploader_id)
diff --git a/yt_dlp/extractor/twitch.py b/yt_dlp/extractor/twitch.py
new file mode 100644
index 0000000..c55786a
--- /dev/null
+++ b/yt_dlp/extractor/twitch.py
@@ -0,0 +1,1211 @@
+import collections
+import itertools
+import json
+import random
+import re
+
+from .common import InfoExtractor
+from ..compat import (
+ compat_parse_qs,
+ compat_str,
+ compat_urllib_parse_urlparse,
+)
+from ..utils import (
+ ExtractorError,
+ UserNotLive,
+ base_url,
+ clean_html,
+ dict_get,
+ float_or_none,
+ int_or_none,
+ make_archive_id,
+ parse_duration,
+ parse_iso8601,
+ parse_qs,
+ qualities,
+ str_or_none,
+ traverse_obj,
+ try_get,
+ unified_timestamp,
+ update_url_query,
+ url_or_none,
+ urljoin,
+)
+
+
+class TwitchBaseIE(InfoExtractor):
+ _VALID_URL_BASE = r'https?://(?:(?:www|go|m)\.)?twitch\.tv'
+
+ _API_BASE = 'https://api.twitch.tv'
+ _USHER_BASE = 'https://usher.ttvnw.net'
+ _LOGIN_FORM_URL = 'https://www.twitch.tv/login'
+ _LOGIN_POST_URL = 'https://passport.twitch.tv/login'
+ _NETRC_MACHINE = 'twitch'
+
+ _OPERATION_HASHES = {
+ 'CollectionSideBar': '27111f1b382effad0b6def325caef1909c733fe6a4fbabf54f8d491ef2cf2f14',
+ 'FilterableVideoTower_Videos': 'a937f1d22e269e39a03b509f65a7490f9fc247d7f83d6ac1421523e3b68042cb',
+ 'ClipsCards__User': 'b73ad2bfaecfd30a9e6c28fada15bd97032c83ec77a0440766a56fe0bd632777',
+ 'ChannelCollectionsContent': '447aec6a0cc1e8d0a8d7732d47eb0762c336a2294fdb009e9c9d854e49d484b9',
+ 'StreamMetadata': 'a647c2a13599e5991e175155f798ca7f1ecddde73f7f341f39009c14dbf59962',
+ 'ComscoreStreamingQuery': 'e1edae8122517d013405f237ffcc124515dc6ded82480a88daef69c83b53ac01',
+ 'VideoAccessToken_Clip': '36b89d2507fce29e5ca551df756d27c1cfe079e2609642b4390aa4c35796eb11',
+ 'VideoPreviewOverlay': '3006e77e51b128d838fa4e835723ca4dc9a05c5efd4466c1085215c6e437e65c',
+ 'VideoMetadata': '49b5b8f268cdeb259d75b58dcb0c1a748e3b575003448a2333dc5cdafd49adad',
+ 'VideoPlayer_ChapterSelectButtonVideo': '8d2793384aac3773beab5e59bd5d6f585aedb923d292800119e03d40cd0f9b41',
+ 'VideoPlayer_VODSeekbarPreviewVideo': '07e99e4d56c5a7c67117a154777b0baf85a5ffefa393b213f4bc712ccaf85dd6',
+ }
+
+ @property
+ def _CLIENT_ID(self):
+ return self._configuration_arg(
+ 'client_id', ['ue6666qo983tsx6so1t0vnawi233wa'], ie_key='Twitch', casesense=True)[0]
+
+ def _perform_login(self, username, password):
+ def fail(message):
+ raise ExtractorError(
+ 'Unable to login. Twitch said: %s' % message, expected=True)
+
+ def login_step(page, urlh, note, data):
+ form = self._hidden_inputs(page)
+ form.update(data)
+
+ page_url = urlh.url
+ post_url = self._search_regex(
+ r'<form[^>]+action=(["\'])(?P<url>.+?)\1', page,
+ 'post url', default=self._LOGIN_POST_URL, group='url')
+ post_url = urljoin(page_url, post_url)
+
+ headers = {
+ 'Referer': page_url,
+ 'Origin': 'https://www.twitch.tv',
+ 'Content-Type': 'text/plain;charset=UTF-8',
+ }
+
+ response = self._download_json(
+ post_url, None, note, data=json.dumps(form).encode(),
+ headers=headers, expected_status=400)
+ error = dict_get(response, ('error', 'error_description', 'error_code'))
+ if error:
+ fail(error)
+
+ if 'Authenticated successfully' in response.get('message', ''):
+ return None, None
+
+ redirect_url = urljoin(
+ post_url,
+ response.get('redirect') or response['redirect_path'])
+ return self._download_webpage_handle(
+ redirect_url, None, 'Downloading login redirect page',
+ headers=headers)
+
+ login_page, handle = self._download_webpage_handle(
+ self._LOGIN_FORM_URL, None, 'Downloading login page')
+
+ # Some Tor nodes and public proxies are blocked completely
+ if 'blacklist_message' in login_page:
+ fail(clean_html(login_page))
+
+ redirect_page, handle = login_step(
+ login_page, handle, 'Logging in', {
+ 'username': username,
+ 'password': password,
+ 'client_id': self._CLIENT_ID,
+ })
+
+ # Successful login
+ if not redirect_page:
+ return
+
+ if re.search(r'(?i)<form[^>]+id="two-factor-submit"', redirect_page) is not None:
+ # TODO: Add mechanism to request an SMS or phone call
+ tfa_token = self._get_tfa_info('two-factor authentication token')
+ login_step(redirect_page, handle, 'Submitting TFA token', {
+ 'authy_token': tfa_token,
+ 'remember_2fa': 'true',
+ })
+
+ def _prefer_source(self, formats):
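+ # Prefer the original 'Source' encoding; older VODs lack the name but serve it from /chunked/ paths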
+ try:
+ source = next(f for f in formats if f['format_id'] == 'Source')
+ source['quality'] = 10
+ except StopIteration:
+ for f in formats:
+ if '/chunked/' in f['url']:
+ f.update({
+ 'quality': 10,
+ 'format_note': 'Source',
+ })
+
+ def _download_base_gql(self, video_id, ops, note, fatal=True):
+ headers = {
+ 'Content-Type': 'text/plain;charset=UTF-8',
+ 'Client-ID': self._CLIENT_ID,
+ }
+ gql_auth = self._get_cookies('https://gql.twitch.tv').get('auth-token')
+ if gql_auth:
+ headers['Authorization'] = 'OAuth ' + gql_auth.value
+ return self._download_json(
+ 'https://gql.twitch.tv/gql', video_id, note,
+ data=json.dumps(ops).encode(),
+ headers=headers, fatal=fatal)
+
+ def _download_gql(self, video_id, ops, note, fatal=True):
+ for op in ops:
+ op['extensions'] = {
+ 'persistedQuery': {
+ 'version': 1,
+ 'sha256Hash': self._OPERATION_HASHES[op['operationName']],
+ }
+ }
+ return self._download_base_gql(video_id, ops, note)
+
+ def _download_access_token(self, video_id, token_kind, param_name):
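+ # Builds a raw GraphQL query for either videoPlaybackAccessToken or streamPlaybackAccessToken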
+ method = '%sPlaybackAccessToken' % token_kind
+ ops = {
+ 'query': '''{
+ %s(
+ %s: "%s",
+ params: {
+ platform: "web",
+ playerBackend: "mediaplayer",
+ playerType: "site"
+ }
+ )
+ {
+ value
+ signature
+ }
+ }''' % (method, param_name, video_id),
+ }
+ return self._download_base_gql(
+ video_id, ops,
+ 'Downloading %s access token GraphQL' % token_kind)['data'][method]
+
+ def _get_thumbnails(self, thumbnail):
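+ # Rewriting the WxH component of the URL to 0x0 yields the original-resolution image, which is preferred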
+ return [{
+ 'url': re.sub(r'\d+x\d+(\.\w+)($|(?=[?#]))', r'0x0\g<1>', thumbnail),
+ 'preference': 1,
+ }, {
+ 'url': thumbnail,
+ }] if thumbnail else None
+
+ def _extract_twitch_m3u8_formats(self, path, video_id, token, signature):
+ return self._extract_m3u8_formats(
+ f'{self._USHER_BASE}/{path}/{video_id}.m3u8', video_id, 'mp4', query={
+ 'allow_source': 'true',
+ 'allow_audio_only': 'true',
+ 'allow_spectre': 'true',
+ 'p': random.randint(1000000, 10000000),
+ 'player': 'twitchweb',
+ 'playlist_include_framerate': 'true',
+ 'sig': signature,
+ 'token': token,
+ })
+
+
+class TwitchVodIE(TwitchBaseIE):
+ IE_NAME = 'twitch:vod'
+ _VALID_URL = r'''(?x)
+ https?://
+ (?:
+ (?:(?:www|go|m)\.)?twitch\.tv/(?:[^/]+/v(?:ideo)?|videos)/|
+ player\.twitch\.tv/\?.*?\bvideo=v?|
+ www\.twitch\.tv/[^/]+/schedule\?vodID=
+ )
+ (?P<id>\d+)
+ '''
+
+ _TESTS = [{
+ 'url': 'http://www.twitch.tv/riotgames/v/6528877?t=5m10s',
+ 'info_dict': {
+ 'id': 'v6528877',
+ 'ext': 'mp4',
+ 'title': 'LCK Summer Split - Week 6 Day 1',
+ 'thumbnail': r're:^https?://.*\.jpg$',
+ 'duration': 17208,
+ 'timestamp': 1435131734,
+ 'upload_date': '20150624',
+ 'uploader': 'Riot Games',
+ 'uploader_id': 'riotgames',
+ 'view_count': int,
+ 'start_time': 310,
+ 'chapters': [
+ {
+ 'start_time': 0,
+ 'end_time': 17208,
+ 'title': 'League of Legends'
+ }
+ ],
+ 'live_status': 'was_live',
+ },
+ 'params': {
+ # m3u8 download
+ 'skip_download': True,
+ },
+ }, {
+ # Untitled broadcast (title is None)
+ 'url': 'http://www.twitch.tv/belkao_o/v/11230755',
+ 'info_dict': {
+ 'id': 'v11230755',
+ 'ext': 'mp4',
+ 'title': 'Untitled Broadcast',
+ 'thumbnail': r're:^https?://.*\.jpg$',
+ 'duration': 1638,
+ 'timestamp': 1439746708,
+ 'upload_date': '20150816',
+ 'uploader': 'BelkAO_o',
+ 'uploader_id': 'belkao_o',
+ 'view_count': int,
+ },
+ 'params': {
+ # m3u8 download
+ 'skip_download': True,
+ },
+ 'skip': 'HTTP Error 404: Not Found',
+ }, {
+ 'url': 'http://player.twitch.tv/?t=5m10s&video=v6528877',
+ 'only_matching': True,
+ }, {
+ 'url': 'https://www.twitch.tv/videos/6528877',
+ 'only_matching': True,
+ }, {
+ 'url': 'https://m.twitch.tv/beagsandjam/v/247478721',
+ 'only_matching': True,
+ }, {
+ 'url': 'https://www.twitch.tv/northernlion/video/291940395',
+ 'only_matching': True,
+ }, {
+ 'url': 'https://player.twitch.tv/?video=480452374',
+ 'only_matching': True,
+ }, {
+ 'url': 'https://www.twitch.tv/videos/635475444',
+ 'info_dict': {
+ 'id': 'v635475444',
+ 'ext': 'mp4',
+ 'title': 'Riot Games',
+ 'duration': 11643,
+ 'uploader': 'Riot Games',
+ 'uploader_id': 'riotgames',
+ 'timestamp': 1590770569,
+ 'upload_date': '20200529',
+ 'chapters': [
+ {
+ 'start_time': 0,
+ 'end_time': 573,
+ 'title': 'League of Legends'
+ },
+ {
+ 'start_time': 573,
+ 'end_time': 3922,
+ 'title': 'Legends of Runeterra'
+ },
+ {
+ 'start_time': 3922,
+ 'end_time': 11643,
+ 'title': 'Art'
+ }
+ ],
+ 'live_status': 'was_live',
+ 'thumbnail': r're:^https?://.*\.jpg$',
+ 'view_count': int,
+ },
+ 'params': {
+ 'skip_download': True
+ },
+ }, {
+ 'note': 'Storyboards',
+ 'url': 'https://www.twitch.tv/videos/635475444',
+ 'info_dict': {
+ 'id': 'v635475444',
+ 'format_id': 'sb0',
+ 'ext': 'mhtml',
+ 'title': 'Riot Games',
+ 'duration': 11643,
+ 'uploader': 'Riot Games',
+ 'uploader_id': 'riotgames',
+ 'timestamp': 1590770569,
+ 'upload_date': '20200529',
+ 'chapters': [
+ {
+ 'start_time': 0,
+ 'end_time': 573,
+ 'title': 'League of Legends'
+ },
+ {
+ 'start_time': 573,
+ 'end_time': 3922,
+ 'title': 'Legends of Runeterra'
+ },
+ {
+ 'start_time': 3922,
+ 'end_time': 11643,
+ 'title': 'Art'
+ }
+ ],
+ 'live_status': 'was_live',
+ 'thumbnail': r're:^https?://.*\.jpg$',
+ 'view_count': int,
+ 'columns': int,
+ 'rows': int,
+ },
+ 'params': {
+ 'format': 'mhtml',
+ 'skip_download': True
+ }
+ }, {
+ 'note': 'VOD with single chapter',
+ 'url': 'https://www.twitch.tv/videos/1536751224',
+ 'info_dict': {
+ 'id': 'v1536751224',
+ 'ext': 'mp4',
+ 'title': 'Porter Robinson Star Guardian Stream Tour with LilyPichu',
+ 'duration': 8353,
+ 'uploader': 'Riot Games',
+ 'uploader_id': 'riotgames',
+ 'timestamp': 1658267731,
+ 'upload_date': '20220719',
+ 'chapters': [
+ {
+ 'start_time': 0,
+ 'end_time': 8353,
+ 'title': 'League of Legends'
+ }
+ ],
+ 'live_status': 'was_live',
+ 'thumbnail': r're:^https?://.*\.jpg$',
+ 'view_count': int,
+ },
+ 'params': {
+ 'skip_download': True
+ },
+ 'expected_warnings': ['Unable to download JSON metadata: HTTP Error 403: Forbidden']
+ }, {
+ 'url': 'https://www.twitch.tv/tangotek/schedule?vodID=1822395420',
+ 'only_matching': True,
+ }]
+
+ def _download_info(self, item_id):
+ data = self._download_gql(
+ item_id, [{
+ 'operationName': 'VideoMetadata',
+ 'variables': {
+ 'channelLogin': '',
+ 'videoID': item_id,
+ },
+ }, {
+ 'operationName': 'VideoPlayer_ChapterSelectButtonVideo',
+ 'variables': {
+ 'includePrivate': False,
+ 'videoID': item_id,
+ },
+ }, {
+ 'operationName': 'VideoPlayer_VODSeekbarPreviewVideo',
+ 'variables': {
+ 'includePrivate': False,
+ 'videoID': item_id,
+ },
+ }],
+ 'Downloading stream metadata GraphQL')
+
+ video = traverse_obj(data, (..., 'data', 'video'), get_all=False)
+ if video is None:
+ raise ExtractorError(f'Video {item_id} does not exist', expected=True)
+
+ video['moments'] = traverse_obj(data, (..., 'data', 'video', 'moments', 'edges', ..., 'node'))
+ video['storyboard'] = traverse_obj(
+ data, (..., 'data', 'video', 'seekPreviewsURL', {url_or_none}), get_all=False)
+
+ return video
+
+ def _extract_info(self, info):
+ status = info.get('status')
+ if status == 'recording':
+ is_live = True
+ elif status == 'recorded':
+ is_live = False
+ else:
+ is_live = None
+ _QUALITIES = ('small', 'medium', 'large')
+ quality_key = qualities(_QUALITIES)
+ thumbnails = []
+ preview = info.get('preview')
+ if isinstance(preview, dict):
+ for thumbnail_id, thumbnail_url in preview.items():
+ thumbnail_url = url_or_none(thumbnail_url)
+ if not thumbnail_url:
+ continue
+ if thumbnail_id not in _QUALITIES:
+ continue
+ thumbnails.append({
+ 'url': thumbnail_url,
+ 'preference': quality_key(thumbnail_id),
+ })
+ return {
+ 'id': info['_id'],
+ 'title': info.get('title') or 'Untitled Broadcast',
+ 'description': info.get('description'),
+ 'duration': int_or_none(info.get('length')),
+ 'thumbnails': thumbnails,
+ 'uploader': info.get('channel', {}).get('display_name'),
+ 'uploader_id': info.get('channel', {}).get('name'),
+ 'timestamp': parse_iso8601(info.get('recorded_at')),
+ 'view_count': int_or_none(info.get('views')),
+ 'is_live': is_live,
+ 'was_live': True,
+ }
+
+ def _extract_chapters(self, info, item_id):
+ if not info.get('moments'):
+ game = traverse_obj(info, ('game', 'displayName'))
+ if game:
+ yield {'title': game}
+ return
+
+ for moment in info['moments']:
+ start_time = int_or_none(moment.get('positionMilliseconds'), 1000)
+ duration = int_or_none(moment.get('durationMilliseconds'), 1000)
+ name = str_or_none(moment.get('description'))
+
+ if start_time is None or duration is None:
+ self.report_warning(f'Important chapter information missing for chapter {name}', item_id)
+ continue
+ yield {
+ 'start_time': start_time,
+ 'end_time': start_time + duration,
+ 'title': name,
+ }
+
+ def _extract_info_gql(self, info, item_id):
+ vod_id = info.get('id') or item_id
+ # id backward compatibility for download archives
+ if vod_id[0] != 'v':
+ vod_id = 'v%s' % vod_id
+ thumbnail = url_or_none(info.get('previewThumbnailURL'))
+ is_live = None
+ if thumbnail:
+ if re.findall(r'/404_processing_[^.?#]+\.png', thumbnail):
+ is_live, thumbnail = True, None
+ else:
+ is_live = False
+
+ return {
+ 'id': vod_id,
+ 'title': info.get('title') or 'Untitled Broadcast',
+ 'description': info.get('description'),
+ 'duration': int_or_none(info.get('lengthSeconds')),
+ 'thumbnails': self._get_thumbnails(thumbnail),
+ 'uploader': try_get(info, lambda x: x['owner']['displayName'], compat_str),
+ 'uploader_id': try_get(info, lambda x: x['owner']['login'], compat_str),
+ 'timestamp': unified_timestamp(info.get('publishedAt')),
+ 'view_count': int_or_none(info.get('viewCount')),
+ 'chapters': list(self._extract_chapters(info, item_id)),
+ 'is_live': is_live,
+ 'was_live': True,
+ }
+
+ def _extract_storyboard(self, item_id, storyboard_json_url, duration):
+ if not duration or not storyboard_json_url:
+ return
+ spec = self._download_json(storyboard_json_url, item_id, 'Downloading storyboard metadata JSON', fatal=False) or []
+ # Sort from highest quality to lowest so that sb0 is the highest-quality format,
+ # sb1 the next best, etc., consistent with YouTube's storyboard ordering
+ spec.sort(key=lambda x: int_or_none(x.get('width')) or 0, reverse=True)
+ base = base_url(storyboard_json_url)
+ for i, s in enumerate(spec):
+ count = int_or_none(s.get('count'))
+ images = s.get('images')
+ if not (images and count):
+ continue
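+ # 'count' is the total number of thumbnail tiles; each sheet in 'images' covers an equal slice of the VOD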
+ fragment_duration = duration / len(images)
+ yield {
+ 'format_id': f'sb{i}',
+ 'format_note': 'storyboard',
+ 'ext': 'mhtml',
+ 'protocol': 'mhtml',
+ 'acodec': 'none',
+ 'vcodec': 'none',
+ 'url': urljoin(base, images[0]),
+ 'width': int_or_none(s.get('width')),
+ 'height': int_or_none(s.get('height')),
+ 'fps': count / duration,
+ 'rows': int_or_none(s.get('rows')),
+ 'columns': int_or_none(s.get('cols')),
+ 'fragments': [{
+ 'url': urljoin(base, path),
+ 'duration': fragment_duration,
+ } for path in images],
+ }
+
+ def _real_extract(self, url):
+ vod_id = self._match_id(url)
+
+ video = self._download_info(vod_id)
+ info = self._extract_info_gql(video, vod_id)
+ access_token = self._download_access_token(vod_id, 'video', 'id')
+
+ formats = self._extract_twitch_m3u8_formats(
+ 'vod', vod_id, access_token['value'], access_token['signature'])
+ formats.extend(self._extract_storyboard(vod_id, video.get('storyboard'), info.get('duration')))
+
+ self._prefer_source(formats)
+ info['formats'] = formats
+
+ parsed_url = compat_urllib_parse_urlparse(url)
+ query = compat_parse_qs(parsed_url.query)
+ if 't' in query:
+ info['start_time'] = parse_duration(query['t'][0])
+
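+ # Chat replay ("rechat") is exposed as a JSON subtitle track via the v5 comments API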
+ if info.get('timestamp') is not None:
+ info['subtitles'] = {
+ 'rechat': [{
+ 'url': update_url_query(
+ 'https://api.twitch.tv/v5/videos/%s/comments' % vod_id, {
+ 'client_id': self._CLIENT_ID,
+ }),
+ 'ext': 'json',
+ }],
+ }
+
+ return info
+
+
+def _make_video_result(node):
+ assert isinstance(node, dict)
+ video_id = node.get('id')
+ if not video_id:
+ return
+ return {
+ '_type': 'url_transparent',
+ 'ie_key': TwitchVodIE.ie_key(),
+ 'id': 'v' + video_id,
+ 'url': 'https://www.twitch.tv/videos/%s' % video_id,
+ 'title': node.get('title'),
+ 'thumbnail': node.get('previewThumbnailURL'),
+ 'duration': float_or_none(node.get('lengthSeconds')),
+ 'view_count': int_or_none(node.get('viewCount')),
+ }
+
+
+class TwitchCollectionIE(TwitchBaseIE):
+ _VALID_URL = r'https?://(?:(?:www|go|m)\.)?twitch\.tv/collections/(?P<id>[^/]+)'
+
+ _TESTS = [{
+ 'url': 'https://www.twitch.tv/collections/wlDCoH0zEBZZbQ',
+ 'info_dict': {
+ 'id': 'wlDCoH0zEBZZbQ',
+ 'title': 'Overthrow Nook, capitalism for children',
+ },
+ 'playlist_mincount': 13,
+ }]
+
+ _OPERATION_NAME = 'CollectionSideBar'
+
+ def _real_extract(self, url):
+ collection_id = self._match_id(url)
+ collection = self._download_gql(
+ collection_id, [{
+ 'operationName': self._OPERATION_NAME,
+ 'variables': {'collectionID': collection_id},
+ }],
+ 'Downloading collection GraphQL')[0]['data']['collection']
+ title = collection.get('title')
+ entries = []
+ for edge in collection['items']['edges']:
+ if not isinstance(edge, dict):
+ continue
+ node = edge.get('node')
+ if not isinstance(node, dict):
+ continue
+ video = _make_video_result(node)
+ if video:
+ entries.append(video)
+ return self.playlist_result(
+ entries, playlist_id=collection_id, playlist_title=title)
+
+
+class TwitchPlaylistBaseIE(TwitchBaseIE):
+ _PAGE_LIMIT = 100
+
+ def _entries(self, channel_name, *args):
+ cursor = None
+ variables_common = self._make_variables(channel_name, *args)
+ entries_key = '%ss' % self._ENTRY_KIND
+ for page_num in itertools.count(1):
+ variables = variables_common.copy()
+ variables['limit'] = self._PAGE_LIMIT
+ if cursor:
+ variables['cursor'] = cursor
+ page = self._download_gql(
+ channel_name, [{
+ 'operationName': self._OPERATION_NAME,
+ 'variables': variables,
+ }],
+ 'Downloading %ss GraphQL page %s' % (self._NODE_KIND, page_num),
+ fatal=False)
+ if not page:
+ break
+ edges = try_get(
+ page, lambda x: x[0]['data']['user'][entries_key]['edges'], list)
+ if not edges:
+ break
+ for edge in edges:
+ if not isinstance(edge, dict):
+ continue
+ if edge.get('__typename') != self._EDGE_KIND:
+ continue
+ node = edge.get('node')
+ if not isinstance(node, dict):
+ continue
+ if node.get('__typename') != self._NODE_KIND:
+ continue
+ entry = self._extract_entry(node)
+ if entry:
+ cursor = edge.get('cursor')
+ yield entry
+ if not cursor or not isinstance(cursor, compat_str):
+ break
+
+
+class TwitchVideosIE(TwitchPlaylistBaseIE):
+ _VALID_URL = r'https?://(?:(?:www|go|m)\.)?twitch\.tv/(?P<id>[^/]+)/(?:videos|profile)'
+
+ _TESTS = [{
+ # All Videos sorted by Date
+ 'url': 'https://www.twitch.tv/spamfish/videos?filter=all',
+ 'info_dict': {
+ 'id': 'spamfish',
+ 'title': 'spamfish - All Videos sorted by Date',
+ },
+ 'playlist_mincount': 924,
+ }, {
+ # All Videos sorted by Popular
+ 'url': 'https://www.twitch.tv/spamfish/videos?filter=all&sort=views',
+ 'info_dict': {
+ 'id': 'spamfish',
+ 'title': 'spamfish - All Videos sorted by Popular',
+ },
+ 'playlist_mincount': 931,
+ }, {
+ # Past Broadcasts sorted by Date
+ 'url': 'https://www.twitch.tv/spamfish/videos?filter=archives',
+ 'info_dict': {
+ 'id': 'spamfish',
+ 'title': 'spamfish - Past Broadcasts sorted by Date',
+ },
+ 'playlist_mincount': 27,
+ }, {
+ # Highlights sorted by Date
+ 'url': 'https://www.twitch.tv/spamfish/videos?filter=highlights',
+ 'info_dict': {
+ 'id': 'spamfish',
+ 'title': 'spamfish - Highlights sorted by Date',
+ },
+ 'playlist_mincount': 901,
+ }, {
+ # Uploads sorted by Date
+ 'url': 'https://www.twitch.tv/esl_csgo/videos?filter=uploads&sort=time',
+ 'info_dict': {
+ 'id': 'esl_csgo',
+ 'title': 'esl_csgo - Uploads sorted by Date',
+ },
+ 'playlist_mincount': 5,
+ }, {
+ # Past Premieres sorted by Date
+ 'url': 'https://www.twitch.tv/spamfish/videos?filter=past_premieres',
+ 'info_dict': {
+ 'id': 'spamfish',
+ 'title': 'spamfish - Past Premieres sorted by Date',
+ },
+ 'playlist_mincount': 1,
+ }, {
+ 'url': 'https://www.twitch.tv/spamfish/videos/all',
+ 'only_matching': True,
+ }, {
+ 'url': 'https://m.twitch.tv/spamfish/videos/all',
+ 'only_matching': True,
+ }, {
+ 'url': 'https://www.twitch.tv/spamfish/videos',
+ 'only_matching': True,
+ }]
+
+ Broadcast = collections.namedtuple('Broadcast', ['type', 'label'])
+
+ _DEFAULT_BROADCAST = Broadcast(None, 'All Videos')
+ _BROADCASTS = {
+ 'archives': Broadcast('ARCHIVE', 'Past Broadcasts'),
+ 'highlights': Broadcast('HIGHLIGHT', 'Highlights'),
+ 'uploads': Broadcast('UPLOAD', 'Uploads'),
+ 'past_premieres': Broadcast('PAST_PREMIERE', 'Past Premieres'),
+ 'all': _DEFAULT_BROADCAST,
+ }
+
+ _DEFAULT_SORTED_BY = 'Date'
+ _SORTED_BY = {
+ 'time': _DEFAULT_SORTED_BY,
+ 'views': 'Popular',
+ }
+
+ _OPERATION_NAME = 'FilterableVideoTower_Videos'
+ _ENTRY_KIND = 'video'
+ _EDGE_KIND = 'VideoEdge'
+ _NODE_KIND = 'Video'
+
+ @classmethod
+ def suitable(cls, url):
+ return (False
+ if any(ie.suitable(url) for ie in (
+ TwitchVideosClipsIE,
+ TwitchVideosCollectionsIE))
+ else super(TwitchVideosIE, cls).suitable(url))
+
+ @staticmethod
+ def _make_variables(channel_name, broadcast_type, sort):
+ return {
+ 'channelOwnerLogin': channel_name,
+ 'broadcastType': broadcast_type,
+ 'videoSort': sort.upper(),
+ }
+
+ @staticmethod
+ def _extract_entry(node):
+ return _make_video_result(node)
+
+ def _real_extract(self, url):
+ channel_name = self._match_id(url)
+ qs = parse_qs(url)
+ filter = qs.get('filter', ['all'])[0]
+ sort = qs.get('sort', ['time'])[0]
+ broadcast = self._BROADCASTS.get(filter, self._DEFAULT_BROADCAST)
+ return self.playlist_result(
+ self._entries(channel_name, broadcast.type, sort),
+ playlist_id=channel_name,
+ playlist_title='%s - %s sorted by %s'
+ % (channel_name, broadcast.label,
+ self._SORTED_BY.get(sort, self._DEFAULT_SORTED_BY)))
+
+
+class TwitchVideosClipsIE(TwitchPlaylistBaseIE):
+ _VALID_URL = r'https?://(?:(?:www|go|m)\.)?twitch\.tv/(?P<id>[^/]+)/(?:clips|videos/*?\?.*?\bfilter=clips)'
+
+ _TESTS = [{
+ # Clips
+ 'url': 'https://www.twitch.tv/vanillatv/clips?filter=clips&range=all',
+ 'info_dict': {
+ 'id': 'vanillatv',
+ 'title': 'vanillatv - Clips Top All',
+ },
+ 'playlist_mincount': 1,
+ }, {
+ 'url': 'https://www.twitch.tv/dota2ruhub/videos?filter=clips&range=7d',
+ 'only_matching': True,
+ }]
+
+ Clip = collections.namedtuple('Clip', ['filter', 'label'])
+
+ _DEFAULT_CLIP = Clip('LAST_WEEK', 'Top 7D')
+ _RANGE = {
+ '24hr': Clip('LAST_DAY', 'Top 24H'),
+ '7d': _DEFAULT_CLIP,
+ '30d': Clip('LAST_MONTH', 'Top 30D'),
+ 'all': Clip('ALL_TIME', 'Top All'),
+ }
+
+ # NB: values other than 20 result in skipped videos
+ _PAGE_LIMIT = 20
+
+ _OPERATION_NAME = 'ClipsCards__User'
+ _ENTRY_KIND = 'clip'
+ _EDGE_KIND = 'ClipEdge'
+ _NODE_KIND = 'Clip'
+
+ @staticmethod
+ def _make_variables(channel_name, filter):
+ return {
+ 'login': channel_name,
+ 'criteria': {
+ 'filter': filter,
+ },
+ }
+
+ @staticmethod
+ def _extract_entry(node):
+ assert isinstance(node, dict)
+ clip_url = url_or_none(node.get('url'))
+ if not clip_url:
+ return
+ return {
+ '_type': 'url_transparent',
+ 'ie_key': TwitchClipsIE.ie_key(),
+ 'id': node.get('id'),
+ 'url': clip_url,
+ 'title': node.get('title'),
+ 'thumbnail': node.get('thumbnailURL'),
+ 'duration': float_or_none(node.get('durationSeconds')),
+ 'timestamp': unified_timestamp(node.get('createdAt')),
+ 'view_count': int_or_none(node.get('viewCount')),
+ 'language': node.get('language'),
+ }
+
+ def _real_extract(self, url):
+ channel_name = self._match_id(url)
+ qs = parse_qs(url)
+ range = qs.get('range', ['7d'])[0]
+ clip = self._RANGE.get(range, self._DEFAULT_CLIP)
+ return self.playlist_result(
+ self._entries(channel_name, clip.filter),
+ playlist_id=channel_name,
+ playlist_title='%s - Clips %s' % (channel_name, clip.label))
+
+
+class TwitchVideosCollectionsIE(TwitchPlaylistBaseIE):
+ _VALID_URL = r'https?://(?:(?:www|go|m)\.)?twitch\.tv/(?P<id>[^/]+)/videos/*?\?.*?\bfilter=collections'
+
+ _TESTS = [{
+ # Collections
+ 'url': 'https://www.twitch.tv/spamfish/videos?filter=collections',
+ 'info_dict': {
+ 'id': 'spamfish',
+ 'title': 'spamfish - Collections',
+ },
+ 'playlist_mincount': 3,
+ }, {
+ 'url': 'https://www.twitch.tv/monstercat/videos?filter=collections',
+ 'info_dict': {
+ 'id': 'monstercat',
+ 'title': 'monstercat - Collections',
+ },
+ 'playlist_mincount': 13,
+ }]
+
+ _OPERATION_NAME = 'ChannelCollectionsContent'
+ _ENTRY_KIND = 'collection'
+ _EDGE_KIND = 'CollectionsItemEdge'
+ _NODE_KIND = 'Collection'
+
+ @staticmethod
+ def _make_variables(channel_name):
+ return {
+ 'ownerLogin': channel_name,
+ }
+
+ @staticmethod
+ def _extract_entry(node):
+ assert isinstance(node, dict)
+ collection_id = node.get('id')
+ if not collection_id:
+ return
+ return {
+ '_type': 'url_transparent',
+ 'ie_key': TwitchCollectionIE.ie_key(),
+ 'id': collection_id,
+ 'url': 'https://www.twitch.tv/collections/%s' % collection_id,
+ 'title': node.get('title'),
+ 'thumbnail': node.get('thumbnailURL'),
+ 'duration': float_or_none(node.get('lengthSeconds')),
+ 'timestamp': unified_timestamp(node.get('updatedAt')),
+ 'view_count': int_or_none(node.get('viewCount')),
+ }
+
+ def _real_extract(self, url):
+ channel_name = self._match_id(url)
+ return self.playlist_result(
+ self._entries(channel_name), playlist_id=channel_name,
+ playlist_title='%s - Collections' % channel_name)
+
+
+class TwitchStreamIE(TwitchBaseIE):
+ IE_NAME = 'twitch:stream'
+ _VALID_URL = r'''(?x)
+ https?://
+ (?:
+ (?:(?:www|go|m)\.)?twitch\.tv/|
+ player\.twitch\.tv/\?.*?\bchannel=
+ )
+ (?P<id>[^/#?]+)
+ '''
+
+ _TESTS = [{
+ 'url': 'http://www.twitch.tv/shroomztv',
+ 'info_dict': {
+ 'id': '12772022048',
+ 'display_id': 'shroomztv',
+ 'ext': 'mp4',
+ 'title': 're:^ShroomzTV [0-9]{4}-[0-9]{2}-[0-9]{2} [0-9]{2}:[0-9]{2}$',
+ 'description': 'H1Z1 - lonewolfing with ShroomzTV | A3 Battle Royale later - @ShroomzTV',
+ 'is_live': True,
+ 'timestamp': 1421928037,
+ 'upload_date': '20150122',
+ 'uploader': 'ShroomzTV',
+ 'uploader_id': 'shroomztv',
+ 'view_count': int,
+ },
+ 'params': {
+ # m3u8 download
+ 'skip_download': True,
+ },
+ 'skip': 'User does not exist',
+ }, {
+ 'url': 'http://www.twitch.tv/miracle_doto#profile-0',
+ 'only_matching': True,
+ }, {
+ 'url': 'https://player.twitch.tv/?channel=lotsofs',
+ 'only_matching': True,
+ }, {
+ 'url': 'https://go.twitch.tv/food',
+ 'only_matching': True,
+ }, {
+ 'url': 'https://m.twitch.tv/food',
+ 'only_matching': True,
+ }, {
+ 'url': 'https://www.twitch.tv/monstercat',
+ 'info_dict': {
+ 'id': '40500071752',
+ 'display_id': 'monstercat',
+ 'title': 're:Monstercat',
+ 'description': 'md5:0945ad625e615bc8f0469396537d87d9',
+ 'is_live': True,
+ 'timestamp': 1677107190,
+ 'upload_date': '20230222',
+ 'uploader': 'Monstercat',
+ 'uploader_id': 'monstercat',
+ 'live_status': 'is_live',
+ 'thumbnail': 're:https://.*.jpg',
+ 'ext': 'mp4',
+ },
+ 'params': {
+ 'skip_download': 'Livestream',
+ },
+ }]
+
+ @classmethod
+ def suitable(cls, url):
+ return (False
+ if any(ie.suitable(url) for ie in (
+ TwitchVodIE,
+ TwitchCollectionIE,
+ TwitchVideosIE,
+ TwitchVideosClipsIE,
+ TwitchVideosCollectionsIE,
+ TwitchClipsIE))
+ else super(TwitchStreamIE, cls).suitable(url))
+
+ def _real_extract(self, url):
+ channel_name = self._match_id(url).lower()
+
+ gql = self._download_gql(
+ channel_name, [{
+ 'operationName': 'StreamMetadata',
+ 'variables': {'channelLogin': channel_name},
+ }, {
+ 'operationName': 'ComscoreStreamingQuery',
+ 'variables': {
+ 'channel': channel_name,
+ 'clipSlug': '',
+ 'isClip': False,
+ 'isLive': True,
+ 'isVodOrCollection': False,
+ 'vodID': '',
+ },
+ }, {
+ 'operationName': 'VideoPreviewOverlay',
+ 'variables': {'login': channel_name},
+ }],
+ 'Downloading stream GraphQL')
+
+ user = gql[0]['data']['user']
+
+ if not user:
+ raise ExtractorError(
+ '%s does not exist' % channel_name, expected=True)
+
+ stream = user['stream']
+
+ if not stream:
+ raise UserNotLive(video_id=channel_name)
+
+ access_token = self._download_access_token(
+ channel_name, 'stream', 'channelName')
+
+ stream_id = stream.get('id') or channel_name
+ formats = self._extract_twitch_m3u8_formats(
+ 'api/channel/hls', channel_name, access_token['value'], access_token['signature'])
+ self._prefer_source(formats)
+
+ view_count = stream.get('viewers')
+ timestamp = unified_timestamp(stream.get('createdAt'))
+
+ sq_user = try_get(gql, lambda x: x[1]['data']['user'], dict) or {}
+ uploader = sq_user.get('displayName')
+ description = try_get(
+ sq_user, lambda x: x['broadcastSettings']['title'], compat_str)
+
+ thumbnail = url_or_none(try_get(
+ gql, lambda x: x[2]['data']['user']['stream']['previewImageURL'],
+ compat_str))
+
+ title = uploader or channel_name
+ stream_type = stream.get('type')
+ if stream_type in ['rerun', 'live']:
+ title += ' (%s)' % stream_type
+
+ return {
+ 'id': stream_id,
+ 'display_id': channel_name,
+ 'title': title,
+ 'description': description,
+ 'thumbnails': self._get_thumbnails(thumbnail),
+ 'uploader': uploader,
+ 'uploader_id': channel_name,
+ 'timestamp': timestamp,
+ 'view_count': view_count,
+ 'formats': formats,
+ 'is_live': stream_type == 'live',
+ }
+
+
+class TwitchClipsIE(TwitchBaseIE):
+ IE_NAME = 'twitch:clips'
+ _VALID_URL = r'''(?x)
+ https?://
+ (?:
+ clips\.twitch\.tv/(?:embed\?.*?\bclip=|(?:[^/]+/)*)|
+ (?:(?:www|go|m)\.)?twitch\.tv/(?:[^/]+/)?clip/
+ )
+ (?P<id>[^/?#&]+)
+ '''
+
+ _TESTS = [{
+ 'url': 'https://clips.twitch.tv/FaintLightGullWholeWheat',
+ 'md5': '761769e1eafce0ffebfb4089cb3847cd',
+ 'info_dict': {
+ 'id': '42850523',
+ 'display_id': 'FaintLightGullWholeWheat',
+ 'ext': 'mp4',
+ 'title': 'EA Play 2016 Live from the Novo Theatre',
+ 'thumbnail': r're:^https?://.*\.jpg',
+ 'timestamp': 1465767393,
+ 'upload_date': '20160612',
+ 'creator': 'EA',
+ 'uploader': 'stereotype_',
+ 'uploader_id': '43566419',
+ },
+ }, {
+ # multiple formats
+ 'url': 'https://clips.twitch.tv/rflegendary/UninterestedBeeDAESuppy',
+ 'only_matching': True,
+ }, {
+ 'url': 'https://www.twitch.tv/sergeynixon/clip/StormyThankfulSproutFutureMan',
+ 'only_matching': True,
+ }, {
+ 'url': 'https://clips.twitch.tv/embed?clip=InquisitiveBreakableYogurtJebaited',
+ 'only_matching': True,
+ }, {
+ 'url': 'https://m.twitch.tv/rossbroadcast/clip/ConfidentBraveHumanChefFrank',
+ 'only_matching': True,
+ }, {
+ 'url': 'https://go.twitch.tv/rossbroadcast/clip/ConfidentBraveHumanChefFrank',
+ 'only_matching': True,
+ }, {
+ 'url': 'https://m.twitch.tv/clip/FaintLightGullWholeWheat',
+ 'only_matching': True,
+ }]
+
+ def _real_extract(self, url):
+ video_id = self._match_id(url)
+
+ clip = self._download_gql(
+ video_id, [{
+ 'operationName': 'VideoAccessToken_Clip',
+ 'variables': {
+ 'slug': video_id,
+ },
+ }],
+ 'Downloading clip access token GraphQL')[0]['data']['clip']
+
+ if not clip:
+ raise ExtractorError(
+ 'This clip is no longer available', expected=True)
+
+ access_query = {
+ 'sig': clip['playbackAccessToken']['signature'],
+ 'token': clip['playbackAccessToken']['value'],
+ }
+
+ data = self._download_base_gql(
+ video_id, {
+ 'query': '''{
+ clip(slug: "%s") {
+ broadcaster {
+ displayName
+ }
+ createdAt
+ curator {
+ displayName
+ id
+ }
+ durationSeconds
+ id
+ tiny: thumbnailURL(width: 86, height: 45)
+ small: thumbnailURL(width: 260, height: 147)
+ medium: thumbnailURL(width: 480, height: 272)
+ title
+ videoQualities {
+ frameRate
+ quality
+ sourceURL
+ }
+ viewCount
+ }
+}''' % video_id}, 'Downloading clip GraphQL', fatal=False)
+
+ if data:
+ clip = try_get(data, lambda x: x['data']['clip'], dict) or clip
+
+ formats = []
+ for option in clip.get('videoQualities', []):
+ if not isinstance(option, dict):
+ continue
+ source = url_or_none(option.get('sourceURL'))
+ if not source:
+ continue
+ formats.append({
+ 'url': update_url_query(source, access_query),
+ 'format_id': option.get('quality'),
+ 'height': int_or_none(option.get('quality')),
+ 'fps': int_or_none(option.get('frameRate')),
+ })
+
+ thumbnails = []
+ for thumbnail_id in ('tiny', 'small', 'medium'):
+ thumbnail_url = clip.get(thumbnail_id)
+ if not thumbnail_url:
+ continue
+ thumb = {
+ 'id': thumbnail_id,
+ 'url': thumbnail_url,
+ }
+ mobj = re.search(r'-(\d+)x(\d+)\.', thumbnail_url)
+ if mobj:
+ thumb.update({
+ 'height': int(mobj.group(2)),
+ 'width': int(mobj.group(1)),
+ })
+ thumbnails.append(thumb)
+
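+ # Older clip URLs embedded a numeric ID in the media URL; keep it as an archive ID for backward compatibility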
+ old_id = self._search_regex(r'%7C(\d+)(?:-\d+)?\.mp4', formats[-1]['url'], 'old id', default=None)
+
+ return {
+ 'id': clip.get('id') or video_id,
+ '_old_archive_ids': [make_archive_id(self, old_id)] if old_id else None,
+ 'display_id': video_id,
+ 'title': clip.get('title'),
+ 'formats': formats,
+ 'duration': int_or_none(clip.get('durationSeconds')),
+ 'view_count': int_or_none(clip.get('viewCount')),
+ 'timestamp': unified_timestamp(clip.get('createdAt')),
+ 'thumbnails': thumbnails,
+ 'creator': try_get(clip, lambda x: x['broadcaster']['displayName'], compat_str),
+ 'uploader': try_get(clip, lambda x: x['curator']['displayName'], compat_str),
+ 'uploader_id': try_get(clip, lambda x: x['curator']['id'], compat_str),
+ }
diff --git a/yt_dlp/extractor/twitter.py b/yt_dlp/extractor/twitter.py
new file mode 100644
index 0000000..ecc8656
--- /dev/null
+++ b/yt_dlp/extractor/twitter.py
@@ -0,0 +1,1875 @@
+import json
+import random
+import re
+
+from .common import InfoExtractor
+from .periscope import PeriscopeBaseIE, PeriscopeIE
+from ..compat import functools # isort: split
+from ..compat import (
+ compat_parse_qs,
+ compat_urllib_parse_unquote,
+ compat_urllib_parse_urlparse,
+)
+from ..networking.exceptions import HTTPError
+from ..utils import (
+ ExtractorError,
+ dict_get,
+ filter_dict,
+ float_or_none,
+ format_field,
+ int_or_none,
+ make_archive_id,
+ remove_end,
+ str_or_none,
+ strip_or_none,
+ traverse_obj,
+ try_call,
+ try_get,
+ unified_timestamp,
+ update_url_query,
+ url_or_none,
+ xpath_text,
+)
+
+
+class TwitterBaseIE(InfoExtractor):
+ _NETRC_MACHINE = 'twitter'
+ _API_BASE = 'https://api.twitter.com/1.1/'
+ _GRAPHQL_API_BASE = 'https://twitter.com/i/api/graphql/'
+ _BASE_REGEX = r'https?://(?:(?:www|m(?:obile)?)\.)?(?:twitter\.com|twitter3e4tixl4xyajtrzo62zg5vztmjuricljdp2c5kshju4avyoid\.onion)/'
+ _AUTH = 'AAAAAAAAAAAAAAAAAAAAANRILgAAAAAAnNwIzUejRCOuH5E6I8xnZz4puTs%3D1Zv7ttfk8LF81IUq16cHjhLTvJu4FA33AGWWjCpTnA'
+ _LEGACY_AUTH = 'AAAAAAAAAAAAAAAAAAAAAIK1zgAAAAAA2tUWuhGZ2JceoId5GwYWU5GspY4%3DUq7gzFoCZs1QfwGoVdvSac3IniczZEYXIcDyumCauIXpcAPorE'
+ _flow_token = None
+
+ _LOGIN_INIT_DATA = json.dumps({
+ 'input_flow_data': {
+ 'flow_context': {
+ 'debug_overrides': {},
+ 'start_location': {
+ 'location': 'unknown'
+ }
+ }
+ },
+ 'subtask_versions': {
+ 'action_list': 2,
+ 'alert_dialog': 1,
+ 'app_download_cta': 1,
+ 'check_logged_in_account': 1,
+ 'choice_selection': 3,
+ 'contacts_live_sync_permission_prompt': 0,
+ 'cta': 7,
+ 'email_verification': 2,
+ 'end_flow': 1,
+ 'enter_date': 1,
+ 'enter_email': 2,
+ 'enter_password': 5,
+ 'enter_phone': 2,
+ 'enter_recaptcha': 1,
+ 'enter_text': 5,
+ 'enter_username': 2,
+ 'generic_urt': 3,
+ 'in_app_notification': 1,
+ 'interest_picker': 3,
+ 'js_instrumentation': 1,
+ 'menu_dialog': 1,
+ 'notifications_permission_prompt': 2,
+ 'open_account': 2,
+ 'open_home_timeline': 1,
+ 'open_link': 1,
+ 'phone_verification': 4,
+ 'privacy_options': 1,
+ 'security_key': 3,
+ 'select_avatar': 4,
+ 'select_banner': 2,
+ 'settings_list': 7,
+ 'show_code': 1,
+ 'sign_up': 2,
+ 'sign_up_review': 4,
+ 'tweet_selection_urt': 1,
+ 'update_users': 1,
+ 'upload_media': 1,
+ 'user_recommendations_list': 4,
+ 'user_recommendations_urt': 1,
+ 'wait_spinner': 3,
+ 'web_modal': 1
+ }
+ }, separators=(',', ':')).encode()
+
+ def _extract_variant_formats(self, variant, video_id):
+ variant_url = variant.get('url')
+ if not variant_url:
+ return [], {}
+ elif '.m3u8' in variant_url:
+ fmts, subs = self._extract_m3u8_formats_and_subtitles(
+ variant_url, video_id, 'mp4', 'm3u8_native',
+ m3u8_id='hls', fatal=False)
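+            # Audio-only HLS renditions encode their bitrate (in bps) in the
+            # format_id, e.g. "hls-Audio-128000"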
+ for f in traverse_obj(fmts, lambda _, v: v['vcodec'] == 'none' and v.get('tbr') is None):
+ if mobj := re.match(r'hls-[Aa]udio-(?P<bitrate>\d{4,})', f['format_id']):
+ f['tbr'] = int_or_none(mobj.group('bitrate'), 1000)
+ return fmts, subs
+ else:
+ tbr = int_or_none(dict_get(variant, ('bitrate', 'bit_rate')), 1000) or None
+ f = {
+ 'url': variant_url,
+ 'format_id': 'http' + ('-%d' % tbr if tbr else ''),
+ 'tbr': tbr,
+ }
+ self._search_dimensions_in_video_url(f, variant_url)
+ return [f], {}
+
+ def _extract_formats_from_vmap_url(self, vmap_url, video_id):
+ vmap_url = url_or_none(vmap_url)
+ if not vmap_url:
+ return [], {}
+ vmap_data = self._download_xml(vmap_url, video_id)
+ formats = []
+ subtitles = {}
+ urls = []
+ for video_variant in vmap_data.findall('.//{http://twitter.com/schema/videoVMapV2.xsd}videoVariant'):
+ video_variant.attrib['url'] = compat_urllib_parse_unquote(
+ video_variant.attrib['url'])
+ urls.append(video_variant.attrib['url'])
+ fmts, subs = self._extract_variant_formats(
+ video_variant.attrib, video_id)
+ formats.extend(fmts)
+ subtitles = self._merge_subtitles(subtitles, subs)
+ video_url = strip_or_none(xpath_text(vmap_data, './/MediaFile'))
+ if video_url not in urls:
+ fmts, subs = self._extract_variant_formats({'url': video_url}, video_id)
+ formats.extend(fmts)
+ subtitles = self._merge_subtitles(subtitles, subs)
+ return formats, subtitles
+
+ @staticmethod
+ def _search_dimensions_in_video_url(a_format, video_url):
+ m = re.search(r'/(?P<width>\d+)x(?P<height>\d+)/', video_url)
+ if m:
+ a_format.update({
+ 'width': int(m.group('width')),
+ 'height': int(m.group('height')),
+ })
+
+ @property
+ def is_logged_in(self):
+ return bool(self._get_cookies(self._API_BASE).get('auth_token'))
+
+ @functools.cached_property
+ def _selected_api(self):
+ return self._configuration_arg('api', ['graphql'], ie_key='Twitter')[0]
+
+ def _fetch_guest_token(self, display_id):
+ guest_token = traverse_obj(self._download_json(
+ f'{self._API_BASE}guest/activate.json', display_id, 'Downloading guest token', data=b'',
+ headers=self._set_base_headers(legacy=display_id and self._selected_api == 'legacy')),
+ ('guest_token', {str}))
+ if not guest_token:
+ raise ExtractorError('Could not retrieve guest token')
+ return guest_token
+
+ def _set_base_headers(self, legacy=False):
+ bearer_token = self._LEGACY_AUTH if legacy and not self.is_logged_in else self._AUTH
+ return filter_dict({
+ 'Authorization': f'Bearer {bearer_token}',
+ 'x-csrf-token': try_call(lambda: self._get_cookies(self._API_BASE)['ct0'].value),
+ })
+
+ def _call_login_api(self, note, headers, query={}, data=None):
+ response = self._download_json(
+ f'{self._API_BASE}onboarding/task.json', None, note,
+ headers=headers, query=query, data=data, expected_status=400)
+ error = traverse_obj(response, ('errors', 0, 'message', {str}))
+ if error:
+ raise ExtractorError(f'Login failed, Twitter API says: {error}', expected=True)
+ elif traverse_obj(response, 'status') != 'success':
+ raise ExtractorError('Login was unsuccessful')
+
+ subtask = traverse_obj(
+ response, ('subtasks', ..., 'subtask_id', {str}), get_all=False)
+ if not subtask:
+ raise ExtractorError('Twitter API did not return next login subtask')
+
+ self._flow_token = response['flow_token']
+
+ return subtask
+
+ def _perform_login(self, username, password):
+ if self.is_logged_in:
+ return
+
+ webpage = self._download_webpage('https://twitter.com/', None, 'Downloading login page')
+ guest_token = self._search_regex(
+ r'\.cookie\s*=\s*["\']gt=(\d+);', webpage, 'gt', default=None) or self._fetch_guest_token(None)
+ headers = {
+ **self._set_base_headers(),
+ 'content-type': 'application/json',
+ 'x-guest-token': guest_token,
+ 'x-twitter-client-language': 'en',
+ 'x-twitter-active-user': 'yes',
+ 'Referer': 'https://twitter.com/',
+ 'Origin': 'https://twitter.com',
+ }
+
+ def build_login_json(*subtask_inputs):
+ return json.dumps({
+ 'flow_token': self._flow_token,
+ 'subtask_inputs': subtask_inputs
+ }, separators=(',', ':')).encode()
+
+ def input_dict(subtask_id, text):
+ return {
+ 'subtask_id': subtask_id,
+ 'enter_text': {
+ 'text': text,
+ 'link': 'next_link'
+ }
+ }
+
+ next_subtask = self._call_login_api(
+ 'Downloading flow token', headers, query={'flow_name': 'login'}, data=self._LOGIN_INIT_DATA)
+
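+        # The onboarding API acts as a state machine: each response names the next
+        # subtask, and the loop runs until Twitter grants the auth_token cookie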
+ while not self.is_logged_in:
+ if next_subtask == 'LoginJsInstrumentationSubtask':
+ next_subtask = self._call_login_api(
+ 'Submitting JS instrumentation response', headers, data=build_login_json({
+ 'subtask_id': next_subtask,
+ 'js_instrumentation': {
+ 'response': '{}',
+ 'link': 'next_link'
+ }
+ }))
+
+ elif next_subtask == 'LoginEnterUserIdentifierSSO':
+ next_subtask = self._call_login_api(
+ 'Submitting username', headers, data=build_login_json({
+ 'subtask_id': next_subtask,
+ 'settings_list': {
+ 'setting_responses': [{
+ 'key': 'user_identifier',
+ 'response_data': {
+ 'text_data': {
+ 'result': username
+ }
+ }
+ }],
+ 'link': 'next_link'
+ }
+ }))
+
+ elif next_subtask == 'LoginEnterAlternateIdentifierSubtask':
+ next_subtask = self._call_login_api(
+ 'Submitting alternate identifier', headers,
+ data=build_login_json(input_dict(next_subtask, self._get_tfa_info(
+ 'one of username, phone number or email that was not used as --username'))))
+
+ elif next_subtask == 'LoginEnterPassword':
+ next_subtask = self._call_login_api(
+ 'Submitting password', headers, data=build_login_json({
+ 'subtask_id': next_subtask,
+ 'enter_password': {
+ 'password': password,
+ 'link': 'next_link'
+ }
+ }))
+
+ elif next_subtask == 'AccountDuplicationCheck':
+ next_subtask = self._call_login_api(
+ 'Submitting account duplication check', headers, data=build_login_json({
+ 'subtask_id': next_subtask,
+ 'check_logged_in_account': {
+ 'link': 'AccountDuplicationCheck_false'
+ }
+ }))
+
+ elif next_subtask == 'LoginTwoFactorAuthChallenge':
+ next_subtask = self._call_login_api(
+ 'Submitting 2FA token', headers, data=build_login_json(input_dict(
+ next_subtask, self._get_tfa_info('two-factor authentication token'))))
+
+ elif next_subtask == 'LoginAcid':
+ next_subtask = self._call_login_api(
+ 'Submitting confirmation code', headers, data=build_login_json(input_dict(
+ next_subtask, self._get_tfa_info('confirmation code sent to your email or phone'))))
+
+ elif next_subtask == 'ArkoseLogin':
+ self.raise_login_required('Twitter is requiring captcha for this login attempt', method='cookies')
+
+ elif next_subtask == 'DenyLoginSubtask':
+ self.raise_login_required('Twitter rejected this login attempt as suspicious', method='cookies')
+
+ elif next_subtask == 'LoginSuccessSubtask':
+ raise ExtractorError('Twitter API did not grant auth token cookie')
+
+ else:
+ raise ExtractorError(f'Unrecognized subtask ID "{next_subtask}"')
+
+ self.report_login()
+
+ def _call_api(self, path, video_id, query={}, graphql=False):
+ headers = self._set_base_headers(legacy=not graphql and self._selected_api == 'legacy')
+ headers.update({
+ 'x-twitter-auth-type': 'OAuth2Session',
+ 'x-twitter-client-language': 'en',
+ 'x-twitter-active-user': 'yes',
+ } if self.is_logged_in else {
+ 'x-guest-token': self._fetch_guest_token(video_id)
+ })
+ allowed_status = {400, 401, 403, 404} if graphql else {403}
+ result = self._download_json(
+ (self._GRAPHQL_API_BASE if graphql else self._API_BASE) + path,
+ video_id, headers=headers, query=query, expected_status=allowed_status,
+ note=f'Downloading {"GraphQL" if graphql else "legacy API"} JSON')
+
+ if result.get('errors'):
+ errors = ', '.join(set(traverse_obj(result, ('errors', ..., 'message', {str}))))
+ if errors and 'not authorized' in errors:
+ self.raise_login_required(remove_end(errors, '.'))
+ raise ExtractorError(f'Error(s) while querying API: {errors or "Unknown error"}')
+
+ return result
+
+ def _build_graphql_query(self, media_id):
+ raise NotImplementedError('Method must be implemented to support GraphQL')
+
+ def _call_graphql_api(self, endpoint, media_id):
+ data = self._build_graphql_query(media_id)
+ query = {key: json.dumps(value, separators=(',', ':')) for key, value in data.items()}
+ return traverse_obj(self._call_api(endpoint, media_id, query=query, graphql=True), 'data')
+
+
+class TwitterCardIE(InfoExtractor):
+ IE_NAME = 'twitter:card'
+ _VALID_URL = TwitterBaseIE._BASE_REGEX + r'i/(?:cards/tfw/v1|videos(?:/tweet)?)/(?P<id>\d+)'
+ _TESTS = [
+ {
+ 'url': 'https://twitter.com/i/cards/tfw/v1/560070183650213889',
+            # The MD5 checksum differs depending on which server serves the video
+ 'info_dict': {
+ 'id': '560070131976392705',
+ 'ext': 'mp4',
+ 'title': "Twitter - You can now shoot, edit and share video on Twitter. Capture life's most moving moments from your perspective.",
+ 'description': 'md5:18d3e24bb4f6e5007487dd546e53bd96',
+ 'uploader': 'Twitter',
+ 'uploader_id': 'Twitter',
+ 'thumbnail': r're:^https?://.*\.jpg',
+ 'duration': 30.033,
+ 'timestamp': 1422366112,
+ 'upload_date': '20150127',
+ 'age_limit': 0,
+ 'comment_count': int,
+ 'tags': [],
+ 'repost_count': int,
+ 'like_count': int,
+ 'display_id': '560070183650213889',
+ 'uploader_url': 'https://twitter.com/Twitter',
+ },
+ },
+ {
+ 'url': 'https://twitter.com/i/cards/tfw/v1/623160978427936768',
+ 'md5': '7137eca597f72b9abbe61e5ae0161399',
+ 'info_dict': {
+ 'id': '623160978427936768',
+ 'ext': 'mp4',
+ 'title': "NASA - Fly over Pluto's icy Norgay Mountains and Sputnik Plain in this @NASANewHorizons #PlutoFlyby video.",
+ 'description': "Fly over Pluto's icy Norgay Mountains and Sputnik Plain in this @NASANewHorizons #PlutoFlyby video. https://t.co/BJYgOjSeGA",
+ 'uploader': 'NASA',
+ 'uploader_id': 'NASA',
+ 'timestamp': 1437408129,
+ 'upload_date': '20150720',
+ 'uploader_url': 'https://twitter.com/NASA',
+ 'age_limit': 0,
+ 'comment_count': int,
+ 'like_count': int,
+ 'repost_count': int,
+ 'tags': ['PlutoFlyby'],
+ },
+ 'params': {'format': '[protocol=https]'}
+ },
+ {
+ 'url': 'https://twitter.com/i/cards/tfw/v1/654001591733886977',
+ 'md5': 'b6d9683dd3f48e340ded81c0e917ad46',
+ 'info_dict': {
+ 'id': 'dq4Oj5quskI',
+ 'ext': 'mp4',
+ 'title': 'Ubuntu 11.10 Overview',
+ 'description': 'md5:a831e97fa384863d6e26ce48d1c43376',
+ 'upload_date': '20111013',
+ 'uploader': 'OMG! UBUNTU!',
+ 'uploader_id': 'omgubuntu',
+ 'channel_url': 'https://www.youtube.com/channel/UCIiSwcm9xiFb3Y4wjzR41eQ',
+ 'channel_id': 'UCIiSwcm9xiFb3Y4wjzR41eQ',
+ 'channel_follower_count': int,
+ 'chapters': 'count:8',
+ 'uploader_url': 'http://www.youtube.com/user/omgubuntu',
+ 'duration': 138,
+ 'categories': ['Film & Animation'],
+ 'age_limit': 0,
+ 'comment_count': int,
+ 'availability': 'public',
+ 'like_count': int,
+ 'thumbnail': 'https://i.ytimg.com/vi/dq4Oj5quskI/maxresdefault.jpg',
+ 'view_count': int,
+ 'tags': 'count:12',
+ 'channel': 'OMG! UBUNTU!',
+ 'playable_in_embed': True,
+ },
+ 'add_ie': ['Youtube'],
+ },
+ {
+ 'url': 'https://twitter.com/i/cards/tfw/v1/665289828897005568',
+ 'info_dict': {
+ 'id': 'iBb2x00UVlv',
+ 'ext': 'mp4',
+ 'upload_date': '20151113',
+ 'uploader_id': '1189339351084113920',
+ 'uploader': 'ArsenalTerje',
+ 'title': 'Vine by ArsenalTerje',
+ 'timestamp': 1447451307,
+ 'alt_title': 'Vine by ArsenalTerje',
+ 'comment_count': int,
+ 'like_count': int,
+ 'thumbnail': r're:^https?://[^?#]+\.jpg',
+ 'view_count': int,
+ 'repost_count': int,
+ },
+ 'add_ie': ['Vine'],
+ 'params': {'skip_download': 'm3u8'},
+ },
+ {
+ 'url': 'https://twitter.com/i/videos/tweet/705235433198714880',
+ 'md5': '884812a2adc8aaf6fe52b15ccbfa3b88',
+ 'info_dict': {
+ 'id': '705235433198714880',
+ 'ext': 'mp4',
+ 'title': "Brent Yarina - Khalil Iverson's missed highlight dunk. And made highlight dunk. In one highlight.",
+ 'description': "Khalil Iverson's missed highlight dunk. And made highlight dunk. In one highlight. https://t.co/OrxcJ28Bns",
+ 'uploader': 'Brent Yarina',
+ 'uploader_id': 'BTNBrentYarina',
+ 'timestamp': 1456976204,
+ 'upload_date': '20160303',
+ },
+ 'skip': 'This content is no longer available.',
+ },
+ {
+ 'url': 'https://twitter.com/i/videos/752274308186120192',
+ 'only_matching': True,
+ },
+ ]
+
+ def _real_extract(self, url):
+ status_id = self._match_id(url)
+ return self.url_result(
+ 'https://twitter.com/statuses/' + status_id,
+ TwitterIE.ie_key(), status_id)
+
+
+class TwitterIE(TwitterBaseIE):
+ IE_NAME = 'twitter'
+ _VALID_URL = TwitterBaseIE._BASE_REGEX + r'(?:(?:i/web|[^/]+)/status|statuses)/(?P<id>\d+)(?:/(?:video|photo)/(?P<index>\d+))?'
+
+ _TESTS = [{
+ 'url': 'https://twitter.com/freethenipple/status/643211948184596480',
+ 'info_dict': {
+ 'id': '643211870443208704',
+ 'display_id': '643211948184596480',
+ 'ext': 'mp4',
+ 'title': 'FREE THE NIPPLE - FTN supporters on Hollywood Blvd today!',
+ 'thumbnail': r're:^https?://.*\.jpg',
+ 'description': 'FTN supporters on Hollywood Blvd today! http://t.co/c7jHH749xJ',
+ 'channel_id': '549749560',
+ 'uploader': 'FREE THE NIPPLE',
+ 'uploader_id': 'freethenipple',
+ 'duration': 12.922,
+ 'timestamp': 1442188653,
+ 'upload_date': '20150913',
+ 'uploader_url': 'https://twitter.com/freethenipple',
+ 'comment_count': int,
+ 'repost_count': int,
+ 'like_count': int,
+ 'tags': [],
+ 'age_limit': 18,
+ '_old_archive_ids': ['twitter 643211948184596480'],
+ },
+ 'skip': 'Requires authentication',
+ }, {
+ 'url': 'https://twitter.com/giphz/status/657991469417025536/photo/1',
+ 'md5': 'f36dcd5fb92bf7057f155e7d927eeb42',
+ 'info_dict': {
+ 'id': '657991469417025536',
+ 'ext': 'mp4',
+ 'title': 'Gifs - tu vai cai tu vai cai tu nao eh capaz disso tu vai cai',
+ 'description': 'Gifs on Twitter: "tu vai cai tu vai cai tu nao eh capaz disso tu vai cai https://t.co/tM46VHFlO5"',
+ 'thumbnail': r're:^https?://.*\.png',
+ 'uploader': 'Gifs',
+ 'uploader_id': 'giphz',
+ },
+ 'expected_warnings': ['height', 'width'],
+ 'skip': 'Account suspended',
+ }, {
+ 'url': 'https://twitter.com/starwars/status/665052190608723968',
+ 'info_dict': {
+ 'id': '665052190608723968',
+ 'display_id': '665052190608723968',
+ 'ext': 'mp4',
+ 'title': r're:Star Wars.*A new beginning is coming December 18.*',
+ 'description': 'A new beginning is coming December 18. Watch the official 60 second #TV spot for #StarWars: #TheForceAwakens. https://t.co/OkSqT2fjWJ',
+ 'channel_id': '20106852',
+ 'uploader_id': 'starwars',
+ 'uploader': r're:Star Wars.*',
+ 'timestamp': 1447395772,
+ 'upload_date': '20151113',
+ 'uploader_url': 'https://twitter.com/starwars',
+ 'comment_count': int,
+ 'repost_count': int,
+ 'like_count': int,
+ 'tags': ['TV', 'StarWars', 'TheForceAwakens'],
+ 'age_limit': 0,
+ '_old_archive_ids': ['twitter 665052190608723968'],
+ },
+ }, {
+ 'url': 'https://twitter.com/BTNBrentYarina/status/705235433198714880',
+ 'info_dict': {
+ 'id': '705235433198714880',
+ 'ext': 'mp4',
+ 'title': "Brent Yarina - Khalil Iverson's missed highlight dunk. And made highlight dunk. In one highlight.",
+ 'description': "Khalil Iverson's missed highlight dunk. And made highlight dunk. In one highlight. https://t.co/OrxcJ28Bns",
+ 'uploader_id': 'BTNBrentYarina',
+ 'uploader': 'Brent Yarina',
+ 'timestamp': 1456976204,
+ 'upload_date': '20160303',
+ 'uploader_url': 'https://twitter.com/BTNBrentYarina',
+ 'comment_count': int,
+ 'repost_count': int,
+ 'like_count': int,
+ 'tags': [],
+ 'age_limit': 0,
+ },
+ 'params': {
+ # The same video as https://twitter.com/i/videos/tweet/705235433198714880
+ # Test case of TwitterCardIE
+ 'skip_download': True,
+ },
+ 'skip': 'Dead external link',
+ }, {
+ 'url': 'https://twitter.com/jaydingeer/status/700207533655363584',
+ 'info_dict': {
+ 'id': '700207414000242688',
+ 'display_id': '700207533655363584',
+ 'ext': 'mp4',
+ 'title': 'jaydin donte geer - BEAT PROD: @suhmeduh #Damndaniel',
+ 'description': 'BEAT PROD: @suhmeduh https://t.co/HBrQ4AfpvZ #Damndaniel https://t.co/byBooq2ejZ',
+ 'thumbnail': r're:^https?://.*\.jpg',
+ 'channel_id': '1383165541',
+ 'uploader': 'jaydin donte geer',
+ 'uploader_id': 'jaydingeer',
+ 'duration': 30.0,
+ 'timestamp': 1455777459,
+ 'upload_date': '20160218',
+ 'uploader_url': 'https://twitter.com/jaydingeer',
+ 'comment_count': int,
+ 'repost_count': int,
+ 'like_count': int,
+ 'tags': ['Damndaniel'],
+ 'age_limit': 0,
+ '_old_archive_ids': ['twitter 700207533655363584'],
+ },
+ }, {
+ 'url': 'https://twitter.com/Filmdrunk/status/713801302971588609',
+ 'md5': '89a15ed345d13b86e9a5a5e051fa308a',
+ 'info_dict': {
+ 'id': 'MIOxnrUteUd',
+ 'ext': 'mp4',
+ 'title': 'Dr.Pepperの飲み方 #japanese #バカ #ドクペ #電動ガン',
+ 'uploader': 'TAKUMA',
+ 'uploader_id': '1004126642786242560',
+ 'timestamp': 1402826626,
+ 'upload_date': '20140615',
+ 'thumbnail': r're:^https?://.*\.jpg',
+ 'alt_title': 'Vine by TAKUMA',
+ 'comment_count': int,
+ 'repost_count': int,
+ 'like_count': int,
+ 'view_count': int,
+ },
+ 'add_ie': ['Vine'],
+ }, {
+ 'url': 'https://twitter.com/captainamerica/status/719944021058060289',
+ 'info_dict': {
+ 'id': '717462543795523584',
+ 'display_id': '719944021058060289',
+ 'ext': 'mp4',
+ 'title': 'Captain America - @King0fNerd Are you sure you made the right choice? Find out in theaters.',
+ 'description': '@King0fNerd Are you sure you made the right choice? Find out in theaters. https://t.co/GpgYi9xMJI',
+ 'channel_id': '701615052',
+ 'uploader_id': 'CaptainAmerica',
+ 'uploader': 'Captain America',
+ 'duration': 3.17,
+ 'timestamp': 1460483005,
+ 'upload_date': '20160412',
+ 'uploader_url': 'https://twitter.com/CaptainAmerica',
+ 'thumbnail': r're:^https?://.*\.jpg',
+ 'comment_count': int,
+ 'repost_count': int,
+ 'like_count': int,
+ 'tags': [],
+ 'age_limit': 0,
+ '_old_archive_ids': ['twitter 719944021058060289'],
+ },
+ }, {
+ 'url': 'https://twitter.com/OPP_HSD/status/779210622571536384',
+ 'info_dict': {
+ 'id': '1zqKVVlkqLaKB',
+ 'ext': 'mp4',
+ 'title': 'Sgt Kerry Schmidt - Ontario Provincial Police - Road rage, mischief, assault, rollover and fire in one occurrence',
+ 'upload_date': '20160923',
+ 'uploader_id': '1PmKqpJdOJQoY',
+ 'uploader': 'Sgt Kerry Schmidt - Ontario Provincial Police',
+ 'timestamp': 1474613214,
+ 'thumbnail': r're:^https?://.*\.jpg',
+ },
+ 'add_ie': ['Periscope'],
+ 'skip': 'Broadcast not found',
+ }, {
+ # has mp4 formats via mobile API
+ 'url': 'https://twitter.com/news_al3alm/status/852138619213144067',
+ 'info_dict': {
+ 'id': '852077943283097602',
+ 'ext': 'mp4',
+ 'title': 'عالم الأخبار - كلمة تاريخية بجلسة الجناسي التاريخية.. النائب خالد مؤنس العتيبي للمعارضين : اتقوا الله .. الظلم ظلمات يوم القيامة',
+ 'description': 'كلمة تاريخية بجلسة الجناسي التاريخية.. النائب خالد مؤنس العتيبي للمعارضين : اتقوا الله .. الظلم ظلمات يوم القيامة https://t.co/xg6OhpyKfN',
+ 'channel_id': '2526757026',
+ 'uploader': 'عالم الأخبار',
+ 'uploader_id': 'news_al3alm',
+ 'duration': 277.4,
+ 'timestamp': 1492000653,
+ 'upload_date': '20170412',
+ 'display_id': '852138619213144067',
+ 'age_limit': 0,
+ 'uploader_url': 'https://twitter.com/news_al3alm',
+ 'thumbnail': r're:^https?://.*\.jpg',
+ 'tags': [],
+ 'repost_count': int,
+ 'like_count': int,
+ 'comment_count': int,
+ '_old_archive_ids': ['twitter 852138619213144067'],
+ },
+ }, {
+ 'url': 'https://twitter.com/i/web/status/910031516746514432',
+ 'info_dict': {
+ 'id': '910030238373089285',
+ 'display_id': '910031516746514432',
+ 'ext': 'mp4',
+ 'title': 'Préfet de Guadeloupe - [Direct] #Maria Le centre se trouve actuellement au sud de Basse-Terre. Restez confinés. Réfugiez-vous dans la pièce la + sûre.',
+ 'thumbnail': r're:^https?://.*\.jpg',
+ 'description': '[Direct] #Maria Le centre se trouve actuellement au sud de Basse-Terre. Restez confinés. Réfugiez-vous dans la pièce la + sûre. https://t.co/mwx01Rs4lo',
+ 'channel_id': '2319432498',
+ 'uploader': 'Préfet de Guadeloupe',
+ 'uploader_id': 'Prefet971',
+ 'duration': 47.48,
+ 'timestamp': 1505803395,
+ 'upload_date': '20170919',
+ 'uploader_url': 'https://twitter.com/Prefet971',
+ 'comment_count': int,
+ 'repost_count': int,
+ 'like_count': int,
+ 'tags': ['Maria'],
+ 'age_limit': 0,
+ '_old_archive_ids': ['twitter 910031516746514432'],
+ },
+ 'params': {
+ 'skip_download': True, # requires ffmpeg
+ },
+ }, {
+ # card via api.twitter.com/1.1/videos/tweet/config
+ 'url': 'https://twitter.com/LisPower1/status/1001551623938805763',
+ 'info_dict': {
+ 'id': '1001551417340022785',
+ 'display_id': '1001551623938805763',
+ 'ext': 'mp4',
+ 'title': 're:.*?Shep is on a roll today.*?',
+ 'thumbnail': r're:^https?://.*\.jpg',
+ 'description': 'md5:37b9f2ff31720cef23b2bd42ee8a0f09',
+ 'channel_id': '255036353',
+ 'uploader': 'Lis Power',
+ 'uploader_id': 'LisPower1',
+ 'duration': 111.278,
+ 'timestamp': 1527623489,
+ 'upload_date': '20180529',
+ 'uploader_url': 'https://twitter.com/LisPower1',
+ 'comment_count': int,
+ 'repost_count': int,
+ 'like_count': int,
+ 'tags': [],
+ 'age_limit': 0,
+ '_old_archive_ids': ['twitter 1001551623938805763'],
+ },
+ 'params': {
+ 'skip_download': True, # requires ffmpeg
+ },
+ }, {
+ 'url': 'https://twitter.com/foobar/status/1087791357756956680',
+ 'info_dict': {
+ 'id': '1087791272830607360',
+ 'display_id': '1087791357756956680',
+ 'ext': 'mp4',
+ 'title': 'X - A new is coming. Some of you got an opt-in to try it now. Check out the emoji button, quick keyboard shortcuts, upgraded trends, advanced search, and more. Let us know your thoughts!',
+ 'thumbnail': r're:^https?://.*\.jpg',
+ 'description': 'md5:6dfd341a3310fb97d80d2bf7145df976',
+ 'uploader': 'X',
+ 'uploader_id': 'X',
+ 'duration': 61.567,
+ 'timestamp': 1548184644,
+ 'upload_date': '20190122',
+ 'uploader_url': 'https://twitter.com/X',
+ 'comment_count': int,
+ 'repost_count': int,
+ 'like_count': int,
+ 'view_count': int,
+ 'tags': [],
+ 'age_limit': 0,
+ },
+ 'skip': 'This Tweet is unavailable',
+ }, {
+ # not available in Periscope
+ 'url': 'https://twitter.com/ViviEducation/status/1136534865145286656',
+ 'info_dict': {
+ 'id': '1vOGwqejwoWxB',
+ 'ext': 'mp4',
+ 'title': 'Vivi - Vivi founder @lior_rauchy announcing our new student feedback tool live at @EduTECH_AU #EduTECH2019',
+ 'uploader': 'Vivi',
+ 'uploader_id': '1eVjYOLGkGrQL',
+ 'thumbnail': r're:^https?://.*\.jpg',
+ 'tags': ['EduTECH2019'],
+ 'view_count': int,
+ },
+ 'add_ie': ['TwitterBroadcast'],
+ 'skip': 'Broadcast no longer exists',
+ }, {
+ # unified card
+ 'url': 'https://twitter.com/BrooklynNets/status/1349794411333394432?s=20',
+ 'info_dict': {
+ 'id': '1349774757969989634',
+ 'display_id': '1349794411333394432',
+ 'ext': 'mp4',
+ 'title': 'md5:d1c4941658e4caaa6cb579260d85dcba',
+ 'thumbnail': r're:^https?://.*\.jpg',
+ 'description': 'md5:71ead15ec44cee55071547d6447c6a3e',
+ 'channel_id': '18552281',
+ 'uploader': 'Brooklyn Nets',
+ 'uploader_id': 'BrooklynNets',
+ 'duration': 324.484,
+ 'timestamp': 1610651040,
+ 'upload_date': '20210114',
+ 'uploader_url': 'https://twitter.com/BrooklynNets',
+ 'comment_count': int,
+ 'repost_count': int,
+ 'like_count': int,
+ 'tags': [],
+ 'age_limit': 0,
+ '_old_archive_ids': ['twitter 1349794411333394432'],
+ },
+ 'params': {
+ 'skip_download': True,
+ },
+ }, {
+ 'url': 'https://twitter.com/oshtru/status/1577855540407197696',
+ 'info_dict': {
+ 'id': '1577855447914409984',
+ 'display_id': '1577855540407197696',
+ 'ext': 'mp4',
+ 'title': 'md5:466a3a8b049b5f5a13164ce915484b51',
+ 'description': 'md5:b9c3699335447391d11753ab21c70a74',
+ 'upload_date': '20221006',
+ 'channel_id': '143077138',
+ 'uploader': 'Oshtru',
+ 'uploader_id': 'oshtru',
+ 'uploader_url': 'https://twitter.com/oshtru',
+ 'thumbnail': r're:^https?://.*\.jpg',
+ 'duration': 30.03,
+ 'timestamp': 1665025050,
+ 'comment_count': int,
+ 'repost_count': int,
+ 'like_count': int,
+ 'tags': [],
+ 'age_limit': 0,
+ '_old_archive_ids': ['twitter 1577855540407197696'],
+ },
+ 'params': {'skip_download': True},
+ }, {
+ 'url': 'https://twitter.com/UltimaShadowX/status/1577719286659006464',
+ 'info_dict': {
+ 'id': '1577719286659006464',
+ 'title': 'Ultima Reload - Test',
+ 'description': 'Test https://t.co/Y3KEZD7Dad',
+ 'channel_id': '168922496',
+ 'uploader': 'Ultima Reload',
+ 'uploader_id': 'UltimaShadowX',
+ 'uploader_url': 'https://twitter.com/UltimaShadowX',
+ 'upload_date': '20221005',
+ 'timestamp': 1664992565,
+ 'comment_count': int,
+ 'repost_count': int,
+ 'like_count': int,
+ 'tags': [],
+ 'age_limit': 0,
+ },
+ 'playlist_count': 4,
+ 'params': {'skip_download': True},
+ }, {
+ 'url': 'https://twitter.com/MesoMax919/status/1575560063510810624',
+ 'info_dict': {
+ 'id': '1575559336759263233',
+ 'display_id': '1575560063510810624',
+ 'ext': 'mp4',
+ 'title': 'md5:eec26382babd0f7c18f041db8ae1c9c9',
+ 'thumbnail': r're:^https?://.*\.jpg',
+ 'description': 'md5:95aea692fda36a12081b9629b02daa92',
+ 'channel_id': '1094109584',
+ 'uploader': 'Max Olson',
+ 'uploader_id': 'MesoMax919',
+ 'uploader_url': 'https://twitter.com/MesoMax919',
+ 'duration': 21.321,
+ 'timestamp': 1664477766,
+ 'upload_date': '20220929',
+ 'comment_count': int,
+ 'repost_count': int,
+ 'like_count': int,
+ 'tags': ['HurricaneIan'],
+ 'age_limit': 0,
+ '_old_archive_ids': ['twitter 1575560063510810624'],
+ },
+ }, {
+ # Adult content, fails if not logged in
+ 'url': 'https://twitter.com/Rizdraws/status/1575199173472927762',
+ 'info_dict': {
+ 'id': '1575199163847000068',
+ 'display_id': '1575199173472927762',
+ 'ext': 'mp4',
+ 'title': str,
+ 'description': str,
+ 'channel_id': '1217167793541480450',
+ 'uploader': str,
+ 'uploader_id': 'Rizdraws',
+ 'uploader_url': 'https://twitter.com/Rizdraws',
+ 'upload_date': '20220928',
+ 'timestamp': 1664391723,
+ 'thumbnail': r're:^https?://.+\.jpg',
+ 'like_count': int,
+ 'repost_count': int,
+ 'comment_count': int,
+ 'age_limit': 18,
+ 'tags': [],
+ '_old_archive_ids': ['twitter 1575199173472927762'],
+ },
+ 'params': {'skip_download': 'The media could not be played'},
+ 'skip': 'Requires authentication',
+ }, {
+ # Playlist result only with graphql API
+ 'url': 'https://twitter.com/Srirachachau/status/1395079556562706435',
+ 'playlist_mincount': 2,
+ 'info_dict': {
+ 'id': '1395079556562706435',
+ 'title': str,
+ 'tags': [],
+ 'channel_id': '21539378',
+ 'uploader': str,
+ 'like_count': int,
+ 'upload_date': '20210519',
+ 'age_limit': 0,
+ 'repost_count': int,
+ 'description': 'Here it is! Finished my gothic western cartoon. Pretty proud of it. It\'s got some goofs and lots of splashy over the top violence, something for everyone, hope you like it https://t.co/fOsG5glUnw',
+ 'uploader_id': 'Srirachachau',
+ 'comment_count': int,
+ 'uploader_url': 'https://twitter.com/Srirachachau',
+ 'timestamp': 1621447860,
+ },
+ }, {
+ 'url': 'https://twitter.com/DavidToons_/status/1578353380363501568',
+ 'playlist_mincount': 2,
+ 'info_dict': {
+ 'id': '1578353380363501568',
+ 'title': str,
+ 'channel_id': '2195866214',
+ 'uploader_id': 'DavidToons_',
+ 'repost_count': int,
+ 'like_count': int,
+ 'uploader': str,
+ 'timestamp': 1665143744,
+ 'uploader_url': 'https://twitter.com/DavidToons_',
+ 'description': 'Chris sounds like Linda from Bob\'s Burgers, so as an animator: this had to be done. https://t.co/WgJauwIW1w',
+ 'tags': [],
+ 'comment_count': int,
+ 'upload_date': '20221007',
+ 'age_limit': 0,
+ },
+ }, {
+ 'url': 'https://twitter.com/primevideouk/status/1578401165338976258',
+ 'playlist_count': 2,
+ 'info_dict': {
+ 'id': '1578401165338976258',
+ 'title': str,
+ 'description': 'md5:659a6b517a034b4cee5d795381a2dc41',
+ 'channel_id': '19338359',
+ 'uploader': str,
+ 'uploader_id': 'primevideouk',
+ 'timestamp': 1665155137,
+ 'upload_date': '20221007',
+ 'age_limit': 0,
+ 'uploader_url': 'https://twitter.com/primevideouk',
+ 'comment_count': int,
+ 'repost_count': int,
+ 'like_count': int,
+ 'tags': ['TheRingsOfPower'],
+ },
+ }, {
+ # Twitter Spaces
+ 'url': 'https://twitter.com/MoniqueCamarra/status/1550101959377551360',
+ 'info_dict': {
+ 'id': '1lPJqmBeeNAJb',
+ 'ext': 'm4a',
+ 'title': 'EuroFile@6 Ukraine Up-date-Draghi Defenestration-the West',
+ 'uploader': r're:Monique Camarra.+?',
+ 'uploader_id': 'MoniqueCamarra',
+ 'live_status': 'was_live',
+ 'release_timestamp': 1658417414,
+ 'description': 'md5:acce559345fd49f129c20dbcda3f1201',
+ 'timestamp': 1658407771,
+ 'release_date': '20220721',
+ 'upload_date': '20220721',
+ },
+ 'add_ie': ['TwitterSpaces'],
+ 'params': {'skip_download': 'm3u8'},
+ 'skip': 'Requires authentication',
+ }, {
+ # URL specifies video number but --yes-playlist
+ 'url': 'https://twitter.com/CTVJLaidlaw/status/1600649710662213632/video/1',
+ 'playlist_mincount': 2,
+ 'info_dict': {
+ 'id': '1600649710662213632',
+ 'title': 'md5:be05989b0722e114103ed3851a0ffae2',
+ 'timestamp': 1670459604.0,
+ 'description': 'md5:591c19ce66fadc2359725d5cd0d1052c',
+ 'comment_count': int,
+ 'uploader_id': 'CTVJLaidlaw',
+ 'channel_id': '80082014',
+ 'repost_count': int,
+ 'tags': ['colorectalcancer', 'cancerjourney', 'imnotaquitter'],
+ 'upload_date': '20221208',
+ 'age_limit': 0,
+ 'uploader': 'Jocelyn Laidlaw',
+ 'uploader_url': 'https://twitter.com/CTVJLaidlaw',
+ 'like_count': int,
+ },
+ }, {
+ # URL specifies video number and --no-playlist
+ 'url': 'https://twitter.com/CTVJLaidlaw/status/1600649710662213632/video/2',
+ 'info_dict': {
+ 'id': '1600649511827013632',
+ 'ext': 'mp4',
+ 'title': 'md5:7662a0a27ce6faa3e5b160340f3cfab1',
+ 'thumbnail': r're:^https?://.+\.jpg',
+ 'timestamp': 1670459604.0,
+ 'channel_id': '80082014',
+ 'uploader_id': 'CTVJLaidlaw',
+ 'uploader': 'Jocelyn Laidlaw',
+ 'repost_count': int,
+ 'comment_count': int,
+ 'tags': ['colorectalcancer', 'cancerjourney', 'imnotaquitter'],
+ 'duration': 102.226,
+ 'uploader_url': 'https://twitter.com/CTVJLaidlaw',
+ 'display_id': '1600649710662213632',
+ 'like_count': int,
+ 'description': 'md5:591c19ce66fadc2359725d5cd0d1052c',
+ 'upload_date': '20221208',
+ 'age_limit': 0,
+ '_old_archive_ids': ['twitter 1600649710662213632'],
+ },
+ 'params': {'noplaylist': True},
+ }, {
+        # id points to a TweetWithVisibilityResults type entity which wraps the actual Tweet
+        # note that the extracted id differs from the id in the URL
+ 'url': 'https://twitter.com/s2FAKER/status/1621117700482416640',
+ 'info_dict': {
+ 'id': '1621117577354424321',
+ 'display_id': '1621117700482416640',
+ 'ext': 'mp4',
+ 'title': '뽀 - 아 최우제 이동속도 봐',
+ 'description': '아 최우제 이동속도 봐 https://t.co/dxu2U5vXXB',
+ 'duration': 24.598,
+ 'channel_id': '1281839411068432384',
+ 'uploader': '뽀',
+ 'uploader_id': 's2FAKER',
+ 'uploader_url': 'https://twitter.com/s2FAKER',
+ 'upload_date': '20230202',
+ 'timestamp': 1675339553.0,
+ 'thumbnail': r're:https?://pbs\.twimg\.com/.+',
+ 'age_limit': 18,
+ 'tags': [],
+ 'like_count': int,
+ 'repost_count': int,
+ 'comment_count': int,
+ '_old_archive_ids': ['twitter 1621117700482416640'],
+ },
+ 'skip': 'Requires authentication',
+ }, {
+ 'url': 'https://twitter.com/hlo_again/status/1599108751385972737/video/2',
+ 'info_dict': {
+ 'id': '1599108643743473680',
+ 'display_id': '1599108751385972737',
+ 'ext': 'mp4',
+ 'title': '\u06ea - \U0001F48B',
+ 'channel_id': '1347791436809441283',
+ 'uploader_url': 'https://twitter.com/hlo_again',
+ 'like_count': int,
+ 'uploader_id': 'hlo_again',
+ 'thumbnail': 'https://pbs.twimg.com/ext_tw_video_thumb/1599108643743473680/pu/img/UG3xjov4rgg5sbYM.jpg?name=orig',
+ 'repost_count': int,
+ 'duration': 9.531,
+ 'comment_count': int,
+ 'upload_date': '20221203',
+ 'age_limit': 0,
+ 'timestamp': 1670092210.0,
+ 'tags': [],
+ 'uploader': '\u06ea',
+ 'description': '\U0001F48B https://t.co/bTj9Qz7vQP',
+ '_old_archive_ids': ['twitter 1599108751385972737'],
+ },
+ 'params': {'noplaylist': True},
+ }, {
+ 'url': 'https://twitter.com/MunTheShinobi/status/1600009574919962625',
+ 'info_dict': {
+ 'id': '1600009362759733248',
+ 'display_id': '1600009574919962625',
+ 'ext': 'mp4',
+ 'channel_id': '211814412',
+ 'uploader_url': 'https://twitter.com/MunTheShinobi',
+ 'description': 'This is a genius ad by Apple. \U0001f525\U0001f525\U0001f525\U0001f525\U0001f525 https://t.co/cNsA0MoOml',
+ 'thumbnail': 'https://pbs.twimg.com/ext_tw_video_thumb/1600009362759733248/pu/img/XVhFQivj75H_YxxV.jpg?name=orig',
+ 'age_limit': 0,
+ 'uploader': 'Mün',
+ 'repost_count': int,
+ 'upload_date': '20221206',
+ 'title': 'Mün - This is a genius ad by Apple. \U0001f525\U0001f525\U0001f525\U0001f525\U0001f525',
+ 'comment_count': int,
+ 'like_count': int,
+ 'tags': [],
+ 'uploader_id': 'MunTheShinobi',
+ 'duration': 139.987,
+ 'timestamp': 1670306984.0,
+ '_old_archive_ids': ['twitter 1600009574919962625'],
+ },
+ }, {
+ # retweeted_status (private)
+ 'url': 'https://twitter.com/liberdalau/status/1623739803874349067',
+ 'info_dict': {
+ 'id': '1623274794488659969',
+ 'display_id': '1623739803874349067',
+ 'ext': 'mp4',
+ 'title': 'Johnny Bullets - Me after going viral to over 30million people: Whoopsie-daisy',
+ 'description': 'md5:b06864cd3dc2554821cc327f5348485a',
+ 'uploader': 'Johnny Bullets',
+ 'uploader_id': 'Johnnybull3ts',
+ 'uploader_url': 'https://twitter.com/Johnnybull3ts',
+ 'age_limit': 0,
+ 'tags': [],
+ 'duration': 8.033,
+ 'timestamp': 1675853859.0,
+ 'upload_date': '20230208',
+ 'thumbnail': r're:https://pbs\.twimg\.com/ext_tw_video_thumb/.+',
+ 'like_count': int,
+ 'repost_count': int,
+ },
+ 'skip': 'Protected tweet',
+ }, {
+ # retweeted_status
+ 'url': 'https://twitter.com/playstrumpcard/status/1695424220702888009',
+ 'info_dict': {
+ 'id': '1694928337846538240',
+ 'ext': 'mp4',
+ 'display_id': '1695424220702888009',
+ 'title': 'md5:e8daa9527bc2b947121395494f786d9d',
+ 'description': 'md5:004f2d37fd58737724ec75bc7e679938',
+ 'channel_id': '15212187',
+ 'uploader': 'Benny Johnson',
+ 'uploader_id': 'bennyjohnson',
+ 'uploader_url': 'https://twitter.com/bennyjohnson',
+ 'age_limit': 0,
+ 'tags': [],
+ 'duration': 45.001,
+ 'timestamp': 1692962814.0,
+ 'upload_date': '20230825',
+ 'thumbnail': r're:https://pbs\.twimg\.com/amplify_video_thumb/.+',
+ 'like_count': int,
+ 'repost_count': int,
+ 'comment_count': int,
+ '_old_archive_ids': ['twitter 1695424220702888009'],
+ },
+ }, {
+ # retweeted_status w/ legacy API
+ 'url': 'https://twitter.com/playstrumpcard/status/1695424220702888009',
+ 'info_dict': {
+ 'id': '1694928337846538240',
+ 'ext': 'mp4',
+ 'display_id': '1695424220702888009',
+ 'title': 'md5:e8daa9527bc2b947121395494f786d9d',
+ 'description': 'md5:004f2d37fd58737724ec75bc7e679938',
+ 'channel_id': '15212187',
+ 'uploader': 'Benny Johnson',
+ 'uploader_id': 'bennyjohnson',
+ 'uploader_url': 'https://twitter.com/bennyjohnson',
+ 'age_limit': 0,
+ 'tags': [],
+ 'duration': 45.001,
+ 'timestamp': 1692962814.0,
+ 'upload_date': '20230825',
+ 'thumbnail': r're:https://pbs\.twimg\.com/amplify_video_thumb/.+',
+ 'like_count': int,
+ 'repost_count': int,
+ '_old_archive_ids': ['twitter 1695424220702888009'],
+ },
+ 'params': {'extractor_args': {'twitter': {'api': ['legacy']}}},
+ }, {
+ # Broadcast embedded in tweet
+ 'url': 'https://twitter.com/JessicaDobsonWX/status/1731121063248175384',
+ 'info_dict': {
+ 'id': '1rmxPMjLzAXKN',
+ 'ext': 'mp4',
+ 'title': 'WAVE Weather Now - Saturday 12/2/23 Update',
+ 'uploader': 'Jessica Dobson',
+ 'uploader_id': 'JessicaDobsonWX',
+ 'uploader_url': 'https://twitter.com/JessicaDobsonWX',
+ 'timestamp': 1701566398,
+ 'upload_date': '20231203',
+ 'live_status': 'was_live',
+ 'thumbnail': r're:https://[^/]+pscp\.tv/.+\.jpg',
+ 'concurrent_view_count': int,
+ 'view_count': int,
+ },
+ 'add_ie': ['TwitterBroadcast'],
+ }, {
+ # Animated gif and quote tweet video
+ 'url': 'https://twitter.com/BAKKOOONN/status/1696256659889565950',
+ 'playlist_mincount': 2,
+ 'info_dict': {
+ 'id': '1696256659889565950',
+ 'title': 'BAKOON - https://t.co/zom968d0a0',
+ 'description': 'https://t.co/zom968d0a0',
+ 'tags': [],
+ 'channel_id': '1263540390',
+ 'uploader': 'BAKOON',
+ 'uploader_id': 'BAKKOOONN',
+ 'uploader_url': 'https://twitter.com/BAKKOOONN',
+ 'age_limit': 18,
+ 'timestamp': 1693254077.0,
+ 'upload_date': '20230828',
+ 'like_count': int,
+ 'comment_count': int,
+ 'repost_count': int,
+ },
+ 'skip': 'Requires authentication',
+ }, {
+ # "stale tweet" with typename "TweetWithVisibilityResults"
+ 'url': 'https://twitter.com/RobertKennedyJr/status/1724884212803834154',
+ 'md5': '511377ff8dfa7545307084dca4dce319',
+ 'info_dict': {
+ 'id': '1724883339285544960',
+ 'ext': 'mp4',
+ 'title': 'md5:cc56716f9ed0b368de2ba54c478e493c',
+ 'description': 'md5:9dc14f5b0f1311fc7caf591ae253a164',
+ 'display_id': '1724884212803834154',
+ 'channel_id': '337808606',
+ 'uploader': 'Robert F. Kennedy Jr',
+ 'uploader_id': 'RobertKennedyJr',
+ 'uploader_url': 'https://twitter.com/RobertKennedyJr',
+ 'upload_date': '20231115',
+ 'timestamp': 1700079417.0,
+ 'duration': 341.048,
+ 'thumbnail': r're:https://pbs\.twimg\.com/amplify_video_thumb/.+',
+ 'tags': ['Kennedy24'],
+ 'repost_count': int,
+ 'like_count': int,
+ 'comment_count': int,
+ 'age_limit': 0,
+ '_old_archive_ids': ['twitter 1724884212803834154'],
+ },
+ }, {
+ # onion route
+ 'url': 'https://twitter3e4tixl4xyajtrzo62zg5vztmjuricljdp2c5kshju4avyoid.onion/TwitterBlue/status/1484226494708662273',
+ 'only_matching': True,
+ }, {
+ # Twitch Clip Embed
+ 'url': 'https://twitter.com/GunB1g/status/1163218564784017422',
+ 'only_matching': True,
+ }, {
+ # promo_video_website card
+ 'url': 'https://twitter.com/GunB1g/status/1163218564784017422',
+ 'only_matching': True,
+ }, {
+ # promo_video_convo card
+ 'url': 'https://twitter.com/poco_dandy/status/1047395834013384704',
+ 'only_matching': True,
+ }, {
+ # appplayer card
+ 'url': 'https://twitter.com/poco_dandy/status/1150646424461176832',
+ 'only_matching': True,
+ }, {
+ # video_direct_message card
+ 'url': 'https://twitter.com/qarev001/status/1348948114569269251',
+ 'only_matching': True,
+ }, {
+ # poll2choice_video card
+ 'url': 'https://twitter.com/CAF_Online/status/1349365911120195585',
+ 'only_matching': True,
+ }, {
+ # poll3choice_video card
+ 'url': 'https://twitter.com/SamsungMobileSA/status/1348609186725289984',
+ 'only_matching': True,
+ }, {
+ # poll4choice_video card
+ 'url': 'https://twitter.com/SouthamptonFC/status/1347577658079641604',
+ 'only_matching': True,
+ }]
+
+ _MEDIA_ID_RE = re.compile(r'_video/(\d+)/')
+
+ @property
+ def _GRAPHQL_ENDPOINT(self):
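+        # Logged-in sessions can use the full TweetDetail endpoint; guest sessions
+        # are limited to TweetResultByRestId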
+ if self.is_logged_in:
+ return 'zZXycP0V6H7m-2r0mOnFcA/TweetDetail'
+ return '2ICDjqPd81tulZcYrtpTuQ/TweetResultByRestId'
+
+ def _graphql_to_legacy(self, data, twid):
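+        # TweetDetail (logged-in) nests the tweet inside conversation-timeline entries,
+        # whereas TweetResultByRestId (guest) returns it directly under tweetResult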
+ result = traverse_obj(data, (
+ 'threaded_conversation_with_injections_v2', 'instructions', 0, 'entries',
+ lambda _, v: v['entryId'] == f'tweet-{twid}', 'content', 'itemContent',
+ 'tweet_results', 'result', ('tweet', None), {dict},
+ ), default={}, get_all=False) if self.is_logged_in else traverse_obj(
+ data, ('tweetResult', 'result', {dict}), default={})
+
+ typename = result.get('__typename')
+ if typename not in ('Tweet', 'TweetWithVisibilityResults', 'TweetTombstone', 'TweetUnavailable', None):
+ self.report_warning(f'Unknown typename: {typename}', twid, only_once=True)
+
+ if 'tombstone' in result:
+ cause = remove_end(traverse_obj(result, ('tombstone', 'text', 'text', {str})), '. Learn more')
+ raise ExtractorError(f'Twitter API says: {cause or "Unknown error"}', expected=True)
+ elif typename == 'TweetUnavailable':
+ reason = result.get('reason')
+ if reason == 'NsfwLoggedOut':
+ self.raise_login_required('NSFW tweet requires authentication')
+ elif reason == 'Protected':
+ self.raise_login_required('You are not authorized to view this protected tweet')
+ raise ExtractorError(reason or 'Requested tweet is unavailable', expected=True)
+ # Result for "stale tweet" needs additional transformation
+ elif typename == 'TweetWithVisibilityResults':
+ result = traverse_obj(result, ('tweet', {dict})) or {}
+
+ status = result.get('legacy', {})
+ status.update(traverse_obj(result, {
+ 'user': ('core', 'user_results', 'result', 'legacy'),
+ 'card': ('card', 'legacy'),
+ 'quoted_status': ('quoted_status_result', 'result', 'legacy'),
+ 'retweeted_status': ('legacy', 'retweeted_status_result', 'result', 'legacy'),
+ }, expected_type=dict, default={}))
+
+ # extra transformations needed since result does not match legacy format
+ if status.get('retweeted_status'):
+ status['retweeted_status']['user'] = traverse_obj(status, (
+ 'retweeted_status_result', 'result', 'core', 'user_results', 'result', 'legacy', {dict})) or {}
+
+ binding_values = {
+ binding_value.get('key'): binding_value.get('value')
+ for binding_value in traverse_obj(status, ('card', 'binding_values', ..., {dict}))
+ }
+ if binding_values:
+ status['card']['binding_values'] = binding_values
+
+ return status
+
+ def _build_graphql_query(self, media_id):
+ return {
+ 'variables': {
+ 'focalTweetId': media_id,
+ 'includePromotedContent': True,
+ 'with_rux_injections': False,
+ 'withBirdwatchNotes': True,
+ 'withCommunity': True,
+ 'withDownvotePerspective': False,
+ 'withQuickPromoteEligibilityTweetFields': True,
+ 'withReactionsMetadata': False,
+ 'withReactionsPerspective': False,
+ 'withSuperFollowsTweetFields': True,
+ 'withSuperFollowsUserFields': True,
+ 'withV2Timeline': True,
+ 'withVoice': True,
+ },
+ 'features': {
+ 'graphql_is_translatable_rweb_tweet_is_translatable_enabled': False,
+ 'interactive_text_enabled': True,
+ 'responsive_web_edit_tweet_api_enabled': True,
+ 'responsive_web_enhance_cards_enabled': True,
+ 'responsive_web_graphql_timeline_navigation_enabled': False,
+ 'responsive_web_text_conversations_enabled': False,
+ 'responsive_web_uc_gql_enabled': True,
+ 'standardized_nudges_misinfo': True,
+ 'tweet_with_visibility_results_prefer_gql_limited_actions_policy_enabled': False,
+ 'tweetypie_unmention_optimization_enabled': True,
+ 'unified_cards_ad_metadata_container_dynamic_card_content_query_enabled': True,
+ 'verified_phone_label_enabled': False,
+ 'vibe_api_enabled': True,
+ },
+ } if self.is_logged_in else {
+ 'variables': {
+ 'tweetId': media_id,
+ 'withCommunity': False,
+ 'includePromotedContent': False,
+ 'withVoice': False,
+ },
+ 'features': {
+ 'creator_subscriptions_tweet_preview_api_enabled': True,
+ 'tweetypie_unmention_optimization_enabled': True,
+ 'responsive_web_edit_tweet_api_enabled': True,
+ 'graphql_is_translatable_rweb_tweet_is_translatable_enabled': True,
+ 'view_counts_everywhere_api_enabled': True,
+ 'longform_notetweets_consumption_enabled': True,
+ 'responsive_web_twitter_article_tweet_consumption_enabled': False,
+ 'tweet_awards_web_tipping_enabled': False,
+ 'freedom_of_speech_not_reach_fetch_enabled': True,
+ 'standardized_nudges_misinfo': True,
+ 'tweet_with_visibility_results_prefer_gql_limited_actions_policy_enabled': True,
+ 'longform_notetweets_rich_text_read_enabled': True,
+ 'longform_notetweets_inline_media_enabled': True,
+ 'responsive_web_graphql_exclude_directive_enabled': True,
+ 'verified_phone_label_enabled': False,
+ 'responsive_web_media_download_video_enabled': False,
+ 'responsive_web_graphql_skip_user_profile_image_extensions_enabled': False,
+ 'responsive_web_graphql_timeline_navigation_enabled': True,
+ 'responsive_web_enhance_cards_enabled': False
+ },
+ 'fieldToggles': {
+ 'withArticleRichContentState': False
+ }
+ }
+
+ def _call_syndication_api(self, twid):
+ self.report_warning(
+            'Not all metadata or media is available via the syndication endpoint', twid, only_once=True)
+ status = self._download_json(
+ 'https://cdn.syndication.twimg.com/tweet-result', twid, 'Downloading syndication JSON',
+ headers={'User-Agent': 'Googlebot'}, query={
+ 'id': twid,
+ # TODO: token = ((Number(twid) / 1e15) * Math.PI).toString(36).replace(/(0+|\.)/g, '')
+ 'token': ''.join(random.choices('123456789abcdefghijklmnopqrstuvwxyz', k=10)),
+ })
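+        # A rough Python port of the token formula in the TODO above (untested sketch;
+        # the random 10-character token is what is actually sent):
+        #   value = (int(twid) / 1e15) * math.pi
+        #   token = value rendered in base 36 (integer and fractional digits),
+        #           with every '0' digit and the '.' separator stripped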
+ if not status:
+ raise ExtractorError('Syndication endpoint returned empty JSON response')
+ # Transform the result so its structure matches that of legacy/graphql
+ media = []
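+        # The syndication response lacks a media id_str; recover it from a variant URL
+        # (matching "_video/<id>/") and fall back to the tweet id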
+ for detail in traverse_obj(status, ((None, 'quoted_tweet'), 'mediaDetails', ..., {dict})):
+ detail['id_str'] = traverse_obj(detail, (
+ 'video_info', 'variants', ..., 'url', {self._MEDIA_ID_RE.search}, 1), get_all=False) or twid
+ media.append(detail)
+ status['extended_entities'] = {'media': media}
+
+ return status
+
+ def _extract_status(self, twid):
+ if self._selected_api not in ('graphql', 'legacy', 'syndication'):
+ raise ExtractorError(f'{self._selected_api!r} is not a valid API selection', expected=True)
+
+ try:
+ if self.is_logged_in or self._selected_api == 'graphql':
+ status = self._graphql_to_legacy(self._call_graphql_api(self._GRAPHQL_ENDPOINT, twid), twid)
+ elif self._selected_api == 'legacy':
+ status = self._call_api(f'statuses/show/{twid}.json', twid, {
+ 'cards_platform': 'Web-12',
+ 'include_cards': 1,
+ 'include_reply_count': 1,
+ 'include_user_entities': 0,
+ 'tweet_mode': 'extended',
+ })
+ except ExtractorError as e:
+            if not isinstance(e.cause, HTTPError) or e.cause.status != 429:
+ raise
+ self.report_warning('Rate-limit exceeded; falling back to syndication endpoint')
+ status = self._call_syndication_api(twid)
+
+ if self._selected_api == 'syndication':
+ status = self._call_syndication_api(twid)
+
+ return traverse_obj(status, 'retweeted_status', None, expected_type=dict) or {}
+
+ def _real_extract(self, url):
+ twid, selected_index = self._match_valid_url(url).group('id', 'index')
+ status = self._extract_status(twid)
+
+ title = description = traverse_obj(
+ status, (('full_text', 'text'), {lambda x: x.replace('\n', ' ')}), get_all=False) or ''
+ # strip 'https -_t.co_BJYgOjSeGA' junk from filenames
+ title = re.sub(r'\s+(https?://[^ ]+)', '', title)
+ user = status.get('user') or {}
+ uploader = user.get('name')
+ if uploader:
+ title = f'{uploader} - {title}'
+ uploader_id = user.get('screen_name')
+
+ info = {
+ 'id': twid,
+ 'title': title,
+ 'description': description,
+ 'uploader': uploader,
+ 'timestamp': unified_timestamp(status.get('created_at')),
+ 'channel_id': str_or_none(status.get('user_id_str')) or str_or_none(user.get('id_str')),
+ 'uploader_id': uploader_id,
+ 'uploader_url': format_field(uploader_id, None, 'https://twitter.com/%s'),
+ 'like_count': int_or_none(status.get('favorite_count')),
+ 'repost_count': int_or_none(status.get('retweet_count')),
+ 'comment_count': int_or_none(status.get('reply_count')),
+ 'age_limit': 18 if status.get('possibly_sensitive') else 0,
+ 'tags': traverse_obj(status, ('entities', 'hashtags', ..., 'text')),
+ }
+
+ def extract_from_video_info(media):
+ media_id = traverse_obj(media, 'id_str', 'id', expected_type=str_or_none)
+ self.write_debug(f'Extracting from video info: {media_id}')
+
+ formats = []
+ subtitles = {}
+ for variant in traverse_obj(media, ('video_info', 'variants', ...)):
+ fmts, subs = self._extract_variant_formats(variant, twid)
+ subtitles = self._merge_subtitles(subtitles, subs)
+ formats.extend(fmts)
+
+ thumbnails = []
+ media_url = media.get('media_url_https') or media.get('media_url')
+ if media_url:
+ def add_thumbnail(name, size):
+ thumbnails.append({
+ 'id': name,
+ 'url': update_url_query(media_url, {'name': name}),
+ 'width': int_or_none(size.get('w') or size.get('width')),
+ 'height': int_or_none(size.get('h') or size.get('height')),
+ })
+ for name, size in media.get('sizes', {}).items():
+ add_thumbnail(name, size)
+ add_thumbnail('orig', media.get('original_info') or {})
+
+ return {
+ 'id': media_id,
+ 'formats': formats,
+ 'subtitles': subtitles,
+ 'thumbnails': thumbnails,
+ 'view_count': traverse_obj(media, ('mediaStats', 'viewCount', {int_or_none})), # No longer available
+ 'duration': float_or_none(traverse_obj(media, ('video_info', 'duration_millis')), 1000),
+ # Prioritize m3u8 formats for compat, see https://github.com/yt-dlp/yt-dlp/issues/8117
+ '_format_sort_fields': ('res', 'proto:m3u8', 'br', 'size'), # http format codec is unknown
+ }
+
+ def extract_from_card_info(card):
+ if not card:
+ return
+
+ self.write_debug(f'Extracting from card info: {card.get("url")}')
+ binding_values = card['binding_values']
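+            # Each binding value is a typed container, e.g. {'type': 'STRING',
+            # 'string_value': ...}; read the field matching its declared type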
+
+ def get_binding_value(k):
+ o = binding_values.get(k) or {}
+ return try_get(o, lambda x: x[x['type'].lower() + '_value'])
+
+ card_name = card['name'].split(':')[-1]
+ if card_name == 'player':
+ yield {
+ '_type': 'url',
+ 'url': get_binding_value('player_url'),
+ }
+ elif card_name == 'periscope_broadcast':
+ yield {
+ '_type': 'url',
+ 'url': get_binding_value('url') or get_binding_value('player_url'),
+ 'ie_key': PeriscopeIE.ie_key(),
+ }
+ elif card_name == 'broadcast':
+ yield {
+ '_type': 'url',
+ 'url': get_binding_value('broadcast_url'),
+ 'ie_key': TwitterBroadcastIE.ie_key(),
+ }
+ elif card_name == 'audiospace':
+ yield {
+ '_type': 'url',
+ 'url': f'https://twitter.com/i/spaces/{get_binding_value("id")}',
+ 'ie_key': TwitterSpacesIE.ie_key(),
+ }
+ elif card_name == 'summary':
+ yield {
+ '_type': 'url',
+ 'url': get_binding_value('card_url'),
+ }
+ elif card_name == 'unified_card':
+ unified_card = self._parse_json(get_binding_value('unified_card'), twid)
+ yield from map(extract_from_video_info, traverse_obj(
+ unified_card, ('media_entities', ...), expected_type=dict))
+ # amplify, promo_video_website, promo_video_convo, appplayer,
+ # video_direct_message, poll2choice_video, poll3choice_video,
+ # poll4choice_video, ...
+ else:
+ is_amplify = card_name == 'amplify'
+ vmap_url = get_binding_value('amplify_url_vmap') if is_amplify else get_binding_value('player_stream_url')
+ content_id = get_binding_value('%s_content_id' % (card_name if is_amplify else 'player'))
+ formats, subtitles = self._extract_formats_from_vmap_url(vmap_url, content_id or twid)
+
+ thumbnails = []
+ for suffix in ('_small', '', '_large', '_x_large', '_original'):
+ image = get_binding_value('player_image' + suffix) or {}
+ image_url = image.get('url')
+ if not image_url or '/player-placeholder' in image_url:
+ continue
+ thumbnails.append({
+ 'id': suffix[1:] if suffix else 'medium',
+ 'url': image_url,
+ 'width': int_or_none(image.get('width')),
+ 'height': int_or_none(image.get('height')),
+ })
+
+ yield {
+ 'formats': formats,
+ 'subtitles': subtitles,
+ 'thumbnails': thumbnails,
+ 'duration': int_or_none(get_binding_value(
+ 'content_duration_seconds')),
+ }
+
+ videos = traverse_obj(status, (
+ (None, 'quoted_status'), 'extended_entities', 'media', lambda _, m: m['type'] != 'photo', {dict}))
+
+ if self._yes_playlist(twid, selected_index, video_label='URL-specified video number'):
+ selected_entries = (*map(extract_from_video_info, videos), *extract_from_card_info(status.get('card')))
+ else:
+ desired_obj = traverse_obj(status, (
+ (None, 'quoted_status'), 'extended_entities', 'media', int(selected_index) - 1, {dict}), get_all=False)
+ if not desired_obj:
+ raise ExtractorError(f'Video #{selected_index} is unavailable', expected=True)
+ elif desired_obj.get('type') != 'video':
+ raise ExtractorError(f'Media #{selected_index} is not a video', expected=True)
+
+ # Restore original archive id and video index in title
+ for index, entry in enumerate(videos, 1):
+ if entry.get('id') != desired_obj.get('id'):
+ continue
+ if index == 1:
+ info['_old_archive_ids'] = [make_archive_id(self, twid)]
+ if len(videos) != 1:
+ info['title'] += f' #{index}'
+ break
+
+ return {**info, **extract_from_video_info(desired_obj), 'display_id': twid}
+
+ entries = [{**info, **data, 'display_id': twid} for data in selected_entries]
+ if not entries:
+ expanded_url = traverse_obj(status, ('entities', 'urls', 0, 'expanded_url'), expected_type=url_or_none)
+ if not expanded_url or expanded_url == url:
+ self.raise_no_formats('No video could be found in this tweet', expected=True)
+ return info
+
+ return self.url_result(expanded_url, display_id=twid, **info)
+
+ entries[0]['_old_archive_ids'] = [make_archive_id(self, twid)]
+
+ if len(entries) == 1:
+ return entries[0]
+
+ for index, entry in enumerate(entries, 1):
+ entry['title'] += f' #{index}'
+
+ return self.playlist_result(entries, **info)
+
+
+class TwitterAmplifyIE(TwitterBaseIE):
+ IE_NAME = 'twitter:amplify'
+ _VALID_URL = r'https?://amp\.twimg\.com/v/(?P<id>[0-9a-f\-]{36})'
+
+ _TEST = {
+ 'url': 'https://amp.twimg.com/v/0ba0c3c7-0af3-4c0a-bed5-7efd1ffa2951',
+ 'md5': 'fec25801d18a4557c5c9f33d2c379ffa',
+ 'info_dict': {
+ 'id': '0ba0c3c7-0af3-4c0a-bed5-7efd1ffa2951',
+ 'ext': 'mp4',
+ 'title': 'Twitter Video',
+ 'thumbnail': 're:^https?://.*',
+ },
+ 'params': {'format': '[protocol=https]'},
+ }
+
+ def _real_extract(self, url):
+ video_id = self._match_id(url)
+ webpage = self._download_webpage(url, video_id)
+
+ vmap_url = self._html_search_meta(
+ 'twitter:amplify:vmap', webpage, 'vmap url')
+ formats, _ = self._extract_formats_from_vmap_url(vmap_url, video_id)
+
+ thumbnails = []
+ thumbnail = self._html_search_meta(
+ 'twitter:image:src', webpage, 'thumbnail', fatal=False)
+
+ def _find_dimension(target):
+ w = int_or_none(self._html_search_meta(
+ 'twitter:%s:width' % target, webpage, fatal=False))
+ h = int_or_none(self._html_search_meta(
+ 'twitter:%s:height' % target, webpage, fatal=False))
+ return w, h
+
+ if thumbnail:
+ thumbnail_w, thumbnail_h = _find_dimension('image')
+ thumbnails.append({
+ 'url': thumbnail,
+ 'width': thumbnail_w,
+ 'height': thumbnail_h,
+ })
+
+ video_w, video_h = _find_dimension('player')
+ formats[0].update({
+ 'width': video_w,
+ 'height': video_h,
+ })
+
+ return {
+ 'id': video_id,
+ 'title': 'Twitter Video',
+ 'formats': formats,
+ 'thumbnails': thumbnails,
+ }
+
+
+class TwitterBroadcastIE(TwitterBaseIE, PeriscopeBaseIE):
+ IE_NAME = 'twitter:broadcast'
+ _VALID_URL = TwitterBaseIE._BASE_REGEX + r'i/broadcasts/(?P<id>[0-9a-zA-Z]{13})'
+
+ _TESTS = [{
+ # untitled Periscope video
+ 'url': 'https://twitter.com/i/broadcasts/1yNGaQLWpejGj',
+ 'info_dict': {
+ 'id': '1yNGaQLWpejGj',
+ 'ext': 'mp4',
+ 'title': 'Andrea May Sahouri - Periscope Broadcast',
+ 'uploader': 'Andrea May Sahouri',
+ 'uploader_id': 'andreamsahouri',
+ 'uploader_url': 'https://twitter.com/andreamsahouri',
+ 'timestamp': 1590973638,
+ 'upload_date': '20200601',
+ 'thumbnail': r're:^https?://[^?#]+\.jpg\?token=',
+ 'view_count': int,
+ },
+ }, {
+ 'url': 'https://twitter.com/i/broadcasts/1ZkKzeyrPbaxv',
+ 'info_dict': {
+ 'id': '1ZkKzeyrPbaxv',
+ 'ext': 'mp4',
+ 'title': 'Starship | SN10 | High-Altitude Flight Test',
+ 'uploader': 'SpaceX',
+ 'uploader_id': 'SpaceX',
+ 'uploader_url': 'https://twitter.com/SpaceX',
+ 'timestamp': 1614812942,
+ 'upload_date': '20210303',
+ 'thumbnail': r're:^https?://[^?#]+\.jpg\?token=',
+ 'view_count': int,
+ },
+ }, {
+ 'url': 'https://twitter.com/i/broadcasts/1OyKAVQrgzwGb',
+ 'info_dict': {
+ 'id': '1OyKAVQrgzwGb',
+ 'ext': 'mp4',
+ 'title': 'Starship Flight Test',
+ 'uploader': 'SpaceX',
+ 'uploader_id': 'SpaceX',
+ 'uploader_url': 'https://twitter.com/SpaceX',
+ 'timestamp': 1681993964,
+ 'upload_date': '20230420',
+ 'thumbnail': r're:^https?://[^?#]+\.jpg\?token=',
+ 'view_count': int,
+ },
+ }]
+
+ def _real_extract(self, url):
+ broadcast_id = self._match_id(url)
+ broadcast = self._call_api(
+ 'broadcasts/show.json', broadcast_id,
+ {'ids': broadcast_id})['broadcasts'][broadcast_id]
+ if not broadcast:
+ raise ExtractorError('Broadcast no longer exists', expected=True)
+ info = self._parse_broadcast_data(broadcast, broadcast_id)
+ info['title'] = broadcast.get('status') or info.get('title')
+ info['uploader_id'] = broadcast.get('twitter_username') or info.get('uploader_id')
+ info['uploader_url'] = format_field(broadcast, 'twitter_username', 'https://twitter.com/%s', default=None)
+ if info['live_status'] == 'is_upcoming':
+ return info
+
+ media_key = broadcast['media_key']
+ source = self._call_api(
+ f'live_video_stream/status/{media_key}', media_key)['source']
+ m3u8_url = source.get('noRedirectPlaybackUrl') or source['location']
+ if '/live_video_stream/geoblocked/' in m3u8_url:
+ self.raise_geo_restricted()
+ m3u8_id = compat_parse_qs(compat_urllib_parse_urlparse(
+ m3u8_url).query).get('type', [None])[0]
+ state, width, height = self._extract_common_format_info(broadcast)
+ info['formats'] = self._extract_pscp_m3u8_formats(
+ m3u8_url, broadcast_id, m3u8_id, state, width, height)
+ return info
+
+
+class TwitterSpacesIE(TwitterBaseIE):
+ IE_NAME = 'twitter:spaces'
+ _VALID_URL = TwitterBaseIE._BASE_REGEX + r'i/spaces/(?P<id>[0-9a-zA-Z]{13})'
+
+ _TESTS = [{
+ 'url': 'https://twitter.com/i/spaces/1RDxlgyvNXzJL',
+ 'info_dict': {
+ 'id': '1RDxlgyvNXzJL',
+ 'ext': 'm4a',
+ 'title': 'King Carlo e la mossa Kansas City per fare il Grande Centro',
+ 'description': 'Twitter Space participated by annarita digiorgio, Signor Ernesto, Raffaello Colosimo, Simone M. Sepe',
+ 'uploader': r're:Lucio Di Gaetano.*?',
+ 'uploader_id': 'luciodigaetano',
+ 'live_status': 'was_live',
+ 'timestamp': 1659877956,
+ 'upload_date': '20220807',
+ 'release_timestamp': 1659904215,
+ 'release_date': '20220807',
+ },
+ 'params': {'skip_download': 'm3u8'},
+ }, {
+ # post_live/TimedOut but downloadable
+ 'url': 'https://twitter.com/i/spaces/1vAxRAVQWONJl',
+ 'info_dict': {
+ 'id': '1vAxRAVQWONJl',
+ 'ext': 'm4a',
+ 'title': 'Framing Up FinOps: Billing Tools',
+ 'description': 'Twitter Space participated by rupa, Alfonso Hernandez',
+ 'uploader': 'Google Cloud',
+ 'uploader_id': 'googlecloud',
+ 'live_status': 'post_live',
+ 'timestamp': 1681409554,
+ 'upload_date': '20230413',
+ 'release_timestamp': 1681839000,
+ 'release_date': '20230418',
+ },
+ 'params': {'skip_download': 'm3u8'},
+ }, {
+ # Needs ffmpeg as downloader, see: https://github.com/yt-dlp/yt-dlp/issues/7536
+ 'url': 'https://twitter.com/i/spaces/1eaKbrQbjoRKX',
+ 'info_dict': {
+ 'id': '1eaKbrQbjoRKX',
+ 'ext': 'm4a',
+ 'title': 'あ',
+ 'description': 'Twitter Space participated by nobody yet',
+ 'uploader': '息根とめる🔪Twitchで復活',
+ 'uploader_id': 'tomeru_ikinone',
+ 'live_status': 'was_live',
+ 'timestamp': 1685617198,
+ 'upload_date': '20230601',
+ },
+ 'params': {'skip_download': 'm3u8'},
+ }]
+
+ SPACE_STATUS = {
+ 'notstarted': 'is_upcoming',
+ 'ended': 'was_live',
+ 'running': 'is_live',
+ 'timedout': 'post_live',
+ }
+
+ def _build_graphql_query(self, space_id):
+ return {
+ 'variables': {
+ 'id': space_id,
+ 'isMetatagsQuery': True,
+ 'withDownvotePerspective': False,
+ 'withReactionsMetadata': False,
+ 'withReactionsPerspective': False,
+ 'withReplays': True,
+ 'withSuperFollowsUserFields': True,
+ 'withSuperFollowsTweetFields': True,
+ },
+ 'features': {
+ 'dont_mention_me_view_api_enabled': True,
+ 'interactive_text_enabled': True,
+ 'responsive_web_edit_tweet_api_enabled': True,
+ 'responsive_web_enhance_cards_enabled': True,
+ 'responsive_web_uc_gql_enabled': True,
+ 'spaces_2022_h2_clipping': True,
+ 'spaces_2022_h2_spaces_communities': False,
+ 'standardized_nudges_misinfo': True,
+ 'tweet_with_visibility_results_prefer_gql_limited_actions_policy_enabled': False,
+ 'vibe_api_enabled': True,
+ },
+ }
+
+ def _real_extract(self, url):
+ space_id = self._match_id(url)
+ if not self.is_logged_in:
+ self.raise_login_required('Twitter Spaces require authentication')
+ space_data = self._call_graphql_api('HPEisOmj1epUNLCWTYhUWw/AudioSpaceById', space_id)['audioSpace']
+ if not space_data:
+ raise ExtractorError('Twitter Space not found', expected=True)
+
+ metadata = space_data['metadata']
+ live_status = try_call(lambda: self.SPACE_STATUS[metadata['state'].lower()])
+ is_live = live_status == 'is_live'
+
+ formats = []
+ headers = {'Referer': 'https://twitter.com/'}
+ if live_status == 'is_upcoming':
+ self.raise_no_formats('Twitter Space not started yet', expected=True)
+ elif not is_live and not metadata.get('is_space_available_for_replay'):
+ self.raise_no_formats('Twitter Space ended and replay is disabled', expected=True)
+ elif metadata.get('media_key'):
+ source = traverse_obj(
+ self._call_api(f'live_video_stream/status/{metadata["media_key"]}', metadata['media_key']),
+ ('source', ('noRedirectPlaybackUrl', 'location'), {url_or_none}), get_all=False)
+ formats = self._extract_m3u8_formats( # XXX: Some Spaces need ffmpeg as downloader
+ source, metadata['media_key'], 'm4a', entry_protocol='m3u8', live=is_live,
+ headers=headers, fatal=False) if source else []
+ for fmt in formats:
+ fmt.update({'vcodec': 'none', 'acodec': 'aac'})
+ if not is_live:
+ fmt['container'] = 'm4a_dash'
+
+ participants = ', '.join(traverse_obj(
+ space_data, ('participants', 'speakers', ..., 'display_name'))) or 'nobody yet'
+
+ if not formats and live_status == 'post_live':
+ self.raise_no_formats('Twitter Space ended but not downloadable yet', expected=True)
+
+ return {
+ 'id': space_id,
+ 'title': metadata.get('title'),
+ 'description': f'Twitter Space participated by {participants}',
+ 'uploader': traverse_obj(
+ metadata, ('creator_results', 'result', 'legacy', 'name')),
+ 'uploader_id': traverse_obj(
+ metadata, ('creator_results', 'result', 'legacy', 'screen_name')),
+ 'live_status': live_status,
+ 'release_timestamp': try_call(
+ lambda: int_or_none(metadata['scheduled_start'], scale=1000)),
+ 'timestamp': int_or_none(metadata.get('created_at'), scale=1000),
+ 'formats': formats,
+ 'http_headers': headers,
+ }
+
+
+class TwitterShortenerIE(TwitterBaseIE):
+ IE_NAME = 'twitter:shortener'
+ _VALID_URL = r'https?://t\.co/(?P<id>[^?#]+)|tco:(?P<eid>[^?#]+)'
+ _BASE_URL = 'https://t.co/'
+
+ def _real_extract(self, url):
+ mobj = self._match_valid_url(url)
+ eid, shortcode = mobj.group('eid', 'id')
+ if eid:
+ shortcode = eid
+ url = self._BASE_URL + shortcode
+ new_url = self._request_webpage(url, shortcode, headers={'User-Agent': 'curl'}).url
+ unsafe_link_prefix = 'https://twitter.com/safety/unsafe_link_warning?unsafe_link='
+ if new_url.startswith(unsafe_link_prefix):
+ new_url = new_url.replace(unsafe_link_prefix, '')
+ return self.url_result(new_url)
diff --git a/yt_dlp/extractor/txxx.py b/yt_dlp/extractor/txxx.py
new file mode 100644
index 0000000..77dabbc
--- /dev/null
+++ b/yt_dlp/extractor/txxx.py
@@ -0,0 +1,438 @@
+import base64
+import re
+
+from .common import InfoExtractor
+from ..utils import (
+ ExtractorError,
+ int_or_none,
+ js_to_json,
+ merge_dicts,
+ parse_duration,
+ traverse_obj,
+ try_call,
+ url_or_none,
+ urljoin,
+ variadic,
+)
+
+
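+# The player obfuscates its base64 payloads by substituting look-alike
+# Cyrillic letters for their Latin counterparts and using ',', '.' and '~'
+# in place of the base64 characters '/', '+' and '='; map them back before
+# decoding.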
+def decode_base64(text):
+ return base64.b64decode(text.translate(text.maketrans({
+ '\u0405': 'S',
+ '\u0406': 'I',
+ '\u0408': 'J',
+ '\u0410': 'A',
+ '\u0412': 'B',
+ '\u0415': 'E',
+ '\u041a': 'K',
+ '\u041c': 'M',
+ '\u041d': 'H',
+ '\u041e': 'O',
+ '\u0420': 'P',
+ '\u0421': 'C',
+ '\u0425': 'X',
+ ',': '/',
+ '.': '+',
+ '~': '=',
+ }))).decode()
+
+
+def get_formats(host, video_file):
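+ # The enumeration index is reused as the relative `quality` ranking,
+ # so later entries in video_file are preferred by the format sorter.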
+ return [{
+ 'url': urljoin(f'https://{host}', decode_base64(video['video_url'])),
+ 'format_id': try_call(lambda: variadic(video['format'])[0].lstrip('_')),
+ 'quality': index,
+ } for index, video in enumerate(video_file) if video.get('video_url')]
+
+
+class TxxxIE(InfoExtractor):
+ _DOMAINS = (
+ 'hclips.com',
+ 'hdzog.com',
+ 'hdzog.tube',
+ 'hotmovs.com',
+ 'hotmovs.tube',
+ 'inporn.com',
+ 'privatehomeclips.com',
+ 'tubepornclassic.com',
+ 'txxx.com',
+ 'txxx.tube',
+ 'upornia.com',
+ 'upornia.tube',
+ 'vjav.com',
+ 'vjav.tube',
+ 'vxxx.com',
+ 'voyeurhit.com',
+ 'voyeurhit.tube',
+ )
+ _VALID_URL = rf'''(?x)
+ https?://(?:www\.)?(?P<host>{"|".join(map(re.escape, _DOMAINS))})/
+ (?:videos?[/-]|embed/)(?P<id>\d+)(?:/(?P<display_id>[^/?#]+))?
+ '''
+ _EMBED_REGEX = [rf'<iframe[^>]+?src=(["\'])(?P<url>(?:https?:)?//(?:www\.)?(?:{"|".join(map(re.escape, _DOMAINS))})/embed/[^"\']*)\1']
+ _TESTS = [{
+ 'url': 'https://txxx.com/videos/16574965/digital-desire-malena-morgan/',
+ 'md5': 'c54e4ace54320aaf8e2a72df87859391',
+ 'info_dict': {
+ 'id': '16574965',
+ 'display_id': 'digital-desire-malena-morgan',
+ 'ext': 'mp4',
+ 'title': 'Digital Desire - Malena Morgan',
+ 'uploader': 'Lois Argentum',
+ 'duration': 694,
+ 'view_count': int,
+ 'like_count': int,
+ 'dislike_count': int,
+ 'age_limit': 18,
+ 'thumbnail': 'https://tn.txxx.tube/contents/videos_sources/16574000/16574965/screenshots/1.jpg',
+ }
+ }, {
+ 'url': 'https://txxx.tube/videos/16574965/digital-desire-malena-morgan/',
+ 'md5': 'c54e4ace54320aaf8e2a72df87859391',
+ 'info_dict': {
+ 'id': '16574965',
+ 'display_id': 'digital-desire-malena-morgan',
+ 'ext': 'mp4',
+ 'title': 'Digital Desire - Malena Morgan',
+ 'uploader': 'Lois Argentum',
+ 'duration': 694,
+ 'view_count': int,
+ 'like_count': int,
+ 'dislike_count': int,
+ 'age_limit': 18,
+ 'thumbnail': 'https://tn.txxx.tube/contents/videos_sources/16574000/16574965/screenshots/1.jpg',
+ }
+ }, {
+ 'url': 'https://vxxx.com/video-68925/',
+ 'md5': '1fcff3748b0c5b41fe41d0afa22409e1',
+ 'info_dict': {
+ 'id': '68925',
+ 'display_id': '68925',
+ 'ext': 'mp4',
+ 'title': 'Malena Morgan',
+ 'uploader': 'Huge Hughes',
+ 'duration': 694,
+ 'view_count': int,
+ 'like_count': int,
+ 'dislike_count': int,
+ 'age_limit': 18,
+ 'thumbnail': 'https://tn.vxxx.com/contents/videos_sources/68000/68925/screenshots/1.jpg',
+ }
+ }, {
+ 'url': 'https://hclips.com/videos/6291073/malena-morgan-masturbates-her-sweet/',
+ 'md5': 'a5dd4f83363972ee043313cff85e7e26',
+ 'info_dict': {
+ 'id': '6291073',
+ 'display_id': 'malena-morgan-masturbates-her-sweet',
+ 'ext': 'mp4',
+ 'title': 'Malena Morgan masturbates her sweet',
+ 'uploader': 'John Salt',
+ 'duration': 426,
+ 'view_count': int,
+ 'like_count': int,
+ 'dislike_count': int,
+ 'age_limit': 18,
+ 'thumbnail': 'https://hctn.nv7s.com/contents/videos_sources/6291000/6291073/screenshots/1.jpg',
+ }
+ }, {
+ 'url': 'https://hdzog.com/videos/67063/gorgeous-malena-morgan-will-seduce-you-at-the-first-glance/',
+ 'md5': 'f8bdedafd45d1ec2875c43fe33a846d3',
+ 'info_dict': {
+ 'id': '67063',
+ 'display_id': 'gorgeous-malena-morgan-will-seduce-you-at-the-first-glance',
+ 'ext': 'mp4',
+ 'title': 'Gorgeous Malena Morgan will seduce you at the first glance',
+ 'uploader': 'momlesson',
+ 'duration': 601,
+ 'view_count': int,
+ 'like_count': int,
+ 'dislike_count': int,
+ 'age_limit': 18,
+ 'thumbnail': 'https://tn.hdzog.com/contents/videos_sources/67000/67063/screenshots/1.jpg',
+ }
+ }, {
+ 'url': 'https://hdzog.tube/videos/67063/gorgeous-malena-morgan-will-seduce-you-at-the-first-glance/',
+ 'md5': 'f8bdedafd45d1ec2875c43fe33a846d3',
+ 'info_dict': {
+ 'id': '67063',
+ 'display_id': 'gorgeous-malena-morgan-will-seduce-you-at-the-first-glance',
+ 'ext': 'mp4',
+ 'title': 'Gorgeous Malena Morgan will seduce you at the first glance',
+ 'uploader': 'momlesson',
+ 'duration': 601,
+ 'view_count': int,
+ 'like_count': int,
+ 'dislike_count': int,
+ 'age_limit': 18,
+ 'thumbnail': 'https://tn.hdzog.com/contents/videos_sources/67000/67063/screenshots/1.jpg',
+ }
+ }, {
+ 'url': 'https://hotmovs.com/videos/8789287/unbelievable-malena-morgan-performing-in-incredible-masturantion/',
+ 'md5': '71d32c51584876472db87e561171a386',
+ 'info_dict': {
+ 'id': '8789287',
+ 'display_id': 'unbelievable-malena-morgan-performing-in-incredible-masturantion',
+ 'ext': 'mp4',
+ 'title': 'Unbelievable Malena Morgan performing in incredible masturantion',
+ 'uploader': 'Davit Sanchez',
+ 'duration': 940,
+ 'view_count': int,
+ 'like_count': int,
+ 'dislike_count': int,
+ 'age_limit': 18,
+ 'thumbnail': 'https://tn.hotmovs.com/contents/videos_sources/8789000/8789287/screenshots/10.jpg',
+ }
+ }, {
+ 'url': 'https://hotmovs.tube/videos/8789287/unbelievable-malena-morgan-performing-in-incredible-masturantion/',
+ 'md5': '71d32c51584876472db87e561171a386',
+ 'info_dict': {
+ 'id': '8789287',
+ 'display_id': 'unbelievable-malena-morgan-performing-in-incredible-masturantion',
+ 'ext': 'mp4',
+ 'title': 'Unbelievable Malena Morgan performing in incredible masturantion',
+ 'uploader': 'Davit Sanchez',
+ 'duration': 940,
+ 'view_count': int,
+ 'like_count': int,
+ 'dislike_count': int,
+ 'age_limit': 18,
+ 'thumbnail': 'https://tn.hotmovs.com/contents/videos_sources/8789000/8789287/screenshots/10.jpg',
+ }
+ }, {
+ 'url': 'https://inporn.com/video/517897/malena-morgan-solo/',
+ 'md5': '344db467481edf78f193cdf5820a7cfb',
+ 'info_dict': {
+ 'id': '517897',
+ 'display_id': 'malena-morgan-solo',
+ 'ext': 'mp4',
+ 'title': 'Malena Morgan - Solo',
+ 'uploader': 'Ashley Oxy',
+ 'duration': 480,
+ 'view_count': int,
+ 'like_count': int,
+ 'dislike_count': int,
+ 'age_limit': 18,
+ 'thumbnail': 'https://iptn.m3pd.com/media/tn/sources/517897_1.jpg',
+ }
+ }, {
+ 'url': 'https://privatehomeclips.com/videos/3630599/malena-morgan-cam-show/',
+ 'md5': 'ea657273e352493c5fb6357fbfa4f126',
+ 'info_dict': {
+ 'id': '3630599',
+ 'display_id': 'malena-morgan-cam-show',
+ 'ext': 'mp4',
+ 'title': 'malena morgan cam show',
+ 'uploader': 'Member9915',
+ 'duration': 290,
+ 'view_count': int,
+ 'like_count': int,
+ 'dislike_count': int,
+ 'age_limit': 18,
+ 'thumbnail': 'https://hctn.nv7s.com/contents/videos_sources/3630000/3630599/screenshots/15.jpg',
+ }
+ }, {
+ 'url': 'https://tubepornclassic.com/videos/1015455/mimi-rogers-full-body-massage-nude-compilation/',
+ 'md5': '2e9a6cf610c9862e86e0ce24f08f4427',
+ 'info_dict': {
+ 'id': '1015455',
+ 'display_id': 'mimi-rogers-full-body-massage-nude-compilation',
+ 'ext': 'mp4',
+ 'title': 'Mimi Rogers - Full Body Massage (Nude) compilation',
+ 'uploader': '88bhuto',
+ 'duration': 286,
+ 'view_count': int,
+ 'like_count': int,
+ 'dislike_count': int,
+ 'age_limit': 18,
+ 'thumbnail': 'https://tn.tubepornclassic.com/contents/videos_sources/1015000/1015455/screenshots/6.jpg',
+ }
+ }, {
+ 'url': 'https://upornia.com/videos/1498858/twistys-malena-morgan-starring-at-dr-morgan-baller/',
+ 'md5': '7ff7033340bc88a173198b7c22600e4f',
+ 'info_dict': {
+ 'id': '1498858',
+ 'display_id': 'twistys-malena-morgan-starring-at-dr-morgan-baller',
+ 'ext': 'mp4',
+ 'title': 'Twistys - Malena Morgan starring at Dr. Morgan-Baller',
+ 'uploader': 'mindgeek',
+ 'duration': 480,
+ 'view_count': int,
+ 'like_count': int,
+ 'dislike_count': int,
+ 'age_limit': 18,
+ 'thumbnail': 'https://tn.upornia.com/contents/videos_sources/1498000/1498858/screenshots/1.jpg',
+ }
+ }, {
+ 'url': 'https://upornia.tube/videos/1498858/twistys-malena-morgan-starring-at-dr-morgan-baller/',
+ 'md5': '7ff7033340bc88a173198b7c22600e4f',
+ 'info_dict': {
+ 'id': '1498858',
+ 'display_id': 'twistys-malena-morgan-starring-at-dr-morgan-baller',
+ 'ext': 'mp4',
+ 'title': 'Twistys - Malena Morgan starring at Dr. Morgan-Baller',
+ 'uploader': 'mindgeek',
+ 'duration': 480,
+ 'view_count': int,
+ 'like_count': int,
+ 'dislike_count': int,
+ 'age_limit': 18,
+ 'thumbnail': 'https://tn.upornia.com/contents/videos_sources/1498000/1498858/screenshots/1.jpg',
+ }
+ }, {
+ 'url': 'https://vjav.com/videos/11761/yui-hatano-in-if-yui-was-my-girlfriend2/',
+ 'md5': '6de5bc1f13bdfc3491a77f23edb1676f',
+ 'info_dict': {
+ 'id': '11761',
+ 'display_id': 'yui-hatano-in-if-yui-was-my-girlfriend2',
+ 'ext': 'mp4',
+ 'title': 'Yui Hatano in If Yui Was My Girlfriend',
+ 'uploader': 'Matheus69',
+ 'duration': 3310,
+ 'view_count': int,
+ 'like_count': int,
+ 'dislike_count': int,
+ 'age_limit': 18,
+ 'thumbnail': 'https://tn.vjav.com/contents/videos_sources/11000/11761/screenshots/23.jpg',
+ }
+ }, {
+ 'url': 'https://vjav.tube/videos/11761/yui-hatano-in-if-yui-was-my-girlfriend2/',
+ 'md5': '6de5bc1f13bdfc3491a77f23edb1676f',
+ 'info_dict': {
+ 'id': '11761',
+ 'display_id': 'yui-hatano-in-if-yui-was-my-girlfriend2',
+ 'ext': 'mp4',
+ 'title': 'Yui Hatano in If Yui Was My Girlfriend',
+ 'uploader': 'Matheus69',
+ 'duration': 3310,
+ 'view_count': int,
+ 'like_count': int,
+ 'dislike_count': int,
+ 'age_limit': 18,
+ 'thumbnail': 'https://tn.vjav.com/contents/videos_sources/11000/11761/screenshots/23.jpg',
+ }
+ }, {
+ 'url': 'https://voyeurhit.com/videos/332875/charlotte-stokely-elle-alexandra-malena-morgan-lingerie/',
+ 'md5': '12b4666e9c3e60dafe9182e5d12aae33',
+ 'info_dict': {
+ 'id': '332875',
+ 'display_id': 'charlotte-stokely-elle-alexandra-malena-morgan-lingerie',
+ 'ext': 'mp4',
+ 'title': 'Charlotte Stokely, Elle Alexandra, Malena Morgan-Lingerie',
+ 'uploader': 'Kyle Roberts',
+ 'duration': 655,
+ 'view_count': int,
+ 'like_count': int,
+ 'dislike_count': int,
+ 'age_limit': 18,
+ 'thumbnail': 'https://tn.voyeurhit.com/contents/videos_sources/332000/332875/screenshots/1.jpg',
+ }
+ }, {
+ 'url': 'https://voyeurhit.tube/videos/332875/charlotte-stokely-elle-alexandra-malena-morgan-lingerie/',
+ 'md5': '12b4666e9c3e60dafe9182e5d12aae33',
+ 'info_dict': {
+ 'id': '332875',
+ 'display_id': 'charlotte-stokely-elle-alexandra-malena-morgan-lingerie',
+ 'ext': 'mp4',
+ 'title': 'Charlotte Stokely, Elle Alexandra, Malena Morgan-Lingerie',
+ 'uploader': 'Kyle Roberts',
+ 'duration': 655,
+ 'view_count': int,
+ 'like_count': int,
+ 'dislike_count': int,
+ 'age_limit': 18,
+ 'thumbnail': 'https://tn.voyeurhit.com/contents/videos_sources/332000/332875/screenshots/1.jpg',
+ }
+ }]
+ _WEBPAGE_TESTS = [{
+ 'url': 'https://pornzog.com/video/9125519/michelle-malone-dreamgirls-wild-wet-3/',
+ 'info_dict': {
+ 'id': '5119660',
+ 'display_id': '5119660',
+ 'ext': 'mp4',
+ 'title': 'Michelle Malone - Dreamgirls - Wild Wet 3',
+ 'uploader': 'FallenAngel12',
+ 'duration': 402,
+ 'view_count': int,
+ 'like_count': int,
+ 'dislike_count': int,
+ 'age_limit': 18,
+ 'thumbnail': 'https://hctn.nv7s.com/contents/videos_sources/5119000/5119660/screenshots/1.jpg',
+ }
+ }]
+
+ def _call_api(self, url, video_id, fatal=False, **kwargs):
+ content = self._download_json(url, video_id, fatal=fatal, **kwargs)
+ if traverse_obj(content, 'error'):
+ raise self._error_or_warning(ExtractorError(
+ f'Txxx said: {content["error"]}', expected=True), fatal=fatal)
+ return content or {}
+
+ def _real_extract(self, url):
+ video_id, host, display_id = self._match_valid_url(url).group('id', 'host', 'display_id')
+ headers = {'Referer': url, 'X-Requested-With': 'XMLHttpRequest'}
+
+ video_file = self._call_api(
+ f'https://{host}/api/videofile.php?video_id={video_id}&lifetime=8640000',
+ video_id, fatal=True, note='Downloading video file info', headers=headers)
+
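+ # The info JSON is sharded by id into million/thousand buckets,
+ # e.g. video 16574965 is fetched from
+ # .../api/json/video/86400/16000000/16574000/16574965.json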
+ slug = f'{int(1E6 * (int(video_id) // 1E6))}/{1000 * (int(video_id) // 1000)}'
+ video_info = self._call_api(
+ f'https://{host}/api/json/video/86400/{slug}/{video_id}.json',
+ video_id, note='Downloading video info', headers=headers)
+
+ return {
+ 'id': video_id,
+ 'display_id': display_id,
+ 'title': traverse_obj(video_info, ('video', 'title')),
+ 'uploader': traverse_obj(video_info, ('video', 'user', 'username')),
+ 'duration': parse_duration(traverse_obj(video_info, ('video', 'duration'))),
+ 'view_count': int_or_none(traverse_obj(video_info, ('video', 'statistics', 'viewed'))),
+ 'like_count': int_or_none(traverse_obj(video_info, ('video', 'statistics', 'likes'))),
+ 'dislike_count': int_or_none(traverse_obj(video_info, ('video', 'statistics', 'dislikes'))),
+ 'age_limit': 18,
+ 'thumbnail': traverse_obj(video_info, ('video', 'thumbsrc', {url_or_none})),
+ 'formats': get_formats(host, video_file),
+ }
+
+
+class PornTopIE(InfoExtractor):
+ _VALID_URL = r'https?://(?P<host>(?:www\.)?porntop\.com)/video/(?P<id>\d+)(?:/(?P<display_id>[^/?]+))?'
+ _TESTS = [{
+ 'url': 'https://porntop.com/video/101569/triple-threat-with-lia-lor-malena-morgan-and-dani-daniels/',
+ 'md5': '612ba7b3cb99455b382972948e200b08',
+ 'info_dict': {
+ 'id': '101569',
+ 'display_id': 'triple-threat-with-lia-lor-malena-morgan-and-dani-daniels',
+ 'ext': 'mp4',
+ 'title': 'Triple Threat With Lia Lor, Malena Morgan And Dani Daniels',
+ 'description': 'md5:285357d9d3a00ce5acb29f39f826dbf6',
+ 'uploader': 'PatrickBush',
+ 'duration': 480,
+ 'view_count': int,
+ 'like_count': int,
+ 'dislike_count': int,
+ 'age_limit': 18,
+ 'timestamp': 1609455029,
+ 'upload_date': '20201231',
+ 'thumbnail': 'https://tn.porntop.com/media/tn/sources/101569_1.jpg',
+ }
+ }]
+
+ def _real_extract(self, url):
+ video_id, host, display_id = self._match_valid_url(url).group('id', 'host', 'display_id')
+ webpage = self._download_webpage(url, video_id)
+
+ json_ld = self._json_ld(self._search_json(
+ r'\bschemaJson\s*=', webpage, 'JSON-LD', video_id, transform_source=js_to_json,
+ contains_pattern='{[^<]+?VideoObject[^<]+};'), video_id, fatal=True)
+
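+ # The page's initPlayer() call carries the stream URLs as a quoted
+ # base64 string argument, obfuscated with the same homoglyph scheme
+ # that decode_base64 above undoes.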
+ video_file = self._parse_json(decode_base64(self._search_regex(
+ r"window\.initPlayer\(.*}}},\s*'(?P<json_b64c>[^']+)'",
+ webpage, 'json_urls', group='json_b64c')), video_id)
+
+ return merge_dicts({
+ 'id': video_id,
+ 'display_id': display_id,
+ 'age_limit': 18,
+ 'formats': get_formats(host, video_file),
+ }, json_ld)
diff --git a/yt_dlp/extractor/udemy.py b/yt_dlp/extractor/udemy.py
new file mode 100644
index 0000000..5c29605
--- /dev/null
+++ b/yt_dlp/extractor/udemy.py
@@ -0,0 +1,474 @@
+import re
+
+from .common import InfoExtractor
+from ..compat import compat_str, compat_urlparse
+from ..networking import Request
+from ..networking.exceptions import HTTPError
+from ..utils import (
+ ExtractorError,
+ determine_ext,
+ extract_attributes,
+ float_or_none,
+ int_or_none,
+ js_to_json,
+ smuggle_url,
+ try_get,
+ unescapeHTML,
+ unsmuggle_url,
+ url_or_none,
+ urlencode_postdata,
+)
+
+
+class UdemyIE(InfoExtractor):
+ IE_NAME = 'udemy'
+ _VALID_URL = r'''(?x)
+ https?://
+ (?:[^/]+\.)?udemy\.com/
+ (?:
+ [^#]+\#/lecture/|
+ lecture/view/?\?lectureId=|
+ [^/]+/learn/v4/t/lecture/
+ )
+ (?P<id>\d+)
+ '''
+ _LOGIN_URL = 'https://www.udemy.com/join/login-popup/?displayType=ajax&showSkipButton=1'
+ _ORIGIN_URL = 'https://www.udemy.com'
+ _NETRC_MACHINE = 'udemy'
+
+ _TESTS = [{
+ 'url': 'https://www.udemy.com/java-tutorial/#/lecture/172757',
+ 'md5': '98eda5b657e752cf945d8445e261b5c5',
+ 'info_dict': {
+ 'id': '160614',
+ 'ext': 'mp4',
+ 'title': 'Introduction and Installation',
+ 'description': 'md5:c0d51f6f21ef4ec65f091055a5eef876',
+ 'duration': 579.29,
+ },
+ 'skip': 'Requires udemy account credentials',
+ }, {
+ # new URL schema
+ 'url': 'https://www.udemy.com/electric-bass-right-from-the-start/learn/v4/t/lecture/4580906',
+ 'only_matching': True,
+ }, {
+ # no url in outputs format entry
+ 'url': 'https://www.udemy.com/learn-web-development-complete-step-by-step-guide-to-success/learn/v4/t/lecture/4125812',
+ 'only_matching': True,
+ }, {
+ # only outputs rendition
+ 'url': 'https://www.udemy.com/how-you-can-help-your-local-community-5-amazing-examples/learn/v4/t/lecture/3225750?start=0',
+ 'only_matching': True,
+ }, {
+ 'url': 'https://wipro.udemy.com/java-tutorial/#/lecture/172757',
+ 'only_matching': True,
+ }]
+
+ def _extract_course_info(self, webpage, video_id):
+ course = self._parse_json(
+ unescapeHTML(self._search_regex(
+ r'ng-init=["\'].*\bcourse=({.+?})[;"\']',
+ webpage, 'course', default='{}')),
+ video_id, fatal=False) or {}
+ course_id = course.get('id') or self._search_regex(
+ [
+ r'data-course-id=["\'](\d+)',
+ r'&quot;courseId&quot;\s*:\s*(\d+)'
+ ], webpage, 'course id')
+ return course_id, course.get('title')
+
+ def _enroll_course(self, base_url, webpage, course_id):
+ def combine_url(base_url, url):
+ return compat_urlparse.urljoin(base_url, url) if not url.startswith('http') else url
+
+ checkout_url = unescapeHTML(self._search_regex(
+ r'href=(["\'])(?P<url>(?:https?://(?:www\.)?udemy\.com)?/(?:payment|cart)/checkout/.+?)\1',
+ webpage, 'checkout url', group='url', default=None))
+ if checkout_url:
+ raise ExtractorError(
+ 'Course %s is not free. You have to pay for it before you can download. '
+ 'Use this URL to confirm purchase: %s'
+ % (course_id, combine_url(base_url, checkout_url)),
+ expected=True)
+
+ enroll_url = unescapeHTML(self._search_regex(
+ r'href=(["\'])(?P<url>(?:https?://(?:www\.)?udemy\.com)?/course/subscribe/.+?)\1',
+ webpage, 'enroll url', group='url', default=None))
+ if enroll_url:
+ webpage = self._download_webpage(
+ combine_url(base_url, enroll_url),
+ course_id, 'Enrolling in the course',
+ headers={'Referer': base_url})
+ if '>You have enrolled in' in webpage:
+ self.to_screen('%s: Successfully enrolled in the course' % course_id)
+
+ def _download_lecture(self, course_id, lecture_id):
+ return self._download_json(
+ 'https://www.udemy.com/api-2.0/users/me/subscribed-courses/%s/lectures/%s?'
+ % (course_id, lecture_id),
+ lecture_id, 'Downloading lecture JSON', query={
+ 'fields[lecture]': 'title,description,view_html,asset',
+ 'fields[asset]': 'asset_type,stream_url,thumbnail_url,download_urls,stream_urls,captions,data,course_is_drmed',
+ })
+
+ def _handle_error(self, response):
+ if not isinstance(response, dict):
+ return
+ error = response.get('error')
+ if error:
+ error_str = 'Udemy returned error #%s: %s' % (error.get('code'), error.get('message'))
+ error_data = error.get('data')
+ if error_data:
+ error_str += ' - %s' % error_data.get('formErrors')
+ raise ExtractorError(error_str, expected=True)
+
+ def _download_webpage_handle(self, *args, **kwargs):
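+ # Spoof a desktop browser UA; Udemy fronts its pages with bot
+ # protection (see the '_pxCaptcha' check below) that blocks
+ # non-browser user agents.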
+ headers = kwargs.get('headers', {}).copy()
+ headers['User-Agent'] = 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/72.0.3626.109 Safari/537.36'
+ kwargs['headers'] = headers
+ ret = super(UdemyIE, self)._download_webpage_handle(
+ *args, **kwargs)
+ if not ret:
+ return ret
+ webpage, _ = ret
+ if any(p in webpage for p in (
+ '>Please verify you are a human',
+ 'Access to this page has been denied because we believe you are using automation tools to browse the website',
+ '"_pxCaptcha"')):
+ raise ExtractorError(
+ 'Udemy asks you to solve a CAPTCHA. Login with browser, '
+ 'solve CAPTCHA, then export cookies and pass cookie file to '
+ 'yt-dlp with --cookies.', expected=True)
+ return ret
+
+ def _download_json(self, url_or_request, *args, **kwargs):
+ headers = {
+ 'X-Udemy-Snail-Case': 'true',
+ 'X-Requested-With': 'XMLHttpRequest',
+ }
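+ # Udemy's API expects the client id and access token from the session
+ # cookies to be replayed as X-Udemy-* auth headers.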
+ for cookie in self.cookiejar:
+ if cookie.name == 'client_id':
+ headers['X-Udemy-Client-Id'] = cookie.value
+ elif cookie.name == 'access_token':
+ headers['X-Udemy-Bearer-Token'] = cookie.value
+ headers['X-Udemy-Authorization'] = 'Bearer %s' % cookie.value
+
+ if isinstance(url_or_request, Request):
+ url_or_request.headers.update(headers)
+ else:
+ url_or_request = Request(url_or_request, headers=headers)
+
+ response = super(UdemyIE, self)._download_json(url_or_request, *args, **kwargs)
+ self._handle_error(response)
+ return response
+
+ def _perform_login(self, username, password):
+ login_popup = self._download_webpage(
+ self._LOGIN_URL, None, 'Downloading login popup')
+
+ def is_logged(webpage):
+ return any(re.search(p, webpage) for p in (
+ r'href=["\'](?:https://www\.udemy\.com)?/user/logout/',
+ r'>Logout<'))
+
+ # already logged in
+ if is_logged(login_popup):
+ return
+
+ login_form = self._form_hidden_inputs('login-form', login_popup)
+
+ login_form.update({
+ 'email': username,
+ 'password': password,
+ })
+
+ response = self._download_webpage(
+ self._LOGIN_URL, None, 'Logging in',
+ data=urlencode_postdata(login_form),
+ headers={
+ 'Referer': self._ORIGIN_URL,
+ 'Origin': self._ORIGIN_URL,
+ })
+
+ if not is_logged(response):
+ error = self._html_search_regex(
+ r'(?s)<div[^>]+class="form-errors[^"]*">(.+?)</div>',
+ response, 'error message', default=None)
+ if error:
+ raise ExtractorError('Unable to login: %s' % error, expected=True)
+ raise ExtractorError('Unable to log in')
+
+ def _real_extract(self, url):
+ lecture_id = self._match_id(url)
+ course_id = unsmuggle_url(url, {})[1].get('course_id')
+
+ webpage = None
+ if not course_id:
+ webpage = self._download_webpage(url, lecture_id)
+ course_id, _ = self._extract_course_info(webpage, lecture_id)
+
+ try:
+ lecture = self._download_lecture(course_id, lecture_id)
+ except ExtractorError as e:
+ # A 403 error here may simply mean we are not enrolled in the course
+ if isinstance(e.cause, HTTPError) and e.cause.status == 403:
+ webpage = webpage or self._download_webpage(url, lecture_id)
+ self._enroll_course(url, webpage, course_id)
+ lecture = self._download_lecture(course_id, lecture_id)
+ else:
+ raise
+
+ title = lecture['title']
+ description = lecture.get('description')
+
+ asset = lecture['asset']
+
+ asset_type = asset.get('asset_type') or asset.get('assetType')
+ if asset_type != 'Video':
+ raise ExtractorError(
+ 'Lecture %s is not a video' % lecture_id, expected=True)
+
+ stream_url = asset.get('stream_url') or asset.get('streamUrl')
+ if stream_url:
+ youtube_url = self._search_regex(
+ r'(https?://www\.youtube\.com/watch\?v=.*)', stream_url, 'youtube URL', default=None)
+ if youtube_url:
+ return self.url_result(youtube_url, 'Youtube')
+
+ video_id = compat_str(asset['id'])
+ thumbnail = asset.get('thumbnail_url') or asset.get('thumbnailUrl')
+ duration = float_or_none(asset.get('data', {}).get('duration'))
+
+ subtitles = {}
+ automatic_captions = {}
+
+ formats = []
+
+ def extract_output_format(src, f_id):
+ return {
+ 'url': src.get('url'),
+ 'format_id': '%sp' % (src.get('height') or f_id),
+ 'width': int_or_none(src.get('width')),
+ 'height': int_or_none(src.get('height')),
+ 'vbr': int_or_none(src.get('video_bitrate_in_kbps')),
+ 'vcodec': src.get('video_codec'),
+ 'fps': int_or_none(src.get('frame_rate')),
+ 'abr': int_or_none(src.get('audio_bitrate_in_kbps')),
+ 'acodec': src.get('audio_codec'),
+ 'asr': int_or_none(src.get('audio_sample_rate')),
+ 'tbr': int_or_none(src.get('total_bitrate_in_kbps')),
+ 'filesize': int_or_none(src.get('file_size_in_bytes')),
+ }
+
+ outputs = asset.get('data', {}).get('outputs')
+ if not isinstance(outputs, dict):
+ outputs = {}
+
+ def add_output_format_meta(f, key):
+ output = outputs.get(key)
+ if isinstance(output, dict):
+ output_format = extract_output_format(output, key)
+ output_format.update(f)
+ return output_format
+ return f
+
+ def extract_formats(source_list):
+ if not isinstance(source_list, list):
+ return
+ for source in source_list:
+ video_url = url_or_none(source.get('file') or source.get('src'))
+ if not video_url:
+ continue
+ if source.get('type') == 'application/x-mpegURL' or determine_ext(video_url) == 'm3u8':
+ formats.extend(self._extract_m3u8_formats(
+ video_url, video_id, 'mp4', entry_protocol='m3u8_native',
+ m3u8_id='hls', fatal=False))
+ continue
+ format_id = source.get('label')
+ f = {
+ 'url': video_url,
+ 'format_id': '%sp' % format_id,
+ 'height': int_or_none(format_id),
+ }
+ if format_id:
+ # Some videos contain additional metadata (e.g.
+ # https://www.udemy.com/ios9-swift/learn/#/lecture/3383208)
+ f = add_output_format_meta(f, format_id)
+ formats.append(f)
+
+ def extract_subtitles(track_list):
+ if not isinstance(track_list, list):
+ return
+ for track in track_list:
+ if not isinstance(track, dict):
+ continue
+ if track.get('kind') != 'captions':
+ continue
+ src = url_or_none(track.get('src'))
+ if not src:
+ continue
+ lang = track.get('language') or track.get(
+ 'srclang') or track.get('label')
+ sub_dict = automatic_captions if track.get(
+ 'autogenerated') is True else subtitles
+ sub_dict.setdefault(lang, []).append({
+ 'url': src,
+ })
+
+ for url_kind in ('download', 'stream'):
+ urls = asset.get('%s_urls' % url_kind)
+ if isinstance(urls, dict):
+ extract_formats(urls.get('Video'))
+
+ captions = asset.get('captions')
+ if isinstance(captions, list):
+ for cc in captions:
+ if not isinstance(cc, dict):
+ continue
+ cc_url = url_or_none(cc.get('url'))
+ if not cc_url:
+ continue
+ lang = try_get(cc, lambda x: x['locale']['locale'], compat_str)
+ sub_dict = (automatic_captions if cc.get('source') == 'auto'
+ else subtitles)
+ sub_dict.setdefault(lang or 'en', []).append({
+ 'url': cc_url,
+ })
+
+ view_html = lecture.get('view_html')
+ if view_html:
+ view_html_urls = set()
+ for source in re.findall(r'<source[^>]+>', view_html):
+ attributes = extract_attributes(source)
+ src = attributes.get('src')
+ if not src:
+ continue
+ res = attributes.get('data-res')
+ height = int_or_none(res)
+ if src in view_html_urls:
+ continue
+ view_html_urls.add(src)
+ if attributes.get('type') == 'application/x-mpegURL' or determine_ext(src) == 'm3u8':
+ m3u8_formats = self._extract_m3u8_formats(
+ src, video_id, 'mp4', entry_protocol='m3u8_native',
+ m3u8_id='hls', fatal=False)
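+ # Variant playlist URLs embed the resolution and total bitrate
+ # in the path (.../hls_<height>_<tbr>/...); fill them in when
+ # the manifest itself omits the values.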
+ for f in m3u8_formats:
+ m = re.search(r'/hls_(?P<height>\d{3,4})_(?P<tbr>\d{2,})/', f['url'])
+ if m:
+ if not f.get('height'):
+ f['height'] = int(m.group('height'))
+ if not f.get('tbr'):
+ f['tbr'] = int(m.group('tbr'))
+ formats.extend(m3u8_formats)
+ else:
+ formats.append(add_output_format_meta({
+ 'url': src,
+ 'format_id': '%dp' % height if height else None,
+ 'height': height,
+ }, res))
+
+ # react rendition since 2017.04.15 (see
+ # https://github.com/ytdl-org/youtube-dl/issues/12744)
+ data = self._parse_json(
+ self._search_regex(
+ r'videojs-setup-data=(["\'])(?P<data>{.+?})\1', view_html,
+ 'setup data', default='{}', group='data'), video_id,
+ transform_source=unescapeHTML, fatal=False)
+ if data and isinstance(data, dict):
+ extract_formats(data.get('sources'))
+ if not duration:
+ duration = int_or_none(data.get('duration'))
+ extract_subtitles(data.get('tracks'))
+
+ if not subtitles and not automatic_captions:
+ text_tracks = self._parse_json(
+ self._search_regex(
+ r'text-tracks=(["\'])(?P<data>\[.+?\])\1', view_html,
+ 'text tracks', default='{}', group='data'), video_id,
+ transform_source=lambda s: js_to_json(unescapeHTML(s)),
+ fatal=False)
+ extract_subtitles(text_tracks)
+
+ if not formats and outputs:
+ for format_id, output in outputs.items():
+ f = extract_output_format(output, format_id)
+ if f.get('url'):
+ formats.append(f)
+
+ if not formats and asset.get('course_is_drmed'):
+ self.report_drm(video_id)
+
+ return {
+ 'id': video_id,
+ 'title': title,
+ 'description': description,
+ 'thumbnail': thumbnail,
+ 'duration': duration,
+ 'formats': formats,
+ 'subtitles': subtitles,
+ 'automatic_captions': automatic_captions,
+ }
+
+
+class UdemyCourseIE(UdemyIE): # XXX: Do not subclass from concrete IE
+ IE_NAME = 'udemy:course'
+ _VALID_URL = r'https?://(?:[^/]+\.)?udemy\.com/(?P<id>[^/?#&]+)'
+ _TESTS = [{
+ 'url': 'https://www.udemy.com/java-tutorial/',
+ 'only_matching': True,
+ }, {
+ 'url': 'https://wipro.udemy.com/java-tutorial/',
+ 'only_matching': True,
+ }]
+
+ @classmethod
+ def suitable(cls, url):
+ return False if UdemyIE.suitable(url) else super(UdemyCourseIE, cls).suitable(url)
+
+ def _real_extract(self, url):
+ course_path = self._match_id(url)
+
+ webpage = self._download_webpage(url, course_path)
+
+ course_id, title = self._extract_course_info(webpage, course_path)
+
+ self._enroll_course(url, webpage, course_id)
+
+ response = self._download_json(
+ 'https://www.udemy.com/api-2.0/courses/%s/cached-subscriber-curriculum-items' % course_id,
+ course_id, 'Downloading course curriculum', query={
+ 'fields[chapter]': 'title,object_index',
+ 'fields[lecture]': 'title,asset',
+ 'page_size': '1000',
+ })
+
+ entries = []
+ chapter, chapter_number = [None] * 2
+ for entry in response['results']:
+ clazz = entry.get('_class')
+ if clazz == 'lecture':
+ asset = entry.get('asset')
+ if isinstance(asset, dict):
+ asset_type = asset.get('asset_type') or asset.get('assetType')
+ if asset_type != 'Video':
+ continue
+ lecture_id = entry.get('id')
+ if lecture_id:
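+ # Smuggle the course id so UdemyIE need not re-derive it
+ # from the lecture page.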
+ entry = {
+ '_type': 'url_transparent',
+ 'url': smuggle_url(
+ f'https://www.udemy.com/{course_path}/learn/v4/t/lecture/{entry["id"]}',
+ {'course_id': course_id}),
+ 'title': entry.get('title'),
+ 'ie_key': UdemyIE.ie_key(),
+ }
+ if chapter_number:
+ entry['chapter_number'] = chapter_number
+ if chapter:
+ entry['chapter'] = chapter
+ entries.append(entry)
+ elif clazz == 'chapter':
+ chapter_number = entry.get('object_index')
+ chapter = entry.get('title')
+
+ return self.playlist_result(entries, course_id, title)
diff --git a/yt_dlp/extractor/udn.py b/yt_dlp/extractor/udn.py
new file mode 100644
index 0000000..10668ac
--- /dev/null
+++ b/yt_dlp/extractor/udn.py
@@ -0,0 +1,98 @@
+import re
+
+from .common import InfoExtractor
+from ..compat import compat_urlparse
+from ..utils import (
+ determine_ext,
+ int_or_none,
+ js_to_json,
+)
+
+
+class UDNEmbedIE(InfoExtractor):
+ IE_DESC = '聯合影音'
+ _PROTOCOL_RELATIVE_VALID_URL = r'//video\.udn\.com/(?:embed|play)/news/(?P<id>\d+)'
+ _VALID_URL = r'https?:' + _PROTOCOL_RELATIVE_VALID_URL
+ _EMBED_REGEX = [r'<iframe[^>]+src="(?:https?:)?(?P<url>%s)"' % _PROTOCOL_RELATIVE_VALID_URL]
+ _TESTS = [{
+ 'url': 'http://video.udn.com/embed/news/300040',
+ 'info_dict': {
+ 'id': '300040',
+ 'ext': 'mp4',
+ 'title': '生物老師男變女 全校挺"做自己"',
+ 'thumbnail': r're:^https?://.*\.jpg$',
+ },
+ 'params': {
+ # m3u8 download
+ 'skip_download': True,
+ },
+ 'expected_warnings': ['Failed to parse JSON Expecting value'],
+ }, {
+ 'url': 'https://video.udn.com/embed/news/300040',
+ 'only_matching': True,
+ }, {
+ # From https://video.udn.com/news/303776
+ 'url': 'https://video.udn.com/play/news/303776',
+ 'only_matching': True,
+ }]
+
+ def _real_extract(self, url):
+ video_id = self._match_id(url)
+
+ page = self._download_webpage(url, video_id)
+
+ options_str = self._html_search_regex(
+ r'var\s+options\s*=\s*([^;]+);', page, 'options')
+ trans_options_str = js_to_json(options_str)
+ options = self._parse_json(trans_options_str, 'options', fatal=False) or {}
+ if options:
+ video_urls = options['video']
+ title = options['title']
+ poster = options.get('poster')
+ else:
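+ # The converted options string did not parse as JSON (see the
+ # expected warning in the test above); fall back to pulling the
+ # individual fields out of the raw options string with regexes.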
+ video_urls = self._parse_json(self._html_search_regex(
+ r'"video"\s*:\s*({.+?})\s*,', trans_options_str, 'video urls'), 'video urls')
+ title = self._html_search_regex(
+ r"title\s*:\s*'(.+?)'\s*,", options_str, 'title')
+ poster = self._html_search_regex(
+ r"poster\s*:\s*'(.+?)'\s*,", options_str, 'poster', default=None)
+
+ if video_urls.get('youtube'):
+ return self.url_result(video_urls.get('youtube'), 'Youtube')
+
+ formats = []
+ for video_type, api_url in video_urls.items():
+ if not api_url:
+ continue
+
+ video_url = self._download_webpage(
+ compat_urlparse.urljoin(url, api_url), video_id,
+ note='retrieve url for %s video' % video_type)
+
+ ext = determine_ext(video_url)
+ if ext == 'm3u8':
+ formats.extend(self._extract_m3u8_formats(
+ video_url, video_id, ext='mp4', m3u8_id='hls'))
+ elif ext == 'f4m':
+ formats.extend(self._extract_f4m_formats(
+ video_url, video_id, f4m_id='hds'))
+ else:
+ mobj = re.search(r'_(?P<height>\d+)p_(?P<tbr>\d+)\.mp4', video_url)
+ a_format = {
+ 'url': video_url,
+ # video_type may be 'mp4', which confuses YoutubeDL
+ 'format_id': 'http-' + video_type,
+ }
+ if mobj:
+ a_format.update({
+ 'height': int_or_none(mobj.group('height')),
+ 'tbr': int_or_none(mobj.group('tbr')),
+ })
+ formats.append(a_format)
+
+ return {
+ 'id': video_id,
+ 'formats': formats,
+ 'title': title,
+ 'thumbnail': poster,
+ }
diff --git a/yt_dlp/extractor/ufctv.py b/yt_dlp/extractor/ufctv.py
new file mode 100644
index 0000000..2c1c5e0
--- /dev/null
+++ b/yt_dlp/extractor/ufctv.py
@@ -0,0 +1,13 @@
+from .imggaming import ImgGamingBaseIE
+
+
+class UFCTVIE(ImgGamingBaseIE):
+ _VALID_URL = ImgGamingBaseIE._VALID_URL_TEMPL % r'(?:(?:app|www)\.)?(?:ufc\.tv|(?:ufc)?fightpass\.com)|ufcfightpass\.img(?:dge|gaming)\.com'
+ _NETRC_MACHINE = 'ufctv'
+ _REALM = 'ufc'
+
+
+class UFCArabiaIE(ImgGamingBaseIE):
+ _VALID_URL = ImgGamingBaseIE._VALID_URL_TEMPL % r'(?:(?:app|www)\.)?ufcarabia\.(?:ae|com)'
+ _NETRC_MACHINE = 'ufcarabia'
+ _REALM = 'admufc'
diff --git a/yt_dlp/extractor/ukcolumn.py b/yt_dlp/extractor/ukcolumn.py
new file mode 100644
index 0000000..f914613
--- /dev/null
+++ b/yt_dlp/extractor/ukcolumn.py
@@ -0,0 +1,71 @@
+from .common import InfoExtractor
+from .vimeo import VimeoIE
+from .youtube import YoutubeIE
+from ..utils import (
+ ExtractorError,
+ unescapeHTML,
+ urljoin,
+)
+
+
+class UkColumnIE(InfoExtractor):
+ _WORKING = False
+ IE_NAME = 'ukcolumn'
+ _VALID_URL = r'(?i)https?://(?:www\.)?ukcolumn\.org(/index\.php)?/(?:video|ukcolumn-news)/(?P<id>[-a-z0-9]+)'
+
+ _TESTS = [{
+ 'url': 'https://www.ukcolumn.org/ukcolumn-news/uk-column-news-28th-april-2021',
+ 'info_dict': {
+ 'id': '541632443',
+ 'ext': 'mp4',
+ 'title': 'UK Column News - 28th April 2021',
+ 'uploader_id': 'ukcolumn',
+ 'uploader': 'UK Column',
+ },
+ 'add_ie': [VimeoIE.ie_key()],
+ 'expected_warnings': ['Unable to download JSON metadata'],
+ 'params': {
+ 'skip_download': 'Handled by Vimeo',
+ },
+ }, {
+ 'url': 'https://www.ukcolumn.org/video/insight-eu-military-unification',
+ 'info_dict': {
+ 'id': 'Fzbnb9t7XAw',
+ 'ext': 'mp4',
+ 'title': 'Insight: EU Military Unification',
+ 'uploader_id': 'ukcolumn',
+ 'description': 'md5:29a207965271af89baa0bc191f5de576',
+ 'uploader': 'UK Column',
+ 'upload_date': '20170514',
+ },
+ 'add_ie': [YoutubeIE.ie_key()],
+ 'params': {
+ 'skip_download': 'Handled by Youtube',
+ },
+ }, {
+ 'url': 'https://www.ukcolumn.org/index.php/ukcolumn-news/uk-column-news-30th-april-2021',
+ 'only_matching': True,
+ }]
+
+ def _real_extract(self, url):
+ display_id = self._match_id(url)
+ webpage = self._download_webpage(url, display_id)
+
+ oembed_url = urljoin(url, unescapeHTML(self._search_regex(
+ r'<iframe[^>]+src=(["\'])(?P<url>/media/oembed\?url=.+?)\1',
+ webpage, 'OEmbed URL', group='url')))
+ oembed_webpage = self._download_webpage(
+ oembed_url, display_id, note='Downloading OEmbed page')
+
+ ie, video_url = YoutubeIE, YoutubeIE._extract_url(oembed_webpage)
+ if not video_url:
+ ie, video_url = VimeoIE, VimeoIE._extract_url(url, oembed_webpage)
+ if not video_url:
+ raise ExtractorError('No embedded video found')
+
+ return {
+ '_type': 'url_transparent',
+ 'title': self._og_search_title(webpage),
+ 'url': video_url,
+ 'ie_key': ie.ie_key(),
+ }
diff --git a/yt_dlp/extractor/uktvplay.py b/yt_dlp/extractor/uktvplay.py
new file mode 100644
index 0000000..ab22a8e
--- /dev/null
+++ b/yt_dlp/extractor/uktvplay.py
@@ -0,0 +1,36 @@
+from .common import InfoExtractor
+
+
+class UKTVPlayIE(InfoExtractor):
+ _VALID_URL = r'https?://uktvplay\.(?:uktv\.)?co\.uk/(?:.+?\?.*?\bvideo=|(?:[^/]+/)*)(?P<id>\d+)'
+ _TESTS = [{
+ 'url': 'https://uktvplay.uktv.co.uk/shows/world-at-war/c/200/watch-online/?video=2117008346001',
+ 'info_dict': {
+ 'id': '2117008346001',
+ 'ext': 'mp4',
+ 'title': 'Pincers',
+ 'description': 'Pincers',
+ 'uploader_id': '1242911124001',
+ 'upload_date': '20130124',
+ 'timestamp': 1359049267,
+ },
+ 'params': {
+ # m3u8 download
+ 'skip_download': True,
+ },
+ 'expected_warnings': ['Failed to download MPD manifest']
+ }, {
+ 'url': 'https://uktvplay.uktv.co.uk/shows/africa/watch-online/5983349675001',
+ 'only_matching': True,
+ }, {
+ 'url': 'https://uktvplay.co.uk/shows/hornby-a-model-world/series-1/episode-1/6276739790001?autoplaying=true',
+ 'only_matching': True,
+ }]
+ # BRIGHTCOVE_URL_TEMPLATE = 'https://players.brightcove.net/1242911124001/OrCyvJ2gyL_default/index.html?videoId=%s'
+ BRIGHTCOVE_URL_TEMPLATE = 'http://players.brightcove.net/1242911124001/H1xnMOqP_default/index.html?videoId=%s'
+
+ def _real_extract(self, url):
+ video_id = self._match_id(url)
+ return self.url_result(
+ self.BRIGHTCOVE_URL_TEMPLATE % video_id,
+ 'BrightcoveNew', video_id)
diff --git a/yt_dlp/extractor/umg.py b/yt_dlp/extractor/umg.py
new file mode 100644
index 0000000..1da4ecd
--- /dev/null
+++ b/yt_dlp/extractor/umg.py
@@ -0,0 +1,98 @@
+from .common import InfoExtractor
+from ..utils import (
+ int_or_none,
+ parse_filesize,
+ parse_iso8601,
+)
+
+
+class UMGDeIE(InfoExtractor):
+ _WORKING = False
+ IE_NAME = 'umg:de'
+ IE_DESC = 'Universal Music Deutschland'
+ _VALID_URL = r'https?://(?:www\.)?universal-music\.de/[^/]+/videos/[^/?#]+-(?P<id>\d+)'
+ _TEST = {
+ 'url': 'https://www.universal-music.de/sido/videos/jedes-wort-ist-gold-wert-457803',
+ 'md5': 'ebd90f48c80dcc82f77251eb1902634f',
+ 'info_dict': {
+ 'id': '457803',
+ 'ext': 'mp4',
+ 'title': 'Jedes Wort ist Gold wert',
+ 'timestamp': 1513591800,
+ 'upload_date': '20171218',
+ }
+ }
+
+ def _real_extract(self, url):
+ video_id = self._match_id(url)
+ video_data = self._download_json(
+ 'https://graphql.universal-music.de/',
+ video_id, query={
+ 'query': '''{
+ universalMusic(channel:16) {
+ video(id:%s) {
+ headline
+ formats {
+ formatId
+ url
+ type
+ width
+ height
+ mimeType
+ fileSize
+ }
+ duration
+ createdDate
+ }
+ }
+}''' % video_id})['data']['universalMusic']['video']
+
+ title = video_data['headline']
+ hls_url_template = 'http://mediadelivery.universal-music-services.de/vod/mp4:autofill/storage/' + '/'.join(list(video_id)) + '/content/%s/file/playlist.m3u8'
+
+ thumbnails = []
+ formats = []
+
+ def add_m3u8_format(format_id):
+ formats.extend(self._extract_m3u8_formats(
+ hls_url_template % format_id, video_id, 'mp4',
+ 'm3u8_native', m3u8_id='hls', fatal=False))
+
+ for f in video_data.get('formats', []):
+ f_url = f.get('url')
+ mime_type = f.get('mimeType')
+ if not f_url or mime_type == 'application/mxf':
+ continue
+ fmt = {
+ 'url': f_url,
+ 'width': int_or_none(f.get('width')),
+ 'height': int_or_none(f.get('height')),
+ 'filesize': parse_filesize(f.get('fileSize')),
+ }
+ f_type = f.get('type')
+ if f_type == 'Image':
+ thumbnails.append(fmt)
+ elif f_type == 'Video':
+ format_id = f.get('formatId')
+ if format_id:
+ fmt['format_id'] = format_id
+ if mime_type == 'video/mp4':
+ add_m3u8_format(format_id)
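+ # Probe the first byte to weed out broken URLs: FLV files start
+ # with 'F' and an MP4 box-size field starts with 0x00.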
+ urlh = self._request_webpage(f_url, video_id, fatal=False)
+ if urlh:
+ first_byte = urlh.read(1)
+ if first_byte not in (b'F', b'\x00'):
+ continue
+ formats.append(fmt)
+ if not formats:
+ for format_id in (867, 836, 940):
+ add_m3u8_format(format_id)
+
+ return {
+ 'id': video_id,
+ 'title': title,
+ 'duration': int_or_none(video_data.get('duration')),
+ 'timestamp': parse_iso8601(video_data.get('createdDate'), ' '),
+ 'thumbnails': thumbnails,
+ 'formats': formats,
+ }
diff --git a/yt_dlp/extractor/unistra.py b/yt_dlp/extractor/unistra.py
new file mode 100644
index 0000000..6e872cd
--- /dev/null
+++ b/yt_dlp/extractor/unistra.py
@@ -0,0 +1,64 @@
+import re
+
+from .common import InfoExtractor
+from ..utils import qualities
+
+
+class UnistraIE(InfoExtractor):
+ _VALID_URL = r'https?://utv\.unistra\.fr/(?:index|video)\.php\?id_video=(?P<id>\d+)'
+
+ _TESTS = [
+ {
+ 'url': 'http://utv.unistra.fr/video.php?id_video=154',
+ 'md5': '736f605cfdc96724d55bb543ab3ced24',
+ 'info_dict': {
+ 'id': '154',
+ 'ext': 'mp4',
+ 'title': 'M!ss Yella',
+ 'description': 'md5:104892c71bd48e55d70b902736b81bbf',
+ },
+ },
+ {
+ 'url': 'http://utv.unistra.fr/index.php?id_video=437',
+ 'md5': '1ddddd6cccaae76f622ce29b8779636d',
+ 'info_dict': {
+ 'id': '437',
+ 'ext': 'mp4',
+ 'title': 'Prix Louise Weiss 2014',
+ 'description': 'md5:cc3a8735f079f4fb6b0b570fc10c135a',
+ },
+ }
+ ]
+
+ def _real_extract(self, url):
+ mobj = self._match_valid_url(url)
+ video_id = mobj.group('id')
+
+ webpage = self._download_webpage(url, video_id)
+
+ files = set(re.findall(r'file\s*:\s*"(/[^"]+)"', webpage))
+
+ quality = qualities(['SD', 'HD'])
+ formats = []
+ for file_path in files:
+ format_id = 'HD' if file_path.endswith('-HD.mp4') else 'SD'
+ formats.append({
+ 'url': 'http://vod-flash.u-strasbg.fr:8080%s' % file_path,
+ 'format_id': format_id,
+ 'quality': quality(format_id)
+ })
+
+ title = self._html_search_regex(
+ r'<title>UTV - (.*?)</', webpage, 'title')
+ description = self._html_search_regex(
+ r'<meta name="Description" content="(.*?)"', webpage, 'description', flags=re.DOTALL)
+ thumbnail = self._search_regex(
+ r'image: "(.*?)"', webpage, 'thumbnail')
+
+ return {
+ 'id': video_id,
+ 'title': title,
+ 'description': description,
+ 'thumbnail': thumbnail,
+ 'formats': formats
+ }
diff --git a/yt_dlp/extractor/unity.py b/yt_dlp/extractor/unity.py
new file mode 100644
index 0000000..6d8bc05
--- /dev/null
+++ b/yt_dlp/extractor/unity.py
@@ -0,0 +1,31 @@
+from .common import InfoExtractor
+from .youtube import YoutubeIE
+
+
+class UnityIE(InfoExtractor):
+ _WORKING = False
+ _VALID_URL = r'https?://(?:www\.)?unity3d\.com/learn/tutorials/(?:[^/]+/)*(?P<id>[^/?#&]+)'
+ _TESTS = [{
+ 'url': 'https://unity3d.com/learn/tutorials/topics/animation/animate-anything-mecanim',
+ 'info_dict': {
+ 'id': 'jWuNtik0C8E',
+ 'ext': 'mp4',
+ 'title': 'Live Training 22nd September 2014 - Animate Anything',
+ 'description': 'md5:e54913114bd45a554c56cdde7669636e',
+ 'duration': 2893,
+ 'uploader': 'Unity',
+ 'uploader_id': 'Unity3D',
+ 'upload_date': '20140926',
+ }
+ }, {
+ 'url': 'https://unity3d.com/learn/tutorials/projects/2d-ufo-tutorial/following-player-camera?playlist=25844',
+ 'only_matching': True,
+ }]
+
+ def _real_extract(self, url):
+ video_id = self._match_id(url)
+ webpage = self._download_webpage(url, video_id)
+ youtube_id = self._search_regex(
+ r'data-video-id="([_0-9a-zA-Z-]+)"',
+ webpage, 'youtube ID')
+ return self.url_result(youtube_id, ie=YoutubeIE.ie_key(), video_id=video_id)
diff --git a/yt_dlp/extractor/unsupported.py b/yt_dlp/extractor/unsupported.py
new file mode 100644
index 0000000..4316c31
--- /dev/null
+++ b/yt_dlp/extractor/unsupported.py
@@ -0,0 +1,189 @@
+from .common import InfoExtractor
+from ..utils import ExtractorError, classproperty, remove_start
+
+
+class UnsupportedInfoExtractor(InfoExtractor):
+ IE_DESC = False
+ URLS = () # Redefine in subclasses
+
+ @classproperty
+ def IE_NAME(cls):
+ return remove_start(super().IE_NAME, 'Known')
+
+ @classproperty
+ def _VALID_URL(cls):
+ return rf'https?://(?:www\.)?(?:{"|".join(cls.URLS)})'
+
+
+LF = '\n '
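+# Newline plus indent, so continuation lines in the error messages below
+# stay aligned with the first line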
+
+
+class KnownDRMIE(UnsupportedInfoExtractor):
+ """Sites that are known to use DRM for all their videos
+
+ Add to this list only if:
+ * You are reasonably certain that the site uses DRM for ALL their videos
+ * Multiple users have asked about this site on github/discord
+ """
+
+ URLS = (
+ r'play\.hbomax\.com',
+ r'channel(?:4|5)\.com',
+ r'peacocktv\.com',
+ r'(?:[\w\.]+\.)?disneyplus\.com',
+ r'open\.spotify\.com/(?:track|playlist|album|artist)',
+ r'tvnz\.co\.nz',
+ r'oneplus\.ch',
+ r'artstation\.com/learning/courses',
+ r'philo\.com',
+ r'(?:[\w\.]+\.)?mech-plus\.com',
+ r'aha\.video',
+ r'mubi\.com',
+ r'vootkids\.com',
+ r'nowtv\.it/watch',
+ r'tv\.apple\.com',
+ r'primevideo\.com',
+ r'hulu\.com',
+ r'resource\.inkryptvideos\.com',
+ r'joyn\.de',
+ r'amazon\.(?:\w{2}\.)?\w+/gp/video',
+ r'music\.amazon\.(?:\w{2}\.)?\w+',
+ r'(?:watch|front)\.njpwworld\.com',
+ )
+
+ _TESTS = [{
+ # https://github.com/yt-dlp/yt-dlp/issues/4309
+ 'url': 'https://peacocktv.com/watch/playback/vod/GMO_00000000073159_01/f9d03003-eb04-3c7f-a7b6-a83ab7eb55bc',
+ 'only_matching': True,
+ }, {
+ # https://github.com/yt-dlp/yt-dlp/issues/1719,
+ 'url': 'https://www.channel4.com/programmes/gurren-lagann/on-demand/69960-001',
+ 'only_matching': True,
+ }, {
+ # https://github.com/yt-dlp/yt-dlp/issues/1548
+ 'url': 'https://www.channel5.com/show/uk-s-strongest-man-2021/season-2021/episode-1',
+ 'only_matching': True,
+ }, {
+ 'url': r'https://hsesn.apps.disneyplus.com',
+ 'only_matching': True,
+ }, {
+ 'url': r'https://www.disneyplus.com',
+ 'only_matching': True,
+ }, {
+ 'url': 'https://open.spotify.com/artist/',
+ 'only_matching': True,
+ }, {
+ 'url': 'https://open.spotify.com/track/',
+ 'only_matching': True,
+ }, {
+ # https://github.com/yt-dlp/yt-dlp/issues/4122
+ 'url': 'https://www.tvnz.co.nz/shows/ice-airport-alaska/episodes/s1-e1',
+ 'only_matching': True,
+ }, {
+ # https://github.com/yt-dlp/yt-dlp/issues/1922
+ 'url': 'https://www.oneplus.ch/play/1008188',
+ 'only_matching': True,
+ }, {
+ # https://github.com/yt-dlp/yt-dlp/issues/1140
+ 'url': 'https://www.artstation.com/learning/courses/dqQ/character-design-masterclass-with-serge-birault/chapters/Rxn3/introduction',
+ 'only_matching': True,
+ }, {
+ # https://github.com/yt-dlp/yt-dlp/issues/3544
+ 'url': 'https://www.philo.com/player/player/vod/Vk9EOjYwODU0ODg5OTY0ODY0OTQ5NA',
+ 'only_matching': True,
+ }, {
+ # https://github.com/yt-dlp/yt-dlp/issues/3533
+ 'url': 'https://www.mech-plus.com/player/24892/stream?assetType=episodes&playlist_id=6',
+ 'only_matching': True,
+ }, {
+ 'url': 'https://watch.mech-plus.com/details/25240?playlist_id=6',
+ 'only_matching': True,
+ }, {
+ # https://github.com/yt-dlp/yt-dlp/issues/2934
+ 'url': 'https://www.aha.video/player/movie/lucky-man',
+ 'only_matching': True,
+ }, {
+ # https://github.com/yt-dlp/yt-dlp/issues/2743
+ 'url': 'https://mubi.com/films/the-night-doctor',
+ 'only_matching': True,
+ }, {
+ # https://github.com/yt-dlp/yt-dlp/issues/3287
+ 'url': 'https://www.vootkids.com/movies/chhota-bheem-the-rise-of-kirmada/764459',
+ 'only_matching': True,
+ }, {
+ # https://github.com/yt-dlp/yt-dlp/issues/2744
+ 'url': 'https://www.nowtv.it/watch/home/asset/and-just-like-that/skyserie_f8fe979772e8437d8a61ab83b6d293e9/seasons/1/episodes/8/R_126182_HD',
+ 'only_matching': True,
+ }, {
+ # https://github.com/yt-dlp/yt-dlp/issues/5557
+ 'url': 'https://tv.apple.com/it/show/loot---una-fortuna/umc.cmc.5erbujil1mpazuerhr1udnk45?ctx_brand=tvs.sbd.4000',
+ 'only_matching': True,
+ }, {
+ # https://github.com/yt-dlp/yt-dlp/issues/3072
+ 'url': 'https://www.joyn.de/play/serien/clannad/1-1-wo-die-kirschblueten-fallen',
+ 'only_matching': True,
+ }, {
+ # https://github.com/yt-dlp/yt-dlp/issues/7323
+ 'url': 'https://music.amazon.co.jp/albums/B088Y368TK',
+ 'only_matching': True,
+ }, {
+ # https://github.com/yt-dlp/yt-dlp/issues/7323
+ 'url': 'https://www.amazon.co.jp/gp/video/detail/B09X5HBYRS/',
+ 'only_matching': True,
+ }, {
+ # https://github.com/yt-dlp/yt-dlp/issues/6125
+ 'url': 'https://www.primevideo.com/region/eu/detail/0H3DDB4KBJFNDCKKLHNRLRLVKQ/ref=atv_br_def_r_br_c_unkc_1_10',
+ 'only_matching': True,
+ }, {
+ # https://github.com/yt-dlp/yt-dlp/issues/5740
+ 'url': 'https://resource.inkryptvideos.com/v2-a83ns52/iframe/index.html#video_id=7999ea0f6e03439eb40d056258c2d736&otp=xxx',
+ 'only_matching': True,
+ }, {
+ # https://github.com/yt-dlp/yt-dlp/issues/5767
+ 'url': 'https://www.hulu.com/movie/anthem-6b25fac9-da2b-45a3-8e09-e4156b0471cc',
+ 'only_matching': True,
+ }, {
+ # https://github.com/yt-dlp/yt-dlp/pull/8570
+ 'url': 'https://watch.njpwworld.com/player/36447/series?assetType=series',
+ 'only_matching': True,
+ }, {
+ 'url': 'https://front.njpwworld.com/p/s_series_00563_16_bs',
+ 'only_matching': True,
+ }]
+
+ def _real_extract(self, url):
+ raise ExtractorError(
+ f'The requested site is known to use DRM protection. '
+ f'It will {self._downloader._format_err("NOT", self._downloader.Styles.EMPHASIS)} be supported.{LF}'
+ f'Please {self._downloader._format_err("DO NOT", self._downloader.Styles.ERROR)} open an issue, '
+ 'unless you have evidence that the video is not DRM protected', expected=True)
+
+
+class KnownPiracyIE(UnsupportedInfoExtractor):
+ """Sites that have been deemed to be piracy
+
+ In order for this to not end up being a catalog of piracy sites,
+ only sites that were once supported should be added to this list
+ """
+
+ URLS = (
+ r'dood\.(?:to|watch|so|pm|wf|re)',
+ # Sites youtube-dl supports, but we won't
+ r'viewsb\.com',
+ r'filemoon\.sx',
+ r'hentai\.animestigma\.com',
+ r'thisav\.com',
+ )
+
+ _TESTS = [{
+ 'url': 'http://dood.to/e/5s1wmbdacezb',
+ 'only_matching': True,
+ }, {
+ 'url': 'https://thisav.com/en/terms',
+ 'only_matching': True,
+ }]
+
+ def _real_extract(self, url):
+ raise ExtractorError(
+ f'This website is no longer supported since it has been determined to be primarily used for piracy.{LF}'
+ f'{self._downloader._format_err("DO NOT", self._downloader.Styles.ERROR)} open issues for it', expected=True)
diff --git a/yt_dlp/extractor/uol.py b/yt_dlp/extractor/uol.py
new file mode 100644
index 0000000..068c2b8
--- /dev/null
+++ b/yt_dlp/extractor/uol.py
@@ -0,0 +1,138 @@
+from .common import InfoExtractor
+from ..compat import (
+ compat_str,
+ compat_urllib_parse_urlencode,
+)
+from ..utils import (
+ clean_html,
+ int_or_none,
+ parse_duration,
+ parse_iso8601,
+ qualities,
+ update_url_query,
+)
+
+
+class UOLIE(InfoExtractor):
+ IE_NAME = 'uol.com.br'
+ _VALID_URL = r'https?://(?:.+?\.)?uol\.com\.br/.*?(?:(?:mediaId|v)=|view/(?:[a-z0-9]+/)?|video(?:=|/(?:\d{4}/\d{2}/\d{2}/)?))(?P<id>\d+|[\w-]+-[A-Z0-9]+)'
+ _TESTS = [{
+ 'url': 'http://player.mais.uol.com.br/player_video_v3.swf?mediaId=15951931',
+ 'md5': '4f1e26683979715ff64e4e29099cf020',
+ 'info_dict': {
+ 'id': '15951931',
+ 'ext': 'mp4',
+ 'title': 'Miss simpatia é encontrada morta',
+ 'description': 'md5:3f8c11a0c0556d66daf7e5b45ef823b2',
+ 'timestamp': 1470421860,
+ 'upload_date': '20160805',
+ }
+ }, {
+ 'url': 'http://tvuol.uol.com.br/video/incendio-destroi-uma-das-maiores-casas-noturnas-de-londres-04024E9A3268D4C95326',
+ 'md5': '2850a0e8dfa0a7307e04a96c5bdc5bc2',
+ 'info_dict': {
+ 'id': '15954259',
+ 'ext': 'mp4',
+ 'title': 'Incêndio destrói uma das maiores casas noturnas de Londres',
+ 'description': 'Em Londres, um incêndio destruiu uma das maiores boates da cidade. Não há informações sobre vítimas.',
+ 'timestamp': 1470674520,
+ 'upload_date': '20160808',
+ }
+ }, {
+ 'url': 'http://mais.uol.com.br/static/uolplayer/index.html?mediaId=15951931',
+ 'only_matching': True,
+ }, {
+ 'url': 'http://mais.uol.com.br/view/15954259',
+ 'only_matching': True,
+ }, {
+ 'url': 'http://noticias.band.uol.com.br/brasilurgente/video/2016/08/05/15951931/miss-simpatia-e-encontrada-morta.html',
+ 'only_matching': True,
+ }, {
+ 'url': 'http://videos.band.uol.com.br/programa.asp?e=noticias&pr=brasil-urgente&v=15951931&t=Policia-desmonte-base-do-PCC-na-Cracolandia',
+ 'only_matching': True,
+ }, {
+ 'url': 'http://mais.uol.com.br/view/cphaa0gl2x8r/incendio-destroi-uma-das-maiores-casas-noturnas-de-londres-04024E9A3268D4C95326',
+ 'only_matching': True,
+ }, {
+ 'url': 'http://noticias.uol.com.br//videos/assistir.htm?video=rafaela-silva-inspira-criancas-no-judo-04024D983968D4C95326',
+ 'only_matching': True,
+ }, {
+ 'url': 'http://mais.uol.com.br/view/e0qbgxid79uv/15275470',
+ 'only_matching': True,
+ }]
+
+ def _real_extract(self, url):
+ video_id = self._match_id(url)
+
+ video_data = self._download_json(
+ # https://api.mais.uol.com.br/apiuol/v4/player/data/[MEDIA_ID]
+ 'https://api.mais.uol.com.br/apiuol/v3/media/detail/' + video_id,
+ video_id)['item']
+ media_id = compat_str(video_data['mediaId'])
+ title = video_data['title']
+ ver = video_data.get('revision', 2)
+
+ uol_formats = self._download_json(
+ 'https://croupier.mais.uol.com.br/v3/formats/%s/jsonp' % media_id,
+ media_id)
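+        # qualities() returns a ranking callable: ids later in the list compare
+        # as higher quality when formats are sorted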
+ quality = qualities(['mobile', 'WEBM', '360p', '720p', '1080p'])
+ formats = []
+ for format_id, f in uol_formats.items():
+ if not isinstance(f, dict):
+ continue
+ f_url = f.get('url') or f.get('secureUrl')
+ if not f_url:
+ continue
+ query = {
+ 'ver': ver,
+ 'r': 'http://mais.uol.com.br',
+ }
+ for k in ('token', 'sign'):
+ v = f.get(k)
+ if v:
+ query[k] = v
+ f_url = update_url_query(f_url, query)
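+            # for HLS, the token/sign query must also reach every media segment,
+            # hence extra_param_to_segment_url below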
+ if format_id == 'HLS':
+ m3u8_formats = self._extract_m3u8_formats(
+ f_url, media_id, 'mp4', 'm3u8_native',
+ m3u8_id='hls', fatal=False)
+ encoded_query = compat_urllib_parse_urlencode(query)
+ for m3u8_f in m3u8_formats:
+ m3u8_f['extra_param_to_segment_url'] = encoded_query
+ m3u8_f['url'] = update_url_query(m3u8_f['url'], query)
+ formats.extend(m3u8_formats)
+ continue
+ formats.append({
+ 'format_id': format_id,
+ 'url': f_url,
+ 'quality': quality(format_id),
+ })
+
+ tags = []
+ for tag in video_data.get('tags', []):
+ tag_description = tag.get('description')
+ if not tag_description:
+ continue
+ tags.append(tag_description)
+
+ thumbnails = []
+ for q in ('Small', 'Medium', 'Wmedium', 'Large', 'Wlarge', 'Xlarge'):
+ q_url = video_data.get('thumb' + q)
+ if not q_url:
+ continue
+ thumbnails.append({
+ 'id': q,
+ 'url': q_url,
+ })
+
+ return {
+ 'id': media_id,
+ 'title': title,
+ 'description': clean_html(video_data.get('description')),
+ 'thumbnails': thumbnails,
+ 'duration': parse_duration(video_data.get('duration')),
+ 'tags': tags,
+ 'formats': formats,
+ 'timestamp': parse_iso8601(video_data.get('publishDate'), ' '),
+ 'view_count': int_or_none(video_data.get('viewsQtty')),
+ }
diff --git a/yt_dlp/extractor/uplynk.py b/yt_dlp/extractor/uplynk.py
new file mode 100644
index 0000000..e7d816e
--- /dev/null
+++ b/yt_dlp/extractor/uplynk.py
@@ -0,0 +1,88 @@
+import re
+
+from .common import InfoExtractor
+from ..utils import (
+ ExtractorError,
+ float_or_none,
+ smuggle_url,
+ traverse_obj,
+ unsmuggle_url,
+ update_url_query,
+)
+
+
+class UplynkBaseIE(InfoExtractor):
+ _UPLYNK_URL_RE = r'''(?x)
+ https?://[\w-]+\.uplynk\.com/(?P<path>
+ ext/[0-9a-f]{32}/(?P<external_id>[^/?&]+)|
+ (?P<id>[0-9a-f]{32})
+ )\.(?:m3u8|json)
+ (?:.*?\bpbs=(?P<session_id>[^&]+))?'''
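+    # 'pbs' is the playback session id; UplynkPreplayIE (below) obtains it from
+    # the preplay endpoint and appends it to the content URL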
+
+ def _extract_uplynk_info(self, url):
+ uplynk_content_url, smuggled_data = unsmuggle_url(url, {})
+ mobj = re.match(self._UPLYNK_URL_RE, uplynk_content_url)
+ if not mobj:
+ raise ExtractorError('Necessary parameters not found in Uplynk URL')
+ path, external_id, video_id, session_id = mobj.group('path', 'external_id', 'id', 'session_id')
+ display_id = video_id or external_id
+ headers = traverse_obj(
+ smuggled_data, {'Referer': 'Referer', 'Origin': 'Origin'}, casesense=False)
+ formats, subtitles = self._extract_m3u8_formats_and_subtitles(
+ f'http://content.uplynk.com/{path}.m3u8', display_id, 'mp4', headers=headers)
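+        # the session id must accompany every segment request, not just the
+        # manifest, so pass it along as an extra query parameter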
+ if session_id:
+ for f in formats:
+ f['extra_param_to_segment_url'] = f'pbs={session_id}'
+ asset = self._download_json(
+ f'http://content.uplynk.com/player/assetinfo/{path}.json', display_id)
+ if asset.get('error') == 1:
+ msg = asset.get('msg') or 'unknown error'
+ raise ExtractorError(f'{self.IE_NAME} said: {msg}', expected=True)
+
+ return {
+ 'id': asset['asset'],
+ 'title': asset['desc'],
+ 'thumbnail': asset.get('default_poster_url'),
+ 'duration': float_or_none(asset.get('duration')),
+ 'uploader_id': asset.get('owner'),
+ 'formats': formats,
+ 'subtitles': subtitles,
+ }
+
+
+class UplynkIE(UplynkBaseIE):
+ IE_NAME = 'uplynk'
+ _VALID_URL = UplynkBaseIE._UPLYNK_URL_RE
+ _TEST = {
+ 'url': 'http://content.uplynk.com/e89eaf2ce9054aa89d92ddb2d817a52e.m3u8',
+ 'info_dict': {
+ 'id': 'e89eaf2ce9054aa89d92ddb2d817a52e',
+ 'ext': 'mp4',
+ 'title': '030816-kgo-530pm-solar-eclipse-vid_web.mp4',
+ 'uploader_id': '4413701bf5a1488db55b767f8ae9d4fa',
+ 'duration': 530.2739166666679,
+ 'thumbnail': r're:^https?://.*\.jpg$',
+ },
+ 'params': {
+ 'skip_download': 'm3u8',
+ },
+ }
+
+ def _real_extract(self, url):
+ return self._extract_uplynk_info(url)
+
+
+class UplynkPreplayIE(UplynkBaseIE):
+ IE_NAME = 'uplynk:preplay'
+ _VALID_URL = r'https?://[\w-]+\.uplynk\.com/preplay2?/(?P<path>ext/[0-9a-f]{32}/(?P<external_id>[^/?&]+)|(?P<id>[0-9a-f]{32}))\.json'
+
+ def _real_extract(self, url):
+ url, smuggled_data = unsmuggle_url(url, {})
+ path, external_id, video_id = self._match_valid_url(url).groups()
+ display_id = video_id or external_id
+ preplay = self._download_json(url, display_id)
+ content_url = f'http://content.uplynk.com/{path}.m3u8'
+ session_id = preplay.get('sid')
+ if session_id:
+ content_url = update_url_query(content_url, {'pbs': session_id})
+ return self._extract_uplynk_info(smuggle_url(content_url, smuggled_data))
diff --git a/yt_dlp/extractor/urort.py b/yt_dlp/extractor/urort.py
new file mode 100644
index 0000000..f14d7cc
--- /dev/null
+++ b/yt_dlp/extractor/urort.py
@@ -0,0 +1,60 @@
+import urllib.parse
+
+from .common import InfoExtractor
+from ..utils import unified_strdate
+
+
+class UrortIE(InfoExtractor):
+ _WORKING = False
+ IE_DESC = 'NRK P3 Urørt'
+ _VALID_URL = r'https?://(?:www\.)?urort\.p3\.no/#!/Band/(?P<id>[^/]+)$'
+
+ _TEST = {
+ 'url': 'https://urort.p3.no/#!/Band/Gerilja',
+ 'md5': '5ed31a924be8a05e47812678a86e127b',
+ 'info_dict': {
+ 'id': '33124-24',
+ 'ext': 'mp3',
+ 'title': 'The Bomb',
+ 'thumbnail': r're:^https?://.+\.jpg',
+ 'uploader': 'Gerilja',
+ 'uploader_id': 'Gerilja',
+ 'upload_date': '20100323',
+ },
+ 'params': {
+ 'matchtitle': '^The Bomb$', # To test, we want just one video
+ }
+ }
+
+ def _real_extract(self, url):
+ playlist_id = self._match_id(url)
+
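+        # The Urørt backend is an OData-style service: $filter selects the band,
+        # $orderby sorts by release date and $expand inlines tags and files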
+ fstr = urllib.parse.quote("InternalBandUrl eq '%s'" % playlist_id)
+ json_url = 'http://urort.p3.no/breeze/urort/TrackDTOViews?$filter=%s&$orderby=Released%%20desc&$expand=Tags%%2CFiles' % fstr
+ songs = self._download_json(json_url, playlist_id)
+ entries = []
+ for s in songs:
+ formats = [{
+ 'tbr': f.get('Quality'),
+ 'ext': f['FileType'],
+ 'format_id': '%s-%s' % (f['FileType'], f.get('Quality', '')),
+ 'url': 'http://p3urort.blob.core.windows.net/tracks/%s' % f['FileRef'],
+ 'quality': 3 if f['FileType'] == 'mp3' else 2,
+ } for f in s['Files']]
+ e = {
+ 'id': '%d-%s' % (s['BandId'], s['$id']),
+ 'title': s['Title'],
+ 'uploader_id': playlist_id,
+ 'uploader': s.get('BandName', playlist_id),
+ 'thumbnail': 'http://urort.p3.no/cloud/images/%s' % s['Image'],
+ 'upload_date': unified_strdate(s.get('Released')),
+ 'formats': formats,
+ }
+ entries.append(e)
+
+ return {
+ '_type': 'playlist',
+ 'id': playlist_id,
+ 'title': playlist_id,
+ 'entries': entries,
+ }
diff --git a/yt_dlp/extractor/urplay.py b/yt_dlp/extractor/urplay.py
new file mode 100644
index 0000000..7f97fc9
--- /dev/null
+++ b/yt_dlp/extractor/urplay.py
@@ -0,0 +1,164 @@
+from .common import InfoExtractor
+from ..utils import (
+ dict_get,
+ ExtractorError,
+ int_or_none,
+ ISO639Utils,
+ parse_age_limit,
+ try_get,
+ unified_timestamp,
+)
+
+
+class URPlayIE(InfoExtractor):
+ _VALID_URL = r'https?://(?:www\.)?ur(?:play|skola)\.se/(?:program|Produkter)/(?P<id>[0-9]+)'
+ _TESTS = [{
+ 'url': 'https://urplay.se/program/203704-ur-samtiden-livet-universum-och-rymdens-markliga-musik-om-vetenskap-kritiskt-tankande-och-motstand',
+ 'md5': '5ba36643c77cc3d34ffeadad89937d1e',
+ 'info_dict': {
+ 'id': '203704',
+ 'ext': 'mp4',
+ 'title': 'UR Samtiden - Livet, universum och rymdens märkliga musik : Om vetenskap, kritiskt tänkande och motstånd',
+ 'description': 'md5:5344508a52aa78c1ced6c1b8b9e44e9a',
+ 'thumbnail': r're:^https?://.+\.jpg',
+ 'timestamp': 1513292400,
+ 'upload_date': '20171214',
+ 'series': 'UR Samtiden - Livet, universum och rymdens märkliga musik',
+ 'duration': 2269,
+ 'categories': ['Vetenskap & teknik'],
+ 'tags': ['Kritiskt tänkande', 'Vetenskap', 'Vetenskaplig verksamhet'],
+ 'episode': 'Om vetenskap, kritiskt tänkande och motstånd',
+ 'age_limit': 15,
+ },
+ }, {
+ 'url': 'https://urplay.se/program/222967-en-foralders-dagbok-mitt-barn-skadar-sig-sjalv',
+ 'info_dict': {
+ 'id': '222967',
+ 'ext': 'mp4',
+ 'title': 'En förälders dagbok : Mitt barn skadar sig själv',
+ 'description': 'md5:9f771eef03a732a213b367b52fe826ca',
+ 'thumbnail': r're:^https?://.+\.jpg',
+ 'timestamp': 1629676800,
+ 'upload_date': '20210823',
+ 'series': 'En förälders dagbok',
+ 'duration': 1740,
+ 'age_limit': 15,
+ 'episode_number': 3,
+ 'categories': 'count:2',
+ 'tags': 'count:7',
+ 'episode': 'Mitt barn skadar sig själv',
+ },
+ }, {
+ 'url': 'https://urskola.se/Produkter/190031-Tripp-Trapp-Trad-Sovkudde',
+ 'info_dict': {
+ 'id': '190031',
+ 'ext': 'mp4',
+ 'title': 'Tripp, Trapp, Träd : Sovkudde',
+ 'description': 'md5:b86bffdae04a7e9379d1d7e5947df1d1',
+ 'thumbnail': r're:^https?://.+\.jpg',
+ 'timestamp': 1440086400,
+ 'upload_date': '20150820',
+ 'series': 'Tripp, Trapp, Träd',
+ 'duration': 865,
+ 'age_limit': 1,
+ 'episode_number': 1,
+ 'categories': [],
+ 'tags': ['Sova'],
+ 'episode': 'Sovkudde',
+ 'season': 'Säsong 1',
+ },
+ }, {
+ 'url': 'http://urskola.se/Produkter/155794-Smasagor-meankieli-Grodan-i-vida-varlden',
+ 'only_matching': True,
+ }]
+
+ def _real_extract(self, url):
+ video_id = self._match_id(url)
+ url = url.replace('skola.se/Produkter', 'play.se/program')
+ webpage = self._download_webpage(url, video_id)
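+        # Newer pages embed the program data in __NEXT_DATA__; older ones keep
+        # it in a data-react-props attribute, so try both in that order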
+ urplayer_data = self._search_nextjs_data(webpage, video_id, fatal=False) or {}
+ if urplayer_data:
+ urplayer_data = try_get(urplayer_data, lambda x: x['props']['pageProps']['program'], dict)
+ if not urplayer_data:
+ raise ExtractorError('Unable to parse __NEXT_DATA__')
+ else:
+ accessible_episodes = self._parse_json(self._html_search_regex(
+ r'data-react-class="routes/Product/components/ProgramContainer/ProgramContainer"[^>]+data-react-props="({.+?})"',
+ webpage, 'urplayer data'), video_id)['accessibleEpisodes']
+ urplayer_data = next(e for e in accessible_episodes if e.get('id') == int_or_none(video_id))
+ episode = urplayer_data['title']
+
+ host = self._download_json('http://streaming-loadbalancer.ur.se/loadbalancer.json', video_id)['redirect']
+ formats = []
+ urplayer_streams = urplayer_data.get('streamingInfo', {})
+
+ for k, v in urplayer_streams.get('raw', {}).items():
+ if not (k in ('sd', 'hd', 'mp3', 'm4a') and isinstance(v, dict)):
+ continue
+ file_http = v.get('location')
+ if file_http:
+ formats.extend(self._extract_wowza_formats(
+ 'http://%s/%splaylist.m3u8' % (host, file_http),
+ video_id, skip_protocols=['f4m', 'rtmp', 'rtsp']))
+
+ subtitles = {}
+
+ def parse_lang_code(code):
+ "3-character language code or None (utils candidate)"
+ if code is None:
+ return
+ lang = code.lower()
+ if not ISO639Utils.long2short(lang):
+ lang = ISO639Utils.short2long(lang)
+ return lang or None
+
+ for stream in urplayer_data['streamingInfo'].values():
+ for k, v in stream.items():
+                if k in ('sd', 'hd') or not isinstance(v, dict):
+                    continue
+                lang, sttl_url = (v.get(kk) for kk in ('language', 'location'))
+ if not sttl_url:
+ continue
+ lang = parse_lang_code(lang)
+ if not lang:
+ continue
+ sttl = subtitles.get(lang) or []
+                sttl.append({'ext': k, 'url': sttl_url})
+ subtitles[lang] = sttl
+
+ image = urplayer_data.get('image') or {}
+ thumbnails = []
+ for k, v in image.items():
+ t = {
+ 'id': k,
+ 'url': v,
+ }
+ wh = k.split('x')
+ if len(wh) == 2:
+ t.update({
+ 'width': int_or_none(wh[0]),
+ 'height': int_or_none(wh[1]),
+ })
+ thumbnails.append(t)
+
+ series = urplayer_data.get('series') or {}
+ series_title = dict_get(series, ('seriesTitle', 'title')) or dict_get(urplayer_data, ('seriesTitle', 'mainTitle'))
+
+ return {
+ 'id': video_id,
+ 'title': '%s : %s' % (series_title, episode) if series_title else episode,
+ 'description': urplayer_data.get('description'),
+ 'thumbnails': thumbnails,
+ 'timestamp': unified_timestamp(urplayer_data.get('publishedAt')),
+ 'series': series_title,
+ 'formats': formats,
+ 'duration': int_or_none(urplayer_data.get('duration')),
+ 'categories': urplayer_data.get('categories'),
+ 'tags': urplayer_data.get('keywords'),
+ 'season': series.get('label'),
+ 'episode': episode,
+ 'episode_number': int_or_none(urplayer_data.get('episodeNumber')),
+            'age_limit': parse_age_limit(min((try_get(a, lambda x: x['from'], int) or 0
+                                              for a in urplayer_data.get('ageRanges', [])), default=0)),
+ 'subtitles': subtitles,
+ }
diff --git a/yt_dlp/extractor/usanetwork.py b/yt_dlp/extractor/usanetwork.py
new file mode 100644
index 0000000..4a06a9a
--- /dev/null
+++ b/yt_dlp/extractor/usanetwork.py
@@ -0,0 +1,21 @@
+from .nbc import NBCIE
+
+
+class USANetworkIE(NBCIE): # XXX: Do not subclass from concrete IE
+ _VALID_URL = r'https?(?P<permalink>://(?:www\.)?usanetwork\.com/(?:[^/]+/videos?|movies?)/(?:[^/]+/)?(?P<id>\d+))'
+ _TESTS = [{
+ 'url': 'https://www.usanetwork.com/peacock-trailers/video/intelligence-trailer/4185302',
+ 'info_dict': {
+ 'id': '4185302',
+ 'ext': 'mp4',
+ 'title': 'Intelligence (Trailer)',
+ 'description': 'A maverick NSA agent enlists the help of a junior systems analyst in a workplace power grab.',
+ 'upload_date': '20200715',
+ 'timestamp': 1594785600,
+ 'uploader': 'NBCU-MPAT',
+ },
+ 'params': {
+ # m3u8 download
+ 'skip_download': True,
+ },
+ }]
diff --git a/yt_dlp/extractor/usatoday.py b/yt_dlp/extractor/usatoday.py
new file mode 100644
index 0000000..3243f3e
--- /dev/null
+++ b/yt_dlp/extractor/usatoday.py
@@ -0,0 +1,60 @@
+from .common import InfoExtractor
+from ..utils import (
+ ExtractorError,
+ get_element_by_attribute,
+ parse_duration,
+ try_get,
+ update_url_query,
+)
+from ..compat import compat_str
+
+
+class USATodayIE(InfoExtractor):
+ _VALID_URL = r'https?://(?:www\.)?usatoday\.com/(?:[^/]+/)*(?P<id>[^?/#]+)'
+ _TESTS = [{
+ # Brightcove Partner ID = 29906170001
+ 'url': 'http://www.usatoday.com/media/cinematic/video/81729424/us-france-warn-syrian-regime-ahead-of-new-peace-talks/',
+ 'md5': '033587d2529dc3411a1ab3644c3b8827',
+ 'info_dict': {
+ 'id': '4799374959001',
+ 'ext': 'mp4',
+ 'title': 'US, France warn Syrian regime ahead of new peace talks',
+ 'timestamp': 1457891045,
+ 'description': 'md5:7e50464fdf2126b0f533748d3c78d58f',
+ 'uploader_id': '29906170001',
+ 'upload_date': '20160313',
+ }
+ }, {
+ # ui-video-data[asset_metadata][items][brightcoveaccount] = 28911775001
+ 'url': 'https://www.usatoday.com/story/tech/science/2018/08/21/yellowstone-supervolcano-eruption-stop-worrying-its-blow/973633002/',
+ 'info_dict': {
+ 'id': '5824495846001',
+ 'ext': 'mp4',
+ 'title': 'Yellowstone more likely to crack rather than explode',
+ 'timestamp': 1534790612,
+ 'description': 'md5:3715e7927639a4f16b474e9391687c62',
+ 'uploader_id': '28911775001',
+ 'upload_date': '20180820',
+ }
+ }]
+ BRIGHTCOVE_URL_TEMPLATE = 'http://players.brightcove.net/%s/default_default/index.html?videoId=%s'
+
+ def _real_extract(self, url):
+ display_id = self._match_id(url)
+ webpage = self._download_webpage(update_url_query(url, {'ajax': 'true'}), display_id)
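+        # 'ajax=true' yields a reduced page fragment that still contains the
+        # ui-video-data element parsed below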
+ ui_video_data = get_element_by_attribute('class', 'ui-video-data', webpage)
+ if not ui_video_data:
+            raise ExtractorError('No video on the webpage', expected=True)
+ video_data = self._parse_json(ui_video_data, display_id)
+ item = try_get(video_data, lambda x: x['asset_metadata']['items'], dict) or {}
+
+ return {
+ '_type': 'url_transparent',
+ 'url': self.BRIGHTCOVE_URL_TEMPLATE % (item.get('brightcoveaccount', '29906170001'), item.get('brightcoveid') or video_data['brightcove_id']),
+ 'id': compat_str(video_data['id']),
+ 'title': video_data['title'],
+ 'thumbnail': video_data.get('thumbnail'),
+ 'description': video_data.get('description'),
+ 'duration': parse_duration(video_data.get('length')),
+ 'ie_key': 'BrightcoveNew',
+ }
diff --git a/yt_dlp/extractor/ustream.py b/yt_dlp/extractor/ustream.py
new file mode 100644
index 0000000..5df2416
--- /dev/null
+++ b/yt_dlp/extractor/ustream.py
@@ -0,0 +1,275 @@
+import random
+import re
+
+from .common import InfoExtractor
+from ..compat import (
+ compat_str,
+ compat_urlparse,
+)
+from ..utils import (
+ encode_data_uri,
+ ExtractorError,
+ int_or_none,
+ float_or_none,
+ join_nonempty,
+ mimetype2ext,
+ str_or_none,
+)
+
+
+class UstreamIE(InfoExtractor):
+ _VALID_URL = r'https?://(?:www\.)?(?:ustream\.tv|video\.ibm\.com)/(?P<type>recorded|embed|embed/recorded)/(?P<id>\d+)'
+ IE_NAME = 'ustream'
+ _EMBED_REGEX = [r'<iframe[^>]+?src=(["\'])(?P<url>https?://(?:www\.)?(?:ustream\.tv|video\.ibm\.com)/embed/.+?)\1']
+ _TESTS = [{
+ 'url': 'http://www.ustream.tv/recorded/20274954',
+ 'md5': '088f151799e8f572f84eb62f17d73e5c',
+ 'info_dict': {
+ 'id': '20274954',
+ 'ext': 'flv',
+ 'title': 'Young Americans for Liberty February 7, 2012 2:28 AM',
+ 'description': 'Young Americans for Liberty February 7, 2012 2:28 AM',
+ 'timestamp': 1328577035,
+ 'upload_date': '20120207',
+ 'uploader': 'yaliberty',
+ 'uploader_id': '6780869',
+ },
+ }, {
+ # From http://sportscanada.tv/canadagames/index.php/week2/figure-skating/444
+ # Title and uploader available only from params JSON
+ 'url': 'http://www.ustream.tv/embed/recorded/59307601?ub=ff0000&lc=ff0000&oc=ffffff&uc=ffffff&v=3&wmode=direct',
+ 'md5': '5a2abf40babeac9812ed20ae12d34e10',
+ 'info_dict': {
+ 'id': '59307601',
+ 'ext': 'flv',
+ 'title': '-CG11- Canada Games Figure Skating',
+ 'uploader': 'sportscanadatv',
+ },
+ 'skip': 'This Pro Broadcaster has chosen to remove this video from the ustream.tv site.',
+ }, {
+ 'url': 'http://www.ustream.tv/embed/10299409',
+ 'info_dict': {
+ 'id': '10299409',
+ },
+ 'playlist_count': 3,
+ }, {
+ 'url': 'http://www.ustream.tv/recorded/91343263',
+ 'info_dict': {
+ 'id': '91343263',
+ 'ext': 'mp4',
+ 'title': 'GitHub Universe - General Session - Day 1',
+ 'upload_date': '20160914',
+ 'description': 'GitHub Universe - General Session - Day 1',
+ 'timestamp': 1473872730,
+ 'uploader': 'wa0dnskeqkr',
+ 'uploader_id': '38977840',
+ },
+ 'params': {
+ 'skip_download': True, # m3u8 download
+ },
+ }, {
+ 'url': 'https://video.ibm.com/embed/recorded/128240221?&autoplay=true&controls=true&volume=100',
+ 'only_matching': True,
+ }]
+
+ def _get_stream_info(self, url, video_id, app_id_ver, extra_note=None):
+ def num_to_hex(n):
+ return hex(n)[2:]
+
+ rnd = random.randrange
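+        # NB: randrange needs integer bounds (floats raise on Python 3.12+);
+        # rsid/rpin below are random client identifiers for the UMS endpoint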
+
+ if not extra_note:
+ extra_note = ''
+
+ conn_info = self._download_json(
+            'http://r%d-1-%s-recorded-lp-live.ums.ustream.tv/1/ustream' % (rnd(10**8), video_id),
+ video_id, note='Downloading connection info' + extra_note,
+ query={
+ 'type': 'viewer',
+ 'appId': app_id_ver[0],
+ 'appVersion': app_id_ver[1],
+                'rsid': '%s:%s' % (num_to_hex(rnd(10**8)), num_to_hex(rnd(10**8))),
+                'rpin': '_rpin.%d' % rnd(10**15),
+ 'referrer': url,
+ 'media': video_id,
+ 'application': 'recorded',
+ })
+ host = conn_info[0]['args'][0]['host']
+ connection_id = conn_info[0]['args'][0]['connectionId']
+
+ return self._download_json(
+ 'http://%s/1/ustream?connectionId=%s' % (host, connection_id),
+ video_id, note='Downloading stream info' + extra_note)
+
+ def _get_streams(self, url, video_id, app_id_ver):
+ # Sometimes the return dict does not have 'stream'
+ for trial_count in range(3):
+ stream_info = self._get_stream_info(
+ url, video_id, app_id_ver,
+ extra_note=' (try %d)' % (trial_count + 1) if trial_count > 0 else '')
+ if 'stream' in stream_info[0]['args'][0]:
+ return stream_info[0]['args'][0]['stream']
+ return []
+
+ def _parse_segmented_mp4(self, dash_stream_info):
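+        # Segment URL templates contain two '%' placeholders: the first is the
+        # chunk index, the second the hash of its 10-chunk group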
+ def resolve_dash_template(template, idx, chunk_hash):
+ return template.replace('%', compat_str(idx), 1).replace('%', chunk_hash)
+
+ formats = []
+ for stream in dash_stream_info['streams']:
+ # Use only one provider to avoid too many formats
+ provider = dash_stream_info['providers'][0]
+ fragments = [{
+ 'url': resolve_dash_template(
+ provider['url'] + stream['initUrl'], 0, dash_stream_info['hashes']['0'])
+ }]
+ for idx in range(dash_stream_info['videoLength'] // dash_stream_info['chunkTime']):
+ fragments.append({
+ 'url': resolve_dash_template(
+ provider['url'] + stream['segmentUrl'], idx,
+ dash_stream_info['hashes'][compat_str(idx // 10 * 10)])
+ })
+ content_type = stream['contentType']
+ kind = content_type.split('/')[0]
+ f = {
+ 'format_id': join_nonempty(
+ 'dash', kind, str_or_none(stream.get('bitrate'))),
+ 'protocol': 'http_dash_segments',
+ # TODO: generate a MPD doc for external players?
+ 'url': encode_data_uri(b'<MPD/>', 'text/xml'),
+ 'ext': mimetype2ext(content_type),
+ 'height': stream.get('height'),
+ 'width': stream.get('width'),
+ 'fragments': fragments,
+ }
+ if kind == 'video':
+ f.update({
+ 'vcodec': stream.get('codec'),
+ 'acodec': 'none',
+ 'vbr': stream.get('bitrate'),
+ })
+ else:
+ f.update({
+ 'vcodec': 'none',
+ 'acodec': stream.get('codec'),
+ 'abr': stream.get('bitrate'),
+ })
+ formats.append(f)
+ return formats
+
+ def _real_extract(self, url):
+ m = self._match_valid_url(url)
+ video_id = m.group('id')
+
+ # some sites use this embed format (see: https://github.com/ytdl-org/youtube-dl/issues/2990)
+ if m.group('type') == 'embed/recorded':
+ video_id = m.group('id')
+ desktop_url = 'http://www.ustream.tv/recorded/' + video_id
+ return self.url_result(desktop_url, 'Ustream')
+ if m.group('type') == 'embed':
+ video_id = m.group('id')
+ webpage = self._download_webpage(url, video_id)
+ content_video_ids = self._parse_json(self._search_regex(
+ r'ustream\.vars\.offAirContentVideoIds=([^;]+);', webpage,
+ 'content video IDs'), video_id)
+ return self.playlist_result(
+                (self.url_result('http://www.ustream.tv/recorded/' + u, 'Ustream') for u in content_video_ids),
+ video_id)
+
+ params = self._download_json(
+ 'https://api.ustream.tv/videos/%s.json' % video_id, video_id)
+
+ error = params.get('error')
+ if error:
+ raise ExtractorError(
+ '%s returned error: %s' % (self.IE_NAME, error), expected=True)
+
+ video = params['video']
+
+ title = video['title']
+ filesize = float_or_none(video.get('file_size'))
+
+ formats = [{
+ 'id': video_id,
+ 'url': video_url,
+ 'ext': format_id,
+ 'filesize': filesize,
+ } for format_id, video_url in video['media_urls'].items() if video_url]
+
+ if not formats:
+ hls_streams = self._get_streams(url, video_id, app_id_ver=(11, 2))
+ if hls_streams:
+ # m3u8_native leads to intermittent ContentTooShortError
+ formats.extend(self._extract_m3u8_formats(
+ hls_streams[0]['url'], video_id, ext='mp4', m3u8_id='hls'))
+
+        # DASH streams handling is incomplete as 'url' is missing:
+        # dash_streams = self._get_streams(url, video_id, app_id_ver=(3, 1))
+        # if dash_streams:
+        #     formats.extend(self._parse_segmented_mp4(dash_streams))
+
+ description = video.get('description')
+ timestamp = int_or_none(video.get('created_at'))
+ duration = float_or_none(video.get('length'))
+ view_count = int_or_none(video.get('views'))
+
+ uploader = video.get('owner', {}).get('username')
+ uploader_id = video.get('owner', {}).get('id')
+
+ thumbnails = [{
+ 'id': thumbnail_id,
+ 'url': thumbnail_url,
+ } for thumbnail_id, thumbnail_url in video.get('thumbnail', {}).items()]
+
+ return {
+ 'id': video_id,
+ 'title': title,
+ 'description': description,
+ 'thumbnails': thumbnails,
+ 'timestamp': timestamp,
+ 'duration': duration,
+ 'view_count': view_count,
+ 'uploader': uploader,
+ 'uploader_id': uploader_id,
+ 'formats': formats,
+ }
+
+
+class UstreamChannelIE(InfoExtractor):
+ _VALID_URL = r'https?://(?:www\.)?ustream\.tv/channel/(?P<slug>.+)'
+ IE_NAME = 'ustream:channel'
+ _TEST = {
+ 'url': 'http://www.ustream.tv/channel/channeljapan',
+ 'info_dict': {
+ 'id': '10874166',
+ },
+ 'playlist_mincount': 17,
+ }
+
+ def _real_extract(self, url):
+ m = self._match_valid_url(url)
+ display_id = m.group('slug')
+ webpage = self._download_webpage(url, display_id)
+ channel_id = self._html_search_meta('ustream:channel_id', webpage)
+
+ BASE = 'http://www.ustream.tv'
+ next_url = '/ajax/socialstream/videos/%s/1.json' % channel_id
+ video_ids = []
+ while next_url:
+ reply = self._download_json(
+ compat_urlparse.urljoin(BASE, next_url), display_id,
+ note='Downloading video information (next: %d)' % (len(video_ids) + 1))
+            video_ids.extend(re.findall(r'data-content-id="(\d[^"]*)"', reply['data']))
+ next_url = reply['nextUrl']
+
+ entries = [
+ self.url_result('http://www.ustream.tv/recorded/' + vid, 'Ustream')
+ for vid in video_ids]
+ return {
+ '_type': 'playlist',
+ 'id': channel_id,
+ 'display_id': display_id,
+ 'entries': entries,
+ }
diff --git a/yt_dlp/extractor/ustudio.py b/yt_dlp/extractor/ustudio.py
new file mode 100644
index 0000000..c3aeeb9
--- /dev/null
+++ b/yt_dlp/extractor/ustudio.py
@@ -0,0 +1,119 @@
+from .common import InfoExtractor
+from ..utils import (
+ int_or_none,
+ unified_strdate,
+ unescapeHTML,
+)
+
+
+class UstudioIE(InfoExtractor):
+ IE_NAME = 'ustudio'
+ _VALID_URL = r'https?://(?:(?:www|v1)\.)?ustudio\.com/video/(?P<id>[^/]+)/(?P<display_id>[^/?#&]+)'
+ _TEST = {
+ 'url': 'http://ustudio.com/video/Uxu2my9bgSph/san_francisco_golden_gate_bridge',
+ 'md5': '58bbfca62125378742df01fc2abbdef6',
+ 'info_dict': {
+ 'id': 'Uxu2my9bgSph',
+ 'display_id': 'san_francisco_golden_gate_bridge',
+ 'ext': 'mp4',
+ 'title': 'San Francisco: Golden Gate Bridge',
+ 'description': 'md5:23925500697f2c6d4830e387ba51a9be',
+ 'thumbnail': r're:^https?://.*\.jpg$',
+ 'upload_date': '20111107',
+ 'uploader': 'Tony Farley',
+ }
+ }
+
+ def _real_extract(self, url):
+ video_id, display_id = self._match_valid_url(url).groups()
+
+ config = self._download_xml(
+ 'http://v1.ustudio.com/embed/%s/ustudio/config.xml' % video_id,
+ display_id)
+
+ def extract(kind):
+ return [{
+ 'url': unescapeHTML(item.attrib['url']),
+ 'width': int_or_none(item.get('width')),
+ 'height': int_or_none(item.get('height')),
+ } for item in config.findall('./qualities/quality/%s' % kind) if item.get('url')]
+
+ formats = extract('video')
+
+ webpage = self._download_webpage(url, display_id)
+
+ title = self._og_search_title(webpage)
+ upload_date = unified_strdate(self._search_regex(
+ r'(?s)Uploaded by\s*.+?\s*on\s*<span>([^<]+)</span>',
+ webpage, 'upload date', fatal=False))
+ uploader = self._search_regex(
+ r'Uploaded by\s*<a[^>]*>([^<]+)<',
+ webpage, 'uploader', fatal=False)
+
+ return {
+ 'id': video_id,
+ 'display_id': display_id,
+ 'title': title,
+ 'description': self._og_search_description(webpage),
+ 'thumbnails': extract('image'),
+ 'upload_date': upload_date,
+ 'uploader': uploader,
+ 'formats': formats,
+ }
+
+
+class UstudioEmbedIE(InfoExtractor):
+ IE_NAME = 'ustudio:embed'
+ _VALID_URL = r'https?://(?:(?:app|embed)\.)?ustudio\.com/embed/(?P<uid>[^/]+)/(?P<id>[^/]+)'
+ _TEST = {
+ 'url': 'http://app.ustudio.com/embed/DeN7VdYRDKhP/Uw7G1kMCe65T',
+ 'md5': '47c0be52a09b23a7f40de9469cec58f4',
+ 'info_dict': {
+ 'id': 'Uw7G1kMCe65T',
+ 'ext': 'mp4',
+ 'title': '5 Things IT Should Know About Video',
+ 'description': 'md5:93d32650884b500115e158c5677d25ad',
+ 'uploader_id': 'DeN7VdYRDKhP',
+ }
+ }
+
+ def _real_extract(self, url):
+ uploader_id, video_id = self._match_valid_url(url).groups()
+ video_data = self._download_json(
+ 'http://app.ustudio.com/embed/%s/%s/config.json' % (uploader_id, video_id),
+ video_id)['videos'][0]
+ title = video_data['name']
+
+ formats = []
+ for ext, qualities in video_data.get('transcodes', {}).items():
+ for quality in qualities:
+ quality_url = quality.get('url')
+ if not quality_url:
+ continue
+ height = int_or_none(quality.get('height'))
+ formats.append({
+ 'format_id': '%s-%dp' % (ext, height) if height else ext,
+ 'url': quality_url,
+ 'width': int_or_none(quality.get('width')),
+ 'height': height,
+ })
+
+ thumbnails = []
+ for image in video_data.get('images', []):
+ image_url = image.get('url')
+ if not image_url:
+ continue
+ thumbnails.append({
+ 'url': image_url,
+ })
+
+ return {
+ 'id': video_id,
+ 'title': title,
+ 'description': video_data.get('description'),
+ 'duration': int_or_none(video_data.get('duration')),
+ 'uploader_id': uploader_id,
+ 'tags': video_data.get('keywords'),
+ 'thumbnails': thumbnails,
+ 'formats': formats,
+ }
diff --git a/yt_dlp/extractor/utreon.py b/yt_dlp/extractor/utreon.py
new file mode 100644
index 0000000..12a7e49
--- /dev/null
+++ b/yt_dlp/extractor/utreon.py
@@ -0,0 +1,98 @@
+from .common import InfoExtractor
+from ..utils import (
+ dict_get,
+ int_or_none,
+ str_or_none,
+ try_get,
+ unified_strdate,
+ url_or_none,
+)
+
+
+class UtreonIE(InfoExtractor):
+ IE_NAME = 'playeur'
+ _VALID_URL = r'https?://(?:www\.)?(?:utreon|playeur)\.com/v/(?P<id>[\w-]+)'
+ _TESTS = [{
+ 'url': 'https://utreon.com/v/z_I7ikQbuDw',
+ 'info_dict': {
+ 'id': 'z_I7ikQbuDw',
+ 'ext': 'mp4',
+ 'title': 'Freedom Friday meditation - Rising in the wind',
+ 'description': 'md5:a9bf15a42434a062fe313b938343ad1b',
+ 'uploader': 'Heather Dawn Elemental Health',
+ 'thumbnail': r're:^https?://.+\.jpg',
+ 'release_date': '20210723',
+ 'duration': 586,
+ }
+ }, {
+ 'url': 'https://utreon.com/v/jerJw5EOOVU',
+ 'info_dict': {
+ 'id': 'jerJw5EOOVU',
+ 'ext': 'mp4',
+ 'title': 'When I\'m alone, I love to reflect in peace, to make my dreams come true... [Quotes and Poems]',
+ 'description': 'md5:4026aa3a2c10169c3649926ac8ef62b6',
+ 'uploader': 'Frases e Poemas Quotes and Poems',
+ 'thumbnail': r're:^https?://.+\.jpg',
+ 'release_date': '20210723',
+ 'duration': 60,
+ }
+ }, {
+ 'url': 'https://utreon.com/v/C4ZxXhYBBmE',
+ 'info_dict': {
+ 'id': 'C4ZxXhYBBmE',
+ 'ext': 'mp4',
+ 'title': 'Biden’s Capital Gains Tax Rate to Test World’s Highest',
+ 'description': 'md5:995aa9ad0733c0e5863ebdeff954f40e',
+ 'uploader': 'Nomad Capitalist',
+ 'thumbnail': r're:^https?://.+\.jpg',
+ 'release_date': '20210723',
+ 'duration': 884,
+ }
+ }, {
+ 'url': 'https://utreon.com/v/Y-stEH-FBm8',
+ 'info_dict': {
+ 'id': 'Y-stEH-FBm8',
+ 'ext': 'mp4',
+ 'title': 'Creeper-Chan Pranks Steve! 💚 [MINECRAFT ANIME]',
+ 'description': 'md5:7a48450b0d761b96dec194be0c5ecb5f',
+ 'uploader': 'Merryweather Comics',
+ 'thumbnail': r're:^https?://.+\.jpg',
+ 'release_date': '20210718',
+ 'duration': 151,
+ }
+ }, {
+ 'url': 'https://playeur.com/v/Wzqp-UrxSeu',
+ 'info_dict': {
+ 'id': 'Wzqp-UrxSeu',
+ 'ext': 'mp4',
+ 'title': 'Update: Clockwork Basilisk Books on the Way!',
+ 'description': 'md5:d9756b0b1884c904655b0e170d17cea5',
+ 'uploader': 'Forgotten Weapons',
+ 'release_date': '20240208',
+ 'thumbnail': r're:^https?://.+\.jpg',
+ 'duration': 262,
+ }
+ }]
+
+ def _real_extract(self, url):
+ video_id = self._match_id(url)
+ json_data = self._download_json(
+ 'https://api.playeur.com/v1/videos/' + video_id,
+ video_id)
+ videos_json = json_data['videos']
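+        # keys take the form '<container>_<height>p' (e.g. 'mp4_720p');
+        # values are direct file URLs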
+ formats = [{
+ 'url': format_url,
+ 'format_id': format_key.split('_')[1],
+ 'height': int(format_key.split('_')[1][:-1]),
+ } for format_key, format_url in videos_json.items() if url_or_none(format_url)]
+ thumbnail = url_or_none(dict_get(json_data, ('cover_image_url', 'preview_image_url')))
+ return {
+ 'id': video_id,
+ 'title': json_data['title'],
+ 'formats': formats,
+ 'description': str_or_none(json_data.get('description')),
+ 'duration': int_or_none(json_data.get('duration')),
+ 'uploader': str_or_none(try_get(json_data, lambda x: x['channel']['title'])),
+ 'thumbnail': thumbnail,
+ 'release_date': unified_strdate(json_data.get('published_datetime')),
+ }
diff --git a/yt_dlp/extractor/varzesh3.py b/yt_dlp/extractor/varzesh3.py
new file mode 100644
index 0000000..07a2d53
--- /dev/null
+++ b/yt_dlp/extractor/varzesh3.py
@@ -0,0 +1,73 @@
+from .common import InfoExtractor
+from ..utils import (
+ clean_html,
+ parse_qs,
+ remove_start,
+)
+
+
+class Varzesh3IE(InfoExtractor):
+ _WORKING = False
+ _VALID_URL = r'https?://(?:www\.)?video\.varzesh3\.com/(?:[^/]+/)+(?P<id>[^/]+)/?'
+ _TESTS = [{
+ 'url': 'http://video.varzesh3.com/germany/bundesliga/5-%D9%88%D8%A7%DA%A9%D9%86%D8%B4-%D8%A8%D8%B1%D8%AA%D8%B1-%D8%AF%D8%B1%D9%88%D8%A7%D8%B2%D9%87%E2%80%8C%D8%A8%D8%A7%D9%86%D8%A7%D9%86%D8%9B%D9%87%D9%81%D8%AA%D9%87-26-%D8%A8%D9%88%D9%86%D8%AF%D8%B3/',
+ 'md5': '2a933874cb7dce4366075281eb49e855',
+ 'info_dict': {
+ 'id': '76337',
+ 'ext': 'mp4',
+ 'title': '۵ واکنش برتر دروازه‌بانان؛هفته ۲۶ بوندسلیگا',
+ 'description': 'فصل ۲۰۱۵-۲۰۱۴',
+ 'thumbnail': r're:^https?://.*\.jpg$',
+ },
+ 'skip': 'HTTP 404 Error',
+ }, {
+ 'url': 'http://video.varzesh3.com/video/112785/%D8%AF%D9%84%D9%87-%D8%B9%D9%84%DB%8C%D8%9B-%D8%B3%D8%AA%D8%A7%D8%B1%D9%87-%D9%86%D9%88%D8%B8%D9%87%D9%88%D8%B1-%D9%84%DB%8C%DA%AF-%D8%A8%D8%B1%D8%AA%D8%B1-%D8%AC%D8%B2%DB%8C%D8%B1%D9%87',
+ 'md5': '841b7cd3afbc76e61708d94e53a4a4e7',
+ 'info_dict': {
+ 'id': '112785',
+ 'ext': 'mp4',
+ 'title': 'دله علی؛ ستاره نوظهور لیگ برتر جزیره',
+ 'description': 'فوتبال 120',
+ },
+ 'expected_warnings': ['description'],
+ }]
+
+ def _real_extract(self, url):
+ display_id = self._match_id(url)
+
+ webpage = self._download_webpage(url, display_id)
+
+ video_url = self._search_regex(
+ r'<source[^>]+src="([^"]+)"', webpage, 'video url')
+
+ title = remove_start(self._html_extract_title(webpage), 'ویدیو ورزش 3 | ')
+
+ description = self._html_search_regex(
+ r'(?s)<div class="matn">(.+?)</div>',
+ webpage, 'description', default=None)
+ if description is None:
+ description = clean_html(self._html_search_meta('description', webpage))
+
+ thumbnail = self._og_search_thumbnail(webpage, default=None)
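+        # fall back to the Facebook share link, which carries the thumbnail in
+        # its 'p[images][0]' query parameter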
+ if thumbnail is None:
+ fb_sharer_url = self._search_regex(
+ r'<a[^>]+href="(https?://www\.facebook\.com/sharer/sharer\.php?[^"]+)"',
+ webpage, 'facebook sharer URL', fatal=False)
+ sharer_params = parse_qs(fb_sharer_url)
+ thumbnail = sharer_params.get('p[images][0]', [None])[0]
+
+ video_id = self._search_regex(
+ r"<link[^>]+rel='(?:canonical|shortlink)'[^>]+href='/\?p=([^']+)'",
+ webpage, display_id, default=None)
+ if video_id is None:
+ video_id = self._search_regex(
+ r'var\s+VideoId\s*=\s*(\d+);', webpage, 'video id',
+ default=display_id)
+
+ return {
+ 'url': video_url,
+ 'id': video_id,
+ 'title': title,
+ 'description': description,
+ 'thumbnail': thumbnail,
+ }
diff --git a/yt_dlp/extractor/vbox7.py b/yt_dlp/extractor/vbox7.py
new file mode 100644
index 0000000..f5d0502
--- /dev/null
+++ b/yt_dlp/extractor/vbox7.py
@@ -0,0 +1,97 @@
+from .common import InfoExtractor
+from ..utils import ExtractorError, base_url, int_or_none, url_basename
+from ..utils.traversal import traverse_obj
+
+
+class Vbox7IE(InfoExtractor):
+ _VALID_URL = r'''(?x)
+ https?://
+ (?:[^/]+\.)?vbox7\.com/
+ (?:
+ play:|
+ (?:
+ emb/external\.php|
+ player/ext\.swf
+ )\?.*?\bvid=
+ )
+ (?P<id>[\da-fA-F]+)
+ '''
+ _EMBED_REGEX = [r'<iframe[^>]+src=(?P<q>["\'])(?P<url>(?:https?:)?//vbox7\.com/emb/external\.php.+?)(?P=q)']
+ _GEO_COUNTRIES = ['BG']
+ _TESTS = [{
+ 'url': 'http://vbox7.com/play:0946fff23c',
+ 'md5': '50ca1f78345a9c15391af47d8062d074',
+ 'info_dict': {
+ 'id': '0946fff23c',
+ 'ext': 'mp4',
+ 'title': 'Борисов: Притеснен съм за бъдещето на България',
+ 'description': 'По думите му е опасно страната ни да бъде обявена за "сигурна"',
+ 'thumbnail': r're:^https?://.*\.jpg$',
+ 'timestamp': 1470982814,
+ 'upload_date': '20160812',
+ 'uploader': 'zdraveibulgaria',
+ 'view_count': int,
+ 'duration': 2640,
+ },
+ }, {
+ 'url': 'http://vbox7.com/play:249bb972c2',
+ 'md5': 'da1dd2eb245200cb86e6d09d43232116',
+ 'info_dict': {
+ 'id': '249bb972c2',
+ 'ext': 'mp4',
+ 'title': 'Смях! Чудо - чист за секунди - Скрита камера',
+ 'uploader': 'svideteliat_ot_varshava',
+ 'view_count': int,
+ 'timestamp': 1360215023,
+ 'thumbnail': 'https://i49.vbox7.com/o/249/249bb972c20.jpg',
+ 'description': 'Смях! Чудо - чист за секунди - Скрита камера',
+ 'upload_date': '20130207',
+ 'duration': 83,
+ },
+ 'expected_warnings': ['Failed to download m3u8 information'],
+ }, {
+ 'url': 'http://vbox7.com/emb/external.php?vid=a240d20f9c&autoplay=1',
+ 'only_matching': True,
+ }, {
+ 'url': 'http://i49.vbox7.com/player/ext.swf?vid=0946fff23c&autoplay=1',
+ 'only_matching': True,
+ }]
+
+ def _real_extract(self, url):
+ video_id = self._match_id(url)
+
+ data = self._download_json(
+ 'https://www.vbox7.com/aj/player/item/options', video_id,
+ query={'vid': video_id})['options']
+
+ src_url = data.get('src')
+ if src_url in (None, '', 'blank'):
+ raise ExtractorError('Video is unavailable', expected=True)
+
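+        # a 'vn'-named source appears to be the placeholder served to
+        # geo-blocked clients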
+ fmt_base = url_basename(src_url).rsplit('.', 1)[0].rsplit('_', 1)[0]
+ if fmt_base == 'vn':
+ self.raise_geo_restricted()
+
+ fmt_base = base_url(src_url) + fmt_base
+
+ formats = self._extract_m3u8_formats(
+ f'{fmt_base}.m3u8', video_id, m3u8_id='hls', fatal=False)
+ # TODO: Add MPD formats, when dash range support is added
+ for res in traverse_obj(data, ('resolutions', lambda _, v: v != 0, {int})):
+ formats.append({
+ 'url': f'{fmt_base}_{res}.mp4',
+ 'format_id': f'http-{res}',
+ 'height': res,
+ })
+
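+        # metadata comes from two sources: the player options API above and the
+        # JSON-LD embedded in the watch page, merged below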
+ return {
+ 'id': video_id,
+ 'formats': formats,
+ **self._search_json_ld(self._download_webpage(
+ f'https://www.vbox7.com/play:{video_id}', video_id, fatal=False) or '', video_id, fatal=False),
+ **traverse_obj(data, {
+ 'title': ('title', {str}),
+ 'uploader': ('uploader', {str}),
+ 'duration': ('duration', {int_or_none}),
+ }),
+ }
diff --git a/yt_dlp/extractor/veo.py b/yt_dlp/extractor/veo.py
new file mode 100644
index 0000000..ef44d42
--- /dev/null
+++ b/yt_dlp/extractor/veo.py
@@ -0,0 +1,76 @@
+from .common import InfoExtractor
+from ..utils import (
+ int_or_none,
+ mimetype2ext,
+ str_or_none,
+ unified_timestamp,
+ url_or_none,
+)
+
+
+class VeoIE(InfoExtractor):
+ _VALID_URL = r'https?://app\.veo\.co/matches/(?P<id>[0-9A-Za-z-_]+)'
+
+ _TESTS = [{
+ 'url': 'https://app.veo.co/matches/20201027-last-period/',
+ 'info_dict': {
+ 'id': '20201027-last-period',
+ 'ext': 'mp4',
+ 'title': 'Akidemy u11s v Bradford Boys u11s (Game 3)',
+            'thumbnail': r're:https://c\.veocdn\.com/.+/thumbnail\.jpg',
+ 'upload_date': '20201028',
+ 'timestamp': 1603847208,
+ 'duration': 1916,
+ 'view_count': int,
+ }
+ }, {
+ 'url': 'https://app.veo.co/matches/20220313-2022-03-13_u15m-plsjq-vs-csl/',
+ 'only_matching': True,
+ }]
+
+ def _real_extract(self, url):
+ video_id = self._match_id(url)
+
+ metadata = self._download_json(
+ 'https://app.veo.co/api/app/matches/%s' % video_id, video_id)
+
+ video_data = self._download_json(
+ 'https://app.veo.co/api/app/matches/%s/videos' % video_id, video_id, 'Downloading video data')
+
+ formats = []
+ for fmt in video_data:
+ mimetype = str_or_none(fmt.get('mime_type'))
+ format_url = url_or_none(fmt.get('url'))
+ # skip configuration file for panoramic video
+ if not format_url or mimetype == 'video/mp2t':
+ continue
+
+ height = int_or_none(fmt.get('height'))
+ render_type = str_or_none(fmt.get('render_type'))
+ format_id = f'{render_type}-{height}p' if render_type and height else None
+
+ # Veo returns panoramic video information even if panoramic video is not available.
+ # e.g. https://app.veo.co/matches/20201027-last-period/
+ if render_type == 'panorama':
+ if not self._is_valid_url(format_url, video_id, format_id):
+ continue
+
+ formats.append({
+ 'url': format_url,
+ 'format_id': format_id,
+ 'ext': mimetype2ext(mimetype),
+ 'width': int_or_none(fmt.get('width')),
+ 'height': height,
+ 'vbr': int_or_none(fmt.get('bit_rate'), scale=1000),
+ })
+
+ return {
+ 'id': video_id,
+ 'title': str_or_none(metadata.get('title')),
+ 'formats': formats,
+ 'thumbnail': url_or_none(metadata.get('thumbnail')),
+ 'timestamp': unified_timestamp(metadata.get('created')),
+ 'view_count': int_or_none(metadata.get('view_count')),
+ 'duration': int_or_none(metadata.get('duration')),
+ }
diff --git a/yt_dlp/extractor/veoh.py b/yt_dlp/extractor/veoh.py
new file mode 100644
index 0000000..92ff865
--- /dev/null
+++ b/yt_dlp/extractor/veoh.py
@@ -0,0 +1,188 @@
+import functools
+import json
+
+from .common import InfoExtractor
+from ..utils import (
+ ExtractorError,
+ OnDemandPagedList,
+ int_or_none,
+ parse_duration,
+ qualities,
+ try_get,
+)
+
+
+class VeohIE(InfoExtractor):
+ _VALID_URL = r'https?://(?:www\.)?veoh\.com/(?:watch|videos|embed|iphone/#_Watch)/(?P<id>(?:v|e|yapi-)[\da-zA-Z]+)'
+
+ _TESTS = [{
+ 'url': 'http://www.veoh.com/watch/v56314296nk7Zdmz3',
+ 'md5': '620e68e6a3cff80086df3348426c9ca3',
+ 'info_dict': {
+ 'id': 'v56314296nk7Zdmz3',
+ 'ext': 'mp4',
+ 'title': 'Straight Backs Are Stronger',
+ 'description': 'md5:203f976279939a6dc664d4001e13f5f4',
+ 'thumbnail': 're:https://fcache\\.veoh\\.com/file/f/th56314296\\.jpg(\\?.*)?',
+ 'uploader': 'LUMOback',
+ 'duration': 46,
+ 'view_count': int,
+ 'average_rating': int,
+ 'comment_count': int,
+ 'age_limit': 0,
+ 'categories': ['technology_and_gaming'],
+ 'tags': ['posture', 'posture', 'sensor', 'back', 'pain', 'wearable', 'tech', 'lumo'],
+ },
+ }, {
+ 'url': 'http://www.veoh.com/embed/v56314296nk7Zdmz3',
+ 'only_matching': True,
+ }, {
+ 'url': 'http://www.veoh.com/watch/v27701988pbTc4wzN?h1=Chile+workers+cover+up+to+avoid+skin+damage',
+ 'md5': '4a6ff84b87d536a6a71e6aa6c0ad07fa',
+ 'info_dict': {
+ 'id': '27701988',
+ 'ext': 'mp4',
+ 'title': 'Chile workers cover up to avoid skin damage',
+ 'description': 'md5:2bd151625a60a32822873efc246ba20d',
+ 'uploader': 'afp-news',
+ 'duration': 123,
+ },
+ 'skip': 'This video has been deleted.',
+ }, {
+ 'url': 'http://www.veoh.com/watch/v69525809F6Nc4frX',
+ 'md5': '4fde7b9e33577bab2f2f8f260e30e979',
+ 'note': 'Embedded ooyala video',
+ 'info_dict': {
+ 'id': '69525809',
+ 'ext': 'mp4',
+ 'title': 'Doctors Alter Plan For Preteen\'s Weight Loss Surgery',
+ 'description': 'md5:f5a11c51f8fb51d2315bca0937526891',
+ 'uploader': 'newsy-videos',
+ },
+ 'skip': 'This video has been deleted.',
+ }, {
+ 'url': 'http://www.veoh.com/watch/e152215AJxZktGS',
+ 'only_matching': True,
+ }, {
+ 'url': 'https://www.veoh.com/videos/v16374379WA437rMH',
+ 'md5': 'cceb73f3909063d64f4b93d4defca1b3',
+ 'info_dict': {
+ 'id': 'v16374379WA437rMH',
+ 'ext': 'mp4',
+ 'title': 'Phantasmagoria 2, pt. 1-3',
+ 'description': 'Phantasmagoria: a Puzzle of Flesh',
+ 'thumbnail': 're:https://fcache\\.veoh\\.com/file/f/th16374379\\.jpg(\\?.*)?',
+ 'uploader': 'davidspackage',
+ 'duration': 968,
+ 'view_count': int,
+ 'average_rating': int,
+ 'comment_count': int,
+ 'age_limit': 18,
+ 'categories': ['technology_and_gaming', 'gaming'],
+ 'tags': ['puzzle', 'of', 'flesh'],
+ }
+ }]
+
+ def _real_extract(self, url):
+ video_id = self._match_id(url)
+ metadata = self._download_json(
+ 'https://www.veoh.com/watch/getVideo/' + video_id,
+ video_id)
+ video = metadata['video']
+ title = video['title']
+
+ thumbnail_url = None
+ q = qualities(['Regular', 'HQ'])
+ formats = []
+ for f_id, f_url in video.get('src', {}).items():
+ if not f_url:
+ continue
+ if f_id == 'poster':
+ thumbnail_url = f_url
+ else:
+ formats.append({
+ 'format_id': f_id,
+ 'quality': q(f_id),
+ 'url': f_url,
+ })
+
+ categories = metadata.get('categoryPath')
+ if not categories:
+ category = try_get(video, lambda x: x['category'].strip().removeprefix('category_'))
+ categories = [category] if category else None
+ tags = video.get('tags')
+
+ return {
+ 'id': video_id,
+ 'title': title,
+ 'description': video.get('description'),
+ 'thumbnail': thumbnail_url,
+ 'uploader': video.get('author', {}).get('nickname'),
+ 'duration': int_or_none(video.get('lengthBySec')) or parse_duration(video.get('length')),
+ 'view_count': int_or_none(video.get('views')),
+ 'formats': formats,
+ 'average_rating': int_or_none(video.get('rating')),
+ 'comment_count': int_or_none(video.get('numOfComments')),
+ 'age_limit': 18 if video.get('contentRatingId') == 2 else 0,
+ 'categories': categories,
+ 'tags': tags.split(', ') if tags else None,
+ }
+
+
+class VeohUserIE(VeohIE): # XXX: Do not subclass from concrete IE
+ _VALID_URL = r'https?://(?:www\.)?veoh\.com/users/(?P<id>[\w-]+)'
+ IE_NAME = 'veoh:user'
+
+ _TESTS = [
+ {
+ 'url': 'https://www.veoh.com/users/valentinazoe',
+ 'info_dict': {
+ 'id': 'valentinazoe',
+ 'title': 'valentinazoe (Uploads)'
+ },
+ 'playlist_mincount': 75
+ },
+ {
+ 'url': 'https://www.veoh.com/users/PiensaLibre',
+ 'info_dict': {
+ 'id': 'PiensaLibre',
+ 'title': 'PiensaLibre (Uploads)'
+ },
+ 'playlist_mincount': 2
+ }]
+
+ _PAGE_SIZE = 16
+
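+    # invoked lazily by OnDemandPagedList: pages are fetched only as the
+    # resulting playlist is consumed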
+ def _fetch_page(self, uploader, page):
+ response = self._download_json(
+ 'https://www.veoh.com/users/published/videos', uploader,
+ note=f'Downloading videos page {page + 1}',
+ headers={
+ 'x-csrf-token': self._TOKEN,
+ 'content-type': 'application/json;charset=UTF-8'
+ },
+ data=json.dumps({
+ 'username': uploader,
+ 'maxResults': self._PAGE_SIZE,
+ 'page': page + 1,
+ 'requestName': 'userPage'
+ }).encode('utf-8'))
+ if not response.get('success'):
+ raise ExtractorError(response['message'])
+
+ for video in response['videos']:
+ yield self.url_result(f'https://www.veoh.com/watch/{video["permalinkId"]}', VeohIE,
+ video['permalinkId'], video.get('title'))
+
+ def _real_initialize(self):
+ webpage = self._download_webpage(
+ 'https://www.veoh.com', None, note='Downloading authorization token')
+ self._TOKEN = self._search_regex(
+ r'csrfToken:\s*(["\'])(?P<token>[0-9a-zA-Z]{40})\1', webpage,
+ 'request token', group='token')
+
+ def _real_extract(self, url):
+ uploader = self._match_id(url)
+ return self.playlist_result(OnDemandPagedList(
+ functools.partial(self._fetch_page, uploader),
+ self._PAGE_SIZE), uploader, f'{uploader} (Uploads)')
diff --git a/yt_dlp/extractor/vesti.py b/yt_dlp/extractor/vesti.py
new file mode 100644
index 0000000..3f2dddb
--- /dev/null
+++ b/yt_dlp/extractor/vesti.py
@@ -0,0 +1,119 @@
+import re
+
+from .common import InfoExtractor
+from ..utils import ExtractorError
+from .rutv import RUTVIE
+
+
+class VestiIE(InfoExtractor):
+ _WORKING = False
+ IE_DESC = 'Вести.Ru'
+ _VALID_URL = r'https?://(?:.+?\.)?vesti\.ru/(?P<id>.+)'
+
+ _TESTS = [
+ {
+ 'url': 'http://www.vesti.ru/videos?vid=575582&cid=1',
+ 'info_dict': {
+ 'id': '765035',
+ 'ext': 'mp4',
+ 'title': 'Вести.net: биткоины в России не являются законными',
+ 'description': 'md5:d4bb3859dc1177b28a94c5014c35a36b',
+ 'duration': 302,
+ },
+ 'params': {
+ # m3u8 download
+ 'skip_download': True,
+ },
+ },
+ {
+ 'url': 'http://www.vesti.ru/doc.html?id=1349233',
+ 'info_dict': {
+ 'id': '773865',
+ 'ext': 'mp4',
+ 'title': 'Участники митинга штурмуют Донецкую областную администрацию',
+ 'description': 'md5:1a160e98b3195379b4c849f2f4958009',
+ 'duration': 210,
+ },
+ 'params': {
+ # m3u8 download
+ 'skip_download': True,
+ },
+ },
+ {
+ 'url': 'http://www.vesti.ru/only_video.html?vid=576180',
+ 'info_dict': {
+ 'id': '766048',
+ 'ext': 'mp4',
+ 'title': 'США заморозило, Британию затопило',
+ 'description': 'md5:f0ed0695ec05aed27c56a70a58dc4cc1',
+ 'duration': 87,
+ },
+ 'params': {
+ # m3u8 download
+ 'skip_download': True,
+ },
+ },
+ {
+ 'url': 'http://hitech.vesti.ru/news/view/id/4000',
+ 'info_dict': {
+ 'id': '766888',
+ 'ext': 'mp4',
+ 'title': 'Вести.net: интернет-гиганты начали перетягивание программных "одеял"',
+ 'description': 'md5:65ddd47f9830c4f42ed6475f8730c995',
+ 'duration': 279,
+ },
+ 'params': {
+ # m3u8 download
+ 'skip_download': True,
+ },
+ },
+ {
+ 'url': 'http://sochi2014.vesti.ru/video/index/video_id/766403',
+ 'info_dict': {
+ 'id': '766403',
+ 'ext': 'mp4',
+ 'title': 'XXII зимние Олимпийские игры. Российские хоккеисты стартовали на Олимпиаде с победы',
+ 'description': 'md5:55805dfd35763a890ff50fa9e35e31b3',
+ 'duration': 271,
+ },
+ 'params': {
+ # m3u8 download
+ 'skip_download': True,
+ },
+ 'skip': 'Blocked outside Russia',
+ },
+ {
+ 'url': 'http://sochi2014.vesti.ru/live/play/live_id/301',
+ 'info_dict': {
+ 'id': '51499',
+ 'ext': 'flv',
+ 'title': 'Сочи-2014. Биатлон. Индивидуальная гонка. Мужчины ',
+ 'description': 'md5:9e0ed5c9d2fa1efbfdfed90c9a6d179c',
+ },
+ 'params': {
+ # rtmp download
+ 'skip_download': True,
+ },
+ 'skip': 'Translation has finished'
+ },
+ ]
+
+ def _real_extract(self, url):
+ mobj = self._match_valid_url(url)
+ video_id = mobj.group('id')
+
+ page = self._download_webpage(url, video_id, 'Downloading page')
+
+ mobj = re.search(
+ r'<meta[^>]+?property="og:video"[^>]+?content="http://www\.vesti\.ru/i/flvplayer_videoHost\.swf\?vid=(?P<id>\d+)',
+ page)
+ if mobj:
+ video_id = mobj.group('id')
+ page = self._download_webpage('http://www.vesti.ru/only_video.html?vid=%s' % video_id, video_id,
+ 'Downloading video page')
+
+ rutv_url = RUTVIE._extract_url(page)
+ if rutv_url:
+ return self.url_result(rutv_url, 'RUTV')
+
+ raise ExtractorError('No video found', expected=True)
diff --git a/yt_dlp/extractor/vevo.py b/yt_dlp/extractor/vevo.py
new file mode 100644
index 0000000..aa40227
--- /dev/null
+++ b/yt_dlp/extractor/vevo.py
@@ -0,0 +1,353 @@
+import json
+import re
+
+from .common import InfoExtractor
+from ..compat import compat_str
+from ..networking.exceptions import HTTPError
+from ..utils import (
+ ExtractorError,
+ int_or_none,
+ parse_iso8601,
+ parse_qs,
+)
+
+
+class VevoBaseIE(InfoExtractor):
+ def _extract_json(self, webpage, video_id):
+ return self._parse_json(
+ self._search_regex(
+ r'window\.__INITIAL_STORE__\s*=\s*({.+?});\s*</script>',
+ webpage, 'initial store'),
+ video_id)
+
+
+class VevoIE(VevoBaseIE):
+ '''
+    Accepts URLs from vevo.com or in the format 'vevo:{id}'
+ (currently used by MTVIE and MySpaceIE)
+ '''
+ _VALID_URL = r'''(?x)
+ (?:https?://(?:www\.)?vevo\.com/watch/(?!playlist|genre)(?:[^/]+/(?:[^/]+/)?)?|
+ https?://cache\.vevo\.com/m/html/embed\.html\?video=|
+ https?://videoplayer\.vevo\.com/embed/embedded\?videoId=|
+ https?://embed\.vevo\.com/.*?[?&]isrc=|
+ https?://tv\.vevo\.com/watch/artist/(?:[^/]+)/|
+ vevo:)
+ (?P<id>[^&?#]+)'''
+ _EMBED_REGEX = [r'<iframe[^>]+?src=(["\'])(?P<url>(?:https?:)?//(?:cache\.)?vevo\.com/.+?)\1']
+
+ _TESTS = [{
+ 'url': 'http://www.vevo.com/watch/hurts/somebody-to-die-for/GB1101300280',
+ 'md5': '95ee28ee45e70130e3ab02b0f579ae23',
+ 'info_dict': {
+ 'id': 'GB1101300280',
+ 'ext': 'mp4',
+ 'title': 'Hurts - Somebody to Die For',
+ 'timestamp': 1372057200,
+ 'upload_date': '20130624',
+ 'uploader': 'Hurts',
+ 'track': 'Somebody to Die For',
+ 'artist': 'Hurts',
+ 'genre': 'Pop',
+ },
+ 'expected_warnings': ['Unable to download SMIL file', 'Unable to download info'],
+ }, {
+ 'note': 'v3 SMIL format',
+ 'url': 'http://www.vevo.com/watch/cassadee-pope/i-wish-i-could-break-your-heart/USUV71302923',
+ 'md5': 'f6ab09b034f8c22969020b042e5ac7fc',
+ 'info_dict': {
+ 'id': 'USUV71302923',
+ 'ext': 'mp4',
+ 'title': 'Cassadee Pope - I Wish I Could Break Your Heart',
+ 'timestamp': 1392796919,
+ 'upload_date': '20140219',
+ 'uploader': 'Cassadee Pope',
+ 'track': 'I Wish I Could Break Your Heart',
+ 'artist': 'Cassadee Pope',
+ 'genre': 'Country',
+ },
+ 'expected_warnings': ['Unable to download SMIL file', 'Unable to download info'],
+ }, {
+ 'note': 'Age-limited video',
+ 'url': 'https://www.vevo.com/watch/justin-timberlake/tunnel-vision-explicit/USRV81300282',
+ 'info_dict': {
+ 'id': 'USRV81300282',
+ 'ext': 'mp4',
+ 'title': 'Justin Timberlake - Tunnel Vision (Explicit)',
+ 'age_limit': 18,
+ 'timestamp': 1372888800,
+ 'upload_date': '20130703',
+ 'uploader': 'Justin Timberlake',
+ 'track': 'Tunnel Vision (Explicit)',
+ 'artist': 'Justin Timberlake',
+ 'genre': 'Pop',
+ },
+ 'expected_warnings': ['Unable to download SMIL file', 'Unable to download info'],
+ }, {
+ 'note': 'No video_info',
+ 'url': 'http://www.vevo.com/watch/k-camp-1/Till-I-Die/USUV71503000',
+ 'md5': '8b83cc492d72fc9cf74a02acee7dc1b0',
+ 'info_dict': {
+ 'id': 'USUV71503000',
+ 'ext': 'mp4',
+ 'title': 'K Camp ft. T.I. - Till I Die',
+ 'age_limit': 18,
+ 'timestamp': 1449468000,
+ 'upload_date': '20151207',
+ 'uploader': 'K Camp',
+ 'track': 'Till I Die',
+ 'artist': 'K Camp',
+ 'genre': 'Hip-Hop',
+ },
+ 'expected_warnings': ['Unable to download SMIL file', 'Unable to download info'],
+ }, {
+ 'note': 'Featured test',
+ 'url': 'https://www.vevo.com/watch/lemaitre/Wait/USUV71402190',
+ 'md5': 'd28675e5e8805035d949dc5cf161071d',
+ 'info_dict': {
+ 'id': 'USUV71402190',
+ 'ext': 'mp4',
+ 'title': 'Lemaitre ft. LoLo - Wait',
+ 'age_limit': 0,
+ 'timestamp': 1413432000,
+ 'upload_date': '20141016',
+ 'uploader': 'Lemaitre',
+ 'track': 'Wait',
+ 'artist': 'Lemaitre',
+ 'genre': 'Electronic',
+ },
+ 'expected_warnings': ['Unable to download SMIL file', 'Unable to download info'],
+ }, {
+ 'note': 'Only available via webpage',
+ 'url': 'http://www.vevo.com/watch/GBUV71600656',
+ 'md5': '67e79210613865b66a47c33baa5e37fe',
+ 'info_dict': {
+ 'id': 'GBUV71600656',
+ 'ext': 'mp4',
+ 'title': 'ABC - Viva Love',
+ 'age_limit': 0,
+ 'timestamp': 1461830400,
+ 'upload_date': '20160428',
+ 'uploader': 'ABC',
+ 'track': 'Viva Love',
+ 'artist': 'ABC',
+ 'genre': 'Pop',
+ },
+ 'expected_warnings': ['Failed to download video versions info'],
+ }, {
+ # no genres available
+ 'url': 'http://www.vevo.com/watch/INS171400764',
+ 'only_matching': True,
+ }, {
+ # Another case available only via the webpage; using streams/streamsV3 formats
+ # Geo-restricted to Netherlands/Germany
+ 'url': 'http://www.vevo.com/watch/boostee/pop-corn-clip-officiel/FR1A91600909',
+ 'only_matching': True,
+ }, {
+ 'url': 'https://embed.vevo.com/?isrc=USH5V1923499&partnerId=4d61b777-8023-4191-9ede-497ed6c24647&partnerAdCode=',
+ 'only_matching': True,
+ }, {
+ 'url': 'https://tv.vevo.com/watch/artist/janet-jackson/US0450100550',
+ 'only_matching': True,
+ }]
+ _VERSIONS = {
+ 0: 'youtube', # only in AuthenticateVideo videoVersions
+ 1: 'level3',
+ 2: 'akamai',
+ 3: 'level3',
+ 4: 'amazon',
+ }
+
+ def _initialize_api(self, video_id):
+ webpage = self._download_webpage(
+ 'https://accounts.vevo.com/token', None,
+ note='Retrieving oauth token',
+ errnote='Unable to retrieve oauth token',
+ data=json.dumps({
+ 'client_id': 'SPupX1tvqFEopQ1YS6SS',
+ 'grant_type': 'urn:vevo:params:oauth:grant-type:anonymous',
+ }).encode('utf-8'),
+ headers={
+ 'Content-Type': 'application/json',
+ })
+
+ if re.search(r'(?i)THIS PAGE IS CURRENTLY UNAVAILABLE IN YOUR REGION', webpage):
+ self.raise_geo_restricted(
+ '%s said: This page is currently unavailable in your region' % self.IE_NAME)
+
+ auth_info = self._parse_json(webpage, video_id)
+ self._api_url_template = self.http_scheme() + '//apiv2.vevo.com/%s?token=' + auth_info['legacy_token']
+
+ def _call_api(self, path, *args, **kwargs):
+ try:
+ data = self._download_json(self._api_url_template % path, *args, **kwargs)
+ except ExtractorError as e:
+ if isinstance(e.cause, HTTPError):
+ errors = self._parse_json(e.cause.response.read().decode(), None)['errors']
+ error_message = ', '.join([error['message'] for error in errors])
+ raise ExtractorError('%s said: %s' % (self.IE_NAME, error_message), expected=True)
+ raise
+ return data
+
+ def _real_extract(self, url):
+ video_id = self._match_id(url)
+
+ self._initialize_api(video_id)
+
+ video_info = self._call_api(
+ 'video/%s' % video_id, video_id, 'Downloading api video info',
+ 'Failed to download video info')
+
+ video_versions = self._call_api(
+ 'video/%s/streams' % video_id, video_id,
+ 'Downloading video versions info',
+ 'Failed to download video versions info',
+ fatal=False)
+
+ # Some videos are only available via webpage (e.g.
+ # https://github.com/ytdl-org/youtube-dl/issues/9366)
+ if not video_versions:
+ webpage = self._download_webpage(url, video_id)
+ json_data = self._extract_json(webpage, video_id)
+ if 'streams' in json_data.get('default', {}):
+ video_versions = json_data['default']['streams'][video_id][0]
+ else:
+ video_versions = [
+ value
+ for key, value in json_data['apollo']['data'].items()
+ if key.startswith('%s.streams' % video_id)]
+
+ uploader = None
+ artist = None
+ featured_artist = None
+ artists = video_info.get('artists')
+        for curr_artist in artists or []:
+ if curr_artist.get('role') == 'Featured':
+ featured_artist = curr_artist['name']
+ else:
+ artist = uploader = curr_artist['name']
+
+ formats = []
+ for video_version in video_versions:
+ version = self._VERSIONS.get(video_version.get('version'), 'generic')
+ version_url = video_version.get('url')
+ if not version_url:
+ continue
+
+ if '.ism' in version_url:
+ continue
+ elif '.mpd' in version_url:
+ formats.extend(self._extract_mpd_formats(
+ version_url, video_id, mpd_id='dash-%s' % version,
+ note='Downloading %s MPD information' % version,
+ errnote='Failed to download %s MPD information' % version,
+ fatal=False))
+ elif '.m3u8' in version_url:
+ formats.extend(self._extract_m3u8_formats(
+ version_url, video_id, 'mp4', 'm3u8_native',
+ m3u8_id='hls-%s' % version,
+ note='Downloading %s m3u8 information' % version,
+ errnote='Failed to download %s m3u8 information' % version,
+ fatal=False))
+ else:
+ m = re.search(r'''(?xi)
+ _(?P<quality>[a-z0-9]+)
+ _(?P<width>[0-9]+)x(?P<height>[0-9]+)
+ _(?P<vcodec>[a-z0-9]+)
+ _(?P<vbr>[0-9]+)
+ _(?P<acodec>[a-z0-9]+)
+ _(?P<abr>[0-9]+)
+ \.(?P<ext>[a-z0-9]+)''', version_url)
+ if not m:
+ continue
+
+ formats.append({
+ 'url': version_url,
+ 'format_id': f'http-{version}-{video_version.get("quality") or m.group("quality")}',
+ 'vcodec': m.group('vcodec'),
+ 'acodec': m.group('acodec'),
+ 'vbr': int(m.group('vbr')),
+ 'abr': int(m.group('abr')),
+ 'ext': m.group('ext'),
+ 'width': int(m.group('width')),
+ 'height': int(m.group('height')),
+ })
+
+ track = video_info['title']
+ if featured_artist:
+ artist = '%s ft. %s' % (artist, featured_artist)
+ title = '%s - %s' % (artist, track) if artist else track
+
+ genres = video_info.get('genres')
+ genre = (
+ genres[0] if genres and isinstance(genres, list)
+ and isinstance(genres[0], compat_str) else None)
+
+ is_explicit = video_info.get('isExplicit')
+ if is_explicit is True:
+ age_limit = 18
+ elif is_explicit is False:
+ age_limit = 0
+ else:
+ age_limit = None
+
+ return {
+ 'id': video_id,
+ 'title': title,
+ 'formats': formats,
+ 'thumbnail': video_info.get('imageUrl') or video_info.get('thumbnailUrl'),
+ 'timestamp': parse_iso8601(video_info.get('releaseDate')),
+ 'uploader': uploader,
+ 'duration': int_or_none(video_info.get('duration')),
+ 'view_count': int_or_none(video_info.get('views', {}).get('total')),
+ 'age_limit': age_limit,
+ 'track': track,
+ 'artist': uploader,
+ 'genre': genre,
+ }
+
+
+class VevoPlaylistIE(VevoBaseIE):
+ _VALID_URL = r'https?://(?:www\.)?vevo\.com/watch/(?P<kind>playlist|genre)/(?P<id>[^/?#&]+)'
+
+ _TESTS = [{
+ 'url': 'http://www.vevo.com/watch/genre/rock',
+ 'info_dict': {
+ 'id': 'rock',
+ 'title': 'Rock',
+ },
+ 'playlist_count': 20,
+ }, {
+ 'url': 'http://www.vevo.com/watch/genre/rock?index=0',
+ 'only_matching': True,
+ }]
+
+ def _real_extract(self, url):
+ mobj = self._match_valid_url(url)
+ playlist_id = mobj.group('id')
+ playlist_kind = mobj.group('kind')
+
+ webpage = self._download_webpage(url, playlist_id)
+
+ qs = parse_qs(url)
+ index = qs.get('index', [None])[0]
+
+ if index:
+ video_id = self._search_regex(
+ r'<meta[^>]+content=(["\'])vevo://video/(?P<id>.+?)\1[^>]*>',
+ webpage, 'video id', default=None, group='id')
+ if video_id:
+ return self.url_result('vevo:%s' % video_id, VevoIE.ie_key())
+
+ playlists = self._extract_json(webpage, playlist_id)['default']['%ss' % playlist_kind]
+
+ playlist = (list(playlists.values())[0]
+ if playlist_kind == 'playlist' else playlists[playlist_id])
+
+ entries = [
+ self.url_result('vevo:%s' % src, VevoIE.ie_key())
+ for src in playlist['isrcs']]
+
+ return self.playlist_result(
+ entries, playlist.get('playlistId') or playlist_id,
+ playlist.get('name'), playlist.get('description'))
diff --git a/yt_dlp/extractor/vgtv.py b/yt_dlp/extractor/vgtv.py
new file mode 100644
index 0000000..db338fa
--- /dev/null
+++ b/yt_dlp/extractor/vgtv.py
@@ -0,0 +1,311 @@
+import re
+
+from .common import InfoExtractor
+from .xstream import XstreamIE
+from ..utils import (
+ ExtractorError,
+ float_or_none,
+ try_get,
+)
+
+
+class VGTVIE(XstreamIE): # XXX: Do not subclass from concrete IE
+ IE_DESC = 'VGTV, BTTV, FTV, Aftenposten and Aftonbladet'
+ _GEO_BYPASS = False
+
+ _HOST_TO_APPNAME = {
+ 'tv.vg.no': 'vgtv',
+ 'vgtv.no': 'vgtv',
+ 'bt.no/tv': 'bttv',
+ 'aftenbladet.no/tv': 'satv',
+ 'fvn.no/fvntv': 'fvntv',
+ 'aftenposten.no/webtv': 'aptv',
+ 'ap.vgtv.no/webtv': 'aptv',
+ 'tv.aftonbladet.se': 'abtv',
+        # obsolete URL formats, kept in order to save one HTTP redirect
+ 'tv.aftonbladet.se/abtv': 'abtv',
+ 'www.aftonbladet.se/tv': 'abtv',
+ }
+
+ _APP_NAME_TO_VENDOR = {
+ 'vgtv': 'vgtv',
+ 'bttv': 'bt',
+ 'satv': 'sa',
+ 'fvntv': 'fvn',
+ 'aptv': 'ap',
+ 'abtv': 'ab',
+ }
+
+ _VALID_URL = r'''(?x)
+ (?:https?://(?:www\.)?
+ (?P<host>
+ %s
+ )
+ /?
+ (?:
+ (?:\#!/)?(?:video|live)/|
+ embed?.*id=|
+ a(?:rticles)?/
+ )|
+ (?P<appname>
+ %s
+ ):)
+ (?P<id>\d+)
+ ''' % ('|'.join(_HOST_TO_APPNAME.keys()), '|'.join(_APP_NAME_TO_VENDOR.keys()))
+
+ _TESTS = [
+ {
+ # streamType: vod
+ 'url': 'http://www.vgtv.no/#!/video/84196/hevnen-er-soet-episode-10-abu',
+ 'md5': 'b8be7a234cebb840c0d512c78013e02f',
+ 'info_dict': {
+ 'id': '84196',
+ 'ext': 'mp4',
+ 'title': 'Hevnen er søt: Episode 10 - Abu',
+ 'description': 'md5:e25e4badb5f544b04341e14abdc72234',
+ 'thumbnail': r're:^https?://.*\.jpg',
+ 'duration': 648.000,
+ 'timestamp': 1404626400,
+ 'upload_date': '20140706',
+ 'view_count': int,
+ },
+ },
+ {
+ # streamType: wasLive
+ 'url': 'http://www.vgtv.no/#!/live/100764/opptak-vgtv-foelger-em-kvalifiseringen',
+ 'info_dict': {
+ 'id': '100764',
+ 'ext': 'flv',
+ 'title': 'OPPTAK: VGTV følger EM-kvalifiseringen',
+ 'description': 'md5:3772d9c0dc2dff92a886b60039a7d4d3',
+ 'thumbnail': r're:^https?://.*\.jpg',
+ 'duration': 9103.0,
+ 'timestamp': 1410113864,
+ 'upload_date': '20140907',
+ 'view_count': int,
+ },
+ 'params': {
+ # m3u8 download
+ 'skip_download': True,
+ },
+ 'skip': 'Video is no longer available',
+ },
+ {
+ # streamType: wasLive
+ 'url': 'http://www.vgtv.no/#!/live/113063/direkte-v75-fra-solvalla',
+ 'info_dict': {
+ 'id': '113063',
+ 'ext': 'mp4',
+ 'title': 'V75 fra Solvalla 30.05.15',
+ 'description': 'md5:b3743425765355855f88e096acc93231',
+ 'thumbnail': r're:^https?://.*\.jpg',
+ 'duration': 25966,
+ 'timestamp': 1432975582,
+ 'upload_date': '20150530',
+ 'view_count': int,
+ },
+ 'params': {
+ # m3u8 download
+ 'skip_download': True,
+ },
+ },
+ {
+ 'url': 'http://www.aftenposten.no/webtv/#!/video/21039/trailer-sweatshop-i-can-t-take-any-more',
+ 'md5': 'fd828cd29774a729bf4d4425fe192972',
+ 'info_dict': {
+ 'id': '21039',
+ 'ext': 'mp4',
+ 'title': 'TRAILER: «SWEATSHOP» - I can´t take any more',
+ 'description': 'md5:21891f2b0dd7ec2f78d84a50e54f8238',
+ 'duration': 66,
+ 'timestamp': 1417002452,
+ 'upload_date': '20141126',
+ 'view_count': int,
+ },
+ 'params': {
+ # m3u8 download
+ 'skip_download': True,
+ },
+ },
+ {
+ 'url': 'https://tv.vg.no/video/241779/politiets-ekstremkjoering',
+ 'only_matching': True,
+ },
+ {
+ 'url': 'http://www.bt.no/tv/#!/video/100250/norling-dette-er-forskjellen-paa-1-divisjon-og-eliteserien',
+ 'only_matching': True,
+ },
+ {
+ 'url': 'http://ap.vgtv.no/webtv#!/video/111084/de-nye-bysyklene-lettere-bedre-gir-stoerre-hjul-og-feste-til-mobil',
+ 'only_matching': True,
+ },
+ {
+ # geoblocked
+ 'url': 'http://www.vgtv.no/#!/video/127205/inside-the-mind-of-favela-funk',
+ 'only_matching': True,
+ },
+ {
+ 'url': 'https://tv.aftonbladet.se/video/36015/vulkanutbrott-i-rymden-nu-slapper-nasa-bilderna',
+ 'only_matching': True,
+ },
+ {
+ 'url': 'http://tv.aftonbladet.se/abtv/articles/36015',
+ 'only_matching': True,
+ },
+ {
+ 'url': 'https://www.aftonbladet.se/tv/a/36015',
+ 'only_matching': True,
+ },
+ {
+ 'url': 'abtv:140026',
+ 'only_matching': True,
+ },
+ {
+ 'url': 'http://www.vgtv.no/video/84196/hevnen-er-soet-episode-10-abu',
+ 'only_matching': True,
+ },
+ ]
+
+ def _real_extract(self, url):
+ mobj = self._match_valid_url(url)
+ video_id = mobj.group('id')
+ host = mobj.group('host')
+ appname = self._HOST_TO_APPNAME[host] if host else mobj.group('appname')
+ vendor = self._APP_NAME_TO_VENDOR[appname]
+
+ data = self._download_json(
+ 'http://svp.vg.no/svp/api/v1/%s/assets/%s?appName=%s-website'
+ % (vendor, video_id, appname),
+ video_id, 'Downloading media JSON')
+
+ if data.get('status') == 'inactive':
+ raise ExtractorError(
+ 'Video %s is no longer available' % video_id, expected=True)
+
+ info = {
+ 'formats': [],
+ }
+ if len(video_id) == 5:
+ if appname == 'bttv':
+ info = self._extract_video_info('btno', video_id)
+
+ streams = data['streamUrls']
+ stream_type = data.get('streamType')
+ is_live = stream_type == 'live'
+ formats = []
+
+ hls_url = streams.get('hls')
+ if hls_url:
+ formats.extend(self._extract_m3u8_formats(
+ hls_url, video_id, 'mp4', live=is_live, m3u8_id='hls', fatal=False))
+
+ hds_url = streams.get('hds')
+ if hds_url:
+ hdcore_sign = 'hdcore=3.7.0'
+ f4m_formats = self._extract_f4m_formats(
+ hds_url + '?%s' % hdcore_sign, video_id, f4m_id='hds', fatal=False)
+ if f4m_formats:
+ for entry in f4m_formats:
+                    # URLs without the extra param induce a 404 error
+ entry.update({'extra_param_to_segment_url': hdcore_sign})
+ formats.append(entry)
+
+ mp4_urls = streams.get('pseudostreaming') or []
+ mp4_url = streams.get('mp4')
+ if mp4_url:
+ mp4_urls.append(mp4_url)
+ for mp4_url in mp4_urls:
+ format_info = {
+ 'url': mp4_url,
+ }
+ mobj = re.search(r'(\d+)_(\d+)_(\d+)', mp4_url)
+ if mobj:
+ tbr = int(mobj.group(3))
+ format_info.update({
+ 'width': int(mobj.group(1)),
+ 'height': int(mobj.group(2)),
+ 'tbr': tbr,
+ 'format_id': 'mp4-%s' % tbr,
+ })
+ formats.append(format_info)
+
+ info['formats'].extend(formats)
+
+ if not info['formats']:
+ properties = try_get(
+ data, lambda x: x['streamConfiguration']['properties'], list)
+ if properties and 'geoblocked' in properties:
+            self.raise_geo_restricted(
+                countries=[host.rpartition('.')[-1].partition('/')[0].upper()])
+
+ info.update({
+ 'id': video_id,
+ 'title': data['title'],
+ 'description': data['description'],
+ 'thumbnail': data['images']['main'] + '?t[]=900x506q80',
+ 'timestamp': data['published'],
+ 'duration': float_or_none(data['duration'], 1000),
+ 'view_count': data['displays'],
+ 'is_live': is_live,
+ })
+ return info
+
+
+class BTArticleIE(InfoExtractor):
+ IE_NAME = 'bt:article'
+ IE_DESC = 'Bergens Tidende Articles'
+ _VALID_URL = r'https?://(?:www\.)?bt\.no/(?:[^/]+/)+(?P<id>[^/]+)-\d+\.html'
+ _TEST = {
+ 'url': 'http://www.bt.no/nyheter/lokalt/Kjemper-for-internatet-1788214.html',
+ 'md5': '2acbe8ad129b3469d5ae51b1158878df',
+ 'info_dict': {
+ 'id': '23199',
+ 'ext': 'mp4',
+ 'title': 'Alrekstad internat',
+ 'description': 'md5:dc81a9056c874fedb62fc48a300dac58',
+ 'thumbnail': r're:^https?://.*\.jpg',
+ 'duration': 191,
+ 'timestamp': 1289991323,
+ 'upload_date': '20101117',
+ 'view_count': int,
+ },
+ }
+
+ def _real_extract(self, url):
+ webpage = self._download_webpage(url, self._match_id(url))
+ video_id = self._search_regex(
+ r'<video[^>]+data-id="(\d+)"', webpage, 'video id')
+ return self.url_result('bttv:%s' % video_id, 'VGTV')
+
+
+class BTVestlendingenIE(InfoExtractor):
+ IE_NAME = 'bt:vestlendingen'
+ IE_DESC = 'Bergens Tidende - Vestlendingen'
+ _VALID_URL = r'https?://(?:www\.)?bt\.no/spesial/vestlendingen/#!/(?P<id>\d+)'
+ _TESTS = [{
+ 'url': 'http://www.bt.no/spesial/vestlendingen/#!/86588',
+ 'md5': 'd7d17e3337dc80de6d3a540aefbe441b',
+ 'info_dict': {
+ 'id': '86588',
+ 'ext': 'mov',
+ 'title': 'Otto Wollertsen',
+ 'description': 'Vestlendingen Otto Fredrik Wollertsen',
+ 'timestamp': 1430473209,
+ 'upload_date': '20150501',
+ },
+ 'skip': '404 Error',
+ }, {
+ 'url': 'http://www.bt.no/spesial/vestlendingen/#!/86255',
+ 'md5': 'a2893f8632e96389f4bdf36aa9463ceb',
+ 'info_dict': {
+ 'id': '86255',
+ 'ext': 'mov',
+ 'title': 'Du må tåle å fryse og være sulten',
+ 'description': 'md5:b8046f4d022d5830ddab04865791d063',
+ 'upload_date': '20150321',
+ 'timestamp': 1426942023,
+ },
+ }]
+
+ def _real_extract(self, url):
+ return self.url_result('bttv:%s' % self._match_id(url), 'VGTV')
diff --git a/yt_dlp/extractor/vh1.py b/yt_dlp/extractor/vh1.py
new file mode 100644
index 0000000..41b8a46
--- /dev/null
+++ b/yt_dlp/extractor/vh1.py
@@ -0,0 +1,33 @@
+from .mtv import MTVServicesInfoExtractor
+
+# TODO Remove - Reason: Outdated Site
+
+
+class VH1IE(MTVServicesInfoExtractor):
+ IE_NAME = 'vh1.com'
+ _FEED_URL = 'http://www.vh1.com/feeds/mrss/'
+ _TESTS = [{
+ 'url': 'https://www.vh1.com/episodes/0aqivv/nick-cannon-presents-wild-n-out-foushee-season-16-ep-12',
+ 'info_dict': {
+ 'title': 'Fousheé',
+ 'description': 'Fousheé joins Team Evolutions fight against Nick and Team Revolution in Baby Daddy, Baby Mama; Kick Em Out the Classroom; Backseat of My Ride and Wildstyle; and Fousheé performs.',
+ },
+ 'playlist_mincount': 4,
+ 'skip': '404 Not found',
+ }, {
+ # Clip
+ 'url': 'https://www.vh1.com/video-clips/e0sja0/nick-cannon-presents-wild-n-out-foushee-clap-for-him',
+ 'info_dict': {
+ 'id': 'a07563f7-a37b-4e7f-af68-85855c2c7cc3',
+ 'ext': 'mp4',
+ 'title': 'Fousheé - "clap for him"',
+ 'description': 'Singer Fousheé hits the Wild N Out: In the Dark stage with a performance of the tongue-in-cheek track "clap for him" from her 2021 album "time machine."',
+ 'upload_date': '20210826',
+ },
+ 'params': {
+ # m3u8 download
+ 'skip_download': True,
+ },
+ }]
+
+ _VALID_URL = r'https?://(?:www\.)?vh1\.com/(?:video-clips|episodes)/(?P<id>[^/?#.]+)'
diff --git a/yt_dlp/extractor/vice.py b/yt_dlp/extractor/vice.py
new file mode 100644
index 0000000..d31908f
--- /dev/null
+++ b/yt_dlp/extractor/vice.py
@@ -0,0 +1,313 @@
+import functools
+import hashlib
+import json
+import random
+import time
+
+from .adobepass import AdobePassIE
+from .common import InfoExtractor
+from .youtube import YoutubeIE
+from ..compat import compat_str
+from ..networking.exceptions import HTTPError
+from ..utils import (
+ clean_html,
+ ExtractorError,
+ int_or_none,
+ OnDemandPagedList,
+ parse_age_limit,
+ str_or_none,
+ try_get,
+)
+
+
+class ViceBaseIE(InfoExtractor):
+ def _call_api(self, resource, resource_key, resource_id, locale, fields, args=''):
+ return self._download_json(
+ 'https://video.vice.com/api/v1/graphql', resource_id, query={
+ 'query': '''{
+ %s(locale: "%s", %s: "%s"%s) {
+ %s
+ }
+}''' % (resource, locale, resource_key, resource_id, args, fields),
+ })['data'][resource]
+
+
+class ViceIE(ViceBaseIE, AdobePassIE):
+ IE_NAME = 'vice'
+ _VALID_URL = r'https?://(?:(?:video|vms)\.vice|(?:www\.)?vice(?:land|tv))\.com/(?P<locale>[^/]+)/(?:video/[^/]+|embed)/(?P<id>[\da-f]{24})'
+ _EMBED_REGEX = [r'<iframe\b[^>]+\bsrc=["\'](?P<url>(?:https?:)?//video\.vice\.com/[^/]+/embed/[\da-f]{24})']
+ _TESTS = [{
+ 'url': 'https://video.vice.com/en_us/video/pet-cremator/58c69e38a55424f1227dc3f7',
+ 'info_dict': {
+ 'id': '58c69e38a55424f1227dc3f7',
+ 'ext': 'mp4',
+ 'title': '10 Questions You Always Wanted To Ask: Pet Cremator',
+ 'description': 'md5:fe856caacf61fe0e74fab15ce2b07ca5',
+ 'uploader': 'vice',
+ 'uploader_id': '57a204088cb727dec794c67b',
+ 'timestamp': 1489664942,
+ 'upload_date': '20170316',
+ 'age_limit': 14,
+ },
+ 'params': {
+ # m3u8 download
+ 'skip_download': True,
+ },
+ }, {
+ # geo restricted to US
+ 'url': 'https://video.vice.com/en_us/video/the-signal-from-tolva/5816510690b70e6c5fd39a56',
+ 'info_dict': {
+ 'id': '5816510690b70e6c5fd39a56',
+ 'ext': 'mp4',
+ 'uploader': 'vice',
+ 'title': 'The Signal From Tölva',
+ 'description': 'md5:3927e3c79f9e8094606a2b3c5b5e55d5',
+ 'uploader_id': '57a204088cb727dec794c67b',
+ 'timestamp': 1477941983,
+ 'upload_date': '20161031',
+ },
+ 'params': {
+ # m3u8 download
+ 'skip_download': True,
+ },
+ }, {
+ 'url': 'https://video.vice.com/alps/video/ulfs-wien-beruchtigste-grafitti-crew-part-1/581b12b60a0e1f4c0fb6ea2f',
+ 'info_dict': {
+ 'id': '581b12b60a0e1f4c0fb6ea2f',
+ 'ext': 'mp4',
+ 'title': 'ULFs - Wien berüchtigste Grafitti Crew - Part 1',
+ 'description': 'Zwischen Hinterzimmer-Tattoos und U-Bahnschächten erzählen uns die Ulfs, wie es ist, "süchtig nach Sachbeschädigung" zu sein.',
+ 'uploader': 'vice',
+ 'uploader_id': '57a204088cb727dec794c67b',
+ 'timestamp': 1485368119,
+ 'upload_date': '20170125',
+ 'age_limit': 14,
+ },
+ 'params': {
+ # AES-encrypted m3u8
+ 'skip_download': True,
+ },
+ }, {
+ 'url': 'https://video.vice.com/en_us/video/pizza-show-trailer/56d8c9a54d286ed92f7f30e4',
+ 'only_matching': True,
+ }, {
+ 'url': 'https://video.vice.com/en_us/embed/57f41d3556a0a80f54726060',
+ 'only_matching': True,
+ }, {
+ 'url': 'https://vms.vice.com/en_us/video/preplay/58c69e38a55424f1227dc3f7',
+ 'only_matching': True,
+ }, {
+ 'url': 'https://www.viceland.com/en_us/video/thursday-march-1-2018/5a8f2d7ff1cdb332dd446ec1',
+ 'only_matching': True,
+ }]
+
+ def _real_extract(self, url):
+ locale, video_id = self._match_valid_url(url).groups()
+
+ video = self._call_api('videos', 'id', video_id, locale, '''body
+ locked
+ rating
+ thumbnail_url
+ title''')[0]
+ title = video['title'].strip()
+ rating = video.get('rating')
+
+ query = {}
+ if video.get('locked'):
+ resource = self._get_mvpd_resource(
+ 'VICELAND', title, video_id, rating)
+ query['tvetoken'] = self._extract_mvpd_auth(
+ url, video_id, 'VICELAND', resource)
+
+ # signature generation algorithm is reverse engineered from signatureGenerator in
+ # webpack:///../shared/~/vice-player/dist/js/vice-player.js in
+ # https://www.viceland.com/assets/common/js/web.vendor.bundle.js
+ # new JS is located here https://vice-web-statics-cdn.vice.com/vice-player/player-embed.js
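+        # The signature itself is just SHA-512 over "<video_id>:GET:<exp>";
+        # 'exp' (now + 1440s) presumably bounds how long the signed URL stays valid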
+ exp = int(time.time()) + 1440
+
+ query.update({
+ 'exp': exp,
+ 'sign': hashlib.sha512(('%s:GET:%d' % (video_id, exp)).encode()).hexdigest(),
+ 'skipadstitching': 1,
+ 'platform': 'desktop',
+ 'rn': random.randint(10000, 100000),
+ })
+
+ try:
+ preplay = self._download_json(
+ 'https://vms.vice.com/%s/video/preplay/%s' % (locale, video_id),
+ video_id, query=query)
+ except ExtractorError as e:
+ if isinstance(e.cause, HTTPError) and e.cause.status in (400, 401):
+ error = json.loads(e.cause.response.read().decode())
+ error_message = error.get('error_description') or error['details']
+ raise ExtractorError('%s said: %s' % (
+ self.IE_NAME, error_message), expected=True)
+ raise
+
+ video_data = preplay['video']
+ formats = self._extract_m3u8_formats(
+ preplay['playURL'], video_id, 'mp4', 'm3u8_native')
+ episode = video_data.get('episode') or {}
+ channel = video_data.get('channel') or {}
+ season = video_data.get('season') or {}
+
+ subtitles = {}
+ for subtitle in preplay.get('subtitleURLs', []):
+ cc_url = subtitle.get('url')
+ if not cc_url:
+ continue
+ language_code = try_get(subtitle, lambda x: x['languages'][0]['language_code'], compat_str) or 'en'
+ subtitles.setdefault(language_code, []).append({
+ 'url': cc_url,
+ })
+
+ return {
+ 'formats': formats,
+ 'id': video_id,
+ 'title': title,
+ 'description': clean_html(video.get('body')),
+ 'thumbnail': video.get('thumbnail_url'),
+ 'duration': int_or_none(video_data.get('video_duration')),
+ 'timestamp': int_or_none(video_data.get('created_at'), 1000),
+ 'age_limit': parse_age_limit(video_data.get('video_rating') or rating),
+ 'series': try_get(video_data, lambda x: x['show']['base']['display_title'], compat_str),
+ 'episode_number': int_or_none(episode.get('episode_number')),
+ 'episode_id': str_or_none(episode.get('id') or video_data.get('episode_id')),
+ 'season_number': int_or_none(season.get('season_number')),
+ 'season_id': str_or_none(season.get('id') or video_data.get('season_id')),
+ 'uploader': channel.get('name'),
+ 'uploader_id': str_or_none(channel.get('id')),
+ 'subtitles': subtitles,
+ }
+
+
+class ViceShowIE(ViceBaseIE):
+ IE_NAME = 'vice:show'
+ _VALID_URL = r'https?://(?:video\.vice|(?:www\.)?vice(?:land|tv))\.com/(?P<locale>[^/]+)/show/(?P<id>[^/?#&]+)'
+ _PAGE_SIZE = 25
+ _TESTS = [{
+ 'url': 'https://video.vice.com/en_us/show/fck-thats-delicious',
+ 'info_dict': {
+ 'id': '57a2040c8cb727dec794c901',
+ 'title': 'F*ck, That’s Delicious',
+ 'description': 'The life and eating habits of rap’s greatest bon vivant, Action Bronson.',
+ },
+ 'playlist_mincount': 64,
+ }, {
+ 'url': 'https://www.vicetv.com/en_us/show/fck-thats-delicious',
+ 'only_matching': True,
+ }]
+
+ def _fetch_page(self, locale, show_id, page):
+ videos = self._call_api('videos', 'show_id', show_id, locale, '''body
+ id
+ url''', ', page: %d, per_page: %d' % (page + 1, self._PAGE_SIZE))
+ for video in videos:
+ yield self.url_result(
+ video['url'], ViceIE.ie_key(), video.get('id'))
+
+ def _real_extract(self, url):
+ locale, display_id = self._match_valid_url(url).groups()
+ show = self._call_api('shows', 'slug', display_id, locale, '''dek
+ id
+ title''')[0]
+ show_id = show['id']
+
+ entries = OnDemandPagedList(
+ functools.partial(self._fetch_page, locale, show_id),
+ self._PAGE_SIZE)
+
+ return self.playlist_result(
+ entries, show_id, show.get('title'), show.get('dek'))
+
+
+class ViceArticleIE(ViceBaseIE):
+ IE_NAME = 'vice:article'
+ _VALID_URL = r'https?://(?:www\.)?vice\.com/(?P<locale>[^/]+)/article/(?:[0-9a-z]{6}/)?(?P<id>[^?#]+)'
+
+ _TESTS = [{
+ 'url': 'https://www.vice.com/en_us/article/on-set-with-the-woman-making-mormon-porn-in-utah',
+ 'info_dict': {
+ 'id': '58dc0a3dee202d2a0ccfcbd8',
+ 'ext': 'mp4',
+ 'title': 'Mormon War on Porn',
+ 'description': 'md5:1c5d91fe25fa8aa304f9def118b92dbf',
+ 'uploader': 'vice',
+ 'uploader_id': '57a204088cb727dec794c67b',
+ 'timestamp': 1491883129,
+ 'upload_date': '20170411',
+ 'age_limit': 17,
+ },
+ 'params': {
+ # AES-encrypted m3u8
+ 'skip_download': True,
+ },
+ 'add_ie': [ViceIE.ie_key()],
+ }, {
+ 'url': 'https://www.vice.com/en_us/article/how-to-hack-a-car',
+ 'md5': '13010ee0bc694ea87ec40724397c2349',
+ 'info_dict': {
+ 'id': '3jstaBeXgAs',
+ 'ext': 'mp4',
+ 'title': 'How to Hack a Car: Phreaked Out (Episode 2)',
+ 'description': 'md5:ee95453f7ff495db8efe14ae8bf56f30',
+ 'uploader': 'Motherboard',
+ 'uploader_id': 'MotherboardTV',
+ 'upload_date': '20140529',
+ },
+ 'add_ie': [YoutubeIE.ie_key()],
+ }, {
+ 'url': 'https://www.vice.com/en_us/article/znm9dx/karley-sciortino-slutever-reloaded',
+ 'md5': 'a7ecf64ee4fa19b916c16f4b56184ae2',
+ 'info_dict': {
+ 'id': '57f41d3556a0a80f54726060',
+ 'ext': 'mp4',
+ 'title': "Making The World's First Male Sex Doll",
+ 'description': 'md5:19b00b215b99961cf869c40fbe9df755',
+ 'uploader': 'vice',
+ 'uploader_id': '57a204088cb727dec794c67b',
+ 'timestamp': 1476919911,
+ 'upload_date': '20161019',
+ 'age_limit': 17,
+ },
+ 'params': {
+ 'skip_download': True,
+ },
+ 'add_ie': [ViceIE.ie_key()],
+ }, {
+ 'url': 'https://www.vice.com/en_us/article/cowboy-capitalists-part-1',
+ 'only_matching': True,
+ }, {
+ 'url': 'https://www.vice.com/ru/article/big-night-out-ibiza-clive-martin-229',
+ 'only_matching': True,
+ }]
+
+ def _real_extract(self, url):
+ locale, display_id = self._match_valid_url(url).groups()
+
+ article = self._call_api('articles', 'slug', display_id, locale, '''body
+ embed_code''')[0]
+ body = article['body']
+
+ def _url_res(video_url, ie_key):
+ return {
+ '_type': 'url_transparent',
+ 'url': video_url,
+ 'display_id': display_id,
+ 'ie_key': ie_key,
+ }
+
+ vice_url = ViceIE._extract_url(body)
+ if vice_url:
+ return _url_res(vice_url, ViceIE.ie_key())
+
+ youtube_url = YoutubeIE._extract_url(body)
+ if youtube_url:
+ return _url_res(youtube_url, YoutubeIE.ie_key())
+
+ video_url = self._html_search_regex(
+ r'data-video-url="([^"]+)"',
+ article['embed_code'], 'video URL')
+
+ return _url_res(video_url, ViceIE.ie_key())
diff --git a/yt_dlp/extractor/viddler.py b/yt_dlp/extractor/viddler.py
new file mode 100644
index 0000000..4091477
--- /dev/null
+++ b/yt_dlp/extractor/viddler.py
@@ -0,0 +1,135 @@
+from .common import InfoExtractor
+from ..utils import (
+ float_or_none,
+ int_or_none,
+)
+
+
+class ViddlerIE(InfoExtractor):
+ _VALID_URL = r'https?://(?:www\.)?viddler\.com/(?:v|embed|player)/(?P<id>[a-z0-9]+)(?:.+?\bsecret=(\d+))?'
+ _EMBED_REGEX = [r'<(?:iframe[^>]+?src|param[^>]+?value)=(["\'])(?P<url>(?:https?:)?//(?:www\.)?viddler\.com/(?:embed|player)/.+?)\1']
+
+ _TESTS = [{
+ 'url': 'http://www.viddler.com/v/43903784',
+ 'md5': '9eee21161d2c7f5b39690c3e325fab2f',
+ 'info_dict': {
+ 'id': '43903784',
+ 'ext': 'mov',
+ 'title': 'Video Made Easy',
+ 'description': 'md5:6a697ebd844ff3093bd2e82c37b409cd',
+ 'uploader': 'viddler',
+ 'timestamp': 1335371429,
+ 'upload_date': '20120425',
+ 'duration': 100.89,
+ 'thumbnail': r're:^https?://.*\.jpg$',
+ 'view_count': int,
+ 'comment_count': int,
+ 'categories': ['video content', 'high quality video', 'video made easy', 'how to produce video with limited resources', 'viddler'],
+ }
+ }, {
+ 'url': 'http://www.viddler.com/v/4d03aad9/',
+ 'md5': 'f12c5a7fa839c47a79363bfdf69404fb',
+ 'info_dict': {
+ 'id': '4d03aad9',
+ 'ext': 'ts',
+ 'title': 'WALL-TO-GORTAT',
+ 'upload_date': '20150126',
+ 'uploader': 'deadspin',
+ 'timestamp': 1422285291,
+ 'view_count': int,
+ 'comment_count': int,
+ }
+ }, {
+ 'url': 'http://www.viddler.com/player/221ebbbd/0/',
+ 'md5': '740511f61d3d1bb71dc14a0fe01a1c10',
+ 'info_dict': {
+ 'id': '221ebbbd',
+ 'ext': 'mov',
+ 'title': 'LETeens-Grammar-snack-third-conditional',
+ 'description': ' ',
+ 'upload_date': '20140929',
+ 'uploader': 'BCLETeens',
+ 'timestamp': 1411997190,
+ 'view_count': int,
+ 'comment_count': int,
+ }
+ }, {
+ # secret protected
+ 'url': 'http://www.viddler.com/v/890c0985?secret=34051570',
+ 'info_dict': {
+ 'id': '890c0985',
+ 'ext': 'mp4',
+ 'title': 'Complete Property Training - Traineeships',
+ 'description': ' ',
+ 'upload_date': '20130606',
+ 'uploader': 'TiffanyBowtell',
+ 'timestamp': 1370496993,
+ 'view_count': int,
+ 'comment_count': int,
+ },
+ 'params': {
+ 'skip_download': True,
+ },
+ }]
+
+ def _real_extract(self, url):
+ video_id, secret = self._match_valid_url(url).groups()
+
+ query = {
+ 'video_id': video_id,
+ 'key': 'v0vhrt7bg2xq1vyxhkct',
+ }
+ if secret:
+ query['secret'] = secret
+
+ data = self._download_json(
+ 'http://api.viddler.com/api/v2/viddler.videos.getPlaybackDetails.json',
+ video_id, headers={'Referer': url}, query=query)['video']
+
+ formats = []
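+        # Each ready file may yield up to three variants of the same profile;
+        # source_preference ranks them: cdn (1) > html5 (0) > original (-1)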
+ for filed in data['files']:
+ if filed.get('status', 'ready') != 'ready':
+ continue
+ format_id = filed.get('profile_id') or filed['profile_name']
+ f = {
+ 'format_id': format_id,
+ 'format_note': filed['profile_name'],
+ 'url': self._proto_relative_url(filed['url']),
+ 'width': int_or_none(filed.get('width')),
+ 'height': int_or_none(filed.get('height')),
+ 'filesize': int_or_none(filed.get('size')),
+ 'ext': filed.get('ext'),
+ 'source_preference': -1,
+ }
+ formats.append(f)
+
+ if filed.get('cdn_url'):
+ f = f.copy()
+ f['url'] = self._proto_relative_url(filed['cdn_url'], 'http:')
+ f['format_id'] = format_id + '-cdn'
+ f['source_preference'] = 1
+ formats.append(f)
+
+ if filed.get('html5_video_source'):
+ f = f.copy()
+ f['url'] = self._proto_relative_url(filed['html5_video_source'])
+ f['format_id'] = format_id + '-html5'
+ f['source_preference'] = 0
+ formats.append(f)
+
+ categories = [
+ t.get('text') for t in data.get('tags', []) if 'text' in t]
+
+ return {
+ 'id': video_id,
+ 'title': data['title'],
+ 'formats': formats,
+ 'description': data.get('description'),
+ 'timestamp': int_or_none(data.get('upload_time')),
+ 'thumbnail': self._proto_relative_url(data.get('thumbnail_url')),
+ 'uploader': data.get('author'),
+ 'duration': float_or_none(data.get('length')),
+ 'view_count': int_or_none(data.get('view_count')),
+ 'comment_count': int_or_none(data.get('comment_count')),
+ 'categories': categories,
+ }
diff --git a/yt_dlp/extractor/videa.py b/yt_dlp/extractor/videa.py
new file mode 100644
index 0000000..634d2ed
--- /dev/null
+++ b/yt_dlp/extractor/videa.py
@@ -0,0 +1,188 @@
+import random
+import string
+import struct
+
+from .common import InfoExtractor
+from ..compat import compat_b64decode, compat_ord
+from ..utils import (
+ ExtractorError,
+ int_or_none,
+ mimetype2ext,
+ parse_codecs,
+ parse_qs,
+ update_url_query,
+ urljoin,
+ xpath_element,
+ xpath_text,
+)
+
+
+class VideaIE(InfoExtractor):
+ _VALID_URL = r'''(?x)
+ https?://
+ videa(?:kid)?\.hu/
+ (?:
+ videok/(?:[^/]+/)*[^?#&]+-|
+ (?:videojs_)?player\?.*?\bv=|
+ player/v/
+ )
+ (?P<id>[^?#&]+)
+ '''
+ _EMBED_REGEX = [r'<iframe[^>]+src=(["\'])(?P<url>(?:https?:)?//videa\.hu/player\?.*?\bv=.+?)\1']
+ _TESTS = [{
+ 'url': 'http://videa.hu/videok/allatok/az-orult-kigyasz-285-kigyot-kigyo-8YfIAjxwWGwT8HVQ',
+ 'md5': '97a7af41faeaffd9f1fc864a7c7e7603',
+ 'info_dict': {
+ 'id': '8YfIAjxwWGwT8HVQ',
+ 'ext': 'mp4',
+ 'title': 'Az őrült kígyász 285 kígyót enged szabadon',
+ 'thumbnail': r're:^https?://.*',
+ 'duration': 21,
+ 'age_limit': 0,
+ },
+ }, {
+ 'url': 'http://videa.hu/videok/origo/jarmuvek/supercars-elozes-jAHDWfWSJH5XuFhH',
+ 'md5': 'd57ccd8812c7fd491d33b1eab8c99975',
+ 'info_dict': {
+ 'id': 'jAHDWfWSJH5XuFhH',
+ 'ext': 'mp4',
+ 'title': 'Supercars előzés',
+ 'thumbnail': r're:^https?://.*',
+ 'duration': 64,
+ 'age_limit': 0,
+ },
+ }, {
+ 'url': 'http://videa.hu/player?v=8YfIAjxwWGwT8HVQ',
+ 'md5': '97a7af41faeaffd9f1fc864a7c7e7603',
+ 'info_dict': {
+ 'id': '8YfIAjxwWGwT8HVQ',
+ 'ext': 'mp4',
+ 'title': 'Az őrült kígyász 285 kígyót enged szabadon',
+ 'thumbnail': r're:^https?://.*',
+ 'duration': 21,
+ 'age_limit': 0,
+ },
+ }, {
+ 'url': 'http://videa.hu/player/v/8YfIAjxwWGwT8HVQ?autoplay=1',
+ 'only_matching': True,
+ }, {
+ 'url': 'https://videakid.hu/videok/origo/jarmuvek/supercars-elozes-jAHDWfWSJH5XuFhH',
+ 'only_matching': True,
+ }, {
+ 'url': 'https://videakid.hu/player?v=8YfIAjxwWGwT8HVQ',
+ 'only_matching': True,
+ }, {
+ 'url': 'https://videakid.hu/player/v/8YfIAjxwWGwT8HVQ?autoplay=1',
+ 'only_matching': True,
+ }]
+ _STATIC_SECRET = 'xHb0ZvME5q8CBcoQi6AngerDu3FGO9fkUlwPmLVY_RTzj2hJIS4NasXWKy1td7p'
+
+ @staticmethod
+ def rc4(cipher_text, key):
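+        # Standard RC4: the first loop is the key-scheduling algorithm (KSA)
+        # that permutes S from the key; the second loop (PRGA) generates the
+        # keystream and XORs it with the ciphertext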
+ res = b''
+
+ key_len = len(key)
+ S = list(range(256))
+
+ j = 0
+ for i in range(256):
+ j = (j + S[i] + ord(key[i % key_len])) % 256
+ S[i], S[j] = S[j], S[i]
+
+ i = 0
+ j = 0
+ for m in range(len(cipher_text)):
+ i = (i + 1) % 256
+ j = (j + S[i]) % 256
+ S[i], S[j] = S[j], S[i]
+ k = S[(S[i] + S[j]) % 256]
+ res += struct.pack('B', k ^ compat_ord(cipher_text[m]))
+
+ return res.decode()
+
+ def _real_extract(self, url):
+ video_id = self._match_id(url)
+ video_page = self._download_webpage(url, video_id)
+
+ if 'videa.hu/player' in url:
+ player_url = url
+ player_page = video_page
+ else:
+ player_url = self._search_regex(
+ r'<iframe.*?src="(/player\?[^"]+)"', video_page, 'player url')
+ player_url = urljoin(url, player_url)
+ player_page = self._download_webpage(player_url, video_id)
+
+ nonce = self._search_regex(
+ r'_xt\s*=\s*"([^"]+)"', player_page, 'nonce')
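+        # The 64-char nonce splits into two 32-char halves: for each character
+        # of the first half, its position in _STATIC_SECRET determines which
+        # character of the second half is emitted. The first 16 chars of the
+        # result go into the query ('_t'); the last 16 feed the RC4 key below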
+ l = nonce[:32]
+ s = nonce[32:]
+ result = ''
+        for i in range(32):
+ result += s[i - (self._STATIC_SECRET.index(l[i]) - 31)]
+
+ query = parse_qs(player_url)
+ random_seed = ''.join(random.choices(string.ascii_letters + string.digits, k=8))
+ query['_s'] = random_seed
+ query['_t'] = result[:16]
+
+ b64_info, handle = self._download_webpage_handle(
+ 'http://videa.hu/player/xml', video_id, query=query)
+ if b64_info.startswith('<?xml'):
+ info = self._parse_xml(b64_info, video_id)
+ else:
+ key = result[16:] + random_seed + handle.headers['x-videa-xs']
+ info = self._parse_xml(self.rc4(
+ compat_b64decode(b64_info), key), video_id)
+
+ video = xpath_element(info, './video', 'video')
+ if video is None:
+            raise ExtractorError(xpath_text(
+                info, './error', 'error', fatal=True), expected=True)
+ sources = xpath_element(
+ info, './video_sources', 'sources', fatal=True)
+ hash_values = xpath_element(
+ info, './hash_values', 'hash values', fatal=False)
+
+ title = xpath_text(video, './title', fatal=True)
+
+ formats = []
+ for source in sources.findall('./video_source'):
+ source_url = source.text
+ source_name = source.get('name')
+ source_exp = source.get('exp')
+ if not (source_url and source_name):
+ continue
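+            # Signed sources carry a per-source MD5 hash and expiry timestamp
+            # that are echoed back as query parameters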
+ hash_value = (
+ xpath_text(hash_values, 'hash_value_' + source_name)
+ if hash_values is not None else None)
+ if hash_value and source_exp:
+ source_url = update_url_query(source_url, {
+ 'md5': hash_value,
+ 'expires': source_exp,
+ })
+ f = parse_codecs(source.get('codecs'))
+ f.update({
+ 'url': self._proto_relative_url(source_url),
+ 'ext': mimetype2ext(source.get('mimetype')) or 'mp4',
+ 'format_id': source.get('name'),
+ 'width': int_or_none(source.get('width')),
+ 'height': int_or_none(source.get('height')),
+ })
+ formats.append(f)
+
+ thumbnail = self._proto_relative_url(xpath_text(video, './poster_src'))
+
+ age_limit = None
+ is_adult = xpath_text(video, './is_adult_content', default=None)
+ if is_adult:
+ age_limit = 18 if is_adult == '1' else 0
+
+ return {
+ 'id': video_id,
+ 'title': title,
+ 'thumbnail': thumbnail,
+ 'duration': int_or_none(xpath_text(video, './duration')),
+ 'age_limit': age_limit,
+ 'formats': formats,
+ }
diff --git a/yt_dlp/extractor/videocampus_sachsen.py b/yt_dlp/extractor/videocampus_sachsen.py
new file mode 100644
index 0000000..37bc7d7
--- /dev/null
+++ b/yt_dlp/extractor/videocampus_sachsen.py
@@ -0,0 +1,253 @@
+import functools
+import re
+
+from .common import InfoExtractor
+from ..networking.exceptions import HTTPError
+from ..utils import ExtractorError, OnDemandPagedList, urlencode_postdata
+
+
+class VideocampusSachsenIE(InfoExtractor):
+ IE_NAME = 'ViMP'
+ _INSTANCES = (
+ 'bergauf.tv',
+ 'campus.demo.vimp.com',
+ 'corporate.demo.vimp.com',
+ 'dancehalldatabase.com',
+ 'drehzahl.tv',
+ 'educhannel.hs-gesundheit.de',
+ 'emedia.ls.haw-hamburg.de',
+ 'globale-evolution.net',
+ 'hohu.tv',
+ 'htvideos.hightechhigh.org',
+ 'k210039.vimp.mivitec.net',
+ 'media.cmslegal.com',
+ 'media.hs-furtwangen.de',
+ 'media.hwr-berlin.de',
+ 'mediathek.dkfz.de',
+ 'mediathek.htw-berlin.de',
+ 'mediathek.polizei-bw.de',
+ 'medien.hs-merseburg.de',
+ 'mportal.europa-uni.de',
+ 'pacific.demo.vimp.com',
+ 'slctv.com',
+ 'streaming.prairiesouth.ca',
+ 'tube.isbonline.cn',
+ 'univideo.uni-kassel.de',
+ 'ursula2.genetics.emory.edu',
+ 'ursulablicklevideoarchiv.com',
+ 'v.agrarumweltpaedagogik.at',
+ 'video.eplay-tv.de',
+ 'video.fh-dortmund.de',
+ 'video.hs-offenburg.de',
+ 'video.hs-pforzheim.de',
+ 'video.hspv.nrw.de',
+ 'video.irtshdf.fr',
+ 'video.pareygo.de',
+ 'video.tu-freiberg.de',
+ 'videocampus.sachsen.de',
+ 'videoportal.uni-freiburg.de',
+ 'videoportal.vm.uni-freiburg.de',
+ 'videos.duoc.cl',
+ 'videos.uni-paderborn.de',
+ 'vimp-bemus.udk-berlin.de',
+ 'vimp.aekwl.de',
+ 'vimp.hs-mittweida.de',
+ 'vimp.oth-regensburg.de',
+ 'vimp.ph-heidelberg.de',
+ 'vimp.sma-events.com',
+ 'vimp.weka-fachmedien.de',
+ 'webtv.univ-montp3.fr',
+ 'www.b-tu.de/media',
+ 'www.bergauf.tv',
+ 'www.bigcitytv.de',
+ 'www.cad-videos.de',
+ 'www.drehzahl.tv',
+ 'www.fh-bielefeld.de/medienportal',
+ 'www.hohu.tv',
+ 'www.orvovideo.com',
+ 'www.rwe.tv',
+ 'www.salzi.tv',
+ 'www.wenglor-media.com',
+ 'www2.univ-sba.dz',
+ )
+ _VALID_URL = r'''(?x)https?://(?P<host>%s)/(?:
+ m/(?P<tmp_id>[0-9a-f]+)|
+ (?:category/)?video/(?P<display_id>[\w-]+)/(?P<id>[0-9a-f]{32})|
+ media/embed.*(?:\?|&)key=(?P<embed_id>[0-9a-f]{32}&?)
+ )''' % ('|'.join(map(re.escape, _INSTANCES)))
+
+ _TESTS = [
+ {
+ 'url': 'https://videocampus.sachsen.de/m/e0d6c8ce6e394c188f1342f1ab7c50ed6fc4490b808699801def5cb2e46d76ca7367f622a9f516c542ffb805b24d6b643bd7c81f385acaac4c59081b87a2767b',
+ 'info_dict': {
+ 'id': 'e6b9349905c1628631f175712250f2a1',
+ 'title': 'Konstruktiver Entwicklungsprozess Vorlesung 7',
+ 'description': 'Konstruktiver Entwicklungsprozess Vorlesung 7',
+ 'thumbnail': 'https://videocampus.sachsen.de/cache/1a985379ad3aecba8097a6902c7daa4e.jpg',
+ 'ext': 'mp4',
+ },
+ },
+ {
+ 'url': 'https://videocampus.sachsen.de/video/Was-ist-selbstgesteuertes-Lernen/fc99c527e4205b121cb7c74433469262',
+ 'info_dict': {
+ 'id': 'fc99c527e4205b121cb7c74433469262',
+ 'title': 'Was ist selbstgesteuertes Lernen?',
+ 'description': 'md5:196aa3b0509a526db62f84679522a2f5',
+ 'thumbnail': 'https://videocampus.sachsen.de/cache/6f4a85096ba24cb398e6ce54446b57ae.jpg',
+ 'display_id': 'Was-ist-selbstgesteuertes-Lernen',
+ 'ext': 'mp4',
+ },
+ },
+ {
+ 'url': 'https://videocampus.sachsen.de/category/video/Tutorial-zur-Nutzung-von-Adobe-Connect-aus-Veranstalter-Sicht/09d4ed029002eb1bdda610f1103dd54c/100',
+ 'info_dict': {
+ 'id': '09d4ed029002eb1bdda610f1103dd54c',
+ 'title': 'Tutorial zur Nutzung von Adobe Connect aus Veranstalter-Sicht',
+ 'description': 'md5:3d379ca3cc17b9da6784d7f58cca4d58',
+ 'thumbnail': 'https://videocampus.sachsen.de/cache/2452498fe8c2d5a7dc79a05d30f407b6.jpg',
+ 'display_id': 'Tutorial-zur-Nutzung-von-Adobe-Connect-aus-Veranstalter-Sicht',
+ 'ext': 'mp4',
+ },
+ },
+ {
+ 'url': 'https://www2.univ-sba.dz/video/Presentation-de-la-Faculte-de-droit-et-des-sciences-politiques-Journee-portes-ouvertes-202122/0183356e41af7bfb83d7667b20d9b6a3',
+ 'info_dict': {
+ 'url': 'https://www2.univ-sba.dz/getMedium/0183356e41af7bfb83d7667b20d9b6a3.mp4',
+ 'id': '0183356e41af7bfb83d7667b20d9b6a3',
+ 'title': 'Présentation de la Faculté de droit et des sciences politiques - Journée portes ouvertes 2021/22',
+ 'description': 'md5:508958bd93e0ca002ac731d94182a54f',
+ 'thumbnail': 'https://www2.univ-sba.dz/cache/4d5d4a0b4189271a8cc6cb5328e14769.jpg',
+ 'display_id': 'Presentation-de-la-Faculte-de-droit-et-des-sciences-politiques-Journee-portes-ouvertes-202122',
+ 'ext': 'mp4',
+ }
+ },
+ {
+ 'url': 'https://vimp.weka-fachmedien.de/video/Preisverleihung-Produkte-des-Jahres-2022/c8816f1cc942c12b6cce57c835cffd7c',
+ 'info_dict': {
+ 'id': 'c8816f1cc942c12b6cce57c835cffd7c',
+ 'title': 'Preisverleihung »Produkte des Jahres 2022«',
+ 'description': 'md5:60c347568ca89aa25b772c4ea564ebd3',
+ 'thumbnail': 'https://vimp.weka-fachmedien.de/cache/da9f3090e9227b25beacf67ccf94de14.png',
+ 'display_id': 'Preisverleihung-Produkte-des-Jahres-2022',
+ 'ext': 'mp4',
+ },
+ },
+ {
+ 'url': 'https://videocampus.sachsen.de/media/embed?key=fc99c527e4205b121cb7c74433469262',
+ 'info_dict': {
+ 'id': 'fc99c527e4205b121cb7c74433469262',
+ 'title': 'Was ist selbstgesteuertes Lernen?',
+ 'ext': 'mp4',
+ },
+ },
+ ]
+
+ def _real_extract(self, url):
+ host, video_id, tmp_id, display_id, embed_id = self._match_valid_url(url).group(
+ 'host', 'id', 'tmp_id', 'display_id', 'embed_id')
+ webpage = self._download_webpage(url, video_id or tmp_id, fatal=False) or ''
+
+ if not video_id:
+ video_id = embed_id or self._html_search_regex(
+                rf'src="https?://{re.escape(host)}/media/embed.*(?:\?|&)key=([0-9a-f]+)&?',
+ webpage, 'video_id')
+
+ if not (display_id or tmp_id):
+            # Title and description from the embed page's meta tags wouldn't be correct
+ title = self._html_search_regex(r'<video-js[^>]* data-piwik-title="([^"<]+)"', webpage, 'title', fatal=False)
+ description = None
+ thumbnail = None
+ else:
+ title = self._html_search_meta(('og:title', 'twitter:title', 'title'), webpage, fatal=False)
+ description = self._html_search_meta(
+ ('og:description', 'twitter:description', 'description'), webpage, fatal=False)
+ thumbnail = self._html_search_meta(('og:image', 'twitter:image'), webpage, fatal=False)
+
+ formats, subtitles = [], {}
+ try:
+ formats, subtitles = self._extract_m3u8_formats_and_subtitles(
+ f'https://{host}/media/hlsMedium/key/{video_id}/format/auto/ext/mp4/learning/0/path/m3u8',
+ video_id, 'mp4', m3u8_id='hls', fatal=True)
+ except ExtractorError as e:
+ if not isinstance(e.cause, HTTPError) or e.cause.status not in (404, 500):
+ raise
+
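+        # The direct progressive download is always added as a fallback format;
+        # it is the only source when the HLS request above 404s or 500s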
+ formats.append({'url': f'https://{host}/getMedium/{video_id}.mp4'})
+
+ return {
+ 'id': video_id,
+ 'title': title,
+ 'description': description,
+ 'thumbnail': thumbnail,
+ 'display_id': display_id,
+ 'formats': formats,
+ 'subtitles': subtitles,
+ }
+
+
+class ViMPPlaylistIE(InfoExtractor):
+ IE_NAME = 'ViMP:Playlist'
+ _VALID_URL = r'''(?x)(?P<host>https?://(?:%s))/(?:
+ album/view/aid/(?P<album_id>[0-9]+)|
+ (?P<mode>category|channel)/(?P<name>[\w-]+)/(?P<id>[0-9]+)
+ )''' % '|'.join(map(re.escape, VideocampusSachsenIE._INSTANCES))
+
+ _TESTS = [{
+ 'url': 'https://vimp.oth-regensburg.de/channel/Designtheorie-1-SoSe-2020/3',
+ 'info_dict': {
+ 'id': 'channel-3',
+ 'title': 'Designtheorie 1 SoSe 2020 :: Channels :: ViMP OTH Regensburg',
+ },
+ 'playlist_mincount': 9,
+ }, {
+ 'url': 'https://www.fh-bielefeld.de/medienportal/album/view/aid/208',
+ 'info_dict': {
+ 'id': 'album-208',
+ 'title': 'KG Praktikum ABT/MEC :: Playlists :: FH-Medienportal',
+ },
+ 'playlist_mincount': 4,
+ }, {
+ 'url': 'https://videocampus.sachsen.de/category/online-tutorials-onyx/91',
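+            # Pseudo-streaming URLs encode <width>_<height>_<tbr> in the
+            # filename (e.g. a hypothetical name ending in _1280_720_2500.mp4)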
+ 'info_dict': {
+ 'id': 'category-91',
+ 'title': 'Online-Seminare ONYX - BPS - Bildungseinrichtungen - VCS',
+ },
+ 'playlist_mincount': 7,
+ }]
+ _PAGE_SIZE = 10
+
+ def _fetch_page(self, host, url_part, id, data, page):
+ webpage = self._download_webpage(
+ f'{host}/media/ajax/component/boxList/{url_part}', id,
+ query={'page': page, 'page_only': 1}, data=urlencode_postdata(data))
+ urls = re.findall(r'"([^"]+/video/[^"]+)"', webpage)
+
+ for url in urls:
+ yield self.url_result(host + url, VideocampusSachsenIE)
+
+ def _real_extract(self, url):
+ host, album_id, mode, name, id = self._match_valid_url(url).group(
+ 'host', 'album_id', 'mode', 'name', 'id')
+
+ webpage = self._download_webpage(url, album_id or id, fatal=False) or ''
+ title = (self._html_search_meta('title', webpage, fatal=False)
+ or self._html_extract_title(webpage))
+
+ url_part = (f'aid/{album_id}' if album_id
+ else f'category/{name}/category_id/{id}' if mode == 'category'
+ else f'title/{name}/channel/{id}')
+
+ mode = mode or 'album'
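+        # Numeric context codes used by the boxList endpoint:
+        # 4 = album, 1 = category, 3 = channel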
+ data = {
+ 'vars[mode]': mode,
+ f'vars[{mode}]': album_id or id,
+ 'vars[context]': '4' if album_id else '1' if mode == 'category' else '3',
+ 'vars[context_id]': album_id or id,
+ 'vars[layout]': 'thumb',
+ 'vars[per_page][thumb]': str(self._PAGE_SIZE),
+ }
+
+ return self.playlist_result(
+ OnDemandPagedList(functools.partial(
+ self._fetch_page, host, url_part, album_id or id, data), self._PAGE_SIZE),
+ playlist_title=title, id=f'{mode}-{album_id or id}')
diff --git a/yt_dlp/extractor/videodetective.py b/yt_dlp/extractor/videodetective.py
new file mode 100644
index 0000000..7928a41
--- /dev/null
+++ b/yt_dlp/extractor/videodetective.py
@@ -0,0 +1,27 @@
+from .common import InfoExtractor
+from .internetvideoarchive import InternetVideoArchiveIE
+
+
+class VideoDetectiveIE(InfoExtractor):
+ _VALID_URL = r'https?://(?:www\.)?videodetective\.com/[^/]+/[^/]+/(?P<id>\d+)'
+
+ _TEST = {
+ 'url': 'http://www.videodetective.com/movies/kick-ass-2/194487',
+ 'info_dict': {
+ 'id': '194487',
+ 'ext': 'mp4',
+ 'title': 'Kick-Ass 2',
+ 'description': 'md5:c189d5b7280400630a1d3dd17eaa8d8a',
+ },
+ 'params': {
+ # m3u8 download
+ 'skip_download': True,
+ },
+ }
+
+ def _real_extract(self, url):
+ video_id = self._match_id(url)
+ query = 'customerid=69249&publishedid=' + video_id
+ return self.url_result(
+ InternetVideoArchiveIE._build_json_url(query),
+ ie=InternetVideoArchiveIE.ie_key())
diff --git a/yt_dlp/extractor/videofyme.py b/yt_dlp/extractor/videofyme.py
new file mode 100644
index 0000000..f1f88c4
--- /dev/null
+++ b/yt_dlp/extractor/videofyme.py
@@ -0,0 +1,51 @@
+from .common import InfoExtractor
+from ..utils import (
+ int_or_none,
+ parse_iso8601,
+)
+
+
+class VideofyMeIE(InfoExtractor):
+ _WORKING = False
+ _VALID_URL = r'https?://(?:www\.videofy\.me/.+?|p\.videofy\.me/v)/(?P<id>\d+)(&|#|$)'
+ IE_NAME = 'videofy.me'
+
+ _TEST = {
+ 'url': 'http://www.videofy.me/thisisvideofyme/1100701',
+ 'md5': 'c77d700bdc16ae2e9f3c26019bd96143',
+ 'info_dict': {
+ 'id': '1100701',
+ 'ext': 'mp4',
+ 'title': 'This is VideofyMe',
+ 'description': '',
+ 'upload_date': '20130326',
+ 'timestamp': 1364288959,
+ 'uploader': 'VideofyMe',
+ 'uploader_id': 'thisisvideofyme',
+ 'view_count': int,
+ 'like_count': int,
+ 'comment_count': int,
+ },
+ }
+
+ def _real_extract(self, url):
+ video_id = self._match_id(url)
+
+ config = self._download_json('http://vf-player-info-loader.herokuapp.com/%s.json' % video_id, video_id)['videoinfo']
+
+ video = config.get('video')
+ blog = config.get('blog', {})
+
+ return {
+ 'id': video_id,
+ 'title': video['title'],
+ 'url': video['sources']['source']['url'],
+ 'thumbnail': video.get('thumb'),
+ 'description': video.get('description'),
+ 'timestamp': parse_iso8601(video.get('date')),
+ 'uploader': blog.get('name'),
+ 'uploader_id': blog.get('identifier'),
+ 'view_count': int_or_none(self._search_regex(r'([0-9]+)', video.get('views'), 'view count', fatal=False)),
+ 'like_count': int_or_none(video.get('likes')),
+ 'comment_count': int_or_none(video.get('nrOfComments')),
+ }
diff --git a/yt_dlp/extractor/videoken.py b/yt_dlp/extractor/videoken.py
new file mode 100644
index 0000000..eaf0cc8
--- /dev/null
+++ b/yt_dlp/extractor/videoken.py
@@ -0,0 +1,337 @@
+import base64
+import functools
+import math
+import re
+import time
+import urllib.parse
+
+from .common import InfoExtractor
+from .slideslive import SlidesLiveIE
+from ..utils import (
+ ExtractorError,
+ InAdvancePagedList,
+ int_or_none,
+ remove_start,
+ traverse_obj,
+ update_url_query,
+ url_or_none,
+)
+
+
+class VideoKenBaseIE(InfoExtractor):
+ _ORGANIZATIONS = {
+ 'videos.icts.res.in': 'icts',
+ 'videos.cncf.io': 'cncf',
+ 'videos.neurips.cc': 'neurips',
+ }
+ _BASE_URL_RE = rf'https?://(?P<host>{"|".join(map(re.escape, _ORGANIZATIONS))})/'
+
+ _PAGE_SIZE = 12
+
+ def _get_org_id_and_api_key(self, org, video_id):
+ details = self._download_json(
+ f'https://analytics.videoken.com/api/videolake/{org}/details', video_id,
+ note='Downloading organization ID and API key', headers={
+ 'Accept': 'application/json',
+ })
+ return details['id'], details['apikey']
+
+ def _create_slideslive_url(self, video_url, video_id, referer):
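+        # SlidesLive embeds appear to check their embedding page; the referrer
+        # is therefore forwarded as embed_parent_url/embed_container_origin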
+ if not video_url and not video_id:
+ return
+ elif not video_url or 'embed/sign-in' in video_url:
+ video_url = f'https://slideslive.com/embed/{remove_start(video_id, "slideslive-")}'
+ if url_or_none(referer):
+ return update_url_query(video_url, {
+ 'embed_parent_url': referer,
+ 'embed_container_origin': f'https://{urllib.parse.urlparse(referer).hostname}',
+ })
+ return video_url
+
+ def _extract_videos(self, videos, url):
+ for video in traverse_obj(videos, (('videos', 'results'), ...)):
+ video_id = traverse_obj(video, 'youtube_id', 'videoid')
+ if not video_id:
+ continue
+ ie_key = None
+ if traverse_obj(video, 'type', 'source') == 'youtube':
+ video_url = video_id
+ ie_key = 'Youtube'
+ else:
+ video_url = traverse_obj(video, 'embed_url', 'embeddableurl', expected_type=url_or_none)
+ if not video_url:
+ continue
+ elif urllib.parse.urlparse(video_url).hostname == 'slideslive.com':
+ ie_key = SlidesLiveIE
+ video_url = self._create_slideslive_url(video_url, video_id, url)
+ yield self.url_result(video_url, ie_key, video_id)
+
+
+class VideoKenIE(VideoKenBaseIE):
+ _VALID_URL = VideoKenBaseIE._BASE_URL_RE + r'(?:(?:topic|category)/[^/#?]+/)?video/(?P<id>[\w-]+)'
+ _TESTS = [{
+ # neurips -> videoken -> slideslive
+ 'url': 'https://videos.neurips.cc/video/slideslive-38922815',
+ 'info_dict': {
+ 'id': '38922815',
+ 'ext': 'mp4',
+ 'title': 'Efficient Processing of Deep Neural Network: from Algorithms to Hardware Architectures',
+ 'timestamp': 1630939331,
+ 'upload_date': '20210906',
+ 'thumbnail': r're:^https?://.*\.(?:jpg|png)',
+ 'thumbnails': 'count:330',
+ 'chapters': 'count:329',
+ },
+ 'params': {
+ 'skip_download': 'm3u8',
+ },
+ 'expected_warnings': ['Failed to download VideoKen API JSON'],
+ }, {
+ # neurips -> videoken -> slideslive -> youtube
+ 'url': 'https://videos.neurips.cc/topic/machine%20learning/video/slideslive-38923348',
+ 'info_dict': {
+ 'id': '2Xa_dt78rJE',
+ 'ext': 'mp4',
+ 'display_id': '38923348',
+ 'title': 'Machine Education',
+ 'description': 'Watch full version of this video at https://slideslive.com/38923348.',
+ 'channel': 'SlidesLive Videos - G2',
+ 'channel_id': 'UCOExahQQ588Da8Nft_Ltb9w',
+ 'channel_url': 'https://www.youtube.com/channel/UCOExahQQ588Da8Nft_Ltb9w',
+ 'uploader': 'SlidesLive Videos - G2',
+ 'uploader_id': 'UCOExahQQ588Da8Nft_Ltb9w',
+ 'uploader_url': 'http://www.youtube.com/channel/UCOExahQQ588Da8Nft_Ltb9w',
+ 'duration': 2504,
+ 'timestamp': 1618922125,
+ 'upload_date': '20200131',
+ 'age_limit': 0,
+ 'channel_follower_count': int,
+ 'view_count': int,
+ 'availability': 'unlisted',
+ 'live_status': 'not_live',
+ 'playable_in_embed': True,
+ 'categories': ['People & Blogs'],
+ 'tags': [],
+ 'thumbnail': r're:^https?://.*\.(?:jpg|webp)',
+ 'thumbnails': 'count:78',
+ 'chapters': 'count:77',
+ },
+ 'params': {
+ 'skip_download': 'm3u8',
+ },
+ 'expected_warnings': ['Failed to download VideoKen API JSON'],
+ }, {
+ # icts -> videoken -> youtube
+ 'url': 'https://videos.icts.res.in/topic/random%20variable/video/zysIsojYdvc',
+ 'info_dict': {
+ 'id': 'zysIsojYdvc',
+ 'ext': 'mp4',
+ 'title': 'Small-worlds, complex networks and random graphs (Lecture 3) by Remco van der Hofstad',
+ 'description': 'md5:87433069d79719eeadc1962cc2ace00b',
+ 'channel': 'International Centre for Theoretical Sciences',
+ 'channel_id': 'UCO3xnVTHzB7l-nc8mABUJIQ',
+ 'channel_url': 'https://www.youtube.com/channel/UCO3xnVTHzB7l-nc8mABUJIQ',
+ 'uploader': 'International Centre for Theoretical Sciences',
+ 'uploader_id': 'ICTStalks',
+ 'uploader_url': 'http://www.youtube.com/user/ICTStalks',
+ 'duration': 3372,
+ 'upload_date': '20191004',
+ 'age_limit': 0,
+ 'live_status': 'not_live',
+ 'availability': 'public',
+ 'playable_in_embed': True,
+ 'channel_follower_count': int,
+ 'like_count': int,
+ 'view_count': int,
+ 'categories': ['Science & Technology'],
+ 'tags': [],
+ 'thumbnail': r're:^https?://.*\.(?:jpg|webp)',
+ 'thumbnails': 'count:42',
+ 'chapters': 'count:20',
+ },
+ 'params': {
+ 'skip_download': 'm3u8',
+ },
+ }, {
+ 'url': 'https://videos.cncf.io/category/478/video/IL4nxbmUIX8',
+ 'only_matching': True,
+ }, {
+ 'url': 'https://videos.cncf.io/topic/kubernetes/video/YAM2d7yTrrI',
+ 'only_matching': True,
+ }, {
+ 'url': 'https://videos.icts.res.in/video/d7HuP_abpKU',
+ 'only_matching': True,
+ }]
+
+ def _real_extract(self, url):
+ hostname, video_id = self._match_valid_url(url).group('host', 'id')
+ org_id, _ = self._get_org_id_and_api_key(self._ORGANIZATIONS[hostname], video_id)
+ details = self._download_json(
+ 'https://analytics.videoken.com/api/videoinfo_private', video_id, query={
+ 'videoid': video_id,
+ 'org_id': org_id,
+ }, headers={'Accept': 'application/json'}, note='Downloading VideoKen API JSON',
+ errnote='Failed to download VideoKen API JSON', fatal=False)
+ if details:
+ return next(self._extract_videos({'videos': [details]}, url))
+ # fallback for API error 400 response
+ elif video_id.startswith('slideslive-'):
+ return self.url_result(
+ self._create_slideslive_url(None, video_id, url), SlidesLiveIE, video_id)
+ elif re.match(r'^[\w-]{11}$', video_id):
+ return self.url_result(video_id, 'Youtube', video_id)
+ else:
+ raise ExtractorError('Unable to extract without VideoKen API response')
+
+
+class VideoKenPlayerIE(VideoKenBaseIE):
+ _VALID_URL = r'https?://player\.videoken\.com/embed/slideslive-(?P<id>\d+)'
+ _TESTS = [{
+ 'url': 'https://player.videoken.com/embed/slideslive-38968434',
+ 'info_dict': {
+ 'id': '38968434',
+ 'ext': 'mp4',
+ 'title': 'Deep Learning with Label Differential Privacy',
+ 'timestamp': 1643377020,
+ 'upload_date': '20220128',
+ 'thumbnail': r're:^https?://.*\.(?:jpg|png)',
+ 'thumbnails': 'count:30',
+ 'chapters': 'count:29',
+ },
+ 'params': {
+ 'skip_download': 'm3u8',
+ },
+ }]
+
+ def _real_extract(self, url):
+ video_id = self._match_id(url)
+ return self.url_result(
+ self._create_slideslive_url(None, video_id, url), SlidesLiveIE, video_id)
+
+
+class VideoKenPlaylistIE(VideoKenBaseIE):
+ _VALID_URL = VideoKenBaseIE._BASE_URL_RE + r'(?:category/\d+/)?playlist/(?P<id>\d+)'
+ _TESTS = [{
+ 'url': 'https://videos.icts.res.in/category/1822/playlist/381',
+ 'playlist_mincount': 117,
+ 'info_dict': {
+ 'id': '381',
+ 'title': 'Cosmology - The Next Decade',
+ },
+ }]
+
+ def _real_extract(self, url):
+ hostname, playlist_id = self._match_valid_url(url).group('host', 'id')
+ org_id, _ = self._get_org_id_and_api_key(self._ORGANIZATIONS[hostname], playlist_id)
+ videos = self._download_json(
+ f'https://analytics.videoken.com/api/{org_id}/playlistitems/{playlist_id}/',
+ playlist_id, headers={'Accept': 'application/json'}, note='Downloading API JSON')
+ return self.playlist_result(self._extract_videos(videos, url), playlist_id, videos.get('title'))
+
+
+class VideoKenCategoryIE(VideoKenBaseIE):
+ _VALID_URL = VideoKenBaseIE._BASE_URL_RE + r'category/(?P<id>\d+)/?(?:$|[?#])'
+ _TESTS = [{
+ 'url': 'https://videos.icts.res.in/category/1822/',
+ 'playlist_mincount': 500,
+ 'info_dict': {
+ 'id': '1822',
+ 'title': 'Programs',
+ },
+ }, {
+ 'url': 'https://videos.neurips.cc/category/350/',
+ 'playlist_mincount': 34,
+ 'info_dict': {
+ 'id': '350',
+ 'title': 'NeurIPS 2018',
+ },
+ }, {
+ 'url': 'https://videos.cncf.io/category/479/',
+ 'playlist_mincount': 328,
+ 'info_dict': {
+ 'id': '479',
+ 'title': 'KubeCon + CloudNativeCon Europe\'19',
+ },
+ }]
+
+ def _get_category_page(self, category_id, org_id, page=1, note=None):
+ return self._download_json(
+ f'https://analytics.videoken.com/api/videolake/{org_id}/category_videos', category_id,
+            fatal=False, note=note or f'Downloading category page {page}',
+ query={
+ 'category_id': category_id,
+ 'page_number': page,
+ 'length': self._PAGE_SIZE,
+ }, headers={'Accept': 'application/json'}) or {}
+
+ def _entries(self, category_id, org_id, url, page):
+ videos = self._get_category_page(category_id, org_id, page + 1)
+ yield from self._extract_videos(videos, url)
+
+ def _real_extract(self, url):
+ hostname, category_id = self._match_valid_url(url).group('host', 'id')
+ org_id, _ = self._get_org_id_and_api_key(self._ORGANIZATIONS[hostname], category_id)
+ category_info = self._get_category_page(category_id, org_id, note='Downloading category info')
+ category = category_info['category_name']
+ total_pages = math.ceil(int(category_info['recordsTotal']) / self._PAGE_SIZE)
+ return self.playlist_result(InAdvancePagedList(
+ functools.partial(self._entries, category_id, org_id, url),
+ total_pages, self._PAGE_SIZE), category_id, category)
+
+
+class VideoKenTopicIE(VideoKenBaseIE):
+ _VALID_URL = VideoKenBaseIE._BASE_URL_RE + r'topic/(?P<id>[^/#?]+)/?(?:$|[?#])'
+ _TESTS = [{
+ 'url': 'https://videos.neurips.cc/topic/machine%20learning/',
+ 'playlist_mincount': 500,
+ 'info_dict': {
+ 'id': 'machine_learning',
+ 'title': 'machine learning',
+ },
+ }, {
+ 'url': 'https://videos.icts.res.in/topic/gravitational%20waves/',
+ 'playlist_mincount': 77,
+ 'info_dict': {
+ 'id': 'gravitational_waves',
+            'title': 'gravitational waves',
+ },
+ }, {
+ 'url': 'https://videos.cncf.io/topic/prometheus/',
+ 'playlist_mincount': 134,
+ 'info_dict': {
+ 'id': 'prometheus',
+ 'title': 'prometheus',
+ },
+ }]
+
+ def _get_topic_page(self, topic, org_id, search_id, api_key, page=1, note=None):
+ return self._download_json(
+ 'https://es.videoken.com/api/v1.0/get_results', topic, fatal=False, query={
+ 'orgid': org_id,
+ 'size': self._PAGE_SIZE,
+ 'query': topic,
+ 'page': page,
+ 'sort': 'upload_desc',
+ 'filter': 'all',
+ 'token': api_key,
+ 'is_topic': 'true',
+ 'category': '',
+ 'searchid': search_id,
+ }, headers={'Accept': 'application/json'},
+            note=note or f'Downloading topic page {page}') or {}
+
+ def _entries(self, topic, org_id, search_id, api_key, url, page):
+ videos = self._get_topic_page(topic, org_id, search_id, api_key, page + 1)
+ yield from self._extract_videos(videos, url)
+
+ def _real_extract(self, url):
+ hostname, topic_id = self._match_valid_url(url).group('host', 'id')
+ topic = urllib.parse.unquote(topic_id)
+ topic_id = topic.replace(' ', '_')
+ org_id, api_key = self._get_org_id_and_api_key(self._ORGANIZATIONS[hostname], topic)
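+        # The search API expects a transient search session ID:
+        # base64(":<topic>:<unix time>:transient")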
+ search_id = base64.b64encode(f':{topic}:{int(time.time())}:transient'.encode()).decode()
+ total_pages = int_or_none(self._get_topic_page(
+ topic, org_id, search_id, api_key, note='Downloading topic info')['total_no_of_pages'])
+ return self.playlist_result(InAdvancePagedList(
+ functools.partial(self._entries, topic, org_id, search_id, api_key, url),
+ total_pages, self._PAGE_SIZE), topic_id, topic)
diff --git a/yt_dlp/extractor/videomore.py b/yt_dlp/extractor/videomore.py
new file mode 100644
index 0000000..ddc33f7
--- /dev/null
+++ b/yt_dlp/extractor/videomore.py
@@ -0,0 +1,307 @@
+from .common import InfoExtractor
+from ..compat import (
+ compat_str,
+)
+from ..utils import (
+ int_or_none,
+ parse_qs,
+)
+
+
+class VideomoreBaseIE(InfoExtractor):
+ _API_BASE_URL = 'https://more.tv/api/v3/web/'
+ _VALID_URL_BASE = r'https?://(?:videomore\.ru|more\.tv)/'
+
+ def _download_page_data(self, display_id):
+ return self._download_json(
+ self._API_BASE_URL + 'PageData', display_id, query={
+ 'url': '/' + display_id,
+ })['attributes']['response']['data']
+
+ def _track_url_result(self, track):
+ track_vod = track['trackVod']
+ video_url = track_vod.get('playerLink') or track_vod['link']
+ return self.url_result(
+ video_url, VideomoreIE.ie_key(), track_vod.get('hubId'))
+
+
+class VideomoreIE(InfoExtractor):
+ IE_NAME = 'videomore'
+ _VALID_URL = r'''(?x)
+ videomore:(?P<sid>\d+)$|
+ https?://
+ (?:
+ videomore\.ru/
+ (?:
+ embed|
+ [^/]+/[^/]+
+ )/|
+ (?:
+ (?:player\.)?videomore\.ru|
+ siren\.more\.tv/player
+ )/[^/]*\?.*?\btrack_id=|
+ odysseus\.more\.tv/player/(?P<partner_id>\d+)/
+ )
+ (?P<id>\d+)
+ (?:[/?#&]|\.(?:xml|json)|$)
+ '''
+ _EMBED_REGEX = [r'''(?x)
+ (?:
+ <iframe[^>]+src=([\'"])|
+ <object[^>]+data=(["\'])https?://videomore\.ru/player\.swf\?.*config=
+ )(?P<url>https?://videomore\.ru/[^?#"']+/\d+(?:\.xml)?)
+ ''']
+ _TESTS = [{
+ 'url': 'http://videomore.ru/kino_v_detalayah/5_sezon/367617',
+ 'md5': '44455a346edc0d509ac5b5a5b531dc35',
+ 'info_dict': {
+ 'id': '367617',
+ 'ext': 'flv',
+ 'title': 'Кино в деталях 5 сезон В гостях Алексей Чумаков и Юлия Ковальчук',
+ 'series': 'Кино в деталях',
+ 'episode': 'В гостях Алексей Чумаков и Юлия Ковальчук',
+ 'thumbnail': r're:^https?://.*\.jpg',
+ 'duration': 2910,
+ 'view_count': int,
+ 'comment_count': int,
+ 'age_limit': 16,
+ },
+ 'skip': 'The video is not available for viewing.',
+ }, {
+ 'url': 'http://videomore.ru/embed/259974',
+ 'info_dict': {
+ 'id': '259974',
+ 'ext': 'mp4',
+ 'title': 'Молодежка 2 сезон 40 серия',
+ 'series': 'Молодежка',
+ 'season': '2 сезон',
+ 'episode': '40 серия',
+ 'thumbnail': r're:^https?://.*\.jpg',
+ 'duration': 2789,
+ 'view_count': int,
+ 'age_limit': 16,
+ },
+ 'params': {
+ 'skip_download': True,
+ },
+ }, {
+ 'url': 'http://videomore.ru/molodezhka/sezon_promo/341073',
+ 'info_dict': {
+ 'id': '341073',
+ 'ext': 'flv',
+ 'title': 'Промо Команда проиграла из-за Бакина?',
+ 'episode': 'Команда проиграла из-за Бакина?',
+ 'thumbnail': r're:^https?://.*\.jpg',
+ 'duration': 29,
+ 'age_limit': 16,
+ 'view_count': int,
+ },
+ 'params': {
+ 'skip_download': True,
+ },
+ 'skip': 'The video is not available for viewing.',
+ }, {
+ 'url': 'http://videomore.ru/elki_3?track_id=364623',
+ 'only_matching': True,
+ }, {
+ 'url': 'http://videomore.ru/embed/364623',
+ 'only_matching': True,
+ }, {
+ 'url': 'http://videomore.ru/video/tracks/364623.xml',
+ 'only_matching': True,
+ }, {
+ 'url': 'http://videomore.ru/video/tracks/364623.json',
+ 'only_matching': True,
+ }, {
+ 'url': 'http://videomore.ru/video/tracks/158031/quotes/33248',
+ 'only_matching': True,
+ }, {
+ 'url': 'videomore:367617',
+ 'only_matching': True,
+ }, {
+ 'url': 'https://player.videomore.ru/?partner_id=97&track_id=736234&autoplay=0&userToken=',
+ 'only_matching': True,
+ }, {
+ 'url': 'https://odysseus.more.tv/player/1788/352317',
+ 'only_matching': True,
+ }, {
+ 'url': 'https://siren.more.tv/player/config?track_id=352317&partner_id=1788&user_token=',
+ 'only_matching': True,
+ }]
+ _GEO_BYPASS = False
+
+ def _real_extract(self, url):
+ mobj = self._match_valid_url(url)
+ video_id = mobj.group('sid') or mobj.group('id')
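+ # partner_id can come from the odysseus URL path or the query string; fall back to 97, the id seen in the player.videomore.ru test URL above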
+ partner_id = mobj.group('partner_id') or parse_qs(url).get('partner_id', [None])[0] or '97'
+
+ item = self._download_json(
+ 'https://siren.more.tv/player/config', video_id, query={
+ 'partner_id': partner_id,
+ 'track_id': video_id,
+ })['data']['playlist']['items'][0]
+
+ title = item.get('title')
+ series = item.get('project_name')
+ season = item.get('season_name')
+ episode = item.get('episode_name')
+ if not title:
+ title = ' '.join(filter(None, (series, season, episode)))
+
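+ # Merge any bare dash_url/hls_url fields into the explicit stream list so both API response shapes are handled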
+ streams = item.get('streams') or []
+ for protocol in ('DASH', 'HLS'):
+ stream_url = item.get(protocol.lower() + '_url')
+ if stream_url:
+ streams.append({'protocol': protocol, 'url': stream_url})
+
+ formats = []
+ for stream in streams:
+ stream_url = stream.get('url')
+ if not stream_url:
+ continue
+ protocol = stream.get('protocol')
+ if protocol == 'DASH':
+ formats.extend(self._extract_mpd_formats(
+ stream_url, video_id, mpd_id='dash', fatal=False))
+ elif protocol == 'HLS':
+ formats.extend(self._extract_m3u8_formats(
+ stream_url, video_id, 'mp4', 'm3u8_native',
+ m3u8_id='hls', fatal=False))
+ elif protocol == 'MSS':
+ formats.extend(self._extract_ism_formats(
+ stream_url, video_id, ism_id='mss', fatal=False))
+
+ if not formats:
+ error = item.get('error')
+ if error:
+ if error in ('Данное видео недоступно для просмотра на территории этой страны',
+ 'Данное видео доступно для просмотра только на территории России'):
+ self.raise_geo_restricted(countries=['RU'], metadata_available=True)
+ self.raise_no_formats(error, expected=True)
+
+ return {
+ 'id': video_id,
+ 'title': title,
+ 'series': series,
+ 'season': season,
+ 'episode': episode,
+ 'thumbnail': item.get('thumbnail_url'),
+ 'duration': int_or_none(item.get('duration')),
+ 'view_count': int_or_none(item.get('views')),
+ 'age_limit': int_or_none(item.get('min_age')),
+ 'formats': formats,
+ }
+
+
+class VideomoreVideoIE(VideomoreBaseIE):
+ IE_NAME = 'videomore:video'
+ _VALID_URL = VideomoreBaseIE._VALID_URL_BASE + r'(?P<id>(?:(?:[^/]+/){2})?[^/?#&]+)(?:/*|[?#&].*?)$'
+ _TESTS = [{
+ # single video with og:video:iframe
+ 'url': 'http://videomore.ru/elki_3',
+ 'info_dict': {
+ 'id': '364623',
+ 'ext': 'flv',
+ 'title': 'Ёлки 3',
+ 'description': '',
+ 'thumbnail': r're:^https?://.*\.jpg',
+ 'duration': 5579,
+ 'age_limit': 6,
+ 'view_count': int,
+ },
+ 'params': {
+ 'skip_download': True,
+ },
+ 'skip': 'Requires logging in',
+ }, {
+ # season single series with og:video:iframe
+ 'url': 'http://videomore.ru/poslednii_ment/1_sezon/14_seriya',
+ 'info_dict': {
+ 'id': '352317',
+ 'ext': 'mp4',
+ 'title': 'Последний мент 1 сезон 14 серия',
+ 'series': 'Последний мент',
+ 'season': '1 сезон',
+ 'episode': '14 серия',
+ 'thumbnail': r're:^https?://.*\.jpg',
+ 'duration': 2464,
+ 'age_limit': 16,
+ 'view_count': int,
+ },
+ 'params': {
+ 'skip_download': True,
+ },
+ }, {
+ 'url': 'http://videomore.ru/sejchas_v_seti/serii_221-240/226_vypusk',
+ 'only_matching': True,
+ }, {
+ # single video without og:video:iframe
+ 'url': 'http://videomore.ru/marin_i_ego_druzya',
+ 'info_dict': {
+ 'id': '359073',
+ 'ext': 'flv',
+ 'title': '1 серия. Здравствуй, Аквавилль!',
+ 'description': 'md5:c6003179538b5d353e7bcd5b1372b2d7',
+ 'thumbnail': r're:^https?://.*\.jpg',
+ 'duration': 754,
+ 'age_limit': 6,
+ 'view_count': int,
+ },
+ 'params': {
+ 'skip_download': True,
+ },
+ 'skip': 'redirects to https://more.tv/'
+ }, {
+ 'url': 'https://videomore.ru/molodezhka/6_sezon/29_seriya?utm_so',
+ 'only_matching': True,
+ }, {
+ 'url': 'https://more.tv/poslednii_ment/1_sezon/14_seriya',
+ 'only_matching': True,
+ }]
+
+ @classmethod
+ def suitable(cls, url):
+ return False if VideomoreIE.suitable(url) else super().suitable(url)
+
+ def _real_extract(self, url):
+ display_id = self._match_id(url)
+ return self._track_url_result(self._download_page_data(display_id))
+
+
+class VideomoreSeasonIE(VideomoreBaseIE):
+ IE_NAME = 'videomore:season'
+ _VALID_URL = VideomoreBaseIE._VALID_URL_BASE + r'(?!embed)(?P<id>[^/]+/[^/?#&]+)(?:/*|[?#&].*?)$'
+ _TESTS = [{
+ 'url': 'http://videomore.ru/molodezhka/film_o_filme',
+ 'info_dict': {
+ 'id': 'molodezhka/film_o_filme',
+ 'title': 'Фильм о фильме',
+ },
+ 'playlist_mincount': 3,
+ }, {
+ 'url': 'http://videomore.ru/molodezhka/sezon_promo?utm_so',
+ 'only_matching': True,
+ }, {
+ 'url': 'https://more.tv/molodezhka/film_o_filme',
+ 'only_matching': True,
+ }]
+
+ @classmethod
+ def suitable(cls, url):
+ return (False if (VideomoreIE.suitable(url) or VideomoreVideoIE.suitable(url))
+ else super().suitable(url))
+
+ def _real_extract(self, url):
+ display_id = self._match_id(url)
+ season = self._download_page_data(display_id)
+ season_id = compat_str(season['id'])
+ tracks = self._download_json(
+ self._API_BASE_URL + 'seasons/%s/tracks' % season_id,
+ season_id)['data']
+ entries = []
+ for track in tracks:
+ entries.append(self._track_url_result(track))
+ return self.playlist_result(entries, display_id, season.get('title'))
diff --git a/yt_dlp/extractor/videopress.py b/yt_dlp/extractor/videopress.py
new file mode 100644
index 0000000..0734aee
--- /dev/null
+++ b/yt_dlp/extractor/videopress.py
@@ -0,0 +1,89 @@
+from .common import InfoExtractor
+from ..utils import (
+ determine_ext,
+ float_or_none,
+ int_or_none,
+ parse_age_limit,
+ qualities,
+ random_birthday,
+ unified_timestamp,
+ urljoin,
+)
+
+
+class VideoPressIE(InfoExtractor):
+ _ID_REGEX = r'[\da-zA-Z]{8}'
+ _PATH_REGEX = r'video(?:\.word)?press\.com/embed/'
+ _VALID_URL = r'https?://%s(?P<id>%s)' % (_PATH_REGEX, _ID_REGEX)
+ _EMBED_REGEX = [rf'<iframe[^>]+src=["\'](?P<url>(?:https?://)?{_PATH_REGEX}{_ID_REGEX})']
+ _TESTS = [{
+ 'url': 'https://videopress.com/embed/kUJmAcSf',
+ 'md5': '706956a6c875873d51010921310e4bc6',
+ 'info_dict': {
+ 'id': 'kUJmAcSf',
+ 'ext': 'mp4',
+ 'title': 'VideoPress Demo',
+ 'thumbnail': r're:^https?://.*\.jpg',
+ 'duration': 634.6,
+ 'timestamp': 1434983935,
+ 'upload_date': '20150622',
+ 'age_limit': 0,
+ },
+ }, {
+ # 17+, requires birth_* params
+ 'url': 'https://videopress.com/embed/iH3gstfZ',
+ 'only_matching': True,
+ }, {
+ 'url': 'https://video.wordpress.com/embed/kUJmAcSf',
+ 'only_matching': True,
+ }]
+
+ def _real_extract(self, url):
+ video_id = self._match_id(url)
+
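+ # Age-gated videos (see the 17+ test above) require birth_* query params, so send a random plausible birthday with every request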
+ query = random_birthday('birth_year', 'birth_month', 'birth_day')
+ query['fields'] = 'description,duration,file_url_base,files,height,original,poster,rating,title,upload_date,width'
+ video = self._download_json(
+ 'https://public-api.wordpress.com/rest/v1.1/videos/%s' % video_id,
+ video_id, query=query)
+
+ title = video['title']
+
+ file_url_base = video.get('file_url_base') or {}
+ base_url = file_url_base.get('https') or file_url_base.get('http')
+
+ QUALITIES = ('std', 'dvd', 'hd')
+ quality = qualities(QUALITIES)
+
+ formats = []
+ for format_id, f in (video.get('files') or {}).items():
+ if not isinstance(f, dict):
+ continue
+ for ext, path in f.items():
+ if ext in ('mp4', 'ogg'):
+ formats.append({
+ 'url': urljoin(base_url, path),
+ 'format_id': '%s-%s' % (format_id, ext),
+ 'ext': determine_ext(path, ext),
+ 'quality': quality(format_id),
+ })
+ original_url = video.get('original')
+ if original_url:
+ formats.append({
+ 'url': original_url,
+ 'format_id': 'original',
+ 'quality': len(QUALITIES),
+ 'width': int_or_none(video.get('width')),
+ 'height': int_or_none(video.get('height')),
+ })
+
+ return {
+ 'id': video_id,
+ 'title': title,
+ 'description': video.get('description'),
+ 'thumbnail': video.get('poster'),
+ 'duration': float_or_none(video.get('duration'), 1000),
+ 'timestamp': unified_timestamp(video.get('upload_date')),
+ 'age_limit': parse_age_limit(video.get('rating')),
+ 'formats': formats,
+ }
diff --git a/yt_dlp/extractor/vidio.py b/yt_dlp/extractor/vidio.py
new file mode 100644
index 0000000..770aa28
--- /dev/null
+++ b/yt_dlp/extractor/vidio.py
@@ -0,0 +1,309 @@
+from .common import InfoExtractor
+from ..utils import (
+ clean_html,
+ ExtractorError,
+ format_field,
+ get_element_by_class,
+ int_or_none,
+ parse_iso8601,
+ smuggle_url,
+ str_or_none,
+ strip_or_none,
+ try_get,
+ unsmuggle_url,
+ urlencode_postdata,
+)
+
+
+class VidioBaseIE(InfoExtractor):
+ _LOGIN_URL = 'https://www.vidio.com/users/login'
+ _NETRC_MACHINE = 'vidio'
+
+ def _perform_login(self, username, password):
+ def is_logged_in():
+ res = self._download_json(
+ 'https://www.vidio.com/interactions.json', None, 'Checking if logged in', fatal=False) or {}
+ return bool(res.get('current_user'))
+
+ if is_logged_in():
+ return
+
+ login_page = self._download_webpage(
+ self._LOGIN_URL, None, 'Downloading log in page')
+
+ login_form = self._form_hidden_inputs('login-form', login_page)
+ login_form.update({
+ 'user[login]': username,
+ 'user[password]': password,
+ })
+ login_post, login_post_urlh = self._download_webpage_handle(
+ self._LOGIN_URL, None, 'Logging in', data=urlencode_postdata(login_form), expected_status=[302, 401])
+
+ if login_post_urlh.status == 401:
+ if get_element_by_class('onboarding-content-register-popup__title', login_post):
+ raise ExtractorError(
+ 'Unable to log in: The provided email has not been registered yet.', expected=True)
+
+ reason = get_element_by_class('onboarding-form__general-error', login_post) or get_element_by_class('onboarding-modal__title', login_post)
+ if reason and 'Akun terhubung ke' in reason:
+ raise ExtractorError(
+ 'Unable to log in: Your account is linked to a social media account. '
+ 'Use --cookies to provide account credentials instead', expected=True)
+ elif reason:
+ subreason = get_element_by_class('onboarding-modal__description-text', login_post) or ''
+ raise ExtractorError(
+ 'Unable to log in: %s. %s' % (reason, clean_html(subreason)), expected=True)
+ raise ExtractorError('Unable to log in')
+
+ def _initialize_pre_login(self):
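+ # An empty POST to /auth returns the api_key that _call_api later sends as the X-API-KEY header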
+ self._api_key = self._download_json(
+ 'https://www.vidio.com/auth', None, data=b'')['api_key']
+
+ def _call_api(self, url, video_id, note=None):
+ return self._download_json(url, video_id, note=note, headers={
+ 'Content-Type': 'application/vnd.api+json',
+ 'X-API-KEY': self._api_key,
+ })
+
+
+class VidioIE(VidioBaseIE):
+ _VALID_URL = r'https?://(?:www\.)?vidio\.com/(watch|embed)/(?P<id>\d+)-(?P<display_id>[^/?#&]+)'
+ _TESTS = [{
+ 'url': 'http://www.vidio.com/watch/165683-dj_ambred-booyah-live-2015',
+ 'md5': 'abac81b1a205a8d94c609a473b5ea62a',
+ 'info_dict': {
+ 'id': '165683',
+ 'display_id': 'dj_ambred-booyah-live-2015',
+ 'ext': 'mp4',
+ 'title': 'DJ_AMBRED - Booyah (Live 2015)',
+ 'description': 'md5:27dc15f819b6a78a626490881adbadf8',
+ 'thumbnail': r're:^https?://.*\.jpg$',
+ 'duration': 149,
+ 'like_count': int,
+ 'uploader': 'TWELVE Pic',
+ 'timestamp': 1444902800,
+ 'upload_date': '20151015',
+ 'uploader_id': 'twelvepictures',
+ 'channel': 'Cover Music Video',
+ 'channel_id': '280236',
+ 'view_count': int,
+ 'dislike_count': int,
+ 'comment_count': int,
+ 'tags': 'count:3',
+ 'uploader_url': 'https://www.vidio.com/@twelvepictures',
+ },
+ }, {
+ 'url': 'https://www.vidio.com/watch/77949-south-korea-test-fires-missile-that-can-strike-all-of-the-north',
+ 'only_matching': True,
+ }, {
+ # Premier-exclusive video
+ 'url': 'https://www.vidio.com/watch/1550718-stand-by-me-doraemon',
+ 'only_matching': True
+ }, {
+ # embed url from https://enamplus.liputan6.com/read/5033648/video-fakta-temuan-suspek-cacar-monyet-di-jawa-tengah
+ 'url': 'https://www.vidio.com/embed/7115874-fakta-temuan-suspek-cacar-monyet-di-jawa-tengah',
+ 'info_dict': {
+ 'id': '7115874',
+ 'ext': 'mp4',
+ 'channel_id': '40172876',
+ 'comment_count': int,
+ 'uploader_id': 'liputan6',
+ 'view_count': int,
+ 'dislike_count': int,
+ 'upload_date': '20220804',
+ 'uploader': 'Liputan6.com',
+ 'display_id': 'fakta-temuan-suspek-cacar-monyet-di-jawa-tengah',
+ 'channel': 'ENAM PLUS 165',
+ 'timestamp': 1659605520,
+ 'title': 'Fakta Temuan Suspek Cacar Monyet di Jawa Tengah',
+ 'duration': 59,
+ 'like_count': int,
+ 'tags': ['monkeypox indonesia', 'cacar monyet menyebar', 'suspek cacar monyet di indonesia', 'fakta', 'hoax atau bukan?', 'jawa tengah'],
+ 'thumbnail': 'https://thumbor.prod.vidiocdn.com/83PN-_BKm5sS7emLtRxl506MLqQ=/640x360/filters:quality(70)/vidio-web-prod-video/uploads/video/image/7115874/fakta-suspek-cacar-monyet-di-jawa-tengah-24555a.jpg',
+ 'uploader_url': 'https://www.vidio.com/@liputan6',
+ 'description': 'md5:6d595a18d3b19ee378e335a6f288d5ac',
+ },
+ }]
+
+ def _real_extract(self, url):
+ match = self._match_valid_url(url).groupdict()
+ video_id, display_id = match.get('id'), match.get('display_id')
+ data = self._call_api('https://api.vidio.com/videos/' + video_id, display_id)
+ video = data['videos'][0]
+ title = video['title'].strip()
+ is_premium = video.get('is_premium')
+
+ if is_premium:
+ sources = self._download_json(
+ 'https://www.vidio.com/interactions_stream.json?video_id=%s&type=videos' % video_id,
+ display_id, note='Downloading premier API JSON')
+ if not (sources.get('source') or sources.get('source_dash')):
+ self.raise_login_required('This video is only available for registered users with the appropriate subscription')
+
+ formats, subs = [], {}
+ if sources.get('source'):
+ hls_formats, hls_subs = self._extract_m3u8_formats_and_subtitles(
+ sources['source'], display_id, 'mp4', 'm3u8_native')
+ formats.extend(hls_formats)
+ subs.update(hls_subs)
+ if sources.get('source_dash'): # TODO: Find video example with source_dash
+ dash_formats, dash_subs = self._extract_mpd_formats_and_subtitles(
+ sources['source_dash'], display_id, 'dash')
+ formats.extend(dash_formats)
+ subs.update(dash_subs)
+ else:
+ hls_url = data['clips'][0]['hls_url']
+ formats, subs = self._extract_m3u8_formats_and_subtitles(
+ hls_url, display_id, 'mp4', 'm3u8_native')
+
+ get_first = lambda x: try_get(data, lambda y: y[x + 's'][0], dict) or {}
+ channel = get_first('channel')
+ user = get_first('user')
+ username = user.get('username')
+ get_count = lambda x: int_or_none(video.get('total_' + x))
+
+ return {
+ 'id': video_id,
+ 'display_id': display_id,
+ 'title': title,
+ 'description': strip_or_none(video.get('description')),
+ 'thumbnail': video.get('image_url_medium'),
+ 'duration': int_or_none(video.get('duration')),
+ 'like_count': get_count('likes'),
+ 'formats': formats,
+ 'subtitles': subs,
+ 'uploader': user.get('name'),
+ 'timestamp': parse_iso8601(video.get('created_at')),
+ 'uploader_id': username,
+ 'uploader_url': format_field(username, None, 'https://www.vidio.com/@%s'),
+ 'channel': channel.get('name'),
+ 'channel_id': str_or_none(channel.get('id')),
+ 'view_count': get_count('view_count'),
+ 'dislike_count': get_count('dislikes'),
+ 'comment_count': get_count('comments'),
+ 'tags': video.get('tag_list'),
+ }
+
+
+class VidioPremierIE(VidioBaseIE):
+ _VALID_URL = r'https?://(?:www\.)?vidio\.com/premier/(?P<id>\d+)/(?P<display_id>[^/?#&]+)'
+ _TESTS = [{
+ 'url': 'https://www.vidio.com/premier/2885/badai-pasti-berlalu',
+ 'playlist_mincount': 14,
+ }, {
+ # Series with both free and premier-exclusive videos
+ 'url': 'https://www.vidio.com/premier/2567/sosmed',
+ 'only_matching': True,
+ }]
+
+ def _playlist_entries(self, playlist_url, display_id):
+ index = 1
+ while playlist_url:
+ playlist_json = self._call_api(playlist_url, display_id, 'Downloading API JSON page %s' % index)
+ for video_json in playlist_json.get('data', []):
+ link = video_json['links']['watchpage']
+ yield self.url_result(link, 'Vidio', video_json['id'])
+ playlist_url = try_get(playlist_json, lambda x: x['links']['next'])
+ index += 1
+
+ def _real_extract(self, url):
+ url, idata = unsmuggle_url(url, {})
+ playlist_id, display_id = self._match_valid_url(url).groups()
+
+ playlist_url = idata.get('url')
+ if playlist_url: # Smuggled data contains an API URL. Download only that playlist
+ playlist_id = idata['id']
+ return self.playlist_result(
+ self._playlist_entries(playlist_url, playlist_id),
+ playlist_id=playlist_id, playlist_title=idata.get('title'))
+
+ playlist_data = self._call_api('https://api.vidio.com/content_profiles/%s/playlists' % playlist_id, display_id)
+
+ return self.playlist_from_matches(
+ playlist_data.get('data', []), playlist_id=playlist_id, ie=self.ie_key(),
+ getter=lambda data: smuggle_url(url, {
+ 'url': data['relationships']['videos']['links']['related'],
+ 'id': data['id'],
+ 'title': try_get(data, lambda x: x['attributes']['name'])
+ }))
+
+
+class VidioLiveIE(VidioBaseIE):
+ _VALID_URL = r'https?://(?:www\.)?vidio\.com/live/(?P<id>\d+)-(?P<display_id>[^/?#&]+)'
+ _TESTS = [{
+ 'url': 'https://www.vidio.com/live/204-sctv',
+ 'info_dict': {
+ 'id': '204',
+ 'title': 'SCTV',
+ 'uploader': 'SCTV',
+ 'uploader_id': 'sctv',
+ 'thumbnail': r're:^https?://.*\.jpg$',
+ },
+ }, {
+ # Premier-exclusive livestream
+ 'url': 'https://www.vidio.com/live/6362-tvn',
+ 'only_matching': True,
+ }, {
+ # DRM premier-exclusive livestream
+ 'url': 'https://www.vidio.com/live/6299-bein-1',
+ 'only_matching': True,
+ }]
+
+ def _real_extract(self, url):
+ video_id, display_id = self._match_valid_url(url).groups()
+ stream_data = self._call_api(
+ 'https://www.vidio.com/api/livestreamings/%s/detail' % video_id, display_id)
+ stream_meta = stream_data['livestreamings'][0]
+ user = stream_data.get('users', [{}])[0]
+
+ title = stream_meta.get('title')
+ username = user.get('username')
+
+ formats = []
+ if stream_meta.get('is_drm'):
+ if not self.get_param('allow_unplayable_formats'):
+ self.report_drm(video_id)
+ if stream_meta.get('is_premium'):
+ sources = self._download_json(
+ 'https://www.vidio.com/interactions_stream.json?video_id=%s&type=livestreamings' % video_id,
+ display_id, note='Downloading premier API JSON')
+ if not (sources.get('source') or sources.get('source_dash')):
+ self.raise_login_required('This video is only available for registered users with the appropriate subscription')
+
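+ # HLS playback needs a short-lived token (empty POST to /live/<id>/tokens); DASH URLs are detected but unsupported for now, hence the bare pass branches below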
+ if str_or_none(sources.get('source')):
+ token_json = self._download_json(
+ 'https://www.vidio.com/live/%s/tokens' % video_id,
+ display_id, note='Downloading HLS token JSON', data=b'')
+ formats.extend(self._extract_m3u8_formats(
+ sources['source'] + '?' + token_json.get('token', ''), display_id, 'mp4', 'm3u8_native'))
+ if str_or_none(sources.get('source_dash')):
+ pass
+ else:
+ if stream_meta.get('stream_token_url'):
+ token_json = self._download_json(
+ 'https://www.vidio.com/live/%s/tokens' % video_id,
+ display_id, note='Downloading HLS token JSON', data=b'')
+ formats.extend(self._extract_m3u8_formats(
+ stream_meta['stream_token_url'] + '?' + token_json.get('token', ''),
+ display_id, 'mp4', 'm3u8_native'))
+ if stream_meta.get('stream_dash_url'):
+ pass
+ if stream_meta.get('stream_url'):
+ formats.extend(self._extract_m3u8_formats(
+ stream_meta['stream_url'], display_id, 'mp4', 'm3u8_native'))
+
+ return {
+ 'id': video_id,
+ 'display_id': display_id,
+ 'title': title,
+ 'is_live': True,
+ 'description': strip_or_none(stream_meta.get('description')),
+ 'thumbnail': stream_meta.get('image'),
+ 'like_count': int_or_none(stream_meta.get('like')),
+ 'dislike_count': int_or_none(stream_meta.get('dislike')),
+ 'formats': formats,
+ 'uploader': user.get('name'),
+ 'timestamp': parse_iso8601(stream_meta.get('start_time')),
+ 'uploader_id': username,
+ 'uploader_url': format_field(username, None, 'https://www.vidio.com/@%s'),
+ }
diff --git a/yt_dlp/extractor/vidlii.py b/yt_dlp/extractor/vidlii.py
new file mode 100644
index 0000000..44353b7
--- /dev/null
+++ b/yt_dlp/extractor/vidlii.py
@@ -0,0 +1,154 @@
+import re
+
+from .common import InfoExtractor
+from ..networking import HEADRequest
+from ..utils import (
+ format_field,
+ float_or_none,
+ get_element_by_id,
+ int_or_none,
+ str_to_int,
+ strip_or_none,
+ unified_strdate,
+ urljoin,
+)
+
+
+class VidLiiIE(InfoExtractor):
+ _VALID_URL = r'https?://(?:www\.)?vidlii\.com/(?:watch|embed)\?.*?\bv=(?P<id>[0-9A-Za-z_-]{11})'
+ _TESTS = [{
+ 'url': 'https://www.vidlii.com/watch?v=tJluaH4BJ3v',
+ 'md5': '9bf7d1e005dfa909b6efb0a1ff5175e2',
+ 'info_dict': {
+ 'id': 'tJluaH4BJ3v',
+ 'ext': 'mp4',
+ 'title': 'Vidlii is against me',
+ 'description': 'md5:fa3f119287a2bfb922623b52b1856145',
+ 'thumbnail': r're:https://.*\.jpg',
+ 'uploader': 'APPle5auc31995',
+ 'uploader_url': 'https://www.vidlii.com/user/APPle5auc31995',
+ 'upload_date': '20171107',
+ 'duration': 212,
+ 'view_count': int,
+ 'comment_count': int,
+ 'average_rating': float,
+ 'categories': ['News & Politics'],
+ 'tags': ['Vidlii', 'Jan', 'Videogames'],
+ }
+ }, {
+ 'url': 'https://www.vidlii.com/watch?v=zTAtaAgOLKt',
+ 'md5': '5778f7366aa4c569b77002f8bf6b614f',
+ 'info_dict': {
+ 'id': 'zTAtaAgOLKt',
+ 'ext': 'mp4',
+ 'title': 'FULPTUBE SUCKS.',
+ 'description': 'md5:087b2ca355d4c8f8f77e97c43e72d711',
+ 'thumbnail': 'https://www.vidlii.com/usfi/thmp/zTAtaAgOLKt.jpg',
+ 'uploader': 'Homicide',
+ 'uploader_url': 'https://www.vidlii.com/user/Homicide',
+ 'upload_date': '20210612',
+ 'duration': 89,
+ 'view_count': int,
+ 'comment_count': int,
+ 'average_rating': float,
+ 'categories': ['News & Politics'],
+ 'tags': ['fulp', 'tube', 'sucks', 'bad', 'fulptube'],
+ },
+ }, {
+ 'url': 'https://www.vidlii.com/embed?v=tJluaH4BJ3v&a=0',
+ 'only_matching': True,
+ }]
+
+ def _real_extract(self, url):
+ video_id = self._match_id(url)
+
+ webpage = self._download_webpage(
+ 'https://www.vidlii.com/watch?v=%s' % video_id, video_id)
+ formats = []
+
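+ # Scrape candidate source URLs from the player JS, then HEAD-check each quality variant and keep only the ones that actually respond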
+ sources = [source[1] for source in re.findall(
+ r'src\s*:\s*(["\'])(?P<url>(?:https?://)?(?:(?!\1).)+)\1',
+ webpage) or []]
+ for source in sources:
+ source = urljoin(url, source)
+ height = int(self._search_regex(r'(\d+)\.mp4', source, 'height', default=360))
+ if self._request_webpage(HEADRequest(source), video_id, f'Checking {height}p url', errnote=False):
+ formats.append({
+ 'url': source,
+ 'format_id': f'{height}p',
+ 'height': height,
+ })
+
+ title = self._search_regex(
+ (r'<h1>([^<]+)</h1>', r'<title>([^<]+) - VidLii<'), webpage,
+ 'title')
+
+ description = self._html_search_meta(
+ ('description', 'twitter:description'), webpage,
+ default=None) or strip_or_none(
+ get_element_by_id('des_text', webpage))
+
+ thumbnail = self._html_search_meta(
+ 'twitter:image', webpage, default=None)
+ if not thumbnail:
+ thumbnail_path = self._search_regex(
+ r'img\s*:\s*(["\'])(?P<url>(?:(?!\1).)+)\1', webpage,
+ 'thumbnail', fatal=False, group='url')
+ if thumbnail_path:
+ thumbnail = urljoin(url, thumbnail_path)
+
+ uploader = self._search_regex(
+ r'<div[^>]+class=["\']wt_person[^>]+>\s*<a[^>]+\bhref=["\']/user/[^>]+>([^<]+)',
+ webpage, 'uploader', fatal=False)
+ uploader_url = format_field(uploader, None, 'https://www.vidlii.com/user/%s')
+
+ upload_date = unified_strdate(self._html_search_meta(
+ 'datePublished', webpage, default=None) or self._search_regex(
+ r'<date>([^<]+)', webpage, 'upload date', fatal=False))
+
+ duration = int_or_none(self._html_search_meta(
+ 'video:duration', webpage, 'duration',
+ default=None) or self._search_regex(
+ r'duration\s*:\s*(\d+)', webpage, 'duration', fatal=False))
+
+ view_count = str_to_int(self._search_regex(
+ (r'<strong>([,0-9]+)</strong> views',
+ r'Views\s*:\s*<strong>([,0-9]+)</strong>'),
+ webpage, 'view count', fatal=False))
+
+ comment_count = int_or_none(self._search_regex(
+ (r'<span[^>]+id=["\']cmt_num[^>]+>(\d+)',
+ r'Comments\s*:\s*<strong>(\d+)'),
+ webpage, 'comment count', fatal=False))
+
+ average_rating = float_or_none(self._search_regex(
+ r'rating\s*:\s*([\d.]+)', webpage, 'average rating', fatal=False))
+
+ category = self._html_search_regex(
+ r'<div>Category\s*:\s*</div>\s*<div>\s*<a[^>]+>([^<]+)', webpage,
+ 'category', fatal=False)
+ categories = [category] if category else None
+
+ tags = [
+ strip_or_none(tag)
+ for tag in re.findall(
+ r'<a[^>]+\bhref=["\']/results\?.*?q=[^>]*>([^<]+)',
+ webpage) if strip_or_none(tag)
+ ] or None
+
+ return {
+ 'id': video_id,
+ 'title': title,
+ 'description': description,
+ 'thumbnail': thumbnail,
+ 'uploader': uploader,
+ 'formats': formats,
+ 'uploader_url': uploader_url,
+ 'upload_date': upload_date,
+ 'duration': duration,
+ 'view_count': view_count,
+ 'comment_count': comment_count,
+ 'average_rating': average_rating,
+ 'categories': categories,
+ 'tags': tags,
+ }
diff --git a/yt_dlp/extractor/vidly.py b/yt_dlp/extractor/vidly.py
new file mode 100644
index 0000000..49a1960
--- /dev/null
+++ b/yt_dlp/extractor/vidly.py
@@ -0,0 +1,83 @@
+from .common import InfoExtractor
+from ..utils import (
+ ExtractorError,
+ mimetype2ext,
+ url_or_none,
+)
+from ..utils.traversal import traverse_obj
+
+
+class VidlyIE(InfoExtractor):
+ _VALID_URL = r'https?://(?:vid\.ly/|(?:s\.)?vid\.ly/embeded\.html\?(?:[^#]+&)?link=)(?P<id>\w+)'
+ _EMBED_REGEX = [r'<script[^>]+\bsrc=[\'"](?P<url>(?:https?:)?//vid\.ly/\w+/embed[^\'"]+)',
+ r'<iframe[^>]+\bsrc=[\'"](?P<url>(?:https?:)?//(?:s\.)?vid\.ly/embeded\.html\?(?:[^#\'"]+&)?link=\w+[^\'"]+)']
+ _TESTS = [{
+ # JWPlayer 7, Embeds forbidden
+ 'url': 'https://vid.ly/2i3o9j/embed',
+ 'info_dict': {
+ 'id': '2i3o9j',
+ 'ext': 'mp4',
+ 'title': '2i3o9j',
+ 'thumbnail': r're:https://\w+\.cloudfront\.net/',
+ },
+ }, {
+ # JWPlayer 6
+ 'url': 'http://s.vid.ly/embeded.html?link=jw_test&new=1&autoplay=true&controls=true',
+ 'info_dict': {
+ 'id': 'jw_test',
+ 'ext': 'mp4',
+ 'title': '2x8m8t',
+ 'thumbnail': r're:https://\w+\.cloudfront\.net/',
+ },
+ }, {
+ # Vidlyplayer
+ 'url': 'https://vid.ly/7x0e6l',
+ 'info_dict': {
+ 'id': '7x0e6l',
+ 'ext': 'mp4',
+ 'title': '7x0e6l',
+ },
+ }]
+ _WEBPAGE_TESTS = [{
+ 'url': 'https://www.petfinder.com/dog/gus-57378930/tn/ooltewah/furever-furkids-rescue-tn592/',
+ 'info_dict': {
+ 'id': 'w8p5b0',
+ 'ext': 'mp4',
+ 'title': 'w8p5b0',
+ 'thumbnail': r're:https://\w+\.cloudfront\.net/',
+ }
+ }]
+
+ def _real_extract(self, url):
+ video_id = self._match_id(url)
+
+ embed_script = self._download_webpage(
+ f'https://vid.ly/{video_id}/embed', video_id, headers={'Referer': 'https://vid.ly/'})
+ player = self._search_json(r'initCallback\(', embed_script, 'player', video_id)
+
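+ # Dispatch on the embedded player type: JWPlayer configs parse directly, while the legacy Vidly player exposes plain source/source_hd URLs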
+ player_type = player.get('player') or ''
+ if player_type.startswith('jwplayer'):
+ return self._parse_jwplayer_data(player['config'], video_id)
+ elif not player_type.startswith('vidly'):
+ raise ExtractorError(f'Unknown player type {player_type!r}')
+
+ formats = []
+ ext = mimetype2ext(traverse_obj(player, ('config', 'type')))
+ for source, fid in [('source', 'sd'), ('source_hd', 'hd')]:
+ if traverse_obj(player, ('config', source, {url_or_none})):
+ formats.append({
+ 'url': player['config'][source],
+ 'format_id': f'http-{fid}',
+ 'ext': ext,
+ })
+ # Has higher quality formats
+ formats.extend(self._extract_m3u8_formats(
+ f'https://d3fenhwk93s16g.cloudfront.net/{video_id}/hls.m3u8', video_id,
+ fatal=False, note='Requesting higher quality m3u8 formats',
+ errnote='No higher quality m3u8 formats found') or [])
+
+ return {
+ 'id': video_id,
+ 'title': video_id,
+ 'formats': formats,
+ }
diff --git a/yt_dlp/extractor/viewlift.py b/yt_dlp/extractor/viewlift.py
new file mode 100644
index 0000000..c5d65cd
--- /dev/null
+++ b/yt_dlp/extractor/viewlift.py
@@ -0,0 +1,362 @@
+import json
+
+from .common import InfoExtractor
+from ..networking.exceptions import HTTPError
+from ..utils import (
+ ExtractorError,
+ int_or_none,
+ parse_age_limit,
+ traverse_obj,
+)
+
+
+class ViewLiftBaseIE(InfoExtractor):
+ _API_BASE = 'https://prod-api.viewlift.com/'
+ _DOMAINS_REGEX = r'(?:(?:main\.)?snagfilms|snagxtreme|funnyforfree|kiddovid|winnersview|(?:monumental|lax)sportsnetwork|vayafilm|failarmy|ftfnext|lnppass\.legapallacanestro|moviespree|app\.myoutdoortv|neoufitness|pflmma|theidentitytb|chorki)\.com|(?:hoichoi|app\.horseandcountry|kronon|marquee|supercrosslive)\.tv'
+ _SITE_MAP = {
+ 'ftfnext': 'lax',
+ 'funnyforfree': 'snagfilms',
+ 'hoichoi': 'hoichoitv',
+ 'kiddovid': 'snagfilms',
+ 'laxsportsnetwork': 'lax',
+ 'legapallacanestro': 'lnp',
+ 'marquee': 'marquee-tv',
+ 'monumentalsportsnetwork': 'monumental-network',
+ 'moviespree': 'bingeflix',
+ 'pflmma': 'pfl',
+ 'snagxtreme': 'snagfilms',
+ 'theidentitytb': 'tampabay',
+ 'vayafilm': 'snagfilms',
+ 'chorki': 'prothomalo',
+ }
+ _TOKENS = {}
+
+ def _fetch_token(self, site, url):
+ if self._TOKENS.get(site):
+ return
+
+ cookies = self._get_cookies(url)
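+ # The site's "token" cookie holds URL-encoded JSON; pull authorizationToken out of it (%22 is an encoded double quote)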
+ if cookies and cookies.get('token'):
+ self._TOKENS[site] = self._search_regex(r'22authorizationToken%22:%22([^%]+)%22', cookies['token'].value, 'token')
+ if not self._TOKENS.get(site):
+ self.raise_login_required('Cookies (not necessarily logged in) are needed to download from this website', method='cookies')
+
+ def _call_api(self, site, path, video_id, url, query):
+ self._fetch_token(site, url)
+ try:
+ return self._download_json(
+ self._API_BASE + path, video_id, headers={'Authorization': self._TOKENS.get(site)}, query=query)
+ except ExtractorError as e:
+ if isinstance(e.cause, HTTPError) and e.cause.status == 403:
+ webpage = e.cause.response.read().decode()
+ try:
+ error_message = traverse_obj(json.loads(webpage), 'errorMessage', 'message')
+ except json.JSONDecodeError:
+ raise ExtractorError(f'{site} said: {webpage}', cause=e.cause)
+ if error_message:
+ if 'has not purchased' in error_message:
+ self.raise_login_required(method='cookies')
+ raise ExtractorError(error_message, expected=True)
+ raise
+
+
+class ViewLiftEmbedIE(ViewLiftBaseIE):
+ IE_NAME = 'viewlift:embed'
+ _VALID_URL = r'https?://(?:(?:www|embed)\.)?(?P<domain>%s)/embed/player\?.*\bfilmId=(?P<id>[\da-f]{8}-(?:[\da-f]{4}-){3}[\da-f]{12})' % ViewLiftBaseIE._DOMAINS_REGEX
+ _EMBED_REGEX = [r'<iframe[^>]+?src=(["\'])(?P<url>(?:https?:)?//(?:embed\.)?(?:%s)/embed/player.+?)\1' % ViewLiftBaseIE._DOMAINS_REGEX]
+ _TESTS = [{
+ 'url': 'http://embed.snagfilms.com/embed/player?filmId=74849a00-85a9-11e1-9660-123139220831&w=500',
+ 'md5': '2924e9215c6eff7a55ed35b72276bd93',
+ 'info_dict': {
+ 'id': '74849a00-85a9-11e1-9660-123139220831',
+ 'ext': 'mp4',
+ 'title': '#whilewewatch',
+ 'description': 'md5:b542bef32a6f657dadd0df06e26fb0c8',
+ 'timestamp': 1334350096,
+ 'upload_date': '20120413',
+ }
+ }, {
+ # invalid labels, 360p is better than 480p
+ 'url': 'http://www.snagfilms.com/embed/player?filmId=17ca0950-a74a-11e0-a92a-0026bb61d036',
+ 'md5': '882fca19b9eb27ef865efeeaed376a48',
+ 'info_dict': {
+ 'id': '17ca0950-a74a-11e0-a92a-0026bb61d036',
+ 'ext': 'mp4',
+ 'title': 'Life in Limbo',
+ },
+ 'skip': 'The video does not exist',
+ }, {
+ 'url': 'http://www.snagfilms.com/embed/player?filmId=0000014c-de2f-d5d6-abcf-ffef58af0017',
+ 'only_matching': True,
+ }]
+
+ def _real_extract(self, url):
+ domain, film_id = self._match_valid_url(url).groups()
+ site = domain.split('.')[-2]
+ if site in self._SITE_MAP:
+ site = self._SITE_MAP[site]
+
+ content_data = self._call_api(
+ site, 'entitlement/video/status', film_id, url, {
+ 'id': film_id
+ })['video']
+ gist = content_data['gist']
+ title = gist['title']
+ video_assets = content_data['streamingInfo']['videoAssets']
+
+ hls_url = video_assets.get('hls')
+ formats, subtitles = [], {}
+ if hls_url:
+ formats, subtitles = self._extract_m3u8_formats_and_subtitles(
+ hls_url, film_id, 'mp4', 'm3u8_native', m3u8_id='hls', fatal=False)
+
+ for video_asset in video_assets.get('mpeg') or []:
+ video_asset_url = video_asset.get('url')
+ if not video_asset_url:
+ continue
+ bitrate = int_or_none(video_asset.get('bitrate'))
+ height = int_or_none(self._search_regex(
+ r'^_?(\d+)[pP]$', video_asset.get('renditionValue'),
+ 'height', default=None))
+ formats.append({
+ 'url': video_asset_url,
+ 'format_id': 'http%s' % ('-%d' % bitrate if bitrate else ''),
+ 'tbr': bitrate,
+ 'height': height,
+ 'vcodec': video_asset.get('codec'),
+ })
+
+ subs = {}
+ for sub in traverse_obj(content_data, ('contentDetails', 'closedCaptions')) or []:
+ sub_url = sub.get('url')
+ if not sub_url:
+ continue
+ subs.setdefault(sub.get('language', 'English'), []).append({
+ 'url': sub_url,
+ })
+
+ return {
+ 'id': film_id,
+ 'title': title,
+ 'description': gist.get('description'),
+ 'thumbnail': gist.get('videoImageUrl'),
+ 'duration': int_or_none(gist.get('runtime')),
+ 'age_limit': parse_age_limit(content_data.get('parentalRating')),
+ 'timestamp': int_or_none(gist.get('publishDate'), 1000),
+ 'formats': formats,
+ 'subtitles': self._merge_subtitles(subs, subtitles),
+ 'categories': traverse_obj(content_data, ('categories', ..., 'title')),
+ 'tags': traverse_obj(content_data, ('tags', ..., 'title')),
+ }
+
+
+class ViewLiftIE(ViewLiftBaseIE):
+ IE_NAME = 'viewlift'
+ _API_BASE = 'https://prod-api-cached-2.viewlift.com/'
+ _VALID_URL = r'https?://(?:www\.)?(?P<domain>%s)(?P<path>(?:/(?:films/title|show|(?:news/)?videos?|watch))?/(?P<id>[^?#]+))' % ViewLiftBaseIE._DOMAINS_REGEX
+ _TESTS = [{
+ 'url': 'http://www.snagfilms.com/films/title/lost_for_life',
+ 'md5': '19844f897b35af219773fd63bdec2942',
+ 'info_dict': {
+ 'id': '0000014c-de2f-d5d6-abcf-ffef58af0017',
+ 'display_id': 'lost_for_life',
+ 'ext': 'mp4',
+ 'title': 'Lost for Life',
+ 'description': 'md5:ea10b5a50405ae1f7b5269a6ec594102',
+ 'thumbnail': r're:^https?://.*\.jpg',
+ 'duration': 4489,
+ 'categories': 'mincount:3',
+ 'age_limit': 14,
+ 'upload_date': '20150421',
+ 'timestamp': 1429656820,
+ }
+ }, {
+ 'url': 'http://www.snagfilms.com/show/the_world_cut_project/india',
+ 'md5': 'e6292e5b837642bbda82d7f8bf3fbdfd',
+ 'info_dict': {
+ 'id': '00000145-d75c-d96e-a9c7-ff5c67b20000',
+ 'display_id': 'the_world_cut_project/india',
+ 'ext': 'mp4',
+ 'title': 'India',
+ 'description': 'md5:5c168c5a8f4719c146aad2e0dfac6f5f',
+ 'thumbnail': r're:^https?://.*\.jpg',
+ 'duration': 979,
+ 'timestamp': 1399478279,
+ 'upload_date': '20140507',
+ }
+ }, {
+ 'url': 'http://main.snagfilms.com/augie_alone/s_2_ep_12_love',
+ 'info_dict': {
+ 'id': '00000148-7b53-de26-a9fb-fbf306f70020',
+ 'display_id': 'augie_alone/s_2_ep_12_love',
+ 'ext': 'mp4',
+ 'title': 'S. 2 Ep. 12 - Love',
+ 'description': 'Augie finds love.',
+ 'thumbnail': r're:^https?://.*\.jpg',
+ 'duration': 107,
+ 'upload_date': '20141012',
+ 'timestamp': 1413129540,
+ 'age_limit': 17,
+ },
+ 'params': {
+ 'skip_download': True,
+ },
+ }, {
+ 'url': 'http://main.snagfilms.com/films/title/the_freebie',
+ 'only_matching': True,
+ }, {
+ # Film is not playable in your area.
+ 'url': 'http://www.snagfilms.com/films/title/inside_mecca',
+ 'only_matching': True,
+ }, {
+ # Film is not available.
+ 'url': 'http://www.snagfilms.com/show/augie_alone/flirting',
+ 'only_matching': True,
+ }, {
+ 'url': 'http://www.winnersview.com/videos/the-good-son',
+ 'only_matching': True,
+ }, {
+ # Was once Kaltura embed
+ 'url': 'https://www.monumentalsportsnetwork.com/videos/john-carlson-postgame-2-25-15',
+ 'only_matching': True,
+ }, {
+ 'url': 'https://www.marquee.tv/watch/sadlerswells-sacredmonsters',
+ 'only_matching': True,
+ }, { # Free film with language code
+ 'url': 'https://www.hoichoi.tv/bn/films/title/shuyopoka',
+ 'info_dict': {
+ 'id': '7a7a9d33-1f4c-4771-9173-ee4fb6dbf196',
+ 'ext': 'mp4',
+ 'title': 'Shuyopoka',
+ 'description': 'md5:e28f2fb8680096a69c944d37c1fa5ffc',
+ 'thumbnail': r're:^https?://.*\.jpg$',
+ 'upload_date': '20211006',
+ },
+ 'params': {'skip_download': True},
+ }, { # Free film
+ 'url': 'https://www.hoichoi.tv/films/title/dadu-no1',
+ 'info_dict': {
+ 'id': '0000015b-b009-d126-a1db-b81ff3780000',
+ 'ext': 'mp4',
+ 'title': 'Dadu No.1',
+ 'description': 'md5:605cba408e51a79dafcb824bdeded51e',
+ 'thumbnail': r're:^https?://.*\.jpg$',
+ 'upload_date': '20210827',
+ },
+ 'params': {'skip_download': True},
+ }, { # Free episode
+ 'url': 'https://www.hoichoi.tv/webseries/case-jaundice-s01-e01',
+ 'info_dict': {
+ 'id': 'f779e07c-30c8-459c-8612-5a834ab5e5ba',
+ 'ext': 'mp4',
+ 'title': 'Humans Vs. Corona',
+ 'description': 'md5:ca30a682b4528d02a3eb6d0427dd0f87',
+ 'thumbnail': r're:^https?://.*\.jpg$',
+ 'upload_date': '20210830',
+ 'series': 'Case Jaundice'
+ },
+ 'params': {'skip_download': True},
+ }, { # Free video
+ 'url': 'https://www.hoichoi.tv/videos/1549072415320-six-episode-02-hindi',
+ 'info_dict': {
+ 'id': 'b41fa1ce-aca6-47b6-b208-283ff0a2de30',
+ 'ext': 'mp4',
+ 'title': 'Woman in red - Hindi',
+ 'description': 'md5:9d21edc1827d32f8633eb67c2054fc31',
+ 'thumbnail': r're:^https?://.*\.jpg$',
+ 'upload_date': '20211006',
+ 'series': 'Six (Hindi)'
+ },
+ 'params': {'skip_download': True},
+ }, { # Free episode
+ 'url': 'https://www.hoichoi.tv/shows/watch-asian-paints-moner-thikana-online-season-1-episode-1',
+ 'info_dict': {
+ 'id': '1f45d185-8500-455c-b88d-13252307c3eb',
+ 'ext': 'mp4',
+ 'title': 'Jisshu Sengupta',
+ 'description': 'md5:ef6ffae01a3d83438597367400f824ed',
+ 'thumbnail': r're:^https?://.*\.jpg$',
+ 'upload_date': '20211004',
+ 'series': 'Asian Paints Moner Thikana'
+ },
+ 'params': {'skip_download': True},
+ }, { # Free series
+ 'url': 'https://www.hoichoi.tv/shows/watch-moner-thikana-bengali-web-series-online',
+ 'playlist_mincount': 5,
+ 'info_dict': {
+ 'id': 'watch-moner-thikana-bengali-web-series-online',
+ },
+ }, { # Premium series
+ 'url': 'https://www.hoichoi.tv/shows/watch-byomkesh-bengali-web-series-online',
+ 'playlist_mincount': 14,
+ 'info_dict': {
+ 'id': 'watch-byomkesh-bengali-web-series-online',
+ },
+ }, { # Premium movie
+ 'url': 'https://www.hoichoi.tv/movies/detective-2020',
+ 'only_matching': True
+ }, { # Chorki Premium series
+ 'url': 'https://www.chorki.com/bn/series/sinpaat',
+ 'playlist_mincount': 7,
+ 'info_dict': {
+ 'id': 'bn/series/sinpaat',
+ },
+ }, { # Chorki free movie
+ 'url': 'https://www.chorki.com/bn/videos/bangla-movie-bikkhov',
+ 'info_dict': {
+ 'id': '564e755b-f5c7-4515-aee6-8959bee18c93',
+ 'title': 'Bikkhov',
+ 'ext': 'mp4',
+ 'upload_date': '20230824',
+ 'timestamp': 1692860553,
+ 'categories': ['Action Movies', 'Salman Special'],
+ 'tags': 'count:14',
+ 'thumbnail': 'https://snagfilms-a.akamaihd.net/dd078ff5-b16e-45e4-9723-501b56b9df0a/images/2023/08/24/1692860450729_1920x1080_16x9Images.jpg',
+ 'display_id': 'bn/videos/bangla-movie-bikkhov',
+ 'description': 'md5:71492b086450625f4374a3eb824f27dc',
+ 'duration': 8002,
+ },
+ 'params': {
+ 'skip_download': True,
+ },
+ }, { # Chorki Premium movie
+ 'url': 'https://www.chorki.com/bn/videos/something-like-an-autobiography',
+ 'only_matching': True,
+ }]
+
+ @classmethod
+ def suitable(cls, url):
+ return False if ViewLiftEmbedIE.suitable(url) else super().suitable(url)
+
+ def _show_entries(self, domain, seasons):
+ for season in seasons:
+ for episode in season.get('episodes') or []:
+ path = traverse_obj(episode, ('gist', 'permalink'))
+ if path:
+ yield self.url_result(f'https://www.{domain}{path}', ie=self.ie_key())
+
+ def _real_extract(self, url):
+ domain, path, display_id = self._match_valid_url(url).groups()
+ site = domain.split('.')[-2]
+ if site in self._SITE_MAP:
+ site = self._SITE_MAP[site]
+ modules = self._call_api(
+ site, 'content/pages', display_id, url, {
+ 'includeContent': 'true',
+ 'moduleOffset': 1,
+ 'path': path,
+ 'site': site,
+ })['modules']
+
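+ # A ShowDetailModule marks a series page and yields a playlist of episode URLs; otherwise the film id comes from the VideoDetailModule and is delegated to the embed extractor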
+ seasons = next((m['contentData'][0]['seasons'] for m in modules if m.get('moduleType') == 'ShowDetailModule'), None)
+ if seasons:
+ return self.playlist_result(self._show_entries(domain, seasons), display_id)
+
+ film_id = next(m['contentData'][0]['gist']['id'] for m in modules if m.get('moduleType') == 'VideoDetailModule')
+ return {
+ '_type': 'url_transparent',
+ 'url': 'http://%s/embed/player?filmId=%s' % (domain, film_id),
+ 'id': film_id,
+ 'display_id': display_id,
+ 'ie_key': 'ViewLiftEmbed',
+ }
diff --git a/yt_dlp/extractor/viidea.py b/yt_dlp/extractor/viidea.py
new file mode 100644
index 0000000..649ffe3
--- /dev/null
+++ b/yt_dlp/extractor/viidea.py
@@ -0,0 +1,199 @@
+import re
+
+from .common import InfoExtractor
+from ..compat import (
+ compat_str,
+ compat_urlparse,
+)
+from ..networking.exceptions import HTTPError
+from ..utils import (
+ ExtractorError,
+ js_to_json,
+ parse_duration,
+ parse_iso8601,
+)
+
+
+class ViideaIE(InfoExtractor):
+ _VALID_URL = r'''(?x)https?://(?:www\.)?(?:
+ videolectures\.net|
+ flexilearn\.viidea\.net|
+ presentations\.ocwconsortium\.org|
+ video\.travel-zoom\.si|
+ video\.pomp-forum\.si|
+ tv\.nil\.si|
+ video\.hekovnik\.com|
+ video\.szko\.si|
+ kpk\.viidea\.com|
+ inside\.viidea\.net|
+ video\.kiberpipa\.org|
+ bvvideo\.si|
+ kongres\.viidea\.net|
+ edemokracija\.viidea\.com
+ )(?:/lecture)?/(?P<id>[^/]+)(?:/video/(?P<part>\d+))?/*(?:[#?].*)?$'''
+
+ _TESTS = [{
+ 'url': 'http://videolectures.net/promogram_igor_mekjavic_eng/',
+ 'info_dict': {
+ 'id': '20171',
+ 'display_id': 'promogram_igor_mekjavic_eng',
+ 'ext': 'mp4',
+ 'title': 'Automatics, robotics and biocybernetics',
+ 'description': 'md5:815fc1deb6b3a2bff99de2d5325be482',
+ 'thumbnail': r're:http://.*\.jpg',
+ 'timestamp': 1372349289,
+ 'upload_date': '20130627',
+ 'duration': 565,
+ },
+ 'params': {
+ # m3u8 download
+ 'skip_download': True,
+ },
+ }, {
+ # video with invalid direct format links (HTTP 403)
+ 'url': 'http://videolectures.net/russir2010_filippova_nlp/',
+ 'info_dict': {
+ 'id': '14891',
+ 'display_id': 'russir2010_filippova_nlp',
+ 'ext': 'flv',
+ 'title': 'NLP at Google',
+ 'description': 'md5:fc7a6d9bf0302d7cc0e53f7ca23747b3',
+ 'thumbnail': r're:http://.*\.jpg',
+ 'timestamp': 1284375600,
+ 'upload_date': '20100913',
+ 'duration': 5352,
+ },
+ 'params': {
+ # rtmp download
+ 'skip_download': True,
+ },
+ }, {
+ # event playlist
+ 'url': 'http://videolectures.net/deeplearning2015_montreal/',
+ 'info_dict': {
+ 'id': '23181',
+ 'title': 'Deep Learning Summer School, Montreal 2015',
+ 'description': 'md5:0533a85e4bd918df52a01f0e1ebe87b7',
+ 'thumbnail': r're:http://.*\.jpg',
+ 'timestamp': 1438560000,
+ },
+ 'playlist_count': 30,
+ }, {
+ # multi part lecture
+ 'url': 'http://videolectures.net/mlss09uk_bishop_ibi/',
+ 'info_dict': {
+ 'id': '9737',
+ 'display_id': 'mlss09uk_bishop_ibi',
+ 'title': 'Introduction To Bayesian Inference',
+ 'thumbnail': r're:http://.*\.jpg',
+ 'timestamp': 1251622800,
+ },
+ 'playlist': [{
+ 'info_dict': {
+ 'id': '9737_part1',
+ 'display_id': 'mlss09uk_bishop_ibi_part1',
+ 'ext': 'wmv',
+ 'title': 'Introduction To Bayesian Inference (Part 1)',
+ 'thumbnail': r're:http://.*\.jpg',
+ 'duration': 4622,
+ 'timestamp': 1251622800,
+ 'upload_date': '20090830',
+ },
+ }, {
+ 'info_dict': {
+ 'id': '9737_part2',
+ 'display_id': 'mlss09uk_bishop_ibi_part2',
+ 'ext': 'wmv',
+ 'title': 'Introduction To Bayesian Inference (Part 2)',
+ 'thumbnail': r're:http://.*\.jpg',
+ 'duration': 5641,
+ 'timestamp': 1251622800,
+ 'upload_date': '20090830',
+ },
+ }],
+ 'playlist_count': 2,
+ }]
+
+ def _real_extract(self, url):
+ lecture_slug, explicit_part_id = self._match_valid_url(url).groups()
+
+ webpage = self._download_webpage(url, lecture_slug)
+
+ cfg = self._parse_json(self._search_regex(
+ [r'cfg\s*:\s*({.+?})\s*,\s*[\da-zA-Z_]+\s*:\s*\(?\s*function',
+ r'cfg\s*:\s*({[^}]+})'],
+ webpage, 'cfg'), lecture_slug, js_to_json)
+
+ lecture_id = compat_str(cfg['obj_id'])
+
+ base_url = self._proto_relative_url(cfg['livepipe'], 'http:')
+
+ try:
+ lecture_data = self._download_json(
+ '%s/site/api/lecture/%s?format=json' % (base_url, lecture_id),
+ lecture_id)['lecture'][0]
+ except ExtractorError as e:
+ if isinstance(e.cause, HTTPError) and e.cause.status == 403:
+ msg = self._parse_json(
+ e.cause.response.read().decode('utf-8'), lecture_id)
+ raise ExtractorError(msg['detail'], expected=True)
+ raise
+
+ lecture_info = {
+ 'id': lecture_id,
+ 'display_id': lecture_slug,
+ 'title': lecture_data['title'],
+ 'timestamp': parse_iso8601(lecture_data.get('time')),
+ 'description': lecture_data.get('description_wiki'),
+ 'thumbnail': lecture_data.get('thumb'),
+ }
+
+ playlist_entries = []
+ lecture_type = lecture_data.get('type')
+ parts = [compat_str(video) for video in cfg.get('videos', [])]
+ if parts:
+ multipart = len(parts) > 1
+
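+ # Each part has its own SMIL manifest; multi-part lectures get "_part<N>" suffixed ids, display ids and titles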
+ def extract_part(part_id):
+ smil_url = '%s/%s/video/%s/smil.xml' % (base_url, lecture_slug, part_id)
+ smil = self._download_smil(smil_url, lecture_id)
+ info = self._parse_smil(smil, smil_url, lecture_id)
+ info['id'] = lecture_id if not multipart else '%s_part%s' % (lecture_id, part_id)
+ info['display_id'] = lecture_slug if not multipart else '%s_part%s' % (lecture_slug, part_id)
+ if multipart:
+ info['title'] += ' (Part %s)' % part_id
+ switch = smil.find('.//switch')
+ if switch is not None:
+ info['duration'] = parse_duration(switch.attrib.get('dur'))
+ item_info = lecture_info.copy()
+ item_info.update(info)
+ return item_info
+
+ if explicit_part_id or not multipart:
+ result = extract_part(explicit_part_id or parts[0])
+ else:
+ result = {
+ '_type': 'multi_video',
+ 'entries': [extract_part(part) for part in parts],
+ }
+ result.update(lecture_info)
+
+ # Immediately return explicitly requested part or non event item
+ if explicit_part_id or lecture_type != 'evt':
+ return result
+
+ playlist_entries.append(result)
+
+ # It's probably a playlist
+ if not parts or lecture_type == 'evt':
+ playlist_webpage = self._download_webpage(
+ '%s/site/ajax/drilldown/?id=%s' % (base_url, lecture_id), lecture_id)
+ entries = [
+ self.url_result(compat_urlparse.urljoin(url, video_url), 'Viidea')
+ for _, video_url in re.findall(
+ r'<a[^>]+href=(["\'])(.+?)\1[^>]+id=["\']lec=\d+', playlist_webpage)]
+ playlist_entries.extend(entries)
+
+ playlist = self.playlist_result(playlist_entries, lecture_id)
+ playlist.update(lecture_info)
+ return playlist
diff --git a/yt_dlp/extractor/viki.py b/yt_dlp/extractor/viki.py
new file mode 100644
index 0000000..3246dab
--- /dev/null
+++ b/yt_dlp/extractor/viki.py
@@ -0,0 +1,346 @@
+import hashlib
+import hmac
+import json
+import time
+
+from .common import InfoExtractor
+from ..utils import (
+ ExtractorError,
+ int_or_none,
+ parse_age_limit,
+ parse_iso8601,
+ try_get,
+)
+
+
+class VikiBaseIE(InfoExtractor):
+ _VALID_URL_BASE = r'https?://(?:www\.)?viki\.(?:com|net|mx|jp|fr)/'
+ _API_URL_TEMPLATE = 'https://api.viki.io%s'
+
+ _DEVICE_ID = '112395910d'
+ _APP = '100005a'
+ _APP_VERSION = '6.11.3'
+ _APP_SECRET = 'd96704b180208dbb2efa30fe44c48bd8690441af9f567ba8fd710a72badc85198f7472'
+
+ _GEO_BYPASS = False
+ _NETRC_MACHINE = 'viki'
+
+ _token = None
+
+ _ERRORS = {
+ 'geo': 'Sorry, this content is not available in your region.',
+ 'upcoming': 'Sorry, this content is not yet available.',
+ 'paywall': 'Sorry, this content is only available to Viki Pass Plus subscribers',
+ }
+
+ def _stream_headers(self, timestamp, sig):
+ return {
+ 'X-Viki-manufacturer': 'vivo',
+ 'X-Viki-device-model': 'vivo 1606',
+ 'X-Viki-device-os-ver': '6.0.1',
+ 'X-Viki-connection-type': 'WIFI',
+ 'X-Viki-carrier': '',
+ 'X-Viki-as-id': '100005a-1625321982-3932',
+ 'timestamp': str(timestamp),
+ 'signature': str(sig),
+ 'x-viki-app-ver': self._APP_VERSION
+ }
+
+ def _api_query(self, path, version=4, **kwargs):
+ path += '?' if '?' not in path else '&'
+ query = f'/v{version}/{path}app={self._APP}'
+ if self._token:
+ query += '&token=%s' % self._token
+ return query + ''.join(f'&{name}={val}' for name, val in kwargs.items())
+
+ def _sign_query(self, path):
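+ # Sign "<query>&t=<timestamp>" with HMAC-SHA1 using the app secret; the timestamp/signature pair is later sent via _stream_headers for stream requests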
+ timestamp = int(time.time())
+ query = self._api_query(path, version=5)
+ sig = hmac.new(
+ self._APP_SECRET.encode('ascii'), f'{query}&t={timestamp}'.encode('ascii'), hashlib.sha1).hexdigest()
+ return timestamp, sig, self._API_URL_TEMPLATE % query
+
+ def _call_api(
+ self, path, video_id, note='Downloading JSON metadata', data=None, query=None, fatal=True):
+ if query is None:
+ timestamp, sig, url = self._sign_query(path)
+ else:
+ url = self._API_URL_TEMPLATE % self._api_query(path, version=4)
+ resp = self._download_json(
+ url, video_id, note, fatal=fatal, query=query,
+ data=json.dumps(data).encode('utf-8') if data else None,
+ headers=({'x-viki-app-ver': self._APP_VERSION} if data
+ else self._stream_headers(timestamp, sig) if query is None
+ else None), expected_status=400) or {}
+
+ self._raise_error(resp.get('error'), fatal)
+ return resp
+
+ def _raise_error(self, error, fatal=True):
+ if error is None:
+ return
+ msg = '%s said: %s' % (self.IE_NAME, error)
+ if fatal:
+ raise ExtractorError(msg, expected=True)
+ else:
+ self.report_warning(msg)
+
+ def _check_errors(self, data):
+ for reason, status in (data.get('blocking') or {}).items():
+ if status and reason in self._ERRORS:
+ message = self._ERRORS[reason]
+ if reason == 'geo':
+ self.raise_geo_restricted(msg=message)
+ elif reason == 'paywall':
+ if try_get(data, lambda x: x['paywallable']['tvod']):
+ self._raise_error('This video is for rent only or TVOD (Transactional Video On demand)')
+ self.raise_login_required(message)
+ self._raise_error(message)
+
+ def _perform_login(self, username, password):
+ self._token = self._call_api(
+ 'sessions.json', None, 'Logging in', fatal=False,
+ data={'username': username, 'password': password}).get('token')
+ if not self._token:
+ self.report_warning('Login Failed: Unable to get session token')
+
+ @staticmethod
+ def dict_selection(dict_obj, preferred_key):
+ if preferred_key in dict_obj:
+ return dict_obj[preferred_key]
+ return (list(filter(None, dict_obj.values())) or [None])[0]
+
+
+class VikiIE(VikiBaseIE):
+ IE_NAME = 'viki'
+ _VALID_URL = r'%s(?:videos|player)/(?P<id>[0-9]+v)' % VikiBaseIE._VALID_URL_BASE
+ _TESTS = [{
+ 'note': 'Free non-DRM video with storyboards in MPD',
+ 'url': 'https://www.viki.com/videos/1175236v-choosing-spouse-by-lottery-episode-1',
+ 'info_dict': {
+ 'id': '1175236v',
+ 'ext': 'mp4',
+ 'title': 'Choosing Spouse by Lottery - Episode 1',
+ 'timestamp': 1606463239,
+ 'age_limit': 13,
+ 'uploader': 'FCC',
+ 'upload_date': '20201127',
+ },
+ }, {
+ 'url': 'http://www.viki.com/videos/1023585v-heirs-episode-14',
+ 'info_dict': {
+ 'id': '1023585v',
+ 'ext': 'mp4',
+ 'title': 'Heirs - Episode 14',
+ 'uploader': 'SBS Contents Hub',
+ 'timestamp': 1385047627,
+ 'upload_date': '20131121',
+ 'age_limit': 13,
+ 'duration': 3570,
+ 'episode_number': 14,
+ },
+ 'skip': 'Blocked in the US',
+ }, {
+ # clip
+ 'url': 'http://www.viki.com/videos/1067139v-the-avengers-age-of-ultron-press-conference',
+ 'md5': '86c0b5dbd4d83a6611a79987cc7a1989',
+ 'info_dict': {
+ 'id': '1067139v',
+ 'ext': 'mp4',
+ 'title': "'The Avengers: Age of Ultron' Press Conference",
+ 'description': 'md5:d70b2f9428f5488321bfe1db10d612ea',
+ 'duration': 352,
+ 'timestamp': 1430380829,
+ 'upload_date': '20150430',
+ 'uploader': 'Arirang TV',
+ 'like_count': int,
+ 'age_limit': 0,
+ },
+ 'skip': 'Sorry. There was an error loading this video',
+ }, {
+ 'url': 'http://www.viki.com/videos/1048879v-ankhon-dekhi',
+ 'info_dict': {
+ 'id': '1048879v',
+ 'ext': 'mp4',
+ 'title': 'Ankhon Dekhi',
+ 'duration': 6512,
+ 'timestamp': 1408532356,
+ 'upload_date': '20140820',
+ 'uploader': 'Spuul',
+ 'like_count': int,
+ 'age_limit': 13,
+ },
+ 'skip': 'Blocked in the US',
+ }, {
+ # episode
+ 'url': 'http://www.viki.com/videos/44699v-boys-over-flowers-episode-1',
+ 'md5': '0a53dc252e6e690feccd756861495a8c',
+ 'info_dict': {
+ 'id': '44699v',
+ 'ext': 'mp4',
+ 'title': 'Boys Over Flowers - Episode 1',
+ 'description': 'md5:b89cf50038b480b88b5b3c93589a9076',
+ 'duration': 4172,
+ 'timestamp': 1270496524,
+ 'upload_date': '20100405',
+ 'uploader': 'group8',
+ 'like_count': int,
+ 'age_limit': 13,
+ 'episode_number': 1,
+ },
+ }, {
+ # youtube external
+ 'url': 'http://www.viki.com/videos/50562v-poor-nastya-complete-episode-1',
+ 'md5': '63f8600c1da6f01b7640eee7eca4f1da',
+ 'info_dict': {
+ 'id': '50562v',
+ 'ext': 'webm',
+ 'title': 'Poor Nastya [COMPLETE] - Episode 1',
+ 'description': '',
+ 'duration': 606,
+ 'timestamp': 1274949505,
+ 'upload_date': '20101213',
+ 'uploader': 'ad14065n',
+ 'uploader_id': 'ad14065n',
+ 'like_count': int,
+ 'age_limit': 13,
+ },
+ 'skip': 'Page not found!',
+ }, {
+ 'url': 'http://www.viki.com/player/44699v',
+ 'only_matching': True,
+ }, {
+ # non-English description
+ 'url': 'http://www.viki.com/videos/158036v-love-in-magic',
+ 'md5': '41faaba0de90483fb4848952af7c7d0d',
+ 'info_dict': {
+ 'id': '158036v',
+ 'ext': 'mp4',
+ 'uploader': 'I Planet Entertainment',
+ 'upload_date': '20111122',
+ 'timestamp': 1321985454,
+ 'description': 'md5:44b1e46619df3a072294645c770cef36',
+ 'title': 'Love In Magic',
+ 'age_limit': 13,
+ },
+ }]
+
+ def _real_extract(self, url):
+ video_id = self._match_id(url)
+ video = self._call_api(f'videos/{video_id}.json', video_id, 'Downloading video JSON', query={})
+ self._check_errors(video)
+
+ title = try_get(video, lambda x: x['titles']['en'], str)
+ episode_number = int_or_none(video.get('number'))
+ if not title:
+ title = 'Episode %d' % episode_number if episode_number and video.get('type') == 'episode' else video.get('id') or video_id
+ container_titles = try_get(video, lambda x: x['container']['titles'], dict) or {}
+ container_title = self.dict_selection(container_titles, 'en')
+ title = '%s - %s' % (container_title, title)
+
+ thumbnails = [{
+ 'id': thumbnail_id,
+ 'url': thumbnail['url'],
+ } for thumbnail_id, thumbnail in (video.get('images') or {}).items() if thumbnail.get('url')]
+
+ resp = self._call_api(
+ 'playback_streams/%s.json?drms=dt3&device_id=%s' % (video_id, self._DEVICE_ID),
+ video_id, 'Downloading video streams JSON')['main'][0]
+
+ stream_id = try_get(resp, lambda x: x['properties']['track']['stream_id'])
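+ # Subtitle downloads go through signed auth_subtitles URLs keyed by the stream_id from the playback response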
+ subtitles = {lang: [{
+ 'ext': ext,
+ 'url': self._API_URL_TEMPLATE % self._api_query(
+ f'videos/{video_id}/auth_subtitles/{lang}.{ext}', stream_id=stream_id),
+ } for ext in ('srt', 'vtt')] for lang in (video.get('subtitle_completions') or {})}
+
+ mpd_url = resp['url']
+ # 720p is hidden in another MPD which can be found in the current manifest content
+ mpd_content = self._download_webpage(mpd_url, video_id, note='Downloading initial MPD manifest')
+ mpd_url = self._search_regex(
+            r'(?mi)<BaseURL>(http.+\.mpd)', mpd_content, 'new manifest', default=mpd_url)
+ if 'mpdhd_high' not in mpd_url and 'sig=' not in mpd_url:
+ # Modify the URL to get 1080p
+ mpd_url = mpd_url.replace('mpdhd', 'mpdhd_high')
+ formats = self._extract_mpd_formats(mpd_url, video_id)
+
+ return {
+ 'id': video_id,
+ 'formats': formats,
+ 'title': title,
+ 'description': self.dict_selection(video.get('descriptions', {}), 'en'),
+ 'duration': int_or_none(video.get('duration')),
+ 'timestamp': parse_iso8601(video.get('created_at')),
+ 'uploader': video.get('author'),
+ 'uploader_url': video.get('author_url'),
+ 'like_count': int_or_none(try_get(video, lambda x: x['likes']['count'])),
+ 'age_limit': parse_age_limit(video.get('rating')),
+ 'thumbnails': thumbnails,
+ 'subtitles': subtitles,
+ 'episode_number': episode_number,
+ }
+
+
+class VikiChannelIE(VikiBaseIE):
+ IE_NAME = 'viki:channel'
+ _VALID_URL = r'%s(?:tv|news|movies|artists)/(?P<id>[0-9]+c)' % VikiBaseIE._VALID_URL_BASE
+ _TESTS = [{
+ 'url': 'http://www.viki.com/tv/50c-boys-over-flowers',
+ 'info_dict': {
+ 'id': '50c',
+ 'title': 'Boys Over Flowers',
+ 'description': 'md5:804ce6e7837e1fd527ad2f25420f4d59',
+ },
+ 'playlist_mincount': 51,
+ }, {
+ 'url': 'http://www.viki.com/tv/1354c-poor-nastya-complete',
+ 'info_dict': {
+ 'id': '1354c',
+ 'title': 'Poor Nastya [COMPLETE]',
+ 'description': 'md5:05bf5471385aa8b21c18ad450e350525',
+ },
+ 'playlist_count': 127,
+ 'skip': 'Page not found',
+ }, {
+ 'url': 'http://www.viki.com/news/24569c-showbiz-korea',
+ 'only_matching': True,
+ }, {
+ 'url': 'http://www.viki.com/movies/22047c-pride-and-prejudice-2005',
+ 'only_matching': True,
+ }, {
+ 'url': 'http://www.viki.com/artists/2141c-shinee',
+ 'only_matching': True,
+ }]
+
+ _video_types = ('episodes', 'movies', 'clips', 'trailers')
+
+ def _entries(self, channel_id):
+ params = {
+ 'app': self._APP, 'token': self._token, 'only_ids': 'true',
+            'direction': 'asc', 'sort': 'number', 'per_page': 30,
+ }
+ video_types = self._configuration_arg('video_types') or self._video_types
+ for video_type in video_types:
+ if video_type not in self._video_types:
+ self.report_warning(f'Unknown video_type: {video_type}')
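+            # Page through the API (30 IDs per page) until the response reports no more entries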
+ page_num = 0
+ while True:
+ page_num += 1
+ params['page'] = page_num
+ res = self._call_api(
+ f'containers/{channel_id}/{video_type}.json', channel_id, query=params, fatal=False,
+ note='Downloading %s JSON page %d' % (video_type.title(), page_num))
+
+ for video_id in res.get('response') or []:
+ yield self.url_result(f'https://www.viki.com/videos/{video_id}', VikiIE.ie_key(), video_id)
+ if not res.get('more'):
+ break
+
+ def _real_extract(self, url):
+ channel_id = self._match_id(url)
+ channel = self._call_api('containers/%s.json' % channel_id, channel_id, 'Downloading channel JSON')
+ self._check_errors(channel)
+ return self.playlist_result(
+ self._entries(channel_id), channel_id,
+ self.dict_selection(channel['titles'], 'en'),
+ self.dict_selection(channel['descriptions'], 'en'))
diff --git a/yt_dlp/extractor/vimeo.py b/yt_dlp/extractor/vimeo.py
new file mode 100644
index 0000000..91b9764
--- /dev/null
+++ b/yt_dlp/extractor/vimeo.py
@@ -0,0 +1,1455 @@
+import base64
+import functools
+import itertools
+import re
+
+from .common import InfoExtractor
+from ..compat import compat_str, compat_urlparse
+from ..networking import HEADRequest, Request
+from ..networking.exceptions import HTTPError
+from ..utils import (
+ clean_html,
+ determine_ext,
+ ExtractorError,
+ get_element_by_class,
+    int_or_none,
+    js_to_json,
+ merge_dicts,
+ OnDemandPagedList,
+ parse_filesize,
+ parse_iso8601,
+ parse_qs,
+ smuggle_url,
+ str_or_none,
+ traverse_obj,
+ try_get,
+ unified_timestamp,
+ unsmuggle_url,
+ urlencode_postdata,
+ urljoin,
+ urlhandle_detect_ext,
+)
+
+
+class VimeoBaseInfoExtractor(InfoExtractor):
+ _NETRC_MACHINE = 'vimeo'
+ _LOGIN_REQUIRED = False
+ _LOGIN_URL = 'https://vimeo.com/log_in'
+
+ @staticmethod
+ def _smuggle_referrer(url, referrer_url):
+ return smuggle_url(url, {'referer': referrer_url})
+
+ def _unsmuggle_headers(self, url):
+ """@returns (url, smuggled_data, headers)"""
+ url, data = unsmuggle_url(url, {})
+ headers = self.get_param('http_headers').copy()
+ if 'referer' in data:
+ headers['Referer'] = data['referer']
+ return url, data, headers
+
+ def _perform_login(self, username, password):
+ viewer = self._download_json('https://vimeo.com/_next/viewer', None, 'Downloading login token')
+ data = {
+ 'action': 'login',
+ 'email': username,
+ 'password': password,
+ 'service': 'vimeo',
+ 'token': viewer['xsrft'],
+ }
+ self._set_vimeo_cookie('vuid', viewer['vuid'])
+ try:
+ self._download_webpage(
+ self._LOGIN_URL, None, 'Logging in',
+ data=urlencode_postdata(data), headers={
+ 'Content-Type': 'application/x-www-form-urlencoded',
+ 'Referer': self._LOGIN_URL,
+ })
+ except ExtractorError as e:
+ if isinstance(e.cause, HTTPError) and e.cause.status == 418:
+ raise ExtractorError(
+ 'Unable to log in: bad username or password',
+ expected=True)
+ raise ExtractorError('Unable to log in')
+
+ def _real_initialize(self):
+ if self._LOGIN_REQUIRED and not self._get_cookies('https://vimeo.com').get('vuid'):
+ self._raise_login_required()
+
+ def _get_video_password(self):
+ password = self.get_param('videopassword')
+ if password is None:
+ raise ExtractorError(
+ 'This video is protected by a password, use the --video-password option',
+ expected=True)
+ return password
+
+ def _verify_video_password(self, url, video_id, password, token, vuid):
+ if url.startswith('http://'):
+            # Vimeo only supports HTTPS now, but the user may supply an HTTP URL
+ url = url.replace('http://', 'https://')
+ self._set_vimeo_cookie('vuid', vuid)
+ return self._download_webpage(
+ url + '/password', video_id, 'Verifying the password',
+ 'Wrong password', data=urlencode_postdata({
+ 'password': password,
+ 'token': token,
+ }), headers={
+ 'Content-Type': 'application/x-www-form-urlencoded',
+ 'Referer': url,
+ })
+
+ def _extract_xsrft_and_vuid(self, webpage):
+ xsrft = self._search_regex(
+ r'(?:(?P<q1>["\'])xsrft(?P=q1)\s*:|xsrft\s*[=:])\s*(?P<q>["\'])(?P<xsrft>.+?)(?P=q)',
+ webpage, 'login token', group='xsrft')
+ vuid = self._search_regex(
+ r'["\']vuid["\']\s*:\s*(["\'])(?P<vuid>.+?)\1',
+ webpage, 'vuid', group='vuid')
+ return xsrft, vuid
+
+ def _extract_vimeo_config(self, webpage, video_id, *args, **kwargs):
+ vimeo_config = self._search_regex(
+ r'vimeo\.config\s*=\s*(?:({.+?})|_extend\([^,]+,\s+({.+?})\));',
+ webpage, 'vimeo config', *args, **kwargs)
+ if vimeo_config:
+ return self._parse_json(vimeo_config, video_id)
+
+ def _set_vimeo_cookie(self, name, value):
+ self._set_cookie('vimeo.com', name, value)
+
+ def _parse_config(self, config, video_id):
+ video_data = config['video']
+ video_title = video_data.get('title')
+ live_event = video_data.get('live_event') or {}
+ live_status = {
+ 'pending': 'is_upcoming',
+ 'active': 'is_upcoming',
+ 'started': 'is_live',
+ 'ended': 'post_live',
+ }.get(live_event.get('status'))
+ is_live = live_status == 'is_live'
+ request = config.get('request') or {}
+
+ formats = []
+ subtitles = {}
+
+ config_files = video_data.get('files') or request.get('files') or {}
+ for f in (config_files.get('progressive') or []):
+ video_url = f.get('url')
+ if not video_url:
+ continue
+ formats.append({
+ 'url': video_url,
+ 'format_id': 'http-%s' % f.get('quality'),
+ 'source_preference': 10,
+ 'width': int_or_none(f.get('width')),
+ 'height': int_or_none(f.get('height')),
+ 'fps': int_or_none(f.get('fps')),
+ 'tbr': int_or_none(f.get('bitrate')),
+ })
+
+ # TODO: fix handling of 308 status code returned for live archive manifest requests
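+        # URLs containing /sep/video/ appear to point at separated audio/video streams;
+        # probe both the muxed (/video/) and separated (/sep/video/) manifest variants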
+ sep_pattern = r'/sep/video/'
+ for files_type in ('hls', 'dash'):
+ for cdn_name, cdn_data in (try_get(config_files, lambda x: x[files_type]['cdns']) or {}).items():
+ manifest_url = cdn_data.get('url')
+ if not manifest_url:
+ continue
+ format_id = '%s-%s' % (files_type, cdn_name)
+ sep_manifest_urls = []
+ if re.search(sep_pattern, manifest_url):
+ for suffix, repl in (('', 'video'), ('_sep', 'sep/video')):
+ sep_manifest_urls.append((format_id + suffix, re.sub(
+ sep_pattern, '/%s/' % repl, manifest_url)))
+ else:
+ sep_manifest_urls = [(format_id, manifest_url)]
+ for f_id, m_url in sep_manifest_urls:
+ if files_type == 'hls':
+ fmts, subs = self._extract_m3u8_formats_and_subtitles(
+ m_url, video_id, 'mp4', live=is_live, m3u8_id=f_id,
+ note='Downloading %s m3u8 information' % cdn_name,
+ fatal=False)
+ formats.extend(fmts)
+ self._merge_subtitles(subs, target=subtitles)
+ elif files_type == 'dash':
+ if 'json=1' in m_url:
+ real_m_url = (self._download_json(m_url, video_id, fatal=False) or {}).get('url')
+ if real_m_url:
+ m_url = real_m_url
+ fmts, subs = self._extract_mpd_formats_and_subtitles(
+ m_url.replace('/master.json', '/master.mpd'), video_id, f_id,
+ 'Downloading %s MPD information' % cdn_name,
+ fatal=False)
+ formats.extend(fmts)
+ self._merge_subtitles(subs, target=subtitles)
+
+ live_archive = live_event.get('archive') or {}
+ live_archive_source_url = live_archive.get('source_url')
+ if live_archive_source_url and live_archive.get('status') == 'done':
+ formats.append({
+ 'format_id': 'live-archive-source',
+ 'url': live_archive_source_url,
+ 'quality': 10,
+ })
+
+ for tt in (request.get('text_tracks') or []):
+ subtitles.setdefault(tt['lang'], []).append({
+ 'ext': 'vtt',
+ 'url': urljoin('https://vimeo.com', tt['url']),
+ })
+
+ thumbnails = []
+ if not is_live:
+ for key, thumb in (video_data.get('thumbs') or {}).items():
+ thumbnails.append({
+ 'id': key,
+ 'width': int_or_none(key),
+ 'url': thumb,
+ })
+ thumbnail = video_data.get('thumbnail')
+ if thumbnail:
+ thumbnails.append({
+ 'url': thumbnail,
+ })
+
+ owner = video_data.get('owner') or {}
+ video_uploader_url = owner.get('url')
+
+ duration = int_or_none(video_data.get('duration'))
+ chapter_data = try_get(config, lambda x: x['embed']['chapters']) or []
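+        # Pair each chapter with its successor so every chapter ends where the
+        # next one begins; the last chapter ends at the full video duration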
+ chapters = [{
+ 'title': current_chapter.get('title'),
+ 'start_time': current_chapter.get('timecode'),
+ 'end_time': next_chapter.get('timecode'),
+ } for current_chapter, next_chapter in zip(chapter_data, chapter_data[1:] + [{'timecode': duration}])]
+ if chapters and chapters[0]['start_time']: # Chapters may not start from 0
+ chapters[:0] = [{'title': '<Untitled>', 'start_time': 0, 'end_time': chapters[0]['start_time']}]
+
+ return {
+ 'id': str_or_none(video_data.get('id')) or video_id,
+ 'title': video_title,
+ 'uploader': owner.get('name'),
+ 'uploader_id': video_uploader_url.split('/')[-1] if video_uploader_url else None,
+ 'uploader_url': video_uploader_url,
+ 'thumbnails': thumbnails,
+ 'duration': duration,
+ 'chapters': chapters or None,
+ 'formats': formats,
+ 'subtitles': subtitles,
+ 'live_status': live_status,
+ 'release_timestamp': traverse_obj(live_event, ('ingest', 'scheduled_start_time', {parse_iso8601})),
+        # Note: Bitrates are completely broken. A single m3u8 may contain entries
+        # in kbps and bps at the same time, without actual units specified.
+ '_format_sort_fields': ('quality', 'res', 'fps', 'hdr:12', 'source'),
+ }
+
+ def _extract_original_format(self, url, video_id, unlisted_hash=None):
+ query = {'action': 'load_download_config'}
+ if unlisted_hash:
+ query['unlisted_hash'] = unlisted_hash
+ download_data = self._download_json(
+ url, video_id, fatal=False, query=query,
+ headers={'X-Requested-With': 'XMLHttpRequest'},
+ expected_status=(403, 404)) or {}
+ source_file = download_data.get('source_file')
+ download_url = try_get(source_file, lambda x: x['download_url'])
+ if download_url and not source_file.get('is_cold') and not source_file.get('is_defrosting'):
+ source_name = source_file.get('public_name', 'Original')
+ if self._is_valid_url(download_url, video_id, '%s video' % source_name):
+ ext = (try_get(
+ source_file, lambda x: x['extension'],
+ compat_str) or determine_ext(
+ download_url, None) or 'mp4').lower()
+ return {
+ 'url': download_url,
+ 'ext': ext,
+ 'width': int_or_none(source_file.get('width')),
+ 'height': int_or_none(source_file.get('height')),
+ 'filesize': parse_filesize(source_file.get('size')),
+ 'format_id': source_name,
+ 'quality': 1,
+ }
+
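+        # Fallback: authenticate against the official API with a short-lived JWT
+        # to find a 'source' quality download link not exposed on the webpage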
+ jwt_response = self._download_json(
+ 'https://vimeo.com/_rv/viewer', video_id, note='Downloading jwt token', fatal=False) or {}
+ if not jwt_response.get('jwt'):
+ return
+ headers = {'Authorization': 'jwt %s' % jwt_response['jwt'], 'Accept': 'application/json'}
+ original_response = self._download_json(
+ f'https://api.vimeo.com/videos/{video_id}', video_id,
+ headers=headers, fatal=False, expected_status=(403, 404)) or {}
+ for download_data in original_response.get('download') or []:
+ download_url = download_data.get('link')
+ if not download_url or download_data.get('quality') != 'source':
+ continue
+ ext = determine_ext(parse_qs(download_url).get('filename', [''])[0].lower(), default_ext=None)
+ if not ext:
+ urlh = self._request_webpage(
+ HEADRequest(download_url), video_id, fatal=False, note='Determining source extension')
+ ext = urlh and urlhandle_detect_ext(urlh)
+ return {
+ 'url': download_url,
+ 'ext': ext or 'unknown_video',
+ 'format_id': download_data.get('public_name', 'Original'),
+ 'width': int_or_none(download_data.get('width')),
+ 'height': int_or_none(download_data.get('height')),
+ 'fps': int_or_none(download_data.get('fps')),
+ 'filesize': int_or_none(download_data.get('size')),
+ 'quality': 1,
+ }
+
+
+class VimeoIE(VimeoBaseInfoExtractor):
+ """Information extractor for vimeo.com."""
+
+    # _VALID_URL matches canonical video pages, player embeds and unlisted-hash URLs
+ _VALID_URL = r'''(?x)
+ https?://
+ (?:
+ (?:
+ www|
+ player
+ )
+ \.
+ )?
+ vimeo\.com/
+ (?:
+ (?P<u>user)|
+ (?!(?:channels|album|showcase)/[^/?#]+/?(?:$|[?#])|[^/]+/review/|ondemand/)
+ (?:.*?/)??
+ (?P<q>
+ (?:
+ play_redirect_hls|
+ moogaloop\.swf)\?clip_id=
+ )?
+ (?:videos?/)?
+ )
+ (?P<id>[0-9]+)
+ (?(u)
+ /(?!videos|likes)[^/?#]+/?|
+ (?(q)|/(?P<unlisted_hash>[\da-f]{10}))?
+ )
+ (?:(?(q)[&]|(?(u)|/?)[?]).*?)?(?:[#].*)?$
+ '''
+ IE_NAME = 'vimeo'
+ _EMBED_REGEX = [
+ # iframe
+ r'<iframe[^>]+?src=(["\'])(?P<url>(?:https?:)?//player\.vimeo\.com/video/\d+.*?)\1',
+ # Embedded (swf embed) Vimeo player
+ r'<embed[^>]+?src=(["\'])(?P<url>(?:https?:)?//(?:www\.)?vimeo\.com/moogaloop\.swf.+?)\1',
+ # Non-standard embedded Vimeo player
+ r'<video[^>]+src=(["\'])(?P<url>(?:https?:)?//(?:www\.)?vimeo\.com/[0-9]+)\1',
+ ]
+ _TESTS = [
+ {
+ 'url': 'http://vimeo.com/56015672#at=0',
+ 'md5': '8879b6cc097e987f02484baf890129e5',
+ 'info_dict': {
+ 'id': '56015672',
+ 'ext': 'mp4',
+ 'title': "youtube-dl test video '' ä↭𝕐-BaW jenozKc",
+ 'description': 'md5:2d3305bad981a06ff79f027f19865021',
+ 'timestamp': 1355990239,
+ 'upload_date': '20121220',
+ 'uploader_url': r're:https?://(?:www\.)?vimeo\.com/user7108434',
+ 'uploader_id': 'user7108434',
+ 'uploader': 'Filippo Valsorda',
+ 'duration': 10,
+ 'license': 'by-sa',
+ },
+ 'params': {
+ 'format': 'best[protocol=https]',
+ },
+            'skip': 'No longer available',
+ },
+ {
+ 'url': 'http://player.vimeo.com/video/54469442',
+ 'md5': '619b811a4417aa4abe78dc653becf511',
+ 'note': 'Videos that embed the url in the player page',
+ 'info_dict': {
+ 'id': '54469442',
+ 'ext': 'mp4',
+ 'title': 'Kathy Sierra: Building the minimum Badass User, Business of Software 2012',
+ 'uploader': 'Business of Software',
+ 'uploader_url': r're:https?://(?:www\.)?vimeo\.com/businessofsoftware',
+ 'uploader_id': 'businessofsoftware',
+ 'duration': 3610,
+ 'thumbnail': 'https://i.vimeocdn.com/video/376682406-f34043e7b766af6bef2af81366eacd6724f3fc3173179a11a97a1e26587c9529-d_1280',
+ },
+ 'params': {
+ 'format': 'best[protocol=https]',
+ },
+ },
+ {
+ 'url': 'http://vimeo.com/68375962',
+ 'md5': 'aaf896bdb7ddd6476df50007a0ac0ae7',
+ 'note': 'Video protected with password',
+ 'info_dict': {
+ 'id': '68375962',
+ 'ext': 'mp4',
+ 'title': 'youtube-dl password protected test video',
+ 'timestamp': 1371200155,
+ 'upload_date': '20130614',
+ 'uploader_url': r're:https?://(?:www\.)?vimeo\.com/user18948128',
+ 'uploader_id': 'user18948128',
+ 'uploader': 'Jaime Marquínez Ferrándiz',
+ 'duration': 10,
+ 'description': 'md5:6173f270cd0c0119f22817204b3eb86c',
+ 'thumbnail': 'https://i.vimeocdn.com/video/440665496-b2c5aee2b61089442c794f64113a8e8f7d5763c3e6b3ebfaf696ae6413f8b1f4-d_1280',
+ 'view_count': int,
+ 'comment_count': int,
+ 'like_count': int,
+ },
+ 'params': {
+ 'format': 'best[protocol=https]',
+ 'videopassword': 'youtube-dl',
+ },
+ },
+ {
+ 'url': 'http://vimeo.com/channels/keypeele/75629013',
+ 'md5': '2f86a05afe9d7abc0b9126d229bbe15d',
+ 'info_dict': {
+ 'id': '75629013',
+ 'ext': 'mp4',
+ 'title': 'Key & Peele: Terrorist Interrogation',
+ 'description': 'md5:6173f270cd0c0119f22817204b3eb86c',
+ 'uploader_url': r're:https?://(?:www\.)?vimeo\.com/atencio',
+ 'uploader_id': 'atencio',
+ 'uploader': 'Peter Atencio',
+ 'channel_id': 'keypeele',
+ 'channel_url': r're:https?://(?:www\.)?vimeo\.com/channels/keypeele',
+ 'timestamp': 1380339469,
+ 'upload_date': '20130928',
+ 'duration': 187,
+ 'thumbnail': 'https://i.vimeocdn.com/video/450239872-a05512d9b1e55d707a7c04365c10980f327b06d966351bc403a5d5d65c95e572-d_1280',
+ 'view_count': int,
+ 'comment_count': int,
+ 'like_count': int,
+ },
+ 'params': {'format': 'http-1080p'},
+ },
+ {
+ 'url': 'http://vimeo.com/76979871',
+ 'note': 'Video with subtitles',
+ 'info_dict': {
+ 'id': '76979871',
+ 'ext': 'mov',
+ 'title': 'The New Vimeo Player (You Know, For Videos)',
+ 'description': 'md5:2ec900bf97c3f389378a96aee11260ea',
+ 'timestamp': 1381846109,
+ 'upload_date': '20131015',
+ 'uploader_url': r're:https?://(?:www\.)?vimeo\.com/staff',
+ 'uploader_id': 'staff',
+ 'uploader': 'Vimeo Staff',
+ 'duration': 62,
+ 'subtitles': {
+ 'de': [{'ext': 'vtt'}],
+ 'en': [{'ext': 'vtt'}],
+ 'es': [{'ext': 'vtt'}],
+ 'fr': [{'ext': 'vtt'}],
+ },
+ },
+ 'expected_warnings': ['Ignoring subtitle tracks found in the HLS manifest'],
+ },
+ {
+ # from https://www.ouya.tv/game/Pier-Solar-and-the-Great-Architects/
+ 'url': 'https://player.vimeo.com/video/98044508',
+ 'note': 'The js code contains assignments to the same variable as the config',
+ 'info_dict': {
+ 'id': '98044508',
+ 'ext': 'mp4',
+ 'title': 'Pier Solar OUYA Official Trailer',
+ 'uploader': 'Tulio Gonçalves',
+ 'uploader_url': r're:https?://(?:www\.)?vimeo\.com/user28849593',
+ 'uploader_id': 'user28849593',
+ 'duration': 118,
+ 'thumbnail': 'https://i.vimeocdn.com/video/478636036-c18440305ef3df9decfb6bf207a61fe39d2d17fa462a96f6f2d93d30492b037d-d_1280',
+ },
+ },
+ {
+ # contains original format
+ 'url': 'https://vimeo.com/33951933',
+ 'md5': '53c688fa95a55bf4b7293d37a89c5c53',
+ 'info_dict': {
+ 'id': '33951933',
+ 'ext': 'mp4',
+ 'title': 'FOX CLASSICS - Forever Classic ID - A Full Minute',
+ 'uploader': 'The DMCI',
+ 'uploader_url': r're:https?://(?:www\.)?vimeo\.com/dmci',
+ 'uploader_id': 'dmci',
+ 'timestamp': 1324343742,
+ 'upload_date': '20111220',
+ 'description': 'md5:ae23671e82d05415868f7ad1aec21147',
+ 'duration': 60,
+ 'comment_count': int,
+ 'view_count': int,
+ 'thumbnail': 'https://i.vimeocdn.com/video/231174622-dd07f015e9221ff529d451e1cc31c982b5d87bfafa48c4189b1da72824ee289a-d_1280',
+ 'like_count': int,
+ },
+ },
+ {
+ 'note': 'Contains original format not accessible in webpage',
+ 'url': 'https://vimeo.com/393756517',
+ 'md5': 'c464af248b592190a5ffbb5d33f382b0',
+ 'info_dict': {
+ 'id': '393756517',
+ 'ext': 'mov',
+ 'timestamp': 1582642091,
+ 'uploader_id': 'frameworkla',
+ 'title': 'Straight To Hell - Sabrina: Netflix',
+ 'uploader': 'Framework Studio',
+ 'description': 'md5:f2edc61af3ea7a5592681ddbb683db73',
+ 'upload_date': '20200225',
+ 'duration': 176,
+ 'thumbnail': 'https://i.vimeocdn.com/video/859377297-836494a4ef775e9d4edbace83937d9ad34dc846c688c0c419c0e87f7ab06c4b3-d_1280',
+ 'uploader_url': 'https://vimeo.com/frameworkla',
+ },
+ },
+ {
+ # only available via https://vimeo.com/channels/tributes/6213729 and
+ # not via https://vimeo.com/6213729
+ 'url': 'https://vimeo.com/channels/tributes/6213729',
+ 'info_dict': {
+ 'id': '6213729',
+ 'ext': 'mp4',
+ 'title': 'Vimeo Tribute: The Shining',
+ 'uploader': 'Casey Donahue',
+ 'uploader_url': r're:https?://(?:www\.)?vimeo\.com/caseydonahue',
+ 'uploader_id': 'caseydonahue',
+ 'channel_url': r're:https?://(?:www\.)?vimeo\.com/channels/tributes',
+ 'channel_id': 'tributes',
+ 'timestamp': 1250886430,
+ 'upload_date': '20090821',
+ 'description': 'md5:bdbf314014e58713e6e5b66eb252f4a6',
+ 'duration': 321,
+ 'comment_count': int,
+ 'view_count': int,
+ 'thumbnail': 'https://i.vimeocdn.com/video/22728298-bfc22146f930de7cf497821c7b0b9f168099201ecca39b00b6bd31fcedfca7a6-d_1280',
+ 'like_count': int,
+ },
+ 'params': {
+ 'skip_download': True,
+ },
+ },
+ {
+ # redirects to ondemand extractor and should be passed through it
+ # for successful extraction
+ 'url': 'https://vimeo.com/73445910',
+ 'info_dict': {
+ 'id': '73445910',
+ 'ext': 'mp4',
+ 'title': 'The Reluctant Revolutionary',
+ 'uploader': '10Ft Films',
+ 'uploader_url': r're:https?://(?:www\.)?vimeo\.com/tenfootfilms',
+ 'uploader_id': 'tenfootfilms',
+ 'description': 'md5:0fa704e05b04f91f40b7f3ca2e801384',
+ 'upload_date': '20130830',
+ 'timestamp': 1377853339,
+ },
+ 'params': {
+ 'skip_download': True,
+ },
+ 'skip': 'this page is no longer available.',
+ },
+ {
+ 'url': 'http://player.vimeo.com/video/68375962',
+ 'md5': 'aaf896bdb7ddd6476df50007a0ac0ae7',
+ 'info_dict': {
+ 'id': '68375962',
+ 'ext': 'mp4',
+ 'title': 'youtube-dl password protected test video',
+ 'timestamp': 1371200155,
+ 'upload_date': '20130614',
+ 'uploader_url': r're:https?://(?:www\.)?vimeo\.com/user18948128',
+ 'uploader_id': 'user18948128',
+ 'uploader': 'Jaime Marquínez Ferrándiz',
+ 'duration': 10,
+ 'description': 'md5:6173f270cd0c0119f22817204b3eb86c',
+ 'thumbnail': 'https://i.vimeocdn.com/video/440665496-b2c5aee2b61089442c794f64113a8e8f7d5763c3e6b3ebfaf696ae6413f8b1f4-d_1280',
+ 'view_count': int,
+ 'comment_count': int,
+ 'like_count': int,
+ },
+ 'params': {
+ 'format': 'best[protocol=https]',
+ 'videopassword': 'youtube-dl',
+ },
+ },
+ {
+ 'url': 'http://vimeo.com/moogaloop.swf?clip_id=2539741',
+ 'only_matching': True,
+ },
+ {
+ 'url': 'https://vimeo.com/109815029',
+ 'note': 'Video not completely processed, "failed" seed status',
+ 'only_matching': True,
+ },
+ {
+ 'url': 'https://vimeo.com/groups/travelhd/videos/22439234',
+ 'only_matching': True,
+ },
+ {
+ 'url': 'https://vimeo.com/album/2632481/video/79010983',
+ 'only_matching': True,
+ },
+ {
+ 'url': 'https://vimeo.com/showcase/3253534/video/119195465',
+ 'note': 'A video in a password protected album (showcase)',
+ 'info_dict': {
+ 'id': '119195465',
+ 'ext': 'mp4',
+ 'title': "youtube-dl test video '' ä↭𝕐-BaW jenozKc",
+ 'uploader': 'Philipp Hagemeister',
+ 'uploader_id': 'user20132939',
+ 'description': 'md5:fa7b6c6d8db0bdc353893df2f111855b',
+ 'upload_date': '20150209',
+ 'timestamp': 1423518307,
+ 'thumbnail': 'https://i.vimeocdn.com/video/default_1280',
+ 'duration': 10,
+ 'like_count': int,
+ 'uploader_url': 'https://vimeo.com/user20132939',
+ 'view_count': int,
+ 'comment_count': int,
+ },
+ 'params': {
+ 'format': 'best[protocol=https]',
+ 'videopassword': 'youtube-dl',
+ },
+ },
+ {
+ # source file returns 403: Forbidden
+ 'url': 'https://vimeo.com/7809605',
+ 'only_matching': True,
+ },
+ {
+ 'note': 'Direct URL with hash',
+ 'url': 'https://vimeo.com/160743502/abd0e13fb4',
+ 'info_dict': {
+ 'id': '160743502',
+ 'ext': 'mp4',
+ 'uploader': 'Julian Tryba',
+ 'uploader_id': 'aliniamedia',
+ 'title': 'Harrisville New Hampshire',
+ 'timestamp': 1459259666,
+ 'upload_date': '20160329',
+ 'release_timestamp': 1459259666,
+ 'license': 'by-nc',
+ 'duration': 159,
+ 'comment_count': int,
+ 'thumbnail': 'https://i.vimeocdn.com/video/562802436-585eeb13b5020c6ac0f171a2234067938098f84737787df05ff0d767f6d54ee9-d_1280',
+ 'like_count': int,
+ 'uploader_url': 'https://vimeo.com/aliniamedia',
+ 'release_date': '20160329',
+ },
+ 'params': {'skip_download': True},
+ },
+ {
+ 'url': 'https://vimeo.com/138909882',
+ 'info_dict': {
+ 'id': '138909882',
+ 'ext': 'mp4',
+ 'title': 'Eastnor Castle 2015 Firework Champions - The Promo!',
+ 'description': 'md5:5967e090768a831488f6e74b7821b3c1',
+ 'uploader_id': 'fireworkchampions',
+ 'uploader': 'Firework Champions',
+ 'upload_date': '20150910',
+ 'timestamp': 1441901895,
+ },
+ 'params': {
+ 'skip_download': True,
+ 'format': 'Original',
+ },
+ },
+ {
+ 'url': 'https://vimeo.com/channels/staffpicks/143603739',
+ 'info_dict': {
+ 'id': '143603739',
+ 'ext': 'mp4',
+ 'uploader': 'Karim Huu Do',
+ 'timestamp': 1445846953,
+ 'upload_date': '20151026',
+ 'title': 'The Shoes - Submarine Feat. Blaine Harrison',
+ 'uploader_id': 'karimhd',
+ 'description': 'md5:8e2eea76de4504c2e8020a9bcfa1e843',
+ 'channel_id': 'staffpicks',
+ 'duration': 336,
+ 'comment_count': int,
+ 'view_count': int,
+ 'thumbnail': 'https://i.vimeocdn.com/video/541243181-b593db36a16db2f0096f655da3f5a4dc46b8766d77b0f440df937ecb0c418347-d_1280',
+ 'like_count': int,
+ 'uploader_url': 'https://vimeo.com/karimhd',
+ 'channel_url': 'https://vimeo.com/channels/staffpicks',
+ },
+ 'params': {'skip_download': 'm3u8'},
+ },
+ {
+ # requires passing unlisted_hash(a52724358e) to load_download_config request
+ 'url': 'https://vimeo.com/392479337/a52724358e',
+ 'only_matching': True,
+ },
+ {
+ # similar, but all numeric: ID must be 581039021, not 9603038895
+ # issue #29690
+ 'url': 'https://vimeo.com/581039021/9603038895',
+ 'info_dict': {
+ 'id': '581039021',
+ 'ext': 'mp4',
+ 'timestamp': 1627621014,
+ 'release_timestamp': 1627621014,
+ 'duration': 976,
+ 'comment_count': int,
+ 'thumbnail': 'https://i.vimeocdn.com/video/1202249320-4ddb2c30398c0dc0ee059172d1bd5ea481ad12f0e0e3ad01d2266f56c744b015-d_1280',
+ 'like_count': int,
+ 'uploader_url': 'https://vimeo.com/txwestcapital',
+ 'release_date': '20210730',
+ 'uploader': 'Christopher Inks',
+ 'title': 'Thursday, July 29, 2021 BMA Evening Video Update',
+ 'uploader_id': 'txwestcapital',
+ 'upload_date': '20210730',
+ },
+ 'params': {
+ 'skip_download': True,
+ },
+ },
+ {
+ # user playlist alias -> https://vimeo.com/258705797
+ 'url': 'https://vimeo.com/user26785108/newspiritualguide',
+ 'only_matching': True,
+ },
+ # https://gettingthingsdone.com/workflowmap/
+ # vimeo embed with check-password page protected by Referer header
+ ]
+
+ @classmethod
+ def _extract_embed_urls(cls, url, webpage):
+ for embed_url in super()._extract_embed_urls(url, webpage):
+ yield cls._smuggle_referrer(embed_url, url)
+
+ @classmethod
+ def _extract_url(cls, url, webpage):
+ return next(cls._extract_embed_urls(url, webpage), None)
+
+ def _verify_player_video_password(self, url, video_id, headers):
+ password = self._get_video_password()
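+        # The check-password endpoint expects the password base64-encoded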
+ data = urlencode_postdata({
+ 'password': base64.b64encode(password.encode()),
+ })
+ headers = merge_dicts(headers, {
+ 'Content-Type': 'application/x-www-form-urlencoded',
+ })
+ checked = self._download_json(
+ f'{compat_urlparse.urlsplit(url)._replace(query=None).geturl()}/check-password',
+ video_id, 'Verifying the password', data=data, headers=headers)
+ if checked is False:
+ raise ExtractorError('Wrong video password', expected=True)
+ return checked
+
+ def _extract_from_api(self, video_id, unlisted_hash=None):
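+        # Fetch video metadata from the official API, authorized with a
+        # short-lived JWT obtained from the _rv/jwt endpoint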
+ token = self._download_json(
+ 'https://vimeo.com/_rv/jwt', video_id, headers={
+ 'X-Requested-With': 'XMLHttpRequest'
+ })['token']
+ api_url = 'https://api.vimeo.com/videos/' + video_id
+ if unlisted_hash:
+ api_url += ':' + unlisted_hash
+ video = self._download_json(
+ api_url, video_id, headers={
+ 'Authorization': 'jwt ' + token,
+ 'Accept': 'application/json',
+ }, query={
+ 'fields': 'config_url,created_time,description,license,metadata.connections.comments.total,metadata.connections.likes.total,release_time,stats.plays',
+ })
+ info = self._parse_config(self._download_json(
+ video['config_url'], video_id), video_id)
+ get_timestamp = lambda x: parse_iso8601(video.get(x + '_time'))
+ info.update({
+ 'description': video.get('description'),
+ 'license': video.get('license'),
+ 'release_timestamp': get_timestamp('release'),
+ 'timestamp': get_timestamp('created'),
+ 'view_count': int_or_none(try_get(video, lambda x: x['stats']['plays'])),
+ })
+ connections = try_get(
+ video, lambda x: x['metadata']['connections'], dict) or {}
+ for k in ('comment', 'like'):
+ info[k + '_count'] = int_or_none(try_get(connections, lambda x: x[k + 's']['total']))
+ return info
+
+ def _try_album_password(self, url):
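+        # A video inside a password-protected showcase (album) requires the album
+        # password to be verified first so that the needed cookies are set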
+ album_id = self._search_regex(
+ r'vimeo\.com/(?:album|showcase)/([^/]+)', url, 'album id', default=None)
+ if not album_id:
+ return
+ viewer = self._download_json(
+ 'https://vimeo.com/_rv/viewer', album_id, fatal=False)
+ if not viewer:
+ webpage = self._download_webpage(url, album_id)
+ viewer = self._parse_json(self._search_regex(
+ r'bootstrap_data\s*=\s*({.+?})</script>',
+ webpage, 'bootstrap data'), album_id)['viewer']
+ jwt = viewer['jwt']
+ album = self._download_json(
+ 'https://api.vimeo.com/albums/' + album_id,
+ album_id, headers={'Authorization': 'jwt ' + jwt, 'Accept': 'application/json'},
+ query={'fields': 'description,name,privacy'})
+ if try_get(album, lambda x: x['privacy']['view']) == 'password':
+ password = self.get_param('videopassword')
+ if not password:
+ raise ExtractorError(
+ 'This album is protected by a password, use the --video-password option',
+ expected=True)
+ self._set_vimeo_cookie('vuid', viewer['vuid'])
+ try:
+ self._download_json(
+ 'https://vimeo.com/showcase/%s/auth' % album_id,
+ album_id, 'Verifying the password', data=urlencode_postdata({
+ 'password': password,
+ 'token': viewer['xsrft'],
+ }), headers={
+ 'X-Requested-With': 'XMLHttpRequest',
+ })
+ except ExtractorError as e:
+ if isinstance(e.cause, HTTPError) and e.cause.status == 401:
+ raise ExtractorError('Wrong password', expected=True)
+ raise
+
+ def _real_extract(self, url):
+ url, data, headers = self._unsmuggle_headers(url)
+ if 'Referer' not in headers:
+ headers['Referer'] = url
+
+ # Extract ID from URL
+ mobj = self._match_valid_url(url).groupdict()
+ video_id, unlisted_hash = mobj['id'], mobj.get('unlisted_hash')
+ if unlisted_hash:
+ return self._extract_from_api(video_id, unlisted_hash)
+
+ if any(p in url for p in ('play_redirect_hls', 'moogaloop.swf')):
+ url = 'https://vimeo.com/' + video_id
+
+ self._try_album_password(url)
+ try:
+ # Retrieve video webpage to extract further information
+ webpage, urlh = self._download_webpage_handle(
+ url, video_id, headers=headers)
+ redirect_url = urlh.url
+ except ExtractorError as ee:
+ if isinstance(ee.cause, HTTPError) and ee.cause.status == 403:
+ errmsg = ee.cause.response.read()
+ if b'Because of its privacy settings, this video cannot be played here' in errmsg:
+ raise ExtractorError(
+ 'Cannot download embed-only video without embedding '
+ 'URL. Please call yt-dlp with the URL of the page '
+ 'that embeds this video.',
+ expected=True)
+ raise
+
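+        # player.vimeo.com embed pages carry the player config as inline JSON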
+ if '://player.vimeo.com/video/' in url:
+ config = self._search_json(
+ r'\b(?:playerC|c)onfig\s*=', webpage, 'info section', video_id)
+ if config.get('view') == 4:
+ config = self._verify_player_video_password(
+ redirect_url, video_id, headers)
+ return self._parse_config(config, video_id)
+
+ if re.search(r'<form[^>]+?id="pw_form"', webpage):
+ video_password = self._get_video_password()
+ token, vuid = self._extract_xsrft_and_vuid(webpage)
+ webpage = self._verify_video_password(
+ redirect_url, video_id, video_password, token, vuid)
+
+ vimeo_config = self._extract_vimeo_config(webpage, video_id, default=None)
+ if vimeo_config:
+ seed_status = vimeo_config.get('seed_status') or {}
+ if seed_status.get('state') == 'failed':
+ raise ExtractorError(
+ '%s said: %s' % (self.IE_NAME, seed_status['title']),
+ expected=True)
+
+ cc_license = None
+ timestamp = None
+ video_description = None
+ info_dict = {}
+ config_url = None
+
+ channel_id = self._search_regex(
+ r'vimeo\.com/channels/([^/]+)', url, 'channel id', default=None)
+ if channel_id:
+ config_url = self._html_search_regex(
+ r'\bdata-config-url="([^"]+)"', webpage, 'config URL', default=None)
+ video_description = clean_html(get_element_by_class('description', webpage))
+ info_dict.update({
+ 'channel_id': channel_id,
+ 'channel_url': 'https://vimeo.com/channels/' + channel_id,
+ })
+ if not config_url:
+ page_config = self._parse_json(self._search_regex(
+ r'vimeo\.(?:clip|vod_title)_page_config\s*=\s*({.+?});',
+ webpage, 'page config', default='{}'), video_id, fatal=False)
+ if not page_config:
+ return self._extract_from_api(video_id)
+ config_url = page_config['player']['config_url']
+ cc_license = page_config.get('cc_license')
+ clip = page_config.get('clip') or {}
+ timestamp = clip.get('uploaded_on')
+ video_description = clean_html(
+ clip.get('description') or page_config.get('description_html_escaped'))
+ config = self._download_json(config_url, video_id)
+ video = config.get('video') or {}
+ vod = video.get('vod') or {}
+
+ def is_rented():
+ if '>You rented this title.<' in webpage:
+ return True
+ if try_get(config, lambda x: x['user']['purchased']):
+ return True
+ for purchase_option in (vod.get('purchase_options') or []):
+ if purchase_option.get('purchased'):
+ return True
+ label = purchase_option.get('label_string')
+ if label and (label.startswith('You rented this') or label.endswith(' remaining')):
+ return True
+ return False
+
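+        # Rented on-demand titles expose only the trailer on the plain video page;
+        # redirect to the player URL of the full feature instead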
+ if is_rented() and vod.get('is_trailer'):
+ feature_id = vod.get('feature_id')
+ if feature_id and not data.get('force_feature_id', False):
+ return self.url_result(smuggle_url(
+ 'https://player.vimeo.com/player/%s' % feature_id,
+ {'force_feature_id': True}), 'Vimeo')
+
+ if not video_description:
+ video_description = self._html_search_regex(
+ r'(?s)<div\s+class="[^"]*description[^"]*"[^>]*>(.*?)</div>',
+ webpage, 'description', default=None)
+ if not video_description:
+ video_description = self._html_search_meta(
+ ['description', 'og:description', 'twitter:description'],
+ webpage, default=None)
+ if not video_description:
+ self.report_warning('Cannot find video description')
+
+ if not timestamp:
+ timestamp = self._search_regex(
+ r'<time[^>]+datetime="([^"]+)"', webpage,
+ 'timestamp', default=None)
+
+ view_count = int_or_none(self._search_regex(r'UserPlays:(\d+)', webpage, 'view count', default=None))
+ like_count = int_or_none(self._search_regex(r'UserLikes:(\d+)', webpage, 'like count', default=None))
+ comment_count = int_or_none(self._search_regex(r'UserComments:(\d+)', webpage, 'comment count', default=None))
+
+ formats = []
+
+ source_format = self._extract_original_format(
+ 'https://vimeo.com/' + video_id, video_id, video.get('unlisted_hash'))
+ if source_format:
+ formats.append(source_format)
+
+ info_dict_config = self._parse_config(config, video_id)
+ formats.extend(info_dict_config['formats'])
+ info_dict['_format_sort_fields'] = info_dict_config['_format_sort_fields']
+
+ json_ld = self._search_json_ld(webpage, video_id, default={})
+
+ if not cc_license:
+ cc_license = self._search_regex(
+ r'<link[^>]+rel=["\']license["\'][^>]+href=(["\'])(?P<license>(?:(?!\1).)+)\1',
+ webpage, 'license', default=None, group='license')
+
+ info_dict.update({
+ 'formats': formats,
+ 'timestamp': unified_timestamp(timestamp),
+ 'description': video_description,
+ 'webpage_url': url,
+ 'view_count': view_count,
+ 'like_count': like_count,
+ 'comment_count': comment_count,
+ 'license': cc_license,
+ })
+
+ return merge_dicts(info_dict, info_dict_config, json_ld)
+
+
+class VimeoOndemandIE(VimeoIE): # XXX: Do not subclass from concrete IE
+ IE_NAME = 'vimeo:ondemand'
+ _VALID_URL = r'https?://(?:www\.)?vimeo\.com/ondemand/(?:[^/]+/)?(?P<id>[^/?#&]+)'
+ _TESTS = [{
+ # ondemand video not available via https://vimeo.com/id
+ 'url': 'https://vimeo.com/ondemand/20704',
+ 'md5': 'c424deda8c7f73c1dfb3edd7630e2f35',
+ 'info_dict': {
+ 'id': '105442900',
+ 'ext': 'mp4',
+ 'title': 'המעבדה - במאי יותם פלדמן',
+ 'uploader': 'גם סרטים',
+ 'uploader_url': r're:https?://(?:www\.)?vimeo\.com/gumfilms',
+ 'uploader_id': 'gumfilms',
+ 'description': 'md5:aeeba3dbd4d04b0fa98a4fdc9c639998',
+ 'upload_date': '20140906',
+ 'timestamp': 1410032453,
+ 'thumbnail': 'https://i.vimeocdn.com/video/488238335-d7bf151c364cff8d467f1b73784668fe60aae28a54573a35d53a1210ae283bd8-d_1280',
+ 'comment_count': int,
+ 'license': 'https://creativecommons.org/licenses/by-nc-nd/3.0/',
+ 'duration': 53,
+ 'view_count': int,
+ 'like_count': int,
+ },
+ 'params': {
+ 'format': 'best[protocol=https]',
+ },
+ 'expected_warnings': ['Unable to download JSON metadata'],
+ }, {
+ # requires Referer to be passed along with og:video:url
+ 'url': 'https://vimeo.com/ondemand/36938/126682985',
+ 'info_dict': {
+ 'id': '126584684',
+ 'ext': 'mp4',
+ 'title': 'Rävlock, rätt läte på rätt plats',
+ 'uploader': 'Lindroth & Norin',
+ 'uploader_url': r're:https?://(?:www\.)?vimeo\.com/lindrothnorin',
+ 'uploader_id': 'lindrothnorin',
+ 'description': 'md5:c3c46a90529612c8279fb6af803fc0df',
+ 'upload_date': '20150502',
+ 'timestamp': 1430586422,
+ 'duration': 121,
+ 'comment_count': int,
+ 'view_count': int,
+ 'thumbnail': 'https://i.vimeocdn.com/video/517077723-7066ae1d9a79d3eb361334fb5d58ec13c8f04b52f8dd5eadfbd6fb0bcf11f613-d_1280',
+ 'like_count': int,
+ },
+ 'params': {
+ 'skip_download': True,
+ },
+ 'expected_warnings': ['Unable to download JSON metadata'],
+ }, {
+ 'url': 'https://vimeo.com/ondemand/nazmaalik',
+ 'only_matching': True,
+ }, {
+ 'url': 'https://vimeo.com/ondemand/141692381',
+ 'only_matching': True,
+ }, {
+ 'url': 'https://vimeo.com/ondemand/thelastcolony/150274832',
+ 'only_matching': True,
+ }]
+
+
+class VimeoChannelIE(VimeoBaseInfoExtractor):
+ IE_NAME = 'vimeo:channel'
+ _VALID_URL = r'https://vimeo\.com/channels/(?P<id>[^/?#]+)/?(?:$|[?#])'
+ _MORE_PAGES_INDICATOR = r'<a.+?rel="next"'
+ _TITLE = None
+ _TITLE_RE = r'<link rel="alternate"[^>]+?title="(.*?)"'
+ _TESTS = [{
+ 'url': 'https://vimeo.com/channels/tributes',
+ 'info_dict': {
+ 'id': 'tributes',
+ 'title': 'Vimeo Tributes',
+ },
+ 'playlist_mincount': 22,
+ }]
+ _BASE_URL_TEMPL = 'https://vimeo.com/channels/%s'
+
+ def _page_url(self, base_url, pagenum):
+ return '%s/videos/page:%d/' % (base_url, pagenum)
+
+ def _extract_list_title(self, webpage):
+ return self._TITLE or self._html_search_regex(
+ self._TITLE_RE, webpage, 'list title', fatal=False)
+
+ def _title_and_entries(self, list_id, base_url):
+ for pagenum in itertools.count(1):
+ page_url = self._page_url(base_url, pagenum)
+ webpage = self._download_webpage(
+ page_url, list_id,
+ 'Downloading page %s' % pagenum)
+
+ if pagenum == 1:
+ yield self._extract_list_title(webpage)
+
+ # Try extracting href first since not all videos are available via
+ # short https://vimeo.com/id URL (e.g. https://vimeo.com/channels/tributes/6213729)
+ clips = re.findall(
+ r'id="clip_(\d+)"[^>]*>\s*<a[^>]+href="(/(?:[^/]+/)*\1)(?:[^>]+\btitle="([^"]+)")?', webpage)
+ if clips:
+ for video_id, video_url, video_title in clips:
+ yield self.url_result(
+ compat_urlparse.urljoin(base_url, video_url),
+ VimeoIE.ie_key(), video_id=video_id, video_title=video_title)
+ # More relaxed fallback
+ else:
+ for video_id in re.findall(r'id=["\']clip_(\d+)', webpage):
+ yield self.url_result(
+ 'https://vimeo.com/%s' % video_id,
+ VimeoIE.ie_key(), video_id=video_id)
+
+ if re.search(self._MORE_PAGES_INDICATOR, webpage, re.DOTALL) is None:
+ break
+
+ def _extract_videos(self, list_id, base_url):
+ title_and_entries = self._title_and_entries(list_id, base_url)
+ list_title = next(title_and_entries)
+ return self.playlist_result(title_and_entries, list_id, list_title)
+
+ def _real_extract(self, url):
+ channel_id = self._match_id(url)
+ return self._extract_videos(channel_id, self._BASE_URL_TEMPL % channel_id)
+
+
+class VimeoUserIE(VimeoChannelIE): # XXX: Do not subclass from concrete IE
+ IE_NAME = 'vimeo:user'
+ _VALID_URL = r'https://vimeo\.com/(?!(?:[0-9]+|watchlater)(?:$|[?#/]))(?P<id>[^/]+)(?:/videos)?/?(?:$|[?#])'
+ _TITLE_RE = r'<a[^>]+?class="user">([^<>]+?)</a>'
+ _TESTS = [{
+ 'url': 'https://vimeo.com/nkistudio/videos',
+ 'info_dict': {
+ 'title': 'Nki',
+ 'id': 'nkistudio',
+ },
+ 'playlist_mincount': 66,
+ }, {
+ 'url': 'https://vimeo.com/nkistudio/',
+ 'only_matching': True,
+ }]
+ _BASE_URL_TEMPL = 'https://vimeo.com/%s'
+
+
+class VimeoAlbumIE(VimeoBaseInfoExtractor):
+ IE_NAME = 'vimeo:album'
+ _VALID_URL = r'https://vimeo\.com/(?:album|showcase)/(?P<id>\d+)(?:$|[?#]|/(?!video))'
+ _TITLE_RE = r'<header id="page_header">\n\s*<h1>(.*?)</h1>'
+ _TESTS = [{
+ 'url': 'https://vimeo.com/album/2632481',
+ 'info_dict': {
+ 'id': '2632481',
+ 'title': 'Staff Favorites: November 2013',
+ },
+ 'playlist_mincount': 13,
+ }, {
+ 'note': 'Password-protected album',
+ 'url': 'https://vimeo.com/album/3253534',
+ 'info_dict': {
+ 'title': 'test',
+ 'id': '3253534',
+ },
+ 'playlist_count': 1,
+ 'params': {
+ 'videopassword': 'youtube-dl',
+        },
+ }]
+ _PAGE_SIZE = 100
+
+ def _fetch_page(self, album_id, authorization, hashed_pass, page):
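+        # OnDemandPagedList passes zero-based page numbers; the Vimeo API counts from 1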
+ api_page = page + 1
+ query = {
+ 'fields': 'link,uri',
+ 'page': api_page,
+ 'per_page': self._PAGE_SIZE,
+ }
+ if hashed_pass:
+ query['_hashed_pass'] = hashed_pass
+ try:
+ videos = self._download_json(
+ 'https://api.vimeo.com/albums/%s/videos' % album_id,
+ album_id, 'Downloading page %d' % api_page, query=query, headers={
+ 'Authorization': 'jwt ' + authorization,
+ 'Accept': 'application/json',
+ })['data']
+ except ExtractorError as e:
+ if isinstance(e.cause, HTTPError) and e.cause.status == 400:
+ return
+ raise
+ for video in videos:
+ link = video.get('link')
+ if not link:
+ continue
+ uri = video.get('uri')
+ video_id = self._search_regex(r'/videos/(\d+)', uri, 'video_id', default=None) if uri else None
+ yield self.url_result(link, VimeoIE.ie_key(), video_id)
+
+ def _real_extract(self, url):
+ album_id = self._match_id(url)
+ viewer = self._download_json(
+ 'https://vimeo.com/_rv/viewer', album_id, fatal=False)
+ if not viewer:
+ webpage = self._download_webpage(url, album_id)
+ viewer = self._parse_json(self._search_regex(
+ r'bootstrap_data\s*=\s*({.+?})</script>',
+ webpage, 'bootstrap data'), album_id)['viewer']
+ jwt = viewer['jwt']
+ album = self._download_json(
+ 'https://api.vimeo.com/albums/' + album_id,
+ album_id, headers={'Authorization': 'jwt ' + jwt, 'Accept': 'application/json'},
+ query={'fields': 'description,name,privacy'})
+ hashed_pass = None
+ if try_get(album, lambda x: x['privacy']['view']) == 'password':
+ password = self.get_param('videopassword')
+ if not password:
+ raise ExtractorError(
+ 'This album is protected by a password, use the --video-password option',
+ expected=True)
+ self._set_vimeo_cookie('vuid', viewer['vuid'])
+ try:
+ hashed_pass = self._download_json(
+ 'https://vimeo.com/showcase/%s/auth' % album_id,
+ album_id, 'Verifying the password', data=urlencode_postdata({
+ 'password': password,
+ 'token': viewer['xsrft'],
+ }), headers={
+ 'X-Requested-With': 'XMLHttpRequest',
+ })['hashed_pass']
+ except ExtractorError as e:
+ if isinstance(e.cause, HTTPError) and e.cause.status == 401:
+ raise ExtractorError('Wrong password', expected=True)
+ raise
+ entries = OnDemandPagedList(functools.partial(
+ self._fetch_page, album_id, jwt, hashed_pass), self._PAGE_SIZE)
+ return self.playlist_result(
+ entries, album_id, album.get('name'), album.get('description'))
+
+
+class VimeoGroupsIE(VimeoChannelIE): # XXX: Do not subclass from concrete IE
+ IE_NAME = 'vimeo:group'
+ _VALID_URL = r'https://vimeo\.com/groups/(?P<id>[^/]+)(?:/(?!videos?/\d+)|$)'
+ _TESTS = [{
+ 'url': 'https://vimeo.com/groups/meetup',
+ 'info_dict': {
+ 'id': 'meetup',
+ 'title': 'Vimeo Meetup!',
+ },
+ 'playlist_mincount': 27,
+ }]
+ _BASE_URL_TEMPL = 'https://vimeo.com/groups/%s'
+
+
+class VimeoReviewIE(VimeoBaseInfoExtractor):
+ IE_NAME = 'vimeo:review'
+ IE_DESC = 'Review pages on vimeo'
+ _VALID_URL = r'(?P<url>https://vimeo\.com/[^/]+/review/(?P<id>[^/]+)/[0-9a-f]{10})'
+ _TESTS = [{
+ 'url': 'https://vimeo.com/user21297594/review/75524534/3c257a1b5d',
+ 'md5': 'c507a72f780cacc12b2248bb4006d253',
+ 'info_dict': {
+ 'id': '75524534',
+ 'ext': 'mp4',
+ 'title': "DICK HARDWICK 'Comedian'",
+ 'uploader': 'Richard Hardwick',
+ 'uploader_id': 'user21297594',
+ 'description': "Comedian Dick Hardwick's five minute demo filmed in front of a live theater audience.\nEdit by Doug Mattocks",
+ 'duration': 304,
+ 'thumbnail': 'https://i.vimeocdn.com/video/450115033-43303819d9ebe24c2630352e18b7056d25197d09b3ae901abdac4c4f1d68de71-d_1280',
+ 'uploader_url': 'https://vimeo.com/user21297594',
+ },
+ }, {
+ 'note': 'video player needs Referer',
+ 'url': 'https://vimeo.com/user22258446/review/91613211/13f927e053',
+ 'md5': '6295fdab8f4bf6a002d058b2c6dce276',
+ 'info_dict': {
+ 'id': '91613211',
+ 'ext': 'mp4',
+ 'title': 're:(?i)^Death by dogma versus assembling agile . Sander Hoogendoorn',
+ 'uploader': 'DevWeek Events',
+ 'duration': 2773,
+ 'thumbnail': r're:^https?://.*\.jpg$',
+ 'uploader_id': 'user22258446',
+ },
+ 'skip': 'video gone',
+ }, {
+ 'note': 'Password protected',
+ 'url': 'https://vimeo.com/user37284429/review/138823582/c4d865efde',
+ 'info_dict': {
+ 'id': '138823582',
+ 'ext': 'mp4',
+ 'title': 'EFFICIENT PICKUP MASTERCLASS MODULE 1',
+ 'uploader': 'TMB',
+ 'uploader_id': 'user37284429',
+ },
+ 'params': {
+ 'videopassword': 'holygrail',
+ },
+ 'skip': 'video gone',
+ }]
+
+ def _real_extract(self, url):
+ page_url, video_id = self._match_valid_url(url).groups()
+ data = self._download_json(
+ page_url.replace('/review/', '/review/data/'), video_id)
+ if data.get('isLocked') is True:
+ video_password = self._get_video_password()
+ viewer = self._download_json(
+ 'https://vimeo.com/_rv/viewer', video_id)
+ webpage = self._verify_video_password(
+ 'https://vimeo.com/' + video_id, video_id,
+ video_password, viewer['xsrft'], viewer['vuid'])
+ clip_page_config = self._parse_json(self._search_regex(
+ r'window\.vimeo\.clip_page_config\s*=\s*({.+?});',
+ webpage, 'clip page config'), video_id)
+ config_url = clip_page_config['player']['config_url']
+ clip_data = clip_page_config.get('clip') or {}
+ else:
+ clip_data = data['clipData']
+ config_url = clip_data['configUrl']
+ config = self._download_json(config_url, video_id)
+ info_dict = self._parse_config(config, video_id)
+ source_format = self._extract_original_format(
+ page_url + '/action', video_id)
+ if source_format:
+ info_dict['formats'].append(source_format)
+ info_dict['description'] = clean_html(clip_data.get('description'))
+ return info_dict
+
+
+class VimeoWatchLaterIE(VimeoChannelIE): # XXX: Do not subclass from concrete IE
+ IE_NAME = 'vimeo:watchlater'
+ IE_DESC = 'Vimeo watch later list, ":vimeowatchlater" keyword (requires authentication)'
+ _VALID_URL = r'https://vimeo\.com/(?:home/)?watchlater|:vimeowatchlater'
+ _TITLE = 'Watch Later'
+ _LOGIN_REQUIRED = True
+ _TESTS = [{
+ 'url': 'https://vimeo.com/watchlater',
+ 'only_matching': True,
+ }]
+
+ def _page_url(self, base_url, pagenum):
+ url = '%s/page:%d/' % (base_url, pagenum)
+ request = Request(url)
+        # Set the header to get a partial HTML page with the ids;
+        # the normal page doesn't contain them.
+ request.headers['X-Requested-With'] = 'XMLHttpRequest'
+ return request
+
+ def _real_extract(self, url):
+ return self._extract_videos('watchlater', 'https://vimeo.com/watchlater')
+
+
+class VimeoLikesIE(VimeoChannelIE): # XXX: Do not subclass from concrete IE
+ _VALID_URL = r'https://(?:www\.)?vimeo\.com/(?P<id>[^/]+)/likes/?(?:$|[?#]|sort:)'
+ IE_NAME = 'vimeo:likes'
+ IE_DESC = 'Vimeo user likes'
+ _TESTS = [{
+ 'url': 'https://vimeo.com/user755559/likes/',
+ 'playlist_mincount': 293,
+ 'info_dict': {
+ 'id': 'user755559',
+ 'title': 'urza’s Likes',
+ },
+ }, {
+ 'url': 'https://vimeo.com/stormlapse/likes',
+ 'only_matching': True,
+ }]
+
+ def _page_url(self, base_url, pagenum):
+ return '%s/page:%d/' % (base_url, pagenum)
+
+ def _real_extract(self, url):
+ user_id = self._match_id(url)
+ return self._extract_videos(user_id, 'https://vimeo.com/%s/likes' % user_id)
+
+
+class VHXEmbedIE(VimeoBaseInfoExtractor):
+ IE_NAME = 'vhx:embed'
+ _VALID_URL = r'https?://embed\.vhx\.tv/videos/(?P<id>\d+)'
+ _EMBED_REGEX = [r'<iframe[^>]+src="(?P<url>https?://embed\.vhx\.tv/videos/\d+[^"]*)"']
+
+ @classmethod
+ def _extract_embed_urls(cls, url, webpage):
+ for embed_url in super()._extract_embed_urls(url, webpage):
+ yield cls._smuggle_referrer(embed_url, url)
+
+ def _real_extract(self, url):
+ video_id = self._match_id(url)
+ url, _, headers = self._unsmuggle_headers(url)
+ webpage = self._download_webpage(url, video_id, headers=headers)
+ config_url = self._parse_json(self._search_regex(
+ r'window\.OTTData\s*=\s*({.+})', webpage,
+ 'ott data'), video_id, js_to_json)['config_url']
+ config = self._download_json(config_url, video_id)
+ info = self._parse_config(config, video_id)
+ info['id'] = video_id
+ return info
+
+
+class VimeoProIE(VimeoBaseInfoExtractor):
+ IE_NAME = 'vimeo:pro'
+ _VALID_URL = r'https?://(?:www\.)?vimeopro\.com/[^/?#]+/(?P<slug>[^/?#]+)(?:(?:/videos?/(?P<id>[0-9]+)))?'
+ _TESTS = [{
+ # Vimeo URL derived from video_id
+ 'url': 'http://vimeopro.com/openstreetmapus/state-of-the-map-us-2013/video/68093876',
+ 'md5': '3b5ca6aa22b60dfeeadf50b72e44ed82',
+ 'note': 'Vimeo Pro video (#1197)',
+ 'info_dict': {
+ 'id': '68093876',
+ 'ext': 'mp4',
+ 'uploader_url': r're:https?://(?:www\.)?vimeo\.com/openstreetmapus',
+ 'uploader_id': 'openstreetmapus',
+ 'uploader': 'OpenStreetMap US',
+ 'title': 'Andy Allan - Putting the Carto into OpenStreetMap Cartography',
+ 'description': 'md5:2c362968038d4499f4d79f88458590c1',
+ 'duration': 1595,
+ 'upload_date': '20130610',
+ 'timestamp': 1370893156,
+ 'license': 'by',
+ 'thumbnail': 'https://i.vimeocdn.com/video/440260469-19b0d92fca3bd84066623b53f1eb8aaa3980c6c809e2d67b6b39ab7b4a77a344-d_960',
+ 'view_count': int,
+ 'comment_count': int,
+ 'like_count': int,
+ 'tags': 'count:1',
+ },
+ 'params': {
+ 'format': 'best[protocol=https]',
+ },
+ }, {
+ # password-protected VimeoPro page with Vimeo player embed
+ 'url': 'https://vimeopro.com/cadfem/simulation-conference-mechanische-systeme-in-perfektion',
+ 'info_dict': {
+ 'id': '764543723',
+ 'ext': 'mp4',
+ 'title': 'Mechanische Systeme in Perfektion: Realität erfassen, Innovation treiben',
+ 'thumbnail': 'https://i.vimeocdn.com/video/1543784598-a1a750494a485e601110136b9fe11e28c2131942452b3a5d30391cb3800ca8fd-d_1280',
+ 'description': 'md5:2a9d195cd1b0f6f79827107dc88c2420',
+ 'uploader': 'CADFEM',
+ 'uploader_id': 'cadfem',
+ 'uploader_url': 'https://vimeo.com/cadfem',
+ 'duration': 12505,
+ 'chapters': 'count:10',
+ },
+ 'params': {
+ 'videopassword': 'Conference2022',
+ 'skip_download': True,
+ },
+ }]
+
+ def _real_extract(self, url):
+ display_id, video_id = self._match_valid_url(url).group('slug', 'id')
+ if video_id:
+ display_id = video_id
+ webpage = self._download_webpage(url, display_id)
+
+ password_form = self._search_regex(
+ r'(?is)<form[^>]+?method=["\']post["\'][^>]*>(.+?password.+?)</form>',
+ webpage, 'password form', default=None)
+ if password_form:
+ try:
+ webpage = self._download_webpage(url, display_id, data=urlencode_postdata({
+ 'password': self._get_video_password(),
+ **self._hidden_inputs(password_form),
+ }), note='Logging in with video password')
+ except ExtractorError as e:
+ if isinstance(e.cause, HTTPError) and e.cause.status == 418:
+ raise ExtractorError('Wrong video password', expected=True)
+ raise
+
+ description = None
+ # even if we have video_id, some videos require player URL with portfolio_id query param
+ # https://github.com/ytdl-org/youtube-dl/issues/20070
+ vimeo_url = VimeoIE._extract_url(url, webpage)
+ if vimeo_url:
+ description = self._html_search_meta('description', webpage, default=None)
+ elif video_id:
+ vimeo_url = f'https://vimeo.com/{video_id}'
+ else:
+ raise ExtractorError(
+ 'No Vimeo embed or video ID could be found in VimeoPro page', expected=True)
+
+ return self.url_result(vimeo_url, VimeoIE, video_id, url_transparent=True,
+ description=description)
diff --git a/yt_dlp/extractor/vimm.py b/yt_dlp/extractor/vimm.py
new file mode 100644
index 0000000..7097149
--- /dev/null
+++ b/yt_dlp/extractor/vimm.py
@@ -0,0 +1,66 @@
+from .common import InfoExtractor
+
+
+class VimmIE(InfoExtractor):
+ IE_NAME = 'Vimm:stream'
+ _VALID_URL = r'https?://(?:www\.)?vimm\.tv/(?:c/)?(?P<id>[0-9a-z-]+)$'
+ _TESTS = [{
+ 'url': 'https://www.vimm.tv/c/calimeatwagon',
+ 'info_dict': {
+ 'id': 'calimeatwagon',
+ 'ext': 'mp4',
+ 'title': 're:^calimeatwagon [0-9]{4}-[0-9]{2}-[0-9]{2} [0-9]{2}:[0-9]{2}$',
+ 'live_status': 'is_live',
+ },
+ 'skip': 'Live',
+ }, {
+ 'url': 'https://www.vimm.tv/octaafradio',
+ 'only_matching': True,
+ }]
+
+ def _real_extract(self, url):
+ channel_id = self._match_id(url)
+
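+        # Live channels expose an HLS playlist at a predictable URL keyed by channel name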
+ formats, subs = self._extract_m3u8_formats_and_subtitles(
+ f'https://www.vimm.tv/hls/{channel_id}.m3u8', channel_id, 'mp4', m3u8_id='hls', live=True)
+
+ return {
+ 'id': channel_id,
+ 'title': channel_id,
+ 'is_live': True,
+ 'formats': formats,
+ 'subtitles': subs,
+ }
+
+
+class VimmRecordingIE(InfoExtractor):
+ IE_NAME = 'Vimm:recording'
+ _VALID_URL = r'https?://(?:www\.)?vimm\.tv/c/(?P<channel_id>[0-9a-z-]+)\?v=(?P<video_id>[0-9A-Za-z]+)'
+ _TESTS = [{
+ 'url': 'https://www.vimm.tv/c/kaldewei?v=2JZsrPTFxsSz',
+ 'md5': '15122ee95baa32a548e4a3e120b598f1',
+ 'info_dict': {
+ 'id': '2JZsrPTFxsSz',
+ 'ext': 'mp4',
+ 'title': 'VIMM - [DE/GER] Kaldewei Live - In Farbe und Bunt',
+ 'uploader_id': 'kaldewei',
+ },
+ }]
+
+ def _real_extract(self, url):
+ channel_id, video_id = self._match_valid_url(url).groups()
+
+ webpage = self._download_webpage(url, video_id)
+ title = self._og_search_title(webpage)
+
+ formats, subs = self._extract_m3u8_formats_and_subtitles(
+ f'https://d211qfrkztakg3.cloudfront.net/{channel_id}/{video_id}/index.m3u8', video_id, 'mp4', m3u8_id='hls', live=False)
+
+ return {
+ 'id': video_id,
+ 'title': title,
+ 'is_live': False,
+ 'uploader_id': channel_id,
+ 'formats': formats,
+ 'subtitles': subs,
+ }
diff --git a/yt_dlp/extractor/vine.py b/yt_dlp/extractor/vine.py
new file mode 100644
index 0000000..1909980
--- /dev/null
+++ b/yt_dlp/extractor/vine.py
@@ -0,0 +1,151 @@
+from .common import InfoExtractor
+from ..compat import compat_str
+from ..utils import (
+ determine_ext,
+ format_field,
+ int_or_none,
+ unified_timestamp,
+)
+
+
+class VineIE(InfoExtractor):
+ _VALID_URL = r'https?://(?:www\.)?vine\.co/(?:v|oembed)/(?P<id>\w+)'
+ _EMBED_REGEX = [r'<iframe[^>]+src=[\'"](?P<url>(?:https?:)?//(?:www\.)?vine\.co/v/[^/]+/embed/(?:simple|postcard))']
+ _TESTS = [{
+ 'url': 'https://vine.co/v/b9KOOWX7HUx',
+ 'md5': '2f36fed6235b16da96ce9b4dc890940d',
+ 'info_dict': {
+ 'id': 'b9KOOWX7HUx',
+ 'ext': 'mp4',
+ 'title': 'Chicken.',
+ 'alt_title': 'Vine by Jack',
+ 'timestamp': 1368997951,
+ 'upload_date': '20130519',
+ 'uploader': 'Jack',
+ 'uploader_id': '76',
+ 'view_count': int,
+ 'like_count': int,
+ 'comment_count': int,
+ 'repost_count': int,
+ },
+ }, {
+ 'url': 'https://vine.co/v/e192BnZnZ9V',
+ 'info_dict': {
+ 'id': 'e192BnZnZ9V',
+ 'ext': 'mp4',
+ 'title': 'ยิ้ม~ เขิน~ อาย~ น่าร้ากอ้ะ >//< @n_whitewo @orlameena #lovesicktheseries #lovesickseason2',
+ 'alt_title': 'Vine by Pimry_zaa',
+ 'timestamp': 1436057405,
+ 'upload_date': '20150705',
+ 'uploader': 'Pimry_zaa',
+ 'uploader_id': '1135760698325307392',
+ 'view_count': int,
+ 'like_count': int,
+ 'comment_count': int,
+ 'repost_count': int,
+ },
+ 'params': {
+ 'skip_download': True,
+ },
+ }, {
+ 'url': 'https://vine.co/v/MYxVapFvz2z',
+ 'only_matching': True,
+ }, {
+ 'url': 'https://vine.co/v/bxVjBbZlPUH',
+ 'only_matching': True,
+ }, {
+ 'url': 'https://vine.co/oembed/MYxVapFvz2z.json',
+ 'only_matching': True,
+ }]
+
+ def _real_extract(self, url):
+ video_id = self._match_id(url)
+
+ data = self._download_json(
+ 'https://archive.vine.co/posts/%s.json' % video_id, video_id)
+
+ def video_url(kind):
+ for url_suffix in ('Url', 'URL'):
+ format_url = data.get('video%s%s' % (kind, url_suffix))
+ if format_url:
+ return format_url
+
+ formats = []
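+        # Enumeration order ranks quality: low < standard < dash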
+ for quality, format_id in enumerate(('low', '', 'dash')):
+ format_url = video_url(format_id.capitalize())
+ if not format_url:
+ continue
+ # DASH link returns plain mp4
+ if format_id == 'dash' and determine_ext(format_url) == 'mpd':
+ formats.extend(self._extract_mpd_formats(
+ format_url, video_id, mpd_id='dash', fatal=False))
+ else:
+ formats.append({
+ 'url': format_url,
+ 'format_id': format_id or 'standard',
+ 'quality': quality,
+ })
+ self._check_formats(formats, video_id)
+
+ username = data.get('username')
+
+ alt_title = format_field(username, None, 'Vine by %s')
+
+ return {
+ 'id': video_id,
+ 'title': data.get('description') or alt_title or 'Vine video',
+ 'alt_title': alt_title,
+ 'thumbnail': data.get('thumbnailUrl'),
+ 'timestamp': unified_timestamp(data.get('created')),
+ 'uploader': username,
+ 'uploader_id': data.get('userIdStr'),
+ 'view_count': int_or_none(data.get('loops')),
+ 'like_count': int_or_none(data.get('likes')),
+ 'comment_count': int_or_none(data.get('comments')),
+ 'repost_count': int_or_none(data.get('reposts')),
+ 'formats': formats,
+ }
+
+
+class VineUserIE(InfoExtractor):
+ IE_NAME = 'vine:user'
+ _VALID_URL = r'https?://vine\.co/(?P<u>u/)?(?P<user>[^/]+)'
+ _VINE_BASE_URL = 'https://vine.co/'
+ _TESTS = [{
+ 'url': 'https://vine.co/itsruthb',
+ 'info_dict': {
+ 'id': 'itsruthb',
+ 'title': 'Ruth B',
+ 'description': '| Instagram/Twitter: itsruthb | still a lost boy from neverland',
+ },
+ 'playlist_mincount': 611,
+ }, {
+ 'url': 'https://vine.co/u/942914934646415360',
+ 'only_matching': True,
+ }]
+
+    @classmethod
+    def suitable(cls, url):
+        return not VineIE.suitable(url) and super().suitable(url)
+
+ def _real_extract(self, url):
+ mobj = self._match_valid_url(url)
+ user = mobj.group('user')
+ u = mobj.group('u')
+
+ profile_url = '%sapi/users/profiles/%s%s' % (
+ self._VINE_BASE_URL, 'vanity/' if not u else '', user)
+ profile_data = self._download_json(
+ profile_url, user, note='Downloading user profile data')
+
+ data = profile_data['data']
+ user_id = data.get('userId') or data['userIdStr']
+ profile = self._download_json(
+ 'https://archive.vine.co/profiles/%s.json' % user_id, user_id)
+ entries = [
+ self.url_result(
+ 'https://vine.co/v/%s' % post_id, ie='Vine', video_id=post_id)
+ for post_id in profile['posts']
+ if post_id and isinstance(post_id, compat_str)]
+ return self.playlist_result(
+ entries, user, profile.get('username'), profile.get('description'))
diff --git a/yt_dlp/extractor/viously.py b/yt_dlp/extractor/viously.py
new file mode 100644
index 0000000..9ec7ed3
--- /dev/null
+++ b/yt_dlp/extractor/viously.py
@@ -0,0 +1,60 @@
+import base64
+import re
+
+from .common import InfoExtractor
+from ..utils import (
+ extract_attributes,
+ int_or_none,
+ parse_iso8601,
+)
+from ..utils.traversal import traverse_obj
+
+
+class ViouslyIE(InfoExtractor):
+ _VALID_URL = False
+ _WEBPAGE_TESTS = [{
+ 'url': 'http://www.turbo.fr/videos-voiture/454443-turbo-du-07-09-2014-renault-twingo-3-bentley-continental-gt-speed-ces-guide-achat-dacia.html',
+ 'md5': '37a6c3381599381ff53a7e1e0575c0bc',
+ 'info_dict': {
+ 'id': 'F_xQzS2jwb3',
+ 'ext': 'mp4',
+ 'title': 'Turbo du 07/09/2014\xa0: Renault Twingo 3, Bentley Continental GT Speed, CES, Guide Achat Dacia...',
+ 'description': 'Turbo du 07/09/2014\xa0: Renault Twingo 3, Bentley Continental GT Speed, CES, Guide Achat Dacia...',
+ 'age_limit': 0,
+ 'upload_date': '20230328',
+ 'timestamp': 1680037507,
+ 'duration': 3716,
+ 'categories': ['motors'],
+ }
+ }]
+
+ def _extract_from_webpage(self, url, webpage):
+ viously_players = re.findall(r'<div[^>]*class="(?:[^"]*\s)?v(?:iou)?sly-player(?:\s[^"]*)?"[^>]*>', webpage)
+ if not viously_players:
+ return
+
+ def custom_decode(text):
+ STANDARD_ALPHABET = 'ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/='
+ CUSTOM_ALPHABET = 'VIOUSLYABCDEFGHJKMNPQRTWXZviouslyabcdefghjkmnpqrtwxz9876543210+/='
+ data = base64.b64decode(text.translate(str.maketrans(CUSTOM_ALPHABET, STANDARD_ALPHABET)))
+ return data.decode('utf-8').strip('\x00')
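+        # note: the export payload is ordinary base64 written in a shuffled
+        # ("VIOUSLY...") alphabet, so decoding is a one-to-one character
+        # substitution back to the standard alphabet followed by b64decode;
+        # trailing NULs are block padding and are stripped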
+
+ for video_id in traverse_obj(viously_players, (..., {extract_attributes}, 'id')):
+ formats = self._extract_m3u8_formats(
+ f'https://www.viously.com/video/hls/{video_id}/index.m3u8', video_id, fatal=False)
+ if not formats:
+ continue
+ data = self._download_json(
+ f'https://www.viously.com/export/json/{video_id}', video_id,
+ transform_source=custom_decode, fatal=False)
+ yield {
+ 'id': video_id,
+ 'formats': formats,
+ **traverse_obj(data, ('video', {
+ 'title': ('title', {str}),
+ 'description': ('description', {str}),
+ 'duration': ('duration', {int_or_none}),
+ 'timestamp': ('iso_date', {parse_iso8601}),
+ 'categories': ('category', 'name', {str}, {lambda x: [x] if x else None}),
+ })),
+ }
diff --git a/yt_dlp/extractor/viqeo.py b/yt_dlp/extractor/viqeo.py
new file mode 100644
index 0000000..f0a7b5e
--- /dev/null
+++ b/yt_dlp/extractor/viqeo.py
@@ -0,0 +1,87 @@
+from .common import InfoExtractor
+from ..utils import (
+ int_or_none,
+ str_or_none,
+ url_or_none,
+)
+
+
+class ViqeoIE(InfoExtractor):
+ _WORKING = False
+ _VALID_URL = r'''(?x)
+ (?:
+ viqeo:|
+ https?://cdn\.viqeo\.tv/embed/*\?.*?\bvid=|
+                        https?://api\.viqeo\.tv/v\d+/data/startup\?.*?\bvideo(?:%5B%5D|\[\])=
+ )
+ (?P<id>[\da-f]+)
+ '''
+ _EMBED_REGEX = [r'<iframe[^>]+\bsrc=(["\'])(?P<url>(?:https?:)?//cdn\.viqeo\.tv/embed/*\?.*?\bvid=[\da-f]+.*?)\1']
+ _TESTS = [{
+ 'url': 'https://cdn.viqeo.tv/embed/?vid=cde96f09d25f39bee837',
+ 'md5': 'a169dd1a6426b350dca4296226f21e76',
+ 'info_dict': {
+ 'id': 'cde96f09d25f39bee837',
+ 'ext': 'mp4',
+ 'title': 'cde96f09d25f39bee837',
+ 'thumbnail': r're:^https?://.*\.jpg$',
+ 'duration': 76,
+ },
+ }, {
+ 'url': 'viqeo:cde96f09d25f39bee837',
+ 'only_matching': True,
+ }, {
+ 'url': 'https://api.viqeo.tv/v1/data/startup?video%5B%5D=71bbec412ade45c3216c&profile=112',
+ 'only_matching': True,
+ }]
+
+ def _real_extract(self, url):
+ video_id = self._match_id(url)
+
+ webpage = self._download_webpage(
+ 'https://cdn.viqeo.tv/embed/?vid=%s' % video_id, video_id)
+
+ data = self._parse_json(
+ self._search_regex(
+ r'SLOT_DATA\s*=\s*({.+?})\s*;', webpage, 'slot data'),
+ video_id)
+
+ formats = []
+ thumbnails = []
+ for media_file in data['mediaFiles']:
+ if not isinstance(media_file, dict):
+ continue
+ media_url = url_or_none(media_file.get('url'))
+ if not media_url or not media_url.startswith(('http', '//')):
+ continue
+ media_type = str_or_none(media_file.get('type'))
+ if not media_type:
+ continue
+ media_kind = media_type.split('/')[0].lower()
+ f = {
+ 'url': media_url,
+ 'width': int_or_none(media_file.get('width')),
+ 'height': int_or_none(media_file.get('height')),
+ }
+ format_id = str_or_none(media_file.get('quality'))
+ if media_kind == 'image':
+ f['id'] = format_id
+ thumbnails.append(f)
+ elif media_kind in ('video', 'audio'):
+ is_audio = media_kind == 'audio'
+ f.update({
+ 'format_id': 'audio' if is_audio else format_id,
+ 'fps': int_or_none(media_file.get('fps')),
+ 'vcodec': 'none' if is_audio else None,
+ })
+ formats.append(f)
+
+ duration = int_or_none(data.get('duration'))
+
+ return {
+ 'id': video_id,
+ 'title': video_id,
+ 'duration': duration,
+ 'thumbnails': thumbnails,
+ 'formats': formats,
+ }
diff --git a/yt_dlp/extractor/viu.py b/yt_dlp/extractor/viu.py
new file mode 100644
index 0000000..6f9af9f
--- /dev/null
+++ b/yt_dlp/extractor/viu.py
@@ -0,0 +1,542 @@
+import json
+import random
+import re
+import urllib.parse
+import uuid
+
+from .common import InfoExtractor
+from ..compat import compat_str
+from ..utils import (
+    ExtractorError,
+    int_or_none,
+    remove_end,
+    smuggle_url,
+    strip_or_none,
+    traverse_obj,
+    try_get,
+    unified_timestamp,
+    unsmuggle_url,
+    url_or_none,
+)
+
+
+class ViuBaseIE(InfoExtractor):
+    def _call_api(self, path, *args, headers=None, **kwargs):
+        response = self._download_json(
+            f'https://www.viu.com/api/{path}', *args, **kwargs,
+            headers={**self.geo_verification_headers(), **(headers or {})})['response']
+ if response.get('status') != 'success':
+ raise ExtractorError(f'{self.IE_NAME} said: {response["message"]}', expected=True)
+ return response
+
+
+class ViuIE(ViuBaseIE):
+ _VALID_URL = r'(?:viu:|https?://[^/]+\.viu\.com/[a-z]{2}/media/)(?P<id>\d+)'
+ _TESTS = [{
+ 'url': 'https://www.viu.com/en/media/1116705532?containerId=playlist-22168059',
+ 'info_dict': {
+ 'id': '1116705532',
+ 'ext': 'mp4',
+ 'title': 'Citizen Khan - Ep 1',
+ 'description': 'md5:d7ea1604f49e5ba79c212c551ce2110e',
+ },
+ 'params': {
+ 'skip_download': 'm3u8 download',
+ },
+ 'skip': 'Geo-restricted to India',
+ }, {
+ 'url': 'https://www.viu.com/en/media/1130599965',
+ 'info_dict': {
+ 'id': '1130599965',
+ 'ext': 'mp4',
+ 'title': 'Jealousy Incarnate - Episode 1',
+ 'description': 'md5:d3d82375cab969415d2720b6894361e9',
+ },
+ 'params': {
+ 'skip_download': 'm3u8 download',
+ },
+ 'skip': 'Geo-restricted to Indonesia',
+ }, {
+ 'url': 'https://india.viu.com/en/media/1126286865',
+ 'only_matching': True,
+ }]
+
+ def _real_extract(self, url):
+ video_id = self._match_id(url)
+
+ video_data = self._call_api(
+ 'clip/load', video_id, 'Downloading video data', query={
+ 'appid': 'viu_desktop',
+ 'fmt': 'json',
+ 'id': video_id
+ })['item'][0]
+
+ title = video_data['title']
+
+ m3u8_url = None
+ url_path = video_data.get('urlpathd') or video_data.get('urlpath')
+ tdirforwhole = video_data.get('tdirforwhole')
+        # the 'hlsfile' playlist uses #EXT-X-BYTERANGE, which ffmpeg does not
+        # support (#10955); yt-dlp's native HLS downloader does support it,
+        # so switching back to 'hlsfile' may be possible (FIXME)
+ # hls_file = video_data.get('hlsfile')
+ hls_file = video_data.get('jwhlsfile')
+ if url_path and tdirforwhole and hls_file:
+ m3u8_url = '%s/%s/%s' % (url_path, tdirforwhole, hls_file)
+ else:
+ # m3u8_url = re.sub(
+ # r'(/hlsc_)[a-z]+(\d+\.m3u8)',
+ # r'\1whe\2', video_data['href'])
+ m3u8_url = video_data['href']
+ formats, subtitles = self._extract_m3u8_formats_and_subtitles(m3u8_url, video_id, 'mp4')
+
+ for key, value in video_data.items():
+ mobj = re.match(r'^subtitle_(?P<lang>[^_]+)_(?P<ext>(vtt|srt))', key)
+ if not mobj:
+ continue
+ subtitles.setdefault(mobj.group('lang'), []).append({
+ 'url': value,
+ 'ext': mobj.group('ext')
+ })
+
+ return {
+ 'id': video_id,
+ 'title': title,
+ 'description': video_data.get('description'),
+ 'series': video_data.get('moviealbumshowname'),
+ 'episode': title,
+ 'episode_number': int_or_none(video_data.get('episodeno')),
+ 'duration': int_or_none(video_data.get('duration')),
+ 'formats': formats,
+ 'subtitles': subtitles,
+ }
+
+
+class ViuPlaylistIE(ViuBaseIE):
+ IE_NAME = 'viu:playlist'
+ _VALID_URL = r'https?://www\.viu\.com/[^/]+/listing/playlist-(?P<id>\d+)'
+ _TEST = {
+ 'url': 'https://www.viu.com/en/listing/playlist-22461380',
+ 'info_dict': {
+ 'id': '22461380',
+ 'title': 'The Good Wife',
+ },
+ 'playlist_count': 16,
+ 'skip': 'Geo-restricted to Indonesia',
+ }
+
+ def _real_extract(self, url):
+ playlist_id = self._match_id(url)
+ playlist_data = self._call_api(
+ 'container/load', playlist_id,
+ 'Downloading playlist info', query={
+ 'appid': 'viu_desktop',
+ 'fmt': 'json',
+ 'id': 'playlist-' + playlist_id
+ })['container']
+
+ entries = []
+ for item in playlist_data.get('item', []):
+ item_id = item.get('id')
+ if not item_id:
+ continue
+ item_id = compat_str(item_id)
+ entries.append(self.url_result(
+ 'viu:' + item_id, 'Viu', item_id))
+
+ return self.playlist_result(
+ entries, playlist_id, playlist_data.get('title'))
+
+
+class ViuOTTIE(InfoExtractor):
+ IE_NAME = 'viu:ott'
+ _NETRC_MACHINE = 'viu'
+ _VALID_URL = r'https?://(?:www\.)?viu\.com/ott/(?P<country_code>[a-z]{2})/(?P<lang_code>[a-z]{2}-[a-z]{2})/vod/(?P<id>\d+)'
+ _TESTS = [{
+ 'url': 'http://www.viu.com/ott/sg/en-us/vod/3421/The%20Prime%20Minister%20and%20I',
+ 'info_dict': {
+ 'id': '3421',
+ 'ext': 'mp4',
+ 'title': 'A New Beginning',
+ 'description': 'md5:1e7486a619b6399b25ba6a41c0fe5b2c',
+ },
+ 'params': {
+ 'skip_download': 'm3u8 download',
+ 'noplaylist': True,
+ },
+ 'skip': 'Geo-restricted to Singapore',
+ }, {
+ 'url': 'https://www.viu.com/ott/hk/zh-hk/vod/430078/%E7%AC%AC%E5%85%AD%E6%84%9F-3',
+ 'info_dict': {
+ 'id': '430078',
+ 'ext': 'mp4',
+ 'title': '大韓民國的1%',
+ 'description': 'md5:74d6db47ddd9ddb9c89a05739103ccdb',
+ 'episode_number': 1,
+ 'duration': 6614,
+ 'episode': '大韓民國的1%',
+ 'series': '第六感 3',
+ 'thumbnail': 'https://d2anahhhmp1ffz.cloudfront.net/1313295781/d2b14f48d008ef2f3a9200c98d8e9b63967b9cc2',
+ },
+ 'params': {
+ 'skip_download': 'm3u8 download',
+ 'noplaylist': True,
+ },
+ 'skip': 'Geo-restricted to Hong Kong',
+ }, {
+ 'url': 'https://www.viu.com/ott/hk/zh-hk/vod/444666/%E6%88%91%E7%9A%84%E5%AE%A4%E5%8F%8B%E6%98%AF%E4%B9%9D%E5%B0%BE%E7%8B%90',
+ 'playlist_count': 16,
+ 'info_dict': {
+ 'id': '23807',
+ 'title': '我的室友是九尾狐',
+ 'description': 'md5:b42c95f2b4a316cdd6ae14ca695f33b9',
+ },
+ 'params': {
+ 'skip_download': 'm3u8 download',
+ 'noplaylist': False,
+ },
+ 'skip': 'Geo-restricted to Hong Kong',
+ }]
+
+ _AREA_ID = {
+ 'HK': 1,
+ 'SG': 2,
+ 'TH': 4,
+ 'PH': 5,
+ }
+ _LANGUAGE_FLAG = {
+ 'zh-hk': 1,
+ 'zh-cn': 2,
+ 'en-us': 3,
+ }
+
+ _user_token = None
+ _auth_codes = {}
+
+ def _detect_error(self, response):
+ code = try_get(response, lambda x: x['status']['code'])
+ if code and code > 0:
+ message = try_get(response, lambda x: x['status']['message'])
+ raise ExtractorError(f'{self.IE_NAME} said: {message} ({code})', expected=True)
+ return response.get('data') or {}
+
+ def _login(self, country_code, video_id):
+ if self._user_token is None:
+ username, password = self._get_login_info()
+ if username is None:
+ return
+ headers = {
+ 'Authorization': f'Bearer {self._auth_codes[country_code]}',
+ 'Content-Type': 'application/json'
+ }
+ data = self._download_json(
+ 'https://api-gateway-global.viu.com/api/account/validate',
+ video_id, 'Validating email address', headers=headers,
+ data=json.dumps({
+ 'principal': username,
+ 'provider': 'email'
+ }).encode())
+ if not data.get('exists'):
+ raise ExtractorError('Invalid email address')
+
+ data = self._download_json(
+ 'https://api-gateway-global.viu.com/api/auth/login',
+ video_id, 'Logging in', headers=headers,
+ data=json.dumps({
+ 'email': username,
+ 'password': password,
+ 'provider': 'email',
+ }).encode())
+ self._detect_error(data)
+ self._user_token = data.get('identity')
+        # the cached auth code must be replaced with the logged-in user's
+        # token, otherwise subsequent requests will fail again
+ self._auth_codes[country_code] = data.get('token')
+ return self._user_token
+
+ def _get_token(self, country_code, video_id):
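+        # ten random digits plus '000', presumably imitating the
+        # millisecond-timestamp cache buster the web player appends
+        # to the token endpoint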
+ rand = ''.join(random.choices('0123456789', k=10))
+ return self._download_json(
+ f'https://api-gateway-global.viu.com/api/auth/token?v={rand}000', video_id,
+ headers={'Content-Type': 'application/json'}, note='Getting bearer token',
+ data=json.dumps({
+ 'countryCode': country_code.upper(),
+ 'platform': 'browser',
+ 'platformFlagLabel': 'web',
+ 'language': 'en',
+ 'uuid': str(uuid.uuid4()),
+ 'carrierId': '0'
+ }).encode('utf-8'))['token']
+
+ def _real_extract(self, url):
+ url, idata = unsmuggle_url(url, {})
+ country_code, lang_code, video_id = self._match_valid_url(url).groups()
+
+ query = {
+ 'r': 'vod/ajax-detail',
+ 'platform_flag_label': 'web',
+ 'product_id': video_id,
+ }
+
+ area_id = self._AREA_ID.get(country_code.upper())
+ if area_id:
+ query['area_id'] = area_id
+
+ product_data = self._download_json(
+ f'http://www.viu.com/ott/{country_code}/index.php', video_id,
+ 'Downloading video info', query=query)['data']
+
+ video_data = product_data.get('current_product')
+ if not video_data:
+ self.raise_geo_restricted()
+
+ series_id = video_data.get('series_id')
+ if self._yes_playlist(series_id, video_id, idata):
+ series = product_data.get('series') or {}
+ product = series.get('product')
+ if product:
+ entries = []
+ for entry in sorted(product, key=lambda x: int_or_none(x.get('number', 0))):
+ item_id = entry.get('product_id')
+ if not item_id:
+ continue
+ entries.append(self.url_result(
+ smuggle_url(f'http://www.viu.com/ott/{country_code}/{lang_code}/vod/{item_id}/',
+ {'force_noplaylist': True}),
+ ViuOTTIE, str(item_id), entry.get('synopsis', '').strip()))
+
+ return self.playlist_result(entries, series_id, series.get('name'), series.get('description'))
+
+ duration_limit = False
+ query = {
+ 'ccs_product_id': video_data['ccs_product_id'],
+ 'language_flag_id': self._LANGUAGE_FLAG.get(lang_code.lower()) or '3',
+ }
+
+ def download_playback():
+ stream_data = self._download_json(
+ 'https://api-gateway-global.viu.com/api/playback/distribute',
+ video_id=video_id, query=query, fatal=False, note='Downloading stream info',
+ headers={
+ 'Authorization': f'Bearer {self._auth_codes[country_code]}',
+ 'Referer': url,
+ 'Origin': url
+ })
+ return self._detect_error(stream_data).get('stream')
+
+ if not self._auth_codes.get(country_code):
+ self._auth_codes[country_code] = self._get_token(country_code, video_id)
+
+ stream_data = None
+ try:
+ stream_data = download_playback()
+ except (ExtractorError, KeyError):
+ token = self._login(country_code, video_id)
+ if token is not None:
+ query['identity'] = token
+ else:
+                # The content is a preview or for VIP only.
+                # Try to bypass the preview duration, which is limited to 3 minutes
+ duration_limit, query['duration'] = True, '180'
+ try:
+ stream_data = download_playback()
+ except (ExtractorError, KeyError):
+ if token is not None:
+ raise
+ self.raise_login_required(method='password')
+ if not stream_data:
+ raise ExtractorError('Cannot get stream info', expected=True)
+
+ formats = []
+ for vid_format, stream_url in (stream_data.get('url') or {}).items():
+ height = int(self._search_regex(r's(\d+)p', vid_format, 'height', default=None))
+
+ # bypass preview duration limit
+ if duration_limit:
+ old_stream_url = urllib.parse.urlparse(stream_url)
+ query = dict(urllib.parse.parse_qsl(old_stream_url.query, keep_blank_values=True))
+ query.update({
+ 'duration': video_data.get('time_duration') or '9999999',
+ 'duration_start': '0',
+ })
+ stream_url = old_stream_url._replace(query=urllib.parse.urlencode(query)).geturl()
+
+ formats.append({
+ 'format_id': vid_format,
+ 'url': stream_url,
+ 'height': height,
+ 'ext': 'mp4',
+ 'filesize': try_get(stream_data, lambda x: x['size'][vid_format], int)
+ })
+
+ subtitles = {}
+ for sub in video_data.get('subtitle') or []:
+ lang = sub.get('name') or 'und'
+ if sub.get('url'):
+ subtitles.setdefault(lang, []).append({
+ 'url': sub['url'],
+ 'ext': 'srt',
+ 'name': f'Spoken text for {lang}',
+ })
+ if sub.get('second_subtitle_url'):
+ subtitles.setdefault(f'{lang}_ost', []).append({
+ 'url': sub['second_subtitle_url'],
+ 'ext': 'srt',
+ 'name': f'On-screen text for {lang}',
+ })
+
+ title = strip_or_none(video_data.get('synopsis'))
+ return {
+ 'id': video_id,
+ 'title': title,
+ 'description': video_data.get('description'),
+ 'series': try_get(product_data, lambda x: x['series']['name']),
+ 'episode': title,
+ 'episode_number': int_or_none(video_data.get('number')),
+ 'duration': int_or_none(stream_data.get('duration')),
+ 'thumbnail': url_or_none(video_data.get('cover_image_url')),
+ 'formats': formats,
+ 'subtitles': subtitles,
+ }
+
+
+class ViuOTTIndonesiaBaseIE(InfoExtractor):
+ _BASE_QUERY = {
+ 'ver': 1.0,
+ 'fmt': 'json',
+ 'aver': 5.0,
+ 'appver': 2.0,
+ 'appid': 'viu_desktop',
+ 'platform': 'desktop',
+ }
+
+ _DEVICE_ID = str(uuid.uuid4())
+ _SESSION_ID = str(uuid.uuid4())
+ _TOKEN = None
+
+ _HEADERS = {
+ 'x-session-id': _SESSION_ID,
+ 'x-client': 'browser'
+ }
+
+ _AGE_RATINGS_MAPPER = {
+ 'ADULTS': 18,
+ 'teens': 13
+ }
+
+ def _real_initialize(self):
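+        # an anonymous device identity is minted from a random device UUID;
+        # the returned token is what authorizes the DRM/content API calls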
+ ViuOTTIndonesiaBaseIE._TOKEN = self._download_json(
+ 'https://um.viuapi.io/user/identity', None,
+ headers={'Content-type': 'application/json', **self._HEADERS},
+ query={**self._BASE_QUERY, 'iid': self._DEVICE_ID},
+ data=json.dumps({'deviceId': self._DEVICE_ID}).encode(),
+ note='Downloading token information')['token']
+
+
+class ViuOTTIndonesiaIE(ViuOTTIndonesiaBaseIE):
+ _VALID_URL = r'https?://www\.viu\.com/ott/\w+/\w+/all/video-[\w-]+-(?P<id>\d+)'
+ _TESTS = [{
+ 'url': 'https://www.viu.com/ott/id/id/all/video-japanese-drama-tv_shows-detective_conan_episode_793-1165863142?containerId=playlist-26271226',
+ 'info_dict': {
+ 'id': '1165863142',
+ 'ext': 'mp4',
+ 'episode_number': 793,
+ 'episode': 'Episode 793',
+ 'title': 'Detective Conan - Episode 793',
+ 'duration': 1476,
+ 'description': 'md5:b79d55345bc1e0217ece22616267c9a5',
+ 'thumbnail': 'https://vuclipi-a.akamaihd.net/p/cloudinary/h_171,w_304,dpr_1.5,f_auto,c_thumb,q_auto:low/1165863189/d-1',
+ 'upload_date': '20210101',
+ 'timestamp': 1609459200,
+ }
+ }, {
+ 'url': 'https://www.viu.com/ott/id/id/all/video-korean-reality-tv_shows-entertainment_weekly_episode_1622-1118617054',
+ 'info_dict': {
+ 'id': '1118617054',
+ 'ext': 'mp4',
+ 'episode_number': 1622,
+ 'episode': 'Episode 1622',
+ 'description': 'md5:6d68ca450004020113e9bf27ad99f0f8',
+ 'title': 'Entertainment Weekly - Episode 1622',
+ 'duration': 4729,
+ 'thumbnail': 'https://vuclipi-a.akamaihd.net/p/cloudinary/h_171,w_304,dpr_1.5,f_auto,c_thumb,q_auto:low/1120187848/d-1',
+ 'timestamp': 1420070400,
+ 'upload_date': '20150101',
+ 'cast': ['Shin Hyun-joon', 'Lee Da-Hee']
+ }
+ }, {
+ # age-limit test
+ 'url': 'https://www.viu.com/ott/id/id/all/video-japanese-trailer-tv_shows-trailer_jujutsu_kaisen_ver_01-1166044219?containerId=playlist-26273140',
+ 'info_dict': {
+ 'id': '1166044219',
+ 'ext': 'mp4',
+ 'upload_date': '20200101',
+ 'timestamp': 1577836800,
+ 'title': 'Trailer \'Jujutsu Kaisen\' Ver.01',
+ 'duration': 92,
+ 'thumbnail': 'https://vuclipi-a.akamaihd.net/p/cloudinary/h_171,w_304,dpr_1.5,f_auto,c_thumb,q_auto:low/1166044240/d-1',
+ 'description': 'Trailer \'Jujutsu Kaisen\' Ver.01',
+ 'cast': ['Junya Enoki', ' Yûichi Nakamura', ' Yuma Uchida', 'Asami Seto'],
+ 'age_limit': 13,
+ }
+ }, {
+ # json ld metadata type equal to Movie instead of TVEpisodes
+ 'url': 'https://www.viu.com/ott/id/id/all/video-japanese-animation-movies-demon_slayer_kimetsu_no_yaiba_the_movie_mugen_train-1165892707?containerId=1675060691786',
+ 'info_dict': {
+ 'id': '1165892707',
+ 'ext': 'mp4',
+ 'timestamp': 1577836800,
+ 'upload_date': '20200101',
+ 'title': 'Demon Slayer - Kimetsu no Yaiba - The Movie: Mugen Train',
+ 'age_limit': 13,
+ 'cast': 'count:9',
+ 'thumbnail': 'https://vuclipi-a.akamaihd.net/p/cloudinary/h_171,w_304,dpr_1.5,f_auto,c_thumb,q_auto:low/1165895279/d-1',
+ 'description': 'md5:1ce9c35a3aeab384085533f746c87469',
+ 'duration': 7021,
+ }
+ }]
+
+ def _real_extract(self, url):
+ display_id = self._match_id(url)
+ webpage = self._download_webpage(url, display_id)
+
+ video_data = self._download_json(
+ f'https://um.viuapi.io/drm/v1/content/{display_id}', display_id, data=b'',
+ headers={'Authorization': ViuOTTIndonesiaBaseIE._TOKEN, **self._HEADERS, 'ccode': 'ID'})
+ formats, subtitles = self._extract_m3u8_formats_and_subtitles(video_data['playUrl'], display_id)
+
+ initial_state = self._search_json(
+ r'window\.__INITIAL_STATE__\s*=', webpage, 'initial state',
+ display_id)['content']['clipDetails']
+        for key, sub_url in initial_state.items():
+ lang, ext = self._search_regex(
+ r'^subtitle_(?P<lang>[\w-]+)_(?P<ext>\w+)$', key, 'subtitle metadata',
+ default=(None, None), group=('lang', 'ext'))
+ if lang and ext:
+ subtitles.setdefault(lang, []).append({
+ 'ext': ext,
+                    'url': sub_url,
+ })
+
+ if ext == 'vtt':
+ subtitles[lang].append({
+ 'ext': 'srt',
+ 'url': f'{remove_end(initial_state[key], "vtt")}srt',
+ })
+
+ episode = traverse_obj(list(filter(
+ lambda x: x.get('@type') in ('TVEpisode', 'Movie'), self._yield_json_ld(webpage, display_id))), 0) or {}
+ return {
+ 'id': display_id,
+ 'title': (traverse_obj(initial_state, 'title', 'display_title')
+ or episode.get('name')),
+ 'description': initial_state.get('description') or episode.get('description'),
+ 'duration': initial_state.get('duration'),
+ 'thumbnail': traverse_obj(episode, ('image', 'url')),
+ 'timestamp': unified_timestamp(episode.get('dateCreated')),
+ 'formats': formats,
+ 'subtitles': subtitles,
+ 'episode_number': (traverse_obj(initial_state, 'episode_no', 'episodeno', expected_type=int_or_none)
+ or int_or_none(episode.get('episodeNumber'))),
+ 'cast': traverse_obj(episode, ('actor', ..., 'name'), default=None),
+ 'age_limit': self._AGE_RATINGS_MAPPER.get(initial_state.get('internal_age_rating'))
+ }
diff --git a/yt_dlp/extractor/vk.py b/yt_dlp/extractor/vk.py
new file mode 100644
index 0000000..e4a78c2
--- /dev/null
+++ b/yt_dlp/extractor/vk.py
@@ -0,0 +1,842 @@
+import collections
+import hashlib
+import re
+
+from .common import InfoExtractor
+from .dailymotion import DailymotionIE
+from .odnoklassniki import OdnoklassnikiIE
+from .pladform import PladformIE
+from .sibnet import SibnetEmbedIE
+from .vimeo import VimeoIE
+from .youtube import YoutubeIE
+from ..utils import (
+    ExtractorError,
+    UserNotLive,
+    clean_html,
+    get_element_by_class,
+    get_element_html_by_id,
+    int_or_none,
+    join_nonempty,
+    parse_resolution,
+    str_or_none,
+    str_to_int,
+    traverse_obj,
+    try_call,
+    unescapeHTML,
+    unified_timestamp,
+    update_url_query,
+    url_or_none,
+    urlencode_postdata,
+    urljoin,
+)
+
+
+class VKBaseIE(InfoExtractor):
+ _NETRC_MACHINE = 'vk'
+
+ def _download_webpage_handle(self, url_or_request, video_id, *args, fatal=True, **kwargs):
+ response = super()._download_webpage_handle(url_or_request, video_id, *args, fatal=fatal, **kwargs)
+ challenge_url, cookie = response[1].url if response else '', None
+ if challenge_url.startswith('https://vk.com/429.html?'):
+ cookie = self._get_cookies(challenge_url).get('hash429')
+ if not cookie:
+ return response
+
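+        # the WAF challenge is cleared by echoing back the MD5 of the
+        # 'hash429' cookie value as the 'key' query parameter, after which
+        # the original request is retried once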
+ hash429 = hashlib.md5(cookie.value.encode('ascii')).hexdigest()
+ self._request_webpage(
+ update_url_query(challenge_url, {'key': hash429}), video_id, fatal=fatal,
+ note='Resolving WAF challenge', errnote='Failed to bypass WAF challenge')
+ return super()._download_webpage_handle(url_or_request, video_id, *args, fatal=True, **kwargs)
+
+ def _perform_login(self, username, password):
+ login_page, url_handle = self._download_webpage_handle(
+ 'https://vk.com', None, 'Downloading login page')
+
+ login_form = self._hidden_inputs(login_page)
+
+ login_form.update({
+ 'email': username.encode('cp1251'),
+ 'pass': password.encode('cp1251'),
+ })
+
+        # VK serves two identical remixlhk cookies in the Set-Cookie header
+        # and expects the first one to actually be set
+ self._apply_first_set_cookie_header(url_handle, 'remixlhk')
+
+ login_page = self._download_webpage(
+ 'https://vk.com/login', None,
+ note='Logging in',
+ data=urlencode_postdata(login_form))
+
+ if re.search(r'onLoginFailed', login_page):
+ raise ExtractorError(
+ 'Unable to login, incorrect username and/or password', expected=True)
+
+ def _download_payload(self, path, video_id, data, fatal=True):
+ endpoint = f'https://vk.com/{path}.php'
+ data['al'] = 1
+ code, payload = self._download_json(
+ endpoint, video_id, data=urlencode_postdata(data), fatal=fatal,
+ headers={
+ 'Referer': endpoint,
+ 'X-Requested-With': 'XMLHttpRequest',
+ })['payload']
+ if code == '3':
+ self.raise_login_required()
+ elif code == '8':
+ raise ExtractorError(clean_html(payload[0][1:-1]), expected=True)
+ return payload
+
+
+class VKIE(VKBaseIE):
+ IE_NAME = 'vk'
+ IE_DESC = 'VK'
+ _EMBED_REGEX = [r'<iframe[^>]+?src=(["\'])(?P<url>https?://vk\.com/video_ext\.php.+?)\1']
+ _VALID_URL = r'''(?x)
+ https?://
+ (?:
+ (?:
+ (?:(?:m|new)\.)?vk\.com/video_|
+ (?:www\.)?daxab\.com/
+ )
+ ext\.php\?(?P<embed_query>.*?\boid=(?P<oid>-?\d+).*?\bid=(?P<id>\d+).*)|
+ (?:
+ (?:(?:m|new)\.)?vk\.com/(?:.+?\?.*?z=)?(?:video|clip)|
+ (?:www\.)?daxab\.com/embed/
+ )
+ (?P<videoid>-?\d+_\d+)(?:.*\blist=(?P<list_id>([\da-f]+)|(ln-[\da-zA-Z]+)))?
+ )
+ '''
+
+ _TESTS = [
+ {
+ 'url': 'http://vk.com/videos-77521?z=video-77521_162222515%2Fclub77521',
+ 'info_dict': {
+ 'id': '-77521_162222515',
+ 'ext': 'mp4',
+ 'title': 'ProtivoGunz - Хуёвая песня',
+ 'uploader': 're:(?:Noize MC|Alexander Ilyashenko).*',
+ 'uploader_id': '39545378',
+ 'duration': 195,
+ 'timestamp': 1329049880,
+ 'upload_date': '20120212',
+ 'comment_count': int,
+ 'like_count': int,
+ 'thumbnail': r're:https?://.+(?:\.jpg|getVideoPreview.*)$',
+ },
+ 'params': {'skip_download': 'm3u8'},
+ },
+ {
+ 'url': 'http://vk.com/video205387401_165548505',
+ 'info_dict': {
+ 'id': '205387401_165548505',
+ 'ext': 'mp4',
+ 'title': 'No name',
+ 'uploader': 'Tom Cruise',
+ 'uploader_id': '205387401',
+ 'duration': 9,
+ 'timestamp': 1374364108,
+ 'upload_date': '20130720',
+ 'comment_count': int,
+ 'like_count': int,
+ 'thumbnail': r're:https?://.+(?:\.jpg|getVideoPreview.*)$',
+ }
+ },
+ {
+ 'note': 'Embedded video',
+ 'url': 'https://vk.com/video_ext.php?oid=-77521&id=162222515&hash=87b046504ccd8bfa',
+ 'info_dict': {
+ 'id': '-77521_162222515',
+ 'ext': 'mp4',
+ 'uploader': 're:(?:Noize MC|Alexander Ilyashenko).*',
+ 'title': 'ProtivoGunz - Хуёвая песня',
+ 'duration': 195,
+ 'upload_date': '20120212',
+ 'timestamp': 1329049880,
+ 'uploader_id': '39545378',
+ 'thumbnail': r're:https?://.+(?:\.jpg|getVideoPreview.*)$',
+ },
+ 'params': {'skip_download': 'm3u8'},
+ },
+ {
+ 'url': 'https://vk.com/video-93049196_456239755?list=ln-cBjJ7S4jYYx3ADnmDT',
+ 'info_dict': {
+ 'id': '-93049196_456239755',
+ 'ext': 'mp4',
+ 'title': '8 серия (озвучка)',
+ 'duration': 8383,
+ 'comment_count': int,
+ 'uploader': 'Dizi2021',
+ 'like_count': int,
+ 'timestamp': 1640162189,
+ 'upload_date': '20211222',
+ 'uploader_id': '-93049196',
+ 'thumbnail': r're:https?://.+(?:\.jpg|getVideoPreview.*)$',
+ },
+ },
+ {
+ 'note': 'youtube embed',
+ 'url': 'https://vk.com/video276849682_170681728',
+ 'info_dict': {
+ 'id': 'V3K4mi0SYkc',
+ 'ext': 'mp4',
+ 'title': "DSWD Awards 'Children's Joy Foundation, Inc.' Certificate of Registration and License to Operate",
+ 'description': 'md5:bf9c26cfa4acdfb146362682edd3827a',
+ 'duration': 178,
+ 'upload_date': '20130117',
+ 'uploader': "Children's Joy Foundation Inc.",
+ 'uploader_id': 'thecjf',
+ 'view_count': int,
+ 'channel_id': 'UCgzCNQ11TmR9V97ECnhi3gw',
+ 'availability': 'public',
+ 'like_count': int,
+ 'live_status': 'not_live',
+ 'playable_in_embed': True,
+ 'channel': 'Children\'s Joy Foundation Inc.',
+ 'uploader_url': 'http://www.youtube.com/user/thecjf',
+ 'thumbnail': r're:https?://.+\.jpg$',
+ 'tags': 'count:27',
+ 'start_time': 0.0,
+ 'categories': ['Nonprofits & Activism'],
+ 'channel_url': 'https://www.youtube.com/channel/UCgzCNQ11TmR9V97ECnhi3gw',
+ 'channel_follower_count': int,
+ 'age_limit': 0,
+ },
+ },
+ {
+ 'note': 'dailymotion embed',
+ 'url': 'https://vk.com/video-95168827_456239103?list=cca524a0f0d5557e16',
+ 'info_dict': {
+ 'id': 'x8gfli0',
+ 'ext': 'mp4',
+ 'title': 'md5:45410f60ccd4b2760da98cb5fc777d70',
+ 'description': 'md5:2e71c5c9413735cfa06cf1a166f16c84',
+ 'uploader': 'Movies and cinema.',
+ 'upload_date': '20221218',
+ 'uploader_id': 'x1jdavv',
+ 'timestamp': 1671387617,
+ 'age_limit': 0,
+ 'duration': 2918,
+ 'like_count': int,
+ 'view_count': int,
+ 'thumbnail': r're:https?://.+x1080$',
+ 'tags': list
+ },
+ },
+ {
+ 'url': 'https://vk.com/clips-74006511?z=clip-74006511_456247211',
+ 'info_dict': {
+ 'id': '-74006511_456247211',
+ 'ext': 'mp4',
+ 'comment_count': int,
+ 'duration': 9,
+ 'like_count': int,
+ 'thumbnail': r're:https?://.+(?:\.jpg|getVideoPreview.*)$',
+ 'timestamp': 1664995597,
+ 'title': 'Clip by @madempress',
+ 'upload_date': '20221005',
+ 'uploader': 'Шальная императрица',
+ 'uploader_id': '-74006511',
+ },
+ },
+ {
+ # video key is extra_data not url\d+
+ 'url': 'http://vk.com/video-110305615_171782105',
+ 'md5': 'e13fcda136f99764872e739d13fac1d1',
+ 'info_dict': {
+ 'id': '-110305615_171782105',
+ 'ext': 'mp4',
+ 'title': 'S-Dance, репетиции к The way show',
+ 'uploader': 'THE WAY SHOW | 17 апреля',
+ 'uploader_id': '-110305615',
+ 'timestamp': 1454859345,
+ 'upload_date': '20160207',
+ },
+ 'skip': 'Removed',
+ },
+ {
+ 'note': 'finished live stream, postlive_mp4',
+ 'url': 'https://vk.com/videos-387766?z=video-387766_456242764%2Fpl_-387766_-2',
+ 'info_dict': {
+ 'id': '-387766_456242764',
+ 'ext': 'mp4',
+ 'title': 'ИгроМир 2016 День 1 — Игромания Утром',
+ 'uploader': 'Игромания',
+ 'duration': 5239,
+ 'upload_date': '20160929',
+ 'uploader_id': '-387766',
+ 'timestamp': 1475137527,
+ 'thumbnail': r're:https?://.+\.jpg$',
+ 'comment_count': int,
+ 'like_count': int,
+ },
+ 'params': {
+ 'skip_download': True,
+ },
+ },
+ {
+            # live stream with both hls and rtmp links; it has most likely
+            # finished by the time you are reading this comment
+ 'url': 'https://vk.com/video-140332_456239111',
+ 'only_matching': True,
+ },
+ {
+ # removed video, just testing that we match the pattern
+ 'url': 'http://vk.com/feed?z=video-43215063_166094326%2Fbb50cacd3177146d7a',
+ 'only_matching': True,
+ },
+ {
+ # age restricted video, requires vk account credentials
+ 'url': 'https://vk.com/video205387401_164765225',
+ 'only_matching': True,
+ },
+ {
+ # pladform embed
+ 'url': 'https://vk.com/video-76116461_171554880',
+ 'only_matching': True,
+ },
+ {
+ 'url': 'http://new.vk.com/video205387401_165548505',
+ 'only_matching': True,
+ },
+ {
+ # This video is no longer available, because its author has been blocked.
+ 'url': 'https://vk.com/video-10639516_456240611',
+ 'only_matching': True,
+ },
+ {
+ # The video is not available in your region.
+ 'url': 'https://vk.com/video-51812607_171445436',
+ 'only_matching': True,
+ },
+ {
+ 'url': 'https://vk.com/clip30014565_456240946',
+ 'only_matching': True,
+ }]
+
+ def _real_extract(self, url):
+ mobj = self._match_valid_url(url)
+ video_id = mobj.group('videoid')
+
+ mv_data = {}
+ if video_id:
+ data = {
+ 'act': 'show',
+ 'video': video_id,
+ }
+            # Some videos (possibly removed ones) can only be downloaded with
+            # a list id specified
+ list_id = mobj.group('list_id')
+ if list_id:
+ data['list'] = list_id
+
+ payload = self._download_payload('al_video', video_id, data)
+ info_page = payload[1]
+ opts = payload[-1]
+ mv_data = opts.get('mvData') or {}
+ player = opts.get('player') or {}
+ else:
+ video_id = '%s_%s' % (mobj.group('oid'), mobj.group('id'))
+
+ info_page = self._download_webpage(
+ 'http://vk.com/video_ext.php?' + mobj.group('embed_query'), video_id)
+
+ error_message = self._html_search_regex(
+ [r'(?s)<!><div[^>]+class="video_layer_message"[^>]*>(.+?)</div>',
+ r'(?s)<div[^>]+id="video_ext_msg"[^>]*>(.+?)</div>'],
+ info_page, 'error message', default=None)
+ if error_message:
+ raise ExtractorError(error_message, expected=True)
+
+ if re.search(r'<!>/login\.php\?.*\bact=security_check', info_page):
+ raise ExtractorError(
+ 'You are trying to log in from an unusual location. You should confirm ownership at vk.com to log in with this IP.',
+ expected=True)
+
+ ERROR_COPYRIGHT = 'Video %s has been removed from public access due to rightholder complaint.'
+
+ ERRORS = {
+ r'>Видеозапись .*? была изъята из публичного доступа в связи с обращением правообладателя.<':
+ ERROR_COPYRIGHT,
+
+ r'>The video .*? was removed from public access by request of the copyright holder.<':
+ ERROR_COPYRIGHT,
+
+ r'<!>Please log in or <':
+ 'Video %s is only available for registered users, '
+ 'use --username and --password options to provide account credentials.',
+
+ r'<!>Unknown error':
+ 'Video %s does not exist.',
+
+ r'<!>Видео временно недоступно':
+ 'Video %s is temporarily unavailable.',
+
+ r'<!>Access denied':
+ 'Access denied to video %s.',
+
+ r'<!>Видеозапись недоступна, так как её автор был заблокирован.':
+ 'Video %s is no longer available, because its author has been blocked.',
+
+ r'<!>This video is no longer available, because its author has been blocked.':
+ 'Video %s is no longer available, because its author has been blocked.',
+
+ r'<!>This video is no longer available, because it has been deleted.':
+ 'Video %s is no longer available, because it has been deleted.',
+
+ r'<!>The video .+? is not available in your region.':
+ 'Video %s is not available in your region.',
+ }
+
+ for error_re, error_msg in ERRORS.items():
+ if re.search(error_re, info_page):
+ raise ExtractorError(error_msg % video_id, expected=True)
+
+ player = self._parse_json(self._search_regex(
+ r'var\s+playerParams\s*=\s*({.+?})\s*;\s*\n',
+ info_page, 'player params'), video_id)
+
+ youtube_url = YoutubeIE._extract_url(info_page)
+ if youtube_url:
+ return self.url_result(youtube_url, YoutubeIE.ie_key())
+
+ vimeo_url = VimeoIE._extract_url(url, info_page)
+ if vimeo_url is not None:
+ return self.url_result(vimeo_url, VimeoIE.ie_key())
+
+ pladform_url = PladformIE._extract_url(info_page)
+ if pladform_url:
+ return self.url_result(pladform_url, PladformIE.ie_key())
+
+ m_rutube = re.search(
+ r'\ssrc="((?:https?:)?//rutube\.ru\\?/(?:video|play)\\?/embed(?:.*?))\\?"', info_page)
+ if m_rutube is not None:
+ rutube_url = self._proto_relative_url(
+ m_rutube.group(1).replace('\\', ''))
+ return self.url_result(rutube_url)
+
+ dailymotion_url = next(DailymotionIE._extract_embed_urls(url, info_page), None)
+ if dailymotion_url:
+ return self.url_result(dailymotion_url, DailymotionIE.ie_key())
+
+ odnoklassniki_url = OdnoklassnikiIE._extract_url(info_page)
+ if odnoklassniki_url:
+ return self.url_result(odnoklassniki_url, OdnoklassnikiIE.ie_key())
+
+ sibnet_url = next(SibnetEmbedIE._extract_embed_urls(url, info_page), None)
+ if sibnet_url:
+ return self.url_result(sibnet_url)
+
+ m_opts = re.search(r'(?s)var\s+opts\s*=\s*({.+?});', info_page)
+ if m_opts:
+ m_opts_url = re.search(r"url\s*:\s*'((?!/\b)[^']+)", m_opts.group(1))
+ if m_opts_url:
+ opts_url = m_opts_url.group(1)
+ if opts_url.startswith('//'):
+ opts_url = 'http:' + opts_url
+ return self.url_result(opts_url)
+
+ data = player['params'][0]
+ title = unescapeHTML(data['md_title'])
+
+ # 2 = live
+ # 3 = post live (finished live)
+ is_live = data.get('live') == 2
+
+ timestamp = unified_timestamp(self._html_search_regex(
+ r'class=["\']mv_info_date[^>]+>([^<]+)(?:<|from)', info_page,
+ 'upload date', default=None)) or int_or_none(data.get('date'))
+
+ view_count = str_to_int(self._search_regex(
+ r'class=["\']mv_views_count[^>]+>\s*([\d,.]+)',
+ info_page, 'view count', default=None))
+
+ formats = []
+ for format_id, format_url in data.items():
+ format_url = url_or_none(format_url)
+ if not format_url or not format_url.startswith(('http', '//', 'rtmp')):
+ continue
+ if (format_id.startswith(('url', 'cache'))
+ or format_id in ('extra_data', 'live_mp4', 'postlive_mp4')):
+ height = int_or_none(self._search_regex(
+ r'^(?:url|cache)(\d+)', format_id, 'height', default=None))
+ formats.append({
+ 'format_id': format_id,
+ 'url': format_url,
+ 'height': height,
+ })
+ elif format_id == 'hls':
+ formats.extend(self._extract_m3u8_formats(
+ format_url, video_id, 'mp4', 'm3u8_native',
+ m3u8_id=format_id, fatal=False, live=is_live))
+ elif format_id == 'rtmp':
+ formats.append({
+ 'format_id': format_id,
+ 'url': format_url,
+ 'ext': 'flv',
+ })
+
+ subtitles = {}
+ for sub in data.get('subs') or {}:
+ subtitles.setdefault(sub.get('lang', 'en'), []).append({
+ 'ext': sub.get('title', '.srt').split('.')[-1],
+ 'url': url_or_none(sub.get('url')),
+ })
+
+ return {
+ 'id': video_id,
+ 'formats': formats,
+ 'title': title,
+ 'thumbnail': data.get('jpg'),
+ 'uploader': data.get('md_author'),
+ 'uploader_id': str_or_none(data.get('author_id') or mv_data.get('authorId')),
+ 'duration': int_or_none(data.get('duration') or mv_data.get('duration')),
+ 'timestamp': timestamp,
+ 'view_count': view_count,
+ 'like_count': int_or_none(mv_data.get('likes')),
+ 'comment_count': int_or_none(mv_data.get('commcount')),
+ 'is_live': is_live,
+ 'subtitles': subtitles,
+ }
+
+
+class VKUserVideosIE(VKBaseIE):
+ IE_NAME = 'vk:uservideos'
+ IE_DESC = "VK - User's Videos"
+ _VALID_URL = r'https?://(?:(?:m|new)\.)?vk\.com/video/(?:playlist/)?(?P<id>[^?$#/&]+)(?!\?.*\bz=video)(?:[/?#&](?:.*?\bsection=(?P<section>\w+))?|$)'
+ _TEMPLATE_URL = 'https://vk.com/videos'
+ _TESTS = [{
+ 'url': 'https://vk.com/video/@mobidevices',
+ 'info_dict': {
+ 'id': '-17892518_all',
+ },
+ 'playlist_mincount': 1355,
+ }, {
+ 'url': 'https://vk.com/video/@mobidevices?section=uploaded',
+ 'info_dict': {
+ 'id': '-17892518_uploaded',
+ },
+ 'playlist_mincount': 182,
+ }, {
+ 'url': 'https://vk.com/video/playlist/-174476437_2',
+ 'info_dict': {
+ 'id': '-174476437_playlist_2',
+ 'title': 'Анонсы'
+ },
+ 'playlist_mincount': 108,
+ }]
+ _VIDEO = collections.namedtuple('Video', ['owner_id', 'id'])
+
+ def _entries(self, page_id, section):
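+        # load_videos_silent pages by absolute offset; each payload carries
+        # 'count' (items in this page), 'total' and 'list', and every list
+        # entry starts with [owner_id, video_id, ...]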
+ video_list_json = self._download_payload('al_video', page_id, {
+ 'act': 'load_videos_silent',
+ 'offset': 0,
+ 'oid': page_id,
+ 'section': section,
+ })[0][section]
+ count = video_list_json['count']
+ total = video_list_json['total']
+ video_list = video_list_json['list']
+
+ while True:
+ for video in video_list:
+ v = self._VIDEO._make(video[:2])
+ video_id = '%d_%d' % (v.owner_id, v.id)
+ yield self.url_result(
+ 'http://vk.com/video' + video_id, VKIE.ie_key(), video_id)
+ if count >= total:
+ break
+ video_list_json = self._download_payload('al_video', page_id, {
+ 'act': 'load_videos_silent',
+ 'offset': count,
+ 'oid': page_id,
+ 'section': section,
+ })[0][section]
+ count += video_list_json['count']
+ video_list = video_list_json['list']
+
+ def _real_extract(self, url):
+ u_id, section = self._match_valid_url(url).groups()
+ webpage = self._download_webpage(url, u_id)
+
+ if u_id.startswith('@'):
+ page_id = self._search_regex(r'data-owner-id\s?=\s?"([^"]+)"', webpage, 'page_id')
+ elif '_' in u_id:
+ page_id, section = u_id.split('_', 1)
+ section = f'playlist_{section}'
+ else:
+ raise ExtractorError('Invalid URL', expected=True)
+
+ if not section:
+ section = 'all'
+
+ playlist_title = clean_html(get_element_by_class('VideoInfoPanel__title', webpage))
+ return self.playlist_result(self._entries(page_id, section), '%s_%s' % (page_id, section), playlist_title)
+
+
+class VKWallPostIE(VKBaseIE):
+ IE_NAME = 'vk:wallpost'
+ _VALID_URL = r'https?://(?:(?:(?:(?:m|new)\.)?vk\.com/(?:[^?]+\?.*\bw=)?wall(?P<id>-?\d+_\d+)))'
+ _TESTS = [{
+ # public page URL, audio playlist
+ 'url': 'https://vk.com/bs.official?w=wall-23538238_35',
+ 'info_dict': {
+ 'id': '-23538238_35',
+ 'title': 'Black Shadow - Wall post -23538238_35',
+ 'description': 'md5:190c78f905a53e0de793d83933c6e67f',
+ },
+ 'playlist': [{
+ 'md5': '5ba93864ec5b85f7ce19a9af4af080f6',
+ 'info_dict': {
+ 'id': '135220665_111806521',
+ 'ext': 'm4a',
+ 'title': 'Black Shadow - Слепое Верование',
+ 'duration': 370,
+ 'uploader': 'Black Shadow',
+ 'artist': 'Black Shadow',
+ 'track': 'Слепое Верование',
+ },
+ }, {
+ 'md5': '4cc7e804579122b17ea95af7834c9233',
+ 'info_dict': {
+ 'id': '135220665_111802303',
+ 'ext': 'm4a',
+ 'title': 'Black Shadow - Война - Негасимое Бездны Пламя!',
+ 'duration': 423,
+ 'uploader': 'Black Shadow',
+ 'artist': 'Black Shadow',
+ 'track': 'Война - Негасимое Бездны Пламя!',
+ },
+ }],
+ 'params': {
+ 'skip_download': True,
+ },
+ }, {
+ # single YouTube embed with irrelevant reaction videos
+ 'url': 'https://vk.com/wall-32370614_7173954',
+ 'info_dict': {
+ 'id': '-32370614_7173954',
+ 'title': 'md5:9f93c405bbc00061d34007d78c75e3bc',
+ 'description': 'md5:953b811f26fa9f21ee5856e2ea8e68fc',
+ },
+ 'playlist_count': 1,
+ }, {
+ # wall page URL
+ 'url': 'https://vk.com/wall-23538238_35',
+ 'only_matching': True,
+ }, {
+ # mobile wall page URL
+ 'url': 'https://m.vk.com/wall-23538238_35',
+ 'only_matching': True,
+ }]
+ _BASE64_CHARS = 'abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMN0PQRSTUVWXYZO123456789+/='
+ _AUDIO = collections.namedtuple('Audio', ['id', 'owner_id', 'url', 'title', 'performer', 'duration', 'album_id', 'unk', 'author_link', 'lyrics', 'flags', 'context', 'extra', 'hashes', 'cover_url', 'ads'])
+
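+    # _decode is a plain base64 decode over VK's custom alphabet (lowercase
+    # letters first, with 'O' and '0' transposed): every 4-character group
+    # fills a 24-bit accumulator that emits one byte per trailing character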
+ def _decode(self, enc):
+ dec = ''
+ e = n = 0
+ for c in enc:
+ r = self._BASE64_CHARS.index(c)
+ cond = n % 4
+ e = 64 * e + r if cond else r
+ n += 1
+ if cond:
+ dec += chr(255 & e >> (-2 * n & 6))
+ return dec
+
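+    # masked URLs ('audio_api_unavailable' placeholders) hide the real URL
+    # in the '?extra=' fragment: the second part decodes to a function name
+    # and a numeric base, the first to a shuffled URL; the shuffle is undone
+    # via an index sequence seeded with int(base) ^ vk_id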
+ def _unmask_url(self, mask_url, vk_id):
+ if 'audio_api_unavailable' in mask_url:
+ extra = mask_url.split('?extra=')[1].split('#')
+ func, base = self._decode(extra[1]).split(chr(11))
+ mask_url = list(self._decode(extra[0]))
+ url_len = len(mask_url)
+ indexes = [None] * url_len
+ index = int(base) ^ vk_id
+ for n in range(url_len - 1, -1, -1):
+ index = (url_len * (n + 1) ^ index + n) % url_len
+ indexes[n] = index
+ for n in range(1, url_len):
+ c = mask_url[n]
+ index = indexes[url_len - 1 - n]
+ mask_url[n] = mask_url[index]
+ mask_url[index] = c
+ mask_url = ''.join(mask_url)
+ return mask_url
+
+ def _real_extract(self, url):
+ post_id = self._match_id(url)
+
+ webpage = self._download_payload('wkview', post_id, {
+ 'act': 'show',
+ 'w': 'wall' + post_id,
+ })[1]
+
+ uploader = clean_html(get_element_by_class('PostHeaderTitle__authorName', webpage))
+
+ entries = []
+
+ for audio in re.findall(r'data-audio="([^"]+)', webpage):
+ audio = self._parse_json(unescapeHTML(audio), post_id)
+ if not audio['url']:
+ continue
+ title = unescapeHTML(audio.get('title'))
+ artist = unescapeHTML(audio.get('artist'))
+ entries.append({
+ 'id': f'{audio["owner_id"]}_{audio["id"]}',
+ 'title': join_nonempty(artist, title, delim=' - '),
+ 'thumbnails': try_call(lambda: [{'url': u} for u in audio['coverUrl'].split(',')]),
+ 'duration': int_or_none(audio.get('duration')),
+ 'uploader': uploader,
+ 'artist': artist,
+ 'track': title,
+ 'formats': [{
+ 'url': audio['url'],
+ 'ext': 'm4a',
+ 'vcodec': 'none',
+ 'acodec': 'mp3',
+ 'container': 'm4a_dash',
+ }],
+ })
+
+ entries.extend(self.url_result(urljoin(url, entry), VKIE) for entry in set(re.findall(
+ r'<a[^>]+href=(?:["\'])(/video(?:-?[\d_]+)[^"\']*)',
+ get_element_html_by_id('wl_post_body', webpage))))
+
+ return self.playlist_result(
+ entries, post_id, join_nonempty(uploader, f'Wall post {post_id}', delim=' - '),
+ clean_html(get_element_by_class('wall_post_text', webpage)))
+
+
+class VKPlayBaseIE(InfoExtractor):
+ _RESOLUTIONS = {
+ 'tiny': '256x144',
+ 'lowest': '426x240',
+ 'low': '640x360',
+ 'medium': '852x480',
+ 'high': '1280x720',
+ 'full_hd': '1920x1080',
+ 'quad_hd': '2560x1440',
+ }
+
+ def _extract_from_initial_state(self, url, video_id, path):
+ webpage = self._download_webpage(url, video_id)
+ video_info = traverse_obj(self._search_json(
+ r'<script[^>]+\bid="initial-state"[^>]*>', webpage, 'initial state', video_id),
+ path, expected_type=dict)
+ if not video_info:
+ raise ExtractorError('Unable to extract video info from html inline initial state')
+ return video_info
+
+ def _extract_formats(self, stream_info, video_id):
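+        # progressive MP4 URLs are keyed only by a quality name; _RESOLUTIONS
+        # maps those names back to approximate dimensions, since the API does
+        # not appear to expose them directly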
+ formats = []
+ for stream in traverse_obj(stream_info, (
+ 'data', 0, 'playerUrls', lambda _, v: url_or_none(v['url']) and v['type'])):
+ url = stream['url']
+ format_id = str_or_none(stream['type'])
+ if format_id in ('hls', 'live_hls', 'live_playback_hls') or '.m3u8' in url:
+ formats.extend(self._extract_m3u8_formats(url, video_id, m3u8_id=format_id, fatal=False))
+ elif format_id == 'dash':
+ formats.extend(self._extract_mpd_formats(url, video_id, mpd_id=format_id, fatal=False))
+ elif format_id in ('live_dash', 'live_playback_dash'):
+ self.write_debug(f'Not extracting unsupported format "{format_id}"')
+ else:
+ formats.append({
+ 'url': url,
+ 'ext': 'mp4',
+ 'format_id': format_id,
+ **parse_resolution(self._RESOLUTIONS.get(format_id)),
+ })
+ return formats
+
+ def _extract_common_meta(self, stream_info):
+ return traverse_obj(stream_info, {
+ 'id': ('id', {str_or_none}),
+ 'title': ('title', {str}),
+ 'release_timestamp': ('startTime', {int_or_none}),
+ 'thumbnail': ('previewUrl', {url_or_none}),
+ 'view_count': ('count', 'views', {int_or_none}),
+ 'like_count': ('count', 'likes', {int_or_none}),
+ 'categories': ('category', 'title', {str}, {lambda x: [x] if x else None}),
+ 'uploader': (('user', ('blog', 'owner')), 'nick', {str}),
+ 'uploader_id': (('user', ('blog', 'owner')), 'id', {str_or_none}),
+ 'duration': ('duration', {int_or_none}),
+ 'is_live': ('isOnline', {bool}),
+ 'concurrent_view_count': ('count', 'viewers', {int_or_none}),
+ }, get_all=False)
+
+
+class VKPlayIE(VKPlayBaseIE):
+ _VALID_URL = r'https?://vkplay\.live/(?P<username>[^/#?]+)/record/(?P<id>[a-f0-9-]+)'
+ _TESTS = [{
+ 'url': 'https://vkplay.live/zitsmann/record/f5e6e3b5-dc52-4d14-965d-0680dd2882da',
+ 'info_dict': {
+ 'id': 'f5e6e3b5-dc52-4d14-965d-0680dd2882da',
+ 'ext': 'mp4',
+ 'title': 'Atomic Heart (пробуем!) спасибо подписчику EKZO!',
+ 'uploader': 'ZitsmanN',
+ 'uploader_id': '13159830',
+ 'release_timestamp': 1683461378,
+ 'release_date': '20230507',
+ 'thumbnail': r're:https://images.vkplay.live/public_video_stream/record/f5e6e3b5-dc52-4d14-965d-0680dd2882da/preview\?change_time=\d+',
+ 'duration': 10608,
+ 'view_count': int,
+ 'like_count': int,
+ 'categories': ['Atomic Heart'],
+ },
+ 'params': {'skip_download': 'm3u8'},
+ }]
+
+ def _real_extract(self, url):
+ username, video_id = self._match_valid_url(url).groups()
+
+ record_info = traverse_obj(self._download_json(
+ f'https://api.vkplay.live/v1/blog/{username}/public_video_stream/record/{video_id}', video_id, fatal=False),
+ ('data', 'record', {dict}))
+ if not record_info:
+ record_info = self._extract_from_initial_state(url, video_id, ('record', 'currentRecord', 'data'))
+
+ return {
+ **self._extract_common_meta(record_info),
+ 'id': video_id,
+ 'formats': self._extract_formats(record_info, video_id),
+ }
+
+
+class VKPlayLiveIE(VKPlayBaseIE):
+ _VALID_URL = r'https?://vkplay\.live/(?P<id>[^/#?]+)/?(?:[#?]|$)'
+ _TESTS = [{
+ 'url': 'https://vkplay.live/bayda',
+ 'info_dict': {
+ 'id': 'f02c321e-427b-408d-b12f-ae34e53e0ea2',
+ 'ext': 'mp4',
+ 'title': r're:эскапизм крута .*',
+ 'uploader': 'Bayda',
+ 'uploader_id': '12279401',
+ 'release_timestamp': 1687209962,
+ 'release_date': '20230619',
+ 'thumbnail': r're:https://images.vkplay.live/public_video_stream/12279401/preview\?change_time=\d+',
+ 'view_count': int,
+ 'concurrent_view_count': int,
+ 'like_count': int,
+ 'categories': ['EVE Online'],
+ 'live_status': 'is_live',
+ },
+ 'skip': 'livestream',
+ 'params': {'skip_download': True},
+ }]
+
+ def _real_extract(self, url):
+ username = self._match_id(url)
+
+ stream_info = self._download_json(
+ f'https://api.vkplay.live/v1/blog/{username}/public_video_stream', username, fatal=False)
+ if not stream_info:
+ stream_info = self._extract_from_initial_state(url, username, ('stream', 'stream', 'data', 'stream'))
+
+ formats = self._extract_formats(stream_info, username)
+ if not formats and not traverse_obj(stream_info, ('isOnline', {bool})):
+ raise UserNotLive(video_id=username)
+
+ return {
+ **self._extract_common_meta(stream_info),
+ 'formats': formats,
+ }
diff --git a/yt_dlp/extractor/vocaroo.py b/yt_dlp/extractor/vocaroo.py
new file mode 100644
index 0000000..e30c959
--- /dev/null
+++ b/yt_dlp/extractor/vocaroo.py
@@ -0,0 +1,63 @@
+from .common import InfoExtractor
+from ..networking import HEADRequest
+from ..utils import float_or_none
+
+
+class VocarooIE(InfoExtractor):
+ _VALID_URL = r'https?://(?:www\.)?(?:vocaroo\.com|voca\.ro)/(?:embed/)?(?P<id>\w+)'
+ _EMBED_REGEX = [r'<iframe[^>]+src=(["\'])(?P<url>(?:https?://)?(?:www\.)?vocaroo\.com/embed/.+?)\1']
+ _TESTS = [
+ {
+ 'url': 'https://vocaroo.com/1de8yA3LNe77',
+ 'md5': 'c557841d5e50261777a6585648adf439',
+ 'info_dict': {
+ 'id': '1de8yA3LNe77',
+ 'ext': 'mp3',
+ 'title': 'Vocaroo video #1de8yA3LNe77',
+ 'timestamp': 1675059800.370,
+ 'upload_date': '20230130',
+ },
+ },
+ {
+ 'url': 'https://vocaroo.com/embed/12WqtjLnpj6g?autoplay=0',
+ 'only_matching': True,
+ },
+ {
+ 'url': 'https://voca.ro/12D52rgpzkB0',
+ 'only_matching': True,
+ },
+ ]
+
+ _WEBPAGE_TESTS = [
+ {
+ 'url': 'https://qbnu.github.io/cool.html',
+ 'md5': 'f322e529275dd8a47994919eeac404a5',
+ 'info_dict': {
+ 'id': '19cgWmKO6AmC',
+ 'ext': 'mp3',
+ 'title': 'Vocaroo video #19cgWmKO6AmC',
+ 'timestamp': 1675093841.408,
+ 'upload_date': '20230130',
+ },
+ },
+ ]
+
+ def _real_extract(self, url):
+ audio_id = self._match_id(url)
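+        # some clips are served from a 'media1' subdomain; which host is used
+        # appears to depend only on the shape of the clip ID (10 characters,
+        # or 12 characters starting with '1')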
+ if len(audio_id) == 10 or (len(audio_id) == 12 and audio_id[0] == '1'):
+ media_subdomain = 'media1'
+ else:
+ media_subdomain = 'media'
+
+ url = f'https://{media_subdomain}.vocaroo.com/mp3/{audio_id}'
+ http_headers = {'Referer': 'https://vocaroo.com/'}
+ resp = self._request_webpage(HEADRequest(url), audio_id, headers=http_headers)
+ return {
+ 'id': audio_id,
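+            # leaving the title empty defers to yt-dlp's generic
+            # '<extractor> video #<id>' fallback (see the test info_dict)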
+ 'title': '',
+ 'url': url,
+ 'ext': 'mp3',
+ 'timestamp': float_or_none(resp.headers.get('x-bz-upload-timestamp'), scale=1000),
+ 'vcodec': 'none',
+ 'http_headers': http_headers,
+ }
diff --git a/yt_dlp/extractor/vodpl.py b/yt_dlp/extractor/vodpl.py
new file mode 100644
index 0000000..8af1572
--- /dev/null
+++ b/yt_dlp/extractor/vodpl.py
@@ -0,0 +1,29 @@
+from .onet import OnetBaseIE
+
+
+class VODPlIE(OnetBaseIE):
+ _VALID_URL = r'https?://vod\.pl/(?:[^/]+/)+(?P<id>[0-9a-zA-Z]+)'
+
+ _TESTS = [{
+ 'url': 'https://vod.pl/filmy/chlopaki-nie-placza/3ep3jns',
+ 'md5': 'a7dc3b2f7faa2421aefb0ecaabf7ec74',
+ 'info_dict': {
+ 'id': '3ep3jns',
+ 'ext': 'mp4',
+ 'title': 'Chłopaki nie płaczą',
+ 'description': 'md5:f5f03b84712e55f5ac9f0a3f94445224',
+ 'timestamp': 1463415154,
+ 'duration': 5765,
+ 'upload_date': '20160516',
+ },
+ }, {
+ 'url': 'https://vod.pl/seriale/belfer-na-planie-praca-kamery-online/2c10heh',
+ 'only_matching': True,
+ }]
+
+ def _real_extract(self, url):
+ video_id = self._match_id(url)
+ webpage = self._download_webpage(url, video_id)
+ info_dict = self._extract_from_id(self._search_mvp_id(webpage), webpage)
+ info_dict['id'] = video_id
+ return info_dict
diff --git a/yt_dlp/extractor/vodplatform.py b/yt_dlp/extractor/vodplatform.py
new file mode 100644
index 0000000..5ff0500
--- /dev/null
+++ b/yt_dlp/extractor/vodplatform.py
@@ -0,0 +1,37 @@
+from .common import InfoExtractor
+from ..utils import unescapeHTML
+
+
+class VODPlatformIE(InfoExtractor):
+ _VALID_URL = r'https?://(?:(?:www\.)?vod-platform\.net|embed\.kwikmotion\.com)/[eE]mbed/(?P<id>[^/?#]+)'
+ _EMBED_REGEX = [r'<iframe[^>]+src=(["\'])(?P<url>(?:https?:)?//(?:(?:www\.)?vod-platform\.net|embed\.kwikmotion\.com)/[eE]mbed/.+?)\1']
+ _TESTS = [{
+ # from http://www.lbcgroup.tv/watch/chapter/29143/52844/%D8%A7%D9%84%D9%86%D8%B5%D8%B1%D8%A9-%D9%81%D9%8A-%D8%B6%D9%8A%D8%A7%D9%81%D8%A9-%D8%A7%D9%84%D9%80-cnn/ar
+ 'url': 'http://vod-platform.net/embed/RufMcytHDolTH1MuKHY9Fw',
+ 'md5': '1db2b7249ce383d6be96499006e951fc',
+ 'info_dict': {
+ 'id': 'RufMcytHDolTH1MuKHY9Fw',
+ 'ext': 'mp4',
+ 'title': 'LBCi News_ النصرة في ضيافة الـ "سي.أن.أن"',
+ }
+ }, {
+ 'url': 'http://embed.kwikmotion.com/embed/RufMcytHDolTH1MuKHY9Fw',
+ 'only_matching': True,
+ }]
+
+ def _real_extract(self, url):
+ video_id = self._match_id(url)
+ webpage = self._download_webpage(url, video_id)
+
+ title = unescapeHTML(self._og_search_title(webpage))
+ hidden_inputs = self._hidden_inputs(webpage)
+
+ formats = self._extract_wowza_formats(
+ hidden_inputs.get('HiddenmyhHlsLink') or hidden_inputs['HiddenmyDashLink'], video_id, skip_protocols=['f4m', 'smil'])
+
+ return {
+ 'id': video_id,
+ 'title': title,
+ 'thumbnail': hidden_inputs.get('HiddenThumbnail') or self._og_search_thumbnail(webpage),
+ 'formats': formats,
+ }
diff --git a/yt_dlp/extractor/voicy.py b/yt_dlp/extractor/voicy.py
new file mode 100644
index 0000000..9ab9768
--- /dev/null
+++ b/yt_dlp/extractor/voicy.py
@@ -0,0 +1,146 @@
+import itertools
+
+from .common import InfoExtractor
+from ..compat import compat_str
+from ..utils import (
+ ExtractorError,
+ smuggle_url,
+ str_or_none,
+ traverse_obj,
+ unified_strdate,
+ unsmuggle_url,
+)
+
+
+class VoicyBaseIE(InfoExtractor):
+ def _extract_from_playlist_data(self, value):
+ voice_id = compat_str(value.get('PlaylistId'))
+ upload_date = unified_strdate(value.get('Published'), False)
+ items = [self._extract_single_article(voice_data) for voice_data in value['VoiceData']]
+ return {
+ '_type': 'multi_video',
+ 'entries': items,
+ 'id': voice_id,
+ 'title': compat_str(value.get('PlaylistName')),
+ 'uploader': value.get('SpeakerName'),
+ 'uploader_id': str_or_none(value.get('SpeakerId')),
+ 'channel': value.get('ChannelName'),
+ 'channel_id': str_or_none(value.get('ChannelId')),
+ 'upload_date': upload_date,
+ }
+
+ def _extract_single_article(self, entry):
+ formats = [{
+ 'url': entry['VoiceHlsFile'],
+ 'format_id': 'hls',
+ 'ext': 'm4a',
+ 'acodec': 'aac',
+ 'vcodec': 'none',
+ 'protocol': 'm3u8_native',
+ }, {
+ 'url': entry['VoiceFile'],
+ 'format_id': 'mp3',
+ 'ext': 'mp3',
+ 'acodec': 'mp3',
+ 'vcodec': 'none',
+ }]
+ return {
+ 'id': compat_str(entry.get('ArticleId')),
+ 'title': entry.get('ArticleTitle'),
+ 'description': entry.get('MediaName'),
+ 'formats': formats,
+ }
+
+ def _call_api(self, url, video_id, **kwargs):
+ response = self._download_json(url, video_id, **kwargs)
+ if response.get('Status') != 0:
+ message = traverse_obj(response, ('Value', 'Error', 'Message'), expected_type=compat_str)
+ if not message:
+                message = 'There was an error in the response: %d' % response.get('Status')
+ raise ExtractorError(message, expected=False)
+ return response.get('Value')
+
+
+class VoicyIE(VoicyBaseIE):
+ _WORKING = False
+ IE_NAME = 'voicy'
+ _VALID_URL = r'https?://voicy\.jp/channel/(?P<channel_id>\d+)/(?P<id>\d+)'
+ ARTICLE_LIST_API_URL = 'https://vmw.api.voicy.jp/articles_list?channel_id=%s&pid=%s'
+ _TESTS = [{
+ 'url': 'https://voicy.jp/channel/1253/122754',
+ 'info_dict': {
+ 'id': '122754',
+ 'title': '1/21(木)声日記:ついに原稿終わった!!',
+ 'uploader': 'ちょまど@ ITエンジニアなオタク',
+ 'uploader_id': '7339',
+ },
+ 'playlist_mincount': 9,
+ }]
+
+ def _real_extract(self, url):
+ mobj = self._match_valid_url(url)
+ assert mobj
+ voice_id = mobj.group('id')
+ channel_id = mobj.group('channel_id')
+ url, article_list = unsmuggle_url(url)
+ if not article_list:
+ article_list = self._call_api(self.ARTICLE_LIST_API_URL % (channel_id, voice_id), voice_id)
+ return self._extract_from_playlist_data(article_list)
+
+
+class VoicyChannelIE(VoicyBaseIE):
+ _WORKING = False
+ IE_NAME = 'voicy:channel'
+ _VALID_URL = r'https?://voicy\.jp/channel/(?P<id>\d+)'
+ PROGRAM_LIST_API_URL = 'https://vmw.api.voicy.jp/program_list/all?channel_id=%s&limit=20&public_type=3%s'
+ _TESTS = [{
+ 'url': 'https://voicy.jp/channel/1253/',
+ 'info_dict': {
+ 'id': '7339',
+ 'title': 'ゆるふわ日常ラジオ #ちょまラジ',
+ 'uploader': 'ちょまど@ ITエンジニアなオタク',
+ 'uploader_id': '7339',
+ },
+ 'playlist_mincount': 54,
+ }]
+
+ @classmethod
+ def suitable(cls, url):
+ return not VoicyIE.suitable(url) and super().suitable(url)
+
+ def _entries(self, channel_id):
+ pager = ''
+ for count in itertools.count(1):
+ article_list = self._call_api(self.PROGRAM_LIST_API_URL % (channel_id, pager), channel_id, note='Paging #%d' % count)
+ playlist_data = article_list.get('PlaylistData')
+ if not playlist_data:
+ break
+ yield from playlist_data
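+            # Build the next-page cursor from the last entry of the page just yielded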
+ last = playlist_data[-1]
+ pager = '&pid=%d&p_date=%s&play_count=%s' % (last['PlaylistId'], last['Published'], last['PlayCount'])
+
+ def _real_extract(self, url):
+ channel_id = self._match_id(url)
+ articles = self._entries(channel_id)
+
+ first_article = next(articles, None)
+        title = traverse_obj(first_article, ('ChannelName',), expected_type=compat_str)
+        speaker_name = traverse_obj(first_article, ('SpeakerName',), expected_type=compat_str)
+ if not title and speaker_name:
+ title = 'Uploads from %s' % speaker_name
+ if not title:
+ title = 'Uploads from channel ID %s' % channel_id
+
+ articles = itertools.chain([first_article], articles) if first_article else articles
+
+ playlist = (
+ self.url_result(smuggle_url('https://voicy.jp/channel/%s/%d' % (channel_id, value['PlaylistId']), value), VoicyIE.ie_key())
+ for value in articles)
+ return {
+ '_type': 'playlist',
+ 'entries': playlist,
+ 'id': channel_id,
+ 'title': title,
+ 'channel': speaker_name,
+ 'channel_id': channel_id,
+ }
diff --git a/yt_dlp/extractor/volejtv.py b/yt_dlp/extractor/volejtv.py
new file mode 100644
index 0000000..622d841
--- /dev/null
+++ b/yt_dlp/extractor/volejtv.py
@@ -0,0 +1,40 @@
+from .common import InfoExtractor
+
+
+class VolejTVIE(InfoExtractor):
+ _VALID_URL = r'https?://volej\.tv/video/(?P<id>\d+)'
+ _TESTS = [{
+ 'url': 'https://volej.tv/video/725742/',
+ 'info_dict': {
+ 'id': '725742',
+ 'ext': 'mp4',
+ 'description': 'Zápas VK Královo Pole vs VK Prostějov 10.12.2022 v 19:00 na Volej.TV',
+ 'thumbnail': 'https://volej.tv/images/og/16/17186/og.png',
+ 'title': 'VK Královo Pole vs VK Prostějov',
+ }
+ }, {
+ 'url': 'https://volej.tv/video/725605/',
+ 'info_dict': {
+ 'id': '725605',
+ 'ext': 'mp4',
+ 'thumbnail': 'https://volej.tv/images/og/15/17185/og.png',
+ 'title': 'VK Lvi Praha vs VK Euro Sitex Příbram',
+ 'description': 'Zápas VK Lvi Praha vs VK Euro Sitex Příbram 11.12.2022 v 19:00 na Volej.TV',
+ }
+ }]
+
+ def _real_extract(self, url):
+ video_id = self._match_id(url)
+ webpage = self._download_webpage(url, video_id)
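+        # The player configuration is embedded in the page as JSON inside a CDATA block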
+ json_data = self._search_json(
+ r'<\s*!\[CDATA[^=]+=', webpage, 'CDATA', video_id)
+ formats, subtitle = self._extract_m3u8_formats_and_subtitles(
+ json_data['urls']['hls'], video_id)
+ return {
+ 'id': video_id,
+ 'title': self._html_search_meta(['og:title', 'twitter:title'], webpage),
+ 'thumbnail': self._html_search_meta(['og:image', 'twitter:image'], webpage),
+ 'description': self._html_search_meta(['description', 'og:description', 'twitter:description'], webpage),
+ 'formats': formats,
+ 'subtitles': subtitle,
+ }
diff --git a/yt_dlp/extractor/voot.py b/yt_dlp/extractor/voot.py
new file mode 100644
index 0000000..ef77bed
--- /dev/null
+++ b/yt_dlp/extractor/voot.py
@@ -0,0 +1,212 @@
+import json
+import time
+import uuid
+
+from .common import InfoExtractor
+from ..compat import compat_str
+from ..networking.exceptions import HTTPError
+from ..utils import (
+ ExtractorError,
+ float_or_none,
+ int_or_none,
+ jwt_decode_hs256,
+ parse_age_limit,
+ traverse_obj,
+ try_call,
+ try_get,
+ unified_strdate,
+)
+
+
+class VootBaseIE(InfoExtractor):
+ _NETRC_MACHINE = 'voot'
+ _GEO_BYPASS = False
+    _LOGIN_HINT = 'Log in with "-u <email_address> -p <password>", or use "-u token -p <auth_token>" to log in with an auth token.'
+ _TOKEN = None
+ _EXPIRY = 0
+ _API_HEADERS = {'Origin': 'https://www.voot.com', 'Referer': 'https://www.voot.com/'}
+
+ def _perform_login(self, username, password):
+ if self._TOKEN and self._EXPIRY:
+ return
+
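+        # A pre-obtained JWT can be passed directly with "-u token -p <auth_token>"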
+ if username.lower() == 'token' and try_call(lambda: jwt_decode_hs256(password)):
+ VootBaseIE._TOKEN = password
+ VootBaseIE._EXPIRY = jwt_decode_hs256(password)['exp']
+ self.report_login()
+
+        # Logging in with a mobile number as the username is not supported
+ elif not username.isdigit():
+ check_username = self._download_json(
+ 'https://userauth.voot.com/usersV3/v3/checkUser', None, data=json.dumps({
+ 'type': 'email',
+ 'email': username
+ }, separators=(',', ':')).encode(), headers={
+ **self._API_HEADERS,
+ 'Content-Type': 'application/json;charset=utf-8',
+ }, note='Checking username', expected_status=403)
+ if not traverse_obj(check_username, ('isExist', {bool})):
+ if traverse_obj(check_username, ('status', 'code', {int})) == 9999:
+ self.raise_geo_restricted(countries=['IN'])
+ raise ExtractorError('Incorrect username', expected=True)
+ auth_token = traverse_obj(self._download_json(
+ 'https://userauth.voot.com/usersV3/v3/login', None, data=json.dumps({
+ 'type': 'traditional',
+ 'deviceId': str(uuid.uuid4()),
+ 'deviceBrand': 'PC/MAC',
+ 'data': {
+ 'email': username,
+ 'password': password
+ }
+ }, separators=(',', ':')).encode(), headers={
+ **self._API_HEADERS,
+ 'Content-Type': 'application/json;charset=utf-8',
+ }, note='Logging in', expected_status=400), ('data', 'authToken', {dict}))
+ if not auth_token:
+ raise ExtractorError('Incorrect password', expected=True)
+ VootBaseIE._TOKEN = auth_token['accessToken']
+ VootBaseIE._EXPIRY = auth_token['expirationTime']
+
+ else:
+ raise ExtractorError(self._LOGIN_HINT, expected=True)
+
+ def _check_token_expiry(self):
+ if int(time.time()) >= self._EXPIRY:
+ raise ExtractorError('Access token has expired', expected=True)
+
+ def _real_initialize(self):
+ if not self._TOKEN:
+ self.raise_login_required(self._LOGIN_HINT, method=None)
+ self._check_token_expiry()
+
+
+class VootIE(VootBaseIE):
+ _WORKING = False
+ _VALID_URL = r'''(?x)
+ (?:
+ voot:|
+ https?://(?:www\.)?voot\.com/?
+ (?:
+ movies?/[^/]+/|
+ (?:shows|kids)/(?:[^/]+/){4}
+ )
+ )
+ (?P<id>\d{3,})
+ '''
+ _TESTS = [{
+ 'url': 'https://www.voot.com/shows/ishq-ka-rang-safed/1/360558/is-this-the-end-of-kamini-/441353',
+ 'info_dict': {
+ 'id': '441353',
+ 'ext': 'mp4',
+ 'title': 'Is this the end of Kamini?',
+ 'description': 'md5:06291fbbbc4dcbe21235c40c262507c1',
+ 'timestamp': 1472103000,
+ 'upload_date': '20160825',
+ 'series': 'Ishq Ka Rang Safed',
+ 'season_number': 1,
+ 'episode': 'Is this the end of Kamini?',
+ 'episode_number': 340,
+ 'release_date': '20160825',
+ 'season': 'Season 1',
+ 'age_limit': 13,
+ 'duration': 1146.0,
+ },
+ 'params': {'skip_download': 'm3u8'},
+ }, {
+ 'url': 'https://www.voot.com/kids/characters/mighty-cat-masked-niyander-e-/400478/school-bag-disappears/440925',
+ 'only_matching': True,
+ }, {
+ 'url': 'https://www.voot.com/movies/pandavas-5/424627',
+ 'only_matching': True,
+ }, {
+ 'url': 'https://www.voot.com/movie/fight-club/621842',
+ 'only_matching': True,
+ }]
+
+ def _real_extract(self, url):
+ video_id = self._match_id(url)
+ media_info = self._download_json(
+ 'https://psapi.voot.com/jio/voot/v1/voot-web/content/query/asset-details', video_id,
+ query={'ids': f'include:{video_id}', 'responseType': 'common'}, headers={'accesstoken': self._TOKEN})
+
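+        # HTTP 400 from the playback-rights endpoint often indicates an expired access token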
+ try:
+ m3u8_url = self._download_json(
+ 'https://vootapi.media.jio.com/playback/v1/playbackrights', video_id,
+ 'Downloading playback JSON', data=b'{}', headers={
+ **self.geo_verification_headers(),
+ **self._API_HEADERS,
+ 'Content-Type': 'application/json;charset=utf-8',
+ 'platform': 'androidwebdesktop',
+ 'vootid': video_id,
+ 'voottoken': self._TOKEN,
+ })['m3u8']
+ except ExtractorError as e:
+ if isinstance(e.cause, HTTPError) and e.cause.status == 400:
+ self._check_token_expiry()
+ raise
+
+ formats = self._extract_m3u8_formats(m3u8_url, video_id, 'mp4', m3u8_id='hls')
+ self._remove_duplicate_formats(formats)
+
+ return {
+ 'id': video_id,
+ # '/_definst_/smil:vod/' m3u8 manifests claim to have 720p+ formats but max out at 480p
+ 'formats': traverse_obj(formats, (
+ lambda _, v: '/_definst_/smil:vod/' not in v['url'] or v['height'] <= 480)),
+ 'http_headers': self._API_HEADERS,
+ **traverse_obj(media_info, ('result', 0, {
+ 'title': ('fullTitle', {str}),
+ 'description': ('fullSynopsis', {str}),
+ 'series': ('showName', {str}),
+ 'season_number': ('season', {int_or_none}),
+ 'episode': ('fullTitle', {str}),
+ 'episode_number': ('episode', {int_or_none}),
+ 'timestamp': ('uploadTime', {int_or_none}),
+ 'release_date': ('telecastDate', {unified_strdate}),
+ 'age_limit': ('ageNemonic', {parse_age_limit}),
+ 'duration': ('duration', {float_or_none}),
+ })),
+ }
+
+
+class VootSeriesIE(VootBaseIE):
+ _WORKING = False
+ _VALID_URL = r'https?://(?:www\.)?voot\.com/shows/[^/]+/(?P<id>\d{3,})'
+ _TESTS = [{
+ 'url': 'https://www.voot.com/shows/chakravartin-ashoka-samrat/100002',
+ 'playlist_mincount': 442,
+ 'info_dict': {
+ 'id': '100002',
+ },
+ }, {
+ 'url': 'https://www.voot.com/shows/ishq-ka-rang-safed/100003',
+ 'playlist_mincount': 341,
+ 'info_dict': {
+ 'id': '100003',
+ },
+ }]
+ _SHOW_API = 'https://psapi.voot.com/media/voot/v1/voot-web/content/generic/season-by-show?sort=season%3Aasc&id={}&responseType=common'
+ _SEASON_API = 'https://psapi.voot.com/media/voot/v1/voot-web/content/generic/series-wise-episode?sort=episode%3Aasc&id={}&responseType=common&page={:d}'
+
+ def _entries(self, show_id):
+ show_json = self._download_json(self._SHOW_API.format(show_id), video_id=show_id)
+ for season in show_json.get('result', []):
+ page_num = 1
+ season_id = try_get(season, lambda x: x['id'], compat_str)
+ season_json = self._download_json(self._SEASON_API.format(season_id, page_num),
+ video_id=season_id,
+ note='Downloading JSON metadata page %d' % page_num)
+ episodes_json = season_json.get('result', [])
+ while episodes_json:
+ page_num += 1
+ for episode in episodes_json:
+ video_id = episode.get('id')
+ yield self.url_result(
+ 'voot:%s' % video_id, ie=VootIE.ie_key(), video_id=video_id)
+ episodes_json = self._download_json(self._SEASON_API.format(season_id, page_num),
+ video_id=season_id,
+ note='Downloading JSON metadata page %d' % page_num)['result']
+
+ def _real_extract(self, url):
+ show_id = self._match_id(url)
+ return self.playlist_result(self._entries(show_id), playlist_id=show_id)
diff --git a/yt_dlp/extractor/voxmedia.py b/yt_dlp/extractor/voxmedia.py
new file mode 100644
index 0000000..f369087
--- /dev/null
+++ b/yt_dlp/extractor/voxmedia.py
@@ -0,0 +1,215 @@
+from .common import InfoExtractor
+from .once import OnceIE
+from ..compat import compat_urllib_parse_unquote
+from ..utils import (
+ ExtractorError,
+ int_or_none,
+ try_get,
+ unified_timestamp,
+)
+
+
+class VoxMediaVolumeIE(OnceIE):
+ _VALID_URL = r'https?://volume\.vox-cdn\.com/embed/(?P<id>[0-9a-f]{9})'
+
+ def _real_extract(self, url):
+ video_id = self._match_id(url)
+ webpage = self._download_webpage(url, video_id)
+
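+        # The embed page defines a JS "setup" object that holds the player configuration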
+ setup = self._parse_json(self._search_regex(
+ r'setup\s*=\s*({.+});', webpage, 'setup'), video_id)
+ player_setup = setup.get('player_setup') or setup
+ video_data = player_setup.get('video') or {}
+ formatted_metadata = video_data.get('formatted_metadata') or {}
+ info = {
+ 'id': video_id,
+ 'title': player_setup.get('title') or video_data.get('title_short'),
+ 'description': video_data.get('description_long') or video_data.get('description_short'),
+ 'thumbnail': formatted_metadata.get('thumbnail') or video_data.get('brightcove_thumbnail'),
+ 'timestamp': unified_timestamp(formatted_metadata.get('video_publish_date')),
+ }
+ asset = try_get(setup, lambda x: x['embed_assets']['chorus'], dict) or {}
+
+ formats = []
+ hls_url = asset.get('hls_url')
+ if hls_url:
+ formats.extend(self._extract_m3u8_formats(
+ hls_url, video_id, 'mp4', 'm3u8_native', m3u8_id='hls', fatal=False))
+ mp4_url = asset.get('mp4_url')
+ if mp4_url:
+ tbr = self._search_regex(r'-(\d+)k\.', mp4_url, 'bitrate', default=None)
+ format_id = 'http'
+ if tbr:
+ format_id += '-' + tbr
+ formats.append({
+ 'format_id': format_id,
+ 'url': mp4_url,
+ 'tbr': int_or_none(tbr),
+ })
+ if formats:
+ info['formats'] = formats
+ info['duration'] = int_or_none(asset.get('duration'))
+ return info
+
+ for provider_video_type in ('youtube', 'brightcove'):
+ provider_video_id = video_data.get('%s_id' % provider_video_type)
+ if not provider_video_id:
+ continue
+ if provider_video_type == 'brightcove':
+ info['formats'] = self._extract_once_formats(provider_video_id)
+ else:
+ info.update({
+ '_type': 'url_transparent',
+ 'url': provider_video_id if provider_video_type == 'youtube' else '%s:%s' % (provider_video_type, provider_video_id),
+ 'ie_key': provider_video_type.capitalize(),
+ })
+ return info
+ raise ExtractorError('Unable to find provider video id')
+
+
+class VoxMediaIE(InfoExtractor):
+ _VALID_URL = r'https?://(?:www\.)?(?:(?:theverge|vox|sbnation|eater|polygon|curbed|racked|funnyordie)\.com|recode\.net)/(?:[^/]+/)*(?P<id>[^/?]+)'
+ _EMBED_REGEX = [r'<iframe[^>]+?src="(?P<url>https?://(?:www\.)?funnyordie\.com/embed/[^"]+)"']
+ _TESTS = [{
+ # Volume embed, Youtube
+ 'url': 'http://www.theverge.com/2014/6/27/5849272/material-world-how-google-discovered-what-software-is-made-of',
+ 'info_dict': {
+ 'id': 'j4mLW6x17VM',
+ 'ext': 'mp4',
+ 'title': 'Material world: how Google discovered what software is made of',
+ 'description': 'md5:dfc17e7715e3b542d66e33a109861382',
+ 'upload_date': '20190710',
+ 'uploader_id': 'TheVerge',
+ 'uploader': 'The Verge',
+ },
+ 'add_ie': ['Youtube'],
+ }, {
+ # Volume embed, Youtube
+ 'url': 'http://www.theverge.com/2014/10/21/7025853/google-nexus-6-hands-on-photos-video-android-phablet',
+ 'md5': 'fd19aa0cf3a0eea515d4fd5c8c0e9d68',
+ 'info_dict': {
+ 'id': 'Gy8Md3Eky38',
+ 'ext': 'mp4',
+ 'title': 'The Nexus 6: hands-on with Google\'s phablet',
+ 'description': 'md5:d9f0216e5fb932dd2033d6db37ac3f1d',
+ 'uploader_id': 'TheVerge',
+ 'upload_date': '20141021',
+ 'uploader': 'The Verge',
+ 'timestamp': 1413907200,
+ },
+ 'add_ie': ['Youtube'],
+ 'skip': 'similar to the previous test',
+ }, {
+ # Volume embed, Youtube
+ 'url': 'http://www.vox.com/2016/3/31/11336640/mississippi-lgbt-religious-freedom-bill',
+ 'info_dict': {
+ 'id': '22986359b',
+ 'ext': 'mp4',
+ 'title': "Mississippi's laws are so bad that its anti-LGBTQ law isn't needed to allow discrimination",
+ 'description': 'md5:fc1317922057de31cd74bce91eb1c66c',
+ 'upload_date': '20150915',
+ 'timestamp': 1442332800,
+ 'duration': 285,
+ },
+ 'add_ie': ['Youtube'],
+ 'skip': 'similar to the previous test',
+ }, {
+ # youtube embed
+ 'url': 'http://www.vox.com/2016/3/24/11291692/robot-dance',
+ 'md5': '83b3080489fb103941e549352d3e0977',
+ 'info_dict': {
+ 'id': 'FcNHTJU1ufM',
+ 'ext': 'mp4',
+ 'title': 'How "the robot" became the greatest novelty dance of all time',
+ 'description': 'md5:b081c0d588b8b2085870cda55e6da176',
+ 'upload_date': '20160324',
+ 'uploader_id': 'voxdotcom',
+ 'uploader': 'Vox',
+ },
+ 'add_ie': ['Youtube'],
+ 'skip': 'Page no longer contain videos',
+ }, {
+ # SBN.VideoLinkset.entryGroup multiple ooyala embeds
+ 'url': 'http://www.sbnation.com/college-football-recruiting/2015/2/3/7970291/national-signing-day-rationalizations-itll-be-ok-itll-be-ok',
+ 'info_dict': {
+ 'id': 'national-signing-day-rationalizations-itll-be-ok-itll-be-ok',
+ 'title': '25 lies you will tell yourself on National Signing Day',
+ 'description': 'It\'s the most self-delusional time of the year, and everyone\'s gonna tell the same lies together!',
+ },
+ 'playlist': [{
+ 'md5': '721fededf2ab74ae4176c8c8cbfe092e',
+ 'info_dict': {
+ 'id': 'p3cThlMjE61VDi_SD9JlIteSNPWVDBB9',
+ 'ext': 'mp4',
+ 'title': 'Buddy Hield vs Steph Curry (and the world)',
+ 'description': 'Let’s dissect only the most important Final Four storylines.',
+ },
+ }, {
+ 'md5': 'bf0c5cc115636af028be1bab79217ea9',
+ 'info_dict': {
+ 'id': 'BmbmVjMjE6esPHxdALGubTrouQ0jYLHj',
+ 'ext': 'mp4',
+ 'title': 'Chasing Cinderella 2016: Syracuse basketball',
+ 'description': 'md5:e02d56b026d51aa32c010676765a690d',
+ },
+ }],
+ 'skip': 'Page no longer contain videos',
+ }, {
+ # volume embed, Brightcove Once
+ 'url': 'https://www.recode.net/2014/6/17/11628066/post-post-pc-ceo-the-full-code-conference-video-of-microsofts-satya',
+ 'md5': '2dbc77b8b0bff1894c2fce16eded637d',
+ 'info_dict': {
+ 'id': '1231c973d',
+ 'ext': 'mp4',
+ 'title': 'Post-Post-PC CEO: The Full Code Conference Video of Microsoft\'s Satya Nadella',
+ 'description': 'The longtime veteran was chosen earlier this year as the software giant\'s third leader in its history.',
+ 'timestamp': 1402938000,
+ 'upload_date': '20140616',
+ 'duration': 4114,
+ },
+ 'add_ie': ['VoxMediaVolume'],
+ }]
+
+ def _real_extract(self, url):
+ display_id = self._match_id(url)
+ webpage = compat_urllib_parse_unquote(self._download_webpage(url, display_id))
+
+ def create_entry(provider_video_id, provider_video_type, title=None, description=None):
+ video_url = {
+ 'youtube': '%s',
+ 'volume': 'http://volume.vox-cdn.com/embed/%s',
+ }[provider_video_type] % provider_video_id
+ return {
+ '_type': 'url_transparent',
+ 'url': video_url,
+ 'title': title or self._og_search_title(webpage),
+ 'description': description or self._og_search_description(webpage),
+ }
+
+ entries = []
+ entries_data = self._search_regex([
+ r'Chorus\.VideoContext\.addVideo\((\[{.+}\])\);',
+ r'var\s+entry\s*=\s*({.+});',
+ r'SBN\.VideoLinkset\.entryGroup\(\s*(\[.+\])',
+ ], webpage, 'video data', default=None)
+ if entries_data:
+ entries_data = self._parse_json(entries_data, display_id)
+ if isinstance(entries_data, dict):
+ entries_data = [entries_data]
+ for video_data in entries_data:
+ provider_video_id = video_data.get('provider_video_id')
+ provider_video_type = video_data.get('provider_video_type')
+ if provider_video_id and provider_video_type:
+ entries.append(create_entry(
+ provider_video_id, provider_video_type,
+ video_data.get('title'), video_data.get('description')))
+
+ volume_uuid = self._search_regex(
+ r'data-volume-uuid="([^"]+)"', webpage, 'volume uuid', default=None)
+ if volume_uuid:
+ entries.append(create_entry(volume_uuid, 'volume'))
+
+ if len(entries) == 1:
+ return entries[0]
+ else:
+ return self.playlist_result(entries, display_id, self._og_search_title(webpage), self._og_search_description(webpage))
diff --git a/yt_dlp/extractor/vrt.py b/yt_dlp/extractor/vrt.py
new file mode 100644
index 0000000..497233d
--- /dev/null
+++ b/yt_dlp/extractor/vrt.py
@@ -0,0 +1,427 @@
+import functools
+import json
+import time
+import urllib.parse
+
+from .gigya import GigyaBaseIE
+from ..networking.exceptions import HTTPError
+from ..utils import (
+ ExtractorError,
+ clean_html,
+ extract_attributes,
+ float_or_none,
+ get_element_by_class,
+ get_element_html_by_class,
+ int_or_none,
+ join_nonempty,
+ jwt_encode_hs256,
+ make_archive_id,
+ parse_age_limit,
+ parse_iso8601,
+ str_or_none,
+ strip_or_none,
+ traverse_obj,
+ url_or_none,
+ urlencode_postdata,
+)
+
+
+class VRTBaseIE(GigyaBaseIE):
+ _GEO_BYPASS = False
+ _PLAYER_INFO = {
+ 'platform': 'desktop',
+ 'app': {
+ 'type': 'browser',
+ 'name': 'Chrome',
+ },
+ 'device': 'undefined (undefined)',
+ 'os': {
+ 'name': 'Windows',
+ 'version': 'x86_64'
+ },
+ 'player': {
+ 'name': 'VRT web player',
+ 'version': '2.7.4-prod-2023-04-19T06:05:45'
+ }
+ }
+ # From https://player.vrt.be/vrtnws/js/main.js & https://player.vrt.be/ketnet/js/main.8cdb11341bcb79e4cd44.js
+ _JWT_KEY_ID = '0-0Fp51UZykfaiCJrfTE3+oMI8zvDteYfPtR+2n1R+z8w='
+ _JWT_SIGNING_KEY = 'b5f500d55cb44715107249ccd8a5c0136cfb2788dbb71b90a4f142423bacaf38' # -dev
+ # player-stag.vrt.be key: d23987504521ae6fbf2716caca6700a24bb1579477b43c84e146b279de5ca595
+ # player.vrt.be key: 2a9251d782700769fb856da5725daf38661874ca6f80ae7dc2b05ec1a81a24ae
+
+ def _extract_formats_and_subtitles(self, data, video_id):
+ if traverse_obj(data, 'drm'):
+ self.report_drm(video_id)
+
+ formats, subtitles = [], {}
+ for target in traverse_obj(data, ('targetUrls', lambda _, v: url_or_none(v['url']) and v['type'])):
+ format_type = target['type'].upper()
+ format_url = target['url']
+ if format_type in ('HLS', 'HLS_AES'):
+ fmts, subs = self._extract_m3u8_formats_and_subtitles(
+ format_url, video_id, 'mp4', m3u8_id=format_type, fatal=False)
+ formats.extend(fmts)
+ self._merge_subtitles(subs, target=subtitles)
+ elif format_type == 'HDS':
+ formats.extend(self._extract_f4m_formats(
+ format_url, video_id, f4m_id=format_type, fatal=False))
+ elif format_type == 'MPEG_DASH':
+ fmts, subs = self._extract_mpd_formats_and_subtitles(
+ format_url, video_id, mpd_id=format_type, fatal=False)
+ formats.extend(fmts)
+ self._merge_subtitles(subs, target=subtitles)
+ elif format_type == 'HSS':
+ fmts, subs = self._extract_ism_formats_and_subtitles(
+ format_url, video_id, ism_id='mss', fatal=False)
+ formats.extend(fmts)
+ self._merge_subtitles(subs, target=subtitles)
+ else:
+ formats.append({
+ 'format_id': format_type,
+ 'url': format_url,
+ })
+
+ for sub in traverse_obj(data, ('subtitleUrls', lambda _, v: v['url'] and v['type'] == 'CLOSED')):
+ subtitles.setdefault('nl', []).append({'url': sub['url']})
+
+ return formats, subtitles
+
+ def _call_api(self, video_id, client='null', id_token=None, version='v2'):
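+        # The token service expects a short-lived (15-minute) JWT describing the player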
+ player_info = {'exp': (round(time.time(), 3) + 900), **self._PLAYER_INFO}
+ player_token = self._download_json(
+ 'https://media-services-public.vrt.be/vualto-video-aggregator-web/rest/external/v2/tokens',
+ video_id, 'Downloading player token', headers={
+ **self.geo_verification_headers(),
+ 'Content-Type': 'application/json',
+ }, data=json.dumps({
+ 'identityToken': id_token or {},
+ 'playerInfo': jwt_encode_hs256(player_info, self._JWT_SIGNING_KEY, headers={
+ 'kid': self._JWT_KEY_ID
+ }).decode()
+ }, separators=(',', ':')).encode())['vrtPlayerToken']
+
+ return self._download_json(
+ f'https://media-services-public.vrt.be/media-aggregator/{version}/media-items/{video_id}',
+ video_id, 'Downloading API JSON', query={
+ 'vrtPlayerToken': player_token,
+ 'client': client,
+ }, expected_status=400)
+
+
+class VRTIE(VRTBaseIE):
+ IE_DESC = 'VRT NWS, Flanders News, Flandern Info and Sporza'
+ _VALID_URL = r'https?://(?:www\.)?(?P<site>vrt\.be/vrtnws|sporza\.be)/[a-z]{2}/\d{4}/\d{2}/\d{2}/(?P<id>[^/?&#]+)'
+ _TESTS = [{
+ 'url': 'https://www.vrt.be/vrtnws/nl/2019/05/15/beelden-van-binnenkant-notre-dame-een-maand-na-de-brand/',
+ 'info_dict': {
+ 'id': 'pbs-pub-7855fc7b-1448-49bc-b073-316cb60caa71$vid-2ca50305-c38a-4762-9890-65cbd098b7bd',
+ 'ext': 'mp4',
+ 'title': 'Beelden van binnenkant Notre-Dame, één maand na de brand',
+ 'description': 'md5:6fd85f999b2d1841aa5568f4bf02c3ff',
+ 'duration': 31.2,
+ 'thumbnail': 'https://images.vrt.be/orig/2019/05/15/2d914d61-7710-11e9-abcc-02b7b76bf47f.jpg',
+ },
+ 'params': {'skip_download': 'm3u8'},
+ }, {
+ 'url': 'https://sporza.be/nl/2019/05/15/de-belgian-cats-zijn-klaar-voor-het-ek/',
+ 'info_dict': {
+ 'id': 'pbs-pub-f2c86a46-8138-413a-a4b9-a0015a16ce2c$vid-1f112b31-e58e-4379-908d-aca6d80f8818',
+ 'ext': 'mp4',
+ 'title': 'De Belgian Cats zijn klaar voor het EK',
+ 'description': 'Video: De Belgian Cats zijn klaar voor het EK mét Ann Wauters | basketbal, sport in het journaal',
+ 'duration': 115.17,
+ 'thumbnail': 'https://images.vrt.be/orig/2019/05/15/11c0dba3-770e-11e9-abcc-02b7b76bf47f.jpg',
+ },
+ 'params': {'skip_download': 'm3u8'},
+ }]
+ _CLIENT_MAP = {
+ 'vrt.be/vrtnws': 'vrtnieuws',
+ 'sporza.be': 'sporza',
+ }
+
+ def _real_extract(self, url):
+ site, display_id = self._match_valid_url(url).groups()
+ webpage = self._download_webpage(url, display_id)
+ attrs = extract_attributes(get_element_html_by_class('vrtvideo', webpage) or '')
+
+ asset_id = attrs.get('data-video-id') or attrs['data-videoid']
+ publication_id = traverse_obj(attrs, 'data-publication-id', 'data-publicationid')
+ if publication_id:
+ asset_id = f'{publication_id}${asset_id}'
+ client = traverse_obj(attrs, 'data-client-code', 'data-client') or self._CLIENT_MAP[site]
+
+ data = self._call_api(asset_id, client)
+ formats, subtitles = self._extract_formats_and_subtitles(data, asset_id)
+
+ description = self._html_search_meta(
+ ['og:description', 'twitter:description', 'description'], webpage)
+ if description == '…':
+ description = None
+
+ return {
+ 'id': asset_id,
+ 'formats': formats,
+ 'subtitles': subtitles,
+ 'description': description,
+ 'thumbnail': url_or_none(attrs.get('data-posterimage')),
+ 'duration': float_or_none(attrs.get('data-duration'), 1000),
+ '_old_archive_ids': [make_archive_id('Canvas', asset_id)],
+ **traverse_obj(data, {
+ 'title': ('title', {str}),
+ 'description': ('shortDescription', {str}),
+ 'duration': ('duration', {functools.partial(float_or_none, scale=1000)}),
+ 'thumbnail': ('posterImageUrl', {url_or_none}),
+ }),
+ }
+
+
+class VrtNUIE(VRTBaseIE):
+ IE_DESC = 'VRT MAX'
+ _VALID_URL = r'https?://(?:www\.)?vrt\.be/vrtnu/a-z/(?:[^/]+/){2}(?P<id>[^/?#&]+)'
+ _TESTS = [{
+ # CONTENT_IS_AGE_RESTRICTED
+ 'url': 'https://www.vrt.be/vrtnu/a-z/de-ideale-wereld/2023-vj/de-ideale-wereld-d20230116/',
+ 'info_dict': {
+ 'id': 'pbs-pub-855b00a8-6ce2-4032-ac4f-1fcf3ae78524$vid-d2243aa1-ec46-4e34-a55b-92568459906f',
+ 'ext': 'mp4',
+ 'title': 'Tom Waes',
+ 'description': 'Satirisch actualiteitenmagazine met Ella Leyers. Tom Waes is te gast.',
+ 'timestamp': 1673905125,
+ 'release_timestamp': 1673905125,
+ 'series': 'De ideale wereld',
+ 'season_id': '1672830988794',
+ 'episode': 'Aflevering 1',
+ 'episode_number': 1,
+ 'episode_id': '1672830988861',
+ 'display_id': 'de-ideale-wereld-d20230116',
+ 'channel': 'VRT',
+ 'duration': 1939.0,
+ 'thumbnail': 'https://images.vrt.be/orig/2023/01/10/1bb39cb3-9115-11ed-b07d-02b7b76bf47f.jpg',
+ 'release_date': '20230116',
+ 'upload_date': '20230116',
+ 'age_limit': 12,
+ },
+ }, {
+ 'url': 'https://www.vrt.be/vrtnu/a-z/buurman--wat-doet-u-nu-/6/buurman--wat-doet-u-nu--s6-trailer/',
+ 'info_dict': {
+ 'id': 'pbs-pub-ad4050eb-d9e5-48c2-9ec8-b6c355032361$vid-0465537a-34a8-4617-8352-4d8d983b4eee',
+ 'ext': 'mp4',
+ 'title': 'Trailer seizoen 6 \'Buurman, wat doet u nu?\'',
+ 'description': 'md5:197424726c61384b4e5c519f16c0cf02',
+ 'timestamp': 1652940000,
+ 'release_timestamp': 1652940000,
+ 'series': 'Buurman, wat doet u nu?',
+ 'season': 'Seizoen 6',
+ 'season_number': 6,
+ 'season_id': '1652344200907',
+ 'episode': 'Aflevering 0',
+ 'episode_number': 0,
+ 'episode_id': '1652951873524',
+ 'display_id': 'buurman--wat-doet-u-nu--s6-trailer',
+ 'channel': 'VRT',
+ 'duration': 33.13,
+ 'thumbnail': 'https://images.vrt.be/orig/2022/05/23/3c234d21-da83-11ec-b07d-02b7b76bf47f.jpg',
+ 'release_date': '20220519',
+ 'upload_date': '20220519',
+ },
+ 'params': {'skip_download': 'm3u8'},
+ }]
+ _NETRC_MACHINE = 'vrtnu'
+ _authenticated = False
+
+ def _perform_login(self, username, password):
+ auth_info = self._gigya_login({
+ 'APIKey': '3_0Z2HujMtiWq_pkAjgnS2Md2E11a1AwZjYiBETtwNE-EoEHDINgtnvcAOpNgmrVGy',
+ 'targetEnv': 'jssdk',
+ 'loginID': username,
+ 'password': password,
+ 'authMode': 'cookie',
+ })
+
+ if auth_info.get('errorDetails'):
+            raise ExtractorError(f'Unable to log in. VrtNU said: {auth_info["errorDetails"]}', expected=True)
+
+ # Sometimes authentication fails for no good reason, retry
+ for retry in self.RetryManager():
+ if retry.attempt > 1:
+ self._sleep(1, None)
+ try:
+ self._request_webpage(
+ 'https://token.vrt.be/vrtnuinitlogin', None, note='Requesting XSRF Token',
+ errnote='Could not get XSRF Token', query={
+ 'provider': 'site',
+ 'destination': 'https://www.vrt.be/vrtnu/',
+ })
+ self._request_webpage(
+ 'https://login.vrt.be/perform_login', None,
+ note='Performing login', errnote='Login failed',
+ query={'client_id': 'vrtnu-site'}, data=urlencode_postdata({
+ 'UID': auth_info['UID'],
+ 'UIDSignature': auth_info['UIDSignature'],
+ 'signatureTimestamp': auth_info['signatureTimestamp'],
+ '_csrf': self._get_cookies('https://login.vrt.be').get('OIDCXSRF').value,
+ }))
+ except ExtractorError as e:
+ if isinstance(e.cause, HTTPError) and e.cause.status == 401:
+ retry.error = e
+ continue
+ raise
+
+ self._authenticated = True
+
+ def _real_extract(self, url):
+ display_id = self._match_id(url)
+ parsed_url = urllib.parse.urlparse(url)
+ details = self._download_json(
+ f'{parsed_url.scheme}://{parsed_url.netloc}{parsed_url.path.rstrip("/")}.model.json',
+ display_id, 'Downloading asset JSON', 'Unable to download asset JSON')['details']
+
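+        # Full media IDs are 'publicationId$videoId' composites taken from the watch-episode action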
+ watch_info = traverse_obj(details, (
+ 'actions', lambda _, v: v['type'] == 'watch-episode', {dict}), get_all=False) or {}
+ video_id = join_nonempty(
+ 'episodePublicationId', 'episodeVideoId', delim='$', from_dict=watch_info)
+ if '$' not in video_id:
+ raise ExtractorError('Unable to extract video ID')
+
+ vrtnutoken = self._download_json(
+ 'https://token.vrt.be/refreshtoken', video_id, note='Retrieving vrtnutoken',
+ errnote='Token refresh failed')['vrtnutoken'] if self._authenticated else None
+
+ video_info = self._call_api(video_id, 'vrtnu-web@PROD', vrtnutoken)
+
+ if 'title' not in video_info:
+ code = video_info.get('code')
+ if code in ('AUTHENTICATION_REQUIRED', 'CONTENT_IS_AGE_RESTRICTED'):
+ self.raise_login_required(code, method='password')
+ elif code in ('INVALID_LOCATION', 'CONTENT_AVAILABLE_ONLY_IN_BE'):
+ self.raise_geo_restricted(countries=['BE'])
+ elif code == 'CONTENT_AVAILABLE_ONLY_FOR_BE_RESIDENTS_AND_EXPATS':
+ if not self._authenticated:
+ self.raise_login_required(code, method='password')
+ self.raise_geo_restricted(countries=['BE'])
+ raise ExtractorError(code, expected=True)
+
+ formats, subtitles = self._extract_formats_and_subtitles(video_info, video_id)
+
+ return {
+ **traverse_obj(details, {
+ 'title': 'title',
+ 'description': ('description', {clean_html}),
+ 'timestamp': ('data', 'episode', 'onTime', 'raw', {parse_iso8601}),
+ 'release_timestamp': ('data', 'episode', 'onTime', 'raw', {parse_iso8601}),
+ 'series': ('data', 'program', 'title'),
+ 'season': ('data', 'season', 'title', 'value'),
+ 'season_number': ('data', 'season', 'title', 'raw', {int_or_none}),
+ 'season_id': ('data', 'season', 'id', {str_or_none}),
+ 'episode': ('data', 'episode', 'number', 'value', {str_or_none}),
+ 'episode_number': ('data', 'episode', 'number', 'raw', {int_or_none}),
+ 'episode_id': ('data', 'episode', 'id', {str_or_none}),
+ 'age_limit': ('data', 'episode', 'age', 'raw', {parse_age_limit}),
+ }),
+ 'id': video_id,
+ 'display_id': display_id,
+ 'channel': 'VRT',
+ 'formats': formats,
+ 'duration': float_or_none(video_info.get('duration'), 1000),
+ 'thumbnail': url_or_none(video_info.get('posterImageUrl')),
+ 'subtitles': subtitles,
+ '_old_archive_ids': [make_archive_id('Canvas', video_id)],
+ }
+
+
+class KetnetIE(VRTBaseIE):
+ _VALID_URL = r'https?://(?:www\.)?ketnet\.be/(?P<id>(?:[^/]+/)*[^/?#&]+)'
+ _TESTS = [{
+ 'url': 'https://www.ketnet.be/kijken/m/meisjes/6/meisjes-s6a5',
+ 'info_dict': {
+ 'id': 'pbs-pub-39f8351c-a0a0-43e6-8394-205d597d6162$vid-5e306921-a9aa-4fa9-9f39-5b82c8f1028e',
+ 'ext': 'mp4',
+ 'title': 'Meisjes',
+ 'episode': 'Reeks 6: Week 5',
+ 'season': 'Reeks 6',
+ 'series': 'Meisjes',
+ 'timestamp': 1685251800,
+ 'upload_date': '20230528',
+ },
+ 'params': {'skip_download': 'm3u8'},
+ }]
+
+ def _real_extract(self, url):
+ display_id = self._match_id(url)
+
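+        # Metadata is served by Ketnet's GraphQL endpoint, keyed by the page's model path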
+ video = self._download_json(
+ 'https://senior-bff.ketnet.be/graphql', display_id, query={
+ 'query': '''{
+ video(id: "content/ketnet/nl/%s.model.json") {
+ description
+ episodeNr
+ imageUrl
+ mediaReference
+ programTitle
+ publicationDate
+ seasonTitle
+ subtitleVideodetail
+ titleVideodetail
+ }
+}''' % display_id,
+ })['data']['video']
+
+ video_id = urllib.parse.unquote(video['mediaReference'])
+ data = self._call_api(video_id, 'ketnet@PROD', version='v1')
+ formats, subtitles = self._extract_formats_and_subtitles(data, video_id)
+
+ return {
+ 'id': video_id,
+ 'formats': formats,
+ 'subtitles': subtitles,
+ '_old_archive_ids': [make_archive_id('Canvas', video_id)],
+ **traverse_obj(video, {
+ 'title': ('titleVideodetail', {str}),
+ 'description': ('description', {str}),
+ 'thumbnail': ('thumbnail', {url_or_none}),
+ 'timestamp': ('publicationDate', {parse_iso8601}),
+ 'series': ('programTitle', {str}),
+ 'season': ('seasonTitle', {str}),
+ 'episode': ('subtitleVideodetail', {str}),
+ 'episode_number': ('episodeNr', {int_or_none}),
+ }),
+ }
+
+
+class DagelijkseKostIE(VRTBaseIE):
+ IE_DESC = 'dagelijksekost.een.be'
+ _VALID_URL = r'https?://dagelijksekost\.een\.be/gerechten/(?P<id>[^/?#&]+)'
+ _TESTS = [{
+ 'url': 'https://dagelijksekost.een.be/gerechten/hachis-parmentier-met-witloof',
+ 'info_dict': {
+ 'id': 'md-ast-27a4d1ff-7d7b-425e-b84f-a4d227f592fa',
+ 'ext': 'mp4',
+ 'title': 'Hachis parmentier met witloof',
+ 'description': 'md5:9960478392d87f63567b5b117688cdc5',
+ 'display_id': 'hachis-parmentier-met-witloof',
+ },
+ 'params': {'skip_download': 'm3u8'},
+ }]
+
+ def _real_extract(self, url):
+ display_id = self._match_id(url)
+ webpage = self._download_webpage(url, display_id)
+ video_id = self._html_search_regex(
+ r'data-url=(["\'])(?P<id>(?:(?!\1).)+)\1', webpage, 'video id', group='id')
+
+ data = self._call_api(video_id, 'dako@prod', version='v1')
+ formats, subtitles = self._extract_formats_and_subtitles(data, video_id)
+
+ return {
+ 'id': video_id,
+ 'formats': formats,
+ 'subtitles': subtitles,
+ 'display_id': display_id,
+ 'title': strip_or_none(get_element_by_class(
+ 'dish-metadata__title', webpage) or self._html_search_meta('twitter:title', webpage)),
+ 'description': clean_html(get_element_by_class(
+ 'dish-description', webpage)) or self._html_search_meta(
+ ['description', 'twitter:description', 'og:description'], webpage),
+ '_old_archive_ids': [make_archive_id('Canvas', video_id)],
+ }
diff --git a/yt_dlp/extractor/vtm.py b/yt_dlp/extractor/vtm.py
new file mode 100644
index 0000000..6db49c5
--- /dev/null
+++ b/yt_dlp/extractor/vtm.py
@@ -0,0 +1,60 @@
+from .common import InfoExtractor
+from ..utils import (
+ int_or_none,
+ parse_iso8601,
+ try_get,
+)
+
+
+class VTMIE(InfoExtractor):
+ _WORKING = False
+ _VALID_URL = r'https?://(?:www\.)?vtm\.be/([^/?&#]+)~v(?P<id>[0-9a-f]{8}(?:-[0-9a-f]{4}){3}-[0-9a-f]{12})'
+ _TEST = {
+ 'url': 'https://vtm.be/gast-vernielt-genkse-hotelkamer~ve7534523-279f-4b4d-a5c9-a33ffdbe23e1',
+ 'md5': '37dca85fbc3a33f2de28ceb834b071f8',
+ 'info_dict': {
+ 'id': '192445',
+ 'ext': 'mp4',
+ 'title': 'Gast vernielt Genkse hotelkamer',
+ 'timestamp': 1611060180,
+ 'upload_date': '20210119',
+ 'duration': 74,
+ # TODO: fix url _type result processing
+ # 'series': 'Op Interventie',
+ }
+ }
+
+ def _real_extract(self, url):
+ uuid = self._match_id(url)
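+        # Metadata is fetched from an AWS AppSync GraphQL endpoint using a public API key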
+ video = self._download_json(
+ 'https://omc4vm23offuhaxx6hekxtzspi.appsync-api.eu-west-1.amazonaws.com/graphql',
+ uuid, query={
+ 'query': '''{
+ getComponent(type: Video, uuid: "%s") {
+ ... on Video {
+ description
+ duration
+ myChannelsVideo
+ program {
+ title
+ }
+ publishedAt
+ title
+ }
+ }
+}''' % uuid,
+ }, headers={
+ 'x-api-key': 'da2-lz2cab4tfnah3mve6wiye4n77e',
+ })['data']['getComponent']
+
+ return {
+ '_type': 'url',
+ 'id': uuid,
+ 'title': video.get('title'),
+ 'url': 'http://mychannels.video/embed/%d' % video['myChannelsVideo'],
+ 'description': video.get('description'),
+ 'timestamp': parse_iso8601(video.get('publishedAt')),
+ 'duration': int_or_none(video.get('duration')),
+ 'series': try_get(video, lambda x: x['program']['title']),
+ 'ie_key': 'Medialaan',
+ }
diff --git a/yt_dlp/extractor/vuclip.py b/yt_dlp/extractor/vuclip.py
new file mode 100644
index 0000000..0e56298
--- /dev/null
+++ b/yt_dlp/extractor/vuclip.py
@@ -0,0 +1,68 @@
+import re
+
+from .common import InfoExtractor
+from ..compat import (
+ compat_urllib_parse_urlparse,
+)
+from ..utils import (
+ ExtractorError,
+ parse_duration,
+ remove_end,
+)
+
+
+class VuClipIE(InfoExtractor):
+ _VALID_URL = r'https?://(?:m\.)?vuclip\.com/w\?.*?cid=(?P<id>[0-9]+)'
+
+ _TEST = {
+ 'url': 'http://m.vuclip.com/w?cid=1129900602&bu=8589892792&frm=w&z=34801&op=0&oc=843169247&section=recommend',
+ 'info_dict': {
+ 'id': '1129900602',
+ 'ext': '3gp',
+ 'title': 'Top 10 TV Convicts',
+ 'duration': 733,
+ }
+ }
+
+ def _real_extract(self, url):
+ video_id = self._match_id(url)
+ webpage = self._download_webpage(url, video_id)
+
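+        # An interstitial ad prompt may be served first; follow its "No" link to the ad-free page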
+ ad_m = re.search(
+ r'''value="No.*?" onClick="location.href='([^"']+)'"''', webpage)
+ if ad_m:
+ urlr = compat_urllib_parse_urlparse(url)
+ adfree_url = urlr.scheme + '://' + urlr.netloc + ad_m.group(1)
+ webpage = self._download_webpage(
+ adfree_url, video_id, note='Download post-ad page')
+
+ error_msg = self._html_search_regex(
+ r'<p class="message">(.*?)</p>', webpage, 'error message',
+ default=None)
+ if error_msg:
+ raise ExtractorError(
+ '%s said: %s' % (self.IE_NAME, error_msg), expected=True)
+
+ # These clowns alternate between two page types
+ video_url = self._search_regex(
+ r'<a[^>]+href="([^"]+)"[^>]*><img[^>]+src="[^"]*/play\.gif',
+ webpage, 'video URL', default=None)
+ if video_url:
+ formats = [{
+ 'url': video_url,
+ }]
+ else:
+ formats = self._parse_html5_media_entries(url, webpage, video_id)[0]['formats']
+
+ title = remove_end(self._html_search_regex(
+ r'<title>(.*?)-\s*Vuclip</title>', webpage, 'title').strip(), ' - Video')
+
+ duration = parse_duration(self._html_search_regex(
+ r'[(>]([0-9]+:[0-9]+)(?:<span|\))', webpage, 'duration', fatal=False))
+
+ return {
+ 'id': video_id,
+ 'formats': formats,
+ 'title': title,
+ 'duration': duration,
+ }
diff --git a/yt_dlp/extractor/vvvvid.py b/yt_dlp/extractor/vvvvid.py
new file mode 100644
index 0000000..b961123
--- /dev/null
+++ b/yt_dlp/extractor/vvvvid.py
@@ -0,0 +1,336 @@
+import functools
+import re
+
+from .common import InfoExtractor
+from .youtube import YoutubeIE
+from ..utils import (
+ ExtractorError,
+ int_or_none,
+ str_or_none,
+)
+
+
+class VVVVIDIE(InfoExtractor):
+ _VALID_URL_BASE = r'https?://(?:www\.)?vvvvid\.it/(?:#!)?(?:show|anime|film|series)/'
+ _VALID_URL = r'%s(?P<show_id>\d+)/[^/]+/(?P<season_id>\d+)/(?P<id>[0-9]+)' % _VALID_URL_BASE
+ _TESTS = [{
+ # video_type == 'video/vvvvid'
+ 'url': 'https://www.vvvvid.it/show/498/the-power-of-computing/518/505692/playstation-vr-cambiera-il-nostro-modo-di-giocare',
+ 'info_dict': {
+ 'id': '505692',
+ 'ext': 'mp4',
+ 'title': 'Playstation VR cambierà il nostro modo di giocare',
+ 'duration': 93,
+ 'series': 'The Power of Computing',
+ 'season_id': '518',
+ 'episode': 'Playstation VR cambierà il nostro modo di giocare',
+ 'episode_id': '4747',
+ 'view_count': int,
+ 'like_count': int,
+ 'repost_count': int,
+ 'thumbnail': 'https://static.vvvvid.it/img/zoomin/28CA2409-E663-34F0-2B02E72356556EA3_500k.jpg',
+ },
+ 'params': {
+ 'skip_download': True,
+ },
+ }, {
+ # video_type == 'video/rcs'
+ 'url': 'https://www.vvvvid.it/#!show/376/death-note-live-action/377/482493/episodio-01',
+ 'info_dict': {
+ 'id': '482493',
+ 'ext': 'mp4',
+ 'title': 'Episodio 01',
+ },
+ 'params': {
+ 'skip_download': True,
+ },
+        'skip': 'No video/rcs stream works, even on the real website',
+ }, {
+ # video_type == 'video/youtube'
+ 'url': 'https://www.vvvvid.it/show/404/one-punch-man/406/486683/trailer',
+ 'md5': '33e0edfba720ad73a8782157fdebc648',
+ 'info_dict': {
+ 'id': 'RzmFKUDOUgw',
+ 'ext': 'mp4',
+ 'title': 'Trailer',
+ 'upload_date': '20150906',
+ 'description': 'md5:a5e802558d35247fee285875328c0b80',
+ 'uploader_id': '@EMOTIONLabelChannel',
+ 'uploader': 'EMOTION Label Channel',
+ 'episode_id': '3115',
+ 'view_count': int,
+ 'like_count': int,
+ 'repost_count': int,
+ 'availability': str,
+ 'categories': list,
+ 'age_limit': 0,
+ 'channel': 'EMOTION Label Channel',
+ 'channel_follower_count': int,
+ 'channel_id': 'UCQ5URCSs1f5Cz9rh-cDGxNQ',
+ 'channel_url': 'https://www.youtube.com/channel/UCQ5URCSs1f5Cz9rh-cDGxNQ',
+ 'comment_count': int,
+ 'duration': 133,
+ 'episode': 'Trailer',
+ 'heatmap': list,
+ 'live_status': 'not_live',
+ 'playable_in_embed': True,
+ 'season_id': '406',
+ 'series': 'One-Punch Man',
+ 'tags': list,
+ 'uploader_url': 'https://www.youtube.com/@EMOTIONLabelChannel',
+ 'thumbnail': 'https://i.ytimg.com/vi/RzmFKUDOUgw/maxresdefault.jpg',
+ },
+ 'params': {
+ 'skip_download': True,
+ },
+ }, {
+ # video_type == 'video/dash'
+ 'url': 'https://www.vvvvid.it/show/844/le-bizzarre-avventure-di-jojo-vento-aureo/938/527551/golden-wind',
+ 'info_dict': {
+ 'id': '527551',
+ 'ext': 'mp4',
+ 'title': 'Golden Wind',
+ 'duration': 1430,
+ 'series': 'Le bizzarre avventure di Jojo - Vento Aureo',
+ 'season_id': '938',
+ 'episode': 'Golden Wind',
+ 'episode_number': 1,
+ 'episode_id': '9089',
+ 'view_count': int,
+ 'like_count': int,
+ 'repost_count': int,
+ 'thumbnail': 'https://static.vvvvid.it/img/thumbs/Dynit/Jojo/Jojo_S05Ep01-t.jpg',
+ 'season': 'Season 5',
+ 'season_number': 5,
+ },
+ 'params': {
+ 'skip_download': True,
+ 'format': 'mp4',
+ },
+ }, {
+ 'url': 'https://www.vvvvid.it/show/434/perche-dovrei-guardarlo-di-dario-moccia/437/489048',
+        'only_matching': True,
+ }]
+ _conn_id = None
+
+ @functools.cached_property
+ def _headers(self):
+ return {
+ **self.geo_verification_headers(),
+ 'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/95.0.4638.50 Safari/537.37',
+ }
+
+ def _real_initialize(self):
+ self._conn_id = self._download_json(
+ 'https://www.vvvvid.it/user/login',
+ None, headers=self._headers)['data']['conn_id']
+
+ def _download_info(self, show_id, path, video_id, fatal=True, query=None):
+ q = {
+ 'conn_id': self._conn_id,
+ }
+ if query:
+ q.update(query)
+ response = self._download_json(
+ 'https://www.vvvvid.it/vvvvid/ondemand/%s/%s' % (show_id, path),
+ video_id, headers=self._headers, query=q, fatal=fatal)
+ if not (response or fatal):
+ return
+ if response.get('result') == 'error':
+ raise ExtractorError('%s said: %s' % (
+ self.IE_NAME, response['message']), expected=True)
+ return response['data']
+
+ def _extract_common_video_info(self, video_data):
+ return {
+ 'thumbnail': video_data.get('thumbnail'),
+ 'episode_id': str_or_none(video_data.get('id')),
+ }
+
+ def _real_extract(self, url):
+ show_id, season_id, video_id = self._match_valid_url(url).groups()
+
+ response = self._download_info(
+ show_id, 'season/%s' % season_id,
+ video_id, query={'video_id': video_id})
+
+ vid = int(video_id)
+ video_data = list(filter(
+ lambda episode: episode.get('video_id') == vid, response))[0]
+ title = video_data['title']
+ formats = []
+
+        # The vvvvid embed_info decryption algorithm is reverse-engineered from the function $ds(h) in vvvvid.js
+ def ds(h):
+ g = "MNOPIJKL89+/4567UVWXQRSTEFGHABCDcdefYZabstuvopqr0123wxyzklmnghij"
+
+ def f(m):
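+                # Repack groups of 6-bit values into 8-bit bytes (base64-style decoding)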
+ l = []
+ o = 0
+ b = False
+ m_len = len(m)
+ while ((not b) and o < m_len):
+ n = m[o] << 2
+ o += 1
+ k = -1
+ j = -1
+ if o < m_len:
+ n += m[o] >> 4
+ o += 1
+ if o < m_len:
+ k = (m[o - 1] << 4) & 255
+ k += m[o] >> 2
+ o += 1
+ if o < m_len:
+ j = (m[o - 1] << 6) & 255
+ j += m[o]
+ o += 1
+ else:
+ b = True
+ else:
+ b = True
+ else:
+ b = True
+ l.append(n)
+ if k != -1:
+ l.append(k)
+ if j != -1:
+ l.append(j)
+ return l
+
+ c = []
+ for e in h:
+ c.append(g.index(e))
+
+ c_len = len(c)
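+            # Undo the neighbour-XOR scrambling with two reverse passes over the buffer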
+ for e in range(c_len * 2 - 1, -1, -1):
+ a = c[e % c_len] ^ c[(e + 1) % c_len]
+ c[e % c_len] = a
+
+ c = f(c)
+ d = ''
+ for e in c:
+ d += chr(e)
+
+ return d
+
+ info = {}
+
+ def metadata_from_url(r_url):
+ if not info and r_url:
+ mobj = re.search(r'_(?:S(\d+))?Ep(\d+)', r_url)
+ if mobj:
+ info['episode_number'] = int(mobj.group(2))
+ season_number = mobj.group(1)
+ if season_number:
+ info['season_number'] = int(season_number)
+
+ video_type = video_data.get('video_type')
+ is_youtube = False
+ for quality in ('', '_sd'):
+ embed_code = video_data.get('embed_info' + quality)
+ if not embed_code:
+ continue
+ embed_code = ds(embed_code)
+ if video_type == 'video/kenc':
+ embed_code = re.sub(r'https?(://[^/]+)/z/', r'https\1/i/', embed_code).replace('/manifest.f4m', '/master.m3u8')
+ kenc = self._download_json(
+ 'https://www.vvvvid.it/kenc', video_id, query={
+ 'action': 'kt',
+ 'conn_id': self._conn_id,
+ 'url': embed_code,
+ }, fatal=False) or {}
+ kenc_message = kenc.get('message')
+ if kenc_message:
+ embed_code += '?' + ds(kenc_message)
+ formats.extend(self._extract_m3u8_formats(
+ embed_code, video_id, 'mp4', m3u8_id='hls', fatal=False))
+ elif video_type == 'video/rcs':
+ formats.extend(self._extract_akamai_formats(embed_code, video_id))
+ elif video_type == 'video/youtube':
+ info.update({
+ '_type': 'url_transparent',
+ 'ie_key': YoutubeIE.ie_key(),
+ 'url': embed_code,
+ })
+ is_youtube = True
+ break
+ elif video_type == 'video/dash':
+ formats.extend(self._extract_m3u8_formats(
+ embed_code, video_id, 'mp4', m3u8_id='hls', fatal=False))
+ else:
+ formats.extend(self._extract_wowza_formats(
+ 'http://sb.top-ix.org/videomg/_definst_/mp4:%s/playlist.m3u8' % embed_code, video_id, skip_protocols=['f4m']))
+ metadata_from_url(embed_code)
+
+ if not is_youtube:
+ info['formats'] = formats
+
+ metadata_from_url(video_data.get('thumbnail'))
+ info.update(self._extract_common_video_info(video_data))
+ info.update({
+ 'id': video_id,
+ 'title': title,
+ 'duration': int_or_none(video_data.get('length')),
+ 'series': video_data.get('show_title'),
+ 'season_id': season_id,
+ 'episode': title,
+ 'view_count': int_or_none(video_data.get('views')),
+ 'like_count': int_or_none(video_data.get('video_likes')),
+ 'repost_count': int_or_none(video_data.get('video_shares')),
+ })
+ return info
+
+
+class VVVVIDShowIE(VVVVIDIE): # XXX: Do not subclass from concrete IE
+ _VALID_URL = r'(?P<base_url>%s(?P<id>\d+)(?:/(?P<show_title>[^/?&#]+))?)/?(?:[?#&]|$)' % VVVVIDIE._VALID_URL_BASE
+ _TESTS = [{
+ 'url': 'https://www.vvvvid.it/show/156/psyco-pass',
+ 'info_dict': {
+ 'id': '156',
+ 'title': 'Psycho-Pass',
+ 'description': 'md5:94d572c0bd85894b193b8aebc9a3a806',
+ },
+ 'playlist_count': 46,
+ }, {
+ 'url': 'https://www.vvvvid.it/show/156',
+ 'only_matching': True,
+ }]
+
+ def _real_extract(self, url):
+ base_url, show_id, show_title = self._match_valid_url(url).groups()
+
+ seasons = self._download_info(
+ show_id, 'seasons/', show_title)
+
+        show_info = self._download_info(
+            show_id, 'info/', show_title, fatal=False) or {}
+
+ if not show_title:
+            base_url += '/title'
+
+ entries = []
+ for season in (seasons or []):
+ episodes = season.get('episodes') or []
+ playlist_title = season.get('name') or show_info.get('title')
+ for episode in episodes:
+ if episode.get('playable') is False:
+ continue
+ season_id = str_or_none(episode.get('season_id'))
+ video_id = str_or_none(episode.get('video_id'))
+ if not (season_id and video_id):
+ continue
+ info = self._extract_common_video_info(episode)
+ info.update({
+ '_type': 'url_transparent',
+ 'ie_key': VVVVIDIE.ie_key(),
+ 'url': '/'.join([base_url, season_id, video_id]),
+ 'title': episode.get('title'),
+ 'description': episode.get('description'),
+ 'season_id': season_id,
+ 'playlist_title': playlist_title,
+ })
+ entries.append(info)
+
+ return self.playlist_result(
+ entries, show_id, show_info.get('title'), show_info.get('description'))
diff --git a/yt_dlp/extractor/walla.py b/yt_dlp/extractor/walla.py
new file mode 100644
index 0000000..a1a9c17
--- /dev/null
+++ b/yt_dlp/extractor/walla.py
@@ -0,0 +1,82 @@
+import re
+
+from .common import InfoExtractor
+from ..utils import (
+    int_or_none,
+    xpath_text,
+)
+
+
+class WallaIE(InfoExtractor):
+ _VALID_URL = r'https?://vod\.walla\.co\.il/[^/]+/(?P<id>\d+)/(?P<display_id>.+)'
+ _TEST = {
+ 'url': 'http://vod.walla.co.il/movie/2642630/one-direction-all-for-one',
+ 'info_dict': {
+ 'id': '2642630',
+ 'display_id': 'one-direction-all-for-one',
+ 'ext': 'flv',
+ 'title': 'וואן דיירקשן: ההיסטריה',
+ 'description': 'md5:de9e2512a92442574cdb0913c49bc4d8',
+ 'thumbnail': r're:^https?://.*\.jpg',
+ 'duration': 3600,
+ },
+ 'params': {
+ # rtmp download
+ 'skip_download': True,
+ }
+ }
+
+ _SUBTITLE_LANGS = {
+ 'עברית': 'heb',
+ }
+
+ def _real_extract(self, url):
+ mobj = self._match_valid_url(url)
+ video_id = mobj.group('id')
+ display_id = mobj.group('display_id')
+
+ video = self._download_xml(
+ 'http://video2.walla.co.il/?w=null/null/%s/@@/video/flv_pl' % video_id,
+ display_id)
+
+ item = video.find('./items/item')
+
+ title = xpath_text(item, './title', 'title')
+ description = xpath_text(item, './synopsis', 'description')
+ thumbnail = xpath_text(item, './preview_pic', 'thumbnail')
+ duration = int_or_none(xpath_text(item, './duration', 'duration'))
+
+ subtitles = {}
+ for subtitle in item.findall('./subtitles/subtitle'):
+ lang = xpath_text(subtitle, './title')
+ subtitles[self._SUBTITLE_LANGS.get(lang, lang)] = [{
+ 'ext': 'srt',
+ 'url': xpath_text(subtitle, './src'),
+ }]
+
+ formats = []
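+        # Streams are served over RTMP; each <quality> node supplies its own play path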
+ for quality in item.findall('./qualities/quality'):
+ format_id = xpath_text(quality, './title')
+ fmt = {
+ 'url': 'rtmp://wafla.walla.co.il/vod',
+ 'play_path': xpath_text(quality, './src'),
+ 'player_url': 'http://isc.walla.co.il/w9/swf/video_swf/vod/WallaMediaPlayerAvod.swf',
+ 'page_url': url,
+ 'ext': 'flv',
+            'format_id': format_id,
+ }
+ m = re.search(r'^(?P<height>\d+)[Pp]', format_id)
+ if m:
+ fmt['height'] = int(m.group('height'))
+ formats.append(fmt)
+
+ return {
+ 'id': video_id,
+ 'display_id': display_id,
+ 'title': title,
+ 'description': description,
+ 'thumbnail': thumbnail,
+ 'duration': duration,
+ 'formats': formats,
+ 'subtitles': subtitles,
+ }
diff --git a/yt_dlp/extractor/washingtonpost.py b/yt_dlp/extractor/washingtonpost.py
new file mode 100644
index 0000000..74501b1
--- /dev/null
+++ b/yt_dlp/extractor/washingtonpost.py
@@ -0,0 +1,123 @@
+import re
+
+from .common import InfoExtractor
+from ..utils import traverse_obj
+
+
+class WashingtonPostIE(InfoExtractor):
+ IE_NAME = 'washingtonpost'
+ _VALID_URL = r'(?:washingtonpost:|https?://(?:www\.)?washingtonpost\.com/(?:video|posttv)/(?:[^/]+/)*)(?P<id>[\da-f]{8}-[\da-f]{4}-[\da-f]{4}-[\da-f]{4}-[\da-f]{12})'
+ _EMBED_REGEX = [r'<iframe[^>]+\bsrc=["\'](?P<url>https?://(?:www\.)?washingtonpost\.com/video/c/embed/[\da-f]{8}-[\da-f]{4}-[\da-f]{4}-[\da-f]{4}-[\da-f]{12})']
+ _TESTS = [{
+ 'url': 'https://www.washingtonpost.com/video/c/video/480ba4ee-1ec7-11e6-82c2-a7dcb313287d',
+ 'md5': '6f537e1334b714eb15f9563bd4b9cdfa',
+ 'info_dict': {
+ 'id': '480ba4ee-1ec7-11e6-82c2-a7dcb313287d',
+ 'ext': 'mp4',
+ 'title': 'Egypt finds belongings, debris from plane crash',
+ 'description': 'md5:a17ceee432f215a5371388c1f680bd86',
+ 'upload_date': '20160520',
+ 'timestamp': 1463775187,
+ },
+ }, {
+ 'url': 'https://www.washingtonpost.com/video/world/egypt-finds-belongings-debris-from-plane-crash/2016/05/20/480ba4ee-1ec7-11e6-82c2-a7dcb313287d_video.html',
+ 'only_matching': True,
+ }, {
+ 'url': 'https://www.washingtonpost.com/posttv/world/iraq-to-track-down-antiquities-after-islamic-state-museum-rampage/2015/02/28/7c57e916-bf86-11e4-9dfb-03366e719af8_video.html',
+ 'only_matching': True,
+ }]
+
+ def _real_extract(self, url):
+ video_id = self._match_id(url)
+ return self.url_result(
+ 'arcpublishing:wapo:' + video_id, 'ArcPublishing', video_id)
+
+
+class WashingtonPostArticleIE(InfoExtractor):
+ IE_NAME = 'washingtonpost:article'
+ _VALID_URL = r'https?://(?:www\.)?washingtonpost\.com/(?:[^/]+/)*(?P<id>[^/?#]+)'
+ _TESTS = [{
+ 'url': 'http://www.washingtonpost.com/sf/national/2014/03/22/sinkhole-of-bureaucracy/',
+ 'info_dict': {
+ 'id': 'sinkhole-of-bureaucracy',
+ 'title': 'Sinkhole of bureaucracy',
+ },
+ 'playlist': [{
+ 'md5': '7ccf53ea8cbb77de5f570242b3b21a59',
+ 'info_dict': {
+ 'id': 'fc433c38-b146-11e3-b8b3-44b1d1cd4c1f',
+ 'ext': 'mp4',
+ 'title': 'Breaking Points: The Paper Mine',
+ 'duration': 1290,
+ 'description': 'Overly complicated paper pushing is nothing new to government bureaucracy. But the way federal retirement applications are filed may be the most outdated. David Fahrenthold explains.',
+ 'timestamp': 1395440416,
+ 'upload_date': '20140321',
+ 'thumbnail': r're:https://[^\.]+.cloudfront\.net/PAPERMINESplash\.jpg',
+ },
+ }, {
+ 'md5': '7ccf53ea8cbb77de5f570242b3b21a59',
+ 'info_dict': {
+ 'id': '41255e28-b14a-11e3-b8b3-44b1d1cd4c1f',
+ 'ext': 'mp4',
+ 'title': 'The town bureaucracy sustains',
+ 'description': 'Underneath the friendly town of Boyers is a sea of government paperwork. In a disused limestone mine, hundreds of locals now track, file and process retirement applications for the federal government. We set out to find out what it\'s like to do paperwork 230 feet underground.',
+ 'duration': 2220,
+ 'timestamp': 1395441819,
+ 'upload_date': '20140321',
+ 'thumbnail': r're:https://[^\.]+.cloudfront\.net/BoyersSplash\.jpeg',
+ },
+ }],
+ }, {
+ 'url': 'http://www.washingtonpost.com/blogs/wonkblog/wp/2014/12/31/one-airline-figured-out-how-to-make-sure-its-airplanes-never-disappear/',
+ 'info_dict': {
+ 'id': 'one-airline-figured-out-how-to-make-sure-its-airplanes-never-disappear',
+ 'title': 'One airline figured out how to make sure its airplanes never disappear',
+ },
+ 'playlist': [{
+ 'md5': 'a7c1b5634ba5e57a6a82cdffa5b1e0d0',
+ 'info_dict': {
+ 'id': '0e4bb54c-9065-11e4-a66f-0ca5037a597d',
+ 'ext': 'mp4',
+ 'description': 'Washington Post transportation reporter Ashley Halsey III explains why a plane\'s black box needs to be recovered from a crash site instead of having its information streamed in real time throughout the flight.',
+ 'upload_date': '20141230',
+ 'timestamp': 1419972442,
+ 'title': 'Why black boxes don’t transmit data in real time',
+ }
+ }],
+        'skip': 'Does not have a video anymore',
+ }, {
+ 'url': 'https://www.washingtonpost.com/nation/2021/08/05/dixie-river-fire-california-climate/',
+ 'only_matching': True,
+ }]
+
+ @classmethod
+ def suitable(cls, url):
+        return not WashingtonPostIE.suitable(url) and super().suitable(url)
+
+ def _real_extract(self, url):
+ page_id = self._match_id(url)
+ webpage = self._download_webpage(url, page_id)
+
+ title = self._og_search_title(webpage)
+
+ uuids = re.findall(r'''(?x)
+ (?:
+ <div\s+class="posttv-video-embed[^>]*?data-uuid=|
+ data-video-uuid=
+ )"([^"]+)"''', webpage)
+
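+        # Newer article pages embed video IDs in Next.js data rather than in markup attributes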
+ if not uuids:
+ json_data = self._search_nextjs_data(webpage, page_id)
+        for content_element in traverse_obj(json_data, ('props', 'pageProps', 'globalContent', 'content_elements')) or []:
+ if content_element.get('type') == 'video':
+ uuids.append(content_element.get('_id'))
+
+ entries = [self.url_result('washingtonpost:%s' % uuid, 'WashingtonPost', uuid) for uuid in uuids]
+
+ return {
+ '_type': 'playlist',
+ 'entries': entries,
+ 'id': page_id,
+ 'title': title,
+ }
diff --git a/yt_dlp/extractor/wat.py b/yt_dlp/extractor/wat.py
new file mode 100644
index 0000000..9ea3fdd
--- /dev/null
+++ b/yt_dlp/extractor/wat.py
@@ -0,0 +1,119 @@
+from .common import InfoExtractor
+from ..compat import compat_str
+from ..utils import (
+ ExtractorError,
+ int_or_none,
+ try_get,
+ unified_strdate,
+)
+
+
+class WatIE(InfoExtractor):
+ _VALID_URL = r'(?:wat:|https?://(?:www\.)?wat\.tv/video/.*-)(?P<id>[0-9a-z]+)'
+ IE_NAME = 'wat.tv'
+ _TESTS = [
+ {
+ 'url': 'http://www.wat.tv/video/soupe-figues-l-orange-aux-epices-6z1uz_2hvf7_.html',
+ 'info_dict': {
+ 'id': '11713067',
+ 'ext': 'mp4',
+ 'title': 'Soupe de figues à l\'orange et aux épices',
+ 'description': 'Retrouvez l\'émission "Petits plats en équilibre", diffusée le 18 août 2014.',
+ 'upload_date': '20140819',
+ 'duration': 120,
+ },
+ 'params': {
+ # m3u8 download
+ 'skip_download': True,
+ },
+ 'expected_warnings': ['HTTP Error 404'],
+ 'skip': 'This content is no longer available',
+ },
+ {
+ 'url': 'http://www.wat.tv/video/gregory-lemarchal-voix-ange-6z1v7_6ygkj_.html',
+ 'md5': 'b16574df2c3cd1a36ca0098f2a791925',
+ 'info_dict': {
+ 'id': '11713075',
+ 'ext': 'mp4',
+ 'title': 'Grégory Lemarchal, une voix d\'ange depuis 10 ans (1/3)',
+ 'upload_date': '20140816',
+ },
+ 'expected_warnings': ["Ce contenu n'est pas disponible pour l'instant."],
+ 'skip': 'This content is no longer available',
+ },
+ {
+ 'url': 'wat:14010600',
+ 'info_dict': {
+ 'id': '14010600',
+ 'title': 'Burger Quiz - S03 EP21 avec Eye Haidara, Anne Depétrini, Jonathan Zaccaï et Pio Marmaï',
+ 'thumbnail': 'https://photos.tf1.fr/1280/720/burger-quiz-11-9adb79-0@1x.jpg',
+ 'upload_date': '20230819',
+ 'duration': 2312,
+ 'ext': 'mp4',
+ },
+ 'params': {'skip_download': 'm3u8'},
+ }
+ ]
+ _GEO_BYPASS = False
+
+ def _real_extract(self, url):
+ video_id = self._match_id(url)
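+        # short alphanumeric ids from wat.tv URLs are base 36, e.g. int('6z1uz', 36) == 11713067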
+ video_id = video_id if video_id.isdigit() and len(video_id) > 6 else compat_str(int(video_id, 36))
+
+        # The website uses 'contentv4', but that endpoint also returns related
+        # videos, which we don't need:
+ # video_data = self._download_json(
+ # 'http://www.wat.tv/interface/contentv4s/' + video_id, video_id)
+ video_data = self._download_json(
+ 'https://mediainfo.tf1.fr/mediainfocombo/' + video_id,
+ video_id, query={'pver': '5010000'})
+ video_info = video_data['media']
+
+ error_desc = video_info.get('error_desc')
+ if error_desc:
+ if video_info.get('error_code') == 'GEOBLOCKED':
+ self.raise_geo_restricted(error_desc, video_info.get('geoList'))
+ raise ExtractorError(error_desc, expected=True)
+
+ title = video_info['title']
+
+ formats = []
+ subtitles = {}
+
+ def extract_formats(manifest_urls):
+ for f, f_url in manifest_urls.items():
+ if not f_url:
+ continue
+ if f in ('dash', 'mpd'):
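+                # swap the DASH manifest host for its SSL variant (das-q1 -> das-q1-ssl)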
+ fmts, subs = self._extract_mpd_formats_and_subtitles(
+ f_url.replace('://das-q1.tf1.fr/', '://das-q1-ssl.tf1.fr/'),
+ video_id, mpd_id='dash', fatal=False)
+ elif f == 'hls':
+ fmts, subs = self._extract_m3u8_formats_and_subtitles(
+ f_url, video_id, 'mp4',
+ 'm3u8_native', m3u8_id='hls', fatal=False)
+ else:
+ continue
+ formats.extend(fmts)
+ self._merge_subtitles(subs, target=subtitles)
+
+ delivery = video_data.get('delivery') or {}
+ extract_formats({delivery.get('format'): delivery.get('url')})
+ if not formats:
+ if delivery.get('drm'):
+ self.report_drm(video_id)
+ manifest_urls = self._download_json(
+ 'http://www.wat.tv/get/webhtml/' + video_id, video_id, fatal=False)
+ if manifest_urls:
+ extract_formats(manifest_urls)
+
+ return {
+ 'id': video_id,
+ 'title': title,
+ 'thumbnail': video_info.get('preview'),
+ 'upload_date': unified_strdate(try_get(
+ video_data, lambda x: x['mediametrie']['chapters'][0]['estatS4'])),
+ 'duration': int_or_none(video_info.get('duration')),
+ 'formats': formats,
+ 'subtitles': subtitles,
+ }
diff --git a/yt_dlp/extractor/wdr.py b/yt_dlp/extractor/wdr.py
new file mode 100644
index 0000000..f80f140
--- /dev/null
+++ b/yt_dlp/extractor/wdr.py
@@ -0,0 +1,384 @@
+import re
+
+from .common import InfoExtractor
+from ..compat import (
+ compat_str,
+ compat_urlparse,
+)
+from ..utils import (
+ determine_ext,
+ dict_get,
+ ExtractorError,
+ js_to_json,
+ strip_jsonp,
+ try_get,
+ unified_strdate,
+ update_url_query,
+    url_or_none,
+    urlhandle_detect_ext,
+)
+
+
+class WDRIE(InfoExtractor):
+ __API_URL_TPL = '//deviceids-medp.wdr.de/ondemand/%s/%s'
+ _VALID_URL = r'''(?x)https?://
+ (?:deviceids-medp\.wdr\.de/ondemand/\d+/|
+ kinder\.wdr\.de/(?!mediathek/)[^#?]+-)
+ (?P<id>\d+)\.(?:js|assetjsonp)
+ '''
+ _GEO_COUNTRIES = ['DE']
+ _TESTS = [{
+ 'url': 'http://deviceids-medp.wdr.de/ondemand/155/1557833.js',
+ 'info_dict': {
+ 'id': 'mdb-1557833',
+ 'ext': 'mp4',
+ 'title': 'Biathlon-Staffel verpasst Podest bei Olympia-Generalprobe',
+ 'upload_date': '20180112',
+ },
+ }]
+
+ def _asset_url(self, wdr_id):
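+        # assets are sharded by the id minus its last four digits,
+        # e.g. '1557833' -> https://deviceids-medp.wdr.de/ondemand/155/1557833.js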
+ id_len = max(len(wdr_id), 5)
+        return ''.join(('https:', self.__API_URL_TPL % (wdr_id[:id_len - 4], wdr_id), '.js'))
+
+ def _real_extract(self, url):
+ video_id = self._match_id(url)
+
+ if url.startswith('wdr:'):
+ video_id = url[4:]
+ url = self._asset_url(video_id)
+
+ metadata = self._download_json(
+ url, video_id, transform_source=strip_jsonp)
+
+ is_live = metadata.get('mediaType') == 'live'
+
+ tracker_data = metadata['trackerData']
+ title = tracker_data['trackerClipTitle']
+ media_resource = metadata['mediaResource']
+
+ formats = []
+ subtitles = {}
+
+ # check if the metadata contains a direct URL to a file
+ for kind, media in media_resource.items():
+ if kind == 'captionsHash':
+ for ext, url in media.items():
+ subtitles.setdefault('de', []).append({
+ 'url': url,
+ 'ext': ext,
+ })
+ continue
+
+ if kind not in ('dflt', 'alt'):
+ continue
+ if not isinstance(media, dict):
+ continue
+
+ for tag_name, medium_url in media.items():
+ if tag_name not in ('videoURL', 'audioURL'):
+ continue
+
+ ext = determine_ext(medium_url)
+ if ext == 'm3u8':
+ formats.extend(self._extract_m3u8_formats(
+ medium_url, video_id, 'mp4', 'm3u8_native',
+ m3u8_id='hls'))
+ elif ext == 'f4m':
+ manifest_url = update_url_query(
+ medium_url, {'hdcore': '3.2.0', 'plugin': 'aasp-3.2.0.77.18'})
+ formats.extend(self._extract_f4m_formats(
+ manifest_url, video_id, f4m_id='hds', fatal=False))
+ elif ext == 'smil':
+ formats.extend(self._extract_smil_formats(
+ medium_url, 'stream', fatal=False))
+ else:
+ a_format = {
+ 'url': medium_url
+ }
+ if ext == 'unknown_video':
+ urlh = self._request_webpage(
+ medium_url, video_id, note='Determining extension')
+ ext = urlhandle_detect_ext(urlh)
+ a_format['ext'] = ext
+ formats.append(a_format)
+
+ caption_url = media_resource.get('captionURL')
+ if caption_url:
+ subtitles['de'] = [{
+ 'url': caption_url,
+ 'ext': 'ttml',
+ }]
+ captions_hash = media_resource.get('captionsHash')
+ if isinstance(captions_hash, dict):
+ for ext, format_url in captions_hash.items():
+ format_url = url_or_none(format_url)
+ if not format_url:
+ continue
+ subtitles.setdefault('de', []).append({
+ 'url': format_url,
+ 'ext': determine_ext(format_url, None) or ext,
+ })
+
+ return {
+ 'id': tracker_data.get('trackerClipId', video_id),
+ 'title': title,
+ 'alt_title': tracker_data.get('trackerClipSubcategory'),
+ 'formats': formats,
+ 'subtitles': subtitles,
+ 'upload_date': unified_strdate(tracker_data.get('trackerClipAirTime')),
+ 'is_live': is_live,
+ }
+
+
+class WDRPageIE(WDRIE): # XXX: Do not subclass from concrete IE
+    _MAUS_REGEX = r'https?://(?:www\.)wdrmaus\.de/(?:[^/]+/)*?(?P<maus_id>[^/?#.]+)(?:/?|/index\.php5|\.php5)$'
+ _PAGE_REGEX = r'/(?:mediathek/)?(?:[^/]+/)*(?P<display_id>[^/]+)\.html'
+ _VALID_URL = r'https?://(?:www\d?\.)?(?:(?:kinder\.)?wdr\d?|sportschau)\.de' + _PAGE_REGEX + '|' + _MAUS_REGEX
+
+ _TESTS = [
+ {
+ 'url': 'http://www1.wdr.de/mediathek/video/sendungen/doku-am-freitag/video-geheimnis-aachener-dom-100.html',
+ # HDS download, MD5 is unstable
+ 'info_dict': {
+ 'id': 'mdb-1058683',
+ 'ext': 'flv',
+ 'display_id': 'doku-am-freitag/video-geheimnis-aachener-dom-100',
+ 'title': 'Geheimnis Aachener Dom',
+ 'alt_title': 'Doku am Freitag',
+ 'upload_date': '20160304',
+ 'description': 'md5:87be8ff14d8dfd7a7ee46f0299b52318',
+ 'is_live': False,
+ 'subtitles': {'de': [{
+ 'url': 'http://ondemand-ww.wdr.de/medp/fsk0/105/1058683/1058683_12220974.xml',
+ 'ext': 'ttml',
+ }]},
+ },
+ 'skip': 'HTTP Error 404: Not Found',
+ },
+ {
+ 'url': 'http://www1.wdr.de/mediathek/audio/wdr3/wdr3-gespraech-am-samstag/audio-schriftstellerin-juli-zeh-100.html',
+ 'md5': 'f4c1f96d01cf285240f53ea4309663d8',
+ 'info_dict': {
+ 'id': 'mdb-1072000',
+ 'ext': 'mp3',
+ 'display_id': 'wdr3-gespraech-am-samstag/audio-schriftstellerin-juli-zeh-100',
+ 'title': 'Schriftstellerin Juli Zeh',
+ 'alt_title': 'WDR 3 Gespräch am Samstag',
+ 'upload_date': '20160312',
+ 'description': 'md5:e127d320bc2b1f149be697ce044a3dd7',
+ 'is_live': False,
+ 'subtitles': {}
+ },
+ 'skip': 'HTTP Error 404: Not Found',
+ },
+ {
+ # FIXME: Asset JSON is directly embedded in webpage
+ 'url': 'http://www1.wdr.de/mediathek/video/live/index.html',
+ 'info_dict': {
+ 'id': 'mdb-2296252',
+ 'ext': 'mp4',
+ 'title': r're:^WDR Fernsehen im Livestream (?:\(nur in Deutschland erreichbar\) )?[0-9]{4}-[0-9]{2}-[0-9]{2} [0-9]{2}:[0-9]{2}$',
+ 'alt_title': 'WDR Fernsehen Live',
+ 'upload_date': '20201112',
+ 'is_live': True,
+ },
+ 'params': {
+ 'skip_download': True, # m3u8 download
+ },
+ },
+ {
+ 'url': 'http://www1.wdr.de/mediathek/video/sendungen/aktuelle-stunde/aktuelle-stunde-120.html',
+ 'playlist_mincount': 6,
+ 'info_dict': {
+ 'id': 'aktuelle-stunde-120',
+ },
+ },
+ {
+ 'url': 'http://www.wdrmaus.de/aktuelle-sendung/index.php5',
+ 'info_dict': {
+ 'id': 'mdb-2627637',
+ 'ext': 'mp4',
+ 'upload_date': 're:^[0-9]{8}$',
+ 'title': 're:^Die Sendung (?:mit der Maus )?vom [0-9.]{10}$',
+ },
+ 'skip': 'The id changes from week to week because of the new episode'
+ },
+ {
+ 'url': 'http://www.wdrmaus.de/filme/sachgeschichten/achterbahn.php5',
+ 'md5': '803138901f6368ee497b4d195bb164f2',
+ 'info_dict': {
+ 'id': 'mdb-186083',
+ 'ext': 'mp4',
+ 'upload_date': '20130919',
+ 'title': 'Sachgeschichte - Achterbahn ',
+ },
+ 'skip': 'HTTP Error 404: Not Found',
+ },
+ {
+ 'url': 'http://www1.wdr.de/radio/player/radioplayer116~_layout-popupVersion.html',
+ # Live stream, MD5 unstable
+ 'info_dict': {
+ 'id': 'mdb-869971',
+ 'ext': 'mp4',
+ 'title': r're:^COSMO Livestream [0-9]{4}-[0-9]{2}-[0-9]{2} [0-9]{2}:[0-9]{2}$',
+ 'alt_title': 'COSMO Livestream',
+ 'live_status': 'is_live',
+ 'upload_date': '20160101',
+ },
+ 'params': {
+ 'skip_download': True, # m3u8 download
+ }
+ },
+ {
+ 'url': 'http://www.sportschau.de/handballem2018/handball-nationalmannschaft-em-stolperstein-vorrunde-100.html',
+ 'info_dict': {
+ 'id': 'mdb-1556012',
+ 'ext': 'mp4',
+ 'title': 'DHB-Vizepräsident Bob Hanning - "Die Weltspitze ist extrem breit"',
+ 'upload_date': '20180111',
+ },
+ 'params': {
+ 'skip_download': True,
+ },
+ 'skip': 'HTTP Error 404: Not Found',
+ },
+ {
+ 'url': 'http://www.sportschau.de/handballem2018/audio-vorschau---die-handball-em-startet-mit-grossem-favoritenfeld-100.html',
+ 'only_matching': True,
+ },
+ {
+ 'url': 'https://kinder.wdr.de/tv/die-sendung-mit-dem-elefanten/av/video-folge---astronaut-100.html',
+ 'only_matching': True,
+ },
+ {
+ 'url': 'https://www1.wdr.de/mediathek/video/sendungen/rockpalast/video-baroness---freak-valley-festival--100.html',
+ 'info_dict': {
+ 'id': 'mdb-2741028',
+ 'ext': 'mp4',
+ 'title': 'Baroness - Freak Valley Festival 2022',
+ 'alt_title': 'Rockpalast',
+ 'upload_date': '20220725',
+ },
+ }
+ ]
+
+ def _real_extract(self, url):
+ mobj = self._match_valid_url(url)
+ display_id = dict_get(mobj.groupdict(), ('display_id', 'maus_id'), 'wdrmaus')
+ webpage = self._download_webpage(url, display_id)
+
+ entries = []
+
+ # Article with several videos
+
+        # on wdr.de, data-extension-ard sits in a tag with class "mediaLink";
+        # for wdr.de radio players, in a tag with class "wdrrPlayerPlayBtn";
+        # for wdrmaus, in a tag with class "videoButton" (previously a link
+        # to the page in a multi-line "videoLink" tag)
+ for mobj in re.finditer(
+ r'''(?sx)class=
+ (?:
+ (["\'])(?:mediaLink|wdrrPlayerPlayBtn|videoButton)\b.*?\1[^>]+|
+ (["\'])videoLink\b.*?\2[\s]*>\n[^\n]*
+ )data-extension(?:-ard)?=(["\'])(?P<data>(?:(?!\3).)+)\3
+ ''', webpage):
+ media_link_obj = self._parse_json(
+ mobj.group('data'), display_id, transform_source=js_to_json,
+ fatal=False)
+ if not media_link_obj:
+ continue
+ jsonp_url = try_get(
+ media_link_obj, lambda x: x['mediaObj']['url'], compat_str)
+ if jsonp_url:
+                # jsonp_url may point to asset metadata, or to player JS whose ['ref'] carries the WDR id
+ clip_id = media_link_obj['mediaObj'].get('ref')
+ if jsonp_url.endswith('.assetjsonp'):
+ asset = self._download_json(
+ jsonp_url, display_id, fatal=False, transform_source=strip_jsonp)
+ clip_id = try_get(asset, lambda x: x['trackerData']['trackerClipId'], compat_str)
+ if clip_id:
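+                    # clip ids look like 'mdb-1557833'; strip the 'mdb-' prefix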
+ jsonp_url = self._asset_url(clip_id[4:])
+ entries.append(self.url_result(jsonp_url, ie=WDRIE.ie_key()))
+
+ # Playlist (e.g. https://www1.wdr.de/mediathek/video/sendungen/aktuelle-stunde/aktuelle-stunde-120.html)
+ if not entries:
+ entries = [
+ self.url_result(
+ compat_urlparse.urljoin(url, mobj.group('href')),
+ ie=WDRPageIE.ie_key())
+ for mobj in re.finditer(
+ r'<a[^>]+\bhref=(["\'])(?P<href>(?:(?!\1).)+)\1[^>]+\bdata-extension(?:-ard)?=',
+ webpage) if re.match(self._PAGE_REGEX, mobj.group('href'))
+ ]
+
+ return self.playlist_result(entries, playlist_id=display_id)
+
+
+class WDRElefantIE(InfoExtractor):
+ _VALID_URL = r'https?://(?:www\.)wdrmaus\.de/elefantenseite/#(?P<id>.+)'
+ _TEST = {
+ 'url': 'http://www.wdrmaus.de/elefantenseite/#elefantenkino_wippe',
+ # adaptive stream: unstable file MD5
+ 'info_dict': {
+ 'title': 'Wippe',
+ 'id': 'mdb-1198320',
+ 'ext': 'mp4',
+ 'upload_date': '20071003'
+ },
+ }
+
+ def _real_extract(self, url):
+ display_id = self._match_id(url)
+
+ # Table of Contents seems to always be at this address, so fetch it directly.
+ # The website fetches configurationJS.php5, which links to tableOfContentsJS.php5.
+ table_of_contents = self._download_json(
+ 'https://www.wdrmaus.de/elefantenseite/data/tableOfContentsJS.php5',
+ display_id)
+ if display_id not in table_of_contents:
+ raise ExtractorError(
+ 'No entry in site\'s table of contents for this URL. '
+ 'Is the fragment part of the URL (after the #) correct?',
+ expected=True)
+ xml_metadata_path = table_of_contents[display_id]['xmlPath']
+ xml_metadata = self._download_xml(
+ 'https://www.wdrmaus.de/elefantenseite/' + xml_metadata_path,
+ display_id)
+ zmdb_url_element = xml_metadata.find('./movie/zmdb_url')
+ if zmdb_url_element is None:
+ raise ExtractorError(
+ '%s is not a video' % display_id, expected=True)
+ return self.url_result(zmdb_url_element.text, ie=WDRIE.ie_key())
+
+
+class WDRMobileIE(InfoExtractor):
+ _VALID_URL = r'''(?x)
+ https?://mobile-ondemand\.wdr\.de/
+ .*?/fsk(?P<age_limit>[0-9]+)
+ /[0-9]+/[0-9]+/
+ (?P<id>[0-9]+)_(?P<title>[0-9]+)'''
+ IE_NAME = 'wdr:mobile'
+ _WORKING = False # no such domain
+ _TEST = {
+ 'url': 'http://mobile-ondemand.wdr.de/CMS2010/mdb/ondemand/weltweit/fsk0/42/421735/421735_4283021.mp4',
+ 'info_dict': {
+ 'title': '4283021',
+ 'id': '421735',
+ 'ext': 'mp4',
+ 'age_limit': 0,
+ },
+ 'skip': 'Problems with loading data.'
+ }
+
+ def _real_extract(self, url):
+ mobj = self._match_valid_url(url)
+ return {
+ 'id': mobj.group('id'),
+ 'title': mobj.group('title'),
+ 'age_limit': int(mobj.group('age_limit')),
+ 'url': url,
+ 'http_headers': {
+ 'User-Agent': 'mobile',
+ },
+ }
diff --git a/yt_dlp/extractor/webcamerapl.py b/yt_dlp/extractor/webcamerapl.py
new file mode 100644
index 0000000..a02d951
--- /dev/null
+++ b/yt_dlp/extractor/webcamerapl.py
@@ -0,0 +1,44 @@
+import codecs
+
+from .common import InfoExtractor
+
+
+class WebcameraplIE(InfoExtractor):
+ _VALID_URL = r'https?://(?P<id>[\w-]+)\.webcamera\.pl'
+ _TESTS = [{
+ 'url': 'https://warszawa-plac-zamkowy.webcamera.pl',
+ 'info_dict': {
+ 'id': 'warszawa-plac-zamkowy',
+ 'ext': 'mp4',
+ 'title': r're:WIDOK NA PLAC ZAMKOWY W WARSZAWIE \d{4}-\d{2}-\d{2} \d{2}:\d{2}$',
+ 'live_status': 'is_live',
+ }
+ }, {
+ 'url': 'https://gdansk-stare-miasto.webcamera.pl/',
+ 'info_dict': {
+ 'id': 'gdansk-stare-miasto',
+ 'ext': 'mp4',
+ 'title': r're:GDAŃSK - widok na Stare Miasto \d{4}-\d{2}-\d{2} \d{2}:\d{2}$',
+ 'live_status': 'is_live',
+ }
+ }]
+
+ def _real_extract(self, url):
+ video_id = self._match_id(url)
+ webpage = self._download_webpage(url, video_id)
+
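+        # data-src holds a ROT13-encoded stream URL ('uggc' -> 'http', 'z3h8' -> 'm3u8')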
+ rot13_m3u8_url = self._search_regex(r'data-src\s*=\s*"(uggc[^"]+\.z3h8)"',
+ webpage, 'm3u8 url', default=None)
+ if not rot13_m3u8_url:
+ self.raise_no_formats('No video/audio found at the provided url', expected=True)
+
+ m3u8_url = codecs.decode(rot13_m3u8_url, 'rot-13')
+ formats, subtitles = self._extract_m3u8_formats_and_subtitles(m3u8_url, video_id, live=True)
+
+ return {
+ 'id': video_id,
+ 'title': self._html_search_regex(r'<h1\b[^>]*>([^>]+)</h1>', webpage, 'title'),
+ 'formats': formats,
+ 'subtitles': subtitles,
+ 'is_live': True,
+ }
diff --git a/yt_dlp/extractor/webcaster.py b/yt_dlp/extractor/webcaster.py
new file mode 100644
index 0000000..43eeca0
--- /dev/null
+++ b/yt_dlp/extractor/webcaster.py
@@ -0,0 +1,92 @@
+import re
+
+from .common import InfoExtractor
+from ..utils import (
+ determine_ext,
+ join_nonempty,
+ xpath_text,
+)
+
+
+class WebcasterIE(InfoExtractor):
+ _VALID_URL = r'https?://bl\.webcaster\.pro/(?:quote|media)/start/free_(?P<id>[^/]+)'
+ _TESTS = [{
+ # http://video.khl.ru/quotes/393859
+ 'url': 'http://bl.webcaster.pro/quote/start/free_c8cefd240aa593681c8d068cff59f407_hd/q393859/eb173f99dd5f558674dae55f4ba6806d/1480289104?sr%3D105%26fa%3D1%26type_id%3D18',
+ 'md5': '0c162f67443f30916ff1c89425dcd4cd',
+ 'info_dict': {
+ 'id': 'c8cefd240aa593681c8d068cff59f407_hd',
+ 'ext': 'mp4',
+ 'title': 'Сибирь - Нефтехимик. Лучшие моменты первого периода',
+ 'thumbnail': r're:^https?://.*\.jpg$',
+ },
+ }, {
+ 'url': 'http://bl.webcaster.pro/media/start/free_6246c7a4453ac4c42b4398f840d13100_hd/2_2991109016/e8d0d82587ef435480118f9f9c41db41/4635726126',
+ 'only_matching': True,
+ }]
+
+ def _real_extract(self, url):
+ video_id = self._match_id(url)
+
+ video = self._download_xml(url, video_id)
+
+ title = xpath_text(video, './/event_name', 'event name', fatal=True)
+
+ formats = []
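+        # the XML may list plain 'track' elements and 'track_noise' variants;
+        # the latter are deprioritized via source_preference below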
+ for format_id in (None, 'noise'):
+ track_tag = join_nonempty('track', format_id, delim='_')
+ for track in video.findall('.//iphone/%s' % track_tag):
+ track_url = track.text
+ if not track_url:
+ continue
+ if determine_ext(track_url) == 'm3u8':
+ m3u8_formats = self._extract_m3u8_formats(
+ track_url, video_id, 'mp4',
+ entry_protocol='m3u8_native',
+ m3u8_id=join_nonempty('hls', format_id, delim='-'), fatal=False)
+ for f in m3u8_formats:
+ f.update({
+ 'source_preference': 0 if format_id == 'noise' else 1,
+ 'format_note': track.get('title'),
+ })
+ formats.extend(m3u8_formats)
+
+ thumbnail = xpath_text(video, './/image', 'thumbnail')
+
+ return {
+ 'id': video_id,
+ 'title': title,
+ 'thumbnail': thumbnail,
+ 'formats': formats,
+ }
+
+
+class WebcasterFeedIE(InfoExtractor):
+ _VALID_URL = r'https?://bl\.webcaster\.pro/feed/start/free_(?P<id>[^/]+)'
+ _EMBED_REGEX = [r'<(?:object|a[^>]+class=["\']webcaster-player["\'])[^>]+data(?:-config)?=(["\']).*?config=(?P<url>https?://bl\.webcaster\.pro/feed/start/free_.*?)(?:[?&]|\1)']
+ _TEST = {
+ 'url': 'http://bl.webcaster.pro/feed/start/free_c8cefd240aa593681c8d068cff59f407_hd/q393859/eb173f99dd5f558674dae55f4ba6806d/1480289104',
+ 'only_matching': True,
+ }
+
+ def _extract_from_webpage(self, url, webpage):
+ yield from super()._extract_from_webpage(url, webpage)
+
+ for secure in (True, False):
+ video_url = self._og_search_video_url(webpage, secure=secure, default=None)
+ if video_url:
+ mobj = re.search(
+ r'config=(?P<url>https?://bl\.webcaster\.pro/feed/start/free_[^?&=]+)',
+ video_url)
+ if mobj:
+ yield self.url_result(mobj.group('url'), self)
+
+ def _real_extract(self, url):
+ video_id = self._match_id(url)
+
+ feed = self._download_xml(url, video_id)
+
+ video_url = xpath_text(
+ feed, ('video_hd', 'video'), 'video url', fatal=True)
+
+ return self.url_result(video_url, WebcasterIE.ie_key())
diff --git a/yt_dlp/extractor/webofstories.py b/yt_dlp/extractor/webofstories.py
new file mode 100644
index 0000000..65f48f3
--- /dev/null
+++ b/yt_dlp/extractor/webofstories.py
@@ -0,0 +1,155 @@
+import re
+
+from .common import InfoExtractor
+from ..utils import (
+ int_or_none,
+ orderedSet,
+)
+
+
+class WebOfStoriesIE(InfoExtractor):
+ _VALID_URL = r'https?://(?:www\.)?webofstories\.com/play/(?:[^/]+/)?(?P<id>[0-9]+)'
+ _VIDEO_DOMAIN = 'http://eu-mobile.webofstories.com/'
+ _GREAT_LIFE_STREAMER = 'rtmp://eu-cdn1.webofstories.com/cfx/st/'
+ _USER_STREAMER = 'rtmp://eu-users.webofstories.com/cfx/st/'
+ _TESTS = [{
+ 'url': 'http://www.webofstories.com/play/hans.bethe/71',
+ 'md5': '373e4dd915f60cfe3116322642ddf364',
+ 'info_dict': {
+ 'id': '4536',
+ 'ext': 'mp4',
+ 'title': 'The temperature of the sun',
+ 'thumbnail': r're:^https?://.*\.jpg$',
+ 'description': 'Hans Bethe talks about calculating the temperature of the sun',
+ 'duration': 238,
+ }
+ }, {
+ 'url': 'http://www.webofstories.com/play/55908',
+ 'md5': '2985a698e1fe3211022422c4b5ed962c',
+ 'info_dict': {
+ 'id': '55908',
+ 'ext': 'mp4',
+ 'title': 'The story of Gemmata obscuriglobus',
+ 'thumbnail': r're:^https?://.*\.jpg$',
+ 'description': 'Planctomycete talks about The story of Gemmata obscuriglobus',
+ 'duration': 169,
+ },
+ 'skip': 'notfound',
+ }, {
+ # malformed og:title meta
+ 'url': 'http://www.webofstories.com/play/54215?o=MS',
+ 'info_dict': {
+ 'id': '54215',
+ 'ext': 'mp4',
+ 'title': '"A Leg to Stand On"',
+ 'thumbnail': r're:^https?://.*\.jpg$',
+ 'description': 'Oliver Sacks talks about the death and resurrection of a limb',
+ 'duration': 97,
+ },
+ 'params': {
+ 'skip_download': True,
+ },
+ }]
+
+ def _real_extract(self, url):
+ video_id = self._match_id(url)
+
+ webpage = self._download_webpage(url, video_id)
+ # Sometimes og:title meta is malformed
+ title = self._og_search_title(webpage, default=None) or self._html_search_regex(
+ r'(?s)<strong>Title:\s*</strong>(.+?)<', webpage, 'title')
+ description = self._html_search_meta('description', webpage)
+ thumbnail = self._og_search_thumbnail(webpage)
+
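+        # the page builds its player via getEmbedCode(<args>); scrape and split that argument list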
+ embed_params = [s.strip(" \r\n\t'") for s in self._search_regex(
+ r'(?s)\$\("#embedCode"\).html\(getEmbedCode\((.*?)\)',
+ webpage, 'embed params').split(',')]
+
+ (
+ _, speaker_id, story_id, story_duration,
+ speaker_type, great_life, _thumbnail, _has_subtitles,
+ story_filename, _story_order) = embed_params
+
+ is_great_life_series = great_life == 'true'
+ duration = int_or_none(story_duration)
+
+ # URL building, see: http://www.webofstories.com/scripts/player.js
+ ms_prefix = ''
+ if speaker_type.lower() == 'ms':
+ ms_prefix = 'mini_sites/'
+
+ if is_great_life_series:
+            mp4_url = '{0}lives/{1}/{2}.mp4'.format(
+                self._VIDEO_DOMAIN, speaker_id, story_filename)
+            rtmp_ext = 'flv'
+            streamer = self._GREAT_LIFE_STREAMER
+            play_path = 'stories/{0}/{1}'.format(
+                speaker_id, story_filename)
+        else:
+            mp4_url = '{0}{1}{2}/{3}.mp4'.format(
+                self._VIDEO_DOMAIN, ms_prefix, speaker_id, story_filename)
+            rtmp_ext = 'mp4'
+            streamer = self._USER_STREAMER
+            play_path = 'mp4:{0}{1}/{2}.mp4'.format(
+                ms_prefix, speaker_id, story_filename)
+
+ formats = [{
+ 'format_id': 'mp4_sd',
+ 'url': mp4_url,
+ }, {
+ 'format_id': 'rtmp_sd',
+ 'page_url': url,
+ 'url': streamer,
+ 'ext': rtmp_ext,
+ 'play_path': play_path,
+ }]
+
+ return {
+ 'id': story_id,
+ 'title': title,
+ 'formats': formats,
+ 'thumbnail': thumbnail,
+ 'description': description,
+ 'duration': duration,
+ }
+
+
+class WebOfStoriesPlaylistIE(InfoExtractor):
+ _VALID_URL = r'https?://(?:www\.)?webofstories\.com/playAll/(?P<id>[^/]+)'
+ _TEST = {
+ 'url': 'http://www.webofstories.com/playAll/donald.knuth',
+ 'info_dict': {
+ 'id': 'donald.knuth',
+ 'title': 'Donald Knuth (Scientist)',
+ },
+ 'playlist_mincount': 97,
+ }
+
+ def _real_extract(self, url):
+ playlist_id = self._match_id(url)
+
+ webpage = self._download_webpage(url, playlist_id)
+
+ entries = [
+ self.url_result(
+ 'http://www.webofstories.com/play/%s' % video_id,
+ 'WebOfStories', video_id=video_id)
+ for video_id in orderedSet(re.findall(r'\bid=["\']td_(\d+)', webpage))
+ ]
+
+ title = self._search_regex(
+ r'<div id="speakerName">\s*<span>([^<]+)</span>',
+ webpage, 'speaker', default=None)
+ if title:
+ field = self._search_regex(
+ r'<span id="primaryField">([^<]+)</span>',
+ webpage, 'field', default=None)
+ if field:
+ title += ' (%s)' % field
+
+ if not title:
+ title = self._search_regex(
+ r'<title>Play\s+all\s+stories\s*-\s*([^<]+)\s*-\s*Web\s+of\s+Stories</title>',
+ webpage, 'title')
+
+ return self.playlist_result(entries, playlist_id, title)
diff --git a/yt_dlp/extractor/weibo.py b/yt_dlp/extractor/weibo.py
new file mode 100644
index 0000000..2fca745
--- /dev/null
+++ b/yt_dlp/extractor/weibo.py
@@ -0,0 +1,251 @@
+import itertools
+import json
+import random
+import urllib.parse
+
+from .common import InfoExtractor
+from ..utils import (
+ int_or_none,
+ make_archive_id,
+ mimetype2ext,
+ parse_resolution,
+ str_or_none,
+ strip_jsonp,
+ traverse_obj,
+ url_or_none,
+ urlencode_postdata,
+ urljoin,
+)
+
+
+class WeiboBaseIE(InfoExtractor):
+ def _update_visitor_cookies(self, visitor_url, video_id):
+ headers = {'Referer': visitor_url}
+ chrome_ver = self._search_regex(
+ r'Chrome/(\d+)', self.get_param('http_headers')['User-Agent'], 'user agent version', default='90')
+ visitor_data = self._download_json(
+ 'https://passport.weibo.com/visitor/genvisitor', video_id,
+ note='Generating first-visit guest request',
+ headers=headers, transform_source=strip_jsonp,
+ data=urlencode_postdata({
+ 'cb': 'gen_callback',
+ 'fp': json.dumps({
+ 'os': '1',
+ 'browser': f'Chrome{chrome_ver},0,0,0',
+ 'fonts': 'undefined',
+ 'screenInfo': '1920*1080*24',
+ 'plugins': ''
+ }, separators=(',', ':'))}))['data']
+
+ self._download_webpage(
+ 'https://passport.weibo.com/visitor/visitor', video_id,
+ note='Running first-visit callback to get guest cookies',
+ headers=headers, query={
+ 'a': 'incarnate',
+ 't': visitor_data['tid'],
+ 'w': 3 if visitor_data.get('new_tid') else 2,
+ 'c': f'{visitor_data.get("confidence", 100):03d}',
+ 'gc': '',
+ 'cb': 'cross_domain',
+ 'from': 'weibo',
+ '_rand': random.random(),
+ })
+
+ def _weibo_download_json(self, url, video_id, *args, fatal=True, note='Downloading JSON metadata', **kwargs):
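+        # first-time visitors are redirected to passport.weibo.com; obtain
+        # guest cookies there, then retry the original request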
+ webpage, urlh = self._download_webpage_handle(url, video_id, *args, fatal=fatal, note=note, **kwargs)
+ if urllib.parse.urlparse(urlh.url).netloc == 'passport.weibo.com':
+ self._update_visitor_cookies(urlh.url, video_id)
+ webpage = self._download_webpage(url, video_id, *args, fatal=fatal, note=note, **kwargs)
+ return self._parse_json(webpage, video_id, fatal=fatal)
+
+ def _extract_formats(self, video_info):
+ media_info = traverse_obj(video_info, ('page_info', 'media_info'))
+ formats = traverse_obj(media_info, (
+ 'playback_list', lambda _, v: url_or_none(v['play_info']['url']), 'play_info', {
+ 'url': 'url',
+ 'format': ('quality_desc', {str}),
+ 'format_id': ('label', {str}),
+ 'ext': ('mime', {mimetype2ext}),
+ 'tbr': ('bitrate', {int_or_none}, {lambda x: x or None}),
+ 'vcodec': ('video_codecs', {str}),
+ 'fps': ('fps', {int_or_none}),
+ 'width': ('width', {int_or_none}),
+ 'height': ('height', {int_or_none}),
+ 'filesize': ('size', {int_or_none}),
+ 'acodec': ('audio_codecs', {str}),
+ 'asr': ('audio_sample_rate', {int_or_none}),
+ 'audio_channels': ('audio_channels', {int_or_none}),
+ }))
+        if not formats:  # fallback, rarely needed
+ for url in set(traverse_obj(media_info, (..., {url_or_none}))):
+ if 'label=' in url: # filter out non-video urls
+ format_id, resolution = self._search_regex(
+ r'label=(\w+)&template=(\d+x\d+)', url, 'format info',
+ group=(1, 2), default=(None, None))
+ formats.append({
+ 'url': url,
+ 'format_id': format_id,
+ **parse_resolution(resolution),
+ **traverse_obj(media_info, (
+ 'video_details', lambda _, v: v['label'].startswith(format_id), {
+ 'size': ('size', {int_or_none}),
+ 'tbr': ('bitrate', {int_or_none}),
+ }
+ ), get_all=False),
+ })
+ return formats
+
+ def _parse_video_info(self, video_info, video_id=None):
+ return {
+ 'id': video_id,
+ 'extractor_key': WeiboIE.ie_key(),
+ 'extractor': WeiboIE.IE_NAME,
+ 'formats': self._extract_formats(video_info),
+ 'http_headers': {'Referer': 'https://weibo.com/'},
+ '_old_archive_ids': [make_archive_id('WeiboMobile', video_id)],
+ **traverse_obj(video_info, {
+ 'id': (('id', 'id_str', 'mid'), {str_or_none}),
+ 'display_id': ('mblogid', {str_or_none}),
+ 'title': ('page_info', 'media_info', ('video_title', 'kol_title', 'name'), {str}, {lambda x: x or None}),
+ 'description': ('text_raw', {str}),
+ 'duration': ('page_info', 'media_info', 'duration', {int_or_none}),
+ 'timestamp': ('page_info', 'media_info', 'video_publish_time', {int_or_none}),
+ 'thumbnail': ('page_info', 'page_pic', {url_or_none}),
+ 'uploader': ('user', 'screen_name', {str}),
+ 'uploader_id': ('user', ('id', 'id_str'), {str_or_none}),
+ 'uploader_url': ('user', 'profile_url', {lambda x: urljoin('https://weibo.com/', x)}),
+ 'view_count': ('page_info', 'media_info', 'online_users_number', {int_or_none}),
+ 'like_count': ('attitudes_count', {int_or_none}),
+ 'repost_count': ('reposts_count', {int_or_none}),
+ }, get_all=False),
+ 'tags': traverse_obj(video_info, ('topic_struct', ..., 'topic_title', {str})) or None,
+ }
+
+
+class WeiboIE(WeiboBaseIE):
+ _VALID_URL = r'https?://(?:m\.weibo\.cn/status|(?:www\.)?weibo\.com/\d+)/(?P<id>[a-zA-Z0-9]+)'
+ _TESTS = [{
+ 'url': 'https://weibo.com/7827771738/N4xlMvjhI',
+ 'info_dict': {
+ 'id': '4910815147462302',
+ 'ext': 'mp4',
+ 'display_id': 'N4xlMvjhI',
+ 'title': '【睡前消息暑假版第一期:拉泰国一把 对中国有好处】',
+ 'description': 'md5:e2637a7673980d68694ea7c43cf12a5f',
+ 'duration': 918,
+ 'timestamp': 1686312819,
+ 'upload_date': '20230609',
+ 'thumbnail': r're:https://.*\.jpg',
+ 'uploader': '睡前视频基地',
+ 'uploader_id': '7827771738',
+ 'uploader_url': 'https://weibo.com/u/7827771738',
+ 'view_count': int,
+ 'like_count': int,
+ 'repost_count': int,
+ 'tags': ['泰国大选远进党获胜', '睡前消息', '暑期版'],
+ },
+ }, {
+ 'url': 'https://m.weibo.cn/status/4189191225395228',
+ 'info_dict': {
+ 'id': '4189191225395228',
+ 'ext': 'mp4',
+ 'display_id': 'FBqgOmDxO',
+ 'title': '柴犬柴犬的秒拍视频',
+ 'description': 'md5:80f461ab5cdae6bbdb70efbf5a1db24f',
+ 'duration': 53,
+ 'timestamp': 1514264429,
+ 'upload_date': '20171226',
+ 'thumbnail': r're:https://.*\.jpg',
+ 'uploader': '柴犬柴犬',
+ 'uploader_id': '5926682210',
+ 'uploader_url': 'https://weibo.com/u/5926682210',
+ 'view_count': int,
+ 'like_count': int,
+ 'repost_count': int,
+ }
+ }, {
+ 'url': 'https://weibo.com/0/4224132150961381',
+ 'note': 'no playback_list example',
+ 'only_matching': True,
+ }]
+
+ def _real_extract(self, url):
+ video_id = self._match_id(url)
+
+ return self._parse_video_info(self._weibo_download_json(
+ f'https://weibo.com/ajax/statuses/show?id={video_id}', video_id))
+
+
+class WeiboVideoIE(WeiboBaseIE):
+ _VALID_URL = r'https?://(?:www\.)?weibo\.com/tv/show/(?P<id>\d+:\d+)'
+ _TESTS = [{
+ 'url': 'https://weibo.com/tv/show/1034:4797699866951785?from=old_pc_videoshow',
+ 'info_dict': {
+ 'id': '4797700463137878',
+ 'ext': 'mp4',
+ 'display_id': 'LEZDodaiW',
+ 'title': '呃,稍微了解了一下靡烟miya,感觉这东西也太二了',
+ 'description': '呃,稍微了解了一下靡烟miya,感觉这东西也太二了 http://t.cn/A6aerGsM ​​​',
+ 'duration': 76,
+ 'timestamp': 1659344278,
+ 'upload_date': '20220801',
+ 'thumbnail': r're:https://.*\.jpg',
+ 'uploader': '君子爱财陈平安',
+ 'uploader_id': '3905382233',
+ 'uploader_url': 'https://weibo.com/u/3905382233',
+ 'view_count': int,
+ 'like_count': int,
+ 'repost_count': int,
+ }
+ }]
+
+ def _real_extract(self, url):
+ video_id = self._match_id(url)
+
+ post_data = f'data={{"Component_Play_Playinfo":{{"oid":"{video_id}"}}}}'.encode()
+ video_info = self._weibo_download_json(
+ f'https://weibo.com/tv/api/component?page=%2Ftv%2Fshow%2F{video_id.replace(":", "%3A")}',
+ video_id, headers={'Referer': url}, data=post_data)['data']['Component_Play_Playinfo']
+ return self.url_result(f'https://weibo.com/0/{video_info["mid"]}', WeiboIE)
+
+
+class WeiboUserIE(WeiboBaseIE):
+ _VALID_URL = r'https?://(?:www\.)?weibo\.com/u/(?P<id>\d+)'
+ _TESTS = [{
+ 'url': 'https://weibo.com/u/2066652961?tabtype=video',
+ 'info_dict': {
+ 'id': '2066652961',
+ 'title': '萧影殿下的视频',
+ 'description': '萧影殿下的全部视频',
+ 'uploader': '萧影殿下',
+ },
+ 'playlist_mincount': 195,
+ }]
+
+ def _fetch_page(self, uid, cursor=0, page=1):
+ return self._weibo_download_json(
+ 'https://weibo.com/ajax/profile/getWaterFallContent',
+ uid, note=f'Downloading videos page {page}',
+ query={'uid': uid, 'cursor': cursor})['data']
+
+ def _entries(self, uid, first_page):
+ cursor = 0
+ for page in itertools.count(1):
+ response = first_page if page == 1 else self._fetch_page(uid, cursor, page)
+ for video_info in traverse_obj(response, ('list', ..., {dict})):
+ yield self._parse_video_info(video_info)
+ cursor = response.get('next_cursor')
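+            # a missing or negative next_cursor marks the last page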
+ if (int_or_none(cursor) or -1) < 0:
+ break
+
+ def _real_extract(self, url):
+ uid = self._match_id(url)
+ first_page = self._fetch_page(uid)
+ uploader = traverse_obj(first_page, ('list', ..., 'user', 'screen_name', {str}), get_all=False)
+ metainfo = {
+ 'title': f'{uploader}的视频',
+ 'description': f'{uploader}的全部视频',
+ 'uploader': uploader,
+ } if uploader else {}
+
+ return self.playlist_result(self._entries(uid, first_page), uid, **metainfo)
diff --git a/yt_dlp/extractor/weiqitv.py b/yt_dlp/extractor/weiqitv.py
new file mode 100644
index 0000000..89e4856
--- /dev/null
+++ b/yt_dlp/extractor/weiqitv.py
@@ -0,0 +1,50 @@
+from .common import InfoExtractor
+
+
+class WeiqiTVIE(InfoExtractor):
+ _WORKING = False
+ IE_DESC = 'WQTV'
+ _VALID_URL = r'https?://(?:www\.)?weiqitv\.com/index/video_play\?videoId=(?P<id>[A-Za-z0-9]+)'
+
+ _TESTS = [{
+ 'url': 'http://www.weiqitv.com/index/video_play?videoId=53c744f09874f0e76a8b46f3',
+ 'md5': '26450599afd64c513bc77030ad15db44',
+ 'info_dict': {
+ 'id': '53c744f09874f0e76a8b46f3',
+ 'ext': 'mp4',
+ 'title': '2013年度盘点',
+ },
+ }, {
+ 'url': 'http://www.weiqitv.com/index/video_play?videoId=567379a2d4c36cca518b4569',
+ 'info_dict': {
+ 'id': '567379a2d4c36cca518b4569',
+ 'ext': 'mp4',
+ 'title': '民国围棋史',
+ },
+ }, {
+ 'url': 'http://www.weiqitv.com/index/video_play?videoId=5430220a9874f088658b4567',
+ 'info_dict': {
+ 'id': '5430220a9874f088658b4567',
+ 'ext': 'mp4',
+ 'title': '二路托过的手段和运用',
+ },
+ }]
+
+ def _real_extract(self, url):
+ media_id = self._match_id(url)
+ page = self._download_webpage(url, media_id)
+
+ info_json_str = self._search_regex(
+ r'var\s+video\s*=\s*(.+});', page, 'info json str')
+ info_json = self._parse_json(info_json_str, media_id)
+
+ letvcloud_url = self._search_regex(
+ r'var\s+letvurl\s*=\s*"([^"]+)', page, 'letvcloud url')
+
+ return {
+ '_type': 'url_transparent',
+ 'ie_key': 'LetvCloud',
+ 'url': letvcloud_url,
+ 'title': info_json['name'],
+ 'id': media_id,
+ }
diff --git a/yt_dlp/extractor/weverse.py b/yt_dlp/extractor/weverse.py
new file mode 100644
index 0000000..c94ca9d
--- /dev/null
+++ b/yt_dlp/extractor/weverse.py
@@ -0,0 +1,608 @@
+import base64
+import hashlib
+import hmac
+import itertools
+import json
+import re
+import time
+import urllib.parse
+import uuid
+
+from .common import InfoExtractor
+from .naver import NaverBaseIE
+from .youtube import YoutubeIE
+from ..networking.exceptions import HTTPError
+from ..utils import (
+ ExtractorError,
+ UserNotLive,
+ float_or_none,
+ int_or_none,
+ str_or_none,
+ traverse_obj,
+ try_call,
+ update_url_query,
+ url_or_none,
+)
+
+
+class WeverseBaseIE(InfoExtractor):
+ _NETRC_MACHINE = 'weverse'
+ _ACCOUNT_API_BASE = 'https://accountapi.weverse.io/web/api/v2'
+ _API_HEADERS = {
+ 'Referer': 'https://weverse.io/',
+ 'WEV-device-Id': str(uuid.uuid4()),
+ }
+
+ def _perform_login(self, username, password):
+ if self._API_HEADERS.get('Authorization'):
+ return
+
+ headers = {
+ 'x-acc-app-secret': '5419526f1c624b38b10787e5c10b2a7a',
+ 'x-acc-app-version': '2.2.6',
+ 'x-acc-language': 'en',
+ 'x-acc-service-id': 'weverse',
+ 'x-acc-trace-id': str(uuid.uuid4()),
+ 'x-clog-user-device-id': str(uuid.uuid4()),
+ }
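+        # the signup status endpoint returns hasPassword only for registered
+        # emails, so its absence means the username is unknown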
+ valid_username = traverse_obj(self._download_json(
+ f'{self._ACCOUNT_API_BASE}/signup/email/status', None, note='Checking username',
+ query={'email': username}, headers=headers, expected_status=(400, 404)), 'hasPassword')
+ if not valid_username:
+ raise ExtractorError('Invalid username provided', expected=True)
+
+ headers['content-type'] = 'application/json'
+ try:
+ auth = self._download_json(
+ f'{self._ACCOUNT_API_BASE}/auth/token/by-credentials', None, data=json.dumps({
+ 'email': username,
+ 'password': password,
+ }, separators=(',', ':')).encode(), headers=headers, note='Logging in')
+ except ExtractorError as e:
+ if isinstance(e.cause, HTTPError) and e.cause.status == 401:
+ raise ExtractorError('Invalid password provided', expected=True)
+ raise
+
+ WeverseBaseIE._API_HEADERS['Authorization'] = f'Bearer {auth["accessToken"]}'
+
+ def _real_initialize(self):
+ if self._API_HEADERS.get('Authorization'):
+ return
+
+ token = try_call(lambda: self._get_cookies('https://weverse.io/')['we2_access_token'].value)
+ if token:
+ WeverseBaseIE._API_HEADERS['Authorization'] = f'Bearer {token}'
+
+ def _call_api(self, ep, video_id, data=None, note='Downloading API JSON'):
+ # Ref: https://ssl.pstatic.net/static/wevweb/2_3_2_11101725/public/static/js/2488.a09b41ff.chunk.js
+ # From https://ssl.pstatic.net/static/wevweb/2_3_2_11101725/public/static/js/main.e206f7c1.js:
+ key = b'1b9cb6378d959b45714bec49971ade22e6e24e42'
+ api_path = update_url_query(ep, {
+ 'appId': 'be4d79eb8fc7bd008ee82c8ec4ff6fd4',
+ 'language': 'en',
+ 'platform': 'WEB',
+ 'wpf': 'pc',
+ })
+ wmsgpad = int(time.time() * 1000)
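+        # wmd = base64(HMAC-SHA1(key, first 255 chars of the API path + wmsgpad))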
+ wmd = base64.b64encode(hmac.HMAC(
+ key, f'{api_path[:255]}{wmsgpad}'.encode(), digestmod=hashlib.sha1).digest()).decode()
+ headers = {'Content-Type': 'application/json'} if data else {}
+ try:
+ return self._download_json(
+ f'https://global.apis.naver.com/weverse/wevweb{api_path}', video_id, note=note,
+ data=data, headers={**self._API_HEADERS, **headers}, query={
+ 'wmsgpad': wmsgpad,
+ 'wmd': wmd,
+ })
+ except ExtractorError as e:
+ if isinstance(e.cause, HTTPError) and e.cause.status == 401:
+ self.raise_login_required(
+ 'Session token has expired. Log in again or refresh cookies in browser')
+ elif isinstance(e.cause, HTTPError) and e.cause.status == 403:
+ if 'Authorization' in self._API_HEADERS:
+ raise ExtractorError('Your account does not have access to this content', expected=True)
+ self.raise_login_required()
+ raise
+
+ def _call_post_api(self, video_id):
+ path = '' if 'Authorization' in self._API_HEADERS else '/preview'
+ return self._call_api(f'/post/v1.0/post-{video_id}{path}?fieldSet=postV1', video_id)
+
+ def _get_community_id(self, channel):
+ return str(self._call_api(
+ f'/community/v1.0/communityIdUrlPathByUrlPathArtistCode?keyword={channel}',
+ channel, note='Fetching community ID')['communityId'])
+
+ def _get_formats(self, data, video_id):
+ formats = traverse_obj(data, ('videos', 'list', lambda _, v: url_or_none(v['source']), {
+ 'url': 'source',
+ 'width': ('encodingOption', 'width', {int_or_none}),
+ 'height': ('encodingOption', 'height', {int_or_none}),
+ 'vcodec': 'type',
+ 'vbr': ('bitrate', 'video', {int_or_none}),
+ 'abr': ('bitrate', 'audio', {int_or_none}),
+ 'filesize': ('size', {int_or_none}),
+ 'format_id': ('encodingOption', 'id', {str_or_none}),
+ }))
+
+ for stream in traverse_obj(data, ('streams', lambda _, v: v['type'] == 'HLS' and url_or_none(v['source']))):
+ query = {}
+ for param in traverse_obj(stream, ('keys', lambda _, v: v['type'] == 'param' and v['name'])):
+ query[param['name']] = param.get('value', '')
+ fmts = self._extract_m3u8_formats(
+ stream['source'], video_id, 'mp4', m3u8_id='hls', fatal=False, query=query)
+ if query:
+ for fmt in fmts:
+ fmt['url'] = update_url_query(fmt['url'], query)
+ fmt['extra_param_to_segment_url'] = urllib.parse.urlencode(query)
+ formats.extend(fmts)
+
+ return formats
+
+ def _get_subs(self, caption_url):
+ subs_ext_re = r'\.(?:ttml|vtt)'
+ replace_ext = lambda x, y: re.sub(subs_ext_re, y, x)
+ if re.search(subs_ext_re, caption_url):
+ return [replace_ext(caption_url, '.ttml'), replace_ext(caption_url, '.vtt')]
+ return [caption_url]
+
+ def _parse_post_meta(self, metadata):
+ return traverse_obj(metadata, {
+ 'title': ((('extension', 'mediaInfo', 'title'), 'title'), {str}),
+ 'description': ((('extension', 'mediaInfo', 'body'), 'body'), {str}),
+ 'uploader': ('author', 'profileName', {str}),
+ 'uploader_id': ('author', 'memberId', {str}),
+ 'creator': ('community', 'communityName', {str}),
+ 'channel_id': (('community', 'author'), 'communityId', {str_or_none}),
+ 'duration': ('extension', 'video', 'playTime', {float_or_none}),
+ 'timestamp': ('publishedAt', {lambda x: int_or_none(x, 1000)}),
+ 'release_timestamp': ('extension', 'video', 'onAirStartAt', {lambda x: int_or_none(x, 1000)}),
+ 'thumbnail': ('extension', (('mediaInfo', 'thumbnail', 'url'), ('video', 'thumb')), {url_or_none}),
+ 'view_count': ('extension', 'video', 'playCount', {int_or_none}),
+ 'like_count': ('extension', 'video', 'likeCount', {int_or_none}),
+ 'comment_count': ('commentCount', {int_or_none}),
+ }, get_all=False)
+
+ def _extract_availability(self, data):
+ return self._availability(**traverse_obj(data, ((('extension', 'video'), None), {
+ 'needs_premium': 'paid',
+ 'needs_subscription': 'membershipOnly',
+ }), get_all=False, expected_type=bool), needs_auth=True)
+
+ def _extract_live_status(self, data):
+ data = traverse_obj(data, ('extension', 'video', {dict})) or {}
+ if data.get('type') == 'LIVE':
+ return traverse_obj({
+ 'ONAIR': 'is_live',
+ 'DONE': 'post_live',
+ 'STANDBY': 'is_upcoming',
+ 'DELAY': 'is_upcoming',
+ }, (data.get('status'), {str})) or 'is_live'
+ return 'was_live' if data.get('liveToVod') else 'not_live'
+
+
+class WeverseIE(WeverseBaseIE):
+ _VALID_URL = r'https?://(?:www\.|m\.)?weverse\.io/(?P<artist>[^/?#]+)/live/(?P<id>[\d-]+)'
+ _TESTS = [{
+ 'url': 'https://weverse.io/billlie/live/0-107323480',
+ 'md5': '1fa849f00181eef9100d3c8254c47979',
+ 'info_dict': {
+ 'id': '0-107323480',
+ 'ext': 'mp4',
+ 'title': '행복한 평이루💜',
+ 'description': '',
+ 'uploader': 'Billlie',
+ 'uploader_id': '5ae14aed7b7cdc65fa87c41fe06cc936',
+ 'channel': 'billlie',
+ 'channel_id': '72',
+ 'channel_url': 'https://weverse.io/billlie',
+ 'creator': 'Billlie',
+ 'timestamp': 1666262062,
+ 'upload_date': '20221020',
+ 'release_timestamp': 1666262058,
+ 'release_date': '20221020',
+ 'duration': 3102,
+ 'thumbnail': r're:^https?://.*\.jpe?g$',
+ 'view_count': int,
+ 'like_count': int,
+ 'comment_count': int,
+ 'availability': 'needs_auth',
+ 'live_status': 'was_live',
+ },
+ }, {
+ 'url': 'https://weverse.io/lesserafim/live/2-102331763',
+ 'md5': 'e46125c08b13a6c8c1f4565035cca987',
+ 'info_dict': {
+ 'id': '2-102331763',
+ 'ext': 'mp4',
+ 'title': '🎂김채원 생신🎂',
+ 'description': '🎂김채원 생신🎂',
+ 'uploader': 'LE SSERAFIM ',
+ 'uploader_id': 'd26ddc1e258488a0a2b795218d14d59d',
+ 'channel': 'lesserafim',
+ 'channel_id': '47',
+ 'channel_url': 'https://weverse.io/lesserafim',
+ 'creator': 'LE SSERAFIM',
+ 'timestamp': 1659353400,
+ 'upload_date': '20220801',
+ 'release_timestamp': 1659353400,
+ 'release_date': '20220801',
+ 'duration': 3006,
+ 'thumbnail': r're:^https?://.*\.jpe?g$',
+ 'view_count': int,
+ 'like_count': int,
+ 'comment_count': int,
+ 'availability': 'needs_auth',
+ 'live_status': 'was_live',
+ 'subtitles': {
+ 'id_ID': 'count:2',
+ 'en_US': 'count:2',
+ 'es_ES': 'count:2',
+ 'vi_VN': 'count:2',
+ 'th_TH': 'count:2',
+ 'zh_CN': 'count:2',
+ 'zh_TW': 'count:2',
+ 'ja_JP': 'count:2',
+ 'ko_KR': 'count:2',
+ },
+ },
+ }, {
+ 'url': 'https://weverse.io/treasure/live/2-117230416',
+ 'info_dict': {
+ 'id': '2-117230416',
+ 'ext': 'mp4',
+ 'title': r're:스껄도려님 첫 스무살 생파🦋',
+ 'description': '',
+ 'uploader': 'TREASURE',
+ 'uploader_id': '77eabbc449ca37f7970054a136f60082',
+ 'channel': 'treasure',
+ 'channel_id': '20',
+ 'channel_url': 'https://weverse.io/treasure',
+ 'creator': 'TREASURE',
+ 'timestamp': 1680667651,
+ 'upload_date': '20230405',
+ 'release_timestamp': 1680667639,
+ 'release_date': '20230405',
+ 'thumbnail': r're:^https?://.*\.jpe?g$',
+ 'view_count': int,
+ 'like_count': int,
+ 'comment_count': int,
+ 'availability': 'needs_auth',
+ 'live_status': 'is_live',
+ },
+ 'skip': 'Livestream has ended',
+ }]
+
+ def _real_extract(self, url):
+ channel, video_id = self._match_valid_url(url).group('artist', 'id')
+ post = self._call_post_api(video_id)
+ api_video_id = post['extension']['video']['videoId']
+ availability = self._extract_availability(post)
+ live_status = self._extract_live_status(post)
+ video_info, formats = {}, []
+
+ if live_status == 'is_upcoming':
+ self.raise_no_formats('Livestream has not yet started', expected=True)
+
+ elif live_status == 'is_live':
+ video_info = self._call_api(
+ f'/video/v1.0/lives/{api_video_id}/playInfo?preview.format=json&preview.version=v2',
+ video_id, note='Downloading live JSON')
+ playback = self._parse_json(video_info['lipPlayback'], video_id)
+ m3u8_url = traverse_obj(playback, (
+ 'media', lambda _, v: v['protocol'] == 'HLS', 'path', {url_or_none}), get_all=False)
+ formats = self._extract_m3u8_formats(m3u8_url, video_id, 'mp4', m3u8_id='hls', live=True)
+
+ elif live_status == 'post_live':
+ if availability in ('premium_only', 'subscriber_only'):
+ self.report_drm(video_id)
+ self.raise_no_formats(
+ 'Livestream has ended and downloadable VOD is not available', expected=True)
+
+ else:
+ infra_video_id = post['extension']['video']['infraVideoId']
+ in_key = self._call_api(
+ f'/video/v1.0/vod/{api_video_id}/inKey?preview=false', video_id,
+ data=b'{}', note='Downloading VOD API key')['inKey']
+
+ video_info = self._download_json(
+ f'https://global.apis.naver.com/rmcnmv/rmcnmv/vod/play/v2.0/{infra_video_id}',
+ video_id, note='Downloading VOD JSON', query={
+ 'key': in_key,
+ 'sid': traverse_obj(post, ('extension', 'video', 'serviceId')) or '2070',
+ 'pid': str(uuid.uuid4()),
+ 'nonce': int(time.time() * 1000),
+ 'devt': 'html5_pc',
+ 'prv': 'Y' if post.get('membershipOnly') else 'N',
+ 'aup': 'N',
+ 'stpb': 'N',
+ 'cpl': 'en',
+ 'env': 'prod',
+ 'lc': 'en',
+ 'adi': '[{"adSystem":"null"}]',
+ 'adu': '/',
+ })
+
+ formats = self._get_formats(video_info, video_id)
+ has_drm = traverse_obj(video_info, ('meta', 'provider', 'name', {str.lower})) == 'drm'
+ if has_drm and formats:
+ self.report_warning(
+ 'Requested content is DRM-protected, only a 30-second preview is available', video_id)
+ elif has_drm and not formats:
+ self.report_drm(video_id)
+
+ return {
+ 'id': video_id,
+ 'channel': channel,
+ 'channel_url': f'https://weverse.io/{channel}',
+ 'formats': formats,
+ 'availability': availability,
+ 'live_status': live_status,
+ **self._parse_post_meta(post),
+ **NaverBaseIE.process_subtitles(video_info, self._get_subs),
+ }
+
+
+class WeverseMediaIE(WeverseBaseIE):
+ _VALID_URL = r'https?://(?:www\.|m\.)?weverse\.io/(?P<artist>[^/?#]+)/media/(?P<id>[\d-]+)'
+ _TESTS = [{
+ 'url': 'https://weverse.io/billlie/media/4-116372884',
+ 'md5': '8efc9cfd61b2f25209eb1a5326314d28',
+ 'info_dict': {
+ 'id': 'e-C9wLSQs6o',
+ 'ext': 'mp4',
+ 'title': 'Billlie | \'EUNOIA\' Performance Video (heartbeat ver.)',
+ 'description': 'md5:6181caaf2a2397bca913ffe368c104e5',
+ 'channel': 'Billlie',
+ 'channel_id': 'UCyc9sUCxELTDK9vELO5Fzeg',
+ 'channel_url': 'https://www.youtube.com/channel/UCyc9sUCxELTDK9vELO5Fzeg',
+ 'uploader': 'Billlie',
+ 'uploader_id': '@Billlie',
+ 'uploader_url': 'http://www.youtube.com/@Billlie',
+ 'upload_date': '20230403',
+ 'duration': 211,
+ 'age_limit': 0,
+ 'playable_in_embed': True,
+ 'live_status': 'not_live',
+ 'availability': 'public',
+ 'view_count': int,
+ 'comment_count': int,
+ 'like_count': int,
+ 'channel_follower_count': int,
+ 'thumbnail': 'https://i.ytimg.com/vi/e-C9wLSQs6o/maxresdefault.jpg',
+ 'categories': ['Entertainment'],
+ 'tags': 'count:7',
+ },
+ }, {
+ 'url': 'https://weverse.io/billlie/media/3-102914520',
+ 'md5': '031551fcbd716bc4f080cb6174a43d8a',
+ 'info_dict': {
+ 'id': '3-102914520',
+ 'ext': 'mp4',
+ 'title': 'From. SUHYEON🌸',
+ 'description': 'Billlie 멤버별 독점 영상 공개💙💜',
+ 'uploader': 'Billlie_official',
+ 'uploader_id': 'f569c6e92f7eaffef0a395037dcaa54f',
+ 'channel': 'billlie',
+ 'channel_id': '72',
+ 'channel_url': 'https://weverse.io/billlie',
+ 'creator': 'Billlie',
+ 'timestamp': 1662174000,
+ 'upload_date': '20220903',
+ 'release_timestamp': 1662174000,
+ 'release_date': '20220903',
+ 'duration': 17.0,
+ 'thumbnail': r're:^https?://.*\.jpe?g$',
+ 'view_count': int,
+ 'like_count': int,
+ 'comment_count': int,
+ 'availability': 'needs_auth',
+ 'live_status': 'not_live',
+ },
+ }]
+
+ def _real_extract(self, url):
+ channel, video_id = self._match_valid_url(url).group('artist', 'id')
+ post = self._call_post_api(video_id)
+ media_type = traverse_obj(post, ('extension', 'mediaInfo', 'mediaType', {str.lower}))
+ youtube_id = traverse_obj(post, ('extension', 'youtube', 'youtubeVideoId', {str}))
+
+ if media_type == 'vod':
+ return self.url_result(f'https://weverse.io/{channel}/live/{video_id}', WeverseIE)
+ elif media_type == 'youtube' and youtube_id:
+ return self.url_result(youtube_id, YoutubeIE)
+ elif media_type == 'image':
+ self.raise_no_formats('No video content found in webpage', expected=True)
+ elif media_type:
+ raise ExtractorError(f'Unsupported media type "{media_type}"')
+
+ self.raise_no_formats('No video content found in webpage')
+
+
+class WeverseMomentIE(WeverseBaseIE):
+ _VALID_URL = r'https?://(?:www\.|m\.)?weverse\.io/(?P<artist>[^/?#]+)/moment/(?P<uid>[\da-f]+)/post/(?P<id>[\d-]+)'
+ _TESTS = [{
+ 'url': 'https://weverse.io/secretnumber/moment/66a07e164b56a696ee71c99315ffe27b/post/1-117229444',
+ 'md5': '87733ac19a54081b7dfc2442036d282b',
+ 'info_dict': {
+ 'id': '1-117229444',
+ 'ext': 'mp4',
+ 'title': '今日もめっちゃいい天気☀️🌤️',
+ 'uploader': '레아',
+ 'uploader_id': '66a07e164b56a696ee71c99315ffe27b',
+ 'channel': 'secretnumber',
+ 'channel_id': '56',
+ 'creator': 'SECRET NUMBER',
+ 'duration': 10,
+ 'upload_date': '20230405',
+ 'timestamp': 1680653968,
+ 'thumbnail': r're:^https?://.*\.jpe?g$',
+ 'like_count': int,
+ 'comment_count': int,
+ 'availability': 'needs_auth',
+ },
+ 'skip': 'Moment has expired',
+ }]
+
+ def _real_extract(self, url):
+ channel, uploader_id, video_id = self._match_valid_url(url).group('artist', 'uid', 'id')
+ post = self._call_post_api(video_id)
+ api_video_id = post['extension']['moment']['video']['videoId']
+ video_info = self._call_api(
+ f'/cvideo/v1.0/cvideo-{api_video_id}/playInfo?videoId={api_video_id}', video_id,
+ note='Downloading moment JSON')['playInfo']
+
+ return {
+ 'id': video_id,
+ 'channel': channel,
+ 'uploader_id': uploader_id,
+ 'formats': self._get_formats(video_info, video_id),
+ 'availability': self._extract_availability(post),
+ **traverse_obj(post, {
+ 'title': ((('extension', 'moment', 'body'), 'body'), {str}),
+ 'uploader': ('author', 'profileName', {str}),
+ 'creator': (('community', 'author'), 'communityName', {str}),
+ 'channel_id': (('community', 'author'), 'communityId', {str_or_none}),
+ 'duration': ('extension', 'moment', 'video', 'uploadInfo', 'playTime', {float_or_none}),
+ 'timestamp': ('publishedAt', {lambda x: int_or_none(x, 1000)}),
+ 'thumbnail': ('extension', 'moment', 'video', 'uploadInfo', 'imageUrl', {url_or_none}),
+ 'like_count': ('emotionCount', {int_or_none}),
+ 'comment_count': ('commentCount', {int_or_none}),
+ }, get_all=False),
+ **NaverBaseIE.process_subtitles(video_info, self._get_subs),
+ }
+
+
+class WeverseTabBaseIE(WeverseBaseIE):
+ _ENDPOINT = None
+ _PATH = None
+ _QUERY = {}
+ _RESULT_IE = None
+
+ def _entries(self, channel_id, channel, first_page):
+ query = self._QUERY.copy()
+
+ for page in itertools.count(1):
+ posts = first_page if page == 1 else self._call_api(
+ update_url_query(self._ENDPOINT % channel_id, query), channel,
+ note=f'Downloading {self._PATH} tab page {page}')
+
+ for post in traverse_obj(posts, ('data', lambda _, v: v['postId'])):
+ yield self.url_result(
+ f'https://weverse.io/{channel}/{self._PATH}/{post["postId"]}',
+ self._RESULT_IE, post['postId'], **self._parse_post_meta(post),
+ channel=channel, channel_url=f'https://weverse.io/{channel}',
+ availability=self._extract_availability(post),
+ live_status=self._extract_live_status(post))
+
+ query['after'] = traverse_obj(posts, ('paging', 'nextParams', 'after', {str}))
+ if not query['after']:
+ break
+
+ def _real_extract(self, url):
+ channel = self._match_id(url)
+ channel_id = self._get_community_id(channel)
+
+ first_page = self._call_api(
+ update_url_query(self._ENDPOINT % channel_id, self._QUERY), channel,
+ note=f'Downloading {self._PATH} tab page 1')
+
+ return self.playlist_result(
+ self._entries(channel_id, channel, first_page), f'{channel}-{self._PATH}',
+ **traverse_obj(first_page, ('data', ..., {
+ 'playlist_title': ('community', 'communityName', {str}),
+ 'thumbnail': ('author', 'profileImageUrl', {url_or_none}),
+ }), get_all=False))
+
+
+class WeverseLiveTabIE(WeverseTabBaseIE):
+ _VALID_URL = r'https?://(?:www\.|m\.)?weverse\.io/(?P<id>[^/?#]+)/live/?(?:[?#]|$)'
+ _TESTS = [{
+ 'url': 'https://weverse.io/billlie/live/',
+ 'playlist_mincount': 55,
+ 'info_dict': {
+ 'id': 'billlie-live',
+ 'title': 'Billlie',
+ 'thumbnail': r're:^https?://.*\.jpe?g$',
+ },
+ }]
+
+ _ENDPOINT = '/post/v1.0/community-%s/liveTabPosts'
+ _PATH = 'live'
+ _QUERY = {'fieldSet': 'postsV1'}
+ _RESULT_IE = WeverseIE
+
+
+class WeverseMediaTabIE(WeverseTabBaseIE):
+ _VALID_URL = r'https?://(?:www\.|m\.)?weverse\.io/(?P<id>[^/?#]+)/media(?:/|/all|/new)?(?:[?#]|$)'
+ _TESTS = [{
+ 'url': 'https://weverse.io/billlie/media/',
+ 'playlist_mincount': 231,
+ 'info_dict': {
+ 'id': 'billlie-media',
+ 'title': 'Billlie',
+ 'thumbnail': r're:^https?://.*\.jpe?g$',
+ },
+ }, {
+ 'url': 'https://weverse.io/lesserafim/media/all',
+ 'only_matching': True,
+ }, {
+ 'url': 'https://weverse.io/lesserafim/media/new',
+ 'only_matching': True,
+ }]
+
+ _ENDPOINT = '/media/v1.0/community-%s/more'
+ _PATH = 'media'
+ _QUERY = {'fieldSet': 'postsV1', 'filterType': 'RECENT'}
+ _RESULT_IE = WeverseMediaIE
+
+
+class WeverseLiveIE(WeverseBaseIE):
+ _VALID_URL = r'https?://(?:www\.|m\.)?weverse\.io/(?P<id>[^/?#]+)/?(?:[?#]|$)'
+ _TESTS = [{
+ 'url': 'https://weverse.io/purplekiss',
+ 'info_dict': {
+ 'id': '3-116560493',
+ 'ext': 'mp4',
+ 'title': r're:모하냥🫶🏻',
+ 'description': '내일은 금요일~><',
+ 'uploader': '채인',
+ 'uploader_id': '1ffb1d9d904d6b3db2783f876eb9229d',
+ 'channel': 'purplekiss',
+ 'channel_id': '35',
+ 'channel_url': 'https://weverse.io/purplekiss',
+ 'creator': 'PURPLE KISS',
+ 'timestamp': 1680780892,
+ 'upload_date': '20230406',
+ 'release_timestamp': 1680780883,
+ 'release_date': '20230406',
+ 'thumbnail': 'https://weverse-live.pstatic.net/v1.0/live/62044/thumb',
+ 'view_count': int,
+ 'like_count': int,
+ 'comment_count': int,
+ 'availability': 'needs_auth',
+ 'live_status': 'is_live',
+ },
+ 'skip': 'Livestream has ended',
+ }, {
+ 'url': 'https://weverse.io/billlie/',
+ 'only_matching': True,
+ }]
+
+ def _real_extract(self, url):
+ channel = self._match_id(url)
+ channel_id = self._get_community_id(channel)
+
+ video_id = traverse_obj(
+ self._call_api(update_url_query(f'/post/v1.0/community-{channel_id}/liveTab', {
+ 'debugMessage': 'true',
+ 'fields': 'onAirLivePosts.fieldSet(postsV1).limit(10),reservedLivePosts.fieldSet(postsV1).limit(10)',
+ }), channel, note='Downloading live JSON'), (
+ ('onAirLivePosts', 'reservedLivePosts'), 'data',
+ lambda _, v: self._extract_live_status(v) in ('is_live', 'is_upcoming'), 'postId', {str}),
+ get_all=False)
+
+ if not video_id:
+ raise UserNotLive(video_id=channel)
+
+ return self.url_result(f'https://weverse.io/{channel}/live/{video_id}', WeverseIE)
diff --git a/yt_dlp/extractor/wevidi.py b/yt_dlp/extractor/wevidi.py
new file mode 100644
index 0000000..3b6d032
--- /dev/null
+++ b/yt_dlp/extractor/wevidi.py
@@ -0,0 +1,108 @@
+from .common import InfoExtractor
+from ..utils import clean_html, float_or_none, get_element_by_class, js_to_json, traverse_obj
+
+
+class WeVidiIE(InfoExtractor):
+ _VALID_URL = r'https?://(?:www\.)?wevidi\.net/watch/(?P<id>[\w-]{11})'
+ _TESTS = [{
+ 'url': 'https://wevidi.net/watch/2th7UO5F4KV',
+ 'md5': 'b913d1ff5bbad499e2c7ef4aa6d829d7',
+ 'info_dict': {
+ 'id': '2th7UO5F4KV',
+ 'ext': 'mp4',
+ 'title': 'YouTube Alternative: WeVidi - customizable channels & more',
+ 'thumbnail': r're:^https?://.*\.jpg$',
+ 'description': 'md5:73a27d0a87d49fbcc5584566326ebeed',
+ 'uploader': 'eclecRC',
+ 'duration': 932.098,
+ }
+ }, {
+ 'url': 'https://wevidi.net/watch/ievRuuQHbPS',
+ 'md5': 'ce8a94989a959bff9003fa27ee572935',
+ 'info_dict': {
+ 'id': 'ievRuuQHbPS',
+ 'ext': 'mp4',
+ 'title': 'WeVidi Playlists',
+ 'thumbnail': r're:^https?://.*\.jpg$',
+ 'description': 'md5:32cdfca272687390d9bd9b0c9c6153ee',
+ 'uploader': 'WeVidi',
+ 'duration': 36.1999,
+ }
+ }, {
+ 'url': 'https://wevidi.net/watch/PcMzDWaQSWb',
+ 'md5': '55ee0d3434be5d9e5cc76b83f2bb57ec',
+ 'info_dict': {
+ 'id': 'PcMzDWaQSWb',
+ 'ext': 'mp4',
+ 'title': 'Cat blep',
+ 'thumbnail': r're:^https?://.*\.jpg$',
+ 'description': 'md5:e2c9e2b54b8bb424cc64937c8fdc068f',
+ 'uploader': 'WeVidi',
+ 'duration': 41.972,
+ }
+ }, {
+ 'url': 'https://wevidi.net/watch/wJnRqDHNe_u',
+ 'md5': 'c8f263dd47e66cc17546b3abf47b5a77',
+ 'info_dict': {
+ 'id': 'wJnRqDHNe_u',
+ 'ext': 'mp4',
+ 'title': 'Gissy Talks: YouTube Alternatives',
+ 'thumbnail': r're:^https?://.*\.jpg$',
+ 'description': 'md5:e65036f0d4af80e0af191bd11af5195e',
+ 'uploader': 'GissyEva',
+ 'duration': 630.451,
+ }
+ }, {
+ 'url': 'https://wevidi.net/watch/4m1c4yJR_yc',
+ 'md5': 'c63ce5ca6990dce86855fc02ca5bc1ed',
+ 'info_dict': {
+ 'id': '4m1c4yJR_yc',
+ 'ext': 'mp4',
+ 'title': 'Enough of that! - Awesome Exilez Podcast',
+ 'thumbnail': r're:^https?://.*\.jpg$',
+ 'description': 'md5:96af99dd63468b2dfab3020560e3e9b2',
+ 'uploader': 'eclecRC',
+ 'duration': 6.804,
+ }
+ }]
+
+ def _extract_formats(self, wvplayer_props):
+ # Taken from WeVidi player JS: https://wevidi.net/layouts/default/static/player.min.js
+ resolution_map = {
+ 1: 144,
+ 2: 240,
+ 3: 360,
+ 4: 480,
+ 5: 720,
+ 6: 1080
+ }
+
+ src_path = f'{wvplayer_props["srcVID"]}/{wvplayer_props["srcUID"]}/{wvplayer_props["srcNAME"]}'
+ for res in traverse_obj(wvplayer_props, ('resolutions', ..., {int}, {lambda x: x or None})):
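+            # -(res // -2) is ceiling division, so e.g. res values 5 and 6 both map to format_id '2'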
+ format_id = str(-(res // -2) - 1)
+ yield {
+ 'acodec': 'mp4a.40.2',
+ 'ext': 'mp4',
+ 'format_id': format_id,
+ 'height': resolution_map.get(res),
+ 'url': f'https://www.wevidi.net/videoplayback/{src_path}/{format_id}',
+ 'vcodec': 'avc1.42E01E',
+ }
+
+ def _real_extract(self, url):
+ video_id = self._match_id(url)
+ webpage = self._download_webpage(url, video_id)
+
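+        # the inline WVPlayer({...}) config is JS rather than JSON and appears to end with '||' where a '}' belongs, hence the replacement below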
+ wvplayer_props = self._search_json(
+ r'WVPlayer\(', webpage, 'player', video_id,
+ transform_source=lambda x: js_to_json(x.replace('||', '}')))
+
+ return {
+ 'id': video_id,
+ 'title': clean_html(get_element_by_class('video_title', webpage)),
+ 'description': clean_html(get_element_by_class('descr_long', webpage)),
+ 'uploader': clean_html(get_element_by_class('username', webpage)),
+ 'formats': list(self._extract_formats(wvplayer_props)),
+ 'thumbnail': self._og_search_thumbnail(webpage),
+ 'duration': float_or_none(wvplayer_props.get('duration')),
+ }
diff --git a/yt_dlp/extractor/weyyak.py b/yt_dlp/extractor/weyyak.py
new file mode 100644
index 0000000..ef12be8
--- /dev/null
+++ b/yt_dlp/extractor/weyyak.py
@@ -0,0 +1,86 @@
+from .common import InfoExtractor
+from ..utils import (
+ float_or_none,
+ int_or_none,
+ parse_age_limit,
+ traverse_obj,
+ unified_timestamp,
+ url_or_none,
+)
+
+
+class WeyyakIE(InfoExtractor):
+ _VALID_URL = r'https?://weyyak\.com/(?P<lang>\w+)/(?:player/)?(?P<type>episode|movie)/(?P<id>\d+)'
+ _TESTS = [
+ {
+ 'url': 'https://weyyak.com/en/player/episode/1341952/Ribat-Al-Hob-Episode49',
+ 'md5': '0caf55c1a615531c8fe60f146ae46849',
+ 'info_dict': {
+ 'id': '1341952',
+ 'ext': 'mp4',
+ 'title': 'Ribat Al Hob',
+ 'duration': 2771,
+ 'alt_title': 'رباط الحب',
+ 'season': 'Season 1',
+ 'season_number': 1,
+ 'episode': 'Episode 49',
+ 'episode_number': 49,
+ 'timestamp': 1485907200,
+ 'upload_date': '20170201',
+ 'thumbnail': r're:^https://content\.weyyak\.com/.+/poster-image',
+ 'categories': ['Drama', 'Thrillers', 'Romance'],
+ 'tags': 'count:8',
+ },
+ },
+ {
+ 'url': 'https://weyyak.com/en/movie/233255/8-Seconds',
+ 'md5': 'fe740ae0f63e4d1c8a7fc147a410c564',
+ 'info_dict': {
+ 'id': '233255',
+ 'ext': 'mp4',
+ 'title': '8 Seconds',
+ 'duration': 6490,
+ 'alt_title': '8 ثواني',
+ 'description': 'md5:45b83a155c30b49950624c7e99600b9d',
+ 'age_limit': 15,
+ 'release_year': 2015,
+ 'timestamp': 1683106031,
+ 'upload_date': '20230503',
+ 'thumbnail': r're:^https://content\.weyyak\.com/.+/poster-image',
+ 'categories': ['Drama', 'Social'],
+ 'cast': ['Ceylin Adiyaman', 'Esra Inal'],
+ },
+ },
+ ]
+
+ def _real_extract(self, url):
+ video_id, lang, type_ = self._match_valid_url(url).group('id', 'lang', 'type')
+
+ path = 'episode/' if type_ == 'episode' else 'contents/moviedetails?contentkey='
+ data = self._download_json(
+ f'https://msapifo-prod-me.weyyak.z5.com/v1/{lang}/{path}{video_id}', video_id)['data']
+ m3u8_url = self._download_json(
+ f'https://api-weyyak.akamaized.net/get_info/{data["video_id"]}',
+ video_id, 'Extracting video details')['url_video']
+ formats, subtitles = self._extract_m3u8_formats_and_subtitles(m3u8_url, video_id)
+
+ return {
+ 'id': video_id,
+ 'formats': formats,
+ 'subtitles': subtitles,
+ **traverse_obj(data, {
+ 'title': ('title', {str}),
+ 'alt_title': ('translated_title', {str}),
+ 'description': ('synopsis', {str}),
+ 'duration': ('length', {float_or_none}),
+ 'age_limit': ('age_rating', {parse_age_limit}),
+ 'season_number': ('season_number', {int_or_none}),
+ 'episode_number': ('episode_number', {int_or_none}),
+ 'thumbnail': ('imagery', 'thumbnail', {url_or_none}),
+ 'categories': ('genres', ..., {str}),
+ 'tags': ('tags', ..., {str}),
+ 'cast': (('main_actor', 'main_actress'), {str}),
+ 'timestamp': ('insertedAt', {unified_timestamp}),
+ 'release_year': ('production_year', {int_or_none}),
+ }),
+ }
diff --git a/yt_dlp/extractor/whowatch.py b/yt_dlp/extractor/whowatch.py
new file mode 100644
index 0000000..f2808cd
--- /dev/null
+++ b/yt_dlp/extractor/whowatch.py
@@ -0,0 +1,96 @@
+from .common import InfoExtractor
+from ..compat import compat_str
+from ..utils import (
+    ExtractorError,
+    int_or_none,
+    qualities,
+    try_call,
+    try_get,
+)
+
+
+class WhoWatchIE(InfoExtractor):
+ IE_NAME = 'whowatch'
+ _VALID_URL = r'https?://whowatch\.tv/viewer/(?P<id>\d+)'
+
+ _TESTS = [{
+ 'url': 'https://whowatch.tv/viewer/21450171',
+ 'only_matching': True,
+ }]
+
+ def _real_extract(self, url):
+ video_id = self._match_id(url)
+ self._download_webpage(url, video_id)
+ metadata = self._download_json('https://api.whowatch.tv/lives/%s' % video_id, video_id)
+ live_data = self._download_json('https://api.whowatch.tv/lives/%s/play' % video_id, video_id)
+
+ title = try_call(
+ lambda: live_data['share_info']['live_title'][1:-1],
+ lambda: metadata['live']['title'],
+ expected_type=str)
+
+ hls_url = live_data.get('hls_url')
+ if not hls_url:
+ raise ExtractorError(live_data.get('error_message') or 'The user is offline.', expected=True)
+
+ QUALITIES = qualities(['low', 'medium', 'high', 'veryhigh'])
+ formats = []
+
+ for i, fmt in enumerate(live_data.get('streams') or []):
+ name = fmt.get('quality') or fmt.get('name') or compat_str(i)
+            stream_hls_url = fmt.get('hls_url')
+ rtmp_url = fmt.get('rtmp_url')
+ audio_only = fmt.get('audio_only')
+ quality = QUALITIES(fmt.get('quality'))
+
+            if stream_hls_url:
+                hls_fmts = self._extract_m3u8_formats(
+                    stream_hls_url, video_id, ext='mp4', m3u8_id='hls-%s' % name, quality=quality)
+ formats.extend(hls_fmts)
+ else:
+ hls_fmts = []
+
+ # RTMP url for audio_only is same as high format, so skip it
+ if rtmp_url and not audio_only:
+ formats.append({
+ 'url': rtmp_url,
+ 'format_id': 'rtmp-%s' % name,
+ 'ext': 'mp4',
+ 'protocol': 'rtmp_ffmpeg', # ffmpeg can, while rtmpdump can't
+ 'vcodec': 'h264',
+ 'acodec': 'aac',
+ 'quality': quality,
+ 'format_note': fmt.get('label'),
+ # note: HLS and RTMP have same resolution for now, so it's acceptable
+ 'width': try_get(hls_fmts, lambda x: x[0]['width'], int),
+ 'height': try_get(hls_fmts, lambda x: x[0]['height'], int),
+ })
+
+ # This contains the same formats as the above manifests and is used only as a fallback
+ formats.extend(self._extract_m3u8_formats(
+ hls_url, video_id, ext='mp4', m3u8_id='hls'))
+ self._remove_duplicate_formats(formats)
+
+ uploader_url = try_get(metadata, lambda x: x['live']['user']['user_path'], compat_str)
+ if uploader_url:
+ uploader_url = 'https://whowatch.tv/profile/%s' % uploader_url
+        uploader_id = try_get(metadata, lambda x: compat_str(x['live']['user']['id']), compat_str)
+ uploader = try_get(metadata, lambda x: x['live']['user']['name'], compat_str)
+ thumbnail = try_get(metadata, lambda x: x['live']['latest_thumbnail_url'], compat_str)
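+        # started_at is in milliseconds; scale=1000 converts it to a Unix timestamp in seconds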
+ timestamp = int_or_none(try_get(metadata, lambda x: x['live']['started_at'], int), scale=1000)
+ view_count = try_get(metadata, lambda x: x['live']['total_view_count'], int)
+ comment_count = try_get(metadata, lambda x: x['live']['comment_count'], int)
+
+ return {
+ 'id': video_id,
+ 'title': title,
+ 'uploader_id': uploader_id,
+ 'uploader_url': uploader_url,
+ 'uploader': uploader,
+ 'formats': formats,
+ 'thumbnail': thumbnail,
+ 'timestamp': timestamp,
+ 'view_count': view_count,
+ 'comment_count': comment_count,
+ 'is_live': True,
+ }
diff --git a/yt_dlp/extractor/whyp.py b/yt_dlp/extractor/whyp.py
new file mode 100644
index 0000000..fef89c3
--- /dev/null
+++ b/yt_dlp/extractor/whyp.py
@@ -0,0 +1,50 @@
+from .common import InfoExtractor
+from ..utils import (
+ float_or_none,
+ str_or_none,
+ traverse_obj,
+ url_or_none,
+)
+
+
+class WhypIE(InfoExtractor):
+ _VALID_URL = r'https?://(?:www\.)?whyp\.it/tracks/(?P<id>\d+)'
+ _TESTS = [{
+ 'url': 'https://www.whyp.it/tracks/18337/home-page-example-track-b4kq7',
+ 'md5': 'c1187b42ebf8605284e3dc92aeb33d16',
+ 'info_dict': {
+ 'url': 'https://cdn.whyp.it/50eb17cc-e9ff-4e18-b89b-dc9206a95cb1.mp3',
+ 'id': '18337',
+ 'title': 'Home Page Example Track',
+ 'description': 'md5:bd758000fb93f3159339c852b5b9133c',
+ 'ext': 'mp3',
+ 'duration': 52.82,
+ 'uploader': 'Brad',
+ 'uploader_id': '1',
+ 'thumbnail': 'https://cdn.whyp.it/a537bb36-3373-4c61-96c8-27fc1b2f427a.jpg',
+ },
+ }, {
+ 'url': 'https://www.whyp.it/tracks/18337',
+ 'only_matching': True,
+ }]
+
+ def _real_extract(self, url):
+ unique_id = self._match_id(url)
+ webpage = self._download_webpage(url, unique_id)
+ data = self._search_nuxt_data(webpage, unique_id)['rawTrack']
+
+ return {
+ 'url': data['audio_url'],
+ 'id': unique_id,
+ **traverse_obj(data, {
+ 'title': 'title',
+ 'description': 'description',
+ 'duration': ('duration', {float_or_none}),
+ 'uploader': ('user', 'username'),
+ 'uploader_id': ('user', 'id', {str_or_none}),
+ 'thumbnail': ('artwork_url', {url_or_none}),
+ }),
+ 'ext': 'mp3',
+ 'vcodec': 'none',
+ 'http_headers': {'Referer': 'https://whyp.it/'},
+ }
diff --git a/yt_dlp/extractor/wikimedia.py b/yt_dlp/extractor/wikimedia.py
new file mode 100644
index 0000000..11c801f
--- /dev/null
+++ b/yt_dlp/extractor/wikimedia.py
@@ -0,0 +1,55 @@
+import re
+
+from .common import InfoExtractor
+from ..utils import (
+ clean_html,
+ get_element_by_class,
+ parse_qs,
+ remove_start,
+ unescapeHTML,
+ urljoin,
+)
+
+
+class WikimediaIE(InfoExtractor):
+ IE_NAME = 'wikimedia.org'
+ _VALID_URL = r'https?://commons\.wikimedia\.org/wiki/File:(?P<id>[^/#?]+)\.\w+'
+ _TESTS = [{
+ 'url': 'https://commons.wikimedia.org/wiki/File:Die_Temperaturkurve_der_Erde_(ZDF,_Terra_X)_720p_HD_50FPS.webm',
+ 'info_dict': {
+ 'url': 're:https?://upload.wikimedia.org/wikipedia',
+ 'ext': 'webm',
+ 'id': 'Die_Temperaturkurve_der_Erde_(ZDF,_Terra_X)_720p_HD_50FPS',
+ 'title': 'Die Temperaturkurve der Erde (ZDF, Terra X) 720p HD 50FPS.webm - Wikimedia Commons',
+ 'description': 'md5:7cd84f76e7081f1be033d0b155b4a460',
+ 'license': 'Creative Commons Attribution 4.0 International',
+ 'uploader': 'ZDF/Terra X/Gruppe 5/Luise Wagner, Jonas Sichert, Andreas Hougardy',
+ 'subtitles': 'count:4'
+ }
+ }]
+
+ def _real_extract(self, url):
+ video_id = self._match_id(url)
+ webpage = self._download_webpage(url, video_id)
+
+ subtitles = {}
+ for sub in set(re.findall(r'\bsrc\s*=\s*["\'](/w/api[^"]+)["\']', webpage)):
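+            # subtitle track URLs carry language and format as query parameters, e.g. /w/api.php?...&lang=en&trackformat=srt (illustrative)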
+ sub = urljoin('https://commons.wikimedia.org', unescapeHTML(sub))
+ qs = parse_qs(sub)
+ lang = qs.get('lang', [None])[-1]
+ sub_ext = qs.get('trackformat', [None])[-1]
+ if lang and sub_ext:
+ subtitles.setdefault(lang, []).append({'ext': sub_ext, 'url': sub})
+
+ return {
+ 'id': video_id,
+ 'url': self._html_search_regex(r'<source\s[^>]*\bsrc="([^"]+)"', webpage, 'video URL'),
+ 'description': clean_html(get_element_by_class('description', webpage)),
+ 'title': remove_start(self._og_search_title(webpage), 'File:'),
+ 'license': self._html_search_regex(
+ r'licensed under(?: the)? (.+?) license',
+ get_element_by_class('licensetpl', webpage), 'license', default=None),
+ 'uploader': self._html_search_regex(
+ r'>\s*Author\s*</td>\s*<td\b[^>]*>\s*([^<]+)\s*</td>', webpage, 'video author', default=None),
+ 'subtitles': subtitles,
+ }
diff --git a/yt_dlp/extractor/wimbledon.py b/yt_dlp/extractor/wimbledon.py
new file mode 100644
index 0000000..0223e54
--- /dev/null
+++ b/yt_dlp/extractor/wimbledon.py
@@ -0,0 +1,61 @@
+from .common import InfoExtractor
+from ..utils import (
+ parse_duration,
+ traverse_obj,
+)
+
+
+class WimbledonIE(InfoExtractor):
+ _VALID_URL = r'https?://(?:www\.)?wimbledon\.com/\w+/video/media/(?P<id>\d+)\.html'
+ _TESTS = [{
+ 'url': 'https://www.wimbledon.com/en_GB/video/media/6330247525112.html',
+ 'info_dict': {
+ 'id': '6330247525112',
+ 'ext': 'mp4',
+ 'timestamp': 1687972186,
+ 'description': '',
+ 'thumbnail': r're:^https://[\w.-]+\.prod\.boltdns\.net/[^?#]+/image\.jpg',
+ 'upload_date': '20230628',
+ 'title': 'Coco Gauff | My Wimbledon Inspiration',
+ 'tags': ['features', 'trending', 'homepage'],
+ 'uploader_id': '3506358525001',
+ 'duration': 163072.0,
+ },
+ }, {
+ 'url': 'https://www.wimbledon.com/en_GB/video/media/6308703111112.html',
+ 'info_dict': {
+ 'id': '6308703111112',
+ 'ext': 'mp4',
+ 'thumbnail': r're:^https://[\w.-]+\.prod\.boltdns\.net/[^?#]+/image\.jpg',
+ 'description': 'null',
+ 'upload_date': '20220629',
+ 'uploader_id': '3506358525001',
+ 'title': 'Roblox | WimbleWorld ',
+ 'duration': 101440.0,
+ 'tags': ['features', 'kids'],
+ 'timestamp': 1656500867,
+ },
+ }, {
+ 'url': 'https://www.wimbledon.com/en_US/video/media/6309327106112.html',
+ 'only_matching': True,
+ }, {
+ 'url': 'https://www.wimbledon.com/es_Es/video/media/6308377909112.html',
+ 'only_matching': True,
+ }]
+
+ def _real_extract(self, url):
+ video_id = self._match_id(url)
+ metadata = self._download_json(
+ f'https://www.wimbledon.com/relatedcontent/rest/v2/wim_v1/en/content/wim_v1_{video_id}_en', video_id)
+
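+        # delegate playback to the Brightcove player via url_transparent, layering Wimbledon's own metadata on top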
+ return {
+ '_type': 'url_transparent',
+ 'url': f'http://players.brightcove.net/3506358525001/default_default/index.html?videoId={video_id}',
+ 'ie_key': 'BrightcoveNew',
+ 'id': video_id,
+ **traverse_obj(metadata, {
+ 'title': 'title',
+ 'description': 'description',
+ 'duration': ('metadata', 'duration', {parse_duration}),
+ }),
+ }
diff --git a/yt_dlp/extractor/wimtv.py b/yt_dlp/extractor/wimtv.py
new file mode 100644
index 0000000..f9bf092
--- /dev/null
+++ b/yt_dlp/extractor/wimtv.py
@@ -0,0 +1,150 @@
+from .common import InfoExtractor
+from ..utils import (
+    ExtractorError,
+    determine_ext,
+    parse_duration,
+    urlencode_postdata,
+)
+
+
+class WimTVIE(InfoExtractor):
+ _player = None
+ _UUID_RE = r'[\da-f]{8}-[\da-f]{4}-[\da-f]{4}-[\da-f]{4}-[\da-f]{12}'
+ _VALID_URL = r'''(?x:
+ https?://platform\.wim\.tv/
+ (?:
+ (?:embed/)?\?
+ |\#/webtv/.+?/
+ )
+ (?P<type>vod|live|cast)[=/]
+ (?P<id>%s).*?)''' % _UUID_RE
+ _EMBED_REGEX = [rf'<iframe[^>]+src=["\'](?P<url>{_VALID_URL})']
+ _TESTS = [{
+ # vod stream
+ 'url': 'https://platform.wim.tv/embed/?vod=db29fb32-bade-47b6-a3a6-cb69fe80267a',
+ 'info_dict': {
+ 'id': 'db29fb32-bade-47b6-a3a6-cb69fe80267a',
+ 'ext': 'mp4',
+ 'title': 'AMA SUPERCROSS 2020 - R2 ST. LOUIS',
+ 'duration': 6481,
+ 'thumbnail': r're:https?://.+?/thumbnail/.+?/720$'
+ },
+ 'params': {
+ 'skip_download': True,
+ },
+ }, {
+ # live stream
+ 'url': 'https://platform.wim.tv/embed/?live=28e22c22-49db-40f3-8c37-8cbb0ff44556&autostart=true',
+ 'info_dict': {
+ 'id': '28e22c22-49db-40f3-8c37-8cbb0ff44556',
+ 'ext': 'mp4',
+ 'title': 'Streaming MSmotorTV',
+ 'is_live': True,
+ },
+ 'params': {
+ 'skip_download': True,
+ },
+ }, {
+ 'url': 'https://platform.wim.tv/#/webtv/automotornews/vod/422492b6-539e-474d-9c6b-68c9d5893365',
+ 'only_matching': True,
+ }, {
+ 'url': 'https://platform.wim.tv/#/webtv/renzoarborechannel/cast/f47e0d15-5b45-455e-bf0d-dba8ffa96365',
+ 'only_matching': True,
+ }]
+
+ def _real_initialize(self):
+ if not self._player:
+ self._get_player_data()
+
+ def _get_player_data(self):
+ msg_id = 'Player data'
+ self._player = {}
+
+        sources = [{
+ 'url': 'https://platform.wim.tv/common/libs/player/wimtv/wim-rest.js',
+ 'vars': [{
+ 'regex': r'appAuth = "(.+?)"',
+ 'variable': 'app_auth',
+ }]
+ }, {
+ 'url': 'https://platform.wim.tv/common/config/endpointconfig.js',
+ 'vars': [{
+ 'regex': r'PRODUCTION_HOSTNAME_THUMB = "(.+?)"',
+ 'variable': 'thumb_server',
+ }, {
+ 'regex': r'PRODUCTION_HOSTNAME_THUMB\s*\+\s*"(.+?)"',
+ 'variable': 'thumb_server_path',
+ }]
+ }]
+
+        for data in sources:
+ temp = self._download_webpage(data['url'], msg_id)
+ for var in data['vars']:
+ val = self._search_regex(var['regex'], temp, msg_id)
+ if not val:
+ raise ExtractorError('%s not found' % var['variable'])
+ self._player[var['variable']] = val
+
+ def _generate_token(self):
+        token_data = self._download_json(
+ 'https://platform.wim.tv/wimtv-server/oauth/token', 'Token generation',
+ headers={'Authorization': 'Basic %s' % self._player['app_auth']},
+ data=urlencode_postdata({'grant_type': 'client_credentials'}))
+        token = token_data.get('access_token')
+ if not token:
+ raise ExtractorError('access token not generated')
+ return token
+
+ def _generate_thumbnail(self, thumb_id, width='720'):
+ if not thumb_id or not self._player.get('thumb_server'):
+ return None
+ if not self._player.get('thumb_server_path'):
+ self._player['thumb_server_path'] = ''
+ return '%s%s/asset/thumbnail/%s/%s' % (
+ self._player['thumb_server'],
+ self._player['thumb_server_path'],
+ thumb_id, width)
+
+ def _real_extract(self, url):
+ urlc = self._match_valid_url(url).groupdict()
+ video_id = urlc['id']
+        if urlc['type'] in {'live', 'cast'}:
+            stream_type = urlc['type'] + '/channel'
+            is_live = True
+        else:
+            stream_type = 'vod'
+            is_live = False
+ token = self._generate_token()
+        play_data = self._download_json(
+ 'https://platform.wim.tv/wimtv-server/api/public/%s/%s/play' % (
+ stream_type, video_id), video_id,
+ headers={'Authorization': 'Bearer %s' % token,
+ 'Content-Type': 'application/json'},
+            data=b'{}')
+
+ formats = []
+        for src in play_data.get('srcs') or []:
+ if src.get('mimeType') == 'application/x-mpegurl':
+ formats.extend(
+ self._extract_m3u8_formats(
+ src.get('uniqueStreamer'), video_id, 'mp4'))
+ if src.get('mimeType') == 'video/flash':
+ formats.append({
+ 'format_id': 'rtmp',
+ 'url': src.get('uniqueStreamer'),
+ 'ext': determine_ext(src.get('uniqueStreamer'), 'flv'),
+ 'rtmp_live': is_live,
+ })
+        resource = play_data.get('resource') or {}
+        thumb = self._generate_thumbnail(resource.get('thumbnailId'))
+
+ return {
+ 'id': video_id,
+        'title': resource.get('title') or resource.get('name'),
+        'duration': parse_duration(resource.get('duration')),
+ 'formats': formats,
+ 'thumbnail': thumb,
+ 'is_live': is_live,
+ }
diff --git a/yt_dlp/extractor/wistia.py b/yt_dlp/extractor/wistia.py
new file mode 100644
index 0000000..bce5e83
--- /dev/null
+++ b/yt_dlp/extractor/wistia.py
@@ -0,0 +1,394 @@
+import re
+import urllib.parse
+from base64 import b64decode
+
+from .common import InfoExtractor
+from ..networking import HEADRequest
+from ..networking.exceptions import HTTPError
+from ..utils import (
+ ExtractorError,
+ determine_ext,
+ float_or_none,
+ int_or_none,
+ parse_qs,
+ traverse_obj,
+ try_get,
+ update_url_query,
+ urlhandle_detect_ext,
+)
+
+
+class WistiaBaseIE(InfoExtractor):
+ _VALID_ID_REGEX = r'(?P<id>[a-z0-9]{10})'
+ _VALID_URL_BASE = r'https?://(?:\w+\.)?wistia\.(?:net|com)/(?:embed/)?'
+ _EMBED_BASE_URL = 'http://fast.wistia.net/embed/'
+
+ def _download_embed_config(self, config_type, config_id, referer):
+ base_url = self._EMBED_BASE_URL + '%s/%s' % (config_type, config_id)
+ embed_config = self._download_json(
+ base_url + '.json', config_id, headers={
+ 'Referer': referer if referer.startswith('http') else base_url, # Some videos require this.
+ })
+
+ error = traverse_obj(embed_config, 'error')
+ if error:
+ raise ExtractorError(
+ f'Error while getting the playlist: {error}', expected=True)
+
+ return embed_config
+
+ def _get_real_ext(self, url):
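+        # asset URLs often end in ".bin"; when the extension is inconclusive, sniff the real container with a HEAD request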
+ ext = determine_ext(url, default_ext='bin')
+ if ext == 'bin':
+ urlh = self._request_webpage(
+ HEADRequest(url), None, note='Checking media extension',
+ errnote='HEAD request returned error', fatal=False)
+ if urlh:
+ ext = urlhandle_detect_ext(urlh, default='bin')
+ return 'mp4' if ext == 'mov' else ext
+
+ def _extract_media(self, embed_config):
+ data = embed_config['media']
+ video_id = data['hashedId']
+ title = data['name']
+
+ formats = []
+ thumbnails = []
+ for a in data['assets']:
+ aurl = a.get('url')
+ if not aurl:
+ continue
+ astatus = a.get('status')
+ atype = a.get('type')
+ if (astatus is not None and astatus != 2) or atype in ('preview', 'storyboard'):
+ continue
+ elif atype in ('still', 'still_image'):
+ thumbnails.append({
+ 'url': aurl.replace('.bin', f'.{self._get_real_ext(aurl)}'),
+ 'width': int_or_none(a.get('width')),
+ 'height': int_or_none(a.get('height')),
+ 'filesize': int_or_none(a.get('size')),
+ })
+ else:
+ aext = a.get('ext') or self._get_real_ext(aurl)
+ display_name = a.get('display_name')
+ format_id = atype
+ if atype and atype.endswith('_video') and display_name:
+ format_id = '%s-%s' % (atype[:-6], display_name)
+ f = {
+ 'format_id': format_id,
+ 'url': aurl,
+ 'tbr': int_or_none(a.get('bitrate')) or None,
+ 'quality': 1 if atype == 'original' else None,
+ }
+ if display_name == 'Audio':
+ f.update({
+ 'vcodec': 'none',
+ })
+ else:
+ f.update({
+ 'width': int_or_none(a.get('width')),
+ 'height': int_or_none(a.get('height')),
+ 'vcodec': a.get('codec'),
+ })
+ if a.get('container') == 'm3u8' or aext == 'm3u8':
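+                    # the same rendition is served both as a raw .ts download and via HLS, so expose both variants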
+ ts_f = f.copy()
+ ts_f.update({
+ 'ext': 'ts',
+ 'format_id': f['format_id'].replace('hls-', 'ts-'),
+ 'url': f['url'].replace('.bin', '.ts'),
+ })
+ formats.append(ts_f)
+ f.update({
+ 'ext': 'mp4',
+ 'protocol': 'm3u8_native',
+ })
+ else:
+ f.update({
+ 'container': a.get('container'),
+ 'ext': aext,
+ 'filesize': int_or_none(a.get('size')),
+ })
+ formats.append(f)
+
+ subtitles = {}
+ for caption in data.get('captions', []):
+ language = caption.get('language')
+ if not language:
+ continue
+ subtitles[language] = [{
+ 'url': self._EMBED_BASE_URL + 'captions/' + video_id + '.vtt?language=' + language,
+ }]
+
+ return {
+ 'id': video_id,
+ 'title': title,
+ 'description': data.get('seoDescription'),
+ 'formats': formats,
+ 'thumbnails': thumbnails,
+ 'duration': float_or_none(data.get('duration')),
+ 'timestamp': int_or_none(data.get('createdAt')),
+ 'subtitles': subtitles,
+ }
+
+ @classmethod
+ def _extract_from_webpage(cls, url, webpage):
+ from .teachable import TeachableIE
+
+ if list(TeachableIE._extract_embed_urls(url, webpage)):
+ return
+
+ yield from super()._extract_from_webpage(url, webpage)
+
+ @classmethod
+ def _extract_wistia_async_embed(cls, webpage):
+ # https://wistia.com/support/embed-and-share/video-on-your-website
+ # https://wistia.com/support/embed-and-share/channel-embeds
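+        # matches markup like <div class="wistia_embed wistia_async_abcde12345 ..."> (illustrative)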
+ yield from re.finditer(
+ r'''(?sx)
+ <(?:div|section)[^>]+class=([\"'])(?:(?!\1).)*?(?P<type>wistia[a-z_0-9]+)\s*\bwistia_async_(?P<id>[a-z0-9]{10})\b(?:(?!\1).)*?\1
+ ''', webpage)
+
+ @classmethod
+ def _extract_url_media_id(cls, url):
+ mobj = re.search(r'(?:wmediaid|wvideo(?:id)?)]?=(?P<id>[a-z0-9]{10})', urllib.parse.unquote_plus(url))
+ if mobj:
+ return mobj.group('id')
+
+
+class WistiaIE(WistiaBaseIE):
+ _VALID_URL = r'(?:wistia:|%s(?:iframe|medias)/)%s' % (WistiaBaseIE._VALID_URL_BASE, WistiaBaseIE._VALID_ID_REGEX)
+ _EMBED_REGEX = [
+ r'''(?x)
+ <(?:meta[^>]+?content|(?:iframe|script)[^>]+?src)=["\']
+ (?P<url>(?:https?:)?//(?:fast\.)?wistia\.(?:net|com)/embed/(?:iframe|medias)/[a-z0-9]{10})
+ ''']
+ _TESTS = [{
+ # with hls video
+ 'url': 'wistia:807fafadvk',
+ 'md5': 'daff0f3687a41d9a71b40e0e8c2610fe',
+ 'info_dict': {
+ 'id': '807fafadvk',
+ 'ext': 'mp4',
+ 'title': 'Drip Brennan Dunn Workshop',
+ 'description': 'a JV Webinars video',
+ 'upload_date': '20160518',
+ 'timestamp': 1463607249,
+ 'duration': 4987.11,
+ },
+ 'skip': 'video unavailable',
+ }, {
+ 'url': 'wistia:a6ndpko1wg',
+ 'md5': '10c1ce9c4dde638202513ed17a3767bd',
+ 'info_dict': {
+ 'id': 'a6ndpko1wg',
+ 'ext': 'mp4',
+ 'title': 'Episode 2: Boxed Water\'s retention is thirsty',
+ 'upload_date': '20210324',
+ 'description': 'md5:da5994c2c2d254833b412469d9666b7a',
+ 'duration': 966.0,
+ 'timestamp': 1616614369,
+ 'thumbnail': 'https://embed-ssl.wistia.com/deliveries/53dc60239348dc9b9fba3755173ea4c2.png',
+ }
+ }, {
+ 'url': 'wistia:5vd7p4bct5',
+ 'md5': 'b9676d24bf30945d97060638fbfe77f0',
+ 'info_dict': {
+ 'id': '5vd7p4bct5',
+ 'ext': 'mp4',
+ 'title': 'md5:eaa9f64c4efd7b5f098b9b6118597679',
+ 'description': 'md5:a9bea0315f0616aa5df2dc413ddcdd0f',
+ 'upload_date': '20220915',
+ 'timestamp': 1663258727,
+ 'duration': 623.019,
+ 'thumbnail': r're:https?://embed(?:-ssl)?.wistia.com/.+\.jpg$',
+ },
+ }, {
+ 'url': 'wistia:sh7fpupwlt',
+ 'only_matching': True,
+ }, {
+ 'url': 'http://fast.wistia.net/embed/iframe/sh7fpupwlt',
+ 'only_matching': True,
+ }, {
+ 'url': 'http://fast.wistia.com/embed/iframe/sh7fpupwlt',
+ 'only_matching': True,
+ }, {
+ 'url': 'http://fast.wistia.net/embed/medias/sh7fpupwlt.json',
+ 'only_matching': True,
+ }]
+
+ _WEBPAGE_TESTS = [{
+ 'url': 'https://www.weidert.com/blog/wistia-channels-video-marketing-tool',
+ 'info_dict': {
+ 'id': 'cqwukac3z1',
+ 'ext': 'mp4',
+ 'title': 'How Wistia Channels Can Help Capture Inbound Value From Your Video Content',
+ 'duration': 158.125,
+ 'timestamp': 1618974400,
+ 'description': 'md5:27abc99a758573560be72600ef95cece',
+ 'upload_date': '20210421',
+ 'thumbnail': 'https://embed-ssl.wistia.com/deliveries/6c551820ae950cdee2306d6cbe9ef742.jpg',
+ }
+ }, {
+ 'url': 'https://study.com/academy/lesson/north-american-exploration-failed-colonies-of-spain-france-england.html#lesson',
+ 'md5': 'b9676d24bf30945d97060638fbfe77f0',
+ 'info_dict': {
+ 'id': '5vd7p4bct5',
+ 'ext': 'mp4',
+ 'title': 'paywall_north-american-exploration-failed-colonies-of-spain-france-england',
+ 'upload_date': '20220915',
+ 'timestamp': 1663258727,
+ 'duration': 623.019,
+ 'thumbnail': 'https://embed-ssl.wistia.com/deliveries/83e6ec693e2c05a0ce65809cbaead86a.jpg',
+ 'description': 'a Paywall Videos video',
+ },
+ }]
+
+ def _real_extract(self, url):
+ video_id = self._match_id(url)
+ embed_config = self._download_embed_config('medias', video_id, url)
+ return self._extract_media(embed_config)
+
+ @classmethod
+ def _extract_embed_urls(cls, url, webpage):
+ urls = list(super()._extract_embed_urls(url, webpage))
+ for match in cls._extract_wistia_async_embed(webpage):
+ if match.group('type') != 'wistia_channel':
+ urls.append('wistia:%s' % match.group('id'))
+ for match in re.finditer(r'(?:data-wistia-?id=["\']|Wistia\.embed\(["\']|id=["\']wistia_)(?P<id>[a-z0-9]{10})',
+ webpage):
+ urls.append('wistia:%s' % match.group('id'))
+ if not WistiaChannelIE._extract_embed_urls(url, webpage): # Fallback
+ media_id = cls._extract_url_media_id(url)
+ if media_id:
+                urls.append('wistia:%s' % media_id)
+ return urls
+
+
+class WistiaPlaylistIE(WistiaBaseIE):
+ _VALID_URL = r'%splaylists/%s' % (WistiaBaseIE._VALID_URL_BASE, WistiaBaseIE._VALID_ID_REGEX)
+
+ _TEST = {
+ 'url': 'https://fast.wistia.net/embed/playlists/aodt9etokc',
+ 'info_dict': {
+ 'id': 'aodt9etokc',
+ },
+ 'playlist_count': 3,
+ }
+
+ def _real_extract(self, url):
+ playlist_id = self._match_id(url)
+ playlist = self._download_embed_config('playlists', playlist_id, url)
+
+ entries = []
+ for media in (try_get(playlist, lambda x: x[0]['medias']) or []):
+ embed_config = media.get('embed_config')
+ if not embed_config:
+ continue
+ entries.append(self._extract_media(embed_config))
+
+ return self.playlist_result(entries, playlist_id)
+
+
+class WistiaChannelIE(WistiaBaseIE):
+ _VALID_URL = r'(?:wistiachannel:|%schannel/)%s' % (WistiaBaseIE._VALID_URL_BASE, WistiaBaseIE._VALID_ID_REGEX)
+
+ _TESTS = [{
+ # JSON Embed API returns 403, should fall back to webpage
+ 'url': 'https://fast.wistia.net/embed/channel/yvyvu7wjbg?wchannelid=yvyvu7wjbg',
+ 'info_dict': {
+ 'id': 'yvyvu7wjbg',
+ 'title': 'Copysmith Tutorials and Education!',
+ 'description': 'Learn all things Copysmith via short and informative videos!'
+ },
+ 'playlist_mincount': 7,
+ 'expected_warnings': ['falling back to webpage'],
+ }, {
+ 'url': 'https://fast.wistia.net/embed/channel/3802iirk0l',
+ 'info_dict': {
+ 'id': '3802iirk0l',
+ 'title': 'The Roof',
+ },
+ 'playlist_mincount': 20,
+ }, {
+ # link to popup video, follow --no-playlist
+ 'url': 'https://fast.wistia.net/embed/channel/3802iirk0l?wchannelid=3802iirk0l&wmediaid=sp5dqjzw3n',
+ 'info_dict': {
+ 'id': 'sp5dqjzw3n',
+ 'ext': 'mp4',
+ 'title': 'The Roof S2: The Modern CRO',
+ 'thumbnail': 'https://embed-ssl.wistia.com/deliveries/dadfa9233eaa505d5e0c85c23ff70741.png',
+ 'duration': 86.487,
+ 'description': 'A sales leader on The Roof? Man, they really must be letting anyone up here this season.\n',
+ 'timestamp': 1619790290,
+ 'upload_date': '20210430',
+ },
+ 'params': {'noplaylist': True, 'skip_download': True},
+ }]
+ _WEBPAGE_TESTS = [{
+ 'url': 'https://www.profitwell.com/recur/boxed-out',
+ 'info_dict': {
+ 'id': '6jyvmqz6zs',
+ 'title': 'Boxed Out',
+ 'description': 'md5:14a8a93a1dbe236718e6a59f8c8c7bae',
+ },
+ 'playlist_mincount': 30,
+ }, {
+ # section instead of div
+ 'url': 'https://360learning.com/studio/onboarding-joei/',
+ 'info_dict': {
+ 'id': 'z874k93n2o',
+ 'title': 'Onboarding Joei.',
+ 'description': 'Coming to you weekly starting Feb 19th.',
+ },
+ 'playlist_mincount': 20,
+ }, {
+ 'url': 'https://amplitude.com/amplify-sessions?amp%5Bwmediaid%5D=pz0m0l0if3&amp%5Bwvideo%5D=pz0m0l0if3&wchannelid=emyjmwjf79&wmediaid=i8um783bdt',
+ 'info_dict': {
+ 'id': 'pz0m0l0if3',
+ 'title': 'A Framework for Improving Product Team Performance',
+ 'ext': 'mp4',
+ 'timestamp': 1653935275,
+ 'upload_date': '20220530',
+ 'description': 'Learn how to help your company improve and achieve your product related goals.',
+ 'duration': 1854.39,
+ 'thumbnail': 'https://embed-ssl.wistia.com/deliveries/12fd19e56413d9d6f04e2185c16a6f8854e25226.png',
+ },
+ 'params': {'noplaylist': True, 'skip_download': True},
+ }]
+
+ def _real_extract(self, url):
+ channel_id = self._match_id(url)
+ media_id = self._extract_url_media_id(url)
+ if not self._yes_playlist(channel_id, media_id, playlist_label='channel'):
+ return self.url_result(f'wistia:{media_id}', 'Wistia')
+
+ try:
+ data = self._download_embed_config('channel', channel_id, url)
+ except (ExtractorError, HTTPError):
+ # Some channels give a 403 from the JSON API
+ self.report_warning('Failed to download channel data from API, falling back to webpage.')
+ webpage = self._download_webpage(f'https://fast.wistia.net/embed/channel/{channel_id}', channel_id)
+ data = self._parse_json(
+ self._search_regex(r'wchanneljsonp-%s\'\]\s*=[^\"]*\"([A-Za-z0-9=/]*)' % channel_id, webpage, 'jsonp', channel_id),
+ channel_id, transform_source=lambda x: urllib.parse.unquote_plus(b64decode(x).decode('utf-8')))
+
+ # XXX: can there be more than one series?
+ series = traverse_obj(data, ('series', 0), default={})
+
+ entries = [
+ self.url_result(f'wistia:{video["hashedId"]}', WistiaIE, title=video.get('name'))
+ for video in traverse_obj(series, ('sections', ..., 'videos', ...)) or []
+ if video.get('hashedId')
+ ]
+
+ return self.playlist_result(
+ entries, channel_id, playlist_title=series.get('title'), playlist_description=series.get('description'))
+
+ @classmethod
+ def _extract_embed_urls(cls, url, webpage):
+ yield from super()._extract_embed_urls(url, webpage)
+ for match in cls._extract_wistia_async_embed(webpage):
+ if match.group('type') == 'wistia_channel':
+ # original url may contain wmediaid query param
+ yield update_url_query(f'wistiachannel:{match.group("id")}', parse_qs(url))
diff --git a/yt_dlp/extractor/wordpress.py b/yt_dlp/extractor/wordpress.py
new file mode 100644
index 0000000..378d99d
--- /dev/null
+++ b/yt_dlp/extractor/wordpress.py
@@ -0,0 +1,154 @@
+import re
+
+from .common import InfoExtractor
+from ..utils import (
+ extract_attributes,
+ get_elements_by_class,
+ get_elements_text_and_html_by_attribute,
+ int_or_none,
+ parse_duration,
+ traverse_obj,
+)
+
+
+# https://codex.wordpress.org/Playlist_Shortcode
+class WordpressPlaylistEmbedIE(InfoExtractor):
+ _VALID_URL = False
+ IE_NAME = 'wordpress:playlist'
+ _WEBPAGE_TESTS = [{
+ # 5 WordPress playlists. This is using wpse-playlist, which is similar.
+ # See: https://github.com/birgire/wpse-playlist
+ 'url': 'https://xlino.com/wordpress-playlist-shortcode-with-external-audio-or-video-files/',
+ 'info_dict': {
+ 'id': 'wordpress-playlist-shortcode-with-external-audio-or-video-files',
+ 'title': 'WordPress: Playlist shortcode with external audio or video files – Birgir Erlendsson (birgire)',
+ 'age_limit': 0,
+ },
+ 'playlist_count': 5,
+ }, {
+ 'url': 'https://pianoadventures.com/products/piano-adventures-level-1-lesson-book-enhanced-cd/',
+ 'info_dict': {
+ 'id': 'piano-adventures-level-1-lesson-book-enhanced-cd-wp-playlist-1',
+ 'title': 'Wordpress Playlist',
+ 'thumbnail': 'https://pianoadventures.com/wp-content/uploads/sites/13/2022/01/CD1002cover.jpg',
+ 'age_limit': 0,
+ },
+ 'playlist': [{
+ 'info_dict': {
+ 'id': 'CD1002-21',
+ 'ext': 'mp3',
+ 'title': '21 Half-Time Show',
+ 'thumbnail': 'https://pianoadventures.com/wp-content/plugins/media-library-assistant/images/crystal/audio.png',
+ 'album': 'Piano Adventures Level 1 Lesson Book (2nd Edition)',
+ 'genre': 'Classical',
+ 'duration': 49.0,
+ 'artist': 'Nancy and Randall Faber',
+ 'description': 'md5:a9f8e9aeabbd2912bc13cc0fab1a4ce8',
+ }
+ }],
+ 'playlist_count': 6,
+ 'params': {'skip_download': True}
+ }]
+
+ def _extract_from_webpage(self, url, webpage):
+ # class should always be "wp-playlist-script"
+ # See: https://core.trac.wordpress.org/browser/trunk/src/wp-includes/media.php#L2930
+        for i, playlist_html in enumerate(get_elements_by_class('wp-playlist-script', webpage)):
+            playlist_json = self._parse_json(playlist_html, self._generic_id(url), fatal=False, ignore_extra=True, errnote='') or {}
+ if not playlist_json:
+ continue
+ entries = [{
+ 'id': self._generic_id(track['src']),
+ 'title': track.get('title'),
+ 'url': track.get('src'),
+ 'thumbnail': traverse_obj(track, ('thumb', 'src')),
+ 'album': traverse_obj(track, ('meta', 'album')),
+ 'artist': traverse_obj(track, ('meta', 'artist')),
+ 'genre': traverse_obj(track, ('meta', 'genre')),
+ 'duration': parse_duration(traverse_obj(track, ('meta', 'length_formatted'))),
+ 'description': track.get('description'),
+ 'height': int_or_none(traverse_obj(track, ('dimensions', 'original', 'height'))),
+ 'width': int_or_none(traverse_obj(track, ('dimensions', 'original', 'width'))),
+ } for track in traverse_obj(playlist_json, ('tracks', ...), expected_type=dict)]
+ yield self.playlist_result(entries, self._generic_id(url) + f'-wp-playlist-{i + 1}', 'Wordpress Playlist')
+
+
+class WordpressMiniAudioPlayerEmbedIE(InfoExtractor):
+ # WordPress MB Mini Player Plugin
+ # https://wordpress.org/plugins/wp-miniaudioplayer/
+ # Note: This is for the WordPress plugin version only.
+ _VALID_URL = False
+ IE_NAME = 'wordpress:mb.miniAudioPlayer'
+ _WEBPAGE_TESTS = [{
+ # Version 1.8.10: https://plugins.trac.wordpress.org/browser/wp-miniaudioplayer/tags/1.8.10
+ 'url': 'https://news.samsung.com/global/over-the-horizon-the-evolution-of-the-samsung-galaxy-brand-sound',
+ 'info_dict': {
+ 'id': 'over-the-horizon-the-evolution-of-the-samsung-galaxy-brand-sound',
+ 'title': 'Over the Horizon: The Evolution of the Samsung Galaxy Brand Sound',
+ 'age_limit': 0,
+ 'thumbnail': 'https://img.global.news.samsung.com/global/wp-content/uploads/2015/04/OTH_Main_Title-e1429612467870.jpg',
+ 'description': 'md5:bc3dd738d1f11d9232e94e6629983bf7',
+ },
+ 'playlist': [{
+ 'info_dict': {
+ 'id': 'over_the_horizon_2013',
+ 'ext': 'mp3',
+ 'title': 'Over the Horizon 2013',
+ 'url': 'http://news.samsung.com/global/wp-content/uploads/ringtones/over_the_horizon_2013.mp3'
+ }
+ }],
+ 'playlist_count': 6,
+ 'params': {'skip_download': True}
+ }, {
+ # Version 1.9.3: https://plugins.trac.wordpress.org/browser/wp-miniaudioplayer/tags/1.9.3
+ 'url': 'https://www.booksontape.com/collections/audiobooks-with-teacher-guides/',
+ 'info_dict': {
+ 'id': 'audiobooks-with-teacher-guides',
+ 'title': 'Audiobooks with Teacher Guides | Books on Tape',
+ 'age_limit': 0,
+ 'thumbnail': 'https://www.booksontape.com/wp-content/uploads/2016/09/bot-logo-1200x630.jpg',
+ },
+ 'playlist_mincount': 12
+ }, {
+ # Version 1.9.7: https://plugins.trac.wordpress.org/browser/wp-miniaudioplayer/tags/1.9.7
+ # But has spaces around href filter
+ 'url': 'https://www.estudiords.com.br/temas/',
+ 'info_dict': {
+ 'id': 'temas',
+ 'title': 'Temas Variados',
+ 'age_limit': 0,
+ 'timestamp': float,
+ 'upload_date': str,
+ 'thumbnail': 'https://www.estudiords.com.br/wp-content/uploads/2021/03/LOGO-TEMAS.png',
+ 'description': 'md5:ab24d6a7ed0312ad2d466e721679f5a0',
+ },
+ 'playlist_mincount': 30
+ }]
+
+ def _extract_from_webpage(self, url, webpage):
+ # Common function for the WordPress plugin version only.
+ mb_player_params = self._search_regex(
+ r'function\s*initializeMiniAudioPlayer\(\){[^}]+jQuery([^;]+)\.mb_miniPlayer',
+ webpage, 'mb player params', default=None)
+ if not mb_player_params:
+ return
+ # v1.55 - 1.9.3 has "a[href*='.mp3'] ,a[href*='.m4a']"
+ # v1.9.4+ has "a[href*='.mp3']" only
+ file_exts = re.findall(r'a\[href\s*\*=\s*\'\.([a-zA-Z\d]+)\'', mb_player_params)
+ if not file_exts:
+ return
+
+ candidates = get_elements_text_and_html_by_attribute(
+ 'href', rf'(?:[^\"\']+\.(?:{"|".join(file_exts)}))', webpage, escape_value=False, tag='a')
+
+ for title, html in candidates:
+ attrs = extract_attributes(html)
+ # XXX: not tested - have not found any example of it being used
+ if any(c in (attrs.get('class') or '') for c in re.findall(r'\.not\("\.([^"]+)', mb_player_params)):
+ continue
+ href = attrs['href']
+ yield {
+ 'id': self._generic_id(href),
+ 'title': title or self._generic_title(href),
+ 'url': href,
+ }
diff --git a/yt_dlp/extractor/worldstarhiphop.py b/yt_dlp/extractor/worldstarhiphop.py
new file mode 100644
index 0000000..c6948a1
--- /dev/null
+++ b/yt_dlp/extractor/worldstarhiphop.py
@@ -0,0 +1,38 @@
+from .common import InfoExtractor
+
+
+class WorldStarHipHopIE(InfoExtractor):
+ _VALID_URL = r'https?://(?:www|m)\.worldstar(?:candy|hiphop)\.com/(?:videos|android)/video\.php\?.*?\bv=(?P<id>[^&]+)'
+ _TESTS = [{
+ 'url': 'http://www.worldstarhiphop.com/videos/video.php?v=wshh6a7q1ny0G34ZwuIO',
+ 'md5': '9d04de741161603bf7071bbf4e883186',
+ 'info_dict': {
+ 'id': 'wshh6a7q1ny0G34ZwuIO',
+ 'ext': 'mp4',
+ 'title': 'KO Of The Week: MMA Fighter Gets Knocked Out By Swift Head Kick!'
+ }
+ }, {
+ 'url': 'http://m.worldstarhiphop.com/android/video.php?v=wshh6a7q1ny0G34ZwuIO',
+ 'only_matching': True,
+ }]
+
+ def _real_extract(self, url):
+ video_id = self._match_id(url)
+ webpage = self._download_webpage(url, video_id)
+
+ entries = self._parse_html5_media_entries(url, webpage, video_id)
+
+ if not entries:
+ return self.url_result(url, 'Generic')
+
+ title = self._html_search_regex(
+ [r'(?s)<div class="content-heading">\s*<h1>(.*?)</h1>',
+ r'<span[^>]+class="tc-sp-pinned-title">(.*)</span>'],
+ webpage, 'title')
+
+ info = entries[0]
+ info.update({
+ 'id': video_id,
+ 'title': title,
+ })
+ return info
diff --git a/yt_dlp/extractor/wppilot.py b/yt_dlp/extractor/wppilot.py
new file mode 100644
index 0000000..5e590e2
--- /dev/null
+++ b/yt_dlp/extractor/wppilot.py
@@ -0,0 +1,173 @@
+import json
+import random
+import re
+
+from .common import InfoExtractor
+from ..utils import (
+    ExtractorError,
+    try_get,
+)
+
+
+class WPPilotBaseIE(InfoExtractor):
+ _VIDEO_URL = 'https://pilot.wp.pl/api/v1/channel/%s'
+ _VIDEO_GUEST_URL = 'https://pilot.wp.pl/api/v1/guest/channel/%s'
+
+ _HEADERS_WEB = {
+ 'Content-Type': 'application/json; charset=UTF-8',
+ 'Referer': 'https://pilot.wp.pl/tv/',
+ }
+
+ def _get_channel_list(self, cache=True):
+        if cache:
+ cache_res = self.cache.load('wppilot', 'channel-list')
+ if cache_res:
+ return cache_res, True
+ webpage = self._download_webpage('https://pilot.wp.pl/tv/', None, 'Downloading webpage')
+ page_data_base_url = self._search_regex(
+ r'<script src="(https://wp-pilot-gatsby\.wpcdn\.pl/v[\d.-]+/desktop)',
+ webpage, 'gatsby build version') + '/page-data'
+ page_data = self._download_json(f'{page_data_base_url}/tv/page-data.json', None, 'Downloading page data')
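+        # Gatsby stores static query results under page-data/sq/d/<hash>.json; scan each one until the channel list turns up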
+ for qhash in page_data['staticQueryHashes']:
+ qhash_content = self._download_json(
+ f'{page_data_base_url}/sq/d/{qhash}.json', None,
+ 'Searching for channel list')
+ channel_list = try_get(qhash_content, lambda x: x['data']['allChannels']['nodes'])
+ if channel_list is None:
+ continue
+ self.cache.store('wppilot', 'channel-list', channel_list)
+ return channel_list, False
+ raise ExtractorError('Unable to find the channel list')
+
+ def _parse_channel(self, chan):
+ return {
+ 'id': str(chan['id']),
+ 'title': chan['name'],
+ 'is_live': True,
+ 'thumbnails': [{
+ 'id': key,
+ 'url': chan[key],
+ } for key in ('thumbnail', 'thumbnail_mobile', 'icon') if chan.get(key)],
+ }
+
+
+class WPPilotIE(WPPilotBaseIE):
+ _VALID_URL = r'(?:https?://pilot\.wp\.pl/tv/?#|wppilot:)(?P<id>[a-z\d-]+)'
+ IE_NAME = 'wppilot'
+
+ _TESTS = [{
+ 'url': 'https://pilot.wp.pl/tv/#telewizja-wp-hd',
+ 'info_dict': {
+ 'id': '158',
+ 'ext': 'mp4',
+ 'title': 'Telewizja WP HD',
+ },
+ 'params': {
+ 'format': 'bestvideo',
+ },
+ }, {
+ # audio only
+ 'url': 'https://pilot.wp.pl/tv/#radio-nowy-swiat',
+ 'info_dict': {
+ 'id': '238',
+ 'ext': 'm4a',
+ 'title': 'Radio Nowy Świat',
+ },
+ 'params': {
+ 'format': 'bestaudio',
+ },
+ }, {
+ 'url': 'wppilot:9',
+ 'only_matching': True,
+ }]
+
+ def _get_channel(self, id_or_slug):
+ video_list, is_cached = self._get_channel_list(cache=True)
+ key = 'id' if re.match(r'^\d+$', id_or_slug) else 'slug'
+ for video in video_list:
+ if video.get(key) == id_or_slug:
+ return self._parse_channel(video)
+ # if cached channel not found, download and retry
+ if is_cached:
+ video_list, _ = self._get_channel_list(cache=False)
+ for video in video_list:
+ if video.get(key) == id_or_slug:
+ return self._parse_channel(video)
+ raise ExtractorError('Channel not found')
+
+ def _real_extract(self, url):
+ video_id = self._match_id(url)
+
+ channel = self._get_channel(video_id)
+ video_id = str(channel['id'])
+
+        session_cookie = next((c for c in self.cookiejar if c.name == 'netviapisessid'), None)
+        # cookies starting with "g:" are assigned to guests
+        is_authorized = session_cookie is not None and not session_cookie.value.startswith('g:')
+
+ video = self._download_json(
+ (self._VIDEO_URL if is_authorized else self._VIDEO_GUEST_URL) % video_id,
+ video_id, query={
+ 'device_type': 'web',
+ }, headers=self._HEADERS_WEB,
+ expected_status=(200, 422))
+
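+        # a 422 response carrying a stream_token means another stream session is open; close it and retry extraction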
+ stream_token = try_get(video, lambda x: x['_meta']['error']['info']['stream_token'])
+ if stream_token:
+ close = self._download_json(
+ 'https://pilot.wp.pl/api/v1/channels/close', video_id,
+ 'Invalidating previous stream session', headers=self._HEADERS_WEB,
+ data=json.dumps({
+ 'channelId': video_id,
+ 't': stream_token,
+ }).encode('utf-8'))
+ if try_get(close, lambda x: x['data']['status']) == 'ok':
+ return self.url_result(url, ie=WPPilotIE.ie_key())
+
+ formats = []
+
+ for fmt in video['data']['stream_channel']['streams']:
+ # live DASH does not work for now
+ # if fmt['type'] == 'dash@live:abr':
+ # formats.extend(
+ # self._extract_mpd_formats(
+ # random.choice(fmt['url']), video_id))
+ if fmt['type'] == 'hls@live:abr':
+ formats.extend(
+ self._extract_m3u8_formats(
+ random.choice(fmt['url']),
+ video_id, live=True))
+
+ channel['formats'] = formats
+ return channel
+
+
+class WPPilotChannelsIE(WPPilotBaseIE):
+ _VALID_URL = r'(?:https?://pilot\.wp\.pl/(?:tv/?)?(?:\?[^#]*)?#?|wppilot:)$'
+ IE_NAME = 'wppilot:channels'
+
+ _TESTS = [{
+ 'url': 'wppilot:',
+ 'info_dict': {
+ 'id': 'wppilot',
+ 'title': 'WP Pilot',
+ },
+ 'playlist_mincount': 100,
+ }, {
+ 'url': 'https://pilot.wp.pl/',
+ 'only_matching': True,
+ }]
+
+ def _entries(self):
+ channel_list, _ = self._get_channel_list()
+ for chan in channel_list:
+ entry = self._parse_channel(chan)
+ entry.update({
+ '_type': 'url_transparent',
+ 'url': f'wppilot:{chan["id"]}',
+ 'ie_key': WPPilotIE.ie_key(),
+ })
+ yield entry
+
+ def _real_extract(self, url):
+ return self.playlist_result(self._entries(), 'wppilot', 'WP Pilot')
diff --git a/yt_dlp/extractor/wrestleuniverse.py b/yt_dlp/extractor/wrestleuniverse.py
new file mode 100644
index 0000000..145246a
--- /dev/null
+++ b/yt_dlp/extractor/wrestleuniverse.py
@@ -0,0 +1,304 @@
+import base64
+import binascii
+import json
+import time
+import uuid
+
+from .common import InfoExtractor
+from ..dependencies import Cryptodome
+from ..utils import (
+ ExtractorError,
+ int_or_none,
+ jwt_decode_hs256,
+ traverse_obj,
+ try_call,
+ url_or_none,
+ urlencode_postdata,
+ variadic,
+)
+
+
+class WrestleUniverseBaseIE(InfoExtractor):
+ _NETRC_MACHINE = 'wrestleuniverse'
+ _VALID_URL_TMPL = r'https?://(?:www\.)?wrestle-universe\.com/(?:(?P<lang>\w{2})/)?%s/(?P<id>\w+)'
+ _API_HOST = 'api.wrestle-universe.com'
+ _API_PATH = None
+ _REAL_TOKEN = None
+ _TOKEN_EXPIRY = None
+ _REFRESH_TOKEN = None
+ _DEVICE_ID = None
+ _LOGIN_QUERY = {'key': 'AIzaSyCaRPBsDQYVDUWWBXjsTrHESi2r_F3RAdA'}
+ _LOGIN_HEADERS = {
+ 'Accept': '*/*',
+ 'Content-Type': 'application/json',
+ 'X-Client-Version': 'Chrome/JsCore/9.9.4/FirebaseCore-web',
+ 'X-Firebase-gmpid': '1:307308870738:web:820f38fe5150c8976e338b',
+ 'Referer': 'https://www.wrestle-universe.com/',
+ 'Origin': 'https://www.wrestle-universe.com',
+ }
+
+ @property
+ def _TOKEN(self):
+ if not self._REAL_TOKEN or not self._TOKEN_EXPIRY:
+ token = try_call(lambda: self._get_cookies('https://www.wrestle-universe.com/')['token'].value)
+ if not token and not self._REFRESH_TOKEN:
+ self.raise_login_required()
+ self._TOKEN = token
+
+ if not self._REAL_TOKEN or self._TOKEN_EXPIRY <= int(time.time()):
+ if not self._REFRESH_TOKEN:
+ raise ExtractorError(
+ 'Expired token. Refresh your cookies in browser and try again', expected=True)
+ self._refresh_token()
+
+ return self._REAL_TOKEN
+
+ @_TOKEN.setter
+ def _TOKEN(self, value):
+ self._REAL_TOKEN = value
+
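+        # the token is a JWT; decode its 'exp' claim so we know when a refresh is due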
+ expiry = traverse_obj(value, ({jwt_decode_hs256}, 'exp', {int_or_none}))
+ if not expiry:
+ raise ExtractorError('There was a problem with the auth token')
+ self._TOKEN_EXPIRY = expiry
+
+ def _perform_login(self, username, password):
+ login = self._download_json(
+ 'https://identitytoolkit.googleapis.com/v1/accounts:signInWithPassword', None,
+ 'Logging in', query=self._LOGIN_QUERY, headers=self._LOGIN_HEADERS, data=json.dumps({
+ 'returnSecureToken': True,
+ 'email': username,
+ 'password': password,
+ }, separators=(',', ':')).encode(), expected_status=400)
+ token = traverse_obj(login, ('idToken', {str}))
+ if not token:
+ raise ExtractorError(
+ f'Unable to log in: {traverse_obj(login, ("error", "message"))}', expected=True)
+ self._REFRESH_TOKEN = traverse_obj(login, ('refreshToken', {str}))
+ if not self._REFRESH_TOKEN:
+ self.report_warning('No refresh token was granted')
+ self._TOKEN = token
+
+ def _real_initialize(self):
+ if self._DEVICE_ID:
+ return
+
+ self._DEVICE_ID = self._configuration_arg('device_id', [None], ie_key=self._NETRC_MACHINE)[0]
+ if not self._DEVICE_ID:
+ self._DEVICE_ID = self.cache.load(self._NETRC_MACHINE, 'device_id')
+ if self._DEVICE_ID:
+ return
+ self._DEVICE_ID = str(uuid.uuid4())
+
+ self.cache.store(self._NETRC_MACHINE, 'device_id', self._DEVICE_ID)
+
+ def _refresh_token(self):
+ refresh = self._download_json(
+ 'https://securetoken.googleapis.com/v1/token', None, 'Refreshing token',
+ query=self._LOGIN_QUERY, data=urlencode_postdata({
+ 'grant_type': 'refresh_token',
+ 'refresh_token': self._REFRESH_TOKEN,
+ }), headers={
+ **self._LOGIN_HEADERS,
+ 'Content-Type': 'application/x-www-form-urlencoded',
+ })
+ if traverse_obj(refresh, ('refresh_token', {str})):
+ self._REFRESH_TOKEN = refresh['refresh_token']
+ token = traverse_obj(refresh, 'access_token', 'id_token', expected_type=str)
+ if not token:
+ raise ExtractorError('No auth token returned from refresh request')
+ self._TOKEN = token
+
+ def _call_api(self, video_id, param='', msg='API', auth=True, data=None, query={}, fatal=True):
+ headers = {'CA-CID': ''}
+ if data:
+ headers['Content-Type'] = 'application/json;charset=utf-8'
+ data = json.dumps(data, separators=(',', ':')).encode()
+ if auth and self._TOKEN:
+ headers['Authorization'] = f'Bearer {self._TOKEN}'
+ return self._download_json(
+ f'https://{self._API_HOST}/v1/{self._API_PATH}/{video_id}{param}', video_id,
+ note=f'Downloading {msg} JSON', errnote=f'Failed to download {msg} JSON',
+ data=data, headers=headers, query=query, fatal=fatal)
+
+ def _call_encrypted_api(self, video_id, param='', msg='API', data={}, query={}, fatal=True):
+ if not Cryptodome.RSA:
+ raise ExtractorError('pycryptodomex not found. Please install', expected=True)
+ private_key = Cryptodome.RSA.generate(2048)
+ cipher = Cryptodome.PKCS1_OAEP.new(private_key, hashAlgo=Cryptodome.SHA1)
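+        # a fresh RSA key pair is generated per call; the public key is sent with the request so the
+        # API can encrypt sensitive fields (e.g. the HLS key), which decrypt() below recovers locally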
+
+ def decrypt(data):
+ if not data:
+ return None
+ try:
+ return cipher.decrypt(base64.b64decode(data)).decode()
+ except (ValueError, binascii.Error) as e:
+ raise ExtractorError(f'Could not decrypt data: {e}')
+
+ token = base64.b64encode(private_key.public_key().export_key('DER')).decode()
+ api_json = self._call_api(video_id, param, msg, data={
+ 'deviceId': self._DEVICE_ID,
+ 'token': token,
+ **data,
+ }, query=query, fatal=fatal)
+ return api_json, decrypt
+
+ def _download_metadata(self, url, video_id, lang, props_keys):
+ metadata = self._call_api(video_id, msg='metadata', query={'al': lang or 'ja'}, auth=False, fatal=False)
+ if not metadata:
+ webpage = self._download_webpage(url, video_id)
+ nextjs_data = self._search_nextjs_data(webpage, video_id)
+ metadata = traverse_obj(nextjs_data, (
+ 'props', 'pageProps', *variadic(props_keys, (str, bytes, dict, set)), {dict})) or {}
+ return metadata
+
+ def _get_formats(self, data, path, video_id=None):
+ hls_url = traverse_obj(data, path, get_all=False)
+ if not hls_url and not data.get('canWatch'):
+ self.raise_no_formats(
+ 'This account does not have access to the requested content', expected=True)
+ elif not hls_url:
+ self.raise_no_formats('No supported formats found')
+ return self._extract_m3u8_formats(hls_url, video_id, 'mp4', m3u8_id='hls', live=True)
+
+
+class WrestleUniverseVODIE(WrestleUniverseBaseIE):
+ _VALID_URL = WrestleUniverseBaseIE._VALID_URL_TMPL % 'videos'
+ _TESTS = [{
+ 'url': 'https://www.wrestle-universe.com/en/videos/dp8mpjmcKfxzUhEHM2uFws',
+ 'info_dict': {
+ 'id': 'dp8mpjmcKfxzUhEHM2uFws',
+ 'ext': 'mp4',
+ 'title': 'The 3rd “Futari wa Princess” Max Heart Tournament',
+ 'description': 'md5:318d5061e944797fbbb81d5c7dd00bf5',
+ 'location': '埼玉・春日部ふれあいキューブ',
+ 'channel': 'tjpw',
+ 'duration': 7119,
+ 'timestamp': 1674979200,
+ 'upload_date': '20230129',
+ 'thumbnail': 'https://image.asset.wrestle-universe.com/8FjD67P8rZc446RBQs5RBN/8FjD67P8rZc446RBQs5RBN',
+ 'chapters': 'count:7',
+ 'cast': 'count:21',
+ },
+ 'params': {
+ 'skip_download': 'm3u8',
+ },
+ }]
+
+ _API_PATH = 'videoEpisodes'
+
+ def _real_extract(self, url):
+ lang, video_id = self._match_valid_url(url).group('lang', 'id')
+ metadata = self._download_metadata(url, video_id, lang, 'videoEpisodeFallbackData')
+ video_data = self._call_api(video_id, ':watch', 'watch', data={'deviceId': self._DEVICE_ID})
+
+ return {
+ 'id': video_id,
+ 'formats': self._get_formats(video_data, (
+ (('protocolHls', 'url'), ('chromecastUrls', ...)), {url_or_none}), video_id),
+ **traverse_obj(metadata, {
+ 'title': ('displayName', {str}),
+ 'description': ('description', {str}),
+ 'channel': ('labels', 'group', {str}),
+ 'location': ('labels', 'venue', {str}),
+ 'timestamp': ('watchStartTime', {int_or_none}),
+ 'thumbnail': ('keyVisualUrl', {url_or_none}),
+ 'cast': ('casts', ..., 'displayName', {str}),
+ 'duration': ('duration', {int}),
+ 'chapters': ('videoChapters', lambda _, v: isinstance(v.get('start'), int), {
+ 'title': ('displayName', {str}),
+ 'start_time': ('start', {int}),
+ 'end_time': ('end', {int}),
+ }),
+ }),
+ }
+
+
+class WrestleUniversePPVIE(WrestleUniverseBaseIE):
+ _VALID_URL = WrestleUniverseBaseIE._VALID_URL_TMPL % 'lives'
+ _TESTS = [{
+ 'note': 'HLS AES-128 key obtained via API',
+ 'url': 'https://www.wrestle-universe.com/en/lives/buH9ibbfhdJAY4GKZcEuJX',
+ 'info_dict': {
+ 'id': 'buH9ibbfhdJAY4GKZcEuJX',
+ 'ext': 'mp4',
+ 'title': '【PPV】Beyond the origins, into the future',
+ 'description': 'md5:9a872db68cd09be4a1e35a3ee8b0bdfc',
+ 'channel': 'tjpw',
+ 'location': '東京・Twin Box AKIHABARA',
+ 'duration': 10098,
+ 'timestamp': 1675076400,
+ 'upload_date': '20230130',
+ 'thumbnail': 'https://image.asset.wrestle-universe.com/rJs2m7cBaLXrwCcxMdQGRM/rJs2m7cBaLXrwCcxMdQGRM',
+ 'thumbnails': 'count:3',
+ 'hls_aes': {
+ 'key': '5633184acd6e43f1f1ac71c6447a4186',
+ 'iv': '5bac71beb33197d5600337ce86de7862',
+ },
+ },
+ 'params': {
+ 'skip_download': 'm3u8',
+ },
+ 'skip': 'No longer available',
+ }, {
+ 'note': 'unencrypted HLS',
+ 'url': 'https://www.wrestle-universe.com/en/lives/wUG8hP5iApC63jbtQzhVVx',
+ 'info_dict': {
+ 'id': 'wUG8hP5iApC63jbtQzhVVx',
+ 'ext': 'mp4',
+ 'title': 'GRAND PRINCESS \'22',
+ 'description': 'md5:e4f43d0d4262de3952ff34831bc99858',
+ 'channel': 'tjpw',
+ 'location': '東京・両国国技館',
+ 'duration': 18044,
+ 'timestamp': 1647665400,
+ 'upload_date': '20220319',
+ 'thumbnail': 'https://image.asset.wrestle-universe.com/i8jxSTCHPfdAKD4zN41Psx/i8jxSTCHPfdAKD4zN41Psx',
+ 'thumbnails': 'count:3',
+ },
+ 'params': {
+ 'skip_download': 'm3u8',
+ },
+ }]
+
+ _API_PATH = 'events'
+
+ def _real_extract(self, url):
+ lang, video_id = self._match_valid_url(url).group('lang', 'id')
+ metadata = self._download_metadata(url, video_id, lang, 'eventFallbackData')
+
+ info = {
+ 'id': video_id,
+ **traverse_obj(metadata, {
+ 'title': ('displayName', {str}),
+ 'description': ('description', {str}),
+ 'channel': ('labels', 'group', {str}),
+ 'location': ('labels', 'venue', {str}),
+ 'timestamp': ('startTime', {int_or_none}),
+ 'thumbnails': (('keyVisualUrl', 'alterKeyVisualUrl', 'heroKeyVisualUrl'), {'url': {url_or_none}}),
+ }),
+ }
+
+ ended_time = traverse_obj(metadata, ('endedTime', {int_or_none}))
+ if info.get('timestamp') and ended_time:
+ info['duration'] = ended_time - info['timestamp']
+
+ video_data, decrypt = self._call_encrypted_api(
+ video_id, ':watchArchive', 'watch archive', data={'method': 1})
+ info['formats'] = self._get_formats(video_data, (
+ ('hls', None), ('urls', 'chromecastUrls'), ..., {url_or_none}), video_id)
+ for f in info['formats']:
+ # bitrates are exaggerated in PPV playlists, so avoid wrong/huge filesize_approx values
+ if f.get('tbr'):
+ f['tbr'] = int(f['tbr'] / 2.5)
+
+ hls_aes_key = traverse_obj(video_data, ('hls', 'key', {decrypt}))
+ if hls_aes_key:
+ info['hls_aes'] = {
+ 'key': hls_aes_key,
+ 'iv': traverse_obj(video_data, ('hls', 'iv', {decrypt})),
+ }
+ elif traverse_obj(video_data, ('hls', 'encryptType', {int})):
+ self.report_warning('HLS AES-128 key was not found in API response')
+
+ return info
diff --git a/yt_dlp/extractor/wsj.py b/yt_dlp/extractor/wsj.py
new file mode 100644
index 0000000..86e2646
--- /dev/null
+++ b/yt_dlp/extractor/wsj.py
@@ -0,0 +1,120 @@
+from .common import InfoExtractor
+from ..utils import (
+    float_or_none,
+    int_or_none,
+ unified_strdate,
+)
+
+
+class WSJIE(InfoExtractor):
+ _VALID_URL = r'''(?x)
+ (?:
+ https?://video-api\.wsj\.com/api-video/player/iframe\.html\?.*?\bguid=|
+ https?://(?:www\.)?(?:wsj|barrons)\.com/video/(?:[^/]+/)+|
+ wsj:
+ )
+ (?P<id>[a-fA-F0-9-]{36})
+ '''
+ IE_DESC = 'Wall Street Journal'
+ _TESTS = [{
+ 'url': 'http://video-api.wsj.com/api-video/player/iframe.html?guid=1BD01A4C-BFE8-40A5-A42F-8A8AF9898B1A',
+ 'md5': 'e230a5bb249075e40793b655a54a02e4',
+ 'info_dict': {
+ 'id': '1BD01A4C-BFE8-40A5-A42F-8A8AF9898B1A',
+ 'ext': 'mp4',
+ 'upload_date': '20150202',
+ 'uploader_id': 'jdesai',
+ 'creator': 'jdesai',
+ 'categories': list, # a long list
+ 'duration': 90,
+ 'title': 'Bills Coach Rex Ryan Updates His Old Jets Tattoo',
+ },
+ }, {
+ 'url': 'http://www.wsj.com/video/can-alphabet-build-a-smarter-city/359DDAA8-9AC1-489C-82E6-0429C1E430E0.html',
+ 'only_matching': True,
+ }, {
+ 'url': 'http://www.barrons.com/video/capitalism-deserves-more-respect-from-millennials/F301217E-6F46-43AE-B8D2-B7180D642EE9.html',
+ 'only_matching': True,
+ }, {
+ 'url': 'https://www.wsj.com/video/series/a-brief-history-of/the-modern-cell-carrier-how-we-got-here/980E2187-401D-48A1-B82B-1486CEE06CB9',
+ 'only_matching': True,
+ }]
+
+ def _real_extract(self, url):
+ video_id = self._match_id(url)
+
+ info = self._download_json(
+ 'http://video-api.wsj.com/api-video/find_all_videos.asp', video_id,
+ query={
+ 'type': 'guid',
+ 'count': 1,
+ 'query': video_id,
+ 'fields': ','.join((
+ 'type', 'hls', 'videoMP4List', 'thumbnailList', 'author',
+ 'description', 'name', 'duration', 'videoURL', 'titletag',
+ 'formattedCreationDate', 'keywords', 'editor')),
+ })['items'][0]
+ title = info.get('name', info.get('titletag'))
+
+ formats = []
+
+ f4m_url = info.get('videoURL')
+ if f4m_url:
+ formats.extend(self._extract_f4m_formats(
+ f4m_url, video_id, f4m_id='hds', fatal=False))
+
+ m3u8_url = info.get('hls')
+ if m3u8_url:
+ formats.extend(self._extract_m3u8_formats(
+                m3u8_url, video_id, ext='mp4',
+ entry_protocol='m3u8_native', m3u8_id='hls', fatal=False))
+
+ for v in info.get('videoMP4List', []):
+ mp4_url = v.get('url')
+ if not mp4_url:
+ continue
+ tbr = int_or_none(v.get('bitrate'))
+ formats.append({
+ 'url': mp4_url,
+ 'format_id': 'http' + ('-%d' % tbr if tbr else ''),
+ 'tbr': tbr,
+ 'width': int_or_none(v.get('width')),
+ 'height': int_or_none(v.get('height')),
+ 'fps': float_or_none(v.get('fps')),
+ })
+
+ return {
+ 'id': video_id,
+ 'formats': formats,
+ # Thumbnails are conveniently in the correct format already
+ 'thumbnails': info.get('thumbnailList'),
+ 'creator': info.get('author'),
+ 'uploader_id': info.get('editor'),
+ 'duration': int_or_none(info.get('duration')),
+ 'upload_date': unified_strdate(info.get(
+ 'formattedCreationDate'), day_first=False),
+ 'title': title,
+ 'categories': info.get('keywords'),
+ }
+
+
+class WSJArticleIE(InfoExtractor):
+ _VALID_URL = r'(?i)https?://(?:www\.)?wsj\.com/articles/(?P<id>[^/?#&]+)'
+ _TEST = {
+ 'url': 'https://www.wsj.com/articles/dont-like-china-no-pandas-for-you-1490366939?',
+ 'info_dict': {
+ 'id': '4B13FA62-1D8C-45DB-8EA1-4105CB20B362',
+ 'ext': 'mp4',
+ 'upload_date': '20170221',
+ 'uploader_id': 'ralcaraz',
+ 'title': 'Bao Bao the Panda Leaves for China',
+ }
+ }
+
+ def _real_extract(self, url):
+ article_id = self._match_id(url)
+ webpage = self._download_webpage(url, article_id)
+ video_id = self._search_regex(
+ r'(?:id=["\']video|video-|iframe\.html\?guid=|data-src=["\'])([a-fA-F0-9-]{36})',
+ webpage, 'video id')
+ return self.url_result('wsj:%s' % video_id, WSJIE.ie_key(), video_id)
diff --git a/yt_dlp/extractor/wwe.py b/yt_dlp/extractor/wwe.py
new file mode 100644
index 0000000..9bbd477
--- /dev/null
+++ b/yt_dlp/extractor/wwe.py
@@ -0,0 +1,138 @@
+import re
+
+from .common import InfoExtractor
+from ..compat import compat_str
+from ..utils import (
+ try_get,
+ unescapeHTML,
+ url_or_none,
+ urljoin,
+)
+
+
+class WWEBaseIE(InfoExtractor):
+ _SUBTITLE_LANGS = {
+ 'English': 'en',
+ 'Deutsch': 'de',
+ }
+
+ def _extract_entry(self, data, url, video_id=None):
+ video_id = compat_str(video_id or data['nid'])
+ title = data['title']
+
+ formats = self._extract_m3u8_formats(
+ data['file'], video_id, 'mp4', entry_protocol='m3u8_native',
+ m3u8_id='hls')
+
+ description = data.get('description')
+ thumbnail = urljoin(url, data.get('image'))
+ series = data.get('show_name')
+ episode = data.get('episode_name')
+
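+        # caption tracks are plain JW Player track entries; map their
+        # human-readable labels to language codes where known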
+ subtitles = {}
+ tracks = data.get('tracks')
+ if isinstance(tracks, list):
+ for track in tracks:
+ if not isinstance(track, dict):
+ continue
+ if track.get('kind') != 'captions':
+ continue
+ track_file = url_or_none(track.get('file'))
+ if not track_file:
+ continue
+ label = track.get('label')
+ lang = self._SUBTITLE_LANGS.get(label, label) or 'en'
+ subtitles.setdefault(lang, []).append({
+ 'url': track_file,
+ })
+
+ return {
+ 'id': video_id,
+ 'title': title,
+ 'description': description,
+ 'thumbnail': thumbnail,
+ 'series': series,
+ 'episode': episode,
+ 'formats': formats,
+ 'subtitles': subtitles,
+ }
+
+
+class WWEIE(WWEBaseIE):
+ _VALID_URL = r'https?://(?:[^/]+\.)?wwe\.com/(?:[^/]+/)*videos/(?P<id>[^/?#&]+)'
+ _TESTS = [{
+ 'url': 'https://www.wwe.com/videos/daniel-bryan-vs-andrade-cien-almas-smackdown-live-sept-4-2018',
+ 'md5': '92811c6a14bfc206f7a6a9c5d9140184',
+ 'info_dict': {
+ 'id': '40048199',
+ 'ext': 'mp4',
+ 'title': 'Daniel Bryan vs. Andrade "Cien" Almas: SmackDown LIVE, Sept. 4, 2018',
+ 'description': 'md5:2d7424dbc6755c61a0e649d2a8677f67',
+ 'thumbnail': r're:^https?://.*\.jpg$',
+ }
+ }, {
+ 'url': 'https://de.wwe.com/videos/gran-metalik-vs-tony-nese-wwe-205-live-sept-4-2018',
+ 'only_matching': True,
+ }]
+
+ def _real_extract(self, url):
+ display_id = self._match_id(url)
+ webpage = self._download_webpage(url, display_id)
+
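+        # video metadata lives in the page's inline Drupal.settings JSON blob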
+ landing = self._parse_json(
+ self._html_search_regex(
+ r'(?s)Drupal\.settings\s*,\s*({.+?})\s*\)\s*;',
+ webpage, 'drupal settings'),
+ display_id)['WWEVideoLanding']
+
+ data = landing['initialVideo']['playlist'][0]
+ video_id = landing.get('initialVideoId')
+
+ info = self._extract_entry(data, url, video_id)
+ info['display_id'] = display_id
+ return info
+
+
+class WWEPlaylistIE(WWEBaseIE):
+ _VALID_URL = r'https?://(?:[^/]+\.)?wwe\.com/(?:[^/]+/)*(?P<id>[^/?#&]+)'
+ _TESTS = [{
+ 'url': 'https://www.wwe.com/shows/raw/2018-11-12',
+ 'info_dict': {
+ 'id': '2018-11-12',
+ },
+ 'playlist_mincount': 11,
+ }, {
+ 'url': 'http://www.wwe.com/article/walk-the-prank-wwe-edition',
+ 'only_matching': True,
+ }, {
+ 'url': 'https://www.wwe.com/shows/wwenxt/article/matt-riddle-interview',
+ 'only_matching': True,
+ }]
+
+ @classmethod
+ def suitable(cls, url):
+        return False if WWEIE.suitable(url) else super().suitable(url)
+
+ def _real_extract(self, url):
+ display_id = self._match_id(url)
+ webpage = self._download_webpage(url, display_id)
+
+ entries = []
+ for mobj in re.finditer(
+ r'data-video\s*=\s*(["\'])(?P<data>{.+?})\1', webpage):
+ video = self._parse_json(
+ mobj.group('data'), display_id, transform_source=unescapeHTML,
+ fatal=False)
+ if not video:
+ continue
+ data = try_get(video, lambda x: x['playlist'][0], dict)
+ if not data:
+ continue
+ try:
+ entry = self._extract_entry(data, url)
+ except Exception:
+ continue
+ entry['extractor_key'] = WWEIE.ie_key()
+ entries.append(entry)
+
+ return self.playlist_result(entries, display_id)
diff --git a/yt_dlp/extractor/wykop.py b/yt_dlp/extractor/wykop.py
new file mode 100644
index 0000000..1d29cc8
--- /dev/null
+++ b/yt_dlp/extractor/wykop.py
@@ -0,0 +1,268 @@
+import json
+
+from .common import InfoExtractor
+from ..networking.exceptions import HTTPError
+from ..utils import (
+ ExtractorError,
+ format_field,
+ parse_iso8601,
+ traverse_obj,
+ url_or_none,
+)
+
+
+class WykopBaseExtractor(InfoExtractor):
+ def _get_token(self, force_refresh=False):
+ if not force_refresh:
+ maybe_cached = self.cache.load('wykop', 'bearer')
+ if maybe_cached:
+ return maybe_cached
+
+ new_token = traverse_obj(
+ self._do_call_api('auth', None, 'Downloading anonymous auth token', data={
+ # hardcoded in frontend
+ 'key': 'w53947240748',
+ 'secret': 'd537d9e0a7adc1510842059ae5316419',
+ }), ('data', 'token'))
+
+ self.cache.store('wykop', 'bearer', new_token)
+ return new_token
+
+    def _do_call_api(self, path, video_id, note='Downloading JSON metadata', data=None, headers=None):
+        # copy to avoid mutating the caller's dict (or a shared default)
+        headers = dict(headers or {})
+        if data:
+            data = json.dumps({'data': data}).encode()
+            headers['Content-Type'] = 'application/json'
+
+ return self._download_json(
+ f'https://wykop.pl/api/v3/{path}', video_id,
+ note=note, data=data, headers=headers)
+
+ def _call_api(self, path, video_id, note='Downloading JSON metadata'):
+ token = self._get_token()
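+        # a cached anonymous bearer token may have expired; on HTTP 403,
+        # refresh it once and retry the request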
+ for retrying in range(2):
+ try:
+ return self._do_call_api(path, video_id, note, headers={'Authorization': f'Bearer {token}'})
+ except ExtractorError as e:
+ if not retrying and isinstance(e.cause, HTTPError) and e.cause.status == 403:
+ token = self._get_token(True)
+ continue
+ raise
+
+ def _common_data_extract(self, data):
+ author = traverse_obj(data, ('author', 'username'), expected_type=str)
+
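+        # wykop items embed third-party players (e.g. YouTube), so hand the
+        # embed URL to the matching extractor and overlay wykop's own
+        # metadata via url_transparent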
+ return {
+ '_type': 'url_transparent',
+ 'display_id': data.get('slug'),
+ 'url': traverse_obj(data,
+ ('media', 'embed', 'url'), # what gets an iframe embed
+ ('source', 'url'), # clickable url (dig only)
+ expected_type=url_or_none),
+ 'thumbnail': traverse_obj(
+ data, ('media', 'photo', 'url'), ('media', 'embed', 'thumbnail'), expected_type=url_or_none),
+ 'uploader': author,
+ 'uploader_id': author,
+ 'uploader_url': format_field(author, None, 'https://wykop.pl/ludzie/%s'),
+ 'timestamp': parse_iso8601(data.get('created_at'), delimiter=' '), # time it got submitted
+ 'like_count': traverse_obj(data, ('votes', 'up'), expected_type=int),
+ 'dislike_count': traverse_obj(data, ('votes', 'down'), expected_type=int),
+ 'comment_count': traverse_obj(data, ('comments', 'count'), expected_type=int),
+ 'age_limit': 18 if data.get('adult') else 0,
+ 'tags': data.get('tags'),
+ }
+
+
+class WykopDigIE(WykopBaseExtractor):
+ IE_NAME = 'wykop:dig'
+ _VALID_URL = r'https?://(?:www\.)?wykop\.pl/link/(?P<id>\d+)'
+
+ _TESTS = [{
+ 'url': 'https://wykop.pl/link/6912923/najbardziej-zrzedliwy-kot-na-swiecie-i-frozen-planet-ii-i-bbc-earth',
+ 'info_dict': {
+ 'id': 'rlSTBvViflc',
+ 'ext': 'mp4',
+ 'title': 'Najbardziej zrzędliwy kot na świecie I Frozen Planet II I BBC Earth',
+ 'display_id': 'najbardziej-zrzedliwy-kot-na-swiecie-i-frozen-planet-ii-i-bbc-earth',
+ 'description': 'md5:ac0f87dea1cdcb6b0c53f3612a095c87',
+ 'tags': ['zwierzaczki', 'koty', 'smiesznykotek', 'humor', 'rozrywka', 'ciekawostki'],
+ 'age_limit': 0,
+ 'timestamp': 1669154480,
+ 'release_timestamp': 1669194241,
+ 'release_date': '20221123',
+ 'uploader': 'starnak',
+ 'uploader_id': 'starnak',
+ 'uploader_url': 'https://wykop.pl/ludzie/starnak',
+ 'like_count': int,
+ 'dislike_count': int,
+ 'comment_count': int,
+ 'thumbnail': r're:https?://wykop\.pl/cdn/.+',
+ 'view_count': int,
+ 'channel': 'BBC Earth',
+ 'channel_id': 'UCwmZiChSryoWQCZMIQezgTg',
+ 'channel_url': 'https://www.youtube.com/channel/UCwmZiChSryoWQCZMIQezgTg',
+ 'categories': ['Pets & Animals'],
+ 'upload_date': '20220923',
+ 'duration': 191,
+ 'channel_follower_count': int,
+ 'availability': 'public',
+ 'live_status': 'not_live',
+ 'playable_in_embed': True,
+ },
+ }]
+
+ @classmethod
+ def suitable(cls, url):
+ return cls._match_valid_url(url) and not WykopDigCommentIE.suitable(url)
+
+ def _real_extract(self, url):
+ video_id = self._match_id(url)
+ data = self._call_api(f'links/{video_id}', video_id)['data']
+
+ return {
+ **self._common_data_extract(data),
+ 'id': video_id,
+ 'title': data['title'],
+ 'description': data.get('description'),
+ # time it got "digged" to the homepage
+ 'release_timestamp': parse_iso8601(data.get('published_at'), delimiter=' '),
+ }
+
+
+class WykopDigCommentIE(WykopBaseExtractor):
+ IE_NAME = 'wykop:dig:comment'
+ _VALID_URL = r'https?://(?:www\.)?wykop\.pl/link/(?P<dig_id>\d+)/[^/]+/komentarz/(?P<id>\d+)'
+
+ _TESTS = [{
+ 'url': 'https://wykop.pl/link/6992589/strollowal-oszusta-przez-ponad-24-minuty-udawal-naiwniaka-i-nagral-rozmowe/komentarz/114540527/podobna-sytuacja-ponizej-ciekawa-dyskusja-z-oszustem-na-sam-koniec-sam-bylem-w-biurze-swiadkiem-podobnej-rozmowy-niemal-zakonczonej-sukcesem-bandyty-g',
+ 'info_dict': {
+ 'id': 'u6tEi2FmKZY',
+ 'ext': 'mp4',
+ 'title': 'md5:e7c741c5baa7ed6478000caf72865577',
+ 'display_id': 'md5:45b2d12bd0e262d09cc7cf7abc8412db',
+ 'description': 'md5:bcec7983429f9c0630f9deb9d3d1ba5e',
+ 'timestamp': 1674476945,
+ 'uploader': 'Bartholomew',
+ 'uploader_id': 'Bartholomew',
+ 'uploader_url': 'https://wykop.pl/ludzie/Bartholomew',
+ 'thumbnail': r're:https?://wykop\.pl/cdn/.+',
+ 'tags': [],
+ 'availability': 'public',
+ 'duration': 1838,
+ 'upload_date': '20230117',
+ 'categories': ['Entertainment'],
+ 'view_count': int,
+ 'like_count': int,
+ 'dislike_count': int,
+ 'comment_count': int,
+ 'channel_follower_count': int,
+ 'playable_in_embed': True,
+ 'live_status': 'not_live',
+ 'age_limit': 0,
+ 'chapters': 'count:3',
+ 'channel': 'Poszukiwacze Okazji',
+ 'channel_id': 'UCzzvJDZThwv06dR4xmzrZBw',
+ 'channel_url': 'https://www.youtube.com/channel/UCzzvJDZThwv06dR4xmzrZBw',
+ },
+ }]
+
+ def _real_extract(self, url):
+ dig_id, comment_id = self._search_regex(self._VALID_URL, url, 'dig and comment ids', group=('dig_id', 'id'))
+ data = self._call_api(f'links/{dig_id}/comments/{comment_id}', comment_id)['data']
+
+ return {
+ **self._common_data_extract(data),
+ 'id': comment_id,
+ 'title': f"{traverse_obj(data, ('author', 'username'))} - {data.get('content') or ''}",
+ 'description': data.get('content'),
+ }
+
+
+class WykopPostIE(WykopBaseExtractor):
+ IE_NAME = 'wykop:post'
+ _VALID_URL = r'https?://(?:www\.)?wykop\.pl/wpis/(?P<id>\d+)'
+
+ _TESTS = [{
+ 'url': 'https://wykop.pl/wpis/68893343/kot-koty-smiesznykotek',
+ 'info_dict': {
+ 'id': 'PL8JMjiUPHUhwc9ZlKa_5IFeBwBV8Xe7jI',
+ 'title': 'PawelW124 - #kot #koty #smiesznykotek',
+ 'description': '#kot #koty #smiesznykotek',
+ 'display_id': 'kot-koty-smiesznykotek',
+ 'tags': ['kot', 'koty', 'smiesznykotek'],
+ 'uploader': 'PawelW124',
+ 'uploader_id': 'PawelW124',
+ 'uploader_url': 'https://wykop.pl/ludzie/PawelW124',
+ 'timestamp': 1668938142,
+ 'age_limit': 0,
+ 'like_count': int,
+ 'dislike_count': int,
+ 'thumbnail': r're:https?://wykop\.pl/cdn/.+',
+ 'comment_count': int,
+ 'channel': 'Revan',
+ 'channel_id': 'UCW9T_-uZoiI7ROARQdTDyOw',
+ 'channel_url': 'https://www.youtube.com/channel/UCW9T_-uZoiI7ROARQdTDyOw',
+ 'upload_date': '20221120',
+ 'modified_date': '20220814',
+ 'availability': 'public',
+ 'view_count': int,
+ },
+ 'playlist_mincount': 15,
+ 'params': {
+ 'flat_playlist': True,
+ }
+ }]
+
+ @classmethod
+ def suitable(cls, url):
+ return cls._match_valid_url(url) and not WykopPostCommentIE.suitable(url)
+
+ def _real_extract(self, url):
+ video_id = self._match_id(url)
+ data = self._call_api(f'entries/{video_id}', video_id)['data']
+
+ return {
+ **self._common_data_extract(data),
+ 'id': video_id,
+ 'title': f"{traverse_obj(data, ('author', 'username'))} - {data.get('content') or ''}",
+ 'description': data.get('content'),
+ }
+
+
+class WykopPostCommentIE(WykopBaseExtractor):
+ IE_NAME = 'wykop:post:comment'
+ _VALID_URL = r'https?://(?:www\.)?wykop\.pl/wpis/(?P<post_id>\d+)/[^/#]+#(?P<id>\d+)'
+
+ _TESTS = [{
+ 'url': 'https://wykop.pl/wpis/70084873/test-test-test#249303979',
+ 'info_dict': {
+ 'id': 'confusedquickarmyant',
+ 'ext': 'mp4',
+ 'title': 'tpap - treść komentarza',
+ 'display_id': 'tresc-komentarza',
+ 'description': 'treść komentarza',
+ 'uploader': 'tpap',
+ 'uploader_id': 'tpap',
+ 'uploader_url': 'https://wykop.pl/ludzie/tpap',
+ 'timestamp': 1675349470,
+ 'upload_date': '20230202',
+ 'tags': [],
+ 'duration': 2.12,
+ 'age_limit': 0,
+ 'categories': [],
+ 'view_count': int,
+ 'like_count': int,
+ 'dislike_count': int,
+ 'thumbnail': r're:https?://wykop\.pl/cdn/.+',
+ },
+ }]
+
+ def _real_extract(self, url):
+ post_id, comment_id = self._search_regex(self._VALID_URL, url, 'post and comment ids', group=('post_id', 'id'))
+ data = self._call_api(f'entries/{post_id}/comments/{comment_id}', comment_id)['data']
+
+ return {
+ **self._common_data_extract(data),
+ 'id': comment_id,
+ 'title': f"{traverse_obj(data, ('author', 'username'))} - {data.get('content') or ''}",
+ 'description': data.get('content'),
+ }
diff --git a/yt_dlp/extractor/xanimu.py b/yt_dlp/extractor/xanimu.py
new file mode 100644
index 0000000..e0b7bf9
--- /dev/null
+++ b/yt_dlp/extractor/xanimu.py
@@ -0,0 +1,51 @@
+import re
+
+from .common import InfoExtractor
+from ..utils import int_or_none
+
+
+class XanimuIE(InfoExtractor):
+ _VALID_URL = r'https?://(?:www\.)?xanimu\.com/(?P<id>[^/]+)/?'
+ _TESTS = [{
+ 'url': 'https://xanimu.com/51944-the-princess-the-frog-hentai/',
+ 'md5': '899b88091d753d92dad4cb63bbf357a7',
+ 'info_dict': {
+ 'id': '51944-the-princess-the-frog-hentai',
+ 'ext': 'mp4',
+ 'title': 'The Princess + The Frog Hentai',
+ 'thumbnail': 'https://xanimu.com/storage/2020/09/the-princess-and-the-frog-hentai.jpg',
+ 'description': r're:^Enjoy The Princess \+ The Frog Hentai',
+ 'duration': 207.0,
+ 'age_limit': 18
+ }
+ }, {
+ 'url': 'https://xanimu.com/huge-expansion/',
+ 'only_matching': True
+ }]
+
+ def _real_extract(self, url):
+ video_id = self._match_id(url)
+ webpage = self._download_webpage(url, video_id)
+
+ formats = []
+        for format_id in ('videoHigh', 'videoLow'):
+            format_url = self._search_json(
+                r'var\s+%s\s*=' % re.escape(format_id), webpage, format_id,
+                video_id, default=None, contains_pattern=r'[\'"]([^\'"]+)[\'"]')
+            if format_url:
+                formats.append({
+                    'url': format_url,
+                    'format_id': format_id,
+                    'quality': -2 if format_id.endswith('Low') else None,
+                })
+
+ return {
+ 'id': video_id,
+ 'formats': formats,
+ 'title': self._search_regex(r'[\'"]headline[\'"]:\s*[\'"]([^"]+)[\'"]', webpage,
+ 'title', default=None) or self._html_extract_title(webpage),
+ 'thumbnail': self._html_search_meta('thumbnailUrl', webpage, default=None),
+ 'description': self._html_search_meta('description', webpage, default=None),
+ 'duration': int_or_none(self._search_regex(r'duration:\s*[\'"]([^\'"]+?)[\'"]',
+ webpage, 'duration', fatal=False)),
+ 'age_limit': 18
+ }
diff --git a/yt_dlp/extractor/xboxclips.py b/yt_dlp/extractor/xboxclips.py
new file mode 100644
index 0000000..235b567
--- /dev/null
+++ b/yt_dlp/extractor/xboxclips.py
@@ -0,0 +1,62 @@
+import re
+
+from .common import InfoExtractor
+from ..utils import (
+ int_or_none,
+ month_by_abbreviation,
+ parse_filesize,
+ parse_qs,
+)
+
+
+class XboxClipsIE(InfoExtractor):
+ _VALID_URL = r'https?://(?:www\.)?(?:xboxclips\.com|gameclips\.io)/(?:video\.php\?.*vid=|[^/]+/)(?P<id>[\da-f]{8}-(?:[\da-f]{4}-){3}[\da-f]{12})'
+ _TESTS = [{
+ 'url': 'http://xboxclips.com/video.php?uid=2533274823424419&gamertag=Iabdulelah&vid=074a69a9-5faf-46aa-b93b-9909c1720325',
+ 'md5': 'fbe1ec805e920aeb8eced3c3e657df5d',
+ 'info_dict': {
+ 'id': '074a69a9-5faf-46aa-b93b-9909c1720325',
+ 'ext': 'mp4',
+ 'title': 'iAbdulElah playing Titanfall',
+ 'filesize_approx': 26800000,
+ 'upload_date': '20140807',
+ 'duration': 56,
+ }
+ }, {
+ 'url': 'https://gameclips.io/iAbdulElah/074a69a9-5faf-46aa-b93b-9909c1720325',
+ 'only_matching': True,
+ }]
+
+ def _real_extract(self, url):
+ video_id = self._match_id(url)
+
+ if '/video.php' in url:
+ qs = parse_qs(url)
+ url = 'https://gameclips.io/%s/%s' % (qs['gamertag'][0], qs['vid'][0])
+
+ webpage = self._download_webpage(url, video_id)
+ info = self._parse_html5_media_entries(url, webpage, video_id)[0]
+
+ title = self._html_search_meta(['og:title', 'twitter:title'], webpage)
+ upload_date = None
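+        # the page shows "Recorded: DD-Mon-YYYY"; rebuild it as YYYYMMDD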
+ mobj = re.search(
+ r'>Recorded: (\d{2})-(Jan|Feb|Mar|Apr|May|Ju[nl]|Aug|Sep|Oct|Nov|Dec)-(\d{4})',
+ webpage)
+ if mobj:
+ upload_date = '%s%.2d%s' % (mobj.group(3), month_by_abbreviation(mobj.group(2)), mobj.group(1))
+ filesize = parse_filesize(self._html_search_regex(
+ r'>Size: ([^<]+)<', webpage, 'file size', fatal=False))
+ duration = int_or_none(self._html_search_regex(
+ r'>Duration: (\d+) Seconds<', webpage, 'duration', fatal=False))
+ view_count = int_or_none(self._html_search_regex(
+ r'>Views: (\d+)<', webpage, 'view count', fatal=False))
+
+ info.update({
+ 'id': video_id,
+ 'title': title,
+ 'upload_date': upload_date,
+ 'filesize_approx': filesize,
+ 'duration': duration,
+ 'view_count': view_count,
+ })
+ return info
diff --git a/yt_dlp/extractor/xfileshare.py b/yt_dlp/extractor/xfileshare.py
new file mode 100644
index 0000000..08c6d6c
--- /dev/null
+++ b/yt_dlp/extractor/xfileshare.py
@@ -0,0 +1,198 @@
+import re
+
+from .common import InfoExtractor
+from ..utils import (
+ ExtractorError,
+ decode_packed_codes,
+ determine_ext,
+ int_or_none,
+ js_to_json,
+ urlencode_postdata,
+)
+
+
+# based on openload_decode from 2bfeee69b976fe049761dd3012e30b637ee05a58
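+# Decodes "AAencode"-obfuscated JavaScript: each character is written as an
+# arithmetic expression over the emoticon tokens below, yielding an octal
+# char code (or a hex one when prefixed with 'u')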
+def aa_decode(aa_code):
+ symbol_table = [
+ ('7', '((゚ー゚) + (o^_^o))'),
+ ('6', '((o^_^o) +(o^_^o))'),
+ ('5', '((゚ー゚) + (゚Θ゚))'),
+ ('2', '((o^_^o) - (゚Θ゚))'),
+ ('4', '(゚ー゚)'),
+ ('3', '(o^_^o)'),
+ ('1', '(゚Θ゚)'),
+ ('0', '(c^_^o)'),
+ ]
+ delim = '(゚Д゚)[゚ε゚]+'
+ ret = ''
+ for aa_char in aa_code.split(delim):
+ for val, pat in symbol_table:
+ aa_char = aa_char.replace(pat, val)
+ aa_char = aa_char.replace('+ ', '')
+ m = re.match(r'^\d+', aa_char)
+ if m:
+ ret += chr(int(m.group(0), 8))
+ else:
+ m = re.match(r'^u([\da-f]+)', aa_char)
+ if m:
+ ret += chr(int(m.group(1), 16))
+ return ret
+
+
+class XFileShareIE(InfoExtractor):
+ _SITES = (
+ (r'aparat\.cam', 'Aparat'),
+ (r'clipwatching\.com', 'ClipWatching'),
+ (r'gounlimited\.to', 'GoUnlimited'),
+ (r'govid\.me', 'GoVid'),
+ (r'holavid\.com', 'HolaVid'),
+ (r'streamty\.com', 'Streamty'),
+ (r'thevideobee\.to', 'TheVideoBee'),
+ (r'uqload\.com', 'Uqload'),
+ (r'vidbom\.com', 'VidBom'),
+ (r'vidlo\.us', 'vidlo'),
+ (r'vidlocker\.xyz', 'VidLocker'),
+ (r'vidshare\.tv', 'VidShare'),
+ (r'vup\.to', 'VUp'),
+ (r'wolfstream\.tv', 'WolfStream'),
+ (r'xvideosharing\.com', 'XVideoSharing'),
+ )
+
+ IE_DESC = 'XFileShare based sites: %s' % ', '.join(list(zip(*_SITES))[1])
+ _VALID_URL = (r'https?://(?:www\.)?(?P<host>%s)/(?:embed-)?(?P<id>[0-9a-zA-Z]+)'
+ % '|'.join(site for site in list(zip(*_SITES))[0]))
+ _EMBED_REGEX = [r'<iframe\b[^>]+\bsrc=(["\'])(?P<url>(?:https?:)?//(?:%s)/embed-[0-9a-zA-Z]+.*?)\1' % '|'.join(site for site in list(zip(*_SITES))[0])]
+
+ _FILE_NOT_FOUND_REGEXES = (
+ r'>(?:404 - )?File Not Found<',
+ r'>The file was removed by administrator<',
+ )
+
+ _TESTS = [{
+ 'url': 'https://uqload.com/dltx1wztngdz',
+ 'md5': '3cfbb65e4c90e93d7b37bcb65a595557',
+ 'info_dict': {
+ 'id': 'dltx1wztngdz',
+ 'ext': 'mp4',
+ 'title': 'Rick Astley Never Gonna Give You mp4',
+ 'thumbnail': r're:https://.*\.jpg'
+ }
+ }, {
+ 'url': 'http://xvideosharing.com/fq65f94nd2ve',
+ 'md5': '4181f63957e8fe90ac836fa58dc3c8a6',
+ 'info_dict': {
+ 'id': 'fq65f94nd2ve',
+ 'ext': 'mp4',
+ 'title': 'sample',
+ 'thumbnail': r're:http://.*\.jpg',
+ },
+ }, {
+ 'url': 'https://aparat.cam/n4d6dh0wvlpr',
+ 'only_matching': True,
+ }, {
+ 'url': 'https://wolfstream.tv/nthme29v9u2x',
+ 'only_matching': True,
+ }]
+
+ def _real_extract(self, url):
+ host, video_id = self._match_valid_url(url).groups()
+
+ url = 'https://%s/' % host + ('embed-%s.html' % video_id if host in ('govid.me', 'vidlo.us') else video_id)
+ webpage = self._download_webpage(url, video_id)
+
+ if any(re.search(p, webpage) for p in self._FILE_NOT_FOUND_REGEXES):
+ raise ExtractorError('Video %s does not exist' % video_id, expected=True)
+
+ fields = self._hidden_inputs(webpage)
+
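+        # some hosts gate the file behind a wait-and-repost form: honour the
+        # advertised countdown, then re-submit the hidden form fields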
+ if fields.get('op') == 'download1':
+ countdown = int_or_none(self._search_regex(
+ r'<span id="countdown_str">(?:[Ww]ait)?\s*<span id="cxc">(\d+)</span>\s*(?:seconds?)?</span>',
+ webpage, 'countdown', default=None))
+ if countdown:
+ self._sleep(countdown, video_id)
+
+ webpage = self._download_webpage(
+ url, video_id, 'Downloading video page',
+ data=urlencode_postdata(fields), headers={
+ 'Referer': url,
+ 'Content-type': 'application/x-www-form-urlencoded',
+ })
+
+ title = (self._search_regex(
+ (r'style="z-index: [0-9]+;">([^<]+)</span>',
+ r'<td nowrap>([^<]+)</td>',
+ r'h4-fine[^>]*>([^<]+)<',
+ r'>Watch (.+)[ <]',
+ r'<h2 class="video-page-head">([^<]+)</h2>',
+ r'<h2 style="[^"]*color:#403f3d[^"]*"[^>]*>([^<]+)<', # streamin.to
+ r'title\s*:\s*"([^"]+)"'), # govid.me
+ webpage, 'title', default=None) or self._og_search_title(
+ webpage, default=None) or video_id).strip()
+
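+        # the player setup is frequently hidden behind P.A.C.K.E.R. or
+        # AAencode obfuscation; inline the decoded JS so the regexes below
+        # can see it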
+ for regex, func in (
+ (r'(eval\(function\(p,a,c,k,e,d\){.+)', decode_packed_codes),
+ (r'(゚.+)', aa_decode)):
+ obf_code = self._search_regex(regex, webpage, 'obfuscated code', default=None)
+ if obf_code:
+ webpage = webpage.replace(obf_code, func(obf_code))
+
+ formats = []
+
+ jwplayer_data = self._search_regex(
+ [
+ r'jwplayer\("[^"]+"\)\.load\(\[({.+?})\]\);',
+ r'jwplayer\("[^"]+"\)\.setup\(({.+?})\);',
+ ], webpage,
+ 'jwplayer data', default=None)
+ if jwplayer_data:
+ jwplayer_data = self._parse_json(
+ jwplayer_data.replace(r"\'", "'"), video_id, js_to_json)
+ if jwplayer_data:
+ formats = self._parse_jwplayer_data(
+ jwplayer_data, video_id, False,
+ m3u8_id='hls', mpd_id='dash')['formats']
+
+ if not formats:
+ urls = []
+ for regex in (
+ r'(?:file|src)\s*:\s*(["\'])(?P<url>http(?:(?!\1).)+\.(?:m3u8|mp4|flv)(?:(?!\1).)*)\1',
+ r'file_link\s*=\s*(["\'])(?P<url>http(?:(?!\1).)+)\1',
+ r'addVariable\((\\?["\'])file\1\s*,\s*(\\?["\'])(?P<url>http(?:(?!\2).)+)\2\)',
+ r'<embed[^>]+src=(["\'])(?P<url>http(?:(?!\1).)+\.(?:m3u8|mp4|flv)(?:(?!\1).)*)\1'):
+ for mobj in re.finditer(regex, webpage):
+ video_url = mobj.group('url')
+ if video_url not in urls:
+ urls.append(video_url)
+
+ sources = self._search_regex(
+ r'sources\s*:\s*(\[(?!{)[^\]]+\])', webpage, 'sources', default=None)
+ if sources:
+ urls.extend(self._parse_json(sources, video_id))
+
+ formats = []
+ for video_url in urls:
+ if determine_ext(video_url) == 'm3u8':
+ formats.extend(self._extract_m3u8_formats(
+ video_url, video_id, 'mp4',
+ entry_protocol='m3u8_native', m3u8_id='hls',
+ fatal=False))
+ else:
+ formats.append({
+ 'url': video_url,
+ 'format_id': 'sd',
+ })
+
+ thumbnail = self._search_regex(
+ [
+ r'<video[^>]+poster="([^"]+)"',
+ r'(?:image|poster)\s*:\s*["\'](http[^"\']+)["\'],',
+ ], webpage, 'thumbnail', default=None)
+
+ return {
+ 'id': video_id,
+ 'title': title,
+ 'thumbnail': thumbnail,
+ 'formats': formats,
+ 'http_headers': {'Referer': url}
+ }
diff --git a/yt_dlp/extractor/xhamster.py b/yt_dlp/extractor/xhamster.py
new file mode 100644
index 0000000..01ac5dd
--- /dev/null
+++ b/yt_dlp/extractor/xhamster.py
@@ -0,0 +1,465 @@
+import itertools
+import re
+
+from .common import InfoExtractor
+from ..compat import compat_str
+from ..utils import (
+ clean_html,
+ determine_ext,
+ dict_get,
+ extract_attributes,
+ ExtractorError,
+ float_or_none,
+ int_or_none,
+ parse_duration,
+ str_or_none,
+ try_get,
+ unified_strdate,
+ url_or_none,
+ urljoin,
+)
+
+
+class XHamsterIE(InfoExtractor):
+ _DOMAINS = r'(?:xhamster\.(?:com|one|desi)|xhms\.pro|xhamster\d+\.com|xhday\.com|xhvid\.com)'
+ _VALID_URL = r'''(?x)
+ https?://
+ (?:[^/?#]+\.)?%s/
+ (?:
+ movies/(?P<id>[\dA-Za-z]+)/(?P<display_id>[^/]*)\.html|
+ videos/(?P<display_id_2>[^/]*)-(?P<id_2>[\dA-Za-z]+)
+ )
+ ''' % _DOMAINS
+ _TESTS = [{
+ 'url': 'https://xhamster.com/videos/femaleagent-shy-beauty-takes-the-bait-1509445',
+ 'md5': '34e1ab926db5dc2750fed9e1f34304bb',
+ 'info_dict': {
+ 'id': '1509445',
+ 'display_id': 'femaleagent-shy-beauty-takes-the-bait',
+ 'ext': 'mp4',
+ 'title': 'FemaleAgent Shy beauty takes the bait',
+ 'timestamp': 1350194821,
+ 'upload_date': '20121014',
+ 'uploader': 'Ruseful2011',
+ 'uploader_id': 'ruseful2011',
+ 'duration': 893,
+ 'age_limit': 18,
+ },
+ }, {
+ 'url': 'https://xhamster.com/videos/britney-spears-sexy-booty-2221348?hd=',
+ 'info_dict': {
+ 'id': '2221348',
+ 'display_id': 'britney-spears-sexy-booty',
+ 'ext': 'mp4',
+ 'title': 'Britney Spears Sexy Booty',
+ 'timestamp': 1379123460,
+ 'upload_date': '20130914',
+ 'uploader': 'jojo747400',
+ 'duration': 200,
+ 'age_limit': 18,
+ },
+ 'params': {
+ 'skip_download': True,
+ },
+ }, {
+ # empty seo, unavailable via new URL schema
+ 'url': 'http://xhamster.com/movies/5667973/.html',
+ 'info_dict': {
+ 'id': '5667973',
+ 'ext': 'mp4',
+ 'title': '....',
+ 'timestamp': 1454948101,
+ 'upload_date': '20160208',
+ 'uploader': 'parejafree',
+ 'uploader_id': 'parejafree',
+ 'duration': 72,
+ 'age_limit': 18,
+ },
+ 'params': {
+ 'skip_download': True,
+ },
+ }, {
+ # mobile site
+ 'url': 'https://m.xhamster.com/videos/cute-teen-jacqueline-solo-masturbation-8559111',
+ 'only_matching': True,
+ }, {
+ 'url': 'https://xhamster.com/movies/2272726/amber_slayed_by_the_knight.html',
+ 'only_matching': True,
+ }, {
+ # This video is visible for marcoalfa123456's friends only
+ 'url': 'https://it.xhamster.com/movies/7263980/la_mia_vicina.html',
+ 'only_matching': True,
+ }, {
+ # new URL schema
+ 'url': 'https://pt.xhamster.com/videos/euro-pedal-pumping-7937821',
+ 'only_matching': True,
+ }, {
+ 'url': 'https://xhamster.one/videos/femaleagent-shy-beauty-takes-the-bait-1509445',
+ 'only_matching': True,
+ }, {
+ 'url': 'https://xhamster.desi/videos/femaleagent-shy-beauty-takes-the-bait-1509445',
+ 'only_matching': True,
+ }, {
+ 'url': 'https://xhamster2.com/videos/femaleagent-shy-beauty-takes-the-bait-1509445',
+ 'only_matching': True,
+ }, {
+ 'url': 'https://xhamster11.com/videos/femaleagent-shy-beauty-takes-the-bait-1509445',
+ 'only_matching': True,
+ }, {
+ 'url': 'https://xhamster26.com/videos/femaleagent-shy-beauty-takes-the-bait-1509445',
+ 'only_matching': True,
+ }, {
+ 'url': 'http://xhamster.com/movies/1509445/femaleagent_shy_beauty_takes_the_bait.html',
+ 'only_matching': True,
+ }, {
+ 'url': 'http://xhamster.com/movies/2221348/britney_spears_sexy_booty.html?hd',
+ 'only_matching': True,
+ }, {
+ 'url': 'http://de.xhamster.com/videos/skinny-girl-fucks-herself-hard-in-the-forest-xhnBJZx',
+ 'only_matching': True,
+ }, {
+ 'url': 'https://xhday.com/videos/strapless-threesome-xhh7yVf',
+ 'only_matching': True,
+ }, {
+ 'url': 'https://xhvid.com/videos/lk-mm-xhc6wn6',
+ 'only_matching': True,
+ }]
+
+ def _real_extract(self, url):
+ mobj = self._match_valid_url(url)
+ video_id = mobj.group('id') or mobj.group('id_2')
+ display_id = mobj.group('display_id') or mobj.group('display_id_2')
+
+ desktop_url = re.sub(r'^(https?://(?:.+?\.)?)m\.', r'\1', url)
+ webpage, urlh = self._download_webpage_handle(desktop_url, video_id)
+
+ error = self._html_search_regex(
+ r'<div[^>]+id=["\']videoClosed["\'][^>]*>(.+?)</div>',
+ webpage, 'error', default=None)
+ if error:
+ raise ExtractorError(error, expected=True)
+
+ age_limit = self._rta_search(webpage)
+
+ def get_height(s):
+ return int_or_none(self._search_regex(
+ r'^(\d+)[pP]', s, 'height', default=None))
+
+ initials = self._parse_json(
+ self._search_regex(
+ (r'window\.initials\s*=\s*({.+?})\s*;\s*</script>',
+ r'window\.initials\s*=\s*({.+?})\s*;'), webpage, 'initials',
+ default='{}'),
+ video_id, fatal=False)
+ if initials:
+ video = initials['videoModel']
+ title = video['title']
+ formats = []
+ format_urls = set()
+ format_sizes = {}
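+            # the same media URL can appear in several source lists; track
+            # seen URLs so each format is emitted only once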
+ sources = try_get(video, lambda x: x['sources'], dict) or {}
+ for format_id, formats_dict in sources.items():
+ if not isinstance(formats_dict, dict):
+ continue
+ download_sources = try_get(sources, lambda x: x['download'], dict) or {}
+ for quality, format_dict in download_sources.items():
+ if not isinstance(format_dict, dict):
+ continue
+ format_sizes[quality] = float_or_none(format_dict.get('size'))
+ for quality, format_item in formats_dict.items():
+ if format_id == 'download':
+ # Download link takes some time to be generated,
+ # skipping for now
+ continue
+                    format_url = url_or_none(format_item)
+ if not format_url or format_url in format_urls:
+ continue
+ format_urls.add(format_url)
+ formats.append({
+ 'format_id': '%s-%s' % (format_id, quality),
+ 'url': format_url,
+ 'ext': determine_ext(format_url, 'mp4'),
+ 'height': get_height(quality),
+ 'filesize': format_sizes.get(quality),
+ 'http_headers': {
+ 'Referer': urlh.url,
+ },
+ })
+ xplayer_sources = try_get(
+ initials, lambda x: x['xplayerSettings']['sources'], dict)
+ if xplayer_sources:
+ hls_sources = xplayer_sources.get('hls')
+ if isinstance(hls_sources, dict):
+ for hls_format_key in ('url', 'fallback'):
+ hls_url = hls_sources.get(hls_format_key)
+ if not hls_url:
+ continue
+ hls_url = urljoin(url, hls_url)
+ if not hls_url or hls_url in format_urls:
+ continue
+ format_urls.add(hls_url)
+ formats.extend(self._extract_m3u8_formats(
+ hls_url, video_id, 'mp4', entry_protocol='m3u8_native',
+ m3u8_id='hls', fatal=False))
+ standard_sources = xplayer_sources.get('standard')
+ if isinstance(standard_sources, dict):
+ for format_id, formats_list in standard_sources.items():
+ if not isinstance(formats_list, list):
+ continue
+ for standard_format in formats_list:
+ if not isinstance(standard_format, dict):
+ continue
+ for standard_format_key in ('url', 'fallback'):
+ standard_url = standard_format.get(standard_format_key)
+ if not standard_url:
+ continue
+ standard_url = urljoin(url, standard_url)
+ if not standard_url or standard_url in format_urls:
+ continue
+ format_urls.add(standard_url)
+ ext = determine_ext(standard_url, 'mp4')
+ if ext == 'm3u8':
+ formats.extend(self._extract_m3u8_formats(
+ standard_url, video_id, 'mp4', entry_protocol='m3u8_native',
+ m3u8_id='hls', fatal=False))
+ continue
+ quality = (str_or_none(standard_format.get('quality'))
+ or str_or_none(standard_format.get('label'))
+ or '')
+ formats.append({
+ 'format_id': '%s-%s' % (format_id, quality),
+ 'url': standard_url,
+ 'ext': ext,
+ 'height': get_height(quality),
+ 'filesize': format_sizes.get(quality),
+ 'http_headers': {
+ 'Referer': standard_url,
+ },
+ })
+
+ categories_list = video.get('categories')
+ if isinstance(categories_list, list):
+ categories = []
+ for c in categories_list:
+ if not isinstance(c, dict):
+ continue
+ c_name = c.get('name')
+ if isinstance(c_name, compat_str):
+ categories.append(c_name)
+ else:
+ categories = None
+
+ uploader_url = url_or_none(try_get(video, lambda x: x['author']['pageURL']))
+ return {
+ 'id': video_id,
+ 'display_id': display_id,
+ 'title': title,
+ 'description': video.get('description'),
+ 'timestamp': int_or_none(video.get('created')),
+ 'uploader': try_get(
+ video, lambda x: x['author']['name'], compat_str),
+ 'uploader_url': uploader_url,
+ 'uploader_id': uploader_url.split('/')[-1] if uploader_url else None,
+ 'thumbnail': video.get('thumbURL'),
+ 'duration': int_or_none(video.get('duration')),
+ 'view_count': int_or_none(video.get('views')),
+ 'like_count': int_or_none(try_get(
+ video, lambda x: x['rating']['likes'], int)),
+ 'dislike_count': int_or_none(try_get(
+ video, lambda x: x['rating']['dislikes'], int)),
+                # the video model exposes no comment counter here ('views'
+                # is the play count, not comments), so leave this unset
+                'comment_count': None,
+ 'age_limit': age_limit if age_limit is not None else 18,
+ 'categories': categories,
+ 'formats': formats,
+ }
+
+ # Old layout fallback
+
+ title = self._html_search_regex(
+ [r'<h1[^>]*>([^<]+)</h1>',
+ r'<meta[^>]+itemprop=".*?caption.*?"[^>]+content="(.+?)"',
+ r'<title[^>]*>(.+?)(?:,\s*[^,]*?\s*Porn\s*[^,]*?:\s*xHamster[^<]*| - xHamster\.com)</title>'],
+ webpage, 'title')
+
+ formats = []
+ format_urls = set()
+
+ sources = self._parse_json(
+ self._search_regex(
+ r'sources\s*:\s*({.+?})\s*,?\s*\n', webpage, 'sources',
+ default='{}'),
+ video_id, fatal=False)
+ for format_id, format_url in sources.items():
+ format_url = url_or_none(format_url)
+ if not format_url:
+ continue
+ if format_url in format_urls:
+ continue
+ format_urls.add(format_url)
+ formats.append({
+ 'format_id': format_id,
+ 'url': format_url,
+ 'height': get_height(format_id),
+ })
+
+ video_url = self._search_regex(
+ [r'''file\s*:\s*(?P<q>["'])(?P<mp4>.+?)(?P=q)''',
+ r'''<a\s+href=(?P<q>["'])(?P<mp4>.+?)(?P=q)\s+class=["']mp4Thumb''',
+ r'''<video[^>]+file=(?P<q>["'])(?P<mp4>.+?)(?P=q)[^>]*>'''],
+ webpage, 'video url', group='mp4', default=None)
+ if video_url and video_url not in format_urls:
+ formats.append({
+ 'url': video_url,
+ })
+
+        # Only a few videos have a description
+ mobj = re.search(r'<span>Description: </span>([^<]+)', webpage)
+ description = mobj.group(1) if mobj else None
+
+ upload_date = unified_strdate(self._search_regex(
+ r'hint=["\'](\d{4}-\d{2}-\d{2}) \d{2}:\d{2}:\d{2} [A-Z]{3,4}',
+ webpage, 'upload date', fatal=False))
+
+ uploader = self._html_search_regex(
+ r'<span[^>]+itemprop=["\']author[^>]+><a[^>]+><span[^>]+>([^<]+)',
+ webpage, 'uploader', default='anonymous')
+
+ thumbnail = self._search_regex(
+ [r'''["']thumbUrl["']\s*:\s*(?P<q>["'])(?P<thumbnail>.+?)(?P=q)''',
+ r'''<video[^>]+"poster"=(?P<q>["'])(?P<thumbnail>.+?)(?P=q)[^>]*>'''],
+ webpage, 'thumbnail', fatal=False, group='thumbnail')
+
+ duration = parse_duration(self._search_regex(
+ [r'<[^<]+\bitemprop=["\']duration["\'][^<]+\bcontent=["\'](.+?)["\']',
+ r'Runtime:\s*</span>\s*([\d:]+)'], webpage,
+ 'duration', fatal=False))
+
+ view_count = int_or_none(self._search_regex(
+ r'content=["\']User(?:View|Play)s:(\d+)',
+ webpage, 'view count', fatal=False))
+
+ mobj = re.search(r'hint=[\'"](?P<likecount>\d+) Likes / (?P<dislikecount>\d+) Dislikes', webpage)
+ (like_count, dislike_count) = (mobj.group('likecount'), mobj.group('dislikecount')) if mobj else (None, None)
+
+ mobj = re.search(r'</label>Comments \((?P<commentcount>\d+)\)</div>', webpage)
+ comment_count = mobj.group('commentcount') if mobj else 0
+
+ categories_html = self._search_regex(
+ r'(?s)<table.+?(<span>Categories:.+?)</table>', webpage,
+ 'categories', default=None)
+ categories = [clean_html(category) for category in re.findall(
+ r'<a[^>]+>(.+?)</a>', categories_html)] if categories_html else None
+
+ return {
+ 'id': video_id,
+ 'display_id': display_id,
+ 'title': title,
+ 'description': description,
+ 'upload_date': upload_date,
+ 'uploader': uploader,
+ 'uploader_id': uploader.lower() if uploader else None,
+ 'thumbnail': thumbnail,
+ 'duration': duration,
+ 'view_count': view_count,
+ 'like_count': int_or_none(like_count),
+ 'dislike_count': int_or_none(dislike_count),
+ 'comment_count': int_or_none(comment_count),
+ 'age_limit': age_limit,
+ 'categories': categories,
+ 'formats': formats,
+ }
+
+
+class XHamsterEmbedIE(InfoExtractor):
+ _VALID_URL = r'https?://(?:[^/?#]+\.)?%s/xembed\.php\?video=(?P<id>\d+)' % XHamsterIE._DOMAINS
+ _EMBED_REGEX = [r'<iframe[^>]+?src=(["\'])(?P<url>(?:https?:)?//(?:www\.)?xhamster\.com/xembed\.php\?video=\d+)\1']
+ _TEST = {
+ 'url': 'http://xhamster.com/xembed.php?video=3328539',
+ 'info_dict': {
+ 'id': '3328539',
+ 'ext': 'mp4',
+ 'title': 'Pen Masturbation',
+ 'timestamp': 1406581861,
+ 'upload_date': '20140728',
+ 'uploader': 'ManyakisArt',
+ 'duration': 5,
+ 'age_limit': 18,
+ }
+ }
+
+ def _real_extract(self, url):
+ video_id = self._match_id(url)
+
+ webpage = self._download_webpage(url, video_id)
+
+ video_url = self._search_regex(
+ r'href="(https?://xhamster\.com/(?:movies/{0}/[^"]*\.html|videos/[^/]*-{0})[^"]*)"'.format(video_id),
+ webpage, 'xhamster url', default=None)
+
+ if not video_url:
+ vars = self._parse_json(
+ self._search_regex(r'vars\s*:\s*({.+?})\s*,\s*\n', webpage, 'vars'),
+ video_id)
+ video_url = dict_get(vars, ('downloadLink', 'homepageLink', 'commentsLink', 'shareUrl'))
+
+ return self.url_result(video_url, 'XHamster')
+
+
+class XHamsterUserIE(InfoExtractor):
+ _VALID_URL = rf'https?://(?:[^/?#]+\.)?{XHamsterIE._DOMAINS}/(?:(?P<user>users)|creators)/(?P<id>[^/?#&]+)'
+ _TESTS = [{
+ # Paginated user profile
+ 'url': 'https://xhamster.com/users/netvideogirls/videos',
+ 'info_dict': {
+ 'id': 'netvideogirls',
+ },
+ 'playlist_mincount': 267,
+ }, {
+ # Non-paginated user profile
+ 'url': 'https://xhamster.com/users/firatkaan/videos',
+ 'info_dict': {
+ 'id': 'firatkaan',
+ },
+ 'playlist_mincount': 1,
+ }, {
+ 'url': 'https://xhamster.com/creators/squirt-orgasm-69',
+ 'info_dict': {
+ 'id': 'squirt-orgasm-69',
+ },
+ 'playlist_mincount': 150,
+ }, {
+ 'url': 'https://xhday.com/users/mobhunter',
+ 'only_matching': True,
+ }, {
+ 'url': 'https://xhvid.com/users/pelushe21',
+ 'only_matching': True,
+ }]
+
+ def _entries(self, user_id, is_user):
+ prefix, suffix = ('users', 'videos') if is_user else ('creators', 'exclusive')
+ next_page_url = f'https://xhamster.com/{prefix}/{user_id}/{suffix}/1'
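+        # walk the profile pages by following the data-page="next" link
+        # until it disappears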
+ for pagenum in itertools.count(1):
+ page = self._download_webpage(
+ next_page_url, user_id, 'Downloading page %s' % pagenum)
+ for video_tag in re.findall(
+ r'(<a[^>]+class=["\'].*?\bvideo-thumb__image-container[^>]+>)',
+ page):
+ video = extract_attributes(video_tag)
+ video_url = url_or_none(video.get('href'))
+ if not video_url or not XHamsterIE.suitable(video_url):
+ continue
+ video_id = XHamsterIE._match_id(video_url)
+ yield self.url_result(
+ video_url, ie=XHamsterIE.ie_key(), video_id=video_id)
+ mobj = re.search(r'<a[^>]+data-page=["\']next[^>]+>', page)
+ if not mobj:
+ break
+ next_page = extract_attributes(mobj.group(0))
+ next_page_url = url_or_none(next_page.get('href'))
+ if not next_page_url:
+ break
+
+ def _real_extract(self, url):
+ user, user_id = self._match_valid_url(url).group('user', 'id')
+ return self.playlist_result(self._entries(user_id, bool(user)), user_id)
diff --git a/yt_dlp/extractor/ximalaya.py b/yt_dlp/extractor/ximalaya.py
new file mode 100644
index 0000000..c98c8a4
--- /dev/null
+++ b/yt_dlp/extractor/ximalaya.py
@@ -0,0 +1,167 @@
+import math
+
+from .common import InfoExtractor
+from ..utils import InAdvancePagedList, str_or_none, traverse_obj, try_call
+
+
+class XimalayaBaseIE(InfoExtractor):
+ _GEO_COUNTRIES = ['CN']
+
+
+class XimalayaIE(XimalayaBaseIE):
+ IE_NAME = 'ximalaya'
+ IE_DESC = '喜马拉雅FM'
+    _VALID_URL = r'https?://(?:www\.|m\.)?ximalaya\.com/(?:(?P<uid>\d+)/)?sound/(?P<id>[0-9]+)'
+ _TESTS = [
+ {
+ 'url': 'http://www.ximalaya.com/sound/47740352/',
+ 'info_dict': {
+ 'id': '47740352',
+ 'ext': 'm4a',
+ 'uploader': '小彬彬爱听书',
+ 'uploader_id': '61425525',
+ 'uploader_url': 'http://www.ximalaya.com/zhubo/61425525/',
+ 'title': '261.唐诗三百首.卷八.送孟浩然之广陵.李白',
+ 'description': "contains:《送孟浩然之广陵》\n作者:李白\n故人西辞黄鹤楼,烟花三月下扬州。\n孤帆远影碧空尽,惟见长江天际流。",
+ 'thumbnail': r're:^https?://.*\.jpg',
+ 'thumbnails': [
+ {
+ 'name': 'cover_url',
+ 'url': r're:^https?://.*\.jpg',
+ },
+ {
+ 'name': 'cover_url_142',
+ 'url': r're:^https?://.*\.jpg',
+ 'width': 180,
+ 'height': 180
+ }
+ ],
+ 'categories': ['其他'],
+ 'duration': 93,
+ 'view_count': int,
+ 'like_count': int,
+ }
+ },
+ {
+ 'url': 'http://m.ximalaya.com/61425525/sound/47740352/',
+ 'info_dict': {
+ 'id': '47740352',
+ 'ext': 'm4a',
+ 'uploader': '小彬彬爱听书',
+ 'uploader_id': '61425525',
+ 'uploader_url': 'http://www.ximalaya.com/zhubo/61425525/',
+ 'title': '261.唐诗三百首.卷八.送孟浩然之广陵.李白',
+ 'description': "contains:《送孟浩然之广陵》\n作者:李白\n故人西辞黄鹤楼,烟花三月下扬州。\n孤帆远影碧空尽,惟见长江天际流。",
+ 'thumbnail': r're:^https?://.*\.jpg',
+ 'thumbnails': [
+ {
+ 'name': 'cover_url',
+ 'url': r're:^https?://.*\.jpg',
+ },
+ {
+ 'name': 'cover_url_142',
+ 'url': r're:^https?://.*\.jpg',
+ 'width': 180,
+ 'height': 180
+ }
+ ],
+ 'categories': ['人文'],
+ 'duration': 93,
+ 'view_count': int,
+ 'like_count': int,
+ }
+ }
+ ]
+
+ def _real_extract(self, url):
+ scheme = 'https' if url.startswith('https') else 'http'
+
+ audio_id = self._match_id(url)
+ audio_info_file = '%s://m.ximalaya.com/tracks/%s.json' % (scheme, audio_id)
+ audio_info = self._download_json(audio_info_file, audio_id,
+ 'Downloading info json %s' % audio_info_file,
+ 'Unable to download info file')
+
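+        # the mobile JSON exposes two progressive audio renditions keyed by
+        # play_path_32/play_path_64; mark them audio-only via 'vcodec': 'none'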
+ formats = [{
+ 'format_id': f'{bps}k',
+ 'url': audio_info[k],
+ 'abr': bps,
+ 'vcodec': 'none'
+ } for bps, k in ((24, 'play_path_32'), (64, 'play_path_64')) if audio_info.get(k)]
+
+ thumbnails = []
+        for k in audio_info:
+            # cover pic keys look like 'cover_url', 'cover_url_142'
+ if k.startswith('cover_url'):
+ thumbnail = {'name': k, 'url': audio_info[k]}
+ if k == 'cover_url_142':
+ thumbnail['width'] = 180
+ thumbnail['height'] = 180
+ thumbnails.append(thumbnail)
+
+ audio_uploader_id = audio_info.get('uid')
+
+ audio_description = try_call(
+ lambda: audio_info['intro'].replace('\r\n\r\n\r\n ', '\n').replace('\r\n', '\n'))
+
+ return {
+ 'id': audio_id,
+ 'uploader': audio_info.get('nickname'),
+ 'uploader_id': str_or_none(audio_uploader_id),
+ 'uploader_url': f'{scheme}://www.ximalaya.com/zhubo/{audio_uploader_id}/' if audio_uploader_id else None,
+ 'title': audio_info['title'],
+ 'thumbnails': thumbnails,
+ 'description': audio_description,
+ 'categories': list(filter(None, [audio_info.get('category_name')])),
+ 'duration': audio_info.get('duration'),
+ 'view_count': audio_info.get('play_count'),
+ 'like_count': audio_info.get('favorites_count'),
+ 'formats': formats,
+ }
+
+
+class XimalayaAlbumIE(XimalayaBaseIE):
+ IE_NAME = 'ximalaya:album'
+ IE_DESC = '喜马拉雅FM 专辑'
+ _VALID_URL = r'https?://(?:www\.|m\.)?ximalaya\.com/(?:\d+/)?album/(?P<id>[0-9]+)'
+ _TESTS = [{
+ 'url': 'http://www.ximalaya.com/61425525/album/5534601/',
+ 'info_dict': {
+ 'title': '唐诗三百首(含赏析)',
+ 'id': '5534601',
+ },
+ 'playlist_mincount': 323,
+ }, {
+ 'url': 'https://www.ximalaya.com/album/6912905',
+ 'info_dict': {
+ 'title': '埃克哈特《修炼当下的力量》',
+ 'id': '6912905',
+ },
+ 'playlist_mincount': 41,
+ }]
+
+ def _real_extract(self, url):
+ playlist_id = self._match_id(url)
+
+ first_page = self._fetch_page(playlist_id, 1)
+ page_count = math.ceil(first_page['trackTotalCount'] / first_page['pageSize'])
+
+ entries = InAdvancePagedList(
+ lambda idx: self._get_entries(self._fetch_page(playlist_id, idx + 1) if idx else first_page),
+ page_count, first_page['pageSize'])
+
+ title = traverse_obj(first_page, ('tracks', 0, 'albumTitle'), expected_type=str)
+
+ return self.playlist_result(entries, playlist_id, title)
+
+ def _fetch_page(self, playlist_id, page_idx):
+ return self._download_json(
+ 'https://www.ximalaya.com/revision/album/v1/getTracksList',
+ playlist_id, note=f'Downloading tracks list page {page_idx}',
+ query={'albumId': playlist_id, 'pageNum': page_idx})['data']
+
+ def _get_entries(self, page_data):
+ for e in page_data['tracks']:
+ yield self.url_result(
+ self._proto_relative_url(f'//www.ximalaya.com{e["url"]}'),
+ XimalayaIE, e.get('trackId'), e.get('title'))
diff --git a/yt_dlp/extractor/xinpianchang.py b/yt_dlp/extractor/xinpianchang.py
new file mode 100644
index 0000000..bd67e8b
--- /dev/null
+++ b/yt_dlp/extractor/xinpianchang.py
@@ -0,0 +1,92 @@
+from .common import InfoExtractor
+from ..utils import (
+ int_or_none,
+ str_or_none,
+ try_get,
+ update_url_query,
+ url_or_none,
+)
+
+
+class XinpianchangIE(InfoExtractor):
+ _WORKING = False
+ _VALID_URL = r'https?://www\.xinpianchang\.com/(?P<id>[^/]+?)(?:\D|$)'
+ IE_NAME = 'xinpianchang'
+ IE_DESC = 'xinpianchang.com'
+ _TESTS = [{
+ 'url': 'https://www.xinpianchang.com/a11766551',
+ 'info_dict': {
+ 'id': 'a11766551',
+ 'ext': 'mp4',
+ 'title': '北京2022冬奥会闭幕式再见短片-冰墩墩下班了',
+ 'description': 'md5:4a730c10639a82190fabe921c0fa4b87',
+ 'duration': 151,
+ 'thumbnail': r're:^https?://oss-xpc0\.xpccdn\.com.+/assets/',
+ 'uploader': '正时文创',
+ 'uploader_id': '10357277',
+ 'categories': ['宣传片', '国家城市', '广告', '其他'],
+ 'tags': ['北京冬奥会', '冰墩墩', '再见', '告别', '冰墩墩哭了', '感动', '闭幕式', '熄火']
+ },
+ }, {
+ 'url': 'https://www.xinpianchang.com/a11762904',
+ 'info_dict': {
+ 'id': 'a11762904',
+ 'ext': 'mp4',
+ 'title': '冬奥会决胜时刻《法国派出三只鸡?》',
+ 'description': 'md5:55cb139ef8f48f0c877932d1f196df8b',
+ 'duration': 136,
+ 'thumbnail': r're:^https?://oss-xpc0\.xpccdn\.com.+/assets/',
+ 'uploader': '精品动画',
+ 'uploader_id': '10858927',
+ 'categories': ['动画', '三维CG'],
+ 'tags': ['France Télévisions', '法国3台', '蠢萌', '冬奥会']
+ },
+ }, {
+ 'url': 'https://www.xinpianchang.com/a11779743?from=IndexPick&part=%E7%BC%96%E8%BE%91%E7%B2%BE%E9%80%89&index=2',
+ 'only_matching': True,
+ }]
+
+ def _real_extract(self, url):
+ video_id = self._match_id(url)
+ webpage = self._download_webpage(url, video_id=video_id)
+ domain = self.find_value_with_regex(var='requireNewDomain', webpage=webpage)
+ vid = self.find_value_with_regex(var='vid', webpage=webpage)
+ app_key = self.find_value_with_regex(var='modeServerAppKey', webpage=webpage)
+ api = update_url_query(f'{domain}/mod/api/v2/media/{vid}', {'appKey': app_key})
+ data = self._download_json(api, video_id=video_id)['data']
+ formats, subtitles = [], {}
+        for k, v in (data.get('resource') or {}).items():
+ if k in ('dash', 'hls'):
+ v_url = v.get('url')
+ if not v_url:
+ continue
+ if k == 'dash':
+ fmts, subs = self._extract_mpd_formats_and_subtitles(v_url, video_id=video_id)
+ elif k == 'hls':
+ fmts, subs = self._extract_m3u8_formats_and_subtitles(v_url, video_id=video_id)
+ formats.extend(fmts)
+ subtitles = self._merge_subtitles(subtitles, subs)
+ elif k == 'progressive':
+ formats.extend([{
+ 'url': url_or_none(prog.get('url')),
+ 'width': int_or_none(prog.get('width')),
+ 'height': int_or_none(prog.get('height')),
+ 'ext': 'mp4',
+                } for prog in v or [] if prog.get('url')])
+
+ return {
+ 'id': video_id,
+ 'title': data.get('title'),
+ 'description': data.get('description'),
+ 'duration': int_or_none(data.get('duration')),
+ 'categories': data.get('categories'),
+ 'tags': data.get('keywords'),
+ 'thumbnail': data.get('cover'),
+ 'uploader': try_get(data, lambda x: x['owner']['username']),
+ 'uploader_id': str_or_none(try_get(data, lambda x: x['owner']['id'])),
+ 'formats': formats,
+ 'subtitles': subtitles,
+ }
+
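+    # pull a string assignment like `var vid = "..."` out of the inline JS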
+ def find_value_with_regex(self, var, webpage):
+ return self._search_regex(rf'var\s{var}\s=\s\"(?P<vid>[^\"]+)\"', webpage, name=var)
diff --git a/yt_dlp/extractor/xminus.py b/yt_dlp/extractor/xminus.py
new file mode 100644
index 0000000..37e3104
--- /dev/null
+++ b/yt_dlp/extractor/xminus.py
@@ -0,0 +1,77 @@
+import re
+import time
+
+from .common import InfoExtractor
+from ..compat import (
+ compat_ord,
+)
+from ..utils import (
+ int_or_none,
+ parse_duration,
+)
+
+
+class XMinusIE(InfoExtractor):
+ _WORKING = False
+ _VALID_URL = r'https?://(?:www\.)?x-minus\.org/track/(?P<id>[0-9]+)'
+ _TEST = {
+ 'url': 'http://x-minus.org/track/4542/%D0%BF%D0%B5%D1%81%D0%B5%D0%BD%D0%BA%D0%B0-%D1%88%D0%BE%D1%84%D0%B5%D1%80%D0%B0.html',
+ 'md5': '401a15f2d2dcf6d592cb95528d72a2a8',
+ 'info_dict': {
+ 'id': '4542',
+ 'ext': 'mp3',
+ 'title': 'Леонид Агутин-Песенка шофёра',
+ 'duration': 156,
+ 'tbr': 320,
+ 'filesize_approx': 5900000,
+ 'view_count': int,
+ 'description': 'md5:03238c5b663810bc79cf42ef3c03e371',
+ }
+ }
+
+ def _real_extract(self, url):
+ video_id = self._match_id(url)
+ webpage = self._download_webpage(url, video_id)
+
+ artist = self._html_search_regex(
+ r'<a[^>]+href="/artist/\d+">([^<]+)</a>', webpage, 'artist')
+ title = artist + '-' + self._html_search_regex(
+ r'<span[^>]+class="minustrack-full-title(?:\s+[^"]+)?"[^>]*>([^<]+)', webpage, 'title')
+ duration = parse_duration(self._html_search_regex(
+ r'<span[^>]+class="player-duration(?:\s+[^"]+)?"[^>]*>([^<]+)',
+ webpage, 'duration', fatal=False))
+ mobj = re.search(
+ r'<div[^>]+class="dw-info(?:\s+[^"]+)?"[^>]*>(?P<tbr>\d+)\s*кбит/c\s+(?P<filesize>[0-9.]+)\s*мб</div>',
+ webpage)
+ tbr = filesize_approx = None
+ if mobj:
+ filesize_approx = float(mobj.group('filesize')) * 1000000
+ tbr = float(mobj.group('tbr'))
+ view_count = int_or_none(self._html_search_regex(
+ r'<span><[^>]+class="icon-chart-bar".*?>(\d+)</span>',
+ webpage, 'view count', fatal=False))
+ description = self._html_search_regex(
+ r'(?s)<pre[^>]+id="lyrics-original"[^>]*>(.*?)</pre>',
+ webpage, 'song lyrics', fatal=False)
+ if description:
+ description = re.sub(' *\r *', '\n', description)
+
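+        # the download token is computed client-side: the sum of the char
+        # codes of the player's data-k attribute, plus the track id, plus
+        # the current epoch hour ('%d' truncates the floats below)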
+ k = self._search_regex(
+ r'<div[^>]+id="player-bottom"[^>]+data-k="([^"]+)">', webpage,
+ 'encoded data')
+ h = time.time() / 3600
+ a = sum(map(int, [compat_ord(c) for c in k])) + int(video_id) + h
+ video_url = 'http://x-minus.me/dl/minus?id=%s&tkn2=%df%d' % (video_id, a, h)
+
+ return {
+ 'id': video_id,
+ 'title': title,
+ 'url': video_url,
+ # The extension is unknown until actual downloading
+ 'ext': 'mp3',
+ 'duration': duration,
+ 'filesize_approx': filesize_approx,
+ 'tbr': tbr,
+ 'view_count': view_count,
+ 'description': description,
+ }
diff --git a/yt_dlp/extractor/xnxx.py b/yt_dlp/extractor/xnxx.py
new file mode 100644
index 0000000..1452aae
--- /dev/null
+++ b/yt_dlp/extractor/xnxx.py
@@ -0,0 +1,83 @@
+import re
+
+from .common import InfoExtractor
+from ..utils import (
+ determine_ext,
+ int_or_none,
+ NO_DEFAULT,
+ str_to_int,
+)
+
+
+class XNXXIE(InfoExtractor):
+ _VALID_URL = r'https?://(?:video|www)\.xnxx3?\.com/video-?(?P<id>[0-9a-z]+)/'
+ _TESTS = [{
+ 'url': 'http://www.xnxx.com/video-55awb78/skyrim_test_video',
+ 'md5': '7583e96c15c0f21e9da3453d9920fbba',
+ 'info_dict': {
+ 'id': '55awb78',
+ 'ext': 'mp4',
+ 'title': 'Skyrim Test Video',
+ 'thumbnail': r're:^https?://.*\.jpg',
+ 'duration': 469,
+ 'view_count': int,
+ 'age_limit': 18,
+ },
+ }, {
+ 'url': 'http://video.xnxx.com/video1135332/lida_naked_funny_actress_5_',
+ 'only_matching': True,
+ }, {
+ 'url': 'http://www.xnxx.com/video-55awb78/',
+ 'only_matching': True,
+ }, {
+ 'url': 'http://www.xnxx3.com/video-55awb78/',
+ 'only_matching': True,
+ }]
+
+ def _real_extract(self, url):
+ video_id = self._match_id(url)
+
+ webpage = self._download_webpage(url, video_id)
+
+ def get(meta, default=NO_DEFAULT, fatal=True):
+ return self._search_regex(
+ r'set%s\s*\(\s*(["\'])(?P<value>(?:(?!\1).)+)\1' % meta,
+ webpage, meta, default=default, fatal=fatal, group='value')
+
+ title = self._og_search_title(
+ webpage, default=None) or get('VideoTitle')
+
+ formats = []
+ for mobj in re.finditer(
+ r'setVideo(?:Url(?P<id>Low|High)|HLS)\s*\(\s*(?P<q>["\'])(?P<url>(?:https?:)?//.+?)(?P=q)', webpage):
+ format_url = mobj.group('url')
+ if determine_ext(format_url) == 'm3u8':
+ formats.extend(self._extract_m3u8_formats(
+ format_url, video_id, 'mp4', entry_protocol='m3u8_native',
+ quality=1, m3u8_id='hls', fatal=False))
+ else:
+ format_id = mobj.group('id')
+ if format_id:
+ format_id = format_id.lower()
+ formats.append({
+ 'url': format_url,
+ 'format_id': format_id,
+ 'quality': -1 if format_id == 'low' else 0,
+ })
+
+ thumbnail = self._og_search_thumbnail(webpage, default=None) or get(
+ 'ThumbUrl', fatal=False) or get('ThumbUrl169', fatal=False)
+ duration = int_or_none(self._og_search_property('duration', webpage))
+ view_count = str_to_int(self._search_regex(
+ r'id=["\']nb-views-number[^>]+>([\d,.]+)', webpage, 'view count',
+ default=None))
+
+ return {
+ 'id': video_id,
+ 'title': title,
+ 'thumbnail': thumbnail,
+ 'duration': duration,
+ 'view_count': view_count,
+ 'age_limit': 18,
+ 'formats': formats,
+ }
diff --git a/yt_dlp/extractor/xstream.py b/yt_dlp/extractor/xstream.py
new file mode 100644
index 0000000..8dd1cd9
--- /dev/null
+++ b/yt_dlp/extractor/xstream.py
@@ -0,0 +1,115 @@
+import re
+
+from .common import InfoExtractor
+from ..utils import (
+ int_or_none,
+ parse_iso8601,
+ xpath_with_ns,
+ xpath_text,
+ find_xpath_attr,
+)
+
+
+class XstreamIE(InfoExtractor):
+ _VALID_URL = r'''(?x)
+ (?:
+ xstream:|
+ https?://frontend\.xstream\.(?:dk|net)/
+ )
+ (?P<partner_id>[^/]+)
+ (?:
+ :|
+ /feed/video/\?.*?\bid=
+ )
+ (?P<id>\d+)
+ '''
+ _TESTS = [{
+ 'url': 'http://frontend.xstream.dk/btno/feed/video/?platform=web&id=86588',
+ 'md5': 'd7d17e3337dc80de6d3a540aefbe441b',
+ 'info_dict': {
+ 'id': '86588',
+ 'ext': 'mov',
+ 'title': 'Otto Wollertsen',
+ 'description': 'Vestlendingen Otto Fredrik Wollertsen',
+ 'timestamp': 1430473209,
+ 'upload_date': '20150501',
+ },
+ }, {
+ 'url': 'http://frontend.xstream.dk/ap/feed/video/?platform=web&id=21039',
+ 'only_matching': True,
+ }]
+
+ def _extract_video_info(self, partner_id, video_id):
+ data = self._download_xml(
+ 'http://frontend.xstream.dk/%s/feed/video/?platform=web&id=%s'
+ % (partner_id, video_id),
+ video_id)
+
+ NS_MAP = {
+ 'atom': 'http://www.w3.org/2005/Atom',
+ 'xt': 'http://xstream.dk/',
+ 'media': 'http://search.yahoo.com/mrss/',
+ }
+
+ entry = data.find(xpath_with_ns('./atom:entry', NS_MAP))
+
+ title = xpath_text(
+ entry, xpath_with_ns('./atom:title', NS_MAP), 'title')
+ description = xpath_text(
+ entry, xpath_with_ns('./atom:summary', NS_MAP), 'description')
+ timestamp = parse_iso8601(xpath_text(
+ entry, xpath_with_ns('./atom:published', NS_MAP), 'upload date'))
+
+ formats = []
+ media_group = entry.find(xpath_with_ns('./media:group', NS_MAP))
+ for media_content in media_group.findall(xpath_with_ns('./media:content', NS_MAP)):
+ media_url = media_content.get('url')
+ if not media_url:
+ continue
+ tbr = int_or_none(media_content.get('bitrate'))
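+            # rtmp URLs must be split into base URL, app and playpath for
+            # the rtmpdump-based downloader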
+ mobj = re.search(r'^(?P<url>rtmp://[^/]+/(?P<app>[^/]+))/(?P<playpath>.+)$', media_url)
+ if mobj:
+ formats.append({
+ 'url': mobj.group('url'),
+ 'play_path': 'mp4:%s' % mobj.group('playpath'),
+ 'app': mobj.group('app'),
+ 'ext': 'flv',
+ 'tbr': tbr,
+                    'format_id': 'rtmp-%d' % tbr if tbr else 'rtmp',
+ })
+ else:
+ formats.append({
+ 'url': media_url,
+ 'tbr': tbr,
+ })
+
+ link = find_xpath_attr(
+ entry, xpath_with_ns('./atom:link', NS_MAP), 'rel', 'original')
+ if link is not None:
+ formats.append({
+ 'url': link.get('href'),
+ 'format_id': link.get('rel'),
+ 'quality': 1,
+ })
+
+ thumbnails = [{
+ 'url': splash.get('url'),
+ 'width': int_or_none(splash.get('width')),
+ 'height': int_or_none(splash.get('height')),
+ } for splash in media_group.findall(xpath_with_ns('./xt:splash', NS_MAP))]
+
+ return {
+ 'id': video_id,
+ 'title': title,
+ 'description': description,
+ 'timestamp': timestamp,
+ 'formats': formats,
+ 'thumbnails': thumbnails,
+ }
+
+ def _real_extract(self, url):
+ mobj = self._match_valid_url(url)
+ partner_id = mobj.group('partner_id')
+ video_id = mobj.group('id')
+
+ return self._extract_video_info(partner_id, video_id)
diff --git a/yt_dlp/extractor/xvideos.py b/yt_dlp/extractor/xvideos.py
new file mode 100644
index 0000000..5df0715
--- /dev/null
+++ b/yt_dlp/extractor/xvideos.py
@@ -0,0 +1,180 @@
+import re
+
+from .common import InfoExtractor
+from ..compat import compat_urllib_parse_unquote
+from ..utils import (
+ clean_html,
+ determine_ext,
+ ExtractorError,
+ int_or_none,
+ parse_duration,
+)
+
+
+class XVideosIE(InfoExtractor):
+ _VALID_URL = r'''(?x)
+ https?://
+ (?:
+ (?:[^/]+\.)?xvideos2?\.com/video|
+ (?:www\.)?xvideos\.es/video|
+ (?:www|flashservice)\.xvideos\.com/embedframe/|
+ static-hw\.xvideos\.com/swf/xv-player\.swf\?.*?\bid_video=
+ )
+ (?P<id>[0-9]+)
+ '''
+ _TESTS = [{
+ 'url': 'https://www.xvideos.com/video4588838/motorcycle_guy_cucks_influencer_steals_his_gf',
+ 'md5': '14cea69fcb84db54293b1e971466c2e1',
+ 'info_dict': {
+ 'id': '4588838',
+ 'ext': 'mp4',
+ 'title': 'Motorcycle Guy Cucks Influencer, Steals his GF',
+ 'duration': 108,
+ 'age_limit': 18,
+ 'thumbnail': r're:^https://img-hw.xvideos-cdn.com/.+\.jpg',
+ }
+ }, {
+ # Broken HLS formats
+ 'url': 'https://www.xvideos.com/video65982001/what_s_her_name',
+ 'md5': 'b82d7d7ef7d65a84b1fa6965f81f95a5',
+ 'info_dict': {
+ 'id': '65982001',
+ 'ext': 'mp4',
+ 'title': 'what\'s her name?',
+ 'duration': 120,
+ 'age_limit': 18,
+ 'thumbnail': r're:^https://img-hw.xvideos-cdn.com/.+\.jpg',
+ }
+ }, {
+ 'url': 'https://flashservice.xvideos.com/embedframe/4588838',
+ 'only_matching': True,
+ }, {
+ 'url': 'https://www.xvideos.com/embedframe/4588838',
+ 'only_matching': True,
+ }, {
+ 'url': 'http://static-hw.xvideos.com/swf/xv-player.swf?id_video=4588838',
+ 'only_matching': True,
+ }, {
+ 'url': 'http://xvideos.com/video4588838/biker_takes_his_girl',
+ 'only_matching': True,
+ }, {
+ 'url': 'https://xvideos.com/video4588838/biker_takes_his_girl',
+ 'only_matching': True,
+ }, {
+ 'url': 'https://xvideos.es/video4588838/biker_takes_his_girl',
+ 'only_matching': True,
+ }, {
+ 'url': 'https://www.xvideos.es/video4588838/biker_takes_his_girl',
+ 'only_matching': True,
+ }, {
+ 'url': 'http://xvideos.es/video4588838/biker_takes_his_girl',
+ 'only_matching': True,
+ }, {
+ 'url': 'http://www.xvideos.es/video4588838/biker_takes_his_girl',
+ 'only_matching': True,
+ }, {
+ 'url': 'http://fr.xvideos.com/video4588838/biker_takes_his_girl',
+ 'only_matching': True,
+ }, {
+ 'url': 'https://fr.xvideos.com/video4588838/biker_takes_his_girl',
+ 'only_matching': True,
+ }, {
+ 'url': 'http://it.xvideos.com/video4588838/biker_takes_his_girl',
+ 'only_matching': True,
+ }, {
+ 'url': 'https://it.xvideos.com/video4588838/biker_takes_his_girl',
+ 'only_matching': True,
+ }, {
+ 'url': 'http://de.xvideos.com/video4588838/biker_takes_his_girl',
+ 'only_matching': True,
+ }, {
+ 'url': 'https://de.xvideos.com/video4588838/biker_takes_his_girl',
+ 'only_matching': True,
+ }]
+
+ def _real_extract(self, url):
+ video_id = self._match_id(url)
+ webpage = self._download_webpage(url, video_id)
+
+ mobj = re.search(r'<h1 class="inlineError">(.+?)</h1>', webpage)
+ if mobj:
+ raise ExtractorError('%s said: %s' % (self.IE_NAME, clean_html(mobj.group(1))), expected=True)
+
+ title = self._html_search_regex(
+ (r'<title>(?P<title>.+?)\s+-\s+XVID',
+ r'setVideoTitle\s*\(\s*(["\'])(?P<title>(?:(?!\1).)+)\1'),
+ webpage, 'title', default=None,
+ group='title') or self._og_search_title(webpage)
+
+ thumbnails = []
+ for preference, thumb_suffix in enumerate(('', '169')):
+ thumbnail_url = self._search_regex(
+ r'setThumbUrl%s\(\s*(["\'])(?P<thumbnail>(?:(?!\1).)+)\1' % thumb_suffix,
+ webpage, 'thumbnail', default=None, group='thumbnail')
+ if thumbnail_url:
+ thumbnails.append({
+ 'url': thumbnail_url,
+ 'preference': preference,
+ })
+
+ duration = int_or_none(self._og_search_property(
+ 'duration', webpage, default=None)) or parse_duration(
+ self._search_regex(
+ r'<span[^>]+class=["\']duration["\'][^>]*>.*?(\d[^<]+)',
+ webpage, 'duration', fatal=False))
+
+ formats = []
+
+ video_url = compat_urllib_parse_unquote(self._search_regex(
+ r'flv_url=(.+?)&', webpage, 'video URL', default=''))
+ if video_url:
+ formats.append({
+ 'url': video_url,
+ 'format_id': 'flv',
+ })
+
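+ # The player page defines setVideoUrlLow()/setVideoUrlHigh()/setVideoHLS()
+ # calls; the captured suffix (lowercased) selects the handling below.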
+ for kind, _, format_url in re.findall(
+ r'setVideo([^(]+)\((["\'])(http.+?)\2\)', webpage):
+ format_id = kind.lower()
+ if format_id == 'hls':
+ hls_formats = self._extract_m3u8_formats(
+ format_url, video_id, 'mp4',
+ entry_protocol='m3u8_native', m3u8_id='hls', fatal=False)
+ self._check_formats(hls_formats, video_id)
+ formats.extend(hls_formats)
+ elif format_id in ('urllow', 'urlhigh'):
+ formats.append({
+ 'url': format_url,
+ 'format_id': '%s-%s' % (determine_ext(format_url, 'mp4'), format_id[3:]),
+ 'quality': -2 if format_id.endswith('low') else None,
+ })
+
+ return {
+ 'id': video_id,
+ 'formats': formats,
+ 'title': title,
+ 'duration': duration,
+ 'thumbnails': thumbnails,
+ 'age_limit': 18,
+ }
+
+
+class XVideosQuickiesIE(InfoExtractor):
+ IE_NAME = 'xvideos:quickies'
+ _VALID_URL = r'https?://(?P<domain>(?:[^/]+\.)?xvideos2?\.com)/amateur-channels/[^#]+#quickies/a/(?P<id>\d+)'
+ _TESTS = [{
+ 'url': 'https://www.xvideos.com/amateur-channels/wifeluna#quickies/a/47258683',
+ 'md5': '16e322a93282667f1963915568f782c1',
+ 'info_dict': {
+ 'id': '47258683',
+ 'ext': 'mp4',
+ 'title': 'Verification video',
+ 'age_limit': 18,
+ 'duration': 16,
+ 'thumbnail': r're:^https://cdn.*-pic.xvideos-cdn.com/.+\.jpg',
+ }
+ }]
+
+ def _real_extract(self, url):
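+ # Quickies share the ID space of regular videos and the slug after
+ # /video<ID>/ appears to be ignored, so a '_' placeholder suffices.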
+ domain, id_ = self._match_valid_url(url).group('domain', 'id')
+ return self.url_result(f'https://{domain}/video{id_}/_', XVideosIE, id_)
diff --git a/yt_dlp/extractor/xxxymovies.py b/yt_dlp/extractor/xxxymovies.py
new file mode 100644
index 0000000..e3e3a9f
--- /dev/null
+++ b/yt_dlp/extractor/xxxymovies.py
@@ -0,0 +1,77 @@
+from .common import InfoExtractor
+from ..utils import (
+ int_or_none,
+ parse_duration,
+)
+
+
+class XXXYMoviesIE(InfoExtractor):
+ _VALID_URL = r'https?://(?:www\.)?xxxymovies\.com/videos/(?P<id>\d+)/(?P<display_id>[^/]+)'
+ _TEST = {
+ 'url': 'http://xxxymovies.com/videos/138669/ecstatic-orgasm-sofcore/',
+ 'md5': '810b1bdbbffff89dd13bdb369fe7be4b',
+ 'info_dict': {
+ 'id': '138669',
+ 'display_id': 'ecstatic-orgasm-sofcore',
+ 'ext': 'mp4',
+ 'title': 'Ecstatic Orgasm Sofcore',
+ 'duration': 931,
+ 'categories': list,
+ 'view_count': int,
+ 'like_count': int,
+ 'dislike_count': int,
+ 'age_limit': 18,
+ }
+ }
+
+ def _real_extract(self, url):
+ mobj = self._match_valid_url(url)
+ video_id = mobj.group('id')
+ display_id = mobj.group('display_id')
+
+ webpage = self._download_webpage(url, display_id)
+
+ video_url = self._search_regex(
+ r"video_url\s*:\s*'([^']+)'", webpage, 'video URL')
+
+ title = self._html_search_regex(
+ [r'<div[^>]+\bclass="block_header"[^>]*>\s*<h1>([^<]+)<',
+ r'<title>(.*?)\s*-\s*(?:XXXYMovies\.com|XXX\s+Movies)</title>'],
+ webpage, 'title')
+
+ thumbnail = self._search_regex(
+ r"preview_url\s*:\s*'([^']+)'",
+ webpage, 'thumbnail', fatal=False)
+
+ categories = self._html_search_meta(
+ 'keywords', webpage, 'categories', default='').split(',')
+
+ duration = parse_duration(self._search_regex(
+ r'<span>Duration:</span>\s*(\d+:\d+)',
+ webpage, 'duration', fatal=False))
+
+ view_count = int_or_none(self._html_search_regex(
+ r'<div class="video_views">\s*(\d+)',
+ webpage, 'view count', fatal=False))
+ like_count = int_or_none(self._search_regex(
+ r'>\s*Likes? <b>\((\d+)\)',
+ webpage, 'like count', fatal=False))
+ dislike_count = int_or_none(self._search_regex(
+ r'>\s*Dislike <b>\((\d+)\)</b>',
+ webpage, 'dislike count', fatal=False))
+
+ age_limit = self._rta_search(webpage)
+
+ return {
+ 'id': video_id,
+ 'display_id': display_id,
+ 'url': video_url,
+ 'title': title,
+ 'thumbnail': thumbnail,
+ 'categories': categories,
+ 'duration': duration,
+ 'view_count': view_count,
+ 'like_count': like_count,
+ 'dislike_count': dislike_count,
+ 'age_limit': age_limit,
+ }
diff --git a/yt_dlp/extractor/yahoo.py b/yt_dlp/extractor/yahoo.py
new file mode 100644
index 0000000..24148a0
--- /dev/null
+++ b/yt_dlp/extractor/yahoo.py
@@ -0,0 +1,430 @@
+import hashlib
+import itertools
+import urllib.parse
+
+from .common import InfoExtractor, SearchInfoExtractor
+from .youtube import YoutubeIE
+from ..utils import (
+ ExtractorError,
+ clean_html,
+ int_or_none,
+ mimetype2ext,
+ parse_iso8601,
+ traverse_obj,
+ try_get,
+ url_or_none,
+)
+
+
+class YahooIE(InfoExtractor):
+ IE_DESC = 'Yahoo screen and movies'
+ _VALID_URL = r'(?P<url>https?://(?:(?P<country>[a-zA-Z]{2}(?:-[a-zA-Z]{2})?|malaysia)\.)?(?:[\da-zA-Z_-]+\.)?yahoo\.com/(?:[^/]+/)*(?P<id>[^?&#]*-[0-9]+(?:-[a-z]+)?)\.html)'
+ _EMBED_REGEX = [r'<iframe[^>]+?src=(["\'])(?P<url>https?://(?:screen|movies)\.yahoo\.com/.+?\.html\?format=embed)\1']
+
+ _TESTS = [{
+ 'url': 'http://screen.yahoo.com/julian-smith-travis-legg-watch-214727115.html',
+ 'info_dict': {
+ 'id': '2d25e626-2378-391f-ada0-ddaf1417e588',
+ 'ext': 'mp4',
+ 'title': 'Julian Smith & Travis Legg Watch Julian Smith',
+ 'description': 'Julian and Travis watch Julian Smith',
+ 'duration': 6863,
+ 'timestamp': 1369812016,
+ 'upload_date': '20130529',
+ },
+ 'skip': 'No longer exists',
+ }, {
+ 'url': 'https://screen.yahoo.com/community/community-sizzle-reel-203225340.html?format=embed',
+ 'md5': '7993e572fac98e044588d0b5260f4352',
+ 'info_dict': {
+ 'id': '4fe78544-8d48-39d8-97cd-13f205d9fcdb',
+ 'ext': 'mp4',
+ 'title': "Yahoo Saves 'Community'",
+ 'description': 'md5:4d4145af2fd3de00cbb6c1d664105053',
+ 'duration': 170,
+ 'timestamp': 1406838636,
+ 'upload_date': '20140731',
+ },
+ 'skip': 'Unfortunately, this video is not available in your region',
+ }, {
+ 'url': 'https://uk.screen.yahoo.com/editor-picks/cute-raccoon-freed-drain-using-091756545.html',
+ 'md5': '71298482f7c64cbb7fa064e4553ff1c1',
+ 'info_dict': {
+ 'id': 'b3affa53-2e14-3590-852b-0e0db6cd1a58',
+ 'ext': 'webm',
+ 'title': 'Cute Raccoon Freed From Drain\u00a0Using Angle Grinder',
+ 'description': 'md5:f66c890e1490f4910a9953c941dee944',
+ 'duration': 97,
+ 'timestamp': 1414489862,
+ 'upload_date': '20141028',
+ },
+ 'skip': 'No longer exists',
+ }, {
+ 'url': 'http://news.yahoo.com/video/china-moses-crazy-blues-104538833.html',
+ 'md5': '88e209b417f173d86186bef6e4d1f160',
+ 'info_dict': {
+ 'id': 'f885cf7f-43d4-3450-9fac-46ac30ece521',
+ 'ext': 'mp4',
+ 'title': 'China Moses Is Crazy About the Blues',
+ 'description': 'md5:9900ab8cd5808175c7b3fe55b979bed0',
+ 'duration': 128,
+ 'timestamp': 1385722202,
+ 'upload_date': '20131129',
+ },
+ }, {
+ 'url': 'https://www.yahoo.com/movies/v/true-story-trailer-173000497.html',
+ 'md5': '2a9752f74cb898af5d1083ea9f661b58',
+ 'info_dict': {
+ 'id': '071c4013-ce30-3a93-a5b2-e0413cd4a9d1',
+ 'ext': 'mp4',
+ 'title': '\'True Story\' Trailer',
+ 'description': 'True Story',
+ 'duration': 150,
+ 'timestamp': 1418919206,
+ 'upload_date': '20141218',
+ },
+ }, {
+ 'url': 'https://gma.yahoo.com/pizza-delivery-man-surprised-huge-tip-college-kids-195200785.html',
+ 'only_matching': True,
+ }, {
+ 'note': 'NBC Sports embeds',
+ 'url': 'http://sports.yahoo.com/blogs/ncaab-the-dagger/tyler-kalinoski-s-buzzer-beater-caps-davidson-s-comeback-win-185609842.html?guid=nbc_cbk_davidsonbuzzerbeater_150313',
+ 'info_dict': {
+ 'id': '9CsDKds0kvHI',
+ 'ext': 'flv',
+ 'description': 'md5:df390f70a9ba7c95ff1daace988f0d8d',
+ 'title': 'Tyler Kalinoski hits buzzer-beater to lift Davidson',
+ 'upload_date': '20150313',
+ 'uploader': 'NBCU-SPORTS',
+ 'timestamp': 1426270238,
+ },
+ }, {
+ 'url': 'https://tw.news.yahoo.com/-100120367.html',
+ 'only_matching': True,
+ }, {
+ # Query result is embedded in webpage, but explicit request to video API fails with geo restriction
+ 'url': 'https://screen.yahoo.com/community/communitary-community-episode-1-ladders-154501237.html',
+ 'md5': '4fbafb9c9b6f07aa8f870629f6671b35',
+ 'info_dict': {
+ 'id': '1f32853c-a271-3eef-8cb6-f6d6872cb504',
+ 'ext': 'mp4',
+ 'title': 'Communitary - Community Episode 1: Ladders',
+ 'description': 'md5:8fc39608213295748e1e289807838c97',
+ 'duration': 1646,
+ 'timestamp': 1440436550,
+ 'upload_date': '20150824',
+ 'series': 'Communitary',
+ 'season_number': 6,
+ 'episode_number': 1,
+ },
+ 'skip': 'No longer exists',
+ }, {
+ # ytwnews://cavideo/
+ 'url': 'https://tw.video.yahoo.com/movie-tw/單車天使-中文版預-092316541.html',
+ 'info_dict': {
+ 'id': 'ba133ff2-0793-3510-b636-59dfe9ff6cff',
+ 'ext': 'mp4',
+ 'title': '單車天使 - 中文版預',
+ 'description': '中文版預',
+ 'timestamp': 1476696196,
+ 'upload_date': '20161017',
+ },
+ 'params': {
+ 'skip_download': True,
+ },
+ }, {
+ # Contains both a Yahoo-hosted video and multiple YouTube embeds
+ 'url': 'https://www.yahoo.com/entertainment/gwen-stefani-reveals-the-pop-hit-she-passed-on-assigns-it-to-her-voice-contestant-instead-033045672.html',
+ 'info_dict': {
+ 'id': '46c5d95a-528f-3d03-b732-732fcadd51de',
+ 'title': 'Gwen Stefani reveals the pop hit she passed on, assigns it to her \'Voice\' contestant instead',
+ 'description': 'Gwen decided not to record this hit herself, but she decided it was the perfect fit for Kyndall Inskeep.',
+ },
+ 'playlist': [{
+ 'info_dict': {
+ 'id': '966d4262-4fd1-3aaa-b45b-049ca6e38ba6',
+ 'ext': 'mp4',
+ 'title': 'Gwen Stefani reveals she turned down one of Sia\'s best songs',
+ 'description': 'On "The Voice" Tuesday, Gwen Stefani told Taylor Swift which Sia hit was almost hers.',
+ 'timestamp': 1572406500,
+ 'upload_date': '20191030',
+ },
+ }, {
+ 'info_dict': {
+ 'id': '352CFDOQrKg',
+ 'ext': 'mp4',
+ 'title': 'Kyndal Inskeep "Performs the Hell Out of" Sia\'s "Elastic Heart" - The Voice Knockouts 2019',
+ 'description': 'md5:7fe8e3d5806f96002e55f190d1d94479',
+ 'uploader': 'The Voice',
+ 'uploader_id': 'NBCTheVoice',
+ 'upload_date': '20191029',
+ },
+ }],
+ 'params': {
+ 'playlistend': 2,
+ },
+ 'expected_warnings': ['HTTP Error 404', 'Ignoring subtitle tracks'],
+ }, {
+ 'url': 'https://malaysia.news.yahoo.com/video/bystanders-help-ontario-policeman-bust-190932818.html',
+ 'only_matching': True,
+ }, {
+ 'url': 'https://es-us.noticias.yahoo.com/es-la-puerta-irrompible-que-110539379.html',
+ 'only_matching': True,
+ }, {
+ 'url': 'https://www.yahoo.com/entertainment/v/longtime-cbs-news-60-minutes-032036500-cbs.html',
+ 'only_matching': True,
+ }]
+
+ def _extract_yahoo_video(self, video_id, country):
+ video = self._download_json(
+ 'https://%s.yahoo.com/_td/api/resource/VideoService.videos;view=full;video_ids=["%s"]' % (country, video_id),
+ video_id, 'Downloading video JSON metadata')[0]
+ title = video['title']
+
+ if country == 'malaysia':
+ country = 'my'
+
+ is_live = video.get('live_state') == 'live'
+ fmts = ('m3u8',) if is_live else ('webm', 'mp4')
+
+ urls = []
+ formats = []
+ subtitles = {}
+ for fmt in fmts:
+ media_obj = self._download_json(
+ 'https://video-api.yql.yahoo.com/v1/video/sapi/streams/' + video_id,
+ video_id, 'Downloading %s JSON metadata' % fmt,
+ headers=self.geo_verification_headers(), query={
+ 'format': fmt,
+ 'region': country.upper(),
+ })['query']['results']['mediaObj'][0]
+ msg = media_obj.get('status', {}).get('msg')
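+ # the last status message survives the loop; it is checked afterwards
+ # to raise a geo-restriction error when no formats were extracted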
+
+ for s in media_obj.get('streams', []):
+ host = s.get('host')
+ path = s.get('path')
+ if not host or not path:
+ continue
+ s_url = host + path
+ if s.get('format') == 'm3u8':
+ formats.extend(self._extract_m3u8_formats(
+ s_url, video_id, 'mp4', m3u8_id='hls', fatal=False))
+ continue
+ tbr = int_or_none(s.get('bitrate'))
+ formats.append({
+ 'url': s_url,
+ 'format_id': fmt + ('-%d' % tbr if tbr else ''),
+ 'width': int_or_none(s.get('width')),
+ 'height': int_or_none(s.get('height')),
+ 'tbr': tbr,
+ 'fps': int_or_none(s.get('framerate')),
+ })
+
+ for cc in media_obj.get('closedcaptions', []):
+ cc_url = cc.get('url')
+ if not cc_url or cc_url in urls:
+ continue
+ urls.append(cc_url)
+ subtitles.setdefault(cc.get('lang') or 'en-US', []).append({
+ 'url': cc_url,
+ 'ext': mimetype2ext(cc.get('content_type')),
+ })
+
+ streaming_url = video.get('streaming_url')
+ if streaming_url and not is_live:
+ formats.extend(self._extract_m3u8_formats(
+ streaming_url, video_id, 'mp4',
+ 'm3u8_native', m3u8_id='hls', fatal=False))
+
+ if not formats and msg == 'geo restricted':
+ self.raise_geo_restricted(metadata_available=True)
+
+ thumbnails = []
+ for thumb in video.get('thumbnails', []):
+ thumb_url = thumb.get('url')
+ if not thumb_url:
+ continue
+ thumbnails.append({
+ 'id': thumb.get('tag'),
+ 'url': thumb.get('url'),
+ 'width': int_or_none(thumb.get('width')),
+ 'height': int_or_none(thumb.get('height')),
+ })
+
+ series_info = video.get('series_info') or {}
+
+ return {
+ 'id': video_id,
+ 'title': title,
+ 'formats': formats,
+ 'thumbnails': thumbnails,
+ 'description': clean_html(video.get('description')),
+ 'timestamp': parse_iso8601(video.get('publish_time')),
+ 'subtitles': subtitles,
+ 'duration': int_or_none(video.get('duration')),
+ 'view_count': int_or_none(video.get('view_count')),
+ 'is_live': is_live,
+ 'series': video.get('show_name'),
+ 'season_number': int_or_none(series_info.get('season_number')),
+ 'episode_number': int_or_none(series_info.get('episode_number')),
+ }
+
+ def _real_extract(self, url):
+ url, country, display_id = self._match_valid_url(url).groups()
+ if not country:
+ country = 'us'
+ else:
+ country = country.split('-')[0]
+
+ items = self._download_json(
+ 'https://%s.yahoo.com/caas/content/article' % country, display_id,
+ 'Downloading content JSON metadata', query={
+ 'url': url
+ })['items'][0]
+
+ item = items['data']['partnerData']
+ if item.get('type') != 'video':
+ entries = []
+
+ cover = item.get('cover') or {}
+ if cover.get('type') == 'yvideo':
+ cover_url = cover.get('url')
+ if cover_url:
+ entries.append(self.url_result(
+ cover_url, 'Yahoo', cover.get('uuid')))
+
+ for e in (item.get('body') or []):
+ if e.get('type') == 'videoIframe':
+ iframe_url = e.get('url')
+ if iframe_url:
+ entries.append(self.url_result(iframe_url))
+
+ if item.get('type') == 'storywithleadvideo':
+ iframe_url = try_get(item, lambda x: x['meta']['player']['url'])
+ if iframe_url:
+ entries.append(self.url_result(iframe_url))
+ else:
+ self.report_warning("Yahoo didn't provide an iframe url for this storywithleadvideo")
+
+ if items.get('markup'):
+ entries.extend(
+ self.url_result(yt_url) for yt_url in YoutubeIE._extract_embed_urls(url, items['markup']))
+
+ return self.playlist_result(
+ entries, item.get('uuid'),
+ item.get('title'), item.get('summary'))
+
+ info = self._extract_yahoo_video(item['uuid'], country)
+ info['display_id'] = display_id
+ return info
+
+
+class YahooSearchIE(SearchInfoExtractor):
+ IE_DESC = 'Yahoo screen search'
+ _MAX_RESULTS = 1000
+ IE_NAME = 'screen.yahoo:search'
+ _SEARCH_KEY = 'yvsearch'
+
+ def _search_results(self, query):
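+ # Results come in pages of 30; info['m'] carries 'last'/'total'
+ # markers that signal when the final page has been reached.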
+ for pagenum in itertools.count(0):
+ result_url = 'http://video.search.yahoo.com/search/?p=%s&fr=screen&o=js&gs=0&b=%d' % (urllib.parse.quote_plus(query), pagenum * 30)
+ info = self._download_json(
+ result_url, query, note=f'Downloading results page {pagenum + 1}')
+ yield from (self.url_result(result['rurl']) for result in info['results'])
+ if info['m']['last'] >= info['m']['total'] - 1:
+ break
+
+
+class YahooJapanNewsIE(InfoExtractor):
+ IE_NAME = 'yahoo:japannews'
+ IE_DESC = 'Yahoo! Japan News'
+ _VALID_URL = r'https?://news\.yahoo\.co\.jp/(?:articles|feature)/(?P<id>[a-zA-Z0-9]+)'
+ _GEO_COUNTRIES = ['JP']
+ _TESTS = [{
+ 'url': 'https://news.yahoo.co.jp/articles/a70fe3a064f1cfec937e2252c7fc6c1ba3201c0e',
+ 'info_dict': {
+ 'id': 'a70fe3a064f1cfec937e2252c7fc6c1ba3201c0e',
+ 'ext': 'mp4',
+ 'title': '【独自】安倍元総理「国葬」中止求め“脅迫メール”…「子ども誘拐」“送信者”を追跡',
+ 'description': 'md5:1c06974575f930f692d8696fbcfdc546',
+ 'thumbnail': r're:https://.+',
+ },
+ 'params': {
+ 'skip_download': True,
+ },
+ }, {
+ 'url': 'https://news.yahoo.co.jp/feature/1356',
+ 'only_matching': True,
+ }]
+
+ def _extract_formats(self, json_data, content_id):
+ formats = []
+
+ for vid in traverse_obj(json_data, ('ResultSet', 'Result', ..., 'VideoUrlSet', 'VideoUrl', ...)) or []:
+ delivery = vid.get('delivery')
+ url = url_or_none(vid.get('Url'))
+ if not delivery or not url:
+ continue
+ if delivery == 'hls':
+ formats.extend(
+ self._extract_m3u8_formats(
+ url, content_id, 'mp4', 'm3u8_native',
+ m3u8_id='hls', fatal=False))
+ else:
+ formats.append({
+ 'url': url,
+ 'format_id': f'http-{vid.get("bitrate")}',
+ 'height': int_or_none(vid.get('height')),
+ 'width': int_or_none(vid.get('width')),
+ 'tbr': int_or_none(vid.get('bitrate')),
+ })
+ self._remove_duplicate_formats(formats)
+
+ return formats
+
+ def _real_extract(self, url):
+ video_id = self._match_id(url)
+ webpage = self._download_webpage(url, video_id)
+ preloaded_state = self._search_json(r'__PRELOADED_STATE__\s*=', webpage, 'preloaded state', video_id)
+
+ content_id = traverse_obj(
+ preloaded_state, ('articleDetail', 'paragraphs', ..., 'objectItems', ..., 'video', 'vid'),
+ get_all=False, expected_type=int)
+ if content_id is None:
+ raise ExtractorError('This article does not contain a video', expected=True)
+
+ HOST = 'news.yahoo.co.jp'
+ space_id = traverse_obj(preloaded_state, ('pageData', 'spaceId'), expected_type=str)
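+ # the API access key ('ak') is the MD5 of '<spaceId>_<host>'; it is
+ # sent empty when no space ID could be found in the preloaded state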
+ json_data = self._download_json(
+ f'https://feapi-yvpub.yahooapis.jp/v1/content/{content_id}',
+ video_id, query={
+ 'appid': 'dj0zaiZpPVZMTVFJR0FwZWpiMyZzPWNvbnN1bWVyc2VjcmV0Jng9YjU-',
+ 'output': 'json',
+ 'domain': HOST,
+ 'ak': hashlib.md5('_'.join((space_id, HOST)).encode()).hexdigest() if space_id else '',
+ 'device_type': '1100',
+ })
+
+ title = (
+ traverse_obj(preloaded_state,
+ ('articleDetail', 'headline'), ('pageData', 'pageParam', 'title'),
+ expected_type=str)
+ or self._html_search_meta(('og:title', 'twitter:title'), webpage, 'title', default=None)
+ or self._html_extract_title(webpage))
+ description = (
+ traverse_obj(preloaded_state, ('pageData', 'description'), expected_type=str)
+ or self._html_search_meta(
+ ('og:description', 'description', 'twitter:description'),
+ webpage, 'description', default=None))
+ thumbnail = (
+ traverse_obj(preloaded_state, ('pageData', 'ogpImage'), expected_type=str)
+ or self._og_search_thumbnail(webpage, default=None)
+ or self._html_search_meta('twitter:image', webpage, 'thumbnail', default=None))
+
+ return {
+ 'id': video_id,
+ 'title': title,
+ 'description': description,
+ 'thumbnail': thumbnail,
+ 'formats': self._extract_formats(json_data, video_id),
+ }
diff --git a/yt_dlp/extractor/yandexdisk.py b/yt_dlp/extractor/yandexdisk.py
new file mode 100644
index 0000000..d5eecbd
--- /dev/null
+++ b/yt_dlp/extractor/yandexdisk.py
@@ -0,0 +1,142 @@
+import json
+
+from .common import InfoExtractor
+from ..utils import (
+ determine_ext,
+ float_or_none,
+ int_or_none,
+ mimetype2ext,
+ try_get,
+ urljoin,
+)
+
+
+class YandexDiskIE(InfoExtractor):
+ _VALID_URL = r'''(?x)https?://
+ (?P<domain>
+ yadi\.sk|
+ disk\.yandex\.
+ (?:
+ az|
+ by|
+ co(?:m(?:\.(?:am|ge|tr))?|\.il)|
+ ee|
+ fr|
+ k[gz]|
+ l[tv]|
+ md|
+ t[jm]|
+ u[az]|
+ ru
+ )
+ )/(?:[di]/|public.*?\bhash=)(?P<id>[^/?#&]+)'''
+
+ _TESTS = [{
+ 'url': 'https://yadi.sk/i/VdOeDou8eZs6Y',
+ 'md5': 'a4a8d52958c8fddcf9845935070402ae',
+ 'info_dict': {
+ 'id': 'VdOeDou8eZs6Y',
+ 'ext': 'mp4',
+ 'title': '4.mp4',
+ 'duration': 168.6,
+ 'uploader': 'y.botova',
+ 'uploader_id': '300043621',
+ 'view_count': int,
+ },
+ 'expected_warnings': ['Unable to download JSON metadata'],
+ }, {
+ 'url': 'https://yadi.sk/d/h3WAXvDS3Li3Ce',
+ 'only_matching': True,
+ }, {
+ 'url': 'https://yadi.sk/public?hash=5DZ296JK9GWCLp02f6jrObjnctjRxMs8L6%2B%2FuhNqk38%3D',
+ 'only_matching': True,
+ }]
+
+ def _real_extract(self, url):
+ domain, video_id = self._match_valid_url(url).groups()
+
+ webpage = self._download_webpage(url, video_id)
+ store = self._parse_json(self._search_regex(
+ r'<script[^>]+id="store-prefetch"[^>]*>\s*({.+?})\s*</script>',
+ webpage, 'store'), video_id)
+ resource = store['resources'][store['rootResourceId']]
+
+ title = resource['name']
+ meta = resource.get('meta') or {}
+
+ public_url = meta.get('short_url')
+ if public_url:
+ video_id = self._match_id(public_url)
+
+ source_url = (self._download_json(
+ 'https://cloud-api.yandex.net/v1/disk/public/resources/download',
+ video_id, query={'public_key': url}, fatal=False) or {}).get('href')
+ video_streams = resource.get('videoStreams') or {}
+ video_hash = resource.get('hash') or url
+ environment = store.get('environment') or {}
+ sk = environment.get('sk')
+ yandexuid = environment.get('yandexuid')
+ if sk and yandexuid and not (source_url and video_streams):
+ self._set_cookie(domain, 'yandexuid', yandexuid)
+
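+ # both fallback endpoints (download-url, get-video-streams) live under
+ # /public/api/ and expect the resource hash plus the session key 'sk'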
+ def call_api(action):
+ return (self._download_json(
+ urljoin(url, '/public/api/') + action, video_id, data=json.dumps({
+ 'hash': video_hash,
+ 'sk': sk,
+ }).encode(), headers={
+ 'Content-Type': 'text/plain',
+ }, fatal=False) or {}).get('data') or {}
+ if not source_url:
+ # TODO: figure out how to detect if download limit has
+ # been reached and then avoid unnecessary source format
+ # extraction requests
+ source_url = call_api('download-url').get('url')
+ if not video_streams:
+ video_streams = call_api('get-video-streams')
+
+ formats = []
+ if source_url:
+ formats.append({
+ 'url': source_url,
+ 'format_id': 'source',
+ 'ext': determine_ext(title, meta.get('ext') or mimetype2ext(meta.get('mime_type')) or 'mp4'),
+ 'quality': 1,
+ 'filesize': int_or_none(meta.get('size')),
+ })
+
+ for video in (video_streams.get('videos') or []):
+ format_url = video.get('url')
+ if not format_url:
+ continue
+ if video.get('dimension') == 'adaptive':
+ formats.extend(self._extract_m3u8_formats(
+ format_url, video_id, 'mp4', 'm3u8_native',
+ m3u8_id='hls', fatal=False))
+ else:
+ size = video.get('size') or {}
+ height = int_or_none(size.get('height'))
+ format_id = 'hls'
+ if height:
+ format_id += '-%dp' % height
+ formats.append({
+ 'ext': 'mp4',
+ 'format_id': format_id,
+ 'height': height,
+ 'protocol': 'm3u8_native',
+ 'url': format_url,
+ 'width': int_or_none(size.get('width')),
+ })
+
+ uid = resource.get('uid')
+ display_name = try_get(store, lambda x: x['users'][uid]['displayName'])
+
+ return {
+ 'id': video_id,
+ 'title': title,
+ 'duration': float_or_none(video_streams.get('duration'), 1000),
+ 'uploader': display_name,
+ 'uploader_id': uid,
+ 'view_count': int_or_none(meta.get('views_counter')),
+ 'formats': formats,
+ }
diff --git a/yt_dlp/extractor/yandexmusic.py b/yt_dlp/extractor/yandexmusic.py
new file mode 100644
index 0000000..794dc3e
--- /dev/null
+++ b/yt_dlp/extractor/yandexmusic.py
@@ -0,0 +1,454 @@
+import hashlib
+import itertools
+
+from .common import InfoExtractor
+from ..compat import compat_str
+from ..utils import (
+ ExtractorError,
+ float_or_none,
+ int_or_none,
+ try_get,
+)
+
+
+class YandexMusicBaseIE(InfoExtractor):
+ _VALID_URL_BASE = r'https?://music\.yandex\.(?P<tld>ru|kz|ua|by|com)'
+
+ @staticmethod
+ def _handle_error(response):
+ if isinstance(response, dict):
+ error = response.get('error')
+ if error:
+ raise ExtractorError(error, expected=True)
+ if response.get('type') == 'captcha' or 'captcha' in response:
+ YandexMusicBaseIE._raise_captcha()
+
+ @staticmethod
+ def _raise_captcha():
+ raise ExtractorError(
+ 'YandexMusic has considered yt-dlp requests automated and '
+ 'asks you to solve a CAPTCHA. You can either wait for some '
+ 'time until unblocked and optionally use --sleep-interval '
+ 'in future or alternatively you can go to https://music.yandex.ru/ '
+ 'solve CAPTCHA, then export cookies and pass cookie file to '
+ 'yt-dlp with --cookies',
+ expected=True)
+
+ def _download_webpage_handle(self, *args, **kwargs):
+ webpage = super()._download_webpage_handle(*args, **kwargs)
+ # the result is a (content, handle) tuple; inspect the HTML itself
+ if webpage and 'Нам очень жаль, но&nbsp;запросы, поступившие с&nbsp;вашего IP-адреса, похожи на&nbsp;автоматические.' in webpage[0]:
+ self._raise_captcha()
+ return webpage
+
+ def _download_json(self, *args, **kwargs):
+ response = super()._download_json(*args, **kwargs)
+ self._handle_error(response)
+ return response
+
+ def _call_api(self, ep, tld, url, item_id, note, query):
+ return self._download_json(
+ 'https://music.yandex.%s/handlers/%s.jsx' % (tld, ep),
+ item_id, note,
+ fatal=False,
+ headers={
+ 'Referer': url,
+ 'X-Requested-With': 'XMLHttpRequest',
+ 'X-Retpath-Y': url,
+ },
+ query=query)
+
+
+class YandexMusicTrackIE(YandexMusicBaseIE):
+ IE_NAME = 'yandexmusic:track'
+ IE_DESC = 'Яндекс.Музыка - Трек'
+ _VALID_URL = r'%s/album/(?P<album_id>\d+)/track/(?P<id>\d+)' % YandexMusicBaseIE._VALID_URL_BASE
+
+ _TESTS = [{
+ 'url': 'http://music.yandex.ru/album/540508/track/4878838',
+ 'md5': 'dec8b661f12027ceaba33318787fff76',
+ 'info_dict': {
+ 'id': '4878838',
+ 'ext': 'mp3',
+ 'title': 'md5:c63e19341fdbe84e43425a30bc777856',
+ 'filesize': int,
+ 'duration': 193.04,
+ 'track': 'md5:210508c6ffdfd67a493a6c378f22c3ff',
+ 'album': 'md5:cd04fb13c4efeafdfa0a6a6aca36d01a',
+ 'album_artist': 'md5:5f54c35462c07952df33d97cfb5fc200',
+ 'artist': 'md5:e6fd86621825f14dc0b25db3acd68160',
+ 'release_year': 2009,
+ },
+ # 'skip': 'Travis CI servers blocked by YandexMusic',
+ }, {
+ # multiple discs
+ 'url': 'http://music.yandex.ru/album/3840501/track/705105',
+ 'md5': '82a54e9e787301dd45aba093cf6e58c0',
+ 'info_dict': {
+ 'id': '705105',
+ 'ext': 'mp3',
+ 'title': 'md5:f86d4a9188279860a83000277024c1a6',
+ 'filesize': int,
+ 'duration': 239.27,
+ 'track': 'md5:40f887f0666ba1aa10b835aca44807d1',
+ 'album': 'md5:624f5224b14f5c88a8e812fd7fbf1873',
+ 'album_artist': 'md5:dd35f2af4e8927100cbe6f5e62e1fb12',
+ 'artist': 'md5:dd35f2af4e8927100cbe6f5e62e1fb12',
+ 'release_year': 2016,
+ 'genre': 'pop',
+ 'disc_number': 2,
+ 'track_number': 9,
+ },
+ # 'skip': 'Travis CI servers blocked by YandexMusic',
+ }, {
+ 'url': 'http://music.yandex.com/album/540508/track/4878838',
+ 'only_matching': True,
+ }]
+
+ def _real_extract(self, url):
+ mobj = self._match_valid_url(url)
+ tld, album_id, track_id = mobj.group('tld'), mobj.group('album_id'), mobj.group('id')
+
+ track = self._call_api(
+ 'track', tld, url, track_id, 'Downloading track JSON',
+ {'track': '%s:%s' % (track_id, album_id)})['track']
+ track_title = track['title']
+
+ download_data = self._download_json(
+ 'https://music.yandex.ru/api/v2.1/handlers/track/%s:%s/web-album_track-track-track-main/download/m' % (track_id, album_id),
+ track_id, 'Downloading track location url JSON', query={'hq': 1}, headers={'X-Retpath-Y': url})
+
+ fd_data = self._download_json(
+ download_data['src'], track_id,
+ 'Downloading track location JSON',
+ query={'format': 'json'})
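+ # the direct MP3 URL is signed: MD5 of a fixed salt, the storage path
+ # (without its leading slash) and the server-supplied 's' token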
+ key = hashlib.md5(('XGRlBW9FXlekgbPrRHuSiA' + fd_data['path'][1:] + fd_data['s']).encode('utf-8')).hexdigest()
+ f_url = 'http://%s/get-mp3/%s/%s?track-id=%s' % (fd_data['host'], key, fd_data['ts'] + fd_data['path'], track['id'])
+
+ thumbnail = None
+ cover_uri = track.get('albums', [{}])[0].get('coverUri')
+ if cover_uri:
+ thumbnail = cover_uri.replace('%%', 'orig')
+ if not thumbnail.startswith('http'):
+ thumbnail = 'http://' + thumbnail
+
+ track_info = {
+ 'id': track_id,
+ 'ext': 'mp3',
+ 'url': f_url,
+ 'filesize': int_or_none(track.get('fileSize')),
+ 'duration': float_or_none(track.get('durationMs'), 1000),
+ 'thumbnail': thumbnail,
+ 'track': track_title,
+ 'acodec': download_data.get('codec'),
+ 'abr': int_or_none(download_data.get('bitrate')),
+ }
+
+ def extract_artist_name(artist):
+ decomposed = artist.get('decomposed')
+ if not isinstance(decomposed, list):
+ return artist['name']
+ parts = [artist['name']]
+ for element in decomposed:
+ if isinstance(element, dict) and element.get('name'):
+ parts.append(element['name'])
+ elif isinstance(element, compat_str):
+ parts.append(element)
+ return ''.join(parts)
+
+ def extract_artist(artist_list):
+ if artist_list and isinstance(artist_list, list):
+ artists_names = [extract_artist_name(a) for a in artist_list if a.get('name')]
+ if artists_names:
+ return ', '.join(artists_names)
+
+ albums = track.get('albums')
+ if albums and isinstance(albums, list):
+ album = albums[0]
+ if isinstance(album, dict):
+ year = album.get('year')
+ disc_number = int_or_none(try_get(
+ album, lambda x: x['trackPosition']['volume']))
+ track_number = int_or_none(try_get(
+ album, lambda x: x['trackPosition']['index']))
+ track_info.update({
+ 'album': album.get('title'),
+ 'album_artist': extract_artist(album.get('artists')),
+ 'release_year': int_or_none(year),
+ 'genre': album.get('genre'),
+ 'disc_number': disc_number,
+ 'track_number': track_number,
+ })
+
+ track_artist = extract_artist(track.get('artists'))
+ if track_artist:
+ track_info.update({
+ 'artist': track_artist,
+ 'title': '%s - %s' % (track_artist, track_title),
+ })
+ else:
+ track_info['title'] = track_title
+
+ return track_info
+
+
+class YandexMusicPlaylistBaseIE(YandexMusicBaseIE):
+ def _extract_tracks(self, source, item_id, url, tld):
+ tracks = source['tracks']
+ track_ids = [compat_str(track_id) for track_id in source['trackIds']]
+
+ # The tracks dictionary shipped with the playlist.jsx API is limited to
+ # 150 tracks; any missing tracks have to be retrieved manually.
+ if len(tracks) < len(track_ids):
+ present_track_ids = {
+ compat_str(track['id'])
+ for track in tracks if track.get('id')}
+ missing_track_ids = [
+ track_id for track_id in track_ids
+ if track_id not in present_track_ids]
+ # Request missing tracks in chunks to avoid exceeding max HTTP header size,
+ # see https://github.com/ytdl-org/youtube-dl/issues/27355
+ _TRACKS_PER_CHUNK = 250
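+ # e.g. 600 missing IDs are requested as chunks of 250, 250 and 100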
+ for chunk_num in itertools.count(0):
+ start = chunk_num * _TRACKS_PER_CHUNK
+ end = start + _TRACKS_PER_CHUNK
+ missing_track_ids_req = missing_track_ids[start:end]
+ assert missing_track_ids_req
+ missing_tracks = self._call_api(
+ 'track-entries', tld, url, item_id,
+ 'Downloading missing tracks JSON chunk %d' % (chunk_num + 1), {
+ 'entries': ','.join(missing_track_ids_req),
+ 'lang': tld,
+ 'external-domain': 'music.yandex.%s' % tld,
+ 'overembed': 'false',
+ 'strict': 'true',
+ })
+ if missing_tracks:
+ tracks.extend(missing_tracks)
+ if end >= len(missing_track_ids):
+ break
+
+ return tracks
+
+ def _build_playlist(self, tracks):
+ entries = []
+ for track in tracks:
+ track_id = track.get('id') or track.get('realId')
+ if not track_id:
+ continue
+ albums = track.get('albums')
+ if not albums or not isinstance(albums, list):
+ continue
+ album = albums[0]
+ if not isinstance(album, dict):
+ continue
+ album_id = album.get('id')
+ if not album_id:
+ continue
+ entries.append(self.url_result(
+ 'http://music.yandex.ru/album/%s/track/%s' % (album_id, track_id),
+ ie=YandexMusicTrackIE.ie_key(), video_id=track_id))
+ return entries
+
+
+class YandexMusicAlbumIE(YandexMusicPlaylistBaseIE):
+ IE_NAME = 'yandexmusic:album'
+ IE_DESC = 'Яндекс.Музыка - Альбом'
+ _VALID_URL = r'%s/album/(?P<id>\d+)' % YandexMusicBaseIE._VALID_URL_BASE
+
+ _TESTS = [{
+ 'url': 'http://music.yandex.ru/album/540508',
+ 'info_dict': {
+ 'id': '540508',
+ 'title': 'md5:7ed1c3567f28d14be9f61179116f5571',
+ },
+ 'playlist_count': 50,
+ # 'skip': 'Travis CI servers blocked by YandexMusic',
+ }, {
+ 'url': 'https://music.yandex.ru/album/3840501',
+ 'info_dict': {
+ 'id': '3840501',
+ 'title': 'md5:36733472cdaa7dcb1fd9473f7da8e50f',
+ },
+ 'playlist_count': 33,
+ # 'skip': 'Travis CI servers blocked by YandexMusic',
+ }, {
+ # empty artists
+ 'url': 'https://music.yandex.ru/album/9091882',
+ 'info_dict': {
+ 'id': '9091882',
+ 'title': 'ТЕД на русском',
+ },
+ 'playlist_count': 187,
+ }]
+
+ @classmethod
+ def suitable(cls, url):
+ return False if YandexMusicTrackIE.suitable(url) else super(YandexMusicAlbumIE, cls).suitable(url)
+
+ def _real_extract(self, url):
+ mobj = self._match_valid_url(url)
+ tld = mobj.group('tld')
+ album_id = mobj.group('id')
+
+ album = self._call_api(
+ 'album', tld, url, album_id, 'Downloading album JSON',
+ {'album': album_id})
+
+ entries = self._build_playlist([track for volume in album['volumes'] for track in volume])
+
+ title = album['title']
+ artist = try_get(album, lambda x: x['artists'][0]['name'], compat_str)
+ if artist:
+ title = '%s - %s' % (artist, title)
+ year = album.get('year')
+ if year:
+ title += ' (%s)' % year
+
+ return self.playlist_result(entries, compat_str(album['id']), title)
+
+
+class YandexMusicPlaylistIE(YandexMusicPlaylistBaseIE):
+ IE_NAME = 'yandexmusic:playlist'
+ IE_DESC = 'Яндекс.Музыка - Плейлист'
+ _VALID_URL = r'%s/users/(?P<user>[^/]+)/playlists/(?P<id>\d+)' % YandexMusicBaseIE._VALID_URL_BASE
+
+ _TESTS = [{
+ 'url': 'http://music.yandex.ru/users/music.partners/playlists/1245',
+ 'info_dict': {
+ 'id': '1245',
+ 'title': 'md5:841559b3fe2b998eca88d0d2e22a3097',
+ 'description': 'md5:3b9f27b0efbe53f2ee1e844d07155cc9',
+ },
+ 'playlist_count': 5,
+ # 'skip': 'Travis CI servers blocked by YandexMusic',
+ }, {
+ 'url': 'https://music.yandex.ru/users/ya.playlist/playlists/1036',
+ 'only_matching': True,
+ }, {
+ # playlist exceeding the limit of 150 tracks (see
+ # https://github.com/ytdl-org/youtube-dl/issues/6666)
+ 'url': 'https://music.yandex.ru/users/mesiaz/playlists/1364',
+ 'info_dict': {
+ 'id': '1364',
+ 'title': 'md5:b3b400f997d3f878a13ae0699653f7db',
+ },
+ 'playlist_mincount': 437,
+ # 'skip': 'Travis CI servers blocked by YandexMusic',
+ }]
+
+ def _real_extract(self, url):
+ mobj = self._match_valid_url(url)
+ tld = mobj.group('tld')
+ user = mobj.group('user')
+ playlist_id = mobj.group('id')
+
+ playlist = self._call_api(
+ 'playlist', tld, url, playlist_id, 'Downloading playlist JSON', {
+ 'owner': user,
+ 'kinds': playlist_id,
+ 'light': 'true',
+ 'lang': tld,
+ 'external-domain': 'music.yandex.%s' % tld,
+ 'overembed': 'false',
+ })['playlist']
+
+ tracks = self._extract_tracks(playlist, playlist_id, url, tld)
+
+ return self.playlist_result(
+ self._build_playlist(tracks),
+ compat_str(playlist_id),
+ playlist.get('title'), playlist.get('description'))
+
+
+class YandexMusicArtistBaseIE(YandexMusicPlaylistBaseIE):
+ def _call_artist(self, tld, url, artist_id):
+ return self._call_api(
+ 'artist', tld, url, artist_id,
+ 'Downloading artist %s JSON' % self._ARTIST_WHAT, {
+ 'artist': artist_id,
+ 'what': self._ARTIST_WHAT,
+ 'sort': self._ARTIST_SORT or '',
+ 'dir': '',
+ 'period': '',
+ 'lang': tld,
+ 'external-domain': 'music.yandex.%s' % tld,
+ 'overembed': 'false',
+ })
+
+ def _real_extract(self, url):
+ mobj = self._match_valid_url(url)
+ tld = mobj.group('tld')
+ artist_id = mobj.group('id')
+ data = self._call_artist(tld, url, artist_id)
+ tracks = self._extract_tracks(data, artist_id, url, tld)
+ title = try_get(data, lambda x: x['artist']['name'], compat_str)
+ return self.playlist_result(
+ self._build_playlist(tracks), artist_id, title)
+
+
+class YandexMusicArtistTracksIE(YandexMusicArtistBaseIE):
+ IE_NAME = 'yandexmusic:artist:tracks'
+ IE_DESC = 'Яндекс.Музыка - Артист - Треки'
+ _VALID_URL = r'%s/artist/(?P<id>\d+)/tracks' % YandexMusicBaseIE._VALID_URL_BASE
+
+ _TESTS = [{
+ 'url': 'https://music.yandex.ru/artist/617526/tracks',
+ 'info_dict': {
+ 'id': '617526',
+ 'title': 'md5:131aef29d45fd5a965ca613e708c040b',
+ },
+ 'playlist_count': 507,
+ # 'skip': 'Travis CI servers blocked by YandexMusic',
+ }]
+
+ _ARTIST_SORT = ''
+ _ARTIST_WHAT = 'tracks'
+
+ def _real_extract(self, url):
+ mobj = self._match_valid_url(url)
+ tld = mobj.group('tld')
+ artist_id = mobj.group('id')
+ data = self._call_artist(tld, url, artist_id)
+ tracks = self._extract_tracks(data, artist_id, url, tld)
+ artist = try_get(data, lambda x: x['artist']['name'], compat_str)
+ title = '%s - %s' % (artist or artist_id, 'Треки')
+ return self.playlist_result(
+ self._build_playlist(tracks), artist_id, title)
+
+
+class YandexMusicArtistAlbumsIE(YandexMusicArtistBaseIE):
+ IE_NAME = 'yandexmusic:artist:albums'
+ IE_DESC = 'Яндекс.Музыка - Артист - Альбомы'
+ _VALID_URL = r'%s/artist/(?P<id>\d+)/albums' % YandexMusicBaseIE._VALID_URL_BASE
+
+ _TESTS = [{
+ 'url': 'https://music.yandex.ru/artist/617526/albums',
+ 'info_dict': {
+ 'id': '617526',
+ 'title': 'md5:55dc58d5c85699b7fb41ee926700236c',
+ },
+ 'playlist_count': 8,
+ # 'skip': 'Travis CI servers blocked by YandexMusic',
+ }]
+
+ _ARTIST_SORT = 'year'
+ _ARTIST_WHAT = 'albums'
+
+ def _real_extract(self, url):
+ mobj = self._match_valid_url(url)
+ tld = mobj.group('tld')
+ artist_id = mobj.group('id')
+ data = self._call_artist(tld, url, artist_id)
+ entries = []
+ for album in data['albums']:
+ if not isinstance(album, dict):
+ continue
+ album_id = album.get('id')
+ if not album_id:
+ continue
+ entries.append(self.url_result(
+ 'http://music.yandex.ru/album/%s' % album_id,
+ ie=YandexMusicAlbumIE.ie_key(), video_id=album_id))
+ artist = try_get(data, lambda x: x['artist']['name'], compat_str)
+ title = '%s - %s' % (artist or artist_id, 'Альбомы')
+ return self.playlist_result(entries, artist_id, title)
diff --git a/yt_dlp/extractor/yandexvideo.py b/yt_dlp/extractor/yandexvideo.py
new file mode 100644
index 0000000..4382a56
--- /dev/null
+++ b/yt_dlp/extractor/yandexvideo.py
@@ -0,0 +1,390 @@
+import itertools
+
+from .common import InfoExtractor
+from ..utils import (
+ determine_ext,
+ extract_attributes,
+ int_or_none,
+ lowercase_escape,
+ parse_qs,
+ traverse_obj,
+ try_get,
+ url_or_none,
+)
+
+
+class YandexVideoIE(InfoExtractor):
+ _VALID_URL = r'''(?x)
+ https?://
+ (?:
+ yandex\.ru(?:/(?:portal/(?:video|efir)|efir))?/?\?.*?stream_id=|
+ frontend\.vh\.yandex\.ru/player/
+ )
+ (?P<id>(?:[\da-f]{32}|[\w-]{12}))
+ '''
+ _TESTS = [{
+ 'url': 'https://yandex.ru/portal/video?stream_id=4dbb36ec4e0526d58f9f2dc8f0ecf374',
+ 'info_dict': {
+ 'id': '4dbb36ec4e0526d58f9f2dc8f0ecf374',
+ 'ext': 'mp4',
+ 'title': 'Русский Вудсток - главный рок-фест в истории СССР / вДудь',
+ 'description': 'md5:7d6b8d4bc4a3b9a56499916c1ea5b5fa',
+ 'thumbnail': r're:^https?://',
+ 'timestamp': 1549972939,
+ 'duration': 5575,
+ 'age_limit': 18,
+ 'upload_date': '20190212',
+ 'view_count': int,
+ 'like_count': int,
+ 'dislike_count': int,
+ },
+ 'params': {'skip_download': 'm3u8'},
+ }, {
+ 'url': 'https://yandex.ru/portal/efir?stream_id=4dbb262b4fe5cf15a215de4f34eee34d&from=morda',
+ 'only_matching': True,
+ }, {
+ 'url': 'https://yandex.ru/?stream_id=4dbb262b4fe5cf15a215de4f34eee34d',
+ 'only_matching': True,
+ }, {
+ 'url': 'https://frontend.vh.yandex.ru/player/4dbb262b4fe5cf15a215de4f34eee34d?from=morda',
+ 'only_matching': True,
+ }, {
+ # vod-episode, series episode
+ 'url': 'https://yandex.ru/portal/video?stream_id=45b11db6e4b68797919c93751a938cee',
+ 'only_matching': True,
+ }, {
+ # episode, sports
+ 'url': 'https://yandex.ru/?stream_channel=1538487871&stream_id=4132a07f71fb0396be93d74b3477131d',
+ 'only_matching': True,
+ }, {
+ # DASH with DRM
+ 'url': 'https://yandex.ru/portal/video?from=morda&stream_id=485a92d94518d73a9d0ff778e13505f8',
+ 'only_matching': True,
+ }, {
+ 'url': 'https://yandex.ru/efir?stream_active=watching&stream_id=v7a2dZ-v5mSI&from_block=efir_newtab',
+ 'only_matching': True,
+ }]
+
+ def _real_extract(self, url):
+ video_id = self._match_id(url)
+
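+ # Try the GraphQL player endpoint first; fall back to the plain player
+ # JSON if the request fails or reports an error.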
+ player = try_get((self._download_json(
+ 'https://frontend.vh.yandex.ru/graphql', video_id, data=('''{
+ player(content_id: "%s") {
+ computed_title
+ content_url
+ description
+ dislikes
+ duration
+ likes
+ program_title
+ release_date
+ release_date_ut
+ release_year
+ restriction_age
+ season
+ start_time
+ streams
+ thumbnail
+ title
+ views_count
+ }
+}''' % video_id).encode(), fatal=False)), lambda x: x['player']['content'])
+ if not player or player.get('error'):
+ player = self._download_json(
+ 'https://frontend.vh.yandex.ru/v23/player/%s.json' % video_id,
+ video_id, query={
+ 'stream_options': 'hires',
+ 'disable_trackings': 1,
+ })
+ content = player['content']
+
+ title = content.get('title') or content['computed_title']
+
+ formats = []
+ streams = content.get('streams') or []
+ streams.append({'url': content.get('content_url')})
+ for stream in streams:
+ content_url = url_or_none(stream.get('url'))
+ if not content_url:
+ continue
+ ext = determine_ext(content_url)
+ if ext == 'ismc':
+ continue
+ elif ext == 'm3u8':
+ formats.extend(self._extract_m3u8_formats(
+ content_url, video_id, 'mp4',
+ 'm3u8_native', m3u8_id='hls', fatal=False))
+ elif ext == 'mpd':
+ formats.extend(self._extract_mpd_formats(
+ content_url, video_id, mpd_id='dash', fatal=False))
+ else:
+ formats.append({'url': content_url})
+
+ timestamp = (int_or_none(content.get('release_date'))
+ or int_or_none(content.get('release_date_ut'))
+ or int_or_none(content.get('start_time')))
+ season = content.get('season') or {}
+
+ return {
+ 'id': video_id,
+ 'title': title,
+ 'description': content.get('description'),
+ 'thumbnail': content.get('thumbnail'),
+ 'timestamp': timestamp,
+ 'duration': int_or_none(content.get('duration')),
+ 'series': content.get('program_title'),
+ 'age_limit': int_or_none(content.get('restriction_age')),
+ 'view_count': int_or_none(content.get('views_count')),
+ 'like_count': int_or_none(content.get('likes')),
+ 'dislike_count': int_or_none(content.get('dislikes')),
+ 'season_number': int_or_none(season.get('season_number')),
+ 'season_id': season.get('id'),
+ 'release_year': int_or_none(content.get('release_year')),
+ 'formats': formats,
+ }
+
+
+class YandexVideoPreviewIE(InfoExtractor):
+ _VALID_URL = r'https?://(?:www\.)?yandex\.\w{2,3}(?:\.(?:am|ge|il|tr))?/video/preview(?:/?\?.*?filmId=|/)(?P<id>\d+)'
+ _TESTS = [{ # Odnoklassniki
+ 'url': 'https://yandex.ru/video/preview/?filmId=10682852472978372885&text=summer',
+ 'info_dict': {
+ 'id': '1352565459459',
+ 'ext': 'mp4',
+ 'like_count': int,
+ 'upload_date': '20191202',
+ 'age_limit': 0,
+ 'duration': 196,
+ 'thumbnail': 'https://i.mycdn.me/videoPreview?id=544866765315&type=37&idx=13&tkn=TY5qjLYZHxpmcnK8U2LgzYkgmaU&fn=external_8',
+ 'uploader_id': '481054701571',
+ 'title': 'LOFT - summer, summer, summer HD',
+ 'uploader': 'АРТЁМ КУДРОВ',
+ },
+ }, { # youtube
+ 'url': 'https://yandex.ru/video/preview/?filmId=4479424425337895262&source=main_redirect&text=видео&utm_source=main_stripe_big',
+ 'only_matching': True,
+ }, { # YandexVideo
+ 'url': 'https://yandex.ru/video/preview/5275069442094787341',
+ 'only_matching': True,
+ }, { # youtube
+ 'url': 'https://yandex.ru/video/preview/?filmId=16658118429797832897&from=tabbar&p=1&text=%D0%BF%D1%80%D0%BE%D1%81%D0%BC%D0%BE%D1%82%D1%80+%D1%84%D1%80%D0%B0%D0%B3%D0%BC%D0%B5%D0%BD%D1%82%D0%B0+%D0%BC%D0%B0%D0%BB%D0%B5%D0%BD%D1%8C%D0%BA%D0%B8%D0%B9+%D0%BF%D1%80%D0%B8%D0%BD%D1%86+%D0%BC%D1%8B+%D0%B2+%D0%BE%D1%82%D0%B2%D0%B5%D1%82%D0%B5+%D0%B7%D0%B0+%D1%82%D0%B5%D1%85+%D0%BA%D0%BE%D0%B3%D0%BE+%D0%BF%D1%80%D0%B8%D1%80%D1%83%D1%87%D0%B8%D0%BB%D0%B8',
+ 'only_matching': True,
+ }, { # Odnoklassniki
+ 'url': 'https://yandex.ru/video/preview/?text=Francis%20Lai%20-%20Le%20Bon%20Et%20Les%20MC)chants&path=wizard&parent-reqid=1643208087979310-1481782809207673478-sas3-0931-2f9-sas-l7-balancer-8080-BAL-9380&wiz_type=vital&filmId=12508152936505397283',
+ 'only_matching': True,
+ }, { # Odnoklassniki
+ 'url': 'https://yandex.com/video/preview/?text=dossier%2051%20film%201978&path=yandex_search&parent-reqid=1664361087754492-8727541069609384458-sas2-0340-sas-l7-balancer-8080-BAL-8045&noreask=1&from_type=vast&filmId=5794987234584444632',
+ 'only_matching': True,
+ }]
+
+ def _real_extract(self, url):
+ video_id = self._match_id(url)
+ webpage = self._download_webpage(url, video_id)
+ data_raw = self._search_regex(
+ r'window\.Ya\.__inline_params__\s*=\s*JSON\.parse\(\'([^"]+?\\u0022video\\u0022:[^"]+?})\'\);',
+ webpage, 'data_raw')
+ data_json = self._parse_json(data_raw, video_id, transform_source=lowercase_escape)
+ return self.url_result(data_json['video']['url'])
+
+
+class ZenYandexIE(InfoExtractor):
+ _VALID_URL = r'https?://(zen\.yandex|dzen)\.ru(?:/video)?/(media|watch)/(?:(?:id/[^/]+/|[^/]+/)(?:[a-z0-9-]+)-)?(?P<id>[a-z0-9-]+)'
+ _TESTS = [{
+ 'url': 'https://zen.yandex.ru/media/id/606fd806cc13cb3c58c05cf5/vot-eto-focus-dedy-morozy-na-gidrociklah-60c7c443da18892ebfe85ed7',
+ 'info_dict': {
+ 'id': '60c7c443da18892ebfe85ed7',
+ 'ext': 'mp4',
+ 'title': 'ВОТ ЭТО Focus. Деды Морозы на гидроциклах',
+ 'description': 'md5:8684912f6086f298f8078d4af0e8a600',
+ 'thumbnail': r're:^https://avatars\.dzeninfra\.ru/',
+ 'uploader': 'AcademeG DailyStream',
+ },
+ 'params': {
+ 'skip_download': 'm3u8',
+ 'format': 'bestvideo',
+ },
+ 'skip': 'The page does not exist',
+ }, {
+ 'url': 'https://dzen.ru/media/id/606fd806cc13cb3c58c05cf5/vot-eto-focus-dedy-morozy-na-gidrociklah-60c7c443da18892ebfe85ed7',
+ 'info_dict': {
+ 'id': '60c7c443da18892ebfe85ed7',
+ 'ext': 'mp4',
+ 'title': 'ВОТ ЭТО Focus. Деды Морозы на гидроциклах',
+ 'description': 'md5:8684912f6086f298f8078d4af0e8a600',
+ 'thumbnail': r're:^https://avatars\.dzeninfra\.ru/',
+ 'uploader': 'AcademeG DailyStream',
+ 'upload_date': '20191111',
+ 'timestamp': 1573465585,
+ },
+ 'params': {'skip_download': 'm3u8'},
+ }, {
+ 'url': 'https://zen.yandex.ru/video/watch/6002240ff8b1af50bb2da5e3',
+ 'info_dict': {
+ 'id': '6002240ff8b1af50bb2da5e3',
+ 'ext': 'mp4',
+ 'title': 'Извержение вулкана из спичек: зрелищный опыт',
+ 'description': 'md5:053ad3c61b5596d510c9a199dc8ee633',
+ 'thumbnail': r're:^https://avatars\.dzeninfra\.ru/',
+ 'uploader': 'TechInsider',
+ 'timestamp': 1611378221,
+ 'upload_date': '20210123',
+ },
+ 'params': {'skip_download': 'm3u8'},
+ }, {
+ 'url': 'https://dzen.ru/video/watch/6002240ff8b1af50bb2da5e3',
+ 'info_dict': {
+ 'id': '6002240ff8b1af50bb2da5e3',
+ 'ext': 'mp4',
+ 'title': 'Извержение вулкана из спичек: зрелищный опыт',
+ 'description': 'md5:053ad3c61b5596d510c9a199dc8ee633',
+ 'thumbnail': r're:^https://avatars\.dzeninfra\.ru/',
+ 'uploader': 'TechInsider',
+ 'upload_date': '20210123',
+ 'timestamp': 1611378221,
+ },
+ 'params': {'skip_download': 'm3u8'},
+ }, {
+ 'url': 'https://zen.yandex.ru/media/id/606fd806cc13cb3c58c05cf5/novyi-samsung-fold-3-moskvich-barahlit-612f93b7f8d48e7e945792a2?from=channel&rid=2286618386.482.1630817595976.42360',
+ 'only_matching': True,
+ }, {
+ 'url': 'https://dzen.ru/media/id/606fd806cc13cb3c58c05cf5/novyi-samsung-fold-3-moskvich-barahlit-612f93b7f8d48e7e945792a2?from=channel&rid=2286618386.482.1630817595976.42360',
+ 'only_matching': True,
+ }]
+
+ def _real_extract(self, url):
+ video_id = self._match_id(url)
+ webpage = self._download_webpage(url, video_id)
+ redirect = self._search_json(r'var it\s*=', webpage, 'redirect', video_id, default={}).get('retpath')
+ if redirect:
+ video_id = self._match_id(redirect)
+ webpage = self._download_webpage(redirect, video_id, note='Redirecting')
+ data_json = self._search_json(
+ r'("data"\s*:|data\s*=)', webpage, 'metadata', video_id, contains_pattern=r'{["\']_*serverState_*video.+}')
+ serverstate = self._search_regex(r'(_+serverState_+video-site_[^_]+_+)',
+ webpage, 'server state').replace('State', 'Settings')
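+ # the page data apparently holds parallel __serverState__/__serverSettings__
+ # blobs under the same suffix, so reuse the matched key with 'State' swapped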
+ uploader = self._search_regex(r'(<a\s*class=["\']card-channel-link[^"\']+["\'][^>]+>)',
+ webpage, 'uploader', default='<a>')
+ uploader_name = extract_attributes(uploader).get('aria-label')
+ video_json = try_get(data_json, lambda x: x[serverstate]['exportData']['video'], dict) or {}
+ stream_urls = try_get(video_json, lambda x: x['video']['streams'])
+ formats, subtitles = [], {}
+ for s_url in stream_urls or []:
+ ext = determine_ext(s_url)
+ if ext == 'mpd':
+ fmts, subs = self._extract_mpd_formats_and_subtitles(s_url, video_id, mpd_id='dash')
+ elif ext == 'm3u8':
+ fmts, subs = self._extract_m3u8_formats_and_subtitles(s_url, video_id, 'mp4')
+ else:
+ # neither DASH nor HLS: skip rather than reuse stale fmts/subs
+ continue
+ formats.extend(fmts)
+ subtitles = self._merge_subtitles(subtitles, subs)
+ return {
+ 'id': video_id,
+ 'title': video_json.get('title') or self._og_search_title(webpage),
+ 'formats': formats,
+ 'subtitles': subtitles,
+ 'duration': int_or_none(video_json.get('duration')),
+ 'view_count': int_or_none(video_json.get('views')),
+ 'timestamp': int_or_none(video_json.get('publicationDate')),
+ 'uploader': uploader_name or data_json.get('authorName') or try_get(data_json, lambda x: x['publisher']['name']),
+ 'description': video_json.get('description') or self._og_search_description(webpage),
+ 'thumbnail': self._og_search_thumbnail(webpage) or try_get(data_json, lambda x: x['og']['imageUrl']),
+ }
+
+
+class ZenYandexChannelIE(InfoExtractor):
+ _VALID_URL = r'https?://(zen\.yandex|dzen)\.ru/(?!media|video)(?:id/)?(?P<id>[a-z0-9-_]+)'
+ _TESTS = [{
+ 'url': 'https://zen.yandex.ru/tok_media',
+ 'info_dict': {
+ 'id': 'tok_media',
+ 'title': 'СПЕКТР',
+ 'description': 'md5:a9e5b3c247b7fe29fd21371a428bcf56',
+ },
+ 'playlist_mincount': 169,
+ 'skip': 'The page does not exist',
+ }, {
+ 'url': 'https://dzen.ru/tok_media',
+ 'info_dict': {
+ 'id': 'tok_media',
+ 'title': 'СПЕКТР',
+ 'description': 'md5:a9e5b3c247b7fe29fd21371a428bcf56',
+ },
+ 'playlist_mincount': 169,
+ 'skip': 'The page does not exist',
+ }, {
+ 'url': 'https://zen.yandex.ru/id/606fd806cc13cb3c58c05cf5',
+ 'info_dict': {
+ 'id': '606fd806cc13cb3c58c05cf5',
+ 'description': 'md5:517b7c97d8ca92e940f5af65448fd928',
+ 'title': 'AcademeG DailyStream',
+ },
+ 'playlist_mincount': 657,
+ }, {
+ # Test that the playlist extractor finishes extracting when the
+ # channel has less than one page
+ 'url': 'https://zen.yandex.ru/jony_me',
+ 'info_dict': {
+ 'id': 'jony_me',
+ 'description': 'md5:ce0a5cad2752ab58701b5497835b2cc5',
+ 'title': 'JONY ',
+ },
+ 'playlist_count': 18,
+ }, {
+ # Test that the playlist extractor finishes extracting when the
+ # channel has more than one page of entries
+ 'url': 'https://zen.yandex.ru/tatyanareva',
+ 'info_dict': {
+ 'id': 'tatyanareva',
+ 'description': 'md5:40a1e51f174369ec3ba9d657734ac31f',
+ 'title': 'Татьяна Рева',
+ 'entries': 'maxcount:200',
+ },
+ 'playlist_mincount': 46,
+ }, {
+ 'url': 'https://dzen.ru/id/606fd806cc13cb3c58c05cf5',
+ 'info_dict': {
+ 'id': '606fd806cc13cb3c58c05cf5',
+ 'title': 'AcademeG DailyStream',
+ 'description': 'md5:517b7c97d8ca92e940f5af65448fd928',
+ },
+ 'playlist_mincount': 657,
+ }]
+
+ def _entries(self, item_id, server_state_json, server_settings_json):
+ items = (traverse_obj(server_state_json, ('feed', 'items', ...))
+ or traverse_obj(server_settings_json, ('exportData', 'items', ...)))
+
+ more = (traverse_obj(server_state_json, ('links', 'more'))
+ or traverse_obj(server_settings_json, ('exportData', 'more', 'link')))
+
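+ # Cursor-style pagination: follow the 'more' link until it stops
+ # yielding items or a fresh next_page_id (avoids looping on the last page).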
+ next_page_id = None
+ for page in itertools.count(1):
+ for item in items or []:
+ if item.get('type') != 'gif':
+ continue
+ video_id = traverse_obj(item, 'publication_id', 'publicationId') or ''
+ yield self.url_result(item['link'], ZenYandexIE, video_id.split(':')[-1])
+
+ current_page_id = next_page_id
+ next_page_id = traverse_obj(parse_qs(more), ('next_page_id', -1))
+ if not all((more, items, next_page_id, next_page_id != current_page_id)):
+ break
+
+ data = self._download_json(more, item_id, note=f'Downloading Page {page}')
+ items, more = data.get('items'), traverse_obj(data, ('more', 'link'))
+
+ def _real_extract(self, url):
+ item_id = self._match_id(url)
+ webpage = self._download_webpage(url, item_id)
+ redirect = self._search_json(
+ r'var it\s*=', webpage, 'redirect', item_id, default={}).get('retpath')
+ if redirect:
+ item_id = self._match_id(redirect)
+ webpage = self._download_webpage(redirect, item_id, note='Redirecting')
+ data = self._search_json(
+ r'("data"\s*:|data\s*=)', webpage, 'channel data', item_id, contains_pattern=r'{\"__serverState__.+}')
+ server_state_json = traverse_obj(data, lambda k, _: k.startswith('__serverState__'), get_all=False)
+ server_settings_json = traverse_obj(data, lambda k, _: k.startswith('__serverSettings__'), get_all=False)
+
+ return self.playlist_result(
+ self._entries(item_id, server_state_json, server_settings_json),
+ item_id, traverse_obj(server_state_json, ('channel', 'source', 'title')),
+ traverse_obj(server_state_json, ('channel', 'source', 'description')))
diff --git a/yt_dlp/extractor/yapfiles.py b/yt_dlp/extractor/yapfiles.py
new file mode 100644
index 0000000..d6024d9
--- /dev/null
+++ b/yt_dlp/extractor/yapfiles.py
@@ -0,0 +1,90 @@
+from .common import InfoExtractor
+from ..utils import (
+ ExtractorError,
+ int_or_none,
+ qualities,
+ url_or_none,
+)
+
+
+class YapFilesIE(InfoExtractor):
+ _WORKING = False
+ _YAPFILES_URL = r'//(?:(?:www|api)\.)?yapfiles\.ru/get_player/*\?.*?\bv=(?P<id>\w+)'
+ _VALID_URL = r'https?:%s' % _YAPFILES_URL
+ _EMBED_REGEX = [rf'<iframe\b[^>]+\bsrc=(["\'])(?P<url>(?:https?:)?{_YAPFILES_URL}.*?)\1']
+ _TESTS = [{
+ # with hd
+ 'url': 'http://www.yapfiles.ru/get_player/?v=vMDE1NjcyNDUt0413',
+ 'md5': '2db19e2bfa2450568868548a1aa1956c',
+ 'info_dict': {
+ 'id': 'vMDE1NjcyNDUt0413',
+ 'ext': 'mp4',
+ 'title': 'Самый худший пароль WIFI',
+ 'thumbnail': r're:^https?://.*\.jpg$',
+ 'duration': 72,
+ },
+ }, {
+ # without hd
+ 'url': 'https://api.yapfiles.ru/get_player/?uid=video_player_1872528&plroll=1&adv=1&v=vMDE4NzI1Mjgt690b',
+ 'only_matching': True,
+ }]
+
+ def _real_extract(self, url):
+ video_id = self._match_id(url)
+
+ webpage = self._download_webpage(url, video_id, fatal=False)
+
+ player_url = None
+ query = {}
+ if webpage:
+ player_url = self._search_regex(
+ r'player\.init\s*\(\s*(["\'])(?P<url>(?:(?!\1).)+)\1', webpage,
+ 'player url', default=None, group='url')
+
+ if not player_url:
+ player_url = 'http://api.yapfiles.ru/load/%s/' % video_id
+ query = {
+ 'md5': 'ded5f369be61b8ae5f88e2eeb2f3caff',
+ 'type': 'json',
+ 'ref': url,
+ }
+
+ player = self._download_json(
+ player_url, video_id, query=query)['player']
+
+ playlist_url = player['playlist']
+ title = player['title']
+ thumbnail = player.get('poster')
+
+ if title == 'Ролик удален' or 'deleted.jpg' in (thumbnail or ''):
+ raise ExtractorError(
+ 'Video %s has been removed' % video_id, expected=True)
+
+ playlist = self._download_json(
+ playlist_url, video_id)['player']['main']
+
+ hd_height = int_or_none(player.get('hd'))
+
+ QUALITIES = ('sd', 'hd')
+ quality_key = qualities(QUALITIES)
+ formats = []
+ for format_id in QUALITIES:
+ is_hd = format_id == 'hd'
+ format_url = url_or_none(playlist.get(
+ 'file%s' % ('_hd' if is_hd else '')))
+ if not format_url:
+ continue
+ formats.append({
+ 'url': format_url,
+ 'format_id': format_id,
+ 'quality': quality_key(format_id),
+ 'height': hd_height if is_hd else None,
+ })
+
+ return {
+ 'id': video_id,
+ 'title': title,
+ 'thumbnail': thumbnail,
+ 'duration': int_or_none(player.get('length')),
+ 'formats': formats,
+ }
diff --git a/yt_dlp/extractor/yappy.py b/yt_dlp/extractor/yappy.py
new file mode 100644
index 0000000..5ce647e
--- /dev/null
+++ b/yt_dlp/extractor/yappy.py
@@ -0,0 +1,128 @@
+from .common import InfoExtractor
+from ..utils import (
+ OnDemandPagedList,
+ int_or_none,
+ traverse_obj,
+ unified_timestamp,
+ url_or_none,
+)
+
+
+class YappyIE(InfoExtractor):
+ _WORKING = False
+ _VALID_URL = r'https?://yappy\.media/video/(?P<id>\w+)'
+ _TESTS = [{
+ 'url': 'https://yappy.media/video/47fea6d8586f48d1a0cf96a7342aabd2',
+ 'info_dict': {
+ 'id': '47fea6d8586f48d1a0cf96a7342aabd2',
+ 'ext': 'mp4',
+ 'title': 'Куда нажимать? Как снимать? Смотри видос и погнали!🤘🏻',
+ 'timestamp': 1661893200,
+ 'description': 'Куда нажимать? Как снимать? Смотри видос и погнали!🤘🏻',
+ 'thumbnail': 'https://cdn-st.ritm.media/static/pic/thumbnails/0c7c4d73388f47848acaf540d2e2bb8c-thumbnail.jpg',
+ 'upload_date': '20220830',
+ 'view_count': int,
+ 'like_count': int,
+ 'uploader_id': '59a0c8c485e5410b9c43474bf4c6a373',
+ 'categories': ['Образование и наука', 'Лайфхак', 'Технологии', 'Арт/искусство'],
+ 'repost_count': int,
+ 'uploader': 'YAPPY',
+ }
+ }, {
+ 'url': 'https://yappy.media/video/3862451954ad4bd58ae2ccefddb0bd33',
+ 'info_dict': {
+ 'id': '3862451954ad4bd58ae2ccefddb0bd33',
+ 'ext': 'mp4',
+ 'title': 'Опиши свой характер 3 словами🙃\n#психология #дружба #отношения',
+ 'timestamp': 1674726985,
+ 'like_count': int,
+ 'description': 'Опиши свой характер 3 словами🙃\n#психология #дружба #отношения',
+ 'uploader_id': '6793ee3581974a3586fc01e157de6c99',
+ 'view_count': int,
+ 'repost_count': int,
+ 'uploader': 'LENA SHTURMAN',
+ 'upload_date': '20230126',
+ 'thumbnail': 'https://cdn-st.ritm.media/static/pic/user_thumbnails/6e76bb4bbad640b6/9ec84c115b2b1967/1674716171.jpg',
+ }
+ }]
+
+ def _real_extract(self, url):
+ video_id = self._match_id(url)
+ webpage = self._download_webpage(url, video_id)
+ json_ld = self._search_json_ld(webpage, video_id)
+ nextjs_data = self._search_nextjs_data(webpage, video_id)
+
+ media_data = (
+ traverse_obj(
+ nextjs_data, ('props', 'pageProps', ('data', 'OpenGraphParameters')), get_all=False)
+ or self._download_json(f'https://yappy.media/api/video/{video_id}', video_id))
+
+ media_url = traverse_obj(media_data, ('link', {url_or_none})) or ''
+ has_watermark = media_url.endswith('-wm.mp4')
+
+ formats = [{
+ 'url': media_url,
+ 'ext': 'mp4',
+ 'format_note': 'Watermarked' if has_watermark else None,
+ 'preference': -10 if has_watermark else None
+ }] if media_url else []
+
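+        # A watermark-free copy is typically served from the same path
+        # without the '-wm' suffix, so also try that variant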
+ if has_watermark:
+ formats.append({
+ 'url': media_url.replace('-wm.mp4', '.mp4'),
+ 'ext': 'mp4'
+ })
+
+ audio_link = traverse_obj(media_data, ('audio', 'link'))
+ if audio_link:
+ formats.append({
+ 'url': audio_link,
+ 'ext': 'mp3',
+ 'acodec': 'mp3',
+ 'vcodec': 'none'
+ })
+
+ return {
+ 'id': video_id,
+ 'title': (json_ld.get('description') or self._html_search_meta(['og:title'], webpage)
+ or self._html_extract_title(webpage)),
+ 'formats': formats,
+ 'thumbnail': (media_data.get('thumbnail')
+ or self._html_search_meta(['og:image', 'og:image:secure_url'], webpage)),
+ 'description': (media_data.get('description') or json_ld.get('description')
+ or self._html_search_meta(['description', 'og:description'], webpage)),
+ 'timestamp': unified_timestamp(media_data.get('publishedAt') or json_ld.get('timestamp')),
+ 'view_count': int_or_none(media_data.get('viewsCount') or json_ld.get('view_count')),
+ 'like_count': int_or_none(media_data.get('likesCount')),
+ 'uploader': traverse_obj(media_data, ('creator', 'firstName')),
+ 'uploader_id': traverse_obj(media_data, ('creator', ('uuid', 'nickname')), get_all=False),
+ 'categories': traverse_obj(media_data, ('categories', ..., 'name')) or None,
+ 'repost_count': int_or_none(media_data.get('sharingCount'))
+ }
+
+
+class YappyProfileIE(InfoExtractor):
+ _VALID_URL = r'https?://yappy\.media/profile/(?P<id>\w+)'
+ _TESTS = [{
+ 'url': 'https://yappy.media/profile/59a0c8c485e5410b9c43474bf4c6a373',
+ 'info_dict': {
+ 'id': '59a0c8c485e5410b9c43474bf4c6a373',
+ },
+ 'playlist_mincount': 527,
+ }]
+
+ def _real_extract(self, url):
+ profile_id = self._match_id(url)
+
+ def fetch_page(page_num):
+ page_num += 1
+ videos = self._download_json(
+ f'https://yappy.media/api/video/list/{profile_id}?page={page_num}',
+ profile_id, f'Downloading profile page {page_num} JSON')
+
+ for video in traverse_obj(videos, ('results', lambda _, v: v['uuid'])):
+ yield self.url_result(
+ f'https://yappy.media/video/{video["uuid"]}', YappyIE,
+ video['uuid'], video.get('description'))
+
+ return self.playlist_result(OnDemandPagedList(fetch_page, 15), profile_id)
diff --git a/yt_dlp/extractor/yle_areena.py b/yt_dlp/extractor/yle_areena.py
new file mode 100644
index 0000000..dd0e599
--- /dev/null
+++ b/yt_dlp/extractor/yle_areena.py
@@ -0,0 +1,134 @@
+from .common import InfoExtractor
+from .kaltura import KalturaIE
+from ..utils import (
+ int_or_none,
+ smuggle_url,
+ traverse_obj,
+ unified_strdate,
+ url_or_none,
+)
+
+
+class YleAreenaIE(InfoExtractor):
+ _VALID_URL = r'https?://areena\.yle\.fi/(?P<id>[\d-]+)'
+ _TESTS = [
+ {
+ 'url': 'https://areena.yle.fi/1-4371942',
+ 'md5': '932edda0ecf5dfd6423804182d32f8ac',
+ 'info_dict': {
+ 'id': '0_a3tjk92c',
+ 'ext': 'mp4',
+ 'title': 'Pouchit',
+ 'description': 'md5:d487309c3abbe5650265bbd1742d2f82',
+ 'series': 'Modernit miehet',
+ 'season': 'Season 1',
+ 'season_number': 1,
+ 'episode': 'Episode 2',
+ 'episode_number': 2,
+ 'thumbnail': 'http://cfvod.kaltura.com/p/1955031/sp/195503100/thumbnail/entry_id/0_a3tjk92c/version/100061',
+ 'uploader_id': 'ovp@yle.fi',
+ 'duration': 1435,
+ 'view_count': int,
+ 'upload_date': '20181204',
+ 'release_date': '20190106',
+ 'timestamp': 1543916210,
+ 'subtitles': {'fin': [{'url': r're:^https?://', 'ext': 'srt'}]},
+ 'age_limit': 7,
+ 'webpage_url': 'https://areena.yle.fi/1-4371942'
+ }
+ },
+ {
+ 'url': 'https://areena.yle.fi/1-2158940',
+ 'md5': 'cecb603661004e36af8c5188b5212b12',
+ 'info_dict': {
+ 'id': '1_l38iz9ur',
+ 'ext': 'mp4',
+ 'title': 'Albi haluaa vessan',
+ 'description': 'md5:15236d810c837bed861fae0e88663c33',
+ 'series': 'Albi Lumiukko',
+ 'thumbnail': 'http://cfvod.kaltura.com/p/1955031/sp/195503100/thumbnail/entry_id/1_l38iz9ur/version/100021',
+ 'uploader_id': 'ovp@yle.fi',
+ 'duration': 319,
+ 'view_count': int,
+ 'upload_date': '20211202',
+ 'release_date': '20211215',
+ 'timestamp': 1638448202,
+ 'subtitles': {},
+ 'age_limit': 0,
+ 'webpage_url': 'https://areena.yle.fi/1-2158940'
+ }
+ },
+ {
+ 'url': 'https://areena.yle.fi/1-64829589',
+ 'info_dict': {
+ 'id': '1-64829589',
+ 'ext': 'mp4',
+ 'title': 'HKO & Mälkki & Tanner',
+ 'description': 'md5:b4f1b1af2c6569b33f75179a86eea156',
+ 'series': 'Helsingin kaupunginorkesterin konsertteja',
+ 'thumbnail': r're:^https?://.+\.jpg$',
+ 'release_date': '20230120',
+ },
+ 'params': {
+ 'skip_download': 'm3u8',
+ },
+ },
+ ]
+
+ def _real_extract(self, url):
+ video_id = self._match_id(url)
+ info = self._search_json_ld(self._download_webpage(url, video_id), video_id, default={})
+ video_data = self._download_json(
+ f'https://player.api.yle.fi/v1/preview/{video_id}.json?app_id=player_static_prod&app_key=8930d72170e48303cf5f3867780d549b',
+ video_id, headers={
+ 'origin': 'https://areena.yle.fi',
+ 'referer': 'https://areena.yle.fi/',
+ 'content-type': 'application/json'
+ })
+
+ # Example title: 'K1, J2: Pouchit | Modernit miehet'
+ series, season_number, episode_number, episode = self._search_regex(
+ r'K(?P<season_no>[\d]+),\s*J(?P<episode_no>[\d]+):?\s*\b(?P<episode>[^|]+)\s*|\s*(?P<series>.+)',
+ info.get('title') or '', 'episode metadata', group=('season_no', 'episode_no', 'episode', 'series'),
+ default=(None, None, None, None))
+ description = traverse_obj(video_data, ('data', 'ongoing_ondemand', 'description', 'fin'), expected_type=str)
+
+ subtitles = {}
+ for sub in traverse_obj(video_data, ('data', 'ongoing_ondemand', 'subtitles', ...)):
+ if url_or_none(sub.get('uri')):
+ subtitles.setdefault(sub.get('language') or 'und', []).append({
+ 'url': sub['uri'],
+ 'ext': 'srt',
+ 'name': sub.get('kind'),
+ })
+
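+        # Prefer the Kaltura entry when one is linked; otherwise fall back
+        # to the raw HLS manifest from the preview API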
+ kaltura_id = traverse_obj(video_data, ('data', 'ongoing_ondemand', 'kaltura', 'id'), expected_type=str)
+ if kaltura_id:
+ info_dict = {
+ '_type': 'url_transparent',
+ 'url': smuggle_url(f'kaltura:1955031:{kaltura_id}', {'source_url': url}),
+ 'ie_key': KalturaIE.ie_key(),
+ }
+ else:
+ info_dict = {
+ 'id': video_id,
+ 'formats': self._extract_m3u8_formats(
+ video_data['data']['ongoing_ondemand']['manifest_url'], video_id, 'mp4', m3u8_id='hls'),
+ }
+
+ return {
+ **info_dict,
+ 'title': (traverse_obj(video_data, ('data', 'ongoing_ondemand', 'title', 'fin'), expected_type=str)
+ or episode or info.get('title')),
+ 'description': description,
+ 'series': (traverse_obj(video_data, ('data', 'ongoing_ondemand', 'series', 'title', 'fin'), expected_type=str)
+ or series),
+ 'season_number': (int_or_none(self._search_regex(r'Kausi (\d+)', description, 'season number', default=None))
+ or int_or_none(season_number)),
+ 'episode_number': (traverse_obj(video_data, ('data', 'ongoing_ondemand', 'episode_number'), expected_type=int_or_none)
+ or int_or_none(episode_number)),
+ 'thumbnails': traverse_obj(info, ('thumbnails', ..., {'url': 'url'})),
+ 'age_limit': traverse_obj(video_data, ('data', 'ongoing_ondemand', 'content_rating', 'age_restriction'), expected_type=int_or_none),
+ 'subtitles': subtitles,
+ 'release_date': unified_strdate(traverse_obj(video_data, ('data', 'ongoing_ondemand', 'start_time'), expected_type=str)),
+ }
diff --git a/yt_dlp/extractor/youjizz.py b/yt_dlp/extractor/youjizz.py
new file mode 100644
index 0000000..cd12be5
--- /dev/null
+++ b/yt_dlp/extractor/youjizz.py
@@ -0,0 +1,90 @@
+from .common import InfoExtractor
+from ..utils import (
+ determine_ext,
+ int_or_none,
+ parse_duration,
+ url_or_none,
+)
+
+
+class YouJizzIE(InfoExtractor):
+ _VALID_URL = r'https?://(?:\w+\.)?youjizz\.com/videos/(?:[^/#?]*-(?P<id>\d+)\.html|embed/(?P<embed_id>\d+))'
+ _TESTS = [{
+ 'url': 'http://www.youjizz.com/videos/zeichentrick-1-2189178.html',
+ 'md5': 'b1e1dfaa8bb9537d8b84eeda9cf4acf4',
+ 'info_dict': {
+ 'id': '2189178',
+ 'ext': 'mp4',
+ 'title': 'Zeichentrick 1',
+ 'age_limit': 18,
+ 'duration': 2874,
+ }
+ }, {
+ 'url': 'http://www.youjizz.com/videos/-2189178.html',
+ 'only_matching': True,
+ }, {
+ 'url': 'https://www.youjizz.com/videos/embed/31991001',
+ 'only_matching': True,
+ }]
+
+ def _real_extract(self, url):
+ mobj = self._match_valid_url(url)
+ video_id = mobj.group('id') or mobj.group('embed_id')
+
+ webpage = self._download_webpage(url, video_id)
+
+ title = self._html_extract_title(webpage)
+
+ formats = []
+
+ encodings = self._parse_json(
+ self._search_regex(
+ r'[Ee]ncodings\s*=\s*(\[.+?\]);\n', webpage, 'encodings',
+ default='[]'),
+ video_id, fatal=False)
+ for encoding in encodings:
+ if not isinstance(encoding, dict):
+ continue
+ format_url = url_or_none(encoding.get('filename'))
+ if not format_url:
+ continue
+ if determine_ext(format_url) == 'm3u8':
+ formats.extend(self._extract_m3u8_formats(
+ format_url, video_id, 'mp4', entry_protocol='m3u8_native',
+ m3u8_id='hls', fatal=False))
+ else:
+ format_id = encoding.get('name') or encoding.get('quality')
+ height = int_or_none(self._search_regex(
+ r'^(\d+)[pP]', format_id, 'height', default=None))
+ formats.append({
+ 'url': format_url,
+ 'format_id': format_id,
+ 'height': height,
+ })
+
+ if formats:
+ info_dict = {
+ 'formats': formats,
+ }
+ else:
+ # YouJizz's HTML5 player has invalid HTML
+ webpage = webpage.replace('"controls', '" controls')
+ info_dict = self._parse_html5_media_entries(
+ url, webpage, video_id)[0]
+
+ duration = parse_duration(self._search_regex(
+ r'<strong>Runtime:</strong>([^<]+)', webpage, 'duration',
+ default=None))
+ uploader = self._search_regex(
+ r'<strong>Uploaded By:.*?<a[^>]*>([^<]+)', webpage, 'uploader',
+ default=None)
+
+ info_dict.update({
+ 'id': video_id,
+ 'title': title,
+ 'age_limit': self._rta_search(webpage),
+ 'duration': duration,
+ 'uploader': uploader,
+ })
+
+ return info_dict
diff --git a/yt_dlp/extractor/youku.py b/yt_dlp/extractor/youku.py
new file mode 100644
index 0000000..1f3f98a
--- /dev/null
+++ b/yt_dlp/extractor/youku.py
@@ -0,0 +1,290 @@
+import random
+import re
+import string
+import time
+
+from .common import InfoExtractor
+from ..utils import (
+ ExtractorError,
+ clean_html,
+ get_element_by_class,
+ js_to_json,
+ str_or_none,
+ strip_jsonp,
+)
+
+
+class YoukuIE(InfoExtractor):
+ IE_NAME = 'youku'
+ IE_DESC = '优酷'
+ _VALID_URL = r'''(?x)
+ (?:
+ https?://(
+ (?:v|play(?:er)?)\.(?:youku|tudou)\.com/(?:v_show/id_|player\.php/sid/)|
+ video\.tudou\.com/v/)|
+ youku:)
+ (?P<id>[A-Za-z0-9]+)(?:\.html|/v\.swf|)
+ '''
+
+ _TESTS = [{
+ 'url': 'http://player.youku.com/player.php/sid/XNDgyMDQ2NTQw/v.swf',
+ 'only_matching': True,
+ }, {
+ 'url': 'http://v.youku.com/v_show/id_XNjA1NzA2Njgw.html',
+ 'note': 'Video protected with password',
+ 'info_dict': {
+ 'id': 'XNjA1NzA2Njgw',
+ 'ext': 'mp4',
+ 'title': '邢義田复旦讲座之想象中的胡人—从“左衽孔子”说起',
+ 'duration': 7264.5,
+ 'thumbnail': r're:^https?://.*',
+ 'uploader': 'FoxJin1006',
+ 'uploader_id': '322014285',
+ 'uploader_url': 'http://i.youku.com/u/UMTI4ODA1NzE0MA==',
+ 'tags': list,
+ },
+ 'params': {
+ 'videopassword': '100600',
+ },
+ 'skip': '404',
+ }, {
+ # /play/get.json contains streams with "channel_type":"tail"
+ 'url': 'http://v.youku.com/v_show/id_XOTUxMzg4NDMy.html',
+ 'info_dict': {
+ 'id': 'XOTUxMzg4NDMy',
+ 'ext': 'mp4',
+ 'title': '我的世界☆明月庄主☆车震猎杀☆杀人艺术Minecraft',
+ 'duration': 702.08,
+ 'thumbnail': r're:^https?://.*',
+ 'uploader': '明月庄主moon',
+ 'uploader_id': '38465621',
+ 'uploader_url': 'https://www.youku.com/profile/index/?uid=UMTUzODYyNDg0',
+ 'tags': list,
+ },
+ }, {
+ 'url': 'https://v.youku.com/v_show/id_XNTA2NTA0MjA1Mg==.html',
+ 'info_dict': {
+ 'id': 'XNTA2NTA0MjA1Mg',
+ 'ext': 'mp4',
+ 'title': 'Minecraft我的世界:建造超大巨型航空飞机,菜鸟vs高手vs黑客',
+ 'duration': 542.13,
+ 'thumbnail': r're:^https?://.*',
+ 'uploader': '波哥游戏解说',
+ 'uploader_id': '156688084',
+ 'uploader_url': 'https://www.youku.com/profile/index/?uid=UNjI2NzUyMzM2',
+ 'tags': list,
+ },
+ }, {
+ 'url': 'https://v.youku.com/v_show/id_XNTE1MzczOTg4MA==.html',
+ 'info_dict': {
+ 'id': 'XNTE1MzczOTg4MA',
+ 'ext': 'mp4',
+ 'title': '国产超A特工片',
+ 'duration': 362.97,
+ 'thumbnail': r're:^https?://.*',
+ 'uploader': '陈晓娟说历史',
+ 'uploader_id': '1640913339',
+ 'uploader_url': 'https://www.youku.com/profile/index/?uid=UNjU2MzY1MzM1Ng==',
+ 'tags': list,
+ },
+ }, {
+ 'url': 'https://play.tudou.com/v_show/id_XNjAxNjI2OTU3Ng==.html?',
+ 'info_dict': {
+ 'id': 'XNjAxNjI2OTU3Ng',
+ 'ext': 'mp4',
+ 'title': '阿斯塔意识到哈里杀了人,自己被骗了',
+ 'thumbnail': 'https://m.ykimg.com/0541010164F732752794D4D7B70331D1',
+ 'uploader_id': '88758207',
+ 'tags': [],
+ 'uploader_url': 'https://www.youku.com/profile/index/?uid=UMzU1MDMyODI4',
+ 'uploader': '英美剧场',
+ 'duration': 72.91,
+ },
+ }]
+
+ @staticmethod
+ def get_ysuid():
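+        # __ysuid cookie value: current Unix timestamp followed by three
+        # random ASCII letters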
+ return '%d%s' % (int(time.time()), ''.join(
+ random.choices(string.ascii_letters, k=3)))
+
+ def get_format_name(self, fm):
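+        # Map Youku stream types onto internal quality tiers
+        # (h1 is roughly the highest quality, h6 the lowest)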
+ _dict = {
+ '3gp': 'h6',
+ '3gphd': 'h5',
+ 'flv': 'h4',
+ 'flvhd': 'h4',
+ 'mp4': 'h3',
+ 'mp4hd': 'h3',
+ 'mp4hd2': 'h4',
+ 'mp4hd3': 'h4',
+ 'hd2': 'h2',
+ 'hd3': 'h1',
+ }
+ return _dict.get(fm)
+
+ def _real_extract(self, url):
+ video_id = self._match_id(url)
+
+ self._set_cookie('youku.com', '__ysuid', self.get_ysuid())
+ self._set_cookie('youku.com', 'xreferrer', 'http://www.youku.com')
+
+ _, urlh = self._download_webpage_handle(
+ 'https://log.mmstat.com/eg.js', video_id, 'Retrieving cna info')
+ # The etag header is '"foobar"'; let's remove the double quotes
+ cna = urlh.headers['etag'][1:-1]
+
+ # request basic data
+ basic_data_params = {
+ 'vid': video_id,
+ 'ccode': '0524',
+ 'client_ip': '192.168.1.1',
+ 'utid': cna,
+ 'client_ts': time.time() / 1000,
+ }
+
+ video_password = self.get_param('videopassword')
+ if video_password:
+ basic_data_params['password'] = video_password
+
+ headers = {
+ 'Referer': url,
+ }
+ headers.update(self.geo_verification_headers())
+ data = self._download_json(
+ 'https://ups.youku.com/ups/get.json', video_id,
+ 'Downloading JSON metadata',
+ query=basic_data_params, headers=headers)['data']
+
+ error = data.get('error')
+ if error:
+ error_note = error.get('note')
+ if error_note is not None and '因版权原因无法观看此视频' in error_note:
+ raise ExtractorError(
+ 'Youku said: Sorry, this video is available in China only', expected=True)
+ elif error_note and '该视频被设为私密' in error_note:
+ raise ExtractorError(
+ 'Youku said: Sorry, this video is private', expected=True)
+ else:
+ msg = 'Youku server reported error %i' % error.get('code')
+ if error_note is not None:
+ msg += ': ' + clean_html(error_note)
+ raise ExtractorError(msg)
+
+ # get video title
+ video_data = data['video']
+ title = video_data['title']
+
+ formats = [{
+ 'url': stream['m3u8_url'],
+ 'format_id': self.get_format_name(stream.get('stream_type')),
+ 'ext': 'mp4',
+ 'protocol': 'm3u8_native',
+ 'filesize': int(stream.get('size')),
+ 'width': stream.get('width'),
+ 'height': stream.get('height'),
+ } for stream in data['stream'] if stream.get('channel_type') != 'tail']
+
+ return {
+ 'id': video_id,
+ 'title': title,
+ 'formats': formats,
+ 'duration': video_data.get('seconds'),
+ 'thumbnail': video_data.get('logo'),
+ 'uploader': video_data.get('username'),
+ 'uploader_id': str_or_none(video_data.get('userid')),
+ 'uploader_url': data.get('uploader', {}).get('homepage'),
+ 'tags': video_data.get('tags'),
+ }
+
+
+class YoukuShowIE(InfoExtractor):
+ _VALID_URL = r'https?://list\.youku\.com/show/id_(?P<id>[0-9a-z]+)\.html'
+ IE_NAME = 'youku:show'
+
+ _TESTS = [{
+ 'url': 'http://list.youku.com/show/id_zc7c670be07ff11e48b3f.html',
+ 'info_dict': {
+ 'id': 'zc7c670be07ff11e48b3f',
+ 'title': '花千骨 DVD版',
+ 'description': 'md5:a1ae6f5618571bbeb5c9821f9c81b558',
+ },
+ 'playlist_count': 50,
+ }, {
+ # Episode number not starting from 1
+ 'url': 'http://list.youku.com/show/id_zefbfbd70efbfbd780bef.html',
+ 'info_dict': {
+ 'id': 'zefbfbd70efbfbd780bef',
+ 'title': '超级飞侠3',
+ 'description': 'md5:275715156abebe5ccc2a1992e9d56b98',
+ },
+ 'playlist_count': 24,
+ }, {
+ # Ongoing playlist. The initial page is the last one
+ 'url': 'http://list.youku.com/show/id_za7c275ecd7b411e1a19e.html',
+ 'only_matching': True,
+ }, {
+ # No data-id value.
+ 'url': 'http://list.youku.com/show/id_zefbfbd61237fefbfbdef.html',
+ 'only_matching': True,
+ }, {
+        # Wrong number of reload_ids.
+ 'url': 'http://list.youku.com/show/id_z20eb4acaf5c211e3b2ad.html',
+ 'only_matching': True,
+ }]
+
+ def _extract_entries(self, playlist_data_url, show_id, note, query):
+ query['callback'] = 'cb'
+ playlist_data = self._download_json(
+ playlist_data_url, show_id, query=query, note=note,
+ transform_source=lambda s: js_to_json(strip_jsonp(s))).get('html')
+ if playlist_data is None:
+ return [None, None]
+ drama_list = (get_element_by_class('p-drama-grid', playlist_data)
+ or get_element_by_class('p-drama-half-row', playlist_data))
+ if drama_list is None:
+ raise ExtractorError('No episodes found')
+ video_urls = re.findall(r'<a[^>]+href="([^"]+)"', drama_list)
+ return playlist_data, [
+ self.url_result(self._proto_relative_url(video_url, 'http:'), YoukuIE.ie_key())
+ for video_url in video_urls]
+
+ def _real_extract(self, url):
+ show_id = self._match_id(url)
+ webpage = self._download_webpage(url, show_id)
+
+ entries = []
+ page_config = self._parse_json(self._search_regex(
+ r'var\s+PageConfig\s*=\s*({.+});', webpage, 'page config'),
+ show_id, transform_source=js_to_json)
+ first_page, initial_entries = self._extract_entries(
+ 'http://list.youku.com/show/module', show_id,
+ note='Downloading initial playlist data page',
+ query={
+ 'id': page_config['showid'],
+ 'tab': 'showInfo',
+ })
+ first_page_reload_id = self._html_search_regex(
+ r'<div[^>]+id="(reload_\d+)', first_page, 'first page reload id')
+ # The first reload_id has the same items as first_page
+ reload_ids = re.findall('<li[^>]+data-id="([^"]+)">', first_page)
+ entries.extend(initial_entries)
+ for idx, reload_id in enumerate(reload_ids):
+ if reload_id == first_page_reload_id:
+ continue
+ _, new_entries = self._extract_entries(
+ 'http://list.youku.com/show/episode', show_id,
+ note='Downloading playlist data page %d' % (idx + 1),
+ query={
+ 'id': page_config['showid'],
+ 'stage': reload_id,
+ })
+ if new_entries is not None:
+ entries.extend(new_entries)
+ desc = self._html_search_meta('description', webpage, fatal=False)
+ playlist_title = desc.split(',')[0] if desc else None
+ detail_li = get_element_by_class('p-intro', webpage)
+ playlist_description = get_element_by_class(
+ 'intro-more', detail_li) if detail_li else None
+
+ return self.playlist_result(
+ entries, show_id, playlist_title, playlist_description)
diff --git a/yt_dlp/extractor/younow.py b/yt_dlp/extractor/younow.py
new file mode 100644
index 0000000..b67cb2e
--- /dev/null
+++ b/yt_dlp/extractor/younow.py
@@ -0,0 +1,201 @@
+import itertools
+
+from .common import InfoExtractor
+from ..compat import compat_str
+from ..utils import (
+ ExtractorError,
+ format_field,
+ int_or_none,
+ str_or_none,
+ try_get,
+)
+
+CDN_API_BASE = 'https://cdn.younow.com/php/api'
+MOMENT_URL_FORMAT = '%s/moment/fetch/id=%%s' % CDN_API_BASE
+
+
+class YouNowLiveIE(InfoExtractor):
+ _VALID_URL = r'https?://(?:www\.)?younow\.com/(?P<id>[^/?#&]+)'
+ _TEST = {
+ 'url': 'https://www.younow.com/AmandaPadeezy',
+ 'info_dict': {
+ 'id': 'AmandaPadeezy',
+ 'ext': 'mp4',
+ 'is_live': True,
+ 'title': 'March 26, 2017',
+ 'thumbnail': r're:^https?://.*\.jpg$',
+ 'tags': ['girls'],
+ 'categories': ['girls'],
+ 'uploader': 'AmandaPadeezy',
+ 'uploader_id': '6716501',
+ 'uploader_url': 'https://www.younow.com/AmandaPadeezy',
+ 'creator': 'AmandaPadeezy',
+ },
+ 'skip': True,
+ }
+
+ @classmethod
+ def suitable(cls, url):
+ return (False
+ if YouNowChannelIE.suitable(url) or YouNowMomentIE.suitable(url)
+ else super(YouNowLiveIE, cls).suitable(url))
+
+ def _real_extract(self, url):
+ username = self._match_id(url)
+
+ data = self._download_json(
+ 'https://api.younow.com/php/api/broadcast/info/curId=0/user=%s'
+ % username, username)
+
+ if data.get('errorCode') != 0:
+ raise ExtractorError(data['errorMsg'], expected=True)
+
+ uploader = try_get(
+ data, lambda x: x['user']['profileUrlString'],
+ compat_str) or username
+
+ return {
+ 'id': uploader,
+ 'is_live': True,
+ 'title': uploader,
+ 'thumbnail': data.get('awsUrl'),
+ 'tags': data.get('tags'),
+ 'categories': data.get('tags'),
+ 'uploader': uploader,
+ 'uploader_id': data.get('userId'),
+ 'uploader_url': 'https://www.younow.com/%s' % username,
+ 'creator': uploader,
+ 'view_count': int_or_none(data.get('viewers')),
+ 'like_count': int_or_none(data.get('likes')),
+ 'formats': [{
+ 'url': '%s/broadcast/videoPath/hls=1/broadcastId=%s/channelId=%s'
+ % (CDN_API_BASE, data['broadcastId'], data['userId']),
+ 'ext': 'mp4',
+ 'protocol': 'm3u8',
+ }],
+ }
+
+
+def _extract_moment(item, fatal=True):
+ moment_id = item.get('momentId')
+ if not moment_id:
+ if not fatal:
+ return
+ raise ExtractorError('Unable to extract moment id')
+
+ moment_id = compat_str(moment_id)
+
+ title = item.get('text')
+ if not title:
+ title = 'YouNow %s' % (
+ item.get('momentType') or item.get('titleType') or 'moment')
+
+ uploader = try_get(item, lambda x: x['owner']['name'], compat_str)
+ uploader_id = try_get(item, lambda x: x['owner']['userId'])
+ uploader_url = format_field(uploader, None, 'https://www.younow.com/%s')
+
+ entry = {
+ 'extractor_key': 'YouNowMoment',
+ 'id': moment_id,
+ 'title': title,
+ 'view_count': int_or_none(item.get('views')),
+ 'like_count': int_or_none(item.get('likes')),
+ 'timestamp': int_or_none(item.get('created')),
+ 'creator': uploader,
+ 'uploader': uploader,
+ 'uploader_id': str_or_none(uploader_id),
+ 'uploader_url': uploader_url,
+ 'formats': [{
+ 'url': 'https://hls.younow.com/momentsplaylists/live/%s/%s.m3u8'
+ % (moment_id, moment_id),
+ 'ext': 'mp4',
+ 'protocol': 'm3u8_native',
+ }],
+ }
+
+ return entry
+
+
+class YouNowChannelIE(InfoExtractor):
+ _VALID_URL = r'https?://(?:www\.)?younow\.com/(?P<id>[^/]+)/channel'
+ _TEST = {
+ 'url': 'https://www.younow.com/its_Kateee_/channel',
+ 'info_dict': {
+ 'id': '14629760',
+ 'title': 'its_Kateee_ moments'
+ },
+ 'playlist_mincount': 8,
+ }
+
+ def _entries(self, username, channel_id):
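+        # Moments are paginated with a 'createdBefore' cursor: 0 fetches
+        # the newest page, then the cursor moves to the creation time of
+        # the last item on each page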
+ created_before = 0
+ for page_num in itertools.count(1):
+ if created_before is None:
+ break
+ info = self._download_json(
+ '%s/moment/profile/channelId=%s/createdBefore=%d/records=20'
+ % (CDN_API_BASE, channel_id, created_before), username,
+ note='Downloading moments page %d' % page_num)
+ items = info.get('items')
+ if not items or not isinstance(items, list):
+ break
+ for item in items:
+ if not isinstance(item, dict):
+ continue
+ item_type = item.get('type')
+ if item_type == 'moment':
+ entry = _extract_moment(item, fatal=False)
+ if entry:
+ yield entry
+ elif item_type == 'collection':
+ moments = item.get('momentsIds')
+ if isinstance(moments, list):
+ for moment_id in moments:
+ m = self._download_json(
+ MOMENT_URL_FORMAT % moment_id, username,
+ note='Downloading %s moment JSON' % moment_id,
+ fatal=False)
+ if m and isinstance(m, dict) and m.get('item'):
+ entry = _extract_moment(m['item'])
+ if entry:
+ yield entry
+ created_before = int_or_none(item.get('created'))
+
+ def _real_extract(self, url):
+ username = self._match_id(url)
+ channel_id = compat_str(self._download_json(
+ 'https://api.younow.com/php/api/broadcast/info/curId=0/user=%s'
+ % username, username, note='Downloading user information')['userId'])
+ return self.playlist_result(
+ self._entries(username, channel_id), channel_id,
+ '%s moments' % username)
+
+
+class YouNowMomentIE(InfoExtractor):
+ _VALID_URL = r'https?://(?:www\.)?younow\.com/[^/]+/(?P<id>[^/?#&]+)'
+ _TEST = {
+ 'url': 'https://www.younow.com/GABO.../20712117/36319236/3b316doc/m',
+ 'md5': 'a30c70eadb9fb39a1aa3c8c0d22a0807',
+ 'info_dict': {
+ 'id': '20712117',
+ 'ext': 'mp4',
+ 'title': 'YouNow capture',
+ 'view_count': int,
+ 'like_count': int,
+ 'timestamp': 1490432040,
+ 'upload_date': '20170325',
+ 'uploader': 'GABO...',
+ 'uploader_id': '35917228',
+ },
+ }
+
+ @classmethod
+ def suitable(cls, url):
+ return (False
+ if YouNowChannelIE.suitable(url)
+ else super(YouNowMomentIE, cls).suitable(url))
+
+ def _real_extract(self, url):
+ video_id = self._match_id(url)
+ item = self._download_json(MOMENT_URL_FORMAT % video_id, video_id)
+ return _extract_moment(item['item'])
diff --git a/yt_dlp/extractor/youporn.py b/yt_dlp/extractor/youporn.py
new file mode 100644
index 0000000..6ee0abc
--- /dev/null
+++ b/yt_dlp/extractor/youporn.py
@@ -0,0 +1,198 @@
+import re
+
+from .common import InfoExtractor
+from ..utils import (
+ extract_attributes,
+ int_or_none,
+ merge_dicts,
+ str_to_int,
+ traverse_obj,
+ unified_strdate,
+ url_or_none,
+)
+
+
+class YouPornIE(InfoExtractor):
+ _VALID_URL = r'https?://(?:www\.)?youporn\.com/(?:watch|embed)/(?P<id>\d+)(?:/(?P<display_id>[^/?#&]+))?'
+ _EMBED_REGEX = [r'<iframe[^>]+\bsrc=["\'](?P<url>(?:https?:)?//(?:www\.)?youporn\.com/embed/\d+)']
+ _TESTS = [{
+ 'url': 'http://www.youporn.com/watch/505835/sex-ed-is-it-safe-to-masturbate-daily/',
+ 'md5': '3744d24c50438cf5b6f6d59feb5055c2',
+ 'info_dict': {
+ 'id': '505835',
+ 'display_id': 'sex-ed-is-it-safe-to-masturbate-daily',
+ 'ext': 'mp4',
+ 'title': 'Sex Ed: Is It Safe To Masturbate Daily?',
+ 'description': 'Love & Sex Answers: http://bit.ly/DanAndJenn -- Is It Unhealthy To Masturbate Daily?',
+ 'thumbnail': r're:^https?://.*\.jpg$',
+ 'duration': 210,
+ 'uploader': 'Ask Dan And Jennifer',
+ 'upload_date': '20101217',
+ 'average_rating': int,
+ 'view_count': int,
+ 'categories': list,
+ 'tags': list,
+ 'age_limit': 18,
+ },
+ 'skip': 'This video has been disabled',
+ }, {
+ # Unknown uploader
+ 'url': 'http://www.youporn.com/watch/561726/big-tits-awesome-brunette-on-amazing-webcam-show/?from=related3&al=2&from_id=561726&pos=4',
+ 'info_dict': {
+ 'id': '561726',
+ 'display_id': 'big-tits-awesome-brunette-on-amazing-webcam-show',
+ 'ext': 'mp4',
+ 'title': 'Big Tits Awesome Brunette On amazing webcam show',
+ 'description': 'http://sweetlivegirls.com Big Tits Awesome Brunette On amazing webcam show.mp4',
+ 'thumbnail': r're:^https?://.*\.jpg$',
+ 'uploader': 'Unknown',
+ 'upload_date': '20110418',
+ 'average_rating': int,
+ 'view_count': int,
+ 'categories': list,
+ 'tags': list,
+ 'age_limit': 18,
+ },
+ 'params': {
+ 'skip_download': True,
+ },
+ 'skip': '404',
+ }, {
+ 'url': 'https://www.youporn.com/embed/505835/sex-ed-is-it-safe-to-masturbate-daily/',
+ 'only_matching': True,
+ }, {
+ 'url': 'http://www.youporn.com/watch/505835',
+ 'only_matching': True,
+ }, {
+ 'url': 'https://www.youporn.com/watch/13922959/femdom-principal/',
+ 'only_matching': True,
+ }, {
+ 'url': 'https://www.youporn.com/watch/16290308/tinderspecial-trailer1/',
+ 'info_dict': {
+ 'id': '16290308',
+ 'age_limit': 18,
+ 'categories': [],
+ 'description': 'md5:00ea70f642f431c379763c17c2f396bc',
+ 'display_id': 'tinderspecial-trailer1',
+ 'duration': 298.0,
+ 'ext': 'mp4',
+ 'upload_date': '20201123',
+ 'uploader': 'Ersties',
+ 'tags': [],
+ 'thumbnail': 'https://fi1.ypncdn.com/202011/23/16290308/original/8/tinderspecial-trailer1-8(m=eaAaaEPbaaaa).jpg',
+ 'timestamp': 1606089600,
+ 'title': 'Tinder In Real Life',
+ 'view_count': int,
+ }
+ }]
+
+ def _real_extract(self, url):
+ video_id, display_id = self._match_valid_url(url).group('id', 'display_id')
+ definitions = self._download_json(
+ f'https://www.youporn.com/api/video/media_definitions/{video_id}/', display_id or video_id)
+
+ def get_format_data(data, f):
+ return traverse_obj(data, lambda _, v: v['format'] == f and url_or_none(v['videoUrl']))
+
+ formats = []
+        # Try to extract only the actual master m3u8 first, avoiding the duplicate single-resolution "master" m3u8s
+ for hls_url in traverse_obj(get_format_data(definitions, 'hls'), (
+ lambda _, v: not isinstance(v['defaultQuality'], bool), 'videoUrl'), (..., 'videoUrl')):
+ formats.extend(self._extract_m3u8_formats(hls_url, video_id, 'mp4', fatal=False, m3u8_id='hls'))
+
+ for definition in get_format_data(definitions, 'mp4'):
+ f = traverse_obj(definition, {
+ 'url': 'videoUrl',
+ 'filesize': ('videoSize', {int_or_none})
+ })
+ height = int_or_none(definition.get('quality'))
+ # Video URL's path looks like this:
+ # /201012/17/505835/720p_1500k_505835/YouPorn%20-%20Sex%20Ed%20Is%20It%20Safe%20To%20Masturbate%20Daily.mp4
+ # /201012/17/505835/vl_240p_240k_505835/YouPorn%20-%20Sex%20Ed%20Is%20It%20Safe%20To%20Masturbate%20Daily.mp4
+ # /videos/201703/11/109285532/1080P_4000K_109285532.mp4
+            # We can extract some useful metadata (height, bitrate) from it
+ mobj = re.search(r'(?P<height>\d{3,4})[pP]_(?P<bitrate>\d+)[kK]_\d+', definition['videoUrl'])
+ if mobj:
+ if not height:
+ height = int(mobj.group('height'))
+ bitrate = int(mobj.group('bitrate'))
+ f.update({
+ 'format_id': '%dp-%dk' % (height, bitrate),
+ 'tbr': bitrate,
+ })
+ f['height'] = height
+ formats.append(f)
+
+ webpage = self._download_webpage(
+ 'http://www.youporn.com/watch/%s' % video_id, display_id,
+ headers={'Cookie': 'age_verified=1'})
+
+ title = self._html_search_regex(
+ r'(?s)<div[^>]+class=["\']watchVideoTitle[^>]+>(.+?)</div>',
+ webpage, 'title', default=None) or self._og_search_title(
+ webpage, default=None) or self._html_search_meta(
+ 'title', webpage, fatal=True)
+
+ description = self._html_search_regex(
+ r'(?s)<div[^>]+\bid=["\']description["\'][^>]*>(.+?)</div>',
+ webpage, 'description',
+ default=None) or self._og_search_description(
+ webpage, default=None)
+ thumbnail = self._search_regex(
+ r'(?:imageurl\s*=|poster\s*:)\s*(["\'])(?P<thumbnail>.+?)\1',
+ webpage, 'thumbnail', fatal=False, group='thumbnail')
+ duration = int_or_none(self._html_search_meta(
+ 'video:duration', webpage, 'duration', fatal=False))
+
+ uploader = self._html_search_regex(
+ r'(?s)<div[^>]+class=["\']submitByLink["\'][^>]*>(.+?)</div>',
+ webpage, 'uploader', fatal=False)
+ upload_date = unified_strdate(self._html_search_regex(
+ (r'UPLOADED:\s*<span>([^<]+)',
+ r'Date\s+[Aa]dded:\s*<span>([^<]+)',
+ r'''(?s)<div[^>]+class=["']videoInfo(?:Date|Time)\b[^>]*>(.+?)</div>''',
+ r'(?s)<label\b[^>]*>Uploaded[^<]*</label>\s*<span\b[^>]*>(.+?)</span>'),
+ webpage, 'upload date', fatal=False))
+
+ age_limit = self._rta_search(webpage)
+
+ view_count = None
+ views = self._search_regex(
+ r'(<div[^>]+\bclass=["\']js_videoInfoViews["\']>)', webpage,
+ 'views', default=None)
+ if views:
+ view_count = str_to_int(extract_attributes(views).get('data-value'))
+ comment_count = str_to_int(self._search_regex(
+ r'>All [Cc]omments? \(([\d,.]+)\)',
+ webpage, 'comment count', default=None))
+
+ def extract_tag_box(regex, title):
+ tag_box = self._search_regex(regex, webpage, title, default=None)
+ if not tag_box:
+ return []
+ return re.findall(r'<a[^>]+href=[^>]+>([^<]+)', tag_box)
+
+ categories = extract_tag_box(
+ r'(?s)Categories:.*?</[^>]+>(.+?)</div>', 'categories')
+ tags = extract_tag_box(
+ r'(?s)Tags:.*?</div>\s*<div[^>]+class=["\']tagBoxContent["\'][^>]*>(.+?)</div>',
+ 'tags')
+
+ data = self._search_json_ld(webpage, video_id, expected_type='VideoObject', fatal=False)
+ data.pop('url', None)
+ return merge_dicts(data, {
+ 'id': video_id,
+ 'display_id': display_id,
+ 'title': title,
+ 'description': description,
+ 'thumbnail': thumbnail,
+ 'duration': duration,
+ 'uploader': uploader,
+ 'upload_date': upload_date,
+ 'view_count': view_count,
+ 'comment_count': comment_count,
+ 'categories': categories,
+ 'tags': tags,
+ 'age_limit': age_limit,
+ 'formats': formats,
+ })
diff --git a/yt_dlp/extractor/yourporn.py b/yt_dlp/extractor/yourporn.py
new file mode 100644
index 0000000..38f42a9
--- /dev/null
+++ b/yt_dlp/extractor/yourporn.py
@@ -0,0 +1,65 @@
+from .common import InfoExtractor
+from ..compat import compat_str
+from ..utils import (
+ parse_duration,
+ urljoin,
+)
+
+
+class YourPornIE(InfoExtractor):
+ _VALID_URL = r'https?://(?:www\.)?sxyprn\.com/post/(?P<id>[^/?#&.]+)'
+ _TESTS = [{
+ 'url': 'https://sxyprn.com/post/57ffcb2e1179b.html',
+ 'md5': '6f8682b6464033d87acaa7a8ff0c092e',
+ 'info_dict': {
+ 'id': '57ffcb2e1179b',
+ 'ext': 'mp4',
+ 'title': 'md5:c9f43630bd968267672651ba905a7d35',
+ 'thumbnail': r're:^https?://.*\.jpg$',
+ 'duration': 165,
+ 'age_limit': 18,
+ },
+ 'params': {
+ 'skip_download': True,
+ },
+ }, {
+ 'url': 'https://sxyprn.com/post/57ffcb2e1179b.html',
+ 'only_matching': True,
+ }]
+
+ def _real_extract(self, url):
+ video_id = self._match_id(url)
+
+ webpage = self._download_webpage(url, video_id)
+
+ parts = self._parse_json(
+ self._search_regex(
+ r'data-vnfo=(["\'])(?P<data>{.+?})\1', webpage, 'data info',
+ group='data'),
+ video_id)[video_id].split('/')
+
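+        # The URL path is obfuscated: subtract the sum of all digits found
+        # in parts 6 and 7 from part 5, and append '8' to part 1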
+ num = 0
+ for c in parts[6] + parts[7]:
+ if c.isnumeric():
+ num += int(c)
+ parts[5] = compat_str(int(parts[5]) - num)
+ parts[1] += '8'
+ video_url = urljoin(url, '/'.join(parts))
+
+ title = (self._search_regex(
+ r'<[^>]+\bclass=["\']PostEditTA[^>]+>([^<]+)', webpage, 'title',
+ default=None) or self._og_search_description(webpage)).strip()
+ thumbnail = self._og_search_thumbnail(webpage)
+ duration = parse_duration(self._search_regex(
+ r'duration\s*:\s*<[^>]+>([\d:]+)', webpage, 'duration',
+ default=None))
+
+ return {
+ 'id': video_id,
+ 'url': video_url,
+ 'title': title,
+ 'thumbnail': thumbnail,
+ 'duration': duration,
+ 'age_limit': 18,
+ 'ext': 'mp4',
+ }
diff --git a/yt_dlp/extractor/yourupload.py b/yt_dlp/extractor/yourupload.py
new file mode 100644
index 0000000..def6329
--- /dev/null
+++ b/yt_dlp/extractor/yourupload.py
@@ -0,0 +1,43 @@
+from .common import InfoExtractor
+from ..utils import urljoin
+
+
+class YourUploadIE(InfoExtractor):
+ _VALID_URL = r'https?://(?:www\.)?(?:yourupload\.com/(?:watch|embed)|embed\.yourupload\.com)/(?P<id>[A-Za-z0-9]+)'
+ _TESTS = [{
+ 'url': 'http://yourupload.com/watch/14i14h',
+ 'md5': '5e2c63385454c557f97c4c4131a393cd',
+ 'info_dict': {
+ 'id': '14i14h',
+ 'ext': 'mp4',
+ 'title': 'BigBuckBunny_320x180.mp4',
+ 'thumbnail': r're:^https?://.*\.jpe?g',
+ }
+ }, {
+ 'url': 'http://www.yourupload.com/embed/14i14h',
+ 'only_matching': True,
+ }, {
+ 'url': 'http://embed.yourupload.com/14i14h',
+ 'only_matching': True,
+ }]
+
+ def _real_extract(self, url):
+ video_id = self._match_id(url)
+
+ embed_url = 'http://www.yourupload.com/embed/%s' % video_id
+
+ webpage = self._download_webpage(embed_url, video_id)
+
+ title = self._og_search_title(webpage)
+ video_url = urljoin(embed_url, self._og_search_video_url(webpage))
+ thumbnail = self._og_search_thumbnail(webpage, default=None)
+
+ return {
+ 'id': video_id,
+ 'title': title,
+ 'url': video_url,
+ 'thumbnail': thumbnail,
+ 'http_headers': {
+ 'Referer': embed_url,
+ },
+ }
diff --git a/yt_dlp/extractor/youtube.py b/yt_dlp/extractor/youtube.py
new file mode 100644
index 0000000..33fd3b4
--- /dev/null
+++ b/yt_dlp/extractor/youtube.py
@@ -0,0 +1,7387 @@
+import base64
+import calendar
+import collections
+import copy
+import datetime
+import enum
+import hashlib
+import itertools
+import json
+import math
+import os.path
+import random
+import re
+import shlex
+import sys
+import threading
+import time
+import traceback
+import urllib.parse
+
+from .common import InfoExtractor, SearchInfoExtractor
+from .openload import PhantomJSwrapper
+from ..compat import functools
+from ..jsinterp import JSInterpreter
+from ..networking.exceptions import HTTPError, network_exceptions
+from ..utils import (
+ NO_DEFAULT,
+ ExtractorError,
+ LazyList,
+ UserNotLive,
+ bug_reports_message,
+ classproperty,
+ clean_html,
+ datetime_from_str,
+ dict_get,
+ filter_dict,
+ float_or_none,
+ format_field,
+ get_first,
+ int_or_none,
+ is_html,
+ join_nonempty,
+ js_to_json,
+ mimetype2ext,
+ orderedSet,
+ parse_codecs,
+ parse_count,
+ parse_duration,
+ parse_iso8601,
+ parse_qs,
+ qualities,
+ remove_start,
+ smuggle_url,
+ str_or_none,
+ str_to_int,
+ strftime_or_none,
+ traverse_obj,
+ try_get,
+ unescapeHTML,
+ unified_strdate,
+ unified_timestamp,
+ unsmuggle_url,
+ update_url_query,
+ url_or_none,
+ urljoin,
+ variadic,
+)
+
+STREAMING_DATA_CLIENT_NAME = '__yt_dlp_client'
+# any clients starting with _ cannot be explicitly requested by the user
+INNERTUBE_CLIENTS = {
+ 'web': {
+ 'INNERTUBE_API_KEY': 'AIzaSyAO_FJ2SlqU8Q4STEHLGCilw_Y9_11qcW8',
+ 'INNERTUBE_CONTEXT': {
+ 'client': {
+ 'clientName': 'WEB',
+ 'clientVersion': '2.20220801.00.00',
+ }
+ },
+ 'INNERTUBE_CONTEXT_CLIENT_NAME': 1
+ },
+ 'web_embedded': {
+ 'INNERTUBE_API_KEY': 'AIzaSyAO_FJ2SlqU8Q4STEHLGCilw_Y9_11qcW8',
+ 'INNERTUBE_CONTEXT': {
+ 'client': {
+ 'clientName': 'WEB_EMBEDDED_PLAYER',
+ 'clientVersion': '1.20220731.00.00',
+ },
+ },
+ 'INNERTUBE_CONTEXT_CLIENT_NAME': 56
+ },
+ 'web_music': {
+ 'INNERTUBE_API_KEY': 'AIzaSyC9XL3ZjWddXya6X74dJoCTL-WEYFDNX30',
+ 'INNERTUBE_HOST': 'music.youtube.com',
+ 'INNERTUBE_CONTEXT': {
+ 'client': {
+ 'clientName': 'WEB_REMIX',
+ 'clientVersion': '1.20220727.01.00',
+ }
+ },
+ 'INNERTUBE_CONTEXT_CLIENT_NAME': 67,
+ },
+ 'web_creator': {
+ 'INNERTUBE_API_KEY': 'AIzaSyBUPetSUmoZL-OhlxA7wSac5XinrygCqMo',
+ 'INNERTUBE_CONTEXT': {
+ 'client': {
+ 'clientName': 'WEB_CREATOR',
+ 'clientVersion': '1.20220726.00.00',
+ }
+ },
+ 'INNERTUBE_CONTEXT_CLIENT_NAME': 62,
+ },
+ 'android': {
+ 'INNERTUBE_API_KEY': 'AIzaSyA8eiZmM1FaDVjRy-df2KTyQ_vz_yYM39w',
+ 'INNERTUBE_CONTEXT': {
+ 'client': {
+ 'clientName': 'ANDROID',
+ 'clientVersion': '19.09.37',
+ 'androidSdkVersion': 30,
+ 'userAgent': 'com.google.android.youtube/19.09.37 (Linux; U; Android 11) gzip'
+ }
+ },
+ 'INNERTUBE_CONTEXT_CLIENT_NAME': 3,
+ 'REQUIRE_JS_PLAYER': False
+ },
+ 'android_embedded': {
+ 'INNERTUBE_API_KEY': 'AIzaSyCjc_pVEDi4qsv5MtC2dMXzpIaDoRFLsxw',
+ 'INNERTUBE_CONTEXT': {
+ 'client': {
+ 'clientName': 'ANDROID_EMBEDDED_PLAYER',
+ 'clientVersion': '19.09.37',
+ 'androidSdkVersion': 30,
+ 'userAgent': 'com.google.android.youtube/19.09.37 (Linux; U; Android 11) gzip'
+ },
+ },
+ 'INNERTUBE_CONTEXT_CLIENT_NAME': 55,
+ 'REQUIRE_JS_PLAYER': False
+ },
+ 'android_music': {
+ 'INNERTUBE_API_KEY': 'AIzaSyAOghZGza2MQSZkY_zfZ370N-PUdXEo8AI',
+ 'INNERTUBE_CONTEXT': {
+ 'client': {
+ 'clientName': 'ANDROID_MUSIC',
+ 'clientVersion': '6.42.52',
+ 'androidSdkVersion': 30,
+ 'userAgent': 'com.google.android.apps.youtube.music/6.42.52 (Linux; U; Android 11) gzip'
+ }
+ },
+ 'INNERTUBE_CONTEXT_CLIENT_NAME': 21,
+ 'REQUIRE_JS_PLAYER': False
+ },
+ 'android_creator': {
+ 'INNERTUBE_API_KEY': 'AIzaSyD_qjV8zaaUMehtLkrKFgVeSX_Iqbtyws8',
+ 'INNERTUBE_CONTEXT': {
+ 'client': {
+ 'clientName': 'ANDROID_CREATOR',
+ 'clientVersion': '22.30.100',
+ 'androidSdkVersion': 30,
+ 'userAgent': 'com.google.android.apps.youtube.creator/22.30.100 (Linux; U; Android 11) gzip'
+ },
+ },
+ 'INNERTUBE_CONTEXT_CLIENT_NAME': 14,
+ 'REQUIRE_JS_PLAYER': False
+ },
+ # iOS clients have HLS live streams. Setting device model to get 60fps formats.
+ # See: https://github.com/TeamNewPipe/NewPipeExtractor/issues/680#issuecomment-1002724558
+ 'ios': {
+ 'INNERTUBE_API_KEY': 'AIzaSyB-63vPrdThhKuerbB2N_l7Kwwcxj6yUAc',
+ 'INNERTUBE_CONTEXT': {
+ 'client': {
+ 'clientName': 'IOS',
+ 'clientVersion': '19.09.3',
+ 'deviceModel': 'iPhone14,3',
+ 'userAgent': 'com.google.ios.youtube/19.09.3 (iPhone14,3; U; CPU iOS 15_6 like Mac OS X)'
+ }
+ },
+ 'INNERTUBE_CONTEXT_CLIENT_NAME': 5,
+ 'REQUIRE_JS_PLAYER': False
+ },
+ 'ios_embedded': {
+ 'INNERTUBE_CONTEXT': {
+ 'client': {
+ 'clientName': 'IOS_MESSAGES_EXTENSION',
+ 'clientVersion': '19.09.3',
+ 'deviceModel': 'iPhone14,3',
+ 'userAgent': 'com.google.ios.youtube/19.09.3 (iPhone14,3; U; CPU iOS 15_6 like Mac OS X)'
+ },
+ },
+ 'INNERTUBE_CONTEXT_CLIENT_NAME': 66,
+ 'REQUIRE_JS_PLAYER': False
+ },
+ 'ios_music': {
+ 'INNERTUBE_API_KEY': 'AIzaSyBAETezhkwP0ZWA02RsqT1zu78Fpt0bC_s',
+ 'INNERTUBE_CONTEXT': {
+ 'client': {
+ 'clientName': 'IOS_MUSIC',
+ 'clientVersion': '6.33.3',
+ 'deviceModel': 'iPhone14,3',
+ 'userAgent': 'com.google.ios.youtubemusic/6.33.3 (iPhone14,3; U; CPU iOS 15_6 like Mac OS X)'
+ },
+ },
+ 'INNERTUBE_CONTEXT_CLIENT_NAME': 26,
+ 'REQUIRE_JS_PLAYER': False
+ },
+ 'ios_creator': {
+ 'INNERTUBE_CONTEXT': {
+ 'client': {
+ 'clientName': 'IOS_CREATOR',
+ 'clientVersion': '22.33.101',
+ 'deviceModel': 'iPhone14,3',
+ 'userAgent': 'com.google.ios.ytcreator/22.33.101 (iPhone14,3; U; CPU iOS 15_6 like Mac OS X)'
+ },
+ },
+ 'INNERTUBE_CONTEXT_CLIENT_NAME': 15,
+ 'REQUIRE_JS_PLAYER': False
+ },
+ # mweb has 'ultralow' formats
+ # See: https://github.com/yt-dlp/yt-dlp/pull/557
+ 'mweb': {
+ 'INNERTUBE_API_KEY': 'AIzaSyAO_FJ2SlqU8Q4STEHLGCilw_Y9_11qcW8',
+ 'INNERTUBE_CONTEXT': {
+ 'client': {
+ 'clientName': 'MWEB',
+ 'clientVersion': '2.20220801.00.00',
+ }
+ },
+ 'INNERTUBE_CONTEXT_CLIENT_NAME': 2
+ },
+ # This client can access age restricted videos (unless the uploader has disabled the 'allow embedding' option)
+ # See: https://github.com/zerodytrash/YouTube-Internal-Clients
+ 'tv_embedded': {
+ 'INNERTUBE_API_KEY': 'AIzaSyAO_FJ2SlqU8Q4STEHLGCilw_Y9_11qcW8',
+ 'INNERTUBE_CONTEXT': {
+ 'client': {
+ 'clientName': 'TVHTML5_SIMPLY_EMBEDDED_PLAYER',
+ 'clientVersion': '2.0',
+ },
+ },
+ 'INNERTUBE_CONTEXT_CLIENT_NAME': 85
+ },
+}
+
+
+def _split_innertube_client(client_name):
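+    # Returns (client config key, base client, variant); accepts both
+    # '<key>.<base>' and '<base>_<variant>' style names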
+ variant, *base = client_name.rsplit('.', 1)
+ if base:
+ return variant, base[0], variant
+ base, *variant = client_name.split('_', 1)
+ return client_name, base, variant[0] if variant else None
+
+
+def short_client_name(client_name):
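+    # Abbreviated client name for display,
+    # e.g. 'android' -> 'ANDR', 'web_embedded' -> 'WEB-E'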
+ main, *parts = _split_innertube_client(client_name)[0].replace('embedscreen', 'e_s').split('_')
+ return join_nonempty(main[:4], ''.join(x[0] for x in parts)).upper()
+
+
+def build_innertube_clients():
+ THIRD_PARTY = {
+ 'embedUrl': 'https://www.youtube.com/', # Can be any valid URL
+ }
+ BASE_CLIENTS = ('ios', 'android', 'web', 'tv', 'mweb')
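+    # qualities() scores by position in the reversed tuple, so 'ios'
+    # ends up with the highest priority and 'mweb' with the lowest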
+ priority = qualities(BASE_CLIENTS[::-1])
+
+ for client, ytcfg in tuple(INNERTUBE_CLIENTS.items()):
+ ytcfg.setdefault('INNERTUBE_API_KEY', 'AIzaSyDCU8hByM-4DrUqRUYnGn-3llEO78bcxq8')
+ ytcfg.setdefault('INNERTUBE_HOST', 'www.youtube.com')
+ ytcfg.setdefault('REQUIRE_JS_PLAYER', True)
+ ytcfg['INNERTUBE_CONTEXT']['client'].setdefault('hl', 'en')
+
+ _, base_client, variant = _split_innertube_client(client)
+ ytcfg['priority'] = 10 * priority(base_client)
+
+ if not variant:
+ INNERTUBE_CLIENTS[f'{client}_embedscreen'] = embedscreen = copy.deepcopy(ytcfg)
+ embedscreen['INNERTUBE_CONTEXT']['client']['clientScreen'] = 'EMBED'
+ embedscreen['INNERTUBE_CONTEXT']['thirdParty'] = THIRD_PARTY
+ embedscreen['priority'] -= 3
+ elif variant == 'embedded':
+ ytcfg['INNERTUBE_CONTEXT']['thirdParty'] = THIRD_PARTY
+ ytcfg['priority'] -= 2
+ else:
+ ytcfg['priority'] -= 3
+
+
+build_innertube_clients()
+
+
+class BadgeType(enum.Enum):
+ AVAILABILITY_UNLISTED = enum.auto()
+ AVAILABILITY_PRIVATE = enum.auto()
+ AVAILABILITY_PUBLIC = enum.auto()
+ AVAILABILITY_PREMIUM = enum.auto()
+ AVAILABILITY_SUBSCRIPTION = enum.auto()
+ LIVE_NOW = enum.auto()
+ VERIFIED = enum.auto()
+
+
+class YoutubeBaseInfoExtractor(InfoExtractor):
+ """Provide base functions for Youtube extractors"""
+
+ _RESERVED_NAMES = (
+ r'channel|c|user|playlist|watch|w|v|embed|e|live|watch_popup|clip|'
+ r'shorts|movies|results|search|shared|hashtag|trending|explore|feed|feeds|'
+ r'browse|oembed|get_video_info|iframe_api|s/player|source|'
+ r'storefront|oops|index|account|t/terms|about|upload|signin|logout')
+
+ _PLAYLIST_ID_RE = r'(?:(?:PL|LL|EC|UU|FL|RD|UL|TL|PU|OLAK5uy_)[0-9A-Za-z-_]{10,}|RDMM|WL|LL|LM)'
+
+ # _NETRC_MACHINE = 'youtube'
+
+    # If True, it will raise an error if no login info is provided
+ _LOGIN_REQUIRED = False
+
+ _INVIDIOUS_SITES = (
+ # invidious-redirect websites
+ r'(?:www\.)?redirect\.invidious\.io',
+ r'(?:(?:www|dev)\.)?invidio\.us',
+ # Invidious instances taken from https://github.com/iv-org/documentation/blob/master/docs/instances.md
+ r'(?:www\.)?invidious\.pussthecat\.org',
+ r'(?:www\.)?invidious\.zee\.li',
+ r'(?:www\.)?invidious\.ethibox\.fr',
+ r'(?:www\.)?iv\.ggtyler\.dev',
+ r'(?:www\.)?inv\.vern\.i2p',
+ r'(?:www\.)?am74vkcrjp2d5v36lcdqgsj2m6x36tbrkhsruoegwfcizzabnfgf5zyd\.onion',
+ r'(?:www\.)?inv\.riverside\.rocks',
+ r'(?:www\.)?invidious\.silur\.me',
+ r'(?:www\.)?inv\.bp\.projectsegfau\.lt',
+ r'(?:www\.)?invidious\.g4c3eya4clenolymqbpgwz3q3tawoxw56yhzk4vugqrl6dtu3ejvhjid\.onion',
+ r'(?:www\.)?invidious\.slipfox\.xyz',
+ r'(?:www\.)?invidious\.esmail5pdn24shtvieloeedh7ehz3nrwcdivnfhfcedl7gf4kwddhkqd\.onion',
+ r'(?:www\.)?inv\.vernccvbvyi5qhfzyqengccj7lkove6bjot2xhh5kajhwvidqafczrad\.onion',
+ r'(?:www\.)?invidious\.tiekoetter\.com',
+ r'(?:www\.)?iv\.odysfvr23q5wgt7i456o5t3trw2cw5dgn56vbjfbq2m7xsc5vqbqpcyd\.onion',
+ r'(?:www\.)?invidious\.nerdvpn\.de',
+ r'(?:www\.)?invidious\.weblibre\.org',
+ r'(?:www\.)?inv\.odyssey346\.dev',
+ r'(?:www\.)?invidious\.dhusch\.de',
+ r'(?:www\.)?iv\.melmac\.space',
+ r'(?:www\.)?watch\.thekitty\.zone',
+ r'(?:www\.)?invidious\.privacydev\.net',
+ r'(?:www\.)?ng27owmagn5amdm7l5s3rsqxwscl5ynppnis5dqcasogkyxcfqn7psid\.onion',
+ r'(?:www\.)?invidious\.drivet\.xyz',
+ r'(?:www\.)?vid\.priv\.au',
+ r'(?:www\.)?euxxcnhsynwmfidvhjf6uzptsmh4dipkmgdmcmxxuo7tunp3ad2jrwyd\.onion',
+ r'(?:www\.)?inv\.vern\.cc',
+ r'(?:www\.)?invidious\.esmailelbob\.xyz',
+ r'(?:www\.)?invidious\.sethforprivacy\.com',
+ r'(?:www\.)?yt\.oelrichsgarcia\.de',
+ r'(?:www\.)?yt\.artemislena\.eu',
+ r'(?:www\.)?invidious\.flokinet\.to',
+ r'(?:www\.)?invidious\.baczek\.me',
+ r'(?:www\.)?y\.com\.sb',
+ r'(?:www\.)?invidious\.epicsite\.xyz',
+ r'(?:www\.)?invidious\.lidarshield\.cloud',
+ r'(?:www\.)?yt\.funami\.tech',
+ r'(?:www\.)?invidious\.3o7z6yfxhbw7n3za4rss6l434kmv55cgw2vuziwuigpwegswvwzqipyd\.onion',
+ r'(?:www\.)?osbivz6guyeahrwp2lnwyjk2xos342h4ocsxyqrlaopqjuhwn2djiiyd\.onion',
+ r'(?:www\.)?u2cvlit75owumwpy4dj2hsmvkq7nvrclkpht7xgyye2pyoxhpmclkrad\.onion',
+ # youtube-dl invidious instances list
+ r'(?:(?:www|no)\.)?invidiou\.sh',
+ r'(?:(?:www|fi)\.)?invidious\.snopyta\.org',
+ r'(?:www\.)?invidious\.kabi\.tk',
+ r'(?:www\.)?invidious\.mastodon\.host',
+ r'(?:www\.)?invidious\.zapashcanon\.fr',
+ r'(?:www\.)?(?:invidious(?:-us)?|piped)\.kavin\.rocks',
+ r'(?:www\.)?invidious\.tinfoil-hat\.net',
+ r'(?:www\.)?invidious\.himiko\.cloud',
+ r'(?:www\.)?invidious\.reallyancient\.tech',
+ r'(?:www\.)?invidious\.tube',
+ r'(?:www\.)?invidiou\.site',
+ r'(?:www\.)?invidious\.site',
+ r'(?:www\.)?invidious\.xyz',
+ r'(?:www\.)?invidious\.nixnet\.xyz',
+ r'(?:www\.)?invidious\.048596\.xyz',
+ r'(?:www\.)?invidious\.drycat\.fr',
+ r'(?:www\.)?inv\.skyn3t\.in',
+ r'(?:www\.)?tube\.poal\.co',
+ r'(?:www\.)?tube\.connect\.cafe',
+ r'(?:www\.)?vid\.wxzm\.sx',
+ r'(?:www\.)?vid\.mint\.lgbt',
+ r'(?:www\.)?vid\.puffyan\.us',
+ r'(?:www\.)?yewtu\.be',
+ r'(?:www\.)?yt\.elukerio\.org',
+ r'(?:www\.)?yt\.lelux\.fi',
+ r'(?:www\.)?invidious\.ggc-project\.de',
+ r'(?:www\.)?yt\.maisputain\.ovh',
+ r'(?:www\.)?ytprivate\.com',
+ r'(?:www\.)?invidious\.13ad\.de',
+ r'(?:www\.)?invidious\.toot\.koeln',
+ r'(?:www\.)?invidious\.fdn\.fr',
+ r'(?:www\.)?watch\.nettohikari\.com',
+ r'(?:www\.)?invidious\.namazso\.eu',
+ r'(?:www\.)?invidious\.silkky\.cloud',
+ r'(?:www\.)?invidious\.exonip\.de',
+ r'(?:www\.)?invidious\.riverside\.rocks',
+ r'(?:www\.)?invidious\.blamefran\.net',
+ r'(?:www\.)?invidious\.moomoo\.de',
+ r'(?:www\.)?ytb\.trom\.tf',
+ r'(?:www\.)?yt\.cyberhost\.uk',
+ r'(?:www\.)?kgg2m7yk5aybusll\.onion',
+ r'(?:www\.)?qklhadlycap4cnod\.onion',
+ r'(?:www\.)?axqzx4s6s54s32yentfqojs3x5i7faxza6xo3ehd4bzzsg2ii4fv2iid\.onion',
+ r'(?:www\.)?c7hqkpkpemu6e7emz5b4vyz7idjgdvgaaa3dyimmeojqbgpea3xqjoid\.onion',
+ r'(?:www\.)?fz253lmuao3strwbfbmx46yu7acac2jz27iwtorgmbqlkurlclmancad\.onion',
+ r'(?:www\.)?invidious\.l4qlywnpwqsluw65ts7md3khrivpirse744un3x7mlskqauz5pyuzgqd\.onion',
+ r'(?:www\.)?owxfohz4kjyv25fvlqilyxast7inivgiktls3th44jhk3ej3i7ya\.b32\.i2p',
+ r'(?:www\.)?4l2dgddgsrkf2ous66i6seeyi6etzfgrue332grh2n7madpwopotugyd\.onion',
+ r'(?:www\.)?w6ijuptxiku4xpnnaetxvnkc5vqcdu7mgns2u77qefoixi63vbvnpnqd\.onion',
+ r'(?:www\.)?kbjggqkzv65ivcqj6bumvp337z6264huv5kpkwuv6gu5yjiskvan7fad\.onion',
+ r'(?:www\.)?grwp24hodrefzvjjuccrkw3mjq4tzhaaq32amf33dzpmuxe7ilepcmad\.onion',
+ r'(?:www\.)?hpniueoejy4opn7bc4ftgazyqjoeqwlvh2uiku2xqku6zpoa4bf5ruid\.onion',
+ # piped instances from https://github.com/TeamPiped/Piped/wiki/Instances
+ r'(?:www\.)?piped\.kavin\.rocks',
+ r'(?:www\.)?piped\.tokhmi\.xyz',
+ r'(?:www\.)?piped\.syncpundit\.io',
+ r'(?:www\.)?piped\.mha\.fi',
+ r'(?:www\.)?watch\.whatever\.social',
+ r'(?:www\.)?piped\.garudalinux\.org',
+ r'(?:www\.)?piped\.rivo\.lol',
+ r'(?:www\.)?piped-libre\.kavin\.rocks',
+ r'(?:www\.)?yt\.jae\.fi',
+ r'(?:www\.)?piped\.mint\.lgbt',
+ r'(?:www\.)?il\.ax',
+ r'(?:www\.)?piped\.esmailelbob\.xyz',
+ r'(?:www\.)?piped\.projectsegfau\.lt',
+ r'(?:www\.)?piped\.privacydev\.net',
+ r'(?:www\.)?piped\.palveluntarjoaja\.eu',
+ r'(?:www\.)?piped\.smnz\.de',
+ r'(?:www\.)?piped\.adminforge\.de',
+ r'(?:www\.)?watch\.whatevertinfoil\.de',
+ r'(?:www\.)?piped\.qdi\.fi',
+ r'(?:(?:www|cf)\.)?piped\.video',
+ r'(?:www\.)?piped\.aeong\.one',
+ r'(?:www\.)?piped\.moomoo\.me',
+ r'(?:www\.)?piped\.chauvet\.pro',
+ r'(?:www\.)?watch\.leptons\.xyz',
+ r'(?:www\.)?pd\.vern\.cc',
+ r'(?:www\.)?piped\.hostux\.net',
+ r'(?:www\.)?piped\.lunar\.icu',
+ # Hyperpipe instances from https://hyperpipe.codeberg.page/
+ r'(?:www\.)?hyperpipe\.surge\.sh',
+ r'(?:www\.)?hyperpipe\.esmailelbob\.xyz',
+ r'(?:www\.)?listen\.whatever\.social',
+ r'(?:www\.)?music\.adminforge\.de',
+ )
+
+    # Extracted from the account/account_menu endpoint
+ # XXX: These are the supported YouTube UI and API languages,
+ # which is slightly different from languages supported for translation in YouTube studio
+ _SUPPORTED_LANG_CODES = [
+ 'af', 'az', 'id', 'ms', 'bs', 'ca', 'cs', 'da', 'de', 'et', 'en-IN', 'en-GB', 'en', 'es',
+ 'es-419', 'es-US', 'eu', 'fil', 'fr', 'fr-CA', 'gl', 'hr', 'zu', 'is', 'it', 'sw', 'lv',
+ 'lt', 'hu', 'nl', 'no', 'uz', 'pl', 'pt-PT', 'pt', 'ro', 'sq', 'sk', 'sl', 'sr-Latn', 'fi',
+ 'sv', 'vi', 'tr', 'be', 'bg', 'ky', 'kk', 'mk', 'mn', 'ru', 'sr', 'uk', 'el', 'hy', 'iw',
+ 'ur', 'ar', 'fa', 'ne', 'mr', 'hi', 'as', 'bn', 'pa', 'gu', 'or', 'ta', 'te', 'kn', 'ml',
+ 'si', 'th', 'lo', 'my', 'ka', 'am', 'km', 'zh-CN', 'zh-TW', 'zh-HK', 'ja', 'ko'
+ ]
+
+ _IGNORED_WARNINGS = {'Unavailable videos will be hidden during playback'}
+
+ _YT_HANDLE_RE = r'@[\w.-]{3,30}' # https://support.google.com/youtube/answer/11585688?hl=en
+ _YT_CHANNEL_UCID_RE = r'UC[\w-]{22}'
+
+ def ucid_or_none(self, ucid):
+ return self._search_regex(rf'^({self._YT_CHANNEL_UCID_RE})$', ucid, 'UC-id', default=None)
+
+ def handle_or_none(self, handle):
+ return self._search_regex(rf'^({self._YT_HANDLE_RE})$', handle, '@-handle', default=None)
+
+ def handle_from_url(self, url):
+ return self._search_regex(rf'^(?:https?://(?:www\.)?youtube\.com)?/({self._YT_HANDLE_RE})',
+ url, 'channel handle', default=None)
+
+ def ucid_from_url(self, url):
+ return self._search_regex(rf'^(?:https?://(?:www\.)?youtube\.com)?/({self._YT_CHANNEL_UCID_RE})',
+ url, 'channel id', default=None)
+
+ @functools.cached_property
+ def _preferred_lang(self):
+ """
+ Returns a language code supported by YouTube for the user preferred language.
+ Returns None if no preferred language set.
+ """
+ preferred_lang = self._configuration_arg('lang', ie_key='Youtube', casesense=True, default=[''])[0]
+ if not preferred_lang:
+ return
+ if preferred_lang not in self._SUPPORTED_LANG_CODES:
+ raise ExtractorError(
+ f'Unsupported language code: {preferred_lang}. Supported language codes (case-sensitive): {join_nonempty(*self._SUPPORTED_LANG_CODES, delim=", ")}.',
+ expected=True)
+ elif preferred_lang != 'en':
+ self.report_warning(
+ f'Preferring "{preferred_lang}" translated fields. Note that some metadata extraction may fail or be incorrect.')
+ return preferred_lang
+
+ def _initialize_consent(self):
+ cookies = self._get_cookies('https://www.youtube.com/')
+ if cookies.get('__Secure-3PSID'):
+ return
+ socs = cookies.get('SOCS')
+ if socs and not socs.value.startswith('CAA'): # not consented
+ return
+ self._set_cookie('.youtube.com', 'SOCS', 'CAI', secure=True) # accept all (required for mixes)
+
+ def _initialize_pref(self):
+ cookies = self._get_cookies('https://www.youtube.com/')
+ pref_cookie = cookies.get('PREF')
+ pref = {}
+ if pref_cookie:
+ try:
+ pref = dict(urllib.parse.parse_qsl(pref_cookie.value))
+ except ValueError:
+ self.report_warning('Failed to parse user PREF cookie' + bug_reports_message())
+ pref.update({'hl': self._preferred_lang or 'en', 'tz': 'UTC'})
+ self._set_cookie('.youtube.com', name='PREF', value=urllib.parse.urlencode(pref))
+
+ def _real_initialize(self):
+ self._initialize_pref()
+ self._initialize_consent()
+ self._check_login_required()
+
+ def _check_login_required(self):
+ if self._LOGIN_REQUIRED and not self._cookies_passed:
+ self.raise_login_required('Login details are needed to download this content', method='cookies')
+
+ _YT_INITIAL_DATA_RE = r'(?:window\s*\[\s*["\']ytInitialData["\']\s*\]|ytInitialData)\s*='
+ _YT_INITIAL_PLAYER_RESPONSE_RE = r'ytInitialPlayerResponse\s*='
+
+ def _get_default_ytcfg(self, client='web'):
+ return copy.deepcopy(INNERTUBE_CLIENTS[client])
+
+ def _get_innertube_host(self, client='web'):
+ return INNERTUBE_CLIENTS[client]['INNERTUBE_HOST']
+
+ def _ytcfg_get_safe(self, ytcfg, getter, expected_type=None, default_client='web'):
+ # try_get but with fallback to default ytcfg client values when present
+ _func = lambda y: try_get(y, getter, expected_type)
+ return _func(ytcfg) or _func(self._get_default_ytcfg(default_client))
+
+ def _extract_client_name(self, ytcfg, default_client='web'):
+ return self._ytcfg_get_safe(
+ ytcfg, (lambda x: x['INNERTUBE_CLIENT_NAME'],
+ lambda x: x['INNERTUBE_CONTEXT']['client']['clientName']), str, default_client)
+
+ def _extract_client_version(self, ytcfg, default_client='web'):
+ return self._ytcfg_get_safe(
+ ytcfg, (lambda x: x['INNERTUBE_CLIENT_VERSION'],
+ lambda x: x['INNERTUBE_CONTEXT']['client']['clientVersion']), str, default_client)
+
+ def _select_api_hostname(self, req_api_hostname, default_client=None):
+ return (self._configuration_arg('innertube_host', [''], ie_key=YoutubeIE.ie_key())[0]
+ or req_api_hostname or self._get_innertube_host(default_client or 'web'))
+
+ def _extract_api_key(self, ytcfg=None, default_client='web'):
+ return self._ytcfg_get_safe(ytcfg, lambda x: x['INNERTUBE_API_KEY'], str, default_client)
+
+ def _extract_context(self, ytcfg=None, default_client='web'):
+ context = get_first(
+ (ytcfg, self._get_default_ytcfg(default_client)), 'INNERTUBE_CONTEXT', expected_type=dict)
+ # Enforce language and tz for extraction
+ client_context = traverse_obj(context, 'client', expected_type=dict, default={})
+ client_context.update({'hl': self._preferred_lang or 'en', 'timeZone': 'UTC', 'utcOffsetMinutes': 0})
+ return context
+
+ _SAPISID = None
+
+ def _generate_sapisidhash_header(self, origin='https://www.youtube.com'):
+ time_now = round(time.time())
+ if self._SAPISID is None:
+ yt_cookies = self._get_cookies('https://www.youtube.com')
+ # Sometimes SAPISID cookie isn't present but __Secure-3PAPISID is.
+ # See: https://github.com/yt-dlp/yt-dlp/issues/393
+ sapisid_cookie = dict_get(
+ yt_cookies, ('__Secure-3PAPISID', 'SAPISID'))
+ if sapisid_cookie and sapisid_cookie.value:
+ self._SAPISID = sapisid_cookie.value
+ self.write_debug('Extracted SAPISID cookie')
+ # A SAPISID cookie must also be set if not already present
+ if not yt_cookies.get('SAPISID'):
+ self.write_debug('Copying __Secure-3PAPISID cookie to SAPISID cookie')
+ self._set_cookie(
+ '.youtube.com', 'SAPISID', self._SAPISID, secure=True, expire_time=time_now + 3600)
+ else:
+ self._SAPISID = False
+ if not self._SAPISID:
+ return None
+ # SAPISIDHASH algorithm from https://stackoverflow.com/a/32065323
+ sapisidhash = hashlib.sha1(
+ f'{time_now} {self._SAPISID} {origin}'.encode()).hexdigest()
+ return f'SAPISIDHASH {time_now}_{sapisidhash}'
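+ # Sketch of the resulting header for a hypothetical SAPISID value 'abc123'
+ # at time_now=1700000000:
+ #   digest = hashlib.sha1(b'1700000000 abc123 https://www.youtube.com').hexdigest()
+ #   Authorization: SAPISIDHASH 1700000000_<digest>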
+
+ def _call_api(self, ep, query, video_id, fatal=True, headers=None,
+ note='Downloading API JSON', errnote='Unable to download API page',
+ context=None, api_key=None, api_hostname=None, default_client='web'):
+
+ data = {'context': context} if context else {'context': self._extract_context(default_client=default_client)}
+ data.update(query)
+ real_headers = self.generate_api_headers(default_client=default_client)
+ real_headers.update({'content-type': 'application/json'})
+ if headers:
+ real_headers.update(headers)
+ api_key = (self._configuration_arg('innertube_key', [''], ie_key=YoutubeIE.ie_key(), casesense=True)[0]
+ or api_key or self._extract_api_key(default_client=default_client))
+ return self._download_json(
+ f'https://{self._select_api_hostname(api_hostname, default_client)}/youtubei/v1/{ep}',
+ video_id=video_id, fatal=fatal, note=note, errnote=errnote,
+ data=json.dumps(data).encode('utf8'), headers=real_headers,
+ query={'key': api_key, 'prettyPrint': 'false'})
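+ # The assembled request is roughly (ep='browse', web client, illustrative):
+ #   POST https://www.youtube.com/youtubei/v1/browse?key=<api_key>&prettyPrint=false
+ #   content-type: application/json
+ #   {"context": {"client": {"hl": "en", ...}}, **query}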
+
+ def extract_yt_initial_data(self, item_id, webpage, fatal=True):
+ return self._search_json(self._YT_INITIAL_DATA_RE, webpage, 'yt initial data', item_id, fatal=fatal)
+
+ @staticmethod
+ def _extract_session_index(*data):
+ """
+ Index of current account in account list.
+ See: https://github.com/yt-dlp/yt-dlp/pull/519
+ """
+ for ytcfg in data:
+ session_index = int_or_none(try_get(ytcfg, lambda x: x['SESSION_INDEX']))
+ if session_index is not None:
+ return session_index
+
+ # Deprecated?
+ def _extract_identity_token(self, ytcfg=None, webpage=None):
+ if ytcfg:
+ token = try_get(ytcfg, lambda x: x['ID_TOKEN'], str)
+ if token:
+ return token
+ if webpage:
+ return self._search_regex(
+ r'\bID_TOKEN["\']\s*:\s*["\'](.+?)["\']', webpage,
+ 'identity token', default=None, fatal=False)
+
+ @staticmethod
+ def _extract_account_syncid(*args):
+ """
+ Extract syncId required to download private playlists of secondary channels
+ @params response and/or ytcfg
+ """
+ for data in args:
+ # ytcfg includes channel_syncid if on secondary channel
+ delegated_sid = try_get(data, lambda x: x['DELEGATED_SESSION_ID'], str)
+ if delegated_sid:
+ return delegated_sid
+ sync_ids = (try_get(
+ data, (lambda x: x['responseContext']['mainAppWebResponseContext']['datasyncId'],
+ lambda x: x['DATASYNC_ID']), str) or '').split('||')
+ if len(sync_ids) >= 2 and sync_ids[1]:
+ # datasyncId is of the form "channel_syncid||user_syncid" for secondary channels
+ # and just "user_syncid||" for the primary channel. We only want the channel_syncid
+ return sync_ids[0]
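+ # e.g. a hypothetical datasyncId 'ChannelXYZ||UserABC' yields 'ChannelXYZ'
+ # (secondary channel), while 'UserABC||' (primary channel) yields nothing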
+
+ @staticmethod
+ def _extract_visitor_data(*args):
+ """
+ Extracts visitorData from an API response or ytcfg.
+ Appears to be used to track session state.
+ """
+ return get_first(
+ args, [('VISITOR_DATA', ('INNERTUBE_CONTEXT', 'client', 'visitorData'), ('responseContext', 'visitorData'))],
+ expected_type=str)
+
+ @functools.cached_property
+ def is_authenticated(self):
+ return bool(self._generate_sapisidhash_header())
+
+ def extract_ytcfg(self, video_id, webpage):
+ if not webpage:
+ return {}
+ return self._parse_json(
+ self._search_regex(
+ r'ytcfg\.set\s*\(\s*({.+?})\s*\)\s*;', webpage, 'ytcfg',
+ default='{}'), video_id, fatal=False) or {}
+
+ def generate_api_headers(
+ self, *, ytcfg=None, account_syncid=None, session_index=None,
+ visitor_data=None, identity_token=None, api_hostname=None, default_client='web'):
+
+ origin = 'https://' + self._select_api_hostname(api_hostname, default_client)
+ headers = {
+ 'X-YouTube-Client-Name': str(
+ self._ytcfg_get_safe(ytcfg, lambda x: x['INNERTUBE_CONTEXT_CLIENT_NAME'], default_client=default_client)),
+ 'X-YouTube-Client-Version': self._extract_client_version(ytcfg, default_client),
+ 'Origin': origin,
+ 'X-Youtube-Identity-Token': identity_token or self._extract_identity_token(ytcfg),
+ 'X-Goog-PageId': account_syncid or self._extract_account_syncid(ytcfg),
+ 'X-Goog-Visitor-Id': visitor_data or self._extract_visitor_data(ytcfg),
+ 'User-Agent': self._ytcfg_get_safe(ytcfg, lambda x: x['INNERTUBE_CONTEXT']['client']['userAgent'], default_client=default_client)
+ }
+ if session_index is None:
+ session_index = self._extract_session_index(ytcfg)
+ if account_syncid or session_index is not None:
+ headers['X-Goog-AuthUser'] = session_index if session_index is not None else 0
+
+ auth = self._generate_sapisidhash_header(origin)
+ if auth is not None:
+ headers['Authorization'] = auth
+ headers['X-Origin'] = origin
+ return filter_dict(headers)
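+ # For the default web client this produces roughly (illustrative values):
+ #   {'X-YouTube-Client-Name': '1', 'X-YouTube-Client-Version': '2.20240304.00.00',
+ #    'Origin': 'https://www.youtube.com', 'X-Origin': 'https://www.youtube.com', ...}
+ # with the auth/identity headers present only when the corresponding data exists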
+
+ def _download_ytcfg(self, client, video_id):
+ url = {
+ 'web': 'https://www.youtube.com',
+ 'web_music': 'https://music.youtube.com',
+ 'web_embedded': f'https://www.youtube.com/embed/{video_id}?html5=1'
+ }.get(client)
+ if not url:
+ return {}
+ webpage = self._download_webpage(
+ url, video_id, fatal=False, note=f'Downloading {client.replace("_", " ").strip()} client config')
+ return self.extract_ytcfg(video_id, webpage) or {}
+
+ @staticmethod
+ def _build_api_continuation_query(continuation, ctp=None):
+ query = {
+ 'continuation': continuation
+ }
+ # TODO: Inconsistency with clickTrackingParams.
+ # Currently we have a fixed ctp contained within the context (from ytcfg)
+ # and a separate ctp in the root query for the continuation.
+ if ctp:
+ query['clickTracking'] = {'clickTrackingParams': ctp}
+ return query
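+ # e.g. with hypothetical tokens, _build_api_continuation_query('4qmF...', 'CBQ...')
+ # returns {'continuation': '4qmF...', 'clickTracking': {'clickTrackingParams': 'CBQ...'}}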
+
+ @classmethod
+ def _extract_next_continuation_data(cls, renderer):
+ next_continuation = try_get(
+ renderer, (lambda x: x['continuations'][0]['nextContinuationData'],
+ lambda x: x['continuation']['reloadContinuationData']), dict)
+ if not next_continuation:
+ return
+ continuation = next_continuation.get('continuation')
+ if not continuation:
+ return
+ ctp = next_continuation.get('clickTrackingParams')
+ return cls._build_api_continuation_query(continuation, ctp)
+
+ @classmethod
+ def _extract_continuation_ep_data(cls, continuation_ep: dict):
+ if isinstance(continuation_ep, dict):
+ continuation = try_get(
+ continuation_ep, lambda x: x['continuationCommand']['token'], str)
+ if not continuation:
+ return
+ ctp = continuation_ep.get('clickTrackingParams')
+ return cls._build_api_continuation_query(continuation, ctp)
+
+ @classmethod
+ def _extract_continuation(cls, renderer):
+ next_continuation = cls._extract_next_continuation_data(renderer)
+ if next_continuation:
+ return next_continuation
+
+ return traverse_obj(renderer, (
+ ('contents', 'items', 'rows'), ..., 'continuationItemRenderer',
+ ('continuationEndpoint', ('button', 'buttonRenderer', 'command'))
+ ), get_all=False, expected_type=cls._extract_continuation_ep_data)
+
+ @classmethod
+ def _extract_alerts(cls, data):
+ for alert_dict in try_get(data, lambda x: x['alerts'], list) or []:
+ if not isinstance(alert_dict, dict):
+ continue
+ for alert in alert_dict.values():
+ alert_type = alert.get('type')
+ if not alert_type:
+ continue
+ message = cls._get_text(alert, 'text')
+ if message:
+ yield alert_type, message
+
+ def _report_alerts(self, alerts, expected=True, fatal=True, only_once=False):
+ errors, warnings = [], []
+ for alert_type, alert_message in alerts:
+ if alert_type.lower() == 'error' and fatal:
+ errors.append([alert_type, alert_message])
+ elif alert_message not in self._IGNORED_WARNINGS:
+ warnings.append([alert_type, alert_message])
+
+ for alert_type, alert_message in (warnings + errors[:-1]):
+ self.report_warning(f'YouTube said: {alert_type} - {alert_message}', only_once=only_once)
+ if errors:
+ raise ExtractorError(f'YouTube said: {errors[-1][1]}', expected=expected)
+
+ def _extract_and_report_alerts(self, data, *args, **kwargs):
+ return self._report_alerts(self._extract_alerts(data), *args, **kwargs)
+
+ def _extract_badges(self, badge_list: list):
+ """
+ Extract known BadgeTypes from a list of badge renderers.
+ @returns [{'type': BadgeType}]
+ """
+ icon_type_map = {
+ 'PRIVACY_UNLISTED': BadgeType.AVAILABILITY_UNLISTED,
+ 'PRIVACY_PRIVATE': BadgeType.AVAILABILITY_PRIVATE,
+ 'PRIVACY_PUBLIC': BadgeType.AVAILABILITY_PUBLIC,
+ 'CHECK_CIRCLE_THICK': BadgeType.VERIFIED,
+ 'OFFICIAL_ARTIST_BADGE': BadgeType.VERIFIED,
+ 'CHECK': BadgeType.VERIFIED,
+ }
+
+ badge_style_map = {
+ 'BADGE_STYLE_TYPE_MEMBERS_ONLY': BadgeType.AVAILABILITY_SUBSCRIPTION,
+ 'BADGE_STYLE_TYPE_PREMIUM': BadgeType.AVAILABILITY_PREMIUM,
+ 'BADGE_STYLE_TYPE_LIVE_NOW': BadgeType.LIVE_NOW,
+ 'BADGE_STYLE_TYPE_VERIFIED': BadgeType.VERIFIED,
+ 'BADGE_STYLE_TYPE_VERIFIED_ARTIST': BadgeType.VERIFIED,
+ }
+
+ label_map = {
+ 'unlisted': BadgeType.AVAILABILITY_UNLISTED,
+ 'private': BadgeType.AVAILABILITY_PRIVATE,
+ 'members only': BadgeType.AVAILABILITY_SUBSCRIPTION,
+ 'live': BadgeType.LIVE_NOW,
+ 'premium': BadgeType.AVAILABILITY_PREMIUM,
+ 'verified': BadgeType.VERIFIED,
+ 'official artist channel': BadgeType.VERIFIED,
+ }
+
+ badges = []
+ for badge in traverse_obj(badge_list, (..., lambda key, _: re.search(r'[bB]adgeRenderer$', key))):
+ badge_type = (
+ icon_type_map.get(traverse_obj(badge, ('icon', 'iconType'), expected_type=str))
+ or badge_style_map.get(traverse_obj(badge, 'style'))
+ )
+ if badge_type:
+ badges.append({'type': badge_type})
+ continue
+
+ # fallback, won't work in some languages
+ label = traverse_obj(
+ badge, 'label', ('accessibilityData', 'label'), 'tooltip', 'iconTooltip', get_all=False, expected_type=str, default='')
+ for match, label_badge_type in label_map.items():
+ if match in label.lower():
+ badges.append({'type': label_badge_type})
+ break
+
+ return badges
+
+ @staticmethod
+ def _has_badge(badges, badge_type):
+ return bool(traverse_obj(badges, lambda _, v: v['type'] == badge_type))
+
+ @staticmethod
+ def _get_text(data, *path_list, max_runs=None):
+ for path in path_list or [None]:
+ if path is None:
+ obj = [data]
+ else:
+ obj = traverse_obj(data, path, default=[])
+ if not any(key is ... or isinstance(key, (list, tuple)) for key in variadic(path)):
+ obj = [obj]
+ for item in obj:
+ text = try_get(item, lambda x: x['simpleText'], str)
+ if text:
+ return text
+ runs = try_get(item, lambda x: x['runs'], list) or []
+ if not runs and isinstance(item, list):
+ runs = item
+
+ runs = runs[:min(len(runs), max_runs or len(runs))]
+ text = ''.join(traverse_obj(runs, (..., 'text'), expected_type=str))
+ if text:
+ return text
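+ # Handles both InnerTube text renderer shapes, e.g. (illustrative):
+ #   {'simpleText': 'Title'} -> 'Title'
+ #   {'runs': [{'text': 'Ti'}, {'text': 'tle'}]} -> 'Title'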
+
+ def _get_count(self, data, *path_list):
+ count_text = self._get_text(data, *path_list) or ''
+ count = parse_count(count_text)
+ if count is None:
+ count = str_to_int(
+ self._search_regex(r'^([\d,]+)', re.sub(r'\s', '', count_text), 'count', default=None))
+ return count
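+ # The regex fallback strips whitespace first, so e.g. a count text of
+ # '1 234 views' reduces to '1234views' and yields 1234 when parse_count
+ # cannot handle it directly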
+
+ @staticmethod
+ def _extract_thumbnails(data, *path_list):
+ """
+ Extract thumbnails from a thumbnails dict
+ @param path_list: path list to the level that contains the 'thumbnails' key
+ """
+ thumbnails = []
+ for path in path_list or [()]:
+ for thumbnail in traverse_obj(data, (*variadic(path), 'thumbnails', ...)):
+ thumbnail_url = url_or_none(thumbnail.get('url'))
+ if not thumbnail_url:
+ continue
+ # Sometimes YouTube gives a wrong thumbnail URL. See:
+ # https://github.com/yt-dlp/yt-dlp/issues/233
+ # https://github.com/ytdl-org/youtube-dl/issues/28023
+ if 'maxresdefault' in thumbnail_url:
+ thumbnail_url = thumbnail_url.split('?')[0]
+ thumbnails.append({
+ 'url': thumbnail_url,
+ 'height': int_or_none(thumbnail.get('height')),
+ 'width': int_or_none(thumbnail.get('width')),
+ })
+ return thumbnails
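+ # e.g. an illustrative 'https://i.ytimg.com/vi/<id>/maxresdefault.jpg?v=123'
+ # is truncated at '?' to work around the wrong-URL issue referenced above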
+
+ @staticmethod
+ def extract_relative_time(relative_time_text):
+ """
+ Extracts a relative time from a string and converts it to a datetime object
+ e.g. 'streamed 6 days ago', '5 seconds ago (edited)', 'updated today', '8 yr ago'
+ """
+
+ # XXX: this could be moved to a general function in utils/_utils.py
+ # The relative time text strings are roughly the same as what
+ # JavaScript's Intl.RelativeTimeFormat function generates.
+ # See: https://developer.mozilla.org/en-US/docs/Web/JavaScript/Reference/Global_Objects/Intl/RelativeTimeFormat
+ mobj = re.search(
+ r'(?P<start>today|yesterday|now)|(?P<time>\d+)\s*(?P<unit>sec(?:ond)?|s|min(?:ute)?|h(?:our|r)?|d(?:ay)?|w(?:eek|k)?|mo(?:nth)?|y(?:ear|r)?)s?\s*ago',
+ relative_time_text)
+ if mobj:
+ start = mobj.group('start')
+ if start:
+ return datetime_from_str(start)
+ try:
+ return datetime_from_str(f"now-{mobj.group('time')}{mobj.group('unit')}")
+ except ValueError:
+ return None
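+ # e.g. 'streamed 6 days ago' -> datetime_from_str('now-6day'),
+ # 'today' -> datetime_from_str('today'); unmatched text returns None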
+
+ def _parse_time_text(self, text):
+ if not text:
+ return
+ dt = self.extract_relative_time(text)
+ timestamp = None
+ if isinstance(dt, datetime.datetime):
+ timestamp = calendar.timegm(dt.timetuple())
+
+ if timestamp is None:
+ timestamp = (
+ unified_timestamp(text) or unified_timestamp(
+ self._search_regex(
+ (r'([a-z]+\s*\d{1,2},?\s*20\d{2})', r'(?:.+|^)(?:live|premieres|ed|ing)(?:\s*(?:on|for))?\s*(.+\d)'),
+ text.lower(), 'time text', default=None)))
+
+ if text and timestamp is None and self._preferred_lang in (None, 'en'):
+ self.report_warning(
+ f'Cannot parse localized time text "{text}"', only_once=True)
+ return timestamp
+
+ def _extract_response(self, item_id, query, note='Downloading API JSON', headers=None,
+ ytcfg=None, check_get_keys=None, ep='browse', fatal=True, api_hostname=None,
+ default_client='web'):
+ raise_for_incomplete = bool(self._configuration_arg('raise_incomplete_data', ie_key=YoutubeIE))
+ # Incomplete Data should be a warning by default when retries are exhausted, while other errors should be fatal.
+ icd_retries = iter(self.RetryManager(fatal=raise_for_incomplete))
+ icd_rm = next(icd_retries)
+ main_retries = iter(self.RetryManager())
+ main_rm = next(main_retries)
+ # Manual retry loop for multiple RetryManagers
+ # The proper RetryManager MUST be advanced after an error
+ # and its result MUST be checked if the manager is non-fatal
+ while True:
+ try:
+ response = self._call_api(
+ ep=ep, fatal=True, headers=headers,
+ video_id=item_id, query=query, note=note,
+ context=self._extract_context(ytcfg, default_client),
+ api_key=self._extract_api_key(ytcfg, default_client),
+ api_hostname=api_hostname, default_client=default_client)
+ except ExtractorError as e:
+ if not isinstance(e.cause, network_exceptions):
+ return self._error_or_warning(e, fatal=fatal)
+ elif not isinstance(e.cause, HTTPError):
+ main_rm.error = e
+ next(main_retries)
+ continue
+
+ first_bytes = e.cause.response.read(512)
+ if not is_html(first_bytes):
+ yt_error = try_get(
+ self._parse_json(
+ self._webpage_read_content(e.cause.response, None, item_id, prefix=first_bytes) or '{}', item_id, fatal=False),
+ lambda x: x['error']['message'], str)
+ if yt_error:
+ self._report_alerts([('ERROR', yt_error)], fatal=False)
+ # Downloading page may result in intermittent 5xx HTTP error
+ # Sometimes a 404 is also received. See: https://github.com/ytdl-org/youtube-dl/issues/28289
+ # We also want to catch all other network exceptions since errors in later pages can be troublesome
+ # See https://github.com/yt-dlp/yt-dlp/issues/507#issuecomment-880188210
+ if e.cause.status not in (403, 429):
+ main_rm.error = e
+ next(main_retries)
+ continue
+ return self._error_or_warning(e, fatal=fatal)
+
+ try:
+ self._extract_and_report_alerts(response, only_once=True)
+ except ExtractorError as e:
+ # YouTube's servers may return errors we want to retry on in a 200 OK response
+ # See: https://github.com/yt-dlp/yt-dlp/issues/839
+ if 'unknown error' in e.msg.lower():
+ main_rm.error = e
+ next(main_retries)
+ continue
+ return self._error_or_warning(e, fatal=fatal)
+ # YouTube sometimes sends incomplete data
+ # See: https://github.com/ytdl-org/youtube-dl/issues/28194
+ if not traverse_obj(response, *variadic(check_get_keys)):
+ icd_rm.error = ExtractorError('Incomplete data received', expected=True)
+ should_retry = next(icd_retries, None)
+ if not should_retry:
+ return None
+ continue
+
+ return response
+
+ @staticmethod
+ def is_music_url(url):
+ return re.match(r'(https?://)?music\.youtube\.com/', url) is not None
+
+ def _extract_video(self, renderer):
+ video_id = renderer.get('videoId')
+
+ reel_header_renderer = traverse_obj(renderer, (
+ 'navigationEndpoint', 'reelWatchEndpoint', 'overlay', 'reelPlayerOverlayRenderer',
+ 'reelPlayerHeaderSupportedRenderers', 'reelPlayerHeaderRenderer'))
+
+ title = self._get_text(renderer, 'title', 'headline') or self._get_text(reel_header_renderer, 'reelTitleText')
+ description = self._get_text(renderer, 'descriptionSnippet')
+
+ duration = int_or_none(renderer.get('lengthSeconds'))
+ if duration is None:
+ duration = parse_duration(self._get_text(
+ renderer, 'lengthText', ('thumbnailOverlays', ..., 'thumbnailOverlayTimeStatusRenderer', 'text')))
+ if duration is None:
+ # XXX: should write a more general parser to support more cases (e.g. shorts in the shorts tab)
+ duration = parse_duration(self._search_regex(
+ r'(?i)(ago)(?!.*\1)\s+(?P<duration>[a-z0-9 ,]+?)(?:\s+[\d,]+\s+views)?(?:\s+-\s+play\s+short)?$',
+ traverse_obj(renderer, ('title', 'accessibility', 'accessibilityData', 'label'), default='', expected_type=str),
+ video_id, default=None, group='duration'))
+
+ channel_id = traverse_obj(
+ renderer, ('shortBylineText', 'runs', ..., 'navigationEndpoint', 'browseEndpoint', 'browseId'),
+ expected_type=str, get_all=False)
+ if not channel_id:
+ channel_id = traverse_obj(reel_header_renderer, ('channelNavigationEndpoint', 'browseEndpoint', 'browseId'))
+
+ channel_id = self.ucid_or_none(channel_id)
+
+ overlay_style = traverse_obj(
+ renderer, ('thumbnailOverlays', ..., 'thumbnailOverlayTimeStatusRenderer', 'style'),
+ get_all=False, expected_type=str)
+ badges = self._extract_badges(traverse_obj(renderer, 'badges'))
+ owner_badges = self._extract_badges(traverse_obj(renderer, 'ownerBadges'))
+ navigation_url = urljoin('https://www.youtube.com/', traverse_obj(
+ renderer, ('navigationEndpoint', 'commandMetadata', 'webCommandMetadata', 'url'),
+ expected_type=str)) or ''
+ url = f'https://www.youtube.com/watch?v={video_id}'
+ if overlay_style == 'SHORTS' or '/shorts/' in navigation_url:
+ url = f'https://www.youtube.com/shorts/{video_id}'
+
+ time_text = (self._get_text(renderer, 'publishedTimeText', 'videoInfo')
+ or self._get_text(reel_header_renderer, 'timestampText') or '')
+ scheduled_timestamp = str_to_int(traverse_obj(renderer, ('upcomingEventData', 'startTime'), get_all=False))
+
+ live_status = (
+ 'is_upcoming' if scheduled_timestamp is not None
+ else 'was_live' if 'streamed' in time_text.lower()
+ else 'is_live' if overlay_style == 'LIVE' or self._has_badge(badges, BadgeType.LIVE_NOW)
+ else None)
+
+ # videoInfo is a string like '50K views • 10 years ago'.
+ view_count_text = self._get_text(renderer, 'viewCountText', 'shortViewCountText', 'videoInfo') or ''
+ view_count = (0 if 'no views' in view_count_text.lower()
+ else self._get_count({'simpleText': view_count_text}))
+ view_count_field = 'concurrent_view_count' if live_status in ('is_live', 'is_upcoming') else 'view_count'
+
+ channel = (self._get_text(renderer, 'ownerText', 'shortBylineText')
+ or self._get_text(reel_header_renderer, 'channelTitleText'))
+
+ channel_handle = traverse_obj(renderer, (
+ 'shortBylineText', 'runs', ..., 'navigationEndpoint',
+ (('commandMetadata', 'webCommandMetadata', 'url'), ('browseEndpoint', 'canonicalBaseUrl'))),
+ expected_type=self.handle_from_url, get_all=False)
+ return {
+ '_type': 'url',
+ 'ie_key': YoutubeIE.ie_key(),
+ 'id': video_id,
+ 'url': url,
+ 'title': title,
+ 'description': description,
+ 'duration': duration,
+ 'channel_id': channel_id,
+ 'channel': channel,
+ 'channel_url': f'https://www.youtube.com/channel/{channel_id}' if channel_id else None,
+ 'uploader': channel,
+ 'uploader_id': channel_handle,
+ 'uploader_url': format_field(channel_handle, None, 'https://www.youtube.com/%s', default=None),
+ 'thumbnails': self._extract_thumbnails(renderer, 'thumbnail'),
+ 'timestamp': (self._parse_time_text(time_text)
+ if self._configuration_arg('approximate_date', ie_key=YoutubeTabIE)
+ else None),
+ 'release_timestamp': scheduled_timestamp,
+ 'availability':
+ 'public' if self._has_badge(badges, BadgeType.AVAILABILITY_PUBLIC)
+ else self._availability(
+ is_private=self._has_badge(badges, BadgeType.AVAILABILITY_PRIVATE) or None,
+ needs_premium=self._has_badge(badges, BadgeType.AVAILABILITY_PREMIUM) or None,
+ needs_subscription=self._has_badge(badges, BadgeType.AVAILABILITY_SUBSCRIPTION) or None,
+ is_unlisted=self._has_badge(badges, BadgeType.AVAILABILITY_UNLISTED) or None),
+ view_count_field: view_count,
+ 'live_status': live_status,
+ 'channel_is_verified': True if self._has_badge(owner_badges, BadgeType.VERIFIED) else None
+ }
+
+
+class YoutubeIE(YoutubeBaseInfoExtractor):
+ IE_DESC = 'YouTube'
+ _VALID_URL = r"""(?x)^
+ (
+ (?:https?://|//) # http(s):// or protocol-independent URL
+ (?:(?:(?:(?:\w+\.)?[yY][oO][uU][tT][uU][bB][eE](?:-nocookie|kids)?\.com|
+ (?:www\.)?deturl\.com/www\.youtube\.com|
+ (?:www\.)?pwnyoutube\.com|
+ (?:www\.)?hooktube\.com|
+ (?:www\.)?yourepeat\.com|
+ tube\.majestyc\.net|
+ %(invidious)s|
+ youtube\.googleapis\.com)/ # the various hostnames, with wildcard subdomains
+ (?:.*?\#/)? # handle anchor (#/) redirect urls
+ (?: # the various things that can precede the ID:
+ (?:(?:v|embed|e|shorts|live)/(?!videoseries|live_stream)) # v/, embed/, e/, shorts/ or live/
+ |(?: # or the v= param in all its forms
+ (?:(?:watch|movie)(?:_popup)?(?:\.php)?/?)? # preceding watch(_popup|.php) or nothing (like /?v=xxxx)
+ (?:\?|\#!?) # the params delimiter ? or # or #!
+ (?:.*?[&;])?? # any other preceding param (like /?s=tuff&v=xxxx or ?s=tuff&amp;v=V36LpHqtcDY)
+ v=
+ )
+ ))
+ |(?:
+ youtu\.be| # just youtu.be/xxxx
+ vid\.plus| # or vid.plus/xxxx
+ zwearz\.com/watch| # or zwearz.com/watch/xxxx
+ %(invidious)s
+ )/
+ |(?:www\.)?cleanvideosearch\.com/media/action/yt/watch\?videoId=
+ )
+ )? # all until now is optional -> you can pass the naked ID
+ (?P<id>[0-9A-Za-z_-]{11}) # here it is! the YouTube video ID
+ (?(1).+)? # if we found the ID, everything can follow
+ (?:\#|$)""" % {
+ 'invidious': '|'.join(YoutubeBaseInfoExtractor._INVIDIOUS_SITES),
+ }
+ _EMBED_REGEX = [
+ r'''(?x)
+ (?:
+ <(?:[0-9A-Za-z-]+?)?iframe[^>]+?src=|
+ data-video-url=|
+ <embed[^>]+?src=|
+ embedSWF\(?:\s*|
+ <object[^>]+data=|
+ new\s+SWFObject\(
+ )
+ (["\'])
+ (?P<url>(?:https?:)?//(?:www\.)?youtube(?:-nocookie)?\.com/
+ (?:embed|v|p)/[0-9A-Za-z_-]{11}.*?)
+ \1''',
+ # https://wordpress.org/plugins/lazy-load-for-videos/
+ r'''(?xs)
+ <a\s[^>]*\bhref="(?P<url>https://www\.youtube\.com/watch\?v=[0-9A-Za-z_-]{11})"
+ \s[^>]*\bclass="[^"]*\blazy-load-youtube''',
+ ]
+ _RETURN_TYPE = 'video' # XXX: How to handle multifeed?
+
+ _PLAYER_INFO_RE = (
+ r'/s/player/(?P<id>[a-zA-Z0-9_-]{8,})/player',
+ r'/(?P<id>[a-zA-Z0-9_-]{8,})/player(?:_ias\.vflset(?:/[a-zA-Z]{2,3}_[a-zA-Z]{2,3})?|-plasma-ias-(?:phone|tablet)-[a-z]{2}_[A-Z]{2}\.vflset)/base\.js$',
+ r'\b(?P<id>vfl[a-zA-Z0-9_-]+)\b.*?\.js$',
+ )
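+ # e.g. an illustrative player URL
+ # 'https://www.youtube.com/s/player/f1ca6900/player_ias.vflset/en_US/base.js'
+ # yields the player ID 'f1ca6900' via the first pattern above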
+ _formats = {
+ '5': {'ext': 'flv', 'width': 400, 'height': 240, 'acodec': 'mp3', 'abr': 64, 'vcodec': 'h263'},
+ '6': {'ext': 'flv', 'width': 450, 'height': 270, 'acodec': 'mp3', 'abr': 64, 'vcodec': 'h263'},
+ '13': {'ext': '3gp', 'acodec': 'aac', 'vcodec': 'mp4v'},
+ '17': {'ext': '3gp', 'width': 176, 'height': 144, 'acodec': 'aac', 'abr': 24, 'vcodec': 'mp4v'},
+ '18': {'ext': 'mp4', 'width': 640, 'height': 360, 'acodec': 'aac', 'abr': 96, 'vcodec': 'h264'},
+ '22': {'ext': 'mp4', 'width': 1280, 'height': 720, 'acodec': 'aac', 'abr': 192, 'vcodec': 'h264'},
+ '34': {'ext': 'flv', 'width': 640, 'height': 360, 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264'},
+ '35': {'ext': 'flv', 'width': 854, 'height': 480, 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264'},
+ # itag 36 videos are either 320x180 (BaW_jenozKc) or 320x240 (__2ABJjxzNo), abr varies as well
+ '36': {'ext': '3gp', 'width': 320, 'acodec': 'aac', 'vcodec': 'mp4v'},
+ '37': {'ext': 'mp4', 'width': 1920, 'height': 1080, 'acodec': 'aac', 'abr': 192, 'vcodec': 'h264'},
+ '38': {'ext': 'mp4', 'width': 4096, 'height': 3072, 'acodec': 'aac', 'abr': 192, 'vcodec': 'h264'},
+ '43': {'ext': 'webm', 'width': 640, 'height': 360, 'acodec': 'vorbis', 'abr': 128, 'vcodec': 'vp8'},
+ '44': {'ext': 'webm', 'width': 854, 'height': 480, 'acodec': 'vorbis', 'abr': 128, 'vcodec': 'vp8'},
+ '45': {'ext': 'webm', 'width': 1280, 'height': 720, 'acodec': 'vorbis', 'abr': 192, 'vcodec': 'vp8'},
+ '46': {'ext': 'webm', 'width': 1920, 'height': 1080, 'acodec': 'vorbis', 'abr': 192, 'vcodec': 'vp8'},
+ '59': {'ext': 'mp4', 'width': 854, 'height': 480, 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264'},
+ '78': {'ext': 'mp4', 'width': 854, 'height': 480, 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264'},
+
+ # 3D videos
+ '82': {'ext': 'mp4', 'height': 360, 'format_note': '3D', 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264', 'preference': -20},
+ '83': {'ext': 'mp4', 'height': 480, 'format_note': '3D', 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264', 'preference': -20},
+ '84': {'ext': 'mp4', 'height': 720, 'format_note': '3D', 'acodec': 'aac', 'abr': 192, 'vcodec': 'h264', 'preference': -20},
+ '85': {'ext': 'mp4', 'height': 1080, 'format_note': '3D', 'acodec': 'aac', 'abr': 192, 'vcodec': 'h264', 'preference': -20},
+ '100': {'ext': 'webm', 'height': 360, 'format_note': '3D', 'acodec': 'vorbis', 'abr': 128, 'vcodec': 'vp8', 'preference': -20},
+ '101': {'ext': 'webm', 'height': 480, 'format_note': '3D', 'acodec': 'vorbis', 'abr': 192, 'vcodec': 'vp8', 'preference': -20},
+ '102': {'ext': 'webm', 'height': 720, 'format_note': '3D', 'acodec': 'vorbis', 'abr': 192, 'vcodec': 'vp8', 'preference': -20},
+
+ # Apple HTTP Live Streaming
+ '91': {'ext': 'mp4', 'height': 144, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 48, 'vcodec': 'h264', 'preference': -10},
+ '92': {'ext': 'mp4', 'height': 240, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 48, 'vcodec': 'h264', 'preference': -10},
+ '93': {'ext': 'mp4', 'height': 360, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264', 'preference': -10},
+ '94': {'ext': 'mp4', 'height': 480, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264', 'preference': -10},
+ '95': {'ext': 'mp4', 'height': 720, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 256, 'vcodec': 'h264', 'preference': -10},
+ '96': {'ext': 'mp4', 'height': 1080, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 256, 'vcodec': 'h264', 'preference': -10},
+ '132': {'ext': 'mp4', 'height': 240, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 48, 'vcodec': 'h264', 'preference': -10},
+ '151': {'ext': 'mp4', 'height': 72, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 24, 'vcodec': 'h264', 'preference': -10},
+
+ # DASH mp4 video
+ '133': {'ext': 'mp4', 'height': 240, 'format_note': 'DASH video', 'vcodec': 'h264'},
+ '134': {'ext': 'mp4', 'height': 360, 'format_note': 'DASH video', 'vcodec': 'h264'},
+ '135': {'ext': 'mp4', 'height': 480, 'format_note': 'DASH video', 'vcodec': 'h264'},
+ '136': {'ext': 'mp4', 'height': 720, 'format_note': 'DASH video', 'vcodec': 'h264'},
+ '137': {'ext': 'mp4', 'height': 1080, 'format_note': 'DASH video', 'vcodec': 'h264'},
+ '138': {'ext': 'mp4', 'format_note': 'DASH video', 'vcodec': 'h264'}, # Height can vary (https://github.com/ytdl-org/youtube-dl/issues/4559)
+ '160': {'ext': 'mp4', 'height': 144, 'format_note': 'DASH video', 'vcodec': 'h264'},
+ '212': {'ext': 'mp4', 'height': 480, 'format_note': 'DASH video', 'vcodec': 'h264'},
+ '264': {'ext': 'mp4', 'height': 1440, 'format_note': 'DASH video', 'vcodec': 'h264'},
+ '298': {'ext': 'mp4', 'height': 720, 'format_note': 'DASH video', 'vcodec': 'h264', 'fps': 60},
+ '299': {'ext': 'mp4', 'height': 1080, 'format_note': 'DASH video', 'vcodec': 'h264', 'fps': 60},
+ '266': {'ext': 'mp4', 'height': 2160, 'format_note': 'DASH video', 'vcodec': 'h264'},
+
+ # Dash mp4 audio
+ '139': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'aac', 'abr': 48, 'container': 'm4a_dash'},
+ '140': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'aac', 'abr': 128, 'container': 'm4a_dash'},
+ '141': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'aac', 'abr': 256, 'container': 'm4a_dash'},
+ '256': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'aac', 'container': 'm4a_dash'},
+ '258': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'aac', 'container': 'm4a_dash'},
+ '325': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'dtse', 'container': 'm4a_dash'},
+ '328': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'ec-3', 'container': 'm4a_dash'},
+
+ # Dash webm
+ '167': {'ext': 'webm', 'height': 360, 'width': 640, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8'},
+ '168': {'ext': 'webm', 'height': 480, 'width': 854, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8'},
+ '169': {'ext': 'webm', 'height': 720, 'width': 1280, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8'},
+ '170': {'ext': 'webm', 'height': 1080, 'width': 1920, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8'},
+ '218': {'ext': 'webm', 'height': 480, 'width': 854, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8'},
+ '219': {'ext': 'webm', 'height': 480, 'width': 854, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8'},
+ '278': {'ext': 'webm', 'height': 144, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp9'},
+ '242': {'ext': 'webm', 'height': 240, 'format_note': 'DASH video', 'vcodec': 'vp9'},
+ '243': {'ext': 'webm', 'height': 360, 'format_note': 'DASH video', 'vcodec': 'vp9'},
+ '244': {'ext': 'webm', 'height': 480, 'format_note': 'DASH video', 'vcodec': 'vp9'},
+ '245': {'ext': 'webm', 'height': 480, 'format_note': 'DASH video', 'vcodec': 'vp9'},
+ '246': {'ext': 'webm', 'height': 480, 'format_note': 'DASH video', 'vcodec': 'vp9'},
+ '247': {'ext': 'webm', 'height': 720, 'format_note': 'DASH video', 'vcodec': 'vp9'},
+ '248': {'ext': 'webm', 'height': 1080, 'format_note': 'DASH video', 'vcodec': 'vp9'},
+ '271': {'ext': 'webm', 'height': 1440, 'format_note': 'DASH video', 'vcodec': 'vp9'},
+ # itag 272 videos are either 3840x2160 (e.g. RtoitU2A-3E) or 7680x4320 (sLprVF6d7Ug)
+ '272': {'ext': 'webm', 'height': 2160, 'format_note': 'DASH video', 'vcodec': 'vp9'},
+ '302': {'ext': 'webm', 'height': 720, 'format_note': 'DASH video', 'vcodec': 'vp9', 'fps': 60},
+ '303': {'ext': 'webm', 'height': 1080, 'format_note': 'DASH video', 'vcodec': 'vp9', 'fps': 60},
+ '308': {'ext': 'webm', 'height': 1440, 'format_note': 'DASH video', 'vcodec': 'vp9', 'fps': 60},
+ '313': {'ext': 'webm', 'height': 2160, 'format_note': 'DASH video', 'vcodec': 'vp9'},
+ '315': {'ext': 'webm', 'height': 2160, 'format_note': 'DASH video', 'vcodec': 'vp9', 'fps': 60},
+
+ # Dash webm audio
+ '171': {'ext': 'webm', 'acodec': 'vorbis', 'format_note': 'DASH audio', 'abr': 128},
+ '172': {'ext': 'webm', 'acodec': 'vorbis', 'format_note': 'DASH audio', 'abr': 256},
+
+ # Dash webm audio with opus inside
+ '249': {'ext': 'webm', 'format_note': 'DASH audio', 'acodec': 'opus', 'abr': 50},
+ '250': {'ext': 'webm', 'format_note': 'DASH audio', 'acodec': 'opus', 'abr': 70},
+ '251': {'ext': 'webm', 'format_note': 'DASH audio', 'acodec': 'opus', 'abr': 160},
+
+ # RTMP (unnamed)
+ '_rtmp': {'protocol': 'rtmp'},
+
+ # av01 video only formats sometimes served with "unknown" codecs
+ '394': {'ext': 'mp4', 'height': 144, 'format_note': 'DASH video', 'vcodec': 'av01.0.00M.08'},
+ '395': {'ext': 'mp4', 'height': 240, 'format_note': 'DASH video', 'vcodec': 'av01.0.00M.08'},
+ '396': {'ext': 'mp4', 'height': 360, 'format_note': 'DASH video', 'vcodec': 'av01.0.01M.08'},
+ '397': {'ext': 'mp4', 'height': 480, 'format_note': 'DASH video', 'vcodec': 'av01.0.04M.08'},
+ '398': {'ext': 'mp4', 'height': 720, 'format_note': 'DASH video', 'vcodec': 'av01.0.05M.08'},
+ '399': {'ext': 'mp4', 'height': 1080, 'format_note': 'DASH video', 'vcodec': 'av01.0.08M.08'},
+ '400': {'ext': 'mp4', 'height': 1440, 'format_note': 'DASH video', 'vcodec': 'av01.0.12M.08'},
+ '401': {'ext': 'mp4', 'height': 2160, 'format_note': 'DASH video', 'vcodec': 'av01.0.12M.08'},
+ }
+ _SUBTITLE_FORMATS = ('json3', 'srv1', 'srv2', 'srv3', 'ttml', 'vtt')
+
+ _GEO_BYPASS = False
+
+ IE_NAME = 'youtube'
+ _TESTS = [
+ {
+ 'url': 'https://www.youtube.com/watch?v=BaW_jenozKc&t=1s&end=9',
+ 'info_dict': {
+ 'id': 'BaW_jenozKc',
+ 'ext': 'mp4',
+ 'title': 'youtube-dl test video "\'/\\ä↭𝕐',
+ 'channel': 'Philipp Hagemeister',
+ 'channel_id': 'UCLqxVugv74EIW3VWh2NOa3Q',
+ 'channel_url': r're:https?://(?:www\.)?youtube\.com/channel/UCLqxVugv74EIW3VWh2NOa3Q',
+ 'upload_date': '20121002',
+ 'description': 'md5:8fb536f4877b8a7455c2ec23794dbc22',
+ 'categories': ['Science & Technology'],
+ 'tags': ['youtube-dl'],
+ 'duration': 10,
+ 'view_count': int,
+ 'like_count': int,
+ 'availability': 'public',
+ 'playable_in_embed': True,
+ 'thumbnail': 'https://i.ytimg.com/vi/BaW_jenozKc/maxresdefault.jpg',
+ 'live_status': 'not_live',
+ 'age_limit': 0,
+ 'start_time': 1,
+ 'end_time': 9,
+ 'comment_count': int,
+ 'channel_follower_count': int,
+ 'uploader': 'Philipp Hagemeister',
+ 'uploader_url': 'https://www.youtube.com/@PhilippHagemeister',
+ 'uploader_id': '@PhilippHagemeister',
+ 'heatmap': 'count:100',
+ }
+ },
+ {
+ 'url': '//www.YouTube.com/watch?v=yZIXLfi8CZQ',
+ 'note': 'Embed-only video (#1746)',
+ 'info_dict': {
+ 'id': 'yZIXLfi8CZQ',
+ 'ext': 'mp4',
+ 'upload_date': '20120608',
+ 'title': 'Principal Sexually Assaults A Teacher - Episode 117 - 8th June 2012',
+ 'description': 'md5:09b78bd971f1e3e289601dfba15ca4f7',
+ 'age_limit': 18,
+ },
+ 'skip': 'Private video',
+ },
+ {
+ 'url': 'https://www.youtube.com/watch?v=BaW_jenozKc&v=yZIXLfi8CZQ',
+ 'note': 'Use the first video ID in the URL',
+ 'info_dict': {
+ 'id': 'BaW_jenozKc',
+ 'ext': 'mp4',
+ 'title': 'youtube-dl test video "\'/\\ä↭𝕐',
+ 'channel': 'Philipp Hagemeister',
+ 'channel_id': 'UCLqxVugv74EIW3VWh2NOa3Q',
+ 'channel_url': r're:https?://(?:www\.)?youtube\.com/channel/UCLqxVugv74EIW3VWh2NOa3Q',
+ 'upload_date': '20121002',
+ 'description': 'md5:8fb536f4877b8a7455c2ec23794dbc22',
+ 'categories': ['Science & Technology'],
+ 'tags': ['youtube-dl'],
+ 'duration': 10,
+ 'view_count': int,
+ 'like_count': int,
+ 'availability': 'public',
+ 'playable_in_embed': True,
+ 'thumbnail': 'https://i.ytimg.com/vi/BaW_jenozKc/maxresdefault.jpg',
+ 'live_status': 'not_live',
+ 'age_limit': 0,
+ 'comment_count': int,
+ 'channel_follower_count': int,
+ 'uploader': 'Philipp Hagemeister',
+ 'uploader_url': 'https://www.youtube.com/@PhilippHagemeister',
+ 'uploader_id': '@PhilippHagemeister',
+ 'heatmap': 'count:100',
+ },
+ 'params': {
+ 'skip_download': True,
+ },
+ },
+ {
+ 'url': 'https://www.youtube.com/watch?v=a9LDPn-MO4I',
+ 'note': '256k DASH audio (format 141) via DASH manifest',
+ 'info_dict': {
+ 'id': 'a9LDPn-MO4I',
+ 'ext': 'm4a',
+ 'upload_date': '20121002',
+ 'description': '',
+ 'title': 'UHDTV TEST 8K VIDEO.mp4'
+ },
+ 'params': {
+ 'youtube_include_dash_manifest': True,
+ 'format': '141',
+ },
+ 'skip': 'format 141 not served anymore',
+ },
+ # DASH manifest with encrypted signature
+ {
+ 'url': 'https://www.youtube.com/watch?v=IB3lcPjvWLA',
+ 'info_dict': {
+ 'id': 'IB3lcPjvWLA',
+ 'ext': 'm4a',
+ 'title': 'Afrojack, Spree Wilson - The Spark (Official Music Video) ft. Spree Wilson',
+ 'description': 'md5:8f5e2b82460520b619ccac1f509d43bf',
+ 'duration': 244,
+ 'upload_date': '20131011',
+ 'abr': 129.495,
+ 'like_count': int,
+ 'channel_id': 'UChuZAo1RKL85gev3Eal9_zg',
+ 'playable_in_embed': True,
+ 'channel_url': 'https://www.youtube.com/channel/UChuZAo1RKL85gev3Eal9_zg',
+ 'view_count': int,
+ 'track': 'The Spark',
+ 'live_status': 'not_live',
+ 'thumbnail': 'https://i.ytimg.com/vi_webp/IB3lcPjvWLA/maxresdefault.webp',
+ 'channel': 'Afrojack',
+ 'tags': 'count:19',
+ 'availability': 'public',
+ 'categories': ['Music'],
+ 'age_limit': 0,
+ 'alt_title': 'The Spark',
+ 'channel_follower_count': int,
+ 'uploader': 'Afrojack',
+ 'uploader_url': 'https://www.youtube.com/@Afrojack',
+ 'uploader_id': '@Afrojack',
+ },
+ 'params': {
+ 'youtube_include_dash_manifest': True,
+ 'format': '141/bestaudio[ext=m4a]',
+ },
+ },
+ # Age-gate videos. See https://github.com/yt-dlp/yt-dlp/pull/575#issuecomment-888837000
+ {
+ 'note': 'Embed allowed age-gate video',
+ 'url': 'https://youtube.com/watch?v=HtVdAasjOgU',
+ 'info_dict': {
+ 'id': 'HtVdAasjOgU',
+ 'ext': 'mp4',
+ 'title': 'The Witcher 3: Wild Hunt - The Sword Of Destiny Trailer',
+ 'description': r're:(?s).{100,}About the Game\n.*?The Witcher 3: Wild Hunt.{100,}',
+ 'duration': 142,
+ 'upload_date': '20140605',
+ 'age_limit': 18,
+ 'categories': ['Gaming'],
+ 'thumbnail': 'https://i.ytimg.com/vi_webp/HtVdAasjOgU/maxresdefault.webp',
+ 'availability': 'needs_auth',
+ 'channel_url': 'https://www.youtube.com/channel/UCzybXLxv08IApdjdN0mJhEg',
+ 'like_count': int,
+ 'channel': 'The Witcher',
+ 'live_status': 'not_live',
+ 'tags': 'count:17',
+ 'channel_id': 'UCzybXLxv08IApdjdN0mJhEg',
+ 'playable_in_embed': True,
+ 'view_count': int,
+ 'channel_follower_count': int,
+ 'uploader': 'The Witcher',
+ 'uploader_url': 'https://www.youtube.com/@thewitcher',
+ 'uploader_id': '@thewitcher',
+ 'comment_count': int,
+ 'channel_is_verified': True,
+ 'heatmap': 'count:100',
+ },
+ },
+ {
+ 'note': 'Age-gate video with embed allowed in public site',
+ 'url': 'https://youtube.com/watch?v=HsUATh_Nc2U',
+ 'info_dict': {
+ 'id': 'HsUATh_Nc2U',
+ 'ext': 'mp4',
+ 'title': 'Godzilla 2 (Official Video)',
+ 'description': 'md5:bf77e03fcae5529475e500129b05668a',
+ 'upload_date': '20200408',
+ 'age_limit': 18,
+ 'availability': 'needs_auth',
+ 'channel_id': 'UCYQT13AtrJC0gsM1far_zJg',
+ 'channel': 'FlyingKitty',
+ 'channel_url': 'https://www.youtube.com/channel/UCYQT13AtrJC0gsM1far_zJg',
+ 'view_count': int,
+ 'categories': ['Entertainment'],
+ 'live_status': 'not_live',
+ 'tags': ['Flyingkitty', 'godzilla 2'],
+ 'thumbnail': 'https://i.ytimg.com/vi/HsUATh_Nc2U/maxresdefault.jpg',
+ 'like_count': int,
+ 'duration': 177,
+ 'playable_in_embed': True,
+ 'channel_follower_count': int,
+ 'uploader': 'FlyingKitty',
+ 'uploader_url': 'https://www.youtube.com/@FlyingKitty900',
+ 'uploader_id': '@FlyingKitty900',
+ 'comment_count': int,
+ 'channel_is_verified': True,
+ },
+ },
+ {
+ 'note': 'Age-gate video embeddable only with clientScreen=EMBED',
+ 'url': 'https://youtube.com/watch?v=Tq92D6wQ1mg',
+ 'info_dict': {
+ 'id': 'Tq92D6wQ1mg',
+ 'title': '[MMD] Adios - EVERGLOW [+Motion DL]',
+ 'ext': 'mp4',
+ 'upload_date': '20191228',
+ 'description': 'md5:17eccca93a786d51bc67646756894066',
+ 'age_limit': 18,
+ 'like_count': int,
+ 'availability': 'needs_auth',
+ 'channel_id': 'UC1yoRdFoFJaCY-AGfD9W0wQ',
+ 'view_count': int,
+ 'thumbnail': 'https://i.ytimg.com/vi_webp/Tq92D6wQ1mg/sddefault.webp',
+ 'channel': 'Projekt Melody',
+ 'live_status': 'not_live',
+ 'tags': ['mmd', 'dance', 'mikumikudance', 'kpop', 'vtuber'],
+ 'playable_in_embed': True,
+ 'categories': ['Entertainment'],
+ 'duration': 106,
+ 'channel_url': 'https://www.youtube.com/channel/UC1yoRdFoFJaCY-AGfD9W0wQ',
+ 'comment_count': int,
+ 'channel_follower_count': int,
+ 'uploader': 'Projekt Melody',
+ 'uploader_url': 'https://www.youtube.com/@ProjektMelody',
+ 'uploader_id': '@ProjektMelody',
+ },
+ },
+ {
+ 'note': 'Non-age-gated, non-embeddable video',
+ 'url': 'https://youtube.com/watch?v=MeJVWBSsPAY',
+ 'info_dict': {
+ 'id': 'MeJVWBSsPAY',
+ 'ext': 'mp4',
+ 'title': 'OOMPH! - Such Mich Find Mich (Lyrics)',
+ 'description': 'Fan Video. Music & Lyrics by OOMPH!.',
+ 'upload_date': '20130730',
+ 'track': 'Such mich find mich',
+ 'age_limit': 0,
+ 'tags': ['oomph', 'such mich find mich', 'lyrics', 'german industrial', 'musica industrial'],
+ 'like_count': int,
+ 'playable_in_embed': False,
+ 'creator': 'OOMPH!',
+ 'thumbnail': 'https://i.ytimg.com/vi/MeJVWBSsPAY/sddefault.jpg',
+ 'view_count': int,
+ 'alt_title': 'Such mich find mich',
+ 'duration': 210,
+ 'channel': 'Herr Lurik',
+ 'channel_id': 'UCdR3RSDPqub28LjZx0v9-aA',
+ 'categories': ['Music'],
+ 'availability': 'public',
+ 'channel_url': 'https://www.youtube.com/channel/UCdR3RSDPqub28LjZx0v9-aA',
+ 'live_status': 'not_live',
+ 'artist': 'OOMPH!',
+ 'channel_follower_count': int,
+ 'uploader': 'Herr Lurik',
+ 'uploader_url': 'https://www.youtube.com/@HerrLurik',
+ 'uploader_id': '@HerrLurik',
+ },
+ },
+ {
+ 'note': 'Non-bypassable age-gated video',
+ 'url': 'https://youtube.com/watch?v=Cr381pDsSsA',
+ 'only_matching': True,
+ },
+ # video_info is None (https://github.com/ytdl-org/youtube-dl/issues/4421)
+ # YouTube Red ad is not captured for creator
+ {
+ 'url': '__2ABJjxzNo',
+ 'info_dict': {
+ 'id': '__2ABJjxzNo',
+ 'ext': 'mp4',
+ 'duration': 266,
+ 'upload_date': '20100430',
+ 'creator': 'deadmau5',
+ 'description': 'md5:6cbcd3a92ce1bc676fc4d6ab4ace2336',
+ 'title': 'Deadmau5 - Some Chords (HD)',
+ 'alt_title': 'Some Chords',
+ 'availability': 'public',
+ 'tags': 'count:14',
+ 'channel_id': 'UCYEK6xds6eo-3tr4xRdflmQ',
+ 'view_count': int,
+ 'live_status': 'not_live',
+ 'channel': 'deadmau5',
+ 'thumbnail': 'https://i.ytimg.com/vi_webp/__2ABJjxzNo/maxresdefault.webp',
+ 'like_count': int,
+ 'track': 'Some Chords',
+ 'artist': 'deadmau5',
+ 'playable_in_embed': True,
+ 'age_limit': 0,
+ 'channel_url': 'https://www.youtube.com/channel/UCYEK6xds6eo-3tr4xRdflmQ',
+ 'categories': ['Music'],
+ 'album': 'Some Chords',
+ 'channel_follower_count': int,
+ 'uploader': 'deadmau5',
+ 'uploader_url': 'https://www.youtube.com/@deadmau5',
+ 'uploader_id': '@deadmau5',
+ },
+ 'expected_warnings': [
+ 'DASH manifest missing',
+ ]
+ },
+ # Olympics (https://github.com/ytdl-org/youtube-dl/issues/4431)
+ {
+ 'url': 'lqQg6PlCWgI',
+ 'info_dict': {
+ 'id': 'lqQg6PlCWgI',
+ 'ext': 'mp4',
+ 'duration': 6085,
+ 'upload_date': '20150827',
+ 'description': 'md5:04bbbf3ccceb6795947572ca36f45904',
+ 'title': 'Hockey - Women - GER-AUS - London 2012 Olympic Games',
+ 'like_count': int,
+ 'release_timestamp': 1343767800,
+ 'playable_in_embed': True,
+ 'categories': ['Sports'],
+ 'release_date': '20120731',
+ 'channel': 'Olympics',
+ 'tags': ['Hockey', '2012-07-31', '31 July 2012', 'Riverbank Arena', 'Session', 'Olympics', 'Olympic Games', 'London 2012', '2012 Summer Olympics', 'Summer Games'],
+ 'channel_id': 'UCTl3QQTvqHFjurroKxexy2Q',
+ 'thumbnail': 'https://i.ytimg.com/vi/lqQg6PlCWgI/maxresdefault.jpg',
+ 'age_limit': 0,
+ 'availability': 'public',
+ 'live_status': 'was_live',
+ 'view_count': int,
+ 'channel_url': 'https://www.youtube.com/channel/UCTl3QQTvqHFjurroKxexy2Q',
+ 'channel_follower_count': int,
+ 'uploader': 'Olympics',
+ 'uploader_url': 'https://www.youtube.com/@Olympics',
+ 'uploader_id': '@Olympics',
+ 'channel_is_verified': True,
+ },
+ 'params': {
+ 'skip_download': 'requires avconv',
+ }
+ },
+ # Non-square pixels
+ {
+ 'url': 'https://www.youtube.com/watch?v=_b-2C3KPAM0',
+ 'info_dict': {
+ 'id': '_b-2C3KPAM0',
+ 'ext': 'mp4',
+ 'stretched_ratio': 16 / 9.,
+ 'duration': 85,
+ 'upload_date': '20110310',
+ 'description': 'made by Wacom from Korea | 字幕&加油添醋 by TY\'s Allen | 感謝heylisa00cavey1001同學熱情提供梗及翻譯',
+ 'title': '[A-made] 變態妍字幕版 太妍 我就是這樣的人',
+ 'playable_in_embed': True,
+ 'channel': '孫ᄋᄅ',
+ 'age_limit': 0,
+ 'tags': 'count:11',
+ 'channel_url': 'https://www.youtube.com/channel/UCS-xxCmRaA6BFdmgDPA_BIw',
+ 'channel_id': 'UCS-xxCmRaA6BFdmgDPA_BIw',
+ 'thumbnail': 'https://i.ytimg.com/vi/_b-2C3KPAM0/maxresdefault.jpg',
+ 'view_count': int,
+ 'categories': ['People & Blogs'],
+ 'like_count': int,
+ 'live_status': 'not_live',
+ 'availability': 'unlisted',
+ 'comment_count': int,
+ 'channel_follower_count': int,
+ 'uploader': '孫ᄋᄅ',
+ 'uploader_url': 'https://www.youtube.com/@AllenMeow',
+ 'uploader_id': '@AllenMeow',
+ },
+ },
+ # url_encoded_fmt_stream_map is an empty string
+ {
+ 'url': 'qEJwOuvDf7I',
+ 'info_dict': {
+ 'id': 'qEJwOuvDf7I',
+ 'ext': 'webm',
+ 'title': 'Обсуждение судебной практики по выборам 14 сентября 2014 года в Санкт-Петербурге',
+ 'description': '',
+ 'upload_date': '20150404',
+ },
+ 'params': {
+ 'skip_download': 'requires avconv',
+ },
+ 'skip': 'This live event has ended.',
+ },
+ # Extraction from multiple DASH manifests (https://github.com/ytdl-org/youtube-dl/pull/6097)
+ {
+ 'url': 'https://www.youtube.com/watch?v=FIl7x6_3R5Y',
+ 'info_dict': {
+ 'id': 'FIl7x6_3R5Y',
+ 'ext': 'webm',
+ 'title': 'md5:7b81415841e02ecd4313668cde88737a',
+ 'description': 'md5:116377fd2963b81ec4ce64b542173306',
+ 'duration': 220,
+ 'upload_date': '20150625',
+ 'formats': 'mincount:31',
+ },
+ 'skip': 'No longer relevant',
+ },
+ # DASH manifest with segment_list
+ {
+ 'url': 'https://www.youtube.com/embed/CsmdDsKjzN8',
+ 'md5': '8ce563a1d667b599d21064e982ab9e31',
+ 'info_dict': {
+ 'id': 'CsmdDsKjzN8',
+ 'ext': 'mp4',
+ 'upload_date': '20150501', # According to '<meta itemprop="datePublished"', but in other places it's 20150510
+ 'description': 'Retransmisión en directo de la XVIII media maratón de Zaragoza.',
+ 'title': 'Retransmisión XVIII Media maratón Zaragoza 2015',
+ },
+ 'params': {
+ 'youtube_include_dash_manifest': True,
+ 'format': '135', # bestvideo
+ },
+ 'skip': 'This live event has ended.',
+ },
+ {
+ # Multifeed videos (multiple cameras), URL can be of any Camera
+ # TODO: fix multifeed titles
+ 'url': 'https://www.youtube.com/watch?v=zaPI8MvL8pg',
+ 'info_dict': {
+ 'id': 'zaPI8MvL8pg',
+ 'title': 'Terraria 1.2 Live Stream | Let\'s Play - Part 04',
+ 'description': 'md5:563ccbc698b39298481ca3c571169519',
+ },
+ 'playlist': [{
+ 'info_dict': {
+ 'id': 'j5yGuxZ8lLU',
+ 'ext': 'mp4',
+ 'title': 'Terraria 1.2 Live Stream | Let\'s Play - Part 04 (Chris)',
+ 'description': 'md5:563ccbc698b39298481ca3c571169519',
+ 'duration': 10120,
+ 'channel_follower_count': int,
+ 'channel_url': 'https://www.youtube.com/channel/UCN2XePorRokPB9TEgRZpddg',
+ 'availability': 'public',
+ 'playable_in_embed': True,
+ 'upload_date': '20131105',
+ 'categories': ['Gaming'],
+ 'live_status': 'was_live',
+ 'tags': 'count:24',
+ 'release_timestamp': 1383701910,
+ 'thumbnail': 'https://i.ytimg.com/vi/j5yGuxZ8lLU/maxresdefault.jpg',
+ 'comment_count': int,
+ 'age_limit': 0,
+ 'like_count': int,
+ 'channel_id': 'UCN2XePorRokPB9TEgRZpddg',
+ 'channel': 'WiiLikeToPlay',
+ 'view_count': int,
+ 'release_date': '20131106',
+ 'uploader': 'WiiLikeToPlay',
+ 'uploader_id': '@WLTP',
+ 'uploader_url': 'https://www.youtube.com/@WLTP',
+ },
+ }, {
+ 'info_dict': {
+ 'id': 'zaPI8MvL8pg',
+ 'ext': 'mp4',
+ 'title': 'Terraria 1.2 Live Stream | Let\'s Play - Part 04 (Tyson)',
+ 'availability': 'public',
+ 'channel_url': 'https://www.youtube.com/channel/UCN2XePorRokPB9TEgRZpddg',
+ 'channel': 'WiiLikeToPlay',
+ 'channel_follower_count': int,
+ 'description': 'md5:563ccbc698b39298481ca3c571169519',
+ 'duration': 10108,
+ 'age_limit': 0,
+ 'like_count': int,
+ 'tags': 'count:24',
+ 'channel_id': 'UCN2XePorRokPB9TEgRZpddg',
+ 'release_timestamp': 1383701915,
+ 'comment_count': int,
+ 'upload_date': '20131105',
+ 'thumbnail': 'https://i.ytimg.com/vi/zaPI8MvL8pg/maxresdefault.jpg',
+ 'release_date': '20131106',
+ 'playable_in_embed': True,
+ 'live_status': 'was_live',
+ 'categories': ['Gaming'],
+ 'view_count': int,
+ 'uploader': 'WiiLikeToPlay',
+ 'uploader_id': '@WLTP',
+ 'uploader_url': 'https://www.youtube.com/@WLTP',
+ },
+ }, {
+ 'info_dict': {
+ 'id': 'R7r3vfO7Hao',
+ 'ext': 'mp4',
+ 'title': 'Terraria 1.2 Live Stream | Let\'s Play - Part 04 (Spencer)',
+ 'thumbnail': 'https://i.ytimg.com/vi/R7r3vfO7Hao/maxresdefault.jpg',
+ 'channel_id': 'UCN2XePorRokPB9TEgRZpddg',
+ 'like_count': int,
+ 'availability': 'public',
+ 'playable_in_embed': True,
+ 'upload_date': '20131105',
+ 'description': 'md5:563ccbc698b39298481ca3c571169519',
+ 'channel_follower_count': int,
+ 'tags': 'count:24',
+ 'release_date': '20131106',
+ 'comment_count': int,
+ 'channel_url': 'https://www.youtube.com/channel/UCN2XePorRokPB9TEgRZpddg',
+ 'channel': 'WiiLikeToPlay',
+ 'categories': ['Gaming'],
+ 'release_timestamp': 1383701914,
+ 'live_status': 'was_live',
+ 'age_limit': 0,
+ 'duration': 10128,
+ 'view_count': int,
+ 'uploader': 'WiiLikeToPlay',
+ 'uploader_id': '@WLTP',
+ 'uploader_url': 'https://www.youtube.com/@WLTP',
+ },
+ }],
+ 'params': {'skip_download': True},
+ },
+ {
+ # Multifeed video with comma in title (see https://github.com/ytdl-org/youtube-dl/issues/8536)
+ 'url': 'https://www.youtube.com/watch?v=gVfLd0zydlo',
+ 'info_dict': {
+ 'id': 'gVfLd0zydlo',
+ 'title': 'DevConf.cz 2016 Day 2 Workshops 1 14:00 - 15:30',
+ },
+ 'playlist_count': 2,
+ 'skip': 'Not multifeed anymore',
+ },
+ {
+ 'url': 'https://vid.plus/FlRa-iH7PGw',
+ 'only_matching': True,
+ },
+ {
+ 'url': 'https://zwearz.com/watch/9lWxNJF-ufM/electra-woman-dyna-girl-official-trailer-grace-helbig.html',
+ 'only_matching': True,
+ },
+ {
+ # Title with JS-like syntax "};" (see https://github.com/ytdl-org/youtube-dl/issues/7468)
+ # Also tests cut-off URL expansion in video description (see
+ # https://github.com/ytdl-org/youtube-dl/issues/1892,
+ # https://github.com/ytdl-org/youtube-dl/issues/8164)
+ 'url': 'https://www.youtube.com/watch?v=lsguqyKfVQg',
+ 'info_dict': {
+ 'id': 'lsguqyKfVQg',
+ 'ext': 'mp4',
+ 'title': '{dark walk}; Loki/AC/Dishonored; collab w/Elflover21',
+ 'alt_title': 'Dark Walk',
+ 'description': 'md5:8085699c11dc3f597ce0410b0dcbb34a',
+ 'duration': 133,
+ 'upload_date': '20151119',
+ 'creator': 'Todd Haberman;\nDaniel Law Heath and Aaron Kaplan',
+ 'track': 'Dark Walk',
+ 'artist': 'Todd Haberman;\nDaniel Law Heath and Aaron Kaplan',
+ 'album': 'Position Music - Production Music Vol. 143 - Dark Walk',
+ 'thumbnail': 'https://i.ytimg.com/vi_webp/lsguqyKfVQg/maxresdefault.webp',
+ 'categories': ['Film & Animation'],
+ 'view_count': int,
+ 'live_status': 'not_live',
+ 'channel_url': 'https://www.youtube.com/channel/UCTSRgz5jylBvFt_S7wnsqLQ',
+ 'channel_id': 'UCTSRgz5jylBvFt_S7wnsqLQ',
+ 'tags': 'count:13',
+ 'availability': 'public',
+ 'channel': 'IronSoulElf',
+ 'playable_in_embed': True,
+ 'like_count': int,
+ 'age_limit': 0,
+ 'channel_follower_count': int
+ },
+ 'params': {
+ 'skip_download': True,
+ },
+ },
+ {
+ # Tags with '};' (see https://github.com/ytdl-org/youtube-dl/issues/7468)
+ 'url': 'https://www.youtube.com/watch?v=Ms7iBXnlUO8',
+ 'only_matching': True,
+ },
+ {
+ # Video with yt:stretch=17:0
+ 'url': 'https://www.youtube.com/watch?v=Q39EVAstoRM',
+ 'info_dict': {
+ 'id': 'Q39EVAstoRM',
+ 'ext': 'mp4',
+ 'title': 'Clash Of Clans#14 Dicas De Ataque Para CV 4',
+ 'description': 'md5:ee18a25c350637c8faff806845bddee9',
+ 'upload_date': '20151107',
+ },
+ 'params': {
+ 'skip_download': True,
+ },
+ 'skip': 'This video does not exist.',
+ },
+ {
+ # Video with incomplete 'yt:stretch=16:'
+ 'url': 'https://www.youtube.com/watch?v=FRhJzUSJbGI',
+ 'only_matching': True,
+ },
+ {
+ # Video licensed under Creative Commons
+ 'url': 'https://www.youtube.com/watch?v=M4gD1WSo5mA',
+ 'info_dict': {
+ 'id': 'M4gD1WSo5mA',
+ 'ext': 'mp4',
+ 'title': 'md5:e41008789470fc2533a3252216f1c1d1',
+ 'description': 'md5:a677553cf0840649b731a3024aeff4cc',
+ 'duration': 721,
+ 'upload_date': '20150128',
+ 'license': 'Creative Commons Attribution license (reuse allowed)',
+ 'channel_id': 'UCuLGmD72gJDBwmLw06X58SA',
+ 'channel_url': 'https://www.youtube.com/channel/UCuLGmD72gJDBwmLw06X58SA',
+ 'like_count': int,
+ 'age_limit': 0,
+ 'tags': ['Copyright (Legal Subject)', 'Law (Industry)', 'William W. Fisher (Author)'],
+ 'channel': 'The Berkman Klein Center for Internet & Society',
+ 'availability': 'public',
+ 'view_count': int,
+ 'categories': ['Education'],
+ 'thumbnail': 'https://i.ytimg.com/vi_webp/M4gD1WSo5mA/maxresdefault.webp',
+ 'live_status': 'not_live',
+ 'playable_in_embed': True,
+ 'channel_follower_count': int,
+ 'chapters': list,
+ 'uploader': 'The Berkman Klein Center for Internet & Society',
+ 'uploader_id': '@BKCHarvard',
+ 'uploader_url': 'https://www.youtube.com/@BKCHarvard',
+ },
+ 'params': {
+ 'skip_download': True,
+ },
+ },
+ {
+ 'url': 'https://www.youtube.com/watch?v=eQcmzGIKrzg',
+ 'info_dict': {
+ 'id': 'eQcmzGIKrzg',
+ 'ext': 'mp4',
+ 'title': 'Democratic Socialism and Foreign Policy | Bernie Sanders',
+ 'description': 'md5:13a2503d7b5904ef4b223aa101628f39',
+ 'duration': 4060,
+ 'upload_date': '20151120',
+ 'license': 'Creative Commons Attribution license (reuse allowed)',
+ 'playable_in_embed': True,
+ 'tags': 'count:12',
+ 'like_count': int,
+ 'channel_id': 'UCH1dpzjCEiGAt8CXkryhkZg',
+ 'age_limit': 0,
+ 'availability': 'public',
+ 'categories': ['News & Politics'],
+ 'channel': 'Bernie Sanders',
+ 'thumbnail': 'https://i.ytimg.com/vi_webp/eQcmzGIKrzg/maxresdefault.webp',
+ 'view_count': int,
+ 'live_status': 'not_live',
+ 'channel_url': 'https://www.youtube.com/channel/UCH1dpzjCEiGAt8CXkryhkZg',
+ 'comment_count': int,
+ 'channel_follower_count': int,
+ 'chapters': list,
+ 'uploader': 'Bernie Sanders',
+ 'uploader_url': 'https://www.youtube.com/@BernieSanders',
+ 'uploader_id': '@BernieSanders',
+ 'channel_is_verified': True,
+ 'heatmap': 'count:100',
+ },
+ 'params': {
+ 'skip_download': True,
+ },
+ },
+ {
+ 'url': 'https://www.youtube.com/watch?feature=player_embedded&amp;amp;v=V36LpHqtcDY',
+ 'only_matching': True,
+ },
+ {
+ # YouTube Red paid video (https://github.com/ytdl-org/youtube-dl/issues/10059)
+ 'url': 'https://www.youtube.com/watch?v=i1Ko8UG-Tdo',
+ 'only_matching': True,
+ },
+ {
+ # Rental video preview
+ 'url': 'https://www.youtube.com/watch?v=yYr8q0y5Jfg',
+ 'info_dict': {
+ 'id': 'uGpuVWrhIzE',
+ 'ext': 'mp4',
+ 'title': 'Piku - Trailer',
+ 'description': 'md5:c36bd60c3fd6f1954086c083c72092eb',
+ 'upload_date': '20150811',
+ 'license': 'Standard YouTube License',
+ },
+ 'params': {
+ 'skip_download': True,
+ },
+ 'skip': 'This video is not available.',
+ },
+ {
+ # YouTube Red video with episode data
+ 'url': 'https://www.youtube.com/watch?v=iqKdEhx-dD4',
+ 'info_dict': {
+ 'id': 'iqKdEhx-dD4',
+ 'ext': 'mp4',
+ 'title': 'Isolation - Mind Field (Ep 1)',
+ 'description': 'md5:f540112edec5d09fc8cc752d3d4ba3cd',
+ 'duration': 2085,
+ 'upload_date': '20170118',
+ 'series': 'Mind Field',
+ 'season_number': 1,
+ 'episode_number': 1,
+ 'thumbnail': 'https://i.ytimg.com/vi_webp/iqKdEhx-dD4/maxresdefault.webp',
+ 'tags': 'count:12',
+ 'view_count': int,
+ 'availability': 'public',
+ 'age_limit': 0,
+ 'channel': 'Vsauce',
+ 'episode': 'Episode 1',
+ 'categories': ['Entertainment'],
+ 'season': 'Season 1',
+ 'channel_id': 'UC6nSFpj9HTCZ5t-N3Rm3-HA',
+ 'channel_url': 'https://www.youtube.com/channel/UC6nSFpj9HTCZ5t-N3Rm3-HA',
+ 'like_count': int,
+ 'playable_in_embed': True,
+ 'live_status': 'not_live',
+ 'channel_follower_count': int,
+ 'uploader': 'Vsauce',
+ 'uploader_url': 'https://www.youtube.com/@Vsauce',
+ 'uploader_id': '@Vsauce',
+ 'comment_count': int,
+ 'channel_is_verified': True,
+ },
+ 'params': {
+ 'skip_download': True,
+ },
+ 'expected_warnings': [
+ 'Skipping DASH manifest',
+ ],
+ },
+ {
+ # The following content has been identified by the YouTube community
+ # as inappropriate or offensive to some audiences.
+ 'url': 'https://www.youtube.com/watch?v=6SJNVb0GnPI',
+ 'info_dict': {
+ 'id': '6SJNVb0GnPI',
+ 'ext': 'mp4',
+ 'title': 'Race Differences in Intelligence',
+ 'description': 'md5:5d161533167390427a1f8ee89a1fc6f1',
+ 'duration': 965,
+ 'upload_date': '20140124',
+ },
+ 'params': {
+ 'skip_download': True,
+ },
+ 'skip': 'This video has been removed for violating YouTube\'s policy on hate speech.',
+ },
+ {
+ # itag 212
+ 'url': '1t24XAntNCY',
+ 'only_matching': True,
+ },
+ {
+ # geo restricted to JP
+ 'url': 'sJL6WA-aGkQ',
+ 'only_matching': True,
+ },
+ {
+ 'url': 'https://invidio.us/watch?v=BaW_jenozKc',
+ 'only_matching': True,
+ },
+ {
+ 'url': 'https://redirect.invidious.io/watch?v=BaW_jenozKc',
+ 'only_matching': True,
+ },
+ {
+ # from https://nitter.pussthecat.org/YouTube/status/1360363141947944964#m
+ 'url': 'https://redirect.invidious.io/Yh0AhrY9GjA',
+ 'only_matching': True,
+ },
+ {
+ # DRM protected
+ 'url': 'https://www.youtube.com/watch?v=s7_qI6_mIXc',
+ 'only_matching': True,
+ },
+ {
+ # Video with unsupported adaptive stream type formats
+ 'url': 'https://www.youtube.com/watch?v=Z4Vy8R84T1U',
+ 'info_dict': {
+ 'id': 'Z4Vy8R84T1U',
+ 'ext': 'mp4',
+ 'title': 'saman SMAN 53 Jakarta(Sancety) opening COFFEE4th at SMAN 53 Jakarta',
+ 'description': 'md5:d41d8cd98f00b204e9800998ecf8427e',
+ 'duration': 433,
+ 'upload_date': '20130923',
+ 'formats': 'maxcount:10',
+ },
+ 'params': {
+ 'skip_download': True,
+ 'youtube_include_dash_manifest': False,
+ },
+ 'skip': 'No longer relevant',
+ },
+ {
+ # YouTube Music auto-generated description
+ # TODO: fix metadata extraction
+ 'url': 'https://music.youtube.com/watch?v=MgNrAu2pzNs',
+ 'info_dict': {
+ 'id': 'MgNrAu2pzNs',
+ 'ext': 'mp4',
+ 'title': 'Voyeur Girl',
+ 'description': 'md5:7ae382a65843d6df2685993e90a8628f',
+ 'upload_date': '20190312',
+ 'artists': ['Stephen'],
+ 'creators': ['Stephen'],
+ 'track': 'Voyeur Girl',
+ 'album': 'it\'s too much love to know my dear',
+ 'release_date': '20190313',
+ 'alt_title': 'Voyeur Girl',
+ 'view_count': int,
+ 'playable_in_embed': True,
+ 'like_count': int,
+ 'categories': ['Music'],
+ 'channel_url': 'https://www.youtube.com/channel/UC-pWHpBjdGG69N9mM2auIAA',
+ 'channel': 'Stephen', # TODO: should be "Stephen - Topic"
+ 'uploader': 'Stephen',
+ 'availability': 'public',
+ 'duration': 169,
+ 'thumbnail': 'https://i.ytimg.com/vi_webp/MgNrAu2pzNs/maxresdefault.webp',
+ 'age_limit': 0,
+ 'channel_id': 'UC-pWHpBjdGG69N9mM2auIAA',
+ 'tags': 'count:11',
+ 'live_status': 'not_live',
+ 'channel_follower_count': int,
+ },
+ 'params': {
+ 'skip_download': True,
+ },
+ },
+ {
+ 'url': 'https://www.youtubekids.com/watch?v=3b8nCWDgZ6Q',
+ 'only_matching': True,
+ },
+ {
+ # invalid -> valid video id redirection
+ 'url': 'DJztXj2GPfl',
+ 'info_dict': {
+ 'id': 'DJztXj2GPfk',
+ 'ext': 'mp4',
+ 'title': 'Panjabi MC - Mundian To Bach Ke (The Dictator Soundtrack)',
+ 'description': 'md5:bf577a41da97918e94fa9798d9228825',
+ 'upload_date': '20090125',
+ 'artist': 'Panjabi MC',
+ 'track': 'Beware of the Boys (Mundian to Bach Ke) - Motivo Hi-Lectro Remix',
+ 'album': 'Beware of the Boys (Mundian To Bach Ke)',
+ },
+ 'params': {
+ 'skip_download': True,
+ },
+ 'skip': 'Video unavailable',
+ },
+ {
+ # empty description results in an empty string
+ 'url': 'https://www.youtube.com/watch?v=x41yOUIvK2k',
+ 'info_dict': {
+ 'id': 'x41yOUIvK2k',
+ 'ext': 'mp4',
+ 'title': 'IMG 3456',
+ 'description': '',
+ 'upload_date': '20170613',
+ 'view_count': int,
+ 'thumbnail': 'https://i.ytimg.com/vi_webp/x41yOUIvK2k/maxresdefault.webp',
+ 'like_count': int,
+ 'channel_id': 'UCo03ZQPBW5U4UC3regpt1nw',
+ 'tags': [],
+ 'channel_url': 'https://www.youtube.com/channel/UCo03ZQPBW5U4UC3regpt1nw',
+ 'availability': 'public',
+ 'age_limit': 0,
+ 'categories': ['Pets & Animals'],
+ 'duration': 7,
+ 'playable_in_embed': True,
+ 'live_status': 'not_live',
+ 'channel': 'l\'Or Vert asbl',
+ 'channel_follower_count': int,
+ 'uploader': 'l\'Or Vert asbl',
+ 'uploader_url': 'https://www.youtube.com/@ElevageOrVert',
+ 'uploader_id': '@ElevageOrVert',
+ },
+ 'params': {
+ 'skip_download': True,
+ },
+ },
+ {
+ # with '};' inside yt initial data (see [1])
+ # see [2] for an example with '};' inside ytInitialPlayerResponse
+ # 1. https://github.com/ytdl-org/youtube-dl/issues/27093
+ # 2. https://github.com/ytdl-org/youtube-dl/issues/27216
+ 'url': 'https://www.youtube.com/watch?v=CHqg6qOn4no',
+ 'info_dict': {
+ 'id': 'CHqg6qOn4no',
+ 'ext': 'mp4',
+ 'title': 'Part 77 Sort a list of simple types in c#',
+ 'description': 'md5:b8746fa52e10cdbf47997903f13b20dc',
+ 'upload_date': '20130831',
+ 'channel_id': 'UCCTVrRB5KpIiK6V2GGVsR1Q',
+ 'like_count': int,
+ 'channel_url': 'https://www.youtube.com/channel/UCCTVrRB5KpIiK6V2GGVsR1Q',
+ 'live_status': 'not_live',
+ 'categories': ['Education'],
+ 'availability': 'public',
+ 'thumbnail': 'https://i.ytimg.com/vi/CHqg6qOn4no/sddefault.jpg',
+ 'tags': 'count:12',
+ 'playable_in_embed': True,
+ 'age_limit': 0,
+ 'view_count': int,
+ 'duration': 522,
+ 'channel': 'kudvenkat',
+ 'comment_count': int,
+ 'channel_follower_count': int,
+ 'chapters': list,
+ 'uploader': 'kudvenkat',
+ 'uploader_url': 'https://www.youtube.com/@Csharp-video-tutorialsBlogspot',
+ 'uploader_id': '@Csharp-video-tutorialsBlogspot',
+ 'channel_is_verified': True,
+ 'heatmap': 'count:100',
+ },
+ 'params': {
+ 'skip_download': True,
+ },
+ },
+ {
+ # another example of '};' in ytInitialData
+ 'url': 'https://www.youtube.com/watch?v=gVfgbahppCY',
+ 'only_matching': True,
+ },
+ {
+ 'url': 'https://www.youtube.com/watch_popup?v=63RmMXCd_bQ',
+ 'only_matching': True,
+ },
+ {
+ # https://github.com/ytdl-org/youtube-dl/pull/28094
+ 'url': 'OtqTfy26tG0',
+ 'info_dict': {
+ 'id': 'OtqTfy26tG0',
+ 'ext': 'mp4',
+ 'title': 'Burn Out',
+ 'description': 'md5:8d07b84dcbcbfb34bc12a56d968b6131',
+ 'upload_date': '20141120',
+ 'artist': 'The Cinematic Orchestra',
+ 'track': 'Burn Out',
+ 'album': 'Every Day',
+ 'like_count': int,
+ 'live_status': 'not_live',
+ 'alt_title': 'Burn Out',
+ 'duration': 614,
+ 'age_limit': 0,
+ 'view_count': int,
+ 'channel_url': 'https://www.youtube.com/channel/UCIzsJBIyo8hhpFm1NK0uLgw',
+ 'creator': 'The Cinematic Orchestra',
+ 'channel': 'The Cinematic Orchestra',
+ 'tags': ['The Cinematic Orchestra', 'Every Day', 'Burn Out'],
+ 'channel_id': 'UCIzsJBIyo8hhpFm1NK0uLgw',
+ 'availability': 'public',
+ 'thumbnail': 'https://i.ytimg.com/vi/OtqTfy26tG0/maxresdefault.jpg',
+ 'categories': ['Music'],
+ 'playable_in_embed': True,
+ 'channel_follower_count': int,
+ 'uploader': 'The Cinematic Orchestra',
+ 'comment_count': int,
+ },
+ 'params': {
+ 'skip_download': True,
+ },
+ },
+ {
+ # controversial video, only works with bpctr when authenticated with cookies
+ 'url': 'https://www.youtube.com/watch?v=nGC3D_FkCmg',
+ 'only_matching': True,
+ },
+ {
+ # controversial video, requires bpctr/contentCheckOk
+ 'url': 'https://www.youtube.com/watch?v=SZJvDhaSDnc',
+ 'info_dict': {
+ 'id': 'SZJvDhaSDnc',
+ 'ext': 'mp4',
+ 'title': 'San Diego teen commits suicide after bullying over embarrassing video',
+ 'channel_id': 'UC-SJ6nODDmufqBzPBwCvYvQ',
+ 'upload_date': '20140716',
+ 'description': 'md5:acde3a73d3f133fc97e837a9f76b53b7',
+ 'duration': 170,
+ 'categories': ['News & Politics'],
+ 'view_count': int,
+ 'channel': 'CBS Mornings',
+ 'tags': ['suicide', 'bullying', 'video', 'cbs', 'news'],
+ 'thumbnail': 'https://i.ytimg.com/vi/SZJvDhaSDnc/hqdefault.jpg',
+ 'age_limit': 18,
+ 'availability': 'needs_auth',
+ 'channel_url': 'https://www.youtube.com/channel/UC-SJ6nODDmufqBzPBwCvYvQ',
+ 'like_count': int,
+ 'live_status': 'not_live',
+ 'playable_in_embed': True,
+ 'channel_follower_count': int,
+ 'uploader': 'CBS Mornings',
+ 'uploader_url': 'https://www.youtube.com/@CBSMornings',
+ 'uploader_id': '@CBSMornings',
+ 'comment_count': int,
+ 'channel_is_verified': True,
+ },
+ },
+ {
+ # restricted location, https://github.com/ytdl-org/youtube-dl/issues/28685
+ 'url': 'cBvYw8_A0vQ',
+ 'info_dict': {
+ 'id': 'cBvYw8_A0vQ',
+ 'ext': 'mp4',
+ 'title': '4K Ueno Okachimachi Street Scenes 上野御徒町歩き',
+ 'description': 'md5:ea770e474b7cd6722b4c95b833c03630',
+ 'upload_date': '20201120',
+ 'duration': 1456,
+ 'categories': ['Travel & Events'],
+ 'channel_id': 'UC3o_t8PzBmXf5S9b7GLx1Mw',
+ 'view_count': int,
+ 'channel': 'Walk around Japan',
+ 'tags': ['Ueno Tokyo', 'Okachimachi Tokyo', 'Ameyoko Street', 'Tokyo attraction', 'Travel in Tokyo'],
+ 'thumbnail': 'https://i.ytimg.com/vi_webp/cBvYw8_A0vQ/hqdefault.webp',
+ 'age_limit': 0,
+ 'availability': 'public',
+ 'channel_url': 'https://www.youtube.com/channel/UC3o_t8PzBmXf5S9b7GLx1Mw',
+ 'live_status': 'not_live',
+ 'playable_in_embed': True,
+ 'channel_follower_count': int,
+ 'uploader': 'Walk around Japan',
+ 'uploader_url': 'https://www.youtube.com/@walkaroundjapan7124',
+ 'uploader_id': '@walkaroundjapan7124',
+ },
+ 'params': {
+ 'skip_download': True,
+ },
+ }, {
+ # Has multiple audio streams
+ 'url': 'WaOKSUlf4TM',
+ 'only_matching': True,
+ }, {
+ # Requires Premium: has format 141 when requested using a YTM URL
+ 'url': 'https://music.youtube.com/watch?v=XclachpHxis',
+ 'only_matching': True,
+ }, {
+ # multiple subtitles with same lang_code
+ 'url': 'https://www.youtube.com/watch?v=wsQiKKfKxug',
+ 'only_matching': True,
+ }, {
+ # Force the use of the android client fallback
+ 'url': 'https://www.youtube.com/watch?v=YOelRv7fMxY',
+ 'info_dict': {
+ 'id': 'YOelRv7fMxY',
+ 'title': 'DIGGING A SECRET TUNNEL Part 1',
+ 'ext': '3gp',
+ 'upload_date': '20210624',
+ 'channel_id': 'UCp68_FLety0O-n9QU6phsgw',
+ 'channel_url': r're:https?://(?:www\.)?youtube\.com/channel/UCp68_FLety0O-n9QU6phsgw',
+ 'description': 'md5:5d5991195d599b56cd0c4148907eec50',
+ 'duration': 596,
+ 'categories': ['Entertainment'],
+ 'view_count': int,
+ 'channel': 'colinfurze',
+ 'tags': ['Colin', 'furze', 'Terry', 'tunnel', 'underground', 'bunker'],
+ 'thumbnail': 'https://i.ytimg.com/vi/YOelRv7fMxY/maxresdefault.jpg',
+ 'age_limit': 0,
+ 'availability': 'public',
+ 'like_count': int,
+ 'live_status': 'not_live',
+ 'playable_in_embed': True,
+ 'channel_follower_count': int,
+ 'chapters': list,
+ 'uploader': 'colinfurze',
+ 'uploader_url': 'https://www.youtube.com/@colinfurze',
+ 'uploader_id': '@colinfurze',
+ 'comment_count': int,
+ 'channel_is_verified': True,
+ 'heatmap': 'count:100',
+ },
+ 'params': {
+ 'format': '17', # 3gp format available on android
+ 'extractor_args': {'youtube': {'player_client': ['android']}},
+ },
+ },
+ {
+ # Skip download of additional client configs (remix client config in this case)
+ 'url': 'https://music.youtube.com/watch?v=MgNrAu2pzNs',
+ 'only_matching': True,
+ 'params': {
+ 'extractor_args': {'youtube': {'player_skip': ['configs']}},
+ },
+ }, {
+ # shorts
+ 'url': 'https://www.youtube.com/shorts/BGQWPY4IigY',
+ 'only_matching': True,
+ }, {
+ 'note': 'Storyboards',
+ 'url': 'https://www.youtube.com/watch?v=5KLPxDtMqe8',
+ 'info_dict': {
+ 'id': '5KLPxDtMqe8',
+ 'ext': 'mhtml',
+ 'format_id': 'sb0',
+ 'title': 'Your Brain is Plastic',
+ 'description': 'md5:89cd86034bdb5466cd87c6ba206cd2bc',
+ 'upload_date': '20140324',
+ 'like_count': int,
+ 'channel_id': 'UCZYTClx2T1of7BRZ86-8fow',
+ 'channel_url': 'https://www.youtube.com/channel/UCZYTClx2T1of7BRZ86-8fow',
+ 'view_count': int,
+ 'thumbnail': 'https://i.ytimg.com/vi/5KLPxDtMqe8/maxresdefault.jpg',
+ 'playable_in_embed': True,
+ 'tags': 'count:12',
+ 'availability': 'public',
+ 'channel': 'SciShow',
+ 'live_status': 'not_live',
+ 'duration': 248,
+ 'categories': ['Education'],
+ 'age_limit': 0,
+ 'channel_follower_count': int,
+ 'chapters': list,
+ 'uploader': 'SciShow',
+ 'uploader_url': 'https://www.youtube.com/@SciShow',
+ 'uploader_id': '@SciShow',
+ 'comment_count': int,
+ 'channel_is_verified': True,
+ 'heatmap': 'count:100',
+ },
+ 'params': {'format': 'mhtml', 'skip_download': True},
+ }, {
+ # Ensure video upload_date is in UTC timezone (video was uploaded 1641170939)
+ 'url': 'https://www.youtube.com/watch?v=2NUZ8W2llS4',
+ 'info_dict': {
+ 'id': '2NUZ8W2llS4',
+ 'ext': 'mp4',
+ 'title': 'The NP that test your phone performance 🙂',
+ 'description': 'md5:144494b24d4f9dfacb97c1bbef5de84d',
+ 'channel_id': 'UCRqNBSOHgilHfAczlUmlWHA',
+ 'channel_url': 'https://www.youtube.com/channel/UCRqNBSOHgilHfAczlUmlWHA',
+ 'duration': 21,
+ 'view_count': int,
+ 'age_limit': 0,
+ 'categories': ['Gaming'],
+ 'tags': 'count:23',
+ 'playable_in_embed': True,
+ 'live_status': 'not_live',
+ 'upload_date': '20220103',
+ 'like_count': int,
+ 'availability': 'public',
+ 'channel': 'Leon Nguyen',
+ 'thumbnail': 'https://i.ytimg.com/vi_webp/2NUZ8W2llS4/maxresdefault.webp',
+ 'comment_count': int,
+ 'channel_follower_count': int,
+ 'uploader': 'Leon Nguyen',
+ 'uploader_url': 'https://www.youtube.com/@LeonNguyen',
+ 'uploader_id': '@LeonNguyen',
+ 'heatmap': 'count:100',
+ },
+ }, {
+ # Same video as above, but with --compat-options no-youtube-prefer-utc-upload-date
+ 'url': 'https://www.youtube.com/watch?v=2NUZ8W2llS4',
+ 'info_dict': {
+ 'id': '2NUZ8W2llS4',
+ 'ext': 'mp4',
+ 'title': 'The NP that test your phone performance 🙂',
+ 'description': 'md5:144494b24d4f9dfacb97c1bbef5de84d',
+ 'channel_id': 'UCRqNBSOHgilHfAczlUmlWHA',
+ 'channel_url': 'https://www.youtube.com/channel/UCRqNBSOHgilHfAczlUmlWHA',
+ 'duration': 21,
+ 'view_count': int,
+ 'age_limit': 0,
+ 'categories': ['Gaming'],
+ 'tags': 'count:23',
+ 'playable_in_embed': True,
+ 'live_status': 'not_live',
+ 'upload_date': '20220102',
+ 'like_count': int,
+ 'availability': 'public',
+ 'channel': 'Leon Nguyen',
+ 'thumbnail': 'https://i.ytimg.com/vi_webp/2NUZ8W2llS4/maxresdefault.webp',
+ 'comment_count': int,
+ 'channel_follower_count': int,
+ 'uploader': 'Leon Nguyen',
+ 'uploader_url': 'https://www.youtube.com/@LeonNguyen',
+ 'uploader_id': '@LeonNguyen',
+ 'heatmap': 'count:100',
+ },
+ 'params': {'compat_opts': ['no-youtube-prefer-utc-upload-date']},
+ }, {
+ # Date text indicates a premiered video; ensure upload date is in UTC (published 1641172509)
+ 'url': 'https://www.youtube.com/watch?v=mzZzzBU6lrM',
+ 'info_dict': {
+ 'id': 'mzZzzBU6lrM',
+ 'ext': 'mp4',
+ 'title': 'I Met GeorgeNotFound In Real Life...',
+ 'description': 'md5:978296ec9783a031738b684d4ebf302d',
+ 'channel_id': 'UC_8NknAFiyhOUaZqHR3lq3Q',
+ 'channel_url': 'https://www.youtube.com/channel/UC_8NknAFiyhOUaZqHR3lq3Q',
+ 'duration': 955,
+ 'view_count': int,
+ 'age_limit': 0,
+ 'categories': ['Entertainment'],
+ 'tags': 'count:26',
+ 'playable_in_embed': True,
+ 'live_status': 'not_live',
+ 'release_timestamp': 1641172509,
+ 'release_date': '20220103',
+ 'upload_date': '20220103',
+ 'like_count': int,
+ 'availability': 'public',
+ 'channel': 'Quackity',
+ 'thumbnail': 'https://i.ytimg.com/vi/mzZzzBU6lrM/maxresdefault.jpg',
+ 'channel_follower_count': int,
+ 'uploader': 'Quackity',
+ 'uploader_id': '@Quackity',
+ 'uploader_url': 'https://www.youtube.com/@Quackity',
+ 'comment_count': int,
+ 'channel_is_verified': True,
+ 'heatmap': 'count:100',
+ },
+ },
+ { # continuous livestream. Microformat upload date should be preferred.
+ # Upload date was 2021-06-19 (not UTC), while stream start is 2021-11-27
+ 'url': 'https://www.youtube.com/watch?v=kgx4WGK0oNU',
+ 'info_dict': {
+ 'id': 'kgx4WGK0oNU',
+ 'title': r're:jazz\/lofi hip hop radio🌱chill beats to relax\/study to \[LIVE 24\/7\] \d{4}-\d{2}-\d{2} \d{2}:\d{2}',
+ 'ext': 'mp4',
+ 'channel_id': 'UC84whx2xxsiA1gXHXXqKGOA',
+ 'availability': 'public',
+ 'age_limit': 0,
+ 'release_timestamp': 1637975704,
+ 'upload_date': '20210619',
+ 'channel_url': 'https://www.youtube.com/channel/UC84whx2xxsiA1gXHXXqKGOA',
+ 'live_status': 'is_live',
+ 'thumbnail': 'https://i.ytimg.com/vi/kgx4WGK0oNU/maxresdefault.jpg',
+ 'channel': 'Abao in Tokyo',
+ 'channel_follower_count': int,
+ 'release_date': '20211127',
+ 'tags': 'count:39',
+ 'categories': ['People & Blogs'],
+ 'like_count': int,
+ 'view_count': int,
+ 'playable_in_embed': True,
+ 'description': 'md5:2ef1d002cad520f65825346e2084e49d',
+ 'concurrent_view_count': int,
+ 'uploader': 'Abao in Tokyo',
+ 'uploader_url': 'https://www.youtube.com/@abaointokyo',
+ 'uploader_id': '@abaointokyo',
+ },
+ 'params': {'skip_download': True},
+ }, {
+ 'url': 'https://www.youtube.com/watch?v=tjjjtzRLHvA',
+ 'info_dict': {
+ 'id': 'tjjjtzRLHvA',
+ 'ext': 'mp4',
+ 'title': 'ハッシュタグ無し };if window.ytcsi',
+ 'upload_date': '20220323',
+ 'like_count': int,
+ 'availability': 'unlisted',
+ 'channel': 'Lesmiscore',
+ 'thumbnail': r're:^https?://.*\.jpg',
+ 'age_limit': 0,
+ 'categories': ['Music'],
+ 'view_count': int,
+ 'description': '',
+ 'channel_url': 'https://www.youtube.com/channel/UCdqltm_7iv1Vs6kp6Syke5A',
+ 'channel_id': 'UCdqltm_7iv1Vs6kp6Syke5A',
+ 'live_status': 'not_live',
+ 'playable_in_embed': True,
+ 'channel_follower_count': int,
+ 'duration': 6,
+ 'tags': [],
+ 'uploader_id': '@lesmiscore',
+ 'uploader': 'Lesmiscore',
+ 'uploader_url': 'https://www.youtube.com/@lesmiscore',
+ },
+ }, {
+ # Prefer primary title+description language metadata by default
+ # Do not prefer translated description if primary is empty
+ 'url': 'https://www.youtube.com/watch?v=el3E4MbxRqQ',
+ 'info_dict': {
+ 'id': 'el3E4MbxRqQ',
+ 'ext': 'mp4',
+ 'title': 'dlp test video 2 - primary sv no desc',
+ 'description': '',
+ 'channel': 'cole-dlp-test-acc',
+ 'tags': [],
+ 'view_count': int,
+ 'channel_url': 'https://www.youtube.com/channel/UCiu-3thuViMebBjw_5nWYrA',
+ 'like_count': int,
+ 'playable_in_embed': True,
+ 'availability': 'unlisted',
+ 'thumbnail': r're:^https?://.*\.jpg',
+ 'age_limit': 0,
+ 'duration': 5,
+ 'live_status': 'not_live',
+ 'upload_date': '20220908',
+ 'categories': ['People & Blogs'],
+ 'channel_id': 'UCiu-3thuViMebBjw_5nWYrA',
+ 'uploader_url': 'https://www.youtube.com/@coletdjnz',
+ 'uploader_id': '@coletdjnz',
+ 'uploader': 'cole-dlp-test-acc',
+ },
+ 'params': {'skip_download': True},
+ }, {
+ # Extractor argument: prefer translated title+description
+ 'url': 'https://www.youtube.com/watch?v=gHKT4uU8Zng',
+ 'info_dict': {
+ 'id': 'gHKT4uU8Zng',
+ 'ext': 'mp4',
+ 'channel': 'cole-dlp-test-acc',
+ 'tags': [],
+ 'duration': 5,
+ 'live_status': 'not_live',
+ 'channel_id': 'UCiu-3thuViMebBjw_5nWYrA',
+ 'upload_date': '20220728',
+ 'view_count': int,
+ 'categories': ['People & Blogs'],
+ 'thumbnail': r're:^https?://.*\.jpg',
+ 'title': 'dlp test video title translated (fr)',
+ 'availability': 'public',
+ 'age_limit': 0,
+ 'description': 'dlp test video description translated (fr)',
+ 'playable_in_embed': True,
+ 'channel_url': 'https://www.youtube.com/channel/UCiu-3thuViMebBjw_5nWYrA',
+ 'uploader_url': 'https://www.youtube.com/@coletdjnz',
+ 'uploader_id': '@coletdjnz',
+ 'uploader': 'cole-dlp-test-acc',
+ },
+ 'params': {'skip_download': True, 'extractor_args': {'youtube': {'lang': ['fr']}}},
+ 'expected_warnings': [r'Preferring "fr" translated fields'],
+ }, {
+ 'note': '6 channel audio',
+ 'url': 'https://www.youtube.com/watch?v=zgdo7-RRjgo',
+ 'only_matching': True,
+ }, {
+ 'note': 'Multiple HLS formats with same itag',
+ 'url': 'https://www.youtube.com/watch?v=kX3nB4PpJko',
+ 'info_dict': {
+ 'id': 'kX3nB4PpJko',
+ 'ext': 'mp4',
+ 'categories': ['Entertainment'],
+ 'description': 'md5:e8031ff6e426cdb6a77670c9b81f6fa6',
+ 'live_status': 'not_live',
+ 'duration': 937,
+ 'channel_follower_count': int,
+ 'thumbnail': 'https://i.ytimg.com/vi_webp/kX3nB4PpJko/maxresdefault.webp',
+ 'title': 'Last To Take Hand Off Jet, Keeps It!',
+ 'channel': 'MrBeast',
+ 'playable_in_embed': True,
+ 'view_count': int,
+ 'upload_date': '20221112',
+ 'channel_url': 'https://www.youtube.com/channel/UCX6OQ3DkcsbYNE6H8uQQuVA',
+ 'age_limit': 0,
+ 'availability': 'public',
+ 'channel_id': 'UCX6OQ3DkcsbYNE6H8uQQuVA',
+ 'like_count': int,
+ 'tags': [],
+ 'uploader': 'MrBeast',
+ 'uploader_url': 'https://www.youtube.com/@MrBeast',
+ 'uploader_id': '@MrBeast',
+ 'comment_count': int,
+ 'channel_is_verified': True,
+ 'heatmap': 'count:100',
+ },
+ 'params': {'extractor_args': {'youtube': {'player_client': ['ios']}}, 'format': '233-1'},
+ }, {
+ 'note': 'Audio formats with Dynamic Range Compression',
+ 'url': 'https://www.youtube.com/watch?v=Tq92D6wQ1mg',
+ 'info_dict': {
+ 'id': 'Tq92D6wQ1mg',
+ 'ext': 'webm',
+ 'title': '[MMD] Adios - EVERGLOW [+Motion DL]',
+ 'channel_url': 'https://www.youtube.com/channel/UC1yoRdFoFJaCY-AGfD9W0wQ',
+ 'channel_id': 'UC1yoRdFoFJaCY-AGfD9W0wQ',
+ 'channel_follower_count': int,
+ 'description': 'md5:17eccca93a786d51bc67646756894066',
+ 'upload_date': '20191228',
+ 'tags': ['mmd', 'dance', 'mikumikudance', 'kpop', 'vtuber'],
+ 'playable_in_embed': True,
+ 'like_count': int,
+ 'categories': ['Entertainment'],
+ 'thumbnail': 'https://i.ytimg.com/vi/Tq92D6wQ1mg/sddefault.jpg',
+ 'age_limit': 18,
+ 'channel': 'Projekt Melody',
+ 'view_count': int,
+ 'availability': 'needs_auth',
+ 'comment_count': int,
+ 'live_status': 'not_live',
+ 'duration': 106,
+ 'uploader': 'Projekt Melody',
+ 'uploader_id': '@ProjektMelody',
+ 'uploader_url': 'https://www.youtube.com/@ProjektMelody',
+ },
+ 'params': {'extractor_args': {'youtube': {'player_client': ['tv_embedded']}}, 'format': '251-drc'},
+ },
+ {
+ 'url': 'https://www.youtube.com/live/qVv6vCqciTM',
+ 'info_dict': {
+ 'id': 'qVv6vCqciTM',
+ 'ext': 'mp4',
+ 'age_limit': 0,
+ 'comment_count': int,
+ 'chapters': 'count:13',
+ 'upload_date': '20221223',
+ 'thumbnail': 'https://i.ytimg.com/vi/qVv6vCqciTM/maxresdefault.jpg',
+ 'channel_url': 'https://www.youtube.com/channel/UCIdEIHpS0TdkqRkHL5OkLtA',
+ 'like_count': int,
+ 'release_date': '20221223',
+ 'tags': ['Vtuber', '月ノ美兎', '名取さな', 'にじさんじ', 'クリスマス', '3D配信'],
+ 'title': '【 #インターネット女クリスマス 】3Dで歌ってはしゃぐインターネットの女たち【月ノ美兎/名取さな】',
+ 'view_count': int,
+ 'playable_in_embed': True,
+ 'duration': 4438,
+ 'availability': 'public',
+ 'channel_follower_count': int,
+ 'channel_id': 'UCIdEIHpS0TdkqRkHL5OkLtA',
+ 'categories': ['Entertainment'],
+ 'live_status': 'was_live',
+ 'release_timestamp': 1671793345,
+ 'channel': 'さなちゃんねる',
+ 'description': 'md5:6aebf95cc4a1d731aebc01ad6cc9806d',
+ 'uploader': 'さなちゃんねる',
+ 'uploader_url': 'https://www.youtube.com/@sana_natori',
+ 'uploader_id': '@sana_natori',
+ 'channel_is_verified': True,
+ 'heatmap': 'count:100',
+ },
+ },
+ {
+ # Fallbacks for when the webpage and web client are unavailable
+ 'url': 'https://www.youtube.com/watch?v=wSSmNUl9Snw',
+ 'info_dict': {
+ 'id': 'wSSmNUl9Snw',
+ 'ext': 'mp4',
+ # 'categories': ['Science & Technology'],
+ 'view_count': int,
+ 'chapters': 'count:2',
+ 'channel': 'Scott Manley',
+ 'like_count': int,
+ 'age_limit': 0,
+ # 'availability': 'public',
+ 'channel_follower_count': int,
+ 'live_status': 'not_live',
+ 'upload_date': '20170831',
+ 'duration': 682,
+ 'tags': 'count:8',
+ 'uploader_url': 'https://www.youtube.com/@scottmanley',
+ 'description': 'md5:f4bed7b200404b72a394c2f97b782c02',
+ 'uploader': 'Scott Manley',
+ 'uploader_id': '@scottmanley',
+ 'title': 'The Computer Hack That Saved Apollo 14',
+ 'channel_id': 'UCxzC4EngIsMrPmbm6Nxvb-A',
+ 'thumbnail': r're:^https?://.*\.webp',
+ 'channel_url': 'https://www.youtube.com/channel/UCxzC4EngIsMrPmbm6Nxvb-A',
+ 'playable_in_embed': True,
+ 'comment_count': int,
+ 'channel_is_verified': True,
+ 'heatmap': 'count:100',
+ },
+ 'params': {
+ 'extractor_args': {'youtube': {'player_client': ['android'], 'player_skip': ['webpage']}},
+ },
+ },
+ ]
+
+ _WEBPAGE_TESTS = [
+ # YouTube <object> embed
+ {
+ 'url': 'http://www.improbable.com/2017/04/03/untrained-modern-youths-and-ancient-masters-in-selfie-portraits/',
+ 'md5': '873c81d308b979f0e23ee7e620b312a3',
+ 'info_dict': {
+ 'id': 'msN87y-iEx0',
+ 'ext': 'mp4',
+ 'title': 'Feynman: Mirrors FUN TO IMAGINE 6',
+ 'upload_date': '20080526',
+ 'description': 'md5:873c81d308b979f0e23ee7e620b312a3',
+ 'age_limit': 0,
+ 'tags': ['feynman', 'mirror', 'science', 'physics', 'imagination', 'fun', 'cool', 'puzzle'],
+ 'channel_id': 'UCCeo--lls1vna5YJABWAcVA',
+ 'playable_in_embed': True,
+ 'thumbnail': 'https://i.ytimg.com/vi/msN87y-iEx0/hqdefault.jpg',
+ 'like_count': int,
+ 'comment_count': int,
+ 'channel': 'Christopher Sykes',
+ 'live_status': 'not_live',
+ 'channel_url': 'https://www.youtube.com/channel/UCCeo--lls1vna5YJABWAcVA',
+ 'availability': 'public',
+ 'duration': 195,
+ 'view_count': int,
+ 'categories': ['Science & Technology'],
+ 'channel_follower_count': int,
+ 'uploader': 'Christopher Sykes',
+ 'uploader_url': 'https://www.youtube.com/@ChristopherSykesDocumentaries',
+ 'uploader_id': '@ChristopherSykesDocumentaries',
+ 'heatmap': 'count:100',
+ },
+ 'params': {
+ 'skip_download': True,
+ },
+ },
+ ]
+
+ @classmethod
+ def suitable(cls, url):
+ from ..utils import parse_qs
+
+ qs = parse_qs(url)
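+ # URLs that carry a playlist id are rejected here, so that the
+ # playlist/tab extractors can claim them instead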
+ if qs.get('list', [None])[0]:
+ return False
+ return super().suitable(url)
+
+ def __init__(self, *args, **kwargs):
+ super().__init__(*args, **kwargs)
+ self._code_cache = {}
+ self._player_cache = {}
+
+ def _prepare_live_from_start_formats(self, formats, video_id, live_start_time, url, webpage_url, smuggled_data, is_live):
+ lock = threading.Lock()
+ start_time = time.time()
+ formats = [f for f in formats if f.get('is_from_start')]
+
+ def refetch_manifest(format_id, delay):
+ nonlocal formats, start_time, is_live
+ if time.time() <= start_time + delay:
+ return
+
+ _, _, prs, player_url = self._download_player_responses(url, smuggled_data, video_id, webpage_url)
+ video_details = traverse_obj(prs, (..., 'videoDetails'), expected_type=dict)
+ microformats = traverse_obj(
+ prs, (..., 'microformat', 'playerMicroformatRenderer'),
+ expected_type=dict)
+ _, live_status, _, formats, _ = self._list_formats(video_id, microformats, video_details, prs, player_url)
+ is_live = live_status == 'is_live'
+ start_time = time.time()
+
+ def mpd_feed(format_id, delay):
+ """
+ @returns (manifest_url, manifest_stream_number, is_live) or None
+ """
+ for retry in self.RetryManager(fatal=False):
+ with lock:
+ refetch_manifest(format_id, delay)
+
+ f = next((f for f in formats if f['format_id'] == format_id), None)
+ if not f:
+ if not is_live:
+ retry.error = f'{video_id}: Video is no longer live'
+ else:
+ retry.error = f'Cannot find refreshed manifest for format {format_id}{bug_reports_message()}'
+ continue
+ return f['manifest_url'], f['manifest_stream_number'], is_live
+ return None
+
+ for f in formats:
+ f['is_live'] = is_live
+ gen = functools.partial(self._live_dash_fragments, video_id, f['format_id'],
+ live_start_time, mpd_feed, not is_live and f.copy())
+ if is_live:
+ f['fragments'] = gen
+ f['protocol'] = 'http_dash_segments_generator'
+ else:
+ f['fragments'] = LazyList(gen({}))
+ del f['is_from_start']
+
+ def _live_dash_fragments(self, video_id, format_id, live_start_time, mpd_feed, manifestless_orig_fmt, ctx):
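+ # FETCH_SPAN: minimum seconds between fragment-list fetches;
+ # MAX_DURATION: 432000 s = 120 h, roughly how far back YouTube keeps live segments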
+ FETCH_SPAN, MAX_DURATION = 5, 432000
+
+ mpd_url, stream_number, is_live = None, None, True
+
+ begin_index = 0
+ download_start_time = ctx.get('start') or time.time()
+
+ lack_early_segments = download_start_time - (live_start_time or download_start_time) > MAX_DURATION
+ if lack_early_segments:
+ self.report_warning(bug_reports_message(
+ 'Starting download from the last 120 hours of the live stream since '
+ 'YouTube does not have data before that. If you think this is wrong,'), only_once=True)
+
+ known_idx, no_fragment_score, last_segment_url = begin_index, 0, None
+ fragments, fragment_base_url = None, None
+
+ def _extract_sequence_from_mpd(refresh_sequence, immediate):
+ nonlocal mpd_url, stream_number, is_live, no_fragment_score, fragments, fragment_base_url
+ # Obtain from MPD's maximum seq value
+ old_mpd_url = mpd_url
+ last_error = ctx.pop('last_error', None)
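+ # Refresh the manifest after only 5 s if the previous attempt failed with
+ # HTTP 403 (segment URLs have likely expired); otherwise wait up to 18000 s (5 h)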
+ expire_fast = immediate or last_error and isinstance(last_error, HTTPError) and last_error.status == 403
+ mpd_url, stream_number, is_live = (mpd_feed(format_id, 5 if expire_fast else 18000)
+ or (mpd_url, stream_number, False))
+ if not refresh_sequence:
+ if expire_fast and not is_live:
+ return False, last_seq
+ elif old_mpd_url == mpd_url:
+ return True, last_seq
+ if manifestless_orig_fmt:
+ fmt_info = manifestless_orig_fmt
+ else:
+ try:
+ fmts, _ = self._extract_mpd_formats_and_subtitles(
+ mpd_url, None, note=False, errnote=False, fatal=False)
+ except ExtractorError:
+ fmts = None
+ if not fmts:
+ no_fragment_score += 2
+ return False, last_seq
+ fmt_info = next(x for x in fmts if x['manifest_stream_number'] == stream_number)
+ fragments = fmt_info['fragments']
+ fragment_base_url = fmt_info['fragment_base_url']
+ assert fragment_base_url
+
+ _last_seq = int(re.search(r'(?:/|^)sq/(\d+)', fragments[-1]['path']).group(1))
+ return True, _last_seq
+
+ self.write_debug(f'[{video_id}] Generating fragments for format {format_id}')
+ while is_live:
+ fetch_time = time.time()
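+ # no_fragment_score accumulates on failed or idle fetches and is reset on
+ # progress; give up once it exceeds 30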
+ if no_fragment_score > 30:
+ return
+ if last_segment_url:
+ # Obtain from "X-Head-Seqnum" header value from each segment
+ try:
+ urlh = self._request_webpage(
+ last_segment_url, None, note=False, errnote=False, fatal=False)
+ except ExtractorError:
+ urlh = None
+ last_seq = try_get(urlh, lambda x: int_or_none(x.headers['X-Head-Seqnum']))
+ if last_seq is None:
+ no_fragment_score += 2
+ last_segment_url = None
+ continue
+ else:
+ should_continue, last_seq = _extract_sequence_from_mpd(True, no_fragment_score > 15)
+ no_fragment_score += 2
+ if not should_continue:
+ continue
+
+ if known_idx > last_seq:
+ last_segment_url = None
+ continue
+
+ last_seq += 1
+
+ if begin_index < 0 and known_idx < 0:
+ # skip from the start when begin_index is negative
+ known_idx = last_seq + begin_index
+ if lack_early_segments:
+ known_idx = max(known_idx, last_seq - int(MAX_DURATION // fragments[-1]['duration']))
+ try:
+ for idx in range(known_idx, last_seq):
+ # do not update known_idx here, or parts of the stream may be skipped
+ should_continue, _ = _extract_sequence_from_mpd(False, False)
+ if not should_continue:
+ known_idx = idx - 1
+ raise ExtractorError('breaking out of outer loop')
+ last_segment_url = urljoin(fragment_base_url, 'sq/%d' % idx)
+ yield {
+ 'url': last_segment_url,
+ 'fragment_count': last_seq,
+ }
+ if known_idx == last_seq:
+ no_fragment_score += 5
+ else:
+ no_fragment_score = 0
+ known_idx = last_seq
+ except ExtractorError:
+ continue
+
+ if manifestless_orig_fmt:
+ # Stop after the first iteration when running for a post-live manifestless
+ # stream; the fragment count no longer increases once the stream has ended
+ break
+
+ time.sleep(max(0, FETCH_SPAN + fetch_time - time.time()))
+
+ def _extract_player_url(self, *ytcfgs, webpage=None):
+ player_url = traverse_obj(
+ ytcfgs, (..., 'PLAYER_JS_URL'), (..., 'WEB_PLAYER_CONTEXT_CONFIGS', ..., 'jsUrl'),
+ get_all=False, expected_type=str)
+ if not player_url:
+ return
+ return urljoin('https://www.youtube.com', player_url)
+
+ def _download_player_url(self, video_id, fatal=False):
+ res = self._download_webpage(
+ 'https://www.youtube.com/iframe_api',
+ note='Downloading iframe API JS', video_id=video_id, fatal=fatal)
+ if res:
+ player_version = self._search_regex(
+ r'player\\?/([0-9a-fA-F]{8})\\?/', res, 'player version', fatal=fatal)
+ if player_version:
+ return f'https://www.youtube.com/s/player/{player_version}/player_ias.vflset/en_US/base.js'
+
+ def _signature_cache_id(self, example_sig):
+ """ Return a string representation of a signature """
+ return '.'.join(str(len(part)) for part in example_sig.split('.'))
+
+ @classmethod
+ def _extract_player_info(cls, player_url):
+ for player_re in cls._PLAYER_INFO_RE:
+ id_m = re.search(player_re, player_url)
+ if id_m:
+ break
+ else:
+ raise ExtractorError('Cannot identify player %r' % player_url)
+ return id_m.group('id')
+
+ def _load_player(self, video_id, player_url, fatal=True):
+ player_id = self._extract_player_info(player_url)
+ if player_id not in self._code_cache:
+ code = self._download_webpage(
+ player_url, video_id, fatal=fatal,
+ note='Downloading player ' + player_id,
+ errnote='Download of %s failed' % player_url)
+ if code:
+ self._code_cache[player_id] = code
+ return self._code_cache.get(player_id)
+
+ def _extract_signature_function(self, video_id, player_url, example_sig):
+ player_id = self._extract_player_info(player_url)
+
+ # Read from filesystem cache
+ func_id = f'js_{player_id}_{self._signature_cache_id(example_sig)}'
+ assert os.path.basename(func_id) == func_id
+
+ self.write_debug(f'Extracting signature function {func_id}')
+ cache_spec, code = self.cache.load('youtube-sigfuncs', func_id), None
+
+ if not cache_spec:
+ code = self._load_player(video_id, player_url)
+ if code:
+ res = self._parse_sig_js(code)
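+ # The probe string contains every character exactly once, so running it
+ # through the player's function records the applied permutation as a list
+ # of source indices; e.g. a function mapping 'abc' -> 'cab' yields [2, 0, 1]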
+ test_string = ''.join(map(chr, range(len(example_sig))))
+ cache_spec = [ord(c) for c in res(test_string)]
+ self.cache.store('youtube-sigfuncs', func_id, cache_spec)
+
+ return lambda s: ''.join(s[i] for i in cache_spec)
+
+ def _print_sig_code(self, func, example_sig):
+ if not self.get_param('youtube_print_sig_code'):
+ return
+
+ def gen_sig_code(idxs):
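+ # Collapse runs of consecutive indices into slice notation,
+ # e.g. [1, 2, 3, 7] -> "s[1:4] + s[7]"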
+ def _genslice(start, end, step):
+ starts = '' if start == 0 else str(start)
+ ends = (':%d' % (end + step)) if end + step >= 0 else ':'
+ steps = '' if step == 1 else (':%d' % step)
+ return f's[{starts}{ends}{steps}]'
+
+ step = None
+ # Quell pyflakes warnings - start will be set when step is set
+ start = '(Never used)'
+ for i, prev in zip(idxs[1:], idxs[:-1]):
+ if step is not None:
+ if i - prev == step:
+ continue
+ yield _genslice(start, prev, step)
+ step = None
+ continue
+ if i - prev in [-1, 1]:
+ step = i - prev
+ start = prev
+ continue
+ else:
+ yield 's[%d]' % prev
+ if step is None:
+ yield 's[%d]' % i
+ else:
+ yield _genslice(start, i, step)
+
+ test_string = ''.join(map(chr, range(len(example_sig))))
+ cache_res = func(test_string)
+ cache_spec = [ord(c) for c in cache_res]
+ expr_code = ' + '.join(gen_sig_code(cache_spec))
+ signature_id_tuple = '(%s)' % (
+ ', '.join(str(len(p)) for p in example_sig.split('.')))
+ code = ('if tuple(len(p) for p in s.split(\'.\')) == %s:\n'
+ ' return %s\n') % (signature_id_tuple, expr_code)
+ self.to_screen('Extracted signature function:\n' + code)
+
+ def _parse_sig_js(self, jscode):
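+ # Try a series of regexes, from current to obsolete player layouts,
+ # to locate the name of the signature-scrambling function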
+ funcname = self._search_regex(
+ (r'\b[cs]\s*&&\s*[adf]\.set\([^,]+\s*,\s*encodeURIComponent\s*\(\s*(?P<sig>[a-zA-Z0-9$]+)\(',
+ r'\b[a-zA-Z0-9]+\s*&&\s*[a-zA-Z0-9]+\.set\([^,]+\s*,\s*encodeURIComponent\s*\(\s*(?P<sig>[a-zA-Z0-9$]+)\(',
+ r'\bm=(?P<sig>[a-zA-Z0-9$]{2,})\(decodeURIComponent\(h\.s\)\)',
+ r'\bc&&\(c=(?P<sig>[a-zA-Z0-9$]{2,})\(decodeURIComponent\(c\)\)',
+ r'(?:\b|[^a-zA-Z0-9$])(?P<sig>[a-zA-Z0-9$]{2,})\s*=\s*function\(\s*a\s*\)\s*{\s*a\s*=\s*a\.split\(\s*""\s*\)(?:;[a-zA-Z0-9$]{2}\.[a-zA-Z0-9$]{2}\(a,\d+\))?',
+ r'(?P<sig>[a-zA-Z0-9$]+)\s*=\s*function\(\s*a\s*\)\s*{\s*a\s*=\s*a\.split\(\s*""\s*\)',
+ # Obsolete patterns
+ r'("|\')signature\1\s*,\s*(?P<sig>[a-zA-Z0-9$]+)\(',
+ r'\.sig\|\|(?P<sig>[a-zA-Z0-9$]+)\(',
+ r'yt\.akamaized\.net/\)\s*\|\|\s*.*?\s*[cs]\s*&&\s*[adf]\.set\([^,]+\s*,\s*(?:encodeURIComponent\s*\()?\s*(?P<sig>[a-zA-Z0-9$]+)\(',
+ r'\b[cs]\s*&&\s*[adf]\.set\([^,]+\s*,\s*(?P<sig>[a-zA-Z0-9$]+)\(',
+ r'\b[a-zA-Z0-9]+\s*&&\s*[a-zA-Z0-9]+\.set\([^,]+\s*,\s*(?P<sig>[a-zA-Z0-9$]+)\(',
+ r'\bc\s*&&\s*[a-zA-Z0-9]+\.set\([^,]+\s*,\s*\([^)]*\)\s*\(\s*(?P<sig>[a-zA-Z0-9$]+)\('),
+ jscode, 'Initial JS player signature function name', group='sig')
+
+ jsi = JSInterpreter(jscode)
+ initial_function = jsi.extract_function(funcname)
+ return lambda s: initial_function([s])
+
+ def _cached(self, func, *cache_id):
+ def inner(*args, **kwargs):
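+ # Exceptions are cached as well, so a failed extraction is raised
+ # immediately on subsequent calls instead of being retried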
+ if cache_id not in self._player_cache:
+ try:
+ self._player_cache[cache_id] = func(*args, **kwargs)
+ except ExtractorError as e:
+ self._player_cache[cache_id] = e
+ except Exception as e:
+ self._player_cache[cache_id] = ExtractorError(traceback.format_exc(), cause=e)
+
+ ret = self._player_cache[cache_id]
+ if isinstance(ret, Exception):
+ raise ret
+ return ret
+ return inner
+
+ def _decrypt_signature(self, s, video_id, player_url):
+ """Turn the encrypted s field into a working signature"""
+ extract_sig = self._cached(
+ self._extract_signature_function, 'sig', player_url, self._signature_cache_id(s))
+ func = extract_sig(video_id, player_url, s)
+ self._print_sig_code(func, s)
+ return func(s)
+
+ def _decrypt_nsig(self, s, video_id, player_url):
+ """Turn the encrypted n field into a working signature"""
+ if player_url is None:
+ raise ExtractorError('Cannot decrypt nsig without player_url')
+ player_url = urljoin('https://www.youtube.com', player_url)
+
+ try:
+ jsi, player_id, func_code = self._extract_n_function_code(video_id, player_url)
+ except ExtractorError as e:
+ raise ExtractorError('Unable to extract nsig function code', cause=e)
+ if self.get_param('youtube_print_sig_code'):
+ self.to_screen(f'Extracted nsig function from {player_id}:\n{func_code[1]}\n')
+
+ try:
+ extract_nsig = self._cached(self._extract_n_function_from_code, 'nsig func', player_url)
+ ret = extract_nsig(jsi, func_code)(s)
+ except JSInterpreter.Exception as e:
+ try:
+ jsi = PhantomJSwrapper(self, timeout=5000)
+ except ExtractorError:
+ raise e
+ self.report_warning(
+ f'Native nsig extraction failed: Trying with PhantomJS\n'
+ f' n = {s} ; player = {player_url}', video_id)
+ self.write_debug(e, only_once=True)
+
+ args, func_body = func_code
+ ret = jsi.execute(
+ f'console.log(function({", ".join(args)}) {{ {func_body} }}({s!r}));',
+ video_id=video_id, note='Executing signature code').strip()
+
+ self.write_debug(f'Decrypted nsig {s} => {ret}')
+ return ret
+
+ def _extract_n_function_name(self, jscode):
+ funcname, idx = self._search_regex(
+ r'\.get\("n"\)\)&&\(b=(?P<nfunc>[a-zA-Z0-9$]+)(?:\[(?P<idx>\d+)\])?\([a-zA-Z0-9]\)',
+ jscode, 'Initial JS player n function name', group=('nfunc', 'idx'))
+ if not idx:
+ return funcname
+
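+ # If an index was captured, the function was referenced through an array
+ # (e.g. `c[idx](n)`); parse the array literal and return the name at `idx`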
+ return json.loads(js_to_json(self._search_regex(
+ rf'var {re.escape(funcname)}\s*=\s*(\[.+?\])\s*[,;]', jscode,
+ f'Initial JS player n function list ({funcname}.{idx})')))[int(idx)]
+
+ def _extract_n_function_code(self, video_id, player_url):
+ player_id = self._extract_player_info(player_url)
+ func_code = self.cache.load('youtube-nsig', player_id, min_ver='2022.09.1')
+ jscode = func_code or self._load_player(video_id, player_url)
+ jsi = JSInterpreter(jscode)
+
+ if func_code:
+ return jsi, player_id, func_code
+
+ func_name = self._extract_n_function_name(jscode)
+
+ # For redundancy, try a direct regex extraction before falling back to the JS interpreter
+ func_code = self._search_regex(
+ r'''(?xs)%s\s*=\s*function\s*\((?P<var>[\w$]+)\)\s*
+ # NB: The end of the regex is intentionally kept strict
+ {(?P<code>.+?}\s*return\ [\w$]+.join\(""\))};''' % func_name,
+ jscode, 'nsig function', group=('var', 'code'), default=None)
+ if func_code:
+ func_code = ([func_code[0]], func_code[1])
+ else:
+ self.write_debug('Extracting nsig function with jsinterp')
+ func_code = jsi.extract_function_code(func_name)
+
+ self.cache.store('youtube-nsig', player_id, func_code)
+ return jsi, player_id, func_code
+
+ def _extract_n_function_from_code(self, jsi, func_code):
+ func = jsi.extract_function_from_code(*func_code)
+
+ def extract_nsig(s):
+ try:
+ ret = func([s])
+ except JSInterpreter.Exception:
+ raise
+ except Exception as e:
+ raise JSInterpreter.Exception(traceback.format_exc(), cause=e)
+
+ if ret.startswith('enhanced_except_'):
+ raise JSInterpreter.Exception('Signature function returned an exception')
+ return ret
+
+ return extract_nsig
+
+ def _extract_signature_timestamp(self, video_id, player_url, ytcfg=None, fatal=False):
+ """
+ Extract signatureTimestamp (sts)
+ Required to tell API what sig/player version is in use.
+ """
+ sts = None
+ if isinstance(ytcfg, dict):
+ sts = int_or_none(ytcfg.get('STS'))
+
+ if not sts:
+ # Attempt to extract from player
+ if player_url is None:
+ error_msg = 'Cannot extract signature timestamp without player_url.'
+ if fatal:
+ raise ExtractorError(error_msg)
+ self.report_warning(error_msg)
+ return
+ code = self._load_player(video_id, player_url, fatal=fatal)
+ if code:
+ sts = int_or_none(self._search_regex(
+ r'(?:signatureTimestamp|sts)\s*:\s*(?P<sts>[0-9]{5})', code,
+ 'JS player signature timestamp', group='sts', fatal=fatal))
+ return sts
+
+ def _mark_watched(self, video_id, player_responses):
+ for is_full, key in enumerate(('videostatsPlaybackUrl', 'videostatsWatchtimeUrl')):
+ label = 'fully ' if is_full else ''
+ url = get_first(player_responses, ('playbackTracking', key, 'baseUrl'),
+ expected_type=url_or_none)
+ if not url:
+ self.report_warning(f'Unable to mark {label}watched')
+ return
+ parsed_url = urllib.parse.urlparse(url)
+ qs = urllib.parse.parse_qs(parsed_url.query)
+
+ # The cpn generation algorithm is reverse-engineered from base.js;
+ # in practice, it works even with a dummy cpn.
+ CPN_ALPHABET = 'abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789-_'
+ cpn = ''.join(CPN_ALPHABET[random.randint(0, 256) & 63] for _ in range(0, 16))
+
+ # More consistent results are obtained by setting it to just before the end
+ video_length = [str(float((qs.get('len') or ['1.5'])[0]) - 1)]
+
+ qs.update({
+ 'ver': ['2'],
+ 'cpn': [cpn],
+ 'cmt': video_length,
+ 'el': 'detailpage', # otherwise defaults to "shorts"
+ })
+
+ if is_full:
+ # These seem to mark watchtime "history" in the real world;
+ # they are required, so send a single value
+ qs.update({
+ 'st': 0,
+ 'et': video_length,
+ })
+
+ url = urllib.parse.urlunparse(
+ parsed_url._replace(query=urllib.parse.urlencode(qs, True)))
+
+ self._download_webpage(
+ url, video_id, f'Marking {label}watched',
+ 'Unable to mark watched', fatal=False)
+
+ @classmethod
+ def _extract_from_webpage(cls, url, webpage):
+ # Invidious Instances
+ # https://github.com/yt-dlp/yt-dlp/issues/195
+ # https://github.com/iv-org/invidious/pull/1730
+ mobj = re.search(
+ r'<link rel="alternate" href="(?P<url>https://www\.youtube\.com/watch\?v=[0-9A-Za-z_-]{11})"',
+ webpage)
+ if mobj:
+ yield cls.url_result(mobj.group('url'), cls)
+ raise cls.StopExtraction()
+
+ yield from super()._extract_from_webpage(url, webpage)
+
+ # lazyYT YouTube embed
+ for id_ in re.findall(r'class="lazyYT" data-youtube-id="([^"]+)"', webpage):
+ yield cls.url_result(unescapeHTML(id_), cls, id_)
+
+ # Wordpress "YouTube Video Importer" plugin
+ for m in re.findall(r'''(?x)<div[^>]+
+ class=(?P<q1>[\'"])[^\'"]*\byvii_single_video_player\b[^\'"]*(?P=q1)[^>]+
+ data-video_id=(?P<q2>[\'"])([^\'"]+)(?P=q2)''', webpage):
+ yield cls.url_result(m[-1], cls, m[-1])
+
+ @classmethod
+ def extract_id(cls, url):
+ video_id = cls.get_temp_id(url)
+ if not video_id:
+ raise ExtractorError(f'Invalid URL: {url}')
+ return video_id
+
+ def _extract_chapters_from_json(self, data, duration):
+ chapter_list = traverse_obj(
+ data, (
+ 'playerOverlays', 'playerOverlayRenderer', 'decoratedPlayerBarRenderer',
+ 'decoratedPlayerBarRenderer', 'playerBar', 'chapteredPlayerBarRenderer', 'chapters'
+ ), expected_type=list)
+
+ return self._extract_chapters_helper(
+ chapter_list,
+ start_function=lambda chapter: float_or_none(
+ traverse_obj(chapter, ('chapterRenderer', 'timeRangeStartMillis')), scale=1000),
+ title_function=lambda chapter: traverse_obj(
+ chapter, ('chapterRenderer', 'title', 'simpleText'), expected_type=str),
+ duration=duration)
+
+ def _extract_chapters_from_engagement_panel(self, data, duration):
+ content_list = traverse_obj(
+ data,
+ ('engagementPanels', ..., 'engagementPanelSectionListRenderer', 'content', 'macroMarkersListRenderer', 'contents'),
+ expected_type=list)
+ chapter_time = lambda chapter: parse_duration(self._get_text(chapter, 'timeDescription'))
+ chapter_title = lambda chapter: self._get_text(chapter, 'title')
+
+ return next(filter(None, (
+ self._extract_chapters_helper(traverse_obj(contents, (..., 'macroMarkersListItemRenderer')),
+ chapter_time, chapter_title, duration)
+ for contents in content_list)), [])
+
+ def _extract_heatmap(self, data):
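+ # Heatmap markers are delivered as entity mutations; keep each marker's
+ # start time, end time (start + duration) and normalized intensity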
+ return traverse_obj(data, (
+ 'frameworkUpdates', 'entityBatchUpdate', 'mutations',
+ lambda _, v: v['payload']['macroMarkersListEntity']['markersList']['markerType'] == 'MARKER_TYPE_HEATMAP',
+ 'payload', 'macroMarkersListEntity', 'markersList', 'markers', ..., {
+ 'start_time': ('startMillis', {functools.partial(float_or_none, scale=1000)}),
+ 'end_time': {lambda x: (int(x['startMillis']) + int(x['durationMillis'])) / 1000},
+ 'value': ('intensityScoreNormalized', {float_or_none}),
+ })) or None
+
+ def _extract_comment(self, comment_renderer, parent=None):
+ comment_id = comment_renderer.get('commentId')
+ if not comment_id:
+ return
+
+ info = {
+ 'id': comment_id,
+ 'text': self._get_text(comment_renderer, 'contentText'),
+ 'like_count': self._get_count(comment_renderer, 'voteCount'),
+ 'author_id': traverse_obj(comment_renderer, ('authorEndpoint', 'browseEndpoint', 'browseId', {self.ucid_or_none})),
+ 'author': self._get_text(comment_renderer, 'authorText'),
+ 'author_thumbnail': traverse_obj(comment_renderer, ('authorThumbnail', 'thumbnails', -1, 'url', {url_or_none})),
+ 'parent': parent or 'root',
+ }
+
+ # Timestamp is an estimate calculated from the current time and time_text
+ time_text = self._get_text(comment_renderer, 'publishedTimeText') or ''
+ timestamp = self._parse_time_text(time_text)
+
+ info.update({
+ # FIXME: non-standard, but we need a way of showing that it is an estimate.
+ '_time_text': time_text,
+ 'timestamp': timestamp,
+ })
+
+ info['author_url'] = urljoin(
+ 'https://www.youtube.com', traverse_obj(comment_renderer, ('authorEndpoint', (
+ ('browseEndpoint', 'canonicalBaseUrl'), ('commandMetadata', 'webCommandMetadata', 'url'))),
+ expected_type=str, get_all=False))
+
+ author_is_uploader = traverse_obj(comment_renderer, 'authorIsChannelOwner')
+ if author_is_uploader is not None:
+ info['author_is_uploader'] = author_is_uploader
+
+ comment_abr = traverse_obj(
+ comment_renderer, ('actionButtons', 'commentActionButtonsRenderer'), expected_type=dict)
+ if comment_abr is not None:
+ info['is_favorited'] = 'creatorHeart' in comment_abr
+
+ badges = self._extract_badges([traverse_obj(comment_renderer, 'authorCommentBadge')])
+ if self._has_badge(badges, BadgeType.VERIFIED):
+ info['author_is_verified'] = True
+
+ is_pinned = traverse_obj(comment_renderer, 'pinnedCommentBadge')
+ if is_pinned:
+ info['is_pinned'] = True
+
+ return info
+
+ def _comment_entries(self, root_continuation_data, ytcfg, video_id, parent=None, tracker=None):
+
+ get_single_config_arg = lambda c: self._configuration_arg(c, [''])[0]
+
+ def extract_header(contents):
+ _continuation = None
+ for content in contents:
+ comments_header_renderer = traverse_obj(content, 'commentsHeaderRenderer')
+ expected_comment_count = self._get_count(
+ comments_header_renderer, 'countText', 'commentsCount')
+
+ if expected_comment_count is not None:
+ tracker['est_total'] = expected_comment_count
+ self.to_screen(f'Downloading ~{expected_comment_count} comments')
+ comment_sort_index = int(get_single_config_arg('comment_sort') != 'top') # 1 = new, 0 = top
+
+ sort_menu_item = try_get(
+ comments_header_renderer,
+ lambda x: x['sortMenu']['sortFilterSubMenuRenderer']['subMenuItems'][comment_sort_index], dict) or {}
+ sort_continuation_ep = sort_menu_item.get('serviceEndpoint') or {}
+
+ _continuation = self._extract_continuation_ep_data(sort_continuation_ep) or self._extract_continuation(sort_menu_item)
+ if not _continuation:
+ continue
+
+ sort_text = str_or_none(sort_menu_item.get('title'))
+ if not sort_text:
+ sort_text = 'top comments' if comment_sort_index == 0 else 'newest first'
+ self.to_screen('Sorting comments by %s' % sort_text.lower())
+ break
+ return _continuation
+
+ def extract_thread(contents):
+ if not parent:
+ tracker['current_page_thread'] = 0
+ for content in contents:
+ if not parent and tracker['total_parent_comments'] >= max_parents:
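+ # A bare yield emits None; the consumer treats a falsy entry as the
+ # signal to stop extraction (see `if not entry: return` in the continuation loop)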
+ yield
+ comment_thread_renderer = try_get(content, lambda x: x['commentThreadRenderer'])
+ comment_renderer = get_first(
+ (comment_thread_renderer, content), [['commentRenderer', ('comment', 'commentRenderer')]],
+ expected_type=dict, default={})
+
+ comment = self._extract_comment(comment_renderer, parent)
+ if not comment:
+ continue
+ comment_id = comment['id']
+ if comment.get('is_pinned'):
+ tracker['pinned_comment_ids'].add(comment_id)
+ # Sometimes YouTube may break and give us infinite looping comments.
+ # See: https://github.com/yt-dlp/yt-dlp/issues/6290
+ if comment_id in tracker['seen_comment_ids']:
+ if comment_id in tracker['pinned_comment_ids'] and not comment.get('is_pinned'):
+ # Pinned comments may appear a second time in newest first sort
+ # See: https://github.com/yt-dlp/yt-dlp/issues/6712
+ continue
+ self.report_warning(
+ 'Detected YouTube comments looping. Stopping comment extraction '
+ f'{"for this thread" if parent else ""} as we probably cannot get any more.')
+ yield
+ else:
+ tracker['seen_comment_ids'].add(comment['id'])
+
+ tracker['running_total'] += 1
+ tracker['total_reply_comments' if parent else 'total_parent_comments'] += 1
+ yield comment
+
+ # Attempt to get the replies
+ comment_replies_renderer = try_get(
+ comment_thread_renderer, lambda x: x['replies']['commentRepliesRenderer'], dict)
+
+ if comment_replies_renderer:
+ tracker['current_page_thread'] += 1
+ comment_entries_iter = self._comment_entries(
+ comment_replies_renderer, ytcfg, video_id,
+ parent=comment.get('id'), tracker=tracker)
+ yield from itertools.islice(comment_entries_iter, min(
+ max_replies_per_thread, max(0, max_replies - tracker['total_reply_comments'])))
+
+ # Keeps track of counts across recursive calls
+ if not tracker:
+ tracker = dict(
+ running_total=0,
+ est_total=None,
+ current_page_thread=0,
+ total_parent_comments=0,
+ total_reply_comments=0,
+ seen_comment_ids=set(),
+ pinned_comment_ids=set()
+ )
+
+ # TODO: Deprecated
+ # YouTube comments have a max depth of 2
+ max_depth = int_or_none(get_single_config_arg('max_comment_depth'))
+ if max_depth:
+ self._downloader.deprecated_feature('[youtube] max_comment_depth extractor argument is deprecated. '
+ 'Set max replies in the max-comments extractor argument instead')
+ if max_depth == 1 and parent:
+ return
+
+ max_comments, max_parents, max_replies, max_replies_per_thread, *_ = map(
+ lambda p: int_or_none(p, default=sys.maxsize), self._configuration_arg('max_comments') + [''] * 4)
+
+ continuation = self._extract_continuation(root_continuation_data)
+
+ response = None
+ is_forced_continuation = False
+ is_first_continuation = parent is None
+ if is_first_continuation and not continuation:
+ # Sometimes you can get comments by generating the continuation yourself,
+ # even if YouTube initially reports them being disabled - e.g. stories comments.
+ # Note: if the comment section is actually disabled, YouTube may return a response with
+ # required check_get_keys missing. So we will disable that check initially in this case.
+ continuation = self._build_api_continuation_query(self._generate_comment_continuation(video_id))
+ is_forced_continuation = True
+
+ continuation_items_path = (
+ 'onResponseReceivedEndpoints', ..., ('reloadContinuationItemsCommand', 'appendContinuationItemsAction'), 'continuationItems')
+ for page_num in itertools.count(0):
+ if not continuation:
+ break
+ headers = self.generate_api_headers(ytcfg=ytcfg, visitor_data=self._extract_visitor_data(response))
+ comment_prog_str = f"({tracker['running_total']}/~{tracker['est_total']})"
+ if page_num == 0:
+ if is_first_continuation:
+ note_prefix = 'Downloading comment section API JSON'
+ else:
+ note_prefix = ' Downloading comment API JSON reply thread %d %s' % (
+ tracker['current_page_thread'], comment_prog_str)
+ else:
+ note_prefix = '%sDownloading comment%s API JSON page %d %s' % (
+ ' ' if parent else '', ' replies' if parent else '',
+ page_num, comment_prog_str)
+
+ # Do a deep check for incomplete data as sometimes YouTube may return no comments for a continuation
+ # Ignore check if YouTube says the comment count is 0.
+ check_get_keys = None
+ if not is_forced_continuation and not (tracker['est_total'] == 0 and tracker['running_total'] == 0):
+ check_get_keys = [[*continuation_items_path, ..., (
+ 'commentsHeaderRenderer' if is_first_continuation else ('commentThreadRenderer', 'commentRenderer'))]]
+ try:
+ response = self._extract_response(
+ item_id=None, query=continuation,
+ ep='next', ytcfg=ytcfg, headers=headers, note=note_prefix,
+ check_get_keys=check_get_keys)
+ except ExtractorError as e:
+ # Ignore incomplete data error for replies if retries didn't work.
+ # This is to allow any other parent comments and comment threads to be downloaded.
+ # See: https://github.com/yt-dlp/yt-dlp/issues/4669
+ if 'incomplete data' in str(e).lower() and parent:
+ if self.get_param('ignoreerrors') in (True, 'only_download'):
+ self.report_warning(
+ 'Received incomplete data for a comment reply thread and retrying did not help. '
+ 'Ignoring to let other comments be downloaded. Pass --no-ignore-errors to not ignore.')
+ return
+ else:
+ raise ExtractorError(
+ 'Incomplete data received for comment reply thread. '
+ 'Pass --ignore-errors to ignore and allow rest of comments to download.',
+ expected=True)
+ raise
+ is_forced_continuation = False
+ continuation = None
+ for continuation_items in traverse_obj(response, continuation_items_path, expected_type=list, default=[]):
+ if is_first_continuation:
+ continuation = extract_header(continuation_items)
+ is_first_continuation = False
+ if continuation:
+ break
+ continue
+
+ for entry in extract_thread(continuation_items):
+ if not entry:
+ return
+ yield entry
+ continuation = self._extract_continuation({'contents': continuation_items})
+ if continuation:
+ break
+
+ message = self._get_text(root_continuation_data, ('contents', ..., 'messageRenderer', 'text'), max_runs=1)
+ if message and not parent and tracker['running_total'] == 0:
+ self.report_warning(f'YouTube said: {message}', video_id=video_id, only_once=True)
+ raise self.CommentsDisabled
+
+ @staticmethod
+ def _generate_comment_continuation(video_id):
+ """
+ Generates initial comment section continuation token from given video id
+ """
+ token = f'\x12\r\x12\x0b{video_id}\x18\x062\'"\x11"\x0b{video_id}0\x00x\x020\x00B\x10comments-section'
+ return base64.b64encode(token.encode()).decode()
+
+ def _get_comments(self, ytcfg, video_id, contents, webpage):
+ """Entry for comment extraction"""
+ def _real_comment_extract(contents):
+ renderer = next((
+ item for item in traverse_obj(contents, (..., 'itemSectionRenderer'), default={})
+ if item.get('sectionIdentifier') == 'comment-item-section'), None)
+ yield from self._comment_entries(renderer, ytcfg, video_id)
+
+ max_comments = int_or_none(self._configuration_arg('max_comments', [''])[0])
+ return itertools.islice(_real_comment_extract(contents), 0, max_comments)
+
+ @staticmethod
+ def _get_checkok_params():
+ return {'contentCheckOk': True, 'racyCheckOk': True}
+
+ @classmethod
+ def _generate_player_context(cls, sts=None):
+ context = {
+ 'html5Preference': 'HTML5_PREF_WANTS',
+ }
+ if sts is not None:
+ context['signatureTimestamp'] = sts
+ return {
+ 'playbackContext': {
+ 'contentPlaybackContext': context
+ },
+ **cls._get_checkok_params()
+ }
+
+ @staticmethod
+ def _is_agegated(player_response):
+ if traverse_obj(player_response, ('playabilityStatus', 'desktopLegacyAgeGateReason')):
+ return True
+
+ reasons = traverse_obj(player_response, ('playabilityStatus', ('status', 'reason')))
+ AGE_GATE_REASONS = (
+ 'confirm your age', 'age-restricted', 'inappropriate', # reason
+ 'age_verification_required', 'age_check_required', # status
+ )
+ return any(expected in reason for expected in AGE_GATE_REASONS for reason in reasons)
+
+ @staticmethod
+ def _is_unplayable(player_response):
+ return traverse_obj(player_response, ('playabilityStatus', 'status')) == 'UNPLAYABLE'
+
+ def _extract_player_response(self, client, video_id, master_ytcfg, player_ytcfg, player_url, initial_pr, smuggled_data):
+
+ session_index = self._extract_session_index(player_ytcfg, master_ytcfg)
+ syncid = self._extract_account_syncid(player_ytcfg, master_ytcfg, initial_pr)
+ sts = self._extract_signature_timestamp(video_id, player_url, master_ytcfg, fatal=False) if player_url else None
+ headers = self.generate_api_headers(
+ ytcfg=player_ytcfg, account_syncid=syncid, session_index=session_index, default_client=client)
+
+ yt_query = {
+ 'videoId': video_id,
+ }
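+ # The android client is additionally sent a static `params` blob
+ # (base64 of what appears to be a protobuf message)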
+ if _split_innertube_client(client)[0] == 'android':
+ yt_query['params'] = 'CgIQBg=='
+
+ pp_arg = self._configuration_arg('player_params', [None], casesense=True)[0]
+ if pp_arg:
+ yt_query['params'] = pp_arg
+
+ yt_query.update(self._generate_player_context(sts))
+ return self._extract_response(
+ item_id=video_id, ep='player', query=yt_query,
+ ytcfg=player_ytcfg, headers=headers, fatal=True,
+ default_client=client,
+ note='Downloading %s player API JSON' % client.replace('_', ' ').strip()
+ ) or None
+
+ def _get_requested_clients(self, url, smuggled_data):
+ requested_clients = []
+ default = ['ios', 'android', 'web']
+ allowed_clients = sorted(
+ (client for client in INNERTUBE_CLIENTS.keys() if client[:1] != '_'),
+ key=lambda client: INNERTUBE_CLIENTS[client]['priority'], reverse=True)
+ for client in self._configuration_arg('player_client'):
+ if client in allowed_clients:
+ requested_clients.append(client)
+ elif client == 'default':
+ requested_clients.extend(default)
+ elif client == 'all':
+ requested_clients.extend(allowed_clients)
+ else:
+ self.report_warning(f'Skipping unsupported client {client}')
+ if not requested_clients:
+ requested_clients = default
+
+ if smuggled_data.get('is_music_url') or self.is_music_url(url):
+ requested_clients.extend(
+ f'{client}_music' for client in requested_clients if f'{client}_music' in INNERTUBE_CLIENTS)
+
+ return orderedSet(requested_clients)
+
+ def _invalid_player_response(self, pr, video_id):
+ # YouTube may return a different video player response than expected.
+ # See: https://github.com/TeamNewPipe/NewPipe/issues/8713
+ if (pr_id := traverse_obj(pr, ('videoDetails', 'videoId'))) != video_id:
+ return pr_id
+
+ def _extract_player_responses(self, clients, video_id, webpage, master_ytcfg, smuggled_data):
+ initial_pr = None
+ if webpage:
+ initial_pr = self._search_json(
+ self._YT_INITIAL_PLAYER_RESPONSE_RE, webpage, 'initial player response', video_id, fatal=False)
+
+ prs = []
+ if initial_pr and not self._invalid_player_response(initial_pr, video_id):
+            # The Android player_response does not have microformats, which are
+            # needed to extract some data. So we return the initial_pr with
+            # formats stripped out even if not requested by the user
+ # See: https://github.com/yt-dlp/yt-dlp/issues/501
+ prs.append({**initial_pr, 'streamingData': None})
+
+ all_clients = set(clients)
+ clients = clients[::-1]
+
+ def append_client(*client_names):
+ """ Append the first client name that exists but not already used """
+ for client_name in client_names:
+ actual_client = _split_innertube_client(client_name)[0]
+ if actual_client in INNERTUBE_CLIENTS:
+ if actual_client not in all_clients:
+ clients.append(client_name)
+ all_clients.add(actual_client)
+ return
+
+ tried_iframe_fallback = False
+ player_url = None
+ skipped_clients = {}
+ while clients:
+ client, base_client, variant = _split_innertube_client(clients.pop())
+ player_ytcfg = master_ytcfg if client == 'web' else {}
+ if 'configs' not in self._configuration_arg('player_skip') and client != 'web':
+ player_ytcfg = self._download_ytcfg(client, video_id) or player_ytcfg
+
+ player_url = player_url or self._extract_player_url(master_ytcfg, player_ytcfg, webpage=webpage)
+ require_js_player = self._get_default_ytcfg(client).get('REQUIRE_JS_PLAYER')
+ if 'js' in self._configuration_arg('player_skip'):
+ require_js_player = False
+ player_url = None
+
+ if not player_url and not tried_iframe_fallback and require_js_player:
+ player_url = self._download_player_url(video_id)
+ tried_iframe_fallback = True
+
+ try:
+ pr = initial_pr if client == 'web' and initial_pr else self._extract_player_response(
+ client, video_id, player_ytcfg or master_ytcfg, player_ytcfg, player_url if require_js_player else None, initial_pr, smuggled_data)
+ except ExtractorError as e:
+ self.report_warning(e)
+ continue
+
+ if pr_id := self._invalid_player_response(pr, video_id):
+ skipped_clients[client] = pr_id
+ elif pr:
+ # Save client name for introspection later
+ name = short_client_name(client)
+ sd = traverse_obj(pr, ('streamingData', {dict})) or {}
+ sd[STREAMING_DATA_CLIENT_NAME] = name
+ for f in traverse_obj(sd, (('formats', 'adaptiveFormats'), ..., {dict})):
+ f[STREAMING_DATA_CLIENT_NAME] = name
+ prs.append(pr)
+
+ # creator clients can bypass AGE_VERIFICATION_REQUIRED if logged in
+ if variant == 'embedded' and self._is_unplayable(pr) and self.is_authenticated:
+ append_client(f'{base_client}_creator')
+ elif self._is_agegated(pr):
+ if variant == 'tv_embedded':
+ append_client(f'{base_client}_embedded')
+ elif not variant:
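+                    # e.g. an age-gated video requested with a bare client is
+                    # retried with its tv_embedded (then *_embedded) variants,
+                    # which can often return a playable response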
+ append_client(f'tv_embedded.{base_client}', f'{base_client}_embedded')
+
+ if skipped_clients:
+ self.report_warning(
+ f'Skipping player responses from {"/".join(skipped_clients)} clients '
+ f'(got player responses for video "{"/".join(set(skipped_clients.values()))}" instead of "{video_id}")')
+ if not prs:
+ raise ExtractorError(
+ 'All player responses are invalid. Your IP is likely being blocked by Youtube', expected=True)
+ elif not prs:
+ raise ExtractorError('Failed to extract any player response')
+ return prs, player_url
+
+ def _needs_live_processing(self, live_status, duration):
+ if (live_status == 'is_live' and self.get_param('live_from_start')
+ or live_status == 'post_live' and (duration or 0) > 2 * 3600):
+ return live_status
+
+ def _extract_formats_and_subtitles(self, streaming_data, video_id, player_url, live_status, duration):
+ CHUNK_SIZE = 10 << 20
+ itags, stream_ids = collections.defaultdict(set), []
+ itag_qualities, res_qualities = {}, {0: None}
+ q = qualities([
+            # Normally tiny is the smallest video-only format, but audio-only
+            # formats with unknown quality may also get tagged as tiny
+ 'tiny',
+ 'audio_quality_ultralow', 'audio_quality_low', 'audio_quality_medium', 'audio_quality_high', # Audio only formats
+ 'small', 'medium', 'large', 'hd720', 'hd1080', 'hd1440', 'hd2160', 'hd2880', 'highres'
+ ])
+ streaming_formats = traverse_obj(streaming_data, (..., ('formats', 'adaptiveFormats'), ...))
+ format_types = self._configuration_arg('formats')
+ all_formats = 'duplicate' in format_types
+ if self._configuration_arg('include_duplicate_formats'):
+ all_formats = True
+ self._downloader.deprecated_feature('[youtube] include_duplicate_formats extractor argument is deprecated. '
+ 'Use formats=duplicate extractor argument instead')
+
+ def build_fragments(f):
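+            # e.g. filesize=26214400 (25 MiB) with CHUNK_SIZE=10485760 yields the
+            # byte ranges 0-10485759, 10485760-20971519 and 20971520-26214400
+            # (the final range end is clamped to the filesize)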
+ return LazyList({
+ 'url': update_url_query(f['url'], {
+ 'range': f'{range_start}-{min(range_start + CHUNK_SIZE - 1, f["filesize"])}'
+ })
+ } for range_start in range(0, f['filesize'], CHUNK_SIZE))
+
+ for fmt in streaming_formats:
+ if fmt.get('targetDurationSec'):
+ continue
+
+ itag = str_or_none(fmt.get('itag'))
+ audio_track = fmt.get('audioTrack') or {}
+ stream_id = (itag, audio_track.get('id'), fmt.get('isDrc'))
+            if not all_formats and stream_id in stream_ids:
+                continue
+
+ quality = fmt.get('quality')
+ height = int_or_none(fmt.get('height'))
+ if quality == 'tiny' or not quality:
+ quality = fmt.get('audioQuality', '').lower() or quality
+                # The 3gp format (17) in the android client has a quality of "small",
+ # but is actually worse than other formats
+ if itag == '17':
+ quality = 'tiny'
+ if quality:
+ if itag:
+ itag_qualities[itag] = quality
+ if height:
+ res_qualities[height] = quality
+            # FORMAT_STREAM_TYPE_OTF(otf=1) requires downloading the init fragment
+            # (adding `&sq=0` to the URL) and parsing the emsg box to determine the
+            # number of fragments that would subsequently be requested with (`&sq=N`)
+ if fmt.get('type') == 'FORMAT_STREAM_TYPE_OTF':
+ continue
+
+ fmt_url = fmt.get('url')
+ if not fmt_url:
+ sc = urllib.parse.parse_qs(fmt.get('signatureCipher'))
+ fmt_url = url_or_none(try_get(sc, lambda x: x['url'][0]))
+ encrypted_sig = try_get(sc, lambda x: x['s'][0])
+ if not all((sc, fmt_url, player_url, encrypted_sig)):
+ continue
+ try:
+ fmt_url += '&%s=%s' % (
+ traverse_obj(sc, ('sp', -1)) or 'signature',
+ self._decrypt_signature(encrypted_sig, video_id, player_url)
+ )
+ except ExtractorError as e:
+ self.report_warning('Signature extraction failed: Some formats may be missing',
+ video_id=video_id, only_once=True)
+ self.write_debug(e, only_once=True)
+ continue
+
+ query = parse_qs(fmt_url)
+ throttled = False
+ if query.get('n'):
+ try:
+ decrypt_nsig = self._cached(self._decrypt_nsig, 'nsig', query['n'][0])
+ fmt_url = update_url_query(fmt_url, {
+ 'n': decrypt_nsig(query['n'][0], video_id, player_url)
+ })
+ except ExtractorError as e:
+ phantomjs_hint = ''
+ if isinstance(e, JSInterpreter.Exception):
+ phantomjs_hint = (f' Install {self._downloader._format_err("PhantomJS", self._downloader.Styles.EMPHASIS)} '
+ f'to workaround the issue. {PhantomJSwrapper.INSTALL_HINT}\n')
+ if player_url:
+ self.report_warning(
+ f'nsig extraction failed: You may experience throttling for some formats\n{phantomjs_hint}'
+ f' n = {query["n"][0]} ; player = {player_url}', video_id=video_id, only_once=True)
+ self.write_debug(e, only_once=True)
+ else:
+ self.report_warning(
+ 'Cannot decrypt nsig without player_url: You may experience throttling for some formats',
+ video_id=video_id, only_once=True)
+ throttled = True
+
+ tbr = float_or_none(fmt.get('averageBitrate') or fmt.get('bitrate'), 1000)
+            language_preference = (
+                10 if audio_track.get('audioIsDefault')
+                else -10 if 'descriptive' in (audio_track.get('displayName') or '').lower()
+                else -1)
+ # Some formats may have much smaller duration than others (possibly damaged during encoding)
+ # E.g. 2-nOtRESiUc Ref: https://github.com/yt-dlp/yt-dlp/issues/2823
+ # Make sure to avoid false positives with small duration differences.
+ # E.g. __2ABJjxzNo, ySuUZEjARPY
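+            # approxDurationMs is in milliseconds while duration is in seconds, so
+            # the ratio is ~1000 for a healthy format; < 500 flags formats shorter
+            # than roughly half the expected duration (try_get returns None if the
+            # field is missing or duration is falsy)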
+ is_damaged = try_get(fmt, lambda x: float(x['approxDurationMs']) / duration < 500)
+ if is_damaged:
+ self.report_warning(
+ f'{video_id}: Some formats are possibly damaged. They will be deprioritized', only_once=True)
+
+ client_name = fmt.get(STREAMING_DATA_CLIENT_NAME)
+ name = fmt.get('qualityLabel') or quality.replace('audio_quality_', '') or ''
+ fps = int_or_none(fmt.get('fps')) or 0
+ dct = {
+ 'asr': int_or_none(fmt.get('audioSampleRate')),
+ 'filesize': int_or_none(fmt.get('contentLength')),
+ 'format_id': f'{itag}{"-drc" if fmt.get("isDrc") else ""}',
+ 'format_note': join_nonempty(
+ join_nonempty(audio_track.get('displayName'),
+ language_preference > 0 and ' (default)', delim=''),
+ name, fmt.get('isDrc') and 'DRC',
+ try_get(fmt, lambda x: x['projectionType'].replace('RECTANGULAR', '').lower()),
+ try_get(fmt, lambda x: x['spatialAudioType'].replace('SPATIAL_AUDIO_TYPE_', '').lower()),
+ throttled and 'THROTTLED', is_damaged and 'DAMAGED',
+ (self.get_param('verbose') or all_formats) and client_name,
+ delim=', '),
+ # Format 22 is likely to be damaged. See https://github.com/yt-dlp/yt-dlp/issues/3372
+ 'source_preference': ((-10 if throttled else -5 if itag == '22' else -1)
+ + (100 if 'Premium' in name else 0)),
+ 'fps': fps if fps > 1 else None, # For some formats, fps is wrongly returned as 1
+ 'audio_channels': fmt.get('audioChannels'),
+ 'height': height,
+ 'quality': q(quality) - bool(fmt.get('isDrc')) / 2,
+ 'has_drm': bool(fmt.get('drmFamilies')),
+ 'tbr': tbr,
+ 'url': fmt_url,
+ 'width': int_or_none(fmt.get('width')),
+ 'language': join_nonempty(audio_track.get('id', '').split('.')[0],
+ 'desc' if language_preference < -1 else '') or None,
+ 'language_preference': language_preference,
+ # Strictly de-prioritize damaged and 3gp formats
+ 'preference': -10 if is_damaged else -2 if itag == '17' else None,
+ }
+ mime_mobj = re.match(
+ r'((?:[^/]+)/(?:[^;]+))(?:;\s*codecs="([^"]+)")?', fmt.get('mimeType') or '')
+ if mime_mobj:
+ dct['ext'] = mimetype2ext(mime_mobj.group(1))
+ dct.update(parse_codecs(mime_mobj.group(2)))
+ if itag:
+ itags[itag].add(('https', dct.get('language')))
+ stream_ids.append(stream_id)
+ single_stream = 'none' in (dct.get('acodec'), dct.get('vcodec'))
+ if single_stream and dct.get('ext'):
+ dct['container'] = dct['ext'] + '_dash'
+
+ if (all_formats or 'dashy' in format_types) and dct['filesize']:
+ yield {
+ **dct,
+ 'format_id': f'{dct["format_id"]}-dashy' if all_formats else dct['format_id'],
+ 'protocol': 'http_dash_segments',
+ 'fragments': build_fragments(dct),
+ }
+ if all_formats or 'dashy' not in format_types:
+ dct['downloader_options'] = {'http_chunk_size': CHUNK_SIZE}
+ yield dct
+
+ needs_live_processing = self._needs_live_processing(live_status, duration)
+ skip_bad_formats = 'incomplete' not in format_types
+ if self._configuration_arg('include_incomplete_formats'):
+ skip_bad_formats = False
+ self._downloader.deprecated_feature('[youtube] include_incomplete_formats extractor argument is deprecated. '
+ 'Use formats=incomplete extractor argument instead')
+
+ skip_manifests = set(self._configuration_arg('skip'))
+ if (not self.get_param('youtube_include_hls_manifest', True)
+ or needs_live_processing == 'is_live' # These will be filtered out by YoutubeDL anyway
+ or needs_live_processing and skip_bad_formats):
+ skip_manifests.add('hls')
+
+ if not self.get_param('youtube_include_dash_manifest', True):
+ skip_manifests.add('dash')
+ if self._configuration_arg('include_live_dash'):
+ self._downloader.deprecated_feature('[youtube] include_live_dash extractor argument is deprecated. '
+ 'Use formats=incomplete extractor argument instead')
+ elif skip_bad_formats and live_status == 'is_live' and needs_live_processing != 'is_live':
+ skip_manifests.add('dash')
+
+ def process_manifest_format(f, proto, client_name, itag):
+ key = (proto, f.get('language'))
+ if not all_formats and key in itags[itag]:
+ return False
+ itags[itag].add(key)
+
+ if itag and all_formats:
+ f['format_id'] = f'{itag}-{proto}'
+ elif any(p != proto for p, _ in itags[itag]):
+ f['format_id'] = f'{itag}-{proto}'
+ elif itag:
+ f['format_id'] = itag
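+            # e.g. if itag 22 was already seen over https, its hls variant is
+            # labelled '22-hls' here so the two format ids stay distinct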
+
+ if f.get('source_preference') is None:
+ f['source_preference'] = -1
+
+ if itag in ('616', '235'):
+ f['format_note'] = join_nonempty(f.get('format_note'), 'Premium', delim=' ')
+ f['source_preference'] += 100
+
+ f['quality'] = q(itag_qualities.get(try_get(f, lambda f: f['format_id'].split('-')[0]), -1))
+ if f['quality'] == -1 and f.get('height'):
+ f['quality'] = q(res_qualities[min(res_qualities, key=lambda x: abs(x - f['height']))])
+ if self.get_param('verbose') or all_formats:
+ f['format_note'] = join_nonempty(f.get('format_note'), client_name, delim=', ')
+ if f.get('fps') and f['fps'] <= 1:
+ del f['fps']
+
+ if proto == 'hls' and f.get('has_drm'):
+ f['has_drm'] = 'maybe'
+ f['source_preference'] -= 5
+ return True
+
+ subtitles = {}
+ for sd in streaming_data:
+ client_name = sd.get(STREAMING_DATA_CLIENT_NAME)
+
+ hls_manifest_url = 'hls' not in skip_manifests and sd.get('hlsManifestUrl')
+ if hls_manifest_url:
+ fmts, subs = self._extract_m3u8_formats_and_subtitles(
+ hls_manifest_url, video_id, 'mp4', fatal=False, live=live_status == 'is_live')
+ subtitles = self._merge_subtitles(subs, subtitles)
+ for f in fmts:
+ if process_manifest_format(f, 'hls', client_name, self._search_regex(
+ r'/itag/(\d+)', f['url'], 'itag', default=None)):
+ yield f
+
+ dash_manifest_url = 'dash' not in skip_manifests and sd.get('dashManifestUrl')
+ if dash_manifest_url:
+ formats, subs = self._extract_mpd_formats_and_subtitles(dash_manifest_url, video_id, fatal=False)
+ subtitles = self._merge_subtitles(subs, subtitles) # Prioritize HLS subs over DASH
+ for f in formats:
+ if process_manifest_format(f, 'dash', client_name, f['format_id']):
+ f['filesize'] = int_or_none(self._search_regex(
+ r'/clen/(\d+)', f.get('fragment_base_url') or f['url'], 'file size', default=None))
+ if needs_live_processing:
+ f['is_from_start'] = True
+
+ yield f
+ yield subtitles
+
+ def _extract_storyboard(self, player_responses, duration):
+ spec = get_first(
+ player_responses, ('storyboards', 'playerStoryboardSpecRenderer', 'spec'), default='').split('|')[::-1]
+ base_url = url_or_none(urljoin('https://i.ytimg.com/', spec.pop() or None))
+ if not base_url:
+ return
+ L = len(spec) - 1
+ for i, args in enumerate(spec):
+ args = args.split('#')
+ counts = list(map(int_or_none, args[:5]))
+ if len(args) != 8 or not all(counts):
+ self.report_warning(f'Malformed storyboard {i}: {"#".join(args)}{bug_reports_message()}')
+ continue
+ width, height, frame_count, cols, rows = counts
+ N, sigh = args[6:]
+
+ url = base_url.replace('$L', str(L - i)).replace('$N', N) + f'&sigh={sigh}'
+ fragment_count = frame_count / (cols * rows)
+ fragment_duration = duration / fragment_count
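+            # e.g. frame_count=300 with a 5x5 grid spans 12 mosaic images; the
+            # last mosaic may be partially filled, so the final fragment duration
+            # is clamped to the remaining video duration below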
+ yield {
+ 'format_id': f'sb{i}',
+ 'format_note': 'storyboard',
+ 'ext': 'mhtml',
+ 'protocol': 'mhtml',
+ 'acodec': 'none',
+ 'vcodec': 'none',
+ 'url': url,
+ 'width': width,
+ 'height': height,
+ 'fps': frame_count / duration,
+ 'rows': rows,
+ 'columns': cols,
+ 'fragments': [{
+ 'url': url.replace('$M', str(j)),
+ 'duration': min(fragment_duration, duration - (j * fragment_duration)),
+ } for j in range(math.ceil(fragment_count))],
+ }
+
+ def _download_player_responses(self, url, smuggled_data, video_id, webpage_url):
+ webpage = None
+ if 'webpage' not in self._configuration_arg('player_skip'):
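+            # bpctr=9999999999 and has_verified=1 are long-standing parameters
+            # (inherited from youtube-dl) used to bypass the content/age warning
+            # interstitial on the watch page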
+ query = {'bpctr': '9999999999', 'has_verified': '1'}
+ pp = self._configuration_arg('player_params', [None], casesense=True)[0]
+ if pp:
+ query['pp'] = pp
+ webpage = self._download_webpage(
+ webpage_url, video_id, fatal=False, query=query)
+
+ master_ytcfg = self.extract_ytcfg(video_id, webpage) or self._get_default_ytcfg()
+
+ player_responses, player_url = self._extract_player_responses(
+ self._get_requested_clients(url, smuggled_data),
+ video_id, webpage, master_ytcfg, smuggled_data)
+
+ return webpage, master_ytcfg, player_responses, player_url
+
+ def _list_formats(self, video_id, microformats, video_details, player_responses, player_url, duration=None):
+ live_broadcast_details = traverse_obj(microformats, (..., 'liveBroadcastDetails'))
+ is_live = get_first(video_details, 'isLive')
+ if is_live is None:
+ is_live = get_first(live_broadcast_details, 'isLiveNow')
+ live_content = get_first(video_details, 'isLiveContent')
+ is_upcoming = get_first(video_details, 'isUpcoming')
+ post_live = get_first(video_details, 'isPostLiveDvr')
+ live_status = ('post_live' if post_live
+ else 'is_live' if is_live
+ else 'is_upcoming' if is_upcoming
+ else 'was_live' if live_content
+ else 'not_live' if False in (is_live, live_content)
+ else None)
+ streaming_data = traverse_obj(player_responses, (..., 'streamingData'))
+ *formats, subtitles = self._extract_formats_and_subtitles(streaming_data, video_id, player_url, live_status, duration)
+ if all(f.get('has_drm') for f in formats):
+ # If there are no formats that definitely don't have DRM, all have DRM
+ for f in formats:
+ f['has_drm'] = True
+
+ return live_broadcast_details, live_status, streaming_data, formats, subtitles
+
+ def _real_extract(self, url):
+ url, smuggled_data = unsmuggle_url(url, {})
+ video_id = self._match_id(url)
+
+ base_url = self.http_scheme() + '//www.youtube.com/'
+ webpage_url = base_url + 'watch?v=' + video_id
+
+ webpage, master_ytcfg, player_responses, player_url = self._download_player_responses(url, smuggled_data, video_id, webpage_url)
+
+ playability_statuses = traverse_obj(
+ player_responses, (..., 'playabilityStatus'), expected_type=dict)
+
+ trailer_video_id = get_first(
+ playability_statuses,
+ ('errorScreen', 'playerLegacyDesktopYpcTrailerRenderer', 'trailerVideoId'),
+ expected_type=str)
+ if trailer_video_id:
+ return self.url_result(
+ trailer_video_id, self.ie_key(), trailer_video_id)
+
+ search_meta = ((lambda x: self._html_search_meta(x, webpage, default=None))
+ if webpage else (lambda x: None))
+
+ video_details = traverse_obj(player_responses, (..., 'videoDetails'), expected_type=dict)
+ microformats = traverse_obj(
+ player_responses, (..., 'microformat', 'playerMicroformatRenderer'),
+ expected_type=dict)
+
+ translated_title = self._get_text(microformats, (..., 'title'))
+ video_title = (self._preferred_lang and translated_title
+ or get_first(video_details, 'title') # primary
+ or translated_title
+ or search_meta(['og:title', 'twitter:title', 'title']))
+ translated_description = self._get_text(microformats, (..., 'description'))
+ original_description = get_first(video_details, 'shortDescription')
+ video_description = (
+ self._preferred_lang and translated_description
+ # If original description is blank, it will be an empty string.
+ # Do not prefer translated description in this case.
+ or original_description if original_description is not None else translated_description)
+
+ multifeed_metadata_list = get_first(
+ player_responses,
+ ('multicamera', 'playerLegacyMulticameraRenderer', 'metadataList'),
+ expected_type=str)
+ if multifeed_metadata_list and not smuggled_data.get('force_singlefeed'):
+ if self.get_param('noplaylist'):
+ self.to_screen('Downloading just video %s because of --no-playlist' % video_id)
+ else:
+ entries = []
+ feed_ids = []
+ for feed in multifeed_metadata_list.split(','):
+                    # Unquoting each feed must happen only after splitting the list
+                    # on commas (,) since textual fields may contain commas as well
+                    # (see https://github.com/ytdl-org/youtube-dl/issues/8536)
+ feed_data = urllib.parse.parse_qs(
+ urllib.parse.unquote_plus(feed))
+
+ def feed_entry(name):
+ return try_get(
+ feed_data, lambda x: x[name][0], str)
+
+ feed_id = feed_entry('id')
+ if not feed_id:
+ continue
+ feed_title = feed_entry('title')
+ title = video_title
+ if feed_title:
+ title += ' (%s)' % feed_title
+ entries.append({
+ '_type': 'url_transparent',
+ 'ie_key': 'Youtube',
+ 'url': smuggle_url(
+ '%swatch?v=%s' % (base_url, feed_data['id'][0]),
+ {'force_singlefeed': True}),
+ 'title': title,
+ })
+ feed_ids.append(feed_id)
+ self.to_screen(
+ 'Downloading multifeed video (%s) - add --no-playlist to just download video %s'
+ % (', '.join(feed_ids), video_id))
+ return self.playlist_result(
+ entries, video_id, video_title, video_description)
+
+ duration = (int_or_none(get_first(video_details, 'lengthSeconds'))
+ or int_or_none(get_first(microformats, 'lengthSeconds'))
+ or parse_duration(search_meta('duration')) or None)
+
+ live_broadcast_details, live_status, streaming_data, formats, automatic_captions = \
+ self._list_formats(video_id, microformats, video_details, player_responses, player_url, duration)
+ if live_status == 'post_live':
+ self.write_debug(f'{video_id}: Video is in Post-Live Manifestless mode')
+
+ if not formats:
+ if not self.get_param('allow_unplayable_formats') and traverse_obj(streaming_data, (..., 'licenseInfos')):
+ self.report_drm(video_id)
+ pemr = get_first(
+ playability_statuses,
+ ('errorScreen', 'playerErrorMessageRenderer'), expected_type=dict) or {}
+ reason = self._get_text(pemr, 'reason') or get_first(playability_statuses, 'reason')
+ subreason = clean_html(self._get_text(pemr, 'subreason') or '')
+ if subreason:
+ if subreason == 'The uploader has not made this video available in your country.':
+ countries = get_first(microformats, 'availableCountries')
+ if not countries:
+ regions_allowed = search_meta('regionsAllowed')
+ countries = regions_allowed.split(',') if regions_allowed else None
+ self.raise_geo_restricted(subreason, countries, metadata_available=True)
+ reason += f'. {subreason}'
+ if reason:
+ self.raise_no_formats(reason, expected=True)
+
+ keywords = get_first(video_details, 'keywords', expected_type=list) or []
+ if not keywords and webpage:
+ keywords = [
+ unescapeHTML(m.group('content'))
+ for m in re.finditer(self._meta_regex('og:video:tag'), webpage)]
+ for keyword in keywords:
+ if keyword.startswith('yt:stretch='):
+ mobj = re.search(r'(\d+)\s*:\s*(\d+)', keyword)
+ if mobj:
+ # NB: float is intentional for forcing float division
+ w, h = (float(v) for v in mobj.groups())
+ if w > 0 and h > 0:
+ ratio = w / h
+ for f in formats:
+ if f.get('vcodec') != 'none':
+ f['stretched_ratio'] = ratio
+ break
+ thumbnails = self._extract_thumbnails((video_details, microformats), (..., ..., 'thumbnail'))
+ thumbnail_url = search_meta(['og:image', 'twitter:image'])
+ if thumbnail_url:
+ thumbnails.append({
+ 'url': thumbnail_url,
+ })
+ original_thumbnails = thumbnails.copy()
+
+        # The best resolution thumbnail sometimes does not appear in the webpage
+ # See: https://github.com/yt-dlp/yt-dlp/issues/340
+ # List of possible thumbnails - Ref: <https://stackoverflow.com/a/20542029>
+ thumbnail_names = [
+            # While the *1,*2,*3 thumbnails are just below their corresponding "*default" variants
+            # in resolution, they are not the custom thumbnail, so de-prioritize them
+ 'maxresdefault', 'hq720', 'sddefault', 'hqdefault', '0', 'mqdefault', 'default',
+ 'sd1', 'sd2', 'sd3', 'hq1', 'hq2', 'hq3', 'mq1', 'mq2', 'mq3', '1', '2', '3'
+ ]
+ n_thumbnail_names = len(thumbnail_names)
+ thumbnails.extend({
+ 'url': 'https://i.ytimg.com/vi{webp}/{video_id}/{name}{live}.{ext}'.format(
+ video_id=video_id, name=name, ext=ext,
+ webp='_webp' if ext == 'webp' else '', live='_live' if live_status == 'is_live' else ''),
+ } for name in thumbnail_names for ext in ('webp', 'jpg'))
+ for thumb in thumbnails:
+ i = next((i for i, t in enumerate(thumbnail_names) if f'/{video_id}/{t}' in thumb['url']), n_thumbnail_names)
+ thumb['preference'] = (0 if '.webp' in thumb['url'] else -1) - (2 * i)
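+            # e.g. a webp 'maxresdefault' (index 0) gets preference 0, while a
+            # jpg 'default' (index 6) gets -1 - 2*6 = -13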
+ self._remove_duplicate_formats(thumbnails)
+ self._downloader._sort_thumbnails(original_thumbnails)
+
+ category = get_first(microformats, 'category') or search_meta('genre')
+ channel_id = self.ucid_or_none(str_or_none(
+ get_first(video_details, 'channelId')
+ or get_first(microformats, 'externalChannelId')
+ or search_meta('channelId')))
+ owner_profile_url = get_first(microformats, 'ownerProfileUrl')
+
+ live_start_time = parse_iso8601(get_first(live_broadcast_details, 'startTimestamp'))
+ live_end_time = parse_iso8601(get_first(live_broadcast_details, 'endTimestamp'))
+ if not duration and live_end_time and live_start_time:
+ duration = live_end_time - live_start_time
+
+ needs_live_processing = self._needs_live_processing(live_status, duration)
+
+ def is_bad_format(fmt):
+ if needs_live_processing and not fmt.get('is_from_start'):
+ return True
+ elif (live_status == 'is_live' and needs_live_processing != 'is_live'
+ and fmt.get('protocol') == 'http_dash_segments'):
+ return True
+
+ for fmt in filter(is_bad_format, formats):
+ fmt['preference'] = (fmt.get('preference') or -1) - 10
+ fmt['format_note'] = join_nonempty(fmt.get('format_note'), '(Last 2 hours)', delim=' ')
+
+ if needs_live_processing:
+ self._prepare_live_from_start_formats(
+ formats, video_id, live_start_time, url, webpage_url, smuggled_data, live_status == 'is_live')
+
+ formats.extend(self._extract_storyboard(player_responses, duration))
+
+ channel_handle = self.handle_from_url(owner_profile_url)
+
+ info = {
+ 'id': video_id,
+ 'title': video_title,
+ 'formats': formats,
+ 'thumbnails': thumbnails,
+            # The best thumbnail that we are sure exists. Prevents unnecessary
+            # URL checking if the user doesn't care about getting the best possible thumbnail
+ 'thumbnail': traverse_obj(original_thumbnails, (-1, 'url')),
+ 'description': video_description,
+ 'channel_id': channel_id,
+ 'channel_url': format_field(channel_id, None, 'https://www.youtube.com/channel/%s', default=None),
+ 'duration': duration,
+ 'view_count': int_or_none(
+ get_first((video_details, microformats), (..., 'viewCount'))
+ or search_meta('interactionCount')),
+ 'average_rating': float_or_none(get_first(video_details, 'averageRating')),
+ 'age_limit': 18 if (
+ get_first(microformats, 'isFamilySafe') is False
+ or search_meta('isFamilyFriendly') == 'false'
+ or search_meta('og:restrictions:age') == '18+') else 0,
+ 'webpage_url': webpage_url,
+ 'categories': [category] if category else None,
+ 'tags': keywords,
+ 'playable_in_embed': get_first(playability_statuses, 'playableInEmbed'),
+ 'live_status': live_status,
+ 'release_timestamp': live_start_time,
+ '_format_sort_fields': ( # source_preference is lower for throttled/potentially damaged formats
+ 'quality', 'res', 'fps', 'hdr:12', 'source', 'vcodec:vp9.2', 'channels', 'acodec', 'lang', 'proto')
+ }
+
+ subtitles = {}
+ pctr = traverse_obj(player_responses, (..., 'captions', 'playerCaptionsTracklistRenderer'), expected_type=dict)
+ if pctr:
+ def get_lang_code(track):
+ return (remove_start(track.get('vssId') or '', '.').replace('.', '-')
+ or track.get('languageCode'))
+
+ # Converted into dicts to remove duplicates
+ captions = {
+ get_lang_code(sub): sub
+ for sub in traverse_obj(pctr, (..., 'captionTracks', ...))}
+ translation_languages = {
+ lang.get('languageCode'): self._get_text(lang.get('languageName'), max_runs=1)
+ for lang in traverse_obj(pctr, (..., 'translationLanguages', ...))}
+
+ def process_language(container, base_url, lang_code, sub_name, query):
+ lang_subs = container.setdefault(lang_code, [])
+ for fmt in self._SUBTITLE_FORMATS:
+ query.update({
+ 'fmt': fmt,
+ })
+ lang_subs.append({
+ 'ext': fmt,
+ 'url': urljoin('https://www.youtube.com', update_url_query(base_url, query)),
+ 'name': sub_name,
+ })
+
+ # NB: Constructing the full subtitle dictionary is slow
+ get_translated_subs = 'translated_subs' not in self._configuration_arg('skip') and (
+ self.get_param('writeautomaticsub', False) or self.get_param('listsubtitles'))
+ for lang_code, caption_track in captions.items():
+ base_url = caption_track.get('baseUrl')
+ orig_lang = parse_qs(base_url).get('lang', [None])[-1]
+ if not base_url:
+ continue
+ lang_name = self._get_text(caption_track, 'name', max_runs=1)
+ if caption_track.get('kind') != 'asr':
+ if not lang_code:
+ continue
+ process_language(
+ subtitles, base_url, lang_code, lang_name, {})
+ if not caption_track.get('isTranslatable'):
+ continue
+ for trans_code, trans_name in translation_languages.items():
+ if not trans_code:
+ continue
+ orig_trans_code = trans_code
+ if caption_track.get('kind') != 'asr' and trans_code != 'und':
+ if not get_translated_subs:
+ continue
+ trans_code += f'-{lang_code}'
+ trans_name += format_field(lang_name, None, ' from %s')
+ if lang_code == f'a-{orig_trans_code}':
+ # Set audio language based on original subtitles
+ for f in formats:
+ if f.get('acodec') != 'none' and not f.get('language'):
+ f['language'] = orig_trans_code
+ # Add an "-orig" label to the original language so that it can be distinguished.
+ # The subs are returned without "-orig" as well for compatibility
+ process_language(
+ automatic_captions, base_url, f'{trans_code}-orig', f'{trans_name} (Original)', {})
+ # Setting tlang=lang returns damaged subtitles.
+ process_language(automatic_captions, base_url, trans_code, trans_name,
+ {} if orig_lang == orig_trans_code else {'tlang': trans_code})
+
+ info['automatic_captions'] = automatic_captions
+ info['subtitles'] = subtitles
+
+ parsed_url = urllib.parse.urlparse(url)
+ for component in [parsed_url.fragment, parsed_url.query]:
+ query = urllib.parse.parse_qs(component)
+ for k, v in query.items():
+ for d_k, s_ks in [('start', ('start', 't')), ('end', ('end',))]:
+ d_k += '_time'
+ if d_k not in info and k in s_ks:
+ info[d_k] = parse_duration(query[k][0])
+
+ # Youtube Music Auto-generated description
+ if (video_description or '').strip().endswith('\nAuto-generated by YouTube.'):
+ # XXX: Causes catastrophic backtracking if description has "·"
+ # E.g. https://www.youtube.com/watch?v=DoPaAxMQoiI
+ # Simulating atomic groups: (?P<a>[^xy]+)x => (?=(?P<a>[^xy]+))(?P=a)x
+ # reduces it, but does not fully fix it. https://regex101.com/r/8Ssf2h/2
+ mobj = re.search(
+ r'''(?xs)
+ (?=(?P<track>[^\n·]+))(?P=track)·
+ (?=(?P<artist>[^\n]+))(?P=artist)\n+
+ (?=(?P<album>[^\n]+))(?P=album)\n
+ (?:.+?℗\s*(?P<release_year>\d{4})(?!\d))?
+ (?:.+?Released on\s*:\s*(?P<release_date>\d{4}-\d{2}-\d{2}))?
+ (.+?\nArtist\s*:\s*
+ (?=(?P<clean_artist>[^\n]+))(?P=clean_artist)\n
+ )?.+\nAuto-generated\ by\ YouTube\.\s*$
+ ''', video_description)
+ if mobj:
+ release_year = mobj.group('release_year')
+ release_date = mobj.group('release_date')
+ if release_date:
+ release_date = release_date.replace('-', '')
+ if not release_year:
+ release_year = release_date[:4]
+ info.update({
+                'album': mobj.group('album').strip(),
+ 'artists': ([a] if (a := mobj.group('clean_artist'))
+ else [a.strip() for a in mobj.group('artist').split('·')]),
+ 'track': mobj.group('track').strip(),
+ 'release_date': release_date,
+ 'release_year': int_or_none(release_year),
+ })
+
+ initial_data = None
+ if webpage:
+ initial_data = self.extract_yt_initial_data(video_id, webpage, fatal=False)
+ if not traverse_obj(initial_data, 'contents'):
+ self.report_warning('Incomplete data received in embedded initial data; re-fetching using API.')
+ initial_data = None
+ if not initial_data:
+ query = {'videoId': video_id}
+ query.update(self._get_checkok_params())
+ initial_data = self._extract_response(
+ item_id=video_id, ep='next', fatal=False,
+ ytcfg=master_ytcfg, query=query, check_get_keys='contents',
+ headers=self.generate_api_headers(ytcfg=master_ytcfg),
+ note='Downloading initial data API JSON')
+
+ info['comment_count'] = traverse_obj(initial_data, (
+ 'contents', 'twoColumnWatchNextResults', 'results', 'results', 'contents', ..., 'itemSectionRenderer',
+ 'contents', ..., 'commentsEntryPointHeaderRenderer', 'commentCount'
+ ), (
+ 'engagementPanels', lambda _, v: v['engagementPanelSectionListRenderer']['panelIdentifier'] == 'comment-item-section',
+ 'engagementPanelSectionListRenderer', 'header', 'engagementPanelTitleHeaderRenderer', 'contextualInfo'
+ ), expected_type=self._get_count, get_all=False)
+
+ try: # This will error if there is no livechat
+ initial_data['contents']['twoColumnWatchNextResults']['conversationBar']['liveChatRenderer']['continuations'][0]['reloadContinuationData']['continuation']
+ except (KeyError, IndexError, TypeError):
+ pass
+ else:
+ info.setdefault('subtitles', {})['live_chat'] = [{
+ # url is needed to set cookies
+ 'url': f'https://www.youtube.com/watch?v={video_id}&bpctr=9999999999&has_verified=1',
+ 'video_id': video_id,
+ 'ext': 'json',
+ 'protocol': ('youtube_live_chat' if live_status in ('is_live', 'is_upcoming')
+ else 'youtube_live_chat_replay'),
+ }]
+
+ if initial_data:
+ info['chapters'] = (
+ self._extract_chapters_from_json(initial_data, duration)
+ or self._extract_chapters_from_engagement_panel(initial_data, duration)
+ or self._extract_chapters_from_description(video_description, duration)
+ or None)
+
+ info['heatmap'] = self._extract_heatmap(initial_data)
+
+ contents = traverse_obj(
+ initial_data, ('contents', 'twoColumnWatchNextResults', 'results', 'results', 'contents'),
+ expected_type=list, default=[])
+
+ vpir = get_first(contents, 'videoPrimaryInfoRenderer')
+ if vpir:
+ stl = vpir.get('superTitleLink')
+ if stl:
+ stl = self._get_text(stl)
+ if try_get(
+ vpir,
+ lambda x: x['superTitleIcon']['iconType']) == 'LOCATION_PIN':
+ info['location'] = stl
+ else:
+ mobj = re.search(r'(.+?)\s*S(\d+)\s*•?\s*E(\d+)', stl)
+ if mobj:
+ info.update({
+ 'series': mobj.group(1),
+ 'season_number': int(mobj.group(2)),
+ 'episode_number': int(mobj.group(3)),
+ })
+ for tlb in (try_get(
+ vpir,
+ lambda x: x['videoActions']['menuRenderer']['topLevelButtons'],
+ list) or []):
+ tbrs = variadic(
+ traverse_obj(
+ tlb, ('toggleButtonRenderer', ...),
+ ('segmentedLikeDislikeButtonRenderer', ..., 'toggleButtonRenderer')))
+ for tbr in tbrs:
+ for getter, regex in [(
+ lambda x: x['defaultText']['accessibility']['accessibilityData'],
+ r'(?P<count>[\d,]+)\s*(?P<type>(?:dis)?like)'), ([
+ lambda x: x['accessibility'],
+ lambda x: x['accessibilityData']['accessibilityData'],
+ ], r'(?P<type>(?:dis)?like) this video along with (?P<count>[\d,]+) other people')]:
+ label = (try_get(tbr, getter, dict) or {}).get('label')
+ if label:
+ mobj = re.match(regex, label)
+ if mobj:
+ info[mobj.group('type') + '_count'] = str_to_int(mobj.group('count'))
+ break
+
+ info['like_count'] = traverse_obj(vpir, (
+ 'videoActions', 'menuRenderer', 'topLevelButtons', ...,
+ 'segmentedLikeDislikeButtonViewModel', 'likeButtonViewModel', 'likeButtonViewModel',
+ 'toggleButtonViewModel', 'toggleButtonViewModel', 'defaultButtonViewModel',
+ 'buttonViewModel', 'accessibilityText', {parse_count}), get_all=False)
+
+ vcr = traverse_obj(vpir, ('viewCount', 'videoViewCountRenderer'))
+ if vcr:
+ vc = self._get_count(vcr, 'viewCount')
+ # Upcoming premieres with waiting count are treated as live here
+ if vcr.get('isLive'):
+ info['concurrent_view_count'] = vc
+ elif info.get('view_count') is None:
+ info['view_count'] = vc
+
+ vsir = get_first(contents, 'videoSecondaryInfoRenderer')
+ if vsir:
+ vor = traverse_obj(vsir, ('owner', 'videoOwnerRenderer'))
+ info.update({
+ 'channel': self._get_text(vor, 'title'),
+ 'channel_follower_count': self._get_count(vor, 'subscriberCountText')})
+
+ if not channel_handle:
+ channel_handle = self.handle_from_url(
+ traverse_obj(vor, (
+ ('navigationEndpoint', ('title', 'runs', ..., 'navigationEndpoint')),
+ (('commandMetadata', 'webCommandMetadata', 'url'), ('browseEndpoint', 'canonicalBaseUrl')),
+ {str}), get_all=False))
+
+ rows = try_get(
+ vsir,
+ lambda x: x['metadataRowContainer']['metadataRowContainerRenderer']['rows'],
+ list) or []
+ multiple_songs = False
+ for row in rows:
+ if try_get(row, lambda x: x['metadataRowRenderer']['hasDividerLine']) is True:
+ multiple_songs = True
+ break
+ for row in rows:
+ mrr = row.get('metadataRowRenderer') or {}
+ mrr_title = mrr.get('title')
+ if not mrr_title:
+ continue
+ mrr_title = self._get_text(mrr, 'title')
+ mrr_contents_text = self._get_text(mrr, ('contents', 0))
+ if mrr_title == 'License':
+ info['license'] = mrr_contents_text
+ elif not multiple_songs:
+ if mrr_title == 'Album':
+ info['album'] = mrr_contents_text
+ elif mrr_title == 'Artist':
+ info['artists'] = [mrr_contents_text] if mrr_contents_text else None
+ elif mrr_title == 'Song':
+ info['track'] = mrr_contents_text
+ owner_badges = self._extract_badges(traverse_obj(vsir, ('owner', 'videoOwnerRenderer', 'badges')))
+ if self._has_badge(owner_badges, BadgeType.VERIFIED):
+ info['channel_is_verified'] = True
+
+ info.update({
+ 'uploader': info.get('channel'),
+ 'uploader_id': channel_handle,
+ 'uploader_url': format_field(channel_handle, None, 'https://www.youtube.com/%s', default=None),
+ })
+ # The upload date for scheduled, live and past live streams / premieres in microformats
+ # may be different from the stream date. Although not in UTC, we will prefer it in this case.
+ # See: https://github.com/yt-dlp/yt-dlp/pull/2223#issuecomment-1008485139
+ upload_date = (
+ unified_strdate(get_first(microformats, 'uploadDate'))
+ or unified_strdate(search_meta('uploadDate')))
+ if not upload_date or (
+ live_status in ('not_live', None)
+ and 'no-youtube-prefer-utc-upload-date' not in self.get_param('compat_opts', [])
+ ):
+ upload_date = strftime_or_none(
+ self._parse_time_text(self._get_text(vpir, 'dateText'))) or upload_date
+ info['upload_date'] = upload_date
+
+ if upload_date and live_status not in ('is_live', 'post_live', 'is_upcoming'):
+ # Newly uploaded videos' HLS formats are potentially problematic and need to be checked
+ upload_datetime = datetime_from_str(upload_date).replace(tzinfo=datetime.timezone.utc)
+ if upload_datetime >= datetime_from_str('today-2days'):
+ for fmt in info['formats']:
+ if fmt.get('protocol') == 'm3u8_native':
+ fmt['__needs_testing'] = True
+
+ for s_k, d_k in [('artists', 'creators'), ('track', 'alt_title')]:
+ v = info.get(s_k)
+ if v:
+ info[d_k] = v
+
+ badges = self._extract_badges(traverse_obj(vpir, 'badges'))
+
+ is_private = (self._has_badge(badges, BadgeType.AVAILABILITY_PRIVATE)
+ or get_first(video_details, 'isPrivate', expected_type=bool))
+
+ info['availability'] = (
+ 'public' if self._has_badge(badges, BadgeType.AVAILABILITY_PUBLIC)
+ else self._availability(
+ is_private=is_private,
+ needs_premium=(
+ self._has_badge(badges, BadgeType.AVAILABILITY_PREMIUM)
+ or False if initial_data and is_private is not None else None),
+ needs_subscription=(
+ self._has_badge(badges, BadgeType.AVAILABILITY_SUBSCRIPTION)
+ or False if initial_data and is_private is not None else None),
+ needs_auth=info['age_limit'] >= 18,
+ is_unlisted=None if is_private is None else (
+ self._has_badge(badges, BadgeType.AVAILABILITY_UNLISTED)
+ or get_first(microformats, 'isUnlisted', expected_type=bool))))
+
+ info['__post_extractor'] = self.extract_comments(master_ytcfg, video_id, contents, webpage)
+
+ self.mark_watched(video_id, player_responses)
+
+ return info
+
+
+class YoutubeTabBaseInfoExtractor(YoutubeBaseInfoExtractor):
+ @staticmethod
+ def passthrough_smuggled_data(func):
+ def _smuggle(info, smuggled_data):
+ if info.get('_type') not in ('url', 'url_transparent'):
+ return info
+ if smuggled_data.get('is_music_url'):
+ parsed_url = urllib.parse.urlparse(info['url'])
+ if parsed_url.netloc in ('www.youtube.com', 'music.youtube.com'):
+ smuggled_data.pop('is_music_url')
+ info['url'] = urllib.parse.urlunparse(parsed_url._replace(netloc='music.youtube.com'))
+ if smuggled_data:
+ info['url'] = smuggle_url(info['url'], smuggled_data)
+ return info
+
+ @functools.wraps(func)
+ def wrapper(self, url):
+ url, smuggled_data = unsmuggle_url(url, {})
+ if self.is_music_url(url):
+ smuggled_data['is_music_url'] = True
+ info_dict = func(self, url, smuggled_data)
+ if smuggled_data:
+ _smuggle(info_dict, smuggled_data)
+ if info_dict.get('entries'):
+ info_dict['entries'] = (_smuggle(i, smuggled_data.copy()) for i in info_dict['entries'])
+ return info_dict
+ return wrapper
+
+ @staticmethod
+ def _extract_basic_item_renderer(item):
+ # Modified from _extract_grid_item_renderer
+ known_basic_renderers = (
+ 'playlistRenderer', 'videoRenderer', 'channelRenderer', 'showRenderer', 'reelItemRenderer'
+ )
+ for key, renderer in item.items():
+ if not isinstance(renderer, dict):
+ continue
+ elif key in known_basic_renderers:
+ return renderer
+ elif key.startswith('grid') and key.endswith('Renderer'):
+ return renderer
+
+ def _extract_channel_renderer(self, renderer):
+ channel_id = self.ucid_or_none(renderer['channelId'])
+ title = self._get_text(renderer, 'title')
+ channel_url = format_field(channel_id, None, 'https://www.youtube.com/channel/%s', default=None)
+ channel_handle = self.handle_from_url(
+ traverse_obj(renderer, (
+ 'navigationEndpoint', (('commandMetadata', 'webCommandMetadata', 'url'),
+ ('browseEndpoint', 'canonicalBaseUrl')),
+ {str}), get_all=False))
+ if not channel_handle:
+ # As of 2023-06-01, YouTube sets subscriberCountText to the handle in search
+ channel_handle = self.handle_or_none(self._get_text(renderer, 'subscriberCountText'))
+ return {
+ '_type': 'url',
+ 'url': channel_url,
+ 'id': channel_id,
+ 'ie_key': YoutubeTabIE.ie_key(),
+ 'channel': title,
+ 'uploader': title,
+ 'channel_id': channel_id,
+ 'channel_url': channel_url,
+ 'title': title,
+ 'uploader_id': channel_handle,
+ 'uploader_url': format_field(channel_handle, None, 'https://www.youtube.com/%s', default=None),
+ # See above. YouTube sets videoCountText to the subscriber text in search channel renderers.
+ # However, in feed/channels this is set correctly to the subscriber count
+ 'channel_follower_count': traverse_obj(
+ renderer, 'subscriberCountText', 'videoCountText', expected_type=self._get_count),
+ 'thumbnails': self._extract_thumbnails(renderer, 'thumbnail'),
+ 'playlist_count': (
+ # videoCountText may be the subscriber count
+ self._get_count(renderer, 'videoCountText')
+ if self._get_count(renderer, 'subscriberCountText') is not None else None),
+ 'description': self._get_text(renderer, 'descriptionSnippet'),
+ 'channel_is_verified': True if self._has_badge(
+ self._extract_badges(traverse_obj(renderer, 'ownerBadges')), BadgeType.VERIFIED) else None,
+ }
+
+ def _grid_entries(self, grid_renderer):
+ for item in grid_renderer['items']:
+ if not isinstance(item, dict):
+ continue
+ renderer = self._extract_basic_item_renderer(item)
+ if not isinstance(renderer, dict):
+ continue
+ title = self._get_text(renderer, 'title')
+
+ # playlist
+ playlist_id = renderer.get('playlistId')
+ if playlist_id:
+ yield self.url_result(
+ 'https://www.youtube.com/playlist?list=%s' % playlist_id,
+ ie=YoutubeTabIE.ie_key(), video_id=playlist_id,
+ video_title=title)
+ continue
+ # video
+ video_id = renderer.get('videoId')
+ if video_id:
+ yield self._extract_video(renderer)
+ continue
+ # channel
+ channel_id = renderer.get('channelId')
+ if channel_id:
+ yield self._extract_channel_renderer(renderer)
+ continue
+ # generic endpoint URL support
+ ep_url = urljoin('https://www.youtube.com/', try_get(
+ renderer, lambda x: x['navigationEndpoint']['commandMetadata']['webCommandMetadata']['url'],
+ str))
+ if ep_url:
+ for ie in (YoutubeTabIE, YoutubePlaylistIE, YoutubeIE):
+ if ie.suitable(ep_url):
+ yield self.url_result(
+ ep_url, ie=ie.ie_key(), video_id=ie._match_id(ep_url), video_title=title)
+ break
+
+    def _music_responsive_list_entry(self, renderer):
+ video_id = traverse_obj(renderer, ('playlistItemData', 'videoId'))
+ if video_id:
+ title = traverse_obj(renderer, (
+ 'flexColumns', 0, 'musicResponsiveListItemFlexColumnRenderer',
+ 'text', 'runs', 0, 'text'))
+ return self.url_result(f'https://music.youtube.com/watch?v={video_id}',
+ ie=YoutubeIE.ie_key(), video_id=video_id, title=title)
+ playlist_id = traverse_obj(renderer, ('navigationEndpoint', 'watchEndpoint', 'playlistId'))
+ if playlist_id:
+ video_id = traverse_obj(renderer, ('navigationEndpoint', 'watchEndpoint', 'videoId'))
+ if video_id:
+ return self.url_result(f'https://music.youtube.com/watch?v={video_id}&list={playlist_id}',
+ ie=YoutubeTabIE.ie_key(), video_id=playlist_id)
+ return self.url_result(f'https://music.youtube.com/playlist?list={playlist_id}',
+ ie=YoutubeTabIE.ie_key(), video_id=playlist_id)
+ browse_id = traverse_obj(renderer, ('navigationEndpoint', 'browseEndpoint', 'browseId'))
+ if browse_id:
+ return self.url_result(f'https://music.youtube.com/browse/{browse_id}',
+ ie=YoutubeTabIE.ie_key(), video_id=browse_id)
+
+ def _shelf_entries_from_content(self, shelf_renderer):
+ content = shelf_renderer.get('content')
+ if not isinstance(content, dict):
+ return
+ renderer = content.get('gridRenderer') or content.get('expandedShelfContentsRenderer')
+ if renderer:
+            # TODO: add support for nested playlists so each shelf is processed
+            # as a separate playlist
+            # TODO: this includes only the first N items
+ yield from self._grid_entries(renderer)
+ renderer = content.get('horizontalListRenderer')
+ if renderer:
+ # TODO
+ pass
+
+ def _shelf_entries(self, shelf_renderer, skip_channels=False):
+ ep = try_get(
+ shelf_renderer, lambda x: x['endpoint']['commandMetadata']['webCommandMetadata']['url'],
+ str)
+ shelf_url = urljoin('https://www.youtube.com', ep)
+ if shelf_url:
+            # Skipping links to other channels; note that checking for
+            # endpoint.commandMetadata.webCommandMetadata.webPageType == WEB_PAGE_TYPE_CHANNEL
+            # will not work
+ if skip_channels and '/channels?' in shelf_url:
+ return
+ title = self._get_text(shelf_renderer, 'title')
+ yield self.url_result(shelf_url, video_title=title)
+ # Shelf may not contain shelf URL, fallback to extraction from content
+ yield from self._shelf_entries_from_content(shelf_renderer)
+
+ def _playlist_entries(self, video_list_renderer):
+ for content in video_list_renderer['contents']:
+ if not isinstance(content, dict):
+ continue
+ renderer = content.get('playlistVideoRenderer') or content.get('playlistPanelVideoRenderer')
+ if not isinstance(renderer, dict):
+ continue
+ video_id = renderer.get('videoId')
+ if not video_id:
+ continue
+ yield self._extract_video(renderer)
+
+ def _rich_entries(self, rich_grid_renderer):
+ renderer = traverse_obj(
+ rich_grid_renderer,
+ ('content', ('videoRenderer', 'reelItemRenderer', 'playlistRenderer')), get_all=False) or {}
+ video_id = renderer.get('videoId')
+ if video_id:
+ yield self._extract_video(renderer)
+ return
+ playlist_id = renderer.get('playlistId')
+ if playlist_id:
+ yield self.url_result(
+ f'https://www.youtube.com/playlist?list={playlist_id}',
+ ie=YoutubeTabIE.ie_key(), video_id=playlist_id,
+ video_title=self._get_text(renderer, 'title'))
+ return
+
+ def _video_entry(self, video_renderer):
+ video_id = video_renderer.get('videoId')
+ if video_id:
+ return self._extract_video(video_renderer)
+
+ def _hashtag_tile_entry(self, hashtag_tile_renderer):
+ url = urljoin('https://youtube.com', traverse_obj(
+ hashtag_tile_renderer, ('onTapCommand', 'commandMetadata', 'webCommandMetadata', 'url')))
+ if url:
+ return self.url_result(
+ url, ie=YoutubeTabIE.ie_key(), title=self._get_text(hashtag_tile_renderer, 'hashtag'))
+
+ def _post_thread_entries(self, post_thread_renderer):
+ post_renderer = try_get(
+ post_thread_renderer, lambda x: x['post']['backstagePostRenderer'], dict)
+ if not post_renderer:
+ return
+ # video attachment
+ video_renderer = try_get(
+ post_renderer, lambda x: x['backstageAttachment']['videoRenderer'], dict) or {}
+ video_id = video_renderer.get('videoId')
+ if video_id:
+ entry = self._extract_video(video_renderer)
+ if entry:
+ yield entry
+ # playlist attachment
+ playlist_id = try_get(
+ post_renderer, lambda x: x['backstageAttachment']['playlistRenderer']['playlistId'], str)
+ if playlist_id:
+ yield self.url_result(
+ 'https://www.youtube.com/playlist?list=%s' % playlist_id,
+ ie=YoutubeTabIE.ie_key(), video_id=playlist_id)
+ # inline video links
+ runs = try_get(post_renderer, lambda x: x['contentText']['runs'], list) or []
+ for run in runs:
+ if not isinstance(run, dict):
+ continue
+ ep_url = try_get(
+ run, lambda x: x['navigationEndpoint']['urlEndpoint']['url'], str)
+ if not ep_url:
+ continue
+ if not YoutubeIE.suitable(ep_url):
+ continue
+ ep_video_id = YoutubeIE._match_id(ep_url)
+ if video_id == ep_video_id:
+ continue
+ yield self.url_result(ep_url, ie=YoutubeIE.ie_key(), video_id=ep_video_id)
+
+ def _post_thread_continuation_entries(self, post_thread_continuation):
+ contents = post_thread_continuation.get('contents')
+ if not isinstance(contents, list):
+ return
+ for content in contents:
+ renderer = content.get('backstagePostThreadRenderer')
+ if isinstance(renderer, dict):
+ yield from self._post_thread_entries(renderer)
+ continue
+ renderer = content.get('videoRenderer')
+ if isinstance(renderer, dict):
+ yield self._video_entry(renderer)
+
+ r''' # unused
+ def _rich_grid_entries(self, contents):
+ for content in contents:
+ video_renderer = try_get(content, lambda x: x['richItemRenderer']['content']['videoRenderer'], dict)
+ if video_renderer:
+ entry = self._video_entry(video_renderer)
+ if entry:
+ yield entry
+ '''
+
+ def _report_history_entries(self, renderer):
+ for url in traverse_obj(renderer, (
+ 'rows', ..., 'reportHistoryTableRowRenderer', 'cells', ...,
+ 'reportHistoryTableCellRenderer', 'cell', 'reportHistoryTableTextCellRenderer', 'text', 'runs', ...,
+ 'navigationEndpoint', 'commandMetadata', 'webCommandMetadata', 'url')):
+ yield self.url_result(urljoin('https://www.youtube.com', url), YoutubeIE)
+
+ def _extract_entries(self, parent_renderer, continuation_list):
+        # continuation_list is modified in-place via continuation_list[:] = [continuation_token]
+ continuation_list[:] = [None]
+ contents = try_get(parent_renderer, lambda x: x['contents'], list) or []
+ for content in contents:
+ if not isinstance(content, dict):
+ continue
+ is_renderer = traverse_obj(
+ content, 'itemSectionRenderer', 'musicShelfRenderer', 'musicShelfContinuation',
+ expected_type=dict)
+ if not is_renderer:
+ if content.get('richItemRenderer'):
+ for entry in self._rich_entries(content['richItemRenderer']):
+ yield entry
+ continuation_list[0] = self._extract_continuation(parent_renderer)
+ elif content.get('reportHistorySectionRenderer'): # https://www.youtube.com/reporthistory
+ table = traverse_obj(content, ('reportHistorySectionRenderer', 'table', 'tableRenderer'))
+ yield from self._report_history_entries(table)
+ continuation_list[0] = self._extract_continuation(table)
+ continue
+
+ isr_contents = try_get(is_renderer, lambda x: x['contents'], list) or []
+ for isr_content in isr_contents:
+ if not isinstance(isr_content, dict):
+ continue
+
+ known_renderers = {
+ 'playlistVideoListRenderer': self._playlist_entries,
+ 'gridRenderer': self._grid_entries,
+ 'reelShelfRenderer': self._grid_entries,
+ 'shelfRenderer': self._shelf_entries,
+                    'musicResponsiveListItemRenderer': lambda x: [self._music_responsive_list_entry(x)],
+ 'backstagePostThreadRenderer': self._post_thread_entries,
+ 'videoRenderer': lambda x: [self._video_entry(x)],
+ 'playlistRenderer': lambda x: self._grid_entries({'items': [{'playlistRenderer': x}]}),
+ 'channelRenderer': lambda x: self._grid_entries({'items': [{'channelRenderer': x}]}),
+ 'hashtagTileRenderer': lambda x: [self._hashtag_tile_entry(x)],
+ 'richGridRenderer': lambda x: self._extract_entries(x, continuation_list),
+ }
+ for key, renderer in isr_content.items():
+ if key not in known_renderers:
+ continue
+ for entry in known_renderers[key](renderer):
+ if entry:
+ yield entry
+ continuation_list[0] = self._extract_continuation(renderer)
+ break
+
+ if not continuation_list[0]:
+ continuation_list[0] = self._extract_continuation(is_renderer)
+
+ if not continuation_list[0]:
+ continuation_list[0] = self._extract_continuation(parent_renderer)
+
+ def _entries(self, tab, item_id, ytcfg, account_syncid, visitor_data):
+ continuation_list = [None]
+ extract_entries = lambda x: self._extract_entries(x, continuation_list)
+ tab_content = try_get(tab, lambda x: x['content'], dict)
+ if not tab_content:
+ return
+ parent_renderer = (
+ try_get(tab_content, lambda x: x['sectionListRenderer'], dict)
+ or try_get(tab_content, lambda x: x['richGridRenderer'], dict) or {})
+ yield from extract_entries(parent_renderer)
+ continuation = continuation_list[0]
+ seen_continuations = set()
+ for page_num in itertools.count(1):
+ if not continuation:
+ break
+ continuation_token = continuation.get('continuation')
+ if continuation_token is not None and continuation_token in seen_continuations:
+ self.write_debug('Detected YouTube feed looping - assuming end of feed.')
+ break
+ seen_continuations.add(continuation_token)
+ headers = self.generate_api_headers(
+ ytcfg=ytcfg, account_syncid=account_syncid, visitor_data=visitor_data)
+ response = self._extract_response(
+ item_id=f'{item_id} page {page_num}',
+ query=continuation, headers=headers, ytcfg=ytcfg,
+ check_get_keys=('continuationContents', 'onResponseReceivedActions', 'onResponseReceivedEndpoints'))
+
+ if not response:
+ break
+ # Extracting updated visitor data is required to prevent an infinite extraction loop in some cases
+ # See: https://github.com/ytdl-org/youtube-dl/issues/28702
+ visitor_data = self._extract_visitor_data(response) or visitor_data
+
+ known_renderers = {
+ 'videoRenderer': (self._grid_entries, 'items'), # for membership tab
+ 'gridPlaylistRenderer': (self._grid_entries, 'items'),
+ 'gridVideoRenderer': (self._grid_entries, 'items'),
+ 'gridChannelRenderer': (self._grid_entries, 'items'),
+ 'playlistVideoRenderer': (self._playlist_entries, 'contents'),
+ 'itemSectionRenderer': (extract_entries, 'contents'), # for feeds
+ 'richItemRenderer': (extract_entries, 'contents'), # for hashtag
+ 'backstagePostThreadRenderer': (self._post_thread_continuation_entries, 'contents'),
+ 'reportHistoryTableRowRenderer': (self._report_history_entries, 'rows'),
+ 'playlistVideoListContinuation': (self._playlist_entries, None),
+ 'gridContinuation': (self._grid_entries, None),
+ 'itemSectionContinuation': (self._post_thread_continuation_entries, None),
+ 'sectionListContinuation': (extract_entries, None), # for feeds
+ }
+
+ continuation_items = traverse_obj(response, (
+ ('onResponseReceivedActions', 'onResponseReceivedEndpoints'), ...,
+ 'appendContinuationItemsAction', 'continuationItems'
+ ), 'continuationContents', get_all=False)
+ continuation_item = traverse_obj(continuation_items, 0, None, expected_type=dict, default={})
+
+ video_items_renderer = None
+ for key in continuation_item.keys():
+ if key not in known_renderers:
+ continue
+ func, parent_key = known_renderers[key]
+ video_items_renderer = {parent_key: continuation_items} if parent_key else continuation_items
+ continuation_list = [None]
+ yield from func(video_items_renderer)
+ continuation = continuation_list[0] or self._extract_continuation(video_items_renderer)
+
+ if not video_items_renderer:
+ break
+
+ @staticmethod
+ def _extract_selected_tab(tabs, fatal=True):
+ for tab_renderer in tabs:
+ if tab_renderer.get('selected'):
+ return tab_renderer
+ if fatal:
+ raise ExtractorError('Unable to find selected tab')
+
+ @staticmethod
+ def _extract_tab_renderers(response):
+ return traverse_obj(
+ response, ('contents', 'twoColumnBrowseResultsRenderer', 'tabs', ..., ('tabRenderer', 'expandableTabRenderer')), expected_type=dict)
+
+ def _extract_from_tabs(self, item_id, ytcfg, data, tabs):
+ metadata = self._extract_metadata_from_tabs(item_id, data)
+
+ selected_tab = self._extract_selected_tab(tabs)
+ metadata['title'] += format_field(selected_tab, 'title', ' - %s')
+ metadata['title'] += format_field(selected_tab, 'expandedText', ' - %s')
+
+ return self.playlist_result(
+ self._entries(
+ selected_tab, metadata['id'], ytcfg,
+ self._extract_account_syncid(ytcfg, data),
+ self._extract_visitor_data(data, ytcfg)),
+ **metadata)
+
+ def _extract_metadata_from_tabs(self, item_id, data):
+ info = {'id': item_id}
+
+ metadata_renderer = traverse_obj(data, ('metadata', 'channelMetadataRenderer'), expected_type=dict)
+ if metadata_renderer:
+ channel_id = traverse_obj(metadata_renderer, ('externalId', {self.ucid_or_none}),
+ ('channelUrl', {self.ucid_from_url}))
+ info.update({
+ 'channel': metadata_renderer.get('title'),
+ 'channel_id': channel_id,
+ })
+ if info['channel_id']:
+ info['id'] = info['channel_id']
+ else:
+ metadata_renderer = traverse_obj(data, ('metadata', 'playlistMetadataRenderer'), expected_type=dict)
+
+ # We can get the uncropped banner/avatar by replacing the crop params with '=s0'
+ # See: https://github.com/yt-dlp/yt-dlp/issues/2237#issuecomment-1013694714
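+ # e.g. '.../avatar=s48-c-k-...' -> '.../avatar=s0' (everything after the first '=' is the crop spec)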
+ def _get_uncropped(url):
+ return url_or_none((url or '').split('=')[0] + '=s0')
+
+ avatar_thumbnails = self._extract_thumbnails(metadata_renderer, 'avatar')
+ if avatar_thumbnails:
+ uncropped_avatar = _get_uncropped(avatar_thumbnails[0]['url'])
+ if uncropped_avatar:
+ avatar_thumbnails.append({
+ 'url': uncropped_avatar,
+ 'id': 'avatar_uncropped',
+ 'preference': 1
+ })
+
+ channel_banners = self._extract_thumbnails(
+ data, ('header', ..., ('banner', 'mobileBanner', 'tvBanner')))
+ for banner in channel_banners:
+ banner['preference'] = -10
+
+ if channel_banners:
+ uncropped_banner = _get_uncropped(channel_banners[0]['url'])
+ if uncropped_banner:
+ channel_banners.append({
+ 'url': uncropped_banner,
+ 'id': 'banner_uncropped',
+ 'preference': -5
+ })
+
+ # Deprecated - remove primary_sidebar_renderer when layout discontinued
+ primary_sidebar_renderer = self._extract_sidebar_info_renderer(data, 'playlistSidebarPrimaryInfoRenderer')
+ playlist_header_renderer = traverse_obj(data, ('header', 'playlistHeaderRenderer'), expected_type=dict)
+
+ primary_thumbnails = self._extract_thumbnails(
+ primary_sidebar_renderer, ('thumbnailRenderer', ('playlistVideoThumbnailRenderer', 'playlistCustomThumbnailRenderer'), 'thumbnail'))
+ playlist_thumbnails = self._extract_thumbnails(
+ playlist_header_renderer, ('playlistHeaderBanner', 'heroPlaylistThumbnailRenderer', 'thumbnail'))
+
+ info.update({
+ 'title': (traverse_obj(metadata_renderer, 'title')
+ or self._get_text(data, ('header', 'hashtagHeaderRenderer', 'hashtag'))
+ or info['id']),
+ 'availability': self._extract_availability(data),
+ 'channel_follower_count': self._get_count(data, ('header', ..., 'subscriberCountText')),
+ 'description': try_get(metadata_renderer, lambda x: x.get('description', '')),
+ 'tags': (traverse_obj(data, ('microformat', 'microformatDataRenderer', 'tags', ..., {str}))
+ or traverse_obj(metadata_renderer, ('keywords', {lambda x: x and shlex.split(x)}, ...))),
+ 'thumbnails': (primary_thumbnails or playlist_thumbnails) + avatar_thumbnails + channel_banners,
+ })
+
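+ # Prefer the handle from the vanity/owner URLs; fall back to the header's handle text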
+ channel_handle = (
+ traverse_obj(metadata_renderer, (('vanityChannelUrl', ('ownerUrls', ...)), {self.handle_from_url}), get_all=False)
+ or traverse_obj(data, ('header', ..., 'channelHandleText', {self.handle_or_none}), get_all=False))
+
+ if channel_handle:
+ info.update({
+ 'uploader_id': channel_handle,
+ 'uploader_url': format_field(channel_handle, None, 'https://www.youtube.com/%s', default=None),
+ })
+
+ channel_badges = self._extract_badges(traverse_obj(data, ('header', ..., 'badges'), get_all=False))
+ if self._has_badge(channel_badges, BadgeType.VERIFIED):
+ info['channel_is_verified'] = True
+ # Playlist stats is an array of text runs containing [video count, view count, last updated].
+ # 'last updated' alone, or both 'view count' and 'last updated', may be missing.
+ playlist_stats = get_first(
+ (primary_sidebar_renderer, playlist_header_renderer), (('stats', 'briefStats', 'numVideosText'), ))
+
+ last_updated_unix = self._parse_time_text(
+ self._get_text(playlist_stats, 2) # deprecated, remove when old layout discontinued
+ or self._get_text(playlist_header_renderer, ('byline', 1, 'playlistBylineRenderer', 'text')))
+ info['modified_date'] = strftime_or_none(last_updated_unix)
+
+ info['view_count'] = self._get_count(playlist_stats, 1)
+ if info['view_count'] is None: # 0 is allowed
+ info['view_count'] = self._get_count(playlist_header_renderer, 'viewCountText')
+ if info['view_count'] is None:
+ info['view_count'] = self._get_count(data, (
+ 'contents', 'twoColumnBrowseResultsRenderer', 'tabs', ..., 'tabRenderer', 'content', 'sectionListRenderer',
+ 'contents', ..., 'itemSectionRenderer', 'contents', ..., 'channelAboutFullMetadataRenderer', 'viewCountText'))
+
+ info['playlist_count'] = self._get_count(playlist_stats, 0)
+ if info['playlist_count'] is None: # 0 is allowed
+ info['playlist_count'] = self._get_count(playlist_header_renderer, ('byline', 0, 'playlistBylineRenderer', 'text'))
+
+ if not info.get('channel_id'):
+ owner = traverse_obj(playlist_header_renderer, 'ownerText')
+ if not owner: # Deprecated
+ owner = traverse_obj(
+ self._extract_sidebar_info_renderer(data, 'playlistSidebarSecondaryInfoRenderer'),
+ ('videoOwner', 'videoOwnerRenderer', 'title'))
+ owner_text = self._get_text(owner)
+ browse_ep = traverse_obj(owner, ('runs', 0, 'navigationEndpoint', 'browseEndpoint')) or {}
+ info.update({
+ 'channel': self._search_regex(r'^by (.+) and \d+ others?$', owner_text, 'uploader', default=owner_text),
+ 'channel_id': self.ucid_or_none(browse_ep.get('browseId')),
+ 'uploader_id': self.handle_from_url(urljoin('https://www.youtube.com', browse_ep.get('canonicalBaseUrl')))
+ })
+
+ info.update({
+ 'uploader': info['channel'],
+ 'channel_url': format_field(info.get('channel_id'), None, 'https://www.youtube.com/channel/%s', default=None),
+ 'uploader_url': format_field(info.get('uploader_id'), None, 'https://www.youtube.com/%s', default=None),
+ })
+
+ return info
+
+ def _extract_inline_playlist(self, playlist, playlist_id, data, ytcfg):
+ first_id = last_id = response = None
+ for page_num in itertools.count(1):
+ videos = list(self._playlist_entries(playlist))
+ if not videos:
+ return
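+ # Consecutive responses may overlap; resume after the last video already yielded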
+ start = next((i for i, v in enumerate(videos) if v['id'] == last_id), -1) + 1
+ if start >= len(videos):
+ return
+ yield from videos[start:]
+ first_id = first_id or videos[0]['id']
+ last_id = videos[-1]['id']
+ watch_endpoint = try_get(
+ playlist, lambda x: x['contents'][-1]['playlistPanelVideoRenderer']['navigationEndpoint']['watchEndpoint'])
+ headers = self.generate_api_headers(
+ ytcfg=ytcfg, account_syncid=self._extract_account_syncid(ytcfg, data),
+ visitor_data=self._extract_visitor_data(response, data, ytcfg))
+ query = {
+ 'playlistId': playlist_id,
+ 'videoId': watch_endpoint.get('videoId') or last_id,
+ 'index': watch_endpoint.get('index') or len(videos),
+ 'params': watch_endpoint.get('params') or 'OAE%3D'
+ }
+ response = self._extract_response(
+ item_id='%s page %d' % (playlist_id, page_num),
+ query=query, ep='next', headers=headers, ytcfg=ytcfg,
+ check_get_keys='contents'
+ )
+ playlist = try_get(
+ response, lambda x: x['contents']['twoColumnWatchNextResults']['playlist']['playlist'], dict)
+
+ def _extract_from_playlist(self, item_id, url, data, playlist, ytcfg):
+ title = playlist.get('title') or try_get(
+ data, lambda x: x['titleText']['simpleText'], str)
+ playlist_id = playlist.get('playlistId') or item_id
+
+ # Delegate everything except mix playlists to the regular tab-based playlist URL
+ playlist_url = urljoin(url, try_get(
+ playlist, lambda x: x['endpoint']['commandMetadata']['webCommandMetadata']['url'],
+ str))
+
+ # Some playlists are unviewable but YouTube still provides a link to the (broken) playlist page [1]
+ # [1] MLCT, RLTDwFCb4jeqaKWnciAYM-ZVHg
+ is_known_unviewable = re.fullmatch(r'MLCT|RLTD[\w-]{22}', playlist_id)
+
+ if playlist_url and playlist_url != url and not is_known_unviewable:
+ return self.url_result(
+ playlist_url, ie=YoutubeTabIE.ie_key(), video_id=playlist_id,
+ video_title=title)
+
+ return self.playlist_result(
+ self._extract_inline_playlist(playlist, playlist_id, data, ytcfg),
+ playlist_id=playlist_id, playlist_title=title)
+
+ def _extract_availability(self, data):
+ """
+ Gets the availability of a given playlist/tab.
+ Note: Unless YouTube tells us explicitly, we do not assume it is public
+ @param data: response
+ """
+ sidebar_renderer = self._extract_sidebar_info_renderer(data, 'playlistSidebarPrimaryInfoRenderer') or {}
+ playlist_header_renderer = traverse_obj(data, ('header', 'playlistHeaderRenderer')) or {}
+ player_header_privacy = playlist_header_renderer.get('privacy')
+
+ badges = self._extract_badges(traverse_obj(sidebar_renderer, 'badges'))
+
+ # Personal playlists, when authenticated, have a dropdown visibility selector instead of a badge
+ privacy_setting_icon = get_first(
+ (playlist_header_renderer, sidebar_renderer),
+ ('privacyForm', 'dropdownFormFieldRenderer', 'dropdown', 'dropdownRenderer', 'entries',
+ lambda _, v: v['privacyDropdownItemRenderer']['isSelected'], 'privacyDropdownItemRenderer', 'icon', 'iconType'),
+ expected_type=str)
+
+ microformats_is_unlisted = traverse_obj(
+ data, ('microformat', 'microformatDataRenderer', 'unlisted'), expected_type=bool)
+
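+ # For each signal, the resolution order is: badge, then header privacy,
+ # then the dropdown icon, then (for unlisted only) the microformat flag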
+ return (
+ 'public' if (
+ self._has_badge(badges, BadgeType.AVAILABILITY_PUBLIC)
+ or player_header_privacy == 'PUBLIC'
+ or privacy_setting_icon == 'PRIVACY_PUBLIC')
+ else self._availability(
+ is_private=(
+ self._has_badge(badges, BadgeType.AVAILABILITY_PRIVATE)
+ or player_header_privacy == 'PRIVATE' if player_header_privacy is not None
+ else privacy_setting_icon == 'PRIVACY_PRIVATE' if privacy_setting_icon is not None else None),
+ is_unlisted=(
+ self._has_badge(badges, BadgeType.AVAILABILITY_UNLISTED)
+ or player_header_privacy == 'UNLISTED' if player_header_privacy is not None
+ else privacy_setting_icon == 'PRIVACY_UNLISTED' if privacy_setting_icon is not None
+ else microformats_is_unlisted if microformats_is_unlisted is not None else None),
+ needs_subscription=self._has_badge(badges, BadgeType.AVAILABILITY_SUBSCRIPTION) or None,
+ needs_premium=self._has_badge(badges, BadgeType.AVAILABILITY_PREMIUM) or None,
+ needs_auth=False))
+
+ @staticmethod
+ def _extract_sidebar_info_renderer(data, info_renderer, expected_type=dict):
+ sidebar_renderer = try_get(
+ data, lambda x: x['sidebar']['playlistSidebarRenderer']['items'], list) or []
+ for item in sidebar_renderer:
+ renderer = try_get(item, lambda x: x[info_renderer], expected_type)
+ if renderer:
+ return renderer
+
+ def _reload_with_unavailable_videos(self, item_id, data, ytcfg):
+ """
+ Reload playlists with unavailable videos (e.g. private or region-blocked videos)
+ """
+ is_playlist = bool(traverse_obj(
+ data, ('metadata', 'playlistMetadataRenderer'), ('header', 'playlistHeaderRenderer')))
+ if not is_playlist:
+ return
+ headers = self.generate_api_headers(
+ ytcfg=ytcfg, account_syncid=self._extract_account_syncid(ytcfg, data),
+ visitor_data=self._extract_visitor_data(data, ytcfg))
+ query = {
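+ # protobuf params requesting that unavailable videos be included in the response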
+ 'params': 'wgYCCAA=',
+ 'browseId': f'VL{item_id}'
+ }
+ return self._extract_response(
+ item_id=item_id, headers=headers, query=query,
+ check_get_keys='contents', fatal=False, ytcfg=ytcfg,
+ note='Redownloading playlist API JSON with unavailable videos')
+
+ @functools.cached_property
+ def skip_webpage(self):
+ return 'webpage' in self._configuration_arg('skip', ie_key=YoutubeTabIE.ie_key())
+
+ def _extract_webpage(self, url, item_id, fatal=True):
+ webpage, data = None, None
+ for retry in self.RetryManager(fatal=fatal):
+ try:
+ webpage = self._download_webpage(url, item_id, note='Downloading webpage')
+ data = self.extract_yt_initial_data(item_id, webpage or '', fatal=fatal) or {}
+ except ExtractorError as e:
+ if isinstance(e.cause, network_exceptions):
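+ # Retry on network errors, except HTTP 403/429 where retrying is unlikely to help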
+ if not isinstance(e.cause, HTTPError) or e.cause.status not in (403, 429):
+ retry.error = e
+ continue
+ self._error_or_warning(e, fatal=fatal)
+ break
+
+ try:
+ self._extract_and_report_alerts(data)
+ except ExtractorError as e:
+ self._error_or_warning(e, fatal=fatal)
+ break
+
+ # Sometimes YouTube returns a webpage with incomplete ytInitialData
+ # See: https://github.com/yt-dlp/yt-dlp/issues/116
+ if not traverse_obj(data, 'contents', 'currentVideoEndpoint', 'onResponseReceivedActions'):
+ retry.error = ExtractorError('Incomplete yt initial data received')
+ data = None
+ continue
+
+ return webpage, data
+
+ def _report_playlist_authcheck(self, ytcfg, fatal=True):
+ """Use if failed to extract ytcfg (and data) from initial webpage"""
+ if not ytcfg and self.is_authenticated:
+ msg = 'Playlists that require authentication may not extract correctly without a successful webpage download'
+ if 'authcheck' not in self._configuration_arg('skip', ie_key=YoutubeTabIE.ie_key()) and fatal:
+ raise ExtractorError(
+ f'{msg}. If you are not downloading private content, or '
+ 'your cookies are only for the first account and channel,'
+ ' pass "--extractor-args youtubetab:skip=authcheck" to skip this check',
+ expected=True)
+ self.report_warning(msg, only_once=True)
+
+ def _extract_data(self, url, item_id, ytcfg=None, fatal=True, webpage_fatal=False, default_client='web'):
+ data = None
+ if not self.skip_webpage:
+ webpage, data = self._extract_webpage(url, item_id, fatal=webpage_fatal)
+ ytcfg = ytcfg or self.extract_ytcfg(item_id, webpage)
+ # Reject webpage data if redirected to home page without explicitly requesting
+ selected_tab = self._extract_selected_tab(self._extract_tab_renderers(data), fatal=False) or {}
+ if (url != 'https://www.youtube.com/feed/recommended'
+ and selected_tab.get('tabIdentifier') == 'FEwhat_to_watch' # Home page
+ and 'no-youtube-channel-redirect' not in self.get_param('compat_opts', [])):
+ msg = 'The channel/playlist does not exist and the URL redirected to youtube.com home page'
+ if fatal:
+ raise ExtractorError(msg, expected=True)
+ self.report_warning(msg, only_once=True)
+ if not data:
+ self._report_playlist_authcheck(ytcfg, fatal=fatal)
+ data = self._extract_tab_endpoint(url, item_id, ytcfg, fatal=fatal, default_client=default_client)
+ return data, ytcfg
+
+ def _extract_tab_endpoint(self, url, item_id, ytcfg=None, fatal=True, default_client='web'):
+ headers = self.generate_api_headers(ytcfg=ytcfg, default_client=default_client)
+ resolve_response = self._extract_response(
+ item_id=item_id, query={'url': url}, check_get_keys='endpoint', headers=headers, ytcfg=ytcfg, fatal=fatal,
+ ep='navigation/resolve_url', note='Downloading API parameters API JSON', default_client=default_client)
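+ # Map the resolved endpoint type to the corresponding innertube API endpoint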
+ endpoints = {'browseEndpoint': 'browse', 'watchEndpoint': 'next'}
+ for ep_key, ep in endpoints.items():
+ params = try_get(resolve_response, lambda x: x['endpoint'][ep_key], dict)
+ if params:
+ return self._extract_response(
+ item_id=item_id, query=params, ep=ep, headers=headers,
+ ytcfg=ytcfg, fatal=fatal, default_client=default_client,
+ check_get_keys=('contents', 'currentVideoEndpoint', 'onResponseReceivedActions'))
+ err_note = 'Failed to resolve url (does the playlist exist?)'
+ if fatal:
+ raise ExtractorError(err_note, expected=True)
+ self.report_warning(err_note, item_id)
+
+ _SEARCH_PARAMS = None
+
+ def _search_results(self, query, params=NO_DEFAULT, default_client='web'):
+ data = {'query': query}
+ if params is NO_DEFAULT:
+ params = self._SEARCH_PARAMS
+ if params:
+ data['params'] = params
+
+ content_keys = (
+ ('contents', 'twoColumnSearchResultsRenderer', 'primaryContents', 'sectionListRenderer', 'contents'),
+ ('onResponseReceivedCommands', 0, 'appendContinuationItemsAction', 'continuationItems'),
+ # ytmusic search
+ ('contents', 'tabbedSearchResultsRenderer', 'tabs', 0, 'tabRenderer', 'content', 'sectionListRenderer', 'contents'),
+ ('continuationContents', ),
+ )
+ display_id = f'query "{query}"'
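+ # Validate responses by the top-level key of each known content path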
+ check_get_keys = tuple({keys[0] for keys in content_keys})
+ ytcfg = self._download_ytcfg(default_client, display_id) if not self.skip_webpage else {}
+ self._report_playlist_authcheck(ytcfg, fatal=False)
+
+ continuation_list = [None]
+ search = None
+ for page_num in itertools.count(1):
+ data.update(continuation_list[0] or {})
+ headers = self.generate_api_headers(
+ ytcfg=ytcfg, visitor_data=self._extract_visitor_data(search), default_client=default_client)
+ search = self._extract_response(
+ item_id=f'{display_id} page {page_num}', ep='search', query=data,
+ default_client=default_client, check_get_keys=check_get_keys, ytcfg=ytcfg, headers=headers)
+ slr_contents = traverse_obj(search, *content_keys)
+ yield from self._extract_entries({'contents': list(variadic(slr_contents))}, continuation_list)
+ if not continuation_list[0]:
+ break
+
+
+class YoutubeTabIE(YoutubeTabBaseInfoExtractor):
+ IE_DESC = 'YouTube Tabs'
+ _VALID_URL = r'''(?x:
+ https?://
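+ # exclude the consent.youtube.com cookie-consent redirect domain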
+ (?!consent\.)(?:\w+\.)?
+ (?:
+ youtube(?:kids)?\.com|
+ %(invidious)s
+ )/
+ (?:
+ (?P<channel_type>channel|c|user|browse)/|
+ (?P<not_channel>
+ feed/|hashtag/|
+ (?:playlist|watch)\?.*?\blist=
+ )|
+ (?!(?:%(reserved_names)s)\b) # Direct URLs
+ )
+ (?P<id>[^/?\#&]+)
+ )''' % {
+ 'reserved_names': YoutubeBaseInfoExtractor._RESERVED_NAMES,
+ 'invidious': '|'.join(YoutubeBaseInfoExtractor._INVIDIOUS_SITES),
+ }
+ IE_NAME = 'youtube:tab'
+
+ _TESTS = [{
+ 'note': 'playlists, multipage',
+ 'url': 'https://www.youtube.com/c/ИгорьКлейнер/playlists?view=1&flow=grid',
+ 'playlist_mincount': 94,
+ 'info_dict': {
+ 'id': 'UCqj7Cz7revf5maW9g5pgNcg',
+ 'title': 'Igor Kleiner Ph.D. - Playlists',
+ 'description': 'md5:15d7dd9e333cb987907fcb0d604b233a',
+ 'uploader': 'Igor Kleiner Ph.D.',
+ 'uploader_id': '@IgorDataScience',
+ 'uploader_url': 'https://www.youtube.com/@IgorDataScience',
+ 'channel': 'Igor Kleiner Ph.D.',
+ 'channel_id': 'UCqj7Cz7revf5maW9g5pgNcg',
+ 'tags': ['критическое мышление', 'наука просто', 'математика', 'анализ данных'],
+ 'channel_url': 'https://www.youtube.com/channel/UCqj7Cz7revf5maW9g5pgNcg',
+ 'channel_follower_count': int
+ },
+ }, {
+ 'note': 'playlists, multipage, different order',
+ 'url': 'https://www.youtube.com/user/igorkle1/playlists?view=1&sort=dd',
+ 'playlist_mincount': 94,
+ 'info_dict': {
+ 'id': 'UCqj7Cz7revf5maW9g5pgNcg',
+ 'title': 'Igor Kleiner Ph.D. - Playlists',
+ 'description': 'md5:15d7dd9e333cb987907fcb0d604b233a',
+ 'uploader': 'Igor Kleiner Ph.D.',
+ 'uploader_id': '@IgorDataScience',
+ 'uploader_url': 'https://www.youtube.com/@IgorDataScience',
+ 'tags': ['критическое мышление', 'наука просто', 'математика', 'анализ данных'],
+ 'channel_id': 'UCqj7Cz7revf5maW9g5pgNcg',
+ 'channel': 'Igor Kleiner Ph.D.',
+ 'channel_url': 'https://www.youtube.com/channel/UCqj7Cz7revf5maW9g5pgNcg',
+ 'channel_follower_count': int
+ },
+ }, {
+ 'note': 'playlists, series',
+ 'url': 'https://www.youtube.com/c/3blue1brown/playlists?view=50&sort=dd&shelf_id=3',
+ 'playlist_mincount': 5,
+ 'info_dict': {
+ 'id': 'UCYO_jab_esuFRV4b17AJtAw',
+ 'title': '3Blue1Brown - Playlists',
+ 'description': 'md5:4d1da95432004b7ba840ebc895b6b4c9',
+ 'channel_url': 'https://www.youtube.com/channel/UCYO_jab_esuFRV4b17AJtAw',
+ 'channel': '3Blue1Brown',
+ 'channel_id': 'UCYO_jab_esuFRV4b17AJtAw',
+ 'uploader_id': '@3blue1brown',
+ 'uploader_url': 'https://www.youtube.com/@3blue1brown',
+ 'uploader': '3Blue1Brown',
+ 'tags': ['Mathematics'],
+ 'channel_follower_count': int,
+ 'channel_is_verified': True,
+ },
+ }, {
+ 'note': 'playlists, singlepage',
+ 'url': 'https://www.youtube.com/user/ThirstForScience/playlists',
+ 'playlist_mincount': 4,
+ 'info_dict': {
+ 'id': 'UCAEtajcuhQ6an9WEzY9LEMQ',
+ 'title': 'ThirstForScience - Playlists',
+ 'description': 'md5:609399d937ea957b0f53cbffb747a14c',
+ 'uploader': 'ThirstForScience',
+ 'uploader_url': 'https://www.youtube.com/@ThirstForScience',
+ 'uploader_id': '@ThirstForScience',
+ 'channel_id': 'UCAEtajcuhQ6an9WEzY9LEMQ',
+ 'channel_url': 'https://www.youtube.com/channel/UCAEtajcuhQ6an9WEzY9LEMQ',
+ 'tags': 'count:12',
+ 'channel': 'ThirstForScience',
+ 'channel_follower_count': int
+ }
+ }, {
+ 'url': 'https://www.youtube.com/c/ChristophLaimer/playlists',
+ 'only_matching': True,
+ }, {
+ 'note': 'basic, single video playlist',
+ 'url': 'https://www.youtube.com/playlist?list=PL4lCao7KL_QFVb7Iudeipvc2BCavECqzc',
+ 'info_dict': {
+ 'id': 'PL4lCao7KL_QFVb7Iudeipvc2BCavECqzc',
+ 'title': 'youtube-dl public playlist',
+ 'description': '',
+ 'tags': [],
+ 'view_count': int,
+ 'modified_date': '20201130',
+ 'channel': 'Sergey M.',
+ 'channel_id': 'UCmlqkdCBesrv2Lak1mF_MxA',
+ 'channel_url': 'https://www.youtube.com/channel/UCmlqkdCBesrv2Lak1mF_MxA',
+ 'availability': 'public',
+ 'uploader': 'Sergey M.',
+ 'uploader_url': 'https://www.youtube.com/@sergeym.6173',
+ 'uploader_id': '@sergeym.6173',
+ },
+ 'playlist_count': 1,
+ }, {
+ 'note': 'empty playlist',
+ 'url': 'https://www.youtube.com/playlist?list=PL4lCao7KL_QFodcLWhDpGCYnngnHtQ-Xf',
+ 'info_dict': {
+ 'id': 'PL4lCao7KL_QFodcLWhDpGCYnngnHtQ-Xf',
+ 'title': 'youtube-dl empty playlist',
+ 'tags': [],
+ 'channel': 'Sergey M.',
+ 'description': '',
+ 'modified_date': '20230921',
+ 'channel_id': 'UCmlqkdCBesrv2Lak1mF_MxA',
+ 'channel_url': 'https://www.youtube.com/channel/UCmlqkdCBesrv2Lak1mF_MxA',
+ 'availability': 'unlisted',
+ 'uploader_url': 'https://www.youtube.com/@sergeym.6173',
+ 'uploader_id': '@sergeym.6173',
+ 'uploader': 'Sergey M.',
+ },
+ 'playlist_count': 0,
+ }, {
+ 'note': 'Home tab',
+ 'url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w/featured',
+ 'info_dict': {
+ 'id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
+ 'title': 'lex will - Home',
+ 'description': 'md5:2163c5d0ff54ed5f598d6a7e6211e488',
+ 'uploader': 'lex will',
+ 'uploader_id': '@lexwill718',
+ 'channel': 'lex will',
+ 'tags': ['bible', 'history', 'prophesy'],
+ 'uploader_url': 'https://www.youtube.com/@lexwill718',
+ 'channel_url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w',
+ 'channel_id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
+ 'channel_follower_count': int
+ },
+ 'playlist_mincount': 2,
+ }, {
+ 'note': 'Videos tab',
+ 'url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w/videos',
+ 'info_dict': {
+ 'id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
+ 'title': 'lex will - Videos',
+ 'description': 'md5:2163c5d0ff54ed5f598d6a7e6211e488',
+ 'uploader': 'lex will',
+ 'uploader_id': '@lexwill718',
+ 'tags': ['bible', 'history', 'prophesy'],
+ 'channel_url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w',
+ 'channel_id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
+ 'uploader_url': 'https://www.youtube.com/@lexwill718',
+ 'channel': 'lex will',
+ 'channel_follower_count': int
+ },
+ 'playlist_mincount': 975,
+ }, {
+ 'note': 'Videos tab, sorted by popular',
+ 'url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w/videos?view=0&sort=p&flow=grid',
+ 'info_dict': {
+ 'id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
+ 'title': 'lex will - Videos',
+ 'description': 'md5:2163c5d0ff54ed5f598d6a7e6211e488',
+ 'uploader': 'lex will',
+ 'uploader_id': '@lexwill718',
+ 'channel_id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
+ 'uploader_url': 'https://www.youtube.com/@lexwill718',
+ 'channel': 'lex will',
+ 'tags': ['bible', 'history', 'prophesy'],
+ 'channel_url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w',
+ 'channel_follower_count': int
+ },
+ 'playlist_mincount': 199,
+ }, {
+ 'note': 'Playlists tab',
+ 'url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w/playlists',
+ 'info_dict': {
+ 'id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
+ 'title': 'lex will - Playlists',
+ 'description': 'md5:2163c5d0ff54ed5f598d6a7e6211e488',
+ 'uploader': 'lex will',
+ 'uploader_id': '@lexwill718',
+ 'uploader_url': 'https://www.youtube.com/@lexwill718',
+ 'channel': 'lex will',
+ 'channel_url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w',
+ 'channel_id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
+ 'tags': ['bible', 'history', 'prophesy'],
+ 'channel_follower_count': int
+ },
+ 'playlist_mincount': 17,
+ }, {
+ 'note': 'Community tab',
+ 'url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w/community',
+ 'info_dict': {
+ 'id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
+ 'title': 'lex will - Community',
+ 'description': 'md5:2163c5d0ff54ed5f598d6a7e6211e488',
+ 'channel': 'lex will',
+ 'channel_url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w',
+ 'channel_id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
+ 'tags': ['bible', 'history', 'prophesy'],
+ 'channel_follower_count': int,
+ 'uploader_url': 'https://www.youtube.com/@lexwill718',
+ 'uploader_id': '@lexwill718',
+ 'uploader': 'lex will',
+ },
+ 'playlist_mincount': 18,
+ }, {
+ 'note': 'Channels tab',
+ 'url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w/channels',
+ 'info_dict': {
+ 'id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
+ 'title': 'lex will - Channels',
+ 'description': 'md5:2163c5d0ff54ed5f598d6a7e6211e488',
+ 'channel': 'lex will',
+ 'channel_url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w',
+ 'channel_id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
+ 'tags': ['bible', 'history', 'prophesy'],
+ 'channel_follower_count': int,
+ 'uploader_url': 'https://www.youtube.com/@lexwill718',
+ 'uploader_id': '@lexwill718',
+ 'uploader': 'lex will',
+ },
+ 'playlist_mincount': 12,
+ }, {
+ 'note': 'Search tab',
+ 'url': 'https://www.youtube.com/c/3blue1brown/search?query=linear%20algebra',
+ 'playlist_mincount': 40,
+ 'info_dict': {
+ 'id': 'UCYO_jab_esuFRV4b17AJtAw',
+ 'title': '3Blue1Brown - Search - linear algebra',
+ 'description': 'md5:4d1da95432004b7ba840ebc895b6b4c9',
+ 'channel_url': 'https://www.youtube.com/channel/UCYO_jab_esuFRV4b17AJtAw',
+ 'tags': ['Mathematics'],
+ 'channel': '3Blue1Brown',
+ 'channel_id': 'UCYO_jab_esuFRV4b17AJtAw',
+ 'channel_follower_count': int,
+ 'uploader_url': 'https://www.youtube.com/@3blue1brown',
+ 'uploader_id': '@3blue1brown',
+ 'uploader': '3Blue1Brown',
+ 'channel_is_verified': True,
+ },
+ }, {
+ 'url': 'https://invidio.us/channel/UCmlqkdCBesrv2Lak1mF_MxA',
+ 'only_matching': True,
+ }, {
+ 'url': 'https://www.youtubekids.com/channel/UCmlqkdCBesrv2Lak1mF_MxA',
+ 'only_matching': True,
+ }, {
+ 'url': 'https://music.youtube.com/channel/UCmlqkdCBesrv2Lak1mF_MxA',
+ 'only_matching': True,
+ }, {
+ 'note': 'Playlist with deleted videos (#651). As a bonus, video #51 also appears twice in this list.',
+ 'url': 'https://www.youtube.com/playlist?list=PLwP_SiAcdui0KVebT0mU9Apz359a4ubsC',
+ 'info_dict': {
+ 'title': '29C3: Not my department',
+ 'id': 'PLwP_SiAcdui0KVebT0mU9Apz359a4ubsC',
+ 'description': 'md5:a14dc1a8ef8307a9807fe136a0660268',
+ 'tags': [],
+ 'view_count': int,
+ 'modified_date': '20150605',
+ 'channel_id': 'UCEPzS1rYsrkqzSLNp76nrcg',
+ 'channel_url': 'https://www.youtube.com/channel/UCEPzS1rYsrkqzSLNp76nrcg',
+ 'channel': 'Christiaan008',
+ 'availability': 'public',
+ 'uploader_id': '@ChRiStIaAn008',
+ 'uploader': 'Christiaan008',
+ 'uploader_url': 'https://www.youtube.com/@ChRiStIaAn008',
+ },
+ 'playlist_count': 96,
+ }, {
+ 'note': 'Large playlist',
+ 'url': 'https://www.youtube.com/playlist?list=UUBABnxM4Ar9ten8Mdjj1j0Q',
+ 'info_dict': {
+ 'title': 'Uploads from Cauchemar',
+ 'id': 'UUBABnxM4Ar9ten8Mdjj1j0Q',
+ 'channel_url': 'https://www.youtube.com/channel/UCBABnxM4Ar9ten8Mdjj1j0Q',
+ 'tags': [],
+ 'modified_date': r're:\d{8}',
+ 'channel': 'Cauchemar',
+ 'view_count': int,
+ 'description': '',
+ 'channel_id': 'UCBABnxM4Ar9ten8Mdjj1j0Q',
+ 'availability': 'public',
+ 'uploader_id': '@Cauchemar89',
+ 'uploader': 'Cauchemar',
+ 'uploader_url': 'https://www.youtube.com/@Cauchemar89',
+ },
+ 'playlist_mincount': 1123,
+ 'expected_warnings': [r'[Uu]navailable videos (are|will be) hidden'],
+ }, {
+ 'note': 'even larger playlist, 8832 videos',
+ 'url': 'http://www.youtube.com/user/NASAgovVideo/videos',
+ 'only_matching': True,
+ }, {
+ 'note': 'Buggy playlist: the webpage has a "Load more" button but it doesn\'t have more videos',
+ 'url': 'https://www.youtube.com/playlist?list=UUXw-G3eDE9trcvY2sBMM_aA',
+ 'info_dict': {
+ 'title': 'Uploads from Interstellar Movie',
+ 'id': 'UUXw-G3eDE9trcvY2sBMM_aA',
+ 'tags': [],
+ 'view_count': int,
+ 'channel_id': 'UCXw-G3eDE9trcvY2sBMM_aA',
+ 'channel_url': 'https://www.youtube.com/channel/UCXw-G3eDE9trcvY2sBMM_aA',
+ 'channel': 'Interstellar Movie',
+ 'description': '',
+ 'modified_date': r're:\d{8}',
+ 'availability': 'public',
+ 'uploader_id': '@InterstellarMovie',
+ 'uploader': 'Interstellar Movie',
+ 'uploader_url': 'https://www.youtube.com/@InterstellarMovie',
+ },
+ 'playlist_mincount': 21,
+ }, {
+ 'note': 'Playlist with "show unavailable videos" button',
+ 'url': 'https://www.youtube.com/playlist?list=UUTYLiWFZy8xtPwxFwX9rV7Q',
+ 'info_dict': {
+ 'title': 'Uploads from Phim Siêu Nhân Nhật Bản',
+ 'id': 'UUTYLiWFZy8xtPwxFwX9rV7Q',
+ 'view_count': int,
+ 'channel': 'Phim Siêu Nhân Nhật Bản',
+ 'tags': [],
+ 'description': '',
+ 'channel_url': 'https://www.youtube.com/channel/UCTYLiWFZy8xtPwxFwX9rV7Q',
+ 'channel_id': 'UCTYLiWFZy8xtPwxFwX9rV7Q',
+ 'modified_date': r're:\d{8}',
+ 'availability': 'public',
+ 'uploader_url': 'https://www.youtube.com/@phimsieunhannhatban',
+ 'uploader_id': '@phimsieunhannhatban',
+ 'uploader': 'Phim Siêu Nhân Nhật Bản',
+ },
+ 'playlist_mincount': 200,
+ 'expected_warnings': [r'[Uu]navailable videos (are|will be) hidden'],
+ }, {
+ 'note': 'Playlist with unavailable videos in page 7',
+ 'url': 'https://www.youtube.com/playlist?list=UU8l9frL61Yl5KFOl87nIm2w',
+ 'info_dict': {
+ 'title': 'Uploads from BlankTV',
+ 'id': 'UU8l9frL61Yl5KFOl87nIm2w',
+ 'channel': 'BlankTV',
+ 'channel_url': 'https://www.youtube.com/channel/UC8l9frL61Yl5KFOl87nIm2w',
+ 'channel_id': 'UC8l9frL61Yl5KFOl87nIm2w',
+ 'view_count': int,
+ 'tags': [],
+ 'modified_date': r're:\d{8}',
+ 'description': '',
+ 'availability': 'public',
+ 'uploader_id': '@blanktv',
+ 'uploader': 'BlankTV',
+ 'uploader_url': 'https://www.youtube.com/@blanktv',
+ },
+ 'playlist_mincount': 1000,
+ 'expected_warnings': [r'[Uu]navailable videos (are|will be) hidden'],
+ }, {
+ 'note': 'https://github.com/ytdl-org/youtube-dl/issues/21844',
+ 'url': 'https://www.youtube.com/playlist?list=PLzH6n4zXuckpfMu_4Ff8E7Z1behQks5ba',
+ 'info_dict': {
+ 'title': 'Data Analysis with Dr Mike Pound',
+ 'id': 'PLzH6n4zXuckpfMu_4Ff8E7Z1behQks5ba',
+ 'description': 'md5:7f567c574d13d3f8c0954d9ffee4e487',
+ 'tags': [],
+ 'view_count': int,
+ 'channel_id': 'UC9-y-6csu5WGm29I7JiwpnA',
+ 'channel_url': 'https://www.youtube.com/channel/UC9-y-6csu5WGm29I7JiwpnA',
+ 'channel': 'Computerphile',
+ 'availability': 'public',
+ 'modified_date': '20190712',
+ 'uploader_id': '@Computerphile',
+ 'uploader': 'Computerphile',
+ 'uploader_url': 'https://www.youtube.com/@Computerphile',
+ },
+ 'playlist_mincount': 11,
+ }, {
+ 'url': 'https://invidio.us/playlist?list=PL4lCao7KL_QFVb7Iudeipvc2BCavECqzc',
+ 'only_matching': True,
+ }, {
+ 'note': 'Playlist URL that does not actually serve a playlist',
+ 'url': 'https://www.youtube.com/watch?v=FqZTN594JQw&list=PLMYEtVRpaqY00V9W81Cwmzp6N6vZqfUKD4',
+ 'info_dict': {
+ 'id': 'FqZTN594JQw',
+ 'ext': 'webm',
+ 'title': "Smiley's People 01 detective, Adventure Series, Action",
+ 'upload_date': '20150526',
+ 'license': 'Standard YouTube License',
+ 'description': 'md5:507cdcb5a49ac0da37a920ece610be80',
+ 'categories': ['People & Blogs'],
+ 'tags': list,
+ 'view_count': int,
+ 'like_count': int,
+ },
+ 'params': {
+ 'skip_download': True,
+ },
+ 'skip': 'This video is not available.',
+ 'add_ie': [YoutubeIE.ie_key()],
+ }, {
+ 'url': 'https://www.youtubekids.com/watch?v=Agk7R8I8o5U&list=PUZ6jURNr1WQZCNHF0ao-c0g',
+ 'only_matching': True,
+ }, {
+ 'url': 'https://www.youtube.com/watch?v=MuAGGZNfUkU&list=RDMM',
+ 'only_matching': True,
+ }, {
+ 'url': 'https://www.youtube.com/channel/UCoMdktPbSTixAyNGwb-UYkQ/live',
+ 'info_dict': {
+ 'id': 'hGkQjiJLjWQ', # This will keep changing
+ 'ext': 'mp4',
+ 'title': str,
+ 'upload_date': r're:\d{8}',
+ 'description': str,
+ 'categories': ['News & Politics'],
+ 'tags': list,
+ 'like_count': int,
+ 'release_timestamp': int,
+ 'channel': 'Sky News',
+ 'channel_id': 'UCoMdktPbSTixAyNGwb-UYkQ',
+ 'age_limit': 0,
+ 'view_count': int,
+ 'thumbnail': r're:https?://i\.ytimg\.com/vi/[^/]+/maxresdefault(?:_live)?\.jpg',
+ 'playable_in_embed': True,
+ 'release_date': r're:\d+',
+ 'availability': 'public',
+ 'live_status': 'is_live',
+ 'channel_url': 'https://www.youtube.com/channel/UCoMdktPbSTixAyNGwb-UYkQ',
+ 'channel_follower_count': int,
+ 'concurrent_view_count': int,
+ 'uploader_url': 'https://www.youtube.com/@SkyNews',
+ 'uploader_id': '@SkyNews',
+ 'uploader': 'Sky News',
+ 'channel_is_verified': True,
+ },
+ 'params': {
+ 'skip_download': True,
+ },
+ 'expected_warnings': ['Ignoring subtitle tracks found in '],
+ }, {
+ 'url': 'https://www.youtube.com/user/TheYoungTurks/live',
+ 'info_dict': {
+ 'id': 'a48o2S1cPoo',
+ 'ext': 'mp4',
+ 'title': 'The Young Turks - Live Main Show',
+ 'upload_date': '20150715',
+ 'license': 'Standard YouTube License',
+ 'description': 'md5:438179573adcdff3c97ebb1ee632b891',
+ 'categories': ['News & Politics'],
+ 'tags': ['Cenk Uygur (TV Program Creator)', 'The Young Turks (Award-Winning Work)', 'Talk Show (TV Genre)'],
+ 'like_count': int,
+ },
+ 'params': {
+ 'skip_download': True,
+ },
+ 'only_matching': True,
+ }, {
+ 'url': 'https://www.youtube.com/channel/UC1yBKRuGpC1tSM73A0ZjYjQ/live',
+ 'only_matching': True,
+ }, {
+ 'url': 'https://www.youtube.com/c/CommanderVideoHq/live',
+ 'only_matching': True,
+ }, {
+ 'note': 'A channel that is not live. Should raise an error',
+ 'url': 'https://www.youtube.com/user/numberphile/live',
+ 'only_matching': True,
+ }, {
+ 'url': 'https://www.youtube.com/feed/trending',
+ 'only_matching': True,
+ }, {
+ 'url': 'https://www.youtube.com/feed/library',
+ 'only_matching': True,
+ }, {
+ 'url': 'https://www.youtube.com/feed/history',
+ 'only_matching': True,
+ }, {
+ 'url': 'https://www.youtube.com/feed/subscriptions',
+ 'only_matching': True,
+ }, {
+ 'url': 'https://www.youtube.com/feed/watch_later',
+ 'only_matching': True,
+ }, {
+ 'note': 'Recommended - redirects to home page.',
+ 'url': 'https://www.youtube.com/feed/recommended',
+ 'only_matching': True,
+ }, {
+ 'note': 'inline playlist with continuations that do not always work',
+ 'url': 'https://www.youtube.com/watch?v=UC6u0Tct-Fo&list=PL36D642111D65BE7C',
+ 'only_matching': True,
+ }, {
+ 'url': 'https://www.youtube.com/course',
+ 'only_matching': True,
+ }, {
+ 'url': 'https://www.youtube.com/zsecurity',
+ 'only_matching': True,
+ }, {
+ 'url': 'http://www.youtube.com/NASAgovVideo/videos',
+ 'only_matching': True,
+ }, {
+ 'url': 'https://www.youtube.com/TheYoungTurks/live',
+ 'only_matching': True,
+ }, {
+ 'url': 'https://www.youtube.com/hashtag/cctv9',
+ 'info_dict': {
+ 'id': 'cctv9',
+ 'title': 'cctv9 - All',
+ 'tags': [],
+ },
+ 'playlist_mincount': 300, # not consistent but should be over 300
+ }, {
+ 'url': 'https://www.youtube.com/watch?list=PLW4dVinRY435CBE_JD3t-0SRXKfnZHS1P&feature=youtu.be&v=M9cJMXmQ_ZU',
+ 'only_matching': True,
+ }, {
+ 'note': 'Requires Premium: should request additional YTM-info webpage (and have format 141) for videos in playlist',
+ 'url': 'https://music.youtube.com/playlist?list=PLRBp0Fe2GpgmgoscNFLxNyBVSFVdYmFkq',
+ 'only_matching': True
+ }, {
+ 'note': '/browse/ should redirect to /channel/',
+ 'url': 'https://music.youtube.com/browse/UC1a8OFewdjuLq6KlF8M_8Ng',
+ 'only_matching': True
+ }, {
+ 'note': 'VLPL, should redirect to playlist?list=PL...',
+ 'url': 'https://music.youtube.com/browse/VLPLRBp0Fe2GpgmgoscNFLxNyBVSFVdYmFkq',
+ 'info_dict': {
+ 'id': 'PLRBp0Fe2GpgmgoscNFLxNyBVSFVdYmFkq',
+ 'description': 'Providing you with copyright free / safe music for gaming, live streaming, studying and more!',
+ 'title': 'NCS : All Releases 💿',
+ 'channel_url': 'https://www.youtube.com/channel/UC_aEa8K-EOJ3D6gOs7HcyNg',
+ 'modified_date': r're:\d{8}',
+ 'view_count': int,
+ 'channel_id': 'UC_aEa8K-EOJ3D6gOs7HcyNg',
+ 'tags': [],
+ 'channel': 'NoCopyrightSounds',
+ 'availability': 'public',
+ 'uploader_url': 'https://www.youtube.com/@NoCopyrightSounds',
+ 'uploader': 'NoCopyrightSounds',
+ 'uploader_id': '@NoCopyrightSounds',
+ },
+ 'playlist_mincount': 166,
+ 'expected_warnings': [r'[Uu]navailable videos (are|will be) hidden', 'YouTube Music is not directly supported'],
+ }, {
+ # TODO: fix 'unviewable' issue with this playlist when reloading with unavailable videos
+ 'note': 'Topic, should redirect to playlist?list=UU...',
+ 'url': 'https://music.youtube.com/browse/UC9ALqqC4aIeG5iDs7i90Bfw',
+ 'info_dict': {
+ 'id': 'UU9ALqqC4aIeG5iDs7i90Bfw',
+ 'title': 'Uploads from Royalty Free Music - Topic',
+ 'tags': [],
+ 'channel_id': 'UC9ALqqC4aIeG5iDs7i90Bfw',
+ 'channel': 'Royalty Free Music - Topic',
+ 'view_count': int,
+ 'channel_url': 'https://www.youtube.com/channel/UC9ALqqC4aIeG5iDs7i90Bfw',
+ 'modified_date': r're:\d{8}',
+ 'description': '',
+ 'availability': 'public',
+ 'uploader': 'Royalty Free Music - Topic',
+ },
+ 'playlist_mincount': 101,
+ 'expected_warnings': ['YouTube Music is not directly supported', r'[Uu]navailable videos (are|will be) hidden'],
+ }, {
+ # Destination channel with only a hidden self tab (tab id is UCtFRv9O2AHqOZjjynzrv-xg)
+ # Treat as a general feed
+ 'url': 'https://www.youtube.com/channel/UCtFRv9O2AHqOZjjynzrv-xg',
+ 'info_dict': {
+ 'id': 'UCtFRv9O2AHqOZjjynzrv-xg',
+ 'title': 'UCtFRv9O2AHqOZjjynzrv-xg',
+ 'tags': [],
+ },
+ 'playlist_mincount': 9,
+ }, {
+ 'note': 'YouTube Music album',
+ 'url': 'https://music.youtube.com/browse/MPREb_gTAcphH99wE',
+ 'info_dict': {
+ 'id': 'OLAK5uy_l1m0thk3g31NmIIz_vMIbWtyv7eZixlH0',
+ 'title': 'Album - Royalty Free Music Library V2 (50 Songs)',
+ 'tags': [],
+ 'view_count': int,
+ 'description': '',
+ 'availability': 'unlisted',
+ 'modified_date': r're:\d{8}',
+ },
+ 'playlist_count': 50,
+ 'expected_warnings': ['YouTube Music is not directly supported'],
+ }, {
+ 'note': 'unlisted single video playlist',
+ 'url': 'https://www.youtube.com/playlist?list=PLwL24UFy54GrB3s2KMMfjZscDi1x5Dajf',
+ 'info_dict': {
+ 'id': 'PLwL24UFy54GrB3s2KMMfjZscDi1x5Dajf',
+ 'title': 'yt-dlp unlisted playlist test',
+ 'availability': 'unlisted',
+ 'tags': [],
+ 'modified_date': '20220418',
+ 'channel': 'colethedj',
+ 'view_count': int,
+ 'description': '',
+ 'channel_id': 'UC9zHu_mHU96r19o-wV5Qs1Q',
+ 'channel_url': 'https://www.youtube.com/channel/UC9zHu_mHU96r19o-wV5Qs1Q',
+ 'uploader_url': 'https://www.youtube.com/@colethedj1894',
+ 'uploader_id': '@colethedj1894',
+ 'uploader': 'colethedj',
+ },
+ 'playlist': [{
+ 'info_dict': {
+ 'title': 'youtube-dl test video "\'/\\ä↭𝕐',
+ 'id': 'BaW_jenozKc',
+ '_type': 'url',
+ 'ie_key': 'Youtube',
+ 'duration': 10,
+ 'channel_id': 'UCLqxVugv74EIW3VWh2NOa3Q',
+ 'channel_url': 'https://www.youtube.com/channel/UCLqxVugv74EIW3VWh2NOa3Q',
+ 'view_count': int,
+ 'url': 'https://www.youtube.com/watch?v=BaW_jenozKc',
+ 'channel': 'Philipp Hagemeister',
+ 'uploader_id': '@PhilippHagemeister',
+ 'uploader_url': 'https://www.youtube.com/@PhilippHagemeister',
+ 'uploader': 'Philipp Hagemeister',
+ }
+ }],
+ 'playlist_count': 1,
+ 'params': {'extract_flat': True},
+ }, {
+ 'note': 'API Fallback: Recommended - redirects to home page. Requires visitorData',
+ 'url': 'https://www.youtube.com/feed/recommended',
+ 'info_dict': {
+ 'id': 'recommended',
+ 'title': 'recommended',
+ 'tags': [],
+ },
+ 'playlist_mincount': 50,
+ 'params': {
+ 'skip_download': True,
+ 'extractor_args': {'youtubetab': {'skip': ['webpage']}}
+ },
+ }, {
+ 'note': 'API Fallback: /videos tab, sorted by oldest first',
+ 'url': 'https://www.youtube.com/user/theCodyReeder/videos?view=0&sort=da&flow=grid',
+ 'info_dict': {
+ 'id': 'UCu6mSoMNzHQiBIOCkHUa2Aw',
+ 'title': 'Cody\'sLab - Videos',
+ 'description': 'md5:d083b7c2f0c67ee7a6c74c3e9b4243fa',
+ 'channel': 'Cody\'sLab',
+ 'channel_id': 'UCu6mSoMNzHQiBIOCkHUa2Aw',
+ 'tags': [],
+ 'channel_url': 'https://www.youtube.com/channel/UCu6mSoMNzHQiBIOCkHUa2Aw',
+ 'channel_follower_count': int
+ },
+ 'playlist_mincount': 650,
+ 'params': {
+ 'skip_download': True,
+ 'extractor_args': {'youtubetab': {'skip': ['webpage']}}
+ },
+ 'skip': 'Query for sorting no longer works',
+ }, {
+ 'note': 'API Fallback: Topic, should redirect to playlist?list=UU...',
+ 'url': 'https://music.youtube.com/browse/UC9ALqqC4aIeG5iDs7i90Bfw',
+ 'info_dict': {
+ 'id': 'UU9ALqqC4aIeG5iDs7i90Bfw',
+ 'title': 'Uploads from Royalty Free Music - Topic',
+ 'modified_date': r're:\d{8}',
+ 'channel_id': 'UC9ALqqC4aIeG5iDs7i90Bfw',
+ 'description': '',
+ 'channel_url': 'https://www.youtube.com/channel/UC9ALqqC4aIeG5iDs7i90Bfw',
+ 'tags': [],
+ 'channel': 'Royalty Free Music - Topic',
+ 'view_count': int,
+ 'availability': 'public',
+ 'uploader': 'Royalty Free Music - Topic',
+ },
+ 'playlist_mincount': 101,
+ 'params': {
+ 'skip_download': True,
+ 'extractor_args': {'youtubetab': {'skip': ['webpage']}}
+ },
+ 'expected_warnings': ['YouTube Music is not directly supported', r'[Uu]navailable videos (are|will be) hidden'],
+ }, {
+ 'note': 'non-standard redirect to regional channel',
+ 'url': 'https://www.youtube.com/channel/UCwVVpHQ2Cs9iGJfpdFngePQ',
+ 'only_matching': True
+ }, {
+ 'note': 'collaborative playlist (uploader name in the form "by <uploader> and x other(s)")',
+ 'url': 'https://www.youtube.com/playlist?list=PLx-_-Kk4c89oOHEDQAojOXzEzemXxoqx6',
+ 'info_dict': {
+ 'id': 'PLx-_-Kk4c89oOHEDQAojOXzEzemXxoqx6',
+ 'modified_date': '20220407',
+ 'channel_url': 'https://www.youtube.com/channel/UCKcqXmCcyqnhgpA5P0oHH_Q',
+ 'tags': [],
+ 'availability': 'unlisted',
+ 'channel_id': 'UCKcqXmCcyqnhgpA5P0oHH_Q',
+ 'channel': 'pukkandan',
+ 'description': 'Test for collaborative playlist',
+ 'title': 'yt-dlp test - collaborative playlist',
+ 'view_count': int,
+ 'uploader_url': 'https://www.youtube.com/@pukkandan',
+ 'uploader_id': '@pukkandan',
+ 'uploader': 'pukkandan',
+ },
+ 'playlist_mincount': 2
+ }, {
+ 'note': 'translated tab name',
+ 'url': 'https://www.youtube.com/channel/UCiu-3thuViMebBjw_5nWYrA/playlists',
+ 'info_dict': {
+ 'id': 'UCiu-3thuViMebBjw_5nWYrA',
+ 'tags': [],
+ 'channel_url': 'https://www.youtube.com/channel/UCiu-3thuViMebBjw_5nWYrA',
+ 'description': 'test description',
+ 'title': 'cole-dlp-test-acc - 再生リスト',
+ 'channel_id': 'UCiu-3thuViMebBjw_5nWYrA',
+ 'channel': 'cole-dlp-test-acc',
+ 'uploader_url': 'https://www.youtube.com/@coletdjnz',
+ 'uploader_id': '@coletdjnz',
+ 'uploader': 'cole-dlp-test-acc',
+ },
+ 'playlist_mincount': 1,
+ 'params': {'extractor_args': {'youtube': {'lang': ['ja']}}},
+ 'expected_warnings': ['Preferring "ja"'],
+ }, {
+ # XXX: this should really check flat playlist entries, but the test suite doesn't support that
+ 'note': 'preferred lang set with playlist with translated video titles',
+ 'url': 'https://www.youtube.com/playlist?list=PLt5yu3-wZAlQAaPZ5Z-rJoTdbT-45Q7c0',
+ 'info_dict': {
+ 'id': 'PLt5yu3-wZAlQAaPZ5Z-rJoTdbT-45Q7c0',
+ 'tags': [],
+ 'view_count': int,
+ 'channel_url': 'https://www.youtube.com/channel/UCiu-3thuViMebBjw_5nWYrA',
+ 'channel': 'cole-dlp-test-acc',
+ 'channel_id': 'UCiu-3thuViMebBjw_5nWYrA',
+ 'description': 'test',
+ 'title': 'dlp test playlist',
+ 'availability': 'public',
+ 'uploader_url': 'https://www.youtube.com/@coletdjnz',
+ 'uploader_id': '@coletdjnz',
+ 'uploader': 'cole-dlp-test-acc',
+ },
+ 'playlist_mincount': 1,
+ 'params': {'extractor_args': {'youtube': {'lang': ['ja']}}},
+ 'expected_warnings': ['Preferring "ja"'],
+ }, {
+ # shorts audio pivot for 2GtVksBMYFM.
+ 'url': 'https://www.youtube.com/feed/sfv_audio_pivot?bp=8gUrCikSJwoLMkd0VmtzQk1ZRk0SCzJHdFZrc0JNWUZNGgsyR3RWa3NCTVlGTQ==',
+ 'info_dict': {
+ 'id': 'sfv_audio_pivot',
+ 'title': 'sfv_audio_pivot',
+ 'tags': [],
+ },
+ 'playlist_mincount': 50,
+ }, {
+ # Channel with a real live tab (not to be mistaken for the streams tab)
+ # Do not treat it as if it should redirect to a live stream
+ 'url': 'https://www.youtube.com/channel/UCEH7P7kyJIkS_gJf93VYbmg/live',
+ 'info_dict': {
+ 'id': 'UCEH7P7kyJIkS_gJf93VYbmg',
+ 'title': 'UCEH7P7kyJIkS_gJf93VYbmg - Live',
+ 'tags': [],
+ },
+ 'playlist_mincount': 20,
+ }, {
+ # Tab name is not the same as tab id
+ 'url': 'https://www.youtube.com/channel/UCQvWX73GQygcwXOTSf_VDVg/letsplay',
+ 'info_dict': {
+ 'id': 'UCQvWX73GQygcwXOTSf_VDVg',
+ 'title': 'UCQvWX73GQygcwXOTSf_VDVg - Let\'s play',
+ 'tags': [],
+ },
+ 'playlist_mincount': 8,
+ }, {
+ # Home tab id is literally 'home'. Not to be mistaken for 'featured'
+ 'url': 'https://www.youtube.com/channel/UCQvWX73GQygcwXOTSf_VDVg/home',
+ 'info_dict': {
+ 'id': 'UCQvWX73GQygcwXOTSf_VDVg',
+ 'title': 'UCQvWX73GQygcwXOTSf_VDVg - Home',
+ 'tags': [],
+ },
+ 'playlist_mincount': 8,
+ }, {
+ # Should get three playlists for videos, shorts and streams tabs
+ 'url': 'https://www.youtube.com/channel/UCK9V2B22uJYu3N7eR_BT9QA',
+ 'info_dict': {
+ 'id': 'UCK9V2B22uJYu3N7eR_BT9QA',
+ 'title': 'Polka Ch. 尾丸ポルカ',
+ 'channel_follower_count': int,
+ 'channel_id': 'UCK9V2B22uJYu3N7eR_BT9QA',
+ 'channel_url': 'https://www.youtube.com/channel/UCK9V2B22uJYu3N7eR_BT9QA',
+ 'description': 'md5:49809d8bf9da539bc48ed5d1f83c33f2',
+ 'channel': 'Polka Ch. 尾丸ポルカ',
+ 'tags': 'count:35',
+ 'uploader_url': 'https://www.youtube.com/@OmaruPolka',
+ 'uploader': 'Polka Ch. 尾丸ポルカ',
+ 'uploader_id': '@OmaruPolka',
+ 'channel_is_verified': True,
+ },
+ 'playlist_count': 3,
+ }, {
+ # Shorts tab of a channel with a handle
+ # TODO: fix channel description
+ 'url': 'https://www.youtube.com/@NotJustBikes/shorts',
+ 'info_dict': {
+ 'id': 'UC0intLFzLaudFG-xAvUEO-A',
+ 'title': 'Not Just Bikes - Shorts',
+ 'tags': 'count:10',
+ 'channel_url': 'https://www.youtube.com/channel/UC0intLFzLaudFG-xAvUEO-A',
+ 'description': 'md5:5e82545b3a041345927a92d0585df247',
+ 'channel_follower_count': int,
+ 'channel_id': 'UC0intLFzLaudFG-xAvUEO-A',
+ 'channel': 'Not Just Bikes',
+ 'uploader_url': 'https://www.youtube.com/@NotJustBikes',
+ 'uploader': 'Not Just Bikes',
+ 'uploader_id': '@NotJustBikes',
+ 'channel_is_verified': True,
+ },
+ 'playlist_mincount': 10,
+ }, {
+ # Streams tab
+ 'url': 'https://www.youtube.com/channel/UC3eYAvjCVwNHgkaGbXX3sig/streams',
+ 'info_dict': {
+ 'id': 'UC3eYAvjCVwNHgkaGbXX3sig',
+ 'title': '中村悠一 - Live',
+ 'tags': 'count:7',
+ 'channel_id': 'UC3eYAvjCVwNHgkaGbXX3sig',
+ 'channel_url': 'https://www.youtube.com/channel/UC3eYAvjCVwNHgkaGbXX3sig',
+ 'channel': '中村悠一',
+ 'channel_follower_count': int,
+ 'description': 'md5:e744f6c93dafa7a03c0c6deecb157300',
+ 'uploader_url': 'https://www.youtube.com/@Yuichi-Nakamura',
+ 'uploader_id': '@Yuichi-Nakamura',
+ 'uploader': '中村悠一',
+ },
+ 'playlist_mincount': 60,
+ }, {
+ # Channel with no uploads and hence no videos, streams, shorts tabs or uploads playlist. This should fail.
+ # See test_youtube_lists
+ 'url': 'https://www.youtube.com/channel/UC2yXPzFejc422buOIzn_0CA',
+ 'only_matching': True,
+ }, {
+ # No uploads and no UCID given. Should fail with no uploads error
+ # See test_youtube_lists
+ 'url': 'https://www.youtube.com/news',
+ 'only_matching': True
+ }, {
+ # No videos tab but has a shorts tab
+ 'url': 'https://www.youtube.com/c/TKFShorts',
+ 'info_dict': {
+ 'id': 'UCgJ5_1F6yJhYLnyMszUdmUg',
+ 'title': 'Shorts Break - Shorts',
+ 'tags': 'count:48',
+ 'channel_id': 'UCgJ5_1F6yJhYLnyMszUdmUg',
+ 'channel': 'Shorts Break',
+ 'description': 'md5:6de33c5e7ba686e5f3efd4e19c7ef499',
+ 'channel_follower_count': int,
+ 'channel_url': 'https://www.youtube.com/channel/UCgJ5_1F6yJhYLnyMszUdmUg',
+ 'uploader_url': 'https://www.youtube.com/@ShortsBreak_Official',
+ 'uploader': 'Shorts Break',
+ 'uploader_id': '@ShortsBreak_Official',
+ },
+ 'playlist_mincount': 30,
+ }, {
+ # Trending Now tab. The tab id is empty
+ 'url': 'https://www.youtube.com/feed/trending',
+ 'info_dict': {
+ 'id': 'trending',
+ 'title': 'trending - Now',
+ 'tags': [],
+ },
+ 'playlist_mincount': 30,
+ }, {
+ # Trending Gaming tab. The tab id is empty
+ 'url': 'https://www.youtube.com/feed/trending?bp=4gIcGhpnYW1pbmdfY29ycHVzX21vc3RfcG9wdWxhcg%3D%3D',
+ 'info_dict': {
+ 'id': 'trending',
+ 'title': 'trending - Gaming',
+ 'tags': [],
+ },
+ 'playlist_mincount': 30,
+ }, {
+ # A shorts URL should resolve to the shorts tab
+ # TODO: Fix channel id extraction
+ 'url': 'https://www.youtube.com/channel/UCiu-3thuViMebBjw_5nWYrA/shorts',
+ 'info_dict': {
+ 'id': 'UCiu-3thuViMebBjw_5nWYrA',
+ 'title': 'cole-dlp-test-acc - Shorts',
+ 'channel': 'cole-dlp-test-acc',
+ 'description': 'test description',
+ 'channel_id': 'UCiu-3thuViMebBjw_5nWYrA',
+ 'channel_url': 'https://www.youtube.com/channel/UCiu-3thuViMebBjw_5nWYrA',
+ 'tags': [],
+ 'uploader_url': 'https://www.youtube.com/@coletdjnz',
+ 'uploader_id': '@coletdjnz',
+ 'uploader': 'cole-dlp-test-acc',
+ },
+ 'playlist': [{
+ 'info_dict': {
+ # Channel data is not currently available for short renderers (as of 2023-03-01)
+ '_type': 'url',
+ 'ie_key': 'Youtube',
+ 'url': 'https://www.youtube.com/shorts/sSM9J5YH_60',
+ 'id': 'sSM9J5YH_60',
+ 'title': 'SHORT short',
+ 'view_count': int,
+ 'thumbnails': list,
+ }
+ }],
+ 'params': {'extract_flat': True},
+ }, {
+ # Live video status should be extracted
+ 'url': 'https://www.youtube.com/channel/UCQvWX73GQygcwXOTSf_VDVg/live',
+ 'info_dict': {
+ 'id': 'UCQvWX73GQygcwXOTSf_VDVg',
+ 'title': 'UCQvWX73GQygcwXOTSf_VDVg - Live', # TODO: should be 'Minecraft - Live' or 'Minecraft - Topic - Live'
+ 'tags': []
+ },
+ 'playlist': [{
+ 'info_dict': {
+ '_type': 'url',
+ 'ie_key': 'Youtube',
+ 'url': 'startswith:https://www.youtube.com/watch?v=',
+ 'id': str,
+ 'title': str,
+ 'live_status': 'is_live',
+ 'channel_id': str,
+ 'channel_url': str,
+ 'concurrent_view_count': int,
+ 'channel': str,
+ 'uploader': str,
+ 'uploader_url': str,
+ 'uploader_id': str,
+ 'channel_is_verified': bool, # this will keep changing
+ }
+ }],
+ 'params': {'extract_flat': True, 'playlist_items': '1'},
+ 'playlist_mincount': 1
+ }, {
+ # Channel renderer metadata. Contains number of videos on the channel
+ 'url': 'https://www.youtube.com/channel/UCiu-3thuViMebBjw_5nWYrA/channels',
+ 'info_dict': {
+ 'id': 'UCiu-3thuViMebBjw_5nWYrA',
+ 'title': 'cole-dlp-test-acc - Channels',
+ 'channel': 'cole-dlp-test-acc',
+ 'description': 'test description',
+ 'channel_id': 'UCiu-3thuViMebBjw_5nWYrA',
+ 'channel_url': 'https://www.youtube.com/channel/UCiu-3thuViMebBjw_5nWYrA',
+ 'tags': [],
+ 'uploader_url': 'https://www.youtube.com/@coletdjnz',
+ 'uploader_id': '@coletdjnz',
+ 'uploader': 'cole-dlp-test-acc',
+ },
+ 'playlist': [{
+ 'info_dict': {
+ '_type': 'url',
+ 'ie_key': 'YoutubeTab',
+ 'url': 'https://www.youtube.com/channel/UC-lHJZR3Gqxm24_Vd_AJ5Yw',
+ 'id': 'UC-lHJZR3Gqxm24_Vd_AJ5Yw',
+ 'channel_id': 'UC-lHJZR3Gqxm24_Vd_AJ5Yw',
+ 'title': 'PewDiePie',
+ 'channel': 'PewDiePie',
+ 'channel_url': 'https://www.youtube.com/channel/UC-lHJZR3Gqxm24_Vd_AJ5Yw',
+ 'thumbnails': list,
+ 'channel_follower_count': int,
+ 'playlist_count': int,
+ 'uploader': 'PewDiePie',
+ 'uploader_url': 'https://www.youtube.com/@PewDiePie',
+ 'uploader_id': '@PewDiePie',
+ 'channel_is_verified': True,
+ }
+ }],
+ 'params': {'extract_flat': True},
+ }, {
+ 'url': 'https://www.youtube.com/@3blue1brown/about',
+ 'info_dict': {
+ 'id': '@3blue1brown',
+ 'tags': ['Mathematics'],
+ 'title': '3Blue1Brown',
+ 'channel_follower_count': int,
+ 'channel_id': 'UCYO_jab_esuFRV4b17AJtAw',
+ 'channel': '3Blue1Brown',
+ 'channel_url': 'https://www.youtube.com/channel/UCYO_jab_esuFRV4b17AJtAw',
+ 'description': 'md5:4d1da95432004b7ba840ebc895b6b4c9',
+ 'uploader_url': 'https://www.youtube.com/@3blue1brown',
+ 'uploader_id': '@3blue1brown',
+ 'uploader': '3Blue1Brown',
+ 'channel_is_verified': True,
+ },
+ 'playlist_count': 0,
+ }, {
+ # Podcasts tab, with rich entry playlistRenderers
+ 'url': 'https://www.youtube.com/@99percentinvisiblepodcast/podcasts',
+ 'info_dict': {
+ 'id': 'UCVMF2HD4ZgC0QHpU9Yq5Xrw',
+ 'channel_id': 'UCVMF2HD4ZgC0QHpU9Yq5Xrw',
+ 'uploader_url': 'https://www.youtube.com/@99percentinvisiblepodcast',
+ 'description': 'md5:3a0ed38f1ad42a68ef0428c04a15695c',
+ 'title': '99 Percent Invisible - Podcasts',
+ 'uploader': '99 Percent Invisible',
+ 'channel_follower_count': int,
+ 'channel_url': 'https://www.youtube.com/channel/UCVMF2HD4ZgC0QHpU9Yq5Xrw',
+ 'tags': [],
+ 'channel': '99 Percent Invisible',
+ 'uploader_id': '@99percentinvisiblepodcast',
+ },
+ 'playlist_count': 0,
+ }, {
+ # Releases tab, with rich entry playlistRenderers (same as Podcasts tab)
+ 'url': 'https://www.youtube.com/@AHimitsu/releases',
+ 'info_dict': {
+ 'id': 'UCgFwu-j5-xNJml2FtTrrB3A',
+ 'channel': 'A Himitsu',
+ 'uploader_url': 'https://www.youtube.com/@AHimitsu',
+ 'title': 'A Himitsu - Releases',
+ 'uploader_id': '@AHimitsu',
+ 'uploader': 'A Himitsu',
+ 'channel_id': 'UCgFwu-j5-xNJml2FtTrrB3A',
+ 'tags': 'count:12',
+ 'description': 'I make music',
+ 'channel_url': 'https://www.youtube.com/channel/UCgFwu-j5-xNJml2FtTrrB3A',
+ 'channel_follower_count': int,
+ 'channel_is_verified': True,
+ },
+ 'playlist_mincount': 10,
+ }, {
+ # Playlist with only shorts, shown as reel renderers
+ # FIXME: YouTube currently doesn't provide a continuation for this
+ # playlist, but may do so in the future.
+ 'url': 'https://www.youtube.com/playlist?list=UUxqPAgubo4coVn9Lx1FuKcg',
+ 'info_dict': {
+ 'id': 'UUxqPAgubo4coVn9Lx1FuKcg',
+ 'channel_url': 'https://www.youtube.com/channel/UCxqPAgubo4coVn9Lx1FuKcg',
+ 'view_count': int,
+ 'uploader_id': '@BangyShorts',
+ 'description': '',
+ 'uploader_url': 'https://www.youtube.com/@BangyShorts',
+ 'channel_id': 'UCxqPAgubo4coVn9Lx1FuKcg',
+ 'channel': 'Bangy Shorts',
+ 'uploader': 'Bangy Shorts',
+ 'tags': [],
+ 'availability': 'public',
+ 'modified_date': r're:\d{8}',
+ 'title': 'Uploads from Bangy Shorts',
+ },
+ 'playlist_mincount': 100,
+ 'expected_warnings': [r'[Uu]navailable videos (are|will be) hidden'],
+ }, {
+ 'note': 'Tags containing spaces',
+ 'url': 'https://www.youtube.com/channel/UC7_YxT-KID8kRbqZo7MyscQ',
+ 'playlist_count': 3,
+ 'info_dict': {
+ 'id': 'UC7_YxT-KID8kRbqZo7MyscQ',
+ 'channel': 'Markiplier',
+ 'channel_id': 'UC7_YxT-KID8kRbqZo7MyscQ',
+ 'title': 'Markiplier',
+ 'channel_follower_count': int,
+ 'description': 'md5:0c010910558658824402809750dc5d97',
+ 'uploader_id': '@markiplier',
+ 'uploader_url': 'https://www.youtube.com/@markiplier',
+ 'uploader': 'Markiplier',
+ 'channel_url': 'https://www.youtube.com/channel/UC7_YxT-KID8kRbqZo7MyscQ',
+ 'channel_is_verified': True,
+ 'tags': ['markiplier', 'comedy', 'gaming', 'funny videos', 'funny moments',
+ 'sketch comedy', 'laughing', 'lets play', 'challenge videos', 'hilarious',
+ 'challenges', 'sketches', 'scary games', 'funny games', 'rage games',
+ 'mark fischbach'],
+ },
+ }]
+
+ @classmethod
+ def suitable(cls, url):
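+ # Single-video watch URLs are handled by YoutubeIE instead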
+ return False if YoutubeIE.suitable(url) else super().suitable(url)
+
+ _URL_RE = re.compile(rf'(?P<pre>{_VALID_URL})(?(not_channel)|(?P<tab>/[^?#/]+))?(?P<post>.*)$')
+
+ def _get_url_mobj(self, url):
+ mobj = self._URL_RE.match(url).groupdict()
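+ # Normalize missing groups to '' so callers can concatenate them safely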
+ mobj.update((k, '') for k, v in mobj.items() if v is None)
+ return mobj
+
+ def _extract_tab_id_and_name(self, tab, base_url='https://www.youtube.com'):
+ tab_name = (tab.get('title') or '').lower()
+ tab_url = urljoin(base_url, traverse_obj(
+ tab, ('endpoint', 'commandMetadata', 'webCommandMetadata', 'url')))
+
+ tab_id = (tab_url and self._get_url_mobj(tab_url)['tab'][1:]
+ or traverse_obj(tab, 'tabIdentifier', expected_type=str))
+ if tab_id:
+ return {
+ 'TAB_ID_SPONSORSHIPS': 'membership',
+ }.get(tab_id, tab_id), tab_name
+
+ # Fallback to tab name if we cannot get the tab id.
+ # XXX: should we strip non-ASCII characters? e.g. in the case of the 'let's play' tab example on a special gaming channel
+ # Note that in the case of a translated tab name this may result in an empty string, which we don't want.
+ if tab_name:
+ self.write_debug(f'Falling back to selected tab name: {tab_name}')
+ return {
+ 'home': 'featured',
+ 'live': 'streams',
+ }.get(tab_name, tab_name), tab_name
+
+ def _has_tab(self, tabs, tab_id):
+ return any(self._extract_tab_id_and_name(tab)[0] == tab_id for tab in tabs)
+
+ def _empty_playlist(self, item_id, data):
+ return self.playlist_result([], item_id, **self._extract_metadata_from_tabs(item_id, data))
+
+ @YoutubeTabBaseInfoExtractor.passthrough_smuggled_data
+ def _real_extract(self, url, smuggled_data):
+ item_id = self._match_id(url)
+ url = urllib.parse.urlunparse(
+ urllib.parse.urlparse(url)._replace(netloc='www.youtube.com'))
+ compat_opts = self.get_param('compat_opts', [])
+
+ mobj = self._get_url_mobj(url)
+ pre, tab, post, is_channel = mobj['pre'], mobj['tab'], mobj['post'], not mobj['not_channel']
+ if is_channel and smuggled_data.get('is_music_url'):
+ if item_id[:2] == 'VL': # YouTube Music VL channels have an equivalent playlist
+ return self.url_result(
+ f'https://music.youtube.com/playlist?list={item_id[2:]}', YoutubeTabIE, item_id[2:])
+ elif item_id[:2] == 'MP': # Resolve albums (/[channel/browse]/MP...) to their equivalent playlist
+ mdata = self._extract_tab_endpoint(
+ f'https://music.youtube.com/channel/{item_id}', item_id, default_client='web_music')
+ murl = traverse_obj(mdata, ('microformat', 'microformatDataRenderer', 'urlCanonical'),
+ get_all=False, expected_type=str)
+ if not murl:
+ raise ExtractorError('Failed to resolve album to playlist')
+ return self.url_result(murl, YoutubeTabIE)
+ elif mobj['channel_type'] == 'browse': # YouTube Music /browse/ should be changed to /channel/
+ return self.url_result(
+ f'https://music.youtube.com/channel/{item_id}{tab}{post}', YoutubeTabIE, item_id)
+
+ original_tab_id, display_id = tab[1:], f'{item_id}{tab}'
+ if is_channel and not tab and 'no-youtube-channel-redirect' not in compat_opts:
+ url = f'{pre}/videos{post}'
+ if smuggled_data.get('is_music_url'):
+ self.report_warning(f'YouTube Music is not directly supported. Redirecting to {url}')
+
+ # Handle both video/playlist URLs
+ qs = parse_qs(url)
+ video_id, playlist_id = [traverse_obj(qs, (key, 0)) for key in ('v', 'list')]
+ if not video_id and mobj['not_channel'].startswith('watch'):
+ if not playlist_id:
+ # If there is neither a video nor a playlist ID, YouTube redirects to the home page, which is undesirable
+ raise ExtractorError('A video URL was given without video ID', expected=True)
+ # Common mistake: https://www.youtube.com/watch?list=playlist_id
+ self.report_warning(f'A video URL was given without video ID. Trying to download playlist {playlist_id}')
+ return self.url_result(
+ f'https://www.youtube.com/playlist?list={playlist_id}', YoutubeTabIE, playlist_id)
+
+ if not self._yes_playlist(playlist_id, video_id):
+ return self.url_result(
+ f'https://www.youtube.com/watch?v={video_id}', YoutubeIE, video_id)
+
+ data, ytcfg = self._extract_data(url, display_id)
+
+ # YouTube may provide a non-standard redirect to the regional channel
+ # See: https://github.com/yt-dlp/yt-dlp/issues/2694
+ # https://support.google.com/youtube/answer/2976814#zippy=,conditional-redirects
+ redirect_url = traverse_obj(
+ data, ('onResponseReceivedActions', ..., 'navigateAction', 'endpoint', 'commandMetadata', 'webCommandMetadata', 'url'), get_all=False)
+ if redirect_url and 'no-youtube-channel-redirect' not in compat_opts:
+ redirect_url = ''.join((urljoin('https://www.youtube.com', redirect_url), tab, post))
+ self.to_screen(f'This playlist is likely not available in your region. Following conditional redirect to {redirect_url}')
+ return self.url_result(redirect_url, YoutubeTabIE)
+
+ tabs, extra_tabs = self._extract_tab_renderers(data), []
+ if is_channel and tabs and 'no-youtube-channel-redirect' not in compat_opts:
+ selected_tab = self._extract_selected_tab(tabs)
+ selected_tab_id, selected_tab_name = self._extract_tab_id_and_name(selected_tab, url) # NB: Name may be translated
+ self.write_debug(f'Selected tab: {selected_tab_id!r} ({selected_tab_name}), Requested tab: {original_tab_id!r}')
+
+ # /about is no longer a tab
+ if original_tab_id == 'about':
+ return self._empty_playlist(item_id, data)
+
+ if not original_tab_id and selected_tab_name:
+ self.to_screen('Downloading all uploads of the channel. '
+ 'To download only the videos in a specific tab, pass the tab\'s URL')
+ if self._has_tab(tabs, 'streams'):
+ extra_tabs.append(''.join((pre, '/streams', post)))
+ if self._has_tab(tabs, 'shorts'):
+ extra_tabs.append(''.join((pre, '/shorts', post)))
+ # XXX: Members-only tab should also be extracted
+
+ if not extra_tabs and selected_tab_id != 'videos':
+ # Channel does not have streams, shorts or videos tabs
+ if item_id[:2] != 'UC':
+ return self._empty_playlist(item_id, data)
+
+ # Topic channels don't have /videos. Use the equivalent playlist instead
+ pl_id = f'UU{item_id[2:]}'
+ pl_url = f'https://www.youtube.com/playlist?list={pl_id}'
+ try:
+ data, ytcfg = self._extract_data(pl_url, pl_id, ytcfg=ytcfg, fatal=True, webpage_fatal=True)
+ except ExtractorError:
+ return self._empty_playlist(item_id, data)
+ else:
+ item_id, url = pl_id, pl_url
+ self.to_screen(
+ f'The channel does not have a videos, shorts, or live tab. Redirecting to playlist {pl_id} instead')
+
+ elif extra_tabs and selected_tab_id != 'videos':
+ # When there are shorts/live tabs but no videos tab
+ url, data = f'{pre}{post}', None
+
+ elif (original_tab_id or 'videos') != selected_tab_id:
+ if original_tab_id == 'live':
+ # The live tab should have redirected to the video,
+ # except when the channel has an actual live tab
+ # Example: https://www.youtube.com/channel/UCEH7P7kyJIkS_gJf93VYbmg/live
+ raise UserNotLive(video_id=item_id)
+ elif selected_tab_name:
+ raise ExtractorError(f'This channel does not have a {original_tab_id} tab', expected=True)
+
+ # For channels such as https://www.youtube.com/channel/UCtFRv9O2AHqOZjjynzrv-xg
+ url = f'{pre}{post}'
+
+ # YouTube sometimes provides a button to reload the playlist with unavailable videos.
+ if 'no-youtube-unavailable-videos' not in compat_opts:
+ data = self._reload_with_unavailable_videos(display_id, data, ytcfg) or data
+ self._extract_and_report_alerts(data, only_once=True)
+
+ tabs, entries = self._extract_tab_renderers(data), []
+ if tabs:
+ entries = [self._extract_from_tabs(item_id, ytcfg, data, tabs)]
+ entries[0].update({
+ 'extractor_key': YoutubeTabIE.ie_key(),
+ 'extractor': YoutubeTabIE.IE_NAME,
+ 'webpage_url': url,
+ })
+ if self.get_param('playlist_items') == '0':
+ entries.extend(self.url_result(u, YoutubeTabIE) for u in extra_tabs)
+ else: # Users expect to get all `video_id`s even with `--flat-playlist`. So don't return `url_result`
+ entries.extend(map(self._real_extract, extra_tabs))
+
+ if len(entries) == 1:
+ return entries[0]
+ elif entries:
+ metadata = self._extract_metadata_from_tabs(item_id, data)
+ uploads_url = 'the Uploads (UU) playlist URL'
+ if try_get(metadata, lambda x: x['channel_id'].startswith('UC')):
+ uploads_url = f'https://www.youtube.com/playlist?list=UU{metadata["channel_id"][2:]}'
+ self.to_screen(
+ 'Downloading as multiple playlists, separated by tabs. '
+ f'To download as a single playlist instead, pass {uploads_url}')
+ return self.playlist_result(entries, item_id, **metadata)
+
+ # Inline playlist
+ playlist = traverse_obj(
+ data, ('contents', 'twoColumnWatchNextResults', 'playlist', 'playlist'), expected_type=dict)
+ if playlist:
+ return self._extract_from_playlist(item_id, url, data, playlist, ytcfg)
+
+ video_id = traverse_obj(
+ data, ('currentVideoEndpoint', 'watchEndpoint', 'videoId'), expected_type=str) or video_id
+ if video_id:
+ if tab != '/live': # the live tab is expected to redirect to a video
+ self.report_warning(f'Unable to recognize playlist. Downloading just video {video_id}')
+ return self.url_result(f'https://www.youtube.com/watch?v={video_id}', YoutubeIE, video_id)
+
+ raise ExtractorError('Unable to recognize tab page')
+
+
+class YoutubePlaylistIE(InfoExtractor):
+ IE_DESC = 'YouTube playlists'
+ _VALID_URL = r'''(?x)(?:
+ (?:https?://)?
+ (?:\w+\.)?
+ (?:
+ (?:
+ youtube(?:kids)?\.com|
+ %(invidious)s
+ )
+ /.*?\?.*?\blist=
+ )?
+ (?P<id>%(playlist_id)s)
+ )''' % {
+ 'playlist_id': YoutubeBaseInfoExtractor._PLAYLIST_ID_RE,
+ 'invidious': '|'.join(YoutubeBaseInfoExtractor._INVIDIOUS_SITES),
+ }
+ IE_NAME = 'youtube:playlist'
+ _TESTS = [{
+ 'note': 'issue #673',
+ 'url': 'PLBB231211A4F62143',
+ 'info_dict': {
+ 'title': '[OLD]Team Fortress 2 (Class-based LP)',
+ 'id': 'PLBB231211A4F62143',
+ 'uploader': 'Wickman',
+ 'uploader_id': '@WickmanVT',
+ 'description': 'md5:8fa6f52abb47a9552002fa3ddfc57fc2',
+ 'view_count': int,
+ 'uploader_url': 'https://www.youtube.com/@WickmanVT',
+ 'modified_date': r're:\d{8}',
+ 'channel_id': 'UCKSpbfbl5kRQpTdL7kMc-1Q',
+ 'channel': 'Wickman',
+ 'tags': [],
+ 'channel_url': 'https://www.youtube.com/channel/UCKSpbfbl5kRQpTdL7kMc-1Q',
+ 'availability': 'public',
+ },
+ 'playlist_mincount': 29,
+ }, {
+ 'url': 'PLtPgu7CB4gbY9oDN3drwC3cMbJggS7dKl',
+ 'info_dict': {
+ 'title': 'YDL_safe_search',
+ 'id': 'PLtPgu7CB4gbY9oDN3drwC3cMbJggS7dKl',
+ },
+ 'playlist_count': 2,
+ 'skip': 'This playlist is private',
+ }, {
+ 'note': 'embedded',
+ 'url': 'https://www.youtube.com/embed/videoseries?list=PL6IaIsEjSbf96XFRuNccS_RuEXwNdsoEu',
+ 'playlist_count': 4,
+ 'info_dict': {
+ 'title': 'JODA15',
+ 'id': 'PL6IaIsEjSbf96XFRuNccS_RuEXwNdsoEu',
+ 'uploader': 'milan',
+ 'uploader_id': '@milan5503',
+ 'description': '',
+ 'channel_url': 'https://www.youtube.com/channel/UCEI1-PVPcYXjB73Hfelbmaw',
+ 'tags': [],
+ 'modified_date': '20140919',
+ 'view_count': int,
+ 'channel': 'milan',
+ 'channel_id': 'UCEI1-PVPcYXjB73Hfelbmaw',
+ 'uploader_url': 'https://www.youtube.com/@milan5503',
+ 'availability': 'public',
+ },
+ 'expected_warnings': [r'[Uu]navailable videos? (is|are|will be) hidden', 'Retrying', 'Giving up'],
+ }, {
+ 'url': 'http://www.youtube.com/embed/_xDOZElKyNU?list=PLsyOSbh5bs16vubvKePAQ1x3PhKavfBIl',
+ 'playlist_mincount': 455,
+ 'info_dict': {
+ 'title': '2018 Chinese New Singles (11/6 updated)',
+ 'id': 'PLsyOSbh5bs16vubvKePAQ1x3PhKavfBIl',
+ 'uploader': 'LBK',
+ 'uploader_id': '@music_king',
+ 'description': 'md5:da521864744d60a198e3a88af4db0d9d',
+ 'channel': 'LBK',
+ 'view_count': int,
+ 'channel_url': 'https://www.youtube.com/channel/UC21nz3_MesPLqtDqwdvnoxA',
+ 'tags': [],
+ 'uploader_url': 'https://www.youtube.com/@music_king',
+ 'channel_id': 'UC21nz3_MesPLqtDqwdvnoxA',
+ 'modified_date': r're:\d{8}',
+ 'availability': 'public',
+ },
+ 'expected_warnings': [r'[Uu]navailable videos (are|will be) hidden'],
+ }, {
+ 'url': 'TLGGrESM50VT6acwMjAyMjAxNw',
+ 'only_matching': True,
+ }, {
+ # music album playlist
+ 'url': 'OLAK5uy_m4xAFdmMC5rX3Ji3g93pQe3hqLZw_9LhM',
+ 'only_matching': True,
+ }]
+
+ @classmethod
+ def suitable(cls, url):
+ if YoutubeTabIE.suitable(url):
+ return False
+ from ..utils import parse_qs
+ qs = parse_qs(url)
+ if qs.get('v', [None])[0]:
+ return False
+ return super().suitable(url)
+
+ def _real_extract(self, url):
+ playlist_id = self._match_id(url)
+ is_music_url = YoutubeBaseInfoExtractor.is_music_url(url)
+ url = update_url_query(
+ 'https://www.youtube.com/playlist',
+ parse_qs(url) or {'list': playlist_id})
+ if is_music_url:
+ url = smuggle_url(url, {'is_music_url': True})
+ return self.url_result(url, ie=YoutubeTabIE.ie_key(), video_id=playlist_id)
+
+
+class YoutubeYtBeIE(InfoExtractor):
+ IE_DESC = 'youtu.be'
+ _VALID_URL = r'https?://youtu\.be/(?P<id>[0-9A-Za-z_-]{11})/*?.*?\blist=(?P<playlist_id>%(playlist_id)s)' % {'playlist_id': YoutubeBaseInfoExtractor._PLAYLIST_ID_RE}
+ _TESTS = [{
+ 'url': 'https://youtu.be/yeWKywCrFtk?list=PL2qgrgXsNUG5ig9cat4ohreBjYLAPC0J5',
+ 'info_dict': {
+ 'id': 'yeWKywCrFtk',
+ 'ext': 'mp4',
+ 'title': 'Small Scale Baler and Braiding Rugs',
+ 'uploader': 'Backus-Page House Museum',
+ 'uploader_id': '@backuspagemuseum',
+ 'uploader_url': r're:https?://(?:www\.)?youtube\.com/@backuspagemuseum',
+ 'upload_date': '20161008',
+ 'description': 'md5:800c0c78d5eb128500bffd4f0b4f2e8a',
+ 'categories': ['Nonprofits & Activism'],
+ 'tags': list,
+ 'like_count': int,
+ 'age_limit': 0,
+ 'playable_in_embed': True,
+ 'thumbnail': r're:^https?://.*\.webp',
+ 'channel': 'Backus-Page House Museum',
+ 'channel_id': 'UCEfMCQ9bs3tjvjy1s451zaw',
+ 'live_status': 'not_live',
+ 'view_count': int,
+ 'channel_url': 'https://www.youtube.com/channel/UCEfMCQ9bs3tjvjy1s451zaw',
+ 'availability': 'public',
+ 'duration': 59,
+ 'comment_count': int,
+ 'channel_follower_count': int
+ },
+ 'params': {
+ 'noplaylist': True,
+ 'skip_download': True,
+ },
+ }, {
+ 'url': 'https://youtu.be/uWyaPkt-VOI?list=PL9D9FC436B881BA21',
+ 'only_matching': True,
+ }]
+
+ def _real_extract(self, url):
+ mobj = self._match_valid_url(url)
+ video_id = mobj.group('id')
+ playlist_id = mobj.group('playlist_id')
+ return self.url_result(
+ update_url_query('https://www.youtube.com/watch', {
+ 'v': video_id,
+ 'list': playlist_id,
+ 'feature': 'youtu.be',
+ }), ie=YoutubeTabIE.ie_key(), video_id=playlist_id)
+
+
+class YoutubeLivestreamEmbedIE(InfoExtractor):
+ IE_DESC = 'YouTube livestream embeds'
+ _VALID_URL = r'https?://(?:\w+\.)?youtube\.com/embed/live_stream/?\?(?:[^#]+&)?channel=(?P<id>[^&#]+)'
+ _TESTS = [{
+ 'url': 'https://www.youtube.com/embed/live_stream?channel=UC2_KI6RB__jGdlnK6dvFEZA',
+ 'only_matching': True,
+ }]
+
+ def _real_extract(self, url):
+ channel_id = self._match_id(url)
+ return self.url_result(
+ f'https://www.youtube.com/channel/{channel_id}/live',
+ ie=YoutubeTabIE.ie_key(), video_id=channel_id)
+
+
+class YoutubeYtUserIE(InfoExtractor):
+ IE_DESC = 'YouTube user videos; "ytuser:" prefix'
+ IE_NAME = 'youtube:user'
+ _VALID_URL = r'ytuser:(?P<id>.+)'
+ _TESTS = [{
+ 'url': 'ytuser:phihag',
+ 'only_matching': True,
+ }]
+
+ def _real_extract(self, url):
+ user_id = self._match_id(url)
+ return self.url_result(f'https://www.youtube.com/user/{user_id}', YoutubeTabIE, user_id)
+
+
+class YoutubeFavouritesIE(YoutubeBaseInfoExtractor):
+ IE_NAME = 'youtube:favorites'
+ IE_DESC = 'YouTube liked videos; ":ytfav" keyword (requires cookies)'
+ _VALID_URL = r':ytfav(?:ou?rite)?s?'
+ _LOGIN_REQUIRED = True
+ _TESTS = [{
+ 'url': ':ytfav',
+ 'only_matching': True,
+ }, {
+ 'url': ':ytfavorites',
+ 'only_matching': True,
+ }]
+
+ def _real_extract(self, url):
+ return self.url_result(
+ 'https://www.youtube.com/playlist?list=LL',
+ ie=YoutubeTabIE.ie_key())
+
+
+class YoutubeNotificationsIE(YoutubeTabBaseInfoExtractor):
+ IE_NAME = 'youtube:notif'
+ IE_DESC = 'YouTube notifications; ":ytnotif" keyword (requires cookies)'
+ _VALID_URL = r':ytnotif(?:ication)?s?'
+ _LOGIN_REQUIRED = True
+ _TESTS = [{
+ 'url': ':ytnotif',
+ 'only_matching': True,
+ }, {
+ 'url': ':ytnotifications',
+ 'only_matching': True,
+ }]
+
+ def _extract_notification_menu(self, response, continuation_list):
+ notification_list = traverse_obj(
+ response,
+ ('actions', 0, 'openPopupAction', 'popup', 'multiPageMenuRenderer', 'sections', 0, 'multiPageMenuNotificationSectionRenderer', 'items'),
+ ('actions', 0, 'appendContinuationItemsAction', 'continuationItems'),
+ expected_type=list) or []
+ continuation_list[0] = None
+ for item in notification_list:
+ entry = self._extract_notification_renderer(item.get('notificationRenderer'))
+ if entry:
+ yield entry
+ continuation = item.get('continuationItemRenderer')
+ if continuation:
+ continuation_list[0] = continuation
+
+ def _extract_notification_renderer(self, notification):
+ video_id = traverse_obj(
+ notification, ('navigationEndpoint', 'watchEndpoint', 'videoId'), expected_type=str)
+ url = f'https://www.youtube.com/watch?v={video_id}'
+ channel_id = None
+ if not video_id:
+ browse_ep = traverse_obj(
+ notification, ('navigationEndpoint', 'browseEndpoint'), expected_type=dict)
+ channel_id = self.ucid_or_none(traverse_obj(browse_ep, 'browseId', expected_type=str))
+ post_id = self._search_regex(
+ r'/post/(.+)', traverse_obj(browse_ep, 'canonicalBaseUrl', expected_type=str),
+ 'post id', default=None)
+ if not channel_id or not post_id:
+ return
+ # The direct /post URL redirects to this in the browser
+ url = f'https://www.youtube.com/channel/{channel_id}/community?lb={post_id}'
+
+ channel = traverse_obj(
+ notification, ('contextualMenu', 'menuRenderer', 'items', 1, 'menuServiceItemRenderer', 'text', 'runs', 1, 'text'),
+ expected_type=str)
+ notification_title = self._get_text(notification, 'shortMessage')
+ if notification_title:
+ notification_title = notification_title.replace('\xad', '') # remove soft hyphens
+ # TODO: handle recommended videos
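+ # Notification text has the form '<channel> <action>: <title>';
+ # strip the channel/action prefix to recover the video title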
+ title = self._search_regex(
+ rf'{re.escape(channel or "")}[^:]+: (.+)', notification_title,
+ 'video title', default=None)
+ timestamp = (self._parse_time_text(self._get_text(notification, 'sentTimeText'))
+ if self._configuration_arg('approximate_date', ie_key=YoutubeTabIE)
+ else None)
+ return {
+ '_type': 'url',
+ 'url': url,
+ 'ie_key': (YoutubeIE if video_id else YoutubeTabIE).ie_key(),
+ 'video_id': video_id,
+ 'title': title,
+ 'channel_id': channel_id,
+ 'channel': channel,
+ 'uploader': channel,
+ 'thumbnails': self._extract_thumbnails(notification, 'videoThumbnail'),
+ 'timestamp': timestamp,
+ }
+
+ def _notification_menu_entries(self, ytcfg):
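+ # continuation_list is a single-element mutable container so that
+ # _extract_notification_menu can pass the next continuation token back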
+ continuation_list = [None]
+ response = None
+ for page in itertools.count(1):
+ ctoken = traverse_obj(
+ continuation_list, (0, 'continuationEndpoint', 'getNotificationMenuEndpoint', 'ctoken'), expected_type=str)
+ response = self._extract_response(
+ item_id=f'page {page}', query={'ctoken': ctoken} if ctoken else {}, ytcfg=ytcfg,
+ ep='notification/get_notification_menu', check_get_keys='actions',
+ headers=self.generate_api_headers(ytcfg=ytcfg, visitor_data=self._extract_visitor_data(response)))
+ yield from self._extract_notification_menu(response, continuation_list)
+ if not continuation_list[0]:
+ break
+
+ def _real_extract(self, url):
+ display_id = 'notifications'
+ ytcfg = self._download_ytcfg('web', display_id) if not self.skip_webpage else {}
+ self._report_playlist_authcheck(ytcfg)
+ return self.playlist_result(self._notification_menu_entries(ytcfg), display_id, display_id)
+
+
+class YoutubeSearchIE(YoutubeTabBaseInfoExtractor, SearchInfoExtractor):
+ IE_DESC = 'YouTube search'
+ IE_NAME = 'youtube:search'
+ _SEARCH_KEY = 'ytsearch'
+ _SEARCH_PARAMS = 'EgIQAQ%3D%3D' # Videos only
+ _TESTS = [{
+ 'url': 'ytsearch5:youtube-dl test video',
+ 'playlist_count': 5,
+ 'info_dict': {
+ 'id': 'youtube-dl test video',
+ 'title': 'youtube-dl test video',
+ }
+ }]
+
+
+class YoutubeSearchDateIE(YoutubeTabBaseInfoExtractor, SearchInfoExtractor):
+ IE_NAME = YoutubeSearchIE.IE_NAME + ':date'
+ _SEARCH_KEY = 'ytsearchdate'
+ IE_DESC = 'YouTube search, newest videos first'
+ _SEARCH_PARAMS = 'CAISAhAB' # Videos only, sorted by date
+ _TESTS = [{
+ 'url': 'ytsearchdate5:youtube-dl test video',
+ 'playlist_count': 5,
+ 'info_dict': {
+ 'id': 'youtube-dl test video',
+ 'title': 'youtube-dl test video',
+ }
+ }]
+
+
+class YoutubeSearchURLIE(YoutubeTabBaseInfoExtractor):
+ IE_DESC = 'YouTube search URLs with sorting and filter support'
+ IE_NAME = YoutubeSearchIE.IE_NAME + '_url'
+ _VALID_URL = r'https?://(?:www\.)?youtube\.com/(?:results|search)\?([^#]+&)?(?:search_query|q)=(?:[^&]+)(?:[&#]|$)'
+ _TESTS = [{
+ 'url': 'https://www.youtube.com/results?baz=bar&search_query=youtube-dl+test+video&filters=video&lclk=video',
+ 'playlist_mincount': 5,
+ 'info_dict': {
+ 'id': 'youtube-dl test video',
+ 'title': 'youtube-dl test video',
+ }
+ }, {
+ 'url': 'https://www.youtube.com/results?search_query=python&sp=EgIQAg%253D%253D',
+ 'playlist_mincount': 5,
+ 'info_dict': {
+ 'id': 'python',
+ 'title': 'python',
+ }
+ }, {
+ 'url': 'https://www.youtube.com/results?search_query=%23cats',
+ 'playlist_mincount': 1,
+ 'info_dict': {
+ 'id': '#cats',
+ 'title': '#cats',
+ # The test suite does not have support for nested playlists
+ # 'entries': [{
+ # 'url': r're:https://(www\.)?youtube\.com/hashtag/cats',
+ # 'title': '#cats',
+ # }],
+ },
+ }, {
+ # Channel results
+ 'url': 'https://www.youtube.com/results?search_query=kurzgesagt&sp=EgIQAg%253D%253D',
+ 'info_dict': {
+ 'id': 'kurzgesagt',
+ 'title': 'kurzgesagt',
+ },
+ 'playlist': [{
+ 'info_dict': {
+ '_type': 'url',
+ 'id': 'UCsXVk37bltHxD1rDPwtNM8Q',
+ 'url': 'https://www.youtube.com/channel/UCsXVk37bltHxD1rDPwtNM8Q',
+ 'ie_key': 'YoutubeTab',
+ 'channel': 'Kurzgesagt – In a Nutshell',
+ 'description': 'md5:4ae48dfa9505ffc307dad26342d06bfc',
+ 'title': 'Kurzgesagt – In a Nutshell',
+ 'channel_id': 'UCsXVk37bltHxD1rDPwtNM8Q',
+ # No longer available for search as it is set to the handle.
+ # 'playlist_count': int,
+ 'channel_url': 'https://www.youtube.com/channel/UCsXVk37bltHxD1rDPwtNM8Q',
+ 'thumbnails': list,
+ 'uploader_id': '@kurzgesagt',
+ 'uploader_url': 'https://www.youtube.com/@kurzgesagt',
+ 'uploader': 'Kurzgesagt – In a Nutshell',
+ 'channel_is_verified': True,
+ 'channel_follower_count': int,
+ }
+ }],
+ 'params': {'extract_flat': True, 'playlist_items': '1'},
+ 'playlist_mincount': 1,
+ }, {
+ 'url': 'https://www.youtube.com/results?q=test&sp=EgQIBBgB',
+ 'only_matching': True,
+ }]
+
+ def _real_extract(self, url):
+ qs = parse_qs(url)
+ query = (qs.get('search_query') or qs.get('q'))[0]
+ return self.playlist_result(self._search_results(query, qs.get('sp', (None,))[0]), query, query)
+
+
+class YoutubeMusicSearchURLIE(YoutubeTabBaseInfoExtractor):
+ IE_DESC = 'YouTube music search URLs with selectable sections, e.g. #songs'
+ IE_NAME = 'youtube:music:search_url'
+ _VALID_URL = r'https?://music\.youtube\.com/search\?([^#]+&)?(?:search_query|q)=(?:[^&]+)(?:[&#]|$)'
+ _TESTS = [{
+ 'url': 'https://music.youtube.com/search?q=royalty+free+music',
+ 'playlist_count': 16,
+ 'info_dict': {
+ 'id': 'royalty free music',
+ 'title': 'royalty free music',
+ }
+ }, {
+ 'url': 'https://music.youtube.com/search?q=royalty+free+music&sp=EgWKAQIIAWoKEAoQAxAEEAkQBQ%3D%3D',
+ 'playlist_mincount': 30,
+ 'info_dict': {
+ 'id': 'royalty free music - songs',
+ 'title': 'royalty free music - songs',
+ },
+ 'params': {'extract_flat': 'in_playlist'}
+ }, {
+ 'url': 'https://music.youtube.com/search?q=royalty+free+music#community+playlists',
+ 'playlist_mincount': 30,
+ 'info_dict': {
+ 'id': 'royalty free music - community playlists',
+ 'title': 'royalty free music - community playlists',
+ },
+ 'params': {'extract_flat': 'in_playlist'}
+ }]
+
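+ # Pre-computed 'sp' filter values selecting a result section in YouTube Music search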
+ _SECTIONS = {
+ 'albums': 'EgWKAQIYAWoKEAoQAxAEEAkQBQ==',
+ 'artists': 'EgWKAQIgAWoKEAoQAxAEEAkQBQ==',
+ 'community playlists': 'EgeKAQQoAEABagoQChADEAQQCRAF',
+ 'featured playlists': 'EgeKAQQoADgBagwQAxAJEAQQDhAKEAU==',
+ 'songs': 'EgWKAQIIAWoKEAoQAxAEEAkQBQ==',
+ 'videos': 'EgWKAQIQAWoKEAoQAxAEEAkQBQ==',
+ }
+
+ def _real_extract(self, url):
+ qs = parse_qs(url)
+ query = (qs.get('search_query') or qs.get('q'))[0]
+ params = qs.get('sp', (None,))[0]
+ if params:
+ section = next((k for k, v in self._SECTIONS.items() if v == params), params)
+ else:
+ section = urllib.parse.unquote_plus((url.split('#') + [''])[1]).lower()
+ params = self._SECTIONS.get(section)
+ if not params:
+ section = None
+ title = join_nonempty(query, section, delim=' - ')
+ return self.playlist_result(self._search_results(query, params, default_client='web_music'), title, title)
+
+
+class YoutubeFeedsInfoExtractor(InfoExtractor):
+ """
+ Base class for feed extractors.
+ Subclasses must redefine the _FEED_NAME attribute.
+ """
+ _LOGIN_REQUIRED = True
+ _FEED_NAME = 'feeds'
+
+ def _real_initialize(self):
+ YoutubeBaseInfoExtractor._check_login_required(self)
+
+ @classproperty
+ def IE_NAME(self):
+ return f'youtube:{self._FEED_NAME}'
+
+ def _real_extract(self, url):
+ return self.url_result(
+ f'https://www.youtube.com/feed/{self._FEED_NAME}', ie=YoutubeTabIE.ie_key())
+
+
+class YoutubeWatchLaterIE(InfoExtractor):
+ IE_NAME = 'youtube:watchlater'
+ IE_DESC = 'YouTube watch later list; ":ytwatchlater" keyword (requires cookies)'
+ _VALID_URL = r':ytwatchlater'
+ _TESTS = [{
+ 'url': ':ytwatchlater',
+ 'only_matching': True,
+ }]
+
+ def _real_extract(self, url):
+ return self.url_result(
+ 'https://www.youtube.com/playlist?list=WL', ie=YoutubeTabIE.ie_key())
+
+
+class YoutubeRecommendedIE(YoutubeFeedsInfoExtractor):
+ IE_DESC = 'YouTube recommended videos; ":ytrec" keyword'
+ _VALID_URL = r'https?://(?:www\.)?youtube\.com/?(?:[?#]|$)|:ytrec(?:ommended)?'
+ _FEED_NAME = 'recommended'
+ _LOGIN_REQUIRED = False
+ _TESTS = [{
+ 'url': ':ytrec',
+ 'only_matching': True,
+ }, {
+ 'url': ':ytrecommended',
+ 'only_matching': True,
+ }, {
+ 'url': 'https://youtube.com',
+ 'only_matching': True,
+ }]
+
+
+class YoutubeSubscriptionsIE(YoutubeFeedsInfoExtractor):
+ IE_DESC = 'YouTube subscriptions feed; ":ytsubs" keyword (requires cookies)'
+ _VALID_URL = r':ytsub(?:scription)?s?'
+ _FEED_NAME = 'subscriptions'
+ _TESTS = [{
+ 'url': ':ytsubs',
+ 'only_matching': True,
+ }, {
+ 'url': ':ytsubscriptions',
+ 'only_matching': True,
+ }]
+
+
+class YoutubeHistoryIE(YoutubeFeedsInfoExtractor):
+ IE_DESC = 'YouTube watch history; ":ythis" keyword (requires cookies)'
+ _VALID_URL = r':ythis(?:tory)?'
+ _FEED_NAME = 'history'
+ _TESTS = [{
+ 'url': ':ythistory',
+ 'only_matching': True,
+ }]
+
+
+class YoutubeShortsAudioPivotIE(InfoExtractor):
+ IE_DESC = 'YouTube Shorts audio pivot (Shorts using audio of a given video)'
+ IE_NAME = 'youtube:shorts:pivot:audio'
+ _VALID_URL = r'https?://(?:www\.)?youtube\.com/source/(?P<id>[\w-]{11})/shorts'
+ _TESTS = [{
+ 'url': 'https://www.youtube.com/source/Lyj-MZSAA9o/shorts',
+ 'only_matching': True,
+ }]
+
+ @staticmethod
+ def _generate_audio_pivot_params(video_id):
+ """
+ Generates sfv_audio_pivot browse params for this video id
+ """
+ pb_params = b'\xf2\x05+\n)\x12\'\n\x0b%b\x12\x0b%b\x1a\x0b%b' % ((video_id.encode(),) * 3)
+ return urllib.parse.quote(base64.b64encode(pb_params).decode())
+
+ def _real_extract(self, url):
+ video_id = self._match_id(url)
+ return self.url_result(
+ f'https://www.youtube.com/feed/sfv_audio_pivot?bp={self._generate_audio_pivot_params(video_id)}',
+ ie=YoutubeTabIE)
+
+
+class YoutubeTruncatedURLIE(InfoExtractor):
+ IE_NAME = 'youtube:truncated_url'
+ IE_DESC = False # Do not list
+ _VALID_URL = r'''(?x)
+ (?:https?://)?
+ (?:\w+\.)?[yY][oO][uU][tT][uU][bB][eE](?:-nocookie)?\.com/
+ (?:watch\?(?:
+ feature=[a-z_]+|
+ annotation_id=annotation_[^&]+|
+ x-yt-cl=[0-9]+|
+ hl=[^&]*|
+ t=[0-9]+
+ )?
+ |
+ attribution_link\?a=[^&]+
+ )
+ $
+ '''
+
+ _TESTS = [{
+ 'url': 'https://www.youtube.com/watch?annotation_id=annotation_3951667041',
+ 'only_matching': True,
+ }, {
+ 'url': 'https://www.youtube.com/watch?',
+ 'only_matching': True,
+ }, {
+ 'url': 'https://www.youtube.com/watch?x-yt-cl=84503534',
+ 'only_matching': True,
+ }, {
+ 'url': 'https://www.youtube.com/watch?feature=foo',
+ 'only_matching': True,
+ }, {
+ 'url': 'https://www.youtube.com/watch?hl=en-GB',
+ 'only_matching': True,
+ }, {
+ 'url': 'https://www.youtube.com/watch?t=2372',
+ 'only_matching': True,
+ }]
+
+ def _real_extract(self, url):
+ raise ExtractorError(
+ 'Did you forget to quote the URL? Remember that & is a meta '
+ 'character in most shells, so you want to put the URL in quotes, '
+ 'like yt-dlp '
+ '"https://www.youtube.com/watch?feature=foo&v=BaW_jenozKc" '
+ 'or simply yt-dlp BaW_jenozKc.',
+ expected=True)
+
+
+class YoutubeClipIE(YoutubeTabBaseInfoExtractor):
+ IE_NAME = 'youtube:clip'
+ _VALID_URL = r'https?://(?:www\.)?youtube\.com/clip/(?P<id>[^/?#]+)'
+ _TESTS = [{
+ # FIXME: Other metadata should be extracted from the clip, not from the base video
+ 'url': 'https://www.youtube.com/clip/UgytZKpehg-hEMBSn3F4AaABCQ',
+ 'info_dict': {
+ 'id': 'UgytZKpehg-hEMBSn3F4AaABCQ',
+ 'ext': 'mp4',
+ 'section_start': 29.0,
+ 'section_end': 39.7,
+ 'duration': 10.7,
+ 'age_limit': 0,
+ 'availability': 'public',
+ 'categories': ['Gaming'],
+ 'channel': 'Scott The Woz',
+ 'channel_id': 'UC4rqhyiTs7XyuODcECvuiiQ',
+ 'channel_url': 'https://www.youtube.com/channel/UC4rqhyiTs7XyuODcECvuiiQ',
+ 'description': 'md5:7a4517a17ea9b4bd98996399d8bb36e7',
+ 'like_count': int,
+ 'playable_in_embed': True,
+ 'tags': 'count:17',
+ 'thumbnail': 'https://i.ytimg.com/vi_webp/ScPX26pdQik/maxresdefault.webp',
+ 'title': 'Mobile Games on Console - Scott The Woz',
+ 'upload_date': '20210920',
+ 'uploader': 'Scott The Woz',
+ 'uploader_id': '@ScottTheWoz',
+ 'uploader_url': 'https://www.youtube.com/@ScottTheWoz',
+ 'view_count': int,
+ 'live_status': 'not_live',
+ 'channel_follower_count': int,
+ 'chapters': 'count:20',
+ 'comment_count': int,
+ 'heatmap': 'count:100',
+ }
+ }]
+
+ def _real_extract(self, url):
+ clip_id = self._match_id(url)
+ _, data = self._extract_webpage(url, clip_id)
+
+ video_id = traverse_obj(data, ('currentVideoEndpoint', 'watchEndpoint', 'videoId'))
+ if not video_id:
+ raise ExtractorError('Unable to find video ID')
+
+ clip_data = traverse_obj(data, (
+ 'engagementPanels', ..., 'engagementPanelSectionListRenderer', 'content', 'clipSectionRenderer',
+ 'contents', ..., 'clipAttributionRenderer', 'onScrubExit', 'commandExecutorCommand', 'commands', ...,
+ 'openPopupAction', 'popup', 'notificationActionRenderer', 'actionButton', 'buttonRenderer', 'command',
+ 'commandExecutorCommand', 'commands', ..., 'loopCommand'), get_all=False)
+
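+ # startTimeMs/endTimeMs come from the clip's loopCommand; convert to seconds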
+ return {
+ '_type': 'url_transparent',
+ 'url': f'https://www.youtube.com/watch?v={video_id}',
+ 'ie_key': YoutubeIE.ie_key(),
+ 'id': clip_id,
+ 'section_start': int(clip_data['startTimeMs']) / 1000,
+ 'section_end': int(clip_data['endTimeMs']) / 1000,
+ }
+
+
+class YoutubeConsentRedirectIE(YoutubeBaseInfoExtractor):
+ IE_NAME = 'youtube:consent'
+ IE_DESC = False # Do not list
+ _VALID_URL = r'https?://consent\.youtube\.com/m\?'
+ _TESTS = [{
+ 'url': 'https://consent.youtube.com/m?continue=https%3A%2F%2Fwww.youtube.com%2Flive%2FqVv6vCqciTM%3Fcbrd%3D1&gl=NL&m=0&pc=yt&hl=en&src=1',
+ 'info_dict': {
+ 'id': 'qVv6vCqciTM',
+ 'ext': 'mp4',
+ 'age_limit': 0,
+ 'uploader_id': '@sana_natori',
+ 'comment_count': int,
+ 'chapters': 'count:13',
+ 'upload_date': '20221223',
+ 'thumbnail': 'https://i.ytimg.com/vi/qVv6vCqciTM/maxresdefault.jpg',
+ 'channel_url': 'https://www.youtube.com/channel/UCIdEIHpS0TdkqRkHL5OkLtA',
+ 'uploader_url': 'https://www.youtube.com/@sana_natori',
+ 'like_count': int,
+ 'release_date': '20221223',
+ 'tags': ['Vtuber', '月ノ美兎', '名取さな', 'にじさんじ', 'クリスマス', '3D配信'],
+ 'title': '【 #インターネット女クリスマス 】3Dで歌ってはしゃぐインターネットの女たち【月ノ美兎/名取さな】',
+ 'view_count': int,
+ 'playable_in_embed': True,
+ 'duration': 4438,
+ 'availability': 'public',
+ 'channel_follower_count': int,
+ 'channel_id': 'UCIdEIHpS0TdkqRkHL5OkLtA',
+ 'categories': ['Entertainment'],
+ 'live_status': 'was_live',
+ 'release_timestamp': 1671793345,
+ 'channel': 'さなちゃんねる',
+ 'description': 'md5:6aebf95cc4a1d731aebc01ad6cc9806d',
+ 'uploader': 'さなちゃんねる',
+ 'channel_is_verified': True,
+ 'heatmap': 'count:100',
+ },
+ 'add_ie': ['Youtube'],
+ 'params': {'skip_download': 'Youtube'},
+ }]
+
+ def _real_extract(self, url):
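+ # The consent page carries the original target URL in the 'continue' query parameter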
+ redirect_url = url_or_none(parse_qs(url).get('continue', [None])[-1])
+ if not redirect_url:
+ raise ExtractorError('Invalid cookie consent redirect URL', expected=True)
+ return self.url_result(redirect_url)
+
+
+class YoutubeTruncatedIDIE(InfoExtractor):
+ IE_NAME = 'youtube:truncated_id'
+ IE_DESC = False # Do not list
+ _VALID_URL = r'https?://(?:www\.)?youtube\.com/watch\?v=(?P<id>[0-9A-Za-z_-]{1,10})$'
+
+ _TESTS = [{
+ 'url': 'https://www.youtube.com/watch?v=N_708QY7Ob',
+ 'only_matching': True,
+ }]
+
+ def _real_extract(self, url):
+ video_id = self._match_id(url)
+ raise ExtractorError(
+ f'Incomplete YouTube ID {video_id}. URL {url} looks truncated.',
+ expected=True)
diff --git a/yt_dlp/extractor/zaiko.py b/yt_dlp/extractor/zaiko.py
new file mode 100644
index 0000000..2b6221d
--- /dev/null
+++ b/yt_dlp/extractor/zaiko.py
@@ -0,0 +1,139 @@
+import base64
+
+from .common import InfoExtractor
+from ..utils import (
+ ExtractorError,
+ extract_attributes,
+ int_or_none,
+ str_or_none,
+ traverse_obj,
+ try_call,
+ unescapeHTML,
+ url_basename,
+ url_or_none,
+)
+
+
+class ZaikoBaseIE(InfoExtractor):
+ def _download_real_webpage(self, url, video_id):
+ webpage, urlh = self._download_webpage_handle(url, video_id)
+ final_url = urlh.url
+ if 'zaiko.io/login' in final_url:
+ self.raise_login_required()
+ elif '/_buy/' in final_url:
+ raise ExtractorError('Your account does not have tickets to this event', expected=True)
+ return webpage
+
+ def _parse_vue_element_attr(self, name, string, video_id):
+ page_elem = self._search_regex(rf'(<{name}[^>]+>)', string, name)
+ attrs = {}
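+ # Vue's v-bind shorthand: attributes prefixed with ':' carry
+ # HTML-escaped JSON payloads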
+ for key, value in extract_attributes(page_elem).items():
+ if key.startswith(':'):
+ attrs[key[1:]] = self._parse_json(
+ value, video_id, transform_source=unescapeHTML, fatal=False)
+ return attrs
+
+
+class ZaikoIE(ZaikoBaseIE):
+ _VALID_URL = r'https?://(?:[\w-]+\.)?zaiko\.io/event/(?P<id>\d+)/stream(?:/\d+)+'
+ _TESTS = [{
+ 'url': 'https://zaiko.io/event/324868/stream/20571/20571',
+ 'info_dict': {
+ 'id': '324868',
+ 'ext': 'mp4',
+ 'title': 'ZAIKO STREAMING TEST',
+ 'alt_title': '[VOD] ZAIKO STREAMING TEST_20210603(Do Not Delete)',
+ 'uploader_id': '454',
+ 'uploader': 'ZAIKO ZERO',
+ 'release_timestamp': 1583809200,
+ 'thumbnail': r're:^https://[\w.-]+/\w+/\w+',
+ 'thumbnails': 'maxcount:2',
+ 'release_date': '20200310',
+ 'categories': ['Tech House'],
+ 'live_status': 'was_live',
+ },
+ 'params': {'skip_download': 'm3u8'},
+ 'skip': 'Your account does not have tickets to this event',
+ }]
+
+ def _real_extract(self, url):
+ video_id = self._match_id(url)
+
+ webpage = self._download_real_webpage(url, video_id)
+ stream_meta = self._parse_vue_element_attr('stream-page', webpage, video_id)
+
+ player_page = self._download_webpage(
+ stream_meta['stream-access']['video_source'], video_id,
+ 'Downloading player page', headers={'referer': 'https://zaiko.io/'})
+ player_meta = self._parse_vue_element_attr('player', player_page, video_id)
+ status = traverse_obj(player_meta, ('initial_event_info', 'status', {str}))
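+ # Map Zaiko's event status to (live_status, error message, expected)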
+ live_status, msg, expected = {
+ 'vod': ('was_live', 'No VOD stream URL was found', False),
+ 'archiving': ('post_live', 'Event VOD is still being processed', True),
+ 'deleting': ('post_live', 'This event has ended', True),
+ 'deleted': ('post_live', 'This event has ended', True),
+ 'error': ('post_live', 'This event has ended', True),
+ 'disconnected': ('post_live', 'Stream has been disconnected', True),
+ 'live_to_disconnected': ('post_live', 'Stream has been disconnected', True),
+ 'live': ('is_live', 'No livestream URL was found', False),
+ 'waiting': ('is_upcoming', 'Live event has not yet started', True),
+ 'cancelled': ('not_live', 'Event has been cancelled', True),
+ }.get(status) or ('not_live', f'Unknown event status "{status}"', False)
+
+ stream_url = traverse_obj(player_meta, ('initial_event_info', 'endpoint', {url_or_none}))
+ formats = self._extract_m3u8_formats(
+ stream_url, video_id, live=True, fatal=False) if stream_url else []
+ if not formats:
+ self.raise_no_formats(msg, expected=expected)
+
+ thumbnail_urls = [
+ traverse_obj(player_meta, ('initial_event_info', 'poster_url')),
+ self._og_search_thumbnail(self._download_webpage(
+ f'https://zaiko.io/event/{video_id}', video_id, 'Downloading event page', fatal=False) or ''),
+ ]
+
+ return {
+ 'id': video_id,
+ 'formats': formats,
+ 'live_status': live_status,
+ **traverse_obj(stream_meta, {
+ 'title': ('event', 'name', {str}),
+ 'uploader': ('profile', 'name', {str}),
+ 'uploader_id': ('profile', 'id', {str_or_none}),
+ 'release_timestamp': ('stream', 'start', 'timestamp', {int_or_none}),
+ 'categories': ('event', 'genres', ..., {lambda x: x or None}),
+ }),
+ **traverse_obj(player_meta, ('initial_event_info', {
+ 'alt_title': ('title', {str}),
+ })),
+ 'thumbnails': [{'url': url, 'id': url_basename(url)} for url in thumbnail_urls if url_or_none(url)]
+ }
+
+
+class ZaikoETicketIE(ZaikoBaseIE):
+ _VALID_URL = r'https?://(?:www\.)?zaiko\.io/account/eticket/(?P<id>[\w=-]{49})'
+ _TESTS = [{
+ 'url': 'https://zaiko.io/account/eticket/TZjMwMzQ2Y2EzMXwyMDIzMDYwNzEyMTMyNXw1MDViOWU2Mw==',
+ 'playlist_count': 1,
+ 'info_dict': {
+ 'id': 'f30346ca31-20230607121325-505b9e63',
+ 'title': 'ZAIKO STREAMING TEST',
+ 'thumbnail': 'https://media.zkocdn.net/pf_1/1_3wdyjcjyupseatkwid34u',
+ },
+ 'skip': 'Only available with the ticketholding account',
+ }]
+
+ def _real_extract(self, url):
+ ticket_id = self._match_id(url)
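+ # Ticket ids are base64url-encoded (minus a one-char prefix) with
+ # '|'-separated fields; normalize to the dashed form used as the playlist id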
+ ticket_id = try_call(
+ lambda: base64.urlsafe_b64decode(ticket_id[1:]).decode().replace('|', '-')) or ticket_id
+
+ webpage = self._download_real_webpage(url, ticket_id)
+ eticket = self._parse_vue_element_attr('eticket', webpage, ticket_id)
+
+ return self.playlist_result(
+ [self.url_result(stream, ZaikoIE) for stream in traverse_obj(eticket, ('streams', ..., 'url'))],
+ ticket_id, **traverse_obj(eticket, ('ticket-details', {
+ 'title': 'event_name',
+ 'thumbnail': 'event_img_url',
+ })))
diff --git a/yt_dlp/extractor/zapiks.py b/yt_dlp/extractor/zapiks.py
new file mode 100644
index 0000000..88f526b
--- /dev/null
+++ b/yt_dlp/extractor/zapiks.py
@@ -0,0 +1,106 @@
+import re
+
+from .common import InfoExtractor
+from ..utils import (
+ int_or_none,
+ parse_duration,
+ parse_iso8601,
+ xpath_text,
+ xpath_with_ns,
+)
+
+
+class ZapiksIE(InfoExtractor):
+ _VALID_URL = r'https?://(?:www\.)?zapiks\.(?:fr|com)/(?:(?:[a-z]{2}/)?(?P<display_id>.+?)\.html|index\.php\?.*\bmedia_id=(?P<id>\d+))'
+ _EMBED_REGEX = [r'<iframe[^>]+src="(?P<url>https?://(?:www\.)?zapiks\.fr/index\.php\?.+?)"']
+ _TESTS = [
+ {
+ 'url': 'http://www.zapiks.fr/ep2s3-bon-appetit-eh-be-viva.html',
+ 'md5': 'aeb3c473b2d564b2d46d664d28d5f050',
+ 'info_dict': {
+ 'id': '80798',
+ 'ext': 'mp4',
+ 'title': 'EP2S3 - Bon Appétit - Eh bé viva les pyrénées con!',
+ 'description': 'md5:7054d6f6f620c6519be1fe710d4da847',
+ 'thumbnail': r're:^https?://.*\.jpg$',
+ 'duration': 528,
+ 'timestamp': 1359044972,
+ 'upload_date': '20130124',
+ 'view_count': int,
+ },
+ },
+ {
+ 'url': 'http://www.zapiks.com/ep3s5-bon-appetit-baqueira-m-1.html',
+ 'only_matching': True,
+ },
+ {
+ 'url': 'http://www.zapiks.com/nl/ep3s5-bon-appetit-baqueira-m-1.html',
+ 'only_matching': True,
+ },
+ {
+ 'url': 'http://www.zapiks.fr/index.php?action=playerIframe&amp;media_id=118046&amp;width=640&amp;height=360&amp;autoStart=false&amp;language=fr',
+ 'only_matching': True,
+ },
+ ]
+
+ def _real_extract(self, url):
+ mobj = self._match_valid_url(url)
+ video_id = mobj.group('id')
+ display_id = mobj.group('display_id') or video_id
+
+ webpage = self._download_webpage(url, display_id)
+
+ if not video_id:
+ video_id = self._search_regex(
+ r'data-media-id="(\d+)"', webpage, 'video id')
+
+ playlist = self._download_xml(
+ 'http://www.zapiks.fr/view/index.php?action=playlist&media_id=%s&lang=en' % video_id,
+ display_id)
+
+ NS_MAP = {
+ 'jwplayer': 'http://rss.jwpcdn.com/'
+ }
+
+ def ns(path):
+ return xpath_with_ns(path, NS_MAP)
+
+ item = playlist.find('./channel/item')
+
+ title = xpath_text(item, 'title', 'title') or self._og_search_title(webpage)
+ description = self._og_search_description(webpage, default=None)
+ thumbnail = xpath_text(
+ item, ns('./jwplayer:image'), 'thumbnail') or self._og_search_thumbnail(webpage, default=None)
+ duration = parse_duration(self._html_search_meta(
+ 'duration', webpage, 'duration', default=None))
+ timestamp = parse_iso8601(self._html_search_meta(
+ 'uploadDate', webpage, 'upload date', default=None), ' ')
+
+ view_count = int_or_none(self._search_regex(
+ r'UserPlays:(\d+)', webpage, 'view count', default=None))
+ comment_count = int_or_none(self._search_regex(
+ r'UserComments:(\d+)', webpage, 'comment count', default=None))
+
+ formats = []
+ for source in item.findall(ns('./jwplayer:source')):
+ format_id = source.attrib['label']
+ f = {
+ 'url': source.attrib['file'],
+ 'format_id': format_id,
+ }
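+ # Derive the video height from quality labels like '720p'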
+ m = re.search(r'^(?P<height>\d+)[pP]', format_id)
+ if m:
+ f['height'] = int(m.group('height'))
+ formats.append(f)
+
+ return {
+ 'id': video_id,
+ 'title': title,
+ 'description': description,
+ 'thumbnail': thumbnail,
+ 'duration': duration,
+ 'timestamp': timestamp,
+ 'view_count': view_count,
+ 'comment_count': comment_count,
+ 'formats': formats,
+ }
diff --git a/yt_dlp/extractor/zattoo.py b/yt_dlp/extractor/zattoo.py
new file mode 100644
index 0000000..6bd9ea0
--- /dev/null
+++ b/yt_dlp/extractor/zattoo.py
@@ -0,0 +1,865 @@
+import re
+from uuid import uuid4
+
+from .common import InfoExtractor
+from ..compat import compat_str
+from ..networking.exceptions import HTTPError
+from ..utils import (
+ ExtractorError,
+ int_or_none,
+ join_nonempty,
+ try_get,
+ url_or_none,
+ urlencode_postdata,
+)
+
+
+class ZattooPlatformBaseIE(InfoExtractor):
+ _power_guide_hash = None
+
+ def _host_url(self):
+ return 'https://%s' % (self._API_HOST if hasattr(self, '_API_HOST') else self._HOST)
+
+ def _real_initialize(self):
+ if not self._power_guide_hash:
+ self.raise_login_required('An account is needed to access this media', method='password')
+
+ def _perform_login(self, username, password):
+ try:
+ data = self._download_json(
+ '%s/zapi/v2/account/login' % self._host_url(), None, 'Logging in',
+ data=urlencode_postdata({
+ 'login': username,
+ 'password': password,
+ 'remember': 'true',
+ }), headers={
+ 'Referer': '%s/login' % self._host_url(),
+ 'Content-Type': 'application/x-www-form-urlencoded; charset=UTF-8',
+ })
+ except ExtractorError as e:
+ if isinstance(e.cause, HTTPError) and e.cause.status == 400:
+ raise ExtractorError(
+ 'Unable to login: incorrect username and/or password',
+ expected=True)
+ raise
+
+ self._power_guide_hash = data['session']['power_guide_hash']
+
+ def _initialize_pre_login(self):
+ session_token = self._download_json(
+ f'{self._host_url()}/token.json', None, 'Downloading session token')['session_token']
+
+ # Will setup appropriate cookies
+ self._request_webpage(
+ '%s/zapi/v3/session/hello' % self._host_url(), None,
+ 'Opening session', data=urlencode_postdata({
+ 'uuid': compat_str(uuid4()),
+ 'lang': 'en',
+ 'app_version': '1.8.2',
+ 'format': 'json',
+ 'client_app_token': session_token,
+ }))
+
+ def _extract_video_id_from_recording(self, recid):
+ playlist = self._download_json(
+ f'{self._host_url()}/zapi/v2/playlist', recid, 'Downloading playlist')
+ try:
+ return next(
+ str(item['program_id']) for item in playlist['recordings']
+ if item.get('program_id') and str(item.get('id')) == recid)
+ except (StopIteration, KeyError):
+ raise ExtractorError('Could not extract video id from recording')
+
+ def _extract_cid(self, video_id, channel_name):
+ channel_groups = self._download_json(
+ '%s/zapi/v2/cached/channels/%s' % (self._host_url(),
+ self._power_guide_hash),
+ video_id, 'Downloading channel list',
+ query={'details': False})['channel_groups']
+ channel_list = []
+ for chgrp in channel_groups:
+ channel_list.extend(chgrp['channels'])
+ try:
+ return next(
+ chan['cid'] for chan in channel_list
+ if chan.get('cid') and (
+ chan.get('display_alias') == channel_name
+ or chan.get('cid') == channel_name))
+ except StopIteration:
+ raise ExtractorError('Could not extract channel id')
+
+ def _extract_cid_and_video_info(self, video_id):
+ data = self._download_json(
+ '%s/zapi/v2/cached/program/power_details/%s' % (
+ self._host_url(), self._power_guide_hash),
+ video_id,
+ 'Downloading video information',
+ query={
+ 'program_ids': video_id,
+ 'complete': True,
+ })
+
+ p = data['programs'][0]
+ cid = p['cid']
+
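+ # power_details uses terse keys: t=title, et=episode title, d=description,
+ # e_no/s_no=episode/season number, c=categories, g=tags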
+ info_dict = {
+ 'id': video_id,
+ 'title': p.get('t') or p['et'],
+ 'description': p.get('d'),
+ 'thumbnail': p.get('i_url'),
+ 'creator': p.get('channel_name'),
+ 'episode': p.get('et'),
+ 'episode_number': int_or_none(p.get('e_no')),
+ 'season_number': int_or_none(p.get('s_no')),
+ 'release_year': int_or_none(p.get('year')),
+ 'categories': try_get(p, lambda x: x['c'], list),
+ 'tags': try_get(p, lambda x: x['g'], list)
+ }
+
+ return cid, info_dict
+
+ def _extract_ondemand_info(self, ondemand_id):
+ """
+ @returns (ondemand_token, ondemand_type, info_dict)
+ """
+ data = self._download_json(
+ '%s/zapi/vod/movies/%s' % (self._host_url(), ondemand_id),
+ ondemand_id, 'Downloading ondemand information')
+ info_dict = {
+ 'id': ondemand_id,
+ 'title': data.get('title'),
+ 'description': data.get('description'),
+ 'duration': int_or_none(data.get('duration')),
+ 'release_year': int_or_none(data.get('year')),
+ 'episode_number': int_or_none(data.get('episode_number')),
+ 'season_number': int_or_none(data.get('season_number')),
+ 'categories': try_get(data, lambda x: x['categories'], list),
+ }
+ return data['terms_catalog'][0]['terms'][0]['token'], data['type'], info_dict
+
+ def _extract_formats(self, cid, video_id, record_id=None, ondemand_id=None, ondemand_termtoken=None, ondemand_type=None, is_live=False):
+ postdata_common = {
+ 'https_watch_urls': True,
+ }
+
+ if is_live:
+ postdata_common.update({'timeshift': 10800})
+ url = '%s/zapi/watch/live/%s' % (self._host_url(), cid)
+ elif record_id:
+ url = '%s/zapi/watch/recording/%s' % (self._host_url(), record_id)
+ elif ondemand_id:
+ postdata_common.update({
+ 'teasable_id': ondemand_id,
+ 'term_token': ondemand_termtoken,
+ 'teasable_type': ondemand_type
+ })
+ url = '%s/zapi/watch/vod/video' % self._host_url()
+ else:
+ url = '%s/zapi/v3/watch/replay/%s/%s' % (self._host_url(), cid, video_id)
+ formats = []
+ subtitles = {}
+ for stream_type in ('dash', 'hls7'):
+ postdata = postdata_common.copy()
+ postdata['stream_type'] = stream_type
+
+ data = self._download_json(
+ url, video_id, 'Downloading %s formats' % stream_type.upper(),
+ data=urlencode_postdata(postdata), fatal=False)
+ if not data:
+ continue
+
+ watch_urls = try_get(
+ data, lambda x: x['stream']['watch_urls'], list)
+ if not watch_urls:
+ continue
+
+ for watch in watch_urls:
+ if not isinstance(watch, dict):
+ continue
+ watch_url = url_or_none(watch.get('url'))
+ if not watch_url:
+ continue
+ audio_channel = watch.get('audio_channel')
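+ # Give streams of audio channel 'A' a higher quality preference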
+ preference = 1 if audio_channel == 'A' else None
+ format_id = join_nonempty(stream_type, watch.get('maxrate'), audio_channel)
+ if stream_type.startswith('dash'):
+ this_formats, subs = self._extract_mpd_formats_and_subtitles(
+ watch_url, video_id, mpd_id=format_id, fatal=False)
+ self._merge_subtitles(subs, target=subtitles)
+ elif stream_type.startswith('hls'):
+ this_formats, subs = self._extract_m3u8_formats_and_subtitles(
+ watch_url, video_id, 'mp4',
+ entry_protocol='m3u8_native', m3u8_id=format_id,
+ fatal=False)
+ self._merge_subtitles(subs, target=subtitles)
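+ # NB: only 'dash' and 'hls7' are requested above, so the 'hds' and
+ # 'smooth_playready' branches below are currently unreachable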
+ elif stream_type == 'hds':
+ this_formats = self._extract_f4m_formats(
+ watch_url, video_id, f4m_id=format_id, fatal=False)
+ elif stream_type == 'smooth_playready':
+ this_formats = self._extract_ism_formats(
+ watch_url, video_id, ism_id=format_id, fatal=False)
+ else:
+ assert False
+ for this_format in this_formats:
+ this_format['quality'] = preference
+ formats.extend(this_formats)
+ return formats, subtitles
+
+ def _extract_video(self, video_id, record_id=None):
+ cid, info_dict = self._extract_cid_and_video_info(video_id)
+ info_dict['formats'], info_dict['subtitles'] = self._extract_formats(cid, video_id, record_id=record_id)
+ return info_dict
+
+ def _extract_live(self, channel_name):
+ cid = self._extract_cid(channel_name, channel_name)
+ formats, subtitles = self._extract_formats(cid, cid, is_live=True)
+ return {
+ 'id': channel_name,
+ 'title': channel_name,
+ 'is_live': True,
+ 'formats': formats,
+ 'subtitles': subtitles
+ }
+
+ def _extract_record(self, record_id):
+ video_id = self._extract_video_id_from_recording(record_id)
+ cid, info_dict = self._extract_cid_and_video_info(video_id)
+ info_dict['formats'], info_dict['subtitles'] = self._extract_formats(cid, video_id, record_id=record_id)
+ return info_dict
+
+ def _extract_ondemand(self, ondemand_id):
+ ondemand_termtoken, ondemand_type, info_dict = self._extract_ondemand_info(ondemand_id)
+ info_dict['formats'], info_dict['subtitles'] = self._extract_formats(
+ None, ondemand_id, ondemand_id=ondemand_id,
+ ondemand_termtoken=ondemand_termtoken, ondemand_type=ondemand_type)
+ return info_dict
+
+ def _real_extract(self, url):
+ video_id, record_id = self._match_valid_url(url).groups()
+ return getattr(self, f'_extract_{self._TYPE}')(video_id or record_id)
+
+
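+# Build a _VALID_URL regex matching either a query-string form (?...&<qs>=<id>)
+# or, when base_re is given, a path form (<base_re>/<id>)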
+def _create_valid_url(host, match, qs, base_re=None):
+ match_base = fr'|{base_re}/(?P<vid1>{match})' if base_re else '(?P<vid1>)'
+ return rf'''(?x)https?://(?:www\.)?{re.escape(host)}/(?:
+ [^?#]+\?(?:[^#]+&)?{qs}=(?P<vid2>{match})
+ {match_base}
+ )'''
+
+
+class ZattooBaseIE(ZattooPlatformBaseIE):
+ _NETRC_MACHINE = 'zattoo'
+ _HOST = 'zattoo.com'
+
+
+class ZattooIE(ZattooBaseIE):
+ _VALID_URL = _create_valid_url(ZattooBaseIE._HOST, r'\d+', 'program', '(?:program|watch)/[^/]+')
+ _TYPE = 'video'
+ _TESTS = [{
+ 'url': 'https://zattoo.com/program/zdf/250170418',
+ 'info_dict': {
+ 'id': '250170418',
+ 'ext': 'mp4',
+ 'title': 'Markus Lanz',
+ 'description': 'md5:e41cb1257de008ca62a73bb876ffa7fc',
+ 'thumbnail': 're:http://images.zattic.com/cms/.+/format_480x360.jpg',
+ 'creator': 'ZDF HD',
+ 'release_year': 2022,
+ 'episode': 'Folge 1655',
+ 'categories': 'count:1',
+ 'tags': 'count:2'
+ },
+ 'params': {'skip_download': 'm3u8'}
+ }, {
+ 'url': 'https://zattoo.com/program/daserste/210177916',
+ 'only_matching': True,
+ }, {
+ 'url': 'https://zattoo.com/guide/german?channel=srf1&program=169860555',
+ 'only_matching': True,
+ }]
+
+
+class ZattooLiveIE(ZattooBaseIE):
+ _VALID_URL = _create_valid_url(ZattooBaseIE._HOST, r'[^/?&#]+', 'channel', 'live')
+ _TYPE = 'live'
+ _TESTS = [{
+ 'url': 'https://zattoo.com/channels/german?channel=srf_zwei',
+ 'only_matching': True,
+ }, {
+ 'url': 'https://zattoo.com/live/srf1',
+ 'only_matching': True,
+ }]
+
+ @classmethod
+ def suitable(cls, url):
+ return False if ZattooIE.suitable(url) else super().suitable(url)
+
+
+class ZattooMoviesIE(ZattooBaseIE):
+ _VALID_URL = _create_valid_url(ZattooBaseIE._HOST, r'\w+', 'movie_id', 'vod/movies')
+ _TYPE = 'ondemand'
+ _TESTS = [{
+ 'url': 'https://zattoo.com/vod/movies/7521',
+ 'only_matching': True,
+ }, {
+ 'url': 'https://zattoo.com/ondemand?movie_id=7521&term_token=9f00f43183269484edde',
+ 'only_matching': True,
+ }]
+
+
+class ZattooRecordingsIE(ZattooBaseIE):
+ _VALID_URL = _create_valid_url('zattoo.com', r'\d+', 'recording')
+ _TYPE = 'record'
+ _TESTS = [{
+ 'url': 'https://zattoo.com/recordings?recording=193615508',
+ 'only_matching': True,
+ }, {
+ 'url': 'https://zattoo.com/tc/ptc_recordings_all_recordings?recording=193615420',
+ 'only_matching': True,
+ }]
+
+
+class NetPlusTVBaseIE(ZattooPlatformBaseIE):
+ _NETRC_MACHINE = 'netplus'
+ _HOST = 'netplus.tv'
+ _API_HOST = 'www.%s' % _HOST
+
+
+class NetPlusTVIE(NetPlusTVBaseIE):
+ _VALID_URL = _create_valid_url(NetPlusTVBaseIE._HOST, r'\d+', 'program', '(?:program|watch)/[^/]+')
+ _TYPE = 'video'
+ _TESTS = [{
+ 'url': 'https://netplus.tv/program/daserste/210177916',
+ 'only_matching': True,
+ }, {
+ 'url': 'https://netplus.tv/guide/german?channel=srf1&program=169860555',
+ 'only_matching': True,
+ }]
+
+
+class NetPlusTVLiveIE(NetPlusTVBaseIE):
+ _VALID_URL = _create_valid_url(NetPlusTVBaseIE._HOST, r'[^/?&#]+', 'channel', 'live')
+ _TYPE = 'live'
+ _TESTS = [{
+ 'url': 'https://netplus.tv/channels/german?channel=srf_zwei',
+ 'only_matching': True,
+ }, {
+ 'url': 'https://netplus.tv/live/srf1',
+ 'only_matching': True,
+ }]
+
+ @classmethod
+ def suitable(cls, url):
+ return False if NetPlusTVIE.suitable(url) else super().suitable(url)
+
+
+class NetPlusTVRecordingsIE(NetPlusTVBaseIE):
+ _VALID_URL = _create_valid_url(NetPlusTVBaseIE._HOST, r'\d+', 'recording')
+ _TYPE = 'record'
+ _TESTS = [{
+ 'url': 'https://netplus.tv/recordings?recording=193615508',
+ 'only_matching': True,
+ }, {
+ 'url': 'https://netplus.tv/tc/ptc_recordings_all_recordings?recording=193615420',
+ 'only_matching': True,
+ }]
+
+
+class MNetTVBaseIE(ZattooPlatformBaseIE):
+ _NETRC_MACHINE = 'mnettv'
+ _HOST = 'tvplus.m-net.de'
+
+
+class MNetTVIE(MNetTVBaseIE):
+ _VALID_URL = _create_valid_url(MNetTVBaseIE._HOST, r'\d+', 'program', '(?:program|watch)/[^/]+')
+ _TYPE = 'video'
+ _TESTS = [{
+ 'url': 'https://tvplus.m-net.de/program/daserste/210177916',
+ 'only_matching': True,
+ }, {
+ 'url': 'https://tvplus.m-net.de/guide/german?channel=srf1&program=169860555',
+ 'only_matching': True,
+ }]
+
+
+class MNetTVLiveIE(MNetTVBaseIE):
+ _VALID_URL = _create_valid_url(MNetTVBaseIE._HOST, r'[^/?&#]+', 'channel', 'live')
+ _TYPE = 'live'
+ _TESTS = [{
+ 'url': 'https://tvplus.m-net.de/channels/german?channel=srf_zwei',
+ 'only_matching': True,
+ }, {
+ 'url': 'https://tvplus.m-net.de/live/srf1',
+ 'only_matching': True,
+ }]
+
+ @classmethod
+ def suitable(cls, url):
+ return False if MNetTVIE.suitable(url) else super().suitable(url)
+
+
+class MNetTVRecordingsIE(MNetTVBaseIE):
+ _VALID_URL = _create_valid_url(MNetTVBaseIE._HOST, r'\d+', 'recording')
+ _TYPE = 'record'
+ _TESTS = [{
+ 'url': 'https://tvplus.m-net.de/recordings?recording=193615508',
+ 'only_matching': True,
+ }, {
+ 'url': 'https://tvplus.m-net.de/tc/ptc_recordings_all_recordings?recording=193615420',
+ 'only_matching': True,
+ }]
+
+
+class WalyTVBaseIE(ZattooPlatformBaseIE):
+ _NETRC_MACHINE = 'walytv'
+ _HOST = 'player.waly.tv'
+
+
+class WalyTVIE(WalyTVBaseIE):
+ _VALID_URL = _create_valid_url(WalyTVBaseIE._HOST, r'\d+', 'program', '(?:program|watch)/[^/]+')
+ _TYPE = 'video'
+ _TESTS = [{
+ 'url': 'https://player.waly.tv/program/daserste/210177916',
+ 'only_matching': True,
+ }, {
+ 'url': 'https://player.waly.tv/guide/german?channel=srf1&program=169860555',
+ 'only_matching': True,
+ }]
+
+
+class WalyTVLiveIE(WalyTVBaseIE):
+ _VALID_URL = _create_valid_url(WalyTVBaseIE._HOST, r'[^/?&#]+', 'channel', 'live')
+ _TYPE = 'live'
+ _TESTS = [{
+ 'url': 'https://player.waly.tv/channels/german?channel=srf_zwei',
+ 'only_matching': True,
+ }, {
+ 'url': 'https://player.waly.tv/live/srf1',
+ 'only_matching': True,
+ }]
+
+ @classmethod
+ def suitable(cls, url):
+ return False if WalyTVIE.suitable(url) else super().suitable(url)
+
+
+class WalyTVRecordingsIE(WalyTVBaseIE):
+ _VALID_URL = _create_valid_url(WalyTVBaseIE._HOST, r'\d+', 'recording')
+ _TYPE = 'record'
+ _TESTS = [{
+ 'url': 'https://player.waly.tv/recordings?recording=193615508',
+ 'only_matching': True,
+ }, {
+ 'url': 'https://player.waly.tv/tc/ptc_recordings_all_recordings?recording=193615420',
+ 'only_matching': True,
+ }]
+
+
+class BBVTVBaseIE(ZattooPlatformBaseIE):
+ _NETRC_MACHINE = 'bbvtv'
+ _HOST = 'bbv-tv.net'
+ _API_HOST = 'www.%s' % _HOST
+
+
+class BBVTVIE(BBVTVBaseIE):
+ _VALID_URL = _create_valid_url(BBVTVBaseIE._HOST, r'\d+', 'program', '(?:program|watch)/[^/]+')
+ _TYPE = 'video'
+ _TESTS = [{
+ 'url': 'https://bbv-tv.net/program/daserste/210177916',
+ 'only_matching': True,
+ }, {
+ 'url': 'https://bbv-tv.net/guide/german?channel=srf1&program=169860555',
+ 'only_matching': True,
+ }]
+
+
+class BBVTVLiveIE(BBVTVBaseIE):
+ _VALID_URL = _create_valid_url(BBVTVBaseIE._HOST, r'[^/?&#]+', 'channel', 'live')
+ _TYPE = 'live'
+ _TESTS = [{
+ 'url': 'https://bbv-tv.net/channels/german?channel=srf_zwei',
+ 'only_matching': True,
+ }, {
+ 'url': 'https://bbv-tv.net/live/srf1',
+ 'only_matching': True,
+ }]
+
+ @classmethod
+ def suitable(cls, url):
+ return False if BBVTVIE.suitable(url) else super().suitable(url)
+
+
+class BBVTVRecordingsIE(BBVTVBaseIE):
+ _VALID_URL = _create_valid_url(BBVTVBaseIE._HOST, r'\d+', 'recording')
+ _TYPE = 'record'
+ _TESTS = [{
+ 'url': 'https://bbv-tv.net/recordings?recording=193615508',
+ 'only_matching': True,
+ }, {
+ 'url': 'https://bbv-tv.net/tc/ptc_recordings_all_recordings?recording=193615420',
+ 'only_matching': True,
+ }]
+
+
+class VTXTVBaseIE(ZattooPlatformBaseIE):
+ _NETRC_MACHINE = 'vtxtv'
+ _HOST = 'vtxtv.ch'
+ _API_HOST = 'www.%s' % _HOST
+
+
+class VTXTVIE(VTXTVBaseIE):
+ _VALID_URL = _create_valid_url(VTXTVBaseIE._HOST, r'\d+', 'program', '(?:program|watch)/[^/]+')
+ _TYPE = 'video'
+ _TESTS = [{
+ 'url': 'https://vtxtv.ch/program/daserste/210177916',
+ 'only_matching': True,
+ }, {
+ 'url': 'https://vtxtv.ch/guide/german?channel=srf1&program=169860555',
+ 'only_matching': True,
+ }]
+
+
+class VTXTVLiveIE(VTXTVBaseIE):
+ _VALID_URL = _create_valid_url(VTXTVBaseIE._HOST, r'[^/?&#]+', 'channel', 'live')
+ _TYPE = 'live'
+ _TESTS = [{
+ 'url': 'https://vtxtv.ch/channels/german?channel=srf_zwei',
+ 'only_matching': True,
+ }, {
+ 'url': 'https://vtxtv.ch/live/srf1',
+ 'only_matching': True,
+ }]
+
+ @classmethod
+ def suitable(cls, url):
+ return False if VTXTVIE.suitable(url) else super().suitable(url)
+
+
+class VTXTVRecordingsIE(VTXTVBaseIE):
+ _VALID_URL = _create_valid_url(VTXTVBaseIE._HOST, r'\d+', 'recording')
+ _TYPE = 'record'
+ _TESTS = [{
+ 'url': 'https://vtxtv.ch/recordings?recording=193615508',
+ 'only_matching': True,
+ }, {
+ 'url': 'https://vtxtv.ch/tc/ptc_recordings_all_recordings?recording=193615420',
+ 'only_matching': True,
+ }]
+
+
+class GlattvisionTVBaseIE(ZattooPlatformBaseIE):
+ _NETRC_MACHINE = 'glattvisiontv'
+ _HOST = 'iptv.glattvision.ch'
+
+
+class GlattvisionTVIE(GlattvisionTVBaseIE):
+ _VALID_URL = _create_valid_url(GlattvisionTVBaseIE._HOST, r'\d+', 'program', '(?:program|watch)/[^/]+')
+ _TYPE = 'video'
+ _TESTS = [{
+ 'url': 'https://iptv.glattvision.ch/program/daserste/210177916',
+ 'only_matching': True,
+ }, {
+ 'url': 'https://iptv.glattvision.ch/guide/german?channel=srf1&program=169860555',
+ 'only_matching': True,
+ }]
+
+
+class GlattvisionTVLiveIE(GlattvisionTVBaseIE):
+ _VALID_URL = _create_valid_url(GlattvisionTVBaseIE._HOST, r'[^/?&#]+', 'channel', 'live')
+ _TYPE = 'live'
+ _TESTS = [{
+ 'url': 'https://iptv.glattvision.ch/channels/german?channel=srf_zwei',
+ 'only_matching': True,
+ }, {
+ 'url': 'https://iptv.glattvision.ch/live/srf1',
+ 'only_matching': True,
+ }]
+
+ @classmethod
+ def suitable(cls, url):
+ return False if GlattvisionTVIE.suitable(url) else super().suitable(url)
+
+
+class GlattvisionTVRecordingsIE(GlattvisionTVBaseIE):
+ _VALID_URL = _create_valid_url(GlattvisionTVBaseIE._HOST, r'\d+', 'recording')
+ _TYPE = 'record'
+ _TESTS = [{
+ 'url': 'https://iptv.glattvision.ch/recordings?recording=193615508',
+ 'only_matching': True,
+ }, {
+ 'url': 'https://iptv.glattvision.ch/tc/ptc_recordings_all_recordings?recording=193615420',
+ 'only_matching': True,
+ }]
+
+
+class SAKTVBaseIE(ZattooPlatformBaseIE):
+ _NETRC_MACHINE = 'saktv'
+ _HOST = 'saktv.ch'
+ _API_HOST = 'www.%s' % _HOST
+
+
+class SAKTVIE(SAKTVBaseIE):
+ _VALID_URL = _create_valid_url(SAKTVBaseIE._HOST, r'\d+', 'program', '(?:program|watch)/[^/]+')
+ _TYPE = 'video'
+ _TESTS = [{
+ 'url': 'https://saktv.ch/program/daserste/210177916',
+ 'only_matching': True,
+ }, {
+ 'url': 'https://saktv.ch/guide/german?channel=srf1&program=169860555',
+ 'only_matching': True,
+ }]
+
+
+class SAKTVLiveIE(SAKTVBaseIE):
+ _VALID_URL = _create_valid_url(SAKTVBaseIE._HOST, r'[^/?&#]+', 'channel', 'live')
+ _TYPE = 'live'
+ _TESTS = [{
+ 'url': 'https://saktv.ch/channels/german?channel=srf_zwei',
+ 'only_matching': True,
+ }, {
+ 'url': 'https://saktv.ch/live/srf1',
+ 'only_matching': True,
+ }]
+
+ @classmethod
+ def suitable(cls, url):
+ return False if SAKTVIE.suitable(url) else super().suitable(url)
+
+
+class SAKTVRecordingsIE(SAKTVBaseIE):
+ _VALID_URL = _create_valid_url(SAKTVBaseIE._HOST, r'\d+', 'recording')
+ _TYPE = 'record'
+ _TESTS = [{
+ 'url': 'https://saktv.ch/recordings?recording=193615508',
+ 'only_matching': True,
+ }, {
+ 'url': 'https://saktv.ch/tc/ptc_recordings_all_recordings?recording=193615420',
+ 'only_matching': True,
+ }]
+
+
+class EWETVBaseIE(ZattooPlatformBaseIE):
+ _NETRC_MACHINE = 'ewetv'
+ _HOST = 'tvonline.ewe.de'
+
+
+class EWETVIE(EWETVBaseIE):
+ _VALID_URL = _create_valid_url(EWETVBaseIE._HOST, r'\d+', 'program', '(?:program|watch)/[^/]+')
+ _TYPE = 'video'
+ _TESTS = [{
+ 'url': 'https://tvonline.ewe.de/program/daserste/210177916',
+ 'only_matching': True,
+ }, {
+ 'url': 'https://tvonline.ewe.de/guide/german?channel=srf1&program=169860555',
+ 'only_matching': True,
+ }]
+
+
+class EWETVLiveIE(EWETVBaseIE):
+ _VALID_URL = _create_valid_url(EWETVBaseIE._HOST, r'[^/?&#]+', 'channel', 'live')
+ _TYPE = 'live'
+ _TESTS = [{
+ 'url': 'https://tvonline.ewe.de/channels/german?channel=srf_zwei',
+ 'only_matching': True,
+ }, {
+ 'url': 'https://tvonline.ewe.de/live/srf1',
+ 'only_matching': True,
+ }]
+
+ @classmethod
+ def suitable(cls, url):
+ return False if EWETVIE.suitable(url) else super().suitable(url)
+
+
+class EWETVRecordingsIE(EWETVBaseIE):
+ _VALID_URL = _create_valid_url(EWETVBaseIE._HOST, r'\d+', 'recording')
+ _TYPE = 'record'
+ _TESTS = [{
+ 'url': 'https://tvonline.ewe.de/recordings?recording=193615508',
+ 'only_matching': True,
+ }, {
+ 'url': 'https://tvonline.ewe.de/tc/ptc_recordings_all_recordings?recording=193615420',
+ 'only_matching': True,
+ }]
+
+
+class QuantumTVBaseIE(ZattooPlatformBaseIE):
+ _NETRC_MACHINE = 'quantumtv'
+ _HOST = 'quantum-tv.com'
+ _API_HOST = 'www.%s' % _HOST
+
+
+class QuantumTVIE(QuantumTVBaseIE):
+ _VALID_URL = _create_valid_url(QuantumTVBaseIE._HOST, r'\d+', 'program', '(?:program|watch)/[^/]+')
+ _TYPE = 'video'
+ _TESTS = [{
+ 'url': 'https://quantum-tv.com/program/daserste/210177916',
+ 'only_matching': True,
+ }, {
+ 'url': 'https://quantum-tv.com/guide/german?channel=srf1&program=169860555',
+ 'only_matching': True,
+ }]
+
+
+class QuantumTVLiveIE(QuantumTVBaseIE):
+ _VALID_URL = _create_valid_url(QuantumTVBaseIE._HOST, r'[^/?&#]+', 'channel', 'live')
+ _TYPE = 'live'
+ _TESTS = [{
+ 'url': 'https://quantum-tv.com/channels/german?channel=srf_zwei',
+ 'only_matching': True,
+ }, {
+ 'url': 'https://quantum-tv.com/live/srf1',
+ 'only_matching': True,
+ }]
+
+ @classmethod
+ def suitable(cls, url):
+ return False if QuantumTVIE.suitable(url) else super().suitable(url)
+
+
+class QuantumTVRecordingsIE(QuantumTVBaseIE):
+ _VALID_URL = _create_valid_url(QuantumTVBaseIE._HOST, r'\d+', 'recording')
+ _TYPE = 'record'
+ _TESTS = [{
+ 'url': 'https://quantum-tv.com/recordings?recording=193615508',
+ 'only_matching': True,
+ }, {
+ 'url': 'https://quantum-tv.com/tc/ptc_recordings_all_recordings?recording=193615420',
+ 'only_matching': True,
+ }]
+
+
+class OsnatelTVBaseIE(ZattooPlatformBaseIE):
+ _NETRC_MACHINE = 'osnateltv'
+ _HOST = 'tvonline.osnatel.de'
+
+
+class OsnatelTVIE(OsnatelTVBaseIE):
+ _VALID_URL = _create_valid_url(OsnatelTVBaseIE._HOST, r'\d+', 'program', '(?:program|watch)/[^/]+')
+ _TYPE = 'video'
+ _TESTS = [{
+ 'url': 'https://tvonline.osnatel.de/program/daserste/210177916',
+ 'only_matching': True,
+ }, {
+ 'url': 'https://tvonline.osnatel.de/guide/german?channel=srf1&program=169860555',
+ 'only_matching': True,
+ }]
+
+
+class OsnatelTVLiveIE(OsnatelTVBaseIE):
+ _VALID_URL = _create_valid_url(OsnatelTVBaseIE._HOST, r'[^/?&#]+', 'channel', 'live')
+ _TYPE = 'live'
+ _TESTS = [{
+ 'url': 'https://tvonline.osnatel.de/channels/german?channel=srf_zwei',
+ 'only_matching': True,
+ }, {
+ 'url': 'https://tvonline.osnatel.de/live/srf1',
+ 'only_matching': True,
+ }]
+
+ @classmethod
+ def suitable(cls, url):
+ return False if OsnatelTVIE.suitable(url) else super().suitable(url)
+
+
+class OsnatelTVRecordingsIE(OsnatelTVBaseIE):
+ _VALID_URL = _create_valid_url(OsnatelTVBaseIE._HOST, r'\d+', 'recording')
+ _TYPE = 'record'
+ _TESTS = [{
+ 'url': 'https://tvonline.osnatel.de/recordings?recording=193615508',
+ 'only_matching': True,
+ }, {
+ 'url': 'https://tvonline.osnatel.de/tc/ptc_recordings_all_recordings?recording=193615420',
+ 'only_matching': True,
+ }]
+
+
+class EinsUndEinsTVBaseIE(ZattooPlatformBaseIE):
+ _NETRC_MACHINE = '1und1tv'
+ _HOST = '1und1.tv'
+ _API_HOST = 'www.%s' % _HOST
+
+
+class EinsUndEinsTVIE(EinsUndEinsTVBaseIE):
+ _VALID_URL = _create_valid_url(EinsUndEinsTVBaseIE._HOST, r'\d+', 'program', '(?:program|watch)/[^/]+')
+ _TYPE = 'video'
+ _TESTS = [{
+ 'url': 'https://1und1.tv/program/daserste/210177916',
+ 'only_matching': True,
+ }, {
+ 'url': 'https://1und1.tv/guide/german?channel=srf1&program=169860555',
+ 'only_matching': True,
+ }]
+
+
+class EinsUndEinsTVLiveIE(EinsUndEinsTVBaseIE):
+ _VALID_URL = _create_valid_url(EinsUndEinsTVBaseIE._HOST, r'[^/?&#]+', 'channel', 'live')
+ _TYPE = 'live'
+ _TESTS = [{
+ 'url': 'https://1und1.tv/channels/german?channel=srf_zwei',
+ 'only_matching': True,
+ }, {
+ 'url': 'https://1und1.tv/live/srf1',
+ 'only_matching': True,
+ }]
+
+ @classmethod
+ def suitable(cls, url):
+ return False if EinsUndEinsTVIE.suitable(url) else super().suitable(url)
+
+
+class EinsUndEinsTVRecordingsIE(EinsUndEinsTVBaseIE):
+ _VALID_URL = _create_valid_url(EinsUndEinsTVBaseIE._HOST, r'\d+', 'recording')
+ _TYPE = 'record'
+ _TESTS = [{
+ 'url': 'https://1und1.tv/recordings?recording=193615508',
+ 'only_matching': True,
+ }, {
+ 'url': 'https://1und1.tv/tc/ptc_recordings_all_recordings?recording=193615420',
+ 'only_matching': True,
+ }]
+
+
+class SaltTVBaseIE(ZattooPlatformBaseIE):
+ _NETRC_MACHINE = 'salttv'
+ _HOST = 'tv.salt.ch'
+
+
+class SaltTVIE(SaltTVBaseIE):
+ _VALID_URL = _create_valid_url(SaltTVBaseIE._HOST, r'\d+', 'program', '(?:program|watch)/[^/]+')
+ _TYPE = 'video'
+ _TESTS = [{
+ 'url': 'https://tv.salt.ch/program/daserste/210177916',
+ 'only_matching': True,
+ }, {
+ 'url': 'https://tv.salt.ch/guide/german?channel=srf1&program=169860555',
+ 'only_matching': True,
+ }]
+
+
+class SaltTVLiveIE(SaltTVBaseIE):
+ _VALID_URL = _create_valid_url(SaltTVBaseIE._HOST, r'[^/?&#]+', 'channel', 'live')
+ _TYPE = 'live'
+ _TESTS = [{
+ 'url': 'https://tv.salt.ch/channels/german?channel=srf_zwei',
+ 'only_matching': True,
+ }, {
+ 'url': 'https://tv.salt.ch/live/srf1',
+ 'only_matching': True,
+ }]
+
+ @classmethod
+ def suitable(cls, url):
+ return False if SaltTVIE.suitable(url) else super().suitable(url)
+
+
+class SaltTVRecordingsIE(SaltTVBaseIE):
+ _VALID_URL = _create_valid_url(SaltTVBaseIE._HOST, r'\d+', 'recording')
+ _TYPE = 'record'
+ _TESTS = [{
+ 'url': 'https://tv.salt.ch/recordings?recording=193615508',
+ 'only_matching': True,
+ }, {
+ 'url': 'https://tv.salt.ch/tc/ptc_recordings_all_recordings?recording=193615420',
+ 'only_matching': True,
+ }]
diff --git a/yt_dlp/extractor/zdf.py b/yt_dlp/extractor/zdf.py
new file mode 100644
index 0000000..c04d51b
--- /dev/null
+++ b/yt_dlp/extractor/zdf.py
@@ -0,0 +1,442 @@
+import re
+
+from .common import InfoExtractor
+from ..compat import compat_str
+from ..utils import (
+ NO_DEFAULT,
+ ExtractorError,
+ determine_ext,
+ extract_attributes,
+ float_or_none,
+ int_or_none,
+ join_nonempty,
+ merge_dicts,
+ parse_codecs,
+ qualities,
+ traverse_obj,
+ try_get,
+ unified_timestamp,
+ update_url_query,
+ url_or_none,
+ urljoin,
+)
+
+
+class ZDFBaseIE(InfoExtractor):
+ _GEO_COUNTRIES = ['DE']
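+ # Quality labels used by the PTMD API, ordered worst to best; the index
+ # feeds qualities() in _extract_format to rank formats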
+ _QUALITIES = ('auto', 'low', 'med', 'high', 'veryhigh', 'hd', 'fhd', 'uhd')
+
+ def _call_api(self, url, video_id, item, api_token=None, referrer=None):
+ headers = {}
+ if api_token:
+ headers['Api-Auth'] = 'Bearer %s' % api_token
+ if referrer:
+ headers['Referer'] = referrer
+ return self._download_json(
+ url, video_id, 'Downloading JSON %s' % item, headers=headers)
+
+ @staticmethod
+ def _extract_subtitles(src):
+ subtitles = {}
+ for caption in try_get(src, lambda x: x['captions'], list) or []:
+ subtitle_url = url_or_none(caption.get('uri'))
+ if subtitle_url:
+ lang = caption.get('language', 'deu')
+ subtitles.setdefault(lang, []).append({
+ 'url': subtitle_url,
+ })
+ return subtitles
+
+ def _extract_format(self, video_id, formats, format_urls, meta):
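+ # format_urls tracks stream URLs already seen across PTMD variants so
+ # duplicate formats are skipped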
+ format_url = url_or_none(meta.get('url'))
+ if not format_url or format_url in format_urls:
+ return
+ format_urls.add(format_url)
+
+ mime_type, ext = meta.get('mimeType'), determine_ext(format_url)
+ if mime_type == 'application/x-mpegURL' or ext == 'm3u8':
+ new_formats = self._extract_m3u8_formats(
+ format_url, video_id, 'mp4', m3u8_id='hls',
+ entry_protocol='m3u8_native', fatal=False)
+ elif mime_type == 'application/f4m+xml' or ext == 'f4m':
+ new_formats = self._extract_f4m_formats(
+ update_url_query(format_url, {'hdcore': '3.7.0'}), video_id, f4m_id='hds', fatal=False)
+ elif ext == 'mpd':
+ new_formats = self._extract_mpd_formats(
+ format_url, video_id, mpd_id='dash', fatal=False)
+ else:
+ f = parse_codecs(meta.get('mimeCodec'))
+ if not f and meta.get('type'):
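+ # The PTMD type string appears to follow the pattern
+ # '<vcodec>_<acodec>_<ext>_...' (e.g. 'h264_aac_mp4_http_na_na'),
+ # so fall back to its components when mimeCodec is absent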
+ data = meta['type'].split('_')
+ if try_get(data, lambda x: x[2]) == ext:
+ f = {'vcodec': data[0], 'acodec': data[1]}
+ f.update({
+ 'url': format_url,
+ 'format_id': join_nonempty('http', meta.get('type'), meta.get('quality')),
+ 'tbr': int_or_none(self._search_regex(r'_(\d+)k_', format_url, 'tbr', default=None))
+ })
+ new_formats = [f]
+ formats.extend(merge_dicts(f, {
+ 'format_note': join_nonempty('quality', 'class', from_dict=meta, delim=', '),
+ 'language': meta.get('language'),
+ 'language_preference': 10 if meta.get('class') == 'main' else -10 if meta.get('class') == 'ad' else -1,
+ 'quality': qualities(self._QUALITIES)(meta.get('quality')),
+ }) for f in new_formats)
+
+ def _extract_ptmd(self, ptmd_url, video_id, api_token, referrer):
+ ptmd = self._call_api(
+ ptmd_url, video_id, 'metadata', api_token, referrer)
+
+ content_id = ptmd.get('basename') or ptmd_url.split('/')[-1]
+
+ formats = []
+ track_uris = set()
+ for p in ptmd['priorityList']:
+ formitaeten = p.get('formitaeten')
+ if not isinstance(formitaeten, list):
+ continue
+ for f in formitaeten:
+ f_qualities = f.get('qualities')
+ if not isinstance(f_qualities, list):
+ continue
+ for quality in f_qualities:
+ tracks = try_get(quality, lambda x: x['audio']['tracks'], list)
+ if not tracks:
+ continue
+ for track in tracks:
+ self._extract_format(
+ content_id, formats, track_uris, {
+ 'url': track.get('uri'),
+ 'type': f.get('type'),
+ 'mimeType': f.get('mimeType'),
+ 'quality': quality.get('quality'),
+ 'class': track.get('class'),
+ 'language': track.get('language'),
+ })
+
+ duration = float_or_none(try_get(
+ ptmd, lambda x: x['attributes']['duration']['value']), scale=1000)
+
+ return {
+ 'extractor_key': ZDFIE.ie_key(),
+ 'id': content_id,
+ 'duration': duration,
+ 'formats': formats,
+ 'subtitles': self._extract_subtitles(ptmd),
+ '_format_sort_fields': ('tbr', 'res', 'quality', 'language_preference'),
+ }
+
+ def _extract_player(self, webpage, video_id, fatal=True):
+ return self._parse_json(
+ self._search_regex(
+ r'(?s)data-zdfplayer-jsb=(["\'])(?P<json>{.+?})\1', webpage,
+ 'player JSON', default='{}' if not fatal else NO_DEFAULT,
+ group='json'),
+ video_id)
+
+
+class ZDFIE(ZDFBaseIE):
+ _VALID_URL = r'https?://www\.zdf\.de/(?:[^/]+/)*(?P<id>[^/?#&]+)\.html'
+ _TESTS = [{
+ # Same as https://www.phoenix.de/sendungen/ereignisse/corona-nachgehakt/wohin-fuehrt-der-protest-in-der-pandemie-a-2050630.html
+ 'url': 'https://www.zdf.de/politik/phoenix-sendungen/wohin-fuehrt-der-protest-in-der-pandemie-100.html',
+ 'md5': '34ec321e7eb34231fd88616c65c92db0',
+ 'info_dict': {
+ 'id': '210222_phx_nachgehakt_corona_protest',
+ 'ext': 'mp4',
+ 'title': 'Wohin führt der Protest in der Pandemie?',
+ 'description': 'md5:7d643fe7f565e53a24aac036b2122fbd',
+ 'duration': 1691,
+ 'timestamp': 1613948400,
+ 'upload_date': '20210221',
+ },
+ 'skip': 'No longer available: "Diese Seite wurde leider nicht gefunden"',
+ }, {
+ # Same as https://www.3sat.de/film/ab-18/10-wochen-sommer-108.html
+ 'url': 'https://www.zdf.de/dokumentation/ab-18/10-wochen-sommer-102.html',
+ 'md5': '0aff3e7bc72c8813f5e0fae333316a1d',
+ 'info_dict': {
+ 'id': '141007_ab18_10wochensommer_film',
+ 'ext': 'mp4',
+ 'title': 'Ab 18! - 10 Wochen Sommer',
+ 'description': 'md5:8253f41dc99ce2c3ff892dac2d65fe26',
+ 'duration': 2660,
+ 'timestamp': 1608604200,
+ 'upload_date': '20201222',
+ },
+ 'skip': 'No longer available: "Diese Seite wurde leider nicht gefunden"',
+ }, {
+ 'url': 'https://www.zdf.de/nachrichten/heute-journal/heute-journal-vom-30-12-2021-100.html',
+ 'info_dict': {
+ 'id': '211230_sendung_hjo',
+ 'ext': 'mp4',
+ 'description': 'md5:47dff85977bde9fb8cba9e9c9b929839',
+ 'duration': 1890.0,
+ 'upload_date': '20211230',
+ 'chapters': list,
+ 'thumbnail': 'md5:e65f459f741be5455c952cd820eb188e',
+ 'title': 'heute journal vom 30.12.2021',
+ 'timestamp': 1640897100,
+ },
+ 'skip': 'No longer available: "Diese Seite wurde leider nicht gefunden"',
+ }, {
+ 'url': 'https://www.zdf.de/dokumentation/terra-x/die-magie-der-farben-von-koenigspurpur-und-jeansblau-100.html',
+ 'info_dict': {
+ 'id': '151025_magie_farben2_tex',
+ 'ext': 'mp4',
+ 'title': 'Die Magie der Farben (2/2)',
+ 'description': 'md5:a89da10c928c6235401066b60a6d5c1a',
+ 'duration': 2615,
+ 'timestamp': 1465021200,
+ 'upload_date': '20160604',
+ 'thumbnail': 'https://www.zdf.de/assets/mauve-im-labor-100~768x432?cb=1464909117806',
+ },
+ }, {
+ 'url': 'https://www.zdf.de/funk/druck-11790/funk-alles-ist-verzaubert-102.html',
+ 'md5': '57af4423db0455a3975d2dc4578536bc',
+ 'info_dict': {
+ 'ext': 'mp4',
+ 'id': 'video_funk_1770473',
+ 'duration': 1278,
+ 'description': 'Die Neue an der Schule verdreht Ismail den Kopf.',
+ 'title': 'Alles ist verzaubert',
+ 'timestamp': 1635520560,
+ 'upload_date': '20211029',
+ 'thumbnail': 'https://www.zdf.de/assets/teaser-funk-alles-ist-verzaubert-102~1920x1080?cb=1663848412907',
+ },
+ }, {
+ # Same as https://www.phoenix.de/sendungen/dokumentationen/gesten-der-maechtigen-i-a-89468.html?ref=suche
+ 'url': 'https://www.zdf.de/politik/phoenix-sendungen/die-gesten-der-maechtigen-100.html',
+ 'only_matching': True,
+ }, {
+ # Same as https://www.3sat.de/film/spielfilm/der-hauptmann-100.html
+ 'url': 'https://www.zdf.de/filme/filme-sonstige/der-hauptmann-112.html',
+ 'only_matching': True,
+ }, {
+ # Same as https://www.3sat.de/wissen/nano/nano-21-mai-2019-102.html, equal media ids
+ 'url': 'https://www.zdf.de/wissen/nano/nano-21-mai-2019-102.html',
+ 'only_matching': True,
+ }, {
+ 'url': 'https://www.zdf.de/service-und-hilfe/die-neue-zdf-mediathek/zdfmediathek-trailer-100.html',
+ 'only_matching': True,
+ }, {
+ 'url': 'https://www.zdf.de/filme/taunuskrimi/die-lebenden-und-die-toten-1---ein-taunuskrimi-100.html',
+ 'only_matching': True,
+ }, {
+ 'url': 'https://www.zdf.de/dokumentation/planet-e/planet-e-uebersichtsseite-weitere-dokumentationen-von-planet-e-100.html',
+ 'only_matching': True,
+ }, {
+ 'url': 'https://www.zdf.de/arte/todliche-flucht/page-video-artede-toedliche-flucht-16-100.html',
+ 'info_dict': {
+ 'id': 'video_artede_083871-001-A',
+ 'ext': 'mp4',
+ 'title': 'Tödliche Flucht (1/6)',
+ 'description': 'md5:e34f96a9a5f8abd839ccfcebad3d5315',
+ 'duration': 3193.0,
+ 'timestamp': 1641355200,
+ 'upload_date': '20220105',
+ },
+ 'skip': 'No longer available "Diese Seite wurde leider nicht gefunden"'
+ }, {
+ 'url': 'https://www.zdf.de/serien/soko-stuttgart/das-geld-anderer-leute-100.html',
+ 'info_dict': {
+ 'id': '191205_1800_sendung_sok8',
+ 'ext': 'mp4',
+ 'title': 'Das Geld anderer Leute',
+ 'description': 'md5:cb6f660850dc5eb7d1ab776ea094959d',
+ 'duration': 2581.0,
+ 'timestamp': 1675160100,
+ 'upload_date': '20230131',
+ 'thumbnail': 'https://epg-image.zdf.de/fotobase-webdelivery/images/e2d7e55a-09f0-424e-ac73-6cac4dd65f35?layout=2400x1350',
+ },
+ }, {
+ 'url': 'https://www.zdf.de/dokumentation/terra-x/unser-gruener-planet-wuesten-doku-100.html',
+ 'info_dict': {
+ 'id': '220605_dk_gruener_planet_wuesten_tex',
+ 'ext': 'mp4',
+ 'title': 'Unser grüner Planet - Wüsten',
+ 'description': 'md5:4fc647b6f9c3796eea66f4a0baea2862',
+ 'duration': 2613.0,
+ 'timestamp': 1654450200,
+ 'upload_date': '20220605',
+ 'format_note': 'uhd, main',
+ 'thumbnail': 'https://www.zdf.de/assets/saguaro-kakteen-102~3840x2160?cb=1655910690796',
+ },
+ }]
+
+ def _extract_entry(self, url, player, content, video_id):
+ title = content.get('title') or content['teaserHeadline']
+
+ t = content['mainVideoContent']['http://zdf.de/rels/target']
+ ptmd_path = traverse_obj(t, (
+ (('streams', 'default'), None),
+ ('http://zdf.de/rels/streams/ptmd', 'http://zdf.de/rels/streams/ptmd-template')
+ ), get_all=False)
+ if not ptmd_path:
+ raise ExtractorError('Could not extract ptmd_path')
+
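+ # The PTMD path may be templated; fill the {playerId} placeholder with a
+ # known player profile before requesting the stream metadata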
+ info = self._extract_ptmd(
+ urljoin(url, ptmd_path.replace('{playerId}', 'android_native_5')), video_id, player['apiToken'], url)
+
+ thumbnails = []
+ layouts = try_get(
+ content, lambda x: x['teaserImageRef']['layouts'], dict)
+ if layouts:
+ for layout_key, layout_url in layouts.items():
+ layout_url = url_or_none(layout_url)
+ if not layout_url:
+ continue
+ thumbnail = {
+ 'url': layout_url,
+ 'format_id': layout_key,
+ }
+ mobj = re.search(r'(?P<width>\d+)x(?P<height>\d+)', layout_key)
+ if mobj:
+ thumbnail.update({
+ 'width': int(mobj.group('width')),
+ 'height': int(mobj.group('height')),
+ })
+ thumbnails.append(thumbnail)
+
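+ # Append the total duration as a sentinel so that zipping the list with
+ # itself shifted by one yields (start_time, end_time) pairs per chapter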
+ chapter_marks = t.get('streamAnchorTag') or []
+ chapter_marks.append({'anchorOffset': int_or_none(t.get('duration'))})
+ chapters = [{
+ 'start_time': chap.get('anchorOffset'),
+ 'end_time': next_chap.get('anchorOffset'),
+ 'title': chap.get('anchorLabel')
+ } for chap, next_chap in zip(chapter_marks, chapter_marks[1:])]
+
+ return merge_dicts(info, {
+ 'title': title,
+ 'description': content.get('leadParagraph') or content.get('teasertext'),
+ 'duration': int_or_none(t.get('duration')),
+ 'timestamp': unified_timestamp(content.get('editorialDate')),
+ 'thumbnails': thumbnails,
+ 'chapters': chapters or None
+ })
+
+ def _extract_regular(self, url, player, video_id):
+ content = self._call_api(
+ player['content'], video_id, 'content', player['apiToken'], url)
+ return self._extract_entry(player['content'], player, content, video_id)
+
+ def _extract_mobile(self, video_id):
+ video = self._download_json(
+ 'https://zdf-cdn.live.cellular.de/mediathekV2/document/%s' % video_id,
+ video_id)
+
+ formats = []
+ formitaeten = try_get(video, lambda x: x['document']['formitaeten'], list)
+ document = formitaeten and video['document']
+ if formitaeten:
+ title = document['titel']
+ content_id = document['basename']
+
+ format_urls = set()
+ for f in formitaeten or []:
+ self._extract_format(content_id, formats, format_urls, f)
+
+ thumbnails = []
+ teaser_bild = document.get('teaserBild')
+ if isinstance(teaser_bild, dict):
+ for thumbnail_key, thumbnail in teaser_bild.items():
+ thumbnail_url = try_get(
+ thumbnail, lambda x: x['url'], compat_str)
+ if thumbnail_url:
+ thumbnails.append({
+ 'url': thumbnail_url,
+ 'id': thumbnail_key,
+ 'width': int_or_none(thumbnail.get('width')),
+ 'height': int_or_none(thumbnail.get('height')),
+ })
+
+ return {
+ 'id': content_id,
+ 'title': title,
+ 'description': document.get('beschreibung'),
+ 'duration': int_or_none(document.get('length')),
+ 'timestamp': unified_timestamp(document.get('date')) or unified_timestamp(
+ try_get(video, lambda x: x['meta']['editorialDate'], compat_str)),
+ 'thumbnails': thumbnails,
+ 'subtitles': self._extract_subtitles(document),
+ 'formats': formats,
+ }
+
+ def _real_extract(self, url):
+ video_id = self._match_id(url)
+
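+ # Prefer the desktop player JSON embedded in the page; fall back to the
+ # mobile/cellular API when the page or player data is unavailable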
+ webpage = self._download_webpage(url, video_id, fatal=False)
+ if webpage:
+ player = self._extract_player(webpage, url, fatal=False)
+ if player:
+ return self._extract_regular(url, player, video_id)
+
+ return self._extract_mobile(video_id)
+
+
+class ZDFChannelIE(ZDFBaseIE):
+ _VALID_URL = r'https?://www\.zdf\.de/(?:[^/]+/)*(?P<id>[^/?#&]+)'
+ _TESTS = [{
+ 'url': 'https://www.zdf.de/sport/das-aktuelle-sportstudio',
+ 'info_dict': {
+ 'id': 'das-aktuelle-sportstudio',
+ 'title': 'das aktuelle sportstudio',
+ },
+ 'playlist_mincount': 18,
+ }, {
+ 'url': 'https://www.zdf.de/dokumentation/planet-e',
+ 'info_dict': {
+ 'id': 'planet-e',
+ 'title': 'planet e.',
+ },
+ 'playlist_mincount': 50,
+ }, {
+ 'url': 'https://www.zdf.de/gesellschaft/aktenzeichen-xy-ungeloest',
+ 'info_dict': {
+ 'id': 'aktenzeichen-xy-ungeloest',
+ 'title': 'Aktenzeichen XY... ungelöst',
+ 'entries': "lambda x: not any('xy580-fall1-kindermoerder-gesucht-100' in e['url'] for e in x)",
+ },
+ 'playlist_mincount': 2,
+ }, {
+ 'url': 'https://www.zdf.de/filme/taunuskrimi/',
+ 'only_matching': True,
+ }]
+
+ @classmethod
+ def suitable(cls, url):
+ return False if ZDFIE.suitable(url) else super().suitable(url)
+
+ def _og_search_title(self, webpage, fatal=False):
+ title = super()._og_search_title(webpage, fatal=fatal)
+ return re.split(r'\s+[-|]\s+ZDF(?:mediathek)?$', title or '')[0] or None
+
+ def _real_extract(self, url):
+ channel_id = self._match_id(url)
+
+ webpage = self._download_webpage(url, channel_id)
+
+ matches = re.finditer(
+ r'''<div\b[^>]*?\sdata-plusbar-id\s*=\s*(["'])(?P<p_id>[\w-]+)\1[^>]*?\sdata-plusbar-url=\1(?P<url>%s)\1''' % ZDFIE._VALID_URL,
+ webpage)
+
+ if self._downloader.params.get('noplaylist', False):
+ entry = next(
+ (self.url_result(m.group('url'), ie=ZDFIE.ie_key()) for m in matches),
+ None)
+ self.to_screen('Downloading just the main video because of --no-playlist')
+ if entry:
+ return entry
+ else:
+ self.to_screen('Downloading playlist %s - add --no-playlist to download just the main video' % (channel_id, ))
+
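+ # Skip plusbar entries whose linked teaser is marked as having no video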
+ def check_video(m):
+ v_ref = self._search_regex(
+ r'''(<a\b[^>]*?\shref\s*=[^>]+?\sdata-target-id\s*=\s*(["'])%s\2[^>]*>)''' % (m.group('p_id'), ),
+ webpage, 'check id', default='')
+ v_ref = extract_attributes(v_ref)
+ return v_ref.get('data-target-video-type') != 'novideo'
+
+ return self.playlist_from_matches(
+ (m.group('url') for m in matches if check_video(m)),
+ channel_id, self._og_search_title(webpage, fatal=False))
diff --git a/yt_dlp/extractor/zee5.py b/yt_dlp/extractor/zee5.py
new file mode 100644
index 0000000..ca79cf0
--- /dev/null
+++ b/yt_dlp/extractor/zee5.py
@@ -0,0 +1,270 @@
+import json
+import time
+import uuid
+
+from .common import InfoExtractor
+from ..compat import compat_str
+from ..utils import (
+ ExtractorError,
+ int_or_none,
+ jwt_decode_hs256,
+ parse_age_limit,
+ str_or_none,
+ try_call,
+ try_get,
+ unified_strdate,
+ unified_timestamp,
+ url_or_none,
+)
+
+
+class Zee5IE(InfoExtractor):
+ _VALID_URL = r'''(?x)
+ (?:
+ zee5:|
+ https?://(?:www\.)?zee5\.com/(?:[^#?]+/)?
+ (?:
+ (?:tv-shows|kids|web-series|zee5originals)(?:/[^#/?]+){3}
+ |(?:movies|kids|videos|news|music-videos)/(?!kids-shows)[^#/?]+
+ )/(?P<display_id>[^#/?]+)/
+ )
+ (?P<id>[^#/?]+)/?(?:$|[?#])
+ '''
+ _TESTS = [{
+ 'url': 'https://www.zee5.com/movies/details/adavari-matalaku-ardhale-verule/0-0-movie_1143162669',
+ 'info_dict': {
+ 'id': '0-0-movie_1143162669',
+ 'ext': 'mp4',
+ 'display_id': 'adavari-matalaku-ardhale-verule',
+ 'title': 'Adavari Matalaku Ardhale Verule',
+ 'duration': 9360,
+ 'description': compat_str,
+ 'alt_title': 'Adavari Matalaku Ardhale Verule',
+ 'uploader': 'Zee Entertainment Enterprises Ltd',
+ 'release_date': '20070427',
+ 'upload_date': '20070427',
+ 'timestamp': 1177632000,
+ 'thumbnail': r're:^https?://.*\.jpg$',
+ 'episode_number': 0,
+ 'episode': 'Episode 0',
+ 'tags': list
+ },
+ 'params': {
+ 'format': 'bv',
+ },
+ }, {
+ 'url': 'https://www.zee5.com/kids/kids-shows/bandbudh-aur-budbak/0-6-1899/yoga-se-hoga-bandbudh-aur-budbak/0-1-239839',
+ 'info_dict': {
+ 'id': '0-1-239839',
+ 'ext': 'mp4',
+ 'display_id': 'yoga-se-hoga-bandbudh-aur-budbak',
+ 'title': 'Yoga Se Hoga-Bandbudh aur Budbak',
+ 'duration': 659,
+ 'description': compat_str,
+ 'alt_title': 'Yoga Se Hoga-Bandbudh aur Budbak',
+ 'uploader': 'Zee Entertainment Enterprises Ltd',
+ 'release_date': '20150101',
+ 'upload_date': '20150101',
+ 'timestamp': 1420070400,
+ 'thumbnail': r're:^https?://.*\.jpg$',
+ 'series': 'Bandbudh Aur Budbak',
+ 'season_number': 1,
+ 'episode_number': 1,
+ 'episode': 'Episode 1',
+ 'season': 'Season 1',
+ 'tags': list,
+ },
+ 'params': {
+ 'format': 'bv',
+ },
+ }, {
+ 'url': 'https://www.zee5.com/hi/tv-shows/details/kundali-bhagya/0-6-366/kundali-bhagya-march-08-2021/0-1-manual_7g9jv1os7730?country=IN',
+ 'only_matching': True
+ }, {
+ 'url': 'https://www.zee5.com/global/hi/tv-shows/details/kundali-bhagya/0-6-366/kundali-bhagya-march-08-2021/0-1-manual_7g9jv1os7730',
+ 'only_matching': True
+ }, {
+ 'url': 'https://www.zee5.com/web-series/details/mithya/0-6-4z587408/maine-dekhi-hai-uski-mrityu/0-1-6z587412',
+ 'only_matching': True
+ }, {
+ 'url': 'https://www.zee5.com/kids/kids-movies/maya-bommalu/0-0-movie_1040370005',
+ 'only_matching': True
+ }, {
+ 'url': 'https://www.zee5.com/news/details/jana-sena-chief-pawan-kalyan-shows-slippers-to-ysrcp-leaders/0-0-newsauto_6ettj4242oo0',
+ 'only_matching': True
+ }, {
+ 'url': 'https://www.zee5.com/music-videos/details/adhento-gaani-vunnapaatuga-jersey-nani-shraddha-srinath/0-0-56973',
+ 'only_matching': True
+ }]
+ _DEVICE_ID = str(uuid.uuid4())
+ _USER_TOKEN = None
+ _LOGIN_HINT = 'Use "--username <mobile_number>" to log in using OTP, or "--username token" and "--password <user_token>" to log in using a user token.'
+ _NETRC_MACHINE = 'zee5'
+ _GEO_COUNTRIES = ['IN']
+ _USER_COUNTRY = None
+
+ def _perform_login(self, username, password):
+ if len(username) == 10 and username.isdigit() and self._USER_TOKEN is None:
+ self.report_login()
+ otp_request_json = self._download_json(f'https://b2bapi.zee5.com/device/sendotp_v1.php?phoneno=91{username}',
+ None, note='Sending OTP')
+ if otp_request_json['code'] == 0:
+ self.to_screen(otp_request_json['message'])
+ else:
+ raise ExtractorError(otp_request_json['message'], expected=True)
+ otp_code = self._get_tfa_info('OTP')
+ otp_verify_json = self._download_json(f'https://b2bapi.zee5.com/device/verifyotp_v1.php?phoneno=91{username}&otp={otp_code}&guest_token={self._DEVICE_ID}&platform=web',
+ None, note='Verifying OTP', fatal=False)
+ if not otp_verify_json:
+ raise ExtractorError('Unable to verify OTP.', expected=True)
+ self._USER_TOKEN = otp_verify_json.get('token')
+ if not self._USER_TOKEN:
+ raise ExtractorError(otp_verify_json.get('message') or 'Unable to verify OTP', expected=True)
+ elif username.lower() == 'token' and try_call(lambda: jwt_decode_hs256(password)):
+ self._USER_TOKEN = password
+ else:
+ raise ExtractorError(self._LOGIN_HINT, expected=True)
+
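+ # Both login flows end with a JWT user token; fail early if it expired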
+ token = jwt_decode_hs256(self._USER_TOKEN)
+ if token.get('exp', 0) <= int(time.time()):
+ raise ExtractorError('User token has expired', expected=True)
+ self._USER_COUNTRY = token.get('current_country')
+
+ def _real_extract(self, url):
+ video_id, display_id = self._match_valid_url(url).group('id', 'display_id')
+ access_token_request = self._download_json(
+ 'https://launchapi.zee5.com/launch?platform_name=web_app',
+ video_id, note='Downloading access token')['platform_token']
+ data = {
+ 'x-access-token': access_token_request['token']
+ }
+ if self._USER_TOKEN:
+ data['Authorization'] = 'bearer %s' % self._USER_TOKEN
+ else:
+ data['X-Z5-Guest-Token'] = self._DEVICE_ID
+
+ json_data = self._download_json(
+ 'https://spapi.zee5.com/singlePlayback/getDetails/secure', video_id, query={
+ 'content_id': video_id,
+ 'device_id': self._DEVICE_ID,
+ 'platform_name': 'desktop_web',
+ 'country': self._USER_COUNTRY or self.get_param('geo_bypass_country') or 'IN',
+ 'check_parental_control': False,
+ }, headers={'content-type': 'application/json'}, data=json.dumps(data).encode('utf-8'))
+ asset_data = json_data['assetDetails']
+ show_data = json_data.get('showDetails', {})
+ if 'premium' in asset_data['business_type']:
+ raise ExtractorError('Premium content is DRM protected.', expected=True)
+ if not asset_data.get('hls_url'):
+ self.raise_login_required(self._LOGIN_HINT, metadata_available=True, method=None)
+ formats, m3u8_subs = self._extract_m3u8_formats_and_subtitles(asset_data['hls_url'], video_id, 'mp4', fatal=False)
+
+ subtitles = {}
+ for sub in asset_data.get('subtitle_url', []):
+ sub_url = sub.get('url')
+ if not sub_url:
+ continue
+ subtitles.setdefault(sub.get('language', 'en'), []).append({
+ 'url': self._proto_relative_url(sub_url),
+ })
+ subtitles = self._merge_subtitles(subtitles, m3u8_subs)
+ return {
+ 'id': video_id,
+ 'display_id': display_id,
+ 'title': asset_data['title'],
+ 'formats': formats,
+ 'subtitles': subtitles,
+ 'duration': int_or_none(asset_data.get('duration')),
+ 'description': str_or_none(asset_data.get('description')),
+ 'alt_title': str_or_none(asset_data.get('original_title')),
+ 'uploader': str_or_none(asset_data.get('content_owner')),
+ 'age_limit': parse_age_limit(asset_data.get('age_rating')),
+ 'release_date': unified_strdate(asset_data.get('release_date')),
+ 'timestamp': unified_timestamp(asset_data.get('release_date')),
+ 'thumbnail': url_or_none(asset_data.get('image_url')),
+ 'series': str_or_none(asset_data.get('tvshow_name')),
+ 'season': try_get(show_data, lambda x: x['seasons'][0]['title'], str),
+ 'season_number': int_or_none(try_get(show_data, lambda x: x['seasons'][0]['orderid'])),
+ 'episode_number': int_or_none(try_get(asset_data, lambda x: x['orderid'])),
+ 'tags': try_get(asset_data, lambda x: x['tags'], list)
+ }
+
+
+class Zee5SeriesIE(InfoExtractor):
+ IE_NAME = 'zee5:series'
+ _VALID_URL = r'''(?x)
+ (?:
+ zee5:series:|
+ https?://(?:www\.)?zee5\.com/(?:[^#?]+/)?
+ (?:tv-shows|web-series|kids|zee5originals)/(?!kids-movies)(?:[^#/?]+/){2}
+ )
+ (?P<id>[^#/?]+)(?:/episodes)?/?(?:$|[?#])
+ '''
+ _TESTS = [{
+ 'url': 'https://www.zee5.com/kids/kids-shows/bandbudh-aur-budbak/0-6-1899',
+ 'playlist_mincount': 156,
+ 'info_dict': {
+ 'id': '0-6-1899',
+ },
+ }, {
+ 'url': 'https://www.zee5.com/tv-shows/details/bhabi-ji-ghar-par-hai/0-6-199',
+ 'playlist_mincount': 1500,
+ 'info_dict': {
+ 'id': '0-6-199',
+ },
+ }, {
+ 'url': 'https://www.zee5.com/tv-shows/details/agent-raghav-crime-branch/0-6-965',
+ 'playlist_mincount': 24,
+ 'info_dict': {
+ 'id': '0-6-965',
+ },
+ }, {
+ 'url': 'https://www.zee5.com/ta/tv-shows/details/nagabhairavi/0-6-3201',
+ 'playlist_mincount': 3,
+ 'info_dict': {
+ 'id': '0-6-3201',
+ },
+ }, {
+ 'url': 'https://www.zee5.com/global/hi/tv-shows/details/khwaabon-ki-zamin-par/0-6-270',
+ 'playlist_mincount': 150,
+ 'info_dict': {
+ 'id': '0-6-270',
+ },
+ }, {
+ 'url': 'https://www.zee5.com/tv-shows/details/chala-hawa-yeu-dya-ladies-zindabaad/0-6-2943/episodes',
+ 'only_matching': True,
+ }, {
+ 'url': 'https://www.zee5.com/web-series/details/mithya/0-6-4z587408',
+ 'only_matching': True,
+ }]
+
+ def _entries(self, show_id):
+ access_token_request = self._download_json(
+ 'https://launchapi.zee5.com/launch?platform_name=web_app',
+ show_id, note='Downloading access token')['platform_token']
+ headers = {
+ 'X-Access-Token': access_token_request['token'],
+ 'Referer': 'https://www.zee5.com/',
+ }
+ show_url = f'https://gwapi.zee5.com/content/tvshow/{show_id}?translation=en&country=IN'
+
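+ # Episodes are paginated per season; follow next_episode_api until the
+ # API stops returning a continuation URL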
+ page_num = 0
+ show_json = self._download_json(show_url, video_id=show_id, headers=headers)
+ for season in show_json.get('seasons') or []:
+ season_id = try_get(season, lambda x: x['id'], compat_str)
+ next_url = f'https://gwapi.zee5.com/content/tvshow/?season_id={season_id}&type=episode&translation=en&country=IN&on_air=false&asset_subtype=tvshow&page=1&limit=100'
+ while next_url:
+ page_num += 1
+ episodes_json = self._download_json(
+ next_url, video_id=show_id, headers=headers,
+ note='Downloading JSON metadata page %d' % page_num)
+ for episode in try_get(episodes_json, lambda x: x['episode'], list) or []:
+ video_id = episode.get('id')
+ yield self.url_result(
+ 'zee5:%s' % video_id,
+ ie=Zee5IE.ie_key(), video_id=video_id)
+ next_url = url_or_none(episodes_json.get('next_episode_api'))
+
+ def _real_extract(self, url):
+ show_id = self._match_id(url)
+ return self.playlist_result(self._entries(show_id), playlist_id=show_id)
diff --git a/yt_dlp/extractor/zeenews.py b/yt_dlp/extractor/zeenews.py
new file mode 100644
index 0000000..e2cb1e7
--- /dev/null
+++ b/yt_dlp/extractor/zeenews.py
@@ -0,0 +1,59 @@
+from .common import InfoExtractor
+from ..utils import ExtractorError, traverse_obj
+
+
+class ZeeNewsIE(InfoExtractor):
+ _WORKING = False
+ _ENABLED = None # XXX: pass through to GenericIE
+ _VALID_URL = r'https?://zeenews\.india\.com/[^#?]+/video/(?P<display_id>[^#/?]+)/(?P<id>\d+)'
+ _TESTS = [
+ {
+ 'url': 'https://zeenews.india.com/hindi/india/delhi-ncr-haryana/delhi-ncr/video/greater-noida-video-viral-on-social-media-attackers-beat-businessman-and-his-son-oppose-market-closed-atdnh/1402138',
+ 'info_dict': {
+ 'id': '1402138',
+ 'ext': 'mp4',
+ 'title': 'Greater Noida Video: हमलावरों ने दिनदहाड़े दुकान में घुसकर की मारपीट, देखें वीडियो',
+ 'display_id': 'greater-noida-video-viral-on-social-media-attackers-beat-businessman-and-his-son-oppose-market-closed-atdnh',
+ 'upload_date': '20221019',
+ 'thumbnail': r're:^https?://.*\.jpg',
+ 'timestamp': 1666174501,
+ 'view_count': int,
+ 'duration': 97,
+ 'description': 'ग्रेटर नोएडा जारचा थाना क्षेत्र के प्याबली में दिनदहाड़े दुकान में घुसकर अज्ञात हमलावरों ने हमला कर',
+ }
+ },
+ {
+ 'url': 'https://zeenews.india.com/hindi/india/video/videsh-superfast-queen-elizabeth-iis-funeral-today/1357710',
+ 'info_dict': {
+ 'id': '1357710',
+ 'ext': 'mp4',
+ 'title': 'Videsh Superfast: महारानी के अंतिम संस्कार की तैयारी शुरू',
+ 'display_id': 'videsh-superfast-queen-elizabeth-iis-funeral-today',
+ 'upload_date': '20220919',
+ 'thumbnail': r're:^https?://.*\.jpg',
+ 'timestamp': 1663556881,
+ 'view_count': int,
+ 'duration': 133,
+ 'description': 'सेगमेंट विदेश सुपराफास्ट में देखिए देश और दुनिया की सभी बड़ी खबरें, वो भी हर खबर फटाफट अंदाज में.',
+ }
+ }
+ ]
+
+ def _real_extract(self, url):
+ content_id, display_id = self._match_valid_url(url).group('id', 'display_id')
+ webpage = self._download_webpage(url, content_id)
+ json_ld_list = list(self._yield_json_ld(webpage, display_id))
+
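+ # Use the embed URL from the first VideoObject in the page's JSON-LD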
+ embed_url = traverse_obj(
+ json_ld_list, (lambda _, v: v['@type'] == 'VideoObject', 'embedUrl'), get_all=False)
+ if not embed_url:
+ raise ExtractorError('No video found', expected=True)
+
+ formats = self._extract_m3u8_formats(embed_url, content_id, 'mp4')
+
+ return {
+ **self._json_ld(json_ld_list, display_id),
+ 'id': content_id,
+ 'display_id': display_id,
+ 'formats': formats,
+ }
diff --git a/yt_dlp/extractor/zenporn.py b/yt_dlp/extractor/zenporn.py
new file mode 100644
index 0000000..8faa0e3
--- /dev/null
+++ b/yt_dlp/extractor/zenporn.py
@@ -0,0 +1,118 @@
+import base64
+import binascii
+
+from .common import InfoExtractor
+from ..utils import ExtractorError, determine_ext, unified_strdate, url_or_none
+from ..utils.traversal import traverse_obj
+
+
+class ZenPornIE(InfoExtractor):
+ _VALID_URL = r'https?://(?:www\.)?zenporn\.com/video/(?P<id>\d+)'
+ _TESTS = [{
+ 'url': 'https://zenporn.com/video/15627016/desi-bhabi-ki-chudai',
+ 'md5': '07bd576b5920714d74975c054ca28dee',
+ 'info_dict': {
+ 'id': '9563799',
+ 'display_id': '15627016',
+ 'ext': 'mp4',
+ 'title': 'md5:669eafd3bbc688aa29770553b738ada2',
+ 'description': '',
+ 'thumbnail': 'md5:2fc044a19bab450fef8f1931e7920a18',
+ 'upload_date': '20230925',
+ 'uploader': 'md5:9fae59847f1f58d1da8f2772016c12f3',
+ 'age_limit': 18,
+ }
+ }, {
+ 'url': 'https://zenporn.com/video/15570701',
+ 'md5': 'acba0d080d692664fcc8c4e5502b1a67',
+ 'info_dict': {
+ 'id': '2297875',
+ 'display_id': '15570701',
+ 'ext': 'mp4',
+ 'title': 'md5:47aebdf87644ec91e8b1a844bc832451',
+ 'description': '',
+ 'thumbnail': 'https://mstn.nv7s.com/contents/videos_screenshots/2297000/2297875/480x270/1.jpg',
+ 'upload_date': '20230921',
+ 'uploader': 'Lois Clarke',
+ 'age_limit': 18,
+ }
+ }, {
+ 'url': 'https://zenporn.com/video/8531117/amateur-students-having-a-fuck-fest-at-club/',
+ 'md5': '67411256aa9451449e4d29f3be525541',
+ 'info_dict': {
+ 'id': '12791908',
+ 'display_id': '8531117',
+ 'ext': 'mp4',
+ 'title': 'Amateur students having a fuck fest at club',
+ 'description': '',
+ 'thumbnail': 'https://tn.txxx.tube/contents/videos_screenshots/12791000/12791908/288x162/1.jpg',
+ 'upload_date': '20191005',
+ 'uploader': 'Jackopenass',
+ 'age_limit': 18,
+ }
+ }, {
+ 'url': 'https://zenporn.com/video/15872038/glad-you-came/',
+ 'md5': '296ccab437f5bac6099433768449d8e1',
+ 'info_dict': {
+ 'id': '111585',
+ 'display_id': '15872038',
+ 'ext': 'mp4',
+ 'title': 'Glad You Came',
+ 'description': '',
+ 'thumbnail': 'https://vpim.m3pd.com/contents/videos_screenshots/111000/111585/480x270/1.jpg',
+ 'upload_date': '20231024',
+ 'uploader': 'Martin Rudenko',
+ 'age_limit': 18,
+ }
+ }]
+
+ def _gen_info_url(self, ext_domain, extr_id, lifetime=86400):
+ """ This function is a reverse engineering from the website javascript """
+ result = '/'.join(str(int(extr_id) // i * i) for i in (1_000_000, 1_000, 1))
+ return f'https://{ext_domain}/api/json/video/{lifetime}/{result}.json'
+
+ @staticmethod
+ def _decode_video_url(encoded_url):
+ """ This function is a reverse engineering from the website javascript """
+ # Replace lookalike characters and standardize map
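+ # (Cyrillic 'АВСЕМ' -> Latin 'ABCEM'; '.', ',', '~' -> base64 '+', '/', '=')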
+ translation = str.maketrans('АВСЕМ.,~', 'ABCEM+/=')
+ try:
+ return base64.b64decode(encoded_url.translate(translation), validate=True).decode()
+ except (binascii.Error, ValueError):
+ return None
+
+ def _real_extract(self, url):
+ display_id = self._match_id(url)
+ webpage = self._download_webpage(url, display_id)
+
+ ext_domain, video_id = self._search_regex(
+ r'https://(?P<ext_domain>[\w.-]+\.\w{3})/embed/(?P<extr_id>\d+)/',
+ webpage, 'embed info', group=('ext_domain', 'extr_id'))
+
+ info_json = self._download_json(
+ self._gen_info_url(ext_domain, video_id), video_id, fatal=False)
+
+ video_json = self._download_json(
+ f'https://{ext_domain}/api/videofile.php', video_id, query={
+ 'video_id': video_id,
+ 'lifetime': 8640000,
+ }, note='Downloading video file JSON', errnote='Failed to download video file JSON')
+
+ decoded_url = self._decode_video_url(video_json[0]['video_url'])
+ if not decoded_url:
+ raise ExtractorError('Unable to decode the video url')
+
+ return {
+ 'id': video_id,
+ 'display_id': display_id,
+ 'ext': traverse_obj(video_json, (0, 'format', {determine_ext})),
+ 'url': f'https://{ext_domain}{decoded_url}',
+ 'age_limit': 18,
+ **traverse_obj(info_json, ('video', {
+ 'title': ('title', {str}),
+ 'description': ('description', {str}),
+ 'thumbnail': ('thumb', {url_or_none}),
+ 'upload_date': ('post_date', {unified_strdate}),
+ 'uploader': ('user', 'username', {str}),
+ })),
+ }
diff --git a/yt_dlp/extractor/zetland.py b/yt_dlp/extractor/zetland.py
new file mode 100644
index 0000000..055a643
--- /dev/null
+++ b/yt_dlp/extractor/zetland.py
@@ -0,0 +1,71 @@
+from .common import InfoExtractor
+from ..utils import merge_dicts, unified_timestamp, url_or_none
+from ..utils.traversal import traverse_obj
+
+
+class ZetlandDKArticleIE(InfoExtractor):
+ _VALID_URL = r'https?://www\.zetland\.dk/\w+/(?P<id>(?P<story_id>\w{8})-(?P<uploader_id>\w{8})-(?:\w{5}))'
+ _TESTS = [{
+ 'url': 'https://www.zetland.dk/historie/sO9aq2MY-a81VP3BY-66e69?utm_source=instagram&utm_medium=linkibio&utm_campaign=artikel',
+ 'info_dict': {
+ 'id': 'sO9aq2MY-a81VP3BY-66e69',
+ 'ext': 'mp3',
+ 'modified_date': '20240118',
+ 'title': 'Afsnit 1: “Det føltes som en kidnapning.” ',
+ 'upload_date': '20240116',
+ 'uploader_id': 'a81VP3BY',
+ 'modified_timestamp': 1705568739,
+ 'release_timestamp': 1705377592,
+ 'uploader_url': 'https://www.zetland.dk/skribent/a81VP3BY',
+ 'uploader': 'Helle Fuusager',
+ 'release_date': '20240116',
+ 'thumbnail': r're:https://zetland\.imgix\.net/2aafe500-b14e-11ee-bf83-65d5e1283a57/Zetland_Image_1\.jpg',
+ 'description': 'md5:9619d426772c133f5abb26db27f26a01',
+ 'timestamp': 1705377592,
+ 'series_id': '62d54630-e87b-4ab1-a255-8de58dbe1b14',
+ }
+
+ }]
+
+ def _real_extract(self, url):
+ display_id, uploader_id = self._match_valid_url(url).group('id', 'uploader_id')
+ webpage = self._download_webpage(url, display_id)
+
+ next_js_data = self._search_nextjs_data(webpage, display_id)['props']['pageProps']
+ story_data = traverse_obj(next_js_data, ('initialState', 'consume', 'story', 'story'))
+
+ formats = []
+ for audio_url in traverse_obj(story_data, ('story_content', 'meta', 'audioFiles', ..., {url_or_none})):
+ formats.append({
+ 'url': audio_url,
+ 'vcodec': 'none',
+ })
+
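+ # merge_dicts gives precedence to earlier non-empty values: prefer the
+ # story JSON, then next.js metaInfo, then HTML meta tags, then JSON-LD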
+ return merge_dicts({
+ 'id': display_id,
+ 'formats': formats,
+ 'uploader_id': uploader_id
+ }, traverse_obj(story_data, {
+ 'title': ((('story_content', 'content', 'title'), 'title'), {str}),
+ 'uploader': ('sharer', 'name'),
+ 'uploader_id': ('sharer', 'sharer_id'),
+ 'description': ('story_content', 'content', 'socialDescription'),
+ 'series_id': ('story_content', 'meta', 'seriesId'),
+ 'release_timestamp': ('published_at', {unified_timestamp}),
+ 'modified_timestamp': ('revised_at', {unified_timestamp}),
+ }, get_all=False), traverse_obj(next_js_data, ('metaInfo', {
+ 'title': ((('meta', 'title'), ('ld', 'headline'), ('og', 'og:title'), ('og', 'twitter:title')), {str}),
+ 'description': ((('meta', 'description'), ('ld', 'description'), ('og', 'og:description'), ('og', 'twitter:description')), {str}),
+ 'uploader': ((('meta', 'author'), ('ld', 'author', 'name')), {str}),
+ 'uploader_url': ('ld', 'author', 'url', {url_or_none}),
+ 'thumbnail': ((('ld', 'image'), ('og', 'og:image'), ('og', 'twitter:image')), {url_or_none}),
+ 'modified_timestamp': ('ld', 'dateModified', {unified_timestamp}),
+ 'release_timestamp': ('ld', 'datePublished', {unified_timestamp}),
+ 'timestamp': ('ld', 'dateCreated', {unified_timestamp}),
+ }), get_all=False), {
+ 'title': self._html_search_meta(['title', 'og:title', 'twitter:title'], webpage),
+ 'description': self._html_search_meta(['description', 'og:description', 'twitter:description'], webpage),
+ 'thumbnail': self._html_search_meta(['og:image', 'twitter:image'], webpage),
+ 'uploader': self._html_search_meta(['author'], webpage),
+ 'release_timestamp': unified_timestamp(self._html_search_meta(['article:published_time'], webpage)),
+ }, self._search_json_ld(webpage, display_id, fatal=False))
diff --git a/yt_dlp/extractor/zhihu.py b/yt_dlp/extractor/zhihu.py
new file mode 100644
index 0000000..c24b338
--- /dev/null
+++ b/yt_dlp/extractor/zhihu.py
@@ -0,0 +1,65 @@
+from .common import InfoExtractor
+from ..utils import float_or_none, format_field, int_or_none
+
+
+class ZhihuIE(InfoExtractor):
+ _VALID_URL = r'https?://(?:www\.)?zhihu\.com/zvideo/(?P<id>[0-9]+)'
+ _TEST = {
+ 'url': 'https://www.zhihu.com/zvideo/1342930761977176064',
+ 'md5': 'c8d4c9cd72dd58e6f9bc9c2c84266464',
+ 'info_dict': {
+ 'id': '1342930761977176064',
+ 'ext': 'mp4',
+ 'title': '写春联也太难了吧!',
+ 'thumbnail': r're:^https?://.*\.jpg',
+ 'uploader': '桥半舫',
+ 'timestamp': 1612959715,
+ 'upload_date': '20210210',
+ 'uploader_id': '244ecb13b0fd7daf92235288c8ca3365',
+ 'duration': 146.333,
+ 'view_count': int,
+ 'like_count': int,
+ 'comment_count': int,
+ }
+ }
+
+ def _real_extract(self, url):
+ video_id = self._match_id(url)
+ zvideo = self._download_json(
+ 'https://www.zhihu.com/api/v4/zvideos/' + video_id, video_id)
+ title = zvideo['title']
+ video = zvideo.get('video') or {}
+
+ formats = []
+ for format_id, q in (video.get('playlist') or {}).items():
+ play_url = q.get('url') or q.get('play_url')
+ if not play_url:
+ continue
+ formats.append({
+ 'asr': int_or_none(q.get('sample_rate')),
+ 'filesize': int_or_none(q.get('size')),
+ 'format_id': format_id,
+ 'fps': int_or_none(q.get('fps')),
+ 'height': int_or_none(q.get('height')),
+ 'tbr': float_or_none(q.get('bitrate')),
+ 'url': play_url,
+ 'width': int_or_none(q.get('width')),
+ })
+
+ author = zvideo.get('author') or {}
+ url_token = author.get('url_token')
+
+ return {
+ 'id': video_id,
+ 'title': title,
+ 'formats': formats,
+ 'thumbnail': video.get('thumbnail') or zvideo.get('image_url'),
+ 'uploader': author.get('name'),
+ 'timestamp': int_or_none(zvideo.get('published_at')),
+ 'uploader_id': author.get('id'),
+ 'uploader_url': format_field(url_token, None, 'https://www.zhihu.com/people/%s'),
+ 'duration': float_or_none(video.get('duration')),
+ 'view_count': int_or_none(zvideo.get('play_count')),
+ 'like_count': int_or_none(zvideo.get('liked_count')),
+ 'comment_count': int_or_none(zvideo.get('comment_count')),
+ }
diff --git a/yt_dlp/extractor/zingmp3.py b/yt_dlp/extractor/zingmp3.py
new file mode 100644
index 0000000..ff5eac8
--- /dev/null
+++ b/yt_dlp/extractor/zingmp3.py
@@ -0,0 +1,628 @@
+import hashlib
+import hmac
+import itertools
+import json
+import urllib.parse
+
+from .common import InfoExtractor
+from ..utils import (
+ ExtractorError,
+ int_or_none,
+ join_nonempty,
+ try_call,
+ url_or_none,
+ urljoin,
+)
+from ..utils.traversal import traverse_obj
+
+
+class ZingMp3BaseIE(InfoExtractor):
+ _VALID_URL_TMPL = r'https?://(?:mp3\.zing|zingmp3)\.vn/(?P<type>(?:%s))/[^/?#]+/(?P<id>\w+)(?:\.html|\?)'
+ _GEO_COUNTRIES = ['VN']
+ _DOMAIN = 'https://zingmp3.vn'
+ _PER_PAGE = 50
+ _API_SLUGS = {
+ # Audio/video
+ 'bai-hat': '/api/v2/page/get/song',
+ 'embed': '/api/v2/page/get/song',
+ 'video-clip': '/api/v2/page/get/video',
+ 'lyric': '/api/v2/lyric/get/lyric',
+ 'song-streaming': '/api/v2/song/get/streaming',
+ 'liveradio': '/api/v2/livestream/get/info',
+ 'eps': '/api/v2/page/get/podcast-episode',
+ 'episode-streaming': '/api/v2/podcast/episode/get/streaming',
+ # Playlist
+ 'playlist': '/api/v2/page/get/playlist',
+ 'album': '/api/v2/page/get/playlist',
+ 'pgr': '/api/v2/page/get/podcast-program',
+ 'pgr-list': '/api/v2/podcast/episode/get/list',
+ 'cgr': '/api/v2/page/get/podcast-category',
+ 'cgr-list': '/api/v2/podcast/program/get/list-by-cate',
+ 'cgrs': '/api/v2/page/get/podcast-categories',
+ # Chart
+ 'zing-chart': '/api/v2/page/get/chart-home',
+ 'zing-chart-tuan': '/api/v2/page/get/week-chart',
+ 'moi-phat-hanh': '/api/v2/page/get/newrelease-chart',
+ 'the-loai-video': '/api/v2/video/get/list',
+ # User
+ 'info-artist': '/api/v2/page/get/artist',
+ 'user-list-song': '/api/v2/song/get/list',
+ 'user-list-video': '/api/v2/video/get/list',
+ 'hub': '/api/v2/page/get/hub-detail',
+ 'new-release': '/api/v2/chart/get/new-release',
+ 'top100': '/api/v2/page/get/top-100',
+ 'podcast-new': '/api/v2/podcast/program/get/list-by-type',
+ 'top-podcast': '/api/v2/podcast/program/get/top-episode',
+ }
+
+ def _api_url(self, url_type, params):
+ api_slug = self._API_SLUGS[url_type]
+ params.update({'ctime': '1'})
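+ # Request signing (apparently mirroring the site's web client): SHA-256
+ # over the sorted query string, then HMAC-SHA512 over the API path plus
+ # that digest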
+ sha256 = hashlib.sha256(
+ ''.join(f'{k}={v}' for k, v in sorted(params.items())).encode()).hexdigest()
+ data = {
+ **params,
+ 'apiKey': 'X5BM3w8N7MKozC0B85o4KMlzLZKhV00y',
+ 'sig': hmac.new(b'acOrvUS15XRW2o9JksiK1KgQ6Vbds8ZW',
+ f'{api_slug}{sha256}'.encode(), hashlib.sha512).hexdigest(),
+ }
+ return f'{self._DOMAIN}{api_slug}?{urllib.parse.urlencode(data)}'
+
+ def _call_api(self, url_type, params, display_id=None, **kwargs):
+ resp = self._download_json(
+ self._api_url(url_type, params), display_id or params.get('id'),
+ note=f'Downloading {url_type} JSON metadata', **kwargs)
+ return (resp or {}).get('data') or {}
+
+ def _real_initialize(self):
+ if not self._cookies_passed:
+ self._request_webpage(
+ self._api_url('bai-hat', {'id': ''}), None, note='Updating cookies')
+
+ def _parse_items(self, items):
+ for url in traverse_obj(items, (..., 'link')) or []:
+ yield self.url_result(urljoin(self._DOMAIN, url))
+
+ def _fetch_page(self, id_, url_type, page):
+ raise NotImplementedError('This method must be implemented by subclasses')
+
+ def _paged_list(self, _id, url_type):
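+ # Page through results until the API reports no more items or the
+ # running count exceeds the reported total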
+ count = 0
+ for page in itertools.count(1):
+ data = self._fetch_page(_id, url_type, page)
+ entries = list(self._parse_items(data.get('items')))
+ count += len(entries)
+ yield from entries
+ if not data.get('hasMore') or try_call(lambda: count > data['total']):
+ break
+
+
+class ZingMp3IE(ZingMp3BaseIE):
+ _VALID_URL = ZingMp3BaseIE._VALID_URL_TMPL % 'bai-hat|video-clip|embed|eps'
+ IE_NAME = 'zingmp3'
+ IE_DESC = 'zingmp3.vn'
+ _TESTS = [{
+ 'url': 'https://mp3.zing.vn/bai-hat/Xa-Mai-Xa-Bao-Thy/ZWZB9WAB.html',
+ 'md5': 'ead7ae13693b3205cbc89536a077daed',
+ 'info_dict': {
+ 'id': 'ZWZB9WAB',
+ 'title': 'Xa Mãi Xa',
+ 'ext': 'mp3',
+ 'thumbnail': r're:^https?://.+\.jpg',
+ 'subtitles': {
+ 'origin': [{
+ 'ext': 'lrc',
+ }]
+ },
+ 'duration': 255,
+ 'track': 'Xa Mãi Xa',
+ 'artist': 'Bảo Thy',
+ 'album': 'Special Album',
+ 'album_artist': 'Bảo Thy',
+ },
+ }, {
+ 'url': 'https://zingmp3.vn/video-clip/Suong-Hoa-Dua-Loi-K-ICM-RYO/ZO8ZF7C7.html',
+ 'md5': '92c6e7a019f06b4682a6c35ae5785fab',
+ 'info_dict': {
+ 'id': 'ZO8ZF7C7',
+ 'title': 'Sương Hoa Đưa Lối',
+ 'ext': 'mp4',
+ 'thumbnail': r're:^https?://.+\.jpg',
+ 'duration': 207,
+ 'track': 'Sương Hoa Đưa Lối',
+ 'artist': 'K-ICM, RYO',
+ 'album': 'Sương Hoa Đưa Lối (Single)',
+ 'album_artist': 'K-ICM, RYO',
+ },
+ }, {
+ 'url': 'https://zingmp3.vn/bai-hat/Nguoi-Yeu-Toi-Lanh-Lung-Sat-Da-Mr-Siro/ZZ6IW7OU.html',
+ 'md5': '3e9f7a9bd0d965573dbff8d7c68b629d',
+ 'info_dict': {
+ 'id': 'ZZ6IW7OU',
+ 'title': 'Người Yêu Tôi Lạnh Lùng Sắt Đá',
+ 'ext': 'mp3',
+ 'thumbnail': r're:^https?://.+\.jpg',
+ 'duration': 303,
+ 'track': 'Người Yêu Tôi Lạnh Lùng Sắt Đá',
+ 'artist': 'Mr. Siro',
+ 'album': 'Người Yêu Tôi Lạnh Lùng Sắt Đá (Single)',
+ 'album_artist': 'Mr. Siro',
+ },
+ }, {
+ 'url': 'https://zingmp3.vn/eps/Cham-x-Ban-Noi-Goi-La-Nha/ZZD9ACWI.html',
+ 'md5': 'd52f9f63e2631e004e4f15188eedcf80',
+ 'info_dict': {
+ 'id': 'ZZD9ACWI',
+ 'title': 'Chạm x Bạn - Nơi Gọi Là Nhà',
+ 'ext': 'mp3',
+ 'duration': 3716,
+ 'thumbnail': r're:^https?://.+\.jpg',
+ 'track': 'Chạm x Bạn - Nơi Gọi Là Nhà',
+ 'artist': 'On Air',
+ 'album': 'Top Podcast',
+ 'album_artist': 'On Air',
+ },
+ }, {
+ 'url': 'https://zingmp3.vn/embed/song/ZWZEI76B?start=false',
+ 'only_matching': True,
+ }, {
+ 'url': 'https://zingmp3.vn/bai-hat/Xa-Mai-Xa-Bao-Thy/ZWZB9WAB.html',
+ 'only_matching': True,
+ }]
+
+ def _real_extract(self, url):
+ song_id, url_type = self._match_valid_url(url).group('id', 'type')
+ item = self._call_api(url_type, {'id': song_id})
+
+ item_id = item.get('encodeId') or song_id
+ if url_type == 'video-clip':
+ source = item.get('streaming')
+ source['mp4'] = self._download_json(
+ 'http://api.mp3.zing.vn/api/mobile/video/getvideoinfo', item_id,
+ query={'requestdata': json.dumps({'id': item_id})},
+ note='Downloading mp4 JSON metadata').get('source')
+ elif url_type == 'eps':
+ source = self._call_api('episode-streaming', {'id': item_id})
+ else:
+ source = self._call_api('song-streaming', {'id': item_id})
+
+ formats = []
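+ # Keys other than 'mp4'/'hls' are audio bitrates (e.g. '128', '320');
+ # a value of 'VIP' marks streams restricted to paid accounts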
+ for k, v in (source or {}).items():
+ if not v or v == 'VIP':
+ continue
+ if k not in ('mp4', 'hls'):
+ formats.append({
+ 'ext': 'mp3',
+ 'format_id': k,
+ 'tbr': int_or_none(k),
+ 'url': self._proto_relative_url(v),
+ 'vcodec': 'none',
+ })
+ continue
+ for res, video_url in v.items():
+ if not video_url:
+ continue
+ if k == 'hls':
+ formats.extend(self._extract_m3u8_formats(video_url, item_id, 'mp4', m3u8_id=k, fatal=False))
+ continue
+ formats.append({
+ 'format_id': f'mp4-{res}',
+ 'url': video_url,
+ 'height': int_or_none(res),
+ })
+
+ if not formats:
+ if item.get('msg') == 'Sorry, this content is not available in your country.':
+ self.raise_geo_restricted(countries=self._GEO_COUNTRIES, metadata_available=True)
+ else:
+ self.raise_no_formats('The song is only for VIP accounts.')
+
+ lyric = item.get('lyric') or self._call_api('lyric', {'id': item_id}, fatal=False).get('file')
+
+ return {
+ 'id': item_id,
+ 'title': traverse_obj(item, 'title', 'alias'),
+ 'thumbnail': traverse_obj(item, 'thumbnail', 'thumbnailM'),
+ 'duration': int_or_none(item.get('duration')),
+ 'track': traverse_obj(item, 'title', 'alias'),
+ 'artist': traverse_obj(item, 'artistsNames', 'artists_names', ('artists', 0, 'name')),
+ 'album': traverse_obj(item, ('album', ('name', 'title')), ('genres', 0, 'name'), get_all=False),
+ 'album_artist': traverse_obj(item, ('album', ('artistsNames', 'artists_names')),
+ ('artists', 0, 'name'), get_all=False),
+ 'formats': formats,
+ 'subtitles': {'origin': [{'url': lyric}]} if lyric else None,
+ }
+
+
+class ZingMp3AlbumIE(ZingMp3BaseIE):
+ _VALID_URL = ZingMp3BaseIE._VALID_URL_TMPL % 'album|playlist'
+ _TESTS = [{
+ 'url': 'https://zingmp3.vn/album/Ca-Phe-Quan-Quen-Hoang-Dung-My-Anh-Da-LAB-Thinh-Suy/ZOC7WUZC.html',
+ 'info_dict': {
+ 'id': 'ZOC7WUZC',
+ 'title': 'Cà Phê Quán Quen',
+ },
+ 'playlist_mincount': 10,
+ }, {
+ 'url': 'https://zingmp3.vn/album/Nhung-Bai-Hat-Hay-Nhat-Cua-Mr-Siro-Mr-Siro/ZWZAEZZD.html',
+ 'info_dict': {
+ 'id': 'ZWZAEZZD',
+ 'title': 'Những Bài Hát Hay Nhất Của Mr. Siro',
+ },
+ 'playlist_mincount': 20,
+ }, {
+ 'url': 'http://mp3.zing.vn/playlist/Duong-Hong-Loan-apollobee/IWCAACCB.html',
+ 'only_matching': True,
+ }, {
+ 'url': 'https://zingmp3.vn/album/Lau-Dai-Tinh-Ai-Bang-Kieu-Minh-Tuyet/ZWZBWDAF.html',
+ 'only_matching': True,
+ }]
+ IE_NAME = 'zingmp3:album'
+
+ def _real_extract(self, url):
+ song_id, url_type = self._match_valid_url(url).group('id', 'type')
+ data = self._call_api(url_type, {'id': song_id})
+ return self.playlist_result(
+ self._parse_items(traverse_obj(data, ('song', 'items'))),
+ traverse_obj(data, 'id', 'encodeId'), traverse_obj(data, 'name', 'title'))
+
+
+class ZingMp3ChartHomeIE(ZingMp3BaseIE):
+ _VALID_URL = r'https?://(?:mp3\.zing|zingmp3)\.vn/(?P<id>(?:zing-chart|moi-phat-hanh|top100|podcast-discover))/?(?:[#?]|$)'
+ _TESTS = [{
+ 'url': 'https://zingmp3.vn/zing-chart',
+ 'info_dict': {
+ 'id': 'zing-chart',
+ },
+ 'playlist_mincount': 100,
+ }, {
+ 'url': 'https://zingmp3.vn/moi-phat-hanh',
+ 'info_dict': {
+ 'id': 'moi-phat-hanh',
+ },
+ 'playlist_mincount': 100,
+ }, {
+ 'url': 'https://zingmp3.vn/top100',
+ 'info_dict': {
+ 'id': 'top100',
+ },
+ 'playlist_mincount': 50,
+ }, {
+ 'url': 'https://zingmp3.vn/podcast-discover',
+ 'info_dict': {
+ 'id': 'podcast-discover',
+ },
+ 'playlist_mincount': 4,
+ }]
+ IE_NAME = 'zingmp3:chart-home'
+
+ def _real_extract(self, url):
+ url_type = self._match_id(url)
+ params = {'id': url_type}
+ if url_type == 'podcast-discover':
+ params['type'] = 'discover'
+ data = self._call_api(url_type, params)
+ items = []
+ if url_type == 'top100':
+ items.extend(traverse_obj(data, (..., 'items', ..., {dict})))
+ elif url_type == 'zing-chart':
+ items.extend(traverse_obj(data, ('RTChart', 'items', ..., {dict})))
+ else:
+ items.extend(traverse_obj(data, ('items', ..., {dict})))
+ return self.playlist_result(self._parse_items(items), url_type)
+
+
+class ZingMp3WeekChartIE(ZingMp3BaseIE):
+ _VALID_URL = ZingMp3BaseIE._VALID_URL_TMPL % 'zing-chart-tuan'
+ IE_NAME = 'zingmp3:week-chart'
+ _TESTS = [{
+ 'url': 'https://zingmp3.vn/zing-chart-tuan/Bai-hat-Viet-Nam/IWZ9Z08I.html',
+ 'info_dict': {
+ 'id': 'IWZ9Z08I',
+ 'title': 'zing-chart-vn',
+ },
+ 'playlist_mincount': 10,
+ }, {
+ 'url': 'https://zingmp3.vn/zing-chart-tuan/Bai-hat-US-UK/IWZ9Z0BW.html',
+ 'info_dict': {
+ 'id': 'IWZ9Z0BW',
+ 'title': 'zing-chart-us',
+ },
+ 'playlist_mincount': 10,
+ }, {
+ 'url': 'https://zingmp3.vn/zing-chart-tuan/Bai-hat-KPop/IWZ9Z0BO.html',
+ 'info_dict': {
+ 'id': 'IWZ9Z0BO',
+ 'title': 'zing-chart-korea',
+ },
+ 'playlist_mincount': 10,
+ }]
+
+ def _real_extract(self, url):
+ song_id, url_type = self._match_valid_url(url).group('id', 'type')
+ data = self._call_api(url_type, {'id': song_id})
+ return self.playlist_result(
+ self._parse_items(data['items']), song_id, f'zing-chart-{data.get("country", "")}')
+
+
+class ZingMp3ChartMusicVideoIE(ZingMp3BaseIE):
+ _VALID_URL = r'https?://(?:mp3\.zing|zingmp3)\.vn/(?P<type>the-loai-video)/(?P<regions>[^/]+)/(?P<id>[^\.]+)'
+ IE_NAME = 'zingmp3:chart-music-video'
+ _TESTS = [{
+ 'url': 'https://zingmp3.vn/the-loai-video/Viet-Nam/IWZ9Z08I.html',
+ 'info_dict': {
+ 'id': 'IWZ9Z08I',
+ 'title': 'the-loai-video_Viet-Nam',
+ },
+ 'playlist_mincount': 400,
+ }, {
+ 'url': 'https://zingmp3.vn/the-loai-video/Au-My/IWZ9Z08O.html',
+ 'info_dict': {
+ 'id': 'IWZ9Z08O',
+ 'title': 'the-loai-video_Au-My',
+ },
+ 'playlist_mincount': 40,
+ }, {
+ 'url': 'https://zingmp3.vn/the-loai-video/Han-Quoc/IWZ9Z08W.html',
+ 'info_dict': {
+ 'id': 'IWZ9Z08W',
+ 'title': 'the-loai-video_Han-Quoc',
+ },
+ 'playlist_mincount': 30,
+ }, {
+ 'url': 'https://zingmp3.vn/the-loai-video/Khong-Loi/IWZ9Z086.html',
+ 'info_dict': {
+ 'id': 'IWZ9Z086',
+ 'title': 'the-loai-video_Khong-Loi',
+ },
+ 'playlist_mincount': 1,
+ }]
+
+ def _fetch_page(self, song_id, url_type, page):
+ return self._call_api(url_type, {
+ 'id': song_id,
+ 'type': 'genre',
+ 'page': page,
+ 'count': self._PER_PAGE
+ })
+
+ def _real_extract(self, url):
+ song_id, regions, url_type = self._match_valid_url(url).group('id', 'regions', 'type')
+ return self.playlist_result(self._paged_list(song_id, url_type), song_id, f'{url_type}_{regions}')
+
+
+class ZingMp3UserIE(ZingMp3BaseIE):
+ _VALID_URL = r'https?://(?:mp3\.zing|zingmp3)\.vn/(?P<user>[^/]+)/(?P<type>bai-hat|single|album|video|song)/?(?:[?#]|$)'
+ IE_NAME = 'zingmp3:user'
+ _TESTS = [{
+ 'url': 'https://zingmp3.vn/Mr-Siro/bai-hat',
+ 'info_dict': {
+ 'id': 'IWZ98609',
+ 'title': 'Mr. Siro - bai-hat',
+ 'description': 'md5:5bdcf45e955dc1b8d7f518f322ffef36',
+ },
+ 'playlist_mincount': 91,
+ }, {
+ 'url': 'https://zingmp3.vn/Mr-Siro/album',
+ 'info_dict': {
+ 'id': 'IWZ98609',
+ 'title': 'Mr. Siro - album',
+ 'description': 'md5:5bdcf45e955dc1b8d7f518f322ffef36',
+ },
+ 'playlist_mincount': 3,
+ }, {
+ 'url': 'https://zingmp3.vn/Mr-Siro/single',
+ 'info_dict': {
+ 'id': 'IWZ98609',
+ 'title': 'Mr. Siro - single',
+ 'description': 'md5:5bdcf45e955dc1b8d7f518f322ffef36',
+ },
+ 'playlist_mincount': 20,
+ }, {
+ 'url': 'https://zingmp3.vn/Mr-Siro/video',
+ 'info_dict': {
+ 'id': 'IWZ98609',
+ 'title': 'Mr. Siro - video',
+ 'description': 'md5:5bdcf45e955dc1b8d7f518f322ffef36',
+ },
+ 'playlist_mincount': 15,
+ }, {
+ 'url': 'https://zingmp3.vn/new-release/song',
+ 'info_dict': {
+ 'id': 'new-release-song',
+ },
+ 'playlist_mincount': 50,
+ }, {
+ 'url': 'https://zingmp3.vn/new-release/album',
+ 'info_dict': {
+ 'id': 'new-release-album',
+ },
+ 'playlist_mincount': 20,
+ }]
+
+ def _fetch_page(self, user_id, url_type, page):
+ url_type = 'user-list-song' if url_type == 'bai-hat' else 'user-list-video'
+ return self._call_api(url_type, {
+ 'id': user_id,
+ 'type': 'artist',
+ 'page': page,
+ 'count': self._PER_PAGE
+ })
+
+ def _real_extract(self, url):
+ alias, url_type = self._match_valid_url(url).group('user', 'type')
+ if not url_type:
+ url_type = 'bai-hat'
+
+ user_info = self._call_api('info-artist', {}, alias, query={'alias': alias})
+
+        # Handle new-release pages
+ if alias == 'new-release' and url_type in ('song', 'album'):
+ _id = f'{alias}-{url_type}'
+ return self.playlist_result(self._parse_items(
+ self._call_api('new-release', params={'type': url_type}, display_id=_id)), _id)
+ else:
+            # Handle user/artist pages
+ if url_type in ('bai-hat', 'video'):
+ entries = self._paged_list(user_info['id'], url_type)
+ else:
+ section_id = 'aAlbum' if url_type == 'album' else 'aSingle'
+ entries = self._parse_items(traverse_obj(user_info, (
+ 'sections', lambda _, v: v['sectionId'] == section_id, 'items', ...)))
+ return self.playlist_result(
+ entries, user_info['id'], join_nonempty(user_info.get('name'), url_type, delim=' - '),
+ user_info.get('biography'))
+
+
+class ZingMp3HubIE(ZingMp3BaseIE):
+ IE_NAME = 'zingmp3:hub'
+ _VALID_URL = r'https?://(?:mp3\.zing|zingmp3)\.vn/(?P<type>hub)/(?P<regions>[^/]+)/(?P<id>[^\.]+)'
+ _TESTS = [{
+ 'url': 'https://zingmp3.vn/hub/Nhac-Moi/IWZ9Z0CA.html',
+ 'info_dict': {
+ 'id': 'IWZ9Z0CA',
+ 'title': 'BXH Nhạc Mới',
+ 'description': 'md5:1cc31b68a6f746427b07b2756c22a558',
+ },
+ 'playlist_mincount': 20,
+ }, {
+ 'url': 'https://zingmp3.vn/hub/Nhac-Viet/IWZ9Z087.html',
+ 'info_dict': {
+ 'id': 'IWZ9Z087',
+ 'title': 'Nhạc Việt',
+ 'description': 'md5:acc976c8bdde64d5c6ee4a92c39f7a77',
+ },
+ 'playlist_mincount': 30,
+ }]
+
+ def _real_extract(self, url):
+ song_id, regions, url_type = self._match_valid_url(url).group('id', 'regions', 'type')
+ hub_detail = self._call_api(url_type, {'id': song_id})
+ entries = self._parse_items(traverse_obj(hub_detail, (
+ 'sections', lambda _, v: v['sectionId'] == 'hub', 'items', ...)))
+ return self.playlist_result(
+ entries, song_id, hub_detail.get('title'), hub_detail.get('description'))
+
+
+class ZingMp3LiveRadioIE(ZingMp3BaseIE):
+ IE_NAME = 'zingmp3:liveradio'
+ _VALID_URL = r'https?://(?:mp3\.zing|zingmp3)\.vn/(?P<type>(?:liveradio))/(?P<id>\w+)(?:\.html|\?)'
+ _TESTS = [{
+ 'url': 'https://zingmp3.vn/liveradio/IWZ979UB.html',
+ 'info_dict': {
+ 'id': 'IWZ979UB',
+ 'title': r're:^V\-POP',
+ 'description': 'md5:aa857f8a91dc9ce69e862a809e4bdc10',
+ 'ext': 'mp4',
+ 'view_count': int,
+ 'thumbnail': r're:^https?://.*\.jpg',
+ 'like_count': int,
+ 'live_status': 'is_live',
+ },
+ 'params': {
+ 'skip_download': True,
+ },
+ }, {
+ 'url': 'https://zingmp3.vn/liveradio/IWZ97CWB.html',
+ 'info_dict': {
+ 'id': 'IWZ97CWB',
+ 'title': r're:^Live\s247',
+ 'description': 'md5:d41d8cd98f00b204e9800998ecf8427e',
+ 'ext': 'm4a',
+ 'view_count': int,
+ 'thumbnail': r're:^https?://.*\.jpg',
+ 'like_count': int,
+ 'live_status': 'is_live',
+ },
+ 'params': {
+ 'skip_download': True,
+ },
+ }]
+
+ def _real_extract(self, url):
+ url_type, live_radio_id = self._match_valid_url(url).group('type', 'id')
+ info = self._call_api(url_type, {'id': live_radio_id})
+ manifest_url = info.get('streaming')
+ if not manifest_url:
+ raise ExtractorError('This radio is offline.', expected=True)
+ fmts, subtitles = self._extract_m3u8_formats_and_subtitles(manifest_url, live_radio_id, fatal=False)
+ return {
+ 'id': live_radio_id,
+ 'is_live': True,
+ 'formats': fmts,
+ 'subtitles': subtitles,
+ **traverse_obj(info, {
+ 'title': 'title',
+ 'thumbnail': (('thumbnail', 'thumbnailM', 'thumbnailV', 'thumbnailH'), {url_or_none}),
+ 'view_count': ('activeUsers', {int_or_none}),
+ 'like_count': ('totalReaction', {int_or_none}),
+ 'description': 'description',
+ }, get_all=False),
+ }
+
+
+class ZingMp3PodcastEpisodeIE(ZingMp3BaseIE):
+ IE_NAME = 'zingmp3:podcast-episode'
+ _VALID_URL = ZingMp3BaseIE._VALID_URL_TMPL % 'pgr|cgr'
+ _TESTS = [{
+ 'url': 'https://zingmp3.vn/pgr/Nhac-Moi-Moi-Ngay/68Z9W66B.html',
+ 'info_dict': {
+ 'id': '68Z9W66B',
+ 'title': 'Nhạc Mới Mỗi Ngày',
+ 'description': 'md5:2875dfa951f8e5356742f1610cf20691'
+ },
+ 'playlist_mincount': 20,
+ }, {
+ 'url': 'https://zingmp3.vn/cgr/Am-nhac/IWZ980AO.html',
+ 'info_dict': {
+ 'id': 'IWZ980AO',
+ 'title': 'Âm nhạc'
+ },
+ 'playlist_mincount': 2,
+ }]
+
+ def _fetch_page(self, eps_id, url_type, page):
+ return self._call_api(url_type, {
+ 'id': eps_id,
+ 'page': page,
+ 'count': self._PER_PAGE
+ })
+
+ def _real_extract(self, url):
+ podcast_id, url_type = self._match_valid_url(url).group('id', 'type')
+ podcast_info = self._call_api(url_type, {'id': podcast_id})
+ entries = self._paged_list(podcast_id, 'pgr-list' if url_type == 'pgr' else 'cgr-list')
+ return self.playlist_result(
+ entries, podcast_id, podcast_info.get('title'), podcast_info.get('description'))
+
+
+class ZingMp3PodcastIE(ZingMp3BaseIE):
+ IE_NAME = 'zingmp3:podcast'
+ _VALID_URL = r'https?://(?:mp3\.zing|zingmp3)\.vn/(?P<id>(?:cgr|top-podcast|podcast-new))/?(?:[#?]|$)'
+ _TESTS = [{
+ 'url': 'https://zingmp3.vn/cgr',
+ 'info_dict': {
+ 'id': 'cgr',
+ },
+ 'playlist_mincount': 5,
+ }, {
+ 'url': 'https://zingmp3.vn/top-podcast',
+ 'info_dict': {
+ 'id': 'top-podcast',
+ },
+ 'playlist_mincount': 7,
+ }, {
+ 'url': 'https://zingmp3.vn/podcast-new',
+ 'info_dict': {
+ 'id': 'podcast-new',
+ },
+ 'playlist_mincount': 4,
+ }]
+
+ def _real_extract(self, url):
+ url_type = self._match_id(url)
+ params = {'id': url_type}
+ if url_type == 'podcast-new':
+ params['type'] = 'new'
+ items = self._call_api('cgrs' if url_type == 'cgr' else url_type, params)['items']
+ return self.playlist_result(self._parse_items(items), url_type)
diff --git a/yt_dlp/extractor/zoom.py b/yt_dlp/extractor/zoom.py
new file mode 100644
index 0000000..e2bf817
--- /dev/null
+++ b/yt_dlp/extractor/zoom.py
@@ -0,0 +1,164 @@
+from .common import InfoExtractor
+from ..utils import (
+ ExtractorError,
+ int_or_none,
+ js_to_json,
+ parse_filesize,
+ parse_resolution,
+ str_or_none,
+ traverse_obj,
+ url_basename,
+ urlencode_postdata,
+ urljoin,
+)
+
+
+class ZoomIE(InfoExtractor):
+ IE_NAME = 'zoom'
+ _VALID_URL = r'(?P<base_url>https?://(?:[^.]+\.)?zoom\.us/)rec(?:ording)?/(?P<type>play|share)/(?P<id>[\w.-]+)'
+ _TESTS = [{
+ 'url': 'https://economist.zoom.us/rec/play/dUk_CNBETmZ5VA2BwEl-jjakPpJ3M1pcfVYAPRsoIbEByGsLjUZtaa4yCATQuOL3der8BlTwxQePl_j0.EImBkXzTIaPvdZO5',
+ 'md5': 'ab445e8c911fddc4f9adc842c2c5d434',
+ 'info_dict': {
+ 'id': 'dUk_CNBETmZ5VA2BwEl-jjakPpJ3M1pcfVYAPRsoIbEByGsLjUZtaa4yCATQuOL3der8BlTwxQePl_j0.EImBkXzTIaPvdZO5',
+ 'ext': 'mp4',
+ 'title': 'China\'s "two sessions" and the new five-year plan',
+ },
+ 'skip': 'Recording requires email authentication to access',
+ }, {
+ # play URL
+ 'url': 'https://ffgolf.zoom.us/rec/play/qhEhXbrxq1Zoucx8CMtHzq1Z_2YZRPVCqWK_K-2FkEGRsSLDeOX8Tu4P6jtjZcRry8QhIbvKZdtr4UNo.QcPn2debFskI9whJ',
+ 'md5': '2c4b1c4e5213ebf9db293e88d9385bee',
+ 'info_dict': {
+ 'id': 'qhEhXbrxq1Zoucx8CMtHzq1Z_2YZRPVCqWK_K-2FkEGRsSLDeOX8Tu4P6jtjZcRry8QhIbvKZdtr4UNo.QcPn2debFskI9whJ',
+ 'ext': 'mp4',
+ 'title': 'Prépa AF2023 - Séance 5 du 11 avril - R20/VM/GO',
+ },
+ }, {
+ # share URL
+ 'url': 'https://us02web.zoom.us/rec/share/hkUk5Zxcga0nkyNGhVCRfzkA2gX_mzgS3LpTxEEWJz9Y_QpIQ4mZFOUx7KZRZDQA.9LGQBdqmDAYgiZ_8',
+ 'md5': '90fdc7cfcaee5d52d1c817fc03c43c9b',
+ 'info_dict': {
+ 'id': 'hkUk5Zxcga0nkyNGhVCRfzkA2gX_mzgS3LpTxEEWJz9Y_QpIQ4mZFOUx7KZRZDQA.9LGQBdqmDAYgiZ_8',
+ 'ext': 'mp4',
+ 'title': 'Timea Andrea Lelik\'s Personal Meeting Room',
+ },
+ 'skip': 'This recording has expired',
+ }, {
+ # view_with_share URL
+ 'url': 'https://cityofdetroit.zoom.us/rec/share/VjE-5kW3xmgbEYqR5KzRgZ1OFZvtMtiXk5HyRJo5kK4m5PYE6RF4rF_oiiO_9qaM.UTAg1MI7JSnF3ZjX',
+ 'md5': 'bdc7867a5934c151957fb81321b3c024',
+ 'info_dict': {
+ 'id': 'VjE-5kW3xmgbEYqR5KzRgZ1OFZvtMtiXk5HyRJo5kK4m5PYE6RF4rF_oiiO_9qaM.UTAg1MI7JSnF3ZjX',
+ 'ext': 'mp4',
+ 'title': 'February 2022 Detroit Revenue Estimating Conference',
+ 'duration': 7299,
+ 'formats': 'mincount:3',
+ },
+ }]
+
+ def _get_page_data(self, webpage, video_id):
+ return self._search_json(
+ r'window\.__data__\s*=', webpage, 'data', video_id, transform_source=js_to_json)
+
+ def _get_real_webpage(self, url, base_url, video_id, url_type):
+ webpage = self._download_webpage(url, video_id, note=f'Downloading {url_type} webpage')
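+        # If no password form is present, the recording is accessible as-is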
+ try:
+ form = self._form_hidden_inputs('password_form', webpage)
+ except ExtractorError:
+ return webpage
+
+ password = self.get_param('videopassword')
+ if not password:
+ raise ExtractorError(
+ 'This video is protected by a passcode, use the --video-password option', expected=True)
+ is_meeting = form.get('useWhichPasswd') == 'meeting'
+ validation = self._download_json(
+ base_url + 'rec/validate%s_passwd' % ('_meet' if is_meeting else ''),
+ video_id, 'Validating passcode', 'Wrong passcode', data=urlencode_postdata({
+ 'id': form[('meet' if is_meeting else 'file') + 'Id'],
+ 'passwd': password,
+ 'action': form.get('action'),
+ }))
+ if not validation.get('status'):
+ raise ExtractorError(validation['errorMessage'], expected=True)
+ return self._download_webpage(url, video_id, note=f'Re-downloading {url_type} webpage')
+
+ def _real_extract(self, url):
+ base_url, url_type, video_id = self._match_valid_url(url).group('base_url', 'type', 'id')
+ query = {}
+
+ if url_type == 'share':
+ webpage = self._get_real_webpage(url, base_url, video_id, 'share')
+ meeting_id = self._get_page_data(webpage, video_id)['meetingId']
+ redirect_path = self._download_json(
+ f'{base_url}nws/recording/1.0/play/share-info/{meeting_id}',
+ video_id, note='Downloading share info JSON')['result']['redirectUrl']
+ url = urljoin(base_url, redirect_path)
+ query['continueMode'] = 'true'
+
+ webpage = self._get_real_webpage(url, base_url, video_id, 'play')
+ file_id = self._get_page_data(webpage, video_id)['fileId']
+ if not file_id:
+            # When things go wrong, file_id can be an empty string
+ raise ExtractorError('Unable to extract file ID')
+
+ data = self._download_json(
+ f'{base_url}nws/recording/1.0/play/info/{file_id}', video_id, query=query,
+ note='Downloading play info JSON')['result']
+
+ subtitles = {}
+ for _type in ('transcript', 'cc', 'chapter'):
+ if data.get('%sUrl' % _type):
+ subtitles[_type] = [{
+ 'url': urljoin(base_url, data['%sUrl' % _type]),
+ 'ext': 'vtt',
+ }]
+
+ formats = []
+
+ if data.get('viewMp4Url'):
+ formats.append({
+ 'format_note': 'Camera stream',
+ 'url': data['viewMp4Url'],
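+                # 'viewResolvtions' is the field name as (mis)spelled in Zoom's API response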
+ 'width': int_or_none(traverse_obj(data, ('viewResolvtions', 0))),
+ 'height': int_or_none(traverse_obj(data, ('viewResolvtions', 1))),
+ 'format_id': 'view',
+ 'ext': 'mp4',
+ 'filesize_approx': parse_filesize(str_or_none(traverse_obj(data, ('recording', 'fileSizeInMB')))),
+ 'preference': 0
+ })
+
+ if data.get('shareMp4Url'):
+ formats.append({
+ 'format_note': 'Screen share stream',
+ 'url': data['shareMp4Url'],
+ 'width': int_or_none(traverse_obj(data, ('shareResolvtions', 0))),
+ 'height': int_or_none(traverse_obj(data, ('shareResolvtions', 1))),
+ 'format_id': 'share',
+ 'ext': 'mp4',
+ 'preference': -1
+ })
+
+ view_with_share_url = data.get('viewMp4WithshareUrl')
+ if view_with_share_url:
+ formats.append({
+ **parse_resolution(self._search_regex(
+ r'_(\d+x\d+)\.mp4', url_basename(view_with_share_url), 'resolution', default=None)),
+ 'format_note': 'Screen share with camera',
+ 'url': view_with_share_url,
+ 'format_id': 'view_with_share',
+ 'ext': 'mp4',
+ 'preference': 1
+ })
+
+ return {
+ 'id': video_id,
+ 'title': str_or_none(traverse_obj(data, ('meet', 'topic'))),
+ 'duration': int_or_none(data.get('duration')),
+ 'subtitles': subtitles,
+ 'formats': formats,
+ 'http_headers': {
+ 'Referer': base_url,
+ },
+ }
diff --git a/yt_dlp/extractor/zype.py b/yt_dlp/extractor/zype.py
new file mode 100644
index 0000000..2f3b4c4
--- /dev/null
+++ b/yt_dlp/extractor/zype.py
@@ -0,0 +1,135 @@
+import re
+
+from .common import InfoExtractor
+from ..networking.exceptions import HTTPError
+from ..utils import (
+ dict_get,
+ ExtractorError,
+ int_or_none,
+ js_to_json,
+ parse_iso8601,
+)
+
+
+class ZypeIE(InfoExtractor):
+ _ID_RE = r'[\da-fA-F]+'
+ _COMMON_RE = r'//player\.zype\.com/embed/%s\.(?:js|json|html)\?.*?(?:access_token|(?:ap[ip]|player)_key)='
+ _VALID_URL = r'https?:%s[^&]+' % (_COMMON_RE % ('(?P<id>%s)' % _ID_RE))
+ _EMBED_REGEX = [fr'<script[^>]+\bsrc=(["\'])(?P<url>(?:https?:)?{_COMMON_RE % _ID_RE}.+?)\1']
+ _TEST = {
+ 'url': 'https://player.zype.com/embed/5b400b834b32992a310622b9.js?api_key=jZ9GUhRmxcPvX7M3SlfejB6Hle9jyHTdk2jVxG7wOHPLODgncEKVdPYBhuz9iWXQ&autoplay=false&controls=true&da=false',
+ 'md5': 'eaee31d474c76a955bdaba02a505c595',
+ 'info_dict': {
+ 'id': '5b400b834b32992a310622b9',
+ 'ext': 'mp4',
+ 'title': 'Smoky Barbecue Favorites',
+ 'thumbnail': r're:^https?://.*\.jpe?g',
+ 'description': 'md5:5ff01e76316bd8d46508af26dc86023b',
+ 'timestamp': 1504915200,
+ 'upload_date': '20170909',
+ },
+ }
+
+ def _real_extract(self, url):
+ video_id = self._match_id(url)
+
+ try:
+ response = self._download_json(re.sub(
+ r'\.(?:js|html)\?', '.json?', url), video_id)['response']
+ except ExtractorError as e:
+ if isinstance(e.cause, HTTPError) and e.cause.status in (400, 401, 403):
+ raise ExtractorError(self._parse_json(
+ e.cause.response.read().decode(), video_id)['message'], expected=True)
+ raise
+
+ body = response['body']
+ video = response['video']
+ title = video['title']
+
+ subtitles = {}
+
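+        # body is either structured JSON (a dict of transcoded outputs) or the raw player script (a str)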
+ if isinstance(body, dict):
+ formats = []
+ for output in body.get('outputs', []):
+ output_url = output.get('url')
+ if not output_url:
+ continue
+ name = output.get('name')
+ if name == 'm3u8':
+ formats, subtitles = self._extract_m3u8_formats_and_subtitles(
+ output_url, video_id, 'mp4',
+ 'm3u8_native', m3u8_id='hls', fatal=False)
+ else:
+ f = {
+ 'format_id': name,
+ 'tbr': int_or_none(output.get('bitrate')),
+ 'url': output_url,
+ }
+ if name in ('m4a', 'mp3'):
+ f['vcodec'] = 'none'
+ else:
+ f.update({
+ 'height': int_or_none(output.get('height')),
+ 'width': int_or_none(output.get('width')),
+ })
+ formats.append(f)
+ text_tracks = body.get('subtitles') or []
+ else:
+ m3u8_url = self._search_regex(
+ r'(["\'])(?P<url>(?:(?!\1).)+\.m3u8(?:(?!\1).)*)\1',
+ body, 'm3u8 url', group='url', default=None)
+ if not m3u8_url:
+ source = self._search_regex(
+ r'(?s)sources\s*:\s*\[\s*({.+?})\s*\]', body, 'source')
+
+ def get_attr(key):
+ return self._search_regex(
+ r'\b%s\s*:\s*([\'"])(?P<val>(?:(?!\1).)+)\1' % key,
+ source, key, group='val')
+
+ if get_attr('integration') == 'verizon-media':
+ m3u8_url = 'https://content.uplynk.com/%s.m3u8' % get_attr('id')
+ formats, subtitles = self._extract_m3u8_formats_and_subtitles(
+ m3u8_url, video_id, 'mp4', 'm3u8_native', m3u8_id='hls')
+ text_tracks = self._search_regex(
+ r'textTracks\s*:\s*(\[[^]]+\])',
+ body, 'text tracks', default=None)
+ if text_tracks:
+ text_tracks = self._parse_json(
+ text_tracks, video_id, js_to_json, False)
+
+ if text_tracks:
+ for text_track in text_tracks:
+ tt_url = dict_get(text_track, ('file', 'src'))
+ if not tt_url:
+ continue
+ subtitles.setdefault(text_track.get('label') or 'English', []).append({
+ 'url': tt_url,
+ })
+
+ thumbnails = []
+ for thumbnail in video.get('thumbnails', []):
+ thumbnail_url = thumbnail.get('url')
+ if not thumbnail_url:
+ continue
+ thumbnails.append({
+ 'url': thumbnail_url,
+ 'width': int_or_none(thumbnail.get('width')),
+ 'height': int_or_none(thumbnail.get('height')),
+ })
+
+ return {
+ 'id': video_id,
+ 'display_id': video.get('friendly_title'),
+ 'title': title,
+ 'thumbnails': thumbnails,
+ 'description': dict_get(video, ('description', 'ott_description', 'short_description')),
+ 'timestamp': parse_iso8601(video.get('published_at')),
+ 'duration': int_or_none(video.get('duration')),
+ 'view_count': int_or_none(video.get('request_count')),
+ 'average_rating': int_or_none(video.get('rating')),
+ 'season_number': int_or_none(video.get('season')),
+ 'episode_number': int_or_none(video.get('episode')),
+ 'formats': formats,
+ 'subtitles': subtitles,
+ }
diff --git a/yt_dlp/jsinterp.py b/yt_dlp/jsinterp.py
new file mode 100644
index 0000000..bda3fb4
--- /dev/null
+++ b/yt_dlp/jsinterp.py
@@ -0,0 +1,853 @@
+import collections
+import contextlib
+import itertools
+import json
+import math
+import operator
+import re
+
+from .utils import (
+ NO_DEFAULT,
+ ExtractorError,
+ function_with_repr,
+ js_to_json,
+ remove_quotes,
+ truncate_string,
+ unified_timestamp,
+ write_string,
+)
+
+
+def _js_bit_op(op):
+ def zeroise(x):
+ if x in (None, JS_Undefined):
+ return 0
+ with contextlib.suppress(TypeError):
+ if math.isnan(x): # NB: NaN cannot be checked by membership
+ return 0
+ return x
+
+ def wrapped(a, b):
+ return op(zeroise(a), zeroise(b)) & 0xffffffff
+
+ return wrapped
+
+
+def _js_arith_op(op):
+
+ def wrapped(a, b):
+ if JS_Undefined in (a, b):
+ return float('nan')
+ return op(a or 0, b or 0)
+
+ return wrapped
+
+
+def _js_div(a, b):
+ if JS_Undefined in (a, b) or not (a or b):
+ return float('nan')
+ return (a or 0) / b if b else float('inf')
+
+
+def _js_mod(a, b):
+ if JS_Undefined in (a, b) or not b:
+ return float('nan')
+ return (a or 0) % b
+
+
+def _js_exp(a, b):
+ if not b:
+        return 1  # in JS, even 0 ** 0 evaluates to 1
+ elif JS_Undefined in (a, b):
+ return float('nan')
+ return (a or 0) ** b
+
+
+def _js_eq_op(op):
+
+ def wrapped(a, b):
+ if {a, b} <= {None, JS_Undefined}:
+ return op(a, a)
+ return op(a, b)
+
+ return wrapped
+
+
+def _js_comp_op(op):
+
+ def wrapped(a, b):
+ if JS_Undefined in (a, b):
+ return False
+ if isinstance(a, str) or isinstance(b, str):
+ return op(str(a or 0), str(b or 0))
+ return op(a or 0, b or 0)
+
+ return wrapped
+
+
+def _js_ternary(cndn, if_true=True, if_false=False):
+ """Simulate JS's ternary operator (cndn?if_true:if_false)"""
+ if cndn in (False, None, 0, '', JS_Undefined):
+ return if_false
+ with contextlib.suppress(TypeError):
+ if math.isnan(cndn): # NB: NaN cannot be checked by membership
+ return if_false
+ return if_true
+
+
+# Ref: https://developer.mozilla.org/en-US/docs/Web/JavaScript/Reference/Operators/Operator_Precedence
+_OPERATORS = { # None => Defined in JSInterpreter._operator
+ '?': None,
+ '??': None,
+ '||': None,
+ '&&': None,
+
+ '|': _js_bit_op(operator.or_),
+ '^': _js_bit_op(operator.xor),
+ '&': _js_bit_op(operator.and_),
+
+ '===': operator.is_,
+ '!==': operator.is_not,
+ '==': _js_eq_op(operator.eq),
+ '!=': _js_eq_op(operator.ne),
+
+ '<=': _js_comp_op(operator.le),
+ '>=': _js_comp_op(operator.ge),
+ '<': _js_comp_op(operator.lt),
+ '>': _js_comp_op(operator.gt),
+
+ '>>': _js_bit_op(operator.rshift),
+ '<<': _js_bit_op(operator.lshift),
+
+ '+': _js_arith_op(operator.add),
+ '-': _js_arith_op(operator.sub),
+
+ '*': _js_arith_op(operator.mul),
+ '%': _js_mod,
+ '/': _js_div,
+ '**': _js_exp,
+}
+
+_COMP_OPERATORS = {'===', '!==', '==', '!=', '<=', '>=', '<', '>'}
+
+_NAME_RE = r'[a-zA-Z_$][\w$]*'
+_MATCHING_PARENS = dict(zip(*zip('()', '{}', '[]')))
+_QUOTES = '\'"/'
+
+
+class JS_Undefined:
+ pass
+
+
+class JS_Break(ExtractorError):
+ def __init__(self):
+ ExtractorError.__init__(self, 'Invalid break')
+
+
+class JS_Continue(ExtractorError):
+ def __init__(self):
+ ExtractorError.__init__(self, 'Invalid continue')
+
+
+class JS_Throw(ExtractorError):
+ def __init__(self, e):
+ self.error = e
+ ExtractorError.__init__(self, f'Uncaught exception {e}')
+
+
+class LocalNameSpace(collections.ChainMap):
+ def __setitem__(self, key, value):
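+        # Assign to the nearest enclosing scope that already defines key;
+        # otherwise create it in the innermost scope (mimics JS variable scoping)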
+ for scope in self.maps:
+ if key in scope:
+ scope[key] = value
+ return
+ self.maps[0][key] = value
+
+ def __delitem__(self, key):
+ raise NotImplementedError('Deleting is not supported')
+
+
+class Debugger:
+ import sys
+ ENABLED = False and 'pytest' in sys.modules
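+    # change False to True above to enable interpreter tracing when running under pytest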
+
+ @staticmethod
+ def write(*args, level=100):
+ write_string(f'[debug] JS: {" " * (100 - level)}'
+ f'{" ".join(truncate_string(str(x), 50, 50) for x in args)}\n')
+
+ @classmethod
+ def wrap_interpreter(cls, f):
+ def interpret_statement(self, stmt, local_vars, allow_recursion, *args, **kwargs):
+ if cls.ENABLED and stmt.strip():
+ cls.write(stmt, level=allow_recursion)
+ try:
+ ret, should_ret = f(self, stmt, local_vars, allow_recursion, *args, **kwargs)
+ except Exception as e:
+ if cls.ENABLED:
+ if isinstance(e, ExtractorError):
+ e = e.orig_msg
+ cls.write('=> Raises:', e, '<-|', stmt, level=allow_recursion)
+ raise
+ if cls.ENABLED and stmt.strip():
+            if should_ret or repr(ret) != stmt:
+ cls.write(['->', '=>'][should_ret], repr(ret), '<-|', stmt, level=allow_recursion)
+ return ret, should_ret
+ return interpret_statement
+
+
+class JSInterpreter:
+ __named_object_counter = 0
+
+ _RE_FLAGS = {
+ # special knowledge: Python's re flags are bitmask values, current max 128
+ # invent new bitmask values well above that for literal parsing
+ # TODO: new pattern class to execute matches with these flags
+ 'd': 1024, # Generate indices for substring matches
+ 'g': 2048, # Global search
+ 'i': re.I, # Case-insensitive search
+ 'm': re.M, # Multi-line search
+ 's': re.S, # Allows . to match newline characters
+ 'u': re.U, # Treat a pattern as a sequence of unicode code points
+ 'y': 4096, # Perform a "sticky" search that matches starting at the current position in the target string
+ }
+
+ def __init__(self, code, objects=None):
+ self.code, self._functions = code, {}
+ self._objects = {} if objects is None else objects
+
+ class Exception(ExtractorError):
+ def __init__(self, msg, expr=None, *args, **kwargs):
+ if expr is not None:
+ msg = f'{msg.rstrip()} in: {truncate_string(expr, 50, 50)}'
+ super().__init__(msg, *args, **kwargs)
+
+ def _named_object(self, namespace, obj):
+ self.__named_object_counter += 1
+ name = f'__yt_dlp_jsinterp_obj{self.__named_object_counter}'
+ if callable(obj) and not isinstance(obj, function_with_repr):
+ obj = function_with_repr(obj, f'F<{self.__named_object_counter}>')
+ namespace[name] = obj
+ return name
+
+ @classmethod
+ def _regex_flags(cls, expr):
+ flags = 0
+ if not expr:
+ return flags, expr
+ for idx, ch in enumerate(expr):
+ if ch not in cls._RE_FLAGS:
+ break
+ flags |= cls._RE_FLAGS[ch]
+ return flags, expr[idx + 1:]
+
+ @staticmethod
+ def _separate(expr, delim=',', max_split=None):
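+        # Split expr on top-level occurrences of delim, ignoring delimiters nested
+        # inside parens/brackets/braces, quotes and regex literals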
+ OP_CHARS = '+-*/%&|^=<>!,;{}:['
+ if not expr:
+ return
+ counters = {k: 0 for k in _MATCHING_PARENS.values()}
+ start, splits, pos, delim_len = 0, 0, 0, len(delim) - 1
+ in_quote, escaping, after_op, in_regex_char_group = None, False, True, False
+ for idx, char in enumerate(expr):
+ if not in_quote and char in _MATCHING_PARENS:
+ counters[_MATCHING_PARENS[char]] += 1
+ elif not in_quote and char in counters:
+ # Something's wrong if we get negative, but ignore it anyway
+ if counters[char]:
+ counters[char] -= 1
+ elif not escaping:
+ if char in _QUOTES and in_quote in (char, None):
+ if in_quote or after_op or char != '/':
+ in_quote = None if in_quote and not in_regex_char_group else char
+ elif in_quote == '/' and char in '[]':
+ in_regex_char_group = char == '['
+ escaping = not escaping and in_quote and char == '\\'
+ in_unary_op = (not in_quote and not in_regex_char_group
+ and after_op not in (True, False) and char in '-+')
+ after_op = char if (not in_quote and char in OP_CHARS) else (char.isspace() and after_op)
+
+ if char != delim[pos] or any(counters.values()) or in_quote or in_unary_op:
+ pos = 0
+ continue
+ elif pos != delim_len:
+ pos += 1
+ continue
+ yield expr[start: idx - delim_len]
+ start, pos = idx + 1, 0
+ splits += 1
+ if max_split and splits >= max_split:
+ break
+ yield expr[start:]
+
+ @classmethod
+ def _separate_at_paren(cls, expr, delim=None):
+ if delim is None:
+ delim = expr and _MATCHING_PARENS[expr[0]]
+ separated = list(cls._separate(expr, delim, 1))
+ if len(separated) < 2:
+ raise cls.Exception(f'No terminating paren {delim}', expr)
+ return separated[0][1:].strip(), separated[1].strip()
+
+ def _operator(self, op, left_val, right_expr, expr, local_vars, allow_recursion):
+ if op in ('||', '&&'):
+ if (op == '&&') ^ _js_ternary(left_val):
+ return left_val # short circuiting
+ elif op == '??':
+ if left_val not in (None, JS_Undefined):
+ return left_val
+ elif op == '?':
+ right_expr = _js_ternary(left_val, *self._separate(right_expr, ':', 1))
+
+ right_val = self.interpret_expression(right_expr, local_vars, allow_recursion)
+ if not _OPERATORS.get(op):
+ return right_val
+
+ try:
+ return _OPERATORS[op](left_val, right_val)
+ except Exception as e:
+ raise self.Exception(f'Failed to evaluate {left_val!r} {op} {right_val!r}', expr, cause=e)
+
+ def _index(self, obj, idx, allow_undefined=False):
+ if idx == 'length':
+ return len(obj)
+ try:
+ return obj[int(idx)] if isinstance(obj, list) else obj[idx]
+ except Exception as e:
+ if allow_undefined:
+ return JS_Undefined
+ raise self.Exception(f'Cannot get index {idx}', repr(obj), cause=e)
+
+ def _dump(self, obj, namespace):
+ try:
+ return json.dumps(obj)
+ except TypeError:
+ return self._named_object(namespace, obj)
+
+ @Debugger.wrap_interpreter
+ def interpret_statement(self, stmt, local_vars, allow_recursion=100):
+ if allow_recursion < 0:
+ raise self.Exception('Recursion limit reached')
+ allow_recursion -= 1
+
+ should_return = False
+ sub_statements = list(self._separate(stmt, ';')) or ['']
+ expr = stmt = sub_statements.pop().strip()
+
+ for sub_stmt in sub_statements:
+ ret, should_return = self.interpret_statement(sub_stmt, local_vars, allow_recursion)
+ if should_return:
+ return ret, should_return
+
+ m = re.match(r'(?P<var>(?:var|const|let)\s)|return(?:\s+|(?=["\'])|$)|(?P<throw>throw\s+)', stmt)
+ if m:
+ expr = stmt[len(m.group(0)):].strip()
+ if m.group('throw'):
+ raise JS_Throw(self.interpret_expression(expr, local_vars, allow_recursion))
+ should_return = not m.group('var')
+ if not expr:
+ return None, should_return
+
+ if expr[0] in _QUOTES:
+ inner, outer = self._separate(expr, expr[0], 1)
+ if expr[0] == '/':
+ flags, outer = self._regex_flags(outer)
+ # We don't support regex methods yet, so no point compiling it
+ inner = f'{inner}/{flags}'
+ # Avoid https://github.com/python/cpython/issues/74534
+ # inner = re.compile(inner[1:].replace('[[', r'[\['), flags=flags)
+ else:
+ inner = json.loads(js_to_json(f'{inner}{expr[0]}', strict=True))
+ if not outer:
+ return inner, should_return
+ expr = self._named_object(local_vars, inner) + outer
+
+ if expr.startswith('new '):
+ obj = expr[4:]
+ if obj.startswith('Date('):
+ left, right = self._separate_at_paren(obj[4:])
+ date = unified_timestamp(
+ self.interpret_expression(left, local_vars, allow_recursion), False)
+ if date is None:
+ raise self.Exception(f'Failed to parse date {left!r}', expr)
+ expr = self._dump(int(date * 1000), local_vars) + right
+ else:
+ raise self.Exception(f'Unsupported object {obj}', expr)
+
+ if expr.startswith('void '):
+ left = self.interpret_expression(expr[5:], local_vars, allow_recursion)
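+            # the operand is evaluated only for its side effects; void always yields undefined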
+ return None, should_return
+
+ if expr.startswith('{'):
+ inner, outer = self._separate_at_paren(expr)
+ # try for object expression (Map)
+ sub_expressions = [list(self._separate(sub_expr.strip(), ':', 1)) for sub_expr in self._separate(inner)]
+ if all(len(sub_expr) == 2 for sub_expr in sub_expressions):
+ def dict_item(key, val):
+ val = self.interpret_expression(val, local_vars, allow_recursion)
+ if re.match(_NAME_RE, key):
+ return key, val
+ return self.interpret_expression(key, local_vars, allow_recursion), val
+
+ return dict(dict_item(k, v) for k, v in sub_expressions), should_return
+
+ inner, should_abort = self.interpret_statement(inner, local_vars, allow_recursion)
+ if not outer or should_abort:
+ return inner, should_abort or should_return
+ else:
+ expr = self._dump(inner, local_vars) + outer
+
+ if expr.startswith('('):
+ inner, outer = self._separate_at_paren(expr)
+ inner, should_abort = self.interpret_statement(inner, local_vars, allow_recursion)
+ if not outer or should_abort:
+ return inner, should_abort or should_return
+ else:
+ expr = self._dump(inner, local_vars) + outer
+
+ if expr.startswith('['):
+ inner, outer = self._separate_at_paren(expr)
+ name = self._named_object(local_vars, [
+ self.interpret_expression(item, local_vars, allow_recursion)
+ for item in self._separate(inner)])
+ expr = name + outer
+
+ m = re.match(r'''(?x)
+ (?P<try>try)\s*\{|
+ (?P<if>if)\s*\(|
+ (?P<switch>switch)\s*\(|
+ (?P<for>for)\s*\(
+ ''', expr)
+ md = m.groupdict() if m else {}
+ if md.get('if'):
+ cndn, expr = self._separate_at_paren(expr[m.end() - 1:])
+ if_expr, expr = self._separate_at_paren(expr.lstrip())
+ # TODO: "else if" is not handled
+ else_expr = None
+ m = re.match(r'else\s*{', expr)
+ if m:
+ else_expr, expr = self._separate_at_paren(expr[m.end() - 1:])
+ cndn = _js_ternary(self.interpret_expression(cndn, local_vars, allow_recursion))
+ ret, should_abort = self.interpret_statement(
+ if_expr if cndn else else_expr, local_vars, allow_recursion)
+ if should_abort:
+ return ret, True
+
+ if md.get('try'):
+ try_expr, expr = self._separate_at_paren(expr[m.end() - 1:])
+ err = None
+ try:
+ ret, should_abort = self.interpret_statement(try_expr, local_vars, allow_recursion)
+ if should_abort:
+ return ret, True
+ except Exception as e:
+ # XXX: This works for now, but makes debugging future issues very hard
+ err = e
+
+ pending = (None, False)
+ m = re.match(fr'catch\s*(?P<err>\(\s*{_NAME_RE}\s*\))?\{{', expr)
+ if m:
+ sub_expr, expr = self._separate_at_paren(expr[m.end() - 1:])
+ if err:
+ catch_vars = {}
+ if m.group('err'):
+ catch_vars[m.group('err')] = err.error if isinstance(err, JS_Throw) else err
+ catch_vars = local_vars.new_child(catch_vars)
+ err, pending = None, self.interpret_statement(sub_expr, catch_vars, allow_recursion)
+
+ m = re.match(r'finally\s*\{', expr)
+ if m:
+ sub_expr, expr = self._separate_at_paren(expr[m.end() - 1:])
+ ret, should_abort = self.interpret_statement(sub_expr, local_vars, allow_recursion)
+ if should_abort:
+ return ret, True
+
+ ret, should_abort = pending
+ if should_abort:
+ return ret, True
+
+ if err:
+ raise err
+
+ elif md.get('for'):
+ constructor, remaining = self._separate_at_paren(expr[m.end() - 1:])
+ if remaining.startswith('{'):
+ body, expr = self._separate_at_paren(remaining)
+ else:
+ switch_m = re.match(r'switch\s*\(', remaining) # FIXME
+ if switch_m:
+ switch_val, remaining = self._separate_at_paren(remaining[switch_m.end() - 1:])
+ body, expr = self._separate_at_paren(remaining, '}')
+ body = 'switch(%s){%s}' % (switch_val, body)
+ else:
+ body, expr = remaining, ''
+ start, cndn, increment = self._separate(constructor, ';')
+ self.interpret_expression(start, local_vars, allow_recursion)
+ while True:
+ if not _js_ternary(self.interpret_expression(cndn, local_vars, allow_recursion)):
+ break
+ try:
+ ret, should_abort = self.interpret_statement(body, local_vars, allow_recursion)
+ if should_abort:
+ return ret, True
+ except JS_Break:
+ break
+ except JS_Continue:
+ pass
+ self.interpret_expression(increment, local_vars, allow_recursion)
+
+ elif md.get('switch'):
+ switch_val, remaining = self._separate_at_paren(expr[m.end() - 1:])
+ switch_val = self.interpret_expression(switch_val, local_vars, allow_recursion)
+ body, expr = self._separate_at_paren(remaining, '}')
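+            # Normalize 'default:' into a pseudo-case so all branches split uniformly on 'case '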
+ items = body.replace('default:', 'case default:').split('case ')[1:]
+ for default in (False, True):
+ matched = False
+ for item in items:
+ case, stmt = (i.strip() for i in self._separate(item, ':', 1))
+ if default:
+ matched = matched or case == 'default'
+ elif not matched:
+ matched = (case != 'default'
+ and switch_val == self.interpret_expression(case, local_vars, allow_recursion))
+ if not matched:
+ continue
+ try:
+ ret, should_abort = self.interpret_statement(stmt, local_vars, allow_recursion)
+ if should_abort:
+                            return ret, True
+ except JS_Break:
+ break
+ if matched:
+ break
+
+ if md:
+ ret, should_abort = self.interpret_statement(expr, local_vars, allow_recursion)
+ return ret, should_abort or should_return
+
+ # Comma separated statements
+ sub_expressions = list(self._separate(expr))
+ if len(sub_expressions) > 1:
+ for sub_expr in sub_expressions:
+ ret, should_abort = self.interpret_statement(sub_expr, local_vars, allow_recursion)
+ if should_abort:
+ return ret, True
+ return ret, False
+
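+        # Resolve pre/post increment and decrement by mutating the variable and
+        # splicing the resulting value back into the expression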
+ for m in re.finditer(rf'''(?x)
+ (?P<pre_sign>\+\+|--)(?P<var1>{_NAME_RE})|
+ (?P<var2>{_NAME_RE})(?P<post_sign>\+\+|--)''', expr):
+ var = m.group('var1') or m.group('var2')
+ start, end = m.span()
+ sign = m.group('pre_sign') or m.group('post_sign')
+ ret = local_vars[var]
+ local_vars[var] += 1 if sign[0] == '+' else -1
+ if m.group('pre_sign'):
+ ret = local_vars[var]
+ expr = expr[:start] + self._dump(ret, local_vars) + expr[end:]
+
+ if not expr:
+ return None, should_return
+
+ m = re.match(fr'''(?x)
+ (?P<assign>
+ (?P<out>{_NAME_RE})(?:\[(?P<index>[^\]]+?)\])?\s*
+ (?P<op>{"|".join(map(re.escape, set(_OPERATORS) - _COMP_OPERATORS))})?
+ =(?!=)(?P<expr>.*)$
+ )|(?P<return>
+ (?!if|return|true|false|null|undefined|NaN)(?P<name>{_NAME_RE})$
+ )|(?P<indexing>
+ (?P<in>{_NAME_RE})\[(?P<idx>.+)\]$
+ )|(?P<attribute>
+ (?P<var>{_NAME_RE})(?:(?P<nullish>\?)?\.(?P<member>[^(]+)|\[(?P<member2>[^\]]+)\])\s*
+ )|(?P<function>
+ (?P<fname>{_NAME_RE})\((?P<args>.*)\)$
+ )''', expr)
+ if m and m.group('assign'):
+ left_val = local_vars.get(m.group('out'))
+
+ if not m.group('index'):
+ local_vars[m.group('out')] = self._operator(
+ m.group('op'), left_val, m.group('expr'), expr, local_vars, allow_recursion)
+ return local_vars[m.group('out')], should_return
+ elif left_val in (None, JS_Undefined):
+ raise self.Exception(f'Cannot index undefined variable {m.group("out")}', expr)
+
+ idx = self.interpret_expression(m.group('index'), local_vars, allow_recursion)
+ if not isinstance(idx, (int, float)):
+ raise self.Exception(f'List index {idx} must be integer', expr)
+ idx = int(idx)
+ left_val[idx] = self._operator(
+ m.group('op'), self._index(left_val, idx), m.group('expr'), expr, local_vars, allow_recursion)
+ return left_val[idx], should_return
+
+ elif expr.isdigit():
+ return int(expr), should_return
+
+ elif expr == 'break':
+ raise JS_Break()
+ elif expr == 'continue':
+ raise JS_Continue()
+ elif expr == 'undefined':
+ return JS_Undefined, should_return
+ elif expr == 'NaN':
+ return float('NaN'), should_return
+
+ elif m and m.group('return'):
+ return local_vars.get(m.group('name'), JS_Undefined), should_return
+
+ with contextlib.suppress(ValueError):
+ return json.loads(js_to_json(expr, strict=True)), should_return
+
+ if m and m.group('indexing'):
+ val = local_vars[m.group('in')]
+ idx = self.interpret_expression(m.group('idx'), local_vars, allow_recursion)
+ return self._index(val, idx), should_return
+
+ for op in _OPERATORS:
+ separated = list(self._separate(expr, op))
+ right_expr = separated.pop()
+ while True:
+ if op in '?<>*-' and len(separated) > 1 and not separated[-1].strip():
+ separated.pop()
+ elif not (separated and op == '?' and right_expr.startswith('.')):
+ break
+ right_expr = f'{op}{right_expr}'
+ if op != '-':
+ right_expr = f'{separated.pop()}{op}{right_expr}'
+ if not separated:
+ continue
+ left_val = self.interpret_expression(op.join(separated), local_vars, allow_recursion)
+ return self._operator(op, left_val, right_expr, expr, local_vars, allow_recursion), should_return
+
+ if m and m.group('attribute'):
+ variable, member, nullish = m.group('var', 'member', 'nullish')
+ if not member:
+ member = self.interpret_expression(m.group('member2'), local_vars, allow_recursion)
+ arg_str = expr[m.end():]
+ if arg_str.startswith('('):
+ arg_str, remaining = self._separate_at_paren(arg_str)
+ else:
+ arg_str, remaining = None, arg_str
+
+ def assertion(cndn, msg):
+ """ assert, but without risk of getting optimized out """
+ if not cndn:
+ raise self.Exception(f'{member} {msg}', expr)
+
+ def eval_method():
+ if (variable, member) == ('console', 'debug'):
+ if Debugger.ENABLED:
+ Debugger.write(self.interpret_expression(f'[{arg_str}]', local_vars, allow_recursion))
+ return
+
+ types = {
+ 'String': str,
+ 'Math': float,
+ }
+ obj = local_vars.get(variable, types.get(variable, NO_DEFAULT))
+ if obj is NO_DEFAULT:
+ if variable not in self._objects:
+ try:
+ self._objects[variable] = self.extract_object(variable)
+ except self.Exception:
+ if not nullish:
+ raise
+ obj = self._objects.get(variable, JS_Undefined)
+
+ if nullish and obj is JS_Undefined:
+ return JS_Undefined
+
+ # Member access
+ if arg_str is None:
+ return self._index(obj, member, nullish)
+
+ # Function call
+ argvals = [
+ self.interpret_expression(v, local_vars, allow_recursion)
+ for v in self._separate(arg_str)]
+
+ if obj == str:
+ if member == 'fromCharCode':
+ assertion(argvals, 'takes one or more arguments')
+ return ''.join(map(chr, argvals))
+ raise self.Exception(f'Unsupported String method {member}', expr)
+ elif obj == float:
+ if member == 'pow':
+ assertion(len(argvals) == 2, 'takes two arguments')
+ return argvals[0] ** argvals[1]
+ raise self.Exception(f'Unsupported Math method {member}', expr)
+
+ if member == 'split':
+ assertion(argvals, 'takes one or more arguments')
+ assertion(len(argvals) == 1, 'with limit argument is not implemented')
+ return obj.split(argvals[0]) if argvals[0] else list(obj)
+ elif member == 'join':
+ assertion(isinstance(obj, list), 'must be applied on a list')
+ assertion(len(argvals) == 1, 'takes exactly one argument')
+ return argvals[0].join(obj)
+ elif member == 'reverse':
+ assertion(not argvals, 'does not take any arguments')
+ obj.reverse()
+ return obj
+ elif member == 'slice':
+ assertion(isinstance(obj, list), 'must be applied on a list')
+ assertion(len(argvals) == 1, 'takes exactly one argument')
+ return obj[argvals[0]:]
+ elif member == 'splice':
+ assertion(isinstance(obj, list), 'must be applied on a list')
+ assertion(argvals, 'takes one or more arguments')
+ index, howMany = map(int, (argvals + [len(obj)])[:2])
+ if index < 0:
+ index += len(obj)
+ add_items = argvals[2:]
+ res = []
+ for i in range(index, min(index + howMany, len(obj))):
+ res.append(obj.pop(index))
+ for i, item in enumerate(add_items):
+ obj.insert(index + i, item)
+ return res
+ elif member == 'unshift':
+ assertion(isinstance(obj, list), 'must be applied on a list')
+ assertion(argvals, 'takes one or more arguments')
+ for item in reversed(argvals):
+ obj.insert(0, item)
+ return obj
+ elif member == 'pop':
+ assertion(isinstance(obj, list), 'must be applied on a list')
+ assertion(not argvals, 'does not take any arguments')
+ if not obj:
+ return
+ return obj.pop()
+ elif member == 'push':
+ assertion(argvals, 'takes one or more arguments')
+ obj.extend(argvals)
+ return obj
+ elif member == 'forEach':
+ assertion(argvals, 'takes one or more arguments')
+                assertion(len(argvals) <= 2, 'takes at most 2 arguments')
+ f, this = (argvals + [''])[:2]
+ return [f((item, idx, obj), {'this': this}, allow_recursion) for idx, item in enumerate(obj)]
+ elif member == 'indexOf':
+ assertion(argvals, 'takes one or more arguments')
+                assertion(len(argvals) <= 2, 'takes at most 2 arguments')
+ idx, start = (argvals + [0])[:2]
+ try:
+ return obj.index(idx, start)
+ except ValueError:
+ return -1
+ elif member == 'charCodeAt':
+ assertion(isinstance(obj, str), 'must be applied on a string')
+ assertion(len(argvals) == 1, 'takes exactly one argument')
+ idx = argvals[0] if isinstance(argvals[0], int) else 0
+ if idx >= len(obj):
+ return None
+ return ord(obj[idx])
+
+ idx = int(member) if isinstance(obj, list) else member
+ return obj[idx](argvals, allow_recursion=allow_recursion)
+
+ if remaining:
+ ret, should_abort = self.interpret_statement(
+ self._named_object(local_vars, eval_method()) + remaining,
+ local_vars, allow_recursion)
+ return ret, should_return or should_abort
+ else:
+ return eval_method(), should_return
+
+ elif m and m.group('function'):
+ fname = m.group('fname')
+ argvals = [self.interpret_expression(v, local_vars, allow_recursion)
+ for v in self._separate(m.group('args'))]
+ if fname in local_vars:
+ return local_vars[fname](argvals, allow_recursion=allow_recursion), should_return
+ elif fname not in self._functions:
+ self._functions[fname] = self.extract_function(fname)
+ return self._functions[fname](argvals, allow_recursion=allow_recursion), should_return
+
+ raise self.Exception(
+ f'Unsupported JS expression {truncate_string(expr, 20, 20) if expr != stmt else ""}', stmt)
+
+ def interpret_expression(self, expr, local_vars, allow_recursion):
+ ret, should_return = self.interpret_statement(expr, local_vars, allow_recursion)
+ if should_return:
+ raise self.Exception('Cannot return from an expression', expr)
+ return ret
+
+ def extract_object(self, objname):
+ _FUNC_NAME_RE = r'''(?:[a-zA-Z$0-9]+|"[a-zA-Z$0-9]+"|'[a-zA-Z$0-9]+')'''
+ obj = {}
+ obj_m = re.search(
+ r'''(?x)
+ (?<!\.)%s\s*=\s*{\s*
+ (?P<fields>(%s\s*:\s*function\s*\(.*?\)\s*{.*?}(?:,\s*)?)*)
+ }\s*;
+ ''' % (re.escape(objname), _FUNC_NAME_RE),
+ self.code)
+ if not obj_m:
+ raise self.Exception(f'Could not find object {objname}')
+ fields = obj_m.group('fields')
+        # Currently, only function definitions are supported
+ fields_m = re.finditer(
+ r'''(?x)
+ (?P<key>%s)\s*:\s*function\s*\((?P<args>(?:%s|,)*)\){(?P<code>[^}]+)}
+ ''' % (_FUNC_NAME_RE, _NAME_RE),
+ fields)
+ for f in fields_m:
+ argnames = f.group('args').split(',')
+ name = remove_quotes(f.group('key'))
+ obj[name] = function_with_repr(self.build_function(argnames, f.group('code')), f'F<{name}>')
+
+ return obj
+
+ def extract_function_code(self, funcname):
+ """ @returns argnames, code """
+ func_m = re.search(
+ r'''(?xs)
+ (?:
+ function\s+%(name)s|
+ [{;,]\s*%(name)s\s*=\s*function|
+ (?:var|const|let)\s+%(name)s\s*=\s*function
+ )\s*
+ \((?P<args>[^)]*)\)\s*
+ (?P<code>{.+})''' % {'name': re.escape(funcname)},
+ self.code)
+ if func_m is None:
+ raise self.Exception(f'Could not find JS function "{funcname}"')
+ code, _ = self._separate_at_paren(func_m.group('code'))
+ return [x.strip() for x in func_m.group('args').split(',')], code
+
+ def extract_function(self, funcname):
+ return function_with_repr(
+ self.extract_function_from_code(*self.extract_function_code(funcname)),
+ f'F<{funcname}>')
+
+ def extract_function_from_code(self, argnames, code, *global_stack):
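+        # Inline nested anonymous function literals as named objects before building the outer function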
+ local_vars = {}
+ while True:
+ mobj = re.search(r'function\((?P<args>[^)]*)\)\s*{', code)
+ if mobj is None:
+ break
+ start, body_start = mobj.span()
+ body, remaining = self._separate_at_paren(code[body_start - 1:])
+ name = self._named_object(local_vars, self.extract_function_from_code(
+ [x.strip() for x in mobj.group('args').split(',')],
+ body, local_vars, *global_stack))
+ code = code[:start] + name + remaining
+ return self.build_function(argnames, code, local_vars, *global_stack)
+
+ def call_function(self, funcname, *args):
+ return self.extract_function(funcname)(args)
+
+ def build_function(self, argnames, code, *global_stack):
+ global_stack = list(global_stack) or [{}]
+ argnames = tuple(argnames)
+
+ def resf(args, kwargs={}, allow_recursion=100):
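+            # Bind arguments by position; missing arguments are filled with None (standing in for JS undefined)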
+ global_stack[0].update(itertools.zip_longest(argnames, args, fillvalue=None))
+ global_stack[0].update(kwargs)
+ var_stack = LocalNameSpace(*global_stack)
+ ret, should_abort = self.interpret_statement(code.replace('\n', ' '), var_stack, allow_recursion - 1)
+ if should_abort:
+ return ret
+ return resf
diff --git a/yt_dlp/minicurses.py b/yt_dlp/minicurses.py
new file mode 100644
index 0000000..7db02cb
--- /dev/null
+++ b/yt_dlp/minicurses.py
@@ -0,0 +1,182 @@
+import functools
+from threading import Lock
+
+from .utils import supports_terminal_sequences, write_string
+
+CONTROL_SEQUENCES = {
+ 'DOWN': '\n',
+ 'UP': '\033[A',
+ 'ERASE_LINE': '\033[K',
+ 'RESET': '\033[0m',
+}
+
+
+_COLORS = {
+ 'BLACK': '0',
+ 'RED': '1',
+ 'GREEN': '2',
+ 'YELLOW': '3',
+ 'BLUE': '4',
+ 'PURPLE': '5',
+ 'CYAN': '6',
+ 'WHITE': '7',
+}
+
+
+_TEXT_STYLES = {
+ 'NORMAL': '0',
+ 'BOLD': '1',
+ 'UNDERLINED': '4',
+}
+
+
+def format_text(text, f):
+ '''
+ @param f String representation of formatting to apply in the form:
+ [style] [light] font_color [on [light] bg_color]
+ E.g. "red", "bold green on light blue"
+ '''
+ f = f.upper()
+ tokens = f.strip().split()
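+    # tokens are consumed right-to-left: the background spec first, then foreground color and style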
+
+ bg_color = ''
+ if 'ON' in tokens:
+ if tokens[-1] == 'ON':
+ raise SyntaxError(f'Empty background format specified in {f!r}')
+ if tokens[-1] not in _COLORS:
+ raise SyntaxError(f'{tokens[-1]} in {f!r} must be a color')
+ bg_color = f'4{_COLORS[tokens.pop()]}'
+ if tokens[-1] == 'LIGHT':
+ bg_color = f'0;10{bg_color[1:]}'
+ tokens.pop()
+ if tokens[-1] != 'ON':
+ raise SyntaxError(f'Invalid format {f.split(" ON ", 1)[1]!r} in {f!r}')
+ bg_color = f'\033[{bg_color}m'
+ tokens.pop()
+
+ if not tokens:
+ fg_color = ''
+ elif tokens[-1] not in _COLORS:
+ raise SyntaxError(f'{tokens[-1]} in {f!r} must be a color')
+ else:
+ fg_color = f'3{_COLORS[tokens.pop()]}'
+ if tokens and tokens[-1] == 'LIGHT':
+ fg_color = f'9{fg_color[1:]}'
+ tokens.pop()
+ fg_style = tokens.pop() if tokens and tokens[-1] in _TEXT_STYLES else 'NORMAL'
+ fg_color = f'\033[{_TEXT_STYLES[fg_style]};{fg_color}m'
+ if tokens:
+ raise SyntaxError(f'Invalid format {" ".join(tokens)!r} in {f!r}')
+
+ if fg_color or bg_color:
+ text = text.replace(CONTROL_SEQUENCES['RESET'], f'{fg_color}{bg_color}')
+ return f'{fg_color}{bg_color}{text}{CONTROL_SEQUENCES["RESET"]}'
+ else:
+ return text
+
+
+class MultilinePrinterBase:
+ def __init__(self, stream=None, lines=1):
+ self.stream = stream
+ self.maximum = lines - 1
+ self._HAVE_FULLCAP = supports_terminal_sequences(stream)
+
+ def __enter__(self):
+ return self
+
+ def __exit__(self, *args):
+ self.end()
+
+ def print_at_line(self, text, pos):
+ pass
+
+ def end(self):
+ pass
+
+ def _add_line_number(self, text, line):
+ if self.maximum:
+ return f'{line + 1}: {text}'
+ return text
+
+ def write(self, *text):
+ write_string(''.join(text), self.stream)
+
+
+class QuietMultilinePrinter(MultilinePrinterBase):
+ pass
+
+
+class MultilineLogger(MultilinePrinterBase):
+ def write(self, *text):
+ self.stream.debug(''.join(text))
+
+ def print_at_line(self, text, pos):
+ # stream is the logger object, not an actual stream
+ self.write(self._add_line_number(text, pos))
+
+
+class BreaklineStatusPrinter(MultilinePrinterBase):
+ def print_at_line(self, text, pos):
+ self.write(self._add_line_number(text, pos), '\n')
+
+
+class MultilinePrinter(MultilinePrinterBase):
+ def __init__(self, stream=None, lines=1, preserve_output=True):
+ super().__init__(stream, lines)
+ self.preserve_output = preserve_output
+ self._lastline = self._lastlength = 0
+ self._movelock = Lock()
+
+ def lock(func):
+ @functools.wraps(func)
+ def wrapper(self, *args, **kwargs):
+ with self._movelock:
+ return func(self, *args, **kwargs)
+ return wrapper
+
+ def _move_cursor(self, dest):
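+        # Yield the escape sequences needed to move from the current line to dest; the caller writes them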
+ current = min(self._lastline, self.maximum)
+ yield '\r'
+ distance = dest - current
+ if distance < 0:
+ yield CONTROL_SEQUENCES['UP'] * -distance
+ elif distance > 0:
+ yield CONTROL_SEQUENCES['DOWN'] * distance
+ self._lastline = dest
+
+ @lock
+ def print_at_line(self, text, pos):
+ if self._HAVE_FULLCAP:
+ self.write(*self._move_cursor(pos), CONTROL_SEQUENCES['ERASE_LINE'], text)
+ return
+
+ text = self._add_line_number(text, pos)
+ textlen = len(text)
+ if self._lastline == pos:
+            # move the cursor to the start of the line when rewriting the same line
+ prefix = '\r'
+ if self._lastlength > textlen:
+ text += ' ' * (self._lastlength - textlen)
+ self._lastlength = textlen
+ else:
+ # otherwise, break the line
+ prefix = '\n'
+ self._lastlength = textlen
+ self.write(prefix, text)
+ self._lastline = pos
+
+ @lock
+ def end(self):
+        # move the cursor to the end of the last line and write a line break
+        # so that subsequent to_screen calls start on a fresh line
+ text = self._move_cursor(self.maximum) if self._HAVE_FULLCAP else []
+ if self.preserve_output:
+ self.write(*text, '\n')
+ return
+
+ if self._HAVE_FULLCAP:
+ self.write(
+ *text, CONTROL_SEQUENCES['ERASE_LINE'],
+ f'{CONTROL_SEQUENCES["UP"]}{CONTROL_SEQUENCES["ERASE_LINE"]}' * self.maximum)
+ else:
+ self.write('\r', ' ' * self._lastlength, '\r')
diff --git a/yt_dlp/networking/__init__.py b/yt_dlp/networking/__init__.py
new file mode 100644
index 0000000..acadc01
--- /dev/null
+++ b/yt_dlp/networking/__init__.py
@@ -0,0 +1,30 @@
+# flake8: noqa: F401
+import warnings
+
+from .common import (
+ HEADRequest,
+ PUTRequest,
+ Request,
+ RequestDirector,
+ RequestHandler,
+ Response,
+)
+
+# isort: split
+# TODO: all request handlers should be safely imported
+from . import _urllib
+from ..utils import bug_reports_message
+
+try:
+ from . import _requests
+except ImportError:
+ pass
+except Exception as e:
+ warnings.warn(f'Failed to import "requests" request handler: {e}' + bug_reports_message())
+
+try:
+ from . import _websockets
+except ImportError:
+ pass
+except Exception as e:
+ warnings.warn(f'Failed to import "websockets" request handler: {e}' + bug_reports_message())
diff --git a/yt_dlp/networking/_helper.py b/yt_dlp/networking/_helper.py
new file mode 100644
index 0000000..d79dd79
--- /dev/null
+++ b/yt_dlp/networking/_helper.py
@@ -0,0 +1,283 @@
+from __future__ import annotations
+
+import contextlib
+import functools
+import socket
+import ssl
+import sys
+import typing
+import urllib.parse
+import urllib.request
+
+from .exceptions import RequestError, UnsupportedRequest
+from ..dependencies import certifi
+from ..socks import ProxyType, sockssocket
+from ..utils import format_field, traverse_obj
+
+if typing.TYPE_CHECKING:
+ from collections.abc import Iterable
+
+ from ..utils.networking import HTTPHeaderDict
+
+
+def ssl_load_certs(context: ssl.SSLContext, use_certifi=True):
+ if certifi and use_certifi:
+ context.load_verify_locations(cafile=certifi.where())
+ else:
+ try:
+ context.load_default_certs()
+ # Work around the issue in load_default_certs when there are bad certificates. See:
+ # https://github.com/yt-dlp/yt-dlp/issues/1060,
+ # https://bugs.python.org/issue35665, https://bugs.python.org/issue45312
+ except ssl.SSLError:
+ # enum_certificates is not present in mingw python. See https://github.com/yt-dlp/yt-dlp/issues/1151
+ if sys.platform == 'win32' and hasattr(ssl, 'enum_certificates'):
+ for storename in ('CA', 'ROOT'):
+ ssl_load_windows_store_certs(context, storename)
+ context.set_default_verify_paths()
+
+
+def ssl_load_windows_store_certs(ssl_context, storename):
+ # Code adapted from _load_windows_store_certs in https://github.com/python/cpython/blob/main/Lib/ssl.py
+ try:
+ certs = [cert for cert, encoding, trust in ssl.enum_certificates(storename)
+ if encoding == 'x509_asn' and (
+ trust is True or ssl.Purpose.SERVER_AUTH.oid in trust)]
+ except PermissionError:
+ return
+ for cert in certs:
+ with contextlib.suppress(ssl.SSLError):
+ ssl_context.load_verify_locations(cadata=cert)
+
+
+def make_socks_proxy_opts(socks_proxy):
+ url_components = urllib.parse.urlparse(socks_proxy)
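+    # rdns=True defers hostname resolution to the proxy (socks5h/socks4a); rdns=False resolves locally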
+ if url_components.scheme.lower() == 'socks5':
+ socks_type = ProxyType.SOCKS5
+ rdns = False
+ elif url_components.scheme.lower() == 'socks5h':
+ socks_type = ProxyType.SOCKS5
+ rdns = True
+ elif url_components.scheme.lower() == 'socks4':
+ socks_type = ProxyType.SOCKS4
+ rdns = False
+ elif url_components.scheme.lower() == 'socks4a':
+ socks_type = ProxyType.SOCKS4A
+ rdns = True
+ else:
+ raise ValueError(f'Unknown SOCKS proxy version: {url_components.scheme.lower()}')
+
+ def unquote_if_non_empty(s):
+ if not s:
+ return s
+ return urllib.parse.unquote_plus(s)
+ return {
+ 'proxytype': socks_type,
+ 'addr': url_components.hostname,
+ 'port': url_components.port or 1080,
+ 'rdns': rdns,
+ 'username': unquote_if_non_empty(url_components.username),
+ 'password': unquote_if_non_empty(url_components.password),
+ }
+
+
+def select_proxy(url, proxies):
+ """Unified proxy selector for all backends"""
+ url_components = urllib.parse.urlparse(url)
+ if 'no' in proxies:
+ hostport = url_components.hostname + format_field(url_components.port, None, ':%s')
+ if urllib.request.proxy_bypass_environment(hostport, {'no': proxies['no']}):
+ return
+ elif urllib.request.proxy_bypass(hostport): # check system settings
+ return
+
+ return traverse_obj(proxies, url_components.scheme or 'http', 'all')
+
+
+def get_redirect_method(method, status):
+ """Unified redirect method handling"""
+
+ # A 303 must either use GET or HEAD for subsequent request
+ # https://datatracker.ietf.org/doc/html/rfc7231#section-6.4.4
+ if status == 303 and method != 'HEAD':
+ method = 'GET'
+ # 301 and 302 redirects are commonly turned into a GET from a POST
+ # for subsequent requests by browsers, so we'll do the same.
+ # https://datatracker.ietf.org/doc/html/rfc7231#section-6.4.2
+ # https://datatracker.ietf.org/doc/html/rfc7231#section-6.4.3
+ if status in (301, 302) and method == 'POST':
+ method = 'GET'
+ return method
+
+
+def make_ssl_context(
+ verify=True,
+ client_certificate=None,
+ client_certificate_key=None,
+ client_certificate_password=None,
+ legacy_support=False,
+ use_certifi=True,
+):
+ context = ssl.SSLContext(ssl.PROTOCOL_TLS_CLIENT)
+ context.check_hostname = verify
+ context.verify_mode = ssl.CERT_REQUIRED if verify else ssl.CERT_NONE
+
+ # Some servers may reject requests if ALPN extension is not sent. See:
+ # https://github.com/python/cpython/issues/85140
+ # https://github.com/yt-dlp/yt-dlp/issues/3878
+ with contextlib.suppress(NotImplementedError):
+ context.set_alpn_protocols(['http/1.1'])
+ if verify:
+ ssl_load_certs(context, use_certifi)
+
+ if legacy_support:
+ context.options |= 4 # SSL_OP_LEGACY_SERVER_CONNECT
+ context.set_ciphers('DEFAULT') # compat
+
+ elif ssl.OPENSSL_VERSION_INFO >= (1, 1, 1) and not ssl.OPENSSL_VERSION.startswith('LibreSSL'):
+ # Use the default SSL ciphers and minimum TLS version settings from Python 3.10 [1].
+ # This is to ensure consistent behavior across Python versions and libraries, and help avoid fingerprinting
+ # in some situations [2][3].
+ # Python 3.10 only supports OpenSSL 1.1.1+ [4]. Because this change is likely
+ # untested on older versions, we only apply this to OpenSSL 1.1.1+ to be safe.
+ # LibreSSL is excluded until further investigation due to cipher support issues [5][6].
+ # 1. https://github.com/python/cpython/commit/e983252b516edb15d4338b0a47631b59ef1e2536
+ # 2. https://github.com/yt-dlp/yt-dlp/issues/4627
+ # 3. https://github.com/yt-dlp/yt-dlp/pull/5294
+ # 4. https://peps.python.org/pep-0644/
+ # 5. https://peps.python.org/pep-0644/#libressl-support
+ # 6. https://github.com/yt-dlp/yt-dlp/commit/5b9f253fa0aee996cf1ed30185d4b502e00609c4#commitcomment-89054368
+ context.set_ciphers(
+ '@SECLEVEL=2:ECDH+AESGCM:ECDH+CHACHA20:ECDH+AES:DHE+AES:!aNULL:!eNULL:!aDSS:!SHA1:!AESCCM')
+ context.minimum_version = ssl.TLSVersion.TLSv1_2
+
+ if client_certificate:
+ try:
+ context.load_cert_chain(
+ client_certificate, keyfile=client_certificate_key,
+ password=client_certificate_password)
+ except ssl.SSLError:
+ raise RequestError('Unable to load client certificate')
+
+ if getattr(context, 'post_handshake_auth', None) is not None:
+ context.post_handshake_auth = True
+ return context
+
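+# A usage sketch: the request handlers call this via RequestHandler._make_sslcontext().
+# Standalone (assuming certifi is importable when use_certifi=True):
+#   ctx = make_ssl_context(verify=True, use_certifi=True)
+#   ctx.minimum_version == ssl.TLSVersion.TLSv1_2  # on OpenSSL 1.1.1+, non-LibreSSL
+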
+
+class InstanceStoreMixin:
+ def __init__(self, **kwargs):
+ self.__instances = []
+ super().__init__(**kwargs) # so that cooperative multiple inheritance (MRO) works
+
+ @staticmethod
+ def _create_instance(**kwargs):
+ raise NotImplementedError
+
+ def _get_instance(self, **kwargs):
+ for key, instance in self.__instances:
+ if key == kwargs:
+ return instance
+
+ instance = self._create_instance(**kwargs)
+ self.__instances.append((kwargs, instance))
+ return instance
+
+ def _close_instance(self, instance):
+ if callable(getattr(instance, 'close', None)):
+ instance.close()
+
+ def _clear_instances(self):
+ for _, instance in self.__instances:
+ self._close_instance(instance)
+ self.__instances.clear()
+
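+# A minimal sketch of the intended use (hypothetical subclass and factory):
+# instances are cached per unique kwargs, so repeated calls reuse one object.
+#   class PoolStore(InstanceStoreMixin):
+#       def _create_instance(self, size):
+#           return make_pool(size)  # hypothetical factory
+#   store = PoolStore()
+#   store._get_instance(size=4) is store._get_instance(size=4)  # True
+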
+
+def add_accept_encoding_header(headers: HTTPHeaderDict, supported_encodings: Iterable[str]):
+ if 'Accept-Encoding' not in headers:
+ headers['Accept-Encoding'] = ', '.join(supported_encodings) or 'identity'
+
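+# For example, with supported_encodings of ['gzip', 'deflate', 'br'] this sets
+# 'Accept-Encoding: gzip, deflate, br'; an empty iterable yields 'identity',
+# and an existing Accept-Encoding header is left untouched.
+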
+
+def wrap_request_errors(func):
+ @functools.wraps(func)
+ def wrapper(self, *args, **kwargs):
+ try:
+ return func(self, *args, **kwargs)
+ except UnsupportedRequest as e:
+ if e.handler is None:
+ e.handler = self
+ raise
+ return wrapper
+
+
+def _socket_connect(ip_addr, timeout, source_address):
+ af, socktype, proto, canonname, sa = ip_addr
+ sock = socket.socket(af, socktype, proto)
+ try:
+ if timeout is not socket._GLOBAL_DEFAULT_TIMEOUT:
+ sock.settimeout(timeout)
+ if source_address:
+ sock.bind(source_address)
+ sock.connect(sa)
+ return sock
+ except OSError:
+ sock.close()
+ raise
+
+
+def create_socks_proxy_socket(dest_addr, proxy_args, proxy_ip_addr, timeout, source_address):
+ af, socktype, proto, canonname, sa = proxy_ip_addr
+ sock = sockssocket(af, socktype, proto)
+ try:
+ connect_proxy_args = proxy_args.copy()
+ connect_proxy_args.update({'addr': sa[0], 'port': sa[1]})
+ sock.setproxy(**connect_proxy_args)
+ if timeout is not socket._GLOBAL_DEFAULT_TIMEOUT: # noqa: E721
+ sock.settimeout(timeout)
+ if source_address:
+ sock.bind(source_address)
+ sock.connect(dest_addr)
+ return sock
+ except OSError:
+ sock.close()
+ raise
+
+
+def create_connection(
+ address,
+ timeout=socket._GLOBAL_DEFAULT_TIMEOUT,
+ source_address=None,
+ *,
+ _create_socket_func=_socket_connect
+):
+ # Work around socket.create_connection(), which tries all addresses from getaddrinfo(), including IPv6.
+ # This filters the addresses based on the given source_address.
+ # Based on: https://github.com/python/cpython/blob/main/Lib/socket.py#L810
+ host, port = address
+ ip_addrs = socket.getaddrinfo(host, port, 0, socket.SOCK_STREAM)
+ if not ip_addrs:
+ raise OSError('getaddrinfo returns an empty list')
+ if source_address is not None:
+ af = socket.AF_INET if ':' not in source_address[0] else socket.AF_INET6
+ ip_addrs = [addr for addr in ip_addrs if addr[0] == af]
+ if not ip_addrs:
+ raise OSError(
+ f'No remote IPv{4 if af == socket.AF_INET else 6} addresses available for connect. '
+ f'Can\'t use "{source_address[0]}" as source address')
+
+ err = None
+ for ip_addr in ip_addrs:
+ try:
+ sock = _create_socket_func(ip_addr, timeout, source_address)
+ # Explicitly break __traceback__ reference cycle
+ # https://bugs.python.org/issue36820
+ err = None
+ return sock
+ except OSError as e:
+ err = e
+
+ try:
+ raise err
+ finally:
+ # Explicitly break __traceback__ reference cycle
+ # https://bugs.python.org/issue36820
+ err = None
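+
+
+# A usage sketch: a plain TCP connect filtered by address family, e.g.
+#   sock = create_connection(('example.com', 80), timeout=20.0,
+#                            source_address=('192.0.2.1', 0))
+# only attempts IPv4 candidate addresses, since the source address is IPv4.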
diff --git a/yt_dlp/networking/_requests.py b/yt_dlp/networking/_requests.py
new file mode 100644
index 0000000..6545028
--- /dev/null
+++ b/yt_dlp/networking/_requests.py
@@ -0,0 +1,408 @@
+import contextlib
+import functools
+import http.client
+import logging
+import re
+import socket
+import warnings
+
+from ..dependencies import brotli, requests, urllib3
+from ..utils import bug_reports_message, int_or_none, variadic
+from ..utils.networking import normalize_url
+
+if requests is None:
+ raise ImportError('requests module is not installed')
+
+if urllib3 is None:
+ raise ImportError('urllib3 module is not installed')
+
+urllib3_version = tuple(int_or_none(x, default=0) for x in urllib3.__version__.split('.'))
+
+if urllib3_version < (1, 26, 17):
+ raise ImportError('Only urllib3 >= 1.26.17 is supported')
+
+if requests.__build__ < 0x023100:
+ raise ImportError('Only requests >= 2.31.0 is supported')
+
+import requests.adapters
+import requests.utils
+import urllib3.connection
+import urllib3.exceptions
+
+from ._helper import (
+ InstanceStoreMixin,
+ add_accept_encoding_header,
+ create_connection,
+ create_socks_proxy_socket,
+ get_redirect_method,
+ make_socks_proxy_opts,
+ select_proxy,
+)
+from .common import (
+ Features,
+ RequestHandler,
+ Response,
+ register_preference,
+ register_rh,
+)
+from .exceptions import (
+ CertificateVerifyError,
+ HTTPError,
+ IncompleteRead,
+ ProxyError,
+ RequestError,
+ SSLError,
+ TransportError,
+)
+from ..socks import ProxyError as SocksProxyError
+
+SUPPORTED_ENCODINGS = [
+ 'gzip', 'deflate'
+]
+
+if brotli is not None:
+ SUPPORTED_ENCODINGS.append('br')
+
+"""
+Override urllib3's behavior to not convert lower-case percent-encoded characters
+to upper-case during the url normalization process.
+
+RFC 3986 defines lower- and upper-case percent-encoded hexadecimal characters as equivalent
+and normalizers should convert them to uppercase for consistency [1].
+
+However, some sites may have an incorrect implementation where they provide
+a percent-encoded url that is then compared case-sensitively [2].
+
+While this is a very rare case, urllib does not perform this normalization step, so it
+is best to avoid it in requests too for compatibility reasons.
+
+1: https://tools.ietf.org/html/rfc3986#section-2.1
+2: https://github.com/streamlink/streamlink/pull/4003
+"""
+
+
+class Urllib3PercentREOverride:
+ def __init__(self, r: re.Pattern):
+ self.re = r
+
+ # pass through all other attribute calls to the original re
+ def __getattr__(self, item):
+ return self.re.__getattribute__(item)
+
+ def subn(self, repl, string, *args, **kwargs):
+ return string, self.re.subn(repl, string, *args, **kwargs)[1]
+
+
+# urllib3 >= 1.25.8 uses subn:
+# https://github.com/urllib3/urllib3/commit/a2697e7c6b275f05879b60f593c5854a816489f0
+import urllib3.util.url # noqa: E305
+
+if hasattr(urllib3.util.url, 'PERCENT_RE'):
+ urllib3.util.url.PERCENT_RE = Urllib3PercentREOverride(urllib3.util.url.PERCENT_RE)
+elif hasattr(urllib3.util.url, '_PERCENT_RE'): # urllib3 >= 2.0.0
+ urllib3.util.url._PERCENT_RE = Urllib3PercentREOverride(urllib3.util.url._PERCENT_RE)
+else:
+ warnings.warn('Failed to patch PERCENT_RE in urllib3 (does the attribute exist?)' + bug_reports_message())
+
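+# Effect sketch: urllib3 normally uppercases percent-encodings via
+# PERCENT_RE.subn; with the override above, subn reports the match count but
+# returns the string unchanged, so e.g. 'foo%3fbar' stays 'foo%3fbar' instead
+# of becoming 'foo%3Fbar'.
+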
+"""
+Workaround for an issue in urllib3.util.ssl_.py: ssl_wrap_socket does not pass
+server_hostname to SSLContext.wrap_socket if server_hostname is an IP address.
+This is a problem because we set check_hostname to True in our SSLContext.
+
+Monkey-patching IS_SECURETRANSPORT forces ssl_wrap_socket to pass server_hostname regardless.
+
+This has been fixed in urllib3 2.0+.
+See: https://github.com/urllib3/urllib3/issues/517
+"""
+
+if urllib3_version < (2, 0, 0):
+ with contextlib.suppress(Exception):
+ urllib3.util.IS_SECURETRANSPORT = urllib3.util.ssl_.IS_SECURETRANSPORT = True
+
+
+# Requests does not handle no_proxy automatically,
+# due to its buggy no_proxy handling with the proxy dict [1].
+# 1. https://github.com/psf/requests/issues/5000
+requests.adapters.select_proxy = select_proxy
+
+
+class RequestsResponseAdapter(Response):
+ def __init__(self, res: requests.models.Response):
+ super().__init__(
+ fp=res.raw, headers=res.headers, url=res.url,
+ status=res.status_code, reason=res.reason)
+
+ self._requests_response = res
+
+ def read(self, amt: int = None):
+ try:
+ # Interact with urllib3 response directly.
+ return self.fp.read(amt, decode_content=True)
+
+ # See urllib3.response.HTTPResponse.read() for exceptions raised on read
+ except urllib3.exceptions.SSLError as e:
+ raise SSLError(cause=e) from e
+
+ except urllib3.exceptions.ProtocolError as e:
+ # IncompleteRead is always contained within ProtocolError
+ # See urllib3.response.HTTPResponse._error_catcher()
+ ir_err = next(
+ (err for err in (e.__context__, e.__cause__, *variadic(e.args))
+ if isinstance(err, http.client.IncompleteRead)), None)
+ if ir_err is not None:
+ # `urllib3.exceptions.IncompleteRead` is subclass of `http.client.IncompleteRead`
+ # but uses an `int` for its `partial` property.
+ partial = ir_err.partial if isinstance(ir_err.partial, int) else len(ir_err.partial)
+ raise IncompleteRead(partial=partial, expected=ir_err.expected) from e
+ raise TransportError(cause=e) from e
+
+ except urllib3.exceptions.HTTPError as e:
+ # catch-all for any other urllib3 response exceptions
+ raise TransportError(cause=e) from e
+
+
+class RequestsHTTPAdapter(requests.adapters.HTTPAdapter):
+ def __init__(self, ssl_context=None, proxy_ssl_context=None, source_address=None, **kwargs):
+ self._pm_args = {}
+ if ssl_context:
+ self._pm_args['ssl_context'] = ssl_context
+ if source_address:
+ self._pm_args['source_address'] = (source_address, 0)
+ self._proxy_ssl_context = proxy_ssl_context or ssl_context
+ super().__init__(**kwargs)
+
+ def init_poolmanager(self, *args, **kwargs):
+ return super().init_poolmanager(*args, **kwargs, **self._pm_args)
+
+ def proxy_manager_for(self, proxy, **proxy_kwargs):
+ extra_kwargs = {}
+ if not proxy.lower().startswith('socks') and self._proxy_ssl_context:
+ extra_kwargs['proxy_ssl_context'] = self._proxy_ssl_context
+ return super().proxy_manager_for(proxy, **proxy_kwargs, **self._pm_args, **extra_kwargs)
+
+ def cert_verify(*args, **kwargs):
+ # lean on SSLContext for cert verification
+ pass
+
+
+class RequestsSession(requests.sessions.Session):
+ """
+ Ensure unified redirect method handling with our urllib redirect handler.
+ """
+
+ def rebuild_method(self, prepared_request, response):
+ new_method = get_redirect_method(prepared_request.method, response.status_code)
+
+ # HACK: requests removes headers/body on redirect unless code was a 307/308.
+ if new_method == prepared_request.method:
+ response._real_status_code = response.status_code
+ response.status_code = 308
+
+ prepared_request.method = new_method
+
+ # Requests fails to resolve dot segments on absolute redirect locations
+ # See: https://github.com/yt-dlp/yt-dlp/issues/9020
+ prepared_request.url = normalize_url(prepared_request.url)
+
+ def rebuild_auth(self, prepared_request, response):
+ # HACK: undo status code change from rebuild_method, if applicable.
+ # rebuild_auth runs after requests would remove headers/body based on status code
+ if hasattr(response, '_real_status_code'):
+ response.status_code = response._real_status_code
+ del response._real_status_code
+ return super().rebuild_auth(prepared_request, response)
+
+
+class Urllib3LoggingFilter(logging.Filter):
+
+ def filter(self, record):
+ # Ignore HTTP request messages since HTTPConnection prints those
+ if record.msg == '%s://%s:%s "%s %s %s" %s %s':
+ return False
+ return True
+
+
+class Urllib3LoggingHandler(logging.Handler):
+ """Redirect urllib3 logs to our logger"""
+
+ def __init__(self, logger, *args, **kwargs):
+ super().__init__(*args, **kwargs)
+ self._logger = logger
+
+ def emit(self, record):
+ try:
+ msg = self.format(record)
+ if record.levelno >= logging.ERROR:
+ self._logger.error(msg)
+ else:
+ self._logger.stdout(msg)
+
+ except Exception:
+ self.handleError(record)
+
+
+@register_rh
+class RequestsRH(RequestHandler, InstanceStoreMixin):
+
+ """Requests RequestHandler
+ https://github.com/psf/requests
+ """
+ _SUPPORTED_URL_SCHEMES = ('http', 'https')
+ _SUPPORTED_ENCODINGS = tuple(SUPPORTED_ENCODINGS)
+ _SUPPORTED_PROXY_SCHEMES = ('http', 'https', 'socks4', 'socks4a', 'socks5', 'socks5h')
+ _SUPPORTED_FEATURES = (Features.NO_PROXY, Features.ALL_PROXY)
+ RH_NAME = 'requests'
+
+ def __init__(self, *args, **kwargs):
+ super().__init__(*args, **kwargs)
+
+ # Forward urllib3 debug messages to our logger
+ logger = logging.getLogger('urllib3')
+ self.__logging_handler = Urllib3LoggingHandler(logger=self._logger)
+ self.__logging_handler.setFormatter(logging.Formatter('requests: %(message)s'))
+ self.__logging_handler.addFilter(Urllib3LoggingFilter())
+ logger.addHandler(self.__logging_handler)
+ # TODO: Use a logger filter to suppress pool reuse warning instead
+ logger.setLevel(logging.ERROR)
+
+ if self.verbose:
+ # Setting this globally is not ideal, but is easier than hacking with urllib3.
+ # It could technically be problematic for scripts embedding yt-dlp.
+ # However, debug traffic is unlikely to be used in that context in a way that would cause problems.
+ urllib3.connection.HTTPConnection.debuglevel = 1
+ logger.setLevel(logging.DEBUG)
+ # this is expected if we are using --no-check-certificate
+ urllib3.disable_warnings(urllib3.exceptions.InsecureRequestWarning)
+
+ def close(self):
+ self._clear_instances()
+ # Remove the logging handler that contains a reference to our logger
+ # See: https://github.com/yt-dlp/yt-dlp/issues/8922
+ logging.getLogger('urllib3').removeHandler(self.__logging_handler)
+
+ def _check_extensions(self, extensions):
+ super()._check_extensions(extensions)
+ extensions.pop('cookiejar', None)
+ extensions.pop('timeout', None)
+
+ def _create_instance(self, cookiejar):
+ session = RequestsSession()
+ http_adapter = RequestsHTTPAdapter(
+ ssl_context=self._make_sslcontext(),
+ source_address=self.source_address,
+ max_retries=urllib3.util.retry.Retry(False),
+ )
+ session.adapters.clear()
+ session.headers = requests.models.CaseInsensitiveDict({'Connection': 'keep-alive'})
+ session.mount('https://', http_adapter)
+ session.mount('http://', http_adapter)
+ session.cookies = cookiejar
+ session.trust_env = False # no need, we already load proxies from env
+ return session
+
+ def _send(self, request):
+
+ headers = self._merge_headers(request.headers)
+ add_accept_encoding_header(headers, SUPPORTED_ENCODINGS)
+
+ max_redirects_exceeded = False
+
+ session = self._get_instance(
+ cookiejar=request.extensions.get('cookiejar') or self.cookiejar)
+
+ try:
+ requests_res = session.request(
+ method=request.method,
+ url=request.url,
+ data=request.data,
+ headers=headers,
+ timeout=float(request.extensions.get('timeout') or self.timeout),
+ proxies=request.proxies or self.proxies,
+ allow_redirects=True,
+ stream=True
+ )
+
+ except requests.exceptions.TooManyRedirects as e:
+ max_redirects_exceeded = True
+ requests_res = e.response
+
+ except requests.exceptions.SSLError as e:
+ if 'CERTIFICATE_VERIFY_FAILED' in str(e):
+ raise CertificateVerifyError(cause=e) from e
+ raise SSLError(cause=e) from e
+
+ except requests.exceptions.ProxyError as e:
+ raise ProxyError(cause=e) from e
+
+ except (requests.exceptions.ConnectionError, requests.exceptions.Timeout) as e:
+ raise TransportError(cause=e) from e
+
+ except urllib3.exceptions.HTTPError as e:
+ # Catch any urllib3 exceptions that may leak through
+ raise TransportError(cause=e) from e
+
+ except requests.exceptions.RequestException as e:
+ # Miscellaneous Requests exceptions that may not necessarily be network-related, e.g. InvalidURL
+ raise RequestError(cause=e) from e
+
+ res = RequestsResponseAdapter(requests_res)
+
+ if not 200 <= res.status < 300:
+ raise HTTPError(res, redirect_loop=max_redirects_exceeded)
+
+ return res
+
+
+@register_preference(RequestsRH)
+def requests_preference(rh, request):
+ return 100
+
+
+# Use our socks proxy implementation with requests to avoid an extra dependency.
+class SocksHTTPConnection(urllib3.connection.HTTPConnection):
+ def __init__(self, _socks_options, *args, **kwargs): # must use _socks_options to pass PoolKey checks
+ self._proxy_args = _socks_options
+ super().__init__(*args, **kwargs)
+
+ def _new_conn(self):
+ try:
+ return create_connection(
+ address=(self._proxy_args['addr'], self._proxy_args['port']),
+ timeout=self.timeout,
+ source_address=self.source_address,
+ _create_socket_func=functools.partial(
+ create_socks_proxy_socket, (self.host, self.port), self._proxy_args))
+ except (socket.timeout, TimeoutError) as e:
+ raise urllib3.exceptions.ConnectTimeoutError(
+ self, f'Connection to {self.host} timed out. (connect timeout={self.timeout})') from e
+ except SocksProxyError as e:
+ raise urllib3.exceptions.ProxyError(str(e), e) from e
+ except OSError as e:
+ raise urllib3.exceptions.NewConnectionError(
+ self, f'Failed to establish a new connection: {e}') from e
+
+
+class SocksHTTPSConnection(SocksHTTPConnection, urllib3.connection.HTTPSConnection):
+ pass
+
+
+class SocksHTTPConnectionPool(urllib3.HTTPConnectionPool):
+ ConnectionCls = SocksHTTPConnection
+
+
+class SocksHTTPSConnectionPool(urllib3.HTTPSConnectionPool):
+ ConnectionCls = SocksHTTPSConnection
+
+
+class SocksProxyManager(urllib3.PoolManager):
+
+ def __init__(self, socks_proxy, username=None, password=None, num_pools=10, headers=None, **connection_pool_kw):
+ connection_pool_kw['_socks_options'] = make_socks_proxy_opts(socks_proxy)
+ super().__init__(num_pools, headers, **connection_pool_kw)
+ self.pool_classes_by_scheme = {
+ 'http': SocksHTTPConnectionPool,
+ 'https': SocksHTTPSConnectionPool
+ }
+
+
+requests.adapters.SOCKSProxyManager = SocksProxyManager
diff --git a/yt_dlp/networking/_urllib.py b/yt_dlp/networking/_urllib.py
new file mode 100644
index 0000000..cb4dae3
--- /dev/null
+++ b/yt_dlp/networking/_urllib.py
@@ -0,0 +1,422 @@
+from __future__ import annotations
+
+import functools
+import http.client
+import io
+import ssl
+import urllib.error
+import urllib.parse
+import urllib.request
+import urllib.response
+import zlib
+from urllib.request import (
+ DataHandler,
+ FileHandler,
+ FTPHandler,
+ HTTPCookieProcessor,
+ HTTPDefaultErrorHandler,
+ HTTPErrorProcessor,
+ UnknownHandler,
+)
+
+from ._helper import (
+ InstanceStoreMixin,
+ add_accept_encoding_header,
+ create_connection,
+ create_socks_proxy_socket,
+ get_redirect_method,
+ make_socks_proxy_opts,
+ select_proxy,
+)
+from .common import Features, RequestHandler, Response, register_rh
+from .exceptions import (
+ CertificateVerifyError,
+ HTTPError,
+ IncompleteRead,
+ ProxyError,
+ RequestError,
+ SSLError,
+ TransportError,
+)
+from ..dependencies import brotli
+from ..socks import ProxyError as SocksProxyError
+from ..utils import update_url_query
+from ..utils.networking import normalize_url
+
+SUPPORTED_ENCODINGS = ['gzip', 'deflate']
+CONTENT_DECODE_ERRORS = [zlib.error, OSError]
+
+if brotli:
+ SUPPORTED_ENCODINGS.append('br')
+ CONTENT_DECODE_ERRORS.append(brotli.error)
+
+
+def _create_http_connection(http_class, source_address, *args, **kwargs):
+ hc = http_class(*args, **kwargs)
+
+ if hasattr(hc, '_create_connection'):
+ hc._create_connection = create_connection
+
+ if source_address is not None:
+ hc.source_address = (source_address, 0)
+
+ return hc
+
+
+class HTTPHandler(urllib.request.AbstractHTTPHandler):
+ """Handler for HTTP requests and responses.
+
+ This class, when installed with an OpenerDirector, automatically adds
+ the standard headers to every HTTP request and handles gzipped, deflated and
+ brotli responses from web servers.
+
+ Part of this code was copied from:
+
+ http://techknack.net/python-urllib2-handlers/
+
+ Andrew Rowls, the author of that code, agreed to release it to the
+ public domain.
+ """
+
+ def __init__(self, context=None, source_address=None, *args, **kwargs):
+ super().__init__(*args, **kwargs)
+ self._source_address = source_address
+ self._context = context
+
+ @staticmethod
+ def _make_conn_class(base, req):
+ conn_class = base
+ socks_proxy = req.headers.pop('Ytdl-socks-proxy', None)
+ if socks_proxy:
+ conn_class = make_socks_conn_class(conn_class, socks_proxy)
+ return conn_class
+
+ def http_open(self, req):
+ conn_class = self._make_conn_class(http.client.HTTPConnection, req)
+ return self.do_open(functools.partial(
+ _create_http_connection, conn_class, self._source_address), req)
+
+ def https_open(self, req):
+ conn_class = self._make_conn_class(http.client.HTTPSConnection, req)
+ return self.do_open(
+ functools.partial(
+ _create_http_connection, conn_class, self._source_address),
+ req, context=self._context)
+
+ @staticmethod
+ def deflate(data):
+ if not data:
+ return data
+ try:
+ return zlib.decompress(data, -zlib.MAX_WBITS)
+ except zlib.error:
+ return zlib.decompress(data)
+
+ @staticmethod
+ def brotli(data):
+ if not data:
+ return data
+ return brotli.decompress(data)
+
+ @staticmethod
+ def gz(data):
+ # There may be junk appended to the end of the file
+ # We ignore it by only ever decoding a single gzip payload
+ if not data:
+ return data
+ return zlib.decompress(data, wbits=zlib.MAX_WBITS | 16)
+
+ def http_request(self, req):
+ # According to RFC 3986, URLs cannot contain non-ASCII characters; however, this is not
+ # always respected by websites: some tend to give out URLs with non-percent-encoded
+ # non-ASCII characters (see telemb.py, ard.py [#3412])
+ # urllib chokes on URLs with non-ASCII characters (see http://bugs.python.org/issue3991)
+ # To work around aforementioned issue we will replace request's original URL with
+ # percent-encoded one
+ # Since redirects are also affected (e.g. http://www.southpark.de/alle-episoden/s18e09)
+ # the code of this workaround has been moved here from YoutubeDL.urlopen()
+ url = req.get_full_url()
+ url_escaped = normalize_url(url)
+
+ # Substitute URL if any change after escaping
+ if url != url_escaped:
+ req = update_Request(req, url=url_escaped)
+
+ return super().do_request_(req)
+
+ def http_response(self, req, resp):
+ old_resp = resp
+
+ # Content-Encoding header lists the encodings in order that they were applied [1].
+ # To decompress, we simply do the reverse.
+ # [1]: https://datatracker.ietf.org/doc/html/rfc9110#name-content-encoding
+ decoded_response = None
+ for encoding in (e.strip() for e in reversed(resp.headers.get('Content-encoding', '').split(','))):
+ if encoding == 'gzip':
+ decoded_response = self.gz(decoded_response or resp.read())
+ elif encoding == 'deflate':
+ decoded_response = self.deflate(decoded_response or resp.read())
+ elif encoding == 'br' and brotli:
+ decoded_response = self.brotli(decoded_response or resp.read())
+
+ if decoded_response is not None:
+ resp = urllib.request.addinfourl(io.BytesIO(decoded_response), old_resp.headers, old_resp.url, old_resp.code)
+ resp.msg = old_resp.msg
+ # Percent-encode redirect URL of Location HTTP header to satisfy RFC 3986 (see
+ # https://github.com/ytdl-org/youtube-dl/issues/6457).
+ if 300 <= resp.code < 400:
+ location = resp.headers.get('Location')
+ if location:
+ # Per RFC 2616, the default charset is iso-8859-1, which Python 3 respects
+ location = location.encode('iso-8859-1').decode()
+ location_escaped = normalize_url(location)
+ if location != location_escaped:
+ del resp.headers['Location']
+ resp.headers['Location'] = location_escaped
+ return resp
+
+ https_request = http_request
+ https_response = http_response
+
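+# Decoding-order sketch for http_response above: a response with
+# 'Content-Encoding: gzip, br' was gzip-compressed first and then
+# brotli-compressed, so it is decoded in reverse: brotli first, then gzip.
+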
+
+def make_socks_conn_class(base_class, socks_proxy):
+ assert issubclass(base_class, (
+ http.client.HTTPConnection, http.client.HTTPSConnection))
+
+ proxy_args = make_socks_proxy_opts(socks_proxy)
+
+ class SocksConnection(base_class):
+ _create_connection = create_connection
+
+ def connect(self):
+ self.sock = create_connection(
+ (proxy_args['addr'], proxy_args['port']),
+ timeout=self.timeout,
+ source_address=self.source_address,
+ _create_socket_func=functools.partial(
+ create_socks_proxy_socket, (self.host, self.port), proxy_args))
+ if isinstance(self, http.client.HTTPSConnection):
+ self.sock = self._context.wrap_socket(self.sock, server_hostname=self.host)
+
+ return SocksConnection
+
+
+class RedirectHandler(urllib.request.HTTPRedirectHandler):
+ """YoutubeDL redirect handler
+
+ The code is based on HTTPRedirectHandler implementation from CPython [1].
+
+ This redirect handler fixes and improves the logic to better align with RFC 7231
+ and with what browsers tend to do [2][3].
+
+ 1. https://github.com/python/cpython/blob/master/Lib/urllib/request.py
+ 2. https://datatracker.ietf.org/doc/html/rfc7231
+ 3. https://github.com/python/cpython/issues/91306
+ """
+
+ http_error_301 = http_error_303 = http_error_307 = http_error_308 = urllib.request.HTTPRedirectHandler.http_error_302
+
+ def redirect_request(self, req, fp, code, msg, headers, newurl):
+ if code not in (301, 302, 303, 307, 308):
+ raise urllib.error.HTTPError(req.full_url, code, msg, headers, fp)
+
+ new_data = req.data
+
+ # Technically the Cookie header should be in unredirected_hdrs,
+ # however in practice some may set it in normal headers anyway.
+ # We will remove it here to prevent any leaks.
+ remove_headers = ['Cookie']
+
+ new_method = get_redirect_method(req.get_method(), code)
+ # only remove payload if method changed (e.g. POST to GET)
+ if new_method != req.get_method():
+ new_data = None
+ remove_headers.extend(['Content-Length', 'Content-Type'])
+
+ new_headers = {k: v for k, v in req.headers.items() if k.title() not in remove_headers}
+
+ return urllib.request.Request(
+ newurl, headers=new_headers, origin_req_host=req.origin_req_host,
+ unverifiable=True, method=new_method, data=new_data)
+
+
+class ProxyHandler(urllib.request.BaseHandler):
+ handler_order = 100
+
+ def __init__(self, proxies=None):
+ self.proxies = proxies
+ # Set default handlers
+ for type in ('http', 'https', 'ftp'):
+ setattr(self, '%s_open' % type, lambda r, meth=self.proxy_open: meth(r))
+
+ def proxy_open(self, req):
+ proxy = select_proxy(req.get_full_url(), self.proxies)
+ if proxy is None:
+ return
+ if urllib.parse.urlparse(proxy).scheme.lower() in ('socks4', 'socks4a', 'socks5', 'socks5h'):
+ req.add_header('Ytdl-socks-proxy', proxy)
+ # yt-dlp's http/https handlers handle wrapping the socket with SOCKS
+ return None
+ return urllib.request.ProxyHandler.proxy_open(
+ self, req, proxy, None)
+
+
+class PUTRequest(urllib.request.Request):
+ def get_method(self):
+ return 'PUT'
+
+
+class HEADRequest(urllib.request.Request):
+ def get_method(self):
+ return 'HEAD'
+
+
+def update_Request(req, url=None, data=None, headers=None, query=None):
+ req_headers = req.headers.copy()
+ req_headers.update(headers or {})
+ req_data = data if data is not None else req.data
+ req_url = update_url_query(url or req.get_full_url(), query)
+ req_get_method = req.get_method()
+ if req_get_method == 'HEAD':
+ req_type = HEADRequest
+ elif req_get_method == 'PUT':
+ req_type = PUTRequest
+ else:
+ req_type = urllib.request.Request
+ new_req = req_type(
+ req_url, data=req_data, headers=req_headers,
+ origin_req_host=req.origin_req_host, unverifiable=req.unverifiable)
+ if hasattr(req, 'timeout'):
+ new_req.timeout = req.timeout
+ return new_req
+
+
+class UrllibResponseAdapter(Response):
+ """
+ HTTP Response adapter class for urllib addinfourl and http.client.HTTPResponse
+ """
+
+ def __init__(self, res: http.client.HTTPResponse | urllib.response.addinfourl):
+ # addinfourl: In Python 3.9+, .status was introduced and .getcode() was deprecated [1]
+ # HTTPResponse: .getcode() was deprecated, .status always existed [2]
+ # 1. https://docs.python.org/3/library/urllib.request.html#urllib.response.addinfourl.getcode
+ # 2. https://docs.python.org/3.10/library/http.client.html#http.client.HTTPResponse.status
+ super().__init__(
+ fp=res, headers=res.headers, url=res.url,
+ status=getattr(res, 'status', None) or res.getcode(), reason=getattr(res, 'reason', None))
+
+ def read(self, amt=None):
+ try:
+ return self.fp.read(amt)
+ except Exception as e:
+ handle_response_read_exceptions(e)
+ raise e
+
+
+def handle_sslerror(e: ssl.SSLError):
+ if not isinstance(e, ssl.SSLError):
+ return
+ if isinstance(e, ssl.SSLCertVerificationError):
+ raise CertificateVerifyError(cause=e) from e
+ raise SSLError(cause=e) from e
+
+
+def handle_response_read_exceptions(e):
+ if isinstance(e, http.client.IncompleteRead):
+ raise IncompleteRead(partial=len(e.partial), cause=e, expected=e.expected) from e
+ elif isinstance(e, ssl.SSLError):
+ handle_sslerror(e)
+ elif isinstance(e, (OSError, EOFError, http.client.HTTPException, *CONTENT_DECODE_ERRORS)):
+ # OSErrors raised here should mostly be network related
+ raise TransportError(cause=e) from e
+
+
+@register_rh
+class UrllibRH(RequestHandler, InstanceStoreMixin):
+ _SUPPORTED_URL_SCHEMES = ('http', 'https', 'data', 'ftp')
+ _SUPPORTED_PROXY_SCHEMES = ('http', 'socks4', 'socks4a', 'socks5', 'socks5h')
+ _SUPPORTED_FEATURES = (Features.NO_PROXY, Features.ALL_PROXY)
+ RH_NAME = 'urllib'
+
+ def __init__(self, *, enable_file_urls: bool = False, **kwargs):
+ super().__init__(**kwargs)
+ self.enable_file_urls = enable_file_urls
+ if self.enable_file_urls:
+ self._SUPPORTED_URL_SCHEMES = (*self._SUPPORTED_URL_SCHEMES, 'file')
+
+ def _check_extensions(self, extensions):
+ super()._check_extensions(extensions)
+ extensions.pop('cookiejar', None)
+ extensions.pop('timeout', None)
+
+ def _create_instance(self, proxies, cookiejar):
+ opener = urllib.request.OpenerDirector()
+ handlers = [
+ ProxyHandler(proxies),
+ HTTPHandler(
+ debuglevel=int(bool(self.verbose)),
+ context=self._make_sslcontext(),
+ source_address=self.source_address),
+ HTTPCookieProcessor(cookiejar),
+ DataHandler(),
+ UnknownHandler(),
+ HTTPDefaultErrorHandler(),
+ FTPHandler(),
+ HTTPErrorProcessor(),
+ RedirectHandler(),
+ ]
+
+ if self.enable_file_urls:
+ handlers.append(FileHandler())
+
+ for handler in handlers:
+ opener.add_handler(handler)
+
+ # Delete the default user-agent header, which would otherwise apply in
+ # cases where our custom HTTP handler doesn't come into play
+ # (See https://github.com/ytdl-org/youtube-dl/issues/1309 for details)
+ opener.addheaders = []
+ return opener
+
+ def _send(self, request):
+ headers = self._merge_headers(request.headers)
+ add_accept_encoding_header(headers, SUPPORTED_ENCODINGS)
+ urllib_req = urllib.request.Request(
+ url=request.url,
+ data=request.data,
+ headers=dict(headers),
+ method=request.method
+ )
+
+ opener = self._get_instance(
+ proxies=request.proxies or self.proxies,
+ cookiejar=request.extensions.get('cookiejar') or self.cookiejar
+ )
+ try:
+ res = opener.open(urllib_req, timeout=float(request.extensions.get('timeout') or self.timeout))
+ except urllib.error.HTTPError as e:
+ if isinstance(e.fp, (http.client.HTTPResponse, urllib.response.addinfourl)):
+ # Prevent file object from being closed when urllib.error.HTTPError is destroyed.
+ e._closer.close_called = True
+ raise HTTPError(UrllibResponseAdapter(e.fp), redirect_loop='redirect error' in str(e)) from e
+ raise # unexpected
+ except urllib.error.URLError as e:
+ cause = e.reason # NOTE: cause may be a string
+
+ # proxy errors
+ if 'tunnel connection failed' in str(cause).lower() or isinstance(cause, SocksProxyError):
+ raise ProxyError(cause=e) from e
+
+ handle_response_read_exceptions(cause)
+ raise TransportError(cause=e) from e
+ except (http.client.InvalidURL, ValueError) as e:
+ # Validation errors
+ # http.client.HTTPConnection raises ValueError in some validation cases
+ # such as if request method contains illegal control characters [1]
+ # 1. https://github.com/python/cpython/blob/987b712b4aeeece336eed24fcc87a950a756c3e2/Lib/http/client.py#L1256
+ raise RequestError(cause=e) from e
+ except Exception as e:
+ handle_response_read_exceptions(e)
+ raise # unexpected
+
+ return UrllibResponseAdapter(res)
diff --git a/yt_dlp/networking/_websockets.py b/yt_dlp/networking/_websockets.py
new file mode 100644
index 0000000..1597932
--- /dev/null
+++ b/yt_dlp/networking/_websockets.py
@@ -0,0 +1,173 @@
+from __future__ import annotations
+
+import io
+import logging
+import ssl
+import sys
+
+from ._helper import (
+ create_connection,
+ create_socks_proxy_socket,
+ make_socks_proxy_opts,
+ select_proxy,
+)
+from .common import Features, Response, register_rh
+from .exceptions import (
+ CertificateVerifyError,
+ HTTPError,
+ ProxyError,
+ RequestError,
+ SSLError,
+ TransportError,
+)
+from .websocket import WebSocketRequestHandler, WebSocketResponse
+from ..compat import functools
+from ..dependencies import websockets
+from ..socks import ProxyError as SocksProxyError
+from ..utils import int_or_none
+
+if not websockets:
+ raise ImportError('websockets is not installed')
+
+import websockets.version
+
+websockets_version = tuple(map(int_or_none, websockets.version.version.split('.')))
+if websockets_version < (12, 0):
+ raise ImportError('Only websockets>=12.0 is supported')
+
+import websockets.sync.client
+from websockets.uri import parse_uri
+
+
+class WebsocketsResponseAdapter(WebSocketResponse):
+
+ def __init__(self, wsw: websockets.sync.client.ClientConnection, url):
+ super().__init__(
+ fp=io.BytesIO(wsw.response.body or b''),
+ url=url,
+ headers=wsw.response.headers,
+ status=wsw.response.status_code,
+ reason=wsw.response.reason_phrase,
+ )
+ self.wsw = wsw
+
+ def close(self):
+ self.wsw.close()
+ super().close()
+
+ def send(self, message):
+ # https://websockets.readthedocs.io/en/stable/reference/sync/client.html#websockets.sync.client.ClientConnection.send
+ try:
+ return self.wsw.send(message)
+ except (websockets.exceptions.WebSocketException, RuntimeError, TimeoutError) as e:
+ raise TransportError(cause=e) from e
+ except SocksProxyError as e:
+ raise ProxyError(cause=e) from e
+ except TypeError as e:
+ raise RequestError(cause=e) from e
+
+ def recv(self):
+ # https://websockets.readthedocs.io/en/stable/reference/sync/client.html#websockets.sync.client.ClientConnection.recv
+ try:
+ return self.wsw.recv()
+ except SocksProxyError as e:
+ raise ProxyError(cause=e) from e
+ except (websockets.exceptions.WebSocketException, RuntimeError, TimeoutError) as e:
+ raise TransportError(cause=e) from e
+
+
+@register_rh
+class WebsocketsRH(WebSocketRequestHandler):
+ """
+ Websockets request handler
+ https://websockets.readthedocs.io
+ https://github.com/python-websockets/websockets
+ """
+ _SUPPORTED_URL_SCHEMES = ('wss', 'ws')
+ _SUPPORTED_PROXY_SCHEMES = ('socks4', 'socks4a', 'socks5', 'socks5h')
+ _SUPPORTED_FEATURES = (Features.ALL_PROXY, Features.NO_PROXY)
+ RH_NAME = 'websockets'
+
+ def __init__(self, *args, **kwargs):
+ super().__init__(*args, **kwargs)
+ self.__logging_handlers = {}
+ for name in ('websockets.client', 'websockets.server'):
+ logger = logging.getLogger(name)
+ handler = logging.StreamHandler(stream=sys.stdout)
+ handler.setFormatter(logging.Formatter(f'{self.RH_NAME}: %(message)s'))
+ self.__logging_handlers[name] = handler
+ logger.addHandler(handler)
+ if self.verbose:
+ logger.setLevel(logging.DEBUG)
+
+ def _check_extensions(self, extensions):
+ super()._check_extensions(extensions)
+ extensions.pop('timeout', None)
+ extensions.pop('cookiejar', None)
+
+ def close(self):
+ # Remove the logging handler that contains a reference to our logger
+ # See: https://github.com/yt-dlp/yt-dlp/issues/8922
+ for name, handler in self.__logging_handlers.items():
+ logging.getLogger(name).removeHandler(handler)
+
+ def _send(self, request):
+ timeout = float(request.extensions.get('timeout') or self.timeout)
+ headers = self._merge_headers(request.headers)
+ if 'cookie' not in headers:
+ cookiejar = request.extensions.get('cookiejar') or self.cookiejar
+ cookie_header = cookiejar.get_cookie_header(request.url)
+ if cookie_header:
+ headers['cookie'] = cookie_header
+
+ wsuri = parse_uri(request.url)
+ create_conn_kwargs = {
+ 'source_address': (self.source_address, 0) if self.source_address else None,
+ 'timeout': timeout
+ }
+ proxy = select_proxy(request.url, request.proxies or self.proxies or {})
+ try:
+ if proxy:
+ socks_proxy_options = make_socks_proxy_opts(proxy)
+ sock = create_connection(
+ address=(socks_proxy_options['addr'], socks_proxy_options['port']),
+ _create_socket_func=functools.partial(
+ create_socks_proxy_socket, (wsuri.host, wsuri.port), socks_proxy_options),
+ **create_conn_kwargs
+ )
+ else:
+ sock = create_connection(
+ address=(wsuri.host, wsuri.port),
+ **create_conn_kwargs
+ )
+ conn = websockets.sync.client.connect(
+ sock=sock,
+ uri=request.url,
+ additional_headers=headers,
+ open_timeout=timeout,
+ user_agent_header=None,
+ ssl_context=self._make_sslcontext() if wsuri.secure else None,
+ close_timeout=0, # not ideal, but prevents yt-dlp from hanging
+ )
+ return WebsocketsResponseAdapter(conn, url=request.url)
+
+ # Exceptions as per https://websockets.readthedocs.io/en/stable/reference/sync/client.html
+ except SocksProxyError as e:
+ raise ProxyError(cause=e) from e
+ except websockets.exceptions.InvalidURI as e:
+ raise RequestError(cause=e) from e
+ except ssl.SSLCertVerificationError as e:
+ raise CertificateVerifyError(cause=e) from e
+ except ssl.SSLError as e:
+ raise SSLError(cause=e) from e
+ except websockets.exceptions.InvalidStatus as e:
+ raise HTTPError(
+ Response(
+ fp=io.BytesIO(e.response.body),
+ url=request.url,
+ headers=e.response.headers,
+ status=e.response.status_code,
+ reason=e.response.reason_phrase),
+ ) from e
+ except (OSError, TimeoutError, websockets.exceptions.WebSocketException) as e:
+ raise TransportError(cause=e) from e
diff --git a/yt_dlp/networking/common.py b/yt_dlp/networking/common.py
new file mode 100644
index 0000000..39442ba
--- /dev/null
+++ b/yt_dlp/networking/common.py
@@ -0,0 +1,565 @@
+from __future__ import annotations
+
+import abc
+import copy
+import enum
+import functools
+import io
+import typing
+import urllib.parse
+import urllib.request
+import urllib.response
+from collections.abc import Iterable, Mapping
+from email.message import Message
+from http import HTTPStatus
+
+from ._helper import make_ssl_context, wrap_request_errors
+from .exceptions import (
+ NoSupportingHandlers,
+ RequestError,
+ TransportError,
+ UnsupportedRequest,
+)
+from ..compat.types import NoneType
+from ..cookies import YoutubeDLCookieJar
+from ..utils import (
+ bug_reports_message,
+ classproperty,
+ deprecation_warning,
+ error_to_str,
+ update_url_query,
+)
+from ..utils.networking import HTTPHeaderDict, normalize_url
+
+
+def register_preference(*handlers: type[RequestHandler]):
+ assert all(issubclass(handler, RequestHandler) for handler in handlers)
+
+ def outer(preference: Preference):
+ @functools.wraps(preference)
+ def inner(handler, *args, **kwargs):
+ if not handlers or isinstance(handler, handlers):
+ return preference(handler, *args, **kwargs)
+ return 0
+ _RH_PREFERENCES.add(inner)
+ return inner
+ return outer
+
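+# A registration sketch (SomeRH is a hypothetical handler class): the returned
+# preference adds to SomeRH's score when handlers are sorted for a request,
+# while handlers of other types get 0 from it.
+#   @register_preference(SomeRH)
+#   def prefer_some(rh, request):
+#       return 50
+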
+
+class RequestDirector:
+ """RequestDirector class
+
+ Helper class that, when given a request, forwards it to a RequestHandler that supports it.
+
+ Preference functions in the form of func(handler, request) -> int
+ can be registered into the `preferences` set. These are used to sort handlers
+ in order of preference.
+
+ @param logger: Logger instance.
+ @param verbose: Print debug request information to stdout.
+ """
+
+ def __init__(self, logger, verbose=False):
+ self.handlers: dict[str, RequestHandler] = {}
+ self.preferences: set[Preference] = set()
+ self.logger = logger # TODO(Grub4k): default logger
+ self.verbose = verbose
+
+ def close(self):
+ for handler in self.handlers.values():
+ handler.close()
+ self.handlers.clear()
+
+ def add_handler(self, handler: RequestHandler):
+ """Add a handler. If a handler of the same RH_KEY exists, it will overwrite it"""
+ assert isinstance(handler, RequestHandler), 'handler must be a RequestHandler'
+ self.handlers[handler.RH_KEY] = handler
+
+ def _get_handlers(self, request: Request) -> list[RequestHandler]:
+ """Sorts handlers by preference, given a request"""
+ preferences = {
+ rh: sum(pref(rh, request) for pref in self.preferences)
+ for rh in self.handlers.values()
+ }
+ self._print_verbose('Handler preferences for this request: %s' % ', '.join(
+ f'{rh.RH_NAME}={pref}' for rh, pref in preferences.items()))
+ return sorted(self.handlers.values(), key=preferences.get, reverse=True)
+
+ def _print_verbose(self, msg):
+ if self.verbose:
+ self.logger.stdout(f'director: {msg}')
+
+ def send(self, request: Request) -> Response:
+ """
+ Passes a request onto a suitable RequestHandler
+ """
+ if not self.handlers:
+ raise RequestError('No request handlers configured')
+
+ assert isinstance(request, Request)
+
+ unexpected_errors = []
+ unsupported_errors = []
+ for handler in self._get_handlers(request):
+ self._print_verbose(f'Checking if "{handler.RH_NAME}" supports this request.')
+ try:
+ handler.validate(request)
+ except UnsupportedRequest as e:
+ self._print_verbose(
+ f'"{handler.RH_NAME}" cannot handle this request (reason: {error_to_str(e)})')
+ unsupported_errors.append(e)
+ continue
+
+ self._print_verbose(f'Sending request via "{handler.RH_NAME}"')
+ try:
+ response = handler.send(request)
+ except RequestError:
+ raise
+ except Exception as e:
+ self.logger.error(
+ f'[{handler.RH_NAME}] Unexpected error: {error_to_str(e)}{bug_reports_message()}',
+ is_error=False)
+ unexpected_errors.append(e)
+ continue
+
+ assert isinstance(response, Response)
+ return response
+
+ raise NoSupportingHandlers(unsupported_errors, unexpected_errors)
+
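+# A usage sketch (assuming a logger object exposing stdout() and error()):
+#   director = RequestDirector(logger=my_logger)
+#   director.add_handler(UrllibRH(logger=my_logger))
+#   response = director.send(Request('https://example.com'))
+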
+
+_REQUEST_HANDLERS = {}
+
+
+def register_rh(handler):
+ """Register a RequestHandler class"""
+ assert issubclass(handler, RequestHandler), f'{handler} must be a subclass of RequestHandler'
+ assert handler.RH_KEY not in _REQUEST_HANDLERS, f'RequestHandler {handler.RH_KEY} already registered'
+ _REQUEST_HANDLERS[handler.RH_KEY] = handler
+ return handler
+
+
+class Features(enum.Enum):
+ ALL_PROXY = enum.auto()
+ NO_PROXY = enum.auto()
+
+
+class RequestHandler(abc.ABC):
+
+ """Request Handler class
+
+ Request handlers are classes that, given a Request,
+ process the request from start to finish and return a Response.
+
+ Concrete subclasses need to redefine the _send(request) method,
+ which handles the underlying request logic and returns a Response.
+
+ RH_NAME class variable may contain a display name for the RequestHandler.
+ By default, this is generated from the class name.
+
+ The concrete request handler MUST have "RH" as the suffix in the class name.
+
+ All exceptions raised by a RequestHandler should be an instance of RequestError.
+ Any other exception raised will be treated as a handler issue.
+
+ If a Request is not supported by the handler, an UnsupportedRequest
+ should be raised with a reason.
+
+ By default, some checks are done on the request in _validate() based on the following class variables:
+ - `_SUPPORTED_URL_SCHEMES`: a tuple of supported url schemes.
+ Any Request with a url scheme not in this list will raise an UnsupportedRequest.
+
+ - `_SUPPORTED_PROXY_SCHEMES`: a tuple of supported proxy url schemes. Any Request that contains
+ a proxy url with a url scheme not in this list will raise an UnsupportedRequest.
+
+ - `_SUPPORTED_FEATURES`: a tuple of supported features, as defined in Features enum.
+
+ The above may be set to None to disable the checks.
+
+ Parameters:
+ @param logger: logger instance
+ @param headers: HTTP Headers to include when sending requests.
+ @param cookiejar: Cookiejar to use for requests.
+ @param timeout: Socket timeout to use when sending requests.
+ @param proxies: Proxies to use for sending requests.
+ @param source_address: Client-side IP address to bind to for requests.
+ @param verbose: Print debug request and traffic information to stdout.
+ @param prefer_system_certs: Whether to prefer system certificates over other means (e.g. certifi).
+ @param client_cert: SSL client certificate configuration.
+ dict with {client_certificate, client_certificate_key, client_certificate_password}
+ @param verify: Verify SSL certificates
+ @param legacy_ssl_support: Enable legacy SSL options such as legacy server connect and older cipher support.
+
+ Some configuration options may be available for individual Requests too. In this case,
+ either the Request configuration option takes precedence or they are merged.
+
+ Requests may have additional optional parameters defined as extensions.
+ RequestHandler subclasses may choose to support custom extensions.
+
+ If an extension is supported, subclasses should extend _check_extensions(extensions)
+ to pop and validate the extension.
+ - Extensions left in `extensions` are treated as unsupported and UnsupportedRequest will be raised.
+
+ The following extensions are defined for RequestHandler:
+ - `cookiejar`: Cookiejar to use for this request.
+ - `timeout`: socket timeout to use for this request.
+ To enable these, add extensions.pop('<extension>', None) to _check_extensions
+
+ Apart from the url protocol, proxies dict may contain the following keys:
+ - `all`: proxy to use for all protocols. Used as a fallback if no proxy is set for a specific protocol.
+ - `no`: comma-separated list of hostnames (optionally with port) to not use a proxy for.
+ Note: a RequestHandler may not support these, as defined in `_SUPPORTED_FEATURES`.
+
+ """
+
+ _SUPPORTED_URL_SCHEMES = ()
+ _SUPPORTED_PROXY_SCHEMES = ()
+ _SUPPORTED_FEATURES = ()
+
+ def __init__(
+ self, *,
+ logger, # TODO(Grub4k): default logger
+ headers: HTTPHeaderDict = None,
+ cookiejar: YoutubeDLCookieJar = None,
+ timeout: float | int | None = None,
+ proxies: dict = None,
+ source_address: str = None,
+ verbose: bool = False,
+ prefer_system_certs: bool = False,
+ client_cert: dict[str, str | None] = None,
+ verify: bool = True,
+ legacy_ssl_support: bool = False,
+ **_,
+ ):
+
+ self._logger = logger
+ self.headers = headers or {}
+ self.cookiejar = cookiejar if cookiejar is not None else YoutubeDLCookieJar()
+ self.timeout = float(timeout or 20)
+ self.proxies = proxies or {}
+ self.source_address = source_address
+ self.verbose = verbose
+ self.prefer_system_certs = prefer_system_certs
+ self._client_cert = client_cert or {}
+ self.verify = verify
+ self.legacy_ssl_support = legacy_ssl_support
+ super().__init__()
+
+ def _make_sslcontext(self):
+ return make_ssl_context(
+ verify=self.verify,
+ legacy_support=self.legacy_ssl_support,
+ use_certifi=not self.prefer_system_certs,
+ **self._client_cert,
+ )
+
+ def _merge_headers(self, request_headers):
+ return HTTPHeaderDict(self.headers, request_headers)
+
+ def _check_url_scheme(self, request: Request):
+ scheme = urllib.parse.urlparse(request.url).scheme.lower()
+ if self._SUPPORTED_URL_SCHEMES is not None and scheme not in self._SUPPORTED_URL_SCHEMES:
+ raise UnsupportedRequest(f'Unsupported url scheme: "{scheme}"')
+ return scheme # for further processing
+
+ def _check_proxies(self, proxies):
+ for proxy_key, proxy_url in proxies.items():
+ if proxy_url is None:
+ continue
+ if proxy_key == 'no':
+ if self._SUPPORTED_FEATURES is not None and Features.NO_PROXY not in self._SUPPORTED_FEATURES:
+ raise UnsupportedRequest('"no" proxy is not supported')
+ continue
+ if (
+ proxy_key == 'all'
+ and self._SUPPORTED_FEATURES is not None
+ and Features.ALL_PROXY not in self._SUPPORTED_FEATURES
+ ):
+ raise UnsupportedRequest('"all" proxy is not supported')
+
+ # It is unlikely this handler will use this proxy, so ignore it.
+ # This allows a proxy to be set for a protocol that one handler supports
+ # even when another handler supports neither that protocol nor its proxy.
+ if self._SUPPORTED_URL_SCHEMES is not None and proxy_key not in (*self._SUPPORTED_URL_SCHEMES, 'all'):
+ continue
+
+ if self._SUPPORTED_PROXY_SCHEMES is None:
+ # Skip proxy scheme checks
+ continue
+
+ try:
+ if urllib.request._parse_proxy(proxy_url)[0] is None:
+ # Scheme-less proxies are not supported
+ raise UnsupportedRequest(f'Proxy "{proxy_url}" missing scheme')
+ except ValueError as e:
+ # parse_proxy may raise on some invalid proxy urls such as "/a/b/c"
+ raise UnsupportedRequest(f'Invalid proxy url "{proxy_url}": {e}')
+
+ scheme = urllib.parse.urlparse(proxy_url).scheme.lower()
+ if scheme not in self._SUPPORTED_PROXY_SCHEMES:
+ raise UnsupportedRequest(f'Unsupported proxy type: "{scheme}"')
+
+ def _check_extensions(self, extensions):
+ """Check extensions for unsupported extensions. Subclasses should extend this."""
+ assert isinstance(extensions.get('cookiejar'), (YoutubeDLCookieJar, NoneType))
+ assert isinstance(extensions.get('timeout'), (float, int, NoneType))
+
+ def _validate(self, request):
+ self._check_url_scheme(request)
+ self._check_proxies(request.proxies or self.proxies)
+ extensions = request.extensions.copy()
+ self._check_extensions(extensions)
+ if extensions:
+ # TODO: add support for optional extensions
+ raise UnsupportedRequest(f'Unsupported extensions: {", ".join(extensions.keys())}')
+
+ @wrap_request_errors
+ def validate(self, request: Request):
+ if not isinstance(request, Request):
+ raise TypeError('Expected an instance of Request')
+ self._validate(request)
+
+ @wrap_request_errors
+ def send(self, request: Request) -> Response:
+ if not isinstance(request, Request):
+ raise TypeError('Expected an instance of Request')
+ return self._send(request)
+
+ @abc.abstractmethod
+ def _send(self, request: Request):
+ """Handle a request from start to finish. Redefine in subclasses."""
+ pass
+
+ def close(self):
+ pass
+
+ @classproperty
+ def RH_NAME(cls):
+ return cls.__name__[:-2]
+
+ @classproperty
+ def RH_KEY(cls):
+ assert cls.__name__.endswith('RH'), 'RequestHandler class names must end with "RH"'
+ return cls.__name__[:-2]
+
+ def __enter__(self):
+ return self
+
+ def __exit__(self, *args):
+ self.close()
+
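+# A minimal subclass sketch (hypothetical; the real handlers live in
+# _urllib.py, _requests.py and _websockets.py). The class name must end in
+# "RH" and only _send() must be redefined:
+#   @register_rh
+#   class ExampleRH(RequestHandler):
+#       _SUPPORTED_URL_SCHEMES = ('http',)
+#       def _send(self, request):
+#           raise RequestError('not implemented')
+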
+
+class Request:
+ """
+ Represents a request to be made.
+ Partially backwards-compatible with urllib.request.Request.
+
+ @param url: url to send. Will be sanitized.
+ @param data: payload data to send. Must be bytes, iterable of bytes, a file-like object or None
+ @param headers: headers to send.
+ @param proxies: proxy dict mapping of proto:proxy to use for the request and any redirects.
+ @param query: URL query parameters to update the url with.
+ @param method: HTTP method to use. If no method specified, will use POST if payload data is present else GET
+ @param extensions: Dictionary of Request extensions to add, as supported by handlers.
+ """
+
+ def __init__(
+ self,
+ url: str,
+ data: RequestData = None,
+ headers: typing.Mapping = None,
+ proxies: dict = None,
+ query: dict = None,
+ method: str = None,
+ extensions: dict = None
+ ):
+
+ self._headers = HTTPHeaderDict()
+ self._data = None
+
+ if query:
+ url = update_url_query(url, query)
+
+ self.url = url
+ self.method = method
+ if headers:
+ self.headers = headers
+ self.data = data # note: must be done after setting headers
+ self.proxies = proxies or {}
+ self.extensions = extensions or {}
+
+ @property
+ def url(self):
+ return self._url
+
+ @url.setter
+ def url(self, url):
+ if not isinstance(url, str):
+ raise TypeError('url must be a string')
+ elif url.startswith('//'):
+ url = 'http:' + url
+ self._url = normalize_url(url)
+
+ @property
+ def method(self):
+ return self._method or ('POST' if self.data is not None else 'GET')
+
+ @method.setter
+ def method(self, method):
+ if method is None:
+ self._method = None
+ elif isinstance(method, str):
+ self._method = method.upper()
+ else:
+ raise TypeError('method must be a string')
+
+ @property
+ def data(self):
+ return self._data
+
+ @data.setter
+ def data(self, data: RequestData):
+ # Try to catch some common mistakes
+ if data is not None and (
+ not isinstance(data, (bytes, io.IOBase, Iterable)) or isinstance(data, (str, Mapping))
+ ):
+ raise TypeError('data must be bytes, iterable of bytes, or a file-like object')
+
+ if data == self._data and self._data is None:
+ self.headers.pop('Content-Length', None)
+
+ # https://docs.python.org/3/library/urllib.request.html#urllib.request.Request.data
+ if data != self._data:
+ if self._data is not None:
+ self.headers.pop('Content-Length', None)
+ self._data = data
+
+ if self._data is None:
+ self.headers.pop('Content-Type', None)
+
+ if 'Content-Type' not in self.headers and self._data is not None:
+ self.headers['Content-Type'] = 'application/x-www-form-urlencoded'
+
+ @property
+ def headers(self) -> HTTPHeaderDict:
+ return self._headers
+
+ @headers.setter
+ def headers(self, new_headers: Mapping):
+ """Replaces headers of the request. If not a HTTPHeaderDict, it will be converted to one."""
+ if isinstance(new_headers, HTTPHeaderDict):
+ self._headers = new_headers
+ elif isinstance(new_headers, Mapping):
+ self._headers = HTTPHeaderDict(new_headers)
+ else:
+ raise TypeError('headers must be a mapping')
+
+ def update(self, url=None, data=None, headers=None, query=None):
+ self.data = data if data is not None else self.data
+ self.headers.update(headers or {})
+ self.url = update_url_query(url or self.url, query or {})
+
+ def copy(self):
+ return self.__class__(
+ url=self.url,
+ headers=copy.deepcopy(self.headers),
+ proxies=copy.deepcopy(self.proxies),
+ data=self._data,
+ extensions=copy.copy(self.extensions),
+ method=self._method,
+ )
+
+
+HEADRequest = functools.partial(Request, method='HEAD')
+PUTRequest = functools.partial(Request, method='PUT')
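+
+# A usage sketch: the method defaults to GET, or POST once payload data is set,
+# and query parameters are merged into the url (values are hypothetical):
+#   req = Request('https://example.com/api', query={'id': '1'})
+#   req.method                                  # 'GET'
+#   req.data = b'payload'
+#   req.method                                  # 'POST'
+#   HEADRequest('https://example.com').method   # 'HEAD'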
+
+
+class Response(io.IOBase):
+ """
+ Base class for HTTP response adapters.
+
+ By default, it provides a basic wrapper for a file-like response object.
+
+ Interface partially backwards-compatible with addinfourl and http.client.HTTPResponse.
+
+ @param fp: Original, file-like, response.
+ @param url: URL that this is a response of.
+ @param headers: response headers.
+ @param status: Response HTTP status code. Default is 200 OK.
+ @param reason: HTTP status reason. Will use built-in reasons based on status code if not provided.
+ """
+
+ def __init__(
+ self,
+ fp: typing.IO,
+ url: str,
+ headers: Mapping[str, str],
+ status: int = 200,
+ reason: str = None):
+
+ self.fp = fp
+ self.headers = Message()
+ for name, value in headers.items():
+ self.headers.add_header(name, value)
+ self.status = status
+ self.url = url
+ try:
+ self.reason = reason or HTTPStatus(status).phrase
+ except ValueError:
+ self.reason = None
+
+ def readable(self):
+ return self.fp.readable()
+
+ def read(self, amt: int = None) -> bytes:
+ # Expected errors raised here should be of type RequestError or subclasses.
+ # Subclasses should redefine this method with more precise error handling.
+ try:
+ return self.fp.read(amt)
+ except Exception as e:
+ raise TransportError(cause=e) from e
+
+ def close(self):
+ self.fp.close()
+ return super().close()
+
+ def get_header(self, name, default=None):
+ """Get header for name.
+ If there are multiple matching headers, return all of them separated by commas.
+ headers = self.headers.get_all(name)
+ if not headers:
+ return default
+ if name.title() == 'Set-Cookie':
+ # Special case, only get the first one
+ # https://www.rfc-editor.org/rfc/rfc9110.html#section-5.3-4.1
+ return headers[0]
+ return ', '.join(headers)
+
+ # The following methods are for compatibility reasons and are deprecated
+ @property
+ def code(self):
+ deprecation_warning('Response.code is deprecated, use Response.status', stacklevel=2)
+ return self.status
+
+ def getcode(self):
+ deprecation_warning('Response.getcode() is deprecated, use Response.status', stacklevel=2)
+ return self.status
+
+ def geturl(self):
+ deprecation_warning('Response.geturl() is deprecated, use Response.url', stacklevel=2)
+ return self.url
+
+ def info(self):
+ deprecation_warning('Response.info() is deprecated, use Response.headers', stacklevel=2)
+ return self.headers
+
+ def getheader(self, name, default=None):
+ deprecation_warning('Response.getheader() is deprecated, use Response.get_header', stacklevel=2)
+ return self.get_header(name, default)
+
+
+if typing.TYPE_CHECKING:
+ RequestData = bytes | Iterable[bytes] | typing.IO | None
+ Preference = typing.Callable[[RequestHandler, Request], int]
+
+_RH_PREFERENCES: set[Preference] = set()
diff --git a/yt_dlp/networking/exceptions.py b/yt_dlp/networking/exceptions.py
new file mode 100644
index 0000000..9037f18
--- /dev/null
+++ b/yt_dlp/networking/exceptions.py
@@ -0,0 +1,103 @@
+from __future__ import annotations
+
+import typing
+
+from ..utils import YoutubeDLError
+
+if typing.TYPE_CHECKING:
+ from .common import RequestHandler, Response
+
+
+class RequestError(YoutubeDLError):
+ def __init__(
+ self,
+ msg: str | None = None,
+ cause: Exception | str | None = None,
+ handler: RequestHandler = None
+ ):
+ self.handler = handler
+ self.cause = cause
+ if not msg and cause:
+ msg = str(cause)
+ super().__init__(msg)
+
+
+class UnsupportedRequest(RequestError):
+ """raised when a handler cannot handle a request"""
+ pass
+
+
+class NoSupportingHandlers(RequestError):
+ """raised when no handlers can support a request for various reasons"""
+
+ def __init__(self, unsupported_errors: list[UnsupportedRequest], unexpected_errors: list[Exception]):
+ self.unsupported_errors = unsupported_errors or []
+ self.unexpected_errors = unexpected_errors or []
+
+ # Print a quick summary of the errors
+ err_handler_map = {}
+ for err in unsupported_errors:
+ err_handler_map.setdefault(err.msg, []).append(err.handler.RH_NAME)
+
+ reason_str = ', '.join([f'{msg} ({", ".join(handlers)})' for msg, handlers in err_handler_map.items()])
+ if unexpected_errors:
+ reason_str = ' + '.join(filter(None, [reason_str, f'{len(unexpected_errors)} unexpected error(s)']))
+
+ err_str = 'Unable to handle request'
+ if reason_str:
+ err_str += f': {reason_str}'
+
+ super().__init__(msg=err_str)
+
+
+class TransportError(RequestError):
+ """Network related errors"""
+
+
+class HTTPError(RequestError):
+ def __init__(self, response: Response, redirect_loop=False):
+ self.response = response
+ self.status = response.status
+ self.reason = response.reason
+ self.redirect_loop = redirect_loop
+ msg = f'HTTP Error {response.status}: {response.reason}'
+ if redirect_loop:
+ msg += ' (redirect loop detected)'
+
+ super().__init__(msg=msg)
+
+ def close(self):
+ self.response.close()
+
+ def __repr__(self):
+ return f'<HTTPError {self.status}: {self.reason}>'
+
+
+class IncompleteRead(TransportError):
+ def __init__(self, partial: int, expected: int | None = None, **kwargs):
+ self.partial = partial
+ self.expected = expected
+ msg = f'{partial} bytes read'
+ if expected is not None:
+ msg += f', {expected} more expected'
+
+ super().__init__(msg=msg, **kwargs)
+
+ def __repr__(self):
+ return f'<IncompleteRead: {self.msg}>'
+
+
+class SSLError(TransportError):
+ pass
+
+
+class CertificateVerifyError(SSLError):
+ """Raised when certificate validated has failed"""
+ pass
+
+
+class ProxyError(TransportError):
+ pass
+
+
+network_exceptions = (HTTPError, TransportError)
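+
+# Illustrative caller sketch (assumes a handler exposing a send() method):
+#
+#   try:
+#       response = handler.send(request)
+#   except HTTPError as e:
+#       print(f'server responded {e.status}: {e.reason}')
+#   except TransportError as e:
+#       print(f'network-level failure: {e}')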
diff --git a/yt_dlp/networking/websocket.py b/yt_dlp/networking/websocket.py
new file mode 100644
index 0000000..0e7e73c
--- /dev/null
+++ b/yt_dlp/networking/websocket.py
@@ -0,0 +1,23 @@
+from __future__ import annotations
+
+import abc
+
+from .common import RequestHandler, Response
+
+
+class WebSocketResponse(Response):
+
+ def send(self, message: bytes | str):
+ """
+ Send a message to the server.
+
+ @param message: The message to send. A string (str) is sent as a text frame, bytes is sent as a binary frame.
+ """
+ raise NotImplementedError
+
+ def recv(self):
+ raise NotImplementedError
+
+
+class WebSocketRequestHandler(RequestHandler, abc.ABC):
+ pass
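+
+# Minimal adapter sketch (hypothetical names; a real backend maps its own
+# frame API onto send()/recv()):
+#
+#   class ExampleWebSocketResponse(WebSocketResponse):
+#       def send(self, message):
+#           self._conn.send(message)  # assumed underlying connection
+#
+#       def recv(self):
+#           return self._conn.recv()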
diff --git a/yt_dlp/options.py b/yt_dlp/options.py
new file mode 100644
index 0000000..f884727
--- /dev/null
+++ b/yt_dlp/options.py
@@ -0,0 +1,1920 @@
+import collections
+import contextlib
+import optparse
+import os.path
+import re
+import shlex
+import shutil
+import string
+import sys
+
+from .compat import compat_expanduser
+from .cookies import SUPPORTED_BROWSERS, SUPPORTED_KEYRINGS
+from .downloader.external import list_external_downloaders
+from .postprocessor import (
+ FFmpegExtractAudioPP,
+ FFmpegMergerPP,
+ FFmpegSubtitlesConvertorPP,
+ FFmpegThumbnailsConvertorPP,
+ FFmpegVideoRemuxerPP,
+ SponsorBlockPP,
+)
+from .postprocessor.modify_chapters import DEFAULT_SPONSORBLOCK_CHAPTER_TITLE
+from .update import UPDATE_SOURCES, detect_variant, is_non_updateable
+from .utils import (
+ OUTTMPL_TYPES,
+ POSTPROCESS_WHEN,
+ Config,
+ deprecation_warning,
+ expand_path,
+ format_field,
+ get_executable_path,
+ get_system_config_dirs,
+ get_user_config_dirs,
+ join_nonempty,
+ orderedSet_from_options,
+ remove_end,
+ variadic,
+ write_string,
+)
+from .version import CHANNEL, __version__
+
+
+def parseOpts(overrideArguments=None, ignore_config_files='if_override'):
+ PACKAGE_NAME = 'yt-dlp'
+
+ root = Config(create_parser())
+ if ignore_config_files == 'if_override':
+ ignore_config_files = overrideArguments is not None
+
+ def read_config(*paths):
+ path = os.path.join(*paths)
+ conf = Config.read_file(path, default=None)
+ if conf is not None:
+ return conf, path
+
+ def _load_from_config_dirs(config_dirs):
+ for config_dir in config_dirs:
+ head, tail = os.path.split(config_dir)
+ assert tail == PACKAGE_NAME or config_dir == os.path.join(compat_expanduser('~'), f'.{PACKAGE_NAME}')
+
+ yield read_config(head, f'{PACKAGE_NAME}.conf')
+ if tail.startswith('.'): # ~/.PACKAGE_NAME
+ yield read_config(head, f'{PACKAGE_NAME}.conf.txt')
+ yield read_config(config_dir, 'config')
+ yield read_config(config_dir, 'config.txt')
+
+ def add_config(label, path=None, func=None):
+ """ Adds config and returns whether to continue """
+ if root.parse_known_args()[0].ignoreconfig:
+ return False
+ elif func:
+ assert path is None
+ args, current_path = next(
+ filter(None, _load_from_config_dirs(func(PACKAGE_NAME))), (None, None))
+ else:
+ current_path = os.path.join(path, 'yt-dlp.conf')
+ args = Config.read_file(current_path, default=None)
+ if args is not None:
+ root.append_config(args, current_path, label=label)
+ return True
+
+ def load_configs():
+ yield not ignore_config_files
+ yield add_config('Portable', get_executable_path())
+ yield add_config('Home', expand_path(root.parse_known_args()[0].paths.get('home', '')).strip())
+ yield add_config('User', func=get_user_config_dirs)
+ yield add_config('System', func=get_system_config_dirs)
+
+ opts = optparse.Values({'verbose': True, 'print_help': False})
+ try:
+ try:
+ if overrideArguments is not None:
+ root.append_config(overrideArguments, label='Override')
+ else:
+ root.append_config(sys.argv[1:], label='Command-line')
+ loaded_all_configs = all(load_configs())
+ except ValueError as err:
+ raise root.parser.error(err)
+
+ if loaded_all_configs:
+ # If ignoreconfig is found inside the system configuration file,
+ # the user configuration is removed
+ if root.parse_known_args()[0].ignoreconfig:
+ user_conf = next((i for i, conf in enumerate(root.configs) if conf.label == 'User'), None)
+ if user_conf is not None:
+ root.configs.pop(user_conf)
+
+ try:
+ root.configs[0].load_configs() # Resolve any aliases using --config-location
+ except ValueError as err:
+ raise root.parser.error(err)
+
+ opts, args = root.parse_args()
+ except optparse.OptParseError:
+ with contextlib.suppress(optparse.OptParseError):
+ opts, _ = root.parse_known_args(strict=False)
+ raise
+ except (SystemExit, KeyboardInterrupt):
+ opts.verbose = False
+ raise
+ finally:
+ verbose = opts.verbose and f'\n{root}'.replace('\n| ', '\n[debug] ')[1:]
+ if verbose:
+ write_string(f'{verbose}\n')
+ if opts.print_help:
+ if verbose:
+ write_string('\n')
+ root.parser.print_help()
+ if opts.print_help:
+ sys.exit()
+ return root.parser, opts, args
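+
+# Illustrative call sketch (assumed arguments):
+#
+#   parser, opts, args = parseOpts(['--skip-download', 'https://example.com'])
+#   # opts.skip_download is True; args == ['https://example.com']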
+
+
+class _YoutubeDLHelpFormatter(optparse.IndentedHelpFormatter):
+ def __init__(self):
+ # No need to wrap help messages if we're on a wide console
+ max_width = shutil.get_terminal_size().columns or 80
+ # The % is chosen to get a pretty output in README.md
+ super().__init__(width=max_width, max_help_position=int(0.45 * max_width))
+
+ @staticmethod
+ def format_option_strings(option):
+ """ ('-o', '--option') -> -o, --format METAVAR """
+ opts = join_nonempty(
+ option._short_opts and option._short_opts[0],
+ option._long_opts and option._long_opts[0],
+ delim=', ')
+ if option.takes_value():
+ opts += f' {option.metavar}'
+ return opts
+
+
+class _YoutubeDLOptionParser(optparse.OptionParser):
+ # optparse is deprecated since Python 3.2. So assume a stable interface even for private methods
+ ALIAS_DEST = '_triggered_aliases'
+ ALIAS_TRIGGER_LIMIT = 100
+
+ def __init__(self):
+ super().__init__(
+ prog='yt-dlp' if detect_variant() == 'source' else None,
+ version=__version__,
+ usage='%prog [OPTIONS] URL [URL...]',
+ epilog='See full documentation at https://github.com/yt-dlp/yt-dlp#readme',
+ formatter=_YoutubeDLHelpFormatter(),
+ conflict_handler='resolve',
+ )
+ self.set_default(self.ALIAS_DEST, collections.defaultdict(int))
+
+ _UNKNOWN_OPTION = (optparse.BadOptionError, optparse.AmbiguousOptionError)
+ _BAD_OPTION = optparse.OptionValueError
+
+ def parse_known_args(self, args=None, values=None, strict=True):
+ """Same as parse_args, but ignore unknown switches. Similar to argparse.parse_known_args"""
+ self.rargs, self.largs = self._get_args(args), []
+ self.values = values or self.get_default_values()
+ while self.rargs:
+ arg = self.rargs[0]
+ try:
+ if arg == '--':
+ del self.rargs[0]
+ break
+ elif arg.startswith('--'):
+ self._process_long_opt(self.rargs, self.values)
+ elif arg.startswith('-') and arg != '-':
+ self._process_short_opts(self.rargs, self.values)
+ elif self.allow_interspersed_args:
+ self.largs.append(self.rargs.pop(0))
+ else:
+ break
+ except optparse.OptParseError as err:
+ if isinstance(err, self._UNKNOWN_OPTION):
+ self.largs.append(err.opt_str)
+ elif strict:
+ if isinstance(err, self._BAD_OPTION):
+ self.error(str(err))
+ raise
+ return self.check_values(self.values, self.largs)
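+
+    # Illustrative behaviour sketch (assumed args): parse_known_args(
+    # ['-q', '--bogus']) consumes -q and, instead of erroring out, returns
+    # '--bogus' among the leftover args, mirroring
+    # argparse.ArgumentParser.parse_known_args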
+
+ def _generate_error_message(self, msg):
+ msg = f'{self.get_prog_name()}: error: {str(msg).strip()}\n'
+ return f'{self.get_usage()}\n{msg}' if self.usage else msg
+
+ def error(self, msg):
+ raise optparse.OptParseError(self._generate_error_message(msg))
+
+ def _get_args(self, args):
+ return sys.argv[1:] if args is None else list(args)
+
+ def _match_long_opt(self, opt):
+ """Improve ambiguous argument resolution by comparing option objects instead of argument strings"""
+ try:
+ return super()._match_long_opt(opt)
+ except optparse.AmbiguousOptionError as e:
+ if len({self._long_opt[p] for p in e.possibilities}) == 1:
+ return e.possibilities[0]
+ raise
+
+
+def create_parser():
+ def _list_from_options_callback(option, opt_str, value, parser, append=True, delim=',', process=str.strip):
+ # append can be True, False or -1 (prepend)
+ current = list(getattr(parser.values, option.dest)) if append else []
+ value = list(filter(None, [process(value)] if delim is None else map(process, value.split(delim))))
+ setattr(
+ parser.values, option.dest,
+ current + value if append is True else value + current)
+
+ def _set_from_options_callback(
+ option, opt_str, value, parser, allowed_values, delim=',', aliases={},
+ process=lambda x: x.lower().strip()):
+ values = [process(value)] if delim is None else map(process, value.split(delim))
+ try:
+ requested = orderedSet_from_options(values, collections.ChainMap(aliases, {'all': allowed_values}),
+ start=getattr(parser.values, option.dest))
+ except ValueError as e:
+ raise optparse.OptionValueError(f'wrong {option.metavar} for {opt_str}: {e.args[0]}')
+
+ setattr(parser.values, option.dest, set(requested))
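+
+    # Worked example (illustrative): --compat-options youtube-dl expands the
+    # 'youtube-dl' alias through the ChainMap ('all' plus the '-'-prefixed
+    # removals) before the result is stored as a set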
+
+ def _dict_from_options_callback(
+ option, opt_str, value, parser,
+ allowed_keys=r'[\w-]+', delimiter=':', default_key=None, process=None, multiple_keys=True,
+ process_key=str.lower, append=False):
+
+ out_dict = dict(getattr(parser.values, option.dest))
+ multiple_args = not isinstance(value, str)
+ if multiple_keys:
+ allowed_keys = fr'({allowed_keys})(,({allowed_keys}))*'
+ mobj = re.match(
+ fr'(?is)(?P<keys>{allowed_keys}){delimiter}(?P<val>.*)$',
+ value[0] if multiple_args else value)
+ if mobj is not None:
+ keys, val = mobj.group('keys').split(','), mobj.group('val')
+ if multiple_args:
+ val = [val, *value[1:]]
+ elif default_key is not None:
+ keys, val = variadic(default_key), value
+ else:
+ raise optparse.OptionValueError(
+ f'wrong {opt_str} formatting; it should be {option.metavar}, not "{value}"')
+ try:
+ keys = map(process_key, keys) if process_key else keys
+ val = process(val) if process else val
+ except Exception as err:
+ raise optparse.OptionValueError(f'wrong {opt_str} formatting; {err}')
+ for key in keys:
+ out_dict[key] = out_dict.get(key, []) + [val] if append else val
+ setattr(parser.values, option.dest, out_dict)
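+
+    # Worked example (illustrative): for --downloader-args "aria2c:-x 8",
+    # value is 'aria2c:-x 8'; the regex yields keys=['aria2c'] and
+    # val='-x 8', and with process=shlex.split the stored dict becomes
+    # {'aria2c': ['-x', '8']}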
+
+ def when_prefix(default):
+ return {
+ 'default': {},
+ 'type': 'str',
+ 'action': 'callback',
+ 'callback': _dict_from_options_callback,
+ 'callback_kwargs': {
+ 'allowed_keys': '|'.join(map(re.escape, POSTPROCESS_WHEN)),
+ 'default_key': default,
+ 'multiple_keys': False,
+ 'append': True,
+ },
+ }
+
+ parser = _YoutubeDLOptionParser()
+ alias_group = optparse.OptionGroup(parser, 'Aliases')
+ Formatter = string.Formatter()
+
+ def _create_alias(option, opt_str, value, parser):
+ aliases, opts = value
+ try:
+ nargs = len({i if f == '' else f
+ for i, (_, f, _, _) in enumerate(Formatter.parse(opts)) if f is not None})
+ opts.format(*map(str, range(nargs))) # validate
+ except Exception as err:
+ raise optparse.OptionValueError(f'wrong {opt_str} OPTIONS formatting; {err}')
+ if alias_group not in parser.option_groups:
+ parser.add_option_group(alias_group)
+
+ aliases = (x if x.startswith('-') else f'--{x}' for x in map(str.strip, aliases.split(',')))
+ try:
+ args = [f'ARG{i}' for i in range(nargs)]
+ alias_group.add_option(
+ *aliases, nargs=nargs, dest=parser.ALIAS_DEST, type='str' if nargs else None,
+ metavar=' '.join(args), help=opts.format(*args), action='callback',
+ callback=_alias_callback, callback_kwargs={'opts': opts, 'nargs': nargs})
+ except Exception as err:
+ raise optparse.OptionValueError(f'wrong {opt_str} formatting; {err}')
+
+ def _alias_callback(option, opt_str, value, parser, opts, nargs):
+ counter = getattr(parser.values, option.dest)
+ counter[opt_str] += 1
+ if counter[opt_str] > parser.ALIAS_TRIGGER_LIMIT:
+ raise optparse.OptionValueError(f'Alias {opt_str} exceeded invocation limit')
+ if nargs == 1:
+ value = [value]
+ assert (nargs == 0 and value is None) or len(value) == nargs
+ parser.rargs[:0] = shlex.split(
+ opts if value is None else opts.format(*map(shlex.quote, value)))
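+
+    # Worked example (illustrative):
+    #   --alias get-audio,-X "-x --audio-format {0}"
+    # registers --get-audio and -X taking one argument (ARG0); invoking
+    # "-X mp3" splices '-x --audio-format mp3' back into parser.rargs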
+
+ general = optparse.OptionGroup(parser, 'General Options')
+ general.add_option(
+ '-h', '--help', dest='print_help', action='store_true',
+ help='Print this help text and exit')
+ general.add_option(
+ '--version',
+ action='version',
+ help='Print program version and exit')
+ general.add_option(
+ '-U', '--update',
+ action='store_const', dest='update_self', const=CHANNEL,
+ help=format_field(
+ is_non_updateable(), None, 'Check if updates are available. %s',
+ default=f'Update this program to the latest {CHANNEL} version'))
+ general.add_option(
+ '--no-update',
+ action='store_false', dest='update_self',
+ help='Do not check for updates (default)')
+ general.add_option(
+ '--update-to',
+ action='store', dest='update_self', metavar='[CHANNEL]@[TAG]',
+ help=(
+ 'Upgrade/downgrade to a specific version. CHANNEL can be a repository as well. '
+ f'CHANNEL and TAG default to "{CHANNEL.partition("@")[0]}" and "latest" respectively if omitted; '
+ f'See "UPDATE" for details. Supported channels: {", ".join(UPDATE_SOURCES)}'))
+ general.add_option(
+ '-i', '--ignore-errors',
+ action='store_true', dest='ignoreerrors',
+ help='Ignore download and postprocessing errors. The download will be considered successful even if the postprocessing fails')
+ general.add_option(
+ '--no-abort-on-error',
+ action='store_const', dest='ignoreerrors', const='only_download',
+ help='Continue with next video on download errors; e.g. to skip unavailable videos in a playlist (default)')
+ general.add_option(
+ '--abort-on-error', '--no-ignore-errors',
+ action='store_false', dest='ignoreerrors',
+ help='Abort downloading of further videos if an error occurs (Alias: --no-ignore-errors)')
+ general.add_option(
+ '--dump-user-agent',
+ action='store_true', dest='dump_user_agent', default=False,
+ help='Display the current user-agent and exit')
+ general.add_option(
+ '--list-extractors',
+ action='store_true', dest='list_extractors', default=False,
+ help='List all supported extractors and exit')
+ general.add_option(
+ '--extractor-descriptions',
+ action='store_true', dest='list_extractor_descriptions', default=False,
+ help='Output descriptions of all supported extractors and exit')
+ general.add_option(
+ '--use-extractors', '--ies',
+ action='callback', dest='allowed_extractors', metavar='NAMES', type='str',
+ default=[], callback=_list_from_options_callback,
+ help=(
+ 'Extractor names to use separated by commas. '
+ 'You can also use regexes, "all", "default" and "end" (end URL matching); '
+ 'e.g. --ies "holodex.*,end,youtube". '
+ 'Prefix the name with a "-" to exclude it, e.g. --ies default,-generic. '
+ 'Use --list-extractors for a list of extractor names. (Alias: --ies)'))
+ general.add_option(
+ '--force-generic-extractor',
+ action='store_true', dest='force_generic_extractor', default=False,
+ help=optparse.SUPPRESS_HELP)
+ general.add_option(
+ '--default-search',
+ dest='default_search', metavar='PREFIX',
+ help=(
+ 'Use this prefix for unqualified URLs. '
+ 'E.g. "gvsearch2:python" downloads two videos from google videos for the search term "python". '
+ 'Use the value "auto" to let yt-dlp guess ("auto_warning" to emit a warning when guessing). '
+ '"error" just throws an error. The default value "fixup_error" repairs broken URLs, '
+ 'but emits an error if this is not possible instead of searching'))
+ general.add_option(
+ '--ignore-config', '--no-config',
+ action='store_true', dest='ignoreconfig',
+ help=(
+ 'Don\'t load any more configuration files except those given to --config-locations. '
+ 'For backward compatibility, if this option is found inside the system configuration file, the user configuration is not loaded. '
+ '(Alias: --no-config)'))
+ general.add_option(
+ '--no-config-locations',
+ action='store_const', dest='config_locations', const=[],
+ help=(
+ 'Do not load any custom configuration files (default). When given inside a '
+ 'configuration file, ignore all previous --config-locations defined in the current file'))
+ general.add_option(
+ '--config-locations',
+ dest='config_locations', metavar='PATH', action='append',
+ help=(
+ 'Location of the main configuration file; either the path to the config or its containing directory '
+ '("-" for stdin). Can be used multiple times and inside other configuration files'))
+ general.add_option(
+ '--flat-playlist',
+ action='store_const', dest='extract_flat', const='in_playlist', default=False,
+ help='Do not extract the videos of a playlist, only list them')
+ general.add_option(
+ '--no-flat-playlist',
+ action='store_false', dest='extract_flat',
+ help='Fully extract the videos of a playlist (default)')
+ general.add_option(
+ '--live-from-start',
+ action='store_true', dest='live_from_start',
+ help='Download livestreams from the start. Currently only supported for YouTube (Experimental)')
+ general.add_option(
+ '--no-live-from-start',
+ action='store_false', dest='live_from_start',
+ help='Download livestreams from the current time (default)')
+ general.add_option(
+ '--wait-for-video',
+ dest='wait_for_video', metavar='MIN[-MAX]', default=None,
+ help=(
+ 'Wait for scheduled streams to become available. '
+ 'Pass the minimum number of seconds (or range) to wait between retries'))
+ general.add_option(
+ '--no-wait-for-video',
+ dest='wait_for_video', action='store_const', const=None,
+ help='Do not wait for scheduled streams (default)')
+ general.add_option(
+ '--mark-watched',
+ action='store_true', dest='mark_watched', default=False,
+ help='Mark videos watched (even with --simulate)')
+ general.add_option(
+ '--no-mark-watched',
+ action='store_false', dest='mark_watched',
+ help='Do not mark videos watched (default)')
+ general.add_option(
+ '--no-colors', '--no-colours',
+ action='store_const', dest='color', const={
+ 'stdout': 'no_color',
+ 'stderr': 'no_color',
+ },
+ help=optparse.SUPPRESS_HELP)
+ general.add_option(
+ '--color',
+ dest='color', metavar='[STREAM:]POLICY', default={}, type='str',
+ action='callback', callback=_dict_from_options_callback,
+ callback_kwargs={
+ 'allowed_keys': 'stdout|stderr',
+ 'default_key': ['stdout', 'stderr'],
+ 'process': str.strip,
+ }, help=(
+ 'Whether to emit color codes in output, optionally prefixed by '
+ 'the STREAM (stdout or stderr) to apply the setting to. '
+ 'Can be one of "always", "auto" (default), "never", or '
+ '"no_color" (use non color terminal sequences). '
+ 'Can be used multiple times'))
+ general.add_option(
+ '--compat-options',
+ metavar='OPTS', dest='compat_opts', default=set(), type='str',
+ action='callback', callback=_set_from_options_callback,
+ callback_kwargs={
+ 'allowed_values': {
+ 'filename', 'filename-sanitization', 'format-sort', 'abort-on-error', 'format-spec', 'no-playlist-metafiles',
+ 'multistreams', 'no-live-chat', 'playlist-index', 'list-formats', 'no-direct-merge', 'playlist-match-filter',
+ 'no-attach-info-json', 'embed-thumbnail-atomicparsley', 'no-external-downloader-progress',
+ 'embed-metadata', 'seperate-video-versions', 'no-clean-infojson', 'no-keep-subs', 'no-certifi',
+ 'no-youtube-channel-redirect', 'no-youtube-unavailable-videos', 'no-youtube-prefer-utc-upload-date',
+ 'prefer-legacy-http-handler', 'manifest-filesize-approx'
+ }, 'aliases': {
+ 'youtube-dl': ['all', '-multistreams', '-playlist-match-filter', '-manifest-filesize-approx'],
+ 'youtube-dlc': ['all', '-no-youtube-channel-redirect', '-no-live-chat', '-playlist-match-filter', '-manifest-filesize-approx'],
+ '2021': ['2022', 'no-certifi', 'filename-sanitization', 'no-youtube-prefer-utc-upload-date'],
+ '2022': ['2023', 'no-external-downloader-progress', 'playlist-match-filter', 'prefer-legacy-http-handler', 'manifest-filesize-approx'],
+ '2023': [],
+ }
+ }, help=(
+ 'Options that can help keep compatibility with youtube-dl or youtube-dlc '
+ 'configurations by reverting some of the changes made in yt-dlp. '
+ 'See "Differences in default behavior" for details'))
+ general.add_option(
+ '--alias', metavar='ALIASES OPTIONS', dest='_', type='str', nargs=2,
+ action='callback', callback=_create_alias,
+ help=(
+ 'Create aliases for an option string. Unless an alias starts with a dash "-", it is prefixed with "--". '
+ 'Arguments are parsed according to the Python string formatting mini-language. '
+ 'E.g. --alias get-audio,-X "-S=aext:{0},abr -x --audio-format {0}" creates options '
+ '"--get-audio" and "-X" that takes an argument (ARG0) and expands to '
+ '"-S=aext:ARG0,abr -x --audio-format ARG0". All defined aliases are listed in the --help output. '
+ 'Alias options can trigger more aliases; so be careful to avoid defining recursive options. '
+ f'As a safety measure, each alias may be triggered a maximum of {_YoutubeDLOptionParser.ALIAS_TRIGGER_LIMIT} times. '
+ 'This option can be used multiple times'))
+
+ network = optparse.OptionGroup(parser, 'Network Options')
+ network.add_option(
+ '--proxy', dest='proxy',
+ default=None, metavar='URL',
+ help=(
+ 'Use the specified HTTP/HTTPS/SOCKS proxy. To enable SOCKS proxy, specify a proper scheme, '
+ 'e.g. socks5://user:pass@127.0.0.1:1080/. Pass in an empty string (--proxy "") for direct connection'))
+ network.add_option(
+ '--socket-timeout',
+ dest='socket_timeout', type=float, default=None, metavar='SECONDS',
+ help='Time to wait before giving up, in seconds')
+ network.add_option(
+ '--source-address',
+ metavar='IP', dest='source_address', default=None,
+ help='Client-side IP address to bind to',
+ )
+ network.add_option(
+ '-4', '--force-ipv4',
+ action='store_const', const='0.0.0.0', dest='source_address',
+ help='Make all connections via IPv4',
+ )
+ network.add_option(
+ '-6', '--force-ipv6',
+ action='store_const', const='::', dest='source_address',
+ help='Make all connections via IPv6',
+ )
+ network.add_option(
+ '--enable-file-urls', action='store_true',
+ dest='enable_file_urls', default=False,
+ help='Enable file:// URLs. This is disabled by default for security reasons.'
+ )
+
+ geo = optparse.OptionGroup(parser, 'Geo-restriction')
+ geo.add_option(
+ '--geo-verification-proxy',
+ dest='geo_verification_proxy', default=None, metavar='URL',
+ help=(
+ 'Use this proxy to verify the IP address for some geo-restricted sites. '
+ 'The default proxy specified by --proxy (or none, if the option is not present) is used for the actual downloading'))
+ geo.add_option(
+ '--cn-verification-proxy',
+ dest='cn_verification_proxy', default=None, metavar='URL',
+ help=optparse.SUPPRESS_HELP)
+ geo.add_option(
+ '--xff', metavar='VALUE',
+ dest='geo_bypass', default='default',
+ help=(
+ 'How to fake X-Forwarded-For HTTP header to try bypassing geographic restriction. '
+ 'One of "default" (only when known to be useful), "never", '
+ 'an IP block in CIDR notation, or a two-letter ISO 3166-2 country code'))
+ geo.add_option(
+ '--geo-bypass',
+ action='store_const', dest='geo_bypass', const='default',
+ help=optparse.SUPPRESS_HELP)
+ geo.add_option(
+ '--no-geo-bypass',
+ action='store_const', dest='geo_bypass', const='never',
+ help=optparse.SUPPRESS_HELP)
+ geo.add_option(
+ '--geo-bypass-country', metavar='CODE', dest='geo_bypass',
+ help=optparse.SUPPRESS_HELP)
+ geo.add_option(
+ '--geo-bypass-ip-block', metavar='IP_BLOCK', dest='geo_bypass',
+ help=optparse.SUPPRESS_HELP)
+
+ selection = optparse.OptionGroup(parser, 'Video Selection')
+ selection.add_option(
+ '--playlist-start',
+ dest='playliststart', metavar='NUMBER', default=1, type=int,
+ help=optparse.SUPPRESS_HELP)
+ selection.add_option(
+ '--playlist-end',
+ dest='playlistend', metavar='NUMBER', default=None, type=int,
+ help=optparse.SUPPRESS_HELP)
+ selection.add_option(
+ '-I', '--playlist-items',
+ dest='playlist_items', metavar='ITEM_SPEC', default=None,
+ help=(
+            'Comma-separated playlist_index values of the items to download. '
+ 'You can specify a range using "[START]:[STOP][:STEP]". For backward compatibility, START-STOP is also supported. '
+ 'Use negative indices to count from the right and negative STEP to download in reverse order. '
+ 'E.g. "-I 1:3,7,-5::2" used on a playlist of size 15 will download the items at index 1,2,3,7,11,13,15'))
+ selection.add_option(
+ '--match-title',
+ dest='matchtitle', metavar='REGEX',
+ help=optparse.SUPPRESS_HELP)
+ selection.add_option(
+ '--reject-title',
+ dest='rejecttitle', metavar='REGEX',
+ help=optparse.SUPPRESS_HELP)
+ selection.add_option(
+ '--min-filesize',
+ metavar='SIZE', dest='min_filesize', default=None,
+ help='Abort download if filesize is smaller than SIZE, e.g. 50k or 44.6M')
+ selection.add_option(
+ '--max-filesize',
+ metavar='SIZE', dest='max_filesize', default=None,
+ help='Abort download if filesize is larger than SIZE, e.g. 50k or 44.6M')
+ selection.add_option(
+ '--date',
+ metavar='DATE', dest='date', default=None,
+ help=(
+ 'Download only videos uploaded on this date. '
+ 'The date can be "YYYYMMDD" or in the format [now|today|yesterday][-N[day|week|month|year]]. '
+ 'E.g. "--date today-2weeks" downloads only videos uploaded on the same day two weeks ago'))
+ selection.add_option(
+ '--datebefore',
+ metavar='DATE', dest='datebefore', default=None,
+ help=(
+ 'Download only videos uploaded on or before this date. '
+            'The date formats accepted are the same as --date'))
+ selection.add_option(
+ '--dateafter',
+ metavar='DATE', dest='dateafter', default=None,
+ help=(
+ 'Download only videos uploaded on or after this date. '
+            'The date formats accepted are the same as --date'))
+ selection.add_option(
+ '--min-views',
+ metavar='COUNT', dest='min_views', default=None, type=int,
+ help=optparse.SUPPRESS_HELP)
+ selection.add_option(
+ '--max-views',
+ metavar='COUNT', dest='max_views', default=None, type=int,
+ help=optparse.SUPPRESS_HELP)
+ selection.add_option(
+ '--match-filters',
+ metavar='FILTER', dest='match_filter', action='append',
+ help=(
+ 'Generic video filter. Any "OUTPUT TEMPLATE" field can be compared with a '
+ 'number or a string using the operators defined in "Filtering Formats". '
+ 'You can also simply specify a field to match if the field is present, '
+ 'use "!field" to check if the field is not present, and "&" to check multiple conditions. '
+ 'Use a "\\" to escape "&" or quotes if needed. If used multiple times, '
+            'the filter matches if at least one of the conditions is met. E.g. --match-filter '
+            '!is_live --match-filter "like_count>?100 & description~=\'(?i)\\bcats \\& dogs\\b\'" '
+            'matches only videos that are not live OR those that have a like count of more than 100 '
+            '(or the like field is not available) and also have a description '
+ 'that contains the phrase "cats & dogs" (caseless). '
+ 'Use "--match-filter -" to interactively ask whether to download each video'))
+ selection.add_option(
+ '--no-match-filters',
+ dest='match_filter', action='store_const', const=None,
+ help='Do not use any --match-filter (default)')
+ selection.add_option(
+ '--break-match-filters',
+ metavar='FILTER', dest='breaking_match_filter', action='append',
+ help='Same as "--match-filters" but stops the download process when a video is rejected')
+ selection.add_option(
+ '--no-break-match-filters',
+ dest='breaking_match_filter', action='store_const', const=None,
+ help='Do not use any --break-match-filters (default)')
+ selection.add_option(
+ '--no-playlist',
+ action='store_true', dest='noplaylist', default=False,
+ help='Download only the video, if the URL refers to a video and a playlist')
+ selection.add_option(
+ '--yes-playlist',
+ action='store_false', dest='noplaylist',
+ help='Download the playlist, if the URL refers to a video and a playlist')
+ selection.add_option(
+ '--age-limit',
+ metavar='YEARS', dest='age_limit', default=None, type=int,
+ help='Download only videos suitable for the given age')
+ selection.add_option(
+ '--download-archive', metavar='FILE',
+ dest='download_archive',
+ help='Download only videos not listed in the archive file. Record the IDs of all downloaded videos in it')
+ selection.add_option(
+ '--no-download-archive',
+        dest='download_archive', action='store_const', const=None,
+ help='Do not use archive file (default)')
+ selection.add_option(
+ '--max-downloads',
+ dest='max_downloads', metavar='NUMBER', type=int, default=None,
+ help='Abort after downloading NUMBER files')
+ selection.add_option(
+ '--break-on-existing',
+ action='store_true', dest='break_on_existing', default=False,
+ help='Stop the download process when encountering a file that is in the archive')
+ selection.add_option(
+ '--break-on-reject',
+ action='store_true', dest='break_on_reject', default=False,
+ help=optparse.SUPPRESS_HELP)
+ selection.add_option(
+ '--break-per-input',
+ action='store_true', dest='break_per_url', default=False,
+        help='Alters --max-downloads, --break-on-existing, --break-match-filters, and autonumber to reset per input URL')
+ selection.add_option(
+ '--no-break-per-input',
+ action='store_false', dest='break_per_url',
+        help='--break-on-existing and similar options terminate the entire download queue')
+ selection.add_option(
+ '--skip-playlist-after-errors', metavar='N',
+ dest='skip_playlist_after_errors', default=None, type=int,
+ help='Number of allowed failures until the rest of the playlist is skipped')
+ selection.add_option(
+ '--include-ads',
+ dest='include_ads', action='store_true',
+ help=optparse.SUPPRESS_HELP)
+ selection.add_option(
+ '--no-include-ads',
+ dest='include_ads', action='store_false',
+ help=optparse.SUPPRESS_HELP)
+
+ authentication = optparse.OptionGroup(parser, 'Authentication Options')
+ authentication.add_option(
+ '-u', '--username',
+ dest='username', metavar='USERNAME',
+ help='Login with this account ID')
+ authentication.add_option(
+ '-p', '--password',
+ dest='password', metavar='PASSWORD',
+ help='Account password. If this option is left out, yt-dlp will ask interactively')
+ authentication.add_option(
+ '-2', '--twofactor',
+ dest='twofactor', metavar='TWOFACTOR',
+ help='Two-factor authentication code')
+ authentication.add_option(
+ '-n', '--netrc',
+ action='store_true', dest='usenetrc', default=False,
+ help='Use .netrc authentication data')
+ authentication.add_option(
+ '--netrc-location',
+ dest='netrc_location', metavar='PATH',
+ help='Location of .netrc authentication data; either the path or its containing directory. Defaults to ~/.netrc')
+ authentication.add_option(
+ '--netrc-cmd',
+ dest='netrc_cmd', metavar='NETRC_CMD',
+ help='Command to execute to get the credentials for an extractor.')
+ authentication.add_option(
+ '--video-password',
+ dest='videopassword', metavar='PASSWORD',
+ help='Video-specific password')
+ authentication.add_option(
+ '--ap-mso',
+ dest='ap_mso', metavar='MSO',
+ help='Adobe Pass multiple-system operator (TV provider) identifier, use --ap-list-mso for a list of available MSOs')
+ authentication.add_option(
+ '--ap-username',
+ dest='ap_username', metavar='USERNAME',
+ help='Multiple-system operator account login')
+ authentication.add_option(
+ '--ap-password',
+ dest='ap_password', metavar='PASSWORD',
+ help='Multiple-system operator account password. If this option is left out, yt-dlp will ask interactively')
+ authentication.add_option(
+ '--ap-list-mso',
+ action='store_true', dest='ap_list_mso', default=False,
+ help='List all supported multiple-system operators')
+ authentication.add_option(
+ '--client-certificate',
+ dest='client_certificate', metavar='CERTFILE',
+ help='Path to client certificate file in PEM format. May include the private key')
+ authentication.add_option(
+ '--client-certificate-key',
+ dest='client_certificate_key', metavar='KEYFILE',
+ help='Path to private key file for client certificate')
+ authentication.add_option(
+ '--client-certificate-password',
+ dest='client_certificate_password', metavar='PASSWORD',
+ help='Password for client certificate private key, if encrypted. '
+ 'If not provided, and the key is encrypted, yt-dlp will ask interactively')
+
+ video_format = optparse.OptionGroup(parser, 'Video Format Options')
+ video_format.add_option(
+ '-f', '--format',
+ action='store', dest='format', metavar='FORMAT', default=None,
+ help='Video format code, see "FORMAT SELECTION" for more details')
+ video_format.add_option(
+ '-S', '--format-sort', metavar='SORTORDER',
+ dest='format_sort', default=[], type='str', action='callback',
+ callback=_list_from_options_callback, callback_kwargs={'append': -1},
+ help='Sort the formats by the fields given, see "Sorting Formats" for more details')
+ video_format.add_option(
+ '--format-sort-force', '--S-force',
+ action='store_true', dest='format_sort_force', metavar='FORMAT', default=False,
+ help=(
+ 'Force user specified sort order to have precedence over all fields, '
+ 'see "Sorting Formats" for more details (Alias: --S-force)'))
+ video_format.add_option(
+ '--no-format-sort-force',
+ action='store_false', dest='format_sort_force', metavar='FORMAT', default=False,
+ help='Some fields have precedence over the user specified sort order (default)')
+ video_format.add_option(
+ '--video-multistreams',
+ action='store_true', dest='allow_multiple_video_streams', default=None,
+ help='Allow multiple video streams to be merged into a single file')
+ video_format.add_option(
+ '--no-video-multistreams',
+ action='store_false', dest='allow_multiple_video_streams',
+ help='Only one video stream is downloaded for each output file (default)')
+ video_format.add_option(
+ '--audio-multistreams',
+ action='store_true', dest='allow_multiple_audio_streams', default=None,
+ help='Allow multiple audio streams to be merged into a single file')
+ video_format.add_option(
+ '--no-audio-multistreams',
+ action='store_false', dest='allow_multiple_audio_streams',
+ help='Only one audio stream is downloaded for each output file (default)')
+ video_format.add_option(
+ '--all-formats',
+ action='store_const', dest='format', const='all',
+ help=optparse.SUPPRESS_HELP)
+ video_format.add_option(
+ '--prefer-free-formats',
+ action='store_true', dest='prefer_free_formats', default=False,
+ help=(
+            'Prefer video formats with free containers over non-free ones of the same quality. '
+ 'Use with "-S ext" to strictly prefer free containers irrespective of quality'))
+ video_format.add_option(
+ '--no-prefer-free-formats',
+ action='store_false', dest='prefer_free_formats', default=False,
+ help="Don't give any special preference to free containers (default)")
+ video_format.add_option(
+ '--check-formats',
+ action='store_const', const='selected', dest='check_formats', default=None,
+ help='Make sure formats are selected only from those that are actually downloadable')
+ video_format.add_option(
+ '--check-all-formats',
+ action='store_true', dest='check_formats',
+ help='Check all formats for whether they are actually downloadable')
+ video_format.add_option(
+ '--no-check-formats',
+ action='store_false', dest='check_formats',
+ help='Do not check that the formats are actually downloadable')
+ video_format.add_option(
+ '-F', '--list-formats',
+ action='store_true', dest='listformats',
+ help='List available formats of each video. Simulate unless --no-simulate is used')
+ video_format.add_option(
+ '--list-formats-as-table',
+ action='store_true', dest='listformats_table', default=True,
+ help=optparse.SUPPRESS_HELP)
+ video_format.add_option(
+ '--list-formats-old', '--no-list-formats-as-table',
+ action='store_false', dest='listformats_table',
+ help=optparse.SUPPRESS_HELP)
+ video_format.add_option(
+ '--merge-output-format',
+ action='store', dest='merge_output_format', metavar='FORMAT', default=None,
+ help=(
+ 'Containers that may be used when merging formats, separated by "/", e.g. "mp4/mkv". '
+ 'Ignored if no merge is required. '
+ f'(currently supported: {", ".join(sorted(FFmpegMergerPP.SUPPORTED_EXTS))})'))
+ video_format.add_option(
+ '--allow-unplayable-formats',
+ action='store_true', dest='allow_unplayable_formats', default=False,
+ help=optparse.SUPPRESS_HELP)
+ video_format.add_option(
+ '--no-allow-unplayable-formats',
+ action='store_false', dest='allow_unplayable_formats',
+ help=optparse.SUPPRESS_HELP)
+
+ subtitles = optparse.OptionGroup(parser, 'Subtitle Options')
+ subtitles.add_option(
+ '--write-subs', '--write-srt',
+ action='store_true', dest='writesubtitles', default=False,
+ help='Write subtitle file')
+ subtitles.add_option(
+ '--no-write-subs', '--no-write-srt',
+ action='store_false', dest='writesubtitles',
+ help='Do not write subtitle file (default)')
+ subtitles.add_option(
+ '--write-auto-subs', '--write-automatic-subs',
+ action='store_true', dest='writeautomaticsub', default=False,
+ help='Write automatically generated subtitle file (Alias: --write-automatic-subs)')
+ subtitles.add_option(
+ '--no-write-auto-subs', '--no-write-automatic-subs',
+ action='store_false', dest='writeautomaticsub', default=False,
+ help='Do not write auto-generated subtitles (default) (Alias: --no-write-automatic-subs)')
+ subtitles.add_option(
+ '--all-subs',
+ action='store_true', dest='allsubtitles', default=False,
+ help=optparse.SUPPRESS_HELP)
+ subtitles.add_option(
+ '--list-subs',
+ action='store_true', dest='listsubtitles', default=False,
+ help='List available subtitles of each video. Simulate unless --no-simulate is used')
+ subtitles.add_option(
+ '--sub-format',
+ action='store', dest='subtitlesformat', metavar='FORMAT', default='best',
+ help='Subtitle format; accepts formats preference, e.g. "srt" or "ass/srt/best"')
+ subtitles.add_option(
+ '--sub-langs', '--srt-langs',
+ action='callback', dest='subtitleslangs', metavar='LANGS', type='str',
+ default=[], callback=_list_from_options_callback,
+ help=(
+ 'Languages of the subtitles to download (can be regex) or "all" separated by commas, e.g. --sub-langs "en.*,ja". '
+ 'You can prefix the language code with a "-" to exclude it from the requested languages, e.g. --sub-langs all,-live_chat. '
+ 'Use --list-subs for a list of available language tags'))
+
+ downloader = optparse.OptionGroup(parser, 'Download Options')
+ downloader.add_option(
+ '-N', '--concurrent-fragments',
+ dest='concurrent_fragment_downloads', metavar='N', default=1, type=int,
+ help='Number of fragments of a dash/hlsnative video that should be downloaded concurrently (default is %default)')
+ downloader.add_option(
+ '-r', '--limit-rate', '--rate-limit',
+ dest='ratelimit', metavar='RATE',
+ help='Maximum download rate in bytes per second, e.g. 50K or 4.2M')
+ downloader.add_option(
+ '--throttled-rate',
+ dest='throttledratelimit', metavar='RATE',
+ help='Minimum download rate in bytes per second below which throttling is assumed and the video data is re-extracted, e.g. 100K')
+ downloader.add_option(
+ '-R', '--retries',
+ dest='retries', metavar='RETRIES', default=10,
+ help='Number of retries (default is %default), or "infinite"')
+ downloader.add_option(
+ '--file-access-retries',
+ dest='file_access_retries', metavar='RETRIES', default=3,
+ help='Number of times to retry on file access error (default is %default), or "infinite"')
+ downloader.add_option(
+ '--fragment-retries',
+ dest='fragment_retries', metavar='RETRIES', default=10,
+ help='Number of retries for a fragment (default is %default), or "infinite" (DASH, hlsnative and ISM)')
+ downloader.add_option(
+ '--retry-sleep',
+ dest='retry_sleep', metavar='[TYPE:]EXPR', default={}, type='str',
+ action='callback', callback=_dict_from_options_callback,
+ callback_kwargs={
+ 'allowed_keys': 'http|fragment|file_access|extractor',
+ 'default_key': 'http',
+ }, help=(
+ 'Time to sleep between retries in seconds (optionally) prefixed by the type of retry '
+ '(http (default), fragment, file_access, extractor) to apply the sleep to. '
+ 'EXPR can be a number, linear=START[:END[:STEP=1]] or exp=START[:END[:BASE=2]]. '
+ 'This option can be used multiple times to set the sleep for the different retry types, '
+ 'e.g. --retry-sleep linear=1::2 --retry-sleep fragment:exp=1:20'))
+ downloader.add_option(
+ '--skip-unavailable-fragments', '--no-abort-on-unavailable-fragments',
+ action='store_true', dest='skip_unavailable_fragments', default=True,
+ help='Skip unavailable fragments for DASH, hlsnative and ISM downloads (default) (Alias: --no-abort-on-unavailable-fragments)')
+ downloader.add_option(
+ '--abort-on-unavailable-fragments', '--no-skip-unavailable-fragments',
+ action='store_false', dest='skip_unavailable_fragments',
+ help='Abort download if a fragment is unavailable (Alias: --no-skip-unavailable-fragments)')
+ downloader.add_option(
+ '--keep-fragments',
+ action='store_true', dest='keep_fragments', default=False,
+ help='Keep downloaded fragments on disk after downloading is finished')
+ downloader.add_option(
+ '--no-keep-fragments',
+ action='store_false', dest='keep_fragments',
+ help='Delete downloaded fragments after downloading is finished (default)')
+ downloader.add_option(
+ '--buffer-size',
+ dest='buffersize', metavar='SIZE', default='1024',
+ help='Size of download buffer, e.g. 1024 or 16K (default is %default)')
+ downloader.add_option(
+ '--resize-buffer',
+ action='store_false', dest='noresizebuffer',
+ help='The buffer size is automatically resized from an initial value of --buffer-size (default)')
+ downloader.add_option(
+ '--no-resize-buffer',
+ action='store_true', dest='noresizebuffer', default=False,
+ help='Do not automatically adjust the buffer size')
+ downloader.add_option(
+ '--http-chunk-size',
+ dest='http_chunk_size', metavar='SIZE', default=None,
+ help=(
+ 'Size of a chunk for chunk-based HTTP downloading, e.g. 10485760 or 10M (default is disabled). '
+ 'May be useful for bypassing bandwidth throttling imposed by a webserver (experimental)'))
+ downloader.add_option(
+ '--test',
+ action='store_true', dest='test', default=False,
+ help=optparse.SUPPRESS_HELP)
+ downloader.add_option(
+ '--playlist-reverse',
+ action='store_true', dest='playlist_reverse',
+ help=optparse.SUPPRESS_HELP)
+ downloader.add_option(
+ '--no-playlist-reverse',
+ action='store_false', dest='playlist_reverse',
+ help=optparse.SUPPRESS_HELP)
+ downloader.add_option(
+ '--playlist-random',
+ action='store_true', dest='playlist_random',
+ help='Download playlist videos in random order')
+ downloader.add_option(
+ '--lazy-playlist',
+ action='store_true', dest='lazy_playlist',
+ help='Process entries in the playlist as they are received. This disables n_entries, --playlist-random and --playlist-reverse')
+ downloader.add_option(
+ '--no-lazy-playlist',
+ action='store_false', dest='lazy_playlist',
+ help='Process videos in the playlist only after the entire playlist is parsed (default)')
+ downloader.add_option(
+ '--xattr-set-filesize',
+ dest='xattr_set_filesize', action='store_true',
+ help='Set file xattribute ytdl.filesize with expected file size')
+ downloader.add_option(
+ '--hls-prefer-native',
+ dest='hls_prefer_native', action='store_true', default=None,
+ help=optparse.SUPPRESS_HELP)
+ downloader.add_option(
+ '--hls-prefer-ffmpeg',
+ dest='hls_prefer_native', action='store_false', default=None,
+ help=optparse.SUPPRESS_HELP)
+ downloader.add_option(
+ '--hls-use-mpegts',
+ dest='hls_use_mpegts', action='store_true', default=None,
+ help=(
+            'Use the mpegts container for HLS videos, '
+            'allowing some players to play the video while downloading, '
+            'and reducing the chance of file corruption if the download is interrupted. '
+ 'This is enabled by default for live streams'))
+ downloader.add_option(
+ '--no-hls-use-mpegts',
+ dest='hls_use_mpegts', action='store_false',
+ help=(
+ 'Do not use the mpegts container for HLS videos. '
+ 'This is default when not downloading live streams'))
+ downloader.add_option(
+ '--download-sections',
+ metavar='REGEX', dest='download_ranges', action='append',
+ help=(
+ 'Download only chapters that match the regular expression. '
+ 'A "*" prefix denotes time-range instead of chapter. Negative timestamps are calculated from the end. '
+ '"*from-url" can be used to download between the "start_time" and "end_time" extracted from the URL. '
+ 'Needs ffmpeg. This option can be used multiple times to download multiple sections, '
+ 'e.g. --download-sections "*10:15-inf" --download-sections "intro"'))
+ downloader.add_option(
+ '--downloader', '--external-downloader',
+ dest='external_downloader', metavar='[PROTO:]NAME', default={}, type='str',
+ action='callback', callback=_dict_from_options_callback,
+ callback_kwargs={
+ 'allowed_keys': 'http|ftp|m3u8|dash|rtsp|rtmp|mms',
+ 'default_key': 'default',
+ 'process': str.strip
+ }, help=(
+ 'Name or path of the external downloader to use (optionally) prefixed by '
+            'the protocols (http, ftp, m3u8, dash, rtsp, rtmp, mms) to use it for. '
+ f'Currently supports native, {", ".join(sorted(list_external_downloaders()))}. '
+ 'You can use this option multiple times to set different downloaders for different protocols. '
+ 'E.g. --downloader aria2c --downloader "dash,m3u8:native" will use '
+ 'aria2c for http/ftp downloads, and the native downloader for dash/m3u8 downloads '
+ '(Alias: --external-downloader)'))
+ downloader.add_option(
+ '--downloader-args', '--external-downloader-args',
+ metavar='NAME:ARGS', dest='external_downloader_args', default={}, type='str',
+ action='callback', callback=_dict_from_options_callback,
+ callback_kwargs={
+ 'allowed_keys': r'ffmpeg_[io]\d*|%s' % '|'.join(map(re.escape, list_external_downloaders())),
+ 'default_key': 'default',
+ 'process': shlex.split
+ }, help=(
+ 'Give these arguments to the external downloader. '
+ 'Specify the downloader name and the arguments separated by a colon ":". '
+ 'For ffmpeg, arguments can be passed to different positions using the same syntax as --postprocessor-args. '
+ 'You can use this option multiple times to give different arguments to different downloaders '
+ '(Alias: --external-downloader-args)'))
+
+ workarounds = optparse.OptionGroup(parser, 'Workarounds')
+ workarounds.add_option(
+ '--encoding',
+ dest='encoding', metavar='ENCODING',
+ help='Force the specified encoding (experimental)')
+ workarounds.add_option(
+ '--legacy-server-connect',
+ action='store_true', dest='legacy_server_connect', default=False,
+ help='Explicitly allow HTTPS connection to servers that do not support RFC 5746 secure renegotiation')
+ workarounds.add_option(
+ '--no-check-certificates',
+ action='store_true', dest='no_check_certificate', default=False,
+ help='Suppress HTTPS certificate validation')
+ workarounds.add_option(
+ '--prefer-insecure', '--prefer-unsecure',
+ action='store_true', dest='prefer_insecure',
+ help='Use an unencrypted connection to retrieve information about the video (Currently supported only for YouTube)')
+ workarounds.add_option(
+ '--user-agent',
+ metavar='UA', dest='user_agent',
+ help=optparse.SUPPRESS_HELP)
+ workarounds.add_option(
+ '--referer',
+ metavar='URL', dest='referer', default=None,
+ help=optparse.SUPPRESS_HELP)
+ workarounds.add_option(
+ '--add-headers',
+ metavar='FIELD:VALUE', dest='headers', default={}, type='str',
+ action='callback', callback=_dict_from_options_callback,
+ callback_kwargs={'multiple_keys': False},
+ help='Specify a custom HTTP header and its value, separated by a colon ":". You can use this option multiple times',
+ )
+ workarounds.add_option(
+ '--bidi-workaround',
+ dest='bidi_workaround', action='store_true',
+ help='Work around terminals that lack bidirectional text support. Requires bidiv or fribidi executable in PATH')
+ workarounds.add_option(
+ '--sleep-requests', metavar='SECONDS',
+ dest='sleep_interval_requests', type=float,
+ help='Number of seconds to sleep between requests during data extraction')
+ workarounds.add_option(
+ '--sleep-interval', '--min-sleep-interval', metavar='SECONDS',
+ dest='sleep_interval', type=float,
+ help=(
+ 'Number of seconds to sleep before each download. '
+ 'This is the minimum time to sleep when used along with --max-sleep-interval '
+ '(Alias: --min-sleep-interval)'))
+ workarounds.add_option(
+ '--max-sleep-interval', metavar='SECONDS',
+ dest='max_sleep_interval', type=float,
+ help='Maximum number of seconds to sleep. Can only be used along with --min-sleep-interval')
+ workarounds.add_option(
+ '--sleep-subtitles', metavar='SECONDS',
+ dest='sleep_interval_subtitles', default=0, type=int,
+ help='Number of seconds to sleep before each subtitle download')
+
+ verbosity = optparse.OptionGroup(parser, 'Verbosity and Simulation Options')
+ verbosity.add_option(
+ '-q', '--quiet',
+ action='store_true', dest='quiet', default=None,
+ help='Activate quiet mode. If used with --verbose, print the log to stderr')
+ verbosity.add_option(
+ '--no-quiet',
+ action='store_false', dest='quiet',
+ help='Deactivate quiet mode. (Default)')
+ verbosity.add_option(
+ '--no-warnings',
+ dest='no_warnings', action='store_true', default=False,
+ help='Ignore warnings')
+ verbosity.add_option(
+ '-s', '--simulate',
+ action='store_true', dest='simulate', default=None,
+ help='Do not download the video and do not write anything to disk')
+ verbosity.add_option(
+ '--no-simulate',
+ action='store_false', dest='simulate',
+ help='Download the video even if printing/listing options are used')
+ verbosity.add_option(
+ '--ignore-no-formats-error',
+ action='store_true', dest='ignore_no_formats_error', default=False,
+ help=(
+ 'Ignore "No video formats" error. Useful for extracting metadata '
+ 'even if the videos are not actually available for download (experimental)'))
+ verbosity.add_option(
+ '--no-ignore-no-formats-error',
+ action='store_false', dest='ignore_no_formats_error',
+ help='Throw error when no downloadable video formats are found (default)')
+ verbosity.add_option(
+ '--skip-download', '--no-download',
+ action='store_true', dest='skip_download', default=False,
+ help='Do not download the video but write all related files (Alias: --no-download)')
+ verbosity.add_option(
+ '-O', '--print',
+ metavar='[WHEN:]TEMPLATE', dest='forceprint', **when_prefix('video'),
+ help=(
+ 'Field name or output template to print to screen, optionally prefixed with when to print it, separated by a ":". '
+ 'Supported values of "WHEN" are the same as that of --use-postprocessor (default: video). '
+ 'Implies --quiet. Implies --simulate unless --no-simulate or later stages of WHEN are used. '
+ 'This option can be used multiple times'))
+ verbosity.add_option(
+ '--print-to-file',
+ metavar='[WHEN:]TEMPLATE FILE', dest='print_to_file', nargs=2, **when_prefix('video'),
+ help=(
+            'Append the given template to the file. The values of WHEN and TEMPLATE are the same as those of --print. '
+ 'FILE uses the same syntax as the output template. This option can be used multiple times'))
+ verbosity.add_option(
+ '-g', '--get-url',
+ action='store_true', dest='geturl', default=False,
+ help=optparse.SUPPRESS_HELP)
+ verbosity.add_option(
+ '-e', '--get-title',
+ action='store_true', dest='gettitle', default=False,
+ help=optparse.SUPPRESS_HELP)
+ verbosity.add_option(
+ '--get-id',
+ action='store_true', dest='getid', default=False,
+ help=optparse.SUPPRESS_HELP)
+ verbosity.add_option(
+ '--get-thumbnail',
+ action='store_true', dest='getthumbnail', default=False,
+ help=optparse.SUPPRESS_HELP)
+ verbosity.add_option(
+ '--get-description',
+ action='store_true', dest='getdescription', default=False,
+ help=optparse.SUPPRESS_HELP)
+ verbosity.add_option(
+ '--get-duration',
+ action='store_true', dest='getduration', default=False,
+ help=optparse.SUPPRESS_HELP)
+ verbosity.add_option(
+ '--get-filename',
+ action='store_true', dest='getfilename', default=False,
+ help=optparse.SUPPRESS_HELP)
+ verbosity.add_option(
+ '--get-format',
+ action='store_true', dest='getformat', default=False,
+ help=optparse.SUPPRESS_HELP)
+ verbosity.add_option(
+ '-j', '--dump-json',
+ action='store_true', dest='dumpjson', default=False,
+ help=(
+ 'Quiet, but print JSON information for each video. Simulate unless --no-simulate is used. '
+ 'See "OUTPUT TEMPLATE" for a description of available keys'))
+ verbosity.add_option(
+ '-J', '--dump-single-json',
+ action='store_true', dest='dump_single_json', default=False,
+ help=(
+            'Quiet, but print JSON information for each URL or infojson passed. Simulate unless --no-simulate is used. '
+ 'If the URL refers to a playlist, the whole playlist information is dumped in a single line'))
+ verbosity.add_option(
+ '--print-json',
+ action='store_true', dest='print_json', default=False,
+ help=optparse.SUPPRESS_HELP)
+ verbosity.add_option(
+ '--force-write-archive', '--force-write-download-archive', '--force-download-archive',
+ action='store_true', dest='force_write_download_archive', default=False,
+ help=(
+            'Force download archive entries to be written as long as no errors occur, '
+ 'even if -s or another simulation option is used (Alias: --force-download-archive)'))
+ verbosity.add_option(
+ '--newline',
+ action='store_true', dest='progress_with_newline', default=False,
+ help='Output progress bar as new lines')
+ verbosity.add_option(
+ '--no-progress',
+ action='store_true', dest='noprogress', default=None,
+ help='Do not print progress bar')
+ verbosity.add_option(
+ '--progress',
+ action='store_false', dest='noprogress',
+ help='Show progress bar, even if in quiet mode')
+ verbosity.add_option(
+ '--console-title',
+ action='store_true', dest='consoletitle', default=False,
+ help='Display progress in console titlebar')
+ verbosity.add_option(
+ '--progress-template',
+ metavar='[TYPES:]TEMPLATE', dest='progress_template', default={}, type='str',
+ action='callback', callback=_dict_from_options_callback,
+ callback_kwargs={
+ 'allowed_keys': '(download|postprocess)(-title)?',
+ 'default_key': 'download'
+ }, help=(
+ 'Template for progress outputs, optionally prefixed with one of "download:" (default), '
+ '"download-title:" (the console title), "postprocess:", or "postprocess-title:". '
+ 'The video\'s fields are accessible under the "info" key and '
+            'the progress attributes are accessible under the "progress" key. E.g. '
+ # TODO: Document the fields inside "progress"
+ '--console-title --progress-template "download-title:%(info.id)s-%(progress.eta)s"'))
+ verbosity.add_option(
+ '-v', '--verbose',
+ action='store_true', dest='verbose', default=False,
+ help='Print various debugging information')
+ verbosity.add_option(
+ '--dump-pages', '--dump-intermediate-pages',
+ action='store_true', dest='dump_intermediate_pages', default=False,
+ help='Print downloaded pages encoded using base64 to debug problems (very verbose)')
+ verbosity.add_option(
+ '--write-pages',
+ action='store_true', dest='write_pages', default=False,
+ help='Write downloaded intermediary pages to files in the current directory to debug problems')
+ verbosity.add_option(
+ '--load-pages',
+ action='store_true', dest='load_pages', default=False,
+ help=optparse.SUPPRESS_HELP)
+ verbosity.add_option(
+ '--youtube-print-sig-code',
+ action='store_true', dest='youtube_print_sig_code', default=False,
+ help=optparse.SUPPRESS_HELP)
+ verbosity.add_option(
+ '--print-traffic', '--dump-headers',
+ dest='debug_printtraffic', action='store_true', default=False,
+ help='Display sent and read HTTP traffic')
+ verbosity.add_option(
+ '-C', '--call-home',
+ dest='call_home', action='store_true', default=False,
+ # help='Contact the yt-dlp server for debugging')
+ help=optparse.SUPPRESS_HELP)
+ verbosity.add_option(
+ '--no-call-home',
+ dest='call_home', action='store_false',
+ # help='Do not contact the yt-dlp server for debugging (default)')
+ help=optparse.SUPPRESS_HELP)
+
+ filesystem = optparse.OptionGroup(parser, 'Filesystem Options')
+ filesystem.add_option(
+ '-a', '--batch-file',
+ dest='batchfile', metavar='FILE',
+ help=(
+ 'File containing URLs to download ("-" for stdin), one URL per line. '
+ 'Lines starting with "#", ";" or "]" are considered comments and ignored'))
+ filesystem.add_option(
+ '--no-batch-file',
+ dest='batchfile', action='store_const', const=None,
+ help='Do not read URLs from batch file (default)')
+ filesystem.add_option(
+ '--id', default=False,
+ action='store_true', dest='useid', help=optparse.SUPPRESS_HELP)
+ filesystem.add_option(
+ '-P', '--paths',
+ metavar='[TYPES:]PATH', dest='paths', default={}, type='str',
+ action='callback', callback=_dict_from_options_callback,
+ callback_kwargs={
+ 'allowed_keys': 'home|temp|%s' % '|'.join(map(re.escape, OUTTMPL_TYPES.keys())),
+ 'default_key': 'home'
+ }, help=(
+ 'The paths where the files should be downloaded. '
+ 'Specify the type of file and the path separated by a colon ":". '
+ 'All the same TYPES as --output are supported. '
+ 'Additionally, you can provide "home" (default) and "temp" paths. '
+ 'All intermediary files are first downloaded to the temp path and '
+ 'then the final files are moved over to the home path after download is finished. '
+ 'This option is ignored if --output is an absolute path'))
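+ # e.g. `yt-dlp -P ~/Videos -P temp:/tmp URL` (illustrative invocation) keeps
+ # intermediate files in /tmp and moves the finished download into ~/Videos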
+ filesystem.add_option(
+ '-o', '--output',
+ metavar='[TYPES:]TEMPLATE', dest='outtmpl', default={}, type='str',
+ action='callback', callback=_dict_from_options_callback,
+ callback_kwargs={
+ 'allowed_keys': '|'.join(map(re.escape, OUTTMPL_TYPES.keys())),
+ 'default_key': 'default'
+ }, help='Output filename template; see "OUTPUT TEMPLATE" for details')
+ filesystem.add_option(
+ '--output-na-placeholder',
+ dest='outtmpl_na_placeholder', metavar='TEXT', default='NA',
+ help=('Placeholder for unavailable fields in --output (default: "%default")'))
+ filesystem.add_option(
+ '--autonumber-size',
+ dest='autonumber_size', metavar='NUMBER', type=int,
+ help=optparse.SUPPRESS_HELP)
+ filesystem.add_option(
+ '--autonumber-start',
+ dest='autonumber_start', metavar='NUMBER', default=1, type=int,
+ help=optparse.SUPPRESS_HELP)
+ filesystem.add_option(
+ '--restrict-filenames',
+ action='store_true', dest='restrictfilenames', default=False,
+ help='Restrict filenames to only ASCII characters, and avoid "&" and spaces in filenames')
+ filesystem.add_option(
+ '--no-restrict-filenames',
+ action='store_false', dest='restrictfilenames',
+ help='Allow Unicode characters, "&" and spaces in filenames (default)')
+ filesystem.add_option(
+ '--windows-filenames',
+ action='store_true', dest='windowsfilenames', default=False,
+ help='Force filenames to be Windows-compatible')
+ filesystem.add_option(
+ '--no-windows-filenames',
+ action='store_false', dest='windowsfilenames',
+ help='Make filenames Windows-compatible only if using Windows (default)')
+ filesystem.add_option(
+ '--trim-filenames', '--trim-file-names', metavar='LENGTH',
+ dest='trim_file_name', default=0, type=int,
+ help='Limit the filename length (excluding extension) to the specified number of characters')
+ filesystem.add_option(
+ '-w', '--no-overwrites',
+ action='store_false', dest='overwrites', default=None,
+ help='Do not overwrite any files')
+ filesystem.add_option(
+ '--force-overwrites', '--yes-overwrites',
+ action='store_true', dest='overwrites',
+ help='Overwrite all video and metadata files. This option includes --no-continue')
+ filesystem.add_option(
+ '--no-force-overwrites',
+ action='store_const', dest='overwrites', const=None,
+ help='Do not overwrite the video, but overwrite related files (default)')
+ filesystem.add_option(
+ '-c', '--continue',
+ action='store_true', dest='continue_dl', default=True,
+ help='Resume partially downloaded files/fragments (default)')
+ filesystem.add_option(
+ '--no-continue',
+ action='store_false', dest='continue_dl',
+ help=(
+ 'Do not resume partially downloaded fragments. '
+ 'If the file is not fragmented, restart download of the entire file'))
+ filesystem.add_option(
+ '--part',
+ action='store_false', dest='nopart', default=False,
+ help='Use .part files instead of writing directly into output file (default)')
+ filesystem.add_option(
+ '--no-part',
+ action='store_true', dest='nopart',
+ help='Do not use .part files - write directly into output file')
+ filesystem.add_option(
+ '--mtime',
+ action='store_true', dest='updatetime', default=True,
+ help='Use the Last-modified header to set the file modification time (default)')
+ filesystem.add_option(
+ '--no-mtime',
+ action='store_false', dest='updatetime',
+ help='Do not use the Last-modified header to set the file modification time')
+ filesystem.add_option(
+ '--write-description',
+ action='store_true', dest='writedescription', default=False,
+ help='Write video description to a .description file')
+ filesystem.add_option(
+ '--no-write-description',
+ action='store_false', dest='writedescription',
+ help='Do not write video description (default)')
+ filesystem.add_option(
+ '--write-info-json',
+ action='store_true', dest='writeinfojson', default=None,
+ help='Write video metadata to a .info.json file (this may contain personal information)')
+ filesystem.add_option(
+ '--no-write-info-json',
+ action='store_false', dest='writeinfojson',
+ help='Do not write video metadata (default)')
+ filesystem.add_option(
+ '--write-annotations',
+ action='store_true', dest='writeannotations', default=False,
+ help=optparse.SUPPRESS_HELP)
+ filesystem.add_option(
+ '--no-write-annotations',
+ action='store_false', dest='writeannotations',
+ help=optparse.SUPPRESS_HELP)
+ filesystem.add_option(
+ '--write-playlist-metafiles',
+ action='store_true', dest='allow_playlist_files', default=None,
+ help=(
+ 'Write playlist metadata in addition to the video metadata '
+ 'when using --write-info-json, --write-description etc. (default)'))
+ filesystem.add_option(
+ '--no-write-playlist-metafiles',
+ action='store_false', dest='allow_playlist_files',
+ help='Do not write playlist metadata when using --write-info-json, --write-description etc.')
+ filesystem.add_option(
+ '--clean-info-json', '--clean-infojson',
+ action='store_true', dest='clean_infojson', default=None,
+ help=(
+ 'Remove some internal metadata such as filenames from the infojson (default)'))
+ filesystem.add_option(
+ '--no-clean-info-json', '--no-clean-infojson',
+ action='store_false', dest='clean_infojson',
+ help='Write all fields to the infojson')
+ filesystem.add_option(
+ '--write-comments', '--get-comments',
+ action='store_true', dest='getcomments', default=False,
+ help=(
+ 'Retrieve video comments to be placed in the infojson. '
+ 'The comments are fetched even without this option if the extraction is known to be quick (Alias: --get-comments)'))
+ filesystem.add_option(
+ '--no-write-comments', '--no-get-comments',
+ action='store_false', dest='getcomments',
+ help='Do not retrieve video comments unless the extraction is known to be quick (Alias: --no-get-comments)')
+ filesystem.add_option(
+ '--load-info-json', '--load-info',
+ dest='load_info_filename', metavar='FILE',
+ help='JSON file containing the video information (created with the "--write-info-json" option)')
+ filesystem.add_option(
+ '--cookies',
+ dest='cookiefile', metavar='FILE',
+ help='Netscape formatted file to read cookies from and dump cookie jar in')
+ filesystem.add_option(
+ '--no-cookies',
+ action='store_const', const=None, dest='cookiefile', metavar='FILE',
+ help='Do not read/dump cookies from/to file (default)')
+ filesystem.add_option(
+ '--cookies-from-browser',
+ dest='cookiesfrombrowser', metavar='BROWSER[+KEYRING][:PROFILE][::CONTAINER]',
+ help=(
+ 'The name of the browser to load cookies from. '
+ f'Currently supported browsers are: {", ".join(sorted(SUPPORTED_BROWSERS))}. '
+ 'Optionally, the KEYRING used for decrypting Chromium cookies on Linux, '
+ 'the name/path of the PROFILE to load cookies from, '
+ 'and the CONTAINER name (if Firefox) ("none" for no container) '
+ 'can be given with their respective separators. '
+ 'By default, all containers of the most recently accessed profile are used. '
+ f'Currently supported keyrings are: {", ".join(map(str.lower, sorted(SUPPORTED_KEYRINGS)))}'))
+ filesystem.add_option(
+ '--no-cookies-from-browser',
+ action='store_const', const=None, dest='cookiesfrombrowser',
+ help='Do not load cookies from browser (default)')
+ filesystem.add_option(
+ '--cache-dir', dest='cachedir', default=None, metavar='DIR',
+ help=(
+ 'Location in the filesystem where yt-dlp can store some downloaded information '
+ '(such as client ids and signatures) permanently. By default ${XDG_CACHE_HOME}/yt-dlp'))
+ filesystem.add_option(
+ '--no-cache-dir', action='store_false', dest='cachedir',
+ help='Disable filesystem caching')
+ filesystem.add_option(
+ '--rm-cache-dir',
+ action='store_true', dest='rm_cachedir',
+ help='Delete all filesystem cache files')
+
+ thumbnail = optparse.OptionGroup(parser, 'Thumbnail Options')
+ thumbnail.add_option(
+ '--write-thumbnail',
+ action='callback', dest='writethumbnail', default=False,
+ # Should override --no-write-thumbnail, but not --write-all-thumbnail
+ callback=lambda option, _, __, parser: setattr(
+ parser.values, option.dest, getattr(parser.values, option.dest) or True),
+ help='Write thumbnail image to disk')
+ thumbnail.add_option(
+ '--no-write-thumbnail',
+ action='store_false', dest='writethumbnail',
+ help='Do not write thumbnail image to disk (default)')
+ thumbnail.add_option(
+ '--write-all-thumbnails',
+ action='store_const', dest='writethumbnail', const='all',
+ help='Write all thumbnail image formats to disk')
+ thumbnail.add_option(
+ '--list-thumbnails',
+ action='store_true', dest='list_thumbnails', default=False,
+ help='List available thumbnails of each video. Simulate unless --no-simulate is used')
+
+ link = optparse.OptionGroup(parser, 'Internet Shortcut Options')
+ link.add_option(
+ '--write-link',
+ action='store_true', dest='writelink', default=False,
+ help='Write an internet shortcut file, depending on the current platform (.url, .webloc or .desktop). The URL may be cached by the OS')
+ link.add_option(
+ '--write-url-link',
+ action='store_true', dest='writeurllink', default=False,
+ help='Write a .url Windows internet shortcut. The OS caches the URL based on the file path')
+ link.add_option(
+ '--write-webloc-link',
+ action='store_true', dest='writewebloclink', default=False,
+ help='Write a .webloc macOS internet shortcut')
+ link.add_option(
+ '--write-desktop-link',
+ action='store_true', dest='writedesktoplink', default=False,
+ help='Write a .desktop Linux internet shortcut')
+
+ postproc = optparse.OptionGroup(parser, 'Post-Processing Options')
+ postproc.add_option(
+ '-x', '--extract-audio',
+ action='store_true', dest='extractaudio', default=False,
+ help='Convert video files to audio-only files (requires ffmpeg and ffprobe)')
+ postproc.add_option(
+ '--audio-format', metavar='FORMAT', dest='audioformat', default='best',
+ help=(
+ 'Format to convert the audio to when -x is used. '
+ f'(currently supported: best (default), {", ".join(sorted(FFmpegExtractAudioPP.SUPPORTED_EXTS))}). '
+ 'You can specify multiple rules using similar syntax as --remux-video'))
+ postproc.add_option(
+ '--audio-quality', metavar='QUALITY',
+ dest='audioquality', default='5',
+ help=(
+ 'Specify ffmpeg audio quality to use when converting the audio with -x. '
+ 'Insert a value between 0 (best) and 10 (worst) for VBR or a specific bitrate like 128K (default %default)'))
+ postproc.add_option(
+ '--remux-video',
+ metavar='FORMAT', dest='remuxvideo', default=None,
+ help=(
+ 'Remux the video into another container if necessary '
+ f'(currently supported: {", ".join(FFmpegVideoRemuxerPP.SUPPORTED_EXTS)}). '
+ 'If target container does not support the video/audio codec, remuxing will fail. You can specify multiple rules; '
+ 'e.g. "aac>m4a/mov>mp4/mkv" will remux aac to m4a, mov to mp4 and anything else to mkv'))
+ postproc.add_option(
+ '--recode-video',
+ metavar='FORMAT', dest='recodevideo', default=None,
+ help='Re-encode the video into another format if necessary. The syntax and supported formats are the same as --remux-video')
+ postproc.add_option(
+ '--postprocessor-args', '--ppa',
+ metavar='NAME:ARGS', dest='postprocessor_args', default={}, type='str',
+ action='callback', callback=_dict_from_options_callback,
+ callback_kwargs={
+ 'allowed_keys': r'\w+(?:\+\w+)?',
+ 'default_key': 'default-compat',
+ 'process': shlex.split,
+ 'multiple_keys': False
+ }, help=(
+ 'Give these arguments to the postprocessors. '
+ 'Specify the postprocessor/executable name and the arguments separated by a colon ":" '
+ 'to give the argument to the specified postprocessor/executable. Supported PP are: '
+ 'Merger, ModifyChapters, SplitChapters, ExtractAudio, VideoRemuxer, VideoConvertor, '
+ 'Metadata, EmbedSubtitle, EmbedThumbnail, SubtitlesConvertor, ThumbnailsConvertor, '
+ 'FixupStretched, FixupM4a, FixupM3u8, FixupTimestamp and FixupDuration. '
+ 'The supported executables are: AtomicParsley, FFmpeg and FFprobe. '
+ 'You can also specify "PP+EXE:ARGS" to give the arguments to the specified executable '
+ 'only when being used by the specified postprocessor. Additionally, for ffmpeg/ffprobe, '
+ '"_i"/"_o" can be appended to the prefix optionally followed by a number to pass the argument '
+ 'before the specified input/output file, e.g. --ppa "Merger+ffmpeg_i1:-v quiet". '
+ 'You can use this option multiple times to give different arguments to different '
+ 'postprocessors. (Alias: --ppa)'))
+ postproc.add_option(
+ '-k', '--keep-video',
+ action='store_true', dest='keepvideo', default=False,
+ help='Keep the intermediate video file on disk after post-processing')
+ postproc.add_option(
+ '--no-keep-video',
+ action='store_false', dest='keepvideo',
+ help='Delete the intermediate video file after post-processing (default)')
+ postproc.add_option(
+ '--post-overwrites',
+ action='store_false', dest='nopostoverwrites',
+ help='Overwrite post-processed files (default)')
+ postproc.add_option(
+ '--no-post-overwrites',
+ action='store_true', dest='nopostoverwrites', default=False,
+ help='Do not overwrite post-processed files')
+ postproc.add_option(
+ '--embed-subs',
+ action='store_true', dest='embedsubtitles', default=False,
+ help='Embed subtitles in the video (only for mp4, webm and mkv videos)')
+ postproc.add_option(
+ '--no-embed-subs',
+ action='store_false', dest='embedsubtitles',
+ help='Do not embed subtitles (default)')
+ postproc.add_option(
+ '--embed-thumbnail',
+ action='store_true', dest='embedthumbnail', default=False,
+ help='Embed thumbnail in the video as cover art')
+ postproc.add_option(
+ '--no-embed-thumbnail',
+ action='store_false', dest='embedthumbnail',
+ help='Do not embed thumbnail (default)')
+ postproc.add_option(
+ '--embed-metadata', '--add-metadata',
+ action='store_true', dest='addmetadata', default=False,
+ help=(
+ 'Embed metadata to the video file. Also embeds chapters/infojson if present '
+ 'unless --no-embed-chapters/--no-embed-info-json are used (Alias: --add-metadata)'))
+ postproc.add_option(
+ '--no-embed-metadata', '--no-add-metadata',
+ action='store_false', dest='addmetadata',
+ help='Do not add metadata to file (default) (Alias: --no-add-metadata)')
+ postproc.add_option(
+ '--embed-chapters', '--add-chapters',
+ action='store_true', dest='addchapters', default=None,
+ help='Add chapter markers to the video file (Alias: --add-chapters)')
+ postproc.add_option(
+ '--no-embed-chapters', '--no-add-chapters',
+ action='store_false', dest='addchapters',
+ help='Do not add chapter markers (default) (Alias: --no-add-chapters)')
+ postproc.add_option(
+ '--embed-info-json',
+ action='store_true', dest='embed_infojson', default=None,
+ help='Embed the infojson as an attachment to mkv/mka video files')
+ postproc.add_option(
+ '--no-embed-info-json',
+ action='store_false', dest='embed_infojson',
+ help='Do not embed the infojson as an attachment to the video file')
+ postproc.add_option(
+ '--metadata-from-title',
+ metavar='FORMAT', dest='metafromtitle',
+ help=optparse.SUPPRESS_HELP)
+ postproc.add_option(
+ '--parse-metadata',
+ metavar='[WHEN:]FROM:TO', dest='parse_metadata', **when_prefix('pre_process'),
+ help=(
+ 'Parse additional metadata like title/artist from other fields; see "MODIFYING METADATA" for details. '
+ 'Supported values of "WHEN" are the same as that of --use-postprocessor (default: pre_process)'))
+ postproc.add_option(
+ '--replace-in-metadata',
+ dest='parse_metadata', metavar='[WHEN:]FIELDS REGEX REPLACE', nargs=3, **when_prefix('pre_process'),
+ help=(
+ 'Replace text in a metadata field using the given regex. This option can be used multiple times. '
+ 'Supported values of "WHEN" are the same as that of --use-postprocessor (default: pre_process)'))
+ postproc.add_option(
+ '--xattrs', '--xattr',
+ action='store_true', dest='xattrs', default=False,
+ help='Write metadata to the video file\'s xattrs (using dublin core and xdg standards)')
+ postproc.add_option(
+ '--concat-playlist',
+ metavar='POLICY', dest='concat_playlist', default='multi_video',
+ choices=('never', 'always', 'multi_video'),
+ help=(
+ 'Concatenate videos in a playlist. One of "never", "always", or '
+ '"multi_video" (default; only when the videos form a single show). '
+ 'All the video files must have the same codecs and number of streams to be concatenable. '
+ 'The "pl_video:" prefix can be used with "--paths" and "--output" to '
+ 'set the output filename for the concatenated files. See "OUTPUT TEMPLATE" for details'))
+ postproc.add_option(
+ '--fixup',
+ metavar='POLICY', dest='fixup', default=None,
+ choices=('never', 'ignore', 'warn', 'detect_or_warn', 'force'),
+ help=(
+ 'Automatically correct known faults of the file. '
+ 'One of never (do nothing), warn (only emit a warning), '
+ 'detect_or_warn (the default; fix file if we can, warn otherwise), '
+ 'force (try fixing even if file already exists)'))
+ postproc.add_option(
+ '--prefer-avconv', '--no-prefer-ffmpeg',
+ action='store_false', dest='prefer_ffmpeg',
+ help=optparse.SUPPRESS_HELP)
+ postproc.add_option(
+ '--prefer-ffmpeg', '--no-prefer-avconv',
+ action='store_true', dest='prefer_ffmpeg', default=True,
+ help=optparse.SUPPRESS_HELP)
+ postproc.add_option(
+ '--ffmpeg-location', '--avconv-location', metavar='PATH',
+ dest='ffmpeg_location',
+ help='Location of the ffmpeg binary; either the path to the binary or its containing directory')
+ postproc.add_option(
+ '--exec',
+ metavar='[WHEN:]CMD', dest='exec_cmd', **when_prefix('after_move'),
+ help=(
+ 'Execute a command, optionally prefixed with when to execute it, separated by a ":". '
+ 'Supported values of "WHEN" are the same as that of --use-postprocessor (default: after_move). '
+ 'Same syntax as the output template can be used to pass any field as arguments to the command. '
+ 'If no fields are passed, %(filepath,_filename|)q is appended to the end of the command. '
+ 'This option can be used multiple times'))
+ postproc.add_option(
+ '--no-exec',
+ action='store_const', dest='exec_cmd', const={},
+ help='Remove any previously defined --exec')
+ postproc.add_option(
+ '--exec-before-download', metavar='CMD',
+ action='append', dest='exec_before_dl_cmd',
+ help=optparse.SUPPRESS_HELP)
+ postproc.add_option(
+ '--no-exec-before-download',
+ action='store_const', dest='exec_before_dl_cmd', const=None,
+ help=optparse.SUPPRESS_HELP)
+ postproc.add_option(
+ '--convert-subs', '--convert-sub', '--convert-subtitles',
+ metavar='FORMAT', dest='convertsubtitles', default=None,
+ help=(
+ 'Convert the subtitles to another format (currently supported: %s) '
+ '(Alias: --convert-subtitles)' % ', '.join(sorted(FFmpegSubtitlesConvertorPP.SUPPORTED_EXTS))))
+ postproc.add_option(
+ '--convert-thumbnails',
+ metavar='FORMAT', dest='convertthumbnails', default=None,
+ help=(
+ 'Convert the thumbnails to another format '
+ f'(currently supported: {", ".join(sorted(FFmpegThumbnailsConvertorPP.SUPPORTED_EXTS))}). '
+ 'You can specify multiple rules using similar syntax as --remux-video'))
+ postproc.add_option(
+ '--split-chapters', '--split-tracks',
+ dest='split_chapters', action='store_true', default=False,
+ help=(
+ 'Split video into multiple files based on internal chapters. '
+ 'The "chapter:" prefix can be used with "--paths" and "--output" to '
+ 'set the output filename for the split files. See "OUTPUT TEMPLATE" for details'))
+ postproc.add_option(
+ '--no-split-chapters', '--no-split-tracks',
+ dest='split_chapters', action='store_false',
+ help='Do not split video based on chapters (default)')
+ postproc.add_option(
+ '--remove-chapters',
+ metavar='REGEX', dest='remove_chapters', action='append',
+ help=(
+ 'Remove chapters whose title matches the given regular expression. '
+ 'The syntax is the same as --download-sections. This option can be used multiple times'))
+ postproc.add_option(
+ '--no-remove-chapters', dest='remove_chapters', action='store_const', const=None,
+ help='Do not remove any chapters from the file (default)')
+ postproc.add_option(
+ '--force-keyframes-at-cuts',
+ action='store_true', dest='force_keyframes_at_cuts', default=False,
+ help=(
+ 'Force keyframes at cuts when downloading/splitting/removing sections. '
+ 'This is slow due to needing a re-encode, but the resulting video may have fewer artifacts around the cuts'))
+ postproc.add_option(
+ '--no-force-keyframes-at-cuts',
+ action='store_false', dest='force_keyframes_at_cuts',
+ help='Do not force keyframes around the chapters when cutting/splitting (default)')
+ _postprocessor_opts_parser = lambda key, val='': (
+ *(item.split('=', 1) for item in (val.split(';') if val else [])),
+ ('key', remove_end(key, 'PP')))
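+ # Parses the ARGS part of --use-postprocessor NAME[:ARGS]; e.g. (illustrative names)
+ # _postprocessor_opts_parser('MyPluginPP', 'when=pre_process;key1=val1')
+ # -> (['when', 'pre_process'], ['key1', 'val1'], ('key', 'MyPlugin'))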
+ postproc.add_option(
+ '--use-postprocessor',
+ metavar='NAME[:ARGS]', dest='add_postprocessors', default=[], type='str',
+ action='callback', callback=_list_from_options_callback,
+ callback_kwargs={
+ 'delim': None,
+ 'process': lambda val: dict(_postprocessor_opts_parser(*val.split(':', 1)))
+ }, help=(
+ 'The (case-sensitive) name of a plugin postprocessor to be enabled, '
+ 'and (optionally) arguments to be passed to it, separated by a colon ":". '
+ 'ARGS are a semicolon ";" delimited list of NAME=VALUE. '
+ 'The "when" argument determines when the postprocessor is invoked. '
+ 'It can be one of "pre_process" (after video extraction), "after_filter" (after video passes filter), '
+ '"video" (after --format; before --print/--output), "before_dl" (before each video download), '
+ '"post_process" (after each video download; default), '
+ '"after_move" (after moving video file to it\'s final locations), '
+ '"after_video" (after downloading and processing all formats of a video), '
+ 'or "playlist" (at end of playlist). '
+ 'This option can be used multiple times to add different postprocessors'))
+
+ sponsorblock = optparse.OptionGroup(parser, 'SponsorBlock Options', description=(
+ 'Make chapter entries for, or remove, various segments (sponsor, introductions, etc.) '
+ 'from downloaded YouTube videos using the SponsorBlock API (https://sponsor.ajay.app)'))
+ sponsorblock.add_option(
+ '--sponsorblock-mark', metavar='CATS',
+ dest='sponsorblock_mark', default=set(), action='callback', type='str',
+ callback=_set_from_options_callback, callback_kwargs={
+ 'allowed_values': SponsorBlockPP.CATEGORIES.keys(),
+ 'aliases': {'default': ['all']}
+ }, help=(
+ 'SponsorBlock categories to create chapters for, separated by commas. '
+ f'Available categories are {", ".join(SponsorBlockPP.CATEGORIES.keys())}, all and default (=all). '
+ 'You can prefix the category with a "-" to exclude it. See [1] for description of the categories. '
+ 'E.g. --sponsorblock-mark all,-preview [1] https://wiki.sponsor.ajay.app/w/Segment_Categories'))
+ sponsorblock.add_option(
+ '--sponsorblock-remove', metavar='CATS',
+ dest='sponsorblock_remove', default=set(), action='callback', type='str',
+ callback=_set_from_options_callback, callback_kwargs={
+ 'allowed_values': set(SponsorBlockPP.CATEGORIES.keys()) - set(SponsorBlockPP.NON_SKIPPABLE_CATEGORIES.keys()),
+ # Note: From https://wiki.sponsor.ajay.app/w/Types:
+ # The filler category is very aggressive.
+ # It is strongly recommended to not use this in a client by default.
+ 'aliases': {'default': ['all', '-filler']}
+ }, help=(
+ 'SponsorBlock categories to be removed from the video file, separated by commas. '
+ 'If a category is present in both mark and remove, remove takes precedence. '
+ 'The syntax and available categories are the same as for --sponsorblock-mark '
+ 'except that "default" refers to "all,-filler" '
+ f'and {", ".join(SponsorBlockPP.NON_SKIPPABLE_CATEGORIES.keys())} are not available'))
+ sponsorblock.add_option(
+ '--sponsorblock-chapter-title', metavar='TEMPLATE',
+ default=DEFAULT_SPONSORBLOCK_CHAPTER_TITLE, dest='sponsorblock_chapter_title',
+ help=(
+ 'An output template for the title of the SponsorBlock chapters created by --sponsorblock-mark. '
+ 'The only available fields are start_time, end_time, category, categories, name, category_names. '
+ 'Defaults to "%default"'))
+ sponsorblock.add_option(
+ '--no-sponsorblock', default=False,
+ action='store_true', dest='no_sponsorblock',
+ help='Disable both --sponsorblock-mark and --sponsorblock-remove')
+ sponsorblock.add_option(
+ '--sponsorblock-api', metavar='URL',
+ default='https://sponsor.ajay.app', dest='sponsorblock_api',
+ help='SponsorBlock API location, defaults to %default')
+
+ sponsorblock.add_option(
+ '--sponskrub',
+ action='store_true', dest='sponskrub', default=False,
+ help=optparse.SUPPRESS_HELP)
+ sponsorblock.add_option(
+ '--no-sponskrub',
+ action='store_false', dest='sponskrub',
+ help=optparse.SUPPRESS_HELP)
+ sponsorblock.add_option(
+ '--sponskrub-cut', default=False,
+ action='store_true', dest='sponskrub_cut',
+ help=optparse.SUPPRESS_HELP)
+ sponsorblock.add_option(
+ '--no-sponskrub-cut',
+ action='store_false', dest='sponskrub_cut',
+ help=optparse.SUPPRESS_HELP)
+ sponsorblock.add_option(
+ '--sponskrub-force', default=False,
+ action='store_true', dest='sponskrub_force',
+ help=optparse.SUPPRESS_HELP)
+ sponsorblock.add_option(
+ '--no-sponskrub-force',
+ action='store_true', dest='sponskrub_force',
+ help=optparse.SUPPRESS_HELP)
+ sponsorblock.add_option(
+ '--sponskrub-location', metavar='PATH',
+ dest='sponskrub_path', default='',
+ help=optparse.SUPPRESS_HELP)
+ sponsorblock.add_option(
+ '--sponskrub-args', dest='sponskrub_args', metavar='ARGS',
+ help=optparse.SUPPRESS_HELP)
+
+ extractor = optparse.OptionGroup(parser, 'Extractor Options')
+ extractor.add_option(
+ '--extractor-retries',
+ dest='extractor_retries', metavar='RETRIES', default=3,
+ help='Number of retries for known extractor errors (default is %default), or "infinite"')
+ extractor.add_option(
+ '--allow-dynamic-mpd', '--no-ignore-dynamic-mpd',
+ action='store_true', dest='dynamic_mpd', default=True,
+ help='Process dynamic DASH manifests (default) (Alias: --no-ignore-dynamic-mpd)')
+ extractor.add_option(
+ '--ignore-dynamic-mpd', '--no-allow-dynamic-mpd',
+ action='store_false', dest='dynamic_mpd',
+ help='Do not process dynamic DASH manifests (Alias: --no-allow-dynamic-mpd)')
+ extractor.add_option(
+ '--hls-split-discontinuity',
+ dest='hls_split_discontinuity', action='store_true', default=False,
+ help='Split HLS playlists to different formats at discontinuities such as ad breaks')
+ extractor.add_option(
+ '--no-hls-split-discontinuity',
+ dest='hls_split_discontinuity', action='store_false',
+ help='Do not split HLS playlists to different formats at discontinuities such as ad breaks (default)')
+ _extractor_arg_parser = lambda key, vals='': (key.strip().lower().replace('-', '_'), [
+ val.replace(r'\,', ',').strip() for val in re.split(r'(?<!\\),', vals)])
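+ # e.g. _extractor_arg_parser('player-client', 'web,default')
+ # -> ('player_client', ['web', 'default']); commas escaped as r'\,' are not split on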
+ extractor.add_option(
+ '--extractor-args',
+ metavar='IE_KEY:ARGS', dest='extractor_args', default={}, type='str',
+ action='callback', callback=_dict_from_options_callback,
+ callback_kwargs={
+ 'multiple_keys': False,
+ 'process': lambda val: dict(
+ _extractor_arg_parser(*arg.split('=', 1)) for arg in val.split(';'))
+ }, help=(
+ 'Pass ARGS arguments to the IE_KEY extractor. See "EXTRACTOR ARGUMENTS" for details. '
+ 'You can use this option multiple times to give arguments for different extractors'))
+ extractor.add_option(
+ '--youtube-include-dash-manifest', '--no-youtube-skip-dash-manifest',
+ action='store_true', dest='youtube_include_dash_manifest', default=True,
+ help=optparse.SUPPRESS_HELP)
+ extractor.add_option(
+ '--youtube-skip-dash-manifest', '--no-youtube-include-dash-manifest',
+ action='store_false', dest='youtube_include_dash_manifest',
+ help=optparse.SUPPRESS_HELP)
+ extractor.add_option(
+ '--youtube-include-hls-manifest', '--no-youtube-skip-hls-manifest',
+ action='store_true', dest='youtube_include_hls_manifest', default=True,
+ help=optparse.SUPPRESS_HELP)
+ extractor.add_option(
+ '--youtube-skip-hls-manifest', '--no-youtube-include-hls-manifest',
+ action='store_false', dest='youtube_include_hls_manifest',
+ help=optparse.SUPPRESS_HELP)
+
+ parser.add_option_group(general)
+ parser.add_option_group(network)
+ parser.add_option_group(geo)
+ parser.add_option_group(selection)
+ parser.add_option_group(downloader)
+ parser.add_option_group(filesystem)
+ parser.add_option_group(thumbnail)
+ parser.add_option_group(link)
+ parser.add_option_group(verbosity)
+ parser.add_option_group(workarounds)
+ parser.add_option_group(video_format)
+ parser.add_option_group(subtitles)
+ parser.add_option_group(authentication)
+ parser.add_option_group(postproc)
+ parser.add_option_group(sponsorblock)
+ parser.add_option_group(extractor)
+
+ return parser
+
+
+def _hide_login_info(opts):
+ deprecation_warning(f'"{__name__}._hide_login_info" is deprecated and may be removed '
+ 'in a future version. Use "yt_dlp.utils.Config.hide_login_info" instead')
+ return Config.hide_login_info(opts)
diff --git a/yt_dlp/plugins.py b/yt_dlp/plugins.py
new file mode 100644
index 0000000..3cc879f
--- /dev/null
+++ b/yt_dlp/plugins.py
@@ -0,0 +1,185 @@
+import contextlib
+import importlib
+import importlib.abc
+import importlib.machinery
+import importlib.util
+import inspect
+import itertools
+import pkgutil
+import sys
+import traceback
+import zipimport
+from pathlib import Path
+from zipfile import ZipFile
+
+from .compat import functools # isort: split
+from .utils import (
+ get_executable_path,
+ get_system_config_dirs,
+ get_user_config_dirs,
+ orderedSet,
+ write_string,
+)
+
+PACKAGE_NAME = 'yt_dlp_plugins'
+COMPAT_PACKAGE_NAME = 'ytdlp_plugins'
+
+
+class PluginLoader(importlib.abc.Loader):
+ """Dummy loader for virtual namespace packages"""
+
+ def exec_module(self, module):
+ return None
+
+
+@functools.cache
+def dirs_in_zip(archive):
+ try:
+ with ZipFile(archive) as zip_:
+ return set(itertools.chain.from_iterable(
+ Path(file).parents for file in zip_.namelist()))
+ except FileNotFoundError:
+ pass
+ except Exception as e:
+ write_string(f'WARNING: Could not read zip file {archive}: {e}\n')
+ return set()
+
+
+class PluginFinder(importlib.abc.MetaPathFinder):
+ """
+ This class provides one or more namespace packages.
+ It searches sys.path and the yt-dlp config folders for
+ existing subdirectories from which the modules can be imported
+ """
+
+ def __init__(self, *packages):
+ self._zip_content_cache = {}
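+ # Expand dotted names so every parent package is also provided,
+ # e.g. 'yt_dlp_plugins.extractor' -> {'yt_dlp_plugins', 'yt_dlp_plugins.extractor'}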
+ self.packages = set(itertools.chain.from_iterable(
+ itertools.accumulate(name.split('.'), lambda a, b: '.'.join((a, b)))
+ for name in packages))
+
+ def search_locations(self, fullname):
+ candidate_locations = []
+
+ def _get_package_paths(*root_paths, containing_folder='plugins'):
+ for config_dir in orderedSet(map(Path, root_paths), lazy=True):
+ with contextlib.suppress(OSError):
+ yield from (config_dir / containing_folder).iterdir()
+
+ # Load from yt-dlp config folders
+ candidate_locations.extend(_get_package_paths(
+ *get_user_config_dirs('yt-dlp'),
+ *get_system_config_dirs('yt-dlp'),
+ containing_folder='plugins'))
+
+ # Load from yt-dlp-plugins folders
+ candidate_locations.extend(_get_package_paths(
+ get_executable_path(),
+ *get_user_config_dirs(''),
+ *get_system_config_dirs(''),
+ containing_folder='yt-dlp-plugins'))
+
+ candidate_locations.extend(map(Path, sys.path)) # PYTHONPATH
+ with contextlib.suppress(ValueError): # Added when running __main__.py directly
+ candidate_locations.remove(Path(__file__).parent)
+
+ parts = Path(*fullname.split('.'))
+ for path in orderedSet(candidate_locations, lazy=True):
+ candidate = path / parts
+ try:
+ if candidate.is_dir():
+ yield candidate
+ elif path.suffix in ('.zip', '.egg', '.whl') and path.is_file():
+ if parts in dirs_in_zip(path):
+ yield candidate
+ except PermissionError as e:
+ write_string(f'Permission error while accessing modules in "{e.filename}"\n')
+
+ def find_spec(self, fullname, path=None, target=None):
+ if fullname not in self.packages:
+ return None
+
+ search_locations = list(map(str, self.search_locations(fullname)))
+ if not search_locations:
+ return None
+
+ spec = importlib.machinery.ModuleSpec(fullname, PluginLoader(), is_package=True)
+ spec.submodule_search_locations = search_locations
+ return spec
+
+ def invalidate_caches(self):
+ dirs_in_zip.cache_clear()
+ for package in self.packages:
+ if package in sys.modules:
+ del sys.modules[package]
+
+
+def directories():
+ spec = importlib.util.find_spec(PACKAGE_NAME)
+ return spec.submodule_search_locations if spec else []
+
+
+def iter_modules(subpackage):
+ fullname = f'{PACKAGE_NAME}.{subpackage}'
+ with contextlib.suppress(ModuleNotFoundError):
+ pkg = importlib.import_module(fullname)
+ yield from pkgutil.iter_modules(path=pkg.__path__, prefix=f'{fullname}.')
+
+
+def load_module(module, module_name, suffix):
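+ # Collect the public classes in the module whose names end with `suffix`,
+ # e.g. for suffix 'PP' this yields pairs like ('MyPluginPP', <class>)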
+ return inspect.getmembers(module, lambda obj: (
+ inspect.isclass(obj)
+ and obj.__name__.endswith(suffix)
+ and obj.__module__.startswith(module_name)
+ and not obj.__name__.startswith('_')
+ and obj.__name__ in getattr(module, '__all__', [obj.__name__])))
+
+
+def load_plugins(name, suffix):
+ classes = {}
+
+ for finder, module_name, _ in iter_modules(name):
+ if any(x.startswith('_') for x in module_name.split('.')):
+ continue
+ try:
+ if sys.version_info < (3, 10) and isinstance(finder, zipimport.zipimporter):
+ # zipimporter.load_module() is deprecated in 3.10 and removed in 3.12
+ # The exec_module branch below is the replacement for >= 3.10
+ # See: https://docs.python.org/3/library/zipimport.html#zipimport.zipimporter.exec_module
+ module = finder.load_module(module_name)
+ else:
+ spec = finder.find_spec(module_name)
+ module = importlib.util.module_from_spec(spec)
+ sys.modules[module_name] = module
+ spec.loader.exec_module(module)
+ except Exception:
+ write_string(f'Error while importing module {module_name!r}\n{traceback.format_exc(limit=-1)}')
+ continue
+ classes.update(load_module(module, module_name, suffix))
+
+ # Compat: old plugin system using __init__.py
+ # Note: plugins imported this way do not show up in directories()
+ # nor are considered part of the yt_dlp_plugins namespace package
+ with contextlib.suppress(FileNotFoundError):
+ spec = importlib.util.spec_from_file_location(
+ name, Path(get_executable_path(), COMPAT_PACKAGE_NAME, name, '__init__.py'))
+ plugins = importlib.util.module_from_spec(spec)
+ sys.modules[spec.name] = plugins
+ spec.loader.exec_module(plugins)
+ classes.update(load_module(plugins, spec.name, suffix))
+
+ return classes
+
+
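+# Register the plugin finder ahead of the standard import machinery so the
+# yt_dlp_plugins namespace packages resolve from the locations searched above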
+sys.meta_path.insert(0, PluginFinder(f'{PACKAGE_NAME}.extractor', f'{PACKAGE_NAME}.postprocessor'))
+
+__all__ = ['directories', 'load_plugins', 'PACKAGE_NAME', 'COMPAT_PACKAGE_NAME']
diff --git a/yt_dlp/postprocessor/__init__.py b/yt_dlp/postprocessor/__init__.py
new file mode 100644
index 0000000..bfe9df7
--- /dev/null
+++ b/yt_dlp/postprocessor/__init__.py
@@ -0,0 +1,47 @@
+# flake8: noqa: F401
+
+from .common import PostProcessor
+from .embedthumbnail import EmbedThumbnailPP
+from .exec import ExecAfterDownloadPP, ExecPP
+from .ffmpeg import (
+ FFmpegConcatPP,
+ FFmpegCopyStreamPP,
+ FFmpegEmbedSubtitlePP,
+ FFmpegExtractAudioPP,
+ FFmpegFixupDuplicateMoovPP,
+ FFmpegFixupDurationPP,
+ FFmpegFixupM3u8PP,
+ FFmpegFixupM4aPP,
+ FFmpegFixupStretchedPP,
+ FFmpegFixupTimestampPP,
+ FFmpegMergerPP,
+ FFmpegMetadataPP,
+ FFmpegPostProcessor,
+ FFmpegSplitChaptersPP,
+ FFmpegSubtitlesConvertorPP,
+ FFmpegThumbnailsConvertorPP,
+ FFmpegVideoConvertorPP,
+ FFmpegVideoRemuxerPP,
+)
+from .metadataparser import (
+ MetadataFromFieldPP,
+ MetadataFromTitlePP,
+ MetadataParserPP,
+)
+from .modify_chapters import ModifyChaptersPP
+from .movefilesafterdownload import MoveFilesAfterDownloadPP
+from .sponskrub import SponSkrubPP
+from .sponsorblock import SponsorBlockPP
+from .xattrpp import XAttrMetadataPP
+from ..plugins import load_plugins
+
+_PLUGIN_CLASSES = load_plugins('postprocessor', 'PP')
+
+
+def get_postprocessor(key):
+ return globals()[key + 'PP']
+
+
+globals().update(_PLUGIN_CLASSES)
+__all__ = [name for name in globals().keys() if name.endswith('PP')]
+__all__.extend(('PostProcessor', 'FFmpegPostProcessor'))
diff --git a/yt_dlp/postprocessor/common.py b/yt_dlp/postprocessor/common.py
new file mode 100644
index 0000000..8cef86c
--- /dev/null
+++ b/yt_dlp/postprocessor/common.py
@@ -0,0 +1,217 @@
+import functools
+import json
+import os
+
+from ..networking import Request
+from ..networking.exceptions import HTTPError, network_exceptions
+from ..utils import (
+ PostProcessingError,
+ RetryManager,
+ _configuration_args,
+ deprecation_warning,
+ encodeFilename,
+)
+
+
+class PostProcessorMetaClass(type):
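+ # Wraps each subclass's run() so that 'started'/'finished' progress hooks
+ # fire around every postprocessor invocation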
+ @staticmethod
+ def run_wrapper(func):
+ @functools.wraps(func)
+ def run(self, info, *args, **kwargs):
+ info_copy = self._copy_infodict(info)
+ self._hook_progress({'status': 'started'}, info_copy)
+ ret = func(self, info, *args, **kwargs)
+ if ret is not None:
+ _, info = ret
+ self._hook_progress({'status': 'finished'}, info_copy)
+ return ret
+ return run
+
+ def __new__(cls, name, bases, attrs):
+ if 'run' in attrs:
+ attrs['run'] = cls.run_wrapper(attrs['run'])
+ return type.__new__(cls, name, bases, attrs)
+
+
+class PostProcessor(metaclass=PostProcessorMetaClass):
+ """Post Processor class.
+
+ PostProcessor objects can be added to downloaders with their
+ add_post_processor() method. When the downloader has finished a
+ successful download, it will take its internal chain of PostProcessors
+ and start calling the run() method on each one of them, first with
+ an initial argument and then with the returned value of the previous
+ PostProcessor.
+
+ PostProcessor objects follow a "mutual registration" process similar
+ to InfoExtractor objects.
+
+ Optionally, a PostProcessor can use a list of additional command-line
+ arguments with self._configuration_args.
+ """
+
+ _downloader = None
+
+ def __init__(self, downloader=None):
+ self._progress_hooks = []
+ self.add_progress_hook(self.report_progress)
+ self.set_downloader(downloader)
+ self.PP_NAME = self.pp_key()
+
+ @classmethod
+ def pp_key(cls):
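+ # e.g. 'FFmpegExtractAudioPP' -> 'ExtractAudio'; 'SponsorBlockPP' -> 'SponsorBlock'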
+ name = cls.__name__[:-2]
+ return name[6:] if name[:6].lower() == 'ffmpeg' else name
+
+ def to_screen(self, text, prefix=True, *args, **kwargs):
+ if self._downloader:
+ tag = '[%s] ' % self.PP_NAME if prefix else ''
+ return self._downloader.to_screen(f'{tag}{text}', *args, **kwargs)
+
+ def report_warning(self, text, *args, **kwargs):
+ if self._downloader:
+ return self._downloader.report_warning(text, *args, **kwargs)
+
+ def deprecation_warning(self, msg):
+ warn = getattr(self._downloader, 'deprecation_warning', deprecation_warning)
+ return warn(msg, stacklevel=1)
+
+ def deprecated_feature(self, msg):
+ if self._downloader:
+ return self._downloader.deprecated_feature(msg)
+ return deprecation_warning(msg, stacklevel=1)
+
+ def report_error(self, text, *args, **kwargs):
+ self.deprecation_warning('"yt_dlp.postprocessor.PostProcessor.report_error" is deprecated. '
+ 'raise "yt_dlp.utils.PostProcessingError" instead')
+ if self._downloader:
+ return self._downloader.report_error(text, *args, **kwargs)
+
+ def write_debug(self, text, *args, **kwargs):
+ if self._downloader:
+ return self._downloader.write_debug(text, *args, **kwargs)
+
+ def _delete_downloaded_files(self, *files_to_delete, **kwargs):
+ if self._downloader:
+ return self._downloader._delete_downloaded_files(*files_to_delete, **kwargs)
+ for filename in set(filter(None, files_to_delete)):
+ os.remove(filename)
+
+ def get_param(self, name, default=None, *args, **kwargs):
+ if self._downloader:
+ return self._downloader.params.get(name, default, *args, **kwargs)
+ return default
+
+ def set_downloader(self, downloader):
+ """Sets the downloader for this PP."""
+ self._downloader = downloader
+ for ph in getattr(downloader, '_postprocessor_hooks', []):
+ self.add_progress_hook(ph)
+
+ def _copy_infodict(self, info_dict):
+ return getattr(self._downloader, '_copy_infodict', dict)(info_dict)
+
+ @staticmethod
+ def _restrict_to(*, video=True, audio=True, images=True, simulated=True):
+ allowed = {'video': video, 'audio': audio, 'images': images}
+
+ def decorator(func):
+ @functools.wraps(func)
+ def wrapper(self, info):
+ if not simulated and (self.get_param('simulate') or self.get_param('skip_download')):
+ return [], info
+ format_type = (
+ 'video' if info.get('vcodec') != 'none'
+ else 'audio' if info.get('acodec') != 'none'
+ else 'images')
+ if allowed[format_type]:
+ return func(self, info)
+ else:
+ self.to_screen('Skipping %s' % format_type)
+ return [], info
+ return wrapper
+ return decorator
+
+ def run(self, information):
+ """Run the PostProcessor.
+
+ The "information" argument is a dictionary like the ones
+ composed by InfoExtractors. The only difference is that this
+ one has an extra field called "filepath" that points to the
+ downloaded file.
+
+ This method returns a tuple: the first element is a list of the files
+ that can be deleted, and the second is the updated information.
+
+ In addition, this method may raise a PostProcessingError
+ exception if post processing fails.
+ """
+ return [], information # by default, keep file and do nothing
+
+ def try_utime(self, path, atime, mtime, errnote='Cannot update utime of file'):
+ try:
+ os.utime(encodeFilename(path), (atime, mtime))
+ except Exception:
+ self.report_warning(errnote)
+
+ def _configuration_args(self, exe, *args, **kwargs):
+ return _configuration_args(
+ self.pp_key(), self.get_param('postprocessor_args'), exe, *args, **kwargs)
+
+ def _hook_progress(self, status, info_dict):
+ if not self._progress_hooks:
+ return
+ status.update({
+ 'info_dict': info_dict,
+ 'postprocessor': self.pp_key(),
+ })
+ for ph in self._progress_hooks:
+ ph(status)
+
+ def add_progress_hook(self, ph):
+ # See YoutubeDL.py (search for postprocessor_hooks) for a description of this interface
+ self._progress_hooks.append(ph)
+
+ def report_progress(self, s):
+ s['_default_template'] = '%(postprocessor)s %(status)s' % s
+ if not self._downloader:
+ return
+
+ progress_dict = s.copy()
+ progress_dict.pop('info_dict')
+ progress_dict = {'info': s['info_dict'], 'progress': progress_dict}
+
+ progress_template = self.get_param('progress_template', {})
+ tmpl = progress_template.get('postprocess')
+ if tmpl:
+ self._downloader.to_screen(
+ self._downloader.evaluate_outtmpl(tmpl, progress_dict), quiet=False)
+
+ self._downloader.to_console_title(self._downloader.evaluate_outtmpl(
+ progress_template.get('postprocess-title') or 'yt-dlp %(progress._default_template)s',
+ progress_dict))
+
+ def _retry_download(self, err, count, retries):
+ # While this is not an extractor, it behaves similarly to one and
+ # so obeys extractor_retries and "--retry-sleep extractor"
+ RetryManager.report_retry(err, count, retries, info=self.to_screen, warn=self.report_warning,
+ sleep_func=self.get_param('retry_sleep_functions', {}).get('extractor'))
+
+ def _download_json(self, url, *, expected_http_errors=(404,)):
+ self.write_debug(f'{self.PP_NAME} query: {url}')
+ for retry in RetryManager(self.get_param('extractor_retries', 3), self._retry_download):
+ try:
+ rsp = self._downloader.urlopen(Request(url))
+ except network_exceptions as e:
+ if isinstance(e, HTTPError) and e.status in expected_http_errors:
+ return None
+ retry.error = PostProcessingError(f'Unable to communicate with {self.PP_NAME} API: {e}')
+ continue
+ return json.loads(rsp.read().decode(rsp.headers.get_param('charset') or 'utf-8'))
+
+
+class AudioConversionError(PostProcessingError): # Deprecated
+ pass
diff --git a/yt_dlp/postprocessor/embedthumbnail.py b/yt_dlp/postprocessor/embedthumbnail.py
new file mode 100644
index 0000000..9c53729
--- /dev/null
+++ b/yt_dlp/postprocessor/embedthumbnail.py
@@ -0,0 +1,231 @@
+import base64
+import os
+import re
+import subprocess
+
+from .common import PostProcessor
+from .ffmpeg import FFmpegPostProcessor, FFmpegThumbnailsConvertorPP
+from ..compat import imghdr
+from ..dependencies import mutagen
+from ..utils import (
+ Popen,
+ PostProcessingError,
+ check_executable,
+ encodeArgument,
+ encodeFilename,
+ error_to_compat_str,
+ prepend_extension,
+ shell_quote,
+)
+
+if mutagen:
+ from mutagen.flac import FLAC, Picture
+ from mutagen.mp4 import MP4, MP4Cover
+ from mutagen.oggopus import OggOpus
+ from mutagen.oggvorbis import OggVorbis
+
+
+class EmbedThumbnailPPError(PostProcessingError):
+ pass
+
+
+class EmbedThumbnailPP(FFmpegPostProcessor):
+
+ def __init__(self, downloader=None, already_have_thumbnail=False):
+ FFmpegPostProcessor.__init__(self, downloader)
+ self._already_have_thumbnail = already_have_thumbnail
+
+ def _get_thumbnail_resolution(self, filename, thumbnail_dict):
+ def guess():
+ width, height = thumbnail_dict.get('width'), thumbnail_dict.get('height')
+ if width and height:
+ return width, height
+
+ try:
+ size_regex = r',\s*(?P<w>\d+)x(?P<h>\d+)\s*[,\[]'
+ size_result = self.run_ffmpeg(filename, None, ['-hide_banner'], expected_retcodes=(1,))
+ mobj = re.search(size_regex, size_result)
+ if mobj is None:
+ return guess()
+ except PostProcessingError as err:
+ self.report_warning('unable to find the thumbnail resolution; %s' % error_to_compat_str(err))
+ return guess()
+ return int(mobj.group('w')), int(mobj.group('h'))
+
+ def _report_run(self, exe, filename):
+ self.to_screen(f'{exe}: Adding thumbnail to "{filename}"')
+
+ @PostProcessor._restrict_to(images=False)
+ def run(self, info):
+ filename = info['filepath']
+ temp_filename = prepend_extension(filename, 'temp')
+
+ if not info.get('thumbnails'):
+ self.to_screen('There aren\'t any thumbnails to embed')
+ return [], info
+
+ idx = next((-i for i, t in enumerate(info['thumbnails'][::-1], 1) if t.get('filepath')), None)
+ if idx is None:
+ self.to_screen('There are no thumbnails on disk')
+ return [], info
+ thumbnail_filename = info['thumbnails'][idx]['filepath']
+ if not os.path.exists(encodeFilename(thumbnail_filename)):
+ self.report_warning('Skipping embedding the thumbnail because the file is missing.')
+ return [], info
+
+ # Correct extension for WebP file with wrong extension (see #25687, #25717)
+ convertor = FFmpegThumbnailsConvertorPP(self._downloader)
+ convertor.fixup_webp(info, idx)
+
+ original_thumbnail = thumbnail_filename = info['thumbnails'][idx]['filepath']
+
+ # Convert unsupported thumbnail formats (see #25687, #25717)
+ # PNG is preferred since JPEG is lossy
+ thumbnail_ext = os.path.splitext(thumbnail_filename)[1][1:]
+ if info['ext'] not in ('mkv', 'mka') and thumbnail_ext not in ('jpg', 'jpeg', 'png'):
+ thumbnail_filename = convertor.convert_thumbnail(thumbnail_filename, 'png')
+ thumbnail_ext = 'png'
+
+ mtime = os.stat(encodeFilename(filename)).st_mtime
+
+ success = True
+ if info['ext'] == 'mp3':
+ options = [
+ '-c', 'copy', '-map', '0:0', '-map', '1:0', '-write_id3v1', '1', '-id3v2_version', '3',
+ '-metadata:s:v', 'title="Album cover"', '-metadata:s:v', 'comment=Cover (front)']
+
+ self._report_run('ffmpeg', filename)
+ self.run_ffmpeg_multiple_files([filename, thumbnail_filename], temp_filename, options)
+
+ elif info['ext'] in ['mkv', 'mka']:
+ options = list(self.stream_copy_opts())
+
+ mimetype = f'image/{thumbnail_ext.replace("jpg", "jpeg")}'
+ old_stream, new_stream = self.get_stream_number(
+ filename, ('tags', 'mimetype'), mimetype)
+ if old_stream is not None:
+ options.extend(['-map', '-0:%d' % old_stream])
+ new_stream -= 1
+ options.extend([
+ '-attach', self._ffmpeg_filename_argument(thumbnail_filename),
+ '-metadata:s:%d' % new_stream, 'mimetype=%s' % mimetype,
+ '-metadata:s:%d' % new_stream, 'filename=cover.%s' % thumbnail_ext])
+
+ self._report_run('ffmpeg', filename)
+ self.run_ffmpeg(filename, temp_filename, options)
+
+ elif info['ext'] in ['m4a', 'mp4', 'm4v', 'mov']:
+ prefer_atomicparsley = 'embed-thumbnail-atomicparsley' in self.get_param('compat_opts', [])
+ # Method 1: Use mutagen
+ if not mutagen or prefer_atomicparsley:
+ success = False
+ else:
+ try:
+ self._report_run('mutagen', filename)
+ meta = MP4(filename)
+ # NOTE: the 'covr' atom is a non-standard MPEG-4 atom;
+ # Apple iTunes 'M4A' files include it in the 'moov.udta.meta.ilst' atom
+ f = {'jpeg': MP4Cover.FORMAT_JPEG, 'png': MP4Cover.FORMAT_PNG}[imghdr.what(thumbnail_filename)]
+ with open(thumbnail_filename, 'rb') as thumbfile:
+ thumb_data = thumbfile.read()
+ meta.tags['covr'] = [MP4Cover(data=thumb_data, imageformat=f)]
+ meta.save()
+ temp_filename = filename
+ except Exception as err:
+ self.report_warning('unable to embed using mutagen; %s' % error_to_compat_str(err))
+ success = False
+
+ # Method 2: Use AtomicParsley
+ if not success:
+ success = True
+ atomicparsley = next((
+ # libatomicparsley.so : See https://github.com/xibr/ytdlp-lazy/issues/1
+ x for x in ['AtomicParsley', 'atomicparsley', 'libatomicparsley.so']
+ if check_executable(x, ['-v'])), None)
+ if atomicparsley is None:
+ self.to_screen('Neither mutagen nor AtomicParsley was found. Falling back to ffmpeg')
+ success = False
+ else:
+ if not prefer_atomicparsley:
+ self.to_screen('mutagen was not found. Falling back to AtomicParsley')
+ cmd = [encodeFilename(atomicparsley, True),
+ encodeFilename(filename, True),
+ encodeArgument('--artwork'),
+ encodeFilename(thumbnail_filename, True),
+ encodeArgument('-o'),
+ encodeFilename(temp_filename, True)]
+ cmd += [encodeArgument(o) for o in self._configuration_args('AtomicParsley')]
+
+ self._report_run('atomicparsley', filename)
+ self.write_debug('AtomicParsley command line: %s' % shell_quote(cmd))
+ stdout, stderr, returncode = Popen.run(cmd, text=True, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
+ if returncode:
+ self.report_warning(f'Unable to embed thumbnails using AtomicParsley; {stderr.strip()}')
+ # for formats that don't support thumbnails (like 3gp), AtomicParsley
+ # won't create the temporary file
+ if 'No changes' in stdout:
+ self.report_warning('The file format doesn\'t support embedding a thumbnail')
+ success = False
+
+ # Method 3: Use ffmpeg+ffprobe
+ # Thumbnails attached using this method don't show up as cover in some cases
+ # See https://github.com/yt-dlp/yt-dlp/issues/2125, https://github.com/yt-dlp/yt-dlp/issues/411
+ if not success:
+ success = True
+ try:
+ options = [*self.stream_copy_opts(), '-map', '1']
+
+ old_stream, new_stream = self.get_stream_number(
+ filename, ('disposition', 'attached_pic'), 1)
+ if old_stream is not None:
+ options.extend(['-map', '-0:%d' % old_stream])
+ new_stream -= 1
+ options.extend(['-disposition:%s' % new_stream, 'attached_pic'])
+
+ self._report_run('ffmpeg', filename)
+ self.run_ffmpeg_multiple_files([filename, thumbnail_filename], temp_filename, options)
+ except PostProcessingError as err:
+ success = False
+ raise EmbedThumbnailPPError(f'Unable to embed using ffprobe & ffmpeg; {err}')
+
+ elif info['ext'] in ['ogg', 'opus', 'flac']:
+ if not mutagen:
+ raise EmbedThumbnailPPError('module mutagen was not found. Please install using `python3 -m pip install mutagen`')
+
+ self._report_run('mutagen', filename)
+ f = {'opus': OggOpus, 'flac': FLAC, 'ogg': OggVorbis}[info['ext']](filename)
+
+ pic = Picture()
+ pic.mime = 'image/%s' % imghdr.what(thumbnail_filename)
+ with open(thumbnail_filename, 'rb') as thumbfile:
+ pic.data = thumbfile.read()
+ pic.type = 3 # front cover
+ res = self._get_thumbnail_resolution(thumbnail_filename, info['thumbnails'][idx])
+ if res is not None:
+ pic.width, pic.height = res
+
+ if info['ext'] == 'flac':
+ f.add_picture(pic)
+ else:
+ # https://wiki.xiph.org/VorbisComment#METADATA_BLOCK_PICTURE
+ f['METADATA_BLOCK_PICTURE'] = base64.b64encode(pic.write()).decode('ascii')
+ f.save()
+ temp_filename = filename
+
+ else:
+ raise EmbedThumbnailPPError('Supported filetypes for thumbnail embedding are: mp3, mkv/mka, ogg/opus/flac, m4a/mp4/m4v/mov')
+
+ if success and temp_filename != filename:
+ os.replace(temp_filename, filename)
+
+ self.try_utime(filename, mtime, mtime)
+ converted = original_thumbnail != thumbnail_filename
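+ # Keep the embedded thumbnail file only if it is unconverted and explicitly
+ # requested; keep the pre-conversion original only if explicitly requested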
+ self._delete_downloaded_files(
+ thumbnail_filename if converted or not self._already_have_thumbnail else None,
+ original_thumbnail if converted and not self._already_have_thumbnail else None,
+ info=info)
+ return [], info
diff --git a/yt_dlp/postprocessor/exec.py b/yt_dlp/postprocessor/exec.py
new file mode 100644
index 0000000..c2e73fb
--- /dev/null
+++ b/yt_dlp/postprocessor/exec.py
@@ -0,0 +1,41 @@
+from .common import PostProcessor
+from ..compat import compat_shlex_quote
+from ..utils import Popen, PostProcessingError, variadic
+
+
+class ExecPP(PostProcessor):
+
+ def __init__(self, downloader, exec_cmd):
+ PostProcessor.__init__(self, downloader)
+ self.exec_cmd = variadic(exec_cmd)
+
+ def parse_cmd(self, cmd, info):
+ tmpl, tmpl_dict = self._downloader.prepare_outtmpl(cmd, info)
+ if tmpl_dict: # if there are no replacements, tmpl_dict = {}
+ return self._downloader.escape_outtmpl(tmpl) % tmpl_dict
+
+ filepath = info.get('filepath', info.get('_filename'))
+ # If there is a file and no replacements were found, replace {} for backward compatibility
+ if filepath:
+ if '{}' not in cmd:
+ cmd += ' {}'
+ cmd = cmd.replace('{}', compat_shlex_quote(filepath))
+ return cmd
+
+ def run(self, info):
+ for tmpl in self.exec_cmd:
+ cmd = self.parse_cmd(tmpl, info)
+ self.to_screen(f'Executing command: {cmd}')
+ _, _, return_code = Popen.run(cmd, shell=True)
+ if return_code != 0:
+ raise PostProcessingError(f'Command returned error code {return_code}')
+ return [], info
+
+
+# Deprecated
+class ExecAfterDownloadPP(ExecPP):
+ def __init__(self, *args, **kwargs):
+ super().__init__(*args, **kwargs)
+ self.deprecation_warning(
+ 'yt_dlp.postprocessor.ExecAfterDownloadPP is deprecated '
+ 'and may be removed in a future version. Use yt_dlp.postprocessor.ExecPP instead')
diff --git a/yt_dlp/postprocessor/ffmpeg.py b/yt_dlp/postprocessor/ffmpeg.py
new file mode 100644
index 0000000..7d7f3f0
--- /dev/null
+++ b/yt_dlp/postprocessor/ffmpeg.py
@@ -0,0 +1,1197 @@
+import collections
+import contextvars
+import itertools
+import json
+import os
+import re
+import subprocess
+import time
+
+from .common import PostProcessor
+from ..compat import functools, imghdr
+from ..utils import (
+ MEDIA_EXTENSIONS,
+ ISO639Utils,
+ Popen,
+ PostProcessingError,
+ _get_exe_version_output,
+ deprecation_warning,
+ detect_exe_version,
+ determine_ext,
+ dfxp2srt,
+ encodeArgument,
+ encodeFilename,
+ filter_dict,
+ float_or_none,
+ is_outdated_version,
+ orderedSet,
+ prepend_extension,
+ replace_extension,
+ shell_quote,
+ traverse_obj,
+ variadic,
+ write_json_file,
+)
+
+EXT_TO_OUT_FORMATS = {
+ 'aac': 'adts',
+ 'flac': 'flac',
+ 'm4a': 'ipod',
+ 'mka': 'matroska',
+ 'mkv': 'matroska',
+ 'mpg': 'mpeg',
+ 'ogv': 'ogg',
+ 'ts': 'mpegts',
+ 'wma': 'asf',
+ 'wmv': 'asf',
+ 'weba': 'webm',
+ 'vtt': 'webvtt',
+}
+ACODECS = {
+ # name: (ext, encoder, opts)
+ 'mp3': ('mp3', 'libmp3lame', ()),
+ 'aac': ('m4a', 'aac', ('-f', 'adts')),
+ 'm4a': ('m4a', 'aac', ('-bsf:a', 'aac_adtstoasc')),
+ 'opus': ('opus', 'libopus', ()),
+ 'vorbis': ('ogg', 'libvorbis', ()),
+ 'flac': ('flac', 'flac', ()),
+ 'alac': ('m4a', None, ('-acodec', 'alac')),
+ 'wav': ('wav', None, ('-f', 'wav')),
+}
+
+
+def create_mapping_re(supported):
+ return re.compile(r'{0}(?:/{0})*$'.format(r'(?:\s*\w+\s*>)?\s*(?:%s)\s*' % '|'.join(supported)))
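+
+# Illustrative: create_mapping_re(('mp4', 'mkv')) accepts strings such as
+# 'mp4', 'mkv/mp4', 'webm>mkv' or 'avi>mp4/mkv' (each '/'-separated pair may
+# carry an optional 'source>' prefix).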
+
+
+def resolve_mapping(source, mapping):
+ """
+ Get corresponding item from a mapping string like 'A>B/C>D/E'
+ @returns (target, error_message)
+ """
+ for pair in mapping.lower().split('/'):
+ kv = pair.split('>', 1)
+ if len(kv) == 1 or kv[0].strip() == source:
+ target = kv[-1].strip()
+ if target == source:
+ return target, f'already is in target format {source}'
+ return target, None
+ return None, f'could not find a mapping for {source}'
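+
+# Illustrative examples:
+#   resolve_mapping('aac', 'aac>m4a/mp3') -> ('m4a', None)
+#   resolve_mapping('ogg', 'aac>m4a/mp3') -> ('mp3', None)   # unkeyed pair matches any source
+#   resolve_mapping('mp3', 'aac>m4a/mp3') -> ('mp3', 'already is in target format mp3')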
+
+
+class FFmpegPostProcessorError(PostProcessingError):
+ pass
+
+
+class FFmpegPostProcessor(PostProcessor):
+ _ffmpeg_location = contextvars.ContextVar('ffmpeg_location', default=None)
+
+ def __init__(self, downloader=None):
+ PostProcessor.__init__(self, downloader)
+ self._prefer_ffmpeg = self.get_param('prefer_ffmpeg', True)
+ self._paths = self._determine_executables()
+
+ @staticmethod
+ def get_versions_and_features(downloader=None):
+ pp = FFmpegPostProcessor(downloader)
+ return pp._versions, pp._features
+
+ @staticmethod
+ def get_versions(downloader=None):
+ return FFmpegPostProcessor.get_versions_and_features(downloader)[0]
+
+ _ffmpeg_to_avconv = {'ffmpeg': 'avconv', 'ffprobe': 'avprobe'}
+
+ def _determine_executables(self):
+ programs = [*self._ffmpeg_to_avconv.keys(), *self._ffmpeg_to_avconv.values()]
+
+ location = self.get_param('ffmpeg_location', self._ffmpeg_location.get())
+ if location is None:
+ return {p: p for p in programs}
+
+ if not os.path.exists(location):
+ self.report_warning(
+ f'ffmpeg-location {location} does not exist! Continuing without ffmpeg', only_once=True)
+ return {}
+ elif os.path.isdir(location):
+ dirname, basename, filename = location, None, None
+ else:
+ filename = os.path.basename(location)
+ basename = next((p for p in programs if p in filename), 'ffmpeg')
+ dirname = os.path.dirname(os.path.abspath(location))
+ if basename in self._ffmpeg_to_avconv.keys():
+ self._prefer_ffmpeg = True
+
+ paths = {p: os.path.join(dirname, p) for p in programs}
+ if basename and basename in filename:
+ for p in programs:
+ path = os.path.join(dirname, filename.replace(basename, p))
+ if os.path.exists(path):
+ paths[p] = path
+ if basename:
+ paths[basename] = location
+ return paths
+
+ _version_cache, _features_cache = {None: None}, {}
+
+ def _get_ffmpeg_version(self, prog):
+ path = self._paths.get(prog)
+ if path in self._version_cache:
+ return self._version_cache[path], self._features_cache.get(path, {})
+ out = _get_exe_version_output(path, ['-bsfs'])
+ ver = detect_exe_version(out) if out else False
+ if ver:
+ regexs = [
+ r'(?:\d+:)?([0-9.]+)-[0-9]+ubuntu[0-9.]+$', # Ubuntu, see [1]
+ r'n([0-9.]+)$', # Arch Linux
+ # 1. http://www.ducea.com/2006/06/17/ubuntu-package-version-naming-explanation/
+ ]
+ for regex in regexs:
+ mobj = re.match(regex, ver)
+ if mobj:
+ ver = mobj.group(1)
+ self._version_cache[path] = ver
+ if prog != 'ffmpeg' or not out:
+ return ver, {}
+
+ mobj = re.search(r'(?m)^\s+libavformat\s+(?:[0-9. ]+)\s+/\s+(?P<runtime>[0-9. ]+)', out)
+ lavf_runtime_version = mobj.group('runtime').replace(' ', '') if mobj else None
+ self._features_cache[path] = features = {
+ 'fdk': '--enable-libfdk-aac' in out,
+ 'setts': 'setts' in out.splitlines(),
+ 'needs_adtstoasc': is_outdated_version(lavf_runtime_version, '57.56.100', False),
+ }
+ return ver, features
+
+ @property
+ def _versions(self):
+ return filter_dict({self.basename: self._version, self.probe_basename: self._probe_version})
+
+ @functools.cached_property
+ def basename(self):
+ self._version # running the property calls _get_version, which sets self.basename
+ return self.basename
+
+ @functools.cached_property
+ def probe_basename(self):
+ self._probe_version # running the property calls _get_version, which sets self.probe_basename
+ return self.probe_basename
+
+ def _get_version(self, kind):
+ executables = (kind, )
+ if not self._prefer_ffmpeg:
+ executables = (kind, self._ffmpeg_to_avconv[kind])
+ basename, version, features = next(filter(
+ lambda x: x[1], ((p, *self._get_ffmpeg_version(p)) for p in executables)), (None, None, {}))
+ if kind == 'ffmpeg':
+ self.basename, self._features = basename, features
+ else:
+ self.probe_basename = basename
+ if basename == self._ffmpeg_to_avconv[kind]:
+ self.deprecated_feature(f'Support for {self._ffmpeg_to_avconv[kind]} is deprecated and '
+ f'may be removed in a future version. Use {kind} instead')
+ return version
+
+ @functools.cached_property
+ def _version(self):
+ return self._get_version('ffmpeg')
+
+ @functools.cached_property
+ def _probe_version(self):
+ return self._get_version('ffprobe')
+
+ @property
+ def available(self):
+ return self.basename is not None
+
+ @property
+ def executable(self):
+ return self._paths.get(self.basename)
+
+ @property
+ def probe_available(self):
+ return self.probe_basename is not None
+
+ @property
+ def probe_executable(self):
+ return self._paths.get(self.probe_basename)
+
+ @staticmethod
+ def stream_copy_opts(copy=True, *, ext=None):
+ yield from ('-map', '0')
+ # Don't copy Apple TV chapters track, bin_data
+ # See https://github.com/yt-dlp/yt-dlp/issues/2, #19042, #19024, https://trac.ffmpeg.org/ticket/6016
+ yield from ('-dn', '-ignore_unknown')
+ if copy:
+ yield from ('-c', 'copy')
+ if ext in ('mp4', 'mov', 'm4a'):
+ yield from ('-c:s', 'mov_text')
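+
+ # Illustrative: list(FFmpegPostProcessor.stream_copy_opts(ext='mp4')) ==
+ # ['-map', '0', '-dn', '-ignore_unknown', '-c', 'copy', '-c:s', 'mov_text']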
+
+ def check_version(self):
+ if not self.available:
+ raise FFmpegPostProcessorError('ffmpeg not found. Please install or provide the path using --ffmpeg-location')
+
+ required_version = '10-0' if self.basename == 'avconv' else '1.0'
+ if is_outdated_version(self._version, required_version):
+ self.report_warning(f'Your copy of {self.basename} is outdated, update {self.basename} '
+ f'to version {required_version} or newer if you encounter any errors')
+
+ def get_audio_codec(self, path):
+ if not self.probe_available and not self.available:
+ raise PostProcessingError('ffprobe and ffmpeg not found. Please install or provide the path using --ffmpeg-location')
+ try:
+ if self.probe_available:
+ cmd = [
+ encodeFilename(self.probe_executable, True),
+ encodeArgument('-show_streams')]
+ else:
+ cmd = [
+ encodeFilename(self.executable, True),
+ encodeArgument('-i')]
+ cmd.append(encodeFilename(self._ffmpeg_filename_argument(path), True))
+ self.write_debug(f'{self.basename} command line: {shell_quote(cmd)}')
+ stdout, stderr, returncode = Popen.run(
+ cmd, text=True, stdin=subprocess.PIPE, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
+ if returncode != (0 if self.probe_available else 1):
+ return None
+ except OSError:
+ return None
+ output = stdout if self.probe_available else stderr
+ if self.probe_available:
+ audio_codec = None
+ for line in output.split('\n'):
+ if line.startswith('codec_name='):
+ audio_codec = line.split('=')[1].strip()
+ elif line.strip() == 'codec_type=audio' and audio_codec is not None:
+ return audio_codec
+ else:
+ # Stream #FILE_INDEX:STREAM_INDEX[STREAM_ID](LANGUAGE): CODEC_TYPE: CODEC_NAME
+ mobj = re.search(
+ r'Stream\s*#\d+:\d+(?:\[0x[0-9a-f]+\])?(?:\([a-z]{3}\))?:\s*Audio:\s*([0-9a-z]+)',
+ output)
+ if mobj:
+ return mobj.group(1)
+ return None
+
+ def get_metadata_object(self, path, opts=None):
+ if self.probe_basename != 'ffprobe':
+ if self.probe_available:
+ self.report_warning('Only ffprobe is supported for metadata extraction')
+ raise PostProcessingError('ffprobe not found. Please install or provide the path using --ffmpeg-location')
+ self.check_version()
+
+ cmd = [
+ encodeFilename(self.probe_executable, True),
+ encodeArgument('-hide_banner'),
+ encodeArgument('-show_format'),
+ encodeArgument('-show_streams'),
+ encodeArgument('-print_format'),
+ encodeArgument('json'),
+ ]
+
+ cmd += opts or []
+ cmd.append(self._ffmpeg_filename_argument(path))
+ self.write_debug(f'ffprobe command line: {shell_quote(cmd)}')
+ stdout, _, _ = Popen.run(cmd, text=True, stdout=subprocess.PIPE, stderr=subprocess.PIPE, stdin=subprocess.PIPE)
+ return json.loads(stdout)
+
+ def get_stream_number(self, path, keys, value):
+ streams = self.get_metadata_object(path)['streams']
+ num = next(
+ (i for i, stream in enumerate(streams) if traverse_obj(stream, keys, casesense=False) == value),
+ None)
+ return num, len(streams)
+
+ def _fixup_chapters(self, info):
+ last_chapter = traverse_obj(info, ('chapters', -1))
+ if last_chapter and not last_chapter.get('end_time'):
+ last_chapter['end_time'] = self._get_real_video_duration(info['filepath'])
+
+ def _get_real_video_duration(self, filepath, fatal=True):
+ try:
+ duration = float_or_none(
+ traverse_obj(self.get_metadata_object(filepath), ('format', 'duration')))
+ if not duration:
+ raise PostProcessingError('ffprobe returned empty duration')
+ return duration
+ except PostProcessingError as e:
+ if fatal:
+ raise PostProcessingError(f'Unable to determine video duration: {e.msg}')
+
+ def _duration_mismatch(self, d1, d2, tolerance=2):
+ if not d1 or not d2:
+ return None
+ # The duration is often only known to the nearest second, so a <1sec disparity can occur naturally.
+ # Further excuse an additional <1sec difference.
+ return abs(d1 - d2) > tolerance
+
+ def run_ffmpeg_multiple_files(self, input_paths, out_path, opts, **kwargs):
+ return self.real_run_ffmpeg(
+ [(path, []) for path in input_paths],
+ [(out_path, opts)], **kwargs)
+
+ def real_run_ffmpeg(self, input_path_opts, output_path_opts, *, expected_retcodes=(0,)):
+ self.check_version()
+
+ oldest_mtime = min(
+ os.stat(encodeFilename(path)).st_mtime for path, _ in input_path_opts if path)
+
+ cmd = [encodeFilename(self.executable, True), encodeArgument('-y')]
+ # avconv does not have repeat option
+ if self.basename == 'ffmpeg':
+ cmd += [encodeArgument('-loglevel'), encodeArgument('repeat+info')]
+
+ def make_args(file, args, name, number):
+ keys = ['_%s%d' % (name, number), '_%s' % name]
+ if name == 'o':
+ args += ['-movflags', '+faststart']
+ if number == 1:
+ keys.append('')
+ args += self._configuration_args(self.basename, keys)
+ if name == 'i':
+ args.append('-i')
+ return (
+ [encodeArgument(arg) for arg in args]
+ + [encodeFilename(self._ffmpeg_filename_argument(file), True)])
+
+ for arg_type, path_opts in (('i', input_path_opts), ('o', output_path_opts)):
+ cmd += itertools.chain.from_iterable(
+ make_args(path, list(opts), arg_type, i + 1)
+ for i, (path, opts) in enumerate(path_opts) if path)
+
+ self.write_debug('ffmpeg command line: %s' % shell_quote(cmd))
+ _, stderr, returncode = Popen.run(
+ cmd, text=True, stdout=subprocess.PIPE, stderr=subprocess.PIPE, stdin=subprocess.PIPE)
+ if returncode not in variadic(expected_retcodes):
+ self.write_debug(stderr)
+ raise FFmpegPostProcessorError(stderr.strip().splitlines()[-1])
+ for out_path, _ in output_path_opts:
+ if out_path:
+ self.try_utime(out_path, oldest_mtime, oldest_mtime)
+ return stderr
+
+ def run_ffmpeg(self, path, out_path, opts, **kwargs):
+ return self.run_ffmpeg_multiple_files([path], out_path, opts, **kwargs)
+
+ @staticmethod
+ def _ffmpeg_filename_argument(fn):
+ # Always use 'file:' because the filename may contain ':' (ffmpeg
+ # interprets that as a protocol) or can start with '-' (-- is broken in
+ # ffmpeg, see https://ffmpeg.org/trac/ffmpeg/ticket/2127 for details)
+ # Also leave '-' intact in order not to break streaming to stdout.
+ if fn.startswith(('http://', 'https://')):
+ return fn
+ return 'file:' + fn if fn != '-' else fn
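+
+ # Illustrative: 'video.mp4' -> 'file:video.mp4', while '-' (stdout) and
+ # http(s) URLs are returned unchanged.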
+
+ @staticmethod
+ def _quote_for_ffmpeg(string):
+ # See https://ffmpeg.org/ffmpeg-utils.html#toc-Quoting-and-escaping
+ # A sequence of '' produces '\'''\'';
+ # final replace removes the empty '' between \' \'.
+ string = string.replace("'", r"'\''").replace("'''", "'")
+ # Handle potential ' at string boundaries.
+ string = string[1:] if string[0] == "'" else "'" + string
+ return string[:-1] if string[-1] == "'" else string + "'"
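+
+ # Illustrative: _quote_for_ffmpeg("it's") -> 'it'\''s' (the quoting scheme
+ # described in the ffmpeg-utils documentation linked above).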
+
+ def force_keyframes(self, filename, timestamps):
+ timestamps = orderedSet(timestamps)
+ if timestamps[0] == 0:
+ timestamps = timestamps[1:]
+ keyframe_file = prepend_extension(filename, 'keyframes.temp')
+ self.to_screen(f'Re-encoding "{filename}" with appropriate keyframes')
+ self.run_ffmpeg(filename, keyframe_file, [
+ *self.stream_copy_opts(False, ext=determine_ext(filename)),
+ '-force_key_frames', ','.join(f'{t:.6f}' for t in timestamps)])
+ return keyframe_file
+
+ def concat_files(self, in_files, out_file, concat_opts=None):
+ """
+ Use concat demuxer to concatenate multiple files having identical streams.
+
+ Only inpoint, outpoint, and duration concat options are supported.
+ See https://ffmpeg.org/ffmpeg-formats.html#concat-1 for details
+ """
+ concat_file = f'{out_file}.concat'
+ self.write_debug(f'Writing concat spec to {concat_file}')
+ with open(concat_file, 'w', encoding='utf-8') as f:
+ f.writelines(self._concat_spec(in_files, concat_opts))
+
+ out_flags = list(self.stream_copy_opts(ext=determine_ext(out_file)))
+
+ self.real_run_ffmpeg(
+ [(concat_file, ['-hide_banner', '-nostdin', '-f', 'concat', '-safe', '0'])],
+ [(out_file, out_flags)])
+ self._delete_downloaded_files(concat_file)
+
+ @classmethod
+ def _concat_spec(cls, in_files, concat_opts=None):
+ if concat_opts is None:
+ concat_opts = [{}] * len(in_files)
+ yield 'ffconcat version 1.0\n'
+ for file, opts in zip(in_files, concat_opts):
+ yield f'file {cls._quote_for_ffmpeg(cls._ffmpeg_filename_argument(file))}\n'
+ # Iterate explicitly to yield the following directives in order, ignoring the rest.
+ for directive in 'inpoint', 'outpoint', 'duration':
+ if directive in opts:
+ yield f'{directive} {opts[directive]}\n'
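+
+ # Illustrative spec from _concat_spec(['a.mp4', 'b.mp4'], [{'inpoint': 5}, {'outpoint': 10}]):
+ #   ffconcat version 1.0
+ #   file 'file:a.mp4'
+ #   inpoint 5
+ #   file 'file:b.mp4'
+ #   outpoint 10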
+
+
+class FFmpegExtractAudioPP(FFmpegPostProcessor):
+ COMMON_AUDIO_EXTS = MEDIA_EXTENSIONS.common_audio + ('wma', )
+ SUPPORTED_EXTS = tuple(ACODECS.keys())
+ FORMAT_RE = create_mapping_re(('best', *SUPPORTED_EXTS))
+
+ def __init__(self, downloader=None, preferredcodec=None, preferredquality=None, nopostoverwrites=False):
+ FFmpegPostProcessor.__init__(self, downloader)
+ self.mapping = preferredcodec or 'best'
+ self._preferredquality = float_or_none(preferredquality)
+ self._nopostoverwrites = nopostoverwrites
+
+ def _quality_args(self, codec):
+ if self._preferredquality is None:
+ return []
+ elif self._preferredquality > 10:
+ return ['-b:a', f'{self._preferredquality}k']
+
+ limits = {
+ 'libmp3lame': (10, 0),
+ 'libvorbis': (0, 10),
+ # FFmpeg's AAC encoder does not have an upper limit for the value of -q:a.
+ # Experimentally, with values over 4, bitrate changes were minimal or non-existent
+ 'aac': (0.1, 4),
+ 'libfdk_aac': (1, 5),
+ }.get(codec)
+ if not limits:
+ return []
+
+ q = limits[1] + (limits[0] - limits[1]) * (self._preferredquality / 10)
+ if codec == 'libfdk_aac':
+ return ['-vbr', f'{int(q)}']
+ return ['-q:a', f'{q}']
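+
+ # Illustrative: with --audio-quality 5 and libmp3lame (limits (10, 0)):
+ #   q = 0 + (10 - 0) * (5 / 10) = 5.0  ->  ['-q:a', '5.0']
+ # Values above 10 are treated as bitrates, e.g. 128 -> ['-b:a', '128.0k'].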
+
+ def run_ffmpeg(self, path, out_path, codec, more_opts):
+ if codec is None:
+ acodec_opts = []
+ else:
+ acodec_opts = ['-acodec', codec]
+ opts = ['-vn'] + acodec_opts + more_opts
+ try:
+ FFmpegPostProcessor.run_ffmpeg(self, path, out_path, opts)
+ except FFmpegPostProcessorError as err:
+ raise PostProcessingError(f'audio conversion failed: {err.msg}')
+
+ @PostProcessor._restrict_to(images=False)
+ def run(self, information):
+ orig_path = path = information['filepath']
+ target_format, _skip_msg = resolve_mapping(information['ext'], self.mapping)
+ if target_format == 'best' and information['ext'] in self.COMMON_AUDIO_EXTS:
+ target_format, _skip_msg = None, 'the file is already in a common audio format'
+ if not target_format:
+ self.to_screen(f'Not converting audio {orig_path}; {_skip_msg}')
+ return [], information
+
+ filecodec = self.get_audio_codec(path)
+ if filecodec is None:
+ raise PostProcessingError('unable to obtain file audio codec with ffprobe')
+
+ if filecodec == 'aac' and target_format in ('m4a', 'best'):
+ # Lossless, but in another container
+ extension, _, more_opts, acodec = *ACODECS['m4a'], 'copy'
+ elif target_format == 'best' or target_format == filecodec:
+ # Lossless if possible
+ try:
+ extension, _, more_opts, acodec = *ACODECS[filecodec], 'copy'
+ except KeyError:
+ extension, acodec, more_opts = ACODECS['mp3']
+ else:
+ # We convert the audio (lossy if codec is lossy)
+ extension, acodec, more_opts = ACODECS[target_format]
+ if acodec == 'aac' and self._features.get('fdk'):
+ acodec, more_opts = 'libfdk_aac', []
+
+ more_opts = list(more_opts)
+ if acodec != 'copy':
+ more_opts = self._quality_args(acodec)
+
+ temp_path = new_path = replace_extension(path, extension, information['ext'])
+
+ if new_path == path:
+ if acodec == 'copy':
+ self.to_screen(f'Not converting audio {orig_path}; file is already in target format {target_format}')
+ return [], information
+ orig_path = prepend_extension(path, 'orig')
+ temp_path = prepend_extension(path, 'temp')
+ if (self._nopostoverwrites and os.path.exists(encodeFilename(new_path))
+ and os.path.exists(encodeFilename(orig_path))):
+ self.to_screen('Post-process file %s exists, skipping' % new_path)
+ return [], information
+
+ self.to_screen(f'Destination: {new_path}')
+ self.run_ffmpeg(path, temp_path, acodec, more_opts)
+
+ os.replace(path, orig_path)
+ os.replace(temp_path, new_path)
+ information['filepath'] = new_path
+ information['ext'] = extension
+
+ # Try to update the date time for extracted audio file.
+ if information.get('filetime') is not None:
+ self.try_utime(
+ new_path, time.time(), information['filetime'], errnote='Cannot update utime of audio file')
+
+ return [orig_path], information
+
+
+class FFmpegVideoConvertorPP(FFmpegPostProcessor):
+ SUPPORTED_EXTS = (
+ *sorted((*MEDIA_EXTENSIONS.common_video, 'gif')),
+ *sorted((*MEDIA_EXTENSIONS.common_audio, 'aac', 'vorbis')),
+ )
+ FORMAT_RE = create_mapping_re(SUPPORTED_EXTS)
+ _ACTION = 'converting'
+
+ def __init__(self, downloader=None, preferedformat=None):
+ super().__init__(downloader)
+ self.mapping = preferedformat
+
+ @staticmethod
+ def _options(target_ext):
+ yield from FFmpegPostProcessor.stream_copy_opts(False)
+ if target_ext == 'avi':
+ yield from ('-c:v', 'libxvid', '-vtag', 'XVID')
+
+ @PostProcessor._restrict_to(images=False)
+ def run(self, info):
+ filename, source_ext = info['filepath'], info['ext'].lower()
+ target_ext, _skip_msg = resolve_mapping(source_ext, self.mapping)
+ if _skip_msg:
+ self.to_screen(f'Not {self._ACTION} media file "{filename}"; {_skip_msg}')
+ return [], info
+
+ outpath = replace_extension(filename, target_ext, source_ext)
+ self.to_screen(f'{self._ACTION.title()} video from {source_ext} to {target_ext}; Destination: {outpath}')
+ self.run_ffmpeg(filename, outpath, self._options(target_ext))
+
+ info['filepath'] = outpath
+ info['format'] = info['ext'] = target_ext
+ return [filename], info
+
+
+class FFmpegVideoRemuxerPP(FFmpegVideoConvertorPP):
+ _ACTION = 'remuxing'
+
+ @staticmethod
+ def _options(target_ext):
+ return FFmpegPostProcessor.stream_copy_opts()
+
+
+class FFmpegEmbedSubtitlePP(FFmpegPostProcessor):
+ SUPPORTED_EXTS = ('mp4', 'mov', 'm4a', 'webm', 'mkv', 'mka')
+
+ def __init__(self, downloader=None, already_have_subtitle=False):
+ super().__init__(downloader)
+ self._already_have_subtitle = already_have_subtitle
+
+ @PostProcessor._restrict_to(images=False)
+ def run(self, info):
+ if info['ext'] not in self.SUPPORTED_EXTS:
+ self.to_screen(f'Subtitles can only be embedded in {", ".join(self.SUPPORTED_EXTS)} files')
+ return [], info
+ subtitles = info.get('requested_subtitles')
+ if not subtitles:
+ self.to_screen('There aren\'t any subtitles to embed')
+ return [], info
+
+ filename = info['filepath']
+
+ # Disabled temporarily. There needs to be a way to override this
+ # in case the duration reported by the extractor is actually mismatched
+ # See: https://github.com/yt-dlp/yt-dlp/issues/1870, https://github.com/yt-dlp/yt-dlp/issues/1385
+ '''
+ if info.get('duration') and not info.get('__real_download') and self._duration_mismatch(
+ self._get_real_video_duration(filename, False), info['duration']):
+ self.to_screen(f'Skipping {self.pp_key()} since the real and expected durations mismatch')
+ return [], info
+ '''
+
+ ext = info['ext']
+ sub_langs, sub_names, sub_filenames = [], [], []
+ webm_vtt_warn = False
+ mp4_ass_warn = False
+
+ for lang, sub_info in subtitles.items():
+ if not os.path.exists(sub_info.get('filepath', '')):
+ self.report_warning(f'Skipping embedding {lang} subtitle because the file is missing')
+ continue
+ sub_ext = sub_info['ext']
+ if sub_ext == 'json':
+ self.report_warning('JSON subtitles cannot be embedded')
+ elif ext != 'webm' or (ext == 'webm' and sub_ext == 'vtt'):
+ sub_langs.append(lang)
+ sub_names.append(sub_info.get('name'))
+ sub_filenames.append(sub_info['filepath'])
+ else:
+ if not webm_vtt_warn and ext == 'webm' and sub_ext != 'vtt':
+ webm_vtt_warn = True
+ self.report_warning('Only WebVTT subtitles can be embedded in webm files')
+ if not mp4_ass_warn and ext == 'mp4' and sub_ext == 'ass':
+ mp4_ass_warn = True
+ self.report_warning('ASS subtitles cannot be properly embedded in mp4 files; expect issues')
+
+ if not sub_langs:
+ return [], info
+
+ input_files = [filename] + sub_filenames
+
+ opts = [
+ *self.stream_copy_opts(ext=info['ext']),
+ # Don't copy the existing subtitles, we may be running the
+ # postprocessor a second time
+ '-map', '-0:s',
+ ]
+ for i, (lang, name) in enumerate(zip(sub_langs, sub_names)):
+ opts.extend(['-map', '%d:0' % (i + 1)])
+ lang_code = ISO639Utils.short2long(lang) or lang
+ opts.extend(['-metadata:s:s:%d' % i, 'language=%s' % lang_code])
+ if name:
+ opts.extend(['-metadata:s:s:%d' % i, 'handler_name=%s' % name,
+ '-metadata:s:s:%d' % i, 'title=%s' % name])
+
+ temp_filename = prepend_extension(filename, 'temp')
+ self.to_screen('Embedding subtitles in "%s"' % filename)
+ self.run_ffmpeg_multiple_files(input_files, temp_filename, opts)
+ os.replace(temp_filename, filename)
+
+ files_to_delete = [] if self._already_have_subtitle else sub_filenames
+ return files_to_delete, info
+
+
+class FFmpegMetadataPP(FFmpegPostProcessor):
+
+ def __init__(self, downloader, add_metadata=True, add_chapters=True, add_infojson='if_exists'):
+ FFmpegPostProcessor.__init__(self, downloader)
+ self._add_metadata = add_metadata
+ self._add_chapters = add_chapters
+ self._add_infojson = add_infojson
+
+ @staticmethod
+ def _options(target_ext):
+ audio_only = target_ext == 'm4a'
+ yield from FFmpegPostProcessor.stream_copy_opts(not audio_only)
+ if audio_only:
+ yield from ('-vn', '-acodec', 'copy')
+
+ @PostProcessor._restrict_to(images=False)
+ def run(self, info):
+ self._fixup_chapters(info)
+ filename, metadata_filename = info['filepath'], None
+ files_to_delete, options = [], []
+ if self._add_chapters and info.get('chapters'):
+ metadata_filename = replace_extension(filename, 'meta')
+ options.extend(self._get_chapter_opts(info['chapters'], metadata_filename))
+ files_to_delete.append(metadata_filename)
+ if self._add_metadata:
+ options.extend(self._get_metadata_opts(info))
+
+ if self._add_infojson:
+ if info['ext'] in ('mkv', 'mka'):
+ infojson_filename = info.get('infojson_filename')
+ options.extend(self._get_infojson_opts(info, infojson_filename))
+ if not infojson_filename:
+ files_to_delete.append(info.get('infojson_filename'))
+ elif self._add_infojson is True:
+ self.to_screen('The info-json can only be attached to mkv/mka files')
+
+ if not options:
+ self.to_screen('There isn\'t any metadata to add')
+ return [], info
+
+ temp_filename = prepend_extension(filename, 'temp')
+ self.to_screen('Adding metadata to "%s"' % filename)
+ self.run_ffmpeg_multiple_files(
+ (filename, metadata_filename), temp_filename,
+ itertools.chain(self._options(info['ext']), *options))
+ self._delete_downloaded_files(*files_to_delete)
+ os.replace(temp_filename, filename)
+ return [], info
+
+ @staticmethod
+ def _get_chapter_opts(chapters, metadata_filename):
+ with open(metadata_filename, 'w', encoding='utf-8') as f:
+ def ffmpeg_escape(text):
+ return re.sub(r'([\\=;#\n])', r'\\\1', text)
+
+ metadata_file_content = ';FFMETADATA1\n'
+ for chapter in chapters:
+ metadata_file_content += '[CHAPTER]\nTIMEBASE=1/1000\n'
+ metadata_file_content += 'START=%d\n' % (chapter['start_time'] * 1000)
+ metadata_file_content += 'END=%d\n' % (chapter['end_time'] * 1000)
+ chapter_title = chapter.get('title')
+ if chapter_title:
+ metadata_file_content += 'title=%s\n' % ffmpeg_escape(chapter_title)
+ f.write(metadata_file_content)
+ yield ('-map_metadata', '1')
+
+ def _get_metadata_opts(self, info):
+ meta_prefix = 'meta'
+ metadata = collections.defaultdict(dict)
+
+ def add(meta_list, info_list=None):
+ value = next((
+ info[key] for key in [f'{meta_prefix}_'] + list(variadic(info_list or meta_list))
+ if info.get(key) is not None), None)
+ if value not in ('', None):
+ value = ', '.join(map(str, variadic(value)))
+ value = value.replace('\0', '') # nul character cannot be passed in command line
+ metadata['common'].update({meta_f: value for meta_f in variadic(meta_list)})
+
+ # Info on media metadata/metadata supported by ffmpeg:
+ # https://wiki.multimedia.cx/index.php/FFmpeg_Metadata
+ # https://kdenlive.org/en/project/adding-meta-data-to-mp4-video/
+ # https://kodi.wiki/view/Video_file_tagging
+
+ add('title', ('track', 'title'))
+ add('date', 'upload_date')
+ add(('description', 'synopsis'), 'description')
+ add(('purl', 'comment'), 'webpage_url')
+ add('track', 'track_number')
+ add('artist', ('artist', 'artists', 'creator', 'creators', 'uploader', 'uploader_id'))
+ add('composer', ('composer', 'composers'))
+ add('genre', ('genre', 'genres'))
+ add('album')
+ add('album_artist', ('album_artist', 'album_artists'))
+ add('disc', 'disc_number')
+ add('show', 'series')
+ add('season_number')
+ add('episode_id', ('episode', 'episode_id'))
+ add('episode_sort', 'episode_number')
+ if 'embed-metadata' in self.get_param('compat_opts', []):
+ add('comment', 'description')
+ metadata['common'].pop('synopsis', None)
+
+ meta_regex = rf'{re.escape(meta_prefix)}(?P<i>\d+)?_(?P<key>.+)'
+ for key, value in info.items():
+ mobj = re.fullmatch(meta_regex, key)
+ if value is not None and mobj:
+ metadata[mobj.group('i') or 'common'][mobj.group('key')] = value.replace('\0', '')
+
+ # Write id3v1 metadata also since Windows Explorer can't handle id3v2 tags
+ yield ('-write_id3v1', '1')
+
+ for name, value in metadata['common'].items():
+ yield ('-metadata', f'{name}={value}')
+
+ stream_idx = 0
+ for fmt in info.get('requested_formats') or [info]:
+ stream_count = 2 if 'none' not in (fmt.get('vcodec'), fmt.get('acodec')) else 1
+ lang = ISO639Utils.short2long(fmt.get('language') or '') or fmt.get('language')
+ for i in range(stream_idx, stream_idx + stream_count):
+ if lang:
+ metadata[str(i)].setdefault('language', lang)
+ for name, value in metadata[str(i)].items():
+ yield (f'-metadata:s:{i}', f'{name}={value}')
+ stream_idx += stream_count
+
+ def _get_infojson_opts(self, info, infofn):
+ if not infofn or not os.path.exists(infofn):
+ if self._add_infojson is not True:
+ return
+ infofn = infofn or '%s.temp' % (
+ self._downloader.prepare_filename(info, 'infojson')
+ or replace_extension(self._downloader.prepare_filename(info), 'info.json', info['ext']))
+ if not self._downloader._ensure_dir_exists(infofn):
+ return
+ self.write_debug(f'Writing info-json to: {infofn}')
+ write_json_file(self._downloader.sanitize_info(info, self.get_param('clean_infojson', True)), infofn)
+ info['infojson_filename'] = infofn
+
+ old_stream, new_stream = self.get_stream_number(info['filepath'], ('tags', 'mimetype'), 'application/json')
+ if old_stream is not None:
+ yield ('-map', '-0:%d' % old_stream)
+ new_stream -= 1
+
+ yield (
+ '-attach', self._ffmpeg_filename_argument(infofn),
+ f'-metadata:s:{new_stream}', 'mimetype=application/json',
+ f'-metadata:s:{new_stream}', 'filename=info.json',
+ )
+
+
+class FFmpegMergerPP(FFmpegPostProcessor):
+ SUPPORTED_EXTS = MEDIA_EXTENSIONS.common_video
+
+ @PostProcessor._restrict_to(images=False)
+ def run(self, info):
+ filename = info['filepath']
+ temp_filename = prepend_extension(filename, 'temp')
+ args = ['-c', 'copy']
+ audio_streams = 0
+ for (i, fmt) in enumerate(info['requested_formats']):
+ if fmt.get('acodec') != 'none':
+ args.extend(['-map', f'{i}:a:0'])
+ aac_fixup = fmt['protocol'].startswith('m3u8') and self.get_audio_codec(fmt['filepath']) == 'aac'
+ if aac_fixup:
+ args.extend([f'-bsf:a:{audio_streams}', 'aac_adtstoasc'])
+ audio_streams += 1
+ if fmt.get('vcodec') != 'none':
+ args.extend(['-map', '%u:v:0' % (i)])
+ self.to_screen('Merging formats into "%s"' % filename)
+ self.run_ffmpeg_multiple_files(info['__files_to_merge'], temp_filename, args)
+ os.rename(encodeFilename(temp_filename), encodeFilename(filename))
+ return info['__files_to_merge'], info
+
+ def can_merge(self):
+ # TODO: figure out merge-capable ffmpeg version
+ if self.basename != 'avconv':
+ return True
+
+ required_version = '10-0'
+ if is_outdated_version(
+ self._versions[self.basename], required_version):
+ warning = ('Your copy of %s is outdated and unable to properly mux separate video and audio files, '
+ 'yt-dlp will download single file media. '
+ 'Update %s to version %s or newer to fix this.') % (
+ self.basename, self.basename, required_version)
+ self.report_warning(warning)
+ return False
+ return True
+
+
+class FFmpegFixupPostProcessor(FFmpegPostProcessor):
+ def _fixup(self, msg, filename, options):
+ temp_filename = prepend_extension(filename, 'temp')
+
+ self.to_screen(f'{msg} of "{filename}"')
+ self.run_ffmpeg(filename, temp_filename, options)
+
+ os.replace(temp_filename, filename)
+
+
+class FFmpegFixupStretchedPP(FFmpegFixupPostProcessor):
+ @PostProcessor._restrict_to(images=False, audio=False)
+ def run(self, info):
+ stretched_ratio = info.get('stretched_ratio')
+ if stretched_ratio not in (None, 1):
+ self._fixup('Fixing aspect ratio', info['filepath'], [
+ *self.stream_copy_opts(), '-aspect', '%f' % stretched_ratio])
+ return [], info
+
+
+class FFmpegFixupM4aPP(FFmpegFixupPostProcessor):
+ @PostProcessor._restrict_to(images=False, video=False)
+ def run(self, info):
+ if info.get('container') == 'm4a_dash':
+ self._fixup('Correcting container', info['filepath'], [*self.stream_copy_opts(), '-f', 'mp4'])
+ return [], info
+
+
+class FFmpegFixupM3u8PP(FFmpegFixupPostProcessor):
+ def _needs_fixup(self, info):
+ yield info['ext'] in ('mp4', 'm4a')
+ yield info['protocol'].startswith('m3u8')
+ try:
+ metadata = self.get_metadata_object(info['filepath'])
+ except PostProcessingError as e:
+ self.report_warning(f'Unable to extract metadata: {e.msg}')
+ yield True
+ else:
+ yield traverse_obj(metadata, ('format', 'format_name'), casesense=False) == 'mpegts'
+
+ @PostProcessor._restrict_to(images=False)
+ def run(self, info):
+ if all(self._needs_fixup(info)):
+ args = ['-f', 'mp4']
+ if self.get_audio_codec(info['filepath']) == 'aac':
+ args.extend(['-bsf:a', 'aac_adtstoasc'])
+ self._fixup('Fixing MPEG-TS in MP4 container', info['filepath'], [
+ *self.stream_copy_opts(), *args])
+ return [], info
+
+
+class FFmpegFixupTimestampPP(FFmpegFixupPostProcessor):
+
+ def __init__(self, downloader=None, trim=0.001):
+ # "trim" should be used when the video contains unintended packets
+ super().__init__(downloader)
+ assert isinstance(trim, (int, float))
+ self.trim = str(trim)
+
+ @PostProcessor._restrict_to(images=False)
+ def run(self, info):
+ if not self._features.get('setts'):
+ self.report_warning(
+ 'A re-encode is needed to fix timestamps in older versions of ffmpeg. '
+ 'Please install ffmpeg 4.4 or later to fixup without re-encoding')
+ opts = ['-vf', 'setpts=PTS-STARTPTS']
+ else:
+ opts = ['-c', 'copy', '-bsf', 'setts=ts=TS-STARTPTS']
+ self._fixup('Fixing frame timestamp', info['filepath'], opts + [*self.stream_copy_opts(False), '-ss', self.trim])
+ return [], info
+
+
+class FFmpegCopyStreamPP(FFmpegFixupPostProcessor):
+ MESSAGE = 'Copying stream'
+
+ @PostProcessor._restrict_to(images=False)
+ def run(self, info):
+ self._fixup(self.MESSAGE, info['filepath'], self.stream_copy_opts())
+ return [], info
+
+
+class FFmpegFixupDurationPP(FFmpegCopyStreamPP):
+ MESSAGE = 'Fixing video duration'
+
+
+class FFmpegFixupDuplicateMoovPP(FFmpegCopyStreamPP):
+ MESSAGE = 'Fixing duplicate MOOV atoms'
+
+
+class FFmpegSubtitlesConvertorPP(FFmpegPostProcessor):
+ SUPPORTED_EXTS = MEDIA_EXTENSIONS.subtitles
+
+ def __init__(self, downloader=None, format=None):
+ super().__init__(downloader)
+ self.format = format
+
+ def run(self, info):
+ subs = info.get('requested_subtitles')
+ new_ext = self.format
+ new_format = new_ext
+ if new_format == 'vtt':
+ new_format = 'webvtt'
+ if subs is None:
+ self.to_screen('There aren\'t any subtitles to convert')
+ return [], info
+ self.to_screen('Converting subtitles')
+ sub_filenames = []
+ for lang, sub in subs.items():
+ if not os.path.exists(sub.get('filepath', '')):
+ self.report_warning(f'Skipping converting {lang} subtitle because the file is missing')
+ continue
+ ext = sub['ext']
+ if ext == new_ext:
+ self.to_screen('Subtitle file for %s is already in the requested format' % new_ext)
+ continue
+ elif ext == 'json':
+ self.to_screen(
+ 'You have requested to convert json subtitles into another format, '
+ 'which is currently not possible')
+ continue
+ old_file = sub['filepath']
+ sub_filenames.append(old_file)
+ new_file = replace_extension(old_file, new_ext)
+
+ if ext in ('dfxp', 'ttml', 'tt'):
+ self.report_warning(
+ 'You have requested to convert dfxp (TTML) subtitles into another format, '
+ 'which results in style information loss')
+
+ dfxp_file = old_file
+ srt_file = replace_extension(old_file, 'srt')
+
+ with open(dfxp_file, 'rb') as f:
+ srt_data = dfxp2srt(f.read())
+
+ with open(srt_file, 'w', encoding='utf-8') as f:
+ f.write(srt_data)
+ old_file = srt_file
+
+ subs[lang] = {
+ 'ext': 'srt',
+ 'data': srt_data,
+ 'filepath': srt_file,
+ }
+
+ if new_ext == 'srt':
+ continue
+ else:
+ sub_filenames.append(srt_file)
+
+ self.run_ffmpeg(old_file, new_file, ['-f', new_format])
+
+ with open(new_file, encoding='utf-8') as f:
+ subs[lang] = {
+ 'ext': new_ext,
+ 'data': f.read(),
+ 'filepath': new_file,
+ }
+
+ info['__files_to_move'][new_file] = replace_extension(
+ info['__files_to_move'][sub['filepath']], new_ext)
+
+ return sub_filenames, info
+
+
+class FFmpegSplitChaptersPP(FFmpegPostProcessor):
+ def __init__(self, downloader, force_keyframes=False):
+ FFmpegPostProcessor.__init__(self, downloader)
+ self._force_keyframes = force_keyframes
+
+ def _prepare_filename(self, number, chapter, info):
+ info = info.copy()
+ info.update({
+ 'section_number': number,
+ 'section_title': chapter.get('title'),
+ 'section_start': chapter.get('start_time'),
+ 'section_end': chapter.get('end_time'),
+ })
+ return self._downloader.prepare_filename(info, 'chapter')
+
+ def _ffmpeg_args_for_chapter(self, number, chapter, info):
+ destination = self._prepare_filename(number, chapter, info)
+ if not self._downloader._ensure_dir_exists(encodeFilename(destination)):
+ return
+
+ chapter['filepath'] = destination
+ self.to_screen('Chapter %03d; Destination: %s' % (number, destination))
+ return (
+ destination,
+ ['-ss', str(chapter['start_time']),
+ '-t', str(chapter['end_time'] - chapter['start_time'])])
+
+ @PostProcessor._restrict_to(images=False)
+ def run(self, info):
+ self._fixup_chapters(info)
+ chapters = info.get('chapters') or []
+ if not chapters:
+ self.to_screen('Chapter information is unavailable')
+ return [], info
+
+ in_file = info['filepath']
+ if self._force_keyframes and len(chapters) > 1:
+ in_file = self.force_keyframes(in_file, (c['start_time'] for c in chapters))
+ self.to_screen('Splitting video by chapters; %d chapters found' % len(chapters))
+ for idx, chapter in enumerate(chapters):
+ destination, opts = self._ffmpeg_args_for_chapter(idx + 1, chapter, info)
+ self.real_run_ffmpeg([(in_file, opts)], [(destination, self.stream_copy_opts())])
+ if in_file != info['filepath']:
+ self._delete_downloaded_files(in_file, msg=None)
+ return [], info
+
+
+class FFmpegThumbnailsConvertorPP(FFmpegPostProcessor):
+ SUPPORTED_EXTS = MEDIA_EXTENSIONS.thumbnails
+ FORMAT_RE = create_mapping_re(SUPPORTED_EXTS)
+
+ def __init__(self, downloader=None, format=None):
+ super().__init__(downloader)
+ self.mapping = format
+
+ @classmethod
+ def is_webp(cls, path):
+ deprecation_warning(f'{cls.__module__}.{cls.__name__}.is_webp is deprecated')
+ return imghdr.what(path) == 'webp'
+
+ def fixup_webp(self, info, idx=-1):
+ thumbnail_filename = info['thumbnails'][idx]['filepath']
+ _, thumbnail_ext = os.path.splitext(thumbnail_filename)
+ if thumbnail_ext:
+ if thumbnail_ext.lower() != '.webp' and imghdr.what(thumbnail_filename) == 'webp':
+ self.to_screen('Correcting thumbnail "%s" extension to webp' % thumbnail_filename)
+ webp_filename = replace_extension(thumbnail_filename, 'webp')
+ os.replace(thumbnail_filename, webp_filename)
+ info['thumbnails'][idx]['filepath'] = webp_filename
+ info['__files_to_move'][webp_filename] = replace_extension(
+ info['__files_to_move'].pop(thumbnail_filename), 'webp')
+
+ @staticmethod
+ def _options(target_ext):
+ yield from ('-update', '1')
+ if target_ext == 'jpg':
+ yield from ('-bsf:v', 'mjpeg2jpeg')
+
+ def convert_thumbnail(self, thumbnail_filename, target_ext):
+ thumbnail_conv_filename = replace_extension(thumbnail_filename, target_ext)
+
+ self.to_screen(f'Converting thumbnail "{thumbnail_filename}" to {target_ext}')
+ _, source_ext = os.path.splitext(thumbnail_filename)
+ self.real_run_ffmpeg(
+ [(thumbnail_filename, [] if source_ext == '.gif' else ['-f', 'image2', '-pattern_type', 'none'])],
+ [(thumbnail_conv_filename, self._options(target_ext))])
+ return thumbnail_conv_filename
+
+ def run(self, info):
+ files_to_delete = []
+ has_thumbnail = False
+
+ for idx, thumbnail_dict in enumerate(info.get('thumbnails') or []):
+ original_thumbnail = thumbnail_dict.get('filepath')
+ if not original_thumbnail:
+ continue
+ has_thumbnail = True
+ self.fixup_webp(info, idx)
+ original_thumbnail = thumbnail_dict['filepath'] # Path can change during fixup
+ thumbnail_ext = os.path.splitext(original_thumbnail)[1][1:].lower()
+ if thumbnail_ext == 'jpeg':
+ thumbnail_ext = 'jpg'
+ target_ext, _skip_msg = resolve_mapping(thumbnail_ext, self.mapping)
+ if _skip_msg:
+ self.to_screen(f'Not converting thumbnail "{original_thumbnail}"; {_skip_msg}')
+ continue
+ thumbnail_dict['filepath'] = self.convert_thumbnail(original_thumbnail, target_ext)
+ files_to_delete.append(original_thumbnail)
+ info['__files_to_move'][thumbnail_dict['filepath']] = replace_extension(
+ info['__files_to_move'][original_thumbnail], target_ext)
+
+ if not has_thumbnail:
+ self.to_screen('There aren\'t any thumbnails to convert')
+ return files_to_delete, info
+
+
+class FFmpegConcatPP(FFmpegPostProcessor):
+ def __init__(self, downloader, only_multi_video=False):
+ self._only_multi_video = only_multi_video
+ super().__init__(downloader)
+
+ def _get_codecs(self, file):
+ codecs = traverse_obj(self.get_metadata_object(file), ('streams', ..., 'codec_name'))
+ self.write_debug(f'Codecs = {", ".join(codecs)}')
+ return tuple(codecs)
+
+ def concat_files(self, in_files, out_file):
+ if not self._downloader._ensure_dir_exists(out_file):
+ return
+ if len(in_files) == 1:
+ if os.path.realpath(in_files[0]) != os.path.realpath(out_file):
+ self.to_screen(f'Moving "{in_files[0]}" to "{out_file}"')
+ os.replace(in_files[0], out_file)
+ return []
+
+ if len(set(map(self._get_codecs, in_files))) > 1:
+ raise PostProcessingError(
+ 'The files have different streams/codecs and cannot be concatenated. '
+ 'Either select different formats or --recode-video them to a common format')
+
+ self.to_screen(f'Concatenating {len(in_files)} files; Destination: {out_file}')
+ super().concat_files(in_files, out_file)
+ return in_files
+
+ @PostProcessor._restrict_to(images=False, simulated=False)
+ def run(self, info):
+ entries = info.get('entries') or []
+ if not any(entries) or (self._only_multi_video and info['_type'] != 'multi_video'):
+ return [], info
+ elif traverse_obj(entries, (..., lambda k, v: k == 'requested_downloads' and len(v) > 1)):
+ raise PostProcessingError('Concatenation is not supported when downloading multiple separate formats')
+
+ in_files = traverse_obj(entries, (..., 'requested_downloads', 0, 'filepath')) or []
+ if len(in_files) < len(entries):
+ raise PostProcessingError('Aborting concatenation because some downloads failed')
+
+ exts = traverse_obj(entries, (..., 'requested_downloads', 0, 'ext'), (..., 'ext'))
+ ie_copy = collections.ChainMap({'ext': exts[0] if len(set(exts)) == 1 else 'mkv'},
+ info, self._downloader._playlist_infodict(info))
+ out_file = self._downloader.prepare_filename(ie_copy, 'pl_video')
+
+ files_to_delete = self.concat_files(in_files, out_file)
+
+ info['requested_downloads'] = [{
+ 'filepath': out_file,
+ 'ext': ie_copy['ext'],
+ }]
+ return files_to_delete, info
diff --git a/yt_dlp/postprocessor/metadataparser.py b/yt_dlp/postprocessor/metadataparser.py
new file mode 100644
index 0000000..1d60542
--- /dev/null
+++ b/yt_dlp/postprocessor/metadataparser.py
@@ -0,0 +1,125 @@
+import re
+
+from .common import PostProcessor
+from ..utils import Namespace, filter_dict, function_with_repr
+
+
+class MetadataParserPP(PostProcessor):
+ def __init__(self, downloader, actions):
+ super().__init__(downloader)
+ self._actions = []
+ for f in actions:
+ action, *args = f
+ assert action in self.Actions
+ self._actions.append(action(self, *args))
+
+ @classmethod
+ def validate_action(cls, action, *data):
+ """Each action can be:
+ (Actions.INTERPRET, from, to) OR
+ (Actions.REPLACE, field, search, replace)
+ """
+ if action not in cls.Actions:
+ raise ValueError(f'{action!r} is not a valid action')
+ action(cls, *data) # So this can raise error to validate
+
+ @staticmethod
+ def field_to_template(tmpl):
+ if re.match(r'[a-zA-Z_]+$', tmpl):
+ return f'%({tmpl})s'
+
+ from ..YoutubeDL import YoutubeDL
+ err = YoutubeDL.validate_outtmpl(tmpl)
+ if err:
+ raise err
+ return tmpl
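+
+ # Illustrative: field_to_template('title') -> '%(title)s', while a full
+ # template such as '%(artist)s - %(title)s' is validated and returned as-is.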
+
+ @staticmethod
+ def format_to_regex(fmt):
+ r"""
+ Converts a string like
+ '%(title)s - %(artist)s'
+ to a regex like
+ '(?P<title>.+)\ \-\ (?P<artist>.+)'
+ """
+ if not re.search(r'%\(\w+\)s', fmt):
+ return fmt
+ lastpos = 0
+ regex = ''
+ # replace %(..)s with regex group and escape other string parts
+ for match in re.finditer(r'%\((\w+)\)s', fmt):
+ regex += re.escape(fmt[lastpos:match.start()])
+ regex += rf'(?P<{match.group(1)}>.+)'
+ lastpos = match.end()
+ if lastpos < len(fmt):
+ regex += re.escape(fmt[lastpos:])
+ return regex
+
+ def run(self, info):
+ for f in self._actions:
+ f(info)
+ return [], info
+
+ @function_with_repr
+ def interpretter(self, inp, out):
+ def f(info):
+ data_to_parse = self._downloader.evaluate_outtmpl(template, info)
+ self.write_debug(f'Searching for {out_re.pattern!r} in {template!r}')
+ match = out_re.search(data_to_parse)
+ if match is None:
+ self.to_screen(f'Could not interpret {inp!r} as {out!r}')
+ return
+ for attribute, value in filter_dict(match.groupdict()).items():
+ info[attribute] = value
+ self.to_screen(f'Parsed {attribute} from {template!r}: {value!r}')
+
+ template = self.field_to_template(inp)
+ out_re = re.compile(self.format_to_regex(out))
+ return f
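+
+ # Illustrative: interpretter('title', '%(artist)s - %(track)s') applied to
+ # info['title'] == 'Daft Punk - Around the World' sets
+ # info['artist'] = 'Daft Punk' and info['track'] = 'Around the World'.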
+
+ @function_with_repr
+ def replacer(self, field, search, replace):
+ def f(info):
+ val = info.get(field)
+ if val is None:
+ self.to_screen(f'Video does not have a {field}')
+ return
+ elif not isinstance(val, str):
+ self.report_warning(f'Cannot replace in field {field} since it is a {type(val).__name__}')
+ return
+ self.write_debug(f'Replacing all {search!r} in {field} with {replace!r}')
+ info[field], n = search_re.subn(replace, val)
+ if n:
+ self.to_screen(f'Changed {field} to: {info[field]}')
+ else:
+ self.to_screen(f'Did not find {search!r} in {field}')
+
+ search_re = re.compile(search)
+ return f
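+
+ # Illustrative: replacer('title', r'\s*\[Official Video\]$', '') strips a
+ # trailing '[Official Video]' suffix from info['title'].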
+
+ Actions = Namespace(INTERPRET=interpretter, REPLACE=replacer)
+
+
+class MetadataFromFieldPP(MetadataParserPP):
+ @classmethod
+ def to_action(cls, f):
+ match = re.match(r'(?s)(?P<in>.*?)(?<!\\):(?P<out>.+)$', f)
+ if match is None:
+ raise ValueError(f'it should be FROM:TO, not {f!r}')
+ return (
+ cls.Actions.INTERPRET,
+ match.group('in').replace('\\:', ':'),
+ match.group('out'),
+ )
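+
+ # Illustrative: to_action('%(title)s:%(artist)s - %(track)s') returns
+ # (Actions.INTERPRET, '%(title)s', '%(artist)s - %(track)s'); the split
+ # happens at the first unescaped ':'.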
+
+ def __init__(self, downloader, formats):
+ super().__init__(downloader, [self.to_action(f) for f in formats])
+
+
+# Deprecated
+class MetadataFromTitlePP(MetadataParserPP):
+ def __init__(self, downloader, titleformat):
+ super().__init__(downloader, [(self.Actions.INTERPRET, 'title', titleformat)])
+ self.deprecation_warning(
+ 'yt_dlp.postprocessor.MetadataFromTitlePP is deprecated '
+ 'and may be removed in a future version. Use yt_dlp.postprocessor.MetadataFromFieldPP instead')
diff --git a/yt_dlp/postprocessor/modify_chapters.py b/yt_dlp/postprocessor/modify_chapters.py
new file mode 100644
index 0000000..f521986
--- /dev/null
+++ b/yt_dlp/postprocessor/modify_chapters.py
@@ -0,0 +1,336 @@
+import copy
+import heapq
+import os
+
+from .common import PostProcessor
+from .ffmpeg import FFmpegPostProcessor, FFmpegSubtitlesConvertorPP
+from .sponsorblock import SponsorBlockPP
+from ..utils import PostProcessingError, orderedSet, prepend_extension
+
+_TINY_CHAPTER_DURATION = 1
+DEFAULT_SPONSORBLOCK_CHAPTER_TITLE = '[SponsorBlock]: %(category_names)l'
+
+
+class ModifyChaptersPP(FFmpegPostProcessor):
+ def __init__(self, downloader, remove_chapters_patterns=None, remove_sponsor_segments=None, remove_ranges=None,
+ *, sponsorblock_chapter_title=DEFAULT_SPONSORBLOCK_CHAPTER_TITLE, force_keyframes=False):
+ FFmpegPostProcessor.__init__(self, downloader)
+ self._remove_chapters_patterns = set(remove_chapters_patterns or [])
+ self._remove_sponsor_segments = set(remove_sponsor_segments or []) - set(SponsorBlockPP.NON_SKIPPABLE_CATEGORIES.keys())
+ self._ranges_to_remove = set(remove_ranges or [])
+ self._sponsorblock_chapter_title = sponsorblock_chapter_title
+ self._force_keyframes = force_keyframes
+
+ @PostProcessor._restrict_to(images=False)
+ def run(self, info):
+ self._fixup_chapters(info)
+ # Chapters must be preserved intact when downloading multiple formats of the same video.
+ chapters, sponsor_chapters = self._mark_chapters_to_remove(
+ copy.deepcopy(info.get('chapters')) or [],
+ copy.deepcopy(info.get('sponsorblock_chapters')) or [])
+ if not chapters and not sponsor_chapters:
+ return [], info
+
+ real_duration = self._get_real_video_duration(info['filepath'])
+ if not chapters:
+ chapters = [{'start_time': 0, 'end_time': info.get('duration') or real_duration, 'title': info['title']}]
+
+ info['chapters'], cuts = self._remove_marked_arrange_sponsors(chapters + sponsor_chapters)
+ if not cuts:
+ return [], info
+ elif not info['chapters']:
+ self.report_warning('You have requested to remove the entire video, which is not possible')
+ return [], info
+
+ original_duration, info['duration'] = info.get('duration'), info['chapters'][-1]['end_time']
+ if self._duration_mismatch(real_duration, original_duration, 1):
+ if not self._duration_mismatch(real_duration, info['duration']):
+ self.to_screen(f'Skipping {self.pp_key()} since the video appears to be already cut')
+ return [], info
+ if not info.get('__real_download'):
+ raise PostProcessingError('Cannot cut video since the real and expected durations mismatch. '
+ 'Different chapters may have already been removed')
+ else:
+ self.write_debug('Expected and actual durations mismatch')
+
+ concat_opts = self._make_concat_opts(cuts, real_duration)
+ self.write_debug('Concat spec = %s' % ', '.join(f'{c.get("inpoint", 0.0)}-{c.get("outpoint", "inf")}' for c in concat_opts))
+
+ def remove_chapters(file, is_sub):
+ return file, self.remove_chapters(file, cuts, concat_opts, self._force_keyframes and not is_sub)
+
+ in_out_files = [remove_chapters(info['filepath'], False)]
+ in_out_files.extend(remove_chapters(in_file, True) for in_file in self._get_supported_subs(info))
+
+ # Renaming should only happen after all files are processed
+ files_to_remove = []
+ for in_file, out_file in in_out_files:
+ mtime = os.stat(in_file).st_mtime
+ uncut_file = prepend_extension(in_file, 'uncut')
+ os.replace(in_file, uncut_file)
+ os.replace(out_file, in_file)
+ self.try_utime(in_file, mtime, mtime)
+ files_to_remove.append(uncut_file)
+
+ return files_to_remove, info
+
+ def _mark_chapters_to_remove(self, chapters, sponsor_chapters):
+ if self._remove_chapters_patterns:
+ warn_no_chapter_to_remove = True
+ if not chapters:
+ self.to_screen('Chapter information is unavailable')
+ warn_no_chapter_to_remove = False
+ for c in chapters:
+ if any(regex.search(c['title']) for regex in self._remove_chapters_patterns):
+ c['remove'] = True
+ warn_no_chapter_to_remove = False
+ if warn_no_chapter_to_remove:
+ self.to_screen('There are no chapters matching the regex')
+
+ if self._remove_sponsor_segments:
+ warn_no_chapter_to_remove = True
+ if not sponsor_chapters:
+ self.to_screen('SponsorBlock information is unavailable')
+ warn_no_chapter_to_remove = False
+ for c in sponsor_chapters:
+ if c['category'] in self._remove_sponsor_segments:
+ c['remove'] = True
+ warn_no_chapter_to_remove = False
+ if warn_no_chapter_to_remove:
+ self.to_screen('There are no matching SponsorBlock chapters')
+
+ sponsor_chapters.extend({
+ 'start_time': start,
+ 'end_time': end,
+ 'category': 'manually_removed',
+ '_categories': [('manually_removed', start, end, 'Manually removed')],
+ 'remove': True,
+ } for start, end in self._ranges_to_remove)
+
+ return chapters, sponsor_chapters
+
+ def _get_supported_subs(self, info):
+ for sub in (info.get('requested_subtitles') or {}).values():
+ sub_file = sub.get('filepath')
+ # The file might have been removed by --embed-subs
+ if not sub_file or not os.path.exists(sub_file):
+ continue
+ ext = sub['ext']
+ if ext not in FFmpegSubtitlesConvertorPP.SUPPORTED_EXTS:
+ self.report_warning(f'Cannot remove chapters from external {ext} subtitles; "{sub_file}" is now out of sync')
+ continue
+ # TODO: create __real_download for subs?
+ yield sub_file
+
+ def _remove_marked_arrange_sponsors(self, chapters):
+ # Store cuts separately, since adjacent and overlapping cuts must be merged.
+ cuts = []
+
+ def append_cut(c):
+ assert 'remove' in c, 'Only cuts may be appended to cuts'
+ last_to_cut = cuts[-1] if cuts else None
+ if last_to_cut and last_to_cut['end_time'] >= c['start_time']:
+ last_to_cut['end_time'] = max(last_to_cut['end_time'], c['end_time'])
+ else:
+ cuts.append(c)
+ return len(cuts) - 1
+
+ def excess_duration(c):
+ # Cuts that are completely within the chapter reduce chapters' duration.
+ # Since cuts can overlap, the excess duration may be less than the sum of the cuts' durations.
+ # To avoid double counting, the chapter stores the index of the first cut within it,
+ # instead of storing excess duration. append_cut ensures that subsequent cuts (if any)
+ # will be merged with previous ones (if necessary).
+ cut_idx, excess = c.pop('cut_idx', len(cuts)), 0
+ while cut_idx < len(cuts):
+ cut = cuts[cut_idx]
+ if cut['start_time'] >= c['end_time']:
+ break
+ if cut['end_time'] > c['start_time']:
+ excess += min(cut['end_time'], c['end_time'])
+ excess -= max(cut['start_time'], c['start_time'])
+ cut_idx += 1
+ return excess
+
+ new_chapters = []
+
+ def append_chapter(c):
+ assert 'remove' not in c, 'Cut is appended to chapters'
+ length = c['end_time'] - c['start_time'] - excess_duration(c)
+ # Chapter is completely covered by cuts or sponsors.
+ if length <= 0:
+ return
+ start = new_chapters[-1]['end_time'] if new_chapters else 0
+ c.update(start_time=start, end_time=start + length)
+ new_chapters.append(c)
+
+ # Turn into a priority queue, index is a tie breaker.
+ # Plain stack sorted by start_time is not enough: after splitting the chapter,
+ # the part returned to the stack is not guaranteed to have start_time
+ # less than or equal to that of the stack's head.
+ chapters = [(c['start_time'], i, c) for i, c in enumerate(chapters)]
+ heapq.heapify(chapters)
+
+ _, cur_i, cur_chapter = heapq.heappop(chapters)
+ while chapters:
+ _, i, c = heapq.heappop(chapters)
+ # Non-overlapping chapters or cuts can be appended directly. However,
+ # adjacent non-overlapping cuts must be merged, which is handled by append_cut.
+ if cur_chapter['end_time'] <= c['start_time']:
+ (append_chapter if 'remove' not in cur_chapter else append_cut)(cur_chapter)
+ cur_i, cur_chapter = i, c
+ continue
+
+ # Eight possibilities for overlapping chapters: (cut, cut), (cut, sponsor),
+ # (cut, normal), (sponsor, cut), (normal, cut), (sponsor, sponsor),
+ # (sponsor, normal), and (normal, sponsor). There is no (normal, normal):
+ # normal chapters are assumed not to overlap.
+ if 'remove' in cur_chapter:
+ # (cut, cut): adjust end_time.
+ if 'remove' in c:
+ cur_chapter['end_time'] = max(cur_chapter['end_time'], c['end_time'])
+ # (cut, sponsor/normal): chop the beginning of the later chapter
+ # (if it's not completely hidden by the cut). Push to the priority queue
+ # to restore sorting by start_time: with beginning chopped, c may actually
+ # start later than the remaining chapters from the queue.
+ elif cur_chapter['end_time'] < c['end_time']:
+ c['start_time'] = cur_chapter['end_time']
+ c['_was_cut'] = True
+ heapq.heappush(chapters, (c['start_time'], i, c))
+ # (sponsor/normal, cut).
+ elif 'remove' in c:
+ cur_chapter['_was_cut'] = True
+ # Chop the end of the current chapter if the cut is not contained within it.
+ # Chopping the end doesn't break start_time sorting, no PQ push is necessary.
+ if cur_chapter['end_time'] <= c['end_time']:
+ cur_chapter['end_time'] = c['start_time']
+ append_chapter(cur_chapter)
+ cur_i, cur_chapter = i, c
+ continue
+ # Current chapter contains the cut within it. If the current chapter is
+ # a sponsor chapter, check whether the categories before and after the cut differ.
+ if '_categories' in cur_chapter:
+ after_c = dict(cur_chapter, start_time=c['end_time'], _categories=[])
+ cur_cats = []
+ for cat_start_end in cur_chapter['_categories']:
+ if cat_start_end[1] < c['start_time']:
+ cur_cats.append(cat_start_end)
+ if cat_start_end[2] > c['end_time']:
+ after_c['_categories'].append(cat_start_end)
+ cur_chapter['_categories'] = cur_cats
+ if cur_chapter['_categories'] != after_c['_categories']:
+ # Categories before and after the cut differ: push the after part to PQ.
+ heapq.heappush(chapters, (after_c['start_time'], cur_i, after_c))
+ cur_chapter['end_time'] = c['start_time']
+ append_chapter(cur_chapter)
+ cur_i, cur_chapter = i, c
+ continue
+ # Either sponsor categories before and after the cut are the same or
+ # we're dealing with a normal chapter. Just register an outstanding cut:
+ # subsequent append_chapter will reduce the duration.
+ cur_chapter.setdefault('cut_idx', append_cut(c))
+ # (sponsor, normal): if a normal chapter is not completely overlapped,
+ # chop the beginning of it and push it to PQ.
+ elif '_categories' in cur_chapter and '_categories' not in c:
+ if cur_chapter['end_time'] < c['end_time']:
+ c['start_time'] = cur_chapter['end_time']
+ c['_was_cut'] = True
+ heapq.heappush(chapters, (c['start_time'], i, c))
+ # (normal, sponsor) and (sponsor, sponsor)
+ else:
+ assert '_categories' in c, 'Normal chapters overlap'
+ cur_chapter['_was_cut'] = True
+ c['_was_cut'] = True
+ # Push the part after the sponsor to PQ.
+ if cur_chapter['end_time'] > c['end_time']:
+ # deepcopy to make categories in after_c and cur_chapter/c refer to different lists.
+ after_c = dict(copy.deepcopy(cur_chapter), start_time=c['end_time'])
+ heapq.heappush(chapters, (after_c['start_time'], cur_i, after_c))
+ # Push the part after the overlap to PQ.
+ elif c['end_time'] > cur_chapter['end_time']:
+ after_cur = dict(copy.deepcopy(c), start_time=cur_chapter['end_time'])
+ heapq.heappush(chapters, (after_cur['start_time'], cur_i, after_cur))
+ c['end_time'] = cur_chapter['end_time']
+ # (sponsor, sponsor): merge categories in the overlap.
+ if '_categories' in cur_chapter:
+ c['_categories'] = cur_chapter['_categories'] + c['_categories']
+ # Inherit the cuts that the current chapter has accumulated within it.
+ if 'cut_idx' in cur_chapter:
+ c['cut_idx'] = cur_chapter['cut_idx']
+ cur_chapter['end_time'] = c['start_time']
+ append_chapter(cur_chapter)
+ cur_i, cur_chapter = i, c
+ (append_chapter if 'remove' not in cur_chapter else append_cut)(cur_chapter)
+ return self._remove_tiny_rename_sponsors(new_chapters), cuts
+
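
The merge loop above is a sweep over a priority queue keyed by start_time: whenever an overlap chops the beginning of a chapter, the remainder is pushed back into the queue so that ordering is restored. A toy reduction of that pattern, using plain tuples instead of chapter dicts and none of the category/cut logic:

import heapq

intervals = [(0, 5, 'a'), (3, 9, 'b'), (4, 6, 'c')]
heap = [(start, i, (start, end, name)) for i, (start, end, name) in enumerate(intervals)]
heapq.heapify(heap)

merged = []
while heap:
    _, i, (start, end, name) = heapq.heappop(heap)
    if merged and start < merged[-1][1]:  # overlaps the previously emitted interval
        start = merged[-1][1]             # chop the beginning...
        if start < end:
            heapq.heappush(heap, (start, i, (start, end, name)))  # ...and re-queue
        continue
    merged.append((start, end, name))
print(merged)  # [(0, 5, 'a'), (5, 9, 'b')]
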
+ def _remove_tiny_rename_sponsors(self, chapters):
+ new_chapters = []
+ for i, c in enumerate(chapters):
+ # Merge with the previous/next if the chapter is tiny.
+ # Only tiny chapters resulting from a cut can be skipped.
+ # Chapters that were already tiny in the original list will be preserved.
+ if (('_was_cut' in c or '_categories' in c)
+ and c['end_time'] - c['start_time'] < _TINY_CHAPTER_DURATION):
+ if not new_chapters:
+ # Prepend tiny chapter to the next one if possible.
+ if i < len(chapters) - 1:
+ chapters[i + 1]['start_time'] = c['start_time']
+ continue
+ else:
+ old_c = new_chapters[-1]
+ if i < len(chapters) - 1:
+ next_c = chapters[i + 1]
+ # Not a typo: key names in old_c and next_c are really different.
+ prev_is_sponsor = 'categories' in old_c
+ next_is_sponsor = '_categories' in next_c
+ # Preferentially prepend tiny normals to normals and sponsors to sponsors.
+ if (('_categories' not in c and prev_is_sponsor and not next_is_sponsor)
+ or ('_categories' in c and not prev_is_sponsor and next_is_sponsor)):
+ next_c['start_time'] = c['start_time']
+ continue
+ old_c['end_time'] = c['end_time']
+ continue
+
+ c.pop('_was_cut', None)
+ cats = c.pop('_categories', None)
+ if cats:
+ category, _, _, category_name = min(cats, key=lambda c: c[2] - c[1])
+ c.update({
+ 'category': category,
+ 'categories': orderedSet(x[0] for x in cats),
+ 'name': category_name,
+ 'category_names': orderedSet(x[3] for x in cats),
+ })
+ c['title'] = self._downloader.evaluate_outtmpl(self._sponsorblock_chapter_title, c.copy())
+ # Merge identically named sponsors.
+ if (new_chapters and 'categories' in new_chapters[-1]
+ and new_chapters[-1]['title'] == c['title']):
+ new_chapters[-1]['end_time'] = c['end_time']
+ continue
+ new_chapters.append(c)
+ return new_chapters
+
+ def remove_chapters(self, filename, ranges_to_cut, concat_opts, force_keyframes=False):
+ in_file = filename
+ out_file = prepend_extension(in_file, 'temp')
+ if force_keyframes:
+ in_file = self.force_keyframes(in_file, (t for c in ranges_to_cut for t in (c['start_time'], c['end_time'])))
+ self.to_screen(f'Removing chapters from {filename}')
+ self.concat_files([in_file] * len(concat_opts), out_file, concat_opts)
+ if in_file != filename:
+ self._delete_downloaded_files(in_file, msg=None)
+ return out_file
+
+ @staticmethod
+ def _make_concat_opts(chapters_to_remove, duration):
+ opts = [{}]
+ for s in chapters_to_remove:
+ # Do not create a zero-duration chunk at the beginning.
+ if s['start_time'] == 0:
+ opts[-1]['inpoint'] = f'{s["end_time"]:.6f}'
+ continue
+ opts[-1]['outpoint'] = f'{s["start_time"]:.6f}'
+ # Do not create a zero-duration chunk at the end.
+ if s['end_time'] < duration:
+ opts.append({'inpoint': f'{s["end_time"]:.6f}'})
+ return opts
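
For context, _make_concat_opts converts the cut list into ffmpeg concat-demuxer options: each kept chunk of the input gets an optional inpoint/outpoint, and remove_chapters then lists the same input file once per chunk. A standalone sketch of that bookkeeping with made-up timestamps:

# Standalone sketch of the inpoint/outpoint bookkeeping used above.
# `cuts` are {'start_time', 'end_time'} dicts sorted by start_time.
def make_concat_opts(cuts, duration):
    opts = [{}]
    for s in cuts:
        if s['start_time'] == 0:          # no zero-length chunk at the start
            opts[-1]['inpoint'] = f'{s["end_time"]:.6f}'
            continue
        opts[-1]['outpoint'] = f'{s["start_time"]:.6f}'
        if s['end_time'] < duration:      # no zero-length chunk at the end
            opts.append({'inpoint': f'{s["end_time"]:.6f}'})
    return opts

# Cutting 10-20s and 50-60s out of a 60s file keeps two chunks:
print(make_concat_opts([{'start_time': 10, 'end_time': 20},
                        {'start_time': 50, 'end_time': 60}], 60))
# [{'outpoint': '10.000000'}, {'inpoint': '20.000000', 'outpoint': '50.000000'}]
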
diff --git a/yt_dlp/postprocessor/movefilesafterdownload.py b/yt_dlp/postprocessor/movefilesafterdownload.py
new file mode 100644
index 0000000..23b0924
--- /dev/null
+++ b/yt_dlp/postprocessor/movefilesafterdownload.py
@@ -0,0 +1,53 @@
+import os
+
+from .common import PostProcessor
+from ..compat import shutil
+from ..utils import (
+ PostProcessingError,
+ decodeFilename,
+ encodeFilename,
+ make_dir,
+)
+
+
+class MoveFilesAfterDownloadPP(PostProcessor):
+
+ def __init__(self, downloader=None, downloaded=True):
+ PostProcessor.__init__(self, downloader)
+ self._downloaded = downloaded
+
+ @classmethod
+ def pp_key(cls):
+ return 'MoveFiles'
+
+ def run(self, info):
+ dl_path, dl_name = os.path.split(encodeFilename(info['filepath']))
+ finaldir = info.get('__finaldir', dl_path)
+ finalpath = os.path.join(finaldir, dl_name)
+ if self._downloaded:
+ info['__files_to_move'][info['filepath']] = decodeFilename(finalpath)
+
+ make_newfilename = lambda old: decodeFilename(os.path.join(finaldir, os.path.basename(encodeFilename(old))))
+ for oldfile, newfile in info['__files_to_move'].items():
+ if not newfile:
+ newfile = make_newfilename(oldfile)
+ if os.path.abspath(encodeFilename(oldfile)) == os.path.abspath(encodeFilename(newfile)):
+ continue
+ if not os.path.exists(encodeFilename(oldfile)):
+ self.report_warning('File "%s" cannot be found' % oldfile)
+ continue
+ if os.path.exists(encodeFilename(newfile)):
+ if self.get_param('overwrites', True):
+ self.report_warning('Replacing existing file "%s"' % newfile)
+ os.remove(encodeFilename(newfile))
+ else:
+ self.report_warning(
+ 'Cannot move file "%s" out of temporary directory since "%s" already exists. '
+ % (oldfile, newfile))
+ continue
+ make_dir(newfile, PostProcessingError)
+ self.to_screen(f'Moving file "{oldfile}" to "{newfile}"')
+ shutil.move(oldfile, newfile) # os.rename cannot move between volumes
+
+ info['filepath'] = finalpath
+ return [], info
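
The shutil.move call above matters because os.rename raises OSError (EXDEV) when source and destination live on different filesystems, while shutil.move falls back to copy-and-delete. A minimal stdlib-only sketch of the overwrite-then-move pattern used in run(); the paths and helper name are illustrative:

import os
import shutil

def move_with_overwrite(src, dst, overwrite=True):
    """Move src to dst; shutil.move copies+deletes across volumes
    where os.rename would raise OSError (EXDEV)."""
    if os.path.abspath(src) == os.path.abspath(dst):
        return
    if os.path.exists(dst):
        if not overwrite:
            raise FileExistsError(dst)
        os.remove(dst)
    os.makedirs(os.path.dirname(dst) or '.', exist_ok=True)
    shutil.move(src, dst)
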
diff --git a/yt_dlp/postprocessor/sponskrub.py b/yt_dlp/postprocessor/sponskrub.py
new file mode 100644
index 0000000..ff50d5b
--- /dev/null
+++ b/yt_dlp/postprocessor/sponskrub.py
@@ -0,0 +1,98 @@
+import os
+import shlex
+import subprocess
+
+from .common import PostProcessor
+from ..utils import (
+ Popen,
+ PostProcessingError,
+ check_executable,
+ cli_option,
+ encodeArgument,
+ encodeFilename,
+ prepend_extension,
+ shell_quote,
+ str_or_none,
+)
+
+
+# Deprecated in favor of the native implementation
+class SponSkrubPP(PostProcessor):
+ _temp_ext = 'spons'
+ _exe_name = 'sponskrub'
+
+ def __init__(self, downloader, path='', args=None, ignoreerror=False, cut=False, force=False, _from_cli=False):
+ PostProcessor.__init__(self, downloader)
+ self.force = force
+ self.cutout = cut
+ self.args = str_or_none(args) or '' # For backward compatibility
+ self.path = self.get_exe(path)
+
+ if not _from_cli:
+ self.deprecation_warning(
+ 'yt_dlp.postprocessor.SponSkrubPP support is deprecated and may be removed in a future version. '
+ 'Use yt_dlp.postprocessor.SponsorBlock and yt_dlp.postprocessor.ModifyChaptersPP instead')
+
+ if not ignoreerror and self.path is None:
+ if path:
+ raise PostProcessingError('sponskrub not found in "%s"' % path)
+ else:
+ raise PostProcessingError('sponskrub not found. Please install or provide the path using --sponskrub-path')
+
+ def get_exe(self, path=''):
+ if not path or not check_executable(path, ['-h']):
+ path = os.path.join(path, self._exe_name)
+ if not check_executable(path, ['-h']):
+ return None
+ return path
+
+ @PostProcessor._restrict_to(images=False)
+ def run(self, information):
+ if self.path is None:
+ return [], information
+
+ filename = information['filepath']
+ if not os.path.exists(encodeFilename(filename)): # no download
+ return [], information
+
+ if information['extractor_key'].lower() != 'youtube':
+ self.to_screen('Skipping sponskrub since it is not a YouTube video')
+ return [], information
+ if self.cutout and not self.force and not information.get('__real_download', False):
+ self.report_warning(
+ 'Skipping sponskrub since the video was already downloaded. '
+ 'Use --sponskrub-force to run sponskrub anyway')
+ return [], information
+
+ self.to_screen('Trying to %s sponsor sections' % ('remove' if self.cutout else 'mark'))
+ if self.cutout:
+ self.report_warning('Cutting out sponsor segments will cause the subtitles to go out of sync.')
+ if not information.get('__real_download', False):
+ self.report_warning('If sponskrub is run multiple times, unintended parts of the video could be cut out.')
+
+ temp_filename = prepend_extension(filename, self._temp_ext)
+ if os.path.exists(encodeFilename(temp_filename)):
+ os.remove(encodeFilename(temp_filename))
+
+ cmd = [self.path]
+ if not self.cutout:
+ cmd += ['-chapter']
+ cmd += cli_option(self._downloader.params, '-proxy', 'proxy')
+ cmd += shlex.split(self.args) # For backward compatibility
+ cmd += self._configuration_args(self._exe_name, use_compat=False)
+ cmd += ['--', information['id'], filename, temp_filename]
+ cmd = [encodeArgument(i) for i in cmd]
+
+ self.write_debug('sponskrub command line: %s' % shell_quote(cmd))
+ stdout, _, returncode = Popen.run(cmd, text=True, stdout=None if self.get_param('verbose') else subprocess.PIPE)
+
+ if not returncode:
+ os.replace(temp_filename, filename)
+ self.to_screen('Sponsor sections have been %s' % ('removed' if self.cutout else 'marked'))
+ elif returncode == 3:
+ self.to_screen('No segments in the SponsorBlock database')
+ else:
+ raise PostProcessingError(
+ stdout.strip().splitlines()[0 if stdout.strip().lower().startswith('unrecognised') else -1]
+ or f'sponskrub failed with error code {returncode}')
+ return [], information
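
SponSkrubPP builds its command as an argument list, splitting the legacy user-supplied string with shlex.split and shell-quoting only for the debug log. The stdlib round trip, with illustrative values:

import shlex

user_args = '-chapter'  # legacy free-form option string
cmd = ['sponskrub', *shlex.split(user_args), '--', 'VIDEO_ID', 'in.mp4', 'in.spons.mp4']
print(shlex.join(cmd))  # safely re-quoted, for display only; never passed to a shell
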
diff --git a/yt_dlp/postprocessor/sponsorblock.py b/yt_dlp/postprocessor/sponsorblock.py
new file mode 100644
index 0000000..6ba87cd
--- /dev/null
+++ b/yt_dlp/postprocessor/sponsorblock.py
@@ -0,0 +1,104 @@
+import hashlib
+import json
+import re
+import urllib.parse
+
+from .ffmpeg import FFmpegPostProcessor
+
+
+class SponsorBlockPP(FFmpegPostProcessor):
+ # https://wiki.sponsor.ajay.app/w/Types
+ EXTRACTORS = {
+ 'Youtube': 'YouTube',
+ }
+ POI_CATEGORIES = {
+ 'poi_highlight': 'Highlight',
+ }
+ NON_SKIPPABLE_CATEGORIES = {
+ **POI_CATEGORIES,
+ 'chapter': 'Chapter',
+ }
+ CATEGORIES = {
+ 'sponsor': 'Sponsor',
+ 'intro': 'Intermission/Intro Animation',
+ 'outro': 'Endcards/Credits',
+ 'selfpromo': 'Unpaid/Self Promotion',
+ 'preview': 'Preview/Recap',
+ 'filler': 'Filler Tangent',
+ 'interaction': 'Interaction Reminder',
+ 'music_offtopic': 'Non-Music Section',
+ **NON_SKIPPABLE_CATEGORIES
+ }
+
+ def __init__(self, downloader, categories=None, api='https://sponsor.ajay.app'):
+ FFmpegPostProcessor.__init__(self, downloader)
+ self._categories = tuple(categories or self.CATEGORIES.keys())
+ self._API_URL = api if re.match('^https?://', api) else 'https://' + api
+
+ def run(self, info):
+ extractor = info['extractor_key']
+ if extractor not in self.EXTRACTORS:
+ self.to_screen(f'SponsorBlock is not supported for {extractor}')
+ return [], info
+
+ self.to_screen('Fetching SponsorBlock segments')
+ info['sponsorblock_chapters'] = self._get_sponsor_chapters(info, info.get('duration'))
+ return [], info
+
+ def _get_sponsor_chapters(self, info, duration):
+ segments = self._get_sponsor_segments(info['id'], self.EXTRACTORS[info['extractor_key']])
+
+ def duration_filter(s):
+ start_end = s['segment']
+ # Ignore entire video segments (https://wiki.sponsor.ajay.app/w/Types).
+ if start_end == (0, 0):
+ return False
+ # Ignore milliseconds difference at the start.
+ if start_end[0] <= 1:
+ start_end[0] = 0
+ # Make POI chapters 1 second long so that they can be properly marked
+ if s['category'] in self.POI_CATEGORIES:
+ start_end[1] += 1
+ # Ignore milliseconds difference at the end.
+ # Never allow the segment to exceed the video.
+ if duration and duration - start_end[1] <= 1:
+ start_end[1] = duration
+ # SponsorBlock duration may be absent or it may deviate from the real one.
+ diff = abs(duration - s['videoDuration']) if s['videoDuration'] else 0
+ return diff < 1 or (diff < 5 and diff / (start_end[1] - start_end[0]) < 0.05)
+
+ duration_match = [s for s in segments if duration_filter(s)]
+ if len(duration_match) != len(segments):
+ self.report_warning('Some SponsorBlock segments are from a video of different duration, maybe from an old version of this video')
+
+ def to_chapter(s):
+ (start, end), cat = s['segment'], s['category']
+ title = s['description'] if cat == 'chapter' else self.CATEGORIES[cat]
+ return {
+ 'start_time': start,
+ 'end_time': end,
+ 'category': cat,
+ 'title': title,
+ 'type': s['actionType'],
+ '_categories': [(cat, start, end, title)],
+ }
+
+ sponsor_chapters = [to_chapter(s) for s in duration_match]
+ if not sponsor_chapters:
+ self.to_screen('No matching segments were found in the SponsorBlock database')
+ else:
+ self.to_screen(f'Found {len(sponsor_chapters)} segments in the SponsorBlock database')
+ return sponsor_chapters
+
+ def _get_sponsor_segments(self, video_id, service):
+ hash = hashlib.sha256(video_id.encode('ascii')).hexdigest()
+ # SponsorBlock API recommends using first 4 hash characters.
+ url = f'{self._API_URL}/api/skipSegments/{hash[:4]}?' + urllib.parse.urlencode({
+ 'service': service,
+ 'categories': json.dumps(self._categories),
+ 'actionTypes': json.dumps(['skip', 'poi', 'chapter'])
+ })
+ for d in self._download_json(url) or []:
+ if d['videoID'] == video_id:
+ return d['segments']
+ return []
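
_get_sponsor_segments uses SponsorBlock's privacy-preserving endpoint: only the first 4 hex digits of sha256(video_id) are sent, the server answers with segments for every video sharing that prefix, and the client filters locally on videoID. The URL construction in isolation (no request is made here):

import hashlib
import json
import urllib.parse

video_id = 'dQw4w9WgXcQ'
prefix = hashlib.sha256(video_id.encode('ascii')).hexdigest()[:4]
url = f'https://sponsor.ajay.app/api/skipSegments/{prefix}?' + urllib.parse.urlencode({
    'service': 'YouTube',
    'categories': json.dumps(['sponsor', 'selfpromo']),
    'actionTypes': json.dumps(['skip']),
})
print(url)  # the server returns segments for *all* videos sharing the 4-char prefix
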
diff --git a/yt_dlp/postprocessor/xattrpp.py b/yt_dlp/postprocessor/xattrpp.py
new file mode 100644
index 0000000..f822eff
--- /dev/null
+++ b/yt_dlp/postprocessor/xattrpp.py
@@ -0,0 +1,63 @@
+import os
+
+from .common import PostProcessor
+from ..compat import compat_os_name
+from ..utils import (
+ PostProcessingError,
+ XAttrMetadataError,
+ XAttrUnavailableError,
+ hyphenate_date,
+ write_xattr,
+)
+
+
+class XAttrMetadataPP(PostProcessor):
+ """Set extended attributes on downloaded file (if xattr support is found)
+
+ More info about extended attributes for media:
+ http://freedesktop.org/wiki/CommonExtendedAttributes/
+ http://www.freedesktop.org/wiki/PhreedomDraft/
+ http://dublincore.org/documents/usageguide/elements.shtml
+
+ TODO:
+ * capture youtube keywords and put them in 'user.dublincore.subject' (comma-separated)
+ * figure out which xattrs can be used for 'duration', 'thumbnail', 'resolution'
+ """
+
+ XATTR_MAPPING = {
+ 'user.xdg.referrer.url': 'webpage_url',
+ # 'user.xdg.comment': 'description',
+ 'user.dublincore.title': 'title',
+ 'user.dublincore.date': 'upload_date',
+ 'user.dublincore.description': 'description',
+ 'user.dublincore.contributor': 'uploader',
+ 'user.dublincore.format': 'format',
+ }
+
+ def run(self, info):
+ mtime = os.stat(info['filepath']).st_mtime
+ self.to_screen('Writing metadata to file\'s xattrs')
+ try:
+ for xattrname, infoname in self.XATTR_MAPPING.items():
+ value = info.get(infoname)
+ if value:
+ if infoname == 'upload_date':
+ value = hyphenate_date(value)
+ write_xattr(info['filepath'], xattrname, value.encode())
+
+ except XAttrUnavailableError as e:
+ raise PostProcessingError(str(e))
+ except XAttrMetadataError as e:
+ if e.reason == 'NO_SPACE':
+ self.report_warning(
+ 'There is no disk space left, the disk quota was exceeded, or the filesystem xattr limit was exceeded. '
+ 'Some extended attributes were not written')
+ elif e.reason == 'VALUE_TOO_LONG':
+ self.report_warning('Unable to write extended attributes because the values are too long.')
+ else:
+ tip = ('You need to use NTFS' if compat_os_name == 'nt'
+ else 'You may have to enable them in your "/etc/fstab"')
+ raise PostProcessingError(f'This filesystem doesn\'t support extended attributes. {tip}')
+
+ self.try_utime(info['filepath'], mtime, mtime)
+ return [], info
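
On Linux, write_xattr ultimately reduces to os.setxattr (with the third-party xattr/pyxattr packages as fallbacks). A platform-gated sketch, assuming a filesystem with user xattrs enabled; the helper name is illustrative, not yt-dlp's:

import os
import sys

def set_user_xattr(path, name, value: bytes):
    """Minimal Linux-only stand-in for write_xattr."""
    if not hasattr(os, 'setxattr'):  # only available on Linux
        raise OSError(f'extended attributes are not supported on {sys.platform}')
    os.setxattr(path, name, value)

# set_user_xattr('video.mp4', 'user.dublincore.title', 'Example title'.encode())
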
diff --git a/yt_dlp/socks.py b/yt_dlp/socks.py
new file mode 100644
index 0000000..b4957ac
--- /dev/null
+++ b/yt_dlp/socks.py
@@ -0,0 +1,274 @@
+# Public Domain SOCKS proxy protocol implementation
+# Adapted from https://gist.github.com/bluec0re/cafd3764412967417fd3
+# References:
+# SOCKS4 protocol http://www.openssh.com/txt/socks4.protocol
+# SOCKS4A protocol http://www.openssh.com/txt/socks4a.protocol
+# SOCKS5 protocol https://tools.ietf.org/html/rfc1928
+# SOCKS5 username/password authentication https://tools.ietf.org/html/rfc1929
+
+import collections
+import socket
+import struct
+
+from .compat import compat_ord
+
+__author__ = 'Timo Schmid <coding@timoschmid.de>'
+
+SOCKS4_VERSION = 4
+SOCKS4_REPLY_VERSION = 0x00
+# Excerpt from SOCKS4A protocol:
+# if the client cannot resolve the destination host's domain name to find its
+# IP address, it should set the first three bytes of DSTIP to NULL and the last
+# byte to a non-zero value.
+SOCKS4_DEFAULT_DSTIP = struct.pack('!BBBB', 0, 0, 0, 0xFF)
+
+SOCKS5_VERSION = 5
+SOCKS5_USER_AUTH_VERSION = 0x01
+SOCKS5_USER_AUTH_SUCCESS = 0x00
+
+
+class Socks4Command:
+ CMD_CONNECT = 0x01
+ CMD_BIND = 0x02
+
+
+class Socks5Command(Socks4Command):
+ CMD_UDP_ASSOCIATE = 0x03
+
+
+class Socks5Auth:
+ AUTH_NONE = 0x00
+ AUTH_GSSAPI = 0x01
+ AUTH_USER_PASS = 0x02
+ AUTH_NO_ACCEPTABLE = 0xFF # For server response
+
+
+class Socks5AddressType:
+ ATYP_IPV4 = 0x01
+ ATYP_DOMAINNAME = 0x03
+ ATYP_IPV6 = 0x04
+
+
+class ProxyError(OSError):
+ ERR_SUCCESS = 0x00
+
+ def __init__(self, code=None, msg=None):
+ if code is not None and msg is None:
+ msg = self.CODES.get(code) or 'unknown error'
+ super().__init__(code, msg)
+
+
+class InvalidVersionError(ProxyError):
+ def __init__(self, expected_version, got_version):
+ msg = (f'Invalid response version from server. '
+ f'Expected {expected_version:02x} got {got_version:02x}')
+ super().__init__(0, msg)
+
+
+class Socks4Error(ProxyError):
+ ERR_SUCCESS = 90
+
+ CODES = {
+ 91: 'request rejected or failed',
+ 92: 'request rejected because SOCKS server cannot connect to identd on the client',
+ 93: 'request rejected because the client program and identd report different user-ids'
+ }
+
+
+class Socks5Error(ProxyError):
+ ERR_GENERAL_FAILURE = 0x01
+
+ CODES = {
+ 0x01: 'general SOCKS server failure',
+ 0x02: 'connection not allowed by ruleset',
+ 0x03: 'Network unreachable',
+ 0x04: 'Host unreachable',
+ 0x05: 'Connection refused',
+ 0x06: 'TTL expired',
+ 0x07: 'Command not supported',
+ 0x08: 'Address type not supported',
+ 0xFE: 'unknown username or invalid password',
+ 0xFF: 'all offered authentication methods were rejected'
+ }
+
+
+class ProxyType:
+ SOCKS4 = 0
+ SOCKS4A = 1
+ SOCKS5 = 2
+
+
+Proxy = collections.namedtuple('Proxy', (
+ 'type', 'host', 'port', 'username', 'password', 'remote_dns'))
+
+
+class sockssocket(socket.socket):
+ def __init__(self, *args, **kwargs):
+ self._proxy = None
+ super().__init__(*args, **kwargs)
+
+ def setproxy(self, proxytype, addr, port, rdns=True, username=None, password=None):
+ assert proxytype in (ProxyType.SOCKS4, ProxyType.SOCKS4A, ProxyType.SOCKS5)
+
+ self._proxy = Proxy(proxytype, addr, port, username, password, rdns)
+
+ def recvall(self, cnt):
+ data = b''
+ while len(data) < cnt:
+ cur = self.recv(cnt - len(data))
+ if not cur:
+ raise EOFError(f'{cnt - len(data)} bytes missing')
+ data += cur
+ return data
+
+ def _recv_bytes(self, cnt):
+ data = self.recvall(cnt)
+ return struct.unpack(f'!{cnt}B', data)
+
+ @staticmethod
+ def _len_and_data(data):
+ return struct.pack('!B', len(data)) + data
+
+ def _check_response_version(self, expected_version, got_version):
+ if got_version != expected_version:
+ self.close()
+ raise InvalidVersionError(expected_version, got_version)
+
+ def _resolve_address(self, destaddr, default, use_remote_dns, family=None):
+ for f in (family,) if family else (socket.AF_INET, socket.AF_INET6):
+ try:
+ return f, socket.inet_pton(f, destaddr)
+ except OSError:
+ continue
+
+ if use_remote_dns and self._proxy.remote_dns:
+ return 0, default
+ else:
+ res = socket.getaddrinfo(destaddr, None, family=family or 0)
+ f, _, _, _, ipaddr = res[0]
+ return f, socket.inet_pton(f, ipaddr[0])
+
+ def _setup_socks4(self, address, is_4a=False):
+ destaddr, port = address
+
+ _, ipaddr = self._resolve_address(destaddr, SOCKS4_DEFAULT_DSTIP, use_remote_dns=is_4a, family=socket.AF_INET)
+
+ packet = struct.pack('!BBH', SOCKS4_VERSION, Socks4Command.CMD_CONNECT, port) + ipaddr
+
+ username = (self._proxy.username or '').encode()
+ packet += username + b'\x00'
+
+ if is_4a and self._proxy.remote_dns and ipaddr == SOCKS4_DEFAULT_DSTIP:
+ packet += destaddr.encode() + b'\x00'
+
+ self.sendall(packet)
+
+ version, resp_code, dstport, dsthost = struct.unpack('!BBHI', self.recvall(8))
+
+ self._check_response_version(SOCKS4_REPLY_VERSION, version)
+
+ if resp_code != Socks4Error.ERR_SUCCESS:
+ self.close()
+ raise Socks4Error(resp_code)
+
+ return (dsthost, dstport)
+
+ def _setup_socks4a(self, address):
+ self._setup_socks4(address, is_4a=True)
+
+ def _socks5_auth(self):
+ packet = struct.pack('!B', SOCKS5_VERSION)
+
+ auth_methods = [Socks5Auth.AUTH_NONE]
+ if self._proxy.username and self._proxy.password:
+ auth_methods.append(Socks5Auth.AUTH_USER_PASS)
+
+ packet += struct.pack('!B', len(auth_methods))
+ packet += struct.pack(f'!{len(auth_methods)}B', *auth_methods)
+
+ self.sendall(packet)
+
+ version, method = self._recv_bytes(2)
+
+ self._check_response_version(SOCKS5_VERSION, version)
+
+ if method == Socks5Auth.AUTH_NO_ACCEPTABLE or (
+ method == Socks5Auth.AUTH_USER_PASS and (not self._proxy.username or not self._proxy.password)):
+ self.close()
+ raise Socks5Error(Socks5Auth.AUTH_NO_ACCEPTABLE)
+
+ if method == Socks5Auth.AUTH_USER_PASS:
+ username = self._proxy.username.encode()
+ password = self._proxy.password.encode()
+ packet = struct.pack('!B', SOCKS5_USER_AUTH_VERSION)
+ packet += self._len_and_data(username) + self._len_and_data(password)
+ self.sendall(packet)
+
+ version, status = self._recv_bytes(2)
+
+ self._check_response_version(SOCKS5_USER_AUTH_VERSION, version)
+
+ if status != SOCKS5_USER_AUTH_SUCCESS:
+ self.close()
+ raise Socks5Error(Socks5Error.ERR_GENERAL_FAILURE)
+
+ def _setup_socks5(self, address):
+ destaddr, port = address
+
+ family, ipaddr = self._resolve_address(destaddr, None, use_remote_dns=True)
+
+ self._socks5_auth()
+
+ reserved = 0
+ packet = struct.pack('!BBB', SOCKS5_VERSION, Socks5Command.CMD_CONNECT, reserved)
+ if ipaddr is None:
+ destaddr = destaddr.encode()
+ packet += struct.pack('!B', Socks5AddressType.ATYP_DOMAINNAME)
+ packet += self._len_and_data(destaddr)
+ elif family == socket.AF_INET:
+ packet += struct.pack('!B', Socks5AddressType.ATYP_IPV4) + ipaddr
+ elif family == socket.AF_INET6:
+ packet += struct.pack('!B', Socks5AddressType.ATYP_IPV6) + ipaddr
+ packet += struct.pack('!H', port)
+
+ self.sendall(packet)
+
+ version, status, reserved, atype = self._recv_bytes(4)
+
+ self._check_response_version(SOCKS5_VERSION, version)
+
+ if status != Socks5Error.ERR_SUCCESS:
+ self.close()
+ raise Socks5Error(status)
+
+ if atype == Socks5AddressType.ATYP_IPV4:
+ destaddr = self.recvall(4)
+ elif atype == Socks5AddressType.ATYP_DOMAINNAME:
+ alen = compat_ord(self.recv(1))
+ destaddr = self.recvall(alen)
+ elif atype == Socks5AddressType.ATYP_IPV6:
+ destaddr = self.recvall(16)
+ destport = struct.unpack('!H', self.recvall(2))[0]
+
+ return (destaddr, destport)
+
+ def _make_proxy(self, connect_func, address):
+ if not self._proxy:
+ return connect_func(self, address)
+
+ result = connect_func(self, (self._proxy.host, self._proxy.port))
+ if result != 0 and result is not None:
+ return result
+ setup_funcs = {
+ ProxyType.SOCKS4: self._setup_socks4,
+ ProxyType.SOCKS4A: self._setup_socks4a,
+ ProxyType.SOCKS5: self._setup_socks5,
+ }
+ setup_funcs[self._proxy.type](address)
+ return result
+
+ def connect(self, address):
+ self._make_proxy(socket.socket.connect, address)
+
+ def connect_ex(self, address):
+ return self._make_proxy(socket.socket.connect_ex, address)
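
sockssocket keeps the ordinary socket API: connect() first opens a TCP connection to the proxy and then performs the SOCKS handshake for the real destination; with rdns=True the hostname is resolved by the proxy. A usage sketch against a hypothetical local SOCKS5 proxy:

from yt_dlp.socks import ProxyType, sockssocket

s = sockssocket()
s.setproxy(ProxyType.SOCKS5, '127.0.0.1', 1080, rdns=True)  # proxy address is made up
s.connect(('example.com', 80))  # handshake happens inside connect()
s.sendall(b'HEAD / HTTP/1.1\r\nHost: example.com\r\n\r\n')
print(s.recv(128))
s.close()
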
diff --git a/yt_dlp/update.py b/yt_dlp/update.py
new file mode 100644
index 0000000..db50cfa
--- /dev/null
+++ b/yt_dlp/update.py
@@ -0,0 +1,619 @@
+from __future__ import annotations
+
+import atexit
+import contextlib
+import hashlib
+import json
+import os
+import platform
+import re
+import subprocess
+import sys
+from dataclasses import dataclass
+from zipimport import zipimporter
+
+from .compat import functools # isort: split
+from .compat import compat_realpath, compat_shlex_quote
+from .networking import Request
+from .networking.exceptions import HTTPError, network_exceptions
+from .utils import (
+ NO_DEFAULT,
+ Popen,
+ deprecation_warning,
+ format_field,
+ remove_end,
+ shell_quote,
+ system_identifier,
+ version_tuple,
+)
+from .version import (
+ CHANNEL,
+ ORIGIN,
+ RELEASE_GIT_HEAD,
+ UPDATE_HINT,
+ VARIANT,
+ __version__,
+)
+
+UPDATE_SOURCES = {
+ 'stable': 'yt-dlp/yt-dlp',
+ 'nightly': 'yt-dlp/yt-dlp-nightly-builds',
+ 'master': 'yt-dlp/yt-dlp-master-builds',
+}
+REPOSITORY = UPDATE_SOURCES['stable']
+_INVERSE_UPDATE_SOURCES = {value: key for key, value in UPDATE_SOURCES.items()}
+
+_VERSION_RE = re.compile(r'(\d+\.)*\d+')
+_HASH_PATTERN = r'[\da-f]{40}'
+_COMMIT_RE = re.compile(rf'Generated from: https://(?:[^/?#]+/){{3}}commit/(?P<hash>{_HASH_PATTERN})')
+
+API_BASE_URL = 'https://api.github.com/repos'
+
+# Backwards compatibility variables for the current channel
+API_URL = f'{API_BASE_URL}/{REPOSITORY}/releases'
+
+
+@functools.cache
+def _get_variant_and_executable_path():
+ """@returns (variant, executable_path)"""
+ if getattr(sys, 'frozen', False):
+ path = sys.executable
+ if not hasattr(sys, '_MEIPASS'):
+ return 'py2exe', path
+ elif sys._MEIPASS == os.path.dirname(path):
+ return f'{sys.platform}_dir', path
+ elif sys.platform == 'darwin':
+ machine = '_legacy' if version_tuple(platform.mac_ver()[0]) < (10, 15) else ''
+ else:
+ machine = f'_{platform.machine().lower()}'
+ # Ref: https://en.wikipedia.org/wiki/Uname#Examples
+ if machine[1:] in ('x86', 'x86_64', 'amd64', 'i386', 'i686'):
+ machine = '_x86' if platform.architecture()[0][:2] == '32' else ''
+ return f'{remove_end(sys.platform, "32")}{machine}_exe', path
+
+ path = os.path.dirname(__file__)
+ if isinstance(__loader__, zipimporter):
+ return 'zip', os.path.join(path, '..')
+ elif (os.path.basename(sys.argv[0]) in ('__main__.py', '-m')
+ and os.path.exists(os.path.join(path, '../.git/HEAD'))):
+ return 'source', path
+ return 'unknown', path
+
+
+def detect_variant():
+ return VARIANT or _get_variant_and_executable_path()[0]
+
+
+@functools.cache
+def current_git_head():
+ if detect_variant() != 'source':
+ return
+ with contextlib.suppress(Exception):
+ stdout, _, _ = Popen.run(
+ ['git', 'rev-parse', '--short', 'HEAD'],
+ text=True, cwd=os.path.dirname(os.path.abspath(__file__)),
+ stdout=subprocess.PIPE, stderr=subprocess.PIPE)
+ if re.fullmatch('[0-9a-f]+', stdout.strip()):
+ return stdout.strip()
+
+
+_FILE_SUFFIXES = {
+ 'zip': '',
+ 'py2exe': '_min.exe',
+ 'win_exe': '.exe',
+ 'win_x86_exe': '_x86.exe',
+ 'darwin_exe': '_macos',
+ 'darwin_legacy_exe': '_macos_legacy',
+ 'linux_exe': '_linux',
+ 'linux_aarch64_exe': '_linux_aarch64',
+ 'linux_armv7l_exe': '_linux_armv7l',
+}
+
+_NON_UPDATEABLE_REASONS = {
+ **{variant: None for variant in _FILE_SUFFIXES}, # Updateable
+ **{variant: f'Auto-update is not supported for unpackaged {name} executable; Re-download the latest release'
+ for variant, name in {'win32_dir': 'Windows', 'darwin_dir': 'MacOS', 'linux_dir': 'Linux'}.items()},
+ 'source': 'You cannot update when running from source code; Use git to pull the latest changes',
+ 'unknown': 'You installed yt-dlp with a package manager or setup.py; Use that to update',
+ 'other': 'You are using an unofficial build of yt-dlp; Build the executable again',
+}
+
+
+def is_non_updateable():
+ if UPDATE_HINT:
+ return UPDATE_HINT
+ return _NON_UPDATEABLE_REASONS.get(
+ detect_variant(), _NON_UPDATEABLE_REASONS['unknown' if VARIANT else 'other'])
+
+
+def _get_binary_name():
+ return format_field(_FILE_SUFFIXES, detect_variant(), template='yt-dlp%s', ignore=None, default=None)
+
+
+def _get_system_deprecation():
+ MIN_SUPPORTED, MIN_RECOMMENDED = (3, 8), (3, 8)
+
+ if sys.version_info > MIN_RECOMMENDED:
+ return None
+
+ major, minor = sys.version_info[:2]
+ if sys.version_info < MIN_SUPPORTED:
+ msg = f'Python version {major}.{minor} is no longer supported'
+ else:
+ msg = (f'Support for Python version {major}.{minor} has been deprecated. '
+ '\nYou may stop receiving updates on this version at any time')
+
+ major, minor = MIN_RECOMMENDED
+ return f'{msg}! Please update to Python {major}.{minor} or above'
+
+
+def _sha256_file(path):
+ h = hashlib.sha256()
+ mv = memoryview(bytearray(128 * 1024))
+ with open(os.path.realpath(path), 'rb', buffering=0) as f:
+ for n in iter(lambda: f.readinto(mv), 0):
+ h.update(mv[:n])
+ return h.hexdigest()
+
+
+def _make_label(origin, tag, version=None):
+ if '/' in origin:
+ channel = _INVERSE_UPDATE_SOURCES.get(origin, origin)
+ else:
+ channel = origin
+ label = f'{channel}@{tag}'
+ if version and version != tag:
+ label += f' build {version}'
+ if channel != origin:
+ label += f' from {origin}'
+ return label
+
+
+@dataclass
+class UpdateInfo:
+ """
+ Update target information
+
+ Can be created by `query_update()` or manually.
+
+ Attributes:
+ tag The release tag that will be updated to. If from query_update,
+ the value is after API resolution and update spec processing.
+ The only property that is required.
+ version The actual numeric version (if available) of the binary to be updated to,
+ after API resolution and update spec processing. (default: None)
+ requested_version Numeric version of the binary being requested (if available),
+ after API resolution only. (default: None)
+ commit Commit hash (if available) of the binary to be updated to,
+ after API resolution and update spec processing. (default: None)
+ This value will only match the RELEASE_GIT_HEAD of prerelease builds.
+ binary_name Filename of the binary to be updated to. (default: current binary name)
+ checksum Expected checksum (if available) of the binary to be
+ updated to. (default: None)
+ """
+ tag: str
+ version: str | None = None
+ requested_version: str | None = None
+ commit: str | None = None
+
+ binary_name: str | None = _get_binary_name()
+ checksum: str | None = None
+
+ _has_update = True
+
+
+class Updater:
+ # XXX: use class variables to simplify testing
+ _channel = CHANNEL
+ _origin = ORIGIN
+ _update_sources = UPDATE_SOURCES
+
+ def __init__(self, ydl, target: str | None = None):
+ self.ydl = ydl
+ # For backwards compat, target needs to be treated as if it could be None
+ self.requested_channel, sep, self.requested_tag = (target or self._channel).rpartition('@')
+ # Check if requested_tag is actually the requested repo/channel
+ if not sep and ('/' in self.requested_tag or self.requested_tag in self._update_sources):
+ self.requested_channel = self.requested_tag
+ self.requested_tag: str = None # type: ignore (we set it later)
+ elif not self.requested_channel:
+ # User did not specify a channel, so we are requesting the default channel
+ self.requested_channel = self._channel.partition('@')[0]
+
+ # --update should not be treated as an exact tag request even if CHANNEL has a @tag
+ self._exact = bool(target) and target != self._channel
+ if not self.requested_tag:
+ # User did not specify a tag, so we request 'latest' and track that no exact tag was passed
+ self.requested_tag = 'latest'
+ self._exact = False
+
+ if '/' in self.requested_channel:
+ # requested_channel is actually a repository
+ self.requested_repo = self.requested_channel
+ if not self.requested_repo.startswith('yt-dlp/') and self.requested_repo != self._origin:
+ self.ydl.report_warning(
+ f'You are switching to an {self.ydl._format_err("unofficial", "red")} executable '
+ f'from {self.ydl._format_err(self.requested_repo, self.ydl.Styles.EMPHASIS)}. '
+ f'Run {self.ydl._format_err("at your own risk", "light red")}')
+ self._block_restart('Automatically restarting into custom builds is disabled for security reasons')
+ else:
+ # Check if requested_channel resolves to a known repository or else raise
+ self.requested_repo = self._update_sources.get(self.requested_channel)
+ if not self.requested_repo:
+ self._report_error(
+ f'Invalid update channel {self.requested_channel!r} requested. '
+ f'Valid channels are {", ".join(self._update_sources)}', True)
+
+ self._identifier = f'{detect_variant()} {system_identifier()}'
+
+ @property
+ def current_version(self):
+ """Current version"""
+ return __version__
+
+ @property
+ def current_commit(self):
+ """Current commit hash"""
+ return RELEASE_GIT_HEAD
+
+ def _download_asset(self, name, tag=None):
+ if not tag:
+ tag = self.requested_tag
+
+ path = 'latest/download' if tag == 'latest' else f'download/{tag}'
+ url = f'https://github.com/{self.requested_repo}/releases/{path}/{name}'
+ self.ydl.write_debug(f'Downloading {name} from {url}')
+ return self.ydl.urlopen(url).read()
+
+ def _call_api(self, tag):
+ tag = f'tags/{tag}' if tag != 'latest' else tag
+ url = f'{API_BASE_URL}/{self.requested_repo}/releases/{tag}'
+ self.ydl.write_debug(f'Fetching release info: {url}')
+ return json.loads(self.ydl.urlopen(Request(url, headers={
+ 'Accept': 'application/vnd.github+json',
+ 'User-Agent': 'yt-dlp',
+ 'X-GitHub-Api-Version': '2022-11-28',
+ })).read().decode())
+
+ def _get_version_info(self, tag: str) -> tuple[str | None, str | None]:
+ if _VERSION_RE.fullmatch(tag):
+ return tag, None
+
+ api_info = self._call_api(tag)
+
+ if tag == 'latest':
+ requested_version = api_info['tag_name']
+ else:
+ match = re.search(rf'\s+(?P<version>{_VERSION_RE.pattern})$', api_info.get('name', ''))
+ requested_version = match.group('version') if match else None
+
+ if re.fullmatch(_HASH_PATTERN, api_info.get('target_commitish', '')):
+ target_commitish = api_info['target_commitish']
+ else:
+ match = _COMMIT_RE.match(api_info.get('body', ''))
+ target_commitish = match.group('hash') if match else None
+
+ if not (requested_version or target_commitish):
+ self._report_error('Either a version or a commit hash must be available on the release', expected=True)
+
+ return requested_version, target_commitish
+
+ def _download_update_spec(self, source_tags):
+ for tag in source_tags:
+ try:
+ return self._download_asset('_update_spec', tag=tag).decode()
+ except network_exceptions as error:
+ if isinstance(error, HTTPError) and error.status == 404:
+ continue
+ self._report_network_error(f'fetch update spec: {error}')
+
+ self._report_error(
+ f'The requested tag {self.requested_tag} does not exist for {self.requested_repo}', True)
+ return None
+
+ def _process_update_spec(self, lockfile: str, resolved_tag: str):
+ lines = lockfile.splitlines()
+ is_version2 = any(line.startswith('lockV2 ') for line in lines)
+
+ for line in lines:
+ if is_version2:
+ if not line.startswith(f'lockV2 {self.requested_repo} '):
+ continue
+ _, _, tag, pattern = line.split(' ', 3)
+ else:
+ if not line.startswith('lock '):
+ continue
+ _, tag, pattern = line.split(' ', 2)
+
+ if re.match(pattern, self._identifier):
+ if _VERSION_RE.fullmatch(tag):
+ if not self._exact:
+ return tag
+ elif self._version_compare(tag, resolved_tag):
+ return resolved_tag
+ elif tag != resolved_tag:
+ continue
+
+ self._report_error(
+ f'yt-dlp cannot be updated to {resolved_tag} since you are on an older Python version', True)
+ return None
+
+ return resolved_tag
+
+ def _version_compare(self, a: str, b: str):
+ """
+ Compare two version strings
+
+ This function SHOULD NOT be called if self._exact == True
+ """
+ if _VERSION_RE.fullmatch(f'{a}.{b}'):
+ return version_tuple(a) >= version_tuple(b)
+ return a == b
+
+ def query_update(self, *, _output=False) -> UpdateInfo | None:
+ """Fetches info about the available update
+ @returns An `UpdateInfo` if there is an update available, else None
+ """
+ if not self.requested_repo:
+ self._report_error('No target repository could be determined from input')
+ return None
+
+ try:
+ requested_version, target_commitish = self._get_version_info(self.requested_tag)
+ except network_exceptions as e:
+ self._report_network_error(f'obtain version info ({e})', delim='; Please try again later or')
+ return None
+
+ if self._exact and self._origin != self.requested_repo:
+ has_update = True
+ elif requested_version:
+ if self._exact:
+ has_update = self.current_version != requested_version
+ else:
+ has_update = not self._version_compare(self.current_version, requested_version)
+ elif target_commitish:
+ has_update = target_commitish != self.current_commit
+ else:
+ has_update = False
+
+ resolved_tag = requested_version if self.requested_tag == 'latest' else self.requested_tag
+ current_label = _make_label(self._origin, self._channel.partition("@")[2] or self.current_version, self.current_version)
+ requested_label = _make_label(self.requested_repo, resolved_tag, requested_version)
+ latest_or_requested = f'{"Latest" if self.requested_tag == "latest" else "Requested"} version: {requested_label}'
+ if not has_update:
+ if _output:
+ self.ydl.to_screen(f'{latest_or_requested}\nyt-dlp is up to date ({current_label})')
+ return None
+
+ update_spec = self._download_update_spec(('latest', None) if requested_version else (None,))
+ if not update_spec:
+ return None
+ # `result_` prefixed vars == post-_process_update_spec() values
+ result_tag = self._process_update_spec(update_spec, resolved_tag)
+ if not result_tag or result_tag == self.current_version:
+ return None
+ elif result_tag == resolved_tag:
+ result_version = requested_version
+ elif _VERSION_RE.fullmatch(result_tag):
+ result_version = result_tag
+ else: # actual version being updated to is unknown
+ result_version = None
+
+ checksum = None
+ # Non-updateable variants can get update_info but need to skip checksum
+ if not is_non_updateable():
+ try:
+ hashes = self._download_asset('SHA2-256SUMS', result_tag)
+ except network_exceptions as error:
+ if not isinstance(error, HTTPError) or error.status != 404:
+ self._report_network_error(f'fetch checksums: {error}')
+ return None
+ self.ydl.report_warning('No hash information found for the release, skipping verification')
+ else:
+ for ln in hashes.decode().splitlines():
+ if ln.endswith(_get_binary_name()):
+ checksum = ln.split()[0]
+ break
+ if not checksum:
+ self.ydl.report_warning('The hash could not be found in the checksum file, skipping verification')
+
+ if _output:
+ update_label = _make_label(self.requested_repo, result_tag, result_version)
+ self.ydl.to_screen(
+ f'Current version: {current_label}\n{latest_or_requested}'
+ + (f'\nUpgradable to: {update_label}' if update_label != requested_label else ''))
+
+ return UpdateInfo(
+ tag=result_tag,
+ version=result_version,
+ requested_version=requested_version,
+ commit=target_commitish if result_tag == resolved_tag else None,
+ checksum=checksum)
+
+ def update(self, update_info=NO_DEFAULT):
+ """Update yt-dlp executable to the latest version
+ @param update_info `UpdateInfo | None` as returned by query_update()
+ """
+ if update_info is NO_DEFAULT:
+ update_info = self.query_update(_output=True)
+ if not update_info:
+ return False
+
+ err = is_non_updateable()
+ if err:
+ self._report_error(err, True)
+ return False
+
+ self.ydl.to_screen(f'Current Build Hash: {_sha256_file(self.filename)}')
+
+ update_label = _make_label(self.requested_repo, update_info.tag, update_info.version)
+ self.ydl.to_screen(f'Updating to {update_label} ...')
+
+ directory = os.path.dirname(self.filename)
+ if not os.access(self.filename, os.W_OK):
+ return self._report_permission_error(self.filename)
+ elif not os.access(directory, os.W_OK):
+ return self._report_permission_error(directory)
+
+ new_filename, old_filename = f'{self.filename}.new', f'{self.filename}.old'
+ if detect_variant() == 'zip': # Can be replaced in-place
+ new_filename, old_filename = self.filename, None
+
+ try:
+ if os.path.exists(old_filename or ''):
+ os.remove(old_filename)
+ except OSError:
+ return self._report_error('Unable to remove the old version')
+
+ try:
+ newcontent = self._download_asset(update_info.binary_name, update_info.tag)
+ except network_exceptions as e:
+ if isinstance(e, HTTPError) and e.status == 404:
+ return self._report_error(
+ f'The requested tag {self.requested_repo}@{update_info.tag} does not exist', True)
+ return self._report_network_error(f'fetch updates: {e}', tag=update_info.tag)
+
+ if not update_info.checksum:
+ self._block_restart('Automatically restarting into unverified builds is disabled for security reasons')
+ elif hashlib.sha256(newcontent).hexdigest() != update_info.checksum:
+ return self._report_network_error('verify the new executable', tag=update_info.tag)
+
+ try:
+ with open(new_filename, 'wb') as outf:
+ outf.write(newcontent)
+ except OSError:
+ return self._report_permission_error(new_filename)
+
+ if old_filename:
+ mask = os.stat(self.filename).st_mode
+ try:
+ os.rename(self.filename, old_filename)
+ except OSError:
+ return self._report_error('Unable to move current version')
+
+ try:
+ os.rename(new_filename, self.filename)
+ except OSError:
+ self._report_error('Unable to overwrite current version')
+ return os.rename(old_filename, self.filename)
+
+ variant = detect_variant()
+ if variant.startswith('win') or variant == 'py2exe':
+ atexit.register(Popen, f'ping 127.0.0.1 -n 5 -w 1000 & del /F "{old_filename}"',
+ shell=True, stdout=subprocess.DEVNULL, stderr=subprocess.DEVNULL)
+ elif old_filename:
+ try:
+ os.remove(old_filename)
+ except OSError:
+ self._report_error('Unable to remove the old version')
+
+ try:
+ os.chmod(self.filename, mask)
+ except OSError:
+ return self._report_error(
+ f'Unable to set permissions. Run: sudo chmod a+rx {compat_shlex_quote(self.filename)}')
+
+ self.ydl.to_screen(f'Updated yt-dlp to {update_label}')
+ return True
+
+ @functools.cached_property
+ def filename(self):
+ """Filename of the executable"""
+ return compat_realpath(_get_variant_and_executable_path()[1])
+
+ @functools.cached_property
+ def cmd(self):
+ """The command-line to run the executable, if known"""
+ # There is no sys.orig_argv in py < 3.10. Also, it can be [] when frozen
+ if getattr(sys, 'orig_argv', None):
+ return sys.orig_argv
+ elif getattr(sys, 'frozen', False):
+ return sys.argv
+
+ def restart(self):
+ """Restart the executable"""
+ assert self.cmd, 'Must be frozen or Py >= 3.10'
+ self.ydl.write_debug(f'Restarting: {shell_quote(self.cmd)}')
+ _, _, returncode = Popen.run(self.cmd)
+ return returncode
+
+ def _block_restart(self, msg):
+ def wrapper():
+ self._report_error(f'{msg}. Restart yt-dlp to use the updated version', expected=True)
+ return self.ydl._download_retcode
+ self.restart = wrapper
+
+ def _report_error(self, msg, expected=False):
+ self.ydl.report_error(msg, tb=False if expected else None)
+ self.ydl._download_retcode = 100
+
+ def _report_permission_error(self, file):
+ self._report_error(f'Unable to write to {file}; try running as administrator', True)
+
+ def _report_network_error(self, action, delim=';', tag=None):
+ if not tag:
+ tag = self.requested_tag
+ self._report_error(
+ f'Unable to {action}{delim} visit https://github.com/{self.requested_repo}/releases/'
+ + (tag if tag == 'latest' else f'tag/{tag}'), True)
+
+ # XXX: Everything below this line in this class is deprecated / for compat only
+ @property
+ def _target_tag(self):
+ """Deprecated; requested tag with 'tags/' prepended when necessary for API calls"""
+ return f'tags/{self.requested_tag}' if self.requested_tag != 'latest' else self.requested_tag
+
+ def _check_update(self):
+ """Deprecated; report whether there is an update available"""
+ return bool(self.query_update(_output=True))
+
+ def __getattr__(self, attribute: str):
+ """Compat getter function for deprecated attributes"""
+ deprecated_props_map = {
+ 'check_update': '_check_update',
+ 'target_tag': '_target_tag',
+ 'target_channel': 'requested_channel',
+ }
+ update_info_props_map = {
+ 'has_update': '_has_update',
+ 'new_version': 'version',
+ 'latest_version': 'requested_version',
+ 'release_name': 'binary_name',
+ 'release_hash': 'checksum',
+ }
+
+ if attribute not in deprecated_props_map and attribute not in update_info_props_map:
+ raise AttributeError(f'{type(self).__name__!r} object has no attribute {attribute!r}')
+
+ msg = f'{type(self).__name__}.{attribute} is deprecated and will be removed in a future version'
+ if attribute in deprecated_props_map:
+ source_name = deprecated_props_map[attribute]
+ if not source_name.startswith('_'):
+ msg += f'. Please use {source_name!r} instead'
+ source = self
+ mapping = deprecated_props_map
+
+ else: # attribute in update_info_props_map
+ msg += '. Please call query_update() instead'
+ source = self.query_update()
+ if source is None:
+ source = UpdateInfo('', None, None, None)
+ source._has_update = False
+ mapping = update_info_props_map
+
+ deprecation_warning(msg)
+ for target_name, source_name in mapping.items():
+ value = getattr(source, source_name)
+ setattr(self, target_name, value)
+
+ return getattr(self, attribute)
+
+
+def run_update(ydl):
+ """Update the program file with the latest version from the repository
+ @returns Whether there was a successful update (No update = False)
+ """
+ return Updater(ydl).update()
+
+
+__all__ = ['Updater']
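
A note on _sha256_file above: it hashes in fixed 128 KiB chunks through a reusable memoryview, so multi-hundred-megabyte binaries are verified without being read into memory at once. The readinto/iter idiom in isolation:

import hashlib

def sha256_file(path, chunk_size=128 * 1024):
    h = hashlib.sha256()
    buf = memoryview(bytearray(chunk_size))
    with open(path, 'rb', buffering=0) as f:
        for n in iter(lambda: f.readinto(buf), 0):  # readinto() returns 0 at EOF
            h.update(buf[:n])
    return h.hexdigest()
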
diff --git a/yt_dlp/utils/__init__.py b/yt_dlp/utils/__init__.py
new file mode 100644
index 0000000..c267e32
--- /dev/null
+++ b/yt_dlp/utils/__init__.py
@@ -0,0 +1,10 @@
+# flake8: noqa: F403
+from ..compat.compat_utils import passthrough_module
+
+passthrough_module(__name__, '._deprecated')
+del passthrough_module
+
+# isort: off
+from .traversal import *
+from ._utils import *
+from ._utils import _configuration_args, _get_exe_version_output # noqa: F401
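
passthrough_module re-exports a private submodule under the public package name so old import paths keep working. The same effect can be approximated with PEP 562's module-level __getattr__; the following is a generic stand-in, not yt-dlp's actual helper:

# hypothetical mypkg/__init__.py
import warnings

from . import _new_home

def __getattr__(name):  # PEP 562: called only for attributes not found normally
    if hasattr(_new_home, name):
        warnings.warn(f'mypkg.{name} is deprecated, use mypkg._new_home.{name}',
                      DeprecationWarning, stacklevel=2)
        return getattr(_new_home, name)
    raise AttributeError(f'module {__name__!r} has no attribute {name!r}')
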
diff --git a/yt_dlp/utils/_deprecated.py b/yt_dlp/utils/_deprecated.py
new file mode 100644
index 0000000..a8ae8ec
--- /dev/null
+++ b/yt_dlp/utils/_deprecated.py
@@ -0,0 +1,39 @@
+"""Deprecated - New code should avoid these"""
+import warnings
+
+from ..compat.compat_utils import passthrough_module
+
+# XXX: Implement this the same way as other DeprecationWarnings without circular import
+passthrough_module(__name__, '.._legacy', callback=lambda attr: warnings.warn(
+ DeprecationWarning(f'{__name__}.{attr} is deprecated'), stacklevel=6))
+del passthrough_module
+
+
+from ._utils import preferredencoding
+
+
+def encodeFilename(s, for_subprocess=False):
+ assert isinstance(s, str)
+ return s
+
+
+def decodeFilename(b, for_subprocess=False):
+ return b
+
+
+def decodeArgument(b):
+ return b
+
+
+def decodeOption(optval):
+ if optval is None:
+ return optval
+ if isinstance(optval, bytes):
+ optval = optval.decode(preferredencoding())
+
+ assert isinstance(optval, str)
+ return optval
+
+
+def error_to_compat_str(err):
+ return str(err)
diff --git a/yt_dlp/utils/_legacy.py b/yt_dlp/utils/_legacy.py
new file mode 100644
index 0000000..a23248b
--- /dev/null
+++ b/yt_dlp/utils/_legacy.py
@@ -0,0 +1,315 @@
+"""No longer used and new code should not use. Exists only for API compat."""
+import asyncio
+import atexit
+import platform
+import struct
+import sys
+import urllib.error
+import urllib.parse
+import urllib.request
+import zlib
+
+from ._utils import Popen, decode_base_n, preferredencoding
+from .traversal import traverse_obj
+from ..dependencies import certifi, websockets
+from ..networking._helper import make_ssl_context
+from ..networking._urllib import HTTPHandler
+
+# isort: split
+from .networking import escape_rfc3986 # noqa: F401
+from .networking import normalize_url as escape_url # noqa: F401
+from .networking import random_user_agent, std_headers # noqa: F401
+from ..cookies import YoutubeDLCookieJar # noqa: F401
+from ..networking._urllib import PUTRequest # noqa: F401
+from ..networking._urllib import SUPPORTED_ENCODINGS, HEADRequest # noqa: F401
+from ..networking._urllib import ProxyHandler as PerRequestProxyHandler # noqa: F401
+from ..networking._urllib import RedirectHandler as YoutubeDLRedirectHandler # noqa: F401
+from ..networking._urllib import ( # noqa: F401
+ make_socks_conn_class,
+ update_Request,
+)
+from ..networking.exceptions import HTTPError, network_exceptions # noqa: F401
+
+has_certifi = bool(certifi)
+has_websockets = bool(websockets)
+
+
+class WebSocketsWrapper:
+ """Wraps websockets module to use in non-async scopes"""
+ pool = None
+
+ def __init__(self, url, headers=None, connect=True, **ws_kwargs):
+ self.loop = asyncio.new_event_loop()
+ # XXX: "loop" is deprecated
+ self.conn = websockets.connect(
+ url, extra_headers=headers, ping_interval=None,
+ close_timeout=float('inf'), loop=self.loop, ping_timeout=float('inf'), **ws_kwargs)
+ if connect:
+ self.__enter__()
+ atexit.register(self.__exit__, None, None, None)
+
+ def __enter__(self):
+ if not self.pool:
+ self.pool = self.run_with_loop(self.conn.__aenter__(), self.loop)
+ return self
+
+ def send(self, *args):
+ self.run_with_loop(self.pool.send(*args), self.loop)
+
+ def recv(self, *args):
+ return self.run_with_loop(self.pool.recv(*args), self.loop)
+
+ def __exit__(self, type, value, traceback):
+ try:
+ return self.run_with_loop(self.conn.__aexit__(type, value, traceback), self.loop)
+ finally:
+ self.loop.close()
+ self._cancel_all_tasks(self.loop)
+
+ # taken from https://github.com/python/cpython/blob/3.9/Lib/asyncio/runners.py with modifications
+ # for contributors: if any new library that uses asyncio needs to be run in a non-async context, move these functions out of this class
+ @staticmethod
+ def run_with_loop(main, loop):
+ if not asyncio.iscoroutine(main):
+ raise ValueError(f'a coroutine was expected, got {main!r}')
+
+ try:
+ return loop.run_until_complete(main)
+ finally:
+ loop.run_until_complete(loop.shutdown_asyncgens())
+ if hasattr(loop, 'shutdown_default_executor'):
+ loop.run_until_complete(loop.shutdown_default_executor())
+
+ @staticmethod
+ def _cancel_all_tasks(loop):
+ to_cancel = asyncio.all_tasks(loop)
+
+ if not to_cancel:
+ return
+
+ for task in to_cancel:
+ task.cancel()
+
+ # XXX: "loop" is removed in Python 3.10+
+ loop.run_until_complete(
+ asyncio.gather(*to_cancel, loop=loop, return_exceptions=True))
+
+ for task in to_cancel:
+ if task.cancelled():
+ continue
+ if task.exception() is not None:
+ loop.call_exception_handler({
+ 'message': 'unhandled exception during asyncio.run() shutdown',
+ 'exception': task.exception(),
+ 'task': task,
+ })
+
+
+def load_plugins(name, suffix, namespace):
+ from ..plugins import load_plugins
+ ret = load_plugins(name, suffix)
+ namespace.update(ret)
+ return ret
+
+
+def traverse_dict(dictn, keys, casesense=True):
+ return traverse_obj(dictn, keys, casesense=casesense, is_user_input=True, traverse_string=True)
+
+
+def decode_base(value, digits):
+ return decode_base_n(value, table=digits)
+
+
+def platform_name():
+ """ Returns the platform name as a str """
+ return platform.platform()
+
+
+def get_subprocess_encoding():
+ if sys.platform == 'win32' and sys.getwindowsversion()[0] >= 5:
+ # For subprocess calls, encode with locale encoding
+ # Refer to http://stackoverflow.com/a/9951851/35070
+ encoding = preferredencoding()
+ else:
+ encoding = sys.getfilesystemencoding()
+ if encoding is None:
+ encoding = 'utf-8'
+ return encoding
+
+
+# UNUSED
+# Based on png2str() written by @gdkchan and improved by @yokrysty
+# Originally posted at https://github.com/ytdl-org/youtube-dl/issues/9706
+def decode_png(png_data):
+ # Reference: https://www.w3.org/TR/PNG/
+ header = png_data[8:]
+
+ if png_data[:8] != b'\x89PNG\x0d\x0a\x1a\x0a' or header[4:8] != b'IHDR':
+ raise OSError('Not a valid PNG file.')
+
+ int_map = {1: '>B', 2: '>H', 4: '>I'}
+ unpack_integer = lambda x: struct.unpack(int_map[len(x)], x)[0]
+
+ chunks = []
+
+ while header:
+ length = unpack_integer(header[:4])
+ header = header[4:]
+
+ chunk_type = header[:4]
+ header = header[4:]
+
+ chunk_data = header[:length]
+ header = header[length:]
+
+ header = header[4:] # Skip CRC
+
+ chunks.append({
+ 'type': chunk_type,
+ 'length': length,
+ 'data': chunk_data
+ })
+
+ ihdr = chunks[0]['data']
+
+ width = unpack_integer(ihdr[:4])
+ height = unpack_integer(ihdr[4:8])
+
+ idat = b''
+
+ for chunk in chunks:
+ if chunk['type'] == b'IDAT':
+ idat += chunk['data']
+
+ if not idat:
+ raise OSError('Unable to read PNG data.')
+
+ decompressed_data = bytearray(zlib.decompress(idat))
+
+ stride = width * 3
+ pixels = []
+
+ def _get_pixel(idx):
+ x = idx % stride
+ y = idx // stride
+ return pixels[y][x]
+
+ for y in range(height):
+ basePos = y * (1 + stride)
+ filter_type = decompressed_data[basePos]
+
+ current_row = []
+
+ pixels.append(current_row)
+
+ for x in range(stride):
+ color = decompressed_data[1 + basePos + x]
+ basex = y * stride + x
+ left = 0
+ up = 0
+
+ if x > 2:
+ left = _get_pixel(basex - 3)
+ if y > 0:
+ up = _get_pixel(basex - stride)
+
+ if filter_type == 1: # Sub
+ color = (color + left) & 0xff
+ elif filter_type == 2: # Up
+ color = (color + up) & 0xff
+ elif filter_type == 3: # Average
+ color = (color + ((left + up) >> 1)) & 0xff
+ elif filter_type == 4: # Paeth
+ a = left
+ b = up
+ c = 0
+
+ if x > 2 and y > 0:
+ c = _get_pixel(basex - stride - 3)
+
+ p = a + b - c
+
+ pa = abs(p - a)
+ pb = abs(p - b)
+ pc = abs(p - c)
+
+ if pa <= pb and pa <= pc:
+ color = (color + a) & 0xff
+ elif pb <= pc:
+ color = (color + b) & 0xff
+ else:
+ color = (color + c) & 0xff
+
+ current_row.append(color)
+
+ return width, height, pixels
+
+
+def register_socks_protocols():
+ # "Register" SOCKS protocols
+ # In Python < 2.6.5, urlsplit() suffers from bug https://bugs.python.org/issue7904
+ # URLs with protocols not in urlparse.uses_netloc are not handled correctly
+ for scheme in ('socks', 'socks4', 'socks4a', 'socks5'):
+ if scheme not in urllib.parse.uses_netloc:
+ urllib.parse.uses_netloc.append(scheme)
+
+
+def handle_youtubedl_headers(headers):
+ filtered_headers = headers
+
+ if 'Youtubedl-no-compression' in filtered_headers:
+ filtered_headers = {k: v for k, v in filtered_headers.items() if k.lower() != 'accept-encoding'}
+ del filtered_headers['Youtubedl-no-compression']
+
+ return filtered_headers
+
+
+def request_to_url(req):
+ if isinstance(req, urllib.request.Request):
+ return req.get_full_url()
+ else:
+ return req
+
+
+def sanitized_Request(url, *args, **kwargs):
+ from ..utils import extract_basic_auth, sanitize_url
+ url, auth_header = extract_basic_auth(escape_url(sanitize_url(url)))
+ if auth_header is not None:
+ headers = args[1] if len(args) >= 2 else kwargs.setdefault('headers', {})
+ headers['Authorization'] = auth_header
+ return urllib.request.Request(url, *args, **kwargs)
+
+
+class YoutubeDLHandler(HTTPHandler):
+ def __init__(self, params, *args, **kwargs):
+ self._params = params
+ super().__init__(*args, **kwargs)
+
+
+YoutubeDLHTTPSHandler = YoutubeDLHandler
+
+
+class YoutubeDLCookieProcessor(urllib.request.HTTPCookieProcessor):
+ def __init__(self, cookiejar=None):
+ urllib.request.HTTPCookieProcessor.__init__(self, cookiejar)
+
+ def http_response(self, request, response):
+ return urllib.request.HTTPCookieProcessor.http_response(self, request, response)
+
+ https_request = urllib.request.HTTPCookieProcessor.http_request
+ https_response = http_response
+
+
+def make_HTTPS_handler(params, **kwargs):
+ return YoutubeDLHTTPSHandler(params, context=make_ssl_context(
+ verify=not params.get('nocheckcertificate'),
+ client_certificate=params.get('client_certificate'),
+ client_certificate_key=params.get('client_certificate_key'),
+ client_certificate_password=params.get('client_certificate_password'),
+ legacy_support=params.get('legacyserverconnect'),
+ use_certifi='no-certifi' not in params.get('compat_opts', []),
+ ), **kwargs)
+
+
+def process_communicate_or_kill(p, *args, **kwargs):
+ return Popen.communicate_or_kill(p, *args, **kwargs)
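
WebSocketsWrapper above drives an async client from synchronous code by owning a private event loop; run_with_loop is essentially asyncio.run() without the loop creation and teardown. The core of that pattern, stdlib-only:

import asyncio

def run_sync(coro, loop):
    """Run one coroutine to completion on a caller-owned loop."""
    if not asyncio.iscoroutine(coro):
        raise ValueError(f'a coroutine was expected, got {coro!r}')
    try:
        return loop.run_until_complete(coro)
    finally:
        loop.run_until_complete(loop.shutdown_asyncgens())

loop = asyncio.new_event_loop()
async def demo():
    await asyncio.sleep(0)
    return 42
print(run_sync(demo(), loop))  # -> 42
loop.close()
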
diff --git a/yt_dlp/utils/_utils.py b/yt_dlp/utils/_utils.py
new file mode 100644
index 0000000..9efeb6a
--- /dev/null
+++ b/yt_dlp/utils/_utils.py
@@ -0,0 +1,5445 @@
+import base64
+import binascii
+import calendar
+import codecs
+import collections
+import collections.abc
+import contextlib
+import datetime
+import email.header
+import email.utils
+import errno
+import hashlib
+import hmac
+import html.entities
+import html.parser
+import inspect
+import io
+import itertools
+import json
+import locale
+import math
+import mimetypes
+import netrc
+import operator
+import os
+import platform
+import random
+import re
+import shlex
+import socket
+import ssl
+import struct
+import subprocess
+import sys
+import tempfile
+import time
+import traceback
+import types
+import unicodedata
+import urllib.error
+import urllib.parse
+import urllib.request
+import xml.etree.ElementTree
+
+from . import traversal
+
+from ..compat import functools # isort: split
+from ..compat import (
+ compat_etree_fromstring,
+ compat_expanduser,
+ compat_HTMLParseError,
+ compat_os_name,
+ compat_shlex_quote,
+)
+from ..dependencies import xattr
+
+__name__ = __name__.rsplit('.', 1)[0] # Pretend to be the parent module
+
+# This is not clearly defined otherwise
+compiled_regex_type = type(re.compile(''))
+
+
+class NO_DEFAULT:
+ pass
+
+
+def IDENTITY(x):
+ return x
+
+
+ENGLISH_MONTH_NAMES = [
+ 'January', 'February', 'March', 'April', 'May', 'June',
+ 'July', 'August', 'September', 'October', 'November', 'December']
+
+MONTH_NAMES = {
+ 'en': ENGLISH_MONTH_NAMES,
+ 'fr': [
+ 'janvier', 'février', 'mars', 'avril', 'mai', 'juin',
+ 'juillet', 'août', 'septembre', 'octobre', 'novembre', 'décembre'],
+ # these follow the genitive grammatical case (dopełniacz)
+ # some websites might be using nominative, which will require another month list
+ # https://en.wikibooks.org/wiki/Polish/Noun_cases
+ 'pl': ['stycznia', 'lutego', 'marca', 'kwietnia', 'maja', 'czerwca',
+ 'lipca', 'sierpnia', 'września', 'października', 'listopada', 'grudnia'],
+}
+
+# From https://github.com/python/cpython/blob/3.11/Lib/email/_parseaddr.py#L36-L42
+TIMEZONE_NAMES = {
+ 'UT': 0, 'UTC': 0, 'GMT': 0, 'Z': 0,
+ 'AST': -4, 'ADT': -3, # Atlantic (used in Canada)
+ 'EST': -5, 'EDT': -4, # Eastern
+ 'CST': -6, 'CDT': -5, # Central
+ 'MST': -7, 'MDT': -6, # Mountain
+ 'PST': -8, 'PDT': -7 # Pacific
+}
+
+# needed for sanitizing filenames in restricted mode
+ACCENT_CHARS = dict(zip('ÂÃÄÀÁÅÆÇÈÉÊËÌÍÎÏÐÑÒÓÔÕÖŐØŒÙÚÛÜŰÝÞßàáâãäåæçèéêëìíîïðñòóôõöőøœùúûüűýþÿ',
+ itertools.chain('AAAAAA', ['AE'], 'CEEEEIIIIDNOOOOOOO', ['OE'], 'UUUUUY', ['TH', 'ss'],
+ 'aaaaaa', ['ae'], 'ceeeeiiiionooooooo', ['oe'], 'uuuuuy', ['th'], 'y')))
+
+DATE_FORMATS = (
+ '%d %B %Y',
+ '%d %b %Y',
+ '%B %d %Y',
+ '%B %dst %Y',
+ '%B %dnd %Y',
+ '%B %drd %Y',
+ '%B %dth %Y',
+ '%b %d %Y',
+ '%b %dst %Y',
+ '%b %dnd %Y',
+ '%b %drd %Y',
+ '%b %dth %Y',
+ '%b %dst %Y %I:%M',
+ '%b %dnd %Y %I:%M',
+ '%b %drd %Y %I:%M',
+ '%b %dth %Y %I:%M',
+ '%Y %m %d',
+ '%Y-%m-%d',
+ '%Y.%m.%d.',
+ '%Y/%m/%d',
+ '%Y/%m/%d %H:%M',
+ '%Y/%m/%d %H:%M:%S',
+ '%Y%m%d%H%M',
+ '%Y%m%d%H%M%S',
+ '%Y%m%d',
+ '%Y-%m-%d %H:%M',
+ '%Y-%m-%d %H:%M:%S',
+ '%Y-%m-%d %H:%M:%S.%f',
+ '%Y-%m-%d %H:%M:%S:%f',
+ '%d.%m.%Y %H:%M',
+ '%d.%m.%Y %H.%M',
+ '%Y-%m-%dT%H:%M:%SZ',
+ '%Y-%m-%dT%H:%M:%S.%fZ',
+ '%Y-%m-%dT%H:%M:%S.%f0Z',
+ '%Y-%m-%dT%H:%M:%S',
+ '%Y-%m-%dT%H:%M:%S.%f',
+ '%Y-%m-%dT%H:%M',
+ '%b %d %Y at %H:%M',
+ '%b %d %Y at %H:%M:%S',
+ '%B %d %Y at %H:%M',
+ '%B %d %Y at %H:%M:%S',
+ '%H:%M %d-%b-%Y',
+)
+
+DATE_FORMATS_DAY_FIRST = list(DATE_FORMATS)
+DATE_FORMATS_DAY_FIRST.extend([
+ '%d-%m-%Y',
+ '%d.%m.%Y',
+ '%d.%m.%y',
+ '%d/%m/%Y',
+ '%d/%m/%y',
+ '%d/%m/%Y %H:%M:%S',
+ '%d-%m-%Y %H:%M',
+ '%H:%M %d/%m/%Y',
+])
+
+DATE_FORMATS_MONTH_FIRST = list(DATE_FORMATS)
+DATE_FORMATS_MONTH_FIRST.extend([
+ '%m-%d-%Y',
+ '%m.%d.%Y',
+ '%m/%d/%Y',
+ '%m/%d/%y',
+ '%m/%d/%Y %H:%M:%S',
+])
+
+PACKED_CODES_RE = r"}\('(.+)',(\d+),(\d+),'([^']+)'\.split\('\|'\)"
+JSON_LD_RE = r'(?is)<script[^>]+type=(["\']?)application/ld\+json\1[^>]*>\s*(?P<json_ld>{.+?}|\[.+?\])\s*</script>'
+
+NUMBER_RE = r'\d+(?:\.\d+)?'
+
+
+@functools.cache
+def preferredencoding():
+ """Get preferred encoding.
+
+ Returns the best encoding scheme for the system, based on
+ locale.getpreferredencoding() and some further tweaks.
+ """
+ try:
+ pref = locale.getpreferredencoding()
+ 'TEST'.encode(pref)
+ except Exception:
+ pref = 'UTF-8'
+
+ return pref
+
+
+def write_json_file(obj, fn):
+ """ Encode obj as JSON and write it to fn, atomically if possible """
+
+ tf = tempfile.NamedTemporaryFile(
+ prefix=f'{os.path.basename(fn)}.', dir=os.path.dirname(fn),
+ suffix='.tmp', delete=False, mode='w', encoding='utf-8')
+
+ try:
+ with tf:
+ json.dump(obj, tf, ensure_ascii=False)
+ if sys.platform == 'win32':
+ # Need to remove existing file on Windows, else os.rename raises
+ # WindowsError or FileExistsError.
+ with contextlib.suppress(OSError):
+ os.unlink(fn)
+ with contextlib.suppress(OSError):
+ mask = os.umask(0)
+ os.umask(mask)
+ os.chmod(tf.name, 0o666 & ~mask)
+ os.rename(tf.name, fn)
+ except Exception:
+ with contextlib.suppress(OSError):
+ os.remove(tf.name)
+ raise
+
+
+def find_xpath_attr(node, xpath, key, val=None):
+ """ Find the xpath xpath[@key=val] """
+ assert re.match(r'^[a-zA-Z_-]+$', key)
+ expr = xpath + ('[@%s]' % key if val is None else f"[@{key}='{val}']")
+ return node.find(expr)
+
+# xpath_with_ns below exists because Python 2.6's xml.etree.ElementTree.Element
+# methods did not support the namespaces parameter
+
+
+def xpath_with_ns(path, ns_map):
+ components = [c.split(':') for c in path.split('/')]
+ replaced = []
+ for c in components:
+ if len(c) == 1:
+ replaced.append(c[0])
+ else:
+ ns, tag = c
+ replaced.append('{%s}%s' % (ns_map[ns], tag))
+ return '/'.join(replaced)
+
+
+def xpath_element(node, xpath, name=None, fatal=False, default=NO_DEFAULT):
+ def _find_xpath(xpath):
+ return node.find(xpath)
+
+ if isinstance(xpath, str):
+ n = _find_xpath(xpath)
+ else:
+ for xp in xpath:
+ n = _find_xpath(xp)
+ if n is not None:
+ break
+
+ if n is None:
+ if default is not NO_DEFAULT:
+ return default
+ elif fatal:
+ name = xpath if name is None else name
+ raise ExtractorError('Could not find XML element %s' % name)
+ else:
+ return None
+ return n
+
+
+def xpath_text(node, xpath, name=None, fatal=False, default=NO_DEFAULT):
+ n = xpath_element(node, xpath, name, fatal=fatal, default=default)
+ if n is None or n == default:
+ return n
+ if n.text is None:
+ if default is not NO_DEFAULT:
+ return default
+ elif fatal:
+ name = xpath if name is None else name
+ raise ExtractorError('Could not find XML element\'s text %s' % name)
+ else:
+ return None
+ return n.text
+
+
+def xpath_attr(node, xpath, key, name=None, fatal=False, default=NO_DEFAULT):
+ n = find_xpath_attr(node, xpath, key)
+ if n is None:
+ if default is not NO_DEFAULT:
+ return default
+ elif fatal:
+ name = f'{xpath}[@{key}]' if name is None else name
+ raise ExtractorError('Could not find XML attribute %s' % name)
+ else:
+ return None
+ return n.attrib[key]
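+
+# A minimal doctest-style illustration of the xpath helpers above:
+# >>> doc = xml.etree.ElementTree.fromstring('<root><a href="x">text</a></root>')
+# >>> xpath_text(doc, './a')
+# 'text'
+# >>> xpath_attr(doc, './a', 'href')
+# 'x'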
+
+
+def get_element_by_id(id, html, **kwargs):
+ """Return the content of the tag with the specified ID in the passed HTML document"""
+ return get_element_by_attribute('id', id, html, **kwargs)
+
+
+def get_element_html_by_id(id, html, **kwargs):
+ """Return the html of the tag with the specified ID in the passed HTML document"""
+ return get_element_html_by_attribute('id', id, html, **kwargs)
+
+
+def get_element_by_class(class_name, html):
+ """Return the content of the first tag with the specified class in the passed HTML document"""
+ retval = get_elements_by_class(class_name, html)
+ return retval[0] if retval else None
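+
+# For instance, the class may appear among several on the element:
+# >>> get_element_by_class('title', '<div class="title main">Hello</div>')
+# 'Hello'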
+
+
+def get_element_html_by_class(class_name, html):
+ """Return the html of the first tag with the specified class in the passed HTML document"""
+ retval = get_elements_html_by_class(class_name, html)
+ return retval[0] if retval else None
+
+
+def get_element_by_attribute(attribute, value, html, **kwargs):
+ retval = get_elements_by_attribute(attribute, value, html, **kwargs)
+ return retval[0] if retval else None
+
+
+def get_element_html_by_attribute(attribute, value, html, **kwargs):
+ retval = get_elements_html_by_attribute(attribute, value, html, **kwargs)
+ return retval[0] if retval else None
+
+
+def get_elements_by_class(class_name, html, **kwargs):
+ """Return the content of all tags with the specified class in the passed HTML document as a list"""
+ return get_elements_by_attribute(
+ 'class', r'[^\'"]*(?<=[\'"\s])%s(?=[\'"\s])[^\'"]*' % re.escape(class_name),
+ html, escape_value=False)
+
+
+def get_elements_html_by_class(class_name, html):
+ """Return the html of all tags with the specified class in the passed HTML document as a list"""
+ return get_elements_html_by_attribute(
+ 'class', r'[^\'"]*(?<=[\'"\s])%s(?=[\'"\s])[^\'"]*' % re.escape(class_name),
+ html, escape_value=False)
+
+
+def get_elements_by_attribute(*args, **kwargs):
+ """Return the content of the tag with the specified attribute in the passed HTML document"""
+ return [content for content, _ in get_elements_text_and_html_by_attribute(*args, **kwargs)]
+
+
+def get_elements_html_by_attribute(*args, **kwargs):
+ """Return the html of the tag with the specified attribute in the passed HTML document"""
+ return [whole for _, whole in get_elements_text_and_html_by_attribute(*args, **kwargs)]
+
+
+def get_elements_text_and_html_by_attribute(attribute, value, html, *, tag=r'[\w:.-]+', escape_value=True):
+ """
+ Return the text (content) and the html (whole) of the tag with the specified
+ attribute in the passed HTML document
+ """
+ if not value:
+ return
+
+ quote = '' if re.match(r'''[\s"'`=<>]''', value) else '?'
+
+ value = re.escape(value) if escape_value else value
+
+ partial_element_re = rf'''(?x)
+ <(?P<tag>{tag})
+ (?:\s(?:[^>"']|"[^"]*"|'[^']*')*)?
+ \s{re.escape(attribute)}\s*=\s*(?P<_q>['"]{quote})(?-x:{value})(?P=_q)
+ '''
+
+ for m in re.finditer(partial_element_re, html):
+ content, whole = get_element_text_and_html_by_tag(m.group('tag'), html[m.start():])
+
+ yield (
+ unescapeHTML(re.sub(r'^(?P<q>["\'])(?P<content>.*)(?P=q)$', r'\g<content>', content, flags=re.DOTALL)),
+ whole
+ )
+
+
+class HTMLBreakOnClosingTagParser(html.parser.HTMLParser):
+ """
+ HTML parser which raises HTMLBreakOnClosingTagException upon reaching the
+ closing tag for the first opening tag it has encountered, and can be used
+ as a context manager
+ """
+
+ class HTMLBreakOnClosingTagException(Exception):
+ pass
+
+ def __init__(self):
+ self.tagstack = collections.deque()
+ html.parser.HTMLParser.__init__(self)
+
+ def __enter__(self):
+ return self
+
+ def __exit__(self, *_):
+ self.close()
+
+ def close(self):
+ # When handle_endtag raises HTMLBreakOnClosingTagException, unparsed data
+ # remains buffered; we have no further interest in it, so this method is
+ # overridden to discard it
+ pass
+
+ def handle_starttag(self, tag, _):
+ self.tagstack.append(tag)
+
+ def handle_endtag(self, tag):
+ if not self.tagstack:
+ raise compat_HTMLParseError('no tags in the stack')
+ while self.tagstack:
+ inner_tag = self.tagstack.pop()
+ if inner_tag == tag:
+ break
+ else:
+ raise compat_HTMLParseError(f'matching opening tag for closing {tag} tag not found')
+ if not self.tagstack:
+ raise self.HTMLBreakOnClosingTagException()
+
+
+# XXX: This should be far less strict
+def get_element_text_and_html_by_tag(tag, html):
+ """
+ For the first element with the specified tag in the passed HTML document
+ return its content (text) and the whole element (html)
+ """
+ def find_or_raise(haystack, needle, exc):
+ try:
+ return haystack.index(needle)
+ except ValueError:
+ raise exc
+ closing_tag = f'</{tag}>'
+ whole_start = find_or_raise(
+ html, f'<{tag}', compat_HTMLParseError(f'opening {tag} tag not found'))
+ content_start = find_or_raise(
+ html[whole_start:], '>', compat_HTMLParseError(f'malformed opening {tag} tag'))
+ content_start += whole_start + 1
+ with HTMLBreakOnClosingTagParser() as parser:
+ parser.feed(html[whole_start:content_start])
+ if not parser.tagstack or parser.tagstack[0] != tag:
+ raise compat_HTMLParseError(f'parser did not match opening {tag} tag')
+ offset = content_start
+ while offset < len(html):
+ next_closing_tag_start = find_or_raise(
+ html[offset:], closing_tag,
+ compat_HTMLParseError(f'closing {tag} tag not found'))
+ next_closing_tag_end = next_closing_tag_start + len(closing_tag)
+ try:
+ parser.feed(html[offset:offset + next_closing_tag_end])
+ offset += next_closing_tag_end
+ except HTMLBreakOnClosingTagParser.HTMLBreakOnClosingTagException:
+ return html[content_start:offset + next_closing_tag_start], \
+ html[whole_start:offset + next_closing_tag_end]
+ raise compat_HTMLParseError('unexpected end of html')
+
+
+class HTMLAttributeParser(html.parser.HTMLParser):
+ """Trivial HTML parser to gather the attributes for a single element"""
+
+ def __init__(self):
+ self.attrs = {}
+ html.parser.HTMLParser.__init__(self)
+
+ def handle_starttag(self, tag, attrs):
+ self.attrs = dict(attrs)
+ raise compat_HTMLParseError('done')
+
+
+class HTMLListAttrsParser(html.parser.HTMLParser):
+ """HTML parser to gather the attributes for the elements of a list"""
+
+ def __init__(self):
+ html.parser.HTMLParser.__init__(self)
+ self.items = []
+ self._level = 0
+
+ def handle_starttag(self, tag, attrs):
+ if tag == 'li' and self._level == 0:
+ self.items.append(dict(attrs))
+ self._level += 1
+
+ def handle_endtag(self, tag):
+ self._level -= 1
+
+
+def extract_attributes(html_element):
+ """Given a string for an HTML element such as
+ <el
+ a="foo" B="bar" c="&98;az" d=boz
+ empty= noval entity="&amp;"
+ sq='"' dq="'"
+ >
+ Decode and return a dictionary of attributes.
+ {
+ 'a': 'foo', 'b': 'bar', 'c': 'baz', 'd': 'boz',
+ 'empty': '', 'noval': None, 'entity': '&',
+ 'sq': '"', 'dq': '\''
+ }.
+ """
+ parser = HTMLAttributeParser()
+ with contextlib.suppress(compat_HTMLParseError):
+ parser.feed(html_element)
+ parser.close()
+ return parser.attrs
+
+
+def parse_list(webpage):
+ """Given a string for an series of HTML <li> elements,
+ return a dictionary of their attributes"""
+ parser = HTMLListAttrsParser()
+ parser.feed(webpage)
+ parser.close()
+ return parser.items
+
+
+def clean_html(html):
+ """Clean an HTML snippet into a readable string"""
+
+ if html is None: # Convenience for sanitizing descriptions etc.
+ return html
+
+ html = re.sub(r'\s+', ' ', html)
+ html = re.sub(r'(?u)\s?<\s?br\s?/?\s?>\s?', '\n', html)
+ html = re.sub(r'(?u)<\s?/\s?p\s?>\s?<\s?p[^>]*>', '\n', html)
+ # Strip html tags
+ html = re.sub('<.*?>', '', html)
+ # Replace html entities
+ html = unescapeHTML(html)
+ return html.strip()
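+
+# e.g. tags are stripped, <br>/<p> boundaries become newlines and entities decode:
+# >>> clean_html('<p>First&amp;last</p> <p>Second</p>')
+# 'First&last\nSecond'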
+
+
+class LenientJSONDecoder(json.JSONDecoder):
+ # TODO: Write tests
+ def __init__(self, *args, transform_source=None, ignore_extra=False, close_objects=0, **kwargs):
+ self.transform_source, self.ignore_extra = transform_source, ignore_extra
+ self._close_attempts = 2 * close_objects
+ super().__init__(*args, **kwargs)
+
+ @staticmethod
+ def _close_object(err):
+ doc = err.doc[:err.pos]
+ # We need to add comma first to get the correct error message
+ if err.msg.startswith('Expecting \',\''):
+ return doc + ','
+ elif not doc.endswith(','):
+ return
+
+ if err.msg.startswith('Expecting property name'):
+ return doc[:-1] + '}'
+ elif err.msg.startswith('Expecting value'):
+ return doc[:-1] + ']'
+
+ def decode(self, s):
+ if self.transform_source:
+ s = self.transform_source(s)
+ for attempt in range(self._close_attempts + 1):
+ try:
+ if self.ignore_extra:
+ return self.raw_decode(s.lstrip())[0]
+ return super().decode(s)
+ except json.JSONDecodeError as e:
+ if e.pos is None:
+ raise
+ elif attempt < self._close_attempts:
+ s = self._close_object(e)
+ if s is not None:
+ continue
+ raise type(e)(f'{e.msg} in {s[e.pos - 10:e.pos + 10]!r}', s, e.pos)
+ assert False, 'Too many attempts to decode JSON'
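+
+# Illustrative: with ignore_extra=True, trailing garbage after the JSON value is tolerated:
+# >>> LenientJSONDecoder(ignore_extra=True).decode('{"a": 1} trailing noise')
+# {'a': 1}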
+
+
+def sanitize_open(filename, open_mode):
+ """Try to open the given filename, and slightly tweak it if this fails.
+
+ Attempts to open the given filename. If this fails, it tries to change
+ the filename slightly, step by step, until it's either able to open it
+ or it fails and raises a final exception, like the standard open()
+ function.
+
+ It returns the tuple (stream, definitive_file_name).
+ """
+ if filename == '-':
+ if sys.platform == 'win32':
+ import msvcrt
+
+ # stdout may be any IO stream, e.g. when using contextlib.redirect_stdout
+ with contextlib.suppress(io.UnsupportedOperation):
+ msvcrt.setmode(sys.stdout.fileno(), os.O_BINARY)
+ return (sys.stdout.buffer if hasattr(sys.stdout, 'buffer') else sys.stdout, filename)
+
+ for attempt in range(2):
+ try:
+ try:
+ if sys.platform == 'win32':
+ # FIXME: An exclusive lock also locks the file from being read.
+ # Since windows locks are mandatory, don't lock the file on windows (for now).
+ # Ref: https://github.com/yt-dlp/yt-dlp/issues/3124
+ raise LockingUnsupportedError()
+ stream = locked_file(filename, open_mode, block=False).__enter__()
+ except OSError:
+ stream = open(filename, open_mode)
+ return stream, filename
+ except OSError as err:
+ if attempt or err.errno in (errno.EACCES,):
+ raise
+ old_filename, filename = filename, sanitize_path(filename)
+ if old_filename == filename:
+ raise
+
+
+def timeconvert(timestr):
+ """Convert RFC 2822 defined time string into system timestamp"""
+ timestamp = None
+ timetuple = email.utils.parsedate_tz(timestr)
+ if timetuple is not None:
+ timestamp = email.utils.mktime_tz(timetuple)
+ return timestamp
+
+
+def sanitize_filename(s, restricted=False, is_id=NO_DEFAULT):
+ """Sanitizes a string so it could be used as part of a filename.
+ @param restricted Use a stricter subset of allowed characters
+ @param is_id Whether this is an ID that should be kept unchanged if possible.
+ If unset, yt-dlp's new sanitization rules are in effect
+ """
+ if s == '':
+ return ''
+
+ def replace_insane(char):
+ if restricted and char in ACCENT_CHARS:
+ return ACCENT_CHARS[char]
+ elif not restricted and char == '\n':
+ return '\0 '
+ elif is_id is NO_DEFAULT and not restricted and char in '"*:<>?|/\\':
+ # Replace with their full-width unicode counterparts
+ return {'/': '\u29F8', '\\': '\u29f9'}.get(char, chr(ord(char) + 0xfee0))
+ elif char == '?' or ord(char) < 32 or ord(char) == 127:
+ return ''
+ elif char == '"':
+ return '' if restricted else '\''
+ elif char == ':':
+ return '\0_\0-' if restricted else '\0 \0-'
+ elif char in '\\/|*<>':
+ return '\0_'
+ if restricted and (char in '!&\'()[]{}$;`^,#' or char.isspace() or ord(char) > 127):
+ return '' if unicodedata.category(char)[0] in 'CM' else '\0_'
+ return char
+
+ # Replace look-alike Unicode glyphs
+ if restricted and (is_id is NO_DEFAULT or not is_id):
+ s = unicodedata.normalize('NFKC', s)
+ s = re.sub(r'[0-9]+(?::[0-9]+)+', lambda m: m.group(0).replace(':', '_'), s) # Handle timestamps
+ result = ''.join(map(replace_insane, s))
+ if is_id is NO_DEFAULT:
+ result = re.sub(r'(\0.)(?:(?=\1)..)+', r'\1', result) # Remove repeated substitute chars
+ STRIP_RE = r'(?:\0.|[ _-])*'
+ result = re.sub(f'^\0.{STRIP_RE}|{STRIP_RE}\0.$', '', result) # Remove substitute chars from start/end
+ result = result.replace('\0', '') or '_'
+
+ if not is_id:
+ while '__' in result:
+ result = result.replace('__', '_')
+ result = result.strip('_')
+ # Common case of "Foreign band name - English song title"
+ if restricted and result.startswith('-_'):
+ result = result[2:]
+ if result.startswith('-'):
+ result = '_' + result[len('-'):]
+ result = result.lstrip('.')
+ if not result:
+ result = '_'
+ return result
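+
+# Expected behaviour, for example:
+# >>> sanitize_filename('A/B?')  # unsafe chars become full-width look-alikes
+# 'A⧸B？'
+# >>> sanitize_filename('A/B: C?', restricted=True)
+# 'A_B_-_C'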
+
+
+def sanitize_path(s, force=False):
+ """Sanitizes and normalizes path on Windows"""
+ # XXX: this handles drive relative paths (c:sth) incorrectly
+ if sys.platform == 'win32':
+ force = False
+ drive_or_unc, _ = os.path.splitdrive(s)
+ elif force:
+ drive_or_unc = ''
+ else:
+ return s
+
+ norm_path = os.path.normpath(remove_start(s, drive_or_unc)).split(os.path.sep)
+ if drive_or_unc:
+ norm_path.pop(0)
+ sanitized_path = [
+ path_part if path_part in ['.', '..'] else re.sub(r'(?:[/<>:"\|\\?\*]|[\s.]$)', '#', path_part)
+ for path_part in norm_path]
+ if drive_or_unc:
+ sanitized_path.insert(0, drive_or_unc + os.path.sep)
+ elif force and s and s[0] == os.path.sep:
+ sanitized_path.insert(0, os.path.sep)
+ # TODO: Fix behavioral differences <3.12
+ # The workaround using `normpath` only superficially passes tests
+ # Ref: https://github.com/python/cpython/pull/100351
+ return os.path.normpath(os.path.join(*sanitized_path))
+
+
+def sanitize_url(url, *, scheme='http'):
+ # Prepend protocol-less URLs with an `http:` scheme to reduce
+ # unwanted failures caused by a missing protocol
+ if url is None:
+ return
+ elif url.startswith('//'):
+ return f'{scheme}:{url}'
+ # Fix some common typos seen so far
+ COMMON_TYPOS = (
+ # https://github.com/ytdl-org/youtube-dl/issues/15649
+ (r'^httpss://', r'https://'),
+ # https://bx1.be/lives/direct-tv/
+ (r'^rmtp([es]?)://', r'rtmp\1://'),
+ )
+ for mistake, fixup in COMMON_TYPOS:
+ if re.match(mistake, url):
+ return re.sub(mistake, fixup, url)
+ return url
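+
+# For example:
+# >>> sanitize_url('//cdn.example.com/v.mp4')
+# 'http://cdn.example.com/v.mp4'
+# >>> sanitize_url('httpss://example.com/v')
+# 'https://example.com/v'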
+
+
+def extract_basic_auth(url):
+ parts = urllib.parse.urlsplit(url)
+ if parts.username is None:
+ return url, None
+ url = urllib.parse.urlunsplit(parts._replace(netloc=(
+ parts.hostname if parts.port is None
+ else '%s:%d' % (parts.hostname, parts.port))))
+ auth_payload = base64.b64encode(
+ ('%s:%s' % (parts.username, parts.password or '')).encode())
+ return url, f'Basic {auth_payload.decode()}'
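+
+# e.g. credentials are moved out of the netloc and into a header value:
+# >>> extract_basic_auth('https://user:pass@example.com/path')
+# ('https://example.com/path', 'Basic dXNlcjpwYXNz')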
+
+
+def expand_path(s):
+ """Expand shell variables and ~"""
+ return os.path.expandvars(compat_expanduser(s))
+
+
+def orderedSet(iterable, *, lazy=False):
+ """Remove all duplicates from the input iterable"""
+ def _iter():
+ seen = [] # Do not use set since the items can be unhashable
+ for x in iterable:
+ if x not in seen:
+ seen.append(x)
+ yield x
+
+ return _iter() if lazy else list(_iter())
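+
+# Order is preserved and unhashable items are allowed, e.g.:
+# >>> orderedSet([3, 1, 3, [2], [2]])
+# [3, 1, [2]]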
+
+
+def _htmlentity_transform(entity_with_semicolon):
+ """Transforms an HTML entity to a character."""
+ entity = entity_with_semicolon[:-1]
+
+ # Known non-numeric HTML entity
+ if entity in html.entities.name2codepoint:
+ return chr(html.entities.name2codepoint[entity])
+
+ # TODO: HTML5 allows entities without a semicolon.
+ # E.g. '&Eacuteric' should be decoded as 'Éric'.
+ if entity_with_semicolon in html.entities.html5:
+ return html.entities.html5[entity_with_semicolon]
+
+ mobj = re.match(r'#(x[0-9a-fA-F]+|[0-9]+)', entity)
+ if mobj is not None:
+ numstr = mobj.group(1)
+ if numstr.startswith('x'):
+ base = 16
+ numstr = '0%s' % numstr
+ else:
+ base = 10
+ # See https://github.com/ytdl-org/youtube-dl/issues/7518
+ with contextlib.suppress(ValueError):
+ return chr(int(numstr, base))
+
+ # Unknown entity in name, return its literal representation
+ return '&%s;' % entity
+
+
+def unescapeHTML(s):
+ if s is None:
+ return None
+ assert isinstance(s, str)
+
+ return re.sub(
+ r'&([^&;]+;)', lambda m: _htmlentity_transform(m.group(1)), s)
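+
+# For instance, named, numeric and hex entities are all handled:
+# >>> unescapeHTML('&lt;b&gt; &amp; &#x27;')
+# "<b> & '"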
+
+
+def escapeHTML(text):
+ return (
+ text
+ .replace('&', '&amp;')
+ .replace('<', '&lt;')
+ .replace('>', '&gt;')
+ .replace('"', '&quot;')
+ .replace("'", '&#39;')
+ )
+
+
+class netrc_from_content(netrc.netrc):
+ def __init__(self, content):
+ self.hosts, self.macros = {}, {}
+ with io.StringIO(content) as stream:
+ self._parse('-', stream, False)
+
+
+class Popen(subprocess.Popen):
+ if sys.platform == 'win32':
+ _startupinfo = subprocess.STARTUPINFO()
+ _startupinfo.dwFlags |= subprocess.STARTF_USESHOWWINDOW
+ else:
+ _startupinfo = None
+
+ @staticmethod
+ def _fix_pyinstaller_ld_path(env):
+ """Restore LD_LIBRARY_PATH when using PyInstaller
+ Ref: https://github.com/pyinstaller/pyinstaller/blob/develop/doc/runtime-information.rst#ld_library_path--libpath-considerations
+ https://github.com/yt-dlp/yt-dlp/issues/4573
+ """
+ if not hasattr(sys, '_MEIPASS'):
+ return
+
+ def _fix(key):
+ orig = env.get(f'{key}_ORIG')
+ if orig is None:
+ env.pop(key, None)
+ else:
+ env[key] = orig
+
+ _fix('LD_LIBRARY_PATH') # Linux
+ _fix('DYLD_LIBRARY_PATH') # macOS
+
+ def __init__(self, args, *remaining, env=None, text=False, shell=False, **kwargs):
+ if env is None:
+ env = os.environ.copy()
+ self._fix_pyinstaller_ld_path(env)
+
+ self.__text_mode = kwargs.get('encoding') or kwargs.get('errors') or text or kwargs.get('universal_newlines')
+ if text is True:
+ kwargs['universal_newlines'] = True # For 3.6 compatibility
+ kwargs.setdefault('encoding', 'utf-8')
+ kwargs.setdefault('errors', 'replace')
+
+ if shell and compat_os_name == 'nt' and kwargs.get('executable') is None:
+ if not isinstance(args, str):
+ args = ' '.join(compat_shlex_quote(a) for a in args)
+ shell = False
+ args = f'{self.__comspec()} /Q /S /D /V:OFF /C "{args}"'
+
+ super().__init__(args, *remaining, env=env, shell=shell, **kwargs, startupinfo=self._startupinfo)
+
+ def __comspec(self):
+ comspec = os.environ.get('ComSpec') or os.path.join(
+ os.environ.get('SystemRoot', ''), 'System32', 'cmd.exe')
+ if os.path.isabs(comspec):
+ return comspec
+ raise FileNotFoundError('shell not found: neither %ComSpec% nor %SystemRoot% is set')
+
+ def communicate_or_kill(self, *args, **kwargs):
+ try:
+ return self.communicate(*args, **kwargs)
+ except BaseException: # Including KeyboardInterrupt
+ self.kill(timeout=None)
+ raise
+
+ def kill(self, *, timeout=0):
+ super().kill()
+ if timeout != 0:
+ self.wait(timeout=timeout)
+
+ @classmethod
+ def run(cls, *args, timeout=None, **kwargs):
+ with cls(*args, **kwargs) as proc:
+ default = '' if proc.__text_mode else b''
+ stdout, stderr = proc.communicate_or_kill(timeout=timeout)
+ return stdout or default, stderr or default, proc.returncode
+
+
+def encodeArgument(s):
+ # Legacy code that uses byte strings
+ # Uncomment the following line after fixing all post processors
+ # assert isinstance(s, str), 'Internal error: %r should be of type %r, is %r' % (s, str, type(s))
+ return s if isinstance(s, str) else s.decode('ascii')
+
+
+_timetuple = collections.namedtuple('Time', ('hours', 'minutes', 'seconds', 'milliseconds'))
+
+
+def timetuple_from_msec(msec):
+ secs, msec = divmod(msec, 1000)
+ mins, secs = divmod(secs, 60)
+ hrs, mins = divmod(mins, 60)
+ return _timetuple(hrs, mins, secs, msec)
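+
+# A quick sanity check of the divmod chain above:
+# >>> timetuple_from_msec(90061500)
+# Time(hours=25, minutes=1, seconds=1, milliseconds=500)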
+
+
+def formatSeconds(secs, delim=':', msec=False):
+ time = timetuple_from_msec(secs * 1000)
+ if time.hours:
+ ret = '%d%s%02d%s%02d' % (time.hours, delim, time.minutes, delim, time.seconds)
+ elif time.minutes:
+ ret = '%d%s%02d' % (time.minutes, delim, time.seconds)
+ else:
+ ret = '%d' % time.seconds
+ return '%s.%03d' % (ret, time.milliseconds) if msec else ret
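+
+# e.g.:
+# >>> formatSeconds(3723.5, msec=True)
+# '1:02:03.500'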
+
+
+def bug_reports_message(before=';'):
+ from ..update import REPOSITORY
+
+ msg = (f'please report this issue on https://github.com/{REPOSITORY}/issues?q= , '
+ 'filling out the appropriate issue template. Confirm you are on the latest version using yt-dlp -U')
+
+ before = before.rstrip()
+ if not before or before.endswith(('.', '!', '?')):
+ msg = msg[0].title() + msg[1:]
+
+ return (before + ' ' if before else '') + msg
+
+
+class YoutubeDLError(Exception):
+ """Base exception for YoutubeDL errors."""
+ msg = None
+
+ def __init__(self, msg=None):
+ if msg is not None:
+ self.msg = msg
+ elif self.msg is None:
+ self.msg = type(self).__name__
+ super().__init__(self.msg)
+
+
+class ExtractorError(YoutubeDLError):
+ """Error during info extraction."""
+
+ def __init__(self, msg, tb=None, expected=False, cause=None, video_id=None, ie=None):
+ """ tb, if given, is the original traceback (so that it can be printed out).
+ If expected is set, this is a normal error message and most likely not a bug in yt-dlp.
+ """
+ from ..networking.exceptions import network_exceptions
+ if sys.exc_info()[0] in network_exceptions:
+ expected = True
+
+ self.orig_msg = str(msg)
+ self.traceback = tb
+ self.expected = expected
+ self.cause = cause
+ self.video_id = video_id
+ self.ie = ie
+ self.exc_info = sys.exc_info() # preserve original exception
+ if isinstance(self.exc_info[1], ExtractorError):
+ self.exc_info = self.exc_info[1].exc_info
+ super().__init__(self.__msg)
+
+ @property
+ def __msg(self):
+ return ''.join((
+ format_field(self.ie, None, '[%s] '),
+ format_field(self.video_id, None, '%s: '),
+ self.orig_msg,
+ format_field(self.cause, None, ' (caused by %r)'),
+ '' if self.expected else bug_reports_message()))
+
+ def format_traceback(self):
+ return join_nonempty(
+ self.traceback and ''.join(traceback.format_tb(self.traceback)),
+ self.cause and ''.join(traceback.format_exception(None, self.cause, self.cause.__traceback__)[1:]),
+ delim='\n') or None
+
+ def __setattr__(self, name, value):
+ super().__setattr__(name, value)
+ if getattr(self, 'msg', None) and name not in ('msg', 'args'):
+ self.msg = self.__msg or type(self).__name__
+ self.args = (self.msg, ) # Cannot be property
+
+
+class UnsupportedError(ExtractorError):
+ def __init__(self, url):
+ super().__init__(
+ 'Unsupported URL: %s' % url, expected=True)
+ self.url = url
+
+
+class RegexNotFoundError(ExtractorError):
+ """Error when a regex didn't match"""
+ pass
+
+
+class GeoRestrictedError(ExtractorError):
+ """Geographic restriction Error exception.
+
+ This exception may be thrown when a video is not available from your
+ geographic location due to geographic restrictions imposed by a website.
+ """
+
+ def __init__(self, msg, countries=None, **kwargs):
+ kwargs['expected'] = True
+ super().__init__(msg, **kwargs)
+ self.countries = countries
+
+
+class UserNotLive(ExtractorError):
+ """Error when a channel/user is not live"""
+
+ def __init__(self, msg=None, **kwargs):
+ kwargs['expected'] = True
+ super().__init__(msg or 'The channel is not currently live', **kwargs)
+
+
+class DownloadError(YoutubeDLError):
+ """Download Error exception.
+
+ This exception may be thrown by FileDownloader objects if they are not
+ configured to continue on errors. They will contain the appropriate
+ error message.
+ """
+
+ def __init__(self, msg, exc_info=None):
+ """ exc_info, if given, is the original exception that caused the trouble (as returned by sys.exc_info()). """
+ super().__init__(msg)
+ self.exc_info = exc_info
+
+
+class EntryNotInPlaylist(YoutubeDLError):
+ """Entry not in playlist exception.
+
+ This exception will be thrown by YoutubeDL when a requested entry
+ is not found in the playlist info_dict
+ """
+ msg = 'Entry not found in info'
+
+
+class SameFileError(YoutubeDLError):
+ """Same File exception.
+
+ This exception will be thrown by FileDownloader objects if they detect
+ multiple files would have to be downloaded to the same file on disk.
+ """
+ msg = 'Fixed output name but more than one file to download'
+
+ def __init__(self, filename=None):
+ if filename is not None:
+ self.msg += f': {filename}'
+ super().__init__(self.msg)
+
+
+class PostProcessingError(YoutubeDLError):
+ """Post Processing exception.
+
+ This exception may be raised by PostProcessor's .run() method to
+ indicate an error in the postprocessing task.
+ """
+
+
+class DownloadCancelled(YoutubeDLError):
+ """ Exception raised when the download queue should be interrupted """
+ msg = 'The download was cancelled'
+
+
+class ExistingVideoReached(DownloadCancelled):
+ """ --break-on-existing triggered """
+ msg = 'Encountered a video that is already in the archive, stopping due to --break-on-existing'
+
+
+class RejectedVideoReached(DownloadCancelled):
+ """ --break-match-filter triggered """
+ msg = 'Encountered a video that did not match filter, stopping due to --break-match-filter'
+
+
+class MaxDownloadsReached(DownloadCancelled):
+ """ --max-downloads limit has been reached. """
+ msg = 'Maximum number of downloads reached, stopping due to --max-downloads'
+
+
+class ReExtractInfo(YoutubeDLError):
+ """ Video info needs to be re-extracted. """
+
+ def __init__(self, msg, expected=False):
+ super().__init__(msg)
+ self.expected = expected
+
+
+class ThrottledDownload(ReExtractInfo):
+ """ Download speed below --throttled-rate. """
+ msg = 'The download speed is below throttle limit'
+
+ def __init__(self):
+ super().__init__(self.msg, expected=False)
+
+
+class UnavailableVideoError(YoutubeDLError):
+ """Unavailable Format exception.
+
+ This exception will be thrown when a video is requested
+ in a format that is not available for that video.
+ """
+ msg = 'Unable to download video'
+
+ def __init__(self, err=None):
+ if err is not None:
+ self.msg += f': {err}'
+ super().__init__(self.msg)
+
+
+class ContentTooShortError(YoutubeDLError):
+ """Content Too Short exception.
+
+ This exception may be raised by FileDownloader objects when a file they
+ download is too small for what the server announced first, indicating
+ the connection was probably interrupted.
+ """
+
+ def __init__(self, downloaded, expected):
+ super().__init__(f'Downloaded {downloaded} bytes, expected {expected} bytes')
+ # Both in bytes
+ self.downloaded = downloaded
+ self.expected = expected
+
+
+class XAttrMetadataError(YoutubeDLError):
+ def __init__(self, code=None, msg='Unknown error'):
+ super().__init__(msg)
+ self.code = code
+ self.msg = msg
+
+ # Parsing code and msg
+ if (self.code in (errno.ENOSPC, errno.EDQUOT)
+ or 'No space left' in self.msg or 'Disk quota exceeded' in self.msg):
+ self.reason = 'NO_SPACE'
+ elif self.code == errno.E2BIG or 'Argument list too long' in self.msg:
+ self.reason = 'VALUE_TOO_LONG'
+ else:
+ self.reason = 'NOT_SUPPORTED'
+
+
+class XAttrUnavailableError(YoutubeDLError):
+ pass
+
+
+def is_path_like(f):
+ return isinstance(f, (str, bytes, os.PathLike))
+
+
+def extract_timezone(date_str):
+ m = re.search(
+ r'''(?x)
+ ^.{8,}? # >=8 char non-TZ prefix, if present
+ (?P<tz>Z| # just the UTC Z, or
+ (?:(?<=.\b\d{4}|\b\d{2}:\d\d)| # preceded by 4 digits or hh:mm or
+ (?<!.\b[a-zA-Z]{3}|[a-zA-Z]{4}|..\b\d\d)) # not preceded by 3 alpha word or >= 4 alpha or 2 digits
+ [ ]? # optional space
+ (?P<sign>\+|-) # +/-
+ (?P<hours>[0-9]{2}):?(?P<minutes>[0-9]{2}) # hh[:]mm
+ $)
+ ''', date_str)
+ if not m:
+ m = re.search(r'\d{1,2}:\d{1,2}(?:\.\d+)?(?P<tz>\s*[A-Z]+)$', date_str)
+ timezone = TIMEZONE_NAMES.get(m and m.group('tz').strip())
+ if timezone is not None:
+ date_str = date_str[:-len(m.group('tz'))]
+ timezone = datetime.timedelta(hours=timezone or 0)
+ else:
+ date_str = date_str[:-len(m.group('tz'))]
+ if not m.group('sign'):
+ timezone = datetime.timedelta()
+ else:
+ sign = 1 if m.group('sign') == '+' else -1
+ timezone = datetime.timedelta(
+ hours=sign * int(m.group('hours')),
+ minutes=sign * int(m.group('minutes')))
+ return timezone, date_str
+
+
+def parse_iso8601(date_str, delimiter='T', timezone=None):
+ """ Return a UNIX timestamp from the given date """
+
+ if date_str is None:
+ return None
+
+ date_str = re.sub(r'\.[0-9]+', '', date_str)
+
+ if timezone is None:
+ timezone, date_str = extract_timezone(date_str)
+
+ with contextlib.suppress(ValueError):
+ date_format = f'%Y-%m-%d{delimiter}%H:%M:%S'
+ dt = datetime.datetime.strptime(date_str, date_format) - timezone
+ return calendar.timegm(dt.timetuple())
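+
+# e.g. both 'Z' and numeric offsets are honoured:
+# >>> parse_iso8601('2024-03-10T12:00:00Z')
+# 1710072000
+# >>> parse_iso8601('2024-03-10T12:00:00+02:00')
+# 1710064800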
+
+
+def date_formats(day_first=True):
+ return DATE_FORMATS_DAY_FIRST if day_first else DATE_FORMATS_MONTH_FIRST
+
+
+def unified_strdate(date_str, day_first=True):
+ """Return a string with the date in the format YYYYMMDD"""
+
+ if date_str is None:
+ return None
+ upload_date = None
+ # Replace commas
+ date_str = date_str.replace(',', ' ')
+ # Remove AM/PM + timezone
+ date_str = re.sub(r'(?i)\s*(?:AM|PM)(?:\s+[A-Z]+)?', '', date_str)
+ _, date_str = extract_timezone(date_str)
+
+ for expression in date_formats(day_first):
+ with contextlib.suppress(ValueError):
+ upload_date = datetime.datetime.strptime(date_str, expression).strftime('%Y%m%d')
+ if upload_date is None:
+ timetuple = email.utils.parsedate_tz(date_str)
+ if timetuple:
+ with contextlib.suppress(ValueError):
+ upload_date = datetime.datetime(*timetuple[:6]).strftime('%Y%m%d')
+ if upload_date is not None:
+ return str(upload_date)
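+
+# For example:
+# >>> unified_strdate('December 21, 2023')
+# '20231221'
+# >>> unified_strdate('11.12.2023')  # day-first by default
+# '20231211'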
+
+
+def unified_timestamp(date_str, day_first=True):
+ if not isinstance(date_str, str):
+ return None
+
+ date_str = re.sub(r'\s+', ' ', re.sub(
+ r'(?i)[,|]|(mon|tues?|wed(nes)?|thu(rs)?|fri|sat(ur)?)(day)?', '', date_str))
+
+ pm_delta = 12 if re.search(r'(?i)PM', date_str) else 0
+ timezone, date_str = extract_timezone(date_str)
+
+ # Remove AM/PM + timezone
+ date_str = re.sub(r'(?i)\s*(?:AM|PM)(?:\s+[A-Z]+)?', '', date_str)
+
+ # Remove unrecognized timezones from ISO 8601 alike timestamps
+ m = re.search(r'\d{1,2}:\d{1,2}(?:\.\d+)?(?P<tz>\s*[A-Z]+)$', date_str)
+ if m:
+ date_str = date_str[:-len(m.group('tz'))]
+
+ # Python only supports microseconds, so remove nanoseconds
+ m = re.search(r'^([0-9]{4,}-[0-9]{1,2}-[0-9]{1,2}T[0-9]{1,2}:[0-9]{1,2}:[0-9]{1,2}\.[0-9]{6})[0-9]+$', date_str)
+ if m:
+ date_str = m.group(1)
+
+ for expression in date_formats(day_first):
+ with contextlib.suppress(ValueError):
+ dt = datetime.datetime.strptime(date_str, expression) - timezone + datetime.timedelta(hours=pm_delta)
+ return calendar.timegm(dt.timetuple())
+
+ timetuple = email.utils.parsedate_tz(date_str)
+ if timetuple:
+ return calendar.timegm(timetuple) + pm_delta * 3600 - timezone.total_seconds()
+
+
+def determine_ext(url, default_ext='unknown_video'):
+ if url is None or '.' not in url:
+ return default_ext
+ guess = url.partition('?')[0].rpartition('.')[2]
+ if re.match(r'^[A-Za-z0-9]+$', guess):
+ return guess
+ # Try extract ext from URLs like http://example.com/foo/bar.mp4/?download
+ elif guess.rstrip('/') in KNOWN_EXTENSIONS:
+ return guess.rstrip('/')
+ else:
+ return default_ext
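+
+# For instance:
+# >>> determine_ext('https://example.com/video.mp4?dl=1')
+# 'mp4'
+# >>> determine_ext('https://example.com/stream')
+# 'unknown_video'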
+
+
+def subtitles_filename(filename, sub_lang, sub_format, expected_real_ext=None):
+ return replace_extension(filename, sub_lang + '.' + sub_format, expected_real_ext)
+
+
+def datetime_from_str(date_str, precision='auto', format='%Y%m%d'):
+ R"""
+ Return a datetime object from a string.
+ Supported format:
+ (now|today|yesterday|DATE)([+-]\d+(microsecond|second|minute|hour|day|week|month|year)s?)?
+
+ @param format strftime format of DATE
+ @param precision Round the datetime object: auto|microsecond|second|minute|hour|day
+ auto: round to the unit provided in date_str (if applicable).
+ """
+ auto_precision = False
+ if precision == 'auto':
+ auto_precision = True
+ precision = 'microsecond'
+ today = datetime_round(datetime.datetime.now(datetime.timezone.utc), precision)
+ if date_str in ('now', 'today'):
+ return today
+ if date_str == 'yesterday':
+ return today - datetime.timedelta(days=1)
+ match = re.match(
+ r'(?P<start>.+)(?P<sign>[+-])(?P<time>\d+)(?P<unit>microsecond|second|minute|hour|day|week|month|year)s?',
+ date_str)
+ if match is not None:
+ start_time = datetime_from_str(match.group('start'), precision, format)
+ time = int(match.group('time')) * (-1 if match.group('sign') == '-' else 1)
+ unit = match.group('unit')
+ if unit == 'month' or unit == 'year':
+ new_date = datetime_add_months(start_time, time * 12 if unit == 'year' else time)
+ unit = 'day'
+ else:
+ if unit == 'week':
+ unit = 'day'
+ time *= 7
+ delta = datetime.timedelta(**{unit + 's': time})
+ new_date = start_time + delta
+ if auto_precision:
+ return datetime_round(new_date, unit)
+ return new_date
+
+ return datetime_round(datetime.datetime.strptime(date_str, format), precision)
+
+
+def date_from_str(date_str, format='%Y%m%d', strict=False):
+ R"""
+ Return a date object from a string using datetime_from_str
+
+ @param strict Restrict allowed patterns to "YYYYMMDD" and
+ (now|today|yesterday)(-\d+(day|week|month|year)s?)?
+ """
+ if strict and not re.fullmatch(r'\d{8}|(now|today|yesterday)(-\d+(day|week|month|year)s?)?', date_str):
+ raise ValueError(f'Invalid date format "{date_str}"')
+ return datetime_from_str(date_str, precision='microsecond', format=format).date()
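+
+# Relative expressions resolve against the current UTC time; for example,
+# date_from_str('today-1week') yields the date seven days ago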
+
+
+def datetime_add_months(dt, months):
+ """Increment/Decrement a datetime object by months."""
+ month = dt.month + months - 1
+ year = dt.year + month // 12
+ month = month % 12 + 1
+ day = min(dt.day, calendar.monthrange(year, month)[1])
+ return dt.replace(year, month, day)
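+
+# Overflowing days clamp to the target month's last day, e.g.:
+# >>> datetime_add_months(datetime.datetime(2024, 1, 31), 1)
+# datetime.datetime(2024, 2, 29, 0, 0)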
+
+
+def datetime_round(dt, precision='day'):
+ """
+ Round a datetime object's time to a specific precision
+ """
+ if precision == 'microsecond':
+ return dt
+
+ unit_seconds = {
+ 'day': 86400,
+ 'hour': 3600,
+ 'minute': 60,
+ 'second': 1,
+ }
+ roundto = lambda x, n: ((x + n / 2) // n) * n
+ timestamp = roundto(calendar.timegm(dt.timetuple()), unit_seconds[precision])
+ return datetime.datetime.fromtimestamp(timestamp, datetime.timezone.utc)
+
+
+def hyphenate_date(date_str):
+ """
+ Convert a date in 'YYYYMMDD' format to 'YYYY-MM-DD' format"""
+ match = re.match(r'^(\d\d\d\d)(\d\d)(\d\d)$', date_str)
+ if match is not None:
+ return '-'.join(match.groups())
+ else:
+ return date_str
+
+
+class DateRange:
+ """Represents a time interval between two dates"""
+
+ def __init__(self, start=None, end=None):
+ """start and end must be strings in the format accepted by date"""
+ if start is not None:
+ self.start = date_from_str(start, strict=True)
+ else:
+ self.start = datetime.datetime.min.date()
+ if end is not None:
+ self.end = date_from_str(end, strict=True)
+ else:
+ self.end = datetime.datetime.max.date()
+ if self.start > self.end:
+ raise ValueError('Date range: "%s", the start date must be before the end date' % self)
+
+ @classmethod
+ def day(cls, day):
+ """Returns a range that only contains the given day"""
+ return cls(day, day)
+
+ def __contains__(self, date):
+ """Check if the date is in the range"""
+ if not isinstance(date, datetime.date):
+ date = date_from_str(date)
+ return self.start <= date <= self.end
+
+ def __repr__(self):
+ return f'{__name__}.{type(self).__name__}({self.start.isoformat()!r}, {self.end.isoformat()!r})'
+
+ def __str__(self):
+ return f'{self.start} to {self.end}'
+
+ def __eq__(self, other):
+ return (isinstance(other, DateRange)
+ and self.start == other.start and self.end == other.end)
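+
+# e.g. membership accepts both date objects and parseable strings:
+# >>> '20240310' in DateRange('20240301', '20240331')
+# True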
+
+
+@functools.cache
+def system_identifier():
+ python_implementation = platform.python_implementation()
+ if python_implementation == 'PyPy' and hasattr(sys, 'pypy_version_info'):
+ python_implementation += ' version %d.%d.%d' % sys.pypy_version_info[:3]
+ libc_ver = []
+ with contextlib.suppress(OSError): # We may not have access to the executable
+ libc_ver = platform.libc_ver()
+
+ return 'Python %s (%s %s %s) - %s (%s%s)' % (
+ platform.python_version(),
+ python_implementation,
+ platform.machine(),
+ platform.architecture()[0],
+ platform.platform(),
+ ssl.OPENSSL_VERSION,
+ format_field(join_nonempty(*libc_ver, delim=' '), None, ', %s'),
+ )
+
+
+@functools.cache
+def get_windows_version():
+ ''' Get Windows version. Returns () if not running on Windows '''
+ if compat_os_name == 'nt':
+ return version_tuple(platform.win32_ver()[1])
+ else:
+ return ()
+
+
+def write_string(s, out=None, encoding=None):
+ assert isinstance(s, str)
+ out = out or sys.stderr
+ # `sys.stderr` might be `None` (Ref: https://github.com/pyinstaller/pyinstaller/pull/7217)
+ if not out:
+ return
+
+ if compat_os_name == 'nt' and supports_terminal_sequences(out):
+ s = re.sub(r'([\r\n]+)', r' \1', s)
+
+ enc, buffer = None, out
+ # `mode` might be `None` (Ref: https://github.com/yt-dlp/yt-dlp/issues/8816)
+ if 'b' in (getattr(out, 'mode', None) or ''):
+ enc = encoding or preferredencoding()
+ elif hasattr(out, 'buffer'):
+ buffer = out.buffer
+ enc = encoding or getattr(out, 'encoding', None) or preferredencoding()
+
+ buffer.write(s.encode(enc, 'ignore') if enc else s)
+ out.flush()
+
+
+# TODO: Use global logger
+def deprecation_warning(msg, *, printer=None, stacklevel=0, **kwargs):
+ from .. import _IN_CLI
+ if _IN_CLI:
+ if msg in deprecation_warning._cache:
+ return
+ deprecation_warning._cache.add(msg)
+ if printer:
+ return printer(f'{msg}{bug_reports_message()}', **kwargs)
+ return write_string(f'ERROR: {msg}{bug_reports_message()}\n', **kwargs)
+ else:
+ import warnings
+ warnings.warn(DeprecationWarning(msg), stacklevel=stacklevel + 3)
+
+
+deprecation_warning._cache = set()
+
+
+def bytes_to_intlist(bs):
+ if not bs:
+ return []
+ if isinstance(bs[0], int): # Python 3
+ return list(bs)
+ else:
+ return [ord(c) for c in bs]
+
+
+def intlist_to_bytes(xs):
+ if not xs:
+ return b''
+ return struct.pack('%dB' % len(xs), *xs)
+
+
+class LockingUnsupportedError(OSError):
+ msg = 'File locking is not supported'
+
+ def __init__(self):
+ super().__init__(self.msg)
+
+
+# Cross-platform file locking
+if sys.platform == 'win32':
+ import ctypes
+ import ctypes.wintypes
+ import msvcrt
+
+ class OVERLAPPED(ctypes.Structure):
+ _fields_ = [
+ ('Internal', ctypes.wintypes.LPVOID),
+ ('InternalHigh', ctypes.wintypes.LPVOID),
+ ('Offset', ctypes.wintypes.DWORD),
+ ('OffsetHigh', ctypes.wintypes.DWORD),
+ ('hEvent', ctypes.wintypes.HANDLE),
+ ]
+
+ kernel32 = ctypes.WinDLL('kernel32')
+ LockFileEx = kernel32.LockFileEx
+ LockFileEx.argtypes = [
+ ctypes.wintypes.HANDLE, # hFile
+ ctypes.wintypes.DWORD, # dwFlags
+ ctypes.wintypes.DWORD, # dwReserved
+ ctypes.wintypes.DWORD, # nNumberOfBytesToLockLow
+ ctypes.wintypes.DWORD, # nNumberOfBytesToLockHigh
+ ctypes.POINTER(OVERLAPPED) # Overlapped
+ ]
+ LockFileEx.restype = ctypes.wintypes.BOOL
+ UnlockFileEx = kernel32.UnlockFileEx
+ UnlockFileEx.argtypes = [
+ ctypes.wintypes.HANDLE, # hFile
+ ctypes.wintypes.DWORD, # dwReserved
+ ctypes.wintypes.DWORD, # nNumberOfBytesToLockLow
+ ctypes.wintypes.DWORD, # nNumberOfBytesToLockHigh
+ ctypes.POINTER(OVERLAPPED) # Overlapped
+ ]
+ UnlockFileEx.restype = ctypes.wintypes.BOOL
+ whole_low = 0xffffffff
+ whole_high = 0x7fffffff
+
+ def _lock_file(f, exclusive, block):
+ overlapped = OVERLAPPED()
+ overlapped.Offset = 0
+ overlapped.OffsetHigh = 0
+ overlapped.hEvent = 0
+ f._lock_file_overlapped_p = ctypes.pointer(overlapped)
+
+ if not LockFileEx(msvcrt.get_osfhandle(f.fileno()),
+ (0x2 if exclusive else 0x0) | (0x0 if block else 0x1),
+ 0, whole_low, whole_high, f._lock_file_overlapped_p):
+ # NB: The no-argument form of "ctypes.FormatError" does not work on PyPy
+ raise BlockingIOError(f'Locking file failed: {ctypes.FormatError(ctypes.GetLastError())!r}')
+
+ def _unlock_file(f):
+ assert f._lock_file_overlapped_p
+ handle = msvcrt.get_osfhandle(f.fileno())
+ if not UnlockFileEx(handle, 0, whole_low, whole_high, f._lock_file_overlapped_p):
+ raise OSError('Unlocking file failed: %r' % ctypes.FormatError())
+
+else:
+ try:
+ import fcntl
+
+ def _lock_file(f, exclusive, block):
+ flags = fcntl.LOCK_EX if exclusive else fcntl.LOCK_SH
+ if not block:
+ flags |= fcntl.LOCK_NB
+ try:
+ fcntl.flock(f, flags)
+ except BlockingIOError:
+ raise
+ except OSError: # AOSP does not have flock()
+ fcntl.lockf(f, flags)
+
+ def _unlock_file(f):
+ with contextlib.suppress(OSError):
+ return fcntl.flock(f, fcntl.LOCK_UN)
+ with contextlib.suppress(OSError):
+ return fcntl.lockf(f, fcntl.LOCK_UN) # AOSP does not have flock()
+ return fcntl.flock(f, fcntl.LOCK_UN | fcntl.LOCK_NB) # virtiofs needs LOCK_NB on unlocking
+
+ except ImportError:
+
+ def _lock_file(f, exclusive, block):
+ raise LockingUnsupportedError()
+
+ def _unlock_file(f):
+ raise LockingUnsupportedError()
+
+
+class locked_file:
+ locked = False
+
+ def __init__(self, filename, mode, block=True, encoding=None):
+ if mode not in {'r', 'rb', 'a', 'ab', 'w', 'wb'}:
+ raise NotImplementedError(mode)
+ self.mode, self.block = mode, block
+
+ writable = any(f in mode for f in 'wax+')
+ readable = any(f in mode for f in 'r+')
+ flags = functools.reduce(operator.ior, (
+ getattr(os, 'O_CLOEXEC', 0), # UNIX only
+ getattr(os, 'O_BINARY', 0), # Windows only
+ getattr(os, 'O_NOINHERIT', 0), # Windows only
+ os.O_CREAT if writable else 0, # O_TRUNC only after locking
+ os.O_APPEND if 'a' in mode else 0,
+ os.O_EXCL if 'x' in mode else 0,
+ os.O_RDONLY if not writable else os.O_RDWR if readable else os.O_WRONLY,
+ ))
+
+ self.f = os.fdopen(os.open(filename, flags, 0o666), mode, encoding=encoding)
+
+ def __enter__(self):
+ exclusive = 'r' not in self.mode
+ try:
+ _lock_file(self.f, exclusive, self.block)
+ self.locked = True
+ except OSError:
+ self.f.close()
+ raise
+ if 'w' in self.mode:
+ try:
+ self.f.truncate()
+ except OSError as e:
+ if e.errno not in (
+ errno.ESPIPE, # Illegal seek - expected for FIFO
+ errno.EINVAL, # Invalid argument - expected for /dev/null
+ ):
+ raise
+ return self
+
+ def unlock(self):
+ if not self.locked:
+ return
+ try:
+ _unlock_file(self.f)
+ finally:
+ self.locked = False
+
+ def __exit__(self, *_):
+ try:
+ self.unlock()
+ finally:
+ self.f.close()
+
+ open = __enter__
+ close = __exit__
+
+ def __getattr__(self, attr):
+ return getattr(self.f, attr)
+
+ def __iter__(self):
+ return iter(self.f)
+
+
+@functools.cache
+def get_filesystem_encoding():
+ encoding = sys.getfilesystemencoding()
+ return encoding if encoding is not None else 'utf-8'
+
+
+def shell_quote(args):
+ quoted_args = []
+ encoding = get_filesystem_encoding()
+ for a in args:
+ if isinstance(a, bytes):
+ # We may get a filename encoded with 'encodeFilename'
+ a = a.decode(encoding)
+ quoted_args.append(compat_shlex_quote(a))
+ return ' '.join(quoted_args)
+
+
+def smuggle_url(url, data):
+ """ Pass additional data in a URL for internal use. """
+
+ url, idata = unsmuggle_url(url, {})
+ data.update(idata)
+ sdata = urllib.parse.urlencode(
+ {'__youtubedl_smuggle': json.dumps(data)})
+ return url + '#' + sdata
+
+
+def unsmuggle_url(smug_url, default=None):
+ if '#__youtubedl_smuggle' not in smug_url:
+ return smug_url, default
+ url, _, sdata = smug_url.rpartition('#')
+ jsond = urllib.parse.parse_qs(sdata)['__youtubedl_smuggle'][0]
+ data = json.loads(jsond)
+ return url, data
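+
+# The two functions round-trip, e.g.:
+# >>> url = smuggle_url('https://example.com/v', {'referer': 'https://example.com'})
+# >>> unsmuggle_url(url)
+# ('https://example.com/v', {'referer': 'https://example.com'})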
+
+
+def format_decimal_suffix(num, fmt='%d%s', *, factor=1000):
+ """ Formats numbers with decimal sufixes like K, M, etc """
+ num, factor = float_or_none(num), float(factor)
+ if num is None or num < 0:
+ return None
+ POSSIBLE_SUFFIXES = 'kMGTPEZY'
+ exponent = 0 if num == 0 else min(int(math.log(num, factor)), len(POSSIBLE_SUFFIXES))
+ suffix = ['', *POSSIBLE_SUFFIXES][exponent]
+ if factor == 1024:
+ suffix = {'k': 'Ki', '': ''}.get(suffix, f'{suffix}i')
+ converted = num / (factor ** exponent)
+ return fmt % (converted, suffix)
+
+
+def format_bytes(bytes):
+ return format_decimal_suffix(bytes, '%.2f%sB', factor=1024) or 'N/A'
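+
+# e.g. decimal suffixes with factor=1000, binary (KiB-style) with factor=1024:
+# >>> format_decimal_suffix(1500000, '%.1f%s')
+# '1.5M'
+# >>> format_bytes(1536)
+# '1.50KiB'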
+
+
+def lookup_unit_table(unit_table, s, strict=False):
+ num_re = NUMBER_RE if strict else NUMBER_RE.replace(R'\.', '[,.]')
+ units_re = '|'.join(re.escape(u) for u in unit_table)
+ m = (re.fullmatch if strict else re.match)(
+ rf'(?P<num>{num_re})\s*(?P<unit>{units_re})\b', s)
+ if not m:
+ return None
+
+ num = float(m.group('num').replace(',', '.'))
+ mult = unit_table[m.group('unit')]
+ return round(num * mult)
+
+
+def parse_bytes(s):
+ """Parse a string indicating a byte quantity into an integer"""
+ return lookup_unit_table(
+ {u: 1024**i for i, u in enumerate(['', *'KMGTPEZY'])},
+ s.upper(), strict=True)
+
+
+def parse_filesize(s):
+ if s is None:
+ return None
+
+ # The lower-case forms are of course incorrect and unofficial,
+ # but we support those too
+ _UNIT_TABLE = {
+ 'B': 1,
+ 'b': 1,
+ 'bytes': 1,
+ 'KiB': 1024,
+ 'KB': 1000,
+ 'kB': 1024,
+ 'Kb': 1000,
+ 'kb': 1000,
+ 'kilobytes': 1000,
+ 'kibibytes': 1024,
+ 'MiB': 1024 ** 2,
+ 'MB': 1000 ** 2,
+ 'mB': 1024 ** 2,
+ 'Mb': 1000 ** 2,
+ 'mb': 1000 ** 2,
+ 'megabytes': 1000 ** 2,
+ 'mebibytes': 1024 ** 2,
+ 'GiB': 1024 ** 3,
+ 'GB': 1000 ** 3,
+ 'gB': 1024 ** 3,
+ 'Gb': 1000 ** 3,
+ 'gb': 1000 ** 3,
+ 'gigabytes': 1000 ** 3,
+ 'gibibytes': 1024 ** 3,
+ 'TiB': 1024 ** 4,
+ 'TB': 1000 ** 4,
+ 'tB': 1024 ** 4,
+ 'Tb': 1000 ** 4,
+ 'tb': 1000 ** 4,
+ 'terabytes': 1000 ** 4,
+ 'tebibytes': 1024 ** 4,
+ 'PiB': 1024 ** 5,
+ 'PB': 1000 ** 5,
+ 'pB': 1024 ** 5,
+ 'Pb': 1000 ** 5,
+ 'pb': 1000 ** 5,
+ 'petabytes': 1000 ** 5,
+ 'pebibytes': 1024 ** 5,
+ 'EiB': 1024 ** 6,
+ 'EB': 1000 ** 6,
+ 'eB': 1024 ** 6,
+ 'Eb': 1000 ** 6,
+ 'eb': 1000 ** 6,
+ 'exabytes': 1000 ** 6,
+ 'exbibytes': 1024 ** 6,
+ 'ZiB': 1024 ** 7,
+ 'ZB': 1000 ** 7,
+ 'zB': 1024 ** 7,
+ 'Zb': 1000 ** 7,
+ 'zb': 1000 ** 7,
+ 'zettabytes': 1000 ** 7,
+ 'zebibytes': 1024 ** 7,
+ 'YiB': 1024 ** 8,
+ 'YB': 1000 ** 8,
+ 'yB': 1024 ** 8,
+ 'Yb': 1000 ** 8,
+ 'yb': 1000 ** 8,
+ 'yottabytes': 1000 ** 8,
+ 'yobibytes': 1024 ** 8,
+ }
+
+ return lookup_unit_table(_UNIT_TABLE, s)
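+
+# For example:
+# >>> parse_filesize('1.5 GiB')
+# 1610612736
+# >>> parse_filesize('500 MB')
+# 500000000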
+
+
+def parse_count(s):
+ if s is None:
+ return None
+
+ s = re.sub(r'^[^\d]+\s', '', s).strip()
+
+ if re.match(r'^[\d,.]+$', s):
+ return str_to_int(s)
+
+ _UNIT_TABLE = {
+ 'k': 1000,
+ 'K': 1000,
+ 'm': 1000 ** 2,
+ 'M': 1000 ** 2,
+ 'kk': 1000 ** 2,
+ 'KK': 1000 ** 2,
+ 'b': 1000 ** 3,
+ 'B': 1000 ** 3,
+ }
+
+ ret = lookup_unit_table(_UNIT_TABLE, s)
+ if ret is not None:
+ return ret
+
+ mobj = re.match(r'([\d,.]+)(?:$|\s)', s)
+ if mobj:
+ return str_to_int(mobj.group(1))
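+
+# e.g.:
+# >>> parse_count('1.2M')
+# 1200000
+# >>> parse_count('1,234 views')
+# 1234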
+
+
+def parse_resolution(s, *, lenient=False):
+ if s is None:
+ return {}
+
+ if lenient:
+ mobj = re.search(r'(?P<w>\d+)\s*[xX×,]\s*(?P<h>\d+)', s)
+ else:
+ mobj = re.search(r'(?<![a-zA-Z0-9])(?P<w>\d+)\s*[xX×,]\s*(?P<h>\d+)(?![a-zA-Z0-9])', s)
+ if mobj:
+ return {
+ 'width': int(mobj.group('w')),
+ 'height': int(mobj.group('h')),
+ }
+
+ mobj = re.search(r'(?<![a-zA-Z0-9])(\d+)[pPiI](?![a-zA-Z0-9])', s)
+ if mobj:
+ return {'height': int(mobj.group(1))}
+
+ mobj = re.search(r'\b([48])[kK]\b', s)
+ if mobj:
+ return {'height': int(mobj.group(1)) * 540}
+
+ return {}
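+
+# For instance:
+# >>> parse_resolution('1920x1080')
+# {'width': 1920, 'height': 1080}
+# >>> parse_resolution('720p')
+# {'height': 720}
+# >>> parse_resolution('4K')
+# {'height': 2160}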
+
+
+def parse_bitrate(s):
+ if not isinstance(s, str):
+ return
+ mobj = re.search(r'\b(\d+)\s*kbps', s)
+ if mobj:
+ return int(mobj.group(1))
+
+
+def month_by_name(name, lang='en'):
+ """ Return the number of a month by (locale-independently) English name """
+
+ month_names = MONTH_NAMES.get(lang, MONTH_NAMES['en'])
+
+ try:
+ return month_names.index(name) + 1
+ except ValueError:
+ return None
+
+
+def month_by_abbreviation(abbrev):
+ """ Return the number of a month by (locale-independently) English
+ abbreviations """
+
+ try:
+ return [s[:3] for s in ENGLISH_MONTH_NAMES].index(abbrev) + 1
+ except ValueError:
+ return None
+
+
+def fix_xml_ampersands(xml_str):
+ """Replace all the '&' by '&amp;' in XML"""
+ return re.sub(
+ r'&(?!amp;|lt;|gt;|apos;|quot;|#x[0-9a-fA-F]{,4};|#[0-9]{,4};)',
+ '&amp;',
+ xml_str)
+
+
+def setproctitle(title):
+ assert isinstance(title, str)
+
+ # Workaround for https://github.com/yt-dlp/yt-dlp/issues/4541
+ try:
+ import ctypes
+ except ImportError:
+ return
+
+ try:
+ libc = ctypes.cdll.LoadLibrary('libc.so.6')
+ except OSError:
+ return
+ except TypeError:
+ # LoadLibrary in Windows Python 2.7.13 only expects
+ # a bytestring, but since unicode_literals turns
+ # every string into a unicode string, it fails.
+ return
+ title_bytes = title.encode()
+ buf = ctypes.create_string_buffer(len(title_bytes))
+ buf.value = title_bytes
+ try:
+ # PR_SET_NAME = 15 Ref: /usr/include/linux/prctl.h
+ libc.prctl(15, buf, 0, 0, 0)
+ except AttributeError:
+ return # Strange libc, just skip this
+
+
+def remove_start(s, start):
+ return s[len(start):] if s is not None and s.startswith(start) else s
+
+
+def remove_end(s, end):
+ return s[:-len(end)] if s is not None and s.endswith(end) else s
+
+
+def remove_quotes(s):
+ if s is None or len(s) < 2:
+ return s
+ for quote in ('"', "'", ):
+ if s[0] == quote and s[-1] == quote:
+ return s[1:-1]
+ return s
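+
+# The three removal helpers above, for example:
+# >>> remove_start('www.example.com', 'www.')
+# 'example.com'
+# >>> remove_end('video.mp4', '.mp4')
+# 'video'
+# >>> remove_quotes('"quoted"')
+# 'quoted'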
+
+
+def get_domain(url):
+ """
+ This implementation is inconsistent, but is kept for compatibility.
+ Use this only for "webpage_url_domain"
+ """
+ return remove_start(urllib.parse.urlparse(url).netloc, 'www.') or None
+
+
+def url_basename(url):
+ path = urllib.parse.urlparse(url).path
+ return path.strip('/').split('/')[-1]
+
+
+def base_url(url):
+ return re.match(r'https?://[^?#]+/', url).group()
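+
+# e.g.:
+# >>> url_basename('https://example.com/a/b.mp4?dl=1')
+# 'b.mp4'
+# >>> base_url('https://example.com/a/b.mp4?dl=1')
+# 'https://example.com/a/'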
+
+
+def urljoin(base, path):
+ if isinstance(path, bytes):
+ path = path.decode()
+ if not isinstance(path, str) or not path:
+ return None
+ if re.match(r'^(?:[a-zA-Z][a-zA-Z0-9+-.]*:)?//', path):
+ return path
+ if isinstance(base, bytes):
+ base = base.decode()
+ if not isinstance(base, str) or not re.match(
+ r'^(?:https?:)?//', base):
+ return None
+ return urllib.parse.urljoin(base, path)
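+
+# Scheme-relative paths are returned as-is, e.g.:
+# >>> urljoin('https://example.com/a/', 'b.mp4')
+# 'https://example.com/a/b.mp4'
+# >>> urljoin('https://example.com', '//cdn.example.com/c.mp4')
+# '//cdn.example.com/c.mp4'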
+
+
+def int_or_none(v, scale=1, default=None, get_attr=None, invscale=1):
+ if get_attr and v is not None:
+ v = getattr(v, get_attr, None)
+ try:
+ return int(v) * invscale // scale
+ except (ValueError, TypeError, OverflowError):
+ return default
+
+
+def str_or_none(v, default=None):
+ return default if v is None else str(v)
+
+
+def str_to_int(int_str):
+ """ A more relaxed version of int_or_none """
+ if isinstance(int_str, int):
+ return int_str
+ elif isinstance(int_str, str):
+ int_str = re.sub(r'[,\.\+]', '', int_str)
+ return int_or_none(int_str)
+
+
+def float_or_none(v, scale=1, invscale=1, default=None):
+ if v is None:
+ return default
+ try:
+ return float(v) * invscale / scale
+ except (ValueError, TypeError):
+ return default
+
+
+def bool_or_none(v, default=None):
+ return v if isinstance(v, bool) else default
+
+
+def strip_or_none(v, default=None):
+ return v.strip() if isinstance(v, str) else default
+
+
+def url_or_none(url):
+ if not url or not isinstance(url, str):
+ return None
+ url = url.strip()
+ return url if re.match(r'^(?:(?:https?|rt(?:m(?:pt?[es]?|fp)|sp[su]?)|mms|ftps?):)?//', url) else None
+
+
+def strftime_or_none(timestamp, date_format='%Y%m%d', default=None):
+ datetime_object = None
+ try:
+ if isinstance(timestamp, (int, float)): # unix timestamp
+ # Using naive datetime here can break timestamp() in Windows
+ # Ref: https://github.com/yt-dlp/yt-dlp/issues/5185, https://github.com/python/cpython/issues/94414
+ # Also, datetime.datetime.fromtimestamp breaks for negative timestamps
+ # Ref: https://github.com/yt-dlp/yt-dlp/issues/6706#issuecomment-1496842642
+ datetime_object = (datetime.datetime.fromtimestamp(0, datetime.timezone.utc)
+ + datetime.timedelta(seconds=timestamp))
+ elif isinstance(timestamp, str): # assume YYYYMMDD
+ datetime_object = datetime.datetime.strptime(timestamp, '%Y%m%d')
+ date_format = re.sub( # Support %s on windows
+ r'(?<!%)(%%)*%s', rf'\g<1>{int(datetime_object.timestamp())}', date_format)
+ return datetime_object.strftime(date_format)
+ except (ValueError, TypeError, AttributeError):
+ return default
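+
+# e.g. with a unix timestamp (interpreted as UTC):
+# >>> strftime_or_none(1710072000, '%Y-%m-%d')
+# '2024-03-10'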
+
+
+def parse_duration(s):
+ if not isinstance(s, str):
+ return None
+ s = s.strip()
+ if not s:
+ return None
+
+ days, hours, mins, secs, ms = [None] * 5
+ m = re.match(r'''(?x)
+ (?P<before_secs>
+ (?:(?:(?P<days>[0-9]+):)?(?P<hours>[0-9]+):)?(?P<mins>[0-9]+):)?
+ (?P<secs>(?(before_secs)[0-9]{1,2}|[0-9]+))
+ (?P<ms>[.:][0-9]+)?Z?$
+ ''', s)
+ if m:
+ days, hours, mins, secs, ms = m.group('days', 'hours', 'mins', 'secs', 'ms')
+ else:
+ m = re.match(
+ r'''(?ix)(?:P?
+ (?:
+ [0-9]+\s*y(?:ears?)?,?\s*
+ )?
+ (?:
+ [0-9]+\s*m(?:onths?)?,?\s*
+ )?
+ (?:
+ [0-9]+\s*w(?:eeks?)?,?\s*
+ )?
+ (?:
+ (?P<days>[0-9]+)\s*d(?:ays?)?,?\s*
+ )?
+ T)?
+ (?:
+ (?P<hours>[0-9]+)\s*h(?:(?:ou)?rs?)?,?\s*
+ )?
+ (?:
+ (?P<mins>[0-9]+)\s*m(?:in(?:ute)?s?)?,?\s*
+ )?
+ (?:
+ (?P<secs>[0-9]+)(?P<ms>\.[0-9]+)?\s*s(?:ec(?:ond)?s?)?\s*
+ )?Z?$''', s)
+ if m:
+ days, hours, mins, secs, ms = m.groups()
+ else:
+ m = re.match(r'(?i)(?:(?P<hours>[0-9.]+)\s*(?:hours?)|(?P<mins>[0-9.]+)\s*(?:mins?\.?|minutes?)\s*)Z?$', s)
+ if m:
+ hours, mins = m.groups()
+ else:
+ return None
+
+ if ms:
+ ms = ms.replace(':', '.')
+ return sum(float(part or 0) * mult for part, mult in (
+ (days, 86400), (hours, 3600), (mins, 60), (secs, 1), (ms, 1)))
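+
+ # Illustrative behaviour (editor's examples, not upstream code):
+ # parse_duration('1:23:45') == 5025.0
+ # parse_duration('PT1H30M') == 5400.0
+ # parse_duration('3 min') == 180.0
+ # parse_duration('2.5 hours') == 9000.0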
+
+
+def prepend_extension(filename, ext, expected_real_ext=None):
+ name, real_ext = os.path.splitext(filename)
+ return (
+ f'{name}.{ext}{real_ext}'
+ if not expected_real_ext or real_ext[1:] == expected_real_ext
+ else f'{filename}.{ext}')
+
+
+def replace_extension(filename, ext, expected_real_ext=None):
+ name, real_ext = os.path.splitext(filename)
+ return '{}.{}'.format(
+ name if not expected_real_ext or real_ext[1:] == expected_real_ext else filename,
+ ext)
+
+
+def check_executable(exe, args=[]):
+ """ Checks if the given binary is installed somewhere in PATH, and returns its name.
+ args can be a list of arguments for a short output (like -version) """
+ try:
+ Popen.run([exe] + args, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
+ except OSError:
+ return False
+ return exe
+
+
+def _get_exe_version_output(exe, args):
+ try:
+ # STDIN should be redirected too. On UNIX-like systems, ffmpeg triggers
+ # SIGTTOU if yt-dlp is run in the background.
+ # See https://github.com/ytdl-org/youtube-dl/issues/955#issuecomment-209789656
+ stdout, _, ret = Popen.run([encodeArgument(exe)] + args, text=True,
+ stdin=subprocess.PIPE, stdout=subprocess.PIPE, stderr=subprocess.STDOUT)
+ if ret:
+ return None
+ except OSError:
+ return False
+ return stdout
+
+
+def detect_exe_version(output, version_re=None, unrecognized='present'):
+ assert isinstance(output, str)
+ if version_re is None:
+ version_re = r'version\s+([-0-9._a-zA-Z]+)'
+ m = re.search(version_re, output)
+ if m:
+ return m.group(1)
+ else:
+ return unrecognized
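+
+ # Illustrative behaviour (editor's examples with a made-up banner string):
+ # detect_exe_version('ffmpeg version 6.0 Copyright (c) 2000-2023') == '6.0'
+ # detect_exe_version('no recognizable banner') == 'present'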
+
+
+def get_exe_version(exe, args=['--version'],
+ version_re=None, unrecognized=('present', 'broken')):
+ """ Returns the version of the specified executable,
+ or False if the executable is not present """
+ unrecognized = variadic(unrecognized)
+ assert len(unrecognized) in (1, 2)
+ out = _get_exe_version_output(exe, args)
+ if out is None:
+ return unrecognized[-1]
+ return out and detect_exe_version(out, version_re, unrecognized[0])
+
+
+def frange(start=0, stop=None, step=1):
+ """Float range"""
+ if stop is None:
+ start, stop = 0, start
+ sign = [-1, 1][step > 0] if step else 0
+ while sign * start < sign * stop:
+ yield start
+ start += step
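+
+ # Illustrative behaviour (editor's examples, not upstream code):
+ # list(frange(3)) == [0, 1, 2]
+ # list(frange(0, 1, 0.25)) == [0, 0.25, 0.5, 0.75]
+ # list(frange(2, 0, -1)) == [2, 1]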
+
+
+class LazyList(collections.abc.Sequence):
+ """Lazy immutable list from an iterable
+ Note that slices of a LazyList are lists and not LazyList"""
+
+ class IndexError(IndexError):
+ pass
+
+ def __init__(self, iterable, *, reverse=False, _cache=None):
+ self._iterable = iter(iterable)
+ self._cache = [] if _cache is None else _cache
+ self._reversed = reverse
+
+ def __iter__(self):
+ if self._reversed:
+ # We need to consume the entire iterable to iterate in reverse
+ yield from self.exhaust()
+ return
+ yield from self._cache
+ for item in self._iterable:
+ self._cache.append(item)
+ yield item
+
+ def _exhaust(self):
+ self._cache.extend(self._iterable)
+ self._iterable = [] # Discard the emptied iterable to make it pickle-able
+ return self._cache
+
+ def exhaust(self):
+ """Evaluate the entire iterable"""
+ return self._exhaust()[::-1 if self._reversed else 1]
+
+ @staticmethod
+ def _reverse_index(x):
+ return None if x is None else ~x
+
+ def __getitem__(self, idx):
+ if isinstance(idx, slice):
+ if self._reversed:
+ idx = slice(self._reverse_index(idx.start), self._reverse_index(idx.stop), -(idx.step or 1))
+ start, stop, step = idx.start, idx.stop, idx.step or 1
+ elif isinstance(idx, int):
+ if self._reversed:
+ idx = self._reverse_index(idx)
+ start, stop, step = idx, idx, 0
+ else:
+ raise TypeError('indices must be integers or slices')
+ if ((start or 0) < 0 or (stop or 0) < 0
+ or (start is None and step < 0)
+ or (stop is None and step > 0)):
+ # We need to consume the entire iterable to be able to slice from the end
+ # Obviously, never use this with infinite iterables
+ self._exhaust()
+ try:
+ return self._cache[idx]
+ except IndexError as e:
+ raise self.IndexError(e) from e
+ n = max(start or 0, stop or 0) - len(self._cache) + 1
+ if n > 0:
+ self._cache.extend(itertools.islice(self._iterable, n))
+ try:
+ return self._cache[idx]
+ except IndexError as e:
+ raise self.IndexError(e) from e
+
+ def __bool__(self):
+ try:
+ self[-1] if self._reversed else self[0]
+ except self.IndexError:
+ return False
+ return True
+
+ def __len__(self):
+ self._exhaust()
+ return len(self._cache)
+
+ def __reversed__(self):
+ return type(self)(self._iterable, reverse=not self._reversed, _cache=self._cache)
+
+ def __copy__(self):
+ return type(self)(self._iterable, reverse=self._reversed, _cache=self._cache)
+
+ def __repr__(self):
+ # repr and str should mimic a list. So we exhaust the iterable
+ return repr(self.exhaust())
+
+ def __str__(self):
+ return repr(self.exhaust())
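+
+ # Illustrative usage (editor's sketch): LazyList consumes its iterable only as far
+ # as needed, so indexing into an expensive or infinite generator is safe:
+ # lazy = LazyList(itertools.count())
+ # lazy[5] == 5  # consumes exactly 6 items into the cache
+ # lazy[2] == 2  # served from the cache, nothing more is consumed
+ # list(LazyList(range(5), reverse=True)) == [4, 3, 2, 1, 0]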
+
+
+class PagedList:
+
+ class IndexError(IndexError):
+ pass
+
+ def __len__(self):
+ # This is only useful for tests
+ return len(self.getslice())
+
+ def __init__(self, pagefunc, pagesize, use_cache=True):
+ self._pagefunc = pagefunc
+ self._pagesize = pagesize
+ self._pagecount = float('inf')
+ self._use_cache = use_cache
+ self._cache = {}
+
+ def getpage(self, pagenum):
+ page_results = self._cache.get(pagenum)
+ if page_results is None:
+ page_results = [] if pagenum > self._pagecount else list(self._pagefunc(pagenum))
+ if self._use_cache:
+ self._cache[pagenum] = page_results
+ return page_results
+
+ def getslice(self, start=0, end=None):
+ return list(self._getslice(start, end))
+
+ def _getslice(self, start, end):
+ raise NotImplementedError('This method must be implemented by subclasses')
+
+ def __getitem__(self, idx):
+ assert self._use_cache, 'Indexing PagedList requires cache'
+ if not isinstance(idx, int) or idx < 0:
+ raise TypeError('indices must be non-negative integers')
+ entries = self.getslice(idx, idx + 1)
+ if not entries:
+ raise self.IndexError()
+ return entries[0]
+
+ def __bool__(self):
+ return bool(self.getslice(0, 1))
+
+
+class OnDemandPagedList(PagedList):
+ """Download pages until a page with less than maximum results"""
+
+ def _getslice(self, start, end):
+ for pagenum in itertools.count(start // self._pagesize):
+ firstid = pagenum * self._pagesize
+ nextfirstid = pagenum * self._pagesize + self._pagesize
+ if start >= nextfirstid:
+ continue
+
+ startv = (
+ start % self._pagesize
+ if firstid <= start < nextfirstid
+ else 0)
+ endv = (
+ ((end - 1) % self._pagesize) + 1
+ if (end is not None and firstid <= end <= nextfirstid)
+ else None)
+
+ try:
+ page_results = self.getpage(pagenum)
+ except Exception:
+ self._pagecount = pagenum - 1
+ raise
+ if startv != 0 or endv is not None:
+ page_results = page_results[startv:endv]
+ yield from page_results
+
+ # A little optimization: if the current page is not "full", i.e. does
+ # not contain page_size videos, then we can assume that this page
+ # is the last one - there are no more IDs on further pages -
+ # so there is no need to query again.
+ if len(page_results) + startv < self._pagesize:
+ break
+
+ # If we got the whole page, but the next page is not interesting,
+ # break out early as well
+ if end == nextfirstid:
+ break
+
+
+class InAdvancePagedList(PagedList):
+ """PagedList with total number of pages known in advance"""
+
+ def __init__(self, pagefunc, pagecount, pagesize):
+ PagedList.__init__(self, pagefunc, pagesize, True)
+ self._pagecount = pagecount
+
+ def _getslice(self, start, end):
+ start_page = start // self._pagesize
+ end_page = self._pagecount if end is None else min(self._pagecount, end // self._pagesize + 1)
+ skip_elems = start - start_page * self._pagesize
+ only_more = None if end is None else end - start
+ for pagenum in range(start_page, end_page):
+ page_results = self.getpage(pagenum)
+ if skip_elems:
+ page_results = page_results[skip_elems:]
+ skip_elems = None
+ if only_more is not None:
+ if len(page_results) < only_more:
+ only_more -= len(page_results)
+ else:
+ yield from page_results[:only_more]
+ break
+ yield from page_results
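+
+ # Illustrative usage (editor's sketch with a made-up page function; 3 pages of 10):
+ # pl = InAdvancePagedList(lambda n: iter(range(n * 10, (n + 1) * 10)), 3, 10)
+ # pl.getslice(5, 15) == list(range(5, 15))  # fetches only pages 0 and 1
+ # pl[29] == 29 and len(pl) == 30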
+
+
+class PlaylistEntries:
+ MissingEntry = object()
+ is_exhausted = False
+
+ def __init__(self, ydl, info_dict):
+ self.ydl = ydl
+
+ # _entries must be assigned now since infodict can change during iteration
+ entries = info_dict.get('entries')
+ if entries is None:
+ raise EntryNotInPlaylist('There are no entries')
+ elif isinstance(entries, list):
+ self.is_exhausted = True
+
+ requested_entries = info_dict.get('requested_entries')
+ self.is_incomplete = requested_entries is not None
+ if self.is_incomplete:
+ assert self.is_exhausted
+ self._entries = [self.MissingEntry] * max(requested_entries or [0])
+ for i, entry in zip(requested_entries, entries):
+ self._entries[i - 1] = entry
+ elif isinstance(entries, (list, PagedList, LazyList)):
+ self._entries = entries
+ else:
+ self._entries = LazyList(entries)
+
+ PLAYLIST_ITEMS_RE = re.compile(r'''(?x)
+ (?P<start>[+-]?\d+)?
+ (?P<range>[:-]
+ (?P<end>[+-]?\d+|inf(?:inite)?)?
+ (?::(?P<step>[+-]?\d+))?
+ )?''')
+
+ @classmethod
+ def parse_playlist_items(cls, string):
+ for segment in string.split(','):
+ if not segment:
+ raise ValueError('There are two or more consecutive commas')
+ mobj = cls.PLAYLIST_ITEMS_RE.fullmatch(segment)
+ if not mobj:
+ raise ValueError(f'{segment!r} is not a valid specification')
+ start, end, step, has_range = mobj.group('start', 'end', 'step', 'range')
+ if int_or_none(step) == 0:
+ raise ValueError(f'Step in {segment!r} cannot be zero')
+ yield slice(int_or_none(start), float_or_none(end), int_or_none(step)) if has_range else int(start)
+
+ def get_requested_items(self):
+ playlist_items = self.ydl.params.get('playlist_items')
+ playlist_start = self.ydl.params.get('playliststart', 1)
+ playlist_end = self.ydl.params.get('playlistend')
+ # For backwards compatibility, interpret -1 as whole list
+ if playlist_end in (-1, None):
+ playlist_end = ''
+ if not playlist_items:
+ playlist_items = f'{playlist_start}:{playlist_end}'
+ elif playlist_start != 1 or playlist_end:
+ self.ydl.report_warning('Ignoring playliststart and playlistend because playlistitems was given', only_once=True)
+
+ for index in self.parse_playlist_items(playlist_items):
+ for i, entry in self[index]:
+ yield i, entry
+ if not entry:
+ continue
+ try:
+ # The item may have just been added to archive. Don't break due to it
+ if not self.ydl.params.get('lazy_playlist'):
+ # TODO: Add auto-generated fields
+ self.ydl._match_entry(entry, incomplete=True, silent=True)
+ except (ExistingVideoReached, RejectedVideoReached):
+ return
+
+ def get_full_count(self):
+ if self.is_exhausted and not self.is_incomplete:
+ return len(self)
+ elif isinstance(self._entries, InAdvancePagedList):
+ if self._entries._pagesize == 1:
+ return self._entries._pagecount
+
+ @functools.cached_property
+ def _getter(self):
+ if isinstance(self._entries, list):
+ def get_entry(i):
+ try:
+ entry = self._entries[i]
+ except IndexError:
+ entry = self.MissingEntry
+ if not self.is_incomplete:
+ raise self.IndexError()
+ if entry is self.MissingEntry:
+ raise EntryNotInPlaylist(f'Entry {i + 1} cannot be found')
+ return entry
+ else:
+ def get_entry(i):
+ try:
+ return type(self.ydl)._handle_extraction_exceptions(lambda _, i: self._entries[i])(self.ydl, i)
+ except (LazyList.IndexError, PagedList.IndexError):
+ raise self.IndexError()
+ return get_entry
+
+ def __getitem__(self, idx):
+ if isinstance(idx, int):
+ idx = slice(idx, idx)
+
+ # NB: PlaylistEntries[1:10] => (1, 2, ..., 10); playlist indices are 1-based and inclusive
+ step = 1 if idx.step is None else idx.step
+ if idx.start is None:
+ start = 0 if step > 0 else len(self) - 1
+ else:
+ start = idx.start - 1 if idx.start >= 0 else len(self) + idx.start
+
+ # NB: Do not call len(self) when idx == [:]
+ if idx.stop is None:
+ stop = 0 if step < 0 else float('inf')
+ else:
+ stop = idx.stop - 1 if idx.stop >= 0 else len(self) + idx.stop
+ stop += [-1, 1][step > 0]
+
+ for i in frange(start, stop, step):
+ if i < 0:
+ continue
+ try:
+ entry = self._getter(i)
+ except self.IndexError:
+ self.is_exhausted = True
+ if step > 0:
+ break
+ continue
+ yield i + 1, entry
+
+ def __len__(self):
+ return len(tuple(self[:]))
+
+ class IndexError(IndexError):
+ pass
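+
+ # Illustrative parse of a --playlist-items spec via PlaylistEntries.parse_playlist_items
+ # (editor's examples, not upstream code):
+ # list(PlaylistEntries.parse_playlist_items('1,3:5,-1')) == [1, slice(3, 5.0, None), -1]
+ # list(PlaylistEntries.parse_playlist_items('::2')) == [slice(None, None, 2)]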
+
+
+def uppercase_escape(s):
+ unicode_escape = codecs.getdecoder('unicode_escape')
+ return re.sub(
+ r'\\U[0-9a-fA-F]{8}',
+ lambda m: unicode_escape(m.group(0))[0],
+ s)
+
+
+def lowercase_escape(s):
+ unicode_escape = codecs.getdecoder('unicode_escape')
+ return re.sub(
+ r'\\u[0-9a-fA-F]{4}',
+ lambda m: unicode_escape(m.group(0))[0],
+ s)
+
+
+def parse_qs(url, **kwargs):
+ return urllib.parse.parse_qs(urllib.parse.urlparse(url).query, **kwargs)
+
+
+def read_batch_urls(batch_fd):
+ def fixup(url):
+ if not isinstance(url, str):
+ url = url.decode('utf-8', 'replace')
+ BOM_UTF8 = ('\xef\xbb\xbf', '\ufeff')
+ for bom in BOM_UTF8:
+ if url.startswith(bom):
+ url = url[len(bom):]
+ url = url.lstrip()
+ if not url or url.startswith(('#', ';', ']')):
+ return False
+ # "#" cannot be stripped out since it is part of the URI
+ # However, it can be safely stripped out if it follows a whitespace
+ return re.split(r'\s#', url, 1)[0].rstrip()
+
+ with contextlib.closing(batch_fd) as fd:
+ return [url for url in map(fixup, fd) if url]
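+
+ # Illustrative behaviour (editor's example with an in-memory file):
+ # read_batch_urls(io.StringIO('# a comment\nhttps://example.com/v #note\n'))
+ #     == ['https://example.com/v']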
+
+
+def urlencode_postdata(*args, **kargs):
+ return urllib.parse.urlencode(*args, **kargs).encode('ascii')
+
+
+def update_url(url, *, query_update=None, **kwargs):
+ """Replace URL components specified by kwargs
+ @param url str or parsed URL tuple
+ @param query_update parameters to merge into the existing query
+ @returns str
+ """
+ if isinstance(url, str):
+ if not kwargs and not query_update:
+ return url
+ else:
+ url = urllib.parse.urlparse(url)
+ if query_update:
+ assert 'query' not in kwargs, 'query_update and query cannot be specified at the same time'
+ kwargs['query'] = urllib.parse.urlencode({
+ **urllib.parse.parse_qs(url.query),
+ **query_update
+ }, True)
+ return urllib.parse.urlunparse(url._replace(**kwargs))
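+
+ # Illustrative behaviour (editor's examples, not upstream code):
+ # update_url('https://example.com/a?x=1', query_update={'y': 2})
+ #     == 'https://example.com/a?x=1&y=2'
+ # update_url('https://example.com/a', netloc='example.org')
+ #     == 'https://example.org/a'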
+
+
+def update_url_query(url, query):
+ return update_url(url, query_update=query)
+
+
+def _multipart_encode_impl(data, boundary):
+ content_type = 'multipart/form-data; boundary=%s' % boundary
+
+ out = b''
+ for k, v in data.items():
+ out += b'--' + boundary.encode('ascii') + b'\r\n'
+ if isinstance(k, str):
+ k = k.encode()
+ if isinstance(v, str):
+ v = v.encode()
+ # RFC 2047 requires non-ASCII field names to be encoded, while RFC 7578
+ # suggests sending UTF-8 directly. Firefox sends UTF-8, too
+ content = b'Content-Disposition: form-data; name="' + k + b'"\r\n\r\n' + v + b'\r\n'
+ if boundary.encode('ascii') in content:
+ raise ValueError('Boundary overlaps with data')
+ out += content
+
+ out += b'--' + boundary.encode('ascii') + b'--\r\n'
+
+ return out, content_type
+
+
+def multipart_encode(data, boundary=None):
+ '''
+ Encode a dict to RFC 7578-compliant form-data
+
+ data:
+ A dict where keys and values can be either Unicode or bytes-like
+ objects.
+ boundary:
+ If specified, it must be a Unicode object, which is used as the boundary.
+ Otherwise a random boundary is generated.
+
+ Reference: https://tools.ietf.org/html/rfc7578
+ '''
+ has_specified_boundary = boundary is not None
+
+ while True:
+ if boundary is None:
+ boundary = '---------------' + str(random.randrange(0x0fffffff, 0xffffffff))
+
+ try:
+ out, content_type = _multipart_encode_impl(data, boundary)
+ break
+ except ValueError:
+ if has_specified_boundary:
+ raise
+ boundary = None
+
+ return out, content_type
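+
+ # Illustrative behaviour (editor's example with a fixed boundary):
+ # multipart_encode({'field': 'value'}, boundary='xxx') == (
+ #     b'--xxx\r\nContent-Disposition: form-data; name="field"\r\n\r\nvalue\r\n--xxx--\r\n',
+ #     'multipart/form-data; boundary=xxx')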
+
+
+def is_iterable_like(x, allowed_types=collections.abc.Iterable, blocked_types=NO_DEFAULT):
+ if blocked_types is NO_DEFAULT:
+ blocked_types = (str, bytes, collections.abc.Mapping)
+ return isinstance(x, allowed_types) and not isinstance(x, blocked_types)
+
+
+def variadic(x, allowed_types=NO_DEFAULT):
+ if not isinstance(allowed_types, (tuple, type)):
+ deprecation_warning('allowed_types should be a tuple or a type')
+ allowed_types = tuple(allowed_types)
+ return x if is_iterable_like(x, blocked_types=allowed_types) else (x, )
+
+
+def try_call(*funcs, expected_type=None, args=[], kwargs={}):
+ for f in funcs:
+ try:
+ val = f(*args, **kwargs)
+ except (AttributeError, KeyError, TypeError, IndexError, ValueError, ZeroDivisionError):
+ pass
+ else:
+ if expected_type is None or isinstance(val, expected_type):
+ return val
+
+
+def try_get(src, getter, expected_type=None):
+ return try_call(*variadic(getter), args=(src,), expected_type=expected_type)
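+
+ # Illustrative behaviour (editor's examples, not upstream code):
+ # try_get({'a': {'b': 1}}, lambda x: x['a']['b'], int) == 1
+ # try_get({'a': {}}, lambda x: x['a']['b'], int) is None  # KeyError is swallowed
+ # try_call(lambda: 1 // 0, lambda: 42) == 42  # first callable that succeeds wins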
+
+
+def filter_dict(dct, cndn=lambda _, v: v is not None):
+ return {k: v for k, v in dct.items() if cndn(k, v)}
+
+
+def merge_dicts(*dicts):
+ merged = {}
+ for a_dict in dicts:
+ for k, v in a_dict.items():
+ if (v is not None and k not in merged
+ or isinstance(v, str) and merged[k] == ''):
+ merged[k] = v
+ return merged
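+
+ # Illustrative behaviour (editor's example): earlier dicts win, except that an
+ # empty string can be overwritten by a later non-empty string:
+ # merge_dicts({'a': 1, 'b': ''}, {'a': 2, 'b': 'x', 'c': None}) == {'a': 1, 'b': 'x'}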
+
+
+def encode_compat_str(string, encoding=preferredencoding(), errors='strict'):
+ return string if isinstance(string, str) else str(string, encoding, errors)
+
+
+US_RATINGS = {
+ 'G': 0,
+ 'PG': 10,
+ 'PG-13': 13,
+ 'R': 16,
+ 'NC': 18,
+}
+
+
+TV_PARENTAL_GUIDELINES = {
+ 'TV-Y': 0,
+ 'TV-Y7': 7,
+ 'TV-G': 0,
+ 'TV-PG': 0,
+ 'TV-14': 14,
+ 'TV-MA': 17,
+}
+
+
+def parse_age_limit(s):
+ # isinstance(False, int) is True. So type() must be used instead
+ if type(s) is int: # noqa: E721
+ return s if 0 <= s <= 21 else None
+ elif not isinstance(s, str):
+ return None
+ m = re.match(r'^(?P<age>\d{1,2})\+?$', s)
+ if m:
+ return int(m.group('age'))
+ s = s.upper()
+ if s in US_RATINGS:
+ return US_RATINGS[s]
+ m = re.match(r'^TV[_-]?(%s)$' % '|'.join(k[3:] for k in TV_PARENTAL_GUIDELINES), s)
+ if m:
+ return TV_PARENTAL_GUIDELINES['TV-' + m.group(1)]
+ return None
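+
+ # Illustrative behaviour (editor's examples, not upstream code):
+ # parse_age_limit('18+') == 18
+ # parse_age_limit('PG-13') == 13
+ # parse_age_limit('TV-MA') == 17
+ # parse_age_limit(False) is None  # bools are deliberately not treated as ints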
+
+
+def strip_jsonp(code):
+ return re.sub(
+ r'''(?sx)^
+ (?:window\.)?(?P<func_name>[a-zA-Z0-9_.$]*)
+ (?:\s*&&\s*(?P=func_name))?
+ \s*\(\s*(?P<callback_data>.*)\);?
+ \s*?(?://[^\n]*)*$''',
+ r'\g<callback_data>', code)
+
+
+def js_to_json(code, vars={}, *, strict=False):
+ # vars is a dict of var, val pairs to substitute
+ STRING_QUOTES = '\'"`'
+ STRING_RE = '|'.join(rf'{q}(?:\\.|[^\\{q}])*{q}' for q in STRING_QUOTES)
+ COMMENT_RE = r'/\*(?:(?!\*/).)*?\*/|//[^\n]*\n'
+ SKIP_RE = fr'\s*(?:{COMMENT_RE})?\s*'
+ INTEGER_TABLE = (
+ (fr'(?s)^(0[xX][0-9a-fA-F]+){SKIP_RE}:?$', 16),
+ (fr'(?s)^(0+[0-7]+){SKIP_RE}:?$', 8),
+ )
+
+ def process_escape(match):
+ JSON_PASSTHROUGH_ESCAPES = R'"\bfnrtu'
+ escape = match.group(1) or match.group(2)
+
+ return (Rf'\{escape}' if escape in JSON_PASSTHROUGH_ESCAPES
+ else R'\u00' if escape == 'x'
+ else '' if escape == '\n'
+ else escape)
+
+ def template_substitute(match):
+ evaluated = js_to_json(match.group(1), vars, strict=strict)
+ if evaluated[0] == '"':
+ return json.loads(evaluated)
+ return evaluated
+
+ def fix_kv(m):
+ v = m.group(0)
+ if v in ('true', 'false', 'null'):
+ return v
+ elif v in ('undefined', 'void 0'):
+ return 'null'
+ elif v.startswith('/*') or v.startswith('//') or v.startswith('!') or v == ',':
+ return ''
+
+ if v[0] in STRING_QUOTES:
+ v = re.sub(r'(?s)\${([^}]+)}', template_substitute, v[1:-1]) if v[0] == '`' else v[1:-1]
+ escaped = re.sub(r'(?s)(")|\\(.)', process_escape, v)
+ return f'"{escaped}"'
+
+ for regex, base in INTEGER_TABLE:
+ im = re.match(regex, v)
+ if im:
+ i = int(im.group(1), base)
+ return f'"{i}":' if v.endswith(':') else str(i)
+
+ if v in vars:
+ try:
+ if not strict:
+ json.loads(vars[v])
+ except json.JSONDecodeError:
+ return json.dumps(vars[v])
+ else:
+ return vars[v]
+
+ if not strict:
+ return f'"{v}"'
+
+ raise ValueError(f'Unknown value: {v}')
+
+ def create_map(mobj):
+ return json.dumps(dict(json.loads(js_to_json(mobj.group(1) or '[]', vars=vars))))
+
+ code = re.sub(r'(?:new\s+)?Array\((.*?)\)', r'[\g<1>]', code)
+ code = re.sub(r'new Map\((\[.*?\])?\)', create_map, code)
+ if not strict:
+ code = re.sub(rf'new Date\(({STRING_RE})\)', r'\g<1>', code)
+ code = re.sub(r'new \w+\((.*?)\)', lambda m: json.dumps(m.group(0)), code)
+ code = re.sub(r'parseInt\([^\d]+(\d+)[^\d]+\)', r'\1', code)
+ code = re.sub(r'\(function\([^)]*\)\s*\{[^}]*\}\s*\)\s*\(\s*(["\'][^)]*["\'])\s*\)', r'\1', code)
+
+ return re.sub(rf'''(?sx)
+ {STRING_RE}|
+ {COMMENT_RE}|,(?={SKIP_RE}[\]}}])|
+ void\s0|(?:(?<![0-9])[eE]|[a-df-zA-DF-Z_$])[.a-zA-Z_$0-9]*|
+ \b(?:0[xX][0-9a-fA-F]+|0+[0-7]+)(?:{SKIP_RE}:)?|
+ [0-9]+(?={SKIP_RE}:)|
+ !+
+ ''', fix_kv, code)
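+
+ # Illustrative behaviour (editor's example, not upstream code):
+ # js_to_json("{key: 'value', num: 0x1A, flag: undefined}")
+ #     == '{"key": "value", "num": 26, "flag": null}'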
+
+
+def qualities(quality_ids):
+ """ Get a numeric quality value out of a list of possible values """
+ def q(qid):
+ try:
+ return quality_ids.index(qid)
+ except ValueError:
+ return -1
+ return q
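+
+ # Illustrative behaviour (editor's example, not upstream code):
+ # q = qualities(['240p', '480p', '720p'])
+ # q('720p') == 2 and q('480p') == 1 and q('4k') == -1  # unknown values sort lowest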
+
+
+POSTPROCESS_WHEN = ('pre_process', 'after_filter', 'video', 'before_dl', 'post_process', 'after_move', 'after_video', 'playlist')
+
+
+DEFAULT_OUTTMPL = {
+ 'default': '%(title)s [%(id)s].%(ext)s',
+ 'chapter': '%(title)s - %(section_number)03d %(section_title)s [%(id)s].%(ext)s',
+}
+OUTTMPL_TYPES = {
+ 'chapter': None,
+ 'subtitle': None,
+ 'thumbnail': None,
+ 'description': 'description',
+ 'annotation': 'annotations.xml',
+ 'infojson': 'info.json',
+ 'link': None,
+ 'pl_video': None,
+ 'pl_thumbnail': None,
+ 'pl_description': 'description',
+ 'pl_infojson': 'info.json',
+}
+
+ # As of [1], the format syntax is:
+# %[mapping_key][conversion_flags][minimum_width][.precision][length_modifier]type
+# 1. https://docs.python.org/2/library/stdtypes.html#string-formatting
+STR_FORMAT_RE_TMPL = r'''(?x)
+ (?<!%)(?P<prefix>(?:%%)*)
+ %
+ (?P<has_key>\((?P<key>{0})\))?
+ (?P<format>
+ (?P<conversion>[#0\-+ ]+)?
+ (?P<min_width>\d+)?
+ (?P<precision>\.\d+)?
+ (?P<len_mod>[hlL])? # unused in python
+ {1} # conversion type
+ )
+'''
+
+
+STR_FORMAT_TYPES = 'diouxXeEfFgGcrsa'
+
+
+def limit_length(s, length):
+ """ Add ellipses to overly long strings """
+ if s is None:
+ return None
+ ELLIPSES = '...'
+ if len(s) > length:
+ return s[:length - len(ELLIPSES)] + ELLIPSES
+ return s
+
+
+def version_tuple(v):
+ return tuple(int(e) for e in re.split(r'[-.]', v))
+
+
+def is_outdated_version(version, limit, assume_new=True):
+ if not version:
+ return not assume_new
+ try:
+ return version_tuple(version) < version_tuple(limit)
+ except ValueError:
+ return not assume_new
+
+
+def ytdl_is_updateable():
+ """ Returns if yt-dlp can be updated with -U """
+
+ from ..update import is_non_updateable
+
+ return not is_non_updateable()
+
+
+def args_to_str(args):
+ # Get a short string representation for a subprocess command
+ return ' '.join(compat_shlex_quote(a) for a in args)
+
+
+def error_to_str(err):
+ return f'{type(err).__name__}: {err}'
+
+
+def mimetype2ext(mt, default=NO_DEFAULT):
+ if not isinstance(mt, str):
+ if default is not NO_DEFAULT:
+ return default
+ return None
+
+ MAP = {
+ # video
+ '3gpp': '3gp',
+ 'mp2t': 'ts',
+ 'mp4': 'mp4',
+ 'mpeg': 'mpeg',
+ 'mpegurl': 'm3u8',
+ 'quicktime': 'mov',
+ 'webm': 'webm',
+ 'vp9': 'vp9',
+ 'video/ogg': 'ogv',
+ 'x-flv': 'flv',
+ 'x-m4v': 'm4v',
+ 'x-matroska': 'mkv',
+ 'x-mng': 'mng',
+ 'x-mp4-fragmented': 'mp4',
+ 'x-ms-asf': 'asf',
+ 'x-ms-wmv': 'wmv',
+ 'x-msvideo': 'avi',
+
+ # application (streaming playlists)
+ 'dash+xml': 'mpd',
+ 'f4m+xml': 'f4m',
+ 'hds+xml': 'f4m',
+ 'vnd.apple.mpegurl': 'm3u8',
+ 'vnd.ms-sstr+xml': 'ism',
+ 'x-mpegurl': 'm3u8',
+
+ # audio
+ 'audio/mp4': 'm4a',
+ # Per RFC 3003, audio/mpeg can be .mp1, .mp2 or .mp3.
+ # Using .mp3 as it's the most popular one
+ 'audio/mpeg': 'mp3',
+ 'audio/webm': 'webm',
+ 'audio/x-matroska': 'mka',
+ 'audio/x-mpegurl': 'm3u',
+ 'midi': 'mid',
+ 'ogg': 'ogg',
+ 'wav': 'wav',
+ 'wave': 'wav',
+ 'x-aac': 'aac',
+ 'x-flac': 'flac',
+ 'x-m4a': 'm4a',
+ 'x-realaudio': 'ra',
+ 'x-wav': 'wav',
+
+ # image
+ 'avif': 'avif',
+ 'bmp': 'bmp',
+ 'gif': 'gif',
+ 'jpeg': 'jpg',
+ 'png': 'png',
+ 'svg+xml': 'svg',
+ 'tiff': 'tif',
+ 'vnd.wap.wbmp': 'wbmp',
+ 'webp': 'webp',
+ 'x-icon': 'ico',
+ 'x-jng': 'jng',
+ 'x-ms-bmp': 'bmp',
+
+ # caption
+ 'filmstrip+json': 'fs',
+ 'smptett+xml': 'tt',
+ 'ttaf+xml': 'dfxp',
+ 'ttml+xml': 'ttml',
+ 'x-ms-sami': 'sami',
+
+ # misc
+ 'gzip': 'gz',
+ 'json': 'json',
+ 'xml': 'xml',
+ 'zip': 'zip',
+ }
+
+ mimetype = mt.partition(';')[0].strip().lower()
+ _, _, subtype = mimetype.rpartition('/')
+
+ ext = traversal.traverse_obj(MAP, mimetype, subtype, subtype.rsplit('+')[-1])
+ if ext:
+ return ext
+ elif default is not NO_DEFAULT:
+ return default
+ return subtype.replace('+', '.')
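+
+ # Illustrative behaviour (editor's examples, not upstream code):
+ # mimetype2ext('video/mp4; codecs="avc1.64001F"') == 'mp4'
+ # mimetype2ext('application/vnd.apple.mpegurl') == 'm3u8'
+ # mimetype2ext('text/html') == 'html'  # unmapped subtypes fall through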
+
+
+def ext2mimetype(ext_or_url):
+ if not ext_or_url:
+ return None
+ if '.' not in ext_or_url:
+ ext_or_url = f'file.{ext_or_url}'
+ return mimetypes.guess_type(ext_or_url)[0]
+
+
+def parse_codecs(codecs_str):
+ # http://tools.ietf.org/html/rfc6381
+ if not codecs_str:
+ return {}
+ split_codecs = list(filter(None, map(
+ str.strip, codecs_str.strip().strip(',').split(','))))
+ vcodec, acodec, scodec, hdr = None, None, None, None
+ for full_codec in split_codecs:
+ parts = re.sub(r'0+(?=\d)', '', full_codec).split('.')
+ if parts[0] in ('avc1', 'avc2', 'avc3', 'avc4', 'vp9', 'vp8', 'hev1', 'hev2',
+ 'h263', 'h264', 'mp4v', 'hvc1', 'av1', 'theora', 'dvh1', 'dvhe'):
+ if vcodec:
+ continue
+ vcodec = full_codec
+ if parts[0] in ('dvh1', 'dvhe'):
+ hdr = 'DV'
+ elif parts[0] == 'av1' and traversal.traverse_obj(parts, 3) == '10':
+ hdr = 'HDR10'
+ elif parts[:2] == ['vp9', '2']:
+ hdr = 'HDR10'
+ elif parts[0] in ('flac', 'mp4a', 'opus', 'vorbis', 'mp3', 'aac', 'ac-4',
+ 'ac-3', 'ec-3', 'eac3', 'dtsc', 'dtse', 'dtsh', 'dtsl'):
+ acodec = acodec or full_codec
+ elif parts[0] in ('stpp', 'wvtt'):
+ scodec = scodec or full_codec
+ else:
+ write_string(f'WARNING: Unknown codec {full_codec}\n')
+ if vcodec or acodec or scodec:
+ return {
+ 'vcodec': vcodec or 'none',
+ 'acodec': acodec or 'none',
+ 'dynamic_range': hdr,
+ **({'scodec': scodec} if scodec is not None else {}),
+ }
+ elif len(split_codecs) == 2:
+ return {
+ 'vcodec': split_codecs[0],
+ 'acodec': split_codecs[1],
+ }
+ return {}
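+
+ # Illustrative behaviour (editor's example, not upstream code):
+ # parse_codecs('avc1.64001f, mp4a.40.2') == {
+ #     'vcodec': 'avc1.64001f', 'acodec': 'mp4a.40.2', 'dynamic_range': None}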
+
+
+def get_compatible_ext(*, vcodecs, acodecs, vexts, aexts, preferences=None):
+ assert len(vcodecs) == len(vexts) and len(acodecs) == len(aexts)
+
+ allow_mkv = not preferences or 'mkv' in preferences
+
+ if allow_mkv and max(len(acodecs), len(vcodecs)) > 1:
+ return 'mkv' # TODO: any other format allows this?
+
+ # TODO: Not all codecs supported by parse_codecs are handled here
+ COMPATIBLE_CODECS = {
+ 'mp4': {
+ 'av1', 'hevc', 'avc1', 'mp4a', 'ac-4', # fourcc (m3u8, mpd)
+ 'h264', 'aacl', 'ec-3', # Set in ISM
+ },
+ 'webm': {
+ 'av1', 'vp9', 'vp8', 'opus', 'vrbs',
+ 'vp9x', 'vp8x', # in the webm spec
+ },
+ }
+
+ sanitize_codec = functools.partial(
+ try_get, getter=lambda x: x[0].split('.')[0].replace('0', '').lower())
+ vcodec, acodec = sanitize_codec(vcodecs), sanitize_codec(acodecs)
+
+ for ext in preferences or COMPATIBLE_CODECS.keys():
+ codec_set = COMPATIBLE_CODECS.get(ext, set())
+ if ext == 'mkv' or codec_set.issuperset((vcodec, acodec)):
+ return ext
+
+ COMPATIBLE_EXTS = (
+ {'mp3', 'mp4', 'm4a', 'm4p', 'm4b', 'm4r', 'm4v', 'ismv', 'isma', 'mov'},
+ {'webm', 'weba'},
+ )
+ for ext in preferences or vexts:
+ current_exts = {ext, *vexts, *aexts}
+ if ext == 'mkv' or current_exts == {ext} or any(
+ ext_sets.issuperset(current_exts) for ext_sets in COMPATIBLE_EXTS):
+ return ext
+ return 'mkv' if allow_mkv else preferences[-1]
+
+
+def urlhandle_detect_ext(url_handle, default=NO_DEFAULT):
+ getheader = url_handle.headers.get
+
+ cd = getheader('Content-Disposition')
+ if cd:
+ m = re.match(r'attachment;\s*filename="(?P<filename>[^"]+)"', cd)
+ if m:
+ e = determine_ext(m.group('filename'), default_ext=None)
+ if e:
+ return e
+
+ meta_ext = getheader('x-amz-meta-name')
+ if meta_ext:
+ e = meta_ext.rpartition('.')[2]
+ if e:
+ return e
+
+ return mimetype2ext(getheader('Content-Type'), default=default)
+
+
+def encode_data_uri(data, mime_type):
+ return 'data:%s;base64,%s' % (mime_type, base64.b64encode(data).decode('ascii'))
+
+
+def age_restricted(content_limit, age_limit):
+ """ Returns True iff the content should be blocked """
+
+ if age_limit is None: # No limit set
+ return False
+ if content_limit is None:
+ return False # Content available for everyone
+ return age_limit < content_limit
+
+
+# List of known byte-order-marks (BOM)
+BOMS = [
+ (b'\xef\xbb\xbf', 'utf-8'),
+ (b'\x00\x00\xfe\xff', 'utf-32-be'),
+ (b'\xff\xfe\x00\x00', 'utf-32-le'),
+ (b'\xff\xfe', 'utf-16-le'),
+ (b'\xfe\xff', 'utf-16-be'),
+]
+
+
+def is_html(first_bytes):
+ """ Detect whether a file contains HTML by examining its first bytes. """
+
+ encoding = 'utf-8'
+ for bom, enc in BOMS:
+ while first_bytes.startswith(bom):
+ encoding, first_bytes = enc, first_bytes[len(bom):]
+
+ return re.match(r'^\s*<', first_bytes.decode(encoding, 'replace'))
+
+
+def determine_protocol(info_dict):
+ protocol = info_dict.get('protocol')
+ if protocol is not None:
+ return protocol
+
+ url = sanitize_url(info_dict['url'])
+ if url.startswith('rtmp'):
+ return 'rtmp'
+ elif url.startswith('mms'):
+ return 'mms'
+ elif url.startswith('rtsp'):
+ return 'rtsp'
+
+ ext = determine_ext(url)
+ if ext == 'm3u8':
+ return 'm3u8' if info_dict.get('is_live') else 'm3u8_native'
+ elif ext == 'f4m':
+ return 'f4m'
+
+ return urllib.parse.urlparse(url).scheme
+
+
+def render_table(header_row, data, delim=False, extra_gap=0, hide_empty=False):
+ """ Render a list of rows, each as a list of values.
+ Text after a \t will be right-aligned """
+ def width(string):
+ return len(remove_terminal_sequences(string).replace('\t', ''))
+
+ def get_max_lens(table):
+ return [max(width(str(v)) for v in col) for col in zip(*table)]
+
+ def filter_using_list(row, filterArray):
+ return [col for take, col in itertools.zip_longest(filterArray, row, fillvalue=True) if take]
+
+ max_lens = get_max_lens(data) if hide_empty else []
+ header_row = filter_using_list(header_row, max_lens)
+ data = [filter_using_list(row, max_lens) for row in data]
+
+ table = [header_row] + data
+ max_lens = get_max_lens(table)
+ extra_gap += 1
+ if delim:
+ table = [header_row, [delim * (ml + extra_gap) for ml in max_lens]] + data
+ table[1][-1] = table[1][-1][:-extra_gap * len(delim)] # Remove extra_gap from end of delimiter
+ for row in table:
+ for pos, text in enumerate(map(str, row)):
+ if '\t' in text:
+ row[pos] = text.replace('\t', ' ' * (max_lens[pos] - width(text))) + ' ' * extra_gap
+ else:
+ row[pos] = text + ' ' * (max_lens[pos] - width(text) + extra_gap)
+ ret = '\n'.join(''.join(row).rstrip() for row in table)
+ return ret
+
+
+def _match_one(filter_part, dct, incomplete):
+ # TODO: Generalize code with YoutubeDL._build_format_filter
+ STRING_OPERATORS = {
+ '*=': operator.contains,
+ '^=': lambda attr, value: attr.startswith(value),
+ '$=': lambda attr, value: attr.endswith(value),
+ '~=': lambda attr, value: re.search(value, attr),
+ }
+ COMPARISON_OPERATORS = {
+ **STRING_OPERATORS,
+ '<=': operator.le, # "<=" must be defined above "<"
+ '<': operator.lt,
+ '>=': operator.ge,
+ '>': operator.gt,
+ '=': operator.eq,
+ }
+
+ if isinstance(incomplete, bool):
+ is_incomplete = lambda _: incomplete
+ else:
+ is_incomplete = lambda k: k in incomplete
+
+ operator_rex = re.compile(r'''(?x)
+ (?P<key>[a-z_]+)
+ \s*(?P<negation>!\s*)?(?P<op>%s)(?P<none_inclusive>\s*\?)?\s*
+ (?:
+ (?P<quote>["\'])(?P<quotedstrval>.+?)(?P=quote)|
+ (?P<strval>.+?)
+ )
+ ''' % '|'.join(map(re.escape, COMPARISON_OPERATORS.keys())))
+ m = operator_rex.fullmatch(filter_part.strip())
+ if m:
+ m = m.groupdict()
+ unnegated_op = COMPARISON_OPERATORS[m['op']]
+ if m['negation']:
+ op = lambda attr, value: not unnegated_op(attr, value)
+ else:
+ op = unnegated_op
+ comparison_value = m['quotedstrval'] or m['strval']
+ if m['quote']:
+ comparison_value = comparison_value.replace(r'\%s' % m['quote'], m['quote'])
+ actual_value = dct.get(m['key'])
+ numeric_comparison = None
+ if isinstance(actual_value, (int, float)):
+ # If the original field is a string and the matching comparison value is
+ # a number, we should respect the origin of the original field
+ # and process the comparison value as a string (see
+ # https://github.com/ytdl-org/youtube-dl/issues/11082)
+ try:
+ numeric_comparison = int(comparison_value)
+ except ValueError:
+ numeric_comparison = parse_filesize(comparison_value)
+ if numeric_comparison is None:
+ numeric_comparison = parse_filesize(f'{comparison_value}B')
+ if numeric_comparison is None:
+ numeric_comparison = parse_duration(comparison_value)
+ if numeric_comparison is not None and m['op'] in STRING_OPERATORS:
+ raise ValueError('Operator %s only supports string values!' % m['op'])
+ if actual_value is None:
+ return is_incomplete(m['key']) or m['none_inclusive']
+ return op(actual_value, comparison_value if numeric_comparison is None else numeric_comparison)
+
+ UNARY_OPERATORS = {
+ '': lambda v: (v is True) if isinstance(v, bool) else (v is not None),
+ '!': lambda v: (v is False) if isinstance(v, bool) else (v is None),
+ }
+ operator_rex = re.compile(r'''(?x)
+ (?P<op>%s)\s*(?P<key>[a-z_]+)
+ ''' % '|'.join(map(re.escape, UNARY_OPERATORS.keys())))
+ m = operator_rex.fullmatch(filter_part.strip())
+ if m:
+ op = UNARY_OPERATORS[m.group('op')]
+ actual_value = dct.get(m.group('key'))
+ if is_incomplete(m.group('key')) and actual_value is None:
+ return True
+ return op(actual_value)
+
+ raise ValueError('Invalid filter part %r' % filter_part)
+
+
+def match_str(filter_str, dct, incomplete=False):
+ """ Filter a dictionary with a simple string syntax.
+ @returns Whether the filter passes
+ @param incomplete Set of keys that are expected to be missing from dct.
+ Can be True/False to indicate that all/none of the keys may be missing.
+ All conditions on incomplete keys pass if the key is missing
+ """
+ return all(
+ _match_one(filter_part.replace(r'\&', '&'), dct, incomplete)
+ for filter_part in re.split(r'(?<!\\)&', filter_str))
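+
+ # Illustrative behaviour (editor's examples, not upstream code):
+ # match_str('duration > 600', {'duration': 700}) is True
+ # match_str('duration > 600', {'title': 'x'}) is False  # a missing key fails...
+ # match_str('duration >? 600', {'title': 'x'})          # ...unless marked none-inclusive with "?"
+ # match_str('!is_live & title *= news', {'is_live': False, 'title': 'the news'}) is True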
+
+
+def match_filter_func(filters, breaking_filters=None):
+ if not filters and not breaking_filters:
+ return None
+ repr_ = f'{match_filter_func.__module__}.{match_filter_func.__qualname__}({filters}, {breaking_filters})'
+
+ breaking_filters = match_filter_func(breaking_filters) or (lambda _, __: None)
+ filters = set(variadic(filters or []))
+
+ interactive = '-' in filters
+ if interactive:
+ filters.remove('-')
+
+ @function_with_repr.set_repr(repr_)
+ def _match_func(info_dict, incomplete=False):
+ ret = breaking_filters(info_dict, incomplete)
+ if ret is not None:
+ raise RejectedVideoReached(ret)
+
+ if not filters or any(match_str(f, info_dict, incomplete) for f in filters):
+ return NO_DEFAULT if interactive and not incomplete else None
+ else:
+ video_title = info_dict.get('title') or info_dict.get('id') or 'entry'
+ filter_str = ') | ('.join(map(str.strip, filters))
+ return f'{video_title} does not pass filter ({filter_str}), skipping ..'
+ return _match_func
+
+
+class download_range_func:
+ def __init__(self, chapters, ranges, from_info=False):
+ self.chapters, self.ranges, self.from_info = chapters, ranges, from_info
+
+ def __call__(self, info_dict, ydl):
+
+ warning = ('There are no chapters matching the regex' if info_dict.get('chapters')
+ else 'Cannot match chapters since chapter information is unavailable')
+ for regex in self.chapters or []:
+ for i, chapter in enumerate(info_dict.get('chapters') or []):
+ if re.search(regex, chapter['title']):
+ warning = None
+ yield {**chapter, 'index': i}
+ if self.chapters and warning:
+ ydl.to_screen(f'[info] {info_dict["id"]}: {warning}')
+
+ for start, end in self.ranges or []:
+ yield {
+ 'start_time': self._handle_negative_timestamp(start, info_dict),
+ 'end_time': self._handle_negative_timestamp(end, info_dict),
+ }
+
+ if self.from_info and (info_dict.get('start_time') or info_dict.get('end_time')):
+ yield {
+ 'start_time': info_dict.get('start_time') or 0,
+ 'end_time': info_dict.get('end_time') or float('inf'),
+ }
+ elif not self.ranges and not self.chapters:
+ yield {}
+
+ @staticmethod
+ def _handle_negative_timestamp(time, info):
+ return max(info['duration'] + time, 0) if info.get('duration') and time < 0 else time
+
+ def __eq__(self, other):
+ return (isinstance(other, download_range_func)
+ and self.chapters == other.chapters and self.ranges == other.ranges)
+
+ def __repr__(self):
+ return f'{__name__}.{type(self).__name__}({self.chapters}, {self.ranges})'
+
+
+def parse_dfxp_time_expr(time_expr):
+ if not time_expr:
+ return
+
+ mobj = re.match(rf'^(?P<time_offset>{NUMBER_RE})s?$', time_expr)
+ if mobj:
+ return float(mobj.group('time_offset'))
+
+ mobj = re.match(r'^(\d+):(\d\d):(\d\d(?:(?:\.|:)\d+)?)$', time_expr)
+ if mobj:
+ return 3600 * int(mobj.group(1)) + 60 * int(mobj.group(2)) + float(mobj.group(3).replace(':', '.'))
+
+
+def srt_subtitles_timecode(seconds):
+ return '%02d:%02d:%02d,%03d' % timetuple_from_msec(seconds * 1000)
+
+
+def ass_subtitles_timecode(seconds):
+ time = timetuple_from_msec(seconds * 1000)
+ return '%01d:%02d:%02d.%02d' % (*time[:-1], time.milliseconds / 10)
+
+
+def dfxp2srt(dfxp_data):
+ '''
+ @param dfxp_data A bytes-like object containing DFXP data
+ @returns A unicode object containing converted SRT data
+ '''
+ LEGACY_NAMESPACES = (
+ (b'http://www.w3.org/ns/ttml', [
+ b'http://www.w3.org/2004/11/ttaf1',
+ b'http://www.w3.org/2006/04/ttaf1',
+ b'http://www.w3.org/2006/10/ttaf1',
+ ]),
+ (b'http://www.w3.org/ns/ttml#styling', [
+ b'http://www.w3.org/ns/ttml#style',
+ ]),
+ )
+
+ SUPPORTED_STYLING = [
+ 'color',
+ 'fontFamily',
+ 'fontSize',
+ 'fontStyle',
+ 'fontWeight',
+ 'textDecoration'
+ ]
+
+ _x = functools.partial(xpath_with_ns, ns_map={
+ 'xml': 'http://www.w3.org/XML/1998/namespace',
+ 'ttml': 'http://www.w3.org/ns/ttml',
+ 'tts': 'http://www.w3.org/ns/ttml#styling',
+ })
+
+ styles = {}
+ default_style = {}
+
+ class TTMLPElementParser:
+ _out = ''
+ _unclosed_elements = []
+ _applied_styles = []
+
+ def start(self, tag, attrib):
+ if tag in (_x('ttml:br'), 'br'):
+ self._out += '\n'
+ else:
+ unclosed_elements = []
+ style = {}
+ element_style_id = attrib.get('style')
+ if default_style:
+ style.update(default_style)
+ if element_style_id:
+ style.update(styles.get(element_style_id, {}))
+ for prop in SUPPORTED_STYLING:
+ prop_val = attrib.get(_x('tts:' + prop))
+ if prop_val:
+ style[prop] = prop_val
+ if style:
+ font = ''
+ for k, v in sorted(style.items()):
+ if self._applied_styles and self._applied_styles[-1].get(k) == v:
+ continue
+ if k == 'color':
+ font += ' color="%s"' % v
+ elif k == 'fontSize':
+ font += ' size="%s"' % v
+ elif k == 'fontFamily':
+ font += ' face="%s"' % v
+ elif k == 'fontWeight' and v == 'bold':
+ self._out += '<b>'
+ unclosed_elements.append('b')
+ elif k == 'fontStyle' and v == 'italic':
+ self._out += '<i>'
+ unclosed_elements.append('i')
+ elif k == 'textDecoration' and v == 'underline':
+ self._out += '<u>'
+ unclosed_elements.append('u')
+ if font:
+ self._out += '<font' + font + '>'
+ unclosed_elements.append('font')
+ applied_style = {}
+ if self._applied_styles:
+ applied_style.update(self._applied_styles[-1])
+ applied_style.update(style)
+ self._applied_styles.append(applied_style)
+ self._unclosed_elements.append(unclosed_elements)
+
+ def end(self, tag):
+ if tag not in (_x('ttml:br'), 'br'):
+ unclosed_elements = self._unclosed_elements.pop()
+ for element in reversed(unclosed_elements):
+ self._out += '</%s>' % element
+ if unclosed_elements and self._applied_styles:
+ self._applied_styles.pop()
+
+ def data(self, data):
+ self._out += data
+
+ def close(self):
+ return self._out.strip()
+
+ # Fix UTF-8 encoded file wrongly marked as UTF-16. See https://github.com/yt-dlp/yt-dlp/issues/6543#issuecomment-1477169870
+ # This will not trigger false positives since only UTF-8 text is being replaced
+ dfxp_data = dfxp_data.replace(b'encoding=\'UTF-16\'', b'encoding=\'UTF-8\'')
+
+ def parse_node(node):
+ target = TTMLPElementParser()
+ parser = xml.etree.ElementTree.XMLParser(target=target)
+ parser.feed(xml.etree.ElementTree.tostring(node))
+ return parser.close()
+
+ for k, v in LEGACY_NAMESPACES:
+ for ns in v:
+ dfxp_data = dfxp_data.replace(ns, k)
+
+ dfxp = compat_etree_fromstring(dfxp_data)
+ out = []
+ paras = dfxp.findall(_x('.//ttml:p')) or dfxp.findall('.//p')
+
+ if not paras:
+ raise ValueError('Invalid dfxp/TTML subtitle')
+
+ repeat = False
+ while True:
+ for style in dfxp.findall(_x('.//ttml:style')):
+ style_id = style.get('id') or style.get(_x('xml:id'))
+ if not style_id:
+ continue
+ parent_style_id = style.get('style')
+ if parent_style_id:
+ if parent_style_id not in styles:
+ repeat = True
+ continue
+ styles[style_id] = styles[parent_style_id].copy()
+ for prop in SUPPORTED_STYLING:
+ prop_val = style.get(_x('tts:' + prop))
+ if prop_val:
+ styles.setdefault(style_id, {})[prop] = prop_val
+ if repeat:
+ repeat = False
+ else:
+ break
+
+ for p in ('body', 'div'):
+ ele = xpath_element(dfxp, [_x('.//ttml:' + p), './/' + p])
+ if ele is None:
+ continue
+ style = styles.get(ele.get('style'))
+ if not style:
+ continue
+ default_style.update(style)
+
+ for para, index in zip(paras, itertools.count(1)):
+ begin_time = parse_dfxp_time_expr(para.attrib.get('begin'))
+ end_time = parse_dfxp_time_expr(para.attrib.get('end'))
+ dur = parse_dfxp_time_expr(para.attrib.get('dur'))
+ if begin_time is None:
+ continue
+ if not end_time:
+ if not dur:
+ continue
+ end_time = begin_time + dur
+ out.append('%d\n%s --> %s\n%s\n\n' % (
+ index,
+ srt_subtitles_timecode(begin_time),
+ srt_subtitles_timecode(end_time),
+ parse_node(para)))
+
+ return ''.join(out)
+
+
+def cli_option(params, command_option, param, separator=None):
+ param = params.get(param)
+ return ([] if param is None
+ else [command_option, str(param)] if separator is None
+ else [f'{command_option}{separator}{param}'])
+
+
+def cli_bool_option(params, command_option, param, true_value='true', false_value='false', separator=None):
+ param = params.get(param)
+ assert param in (True, False, None)
+ return cli_option({True: true_value, False: false_value}, command_option, param, separator)
+
+
+def cli_valueless_option(params, command_option, param, expected_value=True):
+ return [command_option] if params.get(param) == expected_value else []
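+
+ # Illustrative behaviour of the cli_* helpers (editor's examples, not upstream code):
+ # cli_option({'proxy': 'http://example.com'}, '--proxy', 'proxy')
+ #     == ['--proxy', 'http://example.com']
+ # cli_bool_option({'nocheckcertificate': True}, '--check-certificate', 'nocheckcertificate', 'false', 'true')
+ #     == ['--check-certificate', 'false']
+ # cli_valueless_option({'quiet': True}, '--quiet', 'quiet') == ['--quiet']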
+
+
+def cli_configuration_args(argdict, keys, default=[], use_compat=True):
+ if isinstance(argdict, (list, tuple)): # for backward compatibility
+ if use_compat:
+ return argdict
+ else:
+ argdict = None
+ if argdict is None:
+ return default
+ assert isinstance(argdict, dict)
+
+ assert isinstance(keys, (list, tuple))
+ for key_list in keys:
+ arg_list = list(filter(
+ lambda x: x is not None,
+ [argdict.get(key.lower()) for key in variadic(key_list)]))
+ if arg_list:
+ return [arg for args in arg_list for arg in args]
+ return default
+
+
+def _configuration_args(main_key, argdict, exe, keys=None, default=[], use_compat=True):
+ main_key, exe = main_key.lower(), exe.lower()
+ root_key = exe if main_key == exe else f'{main_key}+{exe}'
+ keys = [f'{root_key}{k}' for k in (keys or [''])]
+ if root_key in keys:
+ if main_key != exe:
+ keys.append((main_key, exe))
+ keys.append('default')
+ else:
+ use_compat = False
+ return cli_configuration_args(argdict, keys, default, use_compat)
+
+
+class ISO639Utils:
+ # See http://www.loc.gov/standards/iso639-2/ISO-639-2_utf-8.txt
+ _lang_map = {
+ 'aa': 'aar',
+ 'ab': 'abk',
+ 'ae': 'ave',
+ 'af': 'afr',
+ 'ak': 'aka',
+ 'am': 'amh',
+ 'an': 'arg',
+ 'ar': 'ara',
+ 'as': 'asm',
+ 'av': 'ava',
+ 'ay': 'aym',
+ 'az': 'aze',
+ 'ba': 'bak',
+ 'be': 'bel',
+ 'bg': 'bul',
+ 'bh': 'bih',
+ 'bi': 'bis',
+ 'bm': 'bam',
+ 'bn': 'ben',
+ 'bo': 'bod',
+ 'br': 'bre',
+ 'bs': 'bos',
+ 'ca': 'cat',
+ 'ce': 'che',
+ 'ch': 'cha',
+ 'co': 'cos',
+ 'cr': 'cre',
+ 'cs': 'ces',
+ 'cu': 'chu',
+ 'cv': 'chv',
+ 'cy': 'cym',
+ 'da': 'dan',
+ 'de': 'deu',
+ 'dv': 'div',
+ 'dz': 'dzo',
+ 'ee': 'ewe',
+ 'el': 'ell',
+ 'en': 'eng',
+ 'eo': 'epo',
+ 'es': 'spa',
+ 'et': 'est',
+ 'eu': 'eus',
+ 'fa': 'fas',
+ 'ff': 'ful',
+ 'fi': 'fin',
+ 'fj': 'fij',
+ 'fo': 'fao',
+ 'fr': 'fra',
+ 'fy': 'fry',
+ 'ga': 'gle',
+ 'gd': 'gla',
+ 'gl': 'glg',
+ 'gn': 'grn',
+ 'gu': 'guj',
+ 'gv': 'glv',
+ 'ha': 'hau',
+ 'he': 'heb',
+ 'iw': 'heb', # Replaced by he in 1989 revision
+ 'hi': 'hin',
+ 'ho': 'hmo',
+ 'hr': 'hrv',
+ 'ht': 'hat',
+ 'hu': 'hun',
+ 'hy': 'hye',
+ 'hz': 'her',
+ 'ia': 'ina',
+ 'id': 'ind',
+ 'in': 'ind', # Replaced by id in 1989 revision
+ 'ie': 'ile',
+ 'ig': 'ibo',
+ 'ii': 'iii',
+ 'ik': 'ipk',
+ 'io': 'ido',
+ 'is': 'isl',
+ 'it': 'ita',
+ 'iu': 'iku',
+ 'ja': 'jpn',
+ 'jv': 'jav',
+ 'ka': 'kat',
+ 'kg': 'kon',
+ 'ki': 'kik',
+ 'kj': 'kua',
+ 'kk': 'kaz',
+ 'kl': 'kal',
+ 'km': 'khm',
+ 'kn': 'kan',
+ 'ko': 'kor',
+ 'kr': 'kau',
+ 'ks': 'kas',
+ 'ku': 'kur',
+ 'kv': 'kom',
+ 'kw': 'cor',
+ 'ky': 'kir',
+ 'la': 'lat',
+ 'lb': 'ltz',
+ 'lg': 'lug',
+ 'li': 'lim',
+ 'ln': 'lin',
+ 'lo': 'lao',
+ 'lt': 'lit',
+ 'lu': 'lub',
+ 'lv': 'lav',
+ 'mg': 'mlg',
+ 'mh': 'mah',
+ 'mi': 'mri',
+ 'mk': 'mkd',
+ 'ml': 'mal',
+ 'mn': 'mon',
+ 'mr': 'mar',
+ 'ms': 'msa',
+ 'mt': 'mlt',
+ 'my': 'mya',
+ 'na': 'nau',
+ 'nb': 'nob',
+ 'nd': 'nde',
+ 'ne': 'nep',
+ 'ng': 'ndo',
+ 'nl': 'nld',
+ 'nn': 'nno',
+ 'no': 'nor',
+ 'nr': 'nbl',
+ 'nv': 'nav',
+ 'ny': 'nya',
+ 'oc': 'oci',
+ 'oj': 'oji',
+ 'om': 'orm',
+ 'or': 'ori',
+ 'os': 'oss',
+ 'pa': 'pan',
+ 'pe': 'per',
+ 'pi': 'pli',
+ 'pl': 'pol',
+ 'ps': 'pus',
+ 'pt': 'por',
+ 'qu': 'que',
+ 'rm': 'roh',
+ 'rn': 'run',
+ 'ro': 'ron',
+ 'ru': 'rus',
+ 'rw': 'kin',
+ 'sa': 'san',
+ 'sc': 'srd',
+ 'sd': 'snd',
+ 'se': 'sme',
+ 'sg': 'sag',
+ 'si': 'sin',
+ 'sk': 'slk',
+ 'sl': 'slv',
+ 'sm': 'smo',
+ 'sn': 'sna',
+ 'so': 'som',
+ 'sq': 'sqi',
+ 'sr': 'srp',
+ 'ss': 'ssw',
+ 'st': 'sot',
+ 'su': 'sun',
+ 'sv': 'swe',
+ 'sw': 'swa',
+ 'ta': 'tam',
+ 'te': 'tel',
+ 'tg': 'tgk',
+ 'th': 'tha',
+ 'ti': 'tir',
+ 'tk': 'tuk',
+ 'tl': 'tgl',
+ 'tn': 'tsn',
+ 'to': 'ton',
+ 'tr': 'tur',
+ 'ts': 'tso',
+ 'tt': 'tat',
+ 'tw': 'twi',
+ 'ty': 'tah',
+ 'ug': 'uig',
+ 'uk': 'ukr',
+ 'ur': 'urd',
+ 'uz': 'uzb',
+ 've': 'ven',
+ 'vi': 'vie',
+ 'vo': 'vol',
+ 'wa': 'wln',
+ 'wo': 'wol',
+ 'xh': 'xho',
+ 'yi': 'yid',
+ 'ji': 'yid', # Replaced by yi in 1989 revision
+ 'yo': 'yor',
+ 'za': 'zha',
+ 'zh': 'zho',
+ 'zu': 'zul',
+ }
+
+ @classmethod
+ def short2long(cls, code):
+ """Convert language code from ISO 639-1 to ISO 639-2/T"""
+ return cls._lang_map.get(code[:2])
+
+ @classmethod
+ def long2short(cls, code):
+ """Convert language code from ISO 639-2/T to ISO 639-1"""
+ for short_name, long_name in cls._lang_map.items():
+ if long_name == code:
+ return short_name
+
+
+class ISO3166Utils:
+ # From http://data.okfn.org/data/core/country-list
+ _country_map = {
+ 'AF': 'Afghanistan',
+ 'AX': 'Åland Islands',
+ 'AL': 'Albania',
+ 'DZ': 'Algeria',
+ 'AS': 'American Samoa',
+ 'AD': 'Andorra',
+ 'AO': 'Angola',
+ 'AI': 'Anguilla',
+ 'AQ': 'Antarctica',
+ 'AG': 'Antigua and Barbuda',
+ 'AR': 'Argentina',
+ 'AM': 'Armenia',
+ 'AW': 'Aruba',
+ 'AU': 'Australia',
+ 'AT': 'Austria',
+ 'AZ': 'Azerbaijan',
+ 'BS': 'Bahamas',
+ 'BH': 'Bahrain',
+ 'BD': 'Bangladesh',
+ 'BB': 'Barbados',
+ 'BY': 'Belarus',
+ 'BE': 'Belgium',
+ 'BZ': 'Belize',
+ 'BJ': 'Benin',
+ 'BM': 'Bermuda',
+ 'BT': 'Bhutan',
+ 'BO': 'Bolivia, Plurinational State of',
+ 'BQ': 'Bonaire, Sint Eustatius and Saba',
+ 'BA': 'Bosnia and Herzegovina',
+ 'BW': 'Botswana',
+ 'BV': 'Bouvet Island',
+ 'BR': 'Brazil',
+ 'IO': 'British Indian Ocean Territory',
+ 'BN': 'Brunei Darussalam',
+ 'BG': 'Bulgaria',
+ 'BF': 'Burkina Faso',
+ 'BI': 'Burundi',
+ 'KH': 'Cambodia',
+ 'CM': 'Cameroon',
+ 'CA': 'Canada',
+ 'CV': 'Cape Verde',
+ 'KY': 'Cayman Islands',
+ 'CF': 'Central African Republic',
+ 'TD': 'Chad',
+ 'CL': 'Chile',
+ 'CN': 'China',
+ 'CX': 'Christmas Island',
+ 'CC': 'Cocos (Keeling) Islands',
+ 'CO': 'Colombia',
+ 'KM': 'Comoros',
+ 'CG': 'Congo',
+ 'CD': 'Congo, the Democratic Republic of the',
+ 'CK': 'Cook Islands',
+ 'CR': 'Costa Rica',
+ 'CI': 'Côte d\'Ivoire',
+ 'HR': 'Croatia',
+ 'CU': 'Cuba',
+ 'CW': 'Curaçao',
+ 'CY': 'Cyprus',
+ 'CZ': 'Czech Republic',
+ 'DK': 'Denmark',
+ 'DJ': 'Djibouti',
+ 'DM': 'Dominica',
+ 'DO': 'Dominican Republic',
+ 'EC': 'Ecuador',
+ 'EG': 'Egypt',
+ 'SV': 'El Salvador',
+ 'GQ': 'Equatorial Guinea',
+ 'ER': 'Eritrea',
+ 'EE': 'Estonia',
+ 'ET': 'Ethiopia',
+ 'FK': 'Falkland Islands (Malvinas)',
+ 'FO': 'Faroe Islands',
+ 'FJ': 'Fiji',
+ 'FI': 'Finland',
+ 'FR': 'France',
+ 'GF': 'French Guiana',
+ 'PF': 'French Polynesia',
+ 'TF': 'French Southern Territories',
+ 'GA': 'Gabon',
+ 'GM': 'Gambia',
+ 'GE': 'Georgia',
+ 'DE': 'Germany',
+ 'GH': 'Ghana',
+ 'GI': 'Gibraltar',
+ 'GR': 'Greece',
+ 'GL': 'Greenland',
+ 'GD': 'Grenada',
+ 'GP': 'Guadeloupe',
+ 'GU': 'Guam',
+ 'GT': 'Guatemala',
+ 'GG': 'Guernsey',
+ 'GN': 'Guinea',
+ 'GW': 'Guinea-Bissau',
+ 'GY': 'Guyana',
+ 'HT': 'Haiti',
+ 'HM': 'Heard Island and McDonald Islands',
+ 'VA': 'Holy See (Vatican City State)',
+ 'HN': 'Honduras',
+ 'HK': 'Hong Kong',
+ 'HU': 'Hungary',
+ 'IS': 'Iceland',
+ 'IN': 'India',
+ 'ID': 'Indonesia',
+ 'IR': 'Iran, Islamic Republic of',
+ 'IQ': 'Iraq',
+ 'IE': 'Ireland',
+ 'IM': 'Isle of Man',
+ 'IL': 'Israel',
+ 'IT': 'Italy',
+ 'JM': 'Jamaica',
+ 'JP': 'Japan',
+ 'JE': 'Jersey',
+ 'JO': 'Jordan',
+ 'KZ': 'Kazakhstan',
+ 'KE': 'Kenya',
+ 'KI': 'Kiribati',
+ 'KP': 'Korea, Democratic People\'s Republic of',
+ 'KR': 'Korea, Republic of',
+ 'KW': 'Kuwait',
+ 'KG': 'Kyrgyzstan',
+ 'LA': 'Lao People\'s Democratic Republic',
+ 'LV': 'Latvia',
+ 'LB': 'Lebanon',
+ 'LS': 'Lesotho',
+ 'LR': 'Liberia',
+ 'LY': 'Libya',
+ 'LI': 'Liechtenstein',
+ 'LT': 'Lithuania',
+ 'LU': 'Luxembourg',
+ 'MO': 'Macao',
+ 'MK': 'Macedonia, the Former Yugoslav Republic of',
+ 'MG': 'Madagascar',
+ 'MW': 'Malawi',
+ 'MY': 'Malaysia',
+ 'MV': 'Maldives',
+ 'ML': 'Mali',
+ 'MT': 'Malta',
+ 'MH': 'Marshall Islands',
+ 'MQ': 'Martinique',
+ 'MR': 'Mauritania',
+ 'MU': 'Mauritius',
+ 'YT': 'Mayotte',
+ 'MX': 'Mexico',
+ 'FM': 'Micronesia, Federated States of',
+ 'MD': 'Moldova, Republic of',
+ 'MC': 'Monaco',
+ 'MN': 'Mongolia',
+ 'ME': 'Montenegro',
+ 'MS': 'Montserrat',
+ 'MA': 'Morocco',
+ 'MZ': 'Mozambique',
+ 'MM': 'Myanmar',
+ 'NA': 'Namibia',
+ 'NR': 'Nauru',
+ 'NP': 'Nepal',
+ 'NL': 'Netherlands',
+ 'NC': 'New Caledonia',
+ 'NZ': 'New Zealand',
+ 'NI': 'Nicaragua',
+ 'NE': 'Niger',
+ 'NG': 'Nigeria',
+ 'NU': 'Niue',
+ 'NF': 'Norfolk Island',
+ 'MP': 'Northern Mariana Islands',
+ 'NO': 'Norway',
+ 'OM': 'Oman',
+ 'PK': 'Pakistan',
+ 'PW': 'Palau',
+ 'PS': 'Palestine, State of',
+ 'PA': 'Panama',
+ 'PG': 'Papua New Guinea',
+ 'PY': 'Paraguay',
+ 'PE': 'Peru',
+ 'PH': 'Philippines',
+ 'PN': 'Pitcairn',
+ 'PL': 'Poland',
+ 'PT': 'Portugal',
+ 'PR': 'Puerto Rico',
+ 'QA': 'Qatar',
+ 'RE': 'Réunion',
+ 'RO': 'Romania',
+ 'RU': 'Russian Federation',
+ 'RW': 'Rwanda',
+ 'BL': 'Saint Barthélemy',
+ 'SH': 'Saint Helena, Ascension and Tristan da Cunha',
+ 'KN': 'Saint Kitts and Nevis',
+ 'LC': 'Saint Lucia',
+ 'MF': 'Saint Martin (French part)',
+ 'PM': 'Saint Pierre and Miquelon',
+ 'VC': 'Saint Vincent and the Grenadines',
+ 'WS': 'Samoa',
+ 'SM': 'San Marino',
+ 'ST': 'Sao Tome and Principe',
+ 'SA': 'Saudi Arabia',
+ 'SN': 'Senegal',
+ 'RS': 'Serbia',
+ 'SC': 'Seychelles',
+ 'SL': 'Sierra Leone',
+ 'SG': 'Singapore',
+ 'SX': 'Sint Maarten (Dutch part)',
+ 'SK': 'Slovakia',
+ 'SI': 'Slovenia',
+ 'SB': 'Solomon Islands',
+ 'SO': 'Somalia',
+ 'ZA': 'South Africa',
+ 'GS': 'South Georgia and the South Sandwich Islands',
+ 'SS': 'South Sudan',
+ 'ES': 'Spain',
+ 'LK': 'Sri Lanka',
+ 'SD': 'Sudan',
+ 'SR': 'Suriname',
+ 'SJ': 'Svalbard and Jan Mayen',
+ 'SZ': 'Swaziland',
+ 'SE': 'Sweden',
+ 'CH': 'Switzerland',
+ 'SY': 'Syrian Arab Republic',
+ 'TW': 'Taiwan, Province of China',
+ 'TJ': 'Tajikistan',
+ 'TZ': 'Tanzania, United Republic of',
+ 'TH': 'Thailand',
+ 'TL': 'Timor-Leste',
+ 'TG': 'Togo',
+ 'TK': 'Tokelau',
+ 'TO': 'Tonga',
+ 'TT': 'Trinidad and Tobago',
+ 'TN': 'Tunisia',
+ 'TR': 'Turkey',
+ 'TM': 'Turkmenistan',
+ 'TC': 'Turks and Caicos Islands',
+ 'TV': 'Tuvalu',
+ 'UG': 'Uganda',
+ 'UA': 'Ukraine',
+ 'AE': 'United Arab Emirates',
+ 'GB': 'United Kingdom',
+ 'US': 'United States',
+ 'UM': 'United States Minor Outlying Islands',
+ 'UY': 'Uruguay',
+ 'UZ': 'Uzbekistan',
+ 'VU': 'Vanuatu',
+ 'VE': 'Venezuela, Bolivarian Republic of',
+ 'VN': 'Viet Nam',
+ 'VG': 'Virgin Islands, British',
+ 'VI': 'Virgin Islands, U.S.',
+ 'WF': 'Wallis and Futuna',
+ 'EH': 'Western Sahara',
+ 'YE': 'Yemen',
+ 'ZM': 'Zambia',
+ 'ZW': 'Zimbabwe',
+ # Not ISO 3166 codes, but used for IP blocks
+ 'AP': 'Asia/Pacific Region',
+ 'EU': 'Europe',
+ }
+
+ @classmethod
+ def short2full(cls, code):
+ """Convert an ISO 3166-2 country code to the corresponding full name"""
+ return cls._country_map.get(code.upper())
+
+
+class GeoUtils:
+ # Major IPv4 address blocks per country
+ _country_ip_map = {
+ 'AD': '46.172.224.0/19',
+ 'AE': '94.200.0.0/13',
+ 'AF': '149.54.0.0/17',
+ 'AG': '209.59.64.0/18',
+ 'AI': '204.14.248.0/21',
+ 'AL': '46.99.0.0/16',
+ 'AM': '46.70.0.0/15',
+ 'AO': '105.168.0.0/13',
+ 'AP': '182.50.184.0/21',
+ 'AQ': '23.154.160.0/24',
+ 'AR': '181.0.0.0/12',
+ 'AS': '202.70.112.0/20',
+ 'AT': '77.116.0.0/14',
+ 'AU': '1.128.0.0/11',
+ 'AW': '181.41.0.0/18',
+ 'AX': '185.217.4.0/22',
+ 'AZ': '5.197.0.0/16',
+ 'BA': '31.176.128.0/17',
+ 'BB': '65.48.128.0/17',
+ 'BD': '114.130.0.0/16',
+ 'BE': '57.0.0.0/8',
+ 'BF': '102.178.0.0/15',
+ 'BG': '95.42.0.0/15',
+ 'BH': '37.131.0.0/17',
+ 'BI': '154.117.192.0/18',
+ 'BJ': '137.255.0.0/16',
+ 'BL': '185.212.72.0/23',
+ 'BM': '196.12.64.0/18',
+ 'BN': '156.31.0.0/16',
+ 'BO': '161.56.0.0/16',
+ 'BQ': '161.0.80.0/20',
+ 'BR': '191.128.0.0/12',
+ 'BS': '24.51.64.0/18',
+ 'BT': '119.2.96.0/19',
+ 'BW': '168.167.0.0/16',
+ 'BY': '178.120.0.0/13',
+ 'BZ': '179.42.192.0/18',
+ 'CA': '99.224.0.0/11',
+ 'CD': '41.243.0.0/16',
+ 'CF': '197.242.176.0/21',
+ 'CG': '160.113.0.0/16',
+ 'CH': '85.0.0.0/13',
+ 'CI': '102.136.0.0/14',
+ 'CK': '202.65.32.0/19',
+ 'CL': '152.172.0.0/14',
+ 'CM': '102.244.0.0/14',
+ 'CN': '36.128.0.0/10',
+ 'CO': '181.240.0.0/12',
+ 'CR': '201.192.0.0/12',
+ 'CU': '152.206.0.0/15',
+ 'CV': '165.90.96.0/19',
+ 'CW': '190.88.128.0/17',
+ 'CY': '31.153.0.0/16',
+ 'CZ': '88.100.0.0/14',
+ 'DE': '53.0.0.0/8',
+ 'DJ': '197.241.0.0/17',
+ 'DK': '87.48.0.0/12',
+ 'DM': '192.243.48.0/20',
+ 'DO': '152.166.0.0/15',
+ 'DZ': '41.96.0.0/12',
+ 'EC': '186.68.0.0/15',
+ 'EE': '90.190.0.0/15',
+ 'EG': '156.160.0.0/11',
+ 'ER': '196.200.96.0/20',
+ 'ES': '88.0.0.0/11',
+ 'ET': '196.188.0.0/14',
+ 'EU': '2.16.0.0/13',
+ 'FI': '91.152.0.0/13',
+ 'FJ': '144.120.0.0/16',
+ 'FK': '80.73.208.0/21',
+ 'FM': '119.252.112.0/20',
+ 'FO': '88.85.32.0/19',
+ 'FR': '90.0.0.0/9',
+ 'GA': '41.158.0.0/15',
+ 'GB': '25.0.0.0/8',
+ 'GD': '74.122.88.0/21',
+ 'GE': '31.146.0.0/16',
+ 'GF': '161.22.64.0/18',
+ 'GG': '62.68.160.0/19',
+ 'GH': '154.160.0.0/12',
+ 'GI': '95.164.0.0/16',
+ 'GL': '88.83.0.0/19',
+ 'GM': '160.182.0.0/15',
+ 'GN': '197.149.192.0/18',
+ 'GP': '104.250.0.0/19',
+ 'GQ': '105.235.224.0/20',
+ 'GR': '94.64.0.0/13',
+ 'GT': '168.234.0.0/16',
+ 'GU': '168.123.0.0/16',
+ 'GW': '197.214.80.0/20',
+ 'GY': '181.41.64.0/18',
+ 'HK': '113.252.0.0/14',
+ 'HN': '181.210.0.0/16',
+ 'HR': '93.136.0.0/13',
+ 'HT': '148.102.128.0/17',
+ 'HU': '84.0.0.0/14',
+ 'ID': '39.192.0.0/10',
+ 'IE': '87.32.0.0/12',
+ 'IL': '79.176.0.0/13',
+ 'IM': '5.62.80.0/20',
+ 'IN': '117.192.0.0/10',
+ 'IO': '203.83.48.0/21',
+ 'IQ': '37.236.0.0/14',
+ 'IR': '2.176.0.0/12',
+ 'IS': '82.221.0.0/16',
+ 'IT': '79.0.0.0/10',
+ 'JE': '87.244.64.0/18',
+ 'JM': '72.27.0.0/17',
+ 'JO': '176.29.0.0/16',
+ 'JP': '133.0.0.0/8',
+ 'KE': '105.48.0.0/12',
+ 'KG': '158.181.128.0/17',
+ 'KH': '36.37.128.0/17',
+ 'KI': '103.25.140.0/22',
+ 'KM': '197.255.224.0/20',
+ 'KN': '198.167.192.0/19',
+ 'KP': '175.45.176.0/22',
+ 'KR': '175.192.0.0/10',
+ 'KW': '37.36.0.0/14',
+ 'KY': '64.96.0.0/15',
+ 'KZ': '2.72.0.0/13',
+ 'LA': '115.84.64.0/18',
+ 'LB': '178.135.0.0/16',
+ 'LC': '24.92.144.0/20',
+ 'LI': '82.117.0.0/19',
+ 'LK': '112.134.0.0/15',
+ 'LR': '102.183.0.0/16',
+ 'LS': '129.232.0.0/17',
+ 'LT': '78.56.0.0/13',
+ 'LU': '188.42.0.0/16',
+ 'LV': '46.109.0.0/16',
+ 'LY': '41.252.0.0/14',
+ 'MA': '105.128.0.0/11',
+ 'MC': '88.209.64.0/18',
+ 'MD': '37.246.0.0/16',
+ 'ME': '178.175.0.0/17',
+ 'MF': '74.112.232.0/21',
+ 'MG': '154.126.0.0/17',
+ 'MH': '117.103.88.0/21',
+ 'MK': '77.28.0.0/15',
+ 'ML': '154.118.128.0/18',
+ 'MM': '37.111.0.0/17',
+ 'MN': '49.0.128.0/17',
+ 'MO': '60.246.0.0/16',
+ 'MP': '202.88.64.0/20',
+ 'MQ': '109.203.224.0/19',
+ 'MR': '41.188.64.0/18',
+ 'MS': '208.90.112.0/22',
+ 'MT': '46.11.0.0/16',
+ 'MU': '105.16.0.0/12',
+ 'MV': '27.114.128.0/18',
+ 'MW': '102.70.0.0/15',
+ 'MX': '187.192.0.0/11',
+ 'MY': '175.136.0.0/13',
+ 'MZ': '197.218.0.0/15',
+ 'NA': '41.182.0.0/16',
+ 'NC': '101.101.0.0/18',
+ 'NE': '197.214.0.0/18',
+ 'NF': '203.17.240.0/22',
+ 'NG': '105.112.0.0/12',
+ 'NI': '186.76.0.0/15',
+ 'NL': '145.96.0.0/11',
+ 'NO': '84.208.0.0/13',
+ 'NP': '36.252.0.0/15',
+ 'NR': '203.98.224.0/19',
+ 'NU': '49.156.48.0/22',
+ 'NZ': '49.224.0.0/14',
+ 'OM': '5.36.0.0/15',
+ 'PA': '186.72.0.0/15',
+ 'PE': '186.160.0.0/14',
+ 'PF': '123.50.64.0/18',
+ 'PG': '124.240.192.0/19',
+ 'PH': '49.144.0.0/13',
+ 'PK': '39.32.0.0/11',
+ 'PL': '83.0.0.0/11',
+ 'PM': '70.36.0.0/20',
+ 'PR': '66.50.0.0/16',
+ 'PS': '188.161.0.0/16',
+ 'PT': '85.240.0.0/13',
+ 'PW': '202.124.224.0/20',
+ 'PY': '181.120.0.0/14',
+ 'QA': '37.210.0.0/15',
+ 'RE': '102.35.0.0/16',
+ 'RO': '79.112.0.0/13',
+ 'RS': '93.86.0.0/15',
+ 'RU': '5.136.0.0/13',
+ 'RW': '41.186.0.0/16',
+ 'SA': '188.48.0.0/13',
+ 'SB': '202.1.160.0/19',
+ 'SC': '154.192.0.0/11',
+ 'SD': '102.120.0.0/13',
+ 'SE': '78.64.0.0/12',
+ 'SG': '8.128.0.0/10',
+ 'SI': '188.196.0.0/14',
+ 'SK': '78.98.0.0/15',
+ 'SL': '102.143.0.0/17',
+ 'SM': '89.186.32.0/19',
+ 'SN': '41.82.0.0/15',
+ 'SO': '154.115.192.0/18',
+ 'SR': '186.179.128.0/17',
+ 'SS': '105.235.208.0/21',
+ 'ST': '197.159.160.0/19',
+ 'SV': '168.243.0.0/16',
+ 'SX': '190.102.0.0/20',
+ 'SY': '5.0.0.0/16',
+ 'SZ': '41.84.224.0/19',
+ 'TC': '65.255.48.0/20',
+ 'TD': '154.68.128.0/19',
+ 'TG': '196.168.0.0/14',
+ 'TH': '171.96.0.0/13',
+ 'TJ': '85.9.128.0/18',
+ 'TK': '27.96.24.0/21',
+ 'TL': '180.189.160.0/20',
+ 'TM': '95.85.96.0/19',
+ 'TN': '197.0.0.0/11',
+ 'TO': '175.176.144.0/21',
+ 'TR': '78.160.0.0/11',
+ 'TT': '186.44.0.0/15',
+ 'TV': '202.2.96.0/19',
+ 'TW': '120.96.0.0/11',
+ 'TZ': '156.156.0.0/14',
+ 'UA': '37.52.0.0/14',
+ 'UG': '102.80.0.0/13',
+ 'US': '6.0.0.0/8',
+ 'UY': '167.56.0.0/13',
+ 'UZ': '84.54.64.0/18',
+ 'VA': '212.77.0.0/19',
+ 'VC': '207.191.240.0/21',
+ 'VE': '186.88.0.0/13',
+ 'VG': '66.81.192.0/20',
+ 'VI': '146.226.0.0/16',
+ 'VN': '14.160.0.0/11',
+ 'VU': '202.80.32.0/20',
+ 'WF': '117.20.32.0/21',
+ 'WS': '202.4.32.0/19',
+ 'YE': '134.35.0.0/16',
+ 'YT': '41.242.116.0/22',
+ 'ZA': '41.0.0.0/11',
+ 'ZM': '102.144.0.0/13',
+ 'ZW': '102.177.192.0/18',
+ }
+
+ @classmethod
+ def random_ipv4(cls, code_or_block):
+ if len(code_or_block) == 2:
+ block = cls._country_ip_map.get(code_or_block.upper())
+ if not block:
+ return None
+ else:
+ block = code_or_block
+ addr, preflen = block.split('/')
+ addr_min = struct.unpack('!L', socket.inet_aton(addr))[0]
+ addr_max = addr_min | (0xffffffff >> int(preflen))
+ return str(socket.inet_ntoa(
+ struct.pack('!L', random.randint(addr_min, addr_max))))
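+
+ # Illustrative sketch (editor's note; the enclosing class is assumed to be
+ # `GeoUtils` as in yt-dlp, and results are random, so only the block is fixed):
+ # GeoUtils.random_ipv4('DE') # some address inside 53.0.0.0/8
+ # GeoUtils.random_ipv4('10.0.0.0/8') # some address inside the given block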
+
+
+# Both long_to_bytes and bytes_to_long are adapted from PyCrypto, which is
+# released into the public domain
+# https://github.com/dlitz/pycrypto/blob/master/lib/Crypto/Util/number.py#L387
+
+def long_to_bytes(n, blocksize=0):
+ """long_to_bytes(n:long, blocksize:int) : string
+ Convert a long integer to a byte string.
+
+ If optional blocksize is given and greater than zero, pad the front of the
+ byte string with binary zeros so that the length is a multiple of
+ blocksize.
+ """
+ # after much testing, this algorithm was deemed to be the fastest
+ s = b''
+ n = int(n)
+ while n > 0:
+ s = struct.pack('>I', n & 0xffffffff) + s
+ n = n >> 32
+ # strip off leading zeros
+ for i in range(len(s)):
+ if s[i] != b'\000'[0]:
+ break
+ else:
+ # only happens when n == 0
+ s = b'\000'
+ i = 0
+ s = s[i:]
+ # add back some pad bytes. this could be done more efficiently w.r.t. the
+ # de-padding being done above, but sigh...
+ if blocksize > 0 and len(s) % blocksize:
+ s = (blocksize - len(s) % blocksize) * b'\000' + s
+ return s
+
+
+def bytes_to_long(s):
+ """bytes_to_long(string) : long
+ Convert a byte string to a long integer.
+
+ This is (essentially) the inverse of long_to_bytes().
+ """
+ acc = 0
+ length = len(s)
+ if length % 4:
+ extra = (4 - length % 4)
+ s = b'\000' * extra + s
+ length = length + extra
+ for i in range(0, length, 4):
+ acc = (acc << 32) + struct.unpack('>I', s[i:i + 4])[0]
+ return acc
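+
+# Round-trip sketch (editor's note; values verified by hand):
+# long_to_bytes(65537) == b'\x01\x00\x01'
+# long_to_bytes(65537, blocksize=4) == b'\x00\x01\x00\x01'
+# bytes_to_long(b'\x01\x00\x01') == 65537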
+
+
+def ohdave_rsa_encrypt(data, exponent, modulus):
+ '''
+ Implement OHDave's RSA algorithm. See http://www.ohdave.com/rsa/
+
+ Input:
+ data: data to encrypt, bytes-like object
+ exponent, modulus: parameter e and N of RSA algorithm, both integer
+ Output: hex string of encrypted data
+
+ Limitation: supports one block encryption only
+ '''
+
+ payload = int(binascii.hexlify(data[::-1]), 16)
+ encrypted = pow(payload, exponent, modulus)
+ return '%x' % encrypted
+
+
+def pkcs1pad(data, length):
+ """
+ Pad input data using the PKCS#1 v1.5 scheme
+
+ @param {int[]} data input data
+ @param {int} length target length
+ @returns {int[]} padded data
+ """
+ if len(data) > length - 11:
+ raise ValueError('Input data too long for PKCS#1 padding')
+
+ # PKCS#1 v1.5 requires the padding octets to be non-zero, hence randint(1, 254)
+ pseudo_random = [random.randint(1, 254) for _ in range(length - len(data) - 3)]
+ return [0, 2] + pseudo_random + [0] + data
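+
+# e.g. pkcs1pad([1, 2, 3], 16) yields a 16-item list of the form
+# [0, 2, r1, ..., r10, 0, 1, 2, 3] where r1..r10 are random padding octets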
+
+
+def _base_n_table(n, table):
+ if not table and not n:
+ raise ValueError('Either table or n must be specified')
+ table = (table or '0123456789abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ')[:n]
+
+ if n and n != len(table):
+ raise ValueError(f'base {n} exceeds table length {len(table)}')
+ return table
+
+
+def encode_base_n(num, n=None, table=None):
+ """Convert given int to a base-n string"""
+ table = _base_n_table(n, table)
+ if not num:
+ return table[0]
+
+ result, base = '', len(table)
+ while num:
+ result = table[num % base] + result
+ num = num // base
+ return result
+
+
+def decode_base_n(string, n=None, table=None):
+ """Convert given base-n string to int"""
+ table = {char: index for index, char in enumerate(_base_n_table(n, table))}
+ result, base = 0, len(table)
+ for char in string:
+ result = result * base + table[char]
+ return result
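+
+# Round-trip sketch (editor's note):
+# encode_base_n(255, 16) == 'ff'
+# decode_base_n('ff', 16) == 255
+# decode_base_n(encode_base_n(12345, 62), 62) == 12345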
+
+
+def decode_packed_codes(code):
+ mobj = re.search(PACKED_CODES_RE, code)
+ obfuscated_code, base, count, symbols = mobj.groups()
+ base = int(base)
+ count = int(count)
+ symbols = symbols.split('|')
+ symbol_table = {}
+
+ while count:
+ count -= 1
+ base_n_count = encode_base_n(count, base)
+ symbol_table[base_n_count] = symbols[count] or base_n_count
+
+ return re.sub(
+ r'\b(\w+)\b', lambda mobj: symbol_table[mobj.group(0)],
+ obfuscated_code)
+
+
+def caesar(s, alphabet, shift):
+ if shift == 0:
+ return s
+ l = len(alphabet)
+ return ''.join(
+ alphabet[(alphabet.index(c) + shift) % l] if c in alphabet else c
+ for c in s)
+
+
+def rot47(s):
+ return caesar(s, r'''!"#$%&'()*+,-./0123456789:;<=>?@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\]^_`abcdefghijklmnopqrstuvwxyz{|}~''', 47)
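+
+# Sketch (editor's note): rot47 is its own inverse, since applying the
+# 47-step shift twice walks the full 94-character alphabet:
+# caesar('abc', 'abcdefgh', 2) == 'cde'
+# rot47(rot47('yt-dlp')) == 'yt-dlp'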
+
+
+def parse_m3u8_attributes(attrib):
+ info = {}
+ for (key, val) in re.findall(r'(?P<key>[A-Z0-9-]+)=(?P<val>"[^"]+"|[^",]+)(?:,|$)', attrib):
+ if val.startswith('"'):
+ val = val[1:-1]
+ info[key] = val
+ return info
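+
+# e.g. parse_m3u8_attributes('BANDWIDTH=1280000,CODECS="avc1.4d401e,mp4a.40.2"')
+# == {'BANDWIDTH': '1280000', 'CODECS': 'avc1.4d401e,mp4a.40.2'}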
+
+
+def urshift(val, n):
+ return val >> n if val >= 0 else (val + 0x100000000) >> n
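+
+# Emulates JavaScript's unsigned right shift (>>>) for 32-bit values:
+# urshift(-1, 1) == 0x7fffffff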
+
+
+def write_xattr(path, key, value):
+ # Windows: Write xattrs to NTFS Alternate Data Streams:
+ # http://en.wikipedia.org/wiki/NTFS#Alternate_data_streams_.28ADS.29
+ if compat_os_name == 'nt':
+ assert ':' not in key
+ assert os.path.exists(path)
+
+ try:
+ with open(f'{path}:{key}', 'wb') as f:
+ f.write(value)
+ except OSError as e:
+ raise XAttrMetadataError(e.errno, e.strerror)
+ return
+
+ # UNIX Method 1. Use os.setxattr/xattrs/pyxattrs modules
+
+ setxattr = None
+ if callable(getattr(os, 'setxattr', None)):
+ setxattr = os.setxattr
+ elif getattr(xattr, '_yt_dlp__identifier', None) == 'pyxattr':
+ # Unicode arguments are not supported in pyxattr until version 0.5.0
+ # See https://github.com/ytdl-org/youtube-dl/issues/5498
+ if version_tuple(xattr.__version__) >= (0, 5, 0):
+ setxattr = xattr.set
+ elif xattr:
+ setxattr = xattr.setxattr
+
+ if setxattr:
+ try:
+ setxattr(path, key, value)
+ except OSError as e:
+ raise XAttrMetadataError(e.errno, e.strerror)
+ return
+
+ # UNIX Method 2. Use setfattr/xattr executables
+ exe = ('setfattr' if check_executable('setfattr', ['--version'])
+ else 'xattr' if check_executable('xattr', ['-h']) else None)
+ if not exe:
+ raise XAttrUnavailableError(
+ 'Couldn\'t find a tool to set the xattrs. Install either the "xattr" or "pyxattr" Python modules or the '
+ + ('"xattr" binary' if sys.platform != 'linux' else 'GNU "attr" package (which contains the "setfattr" tool)'))
+
+ value = value.decode()
+ try:
+ _, stderr, returncode = Popen.run(
+ [exe, '-w', key, value, path] if exe == 'xattr' else [exe, '-n', key, '-v', value, path],
+ text=True, stdout=subprocess.PIPE, stderr=subprocess.PIPE, stdin=subprocess.PIPE)
+ except OSError as e:
+ raise XAttrMetadataError(e.errno, e.strerror)
+ if returncode:
+ raise XAttrMetadataError(returncode, stderr)
+
+
+def random_birthday(year_field, month_field, day_field):
+ start_date = datetime.date(1950, 1, 1)
+ end_date = datetime.date(1995, 12, 31)
+ offset = random.randint(0, (end_date - start_date).days)
+ random_date = start_date + datetime.timedelta(offset)
+ return {
+ year_field: str(random_date.year),
+ month_field: str(random_date.month),
+ day_field: str(random_date.day),
+ }
+
+
+def find_available_port(interface=''):
+ try:
+ with socket.socket() as sock:
+ sock.bind((interface, 0))
+ return sock.getsockname()[1]
+ except OSError:
+ return None
+
+
+# Templates for internet shortcut files, which are plain text files.
+DOT_URL_LINK_TEMPLATE = '''\
+[InternetShortcut]
+URL=%(url)s
+'''
+
+DOT_WEBLOC_LINK_TEMPLATE = '''\
+<?xml version="1.0" encoding="UTF-8"?>
+<!DOCTYPE plist PUBLIC "-//Apple//DTD PLIST 1.0//EN" "http://www.apple.com/DTDs/PropertyList-1.0.dtd">
+<plist version="1.0">
+<dict>
+\t<key>URL</key>
+\t<string>%(url)s</string>
+</dict>
+</plist>
+'''
+
+DOT_DESKTOP_LINK_TEMPLATE = '''\
+[Desktop Entry]
+Encoding=UTF-8
+Name=%(filename)s
+Type=Link
+URL=%(url)s
+Icon=text-html
+'''
+
+LINK_TEMPLATES = {
+ 'url': DOT_URL_LINK_TEMPLATE,
+ 'desktop': DOT_DESKTOP_LINK_TEMPLATE,
+ 'webloc': DOT_WEBLOC_LINK_TEMPLATE,
+}
+
+
+def iri_to_uri(iri):
+ """
+ Converts an IRI (Internationalized Resource Identifier, allowing Unicode characters) to a URI (Uniform Resource Identifier, ASCII-only).
+
+ The function doesn't add an additional layer of escaping; e.g., it doesn't escape `%3C` as `%253C`. Instead, it percent-escapes characters (using an underlying UTF-8 encoding) other than those already escaped, leaving the rest of the URI intact.
+ """
+
+ iri_parts = urllib.parse.urlparse(iri)
+
+ if '[' in iri_parts.netloc:
+ raise ValueError('IPv6 URIs are not yet supported.')
+ # Querying `.netloc`, when there's only one bracket, also raises a ValueError.
+
+ # The `safe` argument values, that the following code uses, contain the characters that should not be percent-encoded. Everything else but letters, digits and '_.-' will be percent-encoded with an underlying UTF-8 encoding. Everything already percent-encoded will be left as is.
+
+ net_location = ''
+ if iri_parts.username:
+ net_location += urllib.parse.quote(iri_parts.username, safe=r"!$%&'()*+,~")
+ if iri_parts.password is not None:
+ net_location += ':' + urllib.parse.quote(iri_parts.password, safe=r"!$%&'()*+,~")
+ net_location += '@'
+
+ net_location += iri_parts.hostname.encode('idna').decode() # Punycode for Unicode hostnames.
+ # The 'idna' encoding produces ASCII text.
+ if iri_parts.port is not None and iri_parts.port != 80:
+ net_location += ':' + str(iri_parts.port)
+
+ return urllib.parse.urlunparse(
+ (iri_parts.scheme,
+ net_location,
+
+ urllib.parse.quote_plus(iri_parts.path, safe=r"!$%&'()*+,/:;=@|~"),
+
+ # Unsure about the `safe` argument, since this is a legacy way of handling parameters.
+ urllib.parse.quote_plus(iri_parts.params, safe=r"!$%&'()*+,/:;=@|~"),
+
+ # Not totally sure about the `safe` argument, since the source does not explicitly mention the query URI component.
+ urllib.parse.quote_plus(iri_parts.query, safe=r"!$%&'()*+,/:;=?@{|}~"),
+
+ urllib.parse.quote_plus(iri_parts.fragment, safe=r"!#$%&'()*+,/:;=?@{|}~")))
+
+ # Source for `safe` arguments: https://url.spec.whatwg.org/#percent-encoded-bytes.
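+
+# e.g. (editor's sketch) a Cyrillic path is UTF-8 percent-encoded while the
+# ASCII host is left intact:
+# iri_to_uri('http://www.example.com/путь')
+# == 'http://www.example.com/%D0%BF%D1%83%D1%82%D1%8C'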
+
+
+def to_high_limit_path(path):
+ if sys.platform in ['win32', 'cygwin']:
+ # Work around MAX_PATH limitation on Windows. The maximum allowed length for the individual path segments may still be quite limited.
+ return '\\\\?\\' + os.path.abspath(path)
+
+ return path
+
+
+def format_field(obj, field=None, template='%s', ignore=NO_DEFAULT, default='', func=IDENTITY):
+ val = traversal.traverse_obj(obj, *variadic(field))
+ if not val if ignore is NO_DEFAULT else val in variadic(ignore):
+ return default
+ return template % func(val)
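+
+# e.g. format_field({'title': 'abc'}, 'title', 'Title: %s') == 'Title: abc'
+# format_field({}, 'title', 'Title: %s') == '' (the default)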
+
+
+def clean_podcast_url(url):
+ url = re.sub(r'''(?x)
+ (?:
+ (?:
+ chtbl\.com/track|
+ media\.blubrry\.com| # https://create.blubrry.com/resources/podcast-media-download-statistics/getting-started/
+ play\.podtrac\.com|
+ chrt\.fm/track|
+ mgln\.ai/e
+ )(?:/[^/.]+)?|
+ (?:dts|www)\.podtrac\.com/(?:pts/)?redirect\.[0-9a-z]{3,4}| # http://analytics.podtrac.com/how-to-measure
+ flex\.acast\.com|
+ pd(?:
+ cn\.co| # https://podcorn.com/analytics-prefix/
+ st\.fm # https://podsights.com/docs/
+ )/e|
+ [0-9]\.gum\.fm|
+ pscrb\.fm/rss/p
+ )/''', '', url)
+ return re.sub(r'^\w+://(\w+://)', r'\1', url)
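+
+# e.g. clean_podcast_url('https://chtbl.com/track/12345/example.com/ep.mp3')
+# == 'https://example.com/ep.mp3'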
+
+
+_HEX_TABLE = '0123456789abcdef'
+
+
+def random_uuidv4():
+ return re.sub(r'[xy]', lambda x: _HEX_TABLE[random.randint(0, 15)], 'xxxxxxxx-xxxx-4xxx-yxxx-xxxxxxxxxxxx')
+
+
+def make_dir(path, to_screen=None):
+ try:
+ dn = os.path.dirname(path)
+ if dn:
+ os.makedirs(dn, exist_ok=True)
+ return True
+ except OSError as err:
+ # `callable()` returns a bool, so comparing its result to None was always true
+ if callable(to_screen):
+ to_screen(f'unable to create directory {err}')
+ return False
+
+
+def get_executable_path():
+ from ..update import _get_variant_and_executable_path
+
+ return os.path.dirname(os.path.abspath(_get_variant_and_executable_path()[1]))
+
+
+def get_user_config_dirs(package_name):
+ # .config (e.g. ~/.config/package_name)
+ xdg_config_home = os.getenv('XDG_CONFIG_HOME') or compat_expanduser('~/.config')
+ yield os.path.join(xdg_config_home, package_name)
+
+ # appdata (%APPDATA%/package_name)
+ appdata_dir = os.getenv('appdata')
+ if appdata_dir:
+ yield os.path.join(appdata_dir, package_name)
+
+ # home (~/.package_name)
+ yield os.path.join(compat_expanduser('~'), f'.{package_name}')
+
+
+def get_system_config_dirs(package_name):
+ # /etc/package_name
+ yield os.path.join('/etc', package_name)
+
+
+def time_seconds(**kwargs):
+ """
+ Returns TZ-aware time in seconds since the epoch (1970-01-01T00:00:00Z)
+ """
+ return time.time() + datetime.timedelta(**kwargs).total_seconds()
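+
+# e.g. time_seconds(hours=9) is the current Unix timestamp shifted forward by
+# nine hours, e.g. for sites whose timestamps are in a fixed UTC offset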
+
+
+# Create a JSON Web Signature (JWS) with the HS256 algorithm;
+# the result is in JWS Compact Serialization format.
+# Implemented following JWT: https://www.rfc-editor.org/rfc/rfc7519.html
+# Implemented following JWS: https://www.rfc-editor.org/rfc/rfc7515.html
+def jwt_encode_hs256(payload_data, key, headers={}):
+ header_data = {
+ 'alg': 'HS256',
+ 'typ': 'JWT',
+ }
+ if headers:
+ header_data.update(headers)
+ header_b64 = base64.b64encode(json.dumps(header_data).encode())
+ payload_b64 = base64.b64encode(json.dumps(payload_data).encode())
+ h = hmac.new(key.encode(), header_b64 + b'.' + payload_b64, hashlib.sha256)
+ signature_b64 = base64.b64encode(h.digest())
+ token = header_b64 + b'.' + payload_b64 + b'.' + signature_b64
+ return token
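+
+# e.g. jwt_encode_hs256({'iss': 'yt-dlp'}, 'secret') -> b'<header>.<payload>.<signature>'
+# NB (editor's note): this uses standard padded base64 rather than the unpadded
+# base64url that RFC 7515 prescribes, so strict verifiers may reject the token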
+
+
+# Can be extended in the future to verify the signature, parse the header, and
+# return the algorithm used if it's not HS256
+def jwt_decode_hs256(jwt):
+ header_b64, payload_b64, signature_b64 = jwt.split('.')
+ # add trailing ='s that may have been stripped, superfluous ='s are ignored
+ payload_data = json.loads(base64.urlsafe_b64decode(f'{payload_b64}==='))
+ return payload_data
+
+
+WINDOWS_VT_MODE = False if compat_os_name == 'nt' else None
+
+
+@functools.cache
+def supports_terminal_sequences(stream):
+ if compat_os_name == 'nt':
+ if not WINDOWS_VT_MODE:
+ return False
+ elif not os.getenv('TERM'):
+ return False
+ try:
+ return stream.isatty()
+ except BaseException:
+ return False
+
+
+def windows_enable_vt_mode():
+ """Ref: https://bugs.python.org/issue30075 """
+ if get_windows_version() < (10, 0, 10586):
+ return
+
+ import ctypes
+ import ctypes.wintypes
+ import msvcrt
+
+ ENABLE_VIRTUAL_TERMINAL_PROCESSING = 0x0004
+
+ dll = ctypes.WinDLL('kernel32', use_last_error=False)
+ handle = os.open('CONOUT$', os.O_RDWR)
+ try:
+ h_out = ctypes.wintypes.HANDLE(msvcrt.get_osfhandle(handle))
+ dw_original_mode = ctypes.wintypes.DWORD()
+ success = dll.GetConsoleMode(h_out, ctypes.byref(dw_original_mode))
+ if not success:
+ raise Exception('GetConsoleMode failed')
+
+ success = dll.SetConsoleMode(h_out, ctypes.wintypes.DWORD(
+ dw_original_mode.value | ENABLE_VIRTUAL_TERMINAL_PROCESSING))
+ if not success:
+ raise Exception('SetConsoleMode failed')
+ finally:
+ os.close(handle)
+
+ global WINDOWS_VT_MODE
+ WINDOWS_VT_MODE = True
+ supports_terminal_sequences.cache_clear()
+
+
+_terminal_sequences_re = re.compile('\033\\[[^m]+m')
+
+
+def remove_terminal_sequences(string):
+ return _terminal_sequences_re.sub('', string)
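+
+# e.g. remove_terminal_sequences('\033[31mred\033[0m') == 'red'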
+
+
+def number_of_digits(number):
+ return len('%d' % number)
+
+
+def join_nonempty(*values, delim='-', from_dict=None):
+ if from_dict is not None:
+ values = (traversal.traverse_obj(from_dict, variadic(v)) for v in values)
+ return delim.join(map(str, filter(None, values)))
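+
+# e.g. join_nonempty('a', None, '', 'b') == 'a-b'
+# join_nonempty(2024, 3, 10, delim='.') == '2024.3.10'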
+
+
+def scale_thumbnails_to_max_format_width(formats, thumbnails, url_width_re):
+ """
+ Find the largest format dimensions in terms of video width and, for each thumbnail:
+ * Modify the URL: Match the width with the provided regex and replace with the former width
+ * Update dimensions
+
+ This function is useful with video services that scale the provided thumbnails on demand
+ """
+ _keys = ('width', 'height')
+ max_dimensions = max(
+ (tuple(format.get(k) or 0 for k in _keys) for format in formats),
+ default=(0, 0))
+ if not max_dimensions[0]:
+ return thumbnails
+ return [
+ merge_dicts(
+ {'url': re.sub(url_width_re, str(max_dimensions[0]), thumbnail['url'])},
+ dict(zip(_keys, max_dimensions)), thumbnail)
+ for thumbnail in thumbnails
+ ]
+
+
+def parse_http_range(range):
+ """ Parse value of "Range" or "Content-Range" HTTP header into tuple. """
+ if not range:
+ return None, None, None
+ crg = re.search(r'bytes[ =](\d+)-(\d+)?(?:/(\d+))?', range)
+ if not crg:
+ return None, None, None
+ return int(crg.group(1)), int_or_none(crg.group(2)), int_or_none(crg.group(3))
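+
+# e.g. parse_http_range('bytes=0-499') == (0, 499, None)
+# parse_http_range('bytes 500-999/1234') == (500, 999, 1234)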
+
+
+def read_stdin(what):
+ if what:
+ eof = 'Ctrl+Z' if compat_os_name == 'nt' else 'Ctrl+D'
+ write_string(f'Reading {what} from STDIN - EOF ({eof}) to end:\n')
+ return sys.stdin
+
+
+def determine_file_encoding(data):
+ """
+ Detect the text encoding used
+ @returns (encoding, bytes to skip)
+ """
+
+ # BOMs take priority over in-file coding declarations
+ for bom, enc in BOMS:
+ if data.startswith(bom):
+ return enc, len(bom)
+
+ # Strip off all null bytes to match even when UTF-16 or UTF-32 is used.
+ # We ignore the endianness to get a good enough match
+ data = data.replace(b'\0', b'')
+ mobj = re.match(rb'(?m)^#\s*coding\s*:\s*(\S+)\s*$', data)
+ return mobj.group(1).decode() if mobj else None, 0
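+
+# e.g. determine_file_encoding(b'# coding: utf-8\n-f best') == ('utf-8', 0),
+# while a leading UTF-8 BOM would yield ('utf-8', 3), assuming BOMS maps
+# b'\xef\xbb\xbf' to 'utf-8'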
+
+
+class Config:
+ own_args = None
+ parsed_args = None
+ filename = None
+ __initialized = False
+
+ def __init__(self, parser, label=None):
+ self.parser, self.label = parser, label
+ self._loaded_paths, self.configs = set(), []
+
+ def init(self, args=None, filename=None):
+ assert not self.__initialized
+ self.own_args, self.filename = args, filename
+ return self.load_configs()
+
+ def load_configs(self):
+ directory = ''
+ if self.filename:
+ location = os.path.realpath(self.filename)
+ directory = os.path.dirname(location)
+ if location in self._loaded_paths:
+ return False
+ self._loaded_paths.add(location)
+
+ self.__initialized = True
+ opts, _ = self.parser.parse_known_args(self.own_args)
+ self.parsed_args = self.own_args
+ for location in opts.config_locations or []:
+ if location == '-':
+ if location in self._loaded_paths:
+ continue
+ self._loaded_paths.add(location)
+ self.append_config(shlex.split(read_stdin('options'), comments=True), label='stdin')
+ continue
+ location = os.path.join(directory, expand_path(location))
+ if os.path.isdir(location):
+ location = os.path.join(location, 'yt-dlp.conf')
+ if not os.path.exists(location):
+ self.parser.error(f'config location {location} does not exist')
+ self.append_config(self.read_file(location), location)
+ return True
+
+ def __str__(self):
+ label = join_nonempty(
+ self.label, 'config', f'"{self.filename}"' if self.filename else '',
+ delim=' ')
+ return join_nonempty(
+ self.own_args is not None and f'{label[0].upper()}{label[1:]}: {self.hide_login_info(self.own_args)}',
+ *(f'\n{c}'.replace('\n', '\n| ')[1:] for c in self.configs),
+ delim='\n')
+
+ @staticmethod
+ def read_file(filename, default=[]):
+ try:
+ optionf = open(filename, 'rb')
+ except OSError:
+ return default # silently skip if file is not present
+ try:
+ enc, skip = determine_file_encoding(optionf.read(512))
+ optionf.seek(skip, io.SEEK_SET)
+ except OSError:
+ enc = None # silently skip read errors
+ try:
+ # FIXME: https://github.com/ytdl-org/youtube-dl/commit/dfe5fa49aed02cf36ba9f743b11b0903554b5e56
+ contents = optionf.read().decode(enc or preferredencoding())
+ res = shlex.split(contents, comments=True)
+ except Exception as err:
+ raise ValueError(f'Unable to parse "{filename}": {err}')
+ finally:
+ optionf.close()
+ return res
+
+ @staticmethod
+ def hide_login_info(opts):
+ PRIVATE_OPTS = {'-p', '--password', '-u', '--username', '--video-password', '--ap-password', '--ap-username'}
+ eqre = re.compile('^(?P<key>' + ('|'.join(re.escape(po) for po in PRIVATE_OPTS)) + ')=.+$')
+
+ def _scrub_eq(o):
+ m = eqre.match(o)
+ if m:
+ return m.group('key') + '=PRIVATE'
+ else:
+ return o
+
+ opts = list(map(_scrub_eq, opts))
+ for idx, opt in enumerate(opts):
+ if opt in PRIVATE_OPTS and idx + 1 < len(opts):
+ opts[idx + 1] = 'PRIVATE'
+ return opts
+
+ def append_config(self, *args, label=None):
+ config = type(self)(self.parser, label)
+ config._loaded_paths = self._loaded_paths
+ if config.init(*args):
+ self.configs.append(config)
+
+ @property
+ def all_args(self):
+ for config in reversed(self.configs):
+ yield from config.all_args
+ yield from self.parsed_args or []
+
+ def parse_known_args(self, **kwargs):
+ return self.parser.parse_known_args(self.all_args, **kwargs)
+
+ def parse_args(self):
+ return self.parser.parse_args(self.all_args)
+
+
+def merge_headers(*dicts):
+ """Merge dicts of http headers case insensitively, prioritizing the latter ones"""
+ return {k.title(): v for k, v in itertools.chain.from_iterable(map(dict.items, dicts))}
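+
+# e.g. merge_headers({'user-agent': 'A'}, {'User-Agent': 'B'}) == {'User-Agent': 'B'}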
+
+
+def cached_method(f):
+ """Cache a method"""
+ signature = inspect.signature(f)
+
+ @functools.wraps(f)
+ def wrapper(self, *args, **kwargs):
+ bound_args = signature.bind(self, *args, **kwargs)
+ bound_args.apply_defaults()
+ key = tuple(bound_args.arguments.values())[1:]
+
+ cache = vars(self).setdefault('_cached_method__cache', {}).setdefault(f.__name__, {})
+ if key not in cache:
+ cache[key] = f(self, *args, **kwargs)
+ return cache[key]
+ return wrapper
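+
+# Usage sketch (editor's note; names are illustrative):
+#   class Extractor:
+#       @cached_method
+#       def fetch(self, url):
+#           ...  # body runs once per distinct `url` per instance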
+
+
+class classproperty:
+ """property access for class methods with optional caching"""
+ def __new__(cls, func=None, *args, **kwargs):
+ if not func:
+ return functools.partial(cls, *args, **kwargs)
+ return super().__new__(cls)
+
+ def __init__(self, func, *, cache=False):
+ functools.update_wrapper(self, func)
+ self.func = func
+ self._cache = {} if cache else None
+
+ def __get__(self, _, cls):
+ if self._cache is None:
+ return self.func(cls)
+ elif cls not in self._cache:
+ self._cache[cls] = self.func(cls)
+ return self._cache[cls]
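+
+# Usage sketch (editor's note; `compute_once` is hypothetical):
+#   class Foo:
+#       @classproperty(cache=True)
+#       def setting(cls):
+#           return compute_once()  # evaluated at most once per class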
+
+
+class function_with_repr:
+ def __init__(self, func, repr_=None):
+ functools.update_wrapper(self, func)
+ self.func, self.__repr = func, repr_
+
+ def __call__(self, *args, **kwargs):
+ return self.func(*args, **kwargs)
+
+ @classmethod
+ def set_repr(cls, repr_):
+ return functools.partial(cls, repr_=repr_)
+
+ def __repr__(self):
+ if self.__repr:
+ return self.__repr
+ return f'{self.func.__module__}.{self.func.__qualname__}'
+
+
+class Namespace(types.SimpleNamespace):
+ """Immutable namespace"""
+
+ def __iter__(self):
+ return iter(self.__dict__.values())
+
+ @property
+ def items_(self):
+ return self.__dict__.items()
+
+
+MEDIA_EXTENSIONS = Namespace(
+ common_video=('avi', 'flv', 'mkv', 'mov', 'mp4', 'webm'),
+ video=('3g2', '3gp', 'f4v', 'mk3d', 'divx', 'mpg', 'ogv', 'm4v', 'wmv'),
+ common_audio=('aiff', 'alac', 'flac', 'm4a', 'mka', 'mp3', 'ogg', 'opus', 'wav'),
+ audio=('aac', 'ape', 'asf', 'f4a', 'f4b', 'm4b', 'm4p', 'm4r', 'oga', 'ogx', 'spx', 'vorbis', 'wma', 'weba'),
+ thumbnails=('jpg', 'png', 'webp'),
+ storyboards=('mhtml', ),
+ subtitles=('srt', 'vtt', 'ass', 'lrc'),
+ manifests=('f4f', 'f4m', 'm3u8', 'smil', 'mpd'),
+)
+MEDIA_EXTENSIONS.video += MEDIA_EXTENSIONS.common_video
+MEDIA_EXTENSIONS.audio += MEDIA_EXTENSIONS.common_audio
+
+KNOWN_EXTENSIONS = (*MEDIA_EXTENSIONS.video, *MEDIA_EXTENSIONS.audio, *MEDIA_EXTENSIONS.manifests)
+
+
+class RetryManager:
+ """Usage:
+ for retry in RetryManager(...):
+ try:
+ ...
+ except SomeException as err:
+ retry.error = err
+ continue
+ """
+ attempt, _error = 0, None
+
+ def __init__(self, _retries, _error_callback, **kwargs):
+ self.retries = _retries or 0
+ self.error_callback = functools.partial(_error_callback, **kwargs)
+
+ def _should_retry(self):
+ return self._error is not NO_DEFAULT and self.attempt <= self.retries
+
+ @property
+ def error(self):
+ if self._error is NO_DEFAULT:
+ return None
+ return self._error
+
+ @error.setter
+ def error(self, value):
+ self._error = value
+
+ def __iter__(self):
+ while self._should_retry():
+ self.error = NO_DEFAULT
+ self.attempt += 1
+ yield self
+ if self.error:
+ self.error_callback(self.error, self.attempt, self.retries)
+
+ @staticmethod
+ def report_retry(e, count, retries, *, sleep_func, info, warn, error=None, suffix=None):
+ """Utility function for reporting retries"""
+ if count > retries:
+ if error:
+ return error(f'{e}. Giving up after {count - 1} retries') if count > 1 else error(str(e))
+ raise e
+
+ if not count:
+ return warn(e)
+ elif isinstance(e, ExtractorError):
+ e = remove_end(str_or_none(e.cause) or e.orig_msg, '.')
+ warn(f'{e}. Retrying{format_field(suffix, None, " %s")} ({count}/{retries})...')
+
+ delay = float_or_none(sleep_func(n=count - 1)) if callable(sleep_func) else sleep_func
+ if delay:
+ info(f'Sleeping {delay:.2f} seconds ...')
+ time.sleep(delay)
+
+
+def make_archive_id(ie, video_id):
+ ie_key = ie if isinstance(ie, str) else ie.ie_key()
+ return f'{ie_key.lower()} {video_id}'
+
+
+def truncate_string(s, left, right=0):
+ assert left > 3 and right >= 0
+ if s is None or len(s) <= left + right:
+ return s
+ return f'{s[:left - 3]}...{s[-right:] if right else ""}'
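+
+# e.g. truncate_string('abcdefghij', 6) == 'abc...'
+# truncate_string('abcdefghij', 6, 2) == 'abc...ij'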
+
+
+def orderedSet_from_options(options, alias_dict, *, use_regex=False, start=None):
+ assert 'all' in alias_dict, '"all" alias is required'
+ requested = list(start or [])
+ for val in options:
+ discard = val.startswith('-')
+ if discard:
+ val = val[1:]
+
+ if val in alias_dict:
+ val = alias_dict[val] if not discard else [
+ i[1:] if i.startswith('-') else f'-{i}' for i in alias_dict[val]]
+ # NB: Do not allow regex in aliases for performance
+ requested = orderedSet_from_options(val, alias_dict, start=requested)
+ continue
+
+ current = (filter(re.compile(val, re.I).fullmatch, alias_dict['all']) if use_regex
+ else [val] if val in alias_dict['all'] else None)
+ if current is None:
+ raise ValueError(val)
+
+ if discard:
+ for item in current:
+ while item in requested:
+ requested.remove(item)
+ else:
+ requested.extend(current)
+
+ return orderedSet(requested)
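+
+# e.g. orderedSet_from_options(['all', '-b'], {'all': ['a', 'b', 'c']}) == ['a', 'c']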
+
+
+# TODO: Rewrite
+class FormatSorter:
+ regex = r' *((?P<reverse>\+)?(?P<field>[a-zA-Z0-9_]+)((?P<separator>[~:])(?P<limit>.*?))?)? *$'
+
+ default = ('hidden', 'aud_or_vid', 'hasvid', 'ie_pref', 'lang', 'quality',
+ 'res', 'fps', 'hdr:12', 'vcodec:vp9.2', 'channels', 'acodec',
+ 'size', 'br', 'asr', 'proto', 'ext', 'hasaud', 'source', 'id') # These must not be aliases
+ ytdl_default = ('hasaud', 'lang', 'quality', 'tbr', 'filesize', 'vbr',
+ 'height', 'width', 'proto', 'vext', 'abr', 'aext',
+ 'fps', 'fs_approx', 'source', 'id')
+
+ settings = {
+ 'vcodec': {'type': 'ordered', 'regex': True,
+ 'order': ['av0?1', 'vp0?9.2', 'vp0?9', '[hx]265|he?vc?', '[hx]264|avc', 'vp0?8', 'mp4v|h263', 'theora', '', None, 'none']},
+ 'acodec': {'type': 'ordered', 'regex': True,
+ 'order': ['[af]lac', 'wav|aiff', 'opus', 'vorbis|ogg', 'aac', 'mp?4a?', 'mp3', 'ac-?4', 'e-?a?c-?3', 'ac-?3', 'dts', '', None, 'none']},
+ 'hdr': {'type': 'ordered', 'regex': True, 'field': 'dynamic_range',
+ 'order': ['dv', '(hdr)?12', r'(hdr)?10\+', '(hdr)?10', 'hlg', '', 'sdr', None]},
+ 'proto': {'type': 'ordered', 'regex': True, 'field': 'protocol',
+ 'order': ['(ht|f)tps', '(ht|f)tp$', 'm3u8.*', '.*dash', 'websocket_frag', 'rtmpe?', '', 'mms|rtsp', 'ws|websocket', 'f4']},
+ 'vext': {'type': 'ordered', 'field': 'video_ext',
+ 'order': ('mp4', 'mov', 'webm', 'flv', '', 'none'),
+ 'order_free': ('webm', 'mp4', 'mov', 'flv', '', 'none')},
+ 'aext': {'type': 'ordered', 'regex': True, 'field': 'audio_ext',
+ 'order': ('m4a', 'aac', 'mp3', 'ogg', 'opus', 'web[am]', '', 'none'),
+ 'order_free': ('ogg', 'opus', 'web[am]', 'mp3', 'm4a', 'aac', '', 'none')},
+ 'hidden': {'visible': False, 'forced': True, 'type': 'extractor', 'max': -1000},
+ 'aud_or_vid': {'visible': False, 'forced': True, 'type': 'multiple',
+ 'field': ('vcodec', 'acodec'),
+ 'function': lambda it: int(any(v != 'none' for v in it))},
+ 'ie_pref': {'priority': True, 'type': 'extractor'},
+ 'hasvid': {'priority': True, 'field': 'vcodec', 'type': 'boolean', 'not_in_list': ('none',)},
+ 'hasaud': {'field': 'acodec', 'type': 'boolean', 'not_in_list': ('none',)},
+ 'lang': {'convert': 'float', 'field': 'language_preference', 'default': -1},
+ 'quality': {'convert': 'float', 'default': -1},
+ 'filesize': {'convert': 'bytes'},
+ 'fs_approx': {'convert': 'bytes', 'field': 'filesize_approx'},
+ 'id': {'convert': 'string', 'field': 'format_id'},
+ 'height': {'convert': 'float_none'},
+ 'width': {'convert': 'float_none'},
+ 'fps': {'convert': 'float_none'},
+ 'channels': {'convert': 'float_none', 'field': 'audio_channels'},
+ 'tbr': {'convert': 'float_none'},
+ 'vbr': {'convert': 'float_none'},
+ 'abr': {'convert': 'float_none'},
+ 'asr': {'convert': 'float_none'},
+ 'source': {'convert': 'float', 'field': 'source_preference', 'default': -1},
+
+ 'codec': {'type': 'combined', 'field': ('vcodec', 'acodec')},
+ 'br': {'type': 'multiple', 'field': ('tbr', 'vbr', 'abr'), 'convert': 'float_none',
+ 'function': lambda it: next(filter(None, it), None)},
+ 'size': {'type': 'multiple', 'field': ('filesize', 'fs_approx'), 'convert': 'bytes',
+ 'function': lambda it: next(filter(None, it), None)},
+ 'ext': {'type': 'combined', 'field': ('vext', 'aext')},
+ 'res': {'type': 'multiple', 'field': ('height', 'width'),
+ 'function': lambda it: (lambda l: min(l) if l else 0)(tuple(filter(None, it)))},
+
+ # Actual field names
+ 'format_id': {'type': 'alias', 'field': 'id'},
+ 'preference': {'type': 'alias', 'field': 'ie_pref'},
+ 'language_preference': {'type': 'alias', 'field': 'lang'},
+ 'source_preference': {'type': 'alias', 'field': 'source'},
+ 'protocol': {'type': 'alias', 'field': 'proto'},
+ 'filesize_approx': {'type': 'alias', 'field': 'fs_approx'},
+ 'audio_channels': {'type': 'alias', 'field': 'channels'},
+
+ # Deprecated
+ 'dimension': {'type': 'alias', 'field': 'res', 'deprecated': True},
+ 'resolution': {'type': 'alias', 'field': 'res', 'deprecated': True},
+ 'extension': {'type': 'alias', 'field': 'ext', 'deprecated': True},
+ 'bitrate': {'type': 'alias', 'field': 'br', 'deprecated': True},
+ 'total_bitrate': {'type': 'alias', 'field': 'tbr', 'deprecated': True},
+ 'video_bitrate': {'type': 'alias', 'field': 'vbr', 'deprecated': True},
+ 'audio_bitrate': {'type': 'alias', 'field': 'abr', 'deprecated': True},
+ 'framerate': {'type': 'alias', 'field': 'fps', 'deprecated': True},
+ 'filesize_estimate': {'type': 'alias', 'field': 'size', 'deprecated': True},
+ 'samplerate': {'type': 'alias', 'field': 'asr', 'deprecated': True},
+ 'video_ext': {'type': 'alias', 'field': 'vext', 'deprecated': True},
+ 'audio_ext': {'type': 'alias', 'field': 'aext', 'deprecated': True},
+ 'video_codec': {'type': 'alias', 'field': 'vcodec', 'deprecated': True},
+ 'audio_codec': {'type': 'alias', 'field': 'acodec', 'deprecated': True},
+ 'video': {'type': 'alias', 'field': 'hasvid', 'deprecated': True},
+ 'has_video': {'type': 'alias', 'field': 'hasvid', 'deprecated': True},
+ 'audio': {'type': 'alias', 'field': 'hasaud', 'deprecated': True},
+ 'has_audio': {'type': 'alias', 'field': 'hasaud', 'deprecated': True},
+ 'extractor': {'type': 'alias', 'field': 'ie_pref', 'deprecated': True},
+ 'extractor_preference': {'type': 'alias', 'field': 'ie_pref', 'deprecated': True},
+ }
+
+ def __init__(self, ydl, field_preference):
+ self.ydl = ydl
+ self._order = []
+ self.evaluate_params(self.ydl.params, field_preference)
+ if ydl.params.get('verbose'):
+ self.print_verbose_info(self.ydl.write_debug)
+
+ def _get_field_setting(self, field, key):
+ if field not in self.settings:
+ if key in ('forced', 'priority'):
+ return False
+ self.ydl.deprecated_feature(f'Using arbitrary fields ({field}) for format sorting is '
+ 'deprecated and may be removed in a future version')
+ self.settings[field] = {}
+ propObj = self.settings[field]
+ if key not in propObj:
+ type = propObj.get('type')
+ if key == 'field':
+ default = 'preference' if type == 'extractor' else (field,) if type in ('combined', 'multiple') else field
+ elif key == 'convert':
+ default = 'order' if type == 'ordered' else 'float_string' if field else 'ignore'
+ else:
+ default = {'type': 'field', 'visible': True, 'order': [], 'not_in_list': (None,)}.get(key, None)
+ propObj[key] = default
+ return propObj[key]
+
+ def _resolve_field_value(self, field, value, convertNone=False):
+ if value is None:
+ if not convertNone:
+ return None
+ else:
+ value = value.lower()
+ conversion = self._get_field_setting(field, 'convert')
+ if conversion == 'ignore':
+ return None
+ if conversion == 'string':
+ return value
+ elif conversion == 'float_none':
+ return float_or_none(value)
+ elif conversion == 'bytes':
+ return parse_bytes(value)
+ elif conversion == 'order':
+ order_list = (self._use_free_order and self._get_field_setting(field, 'order_free')) or self._get_field_setting(field, 'order')
+ use_regex = self._get_field_setting(field, 'regex')
+ list_length = len(order_list)
+ empty_pos = order_list.index('') if '' in order_list else list_length + 1
+ if use_regex and value is not None:
+ for i, regex in enumerate(order_list):
+ if regex and re.match(regex, value):
+ return list_length - i
+ return list_length - empty_pos # not in list
+ else: # not regex or value = None
+ return list_length - (order_list.index(value) if value in order_list else empty_pos)
+ else:
+ if value.isnumeric():
+ return float(value)
+ else:
+ self.settings[field]['convert'] = 'string'
+ return value
+
+ def evaluate_params(self, params, sort_extractor):
+ self._use_free_order = params.get('prefer_free_formats', False)
+ self._sort_user = params.get('format_sort', [])
+ self._sort_extractor = sort_extractor
+
+ def add_item(field, reverse, closest, limit_text):
+ field = field.lower()
+ if field in self._order:
+ return
+ self._order.append(field)
+ limit = self._resolve_field_value(field, limit_text)
+ data = {
+ 'reverse': reverse,
+ 'closest': False if limit is None else closest,
+ 'limit_text': limit_text,
+ 'limit': limit}
+ if field in self.settings:
+ self.settings[field].update(data)
+ else:
+ self.settings[field] = data
+
+ sort_list = (
+ tuple(field for field in self.default if self._get_field_setting(field, 'forced'))
+ + (tuple() if params.get('format_sort_force', False)
+ else tuple(field for field in self.default if self._get_field_setting(field, 'priority')))
+ + tuple(self._sort_user) + tuple(sort_extractor) + self.default)
+
+ for item in sort_list:
+ match = re.match(self.regex, item)
+ if match is None:
+ raise ExtractorError('Invalid format sort string "%s" given by extractor' % item)
+ field = match.group('field')
+ if field is None:
+ continue
+ if self._get_field_setting(field, 'type') == 'alias':
+ alias, field = field, self._get_field_setting(field, 'field')
+ if self._get_field_setting(alias, 'deprecated'):
+ self.ydl.deprecated_feature(f'Format sorting alias {alias} is deprecated and may '
+ f'be removed in a future version. Please use {field} instead')
+ reverse = match.group('reverse') is not None
+ closest = match.group('separator') == '~'
+ limit_text = match.group('limit')
+
+ has_limit = limit_text is not None
+ has_multiple_fields = self._get_field_setting(field, 'type') == 'combined'
+ has_multiple_limits = has_limit and has_multiple_fields and not self._get_field_setting(field, 'same_limit')
+
+ fields = self._get_field_setting(field, 'field') if has_multiple_fields else (field,)
+ limits = limit_text.split(':') if has_multiple_limits else (limit_text,) if has_limit else tuple()
+ limit_count = len(limits)
+ for (i, f) in enumerate(fields):
+ add_item(f, reverse, closest,
+ limits[i] if i < limit_count
+ else limits[0] if has_limit and not has_multiple_limits
+ else None)
+
+ def print_verbose_info(self, write_debug):
+ if self._sort_user:
+ write_debug('Sort order given by user: %s' % ', '.join(self._sort_user))
+ if self._sort_extractor:
+ write_debug('Sort order given by extractor: %s' % ', '.join(self._sort_extractor))
+ write_debug('Formats sorted by: %s' % ', '.join(['%s%s%s' % (
+ '+' if self._get_field_setting(field, 'reverse') else '', field,
+ '%s%s(%s)' % ('~' if self._get_field_setting(field, 'closest') else ':',
+ self._get_field_setting(field, 'limit_text'),
+ self._get_field_setting(field, 'limit'))
+ if self._get_field_setting(field, 'limit_text') is not None else '')
+ for field in self._order if self._get_field_setting(field, 'visible')]))
+
+ def _calculate_field_preference_from_value(self, format, field, type, value):
+ reverse = self._get_field_setting(field, 'reverse')
+ closest = self._get_field_setting(field, 'closest')
+ limit = self._get_field_setting(field, 'limit')
+
+ if type == 'extractor':
+ maximum = self._get_field_setting(field, 'max')
+ if value is None or (maximum is not None and value >= maximum):
+ value = -1
+ elif type == 'boolean':
+ in_list = self._get_field_setting(field, 'in_list')
+ not_in_list = self._get_field_setting(field, 'not_in_list')
+ value = 0 if ((in_list is None or value in in_list) and (not_in_list is None or value not in not_in_list)) else -1
+ elif type == 'ordered':
+ value = self._resolve_field_value(field, value, True)
+
+ # try to convert to number
+ val_num = float_or_none(value, default=self._get_field_setting(field, 'default'))
+ is_num = self._get_field_setting(field, 'convert') != 'string' and val_num is not None
+ if is_num:
+ value = val_num
+
+ return ((-10, 0) if value is None
+ else (1, value, 0) if not is_num # if a field has mixed strings and numbers, strings are sorted higher
+ else (0, -abs(value - limit), value - limit if reverse else limit - value) if closest
+ else (0, value, 0) if not reverse and (limit is None or value <= limit)
+ else (0, -value, 0) if limit is None or (reverse and value == limit) or value > limit
+ else (-1, value, 0))
+
+ def _calculate_field_preference(self, format, field):
+ type = self._get_field_setting(field, 'type') # extractor, boolean, ordered, field, multiple
+ get_value = lambda f: format.get(self._get_field_setting(f, 'field'))
+ if type == 'multiple':
+ type = 'field' # Only 'field' is allowed in multiple for now
+ actual_fields = self._get_field_setting(field, 'field')
+
+ value = self._get_field_setting(field, 'function')(get_value(f) for f in actual_fields)
+ else:
+ value = get_value(field)
+ return self._calculate_field_preference_from_value(format, field, type, value)
+
+ def calculate_preference(self, format):
+ # Determine missing protocol
+ if not format.get('protocol'):
+ format['protocol'] = determine_protocol(format)
+
+ # Determine missing ext
+ if not format.get('ext') and 'url' in format:
+ format['ext'] = determine_ext(format['url'])
+ if format.get('vcodec') == 'none':
+ format['audio_ext'] = format['ext'] if format.get('acodec') != 'none' else 'none'
+ format['video_ext'] = 'none'
+ else:
+ format['video_ext'] = format['ext']
+ format['audio_ext'] = 'none'
+ # if format.get('preference') is None and format.get('ext') in ('f4f', 'f4m'): # Not supported?
+ # format['preference'] = -1000
+
+ if format.get('preference') is None and format.get('ext') == 'flv' and re.match('[hx]265|he?vc?', format.get('vcodec') or ''):
+ # HEVC-over-FLV is out-of-spec by FLV's original spec
+ # ref. https://trac.ffmpeg.org/ticket/6389
+ # ref. https://github.com/yt-dlp/yt-dlp/pull/5821
+ format['preference'] = -100
+
+ # Determine missing bitrates
+ if format.get('vcodec') == 'none':
+ format['vbr'] = 0
+ if format.get('acodec') == 'none':
+ format['abr'] = 0
+ if not format.get('vbr') and format.get('vcodec') != 'none':
+ format['vbr'] = try_call(lambda: format['tbr'] - format['abr']) or None
+ if not format.get('abr') and format.get('acodec') != 'none':
+ format['abr'] = try_call(lambda: format['tbr'] - format['vbr']) or None
+ if not format.get('tbr'):
+ format['tbr'] = try_call(lambda: format['vbr'] + format['abr']) or None
+
+ return tuple(self._calculate_field_preference(format, field) for field in self._order)
+
+
+# XXX: Temporary
+class _YDLLogger:
+ def __init__(self, ydl=None):
+ self._ydl = ydl
+
+ def debug(self, message):
+ if self._ydl:
+ self._ydl.write_debug(message)
+
+ def info(self, message):
+ if self._ydl:
+ self._ydl.to_screen(message)
+
+ def warning(self, message, *, once=False):
+ if self._ydl:
+ self._ydl.report_warning(message, once)
+
+ def error(self, message, *, is_error=True):
+ if self._ydl:
+ self._ydl.report_error(message, is_error=is_error)
+
+ def stdout(self, message):
+ if self._ydl:
+ self._ydl.to_stdout(message)
+
+ def stderr(self, message):
+ if self._ydl:
+ self._ydl.to_stderr(message)
diff --git a/yt_dlp/utils/networking.py b/yt_dlp/utils/networking.py
new file mode 100644
index 0000000..4b73252
--- /dev/null
+++ b/yt_dlp/utils/networking.py
@@ -0,0 +1,164 @@
+import collections
+import random
+import urllib.parse
+import urllib.request
+
+from ._utils import remove_start
+
+
+def random_user_agent():
+ _USER_AGENT_TPL = 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/%s Safari/537.36'
+ _CHROME_VERSIONS = (
+ '90.0.4430.212',
+ '90.0.4430.24',
+ '90.0.4430.70',
+ '90.0.4430.72',
+ '90.0.4430.85',
+ '90.0.4430.93',
+ '91.0.4472.101',
+ '91.0.4472.106',
+ '91.0.4472.114',
+ '91.0.4472.124',
+ '91.0.4472.164',
+ '91.0.4472.19',
+ '91.0.4472.77',
+ '92.0.4515.107',
+ '92.0.4515.115',
+ '92.0.4515.131',
+ '92.0.4515.159',
+ '92.0.4515.43',
+ '93.0.4556.0',
+ '93.0.4577.15',
+ '93.0.4577.63',
+ '93.0.4577.82',
+ '94.0.4606.41',
+ '94.0.4606.54',
+ '94.0.4606.61',
+ '94.0.4606.71',
+ '94.0.4606.81',
+ '94.0.4606.85',
+ '95.0.4638.17',
+ '95.0.4638.50',
+ '95.0.4638.54',
+ '95.0.4638.69',
+ '95.0.4638.74',
+ '96.0.4664.18',
+ '96.0.4664.45',
+ '96.0.4664.55',
+ '96.0.4664.93',
+ '97.0.4692.20',
+ )
+ return _USER_AGENT_TPL % random.choice(_CHROME_VERSIONS)
+
+
+class HTTPHeaderDict(collections.UserDict, dict):
+ """
+ Store and access keys case-insensitively.
+ The constructor can take multiple dicts; keys from later dicts take priority.
+ """
+
+ def __init__(self, *args, **kwargs):
+ super().__init__()
+ for dct in args:
+ if dct is not None:
+ self.update(dct)
+ self.update(kwargs)
+
+ def __setitem__(self, key, value):
+ if isinstance(value, bytes):
+ value = value.decode('latin-1')
+ super().__setitem__(key.title(), str(value).strip())
+
+ def __getitem__(self, key):
+ return super().__getitem__(key.title())
+
+ def __delitem__(self, key):
+ super().__delitem__(key.title())
+
+ def __contains__(self, key):
+ return super().__contains__(key.title() if isinstance(key, str) else key)
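+
+# e.g. HTTPHeaderDict({'user-agent': 'UA'})['User-Agent'] == 'UA'
+# Later dicts win: HTTPHeaderDict({'Accept': 'a'}, {'accept': 'b'})['Accept'] == 'b'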
+
+
+std_headers = HTTPHeaderDict({
+ 'User-Agent': random_user_agent(),
+ 'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8',
+ 'Accept-Language': 'en-us,en;q=0.5',
+ 'Sec-Fetch-Mode': 'navigate',
+})
+
+
+def clean_proxies(proxies: dict, headers: HTTPHeaderDict):
+ req_proxy = headers.pop('Ytdl-Request-Proxy', None)
+ if req_proxy:
+ proxies.clear() # XXX: compat: Ytdl-Request-Proxy takes precedence over everything, including NO_PROXY
+ proxies['all'] = req_proxy
+ for proxy_key, proxy_url in proxies.items():
+ if proxy_url == '__noproxy__':
+ proxies[proxy_key] = None
+ continue
+ if proxy_key == 'no': # special case
+ continue
+ if proxy_url is not None:
+ # Ensure proxies without a scheme are http.
+ try:
+ proxy_scheme = urllib.request._parse_proxy(proxy_url)[0]
+ except ValueError:
+ # Ignore invalid proxy URLs. Sometimes these may be introduced through environment
+ # variables unrelated to proxy settings - e.g. Colab `COLAB_LANGUAGE_SERVER_PROXY`.
+ # If the proxy is going to be used, the Request Handler proxy validation will handle it.
+ continue
+ if proxy_scheme is None:
+ proxies[proxy_key] = 'http://' + remove_start(proxy_url, '//')
+
+ replace_scheme = {
+ 'socks5': 'socks5h', # compat: socks5 was treated as socks5h
+ 'socks': 'socks4' # compat: non-standard
+ }
+ if proxy_scheme in replace_scheme:
+ proxies[proxy_key] = urllib.parse.urlunparse(
+ urllib.parse.urlparse(proxy_url)._replace(scheme=replace_scheme[proxy_scheme]))
+
+
+def clean_headers(headers: HTTPHeaderDict):
+ if 'Youtubedl-No-Compression' in headers: # compat
+ del headers['Youtubedl-No-Compression']
+ headers['Accept-Encoding'] = 'identity'
+ headers.pop('Ytdl-socks-proxy', None)
+
+
+def remove_dot_segments(path):
+ # Implements RFC 3986 section 5.2.4 remove_dot_segments
+ # Pseudo-code: https://tools.ietf.org/html/rfc3986#section-5.2.4
+ # https://github.com/urllib3/urllib3/blob/ba49f5c4e19e6bca6827282feb77a3c9f937e64b/src/urllib3/util/url.py#L263
+ output = []
+ segments = path.split('/')
+ for s in segments:
+ if s == '.':
+ continue
+ elif s == '..':
+ if output:
+ output.pop()
+ else:
+ output.append(s)
+ if not segments[0] and (not output or output[0]):
+ output.insert(0, '')
+ if segments[-1] in ('.', '..'):
+ output.append('')
+ return '/'.join(output)
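+
+# RFC 3986 section 5.2.4 examples:
+# remove_dot_segments('/a/b/c/./../../g') == '/a/g'
+# remove_dot_segments('mid/content=5/../6') == 'mid/6'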
+
+
+def escape_rfc3986(s):
+ """Escape non-ASCII characters as suggested by RFC 3986"""
+ return urllib.parse.quote(s, b"%/;:@&=+$,!~*'()?#[]")
+
+
+def normalize_url(url):
+ """Normalize URL as suggested by RFC 3986"""
+ url_parsed = urllib.parse.urlparse(url)
+ return url_parsed._replace(
+ netloc=url_parsed.netloc.encode('idna').decode('ascii'),
+ path=escape_rfc3986(remove_dot_segments(url_parsed.path)),
+ params=escape_rfc3986(url_parsed.params),
+ query=escape_rfc3986(url_parsed.query),
+ fragment=escape_rfc3986(url_parsed.fragment)
+ ).geturl()
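+
+# e.g. normalize_url('http://example.com/a/../b c') == 'http://example.com/b%20c'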
diff --git a/yt_dlp/utils/progress.py b/yt_dlp/utils/progress.py
new file mode 100644
index 0000000..f254a38
--- /dev/null
+++ b/yt_dlp/utils/progress.py
@@ -0,0 +1,109 @@
+from __future__ import annotations
+
+import bisect
+import threading
+import time
+
+
+class ProgressCalculator:
+ # Time to calculate the speed over (seconds)
+ SAMPLING_WINDOW = 3
+ # Minimum time between samples of downloaded bytes (seconds)
+ SAMPLING_RATE = 0.05
+ # Time before showing eta (seconds)
+ GRACE_PERIOD = 1
+
+ def __init__(self, initial: int):
+ self._initial = initial or 0
+ self.downloaded = self._initial
+
+ self.elapsed: float = 0
+ self.speed = SmoothValue(0, smoothing=0.7)
+ self.eta = SmoothValue(None, smoothing=0.9)
+
+ self._total = 0
+ self._start_time = time.monotonic()
+ self._last_update = self._start_time
+
+ self._lock = threading.Lock()
+ self._thread_sizes: dict[int, int] = {}
+
+ self._times = [self._start_time]
+ self._downloaded = [self.downloaded]
+
+ @property
+ def total(self):
+ return self._total
+
+ @total.setter
+ def total(self, value: int | None):
+ with self._lock:
+ if value is not None and value < self.downloaded:
+ value = self.downloaded
+
+ self._total = value
+
+ def thread_reset(self):
+ current_thread = threading.get_ident()
+ with self._lock:
+ self._thread_sizes[current_thread] = 0
+
+ def update(self, size: int | None):
+ if not size:
+ return
+
+ current_thread = threading.get_ident()
+
+ with self._lock:
+ last_size = self._thread_sizes.get(current_thread, 0)
+ self._thread_sizes[current_thread] = size
+ self._update(size - last_size)
+
+ def _update(self, size: int):
+ current_time = time.monotonic()
+
+ self.downloaded += size
+ self.elapsed = current_time - self._start_time
+ if self.total is not None and self.downloaded > self.total:
+ self._total = self.downloaded
+
+ if self._last_update + self.SAMPLING_RATE > current_time:
+ return
+ self._last_update = current_time
+
+ self._times.append(current_time)
+ self._downloaded.append(self.downloaded)
+
+ offset = bisect.bisect_left(self._times, current_time - self.SAMPLING_WINDOW)
+ del self._times[:offset]
+ del self._downloaded[:offset]
+ if len(self._times) < 2:
+ self.speed.reset()
+ self.eta.reset()
+ return
+
+ download_time = current_time - self._times[0]
+ if not download_time:
+ return
+
+ self.speed.set((self.downloaded - self._downloaded[0]) / download_time)
+ if self.total and self.speed.value and self.elapsed > self.GRACE_PERIOD:
+ self.eta.set((self.total - self.downloaded) / self.speed.value)
+ else:
+ self.eta.reset()
+
+
+class SmoothValue:
+ def __init__(self, initial: float | None, smoothing: float):
+ self.value = self.smooth = self._initial = initial
+ self._smoothing = smoothing
+
+ def set(self, value: float):
+ self.value = value
+ if self.smooth is None:
+ self.smooth = self.value
+ else:
+ self.smooth = (1 - self._smoothing) * value + self._smoothing * self.smooth
+
+ def reset(self):
+ self.value = self.smooth = self._initial
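+
+# Exponential smoothing sketch (editor's note): with smoothing=0.7 each new
+# sample contributes 30% to the smoothed value:
+# v = SmoothValue(0, smoothing=0.7); v.set(10)  # v.smooth == 3.0, v.value == 10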
diff --git a/yt_dlp/utils/traversal.py b/yt_dlp/utils/traversal.py
new file mode 100644
index 0000000..8938f4c
--- /dev/null
+++ b/yt_dlp/utils/traversal.py
@@ -0,0 +1,276 @@
+import collections.abc
+import contextlib
+import inspect
+import itertools
+import re
+import xml.etree.ElementTree
+
+from ._utils import (
+ IDENTITY,
+ NO_DEFAULT,
+ LazyList,
+ deprecation_warning,
+ is_iterable_like,
+ try_call,
+ variadic,
+)
+
+
+def traverse_obj(
+ obj, *paths, default=NO_DEFAULT, expected_type=None, get_all=True,
+ casesense=True, is_user_input=NO_DEFAULT, traverse_string=False):
+ """
+ Safely traverse nested `dict`s and `Iterable`s
+
+ >>> obj = [{}, {"key": "value"}]
+ >>> traverse_obj(obj, (1, "key"))
+ 'value'
+
+ Each of the provided `paths` is tested and the first one producing a valid result is returned.
+ If a path branches but yields no results, the next path is tested as well.
+ Supported values for traversal are `Mapping`, `Iterable` and `re.Match`.
+ Unhelpful values (`{}`, `None`) are treated as the absence of a value and discarded.
+
+ The paths will be wrapped in `variadic`, so that `'key'` is conveniently the same as `('key', )`.
+
+ The keys in the path can be one of:
+ - `None`: Return the current object.
+ - `set`: Requires the only item in the set to be a type or function,
+ like `{type}`/`{func}`. If a `type`, returns only values
+ of this type. If a function, returns `func(obj)`.
+ - `str`/`int`: Return `obj[key]`. For `re.Match`, return `obj.group(key)`.
+ - `slice`: Branch out and return all values in `obj[key]`.
+ - `Ellipsis`: Branch out and return a list of all values.
+ - `tuple`/`list`: Branch out and return a list of all matching values.
+ Read as: `[traverse_obj(obj, branch) for branch in branches]`.
+ - `function`: Branch out and return values filtered by the function.
+ Read as: `[value for key, value in obj if function(key, value)]`.
+ For `Iterable`s, `key` is the index of the value.
+ For `re.Match`es, `key` is the group number (0 = full match)
+ as well as additionally any group names, if given.
+ - `dict`: Transform the current object and return a matching dict.
+ Read as: `{key: traverse_obj(obj, path) for key, path in dct.items()}`.
+
+ `tuple`, `list`, and `dict` all support nested paths and branches.
+
+ @params paths Paths which to traverse by.
+ @param default Value to return if the paths do not match.
+ If the last key in the path is a `dict`, it will apply to each value inside
+ the dict instead, depth first. Try to avoid if using nested `dict` keys.
+ @param expected_type If a `type`, only accept final values of this type.
+ If any other callable, try to call the function on each result.
+ If the last key in the path is a `dict`, it will apply to each value inside
+ the dict instead, recursively. This does respect branching paths.
+ @param get_all If `False`, return the first matching result, otherwise all matching ones.
+ @param casesense If `False`, consider string dictionary keys as case insensitive.
+
+ `traverse_string` is only meant to be used by YoutubeDL.prepare_outtmpl and is not part of the API
+
+ @param traverse_string Whether to traverse into objects as strings.
+ If `True`, any non-compatible object will first be
+ converted into a string and then traversed into.
+ The return value of that path will be a string instead,
+ not respecting any further branching.
+
+
+ @returns The result of the object traversal.
+ If successful, `get_all=True`, and the path branches at least once,
+ then a list of results is returned instead.
+ If no `default` is given and the last path branches, a `list` of results
+ is always returned. If a path ends on a `dict` that result will always be a `dict`.
+ """
+ if is_user_input is not NO_DEFAULT:
+ deprecation_warning('The is_user_input parameter is deprecated and no longer works')
+
+ casefold = lambda k: k.casefold() if isinstance(k, str) else k
+
+ if isinstance(expected_type, type):
+ type_test = lambda val: val if isinstance(val, expected_type) else None
+ else:
+ type_test = lambda val: try_call(expected_type or IDENTITY, args=(val,))
+
+ def apply_key(key, obj, is_last):
+ branching = False
+ result = None
+
+ if obj is None and traverse_string:
+ if key is ... or callable(key) or isinstance(key, slice):
+ branching = True
+ result = ()
+
+ elif key is None:
+ result = obj
+
+ elif isinstance(key, set):
+ assert len(key) == 1, 'Set should only be used to wrap a single item'
+ item = next(iter(key))
+ if isinstance(item, type):
+ if isinstance(obj, item):
+ result = obj
+ else:
+ result = try_call(item, args=(obj,))
+
+ elif isinstance(key, (list, tuple)):
+ branching = True
+ result = itertools.chain.from_iterable(
+ apply_path(obj, branch, is_last)[0] for branch in key)
+
+ elif key is ...:
+ branching = True
+ if isinstance(obj, collections.abc.Mapping):
+ result = obj.values()
+ elif is_iterable_like(obj) or isinstance(obj, xml.etree.ElementTree.Element):
+ result = obj
+ elif isinstance(obj, re.Match):
+ result = obj.groups()
+ elif traverse_string:
+ branching = False
+ result = str(obj)
+ else:
+ result = ()
+
+ elif callable(key):
+ branching = True
+ if isinstance(obj, collections.abc.Mapping):
+ iter_obj = obj.items()
+ elif is_iterable_like(obj) or isinstance(obj, xml.etree.ElementTree.Element):
+ iter_obj = enumerate(obj)
+ elif isinstance(obj, re.Match):
+ iter_obj = itertools.chain(
+ enumerate((obj.group(), *obj.groups())),
+ obj.groupdict().items())
+ elif traverse_string:
+ branching = False
+ iter_obj = enumerate(str(obj))
+ else:
+ iter_obj = ()
+
+ result = (v for k, v in iter_obj if try_call(key, args=(k, v)))
+ if not branching: # string traversal
+ result = ''.join(result)
+
+ elif isinstance(key, dict):
+ iter_obj = ((k, _traverse_obj(obj, v, False, is_last)) for k, v in key.items())
+ result = {
+ k: v if v is not None else default for k, v in iter_obj
+ if v is not None or default is not NO_DEFAULT
+ } or None
+
+ elif isinstance(obj, collections.abc.Mapping):
+ result = (try_call(obj.get, args=(key,)) if casesense or try_call(obj.__contains__, args=(key,)) else
+ next((v for k, v in obj.items() if casefold(k) == key), None))
+
+ elif isinstance(obj, re.Match):
+ if isinstance(key, int) or casesense:
+ with contextlib.suppress(IndexError):
+ result = obj.group(key)
+
+ elif isinstance(key, str):
+ result = next((v for k, v in obj.groupdict().items() if casefold(k) == key), None)
+
+ elif isinstance(key, (int, slice)):
+ if is_iterable_like(obj, (collections.abc.Sequence, xml.etree.ElementTree.Element)):
+ branching = isinstance(key, slice)
+ with contextlib.suppress(IndexError):
+ result = obj[key]
+ elif traverse_string:
+ with contextlib.suppress(IndexError):
+ result = str(obj)[key]
+
+ elif isinstance(obj, xml.etree.ElementTree.Element) and isinstance(key, str):
+ xpath, _, special = key.rpartition('/')
+ if not special.startswith('@') and special != 'text()':
+ xpath = key
+ special = None
+
+ # Allow abbreviations of relative paths; absolute paths raise an error
+ if xpath.startswith('/'):
+ xpath = f'.{xpath}'
+ elif xpath and not xpath.startswith('./'):
+ xpath = f'./{xpath}'
+
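+ # e.g. the key 'parent/child/@id' iterates './parent/child' and returns
+ # each matched element's 'id' attribute; a trailing 'text()' returns
+ # the element text instead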
+ def apply_specials(element):
+ if special is None:
+ return element
+ if special == '@':
+ return element.attrib
+ if special.startswith('@'):
+ return try_call(element.attrib.get, args=(special[1:],))
+ if special == 'text()':
+ return element.text
+ assert False, f'apply_specials is missing case for {special!r}'
+
+ if xpath:
+ result = list(map(apply_specials, obj.iterfind(xpath)))
+ else:
+ result = apply_specials(obj)
+
+ return branching, result if branching else (result,)
+
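+ # Yields (is_last, item) pairs so the traversal can tell when it is
+ # handling the final component of a path.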
+ def lazy_last(iterable):
+ iterator = iter(iterable)
+ prev = next(iterator, NO_DEFAULT)
+ if prev is NO_DEFAULT:
+ return
+
+ for item in iterator:
+ yield False, prev
+ prev = item
+
+ yield True, prev
+
+ def apply_path(start_obj, path, test_type):
+ objs = (start_obj,)
+ has_branched = False
+
+ key = None
+ for last, key in lazy_last(variadic(path, (str, bytes, dict, set))):
+ if not casesense and isinstance(key, str):
+ key = key.casefold()
+
+ if __debug__ and callable(key):
+ # Verify function signature
+ inspect.signature(key).bind(None, None)
+
+ new_objs = []
+ for obj in objs:
+ branching, results = apply_key(key, obj, last)
+ has_branched |= branching
+ new_objs.append(results)
+
+ objs = itertools.chain.from_iterable(new_objs)
+
+ if test_type and not isinstance(key, (dict, list, tuple)):
+ objs = map(type_test, objs)
+
+ return objs, has_branched, isinstance(key, dict)
+
+ def _traverse_obj(obj, path, allow_empty, test_type):
+ results, has_branched, is_dict = apply_path(obj, path, test_type)
+ results = LazyList(item for item in results if item not in (None, {}))
+ if get_all and has_branched:
+ if results:
+ return results.exhaust()
+ if allow_empty:
+ return [] if default is NO_DEFAULT else default
+ return None
+
+ return results[0] if results else {} if allow_empty and is_dict else None
+
+ for index, path in enumerate(paths, 1):
+ result = _traverse_obj(obj, path, index == len(paths), True)
+ if result is not None:
+ return result
+
+ return None if default is NO_DEFAULT else default
+
+
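+# Convenience wrapper around traverse_obj: branch into every item of obj and
+# return only the first match, e.g. (hypothetical data)
+# get_first([{'id': None}, {'id': 1}], 'id') == 1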
+def get_first(obj, *paths, **kwargs):
+ return traverse_obj(obj, *((..., *variadic(keys)) for keys in paths), **kwargs, get_all=False)
+
+
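+# Return the first usable value for the given key(s); falsy values are skipped
+# unless skip_false_values=False, e.g. (hypothetical data)
+# dict_get({'a': '', 'b': 2}, ('a', 'b')) == 2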
+def dict_get(d, key_or_keys, default=None, skip_false_values=True):
+ for val in map(d.get, variadic(key_or_keys)):
+ if val is not None and (val or not skip_false_values):
+ return val
+ return default
diff --git a/yt_dlp/version.py b/yt_dlp/version.py
new file mode 100644
index 0000000..68c3f00
--- /dev/null
+++ b/yt_dlp/version.py
@@ -0,0 +1,15 @@
+# Autogenerated by devscripts/update-version.py
+
+__version__ = '2024.03.10'
+
+RELEASE_GIT_HEAD = '615a84447e8322720be77a0e64298d7f42848693'
+
+VARIANT = None
+
+UPDATE_HINT = None
+
+CHANNEL = 'stable'
+
+ORIGIN = 'yt-dlp/yt-dlp'
+
+_pkg_version = '2024.03.10'
diff --git a/yt_dlp/webvtt.py b/yt_dlp/webvtt.py
new file mode 100644
index 0000000..7683bfb
--- /dev/null
+++ b/yt_dlp/webvtt.py
@@ -0,0 +1,399 @@
+"""
+A partial parser for WebVTT segments. Interprets enough of the WebVTT stream
+to be able to assemble a single stand-alone subtitle file, suitably adjusting
+timestamps on the way, while everything else is passed through unmodified.
+
+Regular expressions based on the W3C WebVTT specification
+<https://www.w3.org/TR/webvtt1/>. The X-TIMESTAMP-MAP extension is described
+in RFC 8216 §3.5 <https://tools.ietf.org/html/rfc8216#section-3.5>.
+"""
+
+import io
+import re
+
+from .utils import int_or_none, timetuple_from_msec
+
+
+class _MatchParser:
+ """
+ An object that maintains the current parsing position and allows
+ conveniently advancing it as syntax elements are successfully parsed.
+ """
+
+ def __init__(self, string):
+ self._data = string
+ self._pos = 0
+
+ def match(self, r):
+ if isinstance(r, re.Pattern):
+ return r.match(self._data, self._pos)
+ if isinstance(r, str):
+ if self._data.startswith(r, self._pos):
+ return len(r)
+ return None
+ raise ValueError(r)
+
+ def advance(self, by):
+ if by is None:
+ amt = 0
+ elif isinstance(by, re.Match):
+ amt = len(by.group(0))
+ elif isinstance(by, str):
+ amt = len(by)
+ elif isinstance(by, int):
+ amt = by
+ else:
+ raise ValueError(by)
+ self._pos += amt
+ return by
+
+ def consume(self, r):
+ return self.advance(self.match(r))
+
+ def child(self):
+ return _MatchChildParser(self)
+
+
+class _MatchChildParser(_MatchParser):
+ """
+ A child parser state, which advances through the same data as
+ its parent, but has an independent position. This is useful when
+ advancing through syntax elements we might later want to backtrack
+ from.
+ """
+
+ def __init__(self, parent):
+ super().__init__(parent._data)
+ self.__parent = parent
+ self._pos = parent._pos
+
+ def commit(self):
+ """
+ Advance the parent state to the current position of this child state.
+ """
+ self.__parent._pos = self._pos
+ return self.__parent
+
+
+class ParseError(Exception):
+ def __init__(self, parser):
+ super().__init__("Parse error at position %u (near %r)" % (
+ parser._pos, parser._data[parser._pos:parser._pos + 100]
+ ))
+
+
+# While the specification <https://www.w3.org/TR/webvtt1/#webvtt-timestamp>
+# prescribes that hours must be *2 or more* digits, timestamps with a single
+# digit for the hour part have been seen in the wild.
+# See https://github.com/yt-dlp/yt-dlp/issues/921
+_REGEX_TS = re.compile(r'''(?x)
+ (?:([0-9]{1,}):)?
+ ([0-9]{2}):
+ ([0-9]{2})\.
+ ([0-9]{3})?
+''')
+_REGEX_EOF = re.compile(r'\Z')
+_REGEX_NL = re.compile(r'(?:\r\n|[\r\n]|$)')
+_REGEX_BLANK = re.compile(r'(?:\r\n|[\r\n])+')
+_REGEX_OPTIONAL_WHITESPACE = re.compile(r'[ \t]*')
+
+
+def _parse_ts(ts):
+ """
+ Convert a parsed WebVTT timestamp (a re.Match obtained from _REGEX_TS)
+ into an MPEG PES timestamp: a tick counter at 90 kHz resolution.
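+
+ For example, '01:02:03.500' is 3723500 ms, which scales to
+ 3723500 * 90 = 335115000 ticks.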
+ """
+ return 90 * sum(
+ int(part or 0) * mult for part, mult in zip(ts.groups(), (3600_000, 60_000, 1000, 1)))
+
+
+def _format_ts(ts):
+ """
+ Convert an MPEG PES timestamp into a WebVTT timestamp.
+ This will lose sub-millisecond precision.
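+
+ For example, 335115000 ticks round to (335115000 + 45) // 90 = 3723500 ms,
+ which is formatted as '01:02:03.500'.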
+ """
+ return '%02u:%02u:%02u.%03u' % timetuple_from_msec(int((ts + 45) // 90))
+
+
+class Block:
+ """
+ An abstract WebVTT block.
+ """
+
+ def __init__(self, **kwargs):
+ for key, val in kwargs.items():
+ setattr(self, key, val)
+
+ @classmethod
+ def parse(cls, parser):
+ m = parser.match(cls._REGEX)
+ if not m:
+ return None
+ parser.advance(m)
+ return cls(raw=m.group(0))
+
+ def write_into(self, stream):
+ stream.write(self.raw)
+
+
+class HeaderBlock(Block):
+ """
+ A WebVTT block that may only appear in the header part of the file,
+ i.e. before any cue blocks.
+ """
+ pass
+
+
+class Magic(HeaderBlock):
+ _REGEX = re.compile(r'\ufeff?WEBVTT([ \t][^\r\n]*)?(?:\r\n|[\r\n])')
+
+ # XXX: The X-TIMESTAMP-MAP extension is described in RFC 8216 §3.5
+ # <https://tools.ietf.org/html/rfc8216#section-3.5>, but the RFC
+ # doesn’t specify the exact grammar nor where in the WebVTT
+ # syntax it should be placed; the below has been devised based
+ # on usage in the wild
+ #
+ # And strictly speaking, the presence of this extension violates
+ # the W3C WebVTT spec. Oh well.
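+ #
+ # A typical header line (values are illustrative):
+ # X-TIMESTAMP-MAP=LOCAL:00:00:00.000,MPEGTS:900000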
+
+ _REGEX_TSMAP = re.compile(r'X-TIMESTAMP-MAP=')
+ _REGEX_TSMAP_LOCAL = re.compile(r'LOCAL:')
+ _REGEX_TSMAP_MPEGTS = re.compile(r'MPEGTS:([0-9]+)')
+ _REGEX_TSMAP_SEP = re.compile(r'[ \t]*,[ \t]*')
+
+ # This was removed from the spec in the 2017 revision;
+ # the last spec draft to describe this syntax element is
+ # <https://www.w3.org/TR/2015/WD-webvtt1-20151208/#webvtt-metadata-header>.
+ # Nevertheless, YouTube keeps serving them.
+ _REGEX_META = re.compile(r'(?:(?!-->)[^\r\n])+:(?:(?!-->)[^\r\n])+(?:\r\n|[\r\n])')
+
+ @classmethod
+ def __parse_tsmap(cls, parser):
+ parser = parser.child()
+
+ while True:
+ m = parser.consume(cls._REGEX_TSMAP_LOCAL)
+ if m:
+ m = parser.consume(_REGEX_TS)
+ if m is None:
+ raise ParseError(parser)
+ local = _parse_ts(m)
+ if local is None:
+ raise ParseError(parser)
+ else:
+ m = parser.consume(cls._REGEX_TSMAP_MPEGTS)
+ if m:
+ mpegts = int_or_none(m.group(1))
+ if mpegts is None:
+ raise ParseError(parser)
+ else:
+ raise ParseError(parser)
+ if parser.consume(cls._REGEX_TSMAP_SEP):
+ continue
+ if parser.consume(_REGEX_NL):
+ break
+ raise ParseError(parser)
+
+ parser.commit()
+ return local, mpegts
+
+ @classmethod
+ def parse(cls, parser):
+ parser = parser.child()
+
+ m = parser.consume(cls._REGEX)
+ if not m:
+ raise ParseError(parser)
+
+ extra = m.group(1)
+ local, mpegts, meta = None, None, ''
+ while not parser.consume(_REGEX_NL):
+ if parser.consume(cls._REGEX_TSMAP):
+ local, mpegts = cls.__parse_tsmap(parser)
+ continue
+ m = parser.consume(cls._REGEX_META)
+ if m:
+ meta += m.group(0)
+ continue
+ raise ParseError(parser)
+ parser.commit()
+ return cls(extra=extra, mpegts=mpegts, local=local, meta=meta)
+
+ def write_into(self, stream):
+ stream.write('WEBVTT')
+ if self.extra is not None:
+ stream.write(self.extra)
+ stream.write('\n')
+ if self.local or self.mpegts:
+ stream.write('X-TIMESTAMP-MAP=LOCAL:')
+ stream.write(_format_ts(self.local if self.local is not None else 0))
+ stream.write(',MPEGTS:')
+ stream.write(str(self.mpegts if self.mpegts is not None else 0))
+ stream.write('\n')
+ if self.meta:
+ stream.write(self.meta)
+ stream.write('\n')
+
+
+class StyleBlock(HeaderBlock):
+ _REGEX = re.compile(r'''(?x)
+ STYLE[\ \t]*(?:\r\n|[\r\n])
+ ((?:(?!-->)[^\r\n])+(?:\r\n|[\r\n]))*
+ (?:\r\n|[\r\n])
+ ''')
+
+
+class RegionBlock(HeaderBlock):
+ _REGEX = re.compile(r'''(?x)
+ REGION[\ \t]*
+ ((?:(?!-->)[^\r\n])+(?:\r\n|[\r\n]))*
+ (?:\r\n|[\r\n])
+ ''')
+
+
+class CommentBlock(Block):
+ _REGEX = re.compile(r'''(?x)
+ NOTE(?:\r\n|[\ \t\r\n])
+ ((?:(?!-->)[^\r\n])+(?:\r\n|[\r\n]))*
+ (?:\r\n|[\r\n])
+ ''')
+
+
+class CueBlock(Block):
+ """
+ A cue block. The payload is not interpreted.
+ """
+
+ _REGEX_ID = re.compile(r'((?:(?!-->)[^\r\n])+)(?:\r\n|[\r\n])')
+ _REGEX_ARROW = re.compile(r'[ \t]+-->[ \t]+')
+ _REGEX_SETTINGS = re.compile(r'[ \t]+((?:(?!-->)[^\r\n])+)')
+ _REGEX_PAYLOAD = re.compile(r'[^\r\n]+(?:\r\n|[\r\n])?')
+
+ @classmethod
+ def parse(cls, parser):
+ parser = parser.child()
+
+ id = None
+ m = parser.consume(cls._REGEX_ID)
+ if m:
+ id = m.group(1)
+
+ m0 = parser.consume(_REGEX_TS)
+ if not m0:
+ return None
+ if not parser.consume(cls._REGEX_ARROW):
+ return None
+ m1 = parser.consume(_REGEX_TS)
+ if not m1:
+ return None
+ m2 = parser.consume(cls._REGEX_SETTINGS)
+ parser.consume(_REGEX_OPTIONAL_WHITESPACE)
+ if not parser.consume(_REGEX_NL):
+ return None
+
+ start = _parse_ts(m0)
+ end = _parse_ts(m1)
+ settings = m2.group(1) if m2 is not None else None
+
+ text = io.StringIO()
+ while True:
+ m = parser.consume(cls._REGEX_PAYLOAD)
+ if not m:
+ break
+ text.write(m.group(0))
+
+ parser.commit()
+ return cls(
+ id=id,
+ start=start, end=end, settings=settings,
+ text=text.getvalue()
+ )
+
+ def write_into(self, stream):
+ if self.id is not None:
+ stream.write(self.id)
+ stream.write('\n')
+ stream.write(_format_ts(self.start))
+ stream.write(' --> ')
+ stream.write(_format_ts(self.end))
+ if self.settings is not None:
+ stream.write(' ')
+ stream.write(self.settings)
+ stream.write('\n')
+ stream.write(self.text)
+ stream.write('\n')
+
+ @property
+ def as_json(self):
+ return {
+ 'id': self.id,
+ 'start': self.start,
+ 'end': self.end,
+ 'text': self.text,
+ 'settings': self.settings,
+ }
+
+ def __eq__(self, other):
+ return self.as_json == other.as_json
+
+ @classmethod
+ def from_json(cls, json):
+ return cls(
+ id=json['id'],
+ start=json['start'],
+ end=json['end'],
+ text=json['text'],
+ settings=json['settings']
+ )
+
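+ # Whether `other` seamlessly continues this cue: identical payload and
+ # settings, with `other` starting exactly where this cue ends.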
+ def hinges(self, other):
+ if self.text != other.text:
+ return False
+ if self.settings != other.settings:
+ return False
+ return self.start <= self.end == other.start <= other.end
+
+
+def parse_fragment(frag_content):
+ """
+ A generator that yields (partially) parsed WebVTT blocks when given
+ a bytes object containing the raw contents of a WebVTT file.
+ """
+
+ parser = _MatchParser(frag_content.decode())
+
+ yield Magic.parse(parser)
+
+ while not parser.match(_REGEX_EOF):
+ if parser.consume(_REGEX_BLANK):
+ continue
+
+ block = RegionBlock.parse(parser)
+ if block:
+ yield block
+ continue
+ block = StyleBlock.parse(parser)
+ if block:
+ yield block
+ continue
+ block = CommentBlock.parse(parser)
+ if block:
+ yield block # XXX: or skip
+ continue
+
+ break
+
+ while not parser.match(_REGEX_EOF):
+ if parser.consume(_REGEX_BLANK):
+ continue
+
+ block = CommentBlock.parse(parser)
+ if block:
+ yield block # XXX: or skip
+ continue
+ block = CueBlock.parse(parser)
+ if block:
+ yield block
+ continue
+
+ raise ParseError(parser)